{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.1260226129285296, "eval_steps": 500, "global_step": 34900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.226385379634652e-05, "grad_norm": 73.0, "learning_rate": 2.8571428571428572e-08, "loss": 3.8294, "step": 1 }, { "epoch": 6.452770759269304e-05, "grad_norm": 73.5, "learning_rate": 5.7142857142857144e-08, "loss": 3.8099, "step": 2 }, { "epoch": 9.679156138903956e-05, "grad_norm": 74.5, "learning_rate": 8.571428571428572e-08, "loss": 3.8216, "step": 3 }, { "epoch": 0.0001290554151853861, "grad_norm": 73.0, "learning_rate": 1.1428571428571429e-07, "loss": 3.8641, "step": 4 }, { "epoch": 0.0001613192689817326, "grad_norm": 72.0, "learning_rate": 1.4285714285714287e-07, "loss": 3.7941, "step": 5 }, { "epoch": 0.00019358312277807912, "grad_norm": 71.5, "learning_rate": 1.7142857142857143e-07, "loss": 3.788, "step": 6 }, { "epoch": 0.00022584697657442566, "grad_norm": 71.5, "learning_rate": 2.0000000000000002e-07, "loss": 3.8271, "step": 7 }, { "epoch": 0.0002581108303707722, "grad_norm": 69.5, "learning_rate": 2.2857142857142858e-07, "loss": 3.8117, "step": 8 }, { "epoch": 0.0002903746841671187, "grad_norm": 72.5, "learning_rate": 2.5714285714285716e-07, "loss": 3.8127, "step": 9 }, { "epoch": 0.0003226385379634652, "grad_norm": 71.5, "learning_rate": 2.8571428571428575e-07, "loss": 3.7996, "step": 10 }, { "epoch": 0.00035490239175981175, "grad_norm": 75.5, "learning_rate": 3.142857142857143e-07, "loss": 3.8618, "step": 11 }, { "epoch": 0.00038716624555615824, "grad_norm": 70.5, "learning_rate": 3.4285714285714286e-07, "loss": 3.8067, "step": 12 }, { "epoch": 0.0004194300993525048, "grad_norm": 72.5, "learning_rate": 3.7142857142857145e-07, "loss": 3.7922, "step": 13 }, { "epoch": 0.0004516939531488513, "grad_norm": 75.5, "learning_rate": 4.0000000000000003e-07, "loss": 3.8633, "step": 14 }, { "epoch": 0.0004839578069451978, "grad_norm": 70.5, "learning_rate": 4.2857142857142857e-07, "loss": 3.7449, "step": 15 }, { "epoch": 0.0005162216607415444, "grad_norm": 71.0, "learning_rate": 4.5714285714285715e-07, "loss": 3.8492, "step": 16 }, { "epoch": 0.0005484855145378908, "grad_norm": 70.0, "learning_rate": 4.857142857142857e-07, "loss": 3.7614, "step": 17 }, { "epoch": 0.0005807493683342374, "grad_norm": 70.5, "learning_rate": 5.142857142857143e-07, "loss": 3.7961, "step": 18 }, { "epoch": 0.0006130132221305839, "grad_norm": 68.0, "learning_rate": 5.428571428571429e-07, "loss": 3.7802, "step": 19 }, { "epoch": 0.0006452770759269304, "grad_norm": 71.5, "learning_rate": 5.714285714285715e-07, "loss": 3.7555, "step": 20 }, { "epoch": 0.000677540929723277, "grad_norm": 68.5, "learning_rate": 6.000000000000001e-07, "loss": 3.7527, "step": 21 }, { "epoch": 0.0007098047835196235, "grad_norm": 68.0, "learning_rate": 6.285714285714286e-07, "loss": 3.7512, "step": 22 }, { "epoch": 0.00074206863731597, "grad_norm": 65.0, "learning_rate": 6.571428571428572e-07, "loss": 3.6944, "step": 23 }, { "epoch": 0.0007743324911123165, "grad_norm": 65.0, "learning_rate": 6.857142857142857e-07, "loss": 3.7007, "step": 24 }, { "epoch": 0.0008065963449086631, "grad_norm": 62.75, "learning_rate": 7.142857142857143e-07, "loss": 3.6449, "step": 25 }, { "epoch": 0.0008388601987050096, "grad_norm": 66.0, "learning_rate": 7.428571428571429e-07, "loss": 3.6831, "step": 26 }, { "epoch": 0.000871124052501356, "grad_norm": 63.25, "learning_rate": 7.714285714285715e-07, "loss": 3.6745, "step": 27 }, { "epoch": 0.0009033879062977026, "grad_norm": 65.0, "learning_rate": 8.000000000000001e-07, "loss": 3.6752, "step": 28 }, { "epoch": 0.0009356517600940491, "grad_norm": 62.0, "learning_rate": 8.285714285714287e-07, "loss": 3.6101, "step": 29 }, { "epoch": 0.0009679156138903956, "grad_norm": 60.25, "learning_rate": 8.571428571428571e-07, "loss": 3.617, "step": 30 }, { "epoch": 0.0010001794676867421, "grad_norm": 60.75, "learning_rate": 8.857142857142858e-07, "loss": 3.5806, "step": 31 }, { "epoch": 0.0010324433214830887, "grad_norm": 57.25, "learning_rate": 9.142857142857143e-07, "loss": 3.5685, "step": 32 }, { "epoch": 0.0010647071752794353, "grad_norm": 60.75, "learning_rate": 9.42857142857143e-07, "loss": 3.6039, "step": 33 }, { "epoch": 0.0010969710290757817, "grad_norm": 55.25, "learning_rate": 9.714285714285715e-07, "loss": 3.528, "step": 34 }, { "epoch": 0.0011292348828721283, "grad_norm": 55.75, "learning_rate": 1e-06, "loss": 3.5301, "step": 35 }, { "epoch": 0.0011614987366684749, "grad_norm": 59.0, "learning_rate": 1.0285714285714286e-06, "loss": 3.5614, "step": 36 }, { "epoch": 0.0011937625904648213, "grad_norm": 55.5, "learning_rate": 1.0571428571428571e-06, "loss": 3.4915, "step": 37 }, { "epoch": 0.0012260264442611679, "grad_norm": 51.5, "learning_rate": 1.0857142857142858e-06, "loss": 3.4326, "step": 38 }, { "epoch": 0.0012582902980575144, "grad_norm": 47.75, "learning_rate": 1.1142857142857143e-06, "loss": 3.3866, "step": 39 }, { "epoch": 0.0012905541518538608, "grad_norm": 45.5, "learning_rate": 1.142857142857143e-06, "loss": 3.3738, "step": 40 }, { "epoch": 0.0013228180056502074, "grad_norm": 42.0, "learning_rate": 1.1714285714285715e-06, "loss": 3.3584, "step": 41 }, { "epoch": 0.001355081859446554, "grad_norm": 38.75, "learning_rate": 1.2000000000000002e-06, "loss": 3.2913, "step": 42 }, { "epoch": 0.0013873457132429004, "grad_norm": 36.5, "learning_rate": 1.2285714285714286e-06, "loss": 3.3495, "step": 43 }, { "epoch": 0.001419609567039247, "grad_norm": 34.0, "learning_rate": 1.2571428571428571e-06, "loss": 3.3018, "step": 44 }, { "epoch": 0.0014518734208355934, "grad_norm": 28.25, "learning_rate": 1.2857142857142858e-06, "loss": 3.2069, "step": 45 }, { "epoch": 0.00148413727463194, "grad_norm": 26.5, "learning_rate": 1.3142857142857145e-06, "loss": 3.2243, "step": 46 }, { "epoch": 0.0015164011284282866, "grad_norm": 24.75, "learning_rate": 1.3428571428571428e-06, "loss": 3.2094, "step": 47 }, { "epoch": 0.001548664982224633, "grad_norm": 23.375, "learning_rate": 1.3714285714285715e-06, "loss": 3.2074, "step": 48 }, { "epoch": 0.0015809288360209795, "grad_norm": 21.75, "learning_rate": 1.4000000000000001e-06, "loss": 3.207, "step": 49 }, { "epoch": 0.0016131926898173261, "grad_norm": 19.375, "learning_rate": 1.4285714285714286e-06, "loss": 3.1651, "step": 50 }, { "epoch": 0.0016454565436136725, "grad_norm": 19.875, "learning_rate": 1.4571428571428571e-06, "loss": 3.1862, "step": 51 }, { "epoch": 0.0016777203974100191, "grad_norm": 17.75, "learning_rate": 1.4857142857142858e-06, "loss": 3.1351, "step": 52 }, { "epoch": 0.0017099842512063657, "grad_norm": 17.5, "learning_rate": 1.5142857142857143e-06, "loss": 3.1421, "step": 53 }, { "epoch": 0.001742248105002712, "grad_norm": 17.125, "learning_rate": 1.542857142857143e-06, "loss": 3.1451, "step": 54 }, { "epoch": 0.0017745119587990587, "grad_norm": 15.375, "learning_rate": 1.5714285714285714e-06, "loss": 3.0993, "step": 55 }, { "epoch": 0.0018067758125954053, "grad_norm": 14.875, "learning_rate": 1.6000000000000001e-06, "loss": 3.0951, "step": 56 }, { "epoch": 0.0018390396663917517, "grad_norm": 14.375, "learning_rate": 1.6285714285714286e-06, "loss": 3.0844, "step": 57 }, { "epoch": 0.0018713035201880983, "grad_norm": 14.0625, "learning_rate": 1.6571428571428573e-06, "loss": 3.1022, "step": 58 }, { "epoch": 0.0019035673739844449, "grad_norm": 12.6875, "learning_rate": 1.6857142857142858e-06, "loss": 3.0324, "step": 59 }, { "epoch": 0.0019358312277807912, "grad_norm": 11.9375, "learning_rate": 1.7142857142857143e-06, "loss": 3.0036, "step": 60 }, { "epoch": 0.001968095081577138, "grad_norm": 11.625, "learning_rate": 1.742857142857143e-06, "loss": 3.045, "step": 61 }, { "epoch": 0.0020003589353734842, "grad_norm": 10.8125, "learning_rate": 1.7714285714285717e-06, "loss": 3.0042, "step": 62 }, { "epoch": 0.002032622789169831, "grad_norm": 10.625, "learning_rate": 1.8e-06, "loss": 3.0255, "step": 63 }, { "epoch": 0.0020648866429661774, "grad_norm": 9.5, "learning_rate": 1.8285714285714286e-06, "loss": 2.9943, "step": 64 }, { "epoch": 0.002097150496762524, "grad_norm": 9.0625, "learning_rate": 1.8571428571428573e-06, "loss": 2.9698, "step": 65 }, { "epoch": 0.0021294143505588706, "grad_norm": 8.5625, "learning_rate": 1.885714285714286e-06, "loss": 2.9229, "step": 66 }, { "epoch": 0.002161678204355217, "grad_norm": 7.90625, "learning_rate": 1.9142857142857145e-06, "loss": 2.9335, "step": 67 }, { "epoch": 0.0021939420581515634, "grad_norm": 7.34375, "learning_rate": 1.942857142857143e-06, "loss": 2.9583, "step": 68 }, { "epoch": 0.00222620591194791, "grad_norm": 6.65625, "learning_rate": 1.9714285714285714e-06, "loss": 2.892, "step": 69 }, { "epoch": 0.0022584697657442566, "grad_norm": 6.3125, "learning_rate": 2e-06, "loss": 2.895, "step": 70 }, { "epoch": 0.002290733619540603, "grad_norm": 5.8125, "learning_rate": 2.028571428571429e-06, "loss": 2.9195, "step": 71 }, { "epoch": 0.0023229974733369498, "grad_norm": 5.75, "learning_rate": 2.0571428571428573e-06, "loss": 2.8606, "step": 72 }, { "epoch": 0.002355261327133296, "grad_norm": 5.375, "learning_rate": 2.0857142857142858e-06, "loss": 2.8722, "step": 73 }, { "epoch": 0.0023875251809296425, "grad_norm": 5.0, "learning_rate": 2.1142857142857143e-06, "loss": 2.8414, "step": 74 }, { "epoch": 0.0024197890347259893, "grad_norm": 4.59375, "learning_rate": 2.1428571428571427e-06, "loss": 2.8385, "step": 75 }, { "epoch": 0.0024520528885223357, "grad_norm": 4.4375, "learning_rate": 2.1714285714285716e-06, "loss": 2.8437, "step": 76 }, { "epoch": 0.002484316742318682, "grad_norm": 4.0625, "learning_rate": 2.2e-06, "loss": 2.8063, "step": 77 }, { "epoch": 0.002516580596115029, "grad_norm": 3.890625, "learning_rate": 2.2285714285714286e-06, "loss": 2.8333, "step": 78 }, { "epoch": 0.0025488444499113753, "grad_norm": 3.609375, "learning_rate": 2.2571428571428575e-06, "loss": 2.8226, "step": 79 }, { "epoch": 0.0025811083037077217, "grad_norm": 3.390625, "learning_rate": 2.285714285714286e-06, "loss": 2.7465, "step": 80 }, { "epoch": 0.0026133721575040685, "grad_norm": 3.296875, "learning_rate": 2.314285714285714e-06, "loss": 2.6639, "step": 81 }, { "epoch": 0.002645636011300415, "grad_norm": 3.15625, "learning_rate": 2.342857142857143e-06, "loss": 2.7095, "step": 82 }, { "epoch": 0.0026778998650967612, "grad_norm": 2.984375, "learning_rate": 2.3714285714285714e-06, "loss": 2.6373, "step": 83 }, { "epoch": 0.002710163718893108, "grad_norm": 2.6875, "learning_rate": 2.4000000000000003e-06, "loss": 2.7333, "step": 84 }, { "epoch": 0.0027424275726894544, "grad_norm": 2.46875, "learning_rate": 2.428571428571429e-06, "loss": 2.7282, "step": 85 }, { "epoch": 0.002774691426485801, "grad_norm": 2.4375, "learning_rate": 2.4571428571428573e-06, "loss": 2.7659, "step": 86 }, { "epoch": 0.0028069552802821476, "grad_norm": 2.296875, "learning_rate": 2.4857142857142858e-06, "loss": 2.7519, "step": 87 }, { "epoch": 0.002839219134078494, "grad_norm": 2.078125, "learning_rate": 2.5142857142857142e-06, "loss": 2.7576, "step": 88 }, { "epoch": 0.0028714829878748404, "grad_norm": 2.03125, "learning_rate": 2.5428571428571427e-06, "loss": 2.7362, "step": 89 }, { "epoch": 0.0029037468416711868, "grad_norm": 1.8671875, "learning_rate": 2.5714285714285716e-06, "loss": 2.7292, "step": 90 }, { "epoch": 0.0029360106954675336, "grad_norm": 1.8359375, "learning_rate": 2.6e-06, "loss": 2.7452, "step": 91 }, { "epoch": 0.00296827454926388, "grad_norm": 1.71875, "learning_rate": 2.628571428571429e-06, "loss": 2.7076, "step": 92 }, { "epoch": 0.0030005384030602263, "grad_norm": 1.5546875, "learning_rate": 2.657142857142857e-06, "loss": 2.7404, "step": 93 }, { "epoch": 0.003032802256856573, "grad_norm": 1.515625, "learning_rate": 2.6857142857142855e-06, "loss": 2.6938, "step": 94 }, { "epoch": 0.0030650661106529195, "grad_norm": 1.53125, "learning_rate": 2.7142857142857144e-06, "loss": 2.7273, "step": 95 }, { "epoch": 0.003097329964449266, "grad_norm": 1.40625, "learning_rate": 2.742857142857143e-06, "loss": 2.7273, "step": 96 }, { "epoch": 0.0031295938182456127, "grad_norm": 1.3125, "learning_rate": 2.7714285714285714e-06, "loss": 2.6866, "step": 97 }, { "epoch": 0.003161857672041959, "grad_norm": 1.265625, "learning_rate": 2.8000000000000003e-06, "loss": 2.7144, "step": 98 }, { "epoch": 0.0031941215258383055, "grad_norm": 1.21875, "learning_rate": 2.8285714285714288e-06, "loss": 2.6982, "step": 99 }, { "epoch": 0.0032263853796346523, "grad_norm": 1.1484375, "learning_rate": 2.8571428571428573e-06, "loss": 2.6944, "step": 100 }, { "epoch": 0.0032586492334309987, "grad_norm": 1.1328125, "learning_rate": 2.8857142857142857e-06, "loss": 2.6843, "step": 101 }, { "epoch": 0.003290913087227345, "grad_norm": 1.09375, "learning_rate": 2.9142857142857142e-06, "loss": 2.6981, "step": 102 }, { "epoch": 0.003323176941023692, "grad_norm": 0.984375, "learning_rate": 2.942857142857143e-06, "loss": 2.6996, "step": 103 }, { "epoch": 0.0033554407948200382, "grad_norm": 0.953125, "learning_rate": 2.9714285714285716e-06, "loss": 2.6835, "step": 104 }, { "epoch": 0.0033877046486163846, "grad_norm": 0.94140625, "learning_rate": 3e-06, "loss": 2.6667, "step": 105 }, { "epoch": 0.0034199685024127314, "grad_norm": 0.8984375, "learning_rate": 3.0285714285714286e-06, "loss": 2.6787, "step": 106 }, { "epoch": 0.003452232356209078, "grad_norm": 0.87109375, "learning_rate": 3.057142857142857e-06, "loss": 2.6692, "step": 107 }, { "epoch": 0.003484496210005424, "grad_norm": 0.83203125, "learning_rate": 3.085714285714286e-06, "loss": 2.6661, "step": 108 }, { "epoch": 0.003516760063801771, "grad_norm": 0.796875, "learning_rate": 3.1142857142857144e-06, "loss": 2.6794, "step": 109 }, { "epoch": 0.0035490239175981174, "grad_norm": 0.796875, "learning_rate": 3.142857142857143e-06, "loss": 2.634, "step": 110 }, { "epoch": 0.0035812877713944638, "grad_norm": 0.765625, "learning_rate": 3.171428571428572e-06, "loss": 2.6755, "step": 111 }, { "epoch": 0.0036135516251908106, "grad_norm": 0.734375, "learning_rate": 3.2000000000000003e-06, "loss": 2.6629, "step": 112 }, { "epoch": 0.003645815478987157, "grad_norm": 0.75390625, "learning_rate": 3.2285714285714283e-06, "loss": 2.6917, "step": 113 }, { "epoch": 0.0036780793327835033, "grad_norm": 0.703125, "learning_rate": 3.2571428571428572e-06, "loss": 2.6572, "step": 114 }, { "epoch": 0.00371034318657985, "grad_norm": 0.66796875, "learning_rate": 3.2857142857142857e-06, "loss": 2.6165, "step": 115 }, { "epoch": 0.0037426070403761965, "grad_norm": 0.6484375, "learning_rate": 3.3142857142857146e-06, "loss": 2.6275, "step": 116 }, { "epoch": 0.003774870894172543, "grad_norm": 0.64453125, "learning_rate": 3.342857142857143e-06, "loss": 2.6729, "step": 117 }, { "epoch": 0.0038071347479688897, "grad_norm": 0.6484375, "learning_rate": 3.3714285714285716e-06, "loss": 2.6711, "step": 118 }, { "epoch": 0.003839398601765236, "grad_norm": 0.609375, "learning_rate": 3.4e-06, "loss": 2.654, "step": 119 }, { "epoch": 0.0038716624555615825, "grad_norm": 0.609375, "learning_rate": 3.4285714285714285e-06, "loss": 2.6627, "step": 120 }, { "epoch": 0.0039039263093579293, "grad_norm": 0.58984375, "learning_rate": 3.457142857142857e-06, "loss": 2.6617, "step": 121 }, { "epoch": 0.003936190163154276, "grad_norm": 0.5625, "learning_rate": 3.485714285714286e-06, "loss": 2.6506, "step": 122 }, { "epoch": 0.003968454016950622, "grad_norm": 0.5703125, "learning_rate": 3.5142857142857144e-06, "loss": 2.6534, "step": 123 }, { "epoch": 0.0040007178707469684, "grad_norm": 0.5703125, "learning_rate": 3.5428571428571433e-06, "loss": 2.6397, "step": 124 }, { "epoch": 0.004032981724543315, "grad_norm": 0.5546875, "learning_rate": 3.5714285714285714e-06, "loss": 2.6473, "step": 125 }, { "epoch": 0.004065245578339662, "grad_norm": 0.54296875, "learning_rate": 3.6e-06, "loss": 2.6525, "step": 126 }, { "epoch": 0.0040975094321360084, "grad_norm": 0.56640625, "learning_rate": 3.6285714285714287e-06, "loss": 2.6444, "step": 127 }, { "epoch": 0.004129773285932355, "grad_norm": 0.54296875, "learning_rate": 3.6571428571428572e-06, "loss": 2.6514, "step": 128 }, { "epoch": 0.004162037139728701, "grad_norm": 0.51953125, "learning_rate": 3.6857142857142857e-06, "loss": 2.6695, "step": 129 }, { "epoch": 0.004194300993525048, "grad_norm": 0.51171875, "learning_rate": 3.7142857142857146e-06, "loss": 2.631, "step": 130 }, { "epoch": 0.004226564847321394, "grad_norm": 0.515625, "learning_rate": 3.742857142857143e-06, "loss": 2.6211, "step": 131 }, { "epoch": 0.004258828701117741, "grad_norm": 0.50390625, "learning_rate": 3.771428571428572e-06, "loss": 2.6387, "step": 132 }, { "epoch": 0.004291092554914088, "grad_norm": 0.49609375, "learning_rate": 3.8000000000000005e-06, "loss": 2.6469, "step": 133 }, { "epoch": 0.004323356408710434, "grad_norm": 0.49609375, "learning_rate": 3.828571428571429e-06, "loss": 2.6259, "step": 134 }, { "epoch": 0.00435562026250678, "grad_norm": 0.4765625, "learning_rate": 3.857142857142857e-06, "loss": 2.5932, "step": 135 }, { "epoch": 0.004387884116303127, "grad_norm": 0.48828125, "learning_rate": 3.885714285714286e-06, "loss": 2.5666, "step": 136 }, { "epoch": 0.004420147970099473, "grad_norm": 0.470703125, "learning_rate": 3.914285714285714e-06, "loss": 2.6, "step": 137 }, { "epoch": 0.00445241182389582, "grad_norm": 0.482421875, "learning_rate": 3.942857142857143e-06, "loss": 2.6193, "step": 138 }, { "epoch": 0.004484675677692167, "grad_norm": 0.546875, "learning_rate": 3.971428571428572e-06, "loss": 2.6345, "step": 139 }, { "epoch": 0.004516939531488513, "grad_norm": 0.462890625, "learning_rate": 4e-06, "loss": 2.631, "step": 140 }, { "epoch": 0.0045492033852848595, "grad_norm": 0.46484375, "learning_rate": 4.028571428571429e-06, "loss": 2.6144, "step": 141 }, { "epoch": 0.004581467239081206, "grad_norm": 0.451171875, "learning_rate": 4.057142857142858e-06, "loss": 2.6442, "step": 142 }, { "epoch": 0.004613731092877552, "grad_norm": 0.44140625, "learning_rate": 4.085714285714286e-06, "loss": 2.6421, "step": 143 }, { "epoch": 0.0046459949466738995, "grad_norm": 0.458984375, "learning_rate": 4.114285714285715e-06, "loss": 2.6095, "step": 144 }, { "epoch": 0.004678258800470246, "grad_norm": 0.44921875, "learning_rate": 4.1428571428571435e-06, "loss": 2.5935, "step": 145 }, { "epoch": 0.004710522654266592, "grad_norm": 0.4375, "learning_rate": 4.1714285714285715e-06, "loss": 2.6307, "step": 146 }, { "epoch": 0.004742786508062939, "grad_norm": 0.431640625, "learning_rate": 4.2000000000000004e-06, "loss": 2.6189, "step": 147 }, { "epoch": 0.004775050361859285, "grad_norm": 0.427734375, "learning_rate": 4.2285714285714285e-06, "loss": 2.6024, "step": 148 }, { "epoch": 0.004807314215655631, "grad_norm": 0.431640625, "learning_rate": 4.2571428571428566e-06, "loss": 2.6206, "step": 149 }, { "epoch": 0.004839578069451979, "grad_norm": 0.44921875, "learning_rate": 4.2857142857142855e-06, "loss": 2.6218, "step": 150 }, { "epoch": 0.004871841923248325, "grad_norm": 0.419921875, "learning_rate": 4.314285714285714e-06, "loss": 2.6061, "step": 151 }, { "epoch": 0.004904105777044671, "grad_norm": 0.427734375, "learning_rate": 4.342857142857143e-06, "loss": 2.6148, "step": 152 }, { "epoch": 0.004936369630841018, "grad_norm": 0.419921875, "learning_rate": 4.371428571428571e-06, "loss": 2.622, "step": 153 }, { "epoch": 0.004968633484637364, "grad_norm": 0.40625, "learning_rate": 4.4e-06, "loss": 2.5847, "step": 154 }, { "epoch": 0.0050008973384337106, "grad_norm": 0.427734375, "learning_rate": 4.428571428571429e-06, "loss": 2.6087, "step": 155 }, { "epoch": 0.005033161192230058, "grad_norm": 0.427734375, "learning_rate": 4.457142857142857e-06, "loss": 2.5679, "step": 156 }, { "epoch": 0.005065425046026404, "grad_norm": 0.412109375, "learning_rate": 4.485714285714286e-06, "loss": 2.5873, "step": 157 }, { "epoch": 0.0050976888998227506, "grad_norm": 0.408203125, "learning_rate": 4.514285714285715e-06, "loss": 2.6028, "step": 158 }, { "epoch": 0.005129952753619097, "grad_norm": 0.416015625, "learning_rate": 4.542857142857143e-06, "loss": 2.6236, "step": 159 }, { "epoch": 0.005162216607415443, "grad_norm": 0.419921875, "learning_rate": 4.571428571428572e-06, "loss": 2.5839, "step": 160 }, { "epoch": 0.00519448046121179, "grad_norm": 0.3984375, "learning_rate": 4.6e-06, "loss": 2.6127, "step": 161 }, { "epoch": 0.005226744315008137, "grad_norm": 0.408203125, "learning_rate": 4.628571428571428e-06, "loss": 2.5526, "step": 162 }, { "epoch": 0.005259008168804483, "grad_norm": 0.400390625, "learning_rate": 4.657142857142857e-06, "loss": 2.5972, "step": 163 }, { "epoch": 0.00529127202260083, "grad_norm": 0.455078125, "learning_rate": 4.685714285714286e-06, "loss": 2.5133, "step": 164 }, { "epoch": 0.005323535876397176, "grad_norm": 0.421875, "learning_rate": 4.714285714285714e-06, "loss": 2.5118, "step": 165 }, { "epoch": 0.0053557997301935225, "grad_norm": 0.4140625, "learning_rate": 4.742857142857143e-06, "loss": 2.5068, "step": 166 }, { "epoch": 0.005388063583989869, "grad_norm": 0.416015625, "learning_rate": 4.771428571428572e-06, "loss": 2.5208, "step": 167 }, { "epoch": 0.005420327437786216, "grad_norm": 0.3984375, "learning_rate": 4.800000000000001e-06, "loss": 2.5303, "step": 168 }, { "epoch": 0.0054525912915825625, "grad_norm": 0.40234375, "learning_rate": 4.828571428571429e-06, "loss": 2.5094, "step": 169 }, { "epoch": 0.005484855145378909, "grad_norm": 0.408203125, "learning_rate": 4.857142857142858e-06, "loss": 2.4954, "step": 170 }, { "epoch": 0.005517118999175255, "grad_norm": 0.4140625, "learning_rate": 4.8857142857142865e-06, "loss": 2.4892, "step": 171 }, { "epoch": 0.005549382852971602, "grad_norm": 0.390625, "learning_rate": 4.9142857142857145e-06, "loss": 2.4594, "step": 172 }, { "epoch": 0.005581646706767948, "grad_norm": 0.3984375, "learning_rate": 4.942857142857143e-06, "loss": 2.4676, "step": 173 }, { "epoch": 0.005613910560564295, "grad_norm": 0.390625, "learning_rate": 4.9714285714285715e-06, "loss": 2.4908, "step": 174 }, { "epoch": 0.005646174414360642, "grad_norm": 0.396484375, "learning_rate": 4.9999999999999996e-06, "loss": 2.4844, "step": 175 }, { "epoch": 0.005678438268156988, "grad_norm": 0.40234375, "learning_rate": 5.0285714285714285e-06, "loss": 2.4714, "step": 176 }, { "epoch": 0.005710702121953334, "grad_norm": 0.376953125, "learning_rate": 5.057142857142857e-06, "loss": 2.4788, "step": 177 }, { "epoch": 0.005742965975749681, "grad_norm": 0.396484375, "learning_rate": 5.0857142857142854e-06, "loss": 2.5272, "step": 178 }, { "epoch": 0.005775229829546027, "grad_norm": 0.380859375, "learning_rate": 5.114285714285714e-06, "loss": 2.4946, "step": 179 }, { "epoch": 0.0058074936833423735, "grad_norm": 0.3828125, "learning_rate": 5.142857142857143e-06, "loss": 2.4961, "step": 180 }, { "epoch": 0.005839757537138721, "grad_norm": 0.392578125, "learning_rate": 5.171428571428571e-06, "loss": 2.4878, "step": 181 }, { "epoch": 0.005872021390935067, "grad_norm": 0.37890625, "learning_rate": 5.2e-06, "loss": 2.4645, "step": 182 }, { "epoch": 0.0059042852447314135, "grad_norm": 0.3828125, "learning_rate": 5.228571428571429e-06, "loss": 2.4903, "step": 183 }, { "epoch": 0.00593654909852776, "grad_norm": 0.390625, "learning_rate": 5.257142857142858e-06, "loss": 2.483, "step": 184 }, { "epoch": 0.005968812952324106, "grad_norm": 0.375, "learning_rate": 5.285714285714286e-06, "loss": 2.4947, "step": 185 }, { "epoch": 0.006001076806120453, "grad_norm": 0.38671875, "learning_rate": 5.314285714285714e-06, "loss": 2.5018, "step": 186 }, { "epoch": 0.0060333406599168, "grad_norm": 0.373046875, "learning_rate": 5.342857142857143e-06, "loss": 2.4894, "step": 187 }, { "epoch": 0.006065604513713146, "grad_norm": 0.3828125, "learning_rate": 5.371428571428571e-06, "loss": 2.5078, "step": 188 }, { "epoch": 0.006097868367509493, "grad_norm": 0.396484375, "learning_rate": 5.4e-06, "loss": 2.4784, "step": 189 }, { "epoch": 0.006130132221305839, "grad_norm": 0.380859375, "learning_rate": 5.428571428571429e-06, "loss": 2.5143, "step": 190 }, { "epoch": 0.006162396075102185, "grad_norm": 0.396484375, "learning_rate": 5.457142857142857e-06, "loss": 2.5255, "step": 191 }, { "epoch": 0.006194659928898532, "grad_norm": 0.384765625, "learning_rate": 5.485714285714286e-06, "loss": 2.4693, "step": 192 }, { "epoch": 0.006226923782694879, "grad_norm": 0.373046875, "learning_rate": 5.514285714285715e-06, "loss": 2.4938, "step": 193 }, { "epoch": 0.0062591876364912254, "grad_norm": 0.37890625, "learning_rate": 5.542857142857143e-06, "loss": 2.4601, "step": 194 }, { "epoch": 0.006291451490287572, "grad_norm": 0.369140625, "learning_rate": 5.571428571428572e-06, "loss": 2.4833, "step": 195 }, { "epoch": 0.006323715344083918, "grad_norm": 0.359375, "learning_rate": 5.600000000000001e-06, "loss": 2.4756, "step": 196 }, { "epoch": 0.006355979197880265, "grad_norm": 0.36328125, "learning_rate": 5.628571428571429e-06, "loss": 2.46, "step": 197 }, { "epoch": 0.006388243051676611, "grad_norm": 0.400390625, "learning_rate": 5.6571428571428576e-06, "loss": 2.4605, "step": 198 }, { "epoch": 0.006420506905472958, "grad_norm": 0.3671875, "learning_rate": 5.685714285714286e-06, "loss": 2.4093, "step": 199 }, { "epoch": 0.006452770759269305, "grad_norm": 0.37109375, "learning_rate": 5.7142857142857145e-06, "loss": 2.5028, "step": 200 }, { "epoch": 0.006485034613065651, "grad_norm": 0.36328125, "learning_rate": 5.7428571428571426e-06, "loss": 2.471, "step": 201 }, { "epoch": 0.006517298466861997, "grad_norm": 0.357421875, "learning_rate": 5.7714285714285715e-06, "loss": 2.4558, "step": 202 }, { "epoch": 0.006549562320658344, "grad_norm": 0.37890625, "learning_rate": 5.8e-06, "loss": 2.4307, "step": 203 }, { "epoch": 0.00658182617445469, "grad_norm": 0.365234375, "learning_rate": 5.8285714285714284e-06, "loss": 2.4553, "step": 204 }, { "epoch": 0.006614090028251037, "grad_norm": 0.369140625, "learning_rate": 5.857142857142857e-06, "loss": 2.4944, "step": 205 }, { "epoch": 0.006646353882047384, "grad_norm": 0.3671875, "learning_rate": 5.885714285714286e-06, "loss": 2.4786, "step": 206 }, { "epoch": 0.00667861773584373, "grad_norm": 0.35546875, "learning_rate": 5.914285714285714e-06, "loss": 2.4664, "step": 207 }, { "epoch": 0.0067108815896400765, "grad_norm": 0.359375, "learning_rate": 5.942857142857143e-06, "loss": 2.4846, "step": 208 }, { "epoch": 0.006743145443436423, "grad_norm": 0.357421875, "learning_rate": 5.971428571428572e-06, "loss": 2.4694, "step": 209 }, { "epoch": 0.006775409297232769, "grad_norm": 0.373046875, "learning_rate": 6e-06, "loss": 2.4915, "step": 210 }, { "epoch": 0.0068076731510291165, "grad_norm": 0.376953125, "learning_rate": 6.028571428571429e-06, "loss": 2.4701, "step": 211 }, { "epoch": 0.006839937004825463, "grad_norm": 0.37890625, "learning_rate": 6.057142857142857e-06, "loss": 2.494, "step": 212 }, { "epoch": 0.006872200858621809, "grad_norm": 0.369140625, "learning_rate": 6.085714285714285e-06, "loss": 2.4981, "step": 213 }, { "epoch": 0.006904464712418156, "grad_norm": 0.369140625, "learning_rate": 6.114285714285714e-06, "loss": 2.4921, "step": 214 }, { "epoch": 0.006936728566214502, "grad_norm": 0.365234375, "learning_rate": 6.142857142857143e-06, "loss": 2.4882, "step": 215 }, { "epoch": 0.006968992420010848, "grad_norm": 0.8671875, "learning_rate": 6.171428571428572e-06, "loss": 2.4942, "step": 216 }, { "epoch": 0.007001256273807196, "grad_norm": 0.41015625, "learning_rate": 6.2e-06, "loss": 2.4741, "step": 217 }, { "epoch": 0.007033520127603542, "grad_norm": 0.3828125, "learning_rate": 6.228571428571429e-06, "loss": 2.4333, "step": 218 }, { "epoch": 0.007065783981399888, "grad_norm": 0.375, "learning_rate": 6.257142857142858e-06, "loss": 2.4663, "step": 219 }, { "epoch": 0.007098047835196235, "grad_norm": 0.38671875, "learning_rate": 6.285714285714286e-06, "loss": 2.4642, "step": 220 }, { "epoch": 0.007130311688992581, "grad_norm": 0.369140625, "learning_rate": 6.314285714285715e-06, "loss": 2.4377, "step": 221 }, { "epoch": 0.0071625755427889275, "grad_norm": 0.3828125, "learning_rate": 6.342857142857144e-06, "loss": 2.4726, "step": 222 }, { "epoch": 0.007194839396585275, "grad_norm": 0.359375, "learning_rate": 6.371428571428572e-06, "loss": 2.458, "step": 223 }, { "epoch": 0.007227103250381621, "grad_norm": 0.3671875, "learning_rate": 6.4000000000000006e-06, "loss": 2.4667, "step": 224 }, { "epoch": 0.0072593671041779675, "grad_norm": 0.3671875, "learning_rate": 6.428571428571429e-06, "loss": 2.4881, "step": 225 }, { "epoch": 0.007291630957974314, "grad_norm": 0.35546875, "learning_rate": 6.457142857142857e-06, "loss": 2.4495, "step": 226 }, { "epoch": 0.00732389481177066, "grad_norm": 0.400390625, "learning_rate": 6.4857142857142856e-06, "loss": 2.4634, "step": 227 }, { "epoch": 0.007356158665567007, "grad_norm": 0.36328125, "learning_rate": 6.5142857142857145e-06, "loss": 2.5103, "step": 228 }, { "epoch": 0.007388422519363354, "grad_norm": 0.375, "learning_rate": 6.5428571428571425e-06, "loss": 2.4723, "step": 229 }, { "epoch": 0.0074206863731597, "grad_norm": 0.357421875, "learning_rate": 6.5714285714285714e-06, "loss": 2.4614, "step": 230 }, { "epoch": 0.007452950226956047, "grad_norm": 0.365234375, "learning_rate": 6.6e-06, "loss": 2.4356, "step": 231 }, { "epoch": 0.007485214080752393, "grad_norm": 0.369140625, "learning_rate": 6.628571428571429e-06, "loss": 2.4867, "step": 232 }, { "epoch": 0.0075174779345487395, "grad_norm": 0.36328125, "learning_rate": 6.657142857142857e-06, "loss": 2.4657, "step": 233 }, { "epoch": 0.007549741788345086, "grad_norm": 0.349609375, "learning_rate": 6.685714285714286e-06, "loss": 2.4529, "step": 234 }, { "epoch": 0.007582005642141432, "grad_norm": 0.357421875, "learning_rate": 6.714285714285715e-06, "loss": 2.417, "step": 235 }, { "epoch": 0.0076142694959377795, "grad_norm": 0.353515625, "learning_rate": 6.742857142857143e-06, "loss": 2.4624, "step": 236 }, { "epoch": 0.007646533349734126, "grad_norm": 0.35546875, "learning_rate": 6.771428571428571e-06, "loss": 2.4739, "step": 237 }, { "epoch": 0.007678797203530472, "grad_norm": 0.353515625, "learning_rate": 6.8e-06, "loss": 2.478, "step": 238 }, { "epoch": 0.007711061057326819, "grad_norm": 0.361328125, "learning_rate": 6.828571428571428e-06, "loss": 2.4726, "step": 239 }, { "epoch": 0.007743324911123165, "grad_norm": 0.3515625, "learning_rate": 6.857142857142857e-06, "loss": 2.4264, "step": 240 }, { "epoch": 0.007775588764919511, "grad_norm": 0.345703125, "learning_rate": 6.885714285714286e-06, "loss": 2.4649, "step": 241 }, { "epoch": 0.007807852618715859, "grad_norm": 0.353515625, "learning_rate": 6.914285714285714e-06, "loss": 2.4489, "step": 242 }, { "epoch": 0.007840116472512204, "grad_norm": 0.359375, "learning_rate": 6.942857142857143e-06, "loss": 2.4669, "step": 243 }, { "epoch": 0.007872380326308551, "grad_norm": 0.34765625, "learning_rate": 6.971428571428572e-06, "loss": 2.4701, "step": 244 }, { "epoch": 0.007904644180104899, "grad_norm": 0.357421875, "learning_rate": 7e-06, "loss": 2.455, "step": 245 }, { "epoch": 0.007936908033901244, "grad_norm": 0.36328125, "learning_rate": 7.028571428571429e-06, "loss": 2.4245, "step": 246 }, { "epoch": 0.007969171887697591, "grad_norm": 0.357421875, "learning_rate": 7.057142857142858e-06, "loss": 2.4627, "step": 247 }, { "epoch": 0.008001435741493937, "grad_norm": 0.375, "learning_rate": 7.085714285714287e-06, "loss": 2.4351, "step": 248 }, { "epoch": 0.008033699595290284, "grad_norm": 0.361328125, "learning_rate": 7.114285714285715e-06, "loss": 2.4584, "step": 249 }, { "epoch": 0.00806596344908663, "grad_norm": 0.3671875, "learning_rate": 7.142857142857143e-06, "loss": 2.4016, "step": 250 }, { "epoch": 0.008098227302882977, "grad_norm": 0.81640625, "learning_rate": 7.171428571428572e-06, "loss": 2.5726, "step": 251 }, { "epoch": 0.008130491156679324, "grad_norm": 0.671875, "learning_rate": 7.2e-06, "loss": 2.5621, "step": 252 }, { "epoch": 0.00816275501047567, "grad_norm": 0.53515625, "learning_rate": 7.228571428571429e-06, "loss": 2.4669, "step": 253 }, { "epoch": 0.008195018864272017, "grad_norm": 0.51953125, "learning_rate": 7.2571428571428575e-06, "loss": 2.553, "step": 254 }, { "epoch": 0.008227282718068362, "grad_norm": 0.578125, "learning_rate": 7.2857142857142855e-06, "loss": 2.5601, "step": 255 }, { "epoch": 0.00825954657186471, "grad_norm": 0.6015625, "learning_rate": 7.3142857142857144e-06, "loss": 2.5739, "step": 256 }, { "epoch": 0.008291810425661057, "grad_norm": 0.53515625, "learning_rate": 7.342857142857143e-06, "loss": 2.5508, "step": 257 }, { "epoch": 0.008324074279457402, "grad_norm": 0.48046875, "learning_rate": 7.371428571428571e-06, "loss": 2.5356, "step": 258 }, { "epoch": 0.00835633813325375, "grad_norm": 0.45703125, "learning_rate": 7.4e-06, "loss": 2.5637, "step": 259 }, { "epoch": 0.008388601987050095, "grad_norm": 0.44921875, "learning_rate": 7.428571428571429e-06, "loss": 2.5301, "step": 260 }, { "epoch": 0.008420865840846442, "grad_norm": 0.412109375, "learning_rate": 7.457142857142857e-06, "loss": 2.5544, "step": 261 }, { "epoch": 0.008453129694642788, "grad_norm": 0.435546875, "learning_rate": 7.485714285714286e-06, "loss": 2.5539, "step": 262 }, { "epoch": 0.008485393548439135, "grad_norm": 0.43359375, "learning_rate": 7.514285714285714e-06, "loss": 2.564, "step": 263 }, { "epoch": 0.008517657402235482, "grad_norm": 0.41796875, "learning_rate": 7.542857142857144e-06, "loss": 2.5133, "step": 264 }, { "epoch": 0.008549921256031828, "grad_norm": 0.41015625, "learning_rate": 7.571428571428571e-06, "loss": 2.5339, "step": 265 }, { "epoch": 0.008582185109828175, "grad_norm": 0.40625, "learning_rate": 7.600000000000001e-06, "loss": 2.5477, "step": 266 }, { "epoch": 0.00861444896362452, "grad_norm": 0.380859375, "learning_rate": 7.628571428571429e-06, "loss": 2.5357, "step": 267 }, { "epoch": 0.008646712817420868, "grad_norm": 0.40625, "learning_rate": 7.657142857142858e-06, "loss": 2.5217, "step": 268 }, { "epoch": 0.008678976671217215, "grad_norm": 0.38671875, "learning_rate": 7.685714285714286e-06, "loss": 2.5576, "step": 269 }, { "epoch": 0.00871124052501356, "grad_norm": 0.380859375, "learning_rate": 7.714285714285714e-06, "loss": 2.5258, "step": 270 }, { "epoch": 0.008743504378809908, "grad_norm": 0.384765625, "learning_rate": 7.742857142857144e-06, "loss": 2.5497, "step": 271 }, { "epoch": 0.008775768232606253, "grad_norm": 0.37890625, "learning_rate": 7.771428571428572e-06, "loss": 2.5283, "step": 272 }, { "epoch": 0.0088080320864026, "grad_norm": 0.4140625, "learning_rate": 7.8e-06, "loss": 2.5311, "step": 273 }, { "epoch": 0.008840295940198946, "grad_norm": 0.3828125, "learning_rate": 7.828571428571428e-06, "loss": 2.5309, "step": 274 }, { "epoch": 0.008872559793995293, "grad_norm": 0.369140625, "learning_rate": 7.857142857142858e-06, "loss": 2.5424, "step": 275 }, { "epoch": 0.00890482364779164, "grad_norm": 0.376953125, "learning_rate": 7.885714285714286e-06, "loss": 2.5274, "step": 276 }, { "epoch": 0.008937087501587986, "grad_norm": 0.3671875, "learning_rate": 7.914285714285715e-06, "loss": 2.5156, "step": 277 }, { "epoch": 0.008969351355384333, "grad_norm": 0.36328125, "learning_rate": 7.942857142857144e-06, "loss": 2.5354, "step": 278 }, { "epoch": 0.009001615209180679, "grad_norm": 0.376953125, "learning_rate": 7.971428571428572e-06, "loss": 2.5047, "step": 279 }, { "epoch": 0.009033879062977026, "grad_norm": 0.375, "learning_rate": 8e-06, "loss": 2.5614, "step": 280 }, { "epoch": 0.009066142916773373, "grad_norm": 0.359375, "learning_rate": 8.02857142857143e-06, "loss": 2.5197, "step": 281 }, { "epoch": 0.009098406770569719, "grad_norm": 0.37109375, "learning_rate": 8.057142857142857e-06, "loss": 2.5388, "step": 282 }, { "epoch": 0.009130670624366066, "grad_norm": 0.3671875, "learning_rate": 8.085714285714285e-06, "loss": 2.5366, "step": 283 }, { "epoch": 0.009162934478162412, "grad_norm": 0.3515625, "learning_rate": 8.114285714285715e-06, "loss": 2.5026, "step": 284 }, { "epoch": 0.009195198331958759, "grad_norm": 0.361328125, "learning_rate": 8.142857142857142e-06, "loss": 2.5585, "step": 285 }, { "epoch": 0.009227462185755105, "grad_norm": 0.361328125, "learning_rate": 8.171428571428571e-06, "loss": 2.4785, "step": 286 }, { "epoch": 0.009259726039551452, "grad_norm": 0.361328125, "learning_rate": 8.2e-06, "loss": 2.5596, "step": 287 }, { "epoch": 0.009291989893347799, "grad_norm": 0.3515625, "learning_rate": 8.22857142857143e-06, "loss": 2.5297, "step": 288 }, { "epoch": 0.009324253747144145, "grad_norm": 0.376953125, "learning_rate": 8.257142857142857e-06, "loss": 2.5117, "step": 289 }, { "epoch": 0.009356517600940492, "grad_norm": 0.34375, "learning_rate": 8.285714285714287e-06, "loss": 2.516, "step": 290 }, { "epoch": 0.009388781454736837, "grad_norm": 0.365234375, "learning_rate": 8.314285714285715e-06, "loss": 2.5359, "step": 291 }, { "epoch": 0.009421045308533185, "grad_norm": 0.359375, "learning_rate": 8.342857142857143e-06, "loss": 2.5232, "step": 292 }, { "epoch": 0.00945330916232953, "grad_norm": 0.361328125, "learning_rate": 8.371428571428571e-06, "loss": 2.5342, "step": 293 }, { "epoch": 0.009485573016125877, "grad_norm": 0.408203125, "learning_rate": 8.400000000000001e-06, "loss": 2.5266, "step": 294 }, { "epoch": 0.009517836869922225, "grad_norm": 0.37109375, "learning_rate": 8.428571428571429e-06, "loss": 2.5272, "step": 295 }, { "epoch": 0.00955010072371857, "grad_norm": 0.365234375, "learning_rate": 8.457142857142857e-06, "loss": 2.5181, "step": 296 }, { "epoch": 0.009582364577514917, "grad_norm": 0.361328125, "learning_rate": 8.485714285714287e-06, "loss": 2.5123, "step": 297 }, { "epoch": 0.009614628431311263, "grad_norm": 0.375, "learning_rate": 8.514285714285713e-06, "loss": 2.5404, "step": 298 }, { "epoch": 0.00964689228510761, "grad_norm": 0.3515625, "learning_rate": 8.542857142857143e-06, "loss": 2.5178, "step": 299 }, { "epoch": 0.009679156138903957, "grad_norm": 0.365234375, "learning_rate": 8.571428571428571e-06, "loss": 2.4536, "step": 300 }, { "epoch": 0.009711419992700303, "grad_norm": 0.376953125, "learning_rate": 8.6e-06, "loss": 2.5324, "step": 301 }, { "epoch": 0.00974368384649665, "grad_norm": 0.361328125, "learning_rate": 8.628571428571429e-06, "loss": 2.4938, "step": 302 }, { "epoch": 0.009775947700292996, "grad_norm": 0.3828125, "learning_rate": 8.657142857142858e-06, "loss": 2.5034, "step": 303 }, { "epoch": 0.009808211554089343, "grad_norm": 0.369140625, "learning_rate": 8.685714285714287e-06, "loss": 2.51, "step": 304 }, { "epoch": 0.009840475407885688, "grad_norm": 0.37109375, "learning_rate": 8.714285714285715e-06, "loss": 2.5217, "step": 305 }, { "epoch": 0.009872739261682036, "grad_norm": 0.39453125, "learning_rate": 8.742857142857143e-06, "loss": 2.5156, "step": 306 }, { "epoch": 0.009905003115478383, "grad_norm": 0.349609375, "learning_rate": 8.771428571428572e-06, "loss": 2.5233, "step": 307 }, { "epoch": 0.009937266969274728, "grad_norm": 0.38671875, "learning_rate": 8.8e-06, "loss": 2.5272, "step": 308 }, { "epoch": 0.009969530823071076, "grad_norm": 0.3671875, "learning_rate": 8.828571428571429e-06, "loss": 2.5097, "step": 309 }, { "epoch": 0.010001794676867421, "grad_norm": 0.376953125, "learning_rate": 8.857142857142858e-06, "loss": 2.5308, "step": 310 }, { "epoch": 0.010034058530663768, "grad_norm": 0.3828125, "learning_rate": 8.885714285714285e-06, "loss": 2.4956, "step": 311 }, { "epoch": 0.010066322384460116, "grad_norm": 0.37109375, "learning_rate": 8.914285714285714e-06, "loss": 2.5111, "step": 312 }, { "epoch": 0.010098586238256461, "grad_norm": 0.36328125, "learning_rate": 8.942857142857142e-06, "loss": 2.483, "step": 313 }, { "epoch": 0.010130850092052808, "grad_norm": 0.37890625, "learning_rate": 8.971428571428572e-06, "loss": 2.5402, "step": 314 }, { "epoch": 0.010163113945849154, "grad_norm": 0.373046875, "learning_rate": 9e-06, "loss": 2.5133, "step": 315 }, { "epoch": 0.010195377799645501, "grad_norm": 0.388671875, "learning_rate": 9.02857142857143e-06, "loss": 2.4878, "step": 316 }, { "epoch": 0.010227641653441847, "grad_norm": 0.41796875, "learning_rate": 9.057142857142856e-06, "loss": 2.4998, "step": 317 }, { "epoch": 0.010259905507238194, "grad_norm": 0.3515625, "learning_rate": 9.085714285714286e-06, "loss": 2.4835, "step": 318 }, { "epoch": 0.010292169361034541, "grad_norm": 0.38671875, "learning_rate": 9.114285714285714e-06, "loss": 2.4455, "step": 319 }, { "epoch": 0.010324433214830887, "grad_norm": 0.36328125, "learning_rate": 9.142857142857144e-06, "loss": 2.4671, "step": 320 }, { "epoch": 0.010356697068627234, "grad_norm": 0.38671875, "learning_rate": 9.171428571428572e-06, "loss": 2.5188, "step": 321 }, { "epoch": 0.01038896092242358, "grad_norm": 0.369140625, "learning_rate": 9.2e-06, "loss": 2.4985, "step": 322 }, { "epoch": 0.010421224776219927, "grad_norm": 0.388671875, "learning_rate": 9.22857142857143e-06, "loss": 2.5022, "step": 323 }, { "epoch": 0.010453488630016274, "grad_norm": 0.361328125, "learning_rate": 9.257142857142856e-06, "loss": 2.5275, "step": 324 }, { "epoch": 0.01048575248381262, "grad_norm": 0.3671875, "learning_rate": 9.285714285714286e-06, "loss": 2.5045, "step": 325 }, { "epoch": 0.010518016337608967, "grad_norm": 0.365234375, "learning_rate": 9.314285714285714e-06, "loss": 2.4918, "step": 326 }, { "epoch": 0.010550280191405312, "grad_norm": 0.375, "learning_rate": 9.342857142857144e-06, "loss": 2.4943, "step": 327 }, { "epoch": 0.01058254404520166, "grad_norm": 0.365234375, "learning_rate": 9.371428571428572e-06, "loss": 2.5024, "step": 328 }, { "epoch": 0.010614807898998005, "grad_norm": 0.373046875, "learning_rate": 9.400000000000001e-06, "loss": 2.4626, "step": 329 }, { "epoch": 0.010647071752794352, "grad_norm": 0.3671875, "learning_rate": 9.428571428571428e-06, "loss": 2.4455, "step": 330 }, { "epoch": 0.0106793356065907, "grad_norm": 0.380859375, "learning_rate": 9.457142857142858e-06, "loss": 2.4099, "step": 331 }, { "epoch": 0.010711599460387045, "grad_norm": 0.357421875, "learning_rate": 9.485714285714286e-06, "loss": 2.4195, "step": 332 }, { "epoch": 0.010743863314183392, "grad_norm": 0.380859375, "learning_rate": 9.514285714285714e-06, "loss": 2.412, "step": 333 }, { "epoch": 0.010776127167979738, "grad_norm": 0.40234375, "learning_rate": 9.542857142857143e-06, "loss": 2.5288, "step": 334 }, { "epoch": 0.010808391021776085, "grad_norm": 0.3984375, "learning_rate": 9.571428571428572e-06, "loss": 2.4755, "step": 335 }, { "epoch": 0.010840654875572432, "grad_norm": 0.38671875, "learning_rate": 9.600000000000001e-06, "loss": 2.4898, "step": 336 }, { "epoch": 0.010872918729368778, "grad_norm": 0.41796875, "learning_rate": 9.628571428571428e-06, "loss": 2.5157, "step": 337 }, { "epoch": 0.010905182583165125, "grad_norm": 0.396484375, "learning_rate": 9.657142857142857e-06, "loss": 2.5148, "step": 338 }, { "epoch": 0.01093744643696147, "grad_norm": 0.41796875, "learning_rate": 9.685714285714285e-06, "loss": 2.4715, "step": 339 }, { "epoch": 0.010969710290757818, "grad_norm": 0.388671875, "learning_rate": 9.714285714285715e-06, "loss": 2.5016, "step": 340 }, { "epoch": 0.011001974144554163, "grad_norm": 0.37109375, "learning_rate": 9.742857142857143e-06, "loss": 2.5222, "step": 341 }, { "epoch": 0.01103423799835051, "grad_norm": 0.375, "learning_rate": 9.771428571428573e-06, "loss": 2.487, "step": 342 }, { "epoch": 0.011066501852146858, "grad_norm": 0.39453125, "learning_rate": 9.8e-06, "loss": 2.4761, "step": 343 }, { "epoch": 0.011098765705943203, "grad_norm": 0.38671875, "learning_rate": 9.828571428571429e-06, "loss": 2.4769, "step": 344 }, { "epoch": 0.01113102955973955, "grad_norm": 0.86328125, "learning_rate": 9.857142857142857e-06, "loss": 2.4701, "step": 345 }, { "epoch": 0.011163293413535896, "grad_norm": 0.78125, "learning_rate": 9.885714285714285e-06, "loss": 2.4094, "step": 346 }, { "epoch": 0.011195557267332243, "grad_norm": 0.62109375, "learning_rate": 9.914285714285715e-06, "loss": 2.3701, "step": 347 }, { "epoch": 0.01122782112112859, "grad_norm": 0.55078125, "learning_rate": 9.942857142857143e-06, "loss": 2.387, "step": 348 }, { "epoch": 0.011260084974924936, "grad_norm": 0.54296875, "learning_rate": 9.971428571428571e-06, "loss": 2.3873, "step": 349 }, { "epoch": 0.011292348828721283, "grad_norm": 0.53515625, "learning_rate": 9.999999999999999e-06, "loss": 2.4106, "step": 350 }, { "epoch": 0.011324612682517629, "grad_norm": 0.48046875, "learning_rate": 1.0028571428571429e-05, "loss": 2.3405, "step": 351 }, { "epoch": 0.011356876536313976, "grad_norm": 0.5390625, "learning_rate": 1.0057142857142857e-05, "loss": 2.3847, "step": 352 }, { "epoch": 0.011389140390110322, "grad_norm": 0.46875, "learning_rate": 1.0085714285714287e-05, "loss": 2.3546, "step": 353 }, { "epoch": 0.011421404243906669, "grad_norm": 0.50390625, "learning_rate": 1.0114285714285715e-05, "loss": 2.362, "step": 354 }, { "epoch": 0.011453668097703016, "grad_norm": 0.466796875, "learning_rate": 1.0142857142857144e-05, "loss": 2.4085, "step": 355 }, { "epoch": 0.011485931951499362, "grad_norm": 0.4375, "learning_rate": 1.0171428571428571e-05, "loss": 2.3797, "step": 356 }, { "epoch": 0.011518195805295709, "grad_norm": 0.40625, "learning_rate": 1.02e-05, "loss": 2.3613, "step": 357 }, { "epoch": 0.011550459659092054, "grad_norm": 0.451171875, "learning_rate": 1.0228571428571429e-05, "loss": 2.376, "step": 358 }, { "epoch": 0.011582723512888402, "grad_norm": 0.470703125, "learning_rate": 1.0257142857142857e-05, "loss": 2.3526, "step": 359 }, { "epoch": 0.011614987366684747, "grad_norm": 0.416015625, "learning_rate": 1.0285714285714286e-05, "loss": 2.3962, "step": 360 }, { "epoch": 0.011647251220481094, "grad_norm": 0.451171875, "learning_rate": 1.0314285714285715e-05, "loss": 2.3808, "step": 361 }, { "epoch": 0.011679515074277442, "grad_norm": 0.6328125, "learning_rate": 1.0342857142857143e-05, "loss": 2.3793, "step": 362 }, { "epoch": 0.011711778928073787, "grad_norm": 0.49609375, "learning_rate": 1.037142857142857e-05, "loss": 2.4117, "step": 363 }, { "epoch": 0.011744042781870134, "grad_norm": 0.478515625, "learning_rate": 1.04e-05, "loss": 2.3849, "step": 364 }, { "epoch": 0.01177630663566648, "grad_norm": 0.431640625, "learning_rate": 1.0428571428571428e-05, "loss": 2.3292, "step": 365 }, { "epoch": 0.011808570489462827, "grad_norm": 0.451171875, "learning_rate": 1.0457142857142858e-05, "loss": 2.3279, "step": 366 }, { "epoch": 0.011840834343259174, "grad_norm": 0.443359375, "learning_rate": 1.0485714285714286e-05, "loss": 2.3787, "step": 367 }, { "epoch": 0.01187309819705552, "grad_norm": 0.44140625, "learning_rate": 1.0514285714285716e-05, "loss": 2.3669, "step": 368 }, { "epoch": 0.011905362050851867, "grad_norm": 0.43359375, "learning_rate": 1.0542857142857142e-05, "loss": 2.36, "step": 369 }, { "epoch": 0.011937625904648213, "grad_norm": 0.44140625, "learning_rate": 1.0571428571428572e-05, "loss": 2.3497, "step": 370 }, { "epoch": 0.01196988975844456, "grad_norm": 0.419921875, "learning_rate": 1.06e-05, "loss": 2.3783, "step": 371 }, { "epoch": 0.012002153612240905, "grad_norm": 0.458984375, "learning_rate": 1.0628571428571428e-05, "loss": 2.3354, "step": 372 }, { "epoch": 0.012034417466037253, "grad_norm": 0.4296875, "learning_rate": 1.0657142857142858e-05, "loss": 2.3533, "step": 373 }, { "epoch": 0.0120666813198336, "grad_norm": 0.40625, "learning_rate": 1.0685714285714286e-05, "loss": 2.3481, "step": 374 }, { "epoch": 0.012098945173629945, "grad_norm": 0.4921875, "learning_rate": 1.0714285714285714e-05, "loss": 2.3802, "step": 375 }, { "epoch": 0.012131209027426293, "grad_norm": 0.431640625, "learning_rate": 1.0742857142857142e-05, "loss": 2.3923, "step": 376 }, { "epoch": 0.012163472881222638, "grad_norm": 0.42578125, "learning_rate": 1.0771428571428572e-05, "loss": 2.3936, "step": 377 }, { "epoch": 0.012195736735018985, "grad_norm": 0.41796875, "learning_rate": 1.08e-05, "loss": 2.329, "step": 378 }, { "epoch": 0.012228000588815333, "grad_norm": 0.458984375, "learning_rate": 1.082857142857143e-05, "loss": 2.3875, "step": 379 }, { "epoch": 0.012260264442611678, "grad_norm": 0.42578125, "learning_rate": 1.0857142857142858e-05, "loss": 2.3311, "step": 380 }, { "epoch": 0.012292528296408025, "grad_norm": 0.408203125, "learning_rate": 1.0885714285714286e-05, "loss": 2.3837, "step": 381 }, { "epoch": 0.01232479215020437, "grad_norm": 0.41015625, "learning_rate": 1.0914285714285714e-05, "loss": 2.3425, "step": 382 }, { "epoch": 0.012357056004000718, "grad_norm": 0.55078125, "learning_rate": 1.0942857142857144e-05, "loss": 2.3795, "step": 383 }, { "epoch": 0.012389319857797064, "grad_norm": 0.470703125, "learning_rate": 1.0971428571428572e-05, "loss": 2.3663, "step": 384 }, { "epoch": 0.01242158371159341, "grad_norm": 0.451171875, "learning_rate": 1.1e-05, "loss": 2.414, "step": 385 }, { "epoch": 0.012453847565389758, "grad_norm": 0.5234375, "learning_rate": 1.102857142857143e-05, "loss": 2.3799, "step": 386 }, { "epoch": 0.012486111419186104, "grad_norm": 0.59765625, "learning_rate": 1.1057142857142858e-05, "loss": 2.4026, "step": 387 }, { "epoch": 0.012518375272982451, "grad_norm": 0.478515625, "learning_rate": 1.1085714285714286e-05, "loss": 2.3857, "step": 388 }, { "epoch": 0.012550639126778796, "grad_norm": 0.546875, "learning_rate": 1.1114285714285714e-05, "loss": 2.367, "step": 389 }, { "epoch": 0.012582902980575144, "grad_norm": 0.5078125, "learning_rate": 1.1142857142857143e-05, "loss": 2.3735, "step": 390 }, { "epoch": 0.012615166834371491, "grad_norm": 0.46484375, "learning_rate": 1.1171428571428571e-05, "loss": 2.402, "step": 391 }, { "epoch": 0.012647430688167836, "grad_norm": 0.498046875, "learning_rate": 1.1200000000000001e-05, "loss": 2.376, "step": 392 }, { "epoch": 0.012679694541964184, "grad_norm": 0.52734375, "learning_rate": 1.122857142857143e-05, "loss": 2.3613, "step": 393 }, { "epoch": 0.01271195839576053, "grad_norm": 0.5390625, "learning_rate": 1.1257142857142857e-05, "loss": 2.3633, "step": 394 }, { "epoch": 0.012744222249556876, "grad_norm": 0.515625, "learning_rate": 1.1285714285714285e-05, "loss": 2.3556, "step": 395 }, { "epoch": 0.012776486103353222, "grad_norm": 0.53125, "learning_rate": 1.1314285714285715e-05, "loss": 2.3831, "step": 396 }, { "epoch": 0.01280874995714957, "grad_norm": 0.4921875, "learning_rate": 1.1342857142857143e-05, "loss": 2.3591, "step": 397 }, { "epoch": 0.012841013810945916, "grad_norm": 0.498046875, "learning_rate": 1.1371428571428571e-05, "loss": 2.3187, "step": 398 }, { "epoch": 0.012873277664742262, "grad_norm": 0.474609375, "learning_rate": 1.1400000000000001e-05, "loss": 2.3405, "step": 399 }, { "epoch": 0.01290554151853861, "grad_norm": 0.451171875, "learning_rate": 1.1428571428571429e-05, "loss": 2.3185, "step": 400 }, { "epoch": 0.012937805372334955, "grad_norm": 0.453125, "learning_rate": 1.1457142857142857e-05, "loss": 2.35, "step": 401 }, { "epoch": 0.012970069226131302, "grad_norm": 0.44140625, "learning_rate": 1.1485714285714285e-05, "loss": 2.3475, "step": 402 }, { "epoch": 0.01300233307992765, "grad_norm": 0.46875, "learning_rate": 1.1514285714285715e-05, "loss": 2.3379, "step": 403 }, { "epoch": 0.013034596933723995, "grad_norm": 0.44921875, "learning_rate": 1.1542857142857143e-05, "loss": 2.3706, "step": 404 }, { "epoch": 0.013066860787520342, "grad_norm": 0.423828125, "learning_rate": 1.1571428571428573e-05, "loss": 2.3696, "step": 405 }, { "epoch": 0.013099124641316687, "grad_norm": 0.44140625, "learning_rate": 1.16e-05, "loss": 2.3212, "step": 406 }, { "epoch": 0.013131388495113035, "grad_norm": 0.44921875, "learning_rate": 1.1628571428571429e-05, "loss": 2.3384, "step": 407 }, { "epoch": 0.01316365234890938, "grad_norm": 0.4453125, "learning_rate": 1.1657142857142857e-05, "loss": 2.3385, "step": 408 }, { "epoch": 0.013195916202705727, "grad_norm": 0.482421875, "learning_rate": 1.1685714285714287e-05, "loss": 2.3389, "step": 409 }, { "epoch": 0.013228180056502075, "grad_norm": 0.4296875, "learning_rate": 1.1714285714285715e-05, "loss": 2.3323, "step": 410 }, { "epoch": 0.01326044391029842, "grad_norm": 0.408203125, "learning_rate": 1.1742857142857143e-05, "loss": 2.366, "step": 411 }, { "epoch": 0.013292707764094767, "grad_norm": 0.40234375, "learning_rate": 1.1771428571428572e-05, "loss": 2.3963, "step": 412 }, { "epoch": 0.013324971617891113, "grad_norm": 0.404296875, "learning_rate": 1.1799999999999999e-05, "loss": 2.3959, "step": 413 }, { "epoch": 0.01335723547168746, "grad_norm": 0.396484375, "learning_rate": 1.1828571428571429e-05, "loss": 2.3917, "step": 414 }, { "epoch": 0.013389499325483806, "grad_norm": 0.3828125, "learning_rate": 1.1857142857142857e-05, "loss": 2.3737, "step": 415 }, { "epoch": 0.013421763179280153, "grad_norm": 0.40234375, "learning_rate": 1.1885714285714286e-05, "loss": 2.3852, "step": 416 }, { "epoch": 0.0134540270330765, "grad_norm": 0.4609375, "learning_rate": 1.1914285714285714e-05, "loss": 2.3498, "step": 417 }, { "epoch": 0.013486290886872846, "grad_norm": 0.490234375, "learning_rate": 1.1942857142857144e-05, "loss": 2.3384, "step": 418 }, { "epoch": 0.013518554740669193, "grad_norm": 0.423828125, "learning_rate": 1.1971428571428572e-05, "loss": 2.3326, "step": 419 }, { "epoch": 0.013550818594465538, "grad_norm": 0.51953125, "learning_rate": 1.2e-05, "loss": 2.3399, "step": 420 }, { "epoch": 0.013583082448261886, "grad_norm": 0.59375, "learning_rate": 1.2028571428571428e-05, "loss": 2.3478, "step": 421 }, { "epoch": 0.013615346302058233, "grad_norm": 0.40625, "learning_rate": 1.2057142857142858e-05, "loss": 2.3502, "step": 422 }, { "epoch": 0.013647610155854578, "grad_norm": 0.5, "learning_rate": 1.2085714285714286e-05, "loss": 2.3464, "step": 423 }, { "epoch": 0.013679874009650926, "grad_norm": 0.431640625, "learning_rate": 1.2114285714285714e-05, "loss": 2.3418, "step": 424 }, { "epoch": 0.013712137863447271, "grad_norm": 0.416015625, "learning_rate": 1.2142857142857144e-05, "loss": 2.3589, "step": 425 }, { "epoch": 0.013744401717243619, "grad_norm": 0.53515625, "learning_rate": 1.217142857142857e-05, "loss": 2.296, "step": 426 }, { "epoch": 0.013776665571039964, "grad_norm": 0.53125, "learning_rate": 1.22e-05, "loss": 2.3323, "step": 427 }, { "epoch": 0.013808929424836311, "grad_norm": 0.435546875, "learning_rate": 1.2228571428571428e-05, "loss": 2.3442, "step": 428 }, { "epoch": 0.013841193278632659, "grad_norm": 0.5546875, "learning_rate": 1.2257142857142858e-05, "loss": 2.3266, "step": 429 }, { "epoch": 0.013873457132429004, "grad_norm": 0.45703125, "learning_rate": 1.2285714285714286e-05, "loss": 2.3926, "step": 430 }, { "epoch": 0.013905720986225351, "grad_norm": 0.474609375, "learning_rate": 1.2314285714285716e-05, "loss": 2.3172, "step": 431 }, { "epoch": 0.013937984840021697, "grad_norm": 0.42578125, "learning_rate": 1.2342857142857144e-05, "loss": 2.297, "step": 432 }, { "epoch": 0.013970248693818044, "grad_norm": 0.451171875, "learning_rate": 1.2371428571428572e-05, "loss": 2.2935, "step": 433 }, { "epoch": 0.014002512547614391, "grad_norm": 0.42578125, "learning_rate": 1.24e-05, "loss": 2.3401, "step": 434 }, { "epoch": 0.014034776401410737, "grad_norm": 0.423828125, "learning_rate": 1.242857142857143e-05, "loss": 2.3346, "step": 435 }, { "epoch": 0.014067040255207084, "grad_norm": 0.443359375, "learning_rate": 1.2457142857142858e-05, "loss": 2.2772, "step": 436 }, { "epoch": 0.01409930410900343, "grad_norm": 0.453125, "learning_rate": 1.2485714285714286e-05, "loss": 2.3521, "step": 437 }, { "epoch": 0.014131567962799777, "grad_norm": 0.44140625, "learning_rate": 1.2514285714285715e-05, "loss": 2.3453, "step": 438 }, { "epoch": 0.014163831816596122, "grad_norm": 0.451171875, "learning_rate": 1.2542857142857142e-05, "loss": 2.3371, "step": 439 }, { "epoch": 0.01419609567039247, "grad_norm": 0.447265625, "learning_rate": 1.2571428571428572e-05, "loss": 2.3421, "step": 440 }, { "epoch": 0.014228359524188817, "grad_norm": 0.41796875, "learning_rate": 1.26e-05, "loss": 2.3001, "step": 441 }, { "epoch": 0.014260623377985162, "grad_norm": 0.404296875, "learning_rate": 1.262857142857143e-05, "loss": 2.3002, "step": 442 }, { "epoch": 0.01429288723178151, "grad_norm": 0.53125, "learning_rate": 1.2657142857142857e-05, "loss": 2.3187, "step": 443 }, { "epoch": 0.014325151085577855, "grad_norm": 0.431640625, "learning_rate": 1.2685714285714287e-05, "loss": 2.312, "step": 444 }, { "epoch": 0.014357414939374202, "grad_norm": 0.427734375, "learning_rate": 1.2714285714285714e-05, "loss": 2.35, "step": 445 }, { "epoch": 0.01438967879317055, "grad_norm": 0.4375, "learning_rate": 1.2742857142857143e-05, "loss": 2.3415, "step": 446 }, { "epoch": 0.014421942646966895, "grad_norm": 0.48046875, "learning_rate": 1.2771428571428571e-05, "loss": 2.3139, "step": 447 }, { "epoch": 0.014454206500763242, "grad_norm": 0.462890625, "learning_rate": 1.2800000000000001e-05, "loss": 2.3073, "step": 448 }, { "epoch": 0.014486470354559588, "grad_norm": 0.439453125, "learning_rate": 1.282857142857143e-05, "loss": 2.3355, "step": 449 }, { "epoch": 0.014518734208355935, "grad_norm": 0.53125, "learning_rate": 1.2857142857142857e-05, "loss": 2.3285, "step": 450 }, { "epoch": 0.01455099806215228, "grad_norm": 0.5078125, "learning_rate": 1.2885714285714287e-05, "loss": 2.3461, "step": 451 }, { "epoch": 0.014583261915948628, "grad_norm": 0.5, "learning_rate": 1.2914285714285713e-05, "loss": 2.3343, "step": 452 }, { "epoch": 0.014615525769744975, "grad_norm": 0.48828125, "learning_rate": 1.2942857142857143e-05, "loss": 2.3505, "step": 453 }, { "epoch": 0.01464778962354132, "grad_norm": 0.48046875, "learning_rate": 1.2971428571428571e-05, "loss": 2.3221, "step": 454 }, { "epoch": 0.014680053477337668, "grad_norm": 0.458984375, "learning_rate": 1.3000000000000001e-05, "loss": 2.3381, "step": 455 }, { "epoch": 0.014712317331134013, "grad_norm": 0.44921875, "learning_rate": 1.3028571428571429e-05, "loss": 2.3431, "step": 456 }, { "epoch": 0.01474458118493036, "grad_norm": 0.458984375, "learning_rate": 1.3057142857142859e-05, "loss": 2.3416, "step": 457 }, { "epoch": 0.014776845038726708, "grad_norm": 0.408203125, "learning_rate": 1.3085714285714285e-05, "loss": 2.3585, "step": 458 }, { "epoch": 0.014809108892523053, "grad_norm": 0.4375, "learning_rate": 1.3114285714285715e-05, "loss": 2.3489, "step": 459 }, { "epoch": 0.0148413727463194, "grad_norm": 0.423828125, "learning_rate": 1.3142857142857143e-05, "loss": 2.3484, "step": 460 }, { "epoch": 0.014873636600115746, "grad_norm": 0.421875, "learning_rate": 1.3171428571428571e-05, "loss": 2.3472, "step": 461 }, { "epoch": 0.014905900453912093, "grad_norm": 0.412109375, "learning_rate": 1.32e-05, "loss": 2.3439, "step": 462 }, { "epoch": 0.014938164307708439, "grad_norm": 0.44140625, "learning_rate": 1.3228571428571429e-05, "loss": 2.2903, "step": 463 }, { "epoch": 0.014970428161504786, "grad_norm": 0.64453125, "learning_rate": 1.3257142857142858e-05, "loss": 2.359, "step": 464 }, { "epoch": 0.015002692015301133, "grad_norm": 0.416015625, "learning_rate": 1.3285714285714285e-05, "loss": 2.3293, "step": 465 }, { "epoch": 0.015034955869097479, "grad_norm": 0.49609375, "learning_rate": 1.3314285714285715e-05, "loss": 2.3006, "step": 466 }, { "epoch": 0.015067219722893826, "grad_norm": 0.49609375, "learning_rate": 1.3342857142857143e-05, "loss": 2.3327, "step": 467 }, { "epoch": 0.015099483576690172, "grad_norm": 0.41796875, "learning_rate": 1.3371428571428572e-05, "loss": 2.3029, "step": 468 }, { "epoch": 0.015131747430486519, "grad_norm": 0.421875, "learning_rate": 1.34e-05, "loss": 2.3243, "step": 469 }, { "epoch": 0.015164011284282864, "grad_norm": 0.4296875, "learning_rate": 1.342857142857143e-05, "loss": 2.2706, "step": 470 }, { "epoch": 0.015196275138079212, "grad_norm": 0.419921875, "learning_rate": 1.3457142857142857e-05, "loss": 2.2941, "step": 471 }, { "epoch": 0.015228538991875559, "grad_norm": 0.43359375, "learning_rate": 1.3485714285714286e-05, "loss": 2.292, "step": 472 }, { "epoch": 0.015260802845671904, "grad_norm": 0.44921875, "learning_rate": 1.3514285714285714e-05, "loss": 2.3265, "step": 473 }, { "epoch": 0.015293066699468252, "grad_norm": 0.44140625, "learning_rate": 1.3542857142857142e-05, "loss": 2.3298, "step": 474 }, { "epoch": 0.015325330553264597, "grad_norm": 0.408203125, "learning_rate": 1.3571428571428572e-05, "loss": 2.3025, "step": 475 }, { "epoch": 0.015357594407060944, "grad_norm": 0.43359375, "learning_rate": 1.36e-05, "loss": 2.3032, "step": 476 }, { "epoch": 0.015389858260857292, "grad_norm": 0.396484375, "learning_rate": 1.3628571428571428e-05, "loss": 2.2714, "step": 477 }, { "epoch": 0.015422122114653637, "grad_norm": 0.474609375, "learning_rate": 1.3657142857142856e-05, "loss": 2.295, "step": 478 }, { "epoch": 0.015454385968449984, "grad_norm": 0.458984375, "learning_rate": 1.3685714285714286e-05, "loss": 2.3181, "step": 479 }, { "epoch": 0.01548664982224633, "grad_norm": 0.416015625, "learning_rate": 1.3714285714285714e-05, "loss": 2.3314, "step": 480 }, { "epoch": 0.015518913676042677, "grad_norm": 0.416015625, "learning_rate": 1.3742857142857144e-05, "loss": 2.2635, "step": 481 }, { "epoch": 0.015551177529839023, "grad_norm": 0.427734375, "learning_rate": 1.3771428571428572e-05, "loss": 2.3176, "step": 482 }, { "epoch": 0.01558344138363537, "grad_norm": 0.396484375, "learning_rate": 1.3800000000000002e-05, "loss": 2.3371, "step": 483 }, { "epoch": 0.015615705237431717, "grad_norm": 0.404296875, "learning_rate": 1.3828571428571428e-05, "loss": 2.2759, "step": 484 }, { "epoch": 0.015647969091228064, "grad_norm": 0.421875, "learning_rate": 1.3857142857142858e-05, "loss": 2.2986, "step": 485 }, { "epoch": 0.015680232945024408, "grad_norm": 0.390625, "learning_rate": 1.3885714285714286e-05, "loss": 2.2857, "step": 486 }, { "epoch": 0.015712496798820755, "grad_norm": 0.42578125, "learning_rate": 1.3914285714285714e-05, "loss": 2.3045, "step": 487 }, { "epoch": 0.015744760652617103, "grad_norm": 0.40625, "learning_rate": 1.3942857142857144e-05, "loss": 2.3209, "step": 488 }, { "epoch": 0.01577702450641345, "grad_norm": 0.3828125, "learning_rate": 1.3971428571428572e-05, "loss": 2.3313, "step": 489 }, { "epoch": 0.015809288360209797, "grad_norm": 0.42578125, "learning_rate": 1.4e-05, "loss": 2.3264, "step": 490 }, { "epoch": 0.01584155221400614, "grad_norm": 0.400390625, "learning_rate": 1.4028571428571428e-05, "loss": 2.2853, "step": 491 }, { "epoch": 0.015873816067802488, "grad_norm": 0.419921875, "learning_rate": 1.4057142857142858e-05, "loss": 2.3629, "step": 492 }, { "epoch": 0.015906079921598835, "grad_norm": 0.5, "learning_rate": 1.4085714285714286e-05, "loss": 2.3289, "step": 493 }, { "epoch": 0.015938343775395183, "grad_norm": 0.4296875, "learning_rate": 1.4114285714285715e-05, "loss": 2.3789, "step": 494 }, { "epoch": 0.01597060762919153, "grad_norm": 0.416015625, "learning_rate": 1.4142857142857143e-05, "loss": 2.3513, "step": 495 }, { "epoch": 0.016002871482987874, "grad_norm": 0.46484375, "learning_rate": 1.4171428571428573e-05, "loss": 2.3208, "step": 496 }, { "epoch": 0.01603513533678422, "grad_norm": 0.43359375, "learning_rate": 1.42e-05, "loss": 2.4006, "step": 497 }, { "epoch": 0.016067399190580568, "grad_norm": 0.396484375, "learning_rate": 1.422857142857143e-05, "loss": 2.312, "step": 498 }, { "epoch": 0.016099663044376916, "grad_norm": 0.412109375, "learning_rate": 1.4257142857142857e-05, "loss": 2.3686, "step": 499 }, { "epoch": 0.01613192689817326, "grad_norm": 0.404296875, "learning_rate": 1.4285714285714285e-05, "loss": 2.3704, "step": 500 }, { "epoch": 0.016164190751969607, "grad_norm": 0.419921875, "learning_rate": 1.4314285714285715e-05, "loss": 2.3557, "step": 501 }, { "epoch": 0.016196454605765954, "grad_norm": 0.404296875, "learning_rate": 1.4342857142857143e-05, "loss": 2.317, "step": 502 }, { "epoch": 0.0162287184595623, "grad_norm": 0.4140625, "learning_rate": 1.4371428571428571e-05, "loss": 2.3833, "step": 503 }, { "epoch": 0.01626098231335865, "grad_norm": 0.41015625, "learning_rate": 1.44e-05, "loss": 2.3655, "step": 504 }, { "epoch": 0.016293246167154992, "grad_norm": 0.400390625, "learning_rate": 1.4428571428571429e-05, "loss": 2.3325, "step": 505 }, { "epoch": 0.01632551002095134, "grad_norm": 0.416015625, "learning_rate": 1.4457142857142857e-05, "loss": 2.3717, "step": 506 }, { "epoch": 0.016357773874747687, "grad_norm": 0.4140625, "learning_rate": 1.4485714285714287e-05, "loss": 2.3296, "step": 507 }, { "epoch": 0.016390037728544034, "grad_norm": 0.39453125, "learning_rate": 1.4514285714285715e-05, "loss": 2.3643, "step": 508 }, { "epoch": 0.01642230158234038, "grad_norm": 0.388671875, "learning_rate": 1.4542857142857143e-05, "loss": 2.337, "step": 509 }, { "epoch": 0.016454565436136725, "grad_norm": 0.404296875, "learning_rate": 1.4571428571428571e-05, "loss": 2.3354, "step": 510 }, { "epoch": 0.016486829289933072, "grad_norm": 0.396484375, "learning_rate": 1.46e-05, "loss": 2.368, "step": 511 }, { "epoch": 0.01651909314372942, "grad_norm": 0.392578125, "learning_rate": 1.4628571428571429e-05, "loss": 2.3133, "step": 512 }, { "epoch": 0.016551356997525767, "grad_norm": 0.400390625, "learning_rate": 1.4657142857142857e-05, "loss": 2.3448, "step": 513 }, { "epoch": 0.016583620851322114, "grad_norm": 0.41015625, "learning_rate": 1.4685714285714287e-05, "loss": 2.3821, "step": 514 }, { "epoch": 0.016615884705118458, "grad_norm": 0.3984375, "learning_rate": 1.4714285714285715e-05, "loss": 2.3767, "step": 515 }, { "epoch": 0.016648148558914805, "grad_norm": 0.40625, "learning_rate": 1.4742857142857143e-05, "loss": 2.3362, "step": 516 }, { "epoch": 0.016680412412711152, "grad_norm": 0.421875, "learning_rate": 1.4771428571428571e-05, "loss": 2.3467, "step": 517 }, { "epoch": 0.0167126762665075, "grad_norm": 0.408203125, "learning_rate": 1.48e-05, "loss": 2.3343, "step": 518 }, { "epoch": 0.016744940120303847, "grad_norm": 0.3828125, "learning_rate": 1.4828571428571429e-05, "loss": 2.3509, "step": 519 }, { "epoch": 0.01677720397410019, "grad_norm": 0.43359375, "learning_rate": 1.4857142857142858e-05, "loss": 2.3466, "step": 520 }, { "epoch": 0.016809467827896538, "grad_norm": 0.4453125, "learning_rate": 1.4885714285714286e-05, "loss": 2.3457, "step": 521 }, { "epoch": 0.016841731681692885, "grad_norm": 0.392578125, "learning_rate": 1.4914285714285715e-05, "loss": 2.3387, "step": 522 }, { "epoch": 0.016873995535489232, "grad_norm": 0.44921875, "learning_rate": 1.4942857142857143e-05, "loss": 2.3303, "step": 523 }, { "epoch": 0.016906259389285576, "grad_norm": 0.46875, "learning_rate": 1.4971428571428572e-05, "loss": 2.3297, "step": 524 }, { "epoch": 0.016938523243081923, "grad_norm": 0.421875, "learning_rate": 1.5e-05, "loss": 2.3536, "step": 525 }, { "epoch": 0.01697078709687827, "grad_norm": 0.41796875, "learning_rate": 1.5028571428571428e-05, "loss": 2.3439, "step": 526 }, { "epoch": 0.017003050950674618, "grad_norm": 0.42578125, "learning_rate": 1.5057142857142856e-05, "loss": 2.3446, "step": 527 }, { "epoch": 0.017035314804470965, "grad_norm": 0.408203125, "learning_rate": 1.5085714285714288e-05, "loss": 2.3568, "step": 528 }, { "epoch": 0.01706757865826731, "grad_norm": 0.4296875, "learning_rate": 1.5114285714285714e-05, "loss": 2.3456, "step": 529 }, { "epoch": 0.017099842512063656, "grad_norm": 0.431640625, "learning_rate": 1.5142857142857142e-05, "loss": 2.3657, "step": 530 }, { "epoch": 0.017132106365860003, "grad_norm": 0.4140625, "learning_rate": 1.517142857142857e-05, "loss": 2.3624, "step": 531 }, { "epoch": 0.01716437021965635, "grad_norm": 0.44921875, "learning_rate": 1.5200000000000002e-05, "loss": 2.3519, "step": 532 }, { "epoch": 0.017196634073452698, "grad_norm": 0.4375, "learning_rate": 1.522857142857143e-05, "loss": 2.3275, "step": 533 }, { "epoch": 0.01722889792724904, "grad_norm": 0.421875, "learning_rate": 1.5257142857142858e-05, "loss": 2.3624, "step": 534 }, { "epoch": 0.01726116178104539, "grad_norm": 0.462890625, "learning_rate": 1.5285714285714286e-05, "loss": 2.3129, "step": 535 }, { "epoch": 0.017293425634841736, "grad_norm": 0.443359375, "learning_rate": 1.5314285714285716e-05, "loss": 2.3324, "step": 536 }, { "epoch": 0.017325689488638083, "grad_norm": 0.421875, "learning_rate": 1.5342857142857146e-05, "loss": 2.319, "step": 537 }, { "epoch": 0.01735795334243443, "grad_norm": 0.478515625, "learning_rate": 1.5371428571428572e-05, "loss": 2.3393, "step": 538 }, { "epoch": 0.017390217196230774, "grad_norm": 0.390625, "learning_rate": 1.5399999999999998e-05, "loss": 2.3566, "step": 539 }, { "epoch": 0.01742248105002712, "grad_norm": 0.470703125, "learning_rate": 1.5428571428571428e-05, "loss": 2.3404, "step": 540 }, { "epoch": 0.01745474490382347, "grad_norm": 0.421875, "learning_rate": 1.5457142857142858e-05, "loss": 2.3152, "step": 541 }, { "epoch": 0.017487008757619816, "grad_norm": 0.41015625, "learning_rate": 1.5485714285714287e-05, "loss": 2.3577, "step": 542 }, { "epoch": 0.01751927261141616, "grad_norm": 0.451171875, "learning_rate": 1.5514285714285714e-05, "loss": 2.3393, "step": 543 }, { "epoch": 0.017551536465212507, "grad_norm": 0.416015625, "learning_rate": 1.5542857142857144e-05, "loss": 2.3597, "step": 544 }, { "epoch": 0.017583800319008854, "grad_norm": 0.455078125, "learning_rate": 1.5571428571428573e-05, "loss": 2.3031, "step": 545 }, { "epoch": 0.0176160641728052, "grad_norm": 0.4140625, "learning_rate": 1.56e-05, "loss": 2.341, "step": 546 }, { "epoch": 0.01764832802660155, "grad_norm": 0.412109375, "learning_rate": 1.562857142857143e-05, "loss": 2.3547, "step": 547 }, { "epoch": 0.017680591880397892, "grad_norm": 0.4375, "learning_rate": 1.5657142857142856e-05, "loss": 2.3354, "step": 548 }, { "epoch": 0.01771285573419424, "grad_norm": 0.421875, "learning_rate": 1.568571428571429e-05, "loss": 2.3356, "step": 549 }, { "epoch": 0.017745119587990587, "grad_norm": 0.44140625, "learning_rate": 1.5714285714285715e-05, "loss": 2.3148, "step": 550 }, { "epoch": 0.017777383441786934, "grad_norm": 0.439453125, "learning_rate": 1.574285714285714e-05, "loss": 2.329, "step": 551 }, { "epoch": 0.01780964729558328, "grad_norm": 0.40234375, "learning_rate": 1.577142857142857e-05, "loss": 2.3175, "step": 552 }, { "epoch": 0.017841911149379625, "grad_norm": 0.484375, "learning_rate": 1.5799999999999998e-05, "loss": 2.3519, "step": 553 }, { "epoch": 0.017874175003175972, "grad_norm": 0.431640625, "learning_rate": 1.582857142857143e-05, "loss": 2.3009, "step": 554 }, { "epoch": 0.01790643885697232, "grad_norm": 0.44140625, "learning_rate": 1.5857142857142857e-05, "loss": 2.3103, "step": 555 }, { "epoch": 0.017938702710768667, "grad_norm": 0.412109375, "learning_rate": 1.5885714285714287e-05, "loss": 2.3494, "step": 556 }, { "epoch": 0.017970966564565014, "grad_norm": 0.404296875, "learning_rate": 1.5914285714285713e-05, "loss": 2.3512, "step": 557 }, { "epoch": 0.018003230418361358, "grad_norm": 0.43359375, "learning_rate": 1.5942857142857143e-05, "loss": 2.3265, "step": 558 }, { "epoch": 0.018035494272157705, "grad_norm": 0.404296875, "learning_rate": 1.5971428571428573e-05, "loss": 2.3448, "step": 559 }, { "epoch": 0.018067758125954052, "grad_norm": 0.408203125, "learning_rate": 1.6e-05, "loss": 2.3281, "step": 560 }, { "epoch": 0.0181000219797504, "grad_norm": 0.435546875, "learning_rate": 1.602857142857143e-05, "loss": 2.2871, "step": 561 }, { "epoch": 0.018132285833546747, "grad_norm": 0.427734375, "learning_rate": 1.605714285714286e-05, "loss": 2.3451, "step": 562 }, { "epoch": 0.01816454968734309, "grad_norm": 0.416015625, "learning_rate": 1.608571428571429e-05, "loss": 2.3082, "step": 563 }, { "epoch": 0.018196813541139438, "grad_norm": 0.439453125, "learning_rate": 1.6114285714285715e-05, "loss": 2.3462, "step": 564 }, { "epoch": 0.018229077394935785, "grad_norm": 0.408203125, "learning_rate": 1.614285714285714e-05, "loss": 2.3214, "step": 565 }, { "epoch": 0.018261341248732132, "grad_norm": 0.4296875, "learning_rate": 1.617142857142857e-05, "loss": 2.2856, "step": 566 }, { "epoch": 0.018293605102528476, "grad_norm": 0.400390625, "learning_rate": 1.62e-05, "loss": 2.3332, "step": 567 }, { "epoch": 0.018325868956324824, "grad_norm": 0.421875, "learning_rate": 1.622857142857143e-05, "loss": 2.3514, "step": 568 }, { "epoch": 0.01835813281012117, "grad_norm": 0.443359375, "learning_rate": 1.6257142857142857e-05, "loss": 2.3179, "step": 569 }, { "epoch": 0.018390396663917518, "grad_norm": 0.466796875, "learning_rate": 1.6285714285714283e-05, "loss": 2.3284, "step": 570 }, { "epoch": 0.018422660517713865, "grad_norm": 0.4296875, "learning_rate": 1.6314285714285716e-05, "loss": 2.3018, "step": 571 }, { "epoch": 0.01845492437151021, "grad_norm": 0.453125, "learning_rate": 1.6342857142857143e-05, "loss": 2.3237, "step": 572 }, { "epoch": 0.018487188225306556, "grad_norm": 0.453125, "learning_rate": 1.6371428571428572e-05, "loss": 2.3401, "step": 573 }, { "epoch": 0.018519452079102904, "grad_norm": 0.44921875, "learning_rate": 1.64e-05, "loss": 2.3297, "step": 574 }, { "epoch": 0.01855171593289925, "grad_norm": 0.435546875, "learning_rate": 1.6428571428571432e-05, "loss": 2.3169, "step": 575 }, { "epoch": 0.018583979786695598, "grad_norm": 0.453125, "learning_rate": 1.645714285714286e-05, "loss": 2.3273, "step": 576 }, { "epoch": 0.018616243640491942, "grad_norm": 0.478515625, "learning_rate": 1.6485714285714285e-05, "loss": 2.3499, "step": 577 }, { "epoch": 0.01864850749428829, "grad_norm": 0.44140625, "learning_rate": 1.6514285714285714e-05, "loss": 2.3494, "step": 578 }, { "epoch": 0.018680771348084636, "grad_norm": 0.4609375, "learning_rate": 1.654285714285714e-05, "loss": 2.347, "step": 579 }, { "epoch": 0.018713035201880984, "grad_norm": 0.412109375, "learning_rate": 1.6571428571428574e-05, "loss": 2.3333, "step": 580 }, { "epoch": 0.01874529905567733, "grad_norm": 0.4453125, "learning_rate": 1.66e-05, "loss": 2.3078, "step": 581 }, { "epoch": 0.018777562909473675, "grad_norm": 0.400390625, "learning_rate": 1.662857142857143e-05, "loss": 2.335, "step": 582 }, { "epoch": 0.018809826763270022, "grad_norm": 0.52734375, "learning_rate": 1.6657142857142856e-05, "loss": 2.3466, "step": 583 }, { "epoch": 0.01884209061706637, "grad_norm": 0.5078125, "learning_rate": 1.6685714285714286e-05, "loss": 2.2473, "step": 584 }, { "epoch": 0.018874354470862716, "grad_norm": 0.490234375, "learning_rate": 1.6714285714285716e-05, "loss": 2.2933, "step": 585 }, { "epoch": 0.01890661832465906, "grad_norm": 0.5546875, "learning_rate": 1.6742857142857142e-05, "loss": 2.2505, "step": 586 }, { "epoch": 0.018938882178455407, "grad_norm": 0.50390625, "learning_rate": 1.6771428571428572e-05, "loss": 2.277, "step": 587 }, { "epoch": 0.018971146032251755, "grad_norm": 0.48046875, "learning_rate": 1.6800000000000002e-05, "loss": 2.313, "step": 588 }, { "epoch": 0.019003409886048102, "grad_norm": 0.5234375, "learning_rate": 1.6828571428571428e-05, "loss": 2.3035, "step": 589 }, { "epoch": 0.01903567373984445, "grad_norm": 0.486328125, "learning_rate": 1.6857142857142858e-05, "loss": 2.2857, "step": 590 }, { "epoch": 0.019067937593640793, "grad_norm": 0.47265625, "learning_rate": 1.6885714285714284e-05, "loss": 2.2886, "step": 591 }, { "epoch": 0.01910020144743714, "grad_norm": 0.46875, "learning_rate": 1.6914285714285714e-05, "loss": 2.2182, "step": 592 }, { "epoch": 0.019132465301233487, "grad_norm": 0.53515625, "learning_rate": 1.6942857142857144e-05, "loss": 2.2688, "step": 593 }, { "epoch": 0.019164729155029835, "grad_norm": 0.54296875, "learning_rate": 1.6971428571428574e-05, "loss": 2.257, "step": 594 }, { "epoch": 0.019196993008826182, "grad_norm": 0.48828125, "learning_rate": 1.7e-05, "loss": 2.2716, "step": 595 }, { "epoch": 0.019229256862622526, "grad_norm": 0.453125, "learning_rate": 1.7028571428571426e-05, "loss": 2.2616, "step": 596 }, { "epoch": 0.019261520716418873, "grad_norm": 0.482421875, "learning_rate": 1.705714285714286e-05, "loss": 2.2512, "step": 597 }, { "epoch": 0.01929378457021522, "grad_norm": 0.46875, "learning_rate": 1.7085714285714286e-05, "loss": 2.2494, "step": 598 }, { "epoch": 0.019326048424011567, "grad_norm": 0.46875, "learning_rate": 1.7114285714285715e-05, "loss": 2.234, "step": 599 }, { "epoch": 0.019358312277807915, "grad_norm": 0.52734375, "learning_rate": 1.7142857142857142e-05, "loss": 2.2539, "step": 600 }, { "epoch": 0.01939057613160426, "grad_norm": 0.44921875, "learning_rate": 1.7171428571428575e-05, "loss": 2.2619, "step": 601 }, { "epoch": 0.019422839985400606, "grad_norm": 0.451171875, "learning_rate": 1.72e-05, "loss": 2.2927, "step": 602 }, { "epoch": 0.019455103839196953, "grad_norm": 0.486328125, "learning_rate": 1.7228571428571428e-05, "loss": 2.288, "step": 603 }, { "epoch": 0.0194873676929933, "grad_norm": 0.4453125, "learning_rate": 1.7257142857142857e-05, "loss": 2.2456, "step": 604 }, { "epoch": 0.019519631546789647, "grad_norm": 0.48046875, "learning_rate": 1.7285714285714284e-05, "loss": 2.2671, "step": 605 }, { "epoch": 0.01955189540058599, "grad_norm": 0.50390625, "learning_rate": 1.7314285714285717e-05, "loss": 2.2704, "step": 606 }, { "epoch": 0.01958415925438234, "grad_norm": 0.439453125, "learning_rate": 1.7342857142857143e-05, "loss": 2.2912, "step": 607 }, { "epoch": 0.019616423108178686, "grad_norm": 0.4609375, "learning_rate": 1.7371428571428573e-05, "loss": 2.2696, "step": 608 }, { "epoch": 0.019648686961975033, "grad_norm": 0.50390625, "learning_rate": 1.74e-05, "loss": 2.2488, "step": 609 }, { "epoch": 0.019680950815771377, "grad_norm": 0.51171875, "learning_rate": 1.742857142857143e-05, "loss": 2.2812, "step": 610 }, { "epoch": 0.019713214669567724, "grad_norm": 0.419921875, "learning_rate": 1.745714285714286e-05, "loss": 2.3053, "step": 611 }, { "epoch": 0.01974547852336407, "grad_norm": 0.51953125, "learning_rate": 1.7485714285714285e-05, "loss": 2.2731, "step": 612 }, { "epoch": 0.01977774237716042, "grad_norm": 0.4140625, "learning_rate": 1.7514285714285715e-05, "loss": 2.2617, "step": 613 }, { "epoch": 0.019810006230956766, "grad_norm": 0.44921875, "learning_rate": 1.7542857142857145e-05, "loss": 2.28, "step": 614 }, { "epoch": 0.01984227008475311, "grad_norm": 0.42578125, "learning_rate": 1.757142857142857e-05, "loss": 2.2363, "step": 615 }, { "epoch": 0.019874533938549457, "grad_norm": 0.3984375, "learning_rate": 1.76e-05, "loss": 2.2555, "step": 616 }, { "epoch": 0.019906797792345804, "grad_norm": 0.462890625, "learning_rate": 1.7628571428571427e-05, "loss": 2.3056, "step": 617 }, { "epoch": 0.01993906164614215, "grad_norm": 0.3984375, "learning_rate": 1.7657142857142857e-05, "loss": 2.2803, "step": 618 }, { "epoch": 0.0199713254999385, "grad_norm": 0.466796875, "learning_rate": 1.7685714285714287e-05, "loss": 2.2911, "step": 619 }, { "epoch": 0.020003589353734842, "grad_norm": 0.45703125, "learning_rate": 1.7714285714285717e-05, "loss": 2.276, "step": 620 }, { "epoch": 0.02003585320753119, "grad_norm": 0.443359375, "learning_rate": 1.7742857142857143e-05, "loss": 2.2619, "step": 621 }, { "epoch": 0.020068117061327537, "grad_norm": 0.423828125, "learning_rate": 1.777142857142857e-05, "loss": 2.2401, "step": 622 }, { "epoch": 0.020100380915123884, "grad_norm": 0.5390625, "learning_rate": 1.7800000000000002e-05, "loss": 2.2542, "step": 623 }, { "epoch": 0.02013264476892023, "grad_norm": 0.44140625, "learning_rate": 1.782857142857143e-05, "loss": 2.2705, "step": 624 }, { "epoch": 0.020164908622716575, "grad_norm": 0.466796875, "learning_rate": 1.785714285714286e-05, "loss": 2.2947, "step": 625 }, { "epoch": 0.020197172476512922, "grad_norm": 0.5078125, "learning_rate": 1.7885714285714285e-05, "loss": 2.3132, "step": 626 }, { "epoch": 0.02022943633030927, "grad_norm": 0.5234375, "learning_rate": 1.7914285714285718e-05, "loss": 2.2865, "step": 627 }, { "epoch": 0.020261700184105617, "grad_norm": 0.54296875, "learning_rate": 1.7942857142857144e-05, "loss": 2.256, "step": 628 }, { "epoch": 0.020293964037901964, "grad_norm": 0.51171875, "learning_rate": 1.797142857142857e-05, "loss": 2.2636, "step": 629 }, { "epoch": 0.020326227891698308, "grad_norm": 0.486328125, "learning_rate": 1.8e-05, "loss": 2.2864, "step": 630 }, { "epoch": 0.020358491745494655, "grad_norm": 0.5390625, "learning_rate": 1.8028571428571427e-05, "loss": 2.2563, "step": 631 }, { "epoch": 0.020390755599291002, "grad_norm": 0.69140625, "learning_rate": 1.805714285714286e-05, "loss": 2.3665, "step": 632 }, { "epoch": 0.02042301945308735, "grad_norm": 0.65625, "learning_rate": 1.8085714285714286e-05, "loss": 2.4182, "step": 633 }, { "epoch": 0.020455283306883693, "grad_norm": 0.70703125, "learning_rate": 1.8114285714285713e-05, "loss": 2.3958, "step": 634 }, { "epoch": 0.02048754716068004, "grad_norm": 0.78125, "learning_rate": 1.8142857142857142e-05, "loss": 2.3969, "step": 635 }, { "epoch": 0.020519811014476388, "grad_norm": 0.62890625, "learning_rate": 1.8171428571428572e-05, "loss": 2.3904, "step": 636 }, { "epoch": 0.020552074868272735, "grad_norm": 0.640625, "learning_rate": 1.8200000000000002e-05, "loss": 2.4026, "step": 637 }, { "epoch": 0.020584338722069082, "grad_norm": 0.56640625, "learning_rate": 1.8228571428571428e-05, "loss": 2.4056, "step": 638 }, { "epoch": 0.020616602575865426, "grad_norm": 0.58984375, "learning_rate": 1.8257142857142858e-05, "loss": 2.3978, "step": 639 }, { "epoch": 0.020648866429661773, "grad_norm": 0.50390625, "learning_rate": 1.8285714285714288e-05, "loss": 2.4124, "step": 640 }, { "epoch": 0.02068113028345812, "grad_norm": 0.56640625, "learning_rate": 1.8314285714285714e-05, "loss": 2.4295, "step": 641 }, { "epoch": 0.020713394137254468, "grad_norm": 0.51953125, "learning_rate": 1.8342857142857144e-05, "loss": 2.4044, "step": 642 }, { "epoch": 0.020745657991050815, "grad_norm": 0.515625, "learning_rate": 1.837142857142857e-05, "loss": 2.4052, "step": 643 }, { "epoch": 0.02077792184484716, "grad_norm": 0.5625, "learning_rate": 1.84e-05, "loss": 2.4182, "step": 644 }, { "epoch": 0.020810185698643506, "grad_norm": 0.494140625, "learning_rate": 1.842857142857143e-05, "loss": 2.3992, "step": 645 }, { "epoch": 0.020842449552439853, "grad_norm": 0.466796875, "learning_rate": 1.845714285714286e-05, "loss": 2.3887, "step": 646 }, { "epoch": 0.0208747134062362, "grad_norm": 0.458984375, "learning_rate": 1.8485714285714286e-05, "loss": 2.4225, "step": 647 }, { "epoch": 0.020906977260032548, "grad_norm": 0.4765625, "learning_rate": 1.8514285714285712e-05, "loss": 2.3835, "step": 648 }, { "epoch": 0.02093924111382889, "grad_norm": 0.5078125, "learning_rate": 1.8542857142857145e-05, "loss": 2.4034, "step": 649 }, { "epoch": 0.02097150496762524, "grad_norm": 0.453125, "learning_rate": 1.8571428571428572e-05, "loss": 2.4043, "step": 650 }, { "epoch": 0.021003768821421586, "grad_norm": 0.51953125, "learning_rate": 1.86e-05, "loss": 2.3821, "step": 651 }, { "epoch": 0.021036032675217933, "grad_norm": 0.462890625, "learning_rate": 1.8628571428571428e-05, "loss": 2.3835, "step": 652 }, { "epoch": 0.021068296529014277, "grad_norm": 0.453125, "learning_rate": 1.8657142857142858e-05, "loss": 2.4002, "step": 653 }, { "epoch": 0.021100560382810624, "grad_norm": 0.51171875, "learning_rate": 1.8685714285714287e-05, "loss": 2.3895, "step": 654 }, { "epoch": 0.02113282423660697, "grad_norm": 0.474609375, "learning_rate": 1.8714285714285714e-05, "loss": 2.3684, "step": 655 }, { "epoch": 0.02116508809040332, "grad_norm": 0.482421875, "learning_rate": 1.8742857142857143e-05, "loss": 2.408, "step": 656 }, { "epoch": 0.021197351944199666, "grad_norm": 0.458984375, "learning_rate": 1.877142857142857e-05, "loss": 2.4083, "step": 657 }, { "epoch": 0.02122961579799601, "grad_norm": 0.419921875, "learning_rate": 1.8800000000000003e-05, "loss": 2.365, "step": 658 }, { "epoch": 0.021261879651792357, "grad_norm": 0.4453125, "learning_rate": 1.882857142857143e-05, "loss": 2.4168, "step": 659 }, { "epoch": 0.021294143505588704, "grad_norm": 0.439453125, "learning_rate": 1.8857142857142856e-05, "loss": 2.383, "step": 660 }, { "epoch": 0.02132640735938505, "grad_norm": 0.44140625, "learning_rate": 1.8885714285714285e-05, "loss": 2.3956, "step": 661 }, { "epoch": 0.0213586712131814, "grad_norm": 0.443359375, "learning_rate": 1.8914285714285715e-05, "loss": 2.4089, "step": 662 }, { "epoch": 0.021390935066977743, "grad_norm": 0.470703125, "learning_rate": 1.8942857142857145e-05, "loss": 2.2893, "step": 663 }, { "epoch": 0.02142319892077409, "grad_norm": 0.4375, "learning_rate": 1.897142857142857e-05, "loss": 2.2858, "step": 664 }, { "epoch": 0.021455462774570437, "grad_norm": 0.451171875, "learning_rate": 1.9e-05, "loss": 2.2822, "step": 665 }, { "epoch": 0.021487726628366784, "grad_norm": 0.4765625, "learning_rate": 1.9028571428571427e-05, "loss": 2.2888, "step": 666 }, { "epoch": 0.02151999048216313, "grad_norm": 0.484375, "learning_rate": 1.9057142857142857e-05, "loss": 2.3582, "step": 667 }, { "epoch": 0.021552254335959475, "grad_norm": 0.4765625, "learning_rate": 1.9085714285714287e-05, "loss": 2.4097, "step": 668 }, { "epoch": 0.021584518189755823, "grad_norm": 0.4609375, "learning_rate": 1.9114285714285713e-05, "loss": 2.3999, "step": 669 }, { "epoch": 0.02161678204355217, "grad_norm": 0.59375, "learning_rate": 1.9142857142857143e-05, "loss": 2.3722, "step": 670 }, { "epoch": 0.021649045897348517, "grad_norm": 0.67578125, "learning_rate": 1.9171428571428573e-05, "loss": 2.3917, "step": 671 }, { "epoch": 0.021681309751144864, "grad_norm": 0.66015625, "learning_rate": 1.9200000000000003e-05, "loss": 2.3808, "step": 672 }, { "epoch": 0.021713573604941208, "grad_norm": 0.515625, "learning_rate": 1.922857142857143e-05, "loss": 2.3706, "step": 673 }, { "epoch": 0.021745837458737555, "grad_norm": 0.48828125, "learning_rate": 1.9257142857142855e-05, "loss": 2.387, "step": 674 }, { "epoch": 0.021778101312533903, "grad_norm": 0.62890625, "learning_rate": 1.928571428571429e-05, "loss": 2.3665, "step": 675 }, { "epoch": 0.02181036516633025, "grad_norm": 0.51953125, "learning_rate": 1.9314285714285715e-05, "loss": 2.3546, "step": 676 }, { "epoch": 0.021842629020126594, "grad_norm": 0.45703125, "learning_rate": 1.9342857142857144e-05, "loss": 2.3854, "step": 677 }, { "epoch": 0.02187489287392294, "grad_norm": 0.5390625, "learning_rate": 1.937142857142857e-05, "loss": 2.3803, "step": 678 }, { "epoch": 0.021907156727719288, "grad_norm": 0.44921875, "learning_rate": 1.9399999999999997e-05, "loss": 2.3993, "step": 679 }, { "epoch": 0.021939420581515635, "grad_norm": 0.443359375, "learning_rate": 1.942857142857143e-05, "loss": 2.3907, "step": 680 }, { "epoch": 0.021971684435311983, "grad_norm": 0.4453125, "learning_rate": 1.9457142857142857e-05, "loss": 2.3426, "step": 681 }, { "epoch": 0.022003948289108326, "grad_norm": 0.408203125, "learning_rate": 1.9485714285714286e-05, "loss": 2.372, "step": 682 }, { "epoch": 0.022036212142904674, "grad_norm": 0.466796875, "learning_rate": 1.9514285714285713e-05, "loss": 2.3556, "step": 683 }, { "epoch": 0.02206847599670102, "grad_norm": 0.42578125, "learning_rate": 1.9542857142857146e-05, "loss": 2.3845, "step": 684 }, { "epoch": 0.022100739850497368, "grad_norm": 0.52734375, "learning_rate": 1.9571428571428572e-05, "loss": 2.3806, "step": 685 }, { "epoch": 0.022133003704293715, "grad_norm": 0.55859375, "learning_rate": 1.96e-05, "loss": 2.3951, "step": 686 }, { "epoch": 0.02216526755809006, "grad_norm": 0.52734375, "learning_rate": 1.962857142857143e-05, "loss": 2.3685, "step": 687 }, { "epoch": 0.022197531411886406, "grad_norm": 0.470703125, "learning_rate": 1.9657142857142858e-05, "loss": 2.3719, "step": 688 }, { "epoch": 0.022229795265682754, "grad_norm": 0.66015625, "learning_rate": 1.9685714285714288e-05, "loss": 2.3862, "step": 689 }, { "epoch": 0.0222620591194791, "grad_norm": 0.55859375, "learning_rate": 1.9714285714285714e-05, "loss": 2.3988, "step": 690 }, { "epoch": 0.022294322973275448, "grad_norm": 0.486328125, "learning_rate": 1.9742857142857144e-05, "loss": 2.3975, "step": 691 }, { "epoch": 0.022326586827071792, "grad_norm": 0.48046875, "learning_rate": 1.977142857142857e-05, "loss": 2.3921, "step": 692 }, { "epoch": 0.02235885068086814, "grad_norm": 0.5078125, "learning_rate": 1.98e-05, "loss": 2.3784, "step": 693 }, { "epoch": 0.022391114534664486, "grad_norm": 0.470703125, "learning_rate": 1.982857142857143e-05, "loss": 2.3538, "step": 694 }, { "epoch": 0.022423378388460834, "grad_norm": 0.46875, "learning_rate": 1.9857142857142856e-05, "loss": 2.3505, "step": 695 }, { "epoch": 0.02245564224225718, "grad_norm": 0.515625, "learning_rate": 1.9885714285714286e-05, "loss": 2.3779, "step": 696 }, { "epoch": 0.022487906096053525, "grad_norm": 0.50390625, "learning_rate": 1.9914285714285716e-05, "loss": 2.378, "step": 697 }, { "epoch": 0.022520169949849872, "grad_norm": 0.47265625, "learning_rate": 1.9942857142857142e-05, "loss": 2.3743, "step": 698 }, { "epoch": 0.02255243380364622, "grad_norm": 0.578125, "learning_rate": 1.9971428571428572e-05, "loss": 2.4005, "step": 699 }, { "epoch": 0.022584697657442566, "grad_norm": 0.431640625, "learning_rate": 1.9999999999999998e-05, "loss": 2.3796, "step": 700 }, { "epoch": 0.02261696151123891, "grad_norm": 0.53125, "learning_rate": 2.002857142857143e-05, "loss": 2.3512, "step": 701 }, { "epoch": 0.022649225365035257, "grad_norm": 0.5234375, "learning_rate": 2.0057142857142858e-05, "loss": 2.3714, "step": 702 }, { "epoch": 0.022681489218831605, "grad_norm": 0.45703125, "learning_rate": 2.0085714285714287e-05, "loss": 2.3867, "step": 703 }, { "epoch": 0.022713753072627952, "grad_norm": 0.546875, "learning_rate": 2.0114285714285714e-05, "loss": 2.3667, "step": 704 }, { "epoch": 0.0227460169264243, "grad_norm": 0.42578125, "learning_rate": 2.014285714285714e-05, "loss": 2.3507, "step": 705 }, { "epoch": 0.022778280780220643, "grad_norm": 0.546875, "learning_rate": 2.0171428571428573e-05, "loss": 2.3797, "step": 706 }, { "epoch": 0.02281054463401699, "grad_norm": 0.4921875, "learning_rate": 2.02e-05, "loss": 2.352, "step": 707 }, { "epoch": 0.022842808487813338, "grad_norm": 0.56640625, "learning_rate": 2.022857142857143e-05, "loss": 2.373, "step": 708 }, { "epoch": 0.022875072341609685, "grad_norm": 0.50390625, "learning_rate": 2.0257142857142856e-05, "loss": 2.3896, "step": 709 }, { "epoch": 0.022907336195406032, "grad_norm": 0.458984375, "learning_rate": 2.028571428571429e-05, "loss": 2.3731, "step": 710 }, { "epoch": 0.022939600049202376, "grad_norm": 0.498046875, "learning_rate": 2.0314285714285715e-05, "loss": 2.371, "step": 711 }, { "epoch": 0.022971863902998723, "grad_norm": 0.478515625, "learning_rate": 2.0342857142857142e-05, "loss": 2.3468, "step": 712 }, { "epoch": 0.02300412775679507, "grad_norm": 0.515625, "learning_rate": 2.037142857142857e-05, "loss": 2.3779, "step": 713 }, { "epoch": 0.023036391610591418, "grad_norm": 0.48828125, "learning_rate": 2.04e-05, "loss": 2.363, "step": 714 }, { "epoch": 0.023068655464387765, "grad_norm": 0.5, "learning_rate": 2.042857142857143e-05, "loss": 2.3546, "step": 715 }, { "epoch": 0.02310091931818411, "grad_norm": 0.458984375, "learning_rate": 2.0457142857142857e-05, "loss": 2.3629, "step": 716 }, { "epoch": 0.023133183171980456, "grad_norm": 0.50390625, "learning_rate": 2.0485714285714287e-05, "loss": 2.3275, "step": 717 }, { "epoch": 0.023165447025776803, "grad_norm": 0.498046875, "learning_rate": 2.0514285714285713e-05, "loss": 2.3781, "step": 718 }, { "epoch": 0.02319771087957315, "grad_norm": 0.515625, "learning_rate": 2.0542857142857143e-05, "loss": 2.3861, "step": 719 }, { "epoch": 0.023229974733369494, "grad_norm": 0.546875, "learning_rate": 2.0571428571428573e-05, "loss": 2.3074, "step": 720 }, { "epoch": 0.02326223858716584, "grad_norm": 0.46484375, "learning_rate": 2.06e-05, "loss": 2.3735, "step": 721 }, { "epoch": 0.02329450244096219, "grad_norm": 0.482421875, "learning_rate": 2.062857142857143e-05, "loss": 2.3552, "step": 722 }, { "epoch": 0.023326766294758536, "grad_norm": 0.40625, "learning_rate": 2.065714285714286e-05, "loss": 2.3862, "step": 723 }, { "epoch": 0.023359030148554883, "grad_norm": 0.50390625, "learning_rate": 2.0685714285714285e-05, "loss": 2.3186, "step": 724 }, { "epoch": 0.023391294002351227, "grad_norm": 0.490234375, "learning_rate": 2.0714285714285715e-05, "loss": 2.3885, "step": 725 }, { "epoch": 0.023423557856147574, "grad_norm": 0.4609375, "learning_rate": 2.074285714285714e-05, "loss": 2.3775, "step": 726 }, { "epoch": 0.02345582170994392, "grad_norm": 0.443359375, "learning_rate": 2.0771428571428574e-05, "loss": 2.3737, "step": 727 }, { "epoch": 0.02348808556374027, "grad_norm": 0.447265625, "learning_rate": 2.08e-05, "loss": 2.3185, "step": 728 }, { "epoch": 0.023520349417536616, "grad_norm": 0.5078125, "learning_rate": 2.082857142857143e-05, "loss": 2.3513, "step": 729 }, { "epoch": 0.02355261327133296, "grad_norm": 0.486328125, "learning_rate": 2.0857142857142857e-05, "loss": 2.3564, "step": 730 }, { "epoch": 0.023584877125129307, "grad_norm": 0.52734375, "learning_rate": 2.0885714285714283e-05, "loss": 2.3673, "step": 731 }, { "epoch": 0.023617140978925654, "grad_norm": 0.494140625, "learning_rate": 2.0914285714285716e-05, "loss": 2.3744, "step": 732 }, { "epoch": 0.023649404832722, "grad_norm": 0.447265625, "learning_rate": 2.0942857142857143e-05, "loss": 2.3649, "step": 733 }, { "epoch": 0.02368166868651835, "grad_norm": 0.486328125, "learning_rate": 2.0971428571428572e-05, "loss": 2.3519, "step": 734 }, { "epoch": 0.023713932540314692, "grad_norm": 0.45703125, "learning_rate": 2.1e-05, "loss": 2.3425, "step": 735 }, { "epoch": 0.02374619639411104, "grad_norm": 0.50390625, "learning_rate": 2.1028571428571432e-05, "loss": 2.3825, "step": 736 }, { "epoch": 0.023778460247907387, "grad_norm": 0.466796875, "learning_rate": 2.105714285714286e-05, "loss": 2.361, "step": 737 }, { "epoch": 0.023810724101703734, "grad_norm": 0.48046875, "learning_rate": 2.1085714285714285e-05, "loss": 2.3757, "step": 738 }, { "epoch": 0.02384298795550008, "grad_norm": 0.44140625, "learning_rate": 2.1114285714285714e-05, "loss": 2.3571, "step": 739 }, { "epoch": 0.023875251809296425, "grad_norm": 0.4453125, "learning_rate": 2.1142857142857144e-05, "loss": 2.3487, "step": 740 }, { "epoch": 0.023907515663092772, "grad_norm": 0.484375, "learning_rate": 2.1171428571428574e-05, "loss": 2.3738, "step": 741 }, { "epoch": 0.02393977951688912, "grad_norm": 0.466796875, "learning_rate": 2.12e-05, "loss": 2.3722, "step": 742 }, { "epoch": 0.023972043370685467, "grad_norm": 0.50390625, "learning_rate": 2.1228571428571427e-05, "loss": 2.3685, "step": 743 }, { "epoch": 0.02400430722448181, "grad_norm": 0.494140625, "learning_rate": 2.1257142857142856e-05, "loss": 2.3472, "step": 744 }, { "epoch": 0.024036571078278158, "grad_norm": 0.482421875, "learning_rate": 2.1285714285714286e-05, "loss": 2.3584, "step": 745 }, { "epoch": 0.024068834932074505, "grad_norm": 0.5390625, "learning_rate": 2.1314285714285716e-05, "loss": 2.3801, "step": 746 }, { "epoch": 0.024101098785870852, "grad_norm": 0.59375, "learning_rate": 2.1342857142857142e-05, "loss": 2.2902, "step": 747 }, { "epoch": 0.0241333626396672, "grad_norm": 0.78125, "learning_rate": 2.1371428571428572e-05, "loss": 2.2781, "step": 748 }, { "epoch": 0.024165626493463543, "grad_norm": 1.1796875, "learning_rate": 2.1400000000000002e-05, "loss": 2.3094, "step": 749 }, { "epoch": 0.02419789034725989, "grad_norm": 0.78125, "learning_rate": 2.1428571428571428e-05, "loss": 2.2936, "step": 750 }, { "epoch": 0.024230154201056238, "grad_norm": 0.51953125, "learning_rate": 2.1457142857142858e-05, "loss": 2.2466, "step": 751 }, { "epoch": 0.024262418054852585, "grad_norm": 0.609375, "learning_rate": 2.1485714285714284e-05, "loss": 2.2798, "step": 752 }, { "epoch": 0.024294681908648932, "grad_norm": 0.51953125, "learning_rate": 2.1514285714285717e-05, "loss": 2.2731, "step": 753 }, { "epoch": 0.024326945762445276, "grad_norm": 0.5, "learning_rate": 2.1542857142857144e-05, "loss": 2.2606, "step": 754 }, { "epoch": 0.024359209616241623, "grad_norm": 0.546875, "learning_rate": 2.1571428571428574e-05, "loss": 2.2814, "step": 755 }, { "epoch": 0.02439147347003797, "grad_norm": 0.515625, "learning_rate": 2.16e-05, "loss": 2.2862, "step": 756 }, { "epoch": 0.024423737323834318, "grad_norm": 0.5078125, "learning_rate": 2.1628571428571426e-05, "loss": 2.2809, "step": 757 }, { "epoch": 0.024456001177630665, "grad_norm": 0.44140625, "learning_rate": 2.165714285714286e-05, "loss": 2.2895, "step": 758 }, { "epoch": 0.02448826503142701, "grad_norm": 0.5, "learning_rate": 2.1685714285714286e-05, "loss": 2.2937, "step": 759 }, { "epoch": 0.024520528885223356, "grad_norm": 0.4453125, "learning_rate": 2.1714285714285715e-05, "loss": 2.2967, "step": 760 }, { "epoch": 0.024552792739019703, "grad_norm": 0.474609375, "learning_rate": 2.1742857142857142e-05, "loss": 2.2694, "step": 761 }, { "epoch": 0.02458505659281605, "grad_norm": 0.466796875, "learning_rate": 2.177142857142857e-05, "loss": 2.2688, "step": 762 }, { "epoch": 0.024617320446612394, "grad_norm": 0.462890625, "learning_rate": 2.18e-05, "loss": 2.2634, "step": 763 }, { "epoch": 0.02464958430040874, "grad_norm": 0.44140625, "learning_rate": 2.1828571428571428e-05, "loss": 2.2382, "step": 764 }, { "epoch": 0.02468184815420509, "grad_norm": 0.49609375, "learning_rate": 2.1857142857142857e-05, "loss": 2.2823, "step": 765 }, { "epoch": 0.024714112008001436, "grad_norm": 0.49609375, "learning_rate": 2.1885714285714287e-05, "loss": 2.2775, "step": 766 }, { "epoch": 0.024746375861797783, "grad_norm": 0.451171875, "learning_rate": 2.1914285714285717e-05, "loss": 2.2616, "step": 767 }, { "epoch": 0.024778639715594127, "grad_norm": 0.46484375, "learning_rate": 2.1942857142857143e-05, "loss": 2.285, "step": 768 }, { "epoch": 0.024810903569390474, "grad_norm": 0.51171875, "learning_rate": 2.197142857142857e-05, "loss": 2.291, "step": 769 }, { "epoch": 0.02484316742318682, "grad_norm": 0.42578125, "learning_rate": 2.2e-05, "loss": 2.2748, "step": 770 }, { "epoch": 0.02487543127698317, "grad_norm": 0.515625, "learning_rate": 2.202857142857143e-05, "loss": 2.2983, "step": 771 }, { "epoch": 0.024907695130779516, "grad_norm": 0.44921875, "learning_rate": 2.205714285714286e-05, "loss": 2.215, "step": 772 }, { "epoch": 0.02493995898457586, "grad_norm": 0.466796875, "learning_rate": 2.2085714285714285e-05, "loss": 2.2779, "step": 773 }, { "epoch": 0.024972222838372207, "grad_norm": 0.5, "learning_rate": 2.2114285714285715e-05, "loss": 2.2768, "step": 774 }, { "epoch": 0.025004486692168554, "grad_norm": 0.46875, "learning_rate": 2.2142857142857145e-05, "loss": 2.2642, "step": 775 }, { "epoch": 0.025036750545964902, "grad_norm": 0.45703125, "learning_rate": 2.217142857142857e-05, "loss": 2.2837, "step": 776 }, { "epoch": 0.02506901439976125, "grad_norm": 0.5625, "learning_rate": 2.22e-05, "loss": 2.2852, "step": 777 }, { "epoch": 0.025101278253557593, "grad_norm": 0.546875, "learning_rate": 2.2228571428571427e-05, "loss": 2.2871, "step": 778 }, { "epoch": 0.02513354210735394, "grad_norm": 0.5625, "learning_rate": 2.225714285714286e-05, "loss": 2.2929, "step": 779 }, { "epoch": 0.025165805961150287, "grad_norm": 0.462890625, "learning_rate": 2.2285714285714287e-05, "loss": 2.2839, "step": 780 }, { "epoch": 0.025198069814946635, "grad_norm": 0.54296875, "learning_rate": 2.2314285714285717e-05, "loss": 2.282, "step": 781 }, { "epoch": 0.025230333668742982, "grad_norm": 0.63671875, "learning_rate": 2.2342857142857143e-05, "loss": 2.2629, "step": 782 }, { "epoch": 0.025262597522539326, "grad_norm": 0.65234375, "learning_rate": 2.237142857142857e-05, "loss": 2.2616, "step": 783 }, { "epoch": 0.025294861376335673, "grad_norm": 0.6015625, "learning_rate": 2.2400000000000002e-05, "loss": 2.2213, "step": 784 }, { "epoch": 0.02532712523013202, "grad_norm": 0.4765625, "learning_rate": 2.242857142857143e-05, "loss": 2.289, "step": 785 }, { "epoch": 0.025359389083928367, "grad_norm": 0.515625, "learning_rate": 2.245714285714286e-05, "loss": 2.2378, "step": 786 }, { "epoch": 0.02539165293772471, "grad_norm": 0.63671875, "learning_rate": 2.2485714285714285e-05, "loss": 2.2929, "step": 787 }, { "epoch": 0.02542391679152106, "grad_norm": 0.6328125, "learning_rate": 2.2514285714285715e-05, "loss": 2.2862, "step": 788 }, { "epoch": 0.025456180645317406, "grad_norm": 0.50390625, "learning_rate": 2.2542857142857144e-05, "loss": 2.2899, "step": 789 }, { "epoch": 0.025488444499113753, "grad_norm": 0.58203125, "learning_rate": 2.257142857142857e-05, "loss": 2.2485, "step": 790 }, { "epoch": 0.0255207083529101, "grad_norm": 0.59375, "learning_rate": 2.26e-05, "loss": 2.2619, "step": 791 }, { "epoch": 0.025552972206706444, "grad_norm": 0.45703125, "learning_rate": 2.262857142857143e-05, "loss": 2.2577, "step": 792 }, { "epoch": 0.02558523606050279, "grad_norm": 0.6171875, "learning_rate": 2.265714285714286e-05, "loss": 2.2508, "step": 793 }, { "epoch": 0.02561749991429914, "grad_norm": 0.5859375, "learning_rate": 2.2685714285714286e-05, "loss": 2.2935, "step": 794 }, { "epoch": 0.025649763768095486, "grad_norm": 0.56640625, "learning_rate": 2.2714285714285713e-05, "loss": 2.2744, "step": 795 }, { "epoch": 0.025682027621891833, "grad_norm": 0.478515625, "learning_rate": 2.2742857142857142e-05, "loss": 2.2753, "step": 796 }, { "epoch": 0.025714291475688177, "grad_norm": 0.67578125, "learning_rate": 2.2771428571428572e-05, "loss": 2.2759, "step": 797 }, { "epoch": 0.025746555329484524, "grad_norm": 0.58984375, "learning_rate": 2.2800000000000002e-05, "loss": 2.3125, "step": 798 }, { "epoch": 0.02577881918328087, "grad_norm": 0.578125, "learning_rate": 2.2828571428571428e-05, "loss": 2.2687, "step": 799 }, { "epoch": 0.02581108303707722, "grad_norm": 0.65625, "learning_rate": 2.2857142857142858e-05, "loss": 2.2784, "step": 800 }, { "epoch": 0.025843346890873566, "grad_norm": 0.57421875, "learning_rate": 2.2885714285714288e-05, "loss": 2.2867, "step": 801 }, { "epoch": 0.02587561074466991, "grad_norm": 0.515625, "learning_rate": 2.2914285714285714e-05, "loss": 2.281, "step": 802 }, { "epoch": 0.025907874598466257, "grad_norm": 0.5234375, "learning_rate": 2.2942857142857144e-05, "loss": 2.2618, "step": 803 }, { "epoch": 0.025940138452262604, "grad_norm": 0.50390625, "learning_rate": 2.297142857142857e-05, "loss": 2.2421, "step": 804 }, { "epoch": 0.02597240230605895, "grad_norm": 0.59375, "learning_rate": 2.3000000000000003e-05, "loss": 2.2666, "step": 805 }, { "epoch": 0.0260046661598553, "grad_norm": 0.5, "learning_rate": 2.302857142857143e-05, "loss": 2.1983, "step": 806 }, { "epoch": 0.026036930013651642, "grad_norm": 0.5390625, "learning_rate": 2.3057142857142856e-05, "loss": 2.2935, "step": 807 }, { "epoch": 0.02606919386744799, "grad_norm": 0.62890625, "learning_rate": 2.3085714285714286e-05, "loss": 2.2829, "step": 808 }, { "epoch": 0.026101457721244337, "grad_norm": 0.48828125, "learning_rate": 2.3114285714285712e-05, "loss": 2.2482, "step": 809 }, { "epoch": 0.026133721575040684, "grad_norm": 0.52734375, "learning_rate": 2.3142857142857145e-05, "loss": 2.2601, "step": 810 }, { "epoch": 0.026165985428837028, "grad_norm": 0.546875, "learning_rate": 2.3171428571428572e-05, "loss": 2.3086, "step": 811 }, { "epoch": 0.026198249282633375, "grad_norm": 0.5078125, "learning_rate": 2.32e-05, "loss": 2.2589, "step": 812 }, { "epoch": 0.026230513136429722, "grad_norm": 0.484375, "learning_rate": 2.3228571428571428e-05, "loss": 2.2656, "step": 813 }, { "epoch": 0.02626277699022607, "grad_norm": 0.494140625, "learning_rate": 2.3257142857142858e-05, "loss": 2.2615, "step": 814 }, { "epoch": 0.026295040844022417, "grad_norm": 0.55078125, "learning_rate": 2.3285714285714287e-05, "loss": 2.2465, "step": 815 }, { "epoch": 0.02632730469781876, "grad_norm": 0.490234375, "learning_rate": 2.3314285714285714e-05, "loss": 2.2686, "step": 816 }, { "epoch": 0.026359568551615108, "grad_norm": 0.470703125, "learning_rate": 2.3342857142857143e-05, "loss": 2.2802, "step": 817 }, { "epoch": 0.026391832405411455, "grad_norm": 0.5390625, "learning_rate": 2.3371428571428573e-05, "loss": 2.2879, "step": 818 }, { "epoch": 0.026424096259207802, "grad_norm": 0.447265625, "learning_rate": 2.3400000000000003e-05, "loss": 2.2623, "step": 819 }, { "epoch": 0.02645636011300415, "grad_norm": 0.46484375, "learning_rate": 2.342857142857143e-05, "loss": 2.2389, "step": 820 }, { "epoch": 0.026488623966800493, "grad_norm": 0.55078125, "learning_rate": 2.3457142857142856e-05, "loss": 2.2515, "step": 821 }, { "epoch": 0.02652088782059684, "grad_norm": 0.50390625, "learning_rate": 2.3485714285714285e-05, "loss": 2.2615, "step": 822 }, { "epoch": 0.026553151674393188, "grad_norm": 0.494140625, "learning_rate": 2.3514285714285715e-05, "loss": 2.3072, "step": 823 }, { "epoch": 0.026585415528189535, "grad_norm": 0.43359375, "learning_rate": 2.3542857142857145e-05, "loss": 2.2648, "step": 824 }, { "epoch": 0.026617679381985882, "grad_norm": 0.48046875, "learning_rate": 2.357142857142857e-05, "loss": 2.2692, "step": 825 }, { "epoch": 0.026649943235782226, "grad_norm": 0.51171875, "learning_rate": 2.3599999999999998e-05, "loss": 2.2813, "step": 826 }, { "epoch": 0.026682207089578573, "grad_norm": 0.443359375, "learning_rate": 2.362857142857143e-05, "loss": 2.2313, "step": 827 }, { "epoch": 0.02671447094337492, "grad_norm": 0.46875, "learning_rate": 2.3657142857142857e-05, "loss": 2.261, "step": 828 }, { "epoch": 0.026746734797171268, "grad_norm": 0.474609375, "learning_rate": 2.3685714285714287e-05, "loss": 2.2058, "step": 829 }, { "epoch": 0.02677899865096761, "grad_norm": 0.439453125, "learning_rate": 2.3714285714285713e-05, "loss": 2.2706, "step": 830 }, { "epoch": 0.02681126250476396, "grad_norm": 0.5078125, "learning_rate": 2.3742857142857146e-05, "loss": 2.2576, "step": 831 }, { "epoch": 0.026843526358560306, "grad_norm": 0.474609375, "learning_rate": 2.3771428571428573e-05, "loss": 2.2371, "step": 832 }, { "epoch": 0.026875790212356653, "grad_norm": 0.60546875, "learning_rate": 2.38e-05, "loss": 2.2964, "step": 833 }, { "epoch": 0.026908054066153, "grad_norm": 0.90625, "learning_rate": 2.382857142857143e-05, "loss": 2.3076, "step": 834 }, { "epoch": 0.026940317919949344, "grad_norm": 1.6171875, "learning_rate": 2.3857142857142855e-05, "loss": 2.3692, "step": 835 }, { "epoch": 0.02697258177374569, "grad_norm": 0.71875, "learning_rate": 2.388571428571429e-05, "loss": 2.348, "step": 836 }, { "epoch": 0.02700484562754204, "grad_norm": 1.296875, "learning_rate": 2.3914285714285715e-05, "loss": 2.3283, "step": 837 }, { "epoch": 0.027037109481338386, "grad_norm": 0.87109375, "learning_rate": 2.3942857142857144e-05, "loss": 2.3522, "step": 838 }, { "epoch": 0.027069373335134733, "grad_norm": 0.83984375, "learning_rate": 2.397142857142857e-05, "loss": 2.3297, "step": 839 }, { "epoch": 0.027101637188931077, "grad_norm": 0.8984375, "learning_rate": 2.4e-05, "loss": 2.3339, "step": 840 }, { "epoch": 0.027133901042727424, "grad_norm": 0.65625, "learning_rate": 2.402857142857143e-05, "loss": 2.3503, "step": 841 }, { "epoch": 0.02716616489652377, "grad_norm": 0.703125, "learning_rate": 2.4057142857142857e-05, "loss": 2.3299, "step": 842 }, { "epoch": 0.02719842875032012, "grad_norm": 0.625, "learning_rate": 2.4085714285714286e-05, "loss": 2.3344, "step": 843 }, { "epoch": 0.027230692604116466, "grad_norm": 0.67578125, "learning_rate": 2.4114285714285716e-05, "loss": 2.2977, "step": 844 }, { "epoch": 0.02726295645791281, "grad_norm": 0.5546875, "learning_rate": 2.4142857142857146e-05, "loss": 2.2672, "step": 845 }, { "epoch": 0.027295220311709157, "grad_norm": 0.6875, "learning_rate": 2.4171428571428572e-05, "loss": 2.349, "step": 846 }, { "epoch": 0.027327484165505504, "grad_norm": 0.486328125, "learning_rate": 2.42e-05, "loss": 2.3262, "step": 847 }, { "epoch": 0.02735974801930185, "grad_norm": 0.640625, "learning_rate": 2.422857142857143e-05, "loss": 2.3374, "step": 848 }, { "epoch": 0.0273920118730982, "grad_norm": 0.48046875, "learning_rate": 2.4257142857142858e-05, "loss": 2.3627, "step": 849 }, { "epoch": 0.027424275726894543, "grad_norm": 0.5859375, "learning_rate": 2.4285714285714288e-05, "loss": 2.3215, "step": 850 }, { "epoch": 0.02745653958069089, "grad_norm": 0.5078125, "learning_rate": 2.4314285714285714e-05, "loss": 2.3137, "step": 851 }, { "epoch": 0.027488803434487237, "grad_norm": 0.56640625, "learning_rate": 2.434285714285714e-05, "loss": 2.3275, "step": 852 }, { "epoch": 0.027521067288283584, "grad_norm": 0.5625, "learning_rate": 2.4371428571428574e-05, "loss": 2.3859, "step": 853 }, { "epoch": 0.027553331142079928, "grad_norm": 0.5390625, "learning_rate": 2.44e-05, "loss": 2.3425, "step": 854 }, { "epoch": 0.027585594995876275, "grad_norm": 0.498046875, "learning_rate": 2.442857142857143e-05, "loss": 2.3495, "step": 855 }, { "epoch": 0.027617858849672623, "grad_norm": 0.6015625, "learning_rate": 2.4457142857142856e-05, "loss": 2.3303, "step": 856 }, { "epoch": 0.02765012270346897, "grad_norm": 0.5, "learning_rate": 2.448571428571429e-05, "loss": 2.3281, "step": 857 }, { "epoch": 0.027682386557265317, "grad_norm": 0.55078125, "learning_rate": 2.4514285714285716e-05, "loss": 2.2902, "step": 858 }, { "epoch": 0.02771465041106166, "grad_norm": 0.51953125, "learning_rate": 2.4542857142857142e-05, "loss": 2.3282, "step": 859 }, { "epoch": 0.027746914264858008, "grad_norm": 0.447265625, "learning_rate": 2.4571428571428572e-05, "loss": 2.3251, "step": 860 }, { "epoch": 0.027779178118654355, "grad_norm": 0.5390625, "learning_rate": 2.4599999999999998e-05, "loss": 2.3405, "step": 861 }, { "epoch": 0.027811441972450703, "grad_norm": 0.470703125, "learning_rate": 2.462857142857143e-05, "loss": 2.3369, "step": 862 }, { "epoch": 0.02784370582624705, "grad_norm": 0.490234375, "learning_rate": 2.4657142857142858e-05, "loss": 2.3266, "step": 863 }, { "epoch": 0.027875969680043394, "grad_norm": 0.5234375, "learning_rate": 2.4685714285714288e-05, "loss": 2.3143, "step": 864 }, { "epoch": 0.02790823353383974, "grad_norm": 0.498046875, "learning_rate": 2.4714285714285714e-05, "loss": 2.3056, "step": 865 }, { "epoch": 0.027940497387636088, "grad_norm": 0.5078125, "learning_rate": 2.4742857142857144e-05, "loss": 2.33, "step": 866 }, { "epoch": 0.027972761241432435, "grad_norm": 0.451171875, "learning_rate": 2.4771428571428573e-05, "loss": 2.3418, "step": 867 }, { "epoch": 0.028005025095228783, "grad_norm": 0.51171875, "learning_rate": 2.48e-05, "loss": 2.3449, "step": 868 }, { "epoch": 0.028037288949025126, "grad_norm": 0.44921875, "learning_rate": 2.482857142857143e-05, "loss": 2.3106, "step": 869 }, { "epoch": 0.028069552802821474, "grad_norm": 0.5078125, "learning_rate": 2.485714285714286e-05, "loss": 2.3089, "step": 870 }, { "epoch": 0.02810181665661782, "grad_norm": 0.45703125, "learning_rate": 2.4885714285714286e-05, "loss": 2.3298, "step": 871 }, { "epoch": 0.028134080510414168, "grad_norm": 0.462890625, "learning_rate": 2.4914285714285715e-05, "loss": 2.3494, "step": 872 }, { "epoch": 0.028166344364210512, "grad_norm": 0.5703125, "learning_rate": 2.4942857142857142e-05, "loss": 2.3326, "step": 873 }, { "epoch": 0.02819860821800686, "grad_norm": 0.67578125, "learning_rate": 2.497142857142857e-05, "loss": 2.3395, "step": 874 }, { "epoch": 0.028230872071803206, "grad_norm": 0.84765625, "learning_rate": 2.5e-05, "loss": 2.3443, "step": 875 }, { "epoch": 0.028263135925599554, "grad_norm": 1.1796875, "learning_rate": 2.502857142857143e-05, "loss": 2.2981, "step": 876 }, { "epoch": 0.0282953997793959, "grad_norm": 0.83203125, "learning_rate": 2.5057142857142857e-05, "loss": 2.3208, "step": 877 }, { "epoch": 0.028327663633192245, "grad_norm": 0.478515625, "learning_rate": 2.5085714285714284e-05, "loss": 2.3296, "step": 878 }, { "epoch": 0.028359927486988592, "grad_norm": 0.82421875, "learning_rate": 2.5114285714285717e-05, "loss": 2.3377, "step": 879 }, { "epoch": 0.02839219134078494, "grad_norm": 0.75, "learning_rate": 2.5142857142857143e-05, "loss": 2.3246, "step": 880 }, { "epoch": 0.028424455194581286, "grad_norm": 0.4921875, "learning_rate": 2.5171428571428573e-05, "loss": 2.3101, "step": 881 }, { "epoch": 0.028456719048377634, "grad_norm": 0.68359375, "learning_rate": 2.52e-05, "loss": 2.3253, "step": 882 }, { "epoch": 0.028488982902173977, "grad_norm": 0.63671875, "learning_rate": 2.5228571428571432e-05, "loss": 2.3224, "step": 883 }, { "epoch": 0.028521246755970325, "grad_norm": 0.52734375, "learning_rate": 2.525714285714286e-05, "loss": 2.314, "step": 884 }, { "epoch": 0.028553510609766672, "grad_norm": 0.5703125, "learning_rate": 2.5285714285714285e-05, "loss": 2.3329, "step": 885 }, { "epoch": 0.02858577446356302, "grad_norm": 0.4765625, "learning_rate": 2.5314285714285715e-05, "loss": 2.3444, "step": 886 }, { "epoch": 0.028618038317359366, "grad_norm": 0.546875, "learning_rate": 2.534285714285714e-05, "loss": 2.3183, "step": 887 }, { "epoch": 0.02865030217115571, "grad_norm": 0.5859375, "learning_rate": 2.5371428571428574e-05, "loss": 2.2875, "step": 888 }, { "epoch": 0.028682566024952057, "grad_norm": 0.56640625, "learning_rate": 2.54e-05, "loss": 2.3396, "step": 889 }, { "epoch": 0.028714829878748405, "grad_norm": 0.486328125, "learning_rate": 2.5428571428571427e-05, "loss": 2.2935, "step": 890 }, { "epoch": 0.028747093732544752, "grad_norm": 0.51953125, "learning_rate": 2.5457142857142857e-05, "loss": 2.3056, "step": 891 }, { "epoch": 0.0287793575863411, "grad_norm": 0.5625, "learning_rate": 2.5485714285714287e-05, "loss": 2.3304, "step": 892 }, { "epoch": 0.028811621440137443, "grad_norm": 0.55859375, "learning_rate": 2.5514285714285716e-05, "loss": 2.344, "step": 893 }, { "epoch": 0.02884388529393379, "grad_norm": 0.59765625, "learning_rate": 2.5542857142857143e-05, "loss": 2.3314, "step": 894 }, { "epoch": 0.028876149147730137, "grad_norm": 0.70703125, "learning_rate": 2.5571428571428572e-05, "loss": 2.3309, "step": 895 }, { "epoch": 0.028908413001526485, "grad_norm": 0.5546875, "learning_rate": 2.5600000000000002e-05, "loss": 2.3185, "step": 896 }, { "epoch": 0.02894067685532283, "grad_norm": 0.55078125, "learning_rate": 2.562857142857143e-05, "loss": 2.3187, "step": 897 }, { "epoch": 0.028972940709119176, "grad_norm": 0.5703125, "learning_rate": 2.565714285714286e-05, "loss": 2.3316, "step": 898 }, { "epoch": 0.029005204562915523, "grad_norm": 0.51953125, "learning_rate": 2.5685714285714285e-05, "loss": 2.3271, "step": 899 }, { "epoch": 0.02903746841671187, "grad_norm": 0.5234375, "learning_rate": 2.5714285714285714e-05, "loss": 2.3241, "step": 900 }, { "epoch": 0.029069732270508217, "grad_norm": 0.55859375, "learning_rate": 2.5742857142857144e-05, "loss": 2.3184, "step": 901 }, { "epoch": 0.02910199612430456, "grad_norm": 0.5859375, "learning_rate": 2.5771428571428574e-05, "loss": 2.359, "step": 902 }, { "epoch": 0.02913425997810091, "grad_norm": 0.61328125, "learning_rate": 2.58e-05, "loss": 2.3295, "step": 903 }, { "epoch": 0.029166523831897256, "grad_norm": 0.6875, "learning_rate": 2.5828571428571427e-05, "loss": 2.3229, "step": 904 }, { "epoch": 0.029198787685693603, "grad_norm": 0.54296875, "learning_rate": 2.585714285714286e-05, "loss": 2.3409, "step": 905 }, { "epoch": 0.02923105153948995, "grad_norm": 0.58203125, "learning_rate": 2.5885714285714286e-05, "loss": 2.3398, "step": 906 }, { "epoch": 0.029263315393286294, "grad_norm": 0.578125, "learning_rate": 2.5914285714285716e-05, "loss": 2.3082, "step": 907 }, { "epoch": 0.02929557924708264, "grad_norm": 0.5625, "learning_rate": 2.5942857142857142e-05, "loss": 2.3296, "step": 908 }, { "epoch": 0.02932784310087899, "grad_norm": 0.55078125, "learning_rate": 2.5971428571428575e-05, "loss": 2.3398, "step": 909 }, { "epoch": 0.029360106954675336, "grad_norm": 0.5234375, "learning_rate": 2.6000000000000002e-05, "loss": 2.3264, "step": 910 }, { "epoch": 0.029392370808471683, "grad_norm": 0.53515625, "learning_rate": 2.6028571428571428e-05, "loss": 2.2711, "step": 911 }, { "epoch": 0.029424634662268027, "grad_norm": 0.52734375, "learning_rate": 2.6057142857142858e-05, "loss": 2.3342, "step": 912 }, { "epoch": 0.029456898516064374, "grad_norm": 0.5625, "learning_rate": 2.6085714285714284e-05, "loss": 2.31, "step": 913 }, { "epoch": 0.02948916236986072, "grad_norm": 0.515625, "learning_rate": 2.6114285714285717e-05, "loss": 2.3346, "step": 914 }, { "epoch": 0.02952142622365707, "grad_norm": 0.58984375, "learning_rate": 2.6142857142857144e-05, "loss": 2.325, "step": 915 }, { "epoch": 0.029553690077453416, "grad_norm": 0.6171875, "learning_rate": 2.617142857142857e-05, "loss": 2.3166, "step": 916 }, { "epoch": 0.02958595393124976, "grad_norm": 0.494140625, "learning_rate": 2.62e-05, "loss": 2.2774, "step": 917 }, { "epoch": 0.029618217785046107, "grad_norm": 0.5703125, "learning_rate": 2.622857142857143e-05, "loss": 2.3432, "step": 918 }, { "epoch": 0.029650481638842454, "grad_norm": 0.53125, "learning_rate": 2.625714285714286e-05, "loss": 2.333, "step": 919 }, { "epoch": 0.0296827454926388, "grad_norm": 0.53515625, "learning_rate": 2.6285714285714286e-05, "loss": 2.2923, "step": 920 }, { "epoch": 0.029715009346435145, "grad_norm": 0.54296875, "learning_rate": 2.6314285714285715e-05, "loss": 2.2944, "step": 921 }, { "epoch": 0.029747273200231492, "grad_norm": 0.482421875, "learning_rate": 2.6342857142857142e-05, "loss": 2.3112, "step": 922 }, { "epoch": 0.02977953705402784, "grad_norm": 0.578125, "learning_rate": 2.637142857142857e-05, "loss": 2.2934, "step": 923 }, { "epoch": 0.029811800907824187, "grad_norm": 0.6328125, "learning_rate": 2.64e-05, "loss": 2.331, "step": 924 }, { "epoch": 0.029844064761620534, "grad_norm": 0.62109375, "learning_rate": 2.6428571428571428e-05, "loss": 2.3276, "step": 925 }, { "epoch": 0.029876328615416878, "grad_norm": 0.84765625, "learning_rate": 2.6457142857142857e-05, "loss": 2.323, "step": 926 }, { "epoch": 0.029908592469213225, "grad_norm": 1.2734375, "learning_rate": 2.6485714285714287e-05, "loss": 2.2925, "step": 927 }, { "epoch": 0.029940856323009572, "grad_norm": 1.0546875, "learning_rate": 2.6514285714285717e-05, "loss": 2.2962, "step": 928 }, { "epoch": 0.02997312017680592, "grad_norm": 0.51171875, "learning_rate": 2.6542857142857143e-05, "loss": 2.3168, "step": 929 }, { "epoch": 0.030005384030602267, "grad_norm": 0.87109375, "learning_rate": 2.657142857142857e-05, "loss": 2.3346, "step": 930 }, { "epoch": 0.03003764788439861, "grad_norm": 0.81640625, "learning_rate": 2.6600000000000003e-05, "loss": 2.3303, "step": 931 }, { "epoch": 0.030069911738194958, "grad_norm": 0.55078125, "learning_rate": 2.662857142857143e-05, "loss": 2.3484, "step": 932 }, { "epoch": 0.030102175591991305, "grad_norm": 0.73828125, "learning_rate": 2.665714285714286e-05, "loss": 2.3511, "step": 933 }, { "epoch": 0.030134439445787652, "grad_norm": 0.609375, "learning_rate": 2.6685714285714285e-05, "loss": 2.3369, "step": 934 }, { "epoch": 0.030166703299584, "grad_norm": 0.5234375, "learning_rate": 2.671428571428571e-05, "loss": 2.3177, "step": 935 }, { "epoch": 0.030198967153380343, "grad_norm": 0.60546875, "learning_rate": 2.6742857142857145e-05, "loss": 2.2975, "step": 936 }, { "epoch": 0.03023123100717669, "grad_norm": 0.55078125, "learning_rate": 2.677142857142857e-05, "loss": 2.3079, "step": 937 }, { "epoch": 0.030263494860973038, "grad_norm": 0.45703125, "learning_rate": 2.68e-05, "loss": 2.3135, "step": 938 }, { "epoch": 0.030295758714769385, "grad_norm": 0.54296875, "learning_rate": 2.6828571428571427e-05, "loss": 2.3163, "step": 939 }, { "epoch": 0.03032802256856573, "grad_norm": 0.5, "learning_rate": 2.685714285714286e-05, "loss": 2.3422, "step": 940 }, { "epoch": 0.030360286422362076, "grad_norm": 0.578125, "learning_rate": 2.6885714285714287e-05, "loss": 2.307, "step": 941 }, { "epoch": 0.030392550276158423, "grad_norm": 0.58203125, "learning_rate": 2.6914285714285713e-05, "loss": 2.3204, "step": 942 }, { "epoch": 0.03042481412995477, "grad_norm": 0.50390625, "learning_rate": 2.6942857142857143e-05, "loss": 2.3309, "step": 943 }, { "epoch": 0.030457077983751118, "grad_norm": 0.59375, "learning_rate": 2.6971428571428573e-05, "loss": 2.3007, "step": 944 }, { "epoch": 0.03048934183754746, "grad_norm": 0.59765625, "learning_rate": 2.7000000000000002e-05, "loss": 2.2995, "step": 945 }, { "epoch": 0.03052160569134381, "grad_norm": 0.47265625, "learning_rate": 2.702857142857143e-05, "loss": 2.2891, "step": 946 }, { "epoch": 0.030553869545140156, "grad_norm": 0.54296875, "learning_rate": 2.705714285714286e-05, "loss": 2.3265, "step": 947 }, { "epoch": 0.030586133398936503, "grad_norm": 0.51953125, "learning_rate": 2.7085714285714285e-05, "loss": 2.3017, "step": 948 }, { "epoch": 0.03061839725273285, "grad_norm": 0.515625, "learning_rate": 2.7114285714285715e-05, "loss": 2.3021, "step": 949 }, { "epoch": 0.030650661106529194, "grad_norm": 0.54296875, "learning_rate": 2.7142857142857144e-05, "loss": 2.26, "step": 950 }, { "epoch": 0.03068292496032554, "grad_norm": 0.5546875, "learning_rate": 2.717142857142857e-05, "loss": 2.3064, "step": 951 }, { "epoch": 0.03071518881412189, "grad_norm": 0.546875, "learning_rate": 2.72e-05, "loss": 2.3507, "step": 952 }, { "epoch": 0.030747452667918236, "grad_norm": 0.55859375, "learning_rate": 2.722857142857143e-05, "loss": 2.3094, "step": 953 }, { "epoch": 0.030779716521714583, "grad_norm": 0.52734375, "learning_rate": 2.7257142857142857e-05, "loss": 2.3196, "step": 954 }, { "epoch": 0.030811980375510927, "grad_norm": 0.53125, "learning_rate": 2.7285714285714286e-05, "loss": 2.2895, "step": 955 }, { "epoch": 0.030844244229307274, "grad_norm": 0.56640625, "learning_rate": 2.7314285714285713e-05, "loss": 2.296, "step": 956 }, { "epoch": 0.03087650808310362, "grad_norm": 0.52734375, "learning_rate": 2.7342857142857146e-05, "loss": 2.3194, "step": 957 }, { "epoch": 0.03090877193689997, "grad_norm": 0.470703125, "learning_rate": 2.7371428571428572e-05, "loss": 2.2842, "step": 958 }, { "epoch": 0.030941035790696316, "grad_norm": 0.578125, "learning_rate": 2.7400000000000002e-05, "loss": 2.3148, "step": 959 }, { "epoch": 0.03097329964449266, "grad_norm": 0.6015625, "learning_rate": 2.7428571428571428e-05, "loss": 2.3264, "step": 960 }, { "epoch": 0.031005563498289007, "grad_norm": 0.65625, "learning_rate": 2.7457142857142855e-05, "loss": 2.2889, "step": 961 }, { "epoch": 0.031037827352085354, "grad_norm": 0.72265625, "learning_rate": 2.7485714285714288e-05, "loss": 2.2972, "step": 962 }, { "epoch": 0.0310700912058817, "grad_norm": 0.765625, "learning_rate": 2.7514285714285714e-05, "loss": 2.3105, "step": 963 }, { "epoch": 0.031102355059678045, "grad_norm": 0.8359375, "learning_rate": 2.7542857142857144e-05, "loss": 2.2816, "step": 964 }, { "epoch": 0.031134618913474393, "grad_norm": 1.1875, "learning_rate": 2.757142857142857e-05, "loss": 2.3176, "step": 965 }, { "epoch": 0.03116688276727074, "grad_norm": 1.0078125, "learning_rate": 2.7600000000000003e-05, "loss": 2.2897, "step": 966 }, { "epoch": 0.031199146621067087, "grad_norm": 0.58984375, "learning_rate": 2.762857142857143e-05, "loss": 2.3027, "step": 967 }, { "epoch": 0.031231410474863434, "grad_norm": 0.72265625, "learning_rate": 2.7657142857142856e-05, "loss": 2.2927, "step": 968 }, { "epoch": 0.03126367432865978, "grad_norm": 0.84375, "learning_rate": 2.7685714285714286e-05, "loss": 2.2906, "step": 969 }, { "epoch": 0.03129593818245613, "grad_norm": 0.59375, "learning_rate": 2.7714285714285716e-05, "loss": 2.3182, "step": 970 }, { "epoch": 0.03132820203625247, "grad_norm": 0.6015625, "learning_rate": 2.7742857142857145e-05, "loss": 2.2862, "step": 971 }, { "epoch": 0.031360465890048816, "grad_norm": 0.55078125, "learning_rate": 2.7771428571428572e-05, "loss": 2.2987, "step": 972 }, { "epoch": 0.03139272974384517, "grad_norm": 0.51171875, "learning_rate": 2.78e-05, "loss": 2.3183, "step": 973 }, { "epoch": 0.03142499359764151, "grad_norm": 0.52734375, "learning_rate": 2.7828571428571428e-05, "loss": 2.3073, "step": 974 }, { "epoch": 0.03145725745143786, "grad_norm": 0.53515625, "learning_rate": 2.7857142857142858e-05, "loss": 2.3087, "step": 975 }, { "epoch": 0.031489521305234205, "grad_norm": 0.515625, "learning_rate": 2.7885714285714287e-05, "loss": 2.3099, "step": 976 }, { "epoch": 0.03152178515903055, "grad_norm": 0.5, "learning_rate": 2.7914285714285714e-05, "loss": 2.2952, "step": 977 }, { "epoch": 0.0315540490128269, "grad_norm": 0.51953125, "learning_rate": 2.7942857142857143e-05, "loss": 2.2902, "step": 978 }, { "epoch": 0.031586312866623244, "grad_norm": 0.51953125, "learning_rate": 2.7971428571428573e-05, "loss": 2.2886, "step": 979 }, { "epoch": 0.031618576720419594, "grad_norm": 0.52734375, "learning_rate": 2.8e-05, "loss": 2.3109, "step": 980 }, { "epoch": 0.03165084057421594, "grad_norm": 0.52734375, "learning_rate": 2.802857142857143e-05, "loss": 2.3216, "step": 981 }, { "epoch": 0.03168310442801228, "grad_norm": 0.546875, "learning_rate": 2.8057142857142856e-05, "loss": 2.3055, "step": 982 }, { "epoch": 0.03171536828180863, "grad_norm": 0.515625, "learning_rate": 2.808571428571429e-05, "loss": 2.2807, "step": 983 }, { "epoch": 0.031747632135604976, "grad_norm": 0.482421875, "learning_rate": 2.8114285714285715e-05, "loss": 2.2937, "step": 984 }, { "epoch": 0.03177989598940133, "grad_norm": 0.51171875, "learning_rate": 2.8142857142857145e-05, "loss": 2.2939, "step": 985 }, { "epoch": 0.03181215984319767, "grad_norm": 0.5546875, "learning_rate": 2.817142857142857e-05, "loss": 2.2953, "step": 986 }, { "epoch": 0.031844423696994015, "grad_norm": 0.6171875, "learning_rate": 2.8199999999999998e-05, "loss": 2.3097, "step": 987 }, { "epoch": 0.031876687550790365, "grad_norm": 0.73828125, "learning_rate": 2.822857142857143e-05, "loss": 2.3147, "step": 988 }, { "epoch": 0.03190895140458671, "grad_norm": 0.88671875, "learning_rate": 2.8257142857142857e-05, "loss": 2.2719, "step": 989 }, { "epoch": 0.03194121525838306, "grad_norm": 0.9296875, "learning_rate": 2.8285714285714287e-05, "loss": 2.2984, "step": 990 }, { "epoch": 0.031973479112179404, "grad_norm": 0.73828125, "learning_rate": 2.8314285714285713e-05, "loss": 2.2888, "step": 991 }, { "epoch": 0.03200574296597575, "grad_norm": 0.52734375, "learning_rate": 2.8342857142857146e-05, "loss": 2.3071, "step": 992 }, { "epoch": 0.0320380068197721, "grad_norm": 0.6640625, "learning_rate": 2.8371428571428573e-05, "loss": 2.2578, "step": 993 }, { "epoch": 0.03207027067356844, "grad_norm": 0.66015625, "learning_rate": 2.84e-05, "loss": 2.2013, "step": 994 }, { "epoch": 0.032102534527364786, "grad_norm": 0.56640625, "learning_rate": 2.842857142857143e-05, "loss": 2.2359, "step": 995 }, { "epoch": 0.032134798381161137, "grad_norm": 1.1015625, "learning_rate": 2.845714285714286e-05, "loss": 2.2877, "step": 996 }, { "epoch": 0.03216706223495748, "grad_norm": 0.57421875, "learning_rate": 2.848571428571429e-05, "loss": 2.3216, "step": 997 }, { "epoch": 0.03219932608875383, "grad_norm": 0.66015625, "learning_rate": 2.8514285714285715e-05, "loss": 2.2723, "step": 998 }, { "epoch": 0.032231589942550175, "grad_norm": 0.63671875, "learning_rate": 2.854285714285714e-05, "loss": 2.3265, "step": 999 }, { "epoch": 0.03226385379634652, "grad_norm": 0.55859375, "learning_rate": 2.857142857142857e-05, "loss": 2.3184, "step": 1000 }, { "epoch": 0.03229611765014287, "grad_norm": 0.64453125, "learning_rate": 2.86e-05, "loss": 2.2855, "step": 1001 }, { "epoch": 0.03232838150393921, "grad_norm": 0.76953125, "learning_rate": 2.862857142857143e-05, "loss": 2.2959, "step": 1002 }, { "epoch": 0.032360645357735564, "grad_norm": 0.69140625, "learning_rate": 2.8657142857142857e-05, "loss": 2.2963, "step": 1003 }, { "epoch": 0.03239290921153191, "grad_norm": 0.640625, "learning_rate": 2.8685714285714286e-05, "loss": 2.3124, "step": 1004 }, { "epoch": 0.03242517306532825, "grad_norm": 0.609375, "learning_rate": 2.8714285714285716e-05, "loss": 2.2847, "step": 1005 }, { "epoch": 0.0324574369191246, "grad_norm": 0.57421875, "learning_rate": 2.8742857142857143e-05, "loss": 2.3148, "step": 1006 }, { "epoch": 0.032489700772920946, "grad_norm": 0.5859375, "learning_rate": 2.8771428571428572e-05, "loss": 2.2906, "step": 1007 }, { "epoch": 0.0325219646267173, "grad_norm": 0.62890625, "learning_rate": 2.88e-05, "loss": 2.3218, "step": 1008 }, { "epoch": 0.03255422848051364, "grad_norm": 0.5390625, "learning_rate": 2.8828571428571432e-05, "loss": 2.299, "step": 1009 }, { "epoch": 0.032586492334309984, "grad_norm": 0.53125, "learning_rate": 2.8857142857142858e-05, "loss": 2.3135, "step": 1010 }, { "epoch": 0.032618756188106335, "grad_norm": 0.56640625, "learning_rate": 2.8885714285714288e-05, "loss": 2.2913, "step": 1011 }, { "epoch": 0.03265102004190268, "grad_norm": 0.6015625, "learning_rate": 2.8914285714285714e-05, "loss": 2.2684, "step": 1012 }, { "epoch": 0.03268328389569903, "grad_norm": 0.6796875, "learning_rate": 2.894285714285714e-05, "loss": 2.2636, "step": 1013 }, { "epoch": 0.03271554774949537, "grad_norm": 0.640625, "learning_rate": 2.8971428571428574e-05, "loss": 2.3392, "step": 1014 }, { "epoch": 0.03274781160329172, "grad_norm": 0.57421875, "learning_rate": 2.9e-05, "loss": 2.2827, "step": 1015 }, { "epoch": 0.03278007545708807, "grad_norm": 0.640625, "learning_rate": 2.902857142857143e-05, "loss": 2.2745, "step": 1016 }, { "epoch": 0.03281233931088441, "grad_norm": 0.6640625, "learning_rate": 2.9057142857142856e-05, "loss": 2.3151, "step": 1017 }, { "epoch": 0.03284460316468076, "grad_norm": 0.6796875, "learning_rate": 2.9085714285714286e-05, "loss": 2.3053, "step": 1018 }, { "epoch": 0.032876867018477106, "grad_norm": 0.66015625, "learning_rate": 2.9114285714285716e-05, "loss": 2.2911, "step": 1019 }, { "epoch": 0.03290913087227345, "grad_norm": 0.72265625, "learning_rate": 2.9142857142857142e-05, "loss": 2.3101, "step": 1020 }, { "epoch": 0.0329413947260698, "grad_norm": 0.5859375, "learning_rate": 2.9171428571428572e-05, "loss": 2.3072, "step": 1021 }, { "epoch": 0.032973658579866144, "grad_norm": 0.59375, "learning_rate": 2.92e-05, "loss": 2.2719, "step": 1022 }, { "epoch": 0.033005922433662495, "grad_norm": 0.87109375, "learning_rate": 2.922857142857143e-05, "loss": 2.3005, "step": 1023 }, { "epoch": 0.03303818628745884, "grad_norm": 1.1484375, "learning_rate": 2.9257142857142858e-05, "loss": 2.2672, "step": 1024 }, { "epoch": 0.03307045014125518, "grad_norm": 0.9453125, "learning_rate": 2.9285714285714284e-05, "loss": 2.2819, "step": 1025 }, { "epoch": 0.03310271399505153, "grad_norm": 0.765625, "learning_rate": 2.9314285714285714e-05, "loss": 2.3068, "step": 1026 }, { "epoch": 0.03313497784884788, "grad_norm": 0.61328125, "learning_rate": 2.9342857142857144e-05, "loss": 2.2974, "step": 1027 }, { "epoch": 0.03316724170264423, "grad_norm": 0.7421875, "learning_rate": 2.9371428571428573e-05, "loss": 2.2833, "step": 1028 }, { "epoch": 0.03319950555644057, "grad_norm": 0.7109375, "learning_rate": 2.94e-05, "loss": 2.2722, "step": 1029 }, { "epoch": 0.033231769410236915, "grad_norm": 0.5546875, "learning_rate": 2.942857142857143e-05, "loss": 2.2739, "step": 1030 }, { "epoch": 0.033264033264033266, "grad_norm": 0.73828125, "learning_rate": 2.945714285714286e-05, "loss": 2.2825, "step": 1031 }, { "epoch": 0.03329629711782961, "grad_norm": 0.75, "learning_rate": 2.9485714285714286e-05, "loss": 2.2897, "step": 1032 }, { "epoch": 0.03332856097162596, "grad_norm": 0.6015625, "learning_rate": 2.9514285714285715e-05, "loss": 2.2827, "step": 1033 }, { "epoch": 0.033360824825422304, "grad_norm": 0.703125, "learning_rate": 2.9542857142857142e-05, "loss": 2.2908, "step": 1034 }, { "epoch": 0.03339308867921865, "grad_norm": 0.70703125, "learning_rate": 2.9571428571428575e-05, "loss": 2.2859, "step": 1035 }, { "epoch": 0.033425352533015, "grad_norm": 0.6796875, "learning_rate": 2.96e-05, "loss": 2.2958, "step": 1036 }, { "epoch": 0.03345761638681134, "grad_norm": 0.5859375, "learning_rate": 2.962857142857143e-05, "loss": 2.2852, "step": 1037 }, { "epoch": 0.03348988024060769, "grad_norm": 0.6640625, "learning_rate": 2.9657142857142857e-05, "loss": 2.292, "step": 1038 }, { "epoch": 0.03352214409440404, "grad_norm": 0.625, "learning_rate": 2.9685714285714284e-05, "loss": 2.3027, "step": 1039 }, { "epoch": 0.03355440794820038, "grad_norm": 0.484375, "learning_rate": 2.9714285714285717e-05, "loss": 2.2828, "step": 1040 }, { "epoch": 0.03358667180199673, "grad_norm": 0.6171875, "learning_rate": 2.9742857142857143e-05, "loss": 2.2514, "step": 1041 }, { "epoch": 0.033618935655793075, "grad_norm": 0.6484375, "learning_rate": 2.9771428571428573e-05, "loss": 2.2644, "step": 1042 }, { "epoch": 0.03365119950958942, "grad_norm": 0.61328125, "learning_rate": 2.98e-05, "loss": 2.3204, "step": 1043 }, { "epoch": 0.03368346336338577, "grad_norm": 0.57421875, "learning_rate": 2.982857142857143e-05, "loss": 2.2589, "step": 1044 }, { "epoch": 0.03371572721718211, "grad_norm": 0.51953125, "learning_rate": 2.985714285714286e-05, "loss": 2.284, "step": 1045 }, { "epoch": 0.033747991070978464, "grad_norm": 0.578125, "learning_rate": 2.9885714285714285e-05, "loss": 2.2668, "step": 1046 }, { "epoch": 0.03378025492477481, "grad_norm": 0.6640625, "learning_rate": 2.9914285714285715e-05, "loss": 2.3015, "step": 1047 }, { "epoch": 0.03381251877857115, "grad_norm": 0.63671875, "learning_rate": 2.9942857142857145e-05, "loss": 2.2612, "step": 1048 }, { "epoch": 0.0338447826323675, "grad_norm": 0.578125, "learning_rate": 2.9971428571428574e-05, "loss": 2.2831, "step": 1049 }, { "epoch": 0.033877046486163846, "grad_norm": 0.466796875, "learning_rate": 3e-05, "loss": 2.2684, "step": 1050 }, { "epoch": 0.0339093103399602, "grad_norm": 0.5546875, "learning_rate": 2.999999991744538e-05, "loss": 2.2865, "step": 1051 }, { "epoch": 0.03394157419375654, "grad_norm": 0.58203125, "learning_rate": 2.9999999669781523e-05, "loss": 2.3054, "step": 1052 }, { "epoch": 0.033973838047552885, "grad_norm": 0.59375, "learning_rate": 2.9999999257008432e-05, "loss": 2.263, "step": 1053 }, { "epoch": 0.034006101901349235, "grad_norm": 0.61328125, "learning_rate": 2.9999998679126106e-05, "loss": 2.3083, "step": 1054 }, { "epoch": 0.03403836575514558, "grad_norm": 0.625, "learning_rate": 2.9999997936134558e-05, "loss": 2.2949, "step": 1055 }, { "epoch": 0.03407062960894193, "grad_norm": 0.578125, "learning_rate": 2.9999997028033795e-05, "loss": 2.3071, "step": 1056 }, { "epoch": 0.034102893462738273, "grad_norm": 0.62890625, "learning_rate": 2.999999595482382e-05, "loss": 2.3063, "step": 1057 }, { "epoch": 0.03413515731653462, "grad_norm": 0.61328125, "learning_rate": 2.9999994716504656e-05, "loss": 2.2789, "step": 1058 }, { "epoch": 0.03416742117033097, "grad_norm": 0.7265625, "learning_rate": 2.9999993313076308e-05, "loss": 2.2933, "step": 1059 }, { "epoch": 0.03419968502412731, "grad_norm": 0.796875, "learning_rate": 2.99999917445388e-05, "loss": 2.3009, "step": 1060 }, { "epoch": 0.03423194887792366, "grad_norm": 0.86328125, "learning_rate": 2.999999001089214e-05, "loss": 2.2913, "step": 1061 }, { "epoch": 0.034264212731720006, "grad_norm": 0.76171875, "learning_rate": 2.999998811213635e-05, "loss": 2.2738, "step": 1062 }, { "epoch": 0.03429647658551635, "grad_norm": 0.5625, "learning_rate": 2.999998604827145e-05, "loss": 2.3016, "step": 1063 }, { "epoch": 0.0343287404393127, "grad_norm": 0.5859375, "learning_rate": 2.999998381929747e-05, "loss": 2.2548, "step": 1064 }, { "epoch": 0.034361004293109045, "grad_norm": 0.62890625, "learning_rate": 2.9999981425214424e-05, "loss": 2.2865, "step": 1065 }, { "epoch": 0.034393268146905395, "grad_norm": 0.57421875, "learning_rate": 2.9999978866022346e-05, "loss": 2.2605, "step": 1066 }, { "epoch": 0.03442553200070174, "grad_norm": 0.58203125, "learning_rate": 2.9999976141721262e-05, "loss": 2.2847, "step": 1067 }, { "epoch": 0.03445779585449808, "grad_norm": 0.50390625, "learning_rate": 2.99999732523112e-05, "loss": 2.2742, "step": 1068 }, { "epoch": 0.034490059708294434, "grad_norm": 0.5234375, "learning_rate": 2.9999970197792194e-05, "loss": 2.2687, "step": 1069 }, { "epoch": 0.03452232356209078, "grad_norm": 0.57421875, "learning_rate": 2.9999966978164274e-05, "loss": 2.2944, "step": 1070 }, { "epoch": 0.03455458741588713, "grad_norm": 0.65625, "learning_rate": 2.9999963593427484e-05, "loss": 2.269, "step": 1071 }, { "epoch": 0.03458685126968347, "grad_norm": 0.6171875, "learning_rate": 2.9999960043581852e-05, "loss": 2.2954, "step": 1072 }, { "epoch": 0.034619115123479816, "grad_norm": 0.546875, "learning_rate": 2.9999956328627422e-05, "loss": 2.306, "step": 1073 }, { "epoch": 0.034651378977276166, "grad_norm": 0.55859375, "learning_rate": 2.9999952448564237e-05, "loss": 2.3173, "step": 1074 }, { "epoch": 0.03468364283107251, "grad_norm": 0.59765625, "learning_rate": 2.999994840339233e-05, "loss": 2.2671, "step": 1075 }, { "epoch": 0.03471590668486886, "grad_norm": 0.56640625, "learning_rate": 2.9999944193111756e-05, "loss": 2.2288, "step": 1076 }, { "epoch": 0.034748170538665205, "grad_norm": 0.5390625, "learning_rate": 2.999993981772256e-05, "loss": 2.2293, "step": 1077 }, { "epoch": 0.03478043439246155, "grad_norm": 0.6484375, "learning_rate": 2.9999935277224783e-05, "loss": 2.2095, "step": 1078 }, { "epoch": 0.0348126982462579, "grad_norm": 0.74609375, "learning_rate": 2.9999930571618476e-05, "loss": 2.2055, "step": 1079 }, { "epoch": 0.03484496210005424, "grad_norm": 0.69921875, "learning_rate": 2.99999257009037e-05, "loss": 2.2123, "step": 1080 }, { "epoch": 0.034877225953850594, "grad_norm": 0.5625, "learning_rate": 2.99999206650805e-05, "loss": 2.2218, "step": 1081 }, { "epoch": 0.03490948980764694, "grad_norm": 0.5703125, "learning_rate": 2.9999915464148934e-05, "loss": 2.2313, "step": 1082 }, { "epoch": 0.03494175366144328, "grad_norm": 0.546875, "learning_rate": 2.9999910098109063e-05, "loss": 2.2227, "step": 1083 }, { "epoch": 0.03497401751523963, "grad_norm": 0.60546875, "learning_rate": 2.999990456696094e-05, "loss": 2.2367, "step": 1084 }, { "epoch": 0.035006281369035976, "grad_norm": 0.75390625, "learning_rate": 2.999989887070463e-05, "loss": 2.2401, "step": 1085 }, { "epoch": 0.03503854522283232, "grad_norm": 0.83203125, "learning_rate": 2.999989300934019e-05, "loss": 2.2201, "step": 1086 }, { "epoch": 0.03507080907662867, "grad_norm": 0.84765625, "learning_rate": 2.9999886982867693e-05, "loss": 2.2077, "step": 1087 }, { "epoch": 0.035103072930425014, "grad_norm": 0.9375, "learning_rate": 2.99998807912872e-05, "loss": 2.2226, "step": 1088 }, { "epoch": 0.035135336784221365, "grad_norm": 1.0078125, "learning_rate": 2.9999874434598778e-05, "loss": 2.2357, "step": 1089 }, { "epoch": 0.03516760063801771, "grad_norm": 0.7734375, "learning_rate": 2.99998679128025e-05, "loss": 2.2284, "step": 1090 }, { "epoch": 0.03519986449181405, "grad_norm": 0.5078125, "learning_rate": 2.9999861225898436e-05, "loss": 2.2229, "step": 1091 }, { "epoch": 0.0352321283456104, "grad_norm": 0.703125, "learning_rate": 2.9999854373886663e-05, "loss": 2.1919, "step": 1092 }, { "epoch": 0.03526439219940675, "grad_norm": 0.609375, "learning_rate": 2.9999847356767254e-05, "loss": 2.2133, "step": 1093 }, { "epoch": 0.0352966560532031, "grad_norm": 0.5625, "learning_rate": 2.999984017454028e-05, "loss": 2.1871, "step": 1094 }, { "epoch": 0.03532891990699944, "grad_norm": 0.5546875, "learning_rate": 2.999983282720583e-05, "loss": 2.2042, "step": 1095 }, { "epoch": 0.035361183760795785, "grad_norm": 0.5859375, "learning_rate": 2.9999825314763984e-05, "loss": 2.2286, "step": 1096 }, { "epoch": 0.035393447614592136, "grad_norm": 0.53125, "learning_rate": 2.9999817637214823e-05, "loss": 2.2316, "step": 1097 }, { "epoch": 0.03542571146838848, "grad_norm": 0.58203125, "learning_rate": 2.9999809794558424e-05, "loss": 2.2064, "step": 1098 }, { "epoch": 0.03545797532218483, "grad_norm": 0.640625, "learning_rate": 2.9999801786794884e-05, "loss": 2.2389, "step": 1099 }, { "epoch": 0.035490239175981174, "grad_norm": 0.65625, "learning_rate": 2.9999793613924284e-05, "loss": 2.1887, "step": 1100 }, { "epoch": 0.03552250302977752, "grad_norm": 0.5546875, "learning_rate": 2.9999785275946718e-05, "loss": 2.198, "step": 1101 }, { "epoch": 0.03555476688357387, "grad_norm": 0.51953125, "learning_rate": 2.9999776772862278e-05, "loss": 2.1775, "step": 1102 }, { "epoch": 0.03558703073737021, "grad_norm": 0.5703125, "learning_rate": 2.9999768104671056e-05, "loss": 2.225, "step": 1103 }, { "epoch": 0.03561929459116656, "grad_norm": 0.5234375, "learning_rate": 2.999975927137315e-05, "loss": 2.2024, "step": 1104 }, { "epoch": 0.03565155844496291, "grad_norm": 0.49609375, "learning_rate": 2.9999750272968646e-05, "loss": 2.2087, "step": 1105 }, { "epoch": 0.03568382229875925, "grad_norm": 0.5390625, "learning_rate": 2.9999741109457655e-05, "loss": 2.2112, "step": 1106 }, { "epoch": 0.0357160861525556, "grad_norm": 0.5546875, "learning_rate": 2.999973178084028e-05, "loss": 2.2256, "step": 1107 }, { "epoch": 0.035748350006351945, "grad_norm": 0.58984375, "learning_rate": 2.9999722287116614e-05, "loss": 2.2477, "step": 1108 }, { "epoch": 0.035780613860148296, "grad_norm": 0.625, "learning_rate": 2.999971262828677e-05, "loss": 2.2117, "step": 1109 }, { "epoch": 0.03581287771394464, "grad_norm": 0.6484375, "learning_rate": 2.9999702804350844e-05, "loss": 2.2142, "step": 1110 }, { "epoch": 0.03584514156774098, "grad_norm": 0.59765625, "learning_rate": 2.999969281530895e-05, "loss": 2.1884, "step": 1111 }, { "epoch": 0.035877405421537334, "grad_norm": 0.54296875, "learning_rate": 2.99996826611612e-05, "loss": 2.2205, "step": 1112 }, { "epoch": 0.03590966927533368, "grad_norm": 0.5859375, "learning_rate": 2.999967234190771e-05, "loss": 2.2165, "step": 1113 }, { "epoch": 0.03594193312913003, "grad_norm": 0.59375, "learning_rate": 2.999966185754858e-05, "loss": 2.1911, "step": 1114 }, { "epoch": 0.03597419698292637, "grad_norm": 0.57421875, "learning_rate": 2.999965120808394e-05, "loss": 2.2337, "step": 1115 }, { "epoch": 0.036006460836722716, "grad_norm": 0.5859375, "learning_rate": 2.999964039351389e-05, "loss": 2.2037, "step": 1116 }, { "epoch": 0.03603872469051907, "grad_norm": 0.65234375, "learning_rate": 2.999962941383857e-05, "loss": 2.2028, "step": 1117 }, { "epoch": 0.03607098854431541, "grad_norm": 0.5703125, "learning_rate": 2.9999618269058084e-05, "loss": 2.2136, "step": 1118 }, { "epoch": 0.03610325239811176, "grad_norm": 0.6328125, "learning_rate": 2.999960695917256e-05, "loss": 2.2245, "step": 1119 }, { "epoch": 0.036135516251908105, "grad_norm": 0.8671875, "learning_rate": 2.9999595484182125e-05, "loss": 2.2115, "step": 1120 }, { "epoch": 0.03616778010570445, "grad_norm": 1.125, "learning_rate": 2.9999583844086906e-05, "loss": 2.1945, "step": 1121 }, { "epoch": 0.0362000439595008, "grad_norm": 1.375, "learning_rate": 2.9999572038887025e-05, "loss": 2.1813, "step": 1122 }, { "epoch": 0.03623230781329714, "grad_norm": 0.6328125, "learning_rate": 2.999956006858262e-05, "loss": 2.2274, "step": 1123 }, { "epoch": 0.036264571667093494, "grad_norm": 0.9375, "learning_rate": 2.9999547933173814e-05, "loss": 2.2016, "step": 1124 }, { "epoch": 0.03629683552088984, "grad_norm": 1.2109375, "learning_rate": 2.9999535632660745e-05, "loss": 2.1952, "step": 1125 }, { "epoch": 0.03632909937468618, "grad_norm": 0.5625, "learning_rate": 2.999952316704355e-05, "loss": 2.1826, "step": 1126 }, { "epoch": 0.03636136322848253, "grad_norm": 0.9609375, "learning_rate": 2.9999510536322367e-05, "loss": 2.2478, "step": 1127 }, { "epoch": 0.036393627082278876, "grad_norm": 0.89453125, "learning_rate": 2.9999497740497326e-05, "loss": 2.2189, "step": 1128 }, { "epoch": 0.03642589093607522, "grad_norm": 0.62890625, "learning_rate": 2.999948477956858e-05, "loss": 2.2217, "step": 1129 }, { "epoch": 0.03645815478987157, "grad_norm": 0.9296875, "learning_rate": 2.9999471653536267e-05, "loss": 2.223, "step": 1130 }, { "epoch": 0.036490418643667914, "grad_norm": 0.8125, "learning_rate": 2.999945836240053e-05, "loss": 2.2008, "step": 1131 }, { "epoch": 0.036522682497464265, "grad_norm": 0.61328125, "learning_rate": 2.9999444906161512e-05, "loss": 2.2046, "step": 1132 }, { "epoch": 0.03655494635126061, "grad_norm": 0.78125, "learning_rate": 2.9999431284819366e-05, "loss": 2.1534, "step": 1133 }, { "epoch": 0.03658721020505695, "grad_norm": 0.5859375, "learning_rate": 2.9999417498374243e-05, "loss": 2.2231, "step": 1134 }, { "epoch": 0.0366194740588533, "grad_norm": 0.6640625, "learning_rate": 2.999940354682629e-05, "loss": 2.2242, "step": 1135 }, { "epoch": 0.03665173791264965, "grad_norm": 0.58984375, "learning_rate": 2.9999389430175666e-05, "loss": 2.2112, "step": 1136 }, { "epoch": 0.036684001766446, "grad_norm": 0.57421875, "learning_rate": 2.9999375148422525e-05, "loss": 2.2011, "step": 1137 }, { "epoch": 0.03671626562024234, "grad_norm": 0.640625, "learning_rate": 2.999936070156702e-05, "loss": 2.2215, "step": 1138 }, { "epoch": 0.036748529474038685, "grad_norm": 0.5703125, "learning_rate": 2.999934608960931e-05, "loss": 2.2157, "step": 1139 }, { "epoch": 0.036780793327835036, "grad_norm": 0.58203125, "learning_rate": 2.9999331312549564e-05, "loss": 2.2177, "step": 1140 }, { "epoch": 0.03681305718163138, "grad_norm": 0.58203125, "learning_rate": 2.999931637038794e-05, "loss": 2.1844, "step": 1141 }, { "epoch": 0.03684532103542773, "grad_norm": 0.55078125, "learning_rate": 2.99993012631246e-05, "loss": 2.2219, "step": 1142 }, { "epoch": 0.036877584889224074, "grad_norm": 0.546875, "learning_rate": 2.9999285990759712e-05, "loss": 2.1962, "step": 1143 }, { "epoch": 0.03690984874302042, "grad_norm": 0.498046875, "learning_rate": 2.9999270553293445e-05, "loss": 2.2036, "step": 1144 }, { "epoch": 0.03694211259681677, "grad_norm": 0.5234375, "learning_rate": 2.9999254950725965e-05, "loss": 2.2258, "step": 1145 }, { "epoch": 0.03697437645061311, "grad_norm": 0.51953125, "learning_rate": 2.9999239183057446e-05, "loss": 2.2058, "step": 1146 }, { "epoch": 0.03700664030440946, "grad_norm": 0.5078125, "learning_rate": 2.999922325028807e-05, "loss": 2.2093, "step": 1147 }, { "epoch": 0.03703890415820581, "grad_norm": 0.490234375, "learning_rate": 2.9999207152418e-05, "loss": 2.19, "step": 1148 }, { "epoch": 0.03707116801200215, "grad_norm": 0.4453125, "learning_rate": 2.9999190889447416e-05, "loss": 2.2084, "step": 1149 }, { "epoch": 0.0371034318657985, "grad_norm": 0.4609375, "learning_rate": 2.9999174461376506e-05, "loss": 2.1739, "step": 1150 }, { "epoch": 0.037135695719594845, "grad_norm": 0.47265625, "learning_rate": 2.9999157868205435e-05, "loss": 2.2126, "step": 1151 }, { "epoch": 0.037167959573391196, "grad_norm": 0.482421875, "learning_rate": 2.99991411099344e-05, "loss": 2.2119, "step": 1152 }, { "epoch": 0.03720022342718754, "grad_norm": 0.60546875, "learning_rate": 2.9999124186563582e-05, "loss": 2.1937, "step": 1153 }, { "epoch": 0.037232487280983884, "grad_norm": 0.55078125, "learning_rate": 2.9999107098093162e-05, "loss": 2.2159, "step": 1154 }, { "epoch": 0.037264751134780234, "grad_norm": 0.453125, "learning_rate": 2.999908984452333e-05, "loss": 2.201, "step": 1155 }, { "epoch": 0.03729701498857658, "grad_norm": 0.4765625, "learning_rate": 2.999907242585428e-05, "loss": 2.2097, "step": 1156 }, { "epoch": 0.03732927884237293, "grad_norm": 0.451171875, "learning_rate": 2.99990548420862e-05, "loss": 2.2195, "step": 1157 }, { "epoch": 0.03736154269616927, "grad_norm": 0.484375, "learning_rate": 2.9999037093219282e-05, "loss": 2.2065, "step": 1158 }, { "epoch": 0.037393806549965616, "grad_norm": 0.57421875, "learning_rate": 2.999901917925373e-05, "loss": 2.2189, "step": 1159 }, { "epoch": 0.03742607040376197, "grad_norm": 0.5390625, "learning_rate": 2.999900110018973e-05, "loss": 2.2068, "step": 1160 }, { "epoch": 0.03745833425755831, "grad_norm": 0.55859375, "learning_rate": 2.9998982856027484e-05, "loss": 2.148, "step": 1161 }, { "epoch": 0.03749059811135466, "grad_norm": 1.125, "learning_rate": 2.9998964446767203e-05, "loss": 2.2561, "step": 1162 }, { "epoch": 0.037522861965151005, "grad_norm": 1.125, "learning_rate": 2.9998945872409077e-05, "loss": 2.2923, "step": 1163 }, { "epoch": 0.03755512581894735, "grad_norm": 1.15625, "learning_rate": 2.999892713295332e-05, "loss": 2.3257, "step": 1164 }, { "epoch": 0.0375873896727437, "grad_norm": 0.75390625, "learning_rate": 2.9998908228400124e-05, "loss": 2.2537, "step": 1165 }, { "epoch": 0.037619653526540044, "grad_norm": 0.9140625, "learning_rate": 2.999888915874971e-05, "loss": 2.2742, "step": 1166 }, { "epoch": 0.037651917380336394, "grad_norm": 0.921875, "learning_rate": 2.999886992400229e-05, "loss": 2.2915, "step": 1167 }, { "epoch": 0.03768418123413274, "grad_norm": 1.484375, "learning_rate": 2.9998850524158064e-05, "loss": 2.2687, "step": 1168 }, { "epoch": 0.03771644508792908, "grad_norm": 0.8984375, "learning_rate": 2.999883095921725e-05, "loss": 2.29, "step": 1169 }, { "epoch": 0.03774870894172543, "grad_norm": 0.66015625, "learning_rate": 2.999881122918007e-05, "loss": 2.2863, "step": 1170 }, { "epoch": 0.037780972795521776, "grad_norm": 1.0234375, "learning_rate": 2.999879133404673e-05, "loss": 2.2906, "step": 1171 }, { "epoch": 0.03781323664931812, "grad_norm": 0.9921875, "learning_rate": 2.999877127381746e-05, "loss": 2.2751, "step": 1172 }, { "epoch": 0.03784550050311447, "grad_norm": 0.80078125, "learning_rate": 2.9998751048492478e-05, "loss": 2.28, "step": 1173 }, { "epoch": 0.037877764356910815, "grad_norm": 0.6328125, "learning_rate": 2.9998730658072e-05, "loss": 2.2869, "step": 1174 }, { "epoch": 0.037910028210707165, "grad_norm": 0.72265625, "learning_rate": 2.9998710102556258e-05, "loss": 2.2924, "step": 1175 }, { "epoch": 0.03794229206450351, "grad_norm": 0.62109375, "learning_rate": 2.9998689381945473e-05, "loss": 2.2914, "step": 1176 }, { "epoch": 0.03797455591829985, "grad_norm": 0.65625, "learning_rate": 2.9998668496239873e-05, "loss": 2.283, "step": 1177 }, { "epoch": 0.038006819772096204, "grad_norm": 0.5703125, "learning_rate": 2.9998647445439695e-05, "loss": 2.2498, "step": 1178 }, { "epoch": 0.03803908362589255, "grad_norm": 0.56640625, "learning_rate": 2.9998626229545165e-05, "loss": 2.2495, "step": 1179 }, { "epoch": 0.0380713474796889, "grad_norm": 0.5390625, "learning_rate": 2.9998604848556514e-05, "loss": 2.2452, "step": 1180 }, { "epoch": 0.03810361133348524, "grad_norm": 0.53125, "learning_rate": 2.9998583302473983e-05, "loss": 2.2614, "step": 1181 }, { "epoch": 0.038135875187281586, "grad_norm": 0.51953125, "learning_rate": 2.999856159129781e-05, "loss": 2.2216, "step": 1182 }, { "epoch": 0.038168139041077936, "grad_norm": 0.50390625, "learning_rate": 2.9998539715028227e-05, "loss": 2.2856, "step": 1183 }, { "epoch": 0.03820040289487428, "grad_norm": 0.53515625, "learning_rate": 2.9998517673665483e-05, "loss": 2.2736, "step": 1184 }, { "epoch": 0.03823266674867063, "grad_norm": 0.5390625, "learning_rate": 2.999849546720981e-05, "loss": 2.2916, "step": 1185 }, { "epoch": 0.038264930602466975, "grad_norm": 0.48828125, "learning_rate": 2.9998473095661462e-05, "loss": 2.2723, "step": 1186 }, { "epoch": 0.03829719445626332, "grad_norm": 0.51171875, "learning_rate": 2.9998450559020682e-05, "loss": 2.2548, "step": 1187 }, { "epoch": 0.03832945831005967, "grad_norm": 0.5, "learning_rate": 2.999842785728772e-05, "loss": 2.2507, "step": 1188 }, { "epoch": 0.03836172216385601, "grad_norm": 0.474609375, "learning_rate": 2.999840499046282e-05, "loss": 2.2877, "step": 1189 }, { "epoch": 0.038393986017652364, "grad_norm": 0.494140625, "learning_rate": 2.999838195854624e-05, "loss": 2.2773, "step": 1190 }, { "epoch": 0.03842624987144871, "grad_norm": 0.515625, "learning_rate": 2.999835876153823e-05, "loss": 2.2492, "step": 1191 }, { "epoch": 0.03845851372524505, "grad_norm": 0.5, "learning_rate": 2.999833539943905e-05, "loss": 2.2754, "step": 1192 }, { "epoch": 0.0384907775790414, "grad_norm": 0.54296875, "learning_rate": 2.999831187224895e-05, "loss": 2.2639, "step": 1193 }, { "epoch": 0.038523041432837746, "grad_norm": 0.55859375, "learning_rate": 2.9998288179968193e-05, "loss": 2.2562, "step": 1194 }, { "epoch": 0.038555305286634096, "grad_norm": 0.494140625, "learning_rate": 2.999826432259704e-05, "loss": 2.2691, "step": 1195 }, { "epoch": 0.03858756914043044, "grad_norm": 0.470703125, "learning_rate": 2.9998240300135753e-05, "loss": 2.2806, "step": 1196 }, { "epoch": 0.038619832994226784, "grad_norm": 0.41015625, "learning_rate": 2.9998216112584594e-05, "loss": 2.2632, "step": 1197 }, { "epoch": 0.038652096848023135, "grad_norm": 0.46875, "learning_rate": 2.9998191759943837e-05, "loss": 2.2674, "step": 1198 }, { "epoch": 0.03868436070181948, "grad_norm": 0.54296875, "learning_rate": 2.9998167242213747e-05, "loss": 2.241, "step": 1199 }, { "epoch": 0.03871662455561583, "grad_norm": 0.50390625, "learning_rate": 2.9998142559394583e-05, "loss": 2.2674, "step": 1200 }, { "epoch": 0.03874888840941217, "grad_norm": 0.484375, "learning_rate": 2.999811771148663e-05, "loss": 2.2868, "step": 1201 }, { "epoch": 0.03878115226320852, "grad_norm": 0.4921875, "learning_rate": 2.9998092698490156e-05, "loss": 2.2845, "step": 1202 }, { "epoch": 0.03881341611700487, "grad_norm": 0.46484375, "learning_rate": 2.9998067520405435e-05, "loss": 2.2754, "step": 1203 }, { "epoch": 0.03884567997080121, "grad_norm": 0.45703125, "learning_rate": 2.999804217723275e-05, "loss": 2.2417, "step": 1204 }, { "epoch": 0.03887794382459756, "grad_norm": 0.421875, "learning_rate": 2.9998016668972375e-05, "loss": 2.2353, "step": 1205 }, { "epoch": 0.038910207678393906, "grad_norm": 0.458984375, "learning_rate": 2.999799099562459e-05, "loss": 2.265, "step": 1206 }, { "epoch": 0.03894247153219025, "grad_norm": 0.44921875, "learning_rate": 2.9997965157189685e-05, "loss": 2.2692, "step": 1207 }, { "epoch": 0.0389747353859866, "grad_norm": 0.5078125, "learning_rate": 2.9997939153667934e-05, "loss": 2.2415, "step": 1208 }, { "epoch": 0.039006999239782944, "grad_norm": 0.55859375, "learning_rate": 2.9997912985059628e-05, "loss": 2.27, "step": 1209 }, { "epoch": 0.039039263093579295, "grad_norm": 0.69140625, "learning_rate": 2.9997886651365055e-05, "loss": 2.2677, "step": 1210 }, { "epoch": 0.03907152694737564, "grad_norm": 1.1015625, "learning_rate": 2.999786015258451e-05, "loss": 2.291, "step": 1211 }, { "epoch": 0.03910379080117198, "grad_norm": 1.8046875, "learning_rate": 2.9997833488718278e-05, "loss": 2.2926, "step": 1212 }, { "epoch": 0.03913605465496833, "grad_norm": 0.62890625, "learning_rate": 2.999780665976665e-05, "loss": 2.2709, "step": 1213 }, { "epoch": 0.03916831850876468, "grad_norm": 2.75, "learning_rate": 2.999777966572993e-05, "loss": 2.2588, "step": 1214 }, { "epoch": 0.03920058236256103, "grad_norm": 1.96875, "learning_rate": 2.9997752506608415e-05, "loss": 2.2976, "step": 1215 }, { "epoch": 0.03923284621635737, "grad_norm": 3.171875, "learning_rate": 2.999772518240239e-05, "loss": 2.282, "step": 1216 }, { "epoch": 0.039265110070153715, "grad_norm": 3.234375, "learning_rate": 2.999769769311217e-05, "loss": 2.2763, "step": 1217 }, { "epoch": 0.039297373923950066, "grad_norm": 0.9140625, "learning_rate": 2.9997670038738053e-05, "loss": 2.2787, "step": 1218 }, { "epoch": 0.03932963777774641, "grad_norm": 4.875, "learning_rate": 2.9997642219280344e-05, "loss": 2.2674, "step": 1219 }, { "epoch": 0.03936190163154275, "grad_norm": 5.3125, "learning_rate": 2.999761423473935e-05, "loss": 2.2794, "step": 1220 }, { "epoch": 0.039394165485339104, "grad_norm": 3.5, "learning_rate": 2.9997586085115373e-05, "loss": 2.2615, "step": 1221 }, { "epoch": 0.03942642933913545, "grad_norm": 1.90625, "learning_rate": 2.9997557770408735e-05, "loss": 2.2751, "step": 1222 }, { "epoch": 0.0394586931929318, "grad_norm": 2.34375, "learning_rate": 2.9997529290619733e-05, "loss": 2.2749, "step": 1223 }, { "epoch": 0.03949095704672814, "grad_norm": 1.6015625, "learning_rate": 2.999750064574869e-05, "loss": 2.2681, "step": 1224 }, { "epoch": 0.039523220900524486, "grad_norm": 1.6171875, "learning_rate": 2.9997471835795923e-05, "loss": 2.2624, "step": 1225 }, { "epoch": 0.03955548475432084, "grad_norm": 1.578125, "learning_rate": 2.9997442860761742e-05, "loss": 2.2861, "step": 1226 }, { "epoch": 0.03958774860811718, "grad_norm": 1.015625, "learning_rate": 2.999741372064647e-05, "loss": 2.254, "step": 1227 }, { "epoch": 0.03962001246191353, "grad_norm": 1.3046875, "learning_rate": 2.9997384415450428e-05, "loss": 2.2696, "step": 1228 }, { "epoch": 0.039652276315709875, "grad_norm": 1.09375, "learning_rate": 2.9997354945173934e-05, "loss": 2.2545, "step": 1229 }, { "epoch": 0.03968454016950622, "grad_norm": 0.96875, "learning_rate": 2.9997325309817317e-05, "loss": 2.2717, "step": 1230 }, { "epoch": 0.03971680402330257, "grad_norm": 0.98828125, "learning_rate": 2.99972955093809e-05, "loss": 2.2528, "step": 1231 }, { "epoch": 0.03974906787709891, "grad_norm": 0.82421875, "learning_rate": 2.9997265543865018e-05, "loss": 2.2351, "step": 1232 }, { "epoch": 0.039781331730895264, "grad_norm": 0.87890625, "learning_rate": 2.999723541326999e-05, "loss": 2.2738, "step": 1233 }, { "epoch": 0.03981359558469161, "grad_norm": 0.6953125, "learning_rate": 2.9997205117596157e-05, "loss": 2.2465, "step": 1234 }, { "epoch": 0.03984585943848795, "grad_norm": 0.74609375, "learning_rate": 2.999717465684385e-05, "loss": 2.2432, "step": 1235 }, { "epoch": 0.0398781232922843, "grad_norm": 0.703125, "learning_rate": 2.99971440310134e-05, "loss": 2.2682, "step": 1236 }, { "epoch": 0.039910387146080646, "grad_norm": 0.6484375, "learning_rate": 2.999711324010515e-05, "loss": 2.2614, "step": 1237 }, { "epoch": 0.039942650999877, "grad_norm": 0.69140625, "learning_rate": 2.9997082284119437e-05, "loss": 2.2753, "step": 1238 }, { "epoch": 0.03997491485367334, "grad_norm": 0.609375, "learning_rate": 2.99970511630566e-05, "loss": 2.2518, "step": 1239 }, { "epoch": 0.040007178707469684, "grad_norm": 0.609375, "learning_rate": 2.999701987691698e-05, "loss": 2.2808, "step": 1240 }, { "epoch": 0.040039442561266035, "grad_norm": 0.62890625, "learning_rate": 2.999698842570093e-05, "loss": 2.2113, "step": 1241 }, { "epoch": 0.04007170641506238, "grad_norm": 0.5390625, "learning_rate": 2.9996956809408784e-05, "loss": 2.2025, "step": 1242 }, { "epoch": 0.04010397026885873, "grad_norm": 0.54296875, "learning_rate": 2.99969250280409e-05, "loss": 2.1753, "step": 1243 }, { "epoch": 0.04013623412265507, "grad_norm": 0.51953125, "learning_rate": 2.9996893081597623e-05, "loss": 2.1955, "step": 1244 }, { "epoch": 0.04016849797645142, "grad_norm": 0.50390625, "learning_rate": 2.9996860970079304e-05, "loss": 2.2314, "step": 1245 }, { "epoch": 0.04020076183024777, "grad_norm": 0.49609375, "learning_rate": 2.99968286934863e-05, "loss": 2.1863, "step": 1246 }, { "epoch": 0.04023302568404411, "grad_norm": 0.52734375, "learning_rate": 2.9996796251818968e-05, "loss": 2.2665, "step": 1247 }, { "epoch": 0.04026528953784046, "grad_norm": 0.47265625, "learning_rate": 2.9996763645077652e-05, "loss": 2.2703, "step": 1248 }, { "epoch": 0.040297553391636806, "grad_norm": 0.462890625, "learning_rate": 2.999673087326273e-05, "loss": 2.2504, "step": 1249 }, { "epoch": 0.04032981724543315, "grad_norm": 0.49609375, "learning_rate": 2.999669793637455e-05, "loss": 2.2447, "step": 1250 }, { "epoch": 0.0403620810992295, "grad_norm": 0.5234375, "learning_rate": 2.9996664834413476e-05, "loss": 2.2723, "step": 1251 }, { "epoch": 0.040394344953025844, "grad_norm": 0.478515625, "learning_rate": 2.9996631567379875e-05, "loss": 2.2948, "step": 1252 }, { "epoch": 0.040426608806822195, "grad_norm": 0.48828125, "learning_rate": 2.999659813527411e-05, "loss": 2.2699, "step": 1253 }, { "epoch": 0.04045887266061854, "grad_norm": 0.50390625, "learning_rate": 2.9996564538096557e-05, "loss": 2.2298, "step": 1254 }, { "epoch": 0.04049113651441488, "grad_norm": 0.5, "learning_rate": 2.9996530775847575e-05, "loss": 2.2701, "step": 1255 }, { "epoch": 0.04052340036821123, "grad_norm": 0.478515625, "learning_rate": 2.9996496848527543e-05, "loss": 2.2426, "step": 1256 }, { "epoch": 0.04055566422200758, "grad_norm": 0.50390625, "learning_rate": 2.999646275613683e-05, "loss": 2.2271, "step": 1257 }, { "epoch": 0.04058792807580393, "grad_norm": 0.490234375, "learning_rate": 2.9996428498675817e-05, "loss": 2.2578, "step": 1258 }, { "epoch": 0.04062019192960027, "grad_norm": 0.4765625, "learning_rate": 2.9996394076144877e-05, "loss": 2.2509, "step": 1259 }, { "epoch": 0.040652455783396615, "grad_norm": 0.439453125, "learning_rate": 2.9996359488544386e-05, "loss": 2.2645, "step": 1260 }, { "epoch": 0.040684719637192966, "grad_norm": 0.453125, "learning_rate": 2.999632473587473e-05, "loss": 2.2298, "step": 1261 }, { "epoch": 0.04071698349098931, "grad_norm": 0.46875, "learning_rate": 2.9996289818136292e-05, "loss": 2.2647, "step": 1262 }, { "epoch": 0.040749247344785654, "grad_norm": 0.453125, "learning_rate": 2.9996254735329448e-05, "loss": 2.2598, "step": 1263 }, { "epoch": 0.040781511198582004, "grad_norm": 0.466796875, "learning_rate": 2.999621948745459e-05, "loss": 2.2849, "step": 1264 }, { "epoch": 0.04081377505237835, "grad_norm": 0.47265625, "learning_rate": 2.9996184074512115e-05, "loss": 2.2711, "step": 1265 }, { "epoch": 0.0408460389061747, "grad_norm": 0.515625, "learning_rate": 2.9996148496502396e-05, "loss": 2.2467, "step": 1266 }, { "epoch": 0.04087830275997104, "grad_norm": 0.484375, "learning_rate": 2.9996112753425837e-05, "loss": 2.2735, "step": 1267 }, { "epoch": 0.040910566613767387, "grad_norm": 0.47265625, "learning_rate": 2.9996076845282825e-05, "loss": 2.2569, "step": 1268 }, { "epoch": 0.04094283046756374, "grad_norm": 0.482421875, "learning_rate": 2.999604077207376e-05, "loss": 2.2638, "step": 1269 }, { "epoch": 0.04097509432136008, "grad_norm": 0.5625, "learning_rate": 2.999600453379903e-05, "loss": 2.2333, "step": 1270 }, { "epoch": 0.04100735817515643, "grad_norm": 0.72265625, "learning_rate": 2.9995968130459043e-05, "loss": 2.2646, "step": 1271 }, { "epoch": 0.041039622028952776, "grad_norm": 0.86328125, "learning_rate": 2.9995931562054198e-05, "loss": 2.2685, "step": 1272 }, { "epoch": 0.04107188588274912, "grad_norm": 0.96875, "learning_rate": 2.9995894828584897e-05, "loss": 2.2599, "step": 1273 }, { "epoch": 0.04110414973654547, "grad_norm": 1.0703125, "learning_rate": 2.9995857930051542e-05, "loss": 2.254, "step": 1274 }, { "epoch": 0.041136413590341814, "grad_norm": 0.73828125, "learning_rate": 2.9995820866454545e-05, "loss": 2.2595, "step": 1275 }, { "epoch": 0.041168677444138164, "grad_norm": 0.578125, "learning_rate": 2.9995783637794303e-05, "loss": 2.2455, "step": 1276 }, { "epoch": 0.04120094129793451, "grad_norm": 0.67578125, "learning_rate": 2.9995746244071237e-05, "loss": 2.2439, "step": 1277 }, { "epoch": 0.04123320515173085, "grad_norm": 0.6015625, "learning_rate": 2.9995708685285755e-05, "loss": 2.2545, "step": 1278 }, { "epoch": 0.0412654690055272, "grad_norm": 0.6171875, "learning_rate": 2.9995670961438268e-05, "loss": 2.2518, "step": 1279 }, { "epoch": 0.04129773285932355, "grad_norm": 0.640625, "learning_rate": 2.9995633072529193e-05, "loss": 2.2537, "step": 1280 }, { "epoch": 0.0413299967131199, "grad_norm": 0.59765625, "learning_rate": 2.9995595018558947e-05, "loss": 2.2259, "step": 1281 }, { "epoch": 0.04136226056691624, "grad_norm": 0.58203125, "learning_rate": 2.999555679952795e-05, "loss": 2.233, "step": 1282 }, { "epoch": 0.041394524420712585, "grad_norm": 0.60546875, "learning_rate": 2.999551841543662e-05, "loss": 2.227, "step": 1283 }, { "epoch": 0.041426788274508936, "grad_norm": 0.546875, "learning_rate": 2.9995479866285383e-05, "loss": 2.2582, "step": 1284 }, { "epoch": 0.04145905212830528, "grad_norm": 0.62109375, "learning_rate": 2.999544115207466e-05, "loss": 2.2483, "step": 1285 }, { "epoch": 0.04149131598210163, "grad_norm": 0.498046875, "learning_rate": 2.9995402272804873e-05, "loss": 2.2533, "step": 1286 }, { "epoch": 0.041523579835897974, "grad_norm": 0.5390625, "learning_rate": 2.9995363228476465e-05, "loss": 2.2328, "step": 1287 }, { "epoch": 0.04155584368969432, "grad_norm": 0.6015625, "learning_rate": 2.999532401908985e-05, "loss": 2.2448, "step": 1288 }, { "epoch": 0.04158810754349067, "grad_norm": 0.5703125, "learning_rate": 2.999528464464547e-05, "loss": 2.2655, "step": 1289 }, { "epoch": 0.04162037139728701, "grad_norm": 0.59375, "learning_rate": 2.9995245105143748e-05, "loss": 2.2726, "step": 1290 }, { "epoch": 0.04165263525108336, "grad_norm": 0.58984375, "learning_rate": 2.9995205400585127e-05, "loss": 2.2265, "step": 1291 }, { "epoch": 0.04168489910487971, "grad_norm": 0.66796875, "learning_rate": 2.9995165530970048e-05, "loss": 2.2113, "step": 1292 }, { "epoch": 0.04171716295867605, "grad_norm": 0.61328125, "learning_rate": 2.999512549629894e-05, "loss": 2.238, "step": 1293 }, { "epoch": 0.0417494268124724, "grad_norm": 0.640625, "learning_rate": 2.999508529657225e-05, "loss": 2.251, "step": 1294 }, { "epoch": 0.041781690666268745, "grad_norm": 0.53515625, "learning_rate": 2.9995044931790415e-05, "loss": 2.236, "step": 1295 }, { "epoch": 0.041813954520065096, "grad_norm": 0.546875, "learning_rate": 2.999500440195389e-05, "loss": 2.2301, "step": 1296 }, { "epoch": 0.04184621837386144, "grad_norm": 0.515625, "learning_rate": 2.9994963707063106e-05, "loss": 2.2233, "step": 1297 }, { "epoch": 0.04187848222765778, "grad_norm": 0.53515625, "learning_rate": 2.9994922847118525e-05, "loss": 2.2508, "step": 1298 }, { "epoch": 0.041910746081454134, "grad_norm": 0.515625, "learning_rate": 2.9994881822120585e-05, "loss": 2.2542, "step": 1299 }, { "epoch": 0.04194300993525048, "grad_norm": 0.53515625, "learning_rate": 2.999484063206975e-05, "loss": 2.2477, "step": 1300 }, { "epoch": 0.04197527378904683, "grad_norm": 0.51171875, "learning_rate": 2.9994799276966462e-05, "loss": 2.2538, "step": 1301 }, { "epoch": 0.04200753764284317, "grad_norm": 0.5078125, "learning_rate": 2.999475775681118e-05, "loss": 2.2571, "step": 1302 }, { "epoch": 0.042039801496639516, "grad_norm": 0.5546875, "learning_rate": 2.999471607160436e-05, "loss": 2.2689, "step": 1303 }, { "epoch": 0.04207206535043587, "grad_norm": 0.5390625, "learning_rate": 2.999467422134647e-05, "loss": 2.2442, "step": 1304 }, { "epoch": 0.04210432920423221, "grad_norm": 0.494140625, "learning_rate": 2.9994632206037956e-05, "loss": 2.2337, "step": 1305 }, { "epoch": 0.042136593058028554, "grad_norm": 0.578125, "learning_rate": 2.999459002567929e-05, "loss": 2.2678, "step": 1306 }, { "epoch": 0.042168856911824905, "grad_norm": 0.5234375, "learning_rate": 2.9994547680270938e-05, "loss": 2.2706, "step": 1307 }, { "epoch": 0.04220112076562125, "grad_norm": 0.435546875, "learning_rate": 2.9994505169813354e-05, "loss": 2.2538, "step": 1308 }, { "epoch": 0.0422333846194176, "grad_norm": 0.45703125, "learning_rate": 2.9994462494307023e-05, "loss": 2.2759, "step": 1309 }, { "epoch": 0.04226564847321394, "grad_norm": 0.484375, "learning_rate": 2.9994419653752403e-05, "loss": 2.2372, "step": 1310 }, { "epoch": 0.04229791232701029, "grad_norm": 0.50390625, "learning_rate": 2.9994376648149963e-05, "loss": 2.2305, "step": 1311 }, { "epoch": 0.04233017618080664, "grad_norm": 0.57421875, "learning_rate": 2.999433347750019e-05, "loss": 2.2446, "step": 1312 }, { "epoch": 0.04236244003460298, "grad_norm": 0.609375, "learning_rate": 2.9994290141803545e-05, "loss": 2.2522, "step": 1313 }, { "epoch": 0.04239470388839933, "grad_norm": 0.5703125, "learning_rate": 2.9994246641060515e-05, "loss": 2.2412, "step": 1314 }, { "epoch": 0.042426967742195676, "grad_norm": 0.609375, "learning_rate": 2.999420297527157e-05, "loss": 2.2765, "step": 1315 }, { "epoch": 0.04245923159599202, "grad_norm": 0.72265625, "learning_rate": 2.9994159144437203e-05, "loss": 2.271, "step": 1316 }, { "epoch": 0.04249149544978837, "grad_norm": 0.80078125, "learning_rate": 2.9994115148557882e-05, "loss": 2.2365, "step": 1317 }, { "epoch": 0.042523759303584714, "grad_norm": 0.8125, "learning_rate": 2.99940709876341e-05, "loss": 2.2433, "step": 1318 }, { "epoch": 0.042556023157381065, "grad_norm": 0.78515625, "learning_rate": 2.999402666166634e-05, "loss": 2.2324, "step": 1319 }, { "epoch": 0.04258828701117741, "grad_norm": 0.6484375, "learning_rate": 2.9993982170655094e-05, "loss": 2.2382, "step": 1320 }, { "epoch": 0.04262055086497375, "grad_norm": 0.6328125, "learning_rate": 2.9993937514600846e-05, "loss": 2.2624, "step": 1321 }, { "epoch": 0.0426528147187701, "grad_norm": 0.6328125, "learning_rate": 2.9993892693504094e-05, "loss": 2.2239, "step": 1322 }, { "epoch": 0.04268507857256645, "grad_norm": 0.6484375, "learning_rate": 2.9993847707365325e-05, "loss": 2.2501, "step": 1323 }, { "epoch": 0.0427173424263628, "grad_norm": 0.5390625, "learning_rate": 2.999380255618504e-05, "loss": 2.2696, "step": 1324 }, { "epoch": 0.04274960628015914, "grad_norm": 0.52734375, "learning_rate": 2.9993757239963732e-05, "loss": 2.2592, "step": 1325 }, { "epoch": 0.042781870133955485, "grad_norm": 0.5703125, "learning_rate": 2.99937117587019e-05, "loss": 2.2316, "step": 1326 }, { "epoch": 0.042814133987751836, "grad_norm": 0.50390625, "learning_rate": 2.9993666112400043e-05, "loss": 2.189, "step": 1327 }, { "epoch": 0.04284639784154818, "grad_norm": 0.52734375, "learning_rate": 2.999362030105867e-05, "loss": 2.1711, "step": 1328 }, { "epoch": 0.04287866169534453, "grad_norm": 0.5078125, "learning_rate": 2.9993574324678275e-05, "loss": 2.1439, "step": 1329 }, { "epoch": 0.042910925549140874, "grad_norm": 0.59765625, "learning_rate": 2.9993528183259376e-05, "loss": 2.1687, "step": 1330 }, { "epoch": 0.04294318940293722, "grad_norm": 0.494140625, "learning_rate": 2.9993481876802474e-05, "loss": 2.1818, "step": 1331 }, { "epoch": 0.04297545325673357, "grad_norm": 0.5078125, "learning_rate": 2.999343540530808e-05, "loss": 2.1653, "step": 1332 }, { "epoch": 0.04300771711052991, "grad_norm": 0.546875, "learning_rate": 2.9993388768776705e-05, "loss": 2.1755, "step": 1333 }, { "epoch": 0.04303998096432626, "grad_norm": 0.60546875, "learning_rate": 2.9993341967208862e-05, "loss": 2.1385, "step": 1334 }, { "epoch": 0.04307224481812261, "grad_norm": 0.55859375, "learning_rate": 2.9993295000605065e-05, "loss": 2.1965, "step": 1335 }, { "epoch": 0.04310450867191895, "grad_norm": 0.4609375, "learning_rate": 2.9993247868965833e-05, "loss": 2.1514, "step": 1336 }, { "epoch": 0.0431367725257153, "grad_norm": 0.5703125, "learning_rate": 2.999320057229169e-05, "loss": 2.1883, "step": 1337 }, { "epoch": 0.043169036379511645, "grad_norm": 0.70703125, "learning_rate": 2.9993153110583147e-05, "loss": 2.2072, "step": 1338 }, { "epoch": 0.043201300233307996, "grad_norm": 0.671875, "learning_rate": 2.999310548384073e-05, "loss": 2.2072, "step": 1339 }, { "epoch": 0.04323356408710434, "grad_norm": 0.58984375, "learning_rate": 2.9993057692064968e-05, "loss": 2.193, "step": 1340 }, { "epoch": 0.043265827940900684, "grad_norm": 0.490234375, "learning_rate": 2.9993009735256376e-05, "loss": 2.1601, "step": 1341 }, { "epoch": 0.043298091794697034, "grad_norm": 0.5234375, "learning_rate": 2.9992961613415493e-05, "loss": 2.1766, "step": 1342 }, { "epoch": 0.04333035564849338, "grad_norm": 0.72265625, "learning_rate": 2.9992913326542844e-05, "loss": 2.1731, "step": 1343 }, { "epoch": 0.04336261950228973, "grad_norm": 0.6640625, "learning_rate": 2.999286487463896e-05, "loss": 2.2012, "step": 1344 }, { "epoch": 0.04339488335608607, "grad_norm": 0.455078125, "learning_rate": 2.9992816257704376e-05, "loss": 2.1932, "step": 1345 }, { "epoch": 0.043427147209882416, "grad_norm": 0.5078125, "learning_rate": 2.9992767475739628e-05, "loss": 2.1959, "step": 1346 }, { "epoch": 0.04345941106367877, "grad_norm": 0.484375, "learning_rate": 2.999271852874525e-05, "loss": 2.1863, "step": 1347 }, { "epoch": 0.04349167491747511, "grad_norm": 0.490234375, "learning_rate": 2.9992669416721782e-05, "loss": 2.1935, "step": 1348 }, { "epoch": 0.043523938771271455, "grad_norm": 0.53125, "learning_rate": 2.9992620139669763e-05, "loss": 2.1147, "step": 1349 }, { "epoch": 0.043556202625067805, "grad_norm": 0.515625, "learning_rate": 2.999257069758974e-05, "loss": 2.2092, "step": 1350 }, { "epoch": 0.04358846647886415, "grad_norm": 0.59375, "learning_rate": 2.9992521090482257e-05, "loss": 2.1921, "step": 1351 }, { "epoch": 0.0436207303326605, "grad_norm": 0.58203125, "learning_rate": 2.9992471318347852e-05, "loss": 2.1694, "step": 1352 }, { "epoch": 0.043652994186456844, "grad_norm": 0.65625, "learning_rate": 2.999242138118708e-05, "loss": 2.1616, "step": 1353 }, { "epoch": 0.04368525804025319, "grad_norm": 0.52734375, "learning_rate": 2.999237127900049e-05, "loss": 2.1346, "step": 1354 }, { "epoch": 0.04371752189404954, "grad_norm": 0.546875, "learning_rate": 2.9992321011788633e-05, "loss": 2.1888, "step": 1355 }, { "epoch": 0.04374978574784588, "grad_norm": 0.5546875, "learning_rate": 2.999227057955206e-05, "loss": 2.183, "step": 1356 }, { "epoch": 0.04378204960164223, "grad_norm": 0.494140625, "learning_rate": 2.9992219982291328e-05, "loss": 2.1802, "step": 1357 }, { "epoch": 0.043814313455438576, "grad_norm": 0.515625, "learning_rate": 2.9992169220006995e-05, "loss": 2.1152, "step": 1358 }, { "epoch": 0.04384657730923492, "grad_norm": 0.546875, "learning_rate": 2.9992118292699617e-05, "loss": 2.2036, "step": 1359 }, { "epoch": 0.04387884116303127, "grad_norm": 0.484375, "learning_rate": 2.9992067200369757e-05, "loss": 2.1777, "step": 1360 }, { "epoch": 0.043911105016827615, "grad_norm": 0.49609375, "learning_rate": 2.9992015943017973e-05, "loss": 2.1502, "step": 1361 }, { "epoch": 0.043943368870623965, "grad_norm": 0.49609375, "learning_rate": 2.9991964520644842e-05, "loss": 2.1696, "step": 1362 }, { "epoch": 0.04397563272442031, "grad_norm": 0.494140625, "learning_rate": 2.9991912933250912e-05, "loss": 2.1925, "step": 1363 }, { "epoch": 0.04400789657821665, "grad_norm": 0.54296875, "learning_rate": 2.9991861180836764e-05, "loss": 2.1951, "step": 1364 }, { "epoch": 0.044040160432013004, "grad_norm": 0.55078125, "learning_rate": 2.9991809263402958e-05, "loss": 2.1836, "step": 1365 }, { "epoch": 0.04407242428580935, "grad_norm": 0.58203125, "learning_rate": 2.9991757180950075e-05, "loss": 2.1685, "step": 1366 }, { "epoch": 0.0441046881396057, "grad_norm": 0.70703125, "learning_rate": 2.9991704933478684e-05, "loss": 2.1717, "step": 1367 }, { "epoch": 0.04413695199340204, "grad_norm": 0.99609375, "learning_rate": 2.999165252098936e-05, "loss": 2.1663, "step": 1368 }, { "epoch": 0.044169215847198386, "grad_norm": 1.515625, "learning_rate": 2.9991599943482678e-05, "loss": 2.1852, "step": 1369 }, { "epoch": 0.044201479700994736, "grad_norm": 0.56640625, "learning_rate": 2.9991547200959218e-05, "loss": 2.1298, "step": 1370 }, { "epoch": 0.04423374355479108, "grad_norm": 1.203125, "learning_rate": 2.9991494293419568e-05, "loss": 2.201, "step": 1371 }, { "epoch": 0.04426600740858743, "grad_norm": 0.85546875, "learning_rate": 2.99914412208643e-05, "loss": 2.1303, "step": 1372 }, { "epoch": 0.044298271262383775, "grad_norm": 0.7578125, "learning_rate": 2.9991387983294002e-05, "loss": 2.1637, "step": 1373 }, { "epoch": 0.04433053511618012, "grad_norm": 0.83203125, "learning_rate": 2.9991334580709253e-05, "loss": 2.1535, "step": 1374 }, { "epoch": 0.04436279896997647, "grad_norm": 0.66015625, "learning_rate": 2.9991281013110657e-05, "loss": 2.1715, "step": 1375 }, { "epoch": 0.04439506282377281, "grad_norm": 0.6796875, "learning_rate": 2.9991227280498788e-05, "loss": 2.177, "step": 1376 }, { "epoch": 0.044427326677569164, "grad_norm": 0.60546875, "learning_rate": 2.9991173382874245e-05, "loss": 2.1623, "step": 1377 }, { "epoch": 0.04445959053136551, "grad_norm": 0.65625, "learning_rate": 2.9991119320237625e-05, "loss": 2.1848, "step": 1378 }, { "epoch": 0.04449185438516185, "grad_norm": 0.56640625, "learning_rate": 2.9991065092589513e-05, "loss": 2.1982, "step": 1379 }, { "epoch": 0.0445241182389582, "grad_norm": 0.5859375, "learning_rate": 2.9991010699930516e-05, "loss": 2.1759, "step": 1380 }, { "epoch": 0.044556382092754546, "grad_norm": 0.57421875, "learning_rate": 2.999095614226122e-05, "loss": 2.2026, "step": 1381 }, { "epoch": 0.044588645946550896, "grad_norm": 0.53125, "learning_rate": 2.999090141958224e-05, "loss": 2.1804, "step": 1382 }, { "epoch": 0.04462090980034724, "grad_norm": 0.515625, "learning_rate": 2.999084653189417e-05, "loss": 2.1609, "step": 1383 }, { "epoch": 0.044653173654143584, "grad_norm": 0.494140625, "learning_rate": 2.9990791479197617e-05, "loss": 2.1735, "step": 1384 }, { "epoch": 0.044685437507939935, "grad_norm": 0.54296875, "learning_rate": 2.999073626149318e-05, "loss": 2.1973, "step": 1385 }, { "epoch": 0.04471770136173628, "grad_norm": 0.55078125, "learning_rate": 2.999068087878148e-05, "loss": 2.1977, "step": 1386 }, { "epoch": 0.04474996521553263, "grad_norm": 0.51171875, "learning_rate": 2.9990625331063114e-05, "loss": 2.1682, "step": 1387 }, { "epoch": 0.04478222906932897, "grad_norm": 0.62890625, "learning_rate": 2.99905696183387e-05, "loss": 2.1915, "step": 1388 }, { "epoch": 0.04481449292312532, "grad_norm": 0.451171875, "learning_rate": 2.999051374060885e-05, "loss": 2.1672, "step": 1389 }, { "epoch": 0.04484675677692167, "grad_norm": 0.6171875, "learning_rate": 2.9990457697874175e-05, "loss": 2.1668, "step": 1390 }, { "epoch": 0.04487902063071801, "grad_norm": 0.55078125, "learning_rate": 2.9990401490135303e-05, "loss": 2.1761, "step": 1391 }, { "epoch": 0.04491128448451436, "grad_norm": 0.53125, "learning_rate": 2.9990345117392844e-05, "loss": 2.1472, "step": 1392 }, { "epoch": 0.044943548338310706, "grad_norm": 0.47265625, "learning_rate": 2.9990288579647418e-05, "loss": 2.1843, "step": 1393 }, { "epoch": 0.04497581219210705, "grad_norm": 0.5625, "learning_rate": 2.999023187689965e-05, "loss": 2.1653, "step": 1394 }, { "epoch": 0.0450080760459034, "grad_norm": 0.57421875, "learning_rate": 2.999017500915016e-05, "loss": 2.1243, "step": 1395 }, { "epoch": 0.045040339899699744, "grad_norm": 0.53515625, "learning_rate": 2.9990117976399577e-05, "loss": 2.2018, "step": 1396 }, { "epoch": 0.04507260375349609, "grad_norm": 0.515625, "learning_rate": 2.9990060778648535e-05, "loss": 2.1529, "step": 1397 }, { "epoch": 0.04510486760729244, "grad_norm": 0.48046875, "learning_rate": 2.9990003415897657e-05, "loss": 2.1604, "step": 1398 }, { "epoch": 0.04513713146108878, "grad_norm": 0.546875, "learning_rate": 2.998994588814757e-05, "loss": 2.1686, "step": 1399 }, { "epoch": 0.04516939531488513, "grad_norm": 0.515625, "learning_rate": 2.9989888195398916e-05, "loss": 2.1893, "step": 1400 }, { "epoch": 0.04520165916868148, "grad_norm": 0.435546875, "learning_rate": 2.9989830337652328e-05, "loss": 2.1747, "step": 1401 }, { "epoch": 0.04523392302247782, "grad_norm": 0.5625, "learning_rate": 2.998977231490844e-05, "loss": 2.149, "step": 1402 }, { "epoch": 0.04526618687627417, "grad_norm": 0.4609375, "learning_rate": 2.998971412716789e-05, "loss": 2.1543, "step": 1403 }, { "epoch": 0.045298450730070515, "grad_norm": 0.4765625, "learning_rate": 2.9989655774431323e-05, "loss": 2.1598, "step": 1404 }, { "epoch": 0.045330714583866866, "grad_norm": 0.4921875, "learning_rate": 2.998959725669938e-05, "loss": 2.1718, "step": 1405 }, { "epoch": 0.04536297843766321, "grad_norm": 0.458984375, "learning_rate": 2.9989538573972702e-05, "loss": 2.1585, "step": 1406 }, { "epoch": 0.04539524229145955, "grad_norm": 0.470703125, "learning_rate": 2.998947972625193e-05, "loss": 2.1812, "step": 1407 }, { "epoch": 0.045427506145255904, "grad_norm": 0.44921875, "learning_rate": 2.998942071353773e-05, "loss": 2.1493, "step": 1408 }, { "epoch": 0.04545976999905225, "grad_norm": 0.5078125, "learning_rate": 2.9989361535830736e-05, "loss": 2.1956, "step": 1409 }, { "epoch": 0.0454920338528486, "grad_norm": 0.5703125, "learning_rate": 2.99893021931316e-05, "loss": 2.1829, "step": 1410 }, { "epoch": 0.04552429770664494, "grad_norm": 0.63671875, "learning_rate": 2.998924268544098e-05, "loss": 2.1665, "step": 1411 }, { "epoch": 0.045556561560441286, "grad_norm": 0.67578125, "learning_rate": 2.9989183012759532e-05, "loss": 2.1674, "step": 1412 }, { "epoch": 0.04558882541423764, "grad_norm": 0.72265625, "learning_rate": 2.998912317508791e-05, "loss": 2.2405, "step": 1413 }, { "epoch": 0.04562108926803398, "grad_norm": 0.83984375, "learning_rate": 2.9989063172426773e-05, "loss": 2.2478, "step": 1414 }, { "epoch": 0.04565335312183033, "grad_norm": 0.9140625, "learning_rate": 2.9989003004776783e-05, "loss": 2.224, "step": 1415 }, { "epoch": 0.045685616975626675, "grad_norm": 0.8828125, "learning_rate": 2.9988942672138596e-05, "loss": 2.2392, "step": 1416 }, { "epoch": 0.04571788082942302, "grad_norm": 0.6640625, "learning_rate": 2.9988882174512888e-05, "loss": 2.1984, "step": 1417 }, { "epoch": 0.04575014468321937, "grad_norm": 0.625, "learning_rate": 2.9988821511900314e-05, "loss": 2.2724, "step": 1418 }, { "epoch": 0.04578240853701571, "grad_norm": 0.75390625, "learning_rate": 2.9988760684301547e-05, "loss": 2.2565, "step": 1419 }, { "epoch": 0.045814672390812064, "grad_norm": 0.63671875, "learning_rate": 2.9988699691717253e-05, "loss": 2.2456, "step": 1420 }, { "epoch": 0.04584693624460841, "grad_norm": 0.58203125, "learning_rate": 2.9988638534148108e-05, "loss": 2.2307, "step": 1421 }, { "epoch": 0.04587920009840475, "grad_norm": 0.61328125, "learning_rate": 2.9988577211594783e-05, "loss": 2.2469, "step": 1422 }, { "epoch": 0.0459114639522011, "grad_norm": 0.57421875, "learning_rate": 2.9988515724057953e-05, "loss": 2.2417, "step": 1423 }, { "epoch": 0.045943727805997446, "grad_norm": 0.640625, "learning_rate": 2.9988454071538292e-05, "loss": 2.2591, "step": 1424 }, { "epoch": 0.0459759916597938, "grad_norm": 0.6015625, "learning_rate": 2.9988392254036484e-05, "loss": 2.2469, "step": 1425 }, { "epoch": 0.04600825551359014, "grad_norm": 0.45703125, "learning_rate": 2.9988330271553204e-05, "loss": 2.2411, "step": 1426 }, { "epoch": 0.046040519367386484, "grad_norm": 0.640625, "learning_rate": 2.998826812408914e-05, "loss": 2.2434, "step": 1427 }, { "epoch": 0.046072783221182835, "grad_norm": 0.67578125, "learning_rate": 2.9988205811644974e-05, "loss": 2.2608, "step": 1428 }, { "epoch": 0.04610504707497918, "grad_norm": 0.52734375, "learning_rate": 2.998814333422139e-05, "loss": 2.2115, "step": 1429 }, { "epoch": 0.04613731092877553, "grad_norm": 0.5859375, "learning_rate": 2.998808069181907e-05, "loss": 2.2299, "step": 1430 }, { "epoch": 0.04616957478257187, "grad_norm": 0.53515625, "learning_rate": 2.998801788443872e-05, "loss": 2.2648, "step": 1431 }, { "epoch": 0.04620183863636822, "grad_norm": 0.58984375, "learning_rate": 2.9987954912081014e-05, "loss": 2.2389, "step": 1432 }, { "epoch": 0.04623410249016457, "grad_norm": 0.5859375, "learning_rate": 2.9987891774746658e-05, "loss": 2.241, "step": 1433 }, { "epoch": 0.04626636634396091, "grad_norm": 0.53515625, "learning_rate": 2.9987828472436337e-05, "loss": 2.2284, "step": 1434 }, { "epoch": 0.04629863019775726, "grad_norm": 0.5, "learning_rate": 2.9987765005150752e-05, "loss": 2.195, "step": 1435 }, { "epoch": 0.046330894051553606, "grad_norm": 0.51171875, "learning_rate": 2.9987701372890606e-05, "loss": 2.2247, "step": 1436 }, { "epoch": 0.04636315790534995, "grad_norm": 0.52734375, "learning_rate": 2.9987637575656593e-05, "loss": 2.2172, "step": 1437 }, { "epoch": 0.0463954217591463, "grad_norm": 0.4921875, "learning_rate": 2.998757361344942e-05, "loss": 2.2457, "step": 1438 }, { "epoch": 0.046427685612942644, "grad_norm": 0.498046875, "learning_rate": 2.9987509486269785e-05, "loss": 2.2233, "step": 1439 }, { "epoch": 0.04645994946673899, "grad_norm": 0.5625, "learning_rate": 2.99874451941184e-05, "loss": 2.2278, "step": 1440 }, { "epoch": 0.04649221332053534, "grad_norm": 0.4765625, "learning_rate": 2.9987380736995973e-05, "loss": 2.2582, "step": 1441 }, { "epoch": 0.04652447717433168, "grad_norm": 0.5, "learning_rate": 2.9987316114903205e-05, "loss": 2.2314, "step": 1442 }, { "epoch": 0.04655674102812803, "grad_norm": 0.453125, "learning_rate": 2.9987251327840815e-05, "loss": 2.2321, "step": 1443 }, { "epoch": 0.04658900488192438, "grad_norm": 0.4765625, "learning_rate": 2.9987186375809513e-05, "loss": 2.2457, "step": 1444 }, { "epoch": 0.04662126873572072, "grad_norm": 0.474609375, "learning_rate": 2.9987121258810023e-05, "loss": 2.2471, "step": 1445 }, { "epoch": 0.04665353258951707, "grad_norm": 0.4453125, "learning_rate": 2.9987055976843043e-05, "loss": 2.2341, "step": 1446 }, { "epoch": 0.046685796443313415, "grad_norm": 0.498046875, "learning_rate": 2.998699052990931e-05, "loss": 2.2438, "step": 1447 }, { "epoch": 0.046718060297109766, "grad_norm": 0.50390625, "learning_rate": 2.9986924918009532e-05, "loss": 2.2384, "step": 1448 }, { "epoch": 0.04675032415090611, "grad_norm": 0.55078125, "learning_rate": 2.998685914114444e-05, "loss": 2.2319, "step": 1449 }, { "epoch": 0.046782588004702454, "grad_norm": 0.51953125, "learning_rate": 2.9986793199314753e-05, "loss": 2.2319, "step": 1450 }, { "epoch": 0.046814851858498804, "grad_norm": 0.59375, "learning_rate": 2.99867270925212e-05, "loss": 2.2122, "step": 1451 }, { "epoch": 0.04684711571229515, "grad_norm": 0.68359375, "learning_rate": 2.9986660820764503e-05, "loss": 2.2441, "step": 1452 }, { "epoch": 0.0468793795660915, "grad_norm": 0.74609375, "learning_rate": 2.9986594384045394e-05, "loss": 2.2604, "step": 1453 }, { "epoch": 0.04691164341988784, "grad_norm": 0.91796875, "learning_rate": 2.9986527782364606e-05, "loss": 2.2352, "step": 1454 }, { "epoch": 0.046943907273684186, "grad_norm": 1.125, "learning_rate": 2.9986461015722875e-05, "loss": 2.2685, "step": 1455 }, { "epoch": 0.04697617112748054, "grad_norm": 0.7890625, "learning_rate": 2.998639408412093e-05, "loss": 2.2183, "step": 1456 }, { "epoch": 0.04700843498127688, "grad_norm": 0.546875, "learning_rate": 2.998632698755951e-05, "loss": 2.2159, "step": 1457 }, { "epoch": 0.04704069883507323, "grad_norm": 0.65234375, "learning_rate": 2.998625972603936e-05, "loss": 2.2399, "step": 1458 }, { "epoch": 0.047072962688869575, "grad_norm": 0.62109375, "learning_rate": 2.9986192299561204e-05, "loss": 2.2503, "step": 1459 }, { "epoch": 0.04710522654266592, "grad_norm": 0.490234375, "learning_rate": 2.9986124708125798e-05, "loss": 2.242, "step": 1460 }, { "epoch": 0.04713749039646227, "grad_norm": 0.58203125, "learning_rate": 2.9986056951733884e-05, "loss": 2.2618, "step": 1461 }, { "epoch": 0.047169754250258614, "grad_norm": 0.55078125, "learning_rate": 2.9985989030386206e-05, "loss": 2.2578, "step": 1462 }, { "epoch": 0.047202018104054964, "grad_norm": 0.53515625, "learning_rate": 2.998592094408351e-05, "loss": 2.2515, "step": 1463 }, { "epoch": 0.04723428195785131, "grad_norm": 0.5234375, "learning_rate": 2.9985852692826543e-05, "loss": 2.2434, "step": 1464 }, { "epoch": 0.04726654581164765, "grad_norm": 0.44140625, "learning_rate": 2.9985784276616068e-05, "loss": 2.2496, "step": 1465 }, { "epoch": 0.047298809665444, "grad_norm": 0.5, "learning_rate": 2.9985715695452824e-05, "loss": 2.2363, "step": 1466 }, { "epoch": 0.047331073519240346, "grad_norm": 0.482421875, "learning_rate": 2.998564694933758e-05, "loss": 2.2051, "step": 1467 }, { "epoch": 0.0473633373730367, "grad_norm": 0.52734375, "learning_rate": 2.998557803827108e-05, "loss": 2.2586, "step": 1468 }, { "epoch": 0.04739560122683304, "grad_norm": 0.6171875, "learning_rate": 2.9985508962254085e-05, "loss": 2.2521, "step": 1469 }, { "epoch": 0.047427865080629385, "grad_norm": 0.70703125, "learning_rate": 2.9985439721287365e-05, "loss": 2.2531, "step": 1470 }, { "epoch": 0.047460128934425735, "grad_norm": 0.5703125, "learning_rate": 2.9985370315371668e-05, "loss": 2.2462, "step": 1471 }, { "epoch": 0.04749239278822208, "grad_norm": 0.515625, "learning_rate": 2.9985300744507767e-05, "loss": 2.2426, "step": 1472 }, { "epoch": 0.04752465664201843, "grad_norm": 0.6796875, "learning_rate": 2.9985231008696427e-05, "loss": 2.2465, "step": 1473 }, { "epoch": 0.047556920495814774, "grad_norm": 0.6484375, "learning_rate": 2.9985161107938417e-05, "loss": 2.2643, "step": 1474 }, { "epoch": 0.04758918434961112, "grad_norm": 0.490234375, "learning_rate": 2.99850910422345e-05, "loss": 2.2486, "step": 1475 }, { "epoch": 0.04762144820340747, "grad_norm": 0.6015625, "learning_rate": 2.998502081158545e-05, "loss": 2.2353, "step": 1476 }, { "epoch": 0.04765371205720381, "grad_norm": 0.671875, "learning_rate": 2.9984950415992048e-05, "loss": 2.2341, "step": 1477 }, { "epoch": 0.04768597591100016, "grad_norm": 0.5, "learning_rate": 2.9984879855455058e-05, "loss": 2.2316, "step": 1478 }, { "epoch": 0.047718239764796506, "grad_norm": 0.62109375, "learning_rate": 2.9984809129975257e-05, "loss": 2.2483, "step": 1479 }, { "epoch": 0.04775050361859285, "grad_norm": 0.609375, "learning_rate": 2.998473823955343e-05, "loss": 2.2453, "step": 1480 }, { "epoch": 0.0477827674723892, "grad_norm": 0.50390625, "learning_rate": 2.9984667184190358e-05, "loss": 2.2081, "step": 1481 }, { "epoch": 0.047815031326185545, "grad_norm": 0.56640625, "learning_rate": 2.998459596388681e-05, "loss": 2.2399, "step": 1482 }, { "epoch": 0.04784729517998189, "grad_norm": 0.50390625, "learning_rate": 2.9984524578643588e-05, "loss": 2.2334, "step": 1483 }, { "epoch": 0.04787955903377824, "grad_norm": 0.5546875, "learning_rate": 2.9984453028461468e-05, "loss": 2.2471, "step": 1484 }, { "epoch": 0.04791182288757458, "grad_norm": 0.640625, "learning_rate": 2.9984381313341236e-05, "loss": 2.2467, "step": 1485 }, { "epoch": 0.047944086741370934, "grad_norm": 0.55859375, "learning_rate": 2.998430943328369e-05, "loss": 2.2334, "step": 1486 }, { "epoch": 0.04797635059516728, "grad_norm": 0.48046875, "learning_rate": 2.9984237388289608e-05, "loss": 2.246, "step": 1487 }, { "epoch": 0.04800861444896362, "grad_norm": 0.5078125, "learning_rate": 2.9984165178359796e-05, "loss": 2.2208, "step": 1488 }, { "epoch": 0.04804087830275997, "grad_norm": 0.49609375, "learning_rate": 2.998409280349504e-05, "loss": 2.224, "step": 1489 }, { "epoch": 0.048073142156556316, "grad_norm": 0.5, "learning_rate": 2.9984020263696143e-05, "loss": 2.2045, "step": 1490 }, { "epoch": 0.048105406010352667, "grad_norm": 0.427734375, "learning_rate": 2.9983947558963897e-05, "loss": 2.2376, "step": 1491 }, { "epoch": 0.04813766986414901, "grad_norm": 0.5703125, "learning_rate": 2.9983874689299107e-05, "loss": 2.1961, "step": 1492 }, { "epoch": 0.048169933717945354, "grad_norm": 0.58984375, "learning_rate": 2.9983801654702575e-05, "loss": 2.2275, "step": 1493 }, { "epoch": 0.048202197571741705, "grad_norm": 0.5234375, "learning_rate": 2.9983728455175105e-05, "loss": 2.2386, "step": 1494 }, { "epoch": 0.04823446142553805, "grad_norm": 0.5078125, "learning_rate": 2.99836550907175e-05, "loss": 2.2409, "step": 1495 }, { "epoch": 0.0482667252793344, "grad_norm": 0.6640625, "learning_rate": 2.9983581561330567e-05, "loss": 2.2108, "step": 1496 }, { "epoch": 0.04829898913313074, "grad_norm": 0.7265625, "learning_rate": 2.998350786701512e-05, "loss": 2.2655, "step": 1497 }, { "epoch": 0.04833125298692709, "grad_norm": 0.69921875, "learning_rate": 2.9983434007771962e-05, "loss": 2.2297, "step": 1498 }, { "epoch": 0.04836351684072344, "grad_norm": 0.515625, "learning_rate": 2.9983359983601915e-05, "loss": 2.2375, "step": 1499 }, { "epoch": 0.04839578069451978, "grad_norm": 0.484375, "learning_rate": 2.99832857945058e-05, "loss": 2.2475, "step": 1500 }, { "epoch": 0.04842804454831613, "grad_norm": 0.515625, "learning_rate": 2.9983211440484412e-05, "loss": 2.2379, "step": 1501 }, { "epoch": 0.048460308402112476, "grad_norm": 0.5, "learning_rate": 2.9983136921538585e-05, "loss": 2.2165, "step": 1502 }, { "epoch": 0.04849257225590882, "grad_norm": 0.45703125, "learning_rate": 2.9983062237669134e-05, "loss": 2.2631, "step": 1503 }, { "epoch": 0.04852483610970517, "grad_norm": 0.45703125, "learning_rate": 2.9982987388876884e-05, "loss": 2.2317, "step": 1504 }, { "epoch": 0.048557099963501514, "grad_norm": 0.4921875, "learning_rate": 2.9982912375162656e-05, "loss": 2.1934, "step": 1505 }, { "epoch": 0.048589363817297865, "grad_norm": 0.51953125, "learning_rate": 2.9982837196527278e-05, "loss": 2.2324, "step": 1506 }, { "epoch": 0.04862162767109421, "grad_norm": 0.51953125, "learning_rate": 2.998276185297158e-05, "loss": 2.2284, "step": 1507 }, { "epoch": 0.04865389152489055, "grad_norm": 0.4375, "learning_rate": 2.9982686344496382e-05, "loss": 2.2321, "step": 1508 }, { "epoch": 0.0486861553786869, "grad_norm": 0.462890625, "learning_rate": 2.9982610671102528e-05, "loss": 2.2334, "step": 1509 }, { "epoch": 0.04871841923248325, "grad_norm": 0.4921875, "learning_rate": 2.9982534832790836e-05, "loss": 2.2281, "step": 1510 }, { "epoch": 0.0487506830862796, "grad_norm": 0.47265625, "learning_rate": 2.9982458829562156e-05, "loss": 2.2351, "step": 1511 }, { "epoch": 0.04878294694007594, "grad_norm": 0.462890625, "learning_rate": 2.9982382661417315e-05, "loss": 2.2425, "step": 1512 }, { "epoch": 0.048815210793872285, "grad_norm": 0.484375, "learning_rate": 2.9982306328357156e-05, "loss": 2.1959, "step": 1513 }, { "epoch": 0.048847474647668636, "grad_norm": 0.53125, "learning_rate": 2.9982229830382514e-05, "loss": 2.2273, "step": 1514 }, { "epoch": 0.04887973850146498, "grad_norm": 0.51953125, "learning_rate": 2.9982153167494237e-05, "loss": 2.2376, "step": 1515 }, { "epoch": 0.04891200235526133, "grad_norm": 0.5078125, "learning_rate": 2.9982076339693165e-05, "loss": 2.2096, "step": 1516 }, { "epoch": 0.048944266209057674, "grad_norm": 0.478515625, "learning_rate": 2.9981999346980144e-05, "loss": 2.2186, "step": 1517 }, { "epoch": 0.04897653006285402, "grad_norm": 0.451171875, "learning_rate": 2.9981922189356023e-05, "loss": 2.2209, "step": 1518 }, { "epoch": 0.04900879391665037, "grad_norm": 0.490234375, "learning_rate": 2.998184486682165e-05, "loss": 2.2435, "step": 1519 }, { "epoch": 0.04904105777044671, "grad_norm": 0.47265625, "learning_rate": 2.998176737937788e-05, "loss": 2.2437, "step": 1520 }, { "epoch": 0.04907332162424306, "grad_norm": 0.486328125, "learning_rate": 2.998168972702556e-05, "loss": 2.2309, "step": 1521 }, { "epoch": 0.04910558547803941, "grad_norm": 0.4765625, "learning_rate": 2.9981611909765547e-05, "loss": 2.1943, "step": 1522 }, { "epoch": 0.04913784933183575, "grad_norm": 0.486328125, "learning_rate": 2.9981533927598697e-05, "loss": 2.2468, "step": 1523 }, { "epoch": 0.0491701131856321, "grad_norm": 0.451171875, "learning_rate": 2.998145578052587e-05, "loss": 2.2456, "step": 1524 }, { "epoch": 0.049202377039428445, "grad_norm": 0.47265625, "learning_rate": 2.9981377468547928e-05, "loss": 2.205, "step": 1525 }, { "epoch": 0.04923464089322479, "grad_norm": 0.486328125, "learning_rate": 2.9981298991665725e-05, "loss": 2.2239, "step": 1526 }, { "epoch": 0.04926690474702114, "grad_norm": 0.50390625, "learning_rate": 2.9981220349880134e-05, "loss": 2.2219, "step": 1527 }, { "epoch": 0.04929916860081748, "grad_norm": 0.484375, "learning_rate": 2.9981141543192016e-05, "loss": 2.2314, "step": 1528 }, { "epoch": 0.049331432454613834, "grad_norm": 0.515625, "learning_rate": 2.998106257160224e-05, "loss": 2.2432, "step": 1529 }, { "epoch": 0.04936369630841018, "grad_norm": 0.546875, "learning_rate": 2.9980983435111674e-05, "loss": 2.2507, "step": 1530 }, { "epoch": 0.04939596016220652, "grad_norm": 0.546875, "learning_rate": 2.9980904133721187e-05, "loss": 2.2261, "step": 1531 }, { "epoch": 0.04942822401600287, "grad_norm": 0.57421875, "learning_rate": 2.9980824667431657e-05, "loss": 2.2039, "step": 1532 }, { "epoch": 0.049460487869799216, "grad_norm": 1.109375, "learning_rate": 2.998074503624396e-05, "loss": 2.1979, "step": 1533 }, { "epoch": 0.04949275172359557, "grad_norm": 2.0625, "learning_rate": 2.9980665240158964e-05, "loss": 2.2573, "step": 1534 }, { "epoch": 0.04952501557739191, "grad_norm": 0.89453125, "learning_rate": 2.998058527917755e-05, "loss": 2.2047, "step": 1535 }, { "epoch": 0.049557279431188254, "grad_norm": 4.5, "learning_rate": 2.9980505153300606e-05, "loss": 2.2157, "step": 1536 }, { "epoch": 0.049589543284984605, "grad_norm": 4.375, "learning_rate": 2.998042486252901e-05, "loss": 2.2255, "step": 1537 }, { "epoch": 0.04962180713878095, "grad_norm": 1.46875, "learning_rate": 2.998034440686364e-05, "loss": 2.2347, "step": 1538 }, { "epoch": 0.0496540709925773, "grad_norm": 5.09375, "learning_rate": 2.998026378630538e-05, "loss": 2.2567, "step": 1539 }, { "epoch": 0.04968633484637364, "grad_norm": 5.78125, "learning_rate": 2.9980183000855132e-05, "loss": 2.2582, "step": 1540 }, { "epoch": 0.04971859870016999, "grad_norm": 4.5, "learning_rate": 2.998010205051377e-05, "loss": 2.2544, "step": 1541 }, { "epoch": 0.04975086255396634, "grad_norm": 1.578125, "learning_rate": 2.99800209352822e-05, "loss": 2.2107, "step": 1542 }, { "epoch": 0.04978312640776268, "grad_norm": 3.953125, "learning_rate": 2.9979939655161297e-05, "loss": 2.2445, "step": 1543 }, { "epoch": 0.04981539026155903, "grad_norm": 4.40625, "learning_rate": 2.9979858210151967e-05, "loss": 2.2522, "step": 1544 }, { "epoch": 0.049847654115355376, "grad_norm": 3.453125, "learning_rate": 2.9979776600255107e-05, "loss": 2.2558, "step": 1545 }, { "epoch": 0.04987991796915172, "grad_norm": 1.7578125, "learning_rate": 2.997969482547161e-05, "loss": 2.2527, "step": 1546 }, { "epoch": 0.04991218182294807, "grad_norm": 2.34375, "learning_rate": 2.997961288580238e-05, "loss": 2.2483, "step": 1547 }, { "epoch": 0.049944445676744414, "grad_norm": 2.578125, "learning_rate": 2.9979530781248315e-05, "loss": 2.2359, "step": 1548 }, { "epoch": 0.049976709530540765, "grad_norm": 2.171875, "learning_rate": 2.997944851181033e-05, "loss": 2.2187, "step": 1549 }, { "epoch": 0.05000897338433711, "grad_norm": 1.2734375, "learning_rate": 2.997936607748931e-05, "loss": 2.2207, "step": 1550 }, { "epoch": 0.05004123723813345, "grad_norm": 1.5390625, "learning_rate": 2.9979283478286183e-05, "loss": 2.2123, "step": 1551 }, { "epoch": 0.050073501091929803, "grad_norm": 1.71875, "learning_rate": 2.9979200714201846e-05, "loss": 2.224, "step": 1552 }, { "epoch": 0.05010576494572615, "grad_norm": 1.234375, "learning_rate": 2.9979117785237214e-05, "loss": 2.2283, "step": 1553 }, { "epoch": 0.0501380287995225, "grad_norm": 0.90625, "learning_rate": 2.99790346913932e-05, "loss": 2.2191, "step": 1554 }, { "epoch": 0.05017029265331884, "grad_norm": 1.296875, "learning_rate": 2.997895143267072e-05, "loss": 2.2399, "step": 1555 }, { "epoch": 0.050202556507115186, "grad_norm": 1.25, "learning_rate": 2.9978868009070684e-05, "loss": 2.2353, "step": 1556 }, { "epoch": 0.050234820360911536, "grad_norm": 0.69140625, "learning_rate": 2.9978784420594018e-05, "loss": 2.2275, "step": 1557 }, { "epoch": 0.05026708421470788, "grad_norm": 0.97265625, "learning_rate": 2.9978700667241637e-05, "loss": 2.1841, "step": 1558 }, { "epoch": 0.05029934806850423, "grad_norm": 0.92578125, "learning_rate": 2.9978616749014466e-05, "loss": 2.2278, "step": 1559 }, { "epoch": 0.050331611922300575, "grad_norm": 0.69921875, "learning_rate": 2.997853266591343e-05, "loss": 2.213, "step": 1560 }, { "epoch": 0.05036387577609692, "grad_norm": 0.7109375, "learning_rate": 2.9978448417939448e-05, "loss": 2.2169, "step": 1561 }, { "epoch": 0.05039613962989327, "grad_norm": 0.765625, "learning_rate": 2.9978364005093452e-05, "loss": 2.2252, "step": 1562 }, { "epoch": 0.05042840348368961, "grad_norm": 0.6796875, "learning_rate": 2.997827942737637e-05, "loss": 2.2316, "step": 1563 }, { "epoch": 0.050460667337485964, "grad_norm": 0.63671875, "learning_rate": 2.9978194684789133e-05, "loss": 2.2591, "step": 1564 }, { "epoch": 0.05049293119128231, "grad_norm": 0.671875, "learning_rate": 2.9978109777332674e-05, "loss": 2.1805, "step": 1565 }, { "epoch": 0.05052519504507865, "grad_norm": 0.734375, "learning_rate": 2.997802470500793e-05, "loss": 2.2705, "step": 1566 }, { "epoch": 0.050557458898875, "grad_norm": 0.625, "learning_rate": 2.997793946781584e-05, "loss": 2.2426, "step": 1567 }, { "epoch": 0.050589722752671346, "grad_norm": 0.56640625, "learning_rate": 2.9977854065757327e-05, "loss": 2.2327, "step": 1568 }, { "epoch": 0.05062198660646769, "grad_norm": 0.65234375, "learning_rate": 2.9977768498833348e-05, "loss": 2.1986, "step": 1569 }, { "epoch": 0.05065425046026404, "grad_norm": 0.6171875, "learning_rate": 2.9977682767044838e-05, "loss": 2.2115, "step": 1570 }, { "epoch": 0.050686514314060384, "grad_norm": 0.482421875, "learning_rate": 2.9977596870392742e-05, "loss": 2.2281, "step": 1571 }, { "epoch": 0.050718778167856735, "grad_norm": 0.61328125, "learning_rate": 2.9977510808878e-05, "loss": 2.1806, "step": 1572 }, { "epoch": 0.05075104202165308, "grad_norm": 0.5859375, "learning_rate": 2.9977424582501567e-05, "loss": 2.1216, "step": 1573 }, { "epoch": 0.05078330587544942, "grad_norm": 0.44140625, "learning_rate": 2.9977338191264386e-05, "loss": 2.1214, "step": 1574 }, { "epoch": 0.05081556972924577, "grad_norm": 0.55078125, "learning_rate": 2.9977251635167413e-05, "loss": 2.2096, "step": 1575 }, { "epoch": 0.05084783358304212, "grad_norm": 0.53125, "learning_rate": 2.99771649142116e-05, "loss": 2.2309, "step": 1576 }, { "epoch": 0.05088009743683847, "grad_norm": 0.4765625, "learning_rate": 2.99770780283979e-05, "loss": 2.2326, "step": 1577 }, { "epoch": 0.05091236129063481, "grad_norm": 0.50390625, "learning_rate": 2.997699097772727e-05, "loss": 2.2178, "step": 1578 }, { "epoch": 0.050944625144431155, "grad_norm": 0.4921875, "learning_rate": 2.9976903762200662e-05, "loss": 2.2333, "step": 1579 }, { "epoch": 0.050976888998227506, "grad_norm": 0.462890625, "learning_rate": 2.9976816381819045e-05, "loss": 2.1894, "step": 1580 }, { "epoch": 0.05100915285202385, "grad_norm": 0.4765625, "learning_rate": 2.9976728836583383e-05, "loss": 2.2102, "step": 1581 }, { "epoch": 0.0510414167058202, "grad_norm": 0.474609375, "learning_rate": 2.997664112649463e-05, "loss": 2.2079, "step": 1582 }, { "epoch": 0.051073680559616544, "grad_norm": 0.50390625, "learning_rate": 2.9976553251553754e-05, "loss": 2.2333, "step": 1583 }, { "epoch": 0.05110594441341289, "grad_norm": 0.455078125, "learning_rate": 2.9976465211761724e-05, "loss": 2.2145, "step": 1584 }, { "epoch": 0.05113820826720924, "grad_norm": 0.46875, "learning_rate": 2.9976377007119512e-05, "loss": 2.2409, "step": 1585 }, { "epoch": 0.05117047212100558, "grad_norm": 0.46484375, "learning_rate": 2.9976288637628083e-05, "loss": 2.1903, "step": 1586 }, { "epoch": 0.05120273597480193, "grad_norm": 0.48046875, "learning_rate": 2.9976200103288417e-05, "loss": 2.2024, "step": 1587 }, { "epoch": 0.05123499982859828, "grad_norm": 0.51953125, "learning_rate": 2.997611140410148e-05, "loss": 2.212, "step": 1588 }, { "epoch": 0.05126726368239462, "grad_norm": 0.48046875, "learning_rate": 2.9976022540068254e-05, "loss": 2.2008, "step": 1589 }, { "epoch": 0.05129952753619097, "grad_norm": 0.46484375, "learning_rate": 2.9975933511189712e-05, "loss": 2.2074, "step": 1590 }, { "epoch": 0.051331791389987315, "grad_norm": 0.5078125, "learning_rate": 2.9975844317466846e-05, "loss": 2.2013, "step": 1591 }, { "epoch": 0.051364055243783666, "grad_norm": 0.5546875, "learning_rate": 2.9975754958900622e-05, "loss": 2.2226, "step": 1592 }, { "epoch": 0.05139631909758001, "grad_norm": 0.484375, "learning_rate": 2.997566543549203e-05, "loss": 2.2507, "step": 1593 }, { "epoch": 0.05142858295137635, "grad_norm": 0.52734375, "learning_rate": 2.997557574724206e-05, "loss": 2.2469, "step": 1594 }, { "epoch": 0.051460846805172704, "grad_norm": 0.46484375, "learning_rate": 2.9975485894151696e-05, "loss": 2.2131, "step": 1595 }, { "epoch": 0.05149311065896905, "grad_norm": 0.494140625, "learning_rate": 2.9975395876221927e-05, "loss": 2.2497, "step": 1596 }, { "epoch": 0.0515253745127654, "grad_norm": 0.48046875, "learning_rate": 2.9975305693453746e-05, "loss": 2.2077, "step": 1597 }, { "epoch": 0.05155763836656174, "grad_norm": 0.515625, "learning_rate": 2.9975215345848138e-05, "loss": 2.2059, "step": 1598 }, { "epoch": 0.051589902220358086, "grad_norm": 0.5, "learning_rate": 2.99751248334061e-05, "loss": 2.2532, "step": 1599 }, { "epoch": 0.05162216607415444, "grad_norm": 0.4453125, "learning_rate": 2.997503415612864e-05, "loss": 2.2102, "step": 1600 }, { "epoch": 0.05165442992795078, "grad_norm": 0.55859375, "learning_rate": 2.9974943314016744e-05, "loss": 2.2584, "step": 1601 }, { "epoch": 0.05168669378174713, "grad_norm": 0.546875, "learning_rate": 2.997485230707141e-05, "loss": 2.2392, "step": 1602 }, { "epoch": 0.051718957635543475, "grad_norm": 0.44140625, "learning_rate": 2.997476113529365e-05, "loss": 2.2148, "step": 1603 }, { "epoch": 0.05175122148933982, "grad_norm": 0.51953125, "learning_rate": 2.997466979868446e-05, "loss": 2.2106, "step": 1604 }, { "epoch": 0.05178348534313617, "grad_norm": 0.55859375, "learning_rate": 2.9974578297244846e-05, "loss": 2.2484, "step": 1605 }, { "epoch": 0.05181574919693251, "grad_norm": 0.47265625, "learning_rate": 2.997448663097582e-05, "loss": 2.2479, "step": 1606 }, { "epoch": 0.051848013050728864, "grad_norm": 0.431640625, "learning_rate": 2.9974394799878385e-05, "loss": 2.2255, "step": 1607 }, { "epoch": 0.05188027690452521, "grad_norm": 0.48046875, "learning_rate": 2.9974302803953554e-05, "loss": 2.2273, "step": 1608 }, { "epoch": 0.05191254075832155, "grad_norm": 0.4296875, "learning_rate": 2.9974210643202345e-05, "loss": 2.2099, "step": 1609 }, { "epoch": 0.0519448046121179, "grad_norm": 0.46484375, "learning_rate": 2.9974118317625765e-05, "loss": 2.2186, "step": 1610 }, { "epoch": 0.051977068465914246, "grad_norm": 0.41015625, "learning_rate": 2.997402582722483e-05, "loss": 2.2376, "step": 1611 }, { "epoch": 0.0520093323197106, "grad_norm": 0.474609375, "learning_rate": 2.9973933172000565e-05, "loss": 2.2477, "step": 1612 }, { "epoch": 0.05204159617350694, "grad_norm": 0.470703125, "learning_rate": 2.9973840351953987e-05, "loss": 2.2516, "step": 1613 }, { "epoch": 0.052073860027303284, "grad_norm": 0.5078125, "learning_rate": 2.9973747367086115e-05, "loss": 2.2235, "step": 1614 }, { "epoch": 0.052106123881099635, "grad_norm": 0.52734375, "learning_rate": 2.9973654217397973e-05, "loss": 2.2295, "step": 1615 }, { "epoch": 0.05213838773489598, "grad_norm": 0.53515625, "learning_rate": 2.9973560902890587e-05, "loss": 2.2082, "step": 1616 }, { "epoch": 0.05217065158869232, "grad_norm": 0.5390625, "learning_rate": 2.9973467423564985e-05, "loss": 2.2115, "step": 1617 }, { "epoch": 0.05220291544248867, "grad_norm": 0.4921875, "learning_rate": 2.9973373779422198e-05, "loss": 2.2151, "step": 1618 }, { "epoch": 0.05223517929628502, "grad_norm": 0.44921875, "learning_rate": 2.997327997046325e-05, "loss": 2.2299, "step": 1619 }, { "epoch": 0.05226744315008137, "grad_norm": 0.52734375, "learning_rate": 2.997318599668918e-05, "loss": 2.2064, "step": 1620 }, { "epoch": 0.05229970700387771, "grad_norm": 0.5703125, "learning_rate": 2.9973091858101023e-05, "loss": 2.2064, "step": 1621 }, { "epoch": 0.052331970857674055, "grad_norm": 0.52734375, "learning_rate": 2.9972997554699808e-05, "loss": 2.1749, "step": 1622 }, { "epoch": 0.052364234711470406, "grad_norm": 0.51953125, "learning_rate": 2.9972903086486584e-05, "loss": 2.2335, "step": 1623 }, { "epoch": 0.05239649856526675, "grad_norm": 0.515625, "learning_rate": 2.9972808453462375e-05, "loss": 2.2108, "step": 1624 }, { "epoch": 0.0524287624190631, "grad_norm": 0.4765625, "learning_rate": 2.9972713655628235e-05, "loss": 2.2046, "step": 1625 }, { "epoch": 0.052461026272859444, "grad_norm": 0.515625, "learning_rate": 2.9972618692985207e-05, "loss": 2.2177, "step": 1626 }, { "epoch": 0.05249329012665579, "grad_norm": 0.515625, "learning_rate": 2.997252356553434e-05, "loss": 2.2297, "step": 1627 }, { "epoch": 0.05252555398045214, "grad_norm": 0.5078125, "learning_rate": 2.9972428273276662e-05, "loss": 2.2029, "step": 1628 }, { "epoch": 0.05255781783424848, "grad_norm": 0.55859375, "learning_rate": 2.9972332816213242e-05, "loss": 2.2512, "step": 1629 }, { "epoch": 0.05259008168804483, "grad_norm": 0.62890625, "learning_rate": 2.9972237194345122e-05, "loss": 2.2375, "step": 1630 }, { "epoch": 0.05262234554184118, "grad_norm": 0.69921875, "learning_rate": 2.9972141407673355e-05, "loss": 2.2254, "step": 1631 }, { "epoch": 0.05265460939563752, "grad_norm": 0.79296875, "learning_rate": 2.9972045456199e-05, "loss": 2.2263, "step": 1632 }, { "epoch": 0.05268687324943387, "grad_norm": 0.8671875, "learning_rate": 2.9971949339923105e-05, "loss": 2.2166, "step": 1633 }, { "epoch": 0.052719137103230215, "grad_norm": 0.77734375, "learning_rate": 2.9971853058846735e-05, "loss": 2.2135, "step": 1634 }, { "epoch": 0.052751400957026566, "grad_norm": 0.55859375, "learning_rate": 2.9971756612970947e-05, "loss": 2.2243, "step": 1635 }, { "epoch": 0.05278366481082291, "grad_norm": 0.64453125, "learning_rate": 2.99716600022968e-05, "loss": 2.204, "step": 1636 }, { "epoch": 0.052815928664619254, "grad_norm": 0.65625, "learning_rate": 2.9971563226825366e-05, "loss": 2.2351, "step": 1637 }, { "epoch": 0.052848192518415604, "grad_norm": 0.515625, "learning_rate": 2.9971466286557703e-05, "loss": 2.2044, "step": 1638 }, { "epoch": 0.05288045637221195, "grad_norm": 0.578125, "learning_rate": 2.997136918149488e-05, "loss": 2.1828, "step": 1639 }, { "epoch": 0.0529127202260083, "grad_norm": 0.49609375, "learning_rate": 2.997127191163796e-05, "loss": 2.2414, "step": 1640 }, { "epoch": 0.05294498407980464, "grad_norm": 0.466796875, "learning_rate": 2.997117447698802e-05, "loss": 2.1776, "step": 1641 }, { "epoch": 0.052977247933600986, "grad_norm": 0.50390625, "learning_rate": 2.9971076877546136e-05, "loss": 2.2388, "step": 1642 }, { "epoch": 0.05300951178739734, "grad_norm": 0.49609375, "learning_rate": 2.9970979113313378e-05, "loss": 2.2313, "step": 1643 }, { "epoch": 0.05304177564119368, "grad_norm": 0.416015625, "learning_rate": 2.9970881184290818e-05, "loss": 2.1947, "step": 1644 }, { "epoch": 0.05307403949499003, "grad_norm": 0.48046875, "learning_rate": 2.9970783090479542e-05, "loss": 2.2249, "step": 1645 }, { "epoch": 0.053106303348786375, "grad_norm": 0.51953125, "learning_rate": 2.9970684831880626e-05, "loss": 2.2021, "step": 1646 }, { "epoch": 0.05313856720258272, "grad_norm": 0.515625, "learning_rate": 2.9970586408495146e-05, "loss": 2.2208, "step": 1647 }, { "epoch": 0.05317083105637907, "grad_norm": 0.41015625, "learning_rate": 2.9970487820324198e-05, "loss": 2.2367, "step": 1648 }, { "epoch": 0.053203094910175414, "grad_norm": 0.4609375, "learning_rate": 2.9970389067368854e-05, "loss": 2.2105, "step": 1649 }, { "epoch": 0.053235358763971764, "grad_norm": 0.4921875, "learning_rate": 2.9970290149630206e-05, "loss": 2.2393, "step": 1650 }, { "epoch": 0.05326762261776811, "grad_norm": 0.458984375, "learning_rate": 2.9970191067109348e-05, "loss": 2.2374, "step": 1651 }, { "epoch": 0.05329988647156445, "grad_norm": 0.490234375, "learning_rate": 2.9970091819807365e-05, "loss": 2.2308, "step": 1652 }, { "epoch": 0.0533321503253608, "grad_norm": 0.474609375, "learning_rate": 2.9969992407725348e-05, "loss": 2.2417, "step": 1653 }, { "epoch": 0.053364414179157146, "grad_norm": 0.486328125, "learning_rate": 2.9969892830864397e-05, "loss": 2.2025, "step": 1654 }, { "epoch": 0.0533966780329535, "grad_norm": 0.447265625, "learning_rate": 2.9969793089225607e-05, "loss": 2.1366, "step": 1655 }, { "epoch": 0.05342894188674984, "grad_norm": 0.4296875, "learning_rate": 2.996969318281007e-05, "loss": 2.1545, "step": 1656 }, { "epoch": 0.053461205740546185, "grad_norm": 0.455078125, "learning_rate": 2.9969593111618894e-05, "loss": 2.1801, "step": 1657 }, { "epoch": 0.053493469594342535, "grad_norm": 0.490234375, "learning_rate": 2.996949287565317e-05, "loss": 2.1468, "step": 1658 }, { "epoch": 0.05352573344813888, "grad_norm": 0.498046875, "learning_rate": 2.9969392474914012e-05, "loss": 2.1603, "step": 1659 }, { "epoch": 0.05355799730193522, "grad_norm": 0.51953125, "learning_rate": 2.996929190940252e-05, "loss": 2.1782, "step": 1660 }, { "epoch": 0.053590261155731574, "grad_norm": 0.59765625, "learning_rate": 2.99691911791198e-05, "loss": 2.1818, "step": 1661 }, { "epoch": 0.05362252500952792, "grad_norm": 0.482421875, "learning_rate": 2.9969090284066966e-05, "loss": 2.1492, "step": 1662 }, { "epoch": 0.05365478886332427, "grad_norm": 0.515625, "learning_rate": 2.9968989224245122e-05, "loss": 2.1473, "step": 1663 }, { "epoch": 0.05368705271712061, "grad_norm": 0.5546875, "learning_rate": 2.9968887999655384e-05, "loss": 2.1551, "step": 1664 }, { "epoch": 0.053719316570916956, "grad_norm": 0.5078125, "learning_rate": 2.996878661029886e-05, "loss": 2.1526, "step": 1665 }, { "epoch": 0.053751580424713306, "grad_norm": 0.5078125, "learning_rate": 2.996868505617668e-05, "loss": 2.1241, "step": 1666 }, { "epoch": 0.05378384427850965, "grad_norm": 0.447265625, "learning_rate": 2.996858333728995e-05, "loss": 2.1588, "step": 1667 }, { "epoch": 0.053816108132306, "grad_norm": 0.52734375, "learning_rate": 2.9968481453639792e-05, "loss": 2.1688, "step": 1668 }, { "epoch": 0.053848371986102345, "grad_norm": 0.5, "learning_rate": 2.9968379405227333e-05, "loss": 2.1465, "step": 1669 }, { "epoch": 0.05388063583989869, "grad_norm": 0.46875, "learning_rate": 2.996827719205369e-05, "loss": 2.16, "step": 1670 }, { "epoch": 0.05391289969369504, "grad_norm": 0.482421875, "learning_rate": 2.9968174814119986e-05, "loss": 2.136, "step": 1671 }, { "epoch": 0.05394516354749138, "grad_norm": 0.48828125, "learning_rate": 2.9968072271427353e-05, "loss": 2.1135, "step": 1672 }, { "epoch": 0.053977427401287734, "grad_norm": 0.58984375, "learning_rate": 2.996796956397692e-05, "loss": 2.1603, "step": 1673 }, { "epoch": 0.05400969125508408, "grad_norm": 0.6328125, "learning_rate": 2.9967866691769818e-05, "loss": 2.1727, "step": 1674 }, { "epoch": 0.05404195510888042, "grad_norm": 0.828125, "learning_rate": 2.9967763654807175e-05, "loss": 2.1389, "step": 1675 }, { "epoch": 0.05407421896267677, "grad_norm": 1.2578125, "learning_rate": 2.9967660453090122e-05, "loss": 2.1629, "step": 1676 }, { "epoch": 0.054106482816473116, "grad_norm": 1.0078125, "learning_rate": 2.9967557086619808e-05, "loss": 2.1292, "step": 1677 }, { "epoch": 0.054138746670269466, "grad_norm": 0.63671875, "learning_rate": 2.9967453555397364e-05, "loss": 2.1199, "step": 1678 }, { "epoch": 0.05417101052406581, "grad_norm": 0.7578125, "learning_rate": 2.9967349859423922e-05, "loss": 2.1221, "step": 1679 }, { "epoch": 0.054203274377862154, "grad_norm": 0.703125, "learning_rate": 2.9967245998700634e-05, "loss": 2.1255, "step": 1680 }, { "epoch": 0.054235538231658505, "grad_norm": 0.54296875, "learning_rate": 2.9967141973228644e-05, "loss": 2.118, "step": 1681 }, { "epoch": 0.05426780208545485, "grad_norm": 0.8125, "learning_rate": 2.9967037783009085e-05, "loss": 2.1574, "step": 1682 }, { "epoch": 0.0543000659392512, "grad_norm": 0.59765625, "learning_rate": 2.996693342804312e-05, "loss": 2.154, "step": 1683 }, { "epoch": 0.05433232979304754, "grad_norm": 0.56640625, "learning_rate": 2.9966828908331877e-05, "loss": 2.1441, "step": 1684 }, { "epoch": 0.05436459364684389, "grad_norm": 0.59375, "learning_rate": 2.996672422387653e-05, "loss": 2.179, "step": 1685 }, { "epoch": 0.05439685750064024, "grad_norm": 0.486328125, "learning_rate": 2.9966619374678215e-05, "loss": 2.1851, "step": 1686 }, { "epoch": 0.05442912135443658, "grad_norm": 0.52734375, "learning_rate": 2.9966514360738088e-05, "loss": 2.1158, "step": 1687 }, { "epoch": 0.05446138520823293, "grad_norm": 0.470703125, "learning_rate": 2.9966409182057314e-05, "loss": 2.1561, "step": 1688 }, { "epoch": 0.054493649062029276, "grad_norm": 0.5, "learning_rate": 2.996630383863704e-05, "loss": 2.1907, "step": 1689 }, { "epoch": 0.05452591291582562, "grad_norm": 0.48046875, "learning_rate": 2.996619833047843e-05, "loss": 2.1685, "step": 1690 }, { "epoch": 0.05455817676962197, "grad_norm": 0.5078125, "learning_rate": 2.996609265758265e-05, "loss": 2.1227, "step": 1691 }, { "epoch": 0.054590440623418314, "grad_norm": 0.453125, "learning_rate": 2.9965986819950857e-05, "loss": 2.1577, "step": 1692 }, { "epoch": 0.054622704477214665, "grad_norm": 0.470703125, "learning_rate": 2.9965880817584216e-05, "loss": 2.1505, "step": 1693 }, { "epoch": 0.05465496833101101, "grad_norm": 0.498046875, "learning_rate": 2.9965774650483898e-05, "loss": 2.152, "step": 1694 }, { "epoch": 0.05468723218480735, "grad_norm": 0.51953125, "learning_rate": 2.9965668318651067e-05, "loss": 2.1349, "step": 1695 }, { "epoch": 0.0547194960386037, "grad_norm": 0.4609375, "learning_rate": 2.996556182208689e-05, "loss": 2.1659, "step": 1696 }, { "epoch": 0.05475175989240005, "grad_norm": 0.498046875, "learning_rate": 2.9965455160792556e-05, "loss": 2.1204, "step": 1697 }, { "epoch": 0.0547840237461964, "grad_norm": 0.51171875, "learning_rate": 2.996534833476922e-05, "loss": 2.1678, "step": 1698 }, { "epoch": 0.05481628759999274, "grad_norm": 0.453125, "learning_rate": 2.9965241344018065e-05, "loss": 2.139, "step": 1699 }, { "epoch": 0.054848551453789085, "grad_norm": 0.447265625, "learning_rate": 2.996513418854027e-05, "loss": 2.1566, "step": 1700 }, { "epoch": 0.054880815307585436, "grad_norm": 0.453125, "learning_rate": 2.9965026868337018e-05, "loss": 2.1389, "step": 1701 }, { "epoch": 0.05491307916138178, "grad_norm": 0.458984375, "learning_rate": 2.996491938340948e-05, "loss": 2.1654, "step": 1702 }, { "epoch": 0.05494534301517812, "grad_norm": 0.4296875, "learning_rate": 2.9964811733758853e-05, "loss": 2.135, "step": 1703 }, { "epoch": 0.054977606868974474, "grad_norm": 0.447265625, "learning_rate": 2.996470391938631e-05, "loss": 2.1791, "step": 1704 }, { "epoch": 0.05500987072277082, "grad_norm": 0.45703125, "learning_rate": 2.996459594029304e-05, "loss": 2.1829, "step": 1705 }, { "epoch": 0.05504213457656717, "grad_norm": 0.484375, "learning_rate": 2.9964487796480237e-05, "loss": 2.1416, "step": 1706 }, { "epoch": 0.05507439843036351, "grad_norm": 0.55859375, "learning_rate": 2.9964379487949087e-05, "loss": 2.1498, "step": 1707 }, { "epoch": 0.055106662284159856, "grad_norm": 0.68359375, "learning_rate": 2.996427101470078e-05, "loss": 2.1377, "step": 1708 }, { "epoch": 0.05513892613795621, "grad_norm": 0.53515625, "learning_rate": 2.9964162376736514e-05, "loss": 2.1833, "step": 1709 }, { "epoch": 0.05517118999175255, "grad_norm": 0.50390625, "learning_rate": 2.996405357405749e-05, "loss": 2.1105, "step": 1710 }, { "epoch": 0.0552034538455489, "grad_norm": 0.484375, "learning_rate": 2.9963944606664892e-05, "loss": 2.149, "step": 1711 }, { "epoch": 0.055235717699345245, "grad_norm": 0.54296875, "learning_rate": 2.996383547455993e-05, "loss": 2.1481, "step": 1712 }, { "epoch": 0.05526798155314159, "grad_norm": 0.51171875, "learning_rate": 2.9963726177743803e-05, "loss": 2.1746, "step": 1713 }, { "epoch": 0.05530024540693794, "grad_norm": 0.494140625, "learning_rate": 2.996361671621771e-05, "loss": 2.138, "step": 1714 }, { "epoch": 0.05533250926073428, "grad_norm": 0.482421875, "learning_rate": 2.9963507089982862e-05, "loss": 2.1507, "step": 1715 }, { "epoch": 0.055364773114530634, "grad_norm": 0.53125, "learning_rate": 2.996339729904046e-05, "loss": 2.1505, "step": 1716 }, { "epoch": 0.05539703696832698, "grad_norm": 0.51953125, "learning_rate": 2.9963287343391717e-05, "loss": 2.1433, "step": 1717 }, { "epoch": 0.05542930082212332, "grad_norm": 0.4765625, "learning_rate": 2.996317722303784e-05, "loss": 2.1124, "step": 1718 }, { "epoch": 0.05546156467591967, "grad_norm": 0.447265625, "learning_rate": 2.9963066937980045e-05, "loss": 2.1604, "step": 1719 }, { "epoch": 0.055493828529716016, "grad_norm": 0.5078125, "learning_rate": 2.996295648821954e-05, "loss": 2.1113, "step": 1720 }, { "epoch": 0.05552609238351237, "grad_norm": 0.48828125, "learning_rate": 2.996284587375755e-05, "loss": 2.1218, "step": 1721 }, { "epoch": 0.05555835623730871, "grad_norm": 0.48828125, "learning_rate": 2.996273509459528e-05, "loss": 2.1338, "step": 1722 }, { "epoch": 0.055590620091105054, "grad_norm": 0.515625, "learning_rate": 2.996262415073396e-05, "loss": 2.145, "step": 1723 }, { "epoch": 0.055622883944901405, "grad_norm": 0.55859375, "learning_rate": 2.996251304217481e-05, "loss": 2.1644, "step": 1724 }, { "epoch": 0.05565514779869775, "grad_norm": 0.52734375, "learning_rate": 2.9962401768919046e-05, "loss": 2.1824, "step": 1725 }, { "epoch": 0.0556874116524941, "grad_norm": 0.453125, "learning_rate": 2.9962290330967897e-05, "loss": 2.1352, "step": 1726 }, { "epoch": 0.05571967550629044, "grad_norm": 0.5234375, "learning_rate": 2.9962178728322593e-05, "loss": 2.1285, "step": 1727 }, { "epoch": 0.05575193936008679, "grad_norm": 0.53515625, "learning_rate": 2.9962066960984363e-05, "loss": 2.1609, "step": 1728 }, { "epoch": 0.05578420321388314, "grad_norm": 0.439453125, "learning_rate": 2.9961955028954428e-05, "loss": 2.1427, "step": 1729 }, { "epoch": 0.05581646706767948, "grad_norm": 0.546875, "learning_rate": 2.9961842932234026e-05, "loss": 2.1512, "step": 1730 }, { "epoch": 0.05584873092147583, "grad_norm": 0.578125, "learning_rate": 2.996173067082439e-05, "loss": 2.1148, "step": 1731 }, { "epoch": 0.055880994775272176, "grad_norm": 0.84375, "learning_rate": 2.996161824472676e-05, "loss": 2.1867, "step": 1732 }, { "epoch": 0.05591325862906852, "grad_norm": 1.640625, "learning_rate": 2.996150565394237e-05, "loss": 2.0869, "step": 1733 }, { "epoch": 0.05594552248286487, "grad_norm": 0.55078125, "learning_rate": 2.9961392898472454e-05, "loss": 2.127, "step": 1734 }, { "epoch": 0.055977786336661214, "grad_norm": 1.0546875, "learning_rate": 2.996127997831826e-05, "loss": 2.1327, "step": 1735 }, { "epoch": 0.056010050190457565, "grad_norm": 1.1796875, "learning_rate": 2.996116689348103e-05, "loss": 2.132, "step": 1736 }, { "epoch": 0.05604231404425391, "grad_norm": 0.67578125, "learning_rate": 2.996105364396201e-05, "loss": 2.1466, "step": 1737 }, { "epoch": 0.05607457789805025, "grad_norm": 1.03125, "learning_rate": 2.996094022976244e-05, "loss": 2.1487, "step": 1738 }, { "epoch": 0.0561068417518466, "grad_norm": 0.77734375, "learning_rate": 2.9960826650883577e-05, "loss": 2.1452, "step": 1739 }, { "epoch": 0.05613910560564295, "grad_norm": 0.59375, "learning_rate": 2.9960712907326666e-05, "loss": 2.1411, "step": 1740 }, { "epoch": 0.0561713694594393, "grad_norm": 0.75, "learning_rate": 2.996059899909296e-05, "loss": 2.1545, "step": 1741 }, { "epoch": 0.05620363331323564, "grad_norm": 0.5, "learning_rate": 2.9960484926183712e-05, "loss": 2.1575, "step": 1742 }, { "epoch": 0.056235897167031985, "grad_norm": 0.70703125, "learning_rate": 2.996037068860018e-05, "loss": 2.136, "step": 1743 }, { "epoch": 0.056268161020828336, "grad_norm": 0.546875, "learning_rate": 2.996025628634362e-05, "loss": 2.1436, "step": 1744 }, { "epoch": 0.05630042487462468, "grad_norm": 0.68359375, "learning_rate": 2.9960141719415287e-05, "loss": 2.2395, "step": 1745 }, { "epoch": 0.056332688728421024, "grad_norm": 0.6796875, "learning_rate": 2.996002698781645e-05, "loss": 2.2282, "step": 1746 }, { "epoch": 0.056364952582217374, "grad_norm": 0.6875, "learning_rate": 2.9959912091548368e-05, "loss": 2.2346, "step": 1747 }, { "epoch": 0.05639721643601372, "grad_norm": 0.66796875, "learning_rate": 2.9959797030612305e-05, "loss": 2.2413, "step": 1748 }, { "epoch": 0.05642948028981007, "grad_norm": 0.6796875, "learning_rate": 2.995968180500953e-05, "loss": 2.2129, "step": 1749 }, { "epoch": 0.05646174414360641, "grad_norm": 0.66015625, "learning_rate": 2.9959566414741308e-05, "loss": 2.2321, "step": 1750 }, { "epoch": 0.056494007997402756, "grad_norm": 0.5859375, "learning_rate": 2.995945085980891e-05, "loss": 2.1864, "step": 1751 }, { "epoch": 0.05652627185119911, "grad_norm": 0.56640625, "learning_rate": 2.9959335140213615e-05, "loss": 2.2105, "step": 1752 }, { "epoch": 0.05655853570499545, "grad_norm": 0.52734375, "learning_rate": 2.995921925595668e-05, "loss": 2.172, "step": 1753 }, { "epoch": 0.0565907995587918, "grad_norm": 0.53125, "learning_rate": 2.99591032070394e-05, "loss": 2.2034, "step": 1754 }, { "epoch": 0.056623063412588145, "grad_norm": 0.4921875, "learning_rate": 2.995898699346304e-05, "loss": 2.2015, "step": 1755 }, { "epoch": 0.05665532726638449, "grad_norm": 0.515625, "learning_rate": 2.9958870615228883e-05, "loss": 2.2056, "step": 1756 }, { "epoch": 0.05668759112018084, "grad_norm": 0.515625, "learning_rate": 2.995875407233821e-05, "loss": 2.2058, "step": 1757 }, { "epoch": 0.056719854973977184, "grad_norm": 0.494140625, "learning_rate": 2.9958637364792302e-05, "loss": 2.2276, "step": 1758 }, { "epoch": 0.056752118827773534, "grad_norm": 0.5, "learning_rate": 2.9958520492592447e-05, "loss": 2.2106, "step": 1759 }, { "epoch": 0.05678438268156988, "grad_norm": 0.46875, "learning_rate": 2.995840345573993e-05, "loss": 2.2373, "step": 1760 }, { "epoch": 0.05681664653536622, "grad_norm": 0.51171875, "learning_rate": 2.995828625423604e-05, "loss": 2.2139, "step": 1761 }, { "epoch": 0.05684891038916257, "grad_norm": 0.423828125, "learning_rate": 2.9958168888082064e-05, "loss": 2.1961, "step": 1762 }, { "epoch": 0.056881174242958916, "grad_norm": 0.5078125, "learning_rate": 2.9958051357279293e-05, "loss": 2.1917, "step": 1763 }, { "epoch": 0.05691343809675527, "grad_norm": 0.42578125, "learning_rate": 2.9957933661829028e-05, "loss": 2.2355, "step": 1764 }, { "epoch": 0.05694570195055161, "grad_norm": 0.466796875, "learning_rate": 2.995781580173256e-05, "loss": 2.2341, "step": 1765 }, { "epoch": 0.056977965804347955, "grad_norm": 0.45703125, "learning_rate": 2.9957697776991188e-05, "loss": 2.186, "step": 1766 }, { "epoch": 0.057010229658144305, "grad_norm": 0.51953125, "learning_rate": 2.9957579587606202e-05, "loss": 2.213, "step": 1767 }, { "epoch": 0.05704249351194065, "grad_norm": 0.451171875, "learning_rate": 2.9957461233578916e-05, "loss": 2.1917, "step": 1768 }, { "epoch": 0.057074757365737, "grad_norm": 0.5390625, "learning_rate": 2.9957342714910627e-05, "loss": 2.2027, "step": 1769 }, { "epoch": 0.057107021219533344, "grad_norm": 0.466796875, "learning_rate": 2.9957224031602637e-05, "loss": 2.1883, "step": 1770 }, { "epoch": 0.05713928507332969, "grad_norm": 0.486328125, "learning_rate": 2.9957105183656257e-05, "loss": 2.2185, "step": 1771 }, { "epoch": 0.05717154892712604, "grad_norm": 0.51171875, "learning_rate": 2.9956986171072794e-05, "loss": 2.2294, "step": 1772 }, { "epoch": 0.05720381278092238, "grad_norm": 0.466796875, "learning_rate": 2.9956866993853557e-05, "loss": 2.2092, "step": 1773 }, { "epoch": 0.05723607663471873, "grad_norm": 0.5, "learning_rate": 2.9956747651999856e-05, "loss": 2.1933, "step": 1774 }, { "epoch": 0.057268340488515077, "grad_norm": 0.57421875, "learning_rate": 2.995662814551301e-05, "loss": 2.2116, "step": 1775 }, { "epoch": 0.05730060434231142, "grad_norm": 0.53125, "learning_rate": 2.995650847439433e-05, "loss": 2.1842, "step": 1776 }, { "epoch": 0.05733286819610777, "grad_norm": 0.4765625, "learning_rate": 2.9956388638645133e-05, "loss": 2.1863, "step": 1777 }, { "epoch": 0.057365132049904115, "grad_norm": 0.48828125, "learning_rate": 2.995626863826674e-05, "loss": 2.1892, "step": 1778 }, { "epoch": 0.057397395903700466, "grad_norm": 0.470703125, "learning_rate": 2.9956148473260473e-05, "loss": 2.1922, "step": 1779 }, { "epoch": 0.05742965975749681, "grad_norm": 0.5, "learning_rate": 2.995602814362765e-05, "loss": 2.2312, "step": 1780 }, { "epoch": 0.05746192361129315, "grad_norm": 0.546875, "learning_rate": 2.99559076493696e-05, "loss": 2.2018, "step": 1781 }, { "epoch": 0.057494187465089504, "grad_norm": 0.5390625, "learning_rate": 2.995578699048765e-05, "loss": 2.2478, "step": 1782 }, { "epoch": 0.05752645131888585, "grad_norm": 0.49609375, "learning_rate": 2.9955666166983125e-05, "loss": 2.211, "step": 1783 }, { "epoch": 0.0575587151726822, "grad_norm": 0.57421875, "learning_rate": 2.9955545178857357e-05, "loss": 2.2181, "step": 1784 }, { "epoch": 0.05759097902647854, "grad_norm": 0.6015625, "learning_rate": 2.9955424026111675e-05, "loss": 2.1996, "step": 1785 }, { "epoch": 0.057623242880274886, "grad_norm": 0.484375, "learning_rate": 2.9955302708747415e-05, "loss": 2.201, "step": 1786 }, { "epoch": 0.05765550673407124, "grad_norm": 0.50390625, "learning_rate": 2.995518122676591e-05, "loss": 2.2145, "step": 1787 }, { "epoch": 0.05768777058786758, "grad_norm": 0.48046875, "learning_rate": 2.9955059580168497e-05, "loss": 2.2225, "step": 1788 }, { "epoch": 0.05772003444166393, "grad_norm": 0.5078125, "learning_rate": 2.9954937768956525e-05, "loss": 2.2001, "step": 1789 }, { "epoch": 0.057752298295460275, "grad_norm": 0.64453125, "learning_rate": 2.995481579313132e-05, "loss": 2.2141, "step": 1790 }, { "epoch": 0.05778456214925662, "grad_norm": 0.73046875, "learning_rate": 2.995469365269423e-05, "loss": 2.2207, "step": 1791 }, { "epoch": 0.05781682600305297, "grad_norm": 0.890625, "learning_rate": 2.9954571347646608e-05, "loss": 2.218, "step": 1792 }, { "epoch": 0.05784908985684931, "grad_norm": 1.0234375, "learning_rate": 2.9954448877989786e-05, "loss": 2.2256, "step": 1793 }, { "epoch": 0.05788135371064566, "grad_norm": 0.63671875, "learning_rate": 2.995432624372512e-05, "loss": 2.2022, "step": 1794 }, { "epoch": 0.05791361756444201, "grad_norm": 0.546875, "learning_rate": 2.9954203444853962e-05, "loss": 2.186, "step": 1795 }, { "epoch": 0.05794588141823835, "grad_norm": 0.75, "learning_rate": 2.995408048137766e-05, "loss": 2.2349, "step": 1796 }, { "epoch": 0.0579781452720347, "grad_norm": 0.50390625, "learning_rate": 2.9953957353297568e-05, "loss": 2.2043, "step": 1797 }, { "epoch": 0.058010409125831046, "grad_norm": 0.5859375, "learning_rate": 2.995383406061504e-05, "loss": 2.1872, "step": 1798 }, { "epoch": 0.05804267297962739, "grad_norm": 0.5703125, "learning_rate": 2.9953710603331436e-05, "loss": 2.2208, "step": 1799 }, { "epoch": 0.05807493683342374, "grad_norm": 0.515625, "learning_rate": 2.9953586981448113e-05, "loss": 2.2134, "step": 1800 }, { "epoch": 0.058107200687220084, "grad_norm": 0.546875, "learning_rate": 2.995346319496643e-05, "loss": 2.2324, "step": 1801 }, { "epoch": 0.058139464541016435, "grad_norm": 0.482421875, "learning_rate": 2.9953339243887755e-05, "loss": 2.2129, "step": 1802 }, { "epoch": 0.05817172839481278, "grad_norm": 0.498046875, "learning_rate": 2.995321512821345e-05, "loss": 2.2085, "step": 1803 }, { "epoch": 0.05820399224860912, "grad_norm": 0.48828125, "learning_rate": 2.9953090847944872e-05, "loss": 2.2227, "step": 1804 }, { "epoch": 0.05823625610240547, "grad_norm": 0.470703125, "learning_rate": 2.9952966403083403e-05, "loss": 2.1819, "step": 1805 }, { "epoch": 0.05826851995620182, "grad_norm": 0.44921875, "learning_rate": 2.995284179363041e-05, "loss": 2.1859, "step": 1806 }, { "epoch": 0.05830078380999817, "grad_norm": 0.50390625, "learning_rate": 2.9952717019587256e-05, "loss": 2.2078, "step": 1807 }, { "epoch": 0.05833304766379451, "grad_norm": 0.498046875, "learning_rate": 2.9952592080955326e-05, "loss": 2.2029, "step": 1808 }, { "epoch": 0.058365311517590855, "grad_norm": 0.5078125, "learning_rate": 2.9952466977735988e-05, "loss": 2.2008, "step": 1809 }, { "epoch": 0.058397575371387206, "grad_norm": 0.52734375, "learning_rate": 2.995234170993062e-05, "loss": 2.1987, "step": 1810 }, { "epoch": 0.05842983922518355, "grad_norm": 0.443359375, "learning_rate": 2.9952216277540596e-05, "loss": 2.2239, "step": 1811 }, { "epoch": 0.0584621030789799, "grad_norm": 0.53125, "learning_rate": 2.9952090680567307e-05, "loss": 2.1952, "step": 1812 }, { "epoch": 0.058494366932776244, "grad_norm": 0.431640625, "learning_rate": 2.995196491901213e-05, "loss": 2.2289, "step": 1813 }, { "epoch": 0.05852663078657259, "grad_norm": 0.478515625, "learning_rate": 2.9951838992876446e-05, "loss": 2.1886, "step": 1814 }, { "epoch": 0.05855889464036894, "grad_norm": 0.490234375, "learning_rate": 2.995171290216165e-05, "loss": 2.2069, "step": 1815 }, { "epoch": 0.05859115849416528, "grad_norm": 0.42578125, "learning_rate": 2.9951586646869122e-05, "loss": 2.2212, "step": 1816 }, { "epoch": 0.05862342234796163, "grad_norm": 0.50390625, "learning_rate": 2.9951460227000253e-05, "loss": 2.2036, "step": 1817 }, { "epoch": 0.05865568620175798, "grad_norm": 0.4375, "learning_rate": 2.9951333642556435e-05, "loss": 2.2091, "step": 1818 }, { "epoch": 0.05868795005555432, "grad_norm": 0.474609375, "learning_rate": 2.9951206893539066e-05, "loss": 2.1988, "step": 1819 }, { "epoch": 0.05872021390935067, "grad_norm": 0.51171875, "learning_rate": 2.9951079979949535e-05, "loss": 2.2089, "step": 1820 }, { "epoch": 0.058752477763147015, "grad_norm": 0.5, "learning_rate": 2.9950952901789243e-05, "loss": 2.2338, "step": 1821 }, { "epoch": 0.058784741616943366, "grad_norm": 0.54296875, "learning_rate": 2.995082565905959e-05, "loss": 2.2264, "step": 1822 }, { "epoch": 0.05881700547073971, "grad_norm": 0.5234375, "learning_rate": 2.995069825176197e-05, "loss": 2.2241, "step": 1823 }, { "epoch": 0.058849269324536053, "grad_norm": 0.451171875, "learning_rate": 2.995057067989779e-05, "loss": 2.1614, "step": 1824 }, { "epoch": 0.058881533178332404, "grad_norm": 0.455078125, "learning_rate": 2.9950442943468453e-05, "loss": 2.1274, "step": 1825 }, { "epoch": 0.05891379703212875, "grad_norm": 0.453125, "learning_rate": 2.9950315042475366e-05, "loss": 2.1376, "step": 1826 }, { "epoch": 0.0589460608859251, "grad_norm": 0.5390625, "learning_rate": 2.9950186976919936e-05, "loss": 2.2149, "step": 1827 }, { "epoch": 0.05897832473972144, "grad_norm": 0.50390625, "learning_rate": 2.9950058746803574e-05, "loss": 2.2032, "step": 1828 }, { "epoch": 0.059010588593517786, "grad_norm": 0.53125, "learning_rate": 2.994993035212769e-05, "loss": 2.2197, "step": 1829 }, { "epoch": 0.05904285244731414, "grad_norm": 0.5078125, "learning_rate": 2.99498017928937e-05, "loss": 2.2236, "step": 1830 }, { "epoch": 0.05907511630111048, "grad_norm": 0.50390625, "learning_rate": 2.9949673069103013e-05, "loss": 2.2208, "step": 1831 }, { "epoch": 0.05910738015490683, "grad_norm": 0.48828125, "learning_rate": 2.994954418075705e-05, "loss": 2.1999, "step": 1832 }, { "epoch": 0.059139644008703175, "grad_norm": 0.458984375, "learning_rate": 2.994941512785723e-05, "loss": 2.1807, "step": 1833 }, { "epoch": 0.05917190786249952, "grad_norm": 0.47265625, "learning_rate": 2.994928591040497e-05, "loss": 2.1954, "step": 1834 }, { "epoch": 0.05920417171629587, "grad_norm": 0.53515625, "learning_rate": 2.99491565284017e-05, "loss": 2.2094, "step": 1835 }, { "epoch": 0.059236435570092213, "grad_norm": 0.486328125, "learning_rate": 2.9949026981848837e-05, "loss": 2.1962, "step": 1836 }, { "epoch": 0.05926869942388856, "grad_norm": 0.451171875, "learning_rate": 2.9948897270747813e-05, "loss": 2.2013, "step": 1837 }, { "epoch": 0.05930096327768491, "grad_norm": 0.44140625, "learning_rate": 2.994876739510005e-05, "loss": 2.2462, "step": 1838 }, { "epoch": 0.05933322713148125, "grad_norm": 0.4375, "learning_rate": 2.994863735490698e-05, "loss": 2.2056, "step": 1839 }, { "epoch": 0.0593654909852776, "grad_norm": 0.498046875, "learning_rate": 2.9948507150170033e-05, "loss": 2.1906, "step": 1840 }, { "epoch": 0.059397754839073946, "grad_norm": 0.494140625, "learning_rate": 2.9948376780890642e-05, "loss": 2.2082, "step": 1841 }, { "epoch": 0.05943001869287029, "grad_norm": 0.5078125, "learning_rate": 2.9948246247070246e-05, "loss": 2.1915, "step": 1842 }, { "epoch": 0.05946228254666664, "grad_norm": 0.625, "learning_rate": 2.9948115548710277e-05, "loss": 2.2167, "step": 1843 }, { "epoch": 0.059494546400462985, "grad_norm": 0.921875, "learning_rate": 2.9947984685812174e-05, "loss": 2.2248, "step": 1844 }, { "epoch": 0.059526810254259335, "grad_norm": 1.6484375, "learning_rate": 2.9947853658377388e-05, "loss": 2.217, "step": 1845 }, { "epoch": 0.05955907410805568, "grad_norm": 0.47265625, "learning_rate": 2.9947722466407344e-05, "loss": 2.2132, "step": 1846 }, { "epoch": 0.05959133796185202, "grad_norm": 1.265625, "learning_rate": 2.9947591109903494e-05, "loss": 2.1951, "step": 1847 }, { "epoch": 0.059623601815648374, "grad_norm": 0.8203125, "learning_rate": 2.994745958886729e-05, "loss": 2.1645, "step": 1848 }, { "epoch": 0.05965586566944472, "grad_norm": 0.625, "learning_rate": 2.994732790330017e-05, "loss": 2.203, "step": 1849 }, { "epoch": 0.05968812952324107, "grad_norm": 0.82421875, "learning_rate": 2.9947196053203592e-05, "loss": 2.1872, "step": 1850 }, { "epoch": 0.05972039337703741, "grad_norm": 0.55078125, "learning_rate": 2.9947064038578998e-05, "loss": 2.2308, "step": 1851 }, { "epoch": 0.059752657230833756, "grad_norm": 0.66015625, "learning_rate": 2.994693185942785e-05, "loss": 2.2097, "step": 1852 }, { "epoch": 0.059784921084630106, "grad_norm": 0.482421875, "learning_rate": 2.99467995157516e-05, "loss": 2.2089, "step": 1853 }, { "epoch": 0.05981718493842645, "grad_norm": 0.57421875, "learning_rate": 2.9946667007551697e-05, "loss": 2.2111, "step": 1854 }, { "epoch": 0.0598494487922228, "grad_norm": 0.486328125, "learning_rate": 2.994653433482961e-05, "loss": 2.1891, "step": 1855 }, { "epoch": 0.059881712646019145, "grad_norm": 0.56640625, "learning_rate": 2.9946401497586796e-05, "loss": 2.19, "step": 1856 }, { "epoch": 0.05991397649981549, "grad_norm": 0.55859375, "learning_rate": 2.9946268495824712e-05, "loss": 2.1935, "step": 1857 }, { "epoch": 0.05994624035361184, "grad_norm": 0.482421875, "learning_rate": 2.9946135329544833e-05, "loss": 2.2032, "step": 1858 }, { "epoch": 0.05997850420740818, "grad_norm": 0.515625, "learning_rate": 2.994600199874862e-05, "loss": 2.196, "step": 1859 }, { "epoch": 0.060010768061204534, "grad_norm": 0.431640625, "learning_rate": 2.9945868503437535e-05, "loss": 2.2053, "step": 1860 }, { "epoch": 0.06004303191500088, "grad_norm": 0.53515625, "learning_rate": 2.994573484361305e-05, "loss": 2.1918, "step": 1861 }, { "epoch": 0.06007529576879722, "grad_norm": 0.46484375, "learning_rate": 2.9945601019276643e-05, "loss": 2.2016, "step": 1862 }, { "epoch": 0.06010755962259357, "grad_norm": 0.46875, "learning_rate": 2.994546703042978e-05, "loss": 2.2191, "step": 1863 }, { "epoch": 0.060139823476389916, "grad_norm": 0.45703125, "learning_rate": 2.994533287707394e-05, "loss": 2.2049, "step": 1864 }, { "epoch": 0.060172087330186266, "grad_norm": 0.4375, "learning_rate": 2.9945198559210593e-05, "loss": 2.2229, "step": 1865 }, { "epoch": 0.06020435118398261, "grad_norm": 0.48046875, "learning_rate": 2.994506407684122e-05, "loss": 2.1885, "step": 1866 }, { "epoch": 0.060236615037778954, "grad_norm": 0.48828125, "learning_rate": 2.994492942996731e-05, "loss": 2.2124, "step": 1867 }, { "epoch": 0.060268878891575305, "grad_norm": 0.4921875, "learning_rate": 2.9944794618590335e-05, "loss": 2.1892, "step": 1868 }, { "epoch": 0.06030114274537165, "grad_norm": 0.439453125, "learning_rate": 2.994465964271178e-05, "loss": 2.1789, "step": 1869 }, { "epoch": 0.060333406599168, "grad_norm": 0.478515625, "learning_rate": 2.9944524502333137e-05, "loss": 2.1769, "step": 1870 }, { "epoch": 0.06036567045296434, "grad_norm": 0.53515625, "learning_rate": 2.9944389197455886e-05, "loss": 2.171, "step": 1871 }, { "epoch": 0.06039793430676069, "grad_norm": 0.451171875, "learning_rate": 2.994425372808152e-05, "loss": 2.1612, "step": 1872 }, { "epoch": 0.06043019816055704, "grad_norm": 0.443359375, "learning_rate": 2.9944118094211537e-05, "loss": 2.197, "step": 1873 }, { "epoch": 0.06046246201435338, "grad_norm": 0.474609375, "learning_rate": 2.9943982295847416e-05, "loss": 2.1817, "step": 1874 }, { "epoch": 0.06049472586814973, "grad_norm": 0.47265625, "learning_rate": 2.9943846332990656e-05, "loss": 2.2265, "step": 1875 }, { "epoch": 0.060526989721946076, "grad_norm": 0.41796875, "learning_rate": 2.9943710205642764e-05, "loss": 2.2285, "step": 1876 }, { "epoch": 0.06055925357574242, "grad_norm": 0.435546875, "learning_rate": 2.9943573913805227e-05, "loss": 2.1586, "step": 1877 }, { "epoch": 0.06059151742953877, "grad_norm": 0.490234375, "learning_rate": 2.9943437457479547e-05, "loss": 2.2196, "step": 1878 }, { "epoch": 0.060623781283335114, "grad_norm": 0.486328125, "learning_rate": 2.994330083666723e-05, "loss": 2.1993, "step": 1879 }, { "epoch": 0.06065604513713146, "grad_norm": 0.470703125, "learning_rate": 2.9943164051369776e-05, "loss": 2.2047, "step": 1880 }, { "epoch": 0.06068830899092781, "grad_norm": 0.515625, "learning_rate": 2.9943027101588695e-05, "loss": 2.2102, "step": 1881 }, { "epoch": 0.06072057284472415, "grad_norm": 0.5625, "learning_rate": 2.994288998732549e-05, "loss": 2.2252, "step": 1882 }, { "epoch": 0.0607528366985205, "grad_norm": 0.51953125, "learning_rate": 2.994275270858167e-05, "loss": 2.2169, "step": 1883 }, { "epoch": 0.06078510055231685, "grad_norm": 0.53125, "learning_rate": 2.994261526535875e-05, "loss": 2.2195, "step": 1884 }, { "epoch": 0.06081736440611319, "grad_norm": 0.5, "learning_rate": 2.994247765765824e-05, "loss": 2.2014, "step": 1885 }, { "epoch": 0.06084962825990954, "grad_norm": 0.47265625, "learning_rate": 2.994233988548166e-05, "loss": 2.2019, "step": 1886 }, { "epoch": 0.060881892113705885, "grad_norm": 0.46875, "learning_rate": 2.9942201948830518e-05, "loss": 2.1789, "step": 1887 }, { "epoch": 0.060914155967502236, "grad_norm": 0.515625, "learning_rate": 2.9942063847706336e-05, "loss": 2.1978, "step": 1888 }, { "epoch": 0.06094641982129858, "grad_norm": 0.458984375, "learning_rate": 2.9941925582110637e-05, "loss": 2.1929, "step": 1889 }, { "epoch": 0.06097868367509492, "grad_norm": 0.494140625, "learning_rate": 2.9941787152044938e-05, "loss": 2.1883, "step": 1890 }, { "epoch": 0.061010947528891274, "grad_norm": 0.470703125, "learning_rate": 2.994164855751077e-05, "loss": 2.1833, "step": 1891 }, { "epoch": 0.06104321138268762, "grad_norm": 0.421875, "learning_rate": 2.9941509798509648e-05, "loss": 2.205, "step": 1892 }, { "epoch": 0.06107547523648397, "grad_norm": 0.5234375, "learning_rate": 2.994137087504311e-05, "loss": 2.1921, "step": 1893 }, { "epoch": 0.06110773909028031, "grad_norm": 0.56640625, "learning_rate": 2.9941231787112677e-05, "loss": 2.2229, "step": 1894 }, { "epoch": 0.061140002944076656, "grad_norm": 0.6328125, "learning_rate": 2.9941092534719884e-05, "loss": 2.1937, "step": 1895 }, { "epoch": 0.06117226679787301, "grad_norm": 0.609375, "learning_rate": 2.9940953117866263e-05, "loss": 2.1982, "step": 1896 }, { "epoch": 0.06120453065166935, "grad_norm": 0.5234375, "learning_rate": 2.9940813536553348e-05, "loss": 2.1953, "step": 1897 }, { "epoch": 0.0612367945054657, "grad_norm": 0.490234375, "learning_rate": 2.9940673790782674e-05, "loss": 2.2122, "step": 1898 }, { "epoch": 0.061269058359262045, "grad_norm": 0.45703125, "learning_rate": 2.9940533880555787e-05, "loss": 2.1544, "step": 1899 }, { "epoch": 0.06130132221305839, "grad_norm": 0.44140625, "learning_rate": 2.9940393805874217e-05, "loss": 2.2162, "step": 1900 }, { "epoch": 0.06133358606685474, "grad_norm": 0.50390625, "learning_rate": 2.9940253566739513e-05, "loss": 2.1937, "step": 1901 }, { "epoch": 0.06136584992065108, "grad_norm": 0.5625, "learning_rate": 2.9940113163153217e-05, "loss": 2.2261, "step": 1902 }, { "epoch": 0.061398113774447434, "grad_norm": 0.70703125, "learning_rate": 2.993997259511687e-05, "loss": 2.2111, "step": 1903 }, { "epoch": 0.06143037762824378, "grad_norm": 0.83203125, "learning_rate": 2.9939831862632024e-05, "loss": 2.1645, "step": 1904 }, { "epoch": 0.06146264148204012, "grad_norm": 1.0703125, "learning_rate": 2.9939690965700223e-05, "loss": 2.1806, "step": 1905 }, { "epoch": 0.06149490533583647, "grad_norm": 0.8203125, "learning_rate": 2.9939549904323024e-05, "loss": 2.1726, "step": 1906 }, { "epoch": 0.061527169189632816, "grad_norm": 0.4765625, "learning_rate": 2.9939408678501982e-05, "loss": 2.1938, "step": 1907 }, { "epoch": 0.06155943304342917, "grad_norm": 0.921875, "learning_rate": 2.9939267288238644e-05, "loss": 2.2342, "step": 1908 }, { "epoch": 0.06159169689722551, "grad_norm": 0.75, "learning_rate": 2.9939125733534565e-05, "loss": 2.2039, "step": 1909 }, { "epoch": 0.061623960751021854, "grad_norm": 0.5390625, "learning_rate": 2.9938984014391312e-05, "loss": 2.2184, "step": 1910 }, { "epoch": 0.061656224604818205, "grad_norm": 0.6875, "learning_rate": 2.9938842130810432e-05, "loss": 2.2036, "step": 1911 }, { "epoch": 0.06168848845861455, "grad_norm": 0.50390625, "learning_rate": 2.9938700082793505e-05, "loss": 2.2045, "step": 1912 }, { "epoch": 0.0617207523124109, "grad_norm": 0.56640625, "learning_rate": 2.9938557870342078e-05, "loss": 2.2095, "step": 1913 }, { "epoch": 0.06175301616620724, "grad_norm": 0.5234375, "learning_rate": 2.9938415493457728e-05, "loss": 2.2221, "step": 1914 }, { "epoch": 0.06178528002000359, "grad_norm": 0.5, "learning_rate": 2.9938272952142012e-05, "loss": 2.1792, "step": 1915 }, { "epoch": 0.06181754387379994, "grad_norm": 0.4921875, "learning_rate": 2.993813024639651e-05, "loss": 2.2031, "step": 1916 }, { "epoch": 0.06184980772759628, "grad_norm": 0.5625, "learning_rate": 2.993798737622278e-05, "loss": 2.2023, "step": 1917 }, { "epoch": 0.06188207158139263, "grad_norm": 0.412109375, "learning_rate": 2.9937844341622408e-05, "loss": 2.1866, "step": 1918 }, { "epoch": 0.061914335435188976, "grad_norm": 0.515625, "learning_rate": 2.9937701142596956e-05, "loss": 2.2064, "step": 1919 }, { "epoch": 0.06194659928898532, "grad_norm": 0.4765625, "learning_rate": 2.9937557779148012e-05, "loss": 2.1843, "step": 1920 }, { "epoch": 0.06197886314278167, "grad_norm": 0.47265625, "learning_rate": 2.9937414251277142e-05, "loss": 2.2135, "step": 1921 }, { "epoch": 0.062011126996578014, "grad_norm": 0.54296875, "learning_rate": 2.9937270558985933e-05, "loss": 2.2203, "step": 1922 }, { "epoch": 0.06204339085037436, "grad_norm": 0.55078125, "learning_rate": 2.9937126702275968e-05, "loss": 2.2011, "step": 1923 }, { "epoch": 0.06207565470417071, "grad_norm": 0.482421875, "learning_rate": 2.9936982681148828e-05, "loss": 2.1968, "step": 1924 }, { "epoch": 0.06210791855796705, "grad_norm": 0.484375, "learning_rate": 2.9936838495606094e-05, "loss": 2.2038, "step": 1925 }, { "epoch": 0.0621401824117634, "grad_norm": 0.52734375, "learning_rate": 2.9936694145649366e-05, "loss": 2.2282, "step": 1926 }, { "epoch": 0.06217244626555975, "grad_norm": 0.4375, "learning_rate": 2.9936549631280216e-05, "loss": 2.1781, "step": 1927 }, { "epoch": 0.06220471011935609, "grad_norm": 0.55078125, "learning_rate": 2.9936404952500246e-05, "loss": 2.2408, "step": 1928 }, { "epoch": 0.06223697397315244, "grad_norm": 0.451171875, "learning_rate": 2.9936260109311053e-05, "loss": 2.2078, "step": 1929 }, { "epoch": 0.062269237826948785, "grad_norm": 0.49609375, "learning_rate": 2.9936115101714214e-05, "loss": 2.2014, "step": 1930 }, { "epoch": 0.062301501680745136, "grad_norm": 0.45703125, "learning_rate": 2.9935969929711336e-05, "loss": 2.1975, "step": 1931 }, { "epoch": 0.06233376553454148, "grad_norm": 0.439453125, "learning_rate": 2.993582459330402e-05, "loss": 2.1713, "step": 1932 }, { "epoch": 0.062366029388337824, "grad_norm": 0.482421875, "learning_rate": 2.9935679092493858e-05, "loss": 2.1691, "step": 1933 }, { "epoch": 0.062398293242134174, "grad_norm": 0.4453125, "learning_rate": 2.993553342728246e-05, "loss": 2.193, "step": 1934 }, { "epoch": 0.06243055709593052, "grad_norm": 0.494140625, "learning_rate": 2.993538759767142e-05, "loss": 2.2187, "step": 1935 }, { "epoch": 0.06246282094972687, "grad_norm": 0.44921875, "learning_rate": 2.993524160366235e-05, "loss": 2.1801, "step": 1936 }, { "epoch": 0.06249508480352321, "grad_norm": 0.52734375, "learning_rate": 2.993509544525686e-05, "loss": 2.2189, "step": 1937 }, { "epoch": 0.06252734865731956, "grad_norm": 0.5, "learning_rate": 2.9934949122456547e-05, "loss": 2.1855, "step": 1938 }, { "epoch": 0.0625596125111159, "grad_norm": 0.53125, "learning_rate": 2.9934802635263027e-05, "loss": 2.1961, "step": 1939 }, { "epoch": 0.06259187636491226, "grad_norm": 0.47265625, "learning_rate": 2.993465598367792e-05, "loss": 2.207, "step": 1940 }, { "epoch": 0.0626241402187086, "grad_norm": 0.4296875, "learning_rate": 2.9934509167702827e-05, "loss": 2.1948, "step": 1941 }, { "epoch": 0.06265640407250495, "grad_norm": 0.4296875, "learning_rate": 2.9934362187339377e-05, "loss": 2.2254, "step": 1942 }, { "epoch": 0.06268866792630129, "grad_norm": 0.50390625, "learning_rate": 2.993421504258918e-05, "loss": 2.1651, "step": 1943 }, { "epoch": 0.06272093178009763, "grad_norm": 0.55078125, "learning_rate": 2.993406773345386e-05, "loss": 2.1857, "step": 1944 }, { "epoch": 0.06275319563389399, "grad_norm": 0.78125, "learning_rate": 2.993392025993503e-05, "loss": 2.1956, "step": 1945 }, { "epoch": 0.06278545948769033, "grad_norm": 1.0625, "learning_rate": 2.9933772622034326e-05, "loss": 2.2108, "step": 1946 }, { "epoch": 0.06281772334148668, "grad_norm": 0.97265625, "learning_rate": 2.993362481975336e-05, "loss": 2.2209, "step": 1947 }, { "epoch": 0.06284998719528302, "grad_norm": 0.5546875, "learning_rate": 2.9933476853093775e-05, "loss": 2.2184, "step": 1948 }, { "epoch": 0.06288225104907937, "grad_norm": 0.62890625, "learning_rate": 2.993332872205718e-05, "loss": 2.2017, "step": 1949 }, { "epoch": 0.06291451490287572, "grad_norm": 0.671875, "learning_rate": 2.993318042664522e-05, "loss": 2.1939, "step": 1950 }, { "epoch": 0.06294677875667207, "grad_norm": 0.4609375, "learning_rate": 2.993303196685952e-05, "loss": 2.1517, "step": 1951 }, { "epoch": 0.06297904261046841, "grad_norm": 0.7265625, "learning_rate": 2.993288334270172e-05, "loss": 2.1837, "step": 1952 }, { "epoch": 0.06301130646426475, "grad_norm": 0.5234375, "learning_rate": 2.9932734554173452e-05, "loss": 2.2069, "step": 1953 }, { "epoch": 0.0630435703180611, "grad_norm": 0.578125, "learning_rate": 2.993258560127635e-05, "loss": 2.1627, "step": 1954 }, { "epoch": 0.06307583417185746, "grad_norm": 0.58984375, "learning_rate": 2.9932436484012068e-05, "loss": 2.2206, "step": 1955 }, { "epoch": 0.0631080980256538, "grad_norm": 0.470703125, "learning_rate": 2.9932287202382232e-05, "loss": 2.2017, "step": 1956 }, { "epoch": 0.06314036187945014, "grad_norm": 0.5546875, "learning_rate": 2.993213775638849e-05, "loss": 2.1746, "step": 1957 }, { "epoch": 0.06317262573324649, "grad_norm": 0.52734375, "learning_rate": 2.993198814603249e-05, "loss": 2.2006, "step": 1958 }, { "epoch": 0.06320488958704283, "grad_norm": 0.51953125, "learning_rate": 2.9931838371315877e-05, "loss": 2.1813, "step": 1959 }, { "epoch": 0.06323715344083919, "grad_norm": 0.55078125, "learning_rate": 2.99316884322403e-05, "loss": 2.1762, "step": 1960 }, { "epoch": 0.06326941729463553, "grad_norm": 0.4609375, "learning_rate": 2.9931538328807406e-05, "loss": 2.2184, "step": 1961 }, { "epoch": 0.06330168114843188, "grad_norm": 0.50390625, "learning_rate": 2.993138806101885e-05, "loss": 2.1983, "step": 1962 }, { "epoch": 0.06333394500222822, "grad_norm": 0.455078125, "learning_rate": 2.9931237628876288e-05, "loss": 2.2084, "step": 1963 }, { "epoch": 0.06336620885602456, "grad_norm": 0.4765625, "learning_rate": 2.993108703238137e-05, "loss": 2.2212, "step": 1964 }, { "epoch": 0.06339847270982092, "grad_norm": 0.486328125, "learning_rate": 2.993093627153576e-05, "loss": 2.1972, "step": 1965 }, { "epoch": 0.06343073656361727, "grad_norm": 0.408203125, "learning_rate": 2.9930785346341117e-05, "loss": 2.2149, "step": 1966 }, { "epoch": 0.06346300041741361, "grad_norm": 0.4765625, "learning_rate": 2.99306342567991e-05, "loss": 2.1934, "step": 1967 }, { "epoch": 0.06349526427120995, "grad_norm": 0.470703125, "learning_rate": 2.993048300291137e-05, "loss": 2.1855, "step": 1968 }, { "epoch": 0.0635275281250063, "grad_norm": 0.68359375, "learning_rate": 2.9930331584679595e-05, "loss": 2.1535, "step": 1969 }, { "epoch": 0.06355979197880265, "grad_norm": 0.50390625, "learning_rate": 2.993018000210544e-05, "loss": 2.1861, "step": 1970 }, { "epoch": 0.063592055832599, "grad_norm": 0.47265625, "learning_rate": 2.9930028255190576e-05, "loss": 2.1892, "step": 1971 }, { "epoch": 0.06362431968639534, "grad_norm": 0.53125, "learning_rate": 2.9929876343936668e-05, "loss": 2.2062, "step": 1972 }, { "epoch": 0.06365658354019169, "grad_norm": 0.453125, "learning_rate": 2.9929724268345396e-05, "loss": 2.1792, "step": 1973 }, { "epoch": 0.06368884739398803, "grad_norm": 0.5078125, "learning_rate": 2.992957202841843e-05, "loss": 2.1811, "step": 1974 }, { "epoch": 0.06372111124778439, "grad_norm": 0.443359375, "learning_rate": 2.9929419624157444e-05, "loss": 2.1741, "step": 1975 }, { "epoch": 0.06375337510158073, "grad_norm": 0.490234375, "learning_rate": 2.992926705556412e-05, "loss": 2.2156, "step": 1976 }, { "epoch": 0.06378563895537707, "grad_norm": 0.4609375, "learning_rate": 2.992911432264013e-05, "loss": 2.2073, "step": 1977 }, { "epoch": 0.06381790280917342, "grad_norm": 0.455078125, "learning_rate": 2.9928961425387157e-05, "loss": 2.1898, "step": 1978 }, { "epoch": 0.06385016666296976, "grad_norm": 0.494140625, "learning_rate": 2.9928808363806894e-05, "loss": 2.1647, "step": 1979 }, { "epoch": 0.06388243051676612, "grad_norm": 0.4921875, "learning_rate": 2.9928655137901013e-05, "loss": 2.1666, "step": 1980 }, { "epoch": 0.06391469437056246, "grad_norm": 0.453125, "learning_rate": 2.992850174767121e-05, "loss": 2.189, "step": 1981 }, { "epoch": 0.06394695822435881, "grad_norm": 0.458984375, "learning_rate": 2.9928348193119166e-05, "loss": 2.2028, "step": 1982 }, { "epoch": 0.06397922207815515, "grad_norm": 0.42578125, "learning_rate": 2.9928194474246577e-05, "loss": 2.1833, "step": 1983 }, { "epoch": 0.0640114859319515, "grad_norm": 0.431640625, "learning_rate": 2.9928040591055133e-05, "loss": 2.2248, "step": 1984 }, { "epoch": 0.06404374978574785, "grad_norm": 0.45703125, "learning_rate": 2.9927886543546528e-05, "loss": 2.1792, "step": 1985 }, { "epoch": 0.0640760136395442, "grad_norm": 0.4453125, "learning_rate": 2.992773233172245e-05, "loss": 2.1978, "step": 1986 }, { "epoch": 0.06410827749334054, "grad_norm": 0.4609375, "learning_rate": 2.9927577955584612e-05, "loss": 2.1375, "step": 1987 }, { "epoch": 0.06414054134713688, "grad_norm": 0.51171875, "learning_rate": 2.9927423415134704e-05, "loss": 2.1282, "step": 1988 }, { "epoch": 0.06417280520093323, "grad_norm": 0.4921875, "learning_rate": 2.9927268710374427e-05, "loss": 2.1411, "step": 1989 }, { "epoch": 0.06420506905472957, "grad_norm": 0.515625, "learning_rate": 2.9927113841305486e-05, "loss": 2.1475, "step": 1990 }, { "epoch": 0.06423733290852593, "grad_norm": 0.54296875, "learning_rate": 2.992695880792958e-05, "loss": 2.1548, "step": 1991 }, { "epoch": 0.06426959676232227, "grad_norm": 0.54296875, "learning_rate": 2.9926803610248422e-05, "loss": 2.1471, "step": 1992 }, { "epoch": 0.06430186061611862, "grad_norm": 0.478515625, "learning_rate": 2.9926648248263717e-05, "loss": 2.1295, "step": 1993 }, { "epoch": 0.06433412446991496, "grad_norm": 0.455078125, "learning_rate": 2.992649272197718e-05, "loss": 2.126, "step": 1994 }, { "epoch": 0.0643663883237113, "grad_norm": 0.58203125, "learning_rate": 2.992633703139052e-05, "loss": 2.1432, "step": 1995 }, { "epoch": 0.06439865217750766, "grad_norm": 0.7265625, "learning_rate": 2.9926181176505443e-05, "loss": 2.1311, "step": 1996 }, { "epoch": 0.064430916031304, "grad_norm": 1.15625, "learning_rate": 2.9926025157323678e-05, "loss": 2.1274, "step": 1997 }, { "epoch": 0.06446317988510035, "grad_norm": 1.2421875, "learning_rate": 2.9925868973846936e-05, "loss": 2.1406, "step": 1998 }, { "epoch": 0.0644954437388967, "grad_norm": 0.486328125, "learning_rate": 2.9925712626076932e-05, "loss": 2.1251, "step": 1999 }, { "epoch": 0.06452770759269304, "grad_norm": 0.92578125, "learning_rate": 2.9925556114015395e-05, "loss": 2.1252, "step": 2000 }, { "epoch": 0.0645599714464894, "grad_norm": 0.9375, "learning_rate": 2.9925399437664042e-05, "loss": 2.1441, "step": 2001 }, { "epoch": 0.06459223530028574, "grad_norm": 0.47265625, "learning_rate": 2.9925242597024597e-05, "loss": 2.1004, "step": 2002 }, { "epoch": 0.06462449915408208, "grad_norm": 0.83984375, "learning_rate": 2.992508559209879e-05, "loss": 2.1447, "step": 2003 }, { "epoch": 0.06465676300787843, "grad_norm": 0.53515625, "learning_rate": 2.992492842288835e-05, "loss": 2.1188, "step": 2004 }, { "epoch": 0.06468902686167477, "grad_norm": 0.5859375, "learning_rate": 2.9924771089395006e-05, "loss": 2.1465, "step": 2005 }, { "epoch": 0.06472129071547113, "grad_norm": 0.61328125, "learning_rate": 2.9924613591620486e-05, "loss": 2.126, "step": 2006 }, { "epoch": 0.06475355456926747, "grad_norm": 0.423828125, "learning_rate": 2.9924455929566528e-05, "loss": 2.152, "step": 2007 }, { "epoch": 0.06478581842306382, "grad_norm": 0.6640625, "learning_rate": 2.9924298103234867e-05, "loss": 2.1071, "step": 2008 }, { "epoch": 0.06481808227686016, "grad_norm": 0.4453125, "learning_rate": 2.9924140112627237e-05, "loss": 2.1316, "step": 2009 }, { "epoch": 0.0648503461306565, "grad_norm": 0.60546875, "learning_rate": 2.992398195774538e-05, "loss": 2.1267, "step": 2010 }, { "epoch": 0.06488260998445286, "grad_norm": 0.48046875, "learning_rate": 2.9923823638591033e-05, "loss": 2.1425, "step": 2011 }, { "epoch": 0.0649148738382492, "grad_norm": 0.474609375, "learning_rate": 2.9923665155165947e-05, "loss": 2.1433, "step": 2012 }, { "epoch": 0.06494713769204555, "grad_norm": 0.46875, "learning_rate": 2.9923506507471857e-05, "loss": 2.1295, "step": 2013 }, { "epoch": 0.06497940154584189, "grad_norm": 0.482421875, "learning_rate": 2.9923347695510517e-05, "loss": 2.0997, "step": 2014 }, { "epoch": 0.06501166539963824, "grad_norm": 0.5625, "learning_rate": 2.992318871928367e-05, "loss": 2.1442, "step": 2015 }, { "epoch": 0.0650439292534346, "grad_norm": 0.498046875, "learning_rate": 2.9923029578793063e-05, "loss": 2.109, "step": 2016 }, { "epoch": 0.06507619310723094, "grad_norm": 0.51171875, "learning_rate": 2.9922870274040457e-05, "loss": 2.1301, "step": 2017 }, { "epoch": 0.06510845696102728, "grad_norm": 0.51953125, "learning_rate": 2.99227108050276e-05, "loss": 2.1313, "step": 2018 }, { "epoch": 0.06514072081482362, "grad_norm": 0.447265625, "learning_rate": 2.9922551171756252e-05, "loss": 2.1105, "step": 2019 }, { "epoch": 0.06517298466861997, "grad_norm": 0.46875, "learning_rate": 2.9922391374228157e-05, "loss": 2.1085, "step": 2020 }, { "epoch": 0.06520524852241633, "grad_norm": 0.515625, "learning_rate": 2.992223141244509e-05, "loss": 2.1043, "step": 2021 }, { "epoch": 0.06523751237621267, "grad_norm": 0.419921875, "learning_rate": 2.9922071286408806e-05, "loss": 2.1215, "step": 2022 }, { "epoch": 0.06526977623000901, "grad_norm": 0.4609375, "learning_rate": 2.9921910996121062e-05, "loss": 2.1342, "step": 2023 }, { "epoch": 0.06530204008380536, "grad_norm": 0.486328125, "learning_rate": 2.9921750541583626e-05, "loss": 2.1002, "step": 2024 }, { "epoch": 0.0653343039376017, "grad_norm": 0.412109375, "learning_rate": 2.9921589922798267e-05, "loss": 2.113, "step": 2025 }, { "epoch": 0.06536656779139806, "grad_norm": 0.458984375, "learning_rate": 2.9921429139766752e-05, "loss": 2.1213, "step": 2026 }, { "epoch": 0.0653988316451944, "grad_norm": 0.453125, "learning_rate": 2.9921268192490846e-05, "loss": 2.1507, "step": 2027 }, { "epoch": 0.06543109549899075, "grad_norm": 0.51171875, "learning_rate": 2.9921107080972324e-05, "loss": 2.1163, "step": 2028 }, { "epoch": 0.06546335935278709, "grad_norm": 0.48046875, "learning_rate": 2.9920945805212965e-05, "loss": 2.1028, "step": 2029 }, { "epoch": 0.06549562320658343, "grad_norm": 0.458984375, "learning_rate": 2.9920784365214535e-05, "loss": 2.1522, "step": 2030 }, { "epoch": 0.06552788706037979, "grad_norm": 0.46875, "learning_rate": 2.9920622760978814e-05, "loss": 2.1147, "step": 2031 }, { "epoch": 0.06556015091417614, "grad_norm": 0.4296875, "learning_rate": 2.9920460992507583e-05, "loss": 2.1374, "step": 2032 }, { "epoch": 0.06559241476797248, "grad_norm": 0.46484375, "learning_rate": 2.9920299059802626e-05, "loss": 2.1554, "step": 2033 }, { "epoch": 0.06562467862176882, "grad_norm": 0.419921875, "learning_rate": 2.9920136962865713e-05, "loss": 2.1692, "step": 2034 }, { "epoch": 0.06565694247556517, "grad_norm": 0.423828125, "learning_rate": 2.9919974701698638e-05, "loss": 2.1516, "step": 2035 }, { "epoch": 0.06568920632936152, "grad_norm": 0.4609375, "learning_rate": 2.9919812276303188e-05, "loss": 2.1103, "step": 2036 }, { "epoch": 0.06572147018315787, "grad_norm": 0.46484375, "learning_rate": 2.9919649686681145e-05, "loss": 2.1349, "step": 2037 }, { "epoch": 0.06575373403695421, "grad_norm": 0.5625, "learning_rate": 2.9919486932834304e-05, "loss": 2.1234, "step": 2038 }, { "epoch": 0.06578599789075056, "grad_norm": 0.5703125, "learning_rate": 2.991932401476445e-05, "loss": 2.1266, "step": 2039 }, { "epoch": 0.0658182617445469, "grad_norm": 0.494140625, "learning_rate": 2.9919160932473385e-05, "loss": 2.1089, "step": 2040 }, { "epoch": 0.06585052559834326, "grad_norm": 0.439453125, "learning_rate": 2.9918997685962898e-05, "loss": 2.1245, "step": 2041 }, { "epoch": 0.0658827894521396, "grad_norm": 0.453125, "learning_rate": 2.9918834275234786e-05, "loss": 2.1257, "step": 2042 }, { "epoch": 0.06591505330593594, "grad_norm": 0.50390625, "learning_rate": 2.991867070029085e-05, "loss": 2.1111, "step": 2043 }, { "epoch": 0.06594731715973229, "grad_norm": 0.462890625, "learning_rate": 2.991850696113289e-05, "loss": 2.129, "step": 2044 }, { "epoch": 0.06597958101352863, "grad_norm": 0.421875, "learning_rate": 2.9918343057762706e-05, "loss": 2.1353, "step": 2045 }, { "epoch": 0.06601184486732499, "grad_norm": 0.45703125, "learning_rate": 2.9918178990182106e-05, "loss": 2.0935, "step": 2046 }, { "epoch": 0.06604410872112133, "grad_norm": 0.5078125, "learning_rate": 2.991801475839289e-05, "loss": 2.1432, "step": 2047 }, { "epoch": 0.06607637257491768, "grad_norm": 0.48828125, "learning_rate": 2.9917850362396874e-05, "loss": 2.1311, "step": 2048 }, { "epoch": 0.06610863642871402, "grad_norm": 0.45703125, "learning_rate": 2.9917685802195863e-05, "loss": 2.1398, "step": 2049 }, { "epoch": 0.06614090028251036, "grad_norm": 0.54296875, "learning_rate": 2.991752107779167e-05, "loss": 2.1264, "step": 2050 }, { "epoch": 0.06617316413630672, "grad_norm": 0.55078125, "learning_rate": 2.9917356189186102e-05, "loss": 2.1214, "step": 2051 }, { "epoch": 0.06620542799010307, "grad_norm": 0.44140625, "learning_rate": 2.9917191136380982e-05, "loss": 2.1421, "step": 2052 }, { "epoch": 0.06623769184389941, "grad_norm": 0.55078125, "learning_rate": 2.991702591937812e-05, "loss": 2.1514, "step": 2053 }, { "epoch": 0.06626995569769575, "grad_norm": 0.625, "learning_rate": 2.9916860538179346e-05, "loss": 2.1383, "step": 2054 }, { "epoch": 0.0663022195514921, "grad_norm": 0.5625, "learning_rate": 2.9916694992786465e-05, "loss": 2.0949, "step": 2055 }, { "epoch": 0.06633448340528846, "grad_norm": 0.53515625, "learning_rate": 2.9916529283201312e-05, "loss": 2.1255, "step": 2056 }, { "epoch": 0.0663667472590848, "grad_norm": 0.5234375, "learning_rate": 2.99163634094257e-05, "loss": 2.115, "step": 2057 }, { "epoch": 0.06639901111288114, "grad_norm": 0.578125, "learning_rate": 2.9916197371461466e-05, "loss": 2.1196, "step": 2058 }, { "epoch": 0.06643127496667749, "grad_norm": 0.53515625, "learning_rate": 2.9916031169310425e-05, "loss": 2.1113, "step": 2059 }, { "epoch": 0.06646353882047383, "grad_norm": 0.55859375, "learning_rate": 2.9915864802974418e-05, "loss": 2.1027, "step": 2060 }, { "epoch": 0.06649580267427019, "grad_norm": 0.515625, "learning_rate": 2.9915698272455273e-05, "loss": 2.1348, "step": 2061 }, { "epoch": 0.06652806652806653, "grad_norm": 0.5, "learning_rate": 2.9915531577754817e-05, "loss": 2.0774, "step": 2062 }, { "epoch": 0.06656033038186288, "grad_norm": 0.45703125, "learning_rate": 2.9915364718874897e-05, "loss": 2.1184, "step": 2063 }, { "epoch": 0.06659259423565922, "grad_norm": 0.4921875, "learning_rate": 2.991519769581734e-05, "loss": 2.0942, "step": 2064 }, { "epoch": 0.06662485808945556, "grad_norm": 0.48046875, "learning_rate": 2.9915030508583982e-05, "loss": 2.0962, "step": 2065 }, { "epoch": 0.06665712194325192, "grad_norm": 0.470703125, "learning_rate": 2.9914863157176673e-05, "loss": 2.0995, "step": 2066 }, { "epoch": 0.06668938579704826, "grad_norm": 0.466796875, "learning_rate": 2.991469564159725e-05, "loss": 2.1016, "step": 2067 }, { "epoch": 0.06672164965084461, "grad_norm": 0.5078125, "learning_rate": 2.991452796184756e-05, "loss": 2.1287, "step": 2068 }, { "epoch": 0.06675391350464095, "grad_norm": 0.55859375, "learning_rate": 2.991436011792944e-05, "loss": 2.1463, "step": 2069 }, { "epoch": 0.0667861773584373, "grad_norm": 0.439453125, "learning_rate": 2.991419210984474e-05, "loss": 2.1122, "step": 2070 }, { "epoch": 0.06681844121223365, "grad_norm": 0.46484375, "learning_rate": 2.9914023937595323e-05, "loss": 2.1447, "step": 2071 }, { "epoch": 0.06685070506603, "grad_norm": 0.55078125, "learning_rate": 2.9913855601183022e-05, "loss": 2.1348, "step": 2072 }, { "epoch": 0.06688296891982634, "grad_norm": 0.609375, "learning_rate": 2.9913687100609702e-05, "loss": 2.0981, "step": 2073 }, { "epoch": 0.06691523277362268, "grad_norm": 0.75390625, "learning_rate": 2.9913518435877212e-05, "loss": 2.1267, "step": 2074 }, { "epoch": 0.06694749662741903, "grad_norm": 0.96484375, "learning_rate": 2.991334960698741e-05, "loss": 2.2089, "step": 2075 }, { "epoch": 0.06697976048121539, "grad_norm": 1.09375, "learning_rate": 2.9913180613942154e-05, "loss": 2.1568, "step": 2076 }, { "epoch": 0.06701202433501173, "grad_norm": 0.73828125, "learning_rate": 2.9913011456743306e-05, "loss": 2.1545, "step": 2077 }, { "epoch": 0.06704428818880807, "grad_norm": 0.62890625, "learning_rate": 2.9912842135392725e-05, "loss": 2.1925, "step": 2078 }, { "epoch": 0.06707655204260442, "grad_norm": 0.91796875, "learning_rate": 2.9912672649892276e-05, "loss": 2.2097, "step": 2079 }, { "epoch": 0.06710881589640076, "grad_norm": 0.59765625, "learning_rate": 2.9912503000243827e-05, "loss": 2.2119, "step": 2080 }, { "epoch": 0.0671410797501971, "grad_norm": 0.65625, "learning_rate": 2.991233318644924e-05, "loss": 2.1946, "step": 2081 }, { "epoch": 0.06717334360399346, "grad_norm": 0.71875, "learning_rate": 2.991216320851039e-05, "loss": 2.2078, "step": 2082 }, { "epoch": 0.0672056074577898, "grad_norm": 0.48046875, "learning_rate": 2.9911993066429143e-05, "loss": 2.188, "step": 2083 }, { "epoch": 0.06723787131158615, "grad_norm": 0.73046875, "learning_rate": 2.9911822760207375e-05, "loss": 2.1579, "step": 2084 }, { "epoch": 0.0672701351653825, "grad_norm": 0.466796875, "learning_rate": 2.991165228984696e-05, "loss": 2.1561, "step": 2085 }, { "epoch": 0.06730239901917884, "grad_norm": 0.6171875, "learning_rate": 2.9911481655349775e-05, "loss": 2.2196, "step": 2086 }, { "epoch": 0.0673346628729752, "grad_norm": 0.50390625, "learning_rate": 2.99113108567177e-05, "loss": 2.2013, "step": 2087 }, { "epoch": 0.06736692672677154, "grad_norm": 0.515625, "learning_rate": 2.9911139893952605e-05, "loss": 2.1938, "step": 2088 }, { "epoch": 0.06739919058056788, "grad_norm": 0.546875, "learning_rate": 2.9910968767056382e-05, "loss": 2.1926, "step": 2089 }, { "epoch": 0.06743145443436423, "grad_norm": 0.4609375, "learning_rate": 2.9910797476030914e-05, "loss": 2.2094, "step": 2090 }, { "epoch": 0.06746371828816057, "grad_norm": 0.63671875, "learning_rate": 2.9910626020878084e-05, "loss": 2.1748, "step": 2091 }, { "epoch": 0.06749598214195693, "grad_norm": 0.515625, "learning_rate": 2.991045440159978e-05, "loss": 2.2059, "step": 2092 }, { "epoch": 0.06752824599575327, "grad_norm": 0.5546875, "learning_rate": 2.9910282618197887e-05, "loss": 2.1652, "step": 2093 }, { "epoch": 0.06756050984954962, "grad_norm": 0.5859375, "learning_rate": 2.9910110670674302e-05, "loss": 2.166, "step": 2094 }, { "epoch": 0.06759277370334596, "grad_norm": 0.484375, "learning_rate": 2.9909938559030917e-05, "loss": 2.1728, "step": 2095 }, { "epoch": 0.0676250375571423, "grad_norm": 0.60546875, "learning_rate": 2.990976628326962e-05, "loss": 2.1831, "step": 2096 }, { "epoch": 0.06765730141093866, "grad_norm": 0.40625, "learning_rate": 2.9909593843392318e-05, "loss": 2.1204, "step": 2097 }, { "epoch": 0.067689565264735, "grad_norm": 0.53515625, "learning_rate": 2.99094212394009e-05, "loss": 2.184, "step": 2098 }, { "epoch": 0.06772182911853135, "grad_norm": 0.490234375, "learning_rate": 2.990924847129727e-05, "loss": 2.1893, "step": 2099 }, { "epoch": 0.06775409297232769, "grad_norm": 0.453125, "learning_rate": 2.9909075539083324e-05, "loss": 2.1529, "step": 2100 }, { "epoch": 0.06778635682612404, "grad_norm": 0.65234375, "learning_rate": 2.9908902442760977e-05, "loss": 2.1925, "step": 2101 }, { "epoch": 0.0678186206799204, "grad_norm": 0.5, "learning_rate": 2.9908729182332126e-05, "loss": 2.1576, "step": 2102 }, { "epoch": 0.06785088453371674, "grad_norm": 0.490234375, "learning_rate": 2.9908555757798677e-05, "loss": 2.1813, "step": 2103 }, { "epoch": 0.06788314838751308, "grad_norm": 0.5625, "learning_rate": 2.990838216916254e-05, "loss": 2.1957, "step": 2104 }, { "epoch": 0.06791541224130943, "grad_norm": 0.416015625, "learning_rate": 2.9908208416425633e-05, "loss": 2.1947, "step": 2105 }, { "epoch": 0.06794767609510577, "grad_norm": 0.48046875, "learning_rate": 2.990803449958986e-05, "loss": 2.2088, "step": 2106 }, { "epoch": 0.06797993994890213, "grad_norm": 0.4609375, "learning_rate": 2.9907860418657136e-05, "loss": 2.2077, "step": 2107 }, { "epoch": 0.06801220380269847, "grad_norm": 0.58203125, "learning_rate": 2.990768617362938e-05, "loss": 2.1881, "step": 2108 }, { "epoch": 0.06804446765649481, "grad_norm": 0.48046875, "learning_rate": 2.9907511764508514e-05, "loss": 2.1859, "step": 2109 }, { "epoch": 0.06807673151029116, "grad_norm": 0.40625, "learning_rate": 2.990733719129645e-05, "loss": 2.1991, "step": 2110 }, { "epoch": 0.0681089953640875, "grad_norm": 0.453125, "learning_rate": 2.990716245399511e-05, "loss": 2.178, "step": 2111 }, { "epoch": 0.06814125921788386, "grad_norm": 0.45703125, "learning_rate": 2.990698755260642e-05, "loss": 2.1544, "step": 2112 }, { "epoch": 0.0681735230716802, "grad_norm": 0.44921875, "learning_rate": 2.990681248713231e-05, "loss": 2.1881, "step": 2113 }, { "epoch": 0.06820578692547655, "grad_norm": 0.423828125, "learning_rate": 2.99066372575747e-05, "loss": 2.154, "step": 2114 }, { "epoch": 0.06823805077927289, "grad_norm": 0.4609375, "learning_rate": 2.9906461863935523e-05, "loss": 2.1896, "step": 2115 }, { "epoch": 0.06827031463306923, "grad_norm": 0.431640625, "learning_rate": 2.9906286306216705e-05, "loss": 2.1978, "step": 2116 }, { "epoch": 0.06830257848686559, "grad_norm": 0.44140625, "learning_rate": 2.9906110584420183e-05, "loss": 2.2114, "step": 2117 }, { "epoch": 0.06833484234066194, "grad_norm": 0.462890625, "learning_rate": 2.9905934698547886e-05, "loss": 2.1793, "step": 2118 }, { "epoch": 0.06836710619445828, "grad_norm": 0.41796875, "learning_rate": 2.9905758648601757e-05, "loss": 2.1816, "step": 2119 }, { "epoch": 0.06839937004825462, "grad_norm": 0.45703125, "learning_rate": 2.9905582434583726e-05, "loss": 2.2104, "step": 2120 }, { "epoch": 0.06843163390205097, "grad_norm": 0.49609375, "learning_rate": 2.9905406056495742e-05, "loss": 2.2295, "step": 2121 }, { "epoch": 0.06846389775584732, "grad_norm": 0.4765625, "learning_rate": 2.990522951433974e-05, "loss": 2.1697, "step": 2122 }, { "epoch": 0.06849616160964367, "grad_norm": 0.458984375, "learning_rate": 2.9905052808117663e-05, "loss": 2.1847, "step": 2123 }, { "epoch": 0.06852842546344001, "grad_norm": 0.484375, "learning_rate": 2.9904875937831458e-05, "loss": 2.1888, "step": 2124 }, { "epoch": 0.06856068931723636, "grad_norm": 0.439453125, "learning_rate": 2.9904698903483068e-05, "loss": 2.1657, "step": 2125 }, { "epoch": 0.0685929531710327, "grad_norm": 0.4296875, "learning_rate": 2.990452170507445e-05, "loss": 2.1526, "step": 2126 }, { "epoch": 0.06862521702482906, "grad_norm": 0.4453125, "learning_rate": 2.9904344342607546e-05, "loss": 2.1788, "step": 2127 }, { "epoch": 0.0686574808786254, "grad_norm": 0.462890625, "learning_rate": 2.9904166816084313e-05, "loss": 2.1797, "step": 2128 }, { "epoch": 0.06868974473242175, "grad_norm": 0.546875, "learning_rate": 2.9903989125506705e-05, "loss": 2.1745, "step": 2129 }, { "epoch": 0.06872200858621809, "grad_norm": 0.4921875, "learning_rate": 2.9903811270876674e-05, "loss": 2.188, "step": 2130 }, { "epoch": 0.06875427244001443, "grad_norm": 0.462890625, "learning_rate": 2.9903633252196186e-05, "loss": 2.2003, "step": 2131 }, { "epoch": 0.06878653629381079, "grad_norm": 0.5078125, "learning_rate": 2.990345506946719e-05, "loss": 2.1997, "step": 2132 }, { "epoch": 0.06881880014760713, "grad_norm": 0.47265625, "learning_rate": 2.990327672269165e-05, "loss": 2.1626, "step": 2133 }, { "epoch": 0.06885106400140348, "grad_norm": 0.435546875, "learning_rate": 2.990309821187154e-05, "loss": 2.167, "step": 2134 }, { "epoch": 0.06888332785519982, "grad_norm": 0.51171875, "learning_rate": 2.9902919537008812e-05, "loss": 2.1473, "step": 2135 }, { "epoch": 0.06891559170899617, "grad_norm": 0.46875, "learning_rate": 2.9902740698105436e-05, "loss": 2.1886, "step": 2136 }, { "epoch": 0.06894785556279252, "grad_norm": 0.57421875, "learning_rate": 2.990256169516338e-05, "loss": 2.1892, "step": 2137 }, { "epoch": 0.06898011941658887, "grad_norm": 0.79296875, "learning_rate": 2.990238252818462e-05, "loss": 2.1853, "step": 2138 }, { "epoch": 0.06901238327038521, "grad_norm": 1.1640625, "learning_rate": 2.990220319717112e-05, "loss": 2.1844, "step": 2139 }, { "epoch": 0.06904464712418155, "grad_norm": 1.1171875, "learning_rate": 2.990202370212486e-05, "loss": 2.1988, "step": 2140 }, { "epoch": 0.0690769109779779, "grad_norm": 0.6015625, "learning_rate": 2.9901844043047816e-05, "loss": 2.1921, "step": 2141 }, { "epoch": 0.06910917483177426, "grad_norm": 0.99609375, "learning_rate": 2.990166421994196e-05, "loss": 2.1783, "step": 2142 }, { "epoch": 0.0691414386855706, "grad_norm": 0.8125, "learning_rate": 2.9901484232809277e-05, "loss": 2.1759, "step": 2143 }, { "epoch": 0.06917370253936694, "grad_norm": 0.625, "learning_rate": 2.9901304081651742e-05, "loss": 2.2064, "step": 2144 }, { "epoch": 0.06920596639316329, "grad_norm": 0.93359375, "learning_rate": 2.9901123766471346e-05, "loss": 2.1847, "step": 2145 }, { "epoch": 0.06923823024695963, "grad_norm": 0.51953125, "learning_rate": 2.990094328727007e-05, "loss": 2.1835, "step": 2146 }, { "epoch": 0.06927049410075599, "grad_norm": 0.71875, "learning_rate": 2.9900762644049897e-05, "loss": 2.219, "step": 2147 }, { "epoch": 0.06930275795455233, "grad_norm": 0.6484375, "learning_rate": 2.990058183681282e-05, "loss": 2.1997, "step": 2148 }, { "epoch": 0.06933502180834868, "grad_norm": 0.51953125, "learning_rate": 2.9900400865560827e-05, "loss": 2.1982, "step": 2149 }, { "epoch": 0.06936728566214502, "grad_norm": 0.67578125, "learning_rate": 2.990021973029591e-05, "loss": 2.1823, "step": 2150 }, { "epoch": 0.06939954951594136, "grad_norm": 0.50390625, "learning_rate": 2.9900038431020066e-05, "loss": 2.1571, "step": 2151 }, { "epoch": 0.06943181336973772, "grad_norm": 0.66015625, "learning_rate": 2.9899856967735286e-05, "loss": 2.185, "step": 2152 }, { "epoch": 0.06946407722353407, "grad_norm": 0.52734375, "learning_rate": 2.9899675340443574e-05, "loss": 2.2181, "step": 2153 }, { "epoch": 0.06949634107733041, "grad_norm": 0.70703125, "learning_rate": 2.989949354914692e-05, "loss": 2.1479, "step": 2154 }, { "epoch": 0.06952860493112675, "grad_norm": 0.796875, "learning_rate": 2.9899311593847332e-05, "loss": 2.1491, "step": 2155 }, { "epoch": 0.0695608687849231, "grad_norm": 1.1015625, "learning_rate": 2.989912947454681e-05, "loss": 2.1579, "step": 2156 }, { "epoch": 0.06959313263871945, "grad_norm": 0.8984375, "learning_rate": 2.9898947191247362e-05, "loss": 2.1133, "step": 2157 }, { "epoch": 0.0696253964925158, "grad_norm": 0.93359375, "learning_rate": 2.9898764743950986e-05, "loss": 2.0788, "step": 2158 }, { "epoch": 0.06965766034631214, "grad_norm": 2.15625, "learning_rate": 2.98985821326597e-05, "loss": 2.2206, "step": 2159 }, { "epoch": 0.06968992420010849, "grad_norm": 1.375, "learning_rate": 2.989839935737551e-05, "loss": 2.2379, "step": 2160 }, { "epoch": 0.06972218805390483, "grad_norm": 1.4375, "learning_rate": 2.9898216418100425e-05, "loss": 2.2618, "step": 2161 }, { "epoch": 0.06975445190770119, "grad_norm": 0.90625, "learning_rate": 2.989803331483647e-05, "loss": 2.1538, "step": 2162 }, { "epoch": 0.06978671576149753, "grad_norm": 1.2109375, "learning_rate": 2.989785004758564e-05, "loss": 2.1589, "step": 2163 }, { "epoch": 0.06981897961529387, "grad_norm": 0.79296875, "learning_rate": 2.9897666616349972e-05, "loss": 2.2041, "step": 2164 }, { "epoch": 0.06985124346909022, "grad_norm": 0.9765625, "learning_rate": 2.9897483021131476e-05, "loss": 2.2436, "step": 2165 }, { "epoch": 0.06988350732288656, "grad_norm": 0.765625, "learning_rate": 2.9897299261932172e-05, "loss": 2.2131, "step": 2166 }, { "epoch": 0.0699157711766829, "grad_norm": 0.83984375, "learning_rate": 2.989711533875409e-05, "loss": 2.1784, "step": 2167 }, { "epoch": 0.06994803503047926, "grad_norm": 0.75390625, "learning_rate": 2.9896931251599248e-05, "loss": 2.1868, "step": 2168 }, { "epoch": 0.06998029888427561, "grad_norm": 0.66015625, "learning_rate": 2.9896747000469673e-05, "loss": 2.1568, "step": 2169 }, { "epoch": 0.07001256273807195, "grad_norm": 0.69140625, "learning_rate": 2.989656258536739e-05, "loss": 2.1472, "step": 2170 }, { "epoch": 0.0700448265918683, "grad_norm": 0.62109375, "learning_rate": 2.9896378006294438e-05, "loss": 2.138, "step": 2171 }, { "epoch": 0.07007709044566464, "grad_norm": 0.60546875, "learning_rate": 2.989619326325284e-05, "loss": 2.2035, "step": 2172 }, { "epoch": 0.070109354299461, "grad_norm": 0.609375, "learning_rate": 2.9896008356244634e-05, "loss": 2.1674, "step": 2173 }, { "epoch": 0.07014161815325734, "grad_norm": 0.5703125, "learning_rate": 2.9895823285271856e-05, "loss": 2.1119, "step": 2174 }, { "epoch": 0.07017388200705368, "grad_norm": 0.59375, "learning_rate": 2.9895638050336542e-05, "loss": 2.1803, "step": 2175 }, { "epoch": 0.07020614586085003, "grad_norm": 0.53515625, "learning_rate": 2.9895452651440728e-05, "loss": 2.1987, "step": 2176 }, { "epoch": 0.07023840971464637, "grad_norm": 0.5625, "learning_rate": 2.989526708858646e-05, "loss": 2.2058, "step": 2177 }, { "epoch": 0.07027067356844273, "grad_norm": 0.51171875, "learning_rate": 2.9895081361775777e-05, "loss": 2.1815, "step": 2178 }, { "epoch": 0.07030293742223907, "grad_norm": 0.5703125, "learning_rate": 2.9894895471010723e-05, "loss": 2.2582, "step": 2179 }, { "epoch": 0.07033520127603542, "grad_norm": 0.8515625, "learning_rate": 2.989470941629334e-05, "loss": 2.21, "step": 2180 }, { "epoch": 0.07036746512983176, "grad_norm": 0.62890625, "learning_rate": 2.9894523197625688e-05, "loss": 2.217, "step": 2181 }, { "epoch": 0.0703997289836281, "grad_norm": 0.65234375, "learning_rate": 2.9894336815009807e-05, "loss": 2.2543, "step": 2182 }, { "epoch": 0.07043199283742446, "grad_norm": 0.67578125, "learning_rate": 2.9894150268447756e-05, "loss": 2.228, "step": 2183 }, { "epoch": 0.0704642566912208, "grad_norm": 0.6015625, "learning_rate": 2.989396355794158e-05, "loss": 2.1972, "step": 2184 }, { "epoch": 0.07049652054501715, "grad_norm": 0.55078125, "learning_rate": 2.9893776683493338e-05, "loss": 2.0866, "step": 2185 }, { "epoch": 0.0705287843988135, "grad_norm": 0.61328125, "learning_rate": 2.9893589645105088e-05, "loss": 2.0432, "step": 2186 }, { "epoch": 0.07056104825260984, "grad_norm": 0.62109375, "learning_rate": 2.9893402442778887e-05, "loss": 2.042, "step": 2187 }, { "epoch": 0.0705933121064062, "grad_norm": 0.55078125, "learning_rate": 2.9893215076516794e-05, "loss": 2.0813, "step": 2188 }, { "epoch": 0.07062557596020254, "grad_norm": 0.63671875, "learning_rate": 2.9893027546320876e-05, "loss": 2.209, "step": 2189 }, { "epoch": 0.07065783981399888, "grad_norm": 0.56640625, "learning_rate": 2.9892839852193197e-05, "loss": 2.2163, "step": 2190 }, { "epoch": 0.07069010366779523, "grad_norm": 0.55078125, "learning_rate": 2.989265199413582e-05, "loss": 2.2048, "step": 2191 }, { "epoch": 0.07072236752159157, "grad_norm": 0.6015625, "learning_rate": 2.989246397215081e-05, "loss": 2.1433, "step": 2192 }, { "epoch": 0.07075463137538793, "grad_norm": 0.60546875, "learning_rate": 2.9892275786240247e-05, "loss": 2.1031, "step": 2193 }, { "epoch": 0.07078689522918427, "grad_norm": 0.56640625, "learning_rate": 2.9892087436406193e-05, "loss": 2.1481, "step": 2194 }, { "epoch": 0.07081915908298062, "grad_norm": 0.5859375, "learning_rate": 2.989189892265072e-05, "loss": 2.1819, "step": 2195 }, { "epoch": 0.07085142293677696, "grad_norm": 0.515625, "learning_rate": 2.9891710244975916e-05, "loss": 2.1704, "step": 2196 }, { "epoch": 0.0708836867905733, "grad_norm": 0.51953125, "learning_rate": 2.9891521403383843e-05, "loss": 2.21, "step": 2197 }, { "epoch": 0.07091595064436966, "grad_norm": 0.54296875, "learning_rate": 2.9891332397876583e-05, "loss": 2.1253, "step": 2198 }, { "epoch": 0.070948214498166, "grad_norm": 0.5234375, "learning_rate": 2.9891143228456224e-05, "loss": 2.2053, "step": 2199 }, { "epoch": 0.07098047835196235, "grad_norm": 0.5546875, "learning_rate": 2.9890953895124842e-05, "loss": 2.2352, "step": 2200 }, { "epoch": 0.07101274220575869, "grad_norm": 0.5234375, "learning_rate": 2.989076439788452e-05, "loss": 2.1622, "step": 2201 }, { "epoch": 0.07104500605955504, "grad_norm": 0.6875, "learning_rate": 2.989057473673735e-05, "loss": 2.119, "step": 2202 }, { "epoch": 0.07107726991335139, "grad_norm": 0.60546875, "learning_rate": 2.989038491168541e-05, "loss": 2.1598, "step": 2203 }, { "epoch": 0.07110953376714774, "grad_norm": 0.5625, "learning_rate": 2.9890194922730802e-05, "loss": 2.0558, "step": 2204 }, { "epoch": 0.07114179762094408, "grad_norm": 0.515625, "learning_rate": 2.9890004769875606e-05, "loss": 2.1599, "step": 2205 }, { "epoch": 0.07117406147474042, "grad_norm": 0.515625, "learning_rate": 2.9889814453121916e-05, "loss": 2.17, "step": 2206 }, { "epoch": 0.07120632532853677, "grad_norm": 0.494140625, "learning_rate": 2.9889623972471837e-05, "loss": 2.1494, "step": 2207 }, { "epoch": 0.07123858918233313, "grad_norm": 0.49609375, "learning_rate": 2.988943332792746e-05, "loss": 2.2292, "step": 2208 }, { "epoch": 0.07127085303612947, "grad_norm": 0.478515625, "learning_rate": 2.988924251949088e-05, "loss": 2.1983, "step": 2209 }, { "epoch": 0.07130311688992581, "grad_norm": 0.50390625, "learning_rate": 2.9889051547164196e-05, "loss": 2.2378, "step": 2210 }, { "epoch": 0.07133538074372216, "grad_norm": 0.46484375, "learning_rate": 2.9888860410949522e-05, "loss": 2.1881, "step": 2211 }, { "epoch": 0.0713676445975185, "grad_norm": 0.4609375, "learning_rate": 2.9888669110848948e-05, "loss": 2.1633, "step": 2212 }, { "epoch": 0.07139990845131486, "grad_norm": 0.49609375, "learning_rate": 2.988847764686459e-05, "loss": 2.1815, "step": 2213 }, { "epoch": 0.0714321723051112, "grad_norm": 0.5, "learning_rate": 2.9888286018998544e-05, "loss": 2.2095, "step": 2214 }, { "epoch": 0.07146443615890755, "grad_norm": 0.53515625, "learning_rate": 2.9888094227252925e-05, "loss": 2.1386, "step": 2215 }, { "epoch": 0.07149670001270389, "grad_norm": 0.640625, "learning_rate": 2.9887902271629857e-05, "loss": 2.1444, "step": 2216 }, { "epoch": 0.07152896386650023, "grad_norm": 0.71484375, "learning_rate": 2.9887710152131433e-05, "loss": 2.0945, "step": 2217 }, { "epoch": 0.07156122772029659, "grad_norm": 0.77734375, "learning_rate": 2.9887517868759774e-05, "loss": 2.2227, "step": 2218 }, { "epoch": 0.07159349157409294, "grad_norm": 0.71875, "learning_rate": 2.9887325421517008e-05, "loss": 2.1437, "step": 2219 }, { "epoch": 0.07162575542788928, "grad_norm": 0.5859375, "learning_rate": 2.9887132810405235e-05, "loss": 2.2488, "step": 2220 }, { "epoch": 0.07165801928168562, "grad_norm": 0.6171875, "learning_rate": 2.9886940035426587e-05, "loss": 2.2025, "step": 2221 }, { "epoch": 0.07169028313548197, "grad_norm": 0.57421875, "learning_rate": 2.9886747096583184e-05, "loss": 2.2069, "step": 2222 }, { "epoch": 0.07172254698927832, "grad_norm": 0.53515625, "learning_rate": 2.9886553993877145e-05, "loss": 2.2145, "step": 2223 }, { "epoch": 0.07175481084307467, "grad_norm": 0.55859375, "learning_rate": 2.9886360727310606e-05, "loss": 2.2502, "step": 2224 }, { "epoch": 0.07178707469687101, "grad_norm": 0.5625, "learning_rate": 2.9886167296885683e-05, "loss": 2.1479, "step": 2225 }, { "epoch": 0.07181933855066736, "grad_norm": 0.51953125, "learning_rate": 2.9885973702604513e-05, "loss": 2.239, "step": 2226 }, { "epoch": 0.0718516024044637, "grad_norm": 0.54296875, "learning_rate": 2.9885779944469214e-05, "loss": 2.1841, "step": 2227 }, { "epoch": 0.07188386625826006, "grad_norm": 0.52734375, "learning_rate": 2.988558602248194e-05, "loss": 2.171, "step": 2228 }, { "epoch": 0.0719161301120564, "grad_norm": 0.515625, "learning_rate": 2.988539193664481e-05, "loss": 2.1271, "step": 2229 }, { "epoch": 0.07194839396585274, "grad_norm": 0.52734375, "learning_rate": 2.988519768695996e-05, "loss": 2.1901, "step": 2230 }, { "epoch": 0.07198065781964909, "grad_norm": 0.57421875, "learning_rate": 2.9885003273429538e-05, "loss": 2.2498, "step": 2231 }, { "epoch": 0.07201292167344543, "grad_norm": 0.5078125, "learning_rate": 2.9884808696055675e-05, "loss": 2.1808, "step": 2232 }, { "epoch": 0.07204518552724179, "grad_norm": 0.59765625, "learning_rate": 2.988461395484052e-05, "loss": 2.114, "step": 2233 }, { "epoch": 0.07207744938103813, "grad_norm": 0.55078125, "learning_rate": 2.988441904978621e-05, "loss": 2.2166, "step": 2234 }, { "epoch": 0.07210971323483448, "grad_norm": 0.5078125, "learning_rate": 2.988422398089489e-05, "loss": 2.2553, "step": 2235 }, { "epoch": 0.07214197708863082, "grad_norm": 0.55078125, "learning_rate": 2.988402874816872e-05, "loss": 2.2168, "step": 2236 }, { "epoch": 0.07217424094242716, "grad_norm": 0.50390625, "learning_rate": 2.988383335160983e-05, "loss": 2.2102, "step": 2237 }, { "epoch": 0.07220650479622352, "grad_norm": 0.51953125, "learning_rate": 2.9883637791220385e-05, "loss": 2.2243, "step": 2238 }, { "epoch": 0.07223876865001987, "grad_norm": 0.5625, "learning_rate": 2.988344206700253e-05, "loss": 2.2011, "step": 2239 }, { "epoch": 0.07227103250381621, "grad_norm": 0.47265625, "learning_rate": 2.9883246178958422e-05, "loss": 2.1776, "step": 2240 }, { "epoch": 0.07230329635761255, "grad_norm": 0.51953125, "learning_rate": 2.9883050127090218e-05, "loss": 2.1602, "step": 2241 }, { "epoch": 0.0723355602114089, "grad_norm": 0.51171875, "learning_rate": 2.9882853911400075e-05, "loss": 2.0953, "step": 2242 }, { "epoch": 0.07236782406520526, "grad_norm": 0.484375, "learning_rate": 2.9882657531890155e-05, "loss": 2.1397, "step": 2243 }, { "epoch": 0.0724000879190016, "grad_norm": 0.494140625, "learning_rate": 2.9882460988562616e-05, "loss": 2.1233, "step": 2244 }, { "epoch": 0.07243235177279794, "grad_norm": 0.44140625, "learning_rate": 2.988226428141962e-05, "loss": 2.1085, "step": 2245 }, { "epoch": 0.07246461562659429, "grad_norm": 0.458984375, "learning_rate": 2.9882067410463343e-05, "loss": 2.1126, "step": 2246 }, { "epoch": 0.07249687948039063, "grad_norm": 0.4375, "learning_rate": 2.9881870375695935e-05, "loss": 2.1088, "step": 2247 }, { "epoch": 0.07252914333418699, "grad_norm": 0.4453125, "learning_rate": 2.9881673177119582e-05, "loss": 2.0811, "step": 2248 }, { "epoch": 0.07256140718798333, "grad_norm": 0.46484375, "learning_rate": 2.9881475814736445e-05, "loss": 2.1226, "step": 2249 }, { "epoch": 0.07259367104177968, "grad_norm": 0.4296875, "learning_rate": 2.9881278288548697e-05, "loss": 2.0929, "step": 2250 }, { "epoch": 0.07262593489557602, "grad_norm": 0.431640625, "learning_rate": 2.9881080598558515e-05, "loss": 2.0944, "step": 2251 }, { "epoch": 0.07265819874937236, "grad_norm": 0.439453125, "learning_rate": 2.988088274476807e-05, "loss": 2.162, "step": 2252 }, { "epoch": 0.07269046260316872, "grad_norm": 0.466796875, "learning_rate": 2.9880684727179543e-05, "loss": 2.1436, "step": 2253 }, { "epoch": 0.07272272645696506, "grad_norm": 0.470703125, "learning_rate": 2.9880486545795117e-05, "loss": 2.1201, "step": 2254 }, { "epoch": 0.07275499031076141, "grad_norm": 0.44140625, "learning_rate": 2.9880288200616968e-05, "loss": 2.1089, "step": 2255 }, { "epoch": 0.07278725416455775, "grad_norm": 0.462890625, "learning_rate": 2.9880089691647283e-05, "loss": 2.1407, "step": 2256 }, { "epoch": 0.0728195180183541, "grad_norm": 0.47265625, "learning_rate": 2.9879891018888246e-05, "loss": 2.1211, "step": 2257 }, { "epoch": 0.07285178187215044, "grad_norm": 0.46484375, "learning_rate": 2.9879692182342046e-05, "loss": 2.1373, "step": 2258 }, { "epoch": 0.0728840457259468, "grad_norm": 0.46875, "learning_rate": 2.9879493182010865e-05, "loss": 2.1307, "step": 2259 }, { "epoch": 0.07291630957974314, "grad_norm": 0.458984375, "learning_rate": 2.9879294017896895e-05, "loss": 2.0712, "step": 2260 }, { "epoch": 0.07294857343353948, "grad_norm": 0.470703125, "learning_rate": 2.9879094690002333e-05, "loss": 2.1265, "step": 2261 }, { "epoch": 0.07298083728733583, "grad_norm": 0.609375, "learning_rate": 2.9878895198329372e-05, "loss": 2.126, "step": 2262 }, { "epoch": 0.07301310114113217, "grad_norm": 0.87890625, "learning_rate": 2.987869554288021e-05, "loss": 2.1318, "step": 2263 }, { "epoch": 0.07304536499492853, "grad_norm": 1.2421875, "learning_rate": 2.9878495723657037e-05, "loss": 2.1091, "step": 2264 }, { "epoch": 0.07307762884872487, "grad_norm": 0.58984375, "learning_rate": 2.9878295740662056e-05, "loss": 2.0856, "step": 2265 }, { "epoch": 0.07310989270252122, "grad_norm": 0.58984375, "learning_rate": 2.9878095593897474e-05, "loss": 2.1095, "step": 2266 }, { "epoch": 0.07314215655631756, "grad_norm": 0.82421875, "learning_rate": 2.9877895283365484e-05, "loss": 2.1112, "step": 2267 }, { "epoch": 0.0731744204101139, "grad_norm": 0.490234375, "learning_rate": 2.98776948090683e-05, "loss": 2.0989, "step": 2268 }, { "epoch": 0.07320668426391026, "grad_norm": 0.61328125, "learning_rate": 2.9877494171008122e-05, "loss": 2.1273, "step": 2269 }, { "epoch": 0.0732389481177066, "grad_norm": 0.58203125, "learning_rate": 2.9877293369187164e-05, "loss": 2.1393, "step": 2270 }, { "epoch": 0.07327121197150295, "grad_norm": 0.4609375, "learning_rate": 2.9877092403607632e-05, "loss": 2.1427, "step": 2271 }, { "epoch": 0.0733034758252993, "grad_norm": 0.5625, "learning_rate": 2.987689127427174e-05, "loss": 2.1374, "step": 2272 }, { "epoch": 0.07333573967909564, "grad_norm": 0.4609375, "learning_rate": 2.9876689981181697e-05, "loss": 2.0994, "step": 2273 }, { "epoch": 0.073368003532892, "grad_norm": 0.546875, "learning_rate": 2.987648852433973e-05, "loss": 2.141, "step": 2274 }, { "epoch": 0.07340026738668834, "grad_norm": 0.494140625, "learning_rate": 2.9876286903748046e-05, "loss": 2.1144, "step": 2275 }, { "epoch": 0.07343253124048468, "grad_norm": 0.484375, "learning_rate": 2.987608511940887e-05, "loss": 2.1248, "step": 2276 }, { "epoch": 0.07346479509428103, "grad_norm": 0.5234375, "learning_rate": 2.987588317132442e-05, "loss": 2.1107, "step": 2277 }, { "epoch": 0.07349705894807737, "grad_norm": 0.427734375, "learning_rate": 2.987568105949692e-05, "loss": 2.1502, "step": 2278 }, { "epoch": 0.07352932280187373, "grad_norm": 0.53515625, "learning_rate": 2.9875478783928595e-05, "loss": 2.1028, "step": 2279 }, { "epoch": 0.07356158665567007, "grad_norm": 0.458984375, "learning_rate": 2.987527634462167e-05, "loss": 2.1159, "step": 2280 }, { "epoch": 0.07359385050946642, "grad_norm": 0.4921875, "learning_rate": 2.9875073741578376e-05, "loss": 2.1182, "step": 2281 }, { "epoch": 0.07362611436326276, "grad_norm": 0.50390625, "learning_rate": 2.987487097480094e-05, "loss": 2.1224, "step": 2282 }, { "epoch": 0.0736583782170591, "grad_norm": 0.44921875, "learning_rate": 2.9874668044291596e-05, "loss": 2.1291, "step": 2283 }, { "epoch": 0.07369064207085546, "grad_norm": 0.52734375, "learning_rate": 2.9874464950052575e-05, "loss": 2.1716, "step": 2284 }, { "epoch": 0.0737229059246518, "grad_norm": 0.453125, "learning_rate": 2.9874261692086118e-05, "loss": 2.1379, "step": 2285 }, { "epoch": 0.07375516977844815, "grad_norm": 0.451171875, "learning_rate": 2.9874058270394456e-05, "loss": 2.0979, "step": 2286 }, { "epoch": 0.07378743363224449, "grad_norm": 0.451171875, "learning_rate": 2.9873854684979837e-05, "loss": 2.1177, "step": 2287 }, { "epoch": 0.07381969748604084, "grad_norm": 0.435546875, "learning_rate": 2.9873650935844496e-05, "loss": 2.0956, "step": 2288 }, { "epoch": 0.0738519613398372, "grad_norm": 0.4375, "learning_rate": 2.987344702299067e-05, "loss": 2.0951, "step": 2289 }, { "epoch": 0.07388422519363354, "grad_norm": 0.408203125, "learning_rate": 2.987324294642061e-05, "loss": 2.1281, "step": 2290 }, { "epoch": 0.07391648904742988, "grad_norm": 0.412109375, "learning_rate": 2.9873038706136563e-05, "loss": 2.1271, "step": 2291 }, { "epoch": 0.07394875290122623, "grad_norm": 0.423828125, "learning_rate": 2.987283430214078e-05, "loss": 2.1243, "step": 2292 }, { "epoch": 0.07398101675502257, "grad_norm": 0.46875, "learning_rate": 2.98726297344355e-05, "loss": 2.118, "step": 2293 }, { "epoch": 0.07401328060881893, "grad_norm": 0.40625, "learning_rate": 2.9872425003022983e-05, "loss": 2.1294, "step": 2294 }, { "epoch": 0.07404554446261527, "grad_norm": 0.4453125, "learning_rate": 2.9872220107905482e-05, "loss": 2.1116, "step": 2295 }, { "epoch": 0.07407780831641161, "grad_norm": 0.443359375, "learning_rate": 2.9872015049085253e-05, "loss": 2.0765, "step": 2296 }, { "epoch": 0.07411007217020796, "grad_norm": 0.470703125, "learning_rate": 2.9871809826564547e-05, "loss": 2.096, "step": 2297 }, { "epoch": 0.0741423360240043, "grad_norm": 0.462890625, "learning_rate": 2.9871604440345634e-05, "loss": 2.0946, "step": 2298 }, { "epoch": 0.07417459987780066, "grad_norm": 0.49609375, "learning_rate": 2.9871398890430763e-05, "loss": 2.1012, "step": 2299 }, { "epoch": 0.074206863731597, "grad_norm": 0.46875, "learning_rate": 2.9871193176822207e-05, "loss": 2.1225, "step": 2300 }, { "epoch": 0.07423912758539335, "grad_norm": 0.451171875, "learning_rate": 2.9870987299522218e-05, "loss": 2.1029, "step": 2301 }, { "epoch": 0.07427139143918969, "grad_norm": 0.490234375, "learning_rate": 2.9870781258533077e-05, "loss": 2.1232, "step": 2302 }, { "epoch": 0.07430365529298603, "grad_norm": 0.48828125, "learning_rate": 2.9870575053857038e-05, "loss": 2.1059, "step": 2303 }, { "epoch": 0.07433591914678239, "grad_norm": 0.46875, "learning_rate": 2.987036868549638e-05, "loss": 2.121, "step": 2304 }, { "epoch": 0.07436818300057874, "grad_norm": 0.455078125, "learning_rate": 2.9870162153453368e-05, "loss": 2.1242, "step": 2305 }, { "epoch": 0.07440044685437508, "grad_norm": 0.453125, "learning_rate": 2.9869955457730283e-05, "loss": 2.0936, "step": 2306 }, { "epoch": 0.07443271070817142, "grad_norm": 0.4375, "learning_rate": 2.9869748598329394e-05, "loss": 2.0793, "step": 2307 }, { "epoch": 0.07446497456196777, "grad_norm": 0.43359375, "learning_rate": 2.9869541575252978e-05, "loss": 2.1032, "step": 2308 }, { "epoch": 0.07449723841576412, "grad_norm": 0.4765625, "learning_rate": 2.9869334388503315e-05, "loss": 2.0865, "step": 2309 }, { "epoch": 0.07452950226956047, "grad_norm": 0.48046875, "learning_rate": 2.986912703808269e-05, "loss": 2.1187, "step": 2310 }, { "epoch": 0.07456176612335681, "grad_norm": 0.443359375, "learning_rate": 2.9868919523993382e-05, "loss": 2.1296, "step": 2311 }, { "epoch": 0.07459402997715316, "grad_norm": 0.453125, "learning_rate": 2.9868711846237672e-05, "loss": 2.1118, "step": 2312 }, { "epoch": 0.0746262938309495, "grad_norm": 0.609375, "learning_rate": 2.986850400481785e-05, "loss": 2.1322, "step": 2313 }, { "epoch": 0.07465855768474586, "grad_norm": 0.69140625, "learning_rate": 2.9868295999736202e-05, "loss": 2.1149, "step": 2314 }, { "epoch": 0.0746908215385422, "grad_norm": 0.8203125, "learning_rate": 2.986808783099502e-05, "loss": 2.0883, "step": 2315 }, { "epoch": 0.07472308539233855, "grad_norm": 0.828125, "learning_rate": 2.986787949859659e-05, "loss": 2.0879, "step": 2316 }, { "epoch": 0.07475534924613489, "grad_norm": 0.66015625, "learning_rate": 2.9867671002543216e-05, "loss": 2.1089, "step": 2317 }, { "epoch": 0.07478761309993123, "grad_norm": 0.53125, "learning_rate": 2.9867462342837176e-05, "loss": 2.1143, "step": 2318 }, { "epoch": 0.07481987695372759, "grad_norm": 0.63671875, "learning_rate": 2.9867253519480783e-05, "loss": 2.1071, "step": 2319 }, { "epoch": 0.07485214080752393, "grad_norm": 0.5234375, "learning_rate": 2.986704453247633e-05, "loss": 2.091, "step": 2320 }, { "epoch": 0.07488440466132028, "grad_norm": 0.5703125, "learning_rate": 2.986683538182611e-05, "loss": 2.12, "step": 2321 }, { "epoch": 0.07491666851511662, "grad_norm": 0.5546875, "learning_rate": 2.9866626067532437e-05, "loss": 2.1005, "step": 2322 }, { "epoch": 0.07494893236891297, "grad_norm": 0.47265625, "learning_rate": 2.9866416589597608e-05, "loss": 2.1102, "step": 2323 }, { "epoch": 0.07498119622270932, "grad_norm": 0.62890625, "learning_rate": 2.9866206948023925e-05, "loss": 2.1601, "step": 2324 }, { "epoch": 0.07501346007650567, "grad_norm": 0.55859375, "learning_rate": 2.986599714281371e-05, "loss": 2.1925, "step": 2325 }, { "epoch": 0.07504572393030201, "grad_norm": 0.5, "learning_rate": 2.9865787173969257e-05, "loss": 2.1935, "step": 2326 }, { "epoch": 0.07507798778409835, "grad_norm": 0.61328125, "learning_rate": 2.9865577041492883e-05, "loss": 2.1843, "step": 2327 }, { "epoch": 0.0751102516378947, "grad_norm": 0.51171875, "learning_rate": 2.9865366745386904e-05, "loss": 2.1947, "step": 2328 }, { "epoch": 0.07514251549169106, "grad_norm": 0.51953125, "learning_rate": 2.9865156285653635e-05, "loss": 2.2002, "step": 2329 }, { "epoch": 0.0751747793454874, "grad_norm": 0.578125, "learning_rate": 2.9864945662295386e-05, "loss": 2.168, "step": 2330 }, { "epoch": 0.07520704319928374, "grad_norm": 0.515625, "learning_rate": 2.986473487531448e-05, "loss": 2.1826, "step": 2331 }, { "epoch": 0.07523930705308009, "grad_norm": 0.51171875, "learning_rate": 2.9864523924713236e-05, "loss": 2.1814, "step": 2332 }, { "epoch": 0.07527157090687643, "grad_norm": 0.515625, "learning_rate": 2.9864312810493976e-05, "loss": 2.1801, "step": 2333 }, { "epoch": 0.07530383476067279, "grad_norm": 0.48828125, "learning_rate": 2.9864101532659027e-05, "loss": 2.1811, "step": 2334 }, { "epoch": 0.07533609861446913, "grad_norm": 0.51953125, "learning_rate": 2.9863890091210706e-05, "loss": 2.1752, "step": 2335 }, { "epoch": 0.07536836246826548, "grad_norm": 0.53125, "learning_rate": 2.986367848615135e-05, "loss": 2.198, "step": 2336 }, { "epoch": 0.07540062632206182, "grad_norm": 0.4765625, "learning_rate": 2.9863466717483286e-05, "loss": 2.1911, "step": 2337 }, { "epoch": 0.07543289017585816, "grad_norm": 0.5078125, "learning_rate": 2.986325478520884e-05, "loss": 2.1855, "step": 2338 }, { "epoch": 0.07546515402965452, "grad_norm": 0.490234375, "learning_rate": 2.9863042689330354e-05, "loss": 2.2033, "step": 2339 }, { "epoch": 0.07549741788345087, "grad_norm": 0.392578125, "learning_rate": 2.9862830429850153e-05, "loss": 2.1962, "step": 2340 }, { "epoch": 0.07552968173724721, "grad_norm": 0.482421875, "learning_rate": 2.9862618006770575e-05, "loss": 2.1913, "step": 2341 }, { "epoch": 0.07556194559104355, "grad_norm": 0.423828125, "learning_rate": 2.9862405420093963e-05, "loss": 2.1908, "step": 2342 }, { "epoch": 0.0755942094448399, "grad_norm": 0.44921875, "learning_rate": 2.986219266982265e-05, "loss": 2.1889, "step": 2343 }, { "epoch": 0.07562647329863624, "grad_norm": 0.40625, "learning_rate": 2.9861979755958988e-05, "loss": 2.1494, "step": 2344 }, { "epoch": 0.0756587371524326, "grad_norm": 0.431640625, "learning_rate": 2.986176667850531e-05, "loss": 2.1651, "step": 2345 }, { "epoch": 0.07569100100622894, "grad_norm": 0.43359375, "learning_rate": 2.9861553437463973e-05, "loss": 2.1709, "step": 2346 }, { "epoch": 0.07572326486002529, "grad_norm": 0.435546875, "learning_rate": 2.986134003283731e-05, "loss": 2.1581, "step": 2347 }, { "epoch": 0.07575552871382163, "grad_norm": 0.41796875, "learning_rate": 2.9861126464627684e-05, "loss": 2.1964, "step": 2348 }, { "epoch": 0.07578779256761797, "grad_norm": 0.486328125, "learning_rate": 2.9860912732837433e-05, "loss": 2.178, "step": 2349 }, { "epoch": 0.07582005642141433, "grad_norm": 0.466796875, "learning_rate": 2.986069883746892e-05, "loss": 2.1611, "step": 2350 }, { "epoch": 0.07585232027521067, "grad_norm": 0.458984375, "learning_rate": 2.986048477852449e-05, "loss": 2.1961, "step": 2351 }, { "epoch": 0.07588458412900702, "grad_norm": 0.421875, "learning_rate": 2.9860270556006512e-05, "loss": 2.202, "step": 2352 }, { "epoch": 0.07591684798280336, "grad_norm": 0.44921875, "learning_rate": 2.9860056169917332e-05, "loss": 2.1717, "step": 2353 }, { "epoch": 0.0759491118365997, "grad_norm": 0.44921875, "learning_rate": 2.9859841620259314e-05, "loss": 2.1523, "step": 2354 }, { "epoch": 0.07598137569039606, "grad_norm": 0.421875, "learning_rate": 2.985962690703482e-05, "loss": 2.2037, "step": 2355 }, { "epoch": 0.07601363954419241, "grad_norm": 0.40234375, "learning_rate": 2.985941203024622e-05, "loss": 2.1651, "step": 2356 }, { "epoch": 0.07604590339798875, "grad_norm": 0.455078125, "learning_rate": 2.985919698989586e-05, "loss": 2.1795, "step": 2357 }, { "epoch": 0.0760781672517851, "grad_norm": 0.470703125, "learning_rate": 2.9858981785986125e-05, "loss": 2.1649, "step": 2358 }, { "epoch": 0.07611043110558144, "grad_norm": 0.55859375, "learning_rate": 2.9858766418519382e-05, "loss": 2.1559, "step": 2359 }, { "epoch": 0.0761426949593778, "grad_norm": 0.65234375, "learning_rate": 2.9858550887497994e-05, "loss": 2.1954, "step": 2360 }, { "epoch": 0.07617495881317414, "grad_norm": 0.69921875, "learning_rate": 2.985833519292434e-05, "loss": 2.1778, "step": 2361 }, { "epoch": 0.07620722266697048, "grad_norm": 0.6015625, "learning_rate": 2.985811933480079e-05, "loss": 2.1865, "step": 2362 }, { "epoch": 0.07623948652076683, "grad_norm": 0.45703125, "learning_rate": 2.985790331312972e-05, "loss": 2.1757, "step": 2363 }, { "epoch": 0.07627175037456317, "grad_norm": 0.5, "learning_rate": 2.985768712791351e-05, "loss": 2.1748, "step": 2364 }, { "epoch": 0.07630401422835953, "grad_norm": 0.63671875, "learning_rate": 2.9857470779154544e-05, "loss": 2.1899, "step": 2365 }, { "epoch": 0.07633627808215587, "grad_norm": 0.5, "learning_rate": 2.9857254266855193e-05, "loss": 2.1891, "step": 2366 }, { "epoch": 0.07636854193595222, "grad_norm": 0.470703125, "learning_rate": 2.985703759101785e-05, "loss": 2.2105, "step": 2367 }, { "epoch": 0.07640080578974856, "grad_norm": 0.54296875, "learning_rate": 2.9856820751644887e-05, "loss": 2.117, "step": 2368 }, { "epoch": 0.0764330696435449, "grad_norm": 0.50390625, "learning_rate": 2.9856603748738707e-05, "loss": 2.187, "step": 2369 }, { "epoch": 0.07646533349734126, "grad_norm": 0.4296875, "learning_rate": 2.9856386582301686e-05, "loss": 2.17, "step": 2370 }, { "epoch": 0.0764975973511376, "grad_norm": 0.5, "learning_rate": 2.9856169252336223e-05, "loss": 2.1777, "step": 2371 }, { "epoch": 0.07652986120493395, "grad_norm": 0.455078125, "learning_rate": 2.9855951758844708e-05, "loss": 2.178, "step": 2372 }, { "epoch": 0.0765621250587303, "grad_norm": 0.435546875, "learning_rate": 2.9855734101829528e-05, "loss": 2.1629, "step": 2373 }, { "epoch": 0.07659438891252664, "grad_norm": 0.53515625, "learning_rate": 2.9855516281293092e-05, "loss": 2.1486, "step": 2374 }, { "epoch": 0.076626652766323, "grad_norm": 0.458984375, "learning_rate": 2.9855298297237783e-05, "loss": 2.1989, "step": 2375 }, { "epoch": 0.07665891662011934, "grad_norm": 0.451171875, "learning_rate": 2.9855080149666012e-05, "loss": 2.185, "step": 2376 }, { "epoch": 0.07669118047391568, "grad_norm": 0.46875, "learning_rate": 2.9854861838580173e-05, "loss": 2.1887, "step": 2377 }, { "epoch": 0.07672344432771203, "grad_norm": 0.4921875, "learning_rate": 2.9854643363982673e-05, "loss": 2.1869, "step": 2378 }, { "epoch": 0.07675570818150837, "grad_norm": 0.47265625, "learning_rate": 2.9854424725875915e-05, "loss": 2.2138, "step": 2379 }, { "epoch": 0.07678797203530473, "grad_norm": 0.4453125, "learning_rate": 2.9854205924262304e-05, "loss": 2.1776, "step": 2380 }, { "epoch": 0.07682023588910107, "grad_norm": 0.462890625, "learning_rate": 2.9853986959144254e-05, "loss": 2.1978, "step": 2381 }, { "epoch": 0.07685249974289741, "grad_norm": 0.515625, "learning_rate": 2.9853767830524172e-05, "loss": 2.182, "step": 2382 }, { "epoch": 0.07688476359669376, "grad_norm": 0.451171875, "learning_rate": 2.9853548538404468e-05, "loss": 2.1879, "step": 2383 }, { "epoch": 0.0769170274504901, "grad_norm": 0.3984375, "learning_rate": 2.985332908278756e-05, "loss": 2.1839, "step": 2384 }, { "epoch": 0.07694929130428646, "grad_norm": 0.48046875, "learning_rate": 2.985310946367586e-05, "loss": 2.1792, "step": 2385 }, { "epoch": 0.0769815551580828, "grad_norm": 0.4609375, "learning_rate": 2.9852889681071783e-05, "loss": 2.1857, "step": 2386 }, { "epoch": 0.07701381901187915, "grad_norm": 0.421875, "learning_rate": 2.985266973497776e-05, "loss": 2.1821, "step": 2387 }, { "epoch": 0.07704608286567549, "grad_norm": 0.427734375, "learning_rate": 2.98524496253962e-05, "loss": 2.1775, "step": 2388 }, { "epoch": 0.07707834671947184, "grad_norm": 0.443359375, "learning_rate": 2.9852229352329525e-05, "loss": 2.179, "step": 2389 }, { "epoch": 0.07711061057326819, "grad_norm": 0.458984375, "learning_rate": 2.9852008915780167e-05, "loss": 2.1982, "step": 2390 }, { "epoch": 0.07714287442706454, "grad_norm": 0.423828125, "learning_rate": 2.985178831575055e-05, "loss": 2.1544, "step": 2391 }, { "epoch": 0.07717513828086088, "grad_norm": 0.421875, "learning_rate": 2.9851567552243103e-05, "loss": 2.1873, "step": 2392 }, { "epoch": 0.07720740213465722, "grad_norm": 0.451171875, "learning_rate": 2.9851346625260253e-05, "loss": 2.1348, "step": 2393 }, { "epoch": 0.07723966598845357, "grad_norm": 0.486328125, "learning_rate": 2.9851125534804434e-05, "loss": 2.1922, "step": 2394 }, { "epoch": 0.07727192984224993, "grad_norm": 0.474609375, "learning_rate": 2.985090428087808e-05, "loss": 2.1617, "step": 2395 }, { "epoch": 0.07730419369604627, "grad_norm": 0.5, "learning_rate": 2.9850682863483628e-05, "loss": 2.1739, "step": 2396 }, { "epoch": 0.07733645754984261, "grad_norm": 0.40625, "learning_rate": 2.985046128262351e-05, "loss": 2.1115, "step": 2397 }, { "epoch": 0.07736872140363896, "grad_norm": 0.54296875, "learning_rate": 2.9850239538300166e-05, "loss": 2.1796, "step": 2398 }, { "epoch": 0.0774009852574353, "grad_norm": 0.51171875, "learning_rate": 2.9850017630516042e-05, "loss": 2.1587, "step": 2399 }, { "epoch": 0.07743324911123166, "grad_norm": 0.443359375, "learning_rate": 2.9849795559273574e-05, "loss": 2.1932, "step": 2400 }, { "epoch": 0.077465512965028, "grad_norm": 0.455078125, "learning_rate": 2.984957332457521e-05, "loss": 2.2074, "step": 2401 }, { "epoch": 0.07749777681882435, "grad_norm": 0.54296875, "learning_rate": 2.98493509264234e-05, "loss": 2.1544, "step": 2402 }, { "epoch": 0.07753004067262069, "grad_norm": 0.5703125, "learning_rate": 2.9849128364820585e-05, "loss": 2.1615, "step": 2403 }, { "epoch": 0.07756230452641703, "grad_norm": 0.49609375, "learning_rate": 2.984890563976922e-05, "loss": 2.1082, "step": 2404 }, { "epoch": 0.07759456838021339, "grad_norm": 0.494140625, "learning_rate": 2.9848682751271754e-05, "loss": 2.1814, "step": 2405 }, { "epoch": 0.07762683223400973, "grad_norm": 0.55078125, "learning_rate": 2.984845969933064e-05, "loss": 2.1854, "step": 2406 }, { "epoch": 0.07765909608780608, "grad_norm": 0.474609375, "learning_rate": 2.9848236483948332e-05, "loss": 2.2104, "step": 2407 }, { "epoch": 0.07769135994160242, "grad_norm": 0.53125, "learning_rate": 2.9848013105127287e-05, "loss": 2.144, "step": 2408 }, { "epoch": 0.07772362379539877, "grad_norm": 0.6328125, "learning_rate": 2.984778956286997e-05, "loss": 2.2018, "step": 2409 }, { "epoch": 0.07775588764919512, "grad_norm": 0.66015625, "learning_rate": 2.9847565857178836e-05, "loss": 2.1709, "step": 2410 }, { "epoch": 0.07778815150299147, "grad_norm": 0.71484375, "learning_rate": 2.9847341988056347e-05, "loss": 2.1751, "step": 2411 }, { "epoch": 0.07782041535678781, "grad_norm": 0.60546875, "learning_rate": 2.984711795550497e-05, "loss": 2.2003, "step": 2412 }, { "epoch": 0.07785267921058416, "grad_norm": 0.431640625, "learning_rate": 2.984689375952717e-05, "loss": 2.1818, "step": 2413 }, { "epoch": 0.0778849430643805, "grad_norm": 0.6484375, "learning_rate": 2.9846669400125413e-05, "loss": 2.1586, "step": 2414 }, { "epoch": 0.07791720691817686, "grad_norm": 0.6796875, "learning_rate": 2.9846444877302174e-05, "loss": 2.1863, "step": 2415 }, { "epoch": 0.0779494707719732, "grad_norm": 0.5390625, "learning_rate": 2.984622019105992e-05, "loss": 2.1655, "step": 2416 }, { "epoch": 0.07798173462576954, "grad_norm": 0.5, "learning_rate": 2.9845995341401124e-05, "loss": 2.1651, "step": 2417 }, { "epoch": 0.07801399847956589, "grad_norm": 0.55859375, "learning_rate": 2.984577032832826e-05, "loss": 2.1667, "step": 2418 }, { "epoch": 0.07804626233336223, "grad_norm": 0.48828125, "learning_rate": 2.984554515184381e-05, "loss": 2.1921, "step": 2419 }, { "epoch": 0.07807852618715859, "grad_norm": 0.478515625, "learning_rate": 2.9845319811950248e-05, "loss": 2.15, "step": 2420 }, { "epoch": 0.07811079004095493, "grad_norm": 0.494140625, "learning_rate": 2.9845094308650055e-05, "loss": 2.1877, "step": 2421 }, { "epoch": 0.07814305389475128, "grad_norm": 0.474609375, "learning_rate": 2.984486864194571e-05, "loss": 2.1952, "step": 2422 }, { "epoch": 0.07817531774854762, "grad_norm": 0.5078125, "learning_rate": 2.98446428118397e-05, "loss": 2.1475, "step": 2423 }, { "epoch": 0.07820758160234396, "grad_norm": 0.44921875, "learning_rate": 2.9844416818334518e-05, "loss": 2.1963, "step": 2424 }, { "epoch": 0.07823984545614032, "grad_norm": 0.5234375, "learning_rate": 2.984419066143264e-05, "loss": 2.1641, "step": 2425 }, { "epoch": 0.07827210930993667, "grad_norm": 0.51953125, "learning_rate": 2.9843964341136562e-05, "loss": 2.1551, "step": 2426 }, { "epoch": 0.07830437316373301, "grad_norm": 0.431640625, "learning_rate": 2.9843737857448777e-05, "loss": 2.1741, "step": 2427 }, { "epoch": 0.07833663701752935, "grad_norm": 0.53515625, "learning_rate": 2.9843511210371773e-05, "loss": 2.1842, "step": 2428 }, { "epoch": 0.0783689008713257, "grad_norm": 0.546875, "learning_rate": 2.984328439990804e-05, "loss": 2.1835, "step": 2429 }, { "epoch": 0.07840116472512206, "grad_norm": 0.50390625, "learning_rate": 2.9843057426060088e-05, "loss": 2.172, "step": 2430 }, { "epoch": 0.0784334285789184, "grad_norm": 0.51953125, "learning_rate": 2.984283028883041e-05, "loss": 2.1862, "step": 2431 }, { "epoch": 0.07846569243271474, "grad_norm": 0.486328125, "learning_rate": 2.9842602988221495e-05, "loss": 2.152, "step": 2432 }, { "epoch": 0.07849795628651109, "grad_norm": 0.470703125, "learning_rate": 2.984237552423586e-05, "loss": 2.1645, "step": 2433 }, { "epoch": 0.07853022014030743, "grad_norm": 0.427734375, "learning_rate": 2.9842147896876003e-05, "loss": 2.172, "step": 2434 }, { "epoch": 0.07856248399410377, "grad_norm": 0.451171875, "learning_rate": 2.9841920106144427e-05, "loss": 2.1475, "step": 2435 }, { "epoch": 0.07859474784790013, "grad_norm": 0.431640625, "learning_rate": 2.9841692152043645e-05, "loss": 2.1757, "step": 2436 }, { "epoch": 0.07862701170169648, "grad_norm": 0.416015625, "learning_rate": 2.9841464034576163e-05, "loss": 2.165, "step": 2437 }, { "epoch": 0.07865927555549282, "grad_norm": 0.453125, "learning_rate": 2.984123575374449e-05, "loss": 2.1774, "step": 2438 }, { "epoch": 0.07869153940928916, "grad_norm": 0.486328125, "learning_rate": 2.9841007309551142e-05, "loss": 2.176, "step": 2439 }, { "epoch": 0.0787238032630855, "grad_norm": 0.494140625, "learning_rate": 2.9840778701998634e-05, "loss": 2.1754, "step": 2440 }, { "epoch": 0.07875606711688186, "grad_norm": 0.515625, "learning_rate": 2.9840549931089482e-05, "loss": 2.1768, "step": 2441 }, { "epoch": 0.07878833097067821, "grad_norm": 0.435546875, "learning_rate": 2.98403209968262e-05, "loss": 2.1939, "step": 2442 }, { "epoch": 0.07882059482447455, "grad_norm": 0.4921875, "learning_rate": 2.9840091899211313e-05, "loss": 2.1527, "step": 2443 }, { "epoch": 0.0788528586782709, "grad_norm": 0.578125, "learning_rate": 2.983986263824734e-05, "loss": 2.1804, "step": 2444 }, { "epoch": 0.07888512253206724, "grad_norm": 0.470703125, "learning_rate": 2.9839633213936804e-05, "loss": 2.1591, "step": 2445 }, { "epoch": 0.0789173863858636, "grad_norm": 0.482421875, "learning_rate": 2.983940362628223e-05, "loss": 2.1743, "step": 2446 }, { "epoch": 0.07894965023965994, "grad_norm": 0.50390625, "learning_rate": 2.9839173875286148e-05, "loss": 2.2131, "step": 2447 }, { "epoch": 0.07898191409345628, "grad_norm": 0.466796875, "learning_rate": 2.9838943960951083e-05, "loss": 2.1789, "step": 2448 }, { "epoch": 0.07901417794725263, "grad_norm": 0.4765625, "learning_rate": 2.9838713883279576e-05, "loss": 2.1709, "step": 2449 }, { "epoch": 0.07904644180104897, "grad_norm": 0.4296875, "learning_rate": 2.9838483642274146e-05, "loss": 2.1194, "step": 2450 }, { "epoch": 0.07907870565484533, "grad_norm": 0.439453125, "learning_rate": 2.983825323793733e-05, "loss": 2.1892, "step": 2451 }, { "epoch": 0.07911096950864167, "grad_norm": 0.47265625, "learning_rate": 2.9838022670271674e-05, "loss": 2.1632, "step": 2452 }, { "epoch": 0.07914323336243802, "grad_norm": 0.443359375, "learning_rate": 2.9837791939279707e-05, "loss": 2.1451, "step": 2453 }, { "epoch": 0.07917549721623436, "grad_norm": 0.466796875, "learning_rate": 2.9837561044963972e-05, "loss": 2.1619, "step": 2454 }, { "epoch": 0.0792077610700307, "grad_norm": 0.482421875, "learning_rate": 2.983732998732701e-05, "loss": 2.1947, "step": 2455 }, { "epoch": 0.07924002492382706, "grad_norm": 0.63671875, "learning_rate": 2.983709876637136e-05, "loss": 2.1657, "step": 2456 }, { "epoch": 0.0792722887776234, "grad_norm": 0.80078125, "learning_rate": 2.983686738209957e-05, "loss": 2.1838, "step": 2457 }, { "epoch": 0.07930455263141975, "grad_norm": 1.0234375, "learning_rate": 2.983663583451419e-05, "loss": 2.1968, "step": 2458 }, { "epoch": 0.0793368164852161, "grad_norm": 0.95703125, "learning_rate": 2.983640412361777e-05, "loss": 2.1321, "step": 2459 }, { "epoch": 0.07936908033901244, "grad_norm": 0.5703125, "learning_rate": 2.9836172249412854e-05, "loss": 2.1168, "step": 2460 }, { "epoch": 0.0794013441928088, "grad_norm": 0.69140625, "learning_rate": 2.9835940211901995e-05, "loss": 2.1538, "step": 2461 }, { "epoch": 0.07943360804660514, "grad_norm": 0.625, "learning_rate": 2.9835708011087754e-05, "loss": 2.1781, "step": 2462 }, { "epoch": 0.07946587190040148, "grad_norm": 0.578125, "learning_rate": 2.983547564697268e-05, "loss": 2.1642, "step": 2463 }, { "epoch": 0.07949813575419783, "grad_norm": 0.7734375, "learning_rate": 2.983524311955933e-05, "loss": 2.1851, "step": 2464 }, { "epoch": 0.07953039960799417, "grad_norm": 0.48046875, "learning_rate": 2.983501042885027e-05, "loss": 2.1719, "step": 2465 }, { "epoch": 0.07956266346179053, "grad_norm": 0.625, "learning_rate": 2.983477757484806e-05, "loss": 2.175, "step": 2466 }, { "epoch": 0.07959492731558687, "grad_norm": 0.55859375, "learning_rate": 2.9834544557555254e-05, "loss": 2.1505, "step": 2467 }, { "epoch": 0.07962719116938322, "grad_norm": 0.51171875, "learning_rate": 2.983431137697443e-05, "loss": 2.1786, "step": 2468 }, { "epoch": 0.07965945502317956, "grad_norm": 0.59765625, "learning_rate": 2.983407803310815e-05, "loss": 2.1496, "step": 2469 }, { "epoch": 0.0796917188769759, "grad_norm": 0.431640625, "learning_rate": 2.9833844525958975e-05, "loss": 2.1808, "step": 2470 }, { "epoch": 0.07972398273077226, "grad_norm": 0.52734375, "learning_rate": 2.9833610855529483e-05, "loss": 2.1733, "step": 2471 }, { "epoch": 0.0797562465845686, "grad_norm": 0.462890625, "learning_rate": 2.9833377021822244e-05, "loss": 2.1518, "step": 2472 }, { "epoch": 0.07978851043836495, "grad_norm": 0.44140625, "learning_rate": 2.983314302483983e-05, "loss": 2.1704, "step": 2473 }, { "epoch": 0.07982077429216129, "grad_norm": 0.51171875, "learning_rate": 2.9832908864584824e-05, "loss": 2.1821, "step": 2474 }, { "epoch": 0.07985303814595764, "grad_norm": 0.412109375, "learning_rate": 2.9832674541059793e-05, "loss": 2.1736, "step": 2475 }, { "epoch": 0.079885301999754, "grad_norm": 0.53125, "learning_rate": 2.983244005426733e-05, "loss": 2.1725, "step": 2476 }, { "epoch": 0.07991756585355034, "grad_norm": 0.427734375, "learning_rate": 2.983220540421e-05, "loss": 2.1878, "step": 2477 }, { "epoch": 0.07994982970734668, "grad_norm": 0.4609375, "learning_rate": 2.9831970590890397e-05, "loss": 2.1763, "step": 2478 }, { "epoch": 0.07998209356114303, "grad_norm": 0.494140625, "learning_rate": 2.9831735614311102e-05, "loss": 2.1642, "step": 2479 }, { "epoch": 0.08001435741493937, "grad_norm": 0.478515625, "learning_rate": 2.9831500474474698e-05, "loss": 2.1768, "step": 2480 }, { "epoch": 0.08004662126873573, "grad_norm": 0.515625, "learning_rate": 2.983126517138378e-05, "loss": 2.1648, "step": 2481 }, { "epoch": 0.08007888512253207, "grad_norm": 0.55078125, "learning_rate": 2.9831029705040934e-05, "loss": 2.1811, "step": 2482 }, { "epoch": 0.08011114897632841, "grad_norm": 0.46484375, "learning_rate": 2.9830794075448754e-05, "loss": 2.1568, "step": 2483 }, { "epoch": 0.08014341283012476, "grad_norm": 0.52734375, "learning_rate": 2.9830558282609833e-05, "loss": 2.0938, "step": 2484 }, { "epoch": 0.0801756766839211, "grad_norm": 0.52734375, "learning_rate": 2.983032232652676e-05, "loss": 2.1254, "step": 2485 }, { "epoch": 0.08020794053771746, "grad_norm": 0.5546875, "learning_rate": 2.9830086207202144e-05, "loss": 2.1984, "step": 2486 }, { "epoch": 0.0802402043915138, "grad_norm": 0.5625, "learning_rate": 2.9829849924638574e-05, "loss": 2.1522, "step": 2487 }, { "epoch": 0.08027246824531015, "grad_norm": 0.5390625, "learning_rate": 2.9829613478838657e-05, "loss": 2.1436, "step": 2488 }, { "epoch": 0.08030473209910649, "grad_norm": 0.466796875, "learning_rate": 2.9829376869804995e-05, "loss": 2.1935, "step": 2489 }, { "epoch": 0.08033699595290283, "grad_norm": 0.47265625, "learning_rate": 2.9829140097540192e-05, "loss": 2.1714, "step": 2490 }, { "epoch": 0.08036925980669919, "grad_norm": 0.49609375, "learning_rate": 2.982890316204685e-05, "loss": 2.1653, "step": 2491 }, { "epoch": 0.08040152366049554, "grad_norm": 0.453125, "learning_rate": 2.982866606332758e-05, "loss": 2.1635, "step": 2492 }, { "epoch": 0.08043378751429188, "grad_norm": 0.4921875, "learning_rate": 2.9828428801384994e-05, "loss": 2.1541, "step": 2493 }, { "epoch": 0.08046605136808822, "grad_norm": 0.4453125, "learning_rate": 2.9828191376221703e-05, "loss": 2.1364, "step": 2494 }, { "epoch": 0.08049831522188457, "grad_norm": 0.494140625, "learning_rate": 2.9827953787840314e-05, "loss": 2.1626, "step": 2495 }, { "epoch": 0.08053057907568092, "grad_norm": 0.49609375, "learning_rate": 2.9827716036243452e-05, "loss": 2.1966, "step": 2496 }, { "epoch": 0.08056284292947727, "grad_norm": 0.416015625, "learning_rate": 2.9827478121433725e-05, "loss": 2.1595, "step": 2497 }, { "epoch": 0.08059510678327361, "grad_norm": 0.451171875, "learning_rate": 2.982724004341376e-05, "loss": 2.1754, "step": 2498 }, { "epoch": 0.08062737063706996, "grad_norm": 0.46875, "learning_rate": 2.9827001802186175e-05, "loss": 2.1937, "step": 2499 }, { "epoch": 0.0806596344908663, "grad_norm": 0.5234375, "learning_rate": 2.9826763397753586e-05, "loss": 2.1713, "step": 2500 }, { "epoch": 0.08069189834466266, "grad_norm": 0.4453125, "learning_rate": 2.9826524830118623e-05, "loss": 2.1232, "step": 2501 }, { "epoch": 0.080724162198459, "grad_norm": 0.408203125, "learning_rate": 2.982628609928391e-05, "loss": 2.1674, "step": 2502 }, { "epoch": 0.08075642605225535, "grad_norm": 0.5234375, "learning_rate": 2.982604720525208e-05, "loss": 2.1684, "step": 2503 }, { "epoch": 0.08078868990605169, "grad_norm": 0.486328125, "learning_rate": 2.9825808148025757e-05, "loss": 2.1359, "step": 2504 }, { "epoch": 0.08082095375984803, "grad_norm": 0.458984375, "learning_rate": 2.982556892760757e-05, "loss": 2.128, "step": 2505 }, { "epoch": 0.08085321761364439, "grad_norm": 0.5078125, "learning_rate": 2.982532954400016e-05, "loss": 2.1732, "step": 2506 }, { "epoch": 0.08088548146744073, "grad_norm": 0.41796875, "learning_rate": 2.982508999720616e-05, "loss": 2.1762, "step": 2507 }, { "epoch": 0.08091774532123708, "grad_norm": 0.486328125, "learning_rate": 2.9824850287228205e-05, "loss": 2.146, "step": 2508 }, { "epoch": 0.08095000917503342, "grad_norm": 0.46875, "learning_rate": 2.9824610414068932e-05, "loss": 2.1629, "step": 2509 }, { "epoch": 0.08098227302882977, "grad_norm": 0.451171875, "learning_rate": 2.9824370377730978e-05, "loss": 2.1312, "step": 2510 }, { "epoch": 0.08101453688262612, "grad_norm": 0.49609375, "learning_rate": 2.9824130178216993e-05, "loss": 2.2134, "step": 2511 }, { "epoch": 0.08104680073642247, "grad_norm": 0.51953125, "learning_rate": 2.9823889815529618e-05, "loss": 2.1733, "step": 2512 }, { "epoch": 0.08107906459021881, "grad_norm": 0.5078125, "learning_rate": 2.9823649289671503e-05, "loss": 2.1892, "step": 2513 }, { "epoch": 0.08111132844401515, "grad_norm": 0.4765625, "learning_rate": 2.9823408600645285e-05, "loss": 2.1535, "step": 2514 }, { "epoch": 0.0811435922978115, "grad_norm": 0.51171875, "learning_rate": 2.9823167748453622e-05, "loss": 2.1272, "step": 2515 }, { "epoch": 0.08117585615160786, "grad_norm": 0.5, "learning_rate": 2.9822926733099162e-05, "loss": 2.1906, "step": 2516 }, { "epoch": 0.0812081200054042, "grad_norm": 0.62890625, "learning_rate": 2.9822685554584553e-05, "loss": 2.1912, "step": 2517 }, { "epoch": 0.08124038385920054, "grad_norm": 0.890625, "learning_rate": 2.982244421291246e-05, "loss": 2.1626, "step": 2518 }, { "epoch": 0.08127264771299689, "grad_norm": 1.4140625, "learning_rate": 2.9822202708085537e-05, "loss": 2.1713, "step": 2519 }, { "epoch": 0.08130491156679323, "grad_norm": 0.51171875, "learning_rate": 2.9821961040106438e-05, "loss": 2.1439, "step": 2520 }, { "epoch": 0.08133717542058957, "grad_norm": 0.98046875, "learning_rate": 2.9821719208977826e-05, "loss": 2.1839, "step": 2521 }, { "epoch": 0.08136943927438593, "grad_norm": 1.0078125, "learning_rate": 2.982147721470236e-05, "loss": 2.1488, "step": 2522 }, { "epoch": 0.08140170312818228, "grad_norm": 0.54296875, "learning_rate": 2.9821235057282708e-05, "loss": 2.1797, "step": 2523 }, { "epoch": 0.08143396698197862, "grad_norm": 0.8203125, "learning_rate": 2.9820992736721532e-05, "loss": 2.1692, "step": 2524 }, { "epoch": 0.08146623083577496, "grad_norm": 0.470703125, "learning_rate": 2.9820750253021502e-05, "loss": 2.151, "step": 2525 }, { "epoch": 0.08149849468957131, "grad_norm": 0.6875, "learning_rate": 2.982050760618528e-05, "loss": 2.1827, "step": 2526 }, { "epoch": 0.08153075854336767, "grad_norm": 0.458984375, "learning_rate": 2.982026479621555e-05, "loss": 2.1758, "step": 2527 }, { "epoch": 0.08156302239716401, "grad_norm": 0.59765625, "learning_rate": 2.9820021823114973e-05, "loss": 2.1774, "step": 2528 }, { "epoch": 0.08159528625096035, "grad_norm": 0.46875, "learning_rate": 2.981977868688623e-05, "loss": 2.1768, "step": 2529 }, { "epoch": 0.0816275501047567, "grad_norm": 0.5078125, "learning_rate": 2.981953538753199e-05, "loss": 2.1701, "step": 2530 }, { "epoch": 0.08165981395855304, "grad_norm": 0.474609375, "learning_rate": 2.981929192505494e-05, "loss": 2.1961, "step": 2531 }, { "epoch": 0.0816920778123494, "grad_norm": 0.51171875, "learning_rate": 2.9819048299457753e-05, "loss": 2.1332, "step": 2532 }, { "epoch": 0.08172434166614574, "grad_norm": 0.498046875, "learning_rate": 2.9818804510743118e-05, "loss": 2.1388, "step": 2533 }, { "epoch": 0.08175660551994209, "grad_norm": 0.482421875, "learning_rate": 2.981856055891371e-05, "loss": 2.1857, "step": 2534 }, { "epoch": 0.08178886937373843, "grad_norm": 0.4609375, "learning_rate": 2.981831644397222e-05, "loss": 2.1612, "step": 2535 }, { "epoch": 0.08182113322753477, "grad_norm": 0.478515625, "learning_rate": 2.9818072165921332e-05, "loss": 2.2036, "step": 2536 }, { "epoch": 0.08185339708133113, "grad_norm": 0.4375, "learning_rate": 2.9817827724763736e-05, "loss": 2.1859, "step": 2537 }, { "epoch": 0.08188566093512747, "grad_norm": 0.4453125, "learning_rate": 2.9817583120502127e-05, "loss": 2.1911, "step": 2538 }, { "epoch": 0.08191792478892382, "grad_norm": 0.44140625, "learning_rate": 2.9817338353139187e-05, "loss": 2.1618, "step": 2539 }, { "epoch": 0.08195018864272016, "grad_norm": 0.423828125, "learning_rate": 2.9817093422677617e-05, "loss": 2.1714, "step": 2540 }, { "epoch": 0.0819824524965165, "grad_norm": 0.4453125, "learning_rate": 2.9816848329120117e-05, "loss": 2.172, "step": 2541 }, { "epoch": 0.08201471635031286, "grad_norm": 0.474609375, "learning_rate": 2.9816603072469378e-05, "loss": 2.1888, "step": 2542 }, { "epoch": 0.08204698020410921, "grad_norm": 0.53515625, "learning_rate": 2.9816357652728103e-05, "loss": 2.1475, "step": 2543 }, { "epoch": 0.08207924405790555, "grad_norm": 0.4296875, "learning_rate": 2.981611206989899e-05, "loss": 2.1568, "step": 2544 }, { "epoch": 0.0821115079117019, "grad_norm": 0.466796875, "learning_rate": 2.981586632398474e-05, "loss": 2.184, "step": 2545 }, { "epoch": 0.08214377176549824, "grad_norm": 0.474609375, "learning_rate": 2.9815620414988066e-05, "loss": 2.1765, "step": 2546 }, { "epoch": 0.0821760356192946, "grad_norm": 0.41015625, "learning_rate": 2.9815374342911675e-05, "loss": 2.1493, "step": 2547 }, { "epoch": 0.08220829947309094, "grad_norm": 0.451171875, "learning_rate": 2.981512810775827e-05, "loss": 2.1623, "step": 2548 }, { "epoch": 0.08224056332688728, "grad_norm": 0.48046875, "learning_rate": 2.981488170953056e-05, "loss": 2.1523, "step": 2549 }, { "epoch": 0.08227282718068363, "grad_norm": 0.51953125, "learning_rate": 2.981463514823126e-05, "loss": 2.1209, "step": 2550 }, { "epoch": 0.08230509103447997, "grad_norm": 0.484375, "learning_rate": 2.9814388423863087e-05, "loss": 2.1856, "step": 2551 }, { "epoch": 0.08233735488827633, "grad_norm": 0.45703125, "learning_rate": 2.9814141536428756e-05, "loss": 2.1899, "step": 2552 }, { "epoch": 0.08236961874207267, "grad_norm": 0.458984375, "learning_rate": 2.981389448593098e-05, "loss": 2.1758, "step": 2553 }, { "epoch": 0.08240188259586902, "grad_norm": 0.484375, "learning_rate": 2.981364727237248e-05, "loss": 2.1648, "step": 2554 }, { "epoch": 0.08243414644966536, "grad_norm": 0.458984375, "learning_rate": 2.981339989575598e-05, "loss": 2.1734, "step": 2555 }, { "epoch": 0.0824664103034617, "grad_norm": 0.40625, "learning_rate": 2.9813152356084197e-05, "loss": 2.1753, "step": 2556 }, { "epoch": 0.08249867415725806, "grad_norm": 0.490234375, "learning_rate": 2.9812904653359865e-05, "loss": 2.1815, "step": 2557 }, { "epoch": 0.0825309380110544, "grad_norm": 0.4375, "learning_rate": 2.98126567875857e-05, "loss": 2.166, "step": 2558 }, { "epoch": 0.08256320186485075, "grad_norm": 0.462890625, "learning_rate": 2.981240875876444e-05, "loss": 2.1474, "step": 2559 }, { "epoch": 0.0825954657186471, "grad_norm": 0.44140625, "learning_rate": 2.9812160566898812e-05, "loss": 2.117, "step": 2560 }, { "epoch": 0.08262772957244344, "grad_norm": 0.4296875, "learning_rate": 2.9811912211991545e-05, "loss": 2.1582, "step": 2561 }, { "epoch": 0.0826599934262398, "grad_norm": 0.50390625, "learning_rate": 2.981166369404537e-05, "loss": 2.1524, "step": 2562 }, { "epoch": 0.08269225728003614, "grad_norm": 0.41796875, "learning_rate": 2.9811415013063035e-05, "loss": 2.1654, "step": 2563 }, { "epoch": 0.08272452113383248, "grad_norm": 0.46875, "learning_rate": 2.9811166169047265e-05, "loss": 2.1691, "step": 2564 }, { "epoch": 0.08275678498762883, "grad_norm": 0.44921875, "learning_rate": 2.98109171620008e-05, "loss": 2.0657, "step": 2565 }, { "epoch": 0.08278904884142517, "grad_norm": 0.470703125, "learning_rate": 2.981066799192639e-05, "loss": 2.1101, "step": 2566 }, { "epoch": 0.08282131269522153, "grad_norm": 0.447265625, "learning_rate": 2.9810418658826767e-05, "loss": 2.0924, "step": 2567 }, { "epoch": 0.08285357654901787, "grad_norm": 0.53125, "learning_rate": 2.981016916270468e-05, "loss": 2.0909, "step": 2568 }, { "epoch": 0.08288584040281421, "grad_norm": 0.62890625, "learning_rate": 2.980991950356288e-05, "loss": 2.087, "step": 2569 }, { "epoch": 0.08291810425661056, "grad_norm": 0.6171875, "learning_rate": 2.980966968140411e-05, "loss": 2.1174, "step": 2570 }, { "epoch": 0.0829503681104069, "grad_norm": 0.6015625, "learning_rate": 2.9809419696231116e-05, "loss": 2.0841, "step": 2571 }, { "epoch": 0.08298263196420326, "grad_norm": 0.5859375, "learning_rate": 2.9809169548046662e-05, "loss": 2.1125, "step": 2572 }, { "epoch": 0.0830148958179996, "grad_norm": 0.57421875, "learning_rate": 2.980891923685349e-05, "loss": 2.1265, "step": 2573 }, { "epoch": 0.08304715967179595, "grad_norm": 0.482421875, "learning_rate": 2.9808668762654355e-05, "loss": 2.0819, "step": 2574 }, { "epoch": 0.08307942352559229, "grad_norm": 0.453125, "learning_rate": 2.9808418125452024e-05, "loss": 2.1057, "step": 2575 }, { "epoch": 0.08311168737938864, "grad_norm": 0.458984375, "learning_rate": 2.9808167325249244e-05, "loss": 2.1213, "step": 2576 }, { "epoch": 0.08314395123318499, "grad_norm": 0.408203125, "learning_rate": 2.9807916362048784e-05, "loss": 2.0856, "step": 2577 }, { "epoch": 0.08317621508698134, "grad_norm": 0.44921875, "learning_rate": 2.9807665235853404e-05, "loss": 2.1002, "step": 2578 }, { "epoch": 0.08320847894077768, "grad_norm": 0.46875, "learning_rate": 2.980741394666587e-05, "loss": 2.0891, "step": 2579 }, { "epoch": 0.08324074279457402, "grad_norm": 0.478515625, "learning_rate": 2.9807162494488945e-05, "loss": 2.1218, "step": 2580 }, { "epoch": 0.08327300664837037, "grad_norm": 0.49609375, "learning_rate": 2.9806910879325396e-05, "loss": 2.1292, "step": 2581 }, { "epoch": 0.08330527050216673, "grad_norm": 0.5234375, "learning_rate": 2.9806659101178e-05, "loss": 2.1152, "step": 2582 }, { "epoch": 0.08333753435596307, "grad_norm": 0.453125, "learning_rate": 2.9806407160049514e-05, "loss": 2.1171, "step": 2583 }, { "epoch": 0.08336979820975941, "grad_norm": 0.486328125, "learning_rate": 2.9806155055942724e-05, "loss": 2.0789, "step": 2584 }, { "epoch": 0.08340206206355576, "grad_norm": 0.51953125, "learning_rate": 2.9805902788860402e-05, "loss": 2.1052, "step": 2585 }, { "epoch": 0.0834343259173521, "grad_norm": 0.5, "learning_rate": 2.9805650358805326e-05, "loss": 2.0868, "step": 2586 }, { "epoch": 0.08346658977114846, "grad_norm": 0.5390625, "learning_rate": 2.9805397765780268e-05, "loss": 2.0966, "step": 2587 }, { "epoch": 0.0834988536249448, "grad_norm": 0.5625, "learning_rate": 2.980514500978801e-05, "loss": 2.1126, "step": 2588 }, { "epoch": 0.08353111747874115, "grad_norm": 0.427734375, "learning_rate": 2.980489209083135e-05, "loss": 2.1078, "step": 2589 }, { "epoch": 0.08356338133253749, "grad_norm": 0.4453125, "learning_rate": 2.9804639008913046e-05, "loss": 2.0903, "step": 2590 }, { "epoch": 0.08359564518633383, "grad_norm": 0.419921875, "learning_rate": 2.98043857640359e-05, "loss": 2.0944, "step": 2591 }, { "epoch": 0.08362790904013019, "grad_norm": 0.45703125, "learning_rate": 2.98041323562027e-05, "loss": 2.059, "step": 2592 }, { "epoch": 0.08366017289392653, "grad_norm": 0.416015625, "learning_rate": 2.9803878785416224e-05, "loss": 2.0424, "step": 2593 }, { "epoch": 0.08369243674772288, "grad_norm": 0.45703125, "learning_rate": 2.9803625051679278e-05, "loss": 2.1095, "step": 2594 }, { "epoch": 0.08372470060151922, "grad_norm": 0.458984375, "learning_rate": 2.9803371154994643e-05, "loss": 2.1152, "step": 2595 }, { "epoch": 0.08375696445531557, "grad_norm": 0.43359375, "learning_rate": 2.980311709536512e-05, "loss": 2.0947, "step": 2596 }, { "epoch": 0.08378922830911192, "grad_norm": 0.466796875, "learning_rate": 2.9802862872793504e-05, "loss": 2.0954, "step": 2597 }, { "epoch": 0.08382149216290827, "grad_norm": 0.4453125, "learning_rate": 2.9802608487282595e-05, "loss": 2.0872, "step": 2598 }, { "epoch": 0.08385375601670461, "grad_norm": 0.5, "learning_rate": 2.9802353938835187e-05, "loss": 2.0852, "step": 2599 }, { "epoch": 0.08388601987050096, "grad_norm": 0.578125, "learning_rate": 2.980209922745409e-05, "loss": 2.098, "step": 2600 }, { "epoch": 0.0839182837242973, "grad_norm": 0.65234375, "learning_rate": 2.98018443531421e-05, "loss": 2.0987, "step": 2601 }, { "epoch": 0.08395054757809366, "grad_norm": 0.6484375, "learning_rate": 2.9801589315902028e-05, "loss": 2.0946, "step": 2602 }, { "epoch": 0.08398281143189, "grad_norm": 0.5390625, "learning_rate": 2.9801334115736676e-05, "loss": 2.1172, "step": 2603 }, { "epoch": 0.08401507528568634, "grad_norm": 0.4921875, "learning_rate": 2.9801078752648857e-05, "loss": 2.072, "step": 2604 }, { "epoch": 0.08404733913948269, "grad_norm": 0.4921875, "learning_rate": 2.9800823226641386e-05, "loss": 2.108, "step": 2605 }, { "epoch": 0.08407960299327903, "grad_norm": 0.4921875, "learning_rate": 2.980056753771707e-05, "loss": 2.0772, "step": 2606 }, { "epoch": 0.08411186684707539, "grad_norm": 0.431640625, "learning_rate": 2.9800311685878722e-05, "loss": 2.0941, "step": 2607 }, { "epoch": 0.08414413070087173, "grad_norm": 0.44921875, "learning_rate": 2.9800055671129164e-05, "loss": 2.0839, "step": 2608 }, { "epoch": 0.08417639455466808, "grad_norm": 0.44140625, "learning_rate": 2.979979949347121e-05, "loss": 2.1055, "step": 2609 }, { "epoch": 0.08420865840846442, "grad_norm": 0.4375, "learning_rate": 2.9799543152907678e-05, "loss": 2.0978, "step": 2610 }, { "epoch": 0.08424092226226076, "grad_norm": 0.455078125, "learning_rate": 2.9799286649441394e-05, "loss": 2.0444, "step": 2611 }, { "epoch": 0.08427318611605711, "grad_norm": 0.44140625, "learning_rate": 2.9799029983075176e-05, "loss": 2.1045, "step": 2612 }, { "epoch": 0.08430544996985347, "grad_norm": 0.462890625, "learning_rate": 2.9798773153811855e-05, "loss": 2.1082, "step": 2613 }, { "epoch": 0.08433771382364981, "grad_norm": 0.41796875, "learning_rate": 2.9798516161654256e-05, "loss": 2.0853, "step": 2614 }, { "epoch": 0.08436997767744615, "grad_norm": 0.43359375, "learning_rate": 2.9798259006605207e-05, "loss": 2.0998, "step": 2615 }, { "epoch": 0.0844022415312425, "grad_norm": 0.43359375, "learning_rate": 2.9798001688667542e-05, "loss": 2.0858, "step": 2616 }, { "epoch": 0.08443450538503884, "grad_norm": 0.4609375, "learning_rate": 2.9797744207844085e-05, "loss": 2.0744, "step": 2617 }, { "epoch": 0.0844667692388352, "grad_norm": 0.490234375, "learning_rate": 2.979748656413768e-05, "loss": 2.0895, "step": 2618 }, { "epoch": 0.08449903309263154, "grad_norm": 0.5234375, "learning_rate": 2.9797228757551157e-05, "loss": 2.0424, "step": 2619 }, { "epoch": 0.08453129694642789, "grad_norm": 0.5234375, "learning_rate": 2.9796970788087353e-05, "loss": 2.0811, "step": 2620 }, { "epoch": 0.08456356080022423, "grad_norm": 0.46875, "learning_rate": 2.9796712655749118e-05, "loss": 2.1128, "step": 2621 }, { "epoch": 0.08459582465402057, "grad_norm": 0.470703125, "learning_rate": 2.9796454360539275e-05, "loss": 2.096, "step": 2622 }, { "epoch": 0.08462808850781693, "grad_norm": 0.421875, "learning_rate": 2.9796195902460683e-05, "loss": 2.0745, "step": 2623 }, { "epoch": 0.08466035236161328, "grad_norm": 0.447265625, "learning_rate": 2.9795937281516182e-05, "loss": 2.1251, "step": 2624 }, { "epoch": 0.08469261621540962, "grad_norm": 0.4765625, "learning_rate": 2.9795678497708618e-05, "loss": 2.083, "step": 2625 }, { "epoch": 0.08472488006920596, "grad_norm": 0.462890625, "learning_rate": 2.9795419551040836e-05, "loss": 2.0729, "step": 2626 }, { "epoch": 0.0847571439230023, "grad_norm": 0.427734375, "learning_rate": 2.979516044151569e-05, "loss": 2.0956, "step": 2627 }, { "epoch": 0.08478940777679866, "grad_norm": 0.4609375, "learning_rate": 2.9794901169136038e-05, "loss": 2.1076, "step": 2628 }, { "epoch": 0.08482167163059501, "grad_norm": 0.451171875, "learning_rate": 2.979464173390472e-05, "loss": 2.097, "step": 2629 }, { "epoch": 0.08485393548439135, "grad_norm": 0.4375, "learning_rate": 2.97943821358246e-05, "loss": 2.0755, "step": 2630 }, { "epoch": 0.0848861993381877, "grad_norm": 0.515625, "learning_rate": 2.9794122374898538e-05, "loss": 2.1149, "step": 2631 }, { "epoch": 0.08491846319198404, "grad_norm": 0.609375, "learning_rate": 2.979386245112939e-05, "loss": 2.0905, "step": 2632 }, { "epoch": 0.0849507270457804, "grad_norm": 0.82421875, "learning_rate": 2.979360236452002e-05, "loss": 2.1058, "step": 2633 }, { "epoch": 0.08498299089957674, "grad_norm": 1.125, "learning_rate": 2.9793342115073285e-05, "loss": 2.108, "step": 2634 }, { "epoch": 0.08501525475337308, "grad_norm": 0.8125, "learning_rate": 2.9793081702792048e-05, "loss": 2.0893, "step": 2635 }, { "epoch": 0.08504751860716943, "grad_norm": 0.5546875, "learning_rate": 2.979282112767919e-05, "loss": 2.1246, "step": 2636 }, { "epoch": 0.08507978246096577, "grad_norm": 0.703125, "learning_rate": 2.9792560389737563e-05, "loss": 2.0858, "step": 2637 }, { "epoch": 0.08511204631476213, "grad_norm": 0.6171875, "learning_rate": 2.9792299488970043e-05, "loss": 2.1023, "step": 2638 }, { "epoch": 0.08514431016855847, "grad_norm": 0.5078125, "learning_rate": 2.97920384253795e-05, "loss": 2.0952, "step": 2639 }, { "epoch": 0.08517657402235482, "grad_norm": 0.76171875, "learning_rate": 2.979177719896882e-05, "loss": 2.1012, "step": 2640 }, { "epoch": 0.08520883787615116, "grad_norm": 0.486328125, "learning_rate": 2.9791515809740857e-05, "loss": 2.1019, "step": 2641 }, { "epoch": 0.0852411017299475, "grad_norm": 0.61328125, "learning_rate": 2.9791254257698507e-05, "loss": 2.0914, "step": 2642 }, { "epoch": 0.08527336558374386, "grad_norm": 0.578125, "learning_rate": 2.9790992542844637e-05, "loss": 2.1077, "step": 2643 }, { "epoch": 0.0853056294375402, "grad_norm": 0.453125, "learning_rate": 2.9790730665182134e-05, "loss": 2.0849, "step": 2644 }, { "epoch": 0.08533789329133655, "grad_norm": 0.6015625, "learning_rate": 2.9790468624713878e-05, "loss": 2.0811, "step": 2645 }, { "epoch": 0.0853701571451329, "grad_norm": 0.4140625, "learning_rate": 2.9790206421442752e-05, "loss": 2.1054, "step": 2646 }, { "epoch": 0.08540242099892924, "grad_norm": 0.58203125, "learning_rate": 2.9789944055371646e-05, "loss": 2.1172, "step": 2647 }, { "epoch": 0.0854346848527256, "grad_norm": 0.470703125, "learning_rate": 2.978968152650345e-05, "loss": 2.0774, "step": 2648 }, { "epoch": 0.08546694870652194, "grad_norm": 0.53125, "learning_rate": 2.9789418834841042e-05, "loss": 2.1012, "step": 2649 }, { "epoch": 0.08549921256031828, "grad_norm": 0.53515625, "learning_rate": 2.9789155980387327e-05, "loss": 2.0821, "step": 2650 }, { "epoch": 0.08553147641411463, "grad_norm": 0.466796875, "learning_rate": 2.9788892963145193e-05, "loss": 2.0876, "step": 2651 }, { "epoch": 0.08556374026791097, "grad_norm": 0.578125, "learning_rate": 2.978862978311753e-05, "loss": 2.064, "step": 2652 }, { "epoch": 0.08559600412170733, "grad_norm": 0.4453125, "learning_rate": 2.9788366440307243e-05, "loss": 2.0863, "step": 2653 }, { "epoch": 0.08562826797550367, "grad_norm": 0.498046875, "learning_rate": 2.9788102934717228e-05, "loss": 2.0656, "step": 2654 }, { "epoch": 0.08566053182930002, "grad_norm": 0.5, "learning_rate": 2.9787839266350384e-05, "loss": 2.1012, "step": 2655 }, { "epoch": 0.08569279568309636, "grad_norm": 0.423828125, "learning_rate": 2.9787575435209612e-05, "loss": 2.0844, "step": 2656 }, { "epoch": 0.0857250595368927, "grad_norm": 0.53125, "learning_rate": 2.978731144129782e-05, "loss": 2.1183, "step": 2657 }, { "epoch": 0.08575732339068906, "grad_norm": 0.462890625, "learning_rate": 2.9787047284617914e-05, "loss": 2.1542, "step": 2658 }, { "epoch": 0.0857895872444854, "grad_norm": 0.51953125, "learning_rate": 2.9786782965172798e-05, "loss": 2.1715, "step": 2659 }, { "epoch": 0.08582185109828175, "grad_norm": 0.470703125, "learning_rate": 2.978651848296538e-05, "loss": 2.1704, "step": 2660 }, { "epoch": 0.08585411495207809, "grad_norm": 0.5, "learning_rate": 2.978625383799858e-05, "loss": 2.1669, "step": 2661 }, { "epoch": 0.08588637880587444, "grad_norm": 0.4921875, "learning_rate": 2.97859890302753e-05, "loss": 2.1782, "step": 2662 }, { "epoch": 0.0859186426596708, "grad_norm": 0.43359375, "learning_rate": 2.9785724059798463e-05, "loss": 2.1771, "step": 2663 }, { "epoch": 0.08595090651346714, "grad_norm": 0.52734375, "learning_rate": 2.9785458926570984e-05, "loss": 2.1737, "step": 2664 }, { "epoch": 0.08598317036726348, "grad_norm": 0.46484375, "learning_rate": 2.978519363059578e-05, "loss": 2.1384, "step": 2665 }, { "epoch": 0.08601543422105982, "grad_norm": 0.447265625, "learning_rate": 2.978492817187577e-05, "loss": 2.1474, "step": 2666 }, { "epoch": 0.08604769807485617, "grad_norm": 0.443359375, "learning_rate": 2.9784662550413877e-05, "loss": 2.1322, "step": 2667 }, { "epoch": 0.08607996192865253, "grad_norm": 0.453125, "learning_rate": 2.9784396766213022e-05, "loss": 2.1782, "step": 2668 }, { "epoch": 0.08611222578244887, "grad_norm": 0.4296875, "learning_rate": 2.978413081927614e-05, "loss": 2.1967, "step": 2669 }, { "epoch": 0.08614448963624521, "grad_norm": 0.39453125, "learning_rate": 2.978386470960615e-05, "loss": 2.1735, "step": 2670 }, { "epoch": 0.08617675349004156, "grad_norm": 0.427734375, "learning_rate": 2.978359843720598e-05, "loss": 2.1713, "step": 2671 }, { "epoch": 0.0862090173438379, "grad_norm": 0.400390625, "learning_rate": 2.9783332002078565e-05, "loss": 2.1818, "step": 2672 }, { "epoch": 0.08624128119763426, "grad_norm": 0.43359375, "learning_rate": 2.978306540422684e-05, "loss": 2.1574, "step": 2673 }, { "epoch": 0.0862735450514306, "grad_norm": 0.48046875, "learning_rate": 2.9782798643653733e-05, "loss": 2.1602, "step": 2674 }, { "epoch": 0.08630580890522695, "grad_norm": 0.44921875, "learning_rate": 2.978253172036219e-05, "loss": 2.1645, "step": 2675 }, { "epoch": 0.08633807275902329, "grad_norm": 0.41015625, "learning_rate": 2.9782264634355133e-05, "loss": 2.147, "step": 2676 }, { "epoch": 0.08637033661281963, "grad_norm": 0.4140625, "learning_rate": 2.978199738563552e-05, "loss": 2.1768, "step": 2677 }, { "epoch": 0.08640260046661599, "grad_norm": 0.412109375, "learning_rate": 2.978172997420628e-05, "loss": 2.177, "step": 2678 }, { "epoch": 0.08643486432041234, "grad_norm": 0.5078125, "learning_rate": 2.9781462400070363e-05, "loss": 2.2029, "step": 2679 }, { "epoch": 0.08646712817420868, "grad_norm": 0.59765625, "learning_rate": 2.978119466323071e-05, "loss": 2.1683, "step": 2680 }, { "epoch": 0.08649939202800502, "grad_norm": 0.625, "learning_rate": 2.978092676369027e-05, "loss": 2.1943, "step": 2681 }, { "epoch": 0.08653165588180137, "grad_norm": 0.50390625, "learning_rate": 2.9780658701451995e-05, "loss": 2.1965, "step": 2682 }, { "epoch": 0.08656391973559772, "grad_norm": 0.45703125, "learning_rate": 2.978039047651883e-05, "loss": 2.1415, "step": 2683 }, { "epoch": 0.08659618358939407, "grad_norm": 0.60546875, "learning_rate": 2.9780122088893733e-05, "loss": 2.1654, "step": 2684 }, { "epoch": 0.08662844744319041, "grad_norm": 0.625, "learning_rate": 2.9779853538579653e-05, "loss": 2.1654, "step": 2685 }, { "epoch": 0.08666071129698676, "grad_norm": 0.5, "learning_rate": 2.977958482557955e-05, "loss": 2.1617, "step": 2686 }, { "epoch": 0.0866929751507831, "grad_norm": 0.451171875, "learning_rate": 2.9779315949896373e-05, "loss": 2.1677, "step": 2687 }, { "epoch": 0.08672523900457946, "grad_norm": 0.50390625, "learning_rate": 2.9779046911533097e-05, "loss": 2.1704, "step": 2688 }, { "epoch": 0.0867575028583758, "grad_norm": 0.50390625, "learning_rate": 2.977877771049267e-05, "loss": 2.1644, "step": 2689 }, { "epoch": 0.08678976671217215, "grad_norm": 0.5390625, "learning_rate": 2.9778508346778065e-05, "loss": 2.1723, "step": 2690 }, { "epoch": 0.08682203056596849, "grad_norm": 0.54296875, "learning_rate": 2.9778238820392236e-05, "loss": 2.1796, "step": 2691 }, { "epoch": 0.08685429441976483, "grad_norm": 0.46484375, "learning_rate": 2.9777969131338158e-05, "loss": 2.1685, "step": 2692 }, { "epoch": 0.08688655827356119, "grad_norm": 0.5078125, "learning_rate": 2.97776992796188e-05, "loss": 2.1612, "step": 2693 }, { "epoch": 0.08691882212735753, "grad_norm": 0.4609375, "learning_rate": 2.977742926523713e-05, "loss": 2.1416, "step": 2694 }, { "epoch": 0.08695108598115388, "grad_norm": 0.43359375, "learning_rate": 2.9777159088196118e-05, "loss": 2.1955, "step": 2695 }, { "epoch": 0.08698334983495022, "grad_norm": 0.478515625, "learning_rate": 2.977688874849874e-05, "loss": 2.1901, "step": 2696 }, { "epoch": 0.08701561368874657, "grad_norm": 0.44921875, "learning_rate": 2.9776618246147976e-05, "loss": 2.1336, "step": 2697 }, { "epoch": 0.08704787754254291, "grad_norm": 0.439453125, "learning_rate": 2.977634758114679e-05, "loss": 2.179, "step": 2698 }, { "epoch": 0.08708014139633927, "grad_norm": 0.419921875, "learning_rate": 2.9776076753498177e-05, "loss": 2.155, "step": 2699 }, { "epoch": 0.08711240525013561, "grad_norm": 0.435546875, "learning_rate": 2.977580576320511e-05, "loss": 2.1813, "step": 2700 }, { "epoch": 0.08714466910393195, "grad_norm": 0.462890625, "learning_rate": 2.9775534610270572e-05, "loss": 2.1801, "step": 2701 }, { "epoch": 0.0871769329577283, "grad_norm": 0.470703125, "learning_rate": 2.977526329469755e-05, "loss": 2.1485, "step": 2702 }, { "epoch": 0.08720919681152464, "grad_norm": 0.408203125, "learning_rate": 2.977499181648903e-05, "loss": 2.1482, "step": 2703 }, { "epoch": 0.087241460665321, "grad_norm": 0.46875, "learning_rate": 2.9774720175648e-05, "loss": 2.1781, "step": 2704 }, { "epoch": 0.08727372451911734, "grad_norm": 0.455078125, "learning_rate": 2.977444837217745e-05, "loss": 2.1553, "step": 2705 }, { "epoch": 0.08730598837291369, "grad_norm": 0.5546875, "learning_rate": 2.977417640608037e-05, "loss": 2.1788, "step": 2706 }, { "epoch": 0.08733825222671003, "grad_norm": 0.54296875, "learning_rate": 2.977390427735975e-05, "loss": 2.1266, "step": 2707 }, { "epoch": 0.08737051608050637, "grad_norm": 0.6328125, "learning_rate": 2.97736319860186e-05, "loss": 2.1539, "step": 2708 }, { "epoch": 0.08740277993430273, "grad_norm": 0.78515625, "learning_rate": 2.9773359532059903e-05, "loss": 2.1706, "step": 2709 }, { "epoch": 0.08743504378809908, "grad_norm": 0.9140625, "learning_rate": 2.977308691548666e-05, "loss": 2.1701, "step": 2710 }, { "epoch": 0.08746730764189542, "grad_norm": 0.71875, "learning_rate": 2.977281413630188e-05, "loss": 2.1482, "step": 2711 }, { "epoch": 0.08749957149569176, "grad_norm": 0.482421875, "learning_rate": 2.9772541194508555e-05, "loss": 2.1701, "step": 2712 }, { "epoch": 0.08753183534948811, "grad_norm": 0.78515625, "learning_rate": 2.9772268090109698e-05, "loss": 2.1671, "step": 2713 }, { "epoch": 0.08756409920328447, "grad_norm": 0.5546875, "learning_rate": 2.977199482310831e-05, "loss": 2.1519, "step": 2714 }, { "epoch": 0.08759636305708081, "grad_norm": 0.55859375, "learning_rate": 2.9771721393507403e-05, "loss": 2.1319, "step": 2715 }, { "epoch": 0.08762862691087715, "grad_norm": 0.67578125, "learning_rate": 2.977144780130998e-05, "loss": 2.1747, "step": 2716 }, { "epoch": 0.0876608907646735, "grad_norm": 0.4453125, "learning_rate": 2.9771174046519056e-05, "loss": 2.142, "step": 2717 }, { "epoch": 0.08769315461846984, "grad_norm": 0.57421875, "learning_rate": 2.977090012913765e-05, "loss": 2.1378, "step": 2718 }, { "epoch": 0.0877254184722662, "grad_norm": 0.458984375, "learning_rate": 2.977062604916877e-05, "loss": 2.1752, "step": 2719 }, { "epoch": 0.08775768232606254, "grad_norm": 0.474609375, "learning_rate": 2.9770351806615433e-05, "loss": 2.157, "step": 2720 }, { "epoch": 0.08778994617985889, "grad_norm": 0.40625, "learning_rate": 2.9770077401480656e-05, "loss": 2.1887, "step": 2721 }, { "epoch": 0.08782221003365523, "grad_norm": 0.400390625, "learning_rate": 2.9769802833767472e-05, "loss": 2.1215, "step": 2722 }, { "epoch": 0.08785447388745157, "grad_norm": 0.4140625, "learning_rate": 2.9769528103478888e-05, "loss": 2.159, "step": 2723 }, { "epoch": 0.08788673774124793, "grad_norm": 0.423828125, "learning_rate": 2.9769253210617935e-05, "loss": 2.1676, "step": 2724 }, { "epoch": 0.08791900159504427, "grad_norm": 0.392578125, "learning_rate": 2.976897815518764e-05, "loss": 2.1359, "step": 2725 }, { "epoch": 0.08795126544884062, "grad_norm": 0.431640625, "learning_rate": 2.9768702937191026e-05, "loss": 2.1538, "step": 2726 }, { "epoch": 0.08798352930263696, "grad_norm": 0.40234375, "learning_rate": 2.9768427556631125e-05, "loss": 2.1771, "step": 2727 }, { "epoch": 0.0880157931564333, "grad_norm": 0.4296875, "learning_rate": 2.976815201351097e-05, "loss": 2.1652, "step": 2728 }, { "epoch": 0.08804805701022966, "grad_norm": 0.42578125, "learning_rate": 2.9767876307833597e-05, "loss": 2.162, "step": 2729 }, { "epoch": 0.08808032086402601, "grad_norm": 0.4453125, "learning_rate": 2.976760043960203e-05, "loss": 2.1672, "step": 2730 }, { "epoch": 0.08811258471782235, "grad_norm": 0.44140625, "learning_rate": 2.976732440881931e-05, "loss": 2.1667, "step": 2731 }, { "epoch": 0.0881448485716187, "grad_norm": 0.474609375, "learning_rate": 2.976704821548848e-05, "loss": 2.1507, "step": 2732 }, { "epoch": 0.08817711242541504, "grad_norm": 0.412109375, "learning_rate": 2.9766771859612577e-05, "loss": 2.146, "step": 2733 }, { "epoch": 0.0882093762792114, "grad_norm": 0.486328125, "learning_rate": 2.9766495341194643e-05, "loss": 2.1364, "step": 2734 }, { "epoch": 0.08824164013300774, "grad_norm": 0.478515625, "learning_rate": 2.9766218660237722e-05, "loss": 2.1856, "step": 2735 }, { "epoch": 0.08827390398680408, "grad_norm": 0.466796875, "learning_rate": 2.976594181674486e-05, "loss": 2.164, "step": 2736 }, { "epoch": 0.08830616784060043, "grad_norm": 0.48046875, "learning_rate": 2.9765664810719103e-05, "loss": 2.1318, "step": 2737 }, { "epoch": 0.08833843169439677, "grad_norm": 0.4453125, "learning_rate": 2.97653876421635e-05, "loss": 2.154, "step": 2738 }, { "epoch": 0.08837069554819313, "grad_norm": 0.466796875, "learning_rate": 2.97651103110811e-05, "loss": 2.153, "step": 2739 }, { "epoch": 0.08840295940198947, "grad_norm": 0.48046875, "learning_rate": 2.9764832817474964e-05, "loss": 2.1565, "step": 2740 }, { "epoch": 0.08843522325578582, "grad_norm": 0.51953125, "learning_rate": 2.976455516134813e-05, "loss": 2.1747, "step": 2741 }, { "epoch": 0.08846748710958216, "grad_norm": 0.52734375, "learning_rate": 2.9764277342703675e-05, "loss": 2.1619, "step": 2742 }, { "epoch": 0.0884997509633785, "grad_norm": 0.5, "learning_rate": 2.9763999361544645e-05, "loss": 2.16, "step": 2743 }, { "epoch": 0.08853201481717486, "grad_norm": 0.58984375, "learning_rate": 2.97637212178741e-05, "loss": 2.1659, "step": 2744 }, { "epoch": 0.0885642786709712, "grad_norm": 0.5078125, "learning_rate": 2.9763442911695106e-05, "loss": 2.1426, "step": 2745 }, { "epoch": 0.08859654252476755, "grad_norm": 0.439453125, "learning_rate": 2.976316444301072e-05, "loss": 2.176, "step": 2746 }, { "epoch": 0.08862880637856389, "grad_norm": 0.58203125, "learning_rate": 2.9762885811824012e-05, "loss": 2.1498, "step": 2747 }, { "epoch": 0.08866107023236024, "grad_norm": 0.54296875, "learning_rate": 2.976260701813805e-05, "loss": 2.1907, "step": 2748 }, { "epoch": 0.0886933340861566, "grad_norm": 0.458984375, "learning_rate": 2.9762328061955894e-05, "loss": 2.1368, "step": 2749 }, { "epoch": 0.08872559793995294, "grad_norm": 0.486328125, "learning_rate": 2.976204894328063e-05, "loss": 2.1381, "step": 2750 }, { "epoch": 0.08875786179374928, "grad_norm": 0.462890625, "learning_rate": 2.9761769662115313e-05, "loss": 2.1655, "step": 2751 }, { "epoch": 0.08879012564754563, "grad_norm": 0.5078125, "learning_rate": 2.9761490218463033e-05, "loss": 2.1497, "step": 2752 }, { "epoch": 0.08882238950134197, "grad_norm": 0.55078125, "learning_rate": 2.9761210612326856e-05, "loss": 2.152, "step": 2753 }, { "epoch": 0.08885465335513833, "grad_norm": 0.4765625, "learning_rate": 2.9760930843709858e-05, "loss": 2.187, "step": 2754 }, { "epoch": 0.08888691720893467, "grad_norm": 0.44921875, "learning_rate": 2.9760650912615128e-05, "loss": 2.1648, "step": 2755 }, { "epoch": 0.08891918106273101, "grad_norm": 0.44140625, "learning_rate": 2.9760370819045744e-05, "loss": 2.1495, "step": 2756 }, { "epoch": 0.08895144491652736, "grad_norm": 0.435546875, "learning_rate": 2.976009056300478e-05, "loss": 2.1685, "step": 2757 }, { "epoch": 0.0889837087703237, "grad_norm": 0.4609375, "learning_rate": 2.9759810144495333e-05, "loss": 2.159, "step": 2758 }, { "epoch": 0.08901597262412006, "grad_norm": 0.412109375, "learning_rate": 2.975952956352048e-05, "loss": 2.1965, "step": 2759 }, { "epoch": 0.0890482364779164, "grad_norm": 0.486328125, "learning_rate": 2.975924882008332e-05, "loss": 2.1945, "step": 2760 }, { "epoch": 0.08908050033171275, "grad_norm": 0.58984375, "learning_rate": 2.9758967914186933e-05, "loss": 2.1255, "step": 2761 }, { "epoch": 0.08911276418550909, "grad_norm": 0.58203125, "learning_rate": 2.9758686845834418e-05, "loss": 2.194, "step": 2762 }, { "epoch": 0.08914502803930544, "grad_norm": 0.54296875, "learning_rate": 2.9758405615028864e-05, "loss": 2.1675, "step": 2763 }, { "epoch": 0.08917729189310179, "grad_norm": 0.494140625, "learning_rate": 2.975812422177337e-05, "loss": 2.1169, "step": 2764 }, { "epoch": 0.08920955574689814, "grad_norm": 0.42578125, "learning_rate": 2.975784266607103e-05, "loss": 2.1392, "step": 2765 }, { "epoch": 0.08924181960069448, "grad_norm": 0.55859375, "learning_rate": 2.9757560947924944e-05, "loss": 2.1719, "step": 2766 }, { "epoch": 0.08927408345449082, "grad_norm": 0.58203125, "learning_rate": 2.9757279067338218e-05, "loss": 2.166, "step": 2767 }, { "epoch": 0.08930634730828717, "grad_norm": 0.4453125, "learning_rate": 2.975699702431395e-05, "loss": 2.1653, "step": 2768 }, { "epoch": 0.08933861116208353, "grad_norm": 0.515625, "learning_rate": 2.9756714818855245e-05, "loss": 2.1572, "step": 2769 }, { "epoch": 0.08937087501587987, "grad_norm": 0.57421875, "learning_rate": 2.9756432450965213e-05, "loss": 2.1364, "step": 2770 }, { "epoch": 0.08940313886967621, "grad_norm": 0.51171875, "learning_rate": 2.975614992064695e-05, "loss": 2.1542, "step": 2771 }, { "epoch": 0.08943540272347256, "grad_norm": 0.427734375, "learning_rate": 2.9755867227903583e-05, "loss": 2.1684, "step": 2772 }, { "epoch": 0.0894676665772689, "grad_norm": 0.51953125, "learning_rate": 2.975558437273821e-05, "loss": 2.1635, "step": 2773 }, { "epoch": 0.08949993043106526, "grad_norm": 0.43359375, "learning_rate": 2.975530135515395e-05, "loss": 2.1526, "step": 2774 }, { "epoch": 0.0895321942848616, "grad_norm": 0.44140625, "learning_rate": 2.9755018175153923e-05, "loss": 2.1406, "step": 2775 }, { "epoch": 0.08956445813865795, "grad_norm": 0.52734375, "learning_rate": 2.9754734832741236e-05, "loss": 2.1539, "step": 2776 }, { "epoch": 0.08959672199245429, "grad_norm": 0.515625, "learning_rate": 2.9754451327919018e-05, "loss": 2.1569, "step": 2777 }, { "epoch": 0.08962898584625063, "grad_norm": 0.45703125, "learning_rate": 2.9754167660690378e-05, "loss": 2.1826, "step": 2778 }, { "epoch": 0.08966124970004699, "grad_norm": 0.51953125, "learning_rate": 2.975388383105845e-05, "loss": 2.1794, "step": 2779 }, { "epoch": 0.08969351355384333, "grad_norm": 0.4921875, "learning_rate": 2.9753599839026355e-05, "loss": 2.1676, "step": 2780 }, { "epoch": 0.08972577740763968, "grad_norm": 0.43359375, "learning_rate": 2.975331568459721e-05, "loss": 2.1622, "step": 2781 }, { "epoch": 0.08975804126143602, "grad_norm": 0.5625, "learning_rate": 2.975303136777416e-05, "loss": 2.1872, "step": 2782 }, { "epoch": 0.08979030511523237, "grad_norm": 0.49609375, "learning_rate": 2.9752746888560313e-05, "loss": 2.1688, "step": 2783 }, { "epoch": 0.08982256896902872, "grad_norm": 0.5078125, "learning_rate": 2.9752462246958818e-05, "loss": 2.1756, "step": 2784 }, { "epoch": 0.08985483282282507, "grad_norm": 0.62890625, "learning_rate": 2.9752177442972803e-05, "loss": 2.1704, "step": 2785 }, { "epoch": 0.08988709667662141, "grad_norm": 0.515625, "learning_rate": 2.97518924766054e-05, "loss": 2.1248, "step": 2786 }, { "epoch": 0.08991936053041776, "grad_norm": 0.4375, "learning_rate": 2.9751607347859746e-05, "loss": 2.1413, "step": 2787 }, { "epoch": 0.0899516243842141, "grad_norm": 0.5546875, "learning_rate": 2.9751322056738984e-05, "loss": 2.1423, "step": 2788 }, { "epoch": 0.08998388823801044, "grad_norm": 0.5, "learning_rate": 2.975103660324625e-05, "loss": 2.1533, "step": 2789 }, { "epoch": 0.0900161520918068, "grad_norm": 0.47265625, "learning_rate": 2.9750750987384687e-05, "loss": 2.1564, "step": 2790 }, { "epoch": 0.09004841594560314, "grad_norm": 0.494140625, "learning_rate": 2.9750465209157443e-05, "loss": 2.1438, "step": 2791 }, { "epoch": 0.09008067979939949, "grad_norm": 0.51953125, "learning_rate": 2.9750179268567657e-05, "loss": 2.1284, "step": 2792 }, { "epoch": 0.09011294365319583, "grad_norm": 0.546875, "learning_rate": 2.9749893165618477e-05, "loss": 2.1162, "step": 2793 }, { "epoch": 0.09014520750699218, "grad_norm": 0.47265625, "learning_rate": 2.9749606900313058e-05, "loss": 2.1469, "step": 2794 }, { "epoch": 0.09017747136078853, "grad_norm": 0.453125, "learning_rate": 2.9749320472654547e-05, "loss": 2.1506, "step": 2795 }, { "epoch": 0.09020973521458488, "grad_norm": 0.4921875, "learning_rate": 2.9749033882646098e-05, "loss": 2.1717, "step": 2796 }, { "epoch": 0.09024199906838122, "grad_norm": 0.408203125, "learning_rate": 2.9748747130290863e-05, "loss": 2.1635, "step": 2797 }, { "epoch": 0.09027426292217756, "grad_norm": 0.443359375, "learning_rate": 2.9748460215591998e-05, "loss": 2.173, "step": 2798 }, { "epoch": 0.09030652677597391, "grad_norm": 0.44921875, "learning_rate": 2.9748173138552666e-05, "loss": 2.1614, "step": 2799 }, { "epoch": 0.09033879062977027, "grad_norm": 0.419921875, "learning_rate": 2.9747885899176027e-05, "loss": 2.1581, "step": 2800 }, { "epoch": 0.09037105448356661, "grad_norm": 0.4453125, "learning_rate": 2.9747598497465236e-05, "loss": 2.1394, "step": 2801 }, { "epoch": 0.09040331833736295, "grad_norm": 0.435546875, "learning_rate": 2.9747310933423463e-05, "loss": 2.1687, "step": 2802 }, { "epoch": 0.0904355821911593, "grad_norm": 0.43359375, "learning_rate": 2.974702320705387e-05, "loss": 2.1615, "step": 2803 }, { "epoch": 0.09046784604495564, "grad_norm": 0.486328125, "learning_rate": 2.9746735318359624e-05, "loss": 2.1152, "step": 2804 }, { "epoch": 0.090500109898752, "grad_norm": 0.408203125, "learning_rate": 2.9746447267343895e-05, "loss": 2.155, "step": 2805 }, { "epoch": 0.09053237375254834, "grad_norm": 0.412109375, "learning_rate": 2.9746159054009858e-05, "loss": 2.1252, "step": 2806 }, { "epoch": 0.09056463760634469, "grad_norm": 0.53125, "learning_rate": 2.9745870678360678e-05, "loss": 2.1553, "step": 2807 }, { "epoch": 0.09059690146014103, "grad_norm": 0.5078125, "learning_rate": 2.9745582140399532e-05, "loss": 2.1457, "step": 2808 }, { "epoch": 0.09062916531393737, "grad_norm": 0.49609375, "learning_rate": 2.9745293440129597e-05, "loss": 2.1604, "step": 2809 }, { "epoch": 0.09066142916773373, "grad_norm": 0.42578125, "learning_rate": 2.9745004577554048e-05, "loss": 2.1607, "step": 2810 }, { "epoch": 0.09069369302153008, "grad_norm": 0.41796875, "learning_rate": 2.9744715552676067e-05, "loss": 2.1899, "step": 2811 }, { "epoch": 0.09072595687532642, "grad_norm": 0.43359375, "learning_rate": 2.9744426365498837e-05, "loss": 2.1604, "step": 2812 }, { "epoch": 0.09075822072912276, "grad_norm": 0.5078125, "learning_rate": 2.9744137016025538e-05, "loss": 2.1657, "step": 2813 }, { "epoch": 0.0907904845829191, "grad_norm": 0.51953125, "learning_rate": 2.9743847504259354e-05, "loss": 2.1733, "step": 2814 }, { "epoch": 0.09082274843671546, "grad_norm": 0.490234375, "learning_rate": 2.9743557830203477e-05, "loss": 2.1474, "step": 2815 }, { "epoch": 0.09085501229051181, "grad_norm": 0.5, "learning_rate": 2.974326799386109e-05, "loss": 2.1754, "step": 2816 }, { "epoch": 0.09088727614430815, "grad_norm": 0.4375, "learning_rate": 2.974297799523539e-05, "loss": 2.1366, "step": 2817 }, { "epoch": 0.0909195399981045, "grad_norm": 0.4453125, "learning_rate": 2.9742687834329564e-05, "loss": 2.0925, "step": 2818 }, { "epoch": 0.09095180385190084, "grad_norm": 0.5, "learning_rate": 2.9742397511146803e-05, "loss": 2.0858, "step": 2819 }, { "epoch": 0.0909840677056972, "grad_norm": 0.5546875, "learning_rate": 2.9742107025690306e-05, "loss": 2.0836, "step": 2820 }, { "epoch": 0.09101633155949354, "grad_norm": 0.54296875, "learning_rate": 2.9741816377963273e-05, "loss": 2.0904, "step": 2821 }, { "epoch": 0.09104859541328988, "grad_norm": 0.423828125, "learning_rate": 2.97415255679689e-05, "loss": 2.0488, "step": 2822 }, { "epoch": 0.09108085926708623, "grad_norm": 0.53515625, "learning_rate": 2.9741234595710393e-05, "loss": 2.1031, "step": 2823 }, { "epoch": 0.09111312312088257, "grad_norm": 0.734375, "learning_rate": 2.9740943461190947e-05, "loss": 2.0621, "step": 2824 }, { "epoch": 0.09114538697467893, "grad_norm": 0.9140625, "learning_rate": 2.9740652164413767e-05, "loss": 2.0922, "step": 2825 }, { "epoch": 0.09117765082847527, "grad_norm": 0.859375, "learning_rate": 2.9740360705382074e-05, "loss": 2.0653, "step": 2826 }, { "epoch": 0.09120991468227162, "grad_norm": 0.5390625, "learning_rate": 2.9740069084099054e-05, "loss": 2.1017, "step": 2827 }, { "epoch": 0.09124217853606796, "grad_norm": 0.63671875, "learning_rate": 2.9739777300567934e-05, "loss": 2.0717, "step": 2828 }, { "epoch": 0.0912744423898643, "grad_norm": 0.61328125, "learning_rate": 2.9739485354791918e-05, "loss": 2.0866, "step": 2829 }, { "epoch": 0.09130670624366066, "grad_norm": 0.470703125, "learning_rate": 2.973919324677422e-05, "loss": 2.0864, "step": 2830 }, { "epoch": 0.091338970097457, "grad_norm": 0.609375, "learning_rate": 2.9738900976518058e-05, "loss": 2.0782, "step": 2831 }, { "epoch": 0.09137123395125335, "grad_norm": 0.52734375, "learning_rate": 2.9738608544026645e-05, "loss": 2.0914, "step": 2832 }, { "epoch": 0.0914034978050497, "grad_norm": 0.49609375, "learning_rate": 2.973831594930321e-05, "loss": 2.0892, "step": 2833 }, { "epoch": 0.09143576165884604, "grad_norm": 0.5234375, "learning_rate": 2.973802319235096e-05, "loss": 2.0997, "step": 2834 }, { "epoch": 0.0914680255126424, "grad_norm": 0.4375, "learning_rate": 2.9737730273173123e-05, "loss": 2.0936, "step": 2835 }, { "epoch": 0.09150028936643874, "grad_norm": 0.4921875, "learning_rate": 2.9737437191772927e-05, "loss": 2.0998, "step": 2836 }, { "epoch": 0.09153255322023508, "grad_norm": 0.41015625, "learning_rate": 2.9737143948153595e-05, "loss": 2.105, "step": 2837 }, { "epoch": 0.09156481707403143, "grad_norm": 0.41796875, "learning_rate": 2.9736850542318353e-05, "loss": 2.1049, "step": 2838 }, { "epoch": 0.09159708092782777, "grad_norm": 0.40234375, "learning_rate": 2.9736556974270434e-05, "loss": 2.111, "step": 2839 }, { "epoch": 0.09162934478162413, "grad_norm": 0.4140625, "learning_rate": 2.9736263244013066e-05, "loss": 2.0756, "step": 2840 }, { "epoch": 0.09166160863542047, "grad_norm": 0.412109375, "learning_rate": 2.9735969351549485e-05, "loss": 2.094, "step": 2841 }, { "epoch": 0.09169387248921682, "grad_norm": 0.41015625, "learning_rate": 2.9735675296882923e-05, "loss": 2.0762, "step": 2842 }, { "epoch": 0.09172613634301316, "grad_norm": 0.392578125, "learning_rate": 2.9735381080016622e-05, "loss": 2.1036, "step": 2843 }, { "epoch": 0.0917584001968095, "grad_norm": 0.416015625, "learning_rate": 2.9735086700953816e-05, "loss": 2.1064, "step": 2844 }, { "epoch": 0.09179066405060586, "grad_norm": 0.4375, "learning_rate": 2.973479215969775e-05, "loss": 2.1184, "step": 2845 }, { "epoch": 0.0918229279044022, "grad_norm": 0.41796875, "learning_rate": 2.9734497456251654e-05, "loss": 2.1099, "step": 2846 }, { "epoch": 0.09185519175819855, "grad_norm": 0.46484375, "learning_rate": 2.9734202590618784e-05, "loss": 2.0916, "step": 2847 }, { "epoch": 0.09188745561199489, "grad_norm": 0.482421875, "learning_rate": 2.9733907562802388e-05, "loss": 2.1095, "step": 2848 }, { "epoch": 0.09191971946579124, "grad_norm": 0.451171875, "learning_rate": 2.97336123728057e-05, "loss": 2.1013, "step": 2849 }, { "epoch": 0.0919519833195876, "grad_norm": 0.431640625, "learning_rate": 2.9733317020631983e-05, "loss": 2.1187, "step": 2850 }, { "epoch": 0.09198424717338394, "grad_norm": 0.462890625, "learning_rate": 2.9733021506284476e-05, "loss": 2.1077, "step": 2851 }, { "epoch": 0.09201651102718028, "grad_norm": 0.4609375, "learning_rate": 2.973272582976644e-05, "loss": 2.0829, "step": 2852 }, { "epoch": 0.09204877488097662, "grad_norm": 0.51171875, "learning_rate": 2.9732429991081128e-05, "loss": 2.089, "step": 2853 }, { "epoch": 0.09208103873477297, "grad_norm": 0.5390625, "learning_rate": 2.9732133990231795e-05, "loss": 2.1047, "step": 2854 }, { "epoch": 0.09211330258856933, "grad_norm": 0.6015625, "learning_rate": 2.97318378272217e-05, "loss": 2.1106, "step": 2855 }, { "epoch": 0.09214556644236567, "grad_norm": 0.5390625, "learning_rate": 2.9731541502054104e-05, "loss": 2.0477, "step": 2856 }, { "epoch": 0.09217783029616201, "grad_norm": 0.44140625, "learning_rate": 2.9731245014732265e-05, "loss": 2.0714, "step": 2857 }, { "epoch": 0.09221009414995836, "grad_norm": 0.447265625, "learning_rate": 2.9730948365259453e-05, "loss": 2.0862, "step": 2858 }, { "epoch": 0.0922423580037547, "grad_norm": 0.470703125, "learning_rate": 2.9730651553638924e-05, "loss": 2.0778, "step": 2859 }, { "epoch": 0.09227462185755106, "grad_norm": 0.46875, "learning_rate": 2.973035457987395e-05, "loss": 2.1144, "step": 2860 }, { "epoch": 0.0923068857113474, "grad_norm": 0.53515625, "learning_rate": 2.9730057443967807e-05, "loss": 2.0923, "step": 2861 }, { "epoch": 0.09233914956514375, "grad_norm": 0.6015625, "learning_rate": 2.9729760145923758e-05, "loss": 2.1068, "step": 2862 }, { "epoch": 0.09237141341894009, "grad_norm": 0.53125, "learning_rate": 2.9729462685745074e-05, "loss": 2.1102, "step": 2863 }, { "epoch": 0.09240367727273643, "grad_norm": 0.494140625, "learning_rate": 2.972916506343503e-05, "loss": 2.1035, "step": 2864 }, { "epoch": 0.09243594112653279, "grad_norm": 0.490234375, "learning_rate": 2.9728867278996907e-05, "loss": 2.0981, "step": 2865 }, { "epoch": 0.09246820498032914, "grad_norm": 0.52734375, "learning_rate": 2.9728569332433974e-05, "loss": 2.0996, "step": 2866 }, { "epoch": 0.09250046883412548, "grad_norm": 0.53125, "learning_rate": 2.9728271223749525e-05, "loss": 2.0957, "step": 2867 }, { "epoch": 0.09253273268792182, "grad_norm": 0.4453125, "learning_rate": 2.9727972952946825e-05, "loss": 2.1186, "step": 2868 }, { "epoch": 0.09256499654171817, "grad_norm": 0.4375, "learning_rate": 2.9727674520029165e-05, "loss": 2.0784, "step": 2869 }, { "epoch": 0.09259726039551452, "grad_norm": 0.4609375, "learning_rate": 2.9727375924999834e-05, "loss": 2.0741, "step": 2870 }, { "epoch": 0.09262952424931087, "grad_norm": 0.4375, "learning_rate": 2.9727077167862112e-05, "loss": 2.1069, "step": 2871 }, { "epoch": 0.09266178810310721, "grad_norm": 0.484375, "learning_rate": 2.9726778248619286e-05, "loss": 2.1017, "step": 2872 }, { "epoch": 0.09269405195690356, "grad_norm": 0.451171875, "learning_rate": 2.972647916727465e-05, "loss": 2.1106, "step": 2873 }, { "epoch": 0.0927263158106999, "grad_norm": 0.51171875, "learning_rate": 2.9726179923831503e-05, "loss": 2.0999, "step": 2874 }, { "epoch": 0.09275857966449624, "grad_norm": 0.44140625, "learning_rate": 2.9725880518293126e-05, "loss": 2.0929, "step": 2875 }, { "epoch": 0.0927908435182926, "grad_norm": 0.494140625, "learning_rate": 2.972558095066282e-05, "loss": 2.1044, "step": 2876 }, { "epoch": 0.09282310737208894, "grad_norm": 0.439453125, "learning_rate": 2.972528122094389e-05, "loss": 2.0866, "step": 2877 }, { "epoch": 0.09285537122588529, "grad_norm": 0.451171875, "learning_rate": 2.972498132913962e-05, "loss": 2.0561, "step": 2878 }, { "epoch": 0.09288763507968163, "grad_norm": 0.42578125, "learning_rate": 2.9724681275253322e-05, "loss": 2.085, "step": 2879 }, { "epoch": 0.09291989893347798, "grad_norm": 0.458984375, "learning_rate": 2.9724381059288302e-05, "loss": 2.0693, "step": 2880 }, { "epoch": 0.09295216278727433, "grad_norm": 0.46875, "learning_rate": 2.972408068124785e-05, "loss": 2.0786, "step": 2881 }, { "epoch": 0.09298442664107068, "grad_norm": 0.44140625, "learning_rate": 2.9723780141135283e-05, "loss": 2.072, "step": 2882 }, { "epoch": 0.09301669049486702, "grad_norm": 0.41796875, "learning_rate": 2.972347943895391e-05, "loss": 2.0626, "step": 2883 }, { "epoch": 0.09304895434866337, "grad_norm": 0.5078125, "learning_rate": 2.9723178574707037e-05, "loss": 2.0666, "step": 2884 }, { "epoch": 0.09308121820245971, "grad_norm": 0.47265625, "learning_rate": 2.9722877548397973e-05, "loss": 2.1125, "step": 2885 }, { "epoch": 0.09311348205625607, "grad_norm": 0.458984375, "learning_rate": 2.972257636003004e-05, "loss": 2.0779, "step": 2886 }, { "epoch": 0.09314574591005241, "grad_norm": 0.435546875, "learning_rate": 2.9722275009606546e-05, "loss": 2.0347, "step": 2887 }, { "epoch": 0.09317800976384875, "grad_norm": 0.443359375, "learning_rate": 2.9721973497130813e-05, "loss": 2.0742, "step": 2888 }, { "epoch": 0.0932102736176451, "grad_norm": 0.46484375, "learning_rate": 2.972167182260615e-05, "loss": 2.097, "step": 2889 }, { "epoch": 0.09324253747144144, "grad_norm": 0.4921875, "learning_rate": 2.9721369986035894e-05, "loss": 2.0933, "step": 2890 }, { "epoch": 0.0932748013252378, "grad_norm": 0.578125, "learning_rate": 2.9721067987423348e-05, "loss": 2.0853, "step": 2891 }, { "epoch": 0.09330706517903414, "grad_norm": 0.7890625, "learning_rate": 2.9720765826771858e-05, "loss": 2.079, "step": 2892 }, { "epoch": 0.09333932903283049, "grad_norm": 1.3203125, "learning_rate": 2.9720463504084733e-05, "loss": 2.0456, "step": 2893 }, { "epoch": 0.09337159288662683, "grad_norm": 0.65625, "learning_rate": 2.9720161019365305e-05, "loss": 2.1022, "step": 2894 }, { "epoch": 0.09340385674042317, "grad_norm": 0.68359375, "learning_rate": 2.9719858372616903e-05, "loss": 2.0811, "step": 2895 }, { "epoch": 0.09343612059421953, "grad_norm": 0.9296875, "learning_rate": 2.9719555563842863e-05, "loss": 2.0976, "step": 2896 }, { "epoch": 0.09346838444801588, "grad_norm": 0.55859375, "learning_rate": 2.9719252593046517e-05, "loss": 2.0926, "step": 2897 }, { "epoch": 0.09350064830181222, "grad_norm": 0.59375, "learning_rate": 2.9718949460231196e-05, "loss": 2.0719, "step": 2898 }, { "epoch": 0.09353291215560856, "grad_norm": 0.5859375, "learning_rate": 2.971864616540024e-05, "loss": 2.1152, "step": 2899 }, { "epoch": 0.09356517600940491, "grad_norm": 0.51171875, "learning_rate": 2.971834270855698e-05, "loss": 2.073, "step": 2900 }, { "epoch": 0.09359743986320126, "grad_norm": 0.58203125, "learning_rate": 2.971803908970477e-05, "loss": 2.0969, "step": 2901 }, { "epoch": 0.09362970371699761, "grad_norm": 0.51953125, "learning_rate": 2.971773530884694e-05, "loss": 2.069, "step": 2902 }, { "epoch": 0.09366196757079395, "grad_norm": 0.56640625, "learning_rate": 2.971743136598684e-05, "loss": 2.1063, "step": 2903 }, { "epoch": 0.0936942314245903, "grad_norm": 0.4921875, "learning_rate": 2.9717127261127814e-05, "loss": 2.0697, "step": 2904 }, { "epoch": 0.09372649527838664, "grad_norm": 0.498046875, "learning_rate": 2.971682299427321e-05, "loss": 2.0849, "step": 2905 }, { "epoch": 0.093758759132183, "grad_norm": 0.4453125, "learning_rate": 2.9716518565426373e-05, "loss": 2.0599, "step": 2906 }, { "epoch": 0.09379102298597934, "grad_norm": 0.482421875, "learning_rate": 2.971621397459066e-05, "loss": 2.0738, "step": 2907 }, { "epoch": 0.09382328683977569, "grad_norm": 0.48828125, "learning_rate": 2.971590922176942e-05, "loss": 2.1405, "step": 2908 }, { "epoch": 0.09385555069357203, "grad_norm": 0.44921875, "learning_rate": 2.9715604306966005e-05, "loss": 2.1683, "step": 2909 }, { "epoch": 0.09388781454736837, "grad_norm": 0.48828125, "learning_rate": 2.9715299230183776e-05, "loss": 2.1579, "step": 2910 }, { "epoch": 0.09392007840116473, "grad_norm": 0.44140625, "learning_rate": 2.9714993991426095e-05, "loss": 2.1633, "step": 2911 }, { "epoch": 0.09395234225496107, "grad_norm": 0.48046875, "learning_rate": 2.9714688590696314e-05, "loss": 2.1459, "step": 2912 }, { "epoch": 0.09398460610875742, "grad_norm": 0.439453125, "learning_rate": 2.9714383027997795e-05, "loss": 2.152, "step": 2913 }, { "epoch": 0.09401686996255376, "grad_norm": 0.4296875, "learning_rate": 2.9714077303333907e-05, "loss": 2.1395, "step": 2914 }, { "epoch": 0.0940491338163501, "grad_norm": 0.47265625, "learning_rate": 2.971377141670801e-05, "loss": 2.1459, "step": 2915 }, { "epoch": 0.09408139767014646, "grad_norm": 0.419921875, "learning_rate": 2.9713465368123474e-05, "loss": 2.1477, "step": 2916 }, { "epoch": 0.09411366152394281, "grad_norm": 0.41015625, "learning_rate": 2.9713159157583666e-05, "loss": 2.1565, "step": 2917 }, { "epoch": 0.09414592537773915, "grad_norm": 0.38671875, "learning_rate": 2.9712852785091955e-05, "loss": 2.1442, "step": 2918 }, { "epoch": 0.0941781892315355, "grad_norm": 0.421875, "learning_rate": 2.9712546250651716e-05, "loss": 2.1377, "step": 2919 }, { "epoch": 0.09421045308533184, "grad_norm": 0.4453125, "learning_rate": 2.9712239554266322e-05, "loss": 2.1639, "step": 2920 }, { "epoch": 0.0942427169391282, "grad_norm": 0.388671875, "learning_rate": 2.9711932695939156e-05, "loss": 2.1388, "step": 2921 }, { "epoch": 0.09427498079292454, "grad_norm": 0.44140625, "learning_rate": 2.9711625675673585e-05, "loss": 2.1445, "step": 2922 }, { "epoch": 0.09430724464672088, "grad_norm": 0.435546875, "learning_rate": 2.971131849347299e-05, "loss": 2.1375, "step": 2923 }, { "epoch": 0.09433950850051723, "grad_norm": 0.4609375, "learning_rate": 2.9711011149340756e-05, "loss": 2.1438, "step": 2924 }, { "epoch": 0.09437177235431357, "grad_norm": 0.412109375, "learning_rate": 2.9710703643280268e-05, "loss": 2.1478, "step": 2925 }, { "epoch": 0.09440403620810993, "grad_norm": 0.451171875, "learning_rate": 2.9710395975294907e-05, "loss": 2.1424, "step": 2926 }, { "epoch": 0.09443630006190627, "grad_norm": 0.453125, "learning_rate": 2.9710088145388058e-05, "loss": 2.1571, "step": 2927 }, { "epoch": 0.09446856391570262, "grad_norm": 0.400390625, "learning_rate": 2.970978015356311e-05, "loss": 2.1669, "step": 2928 }, { "epoch": 0.09450082776949896, "grad_norm": 0.46875, "learning_rate": 2.970947199982346e-05, "loss": 2.1612, "step": 2929 }, { "epoch": 0.0945330916232953, "grad_norm": 0.396484375, "learning_rate": 2.970916368417249e-05, "loss": 2.1673, "step": 2930 }, { "epoch": 0.09456535547709166, "grad_norm": 0.419921875, "learning_rate": 2.9708855206613602e-05, "loss": 2.1388, "step": 2931 }, { "epoch": 0.094597619330888, "grad_norm": 0.416015625, "learning_rate": 2.9708546567150186e-05, "loss": 2.1637, "step": 2932 }, { "epoch": 0.09462988318468435, "grad_norm": 0.423828125, "learning_rate": 2.9708237765785644e-05, "loss": 2.156, "step": 2933 }, { "epoch": 0.09466214703848069, "grad_norm": 0.46484375, "learning_rate": 2.9707928802523372e-05, "loss": 2.1596, "step": 2934 }, { "epoch": 0.09469441089227704, "grad_norm": 0.44921875, "learning_rate": 2.9707619677366768e-05, "loss": 2.1519, "step": 2935 }, { "epoch": 0.0947266747460734, "grad_norm": 0.4375, "learning_rate": 2.970731039031924e-05, "loss": 2.134, "step": 2936 }, { "epoch": 0.09475893859986974, "grad_norm": 0.486328125, "learning_rate": 2.970700094138419e-05, "loss": 2.1814, "step": 2937 }, { "epoch": 0.09479120245366608, "grad_norm": 0.54296875, "learning_rate": 2.9706691330565023e-05, "loss": 2.1688, "step": 2938 }, { "epoch": 0.09482346630746243, "grad_norm": 0.54296875, "learning_rate": 2.970638155786515e-05, "loss": 2.1586, "step": 2939 }, { "epoch": 0.09485573016125877, "grad_norm": 0.53125, "learning_rate": 2.970607162328798e-05, "loss": 2.1305, "step": 2940 }, { "epoch": 0.09488799401505513, "grad_norm": 0.49609375, "learning_rate": 2.9705761526836915e-05, "loss": 2.1575, "step": 2941 }, { "epoch": 0.09492025786885147, "grad_norm": 0.53515625, "learning_rate": 2.970545126851539e-05, "loss": 2.187, "step": 2942 }, { "epoch": 0.09495252172264781, "grad_norm": 0.482421875, "learning_rate": 2.9705140848326798e-05, "loss": 2.1735, "step": 2943 }, { "epoch": 0.09498478557644416, "grad_norm": 0.43359375, "learning_rate": 2.9704830266274567e-05, "loss": 2.1496, "step": 2944 }, { "epoch": 0.0950170494302405, "grad_norm": 0.47265625, "learning_rate": 2.9704519522362117e-05, "loss": 2.169, "step": 2945 }, { "epoch": 0.09504931328403686, "grad_norm": 0.431640625, "learning_rate": 2.970420861659286e-05, "loss": 2.1293, "step": 2946 }, { "epoch": 0.0950815771378332, "grad_norm": 0.44140625, "learning_rate": 2.9703897548970226e-05, "loss": 2.1638, "step": 2947 }, { "epoch": 0.09511384099162955, "grad_norm": 0.494140625, "learning_rate": 2.970358631949764e-05, "loss": 2.1497, "step": 2948 }, { "epoch": 0.09514610484542589, "grad_norm": 0.44921875, "learning_rate": 2.970327492817852e-05, "loss": 2.1115, "step": 2949 }, { "epoch": 0.09517836869922223, "grad_norm": 0.51953125, "learning_rate": 2.9702963375016298e-05, "loss": 2.0926, "step": 2950 }, { "epoch": 0.09521063255301859, "grad_norm": 0.484375, "learning_rate": 2.9702651660014404e-05, "loss": 2.1144, "step": 2951 }, { "epoch": 0.09524289640681494, "grad_norm": 0.490234375, "learning_rate": 2.9702339783176267e-05, "loss": 2.1684, "step": 2952 }, { "epoch": 0.09527516026061128, "grad_norm": 0.5, "learning_rate": 2.970202774450532e-05, "loss": 2.1631, "step": 2953 }, { "epoch": 0.09530742411440762, "grad_norm": 0.39453125, "learning_rate": 2.9701715544005e-05, "loss": 2.1641, "step": 2954 }, { "epoch": 0.09533968796820397, "grad_norm": 0.466796875, "learning_rate": 2.9701403181678743e-05, "loss": 2.1532, "step": 2955 }, { "epoch": 0.09537195182200033, "grad_norm": 0.45703125, "learning_rate": 2.9701090657529988e-05, "loss": 2.1436, "step": 2956 }, { "epoch": 0.09540421567579667, "grad_norm": 0.53125, "learning_rate": 2.970077797156217e-05, "loss": 2.1435, "step": 2957 }, { "epoch": 0.09543647952959301, "grad_norm": 0.609375, "learning_rate": 2.9700465123778736e-05, "loss": 2.1367, "step": 2958 }, { "epoch": 0.09546874338338936, "grad_norm": 0.58984375, "learning_rate": 2.970015211418313e-05, "loss": 2.151, "step": 2959 }, { "epoch": 0.0955010072371857, "grad_norm": 0.58984375, "learning_rate": 2.969983894277879e-05, "loss": 2.1793, "step": 2960 }, { "epoch": 0.09553327109098204, "grad_norm": 0.482421875, "learning_rate": 2.9699525609569172e-05, "loss": 2.1704, "step": 2961 }, { "epoch": 0.0955655349447784, "grad_norm": 0.455078125, "learning_rate": 2.969921211455772e-05, "loss": 2.1467, "step": 2962 }, { "epoch": 0.09559779879857475, "grad_norm": 0.57421875, "learning_rate": 2.9698898457747886e-05, "loss": 2.1593, "step": 2963 }, { "epoch": 0.09563006265237109, "grad_norm": 0.53125, "learning_rate": 2.9698584639143127e-05, "loss": 2.1367, "step": 2964 }, { "epoch": 0.09566232650616743, "grad_norm": 0.494140625, "learning_rate": 2.969827065874689e-05, "loss": 2.1265, "step": 2965 }, { "epoch": 0.09569459035996378, "grad_norm": 0.470703125, "learning_rate": 2.9697956516562635e-05, "loss": 2.1483, "step": 2966 }, { "epoch": 0.09572685421376013, "grad_norm": 0.515625, "learning_rate": 2.9697642212593818e-05, "loss": 2.1514, "step": 2967 }, { "epoch": 0.09575911806755648, "grad_norm": 0.42578125, "learning_rate": 2.96973277468439e-05, "loss": 2.1529, "step": 2968 }, { "epoch": 0.09579138192135282, "grad_norm": 0.48828125, "learning_rate": 2.969701311931634e-05, "loss": 2.1715, "step": 2969 }, { "epoch": 0.09582364577514917, "grad_norm": 0.41796875, "learning_rate": 2.9696698330014608e-05, "loss": 2.1493, "step": 2970 }, { "epoch": 0.09585590962894551, "grad_norm": 0.453125, "learning_rate": 2.969638337894216e-05, "loss": 2.1715, "step": 2971 }, { "epoch": 0.09588817348274187, "grad_norm": 0.412109375, "learning_rate": 2.969606826610247e-05, "loss": 2.1519, "step": 2972 }, { "epoch": 0.09592043733653821, "grad_norm": 0.4296875, "learning_rate": 2.9695752991499e-05, "loss": 2.1729, "step": 2973 }, { "epoch": 0.09595270119033456, "grad_norm": 0.423828125, "learning_rate": 2.9695437555135224e-05, "loss": 2.139, "step": 2974 }, { "epoch": 0.0959849650441309, "grad_norm": 0.498046875, "learning_rate": 2.9695121957014616e-05, "loss": 2.1679, "step": 2975 }, { "epoch": 0.09601722889792724, "grad_norm": 0.447265625, "learning_rate": 2.9694806197140646e-05, "loss": 2.1489, "step": 2976 }, { "epoch": 0.0960494927517236, "grad_norm": 0.5078125, "learning_rate": 2.9694490275516793e-05, "loss": 2.1367, "step": 2977 }, { "epoch": 0.09608175660551994, "grad_norm": 0.66796875, "learning_rate": 2.9694174192146534e-05, "loss": 2.1384, "step": 2978 }, { "epoch": 0.09611402045931629, "grad_norm": 0.58203125, "learning_rate": 2.9693857947033345e-05, "loss": 2.1504, "step": 2979 }, { "epoch": 0.09614628431311263, "grad_norm": 0.7109375, "learning_rate": 2.9693541540180705e-05, "loss": 2.1349, "step": 2980 }, { "epoch": 0.09617854816690898, "grad_norm": 0.796875, "learning_rate": 2.969322497159211e-05, "loss": 2.148, "step": 2981 }, { "epoch": 0.09621081202070533, "grad_norm": 0.67578125, "learning_rate": 2.9692908241271026e-05, "loss": 2.1728, "step": 2982 }, { "epoch": 0.09624307587450168, "grad_norm": 0.44140625, "learning_rate": 2.9692591349220957e-05, "loss": 2.1307, "step": 2983 }, { "epoch": 0.09627533972829802, "grad_norm": 0.546875, "learning_rate": 2.969227429544538e-05, "loss": 2.1433, "step": 2984 }, { "epoch": 0.09630760358209436, "grad_norm": 0.6171875, "learning_rate": 2.9691957079947783e-05, "loss": 2.1504, "step": 2985 }, { "epoch": 0.09633986743589071, "grad_norm": 0.44140625, "learning_rate": 2.9691639702731668e-05, "loss": 2.1553, "step": 2986 }, { "epoch": 0.09637213128968707, "grad_norm": 0.53125, "learning_rate": 2.9691322163800517e-05, "loss": 2.1278, "step": 2987 }, { "epoch": 0.09640439514348341, "grad_norm": 0.4609375, "learning_rate": 2.9691004463157838e-05, "loss": 2.144, "step": 2988 }, { "epoch": 0.09643665899727975, "grad_norm": 0.439453125, "learning_rate": 2.969068660080712e-05, "loss": 2.1381, "step": 2989 }, { "epoch": 0.0964689228510761, "grad_norm": 0.54296875, "learning_rate": 2.969036857675186e-05, "loss": 2.1641, "step": 2990 }, { "epoch": 0.09650118670487244, "grad_norm": 0.41796875, "learning_rate": 2.9690050390995564e-05, "loss": 2.1825, "step": 2991 }, { "epoch": 0.0965334505586688, "grad_norm": 0.484375, "learning_rate": 2.968973204354173e-05, "loss": 2.1667, "step": 2992 }, { "epoch": 0.09656571441246514, "grad_norm": 0.51953125, "learning_rate": 2.9689413534393862e-05, "loss": 2.1422, "step": 2993 }, { "epoch": 0.09659797826626149, "grad_norm": 0.4453125, "learning_rate": 2.9689094863555474e-05, "loss": 2.1366, "step": 2994 }, { "epoch": 0.09663024212005783, "grad_norm": 0.431640625, "learning_rate": 2.9688776031030067e-05, "loss": 2.1496, "step": 2995 }, { "epoch": 0.09666250597385417, "grad_norm": 0.431640625, "learning_rate": 2.9688457036821143e-05, "loss": 2.1479, "step": 2996 }, { "epoch": 0.09669476982765053, "grad_norm": 0.412109375, "learning_rate": 2.9688137880932228e-05, "loss": 2.141, "step": 2997 }, { "epoch": 0.09672703368144688, "grad_norm": 0.4453125, "learning_rate": 2.9687818563366827e-05, "loss": 2.1645, "step": 2998 }, { "epoch": 0.09675929753524322, "grad_norm": 0.41796875, "learning_rate": 2.968749908412846e-05, "loss": 2.1181, "step": 2999 }, { "epoch": 0.09679156138903956, "grad_norm": 0.408203125, "learning_rate": 2.9687179443220634e-05, "loss": 2.1704, "step": 3000 }, { "epoch": 0.0968238252428359, "grad_norm": 0.46484375, "learning_rate": 2.9686859640646874e-05, "loss": 2.1494, "step": 3001 }, { "epoch": 0.09685608909663226, "grad_norm": 0.46875, "learning_rate": 2.96865396764107e-05, "loss": 2.1424, "step": 3002 }, { "epoch": 0.09688835295042861, "grad_norm": 0.431640625, "learning_rate": 2.9686219550515636e-05, "loss": 2.1507, "step": 3003 }, { "epoch": 0.09692061680422495, "grad_norm": 0.4140625, "learning_rate": 2.96858992629652e-05, "loss": 2.1648, "step": 3004 }, { "epoch": 0.0969528806580213, "grad_norm": 0.470703125, "learning_rate": 2.9685578813762925e-05, "loss": 2.1209, "step": 3005 }, { "epoch": 0.09698514451181764, "grad_norm": 0.451171875, "learning_rate": 2.968525820291233e-05, "loss": 2.1604, "step": 3006 }, { "epoch": 0.097017408365614, "grad_norm": 0.4140625, "learning_rate": 2.968493743041695e-05, "loss": 2.1273, "step": 3007 }, { "epoch": 0.09704967221941034, "grad_norm": 0.453125, "learning_rate": 2.968461649628031e-05, "loss": 2.1783, "step": 3008 }, { "epoch": 0.09708193607320668, "grad_norm": 0.466796875, "learning_rate": 2.968429540050595e-05, "loss": 2.1801, "step": 3009 }, { "epoch": 0.09711419992700303, "grad_norm": 0.419921875, "learning_rate": 2.9683974143097395e-05, "loss": 2.1314, "step": 3010 }, { "epoch": 0.09714646378079937, "grad_norm": 0.376953125, "learning_rate": 2.9683652724058194e-05, "loss": 2.1378, "step": 3011 }, { "epoch": 0.09717872763459573, "grad_norm": 0.427734375, "learning_rate": 2.9683331143391876e-05, "loss": 2.1573, "step": 3012 }, { "epoch": 0.09721099148839207, "grad_norm": 0.443359375, "learning_rate": 2.9683009401101978e-05, "loss": 2.1386, "step": 3013 }, { "epoch": 0.09724325534218842, "grad_norm": 0.3828125, "learning_rate": 2.968268749719205e-05, "loss": 2.1418, "step": 3014 }, { "epoch": 0.09727551919598476, "grad_norm": 0.42578125, "learning_rate": 2.9682365431665632e-05, "loss": 2.1571, "step": 3015 }, { "epoch": 0.0973077830497811, "grad_norm": 0.453125, "learning_rate": 2.968204320452627e-05, "loss": 2.1395, "step": 3016 }, { "epoch": 0.09734004690357746, "grad_norm": 0.44140625, "learning_rate": 2.9681720815777505e-05, "loss": 2.1624, "step": 3017 }, { "epoch": 0.0973723107573738, "grad_norm": 0.44921875, "learning_rate": 2.968139826542289e-05, "loss": 2.1768, "step": 3018 }, { "epoch": 0.09740457461117015, "grad_norm": 0.421875, "learning_rate": 2.968107555346598e-05, "loss": 2.1557, "step": 3019 }, { "epoch": 0.0974368384649665, "grad_norm": 0.453125, "learning_rate": 2.968075267991032e-05, "loss": 2.1712, "step": 3020 }, { "epoch": 0.09746910231876284, "grad_norm": 0.466796875, "learning_rate": 2.9680429644759468e-05, "loss": 2.1797, "step": 3021 }, { "epoch": 0.0975013661725592, "grad_norm": 0.470703125, "learning_rate": 2.968010644801697e-05, "loss": 2.1473, "step": 3022 }, { "epoch": 0.09753363002635554, "grad_norm": 0.5234375, "learning_rate": 2.96797830896864e-05, "loss": 2.1536, "step": 3023 }, { "epoch": 0.09756589388015188, "grad_norm": 0.45703125, "learning_rate": 2.967945956977131e-05, "loss": 2.1702, "step": 3024 }, { "epoch": 0.09759815773394823, "grad_norm": 0.46875, "learning_rate": 2.967913588827525e-05, "loss": 2.1253, "step": 3025 }, { "epoch": 0.09763042158774457, "grad_norm": 0.443359375, "learning_rate": 2.9678812045201802e-05, "loss": 2.1476, "step": 3026 }, { "epoch": 0.09766268544154093, "grad_norm": 0.421875, "learning_rate": 2.967848804055452e-05, "loss": 2.1558, "step": 3027 }, { "epoch": 0.09769494929533727, "grad_norm": 0.546875, "learning_rate": 2.9678163874336968e-05, "loss": 2.1631, "step": 3028 }, { "epoch": 0.09772721314913362, "grad_norm": 0.67578125, "learning_rate": 2.967783954655272e-05, "loss": 2.1721, "step": 3029 }, { "epoch": 0.09775947700292996, "grad_norm": 0.83203125, "learning_rate": 2.967751505720535e-05, "loss": 2.1713, "step": 3030 }, { "epoch": 0.0977917408567263, "grad_norm": 0.8828125, "learning_rate": 2.967719040629842e-05, "loss": 2.1487, "step": 3031 }, { "epoch": 0.09782400471052266, "grad_norm": 0.6640625, "learning_rate": 2.9676865593835505e-05, "loss": 2.1444, "step": 3032 }, { "epoch": 0.097856268564319, "grad_norm": 0.578125, "learning_rate": 2.967654061982018e-05, "loss": 2.1781, "step": 3033 }, { "epoch": 0.09788853241811535, "grad_norm": 0.6875, "learning_rate": 2.9676215484256027e-05, "loss": 2.1576, "step": 3034 }, { "epoch": 0.09792079627191169, "grad_norm": 0.462890625, "learning_rate": 2.9675890187146628e-05, "loss": 2.1441, "step": 3035 }, { "epoch": 0.09795306012570804, "grad_norm": 0.5703125, "learning_rate": 2.9675564728495558e-05, "loss": 2.1691, "step": 3036 }, { "epoch": 0.0979853239795044, "grad_norm": 0.51953125, "learning_rate": 2.967523910830639e-05, "loss": 2.1373, "step": 3037 }, { "epoch": 0.09801758783330074, "grad_norm": 0.47265625, "learning_rate": 2.9674913326582726e-05, "loss": 2.1683, "step": 3038 }, { "epoch": 0.09804985168709708, "grad_norm": 0.55078125, "learning_rate": 2.9674587383328145e-05, "loss": 2.155, "step": 3039 }, { "epoch": 0.09808211554089342, "grad_norm": 0.396484375, "learning_rate": 2.9674261278546227e-05, "loss": 2.1297, "step": 3040 }, { "epoch": 0.09811437939468977, "grad_norm": 0.53125, "learning_rate": 2.9673935012240578e-05, "loss": 2.1424, "step": 3041 }, { "epoch": 0.09814664324848613, "grad_norm": 0.421875, "learning_rate": 2.9673608584414768e-05, "loss": 2.1466, "step": 3042 }, { "epoch": 0.09817890710228247, "grad_norm": 0.45703125, "learning_rate": 2.967328199507241e-05, "loss": 2.1566, "step": 3043 }, { "epoch": 0.09821117095607881, "grad_norm": 0.4453125, "learning_rate": 2.9672955244217084e-05, "loss": 2.1167, "step": 3044 }, { "epoch": 0.09824343480987516, "grad_norm": 0.41015625, "learning_rate": 2.96726283318524e-05, "loss": 2.1627, "step": 3045 }, { "epoch": 0.0982756986636715, "grad_norm": 0.4765625, "learning_rate": 2.9672301257981943e-05, "loss": 2.1572, "step": 3046 }, { "epoch": 0.09830796251746786, "grad_norm": 0.404296875, "learning_rate": 2.9671974022609324e-05, "loss": 2.1594, "step": 3047 }, { "epoch": 0.0983402263712642, "grad_norm": 0.484375, "learning_rate": 2.9671646625738137e-05, "loss": 2.1544, "step": 3048 }, { "epoch": 0.09837249022506055, "grad_norm": 0.44140625, "learning_rate": 2.9671319067371995e-05, "loss": 2.1319, "step": 3049 }, { "epoch": 0.09840475407885689, "grad_norm": 0.400390625, "learning_rate": 2.967099134751449e-05, "loss": 2.133, "step": 3050 }, { "epoch": 0.09843701793265323, "grad_norm": 0.423828125, "learning_rate": 2.967066346616924e-05, "loss": 2.168, "step": 3051 }, { "epoch": 0.09846928178644958, "grad_norm": 0.435546875, "learning_rate": 2.9670335423339852e-05, "loss": 2.1458, "step": 3052 }, { "epoch": 0.09850154564024594, "grad_norm": 0.439453125, "learning_rate": 2.9670007219029938e-05, "loss": 2.142, "step": 3053 }, { "epoch": 0.09853380949404228, "grad_norm": 0.4375, "learning_rate": 2.966967885324311e-05, "loss": 2.1741, "step": 3054 }, { "epoch": 0.09856607334783862, "grad_norm": 0.384765625, "learning_rate": 2.9669350325982975e-05, "loss": 2.1513, "step": 3055 }, { "epoch": 0.09859833720163497, "grad_norm": 0.392578125, "learning_rate": 2.966902163725316e-05, "loss": 2.1727, "step": 3056 }, { "epoch": 0.09863060105543131, "grad_norm": 0.408203125, "learning_rate": 2.966869278705728e-05, "loss": 2.1623, "step": 3057 }, { "epoch": 0.09866286490922767, "grad_norm": 0.40234375, "learning_rate": 2.9668363775398948e-05, "loss": 2.1337, "step": 3058 }, { "epoch": 0.09869512876302401, "grad_norm": 0.408203125, "learning_rate": 2.9668034602281792e-05, "loss": 2.1492, "step": 3059 }, { "epoch": 0.09872739261682036, "grad_norm": 0.4296875, "learning_rate": 2.9667705267709433e-05, "loss": 2.1854, "step": 3060 }, { "epoch": 0.0987596564706167, "grad_norm": 0.41015625, "learning_rate": 2.96673757716855e-05, "loss": 2.158, "step": 3061 }, { "epoch": 0.09879192032441304, "grad_norm": 0.421875, "learning_rate": 2.9667046114213616e-05, "loss": 2.1479, "step": 3062 }, { "epoch": 0.0988241841782094, "grad_norm": 0.4140625, "learning_rate": 2.966671629529741e-05, "loss": 2.1476, "step": 3063 }, { "epoch": 0.09885644803200574, "grad_norm": 0.4375, "learning_rate": 2.9666386314940514e-05, "loss": 2.1814, "step": 3064 }, { "epoch": 0.09888871188580209, "grad_norm": 0.46875, "learning_rate": 2.966605617314656e-05, "loss": 2.1528, "step": 3065 }, { "epoch": 0.09892097573959843, "grad_norm": 0.478515625, "learning_rate": 2.9665725869919176e-05, "loss": 2.1522, "step": 3066 }, { "epoch": 0.09895323959339478, "grad_norm": 0.435546875, "learning_rate": 2.9665395405262004e-05, "loss": 2.0561, "step": 3067 }, { "epoch": 0.09898550344719113, "grad_norm": 0.462890625, "learning_rate": 2.9665064779178683e-05, "loss": 2.0962, "step": 3068 }, { "epoch": 0.09901776730098748, "grad_norm": 0.50390625, "learning_rate": 2.966473399167285e-05, "loss": 2.0857, "step": 3069 }, { "epoch": 0.09905003115478382, "grad_norm": 0.48046875, "learning_rate": 2.9664403042748142e-05, "loss": 2.1421, "step": 3070 }, { "epoch": 0.09908229500858017, "grad_norm": 0.490234375, "learning_rate": 2.9664071932408207e-05, "loss": 2.1207, "step": 3071 }, { "epoch": 0.09911455886237651, "grad_norm": 0.458984375, "learning_rate": 2.966374066065669e-05, "loss": 2.1661, "step": 3072 }, { "epoch": 0.09914682271617287, "grad_norm": 0.482421875, "learning_rate": 2.9663409227497233e-05, "loss": 2.1467, "step": 3073 }, { "epoch": 0.09917908656996921, "grad_norm": 0.470703125, "learning_rate": 2.966307763293349e-05, "loss": 2.1435, "step": 3074 }, { "epoch": 0.09921135042376555, "grad_norm": 0.494140625, "learning_rate": 2.9662745876969107e-05, "loss": 2.1543, "step": 3075 }, { "epoch": 0.0992436142775619, "grad_norm": 0.462890625, "learning_rate": 2.9662413959607734e-05, "loss": 2.1766, "step": 3076 }, { "epoch": 0.09927587813135824, "grad_norm": 0.51953125, "learning_rate": 2.9662081880853028e-05, "loss": 2.1473, "step": 3077 }, { "epoch": 0.0993081419851546, "grad_norm": 0.44921875, "learning_rate": 2.9661749640708642e-05, "loss": 2.1385, "step": 3078 }, { "epoch": 0.09934040583895094, "grad_norm": 0.421875, "learning_rate": 2.9661417239178235e-05, "loss": 2.1431, "step": 3079 }, { "epoch": 0.09937266969274729, "grad_norm": 0.41796875, "learning_rate": 2.9661084676265466e-05, "loss": 2.133, "step": 3080 }, { "epoch": 0.09940493354654363, "grad_norm": 0.40625, "learning_rate": 2.9660751951973995e-05, "loss": 2.1576, "step": 3081 }, { "epoch": 0.09943719740033997, "grad_norm": 0.396484375, "learning_rate": 2.9660419066307484e-05, "loss": 2.1372, "step": 3082 }, { "epoch": 0.09946946125413633, "grad_norm": 0.392578125, "learning_rate": 2.96600860192696e-05, "loss": 2.1569, "step": 3083 }, { "epoch": 0.09950172510793268, "grad_norm": 0.404296875, "learning_rate": 2.9659752810864e-05, "loss": 2.1309, "step": 3084 }, { "epoch": 0.09953398896172902, "grad_norm": 0.380859375, "learning_rate": 2.9659419441094364e-05, "loss": 2.1732, "step": 3085 }, { "epoch": 0.09956625281552536, "grad_norm": 0.376953125, "learning_rate": 2.965908590996435e-05, "loss": 2.1746, "step": 3086 }, { "epoch": 0.09959851666932171, "grad_norm": 0.3984375, "learning_rate": 2.9658752217477634e-05, "loss": 2.1581, "step": 3087 }, { "epoch": 0.09963078052311806, "grad_norm": 0.43359375, "learning_rate": 2.965841836363789e-05, "loss": 2.1634, "step": 3088 }, { "epoch": 0.09966304437691441, "grad_norm": 0.421875, "learning_rate": 2.9658084348448798e-05, "loss": 2.1391, "step": 3089 }, { "epoch": 0.09969530823071075, "grad_norm": 0.42578125, "learning_rate": 2.9657750171914026e-05, "loss": 2.1495, "step": 3090 }, { "epoch": 0.0997275720845071, "grad_norm": 0.41015625, "learning_rate": 2.9657415834037256e-05, "loss": 2.1701, "step": 3091 }, { "epoch": 0.09975983593830344, "grad_norm": 0.421875, "learning_rate": 2.965708133482217e-05, "loss": 2.1545, "step": 3092 }, { "epoch": 0.0997920997920998, "grad_norm": 0.43359375, "learning_rate": 2.9656746674272442e-05, "loss": 2.1435, "step": 3093 }, { "epoch": 0.09982436364589614, "grad_norm": 0.48828125, "learning_rate": 2.9656411852391765e-05, "loss": 2.1409, "step": 3094 }, { "epoch": 0.09985662749969249, "grad_norm": 0.462890625, "learning_rate": 2.9656076869183824e-05, "loss": 2.1341, "step": 3095 }, { "epoch": 0.09988889135348883, "grad_norm": 0.4609375, "learning_rate": 2.9655741724652296e-05, "loss": 2.1653, "step": 3096 }, { "epoch": 0.09992115520728517, "grad_norm": 0.609375, "learning_rate": 2.965540641880088e-05, "loss": 2.1431, "step": 3097 }, { "epoch": 0.09995341906108153, "grad_norm": 1.0234375, "learning_rate": 2.9655070951633267e-05, "loss": 2.1264, "step": 3098 }, { "epoch": 0.09998568291487787, "grad_norm": 1.609375, "learning_rate": 2.9654735323153144e-05, "loss": 2.1704, "step": 3099 }, { "epoch": 0.10001794676867422, "grad_norm": 0.5625, "learning_rate": 2.9654399533364205e-05, "loss": 2.1576, "step": 3100 }, { "epoch": 0.10005021062247056, "grad_norm": 1.75, "learning_rate": 2.9654063582270153e-05, "loss": 2.1577, "step": 3101 }, { "epoch": 0.1000824744762669, "grad_norm": 0.953125, "learning_rate": 2.965372746987468e-05, "loss": 2.1531, "step": 3102 }, { "epoch": 0.10011473833006326, "grad_norm": 3.40625, "learning_rate": 2.965339119618149e-05, "loss": 2.1662, "step": 3103 }, { "epoch": 0.10014700218385961, "grad_norm": 3.46875, "learning_rate": 2.965305476119428e-05, "loss": 2.1777, "step": 3104 }, { "epoch": 0.10017926603765595, "grad_norm": 1.2890625, "learning_rate": 2.9652718164916753e-05, "loss": 2.1406, "step": 3105 }, { "epoch": 0.1002115298914523, "grad_norm": 4.28125, "learning_rate": 2.965238140735262e-05, "loss": 2.1725, "step": 3106 }, { "epoch": 0.10024379374524864, "grad_norm": 4.75, "learning_rate": 2.965204448850558e-05, "loss": 2.1914, "step": 3107 }, { "epoch": 0.100276057599045, "grad_norm": 3.921875, "learning_rate": 2.9651707408379346e-05, "loss": 2.1766, "step": 3108 }, { "epoch": 0.10030832145284134, "grad_norm": 1.3046875, "learning_rate": 2.9651370166977634e-05, "loss": 2.1374, "step": 3109 }, { "epoch": 0.10034058530663768, "grad_norm": 4.03125, "learning_rate": 2.9651032764304143e-05, "loss": 2.1506, "step": 3110 }, { "epoch": 0.10037284916043403, "grad_norm": 5.0625, "learning_rate": 2.9650695200362598e-05, "loss": 2.1865, "step": 3111 }, { "epoch": 0.10040511301423037, "grad_norm": 4.5625, "learning_rate": 2.9650357475156707e-05, "loss": 2.1688, "step": 3112 }, { "epoch": 0.10043737686802673, "grad_norm": 2.65625, "learning_rate": 2.9650019588690197e-05, "loss": 2.1565, "step": 3113 }, { "epoch": 0.10046964072182307, "grad_norm": 1.5234375, "learning_rate": 2.9649681540966777e-05, "loss": 2.14, "step": 3114 }, { "epoch": 0.10050190457561942, "grad_norm": 2.09375, "learning_rate": 2.9649343331990175e-05, "loss": 2.1504, "step": 3115 }, { "epoch": 0.10053416842941576, "grad_norm": 1.9609375, "learning_rate": 2.9649004961764107e-05, "loss": 2.1664, "step": 3116 }, { "epoch": 0.1005664322832121, "grad_norm": 1.2890625, "learning_rate": 2.9648666430292303e-05, "loss": 2.1758, "step": 3117 }, { "epoch": 0.10059869613700846, "grad_norm": 1.078125, "learning_rate": 2.964832773757849e-05, "loss": 2.1257, "step": 3118 }, { "epoch": 0.1006309599908048, "grad_norm": 1.4921875, "learning_rate": 2.9647988883626392e-05, "loss": 2.1035, "step": 3119 }, { "epoch": 0.10066322384460115, "grad_norm": 1.3046875, "learning_rate": 2.964764986843974e-05, "loss": 2.1737, "step": 3120 }, { "epoch": 0.10069548769839749, "grad_norm": 0.734375, "learning_rate": 2.964731069202227e-05, "loss": 2.1459, "step": 3121 }, { "epoch": 0.10072775155219384, "grad_norm": 1.0703125, "learning_rate": 2.9646971354377707e-05, "loss": 2.1444, "step": 3122 }, { "epoch": 0.1007600154059902, "grad_norm": 0.95703125, "learning_rate": 2.9646631855509795e-05, "loss": 2.173, "step": 3123 }, { "epoch": 0.10079227925978654, "grad_norm": 0.66796875, "learning_rate": 2.9646292195422264e-05, "loss": 2.1604, "step": 3124 }, { "epoch": 0.10082454311358288, "grad_norm": 0.74609375, "learning_rate": 2.964595237411886e-05, "loss": 2.1445, "step": 3125 }, { "epoch": 0.10085680696737923, "grad_norm": 0.78515625, "learning_rate": 2.9645612391603314e-05, "loss": 2.1808, "step": 3126 }, { "epoch": 0.10088907082117557, "grad_norm": 0.59765625, "learning_rate": 2.9645272247879376e-05, "loss": 2.1489, "step": 3127 }, { "epoch": 0.10092133467497193, "grad_norm": 0.5703125, "learning_rate": 2.964493194295079e-05, "loss": 2.1312, "step": 3128 }, { "epoch": 0.10095359852876827, "grad_norm": 0.6640625, "learning_rate": 2.9644591476821293e-05, "loss": 2.1181, "step": 3129 }, { "epoch": 0.10098586238256461, "grad_norm": 0.54296875, "learning_rate": 2.9644250849494645e-05, "loss": 2.1511, "step": 3130 }, { "epoch": 0.10101812623636096, "grad_norm": 0.5234375, "learning_rate": 2.9643910060974585e-05, "loss": 2.0899, "step": 3131 }, { "epoch": 0.1010503900901573, "grad_norm": 0.50390625, "learning_rate": 2.9643569111264873e-05, "loss": 2.1136, "step": 3132 }, { "epoch": 0.10108265394395366, "grad_norm": 0.515625, "learning_rate": 2.9643228000369257e-05, "loss": 2.1555, "step": 3133 }, { "epoch": 0.10111491779775, "grad_norm": 0.443359375, "learning_rate": 2.964288672829149e-05, "loss": 2.1196, "step": 3134 }, { "epoch": 0.10114718165154635, "grad_norm": 0.46484375, "learning_rate": 2.9642545295035334e-05, "loss": 2.1632, "step": 3135 }, { "epoch": 0.10117944550534269, "grad_norm": 0.46484375, "learning_rate": 2.964220370060454e-05, "loss": 2.1864, "step": 3136 }, { "epoch": 0.10121170935913903, "grad_norm": 0.4453125, "learning_rate": 2.9641861945002873e-05, "loss": 2.1677, "step": 3137 }, { "epoch": 0.10124397321293538, "grad_norm": 0.423828125, "learning_rate": 2.964152002823409e-05, "loss": 2.1616, "step": 3138 }, { "epoch": 0.10127623706673174, "grad_norm": 0.451171875, "learning_rate": 2.9641177950301965e-05, "loss": 2.1471, "step": 3139 }, { "epoch": 0.10130850092052808, "grad_norm": 0.44140625, "learning_rate": 2.9640835711210254e-05, "loss": 2.1475, "step": 3140 }, { "epoch": 0.10134076477432442, "grad_norm": 0.43359375, "learning_rate": 2.9640493310962725e-05, "loss": 2.1427, "step": 3141 }, { "epoch": 0.10137302862812077, "grad_norm": 0.43359375, "learning_rate": 2.9640150749563153e-05, "loss": 2.1418, "step": 3142 }, { "epoch": 0.10140529248191711, "grad_norm": 0.3984375, "learning_rate": 2.9639808027015298e-05, "loss": 2.1645, "step": 3143 }, { "epoch": 0.10143755633571347, "grad_norm": 0.419921875, "learning_rate": 2.9639465143322946e-05, "loss": 2.1495, "step": 3144 }, { "epoch": 0.10146982018950981, "grad_norm": 0.40234375, "learning_rate": 2.963912209848986e-05, "loss": 2.1752, "step": 3145 }, { "epoch": 0.10150208404330616, "grad_norm": 0.412109375, "learning_rate": 2.9638778892519817e-05, "loss": 2.1468, "step": 3146 }, { "epoch": 0.1015343478971025, "grad_norm": 0.443359375, "learning_rate": 2.96384355254166e-05, "loss": 2.158, "step": 3147 }, { "epoch": 0.10156661175089884, "grad_norm": 0.388671875, "learning_rate": 2.9638091997183993e-05, "loss": 2.1189, "step": 3148 }, { "epoch": 0.1015988756046952, "grad_norm": 0.451171875, "learning_rate": 2.9637748307825763e-05, "loss": 2.1493, "step": 3149 }, { "epoch": 0.10163113945849155, "grad_norm": 0.447265625, "learning_rate": 2.9637404457345702e-05, "loss": 2.0881, "step": 3150 }, { "epoch": 0.10166340331228789, "grad_norm": 0.419921875, "learning_rate": 2.9637060445747594e-05, "loss": 2.0616, "step": 3151 }, { "epoch": 0.10169566716608423, "grad_norm": 0.447265625, "learning_rate": 2.9636716273035223e-05, "loss": 2.0652, "step": 3152 }, { "epoch": 0.10172793101988058, "grad_norm": 0.455078125, "learning_rate": 2.9636371939212382e-05, "loss": 2.0819, "step": 3153 }, { "epoch": 0.10176019487367693, "grad_norm": 0.384765625, "learning_rate": 2.963602744428286e-05, "loss": 2.091, "step": 3154 }, { "epoch": 0.10179245872747328, "grad_norm": 0.458984375, "learning_rate": 2.9635682788250445e-05, "loss": 2.0587, "step": 3155 }, { "epoch": 0.10182472258126962, "grad_norm": 0.41015625, "learning_rate": 2.963533797111894e-05, "loss": 2.0971, "step": 3156 }, { "epoch": 0.10185698643506597, "grad_norm": 0.435546875, "learning_rate": 2.9634992992892127e-05, "loss": 2.1293, "step": 3157 }, { "epoch": 0.10188925028886231, "grad_norm": 0.41015625, "learning_rate": 2.963464785357381e-05, "loss": 2.0987, "step": 3158 }, { "epoch": 0.10192151414265867, "grad_norm": 0.421875, "learning_rate": 2.9634302553167793e-05, "loss": 2.063, "step": 3159 }, { "epoch": 0.10195377799645501, "grad_norm": 0.38671875, "learning_rate": 2.963395709167787e-05, "loss": 2.1007, "step": 3160 }, { "epoch": 0.10198604185025135, "grad_norm": 0.41015625, "learning_rate": 2.9633611469107846e-05, "loss": 2.093, "step": 3161 }, { "epoch": 0.1020183057040477, "grad_norm": 0.408203125, "learning_rate": 2.963326568546152e-05, "loss": 2.0487, "step": 3162 }, { "epoch": 0.10205056955784404, "grad_norm": 0.419921875, "learning_rate": 2.963291974074271e-05, "loss": 2.0486, "step": 3163 }, { "epoch": 0.1020828334116404, "grad_norm": 0.423828125, "learning_rate": 2.9632573634955217e-05, "loss": 2.0723, "step": 3164 }, { "epoch": 0.10211509726543674, "grad_norm": 0.416015625, "learning_rate": 2.9632227368102848e-05, "loss": 2.0888, "step": 3165 }, { "epoch": 0.10214736111923309, "grad_norm": 0.41015625, "learning_rate": 2.9631880940189416e-05, "loss": 2.0725, "step": 3166 }, { "epoch": 0.10217962497302943, "grad_norm": 0.40234375, "learning_rate": 2.9631534351218738e-05, "loss": 2.0662, "step": 3167 }, { "epoch": 0.10221188882682578, "grad_norm": 0.3984375, "learning_rate": 2.9631187601194627e-05, "loss": 2.0491, "step": 3168 }, { "epoch": 0.10224415268062213, "grad_norm": 0.421875, "learning_rate": 2.9630840690120897e-05, "loss": 2.0551, "step": 3169 }, { "epoch": 0.10227641653441848, "grad_norm": 0.40625, "learning_rate": 2.963049361800137e-05, "loss": 2.0879, "step": 3170 }, { "epoch": 0.10230868038821482, "grad_norm": 0.396484375, "learning_rate": 2.9630146384839864e-05, "loss": 2.062, "step": 3171 }, { "epoch": 0.10234094424201116, "grad_norm": 0.439453125, "learning_rate": 2.9629798990640204e-05, "loss": 2.0726, "step": 3172 }, { "epoch": 0.10237320809580751, "grad_norm": 0.40625, "learning_rate": 2.962945143540621e-05, "loss": 2.0373, "step": 3173 }, { "epoch": 0.10240547194960387, "grad_norm": 0.38671875, "learning_rate": 2.962910371914171e-05, "loss": 2.0232, "step": 3174 }, { "epoch": 0.10243773580340021, "grad_norm": 0.40625, "learning_rate": 2.9628755841850532e-05, "loss": 2.0968, "step": 3175 }, { "epoch": 0.10246999965719655, "grad_norm": 0.42578125, "learning_rate": 2.962840780353651e-05, "loss": 2.0696, "step": 3176 }, { "epoch": 0.1025022635109929, "grad_norm": 0.40234375, "learning_rate": 2.962805960420346e-05, "loss": 2.0862, "step": 3177 }, { "epoch": 0.10253452736478924, "grad_norm": 0.458984375, "learning_rate": 2.9627711243855227e-05, "loss": 2.0643, "step": 3178 }, { "epoch": 0.1025667912185856, "grad_norm": 0.41015625, "learning_rate": 2.962736272249564e-05, "loss": 2.0864, "step": 3179 }, { "epoch": 0.10259905507238194, "grad_norm": 0.39453125, "learning_rate": 2.9627014040128543e-05, "loss": 2.0989, "step": 3180 }, { "epoch": 0.10263131892617829, "grad_norm": 0.412109375, "learning_rate": 2.9626665196757768e-05, "loss": 2.0806, "step": 3181 }, { "epoch": 0.10266358277997463, "grad_norm": 0.439453125, "learning_rate": 2.9626316192387154e-05, "loss": 2.0949, "step": 3182 }, { "epoch": 0.10269584663377097, "grad_norm": 0.4375, "learning_rate": 2.9625967027020544e-05, "loss": 2.0974, "step": 3183 }, { "epoch": 0.10272811048756733, "grad_norm": 0.484375, "learning_rate": 2.962561770066178e-05, "loss": 2.0787, "step": 3184 }, { "epoch": 0.10276037434136367, "grad_norm": 0.44921875, "learning_rate": 2.9625268213314712e-05, "loss": 2.0996, "step": 3185 }, { "epoch": 0.10279263819516002, "grad_norm": 0.431640625, "learning_rate": 2.9624918564983184e-05, "loss": 2.0892, "step": 3186 }, { "epoch": 0.10282490204895636, "grad_norm": 0.412109375, "learning_rate": 2.9624568755671042e-05, "loss": 2.0178, "step": 3187 }, { "epoch": 0.1028571659027527, "grad_norm": 0.458984375, "learning_rate": 2.962421878538214e-05, "loss": 2.1026, "step": 3188 }, { "epoch": 0.10288942975654906, "grad_norm": 0.466796875, "learning_rate": 2.9623868654120328e-05, "loss": 2.1086, "step": 3189 }, { "epoch": 0.10292169361034541, "grad_norm": 0.4453125, "learning_rate": 2.962351836188946e-05, "loss": 2.0691, "step": 3190 }, { "epoch": 0.10295395746414175, "grad_norm": 0.427734375, "learning_rate": 2.96231679086934e-05, "loss": 2.0522, "step": 3191 }, { "epoch": 0.1029862213179381, "grad_norm": 0.4453125, "learning_rate": 2.9622817294535992e-05, "loss": 2.0784, "step": 3192 }, { "epoch": 0.10301848517173444, "grad_norm": 0.431640625, "learning_rate": 2.9622466519421098e-05, "loss": 2.1152, "step": 3193 }, { "epoch": 0.1030507490255308, "grad_norm": 0.470703125, "learning_rate": 2.9622115583352587e-05, "loss": 2.0659, "step": 3194 }, { "epoch": 0.10308301287932714, "grad_norm": 0.59375, "learning_rate": 2.962176448633432e-05, "loss": 2.0939, "step": 3195 }, { "epoch": 0.10311527673312348, "grad_norm": 0.55078125, "learning_rate": 2.962141322837016e-05, "loss": 2.0928, "step": 3196 }, { "epoch": 0.10314754058691983, "grad_norm": 0.55859375, "learning_rate": 2.9621061809463963e-05, "loss": 2.0667, "step": 3197 }, { "epoch": 0.10317980444071617, "grad_norm": 0.734375, "learning_rate": 2.962071022961961e-05, "loss": 2.0425, "step": 3198 }, { "epoch": 0.10321206829451253, "grad_norm": 0.6796875, "learning_rate": 2.9620358488840976e-05, "loss": 2.1031, "step": 3199 }, { "epoch": 0.10324433214830887, "grad_norm": 0.578125, "learning_rate": 2.9620006587131917e-05, "loss": 2.0868, "step": 3200 }, { "epoch": 0.10327659600210522, "grad_norm": 0.451171875, "learning_rate": 2.9619654524496317e-05, "loss": 2.0823, "step": 3201 }, { "epoch": 0.10330885985590156, "grad_norm": 0.50390625, "learning_rate": 2.9619302300938045e-05, "loss": 2.0571, "step": 3202 }, { "epoch": 0.1033411237096979, "grad_norm": 0.53515625, "learning_rate": 2.9618949916460985e-05, "loss": 2.0854, "step": 3203 }, { "epoch": 0.10337338756349426, "grad_norm": 0.416015625, "learning_rate": 2.9618597371069006e-05, "loss": 2.0518, "step": 3204 }, { "epoch": 0.1034056514172906, "grad_norm": 0.50390625, "learning_rate": 2.9618244664766e-05, "loss": 2.1153, "step": 3205 }, { "epoch": 0.10343791527108695, "grad_norm": 0.63671875, "learning_rate": 2.961789179755584e-05, "loss": 2.0686, "step": 3206 }, { "epoch": 0.1034701791248833, "grad_norm": 0.435546875, "learning_rate": 2.9617538769442415e-05, "loss": 2.0767, "step": 3207 }, { "epoch": 0.10350244297867964, "grad_norm": 0.447265625, "learning_rate": 2.9617185580429612e-05, "loss": 2.052, "step": 3208 }, { "epoch": 0.103534706832476, "grad_norm": 0.474609375, "learning_rate": 2.9616832230521316e-05, "loss": 2.0661, "step": 3209 }, { "epoch": 0.10356697068627234, "grad_norm": 0.376953125, "learning_rate": 2.961647871972142e-05, "loss": 2.0962, "step": 3210 }, { "epoch": 0.10359923454006868, "grad_norm": 0.462890625, "learning_rate": 2.9616125048033803e-05, "loss": 2.0888, "step": 3211 }, { "epoch": 0.10363149839386503, "grad_norm": 0.43359375, "learning_rate": 2.9615771215462376e-05, "loss": 2.089, "step": 3212 }, { "epoch": 0.10366376224766137, "grad_norm": 0.431640625, "learning_rate": 2.961541722201102e-05, "loss": 2.0748, "step": 3213 }, { "epoch": 0.10369602610145773, "grad_norm": 0.439453125, "learning_rate": 2.9615063067683637e-05, "loss": 2.0935, "step": 3214 }, { "epoch": 0.10372828995525407, "grad_norm": 0.435546875, "learning_rate": 2.9614708752484126e-05, "loss": 2.1088, "step": 3215 }, { "epoch": 0.10376055380905042, "grad_norm": 0.38671875, "learning_rate": 2.9614354276416385e-05, "loss": 2.0722, "step": 3216 }, { "epoch": 0.10379281766284676, "grad_norm": 0.470703125, "learning_rate": 2.9613999639484314e-05, "loss": 2.0754, "step": 3217 }, { "epoch": 0.1038250815166431, "grad_norm": 0.474609375, "learning_rate": 2.961364484169182e-05, "loss": 2.0759, "step": 3218 }, { "epoch": 0.10385734537043946, "grad_norm": 0.3984375, "learning_rate": 2.9613289883042808e-05, "loss": 2.096, "step": 3219 }, { "epoch": 0.1038896092242358, "grad_norm": 0.4140625, "learning_rate": 2.9612934763541186e-05, "loss": 2.0377, "step": 3220 }, { "epoch": 0.10392187307803215, "grad_norm": 0.4609375, "learning_rate": 2.961257948319086e-05, "loss": 2.0635, "step": 3221 }, { "epoch": 0.10395413693182849, "grad_norm": 0.416015625, "learning_rate": 2.9612224041995737e-05, "loss": 2.0748, "step": 3222 }, { "epoch": 0.10398640078562484, "grad_norm": 0.458984375, "learning_rate": 2.961186843995974e-05, "loss": 2.089, "step": 3223 }, { "epoch": 0.1040186646394212, "grad_norm": 0.4609375, "learning_rate": 2.961151267708678e-05, "loss": 2.0915, "step": 3224 }, { "epoch": 0.10405092849321754, "grad_norm": 0.38671875, "learning_rate": 2.9611156753380768e-05, "loss": 2.0837, "step": 3225 }, { "epoch": 0.10408319234701388, "grad_norm": 0.455078125, "learning_rate": 2.961080066884563e-05, "loss": 2.0474, "step": 3226 }, { "epoch": 0.10411545620081022, "grad_norm": 0.400390625, "learning_rate": 2.961044442348527e-05, "loss": 2.0656, "step": 3227 }, { "epoch": 0.10414772005460657, "grad_norm": 0.4296875, "learning_rate": 2.9610088017303623e-05, "loss": 2.1068, "step": 3228 }, { "epoch": 0.10417998390840291, "grad_norm": 0.455078125, "learning_rate": 2.960973145030461e-05, "loss": 2.0763, "step": 3229 }, { "epoch": 0.10421224776219927, "grad_norm": 0.408203125, "learning_rate": 2.9609374722492152e-05, "loss": 2.0787, "step": 3230 }, { "epoch": 0.10424451161599561, "grad_norm": 0.4296875, "learning_rate": 2.9609017833870178e-05, "loss": 2.0715, "step": 3231 }, { "epoch": 0.10427677546979196, "grad_norm": 0.435546875, "learning_rate": 2.960866078444262e-05, "loss": 2.0795, "step": 3232 }, { "epoch": 0.1043090393235883, "grad_norm": 0.4609375, "learning_rate": 2.96083035742134e-05, "loss": 2.0746, "step": 3233 }, { "epoch": 0.10434130317738464, "grad_norm": 0.5078125, "learning_rate": 2.9607946203186457e-05, "loss": 2.0296, "step": 3234 }, { "epoch": 0.104373567031181, "grad_norm": 0.466796875, "learning_rate": 2.9607588671365722e-05, "loss": 2.0719, "step": 3235 }, { "epoch": 0.10440583088497735, "grad_norm": 0.466796875, "learning_rate": 2.9607230978755128e-05, "loss": 2.1447, "step": 3236 }, { "epoch": 0.10443809473877369, "grad_norm": 0.47265625, "learning_rate": 2.9606873125358615e-05, "loss": 2.1559, "step": 3237 }, { "epoch": 0.10447035859257003, "grad_norm": 0.52734375, "learning_rate": 2.960651511118012e-05, "loss": 2.1257, "step": 3238 }, { "epoch": 0.10450262244636638, "grad_norm": 0.494140625, "learning_rate": 2.9606156936223586e-05, "loss": 2.1425, "step": 3239 }, { "epoch": 0.10453488630016274, "grad_norm": 0.55859375, "learning_rate": 2.960579860049296e-05, "loss": 2.1301, "step": 3240 }, { "epoch": 0.10456715015395908, "grad_norm": 0.4765625, "learning_rate": 2.960544010399218e-05, "loss": 2.1417, "step": 3241 }, { "epoch": 0.10459941400775542, "grad_norm": 0.447265625, "learning_rate": 2.960508144672519e-05, "loss": 2.1467, "step": 3242 }, { "epoch": 0.10463167786155177, "grad_norm": 0.4609375, "learning_rate": 2.960472262869594e-05, "loss": 2.1478, "step": 3243 }, { "epoch": 0.10466394171534811, "grad_norm": 0.44921875, "learning_rate": 2.9604363649908383e-05, "loss": 2.0908, "step": 3244 }, { "epoch": 0.10469620556914447, "grad_norm": 0.49609375, "learning_rate": 2.9604004510366466e-05, "loss": 2.1449, "step": 3245 }, { "epoch": 0.10472846942294081, "grad_norm": 0.474609375, "learning_rate": 2.9603645210074148e-05, "loss": 2.1326, "step": 3246 }, { "epoch": 0.10476073327673716, "grad_norm": 0.4921875, "learning_rate": 2.960328574903538e-05, "loss": 2.1841, "step": 3247 }, { "epoch": 0.1047929971305335, "grad_norm": 0.48828125, "learning_rate": 2.9602926127254112e-05, "loss": 2.1592, "step": 3248 }, { "epoch": 0.10482526098432984, "grad_norm": 0.4765625, "learning_rate": 2.9602566344734316e-05, "loss": 2.177, "step": 3249 }, { "epoch": 0.1048575248381262, "grad_norm": 0.48046875, "learning_rate": 2.960220640147994e-05, "loss": 2.1441, "step": 3250 }, { "epoch": 0.10488978869192254, "grad_norm": 0.4296875, "learning_rate": 2.9601846297494953e-05, "loss": 2.1638, "step": 3251 }, { "epoch": 0.10492205254571889, "grad_norm": 0.427734375, "learning_rate": 2.9601486032783325e-05, "loss": 2.1462, "step": 3252 }, { "epoch": 0.10495431639951523, "grad_norm": 0.443359375, "learning_rate": 2.9601125607349005e-05, "loss": 2.1278, "step": 3253 }, { "epoch": 0.10498658025331158, "grad_norm": 0.404296875, "learning_rate": 2.9600765021195973e-05, "loss": 2.1659, "step": 3254 }, { "epoch": 0.10501884410710793, "grad_norm": 0.46484375, "learning_rate": 2.960040427432819e-05, "loss": 2.1726, "step": 3255 }, { "epoch": 0.10505110796090428, "grad_norm": 0.443359375, "learning_rate": 2.9600043366749635e-05, "loss": 2.1317, "step": 3256 }, { "epoch": 0.10508337181470062, "grad_norm": 0.421875, "learning_rate": 2.9599682298464278e-05, "loss": 2.1394, "step": 3257 }, { "epoch": 0.10511563566849697, "grad_norm": 0.9296875, "learning_rate": 2.9599321069476092e-05, "loss": 2.17, "step": 3258 }, { "epoch": 0.10514789952229331, "grad_norm": 0.451171875, "learning_rate": 2.9598959679789047e-05, "loss": 2.1495, "step": 3259 }, { "epoch": 0.10518016337608967, "grad_norm": 0.5625, "learning_rate": 2.9598598129407135e-05, "loss": 2.1202, "step": 3260 }, { "epoch": 0.10521242722988601, "grad_norm": 0.81640625, "learning_rate": 2.9598236418334322e-05, "loss": 2.1454, "step": 3261 }, { "epoch": 0.10524469108368235, "grad_norm": 1.296875, "learning_rate": 2.9597874546574598e-05, "loss": 2.1598, "step": 3262 }, { "epoch": 0.1052769549374787, "grad_norm": 0.62109375, "learning_rate": 2.9597512514131945e-05, "loss": 2.1235, "step": 3263 }, { "epoch": 0.10530921879127504, "grad_norm": 0.6171875, "learning_rate": 2.9597150321010344e-05, "loss": 2.1579, "step": 3264 }, { "epoch": 0.1053414826450714, "grad_norm": 0.890625, "learning_rate": 2.9596787967213782e-05, "loss": 2.1629, "step": 3265 }, { "epoch": 0.10537374649886774, "grad_norm": 0.443359375, "learning_rate": 2.959642545274625e-05, "loss": 2.176, "step": 3266 }, { "epoch": 0.10540601035266409, "grad_norm": 0.6953125, "learning_rate": 2.9596062777611745e-05, "loss": 2.109, "step": 3267 }, { "epoch": 0.10543827420646043, "grad_norm": 0.50390625, "learning_rate": 2.9595699941814246e-05, "loss": 2.1368, "step": 3268 }, { "epoch": 0.10547053806025677, "grad_norm": 0.55859375, "learning_rate": 2.9595336945357754e-05, "loss": 2.1479, "step": 3269 }, { "epoch": 0.10550280191405313, "grad_norm": 0.55859375, "learning_rate": 2.9594973788246265e-05, "loss": 2.1481, "step": 3270 }, { "epoch": 0.10553506576784948, "grad_norm": 0.451171875, "learning_rate": 2.9594610470483773e-05, "loss": 2.132, "step": 3271 }, { "epoch": 0.10556732962164582, "grad_norm": 0.54296875, "learning_rate": 2.959424699207428e-05, "loss": 2.1279, "step": 3272 }, { "epoch": 0.10559959347544216, "grad_norm": 0.455078125, "learning_rate": 2.9593883353021785e-05, "loss": 2.1346, "step": 3273 }, { "epoch": 0.10563185732923851, "grad_norm": 0.5859375, "learning_rate": 2.9593519553330293e-05, "loss": 2.1548, "step": 3274 }, { "epoch": 0.10566412118303486, "grad_norm": 0.427734375, "learning_rate": 2.9593155593003806e-05, "loss": 2.1466, "step": 3275 }, { "epoch": 0.10569638503683121, "grad_norm": 0.5234375, "learning_rate": 2.9592791472046333e-05, "loss": 2.1736, "step": 3276 }, { "epoch": 0.10572864889062755, "grad_norm": 0.47265625, "learning_rate": 2.959242719046188e-05, "loss": 2.12, "step": 3277 }, { "epoch": 0.1057609127444239, "grad_norm": 0.484375, "learning_rate": 2.959206274825446e-05, "loss": 2.1468, "step": 3278 }, { "epoch": 0.10579317659822024, "grad_norm": 0.48046875, "learning_rate": 2.9591698145428074e-05, "loss": 2.1672, "step": 3279 }, { "epoch": 0.1058254404520166, "grad_norm": 0.427734375, "learning_rate": 2.9591333381986746e-05, "loss": 2.1701, "step": 3280 }, { "epoch": 0.10585770430581294, "grad_norm": 0.458984375, "learning_rate": 2.959096845793449e-05, "loss": 2.1195, "step": 3281 }, { "epoch": 0.10588996815960929, "grad_norm": 0.419921875, "learning_rate": 2.959060337327532e-05, "loss": 2.1524, "step": 3282 }, { "epoch": 0.10592223201340563, "grad_norm": 0.419921875, "learning_rate": 2.9590238128013256e-05, "loss": 2.1333, "step": 3283 }, { "epoch": 0.10595449586720197, "grad_norm": 0.392578125, "learning_rate": 2.958987272215231e-05, "loss": 2.1474, "step": 3284 }, { "epoch": 0.10598675972099833, "grad_norm": 0.65625, "learning_rate": 2.9589507155696522e-05, "loss": 2.1703, "step": 3285 }, { "epoch": 0.10601902357479467, "grad_norm": 0.41796875, "learning_rate": 2.9589141428649903e-05, "loss": 2.1401, "step": 3286 }, { "epoch": 0.10605128742859102, "grad_norm": 0.4296875, "learning_rate": 2.958877554101648e-05, "loss": 2.1472, "step": 3287 }, { "epoch": 0.10608355128238736, "grad_norm": 0.443359375, "learning_rate": 2.958840949280028e-05, "loss": 2.1366, "step": 3288 }, { "epoch": 0.1061158151361837, "grad_norm": 0.41796875, "learning_rate": 2.958804328400534e-05, "loss": 2.1327, "step": 3289 }, { "epoch": 0.10614807898998006, "grad_norm": 0.498046875, "learning_rate": 2.9587676914635676e-05, "loss": 2.1585, "step": 3290 }, { "epoch": 0.1061803428437764, "grad_norm": 0.4296875, "learning_rate": 2.958731038469534e-05, "loss": 2.1678, "step": 3291 }, { "epoch": 0.10621260669757275, "grad_norm": 0.443359375, "learning_rate": 2.9586943694188346e-05, "loss": 2.1278, "step": 3292 }, { "epoch": 0.1062448705513691, "grad_norm": 0.484375, "learning_rate": 2.9586576843118744e-05, "loss": 2.1457, "step": 3293 }, { "epoch": 0.10627713440516544, "grad_norm": 0.416015625, "learning_rate": 2.9586209831490573e-05, "loss": 2.1477, "step": 3294 }, { "epoch": 0.1063093982589618, "grad_norm": 0.46484375, "learning_rate": 2.9585842659307863e-05, "loss": 2.1446, "step": 3295 }, { "epoch": 0.10634166211275814, "grad_norm": 0.4296875, "learning_rate": 2.9585475326574666e-05, "loss": 2.1024, "step": 3296 }, { "epoch": 0.10637392596655448, "grad_norm": 0.41015625, "learning_rate": 2.9585107833295013e-05, "loss": 2.1623, "step": 3297 }, { "epoch": 0.10640618982035083, "grad_norm": 0.4140625, "learning_rate": 2.9584740179472964e-05, "loss": 2.1291, "step": 3298 }, { "epoch": 0.10643845367414717, "grad_norm": 0.400390625, "learning_rate": 2.958437236511255e-05, "loss": 2.1356, "step": 3299 }, { "epoch": 0.10647071752794353, "grad_norm": 0.431640625, "learning_rate": 2.9584004390217837e-05, "loss": 2.1189, "step": 3300 }, { "epoch": 0.10650298138173987, "grad_norm": 0.40625, "learning_rate": 2.9583636254792858e-05, "loss": 2.1614, "step": 3301 }, { "epoch": 0.10653524523553622, "grad_norm": 0.408203125, "learning_rate": 2.9583267958841677e-05, "loss": 2.1509, "step": 3302 }, { "epoch": 0.10656750908933256, "grad_norm": 0.396484375, "learning_rate": 2.9582899502368344e-05, "loss": 2.1768, "step": 3303 }, { "epoch": 0.1065997729431289, "grad_norm": 0.421875, "learning_rate": 2.9582530885376918e-05, "loss": 2.1176, "step": 3304 }, { "epoch": 0.10663203679692526, "grad_norm": 0.447265625, "learning_rate": 2.958216210787145e-05, "loss": 2.1405, "step": 3305 }, { "epoch": 0.1066643006507216, "grad_norm": 0.474609375, "learning_rate": 2.9581793169856e-05, "loss": 2.1271, "step": 3306 }, { "epoch": 0.10669656450451795, "grad_norm": 0.4375, "learning_rate": 2.9581424071334638e-05, "loss": 2.1231, "step": 3307 }, { "epoch": 0.10672882835831429, "grad_norm": 0.43359375, "learning_rate": 2.9581054812311418e-05, "loss": 2.1037, "step": 3308 }, { "epoch": 0.10676109221211064, "grad_norm": 0.4921875, "learning_rate": 2.9580685392790406e-05, "loss": 2.1234, "step": 3309 }, { "epoch": 0.106793356065907, "grad_norm": 0.5546875, "learning_rate": 2.9580315812775672e-05, "loss": 2.1213, "step": 3310 }, { "epoch": 0.10682561991970334, "grad_norm": 0.51953125, "learning_rate": 2.9579946072271277e-05, "loss": 2.149, "step": 3311 }, { "epoch": 0.10685788377349968, "grad_norm": 0.490234375, "learning_rate": 2.95795761712813e-05, "loss": 2.154, "step": 3312 }, { "epoch": 0.10689014762729603, "grad_norm": 0.51953125, "learning_rate": 2.9579206109809805e-05, "loss": 2.1557, "step": 3313 }, { "epoch": 0.10692241148109237, "grad_norm": 0.44140625, "learning_rate": 2.9578835887860868e-05, "loss": 2.1264, "step": 3314 }, { "epoch": 0.10695467533488871, "grad_norm": 0.51953125, "learning_rate": 2.9578465505438562e-05, "loss": 2.1706, "step": 3315 }, { "epoch": 0.10698693918868507, "grad_norm": 0.48828125, "learning_rate": 2.9578094962546975e-05, "loss": 2.0902, "step": 3316 }, { "epoch": 0.10701920304248141, "grad_norm": 0.458984375, "learning_rate": 2.957772425919017e-05, "loss": 2.1555, "step": 3317 }, { "epoch": 0.10705146689627776, "grad_norm": 0.52734375, "learning_rate": 2.9577353395372232e-05, "loss": 2.1532, "step": 3318 }, { "epoch": 0.1070837307500741, "grad_norm": 0.455078125, "learning_rate": 2.957698237109725e-05, "loss": 2.1637, "step": 3319 }, { "epoch": 0.10711599460387045, "grad_norm": 0.470703125, "learning_rate": 2.9576611186369303e-05, "loss": 2.1264, "step": 3320 }, { "epoch": 0.1071482584576668, "grad_norm": 0.462890625, "learning_rate": 2.957623984119248e-05, "loss": 2.1174, "step": 3321 }, { "epoch": 0.10718052231146315, "grad_norm": 0.49609375, "learning_rate": 2.957586833557086e-05, "loss": 2.1391, "step": 3322 }, { "epoch": 0.10721278616525949, "grad_norm": 0.46875, "learning_rate": 2.957549666950854e-05, "loss": 2.1522, "step": 3323 }, { "epoch": 0.10724505001905583, "grad_norm": 0.455078125, "learning_rate": 2.957512484300961e-05, "loss": 2.1347, "step": 3324 }, { "epoch": 0.10727731387285218, "grad_norm": 0.470703125, "learning_rate": 2.957475285607816e-05, "loss": 2.1687, "step": 3325 }, { "epoch": 0.10730957772664854, "grad_norm": 0.45703125, "learning_rate": 2.9574380708718292e-05, "loss": 2.1273, "step": 3326 }, { "epoch": 0.10734184158044488, "grad_norm": 0.455078125, "learning_rate": 2.957400840093409e-05, "loss": 2.1385, "step": 3327 }, { "epoch": 0.10737410543424122, "grad_norm": 0.40234375, "learning_rate": 2.9573635932729663e-05, "loss": 2.1722, "step": 3328 }, { "epoch": 0.10740636928803757, "grad_norm": 0.4140625, "learning_rate": 2.9573263304109103e-05, "loss": 2.1193, "step": 3329 }, { "epoch": 0.10743863314183391, "grad_norm": 0.404296875, "learning_rate": 2.957289051507652e-05, "loss": 2.1627, "step": 3330 }, { "epoch": 0.10747089699563027, "grad_norm": 0.431640625, "learning_rate": 2.957251756563601e-05, "loss": 2.1459, "step": 3331 }, { "epoch": 0.10750316084942661, "grad_norm": 0.416015625, "learning_rate": 2.9572144455791683e-05, "loss": 2.1473, "step": 3332 }, { "epoch": 0.10753542470322296, "grad_norm": 0.408203125, "learning_rate": 2.9571771185547644e-05, "loss": 2.151, "step": 3333 }, { "epoch": 0.1075676885570193, "grad_norm": 0.42578125, "learning_rate": 2.9571397754908e-05, "loss": 2.125, "step": 3334 }, { "epoch": 0.10759995241081564, "grad_norm": 0.4140625, "learning_rate": 2.957102416387686e-05, "loss": 2.1479, "step": 3335 }, { "epoch": 0.107632216264612, "grad_norm": 0.458984375, "learning_rate": 2.9570650412458345e-05, "loss": 2.1589, "step": 3336 }, { "epoch": 0.10766448011840835, "grad_norm": 0.5390625, "learning_rate": 2.957027650065656e-05, "loss": 2.1263, "step": 3337 }, { "epoch": 0.10769674397220469, "grad_norm": 0.59765625, "learning_rate": 2.9569902428475623e-05, "loss": 2.118, "step": 3338 }, { "epoch": 0.10772900782600103, "grad_norm": 0.62890625, "learning_rate": 2.9569528195919654e-05, "loss": 2.1423, "step": 3339 }, { "epoch": 0.10776127167979738, "grad_norm": 0.56640625, "learning_rate": 2.956915380299277e-05, "loss": 2.1486, "step": 3340 }, { "epoch": 0.10779353553359373, "grad_norm": 0.419921875, "learning_rate": 2.9568779249699092e-05, "loss": 2.1121, "step": 3341 }, { "epoch": 0.10782579938739008, "grad_norm": 0.4765625, "learning_rate": 2.9568404536042745e-05, "loss": 2.1501, "step": 3342 }, { "epoch": 0.10785806324118642, "grad_norm": 0.54296875, "learning_rate": 2.956802966202785e-05, "loss": 2.1377, "step": 3343 }, { "epoch": 0.10789032709498277, "grad_norm": 0.46875, "learning_rate": 2.956765462765854e-05, "loss": 2.1365, "step": 3344 }, { "epoch": 0.10792259094877911, "grad_norm": 0.4296875, "learning_rate": 2.9567279432938936e-05, "loss": 2.1323, "step": 3345 }, { "epoch": 0.10795485480257547, "grad_norm": 0.54296875, "learning_rate": 2.9566904077873174e-05, "loss": 2.1508, "step": 3346 }, { "epoch": 0.10798711865637181, "grad_norm": 0.625, "learning_rate": 2.9566528562465377e-05, "loss": 2.1362, "step": 3347 }, { "epoch": 0.10801938251016815, "grad_norm": 0.52734375, "learning_rate": 2.9566152886719687e-05, "loss": 2.1405, "step": 3348 }, { "epoch": 0.1080516463639645, "grad_norm": 0.404296875, "learning_rate": 2.9565777050640235e-05, "loss": 2.1197, "step": 3349 }, { "epoch": 0.10808391021776084, "grad_norm": 0.48046875, "learning_rate": 2.9565401054231155e-05, "loss": 2.1269, "step": 3350 }, { "epoch": 0.1081161740715572, "grad_norm": 0.47265625, "learning_rate": 2.9565024897496593e-05, "loss": 2.1315, "step": 3351 }, { "epoch": 0.10814843792535354, "grad_norm": 0.447265625, "learning_rate": 2.9564648580440684e-05, "loss": 2.1581, "step": 3352 }, { "epoch": 0.10818070177914989, "grad_norm": 0.515625, "learning_rate": 2.9564272103067573e-05, "loss": 2.1393, "step": 3353 }, { "epoch": 0.10821296563294623, "grad_norm": 0.451171875, "learning_rate": 2.956389546538141e-05, "loss": 2.1241, "step": 3354 }, { "epoch": 0.10824522948674258, "grad_norm": 0.447265625, "learning_rate": 2.956351866738632e-05, "loss": 2.1332, "step": 3355 }, { "epoch": 0.10827749334053893, "grad_norm": 0.431640625, "learning_rate": 2.9563141709086473e-05, "loss": 2.1357, "step": 3356 }, { "epoch": 0.10830975719433528, "grad_norm": 0.443359375, "learning_rate": 2.9562764590486006e-05, "loss": 2.1238, "step": 3357 }, { "epoch": 0.10834202104813162, "grad_norm": 0.40234375, "learning_rate": 2.9562387311589077e-05, "loss": 2.1175, "step": 3358 }, { "epoch": 0.10837428490192796, "grad_norm": 0.43359375, "learning_rate": 2.9562009872399833e-05, "loss": 2.104, "step": 3359 }, { "epoch": 0.10840654875572431, "grad_norm": 0.396484375, "learning_rate": 2.9561632272922434e-05, "loss": 2.1286, "step": 3360 }, { "epoch": 0.10843881260952067, "grad_norm": 0.38671875, "learning_rate": 2.9561254513161032e-05, "loss": 2.1306, "step": 3361 }, { "epoch": 0.10847107646331701, "grad_norm": 0.41015625, "learning_rate": 2.956087659311979e-05, "loss": 2.1542, "step": 3362 }, { "epoch": 0.10850334031711335, "grad_norm": 0.451171875, "learning_rate": 2.9560498512802857e-05, "loss": 2.1457, "step": 3363 }, { "epoch": 0.1085356041709097, "grad_norm": 0.46484375, "learning_rate": 2.9560120272214407e-05, "loss": 2.1256, "step": 3364 }, { "epoch": 0.10856786802470604, "grad_norm": 0.4375, "learning_rate": 2.9559741871358595e-05, "loss": 2.1411, "step": 3365 }, { "epoch": 0.1086001318785024, "grad_norm": 0.51953125, "learning_rate": 2.9559363310239595e-05, "loss": 2.1497, "step": 3366 }, { "epoch": 0.10863239573229874, "grad_norm": 0.52734375, "learning_rate": 2.9558984588861563e-05, "loss": 2.145, "step": 3367 }, { "epoch": 0.10866465958609509, "grad_norm": 0.447265625, "learning_rate": 2.9558605707228678e-05, "loss": 2.127, "step": 3368 }, { "epoch": 0.10869692343989143, "grad_norm": 0.44921875, "learning_rate": 2.9558226665345104e-05, "loss": 2.1353, "step": 3369 }, { "epoch": 0.10872918729368777, "grad_norm": 0.478515625, "learning_rate": 2.9557847463215013e-05, "loss": 2.141, "step": 3370 }, { "epoch": 0.10876145114748413, "grad_norm": 0.4609375, "learning_rate": 2.9557468100842578e-05, "loss": 2.1429, "step": 3371 }, { "epoch": 0.10879371500128047, "grad_norm": 0.431640625, "learning_rate": 2.9557088578231984e-05, "loss": 2.1353, "step": 3372 }, { "epoch": 0.10882597885507682, "grad_norm": 0.4140625, "learning_rate": 2.95567088953874e-05, "loss": 2.1313, "step": 3373 }, { "epoch": 0.10885824270887316, "grad_norm": 0.453125, "learning_rate": 2.955632905231301e-05, "loss": 2.1629, "step": 3374 }, { "epoch": 0.1088905065626695, "grad_norm": 0.50390625, "learning_rate": 2.9555949049012986e-05, "loss": 2.1236, "step": 3375 }, { "epoch": 0.10892277041646586, "grad_norm": 0.455078125, "learning_rate": 2.955556888549152e-05, "loss": 2.1521, "step": 3376 }, { "epoch": 0.10895503427026221, "grad_norm": 0.44140625, "learning_rate": 2.9555188561752794e-05, "loss": 2.1498, "step": 3377 }, { "epoch": 0.10898729812405855, "grad_norm": 0.46875, "learning_rate": 2.9554808077800995e-05, "loss": 2.1405, "step": 3378 }, { "epoch": 0.1090195619778549, "grad_norm": 0.482421875, "learning_rate": 2.9554427433640313e-05, "loss": 2.1302, "step": 3379 }, { "epoch": 0.10905182583165124, "grad_norm": 0.48046875, "learning_rate": 2.955404662927493e-05, "loss": 2.155, "step": 3380 }, { "epoch": 0.1090840896854476, "grad_norm": 0.484375, "learning_rate": 2.9553665664709043e-05, "loss": 2.1338, "step": 3381 }, { "epoch": 0.10911635353924394, "grad_norm": 0.52734375, "learning_rate": 2.9553284539946848e-05, "loss": 2.136, "step": 3382 }, { "epoch": 0.10914861739304028, "grad_norm": 0.498046875, "learning_rate": 2.9552903254992538e-05, "loss": 2.1584, "step": 3383 }, { "epoch": 0.10918088124683663, "grad_norm": 0.447265625, "learning_rate": 2.9552521809850304e-05, "loss": 2.1202, "step": 3384 }, { "epoch": 0.10921314510063297, "grad_norm": 0.451171875, "learning_rate": 2.9552140204524352e-05, "loss": 2.1211, "step": 3385 }, { "epoch": 0.10924540895442933, "grad_norm": 0.470703125, "learning_rate": 2.9551758439018887e-05, "loss": 2.1417, "step": 3386 }, { "epoch": 0.10927767280822567, "grad_norm": 0.47265625, "learning_rate": 2.9551376513338098e-05, "loss": 2.1318, "step": 3387 }, { "epoch": 0.10930993666202202, "grad_norm": 0.451171875, "learning_rate": 2.9550994427486194e-05, "loss": 2.1535, "step": 3388 }, { "epoch": 0.10934220051581836, "grad_norm": 0.46875, "learning_rate": 2.9550612181467387e-05, "loss": 2.162, "step": 3389 }, { "epoch": 0.1093744643696147, "grad_norm": 0.45703125, "learning_rate": 2.9550229775285875e-05, "loss": 2.1185, "step": 3390 }, { "epoch": 0.10940672822341106, "grad_norm": 0.44140625, "learning_rate": 2.9549847208945875e-05, "loss": 2.1592, "step": 3391 }, { "epoch": 0.1094389920772074, "grad_norm": 0.38671875, "learning_rate": 2.9549464482451593e-05, "loss": 2.1293, "step": 3392 }, { "epoch": 0.10947125593100375, "grad_norm": 0.4375, "learning_rate": 2.9549081595807247e-05, "loss": 2.144, "step": 3393 }, { "epoch": 0.1095035197848001, "grad_norm": 0.3671875, "learning_rate": 2.9548698549017048e-05, "loss": 2.1212, "step": 3394 }, { "epoch": 0.10953578363859644, "grad_norm": 0.421875, "learning_rate": 2.9548315342085207e-05, "loss": 2.1398, "step": 3395 }, { "epoch": 0.1095680474923928, "grad_norm": 0.416015625, "learning_rate": 2.9547931975015953e-05, "loss": 2.0956, "step": 3396 }, { "epoch": 0.10960031134618914, "grad_norm": 0.408203125, "learning_rate": 2.95475484478135e-05, "loss": 2.0687, "step": 3397 }, { "epoch": 0.10963257519998548, "grad_norm": 0.427734375, "learning_rate": 2.954716476048207e-05, "loss": 2.0835, "step": 3398 }, { "epoch": 0.10966483905378183, "grad_norm": 0.41015625, "learning_rate": 2.9546780913025882e-05, "loss": 2.0805, "step": 3399 }, { "epoch": 0.10969710290757817, "grad_norm": 0.375, "learning_rate": 2.9546396905449172e-05, "loss": 2.0609, "step": 3400 }, { "epoch": 0.10972936676137453, "grad_norm": 0.40234375, "learning_rate": 2.9546012737756154e-05, "loss": 2.0782, "step": 3401 }, { "epoch": 0.10976163061517087, "grad_norm": 0.396484375, "learning_rate": 2.9545628409951065e-05, "loss": 2.0932, "step": 3402 }, { "epoch": 0.10979389446896722, "grad_norm": 0.404296875, "learning_rate": 2.954524392203814e-05, "loss": 2.0787, "step": 3403 }, { "epoch": 0.10982615832276356, "grad_norm": 0.40625, "learning_rate": 2.9544859274021596e-05, "loss": 2.0628, "step": 3404 }, { "epoch": 0.1098584221765599, "grad_norm": 0.41015625, "learning_rate": 2.9544474465905682e-05, "loss": 2.0214, "step": 3405 }, { "epoch": 0.10989068603035625, "grad_norm": 0.41796875, "learning_rate": 2.9544089497694622e-05, "loss": 2.0636, "step": 3406 }, { "epoch": 0.1099229498841526, "grad_norm": 0.40625, "learning_rate": 2.9543704369392665e-05, "loss": 2.0538, "step": 3407 }, { "epoch": 0.10995521373794895, "grad_norm": 0.427734375, "learning_rate": 2.9543319081004035e-05, "loss": 2.091, "step": 3408 }, { "epoch": 0.10998747759174529, "grad_norm": 0.43359375, "learning_rate": 2.954293363253299e-05, "loss": 2.0786, "step": 3409 }, { "epoch": 0.11001974144554164, "grad_norm": 0.4375, "learning_rate": 2.9542548023983757e-05, "loss": 2.0788, "step": 3410 }, { "epoch": 0.11005200529933798, "grad_norm": 0.4296875, "learning_rate": 2.9542162255360595e-05, "loss": 2.0743, "step": 3411 }, { "epoch": 0.11008426915313434, "grad_norm": 0.453125, "learning_rate": 2.9541776326667743e-05, "loss": 2.085, "step": 3412 }, { "epoch": 0.11011653300693068, "grad_norm": 0.462890625, "learning_rate": 2.9541390237909445e-05, "loss": 2.0625, "step": 3413 }, { "epoch": 0.11014879686072702, "grad_norm": 0.55859375, "learning_rate": 2.9541003989089956e-05, "loss": 2.0537, "step": 3414 }, { "epoch": 0.11018106071452337, "grad_norm": 0.8671875, "learning_rate": 2.9540617580213526e-05, "loss": 2.0562, "step": 3415 }, { "epoch": 0.11021332456831971, "grad_norm": 1.3828125, "learning_rate": 2.9540231011284414e-05, "loss": 2.0454, "step": 3416 }, { "epoch": 0.11024558842211607, "grad_norm": 0.44140625, "learning_rate": 2.9539844282306867e-05, "loss": 2.095, "step": 3417 }, { "epoch": 0.11027785227591241, "grad_norm": 1.015625, "learning_rate": 2.9539457393285146e-05, "loss": 2.0727, "step": 3418 }, { "epoch": 0.11031011612970876, "grad_norm": 0.87109375, "learning_rate": 2.9539070344223508e-05, "loss": 2.0577, "step": 3419 }, { "epoch": 0.1103423799835051, "grad_norm": 0.53515625, "learning_rate": 2.9538683135126213e-05, "loss": 2.0996, "step": 3420 }, { "epoch": 0.11037464383730144, "grad_norm": 0.859375, "learning_rate": 2.9538295765997527e-05, "loss": 2.0556, "step": 3421 }, { "epoch": 0.1104069076910978, "grad_norm": 0.466796875, "learning_rate": 2.9537908236841707e-05, "loss": 2.0484, "step": 3422 }, { "epoch": 0.11043917154489415, "grad_norm": 0.66796875, "learning_rate": 2.9537520547663028e-05, "loss": 2.0589, "step": 3423 }, { "epoch": 0.11047143539869049, "grad_norm": 0.5078125, "learning_rate": 2.9537132698465746e-05, "loss": 2.0792, "step": 3424 }, { "epoch": 0.11050369925248683, "grad_norm": 0.609375, "learning_rate": 2.953674468925414e-05, "loss": 2.0725, "step": 3425 }, { "epoch": 0.11053596310628318, "grad_norm": 0.48828125, "learning_rate": 2.9536356520032473e-05, "loss": 2.1269, "step": 3426 }, { "epoch": 0.11056822696007954, "grad_norm": 0.5625, "learning_rate": 2.9535968190805026e-05, "loss": 2.0786, "step": 3427 }, { "epoch": 0.11060049081387588, "grad_norm": 0.494140625, "learning_rate": 2.9535579701576067e-05, "loss": 2.0727, "step": 3428 }, { "epoch": 0.11063275466767222, "grad_norm": 0.45703125, "learning_rate": 2.9535191052349875e-05, "loss": 2.0722, "step": 3429 }, { "epoch": 0.11066501852146857, "grad_norm": 0.435546875, "learning_rate": 2.9534802243130727e-05, "loss": 2.0998, "step": 3430 }, { "epoch": 0.11069728237526491, "grad_norm": 0.41796875, "learning_rate": 2.9534413273922905e-05, "loss": 2.0603, "step": 3431 }, { "epoch": 0.11072954622906127, "grad_norm": 0.45703125, "learning_rate": 2.9534024144730688e-05, "loss": 2.1133, "step": 3432 }, { "epoch": 0.11076181008285761, "grad_norm": 0.435546875, "learning_rate": 2.953363485555836e-05, "loss": 2.0921, "step": 3433 }, { "epoch": 0.11079407393665396, "grad_norm": 0.462890625, "learning_rate": 2.9533245406410206e-05, "loss": 2.0934, "step": 3434 }, { "epoch": 0.1108263377904503, "grad_norm": 0.421875, "learning_rate": 2.953285579729051e-05, "loss": 2.069, "step": 3435 }, { "epoch": 0.11085860164424664, "grad_norm": 0.486328125, "learning_rate": 2.9532466028203567e-05, "loss": 2.0502, "step": 3436 }, { "epoch": 0.110890865498043, "grad_norm": 0.447265625, "learning_rate": 2.953207609915366e-05, "loss": 2.0681, "step": 3437 }, { "epoch": 0.11092312935183934, "grad_norm": 0.41796875, "learning_rate": 2.9531686010145082e-05, "loss": 2.0731, "step": 3438 }, { "epoch": 0.11095539320563569, "grad_norm": 0.43359375, "learning_rate": 2.953129576118214e-05, "loss": 2.0723, "step": 3439 }, { "epoch": 0.11098765705943203, "grad_norm": 0.439453125, "learning_rate": 2.9530905352269106e-05, "loss": 2.08, "step": 3440 }, { "epoch": 0.11101992091322838, "grad_norm": 0.419921875, "learning_rate": 2.95305147834103e-05, "loss": 2.1008, "step": 3441 }, { "epoch": 0.11105218476702473, "grad_norm": 0.412109375, "learning_rate": 2.9530124054610006e-05, "loss": 2.0435, "step": 3442 }, { "epoch": 0.11108444862082108, "grad_norm": 0.4453125, "learning_rate": 2.952973316587253e-05, "loss": 2.098, "step": 3443 }, { "epoch": 0.11111671247461742, "grad_norm": 0.427734375, "learning_rate": 2.952934211720218e-05, "loss": 2.0842, "step": 3444 }, { "epoch": 0.11114897632841376, "grad_norm": 0.41796875, "learning_rate": 2.952895090860325e-05, "loss": 2.0945, "step": 3445 }, { "epoch": 0.11118124018221011, "grad_norm": 0.482421875, "learning_rate": 2.9528559540080057e-05, "loss": 2.0532, "step": 3446 }, { "epoch": 0.11121350403600647, "grad_norm": 0.44921875, "learning_rate": 2.9528168011636903e-05, "loss": 2.0757, "step": 3447 }, { "epoch": 0.11124576788980281, "grad_norm": 0.462890625, "learning_rate": 2.9527776323278096e-05, "loss": 2.0392, "step": 3448 }, { "epoch": 0.11127803174359915, "grad_norm": 0.431640625, "learning_rate": 2.9527384475007943e-05, "loss": 2.0702, "step": 3449 }, { "epoch": 0.1113102955973955, "grad_norm": 0.453125, "learning_rate": 2.9526992466830772e-05, "loss": 2.0638, "step": 3450 }, { "epoch": 0.11134255945119184, "grad_norm": 0.41015625, "learning_rate": 2.9526600298750888e-05, "loss": 2.0581, "step": 3451 }, { "epoch": 0.1113748233049882, "grad_norm": 0.50390625, "learning_rate": 2.9526207970772607e-05, "loss": 2.0724, "step": 3452 }, { "epoch": 0.11140708715878454, "grad_norm": 0.455078125, "learning_rate": 2.952581548290025e-05, "loss": 2.0686, "step": 3453 }, { "epoch": 0.11143935101258089, "grad_norm": 0.431640625, "learning_rate": 2.9525422835138135e-05, "loss": 2.086, "step": 3454 }, { "epoch": 0.11147161486637723, "grad_norm": 0.515625, "learning_rate": 2.9525030027490595e-05, "loss": 2.0866, "step": 3455 }, { "epoch": 0.11150387872017357, "grad_norm": 0.474609375, "learning_rate": 2.9524637059961936e-05, "loss": 2.0858, "step": 3456 }, { "epoch": 0.11153614257396993, "grad_norm": 0.451171875, "learning_rate": 2.9524243932556494e-05, "loss": 2.1081, "step": 3457 }, { "epoch": 0.11156840642776628, "grad_norm": 0.48046875, "learning_rate": 2.9523850645278597e-05, "loss": 2.0666, "step": 3458 }, { "epoch": 0.11160067028156262, "grad_norm": 0.45703125, "learning_rate": 2.9523457198132565e-05, "loss": 2.1002, "step": 3459 }, { "epoch": 0.11163293413535896, "grad_norm": 0.45703125, "learning_rate": 2.952306359112274e-05, "loss": 2.0683, "step": 3460 }, { "epoch": 0.11166519798915531, "grad_norm": 0.46875, "learning_rate": 2.952266982425345e-05, "loss": 2.0867, "step": 3461 }, { "epoch": 0.11169746184295166, "grad_norm": 0.494140625, "learning_rate": 2.952227589752903e-05, "loss": 2.0639, "step": 3462 }, { "epoch": 0.11172972569674801, "grad_norm": 0.41015625, "learning_rate": 2.9521881810953813e-05, "loss": 2.0673, "step": 3463 }, { "epoch": 0.11176198955054435, "grad_norm": 0.48828125, "learning_rate": 2.952148756453214e-05, "loss": 2.0597, "step": 3464 }, { "epoch": 0.1117942534043407, "grad_norm": 0.498046875, "learning_rate": 2.952109315826835e-05, "loss": 2.091, "step": 3465 }, { "epoch": 0.11182651725813704, "grad_norm": 0.423828125, "learning_rate": 2.9520698592166783e-05, "loss": 2.0806, "step": 3466 }, { "epoch": 0.1118587811119334, "grad_norm": 0.48828125, "learning_rate": 2.9520303866231783e-05, "loss": 2.086, "step": 3467 }, { "epoch": 0.11189104496572974, "grad_norm": 0.45703125, "learning_rate": 2.9519908980467694e-05, "loss": 2.071, "step": 3468 }, { "epoch": 0.11192330881952609, "grad_norm": 0.48046875, "learning_rate": 2.9519513934878866e-05, "loss": 2.0923, "step": 3469 }, { "epoch": 0.11195557267332243, "grad_norm": 0.4765625, "learning_rate": 2.9519118729469642e-05, "loss": 2.0624, "step": 3470 }, { "epoch": 0.11198783652711877, "grad_norm": 0.419921875, "learning_rate": 2.9518723364244376e-05, "loss": 2.1103, "step": 3471 }, { "epoch": 0.11202010038091513, "grad_norm": 0.46484375, "learning_rate": 2.9518327839207422e-05, "loss": 2.0812, "step": 3472 }, { "epoch": 0.11205236423471147, "grad_norm": 0.43359375, "learning_rate": 2.9517932154363127e-05, "loss": 2.0779, "step": 3473 }, { "epoch": 0.11208462808850782, "grad_norm": 0.44140625, "learning_rate": 2.951753630971585e-05, "loss": 2.0769, "step": 3474 }, { "epoch": 0.11211689194230416, "grad_norm": 0.490234375, "learning_rate": 2.951714030526995e-05, "loss": 2.0987, "step": 3475 }, { "epoch": 0.1121491557961005, "grad_norm": 0.4609375, "learning_rate": 2.9516744141029785e-05, "loss": 2.0714, "step": 3476 }, { "epoch": 0.11218141964989686, "grad_norm": 0.462890625, "learning_rate": 2.951634781699971e-05, "loss": 2.049, "step": 3477 }, { "epoch": 0.1122136835036932, "grad_norm": 0.412109375, "learning_rate": 2.9515951333184092e-05, "loss": 2.095, "step": 3478 }, { "epoch": 0.11224594735748955, "grad_norm": 0.4375, "learning_rate": 2.95155546895873e-05, "loss": 2.0943, "step": 3479 }, { "epoch": 0.1122782112112859, "grad_norm": 0.41796875, "learning_rate": 2.9515157886213694e-05, "loss": 2.0628, "step": 3480 }, { "epoch": 0.11231047506508224, "grad_norm": 0.4375, "learning_rate": 2.9514760923067644e-05, "loss": 2.0734, "step": 3481 }, { "epoch": 0.1123427389188786, "grad_norm": 0.443359375, "learning_rate": 2.9514363800153518e-05, "loss": 2.0426, "step": 3482 }, { "epoch": 0.11237500277267494, "grad_norm": 0.408203125, "learning_rate": 2.9513966517475686e-05, "loss": 2.0458, "step": 3483 }, { "epoch": 0.11240726662647128, "grad_norm": 0.40625, "learning_rate": 2.9513569075038523e-05, "loss": 2.0494, "step": 3484 }, { "epoch": 0.11243953048026763, "grad_norm": 0.43359375, "learning_rate": 2.9513171472846407e-05, "loss": 2.0542, "step": 3485 }, { "epoch": 0.11247179433406397, "grad_norm": 0.421875, "learning_rate": 2.9512773710903707e-05, "loss": 2.1221, "step": 3486 }, { "epoch": 0.11250405818786033, "grad_norm": 0.5078125, "learning_rate": 2.9512375789214807e-05, "loss": 2.1263, "step": 3487 }, { "epoch": 0.11253632204165667, "grad_norm": 0.5546875, "learning_rate": 2.9511977707784087e-05, "loss": 2.1363, "step": 3488 }, { "epoch": 0.11256858589545302, "grad_norm": 0.63671875, "learning_rate": 2.9511579466615925e-05, "loss": 2.1504, "step": 3489 }, { "epoch": 0.11260084974924936, "grad_norm": 0.5859375, "learning_rate": 2.951118106571471e-05, "loss": 2.1317, "step": 3490 }, { "epoch": 0.1126331136030457, "grad_norm": 0.51953125, "learning_rate": 2.951078250508482e-05, "loss": 2.1194, "step": 3491 }, { "epoch": 0.11266537745684205, "grad_norm": 0.431640625, "learning_rate": 2.951038378473065e-05, "loss": 2.1172, "step": 3492 }, { "epoch": 0.1126976413106384, "grad_norm": 0.55078125, "learning_rate": 2.9509984904656578e-05, "loss": 2.1103, "step": 3493 }, { "epoch": 0.11272990516443475, "grad_norm": 0.455078125, "learning_rate": 2.9509585864867008e-05, "loss": 2.1169, "step": 3494 }, { "epoch": 0.11276216901823109, "grad_norm": 0.498046875, "learning_rate": 2.9509186665366325e-05, "loss": 2.1177, "step": 3495 }, { "epoch": 0.11279443287202744, "grad_norm": 0.53125, "learning_rate": 2.9508787306158926e-05, "loss": 2.1396, "step": 3496 }, { "epoch": 0.11282669672582378, "grad_norm": 0.5, "learning_rate": 2.95083877872492e-05, "loss": 2.1413, "step": 3497 }, { "epoch": 0.11285896057962014, "grad_norm": 0.439453125, "learning_rate": 2.9507988108641554e-05, "loss": 2.1383, "step": 3498 }, { "epoch": 0.11289122443341648, "grad_norm": 0.443359375, "learning_rate": 2.9507588270340383e-05, "loss": 2.1049, "step": 3499 }, { "epoch": 0.11292348828721283, "grad_norm": 0.53125, "learning_rate": 2.9507188272350086e-05, "loss": 2.1258, "step": 3500 }, { "epoch": 0.11295575214100917, "grad_norm": 0.44921875, "learning_rate": 2.9506788114675065e-05, "loss": 2.13, "step": 3501 }, { "epoch": 0.11298801599480551, "grad_norm": 0.45703125, "learning_rate": 2.950638779731973e-05, "loss": 2.1289, "step": 3502 }, { "epoch": 0.11302027984860187, "grad_norm": 0.419921875, "learning_rate": 2.9505987320288487e-05, "loss": 2.1516, "step": 3503 }, { "epoch": 0.11305254370239821, "grad_norm": 0.431640625, "learning_rate": 2.9505586683585742e-05, "loss": 2.1531, "step": 3504 }, { "epoch": 0.11308480755619456, "grad_norm": 0.455078125, "learning_rate": 2.95051858872159e-05, "loss": 2.1177, "step": 3505 }, { "epoch": 0.1131170714099909, "grad_norm": 0.42578125, "learning_rate": 2.9504784931183382e-05, "loss": 2.1313, "step": 3506 }, { "epoch": 0.11314933526378725, "grad_norm": 0.42578125, "learning_rate": 2.9504383815492596e-05, "loss": 2.1265, "step": 3507 }, { "epoch": 0.1131815991175836, "grad_norm": 0.416015625, "learning_rate": 2.9503982540147962e-05, "loss": 2.1296, "step": 3508 }, { "epoch": 0.11321386297137995, "grad_norm": 0.41015625, "learning_rate": 2.950358110515389e-05, "loss": 2.1182, "step": 3509 }, { "epoch": 0.11324612682517629, "grad_norm": 0.421875, "learning_rate": 2.9503179510514802e-05, "loss": 2.1495, "step": 3510 }, { "epoch": 0.11327839067897263, "grad_norm": 0.3984375, "learning_rate": 2.9502777756235117e-05, "loss": 2.092, "step": 3511 }, { "epoch": 0.11331065453276898, "grad_norm": 0.4609375, "learning_rate": 2.9502375842319262e-05, "loss": 2.1168, "step": 3512 }, { "epoch": 0.11334291838656534, "grad_norm": 0.53125, "learning_rate": 2.9501973768771652e-05, "loss": 2.1685, "step": 3513 }, { "epoch": 0.11337518224036168, "grad_norm": 0.458984375, "learning_rate": 2.9501571535596727e-05, "loss": 2.1462, "step": 3514 }, { "epoch": 0.11340744609415802, "grad_norm": 0.45703125, "learning_rate": 2.95011691427989e-05, "loss": 2.1277, "step": 3515 }, { "epoch": 0.11343970994795437, "grad_norm": 0.380859375, "learning_rate": 2.950076659038261e-05, "loss": 2.1543, "step": 3516 }, { "epoch": 0.11347197380175071, "grad_norm": 0.435546875, "learning_rate": 2.950036387835228e-05, "loss": 2.1086, "step": 3517 }, { "epoch": 0.11350423765554707, "grad_norm": 0.5625, "learning_rate": 2.949996100671235e-05, "loss": 2.1281, "step": 3518 }, { "epoch": 0.11353650150934341, "grad_norm": 0.57421875, "learning_rate": 2.9499557975467246e-05, "loss": 2.1376, "step": 3519 }, { "epoch": 0.11356876536313976, "grad_norm": 0.48828125, "learning_rate": 2.9499154784621416e-05, "loss": 2.1293, "step": 3520 }, { "epoch": 0.1136010292169361, "grad_norm": 0.39453125, "learning_rate": 2.949875143417929e-05, "loss": 2.1491, "step": 3521 }, { "epoch": 0.11363329307073244, "grad_norm": 0.51953125, "learning_rate": 2.949834792414531e-05, "loss": 2.1501, "step": 3522 }, { "epoch": 0.1136655569245288, "grad_norm": 0.6015625, "learning_rate": 2.9497944254523915e-05, "loss": 2.1379, "step": 3523 }, { "epoch": 0.11369782077832515, "grad_norm": 0.5625, "learning_rate": 2.9497540425319552e-05, "loss": 2.1317, "step": 3524 }, { "epoch": 0.11373008463212149, "grad_norm": 0.408203125, "learning_rate": 2.9497136436536665e-05, "loss": 2.1411, "step": 3525 }, { "epoch": 0.11376234848591783, "grad_norm": 0.46484375, "learning_rate": 2.9496732288179697e-05, "loss": 2.1428, "step": 3526 }, { "epoch": 0.11379461233971418, "grad_norm": 0.5078125, "learning_rate": 2.9496327980253105e-05, "loss": 2.1525, "step": 3527 }, { "epoch": 0.11382687619351053, "grad_norm": 0.42578125, "learning_rate": 2.949592351276133e-05, "loss": 2.1199, "step": 3528 }, { "epoch": 0.11385914004730688, "grad_norm": 0.4609375, "learning_rate": 2.949551888570883e-05, "loss": 2.1536, "step": 3529 }, { "epoch": 0.11389140390110322, "grad_norm": 0.462890625, "learning_rate": 2.949511409910006e-05, "loss": 2.1583, "step": 3530 }, { "epoch": 0.11392366775489957, "grad_norm": 0.41015625, "learning_rate": 2.9494709152939468e-05, "loss": 2.1357, "step": 3531 }, { "epoch": 0.11395593160869591, "grad_norm": 0.5390625, "learning_rate": 2.9494304047231517e-05, "loss": 2.0997, "step": 3532 }, { "epoch": 0.11398819546249227, "grad_norm": 0.47265625, "learning_rate": 2.9493898781980668e-05, "loss": 2.1337, "step": 3533 }, { "epoch": 0.11402045931628861, "grad_norm": 0.43359375, "learning_rate": 2.9493493357191377e-05, "loss": 2.1203, "step": 3534 }, { "epoch": 0.11405272317008495, "grad_norm": 0.546875, "learning_rate": 2.949308777286811e-05, "loss": 2.1333, "step": 3535 }, { "epoch": 0.1140849870238813, "grad_norm": 0.4921875, "learning_rate": 2.9492682029015328e-05, "loss": 2.1453, "step": 3536 }, { "epoch": 0.11411725087767764, "grad_norm": 0.4375, "learning_rate": 2.94922761256375e-05, "loss": 2.1448, "step": 3537 }, { "epoch": 0.114149514731474, "grad_norm": 0.5390625, "learning_rate": 2.9491870062739093e-05, "loss": 2.1463, "step": 3538 }, { "epoch": 0.11418177858527034, "grad_norm": 0.4765625, "learning_rate": 2.949146384032458e-05, "loss": 2.1243, "step": 3539 }, { "epoch": 0.11421404243906669, "grad_norm": 0.412109375, "learning_rate": 2.9491057458398424e-05, "loss": 2.1206, "step": 3540 }, { "epoch": 0.11424630629286303, "grad_norm": 0.44140625, "learning_rate": 2.9490650916965104e-05, "loss": 2.1349, "step": 3541 }, { "epoch": 0.11427857014665938, "grad_norm": 0.443359375, "learning_rate": 2.9490244216029097e-05, "loss": 2.1508, "step": 3542 }, { "epoch": 0.11431083400045573, "grad_norm": 0.41015625, "learning_rate": 2.9489837355594875e-05, "loss": 2.1597, "step": 3543 }, { "epoch": 0.11434309785425208, "grad_norm": 0.392578125, "learning_rate": 2.9489430335666917e-05, "loss": 2.1043, "step": 3544 }, { "epoch": 0.11437536170804842, "grad_norm": 0.447265625, "learning_rate": 2.948902315624971e-05, "loss": 2.1337, "step": 3545 }, { "epoch": 0.11440762556184476, "grad_norm": 0.423828125, "learning_rate": 2.9488615817347727e-05, "loss": 2.1397, "step": 3546 }, { "epoch": 0.11443988941564111, "grad_norm": 0.423828125, "learning_rate": 2.9488208318965454e-05, "loss": 2.1488, "step": 3547 }, { "epoch": 0.11447215326943747, "grad_norm": 0.435546875, "learning_rate": 2.948780066110738e-05, "loss": 2.155, "step": 3548 }, { "epoch": 0.11450441712323381, "grad_norm": 0.43359375, "learning_rate": 2.9487392843777992e-05, "loss": 2.1307, "step": 3549 }, { "epoch": 0.11453668097703015, "grad_norm": 0.41796875, "learning_rate": 2.9486984866981773e-05, "loss": 2.143, "step": 3550 }, { "epoch": 0.1145689448308265, "grad_norm": 0.453125, "learning_rate": 2.9486576730723215e-05, "loss": 2.125, "step": 3551 }, { "epoch": 0.11460120868462284, "grad_norm": 0.5390625, "learning_rate": 2.948616843500682e-05, "loss": 2.1593, "step": 3552 }, { "epoch": 0.1146334725384192, "grad_norm": 0.4296875, "learning_rate": 2.948575997983707e-05, "loss": 2.1592, "step": 3553 }, { "epoch": 0.11466573639221554, "grad_norm": 0.4453125, "learning_rate": 2.948535136521847e-05, "loss": 2.1307, "step": 3554 }, { "epoch": 0.11469800024601189, "grad_norm": 0.44921875, "learning_rate": 2.948494259115551e-05, "loss": 2.1464, "step": 3555 }, { "epoch": 0.11473026409980823, "grad_norm": 0.439453125, "learning_rate": 2.9484533657652693e-05, "loss": 2.1271, "step": 3556 }, { "epoch": 0.11476252795360457, "grad_norm": 0.451171875, "learning_rate": 2.948412456471452e-05, "loss": 2.1476, "step": 3557 }, { "epoch": 0.11479479180740093, "grad_norm": 0.51953125, "learning_rate": 2.94837153123455e-05, "loss": 2.139, "step": 3558 }, { "epoch": 0.11482705566119727, "grad_norm": 0.486328125, "learning_rate": 2.948330590055013e-05, "loss": 2.1055, "step": 3559 }, { "epoch": 0.11485931951499362, "grad_norm": 0.65625, "learning_rate": 2.948289632933292e-05, "loss": 2.1438, "step": 3560 }, { "epoch": 0.11489158336878996, "grad_norm": 0.7265625, "learning_rate": 2.9482486598698374e-05, "loss": 2.1487, "step": 3561 }, { "epoch": 0.1149238472225863, "grad_norm": 0.671875, "learning_rate": 2.9482076708651007e-05, "loss": 2.1225, "step": 3562 }, { "epoch": 0.11495611107638266, "grad_norm": 0.470703125, "learning_rate": 2.948166665919533e-05, "loss": 2.1218, "step": 3563 }, { "epoch": 0.11498837493017901, "grad_norm": 0.546875, "learning_rate": 2.9481256450335852e-05, "loss": 2.149, "step": 3564 }, { "epoch": 0.11502063878397535, "grad_norm": 0.51171875, "learning_rate": 2.9480846082077096e-05, "loss": 2.1306, "step": 3565 }, { "epoch": 0.1150529026377717, "grad_norm": 0.466796875, "learning_rate": 2.948043555442357e-05, "loss": 2.0329, "step": 3566 }, { "epoch": 0.11508516649156804, "grad_norm": 0.48046875, "learning_rate": 2.9480024867379805e-05, "loss": 2.1193, "step": 3567 }, { "epoch": 0.1151174303453644, "grad_norm": 0.45703125, "learning_rate": 2.947961402095031e-05, "loss": 2.1329, "step": 3568 }, { "epoch": 0.11514969419916074, "grad_norm": 0.455078125, "learning_rate": 2.947920301513961e-05, "loss": 2.139, "step": 3569 }, { "epoch": 0.11518195805295708, "grad_norm": 0.44140625, "learning_rate": 2.9478791849952235e-05, "loss": 2.1239, "step": 3570 }, { "epoch": 0.11521422190675343, "grad_norm": 0.388671875, "learning_rate": 2.94783805253927e-05, "loss": 2.1284, "step": 3571 }, { "epoch": 0.11524648576054977, "grad_norm": 0.451171875, "learning_rate": 2.9477969041465542e-05, "loss": 2.1454, "step": 3572 }, { "epoch": 0.11527874961434613, "grad_norm": 0.439453125, "learning_rate": 2.9477557398175287e-05, "loss": 2.172, "step": 3573 }, { "epoch": 0.11531101346814247, "grad_norm": 0.44140625, "learning_rate": 2.9477145595526465e-05, "loss": 2.158, "step": 3574 }, { "epoch": 0.11534327732193882, "grad_norm": 0.447265625, "learning_rate": 2.9476733633523614e-05, "loss": 2.1196, "step": 3575 }, { "epoch": 0.11537554117573516, "grad_norm": 0.51171875, "learning_rate": 2.9476321512171264e-05, "loss": 2.1289, "step": 3576 }, { "epoch": 0.1154078050295315, "grad_norm": 0.44140625, "learning_rate": 2.947590923147395e-05, "loss": 2.1061, "step": 3577 }, { "epoch": 0.11544006888332786, "grad_norm": 0.44921875, "learning_rate": 2.9475496791436214e-05, "loss": 2.0906, "step": 3578 }, { "epoch": 0.1154723327371242, "grad_norm": 0.451171875, "learning_rate": 2.9475084192062594e-05, "loss": 2.113, "step": 3579 }, { "epoch": 0.11550459659092055, "grad_norm": 0.46875, "learning_rate": 2.947467143335763e-05, "loss": 2.1325, "step": 3580 }, { "epoch": 0.1155368604447169, "grad_norm": 0.478515625, "learning_rate": 2.9474258515325865e-05, "loss": 2.1301, "step": 3581 }, { "epoch": 0.11556912429851324, "grad_norm": 0.431640625, "learning_rate": 2.947384543797185e-05, "loss": 2.1389, "step": 3582 }, { "epoch": 0.11560138815230958, "grad_norm": 0.5, "learning_rate": 2.9473432201300127e-05, "loss": 2.1374, "step": 3583 }, { "epoch": 0.11563365200610594, "grad_norm": 0.49609375, "learning_rate": 2.9473018805315243e-05, "loss": 2.1622, "step": 3584 }, { "epoch": 0.11566591585990228, "grad_norm": 0.44921875, "learning_rate": 2.9472605250021754e-05, "loss": 2.1107, "step": 3585 }, { "epoch": 0.11569817971369863, "grad_norm": 0.40234375, "learning_rate": 2.9472191535424208e-05, "loss": 2.0857, "step": 3586 }, { "epoch": 0.11573044356749497, "grad_norm": 0.48828125, "learning_rate": 2.947177766152716e-05, "loss": 2.1143, "step": 3587 }, { "epoch": 0.11576270742129131, "grad_norm": 0.5078125, "learning_rate": 2.9471363628335162e-05, "loss": 2.134, "step": 3588 }, { "epoch": 0.11579497127508767, "grad_norm": 0.470703125, "learning_rate": 2.947094943585278e-05, "loss": 2.1465, "step": 3589 }, { "epoch": 0.11582723512888402, "grad_norm": 0.431640625, "learning_rate": 2.9470535084084565e-05, "loss": 2.1433, "step": 3590 }, { "epoch": 0.11585949898268036, "grad_norm": 0.431640625, "learning_rate": 2.947012057303508e-05, "loss": 2.1197, "step": 3591 }, { "epoch": 0.1158917628364767, "grad_norm": 0.458984375, "learning_rate": 2.946970590270889e-05, "loss": 2.1203, "step": 3592 }, { "epoch": 0.11592402669027305, "grad_norm": 0.474609375, "learning_rate": 2.946929107311056e-05, "loss": 2.134, "step": 3593 }, { "epoch": 0.1159562905440694, "grad_norm": 0.380859375, "learning_rate": 2.9468876084244656e-05, "loss": 2.1256, "step": 3594 }, { "epoch": 0.11598855439786575, "grad_norm": 0.50390625, "learning_rate": 2.9468460936115743e-05, "loss": 2.1178, "step": 3595 }, { "epoch": 0.11602081825166209, "grad_norm": 0.546875, "learning_rate": 2.946804562872839e-05, "loss": 2.1542, "step": 3596 }, { "epoch": 0.11605308210545844, "grad_norm": 0.5546875, "learning_rate": 2.9467630162087165e-05, "loss": 2.1353, "step": 3597 }, { "epoch": 0.11608534595925478, "grad_norm": 0.486328125, "learning_rate": 2.946721453619665e-05, "loss": 2.1328, "step": 3598 }, { "epoch": 0.11611760981305114, "grad_norm": 0.4609375, "learning_rate": 2.946679875106142e-05, "loss": 2.1033, "step": 3599 }, { "epoch": 0.11614987366684748, "grad_norm": 0.408203125, "learning_rate": 2.9466382806686044e-05, "loss": 2.1643, "step": 3600 }, { "epoch": 0.11618213752064382, "grad_norm": 0.515625, "learning_rate": 2.9465966703075103e-05, "loss": 2.1312, "step": 3601 }, { "epoch": 0.11621440137444017, "grad_norm": 0.5078125, "learning_rate": 2.9465550440233178e-05, "loss": 2.1283, "step": 3602 }, { "epoch": 0.11624666522823651, "grad_norm": 0.455078125, "learning_rate": 2.9465134018164855e-05, "loss": 2.0863, "step": 3603 }, { "epoch": 0.11627892908203287, "grad_norm": 0.431640625, "learning_rate": 2.946471743687471e-05, "loss": 2.1155, "step": 3604 }, { "epoch": 0.11631119293582921, "grad_norm": 0.484375, "learning_rate": 2.946430069636734e-05, "loss": 2.118, "step": 3605 }, { "epoch": 0.11634345678962556, "grad_norm": 0.443359375, "learning_rate": 2.9463883796647315e-05, "loss": 2.1298, "step": 3606 }, { "epoch": 0.1163757206434219, "grad_norm": 0.384765625, "learning_rate": 2.946346673771924e-05, "loss": 2.1173, "step": 3607 }, { "epoch": 0.11640798449721824, "grad_norm": 0.412109375, "learning_rate": 2.9463049519587696e-05, "loss": 2.1397, "step": 3608 }, { "epoch": 0.1164402483510146, "grad_norm": 0.4140625, "learning_rate": 2.946263214225728e-05, "loss": 2.1417, "step": 3609 }, { "epoch": 0.11647251220481095, "grad_norm": 0.392578125, "learning_rate": 2.9462214605732585e-05, "loss": 2.1149, "step": 3610 }, { "epoch": 0.11650477605860729, "grad_norm": 0.404296875, "learning_rate": 2.9461796910018204e-05, "loss": 2.1372, "step": 3611 }, { "epoch": 0.11653703991240363, "grad_norm": 0.43359375, "learning_rate": 2.946137905511874e-05, "loss": 2.1327, "step": 3612 }, { "epoch": 0.11656930376619998, "grad_norm": 0.38671875, "learning_rate": 2.9460961041038788e-05, "loss": 2.123, "step": 3613 }, { "epoch": 0.11660156761999634, "grad_norm": 0.4296875, "learning_rate": 2.9460542867782952e-05, "loss": 2.1192, "step": 3614 }, { "epoch": 0.11663383147379268, "grad_norm": 0.46875, "learning_rate": 2.946012453535584e-05, "loss": 2.1146, "step": 3615 }, { "epoch": 0.11666609532758902, "grad_norm": 0.400390625, "learning_rate": 2.9459706043762044e-05, "loss": 2.1081, "step": 3616 }, { "epoch": 0.11669835918138537, "grad_norm": 0.412109375, "learning_rate": 2.945928739300618e-05, "loss": 2.1301, "step": 3617 }, { "epoch": 0.11673062303518171, "grad_norm": 0.451171875, "learning_rate": 2.9458868583092852e-05, "loss": 2.1036, "step": 3618 }, { "epoch": 0.11676288688897807, "grad_norm": 0.41796875, "learning_rate": 2.945844961402667e-05, "loss": 2.1476, "step": 3619 }, { "epoch": 0.11679515074277441, "grad_norm": 0.39453125, "learning_rate": 2.9458030485812247e-05, "loss": 2.1163, "step": 3620 }, { "epoch": 0.11682741459657076, "grad_norm": 0.40234375, "learning_rate": 2.9457611198454203e-05, "loss": 2.1168, "step": 3621 }, { "epoch": 0.1168596784503671, "grad_norm": 0.3828125, "learning_rate": 2.945719175195714e-05, "loss": 2.1397, "step": 3622 }, { "epoch": 0.11689194230416344, "grad_norm": 0.388671875, "learning_rate": 2.9456772146325683e-05, "loss": 2.1227, "step": 3623 }, { "epoch": 0.1169242061579598, "grad_norm": 0.40234375, "learning_rate": 2.945635238156445e-05, "loss": 2.1291, "step": 3624 }, { "epoch": 0.11695647001175614, "grad_norm": 0.41796875, "learning_rate": 2.945593245767806e-05, "loss": 2.1364, "step": 3625 }, { "epoch": 0.11698873386555249, "grad_norm": 0.44140625, "learning_rate": 2.945551237467114e-05, "loss": 2.149, "step": 3626 }, { "epoch": 0.11702099771934883, "grad_norm": 0.53515625, "learning_rate": 2.9455092132548303e-05, "loss": 2.1292, "step": 3627 }, { "epoch": 0.11705326157314518, "grad_norm": 0.58203125, "learning_rate": 2.9454671731314188e-05, "loss": 2.0935, "step": 3628 }, { "epoch": 0.11708552542694153, "grad_norm": 0.734375, "learning_rate": 2.9454251170973414e-05, "loss": 2.1409, "step": 3629 }, { "epoch": 0.11711778928073788, "grad_norm": 0.82421875, "learning_rate": 2.9453830451530612e-05, "loss": 2.084, "step": 3630 }, { "epoch": 0.11715005313453422, "grad_norm": 0.70703125, "learning_rate": 2.945340957299042e-05, "loss": 2.1205, "step": 3631 }, { "epoch": 0.11718231698833056, "grad_norm": 0.46484375, "learning_rate": 2.9452988535357455e-05, "loss": 2.1249, "step": 3632 }, { "epoch": 0.11721458084212691, "grad_norm": 0.5859375, "learning_rate": 2.9452567338636365e-05, "loss": 2.1329, "step": 3633 }, { "epoch": 0.11724684469592327, "grad_norm": 0.58984375, "learning_rate": 2.9452145982831782e-05, "loss": 2.1309, "step": 3634 }, { "epoch": 0.11727910854971961, "grad_norm": 0.416015625, "learning_rate": 2.9451724467948343e-05, "loss": 2.109, "step": 3635 }, { "epoch": 0.11731137240351595, "grad_norm": 0.59765625, "learning_rate": 2.945130279399069e-05, "loss": 2.1303, "step": 3636 }, { "epoch": 0.1173436362573123, "grad_norm": 0.427734375, "learning_rate": 2.945088096096346e-05, "loss": 2.1185, "step": 3637 }, { "epoch": 0.11737590011110864, "grad_norm": 0.51171875, "learning_rate": 2.9450458968871302e-05, "loss": 2.1394, "step": 3638 }, { "epoch": 0.117408163964905, "grad_norm": 0.44140625, "learning_rate": 2.945003681771886e-05, "loss": 2.1102, "step": 3639 }, { "epoch": 0.11744042781870134, "grad_norm": 0.51171875, "learning_rate": 2.944961450751078e-05, "loss": 2.1488, "step": 3640 }, { "epoch": 0.11747269167249769, "grad_norm": 0.48828125, "learning_rate": 2.9449192038251704e-05, "loss": 2.1319, "step": 3641 }, { "epoch": 0.11750495552629403, "grad_norm": 0.41796875, "learning_rate": 2.944876940994629e-05, "loss": 2.1053, "step": 3642 }, { "epoch": 0.11753721938009037, "grad_norm": 0.427734375, "learning_rate": 2.9448346622599187e-05, "loss": 2.1231, "step": 3643 }, { "epoch": 0.11756948323388673, "grad_norm": 0.43359375, "learning_rate": 2.944792367621505e-05, "loss": 2.1545, "step": 3644 }, { "epoch": 0.11760174708768308, "grad_norm": 0.4453125, "learning_rate": 2.944750057079854e-05, "loss": 2.1328, "step": 3645 }, { "epoch": 0.11763401094147942, "grad_norm": 0.453125, "learning_rate": 2.94470773063543e-05, "loss": 2.0521, "step": 3646 }, { "epoch": 0.11766627479527576, "grad_norm": 0.453125, "learning_rate": 2.9446653882887e-05, "loss": 2.0662, "step": 3647 }, { "epoch": 0.11769853864907211, "grad_norm": 0.435546875, "learning_rate": 2.94462303004013e-05, "loss": 2.0892, "step": 3648 }, { "epoch": 0.11773080250286846, "grad_norm": 0.439453125, "learning_rate": 2.944580655890186e-05, "loss": 2.0716, "step": 3649 }, { "epoch": 0.11776306635666481, "grad_norm": 0.447265625, "learning_rate": 2.9445382658393345e-05, "loss": 2.1467, "step": 3650 }, { "epoch": 0.11779533021046115, "grad_norm": 0.439453125, "learning_rate": 2.944495859888042e-05, "loss": 2.1593, "step": 3651 }, { "epoch": 0.1178275940642575, "grad_norm": 0.44140625, "learning_rate": 2.944453438036775e-05, "loss": 2.1399, "step": 3652 }, { "epoch": 0.11785985791805384, "grad_norm": 0.4140625, "learning_rate": 2.9444110002860016e-05, "loss": 2.1043, "step": 3653 }, { "epoch": 0.1178921217718502, "grad_norm": 0.482421875, "learning_rate": 2.9443685466361873e-05, "loss": 2.1207, "step": 3654 }, { "epoch": 0.11792438562564654, "grad_norm": 0.443359375, "learning_rate": 2.9443260770878008e-05, "loss": 2.135, "step": 3655 }, { "epoch": 0.11795664947944288, "grad_norm": 0.431640625, "learning_rate": 2.944283591641309e-05, "loss": 2.1048, "step": 3656 }, { "epoch": 0.11798891333323923, "grad_norm": 0.478515625, "learning_rate": 2.944241090297179e-05, "loss": 2.1336, "step": 3657 }, { "epoch": 0.11802117718703557, "grad_norm": 0.451171875, "learning_rate": 2.9441985730558798e-05, "loss": 2.101, "step": 3658 }, { "epoch": 0.11805344104083193, "grad_norm": 0.49609375, "learning_rate": 2.9441560399178784e-05, "loss": 2.1473, "step": 3659 }, { "epoch": 0.11808570489462827, "grad_norm": 0.458984375, "learning_rate": 2.944113490883643e-05, "loss": 2.1327, "step": 3660 }, { "epoch": 0.11811796874842462, "grad_norm": 0.478515625, "learning_rate": 2.944070925953643e-05, "loss": 2.1396, "step": 3661 }, { "epoch": 0.11815023260222096, "grad_norm": 0.4921875, "learning_rate": 2.944028345128346e-05, "loss": 2.112, "step": 3662 }, { "epoch": 0.1181824964560173, "grad_norm": 0.4921875, "learning_rate": 2.9439857484082207e-05, "loss": 2.1432, "step": 3663 }, { "epoch": 0.11821476030981366, "grad_norm": 0.45703125, "learning_rate": 2.9439431357937366e-05, "loss": 2.1558, "step": 3664 }, { "epoch": 0.11824702416361, "grad_norm": 0.435546875, "learning_rate": 2.943900507285362e-05, "loss": 2.1587, "step": 3665 }, { "epoch": 0.11827928801740635, "grad_norm": 0.498046875, "learning_rate": 2.9438578628835664e-05, "loss": 2.1214, "step": 3666 }, { "epoch": 0.1183115518712027, "grad_norm": 0.462890625, "learning_rate": 2.9438152025888194e-05, "loss": 2.1345, "step": 3667 }, { "epoch": 0.11834381572499904, "grad_norm": 0.431640625, "learning_rate": 2.9437725264015907e-05, "loss": 2.1318, "step": 3668 }, { "epoch": 0.11837607957879538, "grad_norm": 0.5078125, "learning_rate": 2.9437298343223495e-05, "loss": 2.1299, "step": 3669 }, { "epoch": 0.11840834343259174, "grad_norm": 0.4453125, "learning_rate": 2.943687126351566e-05, "loss": 2.1346, "step": 3670 }, { "epoch": 0.11844060728638808, "grad_norm": 0.42578125, "learning_rate": 2.9436444024897106e-05, "loss": 2.1358, "step": 3671 }, { "epoch": 0.11847287114018443, "grad_norm": 0.53125, "learning_rate": 2.9436016627372528e-05, "loss": 2.1557, "step": 3672 }, { "epoch": 0.11850513499398077, "grad_norm": 0.48828125, "learning_rate": 2.943558907094664e-05, "loss": 2.1393, "step": 3673 }, { "epoch": 0.11853739884777711, "grad_norm": 0.44140625, "learning_rate": 2.943516135562414e-05, "loss": 2.1249, "step": 3674 }, { "epoch": 0.11856966270157347, "grad_norm": 0.462890625, "learning_rate": 2.943473348140974e-05, "loss": 2.1074, "step": 3675 }, { "epoch": 0.11860192655536982, "grad_norm": 0.41015625, "learning_rate": 2.943430544830815e-05, "loss": 2.1618, "step": 3676 }, { "epoch": 0.11863419040916616, "grad_norm": 0.41796875, "learning_rate": 2.9433877256324082e-05, "loss": 2.1377, "step": 3677 }, { "epoch": 0.1186664542629625, "grad_norm": 0.435546875, "learning_rate": 2.9433448905462245e-05, "loss": 2.131, "step": 3678 }, { "epoch": 0.11869871811675885, "grad_norm": 0.404296875, "learning_rate": 2.9433020395727365e-05, "loss": 2.1631, "step": 3679 }, { "epoch": 0.1187309819705552, "grad_norm": 0.41796875, "learning_rate": 2.9432591727124143e-05, "loss": 2.1294, "step": 3680 }, { "epoch": 0.11876324582435155, "grad_norm": 0.421875, "learning_rate": 2.9432162899657307e-05, "loss": 2.1324, "step": 3681 }, { "epoch": 0.11879550967814789, "grad_norm": 0.4453125, "learning_rate": 2.943173391333157e-05, "loss": 2.1634, "step": 3682 }, { "epoch": 0.11882777353194424, "grad_norm": 0.455078125, "learning_rate": 2.9431304768151667e-05, "loss": 2.137, "step": 3683 }, { "epoch": 0.11886003738574058, "grad_norm": 0.484375, "learning_rate": 2.9430875464122313e-05, "loss": 2.1181, "step": 3684 }, { "epoch": 0.11889230123953694, "grad_norm": 0.404296875, "learning_rate": 2.9430446001248233e-05, "loss": 2.1309, "step": 3685 }, { "epoch": 0.11892456509333328, "grad_norm": 0.40625, "learning_rate": 2.9430016379534157e-05, "loss": 2.1119, "step": 3686 }, { "epoch": 0.11895682894712963, "grad_norm": 0.44140625, "learning_rate": 2.942958659898481e-05, "loss": 2.142, "step": 3687 }, { "epoch": 0.11898909280092597, "grad_norm": 0.40234375, "learning_rate": 2.9429156659604924e-05, "loss": 2.1263, "step": 3688 }, { "epoch": 0.11902135665472231, "grad_norm": 0.3828125, "learning_rate": 2.942872656139924e-05, "loss": 2.0968, "step": 3689 }, { "epoch": 0.11905362050851867, "grad_norm": 0.458984375, "learning_rate": 2.942829630437248e-05, "loss": 2.1303, "step": 3690 }, { "epoch": 0.11908588436231501, "grad_norm": 0.490234375, "learning_rate": 2.9427865888529387e-05, "loss": 2.1398, "step": 3691 }, { "epoch": 0.11911814821611136, "grad_norm": 0.47265625, "learning_rate": 2.9427435313874697e-05, "loss": 2.1556, "step": 3692 }, { "epoch": 0.1191504120699077, "grad_norm": 0.419921875, "learning_rate": 2.9427004580413148e-05, "loss": 2.1226, "step": 3693 }, { "epoch": 0.11918267592370405, "grad_norm": 0.4375, "learning_rate": 2.9426573688149482e-05, "loss": 2.1323, "step": 3694 }, { "epoch": 0.1192149397775004, "grad_norm": 0.3984375, "learning_rate": 2.9426142637088446e-05, "loss": 2.1052, "step": 3695 }, { "epoch": 0.11924720363129675, "grad_norm": 0.421875, "learning_rate": 2.9425711427234774e-05, "loss": 2.1456, "step": 3696 }, { "epoch": 0.11927946748509309, "grad_norm": 0.458984375, "learning_rate": 2.9425280058593226e-05, "loss": 2.1561, "step": 3697 }, { "epoch": 0.11931173133888943, "grad_norm": 0.416015625, "learning_rate": 2.942484853116854e-05, "loss": 2.1334, "step": 3698 }, { "epoch": 0.11934399519268578, "grad_norm": 0.42578125, "learning_rate": 2.942441684496547e-05, "loss": 2.1061, "step": 3699 }, { "epoch": 0.11937625904648214, "grad_norm": 0.419921875, "learning_rate": 2.9423984999988772e-05, "loss": 2.133, "step": 3700 }, { "epoch": 0.11940852290027848, "grad_norm": 0.416015625, "learning_rate": 2.942355299624319e-05, "loss": 2.154, "step": 3701 }, { "epoch": 0.11944078675407482, "grad_norm": 0.5078125, "learning_rate": 2.9423120833733487e-05, "loss": 2.1272, "step": 3702 }, { "epoch": 0.11947305060787117, "grad_norm": 0.4296875, "learning_rate": 2.9422688512464416e-05, "loss": 2.132, "step": 3703 }, { "epoch": 0.11950531446166751, "grad_norm": 0.4296875, "learning_rate": 2.9422256032440736e-05, "loss": 2.1312, "step": 3704 }, { "epoch": 0.11953757831546387, "grad_norm": 0.462890625, "learning_rate": 2.9421823393667205e-05, "loss": 2.1161, "step": 3705 }, { "epoch": 0.11956984216926021, "grad_norm": 0.474609375, "learning_rate": 2.9421390596148597e-05, "loss": 2.1539, "step": 3706 }, { "epoch": 0.11960210602305656, "grad_norm": 0.58984375, "learning_rate": 2.942095763988966e-05, "loss": 2.117, "step": 3707 }, { "epoch": 0.1196343698768529, "grad_norm": 0.80859375, "learning_rate": 2.942052452489517e-05, "loss": 2.1012, "step": 3708 }, { "epoch": 0.11966663373064924, "grad_norm": 1.0625, "learning_rate": 2.9420091251169892e-05, "loss": 2.1172, "step": 3709 }, { "epoch": 0.1196988975844456, "grad_norm": 0.8125, "learning_rate": 2.941965781871859e-05, "loss": 2.133, "step": 3710 }, { "epoch": 0.11973116143824195, "grad_norm": 0.5078125, "learning_rate": 2.9419224227546043e-05, "loss": 2.1214, "step": 3711 }, { "epoch": 0.11976342529203829, "grad_norm": 0.7578125, "learning_rate": 2.9418790477657017e-05, "loss": 2.1151, "step": 3712 }, { "epoch": 0.11979568914583463, "grad_norm": 0.46484375, "learning_rate": 2.9418356569056293e-05, "loss": 2.1303, "step": 3713 }, { "epoch": 0.11982795299963098, "grad_norm": 0.6171875, "learning_rate": 2.9417922501748642e-05, "loss": 2.1586, "step": 3714 }, { "epoch": 0.11986021685342733, "grad_norm": 0.43359375, "learning_rate": 2.941748827573884e-05, "loss": 2.1258, "step": 3715 }, { "epoch": 0.11989248070722368, "grad_norm": 0.55078125, "learning_rate": 2.9417053891031675e-05, "loss": 2.1283, "step": 3716 }, { "epoch": 0.11992474456102002, "grad_norm": 0.50390625, "learning_rate": 2.941661934763192e-05, "loss": 2.1262, "step": 3717 }, { "epoch": 0.11995700841481637, "grad_norm": 0.44921875, "learning_rate": 2.9416184645544366e-05, "loss": 2.1425, "step": 3718 }, { "epoch": 0.11998927226861271, "grad_norm": 0.5, "learning_rate": 2.941574978477379e-05, "loss": 2.1053, "step": 3719 }, { "epoch": 0.12002153612240907, "grad_norm": 0.3984375, "learning_rate": 2.9415314765324982e-05, "loss": 2.1296, "step": 3720 }, { "epoch": 0.12005379997620541, "grad_norm": 0.515625, "learning_rate": 2.9414879587202732e-05, "loss": 2.1518, "step": 3721 }, { "epoch": 0.12008606383000175, "grad_norm": 0.408203125, "learning_rate": 2.941444425041183e-05, "loss": 2.1233, "step": 3722 }, { "epoch": 0.1201183276837981, "grad_norm": 0.515625, "learning_rate": 2.9414008754957066e-05, "loss": 2.1505, "step": 3723 }, { "epoch": 0.12015059153759444, "grad_norm": 0.42578125, "learning_rate": 2.9413573100843235e-05, "loss": 2.147, "step": 3724 }, { "epoch": 0.1201828553913908, "grad_norm": 0.455078125, "learning_rate": 2.941313728807513e-05, "loss": 2.1036, "step": 3725 }, { "epoch": 0.12021511924518714, "grad_norm": 0.466796875, "learning_rate": 2.9412701316657548e-05, "loss": 2.1603, "step": 3726 }, { "epoch": 0.12024738309898349, "grad_norm": 0.46484375, "learning_rate": 2.9412265186595287e-05, "loss": 2.1166, "step": 3727 }, { "epoch": 0.12027964695277983, "grad_norm": 0.43359375, "learning_rate": 2.9411828897893158e-05, "loss": 2.1193, "step": 3728 }, { "epoch": 0.12031191080657617, "grad_norm": 0.427734375, "learning_rate": 2.9411392450555953e-05, "loss": 2.0813, "step": 3729 }, { "epoch": 0.12034417466037253, "grad_norm": 0.470703125, "learning_rate": 2.9410955844588475e-05, "loss": 2.0837, "step": 3730 }, { "epoch": 0.12037643851416888, "grad_norm": 0.4453125, "learning_rate": 2.9410519079995537e-05, "loss": 2.0935, "step": 3731 }, { "epoch": 0.12040870236796522, "grad_norm": 0.431640625, "learning_rate": 2.941008215678194e-05, "loss": 2.063, "step": 3732 }, { "epoch": 0.12044096622176156, "grad_norm": 0.462890625, "learning_rate": 2.94096450749525e-05, "loss": 2.0636, "step": 3733 }, { "epoch": 0.12047323007555791, "grad_norm": 0.384765625, "learning_rate": 2.940920783451202e-05, "loss": 2.0681, "step": 3734 }, { "epoch": 0.12050549392935427, "grad_norm": 0.4140625, "learning_rate": 2.940877043546532e-05, "loss": 2.0734, "step": 3735 }, { "epoch": 0.12053775778315061, "grad_norm": 0.388671875, "learning_rate": 2.9408332877817213e-05, "loss": 2.0225, "step": 3736 }, { "epoch": 0.12057002163694695, "grad_norm": 0.40234375, "learning_rate": 2.9407895161572514e-05, "loss": 2.0653, "step": 3737 }, { "epoch": 0.1206022854907433, "grad_norm": 0.388671875, "learning_rate": 2.9407457286736036e-05, "loss": 2.0725, "step": 3738 }, { "epoch": 0.12063454934453964, "grad_norm": 0.408203125, "learning_rate": 2.940701925331261e-05, "loss": 2.0681, "step": 3739 }, { "epoch": 0.120666813198336, "grad_norm": 0.44921875, "learning_rate": 2.940658106130705e-05, "loss": 2.056, "step": 3740 }, { "epoch": 0.12069907705213234, "grad_norm": 0.404296875, "learning_rate": 2.940614271072418e-05, "loss": 2.0684, "step": 3741 }, { "epoch": 0.12073134090592869, "grad_norm": 0.447265625, "learning_rate": 2.940570420156883e-05, "loss": 2.0911, "step": 3742 }, { "epoch": 0.12076360475972503, "grad_norm": 0.4375, "learning_rate": 2.9405265533845817e-05, "loss": 2.0501, "step": 3743 }, { "epoch": 0.12079586861352137, "grad_norm": 0.43359375, "learning_rate": 2.9404826707559978e-05, "loss": 2.0903, "step": 3744 }, { "epoch": 0.12082813246731773, "grad_norm": 0.412109375, "learning_rate": 2.940438772271614e-05, "loss": 2.0681, "step": 3745 }, { "epoch": 0.12086039632111407, "grad_norm": 0.4140625, "learning_rate": 2.9403948579319137e-05, "loss": 2.0591, "step": 3746 }, { "epoch": 0.12089266017491042, "grad_norm": 0.42578125, "learning_rate": 2.94035092773738e-05, "loss": 2.0771, "step": 3747 }, { "epoch": 0.12092492402870676, "grad_norm": 0.39453125, "learning_rate": 2.9403069816884966e-05, "loss": 2.0558, "step": 3748 }, { "epoch": 0.1209571878825031, "grad_norm": 0.38671875, "learning_rate": 2.940263019785747e-05, "loss": 2.0796, "step": 3749 }, { "epoch": 0.12098945173629946, "grad_norm": 0.4375, "learning_rate": 2.9402190420296156e-05, "loss": 2.0838, "step": 3750 }, { "epoch": 0.12102171559009581, "grad_norm": 0.431640625, "learning_rate": 2.9401750484205863e-05, "loss": 2.0742, "step": 3751 }, { "epoch": 0.12105397944389215, "grad_norm": 0.396484375, "learning_rate": 2.940131038959143e-05, "loss": 2.063, "step": 3752 }, { "epoch": 0.1210862432976885, "grad_norm": 0.412109375, "learning_rate": 2.9400870136457705e-05, "loss": 2.0877, "step": 3753 }, { "epoch": 0.12111850715148484, "grad_norm": 0.470703125, "learning_rate": 2.9400429724809532e-05, "loss": 2.0666, "step": 3754 }, { "epoch": 0.1211507710052812, "grad_norm": 0.423828125, "learning_rate": 2.939998915465176e-05, "loss": 2.0534, "step": 3755 }, { "epoch": 0.12118303485907754, "grad_norm": 0.45703125, "learning_rate": 2.9399548425989237e-05, "loss": 2.0326, "step": 3756 }, { "epoch": 0.12121529871287388, "grad_norm": 0.462890625, "learning_rate": 2.9399107538826816e-05, "loss": 2.0688, "step": 3757 }, { "epoch": 0.12124756256667023, "grad_norm": 0.427734375, "learning_rate": 2.939866649316935e-05, "loss": 2.0597, "step": 3758 }, { "epoch": 0.12127982642046657, "grad_norm": 0.52734375, "learning_rate": 2.9398225289021693e-05, "loss": 2.11, "step": 3759 }, { "epoch": 0.12131209027426292, "grad_norm": 0.421875, "learning_rate": 2.9397783926388698e-05, "loss": 2.0477, "step": 3760 }, { "epoch": 0.12134435412805927, "grad_norm": 0.43359375, "learning_rate": 2.9397342405275228e-05, "loss": 2.0648, "step": 3761 }, { "epoch": 0.12137661798185562, "grad_norm": 0.453125, "learning_rate": 2.9396900725686143e-05, "loss": 2.1022, "step": 3762 }, { "epoch": 0.12140888183565196, "grad_norm": 0.4140625, "learning_rate": 2.93964588876263e-05, "loss": 2.0666, "step": 3763 }, { "epoch": 0.1214411456894483, "grad_norm": 0.44140625, "learning_rate": 2.939601689110057e-05, "loss": 2.0841, "step": 3764 }, { "epoch": 0.12147340954324465, "grad_norm": 0.55078125, "learning_rate": 2.939557473611381e-05, "loss": 2.0509, "step": 3765 }, { "epoch": 0.121505673397041, "grad_norm": 0.671875, "learning_rate": 2.9395132422670894e-05, "loss": 2.0844, "step": 3766 }, { "epoch": 0.12153793725083735, "grad_norm": 0.73046875, "learning_rate": 2.9394689950776684e-05, "loss": 2.0667, "step": 3767 }, { "epoch": 0.1215702011046337, "grad_norm": 0.6953125, "learning_rate": 2.9394247320436053e-05, "loss": 2.04, "step": 3768 }, { "epoch": 0.12160246495843004, "grad_norm": 0.4375, "learning_rate": 2.9393804531653877e-05, "loss": 2.0696, "step": 3769 }, { "epoch": 0.12163472881222638, "grad_norm": 0.53125, "learning_rate": 2.9393361584435026e-05, "loss": 2.0945, "step": 3770 }, { "epoch": 0.12166699266602274, "grad_norm": 0.484375, "learning_rate": 2.9392918478784378e-05, "loss": 2.0655, "step": 3771 }, { "epoch": 0.12169925651981908, "grad_norm": 0.4140625, "learning_rate": 2.9392475214706808e-05, "loss": 2.0138, "step": 3772 }, { "epoch": 0.12173152037361543, "grad_norm": 0.4765625, "learning_rate": 2.9392031792207192e-05, "loss": 2.0556, "step": 3773 }, { "epoch": 0.12176378422741177, "grad_norm": 0.498046875, "learning_rate": 2.9391588211290416e-05, "loss": 2.0418, "step": 3774 }, { "epoch": 0.12179604808120811, "grad_norm": 0.41015625, "learning_rate": 2.9391144471961362e-05, "loss": 2.0827, "step": 3775 }, { "epoch": 0.12182831193500447, "grad_norm": 0.5, "learning_rate": 2.9390700574224912e-05, "loss": 2.0623, "step": 3776 }, { "epoch": 0.12186057578880082, "grad_norm": 0.50390625, "learning_rate": 2.939025651808596e-05, "loss": 2.0636, "step": 3777 }, { "epoch": 0.12189283964259716, "grad_norm": 0.416015625, "learning_rate": 2.9389812303549385e-05, "loss": 2.0816, "step": 3778 }, { "epoch": 0.1219251034963935, "grad_norm": 0.466796875, "learning_rate": 2.938936793062008e-05, "loss": 2.0269, "step": 3779 }, { "epoch": 0.12195736735018985, "grad_norm": 0.447265625, "learning_rate": 2.9388923399302934e-05, "loss": 2.0396, "step": 3780 }, { "epoch": 0.1219896312039862, "grad_norm": 0.3984375, "learning_rate": 2.938847870960284e-05, "loss": 2.0256, "step": 3781 }, { "epoch": 0.12202189505778255, "grad_norm": 0.439453125, "learning_rate": 2.93880338615247e-05, "loss": 2.0742, "step": 3782 }, { "epoch": 0.12205415891157889, "grad_norm": 0.404296875, "learning_rate": 2.93875888550734e-05, "loss": 2.0532, "step": 3783 }, { "epoch": 0.12208642276537524, "grad_norm": 0.43359375, "learning_rate": 2.9387143690253847e-05, "loss": 2.0688, "step": 3784 }, { "epoch": 0.12211868661917158, "grad_norm": 0.447265625, "learning_rate": 2.9386698367070936e-05, "loss": 2.0402, "step": 3785 }, { "epoch": 0.12215095047296794, "grad_norm": 0.416015625, "learning_rate": 2.938625288552957e-05, "loss": 2.0315, "step": 3786 }, { "epoch": 0.12218321432676428, "grad_norm": 0.41796875, "learning_rate": 2.9385807245634654e-05, "loss": 2.0633, "step": 3787 }, { "epoch": 0.12221547818056062, "grad_norm": 0.486328125, "learning_rate": 2.938536144739109e-05, "loss": 2.0587, "step": 3788 }, { "epoch": 0.12224774203435697, "grad_norm": 0.404296875, "learning_rate": 2.938491549080379e-05, "loss": 2.0414, "step": 3789 }, { "epoch": 0.12228000588815331, "grad_norm": 0.416015625, "learning_rate": 2.938446937587766e-05, "loss": 2.053, "step": 3790 }, { "epoch": 0.12231226974194967, "grad_norm": 0.478515625, "learning_rate": 2.9384023102617606e-05, "loss": 2.0702, "step": 3791 }, { "epoch": 0.12234453359574601, "grad_norm": 0.43359375, "learning_rate": 2.9383576671028553e-05, "loss": 2.0615, "step": 3792 }, { "epoch": 0.12237679744954236, "grad_norm": 0.41796875, "learning_rate": 2.93831300811154e-05, "loss": 2.0732, "step": 3793 }, { "epoch": 0.1224090613033387, "grad_norm": 0.4296875, "learning_rate": 2.938268333288307e-05, "loss": 2.0387, "step": 3794 }, { "epoch": 0.12244132515713504, "grad_norm": 0.419921875, "learning_rate": 2.9382236426336483e-05, "loss": 2.0666, "step": 3795 }, { "epoch": 0.1224735890109314, "grad_norm": 0.39453125, "learning_rate": 2.9381789361480553e-05, "loss": 2.0757, "step": 3796 }, { "epoch": 0.12250585286472775, "grad_norm": 0.42578125, "learning_rate": 2.9381342138320203e-05, "loss": 2.0073, "step": 3797 }, { "epoch": 0.12253811671852409, "grad_norm": 0.3828125, "learning_rate": 2.9380894756860356e-05, "loss": 2.0528, "step": 3798 }, { "epoch": 0.12257038057232043, "grad_norm": 0.392578125, "learning_rate": 2.9380447217105938e-05, "loss": 2.0754, "step": 3799 }, { "epoch": 0.12260264442611678, "grad_norm": 0.46484375, "learning_rate": 2.9379999519061872e-05, "loss": 2.0757, "step": 3800 }, { "epoch": 0.12263490827991314, "grad_norm": 0.46875, "learning_rate": 2.9379551662733088e-05, "loss": 2.0672, "step": 3801 }, { "epoch": 0.12266717213370948, "grad_norm": 0.412109375, "learning_rate": 2.937910364812451e-05, "loss": 2.0425, "step": 3802 }, { "epoch": 0.12269943598750582, "grad_norm": 0.40234375, "learning_rate": 2.9378655475241083e-05, "loss": 2.0402, "step": 3803 }, { "epoch": 0.12273169984130217, "grad_norm": 0.443359375, "learning_rate": 2.937820714408773e-05, "loss": 2.0526, "step": 3804 }, { "epoch": 0.12276396369509851, "grad_norm": 0.384765625, "learning_rate": 2.9377758654669387e-05, "loss": 2.0656, "step": 3805 }, { "epoch": 0.12279622754889487, "grad_norm": 0.421875, "learning_rate": 2.9377310006990988e-05, "loss": 2.0608, "step": 3806 }, { "epoch": 0.12282849140269121, "grad_norm": 0.44921875, "learning_rate": 2.9376861201057476e-05, "loss": 2.0261, "step": 3807 }, { "epoch": 0.12286075525648756, "grad_norm": 0.462890625, "learning_rate": 2.9376412236873792e-05, "loss": 2.077, "step": 3808 }, { "epoch": 0.1228930191102839, "grad_norm": 0.5078125, "learning_rate": 2.937596311444487e-05, "loss": 2.0394, "step": 3809 }, { "epoch": 0.12292528296408024, "grad_norm": 0.5, "learning_rate": 2.9375513833775668e-05, "loss": 2.0402, "step": 3810 }, { "epoch": 0.1229575468178766, "grad_norm": 0.412109375, "learning_rate": 2.9375064394871118e-05, "loss": 2.0559, "step": 3811 }, { "epoch": 0.12298981067167294, "grad_norm": 0.41015625, "learning_rate": 2.9374614797736172e-05, "loss": 2.0741, "step": 3812 }, { "epoch": 0.12302207452546929, "grad_norm": 0.466796875, "learning_rate": 2.937416504237578e-05, "loss": 2.0998, "step": 3813 }, { "epoch": 0.12305433837926563, "grad_norm": 0.3984375, "learning_rate": 2.9373715128794887e-05, "loss": 2.0732, "step": 3814 }, { "epoch": 0.12308660223306198, "grad_norm": 0.4296875, "learning_rate": 2.9373265056998453e-05, "loss": 2.0756, "step": 3815 }, { "epoch": 0.12311886608685833, "grad_norm": 0.435546875, "learning_rate": 2.937281482699143e-05, "loss": 2.1458, "step": 3816 }, { "epoch": 0.12315112994065468, "grad_norm": 0.4921875, "learning_rate": 2.937236443877877e-05, "loss": 2.1485, "step": 3817 }, { "epoch": 0.12318339379445102, "grad_norm": 0.447265625, "learning_rate": 2.9371913892365434e-05, "loss": 2.1471, "step": 3818 }, { "epoch": 0.12321565764824736, "grad_norm": 0.421875, "learning_rate": 2.937146318775638e-05, "loss": 2.1204, "step": 3819 }, { "epoch": 0.12324792150204371, "grad_norm": 0.453125, "learning_rate": 2.9371012324956567e-05, "loss": 2.1232, "step": 3820 }, { "epoch": 0.12328018535584007, "grad_norm": 0.42578125, "learning_rate": 2.937056130397096e-05, "loss": 2.1616, "step": 3821 }, { "epoch": 0.12331244920963641, "grad_norm": 0.427734375, "learning_rate": 2.9370110124804527e-05, "loss": 2.1453, "step": 3822 }, { "epoch": 0.12334471306343275, "grad_norm": 0.416015625, "learning_rate": 2.9369658787462227e-05, "loss": 2.1179, "step": 3823 }, { "epoch": 0.1233769769172291, "grad_norm": 0.45703125, "learning_rate": 2.9369207291949037e-05, "loss": 2.1281, "step": 3824 }, { "epoch": 0.12340924077102544, "grad_norm": 0.53515625, "learning_rate": 2.936875563826992e-05, "loss": 2.0802, "step": 3825 }, { "epoch": 0.1234415046248218, "grad_norm": 0.66796875, "learning_rate": 2.9368303826429846e-05, "loss": 2.102, "step": 3826 }, { "epoch": 0.12347376847861814, "grad_norm": 0.8203125, "learning_rate": 2.9367851856433793e-05, "loss": 2.1438, "step": 3827 }, { "epoch": 0.12350603233241449, "grad_norm": 0.859375, "learning_rate": 2.9367399728286734e-05, "loss": 2.1309, "step": 3828 }, { "epoch": 0.12353829618621083, "grad_norm": 0.51171875, "learning_rate": 2.9366947441993644e-05, "loss": 2.139, "step": 3829 }, { "epoch": 0.12357056004000717, "grad_norm": 0.56640625, "learning_rate": 2.9366494997559505e-05, "loss": 2.1368, "step": 3830 }, { "epoch": 0.12360282389380353, "grad_norm": 0.69921875, "learning_rate": 2.9366042394989296e-05, "loss": 2.1274, "step": 3831 }, { "epoch": 0.12363508774759988, "grad_norm": 0.423828125, "learning_rate": 2.9365589634288e-05, "loss": 2.1151, "step": 3832 }, { "epoch": 0.12366735160139622, "grad_norm": 0.6796875, "learning_rate": 2.9365136715460595e-05, "loss": 2.1294, "step": 3833 }, { "epoch": 0.12369961545519256, "grad_norm": 0.49609375, "learning_rate": 2.9364683638512078e-05, "loss": 2.1441, "step": 3834 }, { "epoch": 0.1237318793089889, "grad_norm": 0.5859375, "learning_rate": 2.9364230403447422e-05, "loss": 2.1353, "step": 3835 }, { "epoch": 0.12376414316278526, "grad_norm": 0.52734375, "learning_rate": 2.9363777010271625e-05, "loss": 2.1268, "step": 3836 }, { "epoch": 0.12379640701658161, "grad_norm": 0.45703125, "learning_rate": 2.9363323458989675e-05, "loss": 2.0869, "step": 3837 }, { "epoch": 0.12382867087037795, "grad_norm": 0.55078125, "learning_rate": 2.9362869749606562e-05, "loss": 2.1587, "step": 3838 }, { "epoch": 0.1238609347241743, "grad_norm": 0.466796875, "learning_rate": 2.9362415882127287e-05, "loss": 2.1195, "step": 3839 }, { "epoch": 0.12389319857797064, "grad_norm": 0.50390625, "learning_rate": 2.936196185655684e-05, "loss": 2.1438, "step": 3840 }, { "epoch": 0.123925462431767, "grad_norm": 0.443359375, "learning_rate": 2.9361507672900215e-05, "loss": 2.1438, "step": 3841 }, { "epoch": 0.12395772628556334, "grad_norm": 0.45703125, "learning_rate": 2.9361053331162426e-05, "loss": 2.1272, "step": 3842 }, { "epoch": 0.12398999013935968, "grad_norm": 0.458984375, "learning_rate": 2.936059883134846e-05, "loss": 2.1285, "step": 3843 }, { "epoch": 0.12402225399315603, "grad_norm": 0.490234375, "learning_rate": 2.936014417346332e-05, "loss": 2.1356, "step": 3844 }, { "epoch": 0.12405451784695237, "grad_norm": 0.51171875, "learning_rate": 2.935968935751202e-05, "loss": 2.1221, "step": 3845 }, { "epoch": 0.12408678170074872, "grad_norm": 0.404296875, "learning_rate": 2.935923438349956e-05, "loss": 2.1224, "step": 3846 }, { "epoch": 0.12411904555454507, "grad_norm": 0.53515625, "learning_rate": 2.9358779251430946e-05, "loss": 2.1751, "step": 3847 }, { "epoch": 0.12415130940834142, "grad_norm": 0.48046875, "learning_rate": 2.9358323961311196e-05, "loss": 2.1131, "step": 3848 }, { "epoch": 0.12418357326213776, "grad_norm": 0.404296875, "learning_rate": 2.9357868513145314e-05, "loss": 2.1117, "step": 3849 }, { "epoch": 0.1242158371159341, "grad_norm": 0.515625, "learning_rate": 2.9357412906938316e-05, "loss": 2.1402, "step": 3850 }, { "epoch": 0.12424810096973045, "grad_norm": 0.41796875, "learning_rate": 2.9356957142695215e-05, "loss": 2.1322, "step": 3851 }, { "epoch": 0.1242803648235268, "grad_norm": 0.44140625, "learning_rate": 2.935650122042103e-05, "loss": 2.1432, "step": 3852 }, { "epoch": 0.12431262867732315, "grad_norm": 0.43359375, "learning_rate": 2.9356045140120782e-05, "loss": 2.1235, "step": 3853 }, { "epoch": 0.1243448925311195, "grad_norm": 0.388671875, "learning_rate": 2.9355588901799487e-05, "loss": 2.1219, "step": 3854 }, { "epoch": 0.12437715638491584, "grad_norm": 0.408203125, "learning_rate": 2.9355132505462168e-05, "loss": 2.1113, "step": 3855 }, { "epoch": 0.12440942023871218, "grad_norm": 0.3984375, "learning_rate": 2.935467595111385e-05, "loss": 2.1413, "step": 3856 }, { "epoch": 0.12444168409250854, "grad_norm": 0.37890625, "learning_rate": 2.9354219238759553e-05, "loss": 2.1444, "step": 3857 }, { "epoch": 0.12447394794630488, "grad_norm": 0.419921875, "learning_rate": 2.935376236840431e-05, "loss": 2.116, "step": 3858 }, { "epoch": 0.12450621180010123, "grad_norm": 0.40234375, "learning_rate": 2.9353305340053146e-05, "loss": 2.1201, "step": 3859 }, { "epoch": 0.12453847565389757, "grad_norm": 0.400390625, "learning_rate": 2.9352848153711095e-05, "loss": 2.1273, "step": 3860 }, { "epoch": 0.12457073950769391, "grad_norm": 0.412109375, "learning_rate": 2.9352390809383193e-05, "loss": 2.1058, "step": 3861 }, { "epoch": 0.12460300336149027, "grad_norm": 0.41015625, "learning_rate": 2.9351933307074464e-05, "loss": 2.1365, "step": 3862 }, { "epoch": 0.12463526721528662, "grad_norm": 0.4375, "learning_rate": 2.9351475646789954e-05, "loss": 2.1277, "step": 3863 }, { "epoch": 0.12466753106908296, "grad_norm": 0.3984375, "learning_rate": 2.935101782853469e-05, "loss": 2.1301, "step": 3864 }, { "epoch": 0.1246997949228793, "grad_norm": 0.4296875, "learning_rate": 2.935055985231372e-05, "loss": 2.1442, "step": 3865 }, { "epoch": 0.12473205877667565, "grad_norm": 0.380859375, "learning_rate": 2.935010171813208e-05, "loss": 2.1205, "step": 3866 }, { "epoch": 0.124764322630472, "grad_norm": 0.4375, "learning_rate": 2.934964342599482e-05, "loss": 2.1549, "step": 3867 }, { "epoch": 0.12479658648426835, "grad_norm": 0.392578125, "learning_rate": 2.9349184975906977e-05, "loss": 2.133, "step": 3868 }, { "epoch": 0.12482885033806469, "grad_norm": 0.41796875, "learning_rate": 2.93487263678736e-05, "loss": 2.1359, "step": 3869 }, { "epoch": 0.12486111419186104, "grad_norm": 0.39453125, "learning_rate": 2.9348267601899737e-05, "loss": 2.1044, "step": 3870 }, { "epoch": 0.12489337804565738, "grad_norm": 0.416015625, "learning_rate": 2.934780867799044e-05, "loss": 2.1412, "step": 3871 }, { "epoch": 0.12492564189945374, "grad_norm": 0.43359375, "learning_rate": 2.934734959615075e-05, "loss": 2.1301, "step": 3872 }, { "epoch": 0.12495790575325008, "grad_norm": 0.4296875, "learning_rate": 2.9346890356385736e-05, "loss": 2.1318, "step": 3873 }, { "epoch": 0.12499016960704643, "grad_norm": 0.44140625, "learning_rate": 2.9346430958700438e-05, "loss": 2.0977, "step": 3874 }, { "epoch": 0.12502243346084277, "grad_norm": 0.427734375, "learning_rate": 2.9345971403099927e-05, "loss": 2.1196, "step": 3875 }, { "epoch": 0.1250546973146391, "grad_norm": 0.46484375, "learning_rate": 2.9345511689589254e-05, "loss": 2.1146, "step": 3876 }, { "epoch": 0.12508696116843546, "grad_norm": 0.4921875, "learning_rate": 2.9345051818173478e-05, "loss": 2.113, "step": 3877 }, { "epoch": 0.1251192250222318, "grad_norm": 0.4765625, "learning_rate": 2.9344591788857663e-05, "loss": 2.1121, "step": 3878 }, { "epoch": 0.12515148887602817, "grad_norm": 0.498046875, "learning_rate": 2.9344131601646873e-05, "loss": 2.1368, "step": 3879 }, { "epoch": 0.12518375272982452, "grad_norm": 0.482421875, "learning_rate": 2.934367125654617e-05, "loss": 2.1191, "step": 3880 }, { "epoch": 0.12521601658362086, "grad_norm": 0.419921875, "learning_rate": 2.934321075356063e-05, "loss": 2.0968, "step": 3881 }, { "epoch": 0.1252482804374172, "grad_norm": 0.416015625, "learning_rate": 2.934275009269531e-05, "loss": 2.0913, "step": 3882 }, { "epoch": 0.12528054429121355, "grad_norm": 0.451171875, "learning_rate": 2.934228927395529e-05, "loss": 2.1357, "step": 3883 }, { "epoch": 0.1253128081450099, "grad_norm": 0.45703125, "learning_rate": 2.934182829734563e-05, "loss": 2.1407, "step": 3884 }, { "epoch": 0.12534507199880623, "grad_norm": 0.404296875, "learning_rate": 2.9341367162871425e-05, "loss": 2.1059, "step": 3885 }, { "epoch": 0.12537733585260258, "grad_norm": 0.412109375, "learning_rate": 2.9340905870537735e-05, "loss": 2.1126, "step": 3886 }, { "epoch": 0.12540959970639892, "grad_norm": 0.42578125, "learning_rate": 2.934044442034964e-05, "loss": 2.1247, "step": 3887 }, { "epoch": 0.12544186356019527, "grad_norm": 0.4765625, "learning_rate": 2.933998281231222e-05, "loss": 2.1404, "step": 3888 }, { "epoch": 0.1254741274139916, "grad_norm": 0.455078125, "learning_rate": 2.9339521046430555e-05, "loss": 2.1221, "step": 3889 }, { "epoch": 0.12550639126778798, "grad_norm": 0.451171875, "learning_rate": 2.9339059122709734e-05, "loss": 2.1148, "step": 3890 }, { "epoch": 0.12553865512158432, "grad_norm": 0.373046875, "learning_rate": 2.9338597041154836e-05, "loss": 2.1183, "step": 3891 }, { "epoch": 0.12557091897538067, "grad_norm": 0.412109375, "learning_rate": 2.9338134801770945e-05, "loss": 2.1522, "step": 3892 }, { "epoch": 0.125603182829177, "grad_norm": 0.41796875, "learning_rate": 2.933767240456315e-05, "loss": 2.1003, "step": 3893 }, { "epoch": 0.12563544668297336, "grad_norm": 0.44140625, "learning_rate": 2.933720984953655e-05, "loss": 2.1094, "step": 3894 }, { "epoch": 0.1256677105367697, "grad_norm": 0.47265625, "learning_rate": 2.9336747136696225e-05, "loss": 2.0975, "step": 3895 }, { "epoch": 0.12569997439056604, "grad_norm": 0.470703125, "learning_rate": 2.9336284266047274e-05, "loss": 2.1412, "step": 3896 }, { "epoch": 0.1257322382443624, "grad_norm": 0.443359375, "learning_rate": 2.9335821237594794e-05, "loss": 2.1181, "step": 3897 }, { "epoch": 0.12576450209815873, "grad_norm": 0.43359375, "learning_rate": 2.9335358051343873e-05, "loss": 2.0988, "step": 3898 }, { "epoch": 0.12579676595195508, "grad_norm": 0.5078125, "learning_rate": 2.9334894707299616e-05, "loss": 2.118, "step": 3899 }, { "epoch": 0.12582902980575145, "grad_norm": 0.54296875, "learning_rate": 2.9334431205467123e-05, "loss": 2.1069, "step": 3900 }, { "epoch": 0.1258612936595478, "grad_norm": 0.51953125, "learning_rate": 2.9333967545851494e-05, "loss": 2.085, "step": 3901 }, { "epoch": 0.12589355751334413, "grad_norm": 0.3828125, "learning_rate": 2.9333503728457833e-05, "loss": 2.1219, "step": 3902 }, { "epoch": 0.12592582136714048, "grad_norm": 0.5625, "learning_rate": 2.933303975329125e-05, "loss": 2.1271, "step": 3903 }, { "epoch": 0.12595808522093682, "grad_norm": 0.671875, "learning_rate": 2.9332575620356843e-05, "loss": 2.1376, "step": 3904 }, { "epoch": 0.12599034907473317, "grad_norm": 0.703125, "learning_rate": 2.9332111329659724e-05, "loss": 2.1069, "step": 3905 }, { "epoch": 0.1260226129285295, "grad_norm": 0.671875, "learning_rate": 2.933164688120501e-05, "loss": 2.1237, "step": 3906 }, { "epoch": 0.12605487678232585, "grad_norm": 0.46875, "learning_rate": 2.933118227499781e-05, "loss": 2.1148, "step": 3907 }, { "epoch": 0.1260871406361222, "grad_norm": 0.625, "learning_rate": 2.9330717511043234e-05, "loss": 2.1056, "step": 3908 }, { "epoch": 0.12611940448991854, "grad_norm": 0.57421875, "learning_rate": 2.93302525893464e-05, "loss": 2.1147, "step": 3909 }, { "epoch": 0.1261516683437149, "grad_norm": 0.439453125, "learning_rate": 2.9329787509912424e-05, "loss": 2.1352, "step": 3910 }, { "epoch": 0.12618393219751126, "grad_norm": 0.5625, "learning_rate": 2.932932227274643e-05, "loss": 2.1395, "step": 3911 }, { "epoch": 0.1262161960513076, "grad_norm": 0.53515625, "learning_rate": 2.9328856877853538e-05, "loss": 2.1044, "step": 3912 }, { "epoch": 0.12624845990510394, "grad_norm": 0.484375, "learning_rate": 2.9328391325238863e-05, "loss": 2.1485, "step": 3913 }, { "epoch": 0.1262807237589003, "grad_norm": 0.494140625, "learning_rate": 2.932792561490754e-05, "loss": 2.1403, "step": 3914 }, { "epoch": 0.12631298761269663, "grad_norm": 0.48828125, "learning_rate": 2.9327459746864692e-05, "loss": 2.1383, "step": 3915 }, { "epoch": 0.12634525146649297, "grad_norm": 0.4296875, "learning_rate": 2.932699372111544e-05, "loss": 2.1229, "step": 3916 }, { "epoch": 0.12637751532028932, "grad_norm": 0.458984375, "learning_rate": 2.9326527537664926e-05, "loss": 2.1196, "step": 3917 }, { "epoch": 0.12640977917408566, "grad_norm": 0.423828125, "learning_rate": 2.9326061196518268e-05, "loss": 2.1091, "step": 3918 }, { "epoch": 0.126442043027882, "grad_norm": 0.43359375, "learning_rate": 2.9325594697680608e-05, "loss": 2.1204, "step": 3919 }, { "epoch": 0.12647430688167838, "grad_norm": 0.4296875, "learning_rate": 2.932512804115708e-05, "loss": 2.0932, "step": 3920 }, { "epoch": 0.12650657073547472, "grad_norm": 0.44140625, "learning_rate": 2.932466122695282e-05, "loss": 2.1472, "step": 3921 }, { "epoch": 0.12653883458927107, "grad_norm": 0.462890625, "learning_rate": 2.932419425507297e-05, "loss": 2.1044, "step": 3922 }, { "epoch": 0.1265710984430674, "grad_norm": 0.412109375, "learning_rate": 2.932372712552266e-05, "loss": 2.1167, "step": 3923 }, { "epoch": 0.12660336229686375, "grad_norm": 0.423828125, "learning_rate": 2.9323259838307036e-05, "loss": 2.1222, "step": 3924 }, { "epoch": 0.1266356261506601, "grad_norm": 0.419921875, "learning_rate": 2.9322792393431247e-05, "loss": 2.1389, "step": 3925 }, { "epoch": 0.12666789000445644, "grad_norm": 0.404296875, "learning_rate": 2.932232479090043e-05, "loss": 2.1192, "step": 3926 }, { "epoch": 0.12670015385825278, "grad_norm": 0.455078125, "learning_rate": 2.932185703071974e-05, "loss": 2.1183, "step": 3927 }, { "epoch": 0.12673241771204913, "grad_norm": 0.462890625, "learning_rate": 2.932138911289432e-05, "loss": 2.1279, "step": 3928 }, { "epoch": 0.12676468156584547, "grad_norm": 0.39453125, "learning_rate": 2.9320921037429325e-05, "loss": 2.1196, "step": 3929 }, { "epoch": 0.12679694541964184, "grad_norm": 0.458984375, "learning_rate": 2.9320452804329907e-05, "loss": 2.1406, "step": 3930 }, { "epoch": 0.1268292092734382, "grad_norm": 0.466796875, "learning_rate": 2.931998441360121e-05, "loss": 2.1369, "step": 3931 }, { "epoch": 0.12686147312723453, "grad_norm": 0.435546875, "learning_rate": 2.9319515865248408e-05, "loss": 2.1243, "step": 3932 }, { "epoch": 0.12689373698103087, "grad_norm": 0.47265625, "learning_rate": 2.9319047159276638e-05, "loss": 2.1418, "step": 3933 }, { "epoch": 0.12692600083482722, "grad_norm": 0.45703125, "learning_rate": 2.9318578295691078e-05, "loss": 2.0972, "step": 3934 }, { "epoch": 0.12695826468862356, "grad_norm": 0.54296875, "learning_rate": 2.9318109274496877e-05, "loss": 2.129, "step": 3935 }, { "epoch": 0.1269905285424199, "grad_norm": 0.474609375, "learning_rate": 2.93176400956992e-05, "loss": 2.0513, "step": 3936 }, { "epoch": 0.12702279239621625, "grad_norm": 0.421875, "learning_rate": 2.9317170759303213e-05, "loss": 2.1427, "step": 3937 }, { "epoch": 0.1270550562500126, "grad_norm": 0.50390625, "learning_rate": 2.931670126531408e-05, "loss": 2.1332, "step": 3938 }, { "epoch": 0.12708732010380894, "grad_norm": 0.42578125, "learning_rate": 2.9316231613736974e-05, "loss": 2.1128, "step": 3939 }, { "epoch": 0.1271195839576053, "grad_norm": 0.466796875, "learning_rate": 2.931576180457706e-05, "loss": 2.1325, "step": 3940 }, { "epoch": 0.12715184781140165, "grad_norm": 0.5078125, "learning_rate": 2.9315291837839505e-05, "loss": 2.162, "step": 3941 }, { "epoch": 0.127184111665198, "grad_norm": 0.453125, "learning_rate": 2.931482171352949e-05, "loss": 2.1364, "step": 3942 }, { "epoch": 0.12721637551899434, "grad_norm": 0.42578125, "learning_rate": 2.931435143165219e-05, "loss": 2.1411, "step": 3943 }, { "epoch": 0.12724863937279068, "grad_norm": 0.466796875, "learning_rate": 2.931388099221277e-05, "loss": 2.1388, "step": 3944 }, { "epoch": 0.12728090322658703, "grad_norm": 0.412109375, "learning_rate": 2.9313410395216426e-05, "loss": 2.1036, "step": 3945 }, { "epoch": 0.12731316708038337, "grad_norm": 0.41796875, "learning_rate": 2.9312939640668323e-05, "loss": 2.1244, "step": 3946 }, { "epoch": 0.12734543093417972, "grad_norm": 0.40625, "learning_rate": 2.9312468728573655e-05, "loss": 2.1347, "step": 3947 }, { "epoch": 0.12737769478797606, "grad_norm": 0.412109375, "learning_rate": 2.9311997658937594e-05, "loss": 2.1099, "step": 3948 }, { "epoch": 0.1274099586417724, "grad_norm": 0.392578125, "learning_rate": 2.931152643176533e-05, "loss": 2.0963, "step": 3949 }, { "epoch": 0.12744222249556877, "grad_norm": 0.48828125, "learning_rate": 2.9311055047062053e-05, "loss": 2.0936, "step": 3950 }, { "epoch": 0.12747448634936512, "grad_norm": 0.392578125, "learning_rate": 2.9310583504832948e-05, "loss": 2.1164, "step": 3951 }, { "epoch": 0.12750675020316146, "grad_norm": 0.408203125, "learning_rate": 2.9310111805083203e-05, "loss": 2.1102, "step": 3952 }, { "epoch": 0.1275390140569578, "grad_norm": 0.376953125, "learning_rate": 2.9309639947818014e-05, "loss": 2.1012, "step": 3953 }, { "epoch": 0.12757127791075415, "grad_norm": 0.416015625, "learning_rate": 2.9309167933042577e-05, "loss": 2.1262, "step": 3954 }, { "epoch": 0.1276035417645505, "grad_norm": 0.4453125, "learning_rate": 2.9308695760762085e-05, "loss": 2.082, "step": 3955 }, { "epoch": 0.12763580561834684, "grad_norm": 0.54296875, "learning_rate": 2.9308223430981732e-05, "loss": 2.1473, "step": 3956 }, { "epoch": 0.12766806947214318, "grad_norm": 0.6953125, "learning_rate": 2.9307750943706722e-05, "loss": 2.1277, "step": 3957 }, { "epoch": 0.12770033332593952, "grad_norm": 0.859375, "learning_rate": 2.9307278298942253e-05, "loss": 2.1159, "step": 3958 }, { "epoch": 0.12773259717973587, "grad_norm": 0.78125, "learning_rate": 2.930680549669353e-05, "loss": 2.1299, "step": 3959 }, { "epoch": 0.12776486103353224, "grad_norm": 0.419921875, "learning_rate": 2.9306332536965754e-05, "loss": 2.1494, "step": 3960 }, { "epoch": 0.12779712488732858, "grad_norm": 0.67578125, "learning_rate": 2.9305859419764132e-05, "loss": 2.0788, "step": 3961 }, { "epoch": 0.12782938874112493, "grad_norm": 0.51953125, "learning_rate": 2.9305386145093878e-05, "loss": 2.1377, "step": 3962 }, { "epoch": 0.12786165259492127, "grad_norm": 0.50390625, "learning_rate": 2.930491271296019e-05, "loss": 2.1184, "step": 3963 }, { "epoch": 0.12789391644871761, "grad_norm": 0.6015625, "learning_rate": 2.9304439123368286e-05, "loss": 2.1233, "step": 3964 }, { "epoch": 0.12792618030251396, "grad_norm": 0.416015625, "learning_rate": 2.930396537632338e-05, "loss": 2.0982, "step": 3965 }, { "epoch": 0.1279584441563103, "grad_norm": 0.65625, "learning_rate": 2.9303491471830684e-05, "loss": 2.1196, "step": 3966 }, { "epoch": 0.12799070801010665, "grad_norm": 0.37890625, "learning_rate": 2.9303017409895415e-05, "loss": 2.1344, "step": 3967 }, { "epoch": 0.128022971863903, "grad_norm": 0.5078125, "learning_rate": 2.9302543190522793e-05, "loss": 2.1269, "step": 3968 }, { "epoch": 0.12805523571769933, "grad_norm": 0.43359375, "learning_rate": 2.9302068813718035e-05, "loss": 2.1353, "step": 3969 }, { "epoch": 0.1280874995714957, "grad_norm": 0.478515625, "learning_rate": 2.930159427948636e-05, "loss": 2.1359, "step": 3970 }, { "epoch": 0.12811976342529205, "grad_norm": 0.3828125, "learning_rate": 2.9301119587833e-05, "loss": 2.151, "step": 3971 }, { "epoch": 0.1281520272790884, "grad_norm": 0.447265625, "learning_rate": 2.9300644738763172e-05, "loss": 2.133, "step": 3972 }, { "epoch": 0.12818429113288474, "grad_norm": 0.44140625, "learning_rate": 2.9300169732282107e-05, "loss": 2.0924, "step": 3973 }, { "epoch": 0.12821655498668108, "grad_norm": 0.384765625, "learning_rate": 2.9299694568395036e-05, "loss": 2.1234, "step": 3974 }, { "epoch": 0.12824881884047742, "grad_norm": 0.3984375, "learning_rate": 2.929921924710718e-05, "loss": 2.0888, "step": 3975 }, { "epoch": 0.12828108269427377, "grad_norm": 0.447265625, "learning_rate": 2.9298743768423782e-05, "loss": 2.0336, "step": 3976 }, { "epoch": 0.1283133465480701, "grad_norm": 0.37109375, "learning_rate": 2.9298268132350066e-05, "loss": 2.0437, "step": 3977 }, { "epoch": 0.12834561040186646, "grad_norm": 0.388671875, "learning_rate": 2.9297792338891276e-05, "loss": 2.0699, "step": 3978 }, { "epoch": 0.1283778742556628, "grad_norm": 0.3828125, "learning_rate": 2.9297316388052643e-05, "loss": 2.068, "step": 3979 }, { "epoch": 0.12841013810945914, "grad_norm": 0.404296875, "learning_rate": 2.9296840279839408e-05, "loss": 2.0991, "step": 3980 }, { "epoch": 0.12844240196325551, "grad_norm": 0.421875, "learning_rate": 2.929636401425681e-05, "loss": 2.1132, "step": 3981 }, { "epoch": 0.12847466581705186, "grad_norm": 0.404296875, "learning_rate": 2.9295887591310095e-05, "loss": 2.1305, "step": 3982 }, { "epoch": 0.1285069296708482, "grad_norm": 0.443359375, "learning_rate": 2.9295411011004504e-05, "loss": 2.1289, "step": 3983 }, { "epoch": 0.12853919352464455, "grad_norm": 0.396484375, "learning_rate": 2.9294934273345287e-05, "loss": 2.1125, "step": 3984 }, { "epoch": 0.1285714573784409, "grad_norm": 0.484375, "learning_rate": 2.9294457378337686e-05, "loss": 2.1487, "step": 3985 }, { "epoch": 0.12860372123223723, "grad_norm": 0.474609375, "learning_rate": 2.929398032598695e-05, "loss": 2.1131, "step": 3986 }, { "epoch": 0.12863598508603358, "grad_norm": 0.412109375, "learning_rate": 2.929350311629834e-05, "loss": 2.1107, "step": 3987 }, { "epoch": 0.12866824893982992, "grad_norm": 0.38671875, "learning_rate": 2.92930257492771e-05, "loss": 2.1081, "step": 3988 }, { "epoch": 0.12870051279362626, "grad_norm": 0.384765625, "learning_rate": 2.9292548224928487e-05, "loss": 2.1207, "step": 3989 }, { "epoch": 0.1287327766474226, "grad_norm": 0.416015625, "learning_rate": 2.9292070543257755e-05, "loss": 2.1366, "step": 3990 }, { "epoch": 0.12876504050121898, "grad_norm": 0.40625, "learning_rate": 2.929159270427016e-05, "loss": 2.1303, "step": 3991 }, { "epoch": 0.12879730435501532, "grad_norm": 0.435546875, "learning_rate": 2.9291114707970967e-05, "loss": 2.1216, "step": 3992 }, { "epoch": 0.12882956820881167, "grad_norm": 0.439453125, "learning_rate": 2.9290636554365438e-05, "loss": 2.1372, "step": 3993 }, { "epoch": 0.128861832062608, "grad_norm": 0.3984375, "learning_rate": 2.9290158243458836e-05, "loss": 2.1205, "step": 3994 }, { "epoch": 0.12889409591640436, "grad_norm": 0.416015625, "learning_rate": 2.9289679775256416e-05, "loss": 2.1484, "step": 3995 }, { "epoch": 0.1289263597702007, "grad_norm": 0.44140625, "learning_rate": 2.928920114976346e-05, "loss": 2.0987, "step": 3996 }, { "epoch": 0.12895862362399704, "grad_norm": 0.41796875, "learning_rate": 2.9288722366985223e-05, "loss": 2.1428, "step": 3997 }, { "epoch": 0.1289908874777934, "grad_norm": 0.443359375, "learning_rate": 2.9288243426926984e-05, "loss": 2.1371, "step": 3998 }, { "epoch": 0.12902315133158973, "grad_norm": 0.478515625, "learning_rate": 2.9287764329594017e-05, "loss": 2.1314, "step": 3999 }, { "epoch": 0.12905541518538607, "grad_norm": 0.5078125, "learning_rate": 2.928728507499158e-05, "loss": 2.128, "step": 4000 }, { "epoch": 0.12908767903918245, "grad_norm": 0.44140625, "learning_rate": 2.9286805663124963e-05, "loss": 2.1172, "step": 4001 }, { "epoch": 0.1291199428929788, "grad_norm": 0.42578125, "learning_rate": 2.928632609399944e-05, "loss": 2.1271, "step": 4002 }, { "epoch": 0.12915220674677513, "grad_norm": 0.462890625, "learning_rate": 2.9285846367620288e-05, "loss": 2.1396, "step": 4003 }, { "epoch": 0.12918447060057148, "grad_norm": 0.451171875, "learning_rate": 2.9285366483992786e-05, "loss": 2.1405, "step": 4004 }, { "epoch": 0.12921673445436782, "grad_norm": 0.49609375, "learning_rate": 2.928488644312222e-05, "loss": 2.1398, "step": 4005 }, { "epoch": 0.12924899830816416, "grad_norm": 0.4453125, "learning_rate": 2.928440624501387e-05, "loss": 2.1182, "step": 4006 }, { "epoch": 0.1292812621619605, "grad_norm": 0.419921875, "learning_rate": 2.9283925889673024e-05, "loss": 2.1138, "step": 4007 }, { "epoch": 0.12931352601575685, "grad_norm": 0.46484375, "learning_rate": 2.9283445377104968e-05, "loss": 2.1233, "step": 4008 }, { "epoch": 0.1293457898695532, "grad_norm": 0.470703125, "learning_rate": 2.9282964707314997e-05, "loss": 2.119, "step": 4009 }, { "epoch": 0.12937805372334954, "grad_norm": 0.404296875, "learning_rate": 2.9282483880308396e-05, "loss": 2.1079, "step": 4010 }, { "epoch": 0.1294103175771459, "grad_norm": 0.431640625, "learning_rate": 2.9282002896090453e-05, "loss": 2.0802, "step": 4011 }, { "epoch": 0.12944258143094226, "grad_norm": 0.43359375, "learning_rate": 2.9281521754666472e-05, "loss": 2.1144, "step": 4012 }, { "epoch": 0.1294748452847386, "grad_norm": 0.439453125, "learning_rate": 2.928104045604174e-05, "loss": 2.118, "step": 4013 }, { "epoch": 0.12950710913853494, "grad_norm": 0.41796875, "learning_rate": 2.9280559000221567e-05, "loss": 2.1287, "step": 4014 }, { "epoch": 0.1295393729923313, "grad_norm": 0.41796875, "learning_rate": 2.9280077387211247e-05, "loss": 2.129, "step": 4015 }, { "epoch": 0.12957163684612763, "grad_norm": 0.41796875, "learning_rate": 2.9279595617016075e-05, "loss": 2.0972, "step": 4016 }, { "epoch": 0.12960390069992397, "grad_norm": 0.4140625, "learning_rate": 2.9279113689641355e-05, "loss": 2.1382, "step": 4017 }, { "epoch": 0.12963616455372032, "grad_norm": 0.43359375, "learning_rate": 2.92786316050924e-05, "loss": 2.1314, "step": 4018 }, { "epoch": 0.12966842840751666, "grad_norm": 0.380859375, "learning_rate": 2.9278149363374513e-05, "loss": 2.1176, "step": 4019 }, { "epoch": 0.129700692261313, "grad_norm": 0.412109375, "learning_rate": 2.9277666964492998e-05, "loss": 2.1149, "step": 4020 }, { "epoch": 0.12973295611510938, "grad_norm": 0.435546875, "learning_rate": 2.927718440845317e-05, "loss": 2.1277, "step": 4021 }, { "epoch": 0.12976521996890572, "grad_norm": 0.435546875, "learning_rate": 2.9276701695260337e-05, "loss": 2.1278, "step": 4022 }, { "epoch": 0.12979748382270206, "grad_norm": 0.458984375, "learning_rate": 2.9276218824919812e-05, "loss": 2.1069, "step": 4023 }, { "epoch": 0.1298297476764984, "grad_norm": 0.58203125, "learning_rate": 2.9275735797436912e-05, "loss": 2.1433, "step": 4024 }, { "epoch": 0.12986201153029475, "grad_norm": 0.73046875, "learning_rate": 2.927525261281696e-05, "loss": 2.1395, "step": 4025 }, { "epoch": 0.1298942753840911, "grad_norm": 0.9609375, "learning_rate": 2.9274769271065263e-05, "loss": 2.1464, "step": 4026 }, { "epoch": 0.12992653923788744, "grad_norm": 0.875, "learning_rate": 2.927428577218715e-05, "loss": 2.1219, "step": 4027 }, { "epoch": 0.12995880309168378, "grad_norm": 0.447265625, "learning_rate": 2.9273802116187933e-05, "loss": 2.1269, "step": 4028 }, { "epoch": 0.12999106694548013, "grad_norm": 0.71484375, "learning_rate": 2.9273318303072948e-05, "loss": 2.1298, "step": 4029 }, { "epoch": 0.13002333079927647, "grad_norm": 0.515625, "learning_rate": 2.927283433284751e-05, "loss": 2.1109, "step": 4030 }, { "epoch": 0.13005559465307284, "grad_norm": 0.52734375, "learning_rate": 2.9272350205516954e-05, "loss": 2.115, "step": 4031 }, { "epoch": 0.1300878585068692, "grad_norm": 0.546875, "learning_rate": 2.927186592108661e-05, "loss": 2.1176, "step": 4032 }, { "epoch": 0.13012012236066553, "grad_norm": 0.453125, "learning_rate": 2.9271381479561798e-05, "loss": 2.1101, "step": 4033 }, { "epoch": 0.13015238621446187, "grad_norm": 0.5390625, "learning_rate": 2.927089688094786e-05, "loss": 2.128, "step": 4034 }, { "epoch": 0.13018465006825822, "grad_norm": 0.4296875, "learning_rate": 2.9270412125250125e-05, "loss": 2.1456, "step": 4035 }, { "epoch": 0.13021691392205456, "grad_norm": 0.52734375, "learning_rate": 2.9269927212473932e-05, "loss": 2.1502, "step": 4036 }, { "epoch": 0.1302491777758509, "grad_norm": 0.416015625, "learning_rate": 2.9269442142624617e-05, "loss": 2.132, "step": 4037 }, { "epoch": 0.13028144162964725, "grad_norm": 0.447265625, "learning_rate": 2.9268956915707523e-05, "loss": 2.1042, "step": 4038 }, { "epoch": 0.1303137054834436, "grad_norm": 0.41015625, "learning_rate": 2.9268471531727985e-05, "loss": 2.1111, "step": 4039 }, { "epoch": 0.13034596933723994, "grad_norm": 0.40625, "learning_rate": 2.926798599069135e-05, "loss": 2.1082, "step": 4040 }, { "epoch": 0.1303782331910363, "grad_norm": 0.40625, "learning_rate": 2.9267500292602955e-05, "loss": 2.105, "step": 4041 }, { "epoch": 0.13041049704483265, "grad_norm": 0.40625, "learning_rate": 2.9267014437468154e-05, "loss": 2.1369, "step": 4042 }, { "epoch": 0.130442760898629, "grad_norm": 0.40234375, "learning_rate": 2.9266528425292297e-05, "loss": 2.1294, "step": 4043 }, { "epoch": 0.13047502475242534, "grad_norm": 0.388671875, "learning_rate": 2.926604225608073e-05, "loss": 2.1104, "step": 4044 }, { "epoch": 0.13050728860622168, "grad_norm": 0.419921875, "learning_rate": 2.9265555929838807e-05, "loss": 2.1321, "step": 4045 }, { "epoch": 0.13053955246001803, "grad_norm": 0.412109375, "learning_rate": 2.9265069446571874e-05, "loss": 2.0953, "step": 4046 }, { "epoch": 0.13057181631381437, "grad_norm": 0.39453125, "learning_rate": 2.9264582806285287e-05, "loss": 2.047, "step": 4047 }, { "epoch": 0.13060408016761071, "grad_norm": 0.447265625, "learning_rate": 2.926409600898441e-05, "loss": 2.1395, "step": 4048 }, { "epoch": 0.13063634402140706, "grad_norm": 0.404296875, "learning_rate": 2.9263609054674598e-05, "loss": 2.1, "step": 4049 }, { "epoch": 0.1306686078752034, "grad_norm": 0.427734375, "learning_rate": 2.9263121943361212e-05, "loss": 2.1157, "step": 4050 }, { "epoch": 0.13070087172899977, "grad_norm": 0.435546875, "learning_rate": 2.926263467504961e-05, "loss": 2.1333, "step": 4051 }, { "epoch": 0.13073313558279612, "grad_norm": 0.39453125, "learning_rate": 2.926214724974515e-05, "loss": 2.0978, "step": 4052 }, { "epoch": 0.13076539943659246, "grad_norm": 0.4375, "learning_rate": 2.9261659667453213e-05, "loss": 2.135, "step": 4053 }, { "epoch": 0.1307976632903888, "grad_norm": 0.45703125, "learning_rate": 2.9261171928179155e-05, "loss": 2.1324, "step": 4054 }, { "epoch": 0.13082992714418515, "grad_norm": 0.40234375, "learning_rate": 2.926068403192835e-05, "loss": 2.1249, "step": 4055 }, { "epoch": 0.1308621909979815, "grad_norm": 0.4765625, "learning_rate": 2.9260195978706165e-05, "loss": 2.1095, "step": 4056 }, { "epoch": 0.13089445485177784, "grad_norm": 0.455078125, "learning_rate": 2.925970776851797e-05, "loss": 2.1024, "step": 4057 }, { "epoch": 0.13092671870557418, "grad_norm": 0.419921875, "learning_rate": 2.9259219401369145e-05, "loss": 2.1373, "step": 4058 }, { "epoch": 0.13095898255937052, "grad_norm": 0.41015625, "learning_rate": 2.9258730877265058e-05, "loss": 2.1184, "step": 4059 }, { "epoch": 0.13099124641316687, "grad_norm": 0.44140625, "learning_rate": 2.9258242196211095e-05, "loss": 2.0522, "step": 4060 }, { "epoch": 0.13102351026696324, "grad_norm": 0.435546875, "learning_rate": 2.925775335821263e-05, "loss": 2.0557, "step": 4061 }, { "epoch": 0.13105577412075958, "grad_norm": 0.38671875, "learning_rate": 2.9257264363275043e-05, "loss": 2.0513, "step": 4062 }, { "epoch": 0.13108803797455593, "grad_norm": 0.41796875, "learning_rate": 2.925677521140372e-05, "loss": 2.066, "step": 4063 }, { "epoch": 0.13112030182835227, "grad_norm": 0.392578125, "learning_rate": 2.925628590260404e-05, "loss": 2.0459, "step": 4064 }, { "epoch": 0.13115256568214861, "grad_norm": 0.416015625, "learning_rate": 2.9255796436881394e-05, "loss": 2.0845, "step": 4065 }, { "epoch": 0.13118482953594496, "grad_norm": 0.4453125, "learning_rate": 2.925530681424117e-05, "loss": 2.0801, "step": 4066 }, { "epoch": 0.1312170933897413, "grad_norm": 0.4140625, "learning_rate": 2.9254817034688753e-05, "loss": 2.0731, "step": 4067 }, { "epoch": 0.13124935724353765, "grad_norm": 0.4375, "learning_rate": 2.925432709822954e-05, "loss": 2.0381, "step": 4068 }, { "epoch": 0.131281621097334, "grad_norm": 0.453125, "learning_rate": 2.9253837004868914e-05, "loss": 2.0659, "step": 4069 }, { "epoch": 0.13131388495113033, "grad_norm": 0.400390625, "learning_rate": 2.925334675461228e-05, "loss": 2.054, "step": 4070 }, { "epoch": 0.13134614880492668, "grad_norm": 0.443359375, "learning_rate": 2.925285634746503e-05, "loss": 2.0656, "step": 4071 }, { "epoch": 0.13137841265872305, "grad_norm": 0.443359375, "learning_rate": 2.925236578343256e-05, "loss": 2.0316, "step": 4072 }, { "epoch": 0.1314106765125194, "grad_norm": 0.38671875, "learning_rate": 2.9251875062520276e-05, "loss": 2.0227, "step": 4073 }, { "epoch": 0.13144294036631574, "grad_norm": 0.42578125, "learning_rate": 2.9251384184733568e-05, "loss": 2.0415, "step": 4074 }, { "epoch": 0.13147520422011208, "grad_norm": 0.41796875, "learning_rate": 2.9250893150077856e-05, "loss": 2.0367, "step": 4075 }, { "epoch": 0.13150746807390842, "grad_norm": 0.376953125, "learning_rate": 2.9250401958558533e-05, "loss": 2.0292, "step": 4076 }, { "epoch": 0.13153973192770477, "grad_norm": 0.39453125, "learning_rate": 2.9249910610181004e-05, "loss": 2.0335, "step": 4077 }, { "epoch": 0.1315719957815011, "grad_norm": 0.443359375, "learning_rate": 2.924941910495069e-05, "loss": 2.0742, "step": 4078 }, { "epoch": 0.13160425963529745, "grad_norm": 0.458984375, "learning_rate": 2.9248927442872983e-05, "loss": 2.0429, "step": 4079 }, { "epoch": 0.1316365234890938, "grad_norm": 0.40234375, "learning_rate": 2.9248435623953314e-05, "loss": 2.0517, "step": 4080 }, { "epoch": 0.13166878734289014, "grad_norm": 0.419921875, "learning_rate": 2.9247943648197078e-05, "loss": 2.0792, "step": 4081 }, { "epoch": 0.1317010511966865, "grad_norm": 0.484375, "learning_rate": 2.9247451515609707e-05, "loss": 2.0418, "step": 4082 }, { "epoch": 0.13173331505048286, "grad_norm": 0.470703125, "learning_rate": 2.924695922619661e-05, "loss": 2.0409, "step": 4083 }, { "epoch": 0.1317655789042792, "grad_norm": 0.4296875, "learning_rate": 2.9246466779963207e-05, "loss": 2.0649, "step": 4084 }, { "epoch": 0.13179784275807555, "grad_norm": 0.462890625, "learning_rate": 2.9245974176914913e-05, "loss": 2.0641, "step": 4085 }, { "epoch": 0.1318301066118719, "grad_norm": 0.404296875, "learning_rate": 2.9245481417057164e-05, "loss": 2.0821, "step": 4086 }, { "epoch": 0.13186237046566823, "grad_norm": 0.4140625, "learning_rate": 2.924498850039537e-05, "loss": 2.0394, "step": 4087 }, { "epoch": 0.13189463431946458, "grad_norm": 0.3984375, "learning_rate": 2.924449542693496e-05, "loss": 2.0567, "step": 4088 }, { "epoch": 0.13192689817326092, "grad_norm": 0.431640625, "learning_rate": 2.9244002196681364e-05, "loss": 2.0394, "step": 4089 }, { "epoch": 0.13195916202705726, "grad_norm": 0.44140625, "learning_rate": 2.924350880964001e-05, "loss": 2.0486, "step": 4090 }, { "epoch": 0.1319914258808536, "grad_norm": 0.419921875, "learning_rate": 2.9243015265816332e-05, "loss": 2.0671, "step": 4091 }, { "epoch": 0.13202368973464998, "grad_norm": 0.447265625, "learning_rate": 2.9242521565215762e-05, "loss": 2.0688, "step": 4092 }, { "epoch": 0.13205595358844632, "grad_norm": 0.494140625, "learning_rate": 2.9242027707843722e-05, "loss": 2.0794, "step": 4093 }, { "epoch": 0.13208821744224267, "grad_norm": 0.58203125, "learning_rate": 2.9241533693705667e-05, "loss": 2.0576, "step": 4094 }, { "epoch": 0.132120481296039, "grad_norm": 0.7109375, "learning_rate": 2.9241039522807024e-05, "loss": 2.0459, "step": 4095 }, { "epoch": 0.13215274514983535, "grad_norm": 0.703125, "learning_rate": 2.9240545195153235e-05, "loss": 2.0266, "step": 4096 }, { "epoch": 0.1321850090036317, "grad_norm": 0.50390625, "learning_rate": 2.9240050710749742e-05, "loss": 2.062, "step": 4097 }, { "epoch": 0.13221727285742804, "grad_norm": 0.4765625, "learning_rate": 2.923955606960198e-05, "loss": 2.0687, "step": 4098 }, { "epoch": 0.13224953671122439, "grad_norm": 0.6015625, "learning_rate": 2.923906127171541e-05, "loss": 2.0733, "step": 4099 }, { "epoch": 0.13228180056502073, "grad_norm": 0.455078125, "learning_rate": 2.9238566317095463e-05, "loss": 2.0066, "step": 4100 }, { "epoch": 0.13231406441881707, "grad_norm": 0.466796875, "learning_rate": 2.923807120574759e-05, "loss": 2.047, "step": 4101 }, { "epoch": 0.13234632827261344, "grad_norm": 0.578125, "learning_rate": 2.9237575937677247e-05, "loss": 2.0922, "step": 4102 }, { "epoch": 0.1323785921264098, "grad_norm": 0.419921875, "learning_rate": 2.9237080512889885e-05, "loss": 2.0601, "step": 4103 }, { "epoch": 0.13241085598020613, "grad_norm": 0.48828125, "learning_rate": 2.9236584931390947e-05, "loss": 2.0343, "step": 4104 }, { "epoch": 0.13244311983400248, "grad_norm": 0.4375, "learning_rate": 2.92360891931859e-05, "loss": 2.0643, "step": 4105 }, { "epoch": 0.13247538368779882, "grad_norm": 0.423828125, "learning_rate": 2.9235593298280195e-05, "loss": 2.0801, "step": 4106 }, { "epoch": 0.13250764754159516, "grad_norm": 0.51171875, "learning_rate": 2.9235097246679294e-05, "loss": 2.074, "step": 4107 }, { "epoch": 0.1325399113953915, "grad_norm": 0.376953125, "learning_rate": 2.923460103838865e-05, "loss": 2.0723, "step": 4108 }, { "epoch": 0.13257217524918785, "grad_norm": 0.462890625, "learning_rate": 2.9234104673413734e-05, "loss": 2.0471, "step": 4109 }, { "epoch": 0.1326044391029842, "grad_norm": 0.41015625, "learning_rate": 2.9233608151760002e-05, "loss": 2.0377, "step": 4110 }, { "epoch": 0.13263670295678054, "grad_norm": 0.40625, "learning_rate": 2.9233111473432928e-05, "loss": 2.0417, "step": 4111 }, { "epoch": 0.1326689668105769, "grad_norm": 0.482421875, "learning_rate": 2.923261463843797e-05, "loss": 2.0482, "step": 4112 }, { "epoch": 0.13270123066437325, "grad_norm": 0.419921875, "learning_rate": 2.92321176467806e-05, "loss": 2.0303, "step": 4113 }, { "epoch": 0.1327334945181696, "grad_norm": 0.388671875, "learning_rate": 2.9231620498466287e-05, "loss": 2.048, "step": 4114 }, { "epoch": 0.13276575837196594, "grad_norm": 0.466796875, "learning_rate": 2.923112319350051e-05, "loss": 2.0199, "step": 4115 }, { "epoch": 0.13279802222576229, "grad_norm": 0.421875, "learning_rate": 2.9230625731888734e-05, "loss": 2.0662, "step": 4116 }, { "epoch": 0.13283028607955863, "grad_norm": 0.404296875, "learning_rate": 2.9230128113636442e-05, "loss": 2.0498, "step": 4117 }, { "epoch": 0.13286254993335497, "grad_norm": 0.482421875, "learning_rate": 2.922963033874911e-05, "loss": 2.0227, "step": 4118 }, { "epoch": 0.13289481378715132, "grad_norm": 0.42578125, "learning_rate": 2.922913240723221e-05, "loss": 2.0754, "step": 4119 }, { "epoch": 0.13292707764094766, "grad_norm": 0.4140625, "learning_rate": 2.9228634319091233e-05, "loss": 2.0434, "step": 4120 }, { "epoch": 0.132959341494744, "grad_norm": 0.515625, "learning_rate": 2.9228136074331655e-05, "loss": 2.0308, "step": 4121 }, { "epoch": 0.13299160534854038, "grad_norm": 0.447265625, "learning_rate": 2.922763767295896e-05, "loss": 2.0713, "step": 4122 }, { "epoch": 0.13302386920233672, "grad_norm": 0.404296875, "learning_rate": 2.922713911497864e-05, "loss": 2.0743, "step": 4123 }, { "epoch": 0.13305613305613306, "grad_norm": 0.48828125, "learning_rate": 2.922664040039618e-05, "loss": 2.0295, "step": 4124 }, { "epoch": 0.1330883969099294, "grad_norm": 0.458984375, "learning_rate": 2.9226141529217065e-05, "loss": 2.0793, "step": 4125 }, { "epoch": 0.13312066076372575, "grad_norm": 0.490234375, "learning_rate": 2.9225642501446793e-05, "loss": 2.0581, "step": 4126 }, { "epoch": 0.1331529246175221, "grad_norm": 0.47265625, "learning_rate": 2.9225143317090846e-05, "loss": 2.0613, "step": 4127 }, { "epoch": 0.13318518847131844, "grad_norm": 0.421875, "learning_rate": 2.9224643976154733e-05, "loss": 2.0856, "step": 4128 }, { "epoch": 0.13321745232511478, "grad_norm": 0.43359375, "learning_rate": 2.9224144478643947e-05, "loss": 2.0385, "step": 4129 }, { "epoch": 0.13324971617891113, "grad_norm": 0.4765625, "learning_rate": 2.9223644824563978e-05, "loss": 2.0358, "step": 4130 }, { "epoch": 0.13328198003270747, "grad_norm": 0.443359375, "learning_rate": 2.922314501392033e-05, "loss": 2.0636, "step": 4131 }, { "epoch": 0.13331424388650384, "grad_norm": 0.453125, "learning_rate": 2.9222645046718512e-05, "loss": 2.0352, "step": 4132 }, { "epoch": 0.13334650774030019, "grad_norm": 0.478515625, "learning_rate": 2.9222144922964016e-05, "loss": 2.081, "step": 4133 }, { "epoch": 0.13337877159409653, "grad_norm": 0.4609375, "learning_rate": 2.922164464266235e-05, "loss": 2.0647, "step": 4134 }, { "epoch": 0.13341103544789287, "grad_norm": 0.431640625, "learning_rate": 2.9221144205819026e-05, "loss": 2.0639, "step": 4135 }, { "epoch": 0.13344329930168922, "grad_norm": 0.498046875, "learning_rate": 2.9220643612439543e-05, "loss": 2.0819, "step": 4136 }, { "epoch": 0.13347556315548556, "grad_norm": 0.44140625, "learning_rate": 2.922014286252942e-05, "loss": 2.0455, "step": 4137 }, { "epoch": 0.1335078270092819, "grad_norm": 0.45703125, "learning_rate": 2.921964195609417e-05, "loss": 2.0554, "step": 4138 }, { "epoch": 0.13354009086307825, "grad_norm": 0.50390625, "learning_rate": 2.9219140893139297e-05, "loss": 2.065, "step": 4139 }, { "epoch": 0.1335723547168746, "grad_norm": 0.4296875, "learning_rate": 2.9218639673670326e-05, "loss": 2.0438, "step": 4140 }, { "epoch": 0.13360461857067094, "grad_norm": 0.42578125, "learning_rate": 2.9218138297692766e-05, "loss": 2.0619, "step": 4141 }, { "epoch": 0.1336368824244673, "grad_norm": 0.484375, "learning_rate": 2.921763676521214e-05, "loss": 2.0475, "step": 4142 }, { "epoch": 0.13366914627826365, "grad_norm": 0.423828125, "learning_rate": 2.9217135076233972e-05, "loss": 2.0433, "step": 4143 }, { "epoch": 0.13370141013206, "grad_norm": 0.46875, "learning_rate": 2.9216633230763777e-05, "loss": 2.0749, "step": 4144 }, { "epoch": 0.13373367398585634, "grad_norm": 0.51171875, "learning_rate": 2.9216131228807083e-05, "loss": 2.0449, "step": 4145 }, { "epoch": 0.13376593783965268, "grad_norm": 0.44921875, "learning_rate": 2.9215629070369415e-05, "loss": 2.043, "step": 4146 }, { "epoch": 0.13379820169344903, "grad_norm": 0.458984375, "learning_rate": 2.9215126755456304e-05, "loss": 2.075, "step": 4147 }, { "epoch": 0.13383046554724537, "grad_norm": 0.44140625, "learning_rate": 2.921462428407327e-05, "loss": 2.0766, "step": 4148 }, { "epoch": 0.1338627294010417, "grad_norm": 0.416015625, "learning_rate": 2.9214121656225853e-05, "loss": 2.0468, "step": 4149 }, { "epoch": 0.13389499325483806, "grad_norm": 0.4921875, "learning_rate": 2.9213618871919582e-05, "loss": 2.0875, "step": 4150 }, { "epoch": 0.1339272571086344, "grad_norm": 0.58984375, "learning_rate": 2.9213115931159993e-05, "loss": 2.0492, "step": 4151 }, { "epoch": 0.13395952096243077, "grad_norm": 0.578125, "learning_rate": 2.9212612833952612e-05, "loss": 2.0663, "step": 4152 }, { "epoch": 0.13399178481622712, "grad_norm": 0.45703125, "learning_rate": 2.9212109580302993e-05, "loss": 2.0954, "step": 4153 }, { "epoch": 0.13402404867002346, "grad_norm": 0.51953125, "learning_rate": 2.921160617021666e-05, "loss": 2.1347, "step": 4154 }, { "epoch": 0.1340563125238198, "grad_norm": 0.58203125, "learning_rate": 2.921110260369917e-05, "loss": 2.1263, "step": 4155 }, { "epoch": 0.13408857637761615, "grad_norm": 0.58203125, "learning_rate": 2.9210598880756055e-05, "loss": 2.0904, "step": 4156 }, { "epoch": 0.1341208402314125, "grad_norm": 0.484375, "learning_rate": 2.921009500139286e-05, "loss": 2.1103, "step": 4157 }, { "epoch": 0.13415310408520884, "grad_norm": 0.546875, "learning_rate": 2.920959096561514e-05, "loss": 2.1089, "step": 4158 }, { "epoch": 0.13418536793900518, "grad_norm": 0.54296875, "learning_rate": 2.9209086773428426e-05, "loss": 2.1012, "step": 4159 }, { "epoch": 0.13421763179280152, "grad_norm": 0.46875, "learning_rate": 2.9208582424838286e-05, "loss": 2.1286, "step": 4160 }, { "epoch": 0.13424989564659787, "grad_norm": 0.44921875, "learning_rate": 2.920807791985026e-05, "loss": 2.1046, "step": 4161 }, { "epoch": 0.1342821595003942, "grad_norm": 0.47265625, "learning_rate": 2.920757325846991e-05, "loss": 2.1464, "step": 4162 }, { "epoch": 0.13431442335419058, "grad_norm": 0.44140625, "learning_rate": 2.9207068440702778e-05, "loss": 2.1589, "step": 4163 }, { "epoch": 0.13434668720798693, "grad_norm": 0.427734375, "learning_rate": 2.9206563466554436e-05, "loss": 2.1293, "step": 4164 }, { "epoch": 0.13437895106178327, "grad_norm": 0.451171875, "learning_rate": 2.9206058336030435e-05, "loss": 2.127, "step": 4165 }, { "epoch": 0.1344112149155796, "grad_norm": 0.412109375, "learning_rate": 2.920555304913633e-05, "loss": 2.1377, "step": 4166 }, { "epoch": 0.13444347876937596, "grad_norm": 0.4140625, "learning_rate": 2.920504760587769e-05, "loss": 2.1213, "step": 4167 }, { "epoch": 0.1344757426231723, "grad_norm": 0.466796875, "learning_rate": 2.9204542006260078e-05, "loss": 2.1229, "step": 4168 }, { "epoch": 0.13450800647696864, "grad_norm": 0.466796875, "learning_rate": 2.920403625028906e-05, "loss": 2.1349, "step": 4169 }, { "epoch": 0.134540270330765, "grad_norm": 0.408203125, "learning_rate": 2.9203530337970196e-05, "loss": 2.1083, "step": 4170 }, { "epoch": 0.13457253418456133, "grad_norm": 0.45703125, "learning_rate": 2.920302426930906e-05, "loss": 2.0924, "step": 4171 }, { "epoch": 0.13460479803835768, "grad_norm": 0.40625, "learning_rate": 2.9202518044311224e-05, "loss": 2.1109, "step": 4172 }, { "epoch": 0.13463706189215405, "grad_norm": 0.462890625, "learning_rate": 2.920201166298226e-05, "loss": 2.1141, "step": 4173 }, { "epoch": 0.1346693257459504, "grad_norm": 0.390625, "learning_rate": 2.920150512532774e-05, "loss": 2.1071, "step": 4174 }, { "epoch": 0.13470158959974673, "grad_norm": 0.478515625, "learning_rate": 2.9200998431353238e-05, "loss": 2.1035, "step": 4175 }, { "epoch": 0.13473385345354308, "grad_norm": 0.447265625, "learning_rate": 2.920049158106433e-05, "loss": 2.1175, "step": 4176 }, { "epoch": 0.13476611730733942, "grad_norm": 0.39453125, "learning_rate": 2.91999845744666e-05, "loss": 2.1098, "step": 4177 }, { "epoch": 0.13479838116113577, "grad_norm": 0.4375, "learning_rate": 2.919947741156563e-05, "loss": 2.1187, "step": 4178 }, { "epoch": 0.1348306450149321, "grad_norm": 0.431640625, "learning_rate": 2.9198970092366995e-05, "loss": 2.1043, "step": 4179 }, { "epoch": 0.13486290886872845, "grad_norm": 0.439453125, "learning_rate": 2.9198462616876285e-05, "loss": 2.1128, "step": 4180 }, { "epoch": 0.1348951727225248, "grad_norm": 0.42578125, "learning_rate": 2.9197954985099085e-05, "loss": 2.1169, "step": 4181 }, { "epoch": 0.13492743657632114, "grad_norm": 0.35546875, "learning_rate": 2.9197447197040983e-05, "loss": 2.1369, "step": 4182 }, { "epoch": 0.1349597004301175, "grad_norm": 0.408203125, "learning_rate": 2.9196939252707565e-05, "loss": 2.1323, "step": 4183 }, { "epoch": 0.13499196428391386, "grad_norm": 0.466796875, "learning_rate": 2.919643115210443e-05, "loss": 2.0999, "step": 4184 }, { "epoch": 0.1350242281377102, "grad_norm": 0.4765625, "learning_rate": 2.919592289523716e-05, "loss": 2.1157, "step": 4185 }, { "epoch": 0.13505649199150654, "grad_norm": 0.423828125, "learning_rate": 2.9195414482111357e-05, "loss": 2.0827, "step": 4186 }, { "epoch": 0.1350887558453029, "grad_norm": 0.39453125, "learning_rate": 2.919490591273261e-05, "loss": 2.1281, "step": 4187 }, { "epoch": 0.13512101969909923, "grad_norm": 0.3984375, "learning_rate": 2.9194397187106527e-05, "loss": 2.1245, "step": 4188 }, { "epoch": 0.13515328355289558, "grad_norm": 0.427734375, "learning_rate": 2.9193888305238703e-05, "loss": 2.1469, "step": 4189 }, { "epoch": 0.13518554740669192, "grad_norm": 0.4296875, "learning_rate": 2.919337926713474e-05, "loss": 2.1431, "step": 4190 }, { "epoch": 0.13521781126048826, "grad_norm": 0.421875, "learning_rate": 2.9192870072800236e-05, "loss": 2.1108, "step": 4191 }, { "epoch": 0.1352500751142846, "grad_norm": 0.43359375, "learning_rate": 2.919236072224081e-05, "loss": 2.1349, "step": 4192 }, { "epoch": 0.13528233896808098, "grad_norm": 0.47265625, "learning_rate": 2.9191851215462048e-05, "loss": 2.125, "step": 4193 }, { "epoch": 0.13531460282187732, "grad_norm": 0.43359375, "learning_rate": 2.9191341552469574e-05, "loss": 2.1206, "step": 4194 }, { "epoch": 0.13534686667567367, "grad_norm": 0.4140625, "learning_rate": 2.9190831733268992e-05, "loss": 2.1026, "step": 4195 }, { "epoch": 0.13537913052947, "grad_norm": 0.400390625, "learning_rate": 2.9190321757865916e-05, "loss": 2.1417, "step": 4196 }, { "epoch": 0.13541139438326635, "grad_norm": 0.435546875, "learning_rate": 2.9189811626265952e-05, "loss": 2.1224, "step": 4197 }, { "epoch": 0.1354436582370627, "grad_norm": 0.40234375, "learning_rate": 2.918930133847473e-05, "loss": 2.1066, "step": 4198 }, { "epoch": 0.13547592209085904, "grad_norm": 0.404296875, "learning_rate": 2.918879089449785e-05, "loss": 2.0817, "step": 4199 }, { "epoch": 0.13550818594465538, "grad_norm": 0.412109375, "learning_rate": 2.918828029434095e-05, "loss": 2.1237, "step": 4200 }, { "epoch": 0.13554044979845173, "grad_norm": 0.43359375, "learning_rate": 2.9187769538009626e-05, "loss": 2.1109, "step": 4201 }, { "epoch": 0.13557271365224807, "grad_norm": 0.458984375, "learning_rate": 2.9187258625509518e-05, "loss": 2.1289, "step": 4202 }, { "epoch": 0.13560497750604444, "grad_norm": 0.5625, "learning_rate": 2.918674755684625e-05, "loss": 2.1268, "step": 4203 }, { "epoch": 0.1356372413598408, "grad_norm": 0.7421875, "learning_rate": 2.918623633202544e-05, "loss": 2.1273, "step": 4204 }, { "epoch": 0.13566950521363713, "grad_norm": 0.86328125, "learning_rate": 2.9185724951052713e-05, "loss": 2.116, "step": 4205 }, { "epoch": 0.13570176906743348, "grad_norm": 0.6484375, "learning_rate": 2.918521341393371e-05, "loss": 2.1047, "step": 4206 }, { "epoch": 0.13573403292122982, "grad_norm": 0.4375, "learning_rate": 2.9184701720674044e-05, "loss": 2.1091, "step": 4207 }, { "epoch": 0.13576629677502616, "grad_norm": 0.67578125, "learning_rate": 2.9184189871279368e-05, "loss": 2.1158, "step": 4208 }, { "epoch": 0.1357985606288225, "grad_norm": 0.498046875, "learning_rate": 2.91836778657553e-05, "loss": 2.0907, "step": 4209 }, { "epoch": 0.13583082448261885, "grad_norm": 0.5078125, "learning_rate": 2.9183165704107484e-05, "loss": 2.1257, "step": 4210 }, { "epoch": 0.1358630883364152, "grad_norm": 0.54296875, "learning_rate": 2.9182653386341552e-05, "loss": 2.0945, "step": 4211 }, { "epoch": 0.13589535219021154, "grad_norm": 0.404296875, "learning_rate": 2.9182140912463148e-05, "loss": 2.105, "step": 4212 }, { "epoch": 0.1359276160440079, "grad_norm": 0.5703125, "learning_rate": 2.918162828247791e-05, "loss": 2.1202, "step": 4213 }, { "epoch": 0.13595987989780425, "grad_norm": 0.4140625, "learning_rate": 2.918111549639148e-05, "loss": 2.1323, "step": 4214 }, { "epoch": 0.1359921437516006, "grad_norm": 0.482421875, "learning_rate": 2.918060255420951e-05, "loss": 2.087, "step": 4215 }, { "epoch": 0.13602440760539694, "grad_norm": 0.416015625, "learning_rate": 2.9180089455937634e-05, "loss": 2.1287, "step": 4216 }, { "epoch": 0.13605667145919328, "grad_norm": 0.466796875, "learning_rate": 2.9179576201581514e-05, "loss": 2.1142, "step": 4217 }, { "epoch": 0.13608893531298963, "grad_norm": 0.408203125, "learning_rate": 2.9179062791146783e-05, "loss": 2.1259, "step": 4218 }, { "epoch": 0.13612119916678597, "grad_norm": 0.4296875, "learning_rate": 2.9178549224639107e-05, "loss": 2.1291, "step": 4219 }, { "epoch": 0.13615346302058232, "grad_norm": 0.408203125, "learning_rate": 2.9178035502064126e-05, "loss": 2.1196, "step": 4220 }, { "epoch": 0.13618572687437866, "grad_norm": 0.380859375, "learning_rate": 2.9177521623427505e-05, "loss": 2.1129, "step": 4221 }, { "epoch": 0.136217990728175, "grad_norm": 0.38671875, "learning_rate": 2.91770075887349e-05, "loss": 2.1267, "step": 4222 }, { "epoch": 0.13625025458197138, "grad_norm": 0.361328125, "learning_rate": 2.917649339799196e-05, "loss": 2.1194, "step": 4223 }, { "epoch": 0.13628251843576772, "grad_norm": 0.388671875, "learning_rate": 2.9175979051204357e-05, "loss": 2.09, "step": 4224 }, { "epoch": 0.13631478228956406, "grad_norm": 0.392578125, "learning_rate": 2.9175464548377743e-05, "loss": 2.1272, "step": 4225 }, { "epoch": 0.1363470461433604, "grad_norm": 0.380859375, "learning_rate": 2.9174949889517784e-05, "loss": 2.1399, "step": 4226 }, { "epoch": 0.13637930999715675, "grad_norm": 0.400390625, "learning_rate": 2.9174435074630147e-05, "loss": 2.1444, "step": 4227 }, { "epoch": 0.1364115738509531, "grad_norm": 0.37109375, "learning_rate": 2.91739201037205e-05, "loss": 2.1177, "step": 4228 }, { "epoch": 0.13644383770474944, "grad_norm": 0.396484375, "learning_rate": 2.9173404976794504e-05, "loss": 2.1265, "step": 4229 }, { "epoch": 0.13647610155854578, "grad_norm": 0.3828125, "learning_rate": 2.917288969385784e-05, "loss": 2.1476, "step": 4230 }, { "epoch": 0.13650836541234213, "grad_norm": 0.3828125, "learning_rate": 2.9172374254916166e-05, "loss": 2.1119, "step": 4231 }, { "epoch": 0.13654062926613847, "grad_norm": 0.3984375, "learning_rate": 2.9171858659975166e-05, "loss": 2.1104, "step": 4232 }, { "epoch": 0.13657289311993484, "grad_norm": 0.416015625, "learning_rate": 2.9171342909040514e-05, "loss": 2.1125, "step": 4233 }, { "epoch": 0.13660515697373118, "grad_norm": 0.41796875, "learning_rate": 2.9170827002117886e-05, "loss": 2.1586, "step": 4234 }, { "epoch": 0.13663742082752753, "grad_norm": 0.431640625, "learning_rate": 2.9170310939212963e-05, "loss": 2.1308, "step": 4235 }, { "epoch": 0.13666968468132387, "grad_norm": 0.462890625, "learning_rate": 2.9169794720331418e-05, "loss": 2.1305, "step": 4236 }, { "epoch": 0.13670194853512022, "grad_norm": 0.419921875, "learning_rate": 2.916927834547894e-05, "loss": 2.1408, "step": 4237 }, { "epoch": 0.13673421238891656, "grad_norm": 0.375, "learning_rate": 2.9168761814661212e-05, "loss": 2.1383, "step": 4238 }, { "epoch": 0.1367664762427129, "grad_norm": 0.408203125, "learning_rate": 2.9168245127883916e-05, "loss": 2.1368, "step": 4239 }, { "epoch": 0.13679874009650925, "grad_norm": 0.4140625, "learning_rate": 2.9167728285152742e-05, "loss": 2.1472, "step": 4240 }, { "epoch": 0.1368310039503056, "grad_norm": 0.400390625, "learning_rate": 2.9167211286473384e-05, "loss": 2.1477, "step": 4241 }, { "epoch": 0.13686326780410193, "grad_norm": 0.390625, "learning_rate": 2.9166694131851524e-05, "loss": 2.1165, "step": 4242 }, { "epoch": 0.13689553165789828, "grad_norm": 0.400390625, "learning_rate": 2.916617682129286e-05, "loss": 2.1158, "step": 4243 }, { "epoch": 0.13692779551169465, "grad_norm": 0.40234375, "learning_rate": 2.9165659354803083e-05, "loss": 2.1344, "step": 4244 }, { "epoch": 0.136960059365491, "grad_norm": 0.38671875, "learning_rate": 2.916514173238789e-05, "loss": 2.1151, "step": 4245 }, { "epoch": 0.13699232321928734, "grad_norm": 0.4453125, "learning_rate": 2.9164623954052977e-05, "loss": 2.1361, "step": 4246 }, { "epoch": 0.13702458707308368, "grad_norm": 0.5078125, "learning_rate": 2.9164106019804048e-05, "loss": 2.1385, "step": 4247 }, { "epoch": 0.13705685092688003, "grad_norm": 0.48828125, "learning_rate": 2.91635879296468e-05, "loss": 2.1156, "step": 4248 }, { "epoch": 0.13708911478067637, "grad_norm": 0.431640625, "learning_rate": 2.9163069683586936e-05, "loss": 2.1182, "step": 4249 }, { "epoch": 0.1371213786344727, "grad_norm": 0.44921875, "learning_rate": 2.9162551281630164e-05, "loss": 2.0569, "step": 4250 }, { "epoch": 0.13715364248826906, "grad_norm": 0.408203125, "learning_rate": 2.9162032723782183e-05, "loss": 2.1336, "step": 4251 }, { "epoch": 0.1371859063420654, "grad_norm": 0.41015625, "learning_rate": 2.916151401004871e-05, "loss": 2.1393, "step": 4252 }, { "epoch": 0.13721817019586174, "grad_norm": 0.427734375, "learning_rate": 2.9160995140435447e-05, "loss": 2.1481, "step": 4253 }, { "epoch": 0.13725043404965812, "grad_norm": 0.4140625, "learning_rate": 2.9160476114948115e-05, "loss": 2.103, "step": 4254 }, { "epoch": 0.13728269790345446, "grad_norm": 0.439453125, "learning_rate": 2.9159956933592415e-05, "loss": 2.1115, "step": 4255 }, { "epoch": 0.1373149617572508, "grad_norm": 0.369140625, "learning_rate": 2.915943759637407e-05, "loss": 2.0816, "step": 4256 }, { "epoch": 0.13734722561104715, "grad_norm": 0.439453125, "learning_rate": 2.9158918103298794e-05, "loss": 2.1371, "step": 4257 }, { "epoch": 0.1373794894648435, "grad_norm": 0.4765625, "learning_rate": 2.91583984543723e-05, "loss": 2.1474, "step": 4258 }, { "epoch": 0.13741175331863983, "grad_norm": 0.41015625, "learning_rate": 2.915787864960032e-05, "loss": 2.1197, "step": 4259 }, { "epoch": 0.13744401717243618, "grad_norm": 0.40234375, "learning_rate": 2.9157358688988564e-05, "loss": 2.1113, "step": 4260 }, { "epoch": 0.13747628102623252, "grad_norm": 0.43359375, "learning_rate": 2.9156838572542758e-05, "loss": 2.0818, "step": 4261 }, { "epoch": 0.13750854488002887, "grad_norm": 0.416015625, "learning_rate": 2.9156318300268636e-05, "loss": 2.1302, "step": 4262 }, { "epoch": 0.1375408087338252, "grad_norm": 0.4453125, "learning_rate": 2.915579787217191e-05, "loss": 2.1026, "step": 4263 }, { "epoch": 0.13757307258762158, "grad_norm": 0.462890625, "learning_rate": 2.915527728825832e-05, "loss": 2.0903, "step": 4264 }, { "epoch": 0.13760533644141792, "grad_norm": 0.4921875, "learning_rate": 2.9154756548533594e-05, "loss": 2.1279, "step": 4265 }, { "epoch": 0.13763760029521427, "grad_norm": 0.46875, "learning_rate": 2.9154235653003464e-05, "loss": 2.1168, "step": 4266 }, { "epoch": 0.1376698641490106, "grad_norm": 0.4296875, "learning_rate": 2.915371460167366e-05, "loss": 2.1258, "step": 4267 }, { "epoch": 0.13770212800280696, "grad_norm": 0.380859375, "learning_rate": 2.9153193394549917e-05, "loss": 2.1192, "step": 4268 }, { "epoch": 0.1377343918566033, "grad_norm": 0.431640625, "learning_rate": 2.9152672031637978e-05, "loss": 2.1034, "step": 4269 }, { "epoch": 0.13776665571039964, "grad_norm": 0.427734375, "learning_rate": 2.9152150512943575e-05, "loss": 2.1246, "step": 4270 }, { "epoch": 0.137798919564196, "grad_norm": 0.4375, "learning_rate": 2.9151628838472453e-05, "loss": 2.1091, "step": 4271 }, { "epoch": 0.13783118341799233, "grad_norm": 0.447265625, "learning_rate": 2.9151107008230357e-05, "loss": 2.0978, "step": 4272 }, { "epoch": 0.13786344727178867, "grad_norm": 0.3828125, "learning_rate": 2.9150585022223025e-05, "loss": 2.1074, "step": 4273 }, { "epoch": 0.13789571112558505, "grad_norm": 0.462890625, "learning_rate": 2.9150062880456203e-05, "loss": 2.1093, "step": 4274 }, { "epoch": 0.1379279749793814, "grad_norm": 0.51171875, "learning_rate": 2.9149540582935642e-05, "loss": 2.0925, "step": 4275 }, { "epoch": 0.13796023883317773, "grad_norm": 0.62109375, "learning_rate": 2.9149018129667088e-05, "loss": 2.0854, "step": 4276 }, { "epoch": 0.13799250268697408, "grad_norm": 0.609375, "learning_rate": 2.9148495520656293e-05, "loss": 2.1127, "step": 4277 }, { "epoch": 0.13802476654077042, "grad_norm": 0.5625, "learning_rate": 2.9147972755909007e-05, "loss": 2.1214, "step": 4278 }, { "epoch": 0.13805703039456677, "grad_norm": 0.43359375, "learning_rate": 2.914744983543099e-05, "loss": 2.1152, "step": 4279 }, { "epoch": 0.1380892942483631, "grad_norm": 0.578125, "learning_rate": 2.9146926759227996e-05, "loss": 2.11, "step": 4280 }, { "epoch": 0.13812155810215945, "grad_norm": 0.6640625, "learning_rate": 2.9146403527305776e-05, "loss": 2.1331, "step": 4281 }, { "epoch": 0.1381538219559558, "grad_norm": 0.458984375, "learning_rate": 2.9145880139670095e-05, "loss": 2.12, "step": 4282 }, { "epoch": 0.13818608580975214, "grad_norm": 0.45703125, "learning_rate": 2.9145356596326716e-05, "loss": 2.1389, "step": 4283 }, { "epoch": 0.1382183496635485, "grad_norm": 0.91015625, "learning_rate": 2.9144832897281398e-05, "loss": 2.1123, "step": 4284 }, { "epoch": 0.13825061351734486, "grad_norm": 0.4375, "learning_rate": 2.9144309042539912e-05, "loss": 2.0976, "step": 4285 }, { "epoch": 0.1382828773711412, "grad_norm": 0.44921875, "learning_rate": 2.9143785032108013e-05, "loss": 2.0997, "step": 4286 }, { "epoch": 0.13831514122493754, "grad_norm": 0.4140625, "learning_rate": 2.9143260865991476e-05, "loss": 2.129, "step": 4287 }, { "epoch": 0.1383474050787339, "grad_norm": 0.400390625, "learning_rate": 2.9142736544196073e-05, "loss": 2.1209, "step": 4288 }, { "epoch": 0.13837966893253023, "grad_norm": 0.42578125, "learning_rate": 2.914221206672757e-05, "loss": 2.1377, "step": 4289 }, { "epoch": 0.13841193278632657, "grad_norm": 0.404296875, "learning_rate": 2.914168743359174e-05, "loss": 2.1373, "step": 4290 }, { "epoch": 0.13844419664012292, "grad_norm": 0.39453125, "learning_rate": 2.9141162644794365e-05, "loss": 2.0771, "step": 4291 }, { "epoch": 0.13847646049391926, "grad_norm": 0.404296875, "learning_rate": 2.9140637700341215e-05, "loss": 2.1136, "step": 4292 }, { "epoch": 0.1385087243477156, "grad_norm": 0.373046875, "learning_rate": 2.9140112600238065e-05, "loss": 2.0786, "step": 4293 }, { "epoch": 0.13854098820151198, "grad_norm": 0.4140625, "learning_rate": 2.9139587344490704e-05, "loss": 2.1316, "step": 4294 }, { "epoch": 0.13857325205530832, "grad_norm": 0.451171875, "learning_rate": 2.913906193310491e-05, "loss": 2.1454, "step": 4295 }, { "epoch": 0.13860551590910467, "grad_norm": 0.39453125, "learning_rate": 2.913853636608646e-05, "loss": 2.1029, "step": 4296 }, { "epoch": 0.138637779762901, "grad_norm": 0.4375, "learning_rate": 2.913801064344115e-05, "loss": 2.1109, "step": 4297 }, { "epoch": 0.13867004361669735, "grad_norm": 0.376953125, "learning_rate": 2.9137484765174758e-05, "loss": 2.1317, "step": 4298 }, { "epoch": 0.1387023074704937, "grad_norm": 0.41796875, "learning_rate": 2.913695873129308e-05, "loss": 2.0994, "step": 4299 }, { "epoch": 0.13873457132429004, "grad_norm": 0.40625, "learning_rate": 2.91364325418019e-05, "loss": 2.1117, "step": 4300 }, { "epoch": 0.13876683517808638, "grad_norm": 0.37109375, "learning_rate": 2.913590619670701e-05, "loss": 2.0916, "step": 4301 }, { "epoch": 0.13879909903188273, "grad_norm": 0.388671875, "learning_rate": 2.9135379696014205e-05, "loss": 2.095, "step": 4302 }, { "epoch": 0.13883136288567907, "grad_norm": 0.36328125, "learning_rate": 2.9134853039729283e-05, "loss": 2.109, "step": 4303 }, { "epoch": 0.13886362673947544, "grad_norm": 0.384765625, "learning_rate": 2.9134326227858044e-05, "loss": 2.1272, "step": 4304 }, { "epoch": 0.1388958905932718, "grad_norm": 0.375, "learning_rate": 2.9133799260406276e-05, "loss": 2.1072, "step": 4305 }, { "epoch": 0.13892815444706813, "grad_norm": 0.37890625, "learning_rate": 2.9133272137379787e-05, "loss": 2.1172, "step": 4306 }, { "epoch": 0.13896041830086447, "grad_norm": 0.369140625, "learning_rate": 2.9132744858784374e-05, "loss": 2.1228, "step": 4307 }, { "epoch": 0.13899268215466082, "grad_norm": 0.38671875, "learning_rate": 2.9132217424625848e-05, "loss": 2.1228, "step": 4308 }, { "epoch": 0.13902494600845716, "grad_norm": 0.396484375, "learning_rate": 2.9131689834910013e-05, "loss": 2.1441, "step": 4309 }, { "epoch": 0.1390572098622535, "grad_norm": 0.4296875, "learning_rate": 2.9131162089642672e-05, "loss": 2.1215, "step": 4310 }, { "epoch": 0.13908947371604985, "grad_norm": 0.494140625, "learning_rate": 2.9130634188829636e-05, "loss": 2.1215, "step": 4311 }, { "epoch": 0.1391217375698462, "grad_norm": 0.67578125, "learning_rate": 2.9130106132476712e-05, "loss": 2.0719, "step": 4312 }, { "epoch": 0.13915400142364254, "grad_norm": 0.90234375, "learning_rate": 2.9129577920589722e-05, "loss": 2.0442, "step": 4313 }, { "epoch": 0.1391862652774389, "grad_norm": 0.84765625, "learning_rate": 2.9129049553174472e-05, "loss": 2.0348, "step": 4314 }, { "epoch": 0.13921852913123525, "grad_norm": 0.40625, "learning_rate": 2.9128521030236784e-05, "loss": 2.0408, "step": 4315 }, { "epoch": 0.1392507929850316, "grad_norm": 0.76171875, "learning_rate": 2.912799235178247e-05, "loss": 2.0662, "step": 4316 }, { "epoch": 0.13928305683882794, "grad_norm": 0.57421875, "learning_rate": 2.9127463517817354e-05, "loss": 2.0459, "step": 4317 }, { "epoch": 0.13931532069262428, "grad_norm": 0.48046875, "learning_rate": 2.912693452834725e-05, "loss": 2.0708, "step": 4318 }, { "epoch": 0.13934758454642063, "grad_norm": 0.6640625, "learning_rate": 2.912640538337799e-05, "loss": 2.0974, "step": 4319 }, { "epoch": 0.13937984840021697, "grad_norm": 0.37109375, "learning_rate": 2.912587608291539e-05, "loss": 2.0518, "step": 4320 }, { "epoch": 0.13941211225401332, "grad_norm": 0.56640625, "learning_rate": 2.912534662696528e-05, "loss": 2.0423, "step": 4321 }, { "epoch": 0.13944437610780966, "grad_norm": 0.416015625, "learning_rate": 2.9124817015533492e-05, "loss": 2.0745, "step": 4322 }, { "epoch": 0.139476639961606, "grad_norm": 0.55859375, "learning_rate": 2.912428724862585e-05, "loss": 2.0692, "step": 4323 }, { "epoch": 0.13950890381540237, "grad_norm": 0.57421875, "learning_rate": 2.9123757326248183e-05, "loss": 2.0644, "step": 4324 }, { "epoch": 0.13954116766919872, "grad_norm": 0.4453125, "learning_rate": 2.912322724840633e-05, "loss": 2.0381, "step": 4325 }, { "epoch": 0.13957343152299506, "grad_norm": 0.69140625, "learning_rate": 2.9122697015106124e-05, "loss": 2.0554, "step": 4326 }, { "epoch": 0.1396056953767914, "grad_norm": 0.41015625, "learning_rate": 2.9122166626353398e-05, "loss": 2.0534, "step": 4327 }, { "epoch": 0.13963795923058775, "grad_norm": 0.53125, "learning_rate": 2.9121636082153998e-05, "loss": 2.0653, "step": 4328 }, { "epoch": 0.1396702230843841, "grad_norm": 0.43359375, "learning_rate": 2.9121105382513752e-05, "loss": 2.044, "step": 4329 }, { "epoch": 0.13970248693818044, "grad_norm": 0.4453125, "learning_rate": 2.9120574527438515e-05, "loss": 2.0367, "step": 4330 }, { "epoch": 0.13973475079197678, "grad_norm": 0.50390625, "learning_rate": 2.912004351693412e-05, "loss": 2.0555, "step": 4331 }, { "epoch": 0.13976701464577312, "grad_norm": 0.435546875, "learning_rate": 2.911951235100642e-05, "loss": 2.0912, "step": 4332 }, { "epoch": 0.13979927849956947, "grad_norm": 0.486328125, "learning_rate": 2.9118981029661252e-05, "loss": 2.0461, "step": 4333 }, { "epoch": 0.1398315423533658, "grad_norm": 0.431640625, "learning_rate": 2.9118449552904473e-05, "loss": 2.0567, "step": 4334 }, { "epoch": 0.13986380620716218, "grad_norm": 0.482421875, "learning_rate": 2.9117917920741928e-05, "loss": 2.0538, "step": 4335 }, { "epoch": 0.13989607006095853, "grad_norm": 0.431640625, "learning_rate": 2.9117386133179473e-05, "loss": 2.078, "step": 4336 }, { "epoch": 0.13992833391475487, "grad_norm": 0.48046875, "learning_rate": 2.9116854190222956e-05, "loss": 2.036, "step": 4337 }, { "epoch": 0.13996059776855121, "grad_norm": 0.4453125, "learning_rate": 2.9116322091878238e-05, "loss": 2.0555, "step": 4338 }, { "epoch": 0.13999286162234756, "grad_norm": 0.421875, "learning_rate": 2.911578983815117e-05, "loss": 2.0759, "step": 4339 }, { "epoch": 0.1400251254761439, "grad_norm": 0.466796875, "learning_rate": 2.9115257429047617e-05, "loss": 2.0673, "step": 4340 }, { "epoch": 0.14005738932994025, "grad_norm": 0.41015625, "learning_rate": 2.9114724864573438e-05, "loss": 2.0625, "step": 4341 }, { "epoch": 0.1400896531837366, "grad_norm": 0.404296875, "learning_rate": 2.9114192144734493e-05, "loss": 2.0554, "step": 4342 }, { "epoch": 0.14012191703753293, "grad_norm": 0.4140625, "learning_rate": 2.911365926953664e-05, "loss": 2.0434, "step": 4343 }, { "epoch": 0.14015418089132928, "grad_norm": 0.462890625, "learning_rate": 2.911312623898576e-05, "loss": 2.0576, "step": 4344 }, { "epoch": 0.14018644474512565, "grad_norm": 0.40234375, "learning_rate": 2.9112593053087702e-05, "loss": 2.057, "step": 4345 }, { "epoch": 0.140218708598922, "grad_norm": 0.3984375, "learning_rate": 2.9112059711848352e-05, "loss": 2.0278, "step": 4346 }, { "epoch": 0.14025097245271834, "grad_norm": 0.421875, "learning_rate": 2.911152621527357e-05, "loss": 2.0185, "step": 4347 }, { "epoch": 0.14028323630651468, "grad_norm": 0.396484375, "learning_rate": 2.911099256336923e-05, "loss": 2.0574, "step": 4348 }, { "epoch": 0.14031550016031102, "grad_norm": 0.427734375, "learning_rate": 2.9110458756141206e-05, "loss": 2.0768, "step": 4349 }, { "epoch": 0.14034776401410737, "grad_norm": 0.38671875, "learning_rate": 2.9109924793595383e-05, "loss": 2.0434, "step": 4350 }, { "epoch": 0.1403800278679037, "grad_norm": 0.400390625, "learning_rate": 2.910939067573762e-05, "loss": 2.0641, "step": 4351 }, { "epoch": 0.14041229172170006, "grad_norm": 0.421875, "learning_rate": 2.910885640257381e-05, "loss": 2.0751, "step": 4352 }, { "epoch": 0.1404445555754964, "grad_norm": 0.40625, "learning_rate": 2.9108321974109832e-05, "loss": 2.0385, "step": 4353 }, { "epoch": 0.14047681942929274, "grad_norm": 0.416015625, "learning_rate": 2.9107787390351567e-05, "loss": 2.056, "step": 4354 }, { "epoch": 0.14050908328308911, "grad_norm": 0.474609375, "learning_rate": 2.9107252651304897e-05, "loss": 2.0426, "step": 4355 }, { "epoch": 0.14054134713688546, "grad_norm": 0.46875, "learning_rate": 2.9106717756975707e-05, "loss": 2.0538, "step": 4356 }, { "epoch": 0.1405736109906818, "grad_norm": 0.39453125, "learning_rate": 2.9106182707369893e-05, "loss": 2.0542, "step": 4357 }, { "epoch": 0.14060587484447815, "grad_norm": 0.4140625, "learning_rate": 2.910564750249334e-05, "loss": 2.0388, "step": 4358 }, { "epoch": 0.1406381386982745, "grad_norm": 0.431640625, "learning_rate": 2.9105112142351937e-05, "loss": 2.0367, "step": 4359 }, { "epoch": 0.14067040255207083, "grad_norm": 0.40625, "learning_rate": 2.910457662695158e-05, "loss": 2.0542, "step": 4360 }, { "epoch": 0.14070266640586718, "grad_norm": 0.46484375, "learning_rate": 2.910404095629816e-05, "loss": 2.0366, "step": 4361 }, { "epoch": 0.14073493025966352, "grad_norm": 0.400390625, "learning_rate": 2.9103505130397575e-05, "loss": 2.0586, "step": 4362 }, { "epoch": 0.14076719411345986, "grad_norm": 0.431640625, "learning_rate": 2.9102969149255727e-05, "loss": 2.0563, "step": 4363 }, { "epoch": 0.1407994579672562, "grad_norm": 0.41015625, "learning_rate": 2.9102433012878507e-05, "loss": 2.0532, "step": 4364 }, { "epoch": 0.14083172182105258, "grad_norm": 0.431640625, "learning_rate": 2.9101896721271823e-05, "loss": 2.0564, "step": 4365 }, { "epoch": 0.14086398567484892, "grad_norm": 0.408203125, "learning_rate": 2.9101360274441578e-05, "loss": 2.076, "step": 4366 }, { "epoch": 0.14089624952864527, "grad_norm": 0.439453125, "learning_rate": 2.9100823672393677e-05, "loss": 2.0454, "step": 4367 }, { "epoch": 0.1409285133824416, "grad_norm": 0.4375, "learning_rate": 2.9100286915134025e-05, "loss": 2.0808, "step": 4368 }, { "epoch": 0.14096077723623796, "grad_norm": 0.4375, "learning_rate": 2.9099750002668525e-05, "loss": 2.0229, "step": 4369 }, { "epoch": 0.1409930410900343, "grad_norm": 0.443359375, "learning_rate": 2.9099212935003096e-05, "loss": 2.0524, "step": 4370 }, { "epoch": 0.14102530494383064, "grad_norm": 0.419921875, "learning_rate": 2.9098675712143646e-05, "loss": 2.0158, "step": 4371 }, { "epoch": 0.141057568797627, "grad_norm": 0.412109375, "learning_rate": 2.9098138334096085e-05, "loss": 2.0258, "step": 4372 }, { "epoch": 0.14108983265142333, "grad_norm": 0.484375, "learning_rate": 2.9097600800866335e-05, "loss": 2.0416, "step": 4373 }, { "epoch": 0.14112209650521967, "grad_norm": 0.5, "learning_rate": 2.9097063112460305e-05, "loss": 2.0215, "step": 4374 }, { "epoch": 0.14115436035901605, "grad_norm": 0.52734375, "learning_rate": 2.909652526888392e-05, "loss": 2.0458, "step": 4375 }, { "epoch": 0.1411866242128124, "grad_norm": 0.388671875, "learning_rate": 2.9095987270143098e-05, "loss": 2.0615, "step": 4376 }, { "epoch": 0.14121888806660873, "grad_norm": 0.427734375, "learning_rate": 2.9095449116243763e-05, "loss": 2.0391, "step": 4377 }, { "epoch": 0.14125115192040508, "grad_norm": 0.453125, "learning_rate": 2.9094910807191827e-05, "loss": 2.0615, "step": 4378 }, { "epoch": 0.14128341577420142, "grad_norm": 0.439453125, "learning_rate": 2.9094372342993233e-05, "loss": 2.069, "step": 4379 }, { "epoch": 0.14131567962799776, "grad_norm": 0.43359375, "learning_rate": 2.9093833723653894e-05, "loss": 2.0408, "step": 4380 }, { "epoch": 0.1413479434817941, "grad_norm": 0.408203125, "learning_rate": 2.9093294949179752e-05, "loss": 2.0356, "step": 4381 }, { "epoch": 0.14138020733559045, "grad_norm": 0.48828125, "learning_rate": 2.9092756019576724e-05, "loss": 2.0562, "step": 4382 }, { "epoch": 0.1414124711893868, "grad_norm": 0.56640625, "learning_rate": 2.9092216934850748e-05, "loss": 2.0946, "step": 4383 }, { "epoch": 0.14144473504318314, "grad_norm": 0.66796875, "learning_rate": 2.909167769500776e-05, "loss": 2.0468, "step": 4384 }, { "epoch": 0.1414769988969795, "grad_norm": 0.6953125, "learning_rate": 2.909113830005369e-05, "loss": 2.0327, "step": 4385 }, { "epoch": 0.14150926275077585, "grad_norm": 0.44921875, "learning_rate": 2.9090598749994478e-05, "loss": 2.0323, "step": 4386 }, { "epoch": 0.1415415266045722, "grad_norm": 0.53125, "learning_rate": 2.909005904483607e-05, "loss": 2.0395, "step": 4387 }, { "epoch": 0.14157379045836854, "grad_norm": 0.52734375, "learning_rate": 2.9089519184584392e-05, "loss": 2.0361, "step": 4388 }, { "epoch": 0.1416060543121649, "grad_norm": 0.4296875, "learning_rate": 2.90889791692454e-05, "loss": 2.0576, "step": 4389 }, { "epoch": 0.14163831816596123, "grad_norm": 0.4765625, "learning_rate": 2.908843899882503e-05, "loss": 2.0667, "step": 4390 }, { "epoch": 0.14167058201975757, "grad_norm": 0.423828125, "learning_rate": 2.9087898673329235e-05, "loss": 2.0353, "step": 4391 }, { "epoch": 0.14170284587355392, "grad_norm": 0.421875, "learning_rate": 2.908735819276395e-05, "loss": 2.0297, "step": 4392 }, { "epoch": 0.14173510972735026, "grad_norm": 0.462890625, "learning_rate": 2.908681755713514e-05, "loss": 2.0624, "step": 4393 }, { "epoch": 0.1417673735811466, "grad_norm": 0.50390625, "learning_rate": 2.9086276766448744e-05, "loss": 2.0668, "step": 4394 }, { "epoch": 0.14179963743494298, "grad_norm": 0.40234375, "learning_rate": 2.908573582071072e-05, "loss": 2.0133, "step": 4395 }, { "epoch": 0.14183190128873932, "grad_norm": 0.443359375, "learning_rate": 2.908519471992702e-05, "loss": 2.0577, "step": 4396 }, { "epoch": 0.14186416514253566, "grad_norm": 0.41796875, "learning_rate": 2.9084653464103603e-05, "loss": 2.0353, "step": 4397 }, { "epoch": 0.141896428996332, "grad_norm": 0.412109375, "learning_rate": 2.9084112053246423e-05, "loss": 2.052, "step": 4398 }, { "epoch": 0.14192869285012835, "grad_norm": 0.453125, "learning_rate": 2.9083570487361445e-05, "loss": 2.1094, "step": 4399 }, { "epoch": 0.1419609567039247, "grad_norm": 0.44921875, "learning_rate": 2.9083028766454624e-05, "loss": 2.1246, "step": 4400 }, { "epoch": 0.14199322055772104, "grad_norm": 0.423828125, "learning_rate": 2.9082486890531924e-05, "loss": 2.0915, "step": 4401 }, { "epoch": 0.14202548441151738, "grad_norm": 0.435546875, "learning_rate": 2.9081944859599312e-05, "loss": 2.1333, "step": 4402 }, { "epoch": 0.14205774826531373, "grad_norm": 0.4375, "learning_rate": 2.9081402673662756e-05, "loss": 2.1151, "step": 4403 }, { "epoch": 0.14209001211911007, "grad_norm": 0.404296875, "learning_rate": 2.908086033272822e-05, "loss": 2.1222, "step": 4404 }, { "epoch": 0.14212227597290644, "grad_norm": 0.392578125, "learning_rate": 2.9080317836801673e-05, "loss": 2.1297, "step": 4405 }, { "epoch": 0.14215453982670279, "grad_norm": 0.41015625, "learning_rate": 2.907977518588909e-05, "loss": 2.1111, "step": 4406 }, { "epoch": 0.14218680368049913, "grad_norm": 0.408203125, "learning_rate": 2.907923237999644e-05, "loss": 2.1296, "step": 4407 }, { "epoch": 0.14221906753429547, "grad_norm": 0.384765625, "learning_rate": 2.9078689419129707e-05, "loss": 2.1037, "step": 4408 }, { "epoch": 0.14225133138809182, "grad_norm": 0.416015625, "learning_rate": 2.9078146303294856e-05, "loss": 2.1147, "step": 4409 }, { "epoch": 0.14228359524188816, "grad_norm": 0.408203125, "learning_rate": 2.9077603032497868e-05, "loss": 2.1003, "step": 4410 }, { "epoch": 0.1423158590956845, "grad_norm": 0.40234375, "learning_rate": 2.9077059606744728e-05, "loss": 2.1186, "step": 4411 }, { "epoch": 0.14234812294948085, "grad_norm": 0.400390625, "learning_rate": 2.9076516026041414e-05, "loss": 2.1195, "step": 4412 }, { "epoch": 0.1423803868032772, "grad_norm": 0.447265625, "learning_rate": 2.9075972290393914e-05, "loss": 2.1388, "step": 4413 }, { "epoch": 0.14241265065707354, "grad_norm": 0.435546875, "learning_rate": 2.9075428399808204e-05, "loss": 2.1183, "step": 4414 }, { "epoch": 0.1424449145108699, "grad_norm": 0.41796875, "learning_rate": 2.9074884354290275e-05, "loss": 2.0999, "step": 4415 }, { "epoch": 0.14247717836466625, "grad_norm": 0.40234375, "learning_rate": 2.907434015384612e-05, "loss": 2.1024, "step": 4416 }, { "epoch": 0.1425094422184626, "grad_norm": 0.3984375, "learning_rate": 2.9073795798481726e-05, "loss": 2.1169, "step": 4417 }, { "epoch": 0.14254170607225894, "grad_norm": 0.38671875, "learning_rate": 2.9073251288203083e-05, "loss": 2.0686, "step": 4418 }, { "epoch": 0.14257396992605528, "grad_norm": 0.412109375, "learning_rate": 2.9072706623016182e-05, "loss": 2.1095, "step": 4419 }, { "epoch": 0.14260623377985163, "grad_norm": 0.439453125, "learning_rate": 2.9072161802927026e-05, "loss": 2.108, "step": 4420 }, { "epoch": 0.14263849763364797, "grad_norm": 0.396484375, "learning_rate": 2.9071616827941606e-05, "loss": 2.1367, "step": 4421 }, { "epoch": 0.14267076148744431, "grad_norm": 0.423828125, "learning_rate": 2.9071071698065928e-05, "loss": 2.1241, "step": 4422 }, { "epoch": 0.14270302534124066, "grad_norm": 0.462890625, "learning_rate": 2.9070526413305983e-05, "loss": 2.1247, "step": 4423 }, { "epoch": 0.142735289195037, "grad_norm": 0.4140625, "learning_rate": 2.906998097366778e-05, "loss": 2.1177, "step": 4424 }, { "epoch": 0.14276755304883335, "grad_norm": 0.44140625, "learning_rate": 2.9069435379157316e-05, "loss": 2.102, "step": 4425 }, { "epoch": 0.14279981690262972, "grad_norm": 0.5078125, "learning_rate": 2.9068889629780606e-05, "loss": 2.103, "step": 4426 }, { "epoch": 0.14283208075642606, "grad_norm": 0.455078125, "learning_rate": 2.9068343725543647e-05, "loss": 2.0744, "step": 4427 }, { "epoch": 0.1428643446102224, "grad_norm": 0.447265625, "learning_rate": 2.9067797666452457e-05, "loss": 2.1099, "step": 4428 }, { "epoch": 0.14289660846401875, "grad_norm": 0.5703125, "learning_rate": 2.9067251452513042e-05, "loss": 2.0842, "step": 4429 }, { "epoch": 0.1429288723178151, "grad_norm": 0.62109375, "learning_rate": 2.906670508373141e-05, "loss": 2.1177, "step": 4430 }, { "epoch": 0.14296113617161144, "grad_norm": 0.87109375, "learning_rate": 2.9066158560113586e-05, "loss": 2.1211, "step": 4431 }, { "epoch": 0.14299340002540778, "grad_norm": 0.85546875, "learning_rate": 2.9065611881665575e-05, "loss": 2.1048, "step": 4432 }, { "epoch": 0.14302566387920412, "grad_norm": 0.427734375, "learning_rate": 2.9065065048393403e-05, "loss": 2.0994, "step": 4433 }, { "epoch": 0.14305792773300047, "grad_norm": 0.65234375, "learning_rate": 2.906451806030308e-05, "loss": 2.1055, "step": 4434 }, { "epoch": 0.1430901915867968, "grad_norm": 0.515625, "learning_rate": 2.9063970917400636e-05, "loss": 2.0924, "step": 4435 }, { "epoch": 0.14312245544059318, "grad_norm": 0.51171875, "learning_rate": 2.906342361969209e-05, "loss": 2.0899, "step": 4436 }, { "epoch": 0.14315471929438953, "grad_norm": 0.6875, "learning_rate": 2.9062876167183464e-05, "loss": 2.1458, "step": 4437 }, { "epoch": 0.14318698314818587, "grad_norm": 0.4453125, "learning_rate": 2.9062328559880787e-05, "loss": 2.13, "step": 4438 }, { "epoch": 0.14321924700198221, "grad_norm": 0.59375, "learning_rate": 2.9061780797790086e-05, "loss": 2.1087, "step": 4439 }, { "epoch": 0.14325151085577856, "grad_norm": 0.470703125, "learning_rate": 2.906123288091739e-05, "loss": 2.1264, "step": 4440 }, { "epoch": 0.1432837747095749, "grad_norm": 0.451171875, "learning_rate": 2.9060684809268726e-05, "loss": 2.1328, "step": 4441 }, { "epoch": 0.14331603856337125, "grad_norm": 0.5, "learning_rate": 2.9060136582850135e-05, "loss": 2.0922, "step": 4442 }, { "epoch": 0.1433483024171676, "grad_norm": 0.380859375, "learning_rate": 2.9059588201667644e-05, "loss": 2.1233, "step": 4443 }, { "epoch": 0.14338056627096393, "grad_norm": 0.498046875, "learning_rate": 2.9059039665727294e-05, "loss": 2.1308, "step": 4444 }, { "epoch": 0.14341283012476028, "grad_norm": 0.423828125, "learning_rate": 2.905849097503512e-05, "loss": 2.141, "step": 4445 }, { "epoch": 0.14344509397855665, "grad_norm": 0.443359375, "learning_rate": 2.9057942129597166e-05, "loss": 2.1203, "step": 4446 }, { "epoch": 0.143477357832353, "grad_norm": 0.41796875, "learning_rate": 2.905739312941947e-05, "loss": 2.1343, "step": 4447 }, { "epoch": 0.14350962168614934, "grad_norm": 0.427734375, "learning_rate": 2.9056843974508072e-05, "loss": 2.1158, "step": 4448 }, { "epoch": 0.14354188553994568, "grad_norm": 0.4375, "learning_rate": 2.905629466486902e-05, "loss": 2.0647, "step": 4449 }, { "epoch": 0.14357414939374202, "grad_norm": 0.4140625, "learning_rate": 2.9055745200508362e-05, "loss": 2.1416, "step": 4450 }, { "epoch": 0.14360641324753837, "grad_norm": 0.484375, "learning_rate": 2.9055195581432145e-05, "loss": 2.1135, "step": 4451 }, { "epoch": 0.1436386771013347, "grad_norm": 0.466796875, "learning_rate": 2.9054645807646418e-05, "loss": 2.1197, "step": 4452 }, { "epoch": 0.14367094095513105, "grad_norm": 0.455078125, "learning_rate": 2.905409587915723e-05, "loss": 2.1192, "step": 4453 }, { "epoch": 0.1437032048089274, "grad_norm": 0.486328125, "learning_rate": 2.905354579597064e-05, "loss": 2.1368, "step": 4454 }, { "epoch": 0.14373546866272374, "grad_norm": 0.40625, "learning_rate": 2.9052995558092696e-05, "loss": 2.1287, "step": 4455 }, { "epoch": 0.1437677325165201, "grad_norm": 0.494140625, "learning_rate": 2.9052445165529465e-05, "loss": 2.0984, "step": 4456 }, { "epoch": 0.14379999637031646, "grad_norm": 0.490234375, "learning_rate": 2.9051894618286993e-05, "loss": 2.105, "step": 4457 }, { "epoch": 0.1438322602241128, "grad_norm": 0.404296875, "learning_rate": 2.9051343916371347e-05, "loss": 2.0878, "step": 4458 }, { "epoch": 0.14386452407790914, "grad_norm": 0.48046875, "learning_rate": 2.9050793059788587e-05, "loss": 2.1219, "step": 4459 }, { "epoch": 0.1438967879317055, "grad_norm": 0.400390625, "learning_rate": 2.905024204854478e-05, "loss": 2.1201, "step": 4460 }, { "epoch": 0.14392905178550183, "grad_norm": 0.4765625, "learning_rate": 2.9049690882645983e-05, "loss": 2.064, "step": 4461 }, { "epoch": 0.14396131563929818, "grad_norm": 0.439453125, "learning_rate": 2.9049139562098277e-05, "loss": 2.1306, "step": 4462 }, { "epoch": 0.14399357949309452, "grad_norm": 0.45703125, "learning_rate": 2.9048588086907714e-05, "loss": 2.0841, "step": 4463 }, { "epoch": 0.14402584334689086, "grad_norm": 0.5625, "learning_rate": 2.9048036457080373e-05, "loss": 2.1255, "step": 4464 }, { "epoch": 0.1440581072006872, "grad_norm": 0.400390625, "learning_rate": 2.9047484672622327e-05, "loss": 2.1232, "step": 4465 }, { "epoch": 0.14409037105448358, "grad_norm": 0.453125, "learning_rate": 2.9046932733539645e-05, "loss": 2.1086, "step": 4466 }, { "epoch": 0.14412263490827992, "grad_norm": 0.373046875, "learning_rate": 2.9046380639838404e-05, "loss": 2.0757, "step": 4467 }, { "epoch": 0.14415489876207627, "grad_norm": 0.431640625, "learning_rate": 2.9045828391524682e-05, "loss": 2.135, "step": 4468 }, { "epoch": 0.1441871626158726, "grad_norm": 0.41015625, "learning_rate": 2.904527598860456e-05, "loss": 2.0959, "step": 4469 }, { "epoch": 0.14421942646966895, "grad_norm": 0.373046875, "learning_rate": 2.9044723431084116e-05, "loss": 2.0511, "step": 4470 }, { "epoch": 0.1442516903234653, "grad_norm": 0.3828125, "learning_rate": 2.9044170718969426e-05, "loss": 2.0918, "step": 4471 }, { "epoch": 0.14428395417726164, "grad_norm": 0.38671875, "learning_rate": 2.904361785226659e-05, "loss": 2.1178, "step": 4472 }, { "epoch": 0.14431621803105799, "grad_norm": 0.376953125, "learning_rate": 2.9043064830981678e-05, "loss": 2.1271, "step": 4473 }, { "epoch": 0.14434848188485433, "grad_norm": 0.373046875, "learning_rate": 2.9042511655120782e-05, "loss": 2.1024, "step": 4474 }, { "epoch": 0.14438074573865067, "grad_norm": 0.365234375, "learning_rate": 2.904195832468999e-05, "loss": 2.0899, "step": 4475 }, { "epoch": 0.14441300959244704, "grad_norm": 0.396484375, "learning_rate": 2.90414048396954e-05, "loss": 2.1232, "step": 4476 }, { "epoch": 0.1444452734462434, "grad_norm": 0.37890625, "learning_rate": 2.90408512001431e-05, "loss": 2.1424, "step": 4477 }, { "epoch": 0.14447753730003973, "grad_norm": 0.404296875, "learning_rate": 2.9040297406039174e-05, "loss": 2.1045, "step": 4478 }, { "epoch": 0.14450980115383608, "grad_norm": 0.416015625, "learning_rate": 2.903974345738973e-05, "loss": 2.1077, "step": 4479 }, { "epoch": 0.14454206500763242, "grad_norm": 0.4375, "learning_rate": 2.9039189354200865e-05, "loss": 2.118, "step": 4480 }, { "epoch": 0.14457432886142876, "grad_norm": 0.40625, "learning_rate": 2.9038635096478675e-05, "loss": 2.1183, "step": 4481 }, { "epoch": 0.1446065927152251, "grad_norm": 0.38671875, "learning_rate": 2.903808068422926e-05, "loss": 2.1112, "step": 4482 }, { "epoch": 0.14463885656902145, "grad_norm": 0.412109375, "learning_rate": 2.9037526117458722e-05, "loss": 2.1121, "step": 4483 }, { "epoch": 0.1446711204228178, "grad_norm": 0.39453125, "learning_rate": 2.9036971396173167e-05, "loss": 2.115, "step": 4484 }, { "epoch": 0.14470338427661414, "grad_norm": 0.384765625, "learning_rate": 2.9036416520378705e-05, "loss": 2.0944, "step": 4485 }, { "epoch": 0.1447356481304105, "grad_norm": 0.392578125, "learning_rate": 2.9035861490081436e-05, "loss": 2.1211, "step": 4486 }, { "epoch": 0.14476791198420685, "grad_norm": 0.423828125, "learning_rate": 2.9035306305287475e-05, "loss": 2.1011, "step": 4487 }, { "epoch": 0.1448001758380032, "grad_norm": 0.41796875, "learning_rate": 2.903475096600293e-05, "loss": 2.0804, "step": 4488 }, { "epoch": 0.14483243969179954, "grad_norm": 0.390625, "learning_rate": 2.9034195472233913e-05, "loss": 2.1467, "step": 4489 }, { "epoch": 0.14486470354559589, "grad_norm": 0.45703125, "learning_rate": 2.9033639823986546e-05, "loss": 2.1149, "step": 4490 }, { "epoch": 0.14489696739939223, "grad_norm": 0.53125, "learning_rate": 2.9033084021266935e-05, "loss": 2.1078, "step": 4491 }, { "epoch": 0.14492923125318857, "grad_norm": 0.56640625, "learning_rate": 2.9032528064081204e-05, "loss": 2.0892, "step": 4492 }, { "epoch": 0.14496149510698492, "grad_norm": 0.6171875, "learning_rate": 2.903197195243547e-05, "loss": 2.1025, "step": 4493 }, { "epoch": 0.14499375896078126, "grad_norm": 0.5625, "learning_rate": 2.9031415686335854e-05, "loss": 2.1196, "step": 4494 }, { "epoch": 0.1450260228145776, "grad_norm": 0.453125, "learning_rate": 2.9030859265788484e-05, "loss": 2.1224, "step": 4495 }, { "epoch": 0.14505828666837398, "grad_norm": 0.462890625, "learning_rate": 2.9030302690799476e-05, "loss": 2.1179, "step": 4496 }, { "epoch": 0.14509055052217032, "grad_norm": 0.53125, "learning_rate": 2.9029745961374966e-05, "loss": 2.106, "step": 4497 }, { "epoch": 0.14512281437596666, "grad_norm": 0.4140625, "learning_rate": 2.9029189077521075e-05, "loss": 2.0818, "step": 4498 }, { "epoch": 0.145155078229763, "grad_norm": 0.462890625, "learning_rate": 2.9028632039243937e-05, "loss": 2.0913, "step": 4499 }, { "epoch": 0.14518734208355935, "grad_norm": 0.49609375, "learning_rate": 2.902807484654968e-05, "loss": 2.0968, "step": 4500 }, { "epoch": 0.1452196059373557, "grad_norm": 0.396484375, "learning_rate": 2.902751749944444e-05, "loss": 2.1194, "step": 4501 }, { "epoch": 0.14525186979115204, "grad_norm": 0.4453125, "learning_rate": 2.9026959997934347e-05, "loss": 2.095, "step": 4502 }, { "epoch": 0.14528413364494838, "grad_norm": 0.40625, "learning_rate": 2.9026402342025544e-05, "loss": 2.1356, "step": 4503 }, { "epoch": 0.14531639749874473, "grad_norm": 0.470703125, "learning_rate": 2.9025844531724166e-05, "loss": 2.115, "step": 4504 }, { "epoch": 0.14534866135254107, "grad_norm": 0.43359375, "learning_rate": 2.9025286567036354e-05, "loss": 2.1038, "step": 4505 }, { "epoch": 0.14538092520633744, "grad_norm": 0.43359375, "learning_rate": 2.9024728447968248e-05, "loss": 2.1017, "step": 4506 }, { "epoch": 0.14541318906013379, "grad_norm": 0.490234375, "learning_rate": 2.9024170174525993e-05, "loss": 2.111, "step": 4507 }, { "epoch": 0.14544545291393013, "grad_norm": 0.435546875, "learning_rate": 2.9023611746715733e-05, "loss": 2.1244, "step": 4508 }, { "epoch": 0.14547771676772647, "grad_norm": 0.44140625, "learning_rate": 2.9023053164543616e-05, "loss": 2.1166, "step": 4509 }, { "epoch": 0.14550998062152282, "grad_norm": 0.4453125, "learning_rate": 2.902249442801579e-05, "loss": 2.1176, "step": 4510 }, { "epoch": 0.14554224447531916, "grad_norm": 0.419921875, "learning_rate": 2.9021935537138408e-05, "loss": 2.1236, "step": 4511 }, { "epoch": 0.1455745083291155, "grad_norm": 0.48046875, "learning_rate": 2.9021376491917617e-05, "loss": 2.13, "step": 4512 }, { "epoch": 0.14560677218291185, "grad_norm": 0.4296875, "learning_rate": 2.9020817292359573e-05, "loss": 2.1216, "step": 4513 }, { "epoch": 0.1456390360367082, "grad_norm": 0.4375, "learning_rate": 2.9020257938470425e-05, "loss": 2.1177, "step": 4514 }, { "epoch": 0.14567129989050454, "grad_norm": 0.48828125, "learning_rate": 2.9019698430256342e-05, "loss": 2.1266, "step": 4515 }, { "epoch": 0.14570356374430088, "grad_norm": 0.50390625, "learning_rate": 2.9019138767723476e-05, "loss": 2.1171, "step": 4516 }, { "epoch": 0.14573582759809725, "grad_norm": 0.5078125, "learning_rate": 2.9018578950877984e-05, "loss": 2.1159, "step": 4517 }, { "epoch": 0.1457680914518936, "grad_norm": 0.435546875, "learning_rate": 2.9018018979726034e-05, "loss": 2.1302, "step": 4518 }, { "epoch": 0.14580035530568994, "grad_norm": 0.453125, "learning_rate": 2.9017458854273788e-05, "loss": 2.1232, "step": 4519 }, { "epoch": 0.14583261915948628, "grad_norm": 0.435546875, "learning_rate": 2.901689857452741e-05, "loss": 2.1197, "step": 4520 }, { "epoch": 0.14586488301328263, "grad_norm": 0.39453125, "learning_rate": 2.9016338140493068e-05, "loss": 2.1081, "step": 4521 }, { "epoch": 0.14589714686707897, "grad_norm": 0.4765625, "learning_rate": 2.9015777552176934e-05, "loss": 2.1197, "step": 4522 }, { "epoch": 0.1459294107208753, "grad_norm": 0.42578125, "learning_rate": 2.9015216809585173e-05, "loss": 2.1051, "step": 4523 }, { "epoch": 0.14596167457467166, "grad_norm": 0.39453125, "learning_rate": 2.9014655912723956e-05, "loss": 2.0965, "step": 4524 }, { "epoch": 0.145993938428468, "grad_norm": 0.439453125, "learning_rate": 2.901409486159947e-05, "loss": 2.0976, "step": 4525 }, { "epoch": 0.14602620228226434, "grad_norm": 0.39453125, "learning_rate": 2.9013533656217874e-05, "loss": 2.1278, "step": 4526 }, { "epoch": 0.14605846613606072, "grad_norm": 0.44140625, "learning_rate": 2.9012972296585352e-05, "loss": 2.1309, "step": 4527 }, { "epoch": 0.14609072998985706, "grad_norm": 0.486328125, "learning_rate": 2.901241078270809e-05, "loss": 2.1426, "step": 4528 }, { "epoch": 0.1461229938436534, "grad_norm": 0.478515625, "learning_rate": 2.901184911459226e-05, "loss": 2.1252, "step": 4529 }, { "epoch": 0.14615525769744975, "grad_norm": 0.482421875, "learning_rate": 2.901128729224405e-05, "loss": 2.1351, "step": 4530 }, { "epoch": 0.1461875215512461, "grad_norm": 0.439453125, "learning_rate": 2.9010725315669635e-05, "loss": 2.105, "step": 4531 }, { "epoch": 0.14621978540504244, "grad_norm": 0.478515625, "learning_rate": 2.9010163184875208e-05, "loss": 2.1118, "step": 4532 }, { "epoch": 0.14625204925883878, "grad_norm": 0.482421875, "learning_rate": 2.900960089986696e-05, "loss": 2.1141, "step": 4533 }, { "epoch": 0.14628431311263512, "grad_norm": 0.47265625, "learning_rate": 2.9009038460651072e-05, "loss": 2.1021, "step": 4534 }, { "epoch": 0.14631657696643147, "grad_norm": 0.4296875, "learning_rate": 2.900847586723374e-05, "loss": 2.1153, "step": 4535 }, { "epoch": 0.1463488408202278, "grad_norm": 0.4375, "learning_rate": 2.9007913119621153e-05, "loss": 2.1107, "step": 4536 }, { "epoch": 0.14638110467402418, "grad_norm": 0.462890625, "learning_rate": 2.9007350217819512e-05, "loss": 2.1196, "step": 4537 }, { "epoch": 0.14641336852782053, "grad_norm": 0.42578125, "learning_rate": 2.9006787161835005e-05, "loss": 2.0879, "step": 4538 }, { "epoch": 0.14644563238161687, "grad_norm": 0.40625, "learning_rate": 2.9006223951673835e-05, "loss": 2.1161, "step": 4539 }, { "epoch": 0.1464778962354132, "grad_norm": 0.40625, "learning_rate": 2.9005660587342196e-05, "loss": 2.1278, "step": 4540 }, { "epoch": 0.14651016008920956, "grad_norm": 0.44140625, "learning_rate": 2.90050970688463e-05, "loss": 2.12, "step": 4541 }, { "epoch": 0.1465424239430059, "grad_norm": 0.412109375, "learning_rate": 2.900453339619234e-05, "loss": 2.112, "step": 4542 }, { "epoch": 0.14657468779680224, "grad_norm": 0.396484375, "learning_rate": 2.9003969569386522e-05, "loss": 2.1038, "step": 4543 }, { "epoch": 0.1466069516505986, "grad_norm": 0.447265625, "learning_rate": 2.900340558843505e-05, "loss": 2.1246, "step": 4544 }, { "epoch": 0.14663921550439493, "grad_norm": 0.490234375, "learning_rate": 2.900284145334414e-05, "loss": 2.0712, "step": 4545 }, { "epoch": 0.14667147935819128, "grad_norm": 0.498046875, "learning_rate": 2.9002277164119995e-05, "loss": 2.1147, "step": 4546 }, { "epoch": 0.14670374321198765, "grad_norm": 0.474609375, "learning_rate": 2.900171272076883e-05, "loss": 2.1075, "step": 4547 }, { "epoch": 0.146736007065784, "grad_norm": 0.421875, "learning_rate": 2.9001148123296854e-05, "loss": 2.1349, "step": 4548 }, { "epoch": 0.14676827091958033, "grad_norm": 0.412109375, "learning_rate": 2.9000583371710283e-05, "loss": 2.1133, "step": 4549 }, { "epoch": 0.14680053477337668, "grad_norm": 0.4453125, "learning_rate": 2.9000018466015338e-05, "loss": 2.1107, "step": 4550 }, { "epoch": 0.14683279862717302, "grad_norm": 0.435546875, "learning_rate": 2.8999453406218232e-05, "loss": 2.1129, "step": 4551 }, { "epoch": 0.14686506248096937, "grad_norm": 0.439453125, "learning_rate": 2.8998888192325185e-05, "loss": 2.1327, "step": 4552 }, { "epoch": 0.1468973263347657, "grad_norm": 0.546875, "learning_rate": 2.899832282434242e-05, "loss": 2.1085, "step": 4553 }, { "epoch": 0.14692959018856205, "grad_norm": 0.640625, "learning_rate": 2.899775730227616e-05, "loss": 2.1079, "step": 4554 }, { "epoch": 0.1469618540423584, "grad_norm": 0.5703125, "learning_rate": 2.8997191626132626e-05, "loss": 2.0807, "step": 4555 }, { "epoch": 0.14699411789615474, "grad_norm": 0.45703125, "learning_rate": 2.8996625795918054e-05, "loss": 2.135, "step": 4556 }, { "epoch": 0.1470263817499511, "grad_norm": 0.490234375, "learning_rate": 2.8996059811638662e-05, "loss": 2.1291, "step": 4557 }, { "epoch": 0.14705864560374746, "grad_norm": 0.515625, "learning_rate": 2.8995493673300687e-05, "loss": 2.0892, "step": 4558 }, { "epoch": 0.1470909094575438, "grad_norm": 0.4375, "learning_rate": 2.8994927380910357e-05, "loss": 2.0508, "step": 4559 }, { "epoch": 0.14712317331134014, "grad_norm": 0.451171875, "learning_rate": 2.8994360934473905e-05, "loss": 2.0855, "step": 4560 }, { "epoch": 0.1471554371651365, "grad_norm": 0.474609375, "learning_rate": 2.899379433399757e-05, "loss": 2.1258, "step": 4561 }, { "epoch": 0.14718770101893283, "grad_norm": 0.42578125, "learning_rate": 2.8993227579487585e-05, "loss": 2.1151, "step": 4562 }, { "epoch": 0.14721996487272918, "grad_norm": 0.43359375, "learning_rate": 2.8992660670950185e-05, "loss": 2.1379, "step": 4563 }, { "epoch": 0.14725222872652552, "grad_norm": 0.421875, "learning_rate": 2.8992093608391623e-05, "loss": 2.0872, "step": 4564 }, { "epoch": 0.14728449258032186, "grad_norm": 0.38671875, "learning_rate": 2.8991526391818125e-05, "loss": 2.1032, "step": 4565 }, { "epoch": 0.1473167564341182, "grad_norm": 0.412109375, "learning_rate": 2.899095902123595e-05, "loss": 2.0806, "step": 4566 }, { "epoch": 0.14734902028791458, "grad_norm": 0.4296875, "learning_rate": 2.899039149665133e-05, "loss": 2.1237, "step": 4567 }, { "epoch": 0.14738128414171092, "grad_norm": 0.427734375, "learning_rate": 2.8989823818070513e-05, "loss": 2.1015, "step": 4568 }, { "epoch": 0.14741354799550727, "grad_norm": 0.4296875, "learning_rate": 2.8989255985499758e-05, "loss": 2.1346, "step": 4569 }, { "epoch": 0.1474458118493036, "grad_norm": 0.4296875, "learning_rate": 2.8988687998945308e-05, "loss": 2.1058, "step": 4570 }, { "epoch": 0.14747807570309995, "grad_norm": 0.408203125, "learning_rate": 2.8988119858413414e-05, "loss": 2.1259, "step": 4571 }, { "epoch": 0.1475103395568963, "grad_norm": 0.447265625, "learning_rate": 2.8987551563910334e-05, "loss": 2.1062, "step": 4572 }, { "epoch": 0.14754260341069264, "grad_norm": 0.48828125, "learning_rate": 2.898698311544232e-05, "loss": 2.105, "step": 4573 }, { "epoch": 0.14757486726448898, "grad_norm": 0.443359375, "learning_rate": 2.898641451301563e-05, "loss": 2.1037, "step": 4574 }, { "epoch": 0.14760713111828533, "grad_norm": 0.5546875, "learning_rate": 2.898584575663652e-05, "loss": 2.1132, "step": 4575 }, { "epoch": 0.14763939497208167, "grad_norm": 0.5234375, "learning_rate": 2.8985276846311254e-05, "loss": 2.1077, "step": 4576 }, { "epoch": 0.14767165882587804, "grad_norm": 0.416015625, "learning_rate": 2.89847077820461e-05, "loss": 2.1063, "step": 4577 }, { "epoch": 0.1477039226796744, "grad_norm": 0.4453125, "learning_rate": 2.898413856384731e-05, "loss": 2.0817, "step": 4578 }, { "epoch": 0.14773618653347073, "grad_norm": 0.4296875, "learning_rate": 2.8983569191721154e-05, "loss": 2.111, "step": 4579 }, { "epoch": 0.14776845038726708, "grad_norm": 0.4453125, "learning_rate": 2.8982999665673904e-05, "loss": 2.1312, "step": 4580 }, { "epoch": 0.14780071424106342, "grad_norm": 0.42578125, "learning_rate": 2.8982429985711822e-05, "loss": 2.1465, "step": 4581 }, { "epoch": 0.14783297809485976, "grad_norm": 0.44140625, "learning_rate": 2.898186015184118e-05, "loss": 2.1086, "step": 4582 }, { "epoch": 0.1478652419486561, "grad_norm": 0.4296875, "learning_rate": 2.8981290164068255e-05, "loss": 2.1266, "step": 4583 }, { "epoch": 0.14789750580245245, "grad_norm": 0.4921875, "learning_rate": 2.8980720022399315e-05, "loss": 2.1331, "step": 4584 }, { "epoch": 0.1479297696562488, "grad_norm": 0.498046875, "learning_rate": 2.8980149726840647e-05, "loss": 2.1132, "step": 4585 }, { "epoch": 0.14796203351004514, "grad_norm": 0.431640625, "learning_rate": 2.897957927739851e-05, "loss": 2.1143, "step": 4586 }, { "epoch": 0.1479942973638415, "grad_norm": 0.40625, "learning_rate": 2.8979008674079197e-05, "loss": 2.1195, "step": 4587 }, { "epoch": 0.14802656121763785, "grad_norm": 0.3984375, "learning_rate": 2.897843791688899e-05, "loss": 2.0941, "step": 4588 }, { "epoch": 0.1480588250714342, "grad_norm": 0.416015625, "learning_rate": 2.897786700583416e-05, "loss": 2.0647, "step": 4589 }, { "epoch": 0.14809108892523054, "grad_norm": 0.388671875, "learning_rate": 2.8977295940921e-05, "loss": 2.0992, "step": 4590 }, { "epoch": 0.14812335277902688, "grad_norm": 0.484375, "learning_rate": 2.8976724722155794e-05, "loss": 2.1121, "step": 4591 }, { "epoch": 0.14815561663282323, "grad_norm": 0.408203125, "learning_rate": 2.8976153349544826e-05, "loss": 2.1184, "step": 4592 }, { "epoch": 0.14818788048661957, "grad_norm": 0.443359375, "learning_rate": 2.897558182309439e-05, "loss": 2.0864, "step": 4593 }, { "epoch": 0.14822014434041592, "grad_norm": 0.443359375, "learning_rate": 2.897501014281078e-05, "loss": 2.1308, "step": 4594 }, { "epoch": 0.14825240819421226, "grad_norm": 0.53515625, "learning_rate": 2.8974438308700276e-05, "loss": 2.1243, "step": 4595 }, { "epoch": 0.1482846720480086, "grad_norm": 0.5859375, "learning_rate": 2.8973866320769186e-05, "loss": 2.1203, "step": 4596 }, { "epoch": 0.14831693590180495, "grad_norm": 0.54296875, "learning_rate": 2.8973294179023798e-05, "loss": 2.1016, "step": 4597 }, { "epoch": 0.14834919975560132, "grad_norm": 0.439453125, "learning_rate": 2.8972721883470413e-05, "loss": 2.126, "step": 4598 }, { "epoch": 0.14838146360939766, "grad_norm": 0.494140625, "learning_rate": 2.897214943411533e-05, "loss": 2.1073, "step": 4599 }, { "epoch": 0.148413727463194, "grad_norm": 0.5546875, "learning_rate": 2.8971576830964847e-05, "loss": 2.1169, "step": 4600 }, { "epoch": 0.14844599131699035, "grad_norm": 0.5078125, "learning_rate": 2.897100407402527e-05, "loss": 2.0855, "step": 4601 }, { "epoch": 0.1484782551707867, "grad_norm": 0.435546875, "learning_rate": 2.897043116330291e-05, "loss": 2.1325, "step": 4602 }, { "epoch": 0.14851051902458304, "grad_norm": 0.578125, "learning_rate": 2.8969858098804052e-05, "loss": 2.1111, "step": 4603 }, { "epoch": 0.14854278287837938, "grad_norm": 0.59375, "learning_rate": 2.896928488053503e-05, "loss": 2.1206, "step": 4604 }, { "epoch": 0.14857504673217573, "grad_norm": 0.416015625, "learning_rate": 2.8968711508502138e-05, "loss": 2.0873, "step": 4605 }, { "epoch": 0.14860731058597207, "grad_norm": 0.5546875, "learning_rate": 2.896813798271169e-05, "loss": 2.0785, "step": 4606 }, { "epoch": 0.1486395744397684, "grad_norm": 0.51953125, "learning_rate": 2.896756430317e-05, "loss": 2.1364, "step": 4607 }, { "epoch": 0.14867183829356478, "grad_norm": 0.443359375, "learning_rate": 2.8966990469883384e-05, "loss": 2.1337, "step": 4608 }, { "epoch": 0.14870410214736113, "grad_norm": 0.453125, "learning_rate": 2.8966416482858157e-05, "loss": 2.1127, "step": 4609 }, { "epoch": 0.14873636600115747, "grad_norm": 0.423828125, "learning_rate": 2.8965842342100634e-05, "loss": 2.0797, "step": 4610 }, { "epoch": 0.14876862985495382, "grad_norm": 0.416015625, "learning_rate": 2.8965268047617137e-05, "loss": 2.0926, "step": 4611 }, { "epoch": 0.14880089370875016, "grad_norm": 0.46484375, "learning_rate": 2.8964693599413993e-05, "loss": 2.1296, "step": 4612 }, { "epoch": 0.1488331575625465, "grad_norm": 0.462890625, "learning_rate": 2.8964118997497517e-05, "loss": 2.0932, "step": 4613 }, { "epoch": 0.14886542141634285, "grad_norm": 0.3828125, "learning_rate": 2.8963544241874034e-05, "loss": 2.1397, "step": 4614 }, { "epoch": 0.1488976852701392, "grad_norm": 0.47265625, "learning_rate": 2.8962969332549877e-05, "loss": 2.1053, "step": 4615 }, { "epoch": 0.14892994912393553, "grad_norm": 0.412109375, "learning_rate": 2.8962394269531372e-05, "loss": 2.1143, "step": 4616 }, { "epoch": 0.14896221297773188, "grad_norm": 0.42578125, "learning_rate": 2.8961819052824844e-05, "loss": 2.1005, "step": 4617 }, { "epoch": 0.14899447683152825, "grad_norm": 0.408203125, "learning_rate": 2.896124368243663e-05, "loss": 2.0771, "step": 4618 }, { "epoch": 0.1490267406853246, "grad_norm": 0.41796875, "learning_rate": 2.896066815837306e-05, "loss": 2.1527, "step": 4619 }, { "epoch": 0.14905900453912094, "grad_norm": 0.419921875, "learning_rate": 2.8960092480640473e-05, "loss": 2.0972, "step": 4620 }, { "epoch": 0.14909126839291728, "grad_norm": 0.40234375, "learning_rate": 2.89595166492452e-05, "loss": 2.1145, "step": 4621 }, { "epoch": 0.14912353224671362, "grad_norm": 0.412109375, "learning_rate": 2.8958940664193582e-05, "loss": 2.1231, "step": 4622 }, { "epoch": 0.14915579610050997, "grad_norm": 0.412109375, "learning_rate": 2.895836452549196e-05, "loss": 2.1169, "step": 4623 }, { "epoch": 0.1491880599543063, "grad_norm": 0.416015625, "learning_rate": 2.8957788233146676e-05, "loss": 2.1328, "step": 4624 }, { "epoch": 0.14922032380810266, "grad_norm": 0.45703125, "learning_rate": 2.8957211787164074e-05, "loss": 2.1169, "step": 4625 }, { "epoch": 0.149252587661899, "grad_norm": 0.41796875, "learning_rate": 2.895663518755049e-05, "loss": 2.1019, "step": 4626 }, { "epoch": 0.14928485151569534, "grad_norm": 0.421875, "learning_rate": 2.895605843431229e-05, "loss": 2.1094, "step": 4627 }, { "epoch": 0.14931711536949172, "grad_norm": 0.435546875, "learning_rate": 2.89554815274558e-05, "loss": 2.1487, "step": 4628 }, { "epoch": 0.14934937922328806, "grad_norm": 0.41796875, "learning_rate": 2.8954904466987387e-05, "loss": 2.1013, "step": 4629 }, { "epoch": 0.1493816430770844, "grad_norm": 0.43359375, "learning_rate": 2.8954327252913398e-05, "loss": 2.111, "step": 4630 }, { "epoch": 0.14941390693088075, "grad_norm": 0.451171875, "learning_rate": 2.895374988524018e-05, "loss": 2.0948, "step": 4631 }, { "epoch": 0.1494461707846771, "grad_norm": 0.42578125, "learning_rate": 2.8953172363974095e-05, "loss": 2.0854, "step": 4632 }, { "epoch": 0.14947843463847343, "grad_norm": 0.4921875, "learning_rate": 2.8952594689121504e-05, "loss": 2.0881, "step": 4633 }, { "epoch": 0.14951069849226978, "grad_norm": 0.451171875, "learning_rate": 2.8952016860688755e-05, "loss": 2.112, "step": 4634 }, { "epoch": 0.14954296234606612, "grad_norm": 0.376953125, "learning_rate": 2.8951438878682214e-05, "loss": 2.1244, "step": 4635 }, { "epoch": 0.14957522619986247, "grad_norm": 0.458984375, "learning_rate": 2.895086074310824e-05, "loss": 2.1231, "step": 4636 }, { "epoch": 0.1496074900536588, "grad_norm": 0.51953125, "learning_rate": 2.8950282453973204e-05, "loss": 2.124, "step": 4637 }, { "epoch": 0.14963975390745518, "grad_norm": 0.4453125, "learning_rate": 2.8949704011283467e-05, "loss": 2.1459, "step": 4638 }, { "epoch": 0.14967201776125152, "grad_norm": 0.423828125, "learning_rate": 2.894912541504539e-05, "loss": 2.081, "step": 4639 }, { "epoch": 0.14970428161504787, "grad_norm": 0.5, "learning_rate": 2.8948546665265353e-05, "loss": 2.0411, "step": 4640 }, { "epoch": 0.1497365454688442, "grad_norm": 0.56640625, "learning_rate": 2.8947967761949722e-05, "loss": 2.073, "step": 4641 }, { "epoch": 0.14976880932264056, "grad_norm": 0.4765625, "learning_rate": 2.894738870510486e-05, "loss": 2.0133, "step": 4642 }, { "epoch": 0.1498010731764369, "grad_norm": 0.3984375, "learning_rate": 2.8946809494737155e-05, "loss": 2.0563, "step": 4643 }, { "epoch": 0.14983333703023324, "grad_norm": 0.5078125, "learning_rate": 2.8946230130852976e-05, "loss": 2.0716, "step": 4644 }, { "epoch": 0.1498656008840296, "grad_norm": 0.5234375, "learning_rate": 2.89456506134587e-05, "loss": 2.0351, "step": 4645 }, { "epoch": 0.14989786473782593, "grad_norm": 0.453125, "learning_rate": 2.894507094256071e-05, "loss": 2.0793, "step": 4646 }, { "epoch": 0.14993012859162227, "grad_norm": 0.462890625, "learning_rate": 2.8944491118165378e-05, "loss": 2.0796, "step": 4647 }, { "epoch": 0.14996239244541865, "grad_norm": 0.5078125, "learning_rate": 2.8943911140279094e-05, "loss": 2.0647, "step": 4648 }, { "epoch": 0.149994656299215, "grad_norm": 0.443359375, "learning_rate": 2.894333100890824e-05, "loss": 2.0433, "step": 4649 }, { "epoch": 0.15002692015301133, "grad_norm": 0.423828125, "learning_rate": 2.8942750724059203e-05, "loss": 2.055, "step": 4650 }, { "epoch": 0.15005918400680768, "grad_norm": 0.423828125, "learning_rate": 2.8942170285738362e-05, "loss": 2.0357, "step": 4651 }, { "epoch": 0.15009144786060402, "grad_norm": 0.427734375, "learning_rate": 2.8941589693952116e-05, "loss": 2.0504, "step": 4652 }, { "epoch": 0.15012371171440037, "grad_norm": 0.404296875, "learning_rate": 2.8941008948706853e-05, "loss": 2.0404, "step": 4653 }, { "epoch": 0.1501559755681967, "grad_norm": 0.421875, "learning_rate": 2.8940428050008962e-05, "loss": 2.0515, "step": 4654 }, { "epoch": 0.15018823942199305, "grad_norm": 0.45703125, "learning_rate": 2.8939846997864842e-05, "loss": 2.0328, "step": 4655 }, { "epoch": 0.1502205032757894, "grad_norm": 0.412109375, "learning_rate": 2.893926579228089e-05, "loss": 2.0431, "step": 4656 }, { "epoch": 0.15025276712958574, "grad_norm": 0.41796875, "learning_rate": 2.8938684433263496e-05, "loss": 2.0689, "step": 4657 }, { "epoch": 0.1502850309833821, "grad_norm": 0.419921875, "learning_rate": 2.8938102920819063e-05, "loss": 2.0297, "step": 4658 }, { "epoch": 0.15031729483717846, "grad_norm": 0.462890625, "learning_rate": 2.8937521254954e-05, "loss": 2.0473, "step": 4659 }, { "epoch": 0.1503495586909748, "grad_norm": 0.41015625, "learning_rate": 2.893693943567469e-05, "loss": 2.0277, "step": 4660 }, { "epoch": 0.15038182254477114, "grad_norm": 0.455078125, "learning_rate": 2.8936357462987554e-05, "loss": 2.0613, "step": 4661 }, { "epoch": 0.1504140863985675, "grad_norm": 0.458984375, "learning_rate": 2.8935775336898996e-05, "loss": 2.0636, "step": 4662 }, { "epoch": 0.15044635025236383, "grad_norm": 0.423828125, "learning_rate": 2.8935193057415418e-05, "loss": 2.0472, "step": 4663 }, { "epoch": 0.15047861410616017, "grad_norm": 0.474609375, "learning_rate": 2.893461062454323e-05, "loss": 2.0257, "step": 4664 }, { "epoch": 0.15051087795995652, "grad_norm": 0.51171875, "learning_rate": 2.8934028038288848e-05, "loss": 2.0745, "step": 4665 }, { "epoch": 0.15054314181375286, "grad_norm": 0.482421875, "learning_rate": 2.8933445298658677e-05, "loss": 2.0482, "step": 4666 }, { "epoch": 0.1505754056675492, "grad_norm": 0.462890625, "learning_rate": 2.8932862405659144e-05, "loss": 2.0669, "step": 4667 }, { "epoch": 0.15060766952134558, "grad_norm": 0.408203125, "learning_rate": 2.893227935929665e-05, "loss": 2.0611, "step": 4668 }, { "epoch": 0.15063993337514192, "grad_norm": 0.419921875, "learning_rate": 2.8931696159577624e-05, "loss": 2.0539, "step": 4669 }, { "epoch": 0.15067219722893826, "grad_norm": 0.421875, "learning_rate": 2.893111280650848e-05, "loss": 2.042, "step": 4670 }, { "epoch": 0.1507044610827346, "grad_norm": 0.412109375, "learning_rate": 2.8930529300095638e-05, "loss": 2.0774, "step": 4671 }, { "epoch": 0.15073672493653095, "grad_norm": 0.373046875, "learning_rate": 2.8929945640345523e-05, "loss": 2.0113, "step": 4672 }, { "epoch": 0.1507689887903273, "grad_norm": 0.4296875, "learning_rate": 2.892936182726456e-05, "loss": 2.0127, "step": 4673 }, { "epoch": 0.15080125264412364, "grad_norm": 0.388671875, "learning_rate": 2.8928777860859178e-05, "loss": 2.0041, "step": 4674 }, { "epoch": 0.15083351649791998, "grad_norm": 0.431640625, "learning_rate": 2.89281937411358e-05, "loss": 2.04, "step": 4675 }, { "epoch": 0.15086578035171633, "grad_norm": 0.4296875, "learning_rate": 2.892760946810086e-05, "loss": 2.0397, "step": 4676 }, { "epoch": 0.15089804420551267, "grad_norm": 0.4453125, "learning_rate": 2.8927025041760785e-05, "loss": 2.0414, "step": 4677 }, { "epoch": 0.15093030805930904, "grad_norm": 0.46484375, "learning_rate": 2.8926440462122007e-05, "loss": 2.0172, "step": 4678 }, { "epoch": 0.1509625719131054, "grad_norm": 0.455078125, "learning_rate": 2.8925855729190966e-05, "loss": 2.0461, "step": 4679 }, { "epoch": 0.15099483576690173, "grad_norm": 0.400390625, "learning_rate": 2.8925270842974094e-05, "loss": 2.0674, "step": 4680 }, { "epoch": 0.15102709962069807, "grad_norm": 0.4375, "learning_rate": 2.892468580347783e-05, "loss": 2.032, "step": 4681 }, { "epoch": 0.15105936347449442, "grad_norm": 0.474609375, "learning_rate": 2.8924100610708613e-05, "loss": 2.0525, "step": 4682 }, { "epoch": 0.15109162732829076, "grad_norm": 0.439453125, "learning_rate": 2.892351526467289e-05, "loss": 2.0212, "step": 4683 }, { "epoch": 0.1511238911820871, "grad_norm": 0.40625, "learning_rate": 2.8922929765377094e-05, "loss": 2.0497, "step": 4684 }, { "epoch": 0.15115615503588345, "grad_norm": 0.4296875, "learning_rate": 2.8922344112827678e-05, "loss": 2.0567, "step": 4685 }, { "epoch": 0.1511884188896798, "grad_norm": 0.4296875, "learning_rate": 2.8921758307031086e-05, "loss": 2.061, "step": 4686 }, { "epoch": 0.15122068274347614, "grad_norm": 0.404296875, "learning_rate": 2.892117234799377e-05, "loss": 2.0297, "step": 4687 }, { "epoch": 0.15125294659727248, "grad_norm": 0.427734375, "learning_rate": 2.892058623572217e-05, "loss": 2.0506, "step": 4688 }, { "epoch": 0.15128521045106885, "grad_norm": 0.4296875, "learning_rate": 2.8919999970222747e-05, "loss": 2.0565, "step": 4689 }, { "epoch": 0.1513174743048652, "grad_norm": 0.408203125, "learning_rate": 2.891941355150195e-05, "loss": 2.0706, "step": 4690 }, { "epoch": 0.15134973815866154, "grad_norm": 0.412109375, "learning_rate": 2.8918826979566234e-05, "loss": 2.0775, "step": 4691 }, { "epoch": 0.15138200201245788, "grad_norm": 0.41015625, "learning_rate": 2.8918240254422056e-05, "loss": 2.0407, "step": 4692 }, { "epoch": 0.15141426586625423, "grad_norm": 0.404296875, "learning_rate": 2.8917653376075873e-05, "loss": 2.0624, "step": 4693 }, { "epoch": 0.15144652972005057, "grad_norm": 0.392578125, "learning_rate": 2.8917066344534146e-05, "loss": 2.0054, "step": 4694 }, { "epoch": 0.15147879357384691, "grad_norm": 0.427734375, "learning_rate": 2.891647915980334e-05, "loss": 2.055, "step": 4695 }, { "epoch": 0.15151105742764326, "grad_norm": 0.51171875, "learning_rate": 2.891589182188991e-05, "loss": 2.0554, "step": 4696 }, { "epoch": 0.1515433212814396, "grad_norm": 0.5390625, "learning_rate": 2.8915304330800327e-05, "loss": 2.0218, "step": 4697 }, { "epoch": 0.15157558513523595, "grad_norm": 0.62109375, "learning_rate": 2.891471668654106e-05, "loss": 2.046, "step": 4698 }, { "epoch": 0.15160784898903232, "grad_norm": 0.6640625, "learning_rate": 2.891412888911857e-05, "loss": 2.0301, "step": 4699 }, { "epoch": 0.15164011284282866, "grad_norm": 0.5390625, "learning_rate": 2.8913540938539336e-05, "loss": 2.0452, "step": 4700 }, { "epoch": 0.151672376696625, "grad_norm": 0.42578125, "learning_rate": 2.891295283480982e-05, "loss": 2.0574, "step": 4701 }, { "epoch": 0.15170464055042135, "grad_norm": 0.61328125, "learning_rate": 2.891236457793651e-05, "loss": 2.0355, "step": 4702 }, { "epoch": 0.1517369044042177, "grad_norm": 0.5, "learning_rate": 2.8911776167925858e-05, "loss": 2.0631, "step": 4703 }, { "epoch": 0.15176916825801404, "grad_norm": 0.408203125, "learning_rate": 2.891118760478436e-05, "loss": 2.025, "step": 4704 }, { "epoch": 0.15180143211181038, "grad_norm": 0.515625, "learning_rate": 2.8910598888518492e-05, "loss": 2.0579, "step": 4705 }, { "epoch": 0.15183369596560672, "grad_norm": 0.431640625, "learning_rate": 2.8910010019134725e-05, "loss": 2.0502, "step": 4706 }, { "epoch": 0.15186595981940307, "grad_norm": 0.4296875, "learning_rate": 2.890942099663955e-05, "loss": 2.0376, "step": 4707 }, { "epoch": 0.1518982236731994, "grad_norm": 0.494140625, "learning_rate": 2.890883182103945e-05, "loss": 2.0662, "step": 4708 }, { "epoch": 0.15193048752699578, "grad_norm": 0.4140625, "learning_rate": 2.8908242492340903e-05, "loss": 2.0327, "step": 4709 }, { "epoch": 0.15196275138079213, "grad_norm": 0.423828125, "learning_rate": 2.89076530105504e-05, "loss": 2.011, "step": 4710 }, { "epoch": 0.15199501523458847, "grad_norm": 0.455078125, "learning_rate": 2.8907063375674432e-05, "loss": 2.0799, "step": 4711 }, { "epoch": 0.15202727908838481, "grad_norm": 0.400390625, "learning_rate": 2.8906473587719487e-05, "loss": 2.039, "step": 4712 }, { "epoch": 0.15205954294218116, "grad_norm": 0.443359375, "learning_rate": 2.8905883646692058e-05, "loss": 2.0307, "step": 4713 }, { "epoch": 0.1520918067959775, "grad_norm": 0.419921875, "learning_rate": 2.8905293552598637e-05, "loss": 2.0462, "step": 4714 }, { "epoch": 0.15212407064977385, "grad_norm": 0.4140625, "learning_rate": 2.890470330544572e-05, "loss": 2.0323, "step": 4715 }, { "epoch": 0.1521563345035702, "grad_norm": 0.42578125, "learning_rate": 2.8904112905239804e-05, "loss": 2.0356, "step": 4716 }, { "epoch": 0.15218859835736653, "grad_norm": 0.478515625, "learning_rate": 2.8903522351987386e-05, "loss": 2.0643, "step": 4717 }, { "epoch": 0.15222086221116288, "grad_norm": 0.443359375, "learning_rate": 2.8902931645694973e-05, "loss": 2.0391, "step": 4718 }, { "epoch": 0.15225312606495925, "grad_norm": 0.408203125, "learning_rate": 2.890234078636906e-05, "loss": 2.0474, "step": 4719 }, { "epoch": 0.1522853899187556, "grad_norm": 0.47265625, "learning_rate": 2.8901749774016155e-05, "loss": 2.0232, "step": 4720 }, { "epoch": 0.15231765377255194, "grad_norm": 0.423828125, "learning_rate": 2.8901158608642757e-05, "loss": 2.0674, "step": 4721 }, { "epoch": 0.15234991762634828, "grad_norm": 0.39453125, "learning_rate": 2.890056729025538e-05, "loss": 2.0263, "step": 4722 }, { "epoch": 0.15238218148014462, "grad_norm": 0.3984375, "learning_rate": 2.8899975818860533e-05, "loss": 2.0494, "step": 4723 }, { "epoch": 0.15241444533394097, "grad_norm": 0.4140625, "learning_rate": 2.889938419446472e-05, "loss": 2.0589, "step": 4724 }, { "epoch": 0.1524467091877373, "grad_norm": 0.419921875, "learning_rate": 2.889879241707446e-05, "loss": 2.044, "step": 4725 }, { "epoch": 0.15247897304153366, "grad_norm": 0.404296875, "learning_rate": 2.889820048669626e-05, "loss": 2.0686, "step": 4726 }, { "epoch": 0.15251123689533, "grad_norm": 0.41015625, "learning_rate": 2.8897608403336647e-05, "loss": 2.0354, "step": 4727 }, { "epoch": 0.15254350074912634, "grad_norm": 0.40234375, "learning_rate": 2.8897016167002126e-05, "loss": 2.0585, "step": 4728 }, { "epoch": 0.15257576460292271, "grad_norm": 0.41015625, "learning_rate": 2.889642377769922e-05, "loss": 2.024, "step": 4729 }, { "epoch": 0.15260802845671906, "grad_norm": 0.419921875, "learning_rate": 2.889583123543445e-05, "loss": 2.042, "step": 4730 }, { "epoch": 0.1526402923105154, "grad_norm": 0.392578125, "learning_rate": 2.8895238540214342e-05, "loss": 2.0506, "step": 4731 }, { "epoch": 0.15267255616431175, "grad_norm": 0.439453125, "learning_rate": 2.8894645692045412e-05, "loss": 2.0831, "step": 4732 }, { "epoch": 0.1527048200181081, "grad_norm": 0.408203125, "learning_rate": 2.889405269093419e-05, "loss": 2.1408, "step": 4733 }, { "epoch": 0.15273708387190443, "grad_norm": 0.43359375, "learning_rate": 2.889345953688721e-05, "loss": 2.0934, "step": 4734 }, { "epoch": 0.15276934772570078, "grad_norm": 0.427734375, "learning_rate": 2.889286622991099e-05, "loss": 2.1418, "step": 4735 }, { "epoch": 0.15280161157949712, "grad_norm": 0.40234375, "learning_rate": 2.8892272770012066e-05, "loss": 2.0991, "step": 4736 }, { "epoch": 0.15283387543329346, "grad_norm": 0.4296875, "learning_rate": 2.889167915719697e-05, "loss": 2.1355, "step": 4737 }, { "epoch": 0.1528661392870898, "grad_norm": 0.46875, "learning_rate": 2.8891085391472236e-05, "loss": 2.0934, "step": 4738 }, { "epoch": 0.15289840314088618, "grad_norm": 0.486328125, "learning_rate": 2.88904914728444e-05, "loss": 2.1197, "step": 4739 }, { "epoch": 0.15293066699468252, "grad_norm": 0.515625, "learning_rate": 2.8889897401319997e-05, "loss": 2.1297, "step": 4740 }, { "epoch": 0.15296293084847887, "grad_norm": 0.4375, "learning_rate": 2.8889303176905568e-05, "loss": 2.1139, "step": 4741 }, { "epoch": 0.1529951947022752, "grad_norm": 0.416015625, "learning_rate": 2.8888708799607652e-05, "loss": 2.1093, "step": 4742 }, { "epoch": 0.15302745855607156, "grad_norm": 0.42578125, "learning_rate": 2.88881142694328e-05, "loss": 2.1118, "step": 4743 }, { "epoch": 0.1530597224098679, "grad_norm": 0.41015625, "learning_rate": 2.8887519586387543e-05, "loss": 2.1218, "step": 4744 }, { "epoch": 0.15309198626366424, "grad_norm": 0.44921875, "learning_rate": 2.8886924750478437e-05, "loss": 2.1212, "step": 4745 }, { "epoch": 0.1531242501174606, "grad_norm": 0.421875, "learning_rate": 2.888632976171202e-05, "loss": 2.1019, "step": 4746 }, { "epoch": 0.15315651397125693, "grad_norm": 0.45703125, "learning_rate": 2.8885734620094853e-05, "loss": 2.112, "step": 4747 }, { "epoch": 0.15318877782505327, "grad_norm": 0.44140625, "learning_rate": 2.888513932563348e-05, "loss": 2.1028, "step": 4748 }, { "epoch": 0.15322104167884965, "grad_norm": 0.4140625, "learning_rate": 2.8884543878334452e-05, "loss": 2.0925, "step": 4749 }, { "epoch": 0.153253305532646, "grad_norm": 0.47265625, "learning_rate": 2.8883948278204327e-05, "loss": 2.1114, "step": 4750 }, { "epoch": 0.15328556938644233, "grad_norm": 0.470703125, "learning_rate": 2.8883352525249658e-05, "loss": 2.1046, "step": 4751 }, { "epoch": 0.15331783324023868, "grad_norm": 0.458984375, "learning_rate": 2.8882756619477008e-05, "loss": 2.1294, "step": 4752 }, { "epoch": 0.15335009709403502, "grad_norm": 0.4375, "learning_rate": 2.8882160560892932e-05, "loss": 2.1274, "step": 4753 }, { "epoch": 0.15338236094783136, "grad_norm": 0.3984375, "learning_rate": 2.8881564349503985e-05, "loss": 2.1347, "step": 4754 }, { "epoch": 0.1534146248016277, "grad_norm": 0.51171875, "learning_rate": 2.8880967985316747e-05, "loss": 2.0756, "step": 4755 }, { "epoch": 0.15344688865542405, "grad_norm": 0.53515625, "learning_rate": 2.8880371468337764e-05, "loss": 2.1344, "step": 4756 }, { "epoch": 0.1534791525092204, "grad_norm": 0.5234375, "learning_rate": 2.887977479857361e-05, "loss": 2.1327, "step": 4757 }, { "epoch": 0.15351141636301674, "grad_norm": 0.4375, "learning_rate": 2.8879177976030858e-05, "loss": 2.1117, "step": 4758 }, { "epoch": 0.1535436802168131, "grad_norm": 0.380859375, "learning_rate": 2.8878581000716064e-05, "loss": 2.1197, "step": 4759 }, { "epoch": 0.15357594407060945, "grad_norm": 0.474609375, "learning_rate": 2.8877983872635813e-05, "loss": 2.1153, "step": 4760 }, { "epoch": 0.1536082079244058, "grad_norm": 0.625, "learning_rate": 2.8877386591796666e-05, "loss": 2.1194, "step": 4761 }, { "epoch": 0.15364047177820214, "grad_norm": 0.5859375, "learning_rate": 2.8876789158205207e-05, "loss": 2.1232, "step": 4762 }, { "epoch": 0.15367273563199849, "grad_norm": 0.404296875, "learning_rate": 2.8876191571868008e-05, "loss": 2.1099, "step": 4763 }, { "epoch": 0.15370499948579483, "grad_norm": 0.4765625, "learning_rate": 2.8875593832791646e-05, "loss": 2.1042, "step": 4764 }, { "epoch": 0.15373726333959117, "grad_norm": 0.484375, "learning_rate": 2.88749959409827e-05, "loss": 2.1154, "step": 4765 }, { "epoch": 0.15376952719338752, "grad_norm": 0.435546875, "learning_rate": 2.8874397896447756e-05, "loss": 2.1002, "step": 4766 }, { "epoch": 0.15380179104718386, "grad_norm": 0.427734375, "learning_rate": 2.887379969919339e-05, "loss": 2.118, "step": 4767 }, { "epoch": 0.1538340549009802, "grad_norm": 0.44921875, "learning_rate": 2.887320134922619e-05, "loss": 2.0814, "step": 4768 }, { "epoch": 0.15386631875477658, "grad_norm": 0.392578125, "learning_rate": 2.8872602846552745e-05, "loss": 2.1282, "step": 4769 }, { "epoch": 0.15389858260857292, "grad_norm": 0.41015625, "learning_rate": 2.8872004191179637e-05, "loss": 2.0829, "step": 4770 }, { "epoch": 0.15393084646236926, "grad_norm": 0.41796875, "learning_rate": 2.8871405383113463e-05, "loss": 2.0946, "step": 4771 }, { "epoch": 0.1539631103161656, "grad_norm": 0.396484375, "learning_rate": 2.8870806422360807e-05, "loss": 2.1247, "step": 4772 }, { "epoch": 0.15399537416996195, "grad_norm": 0.4453125, "learning_rate": 2.8870207308928263e-05, "loss": 2.1018, "step": 4773 }, { "epoch": 0.1540276380237583, "grad_norm": 0.423828125, "learning_rate": 2.886960804282243e-05, "loss": 2.1351, "step": 4774 }, { "epoch": 0.15405990187755464, "grad_norm": 0.392578125, "learning_rate": 2.88690086240499e-05, "loss": 2.0985, "step": 4775 }, { "epoch": 0.15409216573135098, "grad_norm": 0.4921875, "learning_rate": 2.8868409052617274e-05, "loss": 2.0953, "step": 4776 }, { "epoch": 0.15412442958514733, "grad_norm": 0.400390625, "learning_rate": 2.8867809328531147e-05, "loss": 2.107, "step": 4777 }, { "epoch": 0.15415669343894367, "grad_norm": 0.421875, "learning_rate": 2.8867209451798126e-05, "loss": 2.0767, "step": 4778 }, { "epoch": 0.15418895729274001, "grad_norm": 0.486328125, "learning_rate": 2.8866609422424818e-05, "loss": 2.1147, "step": 4779 }, { "epoch": 0.15422122114653639, "grad_norm": 0.412109375, "learning_rate": 2.886600924041781e-05, "loss": 2.1176, "step": 4780 }, { "epoch": 0.15425348500033273, "grad_norm": 0.455078125, "learning_rate": 2.8865408905783727e-05, "loss": 2.1456, "step": 4781 }, { "epoch": 0.15428574885412907, "grad_norm": 0.49609375, "learning_rate": 2.8864808418529168e-05, "loss": 2.0982, "step": 4782 }, { "epoch": 0.15431801270792542, "grad_norm": 0.404296875, "learning_rate": 2.8864207778660742e-05, "loss": 2.0936, "step": 4783 }, { "epoch": 0.15435027656172176, "grad_norm": 0.421875, "learning_rate": 2.8863606986185067e-05, "loss": 2.0947, "step": 4784 }, { "epoch": 0.1543825404155181, "grad_norm": 0.43359375, "learning_rate": 2.8863006041108753e-05, "loss": 2.1111, "step": 4785 }, { "epoch": 0.15441480426931445, "grad_norm": 0.47265625, "learning_rate": 2.8862404943438406e-05, "loss": 2.1289, "step": 4786 }, { "epoch": 0.1544470681231108, "grad_norm": 0.451171875, "learning_rate": 2.8861803693180658e-05, "loss": 2.0466, "step": 4787 }, { "epoch": 0.15447933197690714, "grad_norm": 0.4609375, "learning_rate": 2.8861202290342118e-05, "loss": 2.1041, "step": 4788 }, { "epoch": 0.15451159583070348, "grad_norm": 0.50390625, "learning_rate": 2.8860600734929403e-05, "loss": 2.1301, "step": 4789 }, { "epoch": 0.15454385968449985, "grad_norm": 0.39453125, "learning_rate": 2.8859999026949145e-05, "loss": 2.0572, "step": 4790 }, { "epoch": 0.1545761235382962, "grad_norm": 0.50390625, "learning_rate": 2.8859397166407956e-05, "loss": 2.1071, "step": 4791 }, { "epoch": 0.15460838739209254, "grad_norm": 0.46484375, "learning_rate": 2.8858795153312467e-05, "loss": 2.1054, "step": 4792 }, { "epoch": 0.15464065124588888, "grad_norm": 0.3984375, "learning_rate": 2.8858192987669303e-05, "loss": 2.0939, "step": 4793 }, { "epoch": 0.15467291509968523, "grad_norm": 0.392578125, "learning_rate": 2.885759066948509e-05, "loss": 2.1172, "step": 4794 }, { "epoch": 0.15470517895348157, "grad_norm": 0.443359375, "learning_rate": 2.8856988198766465e-05, "loss": 2.1086, "step": 4795 }, { "epoch": 0.15473744280727791, "grad_norm": 0.419921875, "learning_rate": 2.8856385575520052e-05, "loss": 2.1355, "step": 4796 }, { "epoch": 0.15476970666107426, "grad_norm": 0.40625, "learning_rate": 2.8855782799752486e-05, "loss": 2.1056, "step": 4797 }, { "epoch": 0.1548019705148706, "grad_norm": 0.390625, "learning_rate": 2.8855179871470404e-05, "loss": 2.0925, "step": 4798 }, { "epoch": 0.15483423436866695, "grad_norm": 0.388671875, "learning_rate": 2.8854576790680446e-05, "loss": 2.0994, "step": 4799 }, { "epoch": 0.15486649822246332, "grad_norm": 0.390625, "learning_rate": 2.8853973557389243e-05, "loss": 2.1166, "step": 4800 }, { "epoch": 0.15489876207625966, "grad_norm": 0.416015625, "learning_rate": 2.8853370171603436e-05, "loss": 2.1123, "step": 4801 }, { "epoch": 0.154931025930056, "grad_norm": 0.4609375, "learning_rate": 2.885276663332967e-05, "loss": 2.103, "step": 4802 }, { "epoch": 0.15496328978385235, "grad_norm": 0.453125, "learning_rate": 2.8852162942574588e-05, "loss": 2.1146, "step": 4803 }, { "epoch": 0.1549955536376487, "grad_norm": 0.40234375, "learning_rate": 2.8851559099344833e-05, "loss": 2.0919, "step": 4804 }, { "epoch": 0.15502781749144504, "grad_norm": 0.451171875, "learning_rate": 2.8850955103647053e-05, "loss": 2.0808, "step": 4805 }, { "epoch": 0.15506008134524138, "grad_norm": 0.52734375, "learning_rate": 2.8850350955487894e-05, "loss": 2.1092, "step": 4806 }, { "epoch": 0.15509234519903772, "grad_norm": 0.515625, "learning_rate": 2.884974665487401e-05, "loss": 2.1146, "step": 4807 }, { "epoch": 0.15512460905283407, "grad_norm": 0.46484375, "learning_rate": 2.8849142201812047e-05, "loss": 2.1327, "step": 4808 }, { "epoch": 0.1551568729066304, "grad_norm": 0.419921875, "learning_rate": 2.8848537596308665e-05, "loss": 2.107, "step": 4809 }, { "epoch": 0.15518913676042678, "grad_norm": 0.439453125, "learning_rate": 2.8847932838370517e-05, "loss": 2.1016, "step": 4810 }, { "epoch": 0.15522140061422313, "grad_norm": 0.435546875, "learning_rate": 2.8847327928004253e-05, "loss": 2.0739, "step": 4811 }, { "epoch": 0.15525366446801947, "grad_norm": 0.400390625, "learning_rate": 2.8846722865216542e-05, "loss": 2.0762, "step": 4812 }, { "epoch": 0.1552859283218158, "grad_norm": 0.421875, "learning_rate": 2.884611765001404e-05, "loss": 2.0921, "step": 4813 }, { "epoch": 0.15531819217561216, "grad_norm": 0.40625, "learning_rate": 2.8845512282403407e-05, "loss": 2.1237, "step": 4814 }, { "epoch": 0.1553504560294085, "grad_norm": 0.40234375, "learning_rate": 2.884490676239131e-05, "loss": 2.1209, "step": 4815 }, { "epoch": 0.15538271988320485, "grad_norm": 0.423828125, "learning_rate": 2.8844301089984407e-05, "loss": 2.106, "step": 4816 }, { "epoch": 0.1554149837370012, "grad_norm": 0.40234375, "learning_rate": 2.884369526518937e-05, "loss": 2.1017, "step": 4817 }, { "epoch": 0.15544724759079753, "grad_norm": 0.421875, "learning_rate": 2.8843089288012867e-05, "loss": 2.1033, "step": 4818 }, { "epoch": 0.15547951144459388, "grad_norm": 0.439453125, "learning_rate": 2.8842483158461572e-05, "loss": 2.1323, "step": 4819 }, { "epoch": 0.15551177529839025, "grad_norm": 0.3984375, "learning_rate": 2.884187687654215e-05, "loss": 2.102, "step": 4820 }, { "epoch": 0.1555440391521866, "grad_norm": 0.408203125, "learning_rate": 2.8841270442261278e-05, "loss": 2.104, "step": 4821 }, { "epoch": 0.15557630300598294, "grad_norm": 0.42578125, "learning_rate": 2.884066385562563e-05, "loss": 2.1122, "step": 4822 }, { "epoch": 0.15560856685977928, "grad_norm": 0.423828125, "learning_rate": 2.8840057116641886e-05, "loss": 2.1184, "step": 4823 }, { "epoch": 0.15564083071357562, "grad_norm": 0.39453125, "learning_rate": 2.8839450225316717e-05, "loss": 2.1276, "step": 4824 }, { "epoch": 0.15567309456737197, "grad_norm": 0.416015625, "learning_rate": 2.883884318165681e-05, "loss": 2.133, "step": 4825 }, { "epoch": 0.1557053584211683, "grad_norm": 0.4140625, "learning_rate": 2.883823598566885e-05, "loss": 2.1227, "step": 4826 }, { "epoch": 0.15573762227496465, "grad_norm": 0.4609375, "learning_rate": 2.883762863735951e-05, "loss": 2.082, "step": 4827 }, { "epoch": 0.155769886128761, "grad_norm": 0.484375, "learning_rate": 2.8837021136735484e-05, "loss": 2.1026, "step": 4828 }, { "epoch": 0.15580214998255734, "grad_norm": 0.51953125, "learning_rate": 2.8836413483803457e-05, "loss": 2.1037, "step": 4829 }, { "epoch": 0.1558344138363537, "grad_norm": 0.494140625, "learning_rate": 2.8835805678570115e-05, "loss": 2.1264, "step": 4830 }, { "epoch": 0.15586667769015006, "grad_norm": 0.4765625, "learning_rate": 2.883519772104215e-05, "loss": 2.1279, "step": 4831 }, { "epoch": 0.1558989415439464, "grad_norm": 0.42578125, "learning_rate": 2.8834589611226252e-05, "loss": 2.0981, "step": 4832 }, { "epoch": 0.15593120539774274, "grad_norm": 0.380859375, "learning_rate": 2.883398134912912e-05, "loss": 2.1077, "step": 4833 }, { "epoch": 0.1559634692515391, "grad_norm": 0.421875, "learning_rate": 2.883337293475744e-05, "loss": 2.1149, "step": 4834 }, { "epoch": 0.15599573310533543, "grad_norm": 0.51953125, "learning_rate": 2.883276436811792e-05, "loss": 2.1354, "step": 4835 }, { "epoch": 0.15602799695913178, "grad_norm": 0.49609375, "learning_rate": 2.883215564921725e-05, "loss": 2.1188, "step": 4836 }, { "epoch": 0.15606026081292812, "grad_norm": 0.400390625, "learning_rate": 2.8831546778062137e-05, "loss": 2.1251, "step": 4837 }, { "epoch": 0.15609252466672446, "grad_norm": 0.419921875, "learning_rate": 2.883093775465928e-05, "loss": 2.1385, "step": 4838 }, { "epoch": 0.1561247885205208, "grad_norm": 0.51171875, "learning_rate": 2.883032857901538e-05, "loss": 2.1141, "step": 4839 }, { "epoch": 0.15615705237431718, "grad_norm": 0.494140625, "learning_rate": 2.8829719251137146e-05, "loss": 2.1003, "step": 4840 }, { "epoch": 0.15618931622811352, "grad_norm": 0.400390625, "learning_rate": 2.8829109771031282e-05, "loss": 2.1198, "step": 4841 }, { "epoch": 0.15622158008190987, "grad_norm": 0.44921875, "learning_rate": 2.88285001387045e-05, "loss": 2.0984, "step": 4842 }, { "epoch": 0.1562538439357062, "grad_norm": 0.46875, "learning_rate": 2.882789035416351e-05, "loss": 2.0885, "step": 4843 }, { "epoch": 0.15628610778950255, "grad_norm": 0.400390625, "learning_rate": 2.8827280417415024e-05, "loss": 2.1205, "step": 4844 }, { "epoch": 0.1563183716432989, "grad_norm": 0.490234375, "learning_rate": 2.882667032846575e-05, "loss": 2.1374, "step": 4845 }, { "epoch": 0.15635063549709524, "grad_norm": 0.458984375, "learning_rate": 2.8826060087322414e-05, "loss": 2.1183, "step": 4846 }, { "epoch": 0.15638289935089159, "grad_norm": 0.41796875, "learning_rate": 2.8825449693991723e-05, "loss": 2.1267, "step": 4847 }, { "epoch": 0.15641516320468793, "grad_norm": 0.54296875, "learning_rate": 2.8824839148480402e-05, "loss": 2.119, "step": 4848 }, { "epoch": 0.15644742705848427, "grad_norm": 0.474609375, "learning_rate": 2.882422845079517e-05, "loss": 2.1185, "step": 4849 }, { "epoch": 0.15647969091228064, "grad_norm": 0.44921875, "learning_rate": 2.882361760094275e-05, "loss": 2.1273, "step": 4850 }, { "epoch": 0.156511954766077, "grad_norm": 0.49609375, "learning_rate": 2.882300659892986e-05, "loss": 2.1176, "step": 4851 }, { "epoch": 0.15654421861987333, "grad_norm": 0.45703125, "learning_rate": 2.882239544476323e-05, "loss": 2.1133, "step": 4852 }, { "epoch": 0.15657648247366968, "grad_norm": 0.435546875, "learning_rate": 2.8821784138449597e-05, "loss": 2.1024, "step": 4853 }, { "epoch": 0.15660874632746602, "grad_norm": 0.439453125, "learning_rate": 2.882117267999567e-05, "loss": 2.0924, "step": 4854 }, { "epoch": 0.15664101018126236, "grad_norm": 0.427734375, "learning_rate": 2.882056106940819e-05, "loss": 2.1134, "step": 4855 }, { "epoch": 0.1566732740350587, "grad_norm": 0.400390625, "learning_rate": 2.8819949306693892e-05, "loss": 2.1144, "step": 4856 }, { "epoch": 0.15670553788885505, "grad_norm": 0.453125, "learning_rate": 2.8819337391859507e-05, "loss": 2.1283, "step": 4857 }, { "epoch": 0.1567378017426514, "grad_norm": 0.4140625, "learning_rate": 2.8818725324911763e-05, "loss": 2.1255, "step": 4858 }, { "epoch": 0.15677006559644774, "grad_norm": 0.435546875, "learning_rate": 2.881811310585741e-05, "loss": 2.1286, "step": 4859 }, { "epoch": 0.1568023294502441, "grad_norm": 0.4296875, "learning_rate": 2.8817500734703178e-05, "loss": 2.0897, "step": 4860 }, { "epoch": 0.15683459330404045, "grad_norm": 0.439453125, "learning_rate": 2.8816888211455818e-05, "loss": 2.0946, "step": 4861 }, { "epoch": 0.1568668571578368, "grad_norm": 0.431640625, "learning_rate": 2.8816275536122056e-05, "loss": 2.1189, "step": 4862 }, { "epoch": 0.15689912101163314, "grad_norm": 0.42578125, "learning_rate": 2.8815662708708647e-05, "loss": 2.0902, "step": 4863 }, { "epoch": 0.15693138486542949, "grad_norm": 0.439453125, "learning_rate": 2.8815049729222338e-05, "loss": 2.0725, "step": 4864 }, { "epoch": 0.15696364871922583, "grad_norm": 0.42578125, "learning_rate": 2.8814436597669868e-05, "loss": 2.1246, "step": 4865 }, { "epoch": 0.15699591257302217, "grad_norm": 0.4765625, "learning_rate": 2.8813823314057995e-05, "loss": 2.0704, "step": 4866 }, { "epoch": 0.15702817642681852, "grad_norm": 0.4921875, "learning_rate": 2.881320987839346e-05, "loss": 2.0967, "step": 4867 }, { "epoch": 0.15706044028061486, "grad_norm": 0.51171875, "learning_rate": 2.8812596290683024e-05, "loss": 2.1312, "step": 4868 }, { "epoch": 0.1570927041344112, "grad_norm": 0.43359375, "learning_rate": 2.8811982550933433e-05, "loss": 2.1148, "step": 4869 }, { "epoch": 0.15712496798820755, "grad_norm": 0.470703125, "learning_rate": 2.881136865915145e-05, "loss": 2.1169, "step": 4870 }, { "epoch": 0.15715723184200392, "grad_norm": 0.486328125, "learning_rate": 2.8810754615343828e-05, "loss": 2.1093, "step": 4871 }, { "epoch": 0.15718949569580026, "grad_norm": 0.423828125, "learning_rate": 2.8810140419517326e-05, "loss": 2.092, "step": 4872 }, { "epoch": 0.1572217595495966, "grad_norm": 0.423828125, "learning_rate": 2.8809526071678703e-05, "loss": 2.0994, "step": 4873 }, { "epoch": 0.15725402340339295, "grad_norm": 0.416015625, "learning_rate": 2.8808911571834732e-05, "loss": 2.1087, "step": 4874 }, { "epoch": 0.1572862872571893, "grad_norm": 0.423828125, "learning_rate": 2.8808296919992166e-05, "loss": 2.1267, "step": 4875 }, { "epoch": 0.15731855111098564, "grad_norm": 0.408203125, "learning_rate": 2.880768211615777e-05, "loss": 2.1074, "step": 4876 }, { "epoch": 0.15735081496478198, "grad_norm": 0.392578125, "learning_rate": 2.8807067160338316e-05, "loss": 2.0707, "step": 4877 }, { "epoch": 0.15738307881857833, "grad_norm": 0.416015625, "learning_rate": 2.8806452052540574e-05, "loss": 2.1088, "step": 4878 }, { "epoch": 0.15741534267237467, "grad_norm": 0.470703125, "learning_rate": 2.880583679277131e-05, "loss": 2.0953, "step": 4879 }, { "epoch": 0.157447606526171, "grad_norm": 0.458984375, "learning_rate": 2.88052213810373e-05, "loss": 2.1206, "step": 4880 }, { "epoch": 0.15747987037996738, "grad_norm": 0.423828125, "learning_rate": 2.880460581734532e-05, "loss": 2.0705, "step": 4881 }, { "epoch": 0.15751213423376373, "grad_norm": 0.400390625, "learning_rate": 2.8803990101702138e-05, "loss": 2.1144, "step": 4882 }, { "epoch": 0.15754439808756007, "grad_norm": 0.390625, "learning_rate": 2.8803374234114538e-05, "loss": 2.1008, "step": 4883 }, { "epoch": 0.15757666194135642, "grad_norm": 0.38671875, "learning_rate": 2.8802758214589292e-05, "loss": 2.091, "step": 4884 }, { "epoch": 0.15760892579515276, "grad_norm": 0.4140625, "learning_rate": 2.8802142043133196e-05, "loss": 2.1257, "step": 4885 }, { "epoch": 0.1576411896489491, "grad_norm": 0.408203125, "learning_rate": 2.880152571975301e-05, "loss": 2.111, "step": 4886 }, { "epoch": 0.15767345350274545, "grad_norm": 0.40625, "learning_rate": 2.880090924445554e-05, "loss": 2.1093, "step": 4887 }, { "epoch": 0.1577057173565418, "grad_norm": 0.40625, "learning_rate": 2.880029261724756e-05, "loss": 2.1259, "step": 4888 }, { "epoch": 0.15773798121033814, "grad_norm": 0.384765625, "learning_rate": 2.8799675838135855e-05, "loss": 2.1102, "step": 4889 }, { "epoch": 0.15777024506413448, "grad_norm": 0.40625, "learning_rate": 2.879905890712722e-05, "loss": 2.0854, "step": 4890 }, { "epoch": 0.15780250891793085, "grad_norm": 0.43359375, "learning_rate": 2.8798441824228443e-05, "loss": 2.1154, "step": 4891 }, { "epoch": 0.1578347727717272, "grad_norm": 0.4140625, "learning_rate": 2.8797824589446318e-05, "loss": 2.0251, "step": 4892 }, { "epoch": 0.15786703662552354, "grad_norm": 0.388671875, "learning_rate": 2.879720720278764e-05, "loss": 2.0482, "step": 4893 }, { "epoch": 0.15789930047931988, "grad_norm": 0.427734375, "learning_rate": 2.87965896642592e-05, "loss": 2.0474, "step": 4894 }, { "epoch": 0.15793156433311623, "grad_norm": 0.384765625, "learning_rate": 2.87959719738678e-05, "loss": 2.0438, "step": 4895 }, { "epoch": 0.15796382818691257, "grad_norm": 0.423828125, "learning_rate": 2.8795354131620243e-05, "loss": 2.0706, "step": 4896 }, { "epoch": 0.1579960920407089, "grad_norm": 0.44140625, "learning_rate": 2.879473613752332e-05, "loss": 2.0444, "step": 4897 }, { "epoch": 0.15802835589450526, "grad_norm": 0.451171875, "learning_rate": 2.879411799158384e-05, "loss": 2.0424, "step": 4898 }, { "epoch": 0.1580606197483016, "grad_norm": 0.427734375, "learning_rate": 2.87934996938086e-05, "loss": 2.0706, "step": 4899 }, { "epoch": 0.15809288360209794, "grad_norm": 0.484375, "learning_rate": 2.879288124420441e-05, "loss": 2.051, "step": 4900 }, { "epoch": 0.15812514745589432, "grad_norm": 0.61328125, "learning_rate": 2.8792262642778082e-05, "loss": 2.0494, "step": 4901 }, { "epoch": 0.15815741130969066, "grad_norm": 0.7109375, "learning_rate": 2.8791643889536422e-05, "loss": 2.053, "step": 4902 }, { "epoch": 0.158189675163487, "grad_norm": 0.8984375, "learning_rate": 2.879102498448624e-05, "loss": 2.0741, "step": 4903 }, { "epoch": 0.15822193901728335, "grad_norm": 0.73828125, "learning_rate": 2.8790405927634348e-05, "loss": 2.0346, "step": 4904 }, { "epoch": 0.1582542028710797, "grad_norm": 0.46875, "learning_rate": 2.878978671898756e-05, "loss": 2.0519, "step": 4905 }, { "epoch": 0.15828646672487603, "grad_norm": 0.77734375, "learning_rate": 2.878916735855269e-05, "loss": 2.0405, "step": 4906 }, { "epoch": 0.15831873057867238, "grad_norm": 0.52734375, "learning_rate": 2.8788547846336563e-05, "loss": 2.0342, "step": 4907 }, { "epoch": 0.15835099443246872, "grad_norm": 0.65234375, "learning_rate": 2.878792818234599e-05, "loss": 2.0122, "step": 4908 }, { "epoch": 0.15838325828626507, "grad_norm": 0.55078125, "learning_rate": 2.87873083665878e-05, "loss": 2.0692, "step": 4909 }, { "epoch": 0.1584155221400614, "grad_norm": 0.5546875, "learning_rate": 2.8786688399068803e-05, "loss": 2.0405, "step": 4910 }, { "epoch": 0.15844778599385778, "grad_norm": 0.57421875, "learning_rate": 2.8786068279795838e-05, "loss": 2.0658, "step": 4911 }, { "epoch": 0.15848004984765413, "grad_norm": 0.466796875, "learning_rate": 2.8785448008775717e-05, "loss": 2.0521, "step": 4912 }, { "epoch": 0.15851231370145047, "grad_norm": 0.5859375, "learning_rate": 2.878482758601528e-05, "loss": 2.0455, "step": 4913 }, { "epoch": 0.1585445775552468, "grad_norm": 0.490234375, "learning_rate": 2.8784207011521346e-05, "loss": 2.0765, "step": 4914 }, { "epoch": 0.15857684140904316, "grad_norm": 0.5546875, "learning_rate": 2.878358628530075e-05, "loss": 2.0251, "step": 4915 }, { "epoch": 0.1586091052628395, "grad_norm": 0.421875, "learning_rate": 2.8782965407360326e-05, "loss": 2.0483, "step": 4916 }, { "epoch": 0.15864136911663584, "grad_norm": 0.5625, "learning_rate": 2.8782344377706907e-05, "loss": 2.0195, "step": 4917 }, { "epoch": 0.1586736329704322, "grad_norm": 0.40625, "learning_rate": 2.878172319634733e-05, "loss": 2.0271, "step": 4918 }, { "epoch": 0.15870589682422853, "grad_norm": 0.515625, "learning_rate": 2.8781101863288425e-05, "loss": 2.0468, "step": 4919 }, { "epoch": 0.15873816067802488, "grad_norm": 0.3984375, "learning_rate": 2.878048037853704e-05, "loss": 2.0459, "step": 4920 }, { "epoch": 0.15877042453182125, "grad_norm": 0.458984375, "learning_rate": 2.8779858742100016e-05, "loss": 2.02, "step": 4921 }, { "epoch": 0.1588026883856176, "grad_norm": 0.392578125, "learning_rate": 2.8779236953984188e-05, "loss": 2.0519, "step": 4922 }, { "epoch": 0.15883495223941393, "grad_norm": 0.427734375, "learning_rate": 2.8778615014196408e-05, "loss": 2.0382, "step": 4923 }, { "epoch": 0.15886721609321028, "grad_norm": 0.404296875, "learning_rate": 2.877799292274352e-05, "loss": 2.0711, "step": 4924 }, { "epoch": 0.15889947994700662, "grad_norm": 0.41796875, "learning_rate": 2.8777370679632367e-05, "loss": 2.0626, "step": 4925 }, { "epoch": 0.15893174380080297, "grad_norm": 0.41796875, "learning_rate": 2.8776748284869804e-05, "loss": 2.0444, "step": 4926 }, { "epoch": 0.1589640076545993, "grad_norm": 0.404296875, "learning_rate": 2.877612573846268e-05, "loss": 2.0652, "step": 4927 }, { "epoch": 0.15899627150839565, "grad_norm": 0.400390625, "learning_rate": 2.8775503040417845e-05, "loss": 2.0181, "step": 4928 }, { "epoch": 0.159028535362192, "grad_norm": 0.46875, "learning_rate": 2.877488019074215e-05, "loss": 2.0578, "step": 4929 }, { "epoch": 0.15906079921598834, "grad_norm": 0.4296875, "learning_rate": 2.877425718944246e-05, "loss": 2.0726, "step": 4930 }, { "epoch": 0.1590930630697847, "grad_norm": 0.416015625, "learning_rate": 2.877363403652563e-05, "loss": 2.0416, "step": 4931 }, { "epoch": 0.15912532692358106, "grad_norm": 0.478515625, "learning_rate": 2.877301073199852e-05, "loss": 2.0447, "step": 4932 }, { "epoch": 0.1591575907773774, "grad_norm": 0.37109375, "learning_rate": 2.8772387275867986e-05, "loss": 2.0341, "step": 4933 }, { "epoch": 0.15918985463117374, "grad_norm": 0.470703125, "learning_rate": 2.8771763668140886e-05, "loss": 2.0672, "step": 4934 }, { "epoch": 0.1592221184849701, "grad_norm": 0.388671875, "learning_rate": 2.8771139908824102e-05, "loss": 2.0645, "step": 4935 }, { "epoch": 0.15925438233876643, "grad_norm": 0.44921875, "learning_rate": 2.8770515997924484e-05, "loss": 2.0701, "step": 4936 }, { "epoch": 0.15928664619256278, "grad_norm": 0.431640625, "learning_rate": 2.8769891935448905e-05, "loss": 2.0318, "step": 4937 }, { "epoch": 0.15931891004635912, "grad_norm": 0.453125, "learning_rate": 2.8769267721404235e-05, "loss": 2.0466, "step": 4938 }, { "epoch": 0.15935117390015546, "grad_norm": 0.408203125, "learning_rate": 2.8768643355797347e-05, "loss": 2.0387, "step": 4939 }, { "epoch": 0.1593834377539518, "grad_norm": 0.42578125, "learning_rate": 2.8768018838635106e-05, "loss": 2.0564, "step": 4940 }, { "epoch": 0.15941570160774818, "grad_norm": 0.435546875, "learning_rate": 2.876739416992439e-05, "loss": 2.0352, "step": 4941 }, { "epoch": 0.15944796546154452, "grad_norm": 0.4296875, "learning_rate": 2.876676934967208e-05, "loss": 1.9971, "step": 4942 }, { "epoch": 0.15948022931534087, "grad_norm": 0.466796875, "learning_rate": 2.8766144377885043e-05, "loss": 2.0574, "step": 4943 }, { "epoch": 0.1595124931691372, "grad_norm": 0.43359375, "learning_rate": 2.876551925457017e-05, "loss": 2.0208, "step": 4944 }, { "epoch": 0.15954475702293355, "grad_norm": 0.447265625, "learning_rate": 2.8764893979734337e-05, "loss": 2.0382, "step": 4945 }, { "epoch": 0.1595770208767299, "grad_norm": 0.427734375, "learning_rate": 2.8764268553384423e-05, "loss": 2.0549, "step": 4946 }, { "epoch": 0.15960928473052624, "grad_norm": 0.42578125, "learning_rate": 2.876364297552732e-05, "loss": 2.0316, "step": 4947 }, { "epoch": 0.15964154858432258, "grad_norm": 0.451171875, "learning_rate": 2.87630172461699e-05, "loss": 2.0293, "step": 4948 }, { "epoch": 0.15967381243811893, "grad_norm": 0.42578125, "learning_rate": 2.876239136531907e-05, "loss": 2.0227, "step": 4949 }, { "epoch": 0.15970607629191527, "grad_norm": 0.462890625, "learning_rate": 2.87617653329817e-05, "loss": 2.0416, "step": 4950 }, { "epoch": 0.15973834014571162, "grad_norm": 0.39453125, "learning_rate": 2.8761139149164696e-05, "loss": 2.0571, "step": 4951 }, { "epoch": 0.159770603999508, "grad_norm": 0.4140625, "learning_rate": 2.876051281387494e-05, "loss": 2.0419, "step": 4952 }, { "epoch": 0.15980286785330433, "grad_norm": 0.421875, "learning_rate": 2.8759886327119337e-05, "loss": 2.018, "step": 4953 }, { "epoch": 0.15983513170710067, "grad_norm": 0.462890625, "learning_rate": 2.875925968890477e-05, "loss": 2.0399, "step": 4954 }, { "epoch": 0.15986739556089702, "grad_norm": 0.44140625, "learning_rate": 2.8758632899238146e-05, "loss": 2.0407, "step": 4955 }, { "epoch": 0.15989965941469336, "grad_norm": 0.4140625, "learning_rate": 2.8758005958126358e-05, "loss": 2.0428, "step": 4956 }, { "epoch": 0.1599319232684897, "grad_norm": 0.423828125, "learning_rate": 2.8757378865576314e-05, "loss": 2.0586, "step": 4957 }, { "epoch": 0.15996418712228605, "grad_norm": 0.45703125, "learning_rate": 2.8756751621594913e-05, "loss": 2.0114, "step": 4958 }, { "epoch": 0.1599964509760824, "grad_norm": 0.412109375, "learning_rate": 2.875612422618906e-05, "loss": 2.0608, "step": 4959 }, { "epoch": 0.16002871482987874, "grad_norm": 0.46484375, "learning_rate": 2.875549667936566e-05, "loss": 2.0294, "step": 4960 }, { "epoch": 0.16006097868367508, "grad_norm": 0.42578125, "learning_rate": 2.8754868981131616e-05, "loss": 2.0678, "step": 4961 }, { "epoch": 0.16009324253747145, "grad_norm": 0.419921875, "learning_rate": 2.875424113149385e-05, "loss": 2.0449, "step": 4962 }, { "epoch": 0.1601255063912678, "grad_norm": 0.40234375, "learning_rate": 2.875361313045926e-05, "loss": 2.0482, "step": 4963 }, { "epoch": 0.16015777024506414, "grad_norm": 0.39453125, "learning_rate": 2.8752984978034758e-05, "loss": 2.0372, "step": 4964 }, { "epoch": 0.16019003409886048, "grad_norm": 0.390625, "learning_rate": 2.875235667422727e-05, "loss": 2.059, "step": 4965 }, { "epoch": 0.16022229795265683, "grad_norm": 0.42578125, "learning_rate": 2.8751728219043707e-05, "loss": 2.0611, "step": 4966 }, { "epoch": 0.16025456180645317, "grad_norm": 0.45703125, "learning_rate": 2.8751099612490985e-05, "loss": 2.03, "step": 4967 }, { "epoch": 0.16028682566024952, "grad_norm": 0.46875, "learning_rate": 2.875047085457602e-05, "loss": 2.0669, "step": 4968 }, { "epoch": 0.16031908951404586, "grad_norm": 0.4765625, "learning_rate": 2.874984194530574e-05, "loss": 2.0441, "step": 4969 }, { "epoch": 0.1603513533678422, "grad_norm": 0.453125, "learning_rate": 2.8749212884687064e-05, "loss": 2.0492, "step": 4970 }, { "epoch": 0.16038361722163855, "grad_norm": 0.46875, "learning_rate": 2.8748583672726914e-05, "loss": 2.0418, "step": 4971 }, { "epoch": 0.16041588107543492, "grad_norm": 0.482421875, "learning_rate": 2.8747954309432216e-05, "loss": 2.0269, "step": 4972 }, { "epoch": 0.16044814492923126, "grad_norm": 0.4765625, "learning_rate": 2.8747324794809904e-05, "loss": 2.048, "step": 4973 }, { "epoch": 0.1604804087830276, "grad_norm": 0.380859375, "learning_rate": 2.87466951288669e-05, "loss": 1.9993, "step": 4974 }, { "epoch": 0.16051267263682395, "grad_norm": 0.4140625, "learning_rate": 2.874606531161014e-05, "loss": 2.0345, "step": 4975 }, { "epoch": 0.1605449364906203, "grad_norm": 0.439453125, "learning_rate": 2.874543534304655e-05, "loss": 2.031, "step": 4976 }, { "epoch": 0.16057720034441664, "grad_norm": 0.412109375, "learning_rate": 2.8744805223183078e-05, "loss": 2.069, "step": 4977 }, { "epoch": 0.16060946419821298, "grad_norm": 0.4140625, "learning_rate": 2.8744174952026642e-05, "loss": 2.0344, "step": 4978 }, { "epoch": 0.16064172805200932, "grad_norm": 0.447265625, "learning_rate": 2.874354452958419e-05, "loss": 2.0864, "step": 4979 }, { "epoch": 0.16067399190580567, "grad_norm": 0.435546875, "learning_rate": 2.8742913955862662e-05, "loss": 2.1239, "step": 4980 }, { "epoch": 0.160706255759602, "grad_norm": 0.435546875, "learning_rate": 2.8742283230868992e-05, "loss": 2.1132, "step": 4981 }, { "epoch": 0.16073851961339838, "grad_norm": 0.416015625, "learning_rate": 2.874165235461013e-05, "loss": 2.1037, "step": 4982 }, { "epoch": 0.16077078346719473, "grad_norm": 0.447265625, "learning_rate": 2.8741021327093017e-05, "loss": 2.1011, "step": 4983 }, { "epoch": 0.16080304732099107, "grad_norm": 0.419921875, "learning_rate": 2.87403901483246e-05, "loss": 2.1238, "step": 4984 }, { "epoch": 0.16083531117478742, "grad_norm": 0.4453125, "learning_rate": 2.8739758818311822e-05, "loss": 2.0768, "step": 4985 }, { "epoch": 0.16086757502858376, "grad_norm": 0.515625, "learning_rate": 2.8739127337061638e-05, "loss": 2.1242, "step": 4986 }, { "epoch": 0.1608998388823801, "grad_norm": 0.57421875, "learning_rate": 2.8738495704580995e-05, "loss": 2.1412, "step": 4987 }, { "epoch": 0.16093210273617645, "grad_norm": 0.6171875, "learning_rate": 2.873786392087685e-05, "loss": 2.1041, "step": 4988 }, { "epoch": 0.1609643665899728, "grad_norm": 0.69140625, "learning_rate": 2.873723198595615e-05, "loss": 2.0866, "step": 4989 }, { "epoch": 0.16099663044376913, "grad_norm": 0.57421875, "learning_rate": 2.873659989982586e-05, "loss": 2.0995, "step": 4990 }, { "epoch": 0.16102889429756548, "grad_norm": 0.447265625, "learning_rate": 2.8735967662492936e-05, "loss": 2.1266, "step": 4991 }, { "epoch": 0.16106115815136185, "grad_norm": 0.59765625, "learning_rate": 2.8735335273964324e-05, "loss": 2.107, "step": 4992 }, { "epoch": 0.1610934220051582, "grad_norm": 0.5234375, "learning_rate": 2.8734702734247004e-05, "loss": 2.1139, "step": 4993 }, { "epoch": 0.16112568585895454, "grad_norm": 0.46484375, "learning_rate": 2.873407004334793e-05, "loss": 2.1003, "step": 4994 }, { "epoch": 0.16115794971275088, "grad_norm": 0.5234375, "learning_rate": 2.873343720127406e-05, "loss": 2.113, "step": 4995 }, { "epoch": 0.16119021356654722, "grad_norm": 0.421875, "learning_rate": 2.8732804208032364e-05, "loss": 2.1045, "step": 4996 }, { "epoch": 0.16122247742034357, "grad_norm": 0.484375, "learning_rate": 2.8732171063629816e-05, "loss": 2.0812, "step": 4997 }, { "epoch": 0.1612547412741399, "grad_norm": 0.447265625, "learning_rate": 2.873153776807338e-05, "loss": 2.124, "step": 4998 }, { "epoch": 0.16128700512793626, "grad_norm": 0.408203125, "learning_rate": 2.8730904321370027e-05, "loss": 2.0827, "step": 4999 }, { "epoch": 0.1613192689817326, "grad_norm": 0.47265625, "learning_rate": 2.8730270723526727e-05, "loss": 2.1094, "step": 5000 }, { "epoch": 0.16135153283552894, "grad_norm": 0.416015625, "learning_rate": 2.872963697455046e-05, "loss": 2.0909, "step": 5001 }, { "epoch": 0.16138379668932532, "grad_norm": 0.380859375, "learning_rate": 2.8729003074448196e-05, "loss": 2.0972, "step": 5002 }, { "epoch": 0.16141606054312166, "grad_norm": 0.439453125, "learning_rate": 2.8728369023226917e-05, "loss": 2.1256, "step": 5003 }, { "epoch": 0.161448324396918, "grad_norm": 0.38671875, "learning_rate": 2.87277348208936e-05, "loss": 2.0938, "step": 5004 }, { "epoch": 0.16148058825071435, "grad_norm": 0.400390625, "learning_rate": 2.872710046745523e-05, "loss": 2.1131, "step": 5005 }, { "epoch": 0.1615128521045107, "grad_norm": 0.400390625, "learning_rate": 2.8726465962918782e-05, "loss": 2.1326, "step": 5006 }, { "epoch": 0.16154511595830703, "grad_norm": 0.41015625, "learning_rate": 2.8725831307291244e-05, "loss": 2.1087, "step": 5007 }, { "epoch": 0.16157737981210338, "grad_norm": 0.390625, "learning_rate": 2.8725196500579602e-05, "loss": 2.0991, "step": 5008 }, { "epoch": 0.16160964366589972, "grad_norm": 0.390625, "learning_rate": 2.872456154279084e-05, "loss": 2.0979, "step": 5009 }, { "epoch": 0.16164190751969607, "grad_norm": 0.4140625, "learning_rate": 2.872392643393196e-05, "loss": 2.1299, "step": 5010 }, { "epoch": 0.1616741713734924, "grad_norm": 0.38671875, "learning_rate": 2.8723291174009934e-05, "loss": 2.1287, "step": 5011 }, { "epoch": 0.16170643522728878, "grad_norm": 0.3984375, "learning_rate": 2.8722655763031767e-05, "loss": 2.0906, "step": 5012 }, { "epoch": 0.16173869908108512, "grad_norm": 0.40625, "learning_rate": 2.872202020100445e-05, "loss": 2.1013, "step": 5013 }, { "epoch": 0.16177096293488147, "grad_norm": 0.41015625, "learning_rate": 2.872138448793498e-05, "loss": 2.0715, "step": 5014 }, { "epoch": 0.1618032267886778, "grad_norm": 0.388671875, "learning_rate": 2.8720748623830356e-05, "loss": 2.1222, "step": 5015 }, { "epoch": 0.16183549064247416, "grad_norm": 0.400390625, "learning_rate": 2.8720112608697567e-05, "loss": 2.0998, "step": 5016 }, { "epoch": 0.1618677544962705, "grad_norm": 0.44921875, "learning_rate": 2.8719476442543625e-05, "loss": 2.0994, "step": 5017 }, { "epoch": 0.16190001835006684, "grad_norm": 0.435546875, "learning_rate": 2.871884012537553e-05, "loss": 2.1098, "step": 5018 }, { "epoch": 0.1619322822038632, "grad_norm": 0.384765625, "learning_rate": 2.8718203657200284e-05, "loss": 2.1106, "step": 5019 }, { "epoch": 0.16196454605765953, "grad_norm": 0.396484375, "learning_rate": 2.871756703802489e-05, "loss": 2.118, "step": 5020 }, { "epoch": 0.16199680991145587, "grad_norm": 0.392578125, "learning_rate": 2.8716930267856363e-05, "loss": 2.1141, "step": 5021 }, { "epoch": 0.16202907376525225, "grad_norm": 0.4140625, "learning_rate": 2.8716293346701704e-05, "loss": 2.1391, "step": 5022 }, { "epoch": 0.1620613376190486, "grad_norm": 0.40234375, "learning_rate": 2.8715656274567932e-05, "loss": 2.1256, "step": 5023 }, { "epoch": 0.16209360147284493, "grad_norm": 0.4296875, "learning_rate": 2.8715019051462057e-05, "loss": 2.1324, "step": 5024 }, { "epoch": 0.16212586532664128, "grad_norm": 0.419921875, "learning_rate": 2.8714381677391088e-05, "loss": 2.0896, "step": 5025 }, { "epoch": 0.16215812918043762, "grad_norm": 0.39453125, "learning_rate": 2.8713744152362043e-05, "loss": 2.1073, "step": 5026 }, { "epoch": 0.16219039303423397, "grad_norm": 0.4140625, "learning_rate": 2.8713106476381942e-05, "loss": 2.0971, "step": 5027 }, { "epoch": 0.1622226568880303, "grad_norm": 0.41796875, "learning_rate": 2.8712468649457805e-05, "loss": 2.1305, "step": 5028 }, { "epoch": 0.16225492074182665, "grad_norm": 0.39453125, "learning_rate": 2.8711830671596646e-05, "loss": 2.1208, "step": 5029 }, { "epoch": 0.162287184595623, "grad_norm": 0.41015625, "learning_rate": 2.8711192542805494e-05, "loss": 2.1168, "step": 5030 }, { "epoch": 0.16231944844941934, "grad_norm": 0.408203125, "learning_rate": 2.8710554263091374e-05, "loss": 2.1036, "step": 5031 }, { "epoch": 0.1623517123032157, "grad_norm": 0.54296875, "learning_rate": 2.8709915832461303e-05, "loss": 2.1128, "step": 5032 }, { "epoch": 0.16238397615701206, "grad_norm": 0.68359375, "learning_rate": 2.8709277250922316e-05, "loss": 2.0891, "step": 5033 }, { "epoch": 0.1624162400108084, "grad_norm": 0.8203125, "learning_rate": 2.8708638518481443e-05, "loss": 2.0866, "step": 5034 }, { "epoch": 0.16244850386460474, "grad_norm": 0.79296875, "learning_rate": 2.8707999635145705e-05, "loss": 2.1211, "step": 5035 }, { "epoch": 0.1624807677184011, "grad_norm": 0.447265625, "learning_rate": 2.8707360600922147e-05, "loss": 2.0828, "step": 5036 }, { "epoch": 0.16251303157219743, "grad_norm": 0.68359375, "learning_rate": 2.8706721415817793e-05, "loss": 2.0959, "step": 5037 }, { "epoch": 0.16254529542599377, "grad_norm": 0.6015625, "learning_rate": 2.870608207983969e-05, "loss": 2.1057, "step": 5038 }, { "epoch": 0.16257755927979012, "grad_norm": 0.466796875, "learning_rate": 2.870544259299486e-05, "loss": 2.1149, "step": 5039 }, { "epoch": 0.16260982313358646, "grad_norm": 0.5390625, "learning_rate": 2.8704802955290353e-05, "loss": 2.1024, "step": 5040 }, { "epoch": 0.1626420869873828, "grad_norm": 0.42578125, "learning_rate": 2.870416316673321e-05, "loss": 2.0786, "step": 5041 }, { "epoch": 0.16267435084117915, "grad_norm": 0.51171875, "learning_rate": 2.8703523227330467e-05, "loss": 2.119, "step": 5042 }, { "epoch": 0.16270661469497552, "grad_norm": 0.41015625, "learning_rate": 2.8702883137089168e-05, "loss": 2.0896, "step": 5043 }, { "epoch": 0.16273887854877186, "grad_norm": 0.546875, "learning_rate": 2.870224289601637e-05, "loss": 2.1166, "step": 5044 }, { "epoch": 0.1627711424025682, "grad_norm": 0.427734375, "learning_rate": 2.8701602504119108e-05, "loss": 2.1225, "step": 5045 }, { "epoch": 0.16280340625636455, "grad_norm": 0.50390625, "learning_rate": 2.8700961961404433e-05, "loss": 2.1142, "step": 5046 }, { "epoch": 0.1628356701101609, "grad_norm": 0.453125, "learning_rate": 2.87003212678794e-05, "loss": 2.0967, "step": 5047 }, { "epoch": 0.16286793396395724, "grad_norm": 0.4609375, "learning_rate": 2.869968042355106e-05, "loss": 2.086, "step": 5048 }, { "epoch": 0.16290019781775358, "grad_norm": 0.427734375, "learning_rate": 2.869903942842646e-05, "loss": 2.1151, "step": 5049 }, { "epoch": 0.16293246167154993, "grad_norm": 0.41796875, "learning_rate": 2.8698398282512668e-05, "loss": 2.1163, "step": 5050 }, { "epoch": 0.16296472552534627, "grad_norm": 0.423828125, "learning_rate": 2.8697756985816735e-05, "loss": 2.1066, "step": 5051 }, { "epoch": 0.16299698937914261, "grad_norm": 0.3828125, "learning_rate": 2.869711553834572e-05, "loss": 2.1238, "step": 5052 }, { "epoch": 0.163029253232939, "grad_norm": 0.421875, "learning_rate": 2.8696473940106677e-05, "loss": 2.1091, "step": 5053 }, { "epoch": 0.16306151708673533, "grad_norm": 0.412109375, "learning_rate": 2.8695832191106684e-05, "loss": 2.0958, "step": 5054 }, { "epoch": 0.16309378094053167, "grad_norm": 0.384765625, "learning_rate": 2.8695190291352786e-05, "loss": 2.1238, "step": 5055 }, { "epoch": 0.16312604479432802, "grad_norm": 0.40234375, "learning_rate": 2.8694548240852066e-05, "loss": 2.1055, "step": 5056 }, { "epoch": 0.16315830864812436, "grad_norm": 0.404296875, "learning_rate": 2.8693906039611577e-05, "loss": 2.1129, "step": 5057 }, { "epoch": 0.1631905725019207, "grad_norm": 0.42578125, "learning_rate": 2.86932636876384e-05, "loss": 2.093, "step": 5058 }, { "epoch": 0.16322283635571705, "grad_norm": 0.408203125, "learning_rate": 2.86926211849396e-05, "loss": 2.1121, "step": 5059 }, { "epoch": 0.1632551002095134, "grad_norm": 0.400390625, "learning_rate": 2.8691978531522243e-05, "loss": 2.1006, "step": 5060 }, { "epoch": 0.16328736406330974, "grad_norm": 0.37890625, "learning_rate": 2.869133572739341e-05, "loss": 2.0945, "step": 5061 }, { "epoch": 0.16331962791710608, "grad_norm": 0.396484375, "learning_rate": 2.869069277256018e-05, "loss": 2.1228, "step": 5062 }, { "epoch": 0.16335189177090245, "grad_norm": 0.41796875, "learning_rate": 2.8690049667029625e-05, "loss": 2.1376, "step": 5063 }, { "epoch": 0.1633841556246988, "grad_norm": 0.375, "learning_rate": 2.868940641080882e-05, "loss": 2.1028, "step": 5064 }, { "epoch": 0.16341641947849514, "grad_norm": 0.3828125, "learning_rate": 2.8688763003904857e-05, "loss": 2.1237, "step": 5065 }, { "epoch": 0.16344868333229148, "grad_norm": 0.40234375, "learning_rate": 2.8688119446324804e-05, "loss": 2.0788, "step": 5066 }, { "epoch": 0.16348094718608783, "grad_norm": 0.3828125, "learning_rate": 2.8687475738075753e-05, "loss": 2.1159, "step": 5067 }, { "epoch": 0.16351321103988417, "grad_norm": 0.396484375, "learning_rate": 2.8686831879164792e-05, "loss": 2.113, "step": 5068 }, { "epoch": 0.16354547489368051, "grad_norm": 0.419921875, "learning_rate": 2.8686187869599004e-05, "loss": 2.1423, "step": 5069 }, { "epoch": 0.16357773874747686, "grad_norm": 0.416015625, "learning_rate": 2.8685543709385478e-05, "loss": 2.1208, "step": 5070 }, { "epoch": 0.1636100026012732, "grad_norm": 0.390625, "learning_rate": 2.8684899398531303e-05, "loss": 2.0964, "step": 5071 }, { "epoch": 0.16364226645506955, "grad_norm": 0.427734375, "learning_rate": 2.8684254937043574e-05, "loss": 2.1218, "step": 5072 }, { "epoch": 0.16367453030886592, "grad_norm": 0.462890625, "learning_rate": 2.8683610324929382e-05, "loss": 2.1097, "step": 5073 }, { "epoch": 0.16370679416266226, "grad_norm": 0.478515625, "learning_rate": 2.8682965562195825e-05, "loss": 2.1211, "step": 5074 }, { "epoch": 0.1637390580164586, "grad_norm": 0.4140625, "learning_rate": 2.8682320648850005e-05, "loss": 2.1164, "step": 5075 }, { "epoch": 0.16377132187025495, "grad_norm": 0.39453125, "learning_rate": 2.8681675584899007e-05, "loss": 2.0849, "step": 5076 }, { "epoch": 0.1638035857240513, "grad_norm": 0.466796875, "learning_rate": 2.868103037034994e-05, "loss": 2.0671, "step": 5077 }, { "epoch": 0.16383584957784764, "grad_norm": 0.41015625, "learning_rate": 2.868038500520991e-05, "loss": 2.1101, "step": 5078 }, { "epoch": 0.16386811343164398, "grad_norm": 0.44140625, "learning_rate": 2.867973948948601e-05, "loss": 2.1061, "step": 5079 }, { "epoch": 0.16390037728544032, "grad_norm": 0.4453125, "learning_rate": 2.867909382318536e-05, "loss": 2.0937, "step": 5080 }, { "epoch": 0.16393264113923667, "grad_norm": 0.609375, "learning_rate": 2.8678448006315052e-05, "loss": 2.1098, "step": 5081 }, { "epoch": 0.163964904993033, "grad_norm": 0.3984375, "learning_rate": 2.8677802038882204e-05, "loss": 2.1067, "step": 5082 }, { "epoch": 0.16399716884682938, "grad_norm": 0.4140625, "learning_rate": 2.8677155920893923e-05, "loss": 2.1358, "step": 5083 }, { "epoch": 0.16402943270062573, "grad_norm": 0.408203125, "learning_rate": 2.867650965235732e-05, "loss": 2.0962, "step": 5084 }, { "epoch": 0.16406169655442207, "grad_norm": 0.400390625, "learning_rate": 2.867586323327951e-05, "loss": 2.0845, "step": 5085 }, { "epoch": 0.16409396040821841, "grad_norm": 0.40625, "learning_rate": 2.8675216663667614e-05, "loss": 2.0961, "step": 5086 }, { "epoch": 0.16412622426201476, "grad_norm": 0.419921875, "learning_rate": 2.867456994352874e-05, "loss": 2.1098, "step": 5087 }, { "epoch": 0.1641584881158111, "grad_norm": 0.396484375, "learning_rate": 2.8673923072870007e-05, "loss": 2.1121, "step": 5088 }, { "epoch": 0.16419075196960745, "grad_norm": 0.404296875, "learning_rate": 2.867327605169854e-05, "loss": 2.0937, "step": 5089 }, { "epoch": 0.1642230158234038, "grad_norm": 0.431640625, "learning_rate": 2.8672628880021462e-05, "loss": 2.0855, "step": 5090 }, { "epoch": 0.16425527967720013, "grad_norm": 0.419921875, "learning_rate": 2.8671981557845895e-05, "loss": 2.0989, "step": 5091 }, { "epoch": 0.16428754353099648, "grad_norm": 0.435546875, "learning_rate": 2.867133408517896e-05, "loss": 2.0916, "step": 5092 }, { "epoch": 0.16431980738479285, "grad_norm": 0.451171875, "learning_rate": 2.867068646202779e-05, "loss": 2.1194, "step": 5093 }, { "epoch": 0.1643520712385892, "grad_norm": 0.421875, "learning_rate": 2.8670038688399504e-05, "loss": 2.1205, "step": 5094 }, { "epoch": 0.16438433509238554, "grad_norm": 0.404296875, "learning_rate": 2.8669390764301243e-05, "loss": 2.1023, "step": 5095 }, { "epoch": 0.16441659894618188, "grad_norm": 0.431640625, "learning_rate": 2.866874268974014e-05, "loss": 2.0978, "step": 5096 }, { "epoch": 0.16444886279997822, "grad_norm": 0.39453125, "learning_rate": 2.8668094464723316e-05, "loss": 2.0621, "step": 5097 }, { "epoch": 0.16448112665377457, "grad_norm": 0.427734375, "learning_rate": 2.8667446089257918e-05, "loss": 2.1236, "step": 5098 }, { "epoch": 0.1645133905075709, "grad_norm": 0.41796875, "learning_rate": 2.8666797563351076e-05, "loss": 2.0877, "step": 5099 }, { "epoch": 0.16454565436136726, "grad_norm": 0.439453125, "learning_rate": 2.8666148887009932e-05, "loss": 2.1004, "step": 5100 }, { "epoch": 0.1645779182151636, "grad_norm": 0.41796875, "learning_rate": 2.8665500060241622e-05, "loss": 2.0819, "step": 5101 }, { "epoch": 0.16461018206895994, "grad_norm": 0.46484375, "learning_rate": 2.8664851083053298e-05, "loss": 2.1564, "step": 5102 }, { "epoch": 0.16464244592275631, "grad_norm": 0.5234375, "learning_rate": 2.866420195545209e-05, "loss": 2.0813, "step": 5103 }, { "epoch": 0.16467470977655266, "grad_norm": 0.4921875, "learning_rate": 2.866355267744515e-05, "loss": 2.1145, "step": 5104 }, { "epoch": 0.164706973630349, "grad_norm": 0.51171875, "learning_rate": 2.8662903249039627e-05, "loss": 2.0807, "step": 5105 }, { "epoch": 0.16473923748414535, "grad_norm": 0.439453125, "learning_rate": 2.8662253670242666e-05, "loss": 2.1053, "step": 5106 }, { "epoch": 0.1647715013379417, "grad_norm": 0.447265625, "learning_rate": 2.866160394106142e-05, "loss": 2.0989, "step": 5107 }, { "epoch": 0.16480376519173803, "grad_norm": 0.462890625, "learning_rate": 2.8660954061503035e-05, "loss": 2.1163, "step": 5108 }, { "epoch": 0.16483602904553438, "grad_norm": 0.384765625, "learning_rate": 2.8660304031574673e-05, "loss": 2.1111, "step": 5109 }, { "epoch": 0.16486829289933072, "grad_norm": 0.451171875, "learning_rate": 2.8659653851283478e-05, "loss": 2.1152, "step": 5110 }, { "epoch": 0.16490055675312706, "grad_norm": 0.4765625, "learning_rate": 2.8659003520636613e-05, "loss": 2.1073, "step": 5111 }, { "epoch": 0.1649328206069234, "grad_norm": 0.46484375, "learning_rate": 2.8658353039641238e-05, "loss": 2.098, "step": 5112 }, { "epoch": 0.16496508446071978, "grad_norm": 0.419921875, "learning_rate": 2.8657702408304515e-05, "loss": 2.1026, "step": 5113 }, { "epoch": 0.16499734831451612, "grad_norm": 0.41015625, "learning_rate": 2.86570516266336e-05, "loss": 2.0948, "step": 5114 }, { "epoch": 0.16502961216831247, "grad_norm": 0.4140625, "learning_rate": 2.8656400694635652e-05, "loss": 2.0824, "step": 5115 }, { "epoch": 0.1650618760221088, "grad_norm": 0.4453125, "learning_rate": 2.865574961231785e-05, "loss": 2.1321, "step": 5116 }, { "epoch": 0.16509413987590515, "grad_norm": 0.44140625, "learning_rate": 2.8655098379687347e-05, "loss": 2.0897, "step": 5117 }, { "epoch": 0.1651264037297015, "grad_norm": 0.43359375, "learning_rate": 2.8654446996751324e-05, "loss": 2.1029, "step": 5118 }, { "epoch": 0.16515866758349784, "grad_norm": 0.3984375, "learning_rate": 2.865379546351694e-05, "loss": 2.0893, "step": 5119 }, { "epoch": 0.1651909314372942, "grad_norm": 0.43359375, "learning_rate": 2.865314377999137e-05, "loss": 2.0895, "step": 5120 }, { "epoch": 0.16522319529109053, "grad_norm": 0.408203125, "learning_rate": 2.865249194618179e-05, "loss": 2.09, "step": 5121 }, { "epoch": 0.16525545914488687, "grad_norm": 0.43359375, "learning_rate": 2.865183996209537e-05, "loss": 2.1241, "step": 5122 }, { "epoch": 0.16528772299868325, "grad_norm": 0.40625, "learning_rate": 2.865118782773929e-05, "loss": 2.0782, "step": 5123 }, { "epoch": 0.1653199868524796, "grad_norm": 0.412109375, "learning_rate": 2.865053554312073e-05, "loss": 2.1135, "step": 5124 }, { "epoch": 0.16535225070627593, "grad_norm": 0.427734375, "learning_rate": 2.8649883108246865e-05, "loss": 2.1344, "step": 5125 }, { "epoch": 0.16538451456007228, "grad_norm": 0.458984375, "learning_rate": 2.864923052312488e-05, "loss": 2.1162, "step": 5126 }, { "epoch": 0.16541677841386862, "grad_norm": 0.45703125, "learning_rate": 2.8648577787761958e-05, "loss": 2.1061, "step": 5127 }, { "epoch": 0.16544904226766496, "grad_norm": 0.408203125, "learning_rate": 2.8647924902165285e-05, "loss": 2.1001, "step": 5128 }, { "epoch": 0.1654813061214613, "grad_norm": 0.404296875, "learning_rate": 2.8647271866342042e-05, "loss": 2.1267, "step": 5129 }, { "epoch": 0.16551356997525765, "grad_norm": 0.396484375, "learning_rate": 2.864661868029942e-05, "loss": 2.0973, "step": 5130 }, { "epoch": 0.165545833829054, "grad_norm": 0.48828125, "learning_rate": 2.8645965344044614e-05, "loss": 2.0991, "step": 5131 }, { "epoch": 0.16557809768285034, "grad_norm": 0.419921875, "learning_rate": 2.864531185758481e-05, "loss": 2.1003, "step": 5132 }, { "epoch": 0.16561036153664668, "grad_norm": 0.37890625, "learning_rate": 2.86446582209272e-05, "loss": 2.1246, "step": 5133 }, { "epoch": 0.16564262539044305, "grad_norm": 0.41796875, "learning_rate": 2.8644004434078984e-05, "loss": 2.1006, "step": 5134 }, { "epoch": 0.1656748892442394, "grad_norm": 0.41015625, "learning_rate": 2.8643350497047357e-05, "loss": 2.1357, "step": 5135 }, { "epoch": 0.16570715309803574, "grad_norm": 0.3984375, "learning_rate": 2.864269640983951e-05, "loss": 2.1321, "step": 5136 }, { "epoch": 0.16573941695183209, "grad_norm": 0.408203125, "learning_rate": 2.864204217246265e-05, "loss": 2.0469, "step": 5137 }, { "epoch": 0.16577168080562843, "grad_norm": 0.40234375, "learning_rate": 2.8641387784923976e-05, "loss": 2.0082, "step": 5138 }, { "epoch": 0.16580394465942477, "grad_norm": 0.453125, "learning_rate": 2.864073324723069e-05, "loss": 2.018, "step": 5139 }, { "epoch": 0.16583620851322112, "grad_norm": 0.6171875, "learning_rate": 2.8640078559390005e-05, "loss": 2.0359, "step": 5140 }, { "epoch": 0.16586847236701746, "grad_norm": 0.57421875, "learning_rate": 2.8639423721409113e-05, "loss": 2.054, "step": 5141 }, { "epoch": 0.1659007362208138, "grad_norm": 0.640625, "learning_rate": 2.8638768733295233e-05, "loss": 2.0428, "step": 5142 }, { "epoch": 0.16593300007461015, "grad_norm": 0.578125, "learning_rate": 2.8638113595055572e-05, "loss": 2.0106, "step": 5143 }, { "epoch": 0.16596526392840652, "grad_norm": 0.44140625, "learning_rate": 2.863745830669734e-05, "loss": 2.047, "step": 5144 }, { "epoch": 0.16599752778220286, "grad_norm": 0.484375, "learning_rate": 2.863680286822775e-05, "loss": 2.1098, "step": 5145 }, { "epoch": 0.1660297916359992, "grad_norm": 0.478515625, "learning_rate": 2.8636147279654016e-05, "loss": 2.0876, "step": 5146 }, { "epoch": 0.16606205548979555, "grad_norm": 0.439453125, "learning_rate": 2.8635491540983354e-05, "loss": 2.0819, "step": 5147 }, { "epoch": 0.1660943193435919, "grad_norm": 0.46875, "learning_rate": 2.8634835652222987e-05, "loss": 2.0865, "step": 5148 }, { "epoch": 0.16612658319738824, "grad_norm": 0.4296875, "learning_rate": 2.863417961338013e-05, "loss": 2.1085, "step": 5149 }, { "epoch": 0.16615884705118458, "grad_norm": 0.466796875, "learning_rate": 2.8633523424462005e-05, "loss": 2.1501, "step": 5150 }, { "epoch": 0.16619111090498093, "grad_norm": 0.50390625, "learning_rate": 2.863286708547583e-05, "loss": 2.1038, "step": 5151 }, { "epoch": 0.16622337475877727, "grad_norm": 0.8125, "learning_rate": 2.8632210596428842e-05, "loss": 2.1358, "step": 5152 }, { "epoch": 0.16625563861257361, "grad_norm": 0.431640625, "learning_rate": 2.8631553957328255e-05, "loss": 2.0768, "step": 5153 }, { "epoch": 0.16628790246636999, "grad_norm": 0.435546875, "learning_rate": 2.86308971681813e-05, "loss": 2.1022, "step": 5154 }, { "epoch": 0.16632016632016633, "grad_norm": 0.408203125, "learning_rate": 2.8630240228995207e-05, "loss": 2.1113, "step": 5155 }, { "epoch": 0.16635243017396267, "grad_norm": 0.43359375, "learning_rate": 2.8629583139777212e-05, "loss": 2.0932, "step": 5156 }, { "epoch": 0.16638469402775902, "grad_norm": 0.455078125, "learning_rate": 2.8628925900534545e-05, "loss": 2.1278, "step": 5157 }, { "epoch": 0.16641695788155536, "grad_norm": 0.390625, "learning_rate": 2.8628268511274434e-05, "loss": 2.1102, "step": 5158 }, { "epoch": 0.1664492217353517, "grad_norm": 0.443359375, "learning_rate": 2.862761097200412e-05, "loss": 2.0725, "step": 5159 }, { "epoch": 0.16648148558914805, "grad_norm": 0.455078125, "learning_rate": 2.8626953282730847e-05, "loss": 2.1216, "step": 5160 }, { "epoch": 0.1665137494429444, "grad_norm": 0.392578125, "learning_rate": 2.8626295443461843e-05, "loss": 2.1309, "step": 5161 }, { "epoch": 0.16654601329674074, "grad_norm": 0.486328125, "learning_rate": 2.8625637454204356e-05, "loss": 2.1159, "step": 5162 }, { "epoch": 0.16657827715053708, "grad_norm": 0.55078125, "learning_rate": 2.8624979314965627e-05, "loss": 2.1092, "step": 5163 }, { "epoch": 0.16661054100433345, "grad_norm": 0.412109375, "learning_rate": 2.86243210257529e-05, "loss": 2.1015, "step": 5164 }, { "epoch": 0.1666428048581298, "grad_norm": 0.4609375, "learning_rate": 2.862366258657342e-05, "loss": 2.0556, "step": 5165 }, { "epoch": 0.16667506871192614, "grad_norm": 0.50390625, "learning_rate": 2.8623003997434437e-05, "loss": 2.1168, "step": 5166 }, { "epoch": 0.16670733256572248, "grad_norm": 0.423828125, "learning_rate": 2.8622345258343197e-05, "loss": 2.1166, "step": 5167 }, { "epoch": 0.16673959641951883, "grad_norm": 0.41796875, "learning_rate": 2.8621686369306957e-05, "loss": 2.096, "step": 5168 }, { "epoch": 0.16677186027331517, "grad_norm": 0.515625, "learning_rate": 2.8621027330332963e-05, "loss": 2.1108, "step": 5169 }, { "epoch": 0.1668041241271115, "grad_norm": 0.4296875, "learning_rate": 2.8620368141428468e-05, "loss": 2.0991, "step": 5170 }, { "epoch": 0.16683638798090786, "grad_norm": 0.474609375, "learning_rate": 2.8619708802600738e-05, "loss": 2.1159, "step": 5171 }, { "epoch": 0.1668686518347042, "grad_norm": 0.4453125, "learning_rate": 2.8619049313857026e-05, "loss": 2.1155, "step": 5172 }, { "epoch": 0.16690091568850055, "grad_norm": 0.423828125, "learning_rate": 2.8618389675204582e-05, "loss": 2.1086, "step": 5173 }, { "epoch": 0.16693317954229692, "grad_norm": 0.41796875, "learning_rate": 2.861772988665068e-05, "loss": 2.1199, "step": 5174 }, { "epoch": 0.16696544339609326, "grad_norm": 0.419921875, "learning_rate": 2.861706994820257e-05, "loss": 2.1222, "step": 5175 }, { "epoch": 0.1669977072498896, "grad_norm": 0.435546875, "learning_rate": 2.8616409859867533e-05, "loss": 2.0936, "step": 5176 }, { "epoch": 0.16702997110368595, "grad_norm": 0.404296875, "learning_rate": 2.8615749621652817e-05, "loss": 2.1168, "step": 5177 }, { "epoch": 0.1670622349574823, "grad_norm": 0.396484375, "learning_rate": 2.8615089233565696e-05, "loss": 2.106, "step": 5178 }, { "epoch": 0.16709449881127864, "grad_norm": 0.43359375, "learning_rate": 2.8614428695613446e-05, "loss": 2.0912, "step": 5179 }, { "epoch": 0.16712676266507498, "grad_norm": 0.419921875, "learning_rate": 2.8613768007803328e-05, "loss": 2.0754, "step": 5180 }, { "epoch": 0.16715902651887132, "grad_norm": 0.400390625, "learning_rate": 2.8613107170142622e-05, "loss": 2.1121, "step": 5181 }, { "epoch": 0.16719129037266767, "grad_norm": 0.443359375, "learning_rate": 2.861244618263859e-05, "loss": 2.0957, "step": 5182 }, { "epoch": 0.167223554226464, "grad_norm": 0.4140625, "learning_rate": 2.861178504529852e-05, "loss": 2.0985, "step": 5183 }, { "epoch": 0.16725581808026038, "grad_norm": 0.443359375, "learning_rate": 2.861112375812969e-05, "loss": 2.0924, "step": 5184 }, { "epoch": 0.16728808193405673, "grad_norm": 0.40234375, "learning_rate": 2.861046232113937e-05, "loss": 2.0894, "step": 5185 }, { "epoch": 0.16732034578785307, "grad_norm": 0.431640625, "learning_rate": 2.8609800734334846e-05, "loss": 2.0861, "step": 5186 }, { "epoch": 0.1673526096416494, "grad_norm": 0.419921875, "learning_rate": 2.86091389977234e-05, "loss": 2.0983, "step": 5187 }, { "epoch": 0.16738487349544576, "grad_norm": 0.421875, "learning_rate": 2.8608477111312312e-05, "loss": 2.1015, "step": 5188 }, { "epoch": 0.1674171373492421, "grad_norm": 0.408203125, "learning_rate": 2.8607815075108873e-05, "loss": 2.0946, "step": 5189 }, { "epoch": 0.16744940120303844, "grad_norm": 0.4375, "learning_rate": 2.8607152889120373e-05, "loss": 2.0945, "step": 5190 }, { "epoch": 0.1674816650568348, "grad_norm": 0.443359375, "learning_rate": 2.8606490553354086e-05, "loss": 2.1018, "step": 5191 }, { "epoch": 0.16751392891063113, "grad_norm": 0.48828125, "learning_rate": 2.8605828067817317e-05, "loss": 2.0791, "step": 5192 }, { "epoch": 0.16754619276442748, "grad_norm": 0.490234375, "learning_rate": 2.8605165432517353e-05, "loss": 2.1116, "step": 5193 }, { "epoch": 0.16757845661822385, "grad_norm": 0.3984375, "learning_rate": 2.8604502647461488e-05, "loss": 2.1199, "step": 5194 }, { "epoch": 0.1676107204720202, "grad_norm": 0.4609375, "learning_rate": 2.8603839712657017e-05, "loss": 2.106, "step": 5195 }, { "epoch": 0.16764298432581654, "grad_norm": 0.412109375, "learning_rate": 2.860317662811124e-05, "loss": 2.1201, "step": 5196 }, { "epoch": 0.16767524817961288, "grad_norm": 0.41015625, "learning_rate": 2.8602513393831453e-05, "loss": 2.1035, "step": 5197 }, { "epoch": 0.16770751203340922, "grad_norm": 0.427734375, "learning_rate": 2.8601850009824953e-05, "loss": 2.127, "step": 5198 }, { "epoch": 0.16773977588720557, "grad_norm": 0.400390625, "learning_rate": 2.860118647609905e-05, "loss": 2.1296, "step": 5199 }, { "epoch": 0.1677720397410019, "grad_norm": 0.396484375, "learning_rate": 2.8600522792661044e-05, "loss": 2.1059, "step": 5200 }, { "epoch": 0.16780430359479825, "grad_norm": 0.42578125, "learning_rate": 2.859985895951824e-05, "loss": 2.1044, "step": 5201 }, { "epoch": 0.1678365674485946, "grad_norm": 0.40625, "learning_rate": 2.8599194976677946e-05, "loss": 2.097, "step": 5202 }, { "epoch": 0.16786883130239094, "grad_norm": 0.4609375, "learning_rate": 2.8598530844147465e-05, "loss": 2.0986, "step": 5203 }, { "epoch": 0.1679010951561873, "grad_norm": 0.4765625, "learning_rate": 2.8597866561934115e-05, "loss": 2.1015, "step": 5204 }, { "epoch": 0.16793335900998366, "grad_norm": 0.400390625, "learning_rate": 2.8597202130045202e-05, "loss": 2.1009, "step": 5205 }, { "epoch": 0.16796562286378, "grad_norm": 0.408203125, "learning_rate": 2.8596537548488048e-05, "loss": 2.0833, "step": 5206 }, { "epoch": 0.16799788671757634, "grad_norm": 0.41015625, "learning_rate": 2.859587281726996e-05, "loss": 2.0952, "step": 5207 }, { "epoch": 0.1680301505713727, "grad_norm": 0.427734375, "learning_rate": 2.859520793639826e-05, "loss": 2.0843, "step": 5208 }, { "epoch": 0.16806241442516903, "grad_norm": 0.431640625, "learning_rate": 2.859454290588026e-05, "loss": 2.0755, "step": 5209 }, { "epoch": 0.16809467827896538, "grad_norm": 0.390625, "learning_rate": 2.8593877725723285e-05, "loss": 2.1032, "step": 5210 }, { "epoch": 0.16812694213276172, "grad_norm": 0.376953125, "learning_rate": 2.859321239593466e-05, "loss": 2.0991, "step": 5211 }, { "epoch": 0.16815920598655806, "grad_norm": 0.43359375, "learning_rate": 2.85925469165217e-05, "loss": 2.1124, "step": 5212 }, { "epoch": 0.1681914698403544, "grad_norm": 0.421875, "learning_rate": 2.859188128749174e-05, "loss": 2.0878, "step": 5213 }, { "epoch": 0.16822373369415078, "grad_norm": 0.388671875, "learning_rate": 2.8591215508852098e-05, "loss": 2.1004, "step": 5214 }, { "epoch": 0.16825599754794712, "grad_norm": 0.37890625, "learning_rate": 2.8590549580610105e-05, "loss": 2.0921, "step": 5215 }, { "epoch": 0.16828826140174347, "grad_norm": 0.41015625, "learning_rate": 2.8589883502773095e-05, "loss": 2.1071, "step": 5216 }, { "epoch": 0.1683205252555398, "grad_norm": 0.41015625, "learning_rate": 2.8589217275348392e-05, "loss": 2.1031, "step": 5217 }, { "epoch": 0.16835278910933615, "grad_norm": 0.42578125, "learning_rate": 2.858855089834334e-05, "loss": 2.0666, "step": 5218 }, { "epoch": 0.1683850529631325, "grad_norm": 0.453125, "learning_rate": 2.8587884371765262e-05, "loss": 2.0799, "step": 5219 }, { "epoch": 0.16841731681692884, "grad_norm": 0.462890625, "learning_rate": 2.8587217695621504e-05, "loss": 2.1133, "step": 5220 }, { "epoch": 0.16844958067072519, "grad_norm": 0.482421875, "learning_rate": 2.8586550869919403e-05, "loss": 2.08, "step": 5221 }, { "epoch": 0.16848184452452153, "grad_norm": 0.470703125, "learning_rate": 2.8585883894666296e-05, "loss": 2.1263, "step": 5222 }, { "epoch": 0.16851410837831787, "grad_norm": 0.4375, "learning_rate": 2.8585216769869526e-05, "loss": 2.1159, "step": 5223 }, { "epoch": 0.16854637223211422, "grad_norm": 0.38671875, "learning_rate": 2.8584549495536434e-05, "loss": 2.0603, "step": 5224 }, { "epoch": 0.1685786360859106, "grad_norm": 0.45703125, "learning_rate": 2.8583882071674366e-05, "loss": 2.0255, "step": 5225 }, { "epoch": 0.16861089993970693, "grad_norm": 0.494140625, "learning_rate": 2.858321449829067e-05, "loss": 2.0484, "step": 5226 }, { "epoch": 0.16864316379350328, "grad_norm": 0.5234375, "learning_rate": 2.858254677539269e-05, "loss": 2.0419, "step": 5227 }, { "epoch": 0.16867542764729962, "grad_norm": 0.44140625, "learning_rate": 2.8581878902987786e-05, "loss": 1.9864, "step": 5228 }, { "epoch": 0.16870769150109596, "grad_norm": 0.38671875, "learning_rate": 2.85812108810833e-05, "loss": 2.064, "step": 5229 }, { "epoch": 0.1687399553548923, "grad_norm": 0.43359375, "learning_rate": 2.858054270968659e-05, "loss": 2.0598, "step": 5230 }, { "epoch": 0.16877221920868865, "grad_norm": 0.41796875, "learning_rate": 2.8579874388805005e-05, "loss": 2.0508, "step": 5231 }, { "epoch": 0.168804483062485, "grad_norm": 0.416015625, "learning_rate": 2.8579205918445905e-05, "loss": 2.0501, "step": 5232 }, { "epoch": 0.16883674691628134, "grad_norm": 0.41796875, "learning_rate": 2.857853729861665e-05, "loss": 1.9916, "step": 5233 }, { "epoch": 0.16886901077007768, "grad_norm": 0.416015625, "learning_rate": 2.85778685293246e-05, "loss": 2.0336, "step": 5234 }, { "epoch": 0.16890127462387405, "grad_norm": 0.40625, "learning_rate": 2.8577199610577105e-05, "loss": 2.0136, "step": 5235 }, { "epoch": 0.1689335384776704, "grad_norm": 0.435546875, "learning_rate": 2.8576530542381544e-05, "loss": 2.0509, "step": 5236 }, { "epoch": 0.16896580233146674, "grad_norm": 0.40234375, "learning_rate": 2.857586132474527e-05, "loss": 2.0478, "step": 5237 }, { "epoch": 0.16899806618526308, "grad_norm": 0.404296875, "learning_rate": 2.857519195767566e-05, "loss": 2.0589, "step": 5238 }, { "epoch": 0.16903033003905943, "grad_norm": 0.44140625, "learning_rate": 2.8574522441180068e-05, "loss": 2.0434, "step": 5239 }, { "epoch": 0.16906259389285577, "grad_norm": 0.41796875, "learning_rate": 2.8573852775265878e-05, "loss": 2.0531, "step": 5240 }, { "epoch": 0.16909485774665212, "grad_norm": 0.423828125, "learning_rate": 2.857318295994045e-05, "loss": 2.0344, "step": 5241 }, { "epoch": 0.16912712160044846, "grad_norm": 0.400390625, "learning_rate": 2.8572512995211164e-05, "loss": 2.0269, "step": 5242 }, { "epoch": 0.1691593854542448, "grad_norm": 0.47265625, "learning_rate": 2.8571842881085394e-05, "loss": 2.032, "step": 5243 }, { "epoch": 0.16919164930804115, "grad_norm": 0.466796875, "learning_rate": 2.8571172617570507e-05, "loss": 2.0578, "step": 5244 }, { "epoch": 0.16922391316183752, "grad_norm": 0.46484375, "learning_rate": 2.8570502204673893e-05, "loss": 2.03, "step": 5245 }, { "epoch": 0.16925617701563386, "grad_norm": 0.43359375, "learning_rate": 2.8569831642402927e-05, "loss": 2.0527, "step": 5246 }, { "epoch": 0.1692884408694302, "grad_norm": 0.423828125, "learning_rate": 2.8569160930764985e-05, "loss": 2.0531, "step": 5247 }, { "epoch": 0.16932070472322655, "grad_norm": 0.419921875, "learning_rate": 2.856849006976746e-05, "loss": 2.0243, "step": 5248 }, { "epoch": 0.1693529685770229, "grad_norm": 0.44140625, "learning_rate": 2.8567819059417725e-05, "loss": 2.0514, "step": 5249 }, { "epoch": 0.16938523243081924, "grad_norm": 0.416015625, "learning_rate": 2.8567147899723175e-05, "loss": 2.0645, "step": 5250 }, { "epoch": 0.16941749628461558, "grad_norm": 0.384765625, "learning_rate": 2.856647659069119e-05, "loss": 2.0248, "step": 5251 }, { "epoch": 0.16944976013841193, "grad_norm": 0.4375, "learning_rate": 2.8565805132329166e-05, "loss": 2.0372, "step": 5252 }, { "epoch": 0.16948202399220827, "grad_norm": 0.392578125, "learning_rate": 2.8565133524644488e-05, "loss": 2.0063, "step": 5253 }, { "epoch": 0.1695142878460046, "grad_norm": 0.400390625, "learning_rate": 2.8564461767644555e-05, "loss": 2.0493, "step": 5254 }, { "epoch": 0.16954655169980098, "grad_norm": 0.412109375, "learning_rate": 2.8563789861336754e-05, "loss": 2.0168, "step": 5255 }, { "epoch": 0.16957881555359733, "grad_norm": 0.4453125, "learning_rate": 2.856311780572849e-05, "loss": 2.0291, "step": 5256 }, { "epoch": 0.16961107940739367, "grad_norm": 0.380859375, "learning_rate": 2.8562445600827154e-05, "loss": 2.0362, "step": 5257 }, { "epoch": 0.16964334326119002, "grad_norm": 0.53125, "learning_rate": 2.8561773246640142e-05, "loss": 1.9852, "step": 5258 }, { "epoch": 0.16967560711498636, "grad_norm": 0.404296875, "learning_rate": 2.856110074317487e-05, "loss": 2.0461, "step": 5259 }, { "epoch": 0.1697078709687827, "grad_norm": 0.4375, "learning_rate": 2.8560428090438716e-05, "loss": 2.078, "step": 5260 }, { "epoch": 0.16974013482257905, "grad_norm": 0.49609375, "learning_rate": 2.8559755288439104e-05, "loss": 2.044, "step": 5261 }, { "epoch": 0.1697723986763754, "grad_norm": 0.6484375, "learning_rate": 2.8559082337183436e-05, "loss": 2.0482, "step": 5262 }, { "epoch": 0.16980466253017173, "grad_norm": 0.765625, "learning_rate": 2.8558409236679117e-05, "loss": 2.0278, "step": 5263 }, { "epoch": 0.16983692638396808, "grad_norm": 0.62109375, "learning_rate": 2.8557735986933552e-05, "loss": 2.0699, "step": 5264 }, { "epoch": 0.16986919023776445, "grad_norm": 0.43359375, "learning_rate": 2.855706258795416e-05, "loss": 2.0306, "step": 5265 }, { "epoch": 0.1699014540915608, "grad_norm": 0.59765625, "learning_rate": 2.8556389039748342e-05, "loss": 2.0274, "step": 5266 }, { "epoch": 0.16993371794535714, "grad_norm": 0.435546875, "learning_rate": 2.8555715342323523e-05, "loss": 2.0434, "step": 5267 }, { "epoch": 0.16996598179915348, "grad_norm": 0.4375, "learning_rate": 2.8555041495687112e-05, "loss": 2.0493, "step": 5268 }, { "epoch": 0.16999824565294983, "grad_norm": 0.5078125, "learning_rate": 2.8554367499846535e-05, "loss": 2.0267, "step": 5269 }, { "epoch": 0.17003050950674617, "grad_norm": 0.416015625, "learning_rate": 2.85536933548092e-05, "loss": 2.0554, "step": 5270 }, { "epoch": 0.1700627733605425, "grad_norm": 0.4921875, "learning_rate": 2.8553019060582532e-05, "loss": 2.0307, "step": 5271 }, { "epoch": 0.17009503721433886, "grad_norm": 0.4609375, "learning_rate": 2.855234461717395e-05, "loss": 2.0451, "step": 5272 }, { "epoch": 0.1701273010681352, "grad_norm": 0.4375, "learning_rate": 2.8551670024590882e-05, "loss": 2.0453, "step": 5273 }, { "epoch": 0.17015956492193154, "grad_norm": 0.4140625, "learning_rate": 2.855099528284076e-05, "loss": 2.0456, "step": 5274 }, { "epoch": 0.17019182877572792, "grad_norm": 0.3984375, "learning_rate": 2.8550320391930996e-05, "loss": 2.046, "step": 5275 }, { "epoch": 0.17022409262952426, "grad_norm": 0.41796875, "learning_rate": 2.8549645351869028e-05, "loss": 2.0026, "step": 5276 }, { "epoch": 0.1702563564833206, "grad_norm": 0.41796875, "learning_rate": 2.8548970162662285e-05, "loss": 2.0651, "step": 5277 }, { "epoch": 0.17028862033711695, "grad_norm": 0.40625, "learning_rate": 2.8548294824318196e-05, "loss": 2.0328, "step": 5278 }, { "epoch": 0.1703208841909133, "grad_norm": 0.435546875, "learning_rate": 2.8547619336844196e-05, "loss": 2.049, "step": 5279 }, { "epoch": 0.17035314804470963, "grad_norm": 0.455078125, "learning_rate": 2.8546943700247728e-05, "loss": 2.0329, "step": 5280 }, { "epoch": 0.17038541189850598, "grad_norm": 0.400390625, "learning_rate": 2.8546267914536216e-05, "loss": 2.0576, "step": 5281 }, { "epoch": 0.17041767575230232, "grad_norm": 0.470703125, "learning_rate": 2.8545591979717106e-05, "loss": 2.0526, "step": 5282 }, { "epoch": 0.17044993960609867, "grad_norm": 0.400390625, "learning_rate": 2.854491589579784e-05, "loss": 2.0731, "step": 5283 }, { "epoch": 0.170482203459895, "grad_norm": 0.39453125, "learning_rate": 2.854423966278586e-05, "loss": 2.0318, "step": 5284 }, { "epoch": 0.17051446731369138, "grad_norm": 0.447265625, "learning_rate": 2.8543563280688597e-05, "loss": 2.0178, "step": 5285 }, { "epoch": 0.17054673116748773, "grad_norm": 0.396484375, "learning_rate": 2.8542886749513508e-05, "loss": 2.0294, "step": 5286 }, { "epoch": 0.17057899502128407, "grad_norm": 0.4375, "learning_rate": 2.8542210069268044e-05, "loss": 2.0699, "step": 5287 }, { "epoch": 0.1706112588750804, "grad_norm": 0.431640625, "learning_rate": 2.854153323995964e-05, "loss": 2.0418, "step": 5288 }, { "epoch": 0.17064352272887676, "grad_norm": 0.40234375, "learning_rate": 2.8540856261595754e-05, "loss": 2.022, "step": 5289 }, { "epoch": 0.1706757865826731, "grad_norm": 0.443359375, "learning_rate": 2.8540179134183836e-05, "loss": 2.016, "step": 5290 }, { "epoch": 0.17070805043646944, "grad_norm": 0.416015625, "learning_rate": 2.853950185773134e-05, "loss": 2.0416, "step": 5291 }, { "epoch": 0.1707403142902658, "grad_norm": 0.478515625, "learning_rate": 2.8538824432245725e-05, "loss": 2.0326, "step": 5292 }, { "epoch": 0.17077257814406213, "grad_norm": 0.5078125, "learning_rate": 2.8538146857734442e-05, "loss": 2.0343, "step": 5293 }, { "epoch": 0.17080484199785848, "grad_norm": 0.447265625, "learning_rate": 2.8537469134204945e-05, "loss": 2.0655, "step": 5294 }, { "epoch": 0.17083710585165485, "grad_norm": 0.435546875, "learning_rate": 2.8536791261664706e-05, "loss": 2.0326, "step": 5295 }, { "epoch": 0.1708693697054512, "grad_norm": 0.44921875, "learning_rate": 2.8536113240121174e-05, "loss": 2.0425, "step": 5296 }, { "epoch": 0.17090163355924753, "grad_norm": 0.44140625, "learning_rate": 2.8535435069581822e-05, "loss": 2.0272, "step": 5297 }, { "epoch": 0.17093389741304388, "grad_norm": 0.4375, "learning_rate": 2.853475675005411e-05, "loss": 2.0272, "step": 5298 }, { "epoch": 0.17096616126684022, "grad_norm": 0.435546875, "learning_rate": 2.8534078281545507e-05, "loss": 2.011, "step": 5299 }, { "epoch": 0.17099842512063657, "grad_norm": 0.42578125, "learning_rate": 2.853339966406348e-05, "loss": 2.034, "step": 5300 }, { "epoch": 0.1710306889744329, "grad_norm": 0.44140625, "learning_rate": 2.8532720897615495e-05, "loss": 2.0666, "step": 5301 }, { "epoch": 0.17106295282822925, "grad_norm": 0.439453125, "learning_rate": 2.8532041982209027e-05, "loss": 2.0233, "step": 5302 }, { "epoch": 0.1710952166820256, "grad_norm": 0.396484375, "learning_rate": 2.8531362917851555e-05, "loss": 2.0132, "step": 5303 }, { "epoch": 0.17112748053582194, "grad_norm": 0.43359375, "learning_rate": 2.853068370455054e-05, "loss": 2.0589, "step": 5304 }, { "epoch": 0.17115974438961828, "grad_norm": 0.44921875, "learning_rate": 2.853000434231347e-05, "loss": 2.0431, "step": 5305 }, { "epoch": 0.17119200824341466, "grad_norm": 0.42578125, "learning_rate": 2.8529324831147817e-05, "loss": 2.0494, "step": 5306 }, { "epoch": 0.171224272097211, "grad_norm": 0.48046875, "learning_rate": 2.8528645171061058e-05, "loss": 2.0546, "step": 5307 }, { "epoch": 0.17125653595100734, "grad_norm": 0.478515625, "learning_rate": 2.8527965362060685e-05, "loss": 2.0359, "step": 5308 }, { "epoch": 0.1712887998048037, "grad_norm": 0.5, "learning_rate": 2.8527285404154174e-05, "loss": 2.0422, "step": 5309 }, { "epoch": 0.17132106365860003, "grad_norm": 0.498046875, "learning_rate": 2.8526605297349008e-05, "loss": 2.0882, "step": 5310 }, { "epoch": 0.17135332751239638, "grad_norm": 0.427734375, "learning_rate": 2.852592504165267e-05, "loss": 2.117, "step": 5311 }, { "epoch": 0.17138559136619272, "grad_norm": 0.486328125, "learning_rate": 2.8525244637072656e-05, "loss": 2.0626, "step": 5312 }, { "epoch": 0.17141785521998906, "grad_norm": 0.427734375, "learning_rate": 2.8524564083616453e-05, "loss": 2.1062, "step": 5313 }, { "epoch": 0.1714501190737854, "grad_norm": 0.43359375, "learning_rate": 2.8523883381291552e-05, "loss": 2.0845, "step": 5314 }, { "epoch": 0.17148238292758175, "grad_norm": 0.43359375, "learning_rate": 2.8523202530105443e-05, "loss": 2.0985, "step": 5315 }, { "epoch": 0.17151464678137812, "grad_norm": 0.431640625, "learning_rate": 2.8522521530065623e-05, "loss": 2.0781, "step": 5316 }, { "epoch": 0.17154691063517447, "grad_norm": 0.41015625, "learning_rate": 2.852184038117959e-05, "loss": 2.0866, "step": 5317 }, { "epoch": 0.1715791744889708, "grad_norm": 0.419921875, "learning_rate": 2.8521159083454834e-05, "loss": 2.1125, "step": 5318 }, { "epoch": 0.17161143834276715, "grad_norm": 0.4375, "learning_rate": 2.8520477636898855e-05, "loss": 2.1292, "step": 5319 }, { "epoch": 0.1716437021965635, "grad_norm": 0.4140625, "learning_rate": 2.8519796041519166e-05, "loss": 2.1111, "step": 5320 }, { "epoch": 0.17167596605035984, "grad_norm": 0.470703125, "learning_rate": 2.8519114297323256e-05, "loss": 2.0953, "step": 5321 }, { "epoch": 0.17170822990415618, "grad_norm": 0.470703125, "learning_rate": 2.851843240431864e-05, "loss": 2.0888, "step": 5322 }, { "epoch": 0.17174049375795253, "grad_norm": 0.419921875, "learning_rate": 2.851775036251281e-05, "loss": 2.0766, "step": 5323 }, { "epoch": 0.17177275761174887, "grad_norm": 0.41796875, "learning_rate": 2.851706817191329e-05, "loss": 2.135, "step": 5324 }, { "epoch": 0.17180502146554522, "grad_norm": 0.390625, "learning_rate": 2.8516385832527575e-05, "loss": 2.1111, "step": 5325 }, { "epoch": 0.1718372853193416, "grad_norm": 0.4375, "learning_rate": 2.8515703344363183e-05, "loss": 2.0672, "step": 5326 }, { "epoch": 0.17186954917313793, "grad_norm": 0.53515625, "learning_rate": 2.851502070742763e-05, "loss": 2.0606, "step": 5327 }, { "epoch": 0.17190181302693427, "grad_norm": 0.61328125, "learning_rate": 2.8514337921728416e-05, "loss": 2.1037, "step": 5328 }, { "epoch": 0.17193407688073062, "grad_norm": 0.51953125, "learning_rate": 2.851365498727307e-05, "loss": 2.0982, "step": 5329 }, { "epoch": 0.17196634073452696, "grad_norm": 0.376953125, "learning_rate": 2.8512971904069104e-05, "loss": 2.0873, "step": 5330 }, { "epoch": 0.1719986045883233, "grad_norm": 0.494140625, "learning_rate": 2.8512288672124037e-05, "loss": 2.1134, "step": 5331 }, { "epoch": 0.17203086844211965, "grad_norm": 0.486328125, "learning_rate": 2.8511605291445386e-05, "loss": 2.1016, "step": 5332 }, { "epoch": 0.172063132295916, "grad_norm": 0.357421875, "learning_rate": 2.851092176204068e-05, "loss": 2.0797, "step": 5333 }, { "epoch": 0.17209539614971234, "grad_norm": 0.470703125, "learning_rate": 2.8510238083917446e-05, "loss": 2.1026, "step": 5334 }, { "epoch": 0.17212766000350868, "grad_norm": 0.46875, "learning_rate": 2.8509554257083193e-05, "loss": 2.0663, "step": 5335 }, { "epoch": 0.17215992385730505, "grad_norm": 0.40234375, "learning_rate": 2.8508870281545466e-05, "loss": 2.0952, "step": 5336 }, { "epoch": 0.1721921877111014, "grad_norm": 0.5390625, "learning_rate": 2.8508186157311783e-05, "loss": 2.0901, "step": 5337 }, { "epoch": 0.17222445156489774, "grad_norm": 0.49609375, "learning_rate": 2.850750188438968e-05, "loss": 2.09, "step": 5338 }, { "epoch": 0.17225671541869408, "grad_norm": 0.416015625, "learning_rate": 2.8506817462786686e-05, "loss": 2.0928, "step": 5339 }, { "epoch": 0.17228897927249043, "grad_norm": 0.48046875, "learning_rate": 2.8506132892510335e-05, "loss": 2.089, "step": 5340 }, { "epoch": 0.17232124312628677, "grad_norm": 0.404296875, "learning_rate": 2.850544817356816e-05, "loss": 2.1154, "step": 5341 }, { "epoch": 0.17235350698008312, "grad_norm": 0.439453125, "learning_rate": 2.85047633059677e-05, "loss": 2.1149, "step": 5342 }, { "epoch": 0.17238577083387946, "grad_norm": 0.388671875, "learning_rate": 2.8504078289716497e-05, "loss": 2.0934, "step": 5343 }, { "epoch": 0.1724180346876758, "grad_norm": 0.40234375, "learning_rate": 2.850339312482209e-05, "loss": 2.108, "step": 5344 }, { "epoch": 0.17245029854147215, "grad_norm": 0.392578125, "learning_rate": 2.850270781129201e-05, "loss": 2.1225, "step": 5345 }, { "epoch": 0.17248256239526852, "grad_norm": 0.37890625, "learning_rate": 2.8502022349133817e-05, "loss": 2.0654, "step": 5346 }, { "epoch": 0.17251482624906486, "grad_norm": 0.419921875, "learning_rate": 2.8501336738355045e-05, "loss": 2.0917, "step": 5347 }, { "epoch": 0.1725470901028612, "grad_norm": 0.40234375, "learning_rate": 2.8500650978963245e-05, "loss": 2.1001, "step": 5348 }, { "epoch": 0.17257935395665755, "grad_norm": 2.0625, "learning_rate": 2.8499965070965964e-05, "loss": 2.1006, "step": 5349 }, { "epoch": 0.1726116178104539, "grad_norm": 0.4765625, "learning_rate": 2.8499279014370753e-05, "loss": 2.1133, "step": 5350 }, { "epoch": 0.17264388166425024, "grad_norm": 0.494140625, "learning_rate": 2.849859280918516e-05, "loss": 2.0844, "step": 5351 }, { "epoch": 0.17267614551804658, "grad_norm": 0.447265625, "learning_rate": 2.8497906455416748e-05, "loss": 2.0941, "step": 5352 }, { "epoch": 0.17270840937184292, "grad_norm": 0.44921875, "learning_rate": 2.8497219953073057e-05, "loss": 2.1084, "step": 5353 }, { "epoch": 0.17274067322563927, "grad_norm": 0.79296875, "learning_rate": 2.8496533302161658e-05, "loss": 2.0493, "step": 5354 }, { "epoch": 0.1727729370794356, "grad_norm": 0.90234375, "learning_rate": 2.84958465026901e-05, "loss": 2.036, "step": 5355 }, { "epoch": 0.17280520093323198, "grad_norm": 0.78515625, "learning_rate": 2.8495159554665945e-05, "loss": 2.0386, "step": 5356 }, { "epoch": 0.17283746478702833, "grad_norm": 0.625, "learning_rate": 2.8494472458096756e-05, "loss": 2.0617, "step": 5357 }, { "epoch": 0.17286972864082467, "grad_norm": 0.66015625, "learning_rate": 2.8493785212990094e-05, "loss": 2.0344, "step": 5358 }, { "epoch": 0.17290199249462102, "grad_norm": 0.578125, "learning_rate": 2.8493097819353525e-05, "loss": 2.0347, "step": 5359 }, { "epoch": 0.17293425634841736, "grad_norm": 0.5390625, "learning_rate": 2.8492410277194618e-05, "loss": 1.9913, "step": 5360 }, { "epoch": 0.1729665202022137, "grad_norm": 0.58203125, "learning_rate": 2.849172258652093e-05, "loss": 2.0046, "step": 5361 }, { "epoch": 0.17299878405601005, "grad_norm": 0.6171875, "learning_rate": 2.8491034747340048e-05, "loss": 2.0512, "step": 5362 }, { "epoch": 0.1730310479098064, "grad_norm": 0.5703125, "learning_rate": 2.849034675965953e-05, "loss": 2.0558, "step": 5363 }, { "epoch": 0.17306331176360273, "grad_norm": 0.5546875, "learning_rate": 2.8489658623486957e-05, "loss": 2.0386, "step": 5364 }, { "epoch": 0.17309557561739908, "grad_norm": 0.59375, "learning_rate": 2.8488970338829895e-05, "loss": 2.037, "step": 5365 }, { "epoch": 0.17312783947119545, "grad_norm": 0.6328125, "learning_rate": 2.848828190569592e-05, "loss": 2.0164, "step": 5366 }, { "epoch": 0.1731601033249918, "grad_norm": 0.58984375, "learning_rate": 2.8487593324092623e-05, "loss": 2.0433, "step": 5367 }, { "epoch": 0.17319236717878814, "grad_norm": 0.546875, "learning_rate": 2.848690459402757e-05, "loss": 2.0348, "step": 5368 }, { "epoch": 0.17322463103258448, "grad_norm": 0.625, "learning_rate": 2.8486215715508347e-05, "loss": 2.0067, "step": 5369 }, { "epoch": 0.17325689488638082, "grad_norm": 0.478515625, "learning_rate": 2.8485526688542537e-05, "loss": 2.04, "step": 5370 }, { "epoch": 0.17328915874017717, "grad_norm": 0.53515625, "learning_rate": 2.8484837513137723e-05, "loss": 2.0388, "step": 5371 }, { "epoch": 0.1733214225939735, "grad_norm": 0.70703125, "learning_rate": 2.848414818930149e-05, "loss": 2.0201, "step": 5372 }, { "epoch": 0.17335368644776986, "grad_norm": 0.69140625, "learning_rate": 2.8483458717041434e-05, "loss": 2.0252, "step": 5373 }, { "epoch": 0.1733859503015662, "grad_norm": 0.5390625, "learning_rate": 2.848276909636513e-05, "loss": 1.9951, "step": 5374 }, { "epoch": 0.17341821415536254, "grad_norm": 0.6015625, "learning_rate": 2.848207932728018e-05, "loss": 2.0863, "step": 5375 }, { "epoch": 0.17345047800915891, "grad_norm": 0.53515625, "learning_rate": 2.8481389409794172e-05, "loss": 2.0638, "step": 5376 }, { "epoch": 0.17348274186295526, "grad_norm": 0.50390625, "learning_rate": 2.84806993439147e-05, "loss": 1.9896, "step": 5377 }, { "epoch": 0.1735150057167516, "grad_norm": 0.53515625, "learning_rate": 2.8480009129649363e-05, "loss": 2.0216, "step": 5378 }, { "epoch": 0.17354726957054795, "grad_norm": 0.4921875, "learning_rate": 2.847931876700575e-05, "loss": 2.0064, "step": 5379 }, { "epoch": 0.1735795334243443, "grad_norm": 0.49609375, "learning_rate": 2.847862825599147e-05, "loss": 2.001, "step": 5380 }, { "epoch": 0.17361179727814063, "grad_norm": 0.50390625, "learning_rate": 2.8477937596614123e-05, "loss": 2.0017, "step": 5381 }, { "epoch": 0.17364406113193698, "grad_norm": 0.46484375, "learning_rate": 2.8477246788881304e-05, "loss": 1.9822, "step": 5382 }, { "epoch": 0.17367632498573332, "grad_norm": 0.46484375, "learning_rate": 2.8476555832800623e-05, "loss": 1.9974, "step": 5383 }, { "epoch": 0.17370858883952967, "grad_norm": 0.416015625, "learning_rate": 2.8475864728379682e-05, "loss": 2.0073, "step": 5384 }, { "epoch": 0.173740852693326, "grad_norm": 0.423828125, "learning_rate": 2.8475173475626094e-05, "loss": 2.0226, "step": 5385 }, { "epoch": 0.17377311654712238, "grad_norm": 0.427734375, "learning_rate": 2.8474482074547455e-05, "loss": 1.9715, "step": 5386 }, { "epoch": 0.17380538040091872, "grad_norm": 0.3984375, "learning_rate": 2.847379052515139e-05, "loss": 2.0237, "step": 5387 }, { "epoch": 0.17383764425471507, "grad_norm": 0.41015625, "learning_rate": 2.84730988274455e-05, "loss": 1.9981, "step": 5388 }, { "epoch": 0.1738699081085114, "grad_norm": 0.408203125, "learning_rate": 2.8472406981437413e-05, "loss": 2.0146, "step": 5389 }, { "epoch": 0.17390217196230776, "grad_norm": 0.40625, "learning_rate": 2.8471714987134726e-05, "loss": 1.9925, "step": 5390 }, { "epoch": 0.1739344358161041, "grad_norm": 0.431640625, "learning_rate": 2.8471022844545072e-05, "loss": 1.9481, "step": 5391 }, { "epoch": 0.17396669966990044, "grad_norm": 0.435546875, "learning_rate": 2.8470330553676055e-05, "loss": 1.9814, "step": 5392 }, { "epoch": 0.1739989635236968, "grad_norm": 0.421875, "learning_rate": 2.8469638114535307e-05, "loss": 1.995, "step": 5393 }, { "epoch": 0.17403122737749313, "grad_norm": 0.390625, "learning_rate": 2.8468945527130447e-05, "loss": 1.9879, "step": 5394 }, { "epoch": 0.17406349123128947, "grad_norm": 0.439453125, "learning_rate": 2.8468252791469092e-05, "loss": 2.0098, "step": 5395 }, { "epoch": 0.17409575508508582, "grad_norm": 0.48046875, "learning_rate": 2.846755990755888e-05, "loss": 1.9725, "step": 5396 }, { "epoch": 0.1741280189388822, "grad_norm": 0.5, "learning_rate": 2.8466866875407423e-05, "loss": 1.9928, "step": 5397 }, { "epoch": 0.17416028279267853, "grad_norm": 0.490234375, "learning_rate": 2.8466173695022363e-05, "loss": 2.0187, "step": 5398 }, { "epoch": 0.17419254664647488, "grad_norm": 0.4296875, "learning_rate": 2.8465480366411317e-05, "loss": 2.0078, "step": 5399 }, { "epoch": 0.17422481050027122, "grad_norm": 0.41015625, "learning_rate": 2.8464786889581923e-05, "loss": 2.0036, "step": 5400 }, { "epoch": 0.17425707435406756, "grad_norm": 0.478515625, "learning_rate": 2.8464093264541818e-05, "loss": 1.9907, "step": 5401 }, { "epoch": 0.1742893382078639, "grad_norm": 0.458984375, "learning_rate": 2.8463399491298635e-05, "loss": 2.0227, "step": 5402 }, { "epoch": 0.17432160206166025, "grad_norm": 0.40625, "learning_rate": 2.8462705569860007e-05, "loss": 2.0177, "step": 5403 }, { "epoch": 0.1743538659154566, "grad_norm": 0.42578125, "learning_rate": 2.8462011500233575e-05, "loss": 2.0054, "step": 5404 }, { "epoch": 0.17438612976925294, "grad_norm": 0.455078125, "learning_rate": 2.8461317282426976e-05, "loss": 1.9679, "step": 5405 }, { "epoch": 0.17441839362304928, "grad_norm": 0.39453125, "learning_rate": 2.8460622916447856e-05, "loss": 2.0053, "step": 5406 }, { "epoch": 0.17445065747684566, "grad_norm": 0.392578125, "learning_rate": 2.8459928402303856e-05, "loss": 1.9865, "step": 5407 }, { "epoch": 0.174482921330642, "grad_norm": 0.46875, "learning_rate": 2.8459233740002617e-05, "loss": 1.9735, "step": 5408 }, { "epoch": 0.17451518518443834, "grad_norm": 0.4375, "learning_rate": 2.845853892955179e-05, "loss": 2.0119, "step": 5409 }, { "epoch": 0.1745474490382347, "grad_norm": 0.365234375, "learning_rate": 2.8457843970959022e-05, "loss": 2.0078, "step": 5410 }, { "epoch": 0.17457971289203103, "grad_norm": 0.421875, "learning_rate": 2.8457148864231962e-05, "loss": 2.0105, "step": 5411 }, { "epoch": 0.17461197674582737, "grad_norm": 0.412109375, "learning_rate": 2.8456453609378263e-05, "loss": 2.0056, "step": 5412 }, { "epoch": 0.17464424059962372, "grad_norm": 0.37109375, "learning_rate": 2.8455758206405573e-05, "loss": 2.0029, "step": 5413 }, { "epoch": 0.17467650445342006, "grad_norm": 0.455078125, "learning_rate": 2.8455062655321552e-05, "loss": 2.0192, "step": 5414 }, { "epoch": 0.1747087683072164, "grad_norm": 0.4140625, "learning_rate": 2.845436695613385e-05, "loss": 2.0107, "step": 5415 }, { "epoch": 0.17474103216101275, "grad_norm": 0.37890625, "learning_rate": 2.8453671108850136e-05, "loss": 1.9824, "step": 5416 }, { "epoch": 0.17477329601480912, "grad_norm": 0.47265625, "learning_rate": 2.8452975113478056e-05, "loss": 2.0273, "step": 5417 }, { "epoch": 0.17480555986860546, "grad_norm": 0.50390625, "learning_rate": 2.8452278970025277e-05, "loss": 1.9966, "step": 5418 }, { "epoch": 0.1748378237224018, "grad_norm": 0.40625, "learning_rate": 2.8451582678499467e-05, "loss": 2.0172, "step": 5419 }, { "epoch": 0.17487008757619815, "grad_norm": 0.5078125, "learning_rate": 2.8450886238908283e-05, "loss": 2.0379, "step": 5420 }, { "epoch": 0.1749023514299945, "grad_norm": 0.50390625, "learning_rate": 2.845018965125939e-05, "loss": 2.0498, "step": 5421 }, { "epoch": 0.17493461528379084, "grad_norm": 0.423828125, "learning_rate": 2.844949291556046e-05, "loss": 1.9697, "step": 5422 }, { "epoch": 0.17496687913758718, "grad_norm": 0.482421875, "learning_rate": 2.844879603181916e-05, "loss": 2.0401, "step": 5423 }, { "epoch": 0.17499914299138353, "grad_norm": 0.55078125, "learning_rate": 2.844809900004316e-05, "loss": 2.032, "step": 5424 }, { "epoch": 0.17503140684517987, "grad_norm": 0.44140625, "learning_rate": 2.844740182024014e-05, "loss": 2.0201, "step": 5425 }, { "epoch": 0.17506367069897621, "grad_norm": 0.49609375, "learning_rate": 2.8446704492417763e-05, "loss": 2.0107, "step": 5426 }, { "epoch": 0.1750959345527726, "grad_norm": 0.53125, "learning_rate": 2.8446007016583713e-05, "loss": 2.0124, "step": 5427 }, { "epoch": 0.17512819840656893, "grad_norm": 0.57421875, "learning_rate": 2.8445309392745662e-05, "loss": 2.0031, "step": 5428 }, { "epoch": 0.17516046226036527, "grad_norm": 0.50390625, "learning_rate": 2.844461162091129e-05, "loss": 2.0434, "step": 5429 }, { "epoch": 0.17519272611416162, "grad_norm": 0.52734375, "learning_rate": 2.8443913701088275e-05, "loss": 2.0412, "step": 5430 }, { "epoch": 0.17522498996795796, "grad_norm": 0.43359375, "learning_rate": 2.844321563328431e-05, "loss": 1.9955, "step": 5431 }, { "epoch": 0.1752572538217543, "grad_norm": 0.46484375, "learning_rate": 2.8442517417507067e-05, "loss": 2.003, "step": 5432 }, { "epoch": 0.17528951767555065, "grad_norm": 0.5703125, "learning_rate": 2.844181905376424e-05, "loss": 2.0112, "step": 5433 }, { "epoch": 0.175321781529347, "grad_norm": 0.69921875, "learning_rate": 2.8441120542063504e-05, "loss": 2.0246, "step": 5434 }, { "epoch": 0.17535404538314334, "grad_norm": 0.515625, "learning_rate": 2.8440421882412562e-05, "loss": 2.0314, "step": 5435 }, { "epoch": 0.17538630923693968, "grad_norm": 0.458984375, "learning_rate": 2.8439723074819092e-05, "loss": 2.0091, "step": 5436 }, { "epoch": 0.17541857309073605, "grad_norm": 0.52734375, "learning_rate": 2.8439024119290796e-05, "loss": 2.0178, "step": 5437 }, { "epoch": 0.1754508369445324, "grad_norm": 0.474609375, "learning_rate": 2.8438325015835366e-05, "loss": 2.0224, "step": 5438 }, { "epoch": 0.17548310079832874, "grad_norm": 0.462890625, "learning_rate": 2.8437625764460495e-05, "loss": 2.0359, "step": 5439 }, { "epoch": 0.17551536465212508, "grad_norm": 0.4609375, "learning_rate": 2.8436926365173877e-05, "loss": 2.0375, "step": 5440 }, { "epoch": 0.17554762850592143, "grad_norm": 0.484375, "learning_rate": 2.8436226817983212e-05, "loss": 2.0411, "step": 5441 }, { "epoch": 0.17557989235971777, "grad_norm": 0.455078125, "learning_rate": 2.8435527122896206e-05, "loss": 2.0102, "step": 5442 }, { "epoch": 0.17561215621351411, "grad_norm": 0.48046875, "learning_rate": 2.843482727992055e-05, "loss": 2.0225, "step": 5443 }, { "epoch": 0.17564442006731046, "grad_norm": 0.49609375, "learning_rate": 2.8434127289063956e-05, "loss": 2.0196, "step": 5444 }, { "epoch": 0.1756766839211068, "grad_norm": 0.447265625, "learning_rate": 2.8433427150334127e-05, "loss": 2.0361, "step": 5445 }, { "epoch": 0.17570894777490315, "grad_norm": 0.4453125, "learning_rate": 2.843272686373877e-05, "loss": 2.0335, "step": 5446 }, { "epoch": 0.17574121162869952, "grad_norm": 0.439453125, "learning_rate": 2.843202642928559e-05, "loss": 2.0505, "step": 5447 }, { "epoch": 0.17577347548249586, "grad_norm": 0.412109375, "learning_rate": 2.84313258469823e-05, "loss": 2.0173, "step": 5448 }, { "epoch": 0.1758057393362922, "grad_norm": 0.41015625, "learning_rate": 2.843062511683661e-05, "loss": 2.0076, "step": 5449 }, { "epoch": 0.17583800319008855, "grad_norm": 0.443359375, "learning_rate": 2.8429924238856236e-05, "loss": 2.0187, "step": 5450 }, { "epoch": 0.1758702670438849, "grad_norm": 0.423828125, "learning_rate": 2.8429223213048893e-05, "loss": 2.0484, "step": 5451 }, { "epoch": 0.17590253089768124, "grad_norm": 0.484375, "learning_rate": 2.8428522039422286e-05, "loss": 2.0344, "step": 5452 }, { "epoch": 0.17593479475147758, "grad_norm": 0.44921875, "learning_rate": 2.8427820717984148e-05, "loss": 2.0349, "step": 5453 }, { "epoch": 0.17596705860527392, "grad_norm": 0.435546875, "learning_rate": 2.8427119248742195e-05, "loss": 2.0199, "step": 5454 }, { "epoch": 0.17599932245907027, "grad_norm": 0.439453125, "learning_rate": 2.842641763170414e-05, "loss": 1.9749, "step": 5455 }, { "epoch": 0.1760315863128666, "grad_norm": 0.4375, "learning_rate": 2.8425715866877715e-05, "loss": 2.009, "step": 5456 }, { "epoch": 0.17606385016666298, "grad_norm": 0.439453125, "learning_rate": 2.842501395427064e-05, "loss": 2.031, "step": 5457 }, { "epoch": 0.17609611402045933, "grad_norm": 0.412109375, "learning_rate": 2.8424311893890642e-05, "loss": 2.0273, "step": 5458 }, { "epoch": 0.17612837787425567, "grad_norm": 0.455078125, "learning_rate": 2.842360968574545e-05, "loss": 2.0165, "step": 5459 }, { "epoch": 0.17616064172805201, "grad_norm": 0.400390625, "learning_rate": 2.8422907329842793e-05, "loss": 2.0319, "step": 5460 }, { "epoch": 0.17619290558184836, "grad_norm": 0.455078125, "learning_rate": 2.8422204826190402e-05, "loss": 2.0166, "step": 5461 }, { "epoch": 0.1762251694356447, "grad_norm": 0.41796875, "learning_rate": 2.842150217479601e-05, "loss": 2.0535, "step": 5462 }, { "epoch": 0.17625743328944105, "grad_norm": 0.443359375, "learning_rate": 2.842079937566735e-05, "loss": 1.9959, "step": 5463 }, { "epoch": 0.1762896971432374, "grad_norm": 0.44921875, "learning_rate": 2.842009642881216e-05, "loss": 1.9968, "step": 5464 }, { "epoch": 0.17632196099703373, "grad_norm": 0.453125, "learning_rate": 2.8419393334238176e-05, "loss": 2.0142, "step": 5465 }, { "epoch": 0.17635422485083008, "grad_norm": 0.484375, "learning_rate": 2.8418690091953133e-05, "loss": 2.011, "step": 5466 }, { "epoch": 0.17638648870462645, "grad_norm": 0.421875, "learning_rate": 2.841798670196478e-05, "loss": 2.0129, "step": 5467 }, { "epoch": 0.1764187525584228, "grad_norm": 0.5078125, "learning_rate": 2.841728316428085e-05, "loss": 1.9961, "step": 5468 }, { "epoch": 0.17645101641221914, "grad_norm": 0.474609375, "learning_rate": 2.84165794789091e-05, "loss": 2.0253, "step": 5469 }, { "epoch": 0.17648328026601548, "grad_norm": 0.5078125, "learning_rate": 2.841587564585726e-05, "loss": 2.0367, "step": 5470 }, { "epoch": 0.17651554411981182, "grad_norm": 0.494140625, "learning_rate": 2.8415171665133092e-05, "loss": 2.0, "step": 5471 }, { "epoch": 0.17654780797360817, "grad_norm": 0.4609375, "learning_rate": 2.8414467536744335e-05, "loss": 2.0266, "step": 5472 }, { "epoch": 0.1765800718274045, "grad_norm": 0.447265625, "learning_rate": 2.8413763260698748e-05, "loss": 2.0546, "step": 5473 }, { "epoch": 0.17661233568120085, "grad_norm": 0.46484375, "learning_rate": 2.841305883700407e-05, "loss": 2.0111, "step": 5474 }, { "epoch": 0.1766445995349972, "grad_norm": 0.56640625, "learning_rate": 2.841235426566807e-05, "loss": 2.0482, "step": 5475 }, { "epoch": 0.17667686338879354, "grad_norm": 0.60546875, "learning_rate": 2.841164954669849e-05, "loss": 1.9999, "step": 5476 }, { "epoch": 0.17670912724258991, "grad_norm": 0.6640625, "learning_rate": 2.84109446801031e-05, "loss": 2.0335, "step": 5477 }, { "epoch": 0.17674139109638626, "grad_norm": 0.77734375, "learning_rate": 2.8410239665889647e-05, "loss": 2.0279, "step": 5478 }, { "epoch": 0.1767736549501826, "grad_norm": 0.69921875, "learning_rate": 2.8409534504065898e-05, "loss": 2.0626, "step": 5479 }, { "epoch": 0.17680591880397895, "grad_norm": 0.78125, "learning_rate": 2.840882919463961e-05, "loss": 2.0303, "step": 5480 }, { "epoch": 0.1768381826577753, "grad_norm": 0.65625, "learning_rate": 2.8408123737618553e-05, "loss": 2.05, "step": 5481 }, { "epoch": 0.17687044651157163, "grad_norm": 0.53515625, "learning_rate": 2.840741813301049e-05, "loss": 2.0803, "step": 5482 }, { "epoch": 0.17690271036536798, "grad_norm": 0.62890625, "learning_rate": 2.840671238082318e-05, "loss": 2.0344, "step": 5483 }, { "epoch": 0.17693497421916432, "grad_norm": 0.546875, "learning_rate": 2.840600648106441e-05, "loss": 2.0398, "step": 5484 }, { "epoch": 0.17696723807296066, "grad_norm": 0.52734375, "learning_rate": 2.8405300433741927e-05, "loss": 2.0115, "step": 5485 }, { "epoch": 0.176999501926757, "grad_norm": 0.55859375, "learning_rate": 2.840459423886352e-05, "loss": 1.9969, "step": 5486 }, { "epoch": 0.17703176578055335, "grad_norm": 0.51953125, "learning_rate": 2.8403887896436952e-05, "loss": 1.999, "step": 5487 }, { "epoch": 0.17706402963434972, "grad_norm": 0.498046875, "learning_rate": 2.840318140647e-05, "loss": 2.0437, "step": 5488 }, { "epoch": 0.17709629348814607, "grad_norm": 0.50390625, "learning_rate": 2.840247476897045e-05, "loss": 2.0458, "step": 5489 }, { "epoch": 0.1771285573419424, "grad_norm": 0.494140625, "learning_rate": 2.8401767983946068e-05, "loss": 1.9962, "step": 5490 }, { "epoch": 0.17716082119573875, "grad_norm": 0.46875, "learning_rate": 2.840106105140464e-05, "loss": 2.0644, "step": 5491 }, { "epoch": 0.1771930850495351, "grad_norm": 0.431640625, "learning_rate": 2.8400353971353946e-05, "loss": 2.034, "step": 5492 }, { "epoch": 0.17722534890333144, "grad_norm": 0.46875, "learning_rate": 2.8399646743801763e-05, "loss": 2.0065, "step": 5493 }, { "epoch": 0.17725761275712779, "grad_norm": 0.455078125, "learning_rate": 2.839893936875589e-05, "loss": 2.0168, "step": 5494 }, { "epoch": 0.17728987661092413, "grad_norm": 0.51953125, "learning_rate": 2.83982318462241e-05, "loss": 2.0238, "step": 5495 }, { "epoch": 0.17732214046472047, "grad_norm": 0.515625, "learning_rate": 2.8397524176214186e-05, "loss": 2.0467, "step": 5496 }, { "epoch": 0.17735440431851682, "grad_norm": 0.56640625, "learning_rate": 2.8396816358733936e-05, "loss": 2.0235, "step": 5497 }, { "epoch": 0.1773866681723132, "grad_norm": 0.50390625, "learning_rate": 2.8396108393791146e-05, "loss": 2.0287, "step": 5498 }, { "epoch": 0.17741893202610953, "grad_norm": 0.5078125, "learning_rate": 2.8395400281393604e-05, "loss": 2.0328, "step": 5499 }, { "epoch": 0.17745119587990588, "grad_norm": 0.466796875, "learning_rate": 2.8394692021549107e-05, "loss": 2.0583, "step": 5500 }, { "epoch": 0.17748345973370222, "grad_norm": 0.466796875, "learning_rate": 2.8393983614265447e-05, "loss": 2.0479, "step": 5501 }, { "epoch": 0.17751572358749856, "grad_norm": 0.47265625, "learning_rate": 2.8393275059550424e-05, "loss": 2.0538, "step": 5502 }, { "epoch": 0.1775479874412949, "grad_norm": 0.44921875, "learning_rate": 2.839256635741184e-05, "loss": 2.103, "step": 5503 }, { "epoch": 0.17758025129509125, "grad_norm": 0.458984375, "learning_rate": 2.839185750785749e-05, "loss": 2.0863, "step": 5504 }, { "epoch": 0.1776125151488876, "grad_norm": 0.455078125, "learning_rate": 2.839114851089518e-05, "loss": 2.0944, "step": 5505 }, { "epoch": 0.17764477900268394, "grad_norm": 0.427734375, "learning_rate": 2.8390439366532713e-05, "loss": 2.0754, "step": 5506 }, { "epoch": 0.17767704285648028, "grad_norm": 0.734375, "learning_rate": 2.83897300747779e-05, "loss": 2.0642, "step": 5507 }, { "epoch": 0.17770930671027665, "grad_norm": 0.76171875, "learning_rate": 2.8389020635638542e-05, "loss": 1.9518, "step": 5508 }, { "epoch": 0.177741570564073, "grad_norm": 1.1640625, "learning_rate": 2.838831104912245e-05, "loss": 1.912, "step": 5509 }, { "epoch": 0.17777383441786934, "grad_norm": 1.203125, "learning_rate": 2.8387601315237435e-05, "loss": 1.9148, "step": 5510 }, { "epoch": 0.17780609827166569, "grad_norm": 0.8359375, "learning_rate": 2.838689143399131e-05, "loss": 1.9244, "step": 5511 }, { "epoch": 0.17783836212546203, "grad_norm": 0.70703125, "learning_rate": 2.8386181405391887e-05, "loss": 1.9503, "step": 5512 }, { "epoch": 0.17787062597925837, "grad_norm": 0.58984375, "learning_rate": 2.8385471229446984e-05, "loss": 1.9227, "step": 5513 }, { "epoch": 0.17790288983305472, "grad_norm": 0.60546875, "learning_rate": 2.8384760906164412e-05, "loss": 1.9194, "step": 5514 }, { "epoch": 0.17793515368685106, "grad_norm": 0.55859375, "learning_rate": 2.8384050435551997e-05, "loss": 1.9204, "step": 5515 }, { "epoch": 0.1779674175406474, "grad_norm": 0.52734375, "learning_rate": 2.8383339817617556e-05, "loss": 1.8408, "step": 5516 }, { "epoch": 0.17799968139444375, "grad_norm": 0.515625, "learning_rate": 2.8382629052368914e-05, "loss": 1.8495, "step": 5517 }, { "epoch": 0.17803194524824012, "grad_norm": 0.458984375, "learning_rate": 2.8381918139813887e-05, "loss": 1.8752, "step": 5518 }, { "epoch": 0.17806420910203646, "grad_norm": 0.443359375, "learning_rate": 2.8381207079960308e-05, "loss": 1.9086, "step": 5519 }, { "epoch": 0.1780964729558328, "grad_norm": 0.462890625, "learning_rate": 2.8380495872816e-05, "loss": 1.9038, "step": 5520 }, { "epoch": 0.17812873680962915, "grad_norm": 0.419921875, "learning_rate": 2.837978451838879e-05, "loss": 1.872, "step": 5521 }, { "epoch": 0.1781610006634255, "grad_norm": 0.439453125, "learning_rate": 2.8379073016686515e-05, "loss": 1.8924, "step": 5522 }, { "epoch": 0.17819326451722184, "grad_norm": 0.435546875, "learning_rate": 2.8378361367717002e-05, "loss": 1.9019, "step": 5523 }, { "epoch": 0.17822552837101818, "grad_norm": 0.482421875, "learning_rate": 2.8377649571488086e-05, "loss": 1.9332, "step": 5524 }, { "epoch": 0.17825779222481453, "grad_norm": 0.5546875, "learning_rate": 2.83769376280076e-05, "loss": 1.9827, "step": 5525 }, { "epoch": 0.17829005607861087, "grad_norm": 0.57421875, "learning_rate": 2.837622553728338e-05, "loss": 1.9459, "step": 5526 }, { "epoch": 0.17832231993240721, "grad_norm": 0.5, "learning_rate": 2.8375513299323265e-05, "loss": 1.9431, "step": 5527 }, { "epoch": 0.17835458378620359, "grad_norm": 0.515625, "learning_rate": 2.8374800914135092e-05, "loss": 1.9639, "step": 5528 }, { "epoch": 0.17838684763999993, "grad_norm": 0.4921875, "learning_rate": 2.837408838172671e-05, "loss": 1.955, "step": 5529 }, { "epoch": 0.17841911149379627, "grad_norm": 0.486328125, "learning_rate": 2.837337570210596e-05, "loss": 1.9941, "step": 5530 }, { "epoch": 0.17845137534759262, "grad_norm": 0.462890625, "learning_rate": 2.837266287528068e-05, "loss": 1.9629, "step": 5531 }, { "epoch": 0.17848363920138896, "grad_norm": 0.45703125, "learning_rate": 2.837194990125872e-05, "loss": 1.9909, "step": 5532 }, { "epoch": 0.1785159030551853, "grad_norm": 0.455078125, "learning_rate": 2.8371236780047927e-05, "loss": 1.9811, "step": 5533 }, { "epoch": 0.17854816690898165, "grad_norm": 0.43359375, "learning_rate": 2.837052351165616e-05, "loss": 1.9647, "step": 5534 }, { "epoch": 0.178580430762778, "grad_norm": 0.42578125, "learning_rate": 2.8369810096091254e-05, "loss": 1.9864, "step": 5535 }, { "epoch": 0.17861269461657434, "grad_norm": 0.47265625, "learning_rate": 2.8369096533361073e-05, "loss": 1.9699, "step": 5536 }, { "epoch": 0.17864495847037068, "grad_norm": 0.423828125, "learning_rate": 2.836838282347347e-05, "loss": 1.9792, "step": 5537 }, { "epoch": 0.17867722232416705, "grad_norm": 0.4765625, "learning_rate": 2.8367668966436297e-05, "loss": 1.969, "step": 5538 }, { "epoch": 0.1787094861779634, "grad_norm": 0.41015625, "learning_rate": 2.8366954962257413e-05, "loss": 1.9539, "step": 5539 }, { "epoch": 0.17874175003175974, "grad_norm": 0.470703125, "learning_rate": 2.836624081094468e-05, "loss": 1.9422, "step": 5540 }, { "epoch": 0.17877401388555608, "grad_norm": 0.400390625, "learning_rate": 2.8365526512505954e-05, "loss": 1.9451, "step": 5541 }, { "epoch": 0.17880627773935243, "grad_norm": 0.5, "learning_rate": 2.8364812066949107e-05, "loss": 1.9499, "step": 5542 }, { "epoch": 0.17883854159314877, "grad_norm": 0.6171875, "learning_rate": 2.8364097474281994e-05, "loss": 1.932, "step": 5543 }, { "epoch": 0.1788708054469451, "grad_norm": 0.54296875, "learning_rate": 2.836338273451248e-05, "loss": 1.9491, "step": 5544 }, { "epoch": 0.17890306930074146, "grad_norm": 0.50390625, "learning_rate": 2.8362667847648437e-05, "loss": 1.9388, "step": 5545 }, { "epoch": 0.1789353331545378, "grad_norm": 0.52734375, "learning_rate": 2.8361952813697734e-05, "loss": 1.8835, "step": 5546 }, { "epoch": 0.17896759700833414, "grad_norm": 0.484375, "learning_rate": 2.836123763266824e-05, "loss": 1.8712, "step": 5547 }, { "epoch": 0.17899986086213052, "grad_norm": 0.44921875, "learning_rate": 2.836052230456783e-05, "loss": 1.9207, "step": 5548 }, { "epoch": 0.17903212471592686, "grad_norm": 0.453125, "learning_rate": 2.835980682940437e-05, "loss": 1.9277, "step": 5549 }, { "epoch": 0.1790643885697232, "grad_norm": 0.494140625, "learning_rate": 2.8359091207185746e-05, "loss": 1.95, "step": 5550 }, { "epoch": 0.17909665242351955, "grad_norm": 0.46484375, "learning_rate": 2.835837543791983e-05, "loss": 1.9503, "step": 5551 }, { "epoch": 0.1791289162773159, "grad_norm": 0.5, "learning_rate": 2.8357659521614492e-05, "loss": 1.9787, "step": 5552 }, { "epoch": 0.17916118013111224, "grad_norm": 0.451171875, "learning_rate": 2.835694345827763e-05, "loss": 1.9643, "step": 5553 }, { "epoch": 0.17919344398490858, "grad_norm": 0.482421875, "learning_rate": 2.8356227247917113e-05, "loss": 1.9461, "step": 5554 }, { "epoch": 0.17922570783870492, "grad_norm": 0.48046875, "learning_rate": 2.835551089054083e-05, "loss": 1.9381, "step": 5555 }, { "epoch": 0.17925797169250127, "grad_norm": 0.46484375, "learning_rate": 2.8354794386156658e-05, "loss": 2.0427, "step": 5556 }, { "epoch": 0.1792902355462976, "grad_norm": 0.447265625, "learning_rate": 2.8354077734772493e-05, "loss": 2.0408, "step": 5557 }, { "epoch": 0.17932249940009398, "grad_norm": 0.41796875, "learning_rate": 2.8353360936396223e-05, "loss": 2.0556, "step": 5558 }, { "epoch": 0.17935476325389033, "grad_norm": 0.384765625, "learning_rate": 2.8352643991035732e-05, "loss": 2.0239, "step": 5559 }, { "epoch": 0.17938702710768667, "grad_norm": 0.423828125, "learning_rate": 2.8351926898698925e-05, "loss": 2.0375, "step": 5560 }, { "epoch": 0.179419290961483, "grad_norm": 0.515625, "learning_rate": 2.8351209659393674e-05, "loss": 2.0472, "step": 5561 }, { "epoch": 0.17945155481527936, "grad_norm": 0.58203125, "learning_rate": 2.8350492273127892e-05, "loss": 2.0581, "step": 5562 }, { "epoch": 0.1794838186690757, "grad_norm": 0.58203125, "learning_rate": 2.8349774739909463e-05, "loss": 2.0268, "step": 5563 }, { "epoch": 0.17951608252287204, "grad_norm": 0.376953125, "learning_rate": 2.834905705974629e-05, "loss": 2.0144, "step": 5564 }, { "epoch": 0.1795483463766684, "grad_norm": 0.4921875, "learning_rate": 2.834833923264628e-05, "loss": 2.0503, "step": 5565 }, { "epoch": 0.17958061023046473, "grad_norm": 0.5234375, "learning_rate": 2.8347621258617326e-05, "loss": 2.0698, "step": 5566 }, { "epoch": 0.17961287408426108, "grad_norm": 0.361328125, "learning_rate": 2.834690313766733e-05, "loss": 2.0255, "step": 5567 }, { "epoch": 0.17964513793805745, "grad_norm": 0.4765625, "learning_rate": 2.83461848698042e-05, "loss": 2.0354, "step": 5568 }, { "epoch": 0.1796774017918538, "grad_norm": 0.4140625, "learning_rate": 2.834546645503584e-05, "loss": 2.0352, "step": 5569 }, { "epoch": 0.17970966564565014, "grad_norm": 0.40234375, "learning_rate": 2.8344747893370164e-05, "loss": 2.0396, "step": 5570 }, { "epoch": 0.17974192949944648, "grad_norm": 0.4765625, "learning_rate": 2.8344029184815075e-05, "loss": 2.0349, "step": 5571 }, { "epoch": 0.17977419335324282, "grad_norm": 0.404296875, "learning_rate": 2.8343310329378483e-05, "loss": 2.0271, "step": 5572 }, { "epoch": 0.17980645720703917, "grad_norm": 0.42578125, "learning_rate": 2.834259132706831e-05, "loss": 2.0489, "step": 5573 }, { "epoch": 0.1798387210608355, "grad_norm": 0.380859375, "learning_rate": 2.8341872177892454e-05, "loss": 2.0666, "step": 5574 }, { "epoch": 0.17987098491463185, "grad_norm": 0.40234375, "learning_rate": 2.8341152881858847e-05, "loss": 2.0392, "step": 5575 }, { "epoch": 0.1799032487684282, "grad_norm": 0.4140625, "learning_rate": 2.8340433438975395e-05, "loss": 2.0344, "step": 5576 }, { "epoch": 0.17993551262222454, "grad_norm": 0.390625, "learning_rate": 2.8339713849250027e-05, "loss": 2.0304, "step": 5577 }, { "epoch": 0.17996777647602089, "grad_norm": 0.40234375, "learning_rate": 2.8338994112690657e-05, "loss": 2.0296, "step": 5578 }, { "epoch": 0.18000004032981726, "grad_norm": 0.41796875, "learning_rate": 2.8338274229305207e-05, "loss": 2.039, "step": 5579 }, { "epoch": 0.1800323041836136, "grad_norm": 0.38671875, "learning_rate": 2.833755419910161e-05, "loss": 1.9958, "step": 5580 }, { "epoch": 0.18006456803740994, "grad_norm": 0.4453125, "learning_rate": 2.8336834022087776e-05, "loss": 2.0522, "step": 5581 }, { "epoch": 0.1800968318912063, "grad_norm": 0.412109375, "learning_rate": 2.8336113698271648e-05, "loss": 2.0086, "step": 5582 }, { "epoch": 0.18012909574500263, "grad_norm": 0.375, "learning_rate": 2.8335393227661142e-05, "loss": 2.0339, "step": 5583 }, { "epoch": 0.18016135959879898, "grad_norm": 0.41015625, "learning_rate": 2.83346726102642e-05, "loss": 2.0406, "step": 5584 }, { "epoch": 0.18019362345259532, "grad_norm": 0.396484375, "learning_rate": 2.8333951846088745e-05, "loss": 2.0347, "step": 5585 }, { "epoch": 0.18022588730639166, "grad_norm": 0.41015625, "learning_rate": 2.8333230935142714e-05, "loss": 2.0539, "step": 5586 }, { "epoch": 0.180258151160188, "grad_norm": 0.404296875, "learning_rate": 2.8332509877434042e-05, "loss": 2.0494, "step": 5587 }, { "epoch": 0.18029041501398435, "grad_norm": 0.3828125, "learning_rate": 2.8331788672970664e-05, "loss": 2.0409, "step": 5588 }, { "epoch": 0.18032267886778072, "grad_norm": 0.400390625, "learning_rate": 2.8331067321760526e-05, "loss": 2.0243, "step": 5589 }, { "epoch": 0.18035494272157707, "grad_norm": 0.42578125, "learning_rate": 2.8330345823811558e-05, "loss": 2.0593, "step": 5590 }, { "epoch": 0.1803872065753734, "grad_norm": 0.4921875, "learning_rate": 2.8329624179131706e-05, "loss": 2.043, "step": 5591 }, { "epoch": 0.18041947042916975, "grad_norm": 0.421875, "learning_rate": 2.8328902387728913e-05, "loss": 2.0385, "step": 5592 }, { "epoch": 0.1804517342829661, "grad_norm": 0.412109375, "learning_rate": 2.832818044961113e-05, "loss": 2.0608, "step": 5593 }, { "epoch": 0.18048399813676244, "grad_norm": 0.447265625, "learning_rate": 2.832745836478629e-05, "loss": 2.0042, "step": 5594 }, { "epoch": 0.18051626199055879, "grad_norm": 0.380859375, "learning_rate": 2.8326736133262358e-05, "loss": 2.0212, "step": 5595 }, { "epoch": 0.18054852584435513, "grad_norm": 0.52734375, "learning_rate": 2.8326013755047268e-05, "loss": 2.0449, "step": 5596 }, { "epoch": 0.18058078969815147, "grad_norm": 0.380859375, "learning_rate": 2.8325291230148986e-05, "loss": 2.0348, "step": 5597 }, { "epoch": 0.18061305355194782, "grad_norm": 0.388671875, "learning_rate": 2.8324568558575452e-05, "loss": 2.0391, "step": 5598 }, { "epoch": 0.1806453174057442, "grad_norm": 0.376953125, "learning_rate": 2.8323845740334627e-05, "loss": 2.0637, "step": 5599 }, { "epoch": 0.18067758125954053, "grad_norm": 0.3828125, "learning_rate": 2.832312277543447e-05, "loss": 2.0432, "step": 5600 }, { "epoch": 0.18070984511333688, "grad_norm": 0.400390625, "learning_rate": 2.8322399663882932e-05, "loss": 2.0142, "step": 5601 }, { "epoch": 0.18074210896713322, "grad_norm": 0.3984375, "learning_rate": 2.8321676405687976e-05, "loss": 2.0654, "step": 5602 }, { "epoch": 0.18077437282092956, "grad_norm": 0.369140625, "learning_rate": 2.8320953000857563e-05, "loss": 2.0329, "step": 5603 }, { "epoch": 0.1808066366747259, "grad_norm": 0.375, "learning_rate": 2.832022944939966e-05, "loss": 2.0286, "step": 5604 }, { "epoch": 0.18083890052852225, "grad_norm": 0.384765625, "learning_rate": 2.831950575132222e-05, "loss": 2.0333, "step": 5605 }, { "epoch": 0.1808711643823186, "grad_norm": 0.388671875, "learning_rate": 2.831878190663322e-05, "loss": 2.0368, "step": 5606 }, { "epoch": 0.18090342823611494, "grad_norm": 0.40625, "learning_rate": 2.8318057915340627e-05, "loss": 2.0316, "step": 5607 }, { "epoch": 0.18093569208991128, "grad_norm": 0.44140625, "learning_rate": 2.8317333777452404e-05, "loss": 2.0228, "step": 5608 }, { "epoch": 0.18096795594370765, "grad_norm": 0.39453125, "learning_rate": 2.8316609492976523e-05, "loss": 2.0452, "step": 5609 }, { "epoch": 0.181000219797504, "grad_norm": 0.361328125, "learning_rate": 2.8315885061920958e-05, "loss": 2.0443, "step": 5610 }, { "epoch": 0.18103248365130034, "grad_norm": 0.375, "learning_rate": 2.8315160484293683e-05, "loss": 2.0142, "step": 5611 }, { "epoch": 0.18106474750509668, "grad_norm": 0.37890625, "learning_rate": 2.8314435760102674e-05, "loss": 2.0024, "step": 5612 }, { "epoch": 0.18109701135889303, "grad_norm": 0.404296875, "learning_rate": 2.8313710889355908e-05, "loss": 2.0438, "step": 5613 }, { "epoch": 0.18112927521268937, "grad_norm": 0.39453125, "learning_rate": 2.831298587206137e-05, "loss": 2.0535, "step": 5614 }, { "epoch": 0.18116153906648572, "grad_norm": 0.390625, "learning_rate": 2.8312260708227026e-05, "loss": 2.0475, "step": 5615 }, { "epoch": 0.18119380292028206, "grad_norm": 0.3984375, "learning_rate": 2.8311535397860866e-05, "loss": 2.0399, "step": 5616 }, { "epoch": 0.1812260667740784, "grad_norm": 0.4609375, "learning_rate": 2.8310809940970878e-05, "loss": 2.0513, "step": 5617 }, { "epoch": 0.18125833062787475, "grad_norm": 0.53125, "learning_rate": 2.8310084337565043e-05, "loss": 1.9994, "step": 5618 }, { "epoch": 0.18129059448167112, "grad_norm": 0.60546875, "learning_rate": 2.8309358587651346e-05, "loss": 2.0208, "step": 5619 }, { "epoch": 0.18132285833546746, "grad_norm": 0.65234375, "learning_rate": 2.830863269123778e-05, "loss": 2.0193, "step": 5620 }, { "epoch": 0.1813551221892638, "grad_norm": 0.4609375, "learning_rate": 2.830790664833233e-05, "loss": 2.0074, "step": 5621 }, { "epoch": 0.18138738604306015, "grad_norm": 0.44921875, "learning_rate": 2.8307180458942997e-05, "loss": 2.04, "step": 5622 }, { "epoch": 0.1814196498968565, "grad_norm": 0.56640625, "learning_rate": 2.8306454123077758e-05, "loss": 2.0325, "step": 5623 }, { "epoch": 0.18145191375065284, "grad_norm": 0.45703125, "learning_rate": 2.8305727640744625e-05, "loss": 2.0337, "step": 5624 }, { "epoch": 0.18148417760444918, "grad_norm": 0.439453125, "learning_rate": 2.8305001011951588e-05, "loss": 2.018, "step": 5625 }, { "epoch": 0.18151644145824553, "grad_norm": 0.52734375, "learning_rate": 2.8304274236706645e-05, "loss": 2.045, "step": 5626 }, { "epoch": 0.18154870531204187, "grad_norm": 0.435546875, "learning_rate": 2.830354731501779e-05, "loss": 2.0339, "step": 5627 }, { "epoch": 0.1815809691658382, "grad_norm": 0.435546875, "learning_rate": 2.8302820246893036e-05, "loss": 2.0427, "step": 5628 }, { "epoch": 0.18161323301963458, "grad_norm": 0.498046875, "learning_rate": 2.830209303234038e-05, "loss": 2.0508, "step": 5629 }, { "epoch": 0.18164549687343093, "grad_norm": 0.408203125, "learning_rate": 2.8301365671367822e-05, "loss": 2.0654, "step": 5630 }, { "epoch": 0.18167776072722727, "grad_norm": 0.44140625, "learning_rate": 2.8300638163983375e-05, "loss": 2.0374, "step": 5631 }, { "epoch": 0.18171002458102362, "grad_norm": 0.431640625, "learning_rate": 2.829991051019505e-05, "loss": 2.0312, "step": 5632 }, { "epoch": 0.18174228843481996, "grad_norm": 0.400390625, "learning_rate": 2.8299182710010842e-05, "loss": 1.9965, "step": 5633 }, { "epoch": 0.1817745522886163, "grad_norm": 0.466796875, "learning_rate": 2.829845476343878e-05, "loss": 2.0506, "step": 5634 }, { "epoch": 0.18180681614241265, "grad_norm": 0.392578125, "learning_rate": 2.8297726670486866e-05, "loss": 2.0324, "step": 5635 }, { "epoch": 0.181839079996209, "grad_norm": 0.427734375, "learning_rate": 2.8296998431163116e-05, "loss": 2.0604, "step": 5636 }, { "epoch": 0.18187134385000533, "grad_norm": 0.4140625, "learning_rate": 2.8296270045475542e-05, "loss": 2.0395, "step": 5637 }, { "epoch": 0.18190360770380168, "grad_norm": 0.423828125, "learning_rate": 2.829554151343217e-05, "loss": 2.0625, "step": 5638 }, { "epoch": 0.18193587155759805, "grad_norm": 0.384765625, "learning_rate": 2.8294812835041016e-05, "loss": 2.0358, "step": 5639 }, { "epoch": 0.1819681354113944, "grad_norm": 0.40625, "learning_rate": 2.82940840103101e-05, "loss": 2.0321, "step": 5640 }, { "epoch": 0.18200039926519074, "grad_norm": 0.4140625, "learning_rate": 2.8293355039247444e-05, "loss": 2.0258, "step": 5641 }, { "epoch": 0.18203266311898708, "grad_norm": 0.41015625, "learning_rate": 2.829262592186107e-05, "loss": 1.9756, "step": 5642 }, { "epoch": 0.18206492697278343, "grad_norm": 0.423828125, "learning_rate": 2.8291896658159008e-05, "loss": 2.0246, "step": 5643 }, { "epoch": 0.18209719082657977, "grad_norm": 0.52734375, "learning_rate": 2.8291167248149284e-05, "loss": 2.0311, "step": 5644 }, { "epoch": 0.1821294546803761, "grad_norm": 0.5390625, "learning_rate": 2.8290437691839924e-05, "loss": 2.0485, "step": 5645 }, { "epoch": 0.18216171853417246, "grad_norm": 0.53125, "learning_rate": 2.828970798923896e-05, "loss": 2.0338, "step": 5646 }, { "epoch": 0.1821939823879688, "grad_norm": 0.5546875, "learning_rate": 2.828897814035443e-05, "loss": 2.0292, "step": 5647 }, { "epoch": 0.18222624624176514, "grad_norm": 0.47265625, "learning_rate": 2.8288248145194355e-05, "loss": 2.0039, "step": 5648 }, { "epoch": 0.18225851009556152, "grad_norm": 0.51171875, "learning_rate": 2.828751800376678e-05, "loss": 2.0066, "step": 5649 }, { "epoch": 0.18229077394935786, "grad_norm": 0.53125, "learning_rate": 2.8286787716079742e-05, "loss": 2.0479, "step": 5650 }, { "epoch": 0.1823230378031542, "grad_norm": 0.4921875, "learning_rate": 2.8286057282141273e-05, "loss": 2.0008, "step": 5651 }, { "epoch": 0.18235530165695055, "grad_norm": 0.47265625, "learning_rate": 2.8285326701959415e-05, "loss": 2.005, "step": 5652 }, { "epoch": 0.1823875655107469, "grad_norm": 0.5078125, "learning_rate": 2.8284595975542218e-05, "loss": 1.9975, "step": 5653 }, { "epoch": 0.18241982936454323, "grad_norm": 0.474609375, "learning_rate": 2.8283865102897715e-05, "loss": 2.0314, "step": 5654 }, { "epoch": 0.18245209321833958, "grad_norm": 0.466796875, "learning_rate": 2.8283134084033952e-05, "loss": 1.9862, "step": 5655 }, { "epoch": 0.18248435707213592, "grad_norm": 0.490234375, "learning_rate": 2.8282402918958985e-05, "loss": 2.0013, "step": 5656 }, { "epoch": 0.18251662092593227, "grad_norm": 0.5, "learning_rate": 2.828167160768085e-05, "loss": 2.0307, "step": 5657 }, { "epoch": 0.1825488847797286, "grad_norm": 0.43359375, "learning_rate": 2.8280940150207603e-05, "loss": 1.996, "step": 5658 }, { "epoch": 0.18258114863352495, "grad_norm": 0.48046875, "learning_rate": 2.8280208546547297e-05, "loss": 1.9994, "step": 5659 }, { "epoch": 0.18261341248732132, "grad_norm": 0.48828125, "learning_rate": 2.827947679670798e-05, "loss": 2.0209, "step": 5660 }, { "epoch": 0.18264567634111767, "grad_norm": 0.4140625, "learning_rate": 2.827874490069771e-05, "loss": 2.0177, "step": 5661 }, { "epoch": 0.182677940194914, "grad_norm": 0.447265625, "learning_rate": 2.8278012858524544e-05, "loss": 2.0476, "step": 5662 }, { "epoch": 0.18271020404871036, "grad_norm": 0.43359375, "learning_rate": 2.8277280670196534e-05, "loss": 2.0426, "step": 5663 }, { "epoch": 0.1827424679025067, "grad_norm": 0.44140625, "learning_rate": 2.8276548335721747e-05, "loss": 2.0079, "step": 5664 }, { "epoch": 0.18277473175630304, "grad_norm": 0.3984375, "learning_rate": 2.8275815855108243e-05, "loss": 2.012, "step": 5665 }, { "epoch": 0.1828069956100994, "grad_norm": 0.404296875, "learning_rate": 2.8275083228364077e-05, "loss": 1.9934, "step": 5666 }, { "epoch": 0.18283925946389573, "grad_norm": 0.4140625, "learning_rate": 2.827435045549732e-05, "loss": 1.9962, "step": 5667 }, { "epoch": 0.18287152331769208, "grad_norm": 0.404296875, "learning_rate": 2.8273617536516035e-05, "loss": 2.0045, "step": 5668 }, { "epoch": 0.18290378717148842, "grad_norm": 0.427734375, "learning_rate": 2.827288447142829e-05, "loss": 2.0196, "step": 5669 }, { "epoch": 0.1829360510252848, "grad_norm": 0.3828125, "learning_rate": 2.8272151260242155e-05, "loss": 1.9906, "step": 5670 }, { "epoch": 0.18296831487908113, "grad_norm": 0.38671875, "learning_rate": 2.8271417902965702e-05, "loss": 1.9969, "step": 5671 }, { "epoch": 0.18300057873287748, "grad_norm": 0.4296875, "learning_rate": 2.8270684399607007e-05, "loss": 1.9871, "step": 5672 }, { "epoch": 0.18303284258667382, "grad_norm": 0.431640625, "learning_rate": 2.8269950750174132e-05, "loss": 2.0272, "step": 5673 }, { "epoch": 0.18306510644047017, "grad_norm": 0.43359375, "learning_rate": 2.8269216954675155e-05, "loss": 2.0382, "step": 5674 }, { "epoch": 0.1830973702942665, "grad_norm": 0.40625, "learning_rate": 2.8268483013118164e-05, "loss": 2.0437, "step": 5675 }, { "epoch": 0.18312963414806285, "grad_norm": 0.404296875, "learning_rate": 2.826774892551123e-05, "loss": 2.01, "step": 5676 }, { "epoch": 0.1831618980018592, "grad_norm": 0.404296875, "learning_rate": 2.826701469186243e-05, "loss": 1.9912, "step": 5677 }, { "epoch": 0.18319416185565554, "grad_norm": 0.4140625, "learning_rate": 2.8266280312179854e-05, "loss": 2.0, "step": 5678 }, { "epoch": 0.18322642570945188, "grad_norm": 0.408203125, "learning_rate": 2.826554578647158e-05, "loss": 2.0098, "step": 5679 }, { "epoch": 0.18325868956324826, "grad_norm": 0.3984375, "learning_rate": 2.8264811114745694e-05, "loss": 2.0162, "step": 5680 }, { "epoch": 0.1832909534170446, "grad_norm": 0.375, "learning_rate": 2.826407629701028e-05, "loss": 2.0418, "step": 5681 }, { "epoch": 0.18332321727084094, "grad_norm": 0.416015625, "learning_rate": 2.8263341333273437e-05, "loss": 2.0547, "step": 5682 }, { "epoch": 0.1833554811246373, "grad_norm": 0.400390625, "learning_rate": 2.8262606223543243e-05, "loss": 2.0302, "step": 5683 }, { "epoch": 0.18338774497843363, "grad_norm": 0.41015625, "learning_rate": 2.82618709678278e-05, "loss": 2.0443, "step": 5684 }, { "epoch": 0.18342000883222997, "grad_norm": 0.44921875, "learning_rate": 2.826113556613519e-05, "loss": 2.0431, "step": 5685 }, { "epoch": 0.18345227268602632, "grad_norm": 0.423828125, "learning_rate": 2.8260400018473512e-05, "loss": 2.0324, "step": 5686 }, { "epoch": 0.18348453653982266, "grad_norm": 0.498046875, "learning_rate": 2.825966432485087e-05, "loss": 2.0487, "step": 5687 }, { "epoch": 0.183516800393619, "grad_norm": 0.44140625, "learning_rate": 2.825892848527535e-05, "loss": 2.0582, "step": 5688 }, { "epoch": 0.18354906424741535, "grad_norm": 0.462890625, "learning_rate": 2.8258192499755054e-05, "loss": 2.047, "step": 5689 }, { "epoch": 0.18358132810121172, "grad_norm": 0.5078125, "learning_rate": 2.8257456368298096e-05, "loss": 2.061, "step": 5690 }, { "epoch": 0.18361359195500807, "grad_norm": 0.5078125, "learning_rate": 2.8256720090912566e-05, "loss": 2.0554, "step": 5691 }, { "epoch": 0.1836458558088044, "grad_norm": 0.60546875, "learning_rate": 2.8255983667606567e-05, "loss": 2.0302, "step": 5692 }, { "epoch": 0.18367811966260075, "grad_norm": 0.69140625, "learning_rate": 2.8255247098388212e-05, "loss": 2.0634, "step": 5693 }, { "epoch": 0.1837103835163971, "grad_norm": 0.609375, "learning_rate": 2.8254510383265603e-05, "loss": 2.048, "step": 5694 }, { "epoch": 0.18374264737019344, "grad_norm": 0.4375, "learning_rate": 2.8253773522246857e-05, "loss": 1.9698, "step": 5695 }, { "epoch": 0.18377491122398978, "grad_norm": 0.5546875, "learning_rate": 2.825303651534008e-05, "loss": 1.9945, "step": 5696 }, { "epoch": 0.18380717507778613, "grad_norm": 0.54296875, "learning_rate": 2.8252299362553386e-05, "loss": 2.0315, "step": 5697 }, { "epoch": 0.18383943893158247, "grad_norm": 0.50390625, "learning_rate": 2.8251562063894885e-05, "loss": 2.0206, "step": 5698 }, { "epoch": 0.18387170278537882, "grad_norm": 0.55859375, "learning_rate": 2.8250824619372696e-05, "loss": 2.0256, "step": 5699 }, { "epoch": 0.1839039666391752, "grad_norm": 0.5078125, "learning_rate": 2.8250087028994938e-05, "loss": 1.9893, "step": 5700 }, { "epoch": 0.18393623049297153, "grad_norm": 0.494140625, "learning_rate": 2.8249349292769723e-05, "loss": 2.0637, "step": 5701 }, { "epoch": 0.18396849434676787, "grad_norm": 0.53125, "learning_rate": 2.8248611410705178e-05, "loss": 2.0284, "step": 5702 }, { "epoch": 0.18400075820056422, "grad_norm": 0.515625, "learning_rate": 2.8247873382809426e-05, "loss": 2.0196, "step": 5703 }, { "epoch": 0.18403302205436056, "grad_norm": 0.50390625, "learning_rate": 2.8247135209090583e-05, "loss": 2.0233, "step": 5704 }, { "epoch": 0.1840652859081569, "grad_norm": 0.46484375, "learning_rate": 2.824639688955678e-05, "loss": 2.0184, "step": 5705 }, { "epoch": 0.18409754976195325, "grad_norm": 0.455078125, "learning_rate": 2.8245658424216148e-05, "loss": 2.0142, "step": 5706 }, { "epoch": 0.1841298136157496, "grad_norm": 0.4453125, "learning_rate": 2.8244919813076806e-05, "loss": 2.0163, "step": 5707 }, { "epoch": 0.18416207746954594, "grad_norm": 0.44921875, "learning_rate": 2.8244181056146888e-05, "loss": 2.0596, "step": 5708 }, { "epoch": 0.18419434132334228, "grad_norm": 0.484375, "learning_rate": 2.8243442153434526e-05, "loss": 2.0402, "step": 5709 }, { "epoch": 0.18422660517713865, "grad_norm": 0.470703125, "learning_rate": 2.8242703104947858e-05, "loss": 2.0416, "step": 5710 }, { "epoch": 0.184258869030935, "grad_norm": 0.54296875, "learning_rate": 2.824196391069501e-05, "loss": 2.0288, "step": 5711 }, { "epoch": 0.18429113288473134, "grad_norm": 0.458984375, "learning_rate": 2.8241224570684124e-05, "loss": 2.0172, "step": 5712 }, { "epoch": 0.18432339673852768, "grad_norm": 0.8359375, "learning_rate": 2.8240485084923338e-05, "loss": 2.0058, "step": 5713 }, { "epoch": 0.18435566059232403, "grad_norm": 0.50390625, "learning_rate": 2.8239745453420792e-05, "loss": 2.0337, "step": 5714 }, { "epoch": 0.18438792444612037, "grad_norm": 0.41015625, "learning_rate": 2.8239005676184625e-05, "loss": 2.0357, "step": 5715 }, { "epoch": 0.18442018829991672, "grad_norm": 0.44140625, "learning_rate": 2.8238265753222982e-05, "loss": 2.0065, "step": 5716 }, { "epoch": 0.18445245215371306, "grad_norm": 0.416015625, "learning_rate": 2.8237525684544e-05, "loss": 2.0451, "step": 5717 }, { "epoch": 0.1844847160075094, "grad_norm": 0.4140625, "learning_rate": 2.8236785470155842e-05, "loss": 2.0509, "step": 5718 }, { "epoch": 0.18451697986130575, "grad_norm": 0.462890625, "learning_rate": 2.823604511006664e-05, "loss": 2.0611, "step": 5719 }, { "epoch": 0.18454924371510212, "grad_norm": 0.41796875, "learning_rate": 2.823530460428455e-05, "loss": 2.0401, "step": 5720 }, { "epoch": 0.18458150756889846, "grad_norm": 0.39453125, "learning_rate": 2.8234563952817724e-05, "loss": 2.0362, "step": 5721 }, { "epoch": 0.1846137714226948, "grad_norm": 0.404296875, "learning_rate": 2.8233823155674312e-05, "loss": 2.0313, "step": 5722 }, { "epoch": 0.18464603527649115, "grad_norm": 0.44140625, "learning_rate": 2.8233082212862466e-05, "loss": 2.0188, "step": 5723 }, { "epoch": 0.1846782991302875, "grad_norm": 0.390625, "learning_rate": 2.8232341124390345e-05, "loss": 2.0351, "step": 5724 }, { "epoch": 0.18471056298408384, "grad_norm": 0.419921875, "learning_rate": 2.8231599890266105e-05, "loss": 2.0146, "step": 5725 }, { "epoch": 0.18474282683788018, "grad_norm": 0.412109375, "learning_rate": 2.8230858510497907e-05, "loss": 2.0042, "step": 5726 }, { "epoch": 0.18477509069167652, "grad_norm": 0.4375, "learning_rate": 2.823011698509391e-05, "loss": 2.0264, "step": 5727 }, { "epoch": 0.18480735454547287, "grad_norm": 0.380859375, "learning_rate": 2.822937531406228e-05, "loss": 1.9969, "step": 5728 }, { "epoch": 0.1848396183992692, "grad_norm": 0.41015625, "learning_rate": 2.822863349741117e-05, "loss": 2.0328, "step": 5729 }, { "epoch": 0.18487188225306558, "grad_norm": 0.388671875, "learning_rate": 2.8227891535148755e-05, "loss": 2.0165, "step": 5730 }, { "epoch": 0.18490414610686193, "grad_norm": 0.375, "learning_rate": 2.8227149427283202e-05, "loss": 2.0207, "step": 5731 }, { "epoch": 0.18493640996065827, "grad_norm": 0.427734375, "learning_rate": 2.8226407173822678e-05, "loss": 2.0389, "step": 5732 }, { "epoch": 0.18496867381445461, "grad_norm": 0.427734375, "learning_rate": 2.822566477477535e-05, "loss": 2.0606, "step": 5733 }, { "epoch": 0.18500093766825096, "grad_norm": 0.427734375, "learning_rate": 2.8224922230149394e-05, "loss": 2.0546, "step": 5734 }, { "epoch": 0.1850332015220473, "grad_norm": 0.400390625, "learning_rate": 2.8224179539952983e-05, "loss": 2.0319, "step": 5735 }, { "epoch": 0.18506546537584365, "grad_norm": 0.462890625, "learning_rate": 2.8223436704194285e-05, "loss": 2.0158, "step": 5736 }, { "epoch": 0.18509772922964, "grad_norm": 0.48046875, "learning_rate": 2.8222693722881486e-05, "loss": 2.0362, "step": 5737 }, { "epoch": 0.18512999308343633, "grad_norm": 0.4921875, "learning_rate": 2.822195059602276e-05, "loss": 2.0409, "step": 5738 }, { "epoch": 0.18516225693723268, "grad_norm": 0.48046875, "learning_rate": 2.8221207323626287e-05, "loss": 1.9855, "step": 5739 }, { "epoch": 0.18519452079102905, "grad_norm": 0.4453125, "learning_rate": 2.8220463905700252e-05, "loss": 2.0193, "step": 5740 }, { "epoch": 0.1852267846448254, "grad_norm": 0.419921875, "learning_rate": 2.821972034225283e-05, "loss": 2.0148, "step": 5741 }, { "epoch": 0.18525904849862174, "grad_norm": 0.498046875, "learning_rate": 2.821897663329221e-05, "loss": 2.0503, "step": 5742 }, { "epoch": 0.18529131235241808, "grad_norm": 0.431640625, "learning_rate": 2.821823277882658e-05, "loss": 2.0299, "step": 5743 }, { "epoch": 0.18532357620621442, "grad_norm": 0.4296875, "learning_rate": 2.8217488778864128e-05, "loss": 2.0123, "step": 5744 }, { "epoch": 0.18535584006001077, "grad_norm": 0.47265625, "learning_rate": 2.8216744633413038e-05, "loss": 2.0202, "step": 5745 }, { "epoch": 0.1853881039138071, "grad_norm": 0.4453125, "learning_rate": 2.8216000342481508e-05, "loss": 2.035, "step": 5746 }, { "epoch": 0.18542036776760346, "grad_norm": 0.421875, "learning_rate": 2.8215255906077724e-05, "loss": 2.0421, "step": 5747 }, { "epoch": 0.1854526316213998, "grad_norm": 0.412109375, "learning_rate": 2.8214511324209886e-05, "loss": 2.0018, "step": 5748 }, { "epoch": 0.18548489547519614, "grad_norm": 0.392578125, "learning_rate": 2.821376659688619e-05, "loss": 2.0274, "step": 5749 }, { "epoch": 0.1855171593289925, "grad_norm": 0.451171875, "learning_rate": 2.8213021724114826e-05, "loss": 2.0476, "step": 5750 }, { "epoch": 0.18554942318278886, "grad_norm": 0.423828125, "learning_rate": 2.8212276705904002e-05, "loss": 2.0543, "step": 5751 }, { "epoch": 0.1855816870365852, "grad_norm": 0.4296875, "learning_rate": 2.821153154226191e-05, "loss": 2.0348, "step": 5752 }, { "epoch": 0.18561395089038155, "grad_norm": 0.40234375, "learning_rate": 2.821078623319676e-05, "loss": 2.0293, "step": 5753 }, { "epoch": 0.1856462147441779, "grad_norm": 0.42578125, "learning_rate": 2.8210040778716747e-05, "loss": 2.0019, "step": 5754 }, { "epoch": 0.18567847859797423, "grad_norm": 0.4609375, "learning_rate": 2.820929517883009e-05, "loss": 2.0171, "step": 5755 }, { "epoch": 0.18571074245177058, "grad_norm": 0.431640625, "learning_rate": 2.8208549433544985e-05, "loss": 2.0403, "step": 5756 }, { "epoch": 0.18574300630556692, "grad_norm": 0.455078125, "learning_rate": 2.8207803542869643e-05, "loss": 2.0255, "step": 5757 }, { "epoch": 0.18577527015936326, "grad_norm": 0.396484375, "learning_rate": 2.8207057506812273e-05, "loss": 2.0133, "step": 5758 }, { "epoch": 0.1858075340131596, "grad_norm": 0.443359375, "learning_rate": 2.8206311325381094e-05, "loss": 1.9906, "step": 5759 }, { "epoch": 0.18583979786695595, "grad_norm": 0.41015625, "learning_rate": 2.8205564998584307e-05, "loss": 2.0443, "step": 5760 }, { "epoch": 0.18587206172075232, "grad_norm": 0.431640625, "learning_rate": 2.820481852643014e-05, "loss": 1.998, "step": 5761 }, { "epoch": 0.18590432557454867, "grad_norm": 0.40625, "learning_rate": 2.8204071908926797e-05, "loss": 2.0023, "step": 5762 }, { "epoch": 0.185936589428345, "grad_norm": 0.4296875, "learning_rate": 2.820332514608251e-05, "loss": 2.0524, "step": 5763 }, { "epoch": 0.18596885328214136, "grad_norm": 0.453125, "learning_rate": 2.820257823790549e-05, "loss": 2.0181, "step": 5764 }, { "epoch": 0.1860011171359377, "grad_norm": 0.4453125, "learning_rate": 2.820183118440396e-05, "loss": 2.0356, "step": 5765 }, { "epoch": 0.18603338098973404, "grad_norm": 0.7265625, "learning_rate": 2.820108398558614e-05, "loss": 2.0214, "step": 5766 }, { "epoch": 0.1860656448435304, "grad_norm": 0.5078125, "learning_rate": 2.820033664146026e-05, "loss": 1.9951, "step": 5767 }, { "epoch": 0.18609790869732673, "grad_norm": 0.427734375, "learning_rate": 2.8199589152034547e-05, "loss": 2.008, "step": 5768 }, { "epoch": 0.18613017255112307, "grad_norm": 0.46875, "learning_rate": 2.819884151731722e-05, "loss": 2.0189, "step": 5769 }, { "epoch": 0.18616243640491942, "grad_norm": 0.46484375, "learning_rate": 2.819809373731652e-05, "loss": 2.0374, "step": 5770 }, { "epoch": 0.1861947002587158, "grad_norm": 0.4921875, "learning_rate": 2.8197345812040676e-05, "loss": 2.021, "step": 5771 }, { "epoch": 0.18622696411251213, "grad_norm": 0.4921875, "learning_rate": 2.819659774149791e-05, "loss": 2.05, "step": 5772 }, { "epoch": 0.18625922796630848, "grad_norm": 0.4765625, "learning_rate": 2.8195849525696463e-05, "loss": 2.0185, "step": 5773 }, { "epoch": 0.18629149182010482, "grad_norm": 0.40234375, "learning_rate": 2.8195101164644573e-05, "loss": 2.0047, "step": 5774 }, { "epoch": 0.18632375567390116, "grad_norm": 0.41796875, "learning_rate": 2.8194352658350474e-05, "loss": 2.0125, "step": 5775 }, { "epoch": 0.1863560195276975, "grad_norm": 0.419921875, "learning_rate": 2.819360400682241e-05, "loss": 2.0139, "step": 5776 }, { "epoch": 0.18638828338149385, "grad_norm": 0.5, "learning_rate": 2.8192855210068615e-05, "loss": 1.9574, "step": 5777 }, { "epoch": 0.1864205472352902, "grad_norm": 0.4609375, "learning_rate": 2.8192106268097336e-05, "loss": 2.0143, "step": 5778 }, { "epoch": 0.18645281108908654, "grad_norm": 0.435546875, "learning_rate": 2.8191357180916817e-05, "loss": 1.9918, "step": 5779 }, { "epoch": 0.18648507494288288, "grad_norm": 0.455078125, "learning_rate": 2.8190607948535297e-05, "loss": 2.0341, "step": 5780 }, { "epoch": 0.18651733879667926, "grad_norm": 0.423828125, "learning_rate": 2.8189858570961035e-05, "loss": 1.9831, "step": 5781 }, { "epoch": 0.1865496026504756, "grad_norm": 0.408203125, "learning_rate": 2.8189109048202262e-05, "loss": 1.9697, "step": 5782 }, { "epoch": 0.18658186650427194, "grad_norm": 0.4296875, "learning_rate": 2.8188359380267246e-05, "loss": 2.0178, "step": 5783 }, { "epoch": 0.1866141303580683, "grad_norm": 0.404296875, "learning_rate": 2.8187609567164224e-05, "loss": 2.0085, "step": 5784 }, { "epoch": 0.18664639421186463, "grad_norm": 0.42578125, "learning_rate": 2.8186859608901466e-05, "loss": 2.0181, "step": 5785 }, { "epoch": 0.18667865806566097, "grad_norm": 0.44921875, "learning_rate": 2.8186109505487212e-05, "loss": 1.9804, "step": 5786 }, { "epoch": 0.18671092191945732, "grad_norm": 0.388671875, "learning_rate": 2.8185359256929725e-05, "loss": 2.01, "step": 5787 }, { "epoch": 0.18674318577325366, "grad_norm": 0.388671875, "learning_rate": 2.8184608863237263e-05, "loss": 2.0354, "step": 5788 }, { "epoch": 0.18677544962705, "grad_norm": 0.42578125, "learning_rate": 2.8183858324418086e-05, "loss": 2.027, "step": 5789 }, { "epoch": 0.18680771348084635, "grad_norm": 0.4296875, "learning_rate": 2.818310764048045e-05, "loss": 2.0271, "step": 5790 }, { "epoch": 0.18683997733464272, "grad_norm": 0.416015625, "learning_rate": 2.818235681143263e-05, "loss": 2.0424, "step": 5791 }, { "epoch": 0.18687224118843906, "grad_norm": 0.431640625, "learning_rate": 2.8181605837282878e-05, "loss": 2.0092, "step": 5792 }, { "epoch": 0.1869045050422354, "grad_norm": 0.453125, "learning_rate": 2.8180854718039466e-05, "loss": 2.03, "step": 5793 }, { "epoch": 0.18693676889603175, "grad_norm": 0.44921875, "learning_rate": 2.8180103453710664e-05, "loss": 2.0524, "step": 5794 }, { "epoch": 0.1869690327498281, "grad_norm": 0.478515625, "learning_rate": 2.8179352044304735e-05, "loss": 2.0429, "step": 5795 }, { "epoch": 0.18700129660362444, "grad_norm": 0.427734375, "learning_rate": 2.8178600489829956e-05, "loss": 1.9971, "step": 5796 }, { "epoch": 0.18703356045742078, "grad_norm": 0.416015625, "learning_rate": 2.8177848790294595e-05, "loss": 2.0334, "step": 5797 }, { "epoch": 0.18706582431121713, "grad_norm": 0.400390625, "learning_rate": 2.817709694570693e-05, "loss": 2.0279, "step": 5798 }, { "epoch": 0.18709808816501347, "grad_norm": 0.41796875, "learning_rate": 2.8176344956075233e-05, "loss": 2.0427, "step": 5799 }, { "epoch": 0.18713035201880981, "grad_norm": 0.466796875, "learning_rate": 2.8175592821407785e-05, "loss": 2.0425, "step": 5800 }, { "epoch": 0.1871626158726062, "grad_norm": 0.3828125, "learning_rate": 2.8174840541712863e-05, "loss": 2.0451, "step": 5801 }, { "epoch": 0.18719487972640253, "grad_norm": 0.392578125, "learning_rate": 2.817408811699875e-05, "loss": 2.0293, "step": 5802 }, { "epoch": 0.18722714358019887, "grad_norm": 0.400390625, "learning_rate": 2.8173335547273722e-05, "loss": 2.0124, "step": 5803 }, { "epoch": 0.18725940743399522, "grad_norm": 0.443359375, "learning_rate": 2.8172582832546068e-05, "loss": 2.0557, "step": 5804 }, { "epoch": 0.18729167128779156, "grad_norm": 0.4296875, "learning_rate": 2.817182997282408e-05, "loss": 2.0261, "step": 5805 }, { "epoch": 0.1873239351415879, "grad_norm": 0.3828125, "learning_rate": 2.8171076968116022e-05, "loss": 2.0058, "step": 5806 }, { "epoch": 0.18735619899538425, "grad_norm": 0.412109375, "learning_rate": 2.817032381843021e-05, "loss": 2.0532, "step": 5807 }, { "epoch": 0.1873884628491806, "grad_norm": 0.421875, "learning_rate": 2.8169570523774915e-05, "loss": 2.0414, "step": 5808 }, { "epoch": 0.18742072670297694, "grad_norm": 0.43359375, "learning_rate": 2.816881708415844e-05, "loss": 2.0295, "step": 5809 }, { "epoch": 0.18745299055677328, "grad_norm": 0.41796875, "learning_rate": 2.816806349958907e-05, "loss": 1.9773, "step": 5810 }, { "epoch": 0.18748525441056965, "grad_norm": 0.408203125, "learning_rate": 2.8167309770075107e-05, "loss": 2.0388, "step": 5811 }, { "epoch": 0.187517518264366, "grad_norm": 0.431640625, "learning_rate": 2.816655589562484e-05, "loss": 2.0071, "step": 5812 }, { "epoch": 0.18754978211816234, "grad_norm": 0.388671875, "learning_rate": 2.8165801876246575e-05, "loss": 2.0324, "step": 5813 }, { "epoch": 0.18758204597195868, "grad_norm": 0.439453125, "learning_rate": 2.8165047711948608e-05, "loss": 2.0444, "step": 5814 }, { "epoch": 0.18761430982575503, "grad_norm": 0.40625, "learning_rate": 2.816429340273924e-05, "loss": 2.0493, "step": 5815 }, { "epoch": 0.18764657367955137, "grad_norm": 0.412109375, "learning_rate": 2.8163538948626776e-05, "loss": 2.0259, "step": 5816 }, { "epoch": 0.18767883753334771, "grad_norm": 0.40234375, "learning_rate": 2.8162784349619517e-05, "loss": 2.0423, "step": 5817 }, { "epoch": 0.18771110138714406, "grad_norm": 0.376953125, "learning_rate": 2.8162029605725768e-05, "loss": 2.014, "step": 5818 }, { "epoch": 0.1877433652409404, "grad_norm": 0.37890625, "learning_rate": 2.8161274716953843e-05, "loss": 2.0313, "step": 5819 }, { "epoch": 0.18777562909473675, "grad_norm": 0.408203125, "learning_rate": 2.8160519683312047e-05, "loss": 2.0266, "step": 5820 }, { "epoch": 0.18780789294853312, "grad_norm": 0.47265625, "learning_rate": 2.8159764504808688e-05, "loss": 2.051, "step": 5821 }, { "epoch": 0.18784015680232946, "grad_norm": 0.53515625, "learning_rate": 2.815900918145209e-05, "loss": 2.0235, "step": 5822 }, { "epoch": 0.1878724206561258, "grad_norm": 0.546875, "learning_rate": 2.8158253713250554e-05, "loss": 2.0283, "step": 5823 }, { "epoch": 0.18790468450992215, "grad_norm": 0.439453125, "learning_rate": 2.81574981002124e-05, "loss": 2.0416, "step": 5824 }, { "epoch": 0.1879369483637185, "grad_norm": 0.392578125, "learning_rate": 2.8156742342345946e-05, "loss": 2.0186, "step": 5825 }, { "epoch": 0.18796921221751484, "grad_norm": 0.427734375, "learning_rate": 2.8155986439659513e-05, "loss": 2.0186, "step": 5826 }, { "epoch": 0.18800147607131118, "grad_norm": 0.40234375, "learning_rate": 2.815523039216142e-05, "loss": 2.026, "step": 5827 }, { "epoch": 0.18803373992510752, "grad_norm": 0.375, "learning_rate": 2.8154474199859987e-05, "loss": 2.0439, "step": 5828 }, { "epoch": 0.18806600377890387, "grad_norm": 0.392578125, "learning_rate": 2.8153717862763536e-05, "loss": 2.017, "step": 5829 }, { "epoch": 0.1880982676327002, "grad_norm": 0.40234375, "learning_rate": 2.8152961380880402e-05, "loss": 2.0514, "step": 5830 }, { "epoch": 0.18813053148649658, "grad_norm": 0.392578125, "learning_rate": 2.8152204754218898e-05, "loss": 2.0386, "step": 5831 }, { "epoch": 0.18816279534029293, "grad_norm": 0.388671875, "learning_rate": 2.8151447982787363e-05, "loss": 2.0022, "step": 5832 }, { "epoch": 0.18819505919408927, "grad_norm": 0.3828125, "learning_rate": 2.8150691066594123e-05, "loss": 1.9958, "step": 5833 }, { "epoch": 0.18822732304788561, "grad_norm": 0.408203125, "learning_rate": 2.8149934005647514e-05, "loss": 2.0561, "step": 5834 }, { "epoch": 0.18825958690168196, "grad_norm": 0.453125, "learning_rate": 2.814917679995586e-05, "loss": 2.0066, "step": 5835 }, { "epoch": 0.1882918507554783, "grad_norm": 0.408203125, "learning_rate": 2.8148419449527502e-05, "loss": 2.0229, "step": 5836 }, { "epoch": 0.18832411460927465, "grad_norm": 0.390625, "learning_rate": 2.8147661954370777e-05, "loss": 2.0337, "step": 5837 }, { "epoch": 0.188356378463071, "grad_norm": 0.486328125, "learning_rate": 2.8146904314494017e-05, "loss": 2.0357, "step": 5838 }, { "epoch": 0.18838864231686733, "grad_norm": 0.44140625, "learning_rate": 2.8146146529905567e-05, "loss": 2.0376, "step": 5839 }, { "epoch": 0.18842090617066368, "grad_norm": 0.42578125, "learning_rate": 2.8145388600613774e-05, "loss": 2.0676, "step": 5840 }, { "epoch": 0.18845317002446002, "grad_norm": 0.4375, "learning_rate": 2.8144630526626963e-05, "loss": 2.0221, "step": 5841 }, { "epoch": 0.1884854338782564, "grad_norm": 0.41015625, "learning_rate": 2.8143872307953495e-05, "loss": 2.017, "step": 5842 }, { "epoch": 0.18851769773205274, "grad_norm": 0.390625, "learning_rate": 2.8143113944601714e-05, "loss": 2.0368, "step": 5843 }, { "epoch": 0.18854996158584908, "grad_norm": 0.443359375, "learning_rate": 2.8142355436579955e-05, "loss": 2.021, "step": 5844 }, { "epoch": 0.18858222543964542, "grad_norm": 0.41796875, "learning_rate": 2.814159678389658e-05, "loss": 1.9966, "step": 5845 }, { "epoch": 0.18861448929344177, "grad_norm": 0.40234375, "learning_rate": 2.8140837986559936e-05, "loss": 2.038, "step": 5846 }, { "epoch": 0.1886467531472381, "grad_norm": 0.44140625, "learning_rate": 2.8140079044578376e-05, "loss": 2.0646, "step": 5847 }, { "epoch": 0.18867901700103445, "grad_norm": 0.390625, "learning_rate": 2.8139319957960245e-05, "loss": 2.035, "step": 5848 }, { "epoch": 0.1887112808548308, "grad_norm": 0.4921875, "learning_rate": 2.8138560726713912e-05, "loss": 2.0184, "step": 5849 }, { "epoch": 0.18874354470862714, "grad_norm": 0.546875, "learning_rate": 2.813780135084773e-05, "loss": 2.0356, "step": 5850 }, { "epoch": 0.18877580856242349, "grad_norm": 0.66015625, "learning_rate": 2.8137041830370055e-05, "loss": 2.0358, "step": 5851 }, { "epoch": 0.18880807241621986, "grad_norm": 0.6640625, "learning_rate": 2.8136282165289245e-05, "loss": 2.0387, "step": 5852 }, { "epoch": 0.1888403362700162, "grad_norm": 0.50390625, "learning_rate": 2.8135522355613663e-05, "loss": 2.0503, "step": 5853 }, { "epoch": 0.18887260012381255, "grad_norm": 0.5546875, "learning_rate": 2.8134762401351678e-05, "loss": 1.9813, "step": 5854 }, { "epoch": 0.1889048639776089, "grad_norm": 0.515625, "learning_rate": 2.813400230251165e-05, "loss": 2.0115, "step": 5855 }, { "epoch": 0.18893712783140523, "grad_norm": 0.4609375, "learning_rate": 2.813324205910195e-05, "loss": 2.0415, "step": 5856 }, { "epoch": 0.18896939168520158, "grad_norm": 0.484375, "learning_rate": 2.813248167113094e-05, "loss": 2.0254, "step": 5857 }, { "epoch": 0.18900165553899792, "grad_norm": 0.458984375, "learning_rate": 2.8131721138606992e-05, "loss": 1.995, "step": 5858 }, { "epoch": 0.18903391939279426, "grad_norm": 0.451171875, "learning_rate": 2.813096046153848e-05, "loss": 2.0433, "step": 5859 }, { "epoch": 0.1890661832465906, "grad_norm": 0.490234375, "learning_rate": 2.813019963993378e-05, "loss": 2.0255, "step": 5860 }, { "epoch": 0.18909844710038695, "grad_norm": 0.40234375, "learning_rate": 2.8129438673801255e-05, "loss": 2.0429, "step": 5861 }, { "epoch": 0.18913071095418332, "grad_norm": 0.48046875, "learning_rate": 2.8128677563149287e-05, "loss": 2.0372, "step": 5862 }, { "epoch": 0.18916297480797967, "grad_norm": 0.419921875, "learning_rate": 2.812791630798626e-05, "loss": 2.0395, "step": 5863 }, { "epoch": 0.189195238661776, "grad_norm": 0.44140625, "learning_rate": 2.8127154908320546e-05, "loss": 2.0143, "step": 5864 }, { "epoch": 0.18922750251557235, "grad_norm": 0.40234375, "learning_rate": 2.812639336416053e-05, "loss": 2.0315, "step": 5865 }, { "epoch": 0.1892597663693687, "grad_norm": 0.431640625, "learning_rate": 2.8125631675514588e-05, "loss": 2.0122, "step": 5866 }, { "epoch": 0.18929203022316504, "grad_norm": 0.40625, "learning_rate": 2.8124869842391116e-05, "loss": 1.9906, "step": 5867 }, { "epoch": 0.18932429407696139, "grad_norm": 0.390625, "learning_rate": 2.8124107864798487e-05, "loss": 2.0282, "step": 5868 }, { "epoch": 0.18935655793075773, "grad_norm": 0.423828125, "learning_rate": 2.8123345742745094e-05, "loss": 2.0315, "step": 5869 }, { "epoch": 0.18938882178455407, "grad_norm": 0.3828125, "learning_rate": 2.8122583476239332e-05, "loss": 2.0279, "step": 5870 }, { "epoch": 0.18942108563835042, "grad_norm": 0.4453125, "learning_rate": 2.812182106528958e-05, "loss": 2.0484, "step": 5871 }, { "epoch": 0.1894533494921468, "grad_norm": 0.39453125, "learning_rate": 2.812105850990423e-05, "loss": 1.9944, "step": 5872 }, { "epoch": 0.18948561334594313, "grad_norm": 0.451171875, "learning_rate": 2.812029581009169e-05, "loss": 2.0424, "step": 5873 }, { "epoch": 0.18951787719973948, "grad_norm": 0.42578125, "learning_rate": 2.8119532965860338e-05, "loss": 2.0, "step": 5874 }, { "epoch": 0.18955014105353582, "grad_norm": 0.39453125, "learning_rate": 2.8118769977218585e-05, "loss": 2.0343, "step": 5875 }, { "epoch": 0.18958240490733216, "grad_norm": 0.439453125, "learning_rate": 2.8118006844174825e-05, "loss": 2.0518, "step": 5876 }, { "epoch": 0.1896146687611285, "grad_norm": 0.427734375, "learning_rate": 2.8117243566737453e-05, "loss": 2.028, "step": 5877 }, { "epoch": 0.18964693261492485, "grad_norm": 0.390625, "learning_rate": 2.8116480144914875e-05, "loss": 2.0436, "step": 5878 }, { "epoch": 0.1896791964687212, "grad_norm": 0.41015625, "learning_rate": 2.8115716578715494e-05, "loss": 2.0081, "step": 5879 }, { "epoch": 0.18971146032251754, "grad_norm": 0.384765625, "learning_rate": 2.8114952868147713e-05, "loss": 2.0163, "step": 5880 }, { "epoch": 0.18974372417631388, "grad_norm": 0.41796875, "learning_rate": 2.8114189013219937e-05, "loss": 2.0177, "step": 5881 }, { "epoch": 0.18977598803011025, "grad_norm": 0.453125, "learning_rate": 2.8113425013940583e-05, "loss": 2.0068, "step": 5882 }, { "epoch": 0.1898082518839066, "grad_norm": 0.451171875, "learning_rate": 2.8112660870318046e-05, "loss": 2.0268, "step": 5883 }, { "epoch": 0.18984051573770294, "grad_norm": 0.416015625, "learning_rate": 2.811189658236075e-05, "loss": 1.9923, "step": 5884 }, { "epoch": 0.18987277959149929, "grad_norm": 0.416015625, "learning_rate": 2.8111132150077105e-05, "loss": 2.0161, "step": 5885 }, { "epoch": 0.18990504344529563, "grad_norm": 0.404296875, "learning_rate": 2.811036757347552e-05, "loss": 2.019, "step": 5886 }, { "epoch": 0.18993730729909197, "grad_norm": 0.400390625, "learning_rate": 2.8109602852564413e-05, "loss": 2.0288, "step": 5887 }, { "epoch": 0.18996957115288832, "grad_norm": 0.416015625, "learning_rate": 2.8108837987352204e-05, "loss": 2.0108, "step": 5888 }, { "epoch": 0.19000183500668466, "grad_norm": 0.4609375, "learning_rate": 2.810807297784731e-05, "loss": 2.0239, "step": 5889 }, { "epoch": 0.190034098860481, "grad_norm": 0.40625, "learning_rate": 2.810730782405815e-05, "loss": 2.0103, "step": 5890 }, { "epoch": 0.19006636271427735, "grad_norm": 0.42578125, "learning_rate": 2.8106542525993153e-05, "loss": 2.0264, "step": 5891 }, { "epoch": 0.19009862656807372, "grad_norm": 0.3828125, "learning_rate": 2.8105777083660734e-05, "loss": 1.9947, "step": 5892 }, { "epoch": 0.19013089042187006, "grad_norm": 0.43359375, "learning_rate": 2.8105011497069328e-05, "loss": 1.9988, "step": 5893 }, { "epoch": 0.1901631542756664, "grad_norm": 0.41015625, "learning_rate": 2.810424576622736e-05, "loss": 2.0419, "step": 5894 }, { "epoch": 0.19019541812946275, "grad_norm": 0.427734375, "learning_rate": 2.8103479891143247e-05, "loss": 2.0334, "step": 5895 }, { "epoch": 0.1902276819832591, "grad_norm": 0.3984375, "learning_rate": 2.810271387182543e-05, "loss": 1.9952, "step": 5896 }, { "epoch": 0.19025994583705544, "grad_norm": 0.42578125, "learning_rate": 2.810194770828234e-05, "loss": 2.0128, "step": 5897 }, { "epoch": 0.19029220969085178, "grad_norm": 0.41796875, "learning_rate": 2.8101181400522408e-05, "loss": 2.0457, "step": 5898 }, { "epoch": 0.19032447354464813, "grad_norm": 0.4140625, "learning_rate": 2.8100414948554072e-05, "loss": 2.0465, "step": 5899 }, { "epoch": 0.19035673739844447, "grad_norm": 0.38671875, "learning_rate": 2.8099648352385763e-05, "loss": 2.0188, "step": 5900 }, { "epoch": 0.1903890012522408, "grad_norm": 0.4140625, "learning_rate": 2.8098881612025927e-05, "loss": 2.0307, "step": 5901 }, { "epoch": 0.19042126510603719, "grad_norm": 0.40625, "learning_rate": 2.8098114727482998e-05, "loss": 2.0171, "step": 5902 }, { "epoch": 0.19045352895983353, "grad_norm": 0.435546875, "learning_rate": 2.8097347698765415e-05, "loss": 1.9892, "step": 5903 }, { "epoch": 0.19048579281362987, "grad_norm": 6.53125, "learning_rate": 2.8096580525881625e-05, "loss": 2.0407, "step": 5904 }, { "epoch": 0.19051805666742622, "grad_norm": 1.3125, "learning_rate": 2.8095813208840078e-05, "loss": 1.9989, "step": 5905 }, { "epoch": 0.19055032052122256, "grad_norm": 0.84765625, "learning_rate": 2.8095045747649207e-05, "loss": 1.9909, "step": 5906 }, { "epoch": 0.1905825843750189, "grad_norm": 0.7109375, "learning_rate": 2.809427814231747e-05, "loss": 2.0206, "step": 5907 }, { "epoch": 0.19061484822881525, "grad_norm": 0.6875, "learning_rate": 2.8093510392853314e-05, "loss": 2.0016, "step": 5908 }, { "epoch": 0.1906471120826116, "grad_norm": 0.77734375, "learning_rate": 2.8092742499265185e-05, "loss": 1.9938, "step": 5909 }, { "epoch": 0.19067937593640794, "grad_norm": 1.09375, "learning_rate": 2.8091974461561545e-05, "loss": 1.9939, "step": 5910 }, { "epoch": 0.19071163979020428, "grad_norm": 1.7109375, "learning_rate": 2.8091206279750838e-05, "loss": 2.0241, "step": 5911 }, { "epoch": 0.19074390364400065, "grad_norm": 3.765625, "learning_rate": 2.8090437953841524e-05, "loss": 2.0526, "step": 5912 }, { "epoch": 0.190776167497797, "grad_norm": 1.171875, "learning_rate": 2.8089669483842064e-05, "loss": 2.0457, "step": 5913 }, { "epoch": 0.19080843135159334, "grad_norm": 1.46875, "learning_rate": 2.8088900869760906e-05, "loss": 2.0188, "step": 5914 }, { "epoch": 0.19084069520538968, "grad_norm": 1.09375, "learning_rate": 2.8088132111606522e-05, "loss": 2.0335, "step": 5915 }, { "epoch": 0.19087295905918603, "grad_norm": 1.515625, "learning_rate": 2.808736320938737e-05, "loss": 1.981, "step": 5916 }, { "epoch": 0.19090522291298237, "grad_norm": 1.3984375, "learning_rate": 2.8086594163111908e-05, "loss": 2.0469, "step": 5917 }, { "epoch": 0.1909374867667787, "grad_norm": 0.828125, "learning_rate": 2.8085824972788612e-05, "loss": 2.0373, "step": 5918 }, { "epoch": 0.19096975062057506, "grad_norm": 1.09375, "learning_rate": 2.8085055638425937e-05, "loss": 2.0479, "step": 5919 }, { "epoch": 0.1910020144743714, "grad_norm": 0.5703125, "learning_rate": 2.8084286160032357e-05, "loss": 2.0295, "step": 5920 }, { "epoch": 0.19103427832816774, "grad_norm": 0.9140625, "learning_rate": 2.8083516537616345e-05, "loss": 2.019, "step": 5921 }, { "epoch": 0.1910665421819641, "grad_norm": 0.69921875, "learning_rate": 2.808274677118637e-05, "loss": 2.0294, "step": 5922 }, { "epoch": 0.19109880603576046, "grad_norm": 0.7421875, "learning_rate": 2.8081976860750897e-05, "loss": 2.0497, "step": 5923 }, { "epoch": 0.1911310698895568, "grad_norm": 0.69921875, "learning_rate": 2.8081206806318412e-05, "loss": 2.0155, "step": 5924 }, { "epoch": 0.19116333374335315, "grad_norm": 0.5625, "learning_rate": 2.8080436607897387e-05, "loss": 2.0363, "step": 5925 }, { "epoch": 0.1911955975971495, "grad_norm": 0.65234375, "learning_rate": 2.80796662654963e-05, "loss": 2.0195, "step": 5926 }, { "epoch": 0.19122786145094584, "grad_norm": 0.5546875, "learning_rate": 2.807889577912363e-05, "loss": 2.0125, "step": 5927 }, { "epoch": 0.19126012530474218, "grad_norm": 0.494140625, "learning_rate": 2.807812514878786e-05, "loss": 2.0026, "step": 5928 }, { "epoch": 0.19129238915853852, "grad_norm": 0.58203125, "learning_rate": 2.8077354374497465e-05, "loss": 2.0175, "step": 5929 }, { "epoch": 0.19132465301233487, "grad_norm": 0.455078125, "learning_rate": 2.807658345626094e-05, "loss": 2.0142, "step": 5930 }, { "epoch": 0.1913569168661312, "grad_norm": 0.494140625, "learning_rate": 2.807581239408676e-05, "loss": 2.0314, "step": 5931 }, { "epoch": 0.19138918071992755, "grad_norm": 0.5546875, "learning_rate": 2.8075041187983425e-05, "loss": 2.0229, "step": 5932 }, { "epoch": 0.19142144457372393, "grad_norm": 0.431640625, "learning_rate": 2.807426983795941e-05, "loss": 1.9945, "step": 5933 }, { "epoch": 0.19145370842752027, "grad_norm": 0.50390625, "learning_rate": 2.8073498344023214e-05, "loss": 2.0489, "step": 5934 }, { "epoch": 0.1914859722813166, "grad_norm": 0.44921875, "learning_rate": 2.807272670618333e-05, "loss": 2.0083, "step": 5935 }, { "epoch": 0.19151823613511296, "grad_norm": 0.50390625, "learning_rate": 2.8071954924448248e-05, "loss": 2.0155, "step": 5936 }, { "epoch": 0.1915504999889093, "grad_norm": 0.421875, "learning_rate": 2.807118299882646e-05, "loss": 2.0057, "step": 5937 }, { "epoch": 0.19158276384270564, "grad_norm": 0.43359375, "learning_rate": 2.807041092932647e-05, "loss": 2.0121, "step": 5938 }, { "epoch": 0.191615027696502, "grad_norm": 0.453125, "learning_rate": 2.806963871595677e-05, "loss": 1.9904, "step": 5939 }, { "epoch": 0.19164729155029833, "grad_norm": 0.46484375, "learning_rate": 2.8068866358725865e-05, "loss": 1.9598, "step": 5940 }, { "epoch": 0.19167955540409468, "grad_norm": 0.439453125, "learning_rate": 2.806809385764226e-05, "loss": 1.9967, "step": 5941 }, { "epoch": 0.19171181925789102, "grad_norm": 0.484375, "learning_rate": 2.8067321212714448e-05, "loss": 2.0043, "step": 5942 }, { "epoch": 0.1917440831116874, "grad_norm": 0.408203125, "learning_rate": 2.8066548423950938e-05, "loss": 2.0228, "step": 5943 }, { "epoch": 0.19177634696548373, "grad_norm": 0.59375, "learning_rate": 2.8065775491360235e-05, "loss": 2.0334, "step": 5944 }, { "epoch": 0.19180861081928008, "grad_norm": 0.43359375, "learning_rate": 2.8065002414950855e-05, "loss": 2.0523, "step": 5945 }, { "epoch": 0.19184087467307642, "grad_norm": 0.4453125, "learning_rate": 2.8064229194731293e-05, "loss": 2.0454, "step": 5946 }, { "epoch": 0.19187313852687277, "grad_norm": 0.419921875, "learning_rate": 2.8063455830710074e-05, "loss": 2.0162, "step": 5947 }, { "epoch": 0.1919054023806691, "grad_norm": 0.4140625, "learning_rate": 2.8062682322895704e-05, "loss": 2.0243, "step": 5948 }, { "epoch": 0.19193766623446545, "grad_norm": 0.3984375, "learning_rate": 2.8061908671296703e-05, "loss": 2.0157, "step": 5949 }, { "epoch": 0.1919699300882618, "grad_norm": 0.4296875, "learning_rate": 2.8061134875921573e-05, "loss": 1.9998, "step": 5950 }, { "epoch": 0.19200219394205814, "grad_norm": 0.38671875, "learning_rate": 2.8060360936778846e-05, "loss": 1.9975, "step": 5951 }, { "epoch": 0.19203445779585449, "grad_norm": 0.400390625, "learning_rate": 2.805958685387703e-05, "loss": 1.996, "step": 5952 }, { "epoch": 0.19206672164965086, "grad_norm": 0.392578125, "learning_rate": 2.8058812627224654e-05, "loss": 2.0022, "step": 5953 }, { "epoch": 0.1920989855034472, "grad_norm": 0.3984375, "learning_rate": 2.8058038256830238e-05, "loss": 2.0233, "step": 5954 }, { "epoch": 0.19213124935724354, "grad_norm": 0.40234375, "learning_rate": 2.8057263742702305e-05, "loss": 2.0612, "step": 5955 }, { "epoch": 0.1921635132110399, "grad_norm": 0.384765625, "learning_rate": 2.805648908484938e-05, "loss": 2.0458, "step": 5956 }, { "epoch": 0.19219577706483623, "grad_norm": 0.412109375, "learning_rate": 2.8055714283279986e-05, "loss": 2.0098, "step": 5957 }, { "epoch": 0.19222804091863258, "grad_norm": 0.4140625, "learning_rate": 2.8054939338002658e-05, "loss": 2.0134, "step": 5958 }, { "epoch": 0.19226030477242892, "grad_norm": 0.44921875, "learning_rate": 2.8054164249025923e-05, "loss": 2.0289, "step": 5959 }, { "epoch": 0.19229256862622526, "grad_norm": 0.412109375, "learning_rate": 2.8053389016358315e-05, "loss": 2.0014, "step": 5960 }, { "epoch": 0.1923248324800216, "grad_norm": 0.412109375, "learning_rate": 2.8052613640008363e-05, "loss": 2.051, "step": 5961 }, { "epoch": 0.19235709633381795, "grad_norm": 0.431640625, "learning_rate": 2.8051838119984604e-05, "loss": 2.0528, "step": 5962 }, { "epoch": 0.19238936018761432, "grad_norm": 0.412109375, "learning_rate": 2.8051062456295574e-05, "loss": 2.057, "step": 5963 }, { "epoch": 0.19242162404141067, "grad_norm": 0.373046875, "learning_rate": 2.8050286648949813e-05, "loss": 2.0322, "step": 5964 }, { "epoch": 0.192453887895207, "grad_norm": 0.427734375, "learning_rate": 2.8049510697955854e-05, "loss": 2.034, "step": 5965 }, { "epoch": 0.19248615174900335, "grad_norm": 0.42578125, "learning_rate": 2.8048734603322247e-05, "loss": 2.0316, "step": 5966 }, { "epoch": 0.1925184156027997, "grad_norm": 0.53125, "learning_rate": 2.804795836505753e-05, "loss": 2.068, "step": 5967 }, { "epoch": 0.19255067945659604, "grad_norm": 0.470703125, "learning_rate": 2.804718198317025e-05, "loss": 2.0445, "step": 5968 }, { "epoch": 0.19258294331039238, "grad_norm": 0.84375, "learning_rate": 2.8046405457668947e-05, "loss": 1.9845, "step": 5969 }, { "epoch": 0.19261520716418873, "grad_norm": 0.65234375, "learning_rate": 2.8045628788562173e-05, "loss": 2.0119, "step": 5970 }, { "epoch": 0.19264747101798507, "grad_norm": 0.625, "learning_rate": 2.804485197585848e-05, "loss": 2.0511, "step": 5971 }, { "epoch": 0.19267973487178142, "grad_norm": 0.703125, "learning_rate": 2.8044075019566413e-05, "loss": 2.0024, "step": 5972 }, { "epoch": 0.1927119987255778, "grad_norm": 0.59765625, "learning_rate": 2.8043297919694525e-05, "loss": 2.0049, "step": 5973 }, { "epoch": 0.19274426257937413, "grad_norm": 0.55859375, "learning_rate": 2.8042520676251372e-05, "loss": 2.0302, "step": 5974 }, { "epoch": 0.19277652643317048, "grad_norm": 0.58203125, "learning_rate": 2.8041743289245503e-05, "loss": 2.0004, "step": 5975 }, { "epoch": 0.19280879028696682, "grad_norm": 0.54296875, "learning_rate": 2.8040965758685485e-05, "loss": 2.0326, "step": 5976 }, { "epoch": 0.19284105414076316, "grad_norm": 0.478515625, "learning_rate": 2.8040188084579872e-05, "loss": 2.0169, "step": 5977 }, { "epoch": 0.1928733179945595, "grad_norm": 0.4765625, "learning_rate": 2.803941026693722e-05, "loss": 2.0246, "step": 5978 }, { "epoch": 0.19290558184835585, "grad_norm": 0.453125, "learning_rate": 2.8038632305766097e-05, "loss": 2.0199, "step": 5979 }, { "epoch": 0.1929378457021522, "grad_norm": 0.4453125, "learning_rate": 2.803785420107506e-05, "loss": 2.0096, "step": 5980 }, { "epoch": 0.19297010955594854, "grad_norm": 0.408203125, "learning_rate": 2.8037075952872683e-05, "loss": 1.9831, "step": 5981 }, { "epoch": 0.19300237340974488, "grad_norm": 0.451171875, "learning_rate": 2.803629756116752e-05, "loss": 2.0473, "step": 5982 }, { "epoch": 0.19303463726354125, "grad_norm": 0.435546875, "learning_rate": 2.8035519025968148e-05, "loss": 2.0633, "step": 5983 }, { "epoch": 0.1930669011173376, "grad_norm": 0.40625, "learning_rate": 2.8034740347283136e-05, "loss": 2.0669, "step": 5984 }, { "epoch": 0.19309916497113394, "grad_norm": 0.408203125, "learning_rate": 2.8033961525121053e-05, "loss": 2.0523, "step": 5985 }, { "epoch": 0.19313142882493028, "grad_norm": 0.388671875, "learning_rate": 2.803318255949047e-05, "loss": 2.0076, "step": 5986 }, { "epoch": 0.19316369267872663, "grad_norm": 0.396484375, "learning_rate": 2.8032403450399965e-05, "loss": 2.0305, "step": 5987 }, { "epoch": 0.19319595653252297, "grad_norm": 0.423828125, "learning_rate": 2.803162419785811e-05, "loss": 2.0514, "step": 5988 }, { "epoch": 0.19322822038631932, "grad_norm": 0.369140625, "learning_rate": 2.803084480187349e-05, "loss": 2.0402, "step": 5989 }, { "epoch": 0.19326048424011566, "grad_norm": 0.53515625, "learning_rate": 2.803006526245468e-05, "loss": 2.0159, "step": 5990 }, { "epoch": 0.193292748093912, "grad_norm": 0.431640625, "learning_rate": 2.802928557961025e-05, "loss": 2.0373, "step": 5991 }, { "epoch": 0.19332501194770835, "grad_norm": 0.40234375, "learning_rate": 2.8028505753348797e-05, "loss": 2.003, "step": 5992 }, { "epoch": 0.19335727580150472, "grad_norm": 0.474609375, "learning_rate": 2.8027725783678902e-05, "loss": 2.0249, "step": 5993 }, { "epoch": 0.19338953965530106, "grad_norm": 0.462890625, "learning_rate": 2.802694567060914e-05, "loss": 2.0276, "step": 5994 }, { "epoch": 0.1934218035090974, "grad_norm": 0.423828125, "learning_rate": 2.802616541414811e-05, "loss": 2.0082, "step": 5995 }, { "epoch": 0.19345406736289375, "grad_norm": 0.439453125, "learning_rate": 2.8025385014304398e-05, "loss": 2.0001, "step": 5996 }, { "epoch": 0.1934863312166901, "grad_norm": 0.474609375, "learning_rate": 2.802460447108659e-05, "loss": 2.0616, "step": 5997 }, { "epoch": 0.19351859507048644, "grad_norm": 0.44921875, "learning_rate": 2.8023823784503278e-05, "loss": 2.0459, "step": 5998 }, { "epoch": 0.19355085892428278, "grad_norm": 0.42578125, "learning_rate": 2.802304295456306e-05, "loss": 2.0199, "step": 5999 }, { "epoch": 0.19358312277807913, "grad_norm": 0.4140625, "learning_rate": 2.8022261981274528e-05, "loss": 2.0418, "step": 6000 }, { "epoch": 0.19361538663187547, "grad_norm": 0.404296875, "learning_rate": 2.8021480864646274e-05, "loss": 2.0437, "step": 6001 }, { "epoch": 0.1936476504856718, "grad_norm": 0.412109375, "learning_rate": 2.8020699604686902e-05, "loss": 2.0676, "step": 6002 }, { "epoch": 0.19367991433946818, "grad_norm": 0.4140625, "learning_rate": 2.8019918201405016e-05, "loss": 2.0011, "step": 6003 }, { "epoch": 0.19371217819326453, "grad_norm": 0.41015625, "learning_rate": 2.80191366548092e-05, "loss": 2.0203, "step": 6004 }, { "epoch": 0.19374444204706087, "grad_norm": 0.388671875, "learning_rate": 2.8018354964908075e-05, "loss": 2.0201, "step": 6005 }, { "epoch": 0.19377670590085722, "grad_norm": 0.3984375, "learning_rate": 2.8017573131710236e-05, "loss": 2.0323, "step": 6006 }, { "epoch": 0.19380896975465356, "grad_norm": 0.3984375, "learning_rate": 2.801679115522429e-05, "loss": 2.0046, "step": 6007 }, { "epoch": 0.1938412336084499, "grad_norm": 0.40234375, "learning_rate": 2.8016009035458848e-05, "loss": 2.0227, "step": 6008 }, { "epoch": 0.19387349746224625, "grad_norm": 0.396484375, "learning_rate": 2.8015226772422514e-05, "loss": 2.0474, "step": 6009 }, { "epoch": 0.1939057613160426, "grad_norm": 0.40625, "learning_rate": 2.8014444366123904e-05, "loss": 2.0312, "step": 6010 }, { "epoch": 0.19393802516983893, "grad_norm": 0.396484375, "learning_rate": 2.8013661816571622e-05, "loss": 2.0266, "step": 6011 }, { "epoch": 0.19397028902363528, "grad_norm": 0.412109375, "learning_rate": 2.801287912377429e-05, "loss": 2.0145, "step": 6012 }, { "epoch": 0.19400255287743162, "grad_norm": 0.412109375, "learning_rate": 2.801209628774052e-05, "loss": 2.0673, "step": 6013 }, { "epoch": 0.194034816731228, "grad_norm": 0.431640625, "learning_rate": 2.8011313308478923e-05, "loss": 2.0053, "step": 6014 }, { "epoch": 0.19406708058502434, "grad_norm": 0.390625, "learning_rate": 2.801053018599813e-05, "loss": 2.0158, "step": 6015 }, { "epoch": 0.19409934443882068, "grad_norm": 0.40625, "learning_rate": 2.8009746920306752e-05, "loss": 1.9991, "step": 6016 }, { "epoch": 0.19413160829261703, "grad_norm": 0.412109375, "learning_rate": 2.8008963511413416e-05, "loss": 1.9899, "step": 6017 }, { "epoch": 0.19416387214641337, "grad_norm": 0.365234375, "learning_rate": 2.8008179959326737e-05, "loss": 2.038, "step": 6018 }, { "epoch": 0.1941961360002097, "grad_norm": 0.443359375, "learning_rate": 2.800739626405535e-05, "loss": 2.0146, "step": 6019 }, { "epoch": 0.19422839985400606, "grad_norm": 0.388671875, "learning_rate": 2.8006612425607872e-05, "loss": 2.0165, "step": 6020 }, { "epoch": 0.1942606637078024, "grad_norm": 0.412109375, "learning_rate": 2.800582844399294e-05, "loss": 2.01, "step": 6021 }, { "epoch": 0.19429292756159874, "grad_norm": 0.37890625, "learning_rate": 2.8005044319219176e-05, "loss": 2.0107, "step": 6022 }, { "epoch": 0.1943251914153951, "grad_norm": 0.4140625, "learning_rate": 2.800426005129521e-05, "loss": 2.0203, "step": 6023 }, { "epoch": 0.19435745526919146, "grad_norm": 0.41015625, "learning_rate": 2.8003475640229683e-05, "loss": 2.0588, "step": 6024 }, { "epoch": 0.1943897191229878, "grad_norm": 0.41015625, "learning_rate": 2.800269108603123e-05, "loss": 2.0276, "step": 6025 }, { "epoch": 0.19442198297678415, "grad_norm": 0.41015625, "learning_rate": 2.800190638870847e-05, "loss": 1.9903, "step": 6026 }, { "epoch": 0.1944542468305805, "grad_norm": 0.416015625, "learning_rate": 2.8001121548270064e-05, "loss": 2.0332, "step": 6027 }, { "epoch": 0.19448651068437683, "grad_norm": 0.45703125, "learning_rate": 2.800033656472463e-05, "loss": 2.0509, "step": 6028 }, { "epoch": 0.19451877453817318, "grad_norm": 0.59375, "learning_rate": 2.799955143808082e-05, "loss": 2.0174, "step": 6029 }, { "epoch": 0.19455103839196952, "grad_norm": 0.80078125, "learning_rate": 2.7998766168347274e-05, "loss": 2.0515, "step": 6030 }, { "epoch": 0.19458330224576587, "grad_norm": 0.80078125, "learning_rate": 2.7997980755532634e-05, "loss": 2.0251, "step": 6031 }, { "epoch": 0.1946155660995622, "grad_norm": 0.419921875, "learning_rate": 2.799719519964555e-05, "loss": 2.0291, "step": 6032 }, { "epoch": 0.19464782995335855, "grad_norm": 0.625, "learning_rate": 2.7996409500694663e-05, "loss": 2.0254, "step": 6033 }, { "epoch": 0.19468009380715492, "grad_norm": 0.47265625, "learning_rate": 2.799562365868863e-05, "loss": 2.0043, "step": 6034 }, { "epoch": 0.19471235766095127, "grad_norm": 0.490234375, "learning_rate": 2.7994837673636083e-05, "loss": 2.0394, "step": 6035 }, { "epoch": 0.1947446215147476, "grad_norm": 0.65234375, "learning_rate": 2.7994051545545694e-05, "loss": 1.9964, "step": 6036 }, { "epoch": 0.19477688536854396, "grad_norm": 0.408203125, "learning_rate": 2.7993265274426108e-05, "loss": 2.0027, "step": 6037 }, { "epoch": 0.1948091492223403, "grad_norm": 0.60546875, "learning_rate": 2.7992478860285974e-05, "loss": 2.0161, "step": 6038 }, { "epoch": 0.19484141307613664, "grad_norm": 0.421875, "learning_rate": 2.7991692303133955e-05, "loss": 2.0211, "step": 6039 }, { "epoch": 0.194873676929933, "grad_norm": 0.5703125, "learning_rate": 2.799090560297871e-05, "loss": 1.9938, "step": 6040 }, { "epoch": 0.19490594078372933, "grad_norm": 0.40625, "learning_rate": 2.799011875982889e-05, "loss": 2.033, "step": 6041 }, { "epoch": 0.19493820463752567, "grad_norm": 0.55078125, "learning_rate": 2.7989331773693167e-05, "loss": 2.0926, "step": 6042 }, { "epoch": 0.19497046849132202, "grad_norm": 0.451171875, "learning_rate": 2.7988544644580196e-05, "loss": 2.0252, "step": 6043 }, { "epoch": 0.1950027323451184, "grad_norm": 0.458984375, "learning_rate": 2.7987757372498645e-05, "loss": 2.0342, "step": 6044 }, { "epoch": 0.19503499619891473, "grad_norm": 0.52734375, "learning_rate": 2.7986969957457177e-05, "loss": 2.0352, "step": 6045 }, { "epoch": 0.19506726005271108, "grad_norm": 0.3828125, "learning_rate": 2.7986182399464462e-05, "loss": 2.039, "step": 6046 }, { "epoch": 0.19509952390650742, "grad_norm": 0.49609375, "learning_rate": 2.7985394698529168e-05, "loss": 2.0166, "step": 6047 }, { "epoch": 0.19513178776030377, "grad_norm": 0.376953125, "learning_rate": 2.798460685465996e-05, "loss": 2.0162, "step": 6048 }, { "epoch": 0.1951640516141001, "grad_norm": 0.484375, "learning_rate": 2.798381886786552e-05, "loss": 2.0329, "step": 6049 }, { "epoch": 0.19519631546789645, "grad_norm": 0.3828125, "learning_rate": 2.798303073815451e-05, "loss": 2.067, "step": 6050 }, { "epoch": 0.1952285793216928, "grad_norm": 0.4453125, "learning_rate": 2.798224246553562e-05, "loss": 1.9677, "step": 6051 }, { "epoch": 0.19526084317548914, "grad_norm": 0.4140625, "learning_rate": 2.7981454050017513e-05, "loss": 2.0122, "step": 6052 }, { "epoch": 0.19529310702928548, "grad_norm": 0.44140625, "learning_rate": 2.7980665491608875e-05, "loss": 2.002, "step": 6053 }, { "epoch": 0.19532537088308186, "grad_norm": 0.431640625, "learning_rate": 2.7979876790318383e-05, "loss": 2.0117, "step": 6054 }, { "epoch": 0.1953576347368782, "grad_norm": 0.40234375, "learning_rate": 2.797908794615472e-05, "loss": 2.0261, "step": 6055 }, { "epoch": 0.19538989859067454, "grad_norm": 0.447265625, "learning_rate": 2.7978298959126564e-05, "loss": 2.0082, "step": 6056 }, { "epoch": 0.1954221624444709, "grad_norm": 0.388671875, "learning_rate": 2.7977509829242608e-05, "loss": 2.0036, "step": 6057 }, { "epoch": 0.19545442629826723, "grad_norm": 0.484375, "learning_rate": 2.7976720556511533e-05, "loss": 2.0502, "step": 6058 }, { "epoch": 0.19548669015206357, "grad_norm": 0.4296875, "learning_rate": 2.797593114094203e-05, "loss": 2.0472, "step": 6059 }, { "epoch": 0.19551895400585992, "grad_norm": 0.408203125, "learning_rate": 2.797514158254278e-05, "loss": 2.0068, "step": 6060 }, { "epoch": 0.19555121785965626, "grad_norm": 0.4921875, "learning_rate": 2.7974351881322485e-05, "loss": 2.0521, "step": 6061 }, { "epoch": 0.1955834817134526, "grad_norm": 0.474609375, "learning_rate": 2.7973562037289825e-05, "loss": 2.05, "step": 6062 }, { "epoch": 0.19561574556724895, "grad_norm": 0.42578125, "learning_rate": 2.7972772050453505e-05, "loss": 2.0241, "step": 6063 }, { "epoch": 0.19564800942104532, "grad_norm": 0.5, "learning_rate": 2.7971981920822223e-05, "loss": 2.0335, "step": 6064 }, { "epoch": 0.19568027327484167, "grad_norm": 0.404296875, "learning_rate": 2.7971191648404663e-05, "loss": 2.0067, "step": 6065 }, { "epoch": 0.195712537128638, "grad_norm": 0.474609375, "learning_rate": 2.7970401233209534e-05, "loss": 2.0405, "step": 6066 }, { "epoch": 0.19574480098243435, "grad_norm": 0.4140625, "learning_rate": 2.796961067524553e-05, "loss": 2.0199, "step": 6067 }, { "epoch": 0.1957770648362307, "grad_norm": 0.455078125, "learning_rate": 2.796881997452136e-05, "loss": 2.0299, "step": 6068 }, { "epoch": 0.19580932869002704, "grad_norm": 0.435546875, "learning_rate": 2.796802913104572e-05, "loss": 2.0323, "step": 6069 }, { "epoch": 0.19584159254382338, "grad_norm": 0.470703125, "learning_rate": 2.7967238144827322e-05, "loss": 2.0377, "step": 6070 }, { "epoch": 0.19587385639761973, "grad_norm": 0.400390625, "learning_rate": 2.7966447015874867e-05, "loss": 1.9765, "step": 6071 }, { "epoch": 0.19590612025141607, "grad_norm": 0.462890625, "learning_rate": 2.7965655744197065e-05, "loss": 2.03, "step": 6072 }, { "epoch": 0.19593838410521242, "grad_norm": 0.41015625, "learning_rate": 2.7964864329802627e-05, "loss": 2.001, "step": 6073 }, { "epoch": 0.1959706479590088, "grad_norm": 0.404296875, "learning_rate": 2.7964072772700265e-05, "loss": 1.9859, "step": 6074 }, { "epoch": 0.19600291181280513, "grad_norm": 0.43359375, "learning_rate": 2.7963281072898686e-05, "loss": 2.0133, "step": 6075 }, { "epoch": 0.19603517566660147, "grad_norm": 0.40234375, "learning_rate": 2.796248923040661e-05, "loss": 2.026, "step": 6076 }, { "epoch": 0.19606743952039782, "grad_norm": 0.427734375, "learning_rate": 2.7961697245232757e-05, "loss": 2.0147, "step": 6077 }, { "epoch": 0.19609970337419416, "grad_norm": 0.41015625, "learning_rate": 2.7960905117385833e-05, "loss": 2.0447, "step": 6078 }, { "epoch": 0.1961319672279905, "grad_norm": 0.419921875, "learning_rate": 2.7960112846874565e-05, "loss": 2.0564, "step": 6079 }, { "epoch": 0.19616423108178685, "grad_norm": 0.42578125, "learning_rate": 2.7959320433707672e-05, "loss": 2.037, "step": 6080 }, { "epoch": 0.1961964949355832, "grad_norm": 0.390625, "learning_rate": 2.7958527877893882e-05, "loss": 2.0254, "step": 6081 }, { "epoch": 0.19622875878937954, "grad_norm": 0.388671875, "learning_rate": 2.795773517944191e-05, "loss": 2.0357, "step": 6082 }, { "epoch": 0.19626102264317588, "grad_norm": 0.42578125, "learning_rate": 2.7956942338360482e-05, "loss": 2.0351, "step": 6083 }, { "epoch": 0.19629328649697225, "grad_norm": 0.412109375, "learning_rate": 2.7956149354658335e-05, "loss": 2.0247, "step": 6084 }, { "epoch": 0.1963255503507686, "grad_norm": 0.4296875, "learning_rate": 2.7955356228344182e-05, "loss": 2.0159, "step": 6085 }, { "epoch": 0.19635781420456494, "grad_norm": 0.443359375, "learning_rate": 2.7954562959426767e-05, "loss": 2.0301, "step": 6086 }, { "epoch": 0.19639007805836128, "grad_norm": 0.447265625, "learning_rate": 2.7953769547914814e-05, "loss": 2.0526, "step": 6087 }, { "epoch": 0.19642234191215763, "grad_norm": 0.43359375, "learning_rate": 2.7952975993817062e-05, "loss": 1.9714, "step": 6088 }, { "epoch": 0.19645460576595397, "grad_norm": 0.43359375, "learning_rate": 2.795218229714224e-05, "loss": 2.0493, "step": 6089 }, { "epoch": 0.19648686961975032, "grad_norm": 0.421875, "learning_rate": 2.7951388457899088e-05, "loss": 2.0046, "step": 6090 }, { "epoch": 0.19651913347354666, "grad_norm": 0.423828125, "learning_rate": 2.7950594476096342e-05, "loss": 1.9898, "step": 6091 }, { "epoch": 0.196551397327343, "grad_norm": 0.3984375, "learning_rate": 2.794980035174275e-05, "loss": 2.0382, "step": 6092 }, { "epoch": 0.19658366118113935, "grad_norm": 0.412109375, "learning_rate": 2.7949006084847032e-05, "loss": 2.0217, "step": 6093 }, { "epoch": 0.19661592503493572, "grad_norm": 0.37890625, "learning_rate": 2.7948211675417954e-05, "loss": 1.9991, "step": 6094 }, { "epoch": 0.19664818888873206, "grad_norm": 0.419921875, "learning_rate": 2.794741712346425e-05, "loss": 2.0172, "step": 6095 }, { "epoch": 0.1966804527425284, "grad_norm": 0.4375, "learning_rate": 2.7946622428994667e-05, "loss": 2.0143, "step": 6096 }, { "epoch": 0.19671271659632475, "grad_norm": 0.41796875, "learning_rate": 2.794582759201795e-05, "loss": 2.0414, "step": 6097 }, { "epoch": 0.1967449804501211, "grad_norm": 0.423828125, "learning_rate": 2.7945032612542852e-05, "loss": 1.9922, "step": 6098 }, { "epoch": 0.19677724430391744, "grad_norm": 0.431640625, "learning_rate": 2.794423749057812e-05, "loss": 2.0305, "step": 6099 }, { "epoch": 0.19680950815771378, "grad_norm": 0.466796875, "learning_rate": 2.7943442226132507e-05, "loss": 2.017, "step": 6100 }, { "epoch": 0.19684177201151012, "grad_norm": 0.458984375, "learning_rate": 2.7942646819214774e-05, "loss": 2.0154, "step": 6101 }, { "epoch": 0.19687403586530647, "grad_norm": 0.51171875, "learning_rate": 2.7941851269833665e-05, "loss": 2.0342, "step": 6102 }, { "epoch": 0.1969062997191028, "grad_norm": 0.484375, "learning_rate": 2.7941055577997942e-05, "loss": 2.0356, "step": 6103 }, { "epoch": 0.19693856357289916, "grad_norm": 0.4609375, "learning_rate": 2.7940259743716363e-05, "loss": 2.0513, "step": 6104 }, { "epoch": 0.19697082742669553, "grad_norm": 0.451171875, "learning_rate": 2.7939463766997686e-05, "loss": 2.0163, "step": 6105 }, { "epoch": 0.19700309128049187, "grad_norm": 0.412109375, "learning_rate": 2.793866764785068e-05, "loss": 2.0471, "step": 6106 }, { "epoch": 0.19703535513428821, "grad_norm": 0.42578125, "learning_rate": 2.7937871386284097e-05, "loss": 2.0466, "step": 6107 }, { "epoch": 0.19706761898808456, "grad_norm": 0.41796875, "learning_rate": 2.793707498230671e-05, "loss": 2.0349, "step": 6108 }, { "epoch": 0.1970998828418809, "grad_norm": 0.42578125, "learning_rate": 2.7936278435927284e-05, "loss": 2.0352, "step": 6109 }, { "epoch": 0.19713214669567725, "grad_norm": 0.4296875, "learning_rate": 2.7935481747154585e-05, "loss": 2.0058, "step": 6110 }, { "epoch": 0.1971644105494736, "grad_norm": 0.439453125, "learning_rate": 2.793468491599738e-05, "loss": 2.0423, "step": 6111 }, { "epoch": 0.19719667440326993, "grad_norm": 0.39453125, "learning_rate": 2.7933887942464444e-05, "loss": 2.0233, "step": 6112 }, { "epoch": 0.19722893825706628, "grad_norm": 0.46875, "learning_rate": 2.7933090826564546e-05, "loss": 2.0309, "step": 6113 }, { "epoch": 0.19726120211086262, "grad_norm": 0.45703125, "learning_rate": 2.7932293568306467e-05, "loss": 2.0552, "step": 6114 }, { "epoch": 0.197293465964659, "grad_norm": 0.421875, "learning_rate": 2.7931496167698975e-05, "loss": 2.009, "step": 6115 }, { "epoch": 0.19732572981845534, "grad_norm": 0.447265625, "learning_rate": 2.7930698624750853e-05, "loss": 1.9948, "step": 6116 }, { "epoch": 0.19735799367225168, "grad_norm": 0.451171875, "learning_rate": 2.7929900939470874e-05, "loss": 1.9859, "step": 6117 }, { "epoch": 0.19739025752604802, "grad_norm": 0.470703125, "learning_rate": 2.792910311186782e-05, "loss": 2.0008, "step": 6118 }, { "epoch": 0.19742252137984437, "grad_norm": 0.41015625, "learning_rate": 2.7928305141950476e-05, "loss": 2.0197, "step": 6119 }, { "epoch": 0.1974547852336407, "grad_norm": 0.44140625, "learning_rate": 2.7927507029727627e-05, "loss": 1.9867, "step": 6120 }, { "epoch": 0.19748704908743706, "grad_norm": 0.43359375, "learning_rate": 2.7926708775208053e-05, "loss": 1.9886, "step": 6121 }, { "epoch": 0.1975193129412334, "grad_norm": 0.39453125, "learning_rate": 2.792591037840054e-05, "loss": 2.0115, "step": 6122 }, { "epoch": 0.19755157679502974, "grad_norm": 0.4375, "learning_rate": 2.792511183931388e-05, "loss": 1.9907, "step": 6123 }, { "epoch": 0.1975838406488261, "grad_norm": 0.466796875, "learning_rate": 2.792431315795686e-05, "loss": 2.0261, "step": 6124 }, { "epoch": 0.19761610450262246, "grad_norm": 0.4765625, "learning_rate": 2.7923514334338276e-05, "loss": 2.0172, "step": 6125 }, { "epoch": 0.1976483683564188, "grad_norm": 0.498046875, "learning_rate": 2.7922715368466915e-05, "loss": 2.0081, "step": 6126 }, { "epoch": 0.19768063221021515, "grad_norm": 0.404296875, "learning_rate": 2.7921916260351573e-05, "loss": 1.9768, "step": 6127 }, { "epoch": 0.1977128960640115, "grad_norm": 0.38671875, "learning_rate": 2.7921117010001053e-05, "loss": 2.0409, "step": 6128 }, { "epoch": 0.19774515991780783, "grad_norm": 0.44921875, "learning_rate": 2.792031761742414e-05, "loss": 2.0566, "step": 6129 }, { "epoch": 0.19777742377160418, "grad_norm": 0.412109375, "learning_rate": 2.791951808262964e-05, "loss": 2.002, "step": 6130 }, { "epoch": 0.19780968762540052, "grad_norm": 0.396484375, "learning_rate": 2.791871840562636e-05, "loss": 2.0502, "step": 6131 }, { "epoch": 0.19784195147919686, "grad_norm": 0.431640625, "learning_rate": 2.7917918586423086e-05, "loss": 2.0576, "step": 6132 }, { "epoch": 0.1978742153329932, "grad_norm": 0.388671875, "learning_rate": 2.7917118625028637e-05, "loss": 2.0308, "step": 6133 }, { "epoch": 0.19790647918678955, "grad_norm": 0.419921875, "learning_rate": 2.791631852145181e-05, "loss": 2.0235, "step": 6134 }, { "epoch": 0.19793874304058592, "grad_norm": 0.380859375, "learning_rate": 2.7915518275701418e-05, "loss": 2.0368, "step": 6135 }, { "epoch": 0.19797100689438227, "grad_norm": 0.41015625, "learning_rate": 2.7914717887786266e-05, "loss": 2.0234, "step": 6136 }, { "epoch": 0.1980032707481786, "grad_norm": 0.408203125, "learning_rate": 2.7913917357715167e-05, "loss": 2.0247, "step": 6137 }, { "epoch": 0.19803553460197496, "grad_norm": 0.384765625, "learning_rate": 2.7913116685496923e-05, "loss": 2.013, "step": 6138 }, { "epoch": 0.1980677984557713, "grad_norm": 0.39453125, "learning_rate": 2.791231587114036e-05, "loss": 2.0153, "step": 6139 }, { "epoch": 0.19810006230956764, "grad_norm": 0.41796875, "learning_rate": 2.7911514914654285e-05, "loss": 2.0251, "step": 6140 }, { "epoch": 0.198132326163364, "grad_norm": 0.416015625, "learning_rate": 2.7910713816047516e-05, "loss": 2.0272, "step": 6141 }, { "epoch": 0.19816459001716033, "grad_norm": 0.427734375, "learning_rate": 2.7909912575328872e-05, "loss": 2.008, "step": 6142 }, { "epoch": 0.19819685387095667, "grad_norm": 0.43359375, "learning_rate": 2.7909111192507174e-05, "loss": 1.9963, "step": 6143 }, { "epoch": 0.19822911772475302, "grad_norm": 0.40234375, "learning_rate": 2.7908309667591236e-05, "loss": 2.0393, "step": 6144 }, { "epoch": 0.1982613815785494, "grad_norm": 0.388671875, "learning_rate": 2.7907508000589893e-05, "loss": 2.03, "step": 6145 }, { "epoch": 0.19829364543234573, "grad_norm": 0.40625, "learning_rate": 2.7906706191511955e-05, "loss": 2.0119, "step": 6146 }, { "epoch": 0.19832590928614208, "grad_norm": 0.52734375, "learning_rate": 2.7905904240366257e-05, "loss": 2.0435, "step": 6147 }, { "epoch": 0.19835817313993842, "grad_norm": 0.62890625, "learning_rate": 2.790510214716162e-05, "loss": 2.0273, "step": 6148 }, { "epoch": 0.19839043699373476, "grad_norm": 0.6484375, "learning_rate": 2.790429991190688e-05, "loss": 2.0061, "step": 6149 }, { "epoch": 0.1984227008475311, "grad_norm": 0.44921875, "learning_rate": 2.790349753461087e-05, "loss": 1.9992, "step": 6150 }, { "epoch": 0.19845496470132745, "grad_norm": 0.486328125, "learning_rate": 2.790269501528241e-05, "loss": 2.0533, "step": 6151 }, { "epoch": 0.1984872285551238, "grad_norm": 0.60546875, "learning_rate": 2.790189235393034e-05, "loss": 2.0039, "step": 6152 }, { "epoch": 0.19851949240892014, "grad_norm": 0.388671875, "learning_rate": 2.7901089550563494e-05, "loss": 2.0195, "step": 6153 }, { "epoch": 0.19855175626271648, "grad_norm": 0.498046875, "learning_rate": 2.790028660519071e-05, "loss": 2.0105, "step": 6154 }, { "epoch": 0.19858402011651285, "grad_norm": 0.41796875, "learning_rate": 2.789948351782083e-05, "loss": 2.0072, "step": 6155 }, { "epoch": 0.1986162839703092, "grad_norm": 0.458984375, "learning_rate": 2.789868028846268e-05, "loss": 2.0414, "step": 6156 }, { "epoch": 0.19864854782410554, "grad_norm": 0.470703125, "learning_rate": 2.7897876917125117e-05, "loss": 2.0044, "step": 6157 }, { "epoch": 0.1986808116779019, "grad_norm": 0.4140625, "learning_rate": 2.7897073403816978e-05, "loss": 2.0173, "step": 6158 }, { "epoch": 0.19871307553169823, "grad_norm": 0.4609375, "learning_rate": 2.7896269748547106e-05, "loss": 2.0147, "step": 6159 }, { "epoch": 0.19874533938549457, "grad_norm": 0.419921875, "learning_rate": 2.789546595132435e-05, "loss": 2.0199, "step": 6160 }, { "epoch": 0.19877760323929092, "grad_norm": 0.423828125, "learning_rate": 2.7894662012157554e-05, "loss": 2.0419, "step": 6161 }, { "epoch": 0.19880986709308726, "grad_norm": 0.416015625, "learning_rate": 2.7893857931055566e-05, "loss": 2.0263, "step": 6162 }, { "epoch": 0.1988421309468836, "grad_norm": 0.376953125, "learning_rate": 2.7893053708027245e-05, "loss": 1.9741, "step": 6163 }, { "epoch": 0.19887439480067995, "grad_norm": 0.421875, "learning_rate": 2.789224934308144e-05, "loss": 1.9998, "step": 6164 }, { "epoch": 0.19890665865447632, "grad_norm": 0.423828125, "learning_rate": 2.7891444836227e-05, "loss": 2.0267, "step": 6165 }, { "epoch": 0.19893892250827266, "grad_norm": 0.392578125, "learning_rate": 2.789064018747278e-05, "loss": 2.0141, "step": 6166 }, { "epoch": 0.198971186362069, "grad_norm": 0.396484375, "learning_rate": 2.788983539682765e-05, "loss": 2.0109, "step": 6167 }, { "epoch": 0.19900345021586535, "grad_norm": 0.39453125, "learning_rate": 2.7889030464300448e-05, "loss": 2.0357, "step": 6168 }, { "epoch": 0.1990357140696617, "grad_norm": 0.408203125, "learning_rate": 2.788822538990005e-05, "loss": 2.0545, "step": 6169 }, { "epoch": 0.19906797792345804, "grad_norm": 0.42578125, "learning_rate": 2.7887420173635315e-05, "loss": 2.0146, "step": 6170 }, { "epoch": 0.19910024177725438, "grad_norm": 0.40234375, "learning_rate": 2.7886614815515102e-05, "loss": 2.0415, "step": 6171 }, { "epoch": 0.19913250563105073, "grad_norm": 0.408203125, "learning_rate": 2.788580931554828e-05, "loss": 2.0239, "step": 6172 }, { "epoch": 0.19916476948484707, "grad_norm": 0.396484375, "learning_rate": 2.788500367374371e-05, "loss": 1.9651, "step": 6173 }, { "epoch": 0.19919703333864341, "grad_norm": 0.3984375, "learning_rate": 2.7884197890110266e-05, "loss": 1.9954, "step": 6174 }, { "epoch": 0.19922929719243979, "grad_norm": 0.40234375, "learning_rate": 2.788339196465681e-05, "loss": 2.0118, "step": 6175 }, { "epoch": 0.19926156104623613, "grad_norm": 0.4140625, "learning_rate": 2.7882585897392222e-05, "loss": 2.0554, "step": 6176 }, { "epoch": 0.19929382490003247, "grad_norm": 0.392578125, "learning_rate": 2.788177968832537e-05, "loss": 2.0291, "step": 6177 }, { "epoch": 0.19932608875382882, "grad_norm": 0.4765625, "learning_rate": 2.788097333746513e-05, "loss": 2.0431, "step": 6178 }, { "epoch": 0.19935835260762516, "grad_norm": 0.41015625, "learning_rate": 2.7880166844820373e-05, "loss": 2.0156, "step": 6179 }, { "epoch": 0.1993906164614215, "grad_norm": 0.408203125, "learning_rate": 2.7879360210399982e-05, "loss": 2.0103, "step": 6180 }, { "epoch": 0.19942288031521785, "grad_norm": 0.427734375, "learning_rate": 2.7878553434212832e-05, "loss": 2.004, "step": 6181 }, { "epoch": 0.1994551441690142, "grad_norm": 0.37890625, "learning_rate": 2.7877746516267806e-05, "loss": 2.0275, "step": 6182 }, { "epoch": 0.19948740802281054, "grad_norm": 0.69140625, "learning_rate": 2.787693945657378e-05, "loss": 2.0209, "step": 6183 }, { "epoch": 0.19951967187660688, "grad_norm": 0.439453125, "learning_rate": 2.787613225513965e-05, "loss": 1.9993, "step": 6184 }, { "epoch": 0.19955193573040325, "grad_norm": 0.39453125, "learning_rate": 2.7875324911974292e-05, "loss": 2.0422, "step": 6185 }, { "epoch": 0.1995841995841996, "grad_norm": 0.404296875, "learning_rate": 2.787451742708659e-05, "loss": 2.0246, "step": 6186 }, { "epoch": 0.19961646343799594, "grad_norm": 0.4140625, "learning_rate": 2.787370980048544e-05, "loss": 2.0209, "step": 6187 }, { "epoch": 0.19964872729179228, "grad_norm": 0.5, "learning_rate": 2.7872902032179724e-05, "loss": 2.0009, "step": 6188 }, { "epoch": 0.19968099114558863, "grad_norm": 0.462890625, "learning_rate": 2.787209412217834e-05, "loss": 1.9976, "step": 6189 }, { "epoch": 0.19971325499938497, "grad_norm": 0.39453125, "learning_rate": 2.7871286070490177e-05, "loss": 2.0171, "step": 6190 }, { "epoch": 0.19974551885318131, "grad_norm": 0.419921875, "learning_rate": 2.7870477877124136e-05, "loss": 2.0165, "step": 6191 }, { "epoch": 0.19977778270697766, "grad_norm": 0.478515625, "learning_rate": 2.78696695420891e-05, "loss": 2.045, "step": 6192 }, { "epoch": 0.199810046560774, "grad_norm": 0.498046875, "learning_rate": 2.786886106539398e-05, "loss": 1.9988, "step": 6193 }, { "epoch": 0.19984231041457035, "grad_norm": 0.421875, "learning_rate": 2.7868052447047666e-05, "loss": 2.0239, "step": 6194 }, { "epoch": 0.1998745742683667, "grad_norm": 0.40234375, "learning_rate": 2.7867243687059063e-05, "loss": 2.0533, "step": 6195 }, { "epoch": 0.19990683812216306, "grad_norm": 0.4453125, "learning_rate": 2.7866434785437075e-05, "loss": 2.0171, "step": 6196 }, { "epoch": 0.1999391019759594, "grad_norm": 0.408203125, "learning_rate": 2.78656257421906e-05, "loss": 2.0249, "step": 6197 }, { "epoch": 0.19997136582975575, "grad_norm": 0.37890625, "learning_rate": 2.7864816557328548e-05, "loss": 2.0182, "step": 6198 }, { "epoch": 0.2000036296835521, "grad_norm": 0.4140625, "learning_rate": 2.7864007230859825e-05, "loss": 2.0246, "step": 6199 }, { "epoch": 0.20003589353734844, "grad_norm": 0.36328125, "learning_rate": 2.7863197762793338e-05, "loss": 2.0128, "step": 6200 }, { "epoch": 0.20006815739114478, "grad_norm": 0.4296875, "learning_rate": 2.7862388153138e-05, "loss": 2.0414, "step": 6201 }, { "epoch": 0.20010042124494112, "grad_norm": 0.44921875, "learning_rate": 2.7861578401902718e-05, "loss": 2.0323, "step": 6202 }, { "epoch": 0.20013268509873747, "grad_norm": 0.380859375, "learning_rate": 2.7860768509096412e-05, "loss": 1.9844, "step": 6203 }, { "epoch": 0.2001649489525338, "grad_norm": 0.42578125, "learning_rate": 2.7859958474727987e-05, "loss": 2.0063, "step": 6204 }, { "epoch": 0.20019721280633015, "grad_norm": 0.44140625, "learning_rate": 2.7859148298806366e-05, "loss": 2.0082, "step": 6205 }, { "epoch": 0.20022947666012653, "grad_norm": 0.41796875, "learning_rate": 2.7858337981340465e-05, "loss": 2.0128, "step": 6206 }, { "epoch": 0.20026174051392287, "grad_norm": 0.396484375, "learning_rate": 2.7857527522339207e-05, "loss": 1.9886, "step": 6207 }, { "epoch": 0.20029400436771921, "grad_norm": 0.421875, "learning_rate": 2.7856716921811507e-05, "loss": 2.0282, "step": 6208 }, { "epoch": 0.20032626822151556, "grad_norm": 0.396484375, "learning_rate": 2.7855906179766296e-05, "loss": 2.0339, "step": 6209 }, { "epoch": 0.2003585320753119, "grad_norm": 0.4140625, "learning_rate": 2.7855095296212484e-05, "loss": 2.0086, "step": 6210 }, { "epoch": 0.20039079592910825, "grad_norm": 0.447265625, "learning_rate": 2.7854284271159012e-05, "loss": 2.0326, "step": 6211 }, { "epoch": 0.2004230597829046, "grad_norm": 0.421875, "learning_rate": 2.7853473104614803e-05, "loss": 2.0883, "step": 6212 }, { "epoch": 0.20045532363670093, "grad_norm": 0.404296875, "learning_rate": 2.7852661796588773e-05, "loss": 2.032, "step": 6213 }, { "epoch": 0.20048758749049728, "grad_norm": 0.40625, "learning_rate": 2.785185034708987e-05, "loss": 2.0185, "step": 6214 }, { "epoch": 0.20051985134429362, "grad_norm": 0.400390625, "learning_rate": 2.785103875612702e-05, "loss": 2.0086, "step": 6215 }, { "epoch": 0.20055211519809, "grad_norm": 0.404296875, "learning_rate": 2.7850227023709147e-05, "loss": 1.977, "step": 6216 }, { "epoch": 0.20058437905188634, "grad_norm": 0.41015625, "learning_rate": 2.78494151498452e-05, "loss": 2.0472, "step": 6217 }, { "epoch": 0.20061664290568268, "grad_norm": 0.4140625, "learning_rate": 2.7848603134544107e-05, "loss": 2.0114, "step": 6218 }, { "epoch": 0.20064890675947902, "grad_norm": 0.419921875, "learning_rate": 2.7847790977814804e-05, "loss": 2.0158, "step": 6219 }, { "epoch": 0.20068117061327537, "grad_norm": 0.458984375, "learning_rate": 2.784697867966624e-05, "loss": 2.0344, "step": 6220 }, { "epoch": 0.2007134344670717, "grad_norm": 0.388671875, "learning_rate": 2.784616624010735e-05, "loss": 2.0339, "step": 6221 }, { "epoch": 0.20074569832086805, "grad_norm": 0.416015625, "learning_rate": 2.7845353659147078e-05, "loss": 2.0367, "step": 6222 }, { "epoch": 0.2007779621746644, "grad_norm": 0.48046875, "learning_rate": 2.784454093679437e-05, "loss": 2.0259, "step": 6223 }, { "epoch": 0.20081022602846074, "grad_norm": 0.4453125, "learning_rate": 2.7843728073058166e-05, "loss": 2.0274, "step": 6224 }, { "epoch": 0.20084248988225709, "grad_norm": 0.43359375, "learning_rate": 2.784291506794742e-05, "loss": 2.0242, "step": 6225 }, { "epoch": 0.20087475373605346, "grad_norm": 0.400390625, "learning_rate": 2.7842101921471075e-05, "loss": 1.9987, "step": 6226 }, { "epoch": 0.2009070175898498, "grad_norm": 0.421875, "learning_rate": 2.7841288633638088e-05, "loss": 1.9954, "step": 6227 }, { "epoch": 0.20093928144364614, "grad_norm": 0.4140625, "learning_rate": 2.784047520445741e-05, "loss": 2.01, "step": 6228 }, { "epoch": 0.2009715452974425, "grad_norm": 0.42578125, "learning_rate": 2.7839661633937988e-05, "loss": 2.0351, "step": 6229 }, { "epoch": 0.20100380915123883, "grad_norm": 0.3984375, "learning_rate": 2.7838847922088784e-05, "loss": 2.0136, "step": 6230 }, { "epoch": 0.20103607300503518, "grad_norm": 0.458984375, "learning_rate": 2.783803406891875e-05, "loss": 2.0184, "step": 6231 }, { "epoch": 0.20106833685883152, "grad_norm": 0.4140625, "learning_rate": 2.7837220074436852e-05, "loss": 1.9807, "step": 6232 }, { "epoch": 0.20110060071262786, "grad_norm": 0.412109375, "learning_rate": 2.7836405938652046e-05, "loss": 2.0171, "step": 6233 }, { "epoch": 0.2011328645664242, "grad_norm": 0.40234375, "learning_rate": 2.7835591661573288e-05, "loss": 2.0257, "step": 6234 }, { "epoch": 0.20116512842022055, "grad_norm": 0.42578125, "learning_rate": 2.7834777243209546e-05, "loss": 2.0211, "step": 6235 }, { "epoch": 0.20119739227401692, "grad_norm": 0.43359375, "learning_rate": 2.783396268356978e-05, "loss": 2.01, "step": 6236 }, { "epoch": 0.20122965612781327, "grad_norm": 0.39453125, "learning_rate": 2.7833147982662964e-05, "loss": 2.0049, "step": 6237 }, { "epoch": 0.2012619199816096, "grad_norm": 0.384765625, "learning_rate": 2.783233314049806e-05, "loss": 2.0211, "step": 6238 }, { "epoch": 0.20129418383540595, "grad_norm": 0.404296875, "learning_rate": 2.7831518157084044e-05, "loss": 1.9944, "step": 6239 }, { "epoch": 0.2013264476892023, "grad_norm": 0.390625, "learning_rate": 2.7830703032429875e-05, "loss": 2.0056, "step": 6240 }, { "epoch": 0.20135871154299864, "grad_norm": 0.4609375, "learning_rate": 2.7829887766544533e-05, "loss": 2.0425, "step": 6241 }, { "epoch": 0.20139097539679499, "grad_norm": 0.62890625, "learning_rate": 2.7829072359436992e-05, "loss": 2.0371, "step": 6242 }, { "epoch": 0.20142323925059133, "grad_norm": 0.79296875, "learning_rate": 2.7828256811116225e-05, "loss": 2.0454, "step": 6243 }, { "epoch": 0.20145550310438767, "grad_norm": 0.671875, "learning_rate": 2.782744112159121e-05, "loss": 2.0279, "step": 6244 }, { "epoch": 0.20148776695818402, "grad_norm": 0.4140625, "learning_rate": 2.782662529087093e-05, "loss": 2.0338, "step": 6245 }, { "epoch": 0.2015200308119804, "grad_norm": 0.74609375, "learning_rate": 2.7825809318964353e-05, "loss": 2.0405, "step": 6246 }, { "epoch": 0.20155229466577673, "grad_norm": 0.5234375, "learning_rate": 2.7824993205880475e-05, "loss": 2.0339, "step": 6247 }, { "epoch": 0.20158455851957308, "grad_norm": 0.51171875, "learning_rate": 2.7824176951628263e-05, "loss": 2.025, "step": 6248 }, { "epoch": 0.20161682237336942, "grad_norm": 0.51953125, "learning_rate": 2.782336055621672e-05, "loss": 2.0051, "step": 6249 }, { "epoch": 0.20164908622716576, "grad_norm": 0.443359375, "learning_rate": 2.7822544019654817e-05, "loss": 2.0193, "step": 6250 }, { "epoch": 0.2016813500809621, "grad_norm": 0.482421875, "learning_rate": 2.7821727341951553e-05, "loss": 1.9935, "step": 6251 }, { "epoch": 0.20171361393475845, "grad_norm": 0.42578125, "learning_rate": 2.782091052311591e-05, "loss": 2.0077, "step": 6252 }, { "epoch": 0.2017458777885548, "grad_norm": 0.439453125, "learning_rate": 2.782009356315689e-05, "loss": 1.9863, "step": 6253 }, { "epoch": 0.20177814164235114, "grad_norm": 0.40234375, "learning_rate": 2.7819276462083467e-05, "loss": 1.9992, "step": 6254 }, { "epoch": 0.20181040549614748, "grad_norm": 0.404296875, "learning_rate": 2.781845921990465e-05, "loss": 2.0062, "step": 6255 }, { "epoch": 0.20184266934994385, "grad_norm": 0.373046875, "learning_rate": 2.7817641836629426e-05, "loss": 2.0014, "step": 6256 }, { "epoch": 0.2018749332037402, "grad_norm": 0.443359375, "learning_rate": 2.78168243122668e-05, "loss": 2.001, "step": 6257 }, { "epoch": 0.20190719705753654, "grad_norm": 0.408203125, "learning_rate": 2.7816006646825766e-05, "loss": 2.0073, "step": 6258 }, { "epoch": 0.20193946091133289, "grad_norm": 0.451171875, "learning_rate": 2.7815188840315324e-05, "loss": 2.0252, "step": 6259 }, { "epoch": 0.20197172476512923, "grad_norm": 0.4140625, "learning_rate": 2.7814370892744477e-05, "loss": 2.0227, "step": 6260 }, { "epoch": 0.20200398861892557, "grad_norm": 0.400390625, "learning_rate": 2.7813552804122227e-05, "loss": 2.038, "step": 6261 }, { "epoch": 0.20203625247272192, "grad_norm": 0.412109375, "learning_rate": 2.7812734574457584e-05, "loss": 1.9942, "step": 6262 }, { "epoch": 0.20206851632651826, "grad_norm": 0.447265625, "learning_rate": 2.7811916203759546e-05, "loss": 1.987, "step": 6263 }, { "epoch": 0.2021007801803146, "grad_norm": 0.421875, "learning_rate": 2.781109769203713e-05, "loss": 1.9965, "step": 6264 }, { "epoch": 0.20213304403411095, "grad_norm": 0.408203125, "learning_rate": 2.7810279039299337e-05, "loss": 2.0475, "step": 6265 }, { "epoch": 0.20216530788790732, "grad_norm": 0.443359375, "learning_rate": 2.7809460245555183e-05, "loss": 2.031, "step": 6266 }, { "epoch": 0.20219757174170366, "grad_norm": 0.4140625, "learning_rate": 2.7808641310813684e-05, "loss": 2.015, "step": 6267 }, { "epoch": 0.2022298355955, "grad_norm": 0.451171875, "learning_rate": 2.7807822235083846e-05, "loss": 1.99, "step": 6268 }, { "epoch": 0.20226209944929635, "grad_norm": 0.412109375, "learning_rate": 2.780700301837469e-05, "loss": 2.0402, "step": 6269 }, { "epoch": 0.2022943633030927, "grad_norm": 0.455078125, "learning_rate": 2.7806183660695235e-05, "loss": 2.0094, "step": 6270 }, { "epoch": 0.20232662715688904, "grad_norm": 0.4375, "learning_rate": 2.7805364162054494e-05, "loss": 2.0195, "step": 6271 }, { "epoch": 0.20235889101068538, "grad_norm": 0.412109375, "learning_rate": 2.780454452246149e-05, "loss": 1.9861, "step": 6272 }, { "epoch": 0.20239115486448173, "grad_norm": 0.453125, "learning_rate": 2.780372474192525e-05, "loss": 1.9997, "step": 6273 }, { "epoch": 0.20242341871827807, "grad_norm": 0.43359375, "learning_rate": 2.780290482045479e-05, "loss": 2.0276, "step": 6274 }, { "epoch": 0.2024556825720744, "grad_norm": 0.439453125, "learning_rate": 2.780208475805914e-05, "loss": 2.0222, "step": 6275 }, { "epoch": 0.20248794642587076, "grad_norm": 0.4375, "learning_rate": 2.7801264554747325e-05, "loss": 2.0289, "step": 6276 }, { "epoch": 0.20252021027966713, "grad_norm": 0.416015625, "learning_rate": 2.7800444210528378e-05, "loss": 2.0199, "step": 6277 }, { "epoch": 0.20255247413346347, "grad_norm": 0.419921875, "learning_rate": 2.7799623725411315e-05, "loss": 2.0316, "step": 6278 }, { "epoch": 0.20258473798725982, "grad_norm": 0.427734375, "learning_rate": 2.7798803099405185e-05, "loss": 2.0265, "step": 6279 }, { "epoch": 0.20261700184105616, "grad_norm": 0.423828125, "learning_rate": 2.7797982332519007e-05, "loss": 2.026, "step": 6280 }, { "epoch": 0.2026492656948525, "grad_norm": 0.42578125, "learning_rate": 2.779716142476182e-05, "loss": 2.007, "step": 6281 }, { "epoch": 0.20268152954864885, "grad_norm": 0.421875, "learning_rate": 2.779634037614267e-05, "loss": 1.9868, "step": 6282 }, { "epoch": 0.2027137934024452, "grad_norm": 0.4453125, "learning_rate": 2.779551918667057e-05, "loss": 2.0046, "step": 6283 }, { "epoch": 0.20274605725624154, "grad_norm": 0.431640625, "learning_rate": 2.7794697856354587e-05, "loss": 1.9967, "step": 6284 }, { "epoch": 0.20277832111003788, "grad_norm": 0.53125, "learning_rate": 2.7793876385203746e-05, "loss": 2.0155, "step": 6285 }, { "epoch": 0.20281058496383422, "grad_norm": 0.4609375, "learning_rate": 2.779305477322709e-05, "loss": 2.0306, "step": 6286 }, { "epoch": 0.2028428488176306, "grad_norm": 0.4453125, "learning_rate": 2.7792233020433665e-05, "loss": 2.0066, "step": 6287 }, { "epoch": 0.20287511267142694, "grad_norm": 0.45703125, "learning_rate": 2.7791411126832514e-05, "loss": 1.9843, "step": 6288 }, { "epoch": 0.20290737652522328, "grad_norm": 0.427734375, "learning_rate": 2.779058909243269e-05, "loss": 2.0113, "step": 6289 }, { "epoch": 0.20293964037901963, "grad_norm": 0.453125, "learning_rate": 2.7789766917243237e-05, "loss": 2.0225, "step": 6290 }, { "epoch": 0.20297190423281597, "grad_norm": 0.404296875, "learning_rate": 2.77889446012732e-05, "loss": 1.9929, "step": 6291 }, { "epoch": 0.2030041680866123, "grad_norm": 0.396484375, "learning_rate": 2.778812214453164e-05, "loss": 2.0323, "step": 6292 }, { "epoch": 0.20303643194040866, "grad_norm": 0.43359375, "learning_rate": 2.7787299547027604e-05, "loss": 2.0352, "step": 6293 }, { "epoch": 0.203068695794205, "grad_norm": 0.53515625, "learning_rate": 2.7786476808770148e-05, "loss": 2.019, "step": 6294 }, { "epoch": 0.20310095964800134, "grad_norm": 0.59765625, "learning_rate": 2.7785653929768326e-05, "loss": 2.0099, "step": 6295 }, { "epoch": 0.2031332235017977, "grad_norm": 0.58984375, "learning_rate": 2.7784830910031206e-05, "loss": 2.0137, "step": 6296 }, { "epoch": 0.20316548735559406, "grad_norm": 0.4140625, "learning_rate": 2.7784007749567833e-05, "loss": 2.0293, "step": 6297 }, { "epoch": 0.2031977512093904, "grad_norm": 0.54296875, "learning_rate": 2.7783184448387277e-05, "loss": 2.0033, "step": 6298 }, { "epoch": 0.20323001506318675, "grad_norm": 0.482421875, "learning_rate": 2.7782361006498593e-05, "loss": 2.0565, "step": 6299 }, { "epoch": 0.2032622789169831, "grad_norm": 0.40234375, "learning_rate": 2.778153742391085e-05, "loss": 2.0378, "step": 6300 }, { "epoch": 0.20329454277077944, "grad_norm": 0.478515625, "learning_rate": 2.7780713700633113e-05, "loss": 2.0289, "step": 6301 }, { "epoch": 0.20332680662457578, "grad_norm": 0.37890625, "learning_rate": 2.777988983667445e-05, "loss": 2.0097, "step": 6302 }, { "epoch": 0.20335907047837212, "grad_norm": 0.439453125, "learning_rate": 2.7779065832043927e-05, "loss": 2.0301, "step": 6303 }, { "epoch": 0.20339133433216847, "grad_norm": 0.44140625, "learning_rate": 2.7778241686750614e-05, "loss": 2.0336, "step": 6304 }, { "epoch": 0.2034235981859648, "grad_norm": 0.37890625, "learning_rate": 2.7777417400803586e-05, "loss": 2.0041, "step": 6305 }, { "epoch": 0.20345586203976115, "grad_norm": 0.4375, "learning_rate": 2.777659297421191e-05, "loss": 2.0174, "step": 6306 }, { "epoch": 0.20348812589355753, "grad_norm": 0.498046875, "learning_rate": 2.7775768406984666e-05, "loss": 2.0271, "step": 6307 }, { "epoch": 0.20352038974735387, "grad_norm": 0.412109375, "learning_rate": 2.7774943699130934e-05, "loss": 2.0178, "step": 6308 }, { "epoch": 0.2035526536011502, "grad_norm": 0.4453125, "learning_rate": 2.777411885065978e-05, "loss": 2.0195, "step": 6309 }, { "epoch": 0.20358491745494656, "grad_norm": 0.466796875, "learning_rate": 2.777329386158029e-05, "loss": 2.0479, "step": 6310 }, { "epoch": 0.2036171813087429, "grad_norm": 0.41796875, "learning_rate": 2.7772468731901547e-05, "loss": 2.0465, "step": 6311 }, { "epoch": 0.20364944516253924, "grad_norm": 0.421875, "learning_rate": 2.777164346163263e-05, "loss": 2.0112, "step": 6312 }, { "epoch": 0.2036817090163356, "grad_norm": 0.431640625, "learning_rate": 2.7770818050782628e-05, "loss": 2.0287, "step": 6313 }, { "epoch": 0.20371397287013193, "grad_norm": 0.3984375, "learning_rate": 2.776999249936062e-05, "loss": 2.0405, "step": 6314 }, { "epoch": 0.20374623672392828, "grad_norm": 0.439453125, "learning_rate": 2.7769166807375693e-05, "loss": 2.0367, "step": 6315 }, { "epoch": 0.20377850057772462, "grad_norm": 0.4140625, "learning_rate": 2.776834097483694e-05, "loss": 2.0233, "step": 6316 }, { "epoch": 0.203810764431521, "grad_norm": 0.4453125, "learning_rate": 2.776751500175345e-05, "loss": 1.9836, "step": 6317 }, { "epoch": 0.20384302828531733, "grad_norm": 0.466796875, "learning_rate": 2.776668888813431e-05, "loss": 2.0017, "step": 6318 }, { "epoch": 0.20387529213911368, "grad_norm": 0.408203125, "learning_rate": 2.7765862633988624e-05, "loss": 1.999, "step": 6319 }, { "epoch": 0.20390755599291002, "grad_norm": 0.474609375, "learning_rate": 2.776503623932548e-05, "loss": 2.0203, "step": 6320 }, { "epoch": 0.20393981984670637, "grad_norm": 0.462890625, "learning_rate": 2.7764209704153968e-05, "loss": 2.037, "step": 6321 }, { "epoch": 0.2039720837005027, "grad_norm": 0.3984375, "learning_rate": 2.7763383028483197e-05, "loss": 2.0465, "step": 6322 }, { "epoch": 0.20400434755429905, "grad_norm": 0.484375, "learning_rate": 2.7762556212322263e-05, "loss": 2.0576, "step": 6323 }, { "epoch": 0.2040366114080954, "grad_norm": 0.40234375, "learning_rate": 2.7761729255680268e-05, "loss": 2.0614, "step": 6324 }, { "epoch": 0.20406887526189174, "grad_norm": 0.44921875, "learning_rate": 2.776090215856631e-05, "loss": 2.0001, "step": 6325 }, { "epoch": 0.20410113911568808, "grad_norm": 0.48046875, "learning_rate": 2.7760074920989492e-05, "loss": 2.0328, "step": 6326 }, { "epoch": 0.20413340296948446, "grad_norm": 0.43359375, "learning_rate": 2.7759247542958933e-05, "loss": 2.0163, "step": 6327 }, { "epoch": 0.2041656668232808, "grad_norm": 0.4765625, "learning_rate": 2.775842002448372e-05, "loss": 2.0247, "step": 6328 }, { "epoch": 0.20419793067707714, "grad_norm": 0.5234375, "learning_rate": 2.7757592365572977e-05, "loss": 2.0175, "step": 6329 }, { "epoch": 0.2042301945308735, "grad_norm": 0.435546875, "learning_rate": 2.775676456623581e-05, "loss": 2.0199, "step": 6330 }, { "epoch": 0.20426245838466983, "grad_norm": 0.421875, "learning_rate": 2.7755936626481327e-05, "loss": 1.9896, "step": 6331 }, { "epoch": 0.20429472223846618, "grad_norm": 0.515625, "learning_rate": 2.775510854631865e-05, "loss": 2.0102, "step": 6332 }, { "epoch": 0.20432698609226252, "grad_norm": 0.42578125, "learning_rate": 2.7754280325756882e-05, "loss": 1.9951, "step": 6333 }, { "epoch": 0.20435924994605886, "grad_norm": 0.416015625, "learning_rate": 2.775345196480515e-05, "loss": 2.0241, "step": 6334 }, { "epoch": 0.2043915137998552, "grad_norm": 0.4375, "learning_rate": 2.775262346347257e-05, "loss": 2.0375, "step": 6335 }, { "epoch": 0.20442377765365155, "grad_norm": 0.40234375, "learning_rate": 2.7751794821768256e-05, "loss": 2.0123, "step": 6336 }, { "epoch": 0.20445604150744792, "grad_norm": 0.390625, "learning_rate": 2.7750966039701335e-05, "loss": 2.0317, "step": 6337 }, { "epoch": 0.20448830536124427, "grad_norm": 0.3828125, "learning_rate": 2.7750137117280926e-05, "loss": 2.0188, "step": 6338 }, { "epoch": 0.2045205692150406, "grad_norm": 0.423828125, "learning_rate": 2.7749308054516156e-05, "loss": 2.0257, "step": 6339 }, { "epoch": 0.20455283306883695, "grad_norm": 0.419921875, "learning_rate": 2.7748478851416146e-05, "loss": 2.0505, "step": 6340 }, { "epoch": 0.2045850969226333, "grad_norm": 0.3984375, "learning_rate": 2.774764950799003e-05, "loss": 2.0165, "step": 6341 }, { "epoch": 0.20461736077642964, "grad_norm": 0.38671875, "learning_rate": 2.7746820024246933e-05, "loss": 2.0388, "step": 6342 }, { "epoch": 0.20464962463022598, "grad_norm": 0.40234375, "learning_rate": 2.7745990400195984e-05, "loss": 1.9758, "step": 6343 }, { "epoch": 0.20468188848402233, "grad_norm": 0.380859375, "learning_rate": 2.7745160635846316e-05, "loss": 1.9981, "step": 6344 }, { "epoch": 0.20471415233781867, "grad_norm": 0.396484375, "learning_rate": 2.7744330731207063e-05, "loss": 2.0149, "step": 6345 }, { "epoch": 0.20474641619161502, "grad_norm": 0.41015625, "learning_rate": 2.7743500686287364e-05, "loss": 2.0149, "step": 6346 }, { "epoch": 0.2047786800454114, "grad_norm": 0.384765625, "learning_rate": 2.774267050109635e-05, "loss": 2.0288, "step": 6347 }, { "epoch": 0.20481094389920773, "grad_norm": 0.3828125, "learning_rate": 2.7741840175643156e-05, "loss": 1.9952, "step": 6348 }, { "epoch": 0.20484320775300408, "grad_norm": 0.37890625, "learning_rate": 2.774100970993693e-05, "loss": 2.0444, "step": 6349 }, { "epoch": 0.20487547160680042, "grad_norm": 0.373046875, "learning_rate": 2.774017910398681e-05, "loss": 2.0058, "step": 6350 }, { "epoch": 0.20490773546059676, "grad_norm": 0.392578125, "learning_rate": 2.773934835780194e-05, "loss": 2.0149, "step": 6351 }, { "epoch": 0.2049399993143931, "grad_norm": 0.38671875, "learning_rate": 2.773851747139146e-05, "loss": 1.9922, "step": 6352 }, { "epoch": 0.20497226316818945, "grad_norm": 0.408203125, "learning_rate": 2.7737686444764517e-05, "loss": 1.9816, "step": 6353 }, { "epoch": 0.2050045270219858, "grad_norm": 0.455078125, "learning_rate": 2.7736855277930262e-05, "loss": 2.0194, "step": 6354 }, { "epoch": 0.20503679087578214, "grad_norm": 0.431640625, "learning_rate": 2.7736023970897838e-05, "loss": 2.0092, "step": 6355 }, { "epoch": 0.20506905472957848, "grad_norm": 0.41015625, "learning_rate": 2.7735192523676405e-05, "loss": 2.0069, "step": 6356 }, { "epoch": 0.20510131858337485, "grad_norm": 0.41015625, "learning_rate": 2.7734360936275102e-05, "loss": 1.9725, "step": 6357 }, { "epoch": 0.2051335824371712, "grad_norm": 0.38671875, "learning_rate": 2.7733529208703095e-05, "loss": 2.0105, "step": 6358 }, { "epoch": 0.20516584629096754, "grad_norm": 0.421875, "learning_rate": 2.773269734096953e-05, "loss": 1.995, "step": 6359 }, { "epoch": 0.20519811014476388, "grad_norm": 0.4375, "learning_rate": 2.7731865333083566e-05, "loss": 2.0314, "step": 6360 }, { "epoch": 0.20523037399856023, "grad_norm": 0.435546875, "learning_rate": 2.7731033185054367e-05, "loss": 2.0199, "step": 6361 }, { "epoch": 0.20526263785235657, "grad_norm": 0.42578125, "learning_rate": 2.7730200896891083e-05, "loss": 2.027, "step": 6362 }, { "epoch": 0.20529490170615292, "grad_norm": 0.44140625, "learning_rate": 2.7729368468602884e-05, "loss": 2.0202, "step": 6363 }, { "epoch": 0.20532716555994926, "grad_norm": 0.431640625, "learning_rate": 2.7728535900198928e-05, "loss": 2.0616, "step": 6364 }, { "epoch": 0.2053594294137456, "grad_norm": 0.45703125, "learning_rate": 2.772770319168838e-05, "loss": 2.0163, "step": 6365 }, { "epoch": 0.20539169326754195, "grad_norm": 0.546875, "learning_rate": 2.7726870343080404e-05, "loss": 2.0309, "step": 6366 }, { "epoch": 0.2054239571213383, "grad_norm": 0.578125, "learning_rate": 2.7726037354384172e-05, "loss": 2.024, "step": 6367 }, { "epoch": 0.20545622097513466, "grad_norm": 0.53515625, "learning_rate": 2.772520422560885e-05, "loss": 2.0351, "step": 6368 }, { "epoch": 0.205488484828931, "grad_norm": 0.470703125, "learning_rate": 2.7724370956763605e-05, "loss": 2.0214, "step": 6369 }, { "epoch": 0.20552074868272735, "grad_norm": 0.458984375, "learning_rate": 2.7723537547857618e-05, "loss": 2.0538, "step": 6370 }, { "epoch": 0.2055530125365237, "grad_norm": 0.578125, "learning_rate": 2.7722703998900056e-05, "loss": 2.042, "step": 6371 }, { "epoch": 0.20558527639032004, "grad_norm": 0.515625, "learning_rate": 2.7721870309900096e-05, "loss": 2.0261, "step": 6372 }, { "epoch": 0.20561754024411638, "grad_norm": 0.49609375, "learning_rate": 2.7721036480866912e-05, "loss": 2.0148, "step": 6373 }, { "epoch": 0.20564980409791273, "grad_norm": 0.421875, "learning_rate": 2.772020251180969e-05, "loss": 2.0133, "step": 6374 }, { "epoch": 0.20568206795170907, "grad_norm": 0.4921875, "learning_rate": 2.7719368402737597e-05, "loss": 2.0368, "step": 6375 }, { "epoch": 0.2057143318055054, "grad_norm": 0.46875, "learning_rate": 2.7718534153659825e-05, "loss": 2.0258, "step": 6376 }, { "epoch": 0.20574659565930176, "grad_norm": 0.4453125, "learning_rate": 2.7717699764585557e-05, "loss": 2.0612, "step": 6377 }, { "epoch": 0.20577885951309813, "grad_norm": 0.439453125, "learning_rate": 2.771686523552397e-05, "loss": 2.0106, "step": 6378 }, { "epoch": 0.20581112336689447, "grad_norm": 0.408203125, "learning_rate": 2.771603056648425e-05, "loss": 2.0489, "step": 6379 }, { "epoch": 0.20584338722069082, "grad_norm": 0.44140625, "learning_rate": 2.771519575747559e-05, "loss": 2.0437, "step": 6380 }, { "epoch": 0.20587565107448716, "grad_norm": 0.41015625, "learning_rate": 2.771436080850718e-05, "loss": 2.0166, "step": 6381 }, { "epoch": 0.2059079149282835, "grad_norm": 0.458984375, "learning_rate": 2.7713525719588204e-05, "loss": 2.0341, "step": 6382 }, { "epoch": 0.20594017878207985, "grad_norm": 0.4375, "learning_rate": 2.771269049072786e-05, "loss": 2.0544, "step": 6383 }, { "epoch": 0.2059724426358762, "grad_norm": 0.431640625, "learning_rate": 2.7711855121935338e-05, "loss": 2.0319, "step": 6384 }, { "epoch": 0.20600470648967253, "grad_norm": 0.431640625, "learning_rate": 2.7711019613219834e-05, "loss": 2.0378, "step": 6385 }, { "epoch": 0.20603697034346888, "grad_norm": 0.44140625, "learning_rate": 2.7710183964590547e-05, "loss": 2.0192, "step": 6386 }, { "epoch": 0.20606923419726522, "grad_norm": 0.470703125, "learning_rate": 2.7709348176056673e-05, "loss": 2.0441, "step": 6387 }, { "epoch": 0.2061014980510616, "grad_norm": 0.390625, "learning_rate": 2.770851224762741e-05, "loss": 2.0431, "step": 6388 }, { "epoch": 0.20613376190485794, "grad_norm": 0.466796875, "learning_rate": 2.7707676179311962e-05, "loss": 1.9883, "step": 6389 }, { "epoch": 0.20616602575865428, "grad_norm": 0.3828125, "learning_rate": 2.7706839971119527e-05, "loss": 1.9969, "step": 6390 }, { "epoch": 0.20619828961245062, "grad_norm": 0.39453125, "learning_rate": 2.770600362305932e-05, "loss": 2.0429, "step": 6391 }, { "epoch": 0.20623055346624697, "grad_norm": 0.375, "learning_rate": 2.770516713514054e-05, "loss": 2.0263, "step": 6392 }, { "epoch": 0.2062628173200433, "grad_norm": 0.404296875, "learning_rate": 2.770433050737239e-05, "loss": 2.0151, "step": 6393 }, { "epoch": 0.20629508117383966, "grad_norm": 0.427734375, "learning_rate": 2.770349373976409e-05, "loss": 2.0357, "step": 6394 }, { "epoch": 0.206327345027636, "grad_norm": 0.392578125, "learning_rate": 2.7702656832324838e-05, "loss": 1.9847, "step": 6395 }, { "epoch": 0.20635960888143234, "grad_norm": 0.408203125, "learning_rate": 2.7701819785063852e-05, "loss": 2.057, "step": 6396 }, { "epoch": 0.2063918727352287, "grad_norm": 0.369140625, "learning_rate": 2.770098259799035e-05, "loss": 2.0019, "step": 6397 }, { "epoch": 0.20642413658902506, "grad_norm": 0.39453125, "learning_rate": 2.770014527111354e-05, "loss": 2.0124, "step": 6398 }, { "epoch": 0.2064564004428214, "grad_norm": 0.3671875, "learning_rate": 2.769930780444265e-05, "loss": 2.032, "step": 6399 }, { "epoch": 0.20648866429661775, "grad_norm": 0.388671875, "learning_rate": 2.769847019798688e-05, "loss": 2.0206, "step": 6400 }, { "epoch": 0.2065209281504141, "grad_norm": 0.38671875, "learning_rate": 2.769763245175547e-05, "loss": 2.0307, "step": 6401 }, { "epoch": 0.20655319200421043, "grad_norm": 0.36328125, "learning_rate": 2.7696794565757625e-05, "loss": 2.0026, "step": 6402 }, { "epoch": 0.20658545585800678, "grad_norm": 0.380859375, "learning_rate": 2.7695956540002572e-05, "loss": 2.0048, "step": 6403 }, { "epoch": 0.20661771971180312, "grad_norm": 0.43359375, "learning_rate": 2.769511837449954e-05, "loss": 2.0224, "step": 6404 }, { "epoch": 0.20664998356559947, "grad_norm": 0.50390625, "learning_rate": 2.7694280069257753e-05, "loss": 2.0072, "step": 6405 }, { "epoch": 0.2066822474193958, "grad_norm": 0.515625, "learning_rate": 2.7693441624286434e-05, "loss": 2.0071, "step": 6406 }, { "epoch": 0.20671451127319215, "grad_norm": 0.447265625, "learning_rate": 2.769260303959482e-05, "loss": 2.0048, "step": 6407 }, { "epoch": 0.20674677512698852, "grad_norm": 0.412109375, "learning_rate": 2.769176431519214e-05, "loss": 1.9941, "step": 6408 }, { "epoch": 0.20677903898078487, "grad_norm": 0.46875, "learning_rate": 2.7690925451087617e-05, "loss": 2.0346, "step": 6409 }, { "epoch": 0.2068113028345812, "grad_norm": 0.470703125, "learning_rate": 2.7690086447290496e-05, "loss": 2.0139, "step": 6410 }, { "epoch": 0.20684356668837756, "grad_norm": 0.40234375, "learning_rate": 2.768924730381e-05, "loss": 2.041, "step": 6411 }, { "epoch": 0.2068758305421739, "grad_norm": 0.404296875, "learning_rate": 2.7688408020655382e-05, "loss": 1.9981, "step": 6412 }, { "epoch": 0.20690809439597024, "grad_norm": 0.421875, "learning_rate": 2.7687568597835866e-05, "loss": 2.0429, "step": 6413 }, { "epoch": 0.2069403582497666, "grad_norm": 0.419921875, "learning_rate": 2.7686729035360695e-05, "loss": 1.9919, "step": 6414 }, { "epoch": 0.20697262210356293, "grad_norm": 0.416015625, "learning_rate": 2.768588933323912e-05, "loss": 2.0047, "step": 6415 }, { "epoch": 0.20700488595735927, "grad_norm": 0.45703125, "learning_rate": 2.768504949148037e-05, "loss": 2.0478, "step": 6416 }, { "epoch": 0.20703714981115562, "grad_norm": 0.419921875, "learning_rate": 2.7684209510093696e-05, "loss": 2.0113, "step": 6417 }, { "epoch": 0.207069413664952, "grad_norm": 0.400390625, "learning_rate": 2.7683369389088347e-05, "loss": 2.0568, "step": 6418 }, { "epoch": 0.20710167751874833, "grad_norm": 0.388671875, "learning_rate": 2.7682529128473564e-05, "loss": 2.0344, "step": 6419 }, { "epoch": 0.20713394137254468, "grad_norm": 0.39453125, "learning_rate": 2.7681688728258596e-05, "loss": 2.0179, "step": 6420 }, { "epoch": 0.20716620522634102, "grad_norm": 0.361328125, "learning_rate": 2.76808481884527e-05, "loss": 2.0571, "step": 6421 }, { "epoch": 0.20719846908013737, "grad_norm": 0.39453125, "learning_rate": 2.7680007509065123e-05, "loss": 1.9831, "step": 6422 }, { "epoch": 0.2072307329339337, "grad_norm": 0.376953125, "learning_rate": 2.7679166690105123e-05, "loss": 2.0282, "step": 6423 }, { "epoch": 0.20726299678773005, "grad_norm": 0.408203125, "learning_rate": 2.7678325731581944e-05, "loss": 2.0269, "step": 6424 }, { "epoch": 0.2072952606415264, "grad_norm": 0.439453125, "learning_rate": 2.7677484633504856e-05, "loss": 2.0225, "step": 6425 }, { "epoch": 0.20732752449532274, "grad_norm": 0.404296875, "learning_rate": 2.7676643395883114e-05, "loss": 1.9896, "step": 6426 }, { "epoch": 0.20735978834911908, "grad_norm": 0.390625, "learning_rate": 2.7675802018725976e-05, "loss": 2.0219, "step": 6427 }, { "epoch": 0.20739205220291546, "grad_norm": 0.416015625, "learning_rate": 2.76749605020427e-05, "loss": 2.0516, "step": 6428 }, { "epoch": 0.2074243160567118, "grad_norm": 0.41015625, "learning_rate": 2.767411884584255e-05, "loss": 2.0309, "step": 6429 }, { "epoch": 0.20745657991050814, "grad_norm": 0.376953125, "learning_rate": 2.7673277050134798e-05, "loss": 1.9854, "step": 6430 }, { "epoch": 0.2074888437643045, "grad_norm": 0.380859375, "learning_rate": 2.76724351149287e-05, "loss": 1.997, "step": 6431 }, { "epoch": 0.20752110761810083, "grad_norm": 0.392578125, "learning_rate": 2.7671593040233525e-05, "loss": 2.0317, "step": 6432 }, { "epoch": 0.20755337147189717, "grad_norm": 0.390625, "learning_rate": 2.767075082605855e-05, "loss": 2.0477, "step": 6433 }, { "epoch": 0.20758563532569352, "grad_norm": 0.388671875, "learning_rate": 2.7669908472413036e-05, "loss": 2.0191, "step": 6434 }, { "epoch": 0.20761789917948986, "grad_norm": 0.37109375, "learning_rate": 2.766906597930626e-05, "loss": 1.9771, "step": 6435 }, { "epoch": 0.2076501630332862, "grad_norm": 0.396484375, "learning_rate": 2.7668223346747496e-05, "loss": 2.0286, "step": 6436 }, { "epoch": 0.20768242688708255, "grad_norm": 0.41015625, "learning_rate": 2.7667380574746015e-05, "loss": 2.0355, "step": 6437 }, { "epoch": 0.20771469074087892, "grad_norm": 0.3828125, "learning_rate": 2.76665376633111e-05, "loss": 2.0194, "step": 6438 }, { "epoch": 0.20774695459467526, "grad_norm": 0.3828125, "learning_rate": 2.766569461245202e-05, "loss": 2.0376, "step": 6439 }, { "epoch": 0.2077792184484716, "grad_norm": 0.4140625, "learning_rate": 2.7664851422178063e-05, "loss": 2.0212, "step": 6440 }, { "epoch": 0.20781148230226795, "grad_norm": 0.400390625, "learning_rate": 2.7664008092498505e-05, "loss": 2.037, "step": 6441 }, { "epoch": 0.2078437461560643, "grad_norm": 0.451171875, "learning_rate": 2.7663164623422634e-05, "loss": 2.0183, "step": 6442 }, { "epoch": 0.20787601000986064, "grad_norm": 0.4296875, "learning_rate": 2.766232101495973e-05, "loss": 2.0187, "step": 6443 }, { "epoch": 0.20790827386365698, "grad_norm": 0.42578125, "learning_rate": 2.766147726711908e-05, "loss": 2.0095, "step": 6444 }, { "epoch": 0.20794053771745333, "grad_norm": 0.44921875, "learning_rate": 2.7660633379909972e-05, "loss": 2.0262, "step": 6445 }, { "epoch": 0.20797280157124967, "grad_norm": 0.373046875, "learning_rate": 2.7659789353341697e-05, "loss": 1.9814, "step": 6446 }, { "epoch": 0.20800506542504602, "grad_norm": 0.41796875, "learning_rate": 2.7658945187423536e-05, "loss": 1.9959, "step": 6447 }, { "epoch": 0.2080373292788424, "grad_norm": 0.40234375, "learning_rate": 2.7658100882164793e-05, "loss": 1.9898, "step": 6448 }, { "epoch": 0.20806959313263873, "grad_norm": 0.458984375, "learning_rate": 2.7657256437574754e-05, "loss": 2.0004, "step": 6449 }, { "epoch": 0.20810185698643507, "grad_norm": 0.52734375, "learning_rate": 2.765641185366272e-05, "loss": 2.0193, "step": 6450 }, { "epoch": 0.20813412084023142, "grad_norm": 0.4453125, "learning_rate": 2.7655567130437985e-05, "loss": 2.0228, "step": 6451 }, { "epoch": 0.20816638469402776, "grad_norm": 0.400390625, "learning_rate": 2.765472226790984e-05, "loss": 2.012, "step": 6452 }, { "epoch": 0.2081986485478241, "grad_norm": 0.51171875, "learning_rate": 2.7653877266087594e-05, "loss": 2.0419, "step": 6453 }, { "epoch": 0.20823091240162045, "grad_norm": 0.4375, "learning_rate": 2.7653032124980547e-05, "loss": 2.0346, "step": 6454 }, { "epoch": 0.2082631762554168, "grad_norm": 0.4765625, "learning_rate": 2.7652186844597993e-05, "loss": 2.0212, "step": 6455 }, { "epoch": 0.20829544010921314, "grad_norm": 0.396484375, "learning_rate": 2.765134142494925e-05, "loss": 2.0301, "step": 6456 }, { "epoch": 0.20832770396300948, "grad_norm": 0.404296875, "learning_rate": 2.765049586604361e-05, "loss": 1.9809, "step": 6457 }, { "epoch": 0.20835996781680582, "grad_norm": 0.400390625, "learning_rate": 2.764965016789039e-05, "loss": 2.0083, "step": 6458 }, { "epoch": 0.2083922316706022, "grad_norm": 0.384765625, "learning_rate": 2.76488043304989e-05, "loss": 2.0414, "step": 6459 }, { "epoch": 0.20842449552439854, "grad_norm": 0.443359375, "learning_rate": 2.764795835387844e-05, "loss": 1.9913, "step": 6460 }, { "epoch": 0.20845675937819488, "grad_norm": 0.390625, "learning_rate": 2.7647112238038333e-05, "loss": 2.0193, "step": 6461 }, { "epoch": 0.20848902323199123, "grad_norm": 0.396484375, "learning_rate": 2.7646265982987886e-05, "loss": 1.9948, "step": 6462 }, { "epoch": 0.20852128708578757, "grad_norm": 0.412109375, "learning_rate": 2.7645419588736416e-05, "loss": 1.9973, "step": 6463 }, { "epoch": 0.20855355093958391, "grad_norm": 0.484375, "learning_rate": 2.7644573055293236e-05, "loss": 2.0004, "step": 6464 }, { "epoch": 0.20858581479338026, "grad_norm": 0.470703125, "learning_rate": 2.7643726382667668e-05, "loss": 2.031, "step": 6465 }, { "epoch": 0.2086180786471766, "grad_norm": 0.40625, "learning_rate": 2.7642879570869034e-05, "loss": 2.0569, "step": 6466 }, { "epoch": 0.20865034250097295, "grad_norm": 0.45703125, "learning_rate": 2.7642032619906648e-05, "loss": 2.0476, "step": 6467 }, { "epoch": 0.2086826063547693, "grad_norm": 0.412109375, "learning_rate": 2.7641185529789836e-05, "loss": 2.0019, "step": 6468 }, { "epoch": 0.20871487020856566, "grad_norm": 0.423828125, "learning_rate": 2.764033830052792e-05, "loss": 2.0023, "step": 6469 }, { "epoch": 0.208747134062362, "grad_norm": 0.4609375, "learning_rate": 2.7639490932130236e-05, "loss": 2.0133, "step": 6470 }, { "epoch": 0.20877939791615835, "grad_norm": 0.400390625, "learning_rate": 2.7638643424606094e-05, "loss": 2.0181, "step": 6471 }, { "epoch": 0.2088116617699547, "grad_norm": 0.478515625, "learning_rate": 2.763779577796484e-05, "loss": 2.0304, "step": 6472 }, { "epoch": 0.20884392562375104, "grad_norm": 0.45703125, "learning_rate": 2.7636947992215795e-05, "loss": 2.0497, "step": 6473 }, { "epoch": 0.20887618947754738, "grad_norm": 0.484375, "learning_rate": 2.763610006736829e-05, "loss": 1.9916, "step": 6474 }, { "epoch": 0.20890845333134372, "grad_norm": 0.451171875, "learning_rate": 2.763525200343166e-05, "loss": 2.0087, "step": 6475 }, { "epoch": 0.20894071718514007, "grad_norm": 0.4375, "learning_rate": 2.763440380041524e-05, "loss": 2.0028, "step": 6476 }, { "epoch": 0.2089729810389364, "grad_norm": 0.458984375, "learning_rate": 2.7633555458328372e-05, "loss": 1.9967, "step": 6477 }, { "epoch": 0.20900524489273276, "grad_norm": 0.41015625, "learning_rate": 2.7632706977180387e-05, "loss": 1.982, "step": 6478 }, { "epoch": 0.20903750874652913, "grad_norm": 0.42578125, "learning_rate": 2.763185835698062e-05, "loss": 2.0189, "step": 6479 }, { "epoch": 0.20906977260032547, "grad_norm": 0.421875, "learning_rate": 2.7631009597738427e-05, "loss": 1.9961, "step": 6480 }, { "epoch": 0.20910203645412181, "grad_norm": 0.447265625, "learning_rate": 2.7630160699463134e-05, "loss": 1.9504, "step": 6481 }, { "epoch": 0.20913430030791816, "grad_norm": 0.453125, "learning_rate": 2.76293116621641e-05, "loss": 2.0001, "step": 6482 }, { "epoch": 0.2091665641617145, "grad_norm": 0.3828125, "learning_rate": 2.762846248585066e-05, "loss": 2.0133, "step": 6483 }, { "epoch": 0.20919882801551085, "grad_norm": 0.4453125, "learning_rate": 2.7627613170532166e-05, "loss": 2.013, "step": 6484 }, { "epoch": 0.2092310918693072, "grad_norm": 0.4375, "learning_rate": 2.7626763716217958e-05, "loss": 2.0108, "step": 6485 }, { "epoch": 0.20926335572310353, "grad_norm": 0.41796875, "learning_rate": 2.76259141229174e-05, "loss": 2.0306, "step": 6486 }, { "epoch": 0.20929561957689988, "grad_norm": 0.51171875, "learning_rate": 2.7625064390639837e-05, "loss": 2.0334, "step": 6487 }, { "epoch": 0.20932788343069622, "grad_norm": 0.455078125, "learning_rate": 2.7624214519394624e-05, "loss": 1.9529, "step": 6488 }, { "epoch": 0.2093601472844926, "grad_norm": 0.4609375, "learning_rate": 2.762336450919111e-05, "loss": 1.9754, "step": 6489 }, { "epoch": 0.20939241113828894, "grad_norm": 0.416015625, "learning_rate": 2.762251436003866e-05, "loss": 2.0192, "step": 6490 }, { "epoch": 0.20942467499208528, "grad_norm": 0.4296875, "learning_rate": 2.762166407194662e-05, "loss": 2.0213, "step": 6491 }, { "epoch": 0.20945693884588162, "grad_norm": 0.404296875, "learning_rate": 2.762081364492436e-05, "loss": 1.9798, "step": 6492 }, { "epoch": 0.20948920269967797, "grad_norm": 0.431640625, "learning_rate": 2.761996307898124e-05, "loss": 2.0158, "step": 6493 }, { "epoch": 0.2095214665534743, "grad_norm": 0.412109375, "learning_rate": 2.761911237412662e-05, "loss": 1.9826, "step": 6494 }, { "epoch": 0.20955373040727066, "grad_norm": 0.453125, "learning_rate": 2.7618261530369862e-05, "loss": 2.0049, "step": 6495 }, { "epoch": 0.209585994261067, "grad_norm": 0.48046875, "learning_rate": 2.7617410547720335e-05, "loss": 1.99, "step": 6496 }, { "epoch": 0.20961825811486334, "grad_norm": 0.447265625, "learning_rate": 2.7616559426187403e-05, "loss": 2.0172, "step": 6497 }, { "epoch": 0.2096505219686597, "grad_norm": 0.4296875, "learning_rate": 2.7615708165780434e-05, "loss": 1.9945, "step": 6498 }, { "epoch": 0.20968278582245606, "grad_norm": 0.427734375, "learning_rate": 2.7614856766508803e-05, "loss": 2.0116, "step": 6499 }, { "epoch": 0.2097150496762524, "grad_norm": 0.421875, "learning_rate": 2.761400522838188e-05, "loss": 2.0447, "step": 6500 }, { "epoch": 0.20974731353004875, "grad_norm": 0.40625, "learning_rate": 2.7613153551409033e-05, "loss": 2.0211, "step": 6501 }, { "epoch": 0.2097795773838451, "grad_norm": 0.412109375, "learning_rate": 2.7612301735599643e-05, "loss": 1.9603, "step": 6502 }, { "epoch": 0.20981184123764143, "grad_norm": 0.41796875, "learning_rate": 2.7611449780963082e-05, "loss": 2.0043, "step": 6503 }, { "epoch": 0.20984410509143778, "grad_norm": 0.39453125, "learning_rate": 2.761059768750873e-05, "loss": 1.9898, "step": 6504 }, { "epoch": 0.20987636894523412, "grad_norm": 0.47265625, "learning_rate": 2.7609745455245963e-05, "loss": 2.0251, "step": 6505 }, { "epoch": 0.20990863279903046, "grad_norm": 0.427734375, "learning_rate": 2.7608893084184165e-05, "loss": 1.9918, "step": 6506 }, { "epoch": 0.2099408966528268, "grad_norm": 0.46484375, "learning_rate": 2.760804057433272e-05, "loss": 2.0319, "step": 6507 }, { "epoch": 0.20997316050662315, "grad_norm": 0.4609375, "learning_rate": 2.7607187925701007e-05, "loss": 1.9874, "step": 6508 }, { "epoch": 0.21000542436041952, "grad_norm": 0.431640625, "learning_rate": 2.7606335138298416e-05, "loss": 1.9693, "step": 6509 }, { "epoch": 0.21003768821421587, "grad_norm": 0.515625, "learning_rate": 2.7605482212134332e-05, "loss": 2.0085, "step": 6510 }, { "epoch": 0.2100699520680122, "grad_norm": 0.62890625, "learning_rate": 2.760462914721814e-05, "loss": 2.0039, "step": 6511 }, { "epoch": 0.21010221592180855, "grad_norm": 0.56640625, "learning_rate": 2.7603775943559233e-05, "loss": 2.0188, "step": 6512 }, { "epoch": 0.2101344797756049, "grad_norm": 0.390625, "learning_rate": 2.7602922601167e-05, "loss": 1.995, "step": 6513 }, { "epoch": 0.21016674362940124, "grad_norm": 0.5078125, "learning_rate": 2.760206912005084e-05, "loss": 2.0154, "step": 6514 }, { "epoch": 0.2101990074831976, "grad_norm": 0.41796875, "learning_rate": 2.7601215500220146e-05, "loss": 2.0317, "step": 6515 }, { "epoch": 0.21023127133699393, "grad_norm": 0.435546875, "learning_rate": 2.7600361741684313e-05, "loss": 1.9878, "step": 6516 }, { "epoch": 0.21026353519079027, "grad_norm": 0.447265625, "learning_rate": 2.759950784445273e-05, "loss": 2.0364, "step": 6517 }, { "epoch": 0.21029579904458662, "grad_norm": 0.390625, "learning_rate": 2.759865380853481e-05, "loss": 2.0203, "step": 6518 }, { "epoch": 0.210328062898383, "grad_norm": 0.474609375, "learning_rate": 2.759779963393994e-05, "loss": 2.011, "step": 6519 }, { "epoch": 0.21036032675217933, "grad_norm": 0.443359375, "learning_rate": 2.7596945320677536e-05, "loss": 1.9824, "step": 6520 }, { "epoch": 0.21039259060597568, "grad_norm": 0.400390625, "learning_rate": 2.7596090868756992e-05, "loss": 2.026, "step": 6521 }, { "epoch": 0.21042485445977202, "grad_norm": 0.44140625, "learning_rate": 2.759523627818772e-05, "loss": 2.0197, "step": 6522 }, { "epoch": 0.21045711831356836, "grad_norm": 0.48046875, "learning_rate": 2.759438154897912e-05, "loss": 2.0257, "step": 6523 }, { "epoch": 0.2104893821673647, "grad_norm": 0.51171875, "learning_rate": 2.75935266811406e-05, "loss": 2.0018, "step": 6524 }, { "epoch": 0.21052164602116105, "grad_norm": 0.4453125, "learning_rate": 2.7592671674681578e-05, "loss": 2.0215, "step": 6525 }, { "epoch": 0.2105539098749574, "grad_norm": 0.447265625, "learning_rate": 2.7591816529611453e-05, "loss": 2.0133, "step": 6526 }, { "epoch": 0.21058617372875374, "grad_norm": 0.44921875, "learning_rate": 2.7590961245939652e-05, "loss": 1.9734, "step": 6527 }, { "epoch": 0.21061843758255008, "grad_norm": 0.38671875, "learning_rate": 2.759010582367558e-05, "loss": 1.9978, "step": 6528 }, { "epoch": 0.21065070143634645, "grad_norm": 0.46484375, "learning_rate": 2.7589250262828652e-05, "loss": 1.9861, "step": 6529 }, { "epoch": 0.2106829652901428, "grad_norm": 0.392578125, "learning_rate": 2.7588394563408293e-05, "loss": 2.0343, "step": 6530 }, { "epoch": 0.21071522914393914, "grad_norm": 0.4375, "learning_rate": 2.758753872542392e-05, "loss": 2.0233, "step": 6531 }, { "epoch": 0.21074749299773549, "grad_norm": 0.390625, "learning_rate": 2.7586682748884945e-05, "loss": 2.0075, "step": 6532 }, { "epoch": 0.21077975685153183, "grad_norm": 0.408203125, "learning_rate": 2.7585826633800796e-05, "loss": 2.0485, "step": 6533 }, { "epoch": 0.21081202070532817, "grad_norm": 0.40625, "learning_rate": 2.75849703801809e-05, "loss": 1.9982, "step": 6534 }, { "epoch": 0.21084428455912452, "grad_norm": 0.41796875, "learning_rate": 2.7584113988034678e-05, "loss": 2.0117, "step": 6535 }, { "epoch": 0.21087654841292086, "grad_norm": 0.447265625, "learning_rate": 2.7583257457371557e-05, "loss": 1.9932, "step": 6536 }, { "epoch": 0.2109088122667172, "grad_norm": 0.392578125, "learning_rate": 2.758240078820096e-05, "loss": 1.9959, "step": 6537 }, { "epoch": 0.21094107612051355, "grad_norm": 0.43359375, "learning_rate": 2.7581543980532327e-05, "loss": 2.0157, "step": 6538 }, { "epoch": 0.21097333997430992, "grad_norm": 0.404296875, "learning_rate": 2.7580687034375083e-05, "loss": 2.0069, "step": 6539 }, { "epoch": 0.21100560382810626, "grad_norm": 0.404296875, "learning_rate": 2.7579829949738663e-05, "loss": 2.0225, "step": 6540 }, { "epoch": 0.2110378676819026, "grad_norm": 0.375, "learning_rate": 2.7578972726632497e-05, "loss": 1.9922, "step": 6541 }, { "epoch": 0.21107013153569895, "grad_norm": 0.416015625, "learning_rate": 2.7578115365066024e-05, "loss": 2.0229, "step": 6542 }, { "epoch": 0.2111023953894953, "grad_norm": 0.375, "learning_rate": 2.757725786504868e-05, "loss": 2.0264, "step": 6543 }, { "epoch": 0.21113465924329164, "grad_norm": 0.421875, "learning_rate": 2.7576400226589905e-05, "loss": 2.0009, "step": 6544 }, { "epoch": 0.21116692309708798, "grad_norm": 0.40234375, "learning_rate": 2.757554244969914e-05, "loss": 1.9957, "step": 6545 }, { "epoch": 0.21119918695088433, "grad_norm": 0.390625, "learning_rate": 2.7574684534385824e-05, "loss": 2.0556, "step": 6546 }, { "epoch": 0.21123145080468067, "grad_norm": 0.404296875, "learning_rate": 2.7573826480659398e-05, "loss": 2.0351, "step": 6547 }, { "epoch": 0.21126371465847701, "grad_norm": 0.39453125, "learning_rate": 2.757296828852932e-05, "loss": 1.9994, "step": 6548 }, { "epoch": 0.21129597851227336, "grad_norm": 0.4140625, "learning_rate": 2.7572109958005015e-05, "loss": 1.9924, "step": 6549 }, { "epoch": 0.21132824236606973, "grad_norm": 0.423828125, "learning_rate": 2.757125148909595e-05, "loss": 2.0261, "step": 6550 }, { "epoch": 0.21136050621986607, "grad_norm": 0.376953125, "learning_rate": 2.7570392881811565e-05, "loss": 2.0139, "step": 6551 }, { "epoch": 0.21139277007366242, "grad_norm": 0.404296875, "learning_rate": 2.7569534136161316e-05, "loss": 2.0243, "step": 6552 }, { "epoch": 0.21142503392745876, "grad_norm": 0.4140625, "learning_rate": 2.7568675252154655e-05, "loss": 2.0072, "step": 6553 }, { "epoch": 0.2114572977812551, "grad_norm": 0.41015625, "learning_rate": 2.7567816229801026e-05, "loss": 2.0273, "step": 6554 }, { "epoch": 0.21148956163505145, "grad_norm": 0.39453125, "learning_rate": 2.7566957069109897e-05, "loss": 2.0107, "step": 6555 }, { "epoch": 0.2115218254888478, "grad_norm": 0.431640625, "learning_rate": 2.756609777009072e-05, "loss": 1.9851, "step": 6556 }, { "epoch": 0.21155408934264414, "grad_norm": 0.400390625, "learning_rate": 2.756523833275295e-05, "loss": 1.9568, "step": 6557 }, { "epoch": 0.21158635319644048, "grad_norm": 0.41015625, "learning_rate": 2.756437875710605e-05, "loss": 1.9974, "step": 6558 }, { "epoch": 0.21161861705023682, "grad_norm": 0.42578125, "learning_rate": 2.756351904315949e-05, "loss": 1.9765, "step": 6559 }, { "epoch": 0.2116508809040332, "grad_norm": 0.357421875, "learning_rate": 2.7562659190922718e-05, "loss": 1.9967, "step": 6560 }, { "epoch": 0.21168314475782954, "grad_norm": 0.42578125, "learning_rate": 2.756179920040521e-05, "loss": 1.9889, "step": 6561 }, { "epoch": 0.21171540861162588, "grad_norm": 0.390625, "learning_rate": 2.7560939071616425e-05, "loss": 2.0201, "step": 6562 }, { "epoch": 0.21174767246542223, "grad_norm": 0.44921875, "learning_rate": 2.7560078804565836e-05, "loss": 2.0034, "step": 6563 }, { "epoch": 0.21177993631921857, "grad_norm": 0.45703125, "learning_rate": 2.7559218399262906e-05, "loss": 2.0244, "step": 6564 }, { "epoch": 0.21181220017301491, "grad_norm": 0.462890625, "learning_rate": 2.7558357855717115e-05, "loss": 1.9829, "step": 6565 }, { "epoch": 0.21184446402681126, "grad_norm": 0.515625, "learning_rate": 2.7557497173937928e-05, "loss": 2.023, "step": 6566 }, { "epoch": 0.2118767278806076, "grad_norm": 0.44921875, "learning_rate": 2.7556636353934823e-05, "loss": 2.0128, "step": 6567 }, { "epoch": 0.21190899173440395, "grad_norm": 0.404296875, "learning_rate": 2.7555775395717272e-05, "loss": 1.9861, "step": 6568 }, { "epoch": 0.2119412555882003, "grad_norm": 0.40234375, "learning_rate": 2.7554914299294753e-05, "loss": 1.9847, "step": 6569 }, { "epoch": 0.21197351944199666, "grad_norm": 0.45703125, "learning_rate": 2.755405306467674e-05, "loss": 2.0285, "step": 6570 }, { "epoch": 0.212005783295793, "grad_norm": 0.462890625, "learning_rate": 2.755319169187272e-05, "loss": 1.9965, "step": 6571 }, { "epoch": 0.21203804714958935, "grad_norm": 0.380859375, "learning_rate": 2.7552330180892176e-05, "loss": 1.9804, "step": 6572 }, { "epoch": 0.2120703110033857, "grad_norm": 0.458984375, "learning_rate": 2.755146853174458e-05, "loss": 1.9667, "step": 6573 }, { "epoch": 0.21210257485718204, "grad_norm": 0.466796875, "learning_rate": 2.755060674443943e-05, "loss": 2.0002, "step": 6574 }, { "epoch": 0.21213483871097838, "grad_norm": 0.44921875, "learning_rate": 2.7549744818986197e-05, "loss": 2.0094, "step": 6575 }, { "epoch": 0.21216710256477472, "grad_norm": 0.400390625, "learning_rate": 2.754888275539438e-05, "loss": 2.0343, "step": 6576 }, { "epoch": 0.21219936641857107, "grad_norm": 0.408203125, "learning_rate": 2.754802055367346e-05, "loss": 2.0019, "step": 6577 }, { "epoch": 0.2122316302723674, "grad_norm": 0.421875, "learning_rate": 2.754715821383294e-05, "loss": 2.0508, "step": 6578 }, { "epoch": 0.21226389412616375, "grad_norm": 0.42578125, "learning_rate": 2.7546295735882293e-05, "loss": 1.9937, "step": 6579 }, { "epoch": 0.21229615797996013, "grad_norm": 0.390625, "learning_rate": 2.754543311983103e-05, "loss": 1.9782, "step": 6580 }, { "epoch": 0.21232842183375647, "grad_norm": 0.400390625, "learning_rate": 2.754457036568864e-05, "loss": 1.9747, "step": 6581 }, { "epoch": 0.2123606856875528, "grad_norm": 0.390625, "learning_rate": 2.7543707473464615e-05, "loss": 1.9935, "step": 6582 }, { "epoch": 0.21239294954134916, "grad_norm": 0.412109375, "learning_rate": 2.754284444316846e-05, "loss": 1.9756, "step": 6583 }, { "epoch": 0.2124252133951455, "grad_norm": 0.431640625, "learning_rate": 2.7541981274809675e-05, "loss": 1.9949, "step": 6584 }, { "epoch": 0.21245747724894185, "grad_norm": 0.431640625, "learning_rate": 2.754111796839775e-05, "loss": 1.9708, "step": 6585 }, { "epoch": 0.2124897411027382, "grad_norm": 0.38671875, "learning_rate": 2.75402545239422e-05, "loss": 1.9826, "step": 6586 }, { "epoch": 0.21252200495653453, "grad_norm": 0.45703125, "learning_rate": 2.753939094145252e-05, "loss": 1.9936, "step": 6587 }, { "epoch": 0.21255426881033088, "grad_norm": 0.5625, "learning_rate": 2.7538527220938225e-05, "loss": 1.9904, "step": 6588 }, { "epoch": 0.21258653266412722, "grad_norm": 0.5078125, "learning_rate": 2.7537663362408818e-05, "loss": 1.9879, "step": 6589 }, { "epoch": 0.2126187965179236, "grad_norm": 0.443359375, "learning_rate": 2.7536799365873806e-05, "loss": 1.9688, "step": 6590 }, { "epoch": 0.21265106037171994, "grad_norm": 0.4375, "learning_rate": 2.7535935231342696e-05, "loss": 2.0063, "step": 6591 }, { "epoch": 0.21268332422551628, "grad_norm": 0.498046875, "learning_rate": 2.7535070958825007e-05, "loss": 1.9662, "step": 6592 }, { "epoch": 0.21271558807931262, "grad_norm": 0.486328125, "learning_rate": 2.7534206548330252e-05, "loss": 1.9681, "step": 6593 }, { "epoch": 0.21274785193310897, "grad_norm": 0.39453125, "learning_rate": 2.7533341999867942e-05, "loss": 1.9302, "step": 6594 }, { "epoch": 0.2127801157869053, "grad_norm": 0.48046875, "learning_rate": 2.7532477313447593e-05, "loss": 1.9199, "step": 6595 }, { "epoch": 0.21281237964070165, "grad_norm": 0.46484375, "learning_rate": 2.7531612489078726e-05, "loss": 1.9862, "step": 6596 }, { "epoch": 0.212844643494498, "grad_norm": 0.453125, "learning_rate": 2.753074752677086e-05, "loss": 1.9328, "step": 6597 }, { "epoch": 0.21287690734829434, "grad_norm": 0.482421875, "learning_rate": 2.752988242653351e-05, "loss": 1.9313, "step": 6598 }, { "epoch": 0.21290917120209069, "grad_norm": 0.46484375, "learning_rate": 2.7529017188376207e-05, "loss": 1.9852, "step": 6599 }, { "epoch": 0.21294143505588706, "grad_norm": 0.443359375, "learning_rate": 2.752815181230847e-05, "loss": 1.9927, "step": 6600 }, { "epoch": 0.2129736989096834, "grad_norm": 0.51171875, "learning_rate": 2.752728629833983e-05, "loss": 2.0189, "step": 6601 }, { "epoch": 0.21300596276347974, "grad_norm": 0.42578125, "learning_rate": 2.75264206464798e-05, "loss": 1.9805, "step": 6602 }, { "epoch": 0.2130382266172761, "grad_norm": 0.4375, "learning_rate": 2.752555485673793e-05, "loss": 1.9407, "step": 6603 }, { "epoch": 0.21307049047107243, "grad_norm": 0.494140625, "learning_rate": 2.752468892912373e-05, "loss": 2.0109, "step": 6604 }, { "epoch": 0.21310275432486878, "grad_norm": 0.40625, "learning_rate": 2.7523822863646742e-05, "loss": 2.0145, "step": 6605 }, { "epoch": 0.21313501817866512, "grad_norm": 0.46875, "learning_rate": 2.752295666031649e-05, "loss": 2.0241, "step": 6606 }, { "epoch": 0.21316728203246146, "grad_norm": 0.3984375, "learning_rate": 2.752209031914252e-05, "loss": 2.0162, "step": 6607 }, { "epoch": 0.2131995458862578, "grad_norm": 0.43359375, "learning_rate": 2.752122384013437e-05, "loss": 2.0326, "step": 6608 }, { "epoch": 0.21323180974005415, "grad_norm": 0.42578125, "learning_rate": 2.752035722330156e-05, "loss": 1.9739, "step": 6609 }, { "epoch": 0.21326407359385052, "grad_norm": 0.408203125, "learning_rate": 2.7519490468653645e-05, "loss": 2.0118, "step": 6610 }, { "epoch": 0.21329633744764687, "grad_norm": 0.390625, "learning_rate": 2.7518623576200157e-05, "loss": 1.9919, "step": 6611 }, { "epoch": 0.2133286013014432, "grad_norm": 0.400390625, "learning_rate": 2.7517756545950643e-05, "loss": 2.0287, "step": 6612 }, { "epoch": 0.21336086515523955, "grad_norm": 0.38671875, "learning_rate": 2.751688937791465e-05, "loss": 2.0251, "step": 6613 }, { "epoch": 0.2133931290090359, "grad_norm": 0.40625, "learning_rate": 2.751602207210171e-05, "loss": 1.9989, "step": 6614 }, { "epoch": 0.21342539286283224, "grad_norm": 0.443359375, "learning_rate": 2.7515154628521382e-05, "loss": 2.0036, "step": 6615 }, { "epoch": 0.21345765671662859, "grad_norm": 0.38671875, "learning_rate": 2.751428704718321e-05, "loss": 2.0344, "step": 6616 }, { "epoch": 0.21348992057042493, "grad_norm": 0.421875, "learning_rate": 2.7513419328096747e-05, "loss": 2.0288, "step": 6617 }, { "epoch": 0.21352218442422127, "grad_norm": 0.388671875, "learning_rate": 2.7512551471271538e-05, "loss": 2.0257, "step": 6618 }, { "epoch": 0.21355444827801762, "grad_norm": 0.44921875, "learning_rate": 2.751168347671714e-05, "loss": 2.0455, "step": 6619 }, { "epoch": 0.213586712131814, "grad_norm": 0.42578125, "learning_rate": 2.7510815344443104e-05, "loss": 2.0312, "step": 6620 }, { "epoch": 0.21361897598561033, "grad_norm": 0.392578125, "learning_rate": 2.7509947074458988e-05, "loss": 2.0103, "step": 6621 }, { "epoch": 0.21365123983940668, "grad_norm": 0.392578125, "learning_rate": 2.7509078666774348e-05, "loss": 2.0223, "step": 6622 }, { "epoch": 0.21368350369320302, "grad_norm": 0.416015625, "learning_rate": 2.750821012139875e-05, "loss": 1.9933, "step": 6623 }, { "epoch": 0.21371576754699936, "grad_norm": 0.427734375, "learning_rate": 2.750734143834174e-05, "loss": 2.0108, "step": 6624 }, { "epoch": 0.2137480314007957, "grad_norm": 0.412109375, "learning_rate": 2.7506472617612896e-05, "loss": 2.0009, "step": 6625 }, { "epoch": 0.21378029525459205, "grad_norm": 0.375, "learning_rate": 2.7505603659221773e-05, "loss": 1.9901, "step": 6626 }, { "epoch": 0.2138125591083884, "grad_norm": 0.41015625, "learning_rate": 2.7504734563177933e-05, "loss": 2.0036, "step": 6627 }, { "epoch": 0.21384482296218474, "grad_norm": 0.3828125, "learning_rate": 2.7503865329490945e-05, "loss": 2.0002, "step": 6628 }, { "epoch": 0.21387708681598108, "grad_norm": 0.390625, "learning_rate": 2.750299595817038e-05, "loss": 2.006, "step": 6629 }, { "epoch": 0.21390935066977743, "grad_norm": 0.4375, "learning_rate": 2.7502126449225807e-05, "loss": 2.0276, "step": 6630 }, { "epoch": 0.2139416145235738, "grad_norm": 0.44140625, "learning_rate": 2.7501256802666788e-05, "loss": 2.0062, "step": 6631 }, { "epoch": 0.21397387837737014, "grad_norm": 0.40234375, "learning_rate": 2.750038701850291e-05, "loss": 1.9639, "step": 6632 }, { "epoch": 0.21400614223116649, "grad_norm": 0.431640625, "learning_rate": 2.7499517096743732e-05, "loss": 2.0058, "step": 6633 }, { "epoch": 0.21403840608496283, "grad_norm": 0.484375, "learning_rate": 2.7498647037398842e-05, "loss": 1.994, "step": 6634 }, { "epoch": 0.21407066993875917, "grad_norm": 0.515625, "learning_rate": 2.7497776840477814e-05, "loss": 2.0068, "step": 6635 }, { "epoch": 0.21410293379255552, "grad_norm": 0.5, "learning_rate": 2.7496906505990222e-05, "loss": 2.0129, "step": 6636 }, { "epoch": 0.21413519764635186, "grad_norm": 0.37109375, "learning_rate": 2.7496036033945646e-05, "loss": 1.9949, "step": 6637 }, { "epoch": 0.2141674615001482, "grad_norm": 0.490234375, "learning_rate": 2.7495165424353672e-05, "loss": 2.0283, "step": 6638 }, { "epoch": 0.21419972535394455, "grad_norm": 0.3984375, "learning_rate": 2.7494294677223884e-05, "loss": 1.9998, "step": 6639 }, { "epoch": 0.2142319892077409, "grad_norm": 0.4453125, "learning_rate": 2.749342379256586e-05, "loss": 1.9693, "step": 6640 }, { "epoch": 0.21426425306153726, "grad_norm": 0.3984375, "learning_rate": 2.7492552770389192e-05, "loss": 2.0017, "step": 6641 }, { "epoch": 0.2142965169153336, "grad_norm": 0.41015625, "learning_rate": 2.7491681610703458e-05, "loss": 1.9936, "step": 6642 }, { "epoch": 0.21432878076912995, "grad_norm": 0.40234375, "learning_rate": 2.7490810313518263e-05, "loss": 1.9989, "step": 6643 }, { "epoch": 0.2143610446229263, "grad_norm": 0.38671875, "learning_rate": 2.7489938878843185e-05, "loss": 1.9622, "step": 6644 }, { "epoch": 0.21439330847672264, "grad_norm": 0.400390625, "learning_rate": 2.7489067306687822e-05, "loss": 1.9978, "step": 6645 }, { "epoch": 0.21442557233051898, "grad_norm": 0.390625, "learning_rate": 2.7488195597061766e-05, "loss": 2.0033, "step": 6646 }, { "epoch": 0.21445783618431533, "grad_norm": 0.4921875, "learning_rate": 2.7487323749974606e-05, "loss": 2.0153, "step": 6647 }, { "epoch": 0.21449010003811167, "grad_norm": 0.455078125, "learning_rate": 2.748645176543595e-05, "loss": 2.0202, "step": 6648 }, { "epoch": 0.214522363891908, "grad_norm": 0.3984375, "learning_rate": 2.748557964345539e-05, "loss": 2.0167, "step": 6649 }, { "epoch": 0.21455462774570436, "grad_norm": 0.45703125, "learning_rate": 2.7484707384042526e-05, "loss": 2.0157, "step": 6650 }, { "epoch": 0.21458689159950073, "grad_norm": 0.52734375, "learning_rate": 2.748383498720696e-05, "loss": 2.0212, "step": 6651 }, { "epoch": 0.21461915545329707, "grad_norm": 0.5625, "learning_rate": 2.7482962452958292e-05, "loss": 1.995, "step": 6652 }, { "epoch": 0.21465141930709342, "grad_norm": 0.451171875, "learning_rate": 2.748208978130613e-05, "loss": 2.0121, "step": 6653 }, { "epoch": 0.21468368316088976, "grad_norm": 0.47265625, "learning_rate": 2.7481216972260078e-05, "loss": 1.9862, "step": 6654 }, { "epoch": 0.2147159470146861, "grad_norm": 0.51171875, "learning_rate": 2.7480344025829747e-05, "loss": 2.0129, "step": 6655 }, { "epoch": 0.21474821086848245, "grad_norm": 0.453125, "learning_rate": 2.747947094202474e-05, "loss": 2.0062, "step": 6656 }, { "epoch": 0.2147804747222788, "grad_norm": 0.474609375, "learning_rate": 2.7478597720854667e-05, "loss": 2.0128, "step": 6657 }, { "epoch": 0.21481273857607514, "grad_norm": 0.55859375, "learning_rate": 2.7477724362329143e-05, "loss": 2.0206, "step": 6658 }, { "epoch": 0.21484500242987148, "grad_norm": 0.51171875, "learning_rate": 2.7476850866457782e-05, "loss": 2.0227, "step": 6659 }, { "epoch": 0.21487726628366782, "grad_norm": 0.53515625, "learning_rate": 2.74759772332502e-05, "loss": 2.0337, "step": 6660 }, { "epoch": 0.2149095301374642, "grad_norm": 0.546875, "learning_rate": 2.747510346271601e-05, "loss": 2.0143, "step": 6661 }, { "epoch": 0.21494179399126054, "grad_norm": 0.470703125, "learning_rate": 2.747422955486483e-05, "loss": 2.0227, "step": 6662 }, { "epoch": 0.21497405784505688, "grad_norm": 0.494140625, "learning_rate": 2.7473355509706278e-05, "loss": 1.9872, "step": 6663 }, { "epoch": 0.21500632169885323, "grad_norm": 0.478515625, "learning_rate": 2.7472481327249976e-05, "loss": 1.9944, "step": 6664 }, { "epoch": 0.21503858555264957, "grad_norm": 0.494140625, "learning_rate": 2.747160700750555e-05, "loss": 1.9776, "step": 6665 }, { "epoch": 0.2150708494064459, "grad_norm": 0.42578125, "learning_rate": 2.747073255048262e-05, "loss": 2.0197, "step": 6666 }, { "epoch": 0.21510311326024226, "grad_norm": 0.451171875, "learning_rate": 2.7469857956190817e-05, "loss": 2.0155, "step": 6667 }, { "epoch": 0.2151353771140386, "grad_norm": 0.421875, "learning_rate": 2.746898322463976e-05, "loss": 2.0188, "step": 6668 }, { "epoch": 0.21516764096783494, "grad_norm": 0.474609375, "learning_rate": 2.746810835583908e-05, "loss": 2.028, "step": 6669 }, { "epoch": 0.2151999048216313, "grad_norm": 0.39453125, "learning_rate": 2.7467233349798407e-05, "loss": 2.0074, "step": 6670 }, { "epoch": 0.21523216867542766, "grad_norm": 0.4296875, "learning_rate": 2.7466358206527375e-05, "loss": 2.0086, "step": 6671 }, { "epoch": 0.215264432529224, "grad_norm": 0.3984375, "learning_rate": 2.7465482926035616e-05, "loss": 1.9956, "step": 6672 }, { "epoch": 0.21529669638302035, "grad_norm": 0.427734375, "learning_rate": 2.746460750833276e-05, "loss": 2.0028, "step": 6673 }, { "epoch": 0.2153289602368167, "grad_norm": 0.474609375, "learning_rate": 2.7463731953428448e-05, "loss": 2.0332, "step": 6674 }, { "epoch": 0.21536122409061303, "grad_norm": 0.3828125, "learning_rate": 2.7462856261332318e-05, "loss": 2.0082, "step": 6675 }, { "epoch": 0.21539348794440938, "grad_norm": 0.392578125, "learning_rate": 2.7461980432054008e-05, "loss": 1.9882, "step": 6676 }, { "epoch": 0.21542575179820572, "grad_norm": 0.390625, "learning_rate": 2.7461104465603153e-05, "loss": 1.9481, "step": 6677 }, { "epoch": 0.21545801565200207, "grad_norm": 0.404296875, "learning_rate": 2.7460228361989405e-05, "loss": 1.9515, "step": 6678 }, { "epoch": 0.2154902795057984, "grad_norm": 0.4375, "learning_rate": 2.74593521212224e-05, "loss": 1.9561, "step": 6679 }, { "epoch": 0.21552254335959475, "grad_norm": 0.42578125, "learning_rate": 2.7458475743311787e-05, "loss": 2.0373, "step": 6680 }, { "epoch": 0.21555480721339113, "grad_norm": 0.380859375, "learning_rate": 2.7457599228267207e-05, "loss": 2.0274, "step": 6681 }, { "epoch": 0.21558707106718747, "grad_norm": 0.4296875, "learning_rate": 2.7456722576098313e-05, "loss": 2.028, "step": 6682 }, { "epoch": 0.2156193349209838, "grad_norm": 0.404296875, "learning_rate": 2.745584578681475e-05, "loss": 2.0009, "step": 6683 }, { "epoch": 0.21565159877478016, "grad_norm": 0.376953125, "learning_rate": 2.7454968860426178e-05, "loss": 2.0145, "step": 6684 }, { "epoch": 0.2156838626285765, "grad_norm": 0.396484375, "learning_rate": 2.7454091796942242e-05, "loss": 1.9989, "step": 6685 }, { "epoch": 0.21571612648237284, "grad_norm": 0.400390625, "learning_rate": 2.74532145963726e-05, "loss": 2.0341, "step": 6686 }, { "epoch": 0.2157483903361692, "grad_norm": 0.37109375, "learning_rate": 2.74523372587269e-05, "loss": 2.0023, "step": 6687 }, { "epoch": 0.21578065418996553, "grad_norm": 0.392578125, "learning_rate": 2.7451459784014808e-05, "loss": 2.0046, "step": 6688 }, { "epoch": 0.21581291804376188, "grad_norm": 0.365234375, "learning_rate": 2.7450582172245978e-05, "loss": 2.0228, "step": 6689 }, { "epoch": 0.21584518189755822, "grad_norm": 0.37890625, "learning_rate": 2.7449704423430076e-05, "loss": 2.004, "step": 6690 }, { "epoch": 0.2158774457513546, "grad_norm": 0.373046875, "learning_rate": 2.7448826537576756e-05, "loss": 2.003, "step": 6691 }, { "epoch": 0.21590970960515093, "grad_norm": 0.421875, "learning_rate": 2.744794851469568e-05, "loss": 2.0015, "step": 6692 }, { "epoch": 0.21594197345894728, "grad_norm": 0.431640625, "learning_rate": 2.7447070354796523e-05, "loss": 2.0097, "step": 6693 }, { "epoch": 0.21597423731274362, "grad_norm": 0.412109375, "learning_rate": 2.7446192057888945e-05, "loss": 2.0112, "step": 6694 }, { "epoch": 0.21600650116653997, "grad_norm": 0.50390625, "learning_rate": 2.7445313623982615e-05, "loss": 2.0126, "step": 6695 }, { "epoch": 0.2160387650203363, "grad_norm": 0.490234375, "learning_rate": 2.7444435053087194e-05, "loss": 2.0038, "step": 6696 }, { "epoch": 0.21607102887413265, "grad_norm": 0.486328125, "learning_rate": 2.7443556345212363e-05, "loss": 2.0105, "step": 6697 }, { "epoch": 0.216103292727929, "grad_norm": 0.4140625, "learning_rate": 2.744267750036779e-05, "loss": 2.0309, "step": 6698 }, { "epoch": 0.21613555658172534, "grad_norm": 0.42578125, "learning_rate": 2.7441798518563155e-05, "loss": 1.9949, "step": 6699 }, { "epoch": 0.21616782043552168, "grad_norm": 0.50390625, "learning_rate": 2.7440919399808122e-05, "loss": 2.0276, "step": 6700 }, { "epoch": 0.21620008428931806, "grad_norm": 0.412109375, "learning_rate": 2.744004014411237e-05, "loss": 1.9903, "step": 6701 }, { "epoch": 0.2162323481431144, "grad_norm": 0.466796875, "learning_rate": 2.7439160751485585e-05, "loss": 2.0027, "step": 6702 }, { "epoch": 0.21626461199691074, "grad_norm": 0.458984375, "learning_rate": 2.7438281221937448e-05, "loss": 1.9896, "step": 6703 }, { "epoch": 0.2162968758507071, "grad_norm": 0.416015625, "learning_rate": 2.7437401555477626e-05, "loss": 2.0526, "step": 6704 }, { "epoch": 0.21632913970450343, "grad_norm": 0.4140625, "learning_rate": 2.7436521752115812e-05, "loss": 2.0189, "step": 6705 }, { "epoch": 0.21636140355829978, "grad_norm": 0.39453125, "learning_rate": 2.743564181186169e-05, "loss": 1.9899, "step": 6706 }, { "epoch": 0.21639366741209612, "grad_norm": 0.41015625, "learning_rate": 2.743476173472494e-05, "loss": 2.0066, "step": 6707 }, { "epoch": 0.21642593126589246, "grad_norm": 0.412109375, "learning_rate": 2.743388152071526e-05, "loss": 2.0382, "step": 6708 }, { "epoch": 0.2164581951196888, "grad_norm": 0.396484375, "learning_rate": 2.743300116984233e-05, "loss": 2.0529, "step": 6709 }, { "epoch": 0.21649045897348515, "grad_norm": 0.412109375, "learning_rate": 2.743212068211584e-05, "loss": 2.0104, "step": 6710 }, { "epoch": 0.21652272282728152, "grad_norm": 0.390625, "learning_rate": 2.7431240057545486e-05, "loss": 1.9916, "step": 6711 }, { "epoch": 0.21655498668107787, "grad_norm": 0.408203125, "learning_rate": 2.743035929614096e-05, "loss": 2.0004, "step": 6712 }, { "epoch": 0.2165872505348742, "grad_norm": 0.43359375, "learning_rate": 2.7429478397911957e-05, "loss": 2.006, "step": 6713 }, { "epoch": 0.21661951438867055, "grad_norm": 0.41796875, "learning_rate": 2.7428597362868168e-05, "loss": 2.0078, "step": 6714 }, { "epoch": 0.2166517782424669, "grad_norm": 0.470703125, "learning_rate": 2.74277161910193e-05, "loss": 2.0341, "step": 6715 }, { "epoch": 0.21668404209626324, "grad_norm": 0.427734375, "learning_rate": 2.742683488237505e-05, "loss": 1.9868, "step": 6716 }, { "epoch": 0.21671630595005958, "grad_norm": 0.408203125, "learning_rate": 2.7425953436945113e-05, "loss": 2.0025, "step": 6717 }, { "epoch": 0.21674856980385593, "grad_norm": 0.4375, "learning_rate": 2.7425071854739195e-05, "loss": 1.9924, "step": 6718 }, { "epoch": 0.21678083365765227, "grad_norm": 0.52734375, "learning_rate": 2.7424190135767e-05, "loss": 2.0576, "step": 6719 }, { "epoch": 0.21681309751144862, "grad_norm": 0.455078125, "learning_rate": 2.7423308280038234e-05, "loss": 2.0307, "step": 6720 }, { "epoch": 0.21684536136524496, "grad_norm": 0.412109375, "learning_rate": 2.74224262875626e-05, "loss": 2.0128, "step": 6721 }, { "epoch": 0.21687762521904133, "grad_norm": 0.47265625, "learning_rate": 2.7421544158349813e-05, "loss": 1.9872, "step": 6722 }, { "epoch": 0.21690988907283767, "grad_norm": 0.40625, "learning_rate": 2.742066189240958e-05, "loss": 2.0277, "step": 6723 }, { "epoch": 0.21694215292663402, "grad_norm": 0.455078125, "learning_rate": 2.7419779489751605e-05, "loss": 2.0085, "step": 6724 }, { "epoch": 0.21697441678043036, "grad_norm": 0.478515625, "learning_rate": 2.741889695038561e-05, "loss": 2.0376, "step": 6725 }, { "epoch": 0.2170066806342267, "grad_norm": 0.41015625, "learning_rate": 2.741801427432131e-05, "loss": 1.9675, "step": 6726 }, { "epoch": 0.21703894448802305, "grad_norm": 0.427734375, "learning_rate": 2.7417131461568416e-05, "loss": 1.9912, "step": 6727 }, { "epoch": 0.2170712083418194, "grad_norm": 0.4453125, "learning_rate": 2.7416248512136647e-05, "loss": 2.011, "step": 6728 }, { "epoch": 0.21710347219561574, "grad_norm": 0.412109375, "learning_rate": 2.741536542603572e-05, "loss": 1.9989, "step": 6729 }, { "epoch": 0.21713573604941208, "grad_norm": 0.455078125, "learning_rate": 2.741448220327536e-05, "loss": 2.0139, "step": 6730 }, { "epoch": 0.21716799990320843, "grad_norm": 0.462890625, "learning_rate": 2.741359884386528e-05, "loss": 2.0262, "step": 6731 }, { "epoch": 0.2172002637570048, "grad_norm": 0.400390625, "learning_rate": 2.7412715347815215e-05, "loss": 2.0192, "step": 6732 }, { "epoch": 0.21723252761080114, "grad_norm": 0.421875, "learning_rate": 2.7411831715134883e-05, "loss": 2.0186, "step": 6733 }, { "epoch": 0.21726479146459748, "grad_norm": 0.419921875, "learning_rate": 2.741094794583401e-05, "loss": 2.0305, "step": 6734 }, { "epoch": 0.21729705531839383, "grad_norm": 0.419921875, "learning_rate": 2.741006403992233e-05, "loss": 2.0009, "step": 6735 }, { "epoch": 0.21732931917219017, "grad_norm": 0.384765625, "learning_rate": 2.7409179997409564e-05, "loss": 1.983, "step": 6736 }, { "epoch": 0.21736158302598652, "grad_norm": 0.388671875, "learning_rate": 2.7408295818305448e-05, "loss": 1.9967, "step": 6737 }, { "epoch": 0.21739384687978286, "grad_norm": 0.384765625, "learning_rate": 2.7407411502619713e-05, "loss": 2.0105, "step": 6738 }, { "epoch": 0.2174261107335792, "grad_norm": 0.373046875, "learning_rate": 2.7406527050362092e-05, "loss": 1.9853, "step": 6739 }, { "epoch": 0.21745837458737555, "grad_norm": 0.396484375, "learning_rate": 2.7405642461542326e-05, "loss": 2.0287, "step": 6740 }, { "epoch": 0.2174906384411719, "grad_norm": 0.408203125, "learning_rate": 2.7404757736170142e-05, "loss": 2.0064, "step": 6741 }, { "epoch": 0.21752290229496826, "grad_norm": 0.384765625, "learning_rate": 2.740387287425529e-05, "loss": 2.0201, "step": 6742 }, { "epoch": 0.2175551661487646, "grad_norm": 0.404296875, "learning_rate": 2.7402987875807496e-05, "loss": 2.0055, "step": 6743 }, { "epoch": 0.21758743000256095, "grad_norm": 0.369140625, "learning_rate": 2.740210274083652e-05, "loss": 2.0398, "step": 6744 }, { "epoch": 0.2176196938563573, "grad_norm": 0.390625, "learning_rate": 2.7401217469352087e-05, "loss": 2.0006, "step": 6745 }, { "epoch": 0.21765195771015364, "grad_norm": 0.392578125, "learning_rate": 2.7400332061363946e-05, "loss": 2.0335, "step": 6746 }, { "epoch": 0.21768422156394998, "grad_norm": 0.423828125, "learning_rate": 2.739944651688185e-05, "loss": 2.0362, "step": 6747 }, { "epoch": 0.21771648541774632, "grad_norm": 0.396484375, "learning_rate": 2.739856083591554e-05, "loss": 2.0316, "step": 6748 }, { "epoch": 0.21774874927154267, "grad_norm": 0.380859375, "learning_rate": 2.739767501847477e-05, "loss": 2.0132, "step": 6749 }, { "epoch": 0.217781013125339, "grad_norm": 0.423828125, "learning_rate": 2.7396789064569286e-05, "loss": 2.0249, "step": 6750 }, { "epoch": 0.21781327697913536, "grad_norm": 0.453125, "learning_rate": 2.7395902974208844e-05, "loss": 2.0176, "step": 6751 }, { "epoch": 0.21784554083293173, "grad_norm": 0.416015625, "learning_rate": 2.7395016747403192e-05, "loss": 2.0168, "step": 6752 }, { "epoch": 0.21787780468672807, "grad_norm": 0.419921875, "learning_rate": 2.7394130384162087e-05, "loss": 1.9776, "step": 6753 }, { "epoch": 0.21791006854052442, "grad_norm": 0.375, "learning_rate": 2.7393243884495288e-05, "loss": 1.9693, "step": 6754 }, { "epoch": 0.21794233239432076, "grad_norm": 0.431640625, "learning_rate": 2.7392357248412552e-05, "loss": 2.0422, "step": 6755 }, { "epoch": 0.2179745962481171, "grad_norm": 0.4140625, "learning_rate": 2.7391470475923633e-05, "loss": 1.9974, "step": 6756 }, { "epoch": 0.21800686010191345, "grad_norm": 0.40625, "learning_rate": 2.73905835670383e-05, "loss": 2.021, "step": 6757 }, { "epoch": 0.2180391239557098, "grad_norm": 0.3671875, "learning_rate": 2.738969652176631e-05, "loss": 2.0448, "step": 6758 }, { "epoch": 0.21807138780950613, "grad_norm": 0.41796875, "learning_rate": 2.7388809340117432e-05, "loss": 2.0221, "step": 6759 }, { "epoch": 0.21810365166330248, "grad_norm": 0.37890625, "learning_rate": 2.738792202210143e-05, "loss": 2.0091, "step": 6760 }, { "epoch": 0.21813591551709882, "grad_norm": 0.43359375, "learning_rate": 2.7387034567728065e-05, "loss": 2.0365, "step": 6761 }, { "epoch": 0.2181681793708952, "grad_norm": 0.3828125, "learning_rate": 2.7386146977007115e-05, "loss": 1.9349, "step": 6762 }, { "epoch": 0.21820044322469154, "grad_norm": 0.384765625, "learning_rate": 2.7385259249948338e-05, "loss": 1.9444, "step": 6763 }, { "epoch": 0.21823270707848788, "grad_norm": 0.388671875, "learning_rate": 2.7384371386561516e-05, "loss": 2.0197, "step": 6764 }, { "epoch": 0.21826497093228422, "grad_norm": 0.39453125, "learning_rate": 2.738348338685642e-05, "loss": 2.0156, "step": 6765 }, { "epoch": 0.21829723478608057, "grad_norm": 0.38671875, "learning_rate": 2.7382595250842818e-05, "loss": 2.0323, "step": 6766 }, { "epoch": 0.2183294986398769, "grad_norm": 0.39453125, "learning_rate": 2.7381706978530495e-05, "loss": 1.9803, "step": 6767 }, { "epoch": 0.21836176249367326, "grad_norm": 0.416015625, "learning_rate": 2.7380818569929222e-05, "loss": 1.993, "step": 6768 }, { "epoch": 0.2183940263474696, "grad_norm": 0.46484375, "learning_rate": 2.7379930025048784e-05, "loss": 2.0299, "step": 6769 }, { "epoch": 0.21842629020126594, "grad_norm": 0.4140625, "learning_rate": 2.7379041343898953e-05, "loss": 2.0072, "step": 6770 }, { "epoch": 0.2184585540550623, "grad_norm": 0.39453125, "learning_rate": 2.7378152526489514e-05, "loss": 2.0433, "step": 6771 }, { "epoch": 0.21849081790885866, "grad_norm": 0.40234375, "learning_rate": 2.7377263572830257e-05, "loss": 2.0045, "step": 6772 }, { "epoch": 0.218523081762655, "grad_norm": 0.439453125, "learning_rate": 2.7376374482930963e-05, "loss": 2.0289, "step": 6773 }, { "epoch": 0.21855534561645135, "grad_norm": 0.40625, "learning_rate": 2.737548525680141e-05, "loss": 1.9953, "step": 6774 }, { "epoch": 0.2185876094702477, "grad_norm": 0.44921875, "learning_rate": 2.73745958944514e-05, "loss": 1.9901, "step": 6775 }, { "epoch": 0.21861987332404403, "grad_norm": 0.498046875, "learning_rate": 2.7373706395890713e-05, "loss": 2.0376, "step": 6776 }, { "epoch": 0.21865213717784038, "grad_norm": 0.51953125, "learning_rate": 2.7372816761129143e-05, "loss": 2.0169, "step": 6777 }, { "epoch": 0.21868440103163672, "grad_norm": 0.5, "learning_rate": 2.7371926990176483e-05, "loss": 2.0253, "step": 6778 }, { "epoch": 0.21871666488543307, "grad_norm": 0.41796875, "learning_rate": 2.7371037083042528e-05, "loss": 1.9952, "step": 6779 }, { "epoch": 0.2187489287392294, "grad_norm": 0.42578125, "learning_rate": 2.737014703973707e-05, "loss": 2.0159, "step": 6780 }, { "epoch": 0.21878119259302575, "grad_norm": 0.4140625, "learning_rate": 2.736925686026991e-05, "loss": 2.0085, "step": 6781 }, { "epoch": 0.21881345644682212, "grad_norm": 0.408203125, "learning_rate": 2.736836654465084e-05, "loss": 2.0077, "step": 6782 }, { "epoch": 0.21884572030061847, "grad_norm": 0.439453125, "learning_rate": 2.7367476092889664e-05, "loss": 1.9888, "step": 6783 }, { "epoch": 0.2188779841544148, "grad_norm": 0.38671875, "learning_rate": 2.7366585504996187e-05, "loss": 2.0097, "step": 6784 }, { "epoch": 0.21891024800821116, "grad_norm": 0.423828125, "learning_rate": 2.7365694780980205e-05, "loss": 2.0307, "step": 6785 }, { "epoch": 0.2189425118620075, "grad_norm": 0.37890625, "learning_rate": 2.736480392085153e-05, "loss": 1.9863, "step": 6786 }, { "epoch": 0.21897477571580384, "grad_norm": 0.474609375, "learning_rate": 2.7363912924619963e-05, "loss": 2.0521, "step": 6787 }, { "epoch": 0.2190070395696002, "grad_norm": 0.423828125, "learning_rate": 2.736302179229531e-05, "loss": 2.0098, "step": 6788 }, { "epoch": 0.21903930342339653, "grad_norm": 0.390625, "learning_rate": 2.7362130523887383e-05, "loss": 2.0275, "step": 6789 }, { "epoch": 0.21907156727719287, "grad_norm": 0.41015625, "learning_rate": 2.7361239119405996e-05, "loss": 2.0228, "step": 6790 }, { "epoch": 0.21910383113098922, "grad_norm": 0.38671875, "learning_rate": 2.7360347578860952e-05, "loss": 2.0109, "step": 6791 }, { "epoch": 0.2191360949847856, "grad_norm": 0.453125, "learning_rate": 2.735945590226207e-05, "loss": 2.0054, "step": 6792 }, { "epoch": 0.21916835883858193, "grad_norm": 0.400390625, "learning_rate": 2.7358564089619163e-05, "loss": 2.0307, "step": 6793 }, { "epoch": 0.21920062269237828, "grad_norm": 0.384765625, "learning_rate": 2.7357672140942048e-05, "loss": 2.0355, "step": 6794 }, { "epoch": 0.21923288654617462, "grad_norm": 0.40625, "learning_rate": 2.7356780056240546e-05, "loss": 1.9758, "step": 6795 }, { "epoch": 0.21926515039997097, "grad_norm": 0.41015625, "learning_rate": 2.7355887835524474e-05, "loss": 2.0389, "step": 6796 }, { "epoch": 0.2192974142537673, "grad_norm": 0.38671875, "learning_rate": 2.735499547880365e-05, "loss": 2.0313, "step": 6797 }, { "epoch": 0.21932967810756365, "grad_norm": 0.380859375, "learning_rate": 2.73541029860879e-05, "loss": 2.0152, "step": 6798 }, { "epoch": 0.21936194196136, "grad_norm": 0.37890625, "learning_rate": 2.7353210357387048e-05, "loss": 2.0274, "step": 6799 }, { "epoch": 0.21939420581515634, "grad_norm": 0.39453125, "learning_rate": 2.7352317592710924e-05, "loss": 2.0294, "step": 6800 }, { "epoch": 0.21942646966895268, "grad_norm": 0.380859375, "learning_rate": 2.7351424692069343e-05, "loss": 2.0237, "step": 6801 }, { "epoch": 0.21945873352274906, "grad_norm": 0.384765625, "learning_rate": 2.735053165547214e-05, "loss": 2.024, "step": 6802 }, { "epoch": 0.2194909973765454, "grad_norm": 0.43359375, "learning_rate": 2.7349638482929147e-05, "loss": 2.0261, "step": 6803 }, { "epoch": 0.21952326123034174, "grad_norm": 0.4375, "learning_rate": 2.734874517445019e-05, "loss": 2.0117, "step": 6804 }, { "epoch": 0.2195555250841381, "grad_norm": 0.494140625, "learning_rate": 2.7347851730045106e-05, "loss": 2.0023, "step": 6805 }, { "epoch": 0.21958778893793443, "grad_norm": 0.5, "learning_rate": 2.734695814972373e-05, "loss": 2.0232, "step": 6806 }, { "epoch": 0.21962005279173077, "grad_norm": 0.47265625, "learning_rate": 2.7346064433495894e-05, "loss": 1.972, "step": 6807 }, { "epoch": 0.21965231664552712, "grad_norm": 0.439453125, "learning_rate": 2.7345170581371443e-05, "loss": 1.9379, "step": 6808 }, { "epoch": 0.21968458049932346, "grad_norm": 0.4140625, "learning_rate": 2.734427659336021e-05, "loss": 1.9918, "step": 6809 }, { "epoch": 0.2197168443531198, "grad_norm": 0.4375, "learning_rate": 2.734338246947203e-05, "loss": 2.0365, "step": 6810 }, { "epoch": 0.21974910820691615, "grad_norm": 0.412109375, "learning_rate": 2.7342488209716754e-05, "loss": 2.0047, "step": 6811 }, { "epoch": 0.2197813720607125, "grad_norm": 0.41796875, "learning_rate": 2.7341593814104223e-05, "loss": 1.9899, "step": 6812 }, { "epoch": 0.21981363591450886, "grad_norm": 0.400390625, "learning_rate": 2.734069928264428e-05, "loss": 1.9423, "step": 6813 }, { "epoch": 0.2198458997683052, "grad_norm": 0.4296875, "learning_rate": 2.733980461534677e-05, "loss": 2.0115, "step": 6814 }, { "epoch": 0.21987816362210155, "grad_norm": 0.396484375, "learning_rate": 2.7338909812221548e-05, "loss": 2.0074, "step": 6815 }, { "epoch": 0.2199104274758979, "grad_norm": 0.412109375, "learning_rate": 2.7338014873278456e-05, "loss": 2.0283, "step": 6816 }, { "epoch": 0.21994269132969424, "grad_norm": 0.390625, "learning_rate": 2.7337119798527353e-05, "loss": 1.9999, "step": 6817 }, { "epoch": 0.21997495518349058, "grad_norm": 0.40625, "learning_rate": 2.7336224587978074e-05, "loss": 2.0155, "step": 6818 }, { "epoch": 0.22000721903728693, "grad_norm": 0.380859375, "learning_rate": 2.73353292416405e-05, "loss": 2.0236, "step": 6819 }, { "epoch": 0.22003948289108327, "grad_norm": 0.421875, "learning_rate": 2.733443375952446e-05, "loss": 1.9847, "step": 6820 }, { "epoch": 0.22007174674487961, "grad_norm": 0.478515625, "learning_rate": 2.7333538141639825e-05, "loss": 1.9771, "step": 6821 }, { "epoch": 0.22010401059867596, "grad_norm": 0.46875, "learning_rate": 2.733264238799645e-05, "loss": 1.9963, "step": 6822 }, { "epoch": 0.22013627445247233, "grad_norm": 0.390625, "learning_rate": 2.7331746498604193e-05, "loss": 2.0158, "step": 6823 }, { "epoch": 0.22016853830626867, "grad_norm": 0.4140625, "learning_rate": 2.733085047347292e-05, "loss": 1.9668, "step": 6824 }, { "epoch": 0.22020080216006502, "grad_norm": 0.4609375, "learning_rate": 2.7329954312612497e-05, "loss": 2.0216, "step": 6825 }, { "epoch": 0.22023306601386136, "grad_norm": 0.43359375, "learning_rate": 2.7329058016032773e-05, "loss": 2.0335, "step": 6826 }, { "epoch": 0.2202653298676577, "grad_norm": 0.3984375, "learning_rate": 2.7328161583743625e-05, "loss": 1.9876, "step": 6827 }, { "epoch": 0.22029759372145405, "grad_norm": 0.482421875, "learning_rate": 2.7327265015754917e-05, "loss": 2.0097, "step": 6828 }, { "epoch": 0.2203298575752504, "grad_norm": 0.462890625, "learning_rate": 2.7326368312076525e-05, "loss": 2.0209, "step": 6829 }, { "epoch": 0.22036212142904674, "grad_norm": 0.443359375, "learning_rate": 2.732547147271831e-05, "loss": 2.0097, "step": 6830 }, { "epoch": 0.22039438528284308, "grad_norm": 0.421875, "learning_rate": 2.7324574497690147e-05, "loss": 1.9904, "step": 6831 }, { "epoch": 0.22042664913663942, "grad_norm": 0.400390625, "learning_rate": 2.732367738700191e-05, "loss": 2.0028, "step": 6832 }, { "epoch": 0.2204589129904358, "grad_norm": 0.466796875, "learning_rate": 2.7322780140663472e-05, "loss": 1.9801, "step": 6833 }, { "epoch": 0.22049117684423214, "grad_norm": 0.45703125, "learning_rate": 2.7321882758684715e-05, "loss": 1.9973, "step": 6834 }, { "epoch": 0.22052344069802848, "grad_norm": 0.431640625, "learning_rate": 2.7320985241075508e-05, "loss": 2.022, "step": 6835 }, { "epoch": 0.22055570455182483, "grad_norm": 0.4765625, "learning_rate": 2.7320087587845735e-05, "loss": 2.0096, "step": 6836 }, { "epoch": 0.22058796840562117, "grad_norm": 0.515625, "learning_rate": 2.7319189799005275e-05, "loss": 2.0468, "step": 6837 }, { "epoch": 0.22062023225941751, "grad_norm": 0.58203125, "learning_rate": 2.7318291874564013e-05, "loss": 2.0073, "step": 6838 }, { "epoch": 0.22065249611321386, "grad_norm": 0.41796875, "learning_rate": 2.7317393814531834e-05, "loss": 2.0432, "step": 6839 }, { "epoch": 0.2206847599670102, "grad_norm": 0.474609375, "learning_rate": 2.7316495618918617e-05, "loss": 1.9976, "step": 6840 }, { "epoch": 0.22071702382080655, "grad_norm": 0.43359375, "learning_rate": 2.7315597287734255e-05, "loss": 1.9761, "step": 6841 }, { "epoch": 0.2207492876746029, "grad_norm": 0.439453125, "learning_rate": 2.731469882098863e-05, "loss": 1.9913, "step": 6842 }, { "epoch": 0.22078155152839926, "grad_norm": 0.443359375, "learning_rate": 2.7313800218691638e-05, "loss": 2.0046, "step": 6843 }, { "epoch": 0.2208138153821956, "grad_norm": 0.5078125, "learning_rate": 2.7312901480853164e-05, "loss": 2.0021, "step": 6844 }, { "epoch": 0.22084607923599195, "grad_norm": 0.4375, "learning_rate": 2.7312002607483106e-05, "loss": 1.9887, "step": 6845 }, { "epoch": 0.2208783430897883, "grad_norm": 0.4375, "learning_rate": 2.7311103598591355e-05, "loss": 2.0272, "step": 6846 }, { "epoch": 0.22091060694358464, "grad_norm": 0.486328125, "learning_rate": 2.7310204454187807e-05, "loss": 2.0099, "step": 6847 }, { "epoch": 0.22094287079738098, "grad_norm": 0.494140625, "learning_rate": 2.7309305174282362e-05, "loss": 2.0311, "step": 6848 }, { "epoch": 0.22097513465117732, "grad_norm": 0.5078125, "learning_rate": 2.7308405758884913e-05, "loss": 2.0309, "step": 6849 }, { "epoch": 0.22100739850497367, "grad_norm": 0.439453125, "learning_rate": 2.7307506208005365e-05, "loss": 2.0159, "step": 6850 }, { "epoch": 0.22103966235877, "grad_norm": 0.4609375, "learning_rate": 2.730660652165362e-05, "loss": 2.0449, "step": 6851 }, { "epoch": 0.22107192621256636, "grad_norm": 0.494140625, "learning_rate": 2.730570669983957e-05, "loss": 2.0006, "step": 6852 }, { "epoch": 0.22110419006636273, "grad_norm": 0.470703125, "learning_rate": 2.730480674257314e-05, "loss": 2.0186, "step": 6853 }, { "epoch": 0.22113645392015907, "grad_norm": 0.498046875, "learning_rate": 2.730390664986422e-05, "loss": 2.0467, "step": 6854 }, { "epoch": 0.22116871777395541, "grad_norm": 0.458984375, "learning_rate": 2.7303006421722725e-05, "loss": 2.0011, "step": 6855 }, { "epoch": 0.22120098162775176, "grad_norm": 0.4453125, "learning_rate": 2.7302106058158557e-05, "loss": 2.0217, "step": 6856 }, { "epoch": 0.2212332454815481, "grad_norm": 0.41796875, "learning_rate": 2.7301205559181633e-05, "loss": 2.0136, "step": 6857 }, { "epoch": 0.22126550933534445, "grad_norm": 0.416015625, "learning_rate": 2.7300304924801862e-05, "loss": 2.0062, "step": 6858 }, { "epoch": 0.2212977731891408, "grad_norm": 0.47265625, "learning_rate": 2.7299404155029163e-05, "loss": 1.9912, "step": 6859 }, { "epoch": 0.22133003704293713, "grad_norm": 0.439453125, "learning_rate": 2.7298503249873443e-05, "loss": 1.9578, "step": 6860 }, { "epoch": 0.22136230089673348, "grad_norm": 0.5078125, "learning_rate": 2.7297602209344623e-05, "loss": 1.9542, "step": 6861 }, { "epoch": 0.22139456475052982, "grad_norm": 0.38671875, "learning_rate": 2.729670103345262e-05, "loss": 2.0137, "step": 6862 }, { "epoch": 0.2214268286043262, "grad_norm": 0.408203125, "learning_rate": 2.7295799722207353e-05, "loss": 2.0365, "step": 6863 }, { "epoch": 0.22145909245812254, "grad_norm": 0.416015625, "learning_rate": 2.729489827561874e-05, "loss": 2.013, "step": 6864 }, { "epoch": 0.22149135631191888, "grad_norm": 0.384765625, "learning_rate": 2.7293996693696713e-05, "loss": 1.9668, "step": 6865 }, { "epoch": 0.22152362016571522, "grad_norm": 0.3984375, "learning_rate": 2.729309497645119e-05, "loss": 1.9552, "step": 6866 }, { "epoch": 0.22155588401951157, "grad_norm": 0.40234375, "learning_rate": 2.7292193123892094e-05, "loss": 1.9298, "step": 6867 }, { "epoch": 0.2215881478733079, "grad_norm": 0.40625, "learning_rate": 2.7291291136029357e-05, "loss": 2.0139, "step": 6868 }, { "epoch": 0.22162041172710426, "grad_norm": 0.388671875, "learning_rate": 2.72903890128729e-05, "loss": 2.0307, "step": 6869 }, { "epoch": 0.2216526755809006, "grad_norm": 0.376953125, "learning_rate": 2.7289486754432665e-05, "loss": 2.0005, "step": 6870 }, { "epoch": 0.22168493943469694, "grad_norm": 0.380859375, "learning_rate": 2.728858436071857e-05, "loss": 1.9975, "step": 6871 }, { "epoch": 0.2217172032884933, "grad_norm": 0.392578125, "learning_rate": 2.7287681831740554e-05, "loss": 1.9662, "step": 6872 }, { "epoch": 0.22174946714228966, "grad_norm": 0.4375, "learning_rate": 2.7286779167508555e-05, "loss": 2.0153, "step": 6873 }, { "epoch": 0.221781730996086, "grad_norm": 0.43359375, "learning_rate": 2.7285876368032505e-05, "loss": 2.0138, "step": 6874 }, { "epoch": 0.22181399484988235, "grad_norm": 0.462890625, "learning_rate": 2.7284973433322344e-05, "loss": 2.0102, "step": 6875 }, { "epoch": 0.2218462587036787, "grad_norm": 2.28125, "learning_rate": 2.7284070363388003e-05, "loss": 2.0187, "step": 6876 }, { "epoch": 0.22187852255747503, "grad_norm": 0.47265625, "learning_rate": 2.7283167158239432e-05, "loss": 2.012, "step": 6877 }, { "epoch": 0.22191078641127138, "grad_norm": 0.54296875, "learning_rate": 2.7282263817886567e-05, "loss": 2.0027, "step": 6878 }, { "epoch": 0.22194305026506772, "grad_norm": 0.51953125, "learning_rate": 2.7281360342339353e-05, "loss": 1.9577, "step": 6879 }, { "epoch": 0.22197531411886406, "grad_norm": 0.4140625, "learning_rate": 2.7280456731607734e-05, "loss": 2.0341, "step": 6880 }, { "epoch": 0.2220075779726604, "grad_norm": 0.53515625, "learning_rate": 2.7279552985701664e-05, "loss": 2.0295, "step": 6881 }, { "epoch": 0.22203984182645675, "grad_norm": 0.48046875, "learning_rate": 2.7278649104631077e-05, "loss": 2.0113, "step": 6882 }, { "epoch": 0.22207210568025312, "grad_norm": 0.388671875, "learning_rate": 2.727774508840593e-05, "loss": 2.0249, "step": 6883 }, { "epoch": 0.22210436953404947, "grad_norm": 0.421875, "learning_rate": 2.7276840937036173e-05, "loss": 2.019, "step": 6884 }, { "epoch": 0.2221366333878458, "grad_norm": 0.4375, "learning_rate": 2.7275936650531762e-05, "loss": 1.9885, "step": 6885 }, { "epoch": 0.22216889724164215, "grad_norm": 0.419921875, "learning_rate": 2.7275032228902648e-05, "loss": 1.9867, "step": 6886 }, { "epoch": 0.2222011610954385, "grad_norm": 0.4296875, "learning_rate": 2.7274127672158778e-05, "loss": 2.0286, "step": 6887 }, { "epoch": 0.22223342494923484, "grad_norm": 0.435546875, "learning_rate": 2.727322298031012e-05, "loss": 2.0155, "step": 6888 }, { "epoch": 0.2222656888030312, "grad_norm": 0.435546875, "learning_rate": 2.727231815336663e-05, "loss": 2.0044, "step": 6889 }, { "epoch": 0.22229795265682753, "grad_norm": 0.390625, "learning_rate": 2.7271413191338265e-05, "loss": 2.0299, "step": 6890 }, { "epoch": 0.22233021651062387, "grad_norm": 0.4609375, "learning_rate": 2.7270508094234988e-05, "loss": 2.0347, "step": 6891 }, { "epoch": 0.22236248036442022, "grad_norm": 0.38671875, "learning_rate": 2.7269602862066755e-05, "loss": 2.0206, "step": 6892 }, { "epoch": 0.2223947442182166, "grad_norm": 0.416015625, "learning_rate": 2.7268697494843543e-05, "loss": 2.0357, "step": 6893 }, { "epoch": 0.22242700807201293, "grad_norm": 0.42578125, "learning_rate": 2.7267791992575307e-05, "loss": 2.014, "step": 6894 }, { "epoch": 0.22245927192580928, "grad_norm": 0.400390625, "learning_rate": 2.7266886355272018e-05, "loss": 1.988, "step": 6895 }, { "epoch": 0.22249153577960562, "grad_norm": 0.39453125, "learning_rate": 2.7265980582943644e-05, "loss": 2.01, "step": 6896 }, { "epoch": 0.22252379963340196, "grad_norm": 0.384765625, "learning_rate": 2.7265074675600157e-05, "loss": 2.0121, "step": 6897 }, { "epoch": 0.2225560634871983, "grad_norm": 0.408203125, "learning_rate": 2.7264168633251526e-05, "loss": 1.9745, "step": 6898 }, { "epoch": 0.22258832734099465, "grad_norm": 0.392578125, "learning_rate": 2.726326245590772e-05, "loss": 1.9623, "step": 6899 }, { "epoch": 0.222620591194791, "grad_norm": 0.384765625, "learning_rate": 2.7262356143578723e-05, "loss": 2.0115, "step": 6900 }, { "epoch": 0.22265285504858734, "grad_norm": 0.375, "learning_rate": 2.7261449696274507e-05, "loss": 1.9977, "step": 6901 }, { "epoch": 0.22268511890238368, "grad_norm": 0.40234375, "learning_rate": 2.7260543114005053e-05, "loss": 2.0024, "step": 6902 }, { "epoch": 0.22271738275618003, "grad_norm": 0.396484375, "learning_rate": 2.725963639678033e-05, "loss": 1.9657, "step": 6903 }, { "epoch": 0.2227496466099764, "grad_norm": 0.388671875, "learning_rate": 2.7258729544610326e-05, "loss": 1.9929, "step": 6904 }, { "epoch": 0.22278191046377274, "grad_norm": 0.375, "learning_rate": 2.7257822557505023e-05, "loss": 2.0056, "step": 6905 }, { "epoch": 0.22281417431756909, "grad_norm": 0.3984375, "learning_rate": 2.72569154354744e-05, "loss": 2.0196, "step": 6906 }, { "epoch": 0.22284643817136543, "grad_norm": 0.4375, "learning_rate": 2.7256008178528443e-05, "loss": 1.9885, "step": 6907 }, { "epoch": 0.22287870202516177, "grad_norm": 0.396484375, "learning_rate": 2.7255100786677146e-05, "loss": 1.9998, "step": 6908 }, { "epoch": 0.22291096587895812, "grad_norm": 0.40625, "learning_rate": 2.725419325993049e-05, "loss": 2.0029, "step": 6909 }, { "epoch": 0.22294322973275446, "grad_norm": 0.384765625, "learning_rate": 2.7253285598298467e-05, "loss": 1.9901, "step": 6910 }, { "epoch": 0.2229754935865508, "grad_norm": 0.388671875, "learning_rate": 2.7252377801791065e-05, "loss": 1.9958, "step": 6911 }, { "epoch": 0.22300775744034715, "grad_norm": 0.4453125, "learning_rate": 2.7251469870418277e-05, "loss": 1.9802, "step": 6912 }, { "epoch": 0.2230400212941435, "grad_norm": 0.404296875, "learning_rate": 2.7250561804190098e-05, "loss": 2.0097, "step": 6913 }, { "epoch": 0.22307228514793986, "grad_norm": 0.3984375, "learning_rate": 2.7249653603116528e-05, "loss": 1.9831, "step": 6914 }, { "epoch": 0.2231045490017362, "grad_norm": 0.42578125, "learning_rate": 2.724874526720755e-05, "loss": 1.9926, "step": 6915 }, { "epoch": 0.22313681285553255, "grad_norm": 0.41015625, "learning_rate": 2.7247836796473177e-05, "loss": 2.0311, "step": 6916 }, { "epoch": 0.2231690767093289, "grad_norm": 0.451171875, "learning_rate": 2.7246928190923406e-05, "loss": 2.0338, "step": 6917 }, { "epoch": 0.22320134056312524, "grad_norm": 0.41015625, "learning_rate": 2.7246019450568228e-05, "loss": 2.0433, "step": 6918 }, { "epoch": 0.22323360441692158, "grad_norm": 0.48046875, "learning_rate": 2.7245110575417655e-05, "loss": 2.0441, "step": 6919 }, { "epoch": 0.22326586827071793, "grad_norm": 0.453125, "learning_rate": 2.7244201565481695e-05, "loss": 2.0011, "step": 6920 }, { "epoch": 0.22329813212451427, "grad_norm": 0.466796875, "learning_rate": 2.7243292420770344e-05, "loss": 2.017, "step": 6921 }, { "epoch": 0.22333039597831061, "grad_norm": 0.41015625, "learning_rate": 2.724238314129361e-05, "loss": 2.0422, "step": 6922 }, { "epoch": 0.22336265983210696, "grad_norm": 0.44140625, "learning_rate": 2.724147372706151e-05, "loss": 2.0282, "step": 6923 }, { "epoch": 0.22339492368590333, "grad_norm": 0.41015625, "learning_rate": 2.724056417808405e-05, "loss": 2.024, "step": 6924 }, { "epoch": 0.22342718753969967, "grad_norm": 0.4609375, "learning_rate": 2.7239654494371237e-05, "loss": 2.0471, "step": 6925 }, { "epoch": 0.22345945139349602, "grad_norm": 0.427734375, "learning_rate": 2.723874467593309e-05, "loss": 2.0226, "step": 6926 }, { "epoch": 0.22349171524729236, "grad_norm": 0.388671875, "learning_rate": 2.7237834722779623e-05, "loss": 2.0006, "step": 6927 }, { "epoch": 0.2235239791010887, "grad_norm": 0.404296875, "learning_rate": 2.7236924634920846e-05, "loss": 1.9626, "step": 6928 }, { "epoch": 0.22355624295488505, "grad_norm": 0.408203125, "learning_rate": 2.7236014412366786e-05, "loss": 1.9969, "step": 6929 }, { "epoch": 0.2235885068086814, "grad_norm": 0.404296875, "learning_rate": 2.7235104055127453e-05, "loss": 1.9929, "step": 6930 }, { "epoch": 0.22362077066247774, "grad_norm": 0.416015625, "learning_rate": 2.7234193563212876e-05, "loss": 1.911, "step": 6931 }, { "epoch": 0.22365303451627408, "grad_norm": 0.45703125, "learning_rate": 2.7233282936633075e-05, "loss": 1.9469, "step": 6932 }, { "epoch": 0.22368529837007042, "grad_norm": 0.47265625, "learning_rate": 2.7232372175398067e-05, "loss": 1.9781, "step": 6933 }, { "epoch": 0.2237175622238668, "grad_norm": 0.4140625, "learning_rate": 2.7231461279517883e-05, "loss": 2.0315, "step": 6934 }, { "epoch": 0.22374982607766314, "grad_norm": 0.419921875, "learning_rate": 2.723055024900255e-05, "loss": 2.0212, "step": 6935 }, { "epoch": 0.22378208993145948, "grad_norm": 0.421875, "learning_rate": 2.722963908386209e-05, "loss": 2.0233, "step": 6936 }, { "epoch": 0.22381435378525583, "grad_norm": 0.408203125, "learning_rate": 2.722872778410654e-05, "loss": 1.9883, "step": 6937 }, { "epoch": 0.22384661763905217, "grad_norm": 0.54296875, "learning_rate": 2.7227816349745922e-05, "loss": 1.9793, "step": 6938 }, { "epoch": 0.2238788814928485, "grad_norm": 0.486328125, "learning_rate": 2.722690478079028e-05, "loss": 2.032, "step": 6939 }, { "epoch": 0.22391114534664486, "grad_norm": 0.4609375, "learning_rate": 2.722599307724964e-05, "loss": 2.0102, "step": 6940 }, { "epoch": 0.2239434092004412, "grad_norm": 0.458984375, "learning_rate": 2.722508123913404e-05, "loss": 1.9935, "step": 6941 }, { "epoch": 0.22397567305423755, "grad_norm": 0.423828125, "learning_rate": 2.7224169266453514e-05, "loss": 1.9749, "step": 6942 }, { "epoch": 0.2240079369080339, "grad_norm": 0.458984375, "learning_rate": 2.72232571592181e-05, "loss": 2.025, "step": 6943 }, { "epoch": 0.22404020076183026, "grad_norm": 0.4453125, "learning_rate": 2.7222344917437844e-05, "loss": 2.0123, "step": 6944 }, { "epoch": 0.2240724646156266, "grad_norm": 0.466796875, "learning_rate": 2.7221432541122784e-05, "loss": 1.9692, "step": 6945 }, { "epoch": 0.22410472846942295, "grad_norm": 0.453125, "learning_rate": 2.722052003028296e-05, "loss": 2.0259, "step": 6946 }, { "epoch": 0.2241369923232193, "grad_norm": 0.453125, "learning_rate": 2.721960738492842e-05, "loss": 2.0048, "step": 6947 }, { "epoch": 0.22416925617701564, "grad_norm": 0.4140625, "learning_rate": 2.7218694605069205e-05, "loss": 1.9887, "step": 6948 }, { "epoch": 0.22420152003081198, "grad_norm": 0.474609375, "learning_rate": 2.721778169071537e-05, "loss": 1.9984, "step": 6949 }, { "epoch": 0.22423378388460832, "grad_norm": 0.50390625, "learning_rate": 2.7216868641876956e-05, "loss": 2.0053, "step": 6950 }, { "epoch": 0.22426604773840467, "grad_norm": 0.451171875, "learning_rate": 2.7215955458564015e-05, "loss": 2.0101, "step": 6951 }, { "epoch": 0.224298311592201, "grad_norm": 0.8125, "learning_rate": 2.72150421407866e-05, "loss": 2.0419, "step": 6952 }, { "epoch": 0.22433057544599735, "grad_norm": 0.408203125, "learning_rate": 2.721412868855477e-05, "loss": 2.0056, "step": 6953 }, { "epoch": 0.22436283929979373, "grad_norm": 0.455078125, "learning_rate": 2.721321510187857e-05, "loss": 2.0104, "step": 6954 }, { "epoch": 0.22439510315359007, "grad_norm": 0.48046875, "learning_rate": 2.721230138076806e-05, "loss": 2.0244, "step": 6955 }, { "epoch": 0.2244273670073864, "grad_norm": 0.4296875, "learning_rate": 2.72113875252333e-05, "loss": 1.9904, "step": 6956 }, { "epoch": 0.22445963086118276, "grad_norm": 0.3984375, "learning_rate": 2.721047353528434e-05, "loss": 1.9955, "step": 6957 }, { "epoch": 0.2244918947149791, "grad_norm": 0.431640625, "learning_rate": 2.7209559410931254e-05, "loss": 2.0163, "step": 6958 }, { "epoch": 0.22452415856877544, "grad_norm": 0.423828125, "learning_rate": 2.720864515218409e-05, "loss": 1.9933, "step": 6959 }, { "epoch": 0.2245564224225718, "grad_norm": 0.39453125, "learning_rate": 2.7207730759052925e-05, "loss": 1.9985, "step": 6960 }, { "epoch": 0.22458868627636813, "grad_norm": 0.39453125, "learning_rate": 2.7206816231547812e-05, "loss": 1.9845, "step": 6961 }, { "epoch": 0.22462095013016448, "grad_norm": 0.396484375, "learning_rate": 2.7205901569678828e-05, "loss": 2.0207, "step": 6962 }, { "epoch": 0.22465321398396082, "grad_norm": 0.412109375, "learning_rate": 2.7204986773456036e-05, "loss": 2.0106, "step": 6963 }, { "epoch": 0.2246854778377572, "grad_norm": 0.3984375, "learning_rate": 2.7204071842889506e-05, "loss": 2.0129, "step": 6964 }, { "epoch": 0.22471774169155354, "grad_norm": 0.384765625, "learning_rate": 2.7203156777989307e-05, "loss": 2.006, "step": 6965 }, { "epoch": 0.22475000554534988, "grad_norm": 0.421875, "learning_rate": 2.7202241578765508e-05, "loss": 2.0521, "step": 6966 }, { "epoch": 0.22478226939914622, "grad_norm": 0.392578125, "learning_rate": 2.7201326245228196e-05, "loss": 2.0113, "step": 6967 }, { "epoch": 0.22481453325294257, "grad_norm": 0.39453125, "learning_rate": 2.7200410777387427e-05, "loss": 2.0367, "step": 6968 }, { "epoch": 0.2248467971067389, "grad_norm": 0.3984375, "learning_rate": 2.7199495175253298e-05, "loss": 1.9977, "step": 6969 }, { "epoch": 0.22487906096053525, "grad_norm": 0.388671875, "learning_rate": 2.7198579438835874e-05, "loss": 2.0419, "step": 6970 }, { "epoch": 0.2249113248143316, "grad_norm": 0.392578125, "learning_rate": 2.7197663568145243e-05, "loss": 2.0079, "step": 6971 }, { "epoch": 0.22494358866812794, "grad_norm": 0.369140625, "learning_rate": 2.719674756319148e-05, "loss": 2.011, "step": 6972 }, { "epoch": 0.22497585252192429, "grad_norm": 0.39453125, "learning_rate": 2.7195831423984662e-05, "loss": 1.9949, "step": 6973 }, { "epoch": 0.22500811637572066, "grad_norm": 0.3671875, "learning_rate": 2.719491515053489e-05, "loss": 1.9793, "step": 6974 }, { "epoch": 0.225040380229517, "grad_norm": 0.380859375, "learning_rate": 2.7193998742852237e-05, "loss": 1.9849, "step": 6975 }, { "epoch": 0.22507264408331334, "grad_norm": 0.4375, "learning_rate": 2.7193082200946796e-05, "loss": 2.0221, "step": 6976 }, { "epoch": 0.2251049079371097, "grad_norm": 0.416015625, "learning_rate": 2.7192165524828653e-05, "loss": 1.9864, "step": 6977 }, { "epoch": 0.22513717179090603, "grad_norm": 0.400390625, "learning_rate": 2.71912487145079e-05, "loss": 2.0064, "step": 6978 }, { "epoch": 0.22516943564470238, "grad_norm": 0.412109375, "learning_rate": 2.719033176999462e-05, "loss": 1.995, "step": 6979 }, { "epoch": 0.22520169949849872, "grad_norm": 0.455078125, "learning_rate": 2.718941469129892e-05, "loss": 2.0348, "step": 6980 }, { "epoch": 0.22523396335229506, "grad_norm": 0.423828125, "learning_rate": 2.7188497478430888e-05, "loss": 2.0152, "step": 6981 }, { "epoch": 0.2252662272060914, "grad_norm": 0.4140625, "learning_rate": 2.7187580131400616e-05, "loss": 1.9943, "step": 6982 }, { "epoch": 0.22529849105988775, "grad_norm": 0.443359375, "learning_rate": 2.718666265021821e-05, "loss": 2.0324, "step": 6983 }, { "epoch": 0.2253307549136841, "grad_norm": 0.498046875, "learning_rate": 2.7185745034893755e-05, "loss": 2.0172, "step": 6984 }, { "epoch": 0.22536301876748047, "grad_norm": 0.51953125, "learning_rate": 2.718482728543737e-05, "loss": 2.0317, "step": 6985 }, { "epoch": 0.2253952826212768, "grad_norm": 0.462890625, "learning_rate": 2.7183909401859146e-05, "loss": 2.0276, "step": 6986 }, { "epoch": 0.22542754647507315, "grad_norm": 0.390625, "learning_rate": 2.7182991384169184e-05, "loss": 2.0208, "step": 6987 }, { "epoch": 0.2254598103288695, "grad_norm": 0.4453125, "learning_rate": 2.718207323237759e-05, "loss": 2.009, "step": 6988 }, { "epoch": 0.22549207418266584, "grad_norm": 0.4296875, "learning_rate": 2.7181154946494487e-05, "loss": 2.0158, "step": 6989 }, { "epoch": 0.22552433803646219, "grad_norm": 0.388671875, "learning_rate": 2.7180236526529957e-05, "loss": 2.0343, "step": 6990 }, { "epoch": 0.22555660189025853, "grad_norm": 0.423828125, "learning_rate": 2.7179317972494128e-05, "loss": 2.0202, "step": 6991 }, { "epoch": 0.22558886574405487, "grad_norm": 0.41796875, "learning_rate": 2.7178399284397097e-05, "loss": 2.0336, "step": 6992 }, { "epoch": 0.22562112959785122, "grad_norm": 0.41796875, "learning_rate": 2.717748046224899e-05, "loss": 2.0358, "step": 6993 }, { "epoch": 0.22565339345164756, "grad_norm": 0.412109375, "learning_rate": 2.717656150605991e-05, "loss": 2.0356, "step": 6994 }, { "epoch": 0.22568565730544393, "grad_norm": 0.369140625, "learning_rate": 2.7175642415839976e-05, "loss": 2.0183, "step": 6995 }, { "epoch": 0.22571792115924028, "grad_norm": 0.5390625, "learning_rate": 2.7174723191599312e-05, "loss": 2.0074, "step": 6996 }, { "epoch": 0.22575018501303662, "grad_norm": 0.42578125, "learning_rate": 2.7173803833348024e-05, "loss": 2.0006, "step": 6997 }, { "epoch": 0.22578244886683296, "grad_norm": 0.416015625, "learning_rate": 2.7172884341096236e-05, "loss": 2.0135, "step": 6998 }, { "epoch": 0.2258147127206293, "grad_norm": 0.39453125, "learning_rate": 2.717196471485407e-05, "loss": 2.0504, "step": 6999 }, { "epoch": 0.22584697657442565, "grad_norm": 0.3984375, "learning_rate": 2.717104495463165e-05, "loss": 2.0309, "step": 7000 }, { "epoch": 0.225879240428222, "grad_norm": 0.40234375, "learning_rate": 2.71701250604391e-05, "loss": 2.0215, "step": 7001 }, { "epoch": 0.22591150428201834, "grad_norm": 0.3984375, "learning_rate": 2.7169205032286542e-05, "loss": 2.0485, "step": 7002 }, { "epoch": 0.22594376813581468, "grad_norm": 0.41015625, "learning_rate": 2.71682848701841e-05, "loss": 1.9965, "step": 7003 }, { "epoch": 0.22597603198961103, "grad_norm": 0.421875, "learning_rate": 2.7167364574141915e-05, "loss": 2.0077, "step": 7004 }, { "epoch": 0.2260082958434074, "grad_norm": 0.484375, "learning_rate": 2.716644414417011e-05, "loss": 1.9953, "step": 7005 }, { "epoch": 0.22604055969720374, "grad_norm": 0.4140625, "learning_rate": 2.716552358027881e-05, "loss": 1.9924, "step": 7006 }, { "epoch": 0.22607282355100008, "grad_norm": 0.451171875, "learning_rate": 2.716460288247816e-05, "loss": 2.0137, "step": 7007 }, { "epoch": 0.22610508740479643, "grad_norm": 0.41015625, "learning_rate": 2.716368205077829e-05, "loss": 2.0299, "step": 7008 }, { "epoch": 0.22613735125859277, "grad_norm": 0.4375, "learning_rate": 2.7162761085189326e-05, "loss": 2.0313, "step": 7009 }, { "epoch": 0.22616961511238912, "grad_norm": 0.4296875, "learning_rate": 2.716183998572142e-05, "loss": 2.044, "step": 7010 }, { "epoch": 0.22620187896618546, "grad_norm": 0.41015625, "learning_rate": 2.7160918752384702e-05, "loss": 1.9881, "step": 7011 }, { "epoch": 0.2262341428199818, "grad_norm": 0.45703125, "learning_rate": 2.7159997385189313e-05, "loss": 2.0237, "step": 7012 }, { "epoch": 0.22626640667377815, "grad_norm": 0.400390625, "learning_rate": 2.71590758841454e-05, "loss": 2.0233, "step": 7013 }, { "epoch": 0.2262986705275745, "grad_norm": 0.484375, "learning_rate": 2.7158154249263095e-05, "loss": 2.0307, "step": 7014 }, { "epoch": 0.22633093438137086, "grad_norm": 0.388671875, "learning_rate": 2.715723248055256e-05, "loss": 1.984, "step": 7015 }, { "epoch": 0.2263631982351672, "grad_norm": 0.40625, "learning_rate": 2.7156310578023927e-05, "loss": 2.0018, "step": 7016 }, { "epoch": 0.22639546208896355, "grad_norm": 0.390625, "learning_rate": 2.715538854168735e-05, "loss": 2.0282, "step": 7017 }, { "epoch": 0.2264277259427599, "grad_norm": 0.44140625, "learning_rate": 2.715446637155297e-05, "loss": 1.9983, "step": 7018 }, { "epoch": 0.22645998979655624, "grad_norm": 0.453125, "learning_rate": 2.715354406763095e-05, "loss": 2.0336, "step": 7019 }, { "epoch": 0.22649225365035258, "grad_norm": 0.396484375, "learning_rate": 2.7152621629931436e-05, "loss": 1.9819, "step": 7020 }, { "epoch": 0.22652451750414893, "grad_norm": 0.59765625, "learning_rate": 2.7151699058464576e-05, "loss": 2.0161, "step": 7021 }, { "epoch": 0.22655678135794527, "grad_norm": 0.42578125, "learning_rate": 2.7150776353240537e-05, "loss": 2.0307, "step": 7022 }, { "epoch": 0.2265890452117416, "grad_norm": 0.380859375, "learning_rate": 2.714985351426946e-05, "loss": 2.0165, "step": 7023 }, { "epoch": 0.22662130906553796, "grad_norm": 0.3984375, "learning_rate": 2.7148930541561522e-05, "loss": 2.043, "step": 7024 }, { "epoch": 0.22665357291933433, "grad_norm": 0.37890625, "learning_rate": 2.7148007435126867e-05, "loss": 2.0209, "step": 7025 }, { "epoch": 0.22668583677313067, "grad_norm": 0.408203125, "learning_rate": 2.714708419497566e-05, "loss": 2.0268, "step": 7026 }, { "epoch": 0.22671810062692702, "grad_norm": 0.41796875, "learning_rate": 2.7146160821118067e-05, "loss": 2.0116, "step": 7027 }, { "epoch": 0.22675036448072336, "grad_norm": 0.388671875, "learning_rate": 2.714523731356425e-05, "loss": 2.0358, "step": 7028 }, { "epoch": 0.2267826283345197, "grad_norm": 0.38671875, "learning_rate": 2.714431367232437e-05, "loss": 1.9712, "step": 7029 }, { "epoch": 0.22681489218831605, "grad_norm": 0.388671875, "learning_rate": 2.71433898974086e-05, "loss": 2.0431, "step": 7030 }, { "epoch": 0.2268471560421124, "grad_norm": 0.43359375, "learning_rate": 2.7142465988827107e-05, "loss": 1.9688, "step": 7031 }, { "epoch": 0.22687941989590873, "grad_norm": 0.4375, "learning_rate": 2.7141541946590055e-05, "loss": 2.0156, "step": 7032 }, { "epoch": 0.22691168374970508, "grad_norm": 0.3671875, "learning_rate": 2.714061777070762e-05, "loss": 2.0233, "step": 7033 }, { "epoch": 0.22694394760350142, "grad_norm": 0.4140625, "learning_rate": 2.713969346118998e-05, "loss": 2.0194, "step": 7034 }, { "epoch": 0.2269762114572978, "grad_norm": 0.384765625, "learning_rate": 2.7138769018047298e-05, "loss": 2.0085, "step": 7035 }, { "epoch": 0.22700847531109414, "grad_norm": 0.400390625, "learning_rate": 2.7137844441289756e-05, "loss": 2.0107, "step": 7036 }, { "epoch": 0.22704073916489048, "grad_norm": 0.353515625, "learning_rate": 2.7136919730927532e-05, "loss": 2.0195, "step": 7037 }, { "epoch": 0.22707300301868683, "grad_norm": 0.42578125, "learning_rate": 2.71359948869708e-05, "loss": 2.0136, "step": 7038 }, { "epoch": 0.22710526687248317, "grad_norm": 0.423828125, "learning_rate": 2.713506990942974e-05, "loss": 2.0193, "step": 7039 }, { "epoch": 0.2271375307262795, "grad_norm": 0.451171875, "learning_rate": 2.713414479831454e-05, "loss": 1.9767, "step": 7040 }, { "epoch": 0.22716979458007586, "grad_norm": 0.451171875, "learning_rate": 2.7133219553635383e-05, "loss": 1.9902, "step": 7041 }, { "epoch": 0.2272020584338722, "grad_norm": 0.3984375, "learning_rate": 2.7132294175402446e-05, "loss": 2.0188, "step": 7042 }, { "epoch": 0.22723432228766854, "grad_norm": 0.4453125, "learning_rate": 2.7131368663625918e-05, "loss": 2.0294, "step": 7043 }, { "epoch": 0.2272665861414649, "grad_norm": 0.462890625, "learning_rate": 2.7130443018315984e-05, "loss": 2.0283, "step": 7044 }, { "epoch": 0.22729884999526126, "grad_norm": 0.48828125, "learning_rate": 2.712951723948284e-05, "loss": 2.0228, "step": 7045 }, { "epoch": 0.2273311138490576, "grad_norm": 0.462890625, "learning_rate": 2.7128591327136666e-05, "loss": 1.9869, "step": 7046 }, { "epoch": 0.22736337770285395, "grad_norm": 0.451171875, "learning_rate": 2.7127665281287667e-05, "loss": 2.0384, "step": 7047 }, { "epoch": 0.2273956415566503, "grad_norm": 0.40234375, "learning_rate": 2.7126739101946025e-05, "loss": 2.0006, "step": 7048 }, { "epoch": 0.22742790541044663, "grad_norm": 0.408203125, "learning_rate": 2.7125812789121938e-05, "loss": 2.0063, "step": 7049 }, { "epoch": 0.22746016926424298, "grad_norm": 0.41796875, "learning_rate": 2.7124886342825605e-05, "loss": 2.0094, "step": 7050 }, { "epoch": 0.22749243311803932, "grad_norm": 0.3984375, "learning_rate": 2.7123959763067223e-05, "loss": 1.993, "step": 7051 }, { "epoch": 0.22752469697183567, "grad_norm": 0.423828125, "learning_rate": 2.7123033049856984e-05, "loss": 2.0134, "step": 7052 }, { "epoch": 0.227556960825632, "grad_norm": 0.392578125, "learning_rate": 2.7122106203205097e-05, "loss": 2.0309, "step": 7053 }, { "epoch": 0.22758922467942835, "grad_norm": 0.40625, "learning_rate": 2.7121179223121763e-05, "loss": 1.9837, "step": 7054 }, { "epoch": 0.22762148853322473, "grad_norm": 0.431640625, "learning_rate": 2.7120252109617184e-05, "loss": 1.991, "step": 7055 }, { "epoch": 0.22765375238702107, "grad_norm": 0.455078125, "learning_rate": 2.7119324862701565e-05, "loss": 2.0209, "step": 7056 }, { "epoch": 0.2276860162408174, "grad_norm": 0.419921875, "learning_rate": 2.7118397482385107e-05, "loss": 2.008, "step": 7057 }, { "epoch": 0.22771828009461376, "grad_norm": 0.42578125, "learning_rate": 2.7117469968678026e-05, "loss": 1.9838, "step": 7058 }, { "epoch": 0.2277505439484101, "grad_norm": 0.396484375, "learning_rate": 2.711654232159053e-05, "loss": 2.0344, "step": 7059 }, { "epoch": 0.22778280780220644, "grad_norm": 0.447265625, "learning_rate": 2.7115614541132826e-05, "loss": 1.9879, "step": 7060 }, { "epoch": 0.2278150716560028, "grad_norm": 0.396484375, "learning_rate": 2.7114686627315133e-05, "loss": 2.0146, "step": 7061 }, { "epoch": 0.22784733550979913, "grad_norm": 0.4140625, "learning_rate": 2.711375858014766e-05, "loss": 1.9914, "step": 7062 }, { "epoch": 0.22787959936359548, "grad_norm": 0.41796875, "learning_rate": 2.7112830399640622e-05, "loss": 1.9985, "step": 7063 }, { "epoch": 0.22791186321739182, "grad_norm": 0.41796875, "learning_rate": 2.7111902085804233e-05, "loss": 2.0084, "step": 7064 }, { "epoch": 0.2279441270711882, "grad_norm": 0.388671875, "learning_rate": 2.7110973638648722e-05, "loss": 1.9882, "step": 7065 }, { "epoch": 0.22797639092498453, "grad_norm": 0.40234375, "learning_rate": 2.7110045058184295e-05, "loss": 2.0323, "step": 7066 }, { "epoch": 0.22800865477878088, "grad_norm": 0.404296875, "learning_rate": 2.7109116344421182e-05, "loss": 2.0248, "step": 7067 }, { "epoch": 0.22804091863257722, "grad_norm": 0.416015625, "learning_rate": 2.7108187497369602e-05, "loss": 1.9984, "step": 7068 }, { "epoch": 0.22807318248637357, "grad_norm": 0.435546875, "learning_rate": 2.7107258517039786e-05, "loss": 2.0097, "step": 7069 }, { "epoch": 0.2281054463401699, "grad_norm": 0.44140625, "learning_rate": 2.710632940344195e-05, "loss": 2.0179, "step": 7070 }, { "epoch": 0.22813771019396625, "grad_norm": 0.5, "learning_rate": 2.7105400156586322e-05, "loss": 1.9886, "step": 7071 }, { "epoch": 0.2281699740477626, "grad_norm": 0.49609375, "learning_rate": 2.710447077648314e-05, "loss": 2.0042, "step": 7072 }, { "epoch": 0.22820223790155894, "grad_norm": 0.45703125, "learning_rate": 2.710354126314262e-05, "loss": 2.0111, "step": 7073 }, { "epoch": 0.22823450175535528, "grad_norm": 0.671875, "learning_rate": 2.7102611616575005e-05, "loss": 1.9932, "step": 7074 }, { "epoch": 0.22826676560915163, "grad_norm": 0.458984375, "learning_rate": 2.7101681836790523e-05, "loss": 2.0092, "step": 7075 }, { "epoch": 0.228299029462948, "grad_norm": 0.40625, "learning_rate": 2.7100751923799407e-05, "loss": 2.0181, "step": 7076 }, { "epoch": 0.22833129331674434, "grad_norm": 0.462890625, "learning_rate": 2.7099821877611902e-05, "loss": 2.0563, "step": 7077 }, { "epoch": 0.2283635571705407, "grad_norm": 0.404296875, "learning_rate": 2.7098891698238228e-05, "loss": 2.0084, "step": 7078 }, { "epoch": 0.22839582102433703, "grad_norm": 0.42578125, "learning_rate": 2.7097961385688643e-05, "loss": 2.0257, "step": 7079 }, { "epoch": 0.22842808487813338, "grad_norm": 0.40625, "learning_rate": 2.709703093997337e-05, "loss": 2.0066, "step": 7080 }, { "epoch": 0.22846034873192972, "grad_norm": 0.4140625, "learning_rate": 2.7096100361102664e-05, "loss": 2.0328, "step": 7081 }, { "epoch": 0.22849261258572606, "grad_norm": 0.439453125, "learning_rate": 2.7095169649086765e-05, "loss": 2.0321, "step": 7082 }, { "epoch": 0.2285248764395224, "grad_norm": 0.41796875, "learning_rate": 2.709423880393591e-05, "loss": 2.0389, "step": 7083 }, { "epoch": 0.22855714029331875, "grad_norm": 0.431640625, "learning_rate": 2.7093307825660357e-05, "loss": 1.9893, "step": 7084 }, { "epoch": 0.2285894041471151, "grad_norm": 0.447265625, "learning_rate": 2.709237671427034e-05, "loss": 2.0505, "step": 7085 }, { "epoch": 0.22862166800091147, "grad_norm": 0.408203125, "learning_rate": 2.709144546977612e-05, "loss": 2.034, "step": 7086 }, { "epoch": 0.2286539318547078, "grad_norm": 0.400390625, "learning_rate": 2.709051409218794e-05, "loss": 1.9742, "step": 7087 }, { "epoch": 0.22868619570850415, "grad_norm": 0.400390625, "learning_rate": 2.7089582581516057e-05, "loss": 2.0425, "step": 7088 }, { "epoch": 0.2287184595623005, "grad_norm": 0.439453125, "learning_rate": 2.7088650937770722e-05, "loss": 1.9986, "step": 7089 }, { "epoch": 0.22875072341609684, "grad_norm": 0.390625, "learning_rate": 2.7087719160962183e-05, "loss": 2.0066, "step": 7090 }, { "epoch": 0.22878298726989318, "grad_norm": 0.390625, "learning_rate": 2.7086787251100712e-05, "loss": 1.9817, "step": 7091 }, { "epoch": 0.22881525112368953, "grad_norm": 0.388671875, "learning_rate": 2.7085855208196556e-05, "loss": 2.0273, "step": 7092 }, { "epoch": 0.22884751497748587, "grad_norm": 0.390625, "learning_rate": 2.7084923032259973e-05, "loss": 2.025, "step": 7093 }, { "epoch": 0.22887977883128222, "grad_norm": 0.4296875, "learning_rate": 2.7083990723301227e-05, "loss": 2.0227, "step": 7094 }, { "epoch": 0.22891204268507856, "grad_norm": 0.40625, "learning_rate": 2.7083058281330582e-05, "loss": 1.9914, "step": 7095 }, { "epoch": 0.22894430653887493, "grad_norm": 0.40625, "learning_rate": 2.7082125706358303e-05, "loss": 1.9776, "step": 7096 }, { "epoch": 0.22897657039267127, "grad_norm": 0.423828125, "learning_rate": 2.7081192998394647e-05, "loss": 2.0123, "step": 7097 }, { "epoch": 0.22900883424646762, "grad_norm": 0.396484375, "learning_rate": 2.708026015744989e-05, "loss": 1.9671, "step": 7098 }, { "epoch": 0.22904109810026396, "grad_norm": 0.416015625, "learning_rate": 2.7079327183534286e-05, "loss": 2.0357, "step": 7099 }, { "epoch": 0.2290733619540603, "grad_norm": 0.4140625, "learning_rate": 2.7078394076658127e-05, "loss": 2.0112, "step": 7100 }, { "epoch": 0.22910562580785665, "grad_norm": 0.400390625, "learning_rate": 2.7077460836831662e-05, "loss": 2.0254, "step": 7101 }, { "epoch": 0.229137889661653, "grad_norm": 0.423828125, "learning_rate": 2.7076527464065175e-05, "loss": 1.9877, "step": 7102 }, { "epoch": 0.22917015351544934, "grad_norm": 0.37890625, "learning_rate": 2.707559395836894e-05, "loss": 2.039, "step": 7103 }, { "epoch": 0.22920241736924568, "grad_norm": 0.388671875, "learning_rate": 2.7074660319753227e-05, "loss": 2.0355, "step": 7104 }, { "epoch": 0.22923468122304202, "grad_norm": 0.380859375, "learning_rate": 2.7073726548228315e-05, "loss": 2.0258, "step": 7105 }, { "epoch": 0.2292669450768384, "grad_norm": 0.388671875, "learning_rate": 2.7072792643804482e-05, "loss": 2.0282, "step": 7106 }, { "epoch": 0.22929920893063474, "grad_norm": 0.38671875, "learning_rate": 2.707185860649201e-05, "loss": 2.0236, "step": 7107 }, { "epoch": 0.22933147278443108, "grad_norm": 0.376953125, "learning_rate": 2.707092443630118e-05, "loss": 2.0244, "step": 7108 }, { "epoch": 0.22936373663822743, "grad_norm": 0.37890625, "learning_rate": 2.7069990133242273e-05, "loss": 1.9861, "step": 7109 }, { "epoch": 0.22939600049202377, "grad_norm": 0.38671875, "learning_rate": 2.7069055697325568e-05, "loss": 2.0019, "step": 7110 }, { "epoch": 0.22942826434582012, "grad_norm": 0.3984375, "learning_rate": 2.7068121128561366e-05, "loss": 2.0318, "step": 7111 }, { "epoch": 0.22946052819961646, "grad_norm": 0.392578125, "learning_rate": 2.7067186426959936e-05, "loss": 2.0216, "step": 7112 }, { "epoch": 0.2294927920534128, "grad_norm": 0.41015625, "learning_rate": 2.706625159253158e-05, "loss": 2.0315, "step": 7113 }, { "epoch": 0.22952505590720915, "grad_norm": 0.439453125, "learning_rate": 2.706531662528658e-05, "loss": 2.011, "step": 7114 }, { "epoch": 0.2295573197610055, "grad_norm": 0.470703125, "learning_rate": 2.7064381525235233e-05, "loss": 2.0044, "step": 7115 }, { "epoch": 0.22958958361480186, "grad_norm": 0.498046875, "learning_rate": 2.7063446292387823e-05, "loss": 2.0371, "step": 7116 }, { "epoch": 0.2296218474685982, "grad_norm": 0.44140625, "learning_rate": 2.7062510926754656e-05, "loss": 2.0109, "step": 7117 }, { "epoch": 0.22965411132239455, "grad_norm": 0.37109375, "learning_rate": 2.7061575428346025e-05, "loss": 1.9846, "step": 7118 }, { "epoch": 0.2296863751761909, "grad_norm": 0.41796875, "learning_rate": 2.706063979717222e-05, "loss": 2.0305, "step": 7119 }, { "epoch": 0.22971863902998724, "grad_norm": 0.43359375, "learning_rate": 2.705970403324354e-05, "loss": 1.9847, "step": 7120 }, { "epoch": 0.22975090288378358, "grad_norm": 0.376953125, "learning_rate": 2.7058768136570295e-05, "loss": 1.9975, "step": 7121 }, { "epoch": 0.22978316673757992, "grad_norm": 0.421875, "learning_rate": 2.7057832107162785e-05, "loss": 2.0005, "step": 7122 }, { "epoch": 0.22981543059137627, "grad_norm": 0.462890625, "learning_rate": 2.7056895945031303e-05, "loss": 2.0071, "step": 7123 }, { "epoch": 0.2298476944451726, "grad_norm": 0.384765625, "learning_rate": 2.705595965018616e-05, "loss": 2.0376, "step": 7124 }, { "epoch": 0.22987995829896896, "grad_norm": 0.4375, "learning_rate": 2.7055023222637664e-05, "loss": 2.0669, "step": 7125 }, { "epoch": 0.22991222215276533, "grad_norm": 0.419921875, "learning_rate": 2.7054086662396123e-05, "loss": 2.0066, "step": 7126 }, { "epoch": 0.22994448600656167, "grad_norm": 0.421875, "learning_rate": 2.7053149969471842e-05, "loss": 2.0171, "step": 7127 }, { "epoch": 0.22997674986035802, "grad_norm": 0.466796875, "learning_rate": 2.7052213143875133e-05, "loss": 2.0122, "step": 7128 }, { "epoch": 0.23000901371415436, "grad_norm": 0.416015625, "learning_rate": 2.705127618561631e-05, "loss": 2.0016, "step": 7129 }, { "epoch": 0.2300412775679507, "grad_norm": 0.515625, "learning_rate": 2.705033909470568e-05, "loss": 2.059, "step": 7130 }, { "epoch": 0.23007354142174705, "grad_norm": 0.38671875, "learning_rate": 2.7049401871153564e-05, "loss": 2.025, "step": 7131 }, { "epoch": 0.2301058052755434, "grad_norm": 0.443359375, "learning_rate": 2.7048464514970274e-05, "loss": 2.0024, "step": 7132 }, { "epoch": 0.23013806912933973, "grad_norm": 0.4296875, "learning_rate": 2.7047527026166134e-05, "loss": 1.9994, "step": 7133 }, { "epoch": 0.23017033298313608, "grad_norm": 0.431640625, "learning_rate": 2.7046589404751458e-05, "loss": 2.014, "step": 7134 }, { "epoch": 0.23020259683693242, "grad_norm": 0.4453125, "learning_rate": 2.7045651650736565e-05, "loss": 1.9584, "step": 7135 }, { "epoch": 0.2302348606907288, "grad_norm": 0.40234375, "learning_rate": 2.7044713764131784e-05, "loss": 1.9732, "step": 7136 }, { "epoch": 0.23026712454452514, "grad_norm": 0.42578125, "learning_rate": 2.7043775744947437e-05, "loss": 1.9688, "step": 7137 }, { "epoch": 0.23029938839832148, "grad_norm": 0.41796875, "learning_rate": 2.7042837593193842e-05, "loss": 1.9809, "step": 7138 }, { "epoch": 0.23033165225211782, "grad_norm": 0.443359375, "learning_rate": 2.704189930888133e-05, "loss": 1.966, "step": 7139 }, { "epoch": 0.23036391610591417, "grad_norm": 0.451171875, "learning_rate": 2.7040960892020234e-05, "loss": 1.9947, "step": 7140 }, { "epoch": 0.2303961799597105, "grad_norm": 0.41015625, "learning_rate": 2.7040022342620872e-05, "loss": 1.9905, "step": 7141 }, { "epoch": 0.23042844381350686, "grad_norm": 0.53125, "learning_rate": 2.7039083660693587e-05, "loss": 2.0304, "step": 7142 }, { "epoch": 0.2304607076673032, "grad_norm": 0.62109375, "learning_rate": 2.7038144846248705e-05, "loss": 2.0303, "step": 7143 }, { "epoch": 0.23049297152109954, "grad_norm": 0.474609375, "learning_rate": 2.703720589929656e-05, "loss": 2.022, "step": 7144 }, { "epoch": 0.2305252353748959, "grad_norm": 0.4453125, "learning_rate": 2.7036266819847487e-05, "loss": 2.044, "step": 7145 }, { "epoch": 0.23055749922869226, "grad_norm": 0.55078125, "learning_rate": 2.7035327607911825e-05, "loss": 2.0466, "step": 7146 }, { "epoch": 0.2305897630824886, "grad_norm": 0.451171875, "learning_rate": 2.703438826349991e-05, "loss": 2.0247, "step": 7147 }, { "epoch": 0.23062202693628495, "grad_norm": 0.462890625, "learning_rate": 2.7033448786622082e-05, "loss": 2.0385, "step": 7148 }, { "epoch": 0.2306542907900813, "grad_norm": 0.52734375, "learning_rate": 2.703250917728868e-05, "loss": 2.0014, "step": 7149 }, { "epoch": 0.23068655464387763, "grad_norm": 0.45703125, "learning_rate": 2.7031569435510053e-05, "loss": 1.9896, "step": 7150 }, { "epoch": 0.23071881849767398, "grad_norm": 0.423828125, "learning_rate": 2.703062956129654e-05, "loss": 1.982, "step": 7151 }, { "epoch": 0.23075108235147032, "grad_norm": 0.4609375, "learning_rate": 2.7029689554658488e-05, "loss": 2.0133, "step": 7152 }, { "epoch": 0.23078334620526667, "grad_norm": 0.40625, "learning_rate": 2.7028749415606248e-05, "loss": 1.9697, "step": 7153 }, { "epoch": 0.230815610059063, "grad_norm": 0.4375, "learning_rate": 2.7027809144150158e-05, "loss": 2.0164, "step": 7154 }, { "epoch": 0.23084787391285935, "grad_norm": 0.400390625, "learning_rate": 2.702686874030057e-05, "loss": 1.9933, "step": 7155 }, { "epoch": 0.23088013776665572, "grad_norm": 0.421875, "learning_rate": 2.7025928204067847e-05, "loss": 1.9956, "step": 7156 }, { "epoch": 0.23091240162045207, "grad_norm": 0.421875, "learning_rate": 2.7024987535462327e-05, "loss": 2.0291, "step": 7157 }, { "epoch": 0.2309446654742484, "grad_norm": 0.412109375, "learning_rate": 2.7024046734494378e-05, "loss": 2.0258, "step": 7158 }, { "epoch": 0.23097692932804476, "grad_norm": 0.39453125, "learning_rate": 2.7023105801174342e-05, "loss": 1.9993, "step": 7159 }, { "epoch": 0.2310091931818411, "grad_norm": 0.41796875, "learning_rate": 2.7022164735512588e-05, "loss": 2.0025, "step": 7160 }, { "epoch": 0.23104145703563744, "grad_norm": 0.392578125, "learning_rate": 2.7021223537519462e-05, "loss": 1.981, "step": 7161 }, { "epoch": 0.2310737208894338, "grad_norm": 0.458984375, "learning_rate": 2.702028220720534e-05, "loss": 2.0728, "step": 7162 }, { "epoch": 0.23110598474323013, "grad_norm": 0.439453125, "learning_rate": 2.701934074458057e-05, "loss": 2.0351, "step": 7163 }, { "epoch": 0.23113824859702647, "grad_norm": 0.4140625, "learning_rate": 2.701839914965552e-05, "loss": 2.0118, "step": 7164 }, { "epoch": 0.23117051245082282, "grad_norm": 0.404296875, "learning_rate": 2.7017457422440555e-05, "loss": 1.9937, "step": 7165 }, { "epoch": 0.23120277630461916, "grad_norm": 0.384765625, "learning_rate": 2.7016515562946037e-05, "loss": 1.9932, "step": 7166 }, { "epoch": 0.23123504015841553, "grad_norm": 0.416015625, "learning_rate": 2.701557357118234e-05, "loss": 2.0048, "step": 7167 }, { "epoch": 0.23126730401221188, "grad_norm": 0.416015625, "learning_rate": 2.7014631447159828e-05, "loss": 2.0147, "step": 7168 }, { "epoch": 0.23129956786600822, "grad_norm": 0.44921875, "learning_rate": 2.701368919088887e-05, "loss": 1.9661, "step": 7169 }, { "epoch": 0.23133183171980456, "grad_norm": 0.41796875, "learning_rate": 2.7012746802379846e-05, "loss": 1.9899, "step": 7170 }, { "epoch": 0.2313640955736009, "grad_norm": 0.4140625, "learning_rate": 2.7011804281643116e-05, "loss": 2.0137, "step": 7171 }, { "epoch": 0.23139635942739725, "grad_norm": 0.447265625, "learning_rate": 2.701086162868907e-05, "loss": 1.9854, "step": 7172 }, { "epoch": 0.2314286232811936, "grad_norm": 0.416015625, "learning_rate": 2.7009918843528067e-05, "loss": 1.9896, "step": 7173 }, { "epoch": 0.23146088713498994, "grad_norm": 0.423828125, "learning_rate": 2.7008975926170497e-05, "loss": 2.0075, "step": 7174 }, { "epoch": 0.23149315098878628, "grad_norm": 0.41796875, "learning_rate": 2.7008032876626734e-05, "loss": 2.0293, "step": 7175 }, { "epoch": 0.23152541484258263, "grad_norm": 0.390625, "learning_rate": 2.7007089694907162e-05, "loss": 1.9975, "step": 7176 }, { "epoch": 0.231557678696379, "grad_norm": 0.41796875, "learning_rate": 2.700614638102216e-05, "loss": 2.0574, "step": 7177 }, { "epoch": 0.23158994255017534, "grad_norm": 0.421875, "learning_rate": 2.700520293498211e-05, "loss": 2.0289, "step": 7178 }, { "epoch": 0.2316222064039717, "grad_norm": 0.43359375, "learning_rate": 2.70042593567974e-05, "loss": 2.0185, "step": 7179 }, { "epoch": 0.23165447025776803, "grad_norm": 0.447265625, "learning_rate": 2.7003315646478412e-05, "loss": 2.0105, "step": 7180 }, { "epoch": 0.23168673411156437, "grad_norm": 0.455078125, "learning_rate": 2.7002371804035544e-05, "loss": 2.0155, "step": 7181 }, { "epoch": 0.23171899796536072, "grad_norm": 0.4375, "learning_rate": 2.7001427829479168e-05, "loss": 1.9847, "step": 7182 }, { "epoch": 0.23175126181915706, "grad_norm": 0.423828125, "learning_rate": 2.7000483722819693e-05, "loss": 2.0259, "step": 7183 }, { "epoch": 0.2317835256729534, "grad_norm": 0.42578125, "learning_rate": 2.6999539484067496e-05, "loss": 2.0037, "step": 7184 }, { "epoch": 0.23181578952674975, "grad_norm": 0.470703125, "learning_rate": 2.699859511323298e-05, "loss": 1.9892, "step": 7185 }, { "epoch": 0.2318480533805461, "grad_norm": 0.439453125, "learning_rate": 2.699765061032653e-05, "loss": 2.0294, "step": 7186 }, { "epoch": 0.23188031723434246, "grad_norm": 0.435546875, "learning_rate": 2.6996705975358556e-05, "loss": 2.0557, "step": 7187 }, { "epoch": 0.2319125810881388, "grad_norm": 0.478515625, "learning_rate": 2.699576120833945e-05, "loss": 1.9993, "step": 7188 }, { "epoch": 0.23194484494193515, "grad_norm": 0.4609375, "learning_rate": 2.699481630927961e-05, "loss": 2.0161, "step": 7189 }, { "epoch": 0.2319771087957315, "grad_norm": 0.400390625, "learning_rate": 2.6993871278189435e-05, "loss": 2.0271, "step": 7190 }, { "epoch": 0.23200937264952784, "grad_norm": 0.53515625, "learning_rate": 2.699292611507933e-05, "loss": 2.0278, "step": 7191 }, { "epoch": 0.23204163650332418, "grad_norm": 0.4296875, "learning_rate": 2.6991980819959698e-05, "loss": 2.0009, "step": 7192 }, { "epoch": 0.23207390035712053, "grad_norm": 0.423828125, "learning_rate": 2.6991035392840944e-05, "loss": 2.0251, "step": 7193 }, { "epoch": 0.23210616421091687, "grad_norm": 0.451171875, "learning_rate": 2.699008983373347e-05, "loss": 1.9972, "step": 7194 }, { "epoch": 0.23213842806471321, "grad_norm": 0.447265625, "learning_rate": 2.6989144142647696e-05, "loss": 2.0245, "step": 7195 }, { "epoch": 0.23217069191850956, "grad_norm": 0.4140625, "learning_rate": 2.698819831959402e-05, "loss": 2.0338, "step": 7196 }, { "epoch": 0.23220295577230593, "grad_norm": 0.408203125, "learning_rate": 2.698725236458286e-05, "loss": 2.022, "step": 7197 }, { "epoch": 0.23223521962610227, "grad_norm": 0.404296875, "learning_rate": 2.6986306277624626e-05, "loss": 2.009, "step": 7198 }, { "epoch": 0.23226748347989862, "grad_norm": 0.40625, "learning_rate": 2.698536005872973e-05, "loss": 2.0347, "step": 7199 }, { "epoch": 0.23229974733369496, "grad_norm": 0.41015625, "learning_rate": 2.6984413707908586e-05, "loss": 2.0401, "step": 7200 }, { "epoch": 0.2323320111874913, "grad_norm": 0.388671875, "learning_rate": 2.698346722517162e-05, "loss": 2.0276, "step": 7201 }, { "epoch": 0.23236427504128765, "grad_norm": 0.447265625, "learning_rate": 2.6982520610529235e-05, "loss": 2.0336, "step": 7202 }, { "epoch": 0.232396538895084, "grad_norm": 0.4140625, "learning_rate": 2.6981573863991867e-05, "loss": 2.005, "step": 7203 }, { "epoch": 0.23242880274888034, "grad_norm": 0.384765625, "learning_rate": 2.6980626985569927e-05, "loss": 2.0347, "step": 7204 }, { "epoch": 0.23246106660267668, "grad_norm": 0.3828125, "learning_rate": 2.6979679975273838e-05, "loss": 2.029, "step": 7205 }, { "epoch": 0.23249333045647302, "grad_norm": 0.408203125, "learning_rate": 2.6978732833114025e-05, "loss": 1.9775, "step": 7206 }, { "epoch": 0.2325255943102694, "grad_norm": 0.373046875, "learning_rate": 2.697778555910092e-05, "loss": 1.9876, "step": 7207 }, { "epoch": 0.23255785816406574, "grad_norm": 0.41796875, "learning_rate": 2.6976838153244943e-05, "loss": 1.9894, "step": 7208 }, { "epoch": 0.23259012201786208, "grad_norm": 0.4140625, "learning_rate": 2.697589061555652e-05, "loss": 2.0011, "step": 7209 }, { "epoch": 0.23262238587165843, "grad_norm": 0.390625, "learning_rate": 2.6974942946046087e-05, "loss": 2.0152, "step": 7210 }, { "epoch": 0.23265464972545477, "grad_norm": 0.384765625, "learning_rate": 2.6973995144724072e-05, "loss": 2.0063, "step": 7211 }, { "epoch": 0.23268691357925111, "grad_norm": 0.404296875, "learning_rate": 2.6973047211600908e-05, "loss": 2.0327, "step": 7212 }, { "epoch": 0.23271917743304746, "grad_norm": 0.38671875, "learning_rate": 2.697209914668703e-05, "loss": 1.9992, "step": 7213 }, { "epoch": 0.2327514412868438, "grad_norm": 0.400390625, "learning_rate": 2.6971150949992875e-05, "loss": 1.9892, "step": 7214 }, { "epoch": 0.23278370514064015, "grad_norm": 0.388671875, "learning_rate": 2.6970202621528877e-05, "loss": 2.0388, "step": 7215 }, { "epoch": 0.2328159689944365, "grad_norm": 0.3671875, "learning_rate": 2.6969254161305475e-05, "loss": 2.0079, "step": 7216 }, { "epoch": 0.23284823284823286, "grad_norm": 0.390625, "learning_rate": 2.6968305569333115e-05, "loss": 1.9924, "step": 7217 }, { "epoch": 0.2328804967020292, "grad_norm": 0.458984375, "learning_rate": 2.696735684562223e-05, "loss": 2.0484, "step": 7218 }, { "epoch": 0.23291276055582555, "grad_norm": 0.3828125, "learning_rate": 2.6966407990183266e-05, "loss": 2.0051, "step": 7219 }, { "epoch": 0.2329450244096219, "grad_norm": 0.38671875, "learning_rate": 2.6965459003026665e-05, "loss": 2.0053, "step": 7220 }, { "epoch": 0.23297728826341824, "grad_norm": 0.37890625, "learning_rate": 2.696450988416288e-05, "loss": 1.9754, "step": 7221 }, { "epoch": 0.23300955211721458, "grad_norm": 0.404296875, "learning_rate": 2.696356063360235e-05, "loss": 2.0104, "step": 7222 }, { "epoch": 0.23304181597101092, "grad_norm": 0.380859375, "learning_rate": 2.6962611251355528e-05, "loss": 1.9706, "step": 7223 }, { "epoch": 0.23307407982480727, "grad_norm": 0.4296875, "learning_rate": 2.6961661737432867e-05, "loss": 2.0281, "step": 7224 }, { "epoch": 0.2331063436786036, "grad_norm": 0.4296875, "learning_rate": 2.696071209184481e-05, "loss": 1.9857, "step": 7225 }, { "epoch": 0.23313860753239996, "grad_norm": 0.498046875, "learning_rate": 2.695976231460181e-05, "loss": 2.0253, "step": 7226 }, { "epoch": 0.23317087138619633, "grad_norm": 0.44921875, "learning_rate": 2.6958812405714338e-05, "loss": 2.0127, "step": 7227 }, { "epoch": 0.23320313523999267, "grad_norm": 0.408203125, "learning_rate": 2.6957862365192832e-05, "loss": 1.9924, "step": 7228 }, { "epoch": 0.23323539909378901, "grad_norm": 0.44921875, "learning_rate": 2.6956912193047756e-05, "loss": 2.0355, "step": 7229 }, { "epoch": 0.23326766294758536, "grad_norm": 0.5859375, "learning_rate": 2.695596188928957e-05, "loss": 2.0247, "step": 7230 }, { "epoch": 0.2332999268013817, "grad_norm": 0.546875, "learning_rate": 2.6955011453928733e-05, "loss": 1.993, "step": 7231 }, { "epoch": 0.23333219065517805, "grad_norm": 0.3984375, "learning_rate": 2.6954060886975705e-05, "loss": 2.008, "step": 7232 }, { "epoch": 0.2333644545089744, "grad_norm": 0.53515625, "learning_rate": 2.695311018844095e-05, "loss": 2.062, "step": 7233 }, { "epoch": 0.23339671836277073, "grad_norm": 0.462890625, "learning_rate": 2.695215935833493e-05, "loss": 2.0102, "step": 7234 }, { "epoch": 0.23342898221656708, "grad_norm": 0.435546875, "learning_rate": 2.6951208396668123e-05, "loss": 2.0061, "step": 7235 }, { "epoch": 0.23346124607036342, "grad_norm": 0.46875, "learning_rate": 2.6950257303450984e-05, "loss": 2.0033, "step": 7236 }, { "epoch": 0.2334935099241598, "grad_norm": 0.42578125, "learning_rate": 2.694930607869398e-05, "loss": 2.0307, "step": 7237 }, { "epoch": 0.23352577377795614, "grad_norm": 0.45703125, "learning_rate": 2.6948354722407592e-05, "loss": 2.0123, "step": 7238 }, { "epoch": 0.23355803763175248, "grad_norm": 0.42578125, "learning_rate": 2.6947403234602286e-05, "loss": 1.9651, "step": 7239 }, { "epoch": 0.23359030148554882, "grad_norm": 0.4296875, "learning_rate": 2.6946451615288537e-05, "loss": 2.0037, "step": 7240 }, { "epoch": 0.23362256533934517, "grad_norm": 0.419921875, "learning_rate": 2.694549986447682e-05, "loss": 2.0039, "step": 7241 }, { "epoch": 0.2336548291931415, "grad_norm": 0.44921875, "learning_rate": 2.6944547982177607e-05, "loss": 2.0145, "step": 7242 }, { "epoch": 0.23368709304693785, "grad_norm": 0.392578125, "learning_rate": 2.6943595968401385e-05, "loss": 2.0206, "step": 7243 }, { "epoch": 0.2337193569007342, "grad_norm": 0.4296875, "learning_rate": 2.6942643823158622e-05, "loss": 1.9866, "step": 7244 }, { "epoch": 0.23375162075453054, "grad_norm": 0.390625, "learning_rate": 2.69416915464598e-05, "loss": 2.021, "step": 7245 }, { "epoch": 0.2337838846083269, "grad_norm": 0.40625, "learning_rate": 2.6940739138315413e-05, "loss": 2.0168, "step": 7246 }, { "epoch": 0.23381614846212326, "grad_norm": 0.435546875, "learning_rate": 2.693978659873593e-05, "loss": 2.03, "step": 7247 }, { "epoch": 0.2338484123159196, "grad_norm": 0.39453125, "learning_rate": 2.693883392773184e-05, "loss": 2.0356, "step": 7248 }, { "epoch": 0.23388067616971595, "grad_norm": 0.439453125, "learning_rate": 2.693788112531364e-05, "loss": 2.0348, "step": 7249 }, { "epoch": 0.2339129400235123, "grad_norm": 0.427734375, "learning_rate": 2.6936928191491797e-05, "loss": 2.0001, "step": 7250 }, { "epoch": 0.23394520387730863, "grad_norm": 0.39453125, "learning_rate": 2.6935975126276816e-05, "loss": 1.9932, "step": 7251 }, { "epoch": 0.23397746773110498, "grad_norm": 0.419921875, "learning_rate": 2.6935021929679188e-05, "loss": 2.0277, "step": 7252 }, { "epoch": 0.23400973158490132, "grad_norm": 0.416015625, "learning_rate": 2.69340686017094e-05, "loss": 2.0168, "step": 7253 }, { "epoch": 0.23404199543869766, "grad_norm": 0.43359375, "learning_rate": 2.693311514237794e-05, "loss": 2.0058, "step": 7254 }, { "epoch": 0.234074259292494, "grad_norm": 0.419921875, "learning_rate": 2.6932161551695313e-05, "loss": 2.0536, "step": 7255 }, { "epoch": 0.23410652314629035, "grad_norm": 0.439453125, "learning_rate": 2.6931207829672014e-05, "loss": 2.0499, "step": 7256 }, { "epoch": 0.2341387870000867, "grad_norm": 0.416015625, "learning_rate": 2.6930253976318534e-05, "loss": 2.0054, "step": 7257 }, { "epoch": 0.23417105085388307, "grad_norm": 0.416015625, "learning_rate": 2.6929299991645385e-05, "loss": 2.0217, "step": 7258 }, { "epoch": 0.2342033147076794, "grad_norm": 0.41796875, "learning_rate": 2.692834587566305e-05, "loss": 1.987, "step": 7259 }, { "epoch": 0.23423557856147575, "grad_norm": 0.44140625, "learning_rate": 2.692739162838205e-05, "loss": 2.0171, "step": 7260 }, { "epoch": 0.2342678424152721, "grad_norm": 0.408203125, "learning_rate": 2.6926437249812874e-05, "loss": 2.0011, "step": 7261 }, { "epoch": 0.23430010626906844, "grad_norm": 0.41015625, "learning_rate": 2.6925482739966034e-05, "loss": 1.9436, "step": 7262 }, { "epoch": 0.23433237012286479, "grad_norm": 0.423828125, "learning_rate": 2.6924528098852034e-05, "loss": 1.9356, "step": 7263 }, { "epoch": 0.23436463397666113, "grad_norm": 0.443359375, "learning_rate": 2.6923573326481382e-05, "loss": 2.0221, "step": 7264 }, { "epoch": 0.23439689783045747, "grad_norm": 0.43359375, "learning_rate": 2.6922618422864595e-05, "loss": 1.989, "step": 7265 }, { "epoch": 0.23442916168425382, "grad_norm": 0.404296875, "learning_rate": 2.6921663388012172e-05, "loss": 1.9831, "step": 7266 }, { "epoch": 0.23446142553805016, "grad_norm": 0.43359375, "learning_rate": 2.6920708221934636e-05, "loss": 2.0329, "step": 7267 }, { "epoch": 0.23449368939184653, "grad_norm": 0.427734375, "learning_rate": 2.691975292464249e-05, "loss": 2.0239, "step": 7268 }, { "epoch": 0.23452595324564288, "grad_norm": 0.400390625, "learning_rate": 2.6918797496146258e-05, "loss": 1.987, "step": 7269 }, { "epoch": 0.23455821709943922, "grad_norm": 0.41015625, "learning_rate": 2.6917841936456452e-05, "loss": 1.9323, "step": 7270 }, { "epoch": 0.23459048095323556, "grad_norm": 0.419921875, "learning_rate": 2.6916886245583594e-05, "loss": 2.018, "step": 7271 }, { "epoch": 0.2346227448070319, "grad_norm": 0.44140625, "learning_rate": 2.69159304235382e-05, "loss": 1.9878, "step": 7272 }, { "epoch": 0.23465500866082825, "grad_norm": 0.47265625, "learning_rate": 2.6914974470330797e-05, "loss": 2.0251, "step": 7273 }, { "epoch": 0.2346872725146246, "grad_norm": 0.45703125, "learning_rate": 2.6914018385971897e-05, "loss": 1.9894, "step": 7274 }, { "epoch": 0.23471953636842094, "grad_norm": 0.474609375, "learning_rate": 2.6913062170472032e-05, "loss": 2.0464, "step": 7275 }, { "epoch": 0.23475180022221728, "grad_norm": 0.443359375, "learning_rate": 2.6912105823841724e-05, "loss": 2.0072, "step": 7276 }, { "epoch": 0.23478406407601363, "grad_norm": 0.42578125, "learning_rate": 2.69111493460915e-05, "loss": 2.0298, "step": 7277 }, { "epoch": 0.23481632792981, "grad_norm": 0.41015625, "learning_rate": 2.691019273723189e-05, "loss": 1.961, "step": 7278 }, { "epoch": 0.23484859178360634, "grad_norm": 0.455078125, "learning_rate": 2.6909235997273426e-05, "loss": 1.9955, "step": 7279 }, { "epoch": 0.23488085563740269, "grad_norm": 0.41796875, "learning_rate": 2.690827912622663e-05, "loss": 2.0071, "step": 7280 }, { "epoch": 0.23491311949119903, "grad_norm": 0.419921875, "learning_rate": 2.6907322124102042e-05, "loss": 1.9903, "step": 7281 }, { "epoch": 0.23494538334499537, "grad_norm": 0.40625, "learning_rate": 2.6906364990910196e-05, "loss": 2.0226, "step": 7282 }, { "epoch": 0.23497764719879172, "grad_norm": 0.396484375, "learning_rate": 2.6905407726661624e-05, "loss": 2.0059, "step": 7283 }, { "epoch": 0.23500991105258806, "grad_norm": 0.43359375, "learning_rate": 2.6904450331366864e-05, "loss": 2.0312, "step": 7284 }, { "epoch": 0.2350421749063844, "grad_norm": 0.40625, "learning_rate": 2.690349280503646e-05, "loss": 2.0404, "step": 7285 }, { "epoch": 0.23507443876018075, "grad_norm": 0.39453125, "learning_rate": 2.6902535147680944e-05, "loss": 2.0189, "step": 7286 }, { "epoch": 0.2351067026139771, "grad_norm": 0.390625, "learning_rate": 2.6901577359310855e-05, "loss": 2.0216, "step": 7287 }, { "epoch": 0.23513896646777346, "grad_norm": 0.373046875, "learning_rate": 2.6900619439936746e-05, "loss": 2.0137, "step": 7288 }, { "epoch": 0.2351712303215698, "grad_norm": 0.412109375, "learning_rate": 2.6899661389569158e-05, "loss": 2.0301, "step": 7289 }, { "epoch": 0.23520349417536615, "grad_norm": 0.451171875, "learning_rate": 2.689870320821863e-05, "loss": 2.0299, "step": 7290 }, { "epoch": 0.2352357580291625, "grad_norm": 0.4140625, "learning_rate": 2.6897744895895715e-05, "loss": 2.0321, "step": 7291 }, { "epoch": 0.23526802188295884, "grad_norm": 0.4296875, "learning_rate": 2.689678645261096e-05, "loss": 2.0243, "step": 7292 }, { "epoch": 0.23530028573675518, "grad_norm": 0.396484375, "learning_rate": 2.689582787837491e-05, "loss": 1.9924, "step": 7293 }, { "epoch": 0.23533254959055153, "grad_norm": 0.388671875, "learning_rate": 2.6894869173198123e-05, "loss": 2.0143, "step": 7294 }, { "epoch": 0.23536481344434787, "grad_norm": 0.41015625, "learning_rate": 2.689391033709115e-05, "loss": 2.0259, "step": 7295 }, { "epoch": 0.23539707729814421, "grad_norm": 0.380859375, "learning_rate": 2.689295137006455e-05, "loss": 1.9993, "step": 7296 }, { "epoch": 0.23542934115194056, "grad_norm": 0.41796875, "learning_rate": 2.6891992272128868e-05, "loss": 2.0123, "step": 7297 }, { "epoch": 0.23546160500573693, "grad_norm": 0.431640625, "learning_rate": 2.6891033043294664e-05, "loss": 2.0125, "step": 7298 }, { "epoch": 0.23549386885953327, "grad_norm": 0.42578125, "learning_rate": 2.68900736835725e-05, "loss": 2.0311, "step": 7299 }, { "epoch": 0.23552613271332962, "grad_norm": 0.44140625, "learning_rate": 2.6889114192972938e-05, "loss": 2.0168, "step": 7300 }, { "epoch": 0.23555839656712596, "grad_norm": 0.384765625, "learning_rate": 2.688815457150653e-05, "loss": 2.014, "step": 7301 }, { "epoch": 0.2355906604209223, "grad_norm": 0.396484375, "learning_rate": 2.688719481918385e-05, "loss": 1.9953, "step": 7302 }, { "epoch": 0.23562292427471865, "grad_norm": 0.4921875, "learning_rate": 2.688623493601546e-05, "loss": 2.0205, "step": 7303 }, { "epoch": 0.235655188128515, "grad_norm": 0.4765625, "learning_rate": 2.688527492201192e-05, "loss": 2.0342, "step": 7304 }, { "epoch": 0.23568745198231134, "grad_norm": 0.42578125, "learning_rate": 2.68843147771838e-05, "loss": 2.0089, "step": 7305 }, { "epoch": 0.23571971583610768, "grad_norm": 0.36328125, "learning_rate": 2.6883354501541667e-05, "loss": 2.0203, "step": 7306 }, { "epoch": 0.23575197968990402, "grad_norm": 0.466796875, "learning_rate": 2.6882394095096093e-05, "loss": 2.0266, "step": 7307 }, { "epoch": 0.2357842435437004, "grad_norm": 0.462890625, "learning_rate": 2.6881433557857654e-05, "loss": 2.0208, "step": 7308 }, { "epoch": 0.23581650739749674, "grad_norm": 0.37109375, "learning_rate": 2.6880472889836915e-05, "loss": 2.0514, "step": 7309 }, { "epoch": 0.23584877125129308, "grad_norm": 0.46875, "learning_rate": 2.6879512091044453e-05, "loss": 2.0178, "step": 7310 }, { "epoch": 0.23588103510508943, "grad_norm": 0.427734375, "learning_rate": 2.6878551161490843e-05, "loss": 2.0203, "step": 7311 }, { "epoch": 0.23591329895888577, "grad_norm": 0.38671875, "learning_rate": 2.6877590101186662e-05, "loss": 1.9963, "step": 7312 }, { "epoch": 0.2359455628126821, "grad_norm": 0.498046875, "learning_rate": 2.6876628910142495e-05, "loss": 2.0377, "step": 7313 }, { "epoch": 0.23597782666647846, "grad_norm": 0.40234375, "learning_rate": 2.6875667588368913e-05, "loss": 1.9758, "step": 7314 }, { "epoch": 0.2360100905202748, "grad_norm": 0.466796875, "learning_rate": 2.6874706135876504e-05, "loss": 2.0321, "step": 7315 }, { "epoch": 0.23604235437407114, "grad_norm": 0.44921875, "learning_rate": 2.687374455267585e-05, "loss": 2.04, "step": 7316 }, { "epoch": 0.2360746182278675, "grad_norm": 0.40625, "learning_rate": 2.6872782838777536e-05, "loss": 2.0137, "step": 7317 }, { "epoch": 0.23610688208166386, "grad_norm": 0.400390625, "learning_rate": 2.687182099419214e-05, "loss": 1.9694, "step": 7318 }, { "epoch": 0.2361391459354602, "grad_norm": 0.42578125, "learning_rate": 2.6870859018930256e-05, "loss": 2.0264, "step": 7319 }, { "epoch": 0.23617140978925655, "grad_norm": 0.39453125, "learning_rate": 2.6869896913002473e-05, "loss": 2.02, "step": 7320 }, { "epoch": 0.2362036736430529, "grad_norm": 0.416015625, "learning_rate": 2.6868934676419382e-05, "loss": 2.0404, "step": 7321 }, { "epoch": 0.23623593749684924, "grad_norm": 0.396484375, "learning_rate": 2.6867972309191574e-05, "loss": 2.0252, "step": 7322 }, { "epoch": 0.23626820135064558, "grad_norm": 0.42578125, "learning_rate": 2.6867009811329634e-05, "loss": 2.014, "step": 7323 }, { "epoch": 0.23630046520444192, "grad_norm": 0.42578125, "learning_rate": 2.6866047182844173e-05, "loss": 2.0298, "step": 7324 }, { "epoch": 0.23633272905823827, "grad_norm": 0.37109375, "learning_rate": 2.686508442374577e-05, "loss": 2.0393, "step": 7325 }, { "epoch": 0.2363649929120346, "grad_norm": 0.388671875, "learning_rate": 2.6864121534045034e-05, "loss": 2.0108, "step": 7326 }, { "epoch": 0.23639725676583095, "grad_norm": 0.36328125, "learning_rate": 2.6863158513752556e-05, "loss": 2.0068, "step": 7327 }, { "epoch": 0.23642952061962733, "grad_norm": 0.361328125, "learning_rate": 2.686219536287894e-05, "loss": 2.0579, "step": 7328 }, { "epoch": 0.23646178447342367, "grad_norm": 0.373046875, "learning_rate": 2.686123208143479e-05, "loss": 1.9933, "step": 7329 }, { "epoch": 0.23649404832722, "grad_norm": 0.369140625, "learning_rate": 2.6860268669430708e-05, "loss": 2.0089, "step": 7330 }, { "epoch": 0.23652631218101636, "grad_norm": 0.3984375, "learning_rate": 2.685930512687729e-05, "loss": 1.9723, "step": 7331 }, { "epoch": 0.2365585760348127, "grad_norm": 0.369140625, "learning_rate": 2.6858341453785154e-05, "loss": 2.0071, "step": 7332 }, { "epoch": 0.23659083988860904, "grad_norm": 0.373046875, "learning_rate": 2.68573776501649e-05, "loss": 2.0323, "step": 7333 }, { "epoch": 0.2366231037424054, "grad_norm": 0.36328125, "learning_rate": 2.6856413716027143e-05, "loss": 2.0064, "step": 7334 }, { "epoch": 0.23665536759620173, "grad_norm": 0.37109375, "learning_rate": 2.685544965138249e-05, "loss": 2.0253, "step": 7335 }, { "epoch": 0.23668763144999808, "grad_norm": 0.380859375, "learning_rate": 2.6854485456241545e-05, "loss": 2.0326, "step": 7336 }, { "epoch": 0.23671989530379442, "grad_norm": 0.373046875, "learning_rate": 2.6853521130614934e-05, "loss": 2.0016, "step": 7337 }, { "epoch": 0.23675215915759076, "grad_norm": 0.404296875, "learning_rate": 2.6852556674513265e-05, "loss": 2.0371, "step": 7338 }, { "epoch": 0.23678442301138714, "grad_norm": 0.4140625, "learning_rate": 2.6851592087947157e-05, "loss": 2.0338, "step": 7339 }, { "epoch": 0.23681668686518348, "grad_norm": 0.412109375, "learning_rate": 2.6850627370927224e-05, "loss": 2.0019, "step": 7340 }, { "epoch": 0.23684895071897982, "grad_norm": 0.35546875, "learning_rate": 2.6849662523464085e-05, "loss": 2.0163, "step": 7341 }, { "epoch": 0.23688121457277617, "grad_norm": 0.3984375, "learning_rate": 2.6848697545568362e-05, "loss": 2.0528, "step": 7342 }, { "epoch": 0.2369134784265725, "grad_norm": 0.419921875, "learning_rate": 2.6847732437250677e-05, "loss": 2.0279, "step": 7343 }, { "epoch": 0.23694574228036885, "grad_norm": 0.39453125, "learning_rate": 2.6846767198521658e-05, "loss": 2.0034, "step": 7344 }, { "epoch": 0.2369780061341652, "grad_norm": 0.408203125, "learning_rate": 2.684580182939192e-05, "loss": 2.0001, "step": 7345 }, { "epoch": 0.23701026998796154, "grad_norm": 0.44140625, "learning_rate": 2.684483632987209e-05, "loss": 2.0013, "step": 7346 }, { "epoch": 0.23704253384175789, "grad_norm": 0.474609375, "learning_rate": 2.6843870699972805e-05, "loss": 2.0053, "step": 7347 }, { "epoch": 0.23707479769555423, "grad_norm": 0.392578125, "learning_rate": 2.6842904939704685e-05, "loss": 2.0113, "step": 7348 }, { "epoch": 0.2371070615493506, "grad_norm": 0.419921875, "learning_rate": 2.6841939049078364e-05, "loss": 2.0323, "step": 7349 }, { "epoch": 0.23713932540314694, "grad_norm": 0.40234375, "learning_rate": 2.6840973028104474e-05, "loss": 2.0075, "step": 7350 }, { "epoch": 0.2371715892569433, "grad_norm": 0.45703125, "learning_rate": 2.684000687679365e-05, "loss": 2.0089, "step": 7351 }, { "epoch": 0.23720385311073963, "grad_norm": 0.43359375, "learning_rate": 2.6839040595156517e-05, "loss": 2.0001, "step": 7352 }, { "epoch": 0.23723611696453598, "grad_norm": 0.388671875, "learning_rate": 2.6838074183203728e-05, "loss": 2.0073, "step": 7353 }, { "epoch": 0.23726838081833232, "grad_norm": 0.421875, "learning_rate": 2.6837107640945904e-05, "loss": 2.026, "step": 7354 }, { "epoch": 0.23730064467212866, "grad_norm": 0.41796875, "learning_rate": 2.6836140968393695e-05, "loss": 2.0269, "step": 7355 }, { "epoch": 0.237332908525925, "grad_norm": 0.390625, "learning_rate": 2.6835174165557732e-05, "loss": 2.0413, "step": 7356 }, { "epoch": 0.23736517237972135, "grad_norm": 0.400390625, "learning_rate": 2.683420723244867e-05, "loss": 2.0135, "step": 7357 }, { "epoch": 0.2373974362335177, "grad_norm": 0.39453125, "learning_rate": 2.683324016907714e-05, "loss": 1.9916, "step": 7358 }, { "epoch": 0.23742970008731407, "grad_norm": 0.451171875, "learning_rate": 2.683227297545379e-05, "loss": 2.0198, "step": 7359 }, { "epoch": 0.2374619639411104, "grad_norm": 0.369140625, "learning_rate": 2.683130565158927e-05, "loss": 1.991, "step": 7360 }, { "epoch": 0.23749422779490675, "grad_norm": 0.421875, "learning_rate": 2.6830338197494226e-05, "loss": 1.976, "step": 7361 }, { "epoch": 0.2375264916487031, "grad_norm": 0.39453125, "learning_rate": 2.6829370613179303e-05, "loss": 1.9729, "step": 7362 }, { "epoch": 0.23755875550249944, "grad_norm": 0.427734375, "learning_rate": 2.6828402898655154e-05, "loss": 2.0249, "step": 7363 }, { "epoch": 0.23759101935629579, "grad_norm": 0.41015625, "learning_rate": 2.6827435053932437e-05, "loss": 1.9982, "step": 7364 }, { "epoch": 0.23762328321009213, "grad_norm": 0.36328125, "learning_rate": 2.68264670790218e-05, "loss": 1.9956, "step": 7365 }, { "epoch": 0.23765554706388847, "grad_norm": 0.384765625, "learning_rate": 2.682549897393389e-05, "loss": 2.0179, "step": 7366 }, { "epoch": 0.23768781091768482, "grad_norm": 0.357421875, "learning_rate": 2.6824530738679377e-05, "loss": 2.0041, "step": 7367 }, { "epoch": 0.23772007477148116, "grad_norm": 0.396484375, "learning_rate": 2.6823562373268912e-05, "loss": 2.0151, "step": 7368 }, { "epoch": 0.23775233862527753, "grad_norm": 0.380859375, "learning_rate": 2.6822593877713152e-05, "loss": 2.0072, "step": 7369 }, { "epoch": 0.23778460247907388, "grad_norm": 0.365234375, "learning_rate": 2.682162525202276e-05, "loss": 2.0001, "step": 7370 }, { "epoch": 0.23781686633287022, "grad_norm": 0.369140625, "learning_rate": 2.68206564962084e-05, "loss": 1.9711, "step": 7371 }, { "epoch": 0.23784913018666656, "grad_norm": 0.40234375, "learning_rate": 2.681968761028073e-05, "loss": 2.021, "step": 7372 }, { "epoch": 0.2378813940404629, "grad_norm": 0.41015625, "learning_rate": 2.6818718594250422e-05, "loss": 1.9967, "step": 7373 }, { "epoch": 0.23791365789425925, "grad_norm": 0.427734375, "learning_rate": 2.6817749448128137e-05, "loss": 2.005, "step": 7374 }, { "epoch": 0.2379459217480556, "grad_norm": 0.392578125, "learning_rate": 2.681678017192455e-05, "loss": 2.0036, "step": 7375 }, { "epoch": 0.23797818560185194, "grad_norm": 0.419921875, "learning_rate": 2.6815810765650315e-05, "loss": 2.0643, "step": 7376 }, { "epoch": 0.23801044945564828, "grad_norm": 0.41796875, "learning_rate": 2.6814841229316113e-05, "loss": 2.0477, "step": 7377 }, { "epoch": 0.23804271330944463, "grad_norm": 0.5, "learning_rate": 2.681387156293262e-05, "loss": 2.0243, "step": 7378 }, { "epoch": 0.238074977163241, "grad_norm": 0.5, "learning_rate": 2.6812901766510504e-05, "loss": 2.0129, "step": 7379 }, { "epoch": 0.23810724101703734, "grad_norm": 0.416015625, "learning_rate": 2.6811931840060434e-05, "loss": 2.0256, "step": 7380 }, { "epoch": 0.23813950487083368, "grad_norm": 0.42578125, "learning_rate": 2.6810961783593095e-05, "loss": 2.0298, "step": 7381 }, { "epoch": 0.23817176872463003, "grad_norm": 0.466796875, "learning_rate": 2.680999159711916e-05, "loss": 2.0291, "step": 7382 }, { "epoch": 0.23820403257842637, "grad_norm": 0.400390625, "learning_rate": 2.6809021280649315e-05, "loss": 1.9868, "step": 7383 }, { "epoch": 0.23823629643222272, "grad_norm": 0.416015625, "learning_rate": 2.6808050834194233e-05, "loss": 2.019, "step": 7384 }, { "epoch": 0.23826856028601906, "grad_norm": 0.5078125, "learning_rate": 2.6807080257764596e-05, "loss": 2.0103, "step": 7385 }, { "epoch": 0.2383008241398154, "grad_norm": 0.419921875, "learning_rate": 2.6806109551371095e-05, "loss": 2.0093, "step": 7386 }, { "epoch": 0.23833308799361175, "grad_norm": 0.47265625, "learning_rate": 2.6805138715024407e-05, "loss": 2.0336, "step": 7387 }, { "epoch": 0.2383653518474081, "grad_norm": 0.44140625, "learning_rate": 2.6804167748735217e-05, "loss": 1.9979, "step": 7388 }, { "epoch": 0.23839761570120446, "grad_norm": 0.44921875, "learning_rate": 2.6803196652514223e-05, "loss": 1.974, "step": 7389 }, { "epoch": 0.2384298795550008, "grad_norm": 0.435546875, "learning_rate": 2.6802225426372104e-05, "loss": 1.9908, "step": 7390 }, { "epoch": 0.23846214340879715, "grad_norm": 0.474609375, "learning_rate": 2.6801254070319555e-05, "loss": 1.9697, "step": 7391 }, { "epoch": 0.2384944072625935, "grad_norm": 0.375, "learning_rate": 2.6800282584367267e-05, "loss": 1.975, "step": 7392 }, { "epoch": 0.23852667111638984, "grad_norm": 0.45703125, "learning_rate": 2.6799310968525936e-05, "loss": 2.0138, "step": 7393 }, { "epoch": 0.23855893497018618, "grad_norm": 0.455078125, "learning_rate": 2.679833922280625e-05, "loss": 2.0098, "step": 7394 }, { "epoch": 0.23859119882398253, "grad_norm": 0.384765625, "learning_rate": 2.679736734721892e-05, "loss": 2.0074, "step": 7395 }, { "epoch": 0.23862346267777887, "grad_norm": 0.392578125, "learning_rate": 2.6796395341774626e-05, "loss": 1.9766, "step": 7396 }, { "epoch": 0.2386557265315752, "grad_norm": 0.404296875, "learning_rate": 2.679542320648407e-05, "loss": 1.9703, "step": 7397 }, { "epoch": 0.23868799038537156, "grad_norm": 0.38671875, "learning_rate": 2.679445094135797e-05, "loss": 2.0143, "step": 7398 }, { "epoch": 0.23872025423916793, "grad_norm": 0.41015625, "learning_rate": 2.6793478546407005e-05, "loss": 2.0007, "step": 7399 }, { "epoch": 0.23875251809296427, "grad_norm": 0.453125, "learning_rate": 2.6792506021641893e-05, "loss": 2.0025, "step": 7400 }, { "epoch": 0.23878478194676062, "grad_norm": 0.400390625, "learning_rate": 2.679153336707333e-05, "loss": 2.0085, "step": 7401 }, { "epoch": 0.23881704580055696, "grad_norm": 0.384765625, "learning_rate": 2.679056058271203e-05, "loss": 2.0105, "step": 7402 }, { "epoch": 0.2388493096543533, "grad_norm": 0.44921875, "learning_rate": 2.6789587668568702e-05, "loss": 2.0226, "step": 7403 }, { "epoch": 0.23888157350814965, "grad_norm": 0.388671875, "learning_rate": 2.6788614624654047e-05, "loss": 1.9897, "step": 7404 }, { "epoch": 0.238913837361946, "grad_norm": 0.392578125, "learning_rate": 2.6787641450978782e-05, "loss": 2.0067, "step": 7405 }, { "epoch": 0.23894610121574233, "grad_norm": 0.388671875, "learning_rate": 2.6786668147553612e-05, "loss": 2.0286, "step": 7406 }, { "epoch": 0.23897836506953868, "grad_norm": 0.380859375, "learning_rate": 2.678569471438926e-05, "loss": 2.0378, "step": 7407 }, { "epoch": 0.23901062892333502, "grad_norm": 0.40234375, "learning_rate": 2.678472115149643e-05, "loss": 1.9982, "step": 7408 }, { "epoch": 0.2390428927771314, "grad_norm": 0.41015625, "learning_rate": 2.678374745888584e-05, "loss": 2.0184, "step": 7409 }, { "epoch": 0.23907515663092774, "grad_norm": 0.396484375, "learning_rate": 2.6782773636568225e-05, "loss": 1.9811, "step": 7410 }, { "epoch": 0.23910742048472408, "grad_norm": 0.41015625, "learning_rate": 2.6781799684554276e-05, "loss": 2.0124, "step": 7411 }, { "epoch": 0.23913968433852043, "grad_norm": 0.419921875, "learning_rate": 2.678082560285474e-05, "loss": 2.0299, "step": 7412 }, { "epoch": 0.23917194819231677, "grad_norm": 0.375, "learning_rate": 2.6779851391480322e-05, "loss": 2.015, "step": 7413 }, { "epoch": 0.2392042120461131, "grad_norm": 0.427734375, "learning_rate": 2.677887705044175e-05, "loss": 1.9986, "step": 7414 }, { "epoch": 0.23923647589990946, "grad_norm": 0.408203125, "learning_rate": 2.677790257974975e-05, "loss": 2.0363, "step": 7415 }, { "epoch": 0.2392687397537058, "grad_norm": 0.462890625, "learning_rate": 2.6776927979415054e-05, "loss": 1.9581, "step": 7416 }, { "epoch": 0.23930100360750214, "grad_norm": 0.39453125, "learning_rate": 2.6775953249448374e-05, "loss": 2.0071, "step": 7417 }, { "epoch": 0.2393332674612985, "grad_norm": 0.3984375, "learning_rate": 2.6774978389860456e-05, "loss": 1.9964, "step": 7418 }, { "epoch": 0.23936553131509486, "grad_norm": 0.388671875, "learning_rate": 2.677400340066202e-05, "loss": 2.0201, "step": 7419 }, { "epoch": 0.2393977951688912, "grad_norm": 0.400390625, "learning_rate": 2.6773028281863802e-05, "loss": 1.9491, "step": 7420 }, { "epoch": 0.23943005902268755, "grad_norm": 0.3984375, "learning_rate": 2.6772053033476533e-05, "loss": 1.976, "step": 7421 }, { "epoch": 0.2394623228764839, "grad_norm": 0.388671875, "learning_rate": 2.677107765551095e-05, "loss": 2.0117, "step": 7422 }, { "epoch": 0.23949458673028023, "grad_norm": 0.388671875, "learning_rate": 2.6770102147977793e-05, "loss": 2.0315, "step": 7423 }, { "epoch": 0.23952685058407658, "grad_norm": 0.423828125, "learning_rate": 2.676912651088779e-05, "loss": 1.9893, "step": 7424 }, { "epoch": 0.23955911443787292, "grad_norm": 0.4296875, "learning_rate": 2.6768150744251685e-05, "loss": 1.9858, "step": 7425 }, { "epoch": 0.23959137829166927, "grad_norm": 0.408203125, "learning_rate": 2.6767174848080225e-05, "loss": 2.0135, "step": 7426 }, { "epoch": 0.2396236421454656, "grad_norm": 0.41796875, "learning_rate": 2.6766198822384142e-05, "loss": 2.0252, "step": 7427 }, { "epoch": 0.23965590599926195, "grad_norm": 0.39453125, "learning_rate": 2.6765222667174184e-05, "loss": 2.0276, "step": 7428 }, { "epoch": 0.2396881698530583, "grad_norm": 0.408203125, "learning_rate": 2.6764246382461094e-05, "loss": 2.0094, "step": 7429 }, { "epoch": 0.23972043370685467, "grad_norm": 0.44921875, "learning_rate": 2.676326996825562e-05, "loss": 2.0144, "step": 7430 }, { "epoch": 0.239752697560651, "grad_norm": 0.4609375, "learning_rate": 2.6762293424568506e-05, "loss": 2.0034, "step": 7431 }, { "epoch": 0.23978496141444736, "grad_norm": 0.41796875, "learning_rate": 2.676131675141051e-05, "loss": 1.982, "step": 7432 }, { "epoch": 0.2398172252682437, "grad_norm": 0.40625, "learning_rate": 2.6760339948792374e-05, "loss": 1.9573, "step": 7433 }, { "epoch": 0.23984948912204004, "grad_norm": 0.392578125, "learning_rate": 2.6759363016724854e-05, "loss": 2.0067, "step": 7434 }, { "epoch": 0.2398817529758364, "grad_norm": 0.373046875, "learning_rate": 2.6758385955218703e-05, "loss": 2.0294, "step": 7435 }, { "epoch": 0.23991401682963273, "grad_norm": 0.40234375, "learning_rate": 2.6757408764284673e-05, "loss": 2.0206, "step": 7436 }, { "epoch": 0.23994628068342908, "grad_norm": 0.40625, "learning_rate": 2.6756431443933517e-05, "loss": 2.0082, "step": 7437 }, { "epoch": 0.23997854453722542, "grad_norm": 0.427734375, "learning_rate": 2.6755453994176004e-05, "loss": 2.0557, "step": 7438 }, { "epoch": 0.24001080839102176, "grad_norm": 0.421875, "learning_rate": 2.675447641502289e-05, "loss": 2.0037, "step": 7439 }, { "epoch": 0.24004307224481813, "grad_norm": 0.361328125, "learning_rate": 2.6753498706484928e-05, "loss": 1.9703, "step": 7440 }, { "epoch": 0.24007533609861448, "grad_norm": 0.427734375, "learning_rate": 2.6752520868572883e-05, "loss": 2.0301, "step": 7441 }, { "epoch": 0.24010759995241082, "grad_norm": 0.384765625, "learning_rate": 2.6751542901297518e-05, "loss": 2.0358, "step": 7442 }, { "epoch": 0.24013986380620717, "grad_norm": 0.37890625, "learning_rate": 2.6750564804669603e-05, "loss": 1.9679, "step": 7443 }, { "epoch": 0.2401721276600035, "grad_norm": 0.37109375, "learning_rate": 2.67495865786999e-05, "loss": 1.9747, "step": 7444 }, { "epoch": 0.24020439151379985, "grad_norm": 0.3828125, "learning_rate": 2.6748608223399175e-05, "loss": 2.0004, "step": 7445 }, { "epoch": 0.2402366553675962, "grad_norm": 0.412109375, "learning_rate": 2.67476297387782e-05, "loss": 1.9814, "step": 7446 }, { "epoch": 0.24026891922139254, "grad_norm": 0.375, "learning_rate": 2.6746651124847746e-05, "loss": 2.0276, "step": 7447 }, { "epoch": 0.24030118307518888, "grad_norm": 0.42578125, "learning_rate": 2.6745672381618582e-05, "loss": 2.0279, "step": 7448 }, { "epoch": 0.24033344692898523, "grad_norm": 0.423828125, "learning_rate": 2.674469350910148e-05, "loss": 2.0028, "step": 7449 }, { "epoch": 0.2403657107827816, "grad_norm": 0.423828125, "learning_rate": 2.6743714507307224e-05, "loss": 2.0144, "step": 7450 }, { "epoch": 0.24039797463657794, "grad_norm": 0.5390625, "learning_rate": 2.6742735376246577e-05, "loss": 2.0369, "step": 7451 }, { "epoch": 0.2404302384903743, "grad_norm": 0.5546875, "learning_rate": 2.6741756115930328e-05, "loss": 2.036, "step": 7452 }, { "epoch": 0.24046250234417063, "grad_norm": 0.43359375, "learning_rate": 2.674077672636925e-05, "loss": 1.9934, "step": 7453 }, { "epoch": 0.24049476619796697, "grad_norm": 0.4375, "learning_rate": 2.673979720757412e-05, "loss": 2.0181, "step": 7454 }, { "epoch": 0.24052703005176332, "grad_norm": 0.4453125, "learning_rate": 2.673881755955573e-05, "loss": 2.0324, "step": 7455 }, { "epoch": 0.24055929390555966, "grad_norm": 0.431640625, "learning_rate": 2.673783778232486e-05, "loss": 1.9907, "step": 7456 }, { "epoch": 0.240591557759356, "grad_norm": 0.388671875, "learning_rate": 2.6736857875892285e-05, "loss": 2.0006, "step": 7457 }, { "epoch": 0.24062382161315235, "grad_norm": 0.408203125, "learning_rate": 2.6735877840268803e-05, "loss": 2.0173, "step": 7458 }, { "epoch": 0.2406560854669487, "grad_norm": 0.375, "learning_rate": 2.6734897675465196e-05, "loss": 1.9613, "step": 7459 }, { "epoch": 0.24068834932074507, "grad_norm": 0.388671875, "learning_rate": 2.6733917381492257e-05, "loss": 2.0335, "step": 7460 }, { "epoch": 0.2407206131745414, "grad_norm": 0.5703125, "learning_rate": 2.6732936958360766e-05, "loss": 2.0487, "step": 7461 }, { "epoch": 0.24075287702833775, "grad_norm": 0.388671875, "learning_rate": 2.6731956406081527e-05, "loss": 2.0022, "step": 7462 }, { "epoch": 0.2407851408821341, "grad_norm": 0.3671875, "learning_rate": 2.673097572466533e-05, "loss": 2.0192, "step": 7463 }, { "epoch": 0.24081740473593044, "grad_norm": 0.3828125, "learning_rate": 2.6729994914122964e-05, "loss": 2.0178, "step": 7464 }, { "epoch": 0.24084966858972678, "grad_norm": 0.39453125, "learning_rate": 2.6729013974465232e-05, "loss": 2.0248, "step": 7465 }, { "epoch": 0.24088193244352313, "grad_norm": 0.38671875, "learning_rate": 2.6728032905702925e-05, "loss": 2.0069, "step": 7466 }, { "epoch": 0.24091419629731947, "grad_norm": 0.416015625, "learning_rate": 2.6727051707846853e-05, "loss": 1.9975, "step": 7467 }, { "epoch": 0.24094646015111582, "grad_norm": 0.38671875, "learning_rate": 2.67260703809078e-05, "loss": 2.0107, "step": 7468 }, { "epoch": 0.24097872400491216, "grad_norm": 0.39453125, "learning_rate": 2.6725088924896583e-05, "loss": 2.0044, "step": 7469 }, { "epoch": 0.24101098785870853, "grad_norm": 0.3984375, "learning_rate": 2.6724107339823994e-05, "loss": 2.0263, "step": 7470 }, { "epoch": 0.24104325171250487, "grad_norm": 0.39453125, "learning_rate": 2.6723125625700843e-05, "loss": 1.9856, "step": 7471 }, { "epoch": 0.24107551556630122, "grad_norm": 0.4140625, "learning_rate": 2.6722143782537938e-05, "loss": 2.0079, "step": 7472 }, { "epoch": 0.24110777942009756, "grad_norm": 0.376953125, "learning_rate": 2.6721161810346078e-05, "loss": 2.0093, "step": 7473 }, { "epoch": 0.2411400432738939, "grad_norm": 0.40625, "learning_rate": 2.6720179709136083e-05, "loss": 1.9995, "step": 7474 }, { "epoch": 0.24117230712769025, "grad_norm": 0.40234375, "learning_rate": 2.6719197478918756e-05, "loss": 2.0046, "step": 7475 }, { "epoch": 0.2412045709814866, "grad_norm": 0.390625, "learning_rate": 2.671821511970491e-05, "loss": 1.9502, "step": 7476 }, { "epoch": 0.24123683483528294, "grad_norm": 0.408203125, "learning_rate": 2.6717232631505363e-05, "loss": 1.9917, "step": 7477 }, { "epoch": 0.24126909868907928, "grad_norm": 0.45703125, "learning_rate": 2.671625001433092e-05, "loss": 1.9787, "step": 7478 }, { "epoch": 0.24130136254287562, "grad_norm": 0.376953125, "learning_rate": 2.67152672681924e-05, "loss": 2.008, "step": 7479 }, { "epoch": 0.241333626396672, "grad_norm": 0.46875, "learning_rate": 2.6714284393100627e-05, "loss": 1.9734, "step": 7480 }, { "epoch": 0.24136589025046834, "grad_norm": 0.462890625, "learning_rate": 2.6713301389066414e-05, "loss": 1.9974, "step": 7481 }, { "epoch": 0.24139815410426468, "grad_norm": 0.408203125, "learning_rate": 2.6712318256100582e-05, "loss": 2.0006, "step": 7482 }, { "epoch": 0.24143041795806103, "grad_norm": 0.46484375, "learning_rate": 2.6711334994213954e-05, "loss": 2.024, "step": 7483 }, { "epoch": 0.24146268181185737, "grad_norm": 0.41796875, "learning_rate": 2.671035160341735e-05, "loss": 1.9942, "step": 7484 }, { "epoch": 0.24149494566565372, "grad_norm": 0.47265625, "learning_rate": 2.67093680837216e-05, "loss": 2.0195, "step": 7485 }, { "epoch": 0.24152720951945006, "grad_norm": 0.60546875, "learning_rate": 2.6708384435137525e-05, "loss": 2.0423, "step": 7486 }, { "epoch": 0.2415594733732464, "grad_norm": 0.70703125, "learning_rate": 2.6707400657675955e-05, "loss": 2.0153, "step": 7487 }, { "epoch": 0.24159173722704275, "grad_norm": 0.5703125, "learning_rate": 2.6706416751347714e-05, "loss": 2.0012, "step": 7488 }, { "epoch": 0.2416240010808391, "grad_norm": 0.435546875, "learning_rate": 2.6705432716163637e-05, "loss": 1.9901, "step": 7489 }, { "epoch": 0.24165626493463546, "grad_norm": 0.66796875, "learning_rate": 2.6704448552134553e-05, "loss": 1.9975, "step": 7490 }, { "epoch": 0.2416885287884318, "grad_norm": 0.45703125, "learning_rate": 2.6703464259271297e-05, "loss": 2.0029, "step": 7491 }, { "epoch": 0.24172079264222815, "grad_norm": 0.52734375, "learning_rate": 2.6702479837584704e-05, "loss": 1.976, "step": 7492 }, { "epoch": 0.2417530564960245, "grad_norm": 0.46875, "learning_rate": 2.6701495287085607e-05, "loss": 2.0184, "step": 7493 }, { "epoch": 0.24178532034982084, "grad_norm": 0.470703125, "learning_rate": 2.6700510607784846e-05, "loss": 2.0171, "step": 7494 }, { "epoch": 0.24181758420361718, "grad_norm": 0.466796875, "learning_rate": 2.6699525799693254e-05, "loss": 1.9924, "step": 7495 }, { "epoch": 0.24184984805741352, "grad_norm": 0.4296875, "learning_rate": 2.6698540862821677e-05, "loss": 2.0125, "step": 7496 }, { "epoch": 0.24188211191120987, "grad_norm": 0.478515625, "learning_rate": 2.6697555797180956e-05, "loss": 2.0378, "step": 7497 }, { "epoch": 0.2419143757650062, "grad_norm": 0.396484375, "learning_rate": 2.6696570602781933e-05, "loss": 2.007, "step": 7498 }, { "epoch": 0.24194663961880256, "grad_norm": 0.474609375, "learning_rate": 2.6695585279635447e-05, "loss": 2.0358, "step": 7499 }, { "epoch": 0.24197890347259893, "grad_norm": 0.4296875, "learning_rate": 2.6694599827752357e-05, "loss": 2.0173, "step": 7500 }, { "epoch": 0.24201116732639527, "grad_norm": 0.474609375, "learning_rate": 2.6693614247143495e-05, "loss": 1.9902, "step": 7501 }, { "epoch": 0.24204343118019161, "grad_norm": 0.392578125, "learning_rate": 2.669262853781972e-05, "loss": 2.014, "step": 7502 }, { "epoch": 0.24207569503398796, "grad_norm": 0.490234375, "learning_rate": 2.6691642699791877e-05, "loss": 2.0239, "step": 7503 }, { "epoch": 0.2421079588877843, "grad_norm": 0.4375, "learning_rate": 2.669065673307082e-05, "loss": 2.0074, "step": 7504 }, { "epoch": 0.24214022274158065, "grad_norm": 0.44921875, "learning_rate": 2.66896706376674e-05, "loss": 1.9967, "step": 7505 }, { "epoch": 0.242172486595377, "grad_norm": 0.474609375, "learning_rate": 2.668868441359247e-05, "loss": 1.9763, "step": 7506 }, { "epoch": 0.24220475044917333, "grad_norm": 0.423828125, "learning_rate": 2.6687698060856888e-05, "loss": 2.0162, "step": 7507 }, { "epoch": 0.24223701430296968, "grad_norm": 0.470703125, "learning_rate": 2.6686711579471512e-05, "loss": 2.0403, "step": 7508 }, { "epoch": 0.24226927815676602, "grad_norm": 0.41015625, "learning_rate": 2.6685724969447192e-05, "loss": 2.0086, "step": 7509 }, { "epoch": 0.2423015420105624, "grad_norm": 0.486328125, "learning_rate": 2.6684738230794804e-05, "loss": 1.9832, "step": 7510 }, { "epoch": 0.24233380586435874, "grad_norm": 0.44921875, "learning_rate": 2.66837513635252e-05, "loss": 2.0017, "step": 7511 }, { "epoch": 0.24236606971815508, "grad_norm": 0.43359375, "learning_rate": 2.6682764367649236e-05, "loss": 1.9867, "step": 7512 }, { "epoch": 0.24239833357195142, "grad_norm": 0.435546875, "learning_rate": 2.6681777243177785e-05, "loss": 1.9949, "step": 7513 }, { "epoch": 0.24243059742574777, "grad_norm": 0.416015625, "learning_rate": 2.6680789990121712e-05, "loss": 2.0086, "step": 7514 }, { "epoch": 0.2424628612795441, "grad_norm": 0.435546875, "learning_rate": 2.6679802608491885e-05, "loss": 2.0049, "step": 7515 }, { "epoch": 0.24249512513334046, "grad_norm": 0.419921875, "learning_rate": 2.667881509829917e-05, "loss": 2.0041, "step": 7516 }, { "epoch": 0.2425273889871368, "grad_norm": 0.400390625, "learning_rate": 2.6677827459554433e-05, "loss": 2.0049, "step": 7517 }, { "epoch": 0.24255965284093314, "grad_norm": 0.423828125, "learning_rate": 2.667683969226855e-05, "loss": 2.0041, "step": 7518 }, { "epoch": 0.2425919166947295, "grad_norm": 0.384765625, "learning_rate": 2.6675851796452394e-05, "loss": 1.9871, "step": 7519 }, { "epoch": 0.24262418054852583, "grad_norm": 0.408203125, "learning_rate": 2.6674863772116833e-05, "loss": 1.9934, "step": 7520 }, { "epoch": 0.2426564444023222, "grad_norm": 0.392578125, "learning_rate": 2.6673875619272754e-05, "loss": 2.0036, "step": 7521 }, { "epoch": 0.24268870825611855, "grad_norm": 0.427734375, "learning_rate": 2.6672887337931022e-05, "loss": 2.0236, "step": 7522 }, { "epoch": 0.2427209721099149, "grad_norm": 0.44140625, "learning_rate": 2.6671898928102524e-05, "loss": 1.9898, "step": 7523 }, { "epoch": 0.24275323596371123, "grad_norm": 0.4140625, "learning_rate": 2.667091038979813e-05, "loss": 1.9606, "step": 7524 }, { "epoch": 0.24278549981750758, "grad_norm": 0.4296875, "learning_rate": 2.6669921723028738e-05, "loss": 1.9837, "step": 7525 }, { "epoch": 0.24281776367130392, "grad_norm": 0.484375, "learning_rate": 2.666893292780521e-05, "loss": 2.0014, "step": 7526 }, { "epoch": 0.24285002752510026, "grad_norm": 0.4296875, "learning_rate": 2.666794400413844e-05, "loss": 1.983, "step": 7527 }, { "epoch": 0.2428822913788966, "grad_norm": 0.443359375, "learning_rate": 2.6666954952039315e-05, "loss": 1.9866, "step": 7528 }, { "epoch": 0.24291455523269295, "grad_norm": 0.455078125, "learning_rate": 2.6665965771518723e-05, "loss": 2.0147, "step": 7529 }, { "epoch": 0.2429468190864893, "grad_norm": 0.404296875, "learning_rate": 2.6664976462587543e-05, "loss": 2.0123, "step": 7530 }, { "epoch": 0.24297908294028567, "grad_norm": 0.42578125, "learning_rate": 2.6663987025256675e-05, "loss": 1.9882, "step": 7531 }, { "epoch": 0.243011346794082, "grad_norm": 0.38671875, "learning_rate": 2.6662997459537e-05, "loss": 1.94, "step": 7532 }, { "epoch": 0.24304361064787836, "grad_norm": 0.408203125, "learning_rate": 2.6662007765439425e-05, "loss": 1.979, "step": 7533 }, { "epoch": 0.2430758745016747, "grad_norm": 0.361328125, "learning_rate": 2.666101794297483e-05, "loss": 1.9794, "step": 7534 }, { "epoch": 0.24310813835547104, "grad_norm": 0.392578125, "learning_rate": 2.6660027992154112e-05, "loss": 1.9676, "step": 7535 }, { "epoch": 0.2431404022092674, "grad_norm": 0.396484375, "learning_rate": 2.6659037912988175e-05, "loss": 1.9788, "step": 7536 }, { "epoch": 0.24317266606306373, "grad_norm": 0.373046875, "learning_rate": 2.665804770548791e-05, "loss": 2.0427, "step": 7537 }, { "epoch": 0.24320492991686007, "grad_norm": 0.39453125, "learning_rate": 2.665705736966422e-05, "loss": 2.0099, "step": 7538 }, { "epoch": 0.24323719377065642, "grad_norm": 0.3984375, "learning_rate": 2.6656066905528007e-05, "loss": 2.0339, "step": 7539 }, { "epoch": 0.24326945762445276, "grad_norm": 0.40234375, "learning_rate": 2.665507631309017e-05, "loss": 2.0348, "step": 7540 }, { "epoch": 0.24330172147824913, "grad_norm": 0.375, "learning_rate": 2.6654085592361614e-05, "loss": 2.0149, "step": 7541 }, { "epoch": 0.24333398533204548, "grad_norm": 0.375, "learning_rate": 2.6653094743353246e-05, "loss": 2.0288, "step": 7542 }, { "epoch": 0.24336624918584182, "grad_norm": 0.38671875, "learning_rate": 2.665210376607597e-05, "loss": 1.9921, "step": 7543 }, { "epoch": 0.24339851303963816, "grad_norm": 0.388671875, "learning_rate": 2.6651112660540693e-05, "loss": 1.9633, "step": 7544 }, { "epoch": 0.2434307768934345, "grad_norm": 0.400390625, "learning_rate": 2.665012142675833e-05, "loss": 1.9806, "step": 7545 }, { "epoch": 0.24346304074723085, "grad_norm": 0.373046875, "learning_rate": 2.664913006473979e-05, "loss": 2.0216, "step": 7546 }, { "epoch": 0.2434953046010272, "grad_norm": 0.37890625, "learning_rate": 2.6648138574495974e-05, "loss": 2.0104, "step": 7547 }, { "epoch": 0.24352756845482354, "grad_norm": 0.41796875, "learning_rate": 2.6647146956037814e-05, "loss": 2.0009, "step": 7548 }, { "epoch": 0.24355983230861988, "grad_norm": 0.37109375, "learning_rate": 2.664615520937621e-05, "loss": 1.98, "step": 7549 }, { "epoch": 0.24359209616241623, "grad_norm": 0.392578125, "learning_rate": 2.6645163334522092e-05, "loss": 2.0018, "step": 7550 }, { "epoch": 0.2436243600162126, "grad_norm": 0.486328125, "learning_rate": 2.6644171331486363e-05, "loss": 1.9864, "step": 7551 }, { "epoch": 0.24365662387000894, "grad_norm": 0.55859375, "learning_rate": 2.6643179200279953e-05, "loss": 2.0284, "step": 7552 }, { "epoch": 0.2436888877238053, "grad_norm": 0.447265625, "learning_rate": 2.6642186940913775e-05, "loss": 2.0133, "step": 7553 }, { "epoch": 0.24372115157760163, "grad_norm": 0.40234375, "learning_rate": 2.6641194553398756e-05, "loss": 1.9832, "step": 7554 }, { "epoch": 0.24375341543139797, "grad_norm": 0.484375, "learning_rate": 2.6640202037745826e-05, "loss": 2.0042, "step": 7555 }, { "epoch": 0.24378567928519432, "grad_norm": 0.41015625, "learning_rate": 2.6639209393965895e-05, "loss": 2.0017, "step": 7556 }, { "epoch": 0.24381794313899066, "grad_norm": 0.404296875, "learning_rate": 2.66382166220699e-05, "loss": 1.993, "step": 7557 }, { "epoch": 0.243850206992787, "grad_norm": 0.400390625, "learning_rate": 2.6637223722068763e-05, "loss": 1.9799, "step": 7558 }, { "epoch": 0.24388247084658335, "grad_norm": 0.392578125, "learning_rate": 2.6636230693973416e-05, "loss": 2.0241, "step": 7559 }, { "epoch": 0.2439147347003797, "grad_norm": 0.41796875, "learning_rate": 2.663523753779479e-05, "loss": 1.9963, "step": 7560 }, { "epoch": 0.24394699855417606, "grad_norm": 0.42578125, "learning_rate": 2.663424425354382e-05, "loss": 1.9787, "step": 7561 }, { "epoch": 0.2439792624079724, "grad_norm": 0.3828125, "learning_rate": 2.663325084123143e-05, "loss": 1.9835, "step": 7562 }, { "epoch": 0.24401152626176875, "grad_norm": 0.42578125, "learning_rate": 2.6632257300868568e-05, "loss": 1.9945, "step": 7563 }, { "epoch": 0.2440437901155651, "grad_norm": 0.419921875, "learning_rate": 2.6631263632466155e-05, "loss": 1.9985, "step": 7564 }, { "epoch": 0.24407605396936144, "grad_norm": 0.431640625, "learning_rate": 2.6630269836035143e-05, "loss": 2.0124, "step": 7565 }, { "epoch": 0.24410831782315778, "grad_norm": 0.4140625, "learning_rate": 2.6629275911586457e-05, "loss": 2.0034, "step": 7566 }, { "epoch": 0.24414058167695413, "grad_norm": 0.396484375, "learning_rate": 2.6628281859131046e-05, "loss": 2.0093, "step": 7567 }, { "epoch": 0.24417284553075047, "grad_norm": 0.5, "learning_rate": 2.6627287678679853e-05, "loss": 1.9991, "step": 7568 }, { "epoch": 0.24420510938454681, "grad_norm": 0.3828125, "learning_rate": 2.6626293370243817e-05, "loss": 1.9651, "step": 7569 }, { "epoch": 0.24423737323834316, "grad_norm": 0.423828125, "learning_rate": 2.6625298933833887e-05, "loss": 1.9836, "step": 7570 }, { "epoch": 0.24426963709213953, "grad_norm": 0.390625, "learning_rate": 2.6624304369461002e-05, "loss": 1.9718, "step": 7571 }, { "epoch": 0.24430190094593587, "grad_norm": 0.404296875, "learning_rate": 2.6623309677136116e-05, "loss": 2.0001, "step": 7572 }, { "epoch": 0.24433416479973222, "grad_norm": 0.390625, "learning_rate": 2.6622314856870177e-05, "loss": 2.0056, "step": 7573 }, { "epoch": 0.24436642865352856, "grad_norm": 0.431640625, "learning_rate": 2.662131990867413e-05, "loss": 2.0006, "step": 7574 }, { "epoch": 0.2443986925073249, "grad_norm": 0.416015625, "learning_rate": 2.662032483255894e-05, "loss": 1.9951, "step": 7575 }, { "epoch": 0.24443095636112125, "grad_norm": 0.423828125, "learning_rate": 2.661932962853554e-05, "loss": 2.0099, "step": 7576 }, { "epoch": 0.2444632202149176, "grad_norm": 0.470703125, "learning_rate": 2.6618334296614897e-05, "loss": 2.007, "step": 7577 }, { "epoch": 0.24449548406871394, "grad_norm": 0.443359375, "learning_rate": 2.6617338836807966e-05, "loss": 1.9806, "step": 7578 }, { "epoch": 0.24452774792251028, "grad_norm": 0.455078125, "learning_rate": 2.6616343249125707e-05, "loss": 2.0301, "step": 7579 }, { "epoch": 0.24456001177630662, "grad_norm": 0.458984375, "learning_rate": 2.6615347533579072e-05, "loss": 2.0188, "step": 7580 }, { "epoch": 0.244592275630103, "grad_norm": 0.421875, "learning_rate": 2.6614351690179027e-05, "loss": 1.9765, "step": 7581 }, { "epoch": 0.24462453948389934, "grad_norm": 0.498046875, "learning_rate": 2.6613355718936523e-05, "loss": 1.9726, "step": 7582 }, { "epoch": 0.24465680333769568, "grad_norm": 0.494140625, "learning_rate": 2.6612359619862537e-05, "loss": 1.98, "step": 7583 }, { "epoch": 0.24468906719149203, "grad_norm": 0.53515625, "learning_rate": 2.6611363392968023e-05, "loss": 1.9598, "step": 7584 }, { "epoch": 0.24472133104528837, "grad_norm": 0.462890625, "learning_rate": 2.6610367038263956e-05, "loss": 1.936, "step": 7585 }, { "epoch": 0.24475359489908471, "grad_norm": 0.5, "learning_rate": 2.6609370555761296e-05, "loss": 1.9717, "step": 7586 }, { "epoch": 0.24478585875288106, "grad_norm": 0.466796875, "learning_rate": 2.6608373945471014e-05, "loss": 1.9551, "step": 7587 }, { "epoch": 0.2448181226066774, "grad_norm": 0.4609375, "learning_rate": 2.6607377207404077e-05, "loss": 2.0281, "step": 7588 }, { "epoch": 0.24485038646047375, "grad_norm": 0.46875, "learning_rate": 2.660638034157146e-05, "loss": 1.9855, "step": 7589 }, { "epoch": 0.2448826503142701, "grad_norm": 0.470703125, "learning_rate": 2.660538334798413e-05, "loss": 2.0441, "step": 7590 }, { "epoch": 0.24491491416806646, "grad_norm": 0.46875, "learning_rate": 2.6604386226653075e-05, "loss": 2.0309, "step": 7591 }, { "epoch": 0.2449471780218628, "grad_norm": 0.494140625, "learning_rate": 2.6603388977589253e-05, "loss": 1.9853, "step": 7592 }, { "epoch": 0.24497944187565915, "grad_norm": 0.47265625, "learning_rate": 2.6602391600803658e-05, "loss": 2.0175, "step": 7593 }, { "epoch": 0.2450117057294555, "grad_norm": 0.53515625, "learning_rate": 2.660139409630725e-05, "loss": 2.0118, "step": 7594 }, { "epoch": 0.24504396958325184, "grad_norm": 0.51953125, "learning_rate": 2.6600396464111025e-05, "loss": 1.9974, "step": 7595 }, { "epoch": 0.24507623343704818, "grad_norm": 0.515625, "learning_rate": 2.6599398704225956e-05, "loss": 2.0013, "step": 7596 }, { "epoch": 0.24510849729084452, "grad_norm": 0.58984375, "learning_rate": 2.6598400816663027e-05, "loss": 2.0411, "step": 7597 }, { "epoch": 0.24514076114464087, "grad_norm": 0.5234375, "learning_rate": 2.6597402801433223e-05, "loss": 1.9492, "step": 7598 }, { "epoch": 0.2451730249984372, "grad_norm": 0.474609375, "learning_rate": 2.659640465854753e-05, "loss": 2.0273, "step": 7599 }, { "epoch": 0.24520528885223355, "grad_norm": 0.5, "learning_rate": 2.6595406388016932e-05, "loss": 2.0255, "step": 7600 }, { "epoch": 0.2452375527060299, "grad_norm": 0.5, "learning_rate": 2.659440798985242e-05, "loss": 2.0283, "step": 7601 }, { "epoch": 0.24526981655982627, "grad_norm": 0.431640625, "learning_rate": 2.6593409464064984e-05, "loss": 2.0209, "step": 7602 }, { "epoch": 0.24530208041362261, "grad_norm": 0.435546875, "learning_rate": 2.6592410810665612e-05, "loss": 2.0042, "step": 7603 }, { "epoch": 0.24533434426741896, "grad_norm": 0.39453125, "learning_rate": 2.6591412029665296e-05, "loss": 2.0259, "step": 7604 }, { "epoch": 0.2453666081212153, "grad_norm": 0.435546875, "learning_rate": 2.6590413121075036e-05, "loss": 2.0123, "step": 7605 }, { "epoch": 0.24539887197501165, "grad_norm": 0.416015625, "learning_rate": 2.6589414084905822e-05, "loss": 1.978, "step": 7606 }, { "epoch": 0.245431135828808, "grad_norm": 0.404296875, "learning_rate": 2.6588414921168655e-05, "loss": 1.9846, "step": 7607 }, { "epoch": 0.24546339968260433, "grad_norm": 0.46875, "learning_rate": 2.658741562987452e-05, "loss": 1.983, "step": 7608 }, { "epoch": 0.24549566353640068, "grad_norm": 0.4375, "learning_rate": 2.6586416211034434e-05, "loss": 1.9758, "step": 7609 }, { "epoch": 0.24552792739019702, "grad_norm": 0.5078125, "learning_rate": 2.6585416664659393e-05, "loss": 2.015, "step": 7610 }, { "epoch": 0.24556019124399336, "grad_norm": 0.435546875, "learning_rate": 2.658441699076039e-05, "loss": 2.0165, "step": 7611 }, { "epoch": 0.24559245509778974, "grad_norm": 0.4765625, "learning_rate": 2.658341718934844e-05, "loss": 2.0158, "step": 7612 }, { "epoch": 0.24562471895158608, "grad_norm": 0.435546875, "learning_rate": 2.658241726043454e-05, "loss": 1.9958, "step": 7613 }, { "epoch": 0.24565698280538242, "grad_norm": 0.43359375, "learning_rate": 2.65814172040297e-05, "loss": 1.9833, "step": 7614 }, { "epoch": 0.24568924665917877, "grad_norm": 0.455078125, "learning_rate": 2.6580417020144934e-05, "loss": 2.0223, "step": 7615 }, { "epoch": 0.2457215105129751, "grad_norm": 0.48046875, "learning_rate": 2.6579416708791236e-05, "loss": 1.9531, "step": 7616 }, { "epoch": 0.24575377436677145, "grad_norm": 0.443359375, "learning_rate": 2.6578416269979636e-05, "loss": 1.9682, "step": 7617 }, { "epoch": 0.2457860382205678, "grad_norm": 0.50390625, "learning_rate": 2.657741570372113e-05, "loss": 1.993, "step": 7618 }, { "epoch": 0.24581830207436414, "grad_norm": 0.3984375, "learning_rate": 2.657641501002674e-05, "loss": 1.9864, "step": 7619 }, { "epoch": 0.24585056592816049, "grad_norm": 0.458984375, "learning_rate": 2.6575414188907478e-05, "loss": 1.9843, "step": 7620 }, { "epoch": 0.24588282978195683, "grad_norm": 0.458984375, "learning_rate": 2.6574413240374357e-05, "loss": 1.9512, "step": 7621 }, { "epoch": 0.2459150936357532, "grad_norm": 0.427734375, "learning_rate": 2.6573412164438404e-05, "loss": 2.0234, "step": 7622 }, { "epoch": 0.24594735748954955, "grad_norm": 0.435546875, "learning_rate": 2.6572410961110627e-05, "loss": 2.0077, "step": 7623 }, { "epoch": 0.2459796213433459, "grad_norm": 0.41015625, "learning_rate": 2.6571409630402058e-05, "loss": 2.0297, "step": 7624 }, { "epoch": 0.24601188519714223, "grad_norm": 0.41796875, "learning_rate": 2.6570408172323707e-05, "loss": 2.0284, "step": 7625 }, { "epoch": 0.24604414905093858, "grad_norm": 0.419921875, "learning_rate": 2.6569406586886608e-05, "loss": 2.0165, "step": 7626 }, { "epoch": 0.24607641290473492, "grad_norm": 0.400390625, "learning_rate": 2.656840487410178e-05, "loss": 2.0012, "step": 7627 }, { "epoch": 0.24610867675853126, "grad_norm": 0.4296875, "learning_rate": 2.656740303398025e-05, "loss": 2.0232, "step": 7628 }, { "epoch": 0.2461409406123276, "grad_norm": 0.4453125, "learning_rate": 2.6566401066533045e-05, "loss": 1.9772, "step": 7629 }, { "epoch": 0.24617320446612395, "grad_norm": 0.37890625, "learning_rate": 2.6565398971771193e-05, "loss": 1.9711, "step": 7630 }, { "epoch": 0.2462054683199203, "grad_norm": 0.427734375, "learning_rate": 2.6564396749705733e-05, "loss": 2.0101, "step": 7631 }, { "epoch": 0.24623773217371667, "grad_norm": 0.376953125, "learning_rate": 2.6563394400347684e-05, "loss": 2.0041, "step": 7632 }, { "epoch": 0.246269996027513, "grad_norm": 0.384765625, "learning_rate": 2.6562391923708088e-05, "loss": 1.9932, "step": 7633 }, { "epoch": 0.24630225988130935, "grad_norm": 0.3828125, "learning_rate": 2.6561389319797973e-05, "loss": 2.0174, "step": 7634 }, { "epoch": 0.2463345237351057, "grad_norm": 0.400390625, "learning_rate": 2.656038658862838e-05, "loss": 2.0116, "step": 7635 }, { "epoch": 0.24636678758890204, "grad_norm": 0.353515625, "learning_rate": 2.6559383730210348e-05, "loss": 2.0101, "step": 7636 }, { "epoch": 0.24639905144269839, "grad_norm": 0.361328125, "learning_rate": 2.6558380744554912e-05, "loss": 2.0142, "step": 7637 }, { "epoch": 0.24643131529649473, "grad_norm": 0.380859375, "learning_rate": 2.6557377631673107e-05, "loss": 2.0321, "step": 7638 }, { "epoch": 0.24646357915029107, "grad_norm": 0.376953125, "learning_rate": 2.655637439157598e-05, "loss": 2.0044, "step": 7639 }, { "epoch": 0.24649584300408742, "grad_norm": 0.369140625, "learning_rate": 2.6555371024274582e-05, "loss": 2.0282, "step": 7640 }, { "epoch": 0.24652810685788376, "grad_norm": 0.375, "learning_rate": 2.6554367529779946e-05, "loss": 2.0027, "step": 7641 }, { "epoch": 0.24656037071168013, "grad_norm": 0.376953125, "learning_rate": 2.655336390810312e-05, "loss": 2.0086, "step": 7642 }, { "epoch": 0.24659263456547648, "grad_norm": 0.3828125, "learning_rate": 2.6552360159255154e-05, "loss": 2.0297, "step": 7643 }, { "epoch": 0.24662489841927282, "grad_norm": 0.40625, "learning_rate": 2.6551356283247097e-05, "loss": 2.0048, "step": 7644 }, { "epoch": 0.24665716227306916, "grad_norm": 0.40234375, "learning_rate": 2.6550352280089992e-05, "loss": 2.002, "step": 7645 }, { "epoch": 0.2466894261268655, "grad_norm": 0.3984375, "learning_rate": 2.6549348149794898e-05, "loss": 2.0226, "step": 7646 }, { "epoch": 0.24672168998066185, "grad_norm": 0.400390625, "learning_rate": 2.654834389237286e-05, "loss": 2.0101, "step": 7647 }, { "epoch": 0.2467539538344582, "grad_norm": 0.392578125, "learning_rate": 2.6547339507834947e-05, "loss": 1.9804, "step": 7648 }, { "epoch": 0.24678621768825454, "grad_norm": 0.40625, "learning_rate": 2.6546334996192197e-05, "loss": 1.9815, "step": 7649 }, { "epoch": 0.24681848154205088, "grad_norm": 0.373046875, "learning_rate": 2.654533035745568e-05, "loss": 1.9876, "step": 7650 }, { "epoch": 0.24685074539584723, "grad_norm": 0.3671875, "learning_rate": 2.6544325591636448e-05, "loss": 1.9683, "step": 7651 }, { "epoch": 0.2468830092496436, "grad_norm": 0.392578125, "learning_rate": 2.6543320698745554e-05, "loss": 2.0006, "step": 7652 }, { "epoch": 0.24691527310343994, "grad_norm": 0.37890625, "learning_rate": 2.6542315678794073e-05, "loss": 2.042, "step": 7653 }, { "epoch": 0.24694753695723629, "grad_norm": 0.375, "learning_rate": 2.6541310531793065e-05, "loss": 2.0076, "step": 7654 }, { "epoch": 0.24697980081103263, "grad_norm": 0.40625, "learning_rate": 2.6540305257753587e-05, "loss": 1.9848, "step": 7655 }, { "epoch": 0.24701206466482897, "grad_norm": 0.388671875, "learning_rate": 2.6539299856686706e-05, "loss": 1.9685, "step": 7656 }, { "epoch": 0.24704432851862532, "grad_norm": 0.416015625, "learning_rate": 2.653829432860349e-05, "loss": 2.0032, "step": 7657 }, { "epoch": 0.24707659237242166, "grad_norm": 0.353515625, "learning_rate": 2.653728867351501e-05, "loss": 2.0124, "step": 7658 }, { "epoch": 0.247108856226218, "grad_norm": 0.423828125, "learning_rate": 2.6536282891432337e-05, "loss": 2.0359, "step": 7659 }, { "epoch": 0.24714112008001435, "grad_norm": 0.431640625, "learning_rate": 2.6535276982366533e-05, "loss": 2.0072, "step": 7660 }, { "epoch": 0.2471733839338107, "grad_norm": 0.466796875, "learning_rate": 2.6534270946328674e-05, "loss": 1.9946, "step": 7661 }, { "epoch": 0.24720564778760706, "grad_norm": 0.396484375, "learning_rate": 2.653326478332984e-05, "loss": 1.9939, "step": 7662 }, { "epoch": 0.2472379116414034, "grad_norm": 0.41796875, "learning_rate": 2.6532258493381095e-05, "loss": 2.0128, "step": 7663 }, { "epoch": 0.24727017549519975, "grad_norm": 0.49609375, "learning_rate": 2.6531252076493523e-05, "loss": 1.9717, "step": 7664 }, { "epoch": 0.2473024393489961, "grad_norm": 0.380859375, "learning_rate": 2.6530245532678206e-05, "loss": 2.0136, "step": 7665 }, { "epoch": 0.24733470320279244, "grad_norm": 0.484375, "learning_rate": 2.6529238861946213e-05, "loss": 2.0056, "step": 7666 }, { "epoch": 0.24736696705658878, "grad_norm": 0.43359375, "learning_rate": 2.652823206430863e-05, "loss": 1.9903, "step": 7667 }, { "epoch": 0.24739923091038513, "grad_norm": 0.439453125, "learning_rate": 2.6527225139776537e-05, "loss": 2.0158, "step": 7668 }, { "epoch": 0.24743149476418147, "grad_norm": 0.388671875, "learning_rate": 2.6526218088361026e-05, "loss": 1.9988, "step": 7669 }, { "epoch": 0.2474637586179778, "grad_norm": 0.4453125, "learning_rate": 2.652521091007317e-05, "loss": 2.0155, "step": 7670 }, { "epoch": 0.24749602247177416, "grad_norm": 0.427734375, "learning_rate": 2.652420360492406e-05, "loss": 1.9536, "step": 7671 }, { "epoch": 0.24752828632557053, "grad_norm": 0.384765625, "learning_rate": 2.6523196172924788e-05, "loss": 2.0011, "step": 7672 }, { "epoch": 0.24756055017936687, "grad_norm": 0.3828125, "learning_rate": 2.6522188614086434e-05, "loss": 1.9844, "step": 7673 }, { "epoch": 0.24759281403316322, "grad_norm": 0.416015625, "learning_rate": 2.6521180928420096e-05, "loss": 2.0308, "step": 7674 }, { "epoch": 0.24762507788695956, "grad_norm": 0.359375, "learning_rate": 2.652017311593687e-05, "loss": 2.0162, "step": 7675 }, { "epoch": 0.2476573417407559, "grad_norm": 0.396484375, "learning_rate": 2.6519165176647836e-05, "loss": 2.002, "step": 7676 }, { "epoch": 0.24768960559455225, "grad_norm": 0.3671875, "learning_rate": 2.6518157110564097e-05, "loss": 1.9936, "step": 7677 }, { "epoch": 0.2477218694483486, "grad_norm": 0.392578125, "learning_rate": 2.6517148917696744e-05, "loss": 2.033, "step": 7678 }, { "epoch": 0.24775413330214494, "grad_norm": 0.416015625, "learning_rate": 2.651614059805688e-05, "loss": 2.0155, "step": 7679 }, { "epoch": 0.24778639715594128, "grad_norm": 0.41015625, "learning_rate": 2.6515132151655605e-05, "loss": 2.0034, "step": 7680 }, { "epoch": 0.24781866100973762, "grad_norm": 0.40234375, "learning_rate": 2.6514123578504015e-05, "loss": 2.0, "step": 7681 }, { "epoch": 0.247850924863534, "grad_norm": 0.40625, "learning_rate": 2.6513114878613212e-05, "loss": 2.0196, "step": 7682 }, { "epoch": 0.24788318871733034, "grad_norm": 0.3671875, "learning_rate": 2.6512106051994304e-05, "loss": 1.9917, "step": 7683 }, { "epoch": 0.24791545257112668, "grad_norm": 0.390625, "learning_rate": 2.6511097098658386e-05, "loss": 2.0102, "step": 7684 }, { "epoch": 0.24794771642492303, "grad_norm": 0.373046875, "learning_rate": 2.651008801861657e-05, "loss": 1.9934, "step": 7685 }, { "epoch": 0.24797998027871937, "grad_norm": 0.37890625, "learning_rate": 2.6509078811879965e-05, "loss": 1.9646, "step": 7686 }, { "epoch": 0.2480122441325157, "grad_norm": 0.3828125, "learning_rate": 2.6508069478459675e-05, "loss": 2.023, "step": 7687 }, { "epoch": 0.24804450798631206, "grad_norm": 0.390625, "learning_rate": 2.6507060018366808e-05, "loss": 2.0132, "step": 7688 }, { "epoch": 0.2480767718401084, "grad_norm": 0.388671875, "learning_rate": 2.6506050431612486e-05, "loss": 2.0121, "step": 7689 }, { "epoch": 0.24810903569390474, "grad_norm": 0.41015625, "learning_rate": 2.6505040718207812e-05, "loss": 2.0197, "step": 7690 }, { "epoch": 0.2481412995477011, "grad_norm": 0.3984375, "learning_rate": 2.6504030878163906e-05, "loss": 2.0056, "step": 7691 }, { "epoch": 0.24817356340149743, "grad_norm": 0.388671875, "learning_rate": 2.650302091149188e-05, "loss": 1.9952, "step": 7692 }, { "epoch": 0.2482058272552938, "grad_norm": 0.4375, "learning_rate": 2.650201081820285e-05, "loss": 2.0059, "step": 7693 }, { "epoch": 0.24823809110909015, "grad_norm": 0.35546875, "learning_rate": 2.650100059830794e-05, "loss": 2.0028, "step": 7694 }, { "epoch": 0.2482703549628865, "grad_norm": 0.40625, "learning_rate": 2.6499990251818263e-05, "loss": 2.0003, "step": 7695 }, { "epoch": 0.24830261881668284, "grad_norm": 0.38671875, "learning_rate": 2.6498979778744945e-05, "loss": 2.0053, "step": 7696 }, { "epoch": 0.24833488267047918, "grad_norm": 0.384765625, "learning_rate": 2.6497969179099107e-05, "loss": 2.0054, "step": 7697 }, { "epoch": 0.24836714652427552, "grad_norm": 0.396484375, "learning_rate": 2.6496958452891873e-05, "loss": 2.0025, "step": 7698 }, { "epoch": 0.24839941037807187, "grad_norm": 0.373046875, "learning_rate": 2.6495947600134366e-05, "loss": 2.0477, "step": 7699 }, { "epoch": 0.2484316742318682, "grad_norm": 0.404296875, "learning_rate": 2.6494936620837716e-05, "loss": 1.9695, "step": 7700 }, { "epoch": 0.24846393808566455, "grad_norm": 0.44140625, "learning_rate": 2.6493925515013053e-05, "loss": 1.9752, "step": 7701 }, { "epoch": 0.2484962019394609, "grad_norm": 0.486328125, "learning_rate": 2.64929142826715e-05, "loss": 1.9662, "step": 7702 }, { "epoch": 0.24852846579325727, "grad_norm": 0.54296875, "learning_rate": 2.6491902923824194e-05, "loss": 2.0024, "step": 7703 }, { "epoch": 0.2485607296470536, "grad_norm": 0.4296875, "learning_rate": 2.649089143848226e-05, "loss": 1.9541, "step": 7704 }, { "epoch": 0.24859299350084996, "grad_norm": 0.376953125, "learning_rate": 2.6489879826656846e-05, "loss": 2.0044, "step": 7705 }, { "epoch": 0.2486252573546463, "grad_norm": 0.421875, "learning_rate": 2.648886808835907e-05, "loss": 2.0232, "step": 7706 }, { "epoch": 0.24865752120844264, "grad_norm": 0.38671875, "learning_rate": 2.6487856223600076e-05, "loss": 2.0312, "step": 7707 }, { "epoch": 0.248689785062239, "grad_norm": 0.40625, "learning_rate": 2.6486844232391003e-05, "loss": 2.0279, "step": 7708 }, { "epoch": 0.24872204891603533, "grad_norm": 0.3671875, "learning_rate": 2.648583211474299e-05, "loss": 1.9939, "step": 7709 }, { "epoch": 0.24875431276983168, "grad_norm": 0.38671875, "learning_rate": 2.648481987066718e-05, "loss": 2.0141, "step": 7710 }, { "epoch": 0.24878657662362802, "grad_norm": 0.400390625, "learning_rate": 2.6483807500174708e-05, "loss": 1.9741, "step": 7711 }, { "epoch": 0.24881884047742436, "grad_norm": 0.369140625, "learning_rate": 2.6482795003276726e-05, "loss": 2.0249, "step": 7712 }, { "epoch": 0.24885110433122073, "grad_norm": 0.451171875, "learning_rate": 2.648178237998437e-05, "loss": 1.9932, "step": 7713 }, { "epoch": 0.24888336818501708, "grad_norm": 0.396484375, "learning_rate": 2.648076963030879e-05, "loss": 1.9789, "step": 7714 }, { "epoch": 0.24891563203881342, "grad_norm": 0.419921875, "learning_rate": 2.647975675426114e-05, "loss": 1.9618, "step": 7715 }, { "epoch": 0.24894789589260977, "grad_norm": 0.443359375, "learning_rate": 2.647874375185256e-05, "loss": 1.9738, "step": 7716 }, { "epoch": 0.2489801597464061, "grad_norm": 0.431640625, "learning_rate": 2.64777306230942e-05, "loss": 1.9823, "step": 7717 }, { "epoch": 0.24901242360020245, "grad_norm": 0.45703125, "learning_rate": 2.6476717367997218e-05, "loss": 1.9838, "step": 7718 }, { "epoch": 0.2490446874539988, "grad_norm": 0.396484375, "learning_rate": 2.6475703986572765e-05, "loss": 1.979, "step": 7719 }, { "epoch": 0.24907695130779514, "grad_norm": 0.416015625, "learning_rate": 2.6474690478831993e-05, "loss": 1.9957, "step": 7720 }, { "epoch": 0.24910921516159149, "grad_norm": 0.427734375, "learning_rate": 2.6473676844786065e-05, "loss": 1.988, "step": 7721 }, { "epoch": 0.24914147901538783, "grad_norm": 0.42578125, "learning_rate": 2.6472663084446126e-05, "loss": 1.9976, "step": 7722 }, { "epoch": 0.2491737428691842, "grad_norm": 0.408203125, "learning_rate": 2.647164919782335e-05, "loss": 1.9512, "step": 7723 }, { "epoch": 0.24920600672298054, "grad_norm": 0.404296875, "learning_rate": 2.6470635184928888e-05, "loss": 1.9449, "step": 7724 }, { "epoch": 0.2492382705767769, "grad_norm": 0.453125, "learning_rate": 2.6469621045773897e-05, "loss": 1.9656, "step": 7725 }, { "epoch": 0.24927053443057323, "grad_norm": 0.41796875, "learning_rate": 2.6468606780369553e-05, "loss": 1.998, "step": 7726 }, { "epoch": 0.24930279828436958, "grad_norm": 0.408203125, "learning_rate": 2.646759238872701e-05, "loss": 1.9851, "step": 7727 }, { "epoch": 0.24933506213816592, "grad_norm": 0.4375, "learning_rate": 2.6466577870857435e-05, "loss": 2.0068, "step": 7728 }, { "epoch": 0.24936732599196226, "grad_norm": 0.515625, "learning_rate": 2.6465563226772e-05, "loss": 1.9717, "step": 7729 }, { "epoch": 0.2493995898457586, "grad_norm": 0.474609375, "learning_rate": 2.6464548456481866e-05, "loss": 1.9572, "step": 7730 }, { "epoch": 0.24943185369955495, "grad_norm": 0.4296875, "learning_rate": 2.6463533559998213e-05, "loss": 2.0102, "step": 7731 }, { "epoch": 0.2494641175533513, "grad_norm": 0.478515625, "learning_rate": 2.6462518537332205e-05, "loss": 1.9564, "step": 7732 }, { "epoch": 0.24949638140714767, "grad_norm": 0.5078125, "learning_rate": 2.646150338849501e-05, "loss": 2.0026, "step": 7733 }, { "epoch": 0.249528645260944, "grad_norm": 0.40234375, "learning_rate": 2.646048811349782e-05, "loss": 1.994, "step": 7734 }, { "epoch": 0.24956090911474035, "grad_norm": 0.462890625, "learning_rate": 2.6459472712351788e-05, "loss": 1.965, "step": 7735 }, { "epoch": 0.2495931729685367, "grad_norm": 0.494140625, "learning_rate": 2.6458457185068103e-05, "loss": 2.0113, "step": 7736 }, { "epoch": 0.24962543682233304, "grad_norm": 0.478515625, "learning_rate": 2.6457441531657947e-05, "loss": 1.9964, "step": 7737 }, { "epoch": 0.24965770067612938, "grad_norm": 0.41796875, "learning_rate": 2.645642575213249e-05, "loss": 2.0166, "step": 7738 }, { "epoch": 0.24968996452992573, "grad_norm": 0.55078125, "learning_rate": 2.6455409846502913e-05, "loss": 1.9917, "step": 7739 }, { "epoch": 0.24972222838372207, "grad_norm": 0.490234375, "learning_rate": 2.6454393814780412e-05, "loss": 1.977, "step": 7740 }, { "epoch": 0.24975449223751842, "grad_norm": 0.40625, "learning_rate": 2.6453377656976156e-05, "loss": 2.008, "step": 7741 }, { "epoch": 0.24978675609131476, "grad_norm": 0.5, "learning_rate": 2.6452361373101335e-05, "loss": 1.9668, "step": 7742 }, { "epoch": 0.24981901994511113, "grad_norm": 0.380859375, "learning_rate": 2.6451344963167137e-05, "loss": 2.0044, "step": 7743 }, { "epoch": 0.24985128379890748, "grad_norm": 0.43359375, "learning_rate": 2.6450328427184746e-05, "loss": 1.9594, "step": 7744 }, { "epoch": 0.24988354765270382, "grad_norm": 0.412109375, "learning_rate": 2.644931176516536e-05, "loss": 1.9772, "step": 7745 }, { "epoch": 0.24991581150650016, "grad_norm": 0.43359375, "learning_rate": 2.6448294977120158e-05, "loss": 2.0036, "step": 7746 }, { "epoch": 0.2499480753602965, "grad_norm": 0.421875, "learning_rate": 2.6447278063060342e-05, "loss": 1.9984, "step": 7747 }, { "epoch": 0.24998033921409285, "grad_norm": 0.4140625, "learning_rate": 2.6446261022997098e-05, "loss": 1.9744, "step": 7748 }, { "epoch": 0.2500126030678892, "grad_norm": 0.439453125, "learning_rate": 2.6445243856941627e-05, "loss": 1.9559, "step": 7749 }, { "epoch": 0.25004486692168554, "grad_norm": 0.44921875, "learning_rate": 2.6444226564905122e-05, "loss": 1.9522, "step": 7750 }, { "epoch": 0.2500771307754819, "grad_norm": 0.4140625, "learning_rate": 2.644320914689878e-05, "loss": 1.9838, "step": 7751 }, { "epoch": 0.2501093946292782, "grad_norm": 0.419921875, "learning_rate": 2.6442191602933798e-05, "loss": 1.9691, "step": 7752 }, { "epoch": 0.25014165848307457, "grad_norm": 0.447265625, "learning_rate": 2.6441173933021384e-05, "loss": 1.9494, "step": 7753 }, { "epoch": 0.2501739223368709, "grad_norm": 0.38671875, "learning_rate": 2.6440156137172735e-05, "loss": 1.927, "step": 7754 }, { "epoch": 0.25020618619066726, "grad_norm": 0.419921875, "learning_rate": 2.6439138215399052e-05, "loss": 1.9311, "step": 7755 }, { "epoch": 0.2502384500444636, "grad_norm": 0.40234375, "learning_rate": 2.6438120167711543e-05, "loss": 1.9746, "step": 7756 }, { "epoch": 0.25027071389825994, "grad_norm": 0.427734375, "learning_rate": 2.6437101994121415e-05, "loss": 1.9127, "step": 7757 }, { "epoch": 0.25030297775205634, "grad_norm": 0.4609375, "learning_rate": 2.643608369463987e-05, "loss": 1.9314, "step": 7758 }, { "epoch": 0.2503352416058527, "grad_norm": 0.3984375, "learning_rate": 2.643506526927812e-05, "loss": 1.931, "step": 7759 }, { "epoch": 0.25036750545964903, "grad_norm": 0.4296875, "learning_rate": 2.6434046718047377e-05, "loss": 1.9455, "step": 7760 }, { "epoch": 0.2503997693134454, "grad_norm": 0.41015625, "learning_rate": 2.643302804095885e-05, "loss": 2.0013, "step": 7761 }, { "epoch": 0.2504320331672417, "grad_norm": 0.421875, "learning_rate": 2.6432009238023754e-05, "loss": 1.9839, "step": 7762 }, { "epoch": 0.25046429702103806, "grad_norm": 0.40625, "learning_rate": 2.6430990309253295e-05, "loss": 2.0014, "step": 7763 }, { "epoch": 0.2504965608748344, "grad_norm": 0.3828125, "learning_rate": 2.6429971254658704e-05, "loss": 1.9619, "step": 7764 }, { "epoch": 0.25052882472863075, "grad_norm": 0.42578125, "learning_rate": 2.6428952074251185e-05, "loss": 2.009, "step": 7765 }, { "epoch": 0.2505610885824271, "grad_norm": 0.373046875, "learning_rate": 2.642793276804196e-05, "loss": 1.9953, "step": 7766 }, { "epoch": 0.25059335243622344, "grad_norm": 0.408203125, "learning_rate": 2.642691333604225e-05, "loss": 1.9946, "step": 7767 }, { "epoch": 0.2506256162900198, "grad_norm": 0.392578125, "learning_rate": 2.6425893778263275e-05, "loss": 2.0036, "step": 7768 }, { "epoch": 0.2506578801438161, "grad_norm": 0.392578125, "learning_rate": 2.6424874094716258e-05, "loss": 2.0081, "step": 7769 }, { "epoch": 0.25069014399761247, "grad_norm": 0.380859375, "learning_rate": 2.6423854285412424e-05, "loss": 1.9719, "step": 7770 }, { "epoch": 0.2507224078514088, "grad_norm": 0.376953125, "learning_rate": 2.6422834350363e-05, "loss": 2.0159, "step": 7771 }, { "epoch": 0.25075467170520516, "grad_norm": 0.376953125, "learning_rate": 2.6421814289579206e-05, "loss": 2.0188, "step": 7772 }, { "epoch": 0.2507869355590015, "grad_norm": 0.380859375, "learning_rate": 2.6420794103072275e-05, "loss": 1.9994, "step": 7773 }, { "epoch": 0.25081919941279784, "grad_norm": 0.39453125, "learning_rate": 2.6419773790853436e-05, "loss": 2.0297, "step": 7774 }, { "epoch": 0.2508514632665942, "grad_norm": 0.376953125, "learning_rate": 2.6418753352933923e-05, "loss": 2.0258, "step": 7775 }, { "epoch": 0.25088372712039053, "grad_norm": 0.423828125, "learning_rate": 2.6417732789324962e-05, "loss": 2.0123, "step": 7776 }, { "epoch": 0.2509159909741869, "grad_norm": 0.404296875, "learning_rate": 2.6416712100037793e-05, "loss": 2.0055, "step": 7777 }, { "epoch": 0.2509482548279832, "grad_norm": 0.373046875, "learning_rate": 2.641569128508364e-05, "loss": 2.0097, "step": 7778 }, { "epoch": 0.2509805186817796, "grad_norm": 0.40234375, "learning_rate": 2.6414670344473752e-05, "loss": 2.0119, "step": 7779 }, { "epoch": 0.25101278253557596, "grad_norm": 0.375, "learning_rate": 2.6413649278219357e-05, "loss": 2.0027, "step": 7780 }, { "epoch": 0.2510450463893723, "grad_norm": 0.400390625, "learning_rate": 2.6412628086331707e-05, "loss": 2.0002, "step": 7781 }, { "epoch": 0.25107731024316865, "grad_norm": 0.376953125, "learning_rate": 2.641160676882203e-05, "loss": 1.9897, "step": 7782 }, { "epoch": 0.251109574096965, "grad_norm": 0.39453125, "learning_rate": 2.641058532570157e-05, "loss": 2.0153, "step": 7783 }, { "epoch": 0.25114183795076134, "grad_norm": 0.388671875, "learning_rate": 2.640956375698158e-05, "loss": 2.004, "step": 7784 }, { "epoch": 0.2511741018045577, "grad_norm": 0.392578125, "learning_rate": 2.640854206267329e-05, "loss": 2.0123, "step": 7785 }, { "epoch": 0.251206365658354, "grad_norm": 0.41796875, "learning_rate": 2.6407520242787956e-05, "loss": 1.9864, "step": 7786 }, { "epoch": 0.25123862951215037, "grad_norm": 0.380859375, "learning_rate": 2.640649829733682e-05, "loss": 2.0041, "step": 7787 }, { "epoch": 0.2512708933659467, "grad_norm": 0.416015625, "learning_rate": 2.640547622633114e-05, "loss": 1.9743, "step": 7788 }, { "epoch": 0.25130315721974306, "grad_norm": 0.40625, "learning_rate": 2.6404454029782155e-05, "loss": 2.0012, "step": 7789 }, { "epoch": 0.2513354210735394, "grad_norm": 0.48828125, "learning_rate": 2.6403431707701123e-05, "loss": 2.0114, "step": 7790 }, { "epoch": 0.25136768492733574, "grad_norm": 0.58984375, "learning_rate": 2.6402409260099296e-05, "loss": 1.9962, "step": 7791 }, { "epoch": 0.2513999487811321, "grad_norm": 0.671875, "learning_rate": 2.6401386686987928e-05, "loss": 2.0228, "step": 7792 }, { "epoch": 0.25143221263492843, "grad_norm": 0.4765625, "learning_rate": 2.6400363988378275e-05, "loss": 2.0049, "step": 7793 }, { "epoch": 0.2514644764887248, "grad_norm": 0.484375, "learning_rate": 2.639934116428159e-05, "loss": 1.9925, "step": 7794 }, { "epoch": 0.2514967403425211, "grad_norm": 0.55078125, "learning_rate": 2.639831821470914e-05, "loss": 1.9988, "step": 7795 }, { "epoch": 0.25152900419631746, "grad_norm": 0.392578125, "learning_rate": 2.639729513967218e-05, "loss": 2.0079, "step": 7796 }, { "epoch": 0.2515612680501138, "grad_norm": 0.58984375, "learning_rate": 2.639627193918197e-05, "loss": 2.0132, "step": 7797 }, { "epoch": 0.25159353190391015, "grad_norm": 0.427734375, "learning_rate": 2.6395248613249772e-05, "loss": 2.0187, "step": 7798 }, { "epoch": 0.25162579575770655, "grad_norm": 0.51953125, "learning_rate": 2.6394225161886855e-05, "loss": 1.9841, "step": 7799 }, { "epoch": 0.2516580596115029, "grad_norm": 0.4609375, "learning_rate": 2.639320158510448e-05, "loss": 2.0046, "step": 7800 }, { "epoch": 0.25169032346529924, "grad_norm": 0.4296875, "learning_rate": 2.639217788291392e-05, "loss": 2.0113, "step": 7801 }, { "epoch": 0.2517225873190956, "grad_norm": 0.439453125, "learning_rate": 2.6391154055326434e-05, "loss": 2.011, "step": 7802 }, { "epoch": 0.2517548511728919, "grad_norm": 0.412109375, "learning_rate": 2.6390130102353295e-05, "loss": 2.0079, "step": 7803 }, { "epoch": 0.25178711502668827, "grad_norm": 0.44140625, "learning_rate": 2.6389106024005775e-05, "loss": 1.9652, "step": 7804 }, { "epoch": 0.2518193788804846, "grad_norm": 0.390625, "learning_rate": 2.638808182029515e-05, "loss": 2.0053, "step": 7805 }, { "epoch": 0.25185164273428096, "grad_norm": 0.443359375, "learning_rate": 2.638705749123269e-05, "loss": 1.9615, "step": 7806 }, { "epoch": 0.2518839065880773, "grad_norm": 0.39453125, "learning_rate": 2.638603303682967e-05, "loss": 1.9733, "step": 7807 }, { "epoch": 0.25191617044187364, "grad_norm": 0.439453125, "learning_rate": 2.6385008457097363e-05, "loss": 2.0088, "step": 7808 }, { "epoch": 0.25194843429567, "grad_norm": 0.4375, "learning_rate": 2.6383983752047052e-05, "loss": 2.0042, "step": 7809 }, { "epoch": 0.25198069814946633, "grad_norm": 0.439453125, "learning_rate": 2.6382958921690018e-05, "loss": 1.9486, "step": 7810 }, { "epoch": 0.2520129620032627, "grad_norm": 0.44140625, "learning_rate": 2.6381933966037535e-05, "loss": 1.9666, "step": 7811 }, { "epoch": 0.252045225857059, "grad_norm": 0.404296875, "learning_rate": 2.638090888510089e-05, "loss": 1.9934, "step": 7812 }, { "epoch": 0.25207748971085536, "grad_norm": 0.41796875, "learning_rate": 2.6379883678891362e-05, "loss": 1.9899, "step": 7813 }, { "epoch": 0.2521097535646517, "grad_norm": 0.390625, "learning_rate": 2.637885834742024e-05, "loss": 1.9889, "step": 7814 }, { "epoch": 0.25214201741844805, "grad_norm": 0.427734375, "learning_rate": 2.637783289069881e-05, "loss": 1.9656, "step": 7815 }, { "epoch": 0.2521742812722444, "grad_norm": 0.400390625, "learning_rate": 2.6376807308738356e-05, "loss": 1.9922, "step": 7816 }, { "epoch": 0.25220654512604074, "grad_norm": 0.427734375, "learning_rate": 2.637578160155017e-05, "loss": 2.0225, "step": 7817 }, { "epoch": 0.2522388089798371, "grad_norm": 0.41796875, "learning_rate": 2.637475576914554e-05, "loss": 2.0089, "step": 7818 }, { "epoch": 0.2522710728336335, "grad_norm": 0.3984375, "learning_rate": 2.637372981153576e-05, "loss": 2.0174, "step": 7819 }, { "epoch": 0.2523033366874298, "grad_norm": 0.443359375, "learning_rate": 2.6372703728732122e-05, "loss": 2.0306, "step": 7820 }, { "epoch": 0.25233560054122617, "grad_norm": 0.412109375, "learning_rate": 2.6371677520745916e-05, "loss": 1.9828, "step": 7821 }, { "epoch": 0.2523678643950225, "grad_norm": 0.419921875, "learning_rate": 2.6370651187588447e-05, "loss": 2.0094, "step": 7822 }, { "epoch": 0.25240012824881886, "grad_norm": 0.43359375, "learning_rate": 2.6369624729271e-05, "loss": 1.9854, "step": 7823 }, { "epoch": 0.2524323921026152, "grad_norm": 0.42578125, "learning_rate": 2.636859814580489e-05, "loss": 1.9994, "step": 7824 }, { "epoch": 0.25246465595641154, "grad_norm": 0.423828125, "learning_rate": 2.6367571437201404e-05, "loss": 1.9644, "step": 7825 }, { "epoch": 0.2524969198102079, "grad_norm": 0.435546875, "learning_rate": 2.6366544603471848e-05, "loss": 2.0061, "step": 7826 }, { "epoch": 0.25252918366400423, "grad_norm": 0.412109375, "learning_rate": 2.6365517644627516e-05, "loss": 1.9823, "step": 7827 }, { "epoch": 0.2525614475178006, "grad_norm": 0.421875, "learning_rate": 2.6364490560679725e-05, "loss": 2.0042, "step": 7828 }, { "epoch": 0.2525937113715969, "grad_norm": 0.375, "learning_rate": 2.6363463351639776e-05, "loss": 1.9741, "step": 7829 }, { "epoch": 0.25262597522539326, "grad_norm": 0.423828125, "learning_rate": 2.636243601751897e-05, "loss": 1.9886, "step": 7830 }, { "epoch": 0.2526582390791896, "grad_norm": 0.3828125, "learning_rate": 2.6361408558328625e-05, "loss": 1.9894, "step": 7831 }, { "epoch": 0.25269050293298595, "grad_norm": 0.3984375, "learning_rate": 2.6360380974080037e-05, "loss": 1.9218, "step": 7832 }, { "epoch": 0.2527227667867823, "grad_norm": 0.392578125, "learning_rate": 2.6359353264784534e-05, "loss": 1.9679, "step": 7833 }, { "epoch": 0.25275503064057864, "grad_norm": 0.369140625, "learning_rate": 2.6358325430453412e-05, "loss": 1.9783, "step": 7834 }, { "epoch": 0.252787294494375, "grad_norm": 0.435546875, "learning_rate": 2.6357297471097998e-05, "loss": 2.0292, "step": 7835 }, { "epoch": 0.2528195583481713, "grad_norm": 0.42578125, "learning_rate": 2.6356269386729598e-05, "loss": 2.0092, "step": 7836 }, { "epoch": 0.25285182220196767, "grad_norm": 0.451171875, "learning_rate": 2.6355241177359534e-05, "loss": 1.9701, "step": 7837 }, { "epoch": 0.252884086055764, "grad_norm": 0.400390625, "learning_rate": 2.635421284299912e-05, "loss": 1.9734, "step": 7838 }, { "epoch": 0.2529163499095604, "grad_norm": 0.416015625, "learning_rate": 2.6353184383659677e-05, "loss": 1.9831, "step": 7839 }, { "epoch": 0.25294861376335676, "grad_norm": 0.388671875, "learning_rate": 2.635215579935253e-05, "loss": 2.0063, "step": 7840 }, { "epoch": 0.2529808776171531, "grad_norm": 0.390625, "learning_rate": 2.6351127090088984e-05, "loss": 1.9988, "step": 7841 }, { "epoch": 0.25301314147094944, "grad_norm": 0.41015625, "learning_rate": 2.635009825588038e-05, "loss": 2.0028, "step": 7842 }, { "epoch": 0.2530454053247458, "grad_norm": 0.400390625, "learning_rate": 2.634906929673804e-05, "loss": 1.9781, "step": 7843 }, { "epoch": 0.25307766917854213, "grad_norm": 0.392578125, "learning_rate": 2.6348040212673285e-05, "loss": 1.9571, "step": 7844 }, { "epoch": 0.2531099330323385, "grad_norm": 0.4375, "learning_rate": 2.634701100369744e-05, "loss": 2.0381, "step": 7845 }, { "epoch": 0.2531421968861348, "grad_norm": 0.4375, "learning_rate": 2.6345981669821847e-05, "loss": 2.0106, "step": 7846 }, { "epoch": 0.25317446073993116, "grad_norm": 0.380859375, "learning_rate": 2.634495221105782e-05, "loss": 1.9513, "step": 7847 }, { "epoch": 0.2532067245937275, "grad_norm": 0.384765625, "learning_rate": 2.6343922627416705e-05, "loss": 1.9524, "step": 7848 }, { "epoch": 0.25323898844752385, "grad_norm": 0.40625, "learning_rate": 2.634289291890982e-05, "loss": 1.9812, "step": 7849 }, { "epoch": 0.2532712523013202, "grad_norm": 0.48046875, "learning_rate": 2.6341863085548513e-05, "loss": 1.96, "step": 7850 }, { "epoch": 0.25330351615511654, "grad_norm": 0.474609375, "learning_rate": 2.634083312734411e-05, "loss": 2.0033, "step": 7851 }, { "epoch": 0.2533357800089129, "grad_norm": 0.376953125, "learning_rate": 2.6339803044307954e-05, "loss": 1.989, "step": 7852 }, { "epoch": 0.2533680438627092, "grad_norm": 0.390625, "learning_rate": 2.633877283645138e-05, "loss": 1.9785, "step": 7853 }, { "epoch": 0.25340030771650557, "grad_norm": 0.380859375, "learning_rate": 2.6337742503785726e-05, "loss": 2.0166, "step": 7854 }, { "epoch": 0.2534325715703019, "grad_norm": 0.384765625, "learning_rate": 2.6336712046322342e-05, "loss": 1.9741, "step": 7855 }, { "epoch": 0.25346483542409826, "grad_norm": 0.42578125, "learning_rate": 2.6335681464072564e-05, "loss": 1.9271, "step": 7856 }, { "epoch": 0.2534970992778946, "grad_norm": 0.45703125, "learning_rate": 2.6334650757047735e-05, "loss": 1.905, "step": 7857 }, { "epoch": 0.25352936313169094, "grad_norm": 0.384765625, "learning_rate": 2.6333619925259198e-05, "loss": 1.951, "step": 7858 }, { "epoch": 0.25356162698548734, "grad_norm": 0.4140625, "learning_rate": 2.6332588968718307e-05, "loss": 1.9684, "step": 7859 }, { "epoch": 0.2535938908392837, "grad_norm": 0.462890625, "learning_rate": 2.6331557887436408e-05, "loss": 1.9571, "step": 7860 }, { "epoch": 0.25362615469308003, "grad_norm": 0.439453125, "learning_rate": 2.633052668142485e-05, "loss": 1.9636, "step": 7861 }, { "epoch": 0.2536584185468764, "grad_norm": 0.388671875, "learning_rate": 2.632949535069498e-05, "loss": 1.9996, "step": 7862 }, { "epoch": 0.2536906824006727, "grad_norm": 0.41796875, "learning_rate": 2.6328463895258154e-05, "loss": 1.9364, "step": 7863 }, { "epoch": 0.25372294625446906, "grad_norm": 0.3828125, "learning_rate": 2.6327432315125722e-05, "loss": 1.9785, "step": 7864 }, { "epoch": 0.2537552101082654, "grad_norm": 0.39453125, "learning_rate": 2.6326400610309044e-05, "loss": 1.9191, "step": 7865 }, { "epoch": 0.25378747396206175, "grad_norm": 0.431640625, "learning_rate": 2.6325368780819473e-05, "loss": 1.9626, "step": 7866 }, { "epoch": 0.2538197378158581, "grad_norm": 0.3671875, "learning_rate": 2.632433682666837e-05, "loss": 1.9831, "step": 7867 }, { "epoch": 0.25385200166965444, "grad_norm": 0.43359375, "learning_rate": 2.6323304747867087e-05, "loss": 1.9892, "step": 7868 }, { "epoch": 0.2538842655234508, "grad_norm": 0.421875, "learning_rate": 2.632227254442699e-05, "loss": 2.0133, "step": 7869 }, { "epoch": 0.2539165293772471, "grad_norm": 0.416015625, "learning_rate": 2.6321240216359436e-05, "loss": 2.016, "step": 7870 }, { "epoch": 0.25394879323104347, "grad_norm": 0.404296875, "learning_rate": 2.6320207763675796e-05, "loss": 2.0198, "step": 7871 }, { "epoch": 0.2539810570848398, "grad_norm": 0.4296875, "learning_rate": 2.631917518638743e-05, "loss": 1.9932, "step": 7872 }, { "epoch": 0.25401332093863616, "grad_norm": 0.419921875, "learning_rate": 2.6318142484505705e-05, "loss": 2.0202, "step": 7873 }, { "epoch": 0.2540455847924325, "grad_norm": 0.431640625, "learning_rate": 2.6317109658041984e-05, "loss": 1.994, "step": 7874 }, { "epoch": 0.25407784864622884, "grad_norm": 0.41796875, "learning_rate": 2.631607670700764e-05, "loss": 2.0232, "step": 7875 }, { "epoch": 0.2541101125000252, "grad_norm": 0.40234375, "learning_rate": 2.631504363141404e-05, "loss": 1.9785, "step": 7876 }, { "epoch": 0.25414237635382153, "grad_norm": 0.453125, "learning_rate": 2.631401043127256e-05, "loss": 2.0155, "step": 7877 }, { "epoch": 0.2541746402076179, "grad_norm": 0.453125, "learning_rate": 2.631297710659457e-05, "loss": 2.021, "step": 7878 }, { "epoch": 0.2542069040614142, "grad_norm": 0.3984375, "learning_rate": 2.631194365739144e-05, "loss": 1.9979, "step": 7879 }, { "epoch": 0.2542391679152106, "grad_norm": 0.39453125, "learning_rate": 2.6310910083674553e-05, "loss": 2.0243, "step": 7880 }, { "epoch": 0.25427143176900696, "grad_norm": 0.392578125, "learning_rate": 2.630987638545528e-05, "loss": 2.0091, "step": 7881 }, { "epoch": 0.2543036956228033, "grad_norm": 0.408203125, "learning_rate": 2.6308842562745e-05, "loss": 1.9699, "step": 7882 }, { "epoch": 0.25433595947659965, "grad_norm": 0.380859375, "learning_rate": 2.63078086155551e-05, "loss": 2.0074, "step": 7883 }, { "epoch": 0.254368223330396, "grad_norm": 0.3828125, "learning_rate": 2.630677454389695e-05, "loss": 2.0134, "step": 7884 }, { "epoch": 0.25440048718419234, "grad_norm": 0.40234375, "learning_rate": 2.6305740347781937e-05, "loss": 2.0197, "step": 7885 }, { "epoch": 0.2544327510379887, "grad_norm": 0.38671875, "learning_rate": 2.630470602722145e-05, "loss": 2.0085, "step": 7886 }, { "epoch": 0.254465014891785, "grad_norm": 0.40234375, "learning_rate": 2.6303671582226864e-05, "loss": 1.9813, "step": 7887 }, { "epoch": 0.25449727874558137, "grad_norm": 0.37109375, "learning_rate": 2.630263701280957e-05, "loss": 2.0142, "step": 7888 }, { "epoch": 0.2545295425993777, "grad_norm": 0.423828125, "learning_rate": 2.630160231898096e-05, "loss": 2.0258, "step": 7889 }, { "epoch": 0.25456180645317406, "grad_norm": 0.408203125, "learning_rate": 2.630056750075242e-05, "loss": 2.0101, "step": 7890 }, { "epoch": 0.2545940703069704, "grad_norm": 0.427734375, "learning_rate": 2.6299532558135334e-05, "loss": 2.0266, "step": 7891 }, { "epoch": 0.25462633416076674, "grad_norm": 0.376953125, "learning_rate": 2.6298497491141105e-05, "loss": 1.9806, "step": 7892 }, { "epoch": 0.2546585980145631, "grad_norm": 0.421875, "learning_rate": 2.6297462299781118e-05, "loss": 2.0135, "step": 7893 }, { "epoch": 0.25469086186835943, "grad_norm": 0.453125, "learning_rate": 2.6296426984066773e-05, "loss": 1.9953, "step": 7894 }, { "epoch": 0.2547231257221558, "grad_norm": 0.392578125, "learning_rate": 2.6295391544009463e-05, "loss": 2.0124, "step": 7895 }, { "epoch": 0.2547553895759521, "grad_norm": 0.443359375, "learning_rate": 2.6294355979620587e-05, "loss": 2.0129, "step": 7896 }, { "epoch": 0.25478765342974846, "grad_norm": 0.392578125, "learning_rate": 2.6293320290911544e-05, "loss": 1.996, "step": 7897 }, { "epoch": 0.2548199172835448, "grad_norm": 0.443359375, "learning_rate": 2.629228447789373e-05, "loss": 2.0232, "step": 7898 }, { "epoch": 0.25485218113734115, "grad_norm": 0.416015625, "learning_rate": 2.6291248540578546e-05, "loss": 1.9975, "step": 7899 }, { "epoch": 0.25488444499113755, "grad_norm": 0.408203125, "learning_rate": 2.629021247897741e-05, "loss": 2.0275, "step": 7900 }, { "epoch": 0.2549167088449339, "grad_norm": 0.37890625, "learning_rate": 2.6289176293101708e-05, "loss": 2.0119, "step": 7901 }, { "epoch": 0.25494897269873024, "grad_norm": 0.408203125, "learning_rate": 2.6288139982962846e-05, "loss": 2.0294, "step": 7902 }, { "epoch": 0.2549812365525266, "grad_norm": 0.3828125, "learning_rate": 2.6287103548572248e-05, "loss": 2.0017, "step": 7903 }, { "epoch": 0.2550135004063229, "grad_norm": 0.41796875, "learning_rate": 2.6286066989941303e-05, "loss": 2.0425, "step": 7904 }, { "epoch": 0.25504576426011927, "grad_norm": 0.39453125, "learning_rate": 2.6285030307081436e-05, "loss": 2.0415, "step": 7905 }, { "epoch": 0.2550780281139156, "grad_norm": 0.40625, "learning_rate": 2.6283993500004042e-05, "loss": 2.0341, "step": 7906 }, { "epoch": 0.25511029196771196, "grad_norm": 0.451171875, "learning_rate": 2.6282956568720552e-05, "loss": 2.0068, "step": 7907 }, { "epoch": 0.2551425558215083, "grad_norm": 0.4140625, "learning_rate": 2.6281919513242364e-05, "loss": 1.9913, "step": 7908 }, { "epoch": 0.25517481967530464, "grad_norm": 0.46875, "learning_rate": 2.6280882333580906e-05, "loss": 2.0031, "step": 7909 }, { "epoch": 0.255207083529101, "grad_norm": 0.49609375, "learning_rate": 2.6279845029747585e-05, "loss": 1.9748, "step": 7910 }, { "epoch": 0.25523934738289733, "grad_norm": 0.474609375, "learning_rate": 2.627880760175382e-05, "loss": 1.9936, "step": 7911 }, { "epoch": 0.2552716112366937, "grad_norm": 0.38671875, "learning_rate": 2.6277770049611034e-05, "loss": 2.0275, "step": 7912 }, { "epoch": 0.25530387509049, "grad_norm": 0.43359375, "learning_rate": 2.6276732373330645e-05, "loss": 2.0528, "step": 7913 }, { "epoch": 0.25533613894428636, "grad_norm": 0.451171875, "learning_rate": 2.627569457292408e-05, "loss": 2.0094, "step": 7914 }, { "epoch": 0.2553684027980827, "grad_norm": 0.3984375, "learning_rate": 2.6274656648402753e-05, "loss": 2.0136, "step": 7915 }, { "epoch": 0.25540066665187905, "grad_norm": 0.439453125, "learning_rate": 2.62736185997781e-05, "loss": 2.0186, "step": 7916 }, { "epoch": 0.2554329305056754, "grad_norm": 0.447265625, "learning_rate": 2.6272580427061535e-05, "loss": 2.0482, "step": 7917 }, { "epoch": 0.25546519435947174, "grad_norm": 0.42578125, "learning_rate": 2.627154213026449e-05, "loss": 2.0184, "step": 7918 }, { "epoch": 0.2554974582132681, "grad_norm": 0.388671875, "learning_rate": 2.6270503709398405e-05, "loss": 2.0231, "step": 7919 }, { "epoch": 0.2555297220670645, "grad_norm": 0.369140625, "learning_rate": 2.626946516447469e-05, "loss": 2.0059, "step": 7920 }, { "epoch": 0.2555619859208608, "grad_norm": 0.396484375, "learning_rate": 2.6268426495504796e-05, "loss": 2.0154, "step": 7921 }, { "epoch": 0.25559424977465717, "grad_norm": 0.376953125, "learning_rate": 2.6267387702500144e-05, "loss": 1.9986, "step": 7922 }, { "epoch": 0.2556265136284535, "grad_norm": 0.376953125, "learning_rate": 2.626634878547217e-05, "loss": 1.9769, "step": 7923 }, { "epoch": 0.25565877748224985, "grad_norm": 0.3828125, "learning_rate": 2.626530974443231e-05, "loss": 2.0114, "step": 7924 }, { "epoch": 0.2556910413360462, "grad_norm": 0.34765625, "learning_rate": 2.6264270579392003e-05, "loss": 2.0038, "step": 7925 }, { "epoch": 0.25572330518984254, "grad_norm": 0.375, "learning_rate": 2.626323129036269e-05, "loss": 2.015, "step": 7926 }, { "epoch": 0.2557555690436389, "grad_norm": 0.361328125, "learning_rate": 2.6262191877355805e-05, "loss": 1.9738, "step": 7927 }, { "epoch": 0.25578783289743523, "grad_norm": 0.3828125, "learning_rate": 2.626115234038279e-05, "loss": 1.9955, "step": 7928 }, { "epoch": 0.2558200967512316, "grad_norm": 0.376953125, "learning_rate": 2.6260112679455087e-05, "loss": 2.019, "step": 7929 }, { "epoch": 0.2558523606050279, "grad_norm": 0.388671875, "learning_rate": 2.625907289458414e-05, "loss": 1.9877, "step": 7930 }, { "epoch": 0.25588462445882426, "grad_norm": 0.36328125, "learning_rate": 2.6258032985781402e-05, "loss": 2.0273, "step": 7931 }, { "epoch": 0.2559168883126206, "grad_norm": 0.427734375, "learning_rate": 2.625699295305831e-05, "loss": 2.0284, "step": 7932 }, { "epoch": 0.25594915216641695, "grad_norm": 0.392578125, "learning_rate": 2.6255952796426316e-05, "loss": 1.9781, "step": 7933 }, { "epoch": 0.2559814160202133, "grad_norm": 0.404296875, "learning_rate": 2.6254912515896873e-05, "loss": 1.9738, "step": 7934 }, { "epoch": 0.25601367987400964, "grad_norm": 0.3828125, "learning_rate": 2.6253872111481418e-05, "loss": 2.017, "step": 7935 }, { "epoch": 0.256045943727806, "grad_norm": 0.373046875, "learning_rate": 2.6252831583191422e-05, "loss": 1.9884, "step": 7936 }, { "epoch": 0.2560782075816023, "grad_norm": 0.388671875, "learning_rate": 2.625179093103832e-05, "loss": 1.9945, "step": 7937 }, { "epoch": 0.25611047143539867, "grad_norm": 0.390625, "learning_rate": 2.625075015503358e-05, "loss": 1.991, "step": 7938 }, { "epoch": 0.256142735289195, "grad_norm": 0.390625, "learning_rate": 2.6249709255188654e-05, "loss": 1.9756, "step": 7939 }, { "epoch": 0.2561749991429914, "grad_norm": 0.40625, "learning_rate": 2.6248668231515e-05, "loss": 2.0225, "step": 7940 }, { "epoch": 0.25620726299678775, "grad_norm": 0.4140625, "learning_rate": 2.6247627084024076e-05, "loss": 2.0404, "step": 7941 }, { "epoch": 0.2562395268505841, "grad_norm": 0.4140625, "learning_rate": 2.6246585812727336e-05, "loss": 2.0068, "step": 7942 }, { "epoch": 0.25627179070438044, "grad_norm": 0.404296875, "learning_rate": 2.6245544417636254e-05, "loss": 2.0407, "step": 7943 }, { "epoch": 0.2563040545581768, "grad_norm": 0.421875, "learning_rate": 2.624450289876228e-05, "loss": 2.0117, "step": 7944 }, { "epoch": 0.25633631841197313, "grad_norm": 0.419921875, "learning_rate": 2.6243461256116892e-05, "loss": 2.0006, "step": 7945 }, { "epoch": 0.2563685822657695, "grad_norm": 0.408203125, "learning_rate": 2.6242419489711546e-05, "loss": 1.9725, "step": 7946 }, { "epoch": 0.2564008461195658, "grad_norm": 0.38671875, "learning_rate": 2.624137759955771e-05, "loss": 2.0012, "step": 7947 }, { "epoch": 0.25643310997336216, "grad_norm": 0.392578125, "learning_rate": 2.6240335585666856e-05, "loss": 2.0423, "step": 7948 }, { "epoch": 0.2564653738271585, "grad_norm": 0.380859375, "learning_rate": 2.6239293448050446e-05, "loss": 2.003, "step": 7949 }, { "epoch": 0.25649763768095485, "grad_norm": 0.396484375, "learning_rate": 2.6238251186719965e-05, "loss": 2.0289, "step": 7950 }, { "epoch": 0.2565299015347512, "grad_norm": 0.65234375, "learning_rate": 2.6237208801686874e-05, "loss": 2.0157, "step": 7951 }, { "epoch": 0.25656216538854754, "grad_norm": 0.4140625, "learning_rate": 2.623616629296265e-05, "loss": 2.0053, "step": 7952 }, { "epoch": 0.2565944292423439, "grad_norm": 0.546875, "learning_rate": 2.6235123660558768e-05, "loss": 2.0385, "step": 7953 }, { "epoch": 0.2566266930961402, "grad_norm": 0.546875, "learning_rate": 2.6234080904486706e-05, "loss": 2.0102, "step": 7954 }, { "epoch": 0.25665895694993657, "grad_norm": 0.48046875, "learning_rate": 2.6233038024757938e-05, "loss": 2.0063, "step": 7955 }, { "epoch": 0.2566912208037329, "grad_norm": 0.46875, "learning_rate": 2.6231995021383945e-05, "loss": 1.9989, "step": 7956 }, { "epoch": 0.25672348465752926, "grad_norm": 0.4609375, "learning_rate": 2.6230951894376213e-05, "loss": 1.9792, "step": 7957 }, { "epoch": 0.2567557485113256, "grad_norm": 0.44140625, "learning_rate": 2.6229908643746222e-05, "loss": 1.9685, "step": 7958 }, { "epoch": 0.25678801236512194, "grad_norm": 0.4453125, "learning_rate": 2.6228865269505443e-05, "loss": 1.9653, "step": 7959 }, { "epoch": 0.2568202762189183, "grad_norm": 0.427734375, "learning_rate": 2.622782177166538e-05, "loss": 1.9718, "step": 7960 }, { "epoch": 0.2568525400727147, "grad_norm": 0.431640625, "learning_rate": 2.6226778150237503e-05, "loss": 1.9657, "step": 7961 }, { "epoch": 0.25688480392651103, "grad_norm": 0.5, "learning_rate": 2.622573440523331e-05, "loss": 1.9816, "step": 7962 }, { "epoch": 0.2569170677803074, "grad_norm": 0.5078125, "learning_rate": 2.6224690536664286e-05, "loss": 2.006, "step": 7963 }, { "epoch": 0.2569493316341037, "grad_norm": 0.455078125, "learning_rate": 2.622364654454192e-05, "loss": 1.985, "step": 7964 }, { "epoch": 0.25698159548790006, "grad_norm": 0.390625, "learning_rate": 2.6222602428877705e-05, "loss": 1.9877, "step": 7965 }, { "epoch": 0.2570138593416964, "grad_norm": 0.466796875, "learning_rate": 2.6221558189683133e-05, "loss": 2.0034, "step": 7966 }, { "epoch": 0.25704612319549275, "grad_norm": 0.42578125, "learning_rate": 2.62205138269697e-05, "loss": 2.0172, "step": 7967 }, { "epoch": 0.2570783870492891, "grad_norm": 0.419921875, "learning_rate": 2.62194693407489e-05, "loss": 2.0267, "step": 7968 }, { "epoch": 0.25711065090308544, "grad_norm": 0.46484375, "learning_rate": 2.6218424731032228e-05, "loss": 2.0104, "step": 7969 }, { "epoch": 0.2571429147568818, "grad_norm": 0.443359375, "learning_rate": 2.6217379997831185e-05, "loss": 1.9877, "step": 7970 }, { "epoch": 0.2571751786106781, "grad_norm": 0.392578125, "learning_rate": 2.6216335141157272e-05, "loss": 2.0022, "step": 7971 }, { "epoch": 0.25720744246447447, "grad_norm": 0.40625, "learning_rate": 2.6215290161021992e-05, "loss": 1.9572, "step": 7972 }, { "epoch": 0.2572397063182708, "grad_norm": 0.408203125, "learning_rate": 2.6214245057436837e-05, "loss": 1.9686, "step": 7973 }, { "epoch": 0.25727197017206715, "grad_norm": 0.384765625, "learning_rate": 2.6213199830413323e-05, "loss": 1.9932, "step": 7974 }, { "epoch": 0.2573042340258635, "grad_norm": 0.423828125, "learning_rate": 2.6212154479962946e-05, "loss": 1.9967, "step": 7975 }, { "epoch": 0.25733649787965984, "grad_norm": 0.369140625, "learning_rate": 2.6211109006097216e-05, "loss": 2.0073, "step": 7976 }, { "epoch": 0.2573687617334562, "grad_norm": 0.427734375, "learning_rate": 2.621006340882764e-05, "loss": 1.9836, "step": 7977 }, { "epoch": 0.25740102558725253, "grad_norm": 0.400390625, "learning_rate": 2.620901768816573e-05, "loss": 1.9884, "step": 7978 }, { "epoch": 0.2574332894410489, "grad_norm": 0.4609375, "learning_rate": 2.6207971844122994e-05, "loss": 1.9904, "step": 7979 }, { "epoch": 0.2574655532948452, "grad_norm": 0.427734375, "learning_rate": 2.6206925876710945e-05, "loss": 1.9538, "step": 7980 }, { "epoch": 0.2574978171486416, "grad_norm": 0.447265625, "learning_rate": 2.6205879785941098e-05, "loss": 1.959, "step": 7981 }, { "epoch": 0.25753008100243796, "grad_norm": 0.466796875, "learning_rate": 2.6204833571824964e-05, "loss": 1.9597, "step": 7982 }, { "epoch": 0.2575623448562343, "grad_norm": 0.462890625, "learning_rate": 2.6203787234374063e-05, "loss": 2.0091, "step": 7983 }, { "epoch": 0.25759460871003065, "grad_norm": 0.5078125, "learning_rate": 2.62027407735999e-05, "loss": 2.0034, "step": 7984 }, { "epoch": 0.257626872563827, "grad_norm": 0.458984375, "learning_rate": 2.620169418951401e-05, "loss": 1.9826, "step": 7985 }, { "epoch": 0.25765913641762334, "grad_norm": 0.44140625, "learning_rate": 2.6200647482127907e-05, "loss": 1.9586, "step": 7986 }, { "epoch": 0.2576914002714197, "grad_norm": 0.4609375, "learning_rate": 2.6199600651453113e-05, "loss": 1.9839, "step": 7987 }, { "epoch": 0.257723664125216, "grad_norm": 0.400390625, "learning_rate": 2.6198553697501144e-05, "loss": 1.9816, "step": 7988 }, { "epoch": 0.25775592797901237, "grad_norm": 0.423828125, "learning_rate": 2.6197506620283536e-05, "loss": 2.0027, "step": 7989 }, { "epoch": 0.2577881918328087, "grad_norm": 0.45703125, "learning_rate": 2.6196459419811806e-05, "loss": 1.9938, "step": 7990 }, { "epoch": 0.25782045568660505, "grad_norm": 0.474609375, "learning_rate": 2.619541209609748e-05, "loss": 2.0248, "step": 7991 }, { "epoch": 0.2578527195404014, "grad_norm": 0.423828125, "learning_rate": 2.6194364649152094e-05, "loss": 2.0169, "step": 7992 }, { "epoch": 0.25788498339419774, "grad_norm": 0.4296875, "learning_rate": 2.6193317078987166e-05, "loss": 2.0155, "step": 7993 }, { "epoch": 0.2579172472479941, "grad_norm": 0.4375, "learning_rate": 2.6192269385614238e-05, "loss": 2.0164, "step": 7994 }, { "epoch": 0.25794951110179043, "grad_norm": 0.38671875, "learning_rate": 2.619122156904484e-05, "loss": 1.9589, "step": 7995 }, { "epoch": 0.2579817749555868, "grad_norm": 0.4609375, "learning_rate": 2.61901736292905e-05, "loss": 2.0028, "step": 7996 }, { "epoch": 0.2580140388093831, "grad_norm": 0.390625, "learning_rate": 2.6189125566362757e-05, "loss": 1.9774, "step": 7997 }, { "epoch": 0.25804630266317946, "grad_norm": 0.41015625, "learning_rate": 2.618807738027315e-05, "loss": 1.9809, "step": 7998 }, { "epoch": 0.2580785665169758, "grad_norm": 0.416015625, "learning_rate": 2.6187029071033207e-05, "loss": 2.0126, "step": 7999 }, { "epoch": 0.25811083037077215, "grad_norm": 0.40625, "learning_rate": 2.6185980638654482e-05, "loss": 1.9992, "step": 8000 }, { "epoch": 0.25814309422456855, "grad_norm": 0.412109375, "learning_rate": 2.6184932083148503e-05, "loss": 1.9892, "step": 8001 }, { "epoch": 0.2581753580783649, "grad_norm": 0.40234375, "learning_rate": 2.618388340452681e-05, "loss": 2.0145, "step": 8002 }, { "epoch": 0.25820762193216124, "grad_norm": 0.427734375, "learning_rate": 2.618283460280096e-05, "loss": 1.9935, "step": 8003 }, { "epoch": 0.2582398857859576, "grad_norm": 0.45703125, "learning_rate": 2.6181785677982487e-05, "loss": 2.0077, "step": 8004 }, { "epoch": 0.2582721496397539, "grad_norm": 0.37890625, "learning_rate": 2.618073663008294e-05, "loss": 2.0288, "step": 8005 }, { "epoch": 0.25830441349355027, "grad_norm": 0.45703125, "learning_rate": 2.6179687459113864e-05, "loss": 1.9827, "step": 8006 }, { "epoch": 0.2583366773473466, "grad_norm": 0.41015625, "learning_rate": 2.6178638165086807e-05, "loss": 2.0112, "step": 8007 }, { "epoch": 0.25836894120114295, "grad_norm": 0.4765625, "learning_rate": 2.617758874801332e-05, "loss": 2.0238, "step": 8008 }, { "epoch": 0.2584012050549393, "grad_norm": 0.39453125, "learning_rate": 2.6176539207904958e-05, "loss": 2.0152, "step": 8009 }, { "epoch": 0.25843346890873564, "grad_norm": 0.44140625, "learning_rate": 2.617548954477327e-05, "loss": 1.9936, "step": 8010 }, { "epoch": 0.258465732762532, "grad_norm": 0.390625, "learning_rate": 2.6174439758629807e-05, "loss": 2.0214, "step": 8011 }, { "epoch": 0.25849799661632833, "grad_norm": 0.44140625, "learning_rate": 2.617338984948613e-05, "loss": 2.0013, "step": 8012 }, { "epoch": 0.2585302604701247, "grad_norm": 0.392578125, "learning_rate": 2.617233981735379e-05, "loss": 2.0003, "step": 8013 }, { "epoch": 0.258562524323921, "grad_norm": 0.44921875, "learning_rate": 2.6171289662244356e-05, "loss": 2.0036, "step": 8014 }, { "epoch": 0.25859478817771736, "grad_norm": 0.423828125, "learning_rate": 2.6170239384169376e-05, "loss": 2.0179, "step": 8015 }, { "epoch": 0.2586270520315137, "grad_norm": 0.390625, "learning_rate": 2.6169188983140412e-05, "loss": 1.9868, "step": 8016 }, { "epoch": 0.25865931588531005, "grad_norm": 0.41015625, "learning_rate": 2.6168138459169034e-05, "loss": 2.0167, "step": 8017 }, { "epoch": 0.2586915797391064, "grad_norm": 0.3828125, "learning_rate": 2.616708781226679e-05, "loss": 1.9954, "step": 8018 }, { "epoch": 0.25872384359290274, "grad_norm": 0.412109375, "learning_rate": 2.6166037042445263e-05, "loss": 2.0, "step": 8019 }, { "epoch": 0.2587561074466991, "grad_norm": 0.453125, "learning_rate": 2.616498614971601e-05, "loss": 2.0073, "step": 8020 }, { "epoch": 0.2587883713004955, "grad_norm": 0.515625, "learning_rate": 2.6163935134090595e-05, "loss": 2.0166, "step": 8021 }, { "epoch": 0.2588206351542918, "grad_norm": 0.50390625, "learning_rate": 2.6162883995580595e-05, "loss": 2.0279, "step": 8022 }, { "epoch": 0.25885289900808817, "grad_norm": 0.41015625, "learning_rate": 2.6161832734197576e-05, "loss": 2.0362, "step": 8023 }, { "epoch": 0.2588851628618845, "grad_norm": 0.4453125, "learning_rate": 2.6160781349953106e-05, "loss": 2.0229, "step": 8024 }, { "epoch": 0.25891742671568085, "grad_norm": 0.4375, "learning_rate": 2.6159729842858764e-05, "loss": 1.9915, "step": 8025 }, { "epoch": 0.2589496905694772, "grad_norm": 0.416015625, "learning_rate": 2.6158678212926123e-05, "loss": 1.9998, "step": 8026 }, { "epoch": 0.25898195442327354, "grad_norm": 0.462890625, "learning_rate": 2.6157626460166752e-05, "loss": 2.0018, "step": 8027 }, { "epoch": 0.2590142182770699, "grad_norm": 0.470703125, "learning_rate": 2.615657458459224e-05, "loss": 2.0217, "step": 8028 }, { "epoch": 0.25904648213086623, "grad_norm": 0.439453125, "learning_rate": 2.6155522586214153e-05, "loss": 2.0192, "step": 8029 }, { "epoch": 0.2590787459846626, "grad_norm": 0.416015625, "learning_rate": 2.615447046504408e-05, "loss": 2.0171, "step": 8030 }, { "epoch": 0.2591110098384589, "grad_norm": 0.42578125, "learning_rate": 2.6153418221093602e-05, "loss": 2.0276, "step": 8031 }, { "epoch": 0.25914327369225526, "grad_norm": 0.404296875, "learning_rate": 2.6152365854374295e-05, "loss": 2.0, "step": 8032 }, { "epoch": 0.2591755375460516, "grad_norm": 0.404296875, "learning_rate": 2.6151313364897742e-05, "loss": 1.9782, "step": 8033 }, { "epoch": 0.25920780139984795, "grad_norm": 0.388671875, "learning_rate": 2.615026075267553e-05, "loss": 1.9942, "step": 8034 }, { "epoch": 0.2592400652536443, "grad_norm": 0.412109375, "learning_rate": 2.614920801771925e-05, "loss": 2.0013, "step": 8035 }, { "epoch": 0.25927232910744064, "grad_norm": 0.408203125, "learning_rate": 2.6148155160040487e-05, "loss": 2.0016, "step": 8036 }, { "epoch": 0.259304592961237, "grad_norm": 0.388671875, "learning_rate": 2.6147102179650834e-05, "loss": 1.9771, "step": 8037 }, { "epoch": 0.2593368568150333, "grad_norm": 0.4296875, "learning_rate": 2.614604907656187e-05, "loss": 2.01, "step": 8038 }, { "epoch": 0.25936912066882967, "grad_norm": 0.41796875, "learning_rate": 2.6144995850785192e-05, "loss": 2.0461, "step": 8039 }, { "epoch": 0.259401384522626, "grad_norm": 0.41796875, "learning_rate": 2.6143942502332402e-05, "loss": 2.0462, "step": 8040 }, { "epoch": 0.2594336483764224, "grad_norm": 0.43359375, "learning_rate": 2.6142889031215085e-05, "loss": 1.9962, "step": 8041 }, { "epoch": 0.25946591223021875, "grad_norm": 0.412109375, "learning_rate": 2.614183543744484e-05, "loss": 2.0077, "step": 8042 }, { "epoch": 0.2594981760840151, "grad_norm": 0.431640625, "learning_rate": 2.6140781721033266e-05, "loss": 2.0223, "step": 8043 }, { "epoch": 0.25953043993781144, "grad_norm": 0.400390625, "learning_rate": 2.6139727881991952e-05, "loss": 2.0326, "step": 8044 }, { "epoch": 0.2595627037916078, "grad_norm": 0.388671875, "learning_rate": 2.6138673920332513e-05, "loss": 2.0027, "step": 8045 }, { "epoch": 0.25959496764540413, "grad_norm": 0.39453125, "learning_rate": 2.6137619836066534e-05, "loss": 1.9902, "step": 8046 }, { "epoch": 0.2596272314992005, "grad_norm": 0.376953125, "learning_rate": 2.6136565629205632e-05, "loss": 1.9666, "step": 8047 }, { "epoch": 0.2596594953529968, "grad_norm": 0.396484375, "learning_rate": 2.6135511299761403e-05, "loss": 2.0232, "step": 8048 }, { "epoch": 0.25969175920679316, "grad_norm": 0.36328125, "learning_rate": 2.6134456847745453e-05, "loss": 2.0262, "step": 8049 }, { "epoch": 0.2597240230605895, "grad_norm": 0.412109375, "learning_rate": 2.6133402273169394e-05, "loss": 2.0372, "step": 8050 }, { "epoch": 0.25975628691438585, "grad_norm": 0.39453125, "learning_rate": 2.6132347576044826e-05, "loss": 2.004, "step": 8051 }, { "epoch": 0.2597885507681822, "grad_norm": 0.392578125, "learning_rate": 2.6131292756383362e-05, "loss": 1.998, "step": 8052 }, { "epoch": 0.25982081462197854, "grad_norm": 0.4140625, "learning_rate": 2.6130237814196614e-05, "loss": 1.9924, "step": 8053 }, { "epoch": 0.2598530784757749, "grad_norm": 0.484375, "learning_rate": 2.6129182749496197e-05, "loss": 1.9989, "step": 8054 }, { "epoch": 0.2598853423295712, "grad_norm": 0.53515625, "learning_rate": 2.6128127562293717e-05, "loss": 2.0253, "step": 8055 }, { "epoch": 0.25991760618336757, "grad_norm": 0.486328125, "learning_rate": 2.612707225260079e-05, "loss": 2.0203, "step": 8056 }, { "epoch": 0.2599498700371639, "grad_norm": 0.3828125, "learning_rate": 2.6126016820429036e-05, "loss": 2.0342, "step": 8057 }, { "epoch": 0.25998213389096025, "grad_norm": 0.431640625, "learning_rate": 2.6124961265790067e-05, "loss": 1.9814, "step": 8058 }, { "epoch": 0.2600143977447566, "grad_norm": 0.3828125, "learning_rate": 2.6123905588695514e-05, "loss": 1.988, "step": 8059 }, { "epoch": 0.26004666159855294, "grad_norm": 0.396484375, "learning_rate": 2.612284978915698e-05, "loss": 1.942, "step": 8060 }, { "epoch": 0.2600789254523493, "grad_norm": 0.49609375, "learning_rate": 2.6121793867186096e-05, "loss": 2.0159, "step": 8061 }, { "epoch": 0.2601111893061457, "grad_norm": 0.458984375, "learning_rate": 2.612073782279449e-05, "loss": 2.0146, "step": 8062 }, { "epoch": 0.26014345315994203, "grad_norm": 0.40625, "learning_rate": 2.6119681655993775e-05, "loss": 1.9632, "step": 8063 }, { "epoch": 0.2601757170137384, "grad_norm": 0.44140625, "learning_rate": 2.6118625366795582e-05, "loss": 2.0202, "step": 8064 }, { "epoch": 0.2602079808675347, "grad_norm": 0.42578125, "learning_rate": 2.6117568955211538e-05, "loss": 1.9984, "step": 8065 }, { "epoch": 0.26024024472133106, "grad_norm": 0.44140625, "learning_rate": 2.6116512421253272e-05, "loss": 1.9781, "step": 8066 }, { "epoch": 0.2602725085751274, "grad_norm": 0.53125, "learning_rate": 2.611545576493241e-05, "loss": 2.0109, "step": 8067 }, { "epoch": 0.26030477242892375, "grad_norm": 0.419921875, "learning_rate": 2.6114398986260586e-05, "loss": 2.0246, "step": 8068 }, { "epoch": 0.2603370362827201, "grad_norm": 0.4609375, "learning_rate": 2.6113342085249432e-05, "loss": 2.0069, "step": 8069 }, { "epoch": 0.26036930013651644, "grad_norm": 0.53125, "learning_rate": 2.6112285061910583e-05, "loss": 1.9651, "step": 8070 }, { "epoch": 0.2604015639903128, "grad_norm": 0.431640625, "learning_rate": 2.611122791625567e-05, "loss": 1.9911, "step": 8071 }, { "epoch": 0.2604338278441091, "grad_norm": 0.455078125, "learning_rate": 2.6110170648296328e-05, "loss": 1.982, "step": 8072 }, { "epoch": 0.26046609169790547, "grad_norm": 0.5, "learning_rate": 2.61091132580442e-05, "loss": 2.0009, "step": 8073 }, { "epoch": 0.2604983555517018, "grad_norm": 0.421875, "learning_rate": 2.6108055745510928e-05, "loss": 1.9859, "step": 8074 }, { "epoch": 0.26053061940549815, "grad_norm": 0.47265625, "learning_rate": 2.6106998110708142e-05, "loss": 1.9505, "step": 8075 }, { "epoch": 0.2605628832592945, "grad_norm": 0.443359375, "learning_rate": 2.610594035364749e-05, "loss": 1.9416, "step": 8076 }, { "epoch": 0.26059514711309084, "grad_norm": 0.484375, "learning_rate": 2.610488247434061e-05, "loss": 1.9685, "step": 8077 }, { "epoch": 0.2606274109668872, "grad_norm": 0.439453125, "learning_rate": 2.6103824472799156e-05, "loss": 1.9708, "step": 8078 }, { "epoch": 0.26065967482068353, "grad_norm": 0.443359375, "learning_rate": 2.6102766349034766e-05, "loss": 2.0189, "step": 8079 }, { "epoch": 0.2606919386744799, "grad_norm": 0.435546875, "learning_rate": 2.610170810305909e-05, "loss": 1.9928, "step": 8080 }, { "epoch": 0.2607242025282762, "grad_norm": 0.421875, "learning_rate": 2.6100649734883775e-05, "loss": 2.0073, "step": 8081 }, { "epoch": 0.2607564663820726, "grad_norm": 0.4296875, "learning_rate": 2.6099591244520474e-05, "loss": 1.9924, "step": 8082 }, { "epoch": 0.26078873023586896, "grad_norm": 0.42578125, "learning_rate": 2.609853263198083e-05, "loss": 2.0244, "step": 8083 }, { "epoch": 0.2608209940896653, "grad_norm": 0.4140625, "learning_rate": 2.6097473897276504e-05, "loss": 1.9935, "step": 8084 }, { "epoch": 0.26085325794346165, "grad_norm": 0.392578125, "learning_rate": 2.6096415040419142e-05, "loss": 1.9881, "step": 8085 }, { "epoch": 0.260885521797258, "grad_norm": 0.40625, "learning_rate": 2.6095356061420413e-05, "loss": 1.9511, "step": 8086 }, { "epoch": 0.26091778565105433, "grad_norm": 0.373046875, "learning_rate": 2.6094296960291953e-05, "loss": 1.9845, "step": 8087 }, { "epoch": 0.2609500495048507, "grad_norm": 0.37890625, "learning_rate": 2.6093237737045442e-05, "loss": 2.0383, "step": 8088 }, { "epoch": 0.260982313358647, "grad_norm": 0.380859375, "learning_rate": 2.609217839169252e-05, "loss": 1.9887, "step": 8089 }, { "epoch": 0.26101457721244337, "grad_norm": 0.380859375, "learning_rate": 2.6091118924244863e-05, "loss": 2.009, "step": 8090 }, { "epoch": 0.2610468410662397, "grad_norm": 0.3984375, "learning_rate": 2.609005933471412e-05, "loss": 2.0089, "step": 8091 }, { "epoch": 0.26107910492003605, "grad_norm": 0.375, "learning_rate": 2.6088999623111962e-05, "loss": 1.9822, "step": 8092 }, { "epoch": 0.2611113687738324, "grad_norm": 0.38671875, "learning_rate": 2.6087939789450053e-05, "loss": 2.0282, "step": 8093 }, { "epoch": 0.26114363262762874, "grad_norm": 0.3828125, "learning_rate": 2.608687983374005e-05, "loss": 1.9878, "step": 8094 }, { "epoch": 0.2611758964814251, "grad_norm": 0.390625, "learning_rate": 2.6085819755993638e-05, "loss": 2.0102, "step": 8095 }, { "epoch": 0.26120816033522143, "grad_norm": 0.458984375, "learning_rate": 2.6084759556222466e-05, "loss": 2.0128, "step": 8096 }, { "epoch": 0.2612404241890178, "grad_norm": 0.3984375, "learning_rate": 2.6083699234438218e-05, "loss": 1.9569, "step": 8097 }, { "epoch": 0.2612726880428141, "grad_norm": 0.423828125, "learning_rate": 2.608263879065256e-05, "loss": 2.0018, "step": 8098 }, { "epoch": 0.26130495189661046, "grad_norm": 0.38671875, "learning_rate": 2.6081578224877162e-05, "loss": 1.9704, "step": 8099 }, { "epoch": 0.2613372157504068, "grad_norm": 0.466796875, "learning_rate": 2.6080517537123702e-05, "loss": 1.9945, "step": 8100 }, { "epoch": 0.26136947960420315, "grad_norm": 0.435546875, "learning_rate": 2.6079456727403858e-05, "loss": 2.0014, "step": 8101 }, { "epoch": 0.26140174345799955, "grad_norm": 0.38671875, "learning_rate": 2.6078395795729297e-05, "loss": 1.9854, "step": 8102 }, { "epoch": 0.2614340073117959, "grad_norm": 0.431640625, "learning_rate": 2.6077334742111703e-05, "loss": 1.9943, "step": 8103 }, { "epoch": 0.26146627116559223, "grad_norm": 0.412109375, "learning_rate": 2.607627356656276e-05, "loss": 1.9825, "step": 8104 }, { "epoch": 0.2614985350193886, "grad_norm": 0.443359375, "learning_rate": 2.607521226909414e-05, "loss": 1.9961, "step": 8105 }, { "epoch": 0.2615307988731849, "grad_norm": 0.419921875, "learning_rate": 2.6074150849717532e-05, "loss": 2.0002, "step": 8106 }, { "epoch": 0.26156306272698127, "grad_norm": 0.44921875, "learning_rate": 2.607308930844461e-05, "loss": 1.9385, "step": 8107 }, { "epoch": 0.2615953265807776, "grad_norm": 0.435546875, "learning_rate": 2.6072027645287068e-05, "loss": 1.9669, "step": 8108 }, { "epoch": 0.26162759043457395, "grad_norm": 0.4296875, "learning_rate": 2.6070965860256588e-05, "loss": 1.9128, "step": 8109 }, { "epoch": 0.2616598542883703, "grad_norm": 0.474609375, "learning_rate": 2.6069903953364863e-05, "loss": 1.9928, "step": 8110 }, { "epoch": 0.26169211814216664, "grad_norm": 0.423828125, "learning_rate": 2.606884192462357e-05, "loss": 2.0192, "step": 8111 }, { "epoch": 0.261724381995963, "grad_norm": 0.46484375, "learning_rate": 2.606777977404441e-05, "loss": 1.9641, "step": 8112 }, { "epoch": 0.26175664584975933, "grad_norm": 0.49609375, "learning_rate": 2.6066717501639067e-05, "loss": 1.9856, "step": 8113 }, { "epoch": 0.2617889097035557, "grad_norm": 0.4140625, "learning_rate": 2.6065655107419238e-05, "loss": 1.9599, "step": 8114 }, { "epoch": 0.261821173557352, "grad_norm": 0.453125, "learning_rate": 2.6064592591396617e-05, "loss": 2.0068, "step": 8115 }, { "epoch": 0.26185343741114836, "grad_norm": 0.435546875, "learning_rate": 2.60635299535829e-05, "loss": 2.0107, "step": 8116 }, { "epoch": 0.2618857012649447, "grad_norm": 0.431640625, "learning_rate": 2.606246719398978e-05, "loss": 2.0226, "step": 8117 }, { "epoch": 0.26191796511874105, "grad_norm": 0.3984375, "learning_rate": 2.606140431262896e-05, "loss": 2.0084, "step": 8118 }, { "epoch": 0.2619502289725374, "grad_norm": 0.390625, "learning_rate": 2.606034130951213e-05, "loss": 2.0156, "step": 8119 }, { "epoch": 0.26198249282633373, "grad_norm": 0.412109375, "learning_rate": 2.6059278184651003e-05, "loss": 2.0054, "step": 8120 }, { "epoch": 0.2620147566801301, "grad_norm": 0.4140625, "learning_rate": 2.6058214938057275e-05, "loss": 1.9811, "step": 8121 }, { "epoch": 0.2620470205339265, "grad_norm": 0.40625, "learning_rate": 2.6057151569742652e-05, "loss": 2.0052, "step": 8122 }, { "epoch": 0.2620792843877228, "grad_norm": 0.44140625, "learning_rate": 2.6056088079718834e-05, "loss": 2.0186, "step": 8123 }, { "epoch": 0.26211154824151917, "grad_norm": 0.46484375, "learning_rate": 2.605502446799753e-05, "loss": 1.9982, "step": 8124 }, { "epoch": 0.2621438120953155, "grad_norm": 0.462890625, "learning_rate": 2.6053960734590444e-05, "loss": 1.984, "step": 8125 }, { "epoch": 0.26217607594911185, "grad_norm": 0.4296875, "learning_rate": 2.6052896879509297e-05, "loss": 2.0601, "step": 8126 }, { "epoch": 0.2622083398029082, "grad_norm": 0.5, "learning_rate": 2.6051832902765785e-05, "loss": 1.9901, "step": 8127 }, { "epoch": 0.26224060365670454, "grad_norm": 0.53125, "learning_rate": 2.605076880437163e-05, "loss": 2.0191, "step": 8128 }, { "epoch": 0.2622728675105009, "grad_norm": 0.52734375, "learning_rate": 2.604970458433853e-05, "loss": 2.0028, "step": 8129 }, { "epoch": 0.26230513136429723, "grad_norm": 0.423828125, "learning_rate": 2.6048640242678215e-05, "loss": 1.9754, "step": 8130 }, { "epoch": 0.26233739521809357, "grad_norm": 0.6015625, "learning_rate": 2.6047575779402394e-05, "loss": 1.9587, "step": 8131 }, { "epoch": 0.2623696590718899, "grad_norm": 0.44921875, "learning_rate": 2.6046511194522785e-05, "loss": 1.9783, "step": 8132 }, { "epoch": 0.26240192292568626, "grad_norm": 0.47265625, "learning_rate": 2.6045446488051106e-05, "loss": 2.0122, "step": 8133 }, { "epoch": 0.2624341867794826, "grad_norm": 0.453125, "learning_rate": 2.6044381659999076e-05, "loss": 2.0052, "step": 8134 }, { "epoch": 0.26246645063327895, "grad_norm": 0.4453125, "learning_rate": 2.6043316710378414e-05, "loss": 2.0075, "step": 8135 }, { "epoch": 0.2624987144870753, "grad_norm": 0.466796875, "learning_rate": 2.6042251639200843e-05, "loss": 1.9962, "step": 8136 }, { "epoch": 0.26253097834087163, "grad_norm": 0.431640625, "learning_rate": 2.604118644647809e-05, "loss": 2.0349, "step": 8137 }, { "epoch": 0.262563242194668, "grad_norm": 0.48828125, "learning_rate": 2.6040121132221876e-05, "loss": 2.0072, "step": 8138 }, { "epoch": 0.2625955060484643, "grad_norm": 0.482421875, "learning_rate": 2.603905569644393e-05, "loss": 2.0016, "step": 8139 }, { "epoch": 0.26262776990226067, "grad_norm": 0.431640625, "learning_rate": 2.603799013915598e-05, "loss": 2.0079, "step": 8140 }, { "epoch": 0.262660033756057, "grad_norm": 0.44140625, "learning_rate": 2.603692446036975e-05, "loss": 2.0042, "step": 8141 }, { "epoch": 0.26269229760985335, "grad_norm": 0.498046875, "learning_rate": 2.6035858660096975e-05, "loss": 1.991, "step": 8142 }, { "epoch": 0.26272456146364975, "grad_norm": 0.431640625, "learning_rate": 2.6034792738349386e-05, "loss": 2.0043, "step": 8143 }, { "epoch": 0.2627568253174461, "grad_norm": 0.48046875, "learning_rate": 2.6033726695138712e-05, "loss": 2.0078, "step": 8144 }, { "epoch": 0.26278908917124244, "grad_norm": 0.42578125, "learning_rate": 2.6032660530476694e-05, "loss": 2.0657, "step": 8145 }, { "epoch": 0.2628213530250388, "grad_norm": 0.42578125, "learning_rate": 2.603159424437506e-05, "loss": 1.9715, "step": 8146 }, { "epoch": 0.26285361687883513, "grad_norm": 0.408203125, "learning_rate": 2.603052783684555e-05, "loss": 2.0161, "step": 8147 }, { "epoch": 0.26288588073263147, "grad_norm": 0.42578125, "learning_rate": 2.6029461307899904e-05, "loss": 2.0156, "step": 8148 }, { "epoch": 0.2629181445864278, "grad_norm": 0.408203125, "learning_rate": 2.602839465754986e-05, "loss": 2.0262, "step": 8149 }, { "epoch": 0.26295040844022416, "grad_norm": 0.4140625, "learning_rate": 2.6027327885807162e-05, "loss": 2.0231, "step": 8150 }, { "epoch": 0.2629826722940205, "grad_norm": 0.392578125, "learning_rate": 2.602626099268355e-05, "loss": 1.989, "step": 8151 }, { "epoch": 0.26301493614781685, "grad_norm": 0.39453125, "learning_rate": 2.602519397819076e-05, "loss": 2.0122, "step": 8152 }, { "epoch": 0.2630472000016132, "grad_norm": 0.384765625, "learning_rate": 2.6024126842340556e-05, "loss": 2.0096, "step": 8153 }, { "epoch": 0.26307946385540953, "grad_norm": 0.37109375, "learning_rate": 2.6023059585144664e-05, "loss": 1.9943, "step": 8154 }, { "epoch": 0.2631117277092059, "grad_norm": 0.38671875, "learning_rate": 2.6021992206614843e-05, "loss": 1.9783, "step": 8155 }, { "epoch": 0.2631439915630022, "grad_norm": 0.421875, "learning_rate": 2.6020924706762833e-05, "loss": 2.0075, "step": 8156 }, { "epoch": 0.26317625541679857, "grad_norm": 0.376953125, "learning_rate": 2.6019857085600398e-05, "loss": 1.9946, "step": 8157 }, { "epoch": 0.2632085192705949, "grad_norm": 0.400390625, "learning_rate": 2.6018789343139277e-05, "loss": 1.9861, "step": 8158 }, { "epoch": 0.26324078312439125, "grad_norm": 0.375, "learning_rate": 2.601772147939123e-05, "loss": 1.9863, "step": 8159 }, { "epoch": 0.2632730469781876, "grad_norm": 0.39453125, "learning_rate": 2.601665349436801e-05, "loss": 2.0108, "step": 8160 }, { "epoch": 0.26330531083198394, "grad_norm": 0.408203125, "learning_rate": 2.601558538808137e-05, "loss": 1.9844, "step": 8161 }, { "epoch": 0.2633375746857803, "grad_norm": 0.375, "learning_rate": 2.601451716054307e-05, "loss": 1.9665, "step": 8162 }, { "epoch": 0.2633698385395767, "grad_norm": 0.390625, "learning_rate": 2.6013448811764866e-05, "loss": 2.0039, "step": 8163 }, { "epoch": 0.263402102393373, "grad_norm": 0.375, "learning_rate": 2.6012380341758516e-05, "loss": 2.0298, "step": 8164 }, { "epoch": 0.26343436624716937, "grad_norm": 0.37109375, "learning_rate": 2.6011311750535786e-05, "loss": 2.0249, "step": 8165 }, { "epoch": 0.2634666301009657, "grad_norm": 0.373046875, "learning_rate": 2.6010243038108434e-05, "loss": 2.0016, "step": 8166 }, { "epoch": 0.26349889395476206, "grad_norm": 0.396484375, "learning_rate": 2.6009174204488228e-05, "loss": 2.0223, "step": 8167 }, { "epoch": 0.2635311578085584, "grad_norm": 0.431640625, "learning_rate": 2.6008105249686925e-05, "loss": 1.9982, "step": 8168 }, { "epoch": 0.26356342166235475, "grad_norm": 0.38671875, "learning_rate": 2.6007036173716302e-05, "loss": 2.0011, "step": 8169 }, { "epoch": 0.2635956855161511, "grad_norm": 0.396484375, "learning_rate": 2.6005966976588122e-05, "loss": 2.0295, "step": 8170 }, { "epoch": 0.26362794936994743, "grad_norm": 0.412109375, "learning_rate": 2.6004897658314153e-05, "loss": 2.0269, "step": 8171 }, { "epoch": 0.2636602132237438, "grad_norm": 0.384765625, "learning_rate": 2.600382821890616e-05, "loss": 2.0272, "step": 8172 }, { "epoch": 0.2636924770775401, "grad_norm": 0.408203125, "learning_rate": 2.6002758658375924e-05, "loss": 2.0231, "step": 8173 }, { "epoch": 0.26372474093133647, "grad_norm": 0.37890625, "learning_rate": 2.6001688976735216e-05, "loss": 2.0209, "step": 8174 }, { "epoch": 0.2637570047851328, "grad_norm": 0.416015625, "learning_rate": 2.6000619173995802e-05, "loss": 2.0163, "step": 8175 }, { "epoch": 0.26378926863892915, "grad_norm": 0.390625, "learning_rate": 2.5999549250169468e-05, "loss": 2.0343, "step": 8176 }, { "epoch": 0.2638215324927255, "grad_norm": 0.404296875, "learning_rate": 2.599847920526799e-05, "loss": 1.9757, "step": 8177 }, { "epoch": 0.26385379634652184, "grad_norm": 0.392578125, "learning_rate": 2.5997409039303136e-05, "loss": 2.0059, "step": 8178 }, { "epoch": 0.2638860602003182, "grad_norm": 0.43359375, "learning_rate": 2.5996338752286696e-05, "loss": 1.9653, "step": 8179 }, { "epoch": 0.26391832405411453, "grad_norm": 0.396484375, "learning_rate": 2.5995268344230453e-05, "loss": 1.9698, "step": 8180 }, { "epoch": 0.26395058790791087, "grad_norm": 0.3984375, "learning_rate": 2.599419781514618e-05, "loss": 2.008, "step": 8181 }, { "epoch": 0.2639828517617072, "grad_norm": 0.419921875, "learning_rate": 2.599312716504566e-05, "loss": 2.0245, "step": 8182 }, { "epoch": 0.2640151156155036, "grad_norm": 0.416015625, "learning_rate": 2.599205639394069e-05, "loss": 1.9997, "step": 8183 }, { "epoch": 0.26404737946929996, "grad_norm": 0.43359375, "learning_rate": 2.5990985501843045e-05, "loss": 1.9841, "step": 8184 }, { "epoch": 0.2640796433230963, "grad_norm": 0.404296875, "learning_rate": 2.5989914488764518e-05, "loss": 2.0417, "step": 8185 }, { "epoch": 0.26411190717689265, "grad_norm": 0.4453125, "learning_rate": 2.5988843354716895e-05, "loss": 2.011, "step": 8186 }, { "epoch": 0.264144171030689, "grad_norm": 0.435546875, "learning_rate": 2.598777209971197e-05, "loss": 2.016, "step": 8187 }, { "epoch": 0.26417643488448533, "grad_norm": 0.419921875, "learning_rate": 2.5986700723761528e-05, "loss": 2.004, "step": 8188 }, { "epoch": 0.2642086987382817, "grad_norm": 0.4140625, "learning_rate": 2.598562922687737e-05, "loss": 1.9837, "step": 8189 }, { "epoch": 0.264240962592078, "grad_norm": 0.447265625, "learning_rate": 2.5984557609071286e-05, "loss": 1.98, "step": 8190 }, { "epoch": 0.26427322644587437, "grad_norm": 0.484375, "learning_rate": 2.598348587035507e-05, "loss": 1.9944, "step": 8191 }, { "epoch": 0.2643054902996707, "grad_norm": 0.416015625, "learning_rate": 2.5982414010740522e-05, "loss": 2.0079, "step": 8192 }, { "epoch": 0.26433775415346705, "grad_norm": 0.453125, "learning_rate": 2.5981342030239442e-05, "loss": 2.0498, "step": 8193 }, { "epoch": 0.2643700180072634, "grad_norm": 0.400390625, "learning_rate": 2.5980269928863626e-05, "loss": 2.0391, "step": 8194 }, { "epoch": 0.26440228186105974, "grad_norm": 0.40234375, "learning_rate": 2.597919770662487e-05, "loss": 1.983, "step": 8195 }, { "epoch": 0.2644345457148561, "grad_norm": 0.43359375, "learning_rate": 2.5978125363534987e-05, "loss": 1.9676, "step": 8196 }, { "epoch": 0.26446680956865243, "grad_norm": 0.416015625, "learning_rate": 2.5977052899605776e-05, "loss": 1.966, "step": 8197 }, { "epoch": 0.26449907342244877, "grad_norm": 0.4140625, "learning_rate": 2.597598031484904e-05, "loss": 2.0324, "step": 8198 }, { "epoch": 0.2645313372762451, "grad_norm": 0.40625, "learning_rate": 2.597490760927659e-05, "loss": 1.9999, "step": 8199 }, { "epoch": 0.26456360113004146, "grad_norm": 0.40234375, "learning_rate": 2.597383478290023e-05, "loss": 2.0056, "step": 8200 }, { "epoch": 0.2645958649838378, "grad_norm": 0.4140625, "learning_rate": 2.5972761835731766e-05, "loss": 2.0164, "step": 8201 }, { "epoch": 0.26462812883763415, "grad_norm": 0.3984375, "learning_rate": 2.5971688767783015e-05, "loss": 1.9943, "step": 8202 }, { "epoch": 0.26466039269143055, "grad_norm": 0.38671875, "learning_rate": 2.597061557906578e-05, "loss": 1.9814, "step": 8203 }, { "epoch": 0.2646926565452269, "grad_norm": 0.396484375, "learning_rate": 2.5969542269591882e-05, "loss": 2.0112, "step": 8204 }, { "epoch": 0.26472492039902323, "grad_norm": 0.39453125, "learning_rate": 2.5968468839373133e-05, "loss": 2.0024, "step": 8205 }, { "epoch": 0.2647571842528196, "grad_norm": 0.44921875, "learning_rate": 2.5967395288421347e-05, "loss": 1.9907, "step": 8206 }, { "epoch": 0.2647894481066159, "grad_norm": 0.384765625, "learning_rate": 2.596632161674834e-05, "loss": 2.0096, "step": 8207 }, { "epoch": 0.26482171196041226, "grad_norm": 0.388671875, "learning_rate": 2.5965247824365934e-05, "loss": 2.0304, "step": 8208 }, { "epoch": 0.2648539758142086, "grad_norm": 0.373046875, "learning_rate": 2.5964173911285947e-05, "loss": 2.0224, "step": 8209 }, { "epoch": 0.26488623966800495, "grad_norm": 0.45703125, "learning_rate": 2.59630998775202e-05, "loss": 1.9581, "step": 8210 }, { "epoch": 0.2649185035218013, "grad_norm": 0.4375, "learning_rate": 2.5962025723080512e-05, "loss": 2.0079, "step": 8211 }, { "epoch": 0.26495076737559764, "grad_norm": 0.376953125, "learning_rate": 2.5960951447978707e-05, "loss": 1.9996, "step": 8212 }, { "epoch": 0.264983031229394, "grad_norm": 0.39453125, "learning_rate": 2.5959877052226617e-05, "loss": 2.0027, "step": 8213 }, { "epoch": 0.2650152950831903, "grad_norm": 0.396484375, "learning_rate": 2.595880253583606e-05, "loss": 1.9729, "step": 8214 }, { "epoch": 0.26504755893698667, "grad_norm": 0.400390625, "learning_rate": 2.5957727898818866e-05, "loss": 1.9779, "step": 8215 }, { "epoch": 0.265079822790783, "grad_norm": 0.376953125, "learning_rate": 2.5956653141186867e-05, "loss": 2.0079, "step": 8216 }, { "epoch": 0.26511208664457936, "grad_norm": 0.396484375, "learning_rate": 2.5955578262951887e-05, "loss": 2.0169, "step": 8217 }, { "epoch": 0.2651443504983757, "grad_norm": 0.390625, "learning_rate": 2.5954503264125763e-05, "loss": 2.0294, "step": 8218 }, { "epoch": 0.26517661435217205, "grad_norm": 0.375, "learning_rate": 2.5953428144720324e-05, "loss": 2.0153, "step": 8219 }, { "epoch": 0.2652088782059684, "grad_norm": 0.4453125, "learning_rate": 2.595235290474741e-05, "loss": 1.9835, "step": 8220 }, { "epoch": 0.26524114205976473, "grad_norm": 0.375, "learning_rate": 2.595127754421885e-05, "loss": 1.9878, "step": 8221 }, { "epoch": 0.2652734059135611, "grad_norm": 0.412109375, "learning_rate": 2.5950202063146482e-05, "loss": 2.0132, "step": 8222 }, { "epoch": 0.2653056697673574, "grad_norm": 0.3828125, "learning_rate": 2.5949126461542144e-05, "loss": 2.0147, "step": 8223 }, { "epoch": 0.2653379336211538, "grad_norm": 0.408203125, "learning_rate": 2.5948050739417682e-05, "loss": 1.9942, "step": 8224 }, { "epoch": 0.26537019747495016, "grad_norm": 0.359375, "learning_rate": 2.5946974896784928e-05, "loss": 1.9693, "step": 8225 }, { "epoch": 0.2654024613287465, "grad_norm": 0.431640625, "learning_rate": 2.5945898933655727e-05, "loss": 2.0053, "step": 8226 }, { "epoch": 0.26543472518254285, "grad_norm": 0.412109375, "learning_rate": 2.5944822850041926e-05, "loss": 2.0099, "step": 8227 }, { "epoch": 0.2654669890363392, "grad_norm": 0.388671875, "learning_rate": 2.5943746645955365e-05, "loss": 2.0078, "step": 8228 }, { "epoch": 0.26549925289013554, "grad_norm": 0.3671875, "learning_rate": 2.594267032140789e-05, "loss": 2.032, "step": 8229 }, { "epoch": 0.2655315167439319, "grad_norm": 0.380859375, "learning_rate": 2.5941593876411357e-05, "loss": 2.01, "step": 8230 }, { "epoch": 0.2655637805977282, "grad_norm": 0.380859375, "learning_rate": 2.5940517310977603e-05, "loss": 2.0042, "step": 8231 }, { "epoch": 0.26559604445152457, "grad_norm": 0.390625, "learning_rate": 2.5939440625118484e-05, "loss": 2.0149, "step": 8232 }, { "epoch": 0.2656283083053209, "grad_norm": 0.392578125, "learning_rate": 2.593836381884585e-05, "loss": 2.0252, "step": 8233 }, { "epoch": 0.26566057215911726, "grad_norm": 0.380859375, "learning_rate": 2.5937286892171555e-05, "loss": 2.0239, "step": 8234 }, { "epoch": 0.2656928360129136, "grad_norm": 0.388671875, "learning_rate": 2.5936209845107454e-05, "loss": 1.9798, "step": 8235 }, { "epoch": 0.26572509986670995, "grad_norm": 0.412109375, "learning_rate": 2.5935132677665395e-05, "loss": 2.0033, "step": 8236 }, { "epoch": 0.2657573637205063, "grad_norm": 0.37890625, "learning_rate": 2.5934055389857244e-05, "loss": 1.9658, "step": 8237 }, { "epoch": 0.26578962757430263, "grad_norm": 0.396484375, "learning_rate": 2.5932977981694856e-05, "loss": 2.0058, "step": 8238 }, { "epoch": 0.265821891428099, "grad_norm": 0.376953125, "learning_rate": 2.5931900453190086e-05, "loss": 1.9967, "step": 8239 }, { "epoch": 0.2658541552818953, "grad_norm": 0.390625, "learning_rate": 2.5930822804354802e-05, "loss": 2.0081, "step": 8240 }, { "epoch": 0.26588641913569167, "grad_norm": 0.390625, "learning_rate": 2.5929745035200863e-05, "loss": 2.0136, "step": 8241 }, { "epoch": 0.265918682989488, "grad_norm": 0.396484375, "learning_rate": 2.5928667145740127e-05, "loss": 1.9643, "step": 8242 }, { "epoch": 0.26595094684328435, "grad_norm": 0.390625, "learning_rate": 2.5927589135984467e-05, "loss": 1.9925, "step": 8243 }, { "epoch": 0.26598321069708075, "grad_norm": 0.392578125, "learning_rate": 2.5926511005945748e-05, "loss": 2.0056, "step": 8244 }, { "epoch": 0.2660154745508771, "grad_norm": 0.388671875, "learning_rate": 2.592543275563583e-05, "loss": 2.0059, "step": 8245 }, { "epoch": 0.26604773840467344, "grad_norm": 0.380859375, "learning_rate": 2.5924354385066585e-05, "loss": 2.0041, "step": 8246 }, { "epoch": 0.2660800022584698, "grad_norm": 0.40625, "learning_rate": 2.592327589424989e-05, "loss": 2.0307, "step": 8247 }, { "epoch": 0.2661122661122661, "grad_norm": 0.373046875, "learning_rate": 2.59221972831976e-05, "loss": 1.9882, "step": 8248 }, { "epoch": 0.26614452996606247, "grad_norm": 0.390625, "learning_rate": 2.592111855192161e-05, "loss": 1.9872, "step": 8249 }, { "epoch": 0.2661767938198588, "grad_norm": 0.376953125, "learning_rate": 2.5920039700433773e-05, "loss": 2.0072, "step": 8250 }, { "epoch": 0.26620905767365516, "grad_norm": 0.3828125, "learning_rate": 2.5918960728745976e-05, "loss": 1.9985, "step": 8251 }, { "epoch": 0.2662413215274515, "grad_norm": 0.392578125, "learning_rate": 2.5917881636870093e-05, "loss": 1.9831, "step": 8252 }, { "epoch": 0.26627358538124785, "grad_norm": 0.380859375, "learning_rate": 2.5916802424818002e-05, "loss": 1.9997, "step": 8253 }, { "epoch": 0.2663058492350442, "grad_norm": 0.396484375, "learning_rate": 2.5915723092601578e-05, "loss": 1.9598, "step": 8254 }, { "epoch": 0.26633811308884053, "grad_norm": 0.392578125, "learning_rate": 2.5914643640232708e-05, "loss": 2.0131, "step": 8255 }, { "epoch": 0.2663703769426369, "grad_norm": 0.369140625, "learning_rate": 2.5913564067723273e-05, "loss": 1.997, "step": 8256 }, { "epoch": 0.2664026407964332, "grad_norm": 0.37890625, "learning_rate": 2.591248437508515e-05, "loss": 2.0036, "step": 8257 }, { "epoch": 0.26643490465022956, "grad_norm": 0.369140625, "learning_rate": 2.591140456233023e-05, "loss": 1.9787, "step": 8258 }, { "epoch": 0.2664671685040259, "grad_norm": 0.40234375, "learning_rate": 2.5910324629470397e-05, "loss": 1.9893, "step": 8259 }, { "epoch": 0.26649943235782225, "grad_norm": 0.375, "learning_rate": 2.5909244576517535e-05, "loss": 1.961, "step": 8260 }, { "epoch": 0.2665316962116186, "grad_norm": 0.3984375, "learning_rate": 2.5908164403483537e-05, "loss": 1.9884, "step": 8261 }, { "epoch": 0.26656396006541494, "grad_norm": 0.380859375, "learning_rate": 2.590708411038029e-05, "loss": 2.0139, "step": 8262 }, { "epoch": 0.2665962239192113, "grad_norm": 0.40234375, "learning_rate": 2.5906003697219684e-05, "loss": 2.0081, "step": 8263 }, { "epoch": 0.2666284877730077, "grad_norm": 0.466796875, "learning_rate": 2.5904923164013615e-05, "loss": 1.9925, "step": 8264 }, { "epoch": 0.266660751626804, "grad_norm": 0.60546875, "learning_rate": 2.5903842510773975e-05, "loss": 1.9861, "step": 8265 }, { "epoch": 0.26669301548060037, "grad_norm": 0.71875, "learning_rate": 2.590276173751266e-05, "loss": 2.0152, "step": 8266 }, { "epoch": 0.2667252793343967, "grad_norm": 0.53515625, "learning_rate": 2.5901680844241565e-05, "loss": 1.9851, "step": 8267 }, { "epoch": 0.26675754318819306, "grad_norm": 0.439453125, "learning_rate": 2.5900599830972584e-05, "loss": 1.9792, "step": 8268 }, { "epoch": 0.2667898070419894, "grad_norm": 0.5625, "learning_rate": 2.5899518697717624e-05, "loss": 1.9865, "step": 8269 }, { "epoch": 0.26682207089578575, "grad_norm": 0.388671875, "learning_rate": 2.5898437444488583e-05, "loss": 2.0077, "step": 8270 }, { "epoch": 0.2668543347495821, "grad_norm": 0.546875, "learning_rate": 2.589735607129736e-05, "loss": 2.0044, "step": 8271 }, { "epoch": 0.26688659860337843, "grad_norm": 0.388671875, "learning_rate": 2.5896274578155857e-05, "loss": 1.9762, "step": 8272 }, { "epoch": 0.2669188624571748, "grad_norm": 0.48046875, "learning_rate": 2.5895192965075978e-05, "loss": 2.0082, "step": 8273 }, { "epoch": 0.2669511263109711, "grad_norm": 0.4140625, "learning_rate": 2.5894111232069636e-05, "loss": 1.9866, "step": 8274 }, { "epoch": 0.26698339016476746, "grad_norm": 0.4609375, "learning_rate": 2.5893029379148732e-05, "loss": 1.9857, "step": 8275 }, { "epoch": 0.2670156540185638, "grad_norm": 0.423828125, "learning_rate": 2.5891947406325178e-05, "loss": 2.0084, "step": 8276 }, { "epoch": 0.26704791787236015, "grad_norm": 0.4140625, "learning_rate": 2.5890865313610876e-05, "loss": 2.0184, "step": 8277 }, { "epoch": 0.2670801817261565, "grad_norm": 0.41796875, "learning_rate": 2.5889783101017743e-05, "loss": 1.983, "step": 8278 }, { "epoch": 0.26711244557995284, "grad_norm": 0.390625, "learning_rate": 2.588870076855769e-05, "loss": 2.008, "step": 8279 }, { "epoch": 0.2671447094337492, "grad_norm": 0.439453125, "learning_rate": 2.5887618316242626e-05, "loss": 1.9749, "step": 8280 }, { "epoch": 0.2671769732875455, "grad_norm": 0.431640625, "learning_rate": 2.588653574408448e-05, "loss": 2.0046, "step": 8281 }, { "epoch": 0.26720923714134187, "grad_norm": 0.490234375, "learning_rate": 2.5885453052095155e-05, "loss": 2.0011, "step": 8282 }, { "epoch": 0.2672415009951382, "grad_norm": 0.3984375, "learning_rate": 2.588437024028657e-05, "loss": 1.9919, "step": 8283 }, { "epoch": 0.2672737648489346, "grad_norm": 0.443359375, "learning_rate": 2.5883287308670642e-05, "loss": 2.0086, "step": 8284 }, { "epoch": 0.26730602870273096, "grad_norm": 0.3828125, "learning_rate": 2.58822042572593e-05, "loss": 2.006, "step": 8285 }, { "epoch": 0.2673382925565273, "grad_norm": 0.41015625, "learning_rate": 2.5881121086064462e-05, "loss": 1.9932, "step": 8286 }, { "epoch": 0.26737055641032365, "grad_norm": 0.408203125, "learning_rate": 2.5880037795098044e-05, "loss": 1.9499, "step": 8287 }, { "epoch": 0.26740282026412, "grad_norm": 0.431640625, "learning_rate": 2.587895438437198e-05, "loss": 1.9932, "step": 8288 }, { "epoch": 0.26743508411791633, "grad_norm": 0.423828125, "learning_rate": 2.587787085389819e-05, "loss": 1.9816, "step": 8289 }, { "epoch": 0.2674673479717127, "grad_norm": 0.421875, "learning_rate": 2.5876787203688595e-05, "loss": 1.9987, "step": 8290 }, { "epoch": 0.267499611825509, "grad_norm": 0.43359375, "learning_rate": 2.587570343375514e-05, "loss": 2.0078, "step": 8291 }, { "epoch": 0.26753187567930536, "grad_norm": 0.458984375, "learning_rate": 2.5874619544109737e-05, "loss": 2.0134, "step": 8292 }, { "epoch": 0.2675641395331017, "grad_norm": 0.388671875, "learning_rate": 2.5873535534764322e-05, "loss": 1.9698, "step": 8293 }, { "epoch": 0.26759640338689805, "grad_norm": 0.4609375, "learning_rate": 2.5872451405730834e-05, "loss": 2.0153, "step": 8294 }, { "epoch": 0.2676286672406944, "grad_norm": 0.39453125, "learning_rate": 2.5871367157021197e-05, "loss": 1.9836, "step": 8295 }, { "epoch": 0.26766093109449074, "grad_norm": 0.42578125, "learning_rate": 2.5870282788647352e-05, "loss": 1.9848, "step": 8296 }, { "epoch": 0.2676931949482871, "grad_norm": 0.41015625, "learning_rate": 2.586919830062123e-05, "loss": 1.9782, "step": 8297 }, { "epoch": 0.2677254588020834, "grad_norm": 0.390625, "learning_rate": 2.5868113692954774e-05, "loss": 2.0337, "step": 8298 }, { "epoch": 0.26775772265587977, "grad_norm": 0.41015625, "learning_rate": 2.5867028965659913e-05, "loss": 1.9917, "step": 8299 }, { "epoch": 0.2677899865096761, "grad_norm": 0.369140625, "learning_rate": 2.5865944118748598e-05, "loss": 1.9089, "step": 8300 }, { "epoch": 0.26782225036347246, "grad_norm": 0.419921875, "learning_rate": 2.5864859152232764e-05, "loss": 1.9968, "step": 8301 }, { "epoch": 0.2678545142172688, "grad_norm": 0.375, "learning_rate": 2.5863774066124356e-05, "loss": 1.9833, "step": 8302 }, { "epoch": 0.26788677807106515, "grad_norm": 0.392578125, "learning_rate": 2.586268886043532e-05, "loss": 1.9924, "step": 8303 }, { "epoch": 0.26791904192486155, "grad_norm": 0.462890625, "learning_rate": 2.586160353517759e-05, "loss": 1.9733, "step": 8304 }, { "epoch": 0.2679513057786579, "grad_norm": 0.423828125, "learning_rate": 2.586051809036312e-05, "loss": 1.985, "step": 8305 }, { "epoch": 0.26798356963245423, "grad_norm": 0.419921875, "learning_rate": 2.5859432526003865e-05, "loss": 1.998, "step": 8306 }, { "epoch": 0.2680158334862506, "grad_norm": 0.4296875, "learning_rate": 2.5858346842111762e-05, "loss": 1.9927, "step": 8307 }, { "epoch": 0.2680480973400469, "grad_norm": 0.408203125, "learning_rate": 2.585726103869877e-05, "loss": 1.9707, "step": 8308 }, { "epoch": 0.26808036119384326, "grad_norm": 0.4375, "learning_rate": 2.5856175115776837e-05, "loss": 1.9493, "step": 8309 }, { "epoch": 0.2681126250476396, "grad_norm": 0.416015625, "learning_rate": 2.5855089073357918e-05, "loss": 1.9977, "step": 8310 }, { "epoch": 0.26814488890143595, "grad_norm": 0.404296875, "learning_rate": 2.5854002911453962e-05, "loss": 1.9611, "step": 8311 }, { "epoch": 0.2681771527552323, "grad_norm": 0.4609375, "learning_rate": 2.585291663007693e-05, "loss": 1.9561, "step": 8312 }, { "epoch": 0.26820941660902864, "grad_norm": 0.40625, "learning_rate": 2.585183022923878e-05, "loss": 1.9898, "step": 8313 }, { "epoch": 0.268241680462825, "grad_norm": 0.412109375, "learning_rate": 2.5850743708951466e-05, "loss": 1.9767, "step": 8314 }, { "epoch": 0.2682739443166213, "grad_norm": 0.423828125, "learning_rate": 2.5849657069226946e-05, "loss": 1.9406, "step": 8315 }, { "epoch": 0.26830620817041767, "grad_norm": 0.392578125, "learning_rate": 2.584857031007719e-05, "loss": 1.9841, "step": 8316 }, { "epoch": 0.268338472024214, "grad_norm": 0.41015625, "learning_rate": 2.5847483431514155e-05, "loss": 1.9801, "step": 8317 }, { "epoch": 0.26837073587801036, "grad_norm": 0.4296875, "learning_rate": 2.58463964335498e-05, "loss": 2.0013, "step": 8318 }, { "epoch": 0.2684029997318067, "grad_norm": 0.3984375, "learning_rate": 2.5845309316196094e-05, "loss": 1.9877, "step": 8319 }, { "epoch": 0.26843526358560305, "grad_norm": 0.39453125, "learning_rate": 2.584422207946501e-05, "loss": 2.0014, "step": 8320 }, { "epoch": 0.2684675274393994, "grad_norm": 0.41796875, "learning_rate": 2.5843134723368508e-05, "loss": 1.9735, "step": 8321 }, { "epoch": 0.26849979129319573, "grad_norm": 0.404296875, "learning_rate": 2.584204724791855e-05, "loss": 1.9465, "step": 8322 }, { "epoch": 0.2685320551469921, "grad_norm": 0.404296875, "learning_rate": 2.584095965312712e-05, "loss": 1.9824, "step": 8323 }, { "epoch": 0.2685643190007884, "grad_norm": 0.4609375, "learning_rate": 2.5839871939006184e-05, "loss": 1.9892, "step": 8324 }, { "epoch": 0.2685965828545848, "grad_norm": 0.46484375, "learning_rate": 2.583878410556771e-05, "loss": 1.9228, "step": 8325 }, { "epoch": 0.26862884670838116, "grad_norm": 0.404296875, "learning_rate": 2.583769615282368e-05, "loss": 1.9934, "step": 8326 }, { "epoch": 0.2686611105621775, "grad_norm": 0.419921875, "learning_rate": 2.5836608080786063e-05, "loss": 2.0147, "step": 8327 }, { "epoch": 0.26869337441597385, "grad_norm": 0.390625, "learning_rate": 2.583551988946684e-05, "loss": 1.9897, "step": 8328 }, { "epoch": 0.2687256382697702, "grad_norm": 0.384765625, "learning_rate": 2.5834431578877986e-05, "loss": 2.0, "step": 8329 }, { "epoch": 0.26875790212356654, "grad_norm": 0.37109375, "learning_rate": 2.5833343149031483e-05, "loss": 1.9789, "step": 8330 }, { "epoch": 0.2687901659773629, "grad_norm": 0.39453125, "learning_rate": 2.5832254599939312e-05, "loss": 1.9852, "step": 8331 }, { "epoch": 0.2688224298311592, "grad_norm": 0.376953125, "learning_rate": 2.583116593161345e-05, "loss": 1.9954, "step": 8332 }, { "epoch": 0.26885469368495557, "grad_norm": 0.375, "learning_rate": 2.5830077144065887e-05, "loss": 2.0018, "step": 8333 }, { "epoch": 0.2688869575387519, "grad_norm": 0.388671875, "learning_rate": 2.58289882373086e-05, "loss": 1.9662, "step": 8334 }, { "epoch": 0.26891922139254826, "grad_norm": 0.388671875, "learning_rate": 2.5827899211353583e-05, "loss": 1.9891, "step": 8335 }, { "epoch": 0.2689514852463446, "grad_norm": 0.41796875, "learning_rate": 2.5826810066212815e-05, "loss": 2.0167, "step": 8336 }, { "epoch": 0.26898374910014095, "grad_norm": 0.412109375, "learning_rate": 2.5825720801898292e-05, "loss": 1.981, "step": 8337 }, { "epoch": 0.2690160129539373, "grad_norm": 0.435546875, "learning_rate": 2.5824631418422002e-05, "loss": 1.9666, "step": 8338 }, { "epoch": 0.26904827680773363, "grad_norm": 0.431640625, "learning_rate": 2.5823541915795932e-05, "loss": 1.9979, "step": 8339 }, { "epoch": 0.26908054066153, "grad_norm": 0.419921875, "learning_rate": 2.5822452294032083e-05, "loss": 1.9999, "step": 8340 }, { "epoch": 0.2691128045153263, "grad_norm": 0.466796875, "learning_rate": 2.5821362553142434e-05, "loss": 1.9669, "step": 8341 }, { "epoch": 0.26914506836912266, "grad_norm": 0.404296875, "learning_rate": 2.5820272693138998e-05, "loss": 1.9794, "step": 8342 }, { "epoch": 0.269177332222919, "grad_norm": 0.439453125, "learning_rate": 2.5819182714033757e-05, "loss": 1.977, "step": 8343 }, { "epoch": 0.26920959607671535, "grad_norm": 0.427734375, "learning_rate": 2.5818092615838717e-05, "loss": 2.0011, "step": 8344 }, { "epoch": 0.26924185993051175, "grad_norm": 0.37890625, "learning_rate": 2.5817002398565874e-05, "loss": 2.011, "step": 8345 }, { "epoch": 0.2692741237843081, "grad_norm": 0.3984375, "learning_rate": 2.581591206222723e-05, "loss": 1.9596, "step": 8346 }, { "epoch": 0.26930638763810444, "grad_norm": 0.365234375, "learning_rate": 2.5814821606834783e-05, "loss": 1.9695, "step": 8347 }, { "epoch": 0.2693386514919008, "grad_norm": 0.412109375, "learning_rate": 2.5813731032400536e-05, "loss": 2.0276, "step": 8348 }, { "epoch": 0.2693709153456971, "grad_norm": 0.365234375, "learning_rate": 2.5812640338936497e-05, "loss": 2.0119, "step": 8349 }, { "epoch": 0.26940317919949347, "grad_norm": 0.419921875, "learning_rate": 2.5811549526454676e-05, "loss": 1.9848, "step": 8350 }, { "epoch": 0.2694354430532898, "grad_norm": 0.404296875, "learning_rate": 2.581045859496707e-05, "loss": 2.0184, "step": 8351 }, { "epoch": 0.26946770690708616, "grad_norm": 0.375, "learning_rate": 2.5809367544485688e-05, "loss": 1.9853, "step": 8352 }, { "epoch": 0.2694999707608825, "grad_norm": 0.4140625, "learning_rate": 2.5808276375022542e-05, "loss": 1.9963, "step": 8353 }, { "epoch": 0.26953223461467885, "grad_norm": 0.439453125, "learning_rate": 2.5807185086589652e-05, "loss": 1.9779, "step": 8354 }, { "epoch": 0.2695644984684752, "grad_norm": 0.4296875, "learning_rate": 2.5806093679199014e-05, "loss": 2.0093, "step": 8355 }, { "epoch": 0.26959676232227153, "grad_norm": 0.423828125, "learning_rate": 2.580500215286265e-05, "loss": 2.007, "step": 8356 }, { "epoch": 0.2696290261760679, "grad_norm": 0.392578125, "learning_rate": 2.5803910507592577e-05, "loss": 1.9738, "step": 8357 }, { "epoch": 0.2696612900298642, "grad_norm": 0.408203125, "learning_rate": 2.5802818743400806e-05, "loss": 2.0228, "step": 8358 }, { "epoch": 0.26969355388366056, "grad_norm": 0.376953125, "learning_rate": 2.580172686029936e-05, "loss": 2.0003, "step": 8359 }, { "epoch": 0.2697258177374569, "grad_norm": 0.392578125, "learning_rate": 2.580063485830025e-05, "loss": 1.988, "step": 8360 }, { "epoch": 0.26975808159125325, "grad_norm": 0.392578125, "learning_rate": 2.5799542737415497e-05, "loss": 1.9741, "step": 8361 }, { "epoch": 0.2697903454450496, "grad_norm": 0.400390625, "learning_rate": 2.579845049765713e-05, "loss": 2.0314, "step": 8362 }, { "epoch": 0.26982260929884594, "grad_norm": 0.375, "learning_rate": 2.5797358139037165e-05, "loss": 1.9849, "step": 8363 }, { "epoch": 0.2698548731526423, "grad_norm": 0.388671875, "learning_rate": 2.579626566156763e-05, "loss": 1.9678, "step": 8364 }, { "epoch": 0.2698871370064387, "grad_norm": 0.400390625, "learning_rate": 2.579517306526054e-05, "loss": 1.9989, "step": 8365 }, { "epoch": 0.269919400860235, "grad_norm": 0.359375, "learning_rate": 2.5794080350127936e-05, "loss": 2.0118, "step": 8366 }, { "epoch": 0.26995166471403137, "grad_norm": 0.404296875, "learning_rate": 2.5792987516181837e-05, "loss": 2.006, "step": 8367 }, { "epoch": 0.2699839285678277, "grad_norm": 0.37890625, "learning_rate": 2.579189456343427e-05, "loss": 2.0085, "step": 8368 }, { "epoch": 0.27001619242162406, "grad_norm": 0.39453125, "learning_rate": 2.5790801491897277e-05, "loss": 1.9892, "step": 8369 }, { "epoch": 0.2700484562754204, "grad_norm": 0.39453125, "learning_rate": 2.5789708301582882e-05, "loss": 2.013, "step": 8370 }, { "epoch": 0.27008072012921674, "grad_norm": 0.400390625, "learning_rate": 2.5788614992503118e-05, "loss": 2.0055, "step": 8371 }, { "epoch": 0.2701129839830131, "grad_norm": 0.365234375, "learning_rate": 2.578752156467002e-05, "loss": 1.9911, "step": 8372 }, { "epoch": 0.27014524783680943, "grad_norm": 0.388671875, "learning_rate": 2.5786428018095616e-05, "loss": 2.0181, "step": 8373 }, { "epoch": 0.2701775116906058, "grad_norm": 0.375, "learning_rate": 2.5785334352791962e-05, "loss": 2.011, "step": 8374 }, { "epoch": 0.2702097755444021, "grad_norm": 0.369140625, "learning_rate": 2.5784240568771076e-05, "loss": 2.011, "step": 8375 }, { "epoch": 0.27024203939819846, "grad_norm": 0.380859375, "learning_rate": 2.578314666604501e-05, "loss": 2.0104, "step": 8376 }, { "epoch": 0.2702743032519948, "grad_norm": 0.373046875, "learning_rate": 2.5782052644625805e-05, "loss": 1.987, "step": 8377 }, { "epoch": 0.27030656710579115, "grad_norm": 0.38671875, "learning_rate": 2.5780958504525496e-05, "loss": 2.0065, "step": 8378 }, { "epoch": 0.2703388309595875, "grad_norm": 0.38671875, "learning_rate": 2.5779864245756125e-05, "loss": 1.9866, "step": 8379 }, { "epoch": 0.27037109481338384, "grad_norm": 0.423828125, "learning_rate": 2.5778769868329747e-05, "loss": 2.0037, "step": 8380 }, { "epoch": 0.2704033586671802, "grad_norm": 0.375, "learning_rate": 2.5777675372258404e-05, "loss": 1.9958, "step": 8381 }, { "epoch": 0.2704356225209765, "grad_norm": 0.380859375, "learning_rate": 2.577658075755414e-05, "loss": 2.0127, "step": 8382 }, { "epoch": 0.27046788637477287, "grad_norm": 0.416015625, "learning_rate": 2.577548602422901e-05, "loss": 2.0401, "step": 8383 }, { "epoch": 0.2705001502285692, "grad_norm": 0.369140625, "learning_rate": 2.5774391172295057e-05, "loss": 1.9842, "step": 8384 }, { "epoch": 0.2705324140823656, "grad_norm": 0.38671875, "learning_rate": 2.5773296201764334e-05, "loss": 2.0205, "step": 8385 }, { "epoch": 0.27056467793616196, "grad_norm": 0.38671875, "learning_rate": 2.5772201112648893e-05, "loss": 2.0133, "step": 8386 }, { "epoch": 0.2705969417899583, "grad_norm": 0.5390625, "learning_rate": 2.5771105904960794e-05, "loss": 1.9955, "step": 8387 }, { "epoch": 0.27062920564375464, "grad_norm": 0.412109375, "learning_rate": 2.5770010578712093e-05, "loss": 1.9994, "step": 8388 }, { "epoch": 0.270661469497551, "grad_norm": 0.40625, "learning_rate": 2.5768915133914836e-05, "loss": 2.0002, "step": 8389 }, { "epoch": 0.27069373335134733, "grad_norm": 0.423828125, "learning_rate": 2.5767819570581088e-05, "loss": 1.9494, "step": 8390 }, { "epoch": 0.2707259972051437, "grad_norm": 0.466796875, "learning_rate": 2.5766723888722904e-05, "loss": 1.9808, "step": 8391 }, { "epoch": 0.27075826105894, "grad_norm": 0.416015625, "learning_rate": 2.576562808835235e-05, "loss": 2.0243, "step": 8392 }, { "epoch": 0.27079052491273636, "grad_norm": 0.408203125, "learning_rate": 2.5764532169481485e-05, "loss": 1.9976, "step": 8393 }, { "epoch": 0.2708227887665327, "grad_norm": 0.384765625, "learning_rate": 2.576343613212237e-05, "loss": 2.0137, "step": 8394 }, { "epoch": 0.27085505262032905, "grad_norm": 0.40625, "learning_rate": 2.5762339976287077e-05, "loss": 2.0109, "step": 8395 }, { "epoch": 0.2708873164741254, "grad_norm": 0.392578125, "learning_rate": 2.5761243701987663e-05, "loss": 2.024, "step": 8396 }, { "epoch": 0.27091958032792174, "grad_norm": 0.421875, "learning_rate": 2.5760147309236204e-05, "loss": 1.967, "step": 8397 }, { "epoch": 0.2709518441817181, "grad_norm": 0.4375, "learning_rate": 2.5759050798044758e-05, "loss": 2.0158, "step": 8398 }, { "epoch": 0.2709841080355144, "grad_norm": 0.384765625, "learning_rate": 2.5757954168425397e-05, "loss": 2.0223, "step": 8399 }, { "epoch": 0.27101637188931077, "grad_norm": 0.412109375, "learning_rate": 2.5756857420390197e-05, "loss": 2.0021, "step": 8400 }, { "epoch": 0.2710486357431071, "grad_norm": 0.373046875, "learning_rate": 2.5755760553951225e-05, "loss": 2.0092, "step": 8401 }, { "epoch": 0.27108089959690346, "grad_norm": 0.43359375, "learning_rate": 2.575466356912056e-05, "loss": 2.0035, "step": 8402 }, { "epoch": 0.2711131634506998, "grad_norm": 0.40625, "learning_rate": 2.5753566465910273e-05, "loss": 2.0194, "step": 8403 }, { "epoch": 0.27114542730449614, "grad_norm": 0.390625, "learning_rate": 2.575246924433244e-05, "loss": 2.033, "step": 8404 }, { "epoch": 0.2711776911582925, "grad_norm": 0.392578125, "learning_rate": 2.5751371904399144e-05, "loss": 1.9948, "step": 8405 }, { "epoch": 0.2712099550120889, "grad_norm": 0.3984375, "learning_rate": 2.575027444612245e-05, "loss": 1.9938, "step": 8406 }, { "epoch": 0.27124221886588523, "grad_norm": 0.390625, "learning_rate": 2.5749176869514457e-05, "loss": 2.018, "step": 8407 }, { "epoch": 0.2712744827196816, "grad_norm": 0.3984375, "learning_rate": 2.574807917458723e-05, "loss": 2.026, "step": 8408 }, { "epoch": 0.2713067465734779, "grad_norm": 0.396484375, "learning_rate": 2.5746981361352862e-05, "loss": 2.0151, "step": 8409 }, { "epoch": 0.27133901042727426, "grad_norm": 0.380859375, "learning_rate": 2.5745883429823427e-05, "loss": 2.0161, "step": 8410 }, { "epoch": 0.2713712742810706, "grad_norm": 0.408203125, "learning_rate": 2.5744785380011022e-05, "loss": 2.0046, "step": 8411 }, { "epoch": 0.27140353813486695, "grad_norm": 0.3984375, "learning_rate": 2.5743687211927728e-05, "loss": 2.014, "step": 8412 }, { "epoch": 0.2714358019886633, "grad_norm": 0.423828125, "learning_rate": 2.5742588925585628e-05, "loss": 2.0303, "step": 8413 }, { "epoch": 0.27146806584245964, "grad_norm": 0.43359375, "learning_rate": 2.5741490520996817e-05, "loss": 2.032, "step": 8414 }, { "epoch": 0.271500329696256, "grad_norm": 0.5234375, "learning_rate": 2.5740391998173383e-05, "loss": 2.0628, "step": 8415 }, { "epoch": 0.2715325935500523, "grad_norm": 0.5, "learning_rate": 2.5739293357127427e-05, "loss": 2.0377, "step": 8416 }, { "epoch": 0.27156485740384867, "grad_norm": 0.45703125, "learning_rate": 2.5738194597871027e-05, "loss": 2.0449, "step": 8417 }, { "epoch": 0.271597121257645, "grad_norm": 0.474609375, "learning_rate": 2.5737095720416285e-05, "loss": 1.9584, "step": 8418 }, { "epoch": 0.27162938511144136, "grad_norm": 0.4453125, "learning_rate": 2.57359967247753e-05, "loss": 1.995, "step": 8419 }, { "epoch": 0.2716616489652377, "grad_norm": 0.59375, "learning_rate": 2.5734897610960157e-05, "loss": 1.9796, "step": 8420 }, { "epoch": 0.27169391281903404, "grad_norm": 0.6015625, "learning_rate": 2.573379837898297e-05, "loss": 2.0424, "step": 8421 }, { "epoch": 0.2717261766728304, "grad_norm": 0.51171875, "learning_rate": 2.573269902885583e-05, "loss": 2.0409, "step": 8422 }, { "epoch": 0.27175844052662673, "grad_norm": 0.462890625, "learning_rate": 2.5731599560590836e-05, "loss": 2.0414, "step": 8423 }, { "epoch": 0.2717907043804231, "grad_norm": 0.5234375, "learning_rate": 2.5730499974200092e-05, "loss": 1.9916, "step": 8424 }, { "epoch": 0.2718229682342194, "grad_norm": 0.48828125, "learning_rate": 2.5729400269695705e-05, "loss": 2.0265, "step": 8425 }, { "epoch": 0.2718552320880158, "grad_norm": 0.4453125, "learning_rate": 2.572830044708978e-05, "loss": 1.9959, "step": 8426 }, { "epoch": 0.27188749594181216, "grad_norm": 0.515625, "learning_rate": 2.5727200506394412e-05, "loss": 2.0116, "step": 8427 }, { "epoch": 0.2719197597956085, "grad_norm": 0.4921875, "learning_rate": 2.572610044762172e-05, "loss": 2.0279, "step": 8428 }, { "epoch": 0.27195202364940485, "grad_norm": 0.498046875, "learning_rate": 2.5725000270783814e-05, "loss": 1.9832, "step": 8429 }, { "epoch": 0.2719842875032012, "grad_norm": 0.494140625, "learning_rate": 2.5723899975892795e-05, "loss": 1.9829, "step": 8430 }, { "epoch": 0.27201655135699754, "grad_norm": 0.6953125, "learning_rate": 2.5722799562960778e-05, "loss": 1.9753, "step": 8431 }, { "epoch": 0.2720488152107939, "grad_norm": 0.49609375, "learning_rate": 2.5721699031999877e-05, "loss": 2.0057, "step": 8432 }, { "epoch": 0.2720810790645902, "grad_norm": 0.578125, "learning_rate": 2.5720598383022204e-05, "loss": 2.0419, "step": 8433 }, { "epoch": 0.27211334291838657, "grad_norm": 0.64453125, "learning_rate": 2.5719497616039875e-05, "loss": 1.9972, "step": 8434 }, { "epoch": 0.2721456067721829, "grad_norm": 0.609375, "learning_rate": 2.5718396731065006e-05, "loss": 2.0028, "step": 8435 }, { "epoch": 0.27217787062597926, "grad_norm": 0.53125, "learning_rate": 2.5717295728109713e-05, "loss": 2.0275, "step": 8436 }, { "epoch": 0.2722101344797756, "grad_norm": 0.55859375, "learning_rate": 2.571619460718612e-05, "loss": 2.032, "step": 8437 }, { "epoch": 0.27224239833357194, "grad_norm": 0.478515625, "learning_rate": 2.5715093368306346e-05, "loss": 2.0162, "step": 8438 }, { "epoch": 0.2722746621873683, "grad_norm": 0.54296875, "learning_rate": 2.571399201148251e-05, "loss": 2.045, "step": 8439 }, { "epoch": 0.27230692604116463, "grad_norm": 0.474609375, "learning_rate": 2.5712890536726735e-05, "loss": 2.0463, "step": 8440 }, { "epoch": 0.272339189894961, "grad_norm": 0.484375, "learning_rate": 2.5711788944051146e-05, "loss": 2.0117, "step": 8441 }, { "epoch": 0.2723714537487573, "grad_norm": 0.43359375, "learning_rate": 2.571068723346787e-05, "loss": 2.0163, "step": 8442 }, { "epoch": 0.27240371760255366, "grad_norm": 0.458984375, "learning_rate": 2.570958540498903e-05, "loss": 2.0326, "step": 8443 }, { "epoch": 0.27243598145635, "grad_norm": 0.4375, "learning_rate": 2.570848345862676e-05, "loss": 2.0418, "step": 8444 }, { "epoch": 0.27246824531014635, "grad_norm": 0.412109375, "learning_rate": 2.570738139439319e-05, "loss": 1.9702, "step": 8445 }, { "epoch": 0.27250050916394275, "grad_norm": 0.470703125, "learning_rate": 2.570627921230044e-05, "loss": 2.0363, "step": 8446 }, { "epoch": 0.2725327730177391, "grad_norm": 0.404296875, "learning_rate": 2.5705176912360655e-05, "loss": 2.0382, "step": 8447 }, { "epoch": 0.27256503687153544, "grad_norm": 0.40234375, "learning_rate": 2.570407449458596e-05, "loss": 2.0548, "step": 8448 }, { "epoch": 0.2725973007253318, "grad_norm": 0.423828125, "learning_rate": 2.570297195898849e-05, "loss": 2.0037, "step": 8449 }, { "epoch": 0.2726295645791281, "grad_norm": 0.392578125, "learning_rate": 2.5701869305580384e-05, "loss": 1.9898, "step": 8450 }, { "epoch": 0.27266182843292447, "grad_norm": 0.3984375, "learning_rate": 2.5700766534373777e-05, "loss": 2.0016, "step": 8451 }, { "epoch": 0.2726940922867208, "grad_norm": 0.412109375, "learning_rate": 2.5699663645380812e-05, "loss": 1.9956, "step": 8452 }, { "epoch": 0.27272635614051716, "grad_norm": 0.380859375, "learning_rate": 2.569856063861362e-05, "loss": 2.0105, "step": 8453 }, { "epoch": 0.2727586199943135, "grad_norm": 0.41796875, "learning_rate": 2.569745751408435e-05, "loss": 2.0218, "step": 8454 }, { "epoch": 0.27279088384810984, "grad_norm": 0.50390625, "learning_rate": 2.5696354271805145e-05, "loss": 1.986, "step": 8455 }, { "epoch": 0.2728231477019062, "grad_norm": 0.447265625, "learning_rate": 2.569525091178815e-05, "loss": 1.9933, "step": 8456 }, { "epoch": 0.27285541155570253, "grad_norm": 0.421875, "learning_rate": 2.5694147434045497e-05, "loss": 1.9908, "step": 8457 }, { "epoch": 0.2728876754094989, "grad_norm": 0.47265625, "learning_rate": 2.569304383858934e-05, "loss": 1.9701, "step": 8458 }, { "epoch": 0.2729199392632952, "grad_norm": 0.47265625, "learning_rate": 2.5691940125431834e-05, "loss": 2.0215, "step": 8459 }, { "epoch": 0.27295220311709156, "grad_norm": 0.41796875, "learning_rate": 2.569083629458512e-05, "loss": 1.9694, "step": 8460 }, { "epoch": 0.2729844669708879, "grad_norm": 0.455078125, "learning_rate": 2.5689732346061348e-05, "loss": 2.0124, "step": 8461 }, { "epoch": 0.27301673082468425, "grad_norm": 0.4375, "learning_rate": 2.568862827987267e-05, "loss": 1.977, "step": 8462 }, { "epoch": 0.2730489946784806, "grad_norm": 0.42578125, "learning_rate": 2.5687524096031242e-05, "loss": 2.0238, "step": 8463 }, { "epoch": 0.27308125853227694, "grad_norm": 0.439453125, "learning_rate": 2.5686419794549218e-05, "loss": 2.0143, "step": 8464 }, { "epoch": 0.2731135223860733, "grad_norm": 0.41796875, "learning_rate": 2.568531537543875e-05, "loss": 1.9629, "step": 8465 }, { "epoch": 0.2731457862398697, "grad_norm": 0.443359375, "learning_rate": 2.568421083871199e-05, "loss": 1.9566, "step": 8466 }, { "epoch": 0.273178050093666, "grad_norm": 0.447265625, "learning_rate": 2.5683106184381107e-05, "loss": 2.0155, "step": 8467 }, { "epoch": 0.27321031394746237, "grad_norm": 0.47265625, "learning_rate": 2.5682001412458254e-05, "loss": 1.9955, "step": 8468 }, { "epoch": 0.2732425778012587, "grad_norm": 0.416015625, "learning_rate": 2.5680896522955593e-05, "loss": 1.9896, "step": 8469 }, { "epoch": 0.27327484165505506, "grad_norm": 0.443359375, "learning_rate": 2.5679791515885287e-05, "loss": 1.98, "step": 8470 }, { "epoch": 0.2733071055088514, "grad_norm": 0.443359375, "learning_rate": 2.5678686391259498e-05, "loss": 1.9964, "step": 8471 }, { "epoch": 0.27333936936264774, "grad_norm": 0.3828125, "learning_rate": 2.5677581149090387e-05, "loss": 1.9897, "step": 8472 }, { "epoch": 0.2733716332164441, "grad_norm": 0.43359375, "learning_rate": 2.5676475789390125e-05, "loss": 1.9962, "step": 8473 }, { "epoch": 0.27340389707024043, "grad_norm": 0.3984375, "learning_rate": 2.567537031217088e-05, "loss": 1.9887, "step": 8474 }, { "epoch": 0.2734361609240368, "grad_norm": 0.369140625, "learning_rate": 2.5674264717444808e-05, "loss": 1.9675, "step": 8475 }, { "epoch": 0.2734684247778331, "grad_norm": 0.416015625, "learning_rate": 2.567315900522409e-05, "loss": 1.9753, "step": 8476 }, { "epoch": 0.27350068863162946, "grad_norm": 0.408203125, "learning_rate": 2.5672053175520903e-05, "loss": 1.9579, "step": 8477 }, { "epoch": 0.2735329524854258, "grad_norm": 0.38671875, "learning_rate": 2.5670947228347405e-05, "loss": 1.95, "step": 8478 }, { "epoch": 0.27356521633922215, "grad_norm": 0.4375, "learning_rate": 2.5669841163715773e-05, "loss": 1.9582, "step": 8479 }, { "epoch": 0.2735974801930185, "grad_norm": 0.404296875, "learning_rate": 2.566873498163819e-05, "loss": 2.0192, "step": 8480 }, { "epoch": 0.27362974404681484, "grad_norm": 0.44140625, "learning_rate": 2.566762868212682e-05, "loss": 2.0164, "step": 8481 }, { "epoch": 0.2736620079006112, "grad_norm": 0.390625, "learning_rate": 2.566652226519385e-05, "loss": 1.9956, "step": 8482 }, { "epoch": 0.2736942717544075, "grad_norm": 0.427734375, "learning_rate": 2.5665415730851453e-05, "loss": 2.0241, "step": 8483 }, { "epoch": 0.27372653560820387, "grad_norm": 0.38671875, "learning_rate": 2.566430907911181e-05, "loss": 1.9791, "step": 8484 }, { "epoch": 0.2737587994620002, "grad_norm": 0.40625, "learning_rate": 2.5663202309987107e-05, "loss": 1.9719, "step": 8485 }, { "epoch": 0.27379106331579656, "grad_norm": 0.375, "learning_rate": 2.5662095423489524e-05, "loss": 1.9827, "step": 8486 }, { "epoch": 0.27382332716959296, "grad_norm": 0.392578125, "learning_rate": 2.5660988419631236e-05, "loss": 1.9861, "step": 8487 }, { "epoch": 0.2738555910233893, "grad_norm": 0.396484375, "learning_rate": 2.5659881298424445e-05, "loss": 2.003, "step": 8488 }, { "epoch": 0.27388785487718564, "grad_norm": 0.37890625, "learning_rate": 2.5658774059881317e-05, "loss": 1.9743, "step": 8489 }, { "epoch": 0.273920118730982, "grad_norm": 0.390625, "learning_rate": 2.565766670401406e-05, "loss": 1.9737, "step": 8490 }, { "epoch": 0.27395238258477833, "grad_norm": 0.42578125, "learning_rate": 2.565655923083485e-05, "loss": 2.002, "step": 8491 }, { "epoch": 0.2739846464385747, "grad_norm": 0.375, "learning_rate": 2.565545164035588e-05, "loss": 2.005, "step": 8492 }, { "epoch": 0.274016910292371, "grad_norm": 0.384765625, "learning_rate": 2.5654343932589343e-05, "loss": 1.9862, "step": 8493 }, { "epoch": 0.27404917414616736, "grad_norm": 0.427734375, "learning_rate": 2.5653236107547433e-05, "loss": 1.984, "step": 8494 }, { "epoch": 0.2740814379999637, "grad_norm": 0.392578125, "learning_rate": 2.5652128165242343e-05, "loss": 1.9479, "step": 8495 }, { "epoch": 0.27411370185376005, "grad_norm": 0.376953125, "learning_rate": 2.5651020105686266e-05, "loss": 1.9282, "step": 8496 }, { "epoch": 0.2741459657075564, "grad_norm": 0.39453125, "learning_rate": 2.56499119288914e-05, "loss": 1.9781, "step": 8497 }, { "epoch": 0.27417822956135274, "grad_norm": 0.431640625, "learning_rate": 2.5648803634869946e-05, "loss": 1.9753, "step": 8498 }, { "epoch": 0.2742104934151491, "grad_norm": 0.42578125, "learning_rate": 2.56476952236341e-05, "loss": 1.9915, "step": 8499 }, { "epoch": 0.2742427572689454, "grad_norm": 0.373046875, "learning_rate": 2.5646586695196065e-05, "loss": 2.0047, "step": 8500 }, { "epoch": 0.27427502112274177, "grad_norm": 0.400390625, "learning_rate": 2.564547804956804e-05, "loss": 1.9769, "step": 8501 }, { "epoch": 0.2743072849765381, "grad_norm": 0.376953125, "learning_rate": 2.564436928676223e-05, "loss": 1.9648, "step": 8502 }, { "epoch": 0.27433954883033446, "grad_norm": 0.3984375, "learning_rate": 2.564326040679084e-05, "loss": 1.9831, "step": 8503 }, { "epoch": 0.2743718126841308, "grad_norm": 0.3828125, "learning_rate": 2.564215140966607e-05, "loss": 1.9395, "step": 8504 }, { "epoch": 0.27440407653792714, "grad_norm": 0.392578125, "learning_rate": 2.5641042295400137e-05, "loss": 1.9955, "step": 8505 }, { "epoch": 0.2744363403917235, "grad_norm": 0.44140625, "learning_rate": 2.5639933064005244e-05, "loss": 1.9546, "step": 8506 }, { "epoch": 0.2744686042455199, "grad_norm": 0.421875, "learning_rate": 2.56388237154936e-05, "loss": 2.0244, "step": 8507 }, { "epoch": 0.27450086809931623, "grad_norm": 0.4140625, "learning_rate": 2.5637714249877418e-05, "loss": 2.0398, "step": 8508 }, { "epoch": 0.2745331319531126, "grad_norm": 0.42578125, "learning_rate": 2.563660466716891e-05, "loss": 1.9882, "step": 8509 }, { "epoch": 0.2745653958069089, "grad_norm": 0.431640625, "learning_rate": 2.5635494967380285e-05, "loss": 1.9813, "step": 8510 }, { "epoch": 0.27459765966070526, "grad_norm": 0.4296875, "learning_rate": 2.5634385150523764e-05, "loss": 2.0039, "step": 8511 }, { "epoch": 0.2746299235145016, "grad_norm": 0.388671875, "learning_rate": 2.5633275216611555e-05, "loss": 2.0001, "step": 8512 }, { "epoch": 0.27466218736829795, "grad_norm": 0.427734375, "learning_rate": 2.5632165165655886e-05, "loss": 1.9662, "step": 8513 }, { "epoch": 0.2746944512220943, "grad_norm": 0.45703125, "learning_rate": 2.5631054997668968e-05, "loss": 1.9833, "step": 8514 }, { "epoch": 0.27472671507589064, "grad_norm": 0.470703125, "learning_rate": 2.5629944712663026e-05, "loss": 1.9788, "step": 8515 }, { "epoch": 0.274758978929687, "grad_norm": 0.51171875, "learning_rate": 2.5628834310650276e-05, "loss": 2.005, "step": 8516 }, { "epoch": 0.2747912427834833, "grad_norm": 0.43359375, "learning_rate": 2.5627723791642944e-05, "loss": 1.9889, "step": 8517 }, { "epoch": 0.27482350663727967, "grad_norm": 0.453125, "learning_rate": 2.5626613155653253e-05, "loss": 2.0014, "step": 8518 }, { "epoch": 0.274855770491076, "grad_norm": 0.45703125, "learning_rate": 2.562550240269343e-05, "loss": 1.964, "step": 8519 }, { "epoch": 0.27488803434487236, "grad_norm": 0.39453125, "learning_rate": 2.562439153277569e-05, "loss": 1.961, "step": 8520 }, { "epoch": 0.2749202981986687, "grad_norm": 0.408203125, "learning_rate": 2.562328054591228e-05, "loss": 1.9819, "step": 8521 }, { "epoch": 0.27495256205246504, "grad_norm": 0.380859375, "learning_rate": 2.5622169442115415e-05, "loss": 2.0093, "step": 8522 }, { "epoch": 0.2749848259062614, "grad_norm": 0.3828125, "learning_rate": 2.562105822139733e-05, "loss": 1.956, "step": 8523 }, { "epoch": 0.27501708976005773, "grad_norm": 0.4140625, "learning_rate": 2.5619946883770255e-05, "loss": 2.0196, "step": 8524 }, { "epoch": 0.2750493536138541, "grad_norm": 0.421875, "learning_rate": 2.5618835429246426e-05, "loss": 2.0146, "step": 8525 }, { "epoch": 0.2750816174676504, "grad_norm": 0.376953125, "learning_rate": 2.561772385783807e-05, "loss": 1.9741, "step": 8526 }, { "epoch": 0.2751138813214468, "grad_norm": 0.41796875, "learning_rate": 2.561661216955743e-05, "loss": 1.9783, "step": 8527 }, { "epoch": 0.27514614517524316, "grad_norm": 0.40625, "learning_rate": 2.5615500364416738e-05, "loss": 1.96, "step": 8528 }, { "epoch": 0.2751784090290395, "grad_norm": 0.419921875, "learning_rate": 2.5614388442428233e-05, "loss": 1.956, "step": 8529 }, { "epoch": 0.27521067288283585, "grad_norm": 0.392578125, "learning_rate": 2.5613276403604154e-05, "loss": 1.913, "step": 8530 }, { "epoch": 0.2752429367366322, "grad_norm": 0.43359375, "learning_rate": 2.5612164247956746e-05, "loss": 1.9867, "step": 8531 }, { "epoch": 0.27527520059042854, "grad_norm": 0.4765625, "learning_rate": 2.5611051975498242e-05, "loss": 1.9815, "step": 8532 }, { "epoch": 0.2753074644442249, "grad_norm": 0.42578125, "learning_rate": 2.5609939586240895e-05, "loss": 2.0017, "step": 8533 }, { "epoch": 0.2753397282980212, "grad_norm": 0.4375, "learning_rate": 2.5608827080196942e-05, "loss": 1.985, "step": 8534 }, { "epoch": 0.27537199215181757, "grad_norm": 0.451171875, "learning_rate": 2.5607714457378635e-05, "loss": 2.0204, "step": 8535 }, { "epoch": 0.2754042560056139, "grad_norm": 0.44140625, "learning_rate": 2.5606601717798212e-05, "loss": 1.9902, "step": 8536 }, { "epoch": 0.27543651985941026, "grad_norm": 0.404296875, "learning_rate": 2.560548886146793e-05, "loss": 2.0004, "step": 8537 }, { "epoch": 0.2754687837132066, "grad_norm": 0.419921875, "learning_rate": 2.5604375888400034e-05, "loss": 1.9727, "step": 8538 }, { "epoch": 0.27550104756700294, "grad_norm": 0.390625, "learning_rate": 2.5603262798606775e-05, "loss": 1.9874, "step": 8539 }, { "epoch": 0.2755333114207993, "grad_norm": 0.400390625, "learning_rate": 2.560214959210041e-05, "loss": 1.9992, "step": 8540 }, { "epoch": 0.27556557527459563, "grad_norm": 0.3828125, "learning_rate": 2.560103626889318e-05, "loss": 1.9987, "step": 8541 }, { "epoch": 0.275597839128392, "grad_norm": 0.41796875, "learning_rate": 2.5599922828997355e-05, "loss": 2.0003, "step": 8542 }, { "epoch": 0.2756301029821883, "grad_norm": 0.380859375, "learning_rate": 2.5598809272425184e-05, "loss": 1.9268, "step": 8543 }, { "epoch": 0.27566236683598466, "grad_norm": 0.390625, "learning_rate": 2.559769559918892e-05, "loss": 1.9715, "step": 8544 }, { "epoch": 0.275694630689781, "grad_norm": 0.39453125, "learning_rate": 2.5596581809300827e-05, "loss": 1.996, "step": 8545 }, { "epoch": 0.27572689454357735, "grad_norm": 0.37890625, "learning_rate": 2.559546790277316e-05, "loss": 2.0051, "step": 8546 }, { "epoch": 0.27575915839737375, "grad_norm": 0.392578125, "learning_rate": 2.5594353879618188e-05, "loss": 1.965, "step": 8547 }, { "epoch": 0.2757914222511701, "grad_norm": 0.40625, "learning_rate": 2.559323973984817e-05, "loss": 2.006, "step": 8548 }, { "epoch": 0.27582368610496644, "grad_norm": 0.408203125, "learning_rate": 2.5592125483475365e-05, "loss": 2.0698, "step": 8549 }, { "epoch": 0.2758559499587628, "grad_norm": 0.40625, "learning_rate": 2.5591011110512043e-05, "loss": 2.0005, "step": 8550 }, { "epoch": 0.2758882138125591, "grad_norm": 0.46484375, "learning_rate": 2.558989662097047e-05, "loss": 2.0177, "step": 8551 }, { "epoch": 0.27592047766635547, "grad_norm": 0.466796875, "learning_rate": 2.5588782014862906e-05, "loss": 2.03, "step": 8552 }, { "epoch": 0.2759527415201518, "grad_norm": 0.400390625, "learning_rate": 2.5587667292201636e-05, "loss": 2.0, "step": 8553 }, { "epoch": 0.27598500537394816, "grad_norm": 0.388671875, "learning_rate": 2.558655245299891e-05, "loss": 1.9906, "step": 8554 }, { "epoch": 0.2760172692277445, "grad_norm": 0.41796875, "learning_rate": 2.5585437497267014e-05, "loss": 1.9904, "step": 8555 }, { "epoch": 0.27604953308154084, "grad_norm": 0.39453125, "learning_rate": 2.5584322425018217e-05, "loss": 1.9866, "step": 8556 }, { "epoch": 0.2760817969353372, "grad_norm": 0.39453125, "learning_rate": 2.5583207236264792e-05, "loss": 1.9957, "step": 8557 }, { "epoch": 0.27611406078913353, "grad_norm": 0.375, "learning_rate": 2.558209193101901e-05, "loss": 1.9972, "step": 8558 }, { "epoch": 0.2761463246429299, "grad_norm": 0.37890625, "learning_rate": 2.5580976509293154e-05, "loss": 1.9681, "step": 8559 }, { "epoch": 0.2761785884967262, "grad_norm": 0.384765625, "learning_rate": 2.5579860971099502e-05, "loss": 1.9741, "step": 8560 }, { "epoch": 0.27621085235052256, "grad_norm": 0.3984375, "learning_rate": 2.557874531645033e-05, "loss": 1.9982, "step": 8561 }, { "epoch": 0.2762431162043189, "grad_norm": 0.349609375, "learning_rate": 2.5577629545357914e-05, "loss": 1.9542, "step": 8562 }, { "epoch": 0.27627538005811525, "grad_norm": 0.37109375, "learning_rate": 2.5576513657834546e-05, "loss": 1.9911, "step": 8563 }, { "epoch": 0.2763076439119116, "grad_norm": 0.376953125, "learning_rate": 2.55753976538925e-05, "loss": 2.0174, "step": 8564 }, { "epoch": 0.27633990776570794, "grad_norm": 0.369140625, "learning_rate": 2.5574281533544063e-05, "loss": 2.0145, "step": 8565 }, { "epoch": 0.2763721716195043, "grad_norm": 0.384765625, "learning_rate": 2.5573165296801522e-05, "loss": 1.9943, "step": 8566 }, { "epoch": 0.2764044354733007, "grad_norm": 0.390625, "learning_rate": 2.557204894367716e-05, "loss": 2.0248, "step": 8567 }, { "epoch": 0.276436699327097, "grad_norm": 0.5078125, "learning_rate": 2.557093247418327e-05, "loss": 2.01, "step": 8568 }, { "epoch": 0.27646896318089337, "grad_norm": 0.390625, "learning_rate": 2.5569815888332137e-05, "loss": 2.0133, "step": 8569 }, { "epoch": 0.2765012270346897, "grad_norm": 0.39453125, "learning_rate": 2.5568699186136058e-05, "loss": 1.9992, "step": 8570 }, { "epoch": 0.27653349088848606, "grad_norm": 0.376953125, "learning_rate": 2.556758236760732e-05, "loss": 2.004, "step": 8571 }, { "epoch": 0.2765657547422824, "grad_norm": 0.39453125, "learning_rate": 2.556646543275821e-05, "loss": 2.0064, "step": 8572 }, { "epoch": 0.27659801859607874, "grad_norm": 0.37109375, "learning_rate": 2.5565348381601037e-05, "loss": 1.9995, "step": 8573 }, { "epoch": 0.2766302824498751, "grad_norm": 0.392578125, "learning_rate": 2.5564231214148082e-05, "loss": 1.9835, "step": 8574 }, { "epoch": 0.27666254630367143, "grad_norm": 0.435546875, "learning_rate": 2.5563113930411647e-05, "loss": 1.9709, "step": 8575 }, { "epoch": 0.2766948101574678, "grad_norm": 0.447265625, "learning_rate": 2.5561996530404038e-05, "loss": 1.9734, "step": 8576 }, { "epoch": 0.2767270740112641, "grad_norm": 0.41015625, "learning_rate": 2.556087901413755e-05, "loss": 1.9857, "step": 8577 }, { "epoch": 0.27675933786506046, "grad_norm": 0.3671875, "learning_rate": 2.5559761381624476e-05, "loss": 2.015, "step": 8578 }, { "epoch": 0.2767916017188568, "grad_norm": 0.39453125, "learning_rate": 2.5558643632877127e-05, "loss": 2.007, "step": 8579 }, { "epoch": 0.27682386557265315, "grad_norm": 0.375, "learning_rate": 2.55575257679078e-05, "loss": 2.0058, "step": 8580 }, { "epoch": 0.2768561294264495, "grad_norm": 0.375, "learning_rate": 2.5556407786728807e-05, "loss": 2.0052, "step": 8581 }, { "epoch": 0.27688839328024584, "grad_norm": 0.404296875, "learning_rate": 2.555528968935245e-05, "loss": 2.0369, "step": 8582 }, { "epoch": 0.2769206571340422, "grad_norm": 0.3984375, "learning_rate": 2.5554171475791038e-05, "loss": 2.0154, "step": 8583 }, { "epoch": 0.2769529209878385, "grad_norm": 0.37890625, "learning_rate": 2.5553053146056872e-05, "loss": 1.9988, "step": 8584 }, { "epoch": 0.27698518484163487, "grad_norm": 0.369140625, "learning_rate": 2.5551934700162274e-05, "loss": 1.9914, "step": 8585 }, { "epoch": 0.2770174486954312, "grad_norm": 0.390625, "learning_rate": 2.5550816138119544e-05, "loss": 1.9598, "step": 8586 }, { "epoch": 0.27704971254922756, "grad_norm": 0.361328125, "learning_rate": 2.5549697459941003e-05, "loss": 2.0037, "step": 8587 }, { "epoch": 0.27708197640302396, "grad_norm": 0.380859375, "learning_rate": 2.5548578665638962e-05, "loss": 2.0, "step": 8588 }, { "epoch": 0.2771142402568203, "grad_norm": 0.396484375, "learning_rate": 2.554745975522573e-05, "loss": 2.0065, "step": 8589 }, { "epoch": 0.27714650411061664, "grad_norm": 0.37109375, "learning_rate": 2.5546340728713628e-05, "loss": 2.0241, "step": 8590 }, { "epoch": 0.277178767964413, "grad_norm": 0.380859375, "learning_rate": 2.5545221586114976e-05, "loss": 1.9537, "step": 8591 }, { "epoch": 0.27721103181820933, "grad_norm": 0.43359375, "learning_rate": 2.5544102327442087e-05, "loss": 1.9866, "step": 8592 }, { "epoch": 0.2772432956720057, "grad_norm": 0.515625, "learning_rate": 2.5542982952707287e-05, "loss": 1.9908, "step": 8593 }, { "epoch": 0.277275559525802, "grad_norm": 0.453125, "learning_rate": 2.554186346192289e-05, "loss": 2.0129, "step": 8594 }, { "epoch": 0.27730782337959836, "grad_norm": 0.357421875, "learning_rate": 2.5540743855101226e-05, "loss": 1.979, "step": 8595 }, { "epoch": 0.2773400872333947, "grad_norm": 0.5, "learning_rate": 2.5539624132254617e-05, "loss": 1.9942, "step": 8596 }, { "epoch": 0.27737235108719105, "grad_norm": 0.498046875, "learning_rate": 2.553850429339538e-05, "loss": 1.9878, "step": 8597 }, { "epoch": 0.2774046149409874, "grad_norm": 0.37109375, "learning_rate": 2.5537384338535857e-05, "loss": 1.9904, "step": 8598 }, { "epoch": 0.27743687879478374, "grad_norm": 0.44921875, "learning_rate": 2.5536264267688357e-05, "loss": 1.9932, "step": 8599 }, { "epoch": 0.2774691426485801, "grad_norm": 0.439453125, "learning_rate": 2.5535144080865225e-05, "loss": 2.0126, "step": 8600 }, { "epoch": 0.2775014065023764, "grad_norm": 0.4375, "learning_rate": 2.5534023778078786e-05, "loss": 2.0048, "step": 8601 }, { "epoch": 0.27753367035617277, "grad_norm": 0.412109375, "learning_rate": 2.553290335934137e-05, "loss": 1.9997, "step": 8602 }, { "epoch": 0.2775659342099691, "grad_norm": 0.451171875, "learning_rate": 2.5531782824665303e-05, "loss": 2.0162, "step": 8603 }, { "epoch": 0.27759819806376546, "grad_norm": 0.392578125, "learning_rate": 2.5530662174062935e-05, "loss": 1.9931, "step": 8604 }, { "epoch": 0.2776304619175618, "grad_norm": 0.462890625, "learning_rate": 2.5529541407546588e-05, "loss": 1.9784, "step": 8605 }, { "epoch": 0.27766272577135814, "grad_norm": 0.412109375, "learning_rate": 2.55284205251286e-05, "loss": 2.0036, "step": 8606 }, { "epoch": 0.2776949896251545, "grad_norm": 0.42578125, "learning_rate": 2.552729952682132e-05, "loss": 1.9819, "step": 8607 }, { "epoch": 0.2777272534789509, "grad_norm": 0.42578125, "learning_rate": 2.5526178412637073e-05, "loss": 1.994, "step": 8608 }, { "epoch": 0.27775951733274723, "grad_norm": 0.423828125, "learning_rate": 2.5525057182588204e-05, "loss": 2.0009, "step": 8609 }, { "epoch": 0.2777917811865436, "grad_norm": 0.419921875, "learning_rate": 2.5523935836687064e-05, "loss": 1.9871, "step": 8610 }, { "epoch": 0.2778240450403399, "grad_norm": 0.3828125, "learning_rate": 2.552281437494598e-05, "loss": 1.9924, "step": 8611 }, { "epoch": 0.27785630889413626, "grad_norm": 0.38671875, "learning_rate": 2.5521692797377306e-05, "loss": 2.0097, "step": 8612 }, { "epoch": 0.2778885727479326, "grad_norm": 0.373046875, "learning_rate": 2.552057110399339e-05, "loss": 2.0116, "step": 8613 }, { "epoch": 0.27792083660172895, "grad_norm": 0.376953125, "learning_rate": 2.5519449294806574e-05, "loss": 2.0112, "step": 8614 }, { "epoch": 0.2779531004555253, "grad_norm": 0.369140625, "learning_rate": 2.5518327369829208e-05, "loss": 1.9893, "step": 8615 }, { "epoch": 0.27798536430932164, "grad_norm": 0.43359375, "learning_rate": 2.5517205329073636e-05, "loss": 2.0017, "step": 8616 }, { "epoch": 0.278017628163118, "grad_norm": 0.412109375, "learning_rate": 2.5516083172552214e-05, "loss": 2.015, "step": 8617 }, { "epoch": 0.2780498920169143, "grad_norm": 0.376953125, "learning_rate": 2.551496090027729e-05, "loss": 1.9854, "step": 8618 }, { "epoch": 0.27808215587071067, "grad_norm": 0.42578125, "learning_rate": 2.5513838512261227e-05, "loss": 1.9877, "step": 8619 }, { "epoch": 0.278114419724507, "grad_norm": 0.396484375, "learning_rate": 2.5512716008516365e-05, "loss": 1.9983, "step": 8620 }, { "epoch": 0.27814668357830336, "grad_norm": 0.375, "learning_rate": 2.551159338905507e-05, "loss": 1.9952, "step": 8621 }, { "epoch": 0.2781789474320997, "grad_norm": 0.412109375, "learning_rate": 2.5510470653889696e-05, "loss": 2.0088, "step": 8622 }, { "epoch": 0.27821121128589604, "grad_norm": 0.359375, "learning_rate": 2.55093478030326e-05, "loss": 1.9639, "step": 8623 }, { "epoch": 0.2782434751396924, "grad_norm": 0.36328125, "learning_rate": 2.5508224836496142e-05, "loss": 1.9984, "step": 8624 }, { "epoch": 0.27827573899348873, "grad_norm": 0.4296875, "learning_rate": 2.5507101754292687e-05, "loss": 1.995, "step": 8625 }, { "epoch": 0.2783080028472851, "grad_norm": 0.40234375, "learning_rate": 2.5505978556434594e-05, "loss": 2.0012, "step": 8626 }, { "epoch": 0.2783402667010814, "grad_norm": 0.388671875, "learning_rate": 2.5504855242934224e-05, "loss": 1.9893, "step": 8627 }, { "epoch": 0.2783725305548778, "grad_norm": 0.43359375, "learning_rate": 2.550373181380394e-05, "loss": 1.9701, "step": 8628 }, { "epoch": 0.27840479440867416, "grad_norm": 0.451171875, "learning_rate": 2.5502608269056117e-05, "loss": 2.0029, "step": 8629 }, { "epoch": 0.2784370582624705, "grad_norm": 0.48046875, "learning_rate": 2.5501484608703117e-05, "loss": 1.9743, "step": 8630 }, { "epoch": 0.27846932211626685, "grad_norm": 0.4296875, "learning_rate": 2.5500360832757303e-05, "loss": 1.9426, "step": 8631 }, { "epoch": 0.2785015859700632, "grad_norm": 0.482421875, "learning_rate": 2.549923694123105e-05, "loss": 1.9364, "step": 8632 }, { "epoch": 0.27853384982385954, "grad_norm": 0.458984375, "learning_rate": 2.5498112934136735e-05, "loss": 2.0149, "step": 8633 }, { "epoch": 0.2785661136776559, "grad_norm": 0.4140625, "learning_rate": 2.5496988811486722e-05, "loss": 1.974, "step": 8634 }, { "epoch": 0.2785983775314522, "grad_norm": 0.447265625, "learning_rate": 2.5495864573293385e-05, "loss": 2.0101, "step": 8635 }, { "epoch": 0.27863064138524857, "grad_norm": 0.400390625, "learning_rate": 2.5494740219569105e-05, "loss": 1.9922, "step": 8636 }, { "epoch": 0.2786629052390449, "grad_norm": 0.4296875, "learning_rate": 2.5493615750326244e-05, "loss": 1.9931, "step": 8637 }, { "epoch": 0.27869516909284126, "grad_norm": 0.4140625, "learning_rate": 2.5492491165577193e-05, "loss": 1.9626, "step": 8638 }, { "epoch": 0.2787274329466376, "grad_norm": 0.400390625, "learning_rate": 2.5491366465334333e-05, "loss": 1.9903, "step": 8639 }, { "epoch": 0.27875969680043394, "grad_norm": 0.423828125, "learning_rate": 2.549024164961003e-05, "loss": 1.9788, "step": 8640 }, { "epoch": 0.2787919606542303, "grad_norm": 0.40234375, "learning_rate": 2.5489116718416676e-05, "loss": 1.9914, "step": 8641 }, { "epoch": 0.27882422450802663, "grad_norm": 0.453125, "learning_rate": 2.5487991671766648e-05, "loss": 1.9435, "step": 8642 }, { "epoch": 0.278856488361823, "grad_norm": 0.404296875, "learning_rate": 2.5486866509672334e-05, "loss": 1.9421, "step": 8643 }, { "epoch": 0.2788887522156193, "grad_norm": 0.4140625, "learning_rate": 2.5485741232146114e-05, "loss": 1.9726, "step": 8644 }, { "epoch": 0.27892101606941566, "grad_norm": 0.431640625, "learning_rate": 2.548461583920038e-05, "loss": 1.9953, "step": 8645 }, { "epoch": 0.278953279923212, "grad_norm": 0.443359375, "learning_rate": 2.5483490330847512e-05, "loss": 1.9614, "step": 8646 }, { "epoch": 0.27898554377700835, "grad_norm": 0.384765625, "learning_rate": 2.548236470709991e-05, "loss": 1.9814, "step": 8647 }, { "epoch": 0.27901780763080475, "grad_norm": 0.412109375, "learning_rate": 2.548123896796995e-05, "loss": 1.9794, "step": 8648 }, { "epoch": 0.2790500714846011, "grad_norm": 0.42578125, "learning_rate": 2.5480113113470037e-05, "loss": 1.9732, "step": 8649 }, { "epoch": 0.27908233533839744, "grad_norm": 0.37890625, "learning_rate": 2.5478987143612555e-05, "loss": 1.9611, "step": 8650 }, { "epoch": 0.2791145991921938, "grad_norm": 0.443359375, "learning_rate": 2.54778610584099e-05, "loss": 1.9791, "step": 8651 }, { "epoch": 0.2791468630459901, "grad_norm": 0.384765625, "learning_rate": 2.5476734857874463e-05, "loss": 1.9884, "step": 8652 }, { "epoch": 0.27917912689978647, "grad_norm": 0.4140625, "learning_rate": 2.547560854201865e-05, "loss": 2.0081, "step": 8653 }, { "epoch": 0.2792113907535828, "grad_norm": 0.380859375, "learning_rate": 2.547448211085485e-05, "loss": 1.9663, "step": 8654 }, { "epoch": 0.27924365460737915, "grad_norm": 0.392578125, "learning_rate": 2.547335556439547e-05, "loss": 1.9809, "step": 8655 }, { "epoch": 0.2792759184611755, "grad_norm": 0.380859375, "learning_rate": 2.54722289026529e-05, "loss": 2.0103, "step": 8656 }, { "epoch": 0.27930818231497184, "grad_norm": 0.4140625, "learning_rate": 2.5471102125639553e-05, "loss": 1.9978, "step": 8657 }, { "epoch": 0.2793404461687682, "grad_norm": 0.37109375, "learning_rate": 2.5469975233367823e-05, "loss": 1.9667, "step": 8658 }, { "epoch": 0.27937271002256453, "grad_norm": 0.384765625, "learning_rate": 2.546884822585011e-05, "loss": 1.9553, "step": 8659 }, { "epoch": 0.2794049738763609, "grad_norm": 0.421875, "learning_rate": 2.5467721103098832e-05, "loss": 1.9662, "step": 8660 }, { "epoch": 0.2794372377301572, "grad_norm": 0.404296875, "learning_rate": 2.5466593865126396e-05, "loss": 1.969, "step": 8661 }, { "epoch": 0.27946950158395356, "grad_norm": 0.400390625, "learning_rate": 2.5465466511945194e-05, "loss": 1.9855, "step": 8662 }, { "epoch": 0.2795017654377499, "grad_norm": 0.44140625, "learning_rate": 2.5464339043567647e-05, "loss": 1.9631, "step": 8663 }, { "epoch": 0.27953402929154625, "grad_norm": 0.41015625, "learning_rate": 2.5463211460006166e-05, "loss": 1.9808, "step": 8664 }, { "epoch": 0.2795662931453426, "grad_norm": 0.41796875, "learning_rate": 2.5462083761273153e-05, "loss": 1.9273, "step": 8665 }, { "epoch": 0.27959855699913894, "grad_norm": 0.419921875, "learning_rate": 2.5460955947381035e-05, "loss": 1.9576, "step": 8666 }, { "epoch": 0.2796308208529353, "grad_norm": 0.388671875, "learning_rate": 2.5459828018342217e-05, "loss": 1.9697, "step": 8667 }, { "epoch": 0.2796630847067316, "grad_norm": 0.369140625, "learning_rate": 2.545869997416911e-05, "loss": 1.9961, "step": 8668 }, { "epoch": 0.279695348560528, "grad_norm": 0.396484375, "learning_rate": 2.5457571814874144e-05, "loss": 1.9753, "step": 8669 }, { "epoch": 0.27972761241432437, "grad_norm": 0.408203125, "learning_rate": 2.5456443540469728e-05, "loss": 1.9792, "step": 8670 }, { "epoch": 0.2797598762681207, "grad_norm": 0.375, "learning_rate": 2.5455315150968276e-05, "loss": 1.9885, "step": 8671 }, { "epoch": 0.27979214012191705, "grad_norm": 0.396484375, "learning_rate": 2.5454186646382226e-05, "loss": 1.9757, "step": 8672 }, { "epoch": 0.2798244039757134, "grad_norm": 0.39453125, "learning_rate": 2.545305802672398e-05, "loss": 1.9563, "step": 8673 }, { "epoch": 0.27985666782950974, "grad_norm": 0.375, "learning_rate": 2.5451929292005974e-05, "loss": 1.9834, "step": 8674 }, { "epoch": 0.2798889316833061, "grad_norm": 0.419921875, "learning_rate": 2.5450800442240632e-05, "loss": 2.0107, "step": 8675 }, { "epoch": 0.27992119553710243, "grad_norm": 0.39453125, "learning_rate": 2.5449671477440372e-05, "loss": 1.9919, "step": 8676 }, { "epoch": 0.2799534593908988, "grad_norm": 0.63671875, "learning_rate": 2.5448542397617625e-05, "loss": 1.9483, "step": 8677 }, { "epoch": 0.2799857232446951, "grad_norm": 0.453125, "learning_rate": 2.5447413202784823e-05, "loss": 1.9728, "step": 8678 }, { "epoch": 0.28001798709849146, "grad_norm": 0.408203125, "learning_rate": 2.544628389295439e-05, "loss": 1.9925, "step": 8679 }, { "epoch": 0.2800502509522878, "grad_norm": 0.404296875, "learning_rate": 2.544515446813875e-05, "loss": 1.9691, "step": 8680 }, { "epoch": 0.28008251480608415, "grad_norm": 0.439453125, "learning_rate": 2.5444024928350354e-05, "loss": 1.9968, "step": 8681 }, { "epoch": 0.2801147786598805, "grad_norm": 0.412109375, "learning_rate": 2.5442895273601616e-05, "loss": 1.9907, "step": 8682 }, { "epoch": 0.28014704251367684, "grad_norm": 0.443359375, "learning_rate": 2.544176550390498e-05, "loss": 1.9738, "step": 8683 }, { "epoch": 0.2801793063674732, "grad_norm": 0.392578125, "learning_rate": 2.5440635619272887e-05, "loss": 2.0022, "step": 8684 }, { "epoch": 0.2802115702212695, "grad_norm": 0.4140625, "learning_rate": 2.543950561971776e-05, "loss": 1.9922, "step": 8685 }, { "epoch": 0.28024383407506587, "grad_norm": 0.41796875, "learning_rate": 2.5438375505252048e-05, "loss": 1.9877, "step": 8686 }, { "epoch": 0.2802760979288622, "grad_norm": 0.423828125, "learning_rate": 2.5437245275888183e-05, "loss": 1.9966, "step": 8687 }, { "epoch": 0.28030836178265855, "grad_norm": 0.38671875, "learning_rate": 2.5436114931638617e-05, "loss": 2.0003, "step": 8688 }, { "epoch": 0.28034062563645495, "grad_norm": 0.4375, "learning_rate": 2.543498447251578e-05, "loss": 2.009, "step": 8689 }, { "epoch": 0.2803728894902513, "grad_norm": 0.423828125, "learning_rate": 2.543385389853212e-05, "loss": 2.0027, "step": 8690 }, { "epoch": 0.28040515334404764, "grad_norm": 0.416015625, "learning_rate": 2.5432723209700076e-05, "loss": 1.9946, "step": 8691 }, { "epoch": 0.280437417197844, "grad_norm": 0.40625, "learning_rate": 2.5431592406032108e-05, "loss": 1.9992, "step": 8692 }, { "epoch": 0.28046968105164033, "grad_norm": 0.373046875, "learning_rate": 2.543046148754065e-05, "loss": 1.9554, "step": 8693 }, { "epoch": 0.2805019449054367, "grad_norm": 0.4140625, "learning_rate": 2.5429330454238158e-05, "loss": 1.9878, "step": 8694 }, { "epoch": 0.280534208759233, "grad_norm": 0.39453125, "learning_rate": 2.5428199306137076e-05, "loss": 1.9814, "step": 8695 }, { "epoch": 0.28056647261302936, "grad_norm": 0.3984375, "learning_rate": 2.5427068043249856e-05, "loss": 1.9559, "step": 8696 }, { "epoch": 0.2805987364668257, "grad_norm": 0.42578125, "learning_rate": 2.5425936665588954e-05, "loss": 2.0138, "step": 8697 }, { "epoch": 0.28063100032062205, "grad_norm": 0.447265625, "learning_rate": 2.5424805173166817e-05, "loss": 2.017, "step": 8698 }, { "epoch": 0.2806632641744184, "grad_norm": 0.40625, "learning_rate": 2.5423673565995907e-05, "loss": 2.0184, "step": 8699 }, { "epoch": 0.28069552802821474, "grad_norm": 0.369140625, "learning_rate": 2.5422541844088676e-05, "loss": 2.0162, "step": 8700 }, { "epoch": 0.2807277918820111, "grad_norm": 0.3828125, "learning_rate": 2.542141000745758e-05, "loss": 2.0026, "step": 8701 }, { "epoch": 0.2807600557358074, "grad_norm": 0.40625, "learning_rate": 2.5420278056115077e-05, "loss": 1.9767, "step": 8702 }, { "epoch": 0.28079231958960377, "grad_norm": 0.380859375, "learning_rate": 2.5419145990073636e-05, "loss": 2.0076, "step": 8703 }, { "epoch": 0.2808245834434001, "grad_norm": 0.38671875, "learning_rate": 2.5418013809345705e-05, "loss": 1.997, "step": 8704 }, { "epoch": 0.28085684729719645, "grad_norm": 0.400390625, "learning_rate": 2.541688151394375e-05, "loss": 1.9839, "step": 8705 }, { "epoch": 0.2808891111509928, "grad_norm": 0.40625, "learning_rate": 2.5415749103880238e-05, "loss": 2.0022, "step": 8706 }, { "epoch": 0.28092137500478914, "grad_norm": 0.365234375, "learning_rate": 2.5414616579167637e-05, "loss": 2.0052, "step": 8707 }, { "epoch": 0.2809536388585855, "grad_norm": 0.4375, "learning_rate": 2.54134839398184e-05, "loss": 2.0011, "step": 8708 }, { "epoch": 0.2809859027123819, "grad_norm": 0.447265625, "learning_rate": 2.5412351185845012e-05, "loss": 1.9864, "step": 8709 }, { "epoch": 0.28101816656617823, "grad_norm": 0.44140625, "learning_rate": 2.5411218317259925e-05, "loss": 2.0225, "step": 8710 }, { "epoch": 0.2810504304199746, "grad_norm": 0.373046875, "learning_rate": 2.5410085334075617e-05, "loss": 2.0165, "step": 8711 }, { "epoch": 0.2810826942737709, "grad_norm": 0.38671875, "learning_rate": 2.540895223630456e-05, "loss": 1.9886, "step": 8712 }, { "epoch": 0.28111495812756726, "grad_norm": 0.390625, "learning_rate": 2.540781902395922e-05, "loss": 2.0183, "step": 8713 }, { "epoch": 0.2811472219813636, "grad_norm": 0.37109375, "learning_rate": 2.5406685697052077e-05, "loss": 2.0026, "step": 8714 }, { "epoch": 0.28117948583515995, "grad_norm": 0.373046875, "learning_rate": 2.5405552255595602e-05, "loss": 1.9748, "step": 8715 }, { "epoch": 0.2812117496889563, "grad_norm": 0.376953125, "learning_rate": 2.5404418699602275e-05, "loss": 2.0095, "step": 8716 }, { "epoch": 0.28124401354275264, "grad_norm": 0.361328125, "learning_rate": 2.5403285029084567e-05, "loss": 1.9451, "step": 8717 }, { "epoch": 0.281276277396549, "grad_norm": 0.376953125, "learning_rate": 2.5402151244054968e-05, "loss": 2.0138, "step": 8718 }, { "epoch": 0.2813085412503453, "grad_norm": 0.349609375, "learning_rate": 2.5401017344525945e-05, "loss": 1.9838, "step": 8719 }, { "epoch": 0.28134080510414167, "grad_norm": 0.365234375, "learning_rate": 2.5399883330509987e-05, "loss": 1.9744, "step": 8720 }, { "epoch": 0.281373068957938, "grad_norm": 0.369140625, "learning_rate": 2.5398749202019572e-05, "loss": 2.0119, "step": 8721 }, { "epoch": 0.28140533281173435, "grad_norm": 0.390625, "learning_rate": 2.5397614959067187e-05, "loss": 1.9995, "step": 8722 }, { "epoch": 0.2814375966655307, "grad_norm": 0.40234375, "learning_rate": 2.5396480601665316e-05, "loss": 1.9951, "step": 8723 }, { "epoch": 0.28146986051932704, "grad_norm": 0.408203125, "learning_rate": 2.5395346129826447e-05, "loss": 2.0006, "step": 8724 }, { "epoch": 0.2815021243731234, "grad_norm": 0.486328125, "learning_rate": 2.5394211543563063e-05, "loss": 2.0161, "step": 8725 }, { "epoch": 0.28153438822691973, "grad_norm": 0.484375, "learning_rate": 2.5393076842887656e-05, "loss": 1.9928, "step": 8726 }, { "epoch": 0.2815666520807161, "grad_norm": 0.42578125, "learning_rate": 2.539194202781271e-05, "loss": 2.0074, "step": 8727 }, { "epoch": 0.2815989159345124, "grad_norm": 0.375, "learning_rate": 2.5390807098350727e-05, "loss": 1.9989, "step": 8728 }, { "epoch": 0.2816311797883088, "grad_norm": 0.392578125, "learning_rate": 2.5389672054514195e-05, "loss": 1.9811, "step": 8729 }, { "epoch": 0.28166344364210516, "grad_norm": 0.3828125, "learning_rate": 2.5388536896315602e-05, "loss": 2.0019, "step": 8730 }, { "epoch": 0.2816957074959015, "grad_norm": 0.41796875, "learning_rate": 2.538740162376745e-05, "loss": 1.9919, "step": 8731 }, { "epoch": 0.28172797134969785, "grad_norm": 0.400390625, "learning_rate": 2.538626623688223e-05, "loss": 2.021, "step": 8732 }, { "epoch": 0.2817602352034942, "grad_norm": 0.392578125, "learning_rate": 2.5385130735672442e-05, "loss": 2.0096, "step": 8733 }, { "epoch": 0.28179249905729054, "grad_norm": 0.390625, "learning_rate": 2.5383995120150593e-05, "loss": 2.0163, "step": 8734 }, { "epoch": 0.2818247629110869, "grad_norm": 0.39453125, "learning_rate": 2.538285939032917e-05, "loss": 2.0298, "step": 8735 }, { "epoch": 0.2818570267648832, "grad_norm": 0.37109375, "learning_rate": 2.538172354622068e-05, "loss": 1.9915, "step": 8736 }, { "epoch": 0.28188929061867957, "grad_norm": 0.388671875, "learning_rate": 2.5380587587837623e-05, "loss": 1.9809, "step": 8737 }, { "epoch": 0.2819215544724759, "grad_norm": 0.39453125, "learning_rate": 2.537945151519251e-05, "loss": 1.9766, "step": 8738 }, { "epoch": 0.28195381832627225, "grad_norm": 0.359375, "learning_rate": 2.5378315328297832e-05, "loss": 2.007, "step": 8739 }, { "epoch": 0.2819860821800686, "grad_norm": 0.3984375, "learning_rate": 2.5377179027166113e-05, "loss": 1.995, "step": 8740 }, { "epoch": 0.28201834603386494, "grad_norm": 0.39453125, "learning_rate": 2.537604261180985e-05, "loss": 1.9925, "step": 8741 }, { "epoch": 0.2820506098876613, "grad_norm": 0.38671875, "learning_rate": 2.5374906082241546e-05, "loss": 1.9975, "step": 8742 }, { "epoch": 0.28208287374145763, "grad_norm": 0.373046875, "learning_rate": 2.5373769438473726e-05, "loss": 2.0088, "step": 8743 }, { "epoch": 0.282115137595254, "grad_norm": 0.3828125, "learning_rate": 2.5372632680518893e-05, "loss": 2.0108, "step": 8744 }, { "epoch": 0.2821474014490503, "grad_norm": 0.376953125, "learning_rate": 2.537149580838956e-05, "loss": 1.9723, "step": 8745 }, { "epoch": 0.28217966530284666, "grad_norm": 0.87109375, "learning_rate": 2.5370358822098244e-05, "loss": 1.9642, "step": 8746 }, { "epoch": 0.282211929156643, "grad_norm": 1.109375, "learning_rate": 2.5369221721657456e-05, "loss": 1.9188, "step": 8747 }, { "epoch": 0.28224419301043935, "grad_norm": 1.078125, "learning_rate": 2.536808450707971e-05, "loss": 1.9475, "step": 8748 }, { "epoch": 0.28227645686423575, "grad_norm": 0.90625, "learning_rate": 2.5366947178377537e-05, "loss": 1.9303, "step": 8749 }, { "epoch": 0.2823087207180321, "grad_norm": 0.78125, "learning_rate": 2.536580973556344e-05, "loss": 1.9313, "step": 8750 }, { "epoch": 0.28234098457182844, "grad_norm": 0.7890625, "learning_rate": 2.5364672178649946e-05, "loss": 1.8762, "step": 8751 }, { "epoch": 0.2823732484256248, "grad_norm": 0.69140625, "learning_rate": 2.5363534507649578e-05, "loss": 1.9008, "step": 8752 }, { "epoch": 0.2824055122794211, "grad_norm": 0.671875, "learning_rate": 2.5362396722574858e-05, "loss": 1.8991, "step": 8753 }, { "epoch": 0.28243777613321747, "grad_norm": 0.58203125, "learning_rate": 2.5361258823438308e-05, "loss": 1.9203, "step": 8754 }, { "epoch": 0.2824700399870138, "grad_norm": 0.55859375, "learning_rate": 2.5360120810252455e-05, "loss": 1.8981, "step": 8755 }, { "epoch": 0.28250230384081015, "grad_norm": 0.55078125, "learning_rate": 2.535898268302982e-05, "loss": 1.8835, "step": 8756 }, { "epoch": 0.2825345676946065, "grad_norm": 0.51953125, "learning_rate": 2.535784444178294e-05, "loss": 1.9117, "step": 8757 }, { "epoch": 0.28256683154840284, "grad_norm": 0.5234375, "learning_rate": 2.5356706086524335e-05, "loss": 1.8992, "step": 8758 }, { "epoch": 0.2825990954021992, "grad_norm": 0.455078125, "learning_rate": 2.535556761726654e-05, "loss": 1.8849, "step": 8759 }, { "epoch": 0.28263135925599553, "grad_norm": 0.51953125, "learning_rate": 2.535442903402209e-05, "loss": 1.899, "step": 8760 }, { "epoch": 0.2826636231097919, "grad_norm": 0.466796875, "learning_rate": 2.535329033680351e-05, "loss": 1.8972, "step": 8761 }, { "epoch": 0.2826958869635882, "grad_norm": 0.421875, "learning_rate": 2.5352151525623338e-05, "loss": 1.8794, "step": 8762 }, { "epoch": 0.28272815081738456, "grad_norm": 0.458984375, "learning_rate": 2.535101260049411e-05, "loss": 1.8682, "step": 8763 }, { "epoch": 0.2827604146711809, "grad_norm": 0.421875, "learning_rate": 2.5349873561428358e-05, "loss": 1.8735, "step": 8764 }, { "epoch": 0.28279267852497725, "grad_norm": 0.4375, "learning_rate": 2.5348734408438623e-05, "loss": 1.8921, "step": 8765 }, { "epoch": 0.2828249423787736, "grad_norm": 0.435546875, "learning_rate": 2.5347595141537448e-05, "loss": 1.891, "step": 8766 }, { "epoch": 0.28285720623256994, "grad_norm": 0.41015625, "learning_rate": 2.5346455760737365e-05, "loss": 1.8692, "step": 8767 }, { "epoch": 0.2828894700863663, "grad_norm": 0.392578125, "learning_rate": 2.534531626605092e-05, "loss": 1.9019, "step": 8768 }, { "epoch": 0.2829217339401626, "grad_norm": 0.40234375, "learning_rate": 2.5344176657490655e-05, "loss": 1.9077, "step": 8769 }, { "epoch": 0.282953997793959, "grad_norm": 0.380859375, "learning_rate": 2.5343036935069116e-05, "loss": 1.8929, "step": 8770 }, { "epoch": 0.28298626164775537, "grad_norm": 0.396484375, "learning_rate": 2.534189709879885e-05, "loss": 1.9076, "step": 8771 }, { "epoch": 0.2830185255015517, "grad_norm": 0.36328125, "learning_rate": 2.534075714869239e-05, "loss": 1.8793, "step": 8772 }, { "epoch": 0.28305078935534805, "grad_norm": 0.79296875, "learning_rate": 2.5339617084762303e-05, "loss": 1.898, "step": 8773 }, { "epoch": 0.2830830532091444, "grad_norm": 0.44921875, "learning_rate": 2.533847690702112e-05, "loss": 1.8923, "step": 8774 }, { "epoch": 0.28311531706294074, "grad_norm": 0.458984375, "learning_rate": 2.533733661548141e-05, "loss": 1.8257, "step": 8775 }, { "epoch": 0.2831475809167371, "grad_norm": 0.4375, "learning_rate": 2.5336196210155707e-05, "loss": 1.8317, "step": 8776 }, { "epoch": 0.28317984477053343, "grad_norm": 0.421875, "learning_rate": 2.5335055691056574e-05, "loss": 1.8966, "step": 8777 }, { "epoch": 0.2832121086243298, "grad_norm": 0.44140625, "learning_rate": 2.5333915058196563e-05, "loss": 1.907, "step": 8778 }, { "epoch": 0.2832443724781261, "grad_norm": 0.439453125, "learning_rate": 2.533277431158823e-05, "loss": 1.8941, "step": 8779 }, { "epoch": 0.28327663633192246, "grad_norm": 0.41796875, "learning_rate": 2.5331633451244128e-05, "loss": 1.8812, "step": 8780 }, { "epoch": 0.2833089001857188, "grad_norm": 0.3984375, "learning_rate": 2.5330492477176813e-05, "loss": 1.8999, "step": 8781 }, { "epoch": 0.28334116403951515, "grad_norm": 0.4296875, "learning_rate": 2.5329351389398855e-05, "loss": 1.9067, "step": 8782 }, { "epoch": 0.2833734278933115, "grad_norm": 0.388671875, "learning_rate": 2.53282101879228e-05, "loss": 1.9106, "step": 8783 }, { "epoch": 0.28340569174710784, "grad_norm": 0.4140625, "learning_rate": 2.532706887276122e-05, "loss": 1.9153, "step": 8784 }, { "epoch": 0.2834379556009042, "grad_norm": 0.6171875, "learning_rate": 2.5325927443926676e-05, "loss": 1.8772, "step": 8785 }, { "epoch": 0.2834702194547005, "grad_norm": 0.47265625, "learning_rate": 2.5324785901431728e-05, "loss": 1.9111, "step": 8786 }, { "epoch": 0.28350248330849687, "grad_norm": 0.47265625, "learning_rate": 2.5323644245288947e-05, "loss": 1.8859, "step": 8787 }, { "epoch": 0.2835347471622932, "grad_norm": 0.427734375, "learning_rate": 2.5322502475510896e-05, "loss": 1.8774, "step": 8788 }, { "epoch": 0.28356701101608955, "grad_norm": 0.44921875, "learning_rate": 2.532136059211014e-05, "loss": 1.873, "step": 8789 }, { "epoch": 0.28359927486988595, "grad_norm": 0.435546875, "learning_rate": 2.532021859509925e-05, "loss": 1.8928, "step": 8790 }, { "epoch": 0.2836315387236823, "grad_norm": 0.390625, "learning_rate": 2.5319076484490805e-05, "loss": 1.8769, "step": 8791 }, { "epoch": 0.28366380257747864, "grad_norm": 0.421875, "learning_rate": 2.531793426029736e-05, "loss": 1.8521, "step": 8792 }, { "epoch": 0.283696066431275, "grad_norm": 0.4140625, "learning_rate": 2.53167919225315e-05, "loss": 1.8848, "step": 8793 }, { "epoch": 0.28372833028507133, "grad_norm": 0.37890625, "learning_rate": 2.53156494712058e-05, "loss": 1.8575, "step": 8794 }, { "epoch": 0.28376059413886767, "grad_norm": 0.40234375, "learning_rate": 2.5314506906332825e-05, "loss": 1.8713, "step": 8795 }, { "epoch": 0.283792857992664, "grad_norm": 0.3828125, "learning_rate": 2.5313364227925157e-05, "loss": 1.8748, "step": 8796 }, { "epoch": 0.28382512184646036, "grad_norm": 0.36328125, "learning_rate": 2.5312221435995382e-05, "loss": 1.8853, "step": 8797 }, { "epoch": 0.2838573857002567, "grad_norm": 0.376953125, "learning_rate": 2.5311078530556063e-05, "loss": 1.9018, "step": 8798 }, { "epoch": 0.28388964955405305, "grad_norm": 0.365234375, "learning_rate": 2.5309935511619795e-05, "loss": 1.8735, "step": 8799 }, { "epoch": 0.2839219134078494, "grad_norm": 0.375, "learning_rate": 2.5308792379199153e-05, "loss": 1.8364, "step": 8800 }, { "epoch": 0.28395417726164573, "grad_norm": 0.35546875, "learning_rate": 2.530764913330671e-05, "loss": 1.8743, "step": 8801 }, { "epoch": 0.2839864411154421, "grad_norm": 0.361328125, "learning_rate": 2.530650577395507e-05, "loss": 1.8847, "step": 8802 }, { "epoch": 0.2840187049692384, "grad_norm": 0.357421875, "learning_rate": 2.5305362301156807e-05, "loss": 1.8827, "step": 8803 }, { "epoch": 0.28405096882303477, "grad_norm": 0.35546875, "learning_rate": 2.530421871492451e-05, "loss": 1.8911, "step": 8804 }, { "epoch": 0.2840832326768311, "grad_norm": 0.341796875, "learning_rate": 2.530307501527076e-05, "loss": 1.8458, "step": 8805 }, { "epoch": 0.28411549653062745, "grad_norm": 0.59375, "learning_rate": 2.5301931202208154e-05, "loss": 1.896, "step": 8806 }, { "epoch": 0.2841477603844238, "grad_norm": 0.451171875, "learning_rate": 2.5300787275749283e-05, "loss": 1.8987, "step": 8807 }, { "epoch": 0.28418002423822014, "grad_norm": 0.40234375, "learning_rate": 2.5299643235906735e-05, "loss": 1.8443, "step": 8808 }, { "epoch": 0.2842122880920165, "grad_norm": 0.419921875, "learning_rate": 2.52984990826931e-05, "loss": 1.9045, "step": 8809 }, { "epoch": 0.2842445519458129, "grad_norm": 0.390625, "learning_rate": 2.5297354816120978e-05, "loss": 2.0214, "step": 8810 }, { "epoch": 0.28427681579960923, "grad_norm": 0.4765625, "learning_rate": 2.5296210436202958e-05, "loss": 2.0024, "step": 8811 }, { "epoch": 0.28430907965340557, "grad_norm": 0.546875, "learning_rate": 2.529506594295164e-05, "loss": 1.965, "step": 8812 }, { "epoch": 0.2843413435072019, "grad_norm": 0.69921875, "learning_rate": 2.5293921336379625e-05, "loss": 2.004, "step": 8813 }, { "epoch": 0.28437360736099826, "grad_norm": 0.55859375, "learning_rate": 2.5292776616499507e-05, "loss": 1.9857, "step": 8814 }, { "epoch": 0.2844058712147946, "grad_norm": 0.423828125, "learning_rate": 2.529163178332389e-05, "loss": 2.0127, "step": 8815 }, { "epoch": 0.28443813506859095, "grad_norm": 0.55078125, "learning_rate": 2.5290486836865373e-05, "loss": 2.0217, "step": 8816 }, { "epoch": 0.2844703989223873, "grad_norm": 0.451171875, "learning_rate": 2.528934177713656e-05, "loss": 1.9782, "step": 8817 }, { "epoch": 0.28450266277618363, "grad_norm": 0.42578125, "learning_rate": 2.528819660415005e-05, "loss": 1.9694, "step": 8818 }, { "epoch": 0.28453492662998, "grad_norm": 0.46484375, "learning_rate": 2.5287051317918455e-05, "loss": 1.9652, "step": 8819 }, { "epoch": 0.2845671904837763, "grad_norm": 0.408203125, "learning_rate": 2.528590591845438e-05, "loss": 1.9649, "step": 8820 }, { "epoch": 0.28459945433757267, "grad_norm": 0.4375, "learning_rate": 2.528476040577043e-05, "loss": 2.0118, "step": 8821 }, { "epoch": 0.284631718191369, "grad_norm": 0.396484375, "learning_rate": 2.5283614779879218e-05, "loss": 2.038, "step": 8822 }, { "epoch": 0.28466398204516535, "grad_norm": 0.439453125, "learning_rate": 2.5282469040793353e-05, "loss": 1.9936, "step": 8823 }, { "epoch": 0.2846962458989617, "grad_norm": 0.37109375, "learning_rate": 2.528132318852544e-05, "loss": 1.9834, "step": 8824 }, { "epoch": 0.28472850975275804, "grad_norm": 0.404296875, "learning_rate": 2.5280177223088098e-05, "loss": 1.9844, "step": 8825 }, { "epoch": 0.2847607736065544, "grad_norm": 0.375, "learning_rate": 2.5279031144493946e-05, "loss": 2.0313, "step": 8826 }, { "epoch": 0.28479303746035073, "grad_norm": 0.384765625, "learning_rate": 2.5277884952755585e-05, "loss": 1.9758, "step": 8827 }, { "epoch": 0.2848253013141471, "grad_norm": 0.37890625, "learning_rate": 2.5276738647885647e-05, "loss": 1.9923, "step": 8828 }, { "epoch": 0.2848575651679434, "grad_norm": 0.40625, "learning_rate": 2.5275592229896736e-05, "loss": 2.0218, "step": 8829 }, { "epoch": 0.2848898290217398, "grad_norm": 0.388671875, "learning_rate": 2.5274445698801486e-05, "loss": 1.9844, "step": 8830 }, { "epoch": 0.28492209287553616, "grad_norm": 0.3828125, "learning_rate": 2.52732990546125e-05, "loss": 1.9734, "step": 8831 }, { "epoch": 0.2849543567293325, "grad_norm": 0.3984375, "learning_rate": 2.5272152297342412e-05, "loss": 2.0068, "step": 8832 }, { "epoch": 0.28498662058312885, "grad_norm": 0.384765625, "learning_rate": 2.527100542700384e-05, "loss": 2.0036, "step": 8833 }, { "epoch": 0.2850188844369252, "grad_norm": 0.40234375, "learning_rate": 2.526985844360941e-05, "loss": 2.0146, "step": 8834 }, { "epoch": 0.28505114829072153, "grad_norm": 0.404296875, "learning_rate": 2.526871134717175e-05, "loss": 1.9908, "step": 8835 }, { "epoch": 0.2850834121445179, "grad_norm": 0.39453125, "learning_rate": 2.5267564137703474e-05, "loss": 1.9586, "step": 8836 }, { "epoch": 0.2851156759983142, "grad_norm": 0.400390625, "learning_rate": 2.5266416815217224e-05, "loss": 1.9936, "step": 8837 }, { "epoch": 0.28514793985211057, "grad_norm": 0.369140625, "learning_rate": 2.5265269379725614e-05, "loss": 1.9949, "step": 8838 }, { "epoch": 0.2851802037059069, "grad_norm": 0.375, "learning_rate": 2.526412183124129e-05, "loss": 1.9932, "step": 8839 }, { "epoch": 0.28521246755970325, "grad_norm": 0.36328125, "learning_rate": 2.526297416977688e-05, "loss": 2.0106, "step": 8840 }, { "epoch": 0.2852447314134996, "grad_norm": 0.349609375, "learning_rate": 2.5261826395345005e-05, "loss": 1.9911, "step": 8841 }, { "epoch": 0.28527699526729594, "grad_norm": 0.3828125, "learning_rate": 2.526067850795831e-05, "loss": 2.0102, "step": 8842 }, { "epoch": 0.2853092591210923, "grad_norm": 0.361328125, "learning_rate": 2.5259530507629427e-05, "loss": 2.0111, "step": 8843 }, { "epoch": 0.28534152297488863, "grad_norm": 0.373046875, "learning_rate": 2.525838239437099e-05, "loss": 2.029, "step": 8844 }, { "epoch": 0.28537378682868497, "grad_norm": 0.373046875, "learning_rate": 2.5257234168195643e-05, "loss": 1.9888, "step": 8845 }, { "epoch": 0.2854060506824813, "grad_norm": 0.384765625, "learning_rate": 2.5256085829116018e-05, "loss": 1.9986, "step": 8846 }, { "epoch": 0.28543831453627766, "grad_norm": 0.396484375, "learning_rate": 2.5254937377144763e-05, "loss": 1.9484, "step": 8847 }, { "epoch": 0.285470578390074, "grad_norm": 0.4140625, "learning_rate": 2.5253788812294506e-05, "loss": 1.9961, "step": 8848 }, { "epoch": 0.28550284224387035, "grad_norm": 0.376953125, "learning_rate": 2.52526401345779e-05, "loss": 1.9726, "step": 8849 }, { "epoch": 0.2855351060976667, "grad_norm": 0.40625, "learning_rate": 2.525149134400759e-05, "loss": 1.9967, "step": 8850 }, { "epoch": 0.2855673699514631, "grad_norm": 0.39453125, "learning_rate": 2.5250342440596218e-05, "loss": 2.0342, "step": 8851 }, { "epoch": 0.28559963380525943, "grad_norm": 0.3984375, "learning_rate": 2.524919342435643e-05, "loss": 2.0117, "step": 8852 }, { "epoch": 0.2856318976590558, "grad_norm": 0.3984375, "learning_rate": 2.524804429530087e-05, "loss": 2.0051, "step": 8853 }, { "epoch": 0.2856641615128521, "grad_norm": 0.40234375, "learning_rate": 2.5246895053442195e-05, "loss": 2.0061, "step": 8854 }, { "epoch": 0.28569642536664847, "grad_norm": 0.365234375, "learning_rate": 2.524574569879304e-05, "loss": 1.9728, "step": 8855 }, { "epoch": 0.2857286892204448, "grad_norm": 0.375, "learning_rate": 2.524459623136608e-05, "loss": 1.9995, "step": 8856 }, { "epoch": 0.28576095307424115, "grad_norm": 0.357421875, "learning_rate": 2.5243446651173944e-05, "loss": 2.0085, "step": 8857 }, { "epoch": 0.2857932169280375, "grad_norm": 0.384765625, "learning_rate": 2.5242296958229298e-05, "loss": 2.001, "step": 8858 }, { "epoch": 0.28582548078183384, "grad_norm": 0.361328125, "learning_rate": 2.5241147152544803e-05, "loss": 2.002, "step": 8859 }, { "epoch": 0.2858577446356302, "grad_norm": 0.39453125, "learning_rate": 2.5239997234133097e-05, "loss": 1.9895, "step": 8860 }, { "epoch": 0.28589000848942653, "grad_norm": 0.443359375, "learning_rate": 2.5238847203006853e-05, "loss": 2.0041, "step": 8861 }, { "epoch": 0.28592227234322287, "grad_norm": 0.3671875, "learning_rate": 2.5237697059178725e-05, "loss": 1.9965, "step": 8862 }, { "epoch": 0.2859545361970192, "grad_norm": 0.3828125, "learning_rate": 2.5236546802661367e-05, "loss": 1.9768, "step": 8863 }, { "epoch": 0.28598680005081556, "grad_norm": 0.38671875, "learning_rate": 2.523539643346745e-05, "loss": 2.0038, "step": 8864 }, { "epoch": 0.2860190639046119, "grad_norm": 0.41015625, "learning_rate": 2.523424595160963e-05, "loss": 1.99, "step": 8865 }, { "epoch": 0.28605132775840825, "grad_norm": 0.400390625, "learning_rate": 2.5233095357100573e-05, "loss": 1.9895, "step": 8866 }, { "epoch": 0.2860835916122046, "grad_norm": 0.4140625, "learning_rate": 2.523194464995294e-05, "loss": 1.9606, "step": 8867 }, { "epoch": 0.28611585546600093, "grad_norm": 0.53125, "learning_rate": 2.5230793830179407e-05, "loss": 1.9095, "step": 8868 }, { "epoch": 0.2861481193197973, "grad_norm": 0.5234375, "learning_rate": 2.522964289779263e-05, "loss": 1.9239, "step": 8869 }, { "epoch": 0.2861803831735936, "grad_norm": 0.4453125, "learning_rate": 2.5228491852805283e-05, "loss": 1.8848, "step": 8870 }, { "epoch": 0.28621264702739, "grad_norm": 0.55078125, "learning_rate": 2.522734069523004e-05, "loss": 1.8915, "step": 8871 }, { "epoch": 0.28624491088118637, "grad_norm": 0.50390625, "learning_rate": 2.5226189425079567e-05, "loss": 1.8825, "step": 8872 }, { "epoch": 0.2862771747349827, "grad_norm": 0.431640625, "learning_rate": 2.522503804236654e-05, "loss": 1.8887, "step": 8873 }, { "epoch": 0.28630943858877905, "grad_norm": 0.412109375, "learning_rate": 2.522388654710362e-05, "loss": 1.8757, "step": 8874 }, { "epoch": 0.2863417024425754, "grad_norm": 0.435546875, "learning_rate": 2.5222734939303497e-05, "loss": 1.9117, "step": 8875 }, { "epoch": 0.28637396629637174, "grad_norm": 0.404296875, "learning_rate": 2.522158321897884e-05, "loss": 1.9137, "step": 8876 }, { "epoch": 0.2864062301501681, "grad_norm": 0.404296875, "learning_rate": 2.522043138614233e-05, "loss": 1.8945, "step": 8877 }, { "epoch": 0.28643849400396443, "grad_norm": 0.44140625, "learning_rate": 2.5219279440806642e-05, "loss": 1.9141, "step": 8878 }, { "epoch": 0.28647075785776077, "grad_norm": 0.50390625, "learning_rate": 2.5218127382984455e-05, "loss": 1.8619, "step": 8879 }, { "epoch": 0.2865030217115571, "grad_norm": 0.46484375, "learning_rate": 2.5216975212688456e-05, "loss": 1.8728, "step": 8880 }, { "epoch": 0.28653528556535346, "grad_norm": 0.443359375, "learning_rate": 2.521582292993132e-05, "loss": 1.8622, "step": 8881 }, { "epoch": 0.2865675494191498, "grad_norm": 0.431640625, "learning_rate": 2.521467053472574e-05, "loss": 1.8798, "step": 8882 }, { "epoch": 0.28659981327294615, "grad_norm": 0.455078125, "learning_rate": 2.521351802708439e-05, "loss": 1.8623, "step": 8883 }, { "epoch": 0.2866320771267425, "grad_norm": 0.416015625, "learning_rate": 2.5212365407019962e-05, "loss": 1.8399, "step": 8884 }, { "epoch": 0.28666434098053883, "grad_norm": 0.427734375, "learning_rate": 2.521121267454514e-05, "loss": 1.8282, "step": 8885 }, { "epoch": 0.2866966048343352, "grad_norm": 0.380859375, "learning_rate": 2.5210059829672616e-05, "loss": 1.8624, "step": 8886 }, { "epoch": 0.2867288686881315, "grad_norm": 0.412109375, "learning_rate": 2.5208906872415077e-05, "loss": 1.8801, "step": 8887 }, { "epoch": 0.28676113254192787, "grad_norm": 0.41015625, "learning_rate": 2.5207753802785218e-05, "loss": 1.8444, "step": 8888 }, { "epoch": 0.2867933963957242, "grad_norm": 0.400390625, "learning_rate": 2.5206600620795723e-05, "loss": 1.8368, "step": 8889 }, { "epoch": 0.28682566024952055, "grad_norm": 0.37890625, "learning_rate": 2.5205447326459298e-05, "loss": 1.8604, "step": 8890 }, { "epoch": 0.28685792410331695, "grad_norm": 0.380859375, "learning_rate": 2.5204293919788625e-05, "loss": 1.8573, "step": 8891 }, { "epoch": 0.2868901879571133, "grad_norm": 0.388671875, "learning_rate": 2.5203140400796406e-05, "loss": 1.8675, "step": 8892 }, { "epoch": 0.28692245181090964, "grad_norm": 0.36328125, "learning_rate": 2.5201986769495333e-05, "loss": 1.8232, "step": 8893 }, { "epoch": 0.286954715664706, "grad_norm": 0.361328125, "learning_rate": 2.520083302589811e-05, "loss": 1.8571, "step": 8894 }, { "epoch": 0.2869869795185023, "grad_norm": 0.37109375, "learning_rate": 2.5199679170017444e-05, "loss": 1.8195, "step": 8895 }, { "epoch": 0.28701924337229867, "grad_norm": 0.373046875, "learning_rate": 2.5198525201866018e-05, "loss": 1.89, "step": 8896 }, { "epoch": 0.287051507226095, "grad_norm": 0.41015625, "learning_rate": 2.5197371121456543e-05, "loss": 1.9166, "step": 8897 }, { "epoch": 0.28708377107989136, "grad_norm": 0.396484375, "learning_rate": 2.5196216928801725e-05, "loss": 1.9205, "step": 8898 }, { "epoch": 0.2871160349336877, "grad_norm": 0.3984375, "learning_rate": 2.5195062623914265e-05, "loss": 1.8697, "step": 8899 }, { "epoch": 0.28714829878748405, "grad_norm": 0.388671875, "learning_rate": 2.519390820680687e-05, "loss": 1.8875, "step": 8900 }, { "epoch": 0.2871805626412804, "grad_norm": 0.396484375, "learning_rate": 2.5192753677492247e-05, "loss": 1.8613, "step": 8901 }, { "epoch": 0.28721282649507673, "grad_norm": 0.37890625, "learning_rate": 2.5191599035983107e-05, "loss": 1.8781, "step": 8902 }, { "epoch": 0.2872450903488731, "grad_norm": 0.41015625, "learning_rate": 2.519044428229215e-05, "loss": 1.8561, "step": 8903 }, { "epoch": 0.2872773542026694, "grad_norm": 0.416015625, "learning_rate": 2.5189289416432096e-05, "loss": 1.8884, "step": 8904 }, { "epoch": 0.28730961805646577, "grad_norm": 0.388671875, "learning_rate": 2.5188134438415654e-05, "loss": 1.8765, "step": 8905 }, { "epoch": 0.2873418819102621, "grad_norm": 0.38671875, "learning_rate": 2.5186979348255537e-05, "loss": 1.911, "step": 8906 }, { "epoch": 0.28737414576405845, "grad_norm": 0.48828125, "learning_rate": 2.518582414596446e-05, "loss": 1.9516, "step": 8907 }, { "epoch": 0.2874064096178548, "grad_norm": 0.48046875, "learning_rate": 2.5184668831555133e-05, "loss": 1.9567, "step": 8908 }, { "epoch": 0.28743867347165114, "grad_norm": 0.52734375, "learning_rate": 2.5183513405040282e-05, "loss": 1.9113, "step": 8909 }, { "epoch": 0.2874709373254475, "grad_norm": 0.46875, "learning_rate": 2.518235786643262e-05, "loss": 1.9254, "step": 8910 }, { "epoch": 0.2875032011792439, "grad_norm": 0.46875, "learning_rate": 2.5181202215744866e-05, "loss": 1.934, "step": 8911 }, { "epoch": 0.2875354650330402, "grad_norm": 0.4609375, "learning_rate": 2.5180046452989746e-05, "loss": 1.9293, "step": 8912 }, { "epoch": 0.28756772888683657, "grad_norm": 0.439453125, "learning_rate": 2.517889057817998e-05, "loss": 1.9179, "step": 8913 }, { "epoch": 0.2875999927406329, "grad_norm": 0.5703125, "learning_rate": 2.517773459132828e-05, "loss": 1.9044, "step": 8914 }, { "epoch": 0.28763225659442926, "grad_norm": 0.478515625, "learning_rate": 2.5176578492447387e-05, "loss": 1.8942, "step": 8915 }, { "epoch": 0.2876645204482256, "grad_norm": 0.52734375, "learning_rate": 2.5175422281550018e-05, "loss": 1.8608, "step": 8916 }, { "epoch": 0.28769678430202195, "grad_norm": 0.44921875, "learning_rate": 2.5174265958648896e-05, "loss": 1.8741, "step": 8917 }, { "epoch": 0.2877290481558183, "grad_norm": 0.5, "learning_rate": 2.5173109523756757e-05, "loss": 1.875, "step": 8918 }, { "epoch": 0.28776131200961463, "grad_norm": 0.52734375, "learning_rate": 2.5171952976886325e-05, "loss": 1.8807, "step": 8919 }, { "epoch": 0.287793575863411, "grad_norm": 0.462890625, "learning_rate": 2.5170796318050327e-05, "loss": 1.8691, "step": 8920 }, { "epoch": 0.2878258397172073, "grad_norm": 0.49609375, "learning_rate": 2.516963954726151e-05, "loss": 1.8718, "step": 8921 }, { "epoch": 0.28785810357100367, "grad_norm": 0.40234375, "learning_rate": 2.516848266453259e-05, "loss": 1.8906, "step": 8922 }, { "epoch": 0.2878903674248, "grad_norm": 0.412109375, "learning_rate": 2.5167325669876305e-05, "loss": 1.8588, "step": 8923 }, { "epoch": 0.28792263127859635, "grad_norm": 0.41015625, "learning_rate": 2.51661685633054e-05, "loss": 1.8838, "step": 8924 }, { "epoch": 0.2879548951323927, "grad_norm": 0.373046875, "learning_rate": 2.51650113448326e-05, "loss": 1.8279, "step": 8925 }, { "epoch": 0.28798715898618904, "grad_norm": 0.38671875, "learning_rate": 2.516385401447065e-05, "loss": 1.8657, "step": 8926 }, { "epoch": 0.2880194228399854, "grad_norm": 0.380859375, "learning_rate": 2.5162696572232282e-05, "loss": 1.831, "step": 8927 }, { "epoch": 0.2880516866937817, "grad_norm": 0.365234375, "learning_rate": 2.5161539018130245e-05, "loss": 1.8712, "step": 8928 }, { "epoch": 0.28808395054757807, "grad_norm": 0.404296875, "learning_rate": 2.5160381352177277e-05, "loss": 1.887, "step": 8929 }, { "epoch": 0.2881162144013744, "grad_norm": 0.40625, "learning_rate": 2.5159223574386117e-05, "loss": 1.9097, "step": 8930 }, { "epoch": 0.28814847825517076, "grad_norm": 0.408203125, "learning_rate": 2.5158065684769512e-05, "loss": 1.8974, "step": 8931 }, { "epoch": 0.28818074210896716, "grad_norm": 0.40625, "learning_rate": 2.515690768334021e-05, "loss": 1.9115, "step": 8932 }, { "epoch": 0.2882130059627635, "grad_norm": 0.4765625, "learning_rate": 2.5155749570110956e-05, "loss": 1.9173, "step": 8933 }, { "epoch": 0.28824526981655985, "grad_norm": 0.404296875, "learning_rate": 2.5154591345094494e-05, "loss": 1.9077, "step": 8934 }, { "epoch": 0.2882775336703562, "grad_norm": 0.65625, "learning_rate": 2.5153433008303575e-05, "loss": 2.0313, "step": 8935 }, { "epoch": 0.28830979752415253, "grad_norm": 0.578125, "learning_rate": 2.515227455975095e-05, "loss": 2.014, "step": 8936 }, { "epoch": 0.2883420613779489, "grad_norm": 0.5234375, "learning_rate": 2.515111599944937e-05, "loss": 2.0006, "step": 8937 }, { "epoch": 0.2883743252317452, "grad_norm": 0.55859375, "learning_rate": 2.5149957327411587e-05, "loss": 2.0224, "step": 8938 }, { "epoch": 0.28840658908554156, "grad_norm": 0.5625, "learning_rate": 2.5148798543650354e-05, "loss": 2.04, "step": 8939 }, { "epoch": 0.2884388529393379, "grad_norm": 0.58984375, "learning_rate": 2.5147639648178427e-05, "loss": 2.0364, "step": 8940 }, { "epoch": 0.28847111679313425, "grad_norm": 0.4453125, "learning_rate": 2.514648064100857e-05, "loss": 2.0192, "step": 8941 }, { "epoch": 0.2885033806469306, "grad_norm": 0.5625, "learning_rate": 2.5145321522153523e-05, "loss": 2.007, "step": 8942 }, { "epoch": 0.28853564450072694, "grad_norm": 0.546875, "learning_rate": 2.514416229162606e-05, "loss": 2.0011, "step": 8943 }, { "epoch": 0.2885679083545233, "grad_norm": 0.45703125, "learning_rate": 2.5143002949438934e-05, "loss": 2.0486, "step": 8944 }, { "epoch": 0.2886001722083196, "grad_norm": 0.5, "learning_rate": 2.514184349560491e-05, "loss": 2.0225, "step": 8945 }, { "epoch": 0.28863243606211597, "grad_norm": 0.455078125, "learning_rate": 2.5140683930136747e-05, "loss": 2.0198, "step": 8946 }, { "epoch": 0.2886646999159123, "grad_norm": 0.443359375, "learning_rate": 2.5139524253047218e-05, "loss": 2.0088, "step": 8947 }, { "epoch": 0.28869696376970866, "grad_norm": 0.50390625, "learning_rate": 2.513836446434907e-05, "loss": 2.0253, "step": 8948 }, { "epoch": 0.288729227623505, "grad_norm": 0.423828125, "learning_rate": 2.513720456405508e-05, "loss": 1.9831, "step": 8949 }, { "epoch": 0.28876149147730135, "grad_norm": 0.46484375, "learning_rate": 2.513604455217802e-05, "loss": 2.0416, "step": 8950 }, { "epoch": 0.2887937553310977, "grad_norm": 0.408203125, "learning_rate": 2.5134884428730648e-05, "loss": 1.9832, "step": 8951 }, { "epoch": 0.2888260191848941, "grad_norm": 0.4140625, "learning_rate": 2.5133724193725744e-05, "loss": 1.9978, "step": 8952 }, { "epoch": 0.28885828303869043, "grad_norm": 0.400390625, "learning_rate": 2.513256384717607e-05, "loss": 2.0154, "step": 8953 }, { "epoch": 0.2888905468924868, "grad_norm": 0.4296875, "learning_rate": 2.5131403389094406e-05, "loss": 1.9985, "step": 8954 }, { "epoch": 0.2889228107462831, "grad_norm": 0.4375, "learning_rate": 2.5130242819493518e-05, "loss": 2.0195, "step": 8955 }, { "epoch": 0.28895507460007946, "grad_norm": 0.42578125, "learning_rate": 2.5129082138386183e-05, "loss": 1.9934, "step": 8956 }, { "epoch": 0.2889873384538758, "grad_norm": 0.416015625, "learning_rate": 2.5127921345785183e-05, "loss": 1.9771, "step": 8957 }, { "epoch": 0.28901960230767215, "grad_norm": 0.404296875, "learning_rate": 2.5126760441703286e-05, "loss": 1.976, "step": 8958 }, { "epoch": 0.2890518661614685, "grad_norm": 0.42578125, "learning_rate": 2.512559942615328e-05, "loss": 2.0124, "step": 8959 }, { "epoch": 0.28908413001526484, "grad_norm": 0.419921875, "learning_rate": 2.5124438299147934e-05, "loss": 2.008, "step": 8960 }, { "epoch": 0.2891163938690612, "grad_norm": 0.4296875, "learning_rate": 2.5123277060700042e-05, "loss": 1.9957, "step": 8961 }, { "epoch": 0.2891486577228575, "grad_norm": 0.380859375, "learning_rate": 2.5122115710822367e-05, "loss": 1.9779, "step": 8962 }, { "epoch": 0.28918092157665387, "grad_norm": 0.423828125, "learning_rate": 2.5120954249527713e-05, "loss": 1.9958, "step": 8963 }, { "epoch": 0.2892131854304502, "grad_norm": 0.37890625, "learning_rate": 2.5119792676828854e-05, "loss": 1.9807, "step": 8964 }, { "epoch": 0.28924544928424656, "grad_norm": 0.41796875, "learning_rate": 2.5118630992738578e-05, "loss": 2.0226, "step": 8965 }, { "epoch": 0.2892777131380429, "grad_norm": 0.36328125, "learning_rate": 2.511746919726967e-05, "loss": 2.0108, "step": 8966 }, { "epoch": 0.28930997699183925, "grad_norm": 0.412109375, "learning_rate": 2.5116307290434917e-05, "loss": 2.0316, "step": 8967 }, { "epoch": 0.2893422408456356, "grad_norm": 0.3671875, "learning_rate": 2.5115145272247115e-05, "loss": 2.0026, "step": 8968 }, { "epoch": 0.28937450469943193, "grad_norm": 0.44921875, "learning_rate": 2.511398314271905e-05, "loss": 1.9562, "step": 8969 }, { "epoch": 0.2894067685532283, "grad_norm": 0.408203125, "learning_rate": 2.511282090186351e-05, "loss": 2.0033, "step": 8970 }, { "epoch": 0.2894390324070246, "grad_norm": 0.396484375, "learning_rate": 2.5111658549693294e-05, "loss": 2.0271, "step": 8971 }, { "epoch": 0.289471296260821, "grad_norm": 0.41796875, "learning_rate": 2.5110496086221197e-05, "loss": 2.0051, "step": 8972 }, { "epoch": 0.28950356011461736, "grad_norm": 0.3984375, "learning_rate": 2.510933351146001e-05, "loss": 2.0051, "step": 8973 }, { "epoch": 0.2895358239684137, "grad_norm": 0.478515625, "learning_rate": 2.510817082542253e-05, "loss": 2.039, "step": 8974 }, { "epoch": 0.28956808782221005, "grad_norm": 0.4296875, "learning_rate": 2.5107008028121562e-05, "loss": 2.0376, "step": 8975 }, { "epoch": 0.2896003516760064, "grad_norm": 0.38671875, "learning_rate": 2.5105845119569897e-05, "loss": 2.0242, "step": 8976 }, { "epoch": 0.28963261552980274, "grad_norm": 0.490234375, "learning_rate": 2.510468209978034e-05, "loss": 2.0216, "step": 8977 }, { "epoch": 0.2896648793835991, "grad_norm": 0.4765625, "learning_rate": 2.5103518968765694e-05, "loss": 2.0242, "step": 8978 }, { "epoch": 0.2896971432373954, "grad_norm": 0.376953125, "learning_rate": 2.5102355726538755e-05, "loss": 2.0323, "step": 8979 }, { "epoch": 0.28972940709119177, "grad_norm": 0.466796875, "learning_rate": 2.5101192373112332e-05, "loss": 2.0178, "step": 8980 }, { "epoch": 0.2897616709449881, "grad_norm": 0.41015625, "learning_rate": 2.5100028908499235e-05, "loss": 2.0241, "step": 8981 }, { "epoch": 0.28979393479878446, "grad_norm": 0.39453125, "learning_rate": 2.509886533271226e-05, "loss": 1.9799, "step": 8982 }, { "epoch": 0.2898261986525808, "grad_norm": 0.408203125, "learning_rate": 2.5097701645764226e-05, "loss": 2.0058, "step": 8983 }, { "epoch": 0.28985846250637715, "grad_norm": 0.388671875, "learning_rate": 2.509653784766793e-05, "loss": 2.0146, "step": 8984 }, { "epoch": 0.2898907263601735, "grad_norm": 0.453125, "learning_rate": 2.509537393843619e-05, "loss": 2.0084, "step": 8985 }, { "epoch": 0.28992299021396983, "grad_norm": 0.46875, "learning_rate": 2.509420991808182e-05, "loss": 2.0285, "step": 8986 }, { "epoch": 0.2899552540677662, "grad_norm": 0.396484375, "learning_rate": 2.5093045786617626e-05, "loss": 2.0193, "step": 8987 }, { "epoch": 0.2899875179215625, "grad_norm": 0.3984375, "learning_rate": 2.5091881544056425e-05, "loss": 2.015, "step": 8988 }, { "epoch": 0.29001978177535886, "grad_norm": 0.41015625, "learning_rate": 2.509071719041103e-05, "loss": 2.028, "step": 8989 }, { "epoch": 0.2900520456291552, "grad_norm": 0.390625, "learning_rate": 2.508955272569426e-05, "loss": 2.0254, "step": 8990 }, { "epoch": 0.29008430948295155, "grad_norm": 0.41015625, "learning_rate": 2.5088388149918935e-05, "loss": 2.0221, "step": 8991 }, { "epoch": 0.29011657333674795, "grad_norm": 0.396484375, "learning_rate": 2.5087223463097868e-05, "loss": 1.9873, "step": 8992 }, { "epoch": 0.2901488371905443, "grad_norm": 0.4140625, "learning_rate": 2.5086058665243883e-05, "loss": 1.9849, "step": 8993 }, { "epoch": 0.29018110104434064, "grad_norm": 0.392578125, "learning_rate": 2.5084893756369802e-05, "loss": 2.0226, "step": 8994 }, { "epoch": 0.290213364898137, "grad_norm": 0.404296875, "learning_rate": 2.5083728736488444e-05, "loss": 2.0281, "step": 8995 }, { "epoch": 0.2902456287519333, "grad_norm": 0.4375, "learning_rate": 2.508256360561263e-05, "loss": 2.0267, "step": 8996 }, { "epoch": 0.29027789260572967, "grad_norm": 0.421875, "learning_rate": 2.5081398363755194e-05, "loss": 2.0008, "step": 8997 }, { "epoch": 0.290310156459526, "grad_norm": 0.390625, "learning_rate": 2.5080233010928952e-05, "loss": 2.033, "step": 8998 }, { "epoch": 0.29034242031332236, "grad_norm": 0.392578125, "learning_rate": 2.5079067547146738e-05, "loss": 2.0142, "step": 8999 }, { "epoch": 0.2903746841671187, "grad_norm": 0.404296875, "learning_rate": 2.507790197242138e-05, "loss": 2.0054, "step": 9000 }, { "epoch": 0.29040694802091505, "grad_norm": 0.376953125, "learning_rate": 2.5076736286765706e-05, "loss": 1.9943, "step": 9001 }, { "epoch": 0.2904392118747114, "grad_norm": 0.408203125, "learning_rate": 2.507557049019255e-05, "loss": 2.0079, "step": 9002 }, { "epoch": 0.29047147572850773, "grad_norm": 0.427734375, "learning_rate": 2.507440458271474e-05, "loss": 2.0017, "step": 9003 }, { "epoch": 0.2905037395823041, "grad_norm": 0.396484375, "learning_rate": 2.5073238564345116e-05, "loss": 2.0174, "step": 9004 }, { "epoch": 0.2905360034361004, "grad_norm": 0.416015625, "learning_rate": 2.5072072435096505e-05, "loss": 2.0046, "step": 9005 }, { "epoch": 0.29056826728989676, "grad_norm": 0.421875, "learning_rate": 2.5070906194981747e-05, "loss": 2.0291, "step": 9006 }, { "epoch": 0.2906005311436931, "grad_norm": 0.390625, "learning_rate": 2.506973984401368e-05, "loss": 2.0248, "step": 9007 }, { "epoch": 0.29063279499748945, "grad_norm": 0.4140625, "learning_rate": 2.5068573382205137e-05, "loss": 2.0367, "step": 9008 }, { "epoch": 0.2906650588512858, "grad_norm": 0.419921875, "learning_rate": 2.5067406809568964e-05, "loss": 1.9958, "step": 9009 }, { "epoch": 0.29069732270508214, "grad_norm": 0.41796875, "learning_rate": 2.5066240126118e-05, "loss": 2.0065, "step": 9010 }, { "epoch": 0.2907295865588785, "grad_norm": 0.3984375, "learning_rate": 2.5065073331865084e-05, "loss": 1.9901, "step": 9011 }, { "epoch": 0.2907618504126749, "grad_norm": 0.51953125, "learning_rate": 2.5063906426823065e-05, "loss": 2.0177, "step": 9012 }, { "epoch": 0.2907941142664712, "grad_norm": 0.5390625, "learning_rate": 2.506273941100478e-05, "loss": 2.0229, "step": 9013 }, { "epoch": 0.29082637812026757, "grad_norm": 0.443359375, "learning_rate": 2.506157228442308e-05, "loss": 2.041, "step": 9014 }, { "epoch": 0.2908586419740639, "grad_norm": 0.44921875, "learning_rate": 2.506040504709081e-05, "loss": 2.0408, "step": 9015 }, { "epoch": 0.29089090582786026, "grad_norm": 0.4375, "learning_rate": 2.5059237699020818e-05, "loss": 2.0148, "step": 9016 }, { "epoch": 0.2909231696816566, "grad_norm": 0.40625, "learning_rate": 2.5058070240225955e-05, "loss": 2.0165, "step": 9017 }, { "epoch": 0.29095543353545295, "grad_norm": 0.427734375, "learning_rate": 2.505690267071907e-05, "loss": 2.0151, "step": 9018 }, { "epoch": 0.2909876973892493, "grad_norm": 0.4375, "learning_rate": 2.5055734990513015e-05, "loss": 2.029, "step": 9019 }, { "epoch": 0.29101996124304563, "grad_norm": 0.380859375, "learning_rate": 2.5054567199620643e-05, "loss": 2.0334, "step": 9020 }, { "epoch": 0.291052225096842, "grad_norm": 0.41015625, "learning_rate": 2.505339929805481e-05, "loss": 1.9966, "step": 9021 }, { "epoch": 0.2910844889506383, "grad_norm": 0.373046875, "learning_rate": 2.505223128582837e-05, "loss": 2.0155, "step": 9022 }, { "epoch": 0.29111675280443466, "grad_norm": 0.384765625, "learning_rate": 2.5051063162954177e-05, "loss": 2.0108, "step": 9023 }, { "epoch": 0.291149016658231, "grad_norm": 0.36328125, "learning_rate": 2.5049894929445088e-05, "loss": 1.9964, "step": 9024 }, { "epoch": 0.29118128051202735, "grad_norm": 0.373046875, "learning_rate": 2.5048726585313974e-05, "loss": 2.0151, "step": 9025 }, { "epoch": 0.2912135443658237, "grad_norm": 0.369140625, "learning_rate": 2.5047558130573682e-05, "loss": 2.0085, "step": 9026 }, { "epoch": 0.29124580821962004, "grad_norm": 0.427734375, "learning_rate": 2.5046389565237074e-05, "loss": 2.0033, "step": 9027 }, { "epoch": 0.2912780720734164, "grad_norm": 0.49609375, "learning_rate": 2.5045220889317025e-05, "loss": 2.0154, "step": 9028 }, { "epoch": 0.2913103359272127, "grad_norm": 0.49609375, "learning_rate": 2.5044052102826386e-05, "loss": 2.0123, "step": 9029 }, { "epoch": 0.29134259978100907, "grad_norm": 0.578125, "learning_rate": 2.504288320577803e-05, "loss": 1.9683, "step": 9030 }, { "epoch": 0.2913748636348054, "grad_norm": 0.474609375, "learning_rate": 2.504171419818482e-05, "loss": 1.9698, "step": 9031 }, { "epoch": 0.29140712748860176, "grad_norm": 0.55859375, "learning_rate": 2.504054508005962e-05, "loss": 1.9821, "step": 9032 }, { "epoch": 0.29143939134239816, "grad_norm": 0.5078125, "learning_rate": 2.5039375851415304e-05, "loss": 1.9884, "step": 9033 }, { "epoch": 0.2914716551961945, "grad_norm": 0.45703125, "learning_rate": 2.5038206512264744e-05, "loss": 1.9781, "step": 9034 }, { "epoch": 0.29150391904999085, "grad_norm": 0.46875, "learning_rate": 2.503703706262081e-05, "loss": 2.0384, "step": 9035 }, { "epoch": 0.2915361829037872, "grad_norm": 0.44921875, "learning_rate": 2.5035867502496365e-05, "loss": 2.0308, "step": 9036 }, { "epoch": 0.29156844675758353, "grad_norm": 0.46484375, "learning_rate": 2.5034697831904296e-05, "loss": 2.027, "step": 9037 }, { "epoch": 0.2916007106113799, "grad_norm": 0.53515625, "learning_rate": 2.503352805085747e-05, "loss": 2.0072, "step": 9038 }, { "epoch": 0.2916329744651762, "grad_norm": 0.462890625, "learning_rate": 2.5032358159368764e-05, "loss": 1.9951, "step": 9039 }, { "epoch": 0.29166523831897256, "grad_norm": 0.431640625, "learning_rate": 2.503118815745106e-05, "loss": 2.0226, "step": 9040 }, { "epoch": 0.2916975021727689, "grad_norm": 0.44921875, "learning_rate": 2.5030018045117235e-05, "loss": 1.9896, "step": 9041 }, { "epoch": 0.29172976602656525, "grad_norm": 0.47265625, "learning_rate": 2.5028847822380164e-05, "loss": 1.9595, "step": 9042 }, { "epoch": 0.2917620298803616, "grad_norm": 0.396484375, "learning_rate": 2.5027677489252727e-05, "loss": 1.9633, "step": 9043 }, { "epoch": 0.29179429373415794, "grad_norm": 0.453125, "learning_rate": 2.502650704574781e-05, "loss": 1.9858, "step": 9044 }, { "epoch": 0.2918265575879543, "grad_norm": 0.416015625, "learning_rate": 2.5025336491878305e-05, "loss": 1.909, "step": 9045 }, { "epoch": 0.2918588214417506, "grad_norm": 0.43359375, "learning_rate": 2.5024165827657083e-05, "loss": 1.9655, "step": 9046 }, { "epoch": 0.29189108529554697, "grad_norm": 0.447265625, "learning_rate": 2.502299505309703e-05, "loss": 1.9387, "step": 9047 }, { "epoch": 0.2919233491493433, "grad_norm": 0.4296875, "learning_rate": 2.5021824168211044e-05, "loss": 1.9374, "step": 9048 }, { "epoch": 0.29195561300313966, "grad_norm": 0.3984375, "learning_rate": 2.5020653173012006e-05, "loss": 1.9832, "step": 9049 }, { "epoch": 0.291987876856936, "grad_norm": 0.404296875, "learning_rate": 2.5019482067512806e-05, "loss": 2.0012, "step": 9050 }, { "epoch": 0.29202014071073235, "grad_norm": 0.4140625, "learning_rate": 2.5018310851726334e-05, "loss": 2.0319, "step": 9051 }, { "epoch": 0.2920524045645287, "grad_norm": 0.39453125, "learning_rate": 2.5017139525665484e-05, "loss": 1.955, "step": 9052 }, { "epoch": 0.2920846684183251, "grad_norm": 0.3828125, "learning_rate": 2.501596808934315e-05, "loss": 1.9779, "step": 9053 }, { "epoch": 0.29211693227212143, "grad_norm": 0.390625, "learning_rate": 2.5014796542772223e-05, "loss": 2.017, "step": 9054 }, { "epoch": 0.2921491961259178, "grad_norm": 0.380859375, "learning_rate": 2.5013624885965603e-05, "loss": 2.0073, "step": 9055 }, { "epoch": 0.2921814599797141, "grad_norm": 0.376953125, "learning_rate": 2.501245311893618e-05, "loss": 1.9884, "step": 9056 }, { "epoch": 0.29221372383351046, "grad_norm": 0.369140625, "learning_rate": 2.501128124169686e-05, "loss": 1.9733, "step": 9057 }, { "epoch": 0.2922459876873068, "grad_norm": 0.392578125, "learning_rate": 2.5010109254260535e-05, "loss": 2.0053, "step": 9058 }, { "epoch": 0.29227825154110315, "grad_norm": 0.353515625, "learning_rate": 2.500893715664011e-05, "loss": 1.978, "step": 9059 }, { "epoch": 0.2923105153948995, "grad_norm": 0.37890625, "learning_rate": 2.500776494884849e-05, "loss": 1.9921, "step": 9060 }, { "epoch": 0.29234277924869584, "grad_norm": 0.375, "learning_rate": 2.5006592630898562e-05, "loss": 2.0217, "step": 9061 }, { "epoch": 0.2923750431024922, "grad_norm": 0.375, "learning_rate": 2.500542020280325e-05, "loss": 1.961, "step": 9062 }, { "epoch": 0.2924073069562885, "grad_norm": 0.44140625, "learning_rate": 2.5004247664575446e-05, "loss": 1.9705, "step": 9063 }, { "epoch": 0.29243957081008487, "grad_norm": 0.51953125, "learning_rate": 2.500307501622806e-05, "loss": 2.0277, "step": 9064 }, { "epoch": 0.2924718346638812, "grad_norm": 0.490234375, "learning_rate": 2.5001902257774004e-05, "loss": 1.9727, "step": 9065 }, { "epoch": 0.29250409851767756, "grad_norm": 0.408203125, "learning_rate": 2.5000729389226183e-05, "loss": 1.9816, "step": 9066 }, { "epoch": 0.2925363623714739, "grad_norm": 0.44921875, "learning_rate": 2.49995564105975e-05, "loss": 1.9974, "step": 9067 }, { "epoch": 0.29256862622527025, "grad_norm": 0.4375, "learning_rate": 2.4998383321900887e-05, "loss": 1.9926, "step": 9068 }, { "epoch": 0.2926008900790666, "grad_norm": 0.380859375, "learning_rate": 2.499721012314923e-05, "loss": 1.9795, "step": 9069 }, { "epoch": 0.29263315393286293, "grad_norm": 0.4296875, "learning_rate": 2.4996036814355464e-05, "loss": 2.0004, "step": 9070 }, { "epoch": 0.2926654177866593, "grad_norm": 0.37890625, "learning_rate": 2.4994863395532496e-05, "loss": 1.9801, "step": 9071 }, { "epoch": 0.2926976816404556, "grad_norm": 0.37890625, "learning_rate": 2.499368986669324e-05, "loss": 1.9885, "step": 9072 }, { "epoch": 0.292729945494252, "grad_norm": 0.38671875, "learning_rate": 2.499251622785061e-05, "loss": 1.9521, "step": 9073 }, { "epoch": 0.29276220934804836, "grad_norm": 0.3671875, "learning_rate": 2.499134247901754e-05, "loss": 2.0241, "step": 9074 }, { "epoch": 0.2927944732018447, "grad_norm": 0.376953125, "learning_rate": 2.4990168620206932e-05, "loss": 1.9745, "step": 9075 }, { "epoch": 0.29282673705564105, "grad_norm": 0.37890625, "learning_rate": 2.498899465143172e-05, "loss": 1.9714, "step": 9076 }, { "epoch": 0.2928590009094374, "grad_norm": 0.421875, "learning_rate": 2.4987820572704816e-05, "loss": 1.9928, "step": 9077 }, { "epoch": 0.29289126476323374, "grad_norm": 0.40625, "learning_rate": 2.4986646384039153e-05, "loss": 1.971, "step": 9078 }, { "epoch": 0.2929235286170301, "grad_norm": 0.400390625, "learning_rate": 2.4985472085447647e-05, "loss": 2.0121, "step": 9079 }, { "epoch": 0.2929557924708264, "grad_norm": 0.439453125, "learning_rate": 2.498429767694323e-05, "loss": 1.9875, "step": 9080 }, { "epoch": 0.29298805632462277, "grad_norm": 0.455078125, "learning_rate": 2.498312315853883e-05, "loss": 1.9494, "step": 9081 }, { "epoch": 0.2930203201784191, "grad_norm": 0.4921875, "learning_rate": 2.498194853024737e-05, "loss": 1.9454, "step": 9082 }, { "epoch": 0.29305258403221546, "grad_norm": 0.427734375, "learning_rate": 2.4980773792081776e-05, "loss": 1.992, "step": 9083 }, { "epoch": 0.2930848478860118, "grad_norm": 0.458984375, "learning_rate": 2.4979598944054986e-05, "loss": 1.9812, "step": 9084 }, { "epoch": 0.29311711173980814, "grad_norm": 0.4296875, "learning_rate": 2.497842398617994e-05, "loss": 1.9584, "step": 9085 }, { "epoch": 0.2931493755936045, "grad_norm": 0.44140625, "learning_rate": 2.497724891846955e-05, "loss": 1.9744, "step": 9086 }, { "epoch": 0.29318163944740083, "grad_norm": 0.43359375, "learning_rate": 2.4976073740936764e-05, "loss": 1.9957, "step": 9087 }, { "epoch": 0.2932139033011972, "grad_norm": 0.421875, "learning_rate": 2.4974898453594517e-05, "loss": 1.9843, "step": 9088 }, { "epoch": 0.2932461671549935, "grad_norm": 0.400390625, "learning_rate": 2.4973723056455743e-05, "loss": 1.9513, "step": 9089 }, { "epoch": 0.29327843100878986, "grad_norm": 0.39453125, "learning_rate": 2.497254754953338e-05, "loss": 1.9525, "step": 9090 }, { "epoch": 0.2933106948625862, "grad_norm": 0.3828125, "learning_rate": 2.497137193284037e-05, "loss": 1.9871, "step": 9091 }, { "epoch": 0.29334295871638255, "grad_norm": 0.388671875, "learning_rate": 2.497019620638965e-05, "loss": 1.9657, "step": 9092 }, { "epoch": 0.29337522257017895, "grad_norm": 0.396484375, "learning_rate": 2.4969020370194166e-05, "loss": 1.9664, "step": 9093 }, { "epoch": 0.2934074864239753, "grad_norm": 0.375, "learning_rate": 2.4967844424266852e-05, "loss": 1.9815, "step": 9094 }, { "epoch": 0.29343975027777164, "grad_norm": 0.416015625, "learning_rate": 2.496666836862066e-05, "loss": 1.9406, "step": 9095 }, { "epoch": 0.293472014131568, "grad_norm": 0.376953125, "learning_rate": 2.4965492203268532e-05, "loss": 1.9877, "step": 9096 }, { "epoch": 0.2935042779853643, "grad_norm": 0.37890625, "learning_rate": 2.496431592822342e-05, "loss": 2.0015, "step": 9097 }, { "epoch": 0.29353654183916067, "grad_norm": 0.408203125, "learning_rate": 2.496313954349826e-05, "loss": 2.0005, "step": 9098 }, { "epoch": 0.293568805692957, "grad_norm": 0.419921875, "learning_rate": 2.496196304910601e-05, "loss": 1.9502, "step": 9099 }, { "epoch": 0.29360106954675336, "grad_norm": 0.427734375, "learning_rate": 2.4960786445059617e-05, "loss": 1.9804, "step": 9100 }, { "epoch": 0.2936333334005497, "grad_norm": 0.423828125, "learning_rate": 2.495960973137203e-05, "loss": 1.9842, "step": 9101 }, { "epoch": 0.29366559725434604, "grad_norm": 0.51171875, "learning_rate": 2.495843290805621e-05, "loss": 2.0265, "step": 9102 }, { "epoch": 0.2936978611081424, "grad_norm": 0.5078125, "learning_rate": 2.49572559751251e-05, "loss": 2.0455, "step": 9103 }, { "epoch": 0.29373012496193873, "grad_norm": 0.498046875, "learning_rate": 2.495607893259166e-05, "loss": 1.9861, "step": 9104 }, { "epoch": 0.2937623888157351, "grad_norm": 0.4765625, "learning_rate": 2.495490178046885e-05, "loss": 1.9974, "step": 9105 }, { "epoch": 0.2937946526695314, "grad_norm": 0.455078125, "learning_rate": 2.4953724518769615e-05, "loss": 2.0045, "step": 9106 }, { "epoch": 0.29382691652332776, "grad_norm": 0.451171875, "learning_rate": 2.4952547147506926e-05, "loss": 1.9633, "step": 9107 }, { "epoch": 0.2938591803771241, "grad_norm": 0.423828125, "learning_rate": 2.495136966669374e-05, "loss": 1.9503, "step": 9108 }, { "epoch": 0.29389144423092045, "grad_norm": 0.44140625, "learning_rate": 2.495019207634301e-05, "loss": 1.8427, "step": 9109 }, { "epoch": 0.2939237080847168, "grad_norm": 0.44140625, "learning_rate": 2.494901437646771e-05, "loss": 1.8961, "step": 9110 }, { "epoch": 0.29395597193851314, "grad_norm": 0.46484375, "learning_rate": 2.4947836567080796e-05, "loss": 1.9942, "step": 9111 }, { "epoch": 0.2939882357923095, "grad_norm": 0.44921875, "learning_rate": 2.4946658648195227e-05, "loss": 2.0316, "step": 9112 }, { "epoch": 0.2940204996461058, "grad_norm": 0.4375, "learning_rate": 2.4945480619823985e-05, "loss": 2.0162, "step": 9113 }, { "epoch": 0.2940527634999022, "grad_norm": 0.474609375, "learning_rate": 2.4944302481980022e-05, "loss": 2.023, "step": 9114 }, { "epoch": 0.29408502735369857, "grad_norm": 0.443359375, "learning_rate": 2.494312423467631e-05, "loss": 2.0221, "step": 9115 }, { "epoch": 0.2941172912074949, "grad_norm": 0.5, "learning_rate": 2.4941945877925823e-05, "loss": 2.0207, "step": 9116 }, { "epoch": 0.29414955506129126, "grad_norm": 0.47265625, "learning_rate": 2.4940767411741526e-05, "loss": 1.9646, "step": 9117 }, { "epoch": 0.2941818189150876, "grad_norm": 0.4453125, "learning_rate": 2.4939588836136395e-05, "loss": 2.0049, "step": 9118 }, { "epoch": 0.29421408276888394, "grad_norm": 0.4140625, "learning_rate": 2.4938410151123396e-05, "loss": 1.9759, "step": 9119 }, { "epoch": 0.2942463466226803, "grad_norm": 0.46875, "learning_rate": 2.4937231356715512e-05, "loss": 1.9938, "step": 9120 }, { "epoch": 0.29427861047647663, "grad_norm": 0.416015625, "learning_rate": 2.4936052452925714e-05, "loss": 1.9894, "step": 9121 }, { "epoch": 0.294310874330273, "grad_norm": 0.396484375, "learning_rate": 2.4934873439766977e-05, "loss": 1.9955, "step": 9122 }, { "epoch": 0.2943431381840693, "grad_norm": 0.4296875, "learning_rate": 2.4933694317252282e-05, "loss": 1.9905, "step": 9123 }, { "epoch": 0.29437540203786566, "grad_norm": 0.4140625, "learning_rate": 2.4932515085394602e-05, "loss": 2.0181, "step": 9124 }, { "epoch": 0.294407665891662, "grad_norm": 0.404296875, "learning_rate": 2.4931335744206927e-05, "loss": 1.9873, "step": 9125 }, { "epoch": 0.29443992974545835, "grad_norm": 0.41015625, "learning_rate": 2.493015629370223e-05, "loss": 2.0128, "step": 9126 }, { "epoch": 0.2944721935992547, "grad_norm": 0.421875, "learning_rate": 2.49289767338935e-05, "loss": 2.0138, "step": 9127 }, { "epoch": 0.29450445745305104, "grad_norm": 0.375, "learning_rate": 2.4927797064793715e-05, "loss": 2.0255, "step": 9128 }, { "epoch": 0.2945367213068474, "grad_norm": 0.408203125, "learning_rate": 2.492661728641586e-05, "loss": 1.9861, "step": 9129 }, { "epoch": 0.2945689851606437, "grad_norm": 0.431640625, "learning_rate": 2.4925437398772925e-05, "loss": 1.982, "step": 9130 }, { "epoch": 0.29460124901444007, "grad_norm": 0.376953125, "learning_rate": 2.4924257401877896e-05, "loss": 2.0027, "step": 9131 }, { "epoch": 0.2946335128682364, "grad_norm": 0.408203125, "learning_rate": 2.4923077295743758e-05, "loss": 2.0047, "step": 9132 }, { "epoch": 0.29466577672203276, "grad_norm": 0.400390625, "learning_rate": 2.492189708038351e-05, "loss": 1.9905, "step": 9133 }, { "epoch": 0.29469804057582916, "grad_norm": 0.396484375, "learning_rate": 2.4920716755810132e-05, "loss": 1.9889, "step": 9134 }, { "epoch": 0.2947303044296255, "grad_norm": 0.4140625, "learning_rate": 2.4919536322036623e-05, "loss": 1.9791, "step": 9135 }, { "epoch": 0.29476256828342184, "grad_norm": 0.4453125, "learning_rate": 2.491835577907597e-05, "loss": 2.001, "step": 9136 }, { "epoch": 0.2947948321372182, "grad_norm": 0.376953125, "learning_rate": 2.4917175126941176e-05, "loss": 2.0069, "step": 9137 }, { "epoch": 0.29482709599101453, "grad_norm": 0.392578125, "learning_rate": 2.491599436564523e-05, "loss": 1.9504, "step": 9138 }, { "epoch": 0.2948593598448109, "grad_norm": 0.3671875, "learning_rate": 2.4914813495201136e-05, "loss": 1.9954, "step": 9139 }, { "epoch": 0.2948916236986072, "grad_norm": 0.39453125, "learning_rate": 2.4913632515621887e-05, "loss": 2.0061, "step": 9140 }, { "epoch": 0.29492388755240356, "grad_norm": 0.376953125, "learning_rate": 2.4912451426920484e-05, "loss": 1.9893, "step": 9141 }, { "epoch": 0.2949561514061999, "grad_norm": 0.404296875, "learning_rate": 2.4911270229109925e-05, "loss": 1.9994, "step": 9142 }, { "epoch": 0.29498841525999625, "grad_norm": 0.3828125, "learning_rate": 2.4910088922203213e-05, "loss": 2.021, "step": 9143 }, { "epoch": 0.2950206791137926, "grad_norm": 0.41015625, "learning_rate": 2.490890750621335e-05, "loss": 1.9897, "step": 9144 }, { "epoch": 0.29505294296758894, "grad_norm": 0.3828125, "learning_rate": 2.4907725981153347e-05, "loss": 1.9938, "step": 9145 }, { "epoch": 0.2950852068213853, "grad_norm": 0.39453125, "learning_rate": 2.49065443470362e-05, "loss": 2.0097, "step": 9146 }, { "epoch": 0.2951174706751816, "grad_norm": 0.404296875, "learning_rate": 2.490536260387492e-05, "loss": 1.9759, "step": 9147 }, { "epoch": 0.29514973452897797, "grad_norm": 0.37109375, "learning_rate": 2.4904180751682516e-05, "loss": 2.0045, "step": 9148 }, { "epoch": 0.2951819983827743, "grad_norm": 0.390625, "learning_rate": 2.4902998790471997e-05, "loss": 2.0044, "step": 9149 }, { "epoch": 0.29521426223657066, "grad_norm": 0.373046875, "learning_rate": 2.490181672025637e-05, "loss": 1.9939, "step": 9150 }, { "epoch": 0.295246526090367, "grad_norm": 0.376953125, "learning_rate": 2.4900634541048648e-05, "loss": 1.9653, "step": 9151 }, { "epoch": 0.29527878994416334, "grad_norm": 0.37890625, "learning_rate": 2.4899452252861842e-05, "loss": 1.9615, "step": 9152 }, { "epoch": 0.2953110537979597, "grad_norm": 0.365234375, "learning_rate": 2.4898269855708967e-05, "loss": 1.973, "step": 9153 }, { "epoch": 0.2953433176517561, "grad_norm": 0.3828125, "learning_rate": 2.4897087349603047e-05, "loss": 1.9483, "step": 9154 }, { "epoch": 0.29537558150555243, "grad_norm": 0.384765625, "learning_rate": 2.489590473455708e-05, "loss": 1.9977, "step": 9155 }, { "epoch": 0.2954078453593488, "grad_norm": 0.373046875, "learning_rate": 2.4894722010584098e-05, "loss": 1.9934, "step": 9156 }, { "epoch": 0.2954401092131451, "grad_norm": 0.359375, "learning_rate": 2.4893539177697112e-05, "loss": 1.9748, "step": 9157 }, { "epoch": 0.29547237306694146, "grad_norm": 0.365234375, "learning_rate": 2.489235623590914e-05, "loss": 2.0085, "step": 9158 }, { "epoch": 0.2955046369207378, "grad_norm": 0.359375, "learning_rate": 2.4891173185233216e-05, "loss": 1.9826, "step": 9159 }, { "epoch": 0.29553690077453415, "grad_norm": 0.375, "learning_rate": 2.4889990025682347e-05, "loss": 2.0316, "step": 9160 }, { "epoch": 0.2955691646283305, "grad_norm": 0.38671875, "learning_rate": 2.4888806757269567e-05, "loss": 2.0246, "step": 9161 }, { "epoch": 0.29560142848212684, "grad_norm": 0.380859375, "learning_rate": 2.48876233800079e-05, "loss": 1.9671, "step": 9162 }, { "epoch": 0.2956336923359232, "grad_norm": 0.34765625, "learning_rate": 2.488643989391036e-05, "loss": 1.9818, "step": 9163 }, { "epoch": 0.2956659561897195, "grad_norm": 0.39453125, "learning_rate": 2.4885256298989983e-05, "loss": 1.9997, "step": 9164 }, { "epoch": 0.29569822004351587, "grad_norm": 0.396484375, "learning_rate": 2.48840725952598e-05, "loss": 1.9938, "step": 9165 }, { "epoch": 0.2957304838973122, "grad_norm": 0.37890625, "learning_rate": 2.4882888782732836e-05, "loss": 2.0234, "step": 9166 }, { "epoch": 0.29576274775110856, "grad_norm": 0.359375, "learning_rate": 2.488170486142212e-05, "loss": 1.9884, "step": 9167 }, { "epoch": 0.2957950116049049, "grad_norm": 0.3515625, "learning_rate": 2.4880520831340688e-05, "loss": 1.9711, "step": 9168 }, { "epoch": 0.29582727545870124, "grad_norm": 0.369140625, "learning_rate": 2.487933669250157e-05, "loss": 2.0041, "step": 9169 }, { "epoch": 0.2958595393124976, "grad_norm": 0.3671875, "learning_rate": 2.48781524449178e-05, "loss": 2.0127, "step": 9170 }, { "epoch": 0.29589180316629393, "grad_norm": 0.3515625, "learning_rate": 2.487696808860242e-05, "loss": 1.9776, "step": 9171 }, { "epoch": 0.2959240670200903, "grad_norm": 0.37890625, "learning_rate": 2.4875783623568454e-05, "loss": 1.9857, "step": 9172 }, { "epoch": 0.2959563308738866, "grad_norm": 0.392578125, "learning_rate": 2.487459904982895e-05, "loss": 1.9831, "step": 9173 }, { "epoch": 0.295988594727683, "grad_norm": 0.41015625, "learning_rate": 2.487341436739694e-05, "loss": 1.9958, "step": 9174 }, { "epoch": 0.29602085858147936, "grad_norm": 0.3984375, "learning_rate": 2.4872229576285474e-05, "loss": 1.9851, "step": 9175 }, { "epoch": 0.2960531224352757, "grad_norm": 0.359375, "learning_rate": 2.4871044676507586e-05, "loss": 2.0085, "step": 9176 }, { "epoch": 0.29608538628907205, "grad_norm": 0.3828125, "learning_rate": 2.4869859668076317e-05, "loss": 2.005, "step": 9177 }, { "epoch": 0.2961176501428684, "grad_norm": 0.431640625, "learning_rate": 2.4868674551004714e-05, "loss": 1.9719, "step": 9178 }, { "epoch": 0.29614991399666474, "grad_norm": 0.40625, "learning_rate": 2.486748932530582e-05, "loss": 1.9787, "step": 9179 }, { "epoch": 0.2961821778504611, "grad_norm": 0.37109375, "learning_rate": 2.4866303990992686e-05, "loss": 1.9662, "step": 9180 }, { "epoch": 0.2962144417042574, "grad_norm": 0.396484375, "learning_rate": 2.4865118548078353e-05, "loss": 1.9988, "step": 9181 }, { "epoch": 0.29624670555805377, "grad_norm": 0.39453125, "learning_rate": 2.4863932996575874e-05, "loss": 1.9702, "step": 9182 }, { "epoch": 0.2962789694118501, "grad_norm": 0.357421875, "learning_rate": 2.4862747336498296e-05, "loss": 1.9801, "step": 9183 }, { "epoch": 0.29631123326564646, "grad_norm": 0.375, "learning_rate": 2.486156156785867e-05, "loss": 1.9858, "step": 9184 }, { "epoch": 0.2963434971194428, "grad_norm": 0.419921875, "learning_rate": 2.4860375690670054e-05, "loss": 1.9719, "step": 9185 }, { "epoch": 0.29637576097323914, "grad_norm": 0.376953125, "learning_rate": 2.4859189704945485e-05, "loss": 2.0223, "step": 9186 }, { "epoch": 0.2964080248270355, "grad_norm": 0.380859375, "learning_rate": 2.4858003610698042e-05, "loss": 1.991, "step": 9187 }, { "epoch": 0.29644028868083183, "grad_norm": 0.443359375, "learning_rate": 2.485681740794076e-05, "loss": 1.9711, "step": 9188 }, { "epoch": 0.2964725525346282, "grad_norm": 0.39453125, "learning_rate": 2.4855631096686705e-05, "loss": 2.0084, "step": 9189 }, { "epoch": 0.2965048163884245, "grad_norm": 0.41015625, "learning_rate": 2.4854444676948932e-05, "loss": 1.9856, "step": 9190 }, { "epoch": 0.29653708024222086, "grad_norm": 0.416015625, "learning_rate": 2.48532581487405e-05, "loss": 2.0088, "step": 9191 }, { "epoch": 0.2965693440960172, "grad_norm": 0.400390625, "learning_rate": 2.485207151207447e-05, "loss": 1.9607, "step": 9192 }, { "epoch": 0.29660160794981355, "grad_norm": 0.388671875, "learning_rate": 2.4850884766963908e-05, "loss": 2.0059, "step": 9193 }, { "epoch": 0.2966338718036099, "grad_norm": 0.384765625, "learning_rate": 2.4849697913421877e-05, "loss": 1.99, "step": 9194 }, { "epoch": 0.2966661356574063, "grad_norm": 0.392578125, "learning_rate": 2.484851095146143e-05, "loss": 1.9899, "step": 9195 }, { "epoch": 0.29669839951120264, "grad_norm": 0.3984375, "learning_rate": 2.4847323881095643e-05, "loss": 2.0142, "step": 9196 }, { "epoch": 0.296730663364999, "grad_norm": 0.365234375, "learning_rate": 2.484613670233758e-05, "loss": 1.9509, "step": 9197 }, { "epoch": 0.2967629272187953, "grad_norm": 0.369140625, "learning_rate": 2.4844949415200308e-05, "loss": 1.9931, "step": 9198 }, { "epoch": 0.29679519107259167, "grad_norm": 0.35546875, "learning_rate": 2.4843762019696894e-05, "loss": 2.0078, "step": 9199 }, { "epoch": 0.296827454926388, "grad_norm": 0.359375, "learning_rate": 2.484257451584041e-05, "loss": 1.9761, "step": 9200 }, { "epoch": 0.29685971878018436, "grad_norm": 0.361328125, "learning_rate": 2.4841386903643927e-05, "loss": 1.9864, "step": 9201 }, { "epoch": 0.2968919826339807, "grad_norm": 0.404296875, "learning_rate": 2.484019918312052e-05, "loss": 2.0071, "step": 9202 }, { "epoch": 0.29692424648777704, "grad_norm": 0.458984375, "learning_rate": 2.4839011354283257e-05, "loss": 2.0041, "step": 9203 }, { "epoch": 0.2969565103415734, "grad_norm": 0.39453125, "learning_rate": 2.4837823417145215e-05, "loss": 2.001, "step": 9204 }, { "epoch": 0.29698877419536973, "grad_norm": 0.375, "learning_rate": 2.4836635371719475e-05, "loss": 1.9549, "step": 9205 }, { "epoch": 0.2970210380491661, "grad_norm": 0.388671875, "learning_rate": 2.48354472180191e-05, "loss": 1.9957, "step": 9206 }, { "epoch": 0.2970533019029624, "grad_norm": 0.455078125, "learning_rate": 2.4834258956057186e-05, "loss": 2.0016, "step": 9207 }, { "epoch": 0.29708556575675876, "grad_norm": 0.41796875, "learning_rate": 2.48330705858468e-05, "loss": 1.9893, "step": 9208 }, { "epoch": 0.2971178296105551, "grad_norm": 0.404296875, "learning_rate": 2.483188210740103e-05, "loss": 2.0047, "step": 9209 }, { "epoch": 0.29715009346435145, "grad_norm": 0.470703125, "learning_rate": 2.4830693520732952e-05, "loss": 2.0037, "step": 9210 }, { "epoch": 0.2971823573181478, "grad_norm": 0.44140625, "learning_rate": 2.4829504825855654e-05, "loss": 1.9853, "step": 9211 }, { "epoch": 0.29721462117194414, "grad_norm": 0.419921875, "learning_rate": 2.482831602278222e-05, "loss": 2.0319, "step": 9212 }, { "epoch": 0.2972468850257405, "grad_norm": 0.3984375, "learning_rate": 2.4827127111525737e-05, "loss": 1.9764, "step": 9213 }, { "epoch": 0.2972791488795368, "grad_norm": 0.40625, "learning_rate": 2.482593809209928e-05, "loss": 1.987, "step": 9214 }, { "epoch": 0.2973114127333332, "grad_norm": 0.4296875, "learning_rate": 2.482474896451595e-05, "loss": 1.9697, "step": 9215 }, { "epoch": 0.29734367658712957, "grad_norm": 0.423828125, "learning_rate": 2.4823559728788837e-05, "loss": 1.944, "step": 9216 }, { "epoch": 0.2973759404409259, "grad_norm": 0.43359375, "learning_rate": 2.4822370384931015e-05, "loss": 1.9658, "step": 9217 }, { "epoch": 0.29740820429472226, "grad_norm": 0.466796875, "learning_rate": 2.4821180932955597e-05, "loss": 1.9846, "step": 9218 }, { "epoch": 0.2974404681485186, "grad_norm": 0.416015625, "learning_rate": 2.481999137287566e-05, "loss": 1.9889, "step": 9219 }, { "epoch": 0.29747273200231494, "grad_norm": 0.8515625, "learning_rate": 2.48188017047043e-05, "loss": 2.068, "step": 9220 }, { "epoch": 0.2975049958561113, "grad_norm": 1.34375, "learning_rate": 2.481761192845462e-05, "loss": 2.1068, "step": 9221 }, { "epoch": 0.29753725970990763, "grad_norm": 0.6875, "learning_rate": 2.481642204413971e-05, "loss": 2.1312, "step": 9222 }, { "epoch": 0.297569523563704, "grad_norm": 0.6796875, "learning_rate": 2.4815232051772666e-05, "loss": 2.1388, "step": 9223 }, { "epoch": 0.2976017874175003, "grad_norm": 0.77734375, "learning_rate": 2.4814041951366592e-05, "loss": 2.1135, "step": 9224 }, { "epoch": 0.29763405127129666, "grad_norm": 0.73046875, "learning_rate": 2.481285174293458e-05, "loss": 2.092, "step": 9225 }, { "epoch": 0.297666315125093, "grad_norm": 0.52734375, "learning_rate": 2.481166142648974e-05, "loss": 2.149, "step": 9226 }, { "epoch": 0.29769857897888935, "grad_norm": 0.68359375, "learning_rate": 2.481047100204517e-05, "loss": 2.1296, "step": 9227 }, { "epoch": 0.2977308428326857, "grad_norm": 0.60546875, "learning_rate": 2.4809280469613974e-05, "loss": 2.1184, "step": 9228 }, { "epoch": 0.29776310668648204, "grad_norm": 0.51171875, "learning_rate": 2.480808982920925e-05, "loss": 2.0976, "step": 9229 }, { "epoch": 0.2977953705402784, "grad_norm": 0.55859375, "learning_rate": 2.4806899080844113e-05, "loss": 2.0912, "step": 9230 }, { "epoch": 0.2978276343940747, "grad_norm": 0.48046875, "learning_rate": 2.4805708224531667e-05, "loss": 2.1155, "step": 9231 }, { "epoch": 0.29785989824787107, "grad_norm": 0.53125, "learning_rate": 2.4804517260285018e-05, "loss": 2.0921, "step": 9232 }, { "epoch": 0.2978921621016674, "grad_norm": 0.484375, "learning_rate": 2.480332618811728e-05, "loss": 2.1369, "step": 9233 }, { "epoch": 0.29792442595546376, "grad_norm": 0.515625, "learning_rate": 2.4802135008041555e-05, "loss": 2.1426, "step": 9234 }, { "epoch": 0.29795668980926016, "grad_norm": 0.494140625, "learning_rate": 2.4800943720070965e-05, "loss": 2.1757, "step": 9235 }, { "epoch": 0.2979889536630565, "grad_norm": 0.462890625, "learning_rate": 2.4799752324218616e-05, "loss": 2.1145, "step": 9236 }, { "epoch": 0.29802121751685284, "grad_norm": 0.451171875, "learning_rate": 2.479856082049762e-05, "loss": 2.1599, "step": 9237 }, { "epoch": 0.2980534813706492, "grad_norm": 0.4765625, "learning_rate": 2.47973692089211e-05, "loss": 2.1106, "step": 9238 }, { "epoch": 0.29808574522444553, "grad_norm": 0.443359375, "learning_rate": 2.479617748950217e-05, "loss": 2.1381, "step": 9239 }, { "epoch": 0.2981180090782419, "grad_norm": 0.486328125, "learning_rate": 2.479498566225394e-05, "loss": 2.1452, "step": 9240 }, { "epoch": 0.2981502729320382, "grad_norm": 0.474609375, "learning_rate": 2.479379372718954e-05, "loss": 2.1434, "step": 9241 }, { "epoch": 0.29818253678583456, "grad_norm": 0.50390625, "learning_rate": 2.4792601684322082e-05, "loss": 2.1461, "step": 9242 }, { "epoch": 0.2982148006396309, "grad_norm": 0.443359375, "learning_rate": 2.4791409533664688e-05, "loss": 2.1137, "step": 9243 }, { "epoch": 0.29824706449342725, "grad_norm": 0.50390625, "learning_rate": 2.479021727523049e-05, "loss": 2.0891, "step": 9244 }, { "epoch": 0.2982793283472236, "grad_norm": 0.47265625, "learning_rate": 2.4789024909032596e-05, "loss": 2.1132, "step": 9245 }, { "epoch": 0.29831159220101994, "grad_norm": 0.45703125, "learning_rate": 2.478783243508414e-05, "loss": 2.1255, "step": 9246 }, { "epoch": 0.2983438560548163, "grad_norm": 0.578125, "learning_rate": 2.4786639853398248e-05, "loss": 2.1579, "step": 9247 }, { "epoch": 0.2983761199086126, "grad_norm": 0.625, "learning_rate": 2.4785447163988042e-05, "loss": 2.0932, "step": 9248 }, { "epoch": 0.29840838376240897, "grad_norm": 0.482421875, "learning_rate": 2.478425436686666e-05, "loss": 2.1177, "step": 9249 }, { "epoch": 0.2984406476162053, "grad_norm": 0.66015625, "learning_rate": 2.478306146204722e-05, "loss": 2.1237, "step": 9250 }, { "epoch": 0.29847291147000166, "grad_norm": 0.50390625, "learning_rate": 2.478186844954286e-05, "loss": 2.1051, "step": 9251 }, { "epoch": 0.298505175323798, "grad_norm": 0.59375, "learning_rate": 2.4780675329366708e-05, "loss": 2.1223, "step": 9252 }, { "epoch": 0.29853743917759434, "grad_norm": 0.49609375, "learning_rate": 2.4779482101531898e-05, "loss": 2.1291, "step": 9253 }, { "epoch": 0.2985697030313907, "grad_norm": 0.5859375, "learning_rate": 2.4778288766051566e-05, "loss": 2.1341, "step": 9254 }, { "epoch": 0.2986019668851871, "grad_norm": 0.45703125, "learning_rate": 2.4777095322938846e-05, "loss": 2.1026, "step": 9255 }, { "epoch": 0.29863423073898343, "grad_norm": 0.640625, "learning_rate": 2.4775901772206877e-05, "loss": 2.1549, "step": 9256 }, { "epoch": 0.2986664945927798, "grad_norm": 0.48046875, "learning_rate": 2.477470811386879e-05, "loss": 2.1618, "step": 9257 }, { "epoch": 0.2986987584465761, "grad_norm": 0.51171875, "learning_rate": 2.477351434793773e-05, "loss": 2.1298, "step": 9258 }, { "epoch": 0.29873102230037246, "grad_norm": 0.466796875, "learning_rate": 2.4772320474426838e-05, "loss": 2.1262, "step": 9259 }, { "epoch": 0.2987632861541688, "grad_norm": 0.49609375, "learning_rate": 2.4771126493349253e-05, "loss": 2.1272, "step": 9260 }, { "epoch": 0.29879555000796515, "grad_norm": 0.494140625, "learning_rate": 2.4769932404718118e-05, "loss": 2.1276, "step": 9261 }, { "epoch": 0.2988278138617615, "grad_norm": 0.474609375, "learning_rate": 2.476873820854657e-05, "loss": 2.1107, "step": 9262 }, { "epoch": 0.29886007771555784, "grad_norm": 0.64453125, "learning_rate": 2.4767543904847764e-05, "loss": 2.1462, "step": 9263 }, { "epoch": 0.2988923415693542, "grad_norm": 0.515625, "learning_rate": 2.4766349493634845e-05, "loss": 2.1391, "step": 9264 }, { "epoch": 0.2989246054231505, "grad_norm": 0.5390625, "learning_rate": 2.4765154974920952e-05, "loss": 2.0945, "step": 9265 }, { "epoch": 0.29895686927694687, "grad_norm": 0.52734375, "learning_rate": 2.476396034871924e-05, "loss": 2.1335, "step": 9266 }, { "epoch": 0.2989891331307432, "grad_norm": 0.50390625, "learning_rate": 2.4762765615042855e-05, "loss": 2.1198, "step": 9267 }, { "epoch": 0.29902139698453956, "grad_norm": 0.45703125, "learning_rate": 2.4761570773904952e-05, "loss": 2.1333, "step": 9268 }, { "epoch": 0.2990536608383359, "grad_norm": 0.52734375, "learning_rate": 2.4760375825318684e-05, "loss": 2.1705, "step": 9269 }, { "epoch": 0.29908592469213224, "grad_norm": 0.53125, "learning_rate": 2.4759180769297197e-05, "loss": 2.166, "step": 9270 }, { "epoch": 0.2991181885459286, "grad_norm": 0.51953125, "learning_rate": 2.475798560585365e-05, "loss": 2.1363, "step": 9271 }, { "epoch": 0.29915045239972493, "grad_norm": 0.58984375, "learning_rate": 2.4756790335001194e-05, "loss": 2.1414, "step": 9272 }, { "epoch": 0.2991827162535213, "grad_norm": 0.53125, "learning_rate": 2.4755594956753e-05, "loss": 2.1175, "step": 9273 }, { "epoch": 0.2992149801073176, "grad_norm": 0.53515625, "learning_rate": 2.4754399471122207e-05, "loss": 2.1405, "step": 9274 }, { "epoch": 0.299247243961114, "grad_norm": 0.57421875, "learning_rate": 2.4753203878121984e-05, "loss": 2.1436, "step": 9275 }, { "epoch": 0.29927950781491036, "grad_norm": 0.54296875, "learning_rate": 2.4752008177765488e-05, "loss": 2.1456, "step": 9276 }, { "epoch": 0.2993117716687067, "grad_norm": 0.54296875, "learning_rate": 2.4750812370065882e-05, "loss": 2.1053, "step": 9277 }, { "epoch": 0.29934403552250305, "grad_norm": 0.50390625, "learning_rate": 2.4749616455036336e-05, "loss": 2.1281, "step": 9278 }, { "epoch": 0.2993762993762994, "grad_norm": 0.5390625, "learning_rate": 2.4748420432689996e-05, "loss": 2.1396, "step": 9279 }, { "epoch": 0.29940856323009574, "grad_norm": 0.49609375, "learning_rate": 2.4747224303040045e-05, "loss": 2.135, "step": 9280 }, { "epoch": 0.2994408270838921, "grad_norm": 0.482421875, "learning_rate": 2.474602806609964e-05, "loss": 2.132, "step": 9281 }, { "epoch": 0.2994730909376884, "grad_norm": 0.48828125, "learning_rate": 2.4744831721881948e-05, "loss": 2.1218, "step": 9282 }, { "epoch": 0.29950535479148477, "grad_norm": 0.44140625, "learning_rate": 2.4743635270400137e-05, "loss": 2.1088, "step": 9283 }, { "epoch": 0.2995376186452811, "grad_norm": 0.439453125, "learning_rate": 2.4742438711667385e-05, "loss": 2.1354, "step": 9284 }, { "epoch": 0.29956988249907746, "grad_norm": 0.447265625, "learning_rate": 2.4741242045696853e-05, "loss": 2.1282, "step": 9285 }, { "epoch": 0.2996021463528738, "grad_norm": 0.40625, "learning_rate": 2.4740045272501717e-05, "loss": 2.1035, "step": 9286 }, { "epoch": 0.29963441020667014, "grad_norm": 0.421875, "learning_rate": 2.4738848392095152e-05, "loss": 2.0738, "step": 9287 }, { "epoch": 0.2996666740604665, "grad_norm": 0.408203125, "learning_rate": 2.473765140449033e-05, "loss": 2.0986, "step": 9288 }, { "epoch": 0.29969893791426283, "grad_norm": 0.400390625, "learning_rate": 2.4736454309700426e-05, "loss": 2.0716, "step": 9289 }, { "epoch": 0.2997312017680592, "grad_norm": 0.515625, "learning_rate": 2.4735257107738616e-05, "loss": 2.1358, "step": 9290 }, { "epoch": 0.2997634656218555, "grad_norm": 0.45703125, "learning_rate": 2.4734059798618084e-05, "loss": 2.1013, "step": 9291 }, { "epoch": 0.29979572947565186, "grad_norm": 0.46484375, "learning_rate": 2.4732862382352005e-05, "loss": 2.0889, "step": 9292 }, { "epoch": 0.2998279933294482, "grad_norm": 0.44140625, "learning_rate": 2.4731664858953554e-05, "loss": 2.0072, "step": 9293 }, { "epoch": 0.29986025718324455, "grad_norm": 0.39453125, "learning_rate": 2.4730467228435922e-05, "loss": 2.0045, "step": 9294 }, { "epoch": 0.2998925210370409, "grad_norm": 0.427734375, "learning_rate": 2.4729269490812288e-05, "loss": 2.007, "step": 9295 }, { "epoch": 0.2999247848908373, "grad_norm": 0.39453125, "learning_rate": 2.4728071646095833e-05, "loss": 1.9854, "step": 9296 }, { "epoch": 0.29995704874463364, "grad_norm": 0.396484375, "learning_rate": 2.4726873694299742e-05, "loss": 1.9834, "step": 9297 }, { "epoch": 0.29998931259843, "grad_norm": 0.39453125, "learning_rate": 2.472567563543721e-05, "loss": 1.9891, "step": 9298 }, { "epoch": 0.3000215764522263, "grad_norm": 0.384765625, "learning_rate": 2.4724477469521412e-05, "loss": 2.0114, "step": 9299 }, { "epoch": 0.30005384030602267, "grad_norm": 0.384765625, "learning_rate": 2.4723279196565542e-05, "loss": 1.9789, "step": 9300 }, { "epoch": 0.300086104159819, "grad_norm": 0.357421875, "learning_rate": 2.4722080816582794e-05, "loss": 1.9888, "step": 9301 }, { "epoch": 0.30011836801361536, "grad_norm": 0.38671875, "learning_rate": 2.4720882329586354e-05, "loss": 1.979, "step": 9302 }, { "epoch": 0.3001506318674117, "grad_norm": 0.359375, "learning_rate": 2.4719683735589415e-05, "loss": 1.9624, "step": 9303 }, { "epoch": 0.30018289572120804, "grad_norm": 0.375, "learning_rate": 2.4718485034605168e-05, "loss": 2.0022, "step": 9304 }, { "epoch": 0.3002151595750044, "grad_norm": 0.388671875, "learning_rate": 2.4717286226646812e-05, "loss": 2.0074, "step": 9305 }, { "epoch": 0.30024742342880073, "grad_norm": 0.392578125, "learning_rate": 2.4716087311727542e-05, "loss": 2.0037, "step": 9306 }, { "epoch": 0.3002796872825971, "grad_norm": 0.359375, "learning_rate": 2.4714888289860546e-05, "loss": 1.9922, "step": 9307 }, { "epoch": 0.3003119511363934, "grad_norm": 0.36328125, "learning_rate": 2.4713689161059036e-05, "loss": 1.9843, "step": 9308 }, { "epoch": 0.30034421499018976, "grad_norm": 0.404296875, "learning_rate": 2.4712489925336204e-05, "loss": 2.0927, "step": 9309 }, { "epoch": 0.3003764788439861, "grad_norm": 0.37890625, "learning_rate": 2.4711290582705248e-05, "loss": 2.1083, "step": 9310 }, { "epoch": 0.30040874269778245, "grad_norm": 0.4296875, "learning_rate": 2.4710091133179374e-05, "loss": 2.1357, "step": 9311 }, { "epoch": 0.3004410065515788, "grad_norm": 0.41796875, "learning_rate": 2.4708891576771783e-05, "loss": 2.1071, "step": 9312 }, { "epoch": 0.30047327040537514, "grad_norm": 0.42578125, "learning_rate": 2.4707691913495675e-05, "loss": 2.1578, "step": 9313 }, { "epoch": 0.3005055342591715, "grad_norm": 0.4765625, "learning_rate": 2.4706492143364263e-05, "loss": 2.1222, "step": 9314 }, { "epoch": 0.3005377981129678, "grad_norm": 0.47265625, "learning_rate": 2.4705292266390747e-05, "loss": 2.124, "step": 9315 }, { "epoch": 0.3005700619667642, "grad_norm": 0.56640625, "learning_rate": 2.4704092282588336e-05, "loss": 2.1253, "step": 9316 }, { "epoch": 0.30060232582056057, "grad_norm": 0.51171875, "learning_rate": 2.4702892191970238e-05, "loss": 2.121, "step": 9317 }, { "epoch": 0.3006345896743569, "grad_norm": 0.53515625, "learning_rate": 2.4701691994549667e-05, "loss": 2.1307, "step": 9318 }, { "epoch": 0.30066685352815326, "grad_norm": 0.51171875, "learning_rate": 2.4700491690339828e-05, "loss": 2.1221, "step": 9319 }, { "epoch": 0.3006991173819496, "grad_norm": 0.470703125, "learning_rate": 2.4699291279353934e-05, "loss": 2.1188, "step": 9320 }, { "epoch": 0.30073138123574594, "grad_norm": 0.458984375, "learning_rate": 2.4698090761605205e-05, "loss": 2.0932, "step": 9321 }, { "epoch": 0.3007636450895423, "grad_norm": 0.435546875, "learning_rate": 2.469689013710684e-05, "loss": 2.0879, "step": 9322 }, { "epoch": 0.30079590894333863, "grad_norm": 0.4609375, "learning_rate": 2.4695689405872074e-05, "loss": 2.0932, "step": 9323 }, { "epoch": 0.300828172797135, "grad_norm": 0.48046875, "learning_rate": 2.469448856791411e-05, "loss": 2.1347, "step": 9324 }, { "epoch": 0.3008604366509313, "grad_norm": 0.54296875, "learning_rate": 2.469328762324618e-05, "loss": 2.108, "step": 9325 }, { "epoch": 0.30089270050472766, "grad_norm": 0.60546875, "learning_rate": 2.4692086571881486e-05, "loss": 2.1134, "step": 9326 }, { "epoch": 0.300924964358524, "grad_norm": 0.65625, "learning_rate": 2.4690885413833257e-05, "loss": 2.1347, "step": 9327 }, { "epoch": 0.30095722821232035, "grad_norm": 0.53515625, "learning_rate": 2.4689684149114714e-05, "loss": 2.1404, "step": 9328 }, { "epoch": 0.3009894920661167, "grad_norm": 0.5625, "learning_rate": 2.468848277773908e-05, "loss": 2.1071, "step": 9329 }, { "epoch": 0.30102175591991304, "grad_norm": 0.498046875, "learning_rate": 2.4687281299719574e-05, "loss": 2.1432, "step": 9330 }, { "epoch": 0.3010540197737094, "grad_norm": 0.486328125, "learning_rate": 2.468607971506943e-05, "loss": 2.0614, "step": 9331 }, { "epoch": 0.3010862836275057, "grad_norm": 0.4765625, "learning_rate": 2.4684878023801864e-05, "loss": 2.079, "step": 9332 }, { "epoch": 0.30111854748130207, "grad_norm": 0.486328125, "learning_rate": 2.468367622593011e-05, "loss": 2.1257, "step": 9333 }, { "epoch": 0.3011508113350984, "grad_norm": 0.52734375, "learning_rate": 2.46824743214674e-05, "loss": 2.1363, "step": 9334 }, { "epoch": 0.30118307518889476, "grad_norm": 0.5, "learning_rate": 2.4681272310426957e-05, "loss": 2.1231, "step": 9335 }, { "epoch": 0.30121533904269115, "grad_norm": 0.5234375, "learning_rate": 2.4680070192822008e-05, "loss": 2.1164, "step": 9336 }, { "epoch": 0.3012476028964875, "grad_norm": 0.52734375, "learning_rate": 2.4678867968665803e-05, "loss": 2.1002, "step": 9337 }, { "epoch": 0.30127986675028384, "grad_norm": 0.421875, "learning_rate": 2.4677665637971552e-05, "loss": 2.084, "step": 9338 }, { "epoch": 0.3013121306040802, "grad_norm": 0.515625, "learning_rate": 2.4676463200752503e-05, "loss": 2.0688, "step": 9339 }, { "epoch": 0.30134439445787653, "grad_norm": 0.486328125, "learning_rate": 2.467526065702189e-05, "loss": 2.0829, "step": 9340 }, { "epoch": 0.3013766583116729, "grad_norm": 0.498046875, "learning_rate": 2.467405800679295e-05, "loss": 2.0756, "step": 9341 }, { "epoch": 0.3014089221654692, "grad_norm": 0.44921875, "learning_rate": 2.4672855250078918e-05, "loss": 2.0903, "step": 9342 }, { "epoch": 0.30144118601926556, "grad_norm": 0.56640625, "learning_rate": 2.4671652386893034e-05, "loss": 2.1104, "step": 9343 }, { "epoch": 0.3014734498730619, "grad_norm": 0.5, "learning_rate": 2.4670449417248536e-05, "loss": 2.1259, "step": 9344 }, { "epoch": 0.30150571372685825, "grad_norm": 0.59765625, "learning_rate": 2.4669246341158674e-05, "loss": 2.1008, "step": 9345 }, { "epoch": 0.3015379775806546, "grad_norm": 0.482421875, "learning_rate": 2.4668043158636683e-05, "loss": 2.1059, "step": 9346 }, { "epoch": 0.30157024143445094, "grad_norm": 0.61328125, "learning_rate": 2.4666839869695807e-05, "loss": 2.1056, "step": 9347 }, { "epoch": 0.3016025052882473, "grad_norm": 0.498046875, "learning_rate": 2.4665636474349294e-05, "loss": 2.1065, "step": 9348 }, { "epoch": 0.3016347691420436, "grad_norm": 0.52734375, "learning_rate": 2.4664432972610394e-05, "loss": 2.0936, "step": 9349 }, { "epoch": 0.30166703299583997, "grad_norm": 0.5546875, "learning_rate": 2.4663229364492342e-05, "loss": 2.1167, "step": 9350 }, { "epoch": 0.3016992968496363, "grad_norm": 0.498046875, "learning_rate": 2.466202565000839e-05, "loss": 2.075, "step": 9351 }, { "epoch": 0.30173156070343266, "grad_norm": 0.46875, "learning_rate": 2.4660821829171804e-05, "loss": 2.0886, "step": 9352 }, { "epoch": 0.301763824557229, "grad_norm": 0.515625, "learning_rate": 2.465961790199581e-05, "loss": 2.1287, "step": 9353 }, { "epoch": 0.30179608841102534, "grad_norm": 0.486328125, "learning_rate": 2.465841386849368e-05, "loss": 2.0758, "step": 9354 }, { "epoch": 0.3018283522648217, "grad_norm": 0.498046875, "learning_rate": 2.4657209728678656e-05, "loss": 2.0913, "step": 9355 }, { "epoch": 0.3018606161186181, "grad_norm": 0.42578125, "learning_rate": 2.4656005482563994e-05, "loss": 2.0695, "step": 9356 }, { "epoch": 0.30189287997241443, "grad_norm": 0.48828125, "learning_rate": 2.4654801130162953e-05, "loss": 2.0629, "step": 9357 }, { "epoch": 0.3019251438262108, "grad_norm": 0.5, "learning_rate": 2.4653596671488787e-05, "loss": 2.1416, "step": 9358 }, { "epoch": 0.3019574076800071, "grad_norm": 0.447265625, "learning_rate": 2.4652392106554752e-05, "loss": 2.1051, "step": 9359 }, { "epoch": 0.30198967153380346, "grad_norm": 0.45703125, "learning_rate": 2.465118743537411e-05, "loss": 2.1172, "step": 9360 }, { "epoch": 0.3020219353875998, "grad_norm": 0.5078125, "learning_rate": 2.4649982657960123e-05, "loss": 2.0399, "step": 9361 }, { "epoch": 0.30205419924139615, "grad_norm": 0.69921875, "learning_rate": 2.464877777432605e-05, "loss": 2.0386, "step": 9362 }, { "epoch": 0.3020864630951925, "grad_norm": 0.53125, "learning_rate": 2.464757278448515e-05, "loss": 1.99, "step": 9363 }, { "epoch": 0.30211872694898884, "grad_norm": 0.5703125, "learning_rate": 2.464636768845069e-05, "loss": 1.9794, "step": 9364 }, { "epoch": 0.3021509908027852, "grad_norm": 0.53515625, "learning_rate": 2.4645162486235938e-05, "loss": 1.9902, "step": 9365 }, { "epoch": 0.3021832546565815, "grad_norm": 0.51953125, "learning_rate": 2.4643957177854156e-05, "loss": 2.0282, "step": 9366 }, { "epoch": 0.30221551851037787, "grad_norm": 0.466796875, "learning_rate": 2.4642751763318607e-05, "loss": 2.0542, "step": 9367 }, { "epoch": 0.3022477823641742, "grad_norm": 0.48828125, "learning_rate": 2.464154624264257e-05, "loss": 2.0144, "step": 9368 }, { "epoch": 0.30228004621797055, "grad_norm": 0.4453125, "learning_rate": 2.4640340615839307e-05, "loss": 2.0337, "step": 9369 }, { "epoch": 0.3023123100717669, "grad_norm": 0.46484375, "learning_rate": 2.463913488292209e-05, "loss": 2.0466, "step": 9370 }, { "epoch": 0.30234457392556324, "grad_norm": 0.421875, "learning_rate": 2.4637929043904194e-05, "loss": 2.0355, "step": 9371 }, { "epoch": 0.3023768377793596, "grad_norm": 0.40625, "learning_rate": 2.4636723098798886e-05, "loss": 2.0086, "step": 9372 }, { "epoch": 0.30240910163315593, "grad_norm": 0.79296875, "learning_rate": 2.4635517047619445e-05, "loss": 1.981, "step": 9373 }, { "epoch": 0.3024413654869523, "grad_norm": 0.43359375, "learning_rate": 2.4634310890379146e-05, "loss": 1.992, "step": 9374 }, { "epoch": 0.3024736293407486, "grad_norm": 0.44921875, "learning_rate": 2.4633104627091258e-05, "loss": 1.9622, "step": 9375 }, { "epoch": 0.30250589319454496, "grad_norm": 0.443359375, "learning_rate": 2.463189825776907e-05, "loss": 1.967, "step": 9376 }, { "epoch": 0.30253815704834136, "grad_norm": 0.458984375, "learning_rate": 2.4630691782425855e-05, "loss": 1.9841, "step": 9377 }, { "epoch": 0.3025704209021377, "grad_norm": 0.455078125, "learning_rate": 2.4629485201074896e-05, "loss": 1.9855, "step": 9378 }, { "epoch": 0.30260268475593405, "grad_norm": 0.41015625, "learning_rate": 2.4628278513729473e-05, "loss": 1.989, "step": 9379 }, { "epoch": 0.3026349486097304, "grad_norm": 0.412109375, "learning_rate": 2.4627071720402864e-05, "loss": 1.9873, "step": 9380 }, { "epoch": 0.30266721246352674, "grad_norm": 0.400390625, "learning_rate": 2.4625864821108354e-05, "loss": 2.0111, "step": 9381 }, { "epoch": 0.3026994763173231, "grad_norm": 0.39453125, "learning_rate": 2.462465781585924e-05, "loss": 2.0057, "step": 9382 }, { "epoch": 0.3027317401711194, "grad_norm": 0.392578125, "learning_rate": 2.4623450704668785e-05, "loss": 1.9273, "step": 9383 }, { "epoch": 0.30276400402491577, "grad_norm": 0.392578125, "learning_rate": 2.462224348755029e-05, "loss": 2.0046, "step": 9384 }, { "epoch": 0.3027962678787121, "grad_norm": 0.39453125, "learning_rate": 2.4621036164517046e-05, "loss": 1.9751, "step": 9385 }, { "epoch": 0.30282853173250845, "grad_norm": 0.400390625, "learning_rate": 2.461982873558234e-05, "loss": 1.9927, "step": 9386 }, { "epoch": 0.3028607955863048, "grad_norm": 0.39453125, "learning_rate": 2.461862120075946e-05, "loss": 2.0134, "step": 9387 }, { "epoch": 0.30289305944010114, "grad_norm": 0.404296875, "learning_rate": 2.4617413560061693e-05, "loss": 2.0268, "step": 9388 }, { "epoch": 0.3029253232938975, "grad_norm": 0.423828125, "learning_rate": 2.4616205813502342e-05, "loss": 2.0337, "step": 9389 }, { "epoch": 0.30295758714769383, "grad_norm": 0.431640625, "learning_rate": 2.461499796109469e-05, "loss": 1.9903, "step": 9390 }, { "epoch": 0.3029898510014902, "grad_norm": 0.4296875, "learning_rate": 2.4613790002852042e-05, "loss": 2.0136, "step": 9391 }, { "epoch": 0.3030221148552865, "grad_norm": 0.4140625, "learning_rate": 2.4612581938787693e-05, "loss": 1.9957, "step": 9392 }, { "epoch": 0.30305437870908286, "grad_norm": 0.388671875, "learning_rate": 2.4611373768914933e-05, "loss": 1.9773, "step": 9393 }, { "epoch": 0.3030866425628792, "grad_norm": 0.4296875, "learning_rate": 2.461016549324707e-05, "loss": 1.9921, "step": 9394 }, { "epoch": 0.30311890641667555, "grad_norm": 0.447265625, "learning_rate": 2.46089571117974e-05, "loss": 2.0171, "step": 9395 }, { "epoch": 0.3031511702704719, "grad_norm": 0.373046875, "learning_rate": 2.4607748624579218e-05, "loss": 1.9856, "step": 9396 }, { "epoch": 0.3031834341242683, "grad_norm": 0.388671875, "learning_rate": 2.460654003160584e-05, "loss": 1.969, "step": 9397 }, { "epoch": 0.30321569797806464, "grad_norm": 0.3984375, "learning_rate": 2.4605331332890553e-05, "loss": 2.0239, "step": 9398 }, { "epoch": 0.303247961831861, "grad_norm": 0.376953125, "learning_rate": 2.4604122528446675e-05, "loss": 1.9839, "step": 9399 }, { "epoch": 0.3032802256856573, "grad_norm": 0.38671875, "learning_rate": 2.4602913618287505e-05, "loss": 1.9919, "step": 9400 }, { "epoch": 0.30331248953945367, "grad_norm": 0.408203125, "learning_rate": 2.4601704602426345e-05, "loss": 2.0019, "step": 9401 }, { "epoch": 0.30334475339325, "grad_norm": 0.396484375, "learning_rate": 2.4600495480876517e-05, "loss": 1.9689, "step": 9402 }, { "epoch": 0.30337701724704635, "grad_norm": 0.359375, "learning_rate": 2.459928625365132e-05, "loss": 1.9698, "step": 9403 }, { "epoch": 0.3034092811008427, "grad_norm": 0.384765625, "learning_rate": 2.4598076920764064e-05, "loss": 1.9982, "step": 9404 }, { "epoch": 0.30344154495463904, "grad_norm": 0.35546875, "learning_rate": 2.4596867482228066e-05, "loss": 1.9948, "step": 9405 }, { "epoch": 0.3034738088084354, "grad_norm": 0.345703125, "learning_rate": 2.4595657938056634e-05, "loss": 1.9838, "step": 9406 }, { "epoch": 0.30350607266223173, "grad_norm": 0.36328125, "learning_rate": 2.459444828826308e-05, "loss": 1.9884, "step": 9407 }, { "epoch": 0.3035383365160281, "grad_norm": 0.357421875, "learning_rate": 2.4593238532860727e-05, "loss": 1.9893, "step": 9408 }, { "epoch": 0.3035706003698244, "grad_norm": 0.35546875, "learning_rate": 2.4592028671862882e-05, "loss": 2.0237, "step": 9409 }, { "epoch": 0.30360286422362076, "grad_norm": 0.353515625, "learning_rate": 2.4590818705282872e-05, "loss": 1.9856, "step": 9410 }, { "epoch": 0.3036351280774171, "grad_norm": 0.3515625, "learning_rate": 2.458960863313401e-05, "loss": 1.9743, "step": 9411 }, { "epoch": 0.30366739193121345, "grad_norm": 0.3828125, "learning_rate": 2.4588398455429607e-05, "loss": 1.9947, "step": 9412 }, { "epoch": 0.3036996557850098, "grad_norm": 0.361328125, "learning_rate": 2.4587188172183005e-05, "loss": 2.0112, "step": 9413 }, { "epoch": 0.30373191963880614, "grad_norm": 0.36328125, "learning_rate": 2.4585977783407503e-05, "loss": 2.006, "step": 9414 }, { "epoch": 0.3037641834926025, "grad_norm": 0.349609375, "learning_rate": 2.458476728911644e-05, "loss": 1.9828, "step": 9415 }, { "epoch": 0.3037964473463988, "grad_norm": 0.400390625, "learning_rate": 2.4583556689323137e-05, "loss": 1.9939, "step": 9416 }, { "epoch": 0.3038287112001952, "grad_norm": 0.390625, "learning_rate": 2.4582345984040913e-05, "loss": 2.0113, "step": 9417 }, { "epoch": 0.30386097505399157, "grad_norm": 0.35546875, "learning_rate": 2.4581135173283097e-05, "loss": 1.9872, "step": 9418 }, { "epoch": 0.3038932389077879, "grad_norm": 0.365234375, "learning_rate": 2.457992425706302e-05, "loss": 2.0036, "step": 9419 }, { "epoch": 0.30392550276158425, "grad_norm": 0.3671875, "learning_rate": 2.4578713235394015e-05, "loss": 1.9702, "step": 9420 }, { "epoch": 0.3039577666153806, "grad_norm": 0.369140625, "learning_rate": 2.4577502108289397e-05, "loss": 2.0225, "step": 9421 }, { "epoch": 0.30399003046917694, "grad_norm": 0.380859375, "learning_rate": 2.4576290875762516e-05, "loss": 1.9731, "step": 9422 }, { "epoch": 0.3040222943229733, "grad_norm": 0.400390625, "learning_rate": 2.4575079537826687e-05, "loss": 1.9371, "step": 9423 }, { "epoch": 0.30405455817676963, "grad_norm": 0.37109375, "learning_rate": 2.4573868094495258e-05, "loss": 2.0146, "step": 9424 }, { "epoch": 0.304086822030566, "grad_norm": 0.365234375, "learning_rate": 2.4572656545781552e-05, "loss": 1.9476, "step": 9425 }, { "epoch": 0.3041190858843623, "grad_norm": 0.396484375, "learning_rate": 2.4571444891698913e-05, "loss": 1.9917, "step": 9426 }, { "epoch": 0.30415134973815866, "grad_norm": 0.384765625, "learning_rate": 2.4570233132260675e-05, "loss": 1.986, "step": 9427 }, { "epoch": 0.304183613591955, "grad_norm": 0.3828125, "learning_rate": 2.4569021267480176e-05, "loss": 2.0146, "step": 9428 }, { "epoch": 0.30421587744575135, "grad_norm": 0.3984375, "learning_rate": 2.4567809297370754e-05, "loss": 2.0227, "step": 9429 }, { "epoch": 0.3042481412995477, "grad_norm": 0.39453125, "learning_rate": 2.4566597221945758e-05, "loss": 2.0059, "step": 9430 }, { "epoch": 0.30428040515334404, "grad_norm": 0.400390625, "learning_rate": 2.4565385041218516e-05, "loss": 1.9608, "step": 9431 }, { "epoch": 0.3043126690071404, "grad_norm": 0.39453125, "learning_rate": 2.4564172755202376e-05, "loss": 2.0052, "step": 9432 }, { "epoch": 0.3043449328609367, "grad_norm": 0.416015625, "learning_rate": 2.456296036391069e-05, "loss": 1.9862, "step": 9433 }, { "epoch": 0.30437719671473307, "grad_norm": 0.37890625, "learning_rate": 2.4561747867356792e-05, "loss": 2.0055, "step": 9434 }, { "epoch": 0.3044094605685294, "grad_norm": 0.451171875, "learning_rate": 2.4560535265554033e-05, "loss": 1.9997, "step": 9435 }, { "epoch": 0.30444172442232575, "grad_norm": 0.37109375, "learning_rate": 2.4559322558515766e-05, "loss": 2.0046, "step": 9436 }, { "epoch": 0.30447398827612215, "grad_norm": 0.408203125, "learning_rate": 2.455810974625533e-05, "loss": 2.0135, "step": 9437 }, { "epoch": 0.3045062521299185, "grad_norm": 0.4375, "learning_rate": 2.4556896828786076e-05, "loss": 1.9905, "step": 9438 }, { "epoch": 0.30453851598371484, "grad_norm": 0.423828125, "learning_rate": 2.455568380612136e-05, "loss": 2.0028, "step": 9439 }, { "epoch": 0.3045707798375112, "grad_norm": 0.384765625, "learning_rate": 2.4554470678274536e-05, "loss": 2.0021, "step": 9440 }, { "epoch": 0.30460304369130753, "grad_norm": 0.4296875, "learning_rate": 2.455325744525895e-05, "loss": 2.0206, "step": 9441 }, { "epoch": 0.3046353075451039, "grad_norm": 0.431640625, "learning_rate": 2.4552044107087958e-05, "loss": 2.0149, "step": 9442 }, { "epoch": 0.3046675713989002, "grad_norm": 0.3671875, "learning_rate": 2.455083066377492e-05, "loss": 1.9775, "step": 9443 }, { "epoch": 0.30469983525269656, "grad_norm": 0.4140625, "learning_rate": 2.4549617115333188e-05, "loss": 2.0159, "step": 9444 }, { "epoch": 0.3047320991064929, "grad_norm": 0.42578125, "learning_rate": 2.4548403461776124e-05, "loss": 1.9976, "step": 9445 }, { "epoch": 0.30476436296028925, "grad_norm": 0.3984375, "learning_rate": 2.454718970311708e-05, "loss": 1.9964, "step": 9446 }, { "epoch": 0.3047966268140856, "grad_norm": 0.396484375, "learning_rate": 2.4545975839369427e-05, "loss": 2.0087, "step": 9447 }, { "epoch": 0.30482889066788194, "grad_norm": 0.35546875, "learning_rate": 2.4544761870546513e-05, "loss": 2.0069, "step": 9448 }, { "epoch": 0.3048611545216783, "grad_norm": 0.41796875, "learning_rate": 2.4543547796661716e-05, "loss": 1.9746, "step": 9449 }, { "epoch": 0.3048934183754746, "grad_norm": 0.369140625, "learning_rate": 2.4542333617728386e-05, "loss": 2.0058, "step": 9450 }, { "epoch": 0.30492568222927097, "grad_norm": 0.41015625, "learning_rate": 2.4541119333759892e-05, "loss": 1.9957, "step": 9451 }, { "epoch": 0.3049579460830673, "grad_norm": 0.4140625, "learning_rate": 2.4539904944769602e-05, "loss": 1.9833, "step": 9452 }, { "epoch": 0.30499020993686365, "grad_norm": 0.41796875, "learning_rate": 2.4538690450770886e-05, "loss": 2.0128, "step": 9453 }, { "epoch": 0.30502247379066, "grad_norm": 0.376953125, "learning_rate": 2.4537475851777106e-05, "loss": 1.9682, "step": 9454 }, { "epoch": 0.30505473764445634, "grad_norm": 0.392578125, "learning_rate": 2.4536261147801633e-05, "loss": 2.0051, "step": 9455 }, { "epoch": 0.3050870014982527, "grad_norm": 0.5, "learning_rate": 2.4535046338857842e-05, "loss": 1.9967, "step": 9456 }, { "epoch": 0.30511926535204903, "grad_norm": 0.46875, "learning_rate": 2.4533831424959096e-05, "loss": 1.986, "step": 9457 }, { "epoch": 0.30515152920584543, "grad_norm": 0.46484375, "learning_rate": 2.4532616406118776e-05, "loss": 1.9652, "step": 9458 }, { "epoch": 0.3051837930596418, "grad_norm": 0.486328125, "learning_rate": 2.4531401282350254e-05, "loss": 1.9964, "step": 9459 }, { "epoch": 0.3052160569134381, "grad_norm": 0.380859375, "learning_rate": 2.4530186053666902e-05, "loss": 1.9945, "step": 9460 }, { "epoch": 0.30524832076723446, "grad_norm": 0.435546875, "learning_rate": 2.45289707200821e-05, "loss": 2.0108, "step": 9461 }, { "epoch": 0.3052805846210308, "grad_norm": 0.408203125, "learning_rate": 2.4527755281609225e-05, "loss": 1.9522, "step": 9462 }, { "epoch": 0.30531284847482715, "grad_norm": 0.431640625, "learning_rate": 2.4526539738261655e-05, "loss": 2.0118, "step": 9463 }, { "epoch": 0.3053451123286235, "grad_norm": 0.392578125, "learning_rate": 2.452532409005277e-05, "loss": 1.9723, "step": 9464 }, { "epoch": 0.30537737618241984, "grad_norm": 0.423828125, "learning_rate": 2.452410833699595e-05, "loss": 1.9868, "step": 9465 }, { "epoch": 0.3054096400362162, "grad_norm": 0.421875, "learning_rate": 2.452289247910458e-05, "loss": 1.9875, "step": 9466 }, { "epoch": 0.3054419038900125, "grad_norm": 0.4375, "learning_rate": 2.4521676516392044e-05, "loss": 1.9635, "step": 9467 }, { "epoch": 0.30547416774380887, "grad_norm": 0.40234375, "learning_rate": 2.4520460448871718e-05, "loss": 1.9703, "step": 9468 }, { "epoch": 0.3055064315976052, "grad_norm": 0.41015625, "learning_rate": 2.4519244276556994e-05, "loss": 1.9982, "step": 9469 }, { "epoch": 0.30553869545140155, "grad_norm": 0.388671875, "learning_rate": 2.451802799946126e-05, "loss": 1.9673, "step": 9470 }, { "epoch": 0.3055709593051979, "grad_norm": 0.42578125, "learning_rate": 2.45168116175979e-05, "loss": 1.9396, "step": 9471 }, { "epoch": 0.30560322315899424, "grad_norm": 0.44921875, "learning_rate": 2.451559513098031e-05, "loss": 1.9509, "step": 9472 }, { "epoch": 0.3056354870127906, "grad_norm": 0.404296875, "learning_rate": 2.451437853962187e-05, "loss": 1.9463, "step": 9473 }, { "epoch": 0.30566775086658693, "grad_norm": 0.376953125, "learning_rate": 2.451316184353598e-05, "loss": 1.981, "step": 9474 }, { "epoch": 0.3057000147203833, "grad_norm": 0.447265625, "learning_rate": 2.4511945042736025e-05, "loss": 1.9832, "step": 9475 }, { "epoch": 0.3057322785741796, "grad_norm": 0.5234375, "learning_rate": 2.4510728137235406e-05, "loss": 2.0001, "step": 9476 }, { "epoch": 0.30576454242797596, "grad_norm": 0.38671875, "learning_rate": 2.4509511127047512e-05, "loss": 1.9869, "step": 9477 }, { "epoch": 0.30579680628177236, "grad_norm": 0.443359375, "learning_rate": 2.4508294012185742e-05, "loss": 1.9873, "step": 9478 }, { "epoch": 0.3058290701355687, "grad_norm": 0.4375, "learning_rate": 2.4507076792663494e-05, "loss": 1.9831, "step": 9479 }, { "epoch": 0.30586133398936505, "grad_norm": 0.361328125, "learning_rate": 2.4505859468494167e-05, "loss": 1.9935, "step": 9480 }, { "epoch": 0.3058935978431614, "grad_norm": 0.455078125, "learning_rate": 2.4504642039691158e-05, "loss": 2.014, "step": 9481 }, { "epoch": 0.30592586169695773, "grad_norm": 0.423828125, "learning_rate": 2.4503424506267864e-05, "loss": 1.97, "step": 9482 }, { "epoch": 0.3059581255507541, "grad_norm": 0.353515625, "learning_rate": 2.4502206868237692e-05, "loss": 1.9959, "step": 9483 }, { "epoch": 0.3059903894045504, "grad_norm": 0.427734375, "learning_rate": 2.450098912561405e-05, "loss": 1.9929, "step": 9484 }, { "epoch": 0.30602265325834677, "grad_norm": 0.373046875, "learning_rate": 2.4499771278410334e-05, "loss": 1.9932, "step": 9485 }, { "epoch": 0.3060549171121431, "grad_norm": 0.419921875, "learning_rate": 2.449855332663994e-05, "loss": 1.9835, "step": 9486 }, { "epoch": 0.30608718096593945, "grad_norm": 0.396484375, "learning_rate": 2.4497335270316297e-05, "loss": 1.957, "step": 9487 }, { "epoch": 0.3061194448197358, "grad_norm": 0.37890625, "learning_rate": 2.4496117109452795e-05, "loss": 1.9971, "step": 9488 }, { "epoch": 0.30615170867353214, "grad_norm": 0.431640625, "learning_rate": 2.4494898844062846e-05, "loss": 1.9991, "step": 9489 }, { "epoch": 0.3061839725273285, "grad_norm": 0.357421875, "learning_rate": 2.4493680474159866e-05, "loss": 2.0182, "step": 9490 }, { "epoch": 0.30621623638112483, "grad_norm": 0.439453125, "learning_rate": 2.4492461999757267e-05, "loss": 2.0083, "step": 9491 }, { "epoch": 0.3062485002349212, "grad_norm": 0.36328125, "learning_rate": 2.449124342086845e-05, "loss": 1.9968, "step": 9492 }, { "epoch": 0.3062807640887175, "grad_norm": 0.421875, "learning_rate": 2.4490024737506837e-05, "loss": 2.0045, "step": 9493 }, { "epoch": 0.30631302794251386, "grad_norm": 0.384765625, "learning_rate": 2.4488805949685838e-05, "loss": 2.0227, "step": 9494 }, { "epoch": 0.3063452917963102, "grad_norm": 0.404296875, "learning_rate": 2.448758705741887e-05, "loss": 2.0007, "step": 9495 }, { "epoch": 0.30637755565010655, "grad_norm": 0.388671875, "learning_rate": 2.448636806071935e-05, "loss": 2.0049, "step": 9496 }, { "epoch": 0.3064098195039029, "grad_norm": 0.41015625, "learning_rate": 2.4485148959600697e-05, "loss": 1.9865, "step": 9497 }, { "epoch": 0.3064420833576993, "grad_norm": 0.388671875, "learning_rate": 2.4483929754076327e-05, "loss": 1.992, "step": 9498 }, { "epoch": 0.30647434721149563, "grad_norm": 0.4140625, "learning_rate": 2.4482710444159665e-05, "loss": 1.9955, "step": 9499 }, { "epoch": 0.306506611065292, "grad_norm": 0.39453125, "learning_rate": 2.4481491029864123e-05, "loss": 1.9511, "step": 9500 }, { "epoch": 0.3065388749190883, "grad_norm": 0.392578125, "learning_rate": 2.448027151120314e-05, "loss": 1.9938, "step": 9501 }, { "epoch": 0.30657113877288467, "grad_norm": 0.384765625, "learning_rate": 2.4479051888190122e-05, "loss": 2.0153, "step": 9502 }, { "epoch": 0.306603402626681, "grad_norm": 0.37109375, "learning_rate": 2.4477832160838504e-05, "loss": 1.9906, "step": 9503 }, { "epoch": 0.30663566648047735, "grad_norm": 0.396484375, "learning_rate": 2.4476612329161708e-05, "loss": 1.9833, "step": 9504 }, { "epoch": 0.3066679303342737, "grad_norm": 0.380859375, "learning_rate": 2.447539239317316e-05, "loss": 1.9292, "step": 9505 }, { "epoch": 0.30670019418807004, "grad_norm": 0.396484375, "learning_rate": 2.4474172352886293e-05, "loss": 1.9655, "step": 9506 }, { "epoch": 0.3067324580418664, "grad_norm": 0.404296875, "learning_rate": 2.4472952208314533e-05, "loss": 1.9961, "step": 9507 }, { "epoch": 0.30676472189566273, "grad_norm": 0.6015625, "learning_rate": 2.4471731959471312e-05, "loss": 2.0255, "step": 9508 }, { "epoch": 0.3067969857494591, "grad_norm": 0.58984375, "learning_rate": 2.447051160637006e-05, "loss": 2.0604, "step": 9509 }, { "epoch": 0.3068292496032554, "grad_norm": 0.53515625, "learning_rate": 2.4469291149024207e-05, "loss": 2.0575, "step": 9510 }, { "epoch": 0.30686151345705176, "grad_norm": 0.478515625, "learning_rate": 2.446807058744719e-05, "loss": 2.0287, "step": 9511 }, { "epoch": 0.3068937773108481, "grad_norm": 0.486328125, "learning_rate": 2.446684992165245e-05, "loss": 2.0789, "step": 9512 }, { "epoch": 0.30692604116464445, "grad_norm": 0.439453125, "learning_rate": 2.4465629151653417e-05, "loss": 2.0125, "step": 9513 }, { "epoch": 0.3069583050184408, "grad_norm": 0.482421875, "learning_rate": 2.4464408277463522e-05, "loss": 2.054, "step": 9514 }, { "epoch": 0.30699056887223714, "grad_norm": 0.40234375, "learning_rate": 2.446318729909622e-05, "loss": 2.0036, "step": 9515 }, { "epoch": 0.3070228327260335, "grad_norm": 0.44140625, "learning_rate": 2.4461966216564936e-05, "loss": 2.0738, "step": 9516 }, { "epoch": 0.3070550965798298, "grad_norm": 0.388671875, "learning_rate": 2.446074502988311e-05, "loss": 2.0632, "step": 9517 }, { "epoch": 0.3070873604336262, "grad_norm": 0.392578125, "learning_rate": 2.44595237390642e-05, "loss": 2.0421, "step": 9518 }, { "epoch": 0.30711962428742257, "grad_norm": 0.40234375, "learning_rate": 2.4458302344121637e-05, "loss": 2.0499, "step": 9519 }, { "epoch": 0.3071518881412189, "grad_norm": 0.412109375, "learning_rate": 2.445708084506886e-05, "loss": 2.0491, "step": 9520 }, { "epoch": 0.30718415199501525, "grad_norm": 0.3828125, "learning_rate": 2.4455859241919327e-05, "loss": 2.0521, "step": 9521 }, { "epoch": 0.3072164158488116, "grad_norm": 0.390625, "learning_rate": 2.4454637534686477e-05, "loss": 2.0484, "step": 9522 }, { "epoch": 0.30724867970260794, "grad_norm": 0.380859375, "learning_rate": 2.4453415723383762e-05, "loss": 2.0466, "step": 9523 }, { "epoch": 0.3072809435564043, "grad_norm": 0.51953125, "learning_rate": 2.4452193808024627e-05, "loss": 2.0664, "step": 9524 }, { "epoch": 0.30731320741020063, "grad_norm": 0.37890625, "learning_rate": 2.4450971788622525e-05, "loss": 2.0344, "step": 9525 }, { "epoch": 0.30734547126399697, "grad_norm": 0.416015625, "learning_rate": 2.4449749665190903e-05, "loss": 2.0546, "step": 9526 }, { "epoch": 0.3073777351177933, "grad_norm": 0.388671875, "learning_rate": 2.444852743774322e-05, "loss": 2.0261, "step": 9527 }, { "epoch": 0.30740999897158966, "grad_norm": 0.392578125, "learning_rate": 2.4447305106292924e-05, "loss": 2.0336, "step": 9528 }, { "epoch": 0.307442262825386, "grad_norm": 0.373046875, "learning_rate": 2.444608267085347e-05, "loss": 2.0099, "step": 9529 }, { "epoch": 0.30747452667918235, "grad_norm": 0.384765625, "learning_rate": 2.4444860131438315e-05, "loss": 2.0404, "step": 9530 }, { "epoch": 0.3075067905329787, "grad_norm": 0.369140625, "learning_rate": 2.444363748806091e-05, "loss": 2.0359, "step": 9531 }, { "epoch": 0.30753905438677503, "grad_norm": 0.3671875, "learning_rate": 2.4442414740734727e-05, "loss": 2.0064, "step": 9532 }, { "epoch": 0.3075713182405714, "grad_norm": 0.7734375, "learning_rate": 2.4441191889473216e-05, "loss": 2.1272, "step": 9533 }, { "epoch": 0.3076035820943677, "grad_norm": 1.7265625, "learning_rate": 2.443996893428983e-05, "loss": 2.2925, "step": 9534 }, { "epoch": 0.30763584594816407, "grad_norm": 1.5390625, "learning_rate": 2.4438745875198046e-05, "loss": 2.2999, "step": 9535 }, { "epoch": 0.3076681098019604, "grad_norm": 0.71875, "learning_rate": 2.4437522712211314e-05, "loss": 2.1844, "step": 9536 }, { "epoch": 0.30770037365575675, "grad_norm": 0.6796875, "learning_rate": 2.4436299445343102e-05, "loss": 1.9862, "step": 9537 }, { "epoch": 0.30773263750955315, "grad_norm": 0.71484375, "learning_rate": 2.4435076074606882e-05, "loss": 1.9854, "step": 9538 }, { "epoch": 0.3077649013633495, "grad_norm": 0.51171875, "learning_rate": 2.4433852600016102e-05, "loss": 1.9911, "step": 9539 }, { "epoch": 0.30779716521714584, "grad_norm": 0.63671875, "learning_rate": 2.4432629021584246e-05, "loss": 2.0058, "step": 9540 }, { "epoch": 0.3078294290709422, "grad_norm": 0.51171875, "learning_rate": 2.4431405339324778e-05, "loss": 1.9968, "step": 9541 }, { "epoch": 0.30786169292473853, "grad_norm": 0.52734375, "learning_rate": 2.4430181553251167e-05, "loss": 2.0047, "step": 9542 }, { "epoch": 0.30789395677853487, "grad_norm": 0.5078125, "learning_rate": 2.442895766337688e-05, "loss": 2.0215, "step": 9543 }, { "epoch": 0.3079262206323312, "grad_norm": 0.451171875, "learning_rate": 2.4427733669715396e-05, "loss": 1.9989, "step": 9544 }, { "epoch": 0.30795848448612756, "grad_norm": 0.4765625, "learning_rate": 2.442650957228018e-05, "loss": 2.0174, "step": 9545 }, { "epoch": 0.3079907483399239, "grad_norm": 0.439453125, "learning_rate": 2.4425285371084708e-05, "loss": 1.9704, "step": 9546 }, { "epoch": 0.30802301219372025, "grad_norm": 0.47265625, "learning_rate": 2.4424061066142457e-05, "loss": 1.9602, "step": 9547 }, { "epoch": 0.3080552760475166, "grad_norm": 0.400390625, "learning_rate": 2.4422836657466904e-05, "loss": 1.9844, "step": 9548 }, { "epoch": 0.30808753990131293, "grad_norm": 0.57421875, "learning_rate": 2.4421612145071522e-05, "loss": 2.1244, "step": 9549 }, { "epoch": 0.3081198037551093, "grad_norm": 0.609375, "learning_rate": 2.4420387528969797e-05, "loss": 2.3374, "step": 9550 }, { "epoch": 0.3081520676089056, "grad_norm": 1.2578125, "learning_rate": 2.4419162809175197e-05, "loss": 2.3259, "step": 9551 }, { "epoch": 0.30818433146270197, "grad_norm": 1.0546875, "learning_rate": 2.441793798570122e-05, "loss": 2.2501, "step": 9552 }, { "epoch": 0.3082165953164983, "grad_norm": 2.390625, "learning_rate": 2.4416713058561332e-05, "loss": 2.3135, "step": 9553 }, { "epoch": 0.30824885917029465, "grad_norm": 1.5625, "learning_rate": 2.441548802776902e-05, "loss": 2.3449, "step": 9554 }, { "epoch": 0.308281123024091, "grad_norm": 1.8984375, "learning_rate": 2.4414262893337776e-05, "loss": 2.3563, "step": 9555 }, { "epoch": 0.30831338687788734, "grad_norm": 1.390625, "learning_rate": 2.4413037655281076e-05, "loss": 2.311, "step": 9556 }, { "epoch": 0.3083456507316837, "grad_norm": 1.3125, "learning_rate": 2.4411812313612412e-05, "loss": 2.3345, "step": 9557 }, { "epoch": 0.30837791458548003, "grad_norm": 1.2734375, "learning_rate": 2.441058686834527e-05, "loss": 2.3291, "step": 9558 }, { "epoch": 0.30841017843927643, "grad_norm": 0.8203125, "learning_rate": 2.4409361319493134e-05, "loss": 2.3434, "step": 9559 }, { "epoch": 0.30844244229307277, "grad_norm": 0.96875, "learning_rate": 2.4408135667069505e-05, "loss": 2.3151, "step": 9560 }, { "epoch": 0.3084747061468691, "grad_norm": 0.76953125, "learning_rate": 2.4406909911087865e-05, "loss": 2.345, "step": 9561 }, { "epoch": 0.30850697000066546, "grad_norm": 0.81640625, "learning_rate": 2.4405684051561713e-05, "loss": 2.2845, "step": 9562 }, { "epoch": 0.3085392338544618, "grad_norm": 0.67578125, "learning_rate": 2.4404458088504536e-05, "loss": 2.3202, "step": 9563 }, { "epoch": 0.30857149770825815, "grad_norm": 0.6953125, "learning_rate": 2.4403232021929833e-05, "loss": 2.3171, "step": 9564 }, { "epoch": 0.3086037615620545, "grad_norm": 0.6796875, "learning_rate": 2.4402005851851096e-05, "loss": 2.3513, "step": 9565 }, { "epoch": 0.30863602541585083, "grad_norm": 0.5390625, "learning_rate": 2.440077957828182e-05, "loss": 2.3462, "step": 9566 }, { "epoch": 0.3086682892696472, "grad_norm": 0.5546875, "learning_rate": 2.439955320123551e-05, "loss": 2.3248, "step": 9567 }, { "epoch": 0.3087005531234435, "grad_norm": 0.578125, "learning_rate": 2.4398326720725662e-05, "loss": 2.2916, "step": 9568 }, { "epoch": 0.30873281697723987, "grad_norm": 0.5078125, "learning_rate": 2.439710013676578e-05, "loss": 2.3257, "step": 9569 }, { "epoch": 0.3087650808310362, "grad_norm": 0.486328125, "learning_rate": 2.439587344936935e-05, "loss": 2.3473, "step": 9570 }, { "epoch": 0.30879734468483255, "grad_norm": 0.486328125, "learning_rate": 2.4394646658549893e-05, "loss": 2.3126, "step": 9571 }, { "epoch": 0.3088296085386289, "grad_norm": 0.48046875, "learning_rate": 2.4393419764320903e-05, "loss": 2.3017, "step": 9572 }, { "epoch": 0.30886187239242524, "grad_norm": 0.478515625, "learning_rate": 2.4392192766695892e-05, "loss": 2.3215, "step": 9573 }, { "epoch": 0.3088941362462216, "grad_norm": 0.61328125, "learning_rate": 2.4390965665688355e-05, "loss": 2.3057, "step": 9574 }, { "epoch": 0.30892640010001793, "grad_norm": 0.609375, "learning_rate": 2.438973846131181e-05, "loss": 2.3153, "step": 9575 }, { "epoch": 0.30895866395381427, "grad_norm": 0.431640625, "learning_rate": 2.4388511153579757e-05, "loss": 2.3006, "step": 9576 }, { "epoch": 0.3089909278076106, "grad_norm": 0.5, "learning_rate": 2.438728374250571e-05, "loss": 2.3356, "step": 9577 }, { "epoch": 0.30902319166140696, "grad_norm": 0.435546875, "learning_rate": 2.4386056228103174e-05, "loss": 2.3103, "step": 9578 }, { "epoch": 0.30905545551520336, "grad_norm": 0.400390625, "learning_rate": 2.4384828610385664e-05, "loss": 2.3115, "step": 9579 }, { "epoch": 0.3090877193689997, "grad_norm": 0.41796875, "learning_rate": 2.43836008893667e-05, "loss": 2.3063, "step": 9580 }, { "epoch": 0.30911998322279605, "grad_norm": 0.419921875, "learning_rate": 2.438237306505979e-05, "loss": 2.3533, "step": 9581 }, { "epoch": 0.3091522470765924, "grad_norm": 0.423828125, "learning_rate": 2.438114513747844e-05, "loss": 2.3026, "step": 9582 }, { "epoch": 0.30918451093038873, "grad_norm": 0.40234375, "learning_rate": 2.437991710663618e-05, "loss": 2.31, "step": 9583 }, { "epoch": 0.3092167747841851, "grad_norm": 0.396484375, "learning_rate": 2.437868897254652e-05, "loss": 2.3413, "step": 9584 }, { "epoch": 0.3092490386379814, "grad_norm": 0.3828125, "learning_rate": 2.437746073522298e-05, "loss": 2.3455, "step": 9585 }, { "epoch": 0.30928130249177777, "grad_norm": 0.380859375, "learning_rate": 2.437623239467908e-05, "loss": 2.3322, "step": 9586 }, { "epoch": 0.3093135663455741, "grad_norm": 0.4453125, "learning_rate": 2.437500395092834e-05, "loss": 2.2952, "step": 9587 }, { "epoch": 0.30934583019937045, "grad_norm": 1.078125, "learning_rate": 2.4373775403984282e-05, "loss": 2.2433, "step": 9588 }, { "epoch": 0.3093780940531668, "grad_norm": 1.0859375, "learning_rate": 2.4372546753860428e-05, "loss": 2.3057, "step": 9589 }, { "epoch": 0.30941035790696314, "grad_norm": 0.8046875, "learning_rate": 2.4371318000570305e-05, "loss": 2.2988, "step": 9590 }, { "epoch": 0.3094426217607595, "grad_norm": 0.953125, "learning_rate": 2.4370089144127436e-05, "loss": 2.2904, "step": 9591 }, { "epoch": 0.30947488561455583, "grad_norm": 0.703125, "learning_rate": 2.436886018454535e-05, "loss": 2.3099, "step": 9592 }, { "epoch": 0.30950714946835217, "grad_norm": 0.70703125, "learning_rate": 2.4367631121837566e-05, "loss": 2.3161, "step": 9593 }, { "epoch": 0.3095394133221485, "grad_norm": 0.68359375, "learning_rate": 2.4366401956017624e-05, "loss": 2.2996, "step": 9594 }, { "epoch": 0.30957167717594486, "grad_norm": 0.609375, "learning_rate": 2.4365172687099048e-05, "loss": 2.3303, "step": 9595 }, { "epoch": 0.3096039410297412, "grad_norm": 0.609375, "learning_rate": 2.436394331509537e-05, "loss": 2.2743, "step": 9596 }, { "epoch": 0.30963620488353755, "grad_norm": 0.59765625, "learning_rate": 2.436271384002012e-05, "loss": 2.2853, "step": 9597 }, { "epoch": 0.3096684687373339, "grad_norm": 0.5703125, "learning_rate": 2.4361484261886832e-05, "loss": 2.3004, "step": 9598 }, { "epoch": 0.3097007325911303, "grad_norm": 0.50390625, "learning_rate": 2.4360254580709043e-05, "loss": 2.3096, "step": 9599 }, { "epoch": 0.30973299644492663, "grad_norm": 0.56640625, "learning_rate": 2.435902479650029e-05, "loss": 2.1119, "step": 9600 }, { "epoch": 0.309765260298723, "grad_norm": 0.484375, "learning_rate": 2.43577949092741e-05, "loss": 1.9928, "step": 9601 }, { "epoch": 0.3097975241525193, "grad_norm": 0.484375, "learning_rate": 2.435656491904402e-05, "loss": 1.9994, "step": 9602 }, { "epoch": 0.30982978800631567, "grad_norm": 0.462890625, "learning_rate": 2.4355334825823585e-05, "loss": 2.0197, "step": 9603 }, { "epoch": 0.309862051860112, "grad_norm": 0.478515625, "learning_rate": 2.4354104629626334e-05, "loss": 2.0063, "step": 9604 }, { "epoch": 0.30989431571390835, "grad_norm": 0.41796875, "learning_rate": 2.4352874330465812e-05, "loss": 2.005, "step": 9605 }, { "epoch": 0.3099265795677047, "grad_norm": 0.40625, "learning_rate": 2.435164392835556e-05, "loss": 1.983, "step": 9606 }, { "epoch": 0.30995884342150104, "grad_norm": 0.44140625, "learning_rate": 2.4350413423309115e-05, "loss": 2.0096, "step": 9607 }, { "epoch": 0.3099911072752974, "grad_norm": 0.39453125, "learning_rate": 2.4349182815340035e-05, "loss": 1.999, "step": 9608 }, { "epoch": 0.3100233711290937, "grad_norm": 0.388671875, "learning_rate": 2.4347952104461856e-05, "loss": 2.0022, "step": 9609 }, { "epoch": 0.31005563498289007, "grad_norm": 0.40625, "learning_rate": 2.434672129068812e-05, "loss": 1.9869, "step": 9610 }, { "epoch": 0.3100878988366864, "grad_norm": 0.3828125, "learning_rate": 2.4345490374032387e-05, "loss": 1.9913, "step": 9611 }, { "epoch": 0.31012016269048276, "grad_norm": 0.376953125, "learning_rate": 2.43442593545082e-05, "loss": 1.9953, "step": 9612 }, { "epoch": 0.3101524265442791, "grad_norm": 0.373046875, "learning_rate": 2.434302823212911e-05, "loss": 1.9983, "step": 9613 }, { "epoch": 0.31018469039807545, "grad_norm": 0.357421875, "learning_rate": 2.4341797006908665e-05, "loss": 2.0188, "step": 9614 }, { "epoch": 0.3102169542518718, "grad_norm": 0.36328125, "learning_rate": 2.434056567886042e-05, "loss": 2.0145, "step": 9615 }, { "epoch": 0.31024921810566813, "grad_norm": 0.384765625, "learning_rate": 2.433933424799793e-05, "loss": 1.9795, "step": 9616 }, { "epoch": 0.3102814819594645, "grad_norm": 0.353515625, "learning_rate": 2.4338102714334752e-05, "loss": 2.0047, "step": 9617 }, { "epoch": 0.3103137458132608, "grad_norm": 0.376953125, "learning_rate": 2.4336871077884434e-05, "loss": 2.0098, "step": 9618 }, { "epoch": 0.3103460096670572, "grad_norm": 0.353515625, "learning_rate": 2.4335639338660538e-05, "loss": 2.0209, "step": 9619 }, { "epoch": 0.31037827352085356, "grad_norm": 0.36328125, "learning_rate": 2.4334407496676628e-05, "loss": 2.0114, "step": 9620 }, { "epoch": 0.3104105373746499, "grad_norm": 0.361328125, "learning_rate": 2.433317555194625e-05, "loss": 1.9871, "step": 9621 }, { "epoch": 0.31044280122844625, "grad_norm": 0.390625, "learning_rate": 2.433194350448297e-05, "loss": 1.9862, "step": 9622 }, { "epoch": 0.3104750650822426, "grad_norm": 0.353515625, "learning_rate": 2.4330711354300357e-05, "loss": 1.9739, "step": 9623 }, { "epoch": 0.31050732893603894, "grad_norm": 0.37109375, "learning_rate": 2.432947910141196e-05, "loss": 2.0013, "step": 9624 }, { "epoch": 0.3105395927898353, "grad_norm": 0.375, "learning_rate": 2.4328246745831353e-05, "loss": 1.9633, "step": 9625 }, { "epoch": 0.3105718566436316, "grad_norm": 0.3515625, "learning_rate": 2.4327014287572098e-05, "loss": 2.006, "step": 9626 }, { "epoch": 0.31060412049742797, "grad_norm": 0.365234375, "learning_rate": 2.4325781726647764e-05, "loss": 1.991, "step": 9627 }, { "epoch": 0.3106363843512243, "grad_norm": 0.345703125, "learning_rate": 2.432454906307191e-05, "loss": 2.0001, "step": 9628 }, { "epoch": 0.31066864820502066, "grad_norm": 0.37109375, "learning_rate": 2.432331629685811e-05, "loss": 2.0018, "step": 9629 }, { "epoch": 0.310700912058817, "grad_norm": 0.376953125, "learning_rate": 2.432208342801994e-05, "loss": 2.013, "step": 9630 }, { "epoch": 0.31073317591261335, "grad_norm": 0.376953125, "learning_rate": 2.4320850456570955e-05, "loss": 1.9887, "step": 9631 }, { "epoch": 0.3107654397664097, "grad_norm": 0.34375, "learning_rate": 2.4319617382524738e-05, "loss": 1.993, "step": 9632 }, { "epoch": 0.31079770362020603, "grad_norm": 0.388671875, "learning_rate": 2.4318384205894855e-05, "loss": 1.9968, "step": 9633 }, { "epoch": 0.3108299674740024, "grad_norm": 0.36328125, "learning_rate": 2.431715092669489e-05, "loss": 1.9584, "step": 9634 }, { "epoch": 0.3108622313277987, "grad_norm": 0.369140625, "learning_rate": 2.4315917544938408e-05, "loss": 1.9664, "step": 9635 }, { "epoch": 0.31089449518159507, "grad_norm": 0.365234375, "learning_rate": 2.4314684060638986e-05, "loss": 2.0091, "step": 9636 }, { "epoch": 0.3109267590353914, "grad_norm": 0.359375, "learning_rate": 2.431345047381021e-05, "loss": 2.0033, "step": 9637 }, { "epoch": 0.31095902288918775, "grad_norm": 0.34765625, "learning_rate": 2.431221678446565e-05, "loss": 1.9994, "step": 9638 }, { "epoch": 0.3109912867429841, "grad_norm": 0.359375, "learning_rate": 2.4310982992618892e-05, "loss": 1.9996, "step": 9639 }, { "epoch": 0.3110235505967805, "grad_norm": 0.361328125, "learning_rate": 2.4309749098283506e-05, "loss": 1.9864, "step": 9640 }, { "epoch": 0.31105581445057684, "grad_norm": 0.359375, "learning_rate": 2.4308515101473084e-05, "loss": 1.974, "step": 9641 }, { "epoch": 0.3110880783043732, "grad_norm": 0.361328125, "learning_rate": 2.4307281002201206e-05, "loss": 1.9811, "step": 9642 }, { "epoch": 0.3111203421581695, "grad_norm": 0.37890625, "learning_rate": 2.4306046800481456e-05, "loss": 1.985, "step": 9643 }, { "epoch": 0.31115260601196587, "grad_norm": 0.35546875, "learning_rate": 2.4304812496327417e-05, "loss": 2.0047, "step": 9644 }, { "epoch": 0.3111848698657622, "grad_norm": 0.37109375, "learning_rate": 2.430357808975268e-05, "loss": 2.004, "step": 9645 }, { "epoch": 0.31121713371955856, "grad_norm": 0.34375, "learning_rate": 2.430234358077083e-05, "loss": 1.9868, "step": 9646 }, { "epoch": 0.3112493975733549, "grad_norm": 0.375, "learning_rate": 2.4301108969395447e-05, "loss": 1.9658, "step": 9647 }, { "epoch": 0.31128166142715125, "grad_norm": 0.359375, "learning_rate": 2.4299874255640137e-05, "loss": 1.991, "step": 9648 }, { "epoch": 0.3113139252809476, "grad_norm": 0.3984375, "learning_rate": 2.4298639439518483e-05, "loss": 1.9943, "step": 9649 }, { "epoch": 0.31134618913474393, "grad_norm": 0.375, "learning_rate": 2.4297404521044077e-05, "loss": 2.0053, "step": 9650 }, { "epoch": 0.3113784529885403, "grad_norm": 0.365234375, "learning_rate": 2.4296169500230514e-05, "loss": 1.9743, "step": 9651 }, { "epoch": 0.3114107168423366, "grad_norm": 0.361328125, "learning_rate": 2.4294934377091384e-05, "loss": 1.9801, "step": 9652 }, { "epoch": 0.31144298069613296, "grad_norm": 0.384765625, "learning_rate": 2.4293699151640278e-05, "loss": 1.9936, "step": 9653 }, { "epoch": 0.3114752445499293, "grad_norm": 0.390625, "learning_rate": 2.4292463823890805e-05, "loss": 1.9767, "step": 9654 }, { "epoch": 0.31150750840372565, "grad_norm": 0.359375, "learning_rate": 2.4291228393856553e-05, "loss": 1.9854, "step": 9655 }, { "epoch": 0.311539772257522, "grad_norm": 0.361328125, "learning_rate": 2.428999286155113e-05, "loss": 1.987, "step": 9656 }, { "epoch": 0.31157203611131834, "grad_norm": 0.373046875, "learning_rate": 2.428875722698813e-05, "loss": 1.9893, "step": 9657 }, { "epoch": 0.3116042999651147, "grad_norm": 0.375, "learning_rate": 2.428752149018115e-05, "loss": 2.0291, "step": 9658 }, { "epoch": 0.311636563818911, "grad_norm": 0.37109375, "learning_rate": 2.4286285651143795e-05, "loss": 1.9842, "step": 9659 }, { "epoch": 0.3116688276727074, "grad_norm": 0.361328125, "learning_rate": 2.4285049709889676e-05, "loss": 2.0014, "step": 9660 }, { "epoch": 0.31170109152650377, "grad_norm": 0.37890625, "learning_rate": 2.4283813666432385e-05, "loss": 2.0119, "step": 9661 }, { "epoch": 0.3117333553803001, "grad_norm": 0.361328125, "learning_rate": 2.4282577520785533e-05, "loss": 1.9927, "step": 9662 }, { "epoch": 0.31176561923409646, "grad_norm": 0.365234375, "learning_rate": 2.4281341272962733e-05, "loss": 2.0089, "step": 9663 }, { "epoch": 0.3117978830878928, "grad_norm": 0.373046875, "learning_rate": 2.428010492297758e-05, "loss": 1.9858, "step": 9664 }, { "epoch": 0.31183014694168915, "grad_norm": 0.357421875, "learning_rate": 2.4278868470843696e-05, "loss": 1.9978, "step": 9665 }, { "epoch": 0.3118624107954855, "grad_norm": 0.357421875, "learning_rate": 2.4277631916574682e-05, "loss": 1.9817, "step": 9666 }, { "epoch": 0.31189467464928183, "grad_norm": 0.375, "learning_rate": 2.4276395260184147e-05, "loss": 1.9844, "step": 9667 }, { "epoch": 0.3119269385030782, "grad_norm": 0.37890625, "learning_rate": 2.4275158501685715e-05, "loss": 1.9733, "step": 9668 }, { "epoch": 0.3119592023568745, "grad_norm": 0.361328125, "learning_rate": 2.427392164109299e-05, "loss": 1.9652, "step": 9669 }, { "epoch": 0.31199146621067086, "grad_norm": 0.390625, "learning_rate": 2.4272684678419586e-05, "loss": 2.0152, "step": 9670 }, { "epoch": 0.3120237300644672, "grad_norm": 0.41796875, "learning_rate": 2.4271447613679122e-05, "loss": 2.0148, "step": 9671 }, { "epoch": 0.31205599391826355, "grad_norm": 0.400390625, "learning_rate": 2.4270210446885212e-05, "loss": 1.9723, "step": 9672 }, { "epoch": 0.3120882577720599, "grad_norm": 0.416015625, "learning_rate": 2.426897317805148e-05, "loss": 2.0156, "step": 9673 }, { "epoch": 0.31212052162585624, "grad_norm": 0.353515625, "learning_rate": 2.4267735807191544e-05, "loss": 1.9747, "step": 9674 }, { "epoch": 0.3121527854796526, "grad_norm": 0.416015625, "learning_rate": 2.426649833431901e-05, "loss": 1.9765, "step": 9675 }, { "epoch": 0.3121850493334489, "grad_norm": 0.369140625, "learning_rate": 2.4265260759447518e-05, "loss": 2.0166, "step": 9676 }, { "epoch": 0.31221731318724527, "grad_norm": 0.40234375, "learning_rate": 2.426402308259068e-05, "loss": 2.0085, "step": 9677 }, { "epoch": 0.3122495770410416, "grad_norm": 0.373046875, "learning_rate": 2.4262785303762122e-05, "loss": 1.9642, "step": 9678 }, { "epoch": 0.31228184089483796, "grad_norm": 0.3984375, "learning_rate": 2.426154742297547e-05, "loss": 1.9838, "step": 9679 }, { "epoch": 0.31231410474863436, "grad_norm": 0.392578125, "learning_rate": 2.4260309440244348e-05, "loss": 1.9521, "step": 9680 }, { "epoch": 0.3123463686024307, "grad_norm": 0.35546875, "learning_rate": 2.425907135558238e-05, "loss": 1.9816, "step": 9681 }, { "epoch": 0.31237863245622705, "grad_norm": 0.400390625, "learning_rate": 2.4257833169003202e-05, "loss": 2.0086, "step": 9682 }, { "epoch": 0.3124108963100234, "grad_norm": 0.48046875, "learning_rate": 2.4256594880520437e-05, "loss": 1.9989, "step": 9683 }, { "epoch": 0.31244316016381973, "grad_norm": 0.41015625, "learning_rate": 2.4255356490147712e-05, "loss": 1.9952, "step": 9684 }, { "epoch": 0.3124754240176161, "grad_norm": 0.359375, "learning_rate": 2.4254117997898665e-05, "loss": 1.9838, "step": 9685 }, { "epoch": 0.3125076878714124, "grad_norm": 0.423828125, "learning_rate": 2.4252879403786924e-05, "loss": 1.9783, "step": 9686 }, { "epoch": 0.31253995172520876, "grad_norm": 0.375, "learning_rate": 2.4251640707826128e-05, "loss": 1.9884, "step": 9687 }, { "epoch": 0.3125722155790051, "grad_norm": 0.447265625, "learning_rate": 2.4250401910029905e-05, "loss": 1.9888, "step": 9688 }, { "epoch": 0.31260447943280145, "grad_norm": 0.447265625, "learning_rate": 2.4249163010411895e-05, "loss": 1.9863, "step": 9689 }, { "epoch": 0.3126367432865978, "grad_norm": 0.427734375, "learning_rate": 2.4247924008985733e-05, "loss": 1.9701, "step": 9690 }, { "epoch": 0.31266900714039414, "grad_norm": 0.38671875, "learning_rate": 2.4246684905765066e-05, "loss": 1.9819, "step": 9691 }, { "epoch": 0.3127012709941905, "grad_norm": 0.42578125, "learning_rate": 2.4245445700763513e-05, "loss": 1.9769, "step": 9692 }, { "epoch": 0.3127335348479868, "grad_norm": 0.41015625, "learning_rate": 2.4244206393994733e-05, "loss": 1.9884, "step": 9693 }, { "epoch": 0.31276579870178317, "grad_norm": 0.376953125, "learning_rate": 2.4242966985472363e-05, "loss": 1.9927, "step": 9694 }, { "epoch": 0.3127980625555795, "grad_norm": 0.43359375, "learning_rate": 2.4241727475210036e-05, "loss": 1.9768, "step": 9695 }, { "epoch": 0.31283032640937586, "grad_norm": 0.376953125, "learning_rate": 2.424048786322141e-05, "loss": 1.9655, "step": 9696 }, { "epoch": 0.3128625902631722, "grad_norm": 0.3671875, "learning_rate": 2.423924814952012e-05, "loss": 1.966, "step": 9697 }, { "epoch": 0.31289485411696855, "grad_norm": 0.369140625, "learning_rate": 2.423800833411981e-05, "loss": 1.9995, "step": 9698 }, { "epoch": 0.3129271179707649, "grad_norm": 0.359375, "learning_rate": 2.4236768417034138e-05, "loss": 2.0195, "step": 9699 }, { "epoch": 0.3129593818245613, "grad_norm": 0.359375, "learning_rate": 2.4235528398276744e-05, "loss": 2.0474, "step": 9700 }, { "epoch": 0.31299164567835763, "grad_norm": 0.369140625, "learning_rate": 2.4234288277861277e-05, "loss": 1.9942, "step": 9701 }, { "epoch": 0.313023909532154, "grad_norm": 0.369140625, "learning_rate": 2.423304805580139e-05, "loss": 2.007, "step": 9702 }, { "epoch": 0.3130561733859503, "grad_norm": 0.369140625, "learning_rate": 2.4231807732110735e-05, "loss": 1.9244, "step": 9703 }, { "epoch": 0.31308843723974666, "grad_norm": 0.38671875, "learning_rate": 2.4230567306802965e-05, "loss": 1.9825, "step": 9704 }, { "epoch": 0.313120701093543, "grad_norm": 0.376953125, "learning_rate": 2.422932677989173e-05, "loss": 1.9677, "step": 9705 }, { "epoch": 0.31315296494733935, "grad_norm": 0.380859375, "learning_rate": 2.4228086151390684e-05, "loss": 1.9754, "step": 9706 }, { "epoch": 0.3131852288011357, "grad_norm": 0.359375, "learning_rate": 2.422684542131349e-05, "loss": 2.0003, "step": 9707 }, { "epoch": 0.31321749265493204, "grad_norm": 0.375, "learning_rate": 2.42256045896738e-05, "loss": 1.984, "step": 9708 }, { "epoch": 0.3132497565087284, "grad_norm": 0.373046875, "learning_rate": 2.422436365648527e-05, "loss": 1.9804, "step": 9709 }, { "epoch": 0.3132820203625247, "grad_norm": 0.357421875, "learning_rate": 2.422312262176157e-05, "loss": 2.0197, "step": 9710 }, { "epoch": 0.31331428421632107, "grad_norm": 0.375, "learning_rate": 2.4221881485516345e-05, "loss": 1.9836, "step": 9711 }, { "epoch": 0.3133465480701174, "grad_norm": 0.353515625, "learning_rate": 2.4220640247763265e-05, "loss": 1.9791, "step": 9712 }, { "epoch": 0.31337881192391376, "grad_norm": 0.384765625, "learning_rate": 2.4219398908515998e-05, "loss": 1.9719, "step": 9713 }, { "epoch": 0.3134110757777101, "grad_norm": 0.380859375, "learning_rate": 2.4218157467788196e-05, "loss": 2.0199, "step": 9714 }, { "epoch": 0.31344333963150645, "grad_norm": 0.421875, "learning_rate": 2.421691592559353e-05, "loss": 1.9928, "step": 9715 }, { "epoch": 0.3134756034853028, "grad_norm": 0.353515625, "learning_rate": 2.421567428194567e-05, "loss": 1.9766, "step": 9716 }, { "epoch": 0.31350786733909913, "grad_norm": 0.380859375, "learning_rate": 2.4214432536858275e-05, "loss": 2.0061, "step": 9717 }, { "epoch": 0.3135401311928955, "grad_norm": 0.37109375, "learning_rate": 2.4213190690345018e-05, "loss": 2.0209, "step": 9718 }, { "epoch": 0.3135723950466918, "grad_norm": 0.369140625, "learning_rate": 2.421194874241957e-05, "loss": 2.0141, "step": 9719 }, { "epoch": 0.3136046589004882, "grad_norm": 0.39453125, "learning_rate": 2.4210706693095593e-05, "loss": 2.0091, "step": 9720 }, { "epoch": 0.31363692275428456, "grad_norm": 0.39453125, "learning_rate": 2.420946454238677e-05, "loss": 1.9903, "step": 9721 }, { "epoch": 0.3136691866080809, "grad_norm": 0.43359375, "learning_rate": 2.4208222290306766e-05, "loss": 1.9847, "step": 9722 }, { "epoch": 0.31370145046187725, "grad_norm": 0.3984375, "learning_rate": 2.4206979936869255e-05, "loss": 1.9851, "step": 9723 }, { "epoch": 0.3137337143156736, "grad_norm": 0.345703125, "learning_rate": 2.4205737482087916e-05, "loss": 2.0097, "step": 9724 }, { "epoch": 0.31376597816946994, "grad_norm": 0.390625, "learning_rate": 2.4204494925976423e-05, "loss": 1.9988, "step": 9725 }, { "epoch": 0.3137982420232663, "grad_norm": 0.396484375, "learning_rate": 2.4203252268548453e-05, "loss": 1.9909, "step": 9726 }, { "epoch": 0.3138305058770626, "grad_norm": 0.36328125, "learning_rate": 2.4202009509817687e-05, "loss": 1.9877, "step": 9727 }, { "epoch": 0.31386276973085897, "grad_norm": 0.375, "learning_rate": 2.42007666497978e-05, "loss": 1.9796, "step": 9728 }, { "epoch": 0.3138950335846553, "grad_norm": 0.3984375, "learning_rate": 2.4199523688502468e-05, "loss": 1.9606, "step": 9729 }, { "epoch": 0.31392729743845166, "grad_norm": 0.388671875, "learning_rate": 2.4198280625945388e-05, "loss": 1.998, "step": 9730 }, { "epoch": 0.313959561292248, "grad_norm": 0.396484375, "learning_rate": 2.419703746214023e-05, "loss": 2.0123, "step": 9731 }, { "epoch": 0.31399182514604435, "grad_norm": 0.384765625, "learning_rate": 2.419579419710068e-05, "loss": 2.0149, "step": 9732 }, { "epoch": 0.3140240889998407, "grad_norm": 0.376953125, "learning_rate": 2.419455083084043e-05, "loss": 2.0183, "step": 9733 }, { "epoch": 0.31405635285363703, "grad_norm": 0.416015625, "learning_rate": 2.4193307363373153e-05, "loss": 1.9893, "step": 9734 }, { "epoch": 0.3140886167074334, "grad_norm": 0.388671875, "learning_rate": 2.419206379471255e-05, "loss": 1.9866, "step": 9735 }, { "epoch": 0.3141208805612297, "grad_norm": 0.396484375, "learning_rate": 2.41908201248723e-05, "loss": 1.9836, "step": 9736 }, { "epoch": 0.31415314441502606, "grad_norm": 0.392578125, "learning_rate": 2.4189576353866093e-05, "loss": 1.9822, "step": 9737 }, { "epoch": 0.3141854082688224, "grad_norm": 0.41015625, "learning_rate": 2.4188332481707628e-05, "loss": 1.9815, "step": 9738 }, { "epoch": 0.31421767212261875, "grad_norm": 0.439453125, "learning_rate": 2.418708850841059e-05, "loss": 1.9846, "step": 9739 }, { "epoch": 0.3142499359764151, "grad_norm": 0.392578125, "learning_rate": 2.418584443398867e-05, "loss": 1.9986, "step": 9740 }, { "epoch": 0.3142821998302115, "grad_norm": 0.408203125, "learning_rate": 2.4184600258455562e-05, "loss": 1.9898, "step": 9741 }, { "epoch": 0.31431446368400784, "grad_norm": 0.37109375, "learning_rate": 2.418335598182497e-05, "loss": 2.0126, "step": 9742 }, { "epoch": 0.3143467275378042, "grad_norm": 0.375, "learning_rate": 2.418211160411058e-05, "loss": 2.0152, "step": 9743 }, { "epoch": 0.3143789913916005, "grad_norm": 0.3828125, "learning_rate": 2.418086712532609e-05, "loss": 2.0208, "step": 9744 }, { "epoch": 0.31441125524539687, "grad_norm": 0.3515625, "learning_rate": 2.4179622545485205e-05, "loss": 1.9673, "step": 9745 }, { "epoch": 0.3144435190991932, "grad_norm": 0.38671875, "learning_rate": 2.4178377864601616e-05, "loss": 1.9792, "step": 9746 }, { "epoch": 0.31447578295298956, "grad_norm": 0.384765625, "learning_rate": 2.4177133082689033e-05, "loss": 1.995, "step": 9747 }, { "epoch": 0.3145080468067859, "grad_norm": 0.4140625, "learning_rate": 2.4175888199761154e-05, "loss": 2.0009, "step": 9748 }, { "epoch": 0.31454031066058225, "grad_norm": 0.384765625, "learning_rate": 2.4174643215831674e-05, "loss": 1.9929, "step": 9749 }, { "epoch": 0.3145725745143786, "grad_norm": 0.375, "learning_rate": 2.4173398130914305e-05, "loss": 1.9828, "step": 9750 }, { "epoch": 0.31460483836817493, "grad_norm": 0.373046875, "learning_rate": 2.4172152945022754e-05, "loss": 1.9934, "step": 9751 }, { "epoch": 0.3146371022219713, "grad_norm": 0.380859375, "learning_rate": 2.417090765817072e-05, "loss": 1.9716, "step": 9752 }, { "epoch": 0.3146693660757676, "grad_norm": 0.365234375, "learning_rate": 2.4169662270371913e-05, "loss": 1.9837, "step": 9753 }, { "epoch": 0.31470162992956396, "grad_norm": 0.359375, "learning_rate": 2.4168416781640043e-05, "loss": 2.0031, "step": 9754 }, { "epoch": 0.3147338937833603, "grad_norm": 0.35546875, "learning_rate": 2.4167171191988822e-05, "loss": 2.0073, "step": 9755 }, { "epoch": 0.31476615763715665, "grad_norm": 0.3671875, "learning_rate": 2.4165925501431955e-05, "loss": 1.9777, "step": 9756 }, { "epoch": 0.314798421490953, "grad_norm": 0.375, "learning_rate": 2.4164679709983154e-05, "loss": 1.9978, "step": 9757 }, { "epoch": 0.31483068534474934, "grad_norm": 0.390625, "learning_rate": 2.4163433817656134e-05, "loss": 2.0153, "step": 9758 }, { "epoch": 0.3148629491985457, "grad_norm": 0.365234375, "learning_rate": 2.4162187824464613e-05, "loss": 1.9992, "step": 9759 }, { "epoch": 0.314895213052342, "grad_norm": 0.42578125, "learning_rate": 2.4160941730422294e-05, "loss": 2.0135, "step": 9760 }, { "epoch": 0.3149274769061384, "grad_norm": 0.40625, "learning_rate": 2.4159695535542905e-05, "loss": 1.9767, "step": 9761 }, { "epoch": 0.31495974075993477, "grad_norm": 0.37109375, "learning_rate": 2.415844923984016e-05, "loss": 1.9952, "step": 9762 }, { "epoch": 0.3149920046137311, "grad_norm": 0.380859375, "learning_rate": 2.415720284332777e-05, "loss": 2.0036, "step": 9763 }, { "epoch": 0.31502426846752746, "grad_norm": 0.4140625, "learning_rate": 2.4155956346019464e-05, "loss": 1.9656, "step": 9764 }, { "epoch": 0.3150565323213238, "grad_norm": 0.43359375, "learning_rate": 2.415470974792896e-05, "loss": 1.996, "step": 9765 }, { "epoch": 0.31508879617512014, "grad_norm": 0.37109375, "learning_rate": 2.4153463049069974e-05, "loss": 1.9889, "step": 9766 }, { "epoch": 0.3151210600289165, "grad_norm": 0.396484375, "learning_rate": 2.415221624945624e-05, "loss": 2.0101, "step": 9767 }, { "epoch": 0.31515332388271283, "grad_norm": 0.369140625, "learning_rate": 2.4150969349101473e-05, "loss": 1.996, "step": 9768 }, { "epoch": 0.3151855877365092, "grad_norm": 0.37109375, "learning_rate": 2.4149722348019397e-05, "loss": 1.9907, "step": 9769 }, { "epoch": 0.3152178515903055, "grad_norm": 0.375, "learning_rate": 2.4148475246223746e-05, "loss": 1.9614, "step": 9770 }, { "epoch": 0.31525011544410186, "grad_norm": 0.39453125, "learning_rate": 2.414722804372824e-05, "loss": 1.9896, "step": 9771 }, { "epoch": 0.3152823792978982, "grad_norm": 0.361328125, "learning_rate": 2.414598074054661e-05, "loss": 1.9704, "step": 9772 }, { "epoch": 0.31531464315169455, "grad_norm": 0.3671875, "learning_rate": 2.4144733336692584e-05, "loss": 1.9924, "step": 9773 }, { "epoch": 0.3153469070054909, "grad_norm": 0.419921875, "learning_rate": 2.4143485832179895e-05, "loss": 1.9669, "step": 9774 }, { "epoch": 0.31537917085928724, "grad_norm": 0.380859375, "learning_rate": 2.4142238227022277e-05, "loss": 2.0007, "step": 9775 }, { "epoch": 0.3154114347130836, "grad_norm": 0.359375, "learning_rate": 2.4140990521233455e-05, "loss": 1.9821, "step": 9776 }, { "epoch": 0.3154436985668799, "grad_norm": 0.412109375, "learning_rate": 2.4139742714827168e-05, "loss": 2.0084, "step": 9777 }, { "epoch": 0.31547596242067627, "grad_norm": 0.44140625, "learning_rate": 2.413849480781715e-05, "loss": 2.005, "step": 9778 }, { "epoch": 0.3155082262744726, "grad_norm": 0.5, "learning_rate": 2.4137246800217137e-05, "loss": 2.0078, "step": 9779 }, { "epoch": 0.31554049012826896, "grad_norm": 0.38671875, "learning_rate": 2.4135998692040865e-05, "loss": 2.0015, "step": 9780 }, { "epoch": 0.31557275398206536, "grad_norm": 0.396484375, "learning_rate": 2.4134750483302076e-05, "loss": 1.9952, "step": 9781 }, { "epoch": 0.3156050178358617, "grad_norm": 0.451171875, "learning_rate": 2.4133502174014507e-05, "loss": 1.9775, "step": 9782 }, { "epoch": 0.31563728168965804, "grad_norm": 0.396484375, "learning_rate": 2.4132253764191896e-05, "loss": 2.0058, "step": 9783 }, { "epoch": 0.3156695455434544, "grad_norm": 0.4453125, "learning_rate": 2.4131005253847992e-05, "loss": 2.0531, "step": 9784 }, { "epoch": 0.31570180939725073, "grad_norm": 0.435546875, "learning_rate": 2.4129756642996527e-05, "loss": 2.0124, "step": 9785 }, { "epoch": 0.3157340732510471, "grad_norm": 0.4296875, "learning_rate": 2.4128507931651254e-05, "loss": 2.0149, "step": 9786 }, { "epoch": 0.3157663371048434, "grad_norm": 0.4453125, "learning_rate": 2.4127259119825912e-05, "loss": 2.0227, "step": 9787 }, { "epoch": 0.31579860095863976, "grad_norm": 0.44921875, "learning_rate": 2.412601020753425e-05, "loss": 2.022, "step": 9788 }, { "epoch": 0.3158308648124361, "grad_norm": 0.455078125, "learning_rate": 2.4124761194790015e-05, "loss": 2.0015, "step": 9789 }, { "epoch": 0.31586312866623245, "grad_norm": 0.443359375, "learning_rate": 2.4123512081606955e-05, "loss": 2.0303, "step": 9790 }, { "epoch": 0.3158953925200288, "grad_norm": 0.427734375, "learning_rate": 2.412226286799882e-05, "loss": 1.9736, "step": 9791 }, { "epoch": 0.31592765637382514, "grad_norm": 0.44921875, "learning_rate": 2.4121013553979362e-05, "loss": 2.0339, "step": 9792 }, { "epoch": 0.3159599202276215, "grad_norm": 0.390625, "learning_rate": 2.4119764139562326e-05, "loss": 1.9926, "step": 9793 }, { "epoch": 0.3159921840814178, "grad_norm": 0.451171875, "learning_rate": 2.4118514624761467e-05, "loss": 2.0387, "step": 9794 }, { "epoch": 0.31602444793521417, "grad_norm": 0.47265625, "learning_rate": 2.4117265009590545e-05, "loss": 2.0138, "step": 9795 }, { "epoch": 0.3160567117890105, "grad_norm": 0.396484375, "learning_rate": 2.411601529406331e-05, "loss": 2.0158, "step": 9796 }, { "epoch": 0.31608897564280686, "grad_norm": 0.45703125, "learning_rate": 2.4114765478193513e-05, "loss": 2.0172, "step": 9797 }, { "epoch": 0.3161212394966032, "grad_norm": 0.423828125, "learning_rate": 2.4113515561994925e-05, "loss": 2.0146, "step": 9798 }, { "epoch": 0.31615350335039955, "grad_norm": 0.396484375, "learning_rate": 2.4112265545481294e-05, "loss": 2.007, "step": 9799 }, { "epoch": 0.3161857672041959, "grad_norm": 0.4140625, "learning_rate": 2.4111015428666374e-05, "loss": 2.0417, "step": 9800 }, { "epoch": 0.3162180310579923, "grad_norm": 0.3828125, "learning_rate": 2.410976521156394e-05, "loss": 2.0246, "step": 9801 }, { "epoch": 0.31625029491178863, "grad_norm": 0.392578125, "learning_rate": 2.4108514894187742e-05, "loss": 2.033, "step": 9802 }, { "epoch": 0.316282558765585, "grad_norm": 0.67578125, "learning_rate": 2.4107264476551548e-05, "loss": 2.1039, "step": 9803 }, { "epoch": 0.3163148226193813, "grad_norm": 0.55078125, "learning_rate": 2.4106013958669116e-05, "loss": 2.0973, "step": 9804 }, { "epoch": 0.31634708647317766, "grad_norm": 0.51171875, "learning_rate": 2.410476334055422e-05, "loss": 2.1033, "step": 9805 }, { "epoch": 0.316379350326974, "grad_norm": 0.5390625, "learning_rate": 2.410351262222062e-05, "loss": 2.0979, "step": 9806 }, { "epoch": 0.31641161418077035, "grad_norm": 0.52734375, "learning_rate": 2.4102261803682084e-05, "loss": 2.1086, "step": 9807 }, { "epoch": 0.3164438780345667, "grad_norm": 0.65234375, "learning_rate": 2.4101010884952375e-05, "loss": 2.1384, "step": 9808 }, { "epoch": 0.31647614188836304, "grad_norm": 0.54296875, "learning_rate": 2.4099759866045276e-05, "loss": 2.1076, "step": 9809 }, { "epoch": 0.3165084057421594, "grad_norm": 0.59765625, "learning_rate": 2.4098508746974546e-05, "loss": 2.1141, "step": 9810 }, { "epoch": 0.3165406695959557, "grad_norm": 0.59765625, "learning_rate": 2.4097257527753954e-05, "loss": 2.1217, "step": 9811 }, { "epoch": 0.31657293344975207, "grad_norm": 0.494140625, "learning_rate": 2.4096006208397282e-05, "loss": 2.1072, "step": 9812 }, { "epoch": 0.3166051973035484, "grad_norm": 0.59375, "learning_rate": 2.4094754788918297e-05, "loss": 2.1148, "step": 9813 }, { "epoch": 0.31663746115734476, "grad_norm": 0.51953125, "learning_rate": 2.4093503269330775e-05, "loss": 2.1339, "step": 9814 }, { "epoch": 0.3166697250111411, "grad_norm": 0.7890625, "learning_rate": 2.4092251649648496e-05, "loss": 2.1353, "step": 9815 }, { "epoch": 0.31670198886493744, "grad_norm": 0.65625, "learning_rate": 2.4090999929885232e-05, "loss": 2.0697, "step": 9816 }, { "epoch": 0.3167342527187338, "grad_norm": 0.6484375, "learning_rate": 2.408974811005476e-05, "loss": 2.13, "step": 9817 }, { "epoch": 0.31676651657253013, "grad_norm": 0.61328125, "learning_rate": 2.4088496190170864e-05, "loss": 2.0911, "step": 9818 }, { "epoch": 0.3167987804263265, "grad_norm": 0.640625, "learning_rate": 2.4087244170247323e-05, "loss": 2.1511, "step": 9819 }, { "epoch": 0.3168310442801228, "grad_norm": 0.57421875, "learning_rate": 2.4085992050297915e-05, "loss": 2.1067, "step": 9820 }, { "epoch": 0.31686330813391916, "grad_norm": 0.60546875, "learning_rate": 2.4084739830336427e-05, "loss": 2.12, "step": 9821 }, { "epoch": 0.31689557198771556, "grad_norm": 0.52734375, "learning_rate": 2.408348751037664e-05, "loss": 2.0727, "step": 9822 }, { "epoch": 0.3169278358415119, "grad_norm": 0.482421875, "learning_rate": 2.408223509043234e-05, "loss": 2.0937, "step": 9823 }, { "epoch": 0.31696009969530825, "grad_norm": 0.52734375, "learning_rate": 2.408098257051731e-05, "loss": 2.1022, "step": 9824 }, { "epoch": 0.3169923635491046, "grad_norm": 0.5078125, "learning_rate": 2.4079729950645336e-05, "loss": 2.1121, "step": 9825 }, { "epoch": 0.31702462740290094, "grad_norm": 0.46875, "learning_rate": 2.4078477230830214e-05, "loss": 2.09, "step": 9826 }, { "epoch": 0.3170568912566973, "grad_norm": 0.5078125, "learning_rate": 2.4077224411085722e-05, "loss": 2.1178, "step": 9827 }, { "epoch": 0.3170891551104936, "grad_norm": 0.439453125, "learning_rate": 2.4075971491425658e-05, "loss": 2.1004, "step": 9828 }, { "epoch": 0.31712141896428997, "grad_norm": 0.451171875, "learning_rate": 2.407471847186381e-05, "loss": 2.1424, "step": 9829 }, { "epoch": 0.3171536828180863, "grad_norm": 0.52734375, "learning_rate": 2.4073465352413977e-05, "loss": 2.1311, "step": 9830 }, { "epoch": 0.31718594667188266, "grad_norm": 0.443359375, "learning_rate": 2.407221213308994e-05, "loss": 2.1202, "step": 9831 }, { "epoch": 0.317218210525679, "grad_norm": 0.482421875, "learning_rate": 2.40709588139055e-05, "loss": 2.0965, "step": 9832 }, { "epoch": 0.31725047437947534, "grad_norm": 0.4921875, "learning_rate": 2.406970539487446e-05, "loss": 2.0998, "step": 9833 }, { "epoch": 0.3172827382332717, "grad_norm": 0.455078125, "learning_rate": 2.4068451876010603e-05, "loss": 2.108, "step": 9834 }, { "epoch": 0.31731500208706803, "grad_norm": 0.470703125, "learning_rate": 2.4067198257327735e-05, "loss": 2.1406, "step": 9835 }, { "epoch": 0.3173472659408644, "grad_norm": 0.50390625, "learning_rate": 2.4065944538839653e-05, "loss": 2.1019, "step": 9836 }, { "epoch": 0.3173795297946607, "grad_norm": 0.458984375, "learning_rate": 2.4064690720560157e-05, "loss": 2.1158, "step": 9837 }, { "epoch": 0.31741179364845706, "grad_norm": 0.474609375, "learning_rate": 2.406343680250305e-05, "loss": 2.1073, "step": 9838 }, { "epoch": 0.3174440575022534, "grad_norm": 0.435546875, "learning_rate": 2.4062182784682133e-05, "loss": 2.1122, "step": 9839 }, { "epoch": 0.31747632135604975, "grad_norm": 0.455078125, "learning_rate": 2.4060928667111205e-05, "loss": 2.1133, "step": 9840 }, { "epoch": 0.3175085852098461, "grad_norm": 0.427734375, "learning_rate": 2.4059674449804082e-05, "loss": 2.1263, "step": 9841 }, { "epoch": 0.3175408490636425, "grad_norm": 0.45703125, "learning_rate": 2.4058420132774557e-05, "loss": 2.1124, "step": 9842 }, { "epoch": 0.31757311291743884, "grad_norm": 0.44921875, "learning_rate": 2.405716571603644e-05, "loss": 2.1471, "step": 9843 }, { "epoch": 0.3176053767712352, "grad_norm": 0.4765625, "learning_rate": 2.4055911199603546e-05, "loss": 2.1078, "step": 9844 }, { "epoch": 0.3176376406250315, "grad_norm": 0.42578125, "learning_rate": 2.4054656583489672e-05, "loss": 2.1088, "step": 9845 }, { "epoch": 0.31766990447882787, "grad_norm": 0.47265625, "learning_rate": 2.405340186770864e-05, "loss": 2.1017, "step": 9846 }, { "epoch": 0.3177021683326242, "grad_norm": 0.41015625, "learning_rate": 2.4052147052274254e-05, "loss": 2.0872, "step": 9847 }, { "epoch": 0.31773443218642056, "grad_norm": 0.4375, "learning_rate": 2.4050892137200325e-05, "loss": 2.1077, "step": 9848 }, { "epoch": 0.3177666960402169, "grad_norm": 0.451171875, "learning_rate": 2.404963712250067e-05, "loss": 2.1072, "step": 9849 }, { "epoch": 0.31779895989401324, "grad_norm": 0.439453125, "learning_rate": 2.4048382008189097e-05, "loss": 2.0839, "step": 9850 }, { "epoch": 0.3178312237478096, "grad_norm": 0.400390625, "learning_rate": 2.4047126794279434e-05, "loss": 2.0987, "step": 9851 }, { "epoch": 0.31786348760160593, "grad_norm": 0.474609375, "learning_rate": 2.4045871480785483e-05, "loss": 2.1179, "step": 9852 }, { "epoch": 0.3178957514554023, "grad_norm": 0.439453125, "learning_rate": 2.4044616067721067e-05, "loss": 2.1311, "step": 9853 }, { "epoch": 0.3179280153091986, "grad_norm": 0.455078125, "learning_rate": 2.4043360555100014e-05, "loss": 2.1104, "step": 9854 }, { "epoch": 0.31796027916299496, "grad_norm": 0.390625, "learning_rate": 2.404210494293613e-05, "loss": 2.0739, "step": 9855 }, { "epoch": 0.3179925430167913, "grad_norm": 0.40625, "learning_rate": 2.4040849231243247e-05, "loss": 2.0099, "step": 9856 }, { "epoch": 0.31802480687058765, "grad_norm": 0.396484375, "learning_rate": 2.4039593420035175e-05, "loss": 2.0145, "step": 9857 }, { "epoch": 0.318057070724384, "grad_norm": 0.3984375, "learning_rate": 2.4038337509325744e-05, "loss": 1.994, "step": 9858 }, { "epoch": 0.31808933457818034, "grad_norm": 0.369140625, "learning_rate": 2.403708149912878e-05, "loss": 1.9973, "step": 9859 }, { "epoch": 0.3181215984319767, "grad_norm": 0.3828125, "learning_rate": 2.4035825389458105e-05, "loss": 2.0063, "step": 9860 }, { "epoch": 0.318153862285773, "grad_norm": 0.369140625, "learning_rate": 2.403456918032755e-05, "loss": 1.9855, "step": 9861 }, { "epoch": 0.3181861261395694, "grad_norm": 0.37109375, "learning_rate": 2.4033312871750933e-05, "loss": 1.9841, "step": 9862 }, { "epoch": 0.31821838999336577, "grad_norm": 0.369140625, "learning_rate": 2.4032056463742095e-05, "loss": 1.9598, "step": 9863 }, { "epoch": 0.3182506538471621, "grad_norm": 0.361328125, "learning_rate": 2.4030799956314855e-05, "loss": 2.0052, "step": 9864 }, { "epoch": 0.31828291770095846, "grad_norm": 0.353515625, "learning_rate": 2.4029543349483045e-05, "loss": 1.9874, "step": 9865 }, { "epoch": 0.3183151815547548, "grad_norm": 0.361328125, "learning_rate": 2.4028286643260507e-05, "loss": 2.0068, "step": 9866 }, { "epoch": 0.31834744540855114, "grad_norm": 0.384765625, "learning_rate": 2.402702983766106e-05, "loss": 1.9708, "step": 9867 }, { "epoch": 0.3183797092623475, "grad_norm": 0.349609375, "learning_rate": 2.4025772932698546e-05, "loss": 1.9822, "step": 9868 }, { "epoch": 0.31841197311614383, "grad_norm": 0.357421875, "learning_rate": 2.40245159283868e-05, "loss": 1.981, "step": 9869 }, { "epoch": 0.3184442369699402, "grad_norm": 0.357421875, "learning_rate": 2.4023258824739653e-05, "loss": 1.9862, "step": 9870 }, { "epoch": 0.3184765008237365, "grad_norm": 0.384765625, "learning_rate": 2.402200162177095e-05, "loss": 1.9728, "step": 9871 }, { "epoch": 0.31850876467753286, "grad_norm": 0.373046875, "learning_rate": 2.4020744319494528e-05, "loss": 2.0044, "step": 9872 }, { "epoch": 0.3185410285313292, "grad_norm": 0.37890625, "learning_rate": 2.401948691792422e-05, "loss": 2.0012, "step": 9873 }, { "epoch": 0.31857329238512555, "grad_norm": 0.38671875, "learning_rate": 2.4018229417073872e-05, "loss": 2.0217, "step": 9874 }, { "epoch": 0.3186055562389219, "grad_norm": 0.3515625, "learning_rate": 2.4016971816957326e-05, "loss": 1.9868, "step": 9875 }, { "epoch": 0.31863782009271824, "grad_norm": 0.365234375, "learning_rate": 2.4015714117588417e-05, "loss": 1.9733, "step": 9876 }, { "epoch": 0.3186700839465146, "grad_norm": 0.349609375, "learning_rate": 2.4014456318981002e-05, "loss": 1.9798, "step": 9877 }, { "epoch": 0.3187023478003109, "grad_norm": 0.380859375, "learning_rate": 2.4013198421148912e-05, "loss": 1.9669, "step": 9878 }, { "epoch": 0.31873461165410727, "grad_norm": 0.3984375, "learning_rate": 2.4011940424106004e-05, "loss": 1.9733, "step": 9879 }, { "epoch": 0.3187668755079036, "grad_norm": 0.458984375, "learning_rate": 2.4010682327866123e-05, "loss": 1.9971, "step": 9880 }, { "epoch": 0.31879913936169996, "grad_norm": 0.43359375, "learning_rate": 2.400942413244311e-05, "loss": 1.9955, "step": 9881 }, { "epoch": 0.31883140321549636, "grad_norm": 0.396484375, "learning_rate": 2.4008165837850818e-05, "loss": 1.9772, "step": 9882 }, { "epoch": 0.3188636670692927, "grad_norm": 0.53125, "learning_rate": 2.400690744410311e-05, "loss": 2.0891, "step": 9883 }, { "epoch": 0.31889593092308904, "grad_norm": 0.44921875, "learning_rate": 2.4005648951213816e-05, "loss": 2.111, "step": 9884 }, { "epoch": 0.3189281947768854, "grad_norm": 0.5234375, "learning_rate": 2.4004390359196803e-05, "loss": 2.0642, "step": 9885 }, { "epoch": 0.31896045863068173, "grad_norm": 0.458984375, "learning_rate": 2.400313166806592e-05, "loss": 2.1298, "step": 9886 }, { "epoch": 0.3189927224844781, "grad_norm": 0.44140625, "learning_rate": 2.400187287783502e-05, "loss": 2.1092, "step": 9887 }, { "epoch": 0.3190249863382744, "grad_norm": 0.455078125, "learning_rate": 2.4000613988517964e-05, "loss": 2.0996, "step": 9888 }, { "epoch": 0.31905725019207076, "grad_norm": 0.447265625, "learning_rate": 2.3999355000128606e-05, "loss": 2.0801, "step": 9889 }, { "epoch": 0.3190895140458671, "grad_norm": 0.4609375, "learning_rate": 2.39980959126808e-05, "loss": 2.1028, "step": 9890 }, { "epoch": 0.31912177789966345, "grad_norm": 0.431640625, "learning_rate": 2.3996836726188416e-05, "loss": 2.1124, "step": 9891 }, { "epoch": 0.3191540417534598, "grad_norm": 0.408203125, "learning_rate": 2.3995577440665308e-05, "loss": 2.1044, "step": 9892 }, { "epoch": 0.31918630560725614, "grad_norm": 0.455078125, "learning_rate": 2.3994318056125326e-05, "loss": 2.0812, "step": 9893 }, { "epoch": 0.3192185694610525, "grad_norm": 0.484375, "learning_rate": 2.3993058572582354e-05, "loss": 2.1167, "step": 9894 }, { "epoch": 0.3192508333148488, "grad_norm": 0.50390625, "learning_rate": 2.3991798990050243e-05, "loss": 2.1109, "step": 9895 }, { "epoch": 0.31928309716864517, "grad_norm": 0.5546875, "learning_rate": 2.3990539308542857e-05, "loss": 2.087, "step": 9896 }, { "epoch": 0.3193153610224415, "grad_norm": 0.6484375, "learning_rate": 2.3989279528074065e-05, "loss": 2.0945, "step": 9897 }, { "epoch": 0.31934762487623786, "grad_norm": 0.70703125, "learning_rate": 2.3988019648657733e-05, "loss": 2.1311, "step": 9898 }, { "epoch": 0.3193798887300342, "grad_norm": 0.6328125, "learning_rate": 2.398675967030773e-05, "loss": 2.1193, "step": 9899 }, { "epoch": 0.31941215258383054, "grad_norm": 0.66796875, "learning_rate": 2.3985499593037924e-05, "loss": 2.0931, "step": 9900 }, { "epoch": 0.3194444164376269, "grad_norm": 0.51171875, "learning_rate": 2.3984239416862183e-05, "loss": 2.093, "step": 9901 }, { "epoch": 0.31947668029142323, "grad_norm": 0.62890625, "learning_rate": 2.398297914179438e-05, "loss": 2.1402, "step": 9902 }, { "epoch": 0.31950894414521963, "grad_norm": 0.5546875, "learning_rate": 2.3981718767848384e-05, "loss": 2.0842, "step": 9903 }, { "epoch": 0.319541207999016, "grad_norm": 0.921875, "learning_rate": 2.3980458295038076e-05, "loss": 2.0586, "step": 9904 }, { "epoch": 0.3195734718528123, "grad_norm": 0.85546875, "learning_rate": 2.3979197723377322e-05, "loss": 2.1056, "step": 9905 }, { "epoch": 0.31960573570660866, "grad_norm": 0.6171875, "learning_rate": 2.3977937052880003e-05, "loss": 2.0945, "step": 9906 }, { "epoch": 0.319637999560405, "grad_norm": 0.6953125, "learning_rate": 2.3976676283559995e-05, "loss": 2.0747, "step": 9907 }, { "epoch": 0.31967026341420135, "grad_norm": 0.65234375, "learning_rate": 2.3975415415431173e-05, "loss": 2.0889, "step": 9908 }, { "epoch": 0.3197025272679977, "grad_norm": 0.578125, "learning_rate": 2.3974154448507418e-05, "loss": 2.1112, "step": 9909 }, { "epoch": 0.31973479112179404, "grad_norm": 0.65234375, "learning_rate": 2.3972893382802608e-05, "loss": 2.0938, "step": 9910 }, { "epoch": 0.3197670549755904, "grad_norm": 0.53515625, "learning_rate": 2.3971632218330623e-05, "loss": 2.1248, "step": 9911 }, { "epoch": 0.3197993188293867, "grad_norm": 0.76953125, "learning_rate": 2.3970370955105347e-05, "loss": 2.1452, "step": 9912 }, { "epoch": 0.31983158268318307, "grad_norm": 0.51953125, "learning_rate": 2.3969109593140664e-05, "loss": 2.1374, "step": 9913 }, { "epoch": 0.3198638465369794, "grad_norm": 0.484375, "learning_rate": 2.396784813245046e-05, "loss": 2.052, "step": 9914 }, { "epoch": 0.31989611039077576, "grad_norm": 0.50390625, "learning_rate": 2.396658657304861e-05, "loss": 2.1357, "step": 9915 }, { "epoch": 0.3199283742445721, "grad_norm": 0.494140625, "learning_rate": 2.3965324914949016e-05, "loss": 2.0963, "step": 9916 }, { "epoch": 0.31996063809836844, "grad_norm": 0.486328125, "learning_rate": 2.396406315816555e-05, "loss": 2.1066, "step": 9917 }, { "epoch": 0.3199929019521648, "grad_norm": 0.490234375, "learning_rate": 2.3962801302712116e-05, "loss": 2.1215, "step": 9918 }, { "epoch": 0.32002516580596113, "grad_norm": 0.478515625, "learning_rate": 2.3961539348602587e-05, "loss": 2.0939, "step": 9919 }, { "epoch": 0.3200574296597575, "grad_norm": 0.439453125, "learning_rate": 2.3960277295850867e-05, "loss": 2.1035, "step": 9920 }, { "epoch": 0.3200896935135538, "grad_norm": 0.439453125, "learning_rate": 2.3959015144470837e-05, "loss": 2.1218, "step": 9921 }, { "epoch": 0.32012195736735016, "grad_norm": 0.41796875, "learning_rate": 2.39577528944764e-05, "loss": 2.0647, "step": 9922 }, { "epoch": 0.32015422122114656, "grad_norm": 0.4375, "learning_rate": 2.395649054588144e-05, "loss": 2.0925, "step": 9923 }, { "epoch": 0.3201864850749429, "grad_norm": 0.39453125, "learning_rate": 2.395522809869986e-05, "loss": 2.0908, "step": 9924 }, { "epoch": 0.32021874892873925, "grad_norm": 0.388671875, "learning_rate": 2.3953965552945557e-05, "loss": 2.0585, "step": 9925 }, { "epoch": 0.3202510127825356, "grad_norm": 0.3984375, "learning_rate": 2.3952702908632426e-05, "loss": 2.0814, "step": 9926 }, { "epoch": 0.32028327663633194, "grad_norm": 0.3984375, "learning_rate": 2.3951440165774358e-05, "loss": 2.0953, "step": 9927 }, { "epoch": 0.3203155404901283, "grad_norm": 0.404296875, "learning_rate": 2.3950177324385264e-05, "loss": 2.082, "step": 9928 }, { "epoch": 0.3203478043439246, "grad_norm": 0.416015625, "learning_rate": 2.3948914384479034e-05, "loss": 2.1245, "step": 9929 }, { "epoch": 0.32038006819772097, "grad_norm": 0.4140625, "learning_rate": 2.394765134606958e-05, "loss": 2.0982, "step": 9930 }, { "epoch": 0.3204123320515173, "grad_norm": 0.390625, "learning_rate": 2.394638820917079e-05, "loss": 2.1228, "step": 9931 }, { "epoch": 0.32044459590531366, "grad_norm": 0.421875, "learning_rate": 2.3945124973796586e-05, "loss": 2.1117, "step": 9932 }, { "epoch": 0.32047685975911, "grad_norm": 0.373046875, "learning_rate": 2.394386163996086e-05, "loss": 2.0837, "step": 9933 }, { "epoch": 0.32050912361290634, "grad_norm": 0.41015625, "learning_rate": 2.394259820767752e-05, "loss": 2.0937, "step": 9934 }, { "epoch": 0.3205413874667027, "grad_norm": 0.37109375, "learning_rate": 2.394133467696048e-05, "loss": 2.0795, "step": 9935 }, { "epoch": 0.32057365132049903, "grad_norm": 0.38671875, "learning_rate": 2.394007104782363e-05, "loss": 2.0897, "step": 9936 }, { "epoch": 0.3206059151742954, "grad_norm": 0.38671875, "learning_rate": 2.3938807320280902e-05, "loss": 2.1114, "step": 9937 }, { "epoch": 0.3206381790280917, "grad_norm": 0.40234375, "learning_rate": 2.393754349434619e-05, "loss": 2.1166, "step": 9938 }, { "epoch": 0.32067044288188806, "grad_norm": 0.4453125, "learning_rate": 2.393627957003342e-05, "loss": 2.1077, "step": 9939 }, { "epoch": 0.3207027067356844, "grad_norm": 0.486328125, "learning_rate": 2.3935015547356483e-05, "loss": 2.1038, "step": 9940 }, { "epoch": 0.32073497058948075, "grad_norm": 0.45703125, "learning_rate": 2.3933751426329315e-05, "loss": 2.072, "step": 9941 }, { "epoch": 0.3207672344432771, "grad_norm": 0.455078125, "learning_rate": 2.3932487206965815e-05, "loss": 2.1131, "step": 9942 }, { "epoch": 0.3207994982970735, "grad_norm": 0.54296875, "learning_rate": 2.3931222889279907e-05, "loss": 2.107, "step": 9943 }, { "epoch": 0.32083176215086984, "grad_norm": 0.59375, "learning_rate": 2.3929958473285502e-05, "loss": 2.0954, "step": 9944 }, { "epoch": 0.3208640260046662, "grad_norm": 0.53125, "learning_rate": 2.392869395899652e-05, "loss": 2.0985, "step": 9945 }, { "epoch": 0.3208962898584625, "grad_norm": 0.54296875, "learning_rate": 2.392742934642688e-05, "loss": 2.106, "step": 9946 }, { "epoch": 0.32092855371225887, "grad_norm": 0.5546875, "learning_rate": 2.3926164635590504e-05, "loss": 2.0991, "step": 9947 }, { "epoch": 0.3209608175660552, "grad_norm": 0.52734375, "learning_rate": 2.3924899826501315e-05, "loss": 2.1415, "step": 9948 }, { "epoch": 0.32099308141985156, "grad_norm": 0.5234375, "learning_rate": 2.3923634919173228e-05, "loss": 2.1584, "step": 9949 }, { "epoch": 0.3210253452736479, "grad_norm": 0.49609375, "learning_rate": 2.3922369913620167e-05, "loss": 2.0962, "step": 9950 }, { "epoch": 0.32105760912744424, "grad_norm": 0.51953125, "learning_rate": 2.3921104809856066e-05, "loss": 2.0965, "step": 9951 }, { "epoch": 0.3210898729812406, "grad_norm": 0.43359375, "learning_rate": 2.3919839607894832e-05, "loss": 2.1049, "step": 9952 }, { "epoch": 0.32112213683503693, "grad_norm": 0.51953125, "learning_rate": 2.3918574307750415e-05, "loss": 2.076, "step": 9953 }, { "epoch": 0.3211544006888333, "grad_norm": 0.41796875, "learning_rate": 2.391730890943672e-05, "loss": 2.1078, "step": 9954 }, { "epoch": 0.3211866645426296, "grad_norm": 0.45703125, "learning_rate": 2.391604341296769e-05, "loss": 2.0815, "step": 9955 }, { "epoch": 0.32121892839642596, "grad_norm": 0.41015625, "learning_rate": 2.391477781835725e-05, "loss": 2.0966, "step": 9956 }, { "epoch": 0.3212511922502223, "grad_norm": 0.427734375, "learning_rate": 2.3913512125619333e-05, "loss": 2.1125, "step": 9957 }, { "epoch": 0.32128345610401865, "grad_norm": 0.3984375, "learning_rate": 2.391224633476787e-05, "loss": 2.1117, "step": 9958 }, { "epoch": 0.321315719957815, "grad_norm": 0.42578125, "learning_rate": 2.3910980445816785e-05, "loss": 2.134, "step": 9959 }, { "epoch": 0.32134798381161134, "grad_norm": 0.412109375, "learning_rate": 2.390971445878003e-05, "loss": 2.1156, "step": 9960 }, { "epoch": 0.3213802476654077, "grad_norm": 0.4140625, "learning_rate": 2.3908448373671523e-05, "loss": 2.1144, "step": 9961 }, { "epoch": 0.321412511519204, "grad_norm": 0.435546875, "learning_rate": 2.3907182190505212e-05, "loss": 2.1504, "step": 9962 }, { "epoch": 0.3214447753730004, "grad_norm": 0.423828125, "learning_rate": 2.3905915909295028e-05, "loss": 2.2128, "step": 9963 }, { "epoch": 0.32147703922679677, "grad_norm": 0.443359375, "learning_rate": 2.3904649530054908e-05, "loss": 2.2147, "step": 9964 }, { "epoch": 0.3215093030805931, "grad_norm": 0.447265625, "learning_rate": 2.3903383052798795e-05, "loss": 2.2197, "step": 9965 }, { "epoch": 0.32154156693438946, "grad_norm": 0.55078125, "learning_rate": 2.390211647754063e-05, "loss": 2.2448, "step": 9966 }, { "epoch": 0.3215738307881858, "grad_norm": 0.53125, "learning_rate": 2.3900849804294352e-05, "loss": 2.2202, "step": 9967 }, { "epoch": 0.32160609464198214, "grad_norm": 0.5390625, "learning_rate": 2.389958303307391e-05, "loss": 2.2061, "step": 9968 }, { "epoch": 0.3216383584957785, "grad_norm": 0.50390625, "learning_rate": 2.3898316163893237e-05, "loss": 2.2073, "step": 9969 }, { "epoch": 0.32167062234957483, "grad_norm": 0.46875, "learning_rate": 2.3897049196766285e-05, "loss": 2.1383, "step": 9970 }, { "epoch": 0.3217028862033712, "grad_norm": 0.51171875, "learning_rate": 2.3895782131706996e-05, "loss": 2.1215, "step": 9971 }, { "epoch": 0.3217351500571675, "grad_norm": 0.478515625, "learning_rate": 2.389451496872932e-05, "loss": 2.1272, "step": 9972 }, { "epoch": 0.32176741391096386, "grad_norm": 0.47265625, "learning_rate": 2.3893247707847208e-05, "loss": 2.1082, "step": 9973 }, { "epoch": 0.3217996777647602, "grad_norm": 0.482421875, "learning_rate": 2.38919803490746e-05, "loss": 2.1149, "step": 9974 }, { "epoch": 0.32183194161855655, "grad_norm": 0.490234375, "learning_rate": 2.3890712892425454e-05, "loss": 2.0632, "step": 9975 }, { "epoch": 0.3218642054723529, "grad_norm": 0.498046875, "learning_rate": 2.388944533791372e-05, "loss": 2.1044, "step": 9976 }, { "epoch": 0.32189646932614924, "grad_norm": 0.462890625, "learning_rate": 2.3888177685553346e-05, "loss": 2.0975, "step": 9977 }, { "epoch": 0.3219287331799456, "grad_norm": 0.48828125, "learning_rate": 2.3886909935358288e-05, "loss": 2.0981, "step": 9978 }, { "epoch": 0.3219609970337419, "grad_norm": 0.4375, "learning_rate": 2.3885642087342502e-05, "loss": 2.1236, "step": 9979 }, { "epoch": 0.32199326088753827, "grad_norm": 0.4609375, "learning_rate": 2.3884374141519947e-05, "loss": 2.0562, "step": 9980 }, { "epoch": 0.3220255247413346, "grad_norm": 0.41796875, "learning_rate": 2.388310609790457e-05, "loss": 2.0745, "step": 9981 }, { "epoch": 0.32205778859513096, "grad_norm": 0.470703125, "learning_rate": 2.3881837956510336e-05, "loss": 2.108, "step": 9982 }, { "epoch": 0.32209005244892736, "grad_norm": 0.462890625, "learning_rate": 2.38805697173512e-05, "loss": 2.113, "step": 9983 }, { "epoch": 0.3221223163027237, "grad_norm": 0.439453125, "learning_rate": 2.387930138044112e-05, "loss": 2.0699, "step": 9984 }, { "epoch": 0.32215458015652004, "grad_norm": 0.408203125, "learning_rate": 2.3878032945794067e-05, "loss": 2.1128, "step": 9985 }, { "epoch": 0.3221868440103164, "grad_norm": 0.43359375, "learning_rate": 2.387676441342399e-05, "loss": 2.1074, "step": 9986 }, { "epoch": 0.32221910786411273, "grad_norm": 0.423828125, "learning_rate": 2.3875495783344864e-05, "loss": 2.1204, "step": 9987 }, { "epoch": 0.3222513717179091, "grad_norm": 0.39453125, "learning_rate": 2.3874227055570647e-05, "loss": 2.1176, "step": 9988 }, { "epoch": 0.3222836355717054, "grad_norm": 0.42578125, "learning_rate": 2.3872958230115306e-05, "loss": 2.0821, "step": 9989 }, { "epoch": 0.32231589942550176, "grad_norm": 0.404296875, "learning_rate": 2.38716893069928e-05, "loss": 2.0993, "step": 9990 }, { "epoch": 0.3223481632792981, "grad_norm": 0.4375, "learning_rate": 2.3870420286217108e-05, "loss": 2.0724, "step": 9991 }, { "epoch": 0.32238042713309445, "grad_norm": 0.388671875, "learning_rate": 2.386915116780219e-05, "loss": 2.0977, "step": 9992 }, { "epoch": 0.3224126909868908, "grad_norm": 0.41015625, "learning_rate": 2.3867881951762022e-05, "loss": 2.1094, "step": 9993 }, { "epoch": 0.32244495484068714, "grad_norm": 0.3984375, "learning_rate": 2.3866612638110572e-05, "loss": 2.091, "step": 9994 }, { "epoch": 0.3224772186944835, "grad_norm": 0.384765625, "learning_rate": 2.38653432268618e-05, "loss": 2.1205, "step": 9995 }, { "epoch": 0.3225094825482798, "grad_norm": 0.38671875, "learning_rate": 2.3864073718029704e-05, "loss": 2.0758, "step": 9996 }, { "epoch": 0.32254174640207617, "grad_norm": 0.369140625, "learning_rate": 2.386280411162824e-05, "loss": 2.0715, "step": 9997 }, { "epoch": 0.3225740102558725, "grad_norm": 0.416015625, "learning_rate": 2.3861534407671382e-05, "loss": 2.0861, "step": 9998 }, { "epoch": 0.32260627410966886, "grad_norm": 0.3671875, "learning_rate": 2.3860264606173112e-05, "loss": 2.1109, "step": 9999 }, { "epoch": 0.3226385379634652, "grad_norm": 0.392578125, "learning_rate": 2.3858994707147408e-05, "loss": 2.0978, "step": 10000 }, { "epoch": 0.32267080181726154, "grad_norm": 0.39453125, "learning_rate": 2.385772471060824e-05, "loss": 2.0829, "step": 10001 }, { "epoch": 0.3227030656710579, "grad_norm": 0.3828125, "learning_rate": 2.38564546165696e-05, "loss": 2.1093, "step": 10002 }, { "epoch": 0.32273532952485423, "grad_norm": 0.421875, "learning_rate": 2.385518442504546e-05, "loss": 2.1019, "step": 10003 }, { "epoch": 0.32276759337865063, "grad_norm": 0.369140625, "learning_rate": 2.38539141360498e-05, "loss": 2.1103, "step": 10004 }, { "epoch": 0.322799857232447, "grad_norm": 0.392578125, "learning_rate": 2.3852643749596603e-05, "loss": 2.1046, "step": 10005 }, { "epoch": 0.3228321210862433, "grad_norm": 0.404296875, "learning_rate": 2.3851373265699856e-05, "loss": 2.0917, "step": 10006 }, { "epoch": 0.32286438494003966, "grad_norm": 0.404296875, "learning_rate": 2.3850102684373544e-05, "loss": 2.0859, "step": 10007 }, { "epoch": 0.322896648793836, "grad_norm": 0.3828125, "learning_rate": 2.384883200563165e-05, "loss": 2.0899, "step": 10008 }, { "epoch": 0.32292891264763235, "grad_norm": 0.39453125, "learning_rate": 2.3847561229488163e-05, "loss": 2.0896, "step": 10009 }, { "epoch": 0.3229611765014287, "grad_norm": 0.396484375, "learning_rate": 2.3846290355957066e-05, "loss": 2.0911, "step": 10010 }, { "epoch": 0.32299344035522504, "grad_norm": 0.38671875, "learning_rate": 2.3845019385052357e-05, "loss": 2.1253, "step": 10011 }, { "epoch": 0.3230257042090214, "grad_norm": 0.423828125, "learning_rate": 2.3843748316788006e-05, "loss": 2.0803, "step": 10012 }, { "epoch": 0.3230579680628177, "grad_norm": 0.419921875, "learning_rate": 2.3842477151178035e-05, "loss": 2.1002, "step": 10013 }, { "epoch": 0.32309023191661407, "grad_norm": 0.400390625, "learning_rate": 2.3841205888236413e-05, "loss": 2.106, "step": 10014 }, { "epoch": 0.3231224957704104, "grad_norm": 0.40234375, "learning_rate": 2.3839934527977135e-05, "loss": 2.1009, "step": 10015 }, { "epoch": 0.32315475962420676, "grad_norm": 0.462890625, "learning_rate": 2.3838663070414203e-05, "loss": 2.125, "step": 10016 }, { "epoch": 0.3231870234780031, "grad_norm": 0.46484375, "learning_rate": 2.3837391515561608e-05, "loss": 2.0973, "step": 10017 }, { "epoch": 0.32321928733179944, "grad_norm": 0.41015625, "learning_rate": 2.3836119863433344e-05, "loss": 2.097, "step": 10018 }, { "epoch": 0.3232515511855958, "grad_norm": 0.408203125, "learning_rate": 2.3834848114043414e-05, "loss": 2.0753, "step": 10019 }, { "epoch": 0.32328381503939213, "grad_norm": 0.478515625, "learning_rate": 2.3833576267405814e-05, "loss": 2.1032, "step": 10020 }, { "epoch": 0.3233160788931885, "grad_norm": 0.4453125, "learning_rate": 2.3832304323534547e-05, "loss": 2.1126, "step": 10021 }, { "epoch": 0.3233483427469848, "grad_norm": 0.47265625, "learning_rate": 2.3831032282443604e-05, "loss": 2.0913, "step": 10022 }, { "epoch": 0.32338060660078116, "grad_norm": 0.4921875, "learning_rate": 2.3829760144147e-05, "loss": 2.1015, "step": 10023 }, { "epoch": 0.32341287045457756, "grad_norm": 0.486328125, "learning_rate": 2.3828487908658726e-05, "loss": 2.0554, "step": 10024 }, { "epoch": 0.3234451343083739, "grad_norm": 0.640625, "learning_rate": 2.3827215575992794e-05, "loss": 2.127, "step": 10025 }, { "epoch": 0.32347739816217025, "grad_norm": 0.4609375, "learning_rate": 2.38259431461632e-05, "loss": 2.0688, "step": 10026 }, { "epoch": 0.3235096620159666, "grad_norm": 0.62890625, "learning_rate": 2.382467061918396e-05, "loss": 2.0515, "step": 10027 }, { "epoch": 0.32354192586976294, "grad_norm": 0.498046875, "learning_rate": 2.3823397995069076e-05, "loss": 2.1124, "step": 10028 }, { "epoch": 0.3235741897235593, "grad_norm": 0.52734375, "learning_rate": 2.3822125273832557e-05, "loss": 2.0788, "step": 10029 }, { "epoch": 0.3236064535773556, "grad_norm": 0.482421875, "learning_rate": 2.3820852455488413e-05, "loss": 2.1009, "step": 10030 }, { "epoch": 0.32363871743115197, "grad_norm": 0.4921875, "learning_rate": 2.381957954005065e-05, "loss": 2.0889, "step": 10031 }, { "epoch": 0.3236709812849483, "grad_norm": 0.5703125, "learning_rate": 2.3818306527533283e-05, "loss": 2.086, "step": 10032 }, { "epoch": 0.32370324513874466, "grad_norm": 0.55859375, "learning_rate": 2.3817033417950328e-05, "loss": 2.1111, "step": 10033 }, { "epoch": 0.323735508992541, "grad_norm": 0.48828125, "learning_rate": 2.3815760211315795e-05, "loss": 2.1235, "step": 10034 }, { "epoch": 0.32376777284633734, "grad_norm": 0.5390625, "learning_rate": 2.381448690764369e-05, "loss": 2.1321, "step": 10035 }, { "epoch": 0.3238000367001337, "grad_norm": 0.46875, "learning_rate": 2.3813213506948043e-05, "loss": 2.0931, "step": 10036 }, { "epoch": 0.32383230055393003, "grad_norm": 0.5234375, "learning_rate": 2.3811940009242862e-05, "loss": 2.0626, "step": 10037 }, { "epoch": 0.3238645644077264, "grad_norm": 0.41796875, "learning_rate": 2.3810666414542166e-05, "loss": 2.0843, "step": 10038 }, { "epoch": 0.3238968282615227, "grad_norm": 0.498046875, "learning_rate": 2.3809392722859978e-05, "loss": 2.0976, "step": 10039 }, { "epoch": 0.32392909211531906, "grad_norm": 0.447265625, "learning_rate": 2.380811893421031e-05, "loss": 2.0971, "step": 10040 }, { "epoch": 0.3239613559691154, "grad_norm": 0.484375, "learning_rate": 2.3806845048607193e-05, "loss": 2.1002, "step": 10041 }, { "epoch": 0.32399361982291175, "grad_norm": 0.478515625, "learning_rate": 2.380557106606464e-05, "loss": 2.1025, "step": 10042 }, { "epoch": 0.3240258836767081, "grad_norm": 0.4375, "learning_rate": 2.380429698659668e-05, "loss": 2.1005, "step": 10043 }, { "epoch": 0.3240581475305045, "grad_norm": 0.453125, "learning_rate": 2.3803022810217337e-05, "loss": 2.1182, "step": 10044 }, { "epoch": 0.32409041138430084, "grad_norm": 0.453125, "learning_rate": 2.3801748536940627e-05, "loss": 2.1075, "step": 10045 }, { "epoch": 0.3241226752380972, "grad_norm": 0.431640625, "learning_rate": 2.380047416678059e-05, "loss": 2.1037, "step": 10046 }, { "epoch": 0.3241549390918935, "grad_norm": 0.439453125, "learning_rate": 2.3799199699751246e-05, "loss": 2.1003, "step": 10047 }, { "epoch": 0.32418720294568987, "grad_norm": 0.4140625, "learning_rate": 2.3797925135866626e-05, "loss": 2.0878, "step": 10048 }, { "epoch": 0.3242194667994862, "grad_norm": 0.412109375, "learning_rate": 2.379665047514075e-05, "loss": 2.0934, "step": 10049 }, { "epoch": 0.32425173065328255, "grad_norm": 0.427734375, "learning_rate": 2.379537571758766e-05, "loss": 2.111, "step": 10050 }, { "epoch": 0.3242839945070789, "grad_norm": 0.40234375, "learning_rate": 2.3794100863221386e-05, "loss": 1.9684, "step": 10051 }, { "epoch": 0.32431625836087524, "grad_norm": 0.3828125, "learning_rate": 2.379282591205596e-05, "loss": 1.9759, "step": 10052 }, { "epoch": 0.3243485222146716, "grad_norm": 0.390625, "learning_rate": 2.379155086410541e-05, "loss": 1.9866, "step": 10053 }, { "epoch": 0.32438078606846793, "grad_norm": 0.37890625, "learning_rate": 2.3790275719383778e-05, "loss": 2.0008, "step": 10054 }, { "epoch": 0.3244130499222643, "grad_norm": 0.384765625, "learning_rate": 2.3789000477905095e-05, "loss": 1.9758, "step": 10055 }, { "epoch": 0.3244453137760606, "grad_norm": 0.380859375, "learning_rate": 2.3787725139683398e-05, "loss": 2.0029, "step": 10056 }, { "epoch": 0.32447757762985696, "grad_norm": 0.37890625, "learning_rate": 2.378644970473273e-05, "loss": 2.0014, "step": 10057 }, { "epoch": 0.3245098414836533, "grad_norm": 0.38671875, "learning_rate": 2.3785174173067126e-05, "loss": 1.9911, "step": 10058 }, { "epoch": 0.32454210533744965, "grad_norm": 0.38671875, "learning_rate": 2.378389854470063e-05, "loss": 1.9861, "step": 10059 }, { "epoch": 0.324574369191246, "grad_norm": 0.365234375, "learning_rate": 2.378262281964728e-05, "loss": 2.0081, "step": 10060 }, { "epoch": 0.32460663304504234, "grad_norm": 0.380859375, "learning_rate": 2.3781346997921118e-05, "loss": 2.0018, "step": 10061 }, { "epoch": 0.3246388968988387, "grad_norm": 0.37109375, "learning_rate": 2.3780071079536184e-05, "loss": 1.9892, "step": 10062 }, { "epoch": 0.324671160752635, "grad_norm": 0.369140625, "learning_rate": 2.377879506450653e-05, "loss": 2.011, "step": 10063 }, { "epoch": 0.3247034246064314, "grad_norm": 0.375, "learning_rate": 2.3777518952846197e-05, "loss": 2.0092, "step": 10064 }, { "epoch": 0.32473568846022777, "grad_norm": 0.359375, "learning_rate": 2.377624274456923e-05, "loss": 1.978, "step": 10065 }, { "epoch": 0.3247679523140241, "grad_norm": 0.390625, "learning_rate": 2.3774966439689682e-05, "loss": 1.9904, "step": 10066 }, { "epoch": 0.32480021616782045, "grad_norm": 0.435546875, "learning_rate": 2.3773690038221594e-05, "loss": 1.9758, "step": 10067 }, { "epoch": 0.3248324800216168, "grad_norm": 0.38671875, "learning_rate": 2.3772413540179028e-05, "loss": 1.9691, "step": 10068 }, { "epoch": 0.32486474387541314, "grad_norm": 0.40234375, "learning_rate": 2.377113694557602e-05, "loss": 2.002, "step": 10069 }, { "epoch": 0.3248970077292095, "grad_norm": 0.384765625, "learning_rate": 2.3769860254426635e-05, "loss": 1.977, "step": 10070 }, { "epoch": 0.32492927158300583, "grad_norm": 0.408203125, "learning_rate": 2.3768583466744913e-05, "loss": 2.01, "step": 10071 }, { "epoch": 0.3249615354368022, "grad_norm": 0.376953125, "learning_rate": 2.3767306582544922e-05, "loss": 2.0023, "step": 10072 }, { "epoch": 0.3249937992905985, "grad_norm": 0.451171875, "learning_rate": 2.3766029601840706e-05, "loss": 1.9835, "step": 10073 }, { "epoch": 0.32502606314439486, "grad_norm": 0.51171875, "learning_rate": 2.3764752524646324e-05, "loss": 2.0034, "step": 10074 }, { "epoch": 0.3250583269981912, "grad_norm": 0.421875, "learning_rate": 2.3763475350975835e-05, "loss": 1.9919, "step": 10075 }, { "epoch": 0.32509059085198755, "grad_norm": 0.404296875, "learning_rate": 2.37621980808433e-05, "loss": 1.9903, "step": 10076 }, { "epoch": 0.3251228547057839, "grad_norm": 0.416015625, "learning_rate": 2.3760920714262765e-05, "loss": 2.0006, "step": 10077 }, { "epoch": 0.32515511855958024, "grad_norm": 0.419921875, "learning_rate": 2.375964325124831e-05, "loss": 1.9822, "step": 10078 }, { "epoch": 0.3251873824133766, "grad_norm": 0.390625, "learning_rate": 2.3758365691813984e-05, "loss": 2.0222, "step": 10079 }, { "epoch": 0.3252196462671729, "grad_norm": 0.41015625, "learning_rate": 2.3757088035973845e-05, "loss": 1.9867, "step": 10080 }, { "epoch": 0.32525191012096927, "grad_norm": 0.380859375, "learning_rate": 2.375581028374197e-05, "loss": 1.9754, "step": 10081 }, { "epoch": 0.3252841739747656, "grad_norm": 0.40625, "learning_rate": 2.3754532435132414e-05, "loss": 1.9785, "step": 10082 }, { "epoch": 0.32531643782856196, "grad_norm": 0.41796875, "learning_rate": 2.3753254490159246e-05, "loss": 1.964, "step": 10083 }, { "epoch": 0.3253487016823583, "grad_norm": 0.416015625, "learning_rate": 2.3751976448836535e-05, "loss": 1.9633, "step": 10084 }, { "epoch": 0.3253809655361547, "grad_norm": 0.388671875, "learning_rate": 2.3750698311178337e-05, "loss": 1.956, "step": 10085 }, { "epoch": 0.32541322938995104, "grad_norm": 0.40625, "learning_rate": 2.374942007719874e-05, "loss": 2.0086, "step": 10086 }, { "epoch": 0.3254454932437474, "grad_norm": 0.396484375, "learning_rate": 2.37481417469118e-05, "loss": 1.9837, "step": 10087 }, { "epoch": 0.32547775709754373, "grad_norm": 0.38671875, "learning_rate": 2.3746863320331588e-05, "loss": 2.0218, "step": 10088 }, { "epoch": 0.3255100209513401, "grad_norm": 0.435546875, "learning_rate": 2.3745584797472184e-05, "loss": 2.0339, "step": 10089 }, { "epoch": 0.3255422848051364, "grad_norm": 0.41796875, "learning_rate": 2.3744306178347655e-05, "loss": 1.9861, "step": 10090 }, { "epoch": 0.32557454865893276, "grad_norm": 0.38671875, "learning_rate": 2.3743027462972078e-05, "loss": 1.978, "step": 10091 }, { "epoch": 0.3256068125127291, "grad_norm": 0.369140625, "learning_rate": 2.3741748651359525e-05, "loss": 1.9801, "step": 10092 }, { "epoch": 0.32563907636652545, "grad_norm": 0.4140625, "learning_rate": 2.3740469743524077e-05, "loss": 1.9895, "step": 10093 }, { "epoch": 0.3256713402203218, "grad_norm": 0.380859375, "learning_rate": 2.3739190739479804e-05, "loss": 1.9951, "step": 10094 }, { "epoch": 0.32570360407411814, "grad_norm": 0.3671875, "learning_rate": 2.3737911639240792e-05, "loss": 1.9852, "step": 10095 }, { "epoch": 0.3257358679279145, "grad_norm": 0.36328125, "learning_rate": 2.3736632442821117e-05, "loss": 1.9678, "step": 10096 }, { "epoch": 0.3257681317817108, "grad_norm": 0.400390625, "learning_rate": 2.373535315023486e-05, "loss": 2.02, "step": 10097 }, { "epoch": 0.32580039563550717, "grad_norm": 0.37890625, "learning_rate": 2.3734073761496097e-05, "loss": 1.9698, "step": 10098 }, { "epoch": 0.3258326594893035, "grad_norm": 0.39453125, "learning_rate": 2.3732794276618925e-05, "loss": 1.9687, "step": 10099 }, { "epoch": 0.32586492334309985, "grad_norm": 0.4140625, "learning_rate": 2.3731514695617412e-05, "loss": 2.0148, "step": 10100 }, { "epoch": 0.3258971871968962, "grad_norm": 0.36328125, "learning_rate": 2.373023501850565e-05, "loss": 1.9974, "step": 10101 }, { "epoch": 0.32592945105069254, "grad_norm": 0.37890625, "learning_rate": 2.3728955245297722e-05, "loss": 1.9838, "step": 10102 }, { "epoch": 0.3259617149044889, "grad_norm": 0.373046875, "learning_rate": 2.3727675376007725e-05, "loss": 1.9923, "step": 10103 }, { "epoch": 0.32599397875828523, "grad_norm": 0.37890625, "learning_rate": 2.372639541064973e-05, "loss": 1.9925, "step": 10104 }, { "epoch": 0.32602624261208163, "grad_norm": 0.365234375, "learning_rate": 2.372511534923784e-05, "loss": 1.9894, "step": 10105 }, { "epoch": 0.326058506465878, "grad_norm": 0.380859375, "learning_rate": 2.3723835191786136e-05, "loss": 1.9733, "step": 10106 }, { "epoch": 0.3260907703196743, "grad_norm": 0.36328125, "learning_rate": 2.3722554938308716e-05, "loss": 2.0131, "step": 10107 }, { "epoch": 0.32612303417347066, "grad_norm": 0.3671875, "learning_rate": 2.3721274588819664e-05, "loss": 1.9765, "step": 10108 }, { "epoch": 0.326155298027267, "grad_norm": 0.353515625, "learning_rate": 2.3719994143333083e-05, "loss": 1.9782, "step": 10109 }, { "epoch": 0.32618756188106335, "grad_norm": 0.35546875, "learning_rate": 2.371871360186306e-05, "loss": 1.9988, "step": 10110 }, { "epoch": 0.3262198257348597, "grad_norm": 0.380859375, "learning_rate": 2.371743296442369e-05, "loss": 2.0254, "step": 10111 }, { "epoch": 0.32625208958865604, "grad_norm": 0.4140625, "learning_rate": 2.3716152231029077e-05, "loss": 1.9767, "step": 10112 }, { "epoch": 0.3262843534424524, "grad_norm": 0.447265625, "learning_rate": 2.371487140169331e-05, "loss": 1.9726, "step": 10113 }, { "epoch": 0.3263166172962487, "grad_norm": 0.37109375, "learning_rate": 2.3713590476430492e-05, "loss": 1.9928, "step": 10114 }, { "epoch": 0.32634888115004507, "grad_norm": 0.3671875, "learning_rate": 2.3712309455254718e-05, "loss": 1.9631, "step": 10115 }, { "epoch": 0.3263811450038414, "grad_norm": 0.373046875, "learning_rate": 2.3711028338180097e-05, "loss": 2.016, "step": 10116 }, { "epoch": 0.32641340885763775, "grad_norm": 0.349609375, "learning_rate": 2.370974712522072e-05, "loss": 2.0114, "step": 10117 }, { "epoch": 0.3264456727114341, "grad_norm": 0.357421875, "learning_rate": 2.3708465816390696e-05, "loss": 1.9888, "step": 10118 }, { "epoch": 0.32647793656523044, "grad_norm": 0.36328125, "learning_rate": 2.3707184411704127e-05, "loss": 2.009, "step": 10119 }, { "epoch": 0.3265102004190268, "grad_norm": 0.361328125, "learning_rate": 2.3705902911175122e-05, "loss": 1.9902, "step": 10120 }, { "epoch": 0.32654246427282313, "grad_norm": 0.38671875, "learning_rate": 2.3704621314817783e-05, "loss": 1.9634, "step": 10121 }, { "epoch": 0.3265747281266195, "grad_norm": 0.392578125, "learning_rate": 2.3703339622646215e-05, "loss": 2.0009, "step": 10122 }, { "epoch": 0.3266069919804158, "grad_norm": 0.369140625, "learning_rate": 2.370205783467453e-05, "loss": 1.9781, "step": 10123 }, { "epoch": 0.32663925583421216, "grad_norm": 0.44140625, "learning_rate": 2.3700775950916836e-05, "loss": 2.0217, "step": 10124 }, { "epoch": 0.32667151968800856, "grad_norm": 0.41796875, "learning_rate": 2.3699493971387236e-05, "loss": 2.0991, "step": 10125 }, { "epoch": 0.3267037835418049, "grad_norm": 0.484375, "learning_rate": 2.3698211896099855e-05, "loss": 2.1115, "step": 10126 }, { "epoch": 0.32673604739560125, "grad_norm": 0.4140625, "learning_rate": 2.3696929725068795e-05, "loss": 2.0918, "step": 10127 }, { "epoch": 0.3267683112493976, "grad_norm": 0.5, "learning_rate": 2.3695647458308168e-05, "loss": 2.0673, "step": 10128 }, { "epoch": 0.32680057510319394, "grad_norm": 0.435546875, "learning_rate": 2.3694365095832097e-05, "loss": 2.0842, "step": 10129 }, { "epoch": 0.3268328389569903, "grad_norm": 0.423828125, "learning_rate": 2.3693082637654693e-05, "loss": 2.0879, "step": 10130 }, { "epoch": 0.3268651028107866, "grad_norm": 0.66015625, "learning_rate": 2.3691800083790067e-05, "loss": 2.1212, "step": 10131 }, { "epoch": 0.32689736666458297, "grad_norm": 0.5625, "learning_rate": 2.3690517434252345e-05, "loss": 2.0861, "step": 10132 }, { "epoch": 0.3269296305183793, "grad_norm": 0.54296875, "learning_rate": 2.3689234689055637e-05, "loss": 2.068, "step": 10133 }, { "epoch": 0.32696189437217565, "grad_norm": 0.52734375, "learning_rate": 2.368795184821407e-05, "loss": 2.077, "step": 10134 }, { "epoch": 0.326994158225972, "grad_norm": 0.470703125, "learning_rate": 2.3686668911741766e-05, "loss": 2.0708, "step": 10135 }, { "epoch": 0.32702642207976834, "grad_norm": 0.48046875, "learning_rate": 2.3685385879652834e-05, "loss": 2.1103, "step": 10136 }, { "epoch": 0.3270586859335647, "grad_norm": 0.490234375, "learning_rate": 2.3684102751961408e-05, "loss": 2.0961, "step": 10137 }, { "epoch": 0.32709094978736103, "grad_norm": 0.46484375, "learning_rate": 2.3682819528681613e-05, "loss": 2.0843, "step": 10138 }, { "epoch": 0.3271232136411574, "grad_norm": 0.466796875, "learning_rate": 2.3681536209827566e-05, "loss": 2.1153, "step": 10139 }, { "epoch": 0.3271554774949537, "grad_norm": 0.458984375, "learning_rate": 2.36802527954134e-05, "loss": 2.0778, "step": 10140 }, { "epoch": 0.32718774134875006, "grad_norm": 0.447265625, "learning_rate": 2.3678969285453237e-05, "loss": 2.0839, "step": 10141 }, { "epoch": 0.3272200052025464, "grad_norm": 0.451171875, "learning_rate": 2.3677685679961202e-05, "loss": 2.1068, "step": 10142 }, { "epoch": 0.32725226905634275, "grad_norm": 0.50390625, "learning_rate": 2.3676401978951432e-05, "loss": 2.1051, "step": 10143 }, { "epoch": 0.3272845329101391, "grad_norm": 0.486328125, "learning_rate": 2.3675118182438055e-05, "loss": 2.0913, "step": 10144 }, { "epoch": 0.3273167967639355, "grad_norm": 0.4375, "learning_rate": 2.3673834290435197e-05, "loss": 2.1195, "step": 10145 }, { "epoch": 0.32734906061773184, "grad_norm": 0.443359375, "learning_rate": 2.3672550302956998e-05, "loss": 2.1182, "step": 10146 }, { "epoch": 0.3273813244715282, "grad_norm": 0.4453125, "learning_rate": 2.3671266220017582e-05, "loss": 2.1451, "step": 10147 }, { "epoch": 0.3274135883253245, "grad_norm": 0.42578125, "learning_rate": 2.3669982041631093e-05, "loss": 2.0991, "step": 10148 }, { "epoch": 0.32744585217912087, "grad_norm": 0.419921875, "learning_rate": 2.3668697767811658e-05, "loss": 2.1132, "step": 10149 }, { "epoch": 0.3274781160329172, "grad_norm": 0.423828125, "learning_rate": 2.366741339857342e-05, "loss": 2.1084, "step": 10150 }, { "epoch": 0.32751037988671355, "grad_norm": 0.41796875, "learning_rate": 2.366612893393051e-05, "loss": 2.0724, "step": 10151 }, { "epoch": 0.3275426437405099, "grad_norm": 0.42578125, "learning_rate": 2.3664844373897068e-05, "loss": 2.1069, "step": 10152 }, { "epoch": 0.32757490759430624, "grad_norm": 0.42578125, "learning_rate": 2.3663559718487237e-05, "loss": 2.1202, "step": 10153 }, { "epoch": 0.3276071714481026, "grad_norm": 0.416015625, "learning_rate": 2.366227496771516e-05, "loss": 2.1195, "step": 10154 }, { "epoch": 0.32763943530189893, "grad_norm": 0.44140625, "learning_rate": 2.3660990121594973e-05, "loss": 2.0624, "step": 10155 }, { "epoch": 0.3276716991556953, "grad_norm": 0.421875, "learning_rate": 2.3659705180140815e-05, "loss": 2.0659, "step": 10156 }, { "epoch": 0.3277039630094916, "grad_norm": 0.41796875, "learning_rate": 2.365842014336684e-05, "loss": 2.0661, "step": 10157 }, { "epoch": 0.32773622686328796, "grad_norm": 0.43359375, "learning_rate": 2.365713501128719e-05, "loss": 2.1077, "step": 10158 }, { "epoch": 0.3277684907170843, "grad_norm": 0.396484375, "learning_rate": 2.3655849783916e-05, "loss": 2.0806, "step": 10159 }, { "epoch": 0.32780075457088065, "grad_norm": 0.421875, "learning_rate": 2.3654564461267428e-05, "loss": 2.0851, "step": 10160 }, { "epoch": 0.327833018424677, "grad_norm": 0.384765625, "learning_rate": 2.3653279043355624e-05, "loss": 2.0864, "step": 10161 }, { "epoch": 0.32786528227847334, "grad_norm": 0.4765625, "learning_rate": 2.3651993530194728e-05, "loss": 2.0898, "step": 10162 }, { "epoch": 0.3278975461322697, "grad_norm": 0.42578125, "learning_rate": 2.3650707921798895e-05, "loss": 2.1108, "step": 10163 }, { "epoch": 0.327929809986066, "grad_norm": 0.5, "learning_rate": 2.3649422218182278e-05, "loss": 2.1045, "step": 10164 }, { "epoch": 0.32796207383986237, "grad_norm": 0.482421875, "learning_rate": 2.3648136419359022e-05, "loss": 2.0998, "step": 10165 }, { "epoch": 0.32799433769365877, "grad_norm": 0.447265625, "learning_rate": 2.3646850525343287e-05, "loss": 2.0874, "step": 10166 }, { "epoch": 0.3280266015474551, "grad_norm": 0.43359375, "learning_rate": 2.3645564536149224e-05, "loss": 2.043, "step": 10167 }, { "epoch": 0.32805886540125145, "grad_norm": 0.46875, "learning_rate": 2.3644278451790985e-05, "loss": 2.0852, "step": 10168 }, { "epoch": 0.3280911292550478, "grad_norm": 0.412109375, "learning_rate": 2.3642992272282734e-05, "loss": 2.1042, "step": 10169 }, { "epoch": 0.32812339310884414, "grad_norm": 0.478515625, "learning_rate": 2.3641705997638625e-05, "loss": 2.1186, "step": 10170 }, { "epoch": 0.3281556569626405, "grad_norm": 0.4140625, "learning_rate": 2.3640419627872815e-05, "loss": 2.0798, "step": 10171 }, { "epoch": 0.32818792081643683, "grad_norm": 0.421875, "learning_rate": 2.3639133162999462e-05, "loss": 2.0693, "step": 10172 }, { "epoch": 0.3282201846702332, "grad_norm": 0.38671875, "learning_rate": 2.3637846603032732e-05, "loss": 2.0948, "step": 10173 }, { "epoch": 0.3282524485240295, "grad_norm": 0.404296875, "learning_rate": 2.3636559947986784e-05, "loss": 2.0971, "step": 10174 }, { "epoch": 0.32828471237782586, "grad_norm": 0.36328125, "learning_rate": 2.363527319787578e-05, "loss": 2.1033, "step": 10175 }, { "epoch": 0.3283169762316222, "grad_norm": 0.40625, "learning_rate": 2.363398635271388e-05, "loss": 2.1105, "step": 10176 }, { "epoch": 0.32834924008541855, "grad_norm": 0.412109375, "learning_rate": 2.3632699412515256e-05, "loss": 2.116, "step": 10177 }, { "epoch": 0.3283815039392149, "grad_norm": 0.447265625, "learning_rate": 2.3631412377294067e-05, "loss": 2.0752, "step": 10178 }, { "epoch": 0.32841376779301124, "grad_norm": 0.453125, "learning_rate": 2.3630125247064485e-05, "loss": 2.0844, "step": 10179 }, { "epoch": 0.3284460316468076, "grad_norm": 0.4140625, "learning_rate": 2.3628838021840677e-05, "loss": 2.1068, "step": 10180 }, { "epoch": 0.3284782955006039, "grad_norm": 0.43359375, "learning_rate": 2.3627550701636808e-05, "loss": 2.0899, "step": 10181 }, { "epoch": 0.32851055935440027, "grad_norm": 0.40625, "learning_rate": 2.3626263286467047e-05, "loss": 2.0867, "step": 10182 }, { "epoch": 0.3285428232081966, "grad_norm": 0.443359375, "learning_rate": 2.3624975776345574e-05, "loss": 2.0893, "step": 10183 }, { "epoch": 0.32857508706199295, "grad_norm": 0.431640625, "learning_rate": 2.3623688171286548e-05, "loss": 2.0936, "step": 10184 }, { "epoch": 0.3286073509157893, "grad_norm": 0.408203125, "learning_rate": 2.3622400471304154e-05, "loss": 2.1351, "step": 10185 }, { "epoch": 0.3286396147695857, "grad_norm": 0.408203125, "learning_rate": 2.3621112676412562e-05, "loss": 2.127, "step": 10186 }, { "epoch": 0.32867187862338204, "grad_norm": 0.419921875, "learning_rate": 2.361982478662595e-05, "loss": 2.1028, "step": 10187 }, { "epoch": 0.3287041424771784, "grad_norm": 0.4765625, "learning_rate": 2.361853680195848e-05, "loss": 2.0919, "step": 10188 }, { "epoch": 0.32873640633097473, "grad_norm": 0.421875, "learning_rate": 2.361724872242435e-05, "loss": 2.0696, "step": 10189 }, { "epoch": 0.3287686701847711, "grad_norm": 0.439453125, "learning_rate": 2.361596054803772e-05, "loss": 2.0499, "step": 10190 }, { "epoch": 0.3288009340385674, "grad_norm": 0.404296875, "learning_rate": 2.3614672278812783e-05, "loss": 2.0756, "step": 10191 }, { "epoch": 0.32883319789236376, "grad_norm": 0.41796875, "learning_rate": 2.3613383914763714e-05, "loss": 2.0863, "step": 10192 }, { "epoch": 0.3288654617461601, "grad_norm": 0.41015625, "learning_rate": 2.361209545590469e-05, "loss": 2.0838, "step": 10193 }, { "epoch": 0.32889772559995645, "grad_norm": 0.400390625, "learning_rate": 2.36108069022499e-05, "loss": 2.0786, "step": 10194 }, { "epoch": 0.3289299894537528, "grad_norm": 0.40625, "learning_rate": 2.3609518253813525e-05, "loss": 2.0919, "step": 10195 }, { "epoch": 0.32896225330754914, "grad_norm": 0.43359375, "learning_rate": 2.3608229510609748e-05, "loss": 2.1211, "step": 10196 }, { "epoch": 0.3289945171613455, "grad_norm": 0.41796875, "learning_rate": 2.360694067265276e-05, "loss": 2.0984, "step": 10197 }, { "epoch": 0.3290267810151418, "grad_norm": 0.412109375, "learning_rate": 2.360565173995674e-05, "loss": 2.1212, "step": 10198 }, { "epoch": 0.32905904486893817, "grad_norm": 0.388671875, "learning_rate": 2.360436271253588e-05, "loss": 2.1038, "step": 10199 }, { "epoch": 0.3290913087227345, "grad_norm": 0.404296875, "learning_rate": 2.360307359040437e-05, "loss": 2.109, "step": 10200 }, { "epoch": 0.32912357257653085, "grad_norm": 0.384765625, "learning_rate": 2.3601784373576388e-05, "loss": 2.111, "step": 10201 }, { "epoch": 0.3291558364303272, "grad_norm": 0.419921875, "learning_rate": 2.3600495062066148e-05, "loss": 2.0669, "step": 10202 }, { "epoch": 0.32918810028412354, "grad_norm": 0.37109375, "learning_rate": 2.3599205655887823e-05, "loss": 2.0747, "step": 10203 }, { "epoch": 0.3292203641379199, "grad_norm": 0.4296875, "learning_rate": 2.359791615505561e-05, "loss": 2.101, "step": 10204 }, { "epoch": 0.32925262799171623, "grad_norm": 0.412109375, "learning_rate": 2.3596626559583702e-05, "loss": 2.1055, "step": 10205 }, { "epoch": 0.32928489184551263, "grad_norm": 0.5078125, "learning_rate": 2.3595336869486303e-05, "loss": 2.1206, "step": 10206 }, { "epoch": 0.32931715569930897, "grad_norm": 0.5234375, "learning_rate": 2.3594047084777596e-05, "loss": 2.1001, "step": 10207 }, { "epoch": 0.3293494195531053, "grad_norm": 0.439453125, "learning_rate": 2.3592757205471783e-05, "loss": 2.0585, "step": 10208 }, { "epoch": 0.32938168340690166, "grad_norm": 0.5703125, "learning_rate": 2.359146723158307e-05, "loss": 2.1209, "step": 10209 }, { "epoch": 0.329413947260698, "grad_norm": 0.474609375, "learning_rate": 2.3590177163125644e-05, "loss": 2.1049, "step": 10210 }, { "epoch": 0.32944621111449435, "grad_norm": 0.498046875, "learning_rate": 2.3588887000113714e-05, "loss": 2.1082, "step": 10211 }, { "epoch": 0.3294784749682907, "grad_norm": 0.462890625, "learning_rate": 2.3587596742561472e-05, "loss": 2.0677, "step": 10212 }, { "epoch": 0.32951073882208703, "grad_norm": 0.474609375, "learning_rate": 2.3586306390483132e-05, "loss": 1.9768, "step": 10213 }, { "epoch": 0.3295430026758834, "grad_norm": 0.431640625, "learning_rate": 2.3585015943892886e-05, "loss": 1.9786, "step": 10214 }, { "epoch": 0.3295752665296797, "grad_norm": 0.4765625, "learning_rate": 2.3583725402804947e-05, "loss": 1.9812, "step": 10215 }, { "epoch": 0.32960753038347607, "grad_norm": 0.384765625, "learning_rate": 2.3582434767233512e-05, "loss": 1.9401, "step": 10216 }, { "epoch": 0.3296397942372724, "grad_norm": 0.431640625, "learning_rate": 2.3581144037192794e-05, "loss": 1.9704, "step": 10217 }, { "epoch": 0.32967205809106875, "grad_norm": 0.3828125, "learning_rate": 2.3579853212697003e-05, "loss": 1.9826, "step": 10218 }, { "epoch": 0.3297043219448651, "grad_norm": 0.3984375, "learning_rate": 2.3578562293760338e-05, "loss": 1.9115, "step": 10219 }, { "epoch": 0.32973658579866144, "grad_norm": 0.376953125, "learning_rate": 2.357727128039702e-05, "loss": 1.9859, "step": 10220 }, { "epoch": 0.3297688496524578, "grad_norm": 0.384765625, "learning_rate": 2.3575980172621243e-05, "loss": 1.9986, "step": 10221 }, { "epoch": 0.32980111350625413, "grad_norm": 0.38671875, "learning_rate": 2.3574688970447237e-05, "loss": 2.0043, "step": 10222 }, { "epoch": 0.3298333773600505, "grad_norm": 0.41796875, "learning_rate": 2.35733976738892e-05, "loss": 1.9751, "step": 10223 }, { "epoch": 0.3298656412138468, "grad_norm": 0.40234375, "learning_rate": 2.357210628296136e-05, "loss": 1.9953, "step": 10224 }, { "epoch": 0.32989790506764316, "grad_norm": 0.37109375, "learning_rate": 2.3570814797677916e-05, "loss": 1.983, "step": 10225 }, { "epoch": 0.32993016892143956, "grad_norm": 0.41796875, "learning_rate": 2.3569523218053092e-05, "loss": 1.9941, "step": 10226 }, { "epoch": 0.3299624327752359, "grad_norm": 0.4765625, "learning_rate": 2.3568231544101108e-05, "loss": 2.0831, "step": 10227 }, { "epoch": 0.32999469662903225, "grad_norm": 0.396484375, "learning_rate": 2.356693977583618e-05, "loss": 2.1128, "step": 10228 }, { "epoch": 0.3300269604828286, "grad_norm": 0.486328125, "learning_rate": 2.3565647913272518e-05, "loss": 2.073, "step": 10229 }, { "epoch": 0.33005922433662493, "grad_norm": 0.455078125, "learning_rate": 2.3564355956424353e-05, "loss": 2.0602, "step": 10230 }, { "epoch": 0.3300914881904213, "grad_norm": 0.390625, "learning_rate": 2.3563063905305897e-05, "loss": 2.063, "step": 10231 }, { "epoch": 0.3301237520442176, "grad_norm": 0.4765625, "learning_rate": 2.3561771759931386e-05, "loss": 2.1101, "step": 10232 }, { "epoch": 0.33015601589801397, "grad_norm": 0.435546875, "learning_rate": 2.356047952031502e-05, "loss": 2.0758, "step": 10233 }, { "epoch": 0.3301882797518103, "grad_norm": 0.400390625, "learning_rate": 2.3559187186471048e-05, "loss": 2.0727, "step": 10234 }, { "epoch": 0.33022054360560665, "grad_norm": 0.453125, "learning_rate": 2.3557894758413677e-05, "loss": 2.0625, "step": 10235 }, { "epoch": 0.330252807459403, "grad_norm": 0.4453125, "learning_rate": 2.355660223615715e-05, "loss": 2.0976, "step": 10236 }, { "epoch": 0.33028507131319934, "grad_norm": 0.419921875, "learning_rate": 2.3555309619715677e-05, "loss": 2.05, "step": 10237 }, { "epoch": 0.3303173351669957, "grad_norm": 0.43359375, "learning_rate": 2.355401690910349e-05, "loss": 2.0793, "step": 10238 }, { "epoch": 0.33034959902079203, "grad_norm": 0.427734375, "learning_rate": 2.3552724104334823e-05, "loss": 2.0982, "step": 10239 }, { "epoch": 0.3303818628745884, "grad_norm": 0.46875, "learning_rate": 2.3551431205423912e-05, "loss": 2.1117, "step": 10240 }, { "epoch": 0.3304141267283847, "grad_norm": 0.490234375, "learning_rate": 2.3550138212384974e-05, "loss": 2.0735, "step": 10241 }, { "epoch": 0.33044639058218106, "grad_norm": 0.431640625, "learning_rate": 2.354884512523225e-05, "loss": 2.0808, "step": 10242 }, { "epoch": 0.3304786544359774, "grad_norm": 0.5, "learning_rate": 2.3547551943979972e-05, "loss": 2.1105, "step": 10243 }, { "epoch": 0.33051091828977375, "grad_norm": 0.416015625, "learning_rate": 2.354625866864237e-05, "loss": 2.1001, "step": 10244 }, { "epoch": 0.3305431821435701, "grad_norm": 0.498046875, "learning_rate": 2.354496529923369e-05, "loss": 2.081, "step": 10245 }, { "epoch": 0.3305754459973665, "grad_norm": 0.44140625, "learning_rate": 2.3543671835768165e-05, "loss": 2.1203, "step": 10246 }, { "epoch": 0.33060770985116283, "grad_norm": 0.45703125, "learning_rate": 2.3542378278260023e-05, "loss": 2.0945, "step": 10247 }, { "epoch": 0.3306399737049592, "grad_norm": 0.427734375, "learning_rate": 2.3541084626723506e-05, "loss": 2.0836, "step": 10248 }, { "epoch": 0.3306722375587555, "grad_norm": 0.416015625, "learning_rate": 2.3539790881172865e-05, "loss": 2.1, "step": 10249 }, { "epoch": 0.33070450141255187, "grad_norm": 0.4296875, "learning_rate": 2.3538497041622325e-05, "loss": 2.1048, "step": 10250 }, { "epoch": 0.3307367652663482, "grad_norm": 0.390625, "learning_rate": 2.353720310808614e-05, "loss": 2.1079, "step": 10251 }, { "epoch": 0.33076902912014455, "grad_norm": 0.408203125, "learning_rate": 2.3535909080578547e-05, "loss": 2.0569, "step": 10252 }, { "epoch": 0.3308012929739409, "grad_norm": 0.41015625, "learning_rate": 2.3534614959113792e-05, "loss": 2.0752, "step": 10253 }, { "epoch": 0.33083355682773724, "grad_norm": 0.4609375, "learning_rate": 2.3533320743706117e-05, "loss": 2.0787, "step": 10254 }, { "epoch": 0.3308658206815336, "grad_norm": 0.447265625, "learning_rate": 2.3532026434369766e-05, "loss": 2.0916, "step": 10255 }, { "epoch": 0.33089808453532993, "grad_norm": 0.498046875, "learning_rate": 2.3530732031118997e-05, "loss": 2.109, "step": 10256 }, { "epoch": 0.33093034838912627, "grad_norm": 0.451171875, "learning_rate": 2.3529437533968042e-05, "loss": 2.0735, "step": 10257 }, { "epoch": 0.3309626122429226, "grad_norm": 0.484375, "learning_rate": 2.352814294293116e-05, "loss": 2.1219, "step": 10258 }, { "epoch": 0.33099487609671896, "grad_norm": 0.431640625, "learning_rate": 2.3526848258022598e-05, "loss": 2.0685, "step": 10259 }, { "epoch": 0.3310271399505153, "grad_norm": 0.43359375, "learning_rate": 2.3525553479256607e-05, "loss": 2.0602, "step": 10260 }, { "epoch": 0.33105940380431165, "grad_norm": 0.4609375, "learning_rate": 2.3524258606647436e-05, "loss": 2.0922, "step": 10261 }, { "epoch": 0.331091667658108, "grad_norm": 0.427734375, "learning_rate": 2.352296364020935e-05, "loss": 2.1219, "step": 10262 }, { "epoch": 0.33112393151190433, "grad_norm": 0.52734375, "learning_rate": 2.3521668579956593e-05, "loss": 2.0685, "step": 10263 }, { "epoch": 0.3311561953657007, "grad_norm": 0.41015625, "learning_rate": 2.3520373425903415e-05, "loss": 2.0833, "step": 10264 }, { "epoch": 0.331188459219497, "grad_norm": 0.5, "learning_rate": 2.3519078178064082e-05, "loss": 2.078, "step": 10265 }, { "epoch": 0.33122072307329337, "grad_norm": 0.421875, "learning_rate": 2.351778283645285e-05, "loss": 2.0711, "step": 10266 }, { "epoch": 0.33125298692708977, "grad_norm": 0.470703125, "learning_rate": 2.3516487401083974e-05, "loss": 2.0904, "step": 10267 }, { "epoch": 0.3312852507808861, "grad_norm": 0.41015625, "learning_rate": 2.3515191871971715e-05, "loss": 2.0748, "step": 10268 }, { "epoch": 0.33131751463468245, "grad_norm": 0.478515625, "learning_rate": 2.3513896249130334e-05, "loss": 2.0974, "step": 10269 }, { "epoch": 0.3313497784884788, "grad_norm": 0.451171875, "learning_rate": 2.351260053257409e-05, "loss": 2.0727, "step": 10270 }, { "epoch": 0.33138204234227514, "grad_norm": 0.44140625, "learning_rate": 2.3511304722317243e-05, "loss": 2.0892, "step": 10271 }, { "epoch": 0.3314143061960715, "grad_norm": 0.439453125, "learning_rate": 2.351000881837406e-05, "loss": 2.0976, "step": 10272 }, { "epoch": 0.33144657004986783, "grad_norm": 0.435546875, "learning_rate": 2.350871282075881e-05, "loss": 2.0755, "step": 10273 }, { "epoch": 0.33147883390366417, "grad_norm": 0.41015625, "learning_rate": 2.350741672948575e-05, "loss": 2.081, "step": 10274 }, { "epoch": 0.3315110977574605, "grad_norm": 0.423828125, "learning_rate": 2.350612054456915e-05, "loss": 2.075, "step": 10275 }, { "epoch": 0.33154336161125686, "grad_norm": 0.388671875, "learning_rate": 2.3504824266023275e-05, "loss": 2.0531, "step": 10276 }, { "epoch": 0.3315756254650532, "grad_norm": 0.40625, "learning_rate": 2.35035278938624e-05, "loss": 2.0826, "step": 10277 }, { "epoch": 0.33160788931884955, "grad_norm": 0.40234375, "learning_rate": 2.3502231428100787e-05, "loss": 2.0689, "step": 10278 }, { "epoch": 0.3316401531726459, "grad_norm": 0.41015625, "learning_rate": 2.3500934868752715e-05, "loss": 2.0627, "step": 10279 }, { "epoch": 0.33167241702644223, "grad_norm": 0.404296875, "learning_rate": 2.3499638215832447e-05, "loss": 2.0991, "step": 10280 }, { "epoch": 0.3317046808802386, "grad_norm": 0.396484375, "learning_rate": 2.3498341469354255e-05, "loss": 2.11, "step": 10281 }, { "epoch": 0.3317369447340349, "grad_norm": 0.4453125, "learning_rate": 2.3497044629332425e-05, "loss": 2.0801, "step": 10282 }, { "epoch": 0.33176920858783127, "grad_norm": 0.408203125, "learning_rate": 2.349574769578122e-05, "loss": 2.0813, "step": 10283 }, { "epoch": 0.3318014724416276, "grad_norm": 0.416015625, "learning_rate": 2.3494450668714917e-05, "loss": 2.0733, "step": 10284 }, { "epoch": 0.33183373629542395, "grad_norm": 0.404296875, "learning_rate": 2.3493153548147796e-05, "loss": 2.062, "step": 10285 }, { "epoch": 0.3318660001492203, "grad_norm": 0.3828125, "learning_rate": 2.3491856334094132e-05, "loss": 2.086, "step": 10286 }, { "epoch": 0.3318982640030167, "grad_norm": 0.3984375, "learning_rate": 2.3490559026568208e-05, "loss": 2.0275, "step": 10287 }, { "epoch": 0.33193052785681304, "grad_norm": 0.388671875, "learning_rate": 2.3489261625584303e-05, "loss": 2.0852, "step": 10288 }, { "epoch": 0.3319627917106094, "grad_norm": 0.41015625, "learning_rate": 2.3487964131156693e-05, "loss": 2.0993, "step": 10289 }, { "epoch": 0.3319950555644057, "grad_norm": 0.439453125, "learning_rate": 2.3486666543299665e-05, "loss": 2.0977, "step": 10290 }, { "epoch": 0.33202731941820207, "grad_norm": 0.47265625, "learning_rate": 2.34853688620275e-05, "loss": 2.0828, "step": 10291 }, { "epoch": 0.3320595832719984, "grad_norm": 0.474609375, "learning_rate": 2.3484071087354482e-05, "loss": 2.0698, "step": 10292 }, { "epoch": 0.33209184712579476, "grad_norm": 0.435546875, "learning_rate": 2.3482773219294893e-05, "loss": 2.1192, "step": 10293 }, { "epoch": 0.3321241109795911, "grad_norm": 0.4140625, "learning_rate": 2.3481475257863027e-05, "loss": 2.1084, "step": 10294 }, { "epoch": 0.33215637483338745, "grad_norm": 0.416015625, "learning_rate": 2.348017720307316e-05, "loss": 2.0883, "step": 10295 }, { "epoch": 0.3321886386871838, "grad_norm": 0.40625, "learning_rate": 2.3478879054939593e-05, "loss": 2.0922, "step": 10296 }, { "epoch": 0.33222090254098013, "grad_norm": 0.40625, "learning_rate": 2.3477580813476604e-05, "loss": 2.0795, "step": 10297 }, { "epoch": 0.3322531663947765, "grad_norm": 0.4296875, "learning_rate": 2.3476282478698486e-05, "loss": 2.0895, "step": 10298 }, { "epoch": 0.3322854302485728, "grad_norm": 0.48828125, "learning_rate": 2.347498405061953e-05, "loss": 2.1047, "step": 10299 }, { "epoch": 0.33231769410236917, "grad_norm": 0.439453125, "learning_rate": 2.3473685529254033e-05, "loss": 2.0733, "step": 10300 }, { "epoch": 0.3323499579561655, "grad_norm": 0.439453125, "learning_rate": 2.3472386914616287e-05, "loss": 2.0879, "step": 10301 }, { "epoch": 0.33238222180996185, "grad_norm": 0.427734375, "learning_rate": 2.347108820672058e-05, "loss": 2.1003, "step": 10302 }, { "epoch": 0.3324144856637582, "grad_norm": 0.439453125, "learning_rate": 2.3469789405581212e-05, "loss": 2.1124, "step": 10303 }, { "epoch": 0.33244674951755454, "grad_norm": 0.4375, "learning_rate": 2.3468490511212475e-05, "loss": 2.0979, "step": 10304 }, { "epoch": 0.3324790133713509, "grad_norm": 0.4375, "learning_rate": 2.3467191523628677e-05, "loss": 2.0872, "step": 10305 }, { "epoch": 0.33251127722514723, "grad_norm": 0.439453125, "learning_rate": 2.3465892442844102e-05, "loss": 2.1176, "step": 10306 }, { "epoch": 0.3325435410789436, "grad_norm": 0.404296875, "learning_rate": 2.3464593268873062e-05, "loss": 2.0973, "step": 10307 }, { "epoch": 0.33257580493273997, "grad_norm": 0.4296875, "learning_rate": 2.3463294001729848e-05, "loss": 2.0975, "step": 10308 }, { "epoch": 0.3326080687865363, "grad_norm": 0.40234375, "learning_rate": 2.3461994641428768e-05, "loss": 2.1083, "step": 10309 }, { "epoch": 0.33264033264033266, "grad_norm": 0.46875, "learning_rate": 2.346069518798412e-05, "loss": 2.124, "step": 10310 }, { "epoch": 0.332672596494129, "grad_norm": 0.41015625, "learning_rate": 2.3459395641410208e-05, "loss": 2.0824, "step": 10311 }, { "epoch": 0.33270486034792535, "grad_norm": 0.43359375, "learning_rate": 2.3458096001721338e-05, "loss": 2.1083, "step": 10312 }, { "epoch": 0.3327371242017217, "grad_norm": 0.43359375, "learning_rate": 2.3456796268931817e-05, "loss": 2.0865, "step": 10313 }, { "epoch": 0.33276938805551803, "grad_norm": 0.4296875, "learning_rate": 2.3455496443055947e-05, "loss": 2.0137, "step": 10314 }, { "epoch": 0.3328016519093144, "grad_norm": 0.375, "learning_rate": 2.345419652410804e-05, "loss": 2.008, "step": 10315 }, { "epoch": 0.3328339157631107, "grad_norm": 0.376953125, "learning_rate": 2.3452896512102404e-05, "loss": 2.0017, "step": 10316 }, { "epoch": 0.33286617961690707, "grad_norm": 0.3828125, "learning_rate": 2.345159640705334e-05, "loss": 1.9873, "step": 10317 }, { "epoch": 0.3328984434707034, "grad_norm": 0.3671875, "learning_rate": 2.3450296208975175e-05, "loss": 1.9745, "step": 10318 }, { "epoch": 0.33293070732449975, "grad_norm": 0.390625, "learning_rate": 2.3448995917882207e-05, "loss": 1.9997, "step": 10319 }, { "epoch": 0.3329629711782961, "grad_norm": 0.35546875, "learning_rate": 2.3447695533788754e-05, "loss": 1.9888, "step": 10320 }, { "epoch": 0.33299523503209244, "grad_norm": 0.369140625, "learning_rate": 2.344639505670913e-05, "loss": 1.9857, "step": 10321 }, { "epoch": 0.3330274988858888, "grad_norm": 0.36328125, "learning_rate": 2.3445094486657647e-05, "loss": 1.9761, "step": 10322 }, { "epoch": 0.33305976273968513, "grad_norm": 0.361328125, "learning_rate": 2.3443793823648622e-05, "loss": 2.016, "step": 10323 }, { "epoch": 0.33309202659348147, "grad_norm": 0.37109375, "learning_rate": 2.3442493067696373e-05, "loss": 1.9742, "step": 10324 }, { "epoch": 0.3331242904472778, "grad_norm": 0.376953125, "learning_rate": 2.3441192218815217e-05, "loss": 2.0132, "step": 10325 }, { "epoch": 0.33315655430107416, "grad_norm": 0.36328125, "learning_rate": 2.343989127701947e-05, "loss": 1.9967, "step": 10326 }, { "epoch": 0.33318881815487056, "grad_norm": 0.3515625, "learning_rate": 2.343859024232346e-05, "loss": 1.9248, "step": 10327 }, { "epoch": 0.3332210820086669, "grad_norm": 0.361328125, "learning_rate": 2.34372891147415e-05, "loss": 1.9483, "step": 10328 }, { "epoch": 0.33325334586246325, "grad_norm": 0.353515625, "learning_rate": 2.343598789428791e-05, "loss": 1.9737, "step": 10329 }, { "epoch": 0.3332856097162596, "grad_norm": 0.3828125, "learning_rate": 2.343468658097702e-05, "loss": 1.9787, "step": 10330 }, { "epoch": 0.33331787357005593, "grad_norm": 0.353515625, "learning_rate": 2.343338517482316e-05, "loss": 1.9505, "step": 10331 }, { "epoch": 0.3333501374238523, "grad_norm": 0.39453125, "learning_rate": 2.3432083675840635e-05, "loss": 1.9886, "step": 10332 }, { "epoch": 0.3333824012776486, "grad_norm": 0.373046875, "learning_rate": 2.3430782084043785e-05, "loss": 2.0095, "step": 10333 }, { "epoch": 0.33341466513144496, "grad_norm": 0.380859375, "learning_rate": 2.3429480399446933e-05, "loss": 1.9717, "step": 10334 }, { "epoch": 0.3334469289852413, "grad_norm": 0.361328125, "learning_rate": 2.3428178622064412e-05, "loss": 1.9774, "step": 10335 }, { "epoch": 0.33347919283903765, "grad_norm": 0.404296875, "learning_rate": 2.3426876751910543e-05, "loss": 2.0147, "step": 10336 }, { "epoch": 0.333511456692834, "grad_norm": 0.361328125, "learning_rate": 2.342557478899966e-05, "loss": 1.969, "step": 10337 }, { "epoch": 0.33354372054663034, "grad_norm": 0.400390625, "learning_rate": 2.3424272733346098e-05, "loss": 1.9671, "step": 10338 }, { "epoch": 0.3335759844004267, "grad_norm": 0.353515625, "learning_rate": 2.3422970584964186e-05, "loss": 1.9206, "step": 10339 }, { "epoch": 0.333608248254223, "grad_norm": 0.392578125, "learning_rate": 2.342166834386825e-05, "loss": 2.0058, "step": 10340 }, { "epoch": 0.33364051210801937, "grad_norm": 0.384765625, "learning_rate": 2.3420366010072635e-05, "loss": 1.9952, "step": 10341 }, { "epoch": 0.3336727759618157, "grad_norm": 0.35546875, "learning_rate": 2.341906358359167e-05, "loss": 2.0098, "step": 10342 }, { "epoch": 0.33370503981561206, "grad_norm": 0.369140625, "learning_rate": 2.3417761064439696e-05, "loss": 2.0038, "step": 10343 }, { "epoch": 0.3337373036694084, "grad_norm": 0.359375, "learning_rate": 2.3416458452631043e-05, "loss": 1.9842, "step": 10344 }, { "epoch": 0.33376956752320475, "grad_norm": 0.365234375, "learning_rate": 2.3415155748180058e-05, "loss": 1.9699, "step": 10345 }, { "epoch": 0.3338018313770011, "grad_norm": 0.369140625, "learning_rate": 2.3413852951101074e-05, "loss": 1.9833, "step": 10346 }, { "epoch": 0.33383409523079743, "grad_norm": 0.3671875, "learning_rate": 2.341255006140843e-05, "loss": 2.008, "step": 10347 }, { "epoch": 0.33386635908459383, "grad_norm": 0.369140625, "learning_rate": 2.3411247079116474e-05, "loss": 1.986, "step": 10348 }, { "epoch": 0.3338986229383902, "grad_norm": 0.375, "learning_rate": 2.340994400423954e-05, "loss": 2.0015, "step": 10349 }, { "epoch": 0.3339308867921865, "grad_norm": 0.359375, "learning_rate": 2.3408640836791978e-05, "loss": 1.9946, "step": 10350 }, { "epoch": 0.33396315064598286, "grad_norm": 0.404296875, "learning_rate": 2.3407337576788125e-05, "loss": 1.9866, "step": 10351 }, { "epoch": 0.3339954144997792, "grad_norm": 0.361328125, "learning_rate": 2.340603422424234e-05, "loss": 1.9916, "step": 10352 }, { "epoch": 0.33402767835357555, "grad_norm": 0.390625, "learning_rate": 2.3404730779168957e-05, "loss": 1.9915, "step": 10353 }, { "epoch": 0.3340599422073719, "grad_norm": 0.365234375, "learning_rate": 2.3403427241582325e-05, "loss": 1.9614, "step": 10354 }, { "epoch": 0.33409220606116824, "grad_norm": 0.38671875, "learning_rate": 2.3402123611496795e-05, "loss": 2.0632, "step": 10355 }, { "epoch": 0.3341244699149646, "grad_norm": 0.416015625, "learning_rate": 2.340081988892672e-05, "loss": 2.1067, "step": 10356 }, { "epoch": 0.3341567337687609, "grad_norm": 0.40625, "learning_rate": 2.339951607388644e-05, "loss": 2.0597, "step": 10357 }, { "epoch": 0.33418899762255727, "grad_norm": 0.40234375, "learning_rate": 2.3398212166390317e-05, "loss": 2.0721, "step": 10358 }, { "epoch": 0.3342212614763536, "grad_norm": 0.388671875, "learning_rate": 2.3396908166452702e-05, "loss": 2.1132, "step": 10359 }, { "epoch": 0.33425352533014996, "grad_norm": 0.431640625, "learning_rate": 2.339560407408794e-05, "loss": 2.0953, "step": 10360 }, { "epoch": 0.3342857891839463, "grad_norm": 0.3984375, "learning_rate": 2.3394299889310396e-05, "loss": 2.0861, "step": 10361 }, { "epoch": 0.33431805303774265, "grad_norm": 0.453125, "learning_rate": 2.3392995612134418e-05, "loss": 2.082, "step": 10362 }, { "epoch": 0.334350316891539, "grad_norm": 0.4140625, "learning_rate": 2.3391691242574366e-05, "loss": 2.0484, "step": 10363 }, { "epoch": 0.33438258074533533, "grad_norm": 0.38671875, "learning_rate": 2.33903867806446e-05, "loss": 2.0867, "step": 10364 }, { "epoch": 0.3344148445991317, "grad_norm": 0.439453125, "learning_rate": 2.3389082226359472e-05, "loss": 2.0648, "step": 10365 }, { "epoch": 0.334447108452928, "grad_norm": 0.484375, "learning_rate": 2.3387777579733344e-05, "loss": 2.0966, "step": 10366 }, { "epoch": 0.33447937230672437, "grad_norm": 0.408203125, "learning_rate": 2.338647284078058e-05, "loss": 2.0403, "step": 10367 }, { "epoch": 0.33451163616052076, "grad_norm": 0.494140625, "learning_rate": 2.3385168009515538e-05, "loss": 2.0894, "step": 10368 }, { "epoch": 0.3345439000143171, "grad_norm": 0.5, "learning_rate": 2.3383863085952584e-05, "loss": 2.0907, "step": 10369 }, { "epoch": 0.33457616386811345, "grad_norm": 0.484375, "learning_rate": 2.338255807010608e-05, "loss": 2.0849, "step": 10370 }, { "epoch": 0.3346084277219098, "grad_norm": 0.427734375, "learning_rate": 2.3381252961990383e-05, "loss": 2.0837, "step": 10371 }, { "epoch": 0.33464069157570614, "grad_norm": 0.486328125, "learning_rate": 2.3379947761619878e-05, "loss": 2.0933, "step": 10372 }, { "epoch": 0.3346729554295025, "grad_norm": 0.380859375, "learning_rate": 2.3378642469008914e-05, "loss": 2.0676, "step": 10373 }, { "epoch": 0.3347052192832988, "grad_norm": 0.453125, "learning_rate": 2.3377337084171865e-05, "loss": 2.0864, "step": 10374 }, { "epoch": 0.33473748313709517, "grad_norm": 0.42578125, "learning_rate": 2.3376031607123095e-05, "loss": 2.0875, "step": 10375 }, { "epoch": 0.3347697469908915, "grad_norm": 0.43359375, "learning_rate": 2.3374726037876982e-05, "loss": 2.0619, "step": 10376 }, { "epoch": 0.33480201084468786, "grad_norm": 0.4140625, "learning_rate": 2.3373420376447897e-05, "loss": 2.0754, "step": 10377 }, { "epoch": 0.3348342746984842, "grad_norm": 0.40234375, "learning_rate": 2.3372114622850202e-05, "loss": 2.1127, "step": 10378 }, { "epoch": 0.33486653855228055, "grad_norm": 0.419921875, "learning_rate": 2.3370808777098277e-05, "loss": 2.1035, "step": 10379 }, { "epoch": 0.3348988024060769, "grad_norm": 0.41796875, "learning_rate": 2.336950283920649e-05, "loss": 2.0993, "step": 10380 }, { "epoch": 0.33493106625987323, "grad_norm": 0.427734375, "learning_rate": 2.3368196809189226e-05, "loss": 2.1111, "step": 10381 }, { "epoch": 0.3349633301136696, "grad_norm": 0.3984375, "learning_rate": 2.3366890687060856e-05, "loss": 2.0821, "step": 10382 }, { "epoch": 0.3349955939674659, "grad_norm": 0.404296875, "learning_rate": 2.3365584472835748e-05, "loss": 2.1215, "step": 10383 }, { "epoch": 0.33502785782126226, "grad_norm": 0.416015625, "learning_rate": 2.3364278166528296e-05, "loss": 2.0869, "step": 10384 }, { "epoch": 0.3350601216750586, "grad_norm": 0.388671875, "learning_rate": 2.3362971768152862e-05, "loss": 2.1041, "step": 10385 }, { "epoch": 0.33509238552885495, "grad_norm": 0.4453125, "learning_rate": 2.3361665277723843e-05, "loss": 2.0999, "step": 10386 }, { "epoch": 0.3351246493826513, "grad_norm": 0.466796875, "learning_rate": 2.3360358695255603e-05, "loss": 2.089, "step": 10387 }, { "epoch": 0.3351569132364477, "grad_norm": 0.484375, "learning_rate": 2.3359052020762536e-05, "loss": 2.1221, "step": 10388 }, { "epoch": 0.33518917709024404, "grad_norm": 0.61328125, "learning_rate": 2.3357745254259024e-05, "loss": 2.0701, "step": 10389 }, { "epoch": 0.3352214409440404, "grad_norm": 0.56640625, "learning_rate": 2.3356438395759444e-05, "loss": 2.0937, "step": 10390 }, { "epoch": 0.3352537047978367, "grad_norm": 0.494140625, "learning_rate": 2.3355131445278187e-05, "loss": 2.0962, "step": 10391 }, { "epoch": 0.33528596865163307, "grad_norm": 0.498046875, "learning_rate": 2.3353824402829634e-05, "loss": 2.11, "step": 10392 }, { "epoch": 0.3353182325054294, "grad_norm": 0.4921875, "learning_rate": 2.3352517268428177e-05, "loss": 2.0776, "step": 10393 }, { "epoch": 0.33535049635922576, "grad_norm": 0.5, "learning_rate": 2.33512100420882e-05, "loss": 2.1033, "step": 10394 }, { "epoch": 0.3353827602130221, "grad_norm": 0.5390625, "learning_rate": 2.33499027238241e-05, "loss": 2.0876, "step": 10395 }, { "epoch": 0.33541502406681845, "grad_norm": 0.47265625, "learning_rate": 2.3348595313650258e-05, "loss": 2.0672, "step": 10396 }, { "epoch": 0.3354472879206148, "grad_norm": 0.5078125, "learning_rate": 2.3347287811581068e-05, "loss": 2.0694, "step": 10397 }, { "epoch": 0.33547955177441113, "grad_norm": 0.470703125, "learning_rate": 2.3345980217630918e-05, "loss": 2.0952, "step": 10398 }, { "epoch": 0.3355118156282075, "grad_norm": 0.462890625, "learning_rate": 2.3344672531814214e-05, "loss": 2.073, "step": 10399 }, { "epoch": 0.3355440794820038, "grad_norm": 0.4140625, "learning_rate": 2.334336475414533e-05, "loss": 2.0747, "step": 10400 }, { "epoch": 0.33557634333580016, "grad_norm": 0.4453125, "learning_rate": 2.334205688463868e-05, "loss": 2.086, "step": 10401 }, { "epoch": 0.3356086071895965, "grad_norm": 0.466796875, "learning_rate": 2.334074892330865e-05, "loss": 2.0348, "step": 10402 }, { "epoch": 0.33564087104339285, "grad_norm": 0.4296875, "learning_rate": 2.333944087016964e-05, "loss": 2.0889, "step": 10403 }, { "epoch": 0.3356731348971892, "grad_norm": 0.4296875, "learning_rate": 2.333813272523605e-05, "loss": 2.0804, "step": 10404 }, { "epoch": 0.33570539875098554, "grad_norm": 0.458984375, "learning_rate": 2.333682448852227e-05, "loss": 2.0538, "step": 10405 }, { "epoch": 0.3357376626047819, "grad_norm": 0.42578125, "learning_rate": 2.3335516160042716e-05, "loss": 2.0794, "step": 10406 }, { "epoch": 0.3357699264585782, "grad_norm": 0.392578125, "learning_rate": 2.3334207739811776e-05, "loss": 2.0267, "step": 10407 }, { "epoch": 0.3358021903123746, "grad_norm": 0.44921875, "learning_rate": 2.333289922784385e-05, "loss": 2.0807, "step": 10408 }, { "epoch": 0.33583445416617097, "grad_norm": 0.4140625, "learning_rate": 2.3331590624153356e-05, "loss": 2.0912, "step": 10409 }, { "epoch": 0.3358667180199673, "grad_norm": 0.458984375, "learning_rate": 2.3330281928754683e-05, "loss": 2.0849, "step": 10410 }, { "epoch": 0.33589898187376366, "grad_norm": 0.443359375, "learning_rate": 2.3328973141662245e-05, "loss": 2.1207, "step": 10411 }, { "epoch": 0.33593124572756, "grad_norm": 0.4296875, "learning_rate": 2.332766426289045e-05, "loss": 2.1153, "step": 10412 }, { "epoch": 0.33596350958135635, "grad_norm": 0.48828125, "learning_rate": 2.3326355292453698e-05, "loss": 2.13, "step": 10413 }, { "epoch": 0.3359957734351527, "grad_norm": 0.408203125, "learning_rate": 2.33250462303664e-05, "loss": 2.1008, "step": 10414 }, { "epoch": 0.33602803728894903, "grad_norm": 0.5078125, "learning_rate": 2.3323737076642963e-05, "loss": 2.1066, "step": 10415 }, { "epoch": 0.3360603011427454, "grad_norm": 0.4453125, "learning_rate": 2.33224278312978e-05, "loss": 2.0807, "step": 10416 }, { "epoch": 0.3360925649965417, "grad_norm": 0.451171875, "learning_rate": 2.3321118494345327e-05, "loss": 2.0795, "step": 10417 }, { "epoch": 0.33612482885033806, "grad_norm": 0.49609375, "learning_rate": 2.3319809065799946e-05, "loss": 2.0698, "step": 10418 }, { "epoch": 0.3361570927041344, "grad_norm": 0.5, "learning_rate": 2.3318499545676075e-05, "loss": 2.0649, "step": 10419 }, { "epoch": 0.33618935655793075, "grad_norm": 0.43359375, "learning_rate": 2.3317189933988134e-05, "loss": 2.0886, "step": 10420 }, { "epoch": 0.3362216204117271, "grad_norm": 0.466796875, "learning_rate": 2.3315880230750528e-05, "loss": 2.0728, "step": 10421 }, { "epoch": 0.33625388426552344, "grad_norm": 0.416015625, "learning_rate": 2.331457043597768e-05, "loss": 2.0802, "step": 10422 }, { "epoch": 0.3362861481193198, "grad_norm": 0.494140625, "learning_rate": 2.3313260549684006e-05, "loss": 2.0738, "step": 10423 }, { "epoch": 0.3363184119731161, "grad_norm": 0.45703125, "learning_rate": 2.3311950571883923e-05, "loss": 2.0663, "step": 10424 }, { "epoch": 0.33635067582691247, "grad_norm": 0.466796875, "learning_rate": 2.3310640502591854e-05, "loss": 2.0993, "step": 10425 }, { "epoch": 0.3363829396807088, "grad_norm": 0.423828125, "learning_rate": 2.3309330341822212e-05, "loss": 2.0289, "step": 10426 }, { "epoch": 0.33641520353450516, "grad_norm": 0.474609375, "learning_rate": 2.3308020089589427e-05, "loss": 2.0635, "step": 10427 }, { "epoch": 0.33644746738830156, "grad_norm": 0.400390625, "learning_rate": 2.330670974590791e-05, "loss": 2.1153, "step": 10428 }, { "epoch": 0.3364797312420979, "grad_norm": 0.421875, "learning_rate": 2.33053993107921e-05, "loss": 2.0496, "step": 10429 }, { "epoch": 0.33651199509589425, "grad_norm": 0.482421875, "learning_rate": 2.3304088784256405e-05, "loss": 2.0563, "step": 10430 }, { "epoch": 0.3365442589496906, "grad_norm": 0.54296875, "learning_rate": 2.3302778166315264e-05, "loss": 2.099, "step": 10431 }, { "epoch": 0.33657652280348693, "grad_norm": 0.4375, "learning_rate": 2.3301467456983095e-05, "loss": 2.1014, "step": 10432 }, { "epoch": 0.3366087866572833, "grad_norm": 0.55078125, "learning_rate": 2.3300156656274325e-05, "loss": 2.0931, "step": 10433 }, { "epoch": 0.3366410505110796, "grad_norm": 0.486328125, "learning_rate": 2.3298845764203392e-05, "loss": 2.0778, "step": 10434 }, { "epoch": 0.33667331436487596, "grad_norm": 0.46484375, "learning_rate": 2.3297534780784713e-05, "loss": 2.1266, "step": 10435 }, { "epoch": 0.3367055782186723, "grad_norm": 0.470703125, "learning_rate": 2.3296223706032724e-05, "loss": 2.109, "step": 10436 }, { "epoch": 0.33673784207246865, "grad_norm": 0.44140625, "learning_rate": 2.3294912539961856e-05, "loss": 2.1085, "step": 10437 }, { "epoch": 0.336770105926265, "grad_norm": 0.44140625, "learning_rate": 2.3293601282586542e-05, "loss": 2.0854, "step": 10438 }, { "epoch": 0.33680236978006134, "grad_norm": 0.431640625, "learning_rate": 2.3292289933921215e-05, "loss": 2.0922, "step": 10439 }, { "epoch": 0.3368346336338577, "grad_norm": 0.44140625, "learning_rate": 2.329097849398031e-05, "loss": 2.0914, "step": 10440 }, { "epoch": 0.336866897487654, "grad_norm": 0.443359375, "learning_rate": 2.328966696277826e-05, "loss": 2.0245, "step": 10441 }, { "epoch": 0.33689916134145037, "grad_norm": 0.4765625, "learning_rate": 2.3288355340329502e-05, "loss": 1.9949, "step": 10442 }, { "epoch": 0.3369314251952467, "grad_norm": 0.419921875, "learning_rate": 2.328704362664848e-05, "loss": 1.9872, "step": 10443 }, { "epoch": 0.33696368904904306, "grad_norm": 0.47265625, "learning_rate": 2.3285731821749625e-05, "loss": 1.9427, "step": 10444 }, { "epoch": 0.3369959529028394, "grad_norm": 0.408203125, "learning_rate": 2.3284419925647375e-05, "loss": 1.9411, "step": 10445 }, { "epoch": 0.33702821675663575, "grad_norm": 0.4140625, "learning_rate": 2.3283107938356174e-05, "loss": 1.9448, "step": 10446 }, { "epoch": 0.3370604806104321, "grad_norm": 0.392578125, "learning_rate": 2.3281795859890464e-05, "loss": 1.984, "step": 10447 }, { "epoch": 0.33709274446422843, "grad_norm": 0.4140625, "learning_rate": 2.3280483690264688e-05, "loss": 1.962, "step": 10448 }, { "epoch": 0.33712500831802483, "grad_norm": 0.390625, "learning_rate": 2.327917142949329e-05, "loss": 1.9663, "step": 10449 }, { "epoch": 0.3371572721718212, "grad_norm": 0.40625, "learning_rate": 2.3277859077590706e-05, "loss": 1.9898, "step": 10450 }, { "epoch": 0.3371895360256175, "grad_norm": 0.408203125, "learning_rate": 2.327654663457139e-05, "loss": 1.959, "step": 10451 }, { "epoch": 0.33722179987941386, "grad_norm": 0.373046875, "learning_rate": 2.3275234100449786e-05, "loss": 1.9491, "step": 10452 }, { "epoch": 0.3372540637332102, "grad_norm": 0.435546875, "learning_rate": 2.3273921475240346e-05, "loss": 1.9314, "step": 10453 }, { "epoch": 0.33728632758700655, "grad_norm": 0.46875, "learning_rate": 2.3272608758957513e-05, "loss": 1.9685, "step": 10454 }, { "epoch": 0.3373185914408029, "grad_norm": 0.400390625, "learning_rate": 2.3271295951615738e-05, "loss": 1.9709, "step": 10455 }, { "epoch": 0.33735085529459924, "grad_norm": 0.435546875, "learning_rate": 2.326998305322946e-05, "loss": 1.932, "step": 10456 }, { "epoch": 0.3373831191483956, "grad_norm": 0.431640625, "learning_rate": 2.3268670063813156e-05, "loss": 1.9386, "step": 10457 }, { "epoch": 0.3374153830021919, "grad_norm": 0.390625, "learning_rate": 2.3267356983381257e-05, "loss": 1.9757, "step": 10458 }, { "epoch": 0.33744764685598827, "grad_norm": 0.478515625, "learning_rate": 2.326604381194822e-05, "loss": 1.9842, "step": 10459 }, { "epoch": 0.3374799107097846, "grad_norm": 0.51171875, "learning_rate": 2.326473054952851e-05, "loss": 1.9264, "step": 10460 }, { "epoch": 0.33751217456358096, "grad_norm": 0.431640625, "learning_rate": 2.3263417196136572e-05, "loss": 1.9247, "step": 10461 }, { "epoch": 0.3375444384173773, "grad_norm": 0.474609375, "learning_rate": 2.3262103751786864e-05, "loss": 1.977, "step": 10462 }, { "epoch": 0.33757670227117365, "grad_norm": 0.50390625, "learning_rate": 2.326079021649385e-05, "loss": 1.9686, "step": 10463 }, { "epoch": 0.33760896612497, "grad_norm": 0.4375, "learning_rate": 2.325947659027198e-05, "loss": 1.9882, "step": 10464 }, { "epoch": 0.33764122997876633, "grad_norm": 0.5234375, "learning_rate": 2.3258162873135714e-05, "loss": 2.0029, "step": 10465 }, { "epoch": 0.3376734938325627, "grad_norm": 0.494140625, "learning_rate": 2.325684906509952e-05, "loss": 1.9865, "step": 10466 }, { "epoch": 0.337705757686359, "grad_norm": 0.443359375, "learning_rate": 2.3255535166177856e-05, "loss": 1.9863, "step": 10467 }, { "epoch": 0.33773802154015536, "grad_norm": 0.462890625, "learning_rate": 2.325422117638518e-05, "loss": 2.0127, "step": 10468 }, { "epoch": 0.33777028539395176, "grad_norm": 0.51953125, "learning_rate": 2.325290709573596e-05, "loss": 2.0197, "step": 10469 }, { "epoch": 0.3378025492477481, "grad_norm": 0.47265625, "learning_rate": 2.325159292424466e-05, "loss": 2.0778, "step": 10470 }, { "epoch": 0.33783481310154445, "grad_norm": 0.5078125, "learning_rate": 2.3250278661925745e-05, "loss": 2.1404, "step": 10471 }, { "epoch": 0.3378670769553408, "grad_norm": 0.47265625, "learning_rate": 2.324896430879368e-05, "loss": 2.1326, "step": 10472 }, { "epoch": 0.33789934080913714, "grad_norm": 0.52734375, "learning_rate": 2.3247649864862935e-05, "loss": 2.0761, "step": 10473 }, { "epoch": 0.3379316046629335, "grad_norm": 0.51953125, "learning_rate": 2.3246335330147975e-05, "loss": 2.0915, "step": 10474 }, { "epoch": 0.3379638685167298, "grad_norm": 0.46875, "learning_rate": 2.3245020704663277e-05, "loss": 2.1645, "step": 10475 }, { "epoch": 0.33799613237052617, "grad_norm": 0.474609375, "learning_rate": 2.3243705988423295e-05, "loss": 2.0854, "step": 10476 }, { "epoch": 0.3380283962243225, "grad_norm": 0.51953125, "learning_rate": 2.324239118144252e-05, "loss": 2.0567, "step": 10477 }, { "epoch": 0.33806066007811886, "grad_norm": 0.470703125, "learning_rate": 2.3241076283735415e-05, "loss": 2.0732, "step": 10478 }, { "epoch": 0.3380929239319152, "grad_norm": 0.4453125, "learning_rate": 2.3239761295316452e-05, "loss": 2.0923, "step": 10479 }, { "epoch": 0.33812518778571155, "grad_norm": 0.439453125, "learning_rate": 2.3238446216200108e-05, "loss": 2.0907, "step": 10480 }, { "epoch": 0.3381574516395079, "grad_norm": 0.42578125, "learning_rate": 2.3237131046400864e-05, "loss": 2.066, "step": 10481 }, { "epoch": 0.33818971549330423, "grad_norm": 0.408203125, "learning_rate": 2.3235815785933184e-05, "loss": 2.0462, "step": 10482 }, { "epoch": 0.3382219793471006, "grad_norm": 0.4296875, "learning_rate": 2.3234500434811556e-05, "loss": 2.058, "step": 10483 }, { "epoch": 0.3382542432008969, "grad_norm": 0.435546875, "learning_rate": 2.3233184993050453e-05, "loss": 2.0713, "step": 10484 }, { "epoch": 0.33828650705469326, "grad_norm": 0.421875, "learning_rate": 2.3231869460664356e-05, "loss": 2.0761, "step": 10485 }, { "epoch": 0.3383187709084896, "grad_norm": 0.4296875, "learning_rate": 2.3230553837667747e-05, "loss": 2.1045, "step": 10486 }, { "epoch": 0.33835103476228595, "grad_norm": 0.427734375, "learning_rate": 2.3229238124075102e-05, "loss": 2.0813, "step": 10487 }, { "epoch": 0.3383832986160823, "grad_norm": 0.423828125, "learning_rate": 2.322792231990091e-05, "loss": 2.0562, "step": 10488 }, { "epoch": 0.3384155624698787, "grad_norm": 0.396484375, "learning_rate": 2.3226606425159655e-05, "loss": 2.1045, "step": 10489 }, { "epoch": 0.33844782632367504, "grad_norm": 0.4140625, "learning_rate": 2.322529043986581e-05, "loss": 2.0711, "step": 10490 }, { "epoch": 0.3384800901774714, "grad_norm": 0.40234375, "learning_rate": 2.322397436403388e-05, "loss": 2.0466, "step": 10491 }, { "epoch": 0.3385123540312677, "grad_norm": 0.392578125, "learning_rate": 2.3222658197678332e-05, "loss": 2.0905, "step": 10492 }, { "epoch": 0.33854461788506407, "grad_norm": 0.390625, "learning_rate": 2.3221341940813663e-05, "loss": 2.0727, "step": 10493 }, { "epoch": 0.3385768817388604, "grad_norm": 0.380859375, "learning_rate": 2.322002559345436e-05, "loss": 2.0913, "step": 10494 }, { "epoch": 0.33860914559265676, "grad_norm": 0.390625, "learning_rate": 2.321870915561491e-05, "loss": 2.1008, "step": 10495 }, { "epoch": 0.3386414094464531, "grad_norm": 0.494140625, "learning_rate": 2.321739262730981e-05, "loss": 2.0297, "step": 10496 }, { "epoch": 0.33867367330024944, "grad_norm": 0.408203125, "learning_rate": 2.3216076008553546e-05, "loss": 2.0771, "step": 10497 }, { "epoch": 0.3387059371540458, "grad_norm": 0.48828125, "learning_rate": 2.3214759299360608e-05, "loss": 2.0771, "step": 10498 }, { "epoch": 0.33873820100784213, "grad_norm": 0.474609375, "learning_rate": 2.3213442499745497e-05, "loss": 2.0522, "step": 10499 }, { "epoch": 0.3387704648616385, "grad_norm": 0.4140625, "learning_rate": 2.32121256097227e-05, "loss": 2.0917, "step": 10500 }, { "epoch": 0.3388027287154348, "grad_norm": 0.45703125, "learning_rate": 2.3210808629306716e-05, "loss": 2.0962, "step": 10501 }, { "epoch": 0.33883499256923116, "grad_norm": 0.3984375, "learning_rate": 2.3209491558512045e-05, "loss": 2.0686, "step": 10502 }, { "epoch": 0.3388672564230275, "grad_norm": 0.45703125, "learning_rate": 2.3208174397353182e-05, "loss": 2.0789, "step": 10503 }, { "epoch": 0.33889952027682385, "grad_norm": 0.443359375, "learning_rate": 2.3206857145844615e-05, "loss": 2.0859, "step": 10504 }, { "epoch": 0.3389317841306202, "grad_norm": 0.45703125, "learning_rate": 2.3205539804000855e-05, "loss": 2.0453, "step": 10505 }, { "epoch": 0.33896404798441654, "grad_norm": 0.41796875, "learning_rate": 2.320422237183641e-05, "loss": 2.0695, "step": 10506 }, { "epoch": 0.3389963118382129, "grad_norm": 0.462890625, "learning_rate": 2.3202904849365756e-05, "loss": 2.0823, "step": 10507 }, { "epoch": 0.3390285756920092, "grad_norm": 0.396484375, "learning_rate": 2.3201587236603417e-05, "loss": 2.0546, "step": 10508 }, { "epoch": 0.3390608395458056, "grad_norm": 0.447265625, "learning_rate": 2.320026953356389e-05, "loss": 2.0554, "step": 10509 }, { "epoch": 0.33909310339960197, "grad_norm": 0.421875, "learning_rate": 2.3198951740261675e-05, "loss": 2.0981, "step": 10510 }, { "epoch": 0.3391253672533983, "grad_norm": 0.431640625, "learning_rate": 2.319763385671128e-05, "loss": 2.0797, "step": 10511 }, { "epoch": 0.33915763110719466, "grad_norm": 0.390625, "learning_rate": 2.319631588292722e-05, "loss": 2.0477, "step": 10512 }, { "epoch": 0.339189894960991, "grad_norm": 0.41015625, "learning_rate": 2.3194997818923992e-05, "loss": 2.0669, "step": 10513 }, { "epoch": 0.33922215881478734, "grad_norm": 0.435546875, "learning_rate": 2.3193679664716106e-05, "loss": 2.0748, "step": 10514 }, { "epoch": 0.3392544226685837, "grad_norm": 0.56640625, "learning_rate": 2.3192361420318072e-05, "loss": 2.0799, "step": 10515 }, { "epoch": 0.33928668652238003, "grad_norm": 0.490234375, "learning_rate": 2.31910430857444e-05, "loss": 2.0731, "step": 10516 }, { "epoch": 0.3393189503761764, "grad_norm": 0.60546875, "learning_rate": 2.3189724661009602e-05, "loss": 2.0342, "step": 10517 }, { "epoch": 0.3393512142299727, "grad_norm": 0.56640625, "learning_rate": 2.318840614612819e-05, "loss": 2.0497, "step": 10518 }, { "epoch": 0.33938347808376906, "grad_norm": 0.53125, "learning_rate": 2.318708754111468e-05, "loss": 2.0516, "step": 10519 }, { "epoch": 0.3394157419375654, "grad_norm": 0.48828125, "learning_rate": 2.3185768845983585e-05, "loss": 2.0225, "step": 10520 }, { "epoch": 0.33944800579136175, "grad_norm": 0.5, "learning_rate": 2.3184450060749414e-05, "loss": 2.0608, "step": 10521 }, { "epoch": 0.3394802696451581, "grad_norm": 0.484375, "learning_rate": 2.3183131185426693e-05, "loss": 2.1082, "step": 10522 }, { "epoch": 0.33951253349895444, "grad_norm": 0.451171875, "learning_rate": 2.3181812220029932e-05, "loss": 2.0474, "step": 10523 }, { "epoch": 0.3395447973527508, "grad_norm": 0.458984375, "learning_rate": 2.318049316457365e-05, "loss": 2.0709, "step": 10524 }, { "epoch": 0.3395770612065471, "grad_norm": 0.462890625, "learning_rate": 2.3179174019072372e-05, "loss": 2.0961, "step": 10525 }, { "epoch": 0.33960932506034347, "grad_norm": 0.423828125, "learning_rate": 2.317785478354061e-05, "loss": 2.0339, "step": 10526 }, { "epoch": 0.3396415889141398, "grad_norm": 0.453125, "learning_rate": 2.317653545799289e-05, "loss": 2.0794, "step": 10527 }, { "epoch": 0.33967385276793616, "grad_norm": 0.40625, "learning_rate": 2.3175216042443735e-05, "loss": 2.068, "step": 10528 }, { "epoch": 0.3397061166217325, "grad_norm": 0.400390625, "learning_rate": 2.3173896536907667e-05, "loss": 2.0547, "step": 10529 }, { "epoch": 0.3397383804755289, "grad_norm": 0.41015625, "learning_rate": 2.3172576941399207e-05, "loss": 2.0799, "step": 10530 }, { "epoch": 0.33977064432932524, "grad_norm": 0.40625, "learning_rate": 2.3171257255932887e-05, "loss": 2.0738, "step": 10531 }, { "epoch": 0.3398029081831216, "grad_norm": 0.40625, "learning_rate": 2.3169937480523225e-05, "loss": 2.0925, "step": 10532 }, { "epoch": 0.33983517203691793, "grad_norm": 0.416015625, "learning_rate": 2.3168617615184756e-05, "loss": 2.0894, "step": 10533 }, { "epoch": 0.3398674358907143, "grad_norm": 0.416015625, "learning_rate": 2.3167297659932e-05, "loss": 2.0709, "step": 10534 }, { "epoch": 0.3398996997445106, "grad_norm": 0.458984375, "learning_rate": 2.3165977614779497e-05, "loss": 2.0501, "step": 10535 }, { "epoch": 0.33993196359830696, "grad_norm": 0.62109375, "learning_rate": 2.3164657479741766e-05, "loss": 2.0989, "step": 10536 }, { "epoch": 0.3399642274521033, "grad_norm": 0.47265625, "learning_rate": 2.316333725483335e-05, "loss": 2.1091, "step": 10537 }, { "epoch": 0.33999649130589965, "grad_norm": 0.51171875, "learning_rate": 2.3162016940068763e-05, "loss": 2.0504, "step": 10538 }, { "epoch": 0.340028755159696, "grad_norm": 0.423828125, "learning_rate": 2.3160696535462556e-05, "loss": 2.0958, "step": 10539 }, { "epoch": 0.34006101901349234, "grad_norm": 0.484375, "learning_rate": 2.315937604102925e-05, "loss": 2.08, "step": 10540 }, { "epoch": 0.3400932828672887, "grad_norm": 0.423828125, "learning_rate": 2.315805545678339e-05, "loss": 2.0872, "step": 10541 }, { "epoch": 0.340125546721085, "grad_norm": 0.453125, "learning_rate": 2.315673478273951e-05, "loss": 2.1155, "step": 10542 }, { "epoch": 0.34015781057488137, "grad_norm": 0.453125, "learning_rate": 2.3155414018912142e-05, "loss": 2.1065, "step": 10543 }, { "epoch": 0.3401900744286777, "grad_norm": 0.423828125, "learning_rate": 2.3154093165315827e-05, "loss": 2.0758, "step": 10544 }, { "epoch": 0.34022233828247406, "grad_norm": 0.44140625, "learning_rate": 2.315277222196511e-05, "loss": 2.1762, "step": 10545 }, { "epoch": 0.3402546021362704, "grad_norm": 0.42578125, "learning_rate": 2.3151451188874516e-05, "loss": 2.1986, "step": 10546 }, { "epoch": 0.34028686599006674, "grad_norm": 0.48046875, "learning_rate": 2.3150130066058598e-05, "loss": 2.1822, "step": 10547 }, { "epoch": 0.3403191298438631, "grad_norm": 0.4296875, "learning_rate": 2.31488088535319e-05, "loss": 2.1413, "step": 10548 }, { "epoch": 0.34035139369765943, "grad_norm": 0.4375, "learning_rate": 2.3147487551308954e-05, "loss": 2.1654, "step": 10549 }, { "epoch": 0.34038365755145583, "grad_norm": 0.427734375, "learning_rate": 2.3146166159404312e-05, "loss": 2.1567, "step": 10550 }, { "epoch": 0.3404159214052522, "grad_norm": 0.44140625, "learning_rate": 2.314484467783252e-05, "loss": 2.2128, "step": 10551 }, { "epoch": 0.3404481852590485, "grad_norm": 0.412109375, "learning_rate": 2.314352310660812e-05, "loss": 2.1689, "step": 10552 }, { "epoch": 0.34048044911284486, "grad_norm": 0.4765625, "learning_rate": 2.3142201445745657e-05, "loss": 2.1704, "step": 10553 }, { "epoch": 0.3405127129666412, "grad_norm": 0.416015625, "learning_rate": 2.3140879695259686e-05, "loss": 2.1666, "step": 10554 }, { "epoch": 0.34054497682043755, "grad_norm": 0.4453125, "learning_rate": 2.3139557855164745e-05, "loss": 2.1577, "step": 10555 }, { "epoch": 0.3405772406742339, "grad_norm": 0.404296875, "learning_rate": 2.3138235925475395e-05, "loss": 2.1502, "step": 10556 }, { "epoch": 0.34060950452803024, "grad_norm": 0.4296875, "learning_rate": 2.3136913906206182e-05, "loss": 2.1494, "step": 10557 }, { "epoch": 0.3406417683818266, "grad_norm": 0.39453125, "learning_rate": 2.3135591797371658e-05, "loss": 2.0777, "step": 10558 }, { "epoch": 0.3406740322356229, "grad_norm": 0.39453125, "learning_rate": 2.3134269598986374e-05, "loss": 2.0703, "step": 10559 }, { "epoch": 0.34070629608941927, "grad_norm": 0.400390625, "learning_rate": 2.313294731106489e-05, "loss": 2.0975, "step": 10560 }, { "epoch": 0.3407385599432156, "grad_norm": 0.423828125, "learning_rate": 2.313162493362175e-05, "loss": 2.0922, "step": 10561 }, { "epoch": 0.34077082379701196, "grad_norm": 0.451171875, "learning_rate": 2.313030246667152e-05, "loss": 2.0687, "step": 10562 }, { "epoch": 0.3408030876508083, "grad_norm": 0.3984375, "learning_rate": 2.312897991022876e-05, "loss": 2.0833, "step": 10563 }, { "epoch": 0.34083535150460464, "grad_norm": 0.482421875, "learning_rate": 2.3127657264308006e-05, "loss": 2.0597, "step": 10564 }, { "epoch": 0.340867615358401, "grad_norm": 0.37109375, "learning_rate": 2.3126334528923844e-05, "loss": 2.0898, "step": 10565 }, { "epoch": 0.34089987921219733, "grad_norm": 0.4296875, "learning_rate": 2.3125011704090814e-05, "loss": 2.0912, "step": 10566 }, { "epoch": 0.3409321430659937, "grad_norm": 0.39453125, "learning_rate": 2.312368878982349e-05, "loss": 2.119, "step": 10567 }, { "epoch": 0.34096440691979, "grad_norm": 0.458984375, "learning_rate": 2.3122365786136427e-05, "loss": 2.0825, "step": 10568 }, { "epoch": 0.34099667077358636, "grad_norm": 0.46484375, "learning_rate": 2.3121042693044183e-05, "loss": 2.0898, "step": 10569 }, { "epoch": 0.34102893462738276, "grad_norm": 0.4375, "learning_rate": 2.311971951056133e-05, "loss": 2.0798, "step": 10570 }, { "epoch": 0.3410611984811791, "grad_norm": 0.427734375, "learning_rate": 2.3118396238702427e-05, "loss": 2.0614, "step": 10571 }, { "epoch": 0.34109346233497545, "grad_norm": 0.435546875, "learning_rate": 2.311707287748205e-05, "loss": 2.0511, "step": 10572 }, { "epoch": 0.3411257261887718, "grad_norm": 0.423828125, "learning_rate": 2.3115749426914753e-05, "loss": 2.0935, "step": 10573 }, { "epoch": 0.34115799004256814, "grad_norm": 0.484375, "learning_rate": 2.3114425887015107e-05, "loss": 2.0816, "step": 10574 }, { "epoch": 0.3411902538963645, "grad_norm": 0.44140625, "learning_rate": 2.3113102257797682e-05, "loss": 2.0954, "step": 10575 }, { "epoch": 0.3412225177501608, "grad_norm": 0.41015625, "learning_rate": 2.311177853927705e-05, "loss": 2.0927, "step": 10576 }, { "epoch": 0.34125478160395717, "grad_norm": 0.45703125, "learning_rate": 2.311045473146778e-05, "loss": 2.0866, "step": 10577 }, { "epoch": 0.3412870454577535, "grad_norm": 0.421875, "learning_rate": 2.310913083438444e-05, "loss": 2.0732, "step": 10578 }, { "epoch": 0.34131930931154986, "grad_norm": 0.41796875, "learning_rate": 2.3107806848041607e-05, "loss": 2.0854, "step": 10579 }, { "epoch": 0.3413515731653462, "grad_norm": 0.4453125, "learning_rate": 2.3106482772453855e-05, "loss": 2.034, "step": 10580 }, { "epoch": 0.34138383701914254, "grad_norm": 0.412109375, "learning_rate": 2.310515860763575e-05, "loss": 2.1093, "step": 10581 }, { "epoch": 0.3414161008729389, "grad_norm": 0.458984375, "learning_rate": 2.3103834353601876e-05, "loss": 2.0794, "step": 10582 }, { "epoch": 0.34144836472673523, "grad_norm": 0.41796875, "learning_rate": 2.3102510010366808e-05, "loss": 2.0976, "step": 10583 }, { "epoch": 0.3414806285805316, "grad_norm": 0.4140625, "learning_rate": 2.3101185577945122e-05, "loss": 2.0834, "step": 10584 }, { "epoch": 0.3415128924343279, "grad_norm": 0.419921875, "learning_rate": 2.30998610563514e-05, "loss": 2.0684, "step": 10585 }, { "epoch": 0.34154515628812426, "grad_norm": 0.408203125, "learning_rate": 2.3098536445600216e-05, "loss": 2.0705, "step": 10586 }, { "epoch": 0.3415774201419206, "grad_norm": 0.4375, "learning_rate": 2.309721174570615e-05, "loss": 2.0941, "step": 10587 }, { "epoch": 0.34160968399571695, "grad_norm": 0.3984375, "learning_rate": 2.309588695668379e-05, "loss": 2.1057, "step": 10588 }, { "epoch": 0.3416419478495133, "grad_norm": 0.427734375, "learning_rate": 2.3094562078547713e-05, "loss": 2.0879, "step": 10589 }, { "epoch": 0.3416742117033097, "grad_norm": 0.392578125, "learning_rate": 2.3093237111312505e-05, "loss": 2.1124, "step": 10590 }, { "epoch": 0.34170647555710604, "grad_norm": 0.392578125, "learning_rate": 2.3091912054992744e-05, "loss": 2.097, "step": 10591 }, { "epoch": 0.3417387394109024, "grad_norm": 0.396484375, "learning_rate": 2.3090586909603028e-05, "loss": 2.1185, "step": 10592 }, { "epoch": 0.3417710032646987, "grad_norm": 0.462890625, "learning_rate": 2.308926167515793e-05, "loss": 2.076, "step": 10593 }, { "epoch": 0.34180326711849507, "grad_norm": 0.453125, "learning_rate": 2.3087936351672045e-05, "loss": 2.0496, "step": 10594 }, { "epoch": 0.3418355309722914, "grad_norm": 0.423828125, "learning_rate": 2.3086610939159958e-05, "loss": 2.0582, "step": 10595 }, { "epoch": 0.34186779482608776, "grad_norm": 0.3828125, "learning_rate": 2.3085285437636255e-05, "loss": 2.029, "step": 10596 }, { "epoch": 0.3419000586798841, "grad_norm": 0.396484375, "learning_rate": 2.3083959847115535e-05, "loss": 2.0797, "step": 10597 }, { "epoch": 0.34193232253368044, "grad_norm": 0.37109375, "learning_rate": 2.3082634167612382e-05, "loss": 2.1089, "step": 10598 }, { "epoch": 0.3419645863874768, "grad_norm": 0.412109375, "learning_rate": 2.3081308399141394e-05, "loss": 2.0723, "step": 10599 }, { "epoch": 0.34199685024127313, "grad_norm": 0.40625, "learning_rate": 2.3079982541717157e-05, "loss": 2.0931, "step": 10600 }, { "epoch": 0.3420291140950695, "grad_norm": 0.384765625, "learning_rate": 2.307865659535427e-05, "loss": 2.0758, "step": 10601 }, { "epoch": 0.3420613779488658, "grad_norm": 0.4140625, "learning_rate": 2.3077330560067328e-05, "loss": 2.094, "step": 10602 }, { "epoch": 0.34209364180266216, "grad_norm": 0.412109375, "learning_rate": 2.307600443587093e-05, "loss": 2.0844, "step": 10603 }, { "epoch": 0.3421259056564585, "grad_norm": 0.375, "learning_rate": 2.307467822277966e-05, "loss": 2.1097, "step": 10604 }, { "epoch": 0.34215816951025485, "grad_norm": 0.4140625, "learning_rate": 2.3073351920808132e-05, "loss": 2.0954, "step": 10605 }, { "epoch": 0.3421904333640512, "grad_norm": 0.3984375, "learning_rate": 2.3072025529970934e-05, "loss": 2.0465, "step": 10606 }, { "epoch": 0.34222269721784754, "grad_norm": 0.39453125, "learning_rate": 2.307069905028267e-05, "loss": 2.0795, "step": 10607 }, { "epoch": 0.3422549610716439, "grad_norm": 0.40234375, "learning_rate": 2.306937248175794e-05, "loss": 2.0626, "step": 10608 }, { "epoch": 0.3422872249254402, "grad_norm": 0.3984375, "learning_rate": 2.306804582441135e-05, "loss": 2.0832, "step": 10609 }, { "epoch": 0.34231948877923657, "grad_norm": 0.380859375, "learning_rate": 2.3066719078257495e-05, "loss": 2.0921, "step": 10610 }, { "epoch": 0.34235175263303297, "grad_norm": 0.455078125, "learning_rate": 2.306539224331099e-05, "loss": 2.1119, "step": 10611 }, { "epoch": 0.3423840164868293, "grad_norm": 0.47265625, "learning_rate": 2.3064065319586425e-05, "loss": 2.1004, "step": 10612 }, { "epoch": 0.34241628034062566, "grad_norm": 0.4296875, "learning_rate": 2.3062738307098425e-05, "loss": 2.0739, "step": 10613 }, { "epoch": 0.342448544194422, "grad_norm": 0.462890625, "learning_rate": 2.306141120586158e-05, "loss": 2.0835, "step": 10614 }, { "epoch": 0.34248080804821834, "grad_norm": 0.4375, "learning_rate": 2.3060084015890497e-05, "loss": 2.0897, "step": 10615 }, { "epoch": 0.3425130719020147, "grad_norm": 0.44140625, "learning_rate": 2.3058756737199798e-05, "loss": 2.0952, "step": 10616 }, { "epoch": 0.34254533575581103, "grad_norm": 0.3984375, "learning_rate": 2.3057429369804085e-05, "loss": 2.084, "step": 10617 }, { "epoch": 0.3425775996096074, "grad_norm": 0.423828125, "learning_rate": 2.305610191371797e-05, "loss": 2.0833, "step": 10618 }, { "epoch": 0.3426098634634037, "grad_norm": 0.392578125, "learning_rate": 2.3054774368956063e-05, "loss": 2.0771, "step": 10619 }, { "epoch": 0.34264212731720006, "grad_norm": 0.427734375, "learning_rate": 2.3053446735532984e-05, "loss": 2.1, "step": 10620 }, { "epoch": 0.3426743911709964, "grad_norm": 0.423828125, "learning_rate": 2.305211901346334e-05, "loss": 2.0765, "step": 10621 }, { "epoch": 0.34270665502479275, "grad_norm": 0.45703125, "learning_rate": 2.3050791202761742e-05, "loss": 2.1132, "step": 10622 }, { "epoch": 0.3427389188785891, "grad_norm": 0.427734375, "learning_rate": 2.304946330344281e-05, "loss": 2.0554, "step": 10623 }, { "epoch": 0.34277118273238544, "grad_norm": 0.44921875, "learning_rate": 2.304813531552116e-05, "loss": 2.0964, "step": 10624 }, { "epoch": 0.3428034465861818, "grad_norm": 0.404296875, "learning_rate": 2.3046807239011417e-05, "loss": 2.0963, "step": 10625 }, { "epoch": 0.3428357104399781, "grad_norm": 0.40625, "learning_rate": 2.3045479073928183e-05, "loss": 2.0777, "step": 10626 }, { "epoch": 0.34286797429377447, "grad_norm": 0.462890625, "learning_rate": 2.3044150820286095e-05, "loss": 2.1075, "step": 10627 }, { "epoch": 0.3429002381475708, "grad_norm": 0.51171875, "learning_rate": 2.304282247809976e-05, "loss": 2.0922, "step": 10628 }, { "epoch": 0.34293250200136716, "grad_norm": 0.453125, "learning_rate": 2.304149404738381e-05, "loss": 2.088, "step": 10629 }, { "epoch": 0.3429647658551635, "grad_norm": 0.453125, "learning_rate": 2.304016552815286e-05, "loss": 2.1298, "step": 10630 }, { "epoch": 0.3429970297089599, "grad_norm": 0.46484375, "learning_rate": 2.303883692042154e-05, "loss": 2.0955, "step": 10631 }, { "epoch": 0.34302929356275624, "grad_norm": 0.4140625, "learning_rate": 2.303750822420446e-05, "loss": 2.0992, "step": 10632 }, { "epoch": 0.3430615574165526, "grad_norm": 0.419921875, "learning_rate": 2.3036179439516263e-05, "loss": 2.0214, "step": 10633 }, { "epoch": 0.34309382127034893, "grad_norm": 0.3828125, "learning_rate": 2.3034850566371567e-05, "loss": 2.013, "step": 10634 }, { "epoch": 0.3431260851241453, "grad_norm": 0.392578125, "learning_rate": 2.3033521604785004e-05, "loss": 1.9748, "step": 10635 }, { "epoch": 0.3431583489779416, "grad_norm": 0.40234375, "learning_rate": 2.303219255477119e-05, "loss": 1.9722, "step": 10636 }, { "epoch": 0.34319061283173796, "grad_norm": 0.396484375, "learning_rate": 2.303086341634477e-05, "loss": 1.982, "step": 10637 }, { "epoch": 0.3432228766855343, "grad_norm": 0.392578125, "learning_rate": 2.3029534189520363e-05, "loss": 1.9869, "step": 10638 }, { "epoch": 0.34325514053933065, "grad_norm": 0.431640625, "learning_rate": 2.3028204874312604e-05, "loss": 1.9493, "step": 10639 }, { "epoch": 0.343287404393127, "grad_norm": 0.408203125, "learning_rate": 2.3026875470736124e-05, "loss": 2.0129, "step": 10640 }, { "epoch": 0.34331966824692334, "grad_norm": 0.392578125, "learning_rate": 2.3025545978805563e-05, "loss": 2.0155, "step": 10641 }, { "epoch": 0.3433519321007197, "grad_norm": 0.3984375, "learning_rate": 2.3024216398535543e-05, "loss": 1.9771, "step": 10642 }, { "epoch": 0.343384195954516, "grad_norm": 0.37109375, "learning_rate": 2.3022886729940706e-05, "loss": 1.9786, "step": 10643 }, { "epoch": 0.34341645980831237, "grad_norm": 0.375, "learning_rate": 2.302155697303569e-05, "loss": 1.9903, "step": 10644 }, { "epoch": 0.3434487236621087, "grad_norm": 0.37109375, "learning_rate": 2.302022712783513e-05, "loss": 1.99, "step": 10645 }, { "epoch": 0.34348098751590506, "grad_norm": 0.357421875, "learning_rate": 2.301889719435366e-05, "loss": 2.0156, "step": 10646 }, { "epoch": 0.3435132513697014, "grad_norm": 0.388671875, "learning_rate": 2.3017567172605932e-05, "loss": 1.9903, "step": 10647 }, { "epoch": 0.34354551522349774, "grad_norm": 0.396484375, "learning_rate": 2.3016237062606566e-05, "loss": 1.9826, "step": 10648 }, { "epoch": 0.3435777790772941, "grad_norm": 0.3671875, "learning_rate": 2.3014906864370216e-05, "loss": 2.0064, "step": 10649 }, { "epoch": 0.34361004293109043, "grad_norm": 0.408203125, "learning_rate": 2.301357657791152e-05, "loss": 2.0146, "step": 10650 }, { "epoch": 0.34364230678488683, "grad_norm": 0.369140625, "learning_rate": 2.301224620324512e-05, "loss": 1.9771, "step": 10651 }, { "epoch": 0.3436745706386832, "grad_norm": 0.419921875, "learning_rate": 2.3010915740385673e-05, "loss": 2.0143, "step": 10652 }, { "epoch": 0.3437068344924795, "grad_norm": 0.404296875, "learning_rate": 2.3009585189347805e-05, "loss": 1.9912, "step": 10653 }, { "epoch": 0.34373909834627586, "grad_norm": 0.4296875, "learning_rate": 2.3008254550146167e-05, "loss": 1.9584, "step": 10654 }, { "epoch": 0.3437713622000722, "grad_norm": 0.390625, "learning_rate": 2.300692382279541e-05, "loss": 1.9993, "step": 10655 }, { "epoch": 0.34380362605386855, "grad_norm": 0.416015625, "learning_rate": 2.300559300731018e-05, "loss": 2.0131, "step": 10656 }, { "epoch": 0.3438358899076649, "grad_norm": 0.375, "learning_rate": 2.3004262103705126e-05, "loss": 1.9841, "step": 10657 }, { "epoch": 0.34386815376146124, "grad_norm": 0.396484375, "learning_rate": 2.3002931111994894e-05, "loss": 1.9843, "step": 10658 }, { "epoch": 0.3439004176152576, "grad_norm": 0.388671875, "learning_rate": 2.3001600032194144e-05, "loss": 1.9819, "step": 10659 }, { "epoch": 0.3439326814690539, "grad_norm": 0.400390625, "learning_rate": 2.300026886431751e-05, "loss": 1.9796, "step": 10660 }, { "epoch": 0.34396494532285027, "grad_norm": 0.35546875, "learning_rate": 2.2998937608379665e-05, "loss": 1.9914, "step": 10661 }, { "epoch": 0.3439972091766466, "grad_norm": 0.3828125, "learning_rate": 2.299760626439525e-05, "loss": 2.0085, "step": 10662 }, { "epoch": 0.34402947303044296, "grad_norm": 0.35546875, "learning_rate": 2.299627483237892e-05, "loss": 1.985, "step": 10663 }, { "epoch": 0.3440617368842393, "grad_norm": 0.37109375, "learning_rate": 2.299494331234534e-05, "loss": 1.962, "step": 10664 }, { "epoch": 0.34409400073803564, "grad_norm": 0.38671875, "learning_rate": 2.299361170430916e-05, "loss": 2.0083, "step": 10665 }, { "epoch": 0.344126264591832, "grad_norm": 0.36328125, "learning_rate": 2.299228000828503e-05, "loss": 1.9438, "step": 10666 }, { "epoch": 0.34415852844562833, "grad_norm": 0.3828125, "learning_rate": 2.2990948224287618e-05, "loss": 1.9776, "step": 10667 }, { "epoch": 0.3441907922994247, "grad_norm": 0.392578125, "learning_rate": 2.298961635233158e-05, "loss": 2.0043, "step": 10668 }, { "epoch": 0.344223056153221, "grad_norm": 0.373046875, "learning_rate": 2.2988284392431582e-05, "loss": 1.9674, "step": 10669 }, { "epoch": 0.34425532000701736, "grad_norm": 0.3984375, "learning_rate": 2.2986952344602274e-05, "loss": 1.9753, "step": 10670 }, { "epoch": 0.34428758386081376, "grad_norm": 0.39453125, "learning_rate": 2.2985620208858325e-05, "loss": 1.9997, "step": 10671 }, { "epoch": 0.3443198477146101, "grad_norm": 0.376953125, "learning_rate": 2.2984287985214403e-05, "loss": 1.999, "step": 10672 }, { "epoch": 0.34435211156840645, "grad_norm": 0.369140625, "learning_rate": 2.298295567368516e-05, "loss": 1.996, "step": 10673 }, { "epoch": 0.3443843754222028, "grad_norm": 0.380859375, "learning_rate": 2.298162327428527e-05, "loss": 1.9949, "step": 10674 }, { "epoch": 0.34441663927599914, "grad_norm": 0.3671875, "learning_rate": 2.29802907870294e-05, "loss": 1.9963, "step": 10675 }, { "epoch": 0.3444489031297955, "grad_norm": 0.36328125, "learning_rate": 2.2978958211932214e-05, "loss": 1.9692, "step": 10676 }, { "epoch": 0.3444811669835918, "grad_norm": 0.4140625, "learning_rate": 2.2977625549008376e-05, "loss": 1.9913, "step": 10677 }, { "epoch": 0.34451343083738817, "grad_norm": 0.380859375, "learning_rate": 2.297629279827256e-05, "loss": 1.9919, "step": 10678 }, { "epoch": 0.3445456946911845, "grad_norm": 0.345703125, "learning_rate": 2.297495995973944e-05, "loss": 1.9613, "step": 10679 }, { "epoch": 0.34457795854498086, "grad_norm": 0.3984375, "learning_rate": 2.297362703342368e-05, "loss": 1.9786, "step": 10680 }, { "epoch": 0.3446102223987772, "grad_norm": 0.392578125, "learning_rate": 2.2972294019339948e-05, "loss": 1.9797, "step": 10681 }, { "epoch": 0.34464248625257354, "grad_norm": 0.365234375, "learning_rate": 2.2970960917502922e-05, "loss": 1.9998, "step": 10682 }, { "epoch": 0.3446747501063699, "grad_norm": 0.384765625, "learning_rate": 2.2969627727927286e-05, "loss": 2.0125, "step": 10683 }, { "epoch": 0.34470701396016623, "grad_norm": 0.392578125, "learning_rate": 2.29682944506277e-05, "loss": 1.9917, "step": 10684 }, { "epoch": 0.3447392778139626, "grad_norm": 0.3671875, "learning_rate": 2.2966961085618845e-05, "loss": 2.0033, "step": 10685 }, { "epoch": 0.3447715416677589, "grad_norm": 0.380859375, "learning_rate": 2.2965627632915398e-05, "loss": 1.9811, "step": 10686 }, { "epoch": 0.34480380552155526, "grad_norm": 0.34765625, "learning_rate": 2.2964294092532037e-05, "loss": 1.9742, "step": 10687 }, { "epoch": 0.3448360693753516, "grad_norm": 0.3671875, "learning_rate": 2.2962960464483443e-05, "loss": 1.9878, "step": 10688 }, { "epoch": 0.34486833322914795, "grad_norm": 0.341796875, "learning_rate": 2.296162674878429e-05, "loss": 1.9905, "step": 10689 }, { "epoch": 0.3449005970829443, "grad_norm": 0.3671875, "learning_rate": 2.296029294544926e-05, "loss": 1.9684, "step": 10690 }, { "epoch": 0.3449328609367407, "grad_norm": 0.375, "learning_rate": 2.2958959054493037e-05, "loss": 1.9877, "step": 10691 }, { "epoch": 0.34496512479053704, "grad_norm": 0.349609375, "learning_rate": 2.2957625075930307e-05, "loss": 2.0071, "step": 10692 }, { "epoch": 0.3449973886443334, "grad_norm": 0.38671875, "learning_rate": 2.2956291009775744e-05, "loss": 1.9742, "step": 10693 }, { "epoch": 0.3450296524981297, "grad_norm": 0.357421875, "learning_rate": 2.2954956856044035e-05, "loss": 1.9728, "step": 10694 }, { "epoch": 0.34506191635192607, "grad_norm": 0.37890625, "learning_rate": 2.2953622614749872e-05, "loss": 1.9641, "step": 10695 }, { "epoch": 0.3450941802057224, "grad_norm": 0.361328125, "learning_rate": 2.2952288285907936e-05, "loss": 1.9447, "step": 10696 }, { "epoch": 0.34512644405951876, "grad_norm": 0.369140625, "learning_rate": 2.2950953869532916e-05, "loss": 1.9807, "step": 10697 }, { "epoch": 0.3451587079133151, "grad_norm": 0.373046875, "learning_rate": 2.2949619365639502e-05, "loss": 1.9854, "step": 10698 }, { "epoch": 0.34519097176711144, "grad_norm": 0.369140625, "learning_rate": 2.2948284774242376e-05, "loss": 1.9926, "step": 10699 }, { "epoch": 0.3452232356209078, "grad_norm": 0.353515625, "learning_rate": 2.2946950095356237e-05, "loss": 1.9788, "step": 10700 }, { "epoch": 0.34525549947470413, "grad_norm": 0.365234375, "learning_rate": 2.294561532899577e-05, "loss": 1.995, "step": 10701 }, { "epoch": 0.3452877633285005, "grad_norm": 0.373046875, "learning_rate": 2.294428047517567e-05, "loss": 1.9918, "step": 10702 }, { "epoch": 0.3453200271822968, "grad_norm": 0.3671875, "learning_rate": 2.2942945533910633e-05, "loss": 1.9717, "step": 10703 }, { "epoch": 0.34535229103609316, "grad_norm": 0.376953125, "learning_rate": 2.2941610505215347e-05, "loss": 1.9725, "step": 10704 }, { "epoch": 0.3453845548898895, "grad_norm": 0.376953125, "learning_rate": 2.2940275389104506e-05, "loss": 1.9487, "step": 10705 }, { "epoch": 0.34541681874368585, "grad_norm": 0.373046875, "learning_rate": 2.2938940185592817e-05, "loss": 1.9822, "step": 10706 }, { "epoch": 0.3454490825974822, "grad_norm": 0.37109375, "learning_rate": 2.293760489469497e-05, "loss": 1.9928, "step": 10707 }, { "epoch": 0.34548134645127854, "grad_norm": 0.361328125, "learning_rate": 2.2936269516425655e-05, "loss": 1.9815, "step": 10708 }, { "epoch": 0.3455136103050749, "grad_norm": 0.37109375, "learning_rate": 2.2934934050799584e-05, "loss": 1.9889, "step": 10709 }, { "epoch": 0.3455458741588712, "grad_norm": 0.373046875, "learning_rate": 2.2933598497831453e-05, "loss": 1.9694, "step": 10710 }, { "epoch": 0.34557813801266757, "grad_norm": 0.3828125, "learning_rate": 2.2932262857535956e-05, "loss": 1.97, "step": 10711 }, { "epoch": 0.34561040186646397, "grad_norm": 0.482421875, "learning_rate": 2.2930927129927805e-05, "loss": 2.0944, "step": 10712 }, { "epoch": 0.3456426657202603, "grad_norm": 0.4453125, "learning_rate": 2.2929591315021697e-05, "loss": 2.1048, "step": 10713 }, { "epoch": 0.34567492957405666, "grad_norm": 0.41015625, "learning_rate": 2.2928255412832338e-05, "loss": 2.078, "step": 10714 }, { "epoch": 0.345707193427853, "grad_norm": 0.46875, "learning_rate": 2.2926919423374424e-05, "loss": 2.0848, "step": 10715 }, { "epoch": 0.34573945728164934, "grad_norm": 0.515625, "learning_rate": 2.292558334666267e-05, "loss": 2.0592, "step": 10716 }, { "epoch": 0.3457717211354457, "grad_norm": 0.546875, "learning_rate": 2.2924247182711785e-05, "loss": 2.089, "step": 10717 }, { "epoch": 0.34580398498924203, "grad_norm": 0.49609375, "learning_rate": 2.292291093153647e-05, "loss": 2.0877, "step": 10718 }, { "epoch": 0.3458362488430384, "grad_norm": 0.44921875, "learning_rate": 2.2921574593151433e-05, "loss": 2.0851, "step": 10719 }, { "epoch": 0.3458685126968347, "grad_norm": 0.50390625, "learning_rate": 2.292023816757139e-05, "loss": 2.0748, "step": 10720 }, { "epoch": 0.34590077655063106, "grad_norm": 0.4765625, "learning_rate": 2.291890165481105e-05, "loss": 2.0559, "step": 10721 }, { "epoch": 0.3459330404044274, "grad_norm": 0.453125, "learning_rate": 2.291756505488511e-05, "loss": 2.0569, "step": 10722 }, { "epoch": 0.34596530425822375, "grad_norm": 0.5, "learning_rate": 2.2916228367808298e-05, "loss": 2.0982, "step": 10723 }, { "epoch": 0.3459975681120201, "grad_norm": 0.42578125, "learning_rate": 2.2914891593595328e-05, "loss": 2.0833, "step": 10724 }, { "epoch": 0.34602983196581644, "grad_norm": 0.486328125, "learning_rate": 2.291355473226091e-05, "loss": 2.0702, "step": 10725 }, { "epoch": 0.3460620958196128, "grad_norm": 0.462890625, "learning_rate": 2.2912217783819753e-05, "loss": 2.0958, "step": 10726 }, { "epoch": 0.3460943596734091, "grad_norm": 0.423828125, "learning_rate": 2.2910880748286585e-05, "loss": 2.0624, "step": 10727 }, { "epoch": 0.34612662352720547, "grad_norm": 0.46484375, "learning_rate": 2.2909543625676112e-05, "loss": 2.0898, "step": 10728 }, { "epoch": 0.3461588873810018, "grad_norm": 0.4375, "learning_rate": 2.2908206416003053e-05, "loss": 2.0476, "step": 10729 }, { "epoch": 0.34619115123479816, "grad_norm": 0.435546875, "learning_rate": 2.290686911928214e-05, "loss": 2.0836, "step": 10730 }, { "epoch": 0.3462234150885945, "grad_norm": 0.46484375, "learning_rate": 2.2905531735528078e-05, "loss": 2.0561, "step": 10731 }, { "epoch": 0.3462556789423909, "grad_norm": 0.458984375, "learning_rate": 2.2904194264755598e-05, "loss": 2.0817, "step": 10732 }, { "epoch": 0.34628794279618724, "grad_norm": 0.4375, "learning_rate": 2.2902856706979414e-05, "loss": 2.0658, "step": 10733 }, { "epoch": 0.3463202066499836, "grad_norm": 0.431640625, "learning_rate": 2.290151906221426e-05, "loss": 2.0789, "step": 10734 }, { "epoch": 0.34635247050377993, "grad_norm": 0.416015625, "learning_rate": 2.2900181330474847e-05, "loss": 2.0656, "step": 10735 }, { "epoch": 0.3463847343575763, "grad_norm": 0.439453125, "learning_rate": 2.2898843511775903e-05, "loss": 2.0882, "step": 10736 }, { "epoch": 0.3464169982113726, "grad_norm": 0.427734375, "learning_rate": 2.289750560613216e-05, "loss": 2.1141, "step": 10737 }, { "epoch": 0.34644926206516896, "grad_norm": 0.404296875, "learning_rate": 2.2896167613558343e-05, "loss": 2.0927, "step": 10738 }, { "epoch": 0.3464815259189653, "grad_norm": 0.41796875, "learning_rate": 2.2894829534069173e-05, "loss": 2.0968, "step": 10739 }, { "epoch": 0.34651378977276165, "grad_norm": 0.392578125, "learning_rate": 2.289349136767939e-05, "loss": 2.0899, "step": 10740 }, { "epoch": 0.346546053626558, "grad_norm": 0.388671875, "learning_rate": 2.2892153114403714e-05, "loss": 2.0794, "step": 10741 }, { "epoch": 0.34657831748035434, "grad_norm": 0.392578125, "learning_rate": 2.2890814774256875e-05, "loss": 2.0724, "step": 10742 }, { "epoch": 0.3466105813341507, "grad_norm": 0.400390625, "learning_rate": 2.2889476347253607e-05, "loss": 2.0842, "step": 10743 }, { "epoch": 0.346642845187947, "grad_norm": 0.390625, "learning_rate": 2.288813783340865e-05, "loss": 2.0617, "step": 10744 }, { "epoch": 0.34667510904174337, "grad_norm": 0.40234375, "learning_rate": 2.2886799232736725e-05, "loss": 2.0605, "step": 10745 }, { "epoch": 0.3467073728955397, "grad_norm": 0.36328125, "learning_rate": 2.288546054525257e-05, "loss": 2.0798, "step": 10746 }, { "epoch": 0.34673963674933606, "grad_norm": 0.3828125, "learning_rate": 2.2884121770970928e-05, "loss": 2.066, "step": 10747 }, { "epoch": 0.3467719006031324, "grad_norm": 0.37890625, "learning_rate": 2.2882782909906527e-05, "loss": 2.0685, "step": 10748 }, { "epoch": 0.34680416445692874, "grad_norm": 0.37890625, "learning_rate": 2.2881443962074104e-05, "loss": 2.0885, "step": 10749 }, { "epoch": 0.3468364283107251, "grad_norm": 0.396484375, "learning_rate": 2.2880104927488405e-05, "loss": 2.073, "step": 10750 }, { "epoch": 0.34686869216452143, "grad_norm": 0.3828125, "learning_rate": 2.2878765806164163e-05, "loss": 2.049, "step": 10751 }, { "epoch": 0.34690095601831783, "grad_norm": 0.48046875, "learning_rate": 2.287742659811612e-05, "loss": 2.0943, "step": 10752 }, { "epoch": 0.3469332198721142, "grad_norm": 0.4140625, "learning_rate": 2.2876087303359013e-05, "loss": 2.1028, "step": 10753 }, { "epoch": 0.3469654837259105, "grad_norm": 0.48046875, "learning_rate": 2.287474792190759e-05, "loss": 2.0642, "step": 10754 }, { "epoch": 0.34699774757970686, "grad_norm": 0.546875, "learning_rate": 2.2873408453776592e-05, "loss": 2.0611, "step": 10755 }, { "epoch": 0.3470300114335032, "grad_norm": 0.6484375, "learning_rate": 2.287206889898076e-05, "loss": 2.0972, "step": 10756 }, { "epoch": 0.34706227528729955, "grad_norm": 0.60546875, "learning_rate": 2.287072925753484e-05, "loss": 2.0664, "step": 10757 }, { "epoch": 0.3470945391410959, "grad_norm": 0.61328125, "learning_rate": 2.2869389529453578e-05, "loss": 2.0927, "step": 10758 }, { "epoch": 0.34712680299489224, "grad_norm": 0.58203125, "learning_rate": 2.2868049714751726e-05, "loss": 2.083, "step": 10759 }, { "epoch": 0.3471590668486886, "grad_norm": 0.546875, "learning_rate": 2.2866709813444022e-05, "loss": 2.1115, "step": 10760 }, { "epoch": 0.3471913307024849, "grad_norm": 0.59765625, "learning_rate": 2.2865369825545224e-05, "loss": 2.033, "step": 10761 }, { "epoch": 0.34722359455628127, "grad_norm": 0.578125, "learning_rate": 2.2864029751070073e-05, "loss": 2.0925, "step": 10762 }, { "epoch": 0.3472558584100776, "grad_norm": 0.49609375, "learning_rate": 2.286268959003333e-05, "loss": 2.096, "step": 10763 }, { "epoch": 0.34728812226387396, "grad_norm": 0.515625, "learning_rate": 2.2861349342449737e-05, "loss": 2.1063, "step": 10764 }, { "epoch": 0.3473203861176703, "grad_norm": 0.498046875, "learning_rate": 2.286000900833405e-05, "loss": 2.1075, "step": 10765 }, { "epoch": 0.34735264997146664, "grad_norm": 0.4453125, "learning_rate": 2.285866858770103e-05, "loss": 2.0996, "step": 10766 }, { "epoch": 0.347384913825263, "grad_norm": 0.50390625, "learning_rate": 2.2857328080565412e-05, "loss": 2.0973, "step": 10767 }, { "epoch": 0.34741717767905933, "grad_norm": 0.4453125, "learning_rate": 2.2855987486941973e-05, "loss": 2.1051, "step": 10768 }, { "epoch": 0.3474494415328557, "grad_norm": 0.435546875, "learning_rate": 2.2854646806845458e-05, "loss": 2.069, "step": 10769 }, { "epoch": 0.347481705386652, "grad_norm": 0.447265625, "learning_rate": 2.285330604029062e-05, "loss": 2.0681, "step": 10770 }, { "epoch": 0.34751396924044836, "grad_norm": 0.4296875, "learning_rate": 2.2851965187292224e-05, "loss": 2.1152, "step": 10771 }, { "epoch": 0.34754623309424476, "grad_norm": 0.4296875, "learning_rate": 2.2850624247865036e-05, "loss": 2.0829, "step": 10772 }, { "epoch": 0.3475784969480411, "grad_norm": 0.42578125, "learning_rate": 2.2849283222023804e-05, "loss": 2.0744, "step": 10773 }, { "epoch": 0.34761076080183745, "grad_norm": 0.4375, "learning_rate": 2.284794210978329e-05, "loss": 2.0819, "step": 10774 }, { "epoch": 0.3476430246556338, "grad_norm": 0.421875, "learning_rate": 2.2846600911158263e-05, "loss": 2.096, "step": 10775 }, { "epoch": 0.34767528850943014, "grad_norm": 0.4140625, "learning_rate": 2.2845259626163476e-05, "loss": 2.0897, "step": 10776 }, { "epoch": 0.3477075523632265, "grad_norm": 0.3984375, "learning_rate": 2.2843918254813707e-05, "loss": 2.1002, "step": 10777 }, { "epoch": 0.3477398162170228, "grad_norm": 0.40234375, "learning_rate": 2.2842576797123714e-05, "loss": 2.0709, "step": 10778 }, { "epoch": 0.34777208007081917, "grad_norm": 0.423828125, "learning_rate": 2.2841235253108257e-05, "loss": 2.1014, "step": 10779 }, { "epoch": 0.3478043439246155, "grad_norm": 0.4375, "learning_rate": 2.2839893622782105e-05, "loss": 2.09, "step": 10780 }, { "epoch": 0.34783660777841185, "grad_norm": 0.5859375, "learning_rate": 2.283855190616003e-05, "loss": 2.0448, "step": 10781 }, { "epoch": 0.3478688716322082, "grad_norm": 0.73046875, "learning_rate": 2.2837210103256802e-05, "loss": 2.1031, "step": 10782 }, { "epoch": 0.34790113548600454, "grad_norm": 0.54296875, "learning_rate": 2.283586821408719e-05, "loss": 2.1346, "step": 10783 }, { "epoch": 0.3479333993398009, "grad_norm": 0.515625, "learning_rate": 2.2834526238665954e-05, "loss": 2.1019, "step": 10784 }, { "epoch": 0.34796566319359723, "grad_norm": 0.5234375, "learning_rate": 2.2833184177007883e-05, "loss": 2.1186, "step": 10785 }, { "epoch": 0.3479979270473936, "grad_norm": 0.490234375, "learning_rate": 2.2831842029127734e-05, "loss": 2.114, "step": 10786 }, { "epoch": 0.3480301909011899, "grad_norm": 0.462890625, "learning_rate": 2.283049979504029e-05, "loss": 2.104, "step": 10787 }, { "epoch": 0.34806245475498626, "grad_norm": 0.458984375, "learning_rate": 2.2829157474760318e-05, "loss": 2.0872, "step": 10788 }, { "epoch": 0.3480947186087826, "grad_norm": 0.4453125, "learning_rate": 2.2827815068302602e-05, "loss": 2.0899, "step": 10789 }, { "epoch": 0.34812698246257895, "grad_norm": 0.435546875, "learning_rate": 2.2826472575681906e-05, "loss": 2.1073, "step": 10790 }, { "epoch": 0.3481592463163753, "grad_norm": 0.4296875, "learning_rate": 2.282512999691302e-05, "loss": 2.1186, "step": 10791 }, { "epoch": 0.34819151017017164, "grad_norm": 0.4453125, "learning_rate": 2.282378733201072e-05, "loss": 2.1039, "step": 10792 }, { "epoch": 0.34822377402396804, "grad_norm": 0.41015625, "learning_rate": 2.2822444580989776e-05, "loss": 2.09, "step": 10793 }, { "epoch": 0.3482560378777644, "grad_norm": 0.421875, "learning_rate": 2.282110174386497e-05, "loss": 2.1028, "step": 10794 }, { "epoch": 0.3482883017315607, "grad_norm": 0.431640625, "learning_rate": 2.2819758820651096e-05, "loss": 2.0783, "step": 10795 }, { "epoch": 0.34832056558535707, "grad_norm": 0.39453125, "learning_rate": 2.2818415811362922e-05, "loss": 2.0713, "step": 10796 }, { "epoch": 0.3483528294391534, "grad_norm": 0.421875, "learning_rate": 2.2817072716015238e-05, "loss": 2.0561, "step": 10797 }, { "epoch": 0.34838509329294975, "grad_norm": 0.423828125, "learning_rate": 2.281572953462282e-05, "loss": 2.0635, "step": 10798 }, { "epoch": 0.3484173571467461, "grad_norm": 0.388671875, "learning_rate": 2.2814386267200465e-05, "loss": 2.0468, "step": 10799 }, { "epoch": 0.34844962100054244, "grad_norm": 0.443359375, "learning_rate": 2.281304291376295e-05, "loss": 2.0152, "step": 10800 }, { "epoch": 0.3484818848543388, "grad_norm": 0.3671875, "learning_rate": 2.281169947432506e-05, "loss": 1.9668, "step": 10801 }, { "epoch": 0.34851414870813513, "grad_norm": 0.419921875, "learning_rate": 2.281035594890159e-05, "loss": 1.9718, "step": 10802 }, { "epoch": 0.3485464125619315, "grad_norm": 0.396484375, "learning_rate": 2.280901233750733e-05, "loss": 1.9986, "step": 10803 }, { "epoch": 0.3485786764157278, "grad_norm": 0.4296875, "learning_rate": 2.2807668640157057e-05, "loss": 2.0104, "step": 10804 }, { "epoch": 0.34861094026952416, "grad_norm": 0.3671875, "learning_rate": 2.2806324856865568e-05, "loss": 1.9849, "step": 10805 }, { "epoch": 0.3486432041233205, "grad_norm": 0.390625, "learning_rate": 2.2804980987647658e-05, "loss": 2.0168, "step": 10806 }, { "epoch": 0.34867546797711685, "grad_norm": 0.373046875, "learning_rate": 2.2803637032518118e-05, "loss": 1.9862, "step": 10807 }, { "epoch": 0.3487077318309132, "grad_norm": 0.380859375, "learning_rate": 2.2802292991491735e-05, "loss": 1.9943, "step": 10808 }, { "epoch": 0.34873999568470954, "grad_norm": 0.37109375, "learning_rate": 2.2800948864583313e-05, "loss": 1.9941, "step": 10809 }, { "epoch": 0.3487722595385059, "grad_norm": 0.36328125, "learning_rate": 2.2799604651807637e-05, "loss": 1.9558, "step": 10810 }, { "epoch": 0.3488045233923022, "grad_norm": 0.373046875, "learning_rate": 2.2798260353179514e-05, "loss": 1.9695, "step": 10811 }, { "epoch": 0.34883678724609857, "grad_norm": 0.38671875, "learning_rate": 2.2796915968713733e-05, "loss": 1.991, "step": 10812 }, { "epoch": 0.34886905109989497, "grad_norm": 0.384765625, "learning_rate": 2.2795571498425092e-05, "loss": 2.0201, "step": 10813 }, { "epoch": 0.3489013149536913, "grad_norm": 0.408203125, "learning_rate": 2.2794226942328396e-05, "loss": 2.0883, "step": 10814 }, { "epoch": 0.34893357880748765, "grad_norm": 0.37109375, "learning_rate": 2.2792882300438437e-05, "loss": 2.0813, "step": 10815 }, { "epoch": 0.348965842661284, "grad_norm": 0.376953125, "learning_rate": 2.2791537572770026e-05, "loss": 2.0535, "step": 10816 }, { "epoch": 0.34899810651508034, "grad_norm": 0.38671875, "learning_rate": 2.279019275933796e-05, "loss": 2.0611, "step": 10817 }, { "epoch": 0.3490303703688767, "grad_norm": 0.373046875, "learning_rate": 2.2788847860157035e-05, "loss": 2.0964, "step": 10818 }, { "epoch": 0.34906263422267303, "grad_norm": 0.408203125, "learning_rate": 2.2787502875242062e-05, "loss": 2.1043, "step": 10819 }, { "epoch": 0.3490948980764694, "grad_norm": 0.37890625, "learning_rate": 2.2786157804607846e-05, "loss": 2.092, "step": 10820 }, { "epoch": 0.3491271619302657, "grad_norm": 0.390625, "learning_rate": 2.278481264826919e-05, "loss": 2.0908, "step": 10821 }, { "epoch": 0.34915942578406206, "grad_norm": 0.41796875, "learning_rate": 2.27834674062409e-05, "loss": 2.0751, "step": 10822 }, { "epoch": 0.3491916896378584, "grad_norm": 0.423828125, "learning_rate": 2.278212207853779e-05, "loss": 2.1096, "step": 10823 }, { "epoch": 0.34922395349165475, "grad_norm": 0.412109375, "learning_rate": 2.2780776665174653e-05, "loss": 2.1096, "step": 10824 }, { "epoch": 0.3492562173454511, "grad_norm": 0.482421875, "learning_rate": 2.2779431166166315e-05, "loss": 2.0989, "step": 10825 }, { "epoch": 0.34928848119924744, "grad_norm": 0.498046875, "learning_rate": 2.277808558152758e-05, "loss": 2.0842, "step": 10826 }, { "epoch": 0.3493207450530438, "grad_norm": 0.48828125, "learning_rate": 2.2776739911273256e-05, "loss": 2.0582, "step": 10827 }, { "epoch": 0.3493530089068401, "grad_norm": 0.4921875, "learning_rate": 2.277539415541816e-05, "loss": 2.0589, "step": 10828 }, { "epoch": 0.34938527276063647, "grad_norm": 0.46875, "learning_rate": 2.2774048313977103e-05, "loss": 2.0814, "step": 10829 }, { "epoch": 0.3494175366144328, "grad_norm": 0.4296875, "learning_rate": 2.2772702386964896e-05, "loss": 2.0853, "step": 10830 }, { "epoch": 0.34944980046822915, "grad_norm": 0.43359375, "learning_rate": 2.2771356374396365e-05, "loss": 2.0576, "step": 10831 }, { "epoch": 0.3494820643220255, "grad_norm": 0.44921875, "learning_rate": 2.277001027628631e-05, "loss": 2.0941, "step": 10832 }, { "epoch": 0.3495143281758219, "grad_norm": 0.392578125, "learning_rate": 2.2768664092649557e-05, "loss": 2.102, "step": 10833 }, { "epoch": 0.34954659202961824, "grad_norm": 0.431640625, "learning_rate": 2.276731782350093e-05, "loss": 2.0782, "step": 10834 }, { "epoch": 0.3495788558834146, "grad_norm": 0.4140625, "learning_rate": 2.2765971468855233e-05, "loss": 2.0806, "step": 10835 }, { "epoch": 0.34961111973721093, "grad_norm": 0.380859375, "learning_rate": 2.2764625028727296e-05, "loss": 2.0687, "step": 10836 }, { "epoch": 0.3496433835910073, "grad_norm": 0.443359375, "learning_rate": 2.276327850313194e-05, "loss": 2.056, "step": 10837 }, { "epoch": 0.3496756474448036, "grad_norm": 0.41015625, "learning_rate": 2.2761931892083978e-05, "loss": 2.0931, "step": 10838 }, { "epoch": 0.34970791129859996, "grad_norm": 0.416015625, "learning_rate": 2.2760585195598237e-05, "loss": 2.0684, "step": 10839 }, { "epoch": 0.3497401751523963, "grad_norm": 0.4375, "learning_rate": 2.275923841368955e-05, "loss": 2.0904, "step": 10840 }, { "epoch": 0.34977243900619265, "grad_norm": 0.43359375, "learning_rate": 2.2757891546372727e-05, "loss": 2.0949, "step": 10841 }, { "epoch": 0.349804702859989, "grad_norm": 0.427734375, "learning_rate": 2.2756544593662606e-05, "loss": 2.0758, "step": 10842 }, { "epoch": 0.34983696671378534, "grad_norm": 0.4453125, "learning_rate": 2.2755197555574e-05, "loss": 2.0773, "step": 10843 }, { "epoch": 0.3498692305675817, "grad_norm": 0.466796875, "learning_rate": 2.2753850432121745e-05, "loss": 2.1106, "step": 10844 }, { "epoch": 0.349901494421378, "grad_norm": 0.42578125, "learning_rate": 2.2752503223320668e-05, "loss": 2.0936, "step": 10845 }, { "epoch": 0.34993375827517437, "grad_norm": 0.39453125, "learning_rate": 2.275115592918559e-05, "loss": 2.0662, "step": 10846 }, { "epoch": 0.3499660221289707, "grad_norm": 0.447265625, "learning_rate": 2.2749808549731354e-05, "loss": 2.0823, "step": 10847 }, { "epoch": 0.34999828598276705, "grad_norm": 0.388671875, "learning_rate": 2.2748461084972788e-05, "loss": 2.094, "step": 10848 }, { "epoch": 0.3500305498365634, "grad_norm": 0.431640625, "learning_rate": 2.2747113534924715e-05, "loss": 2.1269, "step": 10849 }, { "epoch": 0.35006281369035974, "grad_norm": 0.4140625, "learning_rate": 2.2745765899601976e-05, "loss": 2.0959, "step": 10850 }, { "epoch": 0.3500950775441561, "grad_norm": 0.39453125, "learning_rate": 2.2744418179019405e-05, "loss": 2.0469, "step": 10851 }, { "epoch": 0.35012734139795243, "grad_norm": 0.4140625, "learning_rate": 2.2743070373191834e-05, "loss": 2.0775, "step": 10852 }, { "epoch": 0.35015960525174883, "grad_norm": 0.392578125, "learning_rate": 2.2741722482134098e-05, "loss": 2.1165, "step": 10853 }, { "epoch": 0.3501918691055452, "grad_norm": 0.404296875, "learning_rate": 2.2740374505861038e-05, "loss": 2.0804, "step": 10854 }, { "epoch": 0.3502241329593415, "grad_norm": 0.40234375, "learning_rate": 2.2739026444387482e-05, "loss": 2.1245, "step": 10855 }, { "epoch": 0.35025639681313786, "grad_norm": 0.400390625, "learning_rate": 2.273767829772828e-05, "loss": 2.1215, "step": 10856 }, { "epoch": 0.3502886606669342, "grad_norm": 0.47265625, "learning_rate": 2.2736330065898265e-05, "loss": 2.1115, "step": 10857 }, { "epoch": 0.35032092452073055, "grad_norm": 0.42578125, "learning_rate": 2.2734981748912278e-05, "loss": 2.0692, "step": 10858 }, { "epoch": 0.3503531883745269, "grad_norm": 0.494140625, "learning_rate": 2.273363334678516e-05, "loss": 2.0872, "step": 10859 }, { "epoch": 0.35038545222832324, "grad_norm": 0.419921875, "learning_rate": 2.2732284859531757e-05, "loss": 2.0914, "step": 10860 }, { "epoch": 0.3504177160821196, "grad_norm": 0.421875, "learning_rate": 2.2730936287166905e-05, "loss": 2.0911, "step": 10861 }, { "epoch": 0.3504499799359159, "grad_norm": 0.392578125, "learning_rate": 2.2729587629705456e-05, "loss": 2.1066, "step": 10862 }, { "epoch": 0.35048224378971227, "grad_norm": 0.408203125, "learning_rate": 2.2728238887162252e-05, "loss": 2.0875, "step": 10863 }, { "epoch": 0.3505145076435086, "grad_norm": 0.408203125, "learning_rate": 2.2726890059552136e-05, "loss": 2.1211, "step": 10864 }, { "epoch": 0.35054677149730495, "grad_norm": 0.38671875, "learning_rate": 2.272554114688996e-05, "loss": 2.0398, "step": 10865 }, { "epoch": 0.3505790353511013, "grad_norm": 0.365234375, "learning_rate": 2.272419214919057e-05, "loss": 2.0171, "step": 10866 }, { "epoch": 0.35061129920489764, "grad_norm": 0.404296875, "learning_rate": 2.2722843066468813e-05, "loss": 2.0777, "step": 10867 }, { "epoch": 0.350643563058694, "grad_norm": 0.365234375, "learning_rate": 2.2721493898739538e-05, "loss": 2.0689, "step": 10868 }, { "epoch": 0.35067582691249033, "grad_norm": 0.400390625, "learning_rate": 2.27201446460176e-05, "loss": 2.0664, "step": 10869 }, { "epoch": 0.3507080907662867, "grad_norm": 0.3671875, "learning_rate": 2.2718795308317844e-05, "loss": 2.0895, "step": 10870 }, { "epoch": 0.350740354620083, "grad_norm": 0.40234375, "learning_rate": 2.2717445885655132e-05, "loss": 2.0958, "step": 10871 }, { "epoch": 0.35077261847387936, "grad_norm": 0.3671875, "learning_rate": 2.271609637804431e-05, "loss": 2.0839, "step": 10872 }, { "epoch": 0.3508048823276757, "grad_norm": 0.380859375, "learning_rate": 2.2714746785500236e-05, "loss": 2.0804, "step": 10873 }, { "epoch": 0.3508371461814721, "grad_norm": 0.380859375, "learning_rate": 2.271339710803776e-05, "loss": 2.0866, "step": 10874 }, { "epoch": 0.35086941003526845, "grad_norm": 0.408203125, "learning_rate": 2.2712047345671744e-05, "loss": 2.0781, "step": 10875 }, { "epoch": 0.3509016738890648, "grad_norm": 0.392578125, "learning_rate": 2.2710697498417047e-05, "loss": 2.0461, "step": 10876 }, { "epoch": 0.35093393774286114, "grad_norm": 0.41796875, "learning_rate": 2.270934756628852e-05, "loss": 2.0799, "step": 10877 }, { "epoch": 0.3509662015966575, "grad_norm": 0.390625, "learning_rate": 2.2707997549301024e-05, "loss": 2.0638, "step": 10878 }, { "epoch": 0.3509984654504538, "grad_norm": 0.3984375, "learning_rate": 2.2706647447469423e-05, "loss": 2.0688, "step": 10879 }, { "epoch": 0.35103072930425017, "grad_norm": 0.400390625, "learning_rate": 2.2705297260808576e-05, "loss": 2.0961, "step": 10880 }, { "epoch": 0.3510629931580465, "grad_norm": 0.4375, "learning_rate": 2.270394698933334e-05, "loss": 2.0921, "step": 10881 }, { "epoch": 0.35109525701184285, "grad_norm": 0.421875, "learning_rate": 2.270259663305859e-05, "loss": 2.0775, "step": 10882 }, { "epoch": 0.3511275208656392, "grad_norm": 0.41015625, "learning_rate": 2.270124619199918e-05, "loss": 2.0853, "step": 10883 }, { "epoch": 0.35115978471943554, "grad_norm": 0.4375, "learning_rate": 2.2699895666169972e-05, "loss": 2.0789, "step": 10884 }, { "epoch": 0.3511920485732319, "grad_norm": 0.40234375, "learning_rate": 2.2698545055585845e-05, "loss": 2.0736, "step": 10885 }, { "epoch": 0.35122431242702823, "grad_norm": 0.431640625, "learning_rate": 2.2697194360261646e-05, "loss": 2.0757, "step": 10886 }, { "epoch": 0.3512565762808246, "grad_norm": 0.478515625, "learning_rate": 2.2695843580212264e-05, "loss": 2.1026, "step": 10887 }, { "epoch": 0.3512888401346209, "grad_norm": 0.423828125, "learning_rate": 2.269449271545255e-05, "loss": 2.0775, "step": 10888 }, { "epoch": 0.35132110398841726, "grad_norm": 0.3984375, "learning_rate": 2.269314176599738e-05, "loss": 2.0714, "step": 10889 }, { "epoch": 0.3513533678422136, "grad_norm": 0.4375, "learning_rate": 2.2691790731861628e-05, "loss": 2.1052, "step": 10890 }, { "epoch": 0.35138563169600995, "grad_norm": 0.412109375, "learning_rate": 2.2690439613060162e-05, "loss": 2.104, "step": 10891 }, { "epoch": 0.3514178955498063, "grad_norm": 0.42578125, "learning_rate": 2.2689088409607847e-05, "loss": 2.1024, "step": 10892 }, { "epoch": 0.35145015940360264, "grad_norm": 0.423828125, "learning_rate": 2.268773712151957e-05, "loss": 2.0955, "step": 10893 }, { "epoch": 0.35148242325739903, "grad_norm": 0.423828125, "learning_rate": 2.2686385748810196e-05, "loss": 2.0915, "step": 10894 }, { "epoch": 0.3515146871111954, "grad_norm": 0.39453125, "learning_rate": 2.26850342914946e-05, "loss": 2.0669, "step": 10895 }, { "epoch": 0.3515469509649917, "grad_norm": 0.40234375, "learning_rate": 2.2683682749587663e-05, "loss": 2.0405, "step": 10896 }, { "epoch": 0.35157921481878807, "grad_norm": 0.380859375, "learning_rate": 2.2682331123104258e-05, "loss": 2.0518, "step": 10897 }, { "epoch": 0.3516114786725844, "grad_norm": 0.3828125, "learning_rate": 2.2680979412059263e-05, "loss": 2.0571, "step": 10898 }, { "epoch": 0.35164374252638075, "grad_norm": 0.400390625, "learning_rate": 2.2679627616467553e-05, "loss": 2.0298, "step": 10899 }, { "epoch": 0.3516760063801771, "grad_norm": 0.3828125, "learning_rate": 2.2678275736344014e-05, "loss": 2.0545, "step": 10900 }, { "epoch": 0.35170827023397344, "grad_norm": 0.40625, "learning_rate": 2.2676923771703525e-05, "loss": 2.0872, "step": 10901 }, { "epoch": 0.3517405340877698, "grad_norm": 0.41015625, "learning_rate": 2.267557172256097e-05, "loss": 1.9975, "step": 10902 }, { "epoch": 0.35177279794156613, "grad_norm": 0.43359375, "learning_rate": 2.267421958893122e-05, "loss": 1.9679, "step": 10903 }, { "epoch": 0.3518050617953625, "grad_norm": 0.38671875, "learning_rate": 2.2672867370829175e-05, "loss": 1.9774, "step": 10904 }, { "epoch": 0.3518373256491588, "grad_norm": 0.40234375, "learning_rate": 2.2671515068269703e-05, "loss": 1.9896, "step": 10905 }, { "epoch": 0.35186958950295516, "grad_norm": 0.396484375, "learning_rate": 2.26701626812677e-05, "loss": 1.9862, "step": 10906 }, { "epoch": 0.3519018533567515, "grad_norm": 0.3671875, "learning_rate": 2.266881020983805e-05, "loss": 1.9374, "step": 10907 }, { "epoch": 0.35193411721054785, "grad_norm": 0.38671875, "learning_rate": 2.2667457653995635e-05, "loss": 1.9795, "step": 10908 }, { "epoch": 0.3519663810643442, "grad_norm": 0.369140625, "learning_rate": 2.2666105013755348e-05, "loss": 1.9648, "step": 10909 }, { "epoch": 0.35199864491814054, "grad_norm": 0.376953125, "learning_rate": 2.2664752289132076e-05, "loss": 1.9805, "step": 10910 }, { "epoch": 0.3520309087719369, "grad_norm": 0.3671875, "learning_rate": 2.2663399480140715e-05, "loss": 1.9924, "step": 10911 }, { "epoch": 0.3520631726257332, "grad_norm": 0.384765625, "learning_rate": 2.2662046586796145e-05, "loss": 1.99, "step": 10912 }, { "epoch": 0.35209543647952957, "grad_norm": 0.34765625, "learning_rate": 2.2660693609113267e-05, "loss": 1.9891, "step": 10913 }, { "epoch": 0.35212770033332597, "grad_norm": 0.388671875, "learning_rate": 2.2659340547106964e-05, "loss": 1.9523, "step": 10914 }, { "epoch": 0.3521599641871223, "grad_norm": 0.365234375, "learning_rate": 2.2657987400792135e-05, "loss": 2.0137, "step": 10915 }, { "epoch": 0.35219222804091865, "grad_norm": 0.373046875, "learning_rate": 2.265663417018368e-05, "loss": 1.9811, "step": 10916 }, { "epoch": 0.352224491894715, "grad_norm": 0.37109375, "learning_rate": 2.2655280855296487e-05, "loss": 1.9534, "step": 10917 }, { "epoch": 0.35225675574851134, "grad_norm": 0.388671875, "learning_rate": 2.2653927456145454e-05, "loss": 1.9808, "step": 10918 }, { "epoch": 0.3522890196023077, "grad_norm": 0.357421875, "learning_rate": 2.2652573972745478e-05, "loss": 1.9729, "step": 10919 }, { "epoch": 0.35232128345610403, "grad_norm": 0.39453125, "learning_rate": 2.2651220405111456e-05, "loss": 1.993, "step": 10920 }, { "epoch": 0.3523535473099004, "grad_norm": 0.35546875, "learning_rate": 2.264986675325829e-05, "loss": 1.9855, "step": 10921 }, { "epoch": 0.3523858111636967, "grad_norm": 0.40234375, "learning_rate": 2.2648513017200878e-05, "loss": 2.0185, "step": 10922 }, { "epoch": 0.35241807501749306, "grad_norm": 0.365234375, "learning_rate": 2.2647159196954124e-05, "loss": 1.9983, "step": 10923 }, { "epoch": 0.3524503388712894, "grad_norm": 0.359375, "learning_rate": 2.2645805292532928e-05, "loss": 1.9385, "step": 10924 }, { "epoch": 0.35248260272508575, "grad_norm": 0.376953125, "learning_rate": 2.264445130395219e-05, "loss": 1.9867, "step": 10925 }, { "epoch": 0.3525148665788821, "grad_norm": 0.365234375, "learning_rate": 2.2643097231226813e-05, "loss": 1.9753, "step": 10926 }, { "epoch": 0.35254713043267843, "grad_norm": 0.34765625, "learning_rate": 2.2641743074371713e-05, "loss": 1.9954, "step": 10927 }, { "epoch": 0.3525793942864748, "grad_norm": 0.359375, "learning_rate": 2.2640388833401783e-05, "loss": 1.9957, "step": 10928 }, { "epoch": 0.3526116581402711, "grad_norm": 0.353515625, "learning_rate": 2.263903450833193e-05, "loss": 1.9809, "step": 10929 }, { "epoch": 0.35264392199406747, "grad_norm": 0.3671875, "learning_rate": 2.2637680099177072e-05, "loss": 2.0114, "step": 10930 }, { "epoch": 0.3526761858478638, "grad_norm": 0.373046875, "learning_rate": 2.2636325605952107e-05, "loss": 1.967, "step": 10931 }, { "epoch": 0.35270844970166015, "grad_norm": 0.388671875, "learning_rate": 2.263497102867195e-05, "loss": 1.9703, "step": 10932 }, { "epoch": 0.3527407135554565, "grad_norm": 0.35546875, "learning_rate": 2.2633616367351508e-05, "loss": 1.9702, "step": 10933 }, { "epoch": 0.3527729774092529, "grad_norm": 0.3828125, "learning_rate": 2.2632261622005692e-05, "loss": 1.9898, "step": 10934 }, { "epoch": 0.35280524126304924, "grad_norm": 0.408203125, "learning_rate": 2.263090679264942e-05, "loss": 1.9649, "step": 10935 }, { "epoch": 0.3528375051168456, "grad_norm": 0.369140625, "learning_rate": 2.2629551879297595e-05, "loss": 1.9865, "step": 10936 }, { "epoch": 0.35286976897064193, "grad_norm": 0.37109375, "learning_rate": 2.262819688196514e-05, "loss": 1.9706, "step": 10937 }, { "epoch": 0.35290203282443827, "grad_norm": 0.390625, "learning_rate": 2.2626841800666967e-05, "loss": 1.9861, "step": 10938 }, { "epoch": 0.3529342966782346, "grad_norm": 0.359375, "learning_rate": 2.2625486635417985e-05, "loss": 2.0033, "step": 10939 }, { "epoch": 0.35296656053203096, "grad_norm": 0.37109375, "learning_rate": 2.262413138623312e-05, "loss": 1.9705, "step": 10940 }, { "epoch": 0.3529988243858273, "grad_norm": 0.373046875, "learning_rate": 2.2622776053127286e-05, "loss": 1.9907, "step": 10941 }, { "epoch": 0.35303108823962365, "grad_norm": 0.36328125, "learning_rate": 2.2621420636115406e-05, "loss": 1.9927, "step": 10942 }, { "epoch": 0.35306335209342, "grad_norm": 0.369140625, "learning_rate": 2.2620065135212393e-05, "loss": 2.0012, "step": 10943 }, { "epoch": 0.35309561594721633, "grad_norm": 0.41015625, "learning_rate": 2.2618709550433165e-05, "loss": 2.0864, "step": 10944 }, { "epoch": 0.3531278798010127, "grad_norm": 0.412109375, "learning_rate": 2.2617353881792655e-05, "loss": 2.1215, "step": 10945 }, { "epoch": 0.353160143654809, "grad_norm": 0.376953125, "learning_rate": 2.261599812930577e-05, "loss": 2.1046, "step": 10946 }, { "epoch": 0.35319240750860537, "grad_norm": 0.408203125, "learning_rate": 2.2614642292987446e-05, "loss": 2.0284, "step": 10947 }, { "epoch": 0.3532246713624017, "grad_norm": 0.41015625, "learning_rate": 2.26132863728526e-05, "loss": 2.0572, "step": 10948 }, { "epoch": 0.35325693521619805, "grad_norm": 0.453125, "learning_rate": 2.2611930368916166e-05, "loss": 2.0668, "step": 10949 }, { "epoch": 0.3532891990699944, "grad_norm": 0.5, "learning_rate": 2.2610574281193057e-05, "loss": 2.0829, "step": 10950 }, { "epoch": 0.35332146292379074, "grad_norm": 0.58984375, "learning_rate": 2.2609218109698207e-05, "loss": 2.0851, "step": 10951 }, { "epoch": 0.3533537267775871, "grad_norm": 0.578125, "learning_rate": 2.2607861854446546e-05, "loss": 2.0924, "step": 10952 }, { "epoch": 0.35338599063138343, "grad_norm": 0.490234375, "learning_rate": 2.2606505515452998e-05, "loss": 2.1171, "step": 10953 }, { "epoch": 0.35341825448517983, "grad_norm": 0.5234375, "learning_rate": 2.2605149092732488e-05, "loss": 2.0954, "step": 10954 }, { "epoch": 0.35345051833897617, "grad_norm": 0.455078125, "learning_rate": 2.260379258629996e-05, "loss": 2.0439, "step": 10955 }, { "epoch": 0.3534827821927725, "grad_norm": 0.47265625, "learning_rate": 2.2602435996170338e-05, "loss": 2.0467, "step": 10956 }, { "epoch": 0.35351504604656886, "grad_norm": 0.41015625, "learning_rate": 2.260107932235855e-05, "loss": 2.0714, "step": 10957 }, { "epoch": 0.3535473099003652, "grad_norm": 0.44921875, "learning_rate": 2.259972256487954e-05, "loss": 2.0875, "step": 10958 }, { "epoch": 0.35357957375416155, "grad_norm": 0.396484375, "learning_rate": 2.2598365723748235e-05, "loss": 2.0541, "step": 10959 }, { "epoch": 0.3536118376079579, "grad_norm": 0.443359375, "learning_rate": 2.259700879897956e-05, "loss": 2.0674, "step": 10960 }, { "epoch": 0.35364410146175423, "grad_norm": 0.462890625, "learning_rate": 2.2595651790588478e-05, "loss": 2.0833, "step": 10961 }, { "epoch": 0.3536763653155506, "grad_norm": 0.43359375, "learning_rate": 2.25942946985899e-05, "loss": 2.0721, "step": 10962 }, { "epoch": 0.3537086291693469, "grad_norm": 0.451171875, "learning_rate": 2.259293752299878e-05, "loss": 2.0776, "step": 10963 }, { "epoch": 0.35374089302314327, "grad_norm": 0.416015625, "learning_rate": 2.2591580263830046e-05, "loss": 2.1083, "step": 10964 }, { "epoch": 0.3537731568769396, "grad_norm": 0.50390625, "learning_rate": 2.2590222921098646e-05, "loss": 2.1204, "step": 10965 }, { "epoch": 0.35380542073073595, "grad_norm": 0.41796875, "learning_rate": 2.2588865494819518e-05, "loss": 2.1069, "step": 10966 }, { "epoch": 0.3538376845845323, "grad_norm": 0.458984375, "learning_rate": 2.2587507985007597e-05, "loss": 2.102, "step": 10967 }, { "epoch": 0.35386994843832864, "grad_norm": 0.412109375, "learning_rate": 2.2586150391677837e-05, "loss": 2.0951, "step": 10968 }, { "epoch": 0.353902212292125, "grad_norm": 0.46875, "learning_rate": 2.2584792714845174e-05, "loss": 2.0711, "step": 10969 }, { "epoch": 0.35393447614592133, "grad_norm": 0.400390625, "learning_rate": 2.2583434954524557e-05, "loss": 2.0652, "step": 10970 }, { "epoch": 0.35396673999971767, "grad_norm": 0.447265625, "learning_rate": 2.258207711073092e-05, "loss": 2.0833, "step": 10971 }, { "epoch": 0.353999003853514, "grad_norm": 0.42578125, "learning_rate": 2.258071918347923e-05, "loss": 2.0781, "step": 10972 }, { "epoch": 0.35403126770731036, "grad_norm": 0.400390625, "learning_rate": 2.257936117278441e-05, "loss": 2.052, "step": 10973 }, { "epoch": 0.3540635315611067, "grad_norm": 0.41796875, "learning_rate": 2.2578003078661423e-05, "loss": 2.048, "step": 10974 }, { "epoch": 0.3540957954149031, "grad_norm": 0.404296875, "learning_rate": 2.2576644901125217e-05, "loss": 2.081, "step": 10975 }, { "epoch": 0.35412805926869945, "grad_norm": 0.384765625, "learning_rate": 2.2575286640190743e-05, "loss": 2.0702, "step": 10976 }, { "epoch": 0.3541603231224958, "grad_norm": 0.384765625, "learning_rate": 2.2573928295872936e-05, "loss": 2.0924, "step": 10977 }, { "epoch": 0.35419258697629213, "grad_norm": 0.39453125, "learning_rate": 2.257256986818677e-05, "loss": 2.0753, "step": 10978 }, { "epoch": 0.3542248508300885, "grad_norm": 0.400390625, "learning_rate": 2.2571211357147185e-05, "loss": 2.0747, "step": 10979 }, { "epoch": 0.3542571146838848, "grad_norm": 0.37890625, "learning_rate": 2.2569852762769138e-05, "loss": 2.0634, "step": 10980 }, { "epoch": 0.35428937853768117, "grad_norm": 0.412109375, "learning_rate": 2.256849408506758e-05, "loss": 2.0864, "step": 10981 }, { "epoch": 0.3543216423914775, "grad_norm": 0.435546875, "learning_rate": 2.2567135324057467e-05, "loss": 2.1179, "step": 10982 }, { "epoch": 0.35435390624527385, "grad_norm": 0.400390625, "learning_rate": 2.256577647975376e-05, "loss": 2.0806, "step": 10983 }, { "epoch": 0.3543861700990702, "grad_norm": 0.4140625, "learning_rate": 2.2564417552171414e-05, "loss": 2.0585, "step": 10984 }, { "epoch": 0.35441843395286654, "grad_norm": 0.404296875, "learning_rate": 2.256305854132538e-05, "loss": 2.0871, "step": 10985 }, { "epoch": 0.3544506978066629, "grad_norm": 0.416015625, "learning_rate": 2.2561699447230633e-05, "loss": 2.079, "step": 10986 }, { "epoch": 0.35448296166045923, "grad_norm": 0.39453125, "learning_rate": 2.256034026990212e-05, "loss": 2.0851, "step": 10987 }, { "epoch": 0.35451522551425557, "grad_norm": 0.412109375, "learning_rate": 2.25589810093548e-05, "loss": 2.0753, "step": 10988 }, { "epoch": 0.3545474893680519, "grad_norm": 0.40625, "learning_rate": 2.2557621665603646e-05, "loss": 2.0745, "step": 10989 }, { "epoch": 0.35457975322184826, "grad_norm": 0.408203125, "learning_rate": 2.255626223866361e-05, "loss": 2.1071, "step": 10990 }, { "epoch": 0.3546120170756446, "grad_norm": 0.439453125, "learning_rate": 2.255490272854966e-05, "loss": 2.1015, "step": 10991 }, { "epoch": 0.35464428092944095, "grad_norm": 0.41796875, "learning_rate": 2.255354313527677e-05, "loss": 2.0904, "step": 10992 }, { "epoch": 0.3546765447832373, "grad_norm": 0.4453125, "learning_rate": 2.2552183458859886e-05, "loss": 2.0836, "step": 10993 }, { "epoch": 0.35470880863703363, "grad_norm": 0.419921875, "learning_rate": 2.255082369931399e-05, "loss": 2.0687, "step": 10994 }, { "epoch": 0.35474107249083003, "grad_norm": 0.435546875, "learning_rate": 2.2549463856654036e-05, "loss": 2.0662, "step": 10995 }, { "epoch": 0.3547733363446264, "grad_norm": 0.421875, "learning_rate": 2.2548103930895006e-05, "loss": 2.1096, "step": 10996 }, { "epoch": 0.3548056001984227, "grad_norm": 0.3828125, "learning_rate": 2.2546743922051863e-05, "loss": 2.1104, "step": 10997 }, { "epoch": 0.35483786405221907, "grad_norm": 0.41015625, "learning_rate": 2.2545383830139576e-05, "loss": 2.0844, "step": 10998 }, { "epoch": 0.3548701279060154, "grad_norm": 0.435546875, "learning_rate": 2.2544023655173113e-05, "loss": 2.0924, "step": 10999 }, { "epoch": 0.35490239175981175, "grad_norm": 0.43359375, "learning_rate": 2.2542663397167456e-05, "loss": 2.1165, "step": 11000 }, { "epoch": 0.3549346556136081, "grad_norm": 0.4140625, "learning_rate": 2.2541303056137568e-05, "loss": 2.0839, "step": 11001 }, { "epoch": 0.35496691946740444, "grad_norm": 0.4140625, "learning_rate": 2.2539942632098424e-05, "loss": 2.0715, "step": 11002 }, { "epoch": 0.3549991833212008, "grad_norm": 0.431640625, "learning_rate": 2.2538582125065006e-05, "loss": 2.0862, "step": 11003 }, { "epoch": 0.35503144717499713, "grad_norm": 0.43359375, "learning_rate": 2.2537221535052284e-05, "loss": 2.0583, "step": 11004 }, { "epoch": 0.35506371102879347, "grad_norm": 0.48828125, "learning_rate": 2.253586086207523e-05, "loss": 2.083, "step": 11005 }, { "epoch": 0.3550959748825898, "grad_norm": 0.421875, "learning_rate": 2.2534500106148826e-05, "loss": 2.0934, "step": 11006 }, { "epoch": 0.35512823873638616, "grad_norm": 0.439453125, "learning_rate": 2.2533139267288053e-05, "loss": 2.0764, "step": 11007 }, { "epoch": 0.3551605025901825, "grad_norm": 0.4140625, "learning_rate": 2.2531778345507883e-05, "loss": 2.074, "step": 11008 }, { "epoch": 0.35519276644397885, "grad_norm": 0.439453125, "learning_rate": 2.2530417340823303e-05, "loss": 2.107, "step": 11009 }, { "epoch": 0.3552250302977752, "grad_norm": 0.40625, "learning_rate": 2.2529056253249296e-05, "loss": 2.1168, "step": 11010 }, { "epoch": 0.35525729415157153, "grad_norm": 0.470703125, "learning_rate": 2.2527695082800836e-05, "loss": 2.0804, "step": 11011 }, { "epoch": 0.3552895580053679, "grad_norm": 0.453125, "learning_rate": 2.25263338294929e-05, "loss": 2.0992, "step": 11012 }, { "epoch": 0.3553218218591642, "grad_norm": 0.462890625, "learning_rate": 2.2524972493340488e-05, "loss": 2.1182, "step": 11013 }, { "epoch": 0.35535408571296057, "grad_norm": 0.48828125, "learning_rate": 2.252361107435858e-05, "loss": 2.0946, "step": 11014 }, { "epoch": 0.35538634956675696, "grad_norm": 0.4375, "learning_rate": 2.2522249572562156e-05, "loss": 2.0796, "step": 11015 }, { "epoch": 0.3554186134205533, "grad_norm": 0.484375, "learning_rate": 2.2520887987966203e-05, "loss": 2.1003, "step": 11016 }, { "epoch": 0.35545087727434965, "grad_norm": 0.4296875, "learning_rate": 2.2519526320585712e-05, "loss": 2.1022, "step": 11017 }, { "epoch": 0.355483141128146, "grad_norm": 0.41796875, "learning_rate": 2.2518164570435672e-05, "loss": 2.0914, "step": 11018 }, { "epoch": 0.35551540498194234, "grad_norm": 0.43359375, "learning_rate": 2.2516802737531065e-05, "loss": 2.0841, "step": 11019 }, { "epoch": 0.3555476688357387, "grad_norm": 0.404296875, "learning_rate": 2.251544082188689e-05, "loss": 2.0555, "step": 11020 }, { "epoch": 0.355579932689535, "grad_norm": 0.43359375, "learning_rate": 2.251407882351814e-05, "loss": 2.1109, "step": 11021 }, { "epoch": 0.35561219654333137, "grad_norm": 0.466796875, "learning_rate": 2.2512716742439787e-05, "loss": 2.1198, "step": 11022 }, { "epoch": 0.3556444603971277, "grad_norm": 0.416015625, "learning_rate": 2.2511354578666848e-05, "loss": 2.092, "step": 11023 }, { "epoch": 0.35567672425092406, "grad_norm": 0.462890625, "learning_rate": 2.2509992332214302e-05, "loss": 2.0822, "step": 11024 }, { "epoch": 0.3557089881047204, "grad_norm": 0.4140625, "learning_rate": 2.250863000309715e-05, "loss": 2.1227, "step": 11025 }, { "epoch": 0.35574125195851675, "grad_norm": 0.40625, "learning_rate": 2.250726759133038e-05, "loss": 2.0984, "step": 11026 }, { "epoch": 0.3557735158123131, "grad_norm": 0.439453125, "learning_rate": 2.2505905096929004e-05, "loss": 2.086, "step": 11027 }, { "epoch": 0.35580577966610943, "grad_norm": 0.400390625, "learning_rate": 2.2504542519908006e-05, "loss": 2.0996, "step": 11028 }, { "epoch": 0.3558380435199058, "grad_norm": 0.400390625, "learning_rate": 2.250317986028238e-05, "loss": 2.1302, "step": 11029 }, { "epoch": 0.3558703073737021, "grad_norm": 0.404296875, "learning_rate": 2.2501817118067138e-05, "loss": 2.09, "step": 11030 }, { "epoch": 0.35590257122749847, "grad_norm": 0.400390625, "learning_rate": 2.2500454293277277e-05, "loss": 1.9827, "step": 11031 }, { "epoch": 0.3559348350812948, "grad_norm": 0.423828125, "learning_rate": 2.2499091385927796e-05, "loss": 1.9647, "step": 11032 }, { "epoch": 0.35596709893509115, "grad_norm": 0.42578125, "learning_rate": 2.2497728396033693e-05, "loss": 1.9993, "step": 11033 }, { "epoch": 0.3559993627888875, "grad_norm": 0.40234375, "learning_rate": 2.249636532360998e-05, "loss": 1.9815, "step": 11034 }, { "epoch": 0.3560316266426839, "grad_norm": 0.466796875, "learning_rate": 2.2495002168671652e-05, "loss": 2.0272, "step": 11035 }, { "epoch": 0.35606389049648024, "grad_norm": 0.380859375, "learning_rate": 2.2493638931233716e-05, "loss": 1.9659, "step": 11036 }, { "epoch": 0.3560961543502766, "grad_norm": 0.3828125, "learning_rate": 2.2492275611311182e-05, "loss": 1.9872, "step": 11037 }, { "epoch": 0.3561284182040729, "grad_norm": 0.404296875, "learning_rate": 2.2490912208919055e-05, "loss": 2.0067, "step": 11038 }, { "epoch": 0.35616068205786927, "grad_norm": 0.408203125, "learning_rate": 2.2489548724072332e-05, "loss": 1.9884, "step": 11039 }, { "epoch": 0.3561929459116656, "grad_norm": 0.435546875, "learning_rate": 2.248818515678603e-05, "loss": 1.9975, "step": 11040 }, { "epoch": 0.35622520976546196, "grad_norm": 0.41796875, "learning_rate": 2.248682150707517e-05, "loss": 2.0086, "step": 11041 }, { "epoch": 0.3562574736192583, "grad_norm": 0.3671875, "learning_rate": 2.248545777495474e-05, "loss": 2.0175, "step": 11042 }, { "epoch": 0.35628973747305465, "grad_norm": 0.41796875, "learning_rate": 2.2484093960439757e-05, "loss": 1.9979, "step": 11043 }, { "epoch": 0.356322001326851, "grad_norm": 0.361328125, "learning_rate": 2.2482730063545248e-05, "loss": 1.9992, "step": 11044 }, { "epoch": 0.35635426518064733, "grad_norm": 0.388671875, "learning_rate": 2.2481366084286207e-05, "loss": 1.9992, "step": 11045 }, { "epoch": 0.3563865290344437, "grad_norm": 0.36328125, "learning_rate": 2.2480002022677654e-05, "loss": 2.0028, "step": 11046 }, { "epoch": 0.35641879288824, "grad_norm": 0.388671875, "learning_rate": 2.247863787873461e-05, "loss": 2.0181, "step": 11047 }, { "epoch": 0.35645105674203637, "grad_norm": 0.48828125, "learning_rate": 2.2477273652472087e-05, "loss": 2.1195, "step": 11048 }, { "epoch": 0.3564833205958327, "grad_norm": 0.46484375, "learning_rate": 2.247590934390509e-05, "loss": 2.1101, "step": 11049 }, { "epoch": 0.35651558444962905, "grad_norm": 0.46484375, "learning_rate": 2.2474544953048655e-05, "loss": 2.085, "step": 11050 }, { "epoch": 0.3565478483034254, "grad_norm": 0.453125, "learning_rate": 2.2473180479917788e-05, "loss": 2.0723, "step": 11051 }, { "epoch": 0.35658011215722174, "grad_norm": 0.416015625, "learning_rate": 2.2471815924527515e-05, "loss": 2.098, "step": 11052 }, { "epoch": 0.3566123760110181, "grad_norm": 0.412109375, "learning_rate": 2.2470451286892845e-05, "loss": 2.0992, "step": 11053 }, { "epoch": 0.35664463986481443, "grad_norm": 0.455078125, "learning_rate": 2.2469086567028815e-05, "loss": 2.0942, "step": 11054 }, { "epoch": 0.35667690371861077, "grad_norm": 0.47265625, "learning_rate": 2.2467721764950436e-05, "loss": 2.0868, "step": 11055 }, { "epoch": 0.35670916757240717, "grad_norm": 0.384765625, "learning_rate": 2.246635688067273e-05, "loss": 2.0701, "step": 11056 }, { "epoch": 0.3567414314262035, "grad_norm": 0.41015625, "learning_rate": 2.2464991914210725e-05, "loss": 2.0659, "step": 11057 }, { "epoch": 0.35677369527999986, "grad_norm": 0.431640625, "learning_rate": 2.2463626865579446e-05, "loss": 2.1135, "step": 11058 }, { "epoch": 0.3568059591337962, "grad_norm": 0.40625, "learning_rate": 2.2462261734793916e-05, "loss": 2.084, "step": 11059 }, { "epoch": 0.35683822298759255, "grad_norm": 0.44921875, "learning_rate": 2.246089652186916e-05, "loss": 2.0351, "step": 11060 }, { "epoch": 0.3568704868413889, "grad_norm": 0.388671875, "learning_rate": 2.2459531226820213e-05, "loss": 2.129, "step": 11061 }, { "epoch": 0.35690275069518523, "grad_norm": 0.41015625, "learning_rate": 2.2458165849662092e-05, "loss": 2.0635, "step": 11062 }, { "epoch": 0.3569350145489816, "grad_norm": 0.4140625, "learning_rate": 2.2456800390409833e-05, "loss": 2.0451, "step": 11063 }, { "epoch": 0.3569672784027779, "grad_norm": 0.390625, "learning_rate": 2.2455434849078466e-05, "loss": 2.0782, "step": 11064 }, { "epoch": 0.35699954225657426, "grad_norm": 0.421875, "learning_rate": 2.2454069225683022e-05, "loss": 2.0865, "step": 11065 }, { "epoch": 0.3570318061103706, "grad_norm": 0.388671875, "learning_rate": 2.245270352023853e-05, "loss": 2.0668, "step": 11066 }, { "epoch": 0.35706406996416695, "grad_norm": 0.423828125, "learning_rate": 2.245133773276002e-05, "loss": 2.0818, "step": 11067 }, { "epoch": 0.3570963338179633, "grad_norm": 0.421875, "learning_rate": 2.2449971863262536e-05, "loss": 2.0729, "step": 11068 }, { "epoch": 0.35712859767175964, "grad_norm": 0.396484375, "learning_rate": 2.244860591176111e-05, "loss": 2.0547, "step": 11069 }, { "epoch": 0.357160861525556, "grad_norm": 0.43359375, "learning_rate": 2.2447239878270764e-05, "loss": 2.0943, "step": 11070 }, { "epoch": 0.3571931253793523, "grad_norm": 0.416015625, "learning_rate": 2.2445873762806546e-05, "loss": 2.0524, "step": 11071 }, { "epoch": 0.35722538923314867, "grad_norm": 0.388671875, "learning_rate": 2.2444507565383495e-05, "loss": 2.0699, "step": 11072 }, { "epoch": 0.357257653086945, "grad_norm": 0.392578125, "learning_rate": 2.2443141286016647e-05, "loss": 2.062, "step": 11073 }, { "epoch": 0.35728991694074136, "grad_norm": 0.400390625, "learning_rate": 2.2441774924721032e-05, "loss": 2.0566, "step": 11074 }, { "epoch": 0.3573221807945377, "grad_norm": 0.404296875, "learning_rate": 2.2440408481511703e-05, "loss": 2.067, "step": 11075 }, { "epoch": 0.3573544446483341, "grad_norm": 0.42578125, "learning_rate": 2.2439041956403693e-05, "loss": 2.0761, "step": 11076 }, { "epoch": 0.35738670850213045, "grad_norm": 0.392578125, "learning_rate": 2.2437675349412047e-05, "loss": 2.0801, "step": 11077 }, { "epoch": 0.3574189723559268, "grad_norm": 0.421875, "learning_rate": 2.2436308660551806e-05, "loss": 2.0764, "step": 11078 }, { "epoch": 0.35745123620972313, "grad_norm": 0.40234375, "learning_rate": 2.243494188983801e-05, "loss": 2.087, "step": 11079 }, { "epoch": 0.3574835000635195, "grad_norm": 0.396484375, "learning_rate": 2.2433575037285716e-05, "loss": 2.0833, "step": 11080 }, { "epoch": 0.3575157639173158, "grad_norm": 0.37109375, "learning_rate": 2.2432208102909953e-05, "loss": 2.084, "step": 11081 }, { "epoch": 0.35754802777111216, "grad_norm": 0.37890625, "learning_rate": 2.2430841086725783e-05, "loss": 2.0742, "step": 11082 }, { "epoch": 0.3575802916249085, "grad_norm": 0.388671875, "learning_rate": 2.242947398874824e-05, "loss": 2.0694, "step": 11083 }, { "epoch": 0.35761255547870485, "grad_norm": 0.380859375, "learning_rate": 2.242810680899237e-05, "loss": 2.0953, "step": 11084 }, { "epoch": 0.3576448193325012, "grad_norm": 0.373046875, "learning_rate": 2.2426739547473244e-05, "loss": 2.0734, "step": 11085 }, { "epoch": 0.35767708318629754, "grad_norm": 0.376953125, "learning_rate": 2.2425372204205892e-05, "loss": 2.0764, "step": 11086 }, { "epoch": 0.3577093470400939, "grad_norm": 0.384765625, "learning_rate": 2.2424004779205366e-05, "loss": 2.1068, "step": 11087 }, { "epoch": 0.3577416108938902, "grad_norm": 0.3828125, "learning_rate": 2.2422637272486724e-05, "loss": 2.0764, "step": 11088 }, { "epoch": 0.35777387474768657, "grad_norm": 0.39453125, "learning_rate": 2.2421269684065015e-05, "loss": 2.1358, "step": 11089 }, { "epoch": 0.3578061386014829, "grad_norm": 0.37890625, "learning_rate": 2.2419902013955292e-05, "loss": 2.1096, "step": 11090 }, { "epoch": 0.35783840245527926, "grad_norm": 0.439453125, "learning_rate": 2.2418534262172613e-05, "loss": 2.1041, "step": 11091 }, { "epoch": 0.3578706663090756, "grad_norm": 0.43359375, "learning_rate": 2.2417166428732028e-05, "loss": 2.0985, "step": 11092 }, { "epoch": 0.35790293016287195, "grad_norm": 0.392578125, "learning_rate": 2.24157985136486e-05, "loss": 2.0561, "step": 11093 }, { "epoch": 0.3579351940166683, "grad_norm": 0.458984375, "learning_rate": 2.2414430516937376e-05, "loss": 2.047, "step": 11094 }, { "epoch": 0.35796745787046463, "grad_norm": 0.478515625, "learning_rate": 2.2413062438613423e-05, "loss": 2.0873, "step": 11095 }, { "epoch": 0.35799972172426103, "grad_norm": 0.45703125, "learning_rate": 2.24116942786918e-05, "loss": 2.0964, "step": 11096 }, { "epoch": 0.3580319855780574, "grad_norm": 0.42578125, "learning_rate": 2.241032603718756e-05, "loss": 2.0553, "step": 11097 }, { "epoch": 0.3580642494318537, "grad_norm": 0.48046875, "learning_rate": 2.2408957714115765e-05, "loss": 2.1063, "step": 11098 }, { "epoch": 0.35809651328565006, "grad_norm": 0.474609375, "learning_rate": 2.2407589309491485e-05, "loss": 2.0818, "step": 11099 }, { "epoch": 0.3581287771394464, "grad_norm": 0.46484375, "learning_rate": 2.2406220823329778e-05, "loss": 2.0936, "step": 11100 }, { "epoch": 0.35816104099324275, "grad_norm": 0.48046875, "learning_rate": 2.2404852255645695e-05, "loss": 2.1089, "step": 11101 }, { "epoch": 0.3581933048470391, "grad_norm": 0.4609375, "learning_rate": 2.2403483606454316e-05, "loss": 2.063, "step": 11102 }, { "epoch": 0.35822556870083544, "grad_norm": 0.458984375, "learning_rate": 2.2402114875770703e-05, "loss": 2.1018, "step": 11103 }, { "epoch": 0.3582578325546318, "grad_norm": 0.42578125, "learning_rate": 2.2400746063609918e-05, "loss": 2.1096, "step": 11104 }, { "epoch": 0.3582900964084281, "grad_norm": 0.4140625, "learning_rate": 2.239937716998703e-05, "loss": 2.0627, "step": 11105 }, { "epoch": 0.35832236026222447, "grad_norm": 0.431640625, "learning_rate": 2.2398008194917104e-05, "loss": 2.0748, "step": 11106 }, { "epoch": 0.3583546241160208, "grad_norm": 0.4296875, "learning_rate": 2.239663913841521e-05, "loss": 2.0699, "step": 11107 }, { "epoch": 0.35838688796981716, "grad_norm": 0.4375, "learning_rate": 2.239527000049642e-05, "loss": 2.0793, "step": 11108 }, { "epoch": 0.3584191518236135, "grad_norm": 0.46484375, "learning_rate": 2.2393900781175806e-05, "loss": 2.1016, "step": 11109 }, { "epoch": 0.35845141567740985, "grad_norm": 0.453125, "learning_rate": 2.2392531480468436e-05, "loss": 2.0641, "step": 11110 }, { "epoch": 0.3584836795312062, "grad_norm": 0.42578125, "learning_rate": 2.239116209838938e-05, "loss": 2.1334, "step": 11111 }, { "epoch": 0.35851594338500253, "grad_norm": 0.451171875, "learning_rate": 2.238979263495371e-05, "loss": 2.179, "step": 11112 }, { "epoch": 0.3585482072387989, "grad_norm": 0.451171875, "learning_rate": 2.2388423090176514e-05, "loss": 2.1853, "step": 11113 }, { "epoch": 0.3585804710925952, "grad_norm": 0.4921875, "learning_rate": 2.2387053464072854e-05, "loss": 2.1604, "step": 11114 }, { "epoch": 0.35861273494639156, "grad_norm": 0.50390625, "learning_rate": 2.2385683756657806e-05, "loss": 2.1512, "step": 11115 }, { "epoch": 0.35864499880018796, "grad_norm": 0.478515625, "learning_rate": 2.2384313967946454e-05, "loss": 2.1854, "step": 11116 }, { "epoch": 0.3586772626539843, "grad_norm": 0.478515625, "learning_rate": 2.2382944097953867e-05, "loss": 2.1822, "step": 11117 }, { "epoch": 0.35870952650778065, "grad_norm": 0.44140625, "learning_rate": 2.238157414669513e-05, "loss": 2.1608, "step": 11118 }, { "epoch": 0.358741790361577, "grad_norm": 0.4609375, "learning_rate": 2.238020411418532e-05, "loss": 2.1681, "step": 11119 }, { "epoch": 0.35877405421537334, "grad_norm": 0.451171875, "learning_rate": 2.2378834000439518e-05, "loss": 2.1871, "step": 11120 }, { "epoch": 0.3588063180691697, "grad_norm": 0.490234375, "learning_rate": 2.2377463805472804e-05, "loss": 2.156, "step": 11121 }, { "epoch": 0.358838581922966, "grad_norm": 0.5234375, "learning_rate": 2.237609352930026e-05, "loss": 2.1923, "step": 11122 }, { "epoch": 0.35887084577676237, "grad_norm": 0.439453125, "learning_rate": 2.2374723171936976e-05, "loss": 2.1902, "step": 11123 }, { "epoch": 0.3589031096305587, "grad_norm": 0.47265625, "learning_rate": 2.2373352733398023e-05, "loss": 2.1735, "step": 11124 }, { "epoch": 0.35893537348435506, "grad_norm": 0.41796875, "learning_rate": 2.2371982213698494e-05, "loss": 2.1437, "step": 11125 }, { "epoch": 0.3589676373381514, "grad_norm": 0.4375, "learning_rate": 2.2370611612853476e-05, "loss": 2.1586, "step": 11126 }, { "epoch": 0.35899990119194775, "grad_norm": 0.447265625, "learning_rate": 2.236924093087805e-05, "loss": 2.1853, "step": 11127 }, { "epoch": 0.3590321650457441, "grad_norm": 0.47265625, "learning_rate": 2.236787016778731e-05, "loss": 2.1651, "step": 11128 }, { "epoch": 0.35906442889954043, "grad_norm": 0.42578125, "learning_rate": 2.2366499323596334e-05, "loss": 2.1653, "step": 11129 }, { "epoch": 0.3590966927533368, "grad_norm": 0.447265625, "learning_rate": 2.2365128398320225e-05, "loss": 2.1675, "step": 11130 }, { "epoch": 0.3591289566071331, "grad_norm": 0.458984375, "learning_rate": 2.2363757391974068e-05, "loss": 2.1959, "step": 11131 }, { "epoch": 0.35916122046092946, "grad_norm": 0.423828125, "learning_rate": 2.2362386304572946e-05, "loss": 2.1774, "step": 11132 }, { "epoch": 0.3591934843147258, "grad_norm": 0.470703125, "learning_rate": 2.236101513613196e-05, "loss": 2.189, "step": 11133 }, { "epoch": 0.35922574816852215, "grad_norm": 0.474609375, "learning_rate": 2.23596438866662e-05, "loss": 2.1761, "step": 11134 }, { "epoch": 0.3592580120223185, "grad_norm": 0.392578125, "learning_rate": 2.235827255619076e-05, "loss": 2.0904, "step": 11135 }, { "epoch": 0.3592902758761149, "grad_norm": 0.455078125, "learning_rate": 2.2356901144720735e-05, "loss": 2.0889, "step": 11136 }, { "epoch": 0.35932253972991124, "grad_norm": 0.388671875, "learning_rate": 2.2355529652271222e-05, "loss": 2.1042, "step": 11137 }, { "epoch": 0.3593548035837076, "grad_norm": 0.4296875, "learning_rate": 2.235415807885731e-05, "loss": 2.0526, "step": 11138 }, { "epoch": 0.3593870674375039, "grad_norm": 0.5703125, "learning_rate": 2.2352786424494104e-05, "loss": 2.1002, "step": 11139 }, { "epoch": 0.35941933129130027, "grad_norm": 0.39453125, "learning_rate": 2.2351414689196705e-05, "loss": 2.0885, "step": 11140 }, { "epoch": 0.3594515951450966, "grad_norm": 0.416015625, "learning_rate": 2.23500428729802e-05, "loss": 2.0812, "step": 11141 }, { "epoch": 0.35948385899889296, "grad_norm": 0.41015625, "learning_rate": 2.2348670975859702e-05, "loss": 2.0914, "step": 11142 }, { "epoch": 0.3595161228526893, "grad_norm": 0.4375, "learning_rate": 2.23472989978503e-05, "loss": 2.0961, "step": 11143 }, { "epoch": 0.35954838670648565, "grad_norm": 0.40625, "learning_rate": 2.2345926938967105e-05, "loss": 2.0825, "step": 11144 }, { "epoch": 0.359580650560282, "grad_norm": 0.421875, "learning_rate": 2.2344554799225215e-05, "loss": 2.0789, "step": 11145 }, { "epoch": 0.35961291441407833, "grad_norm": 0.412109375, "learning_rate": 2.2343182578639733e-05, "loss": 2.0612, "step": 11146 }, { "epoch": 0.3596451782678747, "grad_norm": 0.388671875, "learning_rate": 2.2341810277225766e-05, "loss": 2.0441, "step": 11147 }, { "epoch": 0.359677442121671, "grad_norm": 0.375, "learning_rate": 2.2340437894998422e-05, "loss": 2.0623, "step": 11148 }, { "epoch": 0.35970970597546736, "grad_norm": 0.3828125, "learning_rate": 2.2339065431972802e-05, "loss": 2.038, "step": 11149 }, { "epoch": 0.3597419698292637, "grad_norm": 0.390625, "learning_rate": 2.233769288816401e-05, "loss": 2.0791, "step": 11150 }, { "epoch": 0.35977423368306005, "grad_norm": 0.392578125, "learning_rate": 2.2336320263587166e-05, "loss": 2.0788, "step": 11151 }, { "epoch": 0.3598064975368564, "grad_norm": 0.369140625, "learning_rate": 2.2334947558257366e-05, "loss": 2.0692, "step": 11152 }, { "epoch": 0.35983876139065274, "grad_norm": 0.392578125, "learning_rate": 2.2333574772189727e-05, "loss": 2.0794, "step": 11153 }, { "epoch": 0.3598710252444491, "grad_norm": 0.369140625, "learning_rate": 2.233220190539936e-05, "loss": 2.1074, "step": 11154 }, { "epoch": 0.3599032890982454, "grad_norm": 0.44140625, "learning_rate": 2.2330828957901374e-05, "loss": 2.0854, "step": 11155 }, { "epoch": 0.35993555295204177, "grad_norm": 0.3984375, "learning_rate": 2.232945592971088e-05, "loss": 2.1031, "step": 11156 }, { "epoch": 0.35996781680583817, "grad_norm": 0.478515625, "learning_rate": 2.2328082820842998e-05, "loss": 2.0899, "step": 11157 }, { "epoch": 0.3600000806596345, "grad_norm": 0.40625, "learning_rate": 2.232670963131283e-05, "loss": 2.0877, "step": 11158 }, { "epoch": 0.36003234451343086, "grad_norm": 0.44921875, "learning_rate": 2.2325336361135505e-05, "loss": 2.0953, "step": 11159 }, { "epoch": 0.3600646083672272, "grad_norm": 0.419921875, "learning_rate": 2.232396301032613e-05, "loss": 2.0648, "step": 11160 }, { "epoch": 0.36009687222102355, "grad_norm": 0.42578125, "learning_rate": 2.232258957889982e-05, "loss": 2.0813, "step": 11161 }, { "epoch": 0.3601291360748199, "grad_norm": 0.435546875, "learning_rate": 2.2321216066871706e-05, "loss": 2.0531, "step": 11162 }, { "epoch": 0.36016139992861623, "grad_norm": 0.421875, "learning_rate": 2.2319842474256892e-05, "loss": 2.0979, "step": 11163 }, { "epoch": 0.3601936637824126, "grad_norm": 0.408203125, "learning_rate": 2.23184688010705e-05, "loss": 2.1024, "step": 11164 }, { "epoch": 0.3602259276362089, "grad_norm": 0.42578125, "learning_rate": 2.231709504732766e-05, "loss": 2.0883, "step": 11165 }, { "epoch": 0.36025819149000526, "grad_norm": 0.443359375, "learning_rate": 2.231572121304349e-05, "loss": 2.09, "step": 11166 }, { "epoch": 0.3602904553438016, "grad_norm": 0.423828125, "learning_rate": 2.23143472982331e-05, "loss": 2.0807, "step": 11167 }, { "epoch": 0.36032271919759795, "grad_norm": 0.400390625, "learning_rate": 2.2312973302911627e-05, "loss": 2.0692, "step": 11168 }, { "epoch": 0.3603549830513943, "grad_norm": 0.41015625, "learning_rate": 2.231159922709419e-05, "loss": 2.0879, "step": 11169 }, { "epoch": 0.36038724690519064, "grad_norm": 0.390625, "learning_rate": 2.2310225070795912e-05, "loss": 2.0807, "step": 11170 }, { "epoch": 0.360419510758987, "grad_norm": 0.4140625, "learning_rate": 2.2308850834031926e-05, "loss": 2.0733, "step": 11171 }, { "epoch": 0.3604517746127833, "grad_norm": 0.416015625, "learning_rate": 2.2307476516817346e-05, "loss": 2.0657, "step": 11172 }, { "epoch": 0.36048403846657967, "grad_norm": 0.4140625, "learning_rate": 2.2306102119167312e-05, "loss": 2.0354, "step": 11173 }, { "epoch": 0.360516302320376, "grad_norm": 0.37109375, "learning_rate": 2.2304727641096944e-05, "loss": 2.0774, "step": 11174 }, { "epoch": 0.36054856617417236, "grad_norm": 0.3984375, "learning_rate": 2.2303353082621375e-05, "loss": 2.0887, "step": 11175 }, { "epoch": 0.3605808300279687, "grad_norm": 0.357421875, "learning_rate": 2.230197844375574e-05, "loss": 2.0967, "step": 11176 }, { "epoch": 0.3606130938817651, "grad_norm": 0.38671875, "learning_rate": 2.230060372451516e-05, "loss": 2.0738, "step": 11177 }, { "epoch": 0.36064535773556144, "grad_norm": 0.404296875, "learning_rate": 2.2299228924914772e-05, "loss": 2.0502, "step": 11178 }, { "epoch": 0.3606776215893578, "grad_norm": 0.380859375, "learning_rate": 2.2297854044969707e-05, "loss": 2.0877, "step": 11179 }, { "epoch": 0.36070988544315413, "grad_norm": 0.400390625, "learning_rate": 2.2296479084695104e-05, "loss": 2.0836, "step": 11180 }, { "epoch": 0.3607421492969505, "grad_norm": 0.3828125, "learning_rate": 2.229510404410609e-05, "loss": 2.0855, "step": 11181 }, { "epoch": 0.3607744131507468, "grad_norm": 0.41796875, "learning_rate": 2.2293728923217807e-05, "loss": 2.0762, "step": 11182 }, { "epoch": 0.36080667700454316, "grad_norm": 0.408203125, "learning_rate": 2.2292353722045387e-05, "loss": 2.0775, "step": 11183 }, { "epoch": 0.3608389408583395, "grad_norm": 0.388671875, "learning_rate": 2.2290978440603968e-05, "loss": 2.0888, "step": 11184 }, { "epoch": 0.36087120471213585, "grad_norm": 0.4375, "learning_rate": 2.2289603078908694e-05, "loss": 2.0766, "step": 11185 }, { "epoch": 0.3609034685659322, "grad_norm": 0.42578125, "learning_rate": 2.228822763697469e-05, "loss": 2.072, "step": 11186 }, { "epoch": 0.36093573241972854, "grad_norm": 0.40234375, "learning_rate": 2.2286852114817112e-05, "loss": 2.057, "step": 11187 }, { "epoch": 0.3609679962735249, "grad_norm": 0.41796875, "learning_rate": 2.2285476512451087e-05, "loss": 2.0923, "step": 11188 }, { "epoch": 0.3610002601273212, "grad_norm": 0.400390625, "learning_rate": 2.228410082989177e-05, "loss": 2.1159, "step": 11189 }, { "epoch": 0.36103252398111757, "grad_norm": 0.41015625, "learning_rate": 2.2282725067154294e-05, "loss": 2.0419, "step": 11190 }, { "epoch": 0.3610647878349139, "grad_norm": 0.40625, "learning_rate": 2.2281349224253806e-05, "loss": 2.0904, "step": 11191 }, { "epoch": 0.36109705168871026, "grad_norm": 0.40234375, "learning_rate": 2.2279973301205448e-05, "loss": 2.0501, "step": 11192 }, { "epoch": 0.3611293155425066, "grad_norm": 0.39453125, "learning_rate": 2.2278597298024367e-05, "loss": 2.1198, "step": 11193 }, { "epoch": 0.36116157939630295, "grad_norm": 0.453125, "learning_rate": 2.2277221214725706e-05, "loss": 2.0761, "step": 11194 }, { "epoch": 0.3611938432500993, "grad_norm": 0.439453125, "learning_rate": 2.2275845051324614e-05, "loss": 2.065, "step": 11195 }, { "epoch": 0.36122610710389563, "grad_norm": 0.39453125, "learning_rate": 2.2274468807836248e-05, "loss": 2.0644, "step": 11196 }, { "epoch": 0.36125837095769203, "grad_norm": 0.419921875, "learning_rate": 2.227309248427574e-05, "loss": 2.0665, "step": 11197 }, { "epoch": 0.3612906348114884, "grad_norm": 0.400390625, "learning_rate": 2.2271716080658248e-05, "loss": 2.0781, "step": 11198 }, { "epoch": 0.3613228986652847, "grad_norm": 0.404296875, "learning_rate": 2.2270339596998927e-05, "loss": 2.078, "step": 11199 }, { "epoch": 0.36135516251908106, "grad_norm": 0.396484375, "learning_rate": 2.2268963033312917e-05, "loss": 2.0946, "step": 11200 }, { "epoch": 0.3613874263728774, "grad_norm": 0.396484375, "learning_rate": 2.226758638961538e-05, "loss": 2.0089, "step": 11201 }, { "epoch": 0.36141969022667375, "grad_norm": 0.46484375, "learning_rate": 2.226620966592147e-05, "loss": 1.9908, "step": 11202 }, { "epoch": 0.3614519540804701, "grad_norm": 0.474609375, "learning_rate": 2.2264832862246328e-05, "loss": 1.9614, "step": 11203 }, { "epoch": 0.36148421793426644, "grad_norm": 0.392578125, "learning_rate": 2.2263455978605128e-05, "loss": 2.0003, "step": 11204 }, { "epoch": 0.3615164817880628, "grad_norm": 0.421875, "learning_rate": 2.2262079015013006e-05, "loss": 1.9846, "step": 11205 }, { "epoch": 0.3615487456418591, "grad_norm": 0.38671875, "learning_rate": 2.226070197148513e-05, "loss": 1.9842, "step": 11206 }, { "epoch": 0.36158100949565547, "grad_norm": 0.3984375, "learning_rate": 2.2259324848036662e-05, "loss": 2.0127, "step": 11207 }, { "epoch": 0.3616132733494518, "grad_norm": 0.36328125, "learning_rate": 2.2257947644682747e-05, "loss": 1.9776, "step": 11208 }, { "epoch": 0.36164553720324816, "grad_norm": 0.40234375, "learning_rate": 2.2256570361438548e-05, "loss": 1.9921, "step": 11209 }, { "epoch": 0.3616778010570445, "grad_norm": 0.375, "learning_rate": 2.2255192998319237e-05, "loss": 2.0189, "step": 11210 }, { "epoch": 0.36171006491084084, "grad_norm": 0.396484375, "learning_rate": 2.2253815555339962e-05, "loss": 2.0, "step": 11211 }, { "epoch": 0.3617423287646372, "grad_norm": 0.36328125, "learning_rate": 2.2252438032515892e-05, "loss": 1.9722, "step": 11212 }, { "epoch": 0.36177459261843353, "grad_norm": 0.357421875, "learning_rate": 2.2251060429862182e-05, "loss": 1.9619, "step": 11213 }, { "epoch": 0.3618068564722299, "grad_norm": 0.37109375, "learning_rate": 2.2249682747394004e-05, "loss": 1.9747, "step": 11214 }, { "epoch": 0.3618391203260262, "grad_norm": 0.38671875, "learning_rate": 2.2248304985126517e-05, "loss": 1.9911, "step": 11215 }, { "epoch": 0.36187138417982256, "grad_norm": 0.359375, "learning_rate": 2.2246927143074896e-05, "loss": 1.9841, "step": 11216 }, { "epoch": 0.36190364803361896, "grad_norm": 0.380859375, "learning_rate": 2.2245549221254292e-05, "loss": 1.9934, "step": 11217 }, { "epoch": 0.3619359118874153, "grad_norm": 0.375, "learning_rate": 2.2244171219679882e-05, "loss": 1.9934, "step": 11218 }, { "epoch": 0.36196817574121165, "grad_norm": 0.392578125, "learning_rate": 2.2242793138366834e-05, "loss": 2.0117, "step": 11219 }, { "epoch": 0.362000439595008, "grad_norm": 0.408203125, "learning_rate": 2.224141497733031e-05, "loss": 1.9804, "step": 11220 }, { "epoch": 0.36203270344880434, "grad_norm": 0.349609375, "learning_rate": 2.2240036736585495e-05, "loss": 1.9555, "step": 11221 }, { "epoch": 0.3620649673026007, "grad_norm": 0.373046875, "learning_rate": 2.223865841614754e-05, "loss": 1.9857, "step": 11222 }, { "epoch": 0.362097231156397, "grad_norm": 0.376953125, "learning_rate": 2.2237280016031627e-05, "loss": 2.0299, "step": 11223 }, { "epoch": 0.36212949501019337, "grad_norm": 0.357421875, "learning_rate": 2.2235901536252935e-05, "loss": 1.9803, "step": 11224 }, { "epoch": 0.3621617588639897, "grad_norm": 0.373046875, "learning_rate": 2.223452297682662e-05, "loss": 2.0115, "step": 11225 }, { "epoch": 0.36219402271778606, "grad_norm": 0.33984375, "learning_rate": 2.2233144337767872e-05, "loss": 1.9738, "step": 11226 }, { "epoch": 0.3622262865715824, "grad_norm": 0.3828125, "learning_rate": 2.2231765619091862e-05, "loss": 1.9878, "step": 11227 }, { "epoch": 0.36225855042537874, "grad_norm": 0.341796875, "learning_rate": 2.223038682081376e-05, "loss": 1.983, "step": 11228 }, { "epoch": 0.3622908142791751, "grad_norm": 0.373046875, "learning_rate": 2.2229007942948745e-05, "loss": 1.9786, "step": 11229 }, { "epoch": 0.36232307813297143, "grad_norm": 0.34765625, "learning_rate": 2.2227628985512006e-05, "loss": 1.9778, "step": 11230 }, { "epoch": 0.3623553419867678, "grad_norm": 0.35546875, "learning_rate": 2.2226249948518704e-05, "loss": 1.999, "step": 11231 }, { "epoch": 0.3623876058405641, "grad_norm": 0.361328125, "learning_rate": 2.2224870831984027e-05, "loss": 1.9428, "step": 11232 }, { "epoch": 0.36241986969436046, "grad_norm": 0.357421875, "learning_rate": 2.2223491635923155e-05, "loss": 1.9838, "step": 11233 }, { "epoch": 0.3624521335481568, "grad_norm": 0.380859375, "learning_rate": 2.222211236035127e-05, "loss": 1.9792, "step": 11234 }, { "epoch": 0.36248439740195315, "grad_norm": 0.357421875, "learning_rate": 2.222073300528356e-05, "loss": 1.9879, "step": 11235 }, { "epoch": 0.3625166612557495, "grad_norm": 0.349609375, "learning_rate": 2.2219353570735192e-05, "loss": 1.9526, "step": 11236 }, { "epoch": 0.36254892510954584, "grad_norm": 0.376953125, "learning_rate": 2.2217974056721363e-05, "loss": 1.9692, "step": 11237 }, { "epoch": 0.36258118896334224, "grad_norm": 0.369140625, "learning_rate": 2.2216594463257254e-05, "loss": 1.9647, "step": 11238 }, { "epoch": 0.3626134528171386, "grad_norm": 0.396484375, "learning_rate": 2.221521479035805e-05, "loss": 1.9661, "step": 11239 }, { "epoch": 0.3626457166709349, "grad_norm": 0.37890625, "learning_rate": 2.2213835038038935e-05, "loss": 1.9637, "step": 11240 }, { "epoch": 0.36267798052473127, "grad_norm": 0.375, "learning_rate": 2.22124552063151e-05, "loss": 2.006, "step": 11241 }, { "epoch": 0.3627102443785276, "grad_norm": 0.396484375, "learning_rate": 2.221107529520174e-05, "loss": 1.9763, "step": 11242 }, { "epoch": 0.36274250823232396, "grad_norm": 0.40234375, "learning_rate": 2.2209695304714023e-05, "loss": 1.9707, "step": 11243 }, { "epoch": 0.3627747720861203, "grad_norm": 0.39453125, "learning_rate": 2.2208315234867162e-05, "loss": 1.9715, "step": 11244 }, { "epoch": 0.36280703593991664, "grad_norm": 0.380859375, "learning_rate": 2.2206935085676334e-05, "loss": 1.9939, "step": 11245 }, { "epoch": 0.362839299793713, "grad_norm": 0.40234375, "learning_rate": 2.2205554857156733e-05, "loss": 1.9987, "step": 11246 }, { "epoch": 0.36287156364750933, "grad_norm": 0.36328125, "learning_rate": 2.2204174549323562e-05, "loss": 1.9576, "step": 11247 }, { "epoch": 0.3629038275013057, "grad_norm": 0.40625, "learning_rate": 2.2202794162191997e-05, "loss": 1.975, "step": 11248 }, { "epoch": 0.362936091355102, "grad_norm": 0.375, "learning_rate": 2.220141369577725e-05, "loss": 1.9809, "step": 11249 }, { "epoch": 0.36296835520889836, "grad_norm": 0.423828125, "learning_rate": 2.2200033150094497e-05, "loss": 2.0012, "step": 11250 }, { "epoch": 0.3630006190626947, "grad_norm": 0.37890625, "learning_rate": 2.219865252515895e-05, "loss": 1.9837, "step": 11251 }, { "epoch": 0.36303288291649105, "grad_norm": 0.3828125, "learning_rate": 2.21972718209858e-05, "loss": 1.9988, "step": 11252 }, { "epoch": 0.3630651467702874, "grad_norm": 0.37890625, "learning_rate": 2.2195891037590246e-05, "loss": 1.9953, "step": 11253 }, { "epoch": 0.36309741062408374, "grad_norm": 0.357421875, "learning_rate": 2.219451017498748e-05, "loss": 1.9616, "step": 11254 }, { "epoch": 0.3631296744778801, "grad_norm": 0.45703125, "learning_rate": 2.2193129233192716e-05, "loss": 2.0003, "step": 11255 }, { "epoch": 0.3631619383316764, "grad_norm": 0.36328125, "learning_rate": 2.2191748212221138e-05, "loss": 1.9764, "step": 11256 }, { "epoch": 0.36319420218547277, "grad_norm": 0.376953125, "learning_rate": 2.2190367112087958e-05, "loss": 1.9865, "step": 11257 }, { "epoch": 0.36322646603926917, "grad_norm": 0.349609375, "learning_rate": 2.2188985932808375e-05, "loss": 1.9914, "step": 11258 }, { "epoch": 0.3632587298930655, "grad_norm": 0.37109375, "learning_rate": 2.21876046743976e-05, "loss": 2.004, "step": 11259 }, { "epoch": 0.36329099374686186, "grad_norm": 0.392578125, "learning_rate": 2.2186223336870814e-05, "loss": 1.9858, "step": 11260 }, { "epoch": 0.3633232576006582, "grad_norm": 0.3515625, "learning_rate": 2.218484192024325e-05, "loss": 1.967, "step": 11261 }, { "epoch": 0.36335552145445454, "grad_norm": 0.36328125, "learning_rate": 2.218346042453009e-05, "loss": 1.9821, "step": 11262 }, { "epoch": 0.3633877853082509, "grad_norm": 0.3671875, "learning_rate": 2.2182078849746562e-05, "loss": 1.9786, "step": 11263 }, { "epoch": 0.36342004916204723, "grad_norm": 0.36328125, "learning_rate": 2.2180697195907856e-05, "loss": 1.9742, "step": 11264 }, { "epoch": 0.3634523130158436, "grad_norm": 0.3515625, "learning_rate": 2.2179315463029182e-05, "loss": 1.9789, "step": 11265 }, { "epoch": 0.3634845768696399, "grad_norm": 0.3671875, "learning_rate": 2.217793365112576e-05, "loss": 1.9876, "step": 11266 }, { "epoch": 0.36351684072343626, "grad_norm": 0.376953125, "learning_rate": 2.2176551760212795e-05, "loss": 2.0102, "step": 11267 }, { "epoch": 0.3635491045772326, "grad_norm": 0.431640625, "learning_rate": 2.2175169790305493e-05, "loss": 1.9483, "step": 11268 }, { "epoch": 0.36358136843102895, "grad_norm": 0.376953125, "learning_rate": 2.217378774141907e-05, "loss": 1.9776, "step": 11269 }, { "epoch": 0.3636136322848253, "grad_norm": 0.37109375, "learning_rate": 2.2172405613568744e-05, "loss": 1.9488, "step": 11270 }, { "epoch": 0.36364589613862164, "grad_norm": 0.392578125, "learning_rate": 2.2171023406769712e-05, "loss": 2.0053, "step": 11271 }, { "epoch": 0.363678159992418, "grad_norm": 0.45703125, "learning_rate": 2.216964112103721e-05, "loss": 2.0455, "step": 11272 }, { "epoch": 0.3637104238462143, "grad_norm": 0.384765625, "learning_rate": 2.2168258756386433e-05, "loss": 2.1093, "step": 11273 }, { "epoch": 0.36374268770001067, "grad_norm": 0.466796875, "learning_rate": 2.216687631283261e-05, "loss": 2.0774, "step": 11274 }, { "epoch": 0.363774951553807, "grad_norm": 0.419921875, "learning_rate": 2.2165493790390953e-05, "loss": 2.0825, "step": 11275 }, { "epoch": 0.36380721540760336, "grad_norm": 0.44921875, "learning_rate": 2.2164111189076684e-05, "loss": 2.1014, "step": 11276 }, { "epoch": 0.3638394792613997, "grad_norm": 0.4296875, "learning_rate": 2.216272850890501e-05, "loss": 2.0847, "step": 11277 }, { "epoch": 0.3638717431151961, "grad_norm": 0.443359375, "learning_rate": 2.2161345749891167e-05, "loss": 2.1029, "step": 11278 }, { "epoch": 0.36390400696899244, "grad_norm": 0.416015625, "learning_rate": 2.2159962912050367e-05, "loss": 2.0689, "step": 11279 }, { "epoch": 0.3639362708227888, "grad_norm": 0.416015625, "learning_rate": 2.215857999539783e-05, "loss": 2.0773, "step": 11280 }, { "epoch": 0.36396853467658513, "grad_norm": 0.384765625, "learning_rate": 2.215719699994878e-05, "loss": 2.0809, "step": 11281 }, { "epoch": 0.3640007985303815, "grad_norm": 0.41015625, "learning_rate": 2.215581392571844e-05, "loss": 2.0015, "step": 11282 }, { "epoch": 0.3640330623841778, "grad_norm": 0.40234375, "learning_rate": 2.2154430772722037e-05, "loss": 2.0546, "step": 11283 }, { "epoch": 0.36406532623797416, "grad_norm": 0.416015625, "learning_rate": 2.2153047540974788e-05, "loss": 2.0682, "step": 11284 }, { "epoch": 0.3640975900917705, "grad_norm": 0.40234375, "learning_rate": 2.215166423049192e-05, "loss": 2.0975, "step": 11285 }, { "epoch": 0.36412985394556685, "grad_norm": 0.39453125, "learning_rate": 2.2150280841288674e-05, "loss": 2.0717, "step": 11286 }, { "epoch": 0.3641621177993632, "grad_norm": 0.37890625, "learning_rate": 2.2148897373380257e-05, "loss": 2.0738, "step": 11287 }, { "epoch": 0.36419438165315954, "grad_norm": 0.5, "learning_rate": 2.2147513826781907e-05, "loss": 2.0978, "step": 11288 }, { "epoch": 0.3642266455069559, "grad_norm": 0.462890625, "learning_rate": 2.2146130201508857e-05, "loss": 2.0938, "step": 11289 }, { "epoch": 0.3642589093607522, "grad_norm": 0.4921875, "learning_rate": 2.2144746497576333e-05, "loss": 2.0963, "step": 11290 }, { "epoch": 0.36429117321454857, "grad_norm": 0.58203125, "learning_rate": 2.2143362714999557e-05, "loss": 2.0384, "step": 11291 }, { "epoch": 0.3643234370683449, "grad_norm": 0.703125, "learning_rate": 2.2141978853793773e-05, "loss": 2.0677, "step": 11292 }, { "epoch": 0.36435570092214126, "grad_norm": 0.625, "learning_rate": 2.2140594913974218e-05, "loss": 2.0835, "step": 11293 }, { "epoch": 0.3643879647759376, "grad_norm": 0.56640625, "learning_rate": 2.2139210895556104e-05, "loss": 2.064, "step": 11294 }, { "epoch": 0.36442022862973394, "grad_norm": 0.53125, "learning_rate": 2.2137826798554685e-05, "loss": 2.0846, "step": 11295 }, { "epoch": 0.3644524924835303, "grad_norm": 0.53125, "learning_rate": 2.2136442622985186e-05, "loss": 2.0646, "step": 11296 }, { "epoch": 0.36448475633732663, "grad_norm": 0.52734375, "learning_rate": 2.2135058368862848e-05, "loss": 2.0674, "step": 11297 }, { "epoch": 0.36451702019112303, "grad_norm": 0.65234375, "learning_rate": 2.2133674036202907e-05, "loss": 2.1259, "step": 11298 }, { "epoch": 0.3645492840449194, "grad_norm": 1.1796875, "learning_rate": 2.2132289625020595e-05, "loss": 2.2822, "step": 11299 }, { "epoch": 0.3645815478987157, "grad_norm": 1.53125, "learning_rate": 2.213090513533116e-05, "loss": 2.2649, "step": 11300 }, { "epoch": 0.36461381175251206, "grad_norm": 1.359375, "learning_rate": 2.212952056714984e-05, "loss": 2.2944, "step": 11301 }, { "epoch": 0.3646460756063084, "grad_norm": 1.2421875, "learning_rate": 2.2128135920491864e-05, "loss": 2.2875, "step": 11302 }, { "epoch": 0.36467833946010475, "grad_norm": 0.97265625, "learning_rate": 2.212675119537249e-05, "loss": 2.2684, "step": 11303 }, { "epoch": 0.3647106033139011, "grad_norm": 0.99609375, "learning_rate": 2.212536639180695e-05, "loss": 2.2728, "step": 11304 }, { "epoch": 0.36474286716769744, "grad_norm": 0.83203125, "learning_rate": 2.2123981509810478e-05, "loss": 2.28, "step": 11305 }, { "epoch": 0.3647751310214938, "grad_norm": 0.8125, "learning_rate": 2.2122596549398342e-05, "loss": 2.2701, "step": 11306 }, { "epoch": 0.3648073948752901, "grad_norm": 0.69921875, "learning_rate": 2.212121151058577e-05, "loss": 2.2947, "step": 11307 }, { "epoch": 0.36483965872908647, "grad_norm": 0.734375, "learning_rate": 2.2119826393388002e-05, "loss": 2.2641, "step": 11308 }, { "epoch": 0.3648719225828828, "grad_norm": 0.57421875, "learning_rate": 2.21184411978203e-05, "loss": 2.2673, "step": 11309 }, { "epoch": 0.36490418643667916, "grad_norm": 0.59765625, "learning_rate": 2.2117055923897903e-05, "loss": 2.2407, "step": 11310 }, { "epoch": 0.3649364502904755, "grad_norm": 0.5859375, "learning_rate": 2.2115670571636058e-05, "loss": 2.2679, "step": 11311 }, { "epoch": 0.36496871414427184, "grad_norm": 0.5234375, "learning_rate": 2.211428514105002e-05, "loss": 2.2252, "step": 11312 }, { "epoch": 0.3650009779980682, "grad_norm": 0.498046875, "learning_rate": 2.2112899632155027e-05, "loss": 2.2525, "step": 11313 }, { "epoch": 0.36503324185186453, "grad_norm": 0.470703125, "learning_rate": 2.2111514044966348e-05, "loss": 2.2426, "step": 11314 }, { "epoch": 0.3650655057056609, "grad_norm": 0.53515625, "learning_rate": 2.2110128379499218e-05, "loss": 2.1444, "step": 11315 }, { "epoch": 0.3650977695594572, "grad_norm": 0.44921875, "learning_rate": 2.21087426357689e-05, "loss": 2.1853, "step": 11316 }, { "epoch": 0.36513003341325356, "grad_norm": 0.5078125, "learning_rate": 2.210735681379064e-05, "loss": 2.1871, "step": 11317 }, { "epoch": 0.3651622972670499, "grad_norm": 0.455078125, "learning_rate": 2.210597091357969e-05, "loss": 2.1958, "step": 11318 }, { "epoch": 0.3651945611208463, "grad_norm": 0.466796875, "learning_rate": 2.2104584935151318e-05, "loss": 2.1809, "step": 11319 }, { "epoch": 0.36522682497464265, "grad_norm": 0.455078125, "learning_rate": 2.210319887852077e-05, "loss": 2.1916, "step": 11320 }, { "epoch": 0.365259088828439, "grad_norm": 0.4140625, "learning_rate": 2.21018127437033e-05, "loss": 2.1898, "step": 11321 }, { "epoch": 0.36529135268223534, "grad_norm": 0.61328125, "learning_rate": 2.2100426530714172e-05, "loss": 2.1772, "step": 11322 }, { "epoch": 0.3653236165360317, "grad_norm": 0.4765625, "learning_rate": 2.2099040239568645e-05, "loss": 2.1946, "step": 11323 }, { "epoch": 0.365355880389828, "grad_norm": 0.51953125, "learning_rate": 2.2097653870281977e-05, "loss": 2.1782, "step": 11324 }, { "epoch": 0.36538814424362437, "grad_norm": 0.453125, "learning_rate": 2.2096267422869425e-05, "loss": 2.2023, "step": 11325 }, { "epoch": 0.3654204080974207, "grad_norm": 0.484375, "learning_rate": 2.2094880897346253e-05, "loss": 2.1895, "step": 11326 }, { "epoch": 0.36545267195121706, "grad_norm": 0.427734375, "learning_rate": 2.2093494293727717e-05, "loss": 2.1882, "step": 11327 }, { "epoch": 0.3654849358050134, "grad_norm": 0.435546875, "learning_rate": 2.209210761202909e-05, "loss": 2.1912, "step": 11328 }, { "epoch": 0.36551719965880974, "grad_norm": 0.447265625, "learning_rate": 2.209072085226563e-05, "loss": 2.2228, "step": 11329 }, { "epoch": 0.3655494635126061, "grad_norm": 0.44921875, "learning_rate": 2.20893340144526e-05, "loss": 2.1959, "step": 11330 }, { "epoch": 0.36558172736640243, "grad_norm": 0.443359375, "learning_rate": 2.2087947098605268e-05, "loss": 2.1851, "step": 11331 }, { "epoch": 0.3656139912201988, "grad_norm": 0.41015625, "learning_rate": 2.2086560104738897e-05, "loss": 2.1896, "step": 11332 }, { "epoch": 0.3656462550739951, "grad_norm": 0.435546875, "learning_rate": 2.2085173032868757e-05, "loss": 2.1874, "step": 11333 }, { "epoch": 0.36567851892779146, "grad_norm": 0.421875, "learning_rate": 2.208378588301012e-05, "loss": 2.1648, "step": 11334 }, { "epoch": 0.3657107827815878, "grad_norm": 0.396484375, "learning_rate": 2.2082398655178245e-05, "loss": 2.183, "step": 11335 }, { "epoch": 0.36574304663538415, "grad_norm": 0.5546875, "learning_rate": 2.2081011349388403e-05, "loss": 2.2239, "step": 11336 }, { "epoch": 0.3657753104891805, "grad_norm": 0.39453125, "learning_rate": 2.2079623965655877e-05, "loss": 2.2305, "step": 11337 }, { "epoch": 0.36580757434297684, "grad_norm": 0.54296875, "learning_rate": 2.2078236503995923e-05, "loss": 2.2028, "step": 11338 }, { "epoch": 0.36583983819677324, "grad_norm": 0.435546875, "learning_rate": 2.2076848964423823e-05, "loss": 2.1773, "step": 11339 }, { "epoch": 0.3658721020505696, "grad_norm": 0.4609375, "learning_rate": 2.207546134695484e-05, "loss": 2.1867, "step": 11340 }, { "epoch": 0.3659043659043659, "grad_norm": 0.458984375, "learning_rate": 2.2074073651604264e-05, "loss": 2.1808, "step": 11341 }, { "epoch": 0.36593662975816227, "grad_norm": 0.404296875, "learning_rate": 2.2072685878387354e-05, "loss": 2.1939, "step": 11342 }, { "epoch": 0.3659688936119586, "grad_norm": 0.44140625, "learning_rate": 2.2071298027319392e-05, "loss": 2.1688, "step": 11343 }, { "epoch": 0.36600115746575496, "grad_norm": 0.48828125, "learning_rate": 2.2069910098415655e-05, "loss": 2.1937, "step": 11344 }, { "epoch": 0.3660334213195513, "grad_norm": 0.9140625, "learning_rate": 2.2068522091691422e-05, "loss": 2.19, "step": 11345 }, { "epoch": 0.36606568517334764, "grad_norm": 0.6953125, "learning_rate": 2.2067134007161966e-05, "loss": 2.1881, "step": 11346 }, { "epoch": 0.366097949027144, "grad_norm": 0.55859375, "learning_rate": 2.206574584484257e-05, "loss": 2.1836, "step": 11347 }, { "epoch": 0.36613021288094033, "grad_norm": 0.68359375, "learning_rate": 2.206435760474851e-05, "loss": 2.1668, "step": 11348 }, { "epoch": 0.3661624767347367, "grad_norm": 0.65234375, "learning_rate": 2.2062969286895078e-05, "loss": 2.1829, "step": 11349 }, { "epoch": 0.366194740588533, "grad_norm": 0.53125, "learning_rate": 2.206158089129753e-05, "loss": 2.1476, "step": 11350 }, { "epoch": 0.36622700444232936, "grad_norm": 0.65625, "learning_rate": 2.2060192417971183e-05, "loss": 2.1887, "step": 11351 }, { "epoch": 0.3662592682961257, "grad_norm": 0.58203125, "learning_rate": 2.2058803866931297e-05, "loss": 2.1573, "step": 11352 }, { "epoch": 0.36629153214992205, "grad_norm": 0.61328125, "learning_rate": 2.2057415238193157e-05, "loss": 2.1679, "step": 11353 }, { "epoch": 0.3663237960037184, "grad_norm": 0.58203125, "learning_rate": 2.205602653177206e-05, "loss": 2.167, "step": 11354 }, { "epoch": 0.36635605985751474, "grad_norm": 0.54296875, "learning_rate": 2.2054637747683282e-05, "loss": 2.1659, "step": 11355 }, { "epoch": 0.3663883237113111, "grad_norm": 0.56640625, "learning_rate": 2.2053248885942113e-05, "loss": 2.1959, "step": 11356 }, { "epoch": 0.3664205875651074, "grad_norm": 0.51171875, "learning_rate": 2.2051859946563834e-05, "loss": 2.1864, "step": 11357 }, { "epoch": 0.36645285141890377, "grad_norm": 0.65234375, "learning_rate": 2.2050470929563748e-05, "loss": 2.2642, "step": 11358 }, { "epoch": 0.36648511527270017, "grad_norm": 0.5, "learning_rate": 2.2049081834957134e-05, "loss": 2.2519, "step": 11359 }, { "epoch": 0.3665173791264965, "grad_norm": 0.5546875, "learning_rate": 2.2047692662759283e-05, "loss": 2.1435, "step": 11360 }, { "epoch": 0.36654964298029286, "grad_norm": 0.451171875, "learning_rate": 2.2046303412985487e-05, "loss": 1.9577, "step": 11361 }, { "epoch": 0.3665819068340892, "grad_norm": 0.482421875, "learning_rate": 2.204491408565104e-05, "loss": 1.9958, "step": 11362 }, { "epoch": 0.36661417068788554, "grad_norm": 0.44921875, "learning_rate": 2.204352468077123e-05, "loss": 2.027, "step": 11363 }, { "epoch": 0.3666464345416819, "grad_norm": 0.41796875, "learning_rate": 2.2042135198361353e-05, "loss": 2.0048, "step": 11364 }, { "epoch": 0.36667869839547823, "grad_norm": 0.43359375, "learning_rate": 2.204074563843671e-05, "loss": 2.019, "step": 11365 }, { "epoch": 0.3667109622492746, "grad_norm": 0.400390625, "learning_rate": 2.2039356001012588e-05, "loss": 1.9838, "step": 11366 }, { "epoch": 0.3667432261030709, "grad_norm": 0.46484375, "learning_rate": 2.2037966286104275e-05, "loss": 2.0029, "step": 11367 }, { "epoch": 0.36677548995686726, "grad_norm": 0.38671875, "learning_rate": 2.203657649372709e-05, "loss": 1.9904, "step": 11368 }, { "epoch": 0.3668077538106636, "grad_norm": 0.3984375, "learning_rate": 2.2035186623896314e-05, "loss": 1.9877, "step": 11369 }, { "epoch": 0.36684001766445995, "grad_norm": 0.3828125, "learning_rate": 2.2033796676627253e-05, "loss": 1.9857, "step": 11370 }, { "epoch": 0.3668722815182563, "grad_norm": 0.39453125, "learning_rate": 2.20324066519352e-05, "loss": 2.0164, "step": 11371 }, { "epoch": 0.36690454537205264, "grad_norm": 0.38671875, "learning_rate": 2.2031016549835466e-05, "loss": 1.9893, "step": 11372 }, { "epoch": 0.366936809225849, "grad_norm": 0.3828125, "learning_rate": 2.202962637034334e-05, "loss": 1.9994, "step": 11373 }, { "epoch": 0.3669690730796453, "grad_norm": 0.376953125, "learning_rate": 2.2028236113474133e-05, "loss": 2.0003, "step": 11374 }, { "epoch": 0.36700133693344167, "grad_norm": 0.3671875, "learning_rate": 2.2026845779243146e-05, "loss": 1.9777, "step": 11375 }, { "epoch": 0.367033600787238, "grad_norm": 0.404296875, "learning_rate": 2.2025455367665677e-05, "loss": 1.9567, "step": 11376 }, { "epoch": 0.36706586464103436, "grad_norm": 0.3671875, "learning_rate": 2.202406487875704e-05, "loss": 1.9968, "step": 11377 }, { "epoch": 0.3670981284948307, "grad_norm": 0.3671875, "learning_rate": 2.2022674312532532e-05, "loss": 1.9633, "step": 11378 }, { "epoch": 0.3671303923486271, "grad_norm": 0.3671875, "learning_rate": 2.2021283669007464e-05, "loss": 1.9799, "step": 11379 }, { "epoch": 0.36716265620242344, "grad_norm": 0.373046875, "learning_rate": 2.201989294819715e-05, "loss": 2.0067, "step": 11380 }, { "epoch": 0.3671949200562198, "grad_norm": 0.36328125, "learning_rate": 2.201850215011688e-05, "loss": 1.9752, "step": 11381 }, { "epoch": 0.36722718391001613, "grad_norm": 0.36328125, "learning_rate": 2.2017111274781977e-05, "loss": 1.9751, "step": 11382 }, { "epoch": 0.3672594477638125, "grad_norm": 0.353515625, "learning_rate": 2.201572032220775e-05, "loss": 1.9987, "step": 11383 }, { "epoch": 0.3672917116176088, "grad_norm": 0.36328125, "learning_rate": 2.2014329292409502e-05, "loss": 2.0001, "step": 11384 }, { "epoch": 0.36732397547140516, "grad_norm": 0.36328125, "learning_rate": 2.201293818540255e-05, "loss": 2.0083, "step": 11385 }, { "epoch": 0.3673562393252015, "grad_norm": 0.3671875, "learning_rate": 2.2011547001202207e-05, "loss": 2.018, "step": 11386 }, { "epoch": 0.36738850317899785, "grad_norm": 0.3515625, "learning_rate": 2.2010155739823786e-05, "loss": 2.0137, "step": 11387 }, { "epoch": 0.3674207670327942, "grad_norm": 0.37109375, "learning_rate": 2.20087644012826e-05, "loss": 1.9954, "step": 11388 }, { "epoch": 0.36745303088659054, "grad_norm": 0.34375, "learning_rate": 2.2007372985593963e-05, "loss": 1.9912, "step": 11389 }, { "epoch": 0.3674852947403869, "grad_norm": 0.41015625, "learning_rate": 2.2005981492773188e-05, "loss": 2.0499, "step": 11390 }, { "epoch": 0.3675175585941832, "grad_norm": 0.396484375, "learning_rate": 2.2004589922835595e-05, "loss": 2.0532, "step": 11391 }, { "epoch": 0.36754982244797957, "grad_norm": 0.50390625, "learning_rate": 2.2003198275796507e-05, "loss": 2.0602, "step": 11392 }, { "epoch": 0.3675820863017759, "grad_norm": 0.439453125, "learning_rate": 2.200180655167123e-05, "loss": 2.059, "step": 11393 }, { "epoch": 0.36761435015557226, "grad_norm": 0.58203125, "learning_rate": 2.2000414750475093e-05, "loss": 2.054, "step": 11394 }, { "epoch": 0.3676466140093686, "grad_norm": 0.66796875, "learning_rate": 2.1999022872223406e-05, "loss": 2.0907, "step": 11395 }, { "epoch": 0.36767887786316494, "grad_norm": 0.96875, "learning_rate": 2.199763091693151e-05, "loss": 2.0821, "step": 11396 }, { "epoch": 0.3677111417169613, "grad_norm": 0.6640625, "learning_rate": 2.1996238884614707e-05, "loss": 2.1175, "step": 11397 }, { "epoch": 0.36774340557075763, "grad_norm": 0.7578125, "learning_rate": 2.199484677528832e-05, "loss": 2.108, "step": 11398 }, { "epoch": 0.36777566942455403, "grad_norm": 0.54296875, "learning_rate": 2.199345458896768e-05, "loss": 2.0835, "step": 11399 }, { "epoch": 0.3678079332783504, "grad_norm": 0.6484375, "learning_rate": 2.199206232566811e-05, "loss": 2.0866, "step": 11400 }, { "epoch": 0.3678401971321467, "grad_norm": 0.5390625, "learning_rate": 2.1990669985404932e-05, "loss": 2.0402, "step": 11401 }, { "epoch": 0.36787246098594306, "grad_norm": 0.6015625, "learning_rate": 2.1989277568193476e-05, "loss": 2.1148, "step": 11402 }, { "epoch": 0.3679047248397394, "grad_norm": 0.5, "learning_rate": 2.198788507404907e-05, "loss": 2.109, "step": 11403 }, { "epoch": 0.36793698869353575, "grad_norm": 0.498046875, "learning_rate": 2.1986492502987034e-05, "loss": 2.0677, "step": 11404 }, { "epoch": 0.3679692525473321, "grad_norm": 0.45703125, "learning_rate": 2.1985099855022702e-05, "loss": 2.0741, "step": 11405 }, { "epoch": 0.36800151640112844, "grad_norm": 0.44140625, "learning_rate": 2.1983707130171403e-05, "loss": 2.0607, "step": 11406 }, { "epoch": 0.3680337802549248, "grad_norm": 0.482421875, "learning_rate": 2.198231432844846e-05, "loss": 2.0828, "step": 11407 }, { "epoch": 0.3680660441087211, "grad_norm": 0.435546875, "learning_rate": 2.1980921449869216e-05, "loss": 2.1052, "step": 11408 }, { "epoch": 0.36809830796251747, "grad_norm": 0.44921875, "learning_rate": 2.1979528494448997e-05, "loss": 2.1003, "step": 11409 }, { "epoch": 0.3681305718163138, "grad_norm": 0.4921875, "learning_rate": 2.1978135462203133e-05, "loss": 2.0978, "step": 11410 }, { "epoch": 0.36816283567011016, "grad_norm": 0.4375, "learning_rate": 2.197674235314696e-05, "loss": 2.0982, "step": 11411 }, { "epoch": 0.3681950995239065, "grad_norm": 0.4609375, "learning_rate": 2.1975349167295807e-05, "loss": 2.0658, "step": 11412 }, { "epoch": 0.36822736337770284, "grad_norm": 0.4140625, "learning_rate": 2.1973955904665022e-05, "loss": 2.0743, "step": 11413 }, { "epoch": 0.3682596272314992, "grad_norm": 0.4609375, "learning_rate": 2.1972562565269934e-05, "loss": 2.0911, "step": 11414 }, { "epoch": 0.36829189108529553, "grad_norm": 0.408203125, "learning_rate": 2.197116914912588e-05, "loss": 2.0706, "step": 11415 }, { "epoch": 0.3683241549390919, "grad_norm": 0.44921875, "learning_rate": 2.196977565624819e-05, "loss": 2.0556, "step": 11416 }, { "epoch": 0.3683564187928882, "grad_norm": 0.419921875, "learning_rate": 2.1968382086652218e-05, "loss": 2.0575, "step": 11417 }, { "epoch": 0.36838868264668456, "grad_norm": 0.412109375, "learning_rate": 2.1966988440353285e-05, "loss": 2.0711, "step": 11418 }, { "epoch": 0.3684209465004809, "grad_norm": 0.40234375, "learning_rate": 2.196559471736675e-05, "loss": 2.1162, "step": 11419 }, { "epoch": 0.3684532103542773, "grad_norm": 0.390625, "learning_rate": 2.1964200917707945e-05, "loss": 2.0591, "step": 11420 }, { "epoch": 0.36848547420807365, "grad_norm": 0.40234375, "learning_rate": 2.1962807041392208e-05, "loss": 2.0592, "step": 11421 }, { "epoch": 0.36851773806187, "grad_norm": 0.380859375, "learning_rate": 2.1961413088434887e-05, "loss": 2.0981, "step": 11422 }, { "epoch": 0.36855000191566634, "grad_norm": 0.396484375, "learning_rate": 2.1960019058851326e-05, "loss": 2.0942, "step": 11423 }, { "epoch": 0.3685822657694627, "grad_norm": 0.380859375, "learning_rate": 2.1958624952656867e-05, "loss": 2.073, "step": 11424 }, { "epoch": 0.368614529623259, "grad_norm": 0.365234375, "learning_rate": 2.1957230769866863e-05, "loss": 2.0573, "step": 11425 }, { "epoch": 0.36864679347705537, "grad_norm": 0.376953125, "learning_rate": 2.1955836510496646e-05, "loss": 2.0734, "step": 11426 }, { "epoch": 0.3686790573308517, "grad_norm": 0.384765625, "learning_rate": 2.1954442174561574e-05, "loss": 2.0756, "step": 11427 }, { "epoch": 0.36871132118464806, "grad_norm": 0.390625, "learning_rate": 2.195304776207699e-05, "loss": 2.0948, "step": 11428 }, { "epoch": 0.3687435850384444, "grad_norm": 0.3671875, "learning_rate": 2.195165327305824e-05, "loss": 2.061, "step": 11429 }, { "epoch": 0.36877584889224074, "grad_norm": 0.390625, "learning_rate": 2.195025870752069e-05, "loss": 2.0693, "step": 11430 }, { "epoch": 0.3688081127460371, "grad_norm": 0.365234375, "learning_rate": 2.1948864065479673e-05, "loss": 2.0898, "step": 11431 }, { "epoch": 0.36884037659983343, "grad_norm": 0.408203125, "learning_rate": 2.194746934695054e-05, "loss": 2.0361, "step": 11432 }, { "epoch": 0.3688726404536298, "grad_norm": 0.38671875, "learning_rate": 2.1946074551948653e-05, "loss": 2.0991, "step": 11433 }, { "epoch": 0.3689049043074261, "grad_norm": 0.412109375, "learning_rate": 2.1944679680489362e-05, "loss": 2.0663, "step": 11434 }, { "epoch": 0.36893716816122246, "grad_norm": 0.39453125, "learning_rate": 2.1943284732588015e-05, "loss": 2.0914, "step": 11435 }, { "epoch": 0.3689694320150188, "grad_norm": 0.390625, "learning_rate": 2.1941889708259976e-05, "loss": 2.0746, "step": 11436 }, { "epoch": 0.36900169586881515, "grad_norm": 0.41796875, "learning_rate": 2.1940494607520593e-05, "loss": 2.0809, "step": 11437 }, { "epoch": 0.3690339597226115, "grad_norm": 0.4140625, "learning_rate": 2.193909943038522e-05, "loss": 2.0878, "step": 11438 }, { "epoch": 0.36906622357640784, "grad_norm": 0.40625, "learning_rate": 2.193770417686922e-05, "loss": 2.0577, "step": 11439 }, { "epoch": 0.36909848743020424, "grad_norm": 0.423828125, "learning_rate": 2.1936308846987957e-05, "loss": 2.0596, "step": 11440 }, { "epoch": 0.3691307512840006, "grad_norm": 0.400390625, "learning_rate": 2.1934913440756777e-05, "loss": 2.0904, "step": 11441 }, { "epoch": 0.3691630151377969, "grad_norm": 0.3984375, "learning_rate": 2.1933517958191045e-05, "loss": 2.0844, "step": 11442 }, { "epoch": 0.36919527899159327, "grad_norm": 0.37890625, "learning_rate": 2.1932122399306117e-05, "loss": 2.0751, "step": 11443 }, { "epoch": 0.3692275428453896, "grad_norm": 0.396484375, "learning_rate": 2.1930726764117364e-05, "loss": 2.1007, "step": 11444 }, { "epoch": 0.36925980669918596, "grad_norm": 0.3828125, "learning_rate": 2.1929331052640145e-05, "loss": 2.0863, "step": 11445 }, { "epoch": 0.3692920705529823, "grad_norm": 0.494140625, "learning_rate": 2.1927935264889818e-05, "loss": 2.0636, "step": 11446 }, { "epoch": 0.36932433440677864, "grad_norm": 0.46484375, "learning_rate": 2.1926539400881745e-05, "loss": 2.0861, "step": 11447 }, { "epoch": 0.369356598260575, "grad_norm": 0.46484375, "learning_rate": 2.1925143460631305e-05, "loss": 2.088, "step": 11448 }, { "epoch": 0.36938886211437133, "grad_norm": 0.435546875, "learning_rate": 2.1923747444153845e-05, "loss": 2.0986, "step": 11449 }, { "epoch": 0.3694211259681677, "grad_norm": 0.45703125, "learning_rate": 2.1922351351464742e-05, "loss": 2.0926, "step": 11450 }, { "epoch": 0.369453389821964, "grad_norm": 0.400390625, "learning_rate": 2.1920955182579367e-05, "loss": 2.0978, "step": 11451 }, { "epoch": 0.36948565367576036, "grad_norm": 0.4296875, "learning_rate": 2.191955893751308e-05, "loss": 2.0628, "step": 11452 }, { "epoch": 0.3695179175295567, "grad_norm": 0.423828125, "learning_rate": 2.1918162616281246e-05, "loss": 2.0398, "step": 11453 }, { "epoch": 0.36955018138335305, "grad_norm": 0.4140625, "learning_rate": 2.1916766218899247e-05, "loss": 2.0696, "step": 11454 }, { "epoch": 0.3695824452371494, "grad_norm": 0.37890625, "learning_rate": 2.1915369745382446e-05, "loss": 2.0434, "step": 11455 }, { "epoch": 0.36961470909094574, "grad_norm": 0.40234375, "learning_rate": 2.1913973195746217e-05, "loss": 2.1108, "step": 11456 }, { "epoch": 0.3696469729447421, "grad_norm": 0.396484375, "learning_rate": 2.191257657000593e-05, "loss": 2.0627, "step": 11457 }, { "epoch": 0.3696792367985384, "grad_norm": 0.396484375, "learning_rate": 2.1911179868176957e-05, "loss": 2.0756, "step": 11458 }, { "epoch": 0.36971150065233477, "grad_norm": 0.373046875, "learning_rate": 2.190978309027468e-05, "loss": 2.0561, "step": 11459 }, { "epoch": 0.36974376450613117, "grad_norm": 0.375, "learning_rate": 2.1908386236314464e-05, "loss": 2.0639, "step": 11460 }, { "epoch": 0.3697760283599275, "grad_norm": 0.408203125, "learning_rate": 2.190698930631169e-05, "loss": 2.0913, "step": 11461 }, { "epoch": 0.36980829221372385, "grad_norm": 0.37890625, "learning_rate": 2.1905592300281735e-05, "loss": 2.0975, "step": 11462 }, { "epoch": 0.3698405560675202, "grad_norm": 0.419921875, "learning_rate": 2.1904195218239966e-05, "loss": 2.0619, "step": 11463 }, { "epoch": 0.36987281992131654, "grad_norm": 0.431640625, "learning_rate": 2.1902798060201773e-05, "loss": 2.0602, "step": 11464 }, { "epoch": 0.3699050837751129, "grad_norm": 0.482421875, "learning_rate": 2.1901400826182533e-05, "loss": 2.0673, "step": 11465 }, { "epoch": 0.36993734762890923, "grad_norm": 0.43359375, "learning_rate": 2.1900003516197625e-05, "loss": 2.0792, "step": 11466 }, { "epoch": 0.3699696114827056, "grad_norm": 0.404296875, "learning_rate": 2.1898606130262423e-05, "loss": 2.1073, "step": 11467 }, { "epoch": 0.3700018753365019, "grad_norm": 0.46484375, "learning_rate": 2.189720866839232e-05, "loss": 2.0942, "step": 11468 }, { "epoch": 0.37003413919029826, "grad_norm": 0.42578125, "learning_rate": 2.1895811130602687e-05, "loss": 2.0833, "step": 11469 }, { "epoch": 0.3700664030440946, "grad_norm": 0.390625, "learning_rate": 2.189441351690891e-05, "loss": 2.0739, "step": 11470 }, { "epoch": 0.37009866689789095, "grad_norm": 0.416015625, "learning_rate": 2.1893015827326383e-05, "loss": 2.0849, "step": 11471 }, { "epoch": 0.3701309307516873, "grad_norm": 0.404296875, "learning_rate": 2.1891618061870476e-05, "loss": 2.1175, "step": 11472 }, { "epoch": 0.37016319460548364, "grad_norm": 0.39453125, "learning_rate": 2.1890220220556586e-05, "loss": 2.1238, "step": 11473 }, { "epoch": 0.37019545845928, "grad_norm": 0.419921875, "learning_rate": 2.1888822303400093e-05, "loss": 2.0785, "step": 11474 }, { "epoch": 0.3702277223130763, "grad_norm": 0.369140625, "learning_rate": 2.1887424310416385e-05, "loss": 2.0464, "step": 11475 }, { "epoch": 0.37025998616687267, "grad_norm": 0.38671875, "learning_rate": 2.1886026241620852e-05, "loss": 2.0821, "step": 11476 }, { "epoch": 0.370292250020669, "grad_norm": 0.375, "learning_rate": 2.188462809702888e-05, "loss": 2.0877, "step": 11477 }, { "epoch": 0.37032451387446536, "grad_norm": 0.38671875, "learning_rate": 2.1883229876655865e-05, "loss": 2.0802, "step": 11478 }, { "epoch": 0.3703567777282617, "grad_norm": 0.37890625, "learning_rate": 2.188183158051719e-05, "loss": 1.9888, "step": 11479 }, { "epoch": 0.3703890415820581, "grad_norm": 0.365234375, "learning_rate": 2.1880433208628252e-05, "loss": 1.9677, "step": 11480 }, { "epoch": 0.37042130543585444, "grad_norm": 0.37890625, "learning_rate": 2.187903476100444e-05, "loss": 1.9608, "step": 11481 }, { "epoch": 0.3704535692896508, "grad_norm": 0.33984375, "learning_rate": 2.1877636237661152e-05, "loss": 1.9181, "step": 11482 }, { "epoch": 0.37048583314344713, "grad_norm": 0.37890625, "learning_rate": 2.1876237638613776e-05, "loss": 1.9599, "step": 11483 }, { "epoch": 0.3705180969972435, "grad_norm": 0.345703125, "learning_rate": 2.1874838963877707e-05, "loss": 1.9878, "step": 11484 }, { "epoch": 0.3705503608510398, "grad_norm": 0.375, "learning_rate": 2.1873440213468348e-05, "loss": 1.9688, "step": 11485 }, { "epoch": 0.37058262470483616, "grad_norm": 0.359375, "learning_rate": 2.187204138740108e-05, "loss": 1.9762, "step": 11486 }, { "epoch": 0.3706148885586325, "grad_norm": 0.373046875, "learning_rate": 2.1870642485691323e-05, "loss": 1.9774, "step": 11487 }, { "epoch": 0.37064715241242885, "grad_norm": 0.37109375, "learning_rate": 2.1869243508354455e-05, "loss": 1.9815, "step": 11488 }, { "epoch": 0.3706794162662252, "grad_norm": 0.36328125, "learning_rate": 2.1867844455405883e-05, "loss": 2.0048, "step": 11489 }, { "epoch": 0.37071168012002154, "grad_norm": 0.353515625, "learning_rate": 2.1866445326861006e-05, "loss": 1.9719, "step": 11490 }, { "epoch": 0.3707439439738179, "grad_norm": 0.361328125, "learning_rate": 2.1865046122735224e-05, "loss": 1.9903, "step": 11491 }, { "epoch": 0.3707762078276142, "grad_norm": 0.39453125, "learning_rate": 2.186364684304394e-05, "loss": 1.9856, "step": 11492 }, { "epoch": 0.37080847168141057, "grad_norm": 0.400390625, "learning_rate": 2.186224748780256e-05, "loss": 1.9665, "step": 11493 }, { "epoch": 0.3708407355352069, "grad_norm": 0.380859375, "learning_rate": 2.1860848057026477e-05, "loss": 2.0113, "step": 11494 }, { "epoch": 0.37087299938900326, "grad_norm": 0.404296875, "learning_rate": 2.1859448550731103e-05, "loss": 1.9374, "step": 11495 }, { "epoch": 0.3709052632427996, "grad_norm": 0.423828125, "learning_rate": 2.185804896893184e-05, "loss": 2.0012, "step": 11496 }, { "epoch": 0.37093752709659594, "grad_norm": 0.36328125, "learning_rate": 2.1856649311644093e-05, "loss": 2.0022, "step": 11497 }, { "epoch": 0.3709697909503923, "grad_norm": 0.4140625, "learning_rate": 2.1855249578883273e-05, "loss": 1.9654, "step": 11498 }, { "epoch": 0.37100205480418863, "grad_norm": 0.353515625, "learning_rate": 2.185384977066478e-05, "loss": 1.9982, "step": 11499 }, { "epoch": 0.371034318657985, "grad_norm": 0.419921875, "learning_rate": 2.1852449887004027e-05, "loss": 1.9941, "step": 11500 }, { "epoch": 0.3710665825117814, "grad_norm": 0.419921875, "learning_rate": 2.185104992791642e-05, "loss": 1.9708, "step": 11501 }, { "epoch": 0.3710988463655777, "grad_norm": 0.34375, "learning_rate": 2.1849649893417372e-05, "loss": 1.9699, "step": 11502 }, { "epoch": 0.37113111021937406, "grad_norm": 0.408203125, "learning_rate": 2.184824978352229e-05, "loss": 1.9808, "step": 11503 }, { "epoch": 0.3711633740731704, "grad_norm": 0.3671875, "learning_rate": 2.184684959824659e-05, "loss": 1.9796, "step": 11504 }, { "epoch": 0.37119563792696675, "grad_norm": 0.4453125, "learning_rate": 2.184544933760568e-05, "loss": 2.0105, "step": 11505 }, { "epoch": 0.3712279017807631, "grad_norm": 0.361328125, "learning_rate": 2.184404900161497e-05, "loss": 1.991, "step": 11506 }, { "epoch": 0.37126016563455944, "grad_norm": 0.40625, "learning_rate": 2.184264859028989e-05, "loss": 1.9722, "step": 11507 }, { "epoch": 0.3712924294883558, "grad_norm": 0.3828125, "learning_rate": 2.1841248103645833e-05, "loss": 2.0019, "step": 11508 }, { "epoch": 0.3713246933421521, "grad_norm": 0.3828125, "learning_rate": 2.1839847541698226e-05, "loss": 1.9765, "step": 11509 }, { "epoch": 0.37135695719594847, "grad_norm": 0.375, "learning_rate": 2.183844690446249e-05, "loss": 1.973, "step": 11510 }, { "epoch": 0.3713892210497448, "grad_norm": 0.373046875, "learning_rate": 2.1837046191954027e-05, "loss": 1.9709, "step": 11511 }, { "epoch": 0.37142148490354115, "grad_norm": 0.37109375, "learning_rate": 2.1835645404188274e-05, "loss": 1.9867, "step": 11512 }, { "epoch": 0.3714537487573375, "grad_norm": 0.37109375, "learning_rate": 2.1834244541180635e-05, "loss": 2.0241, "step": 11513 }, { "epoch": 0.37148601261113384, "grad_norm": 0.40234375, "learning_rate": 2.1832843602946536e-05, "loss": 2.002, "step": 11514 }, { "epoch": 0.3715182764649302, "grad_norm": 0.369140625, "learning_rate": 2.1831442589501396e-05, "loss": 1.9908, "step": 11515 }, { "epoch": 0.37155054031872653, "grad_norm": 0.390625, "learning_rate": 2.183004150086064e-05, "loss": 1.9901, "step": 11516 }, { "epoch": 0.3715828041725229, "grad_norm": 0.36328125, "learning_rate": 2.1828640337039684e-05, "loss": 1.981, "step": 11517 }, { "epoch": 0.3716150680263192, "grad_norm": 0.375, "learning_rate": 2.1827239098053957e-05, "loss": 1.9951, "step": 11518 }, { "epoch": 0.37164733188011556, "grad_norm": 0.39453125, "learning_rate": 2.1825837783918874e-05, "loss": 1.9492, "step": 11519 }, { "epoch": 0.3716795957339119, "grad_norm": 0.349609375, "learning_rate": 2.1824436394649868e-05, "loss": 1.9914, "step": 11520 }, { "epoch": 0.3717118595877083, "grad_norm": 0.38671875, "learning_rate": 2.1823034930262365e-05, "loss": 1.9783, "step": 11521 }, { "epoch": 0.37174412344150465, "grad_norm": 0.373046875, "learning_rate": 2.1821633390771787e-05, "loss": 1.9831, "step": 11522 }, { "epoch": 0.371776387295301, "grad_norm": 0.40625, "learning_rate": 2.182023177619356e-05, "loss": 1.9953, "step": 11523 }, { "epoch": 0.37180865114909734, "grad_norm": 0.388671875, "learning_rate": 2.181883008654312e-05, "loss": 1.9923, "step": 11524 }, { "epoch": 0.3718409150028937, "grad_norm": 0.376953125, "learning_rate": 2.1817428321835883e-05, "loss": 1.9775, "step": 11525 }, { "epoch": 0.37187317885669, "grad_norm": 0.3515625, "learning_rate": 2.1816026482087284e-05, "loss": 1.9508, "step": 11526 }, { "epoch": 0.37190544271048637, "grad_norm": 0.41015625, "learning_rate": 2.1814624567312763e-05, "loss": 1.9804, "step": 11527 }, { "epoch": 0.3719377065642827, "grad_norm": 0.494140625, "learning_rate": 2.181322257752774e-05, "loss": 2.0937, "step": 11528 }, { "epoch": 0.37196997041807905, "grad_norm": 0.47265625, "learning_rate": 2.1811820512747652e-05, "loss": 2.0896, "step": 11529 }, { "epoch": 0.3720022342718754, "grad_norm": 0.408203125, "learning_rate": 2.1810418372987932e-05, "loss": 2.0812, "step": 11530 }, { "epoch": 0.37203449812567174, "grad_norm": 0.40625, "learning_rate": 2.180901615826401e-05, "loss": 2.0572, "step": 11531 }, { "epoch": 0.3720667619794681, "grad_norm": 0.419921875, "learning_rate": 2.1807613868591325e-05, "loss": 2.071, "step": 11532 }, { "epoch": 0.37209902583326443, "grad_norm": 0.40625, "learning_rate": 2.180621150398531e-05, "loss": 2.0657, "step": 11533 }, { "epoch": 0.3721312896870608, "grad_norm": 0.41796875, "learning_rate": 2.1804809064461396e-05, "loss": 2.0777, "step": 11534 }, { "epoch": 0.3721635535408571, "grad_norm": 0.4375, "learning_rate": 2.1803406550035036e-05, "loss": 2.0514, "step": 11535 }, { "epoch": 0.37219581739465346, "grad_norm": 0.443359375, "learning_rate": 2.180200396072165e-05, "loss": 2.0617, "step": 11536 }, { "epoch": 0.3722280812484498, "grad_norm": 0.43359375, "learning_rate": 2.180060129653668e-05, "loss": 2.0354, "step": 11537 }, { "epoch": 0.37226034510224615, "grad_norm": 0.42578125, "learning_rate": 2.1799198557495575e-05, "loss": 2.0617, "step": 11538 }, { "epoch": 0.3722926089560425, "grad_norm": 0.455078125, "learning_rate": 2.1797795743613775e-05, "loss": 2.1208, "step": 11539 }, { "epoch": 0.37232487280983884, "grad_norm": 0.435546875, "learning_rate": 2.1796392854906706e-05, "loss": 2.09, "step": 11540 }, { "epoch": 0.37235713666363524, "grad_norm": 0.421875, "learning_rate": 2.179498989138983e-05, "loss": 2.0703, "step": 11541 }, { "epoch": 0.3723894005174316, "grad_norm": 0.4453125, "learning_rate": 2.179358685307857e-05, "loss": 2.0541, "step": 11542 }, { "epoch": 0.3724216643712279, "grad_norm": 0.3828125, "learning_rate": 2.1792183739988387e-05, "loss": 2.0873, "step": 11543 }, { "epoch": 0.37245392822502427, "grad_norm": 0.419921875, "learning_rate": 2.1790780552134717e-05, "loss": 2.0467, "step": 11544 }, { "epoch": 0.3724861920788206, "grad_norm": 0.36328125, "learning_rate": 2.1789377289533004e-05, "loss": 2.0765, "step": 11545 }, { "epoch": 0.37251845593261695, "grad_norm": 0.453125, "learning_rate": 2.1787973952198696e-05, "loss": 2.0794, "step": 11546 }, { "epoch": 0.3725507197864133, "grad_norm": 0.380859375, "learning_rate": 2.1786570540147246e-05, "loss": 2.0881, "step": 11547 }, { "epoch": 0.37258298364020964, "grad_norm": 0.4140625, "learning_rate": 2.178516705339409e-05, "loss": 2.0608, "step": 11548 }, { "epoch": 0.372615247494006, "grad_norm": 0.375, "learning_rate": 2.1783763491954688e-05, "loss": 2.078, "step": 11549 }, { "epoch": 0.37264751134780233, "grad_norm": 0.404296875, "learning_rate": 2.178235985584448e-05, "loss": 2.075, "step": 11550 }, { "epoch": 0.3726797752015987, "grad_norm": 0.4296875, "learning_rate": 2.178095614507892e-05, "loss": 2.09, "step": 11551 }, { "epoch": 0.372712039055395, "grad_norm": 0.466796875, "learning_rate": 2.1779552359673465e-05, "loss": 2.0834, "step": 11552 }, { "epoch": 0.37274430290919136, "grad_norm": 0.37890625, "learning_rate": 2.1778148499643555e-05, "loss": 2.0773, "step": 11553 }, { "epoch": 0.3727765667629877, "grad_norm": 0.408203125, "learning_rate": 2.1776744565004652e-05, "loss": 2.0746, "step": 11554 }, { "epoch": 0.37280883061678405, "grad_norm": 0.380859375, "learning_rate": 2.1775340555772206e-05, "loss": 2.059, "step": 11555 }, { "epoch": 0.3728410944705804, "grad_norm": 0.40234375, "learning_rate": 2.177393647196167e-05, "loss": 2.0255, "step": 11556 }, { "epoch": 0.37287335832437674, "grad_norm": 0.427734375, "learning_rate": 2.1772532313588503e-05, "loss": 2.0744, "step": 11557 }, { "epoch": 0.3729056221781731, "grad_norm": 0.54296875, "learning_rate": 2.1771128080668157e-05, "loss": 2.074, "step": 11558 }, { "epoch": 0.3729378860319694, "grad_norm": 0.416015625, "learning_rate": 2.176972377321609e-05, "loss": 2.0911, "step": 11559 }, { "epoch": 0.37297014988576577, "grad_norm": 0.490234375, "learning_rate": 2.1768319391247762e-05, "loss": 2.0734, "step": 11560 }, { "epoch": 0.37300241373956217, "grad_norm": 0.4140625, "learning_rate": 2.1766914934778633e-05, "loss": 2.0698, "step": 11561 }, { "epoch": 0.3730346775933585, "grad_norm": 0.4375, "learning_rate": 2.1765510403824155e-05, "loss": 2.0741, "step": 11562 }, { "epoch": 0.37306694144715485, "grad_norm": 0.4375, "learning_rate": 2.1764105798399796e-05, "loss": 2.054, "step": 11563 }, { "epoch": 0.3730992053009512, "grad_norm": 0.3984375, "learning_rate": 2.1762701118521007e-05, "loss": 2.0827, "step": 11564 }, { "epoch": 0.37313146915474754, "grad_norm": 0.427734375, "learning_rate": 2.1761296364203256e-05, "loss": 2.0575, "step": 11565 }, { "epoch": 0.3731637330085439, "grad_norm": 0.3828125, "learning_rate": 2.1759891535462012e-05, "loss": 2.0749, "step": 11566 }, { "epoch": 0.37319599686234023, "grad_norm": 0.41796875, "learning_rate": 2.1758486632312724e-05, "loss": 2.0588, "step": 11567 }, { "epoch": 0.3732282607161366, "grad_norm": 0.390625, "learning_rate": 2.1757081654770868e-05, "loss": 2.0654, "step": 11568 }, { "epoch": 0.3732605245699329, "grad_norm": 0.384765625, "learning_rate": 2.1755676602851905e-05, "loss": 2.0317, "step": 11569 }, { "epoch": 0.37329278842372926, "grad_norm": 0.37890625, "learning_rate": 2.17542714765713e-05, "loss": 2.0743, "step": 11570 }, { "epoch": 0.3733250522775256, "grad_norm": 0.369140625, "learning_rate": 2.1752866275944513e-05, "loss": 2.0378, "step": 11571 }, { "epoch": 0.37335731613132195, "grad_norm": 0.404296875, "learning_rate": 2.1751461000987027e-05, "loss": 2.0983, "step": 11572 }, { "epoch": 0.3733895799851183, "grad_norm": 0.388671875, "learning_rate": 2.1750055651714303e-05, "loss": 2.06, "step": 11573 }, { "epoch": 0.37342184383891464, "grad_norm": 0.462890625, "learning_rate": 2.17486502281418e-05, "loss": 2.0695, "step": 11574 }, { "epoch": 0.373454107692711, "grad_norm": 0.412109375, "learning_rate": 2.1747244730285005e-05, "loss": 2.078, "step": 11575 }, { "epoch": 0.3734863715465073, "grad_norm": 0.462890625, "learning_rate": 2.174583915815938e-05, "loss": 2.0829, "step": 11576 }, { "epoch": 0.37351863540030367, "grad_norm": 0.388671875, "learning_rate": 2.174443351178039e-05, "loss": 2.0648, "step": 11577 }, { "epoch": 0.3735508992541, "grad_norm": 0.447265625, "learning_rate": 2.174302779116352e-05, "loss": 2.0353, "step": 11578 }, { "epoch": 0.37358316310789635, "grad_norm": 0.42578125, "learning_rate": 2.174162199632424e-05, "loss": 2.0702, "step": 11579 }, { "epoch": 0.3736154269616927, "grad_norm": 0.44140625, "learning_rate": 2.1740216127278016e-05, "loss": 2.0756, "step": 11580 }, { "epoch": 0.37364769081548904, "grad_norm": 0.4453125, "learning_rate": 2.1738810184040327e-05, "loss": 2.0396, "step": 11581 }, { "epoch": 0.37367995466928544, "grad_norm": 0.431640625, "learning_rate": 2.173740416662665e-05, "loss": 2.0524, "step": 11582 }, { "epoch": 0.3737122185230818, "grad_norm": 0.435546875, "learning_rate": 2.173599807505247e-05, "loss": 2.0511, "step": 11583 }, { "epoch": 0.37374448237687813, "grad_norm": 0.396484375, "learning_rate": 2.1734591909333246e-05, "loss": 2.0498, "step": 11584 }, { "epoch": 0.3737767462306745, "grad_norm": 0.455078125, "learning_rate": 2.173318566948447e-05, "loss": 2.0608, "step": 11585 }, { "epoch": 0.3738090100844708, "grad_norm": 0.44140625, "learning_rate": 2.173177935552162e-05, "loss": 2.0779, "step": 11586 }, { "epoch": 0.37384127393826716, "grad_norm": 0.37890625, "learning_rate": 2.173037296746017e-05, "loss": 2.0568, "step": 11587 }, { "epoch": 0.3738735377920635, "grad_norm": 0.4296875, "learning_rate": 2.1728966505315592e-05, "loss": 2.0566, "step": 11588 }, { "epoch": 0.37390580164585985, "grad_norm": 0.37890625, "learning_rate": 2.172755996910339e-05, "loss": 2.0666, "step": 11589 }, { "epoch": 0.3739380654996562, "grad_norm": 0.40625, "learning_rate": 2.1726153358839034e-05, "loss": 2.0738, "step": 11590 }, { "epoch": 0.37397032935345254, "grad_norm": 0.4296875, "learning_rate": 2.1724746674538e-05, "loss": 2.0859, "step": 11591 }, { "epoch": 0.3740025932072489, "grad_norm": 0.390625, "learning_rate": 2.1723339916215785e-05, "loss": 2.0642, "step": 11592 }, { "epoch": 0.3740348570610452, "grad_norm": 0.4296875, "learning_rate": 2.1721933083887866e-05, "loss": 2.0874, "step": 11593 }, { "epoch": 0.37406712091484157, "grad_norm": 0.384765625, "learning_rate": 2.1720526177569728e-05, "loss": 2.0706, "step": 11594 }, { "epoch": 0.3740993847686379, "grad_norm": 0.462890625, "learning_rate": 2.1719119197276862e-05, "loss": 2.0632, "step": 11595 }, { "epoch": 0.37413164862243425, "grad_norm": 0.455078125, "learning_rate": 2.1717712143024752e-05, "loss": 2.0688, "step": 11596 }, { "epoch": 0.3741639124762306, "grad_norm": 0.443359375, "learning_rate": 2.1716305014828885e-05, "loss": 2.0493, "step": 11597 }, { "epoch": 0.37419617633002694, "grad_norm": 0.416015625, "learning_rate": 2.1714897812704748e-05, "loss": 2.0515, "step": 11598 }, { "epoch": 0.3742284401838233, "grad_norm": 0.431640625, "learning_rate": 2.1713490536667835e-05, "loss": 2.101, "step": 11599 }, { "epoch": 0.37426070403761963, "grad_norm": 0.451171875, "learning_rate": 2.1712083186733636e-05, "loss": 2.0372, "step": 11600 }, { "epoch": 0.374292967891416, "grad_norm": 0.431640625, "learning_rate": 2.171067576291764e-05, "loss": 2.0805, "step": 11601 }, { "epoch": 0.3743252317452124, "grad_norm": 0.478515625, "learning_rate": 2.1709268265235338e-05, "loss": 2.085, "step": 11602 }, { "epoch": 0.3743574955990087, "grad_norm": 0.453125, "learning_rate": 2.1707860693702226e-05, "loss": 2.0948, "step": 11603 }, { "epoch": 0.37438975945280506, "grad_norm": 0.46484375, "learning_rate": 2.1706453048333795e-05, "loss": 2.0894, "step": 11604 }, { "epoch": 0.3744220233066014, "grad_norm": 0.4296875, "learning_rate": 2.1705045329145537e-05, "loss": 2.0732, "step": 11605 }, { "epoch": 0.37445428716039775, "grad_norm": 0.44140625, "learning_rate": 2.1703637536152954e-05, "loss": 2.0626, "step": 11606 }, { "epoch": 0.3744865510141941, "grad_norm": 0.423828125, "learning_rate": 2.170222966937154e-05, "loss": 2.0882, "step": 11607 }, { "epoch": 0.37451881486799043, "grad_norm": 0.41015625, "learning_rate": 2.1700821728816785e-05, "loss": 2.0706, "step": 11608 }, { "epoch": 0.3745510787217868, "grad_norm": 0.404296875, "learning_rate": 2.1699413714504194e-05, "loss": 2.0889, "step": 11609 }, { "epoch": 0.3745833425755831, "grad_norm": 1.921875, "learning_rate": 2.1698005626449262e-05, "loss": 2.0907, "step": 11610 }, { "epoch": 0.37461560642937947, "grad_norm": 0.435546875, "learning_rate": 2.1696597464667493e-05, "loss": 2.0549, "step": 11611 }, { "epoch": 0.3746478702831758, "grad_norm": 0.390625, "learning_rate": 2.1695189229174378e-05, "loss": 2.0997, "step": 11612 }, { "epoch": 0.37468013413697215, "grad_norm": 0.423828125, "learning_rate": 2.1693780919985423e-05, "loss": 2.0972, "step": 11613 }, { "epoch": 0.3747123979907685, "grad_norm": 0.41015625, "learning_rate": 2.1692372537116134e-05, "loss": 2.0196, "step": 11614 }, { "epoch": 0.37474466184456484, "grad_norm": 0.40625, "learning_rate": 2.1690964080582006e-05, "loss": 1.994, "step": 11615 }, { "epoch": 0.3747769256983612, "grad_norm": 0.423828125, "learning_rate": 2.1689555550398547e-05, "loss": 1.9766, "step": 11616 }, { "epoch": 0.37480918955215753, "grad_norm": 0.41796875, "learning_rate": 2.168814694658126e-05, "loss": 1.9846, "step": 11617 }, { "epoch": 0.3748414534059539, "grad_norm": 0.404296875, "learning_rate": 2.168673826914565e-05, "loss": 1.9891, "step": 11618 }, { "epoch": 0.3748737172597502, "grad_norm": 0.42578125, "learning_rate": 2.1685329518107218e-05, "loss": 1.9963, "step": 11619 }, { "epoch": 0.37490598111354656, "grad_norm": 0.462890625, "learning_rate": 2.168392069348148e-05, "loss": 1.9859, "step": 11620 }, { "epoch": 0.3749382449673429, "grad_norm": 0.421875, "learning_rate": 2.1682511795283938e-05, "loss": 1.997, "step": 11621 }, { "epoch": 0.3749705088211393, "grad_norm": 0.390625, "learning_rate": 2.168110282353009e-05, "loss": 1.9772, "step": 11622 }, { "epoch": 0.37500277267493565, "grad_norm": 0.384765625, "learning_rate": 2.1679693778235467e-05, "loss": 1.982, "step": 11623 }, { "epoch": 0.375035036528732, "grad_norm": 0.41015625, "learning_rate": 2.1678284659415565e-05, "loss": 1.9911, "step": 11624 }, { "epoch": 0.37506730038252833, "grad_norm": 0.359375, "learning_rate": 2.167687546708589e-05, "loss": 1.9907, "step": 11625 }, { "epoch": 0.3750995642363247, "grad_norm": 0.392578125, "learning_rate": 2.1675466201261967e-05, "loss": 1.9719, "step": 11626 }, { "epoch": 0.375131828090121, "grad_norm": 0.369140625, "learning_rate": 2.1674056861959293e-05, "loss": 2.0198, "step": 11627 }, { "epoch": 0.37516409194391737, "grad_norm": 0.369140625, "learning_rate": 2.1672647449193398e-05, "loss": 1.9538, "step": 11628 }, { "epoch": 0.3751963557977137, "grad_norm": 0.3515625, "learning_rate": 2.167123796297978e-05, "loss": 1.9606, "step": 11629 }, { "epoch": 0.37522861965151005, "grad_norm": 0.3828125, "learning_rate": 2.1669828403333962e-05, "loss": 1.9505, "step": 11630 }, { "epoch": 0.3752608835053064, "grad_norm": 0.3515625, "learning_rate": 2.166841877027146e-05, "loss": 1.996, "step": 11631 }, { "epoch": 0.37529314735910274, "grad_norm": 0.3828125, "learning_rate": 2.166700906380779e-05, "loss": 1.9872, "step": 11632 }, { "epoch": 0.3753254112128991, "grad_norm": 0.35546875, "learning_rate": 2.166559928395846e-05, "loss": 1.9668, "step": 11633 }, { "epoch": 0.37535767506669543, "grad_norm": 0.361328125, "learning_rate": 2.1664189430739e-05, "loss": 1.9861, "step": 11634 }, { "epoch": 0.3753899389204918, "grad_norm": 0.38671875, "learning_rate": 2.1662779504164928e-05, "loss": 2.0083, "step": 11635 }, { "epoch": 0.3754222027742881, "grad_norm": 0.345703125, "learning_rate": 2.166136950425175e-05, "loss": 1.9613, "step": 11636 }, { "epoch": 0.37545446662808446, "grad_norm": 0.373046875, "learning_rate": 2.1659959431015003e-05, "loss": 1.9816, "step": 11637 }, { "epoch": 0.3754867304818808, "grad_norm": 0.369140625, "learning_rate": 2.1658549284470195e-05, "loss": 1.9945, "step": 11638 }, { "epoch": 0.37551899433567715, "grad_norm": 0.3515625, "learning_rate": 2.1657139064632856e-05, "loss": 1.9434, "step": 11639 }, { "epoch": 0.3755512581894735, "grad_norm": 0.36328125, "learning_rate": 2.1655728771518503e-05, "loss": 1.9799, "step": 11640 }, { "epoch": 0.37558352204326984, "grad_norm": 0.353515625, "learning_rate": 2.1654318405142665e-05, "loss": 1.9765, "step": 11641 }, { "epoch": 0.37561578589706623, "grad_norm": 0.373046875, "learning_rate": 2.1652907965520862e-05, "loss": 1.9711, "step": 11642 }, { "epoch": 0.3756480497508626, "grad_norm": 0.3671875, "learning_rate": 2.1651497452668623e-05, "loss": 1.9869, "step": 11643 }, { "epoch": 0.3756803136046589, "grad_norm": 0.349609375, "learning_rate": 2.1650086866601468e-05, "loss": 2.005, "step": 11644 }, { "epoch": 0.37571257745845527, "grad_norm": 0.40234375, "learning_rate": 2.1648676207334935e-05, "loss": 2.0009, "step": 11645 }, { "epoch": 0.3757448413122516, "grad_norm": 0.423828125, "learning_rate": 2.1647265474884535e-05, "loss": 2.0684, "step": 11646 }, { "epoch": 0.37577710516604795, "grad_norm": 0.384765625, "learning_rate": 2.164585466926581e-05, "loss": 2.0738, "step": 11647 }, { "epoch": 0.3758093690198443, "grad_norm": 0.408203125, "learning_rate": 2.1644443790494287e-05, "loss": 2.0796, "step": 11648 }, { "epoch": 0.37584163287364064, "grad_norm": 0.4375, "learning_rate": 2.164303283858549e-05, "loss": 2.0359, "step": 11649 }, { "epoch": 0.375873896727437, "grad_norm": 0.486328125, "learning_rate": 2.1641621813554956e-05, "loss": 2.0199, "step": 11650 }, { "epoch": 0.37590616058123333, "grad_norm": 0.400390625, "learning_rate": 2.164021071541821e-05, "loss": 2.0763, "step": 11651 }, { "epoch": 0.37593842443502967, "grad_norm": 0.4453125, "learning_rate": 2.1638799544190795e-05, "loss": 2.0887, "step": 11652 }, { "epoch": 0.375970688288826, "grad_norm": 0.40625, "learning_rate": 2.1637388299888228e-05, "loss": 2.0786, "step": 11653 }, { "epoch": 0.37600295214262236, "grad_norm": 0.427734375, "learning_rate": 2.1635976982526057e-05, "loss": 2.0861, "step": 11654 }, { "epoch": 0.3760352159964187, "grad_norm": 0.40234375, "learning_rate": 2.163456559211981e-05, "loss": 2.0954, "step": 11655 }, { "epoch": 0.37606747985021505, "grad_norm": 0.396484375, "learning_rate": 2.163315412868503e-05, "loss": 2.0806, "step": 11656 }, { "epoch": 0.3760997437040114, "grad_norm": 0.384765625, "learning_rate": 2.163174259223724e-05, "loss": 2.0764, "step": 11657 }, { "epoch": 0.37613200755780773, "grad_norm": 0.427734375, "learning_rate": 2.163033098279199e-05, "loss": 2.082, "step": 11658 }, { "epoch": 0.3761642714116041, "grad_norm": 0.384765625, "learning_rate": 2.1628919300364813e-05, "loss": 2.0798, "step": 11659 }, { "epoch": 0.3761965352654004, "grad_norm": 0.384765625, "learning_rate": 2.162750754497125e-05, "loss": 2.0777, "step": 11660 }, { "epoch": 0.37622879911919677, "grad_norm": 0.423828125, "learning_rate": 2.162609571662683e-05, "loss": 2.0595, "step": 11661 }, { "epoch": 0.37626106297299317, "grad_norm": 0.40625, "learning_rate": 2.162468381534711e-05, "loss": 2.0568, "step": 11662 }, { "epoch": 0.3762933268267895, "grad_norm": 0.41015625, "learning_rate": 2.1623271841147624e-05, "loss": 2.1159, "step": 11663 }, { "epoch": 0.37632559068058585, "grad_norm": 0.421875, "learning_rate": 2.1621859794043906e-05, "loss": 2.0976, "step": 11664 }, { "epoch": 0.3763578545343822, "grad_norm": 0.392578125, "learning_rate": 2.162044767405151e-05, "loss": 2.0515, "step": 11665 }, { "epoch": 0.37639011838817854, "grad_norm": 0.38671875, "learning_rate": 2.1619035481185977e-05, "loss": 2.0673, "step": 11666 }, { "epoch": 0.3764223822419749, "grad_norm": 0.40625, "learning_rate": 2.1617623215462845e-05, "loss": 2.0639, "step": 11667 }, { "epoch": 0.37645464609577123, "grad_norm": 0.404296875, "learning_rate": 2.1616210876897668e-05, "loss": 2.0971, "step": 11668 }, { "epoch": 0.37648690994956757, "grad_norm": 0.376953125, "learning_rate": 2.1614798465505987e-05, "loss": 2.0872, "step": 11669 }, { "epoch": 0.3765191738033639, "grad_norm": 0.384765625, "learning_rate": 2.161338598130335e-05, "loss": 2.082, "step": 11670 }, { "epoch": 0.37655143765716026, "grad_norm": 0.365234375, "learning_rate": 2.1611973424305304e-05, "loss": 2.0873, "step": 11671 }, { "epoch": 0.3765837015109566, "grad_norm": 0.365234375, "learning_rate": 2.16105607945274e-05, "loss": 2.101, "step": 11672 }, { "epoch": 0.37661596536475295, "grad_norm": 0.380859375, "learning_rate": 2.1609148091985184e-05, "loss": 2.0599, "step": 11673 }, { "epoch": 0.3766482292185493, "grad_norm": 0.388671875, "learning_rate": 2.1607735316694207e-05, "loss": 2.0695, "step": 11674 }, { "epoch": 0.37668049307234563, "grad_norm": 0.369140625, "learning_rate": 2.1606322468670018e-05, "loss": 2.0768, "step": 11675 }, { "epoch": 0.376712756926142, "grad_norm": 0.37890625, "learning_rate": 2.1604909547928173e-05, "loss": 2.0644, "step": 11676 }, { "epoch": 0.3767450207799383, "grad_norm": 0.380859375, "learning_rate": 2.1603496554484223e-05, "loss": 2.0858, "step": 11677 }, { "epoch": 0.37677728463373467, "grad_norm": 0.388671875, "learning_rate": 2.1602083488353716e-05, "loss": 2.0883, "step": 11678 }, { "epoch": 0.376809548487531, "grad_norm": 0.376953125, "learning_rate": 2.1600670349552214e-05, "loss": 2.0746, "step": 11679 }, { "epoch": 0.37684181234132735, "grad_norm": 0.38671875, "learning_rate": 2.159925713809527e-05, "loss": 2.0659, "step": 11680 }, { "epoch": 0.3768740761951237, "grad_norm": 0.384765625, "learning_rate": 2.159784385399843e-05, "loss": 2.0695, "step": 11681 }, { "epoch": 0.37690634004892004, "grad_norm": 0.384765625, "learning_rate": 2.1596430497277266e-05, "loss": 2.0367, "step": 11682 }, { "epoch": 0.37693860390271644, "grad_norm": 0.369140625, "learning_rate": 2.159501706794733e-05, "loss": 1.9939, "step": 11683 }, { "epoch": 0.3769708677565128, "grad_norm": 0.37109375, "learning_rate": 2.1593603566024167e-05, "loss": 2.0672, "step": 11684 }, { "epoch": 0.37700313161030913, "grad_norm": 0.375, "learning_rate": 2.159218999152335e-05, "loss": 2.0639, "step": 11685 }, { "epoch": 0.37703539546410547, "grad_norm": 0.37890625, "learning_rate": 2.159077634446044e-05, "loss": 2.0694, "step": 11686 }, { "epoch": 0.3770676593179018, "grad_norm": 0.376953125, "learning_rate": 2.1589362624850984e-05, "loss": 2.0766, "step": 11687 }, { "epoch": 0.37709992317169816, "grad_norm": 0.37109375, "learning_rate": 2.1587948832710557e-05, "loss": 2.0971, "step": 11688 }, { "epoch": 0.3771321870254945, "grad_norm": 0.37890625, "learning_rate": 2.1586534968054718e-05, "loss": 2.0763, "step": 11689 }, { "epoch": 0.37716445087929085, "grad_norm": 0.375, "learning_rate": 2.158512103089902e-05, "loss": 2.058, "step": 11690 }, { "epoch": 0.3771967147330872, "grad_norm": 0.39453125, "learning_rate": 2.1583707021259033e-05, "loss": 2.1217, "step": 11691 }, { "epoch": 0.37722897858688353, "grad_norm": 0.384765625, "learning_rate": 2.1582292939150325e-05, "loss": 2.0863, "step": 11692 }, { "epoch": 0.3772612424406799, "grad_norm": 0.39453125, "learning_rate": 2.1580878784588464e-05, "loss": 2.0941, "step": 11693 }, { "epoch": 0.3772935062944762, "grad_norm": 0.435546875, "learning_rate": 2.1579464557589006e-05, "loss": 2.0797, "step": 11694 }, { "epoch": 0.37732577014827257, "grad_norm": 0.396484375, "learning_rate": 2.157805025816752e-05, "loss": 2.0765, "step": 11695 }, { "epoch": 0.3773580340020689, "grad_norm": 0.40625, "learning_rate": 2.157663588633958e-05, "loss": 2.038, "step": 11696 }, { "epoch": 0.37739029785586525, "grad_norm": 0.435546875, "learning_rate": 2.157522144212075e-05, "loss": 2.0957, "step": 11697 }, { "epoch": 0.3774225617096616, "grad_norm": 0.404296875, "learning_rate": 2.1573806925526592e-05, "loss": 2.0721, "step": 11698 }, { "epoch": 0.37745482556345794, "grad_norm": 0.40625, "learning_rate": 2.1572392336572694e-05, "loss": 2.1001, "step": 11699 }, { "epoch": 0.3774870894172543, "grad_norm": 0.439453125, "learning_rate": 2.1570977675274613e-05, "loss": 2.0786, "step": 11700 }, { "epoch": 0.37751935327105063, "grad_norm": 0.447265625, "learning_rate": 2.156956294164792e-05, "loss": 2.0752, "step": 11701 }, { "epoch": 0.37755161712484697, "grad_norm": 0.40234375, "learning_rate": 2.156814813570819e-05, "loss": 2.0665, "step": 11702 }, { "epoch": 0.37758388097864337, "grad_norm": 0.44140625, "learning_rate": 2.1566733257471004e-05, "loss": 2.0294, "step": 11703 }, { "epoch": 0.3776161448324397, "grad_norm": 0.427734375, "learning_rate": 2.1565318306951927e-05, "loss": 2.0866, "step": 11704 }, { "epoch": 0.37764840868623606, "grad_norm": 0.384765625, "learning_rate": 2.1563903284166533e-05, "loss": 2.0583, "step": 11705 }, { "epoch": 0.3776806725400324, "grad_norm": 0.392578125, "learning_rate": 2.15624881891304e-05, "loss": 2.0586, "step": 11706 }, { "epoch": 0.37771293639382875, "grad_norm": 0.392578125, "learning_rate": 2.156107302185911e-05, "loss": 2.0734, "step": 11707 }, { "epoch": 0.3777452002476251, "grad_norm": 0.419921875, "learning_rate": 2.155965778236823e-05, "loss": 2.0897, "step": 11708 }, { "epoch": 0.37777746410142143, "grad_norm": 0.41015625, "learning_rate": 2.1558242470673344e-05, "loss": 2.0696, "step": 11709 }, { "epoch": 0.3778097279552178, "grad_norm": 0.41796875, "learning_rate": 2.1556827086790035e-05, "loss": 2.0581, "step": 11710 }, { "epoch": 0.3778419918090141, "grad_norm": 0.421875, "learning_rate": 2.1555411630733872e-05, "loss": 2.0737, "step": 11711 }, { "epoch": 0.37787425566281047, "grad_norm": 0.419921875, "learning_rate": 2.155399610252044e-05, "loss": 2.084, "step": 11712 }, { "epoch": 0.3779065195166068, "grad_norm": 0.44921875, "learning_rate": 2.1552580502165322e-05, "loss": 2.0908, "step": 11713 }, { "epoch": 0.37793878337040315, "grad_norm": 0.40234375, "learning_rate": 2.1551164829684103e-05, "loss": 2.0754, "step": 11714 }, { "epoch": 0.3779710472241995, "grad_norm": 0.41796875, "learning_rate": 2.1549749085092356e-05, "loss": 2.0708, "step": 11715 }, { "epoch": 0.37800331107799584, "grad_norm": 0.400390625, "learning_rate": 2.154833326840567e-05, "loss": 2.0561, "step": 11716 }, { "epoch": 0.3780355749317922, "grad_norm": 0.416015625, "learning_rate": 2.1546917379639633e-05, "loss": 2.0544, "step": 11717 }, { "epoch": 0.37806783878558853, "grad_norm": 0.392578125, "learning_rate": 2.1545501418809825e-05, "loss": 2.0901, "step": 11718 }, { "epoch": 0.37810010263938487, "grad_norm": 0.43359375, "learning_rate": 2.154408538593183e-05, "loss": 2.0368, "step": 11719 }, { "epoch": 0.3781323664931812, "grad_norm": 0.3984375, "learning_rate": 2.154266928102124e-05, "loss": 2.0528, "step": 11720 }, { "epoch": 0.37816463034697756, "grad_norm": 0.4140625, "learning_rate": 2.154125310409364e-05, "loss": 2.0383, "step": 11721 }, { "epoch": 0.3781968942007739, "grad_norm": 0.416015625, "learning_rate": 2.1539836855164618e-05, "loss": 2.0769, "step": 11722 }, { "epoch": 0.3782291580545703, "grad_norm": 0.3984375, "learning_rate": 2.1538420534249763e-05, "loss": 2.0394, "step": 11723 }, { "epoch": 0.37826142190836665, "grad_norm": 0.404296875, "learning_rate": 2.1537004141364667e-05, "loss": 2.0514, "step": 11724 }, { "epoch": 0.378293685762163, "grad_norm": 0.380859375, "learning_rate": 2.153558767652492e-05, "loss": 2.0885, "step": 11725 }, { "epoch": 0.37832594961595933, "grad_norm": 0.396484375, "learning_rate": 2.153417113974611e-05, "loss": 2.088, "step": 11726 }, { "epoch": 0.3783582134697557, "grad_norm": 0.408203125, "learning_rate": 2.1532754531043833e-05, "loss": 2.0693, "step": 11727 }, { "epoch": 0.378390477323552, "grad_norm": 0.369140625, "learning_rate": 2.1531337850433682e-05, "loss": 2.0831, "step": 11728 }, { "epoch": 0.37842274117734837, "grad_norm": 0.41796875, "learning_rate": 2.1529921097931245e-05, "loss": 2.0709, "step": 11729 }, { "epoch": 0.3784550050311447, "grad_norm": 0.400390625, "learning_rate": 2.1528504273552123e-05, "loss": 2.0204, "step": 11730 }, { "epoch": 0.37848726888494105, "grad_norm": 0.392578125, "learning_rate": 2.1527087377311914e-05, "loss": 2.0624, "step": 11731 }, { "epoch": 0.3785195327387374, "grad_norm": 0.4140625, "learning_rate": 2.1525670409226205e-05, "loss": 2.0716, "step": 11732 }, { "epoch": 0.37855179659253374, "grad_norm": 0.423828125, "learning_rate": 2.1524253369310598e-05, "loss": 2.0845, "step": 11733 }, { "epoch": 0.3785840604463301, "grad_norm": 0.40625, "learning_rate": 2.152283625758069e-05, "loss": 2.0783, "step": 11734 }, { "epoch": 0.37861632430012643, "grad_norm": 0.412109375, "learning_rate": 2.152141907405208e-05, "loss": 1.981, "step": 11735 }, { "epoch": 0.37864858815392277, "grad_norm": 0.376953125, "learning_rate": 2.152000181874037e-05, "loss": 1.9753, "step": 11736 }, { "epoch": 0.3786808520077191, "grad_norm": 0.38671875, "learning_rate": 2.1518584491661156e-05, "loss": 2.0028, "step": 11737 }, { "epoch": 0.37871311586151546, "grad_norm": 0.37109375, "learning_rate": 2.151716709283004e-05, "loss": 1.9902, "step": 11738 }, { "epoch": 0.3787453797153118, "grad_norm": 0.373046875, "learning_rate": 2.1515749622262626e-05, "loss": 1.9629, "step": 11739 }, { "epoch": 0.37877764356910815, "grad_norm": 0.376953125, "learning_rate": 2.1514332079974512e-05, "loss": 2.0178, "step": 11740 }, { "epoch": 0.3788099074229045, "grad_norm": 0.388671875, "learning_rate": 2.1512914465981302e-05, "loss": 2.0269, "step": 11741 }, { "epoch": 0.37884217127670083, "grad_norm": 0.3828125, "learning_rate": 2.1511496780298607e-05, "loss": 2.0794, "step": 11742 }, { "epoch": 0.37887443513049723, "grad_norm": 0.50390625, "learning_rate": 2.1510079022942018e-05, "loss": 2.1078, "step": 11743 }, { "epoch": 0.3789066989842936, "grad_norm": 0.60546875, "learning_rate": 2.150866119392716e-05, "loss": 2.0813, "step": 11744 }, { "epoch": 0.3789389628380899, "grad_norm": 0.44140625, "learning_rate": 2.1507243293269627e-05, "loss": 2.0944, "step": 11745 }, { "epoch": 0.37897122669188626, "grad_norm": 0.62890625, "learning_rate": 2.1505825320985024e-05, "loss": 2.0667, "step": 11746 }, { "epoch": 0.3790034905456826, "grad_norm": 0.515625, "learning_rate": 2.1504407277088966e-05, "loss": 2.0801, "step": 11747 }, { "epoch": 0.37903575439947895, "grad_norm": 0.57421875, "learning_rate": 2.1502989161597055e-05, "loss": 2.0803, "step": 11748 }, { "epoch": 0.3790680182532753, "grad_norm": 0.447265625, "learning_rate": 2.1501570974524906e-05, "loss": 2.0896, "step": 11749 }, { "epoch": 0.37910028210707164, "grad_norm": 0.51953125, "learning_rate": 2.150015271588813e-05, "loss": 2.0369, "step": 11750 }, { "epoch": 0.379132545960868, "grad_norm": 0.490234375, "learning_rate": 2.149873438570234e-05, "loss": 2.0911, "step": 11751 }, { "epoch": 0.3791648098146643, "grad_norm": 0.47265625, "learning_rate": 2.1497315983983138e-05, "loss": 2.087, "step": 11752 }, { "epoch": 0.37919707366846067, "grad_norm": 0.515625, "learning_rate": 2.149589751074614e-05, "loss": 2.0675, "step": 11753 }, { "epoch": 0.379229337522257, "grad_norm": 0.484375, "learning_rate": 2.1494478966006967e-05, "loss": 2.0832, "step": 11754 }, { "epoch": 0.37926160137605336, "grad_norm": 0.453125, "learning_rate": 2.149306034978123e-05, "loss": 2.0657, "step": 11755 }, { "epoch": 0.3792938652298497, "grad_norm": 0.44140625, "learning_rate": 2.149164166208454e-05, "loss": 2.0351, "step": 11756 }, { "epoch": 0.37932612908364605, "grad_norm": 0.423828125, "learning_rate": 2.1490222902932517e-05, "loss": 2.0715, "step": 11757 }, { "epoch": 0.3793583929374424, "grad_norm": 0.439453125, "learning_rate": 2.1488804072340775e-05, "loss": 2.0542, "step": 11758 }, { "epoch": 0.37939065679123873, "grad_norm": 0.4140625, "learning_rate": 2.1487385170324935e-05, "loss": 2.0804, "step": 11759 }, { "epoch": 0.3794229206450351, "grad_norm": 0.41015625, "learning_rate": 2.148596619690061e-05, "loss": 2.1008, "step": 11760 }, { "epoch": 0.3794551844988314, "grad_norm": 0.416015625, "learning_rate": 2.1484547152083423e-05, "loss": 2.0724, "step": 11761 }, { "epoch": 0.37948744835262777, "grad_norm": 0.42578125, "learning_rate": 2.1483128035888997e-05, "loss": 2.0678, "step": 11762 }, { "epoch": 0.3795197122064241, "grad_norm": 0.41015625, "learning_rate": 2.1481708848332943e-05, "loss": 2.0422, "step": 11763 }, { "epoch": 0.3795519760602205, "grad_norm": 0.400390625, "learning_rate": 2.1480289589430887e-05, "loss": 2.078, "step": 11764 }, { "epoch": 0.37958423991401685, "grad_norm": 0.384765625, "learning_rate": 2.1478870259198458e-05, "loss": 2.0756, "step": 11765 }, { "epoch": 0.3796165037678132, "grad_norm": 0.3984375, "learning_rate": 2.147745085765127e-05, "loss": 2.0727, "step": 11766 }, { "epoch": 0.37964876762160954, "grad_norm": 0.4296875, "learning_rate": 2.1476031384804947e-05, "loss": 2.0984, "step": 11767 }, { "epoch": 0.3796810314754059, "grad_norm": 0.4296875, "learning_rate": 2.1474611840675124e-05, "loss": 2.0745, "step": 11768 }, { "epoch": 0.3797132953292022, "grad_norm": 0.427734375, "learning_rate": 2.1473192225277412e-05, "loss": 2.073, "step": 11769 }, { "epoch": 0.37974555918299857, "grad_norm": 0.439453125, "learning_rate": 2.147177253862745e-05, "loss": 2.1018, "step": 11770 }, { "epoch": 0.3797778230367949, "grad_norm": 0.4375, "learning_rate": 2.1470352780740855e-05, "loss": 2.0958, "step": 11771 }, { "epoch": 0.37981008689059126, "grad_norm": 0.416015625, "learning_rate": 2.1468932951633254e-05, "loss": 2.0611, "step": 11772 }, { "epoch": 0.3798423507443876, "grad_norm": 0.423828125, "learning_rate": 2.146751305132029e-05, "loss": 2.0517, "step": 11773 }, { "epoch": 0.37987461459818395, "grad_norm": 0.4140625, "learning_rate": 2.146609307981757e-05, "loss": 2.0545, "step": 11774 }, { "epoch": 0.3799068784519803, "grad_norm": 0.44921875, "learning_rate": 2.1464673037140746e-05, "loss": 2.0547, "step": 11775 }, { "epoch": 0.37993914230577663, "grad_norm": 0.443359375, "learning_rate": 2.146325292330544e-05, "loss": 2.1001, "step": 11776 }, { "epoch": 0.379971406159573, "grad_norm": 0.51953125, "learning_rate": 2.1461832738327272e-05, "loss": 2.0807, "step": 11777 }, { "epoch": 0.3800036700133693, "grad_norm": 0.474609375, "learning_rate": 2.146041248222189e-05, "loss": 2.0773, "step": 11778 }, { "epoch": 0.38003593386716567, "grad_norm": 0.515625, "learning_rate": 2.145899215500493e-05, "loss": 2.0997, "step": 11779 }, { "epoch": 0.380068197720962, "grad_norm": 0.439453125, "learning_rate": 2.1457571756692008e-05, "loss": 2.0863, "step": 11780 }, { "epoch": 0.38010046157475835, "grad_norm": 0.474609375, "learning_rate": 2.1456151287298772e-05, "loss": 2.0555, "step": 11781 }, { "epoch": 0.3801327254285547, "grad_norm": 0.458984375, "learning_rate": 2.1454730746840854e-05, "loss": 2.0719, "step": 11782 }, { "epoch": 0.38016498928235104, "grad_norm": 0.435546875, "learning_rate": 2.145331013533389e-05, "loss": 2.0933, "step": 11783 }, { "epoch": 0.38019725313614744, "grad_norm": 0.470703125, "learning_rate": 2.1451889452793514e-05, "loss": 2.0765, "step": 11784 }, { "epoch": 0.3802295169899438, "grad_norm": 0.4296875, "learning_rate": 2.1450468699235378e-05, "loss": 2.0783, "step": 11785 }, { "epoch": 0.3802617808437401, "grad_norm": 0.435546875, "learning_rate": 2.14490478746751e-05, "loss": 2.1011, "step": 11786 }, { "epoch": 0.38029404469753647, "grad_norm": 0.41796875, "learning_rate": 2.1447626979128334e-05, "loss": 2.0885, "step": 11787 }, { "epoch": 0.3803263085513328, "grad_norm": 0.451171875, "learning_rate": 2.1446206012610712e-05, "loss": 2.0645, "step": 11788 }, { "epoch": 0.38035857240512916, "grad_norm": 0.396484375, "learning_rate": 2.1444784975137883e-05, "loss": 2.077, "step": 11789 }, { "epoch": 0.3803908362589255, "grad_norm": 0.4140625, "learning_rate": 2.144336386672549e-05, "loss": 2.0146, "step": 11790 }, { "epoch": 0.38042310011272185, "grad_norm": 0.404296875, "learning_rate": 2.1441942687389158e-05, "loss": 2.0772, "step": 11791 }, { "epoch": 0.3804553639665182, "grad_norm": 0.39453125, "learning_rate": 2.1440521437144546e-05, "loss": 2.0837, "step": 11792 }, { "epoch": 0.38048762782031453, "grad_norm": 0.423828125, "learning_rate": 2.1439100116007297e-05, "loss": 2.0588, "step": 11793 }, { "epoch": 0.3805198916741109, "grad_norm": 0.421875, "learning_rate": 2.1437678723993053e-05, "loss": 2.0497, "step": 11794 }, { "epoch": 0.3805521555279072, "grad_norm": 0.396484375, "learning_rate": 2.1436257261117458e-05, "loss": 2.084, "step": 11795 }, { "epoch": 0.38058441938170356, "grad_norm": 0.41796875, "learning_rate": 2.1434835727396166e-05, "loss": 2.0843, "step": 11796 }, { "epoch": 0.3806166832354999, "grad_norm": 0.37109375, "learning_rate": 2.143341412284481e-05, "loss": 2.086, "step": 11797 }, { "epoch": 0.38064894708929625, "grad_norm": 0.427734375, "learning_rate": 2.1431992447479048e-05, "loss": 2.062, "step": 11798 }, { "epoch": 0.3806812109430926, "grad_norm": 0.388671875, "learning_rate": 2.1430570701314534e-05, "loss": 2.0612, "step": 11799 }, { "epoch": 0.38071347479688894, "grad_norm": 0.39453125, "learning_rate": 2.1429148884366906e-05, "loss": 2.113, "step": 11800 }, { "epoch": 0.3807457386506853, "grad_norm": 0.45703125, "learning_rate": 2.1427726996651823e-05, "loss": 2.0957, "step": 11801 }, { "epoch": 0.3807780025044816, "grad_norm": 0.39453125, "learning_rate": 2.142630503818493e-05, "loss": 2.0908, "step": 11802 }, { "epoch": 0.38081026635827797, "grad_norm": 0.421875, "learning_rate": 2.142488300898188e-05, "loss": 2.0443, "step": 11803 }, { "epoch": 0.38084253021207437, "grad_norm": 0.38671875, "learning_rate": 2.1423460909058334e-05, "loss": 2.0891, "step": 11804 }, { "epoch": 0.3808747940658707, "grad_norm": 0.416015625, "learning_rate": 2.142203873842993e-05, "loss": 2.081, "step": 11805 }, { "epoch": 0.38090705791966706, "grad_norm": 0.412109375, "learning_rate": 2.1420616497112334e-05, "loss": 2.0645, "step": 11806 }, { "epoch": 0.3809393217734634, "grad_norm": 0.39453125, "learning_rate": 2.1419194185121195e-05, "loss": 2.0703, "step": 11807 }, { "epoch": 0.38097158562725975, "grad_norm": 0.39453125, "learning_rate": 2.141777180247217e-05, "loss": 2.0773, "step": 11808 }, { "epoch": 0.3810038494810561, "grad_norm": 0.376953125, "learning_rate": 2.1416349349180924e-05, "loss": 2.0712, "step": 11809 }, { "epoch": 0.38103611333485243, "grad_norm": 0.37890625, "learning_rate": 2.1414926825263108e-05, "loss": 2.078, "step": 11810 }, { "epoch": 0.3810683771886488, "grad_norm": 0.38671875, "learning_rate": 2.1413504230734372e-05, "loss": 2.0608, "step": 11811 }, { "epoch": 0.3811006410424451, "grad_norm": 0.38671875, "learning_rate": 2.1412081565610387e-05, "loss": 2.106, "step": 11812 }, { "epoch": 0.38113290489624146, "grad_norm": 0.40234375, "learning_rate": 2.1410658829906812e-05, "loss": 2.122, "step": 11813 }, { "epoch": 0.3811651687500378, "grad_norm": 0.435546875, "learning_rate": 2.1409236023639297e-05, "loss": 2.1812, "step": 11814 }, { "epoch": 0.38119743260383415, "grad_norm": 0.546875, "learning_rate": 2.1407813146823513e-05, "loss": 2.1775, "step": 11815 }, { "epoch": 0.3812296964576305, "grad_norm": 0.5, "learning_rate": 2.1406390199475115e-05, "loss": 2.1588, "step": 11816 }, { "epoch": 0.38126196031142684, "grad_norm": 0.51171875, "learning_rate": 2.1404967181609772e-05, "loss": 2.153, "step": 11817 }, { "epoch": 0.3812942241652232, "grad_norm": 0.4375, "learning_rate": 2.140354409324315e-05, "loss": 2.1916, "step": 11818 }, { "epoch": 0.3813264880190195, "grad_norm": 0.45703125, "learning_rate": 2.1402120934390902e-05, "loss": 2.1706, "step": 11819 }, { "epoch": 0.38135875187281587, "grad_norm": 0.427734375, "learning_rate": 2.1400697705068702e-05, "loss": 2.2066, "step": 11820 }, { "epoch": 0.3813910157266122, "grad_norm": 0.42578125, "learning_rate": 2.1399274405292213e-05, "loss": 2.1976, "step": 11821 }, { "epoch": 0.38142327958040856, "grad_norm": 0.435546875, "learning_rate": 2.13978510350771e-05, "loss": 2.1857, "step": 11822 }, { "epoch": 0.3814555434342049, "grad_norm": 0.41796875, "learning_rate": 2.1396427594439036e-05, "loss": 2.1369, "step": 11823 }, { "epoch": 0.3814878072880013, "grad_norm": 0.423828125, "learning_rate": 2.1395004083393685e-05, "loss": 2.2142, "step": 11824 }, { "epoch": 0.38152007114179765, "grad_norm": 0.396484375, "learning_rate": 2.1393580501956715e-05, "loss": 2.1934, "step": 11825 }, { "epoch": 0.381552334995594, "grad_norm": 0.40625, "learning_rate": 2.1392156850143802e-05, "loss": 2.1325, "step": 11826 }, { "epoch": 0.38158459884939033, "grad_norm": 0.44140625, "learning_rate": 2.139073312797061e-05, "loss": 2.1705, "step": 11827 }, { "epoch": 0.3816168627031867, "grad_norm": 0.408203125, "learning_rate": 2.138930933545281e-05, "loss": 2.1618, "step": 11828 }, { "epoch": 0.381649126556983, "grad_norm": 0.384765625, "learning_rate": 2.1387885472606076e-05, "loss": 2.1776, "step": 11829 }, { "epoch": 0.38168139041077936, "grad_norm": 0.4296875, "learning_rate": 2.1386461539446088e-05, "loss": 2.062, "step": 11830 }, { "epoch": 0.3817136542645757, "grad_norm": 0.421875, "learning_rate": 2.1385037535988507e-05, "loss": 2.0702, "step": 11831 }, { "epoch": 0.38174591811837205, "grad_norm": 0.365234375, "learning_rate": 2.1383613462249015e-05, "loss": 2.0797, "step": 11832 }, { "epoch": 0.3817781819721684, "grad_norm": 0.392578125, "learning_rate": 2.1382189318243285e-05, "loss": 2.1223, "step": 11833 }, { "epoch": 0.38181044582596474, "grad_norm": 0.400390625, "learning_rate": 2.1380765103986997e-05, "loss": 2.1051, "step": 11834 }, { "epoch": 0.3818427096797611, "grad_norm": 0.388671875, "learning_rate": 2.137934081949582e-05, "loss": 2.072, "step": 11835 }, { "epoch": 0.3818749735335574, "grad_norm": 0.44921875, "learning_rate": 2.1377916464785438e-05, "loss": 2.092, "step": 11836 }, { "epoch": 0.38190723738735377, "grad_norm": 0.4375, "learning_rate": 2.1376492039871524e-05, "loss": 2.0689, "step": 11837 }, { "epoch": 0.3819395012411501, "grad_norm": 0.404296875, "learning_rate": 2.1375067544769766e-05, "loss": 2.0595, "step": 11838 }, { "epoch": 0.38197176509494646, "grad_norm": 0.41015625, "learning_rate": 2.1373642979495832e-05, "loss": 2.0527, "step": 11839 }, { "epoch": 0.3820040289487428, "grad_norm": 0.380859375, "learning_rate": 2.137221834406541e-05, "loss": 2.0766, "step": 11840 }, { "epoch": 0.38203629280253915, "grad_norm": 0.423828125, "learning_rate": 2.137079363849418e-05, "loss": 2.1095, "step": 11841 }, { "epoch": 0.3820685566563355, "grad_norm": 0.39453125, "learning_rate": 2.1369368862797824e-05, "loss": 2.095, "step": 11842 }, { "epoch": 0.38210082051013183, "grad_norm": 0.451171875, "learning_rate": 2.1367944016992026e-05, "loss": 2.0788, "step": 11843 }, { "epoch": 0.3821330843639282, "grad_norm": 0.373046875, "learning_rate": 2.136651910109247e-05, "loss": 2.0683, "step": 11844 }, { "epoch": 0.3821653482177246, "grad_norm": 0.4453125, "learning_rate": 2.1365094115114837e-05, "loss": 2.1025, "step": 11845 }, { "epoch": 0.3821976120715209, "grad_norm": 0.400390625, "learning_rate": 2.1363669059074814e-05, "loss": 2.0971, "step": 11846 }, { "epoch": 0.38222987592531726, "grad_norm": 0.4140625, "learning_rate": 2.1362243932988087e-05, "loss": 2.0779, "step": 11847 }, { "epoch": 0.3822621397791136, "grad_norm": 0.388671875, "learning_rate": 2.1360818736870346e-05, "loss": 2.0678, "step": 11848 }, { "epoch": 0.38229440363290995, "grad_norm": 0.421875, "learning_rate": 2.1359393470737275e-05, "loss": 2.0545, "step": 11849 }, { "epoch": 0.3823266674867063, "grad_norm": 0.3984375, "learning_rate": 2.135796813460456e-05, "loss": 2.0647, "step": 11850 }, { "epoch": 0.38235893134050264, "grad_norm": 0.39453125, "learning_rate": 2.1356542728487894e-05, "loss": 2.0805, "step": 11851 }, { "epoch": 0.382391195194299, "grad_norm": 0.388671875, "learning_rate": 2.135511725240297e-05, "loss": 2.0352, "step": 11852 }, { "epoch": 0.3824234590480953, "grad_norm": 0.380859375, "learning_rate": 2.1353691706365465e-05, "loss": 2.0613, "step": 11853 }, { "epoch": 0.38245572290189167, "grad_norm": 0.37890625, "learning_rate": 2.135226609039109e-05, "loss": 2.0926, "step": 11854 }, { "epoch": 0.382487986755688, "grad_norm": 0.37890625, "learning_rate": 2.135084040449552e-05, "loss": 2.0442, "step": 11855 }, { "epoch": 0.38252025060948436, "grad_norm": 0.3828125, "learning_rate": 2.134941464869446e-05, "loss": 2.0809, "step": 11856 }, { "epoch": 0.3825525144632807, "grad_norm": 0.3828125, "learning_rate": 2.134798882300359e-05, "loss": 2.0855, "step": 11857 }, { "epoch": 0.38258477831707705, "grad_norm": 0.361328125, "learning_rate": 2.1346562927438623e-05, "loss": 2.0419, "step": 11858 }, { "epoch": 0.3826170421708734, "grad_norm": 0.392578125, "learning_rate": 2.1345136962015236e-05, "loss": 2.0827, "step": 11859 }, { "epoch": 0.38264930602466973, "grad_norm": 0.375, "learning_rate": 2.1343710926749137e-05, "loss": 2.0733, "step": 11860 }, { "epoch": 0.3826815698784661, "grad_norm": 0.388671875, "learning_rate": 2.1342284821656023e-05, "loss": 2.0486, "step": 11861 }, { "epoch": 0.3827138337322624, "grad_norm": 0.3828125, "learning_rate": 2.1340858646751583e-05, "loss": 2.0867, "step": 11862 }, { "epoch": 0.38274609758605876, "grad_norm": 0.384765625, "learning_rate": 2.1339432402051522e-05, "loss": 2.0914, "step": 11863 }, { "epoch": 0.3827783614398551, "grad_norm": 0.40625, "learning_rate": 2.1338006087571536e-05, "loss": 2.1092, "step": 11864 }, { "epoch": 0.3828106252936515, "grad_norm": 0.40625, "learning_rate": 2.1336579703327327e-05, "loss": 2.1099, "step": 11865 }, { "epoch": 0.38284288914744785, "grad_norm": 0.43359375, "learning_rate": 2.1335153249334594e-05, "loss": 2.054, "step": 11866 }, { "epoch": 0.3828751530012442, "grad_norm": 0.412109375, "learning_rate": 2.133372672560904e-05, "loss": 2.0962, "step": 11867 }, { "epoch": 0.38290741685504054, "grad_norm": 0.453125, "learning_rate": 2.133230013216636e-05, "loss": 2.081, "step": 11868 }, { "epoch": 0.3829396807088369, "grad_norm": 0.40234375, "learning_rate": 2.133087346902227e-05, "loss": 2.0748, "step": 11869 }, { "epoch": 0.3829719445626332, "grad_norm": 0.455078125, "learning_rate": 2.132944673619246e-05, "loss": 2.066, "step": 11870 }, { "epoch": 0.38300420841642957, "grad_norm": 0.388671875, "learning_rate": 2.1328019933692646e-05, "loss": 2.067, "step": 11871 }, { "epoch": 0.3830364722702259, "grad_norm": 0.435546875, "learning_rate": 2.1326593061538527e-05, "loss": 2.0894, "step": 11872 }, { "epoch": 0.38306873612402226, "grad_norm": 0.404296875, "learning_rate": 2.1325166119745814e-05, "loss": 2.0623, "step": 11873 }, { "epoch": 0.3831009999778186, "grad_norm": 0.412109375, "learning_rate": 2.1323739108330202e-05, "loss": 2.0761, "step": 11874 }, { "epoch": 0.38313326383161495, "grad_norm": 0.392578125, "learning_rate": 2.1322312027307414e-05, "loss": 2.0787, "step": 11875 }, { "epoch": 0.3831655276854113, "grad_norm": 0.396484375, "learning_rate": 2.1320884876693145e-05, "loss": 2.0744, "step": 11876 }, { "epoch": 0.38319779153920763, "grad_norm": 0.419921875, "learning_rate": 2.131945765650312e-05, "loss": 2.1086, "step": 11877 }, { "epoch": 0.383230055393004, "grad_norm": 0.40625, "learning_rate": 2.1318030366753025e-05, "loss": 2.0664, "step": 11878 }, { "epoch": 0.3832623192468003, "grad_norm": 0.419921875, "learning_rate": 2.131660300745859e-05, "loss": 2.0755, "step": 11879 }, { "epoch": 0.38329458310059666, "grad_norm": 0.41796875, "learning_rate": 2.1315175578635525e-05, "loss": 2.0751, "step": 11880 }, { "epoch": 0.383326846954393, "grad_norm": 0.41015625, "learning_rate": 2.131374808029953e-05, "loss": 2.06, "step": 11881 }, { "epoch": 0.38335911080818935, "grad_norm": 0.427734375, "learning_rate": 2.131232051246633e-05, "loss": 2.0675, "step": 11882 }, { "epoch": 0.3833913746619857, "grad_norm": 0.431640625, "learning_rate": 2.1310892875151637e-05, "loss": 2.0648, "step": 11883 }, { "epoch": 0.38342363851578204, "grad_norm": 0.41015625, "learning_rate": 2.1309465168371157e-05, "loss": 2.0837, "step": 11884 }, { "epoch": 0.38345590236957844, "grad_norm": 0.4296875, "learning_rate": 2.1308037392140613e-05, "loss": 2.0694, "step": 11885 }, { "epoch": 0.3834881662233748, "grad_norm": 0.384765625, "learning_rate": 2.1306609546475718e-05, "loss": 2.0688, "step": 11886 }, { "epoch": 0.3835204300771711, "grad_norm": 0.3984375, "learning_rate": 2.1305181631392195e-05, "loss": 2.0722, "step": 11887 }, { "epoch": 0.38355269393096747, "grad_norm": 0.435546875, "learning_rate": 2.130375364690575e-05, "loss": 2.079, "step": 11888 }, { "epoch": 0.3835849577847638, "grad_norm": 0.412109375, "learning_rate": 2.130232559303211e-05, "loss": 2.1122, "step": 11889 }, { "epoch": 0.38361722163856016, "grad_norm": 0.4296875, "learning_rate": 2.1300897469786986e-05, "loss": 2.0968, "step": 11890 }, { "epoch": 0.3836494854923565, "grad_norm": 0.380859375, "learning_rate": 2.129946927718611e-05, "loss": 2.1006, "step": 11891 }, { "epoch": 0.38368174934615284, "grad_norm": 0.408203125, "learning_rate": 2.129804101524519e-05, "loss": 2.0401, "step": 11892 }, { "epoch": 0.3837140131999492, "grad_norm": 0.431640625, "learning_rate": 2.1296612683979953e-05, "loss": 2.1041, "step": 11893 }, { "epoch": 0.38374627705374553, "grad_norm": 0.3984375, "learning_rate": 2.1295184283406127e-05, "loss": 2.0762, "step": 11894 }, { "epoch": 0.3837785409075419, "grad_norm": 0.443359375, "learning_rate": 2.129375581353942e-05, "loss": 2.0543, "step": 11895 }, { "epoch": 0.3838108047613382, "grad_norm": 0.439453125, "learning_rate": 2.1292327274395568e-05, "loss": 2.0469, "step": 11896 }, { "epoch": 0.38384306861513456, "grad_norm": 0.380859375, "learning_rate": 2.1290898665990294e-05, "loss": 2.0834, "step": 11897 }, { "epoch": 0.3838753324689309, "grad_norm": 0.40625, "learning_rate": 2.1289469988339315e-05, "loss": 2.0602, "step": 11898 }, { "epoch": 0.38390759632272725, "grad_norm": 0.380859375, "learning_rate": 2.1288041241458365e-05, "loss": 2.0722, "step": 11899 }, { "epoch": 0.3839398601765236, "grad_norm": 0.376953125, "learning_rate": 2.128661242536317e-05, "loss": 2.0763, "step": 11900 }, { "epoch": 0.38397212403031994, "grad_norm": 0.41796875, "learning_rate": 2.1285183540069455e-05, "loss": 2.0458, "step": 11901 }, { "epoch": 0.3840043878841163, "grad_norm": 0.3671875, "learning_rate": 2.1283754585592944e-05, "loss": 1.9929, "step": 11902 }, { "epoch": 0.3840366517379126, "grad_norm": 0.384765625, "learning_rate": 2.128232556194938e-05, "loss": 1.9395, "step": 11903 }, { "epoch": 0.38406891559170897, "grad_norm": 0.369140625, "learning_rate": 2.1280896469154474e-05, "loss": 1.9862, "step": 11904 }, { "epoch": 0.38410117944550537, "grad_norm": 0.3671875, "learning_rate": 2.1279467307223964e-05, "loss": 1.9831, "step": 11905 }, { "epoch": 0.3841334432993017, "grad_norm": 0.3671875, "learning_rate": 2.1278038076173594e-05, "loss": 1.9865, "step": 11906 }, { "epoch": 0.38416570715309806, "grad_norm": 0.40234375, "learning_rate": 2.1276608776019075e-05, "loss": 1.9536, "step": 11907 }, { "epoch": 0.3841979710068944, "grad_norm": 0.3515625, "learning_rate": 2.1275179406776153e-05, "loss": 1.9759, "step": 11908 }, { "epoch": 0.38423023486069074, "grad_norm": 0.388671875, "learning_rate": 2.1273749968460555e-05, "loss": 1.9831, "step": 11909 }, { "epoch": 0.3842624987144871, "grad_norm": 0.37890625, "learning_rate": 2.127232046108802e-05, "loss": 1.9668, "step": 11910 }, { "epoch": 0.38429476256828343, "grad_norm": 0.373046875, "learning_rate": 2.1270890884674288e-05, "loss": 1.9789, "step": 11911 }, { "epoch": 0.3843270264220798, "grad_norm": 0.365234375, "learning_rate": 2.1269461239235078e-05, "loss": 1.9994, "step": 11912 }, { "epoch": 0.3843592902758761, "grad_norm": 0.376953125, "learning_rate": 2.126803152478614e-05, "loss": 1.9896, "step": 11913 }, { "epoch": 0.38439155412967246, "grad_norm": 0.373046875, "learning_rate": 2.126660174134321e-05, "loss": 1.979, "step": 11914 }, { "epoch": 0.3844238179834688, "grad_norm": 0.37109375, "learning_rate": 2.1265171888922018e-05, "loss": 1.9839, "step": 11915 }, { "epoch": 0.38445608183726515, "grad_norm": 0.341796875, "learning_rate": 2.1263741967538312e-05, "loss": 1.948, "step": 11916 }, { "epoch": 0.3844883456910615, "grad_norm": 0.36328125, "learning_rate": 2.126231197720783e-05, "loss": 1.9703, "step": 11917 }, { "epoch": 0.38452060954485784, "grad_norm": 0.359375, "learning_rate": 2.1260881917946308e-05, "loss": 2.0008, "step": 11918 }, { "epoch": 0.3845528733986542, "grad_norm": 0.376953125, "learning_rate": 2.1259451789769486e-05, "loss": 1.9812, "step": 11919 }, { "epoch": 0.3845851372524505, "grad_norm": 0.3515625, "learning_rate": 2.1258021592693113e-05, "loss": 1.9552, "step": 11920 }, { "epoch": 0.38461740110624687, "grad_norm": 0.353515625, "learning_rate": 2.125659132673293e-05, "loss": 1.9903, "step": 11921 }, { "epoch": 0.3846496649600432, "grad_norm": 0.37890625, "learning_rate": 2.1255160991904675e-05, "loss": 1.9731, "step": 11922 }, { "epoch": 0.38468192881383956, "grad_norm": 0.34375, "learning_rate": 2.1253730588224095e-05, "loss": 2.0214, "step": 11923 }, { "epoch": 0.3847141926676359, "grad_norm": 0.373046875, "learning_rate": 2.1252300115706933e-05, "loss": 1.9656, "step": 11924 }, { "epoch": 0.3847464565214323, "grad_norm": 0.369140625, "learning_rate": 2.1250869574368944e-05, "loss": 1.9894, "step": 11925 }, { "epoch": 0.38477872037522864, "grad_norm": 0.3984375, "learning_rate": 2.1249438964225862e-05, "loss": 1.9476, "step": 11926 }, { "epoch": 0.384810984229025, "grad_norm": 0.35546875, "learning_rate": 2.1248008285293435e-05, "loss": 1.9918, "step": 11927 }, { "epoch": 0.38484324808282133, "grad_norm": 0.388671875, "learning_rate": 2.1246577537587425e-05, "loss": 1.9556, "step": 11928 }, { "epoch": 0.3848755119366177, "grad_norm": 0.369140625, "learning_rate": 2.1245146721123563e-05, "loss": 2.0205, "step": 11929 }, { "epoch": 0.384907775790414, "grad_norm": 0.369140625, "learning_rate": 2.1243715835917607e-05, "loss": 1.9822, "step": 11930 }, { "epoch": 0.38494003964421036, "grad_norm": 0.380859375, "learning_rate": 2.1242284881985312e-05, "loss": 1.9926, "step": 11931 }, { "epoch": 0.3849723034980067, "grad_norm": 0.376953125, "learning_rate": 2.124085385934242e-05, "loss": 1.9796, "step": 11932 }, { "epoch": 0.38500456735180305, "grad_norm": 0.369140625, "learning_rate": 2.1239422768004684e-05, "loss": 1.9745, "step": 11933 }, { "epoch": 0.3850368312055994, "grad_norm": 0.359375, "learning_rate": 2.123799160798786e-05, "loss": 1.9804, "step": 11934 }, { "epoch": 0.38506909505939574, "grad_norm": 0.359375, "learning_rate": 2.12365603793077e-05, "loss": 1.9938, "step": 11935 }, { "epoch": 0.3851013589131921, "grad_norm": 0.35546875, "learning_rate": 2.123512908197995e-05, "loss": 1.9876, "step": 11936 }, { "epoch": 0.3851336227669884, "grad_norm": 0.359375, "learning_rate": 2.1233697716020384e-05, "loss": 1.9554, "step": 11937 }, { "epoch": 0.38516588662078477, "grad_norm": 0.35546875, "learning_rate": 2.1232266281444742e-05, "loss": 1.9534, "step": 11938 }, { "epoch": 0.3851981504745811, "grad_norm": 0.375, "learning_rate": 2.123083477826878e-05, "loss": 1.9289, "step": 11939 }, { "epoch": 0.38523041432837746, "grad_norm": 0.3671875, "learning_rate": 2.122940320650826e-05, "loss": 1.9983, "step": 11940 }, { "epoch": 0.3852626781821738, "grad_norm": 0.361328125, "learning_rate": 2.122797156617894e-05, "loss": 1.9424, "step": 11941 }, { "epoch": 0.38529494203597014, "grad_norm": 0.37890625, "learning_rate": 2.122653985729658e-05, "loss": 1.9811, "step": 11942 }, { "epoch": 0.3853272058897665, "grad_norm": 0.38671875, "learning_rate": 2.1225108079876928e-05, "loss": 1.9916, "step": 11943 }, { "epoch": 0.38535946974356283, "grad_norm": 0.34375, "learning_rate": 2.1223676233935757e-05, "loss": 1.9346, "step": 11944 }, { "epoch": 0.3853917335973592, "grad_norm": 0.404296875, "learning_rate": 2.1222244319488822e-05, "loss": 1.9655, "step": 11945 }, { "epoch": 0.3854239974511556, "grad_norm": 0.36328125, "learning_rate": 2.1220812336551884e-05, "loss": 1.9737, "step": 11946 }, { "epoch": 0.3854562613049519, "grad_norm": 0.400390625, "learning_rate": 2.1219380285140707e-05, "loss": 1.9672, "step": 11947 }, { "epoch": 0.38548852515874826, "grad_norm": 0.40234375, "learning_rate": 2.121794816527106e-05, "loss": 1.9783, "step": 11948 }, { "epoch": 0.3855207890125446, "grad_norm": 0.421875, "learning_rate": 2.1216515976958694e-05, "loss": 1.9557, "step": 11949 }, { "epoch": 0.38555305286634095, "grad_norm": 0.376953125, "learning_rate": 2.121508372021938e-05, "loss": 1.9793, "step": 11950 }, { "epoch": 0.3855853167201373, "grad_norm": 0.396484375, "learning_rate": 2.1213651395068885e-05, "loss": 1.9943, "step": 11951 }, { "epoch": 0.38561758057393364, "grad_norm": 0.3671875, "learning_rate": 2.121221900152297e-05, "loss": 1.966, "step": 11952 }, { "epoch": 0.38564984442773, "grad_norm": 0.38671875, "learning_rate": 2.1210786539597408e-05, "loss": 2.0044, "step": 11953 }, { "epoch": 0.3856821082815263, "grad_norm": 0.380859375, "learning_rate": 2.1209354009307962e-05, "loss": 1.9468, "step": 11954 }, { "epoch": 0.38571437213532267, "grad_norm": 0.3984375, "learning_rate": 2.1207921410670404e-05, "loss": 1.9993, "step": 11955 }, { "epoch": 0.385746635989119, "grad_norm": 0.349609375, "learning_rate": 2.1206488743700496e-05, "loss": 1.9814, "step": 11956 }, { "epoch": 0.38577889984291536, "grad_norm": 0.373046875, "learning_rate": 2.1205056008414016e-05, "loss": 1.984, "step": 11957 }, { "epoch": 0.3858111636967117, "grad_norm": 0.3828125, "learning_rate": 2.1203623204826727e-05, "loss": 1.9537, "step": 11958 }, { "epoch": 0.38584342755050804, "grad_norm": 0.37109375, "learning_rate": 2.1202190332954412e-05, "loss": 1.9805, "step": 11959 }, { "epoch": 0.3858756914043044, "grad_norm": 0.423828125, "learning_rate": 2.1200757392812828e-05, "loss": 1.9933, "step": 11960 }, { "epoch": 0.38590795525810073, "grad_norm": 0.373046875, "learning_rate": 2.119932438441776e-05, "loss": 2.014, "step": 11961 }, { "epoch": 0.3859402191118971, "grad_norm": 0.373046875, "learning_rate": 2.119789130778497e-05, "loss": 1.9954, "step": 11962 }, { "epoch": 0.3859724829656934, "grad_norm": 0.359375, "learning_rate": 2.1196458162930245e-05, "loss": 1.9755, "step": 11963 }, { "epoch": 0.38600474681948976, "grad_norm": 0.39453125, "learning_rate": 2.119502494986935e-05, "loss": 1.9962, "step": 11964 }, { "epoch": 0.3860370106732861, "grad_norm": 0.361328125, "learning_rate": 2.1193591668618066e-05, "loss": 2.0007, "step": 11965 }, { "epoch": 0.3860692745270825, "grad_norm": 0.38671875, "learning_rate": 2.1192158319192174e-05, "loss": 2.0191, "step": 11966 }, { "epoch": 0.38610153838087885, "grad_norm": 0.349609375, "learning_rate": 2.119072490160744e-05, "loss": 2.005, "step": 11967 }, { "epoch": 0.3861338022346752, "grad_norm": 0.3671875, "learning_rate": 2.1189291415879644e-05, "loss": 1.9776, "step": 11968 }, { "epoch": 0.38616606608847154, "grad_norm": 0.345703125, "learning_rate": 2.1187857862024573e-05, "loss": 1.9667, "step": 11969 }, { "epoch": 0.3861983299422679, "grad_norm": 0.361328125, "learning_rate": 2.1186424240057997e-05, "loss": 1.9842, "step": 11970 }, { "epoch": 0.3862305937960642, "grad_norm": 0.341796875, "learning_rate": 2.1184990549995705e-05, "loss": 1.9826, "step": 11971 }, { "epoch": 0.38626285764986057, "grad_norm": 0.359375, "learning_rate": 2.1183556791853475e-05, "loss": 2.0033, "step": 11972 }, { "epoch": 0.3862951215036569, "grad_norm": 0.44140625, "learning_rate": 2.1182122965647086e-05, "loss": 2.1018, "step": 11973 }, { "epoch": 0.38632738535745326, "grad_norm": 0.396484375, "learning_rate": 2.1180689071392326e-05, "loss": 2.0708, "step": 11974 }, { "epoch": 0.3863596492112496, "grad_norm": 0.447265625, "learning_rate": 2.1179255109104968e-05, "loss": 2.1197, "step": 11975 }, { "epoch": 0.38639191306504594, "grad_norm": 0.40234375, "learning_rate": 2.117782107880081e-05, "loss": 2.0783, "step": 11976 }, { "epoch": 0.3864241769188423, "grad_norm": 0.3984375, "learning_rate": 2.1176386980495624e-05, "loss": 2.0838, "step": 11977 }, { "epoch": 0.38645644077263863, "grad_norm": 0.419921875, "learning_rate": 2.11749528142052e-05, "loss": 2.0853, "step": 11978 }, { "epoch": 0.386488704626435, "grad_norm": 0.4453125, "learning_rate": 2.117351857994533e-05, "loss": 2.0798, "step": 11979 }, { "epoch": 0.3865209684802313, "grad_norm": 0.396484375, "learning_rate": 2.11720842777318e-05, "loss": 2.0362, "step": 11980 }, { "epoch": 0.38655323233402766, "grad_norm": 0.423828125, "learning_rate": 2.1170649907580385e-05, "loss": 2.0686, "step": 11981 }, { "epoch": 0.386585496187824, "grad_norm": 0.419921875, "learning_rate": 2.1169215469506888e-05, "loss": 2.0761, "step": 11982 }, { "epoch": 0.38661776004162035, "grad_norm": 0.466796875, "learning_rate": 2.1167780963527096e-05, "loss": 2.103, "step": 11983 }, { "epoch": 0.3866500238954167, "grad_norm": 0.46484375, "learning_rate": 2.1166346389656786e-05, "loss": 2.0939, "step": 11984 }, { "epoch": 0.38668228774921304, "grad_norm": 0.43359375, "learning_rate": 2.1164911747911764e-05, "loss": 2.0915, "step": 11985 }, { "epoch": 0.38671455160300944, "grad_norm": 0.423828125, "learning_rate": 2.1163477038307816e-05, "loss": 2.0645, "step": 11986 }, { "epoch": 0.3867468154568058, "grad_norm": 0.443359375, "learning_rate": 2.1162042260860737e-05, "loss": 2.0831, "step": 11987 }, { "epoch": 0.3867790793106021, "grad_norm": 0.4296875, "learning_rate": 2.1160607415586312e-05, "loss": 2.0553, "step": 11988 }, { "epoch": 0.38681134316439847, "grad_norm": 0.4140625, "learning_rate": 2.1159172502500347e-05, "loss": 2.0621, "step": 11989 }, { "epoch": 0.3868436070181948, "grad_norm": 0.43359375, "learning_rate": 2.115773752161862e-05, "loss": 2.0677, "step": 11990 }, { "epoch": 0.38687587087199116, "grad_norm": 0.384765625, "learning_rate": 2.1156302472956945e-05, "loss": 2.0935, "step": 11991 }, { "epoch": 0.3869081347257875, "grad_norm": 0.421875, "learning_rate": 2.1154867356531105e-05, "loss": 2.0341, "step": 11992 }, { "epoch": 0.38694039857958384, "grad_norm": 0.4453125, "learning_rate": 2.1153432172356904e-05, "loss": 2.0747, "step": 11993 }, { "epoch": 0.3869726624333802, "grad_norm": 0.443359375, "learning_rate": 2.1151996920450134e-05, "loss": 2.0998, "step": 11994 }, { "epoch": 0.38700492628717653, "grad_norm": 0.41015625, "learning_rate": 2.115056160082659e-05, "loss": 2.0821, "step": 11995 }, { "epoch": 0.3870371901409729, "grad_norm": 0.42578125, "learning_rate": 2.1149126213502083e-05, "loss": 2.0855, "step": 11996 }, { "epoch": 0.3870694539947692, "grad_norm": 0.40234375, "learning_rate": 2.1147690758492404e-05, "loss": 2.1104, "step": 11997 }, { "epoch": 0.38710171784856556, "grad_norm": 0.408203125, "learning_rate": 2.1146255235813354e-05, "loss": 2.0369, "step": 11998 }, { "epoch": 0.3871339817023619, "grad_norm": 0.39453125, "learning_rate": 2.114481964548074e-05, "loss": 2.0866, "step": 11999 }, { "epoch": 0.38716624555615825, "grad_norm": 0.396484375, "learning_rate": 2.1143383987510355e-05, "loss": 2.0604, "step": 12000 }, { "epoch": 0.3871985094099546, "grad_norm": 0.37890625, "learning_rate": 2.114194826191801e-05, "loss": 2.0663, "step": 12001 }, { "epoch": 0.38723077326375094, "grad_norm": 0.375, "learning_rate": 2.11405124687195e-05, "loss": 2.0783, "step": 12002 }, { "epoch": 0.3872630371175473, "grad_norm": 0.375, "learning_rate": 2.1139076607930637e-05, "loss": 2.0469, "step": 12003 }, { "epoch": 0.3872953009713436, "grad_norm": 0.373046875, "learning_rate": 2.1137640679567224e-05, "loss": 2.0596, "step": 12004 }, { "epoch": 0.38732756482513997, "grad_norm": 0.376953125, "learning_rate": 2.113620468364506e-05, "loss": 2.0607, "step": 12005 }, { "epoch": 0.38735982867893637, "grad_norm": 0.35546875, "learning_rate": 2.1134768620179962e-05, "loss": 2.0677, "step": 12006 }, { "epoch": 0.3873920925327327, "grad_norm": 0.365234375, "learning_rate": 2.1133332489187735e-05, "loss": 2.0961, "step": 12007 }, { "epoch": 0.38742435638652906, "grad_norm": 0.36328125, "learning_rate": 2.113189629068418e-05, "loss": 2.1038, "step": 12008 }, { "epoch": 0.3874566202403254, "grad_norm": 0.359375, "learning_rate": 2.1130460024685102e-05, "loss": 2.0834, "step": 12009 }, { "epoch": 0.38748888409412174, "grad_norm": 0.384765625, "learning_rate": 2.1129023691206332e-05, "loss": 2.0899, "step": 12010 }, { "epoch": 0.3875211479479181, "grad_norm": 0.376953125, "learning_rate": 2.1127587290263658e-05, "loss": 2.0828, "step": 12011 }, { "epoch": 0.38755341180171443, "grad_norm": 0.3828125, "learning_rate": 2.11261508218729e-05, "loss": 2.085, "step": 12012 }, { "epoch": 0.3875856756555108, "grad_norm": 0.37109375, "learning_rate": 2.112471428604987e-05, "loss": 2.0508, "step": 12013 }, { "epoch": 0.3876179395093071, "grad_norm": 0.482421875, "learning_rate": 2.112327768281038e-05, "loss": 2.0626, "step": 12014 }, { "epoch": 0.38765020336310346, "grad_norm": 0.4296875, "learning_rate": 2.112184101217024e-05, "loss": 2.0784, "step": 12015 }, { "epoch": 0.3876824672168998, "grad_norm": 0.5390625, "learning_rate": 2.1120404274145266e-05, "loss": 2.1007, "step": 12016 }, { "epoch": 0.38771473107069615, "grad_norm": 0.455078125, "learning_rate": 2.1118967468751274e-05, "loss": 2.1037, "step": 12017 }, { "epoch": 0.3877469949244925, "grad_norm": 0.443359375, "learning_rate": 2.1117530596004075e-05, "loss": 2.1003, "step": 12018 }, { "epoch": 0.38777925877828884, "grad_norm": 0.40234375, "learning_rate": 2.1116093655919492e-05, "loss": 2.0788, "step": 12019 }, { "epoch": 0.3878115226320852, "grad_norm": 0.46484375, "learning_rate": 2.1114656648513337e-05, "loss": 2.0742, "step": 12020 }, { "epoch": 0.3878437864858815, "grad_norm": 0.451171875, "learning_rate": 2.1113219573801425e-05, "loss": 2.0959, "step": 12021 }, { "epoch": 0.38787605033967787, "grad_norm": 0.46484375, "learning_rate": 2.111178243179958e-05, "loss": 2.0436, "step": 12022 }, { "epoch": 0.3879083141934742, "grad_norm": 0.404296875, "learning_rate": 2.1110345222523616e-05, "loss": 2.0691, "step": 12023 }, { "epoch": 0.38794057804727056, "grad_norm": 0.4296875, "learning_rate": 2.110890794598936e-05, "loss": 2.0903, "step": 12024 }, { "epoch": 0.3879728419010669, "grad_norm": 0.412109375, "learning_rate": 2.1107470602212628e-05, "loss": 2.0803, "step": 12025 }, { "epoch": 0.38800510575486324, "grad_norm": 0.416015625, "learning_rate": 2.1106033191209232e-05, "loss": 2.0917, "step": 12026 }, { "epoch": 0.38803736960865964, "grad_norm": 0.423828125, "learning_rate": 2.1104595712995012e-05, "loss": 2.0686, "step": 12027 }, { "epoch": 0.388069633462456, "grad_norm": 0.3984375, "learning_rate": 2.110315816758578e-05, "loss": 2.0635, "step": 12028 }, { "epoch": 0.38810189731625233, "grad_norm": 0.388671875, "learning_rate": 2.1101720554997356e-05, "loss": 2.0982, "step": 12029 }, { "epoch": 0.3881341611700487, "grad_norm": 0.376953125, "learning_rate": 2.1100282875245574e-05, "loss": 2.0717, "step": 12030 }, { "epoch": 0.388166425023845, "grad_norm": 0.70703125, "learning_rate": 2.1098845128346256e-05, "loss": 2.0614, "step": 12031 }, { "epoch": 0.38819868887764136, "grad_norm": 0.39453125, "learning_rate": 2.1097407314315226e-05, "loss": 2.0381, "step": 12032 }, { "epoch": 0.3882309527314377, "grad_norm": 0.40625, "learning_rate": 2.1095969433168307e-05, "loss": 2.0743, "step": 12033 }, { "epoch": 0.38826321658523405, "grad_norm": 0.404296875, "learning_rate": 2.1094531484921332e-05, "loss": 2.0989, "step": 12034 }, { "epoch": 0.3882954804390304, "grad_norm": 0.3671875, "learning_rate": 2.1093093469590127e-05, "loss": 2.0828, "step": 12035 }, { "epoch": 0.38832774429282674, "grad_norm": 0.41015625, "learning_rate": 2.1091655387190517e-05, "loss": 2.1028, "step": 12036 }, { "epoch": 0.3883600081466231, "grad_norm": 0.40234375, "learning_rate": 2.109021723773834e-05, "loss": 2.0831, "step": 12037 }, { "epoch": 0.3883922720004194, "grad_norm": 0.416015625, "learning_rate": 2.1088779021249417e-05, "loss": 2.1027, "step": 12038 }, { "epoch": 0.38842453585421577, "grad_norm": 0.412109375, "learning_rate": 2.1087340737739588e-05, "loss": 2.0498, "step": 12039 }, { "epoch": 0.3884567997080121, "grad_norm": 0.3984375, "learning_rate": 2.1085902387224666e-05, "loss": 2.0716, "step": 12040 }, { "epoch": 0.38848906356180846, "grad_norm": 0.392578125, "learning_rate": 2.1084463969720506e-05, "loss": 2.0991, "step": 12041 }, { "epoch": 0.3885213274156048, "grad_norm": 0.451171875, "learning_rate": 2.1083025485242935e-05, "loss": 2.0657, "step": 12042 }, { "epoch": 0.38855359126940114, "grad_norm": 0.396484375, "learning_rate": 2.1081586933807776e-05, "loss": 2.0877, "step": 12043 }, { "epoch": 0.3885858551231975, "grad_norm": 0.42578125, "learning_rate": 2.1080148315430875e-05, "loss": 2.0935, "step": 12044 }, { "epoch": 0.38861811897699383, "grad_norm": 0.421875, "learning_rate": 2.1078709630128064e-05, "loss": 2.0507, "step": 12045 }, { "epoch": 0.3886503828307902, "grad_norm": 0.41015625, "learning_rate": 2.1077270877915176e-05, "loss": 2.0777, "step": 12046 }, { "epoch": 0.3886826466845866, "grad_norm": 0.4296875, "learning_rate": 2.107583205880805e-05, "loss": 2.0426, "step": 12047 }, { "epoch": 0.3887149105383829, "grad_norm": 0.37109375, "learning_rate": 2.1074393172822524e-05, "loss": 2.0803, "step": 12048 }, { "epoch": 0.38874717439217926, "grad_norm": 0.43359375, "learning_rate": 2.1072954219974433e-05, "loss": 2.071, "step": 12049 }, { "epoch": 0.3887794382459756, "grad_norm": 0.396484375, "learning_rate": 2.1071515200279623e-05, "loss": 2.0808, "step": 12050 }, { "epoch": 0.38881170209977195, "grad_norm": 0.419921875, "learning_rate": 2.107007611375393e-05, "loss": 2.0614, "step": 12051 }, { "epoch": 0.3888439659535683, "grad_norm": 0.55078125, "learning_rate": 2.106863696041319e-05, "loss": 2.0622, "step": 12052 }, { "epoch": 0.38887622980736464, "grad_norm": 0.384765625, "learning_rate": 2.106719774027325e-05, "loss": 2.0918, "step": 12053 }, { "epoch": 0.388908493661161, "grad_norm": 0.4296875, "learning_rate": 2.106575845334995e-05, "loss": 2.1103, "step": 12054 }, { "epoch": 0.3889407575149573, "grad_norm": 0.44921875, "learning_rate": 2.106431909965913e-05, "loss": 2.0611, "step": 12055 }, { "epoch": 0.38897302136875367, "grad_norm": 0.416015625, "learning_rate": 2.1062879679216642e-05, "loss": 2.0609, "step": 12056 }, { "epoch": 0.38900528522255, "grad_norm": 0.408203125, "learning_rate": 2.106144019203831e-05, "loss": 2.0773, "step": 12057 }, { "epoch": 0.38903754907634636, "grad_norm": 0.412109375, "learning_rate": 2.106000063814001e-05, "loss": 2.0599, "step": 12058 }, { "epoch": 0.3890698129301427, "grad_norm": 0.376953125, "learning_rate": 2.1058561017537565e-05, "loss": 2.0767, "step": 12059 }, { "epoch": 0.38910207678393904, "grad_norm": 0.390625, "learning_rate": 2.1057121330246822e-05, "loss": 2.0698, "step": 12060 }, { "epoch": 0.3891343406377354, "grad_norm": 0.37109375, "learning_rate": 2.1055681576283637e-05, "loss": 1.9768, "step": 12061 }, { "epoch": 0.38916660449153173, "grad_norm": 0.376953125, "learning_rate": 2.105424175566385e-05, "loss": 1.9699, "step": 12062 }, { "epoch": 0.3891988683453281, "grad_norm": 0.376953125, "learning_rate": 2.1052801868403316e-05, "loss": 2.0121, "step": 12063 }, { "epoch": 0.3892311321991244, "grad_norm": 0.380859375, "learning_rate": 2.1051361914517878e-05, "loss": 2.0114, "step": 12064 }, { "epoch": 0.38926339605292076, "grad_norm": 0.380859375, "learning_rate": 2.1049921894023395e-05, "loss": 2.0008, "step": 12065 }, { "epoch": 0.3892956599067171, "grad_norm": 0.376953125, "learning_rate": 2.104848180693571e-05, "loss": 2.0278, "step": 12066 }, { "epoch": 0.3893279237605135, "grad_norm": 0.400390625, "learning_rate": 2.104704165327067e-05, "loss": 1.9862, "step": 12067 }, { "epoch": 0.38936018761430985, "grad_norm": 0.376953125, "learning_rate": 2.104560143304414e-05, "loss": 2.014, "step": 12068 }, { "epoch": 0.3893924514681062, "grad_norm": 0.38671875, "learning_rate": 2.104416114627197e-05, "loss": 1.9733, "step": 12069 }, { "epoch": 0.38942471532190254, "grad_norm": 0.3828125, "learning_rate": 2.104272079297e-05, "loss": 2.008, "step": 12070 }, { "epoch": 0.3894569791756989, "grad_norm": 0.373046875, "learning_rate": 2.10412803731541e-05, "loss": 1.9998, "step": 12071 }, { "epoch": 0.3894892430294952, "grad_norm": 0.361328125, "learning_rate": 2.1039839886840123e-05, "loss": 1.9399, "step": 12072 }, { "epoch": 0.38952150688329157, "grad_norm": 0.357421875, "learning_rate": 2.1038399334043918e-05, "loss": 1.9738, "step": 12073 }, { "epoch": 0.3895537707370879, "grad_norm": 0.357421875, "learning_rate": 2.1036958714781343e-05, "loss": 1.9318, "step": 12074 }, { "epoch": 0.38958603459088426, "grad_norm": 0.361328125, "learning_rate": 2.1035518029068258e-05, "loss": 1.9921, "step": 12075 }, { "epoch": 0.3896182984446806, "grad_norm": 0.388671875, "learning_rate": 2.1034077276920525e-05, "loss": 2.0077, "step": 12076 }, { "epoch": 0.38965056229847694, "grad_norm": 0.359375, "learning_rate": 2.103263645835399e-05, "loss": 1.9995, "step": 12077 }, { "epoch": 0.3896828261522733, "grad_norm": 0.40625, "learning_rate": 2.1031195573384526e-05, "loss": 2.0091, "step": 12078 }, { "epoch": 0.38971509000606963, "grad_norm": 0.40234375, "learning_rate": 2.102975462202799e-05, "loss": 1.9869, "step": 12079 }, { "epoch": 0.389747353859866, "grad_norm": 0.3515625, "learning_rate": 2.102831360430024e-05, "loss": 1.9739, "step": 12080 }, { "epoch": 0.3897796177136623, "grad_norm": 0.40625, "learning_rate": 2.1026872520217133e-05, "loss": 1.9713, "step": 12081 }, { "epoch": 0.38981188156745866, "grad_norm": 0.34765625, "learning_rate": 2.1025431369794546e-05, "loss": 2.0049, "step": 12082 }, { "epoch": 0.389844145421255, "grad_norm": 0.404296875, "learning_rate": 2.1023990153048327e-05, "loss": 2.0249, "step": 12083 }, { "epoch": 0.38987640927505135, "grad_norm": 0.3671875, "learning_rate": 2.1022548869994346e-05, "loss": 1.9701, "step": 12084 }, { "epoch": 0.3899086731288477, "grad_norm": 0.384765625, "learning_rate": 2.102110752064847e-05, "loss": 1.9731, "step": 12085 }, { "epoch": 0.38994093698264404, "grad_norm": 0.37109375, "learning_rate": 2.1019666105026558e-05, "loss": 2.018, "step": 12086 }, { "epoch": 0.38997320083644044, "grad_norm": 0.42578125, "learning_rate": 2.101822462314448e-05, "loss": 1.9648, "step": 12087 }, { "epoch": 0.3900054646902368, "grad_norm": 0.380859375, "learning_rate": 2.1016783075018105e-05, "loss": 1.9986, "step": 12088 }, { "epoch": 0.3900377285440331, "grad_norm": 0.365234375, "learning_rate": 2.10153414606633e-05, "loss": 2.0076, "step": 12089 }, { "epoch": 0.39006999239782947, "grad_norm": 0.412109375, "learning_rate": 2.101389978009593e-05, "loss": 1.9742, "step": 12090 }, { "epoch": 0.3901022562516258, "grad_norm": 0.373046875, "learning_rate": 2.101245803333187e-05, "loss": 1.9684, "step": 12091 }, { "epoch": 0.39013452010542216, "grad_norm": 0.375, "learning_rate": 2.1011016220386978e-05, "loss": 2.0169, "step": 12092 }, { "epoch": 0.3901667839592185, "grad_norm": 0.35546875, "learning_rate": 2.1009574341277135e-05, "loss": 1.9274, "step": 12093 }, { "epoch": 0.39019904781301484, "grad_norm": 0.35546875, "learning_rate": 2.1008132396018207e-05, "loss": 1.9888, "step": 12094 }, { "epoch": 0.3902313116668112, "grad_norm": 0.34765625, "learning_rate": 2.100669038462607e-05, "loss": 1.9914, "step": 12095 }, { "epoch": 0.39026357552060753, "grad_norm": 0.34765625, "learning_rate": 2.1005248307116598e-05, "loss": 1.9874, "step": 12096 }, { "epoch": 0.3902958393744039, "grad_norm": 0.35546875, "learning_rate": 2.1003806163505653e-05, "loss": 2.0044, "step": 12097 }, { "epoch": 0.3903281032282002, "grad_norm": 0.35546875, "learning_rate": 2.100236395380912e-05, "loss": 1.971, "step": 12098 }, { "epoch": 0.39036036708199656, "grad_norm": 0.3515625, "learning_rate": 2.1000921678042873e-05, "loss": 1.9589, "step": 12099 }, { "epoch": 0.3903926309357929, "grad_norm": 0.353515625, "learning_rate": 2.0999479336222783e-05, "loss": 1.9731, "step": 12100 }, { "epoch": 0.39042489478958925, "grad_norm": 0.337890625, "learning_rate": 2.0998036928364732e-05, "loss": 1.9797, "step": 12101 }, { "epoch": 0.3904571586433856, "grad_norm": 0.359375, "learning_rate": 2.099659445448459e-05, "loss": 1.9828, "step": 12102 }, { "epoch": 0.39048942249718194, "grad_norm": 0.34765625, "learning_rate": 2.0995151914598236e-05, "loss": 1.9822, "step": 12103 }, { "epoch": 0.3905216863509783, "grad_norm": 0.359375, "learning_rate": 2.0993709308721554e-05, "loss": 1.971, "step": 12104 }, { "epoch": 0.3905539502047746, "grad_norm": 0.357421875, "learning_rate": 2.0992266636870418e-05, "loss": 1.9835, "step": 12105 }, { "epoch": 0.39058621405857097, "grad_norm": 0.345703125, "learning_rate": 2.0990823899060708e-05, "loss": 1.9955, "step": 12106 }, { "epoch": 0.39061847791236737, "grad_norm": 0.3671875, "learning_rate": 2.0989381095308314e-05, "loss": 2.0062, "step": 12107 }, { "epoch": 0.3906507417661637, "grad_norm": 0.34375, "learning_rate": 2.0987938225629103e-05, "loss": 1.9558, "step": 12108 }, { "epoch": 0.39068300561996006, "grad_norm": 0.357421875, "learning_rate": 2.098649529003896e-05, "loss": 2.0084, "step": 12109 }, { "epoch": 0.3907152694737564, "grad_norm": 0.365234375, "learning_rate": 2.0985052288553783e-05, "loss": 1.9593, "step": 12110 }, { "epoch": 0.39074753332755274, "grad_norm": 0.345703125, "learning_rate": 2.098360922118944e-05, "loss": 1.9585, "step": 12111 }, { "epoch": 0.3907797971813491, "grad_norm": 0.3828125, "learning_rate": 2.0982166087961814e-05, "loss": 1.9733, "step": 12112 }, { "epoch": 0.39081206103514543, "grad_norm": 0.373046875, "learning_rate": 2.0980722888886797e-05, "loss": 1.9502, "step": 12113 }, { "epoch": 0.3908443248889418, "grad_norm": 0.349609375, "learning_rate": 2.097927962398028e-05, "loss": 1.9492, "step": 12114 }, { "epoch": 0.3908765887427381, "grad_norm": 0.392578125, "learning_rate": 2.0977836293258137e-05, "loss": 1.97, "step": 12115 }, { "epoch": 0.39090885259653446, "grad_norm": 0.408203125, "learning_rate": 2.097639289673626e-05, "loss": 1.9466, "step": 12116 }, { "epoch": 0.3909411164503308, "grad_norm": 0.375, "learning_rate": 2.0974949434430536e-05, "loss": 1.9973, "step": 12117 }, { "epoch": 0.39097338030412715, "grad_norm": 0.36328125, "learning_rate": 2.097350590635686e-05, "loss": 2.0118, "step": 12118 }, { "epoch": 0.3910056441579235, "grad_norm": 0.365234375, "learning_rate": 2.097206231253111e-05, "loss": 1.9971, "step": 12119 }, { "epoch": 0.39103790801171984, "grad_norm": 0.388671875, "learning_rate": 2.0970618652969186e-05, "loss": 1.9757, "step": 12120 }, { "epoch": 0.3910701718655162, "grad_norm": 0.376953125, "learning_rate": 2.096917492768698e-05, "loss": 1.9998, "step": 12121 }, { "epoch": 0.3911024357193125, "grad_norm": 0.361328125, "learning_rate": 2.0967731136700368e-05, "loss": 1.9838, "step": 12122 }, { "epoch": 0.39113469957310887, "grad_norm": 0.3671875, "learning_rate": 2.0966287280025258e-05, "loss": 1.963, "step": 12123 }, { "epoch": 0.3911669634269052, "grad_norm": 0.353515625, "learning_rate": 2.0964843357677542e-05, "loss": 1.9804, "step": 12124 }, { "epoch": 0.39119922728070156, "grad_norm": 0.373046875, "learning_rate": 2.0963399369673104e-05, "loss": 1.9753, "step": 12125 }, { "epoch": 0.3912314911344979, "grad_norm": 0.38671875, "learning_rate": 2.0961955316027844e-05, "loss": 1.9701, "step": 12126 }, { "epoch": 0.39126375498829424, "grad_norm": 0.376953125, "learning_rate": 2.096051119675766e-05, "loss": 1.992, "step": 12127 }, { "epoch": 0.39129601884209064, "grad_norm": 0.34765625, "learning_rate": 2.0959067011878445e-05, "loss": 1.9775, "step": 12128 }, { "epoch": 0.391328282695887, "grad_norm": 0.380859375, "learning_rate": 2.0957622761406093e-05, "loss": 1.9941, "step": 12129 }, { "epoch": 0.39136054654968333, "grad_norm": 0.359375, "learning_rate": 2.0956178445356504e-05, "loss": 1.9814, "step": 12130 }, { "epoch": 0.3913928104034797, "grad_norm": 0.375, "learning_rate": 2.0954734063745576e-05, "loss": 1.9792, "step": 12131 }, { "epoch": 0.391425074257276, "grad_norm": 0.361328125, "learning_rate": 2.095328961658921e-05, "loss": 2.0034, "step": 12132 }, { "epoch": 0.39145733811107236, "grad_norm": 0.376953125, "learning_rate": 2.09518451039033e-05, "loss": 1.9735, "step": 12133 }, { "epoch": 0.3914896019648687, "grad_norm": 0.34375, "learning_rate": 2.0950400525703747e-05, "loss": 2.0065, "step": 12134 }, { "epoch": 0.39152186581866505, "grad_norm": 0.37109375, "learning_rate": 2.094895588200646e-05, "loss": 1.9923, "step": 12135 }, { "epoch": 0.3915541296724614, "grad_norm": 0.361328125, "learning_rate": 2.0947511172827328e-05, "loss": 2.0017, "step": 12136 }, { "epoch": 0.39158639352625774, "grad_norm": 0.388671875, "learning_rate": 2.094606639818226e-05, "loss": 1.9811, "step": 12137 }, { "epoch": 0.3916186573800541, "grad_norm": 0.349609375, "learning_rate": 2.0944621558087166e-05, "loss": 1.9556, "step": 12138 }, { "epoch": 0.3916509212338504, "grad_norm": 0.380859375, "learning_rate": 2.0943176652557934e-05, "loss": 1.9624, "step": 12139 }, { "epoch": 0.39168318508764677, "grad_norm": 0.375, "learning_rate": 2.094173168161048e-05, "loss": 1.986, "step": 12140 }, { "epoch": 0.3917154489414431, "grad_norm": 0.400390625, "learning_rate": 2.0940286645260706e-05, "loss": 1.9863, "step": 12141 }, { "epoch": 0.39174771279523946, "grad_norm": 0.37890625, "learning_rate": 2.0938841543524517e-05, "loss": 1.9868, "step": 12142 }, { "epoch": 0.3917799766490358, "grad_norm": 0.38671875, "learning_rate": 2.0937396376417823e-05, "loss": 1.9692, "step": 12143 }, { "epoch": 0.39181224050283214, "grad_norm": 0.376953125, "learning_rate": 2.0935951143956533e-05, "loss": 2.0012, "step": 12144 }, { "epoch": 0.3918445043566285, "grad_norm": 0.39453125, "learning_rate": 2.0934505846156544e-05, "loss": 1.9912, "step": 12145 }, { "epoch": 0.39187676821042483, "grad_norm": 0.365234375, "learning_rate": 2.0933060483033777e-05, "loss": 1.9705, "step": 12146 }, { "epoch": 0.3919090320642212, "grad_norm": 0.408203125, "learning_rate": 2.0931615054604134e-05, "loss": 1.9857, "step": 12147 }, { "epoch": 0.3919412959180176, "grad_norm": 0.37890625, "learning_rate": 2.0930169560883527e-05, "loss": 1.9857, "step": 12148 }, { "epoch": 0.3919735597718139, "grad_norm": 0.4140625, "learning_rate": 2.092872400188787e-05, "loss": 1.9909, "step": 12149 }, { "epoch": 0.39200582362561026, "grad_norm": 0.408203125, "learning_rate": 2.0927278377633073e-05, "loss": 2.0008, "step": 12150 }, { "epoch": 0.3920380874794066, "grad_norm": 0.47265625, "learning_rate": 2.0925832688135045e-05, "loss": 2.0738, "step": 12151 }, { "epoch": 0.39207035133320295, "grad_norm": 0.51953125, "learning_rate": 2.0924386933409702e-05, "loss": 2.0472, "step": 12152 }, { "epoch": 0.3921026151869993, "grad_norm": 0.498046875, "learning_rate": 2.092294111347296e-05, "loss": 2.0764, "step": 12153 }, { "epoch": 0.39213487904079564, "grad_norm": 0.50390625, "learning_rate": 2.092149522834073e-05, "loss": 2.0726, "step": 12154 }, { "epoch": 0.392167142894592, "grad_norm": 0.546875, "learning_rate": 2.0920049278028933e-05, "loss": 2.1071, "step": 12155 }, { "epoch": 0.3921994067483883, "grad_norm": 0.515625, "learning_rate": 2.0918603262553473e-05, "loss": 2.0851, "step": 12156 }, { "epoch": 0.39223167060218467, "grad_norm": 0.458984375, "learning_rate": 2.0917157181930276e-05, "loss": 2.0722, "step": 12157 }, { "epoch": 0.392263934455981, "grad_norm": 0.51953125, "learning_rate": 2.0915711036175263e-05, "loss": 2.0693, "step": 12158 }, { "epoch": 0.39229619830977736, "grad_norm": 0.404296875, "learning_rate": 2.0914264825304342e-05, "loss": 2.0914, "step": 12159 }, { "epoch": 0.3923284621635737, "grad_norm": 0.5078125, "learning_rate": 2.091281854933344e-05, "loss": 2.1082, "step": 12160 }, { "epoch": 0.39236072601737004, "grad_norm": 0.44140625, "learning_rate": 2.091137220827847e-05, "loss": 2.0563, "step": 12161 }, { "epoch": 0.3923929898711664, "grad_norm": 0.439453125, "learning_rate": 2.090992580215536e-05, "loss": 2.0629, "step": 12162 }, { "epoch": 0.39242525372496273, "grad_norm": 0.4296875, "learning_rate": 2.0908479330980022e-05, "loss": 2.0691, "step": 12163 }, { "epoch": 0.3924575175787591, "grad_norm": 0.416015625, "learning_rate": 2.0907032794768384e-05, "loss": 2.0735, "step": 12164 }, { "epoch": 0.3924897814325554, "grad_norm": 0.416015625, "learning_rate": 2.0905586193536366e-05, "loss": 2.0882, "step": 12165 }, { "epoch": 0.39252204528635176, "grad_norm": 0.396484375, "learning_rate": 2.090413952729989e-05, "loss": 2.0628, "step": 12166 }, { "epoch": 0.3925543091401481, "grad_norm": 0.404296875, "learning_rate": 2.0902692796074883e-05, "loss": 2.0801, "step": 12167 }, { "epoch": 0.3925865729939445, "grad_norm": 0.388671875, "learning_rate": 2.0901245999877273e-05, "loss": 2.0877, "step": 12168 }, { "epoch": 0.39261883684774085, "grad_norm": 0.4453125, "learning_rate": 2.0899799138722977e-05, "loss": 2.0503, "step": 12169 }, { "epoch": 0.3926511007015372, "grad_norm": 0.41015625, "learning_rate": 2.089835221262792e-05, "loss": 2.0914, "step": 12170 }, { "epoch": 0.39268336455533354, "grad_norm": 0.439453125, "learning_rate": 2.0896905221608037e-05, "loss": 2.0456, "step": 12171 }, { "epoch": 0.3927156284091299, "grad_norm": 0.416015625, "learning_rate": 2.0895458165679257e-05, "loss": 2.0747, "step": 12172 }, { "epoch": 0.3927478922629262, "grad_norm": 0.4296875, "learning_rate": 2.0894011044857496e-05, "loss": 2.0756, "step": 12173 }, { "epoch": 0.39278015611672257, "grad_norm": 0.484375, "learning_rate": 2.089256385915869e-05, "loss": 2.0659, "step": 12174 }, { "epoch": 0.3928124199705189, "grad_norm": 0.4453125, "learning_rate": 2.0891116608598774e-05, "loss": 2.0895, "step": 12175 }, { "epoch": 0.39284468382431526, "grad_norm": 0.484375, "learning_rate": 2.0889669293193668e-05, "loss": 2.095, "step": 12176 }, { "epoch": 0.3928769476781116, "grad_norm": 0.484375, "learning_rate": 2.088822191295931e-05, "loss": 2.1159, "step": 12177 }, { "epoch": 0.39290921153190794, "grad_norm": 0.44140625, "learning_rate": 2.0886774467911625e-05, "loss": 2.0949, "step": 12178 }, { "epoch": 0.3929414753857043, "grad_norm": 0.470703125, "learning_rate": 2.0885326958066552e-05, "loss": 2.0868, "step": 12179 }, { "epoch": 0.39297373923950063, "grad_norm": 0.4140625, "learning_rate": 2.0883879383440025e-05, "loss": 1.9828, "step": 12180 }, { "epoch": 0.393006003093297, "grad_norm": 0.416015625, "learning_rate": 2.088243174404797e-05, "loss": 1.9848, "step": 12181 }, { "epoch": 0.3930382669470933, "grad_norm": 0.404296875, "learning_rate": 2.0880984039906333e-05, "loss": 1.9797, "step": 12182 }, { "epoch": 0.39307053080088966, "grad_norm": 0.390625, "learning_rate": 2.087953627103104e-05, "loss": 1.9812, "step": 12183 }, { "epoch": 0.393102794654686, "grad_norm": 0.384765625, "learning_rate": 2.0878088437438026e-05, "loss": 2.0236, "step": 12184 }, { "epoch": 0.39313505850848235, "grad_norm": 0.3828125, "learning_rate": 2.0876640539143232e-05, "loss": 1.9727, "step": 12185 }, { "epoch": 0.3931673223622787, "grad_norm": 0.373046875, "learning_rate": 2.08751925761626e-05, "loss": 1.9581, "step": 12186 }, { "epoch": 0.39319958621607504, "grad_norm": 0.38671875, "learning_rate": 2.0873744548512062e-05, "loss": 1.9748, "step": 12187 }, { "epoch": 0.39323185006987144, "grad_norm": 0.353515625, "learning_rate": 2.087229645620755e-05, "loss": 1.9881, "step": 12188 }, { "epoch": 0.3932641139236678, "grad_norm": 0.396484375, "learning_rate": 2.087084829926502e-05, "loss": 2.0025, "step": 12189 }, { "epoch": 0.3932963777774641, "grad_norm": 0.357421875, "learning_rate": 2.08694000777004e-05, "loss": 1.9872, "step": 12190 }, { "epoch": 0.39332864163126047, "grad_norm": 0.38671875, "learning_rate": 2.0867951791529636e-05, "loss": 1.9797, "step": 12191 }, { "epoch": 0.3933609054850568, "grad_norm": 0.375, "learning_rate": 2.0866503440768667e-05, "loss": 1.9493, "step": 12192 }, { "epoch": 0.39339316933885315, "grad_norm": 0.3671875, "learning_rate": 2.0865055025433436e-05, "loss": 1.9378, "step": 12193 }, { "epoch": 0.3934254331926495, "grad_norm": 0.3984375, "learning_rate": 2.086360654553989e-05, "loss": 1.9918, "step": 12194 }, { "epoch": 0.39345769704644584, "grad_norm": 0.3671875, "learning_rate": 2.0862158001103965e-05, "loss": 2.0015, "step": 12195 }, { "epoch": 0.3934899609002422, "grad_norm": 0.365234375, "learning_rate": 2.0860709392141614e-05, "loss": 1.978, "step": 12196 }, { "epoch": 0.39352222475403853, "grad_norm": 0.353515625, "learning_rate": 2.085926071866878e-05, "loss": 1.9642, "step": 12197 }, { "epoch": 0.3935544886078349, "grad_norm": 0.359375, "learning_rate": 2.0857811980701403e-05, "loss": 1.9862, "step": 12198 }, { "epoch": 0.3935867524616312, "grad_norm": 0.345703125, "learning_rate": 2.0856363178255435e-05, "loss": 1.9849, "step": 12199 }, { "epoch": 0.39361901631542756, "grad_norm": 0.38671875, "learning_rate": 2.0854914311346822e-05, "loss": 1.9865, "step": 12200 }, { "epoch": 0.3936512801692239, "grad_norm": 0.35546875, "learning_rate": 2.085346537999152e-05, "loss": 1.9987, "step": 12201 }, { "epoch": 0.39368354402302025, "grad_norm": 0.373046875, "learning_rate": 2.0852016384205458e-05, "loss": 1.9886, "step": 12202 }, { "epoch": 0.3937158078768166, "grad_norm": 0.375, "learning_rate": 2.0850567324004607e-05, "loss": 2.0036, "step": 12203 }, { "epoch": 0.39374807173061294, "grad_norm": 0.34375, "learning_rate": 2.0849118199404908e-05, "loss": 1.9658, "step": 12204 }, { "epoch": 0.3937803355844093, "grad_norm": 0.373046875, "learning_rate": 2.0847669010422302e-05, "loss": 2.0188, "step": 12205 }, { "epoch": 0.3938125994382056, "grad_norm": 0.373046875, "learning_rate": 2.0846219757072762e-05, "loss": 2.0016, "step": 12206 }, { "epoch": 0.39384486329200197, "grad_norm": 0.3671875, "learning_rate": 2.0844770439372224e-05, "loss": 1.9927, "step": 12207 }, { "epoch": 0.3938771271457983, "grad_norm": 0.375, "learning_rate": 2.0843321057336648e-05, "loss": 1.97, "step": 12208 }, { "epoch": 0.3939093909995947, "grad_norm": 0.412109375, "learning_rate": 2.0841871610981984e-05, "loss": 1.9896, "step": 12209 }, { "epoch": 0.39394165485339105, "grad_norm": 0.365234375, "learning_rate": 2.0840422100324186e-05, "loss": 1.9852, "step": 12210 }, { "epoch": 0.3939739187071874, "grad_norm": 0.3515625, "learning_rate": 2.0838972525379218e-05, "loss": 1.978, "step": 12211 }, { "epoch": 0.39400618256098374, "grad_norm": 0.392578125, "learning_rate": 2.0837522886163025e-05, "loss": 2.0029, "step": 12212 }, { "epoch": 0.3940384464147801, "grad_norm": 0.38671875, "learning_rate": 2.0836073182691564e-05, "loss": 1.986, "step": 12213 }, { "epoch": 0.39407071026857643, "grad_norm": 0.3515625, "learning_rate": 2.0834623414980806e-05, "loss": 2.005, "step": 12214 }, { "epoch": 0.3941029741223728, "grad_norm": 0.3828125, "learning_rate": 2.083317358304669e-05, "loss": 2.0044, "step": 12215 }, { "epoch": 0.3941352379761691, "grad_norm": 0.37890625, "learning_rate": 2.0831723686905186e-05, "loss": 2.0223, "step": 12216 }, { "epoch": 0.39416750182996546, "grad_norm": 0.35546875, "learning_rate": 2.0830273726572257e-05, "loss": 1.9783, "step": 12217 }, { "epoch": 0.3941997656837618, "grad_norm": 0.3671875, "learning_rate": 2.0828823702063854e-05, "loss": 1.9979, "step": 12218 }, { "epoch": 0.39423202953755815, "grad_norm": 0.359375, "learning_rate": 2.082737361339594e-05, "loss": 1.9868, "step": 12219 }, { "epoch": 0.3942642933913545, "grad_norm": 0.359375, "learning_rate": 2.0825923460584478e-05, "loss": 2.0288, "step": 12220 }, { "epoch": 0.39429655724515084, "grad_norm": 0.375, "learning_rate": 2.0824473243645432e-05, "loss": 1.9944, "step": 12221 }, { "epoch": 0.3943288210989472, "grad_norm": 0.39453125, "learning_rate": 2.082302296259476e-05, "loss": 1.9871, "step": 12222 }, { "epoch": 0.3943610849527435, "grad_norm": 0.384765625, "learning_rate": 2.082157261744843e-05, "loss": 1.9957, "step": 12223 }, { "epoch": 0.39439334880653987, "grad_norm": 0.37890625, "learning_rate": 2.0820122208222407e-05, "loss": 1.9787, "step": 12224 }, { "epoch": 0.3944256126603362, "grad_norm": 0.37109375, "learning_rate": 2.0818671734932655e-05, "loss": 1.999, "step": 12225 }, { "epoch": 0.39445787651413255, "grad_norm": 0.3671875, "learning_rate": 2.0817221197595133e-05, "loss": 2.0073, "step": 12226 }, { "epoch": 0.3944901403679289, "grad_norm": 0.3671875, "learning_rate": 2.0815770596225817e-05, "loss": 2.0031, "step": 12227 }, { "epoch": 0.39452240422172524, "grad_norm": 0.3828125, "learning_rate": 2.081431993084067e-05, "loss": 1.9955, "step": 12228 }, { "epoch": 0.39455466807552164, "grad_norm": 0.349609375, "learning_rate": 2.0812869201455657e-05, "loss": 1.9675, "step": 12229 }, { "epoch": 0.394586931929318, "grad_norm": 0.38671875, "learning_rate": 2.0811418408086753e-05, "loss": 1.977, "step": 12230 }, { "epoch": 0.39461919578311433, "grad_norm": 0.37109375, "learning_rate": 2.0809967550749925e-05, "loss": 1.9879, "step": 12231 }, { "epoch": 0.3946514596369107, "grad_norm": 0.34375, "learning_rate": 2.0808516629461147e-05, "loss": 2.0135, "step": 12232 }, { "epoch": 0.394683723490707, "grad_norm": 0.384765625, "learning_rate": 2.0807065644236374e-05, "loss": 2.0052, "step": 12233 }, { "epoch": 0.39471598734450336, "grad_norm": 0.349609375, "learning_rate": 2.08056145950916e-05, "loss": 1.9987, "step": 12234 }, { "epoch": 0.3947482511982997, "grad_norm": 0.48828125, "learning_rate": 2.080416348204278e-05, "loss": 1.9862, "step": 12235 }, { "epoch": 0.39478051505209605, "grad_norm": 0.3828125, "learning_rate": 2.080271230510589e-05, "loss": 1.9705, "step": 12236 }, { "epoch": 0.3948127789058924, "grad_norm": 0.400390625, "learning_rate": 2.080126106429691e-05, "loss": 1.995, "step": 12237 }, { "epoch": 0.39484504275968874, "grad_norm": 0.3984375, "learning_rate": 2.0799809759631803e-05, "loss": 1.9857, "step": 12238 }, { "epoch": 0.3948773066134851, "grad_norm": 0.40234375, "learning_rate": 2.0798358391126556e-05, "loss": 2.0172, "step": 12239 }, { "epoch": 0.3949095704672814, "grad_norm": 0.38671875, "learning_rate": 2.079690695879714e-05, "loss": 1.9629, "step": 12240 }, { "epoch": 0.39494183432107777, "grad_norm": 0.376953125, "learning_rate": 2.079545546265953e-05, "loss": 2.0103, "step": 12241 }, { "epoch": 0.3949740981748741, "grad_norm": 0.3671875, "learning_rate": 2.0794003902729703e-05, "loss": 2.011, "step": 12242 }, { "epoch": 0.39500636202867045, "grad_norm": 0.375, "learning_rate": 2.0792552279023637e-05, "loss": 2.0199, "step": 12243 }, { "epoch": 0.3950386258824668, "grad_norm": 0.357421875, "learning_rate": 2.0791100591557312e-05, "loss": 1.9899, "step": 12244 }, { "epoch": 0.39507088973626314, "grad_norm": 0.376953125, "learning_rate": 2.0789648840346713e-05, "loss": 2.0015, "step": 12245 }, { "epoch": 0.3951031535900595, "grad_norm": 0.37890625, "learning_rate": 2.0788197025407804e-05, "loss": 2.0072, "step": 12246 }, { "epoch": 0.39513541744385583, "grad_norm": 0.345703125, "learning_rate": 2.0786745146756578e-05, "loss": 1.9568, "step": 12247 }, { "epoch": 0.3951676812976522, "grad_norm": 0.36328125, "learning_rate": 2.0785293204409016e-05, "loss": 2.0066, "step": 12248 }, { "epoch": 0.3951999451514486, "grad_norm": 0.3671875, "learning_rate": 2.078384119838109e-05, "loss": 2.0123, "step": 12249 }, { "epoch": 0.3952322090052449, "grad_norm": 0.36328125, "learning_rate": 2.078238912868879e-05, "loss": 1.945, "step": 12250 }, { "epoch": 0.39526447285904126, "grad_norm": 0.46484375, "learning_rate": 2.0780936995348106e-05, "loss": 1.9929, "step": 12251 }, { "epoch": 0.3952967367128376, "grad_norm": 0.376953125, "learning_rate": 2.0779484798375007e-05, "loss": 2.002, "step": 12252 }, { "epoch": 0.39532900056663395, "grad_norm": 0.376953125, "learning_rate": 2.0778032537785488e-05, "loss": 1.9854, "step": 12253 }, { "epoch": 0.3953612644204303, "grad_norm": 0.357421875, "learning_rate": 2.077658021359553e-05, "loss": 1.9939, "step": 12254 }, { "epoch": 0.39539352827422664, "grad_norm": 0.38671875, "learning_rate": 2.0775127825821126e-05, "loss": 1.9832, "step": 12255 }, { "epoch": 0.395425792128023, "grad_norm": 0.353515625, "learning_rate": 2.0773675374478256e-05, "loss": 1.9648, "step": 12256 }, { "epoch": 0.3954580559818193, "grad_norm": 0.373046875, "learning_rate": 2.077222285958291e-05, "loss": 2.0013, "step": 12257 }, { "epoch": 0.39549031983561567, "grad_norm": 0.35546875, "learning_rate": 2.0770770281151075e-05, "loss": 1.9701, "step": 12258 }, { "epoch": 0.395522583689412, "grad_norm": 0.376953125, "learning_rate": 2.076931763919874e-05, "loss": 2.0003, "step": 12259 }, { "epoch": 0.39555484754320835, "grad_norm": 0.353515625, "learning_rate": 2.0767864933741894e-05, "loss": 1.9756, "step": 12260 }, { "epoch": 0.3955871113970047, "grad_norm": 0.392578125, "learning_rate": 2.0766412164796532e-05, "loss": 1.9608, "step": 12261 }, { "epoch": 0.39561937525080104, "grad_norm": 0.361328125, "learning_rate": 2.0764959332378642e-05, "loss": 1.9939, "step": 12262 }, { "epoch": 0.3956516391045974, "grad_norm": 0.375, "learning_rate": 2.0763506436504215e-05, "loss": 1.9941, "step": 12263 }, { "epoch": 0.39568390295839373, "grad_norm": 0.458984375, "learning_rate": 2.076205347718924e-05, "loss": 1.976, "step": 12264 }, { "epoch": 0.3957161668121901, "grad_norm": 0.35546875, "learning_rate": 2.0760600454449716e-05, "loss": 1.9633, "step": 12265 }, { "epoch": 0.3957484306659864, "grad_norm": 0.390625, "learning_rate": 2.0759147368301636e-05, "loss": 1.9796, "step": 12266 }, { "epoch": 0.39578069451978276, "grad_norm": 0.375, "learning_rate": 2.0757694218760993e-05, "loss": 1.9742, "step": 12267 }, { "epoch": 0.3958129583735791, "grad_norm": 0.37890625, "learning_rate": 2.075624100584378e-05, "loss": 1.9887, "step": 12268 }, { "epoch": 0.3958452222273755, "grad_norm": 0.384765625, "learning_rate": 2.0754787729566002e-05, "loss": 2.0139, "step": 12269 }, { "epoch": 0.39587748608117185, "grad_norm": 0.37109375, "learning_rate": 2.075333438994364e-05, "loss": 1.9333, "step": 12270 }, { "epoch": 0.3959097499349682, "grad_norm": 0.36328125, "learning_rate": 2.0751880986992706e-05, "loss": 1.9941, "step": 12271 }, { "epoch": 0.39594201378876454, "grad_norm": 0.373046875, "learning_rate": 2.0750427520729192e-05, "loss": 1.9934, "step": 12272 }, { "epoch": 0.3959742776425609, "grad_norm": 0.373046875, "learning_rate": 2.07489739911691e-05, "loss": 1.9851, "step": 12273 }, { "epoch": 0.3960065414963572, "grad_norm": 0.392578125, "learning_rate": 2.0747520398328423e-05, "loss": 1.9651, "step": 12274 }, { "epoch": 0.39603880535015357, "grad_norm": 0.36328125, "learning_rate": 2.0746066742223162e-05, "loss": 2.0, "step": 12275 }, { "epoch": 0.3960710692039499, "grad_norm": 0.380859375, "learning_rate": 2.0744613022869327e-05, "loss": 2.0005, "step": 12276 }, { "epoch": 0.39610333305774625, "grad_norm": 0.3515625, "learning_rate": 2.074315924028291e-05, "loss": 1.9785, "step": 12277 }, { "epoch": 0.3961355969115426, "grad_norm": 0.37109375, "learning_rate": 2.0741705394479915e-05, "loss": 2.021, "step": 12278 }, { "epoch": 0.39616786076533894, "grad_norm": 0.3515625, "learning_rate": 2.074025148547635e-05, "loss": 1.971, "step": 12279 }, { "epoch": 0.3962001246191353, "grad_norm": 0.361328125, "learning_rate": 2.073879751328821e-05, "loss": 1.9785, "step": 12280 }, { "epoch": 0.39623238847293163, "grad_norm": 0.34765625, "learning_rate": 2.0737343477931506e-05, "loss": 1.9553, "step": 12281 }, { "epoch": 0.396264652326728, "grad_norm": 0.375, "learning_rate": 2.073588937942224e-05, "loss": 1.9989, "step": 12282 }, { "epoch": 0.3962969161805243, "grad_norm": 0.376953125, "learning_rate": 2.073443521777642e-05, "loss": 1.9526, "step": 12283 }, { "epoch": 0.39632918003432066, "grad_norm": 0.3515625, "learning_rate": 2.073298099301005e-05, "loss": 1.9887, "step": 12284 }, { "epoch": 0.396361443888117, "grad_norm": 0.39453125, "learning_rate": 2.0731526705139137e-05, "loss": 1.98, "step": 12285 }, { "epoch": 0.39639370774191335, "grad_norm": 0.3671875, "learning_rate": 2.0730072354179695e-05, "loss": 1.991, "step": 12286 }, { "epoch": 0.3964259715957097, "grad_norm": 0.373046875, "learning_rate": 2.072861794014772e-05, "loss": 1.9706, "step": 12287 }, { "epoch": 0.39645823544950604, "grad_norm": 0.375, "learning_rate": 2.072716346305923e-05, "loss": 1.9765, "step": 12288 }, { "epoch": 0.3964904993033024, "grad_norm": 0.365234375, "learning_rate": 2.0725708922930236e-05, "loss": 1.994, "step": 12289 }, { "epoch": 0.3965227631570988, "grad_norm": 0.357421875, "learning_rate": 2.072425431977675e-05, "loss": 1.9903, "step": 12290 }, { "epoch": 0.3965550270108951, "grad_norm": 0.376953125, "learning_rate": 2.072279965361477e-05, "loss": 1.9571, "step": 12291 }, { "epoch": 0.39658729086469147, "grad_norm": 0.369140625, "learning_rate": 2.0721344924460322e-05, "loss": 1.9798, "step": 12292 }, { "epoch": 0.3966195547184878, "grad_norm": 0.345703125, "learning_rate": 2.0719890132329413e-05, "loss": 1.9893, "step": 12293 }, { "epoch": 0.39665181857228415, "grad_norm": 0.369140625, "learning_rate": 2.0718435277238058e-05, "loss": 1.9973, "step": 12294 }, { "epoch": 0.3966840824260805, "grad_norm": 0.349609375, "learning_rate": 2.0716980359202262e-05, "loss": 1.9948, "step": 12295 }, { "epoch": 0.39671634627987684, "grad_norm": 0.3671875, "learning_rate": 2.071552537823806e-05, "loss": 1.9754, "step": 12296 }, { "epoch": 0.3967486101336732, "grad_norm": 0.361328125, "learning_rate": 2.0714070334361448e-05, "loss": 1.9898, "step": 12297 }, { "epoch": 0.39678087398746953, "grad_norm": 0.353515625, "learning_rate": 2.071261522758845e-05, "loss": 1.9976, "step": 12298 }, { "epoch": 0.3968131378412659, "grad_norm": 0.361328125, "learning_rate": 2.071116005793508e-05, "loss": 1.9994, "step": 12299 }, { "epoch": 0.3968454016950622, "grad_norm": 0.37109375, "learning_rate": 2.070970482541736e-05, "loss": 1.9477, "step": 12300 }, { "epoch": 0.39687766554885856, "grad_norm": 0.345703125, "learning_rate": 2.0708249530051304e-05, "loss": 1.9603, "step": 12301 }, { "epoch": 0.3969099294026549, "grad_norm": 0.396484375, "learning_rate": 2.0706794171852932e-05, "loss": 1.9796, "step": 12302 }, { "epoch": 0.39694219325645125, "grad_norm": 0.353515625, "learning_rate": 2.0705338750838265e-05, "loss": 2.002, "step": 12303 }, { "epoch": 0.3969744571102476, "grad_norm": 0.39453125, "learning_rate": 2.0703883267023322e-05, "loss": 1.9703, "step": 12304 }, { "epoch": 0.39700672096404394, "grad_norm": 0.390625, "learning_rate": 2.070242772042412e-05, "loss": 2.0097, "step": 12305 }, { "epoch": 0.3970389848178403, "grad_norm": 0.376953125, "learning_rate": 2.0700972111056686e-05, "loss": 1.9897, "step": 12306 }, { "epoch": 0.3970712486716366, "grad_norm": 0.4296875, "learning_rate": 2.0699516438937045e-05, "loss": 1.982, "step": 12307 }, { "epoch": 0.39710351252543297, "grad_norm": 0.392578125, "learning_rate": 2.0698060704081214e-05, "loss": 2.0071, "step": 12308 }, { "epoch": 0.3971357763792293, "grad_norm": 0.41015625, "learning_rate": 2.0696604906505216e-05, "loss": 1.9859, "step": 12309 }, { "epoch": 0.3971680402330257, "grad_norm": 1.09375, "learning_rate": 2.069514904622508e-05, "loss": 2.1976, "step": 12310 }, { "epoch": 0.39720030408682205, "grad_norm": 0.90234375, "learning_rate": 2.0693693123256832e-05, "loss": 2.1119, "step": 12311 }, { "epoch": 0.3972325679406184, "grad_norm": 0.80859375, "learning_rate": 2.069223713761649e-05, "loss": 2.1171, "step": 12312 }, { "epoch": 0.39726483179441474, "grad_norm": 0.7109375, "learning_rate": 2.0690781089320085e-05, "loss": 2.1457, "step": 12313 }, { "epoch": 0.3972970956482111, "grad_norm": 0.6640625, "learning_rate": 2.0689324978383652e-05, "loss": 2.1144, "step": 12314 }, { "epoch": 0.39732935950200743, "grad_norm": 0.59765625, "learning_rate": 2.06878688048232e-05, "loss": 1.9883, "step": 12315 }, { "epoch": 0.3973616233558038, "grad_norm": 0.59765625, "learning_rate": 2.0686412568654775e-05, "loss": 1.9925, "step": 12316 }, { "epoch": 0.3973938872096001, "grad_norm": 0.5546875, "learning_rate": 2.06849562698944e-05, "loss": 1.965, "step": 12317 }, { "epoch": 0.39742615106339646, "grad_norm": 0.546875, "learning_rate": 2.0683499908558104e-05, "loss": 1.9819, "step": 12318 }, { "epoch": 0.3974584149171928, "grad_norm": 0.5078125, "learning_rate": 2.0682043484661916e-05, "loss": 1.9865, "step": 12319 }, { "epoch": 0.39749067877098915, "grad_norm": 0.494140625, "learning_rate": 2.0680586998221872e-05, "loss": 1.9809, "step": 12320 }, { "epoch": 0.3975229426247855, "grad_norm": 0.498046875, "learning_rate": 2.0679130449254e-05, "loss": 1.9542, "step": 12321 }, { "epoch": 0.39755520647858184, "grad_norm": 0.5, "learning_rate": 2.0677673837774335e-05, "loss": 2.001, "step": 12322 }, { "epoch": 0.3975874703323782, "grad_norm": 0.494140625, "learning_rate": 2.067621716379891e-05, "loss": 1.9858, "step": 12323 }, { "epoch": 0.3976197341861745, "grad_norm": 0.44140625, "learning_rate": 2.067476042734376e-05, "loss": 1.9758, "step": 12324 }, { "epoch": 0.39765199803997087, "grad_norm": 0.47265625, "learning_rate": 2.0673303628424917e-05, "loss": 2.0019, "step": 12325 }, { "epoch": 0.3976842618937672, "grad_norm": 0.384765625, "learning_rate": 2.0671846767058414e-05, "loss": 1.9874, "step": 12326 }, { "epoch": 0.39771652574756355, "grad_norm": 0.421875, "learning_rate": 2.0670389843260294e-05, "loss": 1.9861, "step": 12327 }, { "epoch": 0.3977487896013599, "grad_norm": 0.4375, "learning_rate": 2.0668932857046593e-05, "loss": 1.9723, "step": 12328 }, { "epoch": 0.39778105345515624, "grad_norm": 0.384765625, "learning_rate": 2.0667475808433344e-05, "loss": 1.9834, "step": 12329 }, { "epoch": 0.39781331730895264, "grad_norm": 0.439453125, "learning_rate": 2.0666018697436586e-05, "loss": 1.9937, "step": 12330 }, { "epoch": 0.397845581162749, "grad_norm": 0.388671875, "learning_rate": 2.0664561524072358e-05, "loss": 2.0204, "step": 12331 }, { "epoch": 0.39787784501654533, "grad_norm": 0.412109375, "learning_rate": 2.0663104288356703e-05, "loss": 1.9922, "step": 12332 }, { "epoch": 0.39791010887034167, "grad_norm": 0.3828125, "learning_rate": 2.0661646990305658e-05, "loss": 1.9603, "step": 12333 }, { "epoch": 0.397942372724138, "grad_norm": 0.37890625, "learning_rate": 2.0660189629935265e-05, "loss": 1.9922, "step": 12334 }, { "epoch": 0.39797463657793436, "grad_norm": 0.380859375, "learning_rate": 2.0658732207261563e-05, "loss": 1.9808, "step": 12335 }, { "epoch": 0.3980069004317307, "grad_norm": 0.38671875, "learning_rate": 2.06572747223006e-05, "loss": 1.9949, "step": 12336 }, { "epoch": 0.39803916428552705, "grad_norm": 0.357421875, "learning_rate": 2.065581717506841e-05, "loss": 1.9882, "step": 12337 }, { "epoch": 0.3980714281393234, "grad_norm": 0.392578125, "learning_rate": 2.0654359565581047e-05, "loss": 1.9957, "step": 12338 }, { "epoch": 0.39810369199311973, "grad_norm": 0.380859375, "learning_rate": 2.065290189385455e-05, "loss": 1.9756, "step": 12339 }, { "epoch": 0.3981359558469161, "grad_norm": 0.373046875, "learning_rate": 2.065144415990496e-05, "loss": 1.9848, "step": 12340 }, { "epoch": 0.3981682197007124, "grad_norm": 0.37890625, "learning_rate": 2.064998636374833e-05, "loss": 1.9704, "step": 12341 }, { "epoch": 0.39820048355450877, "grad_norm": 0.361328125, "learning_rate": 2.0648528505400704e-05, "loss": 1.9501, "step": 12342 }, { "epoch": 0.3982327474083051, "grad_norm": 0.37890625, "learning_rate": 2.0647070584878126e-05, "loss": 1.9814, "step": 12343 }, { "epoch": 0.39826501126210145, "grad_norm": 0.392578125, "learning_rate": 2.0645612602196648e-05, "loss": 1.9966, "step": 12344 }, { "epoch": 0.3982972751158978, "grad_norm": 0.361328125, "learning_rate": 2.064415455737232e-05, "loss": 2.0139, "step": 12345 }, { "epoch": 0.39832953896969414, "grad_norm": 0.376953125, "learning_rate": 2.064269645042118e-05, "loss": 1.9589, "step": 12346 }, { "epoch": 0.3983618028234905, "grad_norm": 0.359375, "learning_rate": 2.0641238281359292e-05, "loss": 2.0012, "step": 12347 }, { "epoch": 0.39839406667728683, "grad_norm": 0.357421875, "learning_rate": 2.06397800502027e-05, "loss": 1.9721, "step": 12348 }, { "epoch": 0.3984263305310832, "grad_norm": 0.373046875, "learning_rate": 2.0638321756967452e-05, "loss": 1.9912, "step": 12349 }, { "epoch": 0.39845859438487957, "grad_norm": 0.36328125, "learning_rate": 2.06368634016696e-05, "loss": 1.9819, "step": 12350 }, { "epoch": 0.3984908582386759, "grad_norm": 0.3515625, "learning_rate": 2.0635404984325207e-05, "loss": 1.9992, "step": 12351 }, { "epoch": 0.39852312209247226, "grad_norm": 0.361328125, "learning_rate": 2.0633946504950318e-05, "loss": 2.0122, "step": 12352 }, { "epoch": 0.3985553859462686, "grad_norm": 0.34375, "learning_rate": 2.0632487963560985e-05, "loss": 1.9931, "step": 12353 }, { "epoch": 0.39858764980006495, "grad_norm": 0.353515625, "learning_rate": 2.0631029360173258e-05, "loss": 1.9873, "step": 12354 }, { "epoch": 0.3986199136538613, "grad_norm": 0.349609375, "learning_rate": 2.062957069480321e-05, "loss": 1.9627, "step": 12355 }, { "epoch": 0.39865217750765763, "grad_norm": 0.34765625, "learning_rate": 2.0628111967466884e-05, "loss": 1.9761, "step": 12356 }, { "epoch": 0.398684441361454, "grad_norm": 0.365234375, "learning_rate": 2.0626653178180338e-05, "loss": 1.9792, "step": 12357 }, { "epoch": 0.3987167052152503, "grad_norm": 0.337890625, "learning_rate": 2.062519432695963e-05, "loss": 1.9186, "step": 12358 }, { "epoch": 0.39874896906904667, "grad_norm": 0.349609375, "learning_rate": 2.0623735413820822e-05, "loss": 1.9705, "step": 12359 }, { "epoch": 0.398781232922843, "grad_norm": 0.34765625, "learning_rate": 2.0622276438779965e-05, "loss": 2.0063, "step": 12360 }, { "epoch": 0.39881349677663935, "grad_norm": 0.359375, "learning_rate": 2.0620817401853122e-05, "loss": 1.9844, "step": 12361 }, { "epoch": 0.3988457606304357, "grad_norm": 0.35546875, "learning_rate": 2.0619358303056357e-05, "loss": 1.9738, "step": 12362 }, { "epoch": 0.39887802448423204, "grad_norm": 0.361328125, "learning_rate": 2.0617899142405725e-05, "loss": 2.0016, "step": 12363 }, { "epoch": 0.3989102883380284, "grad_norm": 0.345703125, "learning_rate": 2.0616439919917284e-05, "loss": 1.9893, "step": 12364 }, { "epoch": 0.39894255219182473, "grad_norm": 0.357421875, "learning_rate": 2.0614980635607114e-05, "loss": 1.9452, "step": 12365 }, { "epoch": 0.3989748160456211, "grad_norm": 0.365234375, "learning_rate": 2.0613521289491256e-05, "loss": 1.9969, "step": 12366 }, { "epoch": 0.3990070798994174, "grad_norm": 0.33984375, "learning_rate": 2.0612061881585785e-05, "loss": 1.9788, "step": 12367 }, { "epoch": 0.39903934375321376, "grad_norm": 0.357421875, "learning_rate": 2.061060241190676e-05, "loss": 2.0027, "step": 12368 }, { "epoch": 0.3990716076070101, "grad_norm": 0.35546875, "learning_rate": 2.060914288047025e-05, "loss": 1.9627, "step": 12369 }, { "epoch": 0.3991038714608065, "grad_norm": 0.35546875, "learning_rate": 2.0607683287292325e-05, "loss": 2.0036, "step": 12370 }, { "epoch": 0.39913613531460285, "grad_norm": 0.353515625, "learning_rate": 2.0606223632389037e-05, "loss": 2.0079, "step": 12371 }, { "epoch": 0.3991683991683992, "grad_norm": 0.359375, "learning_rate": 2.0604763915776466e-05, "loss": 1.987, "step": 12372 }, { "epoch": 0.39920066302219553, "grad_norm": 0.357421875, "learning_rate": 2.0603304137470677e-05, "loss": 1.9835, "step": 12373 }, { "epoch": 0.3992329268759919, "grad_norm": 0.34375, "learning_rate": 2.0601844297487732e-05, "loss": 1.9827, "step": 12374 }, { "epoch": 0.3992651907297882, "grad_norm": 0.3671875, "learning_rate": 2.0600384395843704e-05, "loss": 1.9664, "step": 12375 }, { "epoch": 0.39929745458358457, "grad_norm": 0.345703125, "learning_rate": 2.0598924432554663e-05, "loss": 1.9941, "step": 12376 }, { "epoch": 0.3993297184373809, "grad_norm": 0.34375, "learning_rate": 2.0597464407636678e-05, "loss": 2.0126, "step": 12377 }, { "epoch": 0.39936198229117725, "grad_norm": 0.349609375, "learning_rate": 2.0596004321105818e-05, "loss": 1.9912, "step": 12378 }, { "epoch": 0.3993942461449736, "grad_norm": 0.34375, "learning_rate": 2.0594544172978163e-05, "loss": 1.9621, "step": 12379 }, { "epoch": 0.39942650999876994, "grad_norm": 0.3828125, "learning_rate": 2.0593083963269776e-05, "loss": 1.9839, "step": 12380 }, { "epoch": 0.3994587738525663, "grad_norm": 0.423828125, "learning_rate": 2.059162369199673e-05, "loss": 1.9861, "step": 12381 }, { "epoch": 0.39949103770636263, "grad_norm": 0.353515625, "learning_rate": 2.0590163359175106e-05, "loss": 1.9938, "step": 12382 }, { "epoch": 0.39952330156015897, "grad_norm": 0.37890625, "learning_rate": 2.058870296482097e-05, "loss": 1.97, "step": 12383 }, { "epoch": 0.3995555654139553, "grad_norm": 0.380859375, "learning_rate": 2.058724250895041e-05, "loss": 1.9674, "step": 12384 }, { "epoch": 0.39958782926775166, "grad_norm": 0.35546875, "learning_rate": 2.0585781991579482e-05, "loss": 1.9739, "step": 12385 }, { "epoch": 0.399620093121548, "grad_norm": 0.3828125, "learning_rate": 2.0584321412724277e-05, "loss": 2.0021, "step": 12386 }, { "epoch": 0.39965235697534435, "grad_norm": 0.388671875, "learning_rate": 2.058286077240087e-05, "loss": 1.9844, "step": 12387 }, { "epoch": 0.3996846208291407, "grad_norm": 0.470703125, "learning_rate": 2.0581400070625335e-05, "loss": 1.9891, "step": 12388 }, { "epoch": 0.39971688468293703, "grad_norm": 0.37109375, "learning_rate": 2.0579939307413755e-05, "loss": 1.9471, "step": 12389 }, { "epoch": 0.3997491485367334, "grad_norm": 0.373046875, "learning_rate": 2.0578478482782206e-05, "loss": 1.9906, "step": 12390 }, { "epoch": 0.3997814123905298, "grad_norm": 0.357421875, "learning_rate": 2.0577017596746762e-05, "loss": 1.9742, "step": 12391 }, { "epoch": 0.3998136762443261, "grad_norm": 0.357421875, "learning_rate": 2.057555664932351e-05, "loss": 1.9877, "step": 12392 }, { "epoch": 0.39984594009812247, "grad_norm": 0.37890625, "learning_rate": 2.0574095640528537e-05, "loss": 1.9606, "step": 12393 }, { "epoch": 0.3998782039519188, "grad_norm": 0.359375, "learning_rate": 2.0572634570377916e-05, "loss": 1.9657, "step": 12394 }, { "epoch": 0.39991046780571515, "grad_norm": 0.390625, "learning_rate": 2.0571173438887726e-05, "loss": 1.9957, "step": 12395 }, { "epoch": 0.3999427316595115, "grad_norm": 0.353515625, "learning_rate": 2.0569712246074062e-05, "loss": 2.0044, "step": 12396 }, { "epoch": 0.39997499551330784, "grad_norm": 0.359375, "learning_rate": 2.0568250991952994e-05, "loss": 1.9592, "step": 12397 }, { "epoch": 0.4000072593671042, "grad_norm": 0.380859375, "learning_rate": 2.056678967654062e-05, "loss": 2.0027, "step": 12398 }, { "epoch": 0.40003952322090053, "grad_norm": 0.353515625, "learning_rate": 2.056532829985302e-05, "loss": 1.9548, "step": 12399 }, { "epoch": 0.40007178707469687, "grad_norm": 0.373046875, "learning_rate": 2.0563866861906275e-05, "loss": 2.0046, "step": 12400 }, { "epoch": 0.4001040509284932, "grad_norm": 0.37890625, "learning_rate": 2.0562405362716474e-05, "loss": 1.9857, "step": 12401 }, { "epoch": 0.40013631478228956, "grad_norm": 0.3515625, "learning_rate": 2.0560943802299707e-05, "loss": 1.9697, "step": 12402 }, { "epoch": 0.4001685786360859, "grad_norm": 0.369140625, "learning_rate": 2.0559482180672063e-05, "loss": 2.0243, "step": 12403 }, { "epoch": 0.40020084248988225, "grad_norm": 0.369140625, "learning_rate": 2.0558020497849625e-05, "loss": 1.9958, "step": 12404 }, { "epoch": 0.4002331063436786, "grad_norm": 0.359375, "learning_rate": 2.0556558753848482e-05, "loss": 1.9775, "step": 12405 }, { "epoch": 0.40026537019747493, "grad_norm": 0.359375, "learning_rate": 2.055509694868473e-05, "loss": 1.9628, "step": 12406 }, { "epoch": 0.4002976340512713, "grad_norm": 0.3828125, "learning_rate": 2.0553635082374454e-05, "loss": 1.9977, "step": 12407 }, { "epoch": 0.4003298979050676, "grad_norm": 0.37109375, "learning_rate": 2.0552173154933746e-05, "loss": 1.9919, "step": 12408 }, { "epoch": 0.40036216175886397, "grad_norm": 0.3671875, "learning_rate": 2.05507111663787e-05, "loss": 1.9981, "step": 12409 }, { "epoch": 0.4003944256126603, "grad_norm": 0.36328125, "learning_rate": 2.054924911672541e-05, "loss": 1.9776, "step": 12410 }, { "epoch": 0.4004266894664567, "grad_norm": 0.3671875, "learning_rate": 2.0547787005989967e-05, "loss": 1.9777, "step": 12411 }, { "epoch": 0.40045895332025305, "grad_norm": 0.357421875, "learning_rate": 2.054632483418846e-05, "loss": 1.9492, "step": 12412 }, { "epoch": 0.4004912171740494, "grad_norm": 0.3515625, "learning_rate": 2.0544862601336994e-05, "loss": 1.9954, "step": 12413 }, { "epoch": 0.40052348102784574, "grad_norm": 0.357421875, "learning_rate": 2.0543400307451652e-05, "loss": 1.9671, "step": 12414 }, { "epoch": 0.4005557448816421, "grad_norm": 0.349609375, "learning_rate": 2.054193795254854e-05, "loss": 1.988, "step": 12415 }, { "epoch": 0.40058800873543843, "grad_norm": 0.349609375, "learning_rate": 2.0540475536643754e-05, "loss": 1.9982, "step": 12416 }, { "epoch": 0.40062027258923477, "grad_norm": 0.353515625, "learning_rate": 2.0539013059753384e-05, "loss": 1.9974, "step": 12417 }, { "epoch": 0.4006525364430311, "grad_norm": 0.345703125, "learning_rate": 2.0537550521893528e-05, "loss": 1.992, "step": 12418 }, { "epoch": 0.40068480029682746, "grad_norm": 0.361328125, "learning_rate": 2.0536087923080293e-05, "loss": 1.9505, "step": 12419 }, { "epoch": 0.4007170641506238, "grad_norm": 0.349609375, "learning_rate": 2.0534625263329773e-05, "loss": 1.9857, "step": 12420 }, { "epoch": 0.40074932800442015, "grad_norm": 0.349609375, "learning_rate": 2.053316254265807e-05, "loss": 1.9647, "step": 12421 }, { "epoch": 0.4007815918582165, "grad_norm": 0.361328125, "learning_rate": 2.0531699761081284e-05, "loss": 2.0014, "step": 12422 }, { "epoch": 0.40081385571201283, "grad_norm": 0.357421875, "learning_rate": 2.053023691861551e-05, "loss": 1.9844, "step": 12423 }, { "epoch": 0.4008461195658092, "grad_norm": 0.359375, "learning_rate": 2.0528774015276864e-05, "loss": 1.9699, "step": 12424 }, { "epoch": 0.4008783834196055, "grad_norm": 0.36328125, "learning_rate": 2.052731105108143e-05, "loss": 1.9604, "step": 12425 }, { "epoch": 0.40091064727340187, "grad_norm": 0.359375, "learning_rate": 2.052584802604533e-05, "loss": 1.9829, "step": 12426 }, { "epoch": 0.4009429111271982, "grad_norm": 0.357421875, "learning_rate": 2.0524384940184653e-05, "loss": 1.9959, "step": 12427 }, { "epoch": 0.40097517498099455, "grad_norm": 0.361328125, "learning_rate": 2.0522921793515512e-05, "loss": 1.9789, "step": 12428 }, { "epoch": 0.4010074388347909, "grad_norm": 0.37890625, "learning_rate": 2.0521458586054014e-05, "loss": 1.9652, "step": 12429 }, { "epoch": 0.40103970268858724, "grad_norm": 0.390625, "learning_rate": 2.0519995317816255e-05, "loss": 1.999, "step": 12430 }, { "epoch": 0.40107196654238364, "grad_norm": 0.361328125, "learning_rate": 2.051853198881835e-05, "loss": 2.002, "step": 12431 }, { "epoch": 0.40110423039618, "grad_norm": 0.35546875, "learning_rate": 2.0517068599076405e-05, "loss": 1.9993, "step": 12432 }, { "epoch": 0.4011364942499763, "grad_norm": 0.333984375, "learning_rate": 2.0515605148606524e-05, "loss": 1.9852, "step": 12433 }, { "epoch": 0.40116875810377267, "grad_norm": 0.384765625, "learning_rate": 2.051414163742482e-05, "loss": 1.9594, "step": 12434 }, { "epoch": 0.401201021957569, "grad_norm": 0.353515625, "learning_rate": 2.05126780655474e-05, "loss": 1.9644, "step": 12435 }, { "epoch": 0.40123328581136536, "grad_norm": 0.375, "learning_rate": 2.0511214432990374e-05, "loss": 1.9733, "step": 12436 }, { "epoch": 0.4012655496651617, "grad_norm": 0.361328125, "learning_rate": 2.0509750739769853e-05, "loss": 1.9944, "step": 12437 }, { "epoch": 0.40129781351895805, "grad_norm": 0.361328125, "learning_rate": 2.0508286985901952e-05, "loss": 1.987, "step": 12438 }, { "epoch": 0.4013300773727544, "grad_norm": 0.37109375, "learning_rate": 2.0506823171402776e-05, "loss": 1.9537, "step": 12439 }, { "epoch": 0.40136234122655073, "grad_norm": 0.365234375, "learning_rate": 2.0505359296288442e-05, "loss": 1.9877, "step": 12440 }, { "epoch": 0.4013946050803471, "grad_norm": 0.3828125, "learning_rate": 2.0503895360575063e-05, "loss": 2.0016, "step": 12441 }, { "epoch": 0.4014268689341434, "grad_norm": 0.392578125, "learning_rate": 2.050243136427875e-05, "loss": 1.9506, "step": 12442 }, { "epoch": 0.40145913278793977, "grad_norm": 0.365234375, "learning_rate": 2.050096730741562e-05, "loss": 1.9897, "step": 12443 }, { "epoch": 0.4014913966417361, "grad_norm": 0.36328125, "learning_rate": 2.0499503190001793e-05, "loss": 2.008, "step": 12444 }, { "epoch": 0.40152366049553245, "grad_norm": 0.35546875, "learning_rate": 2.049803901205338e-05, "loss": 1.9866, "step": 12445 }, { "epoch": 0.4015559243493288, "grad_norm": 0.357421875, "learning_rate": 2.04965747735865e-05, "loss": 2.0025, "step": 12446 }, { "epoch": 0.40158818820312514, "grad_norm": 0.3515625, "learning_rate": 2.049511047461726e-05, "loss": 1.9725, "step": 12447 }, { "epoch": 0.4016204520569215, "grad_norm": 0.37109375, "learning_rate": 2.049364611516179e-05, "loss": 1.9706, "step": 12448 }, { "epoch": 0.40165271591071783, "grad_norm": 0.373046875, "learning_rate": 2.0492181695236205e-05, "loss": 1.9656, "step": 12449 }, { "epoch": 0.40168497976451417, "grad_norm": 0.39453125, "learning_rate": 2.049071721485662e-05, "loss": 2.0401, "step": 12450 }, { "epoch": 0.40171724361831057, "grad_norm": 0.453125, "learning_rate": 2.0489252674039162e-05, "loss": 2.1306, "step": 12451 }, { "epoch": 0.4017495074721069, "grad_norm": 0.4296875, "learning_rate": 2.048778807279995e-05, "loss": 2.1008, "step": 12452 }, { "epoch": 0.40178177132590326, "grad_norm": 0.40234375, "learning_rate": 2.0486323411155102e-05, "loss": 2.1324, "step": 12453 }, { "epoch": 0.4018140351796996, "grad_norm": 0.447265625, "learning_rate": 2.048485868912074e-05, "loss": 2.1295, "step": 12454 }, { "epoch": 0.40184629903349595, "grad_norm": 0.40234375, "learning_rate": 2.0483393906712993e-05, "loss": 2.1427, "step": 12455 }, { "epoch": 0.4018785628872923, "grad_norm": 0.400390625, "learning_rate": 2.0481929063947976e-05, "loss": 2.1248, "step": 12456 }, { "epoch": 0.40191082674108863, "grad_norm": 0.431640625, "learning_rate": 2.048046416084182e-05, "loss": 2.1114, "step": 12457 }, { "epoch": 0.401943090594885, "grad_norm": 0.392578125, "learning_rate": 2.0478999197410646e-05, "loss": 2.1329, "step": 12458 }, { "epoch": 0.4019753544486813, "grad_norm": 0.41015625, "learning_rate": 2.0477534173670575e-05, "loss": 2.1062, "step": 12459 }, { "epoch": 0.40200761830247767, "grad_norm": 0.447265625, "learning_rate": 2.047606908963774e-05, "loss": 2.1249, "step": 12460 }, { "epoch": 0.402039882156274, "grad_norm": 0.359375, "learning_rate": 2.0474603945328265e-05, "loss": 2.1021, "step": 12461 }, { "epoch": 0.40207214601007035, "grad_norm": 0.416015625, "learning_rate": 2.0473138740758277e-05, "loss": 2.1002, "step": 12462 }, { "epoch": 0.4021044098638667, "grad_norm": 0.3671875, "learning_rate": 2.0471673475943905e-05, "loss": 2.1184, "step": 12463 }, { "epoch": 0.40213667371766304, "grad_norm": 0.41796875, "learning_rate": 2.0470208150901275e-05, "loss": 2.0842, "step": 12464 }, { "epoch": 0.4021689375714594, "grad_norm": 0.369140625, "learning_rate": 2.046874276564652e-05, "loss": 2.1072, "step": 12465 }, { "epoch": 0.4022012014252557, "grad_norm": 0.416015625, "learning_rate": 2.0467277320195768e-05, "loss": 2.1177, "step": 12466 }, { "epoch": 0.40223346527905207, "grad_norm": 0.375, "learning_rate": 2.0465811814565148e-05, "loss": 2.1275, "step": 12467 }, { "epoch": 0.4022657291328484, "grad_norm": 0.37109375, "learning_rate": 2.046434624877079e-05, "loss": 2.098, "step": 12468 }, { "epoch": 0.40229799298664476, "grad_norm": 0.361328125, "learning_rate": 2.0462880622828834e-05, "loss": 2.1222, "step": 12469 }, { "epoch": 0.4023302568404411, "grad_norm": 0.375, "learning_rate": 2.0461414936755405e-05, "loss": 2.1297, "step": 12470 }, { "epoch": 0.40236252069423745, "grad_norm": 0.369140625, "learning_rate": 2.045994919056664e-05, "loss": 2.1009, "step": 12471 }, { "epoch": 0.40239478454803385, "grad_norm": 0.3671875, "learning_rate": 2.045848338427867e-05, "loss": 2.1332, "step": 12472 }, { "epoch": 0.4024270484018302, "grad_norm": 0.36328125, "learning_rate": 2.0457017517907627e-05, "loss": 2.1291, "step": 12473 }, { "epoch": 0.40245931225562653, "grad_norm": 0.361328125, "learning_rate": 2.0455551591469653e-05, "loss": 2.0923, "step": 12474 }, { "epoch": 0.4024915761094229, "grad_norm": 0.38671875, "learning_rate": 2.0454085604980882e-05, "loss": 2.0762, "step": 12475 }, { "epoch": 0.4025238399632192, "grad_norm": 0.36328125, "learning_rate": 2.0452619558457448e-05, "loss": 2.12, "step": 12476 }, { "epoch": 0.40255610381701556, "grad_norm": 0.37890625, "learning_rate": 2.0451153451915492e-05, "loss": 2.1079, "step": 12477 }, { "epoch": 0.4025883676708119, "grad_norm": 0.39453125, "learning_rate": 2.044968728537115e-05, "loss": 2.1212, "step": 12478 }, { "epoch": 0.40262063152460825, "grad_norm": 0.380859375, "learning_rate": 2.0448221058840552e-05, "loss": 2.1206, "step": 12479 }, { "epoch": 0.4026528953784046, "grad_norm": 0.3671875, "learning_rate": 2.0446754772339854e-05, "loss": 2.0838, "step": 12480 }, { "epoch": 0.40268515923220094, "grad_norm": 0.369140625, "learning_rate": 2.0445288425885184e-05, "loss": 2.1117, "step": 12481 }, { "epoch": 0.4027174230859973, "grad_norm": 0.349609375, "learning_rate": 2.044382201949268e-05, "loss": 2.0715, "step": 12482 }, { "epoch": 0.4027496869397936, "grad_norm": 0.40625, "learning_rate": 2.04423555531785e-05, "loss": 2.1169, "step": 12483 }, { "epoch": 0.40278195079358997, "grad_norm": 0.3359375, "learning_rate": 2.0440889026958764e-05, "loss": 2.1064, "step": 12484 }, { "epoch": 0.4028142146473863, "grad_norm": 0.3984375, "learning_rate": 2.043942244084963e-05, "loss": 2.0843, "step": 12485 }, { "epoch": 0.40284647850118266, "grad_norm": 0.35546875, "learning_rate": 2.0437955794867237e-05, "loss": 2.1174, "step": 12486 }, { "epoch": 0.402878742354979, "grad_norm": 0.380859375, "learning_rate": 2.0436489089027726e-05, "loss": 2.1015, "step": 12487 }, { "epoch": 0.40291100620877535, "grad_norm": 0.3828125, "learning_rate": 2.043502232334724e-05, "loss": 2.1061, "step": 12488 }, { "epoch": 0.4029432700625717, "grad_norm": 0.3828125, "learning_rate": 2.0433555497841935e-05, "loss": 2.1051, "step": 12489 }, { "epoch": 0.40297553391636803, "grad_norm": 0.3828125, "learning_rate": 2.043208861252794e-05, "loss": 2.12, "step": 12490 }, { "epoch": 0.4030077977701644, "grad_norm": 0.443359375, "learning_rate": 2.0430621667421418e-05, "loss": 2.1155, "step": 12491 }, { "epoch": 0.4030400616239608, "grad_norm": 0.3671875, "learning_rate": 2.0429154662538504e-05, "loss": 2.135, "step": 12492 }, { "epoch": 0.4030723254777571, "grad_norm": 0.361328125, "learning_rate": 2.0427687597895352e-05, "loss": 2.1117, "step": 12493 }, { "epoch": 0.40310458933155346, "grad_norm": 0.37109375, "learning_rate": 2.042622047350811e-05, "loss": 2.1007, "step": 12494 }, { "epoch": 0.4031368531853498, "grad_norm": 0.359375, "learning_rate": 2.0424753289392922e-05, "loss": 2.1229, "step": 12495 }, { "epoch": 0.40316911703914615, "grad_norm": 0.369140625, "learning_rate": 2.0423286045565944e-05, "loss": 2.1113, "step": 12496 }, { "epoch": 0.4032013808929425, "grad_norm": 0.376953125, "learning_rate": 2.0421818742043326e-05, "loss": 2.1283, "step": 12497 }, { "epoch": 0.40323364474673884, "grad_norm": 0.412109375, "learning_rate": 2.0420351378841213e-05, "loss": 2.0936, "step": 12498 }, { "epoch": 0.4032659086005352, "grad_norm": 0.466796875, "learning_rate": 2.041888395597576e-05, "loss": 2.1437, "step": 12499 }, { "epoch": 0.4032981724543315, "grad_norm": 0.458984375, "learning_rate": 2.0417416473463122e-05, "loss": 2.1492, "step": 12500 }, { "epoch": 0.40333043630812787, "grad_norm": 0.51953125, "learning_rate": 2.041594893131945e-05, "loss": 2.1454, "step": 12501 }, { "epoch": 0.4033627001619242, "grad_norm": 0.419921875, "learning_rate": 2.04144813295609e-05, "loss": 2.141, "step": 12502 }, { "epoch": 0.40339496401572056, "grad_norm": 0.4375, "learning_rate": 2.041301366820362e-05, "loss": 2.1549, "step": 12503 }, { "epoch": 0.4034272278695169, "grad_norm": 0.439453125, "learning_rate": 2.0411545947263768e-05, "loss": 2.1335, "step": 12504 }, { "epoch": 0.40345949172331325, "grad_norm": 0.416015625, "learning_rate": 2.0410078166757504e-05, "loss": 2.1671, "step": 12505 }, { "epoch": 0.4034917555771096, "grad_norm": 0.4375, "learning_rate": 2.040861032670098e-05, "loss": 2.1506, "step": 12506 }, { "epoch": 0.40352401943090593, "grad_norm": 0.45703125, "learning_rate": 2.040714242711035e-05, "loss": 2.1337, "step": 12507 }, { "epoch": 0.4035562832847023, "grad_norm": 0.373046875, "learning_rate": 2.040567446800178e-05, "loss": 2.1564, "step": 12508 }, { "epoch": 0.4035885471384986, "grad_norm": 0.4140625, "learning_rate": 2.0404206449391423e-05, "loss": 2.1417, "step": 12509 }, { "epoch": 0.40362081099229496, "grad_norm": 0.376953125, "learning_rate": 2.0402738371295436e-05, "loss": 2.1488, "step": 12510 }, { "epoch": 0.4036530748460913, "grad_norm": 0.396484375, "learning_rate": 2.0401270233729985e-05, "loss": 2.121, "step": 12511 }, { "epoch": 0.4036853386998877, "grad_norm": 0.408203125, "learning_rate": 2.039980203671122e-05, "loss": 2.1902, "step": 12512 }, { "epoch": 0.40371760255368405, "grad_norm": 0.4609375, "learning_rate": 2.039833378025531e-05, "loss": 2.1019, "step": 12513 }, { "epoch": 0.4037498664074804, "grad_norm": 0.396484375, "learning_rate": 2.039686546437842e-05, "loss": 2.1814, "step": 12514 }, { "epoch": 0.40378213026127674, "grad_norm": 0.416015625, "learning_rate": 2.0395397089096702e-05, "loss": 2.1545, "step": 12515 }, { "epoch": 0.4038143941150731, "grad_norm": 0.390625, "learning_rate": 2.039392865442632e-05, "loss": 2.1367, "step": 12516 }, { "epoch": 0.4038466579688694, "grad_norm": 0.39453125, "learning_rate": 2.0392460160383448e-05, "loss": 2.1238, "step": 12517 }, { "epoch": 0.40387892182266577, "grad_norm": 0.3984375, "learning_rate": 2.039099160698424e-05, "loss": 2.068, "step": 12518 }, { "epoch": 0.4039111856764621, "grad_norm": 0.384765625, "learning_rate": 2.0389522994244856e-05, "loss": 2.167, "step": 12519 }, { "epoch": 0.40394344953025846, "grad_norm": 0.40234375, "learning_rate": 2.0388054322181483e-05, "loss": 2.1526, "step": 12520 }, { "epoch": 0.4039757133840548, "grad_norm": 0.384765625, "learning_rate": 2.0386585590810266e-05, "loss": 2.1441, "step": 12521 }, { "epoch": 0.40400797723785115, "grad_norm": 0.404296875, "learning_rate": 2.038511680014738e-05, "loss": 2.0992, "step": 12522 }, { "epoch": 0.4040402410916475, "grad_norm": 0.404296875, "learning_rate": 2.0383647950208995e-05, "loss": 2.1442, "step": 12523 }, { "epoch": 0.40407250494544383, "grad_norm": 0.361328125, "learning_rate": 2.038217904101127e-05, "loss": 2.0893, "step": 12524 }, { "epoch": 0.4041047687992402, "grad_norm": 0.4140625, "learning_rate": 2.0380710072570385e-05, "loss": 2.1261, "step": 12525 }, { "epoch": 0.4041370326530365, "grad_norm": 0.392578125, "learning_rate": 2.03792410449025e-05, "loss": 2.1596, "step": 12526 }, { "epoch": 0.40416929650683286, "grad_norm": 0.376953125, "learning_rate": 2.0377771958023785e-05, "loss": 2.1106, "step": 12527 }, { "epoch": 0.4042015603606292, "grad_norm": 0.369140625, "learning_rate": 2.0376302811950424e-05, "loss": 2.0877, "step": 12528 }, { "epoch": 0.40423382421442555, "grad_norm": 0.38671875, "learning_rate": 2.037483360669857e-05, "loss": 2.0982, "step": 12529 }, { "epoch": 0.4042660880682219, "grad_norm": 0.40625, "learning_rate": 2.0373364342284403e-05, "loss": 2.0845, "step": 12530 }, { "epoch": 0.40429835192201824, "grad_norm": 0.404296875, "learning_rate": 2.0371895018724104e-05, "loss": 1.9996, "step": 12531 }, { "epoch": 0.40433061577581464, "grad_norm": 0.400390625, "learning_rate": 2.0370425636033834e-05, "loss": 2.064, "step": 12532 }, { "epoch": 0.404362879629611, "grad_norm": 0.38671875, "learning_rate": 2.0368956194229768e-05, "loss": 2.0108, "step": 12533 }, { "epoch": 0.4043951434834073, "grad_norm": 0.42578125, "learning_rate": 2.0367486693328088e-05, "loss": 2.0435, "step": 12534 }, { "epoch": 0.40442740733720367, "grad_norm": 0.376953125, "learning_rate": 2.036601713334497e-05, "loss": 2.1158, "step": 12535 }, { "epoch": 0.404459671191, "grad_norm": 0.3984375, "learning_rate": 2.0364547514296576e-05, "loss": 2.0492, "step": 12536 }, { "epoch": 0.40449193504479636, "grad_norm": 0.400390625, "learning_rate": 2.0363077836199096e-05, "loss": 2.0448, "step": 12537 }, { "epoch": 0.4045241988985927, "grad_norm": 0.3828125, "learning_rate": 2.03616080990687e-05, "loss": 2.0617, "step": 12538 }, { "epoch": 0.40455646275238905, "grad_norm": 0.37890625, "learning_rate": 2.0360138302921573e-05, "loss": 2.0045, "step": 12539 }, { "epoch": 0.4045887266061854, "grad_norm": 0.39453125, "learning_rate": 2.0358668447773882e-05, "loss": 2.0273, "step": 12540 }, { "epoch": 0.40462099045998173, "grad_norm": 0.373046875, "learning_rate": 2.0357198533641816e-05, "loss": 2.0291, "step": 12541 }, { "epoch": 0.4046532543137781, "grad_norm": 0.37109375, "learning_rate": 2.0355728560541555e-05, "loss": 2.0205, "step": 12542 }, { "epoch": 0.4046855181675744, "grad_norm": 0.412109375, "learning_rate": 2.0354258528489275e-05, "loss": 2.1056, "step": 12543 }, { "epoch": 0.40471778202137076, "grad_norm": 0.369140625, "learning_rate": 2.0352788437501153e-05, "loss": 2.0604, "step": 12544 }, { "epoch": 0.4047500458751671, "grad_norm": 0.396484375, "learning_rate": 2.0351318287593383e-05, "loss": 2.0722, "step": 12545 }, { "epoch": 0.40478230972896345, "grad_norm": 0.490234375, "learning_rate": 2.034984807878214e-05, "loss": 2.1158, "step": 12546 }, { "epoch": 0.4048145735827598, "grad_norm": 0.412109375, "learning_rate": 2.03483778110836e-05, "loss": 2.1607, "step": 12547 }, { "epoch": 0.40484683743655614, "grad_norm": 0.41796875, "learning_rate": 2.0346907484513958e-05, "loss": 2.1299, "step": 12548 }, { "epoch": 0.4048791012903525, "grad_norm": 0.421875, "learning_rate": 2.03454370990894e-05, "loss": 2.1687, "step": 12549 }, { "epoch": 0.4049113651441488, "grad_norm": 0.42578125, "learning_rate": 2.0343966654826095e-05, "loss": 2.1499, "step": 12550 }, { "epoch": 0.40494362899794517, "grad_norm": 0.404296875, "learning_rate": 2.0342496151740243e-05, "loss": 2.1429, "step": 12551 }, { "epoch": 0.4049758928517415, "grad_norm": 0.427734375, "learning_rate": 2.034102558984803e-05, "loss": 2.168, "step": 12552 }, { "epoch": 0.4050081567055379, "grad_norm": 0.37109375, "learning_rate": 2.0339554969165634e-05, "loss": 2.129, "step": 12553 }, { "epoch": 0.40504042055933426, "grad_norm": 0.396484375, "learning_rate": 2.033808428970925e-05, "loss": 2.1539, "step": 12554 }, { "epoch": 0.4050726844131306, "grad_norm": 0.39453125, "learning_rate": 2.033661355149506e-05, "loss": 2.1121, "step": 12555 }, { "epoch": 0.40510494826692695, "grad_norm": 0.404296875, "learning_rate": 2.0335142754539263e-05, "loss": 2.1862, "step": 12556 }, { "epoch": 0.4051372121207233, "grad_norm": 0.392578125, "learning_rate": 2.0333671898858037e-05, "loss": 2.2208, "step": 12557 }, { "epoch": 0.40516947597451963, "grad_norm": 0.39453125, "learning_rate": 2.033220098446758e-05, "loss": 2.2006, "step": 12558 }, { "epoch": 0.405201739828316, "grad_norm": 0.375, "learning_rate": 2.033073001138408e-05, "loss": 2.1437, "step": 12559 }, { "epoch": 0.4052340036821123, "grad_norm": 0.388671875, "learning_rate": 2.0329258979623724e-05, "loss": 2.0974, "step": 12560 }, { "epoch": 0.40526626753590866, "grad_norm": 0.376953125, "learning_rate": 2.032778788920271e-05, "loss": 2.0316, "step": 12561 }, { "epoch": 0.405298531389705, "grad_norm": 0.37890625, "learning_rate": 2.0326316740137233e-05, "loss": 2.0778, "step": 12562 }, { "epoch": 0.40533079524350135, "grad_norm": 0.373046875, "learning_rate": 2.032484553244348e-05, "loss": 2.1368, "step": 12563 }, { "epoch": 0.4053630590972977, "grad_norm": 0.365234375, "learning_rate": 2.0323374266137642e-05, "loss": 2.1472, "step": 12564 }, { "epoch": 0.40539532295109404, "grad_norm": 0.369140625, "learning_rate": 2.0321902941235928e-05, "loss": 2.1137, "step": 12565 }, { "epoch": 0.4054275868048904, "grad_norm": 0.392578125, "learning_rate": 2.0320431557754524e-05, "loss": 2.1493, "step": 12566 }, { "epoch": 0.4054598506586867, "grad_norm": 0.361328125, "learning_rate": 2.031896011570962e-05, "loss": 2.1387, "step": 12567 }, { "epoch": 0.40549211451248307, "grad_norm": 0.3984375, "learning_rate": 2.031748861511742e-05, "loss": 2.0899, "step": 12568 }, { "epoch": 0.4055243783662794, "grad_norm": 0.421875, "learning_rate": 2.0316017055994122e-05, "loss": 2.1359, "step": 12569 }, { "epoch": 0.40555664222007576, "grad_norm": 0.37109375, "learning_rate": 2.031454543835592e-05, "loss": 2.1341, "step": 12570 }, { "epoch": 0.4055889060738721, "grad_norm": 0.396484375, "learning_rate": 2.0313073762219016e-05, "loss": 2.0117, "step": 12571 }, { "epoch": 0.40562116992766845, "grad_norm": 0.384765625, "learning_rate": 2.0311602027599606e-05, "loss": 2.0487, "step": 12572 }, { "epoch": 0.40565343378146484, "grad_norm": 0.404296875, "learning_rate": 2.0310130234513895e-05, "loss": 2.1294, "step": 12573 }, { "epoch": 0.4056856976352612, "grad_norm": 0.43359375, "learning_rate": 2.030865838297808e-05, "loss": 2.1015, "step": 12574 }, { "epoch": 0.40571796148905753, "grad_norm": 0.3984375, "learning_rate": 2.0307186473008357e-05, "loss": 2.0895, "step": 12575 }, { "epoch": 0.4057502253428539, "grad_norm": 0.416015625, "learning_rate": 2.0305714504620938e-05, "loss": 2.13, "step": 12576 }, { "epoch": 0.4057824891966502, "grad_norm": 0.408203125, "learning_rate": 2.030424247783202e-05, "loss": 2.1297, "step": 12577 }, { "epoch": 0.40581475305044656, "grad_norm": 0.4296875, "learning_rate": 2.0302770392657796e-05, "loss": 2.1191, "step": 12578 }, { "epoch": 0.4058470169042429, "grad_norm": 0.431640625, "learning_rate": 2.0301298249114493e-05, "loss": 2.1163, "step": 12579 }, { "epoch": 0.40587928075803925, "grad_norm": 0.39453125, "learning_rate": 2.02998260472183e-05, "loss": 2.0375, "step": 12580 }, { "epoch": 0.4059115446118356, "grad_norm": 0.41015625, "learning_rate": 2.0298353786985416e-05, "loss": 2.1321, "step": 12581 }, { "epoch": 0.40594380846563194, "grad_norm": 0.40234375, "learning_rate": 2.0296881468432056e-05, "loss": 2.1816, "step": 12582 }, { "epoch": 0.4059760723194283, "grad_norm": 0.39453125, "learning_rate": 2.029540909157443e-05, "loss": 2.0694, "step": 12583 }, { "epoch": 0.4060083361732246, "grad_norm": 0.373046875, "learning_rate": 2.0293936656428736e-05, "loss": 2.0722, "step": 12584 }, { "epoch": 0.40604060002702097, "grad_norm": 0.3984375, "learning_rate": 2.0292464163011183e-05, "loss": 2.0728, "step": 12585 }, { "epoch": 0.4060728638808173, "grad_norm": 0.359375, "learning_rate": 2.0290991611337984e-05, "loss": 2.0878, "step": 12586 }, { "epoch": 0.40610512773461366, "grad_norm": 0.396484375, "learning_rate": 2.028951900142535e-05, "loss": 2.0939, "step": 12587 }, { "epoch": 0.40613739158841, "grad_norm": 0.384765625, "learning_rate": 2.028804633328948e-05, "loss": 2.0661, "step": 12588 }, { "epoch": 0.40616965544220635, "grad_norm": 0.357421875, "learning_rate": 2.028657360694659e-05, "loss": 2.0813, "step": 12589 }, { "epoch": 0.4062019192960027, "grad_norm": 0.373046875, "learning_rate": 2.0285100822412894e-05, "loss": 2.1107, "step": 12590 }, { "epoch": 0.40623418314979903, "grad_norm": 0.369140625, "learning_rate": 2.0283627979704596e-05, "loss": 2.0915, "step": 12591 }, { "epoch": 0.4062664470035954, "grad_norm": 1.5625, "learning_rate": 2.028215507883791e-05, "loss": 2.1085, "step": 12592 }, { "epoch": 0.4062987108573918, "grad_norm": 0.71875, "learning_rate": 2.028068211982905e-05, "loss": 2.0944, "step": 12593 }, { "epoch": 0.4063309747111881, "grad_norm": 0.7734375, "learning_rate": 2.027920910269424e-05, "loss": 2.1216, "step": 12594 }, { "epoch": 0.40636323856498446, "grad_norm": 0.7265625, "learning_rate": 2.027773602744967e-05, "loss": 2.112, "step": 12595 }, { "epoch": 0.4063955024187808, "grad_norm": 0.7109375, "learning_rate": 2.0276262894111574e-05, "loss": 2.0819, "step": 12596 }, { "epoch": 0.40642776627257715, "grad_norm": 0.58984375, "learning_rate": 2.0274789702696166e-05, "loss": 2.078, "step": 12597 }, { "epoch": 0.4064600301263735, "grad_norm": 0.65234375, "learning_rate": 2.0273316453219647e-05, "loss": 2.1098, "step": 12598 }, { "epoch": 0.40649229398016984, "grad_norm": 0.53125, "learning_rate": 2.0271843145698248e-05, "loss": 2.089, "step": 12599 }, { "epoch": 0.4065245578339662, "grad_norm": 0.55078125, "learning_rate": 2.0270369780148183e-05, "loss": 2.0545, "step": 12600 }, { "epoch": 0.4065568216877625, "grad_norm": 0.50390625, "learning_rate": 2.026889635658567e-05, "loss": 2.0795, "step": 12601 }, { "epoch": 0.40658908554155887, "grad_norm": 0.515625, "learning_rate": 2.026742287502692e-05, "loss": 2.0574, "step": 12602 }, { "epoch": 0.4066213493953552, "grad_norm": 0.447265625, "learning_rate": 2.026594933548816e-05, "loss": 2.0522, "step": 12603 }, { "epoch": 0.40665361324915156, "grad_norm": 0.4609375, "learning_rate": 2.026447573798561e-05, "loss": 2.0714, "step": 12604 }, { "epoch": 0.4066858771029479, "grad_norm": 0.443359375, "learning_rate": 2.0263002082535488e-05, "loss": 2.0373, "step": 12605 }, { "epoch": 0.40671814095674425, "grad_norm": 0.41015625, "learning_rate": 2.0261528369154013e-05, "loss": 2.0687, "step": 12606 }, { "epoch": 0.4067504048105406, "grad_norm": 0.40234375, "learning_rate": 2.026005459785741e-05, "loss": 2.0156, "step": 12607 }, { "epoch": 0.40678266866433693, "grad_norm": 0.431640625, "learning_rate": 2.02585807686619e-05, "loss": 2.0896, "step": 12608 }, { "epoch": 0.4068149325181333, "grad_norm": 0.38671875, "learning_rate": 2.0257106881583698e-05, "loss": 2.0161, "step": 12609 }, { "epoch": 0.4068471963719296, "grad_norm": 0.408203125, "learning_rate": 2.0255632936639045e-05, "loss": 2.0753, "step": 12610 }, { "epoch": 0.40687946022572596, "grad_norm": 0.376953125, "learning_rate": 2.025415893384415e-05, "loss": 2.0433, "step": 12611 }, { "epoch": 0.4069117240795223, "grad_norm": 0.384765625, "learning_rate": 2.025268487321524e-05, "loss": 2.0524, "step": 12612 }, { "epoch": 0.4069439879333187, "grad_norm": 0.37109375, "learning_rate": 2.025121075476855e-05, "loss": 2.0894, "step": 12613 }, { "epoch": 0.40697625178711505, "grad_norm": 0.3828125, "learning_rate": 2.0249736578520297e-05, "loss": 2.0601, "step": 12614 }, { "epoch": 0.4070085156409114, "grad_norm": 0.37890625, "learning_rate": 2.0248262344486707e-05, "loss": 2.0693, "step": 12615 }, { "epoch": 0.40704077949470774, "grad_norm": 0.357421875, "learning_rate": 2.024678805268401e-05, "loss": 2.0528, "step": 12616 }, { "epoch": 0.4070730433485041, "grad_norm": 0.365234375, "learning_rate": 2.0245313703128436e-05, "loss": 2.0465, "step": 12617 }, { "epoch": 0.4071053072023004, "grad_norm": 0.361328125, "learning_rate": 2.0243839295836212e-05, "loss": 2.0522, "step": 12618 }, { "epoch": 0.40713757105609677, "grad_norm": 0.34765625, "learning_rate": 2.024236483082357e-05, "loss": 2.0407, "step": 12619 }, { "epoch": 0.4071698349098931, "grad_norm": 0.37109375, "learning_rate": 2.0240890308106727e-05, "loss": 2.0532, "step": 12620 }, { "epoch": 0.40720209876368946, "grad_norm": 0.353515625, "learning_rate": 2.023941572770193e-05, "loss": 2.056, "step": 12621 }, { "epoch": 0.4072343626174858, "grad_norm": 0.357421875, "learning_rate": 2.0237941089625407e-05, "loss": 2.0245, "step": 12622 }, { "epoch": 0.40726662647128214, "grad_norm": 0.36328125, "learning_rate": 2.023646639389338e-05, "loss": 2.0637, "step": 12623 }, { "epoch": 0.4072988903250785, "grad_norm": 0.36328125, "learning_rate": 2.023499164052209e-05, "loss": 2.0343, "step": 12624 }, { "epoch": 0.40733115417887483, "grad_norm": 0.39453125, "learning_rate": 2.0233516829527767e-05, "loss": 2.0691, "step": 12625 }, { "epoch": 0.4073634180326712, "grad_norm": 0.3671875, "learning_rate": 2.0232041960926645e-05, "loss": 2.0179, "step": 12626 }, { "epoch": 0.4073956818864675, "grad_norm": 0.369140625, "learning_rate": 2.023056703473496e-05, "loss": 1.9925, "step": 12627 }, { "epoch": 0.40742794574026386, "grad_norm": 0.353515625, "learning_rate": 2.0229092050968944e-05, "loss": 1.9973, "step": 12628 }, { "epoch": 0.4074602095940602, "grad_norm": 0.37109375, "learning_rate": 2.0227617009644835e-05, "loss": 1.9837, "step": 12629 }, { "epoch": 0.40749247344785655, "grad_norm": 0.353515625, "learning_rate": 2.0226141910778866e-05, "loss": 1.977, "step": 12630 }, { "epoch": 0.4075247373016529, "grad_norm": 0.357421875, "learning_rate": 2.022466675438728e-05, "loss": 2.0219, "step": 12631 }, { "epoch": 0.40755700115544924, "grad_norm": 0.361328125, "learning_rate": 2.022319154048631e-05, "loss": 2.0022, "step": 12632 }, { "epoch": 0.40758926500924564, "grad_norm": 0.349609375, "learning_rate": 2.022171626909219e-05, "loss": 2.0094, "step": 12633 }, { "epoch": 0.407621528863042, "grad_norm": 0.369140625, "learning_rate": 2.022024094022117e-05, "loss": 1.982, "step": 12634 }, { "epoch": 0.4076537927168383, "grad_norm": 0.337890625, "learning_rate": 2.0218765553889475e-05, "loss": 1.9867, "step": 12635 }, { "epoch": 0.40768605657063467, "grad_norm": 0.365234375, "learning_rate": 2.0217290110113358e-05, "loss": 1.9915, "step": 12636 }, { "epoch": 0.407718320424431, "grad_norm": 0.359375, "learning_rate": 2.0215814608909054e-05, "loss": 2.0029, "step": 12637 }, { "epoch": 0.40775058427822736, "grad_norm": 0.353515625, "learning_rate": 2.0214339050292805e-05, "loss": 1.9788, "step": 12638 }, { "epoch": 0.4077828481320237, "grad_norm": 0.345703125, "learning_rate": 2.0212863434280853e-05, "loss": 1.9904, "step": 12639 }, { "epoch": 0.40781511198582004, "grad_norm": 0.345703125, "learning_rate": 2.0211387760889434e-05, "loss": 1.9761, "step": 12640 }, { "epoch": 0.4078473758396164, "grad_norm": 0.349609375, "learning_rate": 2.0209912030134804e-05, "loss": 2.0045, "step": 12641 }, { "epoch": 0.40787963969341273, "grad_norm": 0.359375, "learning_rate": 2.0208436242033196e-05, "loss": 2.0126, "step": 12642 }, { "epoch": 0.4079119035472091, "grad_norm": 0.34765625, "learning_rate": 2.020696039660086e-05, "loss": 1.9942, "step": 12643 }, { "epoch": 0.4079441674010054, "grad_norm": 0.359375, "learning_rate": 2.020548449385404e-05, "loss": 2.0204, "step": 12644 }, { "epoch": 0.40797643125480176, "grad_norm": 0.380859375, "learning_rate": 2.020400853380898e-05, "loss": 1.995, "step": 12645 }, { "epoch": 0.4080086951085981, "grad_norm": 0.37109375, "learning_rate": 2.0202532516481927e-05, "loss": 1.9869, "step": 12646 }, { "epoch": 0.40804095896239445, "grad_norm": 0.349609375, "learning_rate": 2.0201056441889128e-05, "loss": 2.0059, "step": 12647 }, { "epoch": 0.4080732228161908, "grad_norm": 0.376953125, "learning_rate": 2.0199580310046833e-05, "loss": 1.9874, "step": 12648 }, { "epoch": 0.40810548666998714, "grad_norm": 0.380859375, "learning_rate": 2.0198104120971287e-05, "loss": 1.9845, "step": 12649 }, { "epoch": 0.4081377505237835, "grad_norm": 0.373046875, "learning_rate": 2.019662787467874e-05, "loss": 1.9832, "step": 12650 }, { "epoch": 0.4081700143775798, "grad_norm": 0.42578125, "learning_rate": 2.0195151571185445e-05, "loss": 2.0069, "step": 12651 }, { "epoch": 0.40820227823137617, "grad_norm": 0.357421875, "learning_rate": 2.019367521050764e-05, "loss": 2.0019, "step": 12652 }, { "epoch": 0.4082345420851725, "grad_norm": 0.373046875, "learning_rate": 2.0192198792661595e-05, "loss": 1.9662, "step": 12653 }, { "epoch": 0.4082668059389689, "grad_norm": 0.365234375, "learning_rate": 2.019072231766354e-05, "loss": 1.9996, "step": 12654 }, { "epoch": 0.40829906979276526, "grad_norm": 0.349609375, "learning_rate": 2.0189245785529742e-05, "loss": 1.9422, "step": 12655 }, { "epoch": 0.4083313336465616, "grad_norm": 0.359375, "learning_rate": 2.0187769196276452e-05, "loss": 1.9633, "step": 12656 }, { "epoch": 0.40836359750035794, "grad_norm": 0.3515625, "learning_rate": 2.0186292549919914e-05, "loss": 1.9747, "step": 12657 }, { "epoch": 0.4083958613541543, "grad_norm": 0.34765625, "learning_rate": 2.0184815846476394e-05, "loss": 1.9508, "step": 12658 }, { "epoch": 0.40842812520795063, "grad_norm": 0.361328125, "learning_rate": 2.018333908596214e-05, "loss": 1.9985, "step": 12659 }, { "epoch": 0.408460389061747, "grad_norm": 0.3515625, "learning_rate": 2.0181862268393404e-05, "loss": 1.9774, "step": 12660 }, { "epoch": 0.4084926529155433, "grad_norm": 0.35546875, "learning_rate": 2.0180385393786447e-05, "loss": 1.9779, "step": 12661 }, { "epoch": 0.40852491676933966, "grad_norm": 0.341796875, "learning_rate": 2.0178908462157527e-05, "loss": 1.9982, "step": 12662 }, { "epoch": 0.408557180623136, "grad_norm": 0.345703125, "learning_rate": 2.0177431473522892e-05, "loss": 1.9874, "step": 12663 }, { "epoch": 0.40858944447693235, "grad_norm": 0.337890625, "learning_rate": 2.017595442789881e-05, "loss": 1.9807, "step": 12664 }, { "epoch": 0.4086217083307287, "grad_norm": 0.345703125, "learning_rate": 2.0174477325301538e-05, "loss": 2.0005, "step": 12665 }, { "epoch": 0.40865397218452504, "grad_norm": 0.349609375, "learning_rate": 2.0173000165747327e-05, "loss": 2.0183, "step": 12666 }, { "epoch": 0.4086862360383214, "grad_norm": 0.34375, "learning_rate": 2.017152294925244e-05, "loss": 1.9624, "step": 12667 }, { "epoch": 0.4087184998921177, "grad_norm": 0.345703125, "learning_rate": 2.0170045675833146e-05, "loss": 1.9769, "step": 12668 }, { "epoch": 0.40875076374591407, "grad_norm": 0.341796875, "learning_rate": 2.0168568345505692e-05, "loss": 1.986, "step": 12669 }, { "epoch": 0.4087830275997104, "grad_norm": 0.34765625, "learning_rate": 2.016709095828635e-05, "loss": 1.9693, "step": 12670 }, { "epoch": 0.40881529145350676, "grad_norm": 0.35546875, "learning_rate": 2.016561351419137e-05, "loss": 1.9832, "step": 12671 }, { "epoch": 0.4088475553073031, "grad_norm": 0.359375, "learning_rate": 2.0164136013237025e-05, "loss": 1.9975, "step": 12672 }, { "epoch": 0.40887981916109944, "grad_norm": 0.353515625, "learning_rate": 2.016265845543958e-05, "loss": 1.9915, "step": 12673 }, { "epoch": 0.40891208301489584, "grad_norm": 0.33984375, "learning_rate": 2.016118084081529e-05, "loss": 1.9703, "step": 12674 }, { "epoch": 0.4089443468686922, "grad_norm": 0.37109375, "learning_rate": 2.0159703169380424e-05, "loss": 2.0091, "step": 12675 }, { "epoch": 0.40897661072248853, "grad_norm": 0.3515625, "learning_rate": 2.0158225441151254e-05, "loss": 1.9819, "step": 12676 }, { "epoch": 0.4090088745762849, "grad_norm": 0.353515625, "learning_rate": 2.015674765614403e-05, "loss": 1.9698, "step": 12677 }, { "epoch": 0.4090411384300812, "grad_norm": 0.3671875, "learning_rate": 2.0155269814375028e-05, "loss": 1.991, "step": 12678 }, { "epoch": 0.40907340228387756, "grad_norm": 0.39453125, "learning_rate": 2.015379191586052e-05, "loss": 2.0117, "step": 12679 }, { "epoch": 0.4091056661376739, "grad_norm": 0.40234375, "learning_rate": 2.0152313960616764e-05, "loss": 2.0767, "step": 12680 }, { "epoch": 0.40913792999147025, "grad_norm": 0.375, "learning_rate": 2.015083594866003e-05, "loss": 2.0595, "step": 12681 }, { "epoch": 0.4091701938452666, "grad_norm": 0.38671875, "learning_rate": 2.0149357880006594e-05, "loss": 2.0693, "step": 12682 }, { "epoch": 0.40920245769906294, "grad_norm": 0.390625, "learning_rate": 2.014787975467272e-05, "loss": 2.0758, "step": 12683 }, { "epoch": 0.4092347215528593, "grad_norm": 0.3984375, "learning_rate": 2.0146401572674676e-05, "loss": 2.0788, "step": 12684 }, { "epoch": 0.4092669854066556, "grad_norm": 0.40234375, "learning_rate": 2.0144923334028734e-05, "loss": 2.0765, "step": 12685 }, { "epoch": 0.40929924926045197, "grad_norm": 0.390625, "learning_rate": 2.0143445038751172e-05, "loss": 2.0474, "step": 12686 }, { "epoch": 0.4093315131142483, "grad_norm": 0.384765625, "learning_rate": 2.0141966686858256e-05, "loss": 2.076, "step": 12687 }, { "epoch": 0.40936377696804466, "grad_norm": 0.396484375, "learning_rate": 2.0140488278366257e-05, "loss": 2.078, "step": 12688 }, { "epoch": 0.409396040821841, "grad_norm": 0.376953125, "learning_rate": 2.013900981329145e-05, "loss": 2.0761, "step": 12689 }, { "epoch": 0.40942830467563734, "grad_norm": 0.400390625, "learning_rate": 2.0137531291650116e-05, "loss": 2.0557, "step": 12690 }, { "epoch": 0.4094605685294337, "grad_norm": 0.396484375, "learning_rate": 2.0136052713458515e-05, "loss": 2.098, "step": 12691 }, { "epoch": 0.40949283238323003, "grad_norm": 0.3828125, "learning_rate": 2.0134574078732933e-05, "loss": 2.0585, "step": 12692 }, { "epoch": 0.4095250962370264, "grad_norm": 0.37109375, "learning_rate": 2.0133095387489647e-05, "loss": 2.0717, "step": 12693 }, { "epoch": 0.4095573600908228, "grad_norm": 0.37890625, "learning_rate": 2.0131616639744926e-05, "loss": 2.0706, "step": 12694 }, { "epoch": 0.4095896239446191, "grad_norm": 0.37890625, "learning_rate": 2.013013783551505e-05, "loss": 2.0859, "step": 12695 }, { "epoch": 0.40962188779841546, "grad_norm": 0.365234375, "learning_rate": 2.0128658974816298e-05, "loss": 2.0631, "step": 12696 }, { "epoch": 0.4096541516522118, "grad_norm": 0.4140625, "learning_rate": 2.0127180057664947e-05, "loss": 2.0775, "step": 12697 }, { "epoch": 0.40968641550600815, "grad_norm": 0.404296875, "learning_rate": 2.012570108407728e-05, "loss": 2.0872, "step": 12698 }, { "epoch": 0.4097186793598045, "grad_norm": 0.3671875, "learning_rate": 2.012422205406957e-05, "loss": 2.0622, "step": 12699 }, { "epoch": 0.40975094321360084, "grad_norm": 0.380859375, "learning_rate": 2.0122742967658094e-05, "loss": 2.105, "step": 12700 }, { "epoch": 0.4097832070673972, "grad_norm": 0.39453125, "learning_rate": 2.0121263824859148e-05, "loss": 2.0961, "step": 12701 }, { "epoch": 0.4098154709211935, "grad_norm": 0.419921875, "learning_rate": 2.0119784625688994e-05, "loss": 2.0842, "step": 12702 }, { "epoch": 0.40984773477498987, "grad_norm": 0.380859375, "learning_rate": 2.0118305370163932e-05, "loss": 2.0943, "step": 12703 }, { "epoch": 0.4098799986287862, "grad_norm": 0.4375, "learning_rate": 2.0116826058300232e-05, "loss": 2.0734, "step": 12704 }, { "epoch": 0.40991226248258256, "grad_norm": 0.380859375, "learning_rate": 2.0115346690114184e-05, "loss": 2.1005, "step": 12705 }, { "epoch": 0.4099445263363789, "grad_norm": 0.412109375, "learning_rate": 2.011386726562207e-05, "loss": 2.0704, "step": 12706 }, { "epoch": 0.40997679019017524, "grad_norm": 0.3671875, "learning_rate": 2.0112387784840168e-05, "loss": 2.0632, "step": 12707 }, { "epoch": 0.4100090540439716, "grad_norm": 0.419921875, "learning_rate": 2.011090824778477e-05, "loss": 2.156, "step": 12708 }, { "epoch": 0.41004131789776793, "grad_norm": 0.404296875, "learning_rate": 2.0109428654472164e-05, "loss": 2.1514, "step": 12709 }, { "epoch": 0.4100735817515643, "grad_norm": 0.404296875, "learning_rate": 2.0107949004918637e-05, "loss": 2.1398, "step": 12710 }, { "epoch": 0.4101058456053606, "grad_norm": 0.40625, "learning_rate": 2.0106469299140466e-05, "loss": 2.1247, "step": 12711 }, { "epoch": 0.41013810945915696, "grad_norm": 0.412109375, "learning_rate": 2.010498953715394e-05, "loss": 2.1672, "step": 12712 }, { "epoch": 0.4101703733129533, "grad_norm": 0.392578125, "learning_rate": 2.010350971897536e-05, "loss": 2.1589, "step": 12713 }, { "epoch": 0.4102026371667497, "grad_norm": 0.384765625, "learning_rate": 2.0102029844621005e-05, "loss": 2.1103, "step": 12714 }, { "epoch": 0.41023490102054605, "grad_norm": 0.388671875, "learning_rate": 2.0100549914107165e-05, "loss": 2.1667, "step": 12715 }, { "epoch": 0.4102671648743424, "grad_norm": 0.37890625, "learning_rate": 2.009906992745013e-05, "loss": 2.148, "step": 12716 }, { "epoch": 0.41029942872813874, "grad_norm": 0.37890625, "learning_rate": 2.009758988466619e-05, "loss": 2.1663, "step": 12717 }, { "epoch": 0.4103316925819351, "grad_norm": 0.373046875, "learning_rate": 2.009610978577164e-05, "loss": 2.117, "step": 12718 }, { "epoch": 0.4103639564357314, "grad_norm": 0.37890625, "learning_rate": 2.0094629630782764e-05, "loss": 2.156, "step": 12719 }, { "epoch": 0.41039622028952777, "grad_norm": 0.365234375, "learning_rate": 2.0093149419715864e-05, "loss": 2.1822, "step": 12720 }, { "epoch": 0.4104284841433241, "grad_norm": 0.40234375, "learning_rate": 2.009166915258723e-05, "loss": 2.1655, "step": 12721 }, { "epoch": 0.41046074799712046, "grad_norm": 0.390625, "learning_rate": 2.009018882941315e-05, "loss": 2.1478, "step": 12722 }, { "epoch": 0.4104930118509168, "grad_norm": 0.375, "learning_rate": 2.0088708450209926e-05, "loss": 2.1497, "step": 12723 }, { "epoch": 0.41052527570471314, "grad_norm": 0.392578125, "learning_rate": 2.008722801499385e-05, "loss": 2.1499, "step": 12724 }, { "epoch": 0.4105575395585095, "grad_norm": 0.373046875, "learning_rate": 2.0085747523781222e-05, "loss": 2.1553, "step": 12725 }, { "epoch": 0.41058980341230583, "grad_norm": 0.38671875, "learning_rate": 2.0084266976588328e-05, "loss": 2.1778, "step": 12726 }, { "epoch": 0.4106220672661022, "grad_norm": 0.384765625, "learning_rate": 2.008278637343147e-05, "loss": 2.1278, "step": 12727 }, { "epoch": 0.4106543311198985, "grad_norm": 0.373046875, "learning_rate": 2.0081305714326948e-05, "loss": 2.1513, "step": 12728 }, { "epoch": 0.41068659497369486, "grad_norm": 0.39453125, "learning_rate": 2.0079824999291063e-05, "loss": 2.1451, "step": 12729 }, { "epoch": 0.4107188588274912, "grad_norm": 0.40234375, "learning_rate": 2.00783442283401e-05, "loss": 2.146, "step": 12730 }, { "epoch": 0.41075112268128755, "grad_norm": 0.37109375, "learning_rate": 2.007686340149037e-05, "loss": 2.1677, "step": 12731 }, { "epoch": 0.4107833865350839, "grad_norm": 0.384765625, "learning_rate": 2.0075382518758175e-05, "loss": 2.1365, "step": 12732 }, { "epoch": 0.41081565038888024, "grad_norm": 0.36328125, "learning_rate": 2.0073901580159805e-05, "loss": 2.1801, "step": 12733 }, { "epoch": 0.4108479142426766, "grad_norm": 0.404296875, "learning_rate": 2.0072420585711566e-05, "loss": 2.1568, "step": 12734 }, { "epoch": 0.410880178096473, "grad_norm": 0.373046875, "learning_rate": 2.007093953542976e-05, "loss": 2.1655, "step": 12735 }, { "epoch": 0.4109124419502693, "grad_norm": 0.390625, "learning_rate": 2.006945842933069e-05, "loss": 2.1706, "step": 12736 }, { "epoch": 0.41094470580406567, "grad_norm": 0.396484375, "learning_rate": 2.0067977267430662e-05, "loss": 2.1312, "step": 12737 }, { "epoch": 0.410976969657862, "grad_norm": 0.388671875, "learning_rate": 2.0066496049745975e-05, "loss": 2.1475, "step": 12738 }, { "epoch": 0.41100923351165836, "grad_norm": 0.3828125, "learning_rate": 2.006501477629293e-05, "loss": 2.1507, "step": 12739 }, { "epoch": 0.4110414973654547, "grad_norm": 0.373046875, "learning_rate": 2.0063533447087837e-05, "loss": 2.1606, "step": 12740 }, { "epoch": 0.41107376121925104, "grad_norm": 0.38671875, "learning_rate": 2.0062052062147e-05, "loss": 2.1461, "step": 12741 }, { "epoch": 0.4111060250730474, "grad_norm": 0.40625, "learning_rate": 2.0060570621486732e-05, "loss": 2.1599, "step": 12742 }, { "epoch": 0.41113828892684373, "grad_norm": 0.37109375, "learning_rate": 2.0059089125123323e-05, "loss": 2.1607, "step": 12743 }, { "epoch": 0.4111705527806401, "grad_norm": 0.3828125, "learning_rate": 2.00576075730731e-05, "loss": 2.1523, "step": 12744 }, { "epoch": 0.4112028166344364, "grad_norm": 0.357421875, "learning_rate": 2.0056125965352355e-05, "loss": 2.1609, "step": 12745 }, { "epoch": 0.41123508048823276, "grad_norm": 0.39453125, "learning_rate": 2.0054644301977406e-05, "loss": 2.1618, "step": 12746 }, { "epoch": 0.4112673443420291, "grad_norm": 0.373046875, "learning_rate": 2.0053162582964553e-05, "loss": 2.1507, "step": 12747 }, { "epoch": 0.41129960819582545, "grad_norm": 0.3671875, "learning_rate": 2.005168080833012e-05, "loss": 2.1672, "step": 12748 }, { "epoch": 0.4113318720496218, "grad_norm": 0.376953125, "learning_rate": 2.0050198978090402e-05, "loss": 2.1591, "step": 12749 }, { "epoch": 0.41136413590341814, "grad_norm": 0.37109375, "learning_rate": 2.004871709226172e-05, "loss": 2.1419, "step": 12750 }, { "epoch": 0.4113963997572145, "grad_norm": 0.375, "learning_rate": 2.004723515086038e-05, "loss": 2.1659, "step": 12751 }, { "epoch": 0.4114286636110108, "grad_norm": 0.380859375, "learning_rate": 2.0045753153902697e-05, "loss": 2.1063, "step": 12752 }, { "epoch": 0.41146092746480717, "grad_norm": 0.40625, "learning_rate": 2.0044271101404983e-05, "loss": 2.1004, "step": 12753 }, { "epoch": 0.4114931913186035, "grad_norm": 0.390625, "learning_rate": 2.0042788993383552e-05, "loss": 2.1594, "step": 12754 }, { "epoch": 0.4115254551723999, "grad_norm": 0.380859375, "learning_rate": 2.004130682985472e-05, "loss": 2.198, "step": 12755 }, { "epoch": 0.41155771902619626, "grad_norm": 0.3984375, "learning_rate": 2.0039824610834794e-05, "loss": 2.116, "step": 12756 }, { "epoch": 0.4115899828799926, "grad_norm": 0.361328125, "learning_rate": 2.0038342336340093e-05, "loss": 2.0547, "step": 12757 }, { "epoch": 0.41162224673378894, "grad_norm": 0.373046875, "learning_rate": 2.0036860006386942e-05, "loss": 2.0897, "step": 12758 }, { "epoch": 0.4116545105875853, "grad_norm": 0.357421875, "learning_rate": 2.0035377620991643e-05, "loss": 2.0961, "step": 12759 }, { "epoch": 0.41168677444138163, "grad_norm": 0.380859375, "learning_rate": 2.0033895180170523e-05, "loss": 2.0434, "step": 12760 }, { "epoch": 0.411719038295178, "grad_norm": 0.359375, "learning_rate": 2.0032412683939895e-05, "loss": 2.0515, "step": 12761 }, { "epoch": 0.4117513021489743, "grad_norm": 0.384765625, "learning_rate": 2.0030930132316078e-05, "loss": 2.0673, "step": 12762 }, { "epoch": 0.41178356600277066, "grad_norm": 0.390625, "learning_rate": 2.002944752531539e-05, "loss": 2.088, "step": 12763 }, { "epoch": 0.411815829856567, "grad_norm": 0.380859375, "learning_rate": 2.0027964862954155e-05, "loss": 2.0725, "step": 12764 }, { "epoch": 0.41184809371036335, "grad_norm": 0.380859375, "learning_rate": 2.0026482145248684e-05, "loss": 2.0829, "step": 12765 }, { "epoch": 0.4118803575641597, "grad_norm": 0.376953125, "learning_rate": 2.0024999372215313e-05, "loss": 2.0791, "step": 12766 }, { "epoch": 0.41191262141795604, "grad_norm": 0.373046875, "learning_rate": 2.0023516543870348e-05, "loss": 2.0862, "step": 12767 }, { "epoch": 0.4119448852717524, "grad_norm": 0.36328125, "learning_rate": 2.0022033660230113e-05, "loss": 2.0719, "step": 12768 }, { "epoch": 0.4119771491255487, "grad_norm": 0.3828125, "learning_rate": 2.0020550721310943e-05, "loss": 2.075, "step": 12769 }, { "epoch": 0.41200941297934507, "grad_norm": 0.359375, "learning_rate": 2.0019067727129144e-05, "loss": 2.0466, "step": 12770 }, { "epoch": 0.4120416768331414, "grad_norm": 0.376953125, "learning_rate": 2.001758467770105e-05, "loss": 2.0653, "step": 12771 }, { "epoch": 0.41207394068693776, "grad_norm": 0.412109375, "learning_rate": 2.0016101573042992e-05, "loss": 2.0664, "step": 12772 }, { "epoch": 0.4121062045407341, "grad_norm": 0.345703125, "learning_rate": 2.0014618413171276e-05, "loss": 2.0565, "step": 12773 }, { "epoch": 0.41213846839453044, "grad_norm": 0.421875, "learning_rate": 2.0013135198102245e-05, "loss": 2.0825, "step": 12774 }, { "epoch": 0.41217073224832684, "grad_norm": 0.38671875, "learning_rate": 2.0011651927852213e-05, "loss": 2.1024, "step": 12775 }, { "epoch": 0.4122029961021232, "grad_norm": 0.365234375, "learning_rate": 2.0010168602437513e-05, "loss": 2.0669, "step": 12776 }, { "epoch": 0.41223525995591953, "grad_norm": 0.375, "learning_rate": 2.0008685221874473e-05, "loss": 2.0568, "step": 12777 }, { "epoch": 0.4122675238097159, "grad_norm": 0.36328125, "learning_rate": 2.0007201786179417e-05, "loss": 2.0698, "step": 12778 }, { "epoch": 0.4122997876635122, "grad_norm": 0.37109375, "learning_rate": 2.0005718295368676e-05, "loss": 2.1009, "step": 12779 }, { "epoch": 0.41233205151730856, "grad_norm": 0.3515625, "learning_rate": 2.0004234749458584e-05, "loss": 2.0544, "step": 12780 }, { "epoch": 0.4123643153711049, "grad_norm": 0.4140625, "learning_rate": 2.000275114846546e-05, "loss": 2.0844, "step": 12781 }, { "epoch": 0.41239657922490125, "grad_norm": 0.37109375, "learning_rate": 2.000126749240564e-05, "loss": 2.0755, "step": 12782 }, { "epoch": 0.4124288430786976, "grad_norm": 0.400390625, "learning_rate": 1.9999783781295464e-05, "loss": 2.0889, "step": 12783 }, { "epoch": 0.41246110693249394, "grad_norm": 0.3828125, "learning_rate": 1.999830001515125e-05, "loss": 2.0732, "step": 12784 }, { "epoch": 0.4124933707862903, "grad_norm": 0.373046875, "learning_rate": 1.999681619398933e-05, "loss": 1.989, "step": 12785 }, { "epoch": 0.4125256346400866, "grad_norm": 0.423828125, "learning_rate": 1.9995332317826046e-05, "loss": 2.0018, "step": 12786 }, { "epoch": 0.41255789849388297, "grad_norm": 0.37109375, "learning_rate": 1.9993848386677726e-05, "loss": 1.9976, "step": 12787 }, { "epoch": 0.4125901623476793, "grad_norm": 0.38671875, "learning_rate": 1.999236440056071e-05, "loss": 1.9913, "step": 12788 }, { "epoch": 0.41262242620147566, "grad_norm": 0.365234375, "learning_rate": 1.9990880359491324e-05, "loss": 1.9621, "step": 12789 }, { "epoch": 0.412654690055272, "grad_norm": 0.375, "learning_rate": 1.998939626348591e-05, "loss": 1.9909, "step": 12790 }, { "epoch": 0.41268695390906834, "grad_norm": 0.3671875, "learning_rate": 1.99879121125608e-05, "loss": 1.9742, "step": 12791 }, { "epoch": 0.4127192177628647, "grad_norm": 0.388671875, "learning_rate": 1.998642790673233e-05, "loss": 1.9713, "step": 12792 }, { "epoch": 0.41275148161666103, "grad_norm": 0.38671875, "learning_rate": 1.998494364601684e-05, "loss": 1.9999, "step": 12793 }, { "epoch": 0.4127837454704574, "grad_norm": 0.36328125, "learning_rate": 1.9983459330430674e-05, "loss": 1.9719, "step": 12794 }, { "epoch": 0.4128160093242538, "grad_norm": 0.365234375, "learning_rate": 1.9981974959990152e-05, "loss": 2.0126, "step": 12795 }, { "epoch": 0.4128482731780501, "grad_norm": 0.359375, "learning_rate": 1.9980490534711627e-05, "loss": 1.9796, "step": 12796 }, { "epoch": 0.41288053703184646, "grad_norm": 0.35546875, "learning_rate": 1.9979006054611436e-05, "loss": 1.9942, "step": 12797 }, { "epoch": 0.4129128008856428, "grad_norm": 0.388671875, "learning_rate": 1.997752151970592e-05, "loss": 1.955, "step": 12798 }, { "epoch": 0.41294506473943915, "grad_norm": 0.341796875, "learning_rate": 1.9976036930011414e-05, "loss": 2.0026, "step": 12799 }, { "epoch": 0.4129773285932355, "grad_norm": 0.365234375, "learning_rate": 1.997455228554427e-05, "loss": 1.9899, "step": 12800 }, { "epoch": 0.41300959244703184, "grad_norm": 0.361328125, "learning_rate": 1.9973067586320817e-05, "loss": 1.9738, "step": 12801 }, { "epoch": 0.4130418563008282, "grad_norm": 0.34765625, "learning_rate": 1.99715828323574e-05, "loss": 1.9558, "step": 12802 }, { "epoch": 0.4130741201546245, "grad_norm": 0.365234375, "learning_rate": 1.9970098023670378e-05, "loss": 1.999, "step": 12803 }, { "epoch": 0.41310638400842087, "grad_norm": 0.34375, "learning_rate": 1.9968613160276072e-05, "loss": 1.9759, "step": 12804 }, { "epoch": 0.4131386478622172, "grad_norm": 0.361328125, "learning_rate": 1.9967128242190844e-05, "loss": 2.0038, "step": 12805 }, { "epoch": 0.41317091171601356, "grad_norm": 0.345703125, "learning_rate": 1.9965643269431025e-05, "loss": 1.9822, "step": 12806 }, { "epoch": 0.4132031755698099, "grad_norm": 0.384765625, "learning_rate": 1.996415824201297e-05, "loss": 1.9878, "step": 12807 }, { "epoch": 0.41323543942360624, "grad_norm": 0.345703125, "learning_rate": 1.9962673159953025e-05, "loss": 1.9812, "step": 12808 }, { "epoch": 0.4132677032774026, "grad_norm": 0.34375, "learning_rate": 1.996118802326753e-05, "loss": 1.9671, "step": 12809 }, { "epoch": 0.41329996713119893, "grad_norm": 0.37109375, "learning_rate": 1.9959702831972837e-05, "loss": 2.0017, "step": 12810 }, { "epoch": 0.4133322309849953, "grad_norm": 0.3671875, "learning_rate": 1.9958217586085296e-05, "loss": 2.0032, "step": 12811 }, { "epoch": 0.4133644948387916, "grad_norm": 0.353515625, "learning_rate": 1.9956732285621247e-05, "loss": 1.9765, "step": 12812 }, { "epoch": 0.41339675869258796, "grad_norm": 0.427734375, "learning_rate": 1.995524693059705e-05, "loss": 1.9708, "step": 12813 }, { "epoch": 0.4134290225463843, "grad_norm": 0.388671875, "learning_rate": 1.9953761521029053e-05, "loss": 2.0471, "step": 12814 }, { "epoch": 0.4134612864001807, "grad_norm": 0.375, "learning_rate": 1.99522760569336e-05, "loss": 2.0787, "step": 12815 }, { "epoch": 0.41349355025397705, "grad_norm": 0.39453125, "learning_rate": 1.995079053832704e-05, "loss": 2.0687, "step": 12816 }, { "epoch": 0.4135258141077734, "grad_norm": 0.37109375, "learning_rate": 1.9949304965225736e-05, "loss": 2.0632, "step": 12817 }, { "epoch": 0.41355807796156974, "grad_norm": 0.41015625, "learning_rate": 1.994781933764603e-05, "loss": 2.0706, "step": 12818 }, { "epoch": 0.4135903418153661, "grad_norm": 0.3828125, "learning_rate": 1.9946333655604276e-05, "loss": 2.0726, "step": 12819 }, { "epoch": 0.4136226056691624, "grad_norm": 0.38671875, "learning_rate": 1.9944847919116835e-05, "loss": 2.0885, "step": 12820 }, { "epoch": 0.41365486952295877, "grad_norm": 0.359375, "learning_rate": 1.994336212820005e-05, "loss": 2.0901, "step": 12821 }, { "epoch": 0.4136871333767551, "grad_norm": 0.373046875, "learning_rate": 1.994187628287029e-05, "loss": 2.0648, "step": 12822 }, { "epoch": 0.41371939723055146, "grad_norm": 0.369140625, "learning_rate": 1.9940390383143894e-05, "loss": 2.0859, "step": 12823 }, { "epoch": 0.4137516610843478, "grad_norm": 0.3828125, "learning_rate": 1.9938904429037225e-05, "loss": 2.0501, "step": 12824 }, { "epoch": 0.41378392493814414, "grad_norm": 0.3671875, "learning_rate": 1.993741842056664e-05, "loss": 2.0441, "step": 12825 }, { "epoch": 0.4138161887919405, "grad_norm": 0.376953125, "learning_rate": 1.9935932357748496e-05, "loss": 2.0786, "step": 12826 }, { "epoch": 0.41384845264573683, "grad_norm": 0.36328125, "learning_rate": 1.993444624059915e-05, "loss": 2.0383, "step": 12827 }, { "epoch": 0.4138807164995332, "grad_norm": 0.390625, "learning_rate": 1.993296006913496e-05, "loss": 2.0874, "step": 12828 }, { "epoch": 0.4139129803533295, "grad_norm": 0.451171875, "learning_rate": 1.9931473843372284e-05, "loss": 2.0766, "step": 12829 }, { "epoch": 0.41394524420712586, "grad_norm": 0.423828125, "learning_rate": 1.992998756332748e-05, "loss": 2.0854, "step": 12830 }, { "epoch": 0.4139775080609222, "grad_norm": 0.40625, "learning_rate": 1.992850122901691e-05, "loss": 2.0591, "step": 12831 }, { "epoch": 0.41400977191471855, "grad_norm": 0.36328125, "learning_rate": 1.9927014840456938e-05, "loss": 2.0919, "step": 12832 }, { "epoch": 0.4140420357685149, "grad_norm": 0.408203125, "learning_rate": 1.9925528397663914e-05, "loss": 2.1047, "step": 12833 }, { "epoch": 0.41407429962231124, "grad_norm": 0.375, "learning_rate": 1.9924041900654212e-05, "loss": 2.0849, "step": 12834 }, { "epoch": 0.4141065634761076, "grad_norm": 0.380859375, "learning_rate": 1.992255534944419e-05, "loss": 2.0488, "step": 12835 }, { "epoch": 0.414138827329904, "grad_norm": 0.3828125, "learning_rate": 1.9921068744050208e-05, "loss": 2.094, "step": 12836 }, { "epoch": 0.4141710911837003, "grad_norm": 0.388671875, "learning_rate": 1.9919582084488632e-05, "loss": 2.0717, "step": 12837 }, { "epoch": 0.41420335503749667, "grad_norm": 0.38671875, "learning_rate": 1.9918095370775826e-05, "loss": 2.0641, "step": 12838 }, { "epoch": 0.414235618891293, "grad_norm": 0.384765625, "learning_rate": 1.9916608602928152e-05, "loss": 2.0958, "step": 12839 }, { "epoch": 0.41426788274508936, "grad_norm": 0.375, "learning_rate": 1.991512178096198e-05, "loss": 2.0037, "step": 12840 }, { "epoch": 0.4143001465988857, "grad_norm": 0.37109375, "learning_rate": 1.991363490489367e-05, "loss": 2.0907, "step": 12841 }, { "epoch": 0.41433241045268204, "grad_norm": 0.369140625, "learning_rate": 1.99121479747396e-05, "loss": 2.1062, "step": 12842 }, { "epoch": 0.4143646743064784, "grad_norm": 0.404296875, "learning_rate": 1.9910660990516123e-05, "loss": 2.0644, "step": 12843 }, { "epoch": 0.41439693816027473, "grad_norm": 0.388671875, "learning_rate": 1.9909173952239615e-05, "loss": 2.086, "step": 12844 }, { "epoch": 0.4144292020140711, "grad_norm": 0.400390625, "learning_rate": 1.9907686859926443e-05, "loss": 2.0598, "step": 12845 }, { "epoch": 0.4144614658678674, "grad_norm": 0.416015625, "learning_rate": 1.9906199713592976e-05, "loss": 2.0904, "step": 12846 }, { "epoch": 0.41449372972166376, "grad_norm": 0.373046875, "learning_rate": 1.9904712513255575e-05, "loss": 2.0243, "step": 12847 }, { "epoch": 0.4145259935754601, "grad_norm": 0.400390625, "learning_rate": 1.990322525893062e-05, "loss": 2.0688, "step": 12848 }, { "epoch": 0.41455825742925645, "grad_norm": 0.41015625, "learning_rate": 1.9901737950634486e-05, "loss": 2.0435, "step": 12849 }, { "epoch": 0.4145905212830528, "grad_norm": 0.36328125, "learning_rate": 1.9900250588383532e-05, "loss": 2.0795, "step": 12850 }, { "epoch": 0.41462278513684914, "grad_norm": 0.404296875, "learning_rate": 1.989876317219413e-05, "loss": 2.0614, "step": 12851 }, { "epoch": 0.4146550489906455, "grad_norm": 0.37109375, "learning_rate": 1.989727570208266e-05, "loss": 2.0939, "step": 12852 }, { "epoch": 0.4146873128444418, "grad_norm": 0.39453125, "learning_rate": 1.98957881780655e-05, "loss": 2.0793, "step": 12853 }, { "epoch": 0.41471957669823817, "grad_norm": 0.353515625, "learning_rate": 1.9894300600159005e-05, "loss": 2.0842, "step": 12854 }, { "epoch": 0.4147518405520345, "grad_norm": 0.375, "learning_rate": 1.9892812968379568e-05, "loss": 2.0738, "step": 12855 }, { "epoch": 0.4147841044058309, "grad_norm": 0.361328125, "learning_rate": 1.9891325282743554e-05, "loss": 2.042, "step": 12856 }, { "epoch": 0.41481636825962726, "grad_norm": 0.37890625, "learning_rate": 1.9889837543267337e-05, "loss": 2.0923, "step": 12857 }, { "epoch": 0.4148486321134236, "grad_norm": 0.376953125, "learning_rate": 1.98883497499673e-05, "loss": 2.0735, "step": 12858 }, { "epoch": 0.41488089596721994, "grad_norm": 0.40234375, "learning_rate": 1.9886861902859816e-05, "loss": 2.0626, "step": 12859 }, { "epoch": 0.4149131598210163, "grad_norm": 0.3828125, "learning_rate": 1.9885374001961264e-05, "loss": 2.0842, "step": 12860 }, { "epoch": 0.41494542367481263, "grad_norm": 0.3828125, "learning_rate": 1.988388604728801e-05, "loss": 2.0546, "step": 12861 }, { "epoch": 0.414977687528609, "grad_norm": 0.375, "learning_rate": 1.988239803885645e-05, "loss": 2.1046, "step": 12862 }, { "epoch": 0.4150099513824053, "grad_norm": 0.357421875, "learning_rate": 1.9880909976682954e-05, "loss": 2.025, "step": 12863 }, { "epoch": 0.41504221523620166, "grad_norm": 0.390625, "learning_rate": 1.9879421860783903e-05, "loss": 2.0738, "step": 12864 }, { "epoch": 0.415074479089998, "grad_norm": 0.37109375, "learning_rate": 1.9877933691175675e-05, "loss": 2.0384, "step": 12865 }, { "epoch": 0.41510674294379435, "grad_norm": 0.373046875, "learning_rate": 1.9876445467874655e-05, "loss": 2.079, "step": 12866 }, { "epoch": 0.4151390067975907, "grad_norm": 0.3515625, "learning_rate": 1.987495719089722e-05, "loss": 2.0497, "step": 12867 }, { "epoch": 0.41517127065138704, "grad_norm": 0.365234375, "learning_rate": 1.987346886025975e-05, "loss": 2.0682, "step": 12868 }, { "epoch": 0.4152035345051834, "grad_norm": 0.353515625, "learning_rate": 1.9871980475978632e-05, "loss": 2.0773, "step": 12869 }, { "epoch": 0.4152357983589797, "grad_norm": 0.36328125, "learning_rate": 1.9870492038070255e-05, "loss": 2.0633, "step": 12870 }, { "epoch": 0.41526806221277607, "grad_norm": 0.353515625, "learning_rate": 1.986900354655099e-05, "loss": 2.0691, "step": 12871 }, { "epoch": 0.4153003260665724, "grad_norm": 0.365234375, "learning_rate": 1.9867515001437225e-05, "loss": 2.078, "step": 12872 }, { "epoch": 0.41533258992036876, "grad_norm": 0.37109375, "learning_rate": 1.9866026402745356e-05, "loss": 2.053, "step": 12873 }, { "epoch": 0.4153648537741651, "grad_norm": 0.37890625, "learning_rate": 1.9864537750491748e-05, "loss": 2.0854, "step": 12874 }, { "epoch": 0.41539711762796144, "grad_norm": 0.3984375, "learning_rate": 1.9863049044692802e-05, "loss": 2.0281, "step": 12875 }, { "epoch": 0.41542938148175784, "grad_norm": 0.40234375, "learning_rate": 1.9861560285364906e-05, "loss": 2.0907, "step": 12876 }, { "epoch": 0.4154616453355542, "grad_norm": 0.359375, "learning_rate": 1.986007147252444e-05, "loss": 2.0644, "step": 12877 }, { "epoch": 0.41549390918935053, "grad_norm": 0.41015625, "learning_rate": 1.9858582606187786e-05, "loss": 2.0758, "step": 12878 }, { "epoch": 0.4155261730431469, "grad_norm": 0.390625, "learning_rate": 1.985709368637135e-05, "loss": 2.0849, "step": 12879 }, { "epoch": 0.4155584368969432, "grad_norm": 0.365234375, "learning_rate": 1.9855604713091506e-05, "loss": 2.0756, "step": 12880 }, { "epoch": 0.41559070075073956, "grad_norm": 0.384765625, "learning_rate": 1.985411568636465e-05, "loss": 2.0791, "step": 12881 }, { "epoch": 0.4156229646045359, "grad_norm": 0.3671875, "learning_rate": 1.985262660620717e-05, "loss": 2.101, "step": 12882 }, { "epoch": 0.41565522845833225, "grad_norm": 0.3671875, "learning_rate": 1.9851137472635462e-05, "loss": 2.0494, "step": 12883 }, { "epoch": 0.4156874923121286, "grad_norm": 0.37109375, "learning_rate": 1.984964828566591e-05, "loss": 2.0699, "step": 12884 }, { "epoch": 0.41571975616592494, "grad_norm": 0.373046875, "learning_rate": 1.9848159045314903e-05, "loss": 2.0681, "step": 12885 }, { "epoch": 0.4157520200197213, "grad_norm": 0.380859375, "learning_rate": 1.9846669751598842e-05, "loss": 2.0577, "step": 12886 }, { "epoch": 0.4157842838735176, "grad_norm": 0.361328125, "learning_rate": 1.9845180404534126e-05, "loss": 2.0926, "step": 12887 }, { "epoch": 0.41581654772731397, "grad_norm": 0.36328125, "learning_rate": 1.984369100413713e-05, "loss": 2.0693, "step": 12888 }, { "epoch": 0.4158488115811103, "grad_norm": 0.3828125, "learning_rate": 1.9842201550424263e-05, "loss": 2.0958, "step": 12889 }, { "epoch": 0.41588107543490666, "grad_norm": 0.36328125, "learning_rate": 1.9840712043411917e-05, "loss": 2.0782, "step": 12890 }, { "epoch": 0.415913339288703, "grad_norm": 0.39453125, "learning_rate": 1.9839222483116485e-05, "loss": 2.0229, "step": 12891 }, { "epoch": 0.41594560314249934, "grad_norm": 0.34375, "learning_rate": 1.9837732869554356e-05, "loss": 1.9924, "step": 12892 }, { "epoch": 0.4159778669962957, "grad_norm": 0.384765625, "learning_rate": 1.983624320274194e-05, "loss": 1.992, "step": 12893 }, { "epoch": 0.41601013085009203, "grad_norm": 0.3671875, "learning_rate": 1.983475348269563e-05, "loss": 2.0003, "step": 12894 }, { "epoch": 0.4160423947038884, "grad_norm": 0.376953125, "learning_rate": 1.9833263709431816e-05, "loss": 1.9803, "step": 12895 }, { "epoch": 0.4160746585576848, "grad_norm": 0.35546875, "learning_rate": 1.983177388296691e-05, "loss": 1.9721, "step": 12896 }, { "epoch": 0.4161069224114811, "grad_norm": 0.35546875, "learning_rate": 1.98302840033173e-05, "loss": 2.0124, "step": 12897 }, { "epoch": 0.41613918626527746, "grad_norm": 0.35546875, "learning_rate": 1.9828794070499388e-05, "loss": 2.0099, "step": 12898 }, { "epoch": 0.4161714501190738, "grad_norm": 0.365234375, "learning_rate": 1.9827304084529575e-05, "loss": 1.9927, "step": 12899 }, { "epoch": 0.41620371397287015, "grad_norm": 0.34375, "learning_rate": 1.9825814045424264e-05, "loss": 1.9956, "step": 12900 }, { "epoch": 0.4162359778266665, "grad_norm": 0.36328125, "learning_rate": 1.9824323953199852e-05, "loss": 2.0058, "step": 12901 }, { "epoch": 0.41626824168046284, "grad_norm": 0.373046875, "learning_rate": 1.9822833807872743e-05, "loss": 1.9798, "step": 12902 }, { "epoch": 0.4163005055342592, "grad_norm": 0.34375, "learning_rate": 1.982134360945934e-05, "loss": 1.9784, "step": 12903 }, { "epoch": 0.4163327693880555, "grad_norm": 0.369140625, "learning_rate": 1.9819853357976047e-05, "loss": 2.0139, "step": 12904 }, { "epoch": 0.41636503324185187, "grad_norm": 0.34765625, "learning_rate": 1.981836305343926e-05, "loss": 1.991, "step": 12905 }, { "epoch": 0.4163972970956482, "grad_norm": 0.37109375, "learning_rate": 1.9816872695865393e-05, "loss": 2.0007, "step": 12906 }, { "epoch": 0.41642956094944455, "grad_norm": 0.3671875, "learning_rate": 1.9815382285270848e-05, "loss": 1.9699, "step": 12907 }, { "epoch": 0.4164618248032409, "grad_norm": 0.373046875, "learning_rate": 1.981389182167203e-05, "loss": 1.9721, "step": 12908 }, { "epoch": 0.41649408865703724, "grad_norm": 0.3671875, "learning_rate": 1.981240130508534e-05, "loss": 2.006, "step": 12909 }, { "epoch": 0.4165263525108336, "grad_norm": 0.3671875, "learning_rate": 1.9810910735527193e-05, "loss": 2.0024, "step": 12910 }, { "epoch": 0.41655861636462993, "grad_norm": 0.375, "learning_rate": 1.9809420113013994e-05, "loss": 1.9914, "step": 12911 }, { "epoch": 0.4165908802184263, "grad_norm": 0.380859375, "learning_rate": 1.9807929437562143e-05, "loss": 2.0043, "step": 12912 }, { "epoch": 0.4166231440722226, "grad_norm": 0.3828125, "learning_rate": 1.9806438709188054e-05, "loss": 2.0084, "step": 12913 }, { "epoch": 0.41665540792601896, "grad_norm": 0.3515625, "learning_rate": 1.980494792790814e-05, "loss": 1.9616, "step": 12914 }, { "epoch": 0.4166876717798153, "grad_norm": 0.38671875, "learning_rate": 1.9803457093738805e-05, "loss": 1.9873, "step": 12915 }, { "epoch": 0.41671993563361165, "grad_norm": 0.34375, "learning_rate": 1.9801966206696454e-05, "loss": 1.9525, "step": 12916 }, { "epoch": 0.41675219948740805, "grad_norm": 0.388671875, "learning_rate": 1.9800475266797512e-05, "loss": 1.9987, "step": 12917 }, { "epoch": 0.4167844633412044, "grad_norm": 0.373046875, "learning_rate": 1.9798984274058382e-05, "loss": 1.9597, "step": 12918 }, { "epoch": 0.41681672719500074, "grad_norm": 0.3671875, "learning_rate": 1.979749322849547e-05, "loss": 1.9843, "step": 12919 }, { "epoch": 0.4168489910487971, "grad_norm": 0.384765625, "learning_rate": 1.97960021301252e-05, "loss": 1.9968, "step": 12920 }, { "epoch": 0.4168812549025934, "grad_norm": 0.3671875, "learning_rate": 1.9794510978963976e-05, "loss": 1.9758, "step": 12921 }, { "epoch": 0.41691351875638977, "grad_norm": 0.375, "learning_rate": 1.9793019775028222e-05, "loss": 1.9525, "step": 12922 }, { "epoch": 0.4169457826101861, "grad_norm": 0.3828125, "learning_rate": 1.979152851833433e-05, "loss": 1.9757, "step": 12923 }, { "epoch": 0.41697804646398245, "grad_norm": 0.375, "learning_rate": 1.979003720889874e-05, "loss": 1.9981, "step": 12924 }, { "epoch": 0.4170103103177788, "grad_norm": 0.369140625, "learning_rate": 1.978854584673786e-05, "loss": 1.9965, "step": 12925 }, { "epoch": 0.41704257417157514, "grad_norm": 0.375, "learning_rate": 1.9787054431868093e-05, "loss": 1.9982, "step": 12926 }, { "epoch": 0.4170748380253715, "grad_norm": 0.3671875, "learning_rate": 1.978556296430587e-05, "loss": 2.0039, "step": 12927 }, { "epoch": 0.41710710187916783, "grad_norm": 0.375, "learning_rate": 1.97840714440676e-05, "loss": 1.9607, "step": 12928 }, { "epoch": 0.4171393657329642, "grad_norm": 0.369140625, "learning_rate": 1.978257987116971e-05, "loss": 2.0239, "step": 12929 }, { "epoch": 0.4171716295867605, "grad_norm": 0.357421875, "learning_rate": 1.9781088245628604e-05, "loss": 1.9465, "step": 12930 }, { "epoch": 0.41720389344055686, "grad_norm": 0.357421875, "learning_rate": 1.9779596567460716e-05, "loss": 2.0057, "step": 12931 }, { "epoch": 0.4172361572943532, "grad_norm": 0.345703125, "learning_rate": 1.9778104836682453e-05, "loss": 1.9901, "step": 12932 }, { "epoch": 0.41726842114814955, "grad_norm": 0.3515625, "learning_rate": 1.9776613053310236e-05, "loss": 1.9812, "step": 12933 }, { "epoch": 0.4173006850019459, "grad_norm": 0.35546875, "learning_rate": 1.9775121217360496e-05, "loss": 1.9806, "step": 12934 }, { "epoch": 0.41733294885574224, "grad_norm": 0.337890625, "learning_rate": 1.9773629328849648e-05, "loss": 1.9709, "step": 12935 }, { "epoch": 0.4173652127095386, "grad_norm": 0.35546875, "learning_rate": 1.9772137387794114e-05, "loss": 2.0104, "step": 12936 }, { "epoch": 0.417397476563335, "grad_norm": 0.349609375, "learning_rate": 1.9770645394210306e-05, "loss": 1.9799, "step": 12937 }, { "epoch": 0.4174297404171313, "grad_norm": 0.357421875, "learning_rate": 1.9769153348114663e-05, "loss": 1.9859, "step": 12938 }, { "epoch": 0.41746200427092767, "grad_norm": 0.34765625, "learning_rate": 1.97676612495236e-05, "loss": 1.9821, "step": 12939 }, { "epoch": 0.417494268124724, "grad_norm": 0.3671875, "learning_rate": 1.9766169098453542e-05, "loss": 2.0099, "step": 12940 }, { "epoch": 0.41752653197852035, "grad_norm": 0.37109375, "learning_rate": 1.9764676894920912e-05, "loss": 1.9751, "step": 12941 }, { "epoch": 0.4175587958323167, "grad_norm": 0.35546875, "learning_rate": 1.976318463894214e-05, "loss": 1.9893, "step": 12942 }, { "epoch": 0.41759105968611304, "grad_norm": 0.36328125, "learning_rate": 1.9761692330533648e-05, "loss": 1.9692, "step": 12943 }, { "epoch": 0.4176233235399094, "grad_norm": 0.3671875, "learning_rate": 1.976019996971186e-05, "loss": 2.0126, "step": 12944 }, { "epoch": 0.41765558739370573, "grad_norm": 0.373046875, "learning_rate": 1.9758707556493213e-05, "loss": 1.985, "step": 12945 }, { "epoch": 0.4176878512475021, "grad_norm": 0.375, "learning_rate": 1.975721509089412e-05, "loss": 2.022, "step": 12946 }, { "epoch": 0.4177201151012984, "grad_norm": 0.36328125, "learning_rate": 1.9755722572931017e-05, "loss": 1.9876, "step": 12947 }, { "epoch": 0.41775237895509476, "grad_norm": 0.373046875, "learning_rate": 1.9754230002620334e-05, "loss": 1.9844, "step": 12948 }, { "epoch": 0.4177846428088911, "grad_norm": 0.357421875, "learning_rate": 1.9752737379978495e-05, "loss": 1.9927, "step": 12949 }, { "epoch": 0.41781690666268745, "grad_norm": 0.365234375, "learning_rate": 1.9751244705021933e-05, "loss": 1.9868, "step": 12950 }, { "epoch": 0.4178491705164838, "grad_norm": 0.3515625, "learning_rate": 1.974975197776708e-05, "loss": 1.9897, "step": 12951 }, { "epoch": 0.41788143437028014, "grad_norm": 0.369140625, "learning_rate": 1.974825919823036e-05, "loss": 1.983, "step": 12952 }, { "epoch": 0.4179136982240765, "grad_norm": 0.3828125, "learning_rate": 1.9746766366428217e-05, "loss": 1.9497, "step": 12953 }, { "epoch": 0.4179459620778728, "grad_norm": 0.37109375, "learning_rate": 1.974527348237707e-05, "loss": 1.9984, "step": 12954 }, { "epoch": 0.41797822593166917, "grad_norm": 0.37890625, "learning_rate": 1.9743780546093357e-05, "loss": 1.9816, "step": 12955 }, { "epoch": 0.4180104897854655, "grad_norm": 0.423828125, "learning_rate": 1.9742287557593515e-05, "loss": 1.9853, "step": 12956 }, { "epoch": 0.4180427536392619, "grad_norm": 0.40234375, "learning_rate": 1.9740794516893968e-05, "loss": 1.9724, "step": 12957 }, { "epoch": 0.41807501749305825, "grad_norm": 0.3671875, "learning_rate": 1.9739301424011156e-05, "loss": 2.008, "step": 12958 }, { "epoch": 0.4181072813468546, "grad_norm": 0.39453125, "learning_rate": 1.973780827896152e-05, "loss": 1.985, "step": 12959 }, { "epoch": 0.41813954520065094, "grad_norm": 0.3828125, "learning_rate": 1.9736315081761482e-05, "loss": 2.0128, "step": 12960 }, { "epoch": 0.4181718090544473, "grad_norm": 0.375, "learning_rate": 1.973482183242749e-05, "loss": 1.9981, "step": 12961 }, { "epoch": 0.41820407290824363, "grad_norm": 0.404296875, "learning_rate": 1.9733328530975975e-05, "loss": 1.9972, "step": 12962 }, { "epoch": 0.41823633676204, "grad_norm": 0.35546875, "learning_rate": 1.9731835177423376e-05, "loss": 1.9871, "step": 12963 }, { "epoch": 0.4182686006158363, "grad_norm": 0.376953125, "learning_rate": 1.9730341771786127e-05, "loss": 1.9953, "step": 12964 }, { "epoch": 0.41830086446963266, "grad_norm": 0.349609375, "learning_rate": 1.9728848314080675e-05, "loss": 1.9992, "step": 12965 }, { "epoch": 0.418333128323429, "grad_norm": 0.38671875, "learning_rate": 1.9727354804323446e-05, "loss": 1.9901, "step": 12966 }, { "epoch": 0.41836539217722535, "grad_norm": 0.421875, "learning_rate": 1.9725861242530893e-05, "loss": 1.9525, "step": 12967 }, { "epoch": 0.4183976560310217, "grad_norm": 0.357421875, "learning_rate": 1.9724367628719445e-05, "loss": 1.975, "step": 12968 }, { "epoch": 0.41842991988481804, "grad_norm": 0.447265625, "learning_rate": 1.972287396290555e-05, "loss": 1.9925, "step": 12969 }, { "epoch": 0.4184621837386144, "grad_norm": 0.365234375, "learning_rate": 1.972138024510565e-05, "loss": 1.959, "step": 12970 }, { "epoch": 0.4184944475924107, "grad_norm": 0.41796875, "learning_rate": 1.9719886475336173e-05, "loss": 1.9878, "step": 12971 }, { "epoch": 0.41852671144620707, "grad_norm": 0.3828125, "learning_rate": 1.971839265361358e-05, "loss": 1.9676, "step": 12972 }, { "epoch": 0.4185589753000034, "grad_norm": 0.392578125, "learning_rate": 1.9716898779954306e-05, "loss": 1.9763, "step": 12973 }, { "epoch": 0.41859123915379975, "grad_norm": 0.3828125, "learning_rate": 1.9715404854374794e-05, "loss": 2.0156, "step": 12974 }, { "epoch": 0.4186235030075961, "grad_norm": 0.43359375, "learning_rate": 1.971391087689148e-05, "loss": 2.0718, "step": 12975 }, { "epoch": 0.41865576686139244, "grad_norm": 0.443359375, "learning_rate": 1.9712416847520826e-05, "loss": 2.0485, "step": 12976 }, { "epoch": 0.41868803071518884, "grad_norm": 0.42578125, "learning_rate": 1.9710922766279264e-05, "loss": 2.072, "step": 12977 }, { "epoch": 0.4187202945689852, "grad_norm": 0.484375, "learning_rate": 1.9709428633183242e-05, "loss": 2.0536, "step": 12978 }, { "epoch": 0.41875255842278153, "grad_norm": 0.390625, "learning_rate": 1.9707934448249213e-05, "loss": 2.0784, "step": 12979 }, { "epoch": 0.4187848222765779, "grad_norm": 0.44921875, "learning_rate": 1.9706440211493615e-05, "loss": 2.0696, "step": 12980 }, { "epoch": 0.4188170861303742, "grad_norm": 0.396484375, "learning_rate": 1.9704945922932905e-05, "loss": 2.0845, "step": 12981 }, { "epoch": 0.41884934998417056, "grad_norm": 0.462890625, "learning_rate": 1.9703451582583523e-05, "loss": 2.0803, "step": 12982 }, { "epoch": 0.4188816138379669, "grad_norm": 0.39453125, "learning_rate": 1.9701957190461915e-05, "loss": 2.0514, "step": 12983 }, { "epoch": 0.41891387769176325, "grad_norm": 0.41796875, "learning_rate": 1.9700462746584545e-05, "loss": 2.0712, "step": 12984 }, { "epoch": 0.4189461415455596, "grad_norm": 0.384765625, "learning_rate": 1.9698968250967844e-05, "loss": 2.0942, "step": 12985 }, { "epoch": 0.41897840539935594, "grad_norm": 0.427734375, "learning_rate": 1.9697473703628276e-05, "loss": 2.0166, "step": 12986 }, { "epoch": 0.4190106692531523, "grad_norm": 0.37890625, "learning_rate": 1.9695979104582286e-05, "loss": 2.0888, "step": 12987 }, { "epoch": 0.4190429331069486, "grad_norm": 0.41796875, "learning_rate": 1.969448445384633e-05, "loss": 2.0596, "step": 12988 }, { "epoch": 0.41907519696074497, "grad_norm": 0.400390625, "learning_rate": 1.9692989751436852e-05, "loss": 2.0678, "step": 12989 }, { "epoch": 0.4191074608145413, "grad_norm": 0.3828125, "learning_rate": 1.9691494997370314e-05, "loss": 2.0731, "step": 12990 }, { "epoch": 0.41913972466833765, "grad_norm": 0.369140625, "learning_rate": 1.9690000191663162e-05, "loss": 2.0267, "step": 12991 }, { "epoch": 0.419171988522134, "grad_norm": 0.38671875, "learning_rate": 1.968850533433185e-05, "loss": 2.0637, "step": 12992 }, { "epoch": 0.41920425237593034, "grad_norm": 0.361328125, "learning_rate": 1.9687010425392842e-05, "loss": 2.0673, "step": 12993 }, { "epoch": 0.4192365162297267, "grad_norm": 0.39453125, "learning_rate": 1.9685515464862582e-05, "loss": 2.0729, "step": 12994 }, { "epoch": 0.41926878008352303, "grad_norm": 0.365234375, "learning_rate": 1.9684020452757526e-05, "loss": 2.0717, "step": 12995 }, { "epoch": 0.4193010439373194, "grad_norm": 0.369140625, "learning_rate": 1.9682525389094137e-05, "loss": 2.0461, "step": 12996 }, { "epoch": 0.4193333077911157, "grad_norm": 0.36328125, "learning_rate": 1.9681030273888868e-05, "loss": 2.0518, "step": 12997 }, { "epoch": 0.4193655716449121, "grad_norm": 0.3671875, "learning_rate": 1.9679535107158175e-05, "loss": 2.091, "step": 12998 }, { "epoch": 0.41939783549870846, "grad_norm": 0.35546875, "learning_rate": 1.9678039888918517e-05, "loss": 2.0678, "step": 12999 }, { "epoch": 0.4194300993525048, "grad_norm": 0.37890625, "learning_rate": 1.9676544619186347e-05, "loss": 2.0215, "step": 13000 }, { "epoch": 0.41946236320630115, "grad_norm": 0.375, "learning_rate": 1.967504929797814e-05, "loss": 2.0477, "step": 13001 }, { "epoch": 0.4194946270600975, "grad_norm": 0.3671875, "learning_rate": 1.9673553925310334e-05, "loss": 2.0752, "step": 13002 }, { "epoch": 0.41952689091389384, "grad_norm": 0.36328125, "learning_rate": 1.96720585011994e-05, "loss": 2.0811, "step": 13003 }, { "epoch": 0.4195591547676902, "grad_norm": 0.375, "learning_rate": 1.96705630256618e-05, "loss": 2.093, "step": 13004 }, { "epoch": 0.4195914186214865, "grad_norm": 0.357421875, "learning_rate": 1.9669067498713993e-05, "loss": 2.0703, "step": 13005 }, { "epoch": 0.41962368247528287, "grad_norm": 0.396484375, "learning_rate": 1.966757192037244e-05, "loss": 2.061, "step": 13006 }, { "epoch": 0.4196559463290792, "grad_norm": 0.376953125, "learning_rate": 1.966607629065361e-05, "loss": 2.0668, "step": 13007 }, { "epoch": 0.41968821018287555, "grad_norm": 0.376953125, "learning_rate": 1.9664580609573952e-05, "loss": 2.052, "step": 13008 }, { "epoch": 0.4197204740366719, "grad_norm": 0.400390625, "learning_rate": 1.9663084877149937e-05, "loss": 2.0661, "step": 13009 }, { "epoch": 0.41975273789046824, "grad_norm": 0.37109375, "learning_rate": 1.9661589093398032e-05, "loss": 2.0656, "step": 13010 }, { "epoch": 0.4197850017442646, "grad_norm": 0.39453125, "learning_rate": 1.9660093258334694e-05, "loss": 2.0957, "step": 13011 }, { "epoch": 0.41981726559806093, "grad_norm": 0.3671875, "learning_rate": 1.96585973719764e-05, "loss": 2.041, "step": 13012 }, { "epoch": 0.4198495294518573, "grad_norm": 0.427734375, "learning_rate": 1.9657101434339604e-05, "loss": 2.0883, "step": 13013 }, { "epoch": 0.4198817933056536, "grad_norm": 0.375, "learning_rate": 1.9655605445440775e-05, "loss": 2.0854, "step": 13014 }, { "epoch": 0.41991405715944996, "grad_norm": 0.419921875, "learning_rate": 1.965410940529638e-05, "loss": 2.069, "step": 13015 }, { "epoch": 0.4199463210132463, "grad_norm": 0.3984375, "learning_rate": 1.9652613313922897e-05, "loss": 2.0756, "step": 13016 }, { "epoch": 0.41997858486704265, "grad_norm": 0.388671875, "learning_rate": 1.9651117171336773e-05, "loss": 2.0621, "step": 13017 }, { "epoch": 0.42001084872083905, "grad_norm": 0.404296875, "learning_rate": 1.9649620977554494e-05, "loss": 2.0705, "step": 13018 }, { "epoch": 0.4200431125746354, "grad_norm": 0.419921875, "learning_rate": 1.9648124732592523e-05, "loss": 2.077, "step": 13019 }, { "epoch": 0.42007537642843173, "grad_norm": 0.384765625, "learning_rate": 1.9646628436467325e-05, "loss": 2.0998, "step": 13020 }, { "epoch": 0.4201076402822281, "grad_norm": 0.375, "learning_rate": 1.9645132089195382e-05, "loss": 2.0818, "step": 13021 }, { "epoch": 0.4201399041360244, "grad_norm": 0.388671875, "learning_rate": 1.964363569079315e-05, "loss": 2.092, "step": 13022 }, { "epoch": 0.42017216798982077, "grad_norm": 0.359375, "learning_rate": 1.964213924127711e-05, "loss": 2.0578, "step": 13023 }, { "epoch": 0.4202044318436171, "grad_norm": 0.408203125, "learning_rate": 1.9640642740663733e-05, "loss": 2.0292, "step": 13024 }, { "epoch": 0.42023669569741345, "grad_norm": 0.35546875, "learning_rate": 1.963914618896949e-05, "loss": 2.0899, "step": 13025 }, { "epoch": 0.4202689595512098, "grad_norm": 0.376953125, "learning_rate": 1.963764958621085e-05, "loss": 2.0661, "step": 13026 }, { "epoch": 0.42030122340500614, "grad_norm": 0.36328125, "learning_rate": 1.9636152932404296e-05, "loss": 2.0714, "step": 13027 }, { "epoch": 0.4203334872588025, "grad_norm": 0.365234375, "learning_rate": 1.9634656227566294e-05, "loss": 2.0687, "step": 13028 }, { "epoch": 0.42036575111259883, "grad_norm": 0.3671875, "learning_rate": 1.963315947171332e-05, "loss": 2.0938, "step": 13029 }, { "epoch": 0.4203980149663952, "grad_norm": 0.36328125, "learning_rate": 1.963166266486185e-05, "loss": 2.0551, "step": 13030 }, { "epoch": 0.4204302788201915, "grad_norm": 0.384765625, "learning_rate": 1.9630165807028362e-05, "loss": 2.094, "step": 13031 }, { "epoch": 0.42046254267398786, "grad_norm": 0.37109375, "learning_rate": 1.962866889822933e-05, "loss": 2.0681, "step": 13032 }, { "epoch": 0.4204948065277842, "grad_norm": 0.37109375, "learning_rate": 1.962717193848123e-05, "loss": 2.0831, "step": 13033 }, { "epoch": 0.42052707038158055, "grad_norm": 0.388671875, "learning_rate": 1.9625674927800544e-05, "loss": 2.0731, "step": 13034 }, { "epoch": 0.4205593342353769, "grad_norm": 0.375, "learning_rate": 1.9624177866203744e-05, "loss": 2.0649, "step": 13035 }, { "epoch": 0.42059159808917324, "grad_norm": 0.365234375, "learning_rate": 1.9622680753707314e-05, "loss": 2.049, "step": 13036 }, { "epoch": 0.4206238619429696, "grad_norm": 0.35546875, "learning_rate": 1.9621183590327725e-05, "loss": 2.0727, "step": 13037 }, { "epoch": 0.420656125796766, "grad_norm": 0.41015625, "learning_rate": 1.961968637608147e-05, "loss": 2.0775, "step": 13038 }, { "epoch": 0.4206883896505623, "grad_norm": 0.369140625, "learning_rate": 1.9618189110985017e-05, "loss": 2.0622, "step": 13039 }, { "epoch": 0.42072065350435867, "grad_norm": 0.35546875, "learning_rate": 1.961669179505485e-05, "loss": 2.0683, "step": 13040 }, { "epoch": 0.420752917358155, "grad_norm": 0.375, "learning_rate": 1.9615194428307456e-05, "loss": 2.0754, "step": 13041 }, { "epoch": 0.42078518121195135, "grad_norm": 0.353515625, "learning_rate": 1.9613697010759307e-05, "loss": 2.0874, "step": 13042 }, { "epoch": 0.4208174450657477, "grad_norm": 0.384765625, "learning_rate": 1.9612199542426898e-05, "loss": 2.0637, "step": 13043 }, { "epoch": 0.42084970891954404, "grad_norm": 0.35546875, "learning_rate": 1.96107020233267e-05, "loss": 2.0993, "step": 13044 }, { "epoch": 0.4208819727733404, "grad_norm": 0.369140625, "learning_rate": 1.9609204453475202e-05, "loss": 2.1063, "step": 13045 }, { "epoch": 0.42091423662713673, "grad_norm": 0.357421875, "learning_rate": 1.960770683288889e-05, "loss": 2.0612, "step": 13046 }, { "epoch": 0.4209465004809331, "grad_norm": 0.3671875, "learning_rate": 1.9606209161584247e-05, "loss": 2.0709, "step": 13047 }, { "epoch": 0.4209787643347294, "grad_norm": 0.361328125, "learning_rate": 1.9604711439577755e-05, "loss": 2.0595, "step": 13048 }, { "epoch": 0.42101102818852576, "grad_norm": 0.3671875, "learning_rate": 1.9603213666885907e-05, "loss": 2.1042, "step": 13049 }, { "epoch": 0.4210432920423221, "grad_norm": 0.59765625, "learning_rate": 1.9601715843525184e-05, "loss": 2.084, "step": 13050 }, { "epoch": 0.42107555589611845, "grad_norm": 0.365234375, "learning_rate": 1.9600217969512074e-05, "loss": 2.0565, "step": 13051 }, { "epoch": 0.4211078197499148, "grad_norm": 0.37890625, "learning_rate": 1.9598720044863063e-05, "loss": 2.0292, "step": 13052 }, { "epoch": 0.42114008360371114, "grad_norm": 0.361328125, "learning_rate": 1.9597222069594642e-05, "loss": 1.974, "step": 13053 }, { "epoch": 0.4211723474575075, "grad_norm": 0.349609375, "learning_rate": 1.9595724043723297e-05, "loss": 1.9856, "step": 13054 }, { "epoch": 0.4212046113113038, "grad_norm": 0.376953125, "learning_rate": 1.9594225967265525e-05, "loss": 1.9755, "step": 13055 }, { "epoch": 0.42123687516510017, "grad_norm": 0.3671875, "learning_rate": 1.9592727840237805e-05, "loss": 1.9648, "step": 13056 }, { "epoch": 0.4212691390188965, "grad_norm": 0.357421875, "learning_rate": 1.959122966265663e-05, "loss": 2.0046, "step": 13057 }, { "epoch": 0.4213014028726929, "grad_norm": 0.373046875, "learning_rate": 1.9589731434538495e-05, "loss": 2.0015, "step": 13058 }, { "epoch": 0.42133366672648925, "grad_norm": 0.361328125, "learning_rate": 1.9588233155899887e-05, "loss": 1.9956, "step": 13059 }, { "epoch": 0.4213659305802856, "grad_norm": 0.41796875, "learning_rate": 1.9586734826757306e-05, "loss": 1.985, "step": 13060 }, { "epoch": 0.42139819443408194, "grad_norm": 0.365234375, "learning_rate": 1.958523644712723e-05, "loss": 1.9707, "step": 13061 }, { "epoch": 0.4214304582878783, "grad_norm": 0.3828125, "learning_rate": 1.9583738017026163e-05, "loss": 1.9905, "step": 13062 }, { "epoch": 0.42146272214167463, "grad_norm": 0.376953125, "learning_rate": 1.95822395364706e-05, "loss": 1.9658, "step": 13063 }, { "epoch": 0.42149498599547097, "grad_norm": 0.373046875, "learning_rate": 1.9580741005477026e-05, "loss": 2.0156, "step": 13064 }, { "epoch": 0.4215272498492673, "grad_norm": 0.38671875, "learning_rate": 1.9579242424061948e-05, "loss": 1.9785, "step": 13065 }, { "epoch": 0.42155951370306366, "grad_norm": 0.353515625, "learning_rate": 1.957774379224185e-05, "loss": 1.9974, "step": 13066 }, { "epoch": 0.42159177755686, "grad_norm": 0.359375, "learning_rate": 1.9576245110033233e-05, "loss": 1.9951, "step": 13067 }, { "epoch": 0.42162404141065635, "grad_norm": 0.353515625, "learning_rate": 1.9574746377452593e-05, "loss": 1.9645, "step": 13068 }, { "epoch": 0.4216563052644527, "grad_norm": 0.361328125, "learning_rate": 1.957324759451643e-05, "loss": 2.0095, "step": 13069 }, { "epoch": 0.42168856911824903, "grad_norm": 0.349609375, "learning_rate": 1.957174876124123e-05, "loss": 2.0057, "step": 13070 }, { "epoch": 0.4217208329720454, "grad_norm": 0.380859375, "learning_rate": 1.9570249877643507e-05, "loss": 1.9608, "step": 13071 }, { "epoch": 0.4217530968258417, "grad_norm": 0.408203125, "learning_rate": 1.9568750943739747e-05, "loss": 1.9887, "step": 13072 }, { "epoch": 0.42178536067963807, "grad_norm": 0.3515625, "learning_rate": 1.956725195954646e-05, "loss": 2.0161, "step": 13073 }, { "epoch": 0.4218176245334344, "grad_norm": 0.43359375, "learning_rate": 1.9565752925080135e-05, "loss": 1.9932, "step": 13074 }, { "epoch": 0.42184988838723075, "grad_norm": 0.380859375, "learning_rate": 1.956425384035728e-05, "loss": 1.9944, "step": 13075 }, { "epoch": 0.4218821522410271, "grad_norm": 0.380859375, "learning_rate": 1.9562754705394394e-05, "loss": 1.9932, "step": 13076 }, { "epoch": 0.42191441609482344, "grad_norm": 0.39453125, "learning_rate": 1.9561255520207977e-05, "loss": 2.0, "step": 13077 }, { "epoch": 0.42194667994861984, "grad_norm": 0.388671875, "learning_rate": 1.955975628481453e-05, "loss": 1.9793, "step": 13078 }, { "epoch": 0.4219789438024162, "grad_norm": 0.369140625, "learning_rate": 1.9558256999230556e-05, "loss": 2.0067, "step": 13079 }, { "epoch": 0.42201120765621253, "grad_norm": 0.373046875, "learning_rate": 1.955675766347256e-05, "loss": 2.0146, "step": 13080 }, { "epoch": 0.42204347151000887, "grad_norm": 0.373046875, "learning_rate": 1.9555258277557044e-05, "loss": 2.013, "step": 13081 }, { "epoch": 0.4220757353638052, "grad_norm": 0.365234375, "learning_rate": 1.9553758841500512e-05, "loss": 1.972, "step": 13082 }, { "epoch": 0.42210799921760156, "grad_norm": 0.369140625, "learning_rate": 1.9552259355319473e-05, "loss": 1.9738, "step": 13083 }, { "epoch": 0.4221402630713979, "grad_norm": 0.37890625, "learning_rate": 1.9550759819030425e-05, "loss": 1.9659, "step": 13084 }, { "epoch": 0.42217252692519425, "grad_norm": 0.361328125, "learning_rate": 1.954926023264988e-05, "loss": 1.9527, "step": 13085 }, { "epoch": 0.4222047907789906, "grad_norm": 0.412109375, "learning_rate": 1.954776059619434e-05, "loss": 1.9767, "step": 13086 }, { "epoch": 0.42223705463278693, "grad_norm": 0.34765625, "learning_rate": 1.954626090968032e-05, "loss": 1.9682, "step": 13087 }, { "epoch": 0.4222693184865833, "grad_norm": 0.365234375, "learning_rate": 1.954476117312432e-05, "loss": 1.9635, "step": 13088 }, { "epoch": 0.4223015823403796, "grad_norm": 0.388671875, "learning_rate": 1.9543261386542845e-05, "loss": 2.0074, "step": 13089 }, { "epoch": 0.42233384619417597, "grad_norm": 0.353515625, "learning_rate": 1.9541761549952407e-05, "loss": 2.01, "step": 13090 }, { "epoch": 0.4223661100479723, "grad_norm": 0.384765625, "learning_rate": 1.954026166336952e-05, "loss": 1.9905, "step": 13091 }, { "epoch": 0.42239837390176865, "grad_norm": 0.345703125, "learning_rate": 1.9538761726810688e-05, "loss": 1.9833, "step": 13092 }, { "epoch": 0.422430637755565, "grad_norm": 0.392578125, "learning_rate": 1.953726174029242e-05, "loss": 1.9875, "step": 13093 }, { "epoch": 0.42246290160936134, "grad_norm": 0.349609375, "learning_rate": 1.9535761703831237e-05, "loss": 1.9692, "step": 13094 }, { "epoch": 0.4224951654631577, "grad_norm": 0.388671875, "learning_rate": 1.9534261617443637e-05, "loss": 1.9899, "step": 13095 }, { "epoch": 0.42252742931695403, "grad_norm": 0.349609375, "learning_rate": 1.953276148114614e-05, "loss": 2.0102, "step": 13096 }, { "epoch": 0.4225596931707504, "grad_norm": 0.349609375, "learning_rate": 1.9531261294955257e-05, "loss": 1.9983, "step": 13097 }, { "epoch": 0.4225919570245467, "grad_norm": 0.34765625, "learning_rate": 1.95297610588875e-05, "loss": 1.9779, "step": 13098 }, { "epoch": 0.4226242208783431, "grad_norm": 0.365234375, "learning_rate": 1.9528260772959382e-05, "loss": 1.986, "step": 13099 }, { "epoch": 0.42265648473213946, "grad_norm": 0.34765625, "learning_rate": 1.9526760437187417e-05, "loss": 1.9757, "step": 13100 }, { "epoch": 0.4226887485859358, "grad_norm": 0.369140625, "learning_rate": 1.9525260051588123e-05, "loss": 1.9958, "step": 13101 }, { "epoch": 0.42272101243973215, "grad_norm": 0.34375, "learning_rate": 1.952375961617801e-05, "loss": 1.9884, "step": 13102 }, { "epoch": 0.4227532762935285, "grad_norm": 0.36328125, "learning_rate": 1.95222591309736e-05, "loss": 1.9628, "step": 13103 }, { "epoch": 0.42278554014732483, "grad_norm": 0.35546875, "learning_rate": 1.9520758595991406e-05, "loss": 1.9823, "step": 13104 }, { "epoch": 0.4228178040011212, "grad_norm": 0.37109375, "learning_rate": 1.9519258011247943e-05, "loss": 1.9605, "step": 13105 }, { "epoch": 0.4228500678549175, "grad_norm": 0.3828125, "learning_rate": 1.9517757376759727e-05, "loss": 1.981, "step": 13106 }, { "epoch": 0.42288233170871387, "grad_norm": 0.375, "learning_rate": 1.951625669254328e-05, "loss": 2.0085, "step": 13107 }, { "epoch": 0.4229145955625102, "grad_norm": 0.365234375, "learning_rate": 1.9514755958615123e-05, "loss": 2.0111, "step": 13108 }, { "epoch": 0.42294685941630655, "grad_norm": 0.3515625, "learning_rate": 1.951325517499177e-05, "loss": 1.9694, "step": 13109 }, { "epoch": 0.4229791232701029, "grad_norm": 0.376953125, "learning_rate": 1.951175434168974e-05, "loss": 1.9712, "step": 13110 }, { "epoch": 0.42301138712389924, "grad_norm": 0.3515625, "learning_rate": 1.9510253458725556e-05, "loss": 2.0047, "step": 13111 }, { "epoch": 0.4230436509776956, "grad_norm": 0.3515625, "learning_rate": 1.950875252611574e-05, "loss": 1.9961, "step": 13112 }, { "epoch": 0.42307591483149193, "grad_norm": 0.361328125, "learning_rate": 1.9507251543876804e-05, "loss": 1.9563, "step": 13113 }, { "epoch": 0.42310817868528827, "grad_norm": 0.3671875, "learning_rate": 1.9505750512025283e-05, "loss": 1.9803, "step": 13114 }, { "epoch": 0.4231404425390846, "grad_norm": 0.349609375, "learning_rate": 1.9504249430577692e-05, "loss": 1.9668, "step": 13115 }, { "epoch": 0.42317270639288096, "grad_norm": 0.357421875, "learning_rate": 1.950274829955055e-05, "loss": 1.9981, "step": 13116 }, { "epoch": 0.4232049702466773, "grad_norm": 0.341796875, "learning_rate": 1.9501247118960388e-05, "loss": 1.9494, "step": 13117 }, { "epoch": 0.42323723410047365, "grad_norm": 0.34765625, "learning_rate": 1.9499745888823725e-05, "loss": 1.9843, "step": 13118 }, { "epoch": 0.42326949795427005, "grad_norm": 0.3515625, "learning_rate": 1.9498244609157093e-05, "loss": 1.9507, "step": 13119 }, { "epoch": 0.4233017618080664, "grad_norm": 0.349609375, "learning_rate": 1.9496743279977007e-05, "loss": 2.0075, "step": 13120 }, { "epoch": 0.42333402566186273, "grad_norm": 0.3515625, "learning_rate": 1.9495241901299995e-05, "loss": 1.9814, "step": 13121 }, { "epoch": 0.4233662895156591, "grad_norm": 0.373046875, "learning_rate": 1.949374047314259e-05, "loss": 1.9952, "step": 13122 }, { "epoch": 0.4233985533694554, "grad_norm": 0.33984375, "learning_rate": 1.9492238995521313e-05, "loss": 1.9495, "step": 13123 }, { "epoch": 0.42343081722325177, "grad_norm": 0.369140625, "learning_rate": 1.9490737468452687e-05, "loss": 1.9602, "step": 13124 }, { "epoch": 0.4234630810770481, "grad_norm": 0.349609375, "learning_rate": 1.948923589195325e-05, "loss": 1.9699, "step": 13125 }, { "epoch": 0.42349534493084445, "grad_norm": 0.349609375, "learning_rate": 1.9487734266039523e-05, "loss": 1.9666, "step": 13126 }, { "epoch": 0.4235276087846408, "grad_norm": 0.3671875, "learning_rate": 1.9486232590728032e-05, "loss": 2.0001, "step": 13127 }, { "epoch": 0.42355987263843714, "grad_norm": 0.369140625, "learning_rate": 1.9484730866035317e-05, "loss": 1.9677, "step": 13128 }, { "epoch": 0.4235921364922335, "grad_norm": 0.37109375, "learning_rate": 1.9483229091977904e-05, "loss": 2.0609, "step": 13129 }, { "epoch": 0.42362440034602983, "grad_norm": 0.3828125, "learning_rate": 1.9481727268572315e-05, "loss": 2.0947, "step": 13130 }, { "epoch": 0.42365666419982617, "grad_norm": 0.408203125, "learning_rate": 1.948022539583509e-05, "loss": 2.072, "step": 13131 }, { "epoch": 0.4236889280536225, "grad_norm": 0.408203125, "learning_rate": 1.9478723473782756e-05, "loss": 2.0667, "step": 13132 }, { "epoch": 0.42372119190741886, "grad_norm": 0.384765625, "learning_rate": 1.947722150243185e-05, "loss": 2.0928, "step": 13133 }, { "epoch": 0.4237534557612152, "grad_norm": 0.40625, "learning_rate": 1.9475719481798902e-05, "loss": 2.0688, "step": 13134 }, { "epoch": 0.42378571961501155, "grad_norm": 0.400390625, "learning_rate": 1.9474217411900444e-05, "loss": 2.0959, "step": 13135 }, { "epoch": 0.4238179834688079, "grad_norm": 0.392578125, "learning_rate": 1.947271529275301e-05, "loss": 2.0776, "step": 13136 }, { "epoch": 0.42385024732260423, "grad_norm": 0.396484375, "learning_rate": 1.9471213124373134e-05, "loss": 2.0899, "step": 13137 }, { "epoch": 0.4238825111764006, "grad_norm": 0.421875, "learning_rate": 1.9469710906777354e-05, "loss": 2.0353, "step": 13138 }, { "epoch": 0.423914775030197, "grad_norm": 0.38671875, "learning_rate": 1.94682086399822e-05, "loss": 2.0849, "step": 13139 }, { "epoch": 0.4239470388839933, "grad_norm": 0.369140625, "learning_rate": 1.9466706324004215e-05, "loss": 2.0663, "step": 13140 }, { "epoch": 0.42397930273778967, "grad_norm": 0.384765625, "learning_rate": 1.9465203958859924e-05, "loss": 2.0642, "step": 13141 }, { "epoch": 0.424011566591586, "grad_norm": 0.369140625, "learning_rate": 1.946370154456588e-05, "loss": 2.0531, "step": 13142 }, { "epoch": 0.42404383044538235, "grad_norm": 0.390625, "learning_rate": 1.9462199081138603e-05, "loss": 2.0953, "step": 13143 }, { "epoch": 0.4240760942991787, "grad_norm": 0.3828125, "learning_rate": 1.9460696568594644e-05, "loss": 2.1109, "step": 13144 }, { "epoch": 0.42410835815297504, "grad_norm": 0.392578125, "learning_rate": 1.9459194006950537e-05, "loss": 2.071, "step": 13145 }, { "epoch": 0.4241406220067714, "grad_norm": 0.40625, "learning_rate": 1.9457691396222823e-05, "loss": 2.0612, "step": 13146 }, { "epoch": 0.4241728858605677, "grad_norm": 0.396484375, "learning_rate": 1.9456188736428035e-05, "loss": 2.0506, "step": 13147 }, { "epoch": 0.42420514971436407, "grad_norm": 0.359375, "learning_rate": 1.9454686027582724e-05, "loss": 2.0622, "step": 13148 }, { "epoch": 0.4242374135681604, "grad_norm": 0.39453125, "learning_rate": 1.9453183269703424e-05, "loss": 2.0957, "step": 13149 }, { "epoch": 0.42426967742195676, "grad_norm": 0.36328125, "learning_rate": 1.9451680462806674e-05, "loss": 2.0751, "step": 13150 }, { "epoch": 0.4243019412757531, "grad_norm": 0.40625, "learning_rate": 1.9450177606909023e-05, "loss": 2.0884, "step": 13151 }, { "epoch": 0.42433420512954945, "grad_norm": 0.388671875, "learning_rate": 1.9448674702027007e-05, "loss": 2.044, "step": 13152 }, { "epoch": 0.4243664689833458, "grad_norm": 0.375, "learning_rate": 1.944717174817717e-05, "loss": 2.0228, "step": 13153 }, { "epoch": 0.42439873283714213, "grad_norm": 0.380859375, "learning_rate": 1.944566874537606e-05, "loss": 2.0859, "step": 13154 }, { "epoch": 0.4244309966909385, "grad_norm": 0.3671875, "learning_rate": 1.9444165693640214e-05, "loss": 2.1031, "step": 13155 }, { "epoch": 0.4244632605447348, "grad_norm": 0.39453125, "learning_rate": 1.9442662592986183e-05, "loss": 2.0669, "step": 13156 }, { "epoch": 0.42449552439853117, "grad_norm": 0.34765625, "learning_rate": 1.944115944343051e-05, "loss": 2.0635, "step": 13157 }, { "epoch": 0.4245277882523275, "grad_norm": 0.390625, "learning_rate": 1.9439656244989737e-05, "loss": 2.1093, "step": 13158 }, { "epoch": 0.4245600521061239, "grad_norm": 0.357421875, "learning_rate": 1.9438152997680415e-05, "loss": 2.0928, "step": 13159 }, { "epoch": 0.42459231595992025, "grad_norm": 0.365234375, "learning_rate": 1.9436649701519094e-05, "loss": 2.0973, "step": 13160 }, { "epoch": 0.4246245798137166, "grad_norm": 0.36328125, "learning_rate": 1.9435146356522306e-05, "loss": 2.0474, "step": 13161 }, { "epoch": 0.42465684366751294, "grad_norm": 0.37109375, "learning_rate": 1.9433642962706614e-05, "loss": 2.0864, "step": 13162 }, { "epoch": 0.4246891075213093, "grad_norm": 0.38671875, "learning_rate": 1.943213952008856e-05, "loss": 2.094, "step": 13163 }, { "epoch": 0.4247213713751056, "grad_norm": 0.34765625, "learning_rate": 1.9430636028684695e-05, "loss": 2.0467, "step": 13164 }, { "epoch": 0.42475363522890197, "grad_norm": 0.37109375, "learning_rate": 1.9429132488511562e-05, "loss": 2.1108, "step": 13165 }, { "epoch": 0.4247858990826983, "grad_norm": 0.361328125, "learning_rate": 1.942762889958572e-05, "loss": 2.0667, "step": 13166 }, { "epoch": 0.42481816293649466, "grad_norm": 0.37109375, "learning_rate": 1.942612526192372e-05, "loss": 2.0941, "step": 13167 }, { "epoch": 0.424850426790291, "grad_norm": 0.36328125, "learning_rate": 1.9424621575542104e-05, "loss": 2.0686, "step": 13168 }, { "epoch": 0.42488269064408735, "grad_norm": 0.37890625, "learning_rate": 1.9423117840457424e-05, "loss": 2.0869, "step": 13169 }, { "epoch": 0.4249149544978837, "grad_norm": 0.359375, "learning_rate": 1.9421614056686242e-05, "loss": 2.0869, "step": 13170 }, { "epoch": 0.42494721835168003, "grad_norm": 0.35546875, "learning_rate": 1.94201102242451e-05, "loss": 2.0803, "step": 13171 }, { "epoch": 0.4249794822054764, "grad_norm": 0.369140625, "learning_rate": 1.9418606343150557e-05, "loss": 2.0879, "step": 13172 }, { "epoch": 0.4250117460592727, "grad_norm": 0.361328125, "learning_rate": 1.941710241341917e-05, "loss": 2.076, "step": 13173 }, { "epoch": 0.42504400991306907, "grad_norm": 0.35546875, "learning_rate": 1.941559843506748e-05, "loss": 2.0579, "step": 13174 }, { "epoch": 0.4250762737668654, "grad_norm": 0.375, "learning_rate": 1.9414094408112056e-05, "loss": 2.0954, "step": 13175 }, { "epoch": 0.42510853762066175, "grad_norm": 0.3828125, "learning_rate": 1.941259033256945e-05, "loss": 2.0619, "step": 13176 }, { "epoch": 0.4251408014744581, "grad_norm": 0.359375, "learning_rate": 1.941108620845621e-05, "loss": 2.0781, "step": 13177 }, { "epoch": 0.42517306532825444, "grad_norm": 0.376953125, "learning_rate": 1.9409582035788896e-05, "loss": 2.0793, "step": 13178 }, { "epoch": 0.4252053291820508, "grad_norm": 0.349609375, "learning_rate": 1.9408077814584072e-05, "loss": 2.0584, "step": 13179 }, { "epoch": 0.4252375930358472, "grad_norm": 0.384765625, "learning_rate": 1.9406573544858288e-05, "loss": 2.0853, "step": 13180 }, { "epoch": 0.4252698568896435, "grad_norm": 0.369140625, "learning_rate": 1.94050692266281e-05, "loss": 2.0956, "step": 13181 }, { "epoch": 0.42530212074343987, "grad_norm": 0.3984375, "learning_rate": 1.9403564859910074e-05, "loss": 2.084, "step": 13182 }, { "epoch": 0.4253343845972362, "grad_norm": 0.361328125, "learning_rate": 1.9402060444720763e-05, "loss": 2.0244, "step": 13183 }, { "epoch": 0.42536664845103256, "grad_norm": 0.400390625, "learning_rate": 1.9400555981076733e-05, "loss": 2.0835, "step": 13184 }, { "epoch": 0.4253989123048289, "grad_norm": 0.361328125, "learning_rate": 1.9399051468994534e-05, "loss": 2.0917, "step": 13185 }, { "epoch": 0.42543117615862525, "grad_norm": 0.416015625, "learning_rate": 1.9397546908490735e-05, "loss": 2.0568, "step": 13186 }, { "epoch": 0.4254634400124216, "grad_norm": 0.376953125, "learning_rate": 1.9396042299581893e-05, "loss": 2.0828, "step": 13187 }, { "epoch": 0.42549570386621793, "grad_norm": 0.36328125, "learning_rate": 1.9394537642284572e-05, "loss": 2.0781, "step": 13188 }, { "epoch": 0.4255279677200143, "grad_norm": 0.396484375, "learning_rate": 1.939303293661533e-05, "loss": 2.0687, "step": 13189 }, { "epoch": 0.4255602315738106, "grad_norm": 0.390625, "learning_rate": 1.939152818259074e-05, "loss": 2.0652, "step": 13190 }, { "epoch": 0.42559249542760696, "grad_norm": 0.359375, "learning_rate": 1.9390023380227356e-05, "loss": 2.0726, "step": 13191 }, { "epoch": 0.4256247592814033, "grad_norm": 0.3671875, "learning_rate": 1.938851852954174e-05, "loss": 2.0567, "step": 13192 }, { "epoch": 0.42565702313519965, "grad_norm": 0.361328125, "learning_rate": 1.9387013630550467e-05, "loss": 2.0599, "step": 13193 }, { "epoch": 0.425689286988996, "grad_norm": 0.353515625, "learning_rate": 1.938550868327009e-05, "loss": 2.0904, "step": 13194 }, { "epoch": 0.42572155084279234, "grad_norm": 0.37109375, "learning_rate": 1.938400368771718e-05, "loss": 2.08, "step": 13195 }, { "epoch": 0.4257538146965887, "grad_norm": 0.341796875, "learning_rate": 1.93824986439083e-05, "loss": 2.0765, "step": 13196 }, { "epoch": 0.425786078550385, "grad_norm": 0.35546875, "learning_rate": 1.9380993551860024e-05, "loss": 2.0299, "step": 13197 }, { "epoch": 0.42581834240418137, "grad_norm": 0.35546875, "learning_rate": 1.9379488411588915e-05, "loss": 2.1073, "step": 13198 }, { "epoch": 0.4258506062579777, "grad_norm": 0.365234375, "learning_rate": 1.9377983223111534e-05, "loss": 2.0521, "step": 13199 }, { "epoch": 0.4258828701117741, "grad_norm": 0.365234375, "learning_rate": 1.9376477986444457e-05, "loss": 2.0687, "step": 13200 }, { "epoch": 0.42591513396557046, "grad_norm": 0.349609375, "learning_rate": 1.937497270160425e-05, "loss": 2.0918, "step": 13201 }, { "epoch": 0.4259473978193668, "grad_norm": 0.359375, "learning_rate": 1.937346736860748e-05, "loss": 2.0895, "step": 13202 }, { "epoch": 0.42597966167316315, "grad_norm": 0.3671875, "learning_rate": 1.9371961987470722e-05, "loss": 2.0836, "step": 13203 }, { "epoch": 0.4260119255269595, "grad_norm": 0.361328125, "learning_rate": 1.937045655821054e-05, "loss": 2.075, "step": 13204 }, { "epoch": 0.42604418938075583, "grad_norm": 0.361328125, "learning_rate": 1.9368951080843514e-05, "loss": 2.0533, "step": 13205 }, { "epoch": 0.4260764532345522, "grad_norm": 0.34375, "learning_rate": 1.93674455553862e-05, "loss": 2.0345, "step": 13206 }, { "epoch": 0.4261087170883485, "grad_norm": 0.36328125, "learning_rate": 1.9365939981855182e-05, "loss": 1.9585, "step": 13207 }, { "epoch": 0.42614098094214486, "grad_norm": 0.35546875, "learning_rate": 1.9364434360267033e-05, "loss": 1.9807, "step": 13208 }, { "epoch": 0.4261732447959412, "grad_norm": 0.349609375, "learning_rate": 1.9362928690638312e-05, "loss": 2.0025, "step": 13209 }, { "epoch": 0.42620550864973755, "grad_norm": 0.349609375, "learning_rate": 1.936142297298561e-05, "loss": 1.9969, "step": 13210 }, { "epoch": 0.4262377725035339, "grad_norm": 0.349609375, "learning_rate": 1.9359917207325494e-05, "loss": 2.0029, "step": 13211 }, { "epoch": 0.42627003635733024, "grad_norm": 0.34375, "learning_rate": 1.935841139367453e-05, "loss": 1.9851, "step": 13212 }, { "epoch": 0.4263023002111266, "grad_norm": 0.353515625, "learning_rate": 1.9356905532049304e-05, "loss": 1.9793, "step": 13213 }, { "epoch": 0.4263345640649229, "grad_norm": 0.365234375, "learning_rate": 1.9355399622466388e-05, "loss": 2.0026, "step": 13214 }, { "epoch": 0.42636682791871927, "grad_norm": 0.345703125, "learning_rate": 1.9353893664942353e-05, "loss": 1.9863, "step": 13215 }, { "epoch": 0.4263990917725156, "grad_norm": 0.3671875, "learning_rate": 1.9352387659493787e-05, "loss": 2.0422, "step": 13216 }, { "epoch": 0.42643135562631196, "grad_norm": 0.357421875, "learning_rate": 1.9350881606137254e-05, "loss": 2.0904, "step": 13217 }, { "epoch": 0.4264636194801083, "grad_norm": 0.357421875, "learning_rate": 1.9349375504889343e-05, "loss": 2.06, "step": 13218 }, { "epoch": 0.42649588333390465, "grad_norm": 0.3671875, "learning_rate": 1.9347869355766625e-05, "loss": 2.0602, "step": 13219 }, { "epoch": 0.42652814718770105, "grad_norm": 0.361328125, "learning_rate": 1.9346363158785678e-05, "loss": 2.0846, "step": 13220 }, { "epoch": 0.4265604110414974, "grad_norm": 0.3828125, "learning_rate": 1.9344856913963087e-05, "loss": 2.0802, "step": 13221 }, { "epoch": 0.42659267489529373, "grad_norm": 0.361328125, "learning_rate": 1.9343350621315426e-05, "loss": 2.1013, "step": 13222 }, { "epoch": 0.4266249387490901, "grad_norm": 0.408203125, "learning_rate": 1.9341844280859275e-05, "loss": 2.0963, "step": 13223 }, { "epoch": 0.4266572026028864, "grad_norm": 0.412109375, "learning_rate": 1.934033789261122e-05, "loss": 2.0027, "step": 13224 }, { "epoch": 0.42668946645668276, "grad_norm": 0.388671875, "learning_rate": 1.933883145658784e-05, "loss": 1.9836, "step": 13225 }, { "epoch": 0.4267217303104791, "grad_norm": 0.376953125, "learning_rate": 1.933732497280571e-05, "loss": 1.9725, "step": 13226 }, { "epoch": 0.42675399416427545, "grad_norm": 0.35546875, "learning_rate": 1.9335818441281422e-05, "loss": 1.9883, "step": 13227 }, { "epoch": 0.4267862580180718, "grad_norm": 0.3828125, "learning_rate": 1.933431186203156e-05, "loss": 1.978, "step": 13228 }, { "epoch": 0.42681852187186814, "grad_norm": 0.37890625, "learning_rate": 1.9332805235072696e-05, "loss": 1.9937, "step": 13229 }, { "epoch": 0.4268507857256645, "grad_norm": 0.384765625, "learning_rate": 1.9331298560421423e-05, "loss": 1.9608, "step": 13230 }, { "epoch": 0.4268830495794608, "grad_norm": 0.37109375, "learning_rate": 1.9329791838094325e-05, "loss": 2.0021, "step": 13231 }, { "epoch": 0.42691531343325717, "grad_norm": 0.37109375, "learning_rate": 1.932828506810798e-05, "loss": 1.9697, "step": 13232 }, { "epoch": 0.4269475772870535, "grad_norm": 0.365234375, "learning_rate": 1.932677825047898e-05, "loss": 2.0011, "step": 13233 }, { "epoch": 0.42697984114084986, "grad_norm": 0.373046875, "learning_rate": 1.9325271385223903e-05, "loss": 2.002, "step": 13234 }, { "epoch": 0.4270121049946462, "grad_norm": 0.353515625, "learning_rate": 1.932376447235935e-05, "loss": 1.9864, "step": 13235 }, { "epoch": 0.42704436884844255, "grad_norm": 0.373046875, "learning_rate": 1.93222575119019e-05, "loss": 1.9979, "step": 13236 }, { "epoch": 0.4270766327022389, "grad_norm": 0.361328125, "learning_rate": 1.9320750503868133e-05, "loss": 2.0164, "step": 13237 }, { "epoch": 0.42710889655603523, "grad_norm": 0.380859375, "learning_rate": 1.931924344827465e-05, "loss": 1.996, "step": 13238 }, { "epoch": 0.4271411604098316, "grad_norm": 0.359375, "learning_rate": 1.9317736345138036e-05, "loss": 2.0154, "step": 13239 }, { "epoch": 0.427173424263628, "grad_norm": 0.376953125, "learning_rate": 1.931622919447487e-05, "loss": 1.9571, "step": 13240 }, { "epoch": 0.4272056881174243, "grad_norm": 0.36328125, "learning_rate": 1.9314721996301753e-05, "loss": 1.9814, "step": 13241 }, { "epoch": 0.42723795197122066, "grad_norm": 0.375, "learning_rate": 1.9313214750635273e-05, "loss": 2.01, "step": 13242 }, { "epoch": 0.427270215825017, "grad_norm": 0.36328125, "learning_rate": 1.9311707457492018e-05, "loss": 1.9797, "step": 13243 }, { "epoch": 0.42730247967881335, "grad_norm": 0.365234375, "learning_rate": 1.931020011688858e-05, "loss": 1.9856, "step": 13244 }, { "epoch": 0.4273347435326097, "grad_norm": 0.3671875, "learning_rate": 1.9308692728841556e-05, "loss": 2.0237, "step": 13245 }, { "epoch": 0.42736700738640604, "grad_norm": 0.4140625, "learning_rate": 1.9307185293367526e-05, "loss": 1.9525, "step": 13246 }, { "epoch": 0.4273992712402024, "grad_norm": 0.39453125, "learning_rate": 1.9305677810483094e-05, "loss": 1.9955, "step": 13247 }, { "epoch": 0.4274315350939987, "grad_norm": 0.349609375, "learning_rate": 1.9304170280204847e-05, "loss": 1.9569, "step": 13248 }, { "epoch": 0.42746379894779507, "grad_norm": 0.3671875, "learning_rate": 1.9302662702549384e-05, "loss": 1.9471, "step": 13249 }, { "epoch": 0.4274960628015914, "grad_norm": 0.375, "learning_rate": 1.9301155077533294e-05, "loss": 1.9597, "step": 13250 }, { "epoch": 0.42752832665538776, "grad_norm": 0.375, "learning_rate": 1.9299647405173178e-05, "loss": 1.9727, "step": 13251 }, { "epoch": 0.4275605905091841, "grad_norm": 0.349609375, "learning_rate": 1.929813968548563e-05, "loss": 1.9806, "step": 13252 }, { "epoch": 0.42759285436298045, "grad_norm": 0.361328125, "learning_rate": 1.929663191848724e-05, "loss": 1.9451, "step": 13253 }, { "epoch": 0.4276251182167768, "grad_norm": 0.353515625, "learning_rate": 1.9295124104194604e-05, "loss": 2.0038, "step": 13254 }, { "epoch": 0.42765738207057313, "grad_norm": 0.37890625, "learning_rate": 1.929361624262433e-05, "loss": 2.0034, "step": 13255 }, { "epoch": 0.4276896459243695, "grad_norm": 0.353515625, "learning_rate": 1.929210833379301e-05, "loss": 1.9796, "step": 13256 }, { "epoch": 0.4277219097781658, "grad_norm": 0.380859375, "learning_rate": 1.9290600377717234e-05, "loss": 1.9555, "step": 13257 }, { "epoch": 0.42775417363196216, "grad_norm": 0.375, "learning_rate": 1.928909237441361e-05, "loss": 1.9883, "step": 13258 }, { "epoch": 0.4277864374857585, "grad_norm": 0.3671875, "learning_rate": 1.9287584323898736e-05, "loss": 1.969, "step": 13259 }, { "epoch": 0.42781870133955485, "grad_norm": 0.365234375, "learning_rate": 1.9286076226189208e-05, "loss": 2.0007, "step": 13260 }, { "epoch": 0.42785096519335125, "grad_norm": 0.384765625, "learning_rate": 1.9284568081301628e-05, "loss": 1.9769, "step": 13261 }, { "epoch": 0.4278832290471476, "grad_norm": 0.37109375, "learning_rate": 1.9283059889252596e-05, "loss": 1.9905, "step": 13262 }, { "epoch": 0.42791549290094394, "grad_norm": 0.35546875, "learning_rate": 1.9281551650058713e-05, "loss": 1.9824, "step": 13263 }, { "epoch": 0.4279477567547403, "grad_norm": 0.380859375, "learning_rate": 1.928004336373658e-05, "loss": 1.9934, "step": 13264 }, { "epoch": 0.4279800206085366, "grad_norm": 0.390625, "learning_rate": 1.92785350303028e-05, "loss": 1.9952, "step": 13265 }, { "epoch": 0.42801228446233297, "grad_norm": 0.357421875, "learning_rate": 1.927702664977398e-05, "loss": 1.9764, "step": 13266 }, { "epoch": 0.4280445483161293, "grad_norm": 0.369140625, "learning_rate": 1.9275518222166718e-05, "loss": 1.9637, "step": 13267 }, { "epoch": 0.42807681216992566, "grad_norm": 0.3671875, "learning_rate": 1.927400974749761e-05, "loss": 2.0113, "step": 13268 }, { "epoch": 0.428109076023722, "grad_norm": 0.396484375, "learning_rate": 1.927250122578328e-05, "loss": 1.9757, "step": 13269 }, { "epoch": 0.42814133987751835, "grad_norm": 0.353515625, "learning_rate": 1.927099265704032e-05, "loss": 1.9979, "step": 13270 }, { "epoch": 0.4281736037313147, "grad_norm": 0.3671875, "learning_rate": 1.9269484041285333e-05, "loss": 2.0134, "step": 13271 }, { "epoch": 0.42820586758511103, "grad_norm": 0.373046875, "learning_rate": 1.926797537853493e-05, "loss": 1.9834, "step": 13272 }, { "epoch": 0.4282381314389074, "grad_norm": 0.369140625, "learning_rate": 1.9266466668805715e-05, "loss": 1.9795, "step": 13273 }, { "epoch": 0.4282703952927037, "grad_norm": 0.349609375, "learning_rate": 1.92649579121143e-05, "loss": 1.9916, "step": 13274 }, { "epoch": 0.42830265914650006, "grad_norm": 0.34375, "learning_rate": 1.926344910847728e-05, "loss": 2.0048, "step": 13275 }, { "epoch": 0.4283349230002964, "grad_norm": 0.341796875, "learning_rate": 1.926194025791128e-05, "loss": 2.0047, "step": 13276 }, { "epoch": 0.42836718685409275, "grad_norm": 0.396484375, "learning_rate": 1.926043136043289e-05, "loss": 1.9528, "step": 13277 }, { "epoch": 0.4283994507078891, "grad_norm": 0.37890625, "learning_rate": 1.9258922416058734e-05, "loss": 2.0051, "step": 13278 }, { "epoch": 0.42843171456168544, "grad_norm": 0.37890625, "learning_rate": 1.9257413424805416e-05, "loss": 2.015, "step": 13279 }, { "epoch": 0.4284639784154818, "grad_norm": 0.3828125, "learning_rate": 1.925590438668954e-05, "loss": 1.9947, "step": 13280 }, { "epoch": 0.4284962422692782, "grad_norm": 0.361328125, "learning_rate": 1.9254395301727728e-05, "loss": 1.9579, "step": 13281 }, { "epoch": 0.4285285061230745, "grad_norm": 0.408203125, "learning_rate": 1.9252886169936576e-05, "loss": 1.957, "step": 13282 }, { "epoch": 0.42856076997687087, "grad_norm": 0.404296875, "learning_rate": 1.925137699133271e-05, "loss": 1.9975, "step": 13283 }, { "epoch": 0.4285930338306672, "grad_norm": 0.42578125, "learning_rate": 1.9249867765932737e-05, "loss": 1.9603, "step": 13284 }, { "epoch": 0.42862529768446356, "grad_norm": 0.39453125, "learning_rate": 1.924835849375326e-05, "loss": 1.9292, "step": 13285 }, { "epoch": 0.4286575615382599, "grad_norm": 0.423828125, "learning_rate": 1.9246849174810915e-05, "loss": 1.9536, "step": 13286 }, { "epoch": 0.42868982539205625, "grad_norm": 0.419921875, "learning_rate": 1.9245339809122294e-05, "loss": 1.9686, "step": 13287 }, { "epoch": 0.4287220892458526, "grad_norm": 0.3984375, "learning_rate": 1.9243830396704014e-05, "loss": 1.9617, "step": 13288 }, { "epoch": 0.42875435309964893, "grad_norm": 0.37890625, "learning_rate": 1.9242320937572697e-05, "loss": 1.9745, "step": 13289 }, { "epoch": 0.4287866169534453, "grad_norm": 0.443359375, "learning_rate": 1.9240811431744955e-05, "loss": 1.9905, "step": 13290 }, { "epoch": 0.4288188808072416, "grad_norm": 0.392578125, "learning_rate": 1.92393018792374e-05, "loss": 1.978, "step": 13291 }, { "epoch": 0.42885114466103796, "grad_norm": 0.41796875, "learning_rate": 1.9237792280066652e-05, "loss": 1.9654, "step": 13292 }, { "epoch": 0.4288834085148343, "grad_norm": 0.41015625, "learning_rate": 1.923628263424933e-05, "loss": 1.9574, "step": 13293 }, { "epoch": 0.42891567236863065, "grad_norm": 0.40234375, "learning_rate": 1.9234772941802047e-05, "loss": 2.0226, "step": 13294 }, { "epoch": 0.428947936222427, "grad_norm": 0.375, "learning_rate": 1.923326320274142e-05, "loss": 1.9636, "step": 13295 }, { "epoch": 0.42898020007622334, "grad_norm": 0.376953125, "learning_rate": 1.923175341708407e-05, "loss": 1.9804, "step": 13296 }, { "epoch": 0.4290124639300197, "grad_norm": 0.380859375, "learning_rate": 1.9230243584846613e-05, "loss": 1.9845, "step": 13297 }, { "epoch": 0.429044727783816, "grad_norm": 0.361328125, "learning_rate": 1.9228733706045673e-05, "loss": 1.9853, "step": 13298 }, { "epoch": 0.42907699163761237, "grad_norm": 0.369140625, "learning_rate": 1.9227223780697858e-05, "loss": 1.9737, "step": 13299 }, { "epoch": 0.4291092554914087, "grad_norm": 0.375, "learning_rate": 1.9225713808819805e-05, "loss": 1.997, "step": 13300 }, { "epoch": 0.4291415193452051, "grad_norm": 0.380859375, "learning_rate": 1.922420379042812e-05, "loss": 1.9758, "step": 13301 }, { "epoch": 0.42917378319900146, "grad_norm": 0.369140625, "learning_rate": 1.9222693725539433e-05, "loss": 1.9981, "step": 13302 }, { "epoch": 0.4292060470527978, "grad_norm": 0.369140625, "learning_rate": 1.922118361417036e-05, "loss": 1.9623, "step": 13303 }, { "epoch": 0.42923831090659414, "grad_norm": 0.357421875, "learning_rate": 1.921967345633753e-05, "loss": 1.9992, "step": 13304 }, { "epoch": 0.4292705747603905, "grad_norm": 0.359375, "learning_rate": 1.921816325205756e-05, "loss": 1.9846, "step": 13305 }, { "epoch": 0.42930283861418683, "grad_norm": 0.35546875, "learning_rate": 1.921665300134707e-05, "loss": 1.9836, "step": 13306 }, { "epoch": 0.4293351024679832, "grad_norm": 0.365234375, "learning_rate": 1.9215142704222698e-05, "loss": 1.9555, "step": 13307 }, { "epoch": 0.4293673663217795, "grad_norm": 0.392578125, "learning_rate": 1.9213632360701052e-05, "loss": 1.9631, "step": 13308 }, { "epoch": 0.42939963017557586, "grad_norm": 0.341796875, "learning_rate": 1.9212121970798768e-05, "loss": 1.9703, "step": 13309 }, { "epoch": 0.4294318940293722, "grad_norm": 0.376953125, "learning_rate": 1.9210611534532466e-05, "loss": 2.0097, "step": 13310 }, { "epoch": 0.42946415788316855, "grad_norm": 0.3515625, "learning_rate": 1.9209101051918774e-05, "loss": 1.9774, "step": 13311 }, { "epoch": 0.4294964217369649, "grad_norm": 0.361328125, "learning_rate": 1.9207590522974318e-05, "loss": 1.9844, "step": 13312 }, { "epoch": 0.42952868559076124, "grad_norm": 0.375, "learning_rate": 1.9206079947715717e-05, "loss": 1.986, "step": 13313 }, { "epoch": 0.4295609494445576, "grad_norm": 0.34375, "learning_rate": 1.920456932615961e-05, "loss": 1.9706, "step": 13314 }, { "epoch": 0.4295932132983539, "grad_norm": 0.375, "learning_rate": 1.9203058658322624e-05, "loss": 2.0167, "step": 13315 }, { "epoch": 0.42962547715215027, "grad_norm": 0.3671875, "learning_rate": 1.9201547944221373e-05, "loss": 2.0045, "step": 13316 }, { "epoch": 0.4296577410059466, "grad_norm": 0.380859375, "learning_rate": 1.9200037183872506e-05, "loss": 1.9882, "step": 13317 }, { "epoch": 0.42969000485974296, "grad_norm": 0.369140625, "learning_rate": 1.9198526377292643e-05, "loss": 1.9808, "step": 13318 }, { "epoch": 0.4297222687135393, "grad_norm": 0.365234375, "learning_rate": 1.9197015524498407e-05, "loss": 1.9831, "step": 13319 }, { "epoch": 0.42975453256733565, "grad_norm": 0.357421875, "learning_rate": 1.919550462550644e-05, "loss": 1.9533, "step": 13320 }, { "epoch": 0.42978679642113204, "grad_norm": 0.349609375, "learning_rate": 1.9193993680333367e-05, "loss": 1.975, "step": 13321 }, { "epoch": 0.4298190602749284, "grad_norm": 0.34765625, "learning_rate": 1.9192482688995817e-05, "loss": 1.9895, "step": 13322 }, { "epoch": 0.42985132412872473, "grad_norm": 0.353515625, "learning_rate": 1.919097165151043e-05, "loss": 2.0212, "step": 13323 }, { "epoch": 0.4298835879825211, "grad_norm": 0.349609375, "learning_rate": 1.9189460567893833e-05, "loss": 2.0001, "step": 13324 }, { "epoch": 0.4299158518363174, "grad_norm": 0.37890625, "learning_rate": 1.9187949438162656e-05, "loss": 1.9708, "step": 13325 }, { "epoch": 0.42994811569011376, "grad_norm": 0.359375, "learning_rate": 1.9186438262333538e-05, "loss": 1.9783, "step": 13326 }, { "epoch": 0.4299803795439101, "grad_norm": 0.359375, "learning_rate": 1.9184927040423113e-05, "loss": 1.9381, "step": 13327 }, { "epoch": 0.43001264339770645, "grad_norm": 0.357421875, "learning_rate": 1.918341577244801e-05, "loss": 1.9969, "step": 13328 }, { "epoch": 0.4300449072515028, "grad_norm": 0.34765625, "learning_rate": 1.918190445842487e-05, "loss": 2.0247, "step": 13329 }, { "epoch": 0.43007717110529914, "grad_norm": 0.341796875, "learning_rate": 1.9180393098370328e-05, "loss": 1.9839, "step": 13330 }, { "epoch": 0.4301094349590955, "grad_norm": 0.35546875, "learning_rate": 1.9178881692301012e-05, "loss": 1.9735, "step": 13331 }, { "epoch": 0.4301416988128918, "grad_norm": 0.349609375, "learning_rate": 1.917737024023357e-05, "loss": 1.9818, "step": 13332 }, { "epoch": 0.43017396266668817, "grad_norm": 0.369140625, "learning_rate": 1.917585874218463e-05, "loss": 1.9809, "step": 13333 }, { "epoch": 0.4302062265204845, "grad_norm": 0.357421875, "learning_rate": 1.9174347198170828e-05, "loss": 1.9999, "step": 13334 }, { "epoch": 0.43023849037428086, "grad_norm": 0.369140625, "learning_rate": 1.9172835608208816e-05, "loss": 1.9634, "step": 13335 }, { "epoch": 0.4302707542280772, "grad_norm": 0.357421875, "learning_rate": 1.9171323972315216e-05, "loss": 1.9942, "step": 13336 }, { "epoch": 0.43030301808187355, "grad_norm": 0.345703125, "learning_rate": 1.9169812290506677e-05, "loss": 1.9314, "step": 13337 }, { "epoch": 0.4303352819356699, "grad_norm": 0.345703125, "learning_rate": 1.9168300562799838e-05, "loss": 2.024, "step": 13338 }, { "epoch": 0.43036754578946623, "grad_norm": 0.3515625, "learning_rate": 1.9166788789211332e-05, "loss": 1.9716, "step": 13339 }, { "epoch": 0.4303998096432626, "grad_norm": 0.3671875, "learning_rate": 1.9165276969757805e-05, "loss": 1.9776, "step": 13340 }, { "epoch": 0.430432073497059, "grad_norm": 0.353515625, "learning_rate": 1.91637651044559e-05, "loss": 1.9767, "step": 13341 }, { "epoch": 0.4304643373508553, "grad_norm": 0.369140625, "learning_rate": 1.9162253193322256e-05, "loss": 1.9602, "step": 13342 }, { "epoch": 0.43049660120465166, "grad_norm": 0.353515625, "learning_rate": 1.9160741236373516e-05, "loss": 1.9992, "step": 13343 }, { "epoch": 0.430528865058448, "grad_norm": 0.361328125, "learning_rate": 1.9159229233626317e-05, "loss": 2.0045, "step": 13344 }, { "epoch": 0.43056112891224435, "grad_norm": 0.3671875, "learning_rate": 1.9157717185097307e-05, "loss": 1.9871, "step": 13345 }, { "epoch": 0.4305933927660407, "grad_norm": 0.357421875, "learning_rate": 1.9156205090803133e-05, "loss": 1.9777, "step": 13346 }, { "epoch": 0.43062565661983704, "grad_norm": 0.365234375, "learning_rate": 1.915469295076043e-05, "loss": 1.9451, "step": 13347 }, { "epoch": 0.4306579204736334, "grad_norm": 0.3671875, "learning_rate": 1.915318076498585e-05, "loss": 1.9901, "step": 13348 }, { "epoch": 0.4306901843274297, "grad_norm": 0.341796875, "learning_rate": 1.9151668533496035e-05, "loss": 2.0129, "step": 13349 }, { "epoch": 0.43072244818122607, "grad_norm": 0.34765625, "learning_rate": 1.915015625630763e-05, "loss": 1.9995, "step": 13350 }, { "epoch": 0.4307547120350224, "grad_norm": 0.357421875, "learning_rate": 1.9148643933437287e-05, "loss": 2.0196, "step": 13351 }, { "epoch": 0.43078697588881876, "grad_norm": 0.357421875, "learning_rate": 1.9147131564901644e-05, "loss": 2.0138, "step": 13352 }, { "epoch": 0.4308192397426151, "grad_norm": 0.365234375, "learning_rate": 1.9145619150717353e-05, "loss": 1.9883, "step": 13353 }, { "epoch": 0.43085150359641144, "grad_norm": 0.3671875, "learning_rate": 1.9144106690901056e-05, "loss": 2.0141, "step": 13354 }, { "epoch": 0.4308837674502078, "grad_norm": 0.341796875, "learning_rate": 1.914259418546941e-05, "loss": 1.9984, "step": 13355 }, { "epoch": 0.43091603130400413, "grad_norm": 0.359375, "learning_rate": 1.914108163443906e-05, "loss": 2.0047, "step": 13356 }, { "epoch": 0.4309482951578005, "grad_norm": 0.35546875, "learning_rate": 1.9139569037826656e-05, "loss": 1.9989, "step": 13357 }, { "epoch": 0.4309805590115968, "grad_norm": 0.345703125, "learning_rate": 1.913805639564884e-05, "loss": 2.0224, "step": 13358 }, { "epoch": 0.43101282286539316, "grad_norm": 0.3828125, "learning_rate": 1.913654370792227e-05, "loss": 1.9597, "step": 13359 }, { "epoch": 0.4310450867191895, "grad_norm": 0.34765625, "learning_rate": 1.9135030974663595e-05, "loss": 1.9968, "step": 13360 }, { "epoch": 0.43107735057298585, "grad_norm": 0.369140625, "learning_rate": 1.9133518195889466e-05, "loss": 1.9709, "step": 13361 }, { "epoch": 0.43110961442678225, "grad_norm": 0.34375, "learning_rate": 1.913200537161653e-05, "loss": 1.9488, "step": 13362 }, { "epoch": 0.4311418782805786, "grad_norm": 0.37890625, "learning_rate": 1.9130492501861452e-05, "loss": 2.0038, "step": 13363 }, { "epoch": 0.43117414213437494, "grad_norm": 0.373046875, "learning_rate": 1.912897958664087e-05, "loss": 2.0057, "step": 13364 }, { "epoch": 0.4312064059881713, "grad_norm": 0.37109375, "learning_rate": 1.912746662597144e-05, "loss": 2.0009, "step": 13365 }, { "epoch": 0.4312386698419676, "grad_norm": 0.35546875, "learning_rate": 1.912595361986982e-05, "loss": 1.993, "step": 13366 }, { "epoch": 0.43127093369576397, "grad_norm": 0.365234375, "learning_rate": 1.9124440568352666e-05, "loss": 2.0302, "step": 13367 }, { "epoch": 0.4313031975495603, "grad_norm": 0.373046875, "learning_rate": 1.9122927471436624e-05, "loss": 1.9629, "step": 13368 }, { "epoch": 0.43133546140335666, "grad_norm": 0.3515625, "learning_rate": 1.9121414329138363e-05, "loss": 1.9682, "step": 13369 }, { "epoch": 0.431367725257153, "grad_norm": 0.36328125, "learning_rate": 1.9119901141474522e-05, "loss": 1.9956, "step": 13370 }, { "epoch": 0.43139998911094934, "grad_norm": 0.373046875, "learning_rate": 1.9118387908461766e-05, "loss": 2.0091, "step": 13371 }, { "epoch": 0.4314322529647457, "grad_norm": 0.34765625, "learning_rate": 1.9116874630116753e-05, "loss": 1.9312, "step": 13372 }, { "epoch": 0.43146451681854203, "grad_norm": 0.3671875, "learning_rate": 1.9115361306456134e-05, "loss": 1.9914, "step": 13373 }, { "epoch": 0.4314967806723384, "grad_norm": 0.353515625, "learning_rate": 1.9113847937496573e-05, "loss": 1.9864, "step": 13374 }, { "epoch": 0.4315290445261347, "grad_norm": 0.35546875, "learning_rate": 1.9112334523254725e-05, "loss": 2.0224, "step": 13375 }, { "epoch": 0.43156130837993106, "grad_norm": 0.390625, "learning_rate": 1.9110821063747246e-05, "loss": 1.9957, "step": 13376 }, { "epoch": 0.4315935722337274, "grad_norm": 0.37109375, "learning_rate": 1.91093075589908e-05, "loss": 2.0137, "step": 13377 }, { "epoch": 0.43162583608752375, "grad_norm": 0.3828125, "learning_rate": 1.9107794009002043e-05, "loss": 1.9886, "step": 13378 }, { "epoch": 0.4316580999413201, "grad_norm": 0.38671875, "learning_rate": 1.9106280413797637e-05, "loss": 1.9772, "step": 13379 }, { "epoch": 0.43169036379511644, "grad_norm": 0.37109375, "learning_rate": 1.9104766773394245e-05, "loss": 1.9776, "step": 13380 }, { "epoch": 0.4317226276489128, "grad_norm": 0.369140625, "learning_rate": 1.910325308780852e-05, "loss": 2.0331, "step": 13381 }, { "epoch": 0.4317548915027092, "grad_norm": 0.376953125, "learning_rate": 1.910173935705713e-05, "loss": 1.9697, "step": 13382 }, { "epoch": 0.4317871553565055, "grad_norm": 0.345703125, "learning_rate": 1.910022558115674e-05, "loss": 1.9626, "step": 13383 }, { "epoch": 0.43181941921030187, "grad_norm": 0.369140625, "learning_rate": 1.9098711760124e-05, "loss": 1.9806, "step": 13384 }, { "epoch": 0.4318516830640982, "grad_norm": 0.3515625, "learning_rate": 1.909719789397559e-05, "loss": 1.9816, "step": 13385 }, { "epoch": 0.43188394691789456, "grad_norm": 0.349609375, "learning_rate": 1.9095683982728162e-05, "loss": 1.9609, "step": 13386 }, { "epoch": 0.4319162107716909, "grad_norm": 0.349609375, "learning_rate": 1.9094170026398384e-05, "loss": 1.9882, "step": 13387 }, { "epoch": 0.43194847462548724, "grad_norm": 0.357421875, "learning_rate": 1.9092656025002916e-05, "loss": 1.9868, "step": 13388 }, { "epoch": 0.4319807384792836, "grad_norm": 0.357421875, "learning_rate": 1.909114197855843e-05, "loss": 1.9891, "step": 13389 }, { "epoch": 0.43201300233307993, "grad_norm": 0.36328125, "learning_rate": 1.9089627887081585e-05, "loss": 2.0154, "step": 13390 }, { "epoch": 0.4320452661868763, "grad_norm": 0.345703125, "learning_rate": 1.908811375058905e-05, "loss": 1.9782, "step": 13391 }, { "epoch": 0.4320775300406726, "grad_norm": 0.34375, "learning_rate": 1.9086599569097495e-05, "loss": 1.9772, "step": 13392 }, { "epoch": 0.43210979389446896, "grad_norm": 0.380859375, "learning_rate": 1.908508534262358e-05, "loss": 1.9781, "step": 13393 }, { "epoch": 0.4321420577482653, "grad_norm": 0.359375, "learning_rate": 1.908357107118398e-05, "loss": 1.9767, "step": 13394 }, { "epoch": 0.43217432160206165, "grad_norm": 0.376953125, "learning_rate": 1.9082056754795357e-05, "loss": 1.9814, "step": 13395 }, { "epoch": 0.432206585455858, "grad_norm": 0.3671875, "learning_rate": 1.9080542393474383e-05, "loss": 1.9733, "step": 13396 }, { "epoch": 0.43223884930965434, "grad_norm": 0.357421875, "learning_rate": 1.9079027987237725e-05, "loss": 1.9783, "step": 13397 }, { "epoch": 0.4322711131634507, "grad_norm": 0.40625, "learning_rate": 1.9077513536102054e-05, "loss": 1.9694, "step": 13398 }, { "epoch": 0.432303377017247, "grad_norm": 0.390625, "learning_rate": 1.9075999040084034e-05, "loss": 2.0295, "step": 13399 }, { "epoch": 0.43233564087104337, "grad_norm": 0.357421875, "learning_rate": 1.9074484499200346e-05, "loss": 1.9552, "step": 13400 }, { "epoch": 0.4323679047248397, "grad_norm": 0.37109375, "learning_rate": 1.9072969913467656e-05, "loss": 1.996, "step": 13401 }, { "epoch": 0.4324001685786361, "grad_norm": 0.365234375, "learning_rate": 1.907145528290263e-05, "loss": 1.9915, "step": 13402 }, { "epoch": 0.43243243243243246, "grad_norm": 0.349609375, "learning_rate": 1.906994060752195e-05, "loss": 2.0018, "step": 13403 }, { "epoch": 0.4324646962862288, "grad_norm": 0.361328125, "learning_rate": 1.9068425887342283e-05, "loss": 1.9533, "step": 13404 }, { "epoch": 0.43249696014002514, "grad_norm": 0.369140625, "learning_rate": 1.9066911122380305e-05, "loss": 1.9632, "step": 13405 }, { "epoch": 0.4325292239938215, "grad_norm": 0.3515625, "learning_rate": 1.906539631265268e-05, "loss": 1.9571, "step": 13406 }, { "epoch": 0.43256148784761783, "grad_norm": 0.361328125, "learning_rate": 1.9063881458176093e-05, "loss": 1.9911, "step": 13407 }, { "epoch": 0.4325937517014142, "grad_norm": 0.361328125, "learning_rate": 1.9062366558967216e-05, "loss": 1.9847, "step": 13408 }, { "epoch": 0.4326260155552105, "grad_norm": 0.359375, "learning_rate": 1.9060851615042717e-05, "loss": 1.9903, "step": 13409 }, { "epoch": 0.43265827940900686, "grad_norm": 0.361328125, "learning_rate": 1.905933662641928e-05, "loss": 1.9835, "step": 13410 }, { "epoch": 0.4326905432628032, "grad_norm": 0.380859375, "learning_rate": 1.9057821593113578e-05, "loss": 2.0042, "step": 13411 }, { "epoch": 0.43272280711659955, "grad_norm": 0.36328125, "learning_rate": 1.9056306515142286e-05, "loss": 1.9982, "step": 13412 }, { "epoch": 0.4327550709703959, "grad_norm": 0.37890625, "learning_rate": 1.9054791392522083e-05, "loss": 1.9829, "step": 13413 }, { "epoch": 0.43278733482419224, "grad_norm": 0.375, "learning_rate": 1.9053276225269646e-05, "loss": 1.9563, "step": 13414 }, { "epoch": 0.4328195986779886, "grad_norm": 0.373046875, "learning_rate": 1.905176101340165e-05, "loss": 1.991, "step": 13415 }, { "epoch": 0.4328518625317849, "grad_norm": 0.349609375, "learning_rate": 1.9050245756934773e-05, "loss": 1.9721, "step": 13416 }, { "epoch": 0.43288412638558127, "grad_norm": 0.373046875, "learning_rate": 1.9048730455885704e-05, "loss": 1.9766, "step": 13417 }, { "epoch": 0.4329163902393776, "grad_norm": 0.3515625, "learning_rate": 1.9047215110271107e-05, "loss": 1.9936, "step": 13418 }, { "epoch": 0.43294865409317396, "grad_norm": 0.3828125, "learning_rate": 1.9045699720107676e-05, "loss": 1.9667, "step": 13419 }, { "epoch": 0.4329809179469703, "grad_norm": 0.34375, "learning_rate": 1.9044184285412077e-05, "loss": 2.0064, "step": 13420 }, { "epoch": 0.43301318180076664, "grad_norm": 0.404296875, "learning_rate": 1.9042668806201005e-05, "loss": 1.9765, "step": 13421 }, { "epoch": 0.43304544565456304, "grad_norm": 0.341796875, "learning_rate": 1.9041153282491134e-05, "loss": 2.008, "step": 13422 }, { "epoch": 0.4330777095083594, "grad_norm": 0.3984375, "learning_rate": 1.9039637714299145e-05, "loss": 1.963, "step": 13423 }, { "epoch": 0.43310997336215573, "grad_norm": 0.3671875, "learning_rate": 1.9038122101641724e-05, "loss": 2.0149, "step": 13424 }, { "epoch": 0.4331422372159521, "grad_norm": 0.373046875, "learning_rate": 1.9036606444535552e-05, "loss": 2.0038, "step": 13425 }, { "epoch": 0.4331745010697484, "grad_norm": 0.349609375, "learning_rate": 1.9035090742997307e-05, "loss": 1.9735, "step": 13426 }, { "epoch": 0.43320676492354476, "grad_norm": 0.37109375, "learning_rate": 1.903357499704368e-05, "loss": 1.9973, "step": 13427 }, { "epoch": 0.4332390287773411, "grad_norm": 0.34765625, "learning_rate": 1.903205920669136e-05, "loss": 1.9975, "step": 13428 }, { "epoch": 0.43327129263113745, "grad_norm": 0.373046875, "learning_rate": 1.9030543371957024e-05, "loss": 1.9678, "step": 13429 }, { "epoch": 0.4333035564849338, "grad_norm": 0.35546875, "learning_rate": 1.9029027492857346e-05, "loss": 1.9545, "step": 13430 }, { "epoch": 0.43333582033873014, "grad_norm": 0.388671875, "learning_rate": 1.9027511569409033e-05, "loss": 1.9638, "step": 13431 }, { "epoch": 0.4333680841925265, "grad_norm": 0.37890625, "learning_rate": 1.9025995601628763e-05, "loss": 1.9931, "step": 13432 }, { "epoch": 0.4334003480463228, "grad_norm": 0.375, "learning_rate": 1.902447958953322e-05, "loss": 1.9648, "step": 13433 }, { "epoch": 0.43343261190011917, "grad_norm": 0.38671875, "learning_rate": 1.902296353313909e-05, "loss": 2.0241, "step": 13434 }, { "epoch": 0.4334648757539155, "grad_norm": 0.359375, "learning_rate": 1.9021447432463065e-05, "loss": 1.984, "step": 13435 }, { "epoch": 0.43349713960771186, "grad_norm": 0.37890625, "learning_rate": 1.9019931287521833e-05, "loss": 1.9607, "step": 13436 }, { "epoch": 0.4335294034615082, "grad_norm": 0.35546875, "learning_rate": 1.9018415098332077e-05, "loss": 2.0039, "step": 13437 }, { "epoch": 0.43356166731530454, "grad_norm": 0.357421875, "learning_rate": 1.9016898864910494e-05, "loss": 1.9712, "step": 13438 }, { "epoch": 0.4335939311691009, "grad_norm": 0.349609375, "learning_rate": 1.9015382587273772e-05, "loss": 1.9618, "step": 13439 }, { "epoch": 0.43362619502289723, "grad_norm": 0.361328125, "learning_rate": 1.9013866265438595e-05, "loss": 1.9805, "step": 13440 }, { "epoch": 0.4336584588766936, "grad_norm": 0.359375, "learning_rate": 1.9012349899421662e-05, "loss": 1.9743, "step": 13441 }, { "epoch": 0.4336907227304899, "grad_norm": 0.359375, "learning_rate": 1.901083348923966e-05, "loss": 1.9334, "step": 13442 }, { "epoch": 0.4337229865842863, "grad_norm": 0.375, "learning_rate": 1.9009317034909275e-05, "loss": 2.0026, "step": 13443 }, { "epoch": 0.43375525043808266, "grad_norm": 0.3671875, "learning_rate": 1.9007800536447204e-05, "loss": 1.9822, "step": 13444 }, { "epoch": 0.433787514291879, "grad_norm": 0.427734375, "learning_rate": 1.9006283993870145e-05, "loss": 1.9836, "step": 13445 }, { "epoch": 0.43381977814567535, "grad_norm": 0.388671875, "learning_rate": 1.9004767407194787e-05, "loss": 1.9606, "step": 13446 }, { "epoch": 0.4338520419994717, "grad_norm": 0.380859375, "learning_rate": 1.9003250776437814e-05, "loss": 1.9708, "step": 13447 }, { "epoch": 0.43388430585326804, "grad_norm": 0.390625, "learning_rate": 1.9001734101615935e-05, "loss": 1.9763, "step": 13448 }, { "epoch": 0.4339165697070644, "grad_norm": 0.37890625, "learning_rate": 1.9000217382745838e-05, "loss": 1.9831, "step": 13449 }, { "epoch": 0.4339488335608607, "grad_norm": 0.451171875, "learning_rate": 1.8998700619844218e-05, "loss": 2.011, "step": 13450 }, { "epoch": 0.43398109741465707, "grad_norm": 0.375, "learning_rate": 1.8997183812927764e-05, "loss": 2.002, "step": 13451 }, { "epoch": 0.4340133612684534, "grad_norm": 0.3984375, "learning_rate": 1.8995666962013186e-05, "loss": 1.9658, "step": 13452 }, { "epoch": 0.43404562512224976, "grad_norm": 0.34375, "learning_rate": 1.899415006711717e-05, "loss": 1.9681, "step": 13453 }, { "epoch": 0.4340778889760461, "grad_norm": 0.412109375, "learning_rate": 1.8992633128256413e-05, "loss": 1.9874, "step": 13454 }, { "epoch": 0.43411015282984244, "grad_norm": 0.3671875, "learning_rate": 1.8991116145447614e-05, "loss": 1.9899, "step": 13455 }, { "epoch": 0.4341424166836388, "grad_norm": 0.3984375, "learning_rate": 1.8989599118707476e-05, "loss": 1.9874, "step": 13456 }, { "epoch": 0.43417468053743513, "grad_norm": 0.359375, "learning_rate": 1.8988082048052694e-05, "loss": 1.9825, "step": 13457 }, { "epoch": 0.4342069443912315, "grad_norm": 0.384765625, "learning_rate": 1.8986564933499958e-05, "loss": 1.9705, "step": 13458 }, { "epoch": 0.4342392082450278, "grad_norm": 0.353515625, "learning_rate": 1.8985047775065986e-05, "loss": 1.978, "step": 13459 }, { "epoch": 0.43427147209882416, "grad_norm": 0.365234375, "learning_rate": 1.8983530572767458e-05, "loss": 1.9865, "step": 13460 }, { "epoch": 0.4343037359526205, "grad_norm": 0.359375, "learning_rate": 1.8982013326621086e-05, "loss": 1.9675, "step": 13461 }, { "epoch": 0.43433599980641685, "grad_norm": 0.353515625, "learning_rate": 1.8980496036643568e-05, "loss": 1.9465, "step": 13462 }, { "epoch": 0.43436826366021325, "grad_norm": 0.353515625, "learning_rate": 1.8978978702851608e-05, "loss": 1.9659, "step": 13463 }, { "epoch": 0.4344005275140096, "grad_norm": 0.3671875, "learning_rate": 1.89774613252619e-05, "loss": 1.9731, "step": 13464 }, { "epoch": 0.43443279136780594, "grad_norm": 0.341796875, "learning_rate": 1.8975943903891155e-05, "loss": 2.0031, "step": 13465 }, { "epoch": 0.4344650552216023, "grad_norm": 0.369140625, "learning_rate": 1.897442643875607e-05, "loss": 2.0088, "step": 13466 }, { "epoch": 0.4344973190753986, "grad_norm": 0.37109375, "learning_rate": 1.897290892987335e-05, "loss": 2.0051, "step": 13467 }, { "epoch": 0.43452958292919497, "grad_norm": 0.33203125, "learning_rate": 1.8971391377259697e-05, "loss": 1.9986, "step": 13468 }, { "epoch": 0.4345618467829913, "grad_norm": 0.3828125, "learning_rate": 1.896987378093182e-05, "loss": 1.9724, "step": 13469 }, { "epoch": 0.43459411063678766, "grad_norm": 0.37109375, "learning_rate": 1.8968356140906424e-05, "loss": 1.991, "step": 13470 }, { "epoch": 0.434626374490584, "grad_norm": 0.361328125, "learning_rate": 1.89668384572002e-05, "loss": 1.9729, "step": 13471 }, { "epoch": 0.43465863834438034, "grad_norm": 0.375, "learning_rate": 1.896532072982987e-05, "loss": 1.9811, "step": 13472 }, { "epoch": 0.4346909021981767, "grad_norm": 0.353515625, "learning_rate": 1.8963802958812136e-05, "loss": 1.996, "step": 13473 }, { "epoch": 0.43472316605197303, "grad_norm": 0.43359375, "learning_rate": 1.8962285144163705e-05, "loss": 1.9767, "step": 13474 }, { "epoch": 0.4347554299057694, "grad_norm": 0.353515625, "learning_rate": 1.8960767285901273e-05, "loss": 1.9758, "step": 13475 }, { "epoch": 0.4347876937595657, "grad_norm": 0.39453125, "learning_rate": 1.895924938404156e-05, "loss": 1.9361, "step": 13476 }, { "epoch": 0.43481995761336206, "grad_norm": 0.357421875, "learning_rate": 1.895773143860127e-05, "loss": 2.0004, "step": 13477 }, { "epoch": 0.4348522214671584, "grad_norm": 0.41015625, "learning_rate": 1.895621344959711e-05, "loss": 2.009, "step": 13478 }, { "epoch": 0.43488448532095475, "grad_norm": 0.35546875, "learning_rate": 1.8954695417045795e-05, "loss": 1.9903, "step": 13479 }, { "epoch": 0.4349167491747511, "grad_norm": 0.3984375, "learning_rate": 1.8953177340964026e-05, "loss": 1.9442, "step": 13480 }, { "epoch": 0.43494901302854744, "grad_norm": 0.34375, "learning_rate": 1.895165922136852e-05, "loss": 1.9511, "step": 13481 }, { "epoch": 0.4349812768823438, "grad_norm": 0.4140625, "learning_rate": 1.8950141058275984e-05, "loss": 1.9472, "step": 13482 }, { "epoch": 0.4350135407361402, "grad_norm": 0.341796875, "learning_rate": 1.8948622851703125e-05, "loss": 1.9941, "step": 13483 }, { "epoch": 0.4350458045899365, "grad_norm": 0.40625, "learning_rate": 1.8947104601666664e-05, "loss": 1.9959, "step": 13484 }, { "epoch": 0.43507806844373287, "grad_norm": 0.34375, "learning_rate": 1.8945586308183304e-05, "loss": 1.9645, "step": 13485 }, { "epoch": 0.4351103322975292, "grad_norm": 0.3828125, "learning_rate": 1.894406797126976e-05, "loss": 2.0066, "step": 13486 }, { "epoch": 0.43514259615132556, "grad_norm": 0.36328125, "learning_rate": 1.894254959094275e-05, "loss": 1.9912, "step": 13487 }, { "epoch": 0.4351748600051219, "grad_norm": 0.359375, "learning_rate": 1.8941031167218973e-05, "loss": 1.9894, "step": 13488 }, { "epoch": 0.43520712385891824, "grad_norm": 0.35546875, "learning_rate": 1.8939512700115157e-05, "loss": 2.0056, "step": 13489 }, { "epoch": 0.4352393877127146, "grad_norm": 0.35546875, "learning_rate": 1.8937994189648015e-05, "loss": 1.983, "step": 13490 }, { "epoch": 0.43527165156651093, "grad_norm": 0.328125, "learning_rate": 1.893647563583426e-05, "loss": 1.9941, "step": 13491 }, { "epoch": 0.4353039154203073, "grad_norm": 0.365234375, "learning_rate": 1.8934957038690595e-05, "loss": 1.94, "step": 13492 }, { "epoch": 0.4353361792741036, "grad_norm": 0.35546875, "learning_rate": 1.8933438398233748e-05, "loss": 1.9916, "step": 13493 }, { "epoch": 0.43536844312789996, "grad_norm": 0.36328125, "learning_rate": 1.8931919714480442e-05, "loss": 1.9453, "step": 13494 }, { "epoch": 0.4354007069816963, "grad_norm": 0.359375, "learning_rate": 1.893040098744738e-05, "loss": 1.9805, "step": 13495 }, { "epoch": 0.43543297083549265, "grad_norm": 0.427734375, "learning_rate": 1.8928882217151277e-05, "loss": 1.9644, "step": 13496 }, { "epoch": 0.435465234689289, "grad_norm": 0.353515625, "learning_rate": 1.8927363403608858e-05, "loss": 1.9198, "step": 13497 }, { "epoch": 0.43549749854308534, "grad_norm": 0.345703125, "learning_rate": 1.892584454683685e-05, "loss": 1.9824, "step": 13498 }, { "epoch": 0.4355297623968817, "grad_norm": 0.34765625, "learning_rate": 1.8924325646851956e-05, "loss": 1.9753, "step": 13499 }, { "epoch": 0.435562026250678, "grad_norm": 0.35546875, "learning_rate": 1.8922806703670897e-05, "loss": 1.9716, "step": 13500 }, { "epoch": 0.43559429010447437, "grad_norm": 0.337890625, "learning_rate": 1.8921287717310406e-05, "loss": 1.9643, "step": 13501 }, { "epoch": 0.4356265539582707, "grad_norm": 0.34765625, "learning_rate": 1.8919768687787186e-05, "loss": 1.9702, "step": 13502 }, { "epoch": 0.4356588178120671, "grad_norm": 0.369140625, "learning_rate": 1.891824961511796e-05, "loss": 1.9932, "step": 13503 }, { "epoch": 0.43569108166586346, "grad_norm": 0.365234375, "learning_rate": 1.891673049931946e-05, "loss": 2.0257, "step": 13504 }, { "epoch": 0.4357233455196598, "grad_norm": 0.373046875, "learning_rate": 1.89152113404084e-05, "loss": 1.9348, "step": 13505 }, { "epoch": 0.43575560937345614, "grad_norm": 0.345703125, "learning_rate": 1.8913692138401497e-05, "loss": 1.9922, "step": 13506 }, { "epoch": 0.4357878732272525, "grad_norm": 0.3515625, "learning_rate": 1.8912172893315487e-05, "loss": 1.9766, "step": 13507 }, { "epoch": 0.43582013708104883, "grad_norm": 0.337890625, "learning_rate": 1.891065360516708e-05, "loss": 1.9707, "step": 13508 }, { "epoch": 0.4358524009348452, "grad_norm": 0.34765625, "learning_rate": 1.8909134273973005e-05, "loss": 1.9907, "step": 13509 }, { "epoch": 0.4358846647886415, "grad_norm": 0.349609375, "learning_rate": 1.890761489974998e-05, "loss": 1.9821, "step": 13510 }, { "epoch": 0.43591692864243786, "grad_norm": 0.341796875, "learning_rate": 1.890609548251474e-05, "loss": 1.9831, "step": 13511 }, { "epoch": 0.4359491924962342, "grad_norm": 0.359375, "learning_rate": 1.8904576022283993e-05, "loss": 1.9775, "step": 13512 }, { "epoch": 0.43598145635003055, "grad_norm": 0.365234375, "learning_rate": 1.8903056519074484e-05, "loss": 1.9938, "step": 13513 }, { "epoch": 0.4360137202038269, "grad_norm": 0.349609375, "learning_rate": 1.8901536972902924e-05, "loss": 1.9902, "step": 13514 }, { "epoch": 0.43604598405762324, "grad_norm": 0.39453125, "learning_rate": 1.890001738378605e-05, "loss": 1.9718, "step": 13515 }, { "epoch": 0.4360782479114196, "grad_norm": 0.365234375, "learning_rate": 1.8898497751740576e-05, "loss": 1.9865, "step": 13516 }, { "epoch": 0.4361105117652159, "grad_norm": 0.37109375, "learning_rate": 1.8896978076783237e-05, "loss": 1.9879, "step": 13517 }, { "epoch": 0.43614277561901227, "grad_norm": 0.359375, "learning_rate": 1.8895458358930758e-05, "loss": 1.9831, "step": 13518 }, { "epoch": 0.4361750394728086, "grad_norm": 0.361328125, "learning_rate": 1.889393859819987e-05, "loss": 1.9625, "step": 13519 }, { "epoch": 0.43620730332660496, "grad_norm": 0.357421875, "learning_rate": 1.8892418794607298e-05, "loss": 1.9532, "step": 13520 }, { "epoch": 0.4362395671804013, "grad_norm": 0.357421875, "learning_rate": 1.8890898948169776e-05, "loss": 1.9935, "step": 13521 }, { "epoch": 0.43627183103419764, "grad_norm": 0.353515625, "learning_rate": 1.8889379058904028e-05, "loss": 1.9872, "step": 13522 }, { "epoch": 0.436304094887994, "grad_norm": 0.36328125, "learning_rate": 1.888785912682678e-05, "loss": 2.0064, "step": 13523 }, { "epoch": 0.4363363587417904, "grad_norm": 0.353515625, "learning_rate": 1.8886339151954776e-05, "loss": 1.9723, "step": 13524 }, { "epoch": 0.43636862259558673, "grad_norm": 0.3515625, "learning_rate": 1.8884819134304736e-05, "loss": 1.9778, "step": 13525 }, { "epoch": 0.4364008864493831, "grad_norm": 0.359375, "learning_rate": 1.888329907389339e-05, "loss": 1.9819, "step": 13526 }, { "epoch": 0.4364331503031794, "grad_norm": 0.353515625, "learning_rate": 1.8881778970737477e-05, "loss": 1.978, "step": 13527 }, { "epoch": 0.43646541415697576, "grad_norm": 0.34765625, "learning_rate": 1.888025882485373e-05, "loss": 1.9863, "step": 13528 }, { "epoch": 0.4364976780107721, "grad_norm": 0.357421875, "learning_rate": 1.887873863625887e-05, "loss": 2.0181, "step": 13529 }, { "epoch": 0.43652994186456845, "grad_norm": 0.35546875, "learning_rate": 1.887721840496964e-05, "loss": 2.0106, "step": 13530 }, { "epoch": 0.4365622057183648, "grad_norm": 0.357421875, "learning_rate": 1.887569813100277e-05, "loss": 2.0067, "step": 13531 }, { "epoch": 0.43659446957216114, "grad_norm": 0.373046875, "learning_rate": 1.8874177814375003e-05, "loss": 2.0066, "step": 13532 }, { "epoch": 0.4366267334259575, "grad_norm": 0.353515625, "learning_rate": 1.8872657455103056e-05, "loss": 1.9567, "step": 13533 }, { "epoch": 0.4366589972797538, "grad_norm": 0.35546875, "learning_rate": 1.8871137053203675e-05, "loss": 1.9654, "step": 13534 }, { "epoch": 0.43669126113355017, "grad_norm": 0.365234375, "learning_rate": 1.88696166086936e-05, "loss": 1.9956, "step": 13535 }, { "epoch": 0.4367235249873465, "grad_norm": 0.35546875, "learning_rate": 1.8868096121589563e-05, "loss": 2.0419, "step": 13536 }, { "epoch": 0.43675578884114286, "grad_norm": 0.35546875, "learning_rate": 1.8866575591908292e-05, "loss": 1.9747, "step": 13537 }, { "epoch": 0.4367880526949392, "grad_norm": 0.357421875, "learning_rate": 1.8865055019666537e-05, "loss": 1.9426, "step": 13538 }, { "epoch": 0.43682031654873554, "grad_norm": 0.345703125, "learning_rate": 1.8863534404881027e-05, "loss": 1.999, "step": 13539 }, { "epoch": 0.4368525804025319, "grad_norm": 0.35546875, "learning_rate": 1.88620137475685e-05, "loss": 1.9975, "step": 13540 }, { "epoch": 0.43688484425632823, "grad_norm": 0.33984375, "learning_rate": 1.8860493047745696e-05, "loss": 1.9766, "step": 13541 }, { "epoch": 0.4369171081101246, "grad_norm": 0.365234375, "learning_rate": 1.8858972305429358e-05, "loss": 2.0026, "step": 13542 }, { "epoch": 0.4369493719639209, "grad_norm": 0.341796875, "learning_rate": 1.8857451520636214e-05, "loss": 1.9734, "step": 13543 }, { "epoch": 0.4369816358177173, "grad_norm": 0.365234375, "learning_rate": 1.8855930693383016e-05, "loss": 1.9611, "step": 13544 }, { "epoch": 0.43701389967151366, "grad_norm": 0.357421875, "learning_rate": 1.88544098236865e-05, "loss": 1.9382, "step": 13545 }, { "epoch": 0.43704616352531, "grad_norm": 0.357421875, "learning_rate": 1.8852888911563408e-05, "loss": 2.0067, "step": 13546 }, { "epoch": 0.43707842737910635, "grad_norm": 0.357421875, "learning_rate": 1.8851367957030472e-05, "loss": 1.9695, "step": 13547 }, { "epoch": 0.4371106912329027, "grad_norm": 0.357421875, "learning_rate": 1.884984696010444e-05, "loss": 1.9535, "step": 13548 }, { "epoch": 0.43714295508669904, "grad_norm": 0.353515625, "learning_rate": 1.8848325920802062e-05, "loss": 1.9897, "step": 13549 }, { "epoch": 0.4371752189404954, "grad_norm": 0.365234375, "learning_rate": 1.8846804839140072e-05, "loss": 1.9904, "step": 13550 }, { "epoch": 0.4372074827942917, "grad_norm": 0.34375, "learning_rate": 1.884528371513521e-05, "loss": 1.9791, "step": 13551 }, { "epoch": 0.43723974664808807, "grad_norm": 0.373046875, "learning_rate": 1.8843762548804224e-05, "loss": 1.9807, "step": 13552 }, { "epoch": 0.4372720105018844, "grad_norm": 0.35546875, "learning_rate": 1.884224134016386e-05, "loss": 1.9719, "step": 13553 }, { "epoch": 0.43730427435568076, "grad_norm": 0.373046875, "learning_rate": 1.8840720089230854e-05, "loss": 1.9881, "step": 13554 }, { "epoch": 0.4373365382094771, "grad_norm": 0.36328125, "learning_rate": 1.8839198796021962e-05, "loss": 1.9627, "step": 13555 }, { "epoch": 0.43736880206327344, "grad_norm": 0.361328125, "learning_rate": 1.8837677460553926e-05, "loss": 1.9732, "step": 13556 }, { "epoch": 0.4374010659170698, "grad_norm": 0.3828125, "learning_rate": 1.8836156082843485e-05, "loss": 1.974, "step": 13557 }, { "epoch": 0.43743332977086613, "grad_norm": 0.373046875, "learning_rate": 1.883463466290739e-05, "loss": 2.0059, "step": 13558 }, { "epoch": 0.4374655936246625, "grad_norm": 0.34765625, "learning_rate": 1.8833113200762387e-05, "loss": 1.9729, "step": 13559 }, { "epoch": 0.4374978574784588, "grad_norm": 0.357421875, "learning_rate": 1.8831591696425227e-05, "loss": 1.9418, "step": 13560 }, { "epoch": 0.43753012133225516, "grad_norm": 0.373046875, "learning_rate": 1.883007014991265e-05, "loss": 1.9883, "step": 13561 }, { "epoch": 0.4375623851860515, "grad_norm": 0.365234375, "learning_rate": 1.8828548561241418e-05, "loss": 1.9932, "step": 13562 }, { "epoch": 0.43759464903984785, "grad_norm": 0.38671875, "learning_rate": 1.882702693042826e-05, "loss": 1.9757, "step": 13563 }, { "epoch": 0.43762691289364425, "grad_norm": 0.369140625, "learning_rate": 1.8825505257489937e-05, "loss": 1.9945, "step": 13564 }, { "epoch": 0.4376591767474406, "grad_norm": 0.37890625, "learning_rate": 1.8823983542443194e-05, "loss": 2.0136, "step": 13565 }, { "epoch": 0.43769144060123694, "grad_norm": 0.369140625, "learning_rate": 1.8822461785304792e-05, "loss": 2.0233, "step": 13566 }, { "epoch": 0.4377237044550333, "grad_norm": 0.392578125, "learning_rate": 1.882093998609147e-05, "loss": 1.9767, "step": 13567 }, { "epoch": 0.4377559683088296, "grad_norm": 0.35546875, "learning_rate": 1.8819418144819975e-05, "loss": 1.9731, "step": 13568 }, { "epoch": 0.43778823216262597, "grad_norm": 0.392578125, "learning_rate": 1.881789626150707e-05, "loss": 1.9901, "step": 13569 }, { "epoch": 0.4378204960164223, "grad_norm": 0.349609375, "learning_rate": 1.8816374336169508e-05, "loss": 1.9749, "step": 13570 }, { "epoch": 0.43785275987021866, "grad_norm": 0.376953125, "learning_rate": 1.8814852368824028e-05, "loss": 1.9963, "step": 13571 }, { "epoch": 0.437885023724015, "grad_norm": 0.337890625, "learning_rate": 1.8813330359487387e-05, "loss": 1.9541, "step": 13572 }, { "epoch": 0.43791728757781134, "grad_norm": 0.361328125, "learning_rate": 1.881180830817635e-05, "loss": 1.9622, "step": 13573 }, { "epoch": 0.4379495514316077, "grad_norm": 0.349609375, "learning_rate": 1.8810286214907655e-05, "loss": 1.9851, "step": 13574 }, { "epoch": 0.43798181528540403, "grad_norm": 0.37109375, "learning_rate": 1.8808764079698062e-05, "loss": 2.0074, "step": 13575 }, { "epoch": 0.4380140791392004, "grad_norm": 0.349609375, "learning_rate": 1.880724190256433e-05, "loss": 1.9594, "step": 13576 }, { "epoch": 0.4380463429929967, "grad_norm": 0.349609375, "learning_rate": 1.8805719683523212e-05, "loss": 2.0119, "step": 13577 }, { "epoch": 0.43807860684679306, "grad_norm": 0.33984375, "learning_rate": 1.8804197422591456e-05, "loss": 1.9414, "step": 13578 }, { "epoch": 0.4381108707005894, "grad_norm": 0.349609375, "learning_rate": 1.880267511978583e-05, "loss": 1.9949, "step": 13579 }, { "epoch": 0.43814313455438575, "grad_norm": 0.341796875, "learning_rate": 1.880115277512308e-05, "loss": 1.9566, "step": 13580 }, { "epoch": 0.4381753984081821, "grad_norm": 0.376953125, "learning_rate": 1.8799630388619974e-05, "loss": 1.9852, "step": 13581 }, { "epoch": 0.43820766226197844, "grad_norm": 0.35546875, "learning_rate": 1.879810796029325e-05, "loss": 1.9414, "step": 13582 }, { "epoch": 0.4382399261157748, "grad_norm": 0.34375, "learning_rate": 1.8796585490159685e-05, "loss": 2.014, "step": 13583 }, { "epoch": 0.4382721899695712, "grad_norm": 0.349609375, "learning_rate": 1.8795062978236035e-05, "loss": 1.9906, "step": 13584 }, { "epoch": 0.4383044538233675, "grad_norm": 0.34375, "learning_rate": 1.8793540424539047e-05, "loss": 1.9734, "step": 13585 }, { "epoch": 0.43833671767716387, "grad_norm": 0.35546875, "learning_rate": 1.8792017829085494e-05, "loss": 1.9612, "step": 13586 }, { "epoch": 0.4383689815309602, "grad_norm": 0.35546875, "learning_rate": 1.8790495191892128e-05, "loss": 2.0031, "step": 13587 }, { "epoch": 0.43840124538475655, "grad_norm": 0.3515625, "learning_rate": 1.8788972512975705e-05, "loss": 1.9894, "step": 13588 }, { "epoch": 0.4384335092385529, "grad_norm": 0.361328125, "learning_rate": 1.8787449792352993e-05, "loss": 1.9897, "step": 13589 }, { "epoch": 0.43846577309234924, "grad_norm": 0.365234375, "learning_rate": 1.8785927030040755e-05, "loss": 1.9613, "step": 13590 }, { "epoch": 0.4384980369461456, "grad_norm": 0.345703125, "learning_rate": 1.8784404226055743e-05, "loss": 1.9989, "step": 13591 }, { "epoch": 0.43853030079994193, "grad_norm": 0.36328125, "learning_rate": 1.878288138041473e-05, "loss": 1.9935, "step": 13592 }, { "epoch": 0.4385625646537383, "grad_norm": 0.341796875, "learning_rate": 1.878135849313447e-05, "loss": 2.0083, "step": 13593 }, { "epoch": 0.4385948285075346, "grad_norm": 0.35546875, "learning_rate": 1.8779835564231725e-05, "loss": 1.9575, "step": 13594 }, { "epoch": 0.43862709236133096, "grad_norm": 0.365234375, "learning_rate": 1.8778312593723264e-05, "loss": 2.0026, "step": 13595 }, { "epoch": 0.4386593562151273, "grad_norm": 0.328125, "learning_rate": 1.8776789581625845e-05, "loss": 1.9559, "step": 13596 }, { "epoch": 0.43869162006892365, "grad_norm": 0.359375, "learning_rate": 1.8775266527956242e-05, "loss": 1.9771, "step": 13597 }, { "epoch": 0.43872388392272, "grad_norm": 0.36328125, "learning_rate": 1.8773743432731213e-05, "loss": 1.9778, "step": 13598 }, { "epoch": 0.43875614777651634, "grad_norm": 0.341796875, "learning_rate": 1.877222029596752e-05, "loss": 1.9663, "step": 13599 }, { "epoch": 0.4387884116303127, "grad_norm": 0.349609375, "learning_rate": 1.877069711768193e-05, "loss": 1.9707, "step": 13600 }, { "epoch": 0.438820675484109, "grad_norm": 0.353515625, "learning_rate": 1.8769173897891217e-05, "loss": 2.0141, "step": 13601 }, { "epoch": 0.43885293933790537, "grad_norm": 0.3515625, "learning_rate": 1.876765063661214e-05, "loss": 1.9888, "step": 13602 }, { "epoch": 0.4388852031917017, "grad_norm": 0.349609375, "learning_rate": 1.8766127333861465e-05, "loss": 1.98, "step": 13603 }, { "epoch": 0.4389174670454981, "grad_norm": 0.3515625, "learning_rate": 1.876460398965597e-05, "loss": 1.9812, "step": 13604 }, { "epoch": 0.43894973089929445, "grad_norm": 0.384765625, "learning_rate": 1.8763080604012407e-05, "loss": 2.0193, "step": 13605 }, { "epoch": 0.4389819947530908, "grad_norm": 0.384765625, "learning_rate": 1.8761557176947553e-05, "loss": 1.9681, "step": 13606 }, { "epoch": 0.43901425860688714, "grad_norm": 0.37109375, "learning_rate": 1.876003370847818e-05, "loss": 1.9955, "step": 13607 }, { "epoch": 0.4390465224606835, "grad_norm": 0.341796875, "learning_rate": 1.875851019862105e-05, "loss": 1.9616, "step": 13608 }, { "epoch": 0.43907878631447983, "grad_norm": 0.3515625, "learning_rate": 1.8756986647392935e-05, "loss": 1.9876, "step": 13609 }, { "epoch": 0.4391110501682762, "grad_norm": 0.34765625, "learning_rate": 1.8755463054810613e-05, "loss": 2.0124, "step": 13610 }, { "epoch": 0.4391433140220725, "grad_norm": 0.345703125, "learning_rate": 1.875393942089084e-05, "loss": 1.9951, "step": 13611 }, { "epoch": 0.43917557787586886, "grad_norm": 0.365234375, "learning_rate": 1.8752415745650403e-05, "loss": 1.993, "step": 13612 }, { "epoch": 0.4392078417296652, "grad_norm": 0.353515625, "learning_rate": 1.8750892029106056e-05, "loss": 1.973, "step": 13613 }, { "epoch": 0.43924010558346155, "grad_norm": 0.353515625, "learning_rate": 1.8749368271274586e-05, "loss": 1.9916, "step": 13614 }, { "epoch": 0.4392723694372579, "grad_norm": 0.341796875, "learning_rate": 1.874784447217276e-05, "loss": 1.9658, "step": 13615 }, { "epoch": 0.43930463329105424, "grad_norm": 0.353515625, "learning_rate": 1.874632063181735e-05, "loss": 2.0018, "step": 13616 }, { "epoch": 0.4393368971448506, "grad_norm": 0.35546875, "learning_rate": 1.874479675022513e-05, "loss": 1.9857, "step": 13617 }, { "epoch": 0.4393691609986469, "grad_norm": 0.353515625, "learning_rate": 1.8743272827412876e-05, "loss": 1.9836, "step": 13618 }, { "epoch": 0.43940142485244327, "grad_norm": 0.357421875, "learning_rate": 1.8741748863397355e-05, "loss": 2.0002, "step": 13619 }, { "epoch": 0.4394336887062396, "grad_norm": 0.361328125, "learning_rate": 1.8740224858195347e-05, "loss": 2.0082, "step": 13620 }, { "epoch": 0.43946595256003596, "grad_norm": 0.345703125, "learning_rate": 1.8738700811823635e-05, "loss": 1.9999, "step": 13621 }, { "epoch": 0.4394982164138323, "grad_norm": 0.349609375, "learning_rate": 1.873717672429898e-05, "loss": 1.9783, "step": 13622 }, { "epoch": 0.43953048026762864, "grad_norm": 0.33984375, "learning_rate": 1.8735652595638166e-05, "loss": 1.9565, "step": 13623 }, { "epoch": 0.439562744121425, "grad_norm": 0.349609375, "learning_rate": 1.8734128425857968e-05, "loss": 1.9784, "step": 13624 }, { "epoch": 0.4395950079752214, "grad_norm": 0.3515625, "learning_rate": 1.873260421497516e-05, "loss": 2.0129, "step": 13625 }, { "epoch": 0.43962727182901773, "grad_norm": 0.349609375, "learning_rate": 1.873107996300653e-05, "loss": 2.0014, "step": 13626 }, { "epoch": 0.4396595356828141, "grad_norm": 0.3828125, "learning_rate": 1.8729555669968842e-05, "loss": 2.0053, "step": 13627 }, { "epoch": 0.4396917995366104, "grad_norm": 0.337890625, "learning_rate": 1.872803133587888e-05, "loss": 1.9768, "step": 13628 }, { "epoch": 0.43972406339040676, "grad_norm": 0.353515625, "learning_rate": 1.872650696075343e-05, "loss": 1.9826, "step": 13629 }, { "epoch": 0.4397563272442031, "grad_norm": 0.34765625, "learning_rate": 1.8724982544609258e-05, "loss": 2.0072, "step": 13630 }, { "epoch": 0.43978859109799945, "grad_norm": 0.33984375, "learning_rate": 1.8723458087463155e-05, "loss": 1.9882, "step": 13631 }, { "epoch": 0.4398208549517958, "grad_norm": 0.361328125, "learning_rate": 1.87219335893319e-05, "loss": 1.9706, "step": 13632 }, { "epoch": 0.43985311880559214, "grad_norm": 0.34765625, "learning_rate": 1.8720409050232265e-05, "loss": 1.9938, "step": 13633 }, { "epoch": 0.4398853826593885, "grad_norm": 0.345703125, "learning_rate": 1.8718884470181034e-05, "loss": 2.0043, "step": 13634 }, { "epoch": 0.4399176465131848, "grad_norm": 0.37109375, "learning_rate": 1.871735984919499e-05, "loss": 2.0004, "step": 13635 }, { "epoch": 0.43994991036698117, "grad_norm": 0.361328125, "learning_rate": 1.871583518729092e-05, "loss": 1.9947, "step": 13636 }, { "epoch": 0.4399821742207775, "grad_norm": 0.40234375, "learning_rate": 1.87143104844856e-05, "loss": 1.9825, "step": 13637 }, { "epoch": 0.44001443807457385, "grad_norm": 0.34375, "learning_rate": 1.871278574079582e-05, "loss": 1.9612, "step": 13638 }, { "epoch": 0.4400467019283702, "grad_norm": 0.349609375, "learning_rate": 1.8711260956238353e-05, "loss": 1.962, "step": 13639 }, { "epoch": 0.44007896578216654, "grad_norm": 0.34765625, "learning_rate": 1.8709736130829988e-05, "loss": 2.0104, "step": 13640 }, { "epoch": 0.4401112296359629, "grad_norm": 0.333984375, "learning_rate": 1.870821126458751e-05, "loss": 1.9837, "step": 13641 }, { "epoch": 0.44014349348975923, "grad_norm": 0.390625, "learning_rate": 1.87066863575277e-05, "loss": 1.9872, "step": 13642 }, { "epoch": 0.4401757573435556, "grad_norm": 0.34765625, "learning_rate": 1.870516140966735e-05, "loss": 1.9584, "step": 13643 }, { "epoch": 0.4402080211973519, "grad_norm": 0.36328125, "learning_rate": 1.870363642102324e-05, "loss": 1.9988, "step": 13644 }, { "epoch": 0.4402402850511483, "grad_norm": 0.353515625, "learning_rate": 1.8702111391612153e-05, "loss": 2.0049, "step": 13645 }, { "epoch": 0.44027254890494466, "grad_norm": 0.373046875, "learning_rate": 1.8700586321450887e-05, "loss": 1.9407, "step": 13646 }, { "epoch": 0.440304812758741, "grad_norm": 0.359375, "learning_rate": 1.8699061210556218e-05, "loss": 1.9783, "step": 13647 }, { "epoch": 0.44033707661253735, "grad_norm": 0.349609375, "learning_rate": 1.869753605894493e-05, "loss": 2.0006, "step": 13648 }, { "epoch": 0.4403693404663337, "grad_norm": 0.380859375, "learning_rate": 1.869601086663383e-05, "loss": 1.9814, "step": 13649 }, { "epoch": 0.44040160432013004, "grad_norm": 0.37109375, "learning_rate": 1.8694485633639687e-05, "loss": 1.9494, "step": 13650 }, { "epoch": 0.4404338681739264, "grad_norm": 0.3515625, "learning_rate": 1.8692960359979298e-05, "loss": 1.9529, "step": 13651 }, { "epoch": 0.4404661320277227, "grad_norm": 0.375, "learning_rate": 1.8691435045669453e-05, "loss": 1.9945, "step": 13652 }, { "epoch": 0.44049839588151907, "grad_norm": 0.359375, "learning_rate": 1.8689909690726935e-05, "loss": 1.9549, "step": 13653 }, { "epoch": 0.4405306597353154, "grad_norm": 0.37109375, "learning_rate": 1.8688384295168538e-05, "loss": 1.9865, "step": 13654 }, { "epoch": 0.44056292358911175, "grad_norm": 0.380859375, "learning_rate": 1.8686858859011058e-05, "loss": 1.985, "step": 13655 }, { "epoch": 0.4405951874429081, "grad_norm": 0.373046875, "learning_rate": 1.8685333382271277e-05, "loss": 1.9639, "step": 13656 }, { "epoch": 0.44062745129670444, "grad_norm": 0.392578125, "learning_rate": 1.868380786496599e-05, "loss": 1.9788, "step": 13657 }, { "epoch": 0.4406597151505008, "grad_norm": 0.349609375, "learning_rate": 1.8682282307111988e-05, "loss": 1.9994, "step": 13658 }, { "epoch": 0.44069197900429713, "grad_norm": 0.396484375, "learning_rate": 1.8680756708726066e-05, "loss": 1.978, "step": 13659 }, { "epoch": 0.4407242428580935, "grad_norm": 0.400390625, "learning_rate": 1.8679231069825016e-05, "loss": 1.9843, "step": 13660 }, { "epoch": 0.4407565067118898, "grad_norm": 0.380859375, "learning_rate": 1.8677705390425627e-05, "loss": 1.9257, "step": 13661 }, { "epoch": 0.44078877056568616, "grad_norm": 0.458984375, "learning_rate": 1.8676179670544693e-05, "loss": 1.9027, "step": 13662 }, { "epoch": 0.4408210344194825, "grad_norm": 0.357421875, "learning_rate": 1.8674653910199017e-05, "loss": 1.9561, "step": 13663 }, { "epoch": 0.44085329827327885, "grad_norm": 0.365234375, "learning_rate": 1.867312810940538e-05, "loss": 1.9768, "step": 13664 }, { "epoch": 0.44088556212707525, "grad_norm": 0.3515625, "learning_rate": 1.8671602268180587e-05, "loss": 1.9521, "step": 13665 }, { "epoch": 0.4409178259808716, "grad_norm": 0.353515625, "learning_rate": 1.8670076386541436e-05, "loss": 1.9957, "step": 13666 }, { "epoch": 0.44095008983466794, "grad_norm": 0.34375, "learning_rate": 1.866855046450471e-05, "loss": 1.9803, "step": 13667 }, { "epoch": 0.4409823536884643, "grad_norm": 0.35546875, "learning_rate": 1.8667024502087207e-05, "loss": 1.9838, "step": 13668 }, { "epoch": 0.4410146175422606, "grad_norm": 0.341796875, "learning_rate": 1.866549849930574e-05, "loss": 1.9733, "step": 13669 }, { "epoch": 0.44104688139605697, "grad_norm": 0.345703125, "learning_rate": 1.866397245617709e-05, "loss": 1.9765, "step": 13670 }, { "epoch": 0.4410791452498533, "grad_norm": 0.34375, "learning_rate": 1.8662446372718063e-05, "loss": 1.9769, "step": 13671 }, { "epoch": 0.44111140910364965, "grad_norm": 0.3359375, "learning_rate": 1.866092024894545e-05, "loss": 1.983, "step": 13672 }, { "epoch": 0.441143672957446, "grad_norm": 0.333984375, "learning_rate": 1.8659394084876055e-05, "loss": 1.9611, "step": 13673 }, { "epoch": 0.44117593681124234, "grad_norm": 0.341796875, "learning_rate": 1.8657867880526676e-05, "loss": 1.9835, "step": 13674 }, { "epoch": 0.4412082006650387, "grad_norm": 0.34765625, "learning_rate": 1.865634163591411e-05, "loss": 1.9808, "step": 13675 }, { "epoch": 0.44124046451883503, "grad_norm": 0.328125, "learning_rate": 1.8654815351055157e-05, "loss": 1.9859, "step": 13676 }, { "epoch": 0.4412727283726314, "grad_norm": 0.37109375, "learning_rate": 1.8653289025966625e-05, "loss": 2.0102, "step": 13677 }, { "epoch": 0.4413049922264277, "grad_norm": 0.34765625, "learning_rate": 1.86517626606653e-05, "loss": 1.9959, "step": 13678 }, { "epoch": 0.44133725608022406, "grad_norm": 0.3515625, "learning_rate": 1.8650236255167993e-05, "loss": 1.9708, "step": 13679 }, { "epoch": 0.4413695199340204, "grad_norm": 0.34375, "learning_rate": 1.864870980949151e-05, "loss": 1.9699, "step": 13680 }, { "epoch": 0.44140178378781675, "grad_norm": 0.345703125, "learning_rate": 1.8647183323652645e-05, "loss": 1.9828, "step": 13681 }, { "epoch": 0.4414340476416131, "grad_norm": 0.3359375, "learning_rate": 1.86456567976682e-05, "loss": 1.9782, "step": 13682 }, { "epoch": 0.44146631149540944, "grad_norm": 0.34375, "learning_rate": 1.8644130231554978e-05, "loss": 1.9752, "step": 13683 }, { "epoch": 0.4414985753492058, "grad_norm": 0.37109375, "learning_rate": 1.8642603625329786e-05, "loss": 1.9795, "step": 13684 }, { "epoch": 0.4415308392030022, "grad_norm": 0.33984375, "learning_rate": 1.864107697900943e-05, "loss": 1.9988, "step": 13685 }, { "epoch": 0.4415631030567985, "grad_norm": 0.359375, "learning_rate": 1.863955029261071e-05, "loss": 1.9769, "step": 13686 }, { "epoch": 0.44159536691059487, "grad_norm": 0.345703125, "learning_rate": 1.8638023566150428e-05, "loss": 1.9584, "step": 13687 }, { "epoch": 0.4416276307643912, "grad_norm": 0.35546875, "learning_rate": 1.8636496799645394e-05, "loss": 1.9548, "step": 13688 }, { "epoch": 0.44165989461818755, "grad_norm": 0.359375, "learning_rate": 1.8634969993112414e-05, "loss": 2.0028, "step": 13689 }, { "epoch": 0.4416921584719839, "grad_norm": 0.36328125, "learning_rate": 1.863344314656829e-05, "loss": 1.9947, "step": 13690 }, { "epoch": 0.44172442232578024, "grad_norm": 0.35546875, "learning_rate": 1.8631916260029832e-05, "loss": 1.969, "step": 13691 }, { "epoch": 0.4417566861795766, "grad_norm": 0.3828125, "learning_rate": 1.8630389333513843e-05, "loss": 1.9889, "step": 13692 }, { "epoch": 0.44178895003337293, "grad_norm": 0.33984375, "learning_rate": 1.8628862367037133e-05, "loss": 1.9395, "step": 13693 }, { "epoch": 0.4418212138871693, "grad_norm": 0.36328125, "learning_rate": 1.8627335360616515e-05, "loss": 1.9944, "step": 13694 }, { "epoch": 0.4418534777409656, "grad_norm": 0.361328125, "learning_rate": 1.8625808314268782e-05, "loss": 1.9918, "step": 13695 }, { "epoch": 0.44188574159476196, "grad_norm": 0.4609375, "learning_rate": 1.8624281228010758e-05, "loss": 2.023, "step": 13696 }, { "epoch": 0.4419180054485583, "grad_norm": 0.34375, "learning_rate": 1.8622754101859247e-05, "loss": 1.9834, "step": 13697 }, { "epoch": 0.44195026930235465, "grad_norm": 0.3515625, "learning_rate": 1.8621226935831055e-05, "loss": 1.9717, "step": 13698 }, { "epoch": 0.441982533156151, "grad_norm": 0.36328125, "learning_rate": 1.8619699729942997e-05, "loss": 1.9787, "step": 13699 }, { "epoch": 0.44201479700994734, "grad_norm": 0.365234375, "learning_rate": 1.861817248421188e-05, "loss": 1.9845, "step": 13700 }, { "epoch": 0.4420470608637437, "grad_norm": 0.361328125, "learning_rate": 1.8616645198654516e-05, "loss": 1.9634, "step": 13701 }, { "epoch": 0.44207932471754, "grad_norm": 0.345703125, "learning_rate": 1.861511787328772e-05, "loss": 1.9751, "step": 13702 }, { "epoch": 0.44211158857133637, "grad_norm": 0.3515625, "learning_rate": 1.8613590508128296e-05, "loss": 1.9211, "step": 13703 }, { "epoch": 0.4421438524251327, "grad_norm": 0.349609375, "learning_rate": 1.8612063103193058e-05, "loss": 1.9766, "step": 13704 }, { "epoch": 0.44217611627892905, "grad_norm": 0.34375, "learning_rate": 1.8610535658498822e-05, "loss": 1.8951, "step": 13705 }, { "epoch": 0.44220838013272545, "grad_norm": 0.36328125, "learning_rate": 1.8609008174062404e-05, "loss": 1.9812, "step": 13706 }, { "epoch": 0.4422406439865218, "grad_norm": 0.34765625, "learning_rate": 1.8607480649900608e-05, "loss": 1.9816, "step": 13707 }, { "epoch": 0.44227290784031814, "grad_norm": 0.36328125, "learning_rate": 1.8605953086030256e-05, "loss": 1.9932, "step": 13708 }, { "epoch": 0.4423051716941145, "grad_norm": 0.353515625, "learning_rate": 1.8604425482468153e-05, "loss": 1.9603, "step": 13709 }, { "epoch": 0.44233743554791083, "grad_norm": 0.353515625, "learning_rate": 1.860289783923113e-05, "loss": 1.9811, "step": 13710 }, { "epoch": 0.4423696994017072, "grad_norm": 0.36328125, "learning_rate": 1.8601370156335986e-05, "loss": 1.9723, "step": 13711 }, { "epoch": 0.4424019632555035, "grad_norm": 0.388671875, "learning_rate": 1.8599842433799544e-05, "loss": 1.9716, "step": 13712 }, { "epoch": 0.44243422710929986, "grad_norm": 0.369140625, "learning_rate": 1.859831467163862e-05, "loss": 1.9682, "step": 13713 }, { "epoch": 0.4424664909630962, "grad_norm": 0.345703125, "learning_rate": 1.859678686987003e-05, "loss": 1.9354, "step": 13714 }, { "epoch": 0.44249875481689255, "grad_norm": 0.38671875, "learning_rate": 1.859525902851059e-05, "loss": 1.9944, "step": 13715 }, { "epoch": 0.4425310186706889, "grad_norm": 0.353515625, "learning_rate": 1.8593731147577113e-05, "loss": 1.988, "step": 13716 }, { "epoch": 0.44256328252448524, "grad_norm": 0.3671875, "learning_rate": 1.859220322708643e-05, "loss": 1.9674, "step": 13717 }, { "epoch": 0.4425955463782816, "grad_norm": 0.37109375, "learning_rate": 1.8590675267055342e-05, "loss": 1.9633, "step": 13718 }, { "epoch": 0.4426278102320779, "grad_norm": 0.3671875, "learning_rate": 1.8589147267500685e-05, "loss": 1.9866, "step": 13719 }, { "epoch": 0.44266007408587427, "grad_norm": 0.373046875, "learning_rate": 1.8587619228439267e-05, "loss": 1.9827, "step": 13720 }, { "epoch": 0.4426923379396706, "grad_norm": 0.357421875, "learning_rate": 1.8586091149887908e-05, "loss": 1.9712, "step": 13721 }, { "epoch": 0.44272460179346695, "grad_norm": 0.388671875, "learning_rate": 1.8584563031863435e-05, "loss": 1.9804, "step": 13722 }, { "epoch": 0.4427568656472633, "grad_norm": 0.35546875, "learning_rate": 1.8583034874382656e-05, "loss": 1.9954, "step": 13723 }, { "epoch": 0.44278912950105964, "grad_norm": 0.376953125, "learning_rate": 1.8581506677462404e-05, "loss": 2.0017, "step": 13724 }, { "epoch": 0.442821393354856, "grad_norm": 0.388671875, "learning_rate": 1.8579978441119497e-05, "loss": 1.995, "step": 13725 }, { "epoch": 0.4428536572086524, "grad_norm": 0.359375, "learning_rate": 1.8578450165370757e-05, "loss": 1.9712, "step": 13726 }, { "epoch": 0.44288592106244873, "grad_norm": 0.359375, "learning_rate": 1.8576921850233e-05, "loss": 1.983, "step": 13727 }, { "epoch": 0.4429181849162451, "grad_norm": 0.376953125, "learning_rate": 1.8575393495723057e-05, "loss": 1.97, "step": 13728 }, { "epoch": 0.4429504487700414, "grad_norm": 0.359375, "learning_rate": 1.8573865101857744e-05, "loss": 1.9369, "step": 13729 }, { "epoch": 0.44298271262383776, "grad_norm": 0.361328125, "learning_rate": 1.8572336668653887e-05, "loss": 1.9963, "step": 13730 }, { "epoch": 0.4430149764776341, "grad_norm": 0.359375, "learning_rate": 1.8570808196128318e-05, "loss": 1.9904, "step": 13731 }, { "epoch": 0.44304724033143045, "grad_norm": 0.35546875, "learning_rate": 1.8569279684297848e-05, "loss": 1.9862, "step": 13732 }, { "epoch": 0.4430795041852268, "grad_norm": 0.35546875, "learning_rate": 1.8567751133179313e-05, "loss": 1.9575, "step": 13733 }, { "epoch": 0.44311176803902314, "grad_norm": 0.357421875, "learning_rate": 1.8566222542789526e-05, "loss": 1.9889, "step": 13734 }, { "epoch": 0.4431440318928195, "grad_norm": 0.3984375, "learning_rate": 1.8564693913145322e-05, "loss": 1.9857, "step": 13735 }, { "epoch": 0.4431762957466158, "grad_norm": 0.3671875, "learning_rate": 1.8563165244263526e-05, "loss": 1.9852, "step": 13736 }, { "epoch": 0.44320855960041217, "grad_norm": 0.3984375, "learning_rate": 1.8561636536160967e-05, "loss": 2.005, "step": 13737 }, { "epoch": 0.4432408234542085, "grad_norm": 0.37890625, "learning_rate": 1.856010778885446e-05, "loss": 1.9925, "step": 13738 }, { "epoch": 0.44327308730800485, "grad_norm": 0.41015625, "learning_rate": 1.8558579002360845e-05, "loss": 2.0021, "step": 13739 }, { "epoch": 0.4433053511618012, "grad_norm": 0.3671875, "learning_rate": 1.8557050176696947e-05, "loss": 1.996, "step": 13740 }, { "epoch": 0.44333761501559754, "grad_norm": 0.375, "learning_rate": 1.8555521311879587e-05, "loss": 1.9648, "step": 13741 }, { "epoch": 0.4433698788693939, "grad_norm": 0.349609375, "learning_rate": 1.8553992407925606e-05, "loss": 1.9797, "step": 13742 }, { "epoch": 0.44340214272319023, "grad_norm": 0.37109375, "learning_rate": 1.855246346485182e-05, "loss": 1.9836, "step": 13743 }, { "epoch": 0.4434344065769866, "grad_norm": 0.3828125, "learning_rate": 1.8550934482675066e-05, "loss": 1.9876, "step": 13744 }, { "epoch": 0.4434666704307829, "grad_norm": 0.359375, "learning_rate": 1.8549405461412178e-05, "loss": 1.989, "step": 13745 }, { "epoch": 0.4434989342845793, "grad_norm": 0.38671875, "learning_rate": 1.8547876401079974e-05, "loss": 1.9973, "step": 13746 }, { "epoch": 0.44353119813837566, "grad_norm": 0.359375, "learning_rate": 1.854634730169529e-05, "loss": 1.9919, "step": 13747 }, { "epoch": 0.443563461992172, "grad_norm": 0.41015625, "learning_rate": 1.854481816327497e-05, "loss": 1.9789, "step": 13748 }, { "epoch": 0.44359572584596835, "grad_norm": 0.375, "learning_rate": 1.854328898583583e-05, "loss": 1.9917, "step": 13749 }, { "epoch": 0.4436279896997647, "grad_norm": 0.3828125, "learning_rate": 1.8541759769394704e-05, "loss": 2.0014, "step": 13750 }, { "epoch": 0.44366025355356103, "grad_norm": 0.376953125, "learning_rate": 1.8540230513968426e-05, "loss": 1.9796, "step": 13751 }, { "epoch": 0.4436925174073574, "grad_norm": 0.390625, "learning_rate": 1.8538701219573832e-05, "loss": 1.9888, "step": 13752 }, { "epoch": 0.4437247812611537, "grad_norm": 0.375, "learning_rate": 1.8537171886227757e-05, "loss": 1.9724, "step": 13753 }, { "epoch": 0.44375704511495007, "grad_norm": 0.408203125, "learning_rate": 1.8535642513947026e-05, "loss": 1.9849, "step": 13754 }, { "epoch": 0.4437893089687464, "grad_norm": 0.369140625, "learning_rate": 1.8534113102748483e-05, "loss": 1.9941, "step": 13755 }, { "epoch": 0.44382157282254275, "grad_norm": 0.373046875, "learning_rate": 1.853258365264896e-05, "loss": 1.9991, "step": 13756 }, { "epoch": 0.4438538366763391, "grad_norm": 0.38671875, "learning_rate": 1.8531054163665285e-05, "loss": 1.9674, "step": 13757 }, { "epoch": 0.44388610053013544, "grad_norm": 0.353515625, "learning_rate": 1.85295246358143e-05, "loss": 1.9742, "step": 13758 }, { "epoch": 0.4439183643839318, "grad_norm": 0.38671875, "learning_rate": 1.8527995069112843e-05, "loss": 1.9759, "step": 13759 }, { "epoch": 0.44395062823772813, "grad_norm": 0.359375, "learning_rate": 1.8526465463577748e-05, "loss": 2.002, "step": 13760 }, { "epoch": 0.4439828920915245, "grad_norm": 0.375, "learning_rate": 1.8524935819225843e-05, "loss": 1.9787, "step": 13761 }, { "epoch": 0.4440151559453208, "grad_norm": 0.361328125, "learning_rate": 1.852340613607398e-05, "loss": 1.997, "step": 13762 }, { "epoch": 0.44404741979911716, "grad_norm": 0.373046875, "learning_rate": 1.8521876414138986e-05, "loss": 1.9707, "step": 13763 }, { "epoch": 0.4440796836529135, "grad_norm": 0.388671875, "learning_rate": 1.852034665343771e-05, "loss": 1.977, "step": 13764 }, { "epoch": 0.44411194750670985, "grad_norm": 0.36328125, "learning_rate": 1.8518816853986976e-05, "loss": 1.9882, "step": 13765 }, { "epoch": 0.44414421136050625, "grad_norm": 0.37109375, "learning_rate": 1.8517287015803637e-05, "loss": 1.9567, "step": 13766 }, { "epoch": 0.4441764752143026, "grad_norm": 0.353515625, "learning_rate": 1.8515757138904522e-05, "loss": 1.9791, "step": 13767 }, { "epoch": 0.44420873906809893, "grad_norm": 0.37890625, "learning_rate": 1.8514227223306474e-05, "loss": 2.0079, "step": 13768 }, { "epoch": 0.4442410029218953, "grad_norm": 0.353515625, "learning_rate": 1.8512697269026333e-05, "loss": 1.9558, "step": 13769 }, { "epoch": 0.4442732667756916, "grad_norm": 0.36328125, "learning_rate": 1.8511167276080946e-05, "loss": 1.9892, "step": 13770 }, { "epoch": 0.44430553062948797, "grad_norm": 0.37109375, "learning_rate": 1.8509637244487144e-05, "loss": 1.9878, "step": 13771 }, { "epoch": 0.4443377944832843, "grad_norm": 0.34375, "learning_rate": 1.850810717426177e-05, "loss": 1.9729, "step": 13772 }, { "epoch": 0.44437005833708065, "grad_norm": 0.369140625, "learning_rate": 1.8506577065421678e-05, "loss": 1.9656, "step": 13773 }, { "epoch": 0.444402322190877, "grad_norm": 0.359375, "learning_rate": 1.85050469179837e-05, "loss": 1.9837, "step": 13774 }, { "epoch": 0.44443458604467334, "grad_norm": 0.349609375, "learning_rate": 1.8503516731964667e-05, "loss": 1.9806, "step": 13775 }, { "epoch": 0.4444668498984697, "grad_norm": 0.365234375, "learning_rate": 1.8501986507381448e-05, "loss": 1.9873, "step": 13776 }, { "epoch": 0.44449911375226603, "grad_norm": 0.35546875, "learning_rate": 1.8500456244250874e-05, "loss": 2.01, "step": 13777 }, { "epoch": 0.4445313776060624, "grad_norm": 0.345703125, "learning_rate": 1.8498925942589785e-05, "loss": 1.9847, "step": 13778 }, { "epoch": 0.4445636414598587, "grad_norm": 0.396484375, "learning_rate": 1.8497395602415026e-05, "loss": 1.9859, "step": 13779 }, { "epoch": 0.44459590531365506, "grad_norm": 0.36328125, "learning_rate": 1.8495865223743446e-05, "loss": 2.0156, "step": 13780 }, { "epoch": 0.4446281691674514, "grad_norm": 0.345703125, "learning_rate": 1.8494334806591898e-05, "loss": 1.9869, "step": 13781 }, { "epoch": 0.44466043302124775, "grad_norm": 0.361328125, "learning_rate": 1.849280435097721e-05, "loss": 2.0076, "step": 13782 }, { "epoch": 0.4446926968750441, "grad_norm": 0.359375, "learning_rate": 1.849127385691624e-05, "loss": 1.9908, "step": 13783 }, { "epoch": 0.44472496072884043, "grad_norm": 0.365234375, "learning_rate": 1.8489743324425834e-05, "loss": 1.9703, "step": 13784 }, { "epoch": 0.4447572245826368, "grad_norm": 0.359375, "learning_rate": 1.8488212753522834e-05, "loss": 2.0174, "step": 13785 }, { "epoch": 0.4447894884364332, "grad_norm": 0.361328125, "learning_rate": 1.8486682144224094e-05, "loss": 1.9844, "step": 13786 }, { "epoch": 0.4448217522902295, "grad_norm": 0.3828125, "learning_rate": 1.8485151496546458e-05, "loss": 1.9645, "step": 13787 }, { "epoch": 0.44485401614402587, "grad_norm": 0.353515625, "learning_rate": 1.848362081050677e-05, "loss": 1.9895, "step": 13788 }, { "epoch": 0.4448862799978222, "grad_norm": 0.416015625, "learning_rate": 1.8482090086121885e-05, "loss": 1.9856, "step": 13789 }, { "epoch": 0.44491854385161855, "grad_norm": 0.34765625, "learning_rate": 1.8480559323408657e-05, "loss": 1.987, "step": 13790 }, { "epoch": 0.4449508077054149, "grad_norm": 0.408203125, "learning_rate": 1.8479028522383925e-05, "loss": 2.0128, "step": 13791 }, { "epoch": 0.44498307155921124, "grad_norm": 0.3828125, "learning_rate": 1.847749768306454e-05, "loss": 1.956, "step": 13792 }, { "epoch": 0.4450153354130076, "grad_norm": 0.34765625, "learning_rate": 1.8475966805467354e-05, "loss": 1.9495, "step": 13793 }, { "epoch": 0.44504759926680393, "grad_norm": 0.400390625, "learning_rate": 1.8474435889609224e-05, "loss": 1.9881, "step": 13794 }, { "epoch": 0.44507986312060027, "grad_norm": 0.33984375, "learning_rate": 1.8472904935506994e-05, "loss": 1.9863, "step": 13795 }, { "epoch": 0.4451121269743966, "grad_norm": 0.41015625, "learning_rate": 1.8471373943177516e-05, "loss": 2.0056, "step": 13796 }, { "epoch": 0.44514439082819296, "grad_norm": 0.3359375, "learning_rate": 1.8469842912637647e-05, "loss": 1.9625, "step": 13797 }, { "epoch": 0.4451766546819893, "grad_norm": 0.404296875, "learning_rate": 1.846831184390424e-05, "loss": 1.9784, "step": 13798 }, { "epoch": 0.44520891853578565, "grad_norm": 0.34375, "learning_rate": 1.8466780736994138e-05, "loss": 1.9794, "step": 13799 }, { "epoch": 0.445241182389582, "grad_norm": 0.392578125, "learning_rate": 1.84652495919242e-05, "loss": 2.0046, "step": 13800 }, { "epoch": 0.44527344624337833, "grad_norm": 0.357421875, "learning_rate": 1.8463718408711286e-05, "loss": 1.9732, "step": 13801 }, { "epoch": 0.4453057100971747, "grad_norm": 0.40625, "learning_rate": 1.846218718737224e-05, "loss": 1.9901, "step": 13802 }, { "epoch": 0.445337973950971, "grad_norm": 0.373046875, "learning_rate": 1.846065592792392e-05, "loss": 1.9837, "step": 13803 }, { "epoch": 0.44537023780476737, "grad_norm": 0.369140625, "learning_rate": 1.845912463038319e-05, "loss": 2.0026, "step": 13804 }, { "epoch": 0.4454025016585637, "grad_norm": 0.373046875, "learning_rate": 1.8457593294766895e-05, "loss": 1.9694, "step": 13805 }, { "epoch": 0.44543476551236005, "grad_norm": 0.3671875, "learning_rate": 1.8456061921091888e-05, "loss": 1.9897, "step": 13806 }, { "epoch": 0.44546702936615645, "grad_norm": 0.392578125, "learning_rate": 1.8454530509375033e-05, "loss": 1.9966, "step": 13807 }, { "epoch": 0.4454992932199528, "grad_norm": 0.357421875, "learning_rate": 1.8452999059633188e-05, "loss": 1.9493, "step": 13808 }, { "epoch": 0.44553155707374914, "grad_norm": 0.36328125, "learning_rate": 1.8451467571883206e-05, "loss": 1.9766, "step": 13809 }, { "epoch": 0.4455638209275455, "grad_norm": 0.388671875, "learning_rate": 1.8449936046141936e-05, "loss": 2.0026, "step": 13810 }, { "epoch": 0.44559608478134183, "grad_norm": 0.345703125, "learning_rate": 1.8448404482426252e-05, "loss": 1.9853, "step": 13811 }, { "epoch": 0.44562834863513817, "grad_norm": 0.40625, "learning_rate": 1.8446872880753008e-05, "loss": 1.9866, "step": 13812 }, { "epoch": 0.4456606124889345, "grad_norm": 0.3671875, "learning_rate": 1.8445341241139055e-05, "loss": 1.9967, "step": 13813 }, { "epoch": 0.44569287634273086, "grad_norm": 0.388671875, "learning_rate": 1.844380956360126e-05, "loss": 1.9756, "step": 13814 }, { "epoch": 0.4457251401965272, "grad_norm": 0.353515625, "learning_rate": 1.8442277848156484e-05, "loss": 1.9868, "step": 13815 }, { "epoch": 0.44575740405032355, "grad_norm": 0.380859375, "learning_rate": 1.8440746094821575e-05, "loss": 1.9822, "step": 13816 }, { "epoch": 0.4457896679041199, "grad_norm": 0.365234375, "learning_rate": 1.8439214303613403e-05, "loss": 1.9461, "step": 13817 }, { "epoch": 0.44582193175791623, "grad_norm": 0.345703125, "learning_rate": 1.8437682474548832e-05, "loss": 1.9225, "step": 13818 }, { "epoch": 0.4458541956117126, "grad_norm": 0.373046875, "learning_rate": 1.8436150607644717e-05, "loss": 1.9978, "step": 13819 }, { "epoch": 0.4458864594655089, "grad_norm": 0.3515625, "learning_rate": 1.843461870291792e-05, "loss": 1.9756, "step": 13820 }, { "epoch": 0.44591872331930527, "grad_norm": 0.46484375, "learning_rate": 1.8433086760385305e-05, "loss": 1.9905, "step": 13821 }, { "epoch": 0.4459509871731016, "grad_norm": 0.40625, "learning_rate": 1.8431554780063733e-05, "loss": 1.9862, "step": 13822 }, { "epoch": 0.44598325102689795, "grad_norm": 0.353515625, "learning_rate": 1.8430022761970065e-05, "loss": 1.9855, "step": 13823 }, { "epoch": 0.4460155148806943, "grad_norm": 0.38671875, "learning_rate": 1.8428490706121174e-05, "loss": 1.9817, "step": 13824 }, { "epoch": 0.44604777873449064, "grad_norm": 0.365234375, "learning_rate": 1.8426958612533912e-05, "loss": 1.9728, "step": 13825 }, { "epoch": 0.446080042588287, "grad_norm": 0.37890625, "learning_rate": 1.842542648122515e-05, "loss": 1.9478, "step": 13826 }, { "epoch": 0.4461123064420834, "grad_norm": 0.400390625, "learning_rate": 1.8423894312211745e-05, "loss": 1.9509, "step": 13827 }, { "epoch": 0.4461445702958797, "grad_norm": 0.3671875, "learning_rate": 1.8422362105510572e-05, "loss": 1.9674, "step": 13828 }, { "epoch": 0.44617683414967607, "grad_norm": 0.51171875, "learning_rate": 1.8420829861138493e-05, "loss": 1.9161, "step": 13829 }, { "epoch": 0.4462090980034724, "grad_norm": 0.365234375, "learning_rate": 1.841929757911237e-05, "loss": 1.9808, "step": 13830 }, { "epoch": 0.44624136185726876, "grad_norm": 0.396484375, "learning_rate": 1.8417765259449078e-05, "loss": 1.9653, "step": 13831 }, { "epoch": 0.4462736257110651, "grad_norm": 0.349609375, "learning_rate": 1.8416232902165474e-05, "loss": 1.9808, "step": 13832 }, { "epoch": 0.44630588956486145, "grad_norm": 0.37109375, "learning_rate": 1.841470050727843e-05, "loss": 1.9759, "step": 13833 }, { "epoch": 0.4463381534186578, "grad_norm": 0.3515625, "learning_rate": 1.841316807480481e-05, "loss": 2.0192, "step": 13834 }, { "epoch": 0.44637041727245413, "grad_norm": 0.373046875, "learning_rate": 1.8411635604761488e-05, "loss": 1.9893, "step": 13835 }, { "epoch": 0.4464026811262505, "grad_norm": 0.3828125, "learning_rate": 1.8410103097165328e-05, "loss": 1.8964, "step": 13836 }, { "epoch": 0.4464349449800468, "grad_norm": 0.373046875, "learning_rate": 1.8408570552033198e-05, "loss": 1.9557, "step": 13837 }, { "epoch": 0.44646720883384317, "grad_norm": 0.373046875, "learning_rate": 1.840703796938197e-05, "loss": 1.9742, "step": 13838 }, { "epoch": 0.4464994726876395, "grad_norm": 0.359375, "learning_rate": 1.8405505349228512e-05, "loss": 1.9447, "step": 13839 }, { "epoch": 0.44653173654143585, "grad_norm": 0.37109375, "learning_rate": 1.840397269158969e-05, "loss": 2.012, "step": 13840 }, { "epoch": 0.4465640003952322, "grad_norm": 0.35546875, "learning_rate": 1.8402439996482385e-05, "loss": 1.979, "step": 13841 }, { "epoch": 0.44659626424902854, "grad_norm": 0.3671875, "learning_rate": 1.840090726392346e-05, "loss": 1.9521, "step": 13842 }, { "epoch": 0.4466285281028249, "grad_norm": 0.33984375, "learning_rate": 1.8399374493929785e-05, "loss": 2.0046, "step": 13843 }, { "epoch": 0.44666079195662123, "grad_norm": 0.357421875, "learning_rate": 1.8397841686518236e-05, "loss": 1.9649, "step": 13844 }, { "epoch": 0.44669305581041757, "grad_norm": 0.361328125, "learning_rate": 1.8396308841705683e-05, "loss": 1.9845, "step": 13845 }, { "epoch": 0.4467253196642139, "grad_norm": 0.365234375, "learning_rate": 1.8394775959509e-05, "loss": 1.9909, "step": 13846 }, { "epoch": 0.4467575835180103, "grad_norm": 0.349609375, "learning_rate": 1.8393243039945057e-05, "loss": 1.9665, "step": 13847 }, { "epoch": 0.44678984737180666, "grad_norm": 0.36328125, "learning_rate": 1.839171008303073e-05, "loss": 1.9685, "step": 13848 }, { "epoch": 0.446822111225603, "grad_norm": 0.345703125, "learning_rate": 1.8390177088782893e-05, "loss": 1.9802, "step": 13849 }, { "epoch": 0.44685437507939935, "grad_norm": 0.34375, "learning_rate": 1.8388644057218416e-05, "loss": 1.9768, "step": 13850 }, { "epoch": 0.4468866389331957, "grad_norm": 0.34765625, "learning_rate": 1.8387110988354173e-05, "loss": 1.9893, "step": 13851 }, { "epoch": 0.44691890278699203, "grad_norm": 0.365234375, "learning_rate": 1.8385577882207047e-05, "loss": 1.9801, "step": 13852 }, { "epoch": 0.4469511666407884, "grad_norm": 0.34375, "learning_rate": 1.838404473879391e-05, "loss": 1.9637, "step": 13853 }, { "epoch": 0.4469834304945847, "grad_norm": 0.34375, "learning_rate": 1.8382511558131634e-05, "loss": 1.9426, "step": 13854 }, { "epoch": 0.44701569434838107, "grad_norm": 0.34765625, "learning_rate": 1.8380978340237094e-05, "loss": 1.9813, "step": 13855 }, { "epoch": 0.4470479582021774, "grad_norm": 0.345703125, "learning_rate": 1.8379445085127176e-05, "loss": 1.9884, "step": 13856 }, { "epoch": 0.44708022205597375, "grad_norm": 0.34375, "learning_rate": 1.8377911792818748e-05, "loss": 2.0015, "step": 13857 }, { "epoch": 0.4471124859097701, "grad_norm": 0.345703125, "learning_rate": 1.8376378463328688e-05, "loss": 1.9546, "step": 13858 }, { "epoch": 0.44714474976356644, "grad_norm": 0.373046875, "learning_rate": 1.837484509667388e-05, "loss": 1.9935, "step": 13859 }, { "epoch": 0.4471770136173628, "grad_norm": 0.34375, "learning_rate": 1.8373311692871195e-05, "loss": 1.9612, "step": 13860 }, { "epoch": 0.44720927747115913, "grad_norm": 0.34375, "learning_rate": 1.8371778251937518e-05, "loss": 1.9953, "step": 13861 }, { "epoch": 0.44724154132495547, "grad_norm": 0.36328125, "learning_rate": 1.837024477388972e-05, "loss": 1.9716, "step": 13862 }, { "epoch": 0.4472738051787518, "grad_norm": 0.345703125, "learning_rate": 1.8368711258744692e-05, "loss": 1.9637, "step": 13863 }, { "epoch": 0.44730606903254816, "grad_norm": 0.396484375, "learning_rate": 1.8367177706519302e-05, "loss": 1.9849, "step": 13864 }, { "epoch": 0.4473383328863445, "grad_norm": 0.34765625, "learning_rate": 1.8365644117230437e-05, "loss": 1.9967, "step": 13865 }, { "epoch": 0.44737059674014085, "grad_norm": 0.345703125, "learning_rate": 1.8364110490894977e-05, "loss": 1.9811, "step": 13866 }, { "epoch": 0.44740286059393725, "grad_norm": 0.3671875, "learning_rate": 1.8362576827529804e-05, "loss": 1.9833, "step": 13867 }, { "epoch": 0.4474351244477336, "grad_norm": 0.3515625, "learning_rate": 1.8361043127151792e-05, "loss": 1.9886, "step": 13868 }, { "epoch": 0.44746738830152993, "grad_norm": 0.373046875, "learning_rate": 1.835950938977783e-05, "loss": 1.9594, "step": 13869 }, { "epoch": 0.4474996521553263, "grad_norm": 0.359375, "learning_rate": 1.8357975615424806e-05, "loss": 1.9778, "step": 13870 }, { "epoch": 0.4475319160091226, "grad_norm": 0.359375, "learning_rate": 1.8356441804109588e-05, "loss": 1.9929, "step": 13871 }, { "epoch": 0.44756417986291896, "grad_norm": 0.38671875, "learning_rate": 1.8354907955849066e-05, "loss": 1.9734, "step": 13872 }, { "epoch": 0.4475964437167153, "grad_norm": 0.337890625, "learning_rate": 1.8353374070660127e-05, "loss": 1.9663, "step": 13873 }, { "epoch": 0.44762870757051165, "grad_norm": 0.37109375, "learning_rate": 1.8351840148559654e-05, "loss": 1.9968, "step": 13874 }, { "epoch": 0.447660971424308, "grad_norm": 0.380859375, "learning_rate": 1.8350306189564523e-05, "loss": 1.9952, "step": 13875 }, { "epoch": 0.44769323527810434, "grad_norm": 0.326171875, "learning_rate": 1.834877219369163e-05, "loss": 1.9849, "step": 13876 }, { "epoch": 0.4477254991319007, "grad_norm": 0.35546875, "learning_rate": 1.8347238160957853e-05, "loss": 1.9655, "step": 13877 }, { "epoch": 0.447757762985697, "grad_norm": 0.337890625, "learning_rate": 1.834570409138008e-05, "loss": 1.9789, "step": 13878 }, { "epoch": 0.44779002683949337, "grad_norm": 0.345703125, "learning_rate": 1.8344169984975195e-05, "loss": 1.9784, "step": 13879 }, { "epoch": 0.4478222906932897, "grad_norm": 0.349609375, "learning_rate": 1.8342635841760088e-05, "loss": 1.9592, "step": 13880 }, { "epoch": 0.44785455454708606, "grad_norm": 0.33203125, "learning_rate": 1.8341101661751648e-05, "loss": 1.9883, "step": 13881 }, { "epoch": 0.4478868184008824, "grad_norm": 0.3515625, "learning_rate": 1.8339567444966746e-05, "loss": 1.9599, "step": 13882 }, { "epoch": 0.44791908225467875, "grad_norm": 0.337890625, "learning_rate": 1.8338033191422294e-05, "loss": 1.9614, "step": 13883 }, { "epoch": 0.4479513461084751, "grad_norm": 0.33203125, "learning_rate": 1.8336498901135165e-05, "loss": 1.967, "step": 13884 }, { "epoch": 0.44798360996227143, "grad_norm": 0.35546875, "learning_rate": 1.8334964574122243e-05, "loss": 1.9689, "step": 13885 }, { "epoch": 0.4480158738160678, "grad_norm": 0.33984375, "learning_rate": 1.8333430210400424e-05, "loss": 1.9604, "step": 13886 }, { "epoch": 0.4480481376698641, "grad_norm": 0.341796875, "learning_rate": 1.8331895809986602e-05, "loss": 1.9884, "step": 13887 }, { "epoch": 0.4480804015236605, "grad_norm": 0.349609375, "learning_rate": 1.833036137289766e-05, "loss": 1.9864, "step": 13888 }, { "epoch": 0.44811266537745686, "grad_norm": 0.34375, "learning_rate": 1.8328826899150486e-05, "loss": 1.9901, "step": 13889 }, { "epoch": 0.4481449292312532, "grad_norm": 0.34375, "learning_rate": 1.8327292388761975e-05, "loss": 1.963, "step": 13890 }, { "epoch": 0.44817719308504955, "grad_norm": 0.34765625, "learning_rate": 1.832575784174902e-05, "loss": 2.018, "step": 13891 }, { "epoch": 0.4482094569388459, "grad_norm": 0.33984375, "learning_rate": 1.8324223258128503e-05, "loss": 1.9986, "step": 13892 }, { "epoch": 0.44824172079264224, "grad_norm": 0.3671875, "learning_rate": 1.8322688637917325e-05, "loss": 1.972, "step": 13893 }, { "epoch": 0.4482739846464386, "grad_norm": 0.349609375, "learning_rate": 1.8321153981132372e-05, "loss": 1.9789, "step": 13894 }, { "epoch": 0.4483062485002349, "grad_norm": 0.341796875, "learning_rate": 1.831961928779054e-05, "loss": 2.0025, "step": 13895 }, { "epoch": 0.44833851235403127, "grad_norm": 0.3515625, "learning_rate": 1.8318084557908716e-05, "loss": 1.9595, "step": 13896 }, { "epoch": 0.4483707762078276, "grad_norm": 0.353515625, "learning_rate": 1.8316549791503806e-05, "loss": 2.0002, "step": 13897 }, { "epoch": 0.44840304006162396, "grad_norm": 0.35546875, "learning_rate": 1.831501498859269e-05, "loss": 1.9749, "step": 13898 }, { "epoch": 0.4484353039154203, "grad_norm": 0.365234375, "learning_rate": 1.8313480149192265e-05, "loss": 2.0046, "step": 13899 }, { "epoch": 0.44846756776921665, "grad_norm": 0.373046875, "learning_rate": 1.8311945273319432e-05, "loss": 1.9822, "step": 13900 }, { "epoch": 0.448499831623013, "grad_norm": 0.37109375, "learning_rate": 1.831041036099108e-05, "loss": 2.0109, "step": 13901 }, { "epoch": 0.44853209547680933, "grad_norm": 0.35546875, "learning_rate": 1.8308875412224103e-05, "loss": 1.951, "step": 13902 }, { "epoch": 0.4485643593306057, "grad_norm": 0.349609375, "learning_rate": 1.83073404270354e-05, "loss": 1.9902, "step": 13903 }, { "epoch": 0.448596623184402, "grad_norm": 0.361328125, "learning_rate": 1.8305805405441872e-05, "loss": 1.9565, "step": 13904 }, { "epoch": 0.44862888703819837, "grad_norm": 0.369140625, "learning_rate": 1.8304270347460404e-05, "loss": 1.9545, "step": 13905 }, { "epoch": 0.4486611508919947, "grad_norm": 0.36328125, "learning_rate": 1.8302735253107898e-05, "loss": 2.0269, "step": 13906 }, { "epoch": 0.44869341474579105, "grad_norm": 0.3671875, "learning_rate": 1.830120012240126e-05, "loss": 1.95, "step": 13907 }, { "epoch": 0.44872567859958745, "grad_norm": 0.353515625, "learning_rate": 1.8299664955357367e-05, "loss": 1.9733, "step": 13908 }, { "epoch": 0.4487579424533838, "grad_norm": 0.359375, "learning_rate": 1.8298129751993136e-05, "loss": 1.9551, "step": 13909 }, { "epoch": 0.44879020630718014, "grad_norm": 0.357421875, "learning_rate": 1.829659451232546e-05, "loss": 1.9961, "step": 13910 }, { "epoch": 0.4488224701609765, "grad_norm": 0.365234375, "learning_rate": 1.8295059236371234e-05, "loss": 1.9875, "step": 13911 }, { "epoch": 0.4488547340147728, "grad_norm": 0.357421875, "learning_rate": 1.8293523924147362e-05, "loss": 1.9791, "step": 13912 }, { "epoch": 0.44888699786856917, "grad_norm": 0.37109375, "learning_rate": 1.8291988575670734e-05, "loss": 1.9823, "step": 13913 }, { "epoch": 0.4489192617223655, "grad_norm": 0.376953125, "learning_rate": 1.8290453190958264e-05, "loss": 1.9718, "step": 13914 }, { "epoch": 0.44895152557616186, "grad_norm": 0.3671875, "learning_rate": 1.828891777002685e-05, "loss": 1.9771, "step": 13915 }, { "epoch": 0.4489837894299582, "grad_norm": 0.41015625, "learning_rate": 1.8287382312893382e-05, "loss": 2.0105, "step": 13916 }, { "epoch": 0.44901605328375455, "grad_norm": 0.390625, "learning_rate": 1.8285846819574768e-05, "loss": 1.9692, "step": 13917 }, { "epoch": 0.4490483171375509, "grad_norm": 0.357421875, "learning_rate": 1.8284311290087914e-05, "loss": 2.0015, "step": 13918 }, { "epoch": 0.44908058099134723, "grad_norm": 0.349609375, "learning_rate": 1.8282775724449714e-05, "loss": 2.0054, "step": 13919 }, { "epoch": 0.4491128448451436, "grad_norm": 0.373046875, "learning_rate": 1.8281240122677076e-05, "loss": 1.9792, "step": 13920 }, { "epoch": 0.4491451086989399, "grad_norm": 0.33984375, "learning_rate": 1.8279704484786898e-05, "loss": 1.9747, "step": 13921 }, { "epoch": 0.44917737255273626, "grad_norm": 0.34765625, "learning_rate": 1.8278168810796085e-05, "loss": 2.0013, "step": 13922 }, { "epoch": 0.4492096364065326, "grad_norm": 0.359375, "learning_rate": 1.8276633100721542e-05, "loss": 1.9729, "step": 13923 }, { "epoch": 0.44924190026032895, "grad_norm": 0.349609375, "learning_rate": 1.8275097354580178e-05, "loss": 2.0143, "step": 13924 }, { "epoch": 0.4492741641141253, "grad_norm": 0.3515625, "learning_rate": 1.8273561572388884e-05, "loss": 1.9347, "step": 13925 }, { "epoch": 0.44930642796792164, "grad_norm": 0.34765625, "learning_rate": 1.827202575416458e-05, "loss": 2.0038, "step": 13926 }, { "epoch": 0.449338691821718, "grad_norm": 0.333984375, "learning_rate": 1.8270489899924155e-05, "loss": 1.9957, "step": 13927 }, { "epoch": 0.4493709556755144, "grad_norm": 0.35546875, "learning_rate": 1.8268954009684532e-05, "loss": 1.9736, "step": 13928 }, { "epoch": 0.4494032195293107, "grad_norm": 0.33203125, "learning_rate": 1.8267418083462608e-05, "loss": 1.9585, "step": 13929 }, { "epoch": 0.44943548338310707, "grad_norm": 0.349609375, "learning_rate": 1.8265882121275285e-05, "loss": 1.999, "step": 13930 }, { "epoch": 0.4494677472369034, "grad_norm": 0.34375, "learning_rate": 1.8264346123139474e-05, "loss": 1.9897, "step": 13931 }, { "epoch": 0.44950001109069976, "grad_norm": 0.353515625, "learning_rate": 1.826281008907209e-05, "loss": 2.0028, "step": 13932 }, { "epoch": 0.4495322749444961, "grad_norm": 0.3515625, "learning_rate": 1.8261274019090025e-05, "loss": 2.002, "step": 13933 }, { "epoch": 0.44956453879829245, "grad_norm": 0.37890625, "learning_rate": 1.8259737913210197e-05, "loss": 1.9734, "step": 13934 }, { "epoch": 0.4495968026520888, "grad_norm": 0.37109375, "learning_rate": 1.8258201771449518e-05, "loss": 2.0071, "step": 13935 }, { "epoch": 0.44962906650588513, "grad_norm": 0.37890625, "learning_rate": 1.8256665593824884e-05, "loss": 1.9999, "step": 13936 }, { "epoch": 0.4496613303596815, "grad_norm": 0.337890625, "learning_rate": 1.8255129380353216e-05, "loss": 1.9736, "step": 13937 }, { "epoch": 0.4496935942134778, "grad_norm": 0.3828125, "learning_rate": 1.8253593131051418e-05, "loss": 1.9885, "step": 13938 }, { "epoch": 0.44972585806727416, "grad_norm": 0.345703125, "learning_rate": 1.82520568459364e-05, "loss": 1.9765, "step": 13939 }, { "epoch": 0.4497581219210705, "grad_norm": 0.376953125, "learning_rate": 1.8250520525025074e-05, "loss": 1.9691, "step": 13940 }, { "epoch": 0.44979038577486685, "grad_norm": 0.341796875, "learning_rate": 1.824898416833435e-05, "loss": 1.9823, "step": 13941 }, { "epoch": 0.4498226496286632, "grad_norm": 0.359375, "learning_rate": 1.824744777588114e-05, "loss": 1.9486, "step": 13942 }, { "epoch": 0.44985491348245954, "grad_norm": 0.349609375, "learning_rate": 1.8245911347682357e-05, "loss": 1.9878, "step": 13943 }, { "epoch": 0.4498871773362559, "grad_norm": 0.353515625, "learning_rate": 1.8244374883754902e-05, "loss": 1.9981, "step": 13944 }, { "epoch": 0.4499194411900522, "grad_norm": 0.341796875, "learning_rate": 1.8242838384115704e-05, "loss": 1.9725, "step": 13945 }, { "epoch": 0.44995170504384857, "grad_norm": 0.341796875, "learning_rate": 1.8241301848781668e-05, "loss": 1.9679, "step": 13946 }, { "epoch": 0.4499839688976449, "grad_norm": 0.34765625, "learning_rate": 1.8239765277769702e-05, "loss": 1.9886, "step": 13947 }, { "epoch": 0.4500162327514413, "grad_norm": 0.359375, "learning_rate": 1.823822867109672e-05, "loss": 1.9767, "step": 13948 }, { "epoch": 0.45004849660523766, "grad_norm": 0.359375, "learning_rate": 1.8236692028779648e-05, "loss": 1.9776, "step": 13949 }, { "epoch": 0.450080760459034, "grad_norm": 0.353515625, "learning_rate": 1.823515535083539e-05, "loss": 1.9895, "step": 13950 }, { "epoch": 0.45011302431283035, "grad_norm": 0.349609375, "learning_rate": 1.823361863728086e-05, "loss": 1.9926, "step": 13951 }, { "epoch": 0.4501452881666267, "grad_norm": 0.3515625, "learning_rate": 1.8232081888132977e-05, "loss": 1.9546, "step": 13952 }, { "epoch": 0.45017755202042303, "grad_norm": 0.33984375, "learning_rate": 1.823054510340866e-05, "loss": 1.96, "step": 13953 }, { "epoch": 0.4502098158742194, "grad_norm": 0.353515625, "learning_rate": 1.8229008283124814e-05, "loss": 1.9207, "step": 13954 }, { "epoch": 0.4502420797280157, "grad_norm": 0.34765625, "learning_rate": 1.8227471427298362e-05, "loss": 1.9866, "step": 13955 }, { "epoch": 0.45027434358181206, "grad_norm": 0.345703125, "learning_rate": 1.8225934535946224e-05, "loss": 1.957, "step": 13956 }, { "epoch": 0.4503066074356084, "grad_norm": 0.34375, "learning_rate": 1.8224397609085312e-05, "loss": 1.974, "step": 13957 }, { "epoch": 0.45033887128940475, "grad_norm": 0.337890625, "learning_rate": 1.822286064673254e-05, "loss": 1.9872, "step": 13958 }, { "epoch": 0.4503711351432011, "grad_norm": 0.341796875, "learning_rate": 1.8221323648904833e-05, "loss": 1.9496, "step": 13959 }, { "epoch": 0.45040339899699744, "grad_norm": 0.349609375, "learning_rate": 1.8219786615619107e-05, "loss": 1.971, "step": 13960 }, { "epoch": 0.4504356628507938, "grad_norm": 0.333984375, "learning_rate": 1.8218249546892276e-05, "loss": 1.9606, "step": 13961 }, { "epoch": 0.4504679267045901, "grad_norm": 0.337890625, "learning_rate": 1.8216712442741266e-05, "loss": 1.9833, "step": 13962 }, { "epoch": 0.45050019055838647, "grad_norm": 0.37109375, "learning_rate": 1.8215175303182992e-05, "loss": 1.9554, "step": 13963 }, { "epoch": 0.4505324544121828, "grad_norm": 0.369140625, "learning_rate": 1.8213638128234376e-05, "loss": 1.9874, "step": 13964 }, { "epoch": 0.45056471826597916, "grad_norm": 0.359375, "learning_rate": 1.8212100917912337e-05, "loss": 1.9897, "step": 13965 }, { "epoch": 0.4505969821197755, "grad_norm": 0.35546875, "learning_rate": 1.8210563672233794e-05, "loss": 1.916, "step": 13966 }, { "epoch": 0.45062924597357185, "grad_norm": 0.3828125, "learning_rate": 1.820902639121567e-05, "loss": 1.9773, "step": 13967 }, { "epoch": 0.4506615098273682, "grad_norm": 0.380859375, "learning_rate": 1.8207489074874885e-05, "loss": 1.9751, "step": 13968 }, { "epoch": 0.4506937736811646, "grad_norm": 0.404296875, "learning_rate": 1.8205951723228363e-05, "loss": 1.9911, "step": 13969 }, { "epoch": 0.45072603753496093, "grad_norm": 0.357421875, "learning_rate": 1.8204414336293022e-05, "loss": 1.9971, "step": 13970 }, { "epoch": 0.4507583013887573, "grad_norm": 0.373046875, "learning_rate": 1.820287691408579e-05, "loss": 1.9486, "step": 13971 }, { "epoch": 0.4507905652425536, "grad_norm": 0.373046875, "learning_rate": 1.8201339456623584e-05, "loss": 1.983, "step": 13972 }, { "epoch": 0.45082282909634996, "grad_norm": 0.35546875, "learning_rate": 1.819980196392333e-05, "loss": 1.9777, "step": 13973 }, { "epoch": 0.4508550929501463, "grad_norm": 0.396484375, "learning_rate": 1.8198264436001952e-05, "loss": 1.9617, "step": 13974 }, { "epoch": 0.45088735680394265, "grad_norm": 0.375, "learning_rate": 1.8196726872876376e-05, "loss": 1.933, "step": 13975 }, { "epoch": 0.450919620657739, "grad_norm": 0.357421875, "learning_rate": 1.8195189274563522e-05, "loss": 1.9704, "step": 13976 }, { "epoch": 0.45095188451153534, "grad_norm": 0.373046875, "learning_rate": 1.819365164108032e-05, "loss": 1.9456, "step": 13977 }, { "epoch": 0.4509841483653317, "grad_norm": 0.37109375, "learning_rate": 1.8192113972443684e-05, "loss": 1.9673, "step": 13978 }, { "epoch": 0.451016412219128, "grad_norm": 0.37109375, "learning_rate": 1.8190576268670553e-05, "loss": 1.9838, "step": 13979 }, { "epoch": 0.45104867607292437, "grad_norm": 0.380859375, "learning_rate": 1.8189038529777843e-05, "loss": 1.9575, "step": 13980 }, { "epoch": 0.4510809399267207, "grad_norm": 0.361328125, "learning_rate": 1.818750075578249e-05, "loss": 1.9791, "step": 13981 }, { "epoch": 0.45111320378051706, "grad_norm": 0.36328125, "learning_rate": 1.818596294670141e-05, "loss": 1.9798, "step": 13982 }, { "epoch": 0.4511454676343134, "grad_norm": 0.34765625, "learning_rate": 1.818442510255154e-05, "loss": 1.9597, "step": 13983 }, { "epoch": 0.45117773148810975, "grad_norm": 0.34765625, "learning_rate": 1.81828872233498e-05, "loss": 1.9817, "step": 13984 }, { "epoch": 0.4512099953419061, "grad_norm": 0.384765625, "learning_rate": 1.818134930911312e-05, "loss": 1.9826, "step": 13985 }, { "epoch": 0.45124225919570243, "grad_norm": 0.333984375, "learning_rate": 1.8179811359858436e-05, "loss": 1.9592, "step": 13986 }, { "epoch": 0.4512745230494988, "grad_norm": 0.365234375, "learning_rate": 1.8178273375602664e-05, "loss": 1.9744, "step": 13987 }, { "epoch": 0.4513067869032951, "grad_norm": 0.357421875, "learning_rate": 1.8176735356362743e-05, "loss": 1.9666, "step": 13988 }, { "epoch": 0.4513390507570915, "grad_norm": 0.33203125, "learning_rate": 1.8175197302155592e-05, "loss": 1.9693, "step": 13989 }, { "epoch": 0.45137131461088786, "grad_norm": 0.33984375, "learning_rate": 1.817365921299815e-05, "loss": 1.9589, "step": 13990 }, { "epoch": 0.4514035784646842, "grad_norm": 0.35546875, "learning_rate": 1.8172121088907345e-05, "loss": 1.9809, "step": 13991 }, { "epoch": 0.45143584231848055, "grad_norm": 0.34375, "learning_rate": 1.8170582929900104e-05, "loss": 1.981, "step": 13992 }, { "epoch": 0.4514681061722769, "grad_norm": 0.333984375, "learning_rate": 1.816904473599336e-05, "loss": 1.9802, "step": 13993 }, { "epoch": 0.45150037002607324, "grad_norm": 0.349609375, "learning_rate": 1.8167506507204052e-05, "loss": 1.9941, "step": 13994 }, { "epoch": 0.4515326338798696, "grad_norm": 0.345703125, "learning_rate": 1.81659682435491e-05, "loss": 1.9737, "step": 13995 }, { "epoch": 0.4515648977336659, "grad_norm": 0.33984375, "learning_rate": 1.816442994504544e-05, "loss": 2.0007, "step": 13996 }, { "epoch": 0.45159716158746227, "grad_norm": 0.337890625, "learning_rate": 1.8162891611710007e-05, "loss": 1.9823, "step": 13997 }, { "epoch": 0.4516294254412586, "grad_norm": 0.32421875, "learning_rate": 1.8161353243559733e-05, "loss": 1.9327, "step": 13998 }, { "epoch": 0.45166168929505496, "grad_norm": 0.333984375, "learning_rate": 1.8159814840611547e-05, "loss": 1.9911, "step": 13999 }, { "epoch": 0.4516939531488513, "grad_norm": 0.333984375, "learning_rate": 1.815827640288239e-05, "loss": 1.9776, "step": 14000 }, { "epoch": 0.45172621700264765, "grad_norm": 0.33984375, "learning_rate": 1.815673793038919e-05, "loss": 1.9669, "step": 14001 }, { "epoch": 0.451758480856444, "grad_norm": 0.32421875, "learning_rate": 1.8155199423148884e-05, "loss": 1.9614, "step": 14002 }, { "epoch": 0.45179074471024033, "grad_norm": 0.328125, "learning_rate": 1.8153660881178407e-05, "loss": 1.957, "step": 14003 }, { "epoch": 0.4518230085640367, "grad_norm": 0.330078125, "learning_rate": 1.8152122304494692e-05, "loss": 1.9608, "step": 14004 }, { "epoch": 0.451855272417833, "grad_norm": 0.337890625, "learning_rate": 1.8150583693114676e-05, "loss": 1.9799, "step": 14005 }, { "epoch": 0.45188753627162936, "grad_norm": 0.34375, "learning_rate": 1.8149045047055298e-05, "loss": 1.9582, "step": 14006 }, { "epoch": 0.4519198001254257, "grad_norm": 0.341796875, "learning_rate": 1.8147506366333484e-05, "loss": 1.9673, "step": 14007 }, { "epoch": 0.45195206397922205, "grad_norm": 0.4140625, "learning_rate": 1.8145967650966184e-05, "loss": 1.9351, "step": 14008 }, { "epoch": 0.45198432783301845, "grad_norm": 0.357421875, "learning_rate": 1.8144428900970327e-05, "loss": 1.9644, "step": 14009 }, { "epoch": 0.4520165916868148, "grad_norm": 0.341796875, "learning_rate": 1.814289011636285e-05, "loss": 1.9522, "step": 14010 }, { "epoch": 0.45204885554061114, "grad_norm": 0.333984375, "learning_rate": 1.81413512971607e-05, "loss": 1.959, "step": 14011 }, { "epoch": 0.4520811193944075, "grad_norm": 0.33203125, "learning_rate": 1.8139812443380804e-05, "loss": 1.9505, "step": 14012 }, { "epoch": 0.4521133832482038, "grad_norm": 0.34375, "learning_rate": 1.8138273555040106e-05, "loss": 1.9, "step": 14013 }, { "epoch": 0.45214564710200017, "grad_norm": 0.353515625, "learning_rate": 1.8136734632155547e-05, "loss": 1.9874, "step": 14014 }, { "epoch": 0.4521779109557965, "grad_norm": 0.337890625, "learning_rate": 1.813519567474406e-05, "loss": 1.9468, "step": 14015 }, { "epoch": 0.45221017480959286, "grad_norm": 0.349609375, "learning_rate": 1.8133656682822585e-05, "loss": 1.9716, "step": 14016 }, { "epoch": 0.4522424386633892, "grad_norm": 0.357421875, "learning_rate": 1.8132117656408073e-05, "loss": 1.9405, "step": 14017 }, { "epoch": 0.45227470251718555, "grad_norm": 0.36328125, "learning_rate": 1.813057859551745e-05, "loss": 1.9455, "step": 14018 }, { "epoch": 0.4523069663709819, "grad_norm": 0.34375, "learning_rate": 1.812903950016767e-05, "loss": 1.9618, "step": 14019 }, { "epoch": 0.45233923022477823, "grad_norm": 0.33984375, "learning_rate": 1.812750037037566e-05, "loss": 1.9211, "step": 14020 }, { "epoch": 0.4523714940785746, "grad_norm": 0.36328125, "learning_rate": 1.8125961206158376e-05, "loss": 1.9473, "step": 14021 }, { "epoch": 0.4524037579323709, "grad_norm": 0.361328125, "learning_rate": 1.8124422007532754e-05, "loss": 1.9583, "step": 14022 }, { "epoch": 0.45243602178616726, "grad_norm": 0.36328125, "learning_rate": 1.812288277451573e-05, "loss": 1.9382, "step": 14023 }, { "epoch": 0.4524682856399636, "grad_norm": 0.47265625, "learning_rate": 1.8121343507124256e-05, "loss": 1.9762, "step": 14024 }, { "epoch": 0.45250054949375995, "grad_norm": 0.34375, "learning_rate": 1.8119804205375278e-05, "loss": 2.0045, "step": 14025 }, { "epoch": 0.4525328133475563, "grad_norm": 0.369140625, "learning_rate": 1.8118264869285723e-05, "loss": 1.9615, "step": 14026 }, { "epoch": 0.45256507720135264, "grad_norm": 0.353515625, "learning_rate": 1.8116725498872547e-05, "loss": 1.9574, "step": 14027 }, { "epoch": 0.452597341055149, "grad_norm": 0.34765625, "learning_rate": 1.81151860941527e-05, "loss": 1.97, "step": 14028 }, { "epoch": 0.4526296049089454, "grad_norm": 0.345703125, "learning_rate": 1.8113646655143113e-05, "loss": 1.9623, "step": 14029 }, { "epoch": 0.4526618687627417, "grad_norm": 0.35546875, "learning_rate": 1.811210718186074e-05, "loss": 1.9725, "step": 14030 }, { "epoch": 0.45269413261653807, "grad_norm": 0.3515625, "learning_rate": 1.8110567674322528e-05, "loss": 1.9683, "step": 14031 }, { "epoch": 0.4527263964703344, "grad_norm": 0.341796875, "learning_rate": 1.8109028132545412e-05, "loss": 1.9748, "step": 14032 }, { "epoch": 0.45275866032413076, "grad_norm": 0.353515625, "learning_rate": 1.8107488556546345e-05, "loss": 1.9884, "step": 14033 }, { "epoch": 0.4527909241779271, "grad_norm": 0.34765625, "learning_rate": 1.8105948946342275e-05, "loss": 1.9573, "step": 14034 }, { "epoch": 0.45282318803172344, "grad_norm": 0.369140625, "learning_rate": 1.8104409301950146e-05, "loss": 1.9843, "step": 14035 }, { "epoch": 0.4528554518855198, "grad_norm": 0.341796875, "learning_rate": 1.810286962338691e-05, "loss": 1.9939, "step": 14036 }, { "epoch": 0.45288771573931613, "grad_norm": 0.345703125, "learning_rate": 1.8101329910669504e-05, "loss": 1.988, "step": 14037 }, { "epoch": 0.4529199795931125, "grad_norm": 0.33984375, "learning_rate": 1.8099790163814886e-05, "loss": 1.9753, "step": 14038 }, { "epoch": 0.4529522434469088, "grad_norm": 0.34375, "learning_rate": 1.8098250382840006e-05, "loss": 1.9577, "step": 14039 }, { "epoch": 0.45298450730070516, "grad_norm": 0.34375, "learning_rate": 1.8096710567761807e-05, "loss": 1.97, "step": 14040 }, { "epoch": 0.4530167711545015, "grad_norm": 0.341796875, "learning_rate": 1.8095170718597237e-05, "loss": 1.9722, "step": 14041 }, { "epoch": 0.45304903500829785, "grad_norm": 0.357421875, "learning_rate": 1.8093630835363253e-05, "loss": 1.999, "step": 14042 }, { "epoch": 0.4530812988620942, "grad_norm": 0.333984375, "learning_rate": 1.8092090918076797e-05, "loss": 1.9965, "step": 14043 }, { "epoch": 0.45311356271589054, "grad_norm": 0.34765625, "learning_rate": 1.809055096675482e-05, "loss": 1.9743, "step": 14044 }, { "epoch": 0.4531458265696869, "grad_norm": 0.357421875, "learning_rate": 1.8089010981414277e-05, "loss": 1.9925, "step": 14045 }, { "epoch": 0.4531780904234832, "grad_norm": 0.34765625, "learning_rate": 1.8087470962072118e-05, "loss": 1.94, "step": 14046 }, { "epoch": 0.45321035427727957, "grad_norm": 0.359375, "learning_rate": 1.8085930908745296e-05, "loss": 1.9801, "step": 14047 }, { "epoch": 0.4532426181310759, "grad_norm": 0.34375, "learning_rate": 1.8084390821450755e-05, "loss": 1.9694, "step": 14048 }, { "epoch": 0.4532748819848723, "grad_norm": 0.349609375, "learning_rate": 1.8082850700205457e-05, "loss": 1.9745, "step": 14049 }, { "epoch": 0.45330714583866866, "grad_norm": 0.337890625, "learning_rate": 1.8081310545026348e-05, "loss": 1.9541, "step": 14050 }, { "epoch": 0.453339409692465, "grad_norm": 0.349609375, "learning_rate": 1.8079770355930384e-05, "loss": 1.9736, "step": 14051 }, { "epoch": 0.45337167354626134, "grad_norm": 0.359375, "learning_rate": 1.8078230132934514e-05, "loss": 1.9694, "step": 14052 }, { "epoch": 0.4534039374000577, "grad_norm": 0.359375, "learning_rate": 1.80766898760557e-05, "loss": 1.9796, "step": 14053 }, { "epoch": 0.45343620125385403, "grad_norm": 0.361328125, "learning_rate": 1.8075149585310886e-05, "loss": 1.9902, "step": 14054 }, { "epoch": 0.4534684651076504, "grad_norm": 0.3515625, "learning_rate": 1.8073609260717036e-05, "loss": 1.982, "step": 14055 }, { "epoch": 0.4535007289614467, "grad_norm": 0.337890625, "learning_rate": 1.80720689022911e-05, "loss": 1.9639, "step": 14056 }, { "epoch": 0.45353299281524306, "grad_norm": 0.34375, "learning_rate": 1.8070528510050033e-05, "loss": 1.9716, "step": 14057 }, { "epoch": 0.4535652566690394, "grad_norm": 0.3515625, "learning_rate": 1.8068988084010787e-05, "loss": 1.9872, "step": 14058 }, { "epoch": 0.45359752052283575, "grad_norm": 0.34375, "learning_rate": 1.8067447624190327e-05, "loss": 1.9605, "step": 14059 }, { "epoch": 0.4536297843766321, "grad_norm": 0.349609375, "learning_rate": 1.8065907130605598e-05, "loss": 1.9736, "step": 14060 }, { "epoch": 0.45366204823042844, "grad_norm": 0.33984375, "learning_rate": 1.8064366603273568e-05, "loss": 1.9483, "step": 14061 }, { "epoch": 0.4536943120842248, "grad_norm": 0.353515625, "learning_rate": 1.806282604221119e-05, "loss": 1.9642, "step": 14062 }, { "epoch": 0.4537265759380211, "grad_norm": 0.337890625, "learning_rate": 1.8061285447435415e-05, "loss": 1.9757, "step": 14063 }, { "epoch": 0.45375883979181747, "grad_norm": 0.345703125, "learning_rate": 1.805974481896321e-05, "loss": 1.9613, "step": 14064 }, { "epoch": 0.4537911036456138, "grad_norm": 0.341796875, "learning_rate": 1.8058204156811525e-05, "loss": 1.9868, "step": 14065 }, { "epoch": 0.45382336749941016, "grad_norm": 0.337890625, "learning_rate": 1.8056663460997324e-05, "loss": 1.9668, "step": 14066 }, { "epoch": 0.4538556313532065, "grad_norm": 0.345703125, "learning_rate": 1.805512273153757e-05, "loss": 1.9589, "step": 14067 }, { "epoch": 0.45388789520700284, "grad_norm": 0.345703125, "learning_rate": 1.805358196844921e-05, "loss": 1.9665, "step": 14068 }, { "epoch": 0.4539201590607992, "grad_norm": 0.33984375, "learning_rate": 1.805204117174921e-05, "loss": 1.9709, "step": 14069 }, { "epoch": 0.4539524229145956, "grad_norm": 0.33984375, "learning_rate": 1.8050500341454535e-05, "loss": 2.0177, "step": 14070 }, { "epoch": 0.45398468676839193, "grad_norm": 0.34375, "learning_rate": 1.804895947758214e-05, "loss": 1.9664, "step": 14071 }, { "epoch": 0.4540169506221883, "grad_norm": 0.35546875, "learning_rate": 1.8047418580148985e-05, "loss": 1.9704, "step": 14072 }, { "epoch": 0.4540492144759846, "grad_norm": 0.3359375, "learning_rate": 1.804587764917203e-05, "loss": 1.9696, "step": 14073 }, { "epoch": 0.45408147832978096, "grad_norm": 0.35546875, "learning_rate": 1.8044336684668245e-05, "loss": 1.9881, "step": 14074 }, { "epoch": 0.4541137421835773, "grad_norm": 0.3359375, "learning_rate": 1.804279568665458e-05, "loss": 1.9699, "step": 14075 }, { "epoch": 0.45414600603737365, "grad_norm": 0.337890625, "learning_rate": 1.8041254655148008e-05, "loss": 1.9696, "step": 14076 }, { "epoch": 0.45417826989117, "grad_norm": 0.349609375, "learning_rate": 1.803971359016548e-05, "loss": 1.9601, "step": 14077 }, { "epoch": 0.45421053374496634, "grad_norm": 0.3359375, "learning_rate": 1.8038172491723974e-05, "loss": 1.9542, "step": 14078 }, { "epoch": 0.4542427975987627, "grad_norm": 0.349609375, "learning_rate": 1.8036631359840437e-05, "loss": 1.9664, "step": 14079 }, { "epoch": 0.454275061452559, "grad_norm": 0.345703125, "learning_rate": 1.8035090194531842e-05, "loss": 1.9857, "step": 14080 }, { "epoch": 0.45430732530635537, "grad_norm": 0.35546875, "learning_rate": 1.8033548995815156e-05, "loss": 1.966, "step": 14081 }, { "epoch": 0.4543395891601517, "grad_norm": 0.353515625, "learning_rate": 1.8032007763707333e-05, "loss": 1.9702, "step": 14082 }, { "epoch": 0.45437185301394806, "grad_norm": 0.341796875, "learning_rate": 1.803046649822535e-05, "loss": 1.9565, "step": 14083 }, { "epoch": 0.4544041168677444, "grad_norm": 0.369140625, "learning_rate": 1.802892519938616e-05, "loss": 2.0015, "step": 14084 }, { "epoch": 0.45443638072154074, "grad_norm": 0.33984375, "learning_rate": 1.8027383867206737e-05, "loss": 1.9709, "step": 14085 }, { "epoch": 0.4544686445753371, "grad_norm": 0.35546875, "learning_rate": 1.8025842501704043e-05, "loss": 1.9956, "step": 14086 }, { "epoch": 0.45450090842913343, "grad_norm": 0.34765625, "learning_rate": 1.8024301102895047e-05, "loss": 1.9524, "step": 14087 }, { "epoch": 0.4545331722829298, "grad_norm": 0.36328125, "learning_rate": 1.8022759670796715e-05, "loss": 1.9676, "step": 14088 }, { "epoch": 0.4545654361367261, "grad_norm": 0.34765625, "learning_rate": 1.802121820542601e-05, "loss": 1.9228, "step": 14089 }, { "epoch": 0.4545976999905225, "grad_norm": 0.36328125, "learning_rate": 1.8019676706799904e-05, "loss": 1.9778, "step": 14090 }, { "epoch": 0.45462996384431886, "grad_norm": 0.369140625, "learning_rate": 1.8018135174935365e-05, "loss": 2.0091, "step": 14091 }, { "epoch": 0.4546622276981152, "grad_norm": 0.3828125, "learning_rate": 1.8016593609849353e-05, "loss": 1.9789, "step": 14092 }, { "epoch": 0.45469449155191155, "grad_norm": 0.4140625, "learning_rate": 1.801505201155885e-05, "loss": 1.9852, "step": 14093 }, { "epoch": 0.4547267554057079, "grad_norm": 0.353515625, "learning_rate": 1.8013510380080814e-05, "loss": 1.9533, "step": 14094 }, { "epoch": 0.45475901925950424, "grad_norm": 0.359375, "learning_rate": 1.801196871543222e-05, "loss": 1.9654, "step": 14095 }, { "epoch": 0.4547912831133006, "grad_norm": 0.3671875, "learning_rate": 1.8010427017630032e-05, "loss": 2.0099, "step": 14096 }, { "epoch": 0.4548235469670969, "grad_norm": 0.345703125, "learning_rate": 1.8008885286691222e-05, "loss": 1.9698, "step": 14097 }, { "epoch": 0.45485581082089327, "grad_norm": 0.35546875, "learning_rate": 1.8007343522632766e-05, "loss": 1.9324, "step": 14098 }, { "epoch": 0.4548880746746896, "grad_norm": 0.36328125, "learning_rate": 1.8005801725471626e-05, "loss": 1.9455, "step": 14099 }, { "epoch": 0.45492033852848596, "grad_norm": 0.359375, "learning_rate": 1.8004259895224776e-05, "loss": 1.9897, "step": 14100 }, { "epoch": 0.4549526023822823, "grad_norm": 0.375, "learning_rate": 1.8002718031909194e-05, "loss": 2.0035, "step": 14101 }, { "epoch": 0.45498486623607864, "grad_norm": 0.37109375, "learning_rate": 1.800117613554184e-05, "loss": 1.9841, "step": 14102 }, { "epoch": 0.455017130089875, "grad_norm": 0.34765625, "learning_rate": 1.7999634206139697e-05, "loss": 1.9951, "step": 14103 }, { "epoch": 0.45504939394367133, "grad_norm": 0.3984375, "learning_rate": 1.7998092243719732e-05, "loss": 1.9755, "step": 14104 }, { "epoch": 0.4550816577974677, "grad_norm": 0.380859375, "learning_rate": 1.799655024829892e-05, "loss": 1.9946, "step": 14105 }, { "epoch": 0.455113921651264, "grad_norm": 0.34765625, "learning_rate": 1.7995008219894227e-05, "loss": 2.0034, "step": 14106 }, { "epoch": 0.45514618550506036, "grad_norm": 0.376953125, "learning_rate": 1.7993466158522633e-05, "loss": 1.9818, "step": 14107 }, { "epoch": 0.4551784493588567, "grad_norm": 0.357421875, "learning_rate": 1.7991924064201115e-05, "loss": 1.9968, "step": 14108 }, { "epoch": 0.45521071321265305, "grad_norm": 0.3671875, "learning_rate": 1.799038193694664e-05, "loss": 1.9766, "step": 14109 }, { "epoch": 0.45524297706644945, "grad_norm": 0.353515625, "learning_rate": 1.7988839776776185e-05, "loss": 1.9727, "step": 14110 }, { "epoch": 0.4552752409202458, "grad_norm": 0.353515625, "learning_rate": 1.7987297583706726e-05, "loss": 1.9788, "step": 14111 }, { "epoch": 0.45530750477404214, "grad_norm": 0.35546875, "learning_rate": 1.7985755357755245e-05, "loss": 2.0024, "step": 14112 }, { "epoch": 0.4553397686278385, "grad_norm": 0.36328125, "learning_rate": 1.7984213098938705e-05, "loss": 1.9817, "step": 14113 }, { "epoch": 0.4553720324816348, "grad_norm": 0.365234375, "learning_rate": 1.7982670807274086e-05, "loss": 2.0091, "step": 14114 }, { "epoch": 0.45540429633543117, "grad_norm": 0.36328125, "learning_rate": 1.7981128482778374e-05, "loss": 1.9776, "step": 14115 }, { "epoch": 0.4554365601892275, "grad_norm": 0.4140625, "learning_rate": 1.7979586125468532e-05, "loss": 1.9997, "step": 14116 }, { "epoch": 0.45546882404302386, "grad_norm": 0.359375, "learning_rate": 1.7978043735361544e-05, "loss": 1.9841, "step": 14117 }, { "epoch": 0.4555010878968202, "grad_norm": 0.3671875, "learning_rate": 1.797650131247439e-05, "loss": 1.9949, "step": 14118 }, { "epoch": 0.45553335175061654, "grad_norm": 0.359375, "learning_rate": 1.7974958856824047e-05, "loss": 1.9952, "step": 14119 }, { "epoch": 0.4555656156044129, "grad_norm": 0.357421875, "learning_rate": 1.7973416368427484e-05, "loss": 2.0018, "step": 14120 }, { "epoch": 0.45559787945820923, "grad_norm": 0.390625, "learning_rate": 1.7971873847301693e-05, "loss": 2.0089, "step": 14121 }, { "epoch": 0.4556301433120056, "grad_norm": 0.3515625, "learning_rate": 1.7970331293463646e-05, "loss": 2.0016, "step": 14122 }, { "epoch": 0.4556624071658019, "grad_norm": 0.35546875, "learning_rate": 1.796878870693032e-05, "loss": 1.991, "step": 14123 }, { "epoch": 0.45569467101959826, "grad_norm": 0.337890625, "learning_rate": 1.7967246087718697e-05, "loss": 1.9627, "step": 14124 }, { "epoch": 0.4557269348733946, "grad_norm": 0.34765625, "learning_rate": 1.7965703435845758e-05, "loss": 1.9978, "step": 14125 }, { "epoch": 0.45575919872719095, "grad_norm": 0.345703125, "learning_rate": 1.7964160751328484e-05, "loss": 1.986, "step": 14126 }, { "epoch": 0.4557914625809873, "grad_norm": 0.376953125, "learning_rate": 1.7962618034183857e-05, "loss": 1.9822, "step": 14127 }, { "epoch": 0.45582372643478364, "grad_norm": 0.369140625, "learning_rate": 1.796107528442885e-05, "loss": 2.0094, "step": 14128 }, { "epoch": 0.45585599028858, "grad_norm": 0.361328125, "learning_rate": 1.795953250208046e-05, "loss": 1.9924, "step": 14129 }, { "epoch": 0.4558882541423764, "grad_norm": 0.36328125, "learning_rate": 1.795798968715565e-05, "loss": 1.9655, "step": 14130 }, { "epoch": 0.4559205179961727, "grad_norm": 0.3515625, "learning_rate": 1.795644683967142e-05, "loss": 1.9647, "step": 14131 }, { "epoch": 0.45595278184996907, "grad_norm": 0.36328125, "learning_rate": 1.795490395964474e-05, "loss": 2.0004, "step": 14132 }, { "epoch": 0.4559850457037654, "grad_norm": 0.3515625, "learning_rate": 1.7953361047092602e-05, "loss": 1.9682, "step": 14133 }, { "epoch": 0.45601730955756176, "grad_norm": 0.35546875, "learning_rate": 1.7951818102031975e-05, "loss": 1.9815, "step": 14134 }, { "epoch": 0.4560495734113581, "grad_norm": 0.3515625, "learning_rate": 1.795027512447986e-05, "loss": 2.0004, "step": 14135 }, { "epoch": 0.45608183726515444, "grad_norm": 0.35546875, "learning_rate": 1.794873211445323e-05, "loss": 1.9778, "step": 14136 }, { "epoch": 0.4561141011189508, "grad_norm": 0.34375, "learning_rate": 1.7947189071969066e-05, "loss": 1.9732, "step": 14137 }, { "epoch": 0.45614636497274713, "grad_norm": 0.34765625, "learning_rate": 1.7945645997044366e-05, "loss": 1.957, "step": 14138 }, { "epoch": 0.4561786288265435, "grad_norm": 0.3515625, "learning_rate": 1.7944102889696108e-05, "loss": 1.9999, "step": 14139 }, { "epoch": 0.4562108926803398, "grad_norm": 0.345703125, "learning_rate": 1.794255974994128e-05, "loss": 2.0112, "step": 14140 }, { "epoch": 0.45624315653413616, "grad_norm": 0.353515625, "learning_rate": 1.7941016577796857e-05, "loss": 2.0058, "step": 14141 }, { "epoch": 0.4562754203879325, "grad_norm": 0.345703125, "learning_rate": 1.793947337327984e-05, "loss": 1.9983, "step": 14142 }, { "epoch": 0.45630768424172885, "grad_norm": 0.345703125, "learning_rate": 1.7937930136407204e-05, "loss": 1.9989, "step": 14143 }, { "epoch": 0.4563399480955252, "grad_norm": 0.345703125, "learning_rate": 1.7936386867195945e-05, "loss": 1.9843, "step": 14144 }, { "epoch": 0.45637221194932154, "grad_norm": 0.35546875, "learning_rate": 1.7934843565663043e-05, "loss": 1.9728, "step": 14145 }, { "epoch": 0.4564044758031179, "grad_norm": 0.369140625, "learning_rate": 1.793330023182549e-05, "loss": 1.9902, "step": 14146 }, { "epoch": 0.4564367396569142, "grad_norm": 0.365234375, "learning_rate": 1.7931756865700272e-05, "loss": 1.9949, "step": 14147 }, { "epoch": 0.45646900351071057, "grad_norm": 0.345703125, "learning_rate": 1.7930213467304376e-05, "loss": 1.9466, "step": 14148 }, { "epoch": 0.4565012673645069, "grad_norm": 0.35546875, "learning_rate": 1.7928670036654796e-05, "loss": 1.9987, "step": 14149 }, { "epoch": 0.45653353121830326, "grad_norm": 0.34375, "learning_rate": 1.7927126573768522e-05, "loss": 1.9566, "step": 14150 }, { "epoch": 0.45656579507209966, "grad_norm": 0.349609375, "learning_rate": 1.7925583078662527e-05, "loss": 1.9878, "step": 14151 }, { "epoch": 0.456598058925896, "grad_norm": 0.3359375, "learning_rate": 1.792403955135382e-05, "loss": 1.9686, "step": 14152 }, { "epoch": 0.45663032277969234, "grad_norm": 0.337890625, "learning_rate": 1.7922495991859382e-05, "loss": 1.9743, "step": 14153 }, { "epoch": 0.4566625866334887, "grad_norm": 0.34765625, "learning_rate": 1.7920952400196202e-05, "loss": 1.9871, "step": 14154 }, { "epoch": 0.45669485048728503, "grad_norm": 0.361328125, "learning_rate": 1.7919408776381274e-05, "loss": 1.9622, "step": 14155 }, { "epoch": 0.4567271143410814, "grad_norm": 0.359375, "learning_rate": 1.7917865120431593e-05, "loss": 2.0024, "step": 14156 }, { "epoch": 0.4567593781948777, "grad_norm": 0.373046875, "learning_rate": 1.791632143236414e-05, "loss": 2.0128, "step": 14157 }, { "epoch": 0.45679164204867406, "grad_norm": 0.361328125, "learning_rate": 1.7914777712195914e-05, "loss": 2.0032, "step": 14158 }, { "epoch": 0.4568239059024704, "grad_norm": 0.34765625, "learning_rate": 1.7913233959943903e-05, "loss": 1.992, "step": 14159 }, { "epoch": 0.45685616975626675, "grad_norm": 0.369140625, "learning_rate": 1.791169017562511e-05, "loss": 2.0107, "step": 14160 }, { "epoch": 0.4568884336100631, "grad_norm": 0.3671875, "learning_rate": 1.7910146359256518e-05, "loss": 2.0105, "step": 14161 }, { "epoch": 0.45692069746385944, "grad_norm": 0.373046875, "learning_rate": 1.790860251085512e-05, "loss": 1.9539, "step": 14162 }, { "epoch": 0.4569529613176558, "grad_norm": 0.369140625, "learning_rate": 1.7907058630437917e-05, "loss": 1.9573, "step": 14163 }, { "epoch": 0.4569852251714521, "grad_norm": 0.396484375, "learning_rate": 1.79055147180219e-05, "loss": 1.9797, "step": 14164 }, { "epoch": 0.45701748902524847, "grad_norm": 0.3671875, "learning_rate": 1.790397077362405e-05, "loss": 2.0046, "step": 14165 }, { "epoch": 0.4570497528790448, "grad_norm": 0.380859375, "learning_rate": 1.790242679726138e-05, "loss": 1.9934, "step": 14166 }, { "epoch": 0.45708201673284116, "grad_norm": 0.37109375, "learning_rate": 1.7900882788950882e-05, "loss": 1.9892, "step": 14167 }, { "epoch": 0.4571142805866375, "grad_norm": 0.380859375, "learning_rate": 1.7899338748709536e-05, "loss": 1.9935, "step": 14168 }, { "epoch": 0.45714654444043384, "grad_norm": 0.37109375, "learning_rate": 1.789779467655436e-05, "loss": 1.9966, "step": 14169 }, { "epoch": 0.4571788082942302, "grad_norm": 0.380859375, "learning_rate": 1.7896250572502335e-05, "loss": 2.0165, "step": 14170 }, { "epoch": 0.4572110721480266, "grad_norm": 0.373046875, "learning_rate": 1.7894706436570464e-05, "loss": 1.9948, "step": 14171 }, { "epoch": 0.45724333600182293, "grad_norm": 0.353515625, "learning_rate": 1.7893162268775734e-05, "loss": 1.99, "step": 14172 }, { "epoch": 0.4572755998556193, "grad_norm": 0.3828125, "learning_rate": 1.7891618069135157e-05, "loss": 1.9975, "step": 14173 }, { "epoch": 0.4573078637094156, "grad_norm": 0.3515625, "learning_rate": 1.7890073837665718e-05, "loss": 2.0014, "step": 14174 }, { "epoch": 0.45734012756321196, "grad_norm": 0.380859375, "learning_rate": 1.788852957438442e-05, "loss": 1.9658, "step": 14175 }, { "epoch": 0.4573723914170083, "grad_norm": 0.380859375, "learning_rate": 1.7886985279308263e-05, "loss": 1.9733, "step": 14176 }, { "epoch": 0.45740465527080465, "grad_norm": 0.345703125, "learning_rate": 1.7885440952454244e-05, "loss": 1.9744, "step": 14177 }, { "epoch": 0.457436919124601, "grad_norm": 0.390625, "learning_rate": 1.7883896593839355e-05, "loss": 1.9762, "step": 14178 }, { "epoch": 0.45746918297839734, "grad_norm": 0.359375, "learning_rate": 1.7882352203480606e-05, "loss": 1.985, "step": 14179 }, { "epoch": 0.4575014468321937, "grad_norm": 0.3515625, "learning_rate": 1.7880807781394994e-05, "loss": 1.991, "step": 14180 }, { "epoch": 0.45753371068599, "grad_norm": 0.34375, "learning_rate": 1.7879263327599515e-05, "loss": 1.9963, "step": 14181 }, { "epoch": 0.45756597453978637, "grad_norm": 0.349609375, "learning_rate": 1.7877718842111167e-05, "loss": 1.9777, "step": 14182 }, { "epoch": 0.4575982383935827, "grad_norm": 0.375, "learning_rate": 1.7876174324946957e-05, "loss": 1.9969, "step": 14183 }, { "epoch": 0.45763050224737906, "grad_norm": 0.3671875, "learning_rate": 1.7874629776123885e-05, "loss": 1.9865, "step": 14184 }, { "epoch": 0.4576627661011754, "grad_norm": 0.3671875, "learning_rate": 1.7873085195658948e-05, "loss": 1.9736, "step": 14185 }, { "epoch": 0.45769502995497174, "grad_norm": 0.361328125, "learning_rate": 1.787154058356915e-05, "loss": 2.0014, "step": 14186 }, { "epoch": 0.4577272938087681, "grad_norm": 0.359375, "learning_rate": 1.78699959398715e-05, "loss": 1.9719, "step": 14187 }, { "epoch": 0.45775955766256443, "grad_norm": 0.376953125, "learning_rate": 1.7868451264582982e-05, "loss": 1.9804, "step": 14188 }, { "epoch": 0.4577918215163608, "grad_norm": 0.3671875, "learning_rate": 1.7866906557720617e-05, "loss": 1.9623, "step": 14189 }, { "epoch": 0.4578240853701571, "grad_norm": 0.359375, "learning_rate": 1.7865361819301404e-05, "loss": 1.9872, "step": 14190 }, { "epoch": 0.4578563492239535, "grad_norm": 0.373046875, "learning_rate": 1.786381704934234e-05, "loss": 2.0039, "step": 14191 }, { "epoch": 0.45788861307774986, "grad_norm": 0.345703125, "learning_rate": 1.786227224786043e-05, "loss": 1.9718, "step": 14192 }, { "epoch": 0.4579208769315462, "grad_norm": 0.357421875, "learning_rate": 1.7860727414872682e-05, "loss": 1.9907, "step": 14193 }, { "epoch": 0.45795314078534255, "grad_norm": 0.353515625, "learning_rate": 1.78591825503961e-05, "loss": 1.998, "step": 14194 }, { "epoch": 0.4579854046391389, "grad_norm": 0.345703125, "learning_rate": 1.7857637654447692e-05, "loss": 1.979, "step": 14195 }, { "epoch": 0.45801766849293524, "grad_norm": 0.3515625, "learning_rate": 1.785609272704445e-05, "loss": 1.9703, "step": 14196 }, { "epoch": 0.4580499323467316, "grad_norm": 0.349609375, "learning_rate": 1.7854547768203394e-05, "loss": 1.9834, "step": 14197 }, { "epoch": 0.4580821962005279, "grad_norm": 0.341796875, "learning_rate": 1.7853002777941526e-05, "loss": 2.0048, "step": 14198 }, { "epoch": 0.45811446005432427, "grad_norm": 0.341796875, "learning_rate": 1.7851457756275846e-05, "loss": 1.9928, "step": 14199 }, { "epoch": 0.4581467239081206, "grad_norm": 0.345703125, "learning_rate": 1.7849912703223364e-05, "loss": 1.9825, "step": 14200 }, { "epoch": 0.45817898776191696, "grad_norm": 0.412109375, "learning_rate": 1.7848367618801095e-05, "loss": 2.009, "step": 14201 }, { "epoch": 0.4582112516157133, "grad_norm": 0.33984375, "learning_rate": 1.7846822503026027e-05, "loss": 1.9932, "step": 14202 }, { "epoch": 0.45824351546950964, "grad_norm": 0.3359375, "learning_rate": 1.7845277355915188e-05, "loss": 1.9637, "step": 14203 }, { "epoch": 0.458275779323306, "grad_norm": 0.349609375, "learning_rate": 1.7843732177485574e-05, "loss": 1.9894, "step": 14204 }, { "epoch": 0.45830804317710233, "grad_norm": 0.357421875, "learning_rate": 1.7842186967754195e-05, "loss": 1.9963, "step": 14205 }, { "epoch": 0.4583403070308987, "grad_norm": 0.330078125, "learning_rate": 1.7840641726738062e-05, "loss": 2.0036, "step": 14206 }, { "epoch": 0.458372570884695, "grad_norm": 0.359375, "learning_rate": 1.7839096454454185e-05, "loss": 1.9977, "step": 14207 }, { "epoch": 0.45840483473849136, "grad_norm": 0.349609375, "learning_rate": 1.7837551150919566e-05, "loss": 1.987, "step": 14208 }, { "epoch": 0.4584370985922877, "grad_norm": 0.33203125, "learning_rate": 1.7836005816151227e-05, "loss": 1.9962, "step": 14209 }, { "epoch": 0.45846936244608405, "grad_norm": 0.34765625, "learning_rate": 1.783446045016616e-05, "loss": 1.988, "step": 14210 }, { "epoch": 0.45850162629988045, "grad_norm": 0.33984375, "learning_rate": 1.7832915052981397e-05, "loss": 1.9817, "step": 14211 }, { "epoch": 0.4585338901536768, "grad_norm": 0.34375, "learning_rate": 1.7831369624613932e-05, "loss": 1.9771, "step": 14212 }, { "epoch": 0.45856615400747314, "grad_norm": 0.353515625, "learning_rate": 1.7829824165080778e-05, "loss": 1.9717, "step": 14213 }, { "epoch": 0.4585984178612695, "grad_norm": 0.341796875, "learning_rate": 1.7828278674398955e-05, "loss": 1.9962, "step": 14214 }, { "epoch": 0.4586306817150658, "grad_norm": 0.369140625, "learning_rate": 1.782673315258547e-05, "loss": 1.9955, "step": 14215 }, { "epoch": 0.45866294556886217, "grad_norm": 0.345703125, "learning_rate": 1.7825187599657332e-05, "loss": 2.0247, "step": 14216 }, { "epoch": 0.4586952094226585, "grad_norm": 0.34765625, "learning_rate": 1.7823642015631554e-05, "loss": 1.9687, "step": 14217 }, { "epoch": 0.45872747327645486, "grad_norm": 0.3359375, "learning_rate": 1.7822096400525156e-05, "loss": 1.9642, "step": 14218 }, { "epoch": 0.4587597371302512, "grad_norm": 0.341796875, "learning_rate": 1.782055075435514e-05, "loss": 1.9863, "step": 14219 }, { "epoch": 0.45879200098404754, "grad_norm": 0.3515625, "learning_rate": 1.7819005077138522e-05, "loss": 1.928, "step": 14220 }, { "epoch": 0.4588242648378439, "grad_norm": 0.37890625, "learning_rate": 1.781745936889233e-05, "loss": 1.9866, "step": 14221 }, { "epoch": 0.45885652869164023, "grad_norm": 0.333984375, "learning_rate": 1.7815913629633555e-05, "loss": 2.0088, "step": 14222 }, { "epoch": 0.4588887925454366, "grad_norm": 0.34375, "learning_rate": 1.7814367859379225e-05, "loss": 1.9757, "step": 14223 }, { "epoch": 0.4589210563992329, "grad_norm": 0.3515625, "learning_rate": 1.781282205814636e-05, "loss": 2.0101, "step": 14224 }, { "epoch": 0.45895332025302926, "grad_norm": 0.36328125, "learning_rate": 1.781127622595196e-05, "loss": 2.0062, "step": 14225 }, { "epoch": 0.4589855841068256, "grad_norm": 0.359375, "learning_rate": 1.7809730362813053e-05, "loss": 1.9882, "step": 14226 }, { "epoch": 0.45901784796062195, "grad_norm": 0.345703125, "learning_rate": 1.7808184468746643e-05, "loss": 1.9896, "step": 14227 }, { "epoch": 0.4590501118144183, "grad_norm": 0.35546875, "learning_rate": 1.780663854376976e-05, "loss": 1.9947, "step": 14228 }, { "epoch": 0.45908237566821464, "grad_norm": 0.341796875, "learning_rate": 1.780509258789941e-05, "loss": 2.0051, "step": 14229 }, { "epoch": 0.459114639522011, "grad_norm": 0.357421875, "learning_rate": 1.780354660115261e-05, "loss": 2.0038, "step": 14230 }, { "epoch": 0.4591469033758073, "grad_norm": 0.40625, "learning_rate": 1.7802000583546385e-05, "loss": 2.0098, "step": 14231 }, { "epoch": 0.4591791672296037, "grad_norm": 0.353515625, "learning_rate": 1.780045453509775e-05, "loss": 1.9872, "step": 14232 }, { "epoch": 0.45921143108340007, "grad_norm": 0.373046875, "learning_rate": 1.7798908455823715e-05, "loss": 1.9925, "step": 14233 }, { "epoch": 0.4592436949371964, "grad_norm": 0.380859375, "learning_rate": 1.7797362345741308e-05, "loss": 1.9929, "step": 14234 }, { "epoch": 0.45927595879099276, "grad_norm": 0.3671875, "learning_rate": 1.7795816204867543e-05, "loss": 1.9776, "step": 14235 }, { "epoch": 0.4593082226447891, "grad_norm": 0.388671875, "learning_rate": 1.7794270033219434e-05, "loss": 2.0004, "step": 14236 }, { "epoch": 0.45934048649858544, "grad_norm": 0.373046875, "learning_rate": 1.7792723830814007e-05, "loss": 2.0075, "step": 14237 }, { "epoch": 0.4593727503523818, "grad_norm": 0.359375, "learning_rate": 1.7791177597668284e-05, "loss": 1.9355, "step": 14238 }, { "epoch": 0.45940501420617813, "grad_norm": 0.359375, "learning_rate": 1.7789631333799277e-05, "loss": 1.97, "step": 14239 }, { "epoch": 0.4594372780599745, "grad_norm": 0.38671875, "learning_rate": 1.7788085039224008e-05, "loss": 1.9801, "step": 14240 }, { "epoch": 0.4594695419137708, "grad_norm": 0.369140625, "learning_rate": 1.77865387139595e-05, "loss": 1.9656, "step": 14241 }, { "epoch": 0.45950180576756716, "grad_norm": 0.38671875, "learning_rate": 1.7784992358022776e-05, "loss": 1.9514, "step": 14242 }, { "epoch": 0.4595340696213635, "grad_norm": 0.369140625, "learning_rate": 1.778344597143085e-05, "loss": 1.9869, "step": 14243 }, { "epoch": 0.45956633347515985, "grad_norm": 0.357421875, "learning_rate": 1.7781899554200746e-05, "loss": 1.9466, "step": 14244 }, { "epoch": 0.4595985973289562, "grad_norm": 0.361328125, "learning_rate": 1.7780353106349494e-05, "loss": 1.9755, "step": 14245 }, { "epoch": 0.45963086118275254, "grad_norm": 0.431640625, "learning_rate": 1.7778806627894105e-05, "loss": 1.8821, "step": 14246 }, { "epoch": 0.4596631250365489, "grad_norm": 0.373046875, "learning_rate": 1.7777260118851608e-05, "loss": 1.9546, "step": 14247 }, { "epoch": 0.4596953888903452, "grad_norm": 0.365234375, "learning_rate": 1.777571357923902e-05, "loss": 1.9682, "step": 14248 }, { "epoch": 0.45972765274414157, "grad_norm": 0.34765625, "learning_rate": 1.777416700907338e-05, "loss": 1.9435, "step": 14249 }, { "epoch": 0.4597599165979379, "grad_norm": 0.37109375, "learning_rate": 1.777262040837169e-05, "loss": 1.9519, "step": 14250 }, { "epoch": 0.45979218045173426, "grad_norm": 0.357421875, "learning_rate": 1.7771073777150984e-05, "loss": 1.9763, "step": 14251 }, { "epoch": 0.45982444430553066, "grad_norm": 0.341796875, "learning_rate": 1.7769527115428287e-05, "loss": 1.9699, "step": 14252 }, { "epoch": 0.459856708159327, "grad_norm": 0.35546875, "learning_rate": 1.776798042322062e-05, "loss": 1.9588, "step": 14253 }, { "epoch": 0.45988897201312334, "grad_norm": 0.37109375, "learning_rate": 1.7766433700545013e-05, "loss": 2.0009, "step": 14254 }, { "epoch": 0.4599212358669197, "grad_norm": 0.373046875, "learning_rate": 1.776488694741849e-05, "loss": 1.9508, "step": 14255 }, { "epoch": 0.45995349972071603, "grad_norm": 0.333984375, "learning_rate": 1.7763340163858072e-05, "loss": 1.9724, "step": 14256 }, { "epoch": 0.4599857635745124, "grad_norm": 0.341796875, "learning_rate": 1.776179334988079e-05, "loss": 1.9652, "step": 14257 }, { "epoch": 0.4600180274283087, "grad_norm": 0.36328125, "learning_rate": 1.7760246505503665e-05, "loss": 1.9824, "step": 14258 }, { "epoch": 0.46005029128210506, "grad_norm": 0.34375, "learning_rate": 1.775869963074373e-05, "loss": 1.9863, "step": 14259 }, { "epoch": 0.4600825551359014, "grad_norm": 0.357421875, "learning_rate": 1.775715272561801e-05, "loss": 1.967, "step": 14260 }, { "epoch": 0.46011481898969775, "grad_norm": 0.3515625, "learning_rate": 1.7755605790143525e-05, "loss": 1.9797, "step": 14261 }, { "epoch": 0.4601470828434941, "grad_norm": 0.3671875, "learning_rate": 1.7754058824337308e-05, "loss": 1.979, "step": 14262 }, { "epoch": 0.46017934669729044, "grad_norm": 0.35546875, "learning_rate": 1.7752511828216395e-05, "loss": 1.9783, "step": 14263 }, { "epoch": 0.4602116105510868, "grad_norm": 0.345703125, "learning_rate": 1.7750964801797798e-05, "loss": 1.9744, "step": 14264 }, { "epoch": 0.4602438744048831, "grad_norm": 0.369140625, "learning_rate": 1.7749417745098556e-05, "loss": 1.931, "step": 14265 }, { "epoch": 0.46027613825867947, "grad_norm": 0.353515625, "learning_rate": 1.77478706581357e-05, "loss": 1.9748, "step": 14266 }, { "epoch": 0.4603084021124758, "grad_norm": 0.365234375, "learning_rate": 1.7746323540926254e-05, "loss": 1.9712, "step": 14267 }, { "epoch": 0.46034066596627216, "grad_norm": 0.369140625, "learning_rate": 1.7744776393487246e-05, "loss": 1.9729, "step": 14268 }, { "epoch": 0.4603729298200685, "grad_norm": 0.3671875, "learning_rate": 1.7743229215835714e-05, "loss": 1.9715, "step": 14269 }, { "epoch": 0.46040519367386484, "grad_norm": 0.369140625, "learning_rate": 1.7741682007988677e-05, "loss": 1.9599, "step": 14270 }, { "epoch": 0.4604374575276612, "grad_norm": 0.33984375, "learning_rate": 1.7740134769963174e-05, "loss": 1.9514, "step": 14271 }, { "epoch": 0.4604697213814576, "grad_norm": 0.375, "learning_rate": 1.773858750177623e-05, "loss": 1.9555, "step": 14272 }, { "epoch": 0.46050198523525393, "grad_norm": 0.349609375, "learning_rate": 1.7737040203444882e-05, "loss": 1.952, "step": 14273 }, { "epoch": 0.4605342490890503, "grad_norm": 0.359375, "learning_rate": 1.773549287498616e-05, "loss": 1.9248, "step": 14274 }, { "epoch": 0.4605665129428466, "grad_norm": 0.349609375, "learning_rate": 1.7733945516417095e-05, "loss": 1.9768, "step": 14275 }, { "epoch": 0.46059877679664296, "grad_norm": 0.349609375, "learning_rate": 1.7732398127754718e-05, "loss": 1.9502, "step": 14276 }, { "epoch": 0.4606310406504393, "grad_norm": 0.33984375, "learning_rate": 1.773085070901606e-05, "loss": 1.9241, "step": 14277 }, { "epoch": 0.46066330450423565, "grad_norm": 0.349609375, "learning_rate": 1.7729303260218162e-05, "loss": 1.9466, "step": 14278 }, { "epoch": 0.460695568358032, "grad_norm": 0.33984375, "learning_rate": 1.7727755781378045e-05, "loss": 1.9051, "step": 14279 }, { "epoch": 0.46072783221182834, "grad_norm": 0.37109375, "learning_rate": 1.7726208272512757e-05, "loss": 1.9406, "step": 14280 }, { "epoch": 0.4607600960656247, "grad_norm": 0.36328125, "learning_rate": 1.7724660733639316e-05, "loss": 1.944, "step": 14281 }, { "epoch": 0.460792359919421, "grad_norm": 0.349609375, "learning_rate": 1.7723113164774768e-05, "loss": 1.9781, "step": 14282 }, { "epoch": 0.46082462377321737, "grad_norm": 0.3515625, "learning_rate": 1.7721565565936144e-05, "loss": 1.9335, "step": 14283 }, { "epoch": 0.4608568876270137, "grad_norm": 0.3671875, "learning_rate": 1.772001793714048e-05, "loss": 1.9378, "step": 14284 }, { "epoch": 0.46088915148081006, "grad_norm": 0.341796875, "learning_rate": 1.771847027840481e-05, "loss": 1.9503, "step": 14285 }, { "epoch": 0.4609214153346064, "grad_norm": 0.375, "learning_rate": 1.7716922589746168e-05, "loss": 1.9629, "step": 14286 }, { "epoch": 0.46095367918840274, "grad_norm": 0.3515625, "learning_rate": 1.771537487118159e-05, "loss": 1.9265, "step": 14287 }, { "epoch": 0.4609859430421991, "grad_norm": 0.3515625, "learning_rate": 1.771382712272812e-05, "loss": 1.975, "step": 14288 }, { "epoch": 0.46101820689599543, "grad_norm": 0.34765625, "learning_rate": 1.771227934440278e-05, "loss": 1.971, "step": 14289 }, { "epoch": 0.4610504707497918, "grad_norm": 0.357421875, "learning_rate": 1.7710731536222614e-05, "loss": 1.9408, "step": 14290 }, { "epoch": 0.4610827346035881, "grad_norm": 0.35546875, "learning_rate": 1.7709183698204667e-05, "loss": 1.9676, "step": 14291 }, { "epoch": 0.4611149984573845, "grad_norm": 0.3515625, "learning_rate": 1.7707635830365963e-05, "loss": 1.9743, "step": 14292 }, { "epoch": 0.46114726231118086, "grad_norm": 0.35546875, "learning_rate": 1.7706087932723548e-05, "loss": 1.9324, "step": 14293 }, { "epoch": 0.4611795261649772, "grad_norm": 0.34765625, "learning_rate": 1.7704540005294463e-05, "loss": 1.9387, "step": 14294 }, { "epoch": 0.46121179001877355, "grad_norm": 0.365234375, "learning_rate": 1.7702992048095735e-05, "loss": 1.9696, "step": 14295 }, { "epoch": 0.4612440538725699, "grad_norm": 0.34765625, "learning_rate": 1.770144406114441e-05, "loss": 1.9605, "step": 14296 }, { "epoch": 0.46127631772636624, "grad_norm": 0.345703125, "learning_rate": 1.7699896044457536e-05, "loss": 1.9405, "step": 14297 }, { "epoch": 0.4613085815801626, "grad_norm": 0.333984375, "learning_rate": 1.7698347998052133e-05, "loss": 1.9244, "step": 14298 }, { "epoch": 0.4613408454339589, "grad_norm": 0.353515625, "learning_rate": 1.7696799921945253e-05, "loss": 1.923, "step": 14299 }, { "epoch": 0.46137310928775527, "grad_norm": 0.3515625, "learning_rate": 1.769525181615394e-05, "loss": 1.9274, "step": 14300 }, { "epoch": 0.4614053731415516, "grad_norm": 0.37109375, "learning_rate": 1.7693703680695224e-05, "loss": 1.8993, "step": 14301 }, { "epoch": 0.46143763699534796, "grad_norm": 0.349609375, "learning_rate": 1.7692155515586153e-05, "loss": 1.9209, "step": 14302 }, { "epoch": 0.4614699008491443, "grad_norm": 0.388671875, "learning_rate": 1.769060732084376e-05, "loss": 1.9493, "step": 14303 }, { "epoch": 0.46150216470294064, "grad_norm": 0.400390625, "learning_rate": 1.7689059096485097e-05, "loss": 1.9167, "step": 14304 }, { "epoch": 0.461534428556737, "grad_norm": 0.359375, "learning_rate": 1.76875108425272e-05, "loss": 1.8918, "step": 14305 }, { "epoch": 0.46156669241053333, "grad_norm": 0.416015625, "learning_rate": 1.7685962558987107e-05, "loss": 1.9063, "step": 14306 }, { "epoch": 0.4615989562643297, "grad_norm": 0.380859375, "learning_rate": 1.768441424588187e-05, "loss": 1.9518, "step": 14307 }, { "epoch": 0.461631220118126, "grad_norm": 0.38671875, "learning_rate": 1.7682865903228525e-05, "loss": 1.9542, "step": 14308 }, { "epoch": 0.46166348397192236, "grad_norm": 0.3671875, "learning_rate": 1.7681317531044117e-05, "loss": 1.9025, "step": 14309 }, { "epoch": 0.4616957478257187, "grad_norm": 0.36328125, "learning_rate": 1.7679769129345687e-05, "loss": 1.9483, "step": 14310 }, { "epoch": 0.46172801167951505, "grad_norm": 0.3828125, "learning_rate": 1.7678220698150286e-05, "loss": 1.9628, "step": 14311 }, { "epoch": 0.46176027553331145, "grad_norm": 0.3671875, "learning_rate": 1.7676672237474946e-05, "loss": 1.9383, "step": 14312 }, { "epoch": 0.4617925393871078, "grad_norm": 0.365234375, "learning_rate": 1.7675123747336725e-05, "loss": 1.9651, "step": 14313 }, { "epoch": 0.46182480324090414, "grad_norm": 0.36328125, "learning_rate": 1.767357522775266e-05, "loss": 1.9417, "step": 14314 }, { "epoch": 0.4618570670947005, "grad_norm": 0.365234375, "learning_rate": 1.7672026678739793e-05, "loss": 1.9625, "step": 14315 }, { "epoch": 0.4618893309484968, "grad_norm": 0.34765625, "learning_rate": 1.767047810031518e-05, "loss": 1.9191, "step": 14316 }, { "epoch": 0.46192159480229317, "grad_norm": 0.37109375, "learning_rate": 1.766892949249585e-05, "loss": 1.9439, "step": 14317 }, { "epoch": 0.4619538586560895, "grad_norm": 0.353515625, "learning_rate": 1.7667380855298866e-05, "loss": 1.9314, "step": 14318 }, { "epoch": 0.46198612250988585, "grad_norm": 0.365234375, "learning_rate": 1.766583218874127e-05, "loss": 1.9615, "step": 14319 }, { "epoch": 0.4620183863636822, "grad_norm": 0.365234375, "learning_rate": 1.7664283492840096e-05, "loss": 1.9643, "step": 14320 }, { "epoch": 0.46205065021747854, "grad_norm": 0.36328125, "learning_rate": 1.7662734767612403e-05, "loss": 1.9667, "step": 14321 }, { "epoch": 0.4620829140712749, "grad_norm": 0.36328125, "learning_rate": 1.7661186013075242e-05, "loss": 1.9388, "step": 14322 }, { "epoch": 0.46211517792507123, "grad_norm": 0.345703125, "learning_rate": 1.765963722924565e-05, "loss": 1.9596, "step": 14323 }, { "epoch": 0.4621474417788676, "grad_norm": 0.341796875, "learning_rate": 1.765808841614068e-05, "loss": 1.9126, "step": 14324 }, { "epoch": 0.4621797056326639, "grad_norm": 0.349609375, "learning_rate": 1.7656539573777384e-05, "loss": 1.9519, "step": 14325 }, { "epoch": 0.46221196948646026, "grad_norm": 0.345703125, "learning_rate": 1.76549907021728e-05, "loss": 1.9506, "step": 14326 }, { "epoch": 0.4622442333402566, "grad_norm": 0.359375, "learning_rate": 1.7653441801343985e-05, "loss": 1.9236, "step": 14327 }, { "epoch": 0.46227649719405295, "grad_norm": 0.353515625, "learning_rate": 1.7651892871307984e-05, "loss": 1.9323, "step": 14328 }, { "epoch": 0.4623087610478493, "grad_norm": 0.3671875, "learning_rate": 1.7650343912081852e-05, "loss": 1.9556, "step": 14329 }, { "epoch": 0.46234102490164564, "grad_norm": 0.365234375, "learning_rate": 1.7648794923682636e-05, "loss": 1.965, "step": 14330 }, { "epoch": 0.462373288755442, "grad_norm": 0.341796875, "learning_rate": 1.7647245906127382e-05, "loss": 1.9572, "step": 14331 }, { "epoch": 0.4624055526092383, "grad_norm": 0.359375, "learning_rate": 1.7645696859433146e-05, "loss": 1.9765, "step": 14332 }, { "epoch": 0.4624378164630347, "grad_norm": 0.349609375, "learning_rate": 1.764414778361698e-05, "loss": 1.9602, "step": 14333 }, { "epoch": 0.46247008031683107, "grad_norm": 0.34765625, "learning_rate": 1.764259867869593e-05, "loss": 1.9507, "step": 14334 }, { "epoch": 0.4625023441706274, "grad_norm": 0.380859375, "learning_rate": 1.7641049544687047e-05, "loss": 1.9219, "step": 14335 }, { "epoch": 0.46253460802442375, "grad_norm": 0.376953125, "learning_rate": 1.763950038160739e-05, "loss": 1.9402, "step": 14336 }, { "epoch": 0.4625668718782201, "grad_norm": 0.357421875, "learning_rate": 1.7637951189474e-05, "loss": 1.933, "step": 14337 }, { "epoch": 0.46259913573201644, "grad_norm": 0.396484375, "learning_rate": 1.7636401968303944e-05, "loss": 1.9374, "step": 14338 }, { "epoch": 0.4626313995858128, "grad_norm": 0.35546875, "learning_rate": 1.7634852718114263e-05, "loss": 1.9699, "step": 14339 }, { "epoch": 0.46266366343960913, "grad_norm": 0.349609375, "learning_rate": 1.7633303438922008e-05, "loss": 1.9661, "step": 14340 }, { "epoch": 0.4626959272934055, "grad_norm": 0.373046875, "learning_rate": 1.7631754130744242e-05, "loss": 1.9435, "step": 14341 }, { "epoch": 0.4627281911472018, "grad_norm": 0.341796875, "learning_rate": 1.763020479359802e-05, "loss": 1.9543, "step": 14342 }, { "epoch": 0.46276045500099816, "grad_norm": 0.3671875, "learning_rate": 1.7628655427500384e-05, "loss": 1.9481, "step": 14343 }, { "epoch": 0.4627927188547945, "grad_norm": 0.34375, "learning_rate": 1.7627106032468395e-05, "loss": 1.9232, "step": 14344 }, { "epoch": 0.46282498270859085, "grad_norm": 0.380859375, "learning_rate": 1.762555660851911e-05, "loss": 1.9271, "step": 14345 }, { "epoch": 0.4628572465623872, "grad_norm": 0.341796875, "learning_rate": 1.762400715566958e-05, "loss": 1.9715, "step": 14346 }, { "epoch": 0.46288951041618354, "grad_norm": 0.36328125, "learning_rate": 1.7622457673936864e-05, "loss": 1.9529, "step": 14347 }, { "epoch": 0.4629217742699799, "grad_norm": 0.359375, "learning_rate": 1.7620908163338013e-05, "loss": 1.9486, "step": 14348 }, { "epoch": 0.4629540381237762, "grad_norm": 0.400390625, "learning_rate": 1.7619358623890084e-05, "loss": 1.9569, "step": 14349 }, { "epoch": 0.46298630197757257, "grad_norm": 0.3515625, "learning_rate": 1.761780905561014e-05, "loss": 1.9724, "step": 14350 }, { "epoch": 0.4630185658313689, "grad_norm": 0.349609375, "learning_rate": 1.7616259458515225e-05, "loss": 1.9485, "step": 14351 }, { "epoch": 0.46305082968516525, "grad_norm": 0.3671875, "learning_rate": 1.7614709832622403e-05, "loss": 1.9483, "step": 14352 }, { "epoch": 0.46308309353896165, "grad_norm": 0.380859375, "learning_rate": 1.7613160177948737e-05, "loss": 1.9448, "step": 14353 }, { "epoch": 0.463115357392758, "grad_norm": 0.36328125, "learning_rate": 1.7611610494511272e-05, "loss": 1.9453, "step": 14354 }, { "epoch": 0.46314762124655434, "grad_norm": 0.373046875, "learning_rate": 1.7610060782327074e-05, "loss": 1.9458, "step": 14355 }, { "epoch": 0.4631798851003507, "grad_norm": 0.337890625, "learning_rate": 1.7608511041413202e-05, "loss": 1.9779, "step": 14356 }, { "epoch": 0.46321214895414703, "grad_norm": 0.375, "learning_rate": 1.7606961271786712e-05, "loss": 1.9609, "step": 14357 }, { "epoch": 0.4632444128079434, "grad_norm": 0.34765625, "learning_rate": 1.7605411473464657e-05, "loss": 1.9363, "step": 14358 }, { "epoch": 0.4632766766617397, "grad_norm": 0.388671875, "learning_rate": 1.760386164646411e-05, "loss": 1.9486, "step": 14359 }, { "epoch": 0.46330894051553606, "grad_norm": 0.37890625, "learning_rate": 1.7602311790802115e-05, "loss": 1.9729, "step": 14360 }, { "epoch": 0.4633412043693324, "grad_norm": 0.36328125, "learning_rate": 1.7600761906495744e-05, "loss": 1.9905, "step": 14361 }, { "epoch": 0.46337346822312875, "grad_norm": 0.384765625, "learning_rate": 1.7599211993562047e-05, "loss": 1.9346, "step": 14362 }, { "epoch": 0.4634057320769251, "grad_norm": 0.359375, "learning_rate": 1.7597662052018088e-05, "loss": 1.9744, "step": 14363 }, { "epoch": 0.46343799593072144, "grad_norm": 0.390625, "learning_rate": 1.759611208188094e-05, "loss": 1.9255, "step": 14364 }, { "epoch": 0.4634702597845178, "grad_norm": 0.361328125, "learning_rate": 1.759456208316764e-05, "loss": 1.9911, "step": 14365 }, { "epoch": 0.4635025236383141, "grad_norm": 0.361328125, "learning_rate": 1.7593012055895262e-05, "loss": 1.9637, "step": 14366 }, { "epoch": 0.46353478749211047, "grad_norm": 0.369140625, "learning_rate": 1.7591462000080877e-05, "loss": 1.97, "step": 14367 }, { "epoch": 0.4635670513459068, "grad_norm": 0.404296875, "learning_rate": 1.758991191574153e-05, "loss": 1.9854, "step": 14368 }, { "epoch": 0.46359931519970315, "grad_norm": 0.390625, "learning_rate": 1.7588361802894293e-05, "loss": 1.9955, "step": 14369 }, { "epoch": 0.4636315790534995, "grad_norm": 0.37890625, "learning_rate": 1.7586811661556228e-05, "loss": 1.9806, "step": 14370 }, { "epoch": 0.46366384290729584, "grad_norm": 0.3984375, "learning_rate": 1.758526149174439e-05, "loss": 1.9948, "step": 14371 }, { "epoch": 0.4636961067610922, "grad_norm": 0.40234375, "learning_rate": 1.758371129347585e-05, "loss": 2.0278, "step": 14372 }, { "epoch": 0.4637283706148886, "grad_norm": 0.3515625, "learning_rate": 1.7582161066767676e-05, "loss": 1.9534, "step": 14373 }, { "epoch": 0.46376063446868493, "grad_norm": 0.365234375, "learning_rate": 1.758061081163692e-05, "loss": 1.9569, "step": 14374 }, { "epoch": 0.4637928983224813, "grad_norm": 0.361328125, "learning_rate": 1.757906052810065e-05, "loss": 1.9875, "step": 14375 }, { "epoch": 0.4638251621762776, "grad_norm": 0.357421875, "learning_rate": 1.7577510216175937e-05, "loss": 1.9736, "step": 14376 }, { "epoch": 0.46385742603007396, "grad_norm": 0.37109375, "learning_rate": 1.7575959875879837e-05, "loss": 1.9578, "step": 14377 }, { "epoch": 0.4638896898838703, "grad_norm": 0.3828125, "learning_rate": 1.7574409507229424e-05, "loss": 1.9294, "step": 14378 }, { "epoch": 0.46392195373766665, "grad_norm": 0.37890625, "learning_rate": 1.7572859110241752e-05, "loss": 1.9273, "step": 14379 }, { "epoch": 0.463954217591463, "grad_norm": 0.361328125, "learning_rate": 1.757130868493389e-05, "loss": 1.9834, "step": 14380 }, { "epoch": 0.46398648144525934, "grad_norm": 0.376953125, "learning_rate": 1.7569758231322917e-05, "loss": 1.9942, "step": 14381 }, { "epoch": 0.4640187452990557, "grad_norm": 0.373046875, "learning_rate": 1.756820774942588e-05, "loss": 1.9413, "step": 14382 }, { "epoch": 0.464051009152852, "grad_norm": 0.37890625, "learning_rate": 1.7566657239259856e-05, "loss": 1.9611, "step": 14383 }, { "epoch": 0.46408327300664837, "grad_norm": 0.33984375, "learning_rate": 1.7565106700841917e-05, "loss": 1.9861, "step": 14384 }, { "epoch": 0.4641155368604447, "grad_norm": 0.38671875, "learning_rate": 1.7563556134189116e-05, "loss": 1.9617, "step": 14385 }, { "epoch": 0.46414780071424105, "grad_norm": 0.359375, "learning_rate": 1.7562005539318528e-05, "loss": 1.9839, "step": 14386 }, { "epoch": 0.4641800645680374, "grad_norm": 0.34765625, "learning_rate": 1.7560454916247228e-05, "loss": 1.9868, "step": 14387 }, { "epoch": 0.46421232842183374, "grad_norm": 0.33984375, "learning_rate": 1.7558904264992275e-05, "loss": 2.013, "step": 14388 }, { "epoch": 0.4642445922756301, "grad_norm": 0.36328125, "learning_rate": 1.7557353585570733e-05, "loss": 2.0019, "step": 14389 }, { "epoch": 0.46427685612942643, "grad_norm": 0.337890625, "learning_rate": 1.7555802877999683e-05, "loss": 1.9864, "step": 14390 }, { "epoch": 0.4643091199832228, "grad_norm": 0.349609375, "learning_rate": 1.7554252142296186e-05, "loss": 1.9906, "step": 14391 }, { "epoch": 0.4643413838370191, "grad_norm": 0.361328125, "learning_rate": 1.7552701378477318e-05, "loss": 1.9925, "step": 14392 }, { "epoch": 0.4643736476908155, "grad_norm": 0.345703125, "learning_rate": 1.755115058656014e-05, "loss": 1.995, "step": 14393 }, { "epoch": 0.46440591154461186, "grad_norm": 0.345703125, "learning_rate": 1.7549599766561727e-05, "loss": 1.993, "step": 14394 }, { "epoch": 0.4644381753984082, "grad_norm": 0.341796875, "learning_rate": 1.7548048918499153e-05, "loss": 1.9581, "step": 14395 }, { "epoch": 0.46447043925220455, "grad_norm": 0.369140625, "learning_rate": 1.754649804238948e-05, "loss": 2.0326, "step": 14396 }, { "epoch": 0.4645027031060009, "grad_norm": 0.34375, "learning_rate": 1.7544947138249784e-05, "loss": 1.9634, "step": 14397 }, { "epoch": 0.46453496695979724, "grad_norm": 0.359375, "learning_rate": 1.7543396206097137e-05, "loss": 1.9869, "step": 14398 }, { "epoch": 0.4645672308135936, "grad_norm": 0.3515625, "learning_rate": 1.7541845245948612e-05, "loss": 2.004, "step": 14399 }, { "epoch": 0.4645994946673899, "grad_norm": 0.349609375, "learning_rate": 1.754029425782127e-05, "loss": 1.9958, "step": 14400 }, { "epoch": 0.46463175852118627, "grad_norm": 0.345703125, "learning_rate": 1.75387432417322e-05, "loss": 1.9675, "step": 14401 }, { "epoch": 0.4646640223749826, "grad_norm": 0.35546875, "learning_rate": 1.7537192197698462e-05, "loss": 1.9948, "step": 14402 }, { "epoch": 0.46469628622877895, "grad_norm": 0.3359375, "learning_rate": 1.753564112573713e-05, "loss": 2.0061, "step": 14403 }, { "epoch": 0.4647285500825753, "grad_norm": 0.375, "learning_rate": 1.7534090025865284e-05, "loss": 2.0239, "step": 14404 }, { "epoch": 0.46476081393637164, "grad_norm": 0.66796875, "learning_rate": 1.7532538898099994e-05, "loss": 1.9757, "step": 14405 }, { "epoch": 0.464793077790168, "grad_norm": 0.34375, "learning_rate": 1.7530987742458324e-05, "loss": 1.9649, "step": 14406 }, { "epoch": 0.46482534164396433, "grad_norm": 0.36328125, "learning_rate": 1.752943655895736e-05, "loss": 1.9932, "step": 14407 }, { "epoch": 0.4648576054977607, "grad_norm": 0.357421875, "learning_rate": 1.7527885347614177e-05, "loss": 2.0055, "step": 14408 }, { "epoch": 0.464889869351557, "grad_norm": 0.373046875, "learning_rate": 1.7526334108445847e-05, "loss": 2.0048, "step": 14409 }, { "epoch": 0.46492213320535336, "grad_norm": 0.345703125, "learning_rate": 1.7524782841469438e-05, "loss": 1.9674, "step": 14410 }, { "epoch": 0.4649543970591497, "grad_norm": 0.34765625, "learning_rate": 1.752323154670203e-05, "loss": 1.9721, "step": 14411 }, { "epoch": 0.46498666091294605, "grad_norm": 0.35546875, "learning_rate": 1.7521680224160704e-05, "loss": 2.0026, "step": 14412 }, { "epoch": 0.4650189247667424, "grad_norm": 0.365234375, "learning_rate": 1.752012887386253e-05, "loss": 1.9708, "step": 14413 }, { "epoch": 0.4650511886205388, "grad_norm": 0.341796875, "learning_rate": 1.7518577495824582e-05, "loss": 1.9827, "step": 14414 }, { "epoch": 0.46508345247433514, "grad_norm": 0.359375, "learning_rate": 1.7517026090063946e-05, "loss": 1.9901, "step": 14415 }, { "epoch": 0.4651157163281315, "grad_norm": 0.353515625, "learning_rate": 1.7515474656597685e-05, "loss": 2.023, "step": 14416 }, { "epoch": 0.4651479801819278, "grad_norm": 0.337890625, "learning_rate": 1.751392319544289e-05, "loss": 1.9577, "step": 14417 }, { "epoch": 0.46518024403572417, "grad_norm": 0.34765625, "learning_rate": 1.751237170661663e-05, "loss": 1.9779, "step": 14418 }, { "epoch": 0.4652125078895205, "grad_norm": 0.34765625, "learning_rate": 1.7510820190135986e-05, "loss": 1.9763, "step": 14419 }, { "epoch": 0.46524477174331685, "grad_norm": 0.34375, "learning_rate": 1.750926864601803e-05, "loss": 1.9725, "step": 14420 }, { "epoch": 0.4652770355971132, "grad_norm": 0.353515625, "learning_rate": 1.7507717074279844e-05, "loss": 1.9716, "step": 14421 }, { "epoch": 0.46530929945090954, "grad_norm": 0.353515625, "learning_rate": 1.7506165474938515e-05, "loss": 2.0078, "step": 14422 }, { "epoch": 0.4653415633047059, "grad_norm": 0.3359375, "learning_rate": 1.750461384801111e-05, "loss": 1.9872, "step": 14423 }, { "epoch": 0.46537382715850223, "grad_norm": 0.34375, "learning_rate": 1.7503062193514714e-05, "loss": 1.9896, "step": 14424 }, { "epoch": 0.4654060910122986, "grad_norm": 0.373046875, "learning_rate": 1.7501510511466397e-05, "loss": 2.0034, "step": 14425 }, { "epoch": 0.4654383548660949, "grad_norm": 0.345703125, "learning_rate": 1.749995880188326e-05, "loss": 1.9307, "step": 14426 }, { "epoch": 0.46547061871989126, "grad_norm": 0.353515625, "learning_rate": 1.749840706478236e-05, "loss": 1.9797, "step": 14427 }, { "epoch": 0.4655028825736876, "grad_norm": 0.34765625, "learning_rate": 1.749685530018079e-05, "loss": 1.9891, "step": 14428 }, { "epoch": 0.46553514642748395, "grad_norm": 0.36328125, "learning_rate": 1.7495303508095634e-05, "loss": 1.9802, "step": 14429 }, { "epoch": 0.4655674102812803, "grad_norm": 0.33984375, "learning_rate": 1.7493751688543963e-05, "loss": 1.9785, "step": 14430 }, { "epoch": 0.46559967413507664, "grad_norm": 0.361328125, "learning_rate": 1.7492199841542858e-05, "loss": 1.9962, "step": 14431 }, { "epoch": 0.465631937988873, "grad_norm": 0.35546875, "learning_rate": 1.7490647967109412e-05, "loss": 1.9655, "step": 14432 }, { "epoch": 0.4656642018426693, "grad_norm": 0.359375, "learning_rate": 1.7489096065260698e-05, "loss": 2.005, "step": 14433 }, { "epoch": 0.4656964656964657, "grad_norm": 0.34765625, "learning_rate": 1.7487544136013795e-05, "loss": 1.9943, "step": 14434 }, { "epoch": 0.46572872955026207, "grad_norm": 0.34375, "learning_rate": 1.7485992179385796e-05, "loss": 1.9927, "step": 14435 }, { "epoch": 0.4657609934040584, "grad_norm": 0.365234375, "learning_rate": 1.7484440195393778e-05, "loss": 1.972, "step": 14436 }, { "epoch": 0.46579325725785475, "grad_norm": 0.359375, "learning_rate": 1.7482888184054824e-05, "loss": 1.9932, "step": 14437 }, { "epoch": 0.4658255211116511, "grad_norm": 0.36328125, "learning_rate": 1.7481336145386016e-05, "loss": 1.9831, "step": 14438 }, { "epoch": 0.46585778496544744, "grad_norm": 0.36328125, "learning_rate": 1.7479784079404444e-05, "loss": 1.9823, "step": 14439 }, { "epoch": 0.4658900488192438, "grad_norm": 0.3515625, "learning_rate": 1.7478231986127185e-05, "loss": 1.9869, "step": 14440 }, { "epoch": 0.46592231267304013, "grad_norm": 0.376953125, "learning_rate": 1.7476679865571325e-05, "loss": 1.9958, "step": 14441 }, { "epoch": 0.4659545765268365, "grad_norm": 0.396484375, "learning_rate": 1.747512771775395e-05, "loss": 1.954, "step": 14442 }, { "epoch": 0.4659868403806328, "grad_norm": 0.365234375, "learning_rate": 1.747357554269215e-05, "loss": 1.9615, "step": 14443 }, { "epoch": 0.46601910423442916, "grad_norm": 0.419921875, "learning_rate": 1.7472023340403e-05, "loss": 1.9677, "step": 14444 }, { "epoch": 0.4660513680882255, "grad_norm": 0.349609375, "learning_rate": 1.747047111090359e-05, "loss": 1.9779, "step": 14445 }, { "epoch": 0.46608363194202185, "grad_norm": 0.384765625, "learning_rate": 1.746891885421101e-05, "loss": 1.9671, "step": 14446 }, { "epoch": 0.4661158957958182, "grad_norm": 0.3828125, "learning_rate": 1.746736657034234e-05, "loss": 1.9892, "step": 14447 }, { "epoch": 0.46614815964961454, "grad_norm": 0.388671875, "learning_rate": 1.7465814259314668e-05, "loss": 1.9623, "step": 14448 }, { "epoch": 0.4661804235034109, "grad_norm": 0.34765625, "learning_rate": 1.7464261921145086e-05, "loss": 1.9572, "step": 14449 }, { "epoch": 0.4662126873572072, "grad_norm": 0.384765625, "learning_rate": 1.7462709555850673e-05, "loss": 1.9813, "step": 14450 }, { "epoch": 0.46624495121100357, "grad_norm": 0.34765625, "learning_rate": 1.746115716344852e-05, "loss": 1.9863, "step": 14451 }, { "epoch": 0.4662772150647999, "grad_norm": 0.345703125, "learning_rate": 1.7459604743955715e-05, "loss": 1.9855, "step": 14452 }, { "epoch": 0.46630947891859625, "grad_norm": 0.34765625, "learning_rate": 1.7458052297389348e-05, "loss": 1.9832, "step": 14453 }, { "epoch": 0.46634174277239265, "grad_norm": 0.373046875, "learning_rate": 1.7456499823766503e-05, "loss": 1.9815, "step": 14454 }, { "epoch": 0.466374006626189, "grad_norm": 0.35546875, "learning_rate": 1.7454947323104265e-05, "loss": 1.9948, "step": 14455 }, { "epoch": 0.46640627047998534, "grad_norm": 0.39453125, "learning_rate": 1.7453394795419735e-05, "loss": 1.9647, "step": 14456 }, { "epoch": 0.4664385343337817, "grad_norm": 0.36328125, "learning_rate": 1.7451842240729992e-05, "loss": 2.0051, "step": 14457 }, { "epoch": 0.46647079818757803, "grad_norm": 0.3671875, "learning_rate": 1.745028965905213e-05, "loss": 1.9849, "step": 14458 }, { "epoch": 0.4665030620413744, "grad_norm": 0.33984375, "learning_rate": 1.7448737050403233e-05, "loss": 1.985, "step": 14459 }, { "epoch": 0.4665353258951707, "grad_norm": 0.36328125, "learning_rate": 1.74471844148004e-05, "loss": 1.9637, "step": 14460 }, { "epoch": 0.46656758974896706, "grad_norm": 0.341796875, "learning_rate": 1.7445631752260717e-05, "loss": 1.9897, "step": 14461 }, { "epoch": 0.4665998536027634, "grad_norm": 0.373046875, "learning_rate": 1.744407906280127e-05, "loss": 1.9901, "step": 14462 }, { "epoch": 0.46663211745655975, "grad_norm": 0.34765625, "learning_rate": 1.7442526346439158e-05, "loss": 1.9676, "step": 14463 }, { "epoch": 0.4666643813103561, "grad_norm": 0.349609375, "learning_rate": 1.7440973603191468e-05, "loss": 1.9694, "step": 14464 }, { "epoch": 0.46669664516415243, "grad_norm": 0.35546875, "learning_rate": 1.743942083307529e-05, "loss": 1.9948, "step": 14465 }, { "epoch": 0.4667289090179488, "grad_norm": 0.3671875, "learning_rate": 1.7437868036107717e-05, "loss": 1.9803, "step": 14466 }, { "epoch": 0.4667611728717451, "grad_norm": 0.36328125, "learning_rate": 1.7436315212305846e-05, "loss": 2.0095, "step": 14467 }, { "epoch": 0.46679343672554147, "grad_norm": 0.3828125, "learning_rate": 1.7434762361686762e-05, "loss": 1.9865, "step": 14468 }, { "epoch": 0.4668257005793378, "grad_norm": 0.353515625, "learning_rate": 1.743320948426756e-05, "loss": 1.9741, "step": 14469 }, { "epoch": 0.46685796443313415, "grad_norm": 0.37890625, "learning_rate": 1.7431656580065334e-05, "loss": 1.9433, "step": 14470 }, { "epoch": 0.4668902282869305, "grad_norm": 0.35546875, "learning_rate": 1.7430103649097173e-05, "loss": 2.0022, "step": 14471 }, { "epoch": 0.46692249214072684, "grad_norm": 0.3515625, "learning_rate": 1.7428550691380178e-05, "loss": 1.9639, "step": 14472 }, { "epoch": 0.4669547559945232, "grad_norm": 0.34765625, "learning_rate": 1.742699770693144e-05, "loss": 1.9695, "step": 14473 }, { "epoch": 0.4669870198483196, "grad_norm": 0.353515625, "learning_rate": 1.7425444695768053e-05, "loss": 1.9492, "step": 14474 }, { "epoch": 0.46701928370211593, "grad_norm": 0.33984375, "learning_rate": 1.7423891657907105e-05, "loss": 1.984, "step": 14475 }, { "epoch": 0.46705154755591227, "grad_norm": 0.359375, "learning_rate": 1.7422338593365704e-05, "loss": 1.9602, "step": 14476 }, { "epoch": 0.4670838114097086, "grad_norm": 0.330078125, "learning_rate": 1.7420785502160935e-05, "loss": 1.9983, "step": 14477 }, { "epoch": 0.46711607526350496, "grad_norm": 0.34375, "learning_rate": 1.7419232384309895e-05, "loss": 1.9902, "step": 14478 }, { "epoch": 0.4671483391173013, "grad_norm": 0.33203125, "learning_rate": 1.7417679239829678e-05, "loss": 1.9491, "step": 14479 }, { "epoch": 0.46718060297109765, "grad_norm": 0.349609375, "learning_rate": 1.7416126068737386e-05, "loss": 1.9894, "step": 14480 }, { "epoch": 0.467212866824894, "grad_norm": 0.33984375, "learning_rate": 1.7414572871050113e-05, "loss": 1.9941, "step": 14481 }, { "epoch": 0.46724513067869033, "grad_norm": 0.34765625, "learning_rate": 1.741301964678495e-05, "loss": 1.984, "step": 14482 }, { "epoch": 0.4672773945324867, "grad_norm": 0.359375, "learning_rate": 1.7411466395959e-05, "loss": 1.9822, "step": 14483 }, { "epoch": 0.467309658386283, "grad_norm": 0.39453125, "learning_rate": 1.740991311858936e-05, "loss": 1.9686, "step": 14484 }, { "epoch": 0.46734192224007937, "grad_norm": 0.349609375, "learning_rate": 1.7408359814693123e-05, "loss": 1.9852, "step": 14485 }, { "epoch": 0.4673741860938757, "grad_norm": 0.35546875, "learning_rate": 1.7406806484287388e-05, "loss": 1.9593, "step": 14486 }, { "epoch": 0.46740644994767205, "grad_norm": 0.375, "learning_rate": 1.7405253127389255e-05, "loss": 1.9717, "step": 14487 }, { "epoch": 0.4674387138014684, "grad_norm": 0.369140625, "learning_rate": 1.740369974401582e-05, "loss": 1.9813, "step": 14488 }, { "epoch": 0.46747097765526474, "grad_norm": 0.357421875, "learning_rate": 1.7402146334184178e-05, "loss": 1.9508, "step": 14489 }, { "epoch": 0.4675032415090611, "grad_norm": 0.359375, "learning_rate": 1.7400592897911444e-05, "loss": 1.9852, "step": 14490 }, { "epoch": 0.46753550536285743, "grad_norm": 0.359375, "learning_rate": 1.7399039435214697e-05, "loss": 1.9722, "step": 14491 }, { "epoch": 0.4675677692166538, "grad_norm": 0.357421875, "learning_rate": 1.7397485946111048e-05, "loss": 1.9282, "step": 14492 }, { "epoch": 0.4676000330704501, "grad_norm": 0.361328125, "learning_rate": 1.739593243061759e-05, "loss": 1.979, "step": 14493 }, { "epoch": 0.4676322969242465, "grad_norm": 0.365234375, "learning_rate": 1.739437888875143e-05, "loss": 1.9674, "step": 14494 }, { "epoch": 0.46766456077804286, "grad_norm": 0.34375, "learning_rate": 1.7392825320529667e-05, "loss": 1.9287, "step": 14495 }, { "epoch": 0.4676968246318392, "grad_norm": 0.388671875, "learning_rate": 1.7391271725969392e-05, "loss": 1.936, "step": 14496 }, { "epoch": 0.46772908848563555, "grad_norm": 0.34765625, "learning_rate": 1.7389718105087722e-05, "loss": 1.9679, "step": 14497 }, { "epoch": 0.4677613523394319, "grad_norm": 0.365234375, "learning_rate": 1.7388164457901748e-05, "loss": 1.9607, "step": 14498 }, { "epoch": 0.46779361619322823, "grad_norm": 0.373046875, "learning_rate": 1.7386610784428567e-05, "loss": 1.9475, "step": 14499 }, { "epoch": 0.4678258800470246, "grad_norm": 0.34765625, "learning_rate": 1.738505708468529e-05, "loss": 1.9875, "step": 14500 }, { "epoch": 0.4678581439008209, "grad_norm": 0.373046875, "learning_rate": 1.738350335868902e-05, "loss": 1.9817, "step": 14501 }, { "epoch": 0.46789040775461727, "grad_norm": 0.34765625, "learning_rate": 1.7381949606456845e-05, "loss": 1.9584, "step": 14502 }, { "epoch": 0.4679226716084136, "grad_norm": 0.365234375, "learning_rate": 1.7380395828005884e-05, "loss": 1.9767, "step": 14503 }, { "epoch": 0.46795493546220995, "grad_norm": 0.33984375, "learning_rate": 1.7378842023353236e-05, "loss": 1.9949, "step": 14504 }, { "epoch": 0.4679871993160063, "grad_norm": 0.3515625, "learning_rate": 1.7377288192515997e-05, "loss": 1.9691, "step": 14505 }, { "epoch": 0.46801946316980264, "grad_norm": 0.345703125, "learning_rate": 1.7375734335511275e-05, "loss": 1.9792, "step": 14506 }, { "epoch": 0.468051727023599, "grad_norm": 0.361328125, "learning_rate": 1.7374180452356173e-05, "loss": 2.0018, "step": 14507 }, { "epoch": 0.46808399087739533, "grad_norm": 0.36328125, "learning_rate": 1.73726265430678e-05, "loss": 1.9814, "step": 14508 }, { "epoch": 0.46811625473119167, "grad_norm": 0.341796875, "learning_rate": 1.7371072607663256e-05, "loss": 1.9838, "step": 14509 }, { "epoch": 0.468148518584988, "grad_norm": 0.345703125, "learning_rate": 1.736951864615964e-05, "loss": 1.9733, "step": 14510 }, { "epoch": 0.46818078243878436, "grad_norm": 0.37890625, "learning_rate": 1.7367964658574068e-05, "loss": 1.989, "step": 14511 }, { "epoch": 0.4682130462925807, "grad_norm": 0.373046875, "learning_rate": 1.736641064492364e-05, "loss": 1.9928, "step": 14512 }, { "epoch": 0.46824531014637705, "grad_norm": 0.380859375, "learning_rate": 1.7364856605225458e-05, "loss": 1.9755, "step": 14513 }, { "epoch": 0.4682775740001734, "grad_norm": 0.369140625, "learning_rate": 1.7363302539496632e-05, "loss": 1.9736, "step": 14514 }, { "epoch": 0.4683098378539698, "grad_norm": 0.38671875, "learning_rate": 1.736174844775427e-05, "loss": 1.999, "step": 14515 }, { "epoch": 0.46834210170776613, "grad_norm": 0.4140625, "learning_rate": 1.7360194330015472e-05, "loss": 2.006, "step": 14516 }, { "epoch": 0.4683743655615625, "grad_norm": 0.369140625, "learning_rate": 1.7358640186297348e-05, "loss": 1.9724, "step": 14517 }, { "epoch": 0.4684066294153588, "grad_norm": 0.39453125, "learning_rate": 1.7357086016617006e-05, "loss": 1.966, "step": 14518 }, { "epoch": 0.46843889326915517, "grad_norm": 0.361328125, "learning_rate": 1.735553182099155e-05, "loss": 1.9989, "step": 14519 }, { "epoch": 0.4684711571229515, "grad_norm": 0.3828125, "learning_rate": 1.7353977599438092e-05, "loss": 2.0114, "step": 14520 }, { "epoch": 0.46850342097674785, "grad_norm": 0.3359375, "learning_rate": 1.735242335197374e-05, "loss": 1.9751, "step": 14521 }, { "epoch": 0.4685356848305442, "grad_norm": 0.361328125, "learning_rate": 1.7350869078615593e-05, "loss": 1.9679, "step": 14522 }, { "epoch": 0.46856794868434054, "grad_norm": 0.36328125, "learning_rate": 1.734931477938077e-05, "loss": 1.9603, "step": 14523 }, { "epoch": 0.4686002125381369, "grad_norm": 0.33984375, "learning_rate": 1.7347760454286373e-05, "loss": 1.9712, "step": 14524 }, { "epoch": 0.46863247639193323, "grad_norm": 0.35546875, "learning_rate": 1.7346206103349515e-05, "loss": 2.0121, "step": 14525 }, { "epoch": 0.46866474024572957, "grad_norm": 0.36328125, "learning_rate": 1.7344651726587308e-05, "loss": 2.0096, "step": 14526 }, { "epoch": 0.4686970040995259, "grad_norm": 0.3671875, "learning_rate": 1.7343097324016843e-05, "loss": 1.9721, "step": 14527 }, { "epoch": 0.46872926795332226, "grad_norm": 0.3515625, "learning_rate": 1.7341542895655256e-05, "loss": 1.988, "step": 14528 }, { "epoch": 0.4687615318071186, "grad_norm": 0.36328125, "learning_rate": 1.7339988441519644e-05, "loss": 2.0073, "step": 14529 }, { "epoch": 0.46879379566091495, "grad_norm": 0.353515625, "learning_rate": 1.7338433961627113e-05, "loss": 1.9854, "step": 14530 }, { "epoch": 0.4688260595147113, "grad_norm": 0.35546875, "learning_rate": 1.733687945599478e-05, "loss": 1.984, "step": 14531 }, { "epoch": 0.46885832336850763, "grad_norm": 0.34375, "learning_rate": 1.7335324924639758e-05, "loss": 2.0066, "step": 14532 }, { "epoch": 0.468890587222304, "grad_norm": 0.35546875, "learning_rate": 1.7333770367579147e-05, "loss": 1.9828, "step": 14533 }, { "epoch": 0.4689228510761003, "grad_norm": 0.341796875, "learning_rate": 1.733221578483007e-05, "loss": 1.9757, "step": 14534 }, { "epoch": 0.4689551149298967, "grad_norm": 0.3671875, "learning_rate": 1.7330661176409638e-05, "loss": 1.9904, "step": 14535 }, { "epoch": 0.46898737878369307, "grad_norm": 0.345703125, "learning_rate": 1.7329106542334955e-05, "loss": 1.9767, "step": 14536 }, { "epoch": 0.4690196426374894, "grad_norm": 0.337890625, "learning_rate": 1.732755188262314e-05, "loss": 1.9795, "step": 14537 }, { "epoch": 0.46905190649128575, "grad_norm": 0.37109375, "learning_rate": 1.7325997197291305e-05, "loss": 2.0002, "step": 14538 }, { "epoch": 0.4690841703450821, "grad_norm": 0.333984375, "learning_rate": 1.732444248635656e-05, "loss": 1.9858, "step": 14539 }, { "epoch": 0.46911643419887844, "grad_norm": 0.3671875, "learning_rate": 1.732288774983602e-05, "loss": 1.9931, "step": 14540 }, { "epoch": 0.4691486980526748, "grad_norm": 0.365234375, "learning_rate": 1.7321332987746794e-05, "loss": 1.9435, "step": 14541 }, { "epoch": 0.46918096190647113, "grad_norm": 0.3515625, "learning_rate": 1.7319778200106007e-05, "loss": 1.9296, "step": 14542 }, { "epoch": 0.46921322576026747, "grad_norm": 0.39453125, "learning_rate": 1.7318223386930765e-05, "loss": 1.9894, "step": 14543 }, { "epoch": 0.4692454896140638, "grad_norm": 0.359375, "learning_rate": 1.7316668548238178e-05, "loss": 1.9802, "step": 14544 }, { "epoch": 0.46927775346786016, "grad_norm": 0.388671875, "learning_rate": 1.731511368404537e-05, "loss": 1.9985, "step": 14545 }, { "epoch": 0.4693100173216565, "grad_norm": 0.357421875, "learning_rate": 1.7313558794369448e-05, "loss": 1.984, "step": 14546 }, { "epoch": 0.46934228117545285, "grad_norm": 0.4140625, "learning_rate": 1.7312003879227532e-05, "loss": 1.9904, "step": 14547 }, { "epoch": 0.4693745450292492, "grad_norm": 0.3359375, "learning_rate": 1.7310448938636736e-05, "loss": 1.9265, "step": 14548 }, { "epoch": 0.46940680888304553, "grad_norm": 0.408203125, "learning_rate": 1.7308893972614178e-05, "loss": 1.9911, "step": 14549 }, { "epoch": 0.4694390727368419, "grad_norm": 0.380859375, "learning_rate": 1.730733898117697e-05, "loss": 1.9775, "step": 14550 }, { "epoch": 0.4694713365906382, "grad_norm": 0.380859375, "learning_rate": 1.7305783964342223e-05, "loss": 2.0097, "step": 14551 }, { "epoch": 0.46950360044443457, "grad_norm": 0.349609375, "learning_rate": 1.7304228922127073e-05, "loss": 1.9835, "step": 14552 }, { "epoch": 0.4695358642982309, "grad_norm": 0.35546875, "learning_rate": 1.7302673854548615e-05, "loss": 1.9951, "step": 14553 }, { "epoch": 0.46956812815202725, "grad_norm": 0.3515625, "learning_rate": 1.7301118761623978e-05, "loss": 1.9773, "step": 14554 }, { "epoch": 0.46960039200582365, "grad_norm": 0.36328125, "learning_rate": 1.7299563643370274e-05, "loss": 1.9977, "step": 14555 }, { "epoch": 0.46963265585962, "grad_norm": 0.345703125, "learning_rate": 1.7298008499804623e-05, "loss": 1.9739, "step": 14556 }, { "epoch": 0.46966491971341634, "grad_norm": 0.345703125, "learning_rate": 1.729645333094415e-05, "loss": 1.9674, "step": 14557 }, { "epoch": 0.4696971835672127, "grad_norm": 0.353515625, "learning_rate": 1.7294898136805955e-05, "loss": 2.0012, "step": 14558 }, { "epoch": 0.469729447421009, "grad_norm": 0.345703125, "learning_rate": 1.7293342917407173e-05, "loss": 1.9947, "step": 14559 }, { "epoch": 0.46976171127480537, "grad_norm": 0.349609375, "learning_rate": 1.7291787672764924e-05, "loss": 1.9792, "step": 14560 }, { "epoch": 0.4697939751286017, "grad_norm": 0.361328125, "learning_rate": 1.729023240289631e-05, "loss": 2.0162, "step": 14561 }, { "epoch": 0.46982623898239806, "grad_norm": 0.34765625, "learning_rate": 1.7288677107818464e-05, "loss": 1.97, "step": 14562 }, { "epoch": 0.4698585028361944, "grad_norm": 0.380859375, "learning_rate": 1.7287121787548507e-05, "loss": 1.9953, "step": 14563 }, { "epoch": 0.46989076668999075, "grad_norm": 0.3515625, "learning_rate": 1.728556644210355e-05, "loss": 2.0128, "step": 14564 }, { "epoch": 0.4699230305437871, "grad_norm": 0.37109375, "learning_rate": 1.7284011071500717e-05, "loss": 1.9646, "step": 14565 }, { "epoch": 0.46995529439758343, "grad_norm": 0.34765625, "learning_rate": 1.7282455675757132e-05, "loss": 2.0007, "step": 14566 }, { "epoch": 0.4699875582513798, "grad_norm": 0.38671875, "learning_rate": 1.728090025488991e-05, "loss": 1.9706, "step": 14567 }, { "epoch": 0.4700198221051761, "grad_norm": 0.341796875, "learning_rate": 1.7279344808916173e-05, "loss": 1.9962, "step": 14568 }, { "epoch": 0.47005208595897247, "grad_norm": 0.376953125, "learning_rate": 1.7277789337853046e-05, "loss": 1.9686, "step": 14569 }, { "epoch": 0.4700843498127688, "grad_norm": 0.361328125, "learning_rate": 1.7276233841717645e-05, "loss": 1.9374, "step": 14570 }, { "epoch": 0.47011661366656515, "grad_norm": 0.3515625, "learning_rate": 1.7274678320527102e-05, "loss": 1.9747, "step": 14571 }, { "epoch": 0.4701488775203615, "grad_norm": 0.369140625, "learning_rate": 1.7273122774298525e-05, "loss": 1.9712, "step": 14572 }, { "epoch": 0.47018114137415784, "grad_norm": 0.349609375, "learning_rate": 1.7271567203049043e-05, "loss": 1.9778, "step": 14573 }, { "epoch": 0.4702134052279542, "grad_norm": 0.3671875, "learning_rate": 1.7270011606795785e-05, "loss": 2.0202, "step": 14574 }, { "epoch": 0.4702456690817506, "grad_norm": 0.3515625, "learning_rate": 1.726845598555586e-05, "loss": 2.0161, "step": 14575 }, { "epoch": 0.4702779329355469, "grad_norm": 0.359375, "learning_rate": 1.7266900339346402e-05, "loss": 1.9828, "step": 14576 }, { "epoch": 0.47031019678934327, "grad_norm": 0.361328125, "learning_rate": 1.7265344668184535e-05, "loss": 2.0131, "step": 14577 }, { "epoch": 0.4703424606431396, "grad_norm": 0.3515625, "learning_rate": 1.7263788972087373e-05, "loss": 1.9722, "step": 14578 }, { "epoch": 0.47037472449693596, "grad_norm": 0.35546875, "learning_rate": 1.7262233251072047e-05, "loss": 2.0057, "step": 14579 }, { "epoch": 0.4704069883507323, "grad_norm": 0.337890625, "learning_rate": 1.7260677505155687e-05, "loss": 1.9878, "step": 14580 }, { "epoch": 0.47043925220452865, "grad_norm": 0.34375, "learning_rate": 1.7259121734355403e-05, "loss": 2.0093, "step": 14581 }, { "epoch": 0.470471516058325, "grad_norm": 0.357421875, "learning_rate": 1.7257565938688332e-05, "loss": 1.9975, "step": 14582 }, { "epoch": 0.47050377991212133, "grad_norm": 0.35546875, "learning_rate": 1.7256010118171592e-05, "loss": 1.9711, "step": 14583 }, { "epoch": 0.4705360437659177, "grad_norm": 0.333984375, "learning_rate": 1.7254454272822312e-05, "loss": 1.958, "step": 14584 }, { "epoch": 0.470568307619714, "grad_norm": 0.3671875, "learning_rate": 1.725289840265762e-05, "loss": 1.9952, "step": 14585 }, { "epoch": 0.47060057147351037, "grad_norm": 0.361328125, "learning_rate": 1.7251342507694635e-05, "loss": 1.978, "step": 14586 }, { "epoch": 0.4706328353273067, "grad_norm": 0.3515625, "learning_rate": 1.7249786587950485e-05, "loss": 1.9956, "step": 14587 }, { "epoch": 0.47066509918110305, "grad_norm": 0.37890625, "learning_rate": 1.7248230643442304e-05, "loss": 1.9997, "step": 14588 }, { "epoch": 0.4706973630348994, "grad_norm": 0.349609375, "learning_rate": 1.724667467418721e-05, "loss": 1.9904, "step": 14589 }, { "epoch": 0.47072962688869574, "grad_norm": 0.373046875, "learning_rate": 1.724511868020233e-05, "loss": 2.0103, "step": 14590 }, { "epoch": 0.4707618907424921, "grad_norm": 0.349609375, "learning_rate": 1.7243562661504798e-05, "loss": 1.9575, "step": 14591 }, { "epoch": 0.47079415459628843, "grad_norm": 0.458984375, "learning_rate": 1.7242006618111734e-05, "loss": 1.9694, "step": 14592 }, { "epoch": 0.47082641845008477, "grad_norm": 0.345703125, "learning_rate": 1.7240450550040274e-05, "loss": 1.9991, "step": 14593 }, { "epoch": 0.4708586823038811, "grad_norm": 0.390625, "learning_rate": 1.723889445730754e-05, "loss": 2.0018, "step": 14594 }, { "epoch": 0.47089094615767746, "grad_norm": 0.349609375, "learning_rate": 1.723733833993066e-05, "loss": 1.9843, "step": 14595 }, { "epoch": 0.47092321001147386, "grad_norm": 0.349609375, "learning_rate": 1.7235782197926767e-05, "loss": 1.9918, "step": 14596 }, { "epoch": 0.4709554738652702, "grad_norm": 0.361328125, "learning_rate": 1.7234226031312988e-05, "loss": 1.9938, "step": 14597 }, { "epoch": 0.47098773771906655, "grad_norm": 0.345703125, "learning_rate": 1.723266984010645e-05, "loss": 1.9398, "step": 14598 }, { "epoch": 0.4710200015728629, "grad_norm": 0.375, "learning_rate": 1.7231113624324283e-05, "loss": 1.995, "step": 14599 }, { "epoch": 0.47105226542665923, "grad_norm": 0.376953125, "learning_rate": 1.7229557383983624e-05, "loss": 1.9799, "step": 14600 }, { "epoch": 0.4710845292804556, "grad_norm": 0.35546875, "learning_rate": 1.722800111910159e-05, "loss": 1.9707, "step": 14601 }, { "epoch": 0.4711167931342519, "grad_norm": 0.357421875, "learning_rate": 1.722644482969532e-05, "loss": 2.0096, "step": 14602 }, { "epoch": 0.47114905698804826, "grad_norm": 0.349609375, "learning_rate": 1.7224888515781942e-05, "loss": 1.9365, "step": 14603 }, { "epoch": 0.4711813208418446, "grad_norm": 0.38671875, "learning_rate": 1.7223332177378585e-05, "loss": 1.9601, "step": 14604 }, { "epoch": 0.47121358469564095, "grad_norm": 0.3515625, "learning_rate": 1.722177581450239e-05, "loss": 1.9926, "step": 14605 }, { "epoch": 0.4712458485494373, "grad_norm": 0.36328125, "learning_rate": 1.7220219427170476e-05, "loss": 1.9721, "step": 14606 }, { "epoch": 0.47127811240323364, "grad_norm": 0.36328125, "learning_rate": 1.7218663015399978e-05, "loss": 1.9735, "step": 14607 }, { "epoch": 0.47131037625703, "grad_norm": 0.35546875, "learning_rate": 1.7217106579208032e-05, "loss": 1.9677, "step": 14608 }, { "epoch": 0.4713426401108263, "grad_norm": 0.357421875, "learning_rate": 1.7215550118611767e-05, "loss": 1.9602, "step": 14609 }, { "epoch": 0.47137490396462267, "grad_norm": 0.333984375, "learning_rate": 1.7213993633628315e-05, "loss": 1.9865, "step": 14610 }, { "epoch": 0.471407167818419, "grad_norm": 0.35546875, "learning_rate": 1.721243712427481e-05, "loss": 1.9718, "step": 14611 }, { "epoch": 0.47143943167221536, "grad_norm": 0.357421875, "learning_rate": 1.721088059056838e-05, "loss": 1.9779, "step": 14612 }, { "epoch": 0.4714716955260117, "grad_norm": 0.3515625, "learning_rate": 1.7209324032526167e-05, "loss": 1.9919, "step": 14613 }, { "epoch": 0.47150395937980805, "grad_norm": 0.35546875, "learning_rate": 1.72077674501653e-05, "loss": 1.9766, "step": 14614 }, { "epoch": 0.4715362232336044, "grad_norm": 0.345703125, "learning_rate": 1.7206210843502912e-05, "loss": 1.9636, "step": 14615 }, { "epoch": 0.4715684870874008, "grad_norm": 0.34765625, "learning_rate": 1.7204654212556137e-05, "loss": 1.9643, "step": 14616 }, { "epoch": 0.47160075094119713, "grad_norm": 0.353515625, "learning_rate": 1.7203097557342112e-05, "loss": 1.9933, "step": 14617 }, { "epoch": 0.4716330147949935, "grad_norm": 0.345703125, "learning_rate": 1.7201540877877968e-05, "loss": 1.9949, "step": 14618 }, { "epoch": 0.4716652786487898, "grad_norm": 0.359375, "learning_rate": 1.7199984174180844e-05, "loss": 1.9905, "step": 14619 }, { "epoch": 0.47169754250258616, "grad_norm": 0.345703125, "learning_rate": 1.7198427446267865e-05, "loss": 2.0107, "step": 14620 }, { "epoch": 0.4717298063563825, "grad_norm": 0.3359375, "learning_rate": 1.7196870694156178e-05, "loss": 1.9705, "step": 14621 }, { "epoch": 0.47176207021017885, "grad_norm": 0.34765625, "learning_rate": 1.719531391786292e-05, "loss": 1.973, "step": 14622 }, { "epoch": 0.4717943340639752, "grad_norm": 0.34375, "learning_rate": 1.7193757117405214e-05, "loss": 1.9986, "step": 14623 }, { "epoch": 0.47182659791777154, "grad_norm": 0.341796875, "learning_rate": 1.719220029280021e-05, "loss": 1.9971, "step": 14624 }, { "epoch": 0.4718588617715679, "grad_norm": 0.3359375, "learning_rate": 1.719064344406503e-05, "loss": 1.9948, "step": 14625 }, { "epoch": 0.4718911256253642, "grad_norm": 0.373046875, "learning_rate": 1.7189086571216823e-05, "loss": 2.015, "step": 14626 }, { "epoch": 0.47192338947916057, "grad_norm": 0.345703125, "learning_rate": 1.718752967427272e-05, "loss": 1.9984, "step": 14627 }, { "epoch": 0.4719556533329569, "grad_norm": 0.349609375, "learning_rate": 1.7185972753249864e-05, "loss": 2.0014, "step": 14628 }, { "epoch": 0.47198791718675326, "grad_norm": 0.345703125, "learning_rate": 1.7184415808165382e-05, "loss": 1.9876, "step": 14629 }, { "epoch": 0.4720201810405496, "grad_norm": 0.353515625, "learning_rate": 1.7182858839036424e-05, "loss": 1.9978, "step": 14630 }, { "epoch": 0.47205244489434595, "grad_norm": 0.3515625, "learning_rate": 1.718130184588012e-05, "loss": 1.973, "step": 14631 }, { "epoch": 0.4720847087481423, "grad_norm": 0.359375, "learning_rate": 1.7179744828713605e-05, "loss": 1.9894, "step": 14632 }, { "epoch": 0.47211697260193863, "grad_norm": 0.359375, "learning_rate": 1.7178187787554028e-05, "loss": 1.9376, "step": 14633 }, { "epoch": 0.472149236455735, "grad_norm": 0.34375, "learning_rate": 1.717663072241852e-05, "loss": 1.9698, "step": 14634 }, { "epoch": 0.4721815003095313, "grad_norm": 0.37890625, "learning_rate": 1.717507363332422e-05, "loss": 1.9704, "step": 14635 }, { "epoch": 0.4722137641633277, "grad_norm": 0.3515625, "learning_rate": 1.7173516520288278e-05, "loss": 1.9858, "step": 14636 }, { "epoch": 0.47224602801712406, "grad_norm": 0.369140625, "learning_rate": 1.717195938332782e-05, "loss": 1.9691, "step": 14637 }, { "epoch": 0.4722782918709204, "grad_norm": 0.35546875, "learning_rate": 1.717040222245999e-05, "loss": 1.9875, "step": 14638 }, { "epoch": 0.47231055572471675, "grad_norm": 0.3671875, "learning_rate": 1.7168845037701935e-05, "loss": 2.0162, "step": 14639 }, { "epoch": 0.4723428195785131, "grad_norm": 0.375, "learning_rate": 1.7167287829070782e-05, "loss": 2.0009, "step": 14640 }, { "epoch": 0.47237508343230944, "grad_norm": 0.37109375, "learning_rate": 1.716573059658368e-05, "loss": 1.9912, "step": 14641 }, { "epoch": 0.4724073472861058, "grad_norm": 0.369140625, "learning_rate": 1.716417334025778e-05, "loss": 1.9743, "step": 14642 }, { "epoch": 0.4724396111399021, "grad_norm": 0.34765625, "learning_rate": 1.7162616060110202e-05, "loss": 1.9675, "step": 14643 }, { "epoch": 0.47247187499369847, "grad_norm": 0.373046875, "learning_rate": 1.7161058756158102e-05, "loss": 1.974, "step": 14644 }, { "epoch": 0.4725041388474948, "grad_norm": 0.34765625, "learning_rate": 1.7159501428418616e-05, "loss": 1.9635, "step": 14645 }, { "epoch": 0.47253640270129116, "grad_norm": 0.3515625, "learning_rate": 1.7157944076908888e-05, "loss": 1.9908, "step": 14646 }, { "epoch": 0.4725686665550875, "grad_norm": 0.349609375, "learning_rate": 1.715638670164606e-05, "loss": 1.9835, "step": 14647 }, { "epoch": 0.47260093040888385, "grad_norm": 0.359375, "learning_rate": 1.7154829302647274e-05, "loss": 1.9992, "step": 14648 }, { "epoch": 0.4726331942626802, "grad_norm": 0.345703125, "learning_rate": 1.7153271879929672e-05, "loss": 1.9695, "step": 14649 }, { "epoch": 0.47266545811647653, "grad_norm": 0.33984375, "learning_rate": 1.71517144335104e-05, "loss": 1.9666, "step": 14650 }, { "epoch": 0.4726977219702729, "grad_norm": 0.3515625, "learning_rate": 1.7150156963406597e-05, "loss": 1.9646, "step": 14651 }, { "epoch": 0.4727299858240692, "grad_norm": 0.341796875, "learning_rate": 1.714859946963541e-05, "loss": 1.9648, "step": 14652 }, { "epoch": 0.47276224967786556, "grad_norm": 0.3671875, "learning_rate": 1.7147041952213984e-05, "loss": 1.9958, "step": 14653 }, { "epoch": 0.4727945135316619, "grad_norm": 0.333984375, "learning_rate": 1.7145484411159456e-05, "loss": 1.9697, "step": 14654 }, { "epoch": 0.47282677738545825, "grad_norm": 0.349609375, "learning_rate": 1.714392684648897e-05, "loss": 2.0244, "step": 14655 }, { "epoch": 0.47285904123925465, "grad_norm": 0.359375, "learning_rate": 1.7142369258219687e-05, "loss": 1.9925, "step": 14656 }, { "epoch": 0.472891305093051, "grad_norm": 0.330078125, "learning_rate": 1.714081164636873e-05, "loss": 1.9616, "step": 14657 }, { "epoch": 0.47292356894684734, "grad_norm": 0.375, "learning_rate": 1.7139254010953262e-05, "loss": 1.9889, "step": 14658 }, { "epoch": 0.4729558328006437, "grad_norm": 0.34765625, "learning_rate": 1.713769635199042e-05, "loss": 1.9625, "step": 14659 }, { "epoch": 0.47298809665444, "grad_norm": 0.35546875, "learning_rate": 1.7136138669497343e-05, "loss": 1.946, "step": 14660 }, { "epoch": 0.47302036050823637, "grad_norm": 0.361328125, "learning_rate": 1.7134580963491194e-05, "loss": 1.958, "step": 14661 }, { "epoch": 0.4730526243620327, "grad_norm": 0.375, "learning_rate": 1.71330232339891e-05, "loss": 1.9581, "step": 14662 }, { "epoch": 0.47308488821582906, "grad_norm": 0.37890625, "learning_rate": 1.713146548100822e-05, "loss": 2.0103, "step": 14663 }, { "epoch": 0.4731171520696254, "grad_norm": 0.359375, "learning_rate": 1.71299077045657e-05, "loss": 1.9733, "step": 14664 }, { "epoch": 0.47314941592342175, "grad_norm": 0.38671875, "learning_rate": 1.7128349904678676e-05, "loss": 1.9807, "step": 14665 }, { "epoch": 0.4731816797772181, "grad_norm": 0.36328125, "learning_rate": 1.712679208136431e-05, "loss": 1.9883, "step": 14666 }, { "epoch": 0.47321394363101443, "grad_norm": 0.39453125, "learning_rate": 1.7125234234639744e-05, "loss": 1.9868, "step": 14667 }, { "epoch": 0.4732462074848108, "grad_norm": 0.37109375, "learning_rate": 1.712367636452212e-05, "loss": 1.9755, "step": 14668 }, { "epoch": 0.4732784713386071, "grad_norm": 0.349609375, "learning_rate": 1.7122118471028587e-05, "loss": 1.9828, "step": 14669 }, { "epoch": 0.47331073519240346, "grad_norm": 0.388671875, "learning_rate": 1.7120560554176304e-05, "loss": 1.9939, "step": 14670 }, { "epoch": 0.4733429990461998, "grad_norm": 0.361328125, "learning_rate": 1.711900261398241e-05, "loss": 1.9856, "step": 14671 }, { "epoch": 0.47337526289999615, "grad_norm": 0.380859375, "learning_rate": 1.7117444650464048e-05, "loss": 1.9482, "step": 14672 }, { "epoch": 0.4734075267537925, "grad_norm": 0.359375, "learning_rate": 1.7115886663638386e-05, "loss": 2.0123, "step": 14673 }, { "epoch": 0.47343979060758884, "grad_norm": 0.37890625, "learning_rate": 1.711432865352255e-05, "loss": 1.9662, "step": 14674 }, { "epoch": 0.4734720544613852, "grad_norm": 0.37890625, "learning_rate": 1.7112770620133706e-05, "loss": 1.9923, "step": 14675 }, { "epoch": 0.4735043183151815, "grad_norm": 0.388671875, "learning_rate": 1.7111212563489003e-05, "loss": 1.9928, "step": 14676 }, { "epoch": 0.4735365821689779, "grad_norm": 0.380859375, "learning_rate": 1.7109654483605578e-05, "loss": 1.9628, "step": 14677 }, { "epoch": 0.47356884602277427, "grad_norm": 0.345703125, "learning_rate": 1.71080963805006e-05, "loss": 1.9893, "step": 14678 }, { "epoch": 0.4736011098765706, "grad_norm": 0.396484375, "learning_rate": 1.7106538254191203e-05, "loss": 1.9612, "step": 14679 }, { "epoch": 0.47363337373036696, "grad_norm": 0.345703125, "learning_rate": 1.710498010469454e-05, "loss": 1.9891, "step": 14680 }, { "epoch": 0.4736656375841633, "grad_norm": 0.388671875, "learning_rate": 1.7103421932027774e-05, "loss": 1.9869, "step": 14681 }, { "epoch": 0.47369790143795965, "grad_norm": 0.37890625, "learning_rate": 1.7101863736208046e-05, "loss": 2.0053, "step": 14682 }, { "epoch": 0.473730165291756, "grad_norm": 0.35546875, "learning_rate": 1.710030551725251e-05, "loss": 1.9738, "step": 14683 }, { "epoch": 0.47376242914555233, "grad_norm": 0.361328125, "learning_rate": 1.709874727517832e-05, "loss": 1.988, "step": 14684 }, { "epoch": 0.4737946929993487, "grad_norm": 0.365234375, "learning_rate": 1.7097189010002618e-05, "loss": 1.983, "step": 14685 }, { "epoch": 0.473826956853145, "grad_norm": 0.341796875, "learning_rate": 1.709563072174257e-05, "loss": 1.987, "step": 14686 }, { "epoch": 0.47385922070694136, "grad_norm": 0.365234375, "learning_rate": 1.7094072410415323e-05, "loss": 2.0033, "step": 14687 }, { "epoch": 0.4738914845607377, "grad_norm": 0.349609375, "learning_rate": 1.7092514076038027e-05, "loss": 1.957, "step": 14688 }, { "epoch": 0.47392374841453405, "grad_norm": 0.34375, "learning_rate": 1.7090955718627834e-05, "loss": 1.9969, "step": 14689 }, { "epoch": 0.4739560122683304, "grad_norm": 0.33984375, "learning_rate": 1.7089397338201907e-05, "loss": 1.9711, "step": 14690 }, { "epoch": 0.47398827612212674, "grad_norm": 0.33984375, "learning_rate": 1.7087838934777388e-05, "loss": 1.9877, "step": 14691 }, { "epoch": 0.4740205399759231, "grad_norm": 0.34375, "learning_rate": 1.7086280508371443e-05, "loss": 1.9878, "step": 14692 }, { "epoch": 0.4740528038297194, "grad_norm": 0.341796875, "learning_rate": 1.7084722059001208e-05, "loss": 1.9826, "step": 14693 }, { "epoch": 0.47408506768351577, "grad_norm": 0.41015625, "learning_rate": 1.7083163586683854e-05, "loss": 2.0004, "step": 14694 }, { "epoch": 0.4741173315373121, "grad_norm": 0.345703125, "learning_rate": 1.7081605091436532e-05, "loss": 1.9713, "step": 14695 }, { "epoch": 0.47414959539110846, "grad_norm": 0.34375, "learning_rate": 1.708004657327639e-05, "loss": 1.9957, "step": 14696 }, { "epoch": 0.47418185924490486, "grad_norm": 0.3515625, "learning_rate": 1.7078488032220588e-05, "loss": 1.9871, "step": 14697 }, { "epoch": 0.4742141230987012, "grad_norm": 0.349609375, "learning_rate": 1.7076929468286287e-05, "loss": 1.986, "step": 14698 }, { "epoch": 0.47424638695249755, "grad_norm": 0.35546875, "learning_rate": 1.7075370881490627e-05, "loss": 2.0034, "step": 14699 }, { "epoch": 0.4742786508062939, "grad_norm": 0.349609375, "learning_rate": 1.7073812271850778e-05, "loss": 1.9882, "step": 14700 }, { "epoch": 0.47431091466009023, "grad_norm": 0.345703125, "learning_rate": 1.7072253639383895e-05, "loss": 1.9861, "step": 14701 }, { "epoch": 0.4743431785138866, "grad_norm": 0.341796875, "learning_rate": 1.707069498410713e-05, "loss": 1.9883, "step": 14702 }, { "epoch": 0.4743754423676829, "grad_norm": 0.341796875, "learning_rate": 1.706913630603763e-05, "loss": 1.9768, "step": 14703 }, { "epoch": 0.47440770622147926, "grad_norm": 0.3359375, "learning_rate": 1.7067577605192572e-05, "loss": 1.9845, "step": 14704 }, { "epoch": 0.4744399700752756, "grad_norm": 0.341796875, "learning_rate": 1.70660188815891e-05, "loss": 1.984, "step": 14705 }, { "epoch": 0.47447223392907195, "grad_norm": 0.330078125, "learning_rate": 1.706446013524437e-05, "loss": 1.9642, "step": 14706 }, { "epoch": 0.4745044977828683, "grad_norm": 0.34765625, "learning_rate": 1.7062901366175553e-05, "loss": 1.9813, "step": 14707 }, { "epoch": 0.47453676163666464, "grad_norm": 0.330078125, "learning_rate": 1.7061342574399788e-05, "loss": 1.9813, "step": 14708 }, { "epoch": 0.474569025490461, "grad_norm": 0.330078125, "learning_rate": 1.705978375993425e-05, "loss": 1.9641, "step": 14709 }, { "epoch": 0.4746012893442573, "grad_norm": 0.322265625, "learning_rate": 1.7058224922796088e-05, "loss": 1.9461, "step": 14710 }, { "epoch": 0.47463355319805367, "grad_norm": 0.337890625, "learning_rate": 1.705666606300246e-05, "loss": 2.019, "step": 14711 }, { "epoch": 0.47466581705185, "grad_norm": 0.3359375, "learning_rate": 1.7055107180570532e-05, "loss": 1.9763, "step": 14712 }, { "epoch": 0.47469808090564636, "grad_norm": 0.337890625, "learning_rate": 1.7053548275517456e-05, "loss": 1.9838, "step": 14713 }, { "epoch": 0.4747303447594427, "grad_norm": 0.34375, "learning_rate": 1.705198934786039e-05, "loss": 2.0236, "step": 14714 }, { "epoch": 0.47476260861323905, "grad_norm": 0.349609375, "learning_rate": 1.7050430397616504e-05, "loss": 1.9998, "step": 14715 }, { "epoch": 0.4747948724670354, "grad_norm": 0.3515625, "learning_rate": 1.7048871424802947e-05, "loss": 1.9634, "step": 14716 }, { "epoch": 0.4748271363208318, "grad_norm": 0.35546875, "learning_rate": 1.7047312429436883e-05, "loss": 1.9828, "step": 14717 }, { "epoch": 0.47485940017462813, "grad_norm": 0.357421875, "learning_rate": 1.7045753411535477e-05, "loss": 1.9608, "step": 14718 }, { "epoch": 0.4748916640284245, "grad_norm": 0.3671875, "learning_rate": 1.7044194371115884e-05, "loss": 1.9732, "step": 14719 }, { "epoch": 0.4749239278822208, "grad_norm": 0.34765625, "learning_rate": 1.704263530819526e-05, "loss": 1.9844, "step": 14720 }, { "epoch": 0.47495619173601716, "grad_norm": 0.380859375, "learning_rate": 1.704107622279078e-05, "loss": 1.9847, "step": 14721 }, { "epoch": 0.4749884555898135, "grad_norm": 0.384765625, "learning_rate": 1.7039517114919587e-05, "loss": 2.0143, "step": 14722 }, { "epoch": 0.47502071944360985, "grad_norm": 0.349609375, "learning_rate": 1.7037957984598864e-05, "loss": 1.9992, "step": 14723 }, { "epoch": 0.4750529832974062, "grad_norm": 0.375, "learning_rate": 1.703639883184575e-05, "loss": 1.9976, "step": 14724 }, { "epoch": 0.47508524715120254, "grad_norm": 0.359375, "learning_rate": 1.7034839656677425e-05, "loss": 1.967, "step": 14725 }, { "epoch": 0.4751175110049989, "grad_norm": 0.376953125, "learning_rate": 1.7033280459111048e-05, "loss": 1.9624, "step": 14726 }, { "epoch": 0.4751497748587952, "grad_norm": 0.337890625, "learning_rate": 1.7031721239163777e-05, "loss": 1.9735, "step": 14727 }, { "epoch": 0.47518203871259157, "grad_norm": 0.3515625, "learning_rate": 1.7030161996852772e-05, "loss": 1.9766, "step": 14728 }, { "epoch": 0.4752143025663879, "grad_norm": 0.345703125, "learning_rate": 1.7028602732195206e-05, "loss": 1.9902, "step": 14729 }, { "epoch": 0.47524656642018426, "grad_norm": 0.3515625, "learning_rate": 1.7027043445208228e-05, "loss": 1.995, "step": 14730 }, { "epoch": 0.4752788302739806, "grad_norm": 0.359375, "learning_rate": 1.7025484135909016e-05, "loss": 1.991, "step": 14731 }, { "epoch": 0.47531109412777695, "grad_norm": 0.33203125, "learning_rate": 1.7023924804314728e-05, "loss": 1.9862, "step": 14732 }, { "epoch": 0.4753433579815733, "grad_norm": 0.341796875, "learning_rate": 1.702236545044253e-05, "loss": 1.9742, "step": 14733 }, { "epoch": 0.47537562183536963, "grad_norm": 0.365234375, "learning_rate": 1.7020806074309575e-05, "loss": 1.9867, "step": 14734 }, { "epoch": 0.475407885689166, "grad_norm": 0.3359375, "learning_rate": 1.701924667593304e-05, "loss": 1.9821, "step": 14735 }, { "epoch": 0.4754401495429623, "grad_norm": 0.33984375, "learning_rate": 1.7017687255330092e-05, "loss": 1.96, "step": 14736 }, { "epoch": 0.4754724133967587, "grad_norm": 0.3515625, "learning_rate": 1.7016127812517885e-05, "loss": 1.9377, "step": 14737 }, { "epoch": 0.47550467725055506, "grad_norm": 0.349609375, "learning_rate": 1.701456834751359e-05, "loss": 1.9634, "step": 14738 }, { "epoch": 0.4755369411043514, "grad_norm": 0.349609375, "learning_rate": 1.7013008860334373e-05, "loss": 1.9881, "step": 14739 }, { "epoch": 0.47556920495814775, "grad_norm": 0.349609375, "learning_rate": 1.7011449350997397e-05, "loss": 1.9512, "step": 14740 }, { "epoch": 0.4756014688119441, "grad_norm": 0.35546875, "learning_rate": 1.700988981951983e-05, "loss": 1.9692, "step": 14741 }, { "epoch": 0.47563373266574044, "grad_norm": 0.349609375, "learning_rate": 1.7008330265918835e-05, "loss": 1.9829, "step": 14742 }, { "epoch": 0.4756659965195368, "grad_norm": 0.34765625, "learning_rate": 1.700677069021159e-05, "loss": 1.9794, "step": 14743 }, { "epoch": 0.4756982603733331, "grad_norm": 0.35546875, "learning_rate": 1.7005211092415243e-05, "loss": 1.953, "step": 14744 }, { "epoch": 0.47573052422712947, "grad_norm": 0.353515625, "learning_rate": 1.7003651472546976e-05, "loss": 1.9663, "step": 14745 }, { "epoch": 0.4757627880809258, "grad_norm": 0.35546875, "learning_rate": 1.700209183062395e-05, "loss": 1.9963, "step": 14746 }, { "epoch": 0.47579505193472216, "grad_norm": 0.33984375, "learning_rate": 1.7000532166663336e-05, "loss": 1.9767, "step": 14747 }, { "epoch": 0.4758273157885185, "grad_norm": 0.3671875, "learning_rate": 1.699897248068229e-05, "loss": 1.9972, "step": 14748 }, { "epoch": 0.47585957964231484, "grad_norm": 0.34765625, "learning_rate": 1.6997412772698004e-05, "loss": 1.9733, "step": 14749 }, { "epoch": 0.4758918434961112, "grad_norm": 0.341796875, "learning_rate": 1.6995853042727624e-05, "loss": 1.9774, "step": 14750 }, { "epoch": 0.47592410734990753, "grad_norm": 0.349609375, "learning_rate": 1.6994293290788317e-05, "loss": 1.9991, "step": 14751 }, { "epoch": 0.4759563712037039, "grad_norm": 0.337890625, "learning_rate": 1.699273351689727e-05, "loss": 1.976, "step": 14752 }, { "epoch": 0.4759886350575002, "grad_norm": 0.34765625, "learning_rate": 1.6991173721071644e-05, "loss": 1.9866, "step": 14753 }, { "epoch": 0.47602089891129656, "grad_norm": 0.3359375, "learning_rate": 1.6989613903328596e-05, "loss": 1.9819, "step": 14754 }, { "epoch": 0.4760531627650929, "grad_norm": 0.33984375, "learning_rate": 1.6988054063685312e-05, "loss": 2.0114, "step": 14755 }, { "epoch": 0.47608542661888925, "grad_norm": 0.353515625, "learning_rate": 1.6986494202158956e-05, "loss": 1.9986, "step": 14756 }, { "epoch": 0.47611769047268565, "grad_norm": 0.357421875, "learning_rate": 1.6984934318766694e-05, "loss": 2.0144, "step": 14757 }, { "epoch": 0.476149954326482, "grad_norm": 0.3515625, "learning_rate": 1.69833744135257e-05, "loss": 1.9823, "step": 14758 }, { "epoch": 0.47618221818027834, "grad_norm": 0.3359375, "learning_rate": 1.698181448645314e-05, "loss": 1.9986, "step": 14759 }, { "epoch": 0.4762144820340747, "grad_norm": 0.341796875, "learning_rate": 1.6980254537566192e-05, "loss": 1.9929, "step": 14760 }, { "epoch": 0.476246745887871, "grad_norm": 0.3515625, "learning_rate": 1.6978694566882025e-05, "loss": 1.9984, "step": 14761 }, { "epoch": 0.47627900974166737, "grad_norm": 0.345703125, "learning_rate": 1.6977134574417802e-05, "loss": 1.9841, "step": 14762 }, { "epoch": 0.4763112735954637, "grad_norm": 0.337890625, "learning_rate": 1.6975574560190704e-05, "loss": 1.9801, "step": 14763 }, { "epoch": 0.47634353744926006, "grad_norm": 0.33984375, "learning_rate": 1.6974014524217902e-05, "loss": 1.9929, "step": 14764 }, { "epoch": 0.4763758013030564, "grad_norm": 0.33984375, "learning_rate": 1.6972454466516553e-05, "loss": 1.9814, "step": 14765 }, { "epoch": 0.47640806515685274, "grad_norm": 0.33984375, "learning_rate": 1.697089438710385e-05, "loss": 1.978, "step": 14766 }, { "epoch": 0.4764403290106491, "grad_norm": 0.333984375, "learning_rate": 1.6969334285996954e-05, "loss": 1.9635, "step": 14767 }, { "epoch": 0.47647259286444543, "grad_norm": 0.345703125, "learning_rate": 1.6967774163213036e-05, "loss": 1.974, "step": 14768 }, { "epoch": 0.4765048567182418, "grad_norm": 0.33984375, "learning_rate": 1.696621401876927e-05, "loss": 2.0005, "step": 14769 }, { "epoch": 0.4765371205720381, "grad_norm": 0.345703125, "learning_rate": 1.696465385268284e-05, "loss": 1.9622, "step": 14770 }, { "epoch": 0.47656938442583446, "grad_norm": 0.34765625, "learning_rate": 1.69630936649709e-05, "loss": 1.9826, "step": 14771 }, { "epoch": 0.4766016482796308, "grad_norm": 0.361328125, "learning_rate": 1.6961533455650634e-05, "loss": 1.9732, "step": 14772 }, { "epoch": 0.47663391213342715, "grad_norm": 0.35546875, "learning_rate": 1.6959973224739216e-05, "loss": 1.9711, "step": 14773 }, { "epoch": 0.4766661759872235, "grad_norm": 0.37109375, "learning_rate": 1.6958412972253825e-05, "loss": 1.9811, "step": 14774 }, { "epoch": 0.47669843984101984, "grad_norm": 0.36328125, "learning_rate": 1.6956852698211622e-05, "loss": 1.9582, "step": 14775 }, { "epoch": 0.4767307036948162, "grad_norm": 0.349609375, "learning_rate": 1.6955292402629788e-05, "loss": 1.9846, "step": 14776 }, { "epoch": 0.4767629675486125, "grad_norm": 0.33984375, "learning_rate": 1.6953732085525504e-05, "loss": 1.9817, "step": 14777 }, { "epoch": 0.4767952314024089, "grad_norm": 0.35546875, "learning_rate": 1.6952171746915937e-05, "loss": 1.9979, "step": 14778 }, { "epoch": 0.47682749525620527, "grad_norm": 0.333984375, "learning_rate": 1.6950611386818256e-05, "loss": 1.9695, "step": 14779 }, { "epoch": 0.4768597591100016, "grad_norm": 0.365234375, "learning_rate": 1.6949051005249655e-05, "loss": 1.9727, "step": 14780 }, { "epoch": 0.47689202296379796, "grad_norm": 0.341796875, "learning_rate": 1.6947490602227296e-05, "loss": 1.9745, "step": 14781 }, { "epoch": 0.4769242868175943, "grad_norm": 0.353515625, "learning_rate": 1.6945930177768355e-05, "loss": 1.9494, "step": 14782 }, { "epoch": 0.47695655067139064, "grad_norm": 0.3515625, "learning_rate": 1.694436973189001e-05, "loss": 1.9646, "step": 14783 }, { "epoch": 0.476988814525187, "grad_norm": 0.34375, "learning_rate": 1.694280926460944e-05, "loss": 1.987, "step": 14784 }, { "epoch": 0.47702107837898333, "grad_norm": 0.3828125, "learning_rate": 1.6941248775943818e-05, "loss": 1.9885, "step": 14785 }, { "epoch": 0.4770533422327797, "grad_norm": 0.359375, "learning_rate": 1.693968826591032e-05, "loss": 1.994, "step": 14786 }, { "epoch": 0.477085606086576, "grad_norm": 0.359375, "learning_rate": 1.693812773452613e-05, "loss": 1.9887, "step": 14787 }, { "epoch": 0.47711786994037236, "grad_norm": 0.337890625, "learning_rate": 1.693656718180842e-05, "loss": 1.9512, "step": 14788 }, { "epoch": 0.4771501337941687, "grad_norm": 0.353515625, "learning_rate": 1.6935006607774366e-05, "loss": 1.9953, "step": 14789 }, { "epoch": 0.47718239764796505, "grad_norm": 0.33984375, "learning_rate": 1.6933446012441144e-05, "loss": 1.9706, "step": 14790 }, { "epoch": 0.4772146615017614, "grad_norm": 0.337890625, "learning_rate": 1.6931885395825942e-05, "loss": 1.9692, "step": 14791 }, { "epoch": 0.47724692535555774, "grad_norm": 0.33984375, "learning_rate": 1.6930324757945927e-05, "loss": 1.9627, "step": 14792 }, { "epoch": 0.4772791892093541, "grad_norm": 0.34765625, "learning_rate": 1.6928764098818283e-05, "loss": 1.9906, "step": 14793 }, { "epoch": 0.4773114530631504, "grad_norm": 0.361328125, "learning_rate": 1.6927203418460186e-05, "loss": 1.9641, "step": 14794 }, { "epoch": 0.47734371691694677, "grad_norm": 0.349609375, "learning_rate": 1.6925642716888823e-05, "loss": 2.018, "step": 14795 }, { "epoch": 0.4773759807707431, "grad_norm": 0.37890625, "learning_rate": 1.6924081994121358e-05, "loss": 1.9777, "step": 14796 }, { "epoch": 0.47740824462453946, "grad_norm": 0.341796875, "learning_rate": 1.6922521250174986e-05, "loss": 1.9825, "step": 14797 }, { "epoch": 0.47744050847833586, "grad_norm": 0.3671875, "learning_rate": 1.6920960485066875e-05, "loss": 1.9875, "step": 14798 }, { "epoch": 0.4774727723321322, "grad_norm": 0.349609375, "learning_rate": 1.691939969881421e-05, "loss": 1.9788, "step": 14799 }, { "epoch": 0.47750503618592854, "grad_norm": 0.37109375, "learning_rate": 1.6917838891434167e-05, "loss": 1.9662, "step": 14800 }, { "epoch": 0.4775373000397249, "grad_norm": 0.369140625, "learning_rate": 1.6916278062943932e-05, "loss": 2.0201, "step": 14801 }, { "epoch": 0.47756956389352123, "grad_norm": 0.33984375, "learning_rate": 1.6914717213360683e-05, "loss": 1.9685, "step": 14802 }, { "epoch": 0.4776018277473176, "grad_norm": 0.357421875, "learning_rate": 1.6913156342701598e-05, "loss": 1.9423, "step": 14803 }, { "epoch": 0.4776340916011139, "grad_norm": 0.341796875, "learning_rate": 1.6911595450983867e-05, "loss": 1.9948, "step": 14804 }, { "epoch": 0.47766635545491026, "grad_norm": 0.341796875, "learning_rate": 1.6910034538224656e-05, "loss": 1.9526, "step": 14805 }, { "epoch": 0.4776986193087066, "grad_norm": 0.36328125, "learning_rate": 1.690847360444116e-05, "loss": 2.0055, "step": 14806 }, { "epoch": 0.47773088316250295, "grad_norm": 0.333984375, "learning_rate": 1.6906912649650552e-05, "loss": 1.9333, "step": 14807 }, { "epoch": 0.4777631470162993, "grad_norm": 0.357421875, "learning_rate": 1.6905351673870026e-05, "loss": 1.9817, "step": 14808 }, { "epoch": 0.47779541087009564, "grad_norm": 0.34375, "learning_rate": 1.690379067711675e-05, "loss": 1.9781, "step": 14809 }, { "epoch": 0.477827674723892, "grad_norm": 0.35546875, "learning_rate": 1.6902229659407907e-05, "loss": 2.0054, "step": 14810 }, { "epoch": 0.4778599385776883, "grad_norm": 0.345703125, "learning_rate": 1.6900668620760694e-05, "loss": 2.0078, "step": 14811 }, { "epoch": 0.47789220243148467, "grad_norm": 0.34765625, "learning_rate": 1.6899107561192285e-05, "loss": 1.9719, "step": 14812 }, { "epoch": 0.477924466285281, "grad_norm": 0.353515625, "learning_rate": 1.6897546480719856e-05, "loss": 1.9906, "step": 14813 }, { "epoch": 0.47795673013907736, "grad_norm": 0.341796875, "learning_rate": 1.6895985379360596e-05, "loss": 1.997, "step": 14814 }, { "epoch": 0.4779889939928737, "grad_norm": 0.33984375, "learning_rate": 1.6894424257131696e-05, "loss": 1.9894, "step": 14815 }, { "epoch": 0.47802125784667004, "grad_norm": 0.345703125, "learning_rate": 1.6892863114050324e-05, "loss": 1.9537, "step": 14816 }, { "epoch": 0.4780535217004664, "grad_norm": 0.333984375, "learning_rate": 1.6891301950133674e-05, "loss": 2.0004, "step": 14817 }, { "epoch": 0.4780857855542628, "grad_norm": 0.34375, "learning_rate": 1.6889740765398935e-05, "loss": 1.9856, "step": 14818 }, { "epoch": 0.47811804940805913, "grad_norm": 0.3359375, "learning_rate": 1.6888179559863284e-05, "loss": 1.9852, "step": 14819 }, { "epoch": 0.4781503132618555, "grad_norm": 0.337890625, "learning_rate": 1.6886618333543903e-05, "loss": 2.0083, "step": 14820 }, { "epoch": 0.4781825771156518, "grad_norm": 0.341796875, "learning_rate": 1.6885057086457982e-05, "loss": 2.0066, "step": 14821 }, { "epoch": 0.47821484096944816, "grad_norm": 0.3515625, "learning_rate": 1.6883495818622708e-05, "loss": 1.9842, "step": 14822 }, { "epoch": 0.4782471048232445, "grad_norm": 0.365234375, "learning_rate": 1.6881934530055263e-05, "loss": 2.001, "step": 14823 }, { "epoch": 0.47827936867704085, "grad_norm": 0.35546875, "learning_rate": 1.6880373220772827e-05, "loss": 1.9679, "step": 14824 }, { "epoch": 0.4783116325308372, "grad_norm": 0.349609375, "learning_rate": 1.6878811890792596e-05, "loss": 1.9809, "step": 14825 }, { "epoch": 0.47834389638463354, "grad_norm": 0.359375, "learning_rate": 1.6877250540131753e-05, "loss": 1.9834, "step": 14826 }, { "epoch": 0.4783761602384299, "grad_norm": 0.359375, "learning_rate": 1.6875689168807478e-05, "loss": 1.9969, "step": 14827 }, { "epoch": 0.4784084240922262, "grad_norm": 0.341796875, "learning_rate": 1.6874127776836968e-05, "loss": 1.9644, "step": 14828 }, { "epoch": 0.47844068794602257, "grad_norm": 0.361328125, "learning_rate": 1.6872566364237403e-05, "loss": 1.9701, "step": 14829 }, { "epoch": 0.4784729517998189, "grad_norm": 0.34375, "learning_rate": 1.6871004931025967e-05, "loss": 1.9988, "step": 14830 }, { "epoch": 0.47850521565361526, "grad_norm": 0.345703125, "learning_rate": 1.6869443477219852e-05, "loss": 1.9527, "step": 14831 }, { "epoch": 0.4785374795074116, "grad_norm": 0.35546875, "learning_rate": 1.6867882002836248e-05, "loss": 1.9763, "step": 14832 }, { "epoch": 0.47856974336120794, "grad_norm": 0.34765625, "learning_rate": 1.6866320507892335e-05, "loss": 2.0034, "step": 14833 }, { "epoch": 0.4786020072150043, "grad_norm": 0.333984375, "learning_rate": 1.6864758992405302e-05, "loss": 1.9447, "step": 14834 }, { "epoch": 0.47863427106880063, "grad_norm": 0.365234375, "learning_rate": 1.686319745639235e-05, "loss": 1.99, "step": 14835 }, { "epoch": 0.478666534922597, "grad_norm": 0.33984375, "learning_rate": 1.686163589987065e-05, "loss": 1.9718, "step": 14836 }, { "epoch": 0.4786987987763933, "grad_norm": 0.349609375, "learning_rate": 1.6860074322857396e-05, "loss": 1.9881, "step": 14837 }, { "epoch": 0.4787310626301897, "grad_norm": 0.34375, "learning_rate": 1.685851272536978e-05, "loss": 1.9772, "step": 14838 }, { "epoch": 0.47876332648398606, "grad_norm": 0.3515625, "learning_rate": 1.685695110742499e-05, "loss": 1.966, "step": 14839 }, { "epoch": 0.4787955903377824, "grad_norm": 0.36328125, "learning_rate": 1.6855389469040217e-05, "loss": 1.9949, "step": 14840 }, { "epoch": 0.47882785419157875, "grad_norm": 0.349609375, "learning_rate": 1.685382781023264e-05, "loss": 1.9709, "step": 14841 }, { "epoch": 0.4788601180453751, "grad_norm": 0.35546875, "learning_rate": 1.6852266131019464e-05, "loss": 1.9576, "step": 14842 }, { "epoch": 0.47889238189917144, "grad_norm": 0.345703125, "learning_rate": 1.685070443141787e-05, "loss": 2.0092, "step": 14843 }, { "epoch": 0.4789246457529678, "grad_norm": 0.357421875, "learning_rate": 1.684914271144505e-05, "loss": 2.0126, "step": 14844 }, { "epoch": 0.4789569096067641, "grad_norm": 0.345703125, "learning_rate": 1.6847580971118188e-05, "loss": 1.9914, "step": 14845 }, { "epoch": 0.47898917346056047, "grad_norm": 0.341796875, "learning_rate": 1.6846019210454487e-05, "loss": 1.9983, "step": 14846 }, { "epoch": 0.4790214373143568, "grad_norm": 0.34375, "learning_rate": 1.6844457429471126e-05, "loss": 1.9707, "step": 14847 }, { "epoch": 0.47905370116815316, "grad_norm": 0.34765625, "learning_rate": 1.68428956281853e-05, "loss": 1.9658, "step": 14848 }, { "epoch": 0.4790859650219495, "grad_norm": 0.345703125, "learning_rate": 1.6841333806614206e-05, "loss": 2.0044, "step": 14849 }, { "epoch": 0.47911822887574584, "grad_norm": 0.3515625, "learning_rate": 1.6839771964775028e-05, "loss": 1.9913, "step": 14850 }, { "epoch": 0.4791504927295422, "grad_norm": 0.3359375, "learning_rate": 1.683821010268496e-05, "loss": 1.9628, "step": 14851 }, { "epoch": 0.47918275658333853, "grad_norm": 0.34375, "learning_rate": 1.6836648220361193e-05, "loss": 1.9361, "step": 14852 }, { "epoch": 0.4792150204371349, "grad_norm": 0.357421875, "learning_rate": 1.683508631782092e-05, "loss": 1.9707, "step": 14853 }, { "epoch": 0.4792472842909312, "grad_norm": 0.3359375, "learning_rate": 1.6833524395081337e-05, "loss": 1.9739, "step": 14854 }, { "epoch": 0.47927954814472756, "grad_norm": 0.33984375, "learning_rate": 1.6831962452159625e-05, "loss": 2.0129, "step": 14855 }, { "epoch": 0.4793118119985239, "grad_norm": 0.341796875, "learning_rate": 1.683040048907299e-05, "loss": 2.0013, "step": 14856 }, { "epoch": 0.47934407585232025, "grad_norm": 0.33203125, "learning_rate": 1.682883850583862e-05, "loss": 1.9678, "step": 14857 }, { "epoch": 0.4793763397061166, "grad_norm": 0.345703125, "learning_rate": 1.6827276502473705e-05, "loss": 1.9768, "step": 14858 }, { "epoch": 0.479408603559913, "grad_norm": 0.345703125, "learning_rate": 1.6825714478995443e-05, "loss": 1.9809, "step": 14859 }, { "epoch": 0.47944086741370934, "grad_norm": 0.33984375, "learning_rate": 1.682415243542103e-05, "loss": 1.9986, "step": 14860 }, { "epoch": 0.4794731312675057, "grad_norm": 0.34765625, "learning_rate": 1.682259037176765e-05, "loss": 1.9667, "step": 14861 }, { "epoch": 0.479505395121302, "grad_norm": 0.341796875, "learning_rate": 1.6821028288052504e-05, "loss": 1.9941, "step": 14862 }, { "epoch": 0.47953765897509837, "grad_norm": 0.34765625, "learning_rate": 1.6819466184292784e-05, "loss": 2.0001, "step": 14863 }, { "epoch": 0.4795699228288947, "grad_norm": 0.35546875, "learning_rate": 1.681790406050569e-05, "loss": 1.9884, "step": 14864 }, { "epoch": 0.47960218668269106, "grad_norm": 0.341796875, "learning_rate": 1.6816341916708403e-05, "loss": 1.9635, "step": 14865 }, { "epoch": 0.4796344505364874, "grad_norm": 0.337890625, "learning_rate": 1.6814779752918133e-05, "loss": 1.9697, "step": 14866 }, { "epoch": 0.47966671439028374, "grad_norm": 0.357421875, "learning_rate": 1.6813217569152073e-05, "loss": 1.9713, "step": 14867 }, { "epoch": 0.4796989782440801, "grad_norm": 0.3359375, "learning_rate": 1.681165536542741e-05, "loss": 1.9863, "step": 14868 }, { "epoch": 0.47973124209787643, "grad_norm": 0.359375, "learning_rate": 1.6810093141761346e-05, "loss": 2.0118, "step": 14869 }, { "epoch": 0.4797635059516728, "grad_norm": 0.34375, "learning_rate": 1.6808530898171073e-05, "loss": 1.9886, "step": 14870 }, { "epoch": 0.4797957698054691, "grad_norm": 0.3515625, "learning_rate": 1.6806968634673794e-05, "loss": 1.9868, "step": 14871 }, { "epoch": 0.47982803365926546, "grad_norm": 0.341796875, "learning_rate": 1.6805406351286694e-05, "loss": 2.0036, "step": 14872 }, { "epoch": 0.4798602975130618, "grad_norm": 0.337890625, "learning_rate": 1.6803844048026982e-05, "loss": 1.9979, "step": 14873 }, { "epoch": 0.47989256136685815, "grad_norm": 0.357421875, "learning_rate": 1.6802281724911848e-05, "loss": 1.9522, "step": 14874 }, { "epoch": 0.4799248252206545, "grad_norm": 0.328125, "learning_rate": 1.6800719381958488e-05, "loss": 1.9582, "step": 14875 }, { "epoch": 0.47995708907445084, "grad_norm": 0.349609375, "learning_rate": 1.67991570191841e-05, "loss": 1.9965, "step": 14876 }, { "epoch": 0.4799893529282472, "grad_norm": 0.349609375, "learning_rate": 1.6797594636605884e-05, "loss": 1.9837, "step": 14877 }, { "epoch": 0.4800216167820435, "grad_norm": 0.34375, "learning_rate": 1.6796032234241036e-05, "loss": 1.9703, "step": 14878 }, { "epoch": 0.4800538806358399, "grad_norm": 0.337890625, "learning_rate": 1.679446981210675e-05, "loss": 1.988, "step": 14879 }, { "epoch": 0.48008614448963627, "grad_norm": 0.3671875, "learning_rate": 1.6792907370220235e-05, "loss": 1.9527, "step": 14880 }, { "epoch": 0.4801184083434326, "grad_norm": 0.34375, "learning_rate": 1.6791344908598676e-05, "loss": 2.001, "step": 14881 }, { "epoch": 0.48015067219722896, "grad_norm": 0.375, "learning_rate": 1.6789782427259276e-05, "loss": 1.9713, "step": 14882 }, { "epoch": 0.4801829360510253, "grad_norm": 0.365234375, "learning_rate": 1.678821992621924e-05, "loss": 2.0095, "step": 14883 }, { "epoch": 0.48021519990482164, "grad_norm": 0.345703125, "learning_rate": 1.678665740549576e-05, "loss": 1.9864, "step": 14884 }, { "epoch": 0.480247463758618, "grad_norm": 0.35546875, "learning_rate": 1.6785094865106038e-05, "loss": 1.967, "step": 14885 }, { "epoch": 0.48027972761241433, "grad_norm": 0.330078125, "learning_rate": 1.6783532305067268e-05, "loss": 1.9649, "step": 14886 }, { "epoch": 0.4803119914662107, "grad_norm": 0.349609375, "learning_rate": 1.6781969725396656e-05, "loss": 1.9643, "step": 14887 }, { "epoch": 0.480344255320007, "grad_norm": 0.34375, "learning_rate": 1.6780407126111404e-05, "loss": 1.9908, "step": 14888 }, { "epoch": 0.48037651917380336, "grad_norm": 0.34765625, "learning_rate": 1.6778844507228702e-05, "loss": 1.9884, "step": 14889 }, { "epoch": 0.4804087830275997, "grad_norm": 0.349609375, "learning_rate": 1.677728186876576e-05, "loss": 1.9803, "step": 14890 }, { "epoch": 0.48044104688139605, "grad_norm": 0.353515625, "learning_rate": 1.6775719210739773e-05, "loss": 2.0051, "step": 14891 }, { "epoch": 0.4804733107351924, "grad_norm": 0.33984375, "learning_rate": 1.677415653316794e-05, "loss": 1.995, "step": 14892 }, { "epoch": 0.48050557458898874, "grad_norm": 0.345703125, "learning_rate": 1.6772593836067466e-05, "loss": 1.9452, "step": 14893 }, { "epoch": 0.4805378384427851, "grad_norm": 0.388671875, "learning_rate": 1.6771031119455553e-05, "loss": 1.9933, "step": 14894 }, { "epoch": 0.4805701022965814, "grad_norm": 0.357421875, "learning_rate": 1.6769468383349397e-05, "loss": 2.0055, "step": 14895 }, { "epoch": 0.48060236615037777, "grad_norm": 0.3671875, "learning_rate": 1.6767905627766203e-05, "loss": 1.9551, "step": 14896 }, { "epoch": 0.4806346300041741, "grad_norm": 0.365234375, "learning_rate": 1.6766342852723173e-05, "loss": 1.9805, "step": 14897 }, { "epoch": 0.48066689385797046, "grad_norm": 0.357421875, "learning_rate": 1.676478005823751e-05, "loss": 2.0074, "step": 14898 }, { "epoch": 0.48069915771176686, "grad_norm": 0.35546875, "learning_rate": 1.6763217244326415e-05, "loss": 2.001, "step": 14899 }, { "epoch": 0.4807314215655632, "grad_norm": 0.33984375, "learning_rate": 1.6761654411007084e-05, "loss": 1.975, "step": 14900 }, { "epoch": 0.48076368541935954, "grad_norm": 0.34375, "learning_rate": 1.6760091558296727e-05, "loss": 2.0022, "step": 14901 }, { "epoch": 0.4807959492731559, "grad_norm": 0.34375, "learning_rate": 1.675852868621255e-05, "loss": 1.9589, "step": 14902 }, { "epoch": 0.48082821312695223, "grad_norm": 0.349609375, "learning_rate": 1.6756965794771743e-05, "loss": 1.9808, "step": 14903 }, { "epoch": 0.4808604769807486, "grad_norm": 0.3515625, "learning_rate": 1.675540288399152e-05, "loss": 2.0005, "step": 14904 }, { "epoch": 0.4808927408345449, "grad_norm": 0.34765625, "learning_rate": 1.6753839953889083e-05, "loss": 1.9695, "step": 14905 }, { "epoch": 0.48092500468834126, "grad_norm": 0.3515625, "learning_rate": 1.6752277004481634e-05, "loss": 1.9876, "step": 14906 }, { "epoch": 0.4809572685421376, "grad_norm": 0.361328125, "learning_rate": 1.6750714035786373e-05, "loss": 1.9869, "step": 14907 }, { "epoch": 0.48098953239593395, "grad_norm": 0.3359375, "learning_rate": 1.6749151047820514e-05, "loss": 1.9863, "step": 14908 }, { "epoch": 0.4810217962497303, "grad_norm": 0.361328125, "learning_rate": 1.6747588040601246e-05, "loss": 1.9743, "step": 14909 }, { "epoch": 0.48105406010352664, "grad_norm": 0.3515625, "learning_rate": 1.674602501414579e-05, "loss": 1.95, "step": 14910 }, { "epoch": 0.481086323957323, "grad_norm": 0.341796875, "learning_rate": 1.674446196847134e-05, "loss": 1.9591, "step": 14911 }, { "epoch": 0.4811185878111193, "grad_norm": 0.349609375, "learning_rate": 1.6742898903595107e-05, "loss": 1.9604, "step": 14912 }, { "epoch": 0.48115085166491567, "grad_norm": 0.34765625, "learning_rate": 1.6741335819534288e-05, "loss": 2.0041, "step": 14913 }, { "epoch": 0.481183115518712, "grad_norm": 0.3515625, "learning_rate": 1.67397727163061e-05, "loss": 2.0002, "step": 14914 }, { "epoch": 0.48121537937250836, "grad_norm": 0.353515625, "learning_rate": 1.673820959392774e-05, "loss": 1.9792, "step": 14915 }, { "epoch": 0.4812476432263047, "grad_norm": 0.33203125, "learning_rate": 1.6736646452416415e-05, "loss": 1.9663, "step": 14916 }, { "epoch": 0.48127990708010104, "grad_norm": 0.337890625, "learning_rate": 1.673508329178933e-05, "loss": 1.9857, "step": 14917 }, { "epoch": 0.4813121709338974, "grad_norm": 0.34765625, "learning_rate": 1.6733520112063695e-05, "loss": 1.9633, "step": 14918 }, { "epoch": 0.4813444347876938, "grad_norm": 0.345703125, "learning_rate": 1.6731956913256715e-05, "loss": 1.9614, "step": 14919 }, { "epoch": 0.48137669864149013, "grad_norm": 0.3359375, "learning_rate": 1.6730393695385595e-05, "loss": 1.9762, "step": 14920 }, { "epoch": 0.4814089624952865, "grad_norm": 0.345703125, "learning_rate": 1.672883045846754e-05, "loss": 1.9597, "step": 14921 }, { "epoch": 0.4814412263490828, "grad_norm": 0.341796875, "learning_rate": 1.6727267202519765e-05, "loss": 1.9438, "step": 14922 }, { "epoch": 0.48147349020287916, "grad_norm": 0.34375, "learning_rate": 1.6725703927559465e-05, "loss": 1.9495, "step": 14923 }, { "epoch": 0.4815057540566755, "grad_norm": 0.33984375, "learning_rate": 1.672414063360386e-05, "loss": 1.9884, "step": 14924 }, { "epoch": 0.48153801791047185, "grad_norm": 0.341796875, "learning_rate": 1.6722577320670152e-05, "loss": 1.9627, "step": 14925 }, { "epoch": 0.4815702817642682, "grad_norm": 0.333984375, "learning_rate": 1.6721013988775543e-05, "loss": 1.9795, "step": 14926 }, { "epoch": 0.48160254561806454, "grad_norm": 0.3359375, "learning_rate": 1.6719450637937248e-05, "loss": 2.001, "step": 14927 }, { "epoch": 0.4816348094718609, "grad_norm": 0.341796875, "learning_rate": 1.6717887268172477e-05, "loss": 1.9626, "step": 14928 }, { "epoch": 0.4816670733256572, "grad_norm": 0.359375, "learning_rate": 1.6716323879498437e-05, "loss": 1.9936, "step": 14929 }, { "epoch": 0.48169933717945357, "grad_norm": 0.3359375, "learning_rate": 1.671476047193233e-05, "loss": 1.9832, "step": 14930 }, { "epoch": 0.4817316010332499, "grad_norm": 0.345703125, "learning_rate": 1.671319704549137e-05, "loss": 1.9615, "step": 14931 }, { "epoch": 0.48176386488704626, "grad_norm": 0.34375, "learning_rate": 1.671163360019277e-05, "loss": 2.0112, "step": 14932 }, { "epoch": 0.4817961287408426, "grad_norm": 0.34375, "learning_rate": 1.6710070136053734e-05, "loss": 1.9839, "step": 14933 }, { "epoch": 0.48182839259463894, "grad_norm": 0.345703125, "learning_rate": 1.670850665309147e-05, "loss": 1.9915, "step": 14934 }, { "epoch": 0.4818606564484353, "grad_norm": 0.357421875, "learning_rate": 1.6706943151323193e-05, "loss": 1.9751, "step": 14935 }, { "epoch": 0.48189292030223163, "grad_norm": 0.349609375, "learning_rate": 1.670537963076611e-05, "loss": 1.9494, "step": 14936 }, { "epoch": 0.481925184156028, "grad_norm": 0.349609375, "learning_rate": 1.670381609143743e-05, "loss": 1.9382, "step": 14937 }, { "epoch": 0.4819574480098243, "grad_norm": 0.341796875, "learning_rate": 1.6702252533354363e-05, "loss": 1.9153, "step": 14938 }, { "epoch": 0.48198971186362066, "grad_norm": 0.373046875, "learning_rate": 1.670068895653413e-05, "loss": 1.9855, "step": 14939 }, { "epoch": 0.48202197571741706, "grad_norm": 0.34765625, "learning_rate": 1.6699125360993925e-05, "loss": 1.962, "step": 14940 }, { "epoch": 0.4820542395712134, "grad_norm": 0.369140625, "learning_rate": 1.669756174675097e-05, "loss": 1.9903, "step": 14941 }, { "epoch": 0.48208650342500975, "grad_norm": 0.345703125, "learning_rate": 1.6695998113822473e-05, "loss": 1.9757, "step": 14942 }, { "epoch": 0.4821187672788061, "grad_norm": 0.34765625, "learning_rate": 1.6694434462225642e-05, "loss": 1.9667, "step": 14943 }, { "epoch": 0.48215103113260244, "grad_norm": 0.337890625, "learning_rate": 1.6692870791977695e-05, "loss": 1.9924, "step": 14944 }, { "epoch": 0.4821832949863988, "grad_norm": 0.35546875, "learning_rate": 1.6691307103095842e-05, "loss": 1.9749, "step": 14945 }, { "epoch": 0.4822155588401951, "grad_norm": 0.37109375, "learning_rate": 1.668974339559729e-05, "loss": 1.9621, "step": 14946 }, { "epoch": 0.48224782269399147, "grad_norm": 0.341796875, "learning_rate": 1.668817966949926e-05, "loss": 1.984, "step": 14947 }, { "epoch": 0.4822800865477878, "grad_norm": 0.37109375, "learning_rate": 1.6686615924818956e-05, "loss": 1.9973, "step": 14948 }, { "epoch": 0.48231235040158416, "grad_norm": 0.345703125, "learning_rate": 1.668505216157359e-05, "loss": 2.0148, "step": 14949 }, { "epoch": 0.4823446142553805, "grad_norm": 0.388671875, "learning_rate": 1.668348837978039e-05, "loss": 1.9988, "step": 14950 }, { "epoch": 0.48237687810917684, "grad_norm": 0.3828125, "learning_rate": 1.6681924579456546e-05, "loss": 1.9595, "step": 14951 }, { "epoch": 0.4824091419629732, "grad_norm": 0.3828125, "learning_rate": 1.668036076061929e-05, "loss": 1.9943, "step": 14952 }, { "epoch": 0.48244140581676953, "grad_norm": 0.380859375, "learning_rate": 1.6678796923285827e-05, "loss": 1.9347, "step": 14953 }, { "epoch": 0.4824736696705659, "grad_norm": 0.361328125, "learning_rate": 1.667723306747337e-05, "loss": 1.9704, "step": 14954 }, { "epoch": 0.4825059335243622, "grad_norm": 0.36328125, "learning_rate": 1.6675669193199134e-05, "loss": 1.9395, "step": 14955 }, { "epoch": 0.48253819737815856, "grad_norm": 0.361328125, "learning_rate": 1.6674105300480337e-05, "loss": 1.9854, "step": 14956 }, { "epoch": 0.4825704612319549, "grad_norm": 0.376953125, "learning_rate": 1.6672541389334187e-05, "loss": 2.0018, "step": 14957 }, { "epoch": 0.48260272508575125, "grad_norm": 0.353515625, "learning_rate": 1.66709774597779e-05, "loss": 1.9632, "step": 14958 }, { "epoch": 0.4826349889395476, "grad_norm": 0.388671875, "learning_rate": 1.6669413511828693e-05, "loss": 1.9785, "step": 14959 }, { "epoch": 0.482667252793344, "grad_norm": 0.359375, "learning_rate": 1.666784954550378e-05, "loss": 1.9452, "step": 14960 }, { "epoch": 0.48269951664714034, "grad_norm": 0.349609375, "learning_rate": 1.666628556082038e-05, "loss": 1.9473, "step": 14961 }, { "epoch": 0.4827317805009367, "grad_norm": 0.373046875, "learning_rate": 1.66647215577957e-05, "loss": 1.942, "step": 14962 }, { "epoch": 0.482764044354733, "grad_norm": 0.359375, "learning_rate": 1.6663157536446957e-05, "loss": 1.9816, "step": 14963 }, { "epoch": 0.48279630820852937, "grad_norm": 0.369140625, "learning_rate": 1.6661593496791377e-05, "loss": 1.9771, "step": 14964 }, { "epoch": 0.4828285720623257, "grad_norm": 0.35546875, "learning_rate": 1.666002943884616e-05, "loss": 1.9637, "step": 14965 }, { "epoch": 0.48286083591612206, "grad_norm": 0.345703125, "learning_rate": 1.665846536262853e-05, "loss": 1.9785, "step": 14966 }, { "epoch": 0.4828930997699184, "grad_norm": 0.345703125, "learning_rate": 1.665690126815571e-05, "loss": 2.0082, "step": 14967 }, { "epoch": 0.48292536362371474, "grad_norm": 0.375, "learning_rate": 1.6655337155444902e-05, "loss": 1.9926, "step": 14968 }, { "epoch": 0.4829576274775111, "grad_norm": 0.349609375, "learning_rate": 1.6653773024513333e-05, "loss": 1.9933, "step": 14969 }, { "epoch": 0.48298989133130743, "grad_norm": 0.36328125, "learning_rate": 1.665220887537822e-05, "loss": 2.0087, "step": 14970 }, { "epoch": 0.4830221551851038, "grad_norm": 0.349609375, "learning_rate": 1.665064470805677e-05, "loss": 1.982, "step": 14971 }, { "epoch": 0.4830544190389001, "grad_norm": 0.357421875, "learning_rate": 1.664908052256621e-05, "loss": 1.9727, "step": 14972 }, { "epoch": 0.48308668289269646, "grad_norm": 0.345703125, "learning_rate": 1.6647516318923758e-05, "loss": 1.9835, "step": 14973 }, { "epoch": 0.4831189467464928, "grad_norm": 0.3671875, "learning_rate": 1.6645952097146624e-05, "loss": 2.0093, "step": 14974 }, { "epoch": 0.48315121060028915, "grad_norm": 0.365234375, "learning_rate": 1.6644387857252034e-05, "loss": 1.9762, "step": 14975 }, { "epoch": 0.4831834744540855, "grad_norm": 0.35546875, "learning_rate": 1.66428235992572e-05, "loss": 1.9698, "step": 14976 }, { "epoch": 0.48321573830788184, "grad_norm": 0.353515625, "learning_rate": 1.6641259323179343e-05, "loss": 1.9994, "step": 14977 }, { "epoch": 0.4832480021616782, "grad_norm": 0.357421875, "learning_rate": 1.6639695029035682e-05, "loss": 1.9894, "step": 14978 }, { "epoch": 0.4832802660154745, "grad_norm": 0.369140625, "learning_rate": 1.663813071684343e-05, "loss": 1.9767, "step": 14979 }, { "epoch": 0.4833125298692709, "grad_norm": 0.34765625, "learning_rate": 1.6636566386619813e-05, "loss": 1.9856, "step": 14980 }, { "epoch": 0.48334479372306727, "grad_norm": 0.359375, "learning_rate": 1.663500203838205e-05, "loss": 1.9937, "step": 14981 }, { "epoch": 0.4833770575768636, "grad_norm": 0.345703125, "learning_rate": 1.6633437672147356e-05, "loss": 1.9797, "step": 14982 }, { "epoch": 0.48340932143065996, "grad_norm": 0.3515625, "learning_rate": 1.663187328793295e-05, "loss": 1.9671, "step": 14983 }, { "epoch": 0.4834415852844563, "grad_norm": 0.3515625, "learning_rate": 1.6630308885756052e-05, "loss": 1.991, "step": 14984 }, { "epoch": 0.48347384913825264, "grad_norm": 0.341796875, "learning_rate": 1.6628744465633892e-05, "loss": 1.9723, "step": 14985 }, { "epoch": 0.483506112992049, "grad_norm": 0.337890625, "learning_rate": 1.662718002758367e-05, "loss": 1.9477, "step": 14986 }, { "epoch": 0.48353837684584533, "grad_norm": 0.33984375, "learning_rate": 1.6625615571622624e-05, "loss": 1.9754, "step": 14987 }, { "epoch": 0.4835706406996417, "grad_norm": 0.34765625, "learning_rate": 1.6624051097767966e-05, "loss": 1.9581, "step": 14988 }, { "epoch": 0.483602904553438, "grad_norm": 0.333984375, "learning_rate": 1.6622486606036916e-05, "loss": 1.9753, "step": 14989 }, { "epoch": 0.48363516840723436, "grad_norm": 0.341796875, "learning_rate": 1.6620922096446704e-05, "loss": 1.9973, "step": 14990 }, { "epoch": 0.4836674322610307, "grad_norm": 0.369140625, "learning_rate": 1.661935756901454e-05, "loss": 1.9673, "step": 14991 }, { "epoch": 0.48369969611482705, "grad_norm": 0.375, "learning_rate": 1.6617793023757654e-05, "loss": 1.962, "step": 14992 }, { "epoch": 0.4837319599686234, "grad_norm": 0.37109375, "learning_rate": 1.661622846069326e-05, "loss": 1.9594, "step": 14993 }, { "epoch": 0.48376422382241974, "grad_norm": 0.3671875, "learning_rate": 1.6614663879838578e-05, "loss": 1.9743, "step": 14994 }, { "epoch": 0.4837964876762161, "grad_norm": 0.33984375, "learning_rate": 1.661309928121084e-05, "loss": 1.9609, "step": 14995 }, { "epoch": 0.4838287515300124, "grad_norm": 0.3515625, "learning_rate": 1.661153466482726e-05, "loss": 1.9462, "step": 14996 }, { "epoch": 0.48386101538380877, "grad_norm": 0.359375, "learning_rate": 1.6609970030705063e-05, "loss": 1.9687, "step": 14997 }, { "epoch": 0.4838932792376051, "grad_norm": 0.34765625, "learning_rate": 1.6608405378861475e-05, "loss": 1.957, "step": 14998 }, { "epoch": 0.48392554309140146, "grad_norm": 0.384765625, "learning_rate": 1.660684070931371e-05, "loss": 1.9792, "step": 14999 }, { "epoch": 0.48395780694519785, "grad_norm": 0.365234375, "learning_rate": 1.6605276022078994e-05, "loss": 1.9882, "step": 15000 }, { "epoch": 0.4839900707989942, "grad_norm": 0.3828125, "learning_rate": 1.6603711317174555e-05, "loss": 1.9293, "step": 15001 }, { "epoch": 0.48402233465279054, "grad_norm": 0.353515625, "learning_rate": 1.660214659461761e-05, "loss": 1.9782, "step": 15002 }, { "epoch": 0.4840545985065869, "grad_norm": 0.3984375, "learning_rate": 1.6600581854425386e-05, "loss": 1.9881, "step": 15003 }, { "epoch": 0.48408686236038323, "grad_norm": 0.359375, "learning_rate": 1.6599017096615103e-05, "loss": 1.9569, "step": 15004 }, { "epoch": 0.4841191262141796, "grad_norm": 0.361328125, "learning_rate": 1.659745232120399e-05, "loss": 1.9805, "step": 15005 }, { "epoch": 0.4841513900679759, "grad_norm": 0.3515625, "learning_rate": 1.6595887528209267e-05, "loss": 1.9949, "step": 15006 }, { "epoch": 0.48418365392177226, "grad_norm": 0.37109375, "learning_rate": 1.6594322717648154e-05, "loss": 1.9735, "step": 15007 }, { "epoch": 0.4842159177755686, "grad_norm": 0.35546875, "learning_rate": 1.6592757889537888e-05, "loss": 2.0194, "step": 15008 }, { "epoch": 0.48424818162936495, "grad_norm": 0.34765625, "learning_rate": 1.6591193043895682e-05, "loss": 1.9503, "step": 15009 }, { "epoch": 0.4842804454831613, "grad_norm": 0.34765625, "learning_rate": 1.6589628180738767e-05, "loss": 1.9793, "step": 15010 }, { "epoch": 0.48431270933695764, "grad_norm": 0.36328125, "learning_rate": 1.658806330008436e-05, "loss": 1.9893, "step": 15011 }, { "epoch": 0.484344973190754, "grad_norm": 0.3515625, "learning_rate": 1.6586498401949696e-05, "loss": 1.9935, "step": 15012 }, { "epoch": 0.4843772370445503, "grad_norm": 0.349609375, "learning_rate": 1.6584933486351995e-05, "loss": 1.9661, "step": 15013 }, { "epoch": 0.48440950089834667, "grad_norm": 0.34375, "learning_rate": 1.658336855330848e-05, "loss": 2.0102, "step": 15014 }, { "epoch": 0.484441764752143, "grad_norm": 0.361328125, "learning_rate": 1.6581803602836384e-05, "loss": 1.9615, "step": 15015 }, { "epoch": 0.48447402860593936, "grad_norm": 0.3515625, "learning_rate": 1.658023863495293e-05, "loss": 1.9868, "step": 15016 }, { "epoch": 0.4845062924597357, "grad_norm": 0.341796875, "learning_rate": 1.6578673649675335e-05, "loss": 1.9951, "step": 15017 }, { "epoch": 0.48453855631353204, "grad_norm": 0.359375, "learning_rate": 1.657710864702084e-05, "loss": 1.9846, "step": 15018 }, { "epoch": 0.4845708201673284, "grad_norm": 0.359375, "learning_rate": 1.6575543627006668e-05, "loss": 1.9841, "step": 15019 }, { "epoch": 0.4846030840211248, "grad_norm": 0.38671875, "learning_rate": 1.657397858965003e-05, "loss": 2.0032, "step": 15020 }, { "epoch": 0.48463534787492113, "grad_norm": 0.361328125, "learning_rate": 1.6572413534968174e-05, "loss": 1.9783, "step": 15021 }, { "epoch": 0.4846676117287175, "grad_norm": 0.37890625, "learning_rate": 1.6570848462978314e-05, "loss": 2.0047, "step": 15022 }, { "epoch": 0.4846998755825138, "grad_norm": 0.3515625, "learning_rate": 1.6569283373697683e-05, "loss": 2.0003, "step": 15023 }, { "epoch": 0.48473213943631016, "grad_norm": 0.361328125, "learning_rate": 1.6567718267143502e-05, "loss": 1.9924, "step": 15024 }, { "epoch": 0.4847644032901065, "grad_norm": 0.345703125, "learning_rate": 1.656615314333301e-05, "loss": 1.9684, "step": 15025 }, { "epoch": 0.48479666714390285, "grad_norm": 0.349609375, "learning_rate": 1.6564588002283426e-05, "loss": 1.9843, "step": 15026 }, { "epoch": 0.4848289309976992, "grad_norm": 0.34765625, "learning_rate": 1.6563022844011978e-05, "loss": 1.9775, "step": 15027 }, { "epoch": 0.48486119485149554, "grad_norm": 0.33984375, "learning_rate": 1.6561457668535895e-05, "loss": 1.9794, "step": 15028 }, { "epoch": 0.4848934587052919, "grad_norm": 0.3515625, "learning_rate": 1.6559892475872405e-05, "loss": 1.9699, "step": 15029 }, { "epoch": 0.4849257225590882, "grad_norm": 0.419921875, "learning_rate": 1.655832726603874e-05, "loss": 1.9936, "step": 15030 }, { "epoch": 0.48495798641288457, "grad_norm": 0.361328125, "learning_rate": 1.6556762039052125e-05, "loss": 1.986, "step": 15031 }, { "epoch": 0.4849902502666809, "grad_norm": 0.400390625, "learning_rate": 1.6555196794929795e-05, "loss": 1.9859, "step": 15032 }, { "epoch": 0.48502251412047725, "grad_norm": 0.353515625, "learning_rate": 1.6553631533688973e-05, "loss": 1.985, "step": 15033 }, { "epoch": 0.4850547779742736, "grad_norm": 0.37109375, "learning_rate": 1.655206625534688e-05, "loss": 1.9955, "step": 15034 }, { "epoch": 0.48508704182806994, "grad_norm": 0.34765625, "learning_rate": 1.6550500959920765e-05, "loss": 1.9914, "step": 15035 }, { "epoch": 0.4851193056818663, "grad_norm": 0.349609375, "learning_rate": 1.654893564742785e-05, "loss": 1.9498, "step": 15036 }, { "epoch": 0.48515156953566263, "grad_norm": 0.33203125, "learning_rate": 1.6547370317885358e-05, "loss": 1.9623, "step": 15037 }, { "epoch": 0.485183833389459, "grad_norm": 0.458984375, "learning_rate": 1.6545804971310524e-05, "loss": 1.9773, "step": 15038 }, { "epoch": 0.4852160972432553, "grad_norm": 0.37109375, "learning_rate": 1.6544239607720573e-05, "loss": 1.9741, "step": 15039 }, { "epoch": 0.48524836109705166, "grad_norm": 0.353515625, "learning_rate": 1.6542674227132748e-05, "loss": 1.9877, "step": 15040 }, { "epoch": 0.48528062495084806, "grad_norm": 0.353515625, "learning_rate": 1.6541108829564263e-05, "loss": 2.0042, "step": 15041 }, { "epoch": 0.4853128888046444, "grad_norm": 0.369140625, "learning_rate": 1.6539543415032362e-05, "loss": 1.985, "step": 15042 }, { "epoch": 0.48534515265844075, "grad_norm": 0.37890625, "learning_rate": 1.653797798355428e-05, "loss": 1.9827, "step": 15043 }, { "epoch": 0.4853774165122371, "grad_norm": 0.34765625, "learning_rate": 1.6536412535147226e-05, "loss": 1.9779, "step": 15044 }, { "epoch": 0.48540968036603344, "grad_norm": 0.365234375, "learning_rate": 1.6534847069828452e-05, "loss": 1.9924, "step": 15045 }, { "epoch": 0.4854419442198298, "grad_norm": 0.37109375, "learning_rate": 1.6533281587615183e-05, "loss": 1.9664, "step": 15046 }, { "epoch": 0.4854742080736261, "grad_norm": 0.373046875, "learning_rate": 1.6531716088524653e-05, "loss": 1.9599, "step": 15047 }, { "epoch": 0.48550647192742247, "grad_norm": 0.3671875, "learning_rate": 1.6530150572574082e-05, "loss": 1.9919, "step": 15048 }, { "epoch": 0.4855387357812188, "grad_norm": 0.37890625, "learning_rate": 1.6528585039780718e-05, "loss": 1.9791, "step": 15049 }, { "epoch": 0.48557099963501515, "grad_norm": 0.365234375, "learning_rate": 1.652701949016179e-05, "loss": 1.9876, "step": 15050 }, { "epoch": 0.4856032634888115, "grad_norm": 0.388671875, "learning_rate": 1.652545392373452e-05, "loss": 1.9498, "step": 15051 }, { "epoch": 0.48563552734260784, "grad_norm": 0.365234375, "learning_rate": 1.6523888340516145e-05, "loss": 2.0027, "step": 15052 }, { "epoch": 0.4856677911964042, "grad_norm": 0.419921875, "learning_rate": 1.6522322740523907e-05, "loss": 1.9793, "step": 15053 }, { "epoch": 0.48570005505020053, "grad_norm": 0.3515625, "learning_rate": 1.6520757123775032e-05, "loss": 1.9718, "step": 15054 }, { "epoch": 0.4857323189039969, "grad_norm": 0.376953125, "learning_rate": 1.651919149028675e-05, "loss": 1.9918, "step": 15055 }, { "epoch": 0.4857645827577932, "grad_norm": 0.35546875, "learning_rate": 1.65176258400763e-05, "loss": 1.995, "step": 15056 }, { "epoch": 0.48579684661158956, "grad_norm": 0.369140625, "learning_rate": 1.6516060173160914e-05, "loss": 1.9796, "step": 15057 }, { "epoch": 0.4858291104653859, "grad_norm": 0.357421875, "learning_rate": 1.6514494489557823e-05, "loss": 2.0026, "step": 15058 }, { "epoch": 0.48586137431918225, "grad_norm": 0.361328125, "learning_rate": 1.6512928789284263e-05, "loss": 1.9756, "step": 15059 }, { "epoch": 0.4858936381729786, "grad_norm": 0.35546875, "learning_rate": 1.6511363072357472e-05, "loss": 1.9557, "step": 15060 }, { "epoch": 0.485925902026775, "grad_norm": 0.365234375, "learning_rate": 1.6509797338794677e-05, "loss": 1.9397, "step": 15061 }, { "epoch": 0.48595816588057134, "grad_norm": 0.34375, "learning_rate": 1.6508231588613116e-05, "loss": 1.94, "step": 15062 }, { "epoch": 0.4859904297343677, "grad_norm": 0.3515625, "learning_rate": 1.6506665821830027e-05, "loss": 2.0023, "step": 15063 }, { "epoch": 0.486022693588164, "grad_norm": 0.34765625, "learning_rate": 1.650510003846264e-05, "loss": 2.0033, "step": 15064 }, { "epoch": 0.48605495744196037, "grad_norm": 0.341796875, "learning_rate": 1.650353423852819e-05, "loss": 2.009, "step": 15065 }, { "epoch": 0.4860872212957567, "grad_norm": 0.349609375, "learning_rate": 1.650196842204392e-05, "loss": 2.016, "step": 15066 }, { "epoch": 0.48611948514955305, "grad_norm": 0.3359375, "learning_rate": 1.6500402589027055e-05, "loss": 1.9912, "step": 15067 }, { "epoch": 0.4861517490033494, "grad_norm": 0.337890625, "learning_rate": 1.649883673949483e-05, "loss": 1.9849, "step": 15068 }, { "epoch": 0.48618401285714574, "grad_norm": 0.34375, "learning_rate": 1.649727087346449e-05, "loss": 1.9964, "step": 15069 }, { "epoch": 0.4862162767109421, "grad_norm": 0.330078125, "learning_rate": 1.6495704990953264e-05, "loss": 1.9875, "step": 15070 }, { "epoch": 0.48624854056473843, "grad_norm": 0.328125, "learning_rate": 1.6494139091978395e-05, "loss": 1.9728, "step": 15071 }, { "epoch": 0.4862808044185348, "grad_norm": 0.330078125, "learning_rate": 1.6492573176557108e-05, "loss": 1.9654, "step": 15072 }, { "epoch": 0.4863130682723311, "grad_norm": 0.34765625, "learning_rate": 1.6491007244706647e-05, "loss": 1.992, "step": 15073 }, { "epoch": 0.48634533212612746, "grad_norm": 0.33984375, "learning_rate": 1.6489441296444256e-05, "loss": 1.9945, "step": 15074 }, { "epoch": 0.4863775959799238, "grad_norm": 0.333984375, "learning_rate": 1.6487875331787156e-05, "loss": 1.9635, "step": 15075 }, { "epoch": 0.48640985983372015, "grad_norm": 0.34375, "learning_rate": 1.6486309350752596e-05, "loss": 1.9682, "step": 15076 }, { "epoch": 0.4864421236875165, "grad_norm": 0.33203125, "learning_rate": 1.6484743353357804e-05, "loss": 1.9724, "step": 15077 }, { "epoch": 0.48647438754131284, "grad_norm": 0.34375, "learning_rate": 1.648317733962003e-05, "loss": 2.0054, "step": 15078 }, { "epoch": 0.4865066513951092, "grad_norm": 0.349609375, "learning_rate": 1.6481611309556497e-05, "loss": 1.9941, "step": 15079 }, { "epoch": 0.4865389152489055, "grad_norm": 0.33984375, "learning_rate": 1.648004526318445e-05, "loss": 1.9752, "step": 15080 }, { "epoch": 0.4865711791027019, "grad_norm": 0.345703125, "learning_rate": 1.6478479200521136e-05, "loss": 1.9978, "step": 15081 }, { "epoch": 0.48660344295649827, "grad_norm": 0.341796875, "learning_rate": 1.6476913121583774e-05, "loss": 1.9516, "step": 15082 }, { "epoch": 0.4866357068102946, "grad_norm": 0.349609375, "learning_rate": 1.6475347026389613e-05, "loss": 1.9711, "step": 15083 }, { "epoch": 0.48666797066409095, "grad_norm": 0.369140625, "learning_rate": 1.6473780914955893e-05, "loss": 1.9502, "step": 15084 }, { "epoch": 0.4867002345178873, "grad_norm": 0.361328125, "learning_rate": 1.6472214787299848e-05, "loss": 1.9687, "step": 15085 }, { "epoch": 0.48673249837168364, "grad_norm": 0.34765625, "learning_rate": 1.6470648643438715e-05, "loss": 2.0172, "step": 15086 }, { "epoch": 0.48676476222548, "grad_norm": 0.345703125, "learning_rate": 1.646908248338974e-05, "loss": 1.9719, "step": 15087 }, { "epoch": 0.48679702607927633, "grad_norm": 0.345703125, "learning_rate": 1.6467516307170164e-05, "loss": 1.9881, "step": 15088 }, { "epoch": 0.4868292899330727, "grad_norm": 0.33984375, "learning_rate": 1.6465950114797214e-05, "loss": 1.9858, "step": 15089 }, { "epoch": 0.486861553786869, "grad_norm": 0.345703125, "learning_rate": 1.6464383906288138e-05, "loss": 1.9786, "step": 15090 }, { "epoch": 0.48689381764066536, "grad_norm": 0.361328125, "learning_rate": 1.646281768166018e-05, "loss": 1.9671, "step": 15091 }, { "epoch": 0.4869260814944617, "grad_norm": 0.33984375, "learning_rate": 1.6461251440930574e-05, "loss": 2.0009, "step": 15092 }, { "epoch": 0.48695834534825805, "grad_norm": 0.341796875, "learning_rate": 1.645968518411655e-05, "loss": 1.9393, "step": 15093 }, { "epoch": 0.4869906092020544, "grad_norm": 0.34375, "learning_rate": 1.645811891123537e-05, "loss": 1.9688, "step": 15094 }, { "epoch": 0.48702287305585074, "grad_norm": 0.3515625, "learning_rate": 1.645655262230426e-05, "loss": 1.9824, "step": 15095 }, { "epoch": 0.4870551369096471, "grad_norm": 0.337890625, "learning_rate": 1.6454986317340463e-05, "loss": 2.0082, "step": 15096 }, { "epoch": 0.4870874007634434, "grad_norm": 0.341796875, "learning_rate": 1.645341999636122e-05, "loss": 1.9552, "step": 15097 }, { "epoch": 0.48711966461723977, "grad_norm": 0.3359375, "learning_rate": 1.6451853659383774e-05, "loss": 1.9899, "step": 15098 }, { "epoch": 0.4871519284710361, "grad_norm": 0.345703125, "learning_rate": 1.645028730642536e-05, "loss": 1.967, "step": 15099 }, { "epoch": 0.48718419232483245, "grad_norm": 0.353515625, "learning_rate": 1.6448720937503227e-05, "loss": 2.0105, "step": 15100 }, { "epoch": 0.48721645617862885, "grad_norm": 0.333984375, "learning_rate": 1.6447154552634613e-05, "loss": 1.9665, "step": 15101 }, { "epoch": 0.4872487200324252, "grad_norm": 0.34765625, "learning_rate": 1.6445588151836762e-05, "loss": 1.9881, "step": 15102 }, { "epoch": 0.48728098388622154, "grad_norm": 0.33984375, "learning_rate": 1.644402173512691e-05, "loss": 1.987, "step": 15103 }, { "epoch": 0.4873132477400179, "grad_norm": 0.3359375, "learning_rate": 1.6442455302522304e-05, "loss": 1.9637, "step": 15104 }, { "epoch": 0.48734551159381423, "grad_norm": 0.341796875, "learning_rate": 1.6440888854040187e-05, "loss": 2.0095, "step": 15105 }, { "epoch": 0.4873777754476106, "grad_norm": 0.33984375, "learning_rate": 1.6439322389697793e-05, "loss": 1.9508, "step": 15106 }, { "epoch": 0.4874100393014069, "grad_norm": 0.33984375, "learning_rate": 1.6437755909512375e-05, "loss": 1.9672, "step": 15107 }, { "epoch": 0.48744230315520326, "grad_norm": 0.33203125, "learning_rate": 1.6436189413501173e-05, "loss": 1.9668, "step": 15108 }, { "epoch": 0.4874745670089996, "grad_norm": 0.341796875, "learning_rate": 1.643462290168143e-05, "loss": 1.9662, "step": 15109 }, { "epoch": 0.48750683086279595, "grad_norm": 0.345703125, "learning_rate": 1.6433056374070376e-05, "loss": 1.9894, "step": 15110 }, { "epoch": 0.4875390947165923, "grad_norm": 0.330078125, "learning_rate": 1.643148983068528e-05, "loss": 1.9647, "step": 15111 }, { "epoch": 0.48757135857038864, "grad_norm": 0.341796875, "learning_rate": 1.6429923271543363e-05, "loss": 1.9718, "step": 15112 }, { "epoch": 0.487603622424185, "grad_norm": 0.33203125, "learning_rate": 1.6428356696661878e-05, "loss": 1.9875, "step": 15113 }, { "epoch": 0.4876358862779813, "grad_norm": 0.345703125, "learning_rate": 1.6426790106058065e-05, "loss": 2.0037, "step": 15114 }, { "epoch": 0.48766815013177767, "grad_norm": 0.345703125, "learning_rate": 1.6425223499749174e-05, "loss": 1.9683, "step": 15115 }, { "epoch": 0.487700413985574, "grad_norm": 0.34765625, "learning_rate": 1.6423656877752443e-05, "loss": 1.9684, "step": 15116 }, { "epoch": 0.48773267783937035, "grad_norm": 0.328125, "learning_rate": 1.642209024008512e-05, "loss": 1.9625, "step": 15117 }, { "epoch": 0.4877649416931667, "grad_norm": 0.35546875, "learning_rate": 1.6420523586764445e-05, "loss": 1.981, "step": 15118 }, { "epoch": 0.48779720554696304, "grad_norm": 0.341796875, "learning_rate": 1.6418956917807666e-05, "loss": 1.9837, "step": 15119 }, { "epoch": 0.4878294694007594, "grad_norm": 0.337890625, "learning_rate": 1.6417390233232025e-05, "loss": 1.9386, "step": 15120 }, { "epoch": 0.48786173325455573, "grad_norm": 0.33984375, "learning_rate": 1.6415823533054774e-05, "loss": 1.9766, "step": 15121 }, { "epoch": 0.48789399710835213, "grad_norm": 0.34375, "learning_rate": 1.6414256817293152e-05, "loss": 1.9894, "step": 15122 }, { "epoch": 0.4879262609621485, "grad_norm": 0.353515625, "learning_rate": 1.6412690085964407e-05, "loss": 1.9973, "step": 15123 }, { "epoch": 0.4879585248159448, "grad_norm": 0.3515625, "learning_rate": 1.6411123339085777e-05, "loss": 1.989, "step": 15124 }, { "epoch": 0.48799078866974116, "grad_norm": 0.345703125, "learning_rate": 1.640955657667452e-05, "loss": 1.9744, "step": 15125 }, { "epoch": 0.4880230525235375, "grad_norm": 0.345703125, "learning_rate": 1.6407989798747874e-05, "loss": 1.9787, "step": 15126 }, { "epoch": 0.48805531637733385, "grad_norm": 0.3515625, "learning_rate": 1.6406423005323086e-05, "loss": 2.0077, "step": 15127 }, { "epoch": 0.4880875802311302, "grad_norm": 0.3515625, "learning_rate": 1.6404856196417402e-05, "loss": 1.9698, "step": 15128 }, { "epoch": 0.48811984408492654, "grad_norm": 0.35546875, "learning_rate": 1.6403289372048068e-05, "loss": 2.0082, "step": 15129 }, { "epoch": 0.4881521079387229, "grad_norm": 0.353515625, "learning_rate": 1.6401722532232332e-05, "loss": 1.9729, "step": 15130 }, { "epoch": 0.4881843717925192, "grad_norm": 0.353515625, "learning_rate": 1.6400155676987437e-05, "loss": 1.9846, "step": 15131 }, { "epoch": 0.48821663564631557, "grad_norm": 0.34765625, "learning_rate": 1.639858880633064e-05, "loss": 2.0176, "step": 15132 }, { "epoch": 0.4882488995001119, "grad_norm": 0.35546875, "learning_rate": 1.6397021920279176e-05, "loss": 1.9615, "step": 15133 }, { "epoch": 0.48828116335390825, "grad_norm": 0.34375, "learning_rate": 1.6395455018850294e-05, "loss": 1.9887, "step": 15134 }, { "epoch": 0.4883134272077046, "grad_norm": 0.337890625, "learning_rate": 1.6393888102061252e-05, "loss": 1.9526, "step": 15135 }, { "epoch": 0.48834569106150094, "grad_norm": 0.361328125, "learning_rate": 1.6392321169929283e-05, "loss": 1.9965, "step": 15136 }, { "epoch": 0.4883779549152973, "grad_norm": 0.3359375, "learning_rate": 1.6390754222471645e-05, "loss": 1.993, "step": 15137 }, { "epoch": 0.48841021876909363, "grad_norm": 0.375, "learning_rate": 1.638918725970558e-05, "loss": 2.0163, "step": 15138 }, { "epoch": 0.48844248262289, "grad_norm": 0.35546875, "learning_rate": 1.6387620281648338e-05, "loss": 1.9875, "step": 15139 }, { "epoch": 0.4884747464766863, "grad_norm": 0.3984375, "learning_rate": 1.6386053288317175e-05, "loss": 2.002, "step": 15140 }, { "epoch": 0.48850701033048266, "grad_norm": 0.349609375, "learning_rate": 1.6384486279729318e-05, "loss": 1.9752, "step": 15141 }, { "epoch": 0.48853927418427906, "grad_norm": 0.392578125, "learning_rate": 1.638291925590204e-05, "loss": 1.9695, "step": 15142 }, { "epoch": 0.4885715380380754, "grad_norm": 0.37109375, "learning_rate": 1.6381352216852575e-05, "loss": 1.9429, "step": 15143 }, { "epoch": 0.48860380189187175, "grad_norm": 0.34765625, "learning_rate": 1.6379785162598177e-05, "loss": 1.9771, "step": 15144 }, { "epoch": 0.4886360657456681, "grad_norm": 0.38671875, "learning_rate": 1.6378218093156092e-05, "loss": 1.9513, "step": 15145 }, { "epoch": 0.48866832959946443, "grad_norm": 0.34765625, "learning_rate": 1.6376651008543575e-05, "loss": 1.9639, "step": 15146 }, { "epoch": 0.4887005934532608, "grad_norm": 0.353515625, "learning_rate": 1.6375083908777867e-05, "loss": 1.9902, "step": 15147 }, { "epoch": 0.4887328573070571, "grad_norm": 0.42578125, "learning_rate": 1.637351679387622e-05, "loss": 2.0051, "step": 15148 }, { "epoch": 0.48876512116085347, "grad_norm": 0.345703125, "learning_rate": 1.637194966385589e-05, "loss": 2.0049, "step": 15149 }, { "epoch": 0.4887973850146498, "grad_norm": 0.3984375, "learning_rate": 1.6370382518734123e-05, "loss": 1.9944, "step": 15150 }, { "epoch": 0.48882964886844615, "grad_norm": 0.349609375, "learning_rate": 1.6368815358528165e-05, "loss": 2.0049, "step": 15151 }, { "epoch": 0.4888619127222425, "grad_norm": 0.375, "learning_rate": 1.6367248183255273e-05, "loss": 1.9738, "step": 15152 }, { "epoch": 0.48889417657603884, "grad_norm": 0.37109375, "learning_rate": 1.6365680992932692e-05, "loss": 1.9788, "step": 15153 }, { "epoch": 0.4889264404298352, "grad_norm": 0.357421875, "learning_rate": 1.6364113787577676e-05, "loss": 2.0178, "step": 15154 }, { "epoch": 0.48895870428363153, "grad_norm": 0.376953125, "learning_rate": 1.636254656720747e-05, "loss": 1.9659, "step": 15155 }, { "epoch": 0.4889909681374279, "grad_norm": 0.333984375, "learning_rate": 1.636097933183933e-05, "loss": 1.9753, "step": 15156 }, { "epoch": 0.4890232319912242, "grad_norm": 0.353515625, "learning_rate": 1.635941208149051e-05, "loss": 1.9948, "step": 15157 }, { "epoch": 0.48905549584502056, "grad_norm": 0.34765625, "learning_rate": 1.6357844816178252e-05, "loss": 2.0266, "step": 15158 }, { "epoch": 0.4890877596988169, "grad_norm": 0.345703125, "learning_rate": 1.6356277535919814e-05, "loss": 1.9888, "step": 15159 }, { "epoch": 0.48912002355261325, "grad_norm": 0.33203125, "learning_rate": 1.6354710240732447e-05, "loss": 1.9882, "step": 15160 }, { "epoch": 0.4891522874064096, "grad_norm": 0.349609375, "learning_rate": 1.63531429306334e-05, "loss": 1.9992, "step": 15161 }, { "epoch": 0.489184551260206, "grad_norm": 0.34375, "learning_rate": 1.6351575605639927e-05, "loss": 1.9782, "step": 15162 }, { "epoch": 0.48921681511400233, "grad_norm": 0.349609375, "learning_rate": 1.6350008265769282e-05, "loss": 1.9986, "step": 15163 }, { "epoch": 0.4892490789677987, "grad_norm": 0.353515625, "learning_rate": 1.634844091103871e-05, "loss": 1.9908, "step": 15164 }, { "epoch": 0.489281342821595, "grad_norm": 0.337890625, "learning_rate": 1.6346873541465466e-05, "loss": 1.9201, "step": 15165 }, { "epoch": 0.48931360667539137, "grad_norm": 0.36328125, "learning_rate": 1.6345306157066814e-05, "loss": 1.972, "step": 15166 }, { "epoch": 0.4893458705291877, "grad_norm": 0.34765625, "learning_rate": 1.634373875785999e-05, "loss": 1.9896, "step": 15167 }, { "epoch": 0.48937813438298405, "grad_norm": 0.359375, "learning_rate": 1.634217134386226e-05, "loss": 1.9802, "step": 15168 }, { "epoch": 0.4894103982367804, "grad_norm": 0.36328125, "learning_rate": 1.634060391509086e-05, "loss": 1.9817, "step": 15169 }, { "epoch": 0.48944266209057674, "grad_norm": 0.361328125, "learning_rate": 1.6339036471563058e-05, "loss": 1.9708, "step": 15170 }, { "epoch": 0.4894749259443731, "grad_norm": 0.3359375, "learning_rate": 1.6337469013296106e-05, "loss": 1.9479, "step": 15171 }, { "epoch": 0.48950718979816943, "grad_norm": 0.35546875, "learning_rate": 1.633590154030725e-05, "loss": 1.9757, "step": 15172 }, { "epoch": 0.4895394536519658, "grad_norm": 0.361328125, "learning_rate": 1.6334334052613753e-05, "loss": 2.0029, "step": 15173 }, { "epoch": 0.4895717175057621, "grad_norm": 0.357421875, "learning_rate": 1.6332766550232868e-05, "loss": 1.961, "step": 15174 }, { "epoch": 0.48960398135955846, "grad_norm": 0.349609375, "learning_rate": 1.6331199033181836e-05, "loss": 1.9631, "step": 15175 }, { "epoch": 0.4896362452133548, "grad_norm": 0.34765625, "learning_rate": 1.632963150147792e-05, "loss": 1.9572, "step": 15176 }, { "epoch": 0.48966850906715115, "grad_norm": 0.33984375, "learning_rate": 1.6328063955138383e-05, "loss": 1.9704, "step": 15177 }, { "epoch": 0.4897007729209475, "grad_norm": 0.341796875, "learning_rate": 1.6326496394180466e-05, "loss": 1.9653, "step": 15178 }, { "epoch": 0.48973303677474384, "grad_norm": 0.34375, "learning_rate": 1.6324928818621423e-05, "loss": 1.9893, "step": 15179 }, { "epoch": 0.4897653006285402, "grad_norm": 0.34375, "learning_rate": 1.6323361228478522e-05, "loss": 1.9679, "step": 15180 }, { "epoch": 0.4897975644823365, "grad_norm": 0.3515625, "learning_rate": 1.6321793623769002e-05, "loss": 1.8883, "step": 15181 }, { "epoch": 0.4898298283361329, "grad_norm": 0.36328125, "learning_rate": 1.6320226004510133e-05, "loss": 1.9627, "step": 15182 }, { "epoch": 0.48986209218992927, "grad_norm": 0.3359375, "learning_rate": 1.6318658370719164e-05, "loss": 1.974, "step": 15183 }, { "epoch": 0.4898943560437256, "grad_norm": 0.33984375, "learning_rate": 1.6317090722413343e-05, "loss": 1.9878, "step": 15184 }, { "epoch": 0.48992661989752195, "grad_norm": 0.36328125, "learning_rate": 1.631552305960994e-05, "loss": 1.998, "step": 15185 }, { "epoch": 0.4899588837513183, "grad_norm": 0.3515625, "learning_rate": 1.6313955382326193e-05, "loss": 1.9944, "step": 15186 }, { "epoch": 0.48999114760511464, "grad_norm": 0.353515625, "learning_rate": 1.6312387690579376e-05, "loss": 1.9907, "step": 15187 }, { "epoch": 0.490023411458911, "grad_norm": 0.3515625, "learning_rate": 1.6310819984386738e-05, "loss": 1.9927, "step": 15188 }, { "epoch": 0.49005567531270733, "grad_norm": 0.349609375, "learning_rate": 1.630925226376553e-05, "loss": 1.9765, "step": 15189 }, { "epoch": 0.49008793916650367, "grad_norm": 0.337890625, "learning_rate": 1.630768452873301e-05, "loss": 2.0053, "step": 15190 }, { "epoch": 0.4901202030203, "grad_norm": 0.369140625, "learning_rate": 1.6306116779306443e-05, "loss": 1.994, "step": 15191 }, { "epoch": 0.49015246687409636, "grad_norm": 0.365234375, "learning_rate": 1.630454901550307e-05, "loss": 1.9732, "step": 15192 }, { "epoch": 0.4901847307278927, "grad_norm": 0.392578125, "learning_rate": 1.6302981237340164e-05, "loss": 1.9765, "step": 15193 }, { "epoch": 0.49021699458168905, "grad_norm": 0.37109375, "learning_rate": 1.6301413444834977e-05, "loss": 2.0034, "step": 15194 }, { "epoch": 0.4902492584354854, "grad_norm": 0.34765625, "learning_rate": 1.6299845638004765e-05, "loss": 1.9881, "step": 15195 }, { "epoch": 0.49028152228928173, "grad_norm": 0.35546875, "learning_rate": 1.629827781686678e-05, "loss": 1.9883, "step": 15196 }, { "epoch": 0.4903137861430781, "grad_norm": 0.37109375, "learning_rate": 1.629670998143829e-05, "loss": 1.9448, "step": 15197 }, { "epoch": 0.4903460499968744, "grad_norm": 0.33984375, "learning_rate": 1.6295142131736538e-05, "loss": 1.9784, "step": 15198 }, { "epoch": 0.49037831385067077, "grad_norm": 0.345703125, "learning_rate": 1.62935742677788e-05, "loss": 1.982, "step": 15199 }, { "epoch": 0.4904105777044671, "grad_norm": 0.357421875, "learning_rate": 1.6292006389582317e-05, "loss": 1.9912, "step": 15200 }, { "epoch": 0.49044284155826345, "grad_norm": 0.34375, "learning_rate": 1.629043849716436e-05, "loss": 1.986, "step": 15201 }, { "epoch": 0.4904751054120598, "grad_norm": 0.369140625, "learning_rate": 1.6288870590542176e-05, "loss": 1.9822, "step": 15202 }, { "epoch": 0.4905073692658562, "grad_norm": 0.345703125, "learning_rate": 1.6287302669733032e-05, "loss": 1.9755, "step": 15203 }, { "epoch": 0.49053963311965254, "grad_norm": 0.357421875, "learning_rate": 1.6285734734754182e-05, "loss": 1.9576, "step": 15204 }, { "epoch": 0.4905718969734489, "grad_norm": 0.337890625, "learning_rate": 1.6284166785622892e-05, "loss": 1.9719, "step": 15205 }, { "epoch": 0.49060416082724523, "grad_norm": 0.357421875, "learning_rate": 1.628259882235641e-05, "loss": 1.9951, "step": 15206 }, { "epoch": 0.49063642468104157, "grad_norm": 0.361328125, "learning_rate": 1.6281030844972002e-05, "loss": 1.9817, "step": 15207 }, { "epoch": 0.4906686885348379, "grad_norm": 0.33984375, "learning_rate": 1.6279462853486927e-05, "loss": 2.0164, "step": 15208 }, { "epoch": 0.49070095238863426, "grad_norm": 0.3671875, "learning_rate": 1.627789484791844e-05, "loss": 1.9936, "step": 15209 }, { "epoch": 0.4907332162424306, "grad_norm": 0.345703125, "learning_rate": 1.62763268282838e-05, "loss": 1.9943, "step": 15210 }, { "epoch": 0.49076548009622695, "grad_norm": 0.35546875, "learning_rate": 1.627475879460028e-05, "loss": 1.966, "step": 15211 }, { "epoch": 0.4907977439500233, "grad_norm": 0.35546875, "learning_rate": 1.6273190746885123e-05, "loss": 1.9546, "step": 15212 }, { "epoch": 0.49083000780381963, "grad_norm": 0.34765625, "learning_rate": 1.6271622685155595e-05, "loss": 2.0077, "step": 15213 }, { "epoch": 0.490862271657616, "grad_norm": 0.359375, "learning_rate": 1.627005460942896e-05, "loss": 1.9809, "step": 15214 }, { "epoch": 0.4908945355114123, "grad_norm": 0.36328125, "learning_rate": 1.6268486519722473e-05, "loss": 1.982, "step": 15215 }, { "epoch": 0.49092679936520867, "grad_norm": 0.353515625, "learning_rate": 1.62669184160534e-05, "loss": 1.9995, "step": 15216 }, { "epoch": 0.490959063219005, "grad_norm": 0.369140625, "learning_rate": 1.6265350298438993e-05, "loss": 1.977, "step": 15217 }, { "epoch": 0.49099132707280135, "grad_norm": 0.357421875, "learning_rate": 1.6263782166896518e-05, "loss": 1.9508, "step": 15218 }, { "epoch": 0.4910235909265977, "grad_norm": 0.349609375, "learning_rate": 1.626221402144324e-05, "loss": 1.9342, "step": 15219 }, { "epoch": 0.49105585478039404, "grad_norm": 0.357421875, "learning_rate": 1.6260645862096413e-05, "loss": 1.9859, "step": 15220 }, { "epoch": 0.4910881186341904, "grad_norm": 0.34765625, "learning_rate": 1.6259077688873297e-05, "loss": 1.9757, "step": 15221 }, { "epoch": 0.49112038248798673, "grad_norm": 0.34765625, "learning_rate": 1.6257509501791164e-05, "loss": 1.977, "step": 15222 }, { "epoch": 0.49115264634178313, "grad_norm": 0.359375, "learning_rate": 1.6255941300867265e-05, "loss": 1.9886, "step": 15223 }, { "epoch": 0.49118491019557947, "grad_norm": 0.369140625, "learning_rate": 1.625437308611886e-05, "loss": 1.9761, "step": 15224 }, { "epoch": 0.4912171740493758, "grad_norm": 0.34375, "learning_rate": 1.6252804857563226e-05, "loss": 2.0003, "step": 15225 }, { "epoch": 0.49124943790317216, "grad_norm": 0.369140625, "learning_rate": 1.6251236615217607e-05, "loss": 2.0111, "step": 15226 }, { "epoch": 0.4912817017569685, "grad_norm": 0.34375, "learning_rate": 1.6249668359099273e-05, "loss": 1.9961, "step": 15227 }, { "epoch": 0.49131396561076485, "grad_norm": 0.369140625, "learning_rate": 1.6248100089225493e-05, "loss": 2.0039, "step": 15228 }, { "epoch": 0.4913462294645612, "grad_norm": 0.328125, "learning_rate": 1.624653180561352e-05, "loss": 1.9818, "step": 15229 }, { "epoch": 0.49137849331835753, "grad_norm": 0.380859375, "learning_rate": 1.6244963508280616e-05, "loss": 2.0106, "step": 15230 }, { "epoch": 0.4914107571721539, "grad_norm": 0.35546875, "learning_rate": 1.624339519724405e-05, "loss": 1.9833, "step": 15231 }, { "epoch": 0.4914430210259502, "grad_norm": 0.37109375, "learning_rate": 1.6241826872521082e-05, "loss": 1.9767, "step": 15232 }, { "epoch": 0.49147528487974657, "grad_norm": 0.330078125, "learning_rate": 1.624025853412897e-05, "loss": 1.9595, "step": 15233 }, { "epoch": 0.4915075487335429, "grad_norm": 0.359375, "learning_rate": 1.623869018208499e-05, "loss": 1.9909, "step": 15234 }, { "epoch": 0.49153981258733925, "grad_norm": 0.33984375, "learning_rate": 1.6237121816406386e-05, "loss": 1.9832, "step": 15235 }, { "epoch": 0.4915720764411356, "grad_norm": 0.34765625, "learning_rate": 1.623555343711044e-05, "loss": 1.978, "step": 15236 }, { "epoch": 0.49160434029493194, "grad_norm": 0.3515625, "learning_rate": 1.6233985044214408e-05, "loss": 1.9855, "step": 15237 }, { "epoch": 0.4916366041487283, "grad_norm": 0.361328125, "learning_rate": 1.623241663773555e-05, "loss": 1.9624, "step": 15238 }, { "epoch": 0.49166886800252463, "grad_norm": 0.33984375, "learning_rate": 1.623084821769114e-05, "loss": 1.945, "step": 15239 }, { "epoch": 0.49170113185632097, "grad_norm": 0.353515625, "learning_rate": 1.622927978409843e-05, "loss": 1.9829, "step": 15240 }, { "epoch": 0.4917333957101173, "grad_norm": 0.353515625, "learning_rate": 1.622771133697469e-05, "loss": 2.0064, "step": 15241 }, { "epoch": 0.49176565956391366, "grad_norm": 0.341796875, "learning_rate": 1.622614287633719e-05, "loss": 1.9817, "step": 15242 }, { "epoch": 0.49179792341771006, "grad_norm": 0.357421875, "learning_rate": 1.6224574402203183e-05, "loss": 1.9872, "step": 15243 }, { "epoch": 0.4918301872715064, "grad_norm": 0.345703125, "learning_rate": 1.6223005914589948e-05, "loss": 1.9774, "step": 15244 }, { "epoch": 0.49186245112530275, "grad_norm": 0.34375, "learning_rate": 1.622143741351473e-05, "loss": 1.9792, "step": 15245 }, { "epoch": 0.4918947149790991, "grad_norm": 0.361328125, "learning_rate": 1.621986889899481e-05, "loss": 1.9715, "step": 15246 }, { "epoch": 0.49192697883289543, "grad_norm": 0.341796875, "learning_rate": 1.621830037104745e-05, "loss": 2.0298, "step": 15247 }, { "epoch": 0.4919592426866918, "grad_norm": 0.34375, "learning_rate": 1.6216731829689912e-05, "loss": 1.9762, "step": 15248 }, { "epoch": 0.4919915065404881, "grad_norm": 0.345703125, "learning_rate": 1.6215163274939463e-05, "loss": 1.9594, "step": 15249 }, { "epoch": 0.49202377039428447, "grad_norm": 0.35546875, "learning_rate": 1.6213594706813372e-05, "loss": 1.9559, "step": 15250 }, { "epoch": 0.4920560342480808, "grad_norm": 0.34765625, "learning_rate": 1.6212026125328893e-05, "loss": 1.9717, "step": 15251 }, { "epoch": 0.49208829810187715, "grad_norm": 0.35546875, "learning_rate": 1.6210457530503304e-05, "loss": 1.9896, "step": 15252 }, { "epoch": 0.4921205619556735, "grad_norm": 0.3359375, "learning_rate": 1.620888892235387e-05, "loss": 1.9984, "step": 15253 }, { "epoch": 0.49215282580946984, "grad_norm": 0.34765625, "learning_rate": 1.620732030089785e-05, "loss": 2.011, "step": 15254 }, { "epoch": 0.4921850896632662, "grad_norm": 0.34375, "learning_rate": 1.6205751666152516e-05, "loss": 1.9861, "step": 15255 }, { "epoch": 0.49221735351706253, "grad_norm": 0.333984375, "learning_rate": 1.6204183018135135e-05, "loss": 1.9674, "step": 15256 }, { "epoch": 0.49224961737085887, "grad_norm": 0.353515625, "learning_rate": 1.6202614356862968e-05, "loss": 1.9768, "step": 15257 }, { "epoch": 0.4922818812246552, "grad_norm": 0.353515625, "learning_rate": 1.6201045682353282e-05, "loss": 2.01, "step": 15258 }, { "epoch": 0.49231414507845156, "grad_norm": 0.3515625, "learning_rate": 1.6199476994623353e-05, "loss": 1.9847, "step": 15259 }, { "epoch": 0.4923464089322479, "grad_norm": 0.341796875, "learning_rate": 1.6197908293690438e-05, "loss": 1.9831, "step": 15260 }, { "epoch": 0.49237867278604425, "grad_norm": 0.34375, "learning_rate": 1.619633957957181e-05, "loss": 1.9919, "step": 15261 }, { "epoch": 0.4924109366398406, "grad_norm": 0.353515625, "learning_rate": 1.6194770852284732e-05, "loss": 1.9427, "step": 15262 }, { "epoch": 0.492443200493637, "grad_norm": 0.36328125, "learning_rate": 1.6193202111846474e-05, "loss": 1.9838, "step": 15263 }, { "epoch": 0.49247546434743333, "grad_norm": 0.345703125, "learning_rate": 1.6191633358274306e-05, "loss": 1.9402, "step": 15264 }, { "epoch": 0.4925077282012297, "grad_norm": 0.349609375, "learning_rate": 1.619006459158549e-05, "loss": 1.9548, "step": 15265 }, { "epoch": 0.492539992055026, "grad_norm": 0.34765625, "learning_rate": 1.6188495811797294e-05, "loss": 1.9865, "step": 15266 }, { "epoch": 0.49257225590882237, "grad_norm": 0.3515625, "learning_rate": 1.6186927018926995e-05, "loss": 1.9941, "step": 15267 }, { "epoch": 0.4926045197626187, "grad_norm": 0.349609375, "learning_rate": 1.6185358212991853e-05, "loss": 2.0057, "step": 15268 }, { "epoch": 0.49263678361641505, "grad_norm": 0.349609375, "learning_rate": 1.6183789394009138e-05, "loss": 1.987, "step": 15269 }, { "epoch": 0.4926690474702114, "grad_norm": 0.33203125, "learning_rate": 1.618222056199612e-05, "loss": 1.9778, "step": 15270 }, { "epoch": 0.49270131132400774, "grad_norm": 0.373046875, "learning_rate": 1.618065171697006e-05, "loss": 1.9894, "step": 15271 }, { "epoch": 0.4927335751778041, "grad_norm": 0.34765625, "learning_rate": 1.6179082858948237e-05, "loss": 1.9506, "step": 15272 }, { "epoch": 0.49276583903160043, "grad_norm": 0.33984375, "learning_rate": 1.6177513987947918e-05, "loss": 1.9774, "step": 15273 }, { "epoch": 0.49279810288539677, "grad_norm": 0.357421875, "learning_rate": 1.6175945103986364e-05, "loss": 1.9987, "step": 15274 }, { "epoch": 0.4928303667391931, "grad_norm": 0.33203125, "learning_rate": 1.6174376207080856e-05, "loss": 1.9683, "step": 15275 }, { "epoch": 0.49286263059298946, "grad_norm": 0.34375, "learning_rate": 1.6172807297248653e-05, "loss": 1.9991, "step": 15276 }, { "epoch": 0.4928948944467858, "grad_norm": 0.373046875, "learning_rate": 1.6171238374507028e-05, "loss": 1.9892, "step": 15277 }, { "epoch": 0.49292715830058215, "grad_norm": 0.341796875, "learning_rate": 1.6169669438873256e-05, "loss": 1.9982, "step": 15278 }, { "epoch": 0.4929594221543785, "grad_norm": 0.345703125, "learning_rate": 1.6168100490364597e-05, "loss": 1.9582, "step": 15279 }, { "epoch": 0.49299168600817483, "grad_norm": 0.34375, "learning_rate": 1.6166531528998327e-05, "loss": 1.9558, "step": 15280 }, { "epoch": 0.4930239498619712, "grad_norm": 0.337890625, "learning_rate": 1.6164962554791718e-05, "loss": 1.9643, "step": 15281 }, { "epoch": 0.4930562137157675, "grad_norm": 0.34375, "learning_rate": 1.6163393567762033e-05, "loss": 1.9745, "step": 15282 }, { "epoch": 0.4930884775695639, "grad_norm": 0.34375, "learning_rate": 1.616182456792655e-05, "loss": 1.9626, "step": 15283 }, { "epoch": 0.49312074142336026, "grad_norm": 0.337890625, "learning_rate": 1.6160255555302534e-05, "loss": 1.9825, "step": 15284 }, { "epoch": 0.4931530052771566, "grad_norm": 0.36328125, "learning_rate": 1.6158686529907258e-05, "loss": 1.9836, "step": 15285 }, { "epoch": 0.49318526913095295, "grad_norm": 0.35546875, "learning_rate": 1.6157117491757986e-05, "loss": 1.9957, "step": 15286 }, { "epoch": 0.4932175329847493, "grad_norm": 0.3515625, "learning_rate": 1.6155548440872002e-05, "loss": 1.9876, "step": 15287 }, { "epoch": 0.49324979683854564, "grad_norm": 0.373046875, "learning_rate": 1.6153979377266565e-05, "loss": 1.988, "step": 15288 }, { "epoch": 0.493282060692342, "grad_norm": 0.357421875, "learning_rate": 1.6152410300958957e-05, "loss": 1.972, "step": 15289 }, { "epoch": 0.4933143245461383, "grad_norm": 0.361328125, "learning_rate": 1.6150841211966438e-05, "loss": 1.9768, "step": 15290 }, { "epoch": 0.49334658839993467, "grad_norm": 0.373046875, "learning_rate": 1.6149272110306283e-05, "loss": 1.9866, "step": 15291 }, { "epoch": 0.493378852253731, "grad_norm": 0.369140625, "learning_rate": 1.614770299599577e-05, "loss": 1.9764, "step": 15292 }, { "epoch": 0.49341111610752736, "grad_norm": 0.361328125, "learning_rate": 1.6146133869052163e-05, "loss": 1.961, "step": 15293 }, { "epoch": 0.4934433799613237, "grad_norm": 0.34375, "learning_rate": 1.6144564729492736e-05, "loss": 1.9727, "step": 15294 }, { "epoch": 0.49347564381512005, "grad_norm": 0.3671875, "learning_rate": 1.6142995577334768e-05, "loss": 1.9993, "step": 15295 }, { "epoch": 0.4935079076689164, "grad_norm": 0.34765625, "learning_rate": 1.6141426412595517e-05, "loss": 1.9629, "step": 15296 }, { "epoch": 0.49354017152271273, "grad_norm": 0.376953125, "learning_rate": 1.6139857235292265e-05, "loss": 1.9603, "step": 15297 }, { "epoch": 0.4935724353765091, "grad_norm": 0.37890625, "learning_rate": 1.6138288045442282e-05, "loss": 1.9654, "step": 15298 }, { "epoch": 0.4936046992303054, "grad_norm": 0.35546875, "learning_rate": 1.6136718843062843e-05, "loss": 1.9856, "step": 15299 }, { "epoch": 0.49363696308410177, "grad_norm": 0.375, "learning_rate": 1.6135149628171216e-05, "loss": 2.0021, "step": 15300 }, { "epoch": 0.4936692269378981, "grad_norm": 0.330078125, "learning_rate": 1.613358040078468e-05, "loss": 1.9899, "step": 15301 }, { "epoch": 0.49370149079169445, "grad_norm": 0.380859375, "learning_rate": 1.61320111609205e-05, "loss": 2.0164, "step": 15302 }, { "epoch": 0.4937337546454908, "grad_norm": 0.3359375, "learning_rate": 1.6130441908595952e-05, "loss": 1.8941, "step": 15303 }, { "epoch": 0.4937660184992872, "grad_norm": 0.361328125, "learning_rate": 1.6128872643828314e-05, "loss": 1.9882, "step": 15304 }, { "epoch": 0.49379828235308354, "grad_norm": 0.34375, "learning_rate": 1.6127303366634856e-05, "loss": 1.9453, "step": 15305 }, { "epoch": 0.4938305462068799, "grad_norm": 0.373046875, "learning_rate": 1.612573407703285e-05, "loss": 1.9626, "step": 15306 }, { "epoch": 0.4938628100606762, "grad_norm": 0.330078125, "learning_rate": 1.612416477503957e-05, "loss": 2.0044, "step": 15307 }, { "epoch": 0.49389507391447257, "grad_norm": 0.392578125, "learning_rate": 1.612259546067229e-05, "loss": 1.9989, "step": 15308 }, { "epoch": 0.4939273377682689, "grad_norm": 0.333984375, "learning_rate": 1.6121026133948287e-05, "loss": 1.9913, "step": 15309 }, { "epoch": 0.49395960162206526, "grad_norm": 0.376953125, "learning_rate": 1.6119456794884835e-05, "loss": 1.9906, "step": 15310 }, { "epoch": 0.4939918654758616, "grad_norm": 0.353515625, "learning_rate": 1.6117887443499202e-05, "loss": 1.9959, "step": 15311 }, { "epoch": 0.49402412932965795, "grad_norm": 0.37890625, "learning_rate": 1.6116318079808667e-05, "loss": 1.9977, "step": 15312 }, { "epoch": 0.4940563931834543, "grad_norm": 0.357421875, "learning_rate": 1.6114748703830502e-05, "loss": 2.0165, "step": 15313 }, { "epoch": 0.49408865703725063, "grad_norm": 0.359375, "learning_rate": 1.6113179315581982e-05, "loss": 1.969, "step": 15314 }, { "epoch": 0.494120920891047, "grad_norm": 0.361328125, "learning_rate": 1.6111609915080384e-05, "loss": 1.9917, "step": 15315 }, { "epoch": 0.4941531847448433, "grad_norm": 0.36328125, "learning_rate": 1.611004050234299e-05, "loss": 1.9814, "step": 15316 }, { "epoch": 0.49418544859863967, "grad_norm": 0.37890625, "learning_rate": 1.6108471077387052e-05, "loss": 1.9858, "step": 15317 }, { "epoch": 0.494217712452436, "grad_norm": 0.357421875, "learning_rate": 1.6106901640229866e-05, "loss": 1.9879, "step": 15318 }, { "epoch": 0.49424997630623235, "grad_norm": 0.390625, "learning_rate": 1.6105332190888698e-05, "loss": 2.0046, "step": 15319 }, { "epoch": 0.4942822401600287, "grad_norm": 0.359375, "learning_rate": 1.6103762729380832e-05, "loss": 1.9484, "step": 15320 }, { "epoch": 0.49431450401382504, "grad_norm": 0.376953125, "learning_rate": 1.6102193255723537e-05, "loss": 1.956, "step": 15321 }, { "epoch": 0.4943467678676214, "grad_norm": 0.3515625, "learning_rate": 1.6100623769934083e-05, "loss": 1.9544, "step": 15322 }, { "epoch": 0.4943790317214177, "grad_norm": 0.369140625, "learning_rate": 1.609905427202976e-05, "loss": 1.9831, "step": 15323 }, { "epoch": 0.4944112955752141, "grad_norm": 0.3671875, "learning_rate": 1.6097484762027834e-05, "loss": 1.9823, "step": 15324 }, { "epoch": 0.49444355942901047, "grad_norm": 0.365234375, "learning_rate": 1.6095915239945576e-05, "loss": 1.9903, "step": 15325 }, { "epoch": 0.4944758232828068, "grad_norm": 0.345703125, "learning_rate": 1.6094345705800275e-05, "loss": 1.9787, "step": 15326 }, { "epoch": 0.49450808713660316, "grad_norm": 0.359375, "learning_rate": 1.6092776159609204e-05, "loss": 1.9799, "step": 15327 }, { "epoch": 0.4945403509903995, "grad_norm": 0.3515625, "learning_rate": 1.6091206601389634e-05, "loss": 1.9965, "step": 15328 }, { "epoch": 0.49457261484419585, "grad_norm": 0.34375, "learning_rate": 1.6089637031158843e-05, "loss": 1.9808, "step": 15329 }, { "epoch": 0.4946048786979922, "grad_norm": 0.345703125, "learning_rate": 1.6088067448934115e-05, "loss": 1.9934, "step": 15330 }, { "epoch": 0.49463714255178853, "grad_norm": 0.333984375, "learning_rate": 1.6086497854732715e-05, "loss": 1.9817, "step": 15331 }, { "epoch": 0.4946694064055849, "grad_norm": 0.345703125, "learning_rate": 1.6084928248571928e-05, "loss": 2.008, "step": 15332 }, { "epoch": 0.4947016702593812, "grad_norm": 0.3515625, "learning_rate": 1.608335863046903e-05, "loss": 1.9574, "step": 15333 }, { "epoch": 0.49473393411317756, "grad_norm": 0.333984375, "learning_rate": 1.6081789000441293e-05, "loss": 1.9909, "step": 15334 }, { "epoch": 0.4947661979669739, "grad_norm": 0.345703125, "learning_rate": 1.6080219358506005e-05, "loss": 1.983, "step": 15335 }, { "epoch": 0.49479846182077025, "grad_norm": 0.345703125, "learning_rate": 1.6078649704680434e-05, "loss": 1.9804, "step": 15336 }, { "epoch": 0.4948307256745666, "grad_norm": 0.34765625, "learning_rate": 1.6077080038981863e-05, "loss": 1.9921, "step": 15337 }, { "epoch": 0.49486298952836294, "grad_norm": 0.34765625, "learning_rate": 1.6075510361427566e-05, "loss": 1.9862, "step": 15338 }, { "epoch": 0.4948952533821593, "grad_norm": 0.333984375, "learning_rate": 1.607394067203482e-05, "loss": 1.9873, "step": 15339 }, { "epoch": 0.4949275172359556, "grad_norm": 0.34375, "learning_rate": 1.607237097082091e-05, "loss": 1.976, "step": 15340 }, { "epoch": 0.49495978108975197, "grad_norm": 0.349609375, "learning_rate": 1.607080125780311e-05, "loss": 1.9786, "step": 15341 }, { "epoch": 0.4949920449435483, "grad_norm": 0.341796875, "learning_rate": 1.6069231532998694e-05, "loss": 1.9879, "step": 15342 }, { "epoch": 0.49502430879734466, "grad_norm": 0.34765625, "learning_rate": 1.6067661796424947e-05, "loss": 1.9897, "step": 15343 }, { "epoch": 0.49505657265114106, "grad_norm": 0.3359375, "learning_rate": 1.6066092048099142e-05, "loss": 1.9604, "step": 15344 }, { "epoch": 0.4950888365049374, "grad_norm": 0.341796875, "learning_rate": 1.606452228803856e-05, "loss": 1.9925, "step": 15345 }, { "epoch": 0.49512110035873375, "grad_norm": 0.330078125, "learning_rate": 1.6062952516260487e-05, "loss": 1.9885, "step": 15346 }, { "epoch": 0.4951533642125301, "grad_norm": 0.35546875, "learning_rate": 1.6061382732782195e-05, "loss": 1.9892, "step": 15347 }, { "epoch": 0.49518562806632643, "grad_norm": 0.33203125, "learning_rate": 1.6059812937620954e-05, "loss": 1.9425, "step": 15348 }, { "epoch": 0.4952178919201228, "grad_norm": 0.3359375, "learning_rate": 1.605824313079406e-05, "loss": 1.9856, "step": 15349 }, { "epoch": 0.4952501557739191, "grad_norm": 0.345703125, "learning_rate": 1.6056673312318782e-05, "loss": 1.9957, "step": 15350 }, { "epoch": 0.49528241962771546, "grad_norm": 0.326171875, "learning_rate": 1.6055103482212407e-05, "loss": 1.9954, "step": 15351 }, { "epoch": 0.4953146834815118, "grad_norm": 0.359375, "learning_rate": 1.60535336404922e-05, "loss": 1.9939, "step": 15352 }, { "epoch": 0.49534694733530815, "grad_norm": 0.337890625, "learning_rate": 1.605196378717546e-05, "loss": 1.9927, "step": 15353 }, { "epoch": 0.4953792111891045, "grad_norm": 0.369140625, "learning_rate": 1.6050393922279453e-05, "loss": 2.0058, "step": 15354 }, { "epoch": 0.49541147504290084, "grad_norm": 0.34375, "learning_rate": 1.6048824045821464e-05, "loss": 1.9593, "step": 15355 }, { "epoch": 0.4954437388966972, "grad_norm": 0.34765625, "learning_rate": 1.6047254157818768e-05, "loss": 1.9944, "step": 15356 }, { "epoch": 0.4954760027504935, "grad_norm": 0.365234375, "learning_rate": 1.6045684258288658e-05, "loss": 1.9608, "step": 15357 }, { "epoch": 0.49550826660428987, "grad_norm": 0.353515625, "learning_rate": 1.6044114347248403e-05, "loss": 2.0143, "step": 15358 }, { "epoch": 0.4955405304580862, "grad_norm": 0.373046875, "learning_rate": 1.604254442471528e-05, "loss": 1.9945, "step": 15359 }, { "epoch": 0.49557279431188256, "grad_norm": 0.345703125, "learning_rate": 1.6040974490706586e-05, "loss": 1.9777, "step": 15360 }, { "epoch": 0.4956050581656789, "grad_norm": 0.375, "learning_rate": 1.6039404545239586e-05, "loss": 1.9975, "step": 15361 }, { "epoch": 0.49563732201947525, "grad_norm": 0.375, "learning_rate": 1.6037834588331562e-05, "loss": 1.9748, "step": 15362 }, { "epoch": 0.4956695858732716, "grad_norm": 0.353515625, "learning_rate": 1.603626461999981e-05, "loss": 1.9985, "step": 15363 }, { "epoch": 0.495701849727068, "grad_norm": 0.400390625, "learning_rate": 1.6034694640261594e-05, "loss": 1.9578, "step": 15364 }, { "epoch": 0.49573411358086433, "grad_norm": 0.34765625, "learning_rate": 1.60331246491342e-05, "loss": 1.9767, "step": 15365 }, { "epoch": 0.4957663774346607, "grad_norm": 0.390625, "learning_rate": 1.6031554646634915e-05, "loss": 1.9956, "step": 15366 }, { "epoch": 0.495798641288457, "grad_norm": 0.365234375, "learning_rate": 1.6029984632781015e-05, "loss": 1.9963, "step": 15367 }, { "epoch": 0.49583090514225336, "grad_norm": 0.365234375, "learning_rate": 1.6028414607589783e-05, "loss": 1.9864, "step": 15368 }, { "epoch": 0.4958631689960497, "grad_norm": 0.359375, "learning_rate": 1.60268445710785e-05, "loss": 1.9757, "step": 15369 }, { "epoch": 0.49589543284984605, "grad_norm": 0.34375, "learning_rate": 1.602527452326445e-05, "loss": 1.9652, "step": 15370 }, { "epoch": 0.4959276967036424, "grad_norm": 0.37890625, "learning_rate": 1.6023704464164912e-05, "loss": 1.9855, "step": 15371 }, { "epoch": 0.49595996055743874, "grad_norm": 0.35546875, "learning_rate": 1.602213439379717e-05, "loss": 1.9995, "step": 15372 }, { "epoch": 0.4959922244112351, "grad_norm": 0.35546875, "learning_rate": 1.6020564312178508e-05, "loss": 1.9716, "step": 15373 }, { "epoch": 0.4960244882650314, "grad_norm": 0.345703125, "learning_rate": 1.6018994219326206e-05, "loss": 1.9778, "step": 15374 }, { "epoch": 0.49605675211882777, "grad_norm": 0.34765625, "learning_rate": 1.601742411525754e-05, "loss": 2.0005, "step": 15375 }, { "epoch": 0.4960890159726241, "grad_norm": 0.3515625, "learning_rate": 1.6015853999989806e-05, "loss": 1.9952, "step": 15376 }, { "epoch": 0.49612127982642046, "grad_norm": 0.353515625, "learning_rate": 1.601428387354028e-05, "loss": 1.9829, "step": 15377 }, { "epoch": 0.4961535436802168, "grad_norm": 0.33984375, "learning_rate": 1.6012713735926245e-05, "loss": 1.9858, "step": 15378 }, { "epoch": 0.49618580753401315, "grad_norm": 0.359375, "learning_rate": 1.601114358716498e-05, "loss": 1.9906, "step": 15379 }, { "epoch": 0.4962180713878095, "grad_norm": 0.34375, "learning_rate": 1.6009573427273772e-05, "loss": 1.9917, "step": 15380 }, { "epoch": 0.49625033524160583, "grad_norm": 0.365234375, "learning_rate": 1.600800325626991e-05, "loss": 2.0121, "step": 15381 }, { "epoch": 0.4962825990954022, "grad_norm": 0.34375, "learning_rate": 1.6006433074170667e-05, "loss": 2.0008, "step": 15382 }, { "epoch": 0.4963148629491985, "grad_norm": 0.35546875, "learning_rate": 1.600486288099333e-05, "loss": 1.9635, "step": 15383 }, { "epoch": 0.49634712680299486, "grad_norm": 0.33984375, "learning_rate": 1.6003292676755186e-05, "loss": 1.9874, "step": 15384 }, { "epoch": 0.49637939065679126, "grad_norm": 0.38671875, "learning_rate": 1.6001722461473514e-05, "loss": 1.9827, "step": 15385 }, { "epoch": 0.4964116545105876, "grad_norm": 0.35546875, "learning_rate": 1.6000152235165598e-05, "loss": 1.9824, "step": 15386 }, { "epoch": 0.49644391836438395, "grad_norm": 0.39453125, "learning_rate": 1.5998581997848723e-05, "loss": 2.012, "step": 15387 }, { "epoch": 0.4964761822181803, "grad_norm": 0.35546875, "learning_rate": 1.599701174954018e-05, "loss": 1.9985, "step": 15388 }, { "epoch": 0.49650844607197664, "grad_norm": 0.35546875, "learning_rate": 1.599544149025724e-05, "loss": 1.9778, "step": 15389 }, { "epoch": 0.496540709925773, "grad_norm": 0.3515625, "learning_rate": 1.5993871220017198e-05, "loss": 1.9885, "step": 15390 }, { "epoch": 0.4965729737795693, "grad_norm": 0.396484375, "learning_rate": 1.5992300938837335e-05, "loss": 1.9799, "step": 15391 }, { "epoch": 0.49660523763336567, "grad_norm": 0.369140625, "learning_rate": 1.5990730646734935e-05, "loss": 2.0059, "step": 15392 }, { "epoch": 0.496637501487162, "grad_norm": 0.37890625, "learning_rate": 1.598916034372728e-05, "loss": 1.9815, "step": 15393 }, { "epoch": 0.49666976534095836, "grad_norm": 0.35546875, "learning_rate": 1.5987590029831662e-05, "loss": 1.9771, "step": 15394 }, { "epoch": 0.4967020291947547, "grad_norm": 0.38671875, "learning_rate": 1.598601970506536e-05, "loss": 1.9565, "step": 15395 }, { "epoch": 0.49673429304855105, "grad_norm": 0.353515625, "learning_rate": 1.598444936944566e-05, "loss": 2.0058, "step": 15396 }, { "epoch": 0.4967665569023474, "grad_norm": 0.41015625, "learning_rate": 1.598287902298984e-05, "loss": 2.0036, "step": 15397 }, { "epoch": 0.49679882075614373, "grad_norm": 0.361328125, "learning_rate": 1.5981308665715205e-05, "loss": 1.9766, "step": 15398 }, { "epoch": 0.4968310846099401, "grad_norm": 0.384765625, "learning_rate": 1.5979738297639025e-05, "loss": 1.9844, "step": 15399 }, { "epoch": 0.4968633484637364, "grad_norm": 0.35546875, "learning_rate": 1.5978167918778585e-05, "loss": 1.966, "step": 15400 }, { "epoch": 0.49689561231753276, "grad_norm": 0.3671875, "learning_rate": 1.5976597529151176e-05, "loss": 1.962, "step": 15401 }, { "epoch": 0.4969278761713291, "grad_norm": 0.345703125, "learning_rate": 1.597502712877408e-05, "loss": 1.9803, "step": 15402 }, { "epoch": 0.49696014002512545, "grad_norm": 0.36328125, "learning_rate": 1.5973456717664587e-05, "loss": 1.9808, "step": 15403 }, { "epoch": 0.4969924038789218, "grad_norm": 0.357421875, "learning_rate": 1.597188629583998e-05, "loss": 1.989, "step": 15404 }, { "epoch": 0.4970246677327182, "grad_norm": 0.337890625, "learning_rate": 1.5970315863317548e-05, "loss": 1.9266, "step": 15405 }, { "epoch": 0.49705693158651454, "grad_norm": 0.376953125, "learning_rate": 1.5968745420114574e-05, "loss": 1.9937, "step": 15406 }, { "epoch": 0.4970891954403109, "grad_norm": 0.341796875, "learning_rate": 1.5967174966248343e-05, "loss": 1.9549, "step": 15407 }, { "epoch": 0.4971214592941072, "grad_norm": 0.35546875, "learning_rate": 1.5965604501736147e-05, "loss": 1.977, "step": 15408 }, { "epoch": 0.49715372314790357, "grad_norm": 0.33984375, "learning_rate": 1.596403402659527e-05, "loss": 2.0111, "step": 15409 }, { "epoch": 0.4971859870016999, "grad_norm": 0.361328125, "learning_rate": 1.5962463540842992e-05, "loss": 1.9812, "step": 15410 }, { "epoch": 0.49721825085549626, "grad_norm": 0.328125, "learning_rate": 1.5960893044496615e-05, "loss": 1.9882, "step": 15411 }, { "epoch": 0.4972505147092926, "grad_norm": 0.353515625, "learning_rate": 1.5959322537573412e-05, "loss": 1.9755, "step": 15412 }, { "epoch": 0.49728277856308895, "grad_norm": 0.3515625, "learning_rate": 1.595775202009067e-05, "loss": 2.0139, "step": 15413 }, { "epoch": 0.4973150424168853, "grad_norm": 0.35546875, "learning_rate": 1.5956181492065687e-05, "loss": 1.9744, "step": 15414 }, { "epoch": 0.49734730627068163, "grad_norm": 0.353515625, "learning_rate": 1.5954610953515745e-05, "loss": 1.9577, "step": 15415 }, { "epoch": 0.497379570124478, "grad_norm": 0.357421875, "learning_rate": 1.5953040404458125e-05, "loss": 1.9878, "step": 15416 }, { "epoch": 0.4974118339782743, "grad_norm": 0.33984375, "learning_rate": 1.5951469844910123e-05, "loss": 1.9941, "step": 15417 }, { "epoch": 0.49744409783207066, "grad_norm": 0.341796875, "learning_rate": 1.594989927488902e-05, "loss": 1.9714, "step": 15418 }, { "epoch": 0.497476361685867, "grad_norm": 0.337890625, "learning_rate": 1.5948328694412116e-05, "loss": 1.9621, "step": 15419 }, { "epoch": 0.49750862553966335, "grad_norm": 0.337890625, "learning_rate": 1.5946758103496682e-05, "loss": 1.9221, "step": 15420 }, { "epoch": 0.4975408893934597, "grad_norm": 0.333984375, "learning_rate": 1.5945187502160016e-05, "loss": 1.9936, "step": 15421 }, { "epoch": 0.49757315324725604, "grad_norm": 0.34375, "learning_rate": 1.5943616890419406e-05, "loss": 1.9708, "step": 15422 }, { "epoch": 0.4976054171010524, "grad_norm": 0.341796875, "learning_rate": 1.5942046268292136e-05, "loss": 1.9807, "step": 15423 }, { "epoch": 0.4976376809548487, "grad_norm": 0.33203125, "learning_rate": 1.5940475635795496e-05, "loss": 1.9598, "step": 15424 }, { "epoch": 0.4976699448086451, "grad_norm": 0.341796875, "learning_rate": 1.5938904992946778e-05, "loss": 1.9986, "step": 15425 }, { "epoch": 0.49770220866244147, "grad_norm": 0.337890625, "learning_rate": 1.593733433976327e-05, "loss": 1.9779, "step": 15426 }, { "epoch": 0.4977344725162378, "grad_norm": 0.3515625, "learning_rate": 1.593576367626225e-05, "loss": 1.9772, "step": 15427 }, { "epoch": 0.49776673637003416, "grad_norm": 0.330078125, "learning_rate": 1.593419300246102e-05, "loss": 1.987, "step": 15428 }, { "epoch": 0.4977990002238305, "grad_norm": 0.357421875, "learning_rate": 1.5932622318376863e-05, "loss": 1.9975, "step": 15429 }, { "epoch": 0.49783126407762684, "grad_norm": 0.34375, "learning_rate": 1.5931051624027065e-05, "loss": 1.9376, "step": 15430 }, { "epoch": 0.4978635279314232, "grad_norm": 0.34375, "learning_rate": 1.592948091942892e-05, "loss": 1.958, "step": 15431 }, { "epoch": 0.49789579178521953, "grad_norm": 0.35546875, "learning_rate": 1.592791020459972e-05, "loss": 1.976, "step": 15432 }, { "epoch": 0.4979280556390159, "grad_norm": 0.33203125, "learning_rate": 1.5926339479556746e-05, "loss": 1.9882, "step": 15433 }, { "epoch": 0.4979603194928122, "grad_norm": 0.36328125, "learning_rate": 1.5924768744317293e-05, "loss": 1.9937, "step": 15434 }, { "epoch": 0.49799258334660856, "grad_norm": 0.345703125, "learning_rate": 1.5923197998898645e-05, "loss": 1.9947, "step": 15435 }, { "epoch": 0.4980248472004049, "grad_norm": 0.361328125, "learning_rate": 1.5921627243318106e-05, "loss": 2.0032, "step": 15436 }, { "epoch": 0.49805711105420125, "grad_norm": 0.375, "learning_rate": 1.5920056477592947e-05, "loss": 1.939, "step": 15437 }, { "epoch": 0.4980893749079976, "grad_norm": 0.345703125, "learning_rate": 1.5918485701740467e-05, "loss": 1.9852, "step": 15438 }, { "epoch": 0.49812163876179394, "grad_norm": 0.357421875, "learning_rate": 1.5916914915777957e-05, "loss": 1.9779, "step": 15439 }, { "epoch": 0.4981539026155903, "grad_norm": 0.36328125, "learning_rate": 1.5915344119722707e-05, "loss": 1.9718, "step": 15440 }, { "epoch": 0.4981861664693866, "grad_norm": 0.369140625, "learning_rate": 1.5913773313592003e-05, "loss": 1.9765, "step": 15441 }, { "epoch": 0.49821843032318297, "grad_norm": 0.390625, "learning_rate": 1.591220249740314e-05, "loss": 1.9865, "step": 15442 }, { "epoch": 0.4982506941769793, "grad_norm": 0.3515625, "learning_rate": 1.5910631671173405e-05, "loss": 1.9963, "step": 15443 }, { "epoch": 0.49828295803077566, "grad_norm": 0.373046875, "learning_rate": 1.590906083492009e-05, "loss": 1.9802, "step": 15444 }, { "epoch": 0.49831522188457206, "grad_norm": 0.376953125, "learning_rate": 1.5907489988660484e-05, "loss": 1.9596, "step": 15445 }, { "epoch": 0.4983474857383684, "grad_norm": 0.365234375, "learning_rate": 1.590591913241188e-05, "loss": 1.9747, "step": 15446 }, { "epoch": 0.49837974959216474, "grad_norm": 0.3671875, "learning_rate": 1.590434826619157e-05, "loss": 1.9796, "step": 15447 }, { "epoch": 0.4984120134459611, "grad_norm": 0.375, "learning_rate": 1.590277739001684e-05, "loss": 1.9891, "step": 15448 }, { "epoch": 0.49844427729975743, "grad_norm": 0.361328125, "learning_rate": 1.590120650390499e-05, "loss": 1.9899, "step": 15449 }, { "epoch": 0.4984765411535538, "grad_norm": 0.349609375, "learning_rate": 1.5899635607873298e-05, "loss": 1.9816, "step": 15450 }, { "epoch": 0.4985088050073501, "grad_norm": 0.353515625, "learning_rate": 1.5898064701939064e-05, "loss": 1.9716, "step": 15451 }, { "epoch": 0.49854106886114646, "grad_norm": 0.33984375, "learning_rate": 1.5896493786119583e-05, "loss": 1.9621, "step": 15452 }, { "epoch": 0.4985733327149428, "grad_norm": 0.34375, "learning_rate": 1.5894922860432137e-05, "loss": 1.973, "step": 15453 }, { "epoch": 0.49860559656873915, "grad_norm": 0.341796875, "learning_rate": 1.5893351924894024e-05, "loss": 1.9906, "step": 15454 }, { "epoch": 0.4986378604225355, "grad_norm": 0.341796875, "learning_rate": 1.5891780979522532e-05, "loss": 1.9809, "step": 15455 }, { "epoch": 0.49867012427633184, "grad_norm": 0.349609375, "learning_rate": 1.5890210024334958e-05, "loss": 1.9852, "step": 15456 }, { "epoch": 0.4987023881301282, "grad_norm": 0.357421875, "learning_rate": 1.5888639059348592e-05, "loss": 1.9934, "step": 15457 }, { "epoch": 0.4987346519839245, "grad_norm": 0.345703125, "learning_rate": 1.588706808458072e-05, "loss": 1.9957, "step": 15458 }, { "epoch": 0.49876691583772087, "grad_norm": 0.359375, "learning_rate": 1.5885497100048636e-05, "loss": 1.9807, "step": 15459 }, { "epoch": 0.4987991796915172, "grad_norm": 0.361328125, "learning_rate": 1.5883926105769644e-05, "loss": 2.0094, "step": 15460 }, { "epoch": 0.49883144354531356, "grad_norm": 0.341796875, "learning_rate": 1.5882355101761023e-05, "loss": 2.0055, "step": 15461 }, { "epoch": 0.4988637073991099, "grad_norm": 0.37890625, "learning_rate": 1.5880784088040068e-05, "loss": 1.9599, "step": 15462 }, { "epoch": 0.49889597125290625, "grad_norm": 0.361328125, "learning_rate": 1.5879213064624078e-05, "loss": 2.0119, "step": 15463 }, { "epoch": 0.4989282351067026, "grad_norm": 0.359375, "learning_rate": 1.5877642031530337e-05, "loss": 1.9909, "step": 15464 }, { "epoch": 0.498960498960499, "grad_norm": 0.3671875, "learning_rate": 1.5876070988776145e-05, "loss": 1.9778, "step": 15465 }, { "epoch": 0.49899276281429533, "grad_norm": 0.39453125, "learning_rate": 1.587449993637879e-05, "loss": 1.9776, "step": 15466 }, { "epoch": 0.4990250266680917, "grad_norm": 0.337890625, "learning_rate": 1.587292887435557e-05, "loss": 1.9595, "step": 15467 }, { "epoch": 0.499057290521888, "grad_norm": 0.380859375, "learning_rate": 1.5871357802723777e-05, "loss": 2.0021, "step": 15468 }, { "epoch": 0.49908955437568436, "grad_norm": 0.333984375, "learning_rate": 1.5869786721500693e-05, "loss": 1.9669, "step": 15469 }, { "epoch": 0.4991218182294807, "grad_norm": 0.373046875, "learning_rate": 1.5868215630703632e-05, "loss": 1.9824, "step": 15470 }, { "epoch": 0.49915408208327705, "grad_norm": 0.34765625, "learning_rate": 1.586664453034987e-05, "loss": 1.9545, "step": 15471 }, { "epoch": 0.4991863459370734, "grad_norm": 0.341796875, "learning_rate": 1.586507342045671e-05, "loss": 1.9639, "step": 15472 }, { "epoch": 0.49921860979086974, "grad_norm": 0.337890625, "learning_rate": 1.586350230104144e-05, "loss": 1.9616, "step": 15473 }, { "epoch": 0.4992508736446661, "grad_norm": 0.3515625, "learning_rate": 1.5861931172121357e-05, "loss": 1.9828, "step": 15474 }, { "epoch": 0.4992831374984624, "grad_norm": 0.33203125, "learning_rate": 1.586036003371375e-05, "loss": 1.9652, "step": 15475 }, { "epoch": 0.49931540135225877, "grad_norm": 0.33984375, "learning_rate": 1.585878888583592e-05, "loss": 2.0064, "step": 15476 }, { "epoch": 0.4993476652060551, "grad_norm": 0.349609375, "learning_rate": 1.585721772850516e-05, "loss": 1.9685, "step": 15477 }, { "epoch": 0.49937992905985146, "grad_norm": 0.337890625, "learning_rate": 1.5855646561738763e-05, "loss": 2.0116, "step": 15478 }, { "epoch": 0.4994121929136478, "grad_norm": 0.337890625, "learning_rate": 1.5854075385554015e-05, "loss": 1.9809, "step": 15479 }, { "epoch": 0.49944445676744414, "grad_norm": 0.341796875, "learning_rate": 1.5852504199968225e-05, "loss": 1.9844, "step": 15480 }, { "epoch": 0.4994767206212405, "grad_norm": 0.34375, "learning_rate": 1.585093300499868e-05, "loss": 1.9774, "step": 15481 }, { "epoch": 0.49950898447503683, "grad_norm": 0.3515625, "learning_rate": 1.5849361800662672e-05, "loss": 1.9495, "step": 15482 }, { "epoch": 0.4995412483288332, "grad_norm": 0.34765625, "learning_rate": 1.5847790586977502e-05, "loss": 2.0085, "step": 15483 }, { "epoch": 0.4995735121826295, "grad_norm": 0.35546875, "learning_rate": 1.584621936396046e-05, "loss": 2.0027, "step": 15484 }, { "epoch": 0.49960577603642586, "grad_norm": 0.3515625, "learning_rate": 1.584464813162884e-05, "loss": 1.9655, "step": 15485 }, { "epoch": 0.49963803989022226, "grad_norm": 0.353515625, "learning_rate": 1.584307688999994e-05, "loss": 1.9905, "step": 15486 }, { "epoch": 0.4996703037440186, "grad_norm": 0.36328125, "learning_rate": 1.5841505639091056e-05, "loss": 1.9877, "step": 15487 }, { "epoch": 0.49970256759781495, "grad_norm": 0.35546875, "learning_rate": 1.583993437891948e-05, "loss": 1.981, "step": 15488 }, { "epoch": 0.4997348314516113, "grad_norm": 0.34765625, "learning_rate": 1.583836310950251e-05, "loss": 1.9596, "step": 15489 }, { "epoch": 0.49976709530540764, "grad_norm": 0.3359375, "learning_rate": 1.5836791830857436e-05, "loss": 1.9824, "step": 15490 }, { "epoch": 0.499799359159204, "grad_norm": 0.345703125, "learning_rate": 1.5835220543001565e-05, "loss": 1.9569, "step": 15491 }, { "epoch": 0.4998316230130003, "grad_norm": 0.36328125, "learning_rate": 1.5833649245952182e-05, "loss": 1.9816, "step": 15492 }, { "epoch": 0.49986388686679667, "grad_norm": 0.345703125, "learning_rate": 1.583207793972659e-05, "loss": 1.9729, "step": 15493 }, { "epoch": 0.499896150720593, "grad_norm": 0.34375, "learning_rate": 1.5830506624342077e-05, "loss": 1.9844, "step": 15494 }, { "epoch": 0.49992841457438936, "grad_norm": 0.33984375, "learning_rate": 1.582893529981594e-05, "loss": 1.9745, "step": 15495 }, { "epoch": 0.4999606784281857, "grad_norm": 0.34375, "learning_rate": 1.582736396616548e-05, "loss": 1.9799, "step": 15496 }, { "epoch": 0.49999294228198204, "grad_norm": 0.341796875, "learning_rate": 1.5825792623407997e-05, "loss": 1.9694, "step": 15497 }, { "epoch": 0.5000252061357784, "grad_norm": 0.333984375, "learning_rate": 1.5824221271560773e-05, "loss": 1.9728, "step": 15498 }, { "epoch": 0.5000574699895748, "grad_norm": 0.3515625, "learning_rate": 1.582264991064112e-05, "loss": 1.9949, "step": 15499 }, { "epoch": 0.5000897338433711, "grad_norm": 0.33984375, "learning_rate": 1.5821078540666324e-05, "loss": 1.9922, "step": 15500 }, { "epoch": 0.5001219976971675, "grad_norm": 0.33203125, "learning_rate": 1.581950716165368e-05, "loss": 2.0005, "step": 15501 }, { "epoch": 0.5001542615509638, "grad_norm": 0.3359375, "learning_rate": 1.5817935773620496e-05, "loss": 1.9898, "step": 15502 }, { "epoch": 0.5001865254047602, "grad_norm": 0.35546875, "learning_rate": 1.5816364376584057e-05, "loss": 1.9857, "step": 15503 }, { "epoch": 0.5002187892585565, "grad_norm": 0.34765625, "learning_rate": 1.5814792970561665e-05, "loss": 1.9939, "step": 15504 }, { "epoch": 0.5002510531123529, "grad_norm": 0.345703125, "learning_rate": 1.581322155557062e-05, "loss": 1.9556, "step": 15505 }, { "epoch": 0.5002833169661491, "grad_norm": 0.361328125, "learning_rate": 1.581165013162821e-05, "loss": 1.9652, "step": 15506 }, { "epoch": 0.5003155808199455, "grad_norm": 0.359375, "learning_rate": 1.581007869875174e-05, "loss": 2.0166, "step": 15507 }, { "epoch": 0.5003478446737418, "grad_norm": 0.333984375, "learning_rate": 1.580850725695851e-05, "loss": 1.9827, "step": 15508 }, { "epoch": 0.5003801085275382, "grad_norm": 0.36328125, "learning_rate": 1.5806935806265806e-05, "loss": 2.0193, "step": 15509 }, { "epoch": 0.5004123723813345, "grad_norm": 0.341796875, "learning_rate": 1.5805364346690932e-05, "loss": 1.9916, "step": 15510 }, { "epoch": 0.5004446362351309, "grad_norm": 0.341796875, "learning_rate": 1.5803792878251188e-05, "loss": 1.9783, "step": 15511 }, { "epoch": 0.5004769000889272, "grad_norm": 0.34375, "learning_rate": 1.5802221400963866e-05, "loss": 1.9694, "step": 15512 }, { "epoch": 0.5005091639427236, "grad_norm": 0.345703125, "learning_rate": 1.580064991484627e-05, "loss": 1.9705, "step": 15513 }, { "epoch": 0.5005414277965199, "grad_norm": 0.349609375, "learning_rate": 1.579907841991569e-05, "loss": 1.9814, "step": 15514 }, { "epoch": 0.5005736916503163, "grad_norm": 0.359375, "learning_rate": 1.5797506916189425e-05, "loss": 1.9708, "step": 15515 }, { "epoch": 0.5006059555041127, "grad_norm": 0.3984375, "learning_rate": 1.579593540368478e-05, "loss": 1.9806, "step": 15516 }, { "epoch": 0.500638219357909, "grad_norm": 0.3359375, "learning_rate": 1.579436388241905e-05, "loss": 1.963, "step": 15517 }, { "epoch": 0.5006704832117054, "grad_norm": 0.376953125, "learning_rate": 1.5792792352409535e-05, "loss": 1.965, "step": 15518 }, { "epoch": 0.5007027470655017, "grad_norm": 0.33984375, "learning_rate": 1.5791220813673527e-05, "loss": 1.9602, "step": 15519 }, { "epoch": 0.5007350109192981, "grad_norm": 0.369140625, "learning_rate": 1.5789649266228326e-05, "loss": 1.9795, "step": 15520 }, { "epoch": 0.5007672747730944, "grad_norm": 0.359375, "learning_rate": 1.5788077710091236e-05, "loss": 2.001, "step": 15521 }, { "epoch": 0.5007995386268908, "grad_norm": 0.361328125, "learning_rate": 1.5786506145279552e-05, "loss": 1.9512, "step": 15522 }, { "epoch": 0.500831802480687, "grad_norm": 0.353515625, "learning_rate": 1.578493457181057e-05, "loss": 2.0154, "step": 15523 }, { "epoch": 0.5008640663344834, "grad_norm": 0.384765625, "learning_rate": 1.578336298970159e-05, "loss": 1.9803, "step": 15524 }, { "epoch": 0.5008963301882797, "grad_norm": 0.345703125, "learning_rate": 1.5781791398969917e-05, "loss": 1.9797, "step": 15525 }, { "epoch": 0.5009285940420761, "grad_norm": 0.3515625, "learning_rate": 1.578021979963284e-05, "loss": 1.9892, "step": 15526 }, { "epoch": 0.5009608578958724, "grad_norm": 0.349609375, "learning_rate": 1.5778648191707666e-05, "loss": 1.9784, "step": 15527 }, { "epoch": 0.5009931217496688, "grad_norm": 0.333984375, "learning_rate": 1.577707657521169e-05, "loss": 1.9929, "step": 15528 }, { "epoch": 0.5010253856034651, "grad_norm": 0.349609375, "learning_rate": 1.5775504950162216e-05, "loss": 1.9731, "step": 15529 }, { "epoch": 0.5010576494572615, "grad_norm": 0.361328125, "learning_rate": 1.577393331657654e-05, "loss": 1.969, "step": 15530 }, { "epoch": 0.5010899133110578, "grad_norm": 0.341796875, "learning_rate": 1.5772361674471958e-05, "loss": 1.9295, "step": 15531 }, { "epoch": 0.5011221771648542, "grad_norm": 0.35546875, "learning_rate": 1.577079002386577e-05, "loss": 1.9782, "step": 15532 }, { "epoch": 0.5011544410186505, "grad_norm": 0.3515625, "learning_rate": 1.5769218364775285e-05, "loss": 1.9556, "step": 15533 }, { "epoch": 0.5011867048724469, "grad_norm": 0.345703125, "learning_rate": 1.5767646697217793e-05, "loss": 1.9887, "step": 15534 }, { "epoch": 0.5012189687262432, "grad_norm": 0.341796875, "learning_rate": 1.5766075021210592e-05, "loss": 1.9872, "step": 15535 }, { "epoch": 0.5012512325800396, "grad_norm": 0.33984375, "learning_rate": 1.5764503336770995e-05, "loss": 1.9946, "step": 15536 }, { "epoch": 0.501283496433836, "grad_norm": 0.341796875, "learning_rate": 1.576293164391629e-05, "loss": 1.9937, "step": 15537 }, { "epoch": 0.5013157602876323, "grad_norm": 0.349609375, "learning_rate": 1.5761359942663783e-05, "loss": 1.9826, "step": 15538 }, { "epoch": 0.5013480241414286, "grad_norm": 0.345703125, "learning_rate": 1.575978823303077e-05, "loss": 1.9817, "step": 15539 }, { "epoch": 0.5013802879952249, "grad_norm": 0.345703125, "learning_rate": 1.5758216515034553e-05, "loss": 2.0001, "step": 15540 }, { "epoch": 0.5014125518490213, "grad_norm": 0.359375, "learning_rate": 1.575664478869243e-05, "loss": 2.0096, "step": 15541 }, { "epoch": 0.5014448157028176, "grad_norm": 0.361328125, "learning_rate": 1.575507305402171e-05, "loss": 2.0148, "step": 15542 }, { "epoch": 0.501477079556614, "grad_norm": 0.373046875, "learning_rate": 1.5753501311039684e-05, "loss": 2.008, "step": 15543 }, { "epoch": 0.5015093434104103, "grad_norm": 0.34765625, "learning_rate": 1.5751929559763658e-05, "loss": 2.0299, "step": 15544 }, { "epoch": 0.5015416072642067, "grad_norm": 0.359375, "learning_rate": 1.5750357800210928e-05, "loss": 1.9707, "step": 15545 }, { "epoch": 0.501573871118003, "grad_norm": 0.34765625, "learning_rate": 1.57487860323988e-05, "loss": 1.9856, "step": 15546 }, { "epoch": 0.5016061349717994, "grad_norm": 0.365234375, "learning_rate": 1.574721425634457e-05, "loss": 1.9788, "step": 15547 }, { "epoch": 0.5016383988255957, "grad_norm": 0.337890625, "learning_rate": 1.5745642472065545e-05, "loss": 1.9887, "step": 15548 }, { "epoch": 0.5016706626793921, "grad_norm": 0.359375, "learning_rate": 1.5744070679579018e-05, "loss": 1.9823, "step": 15549 }, { "epoch": 0.5017029265331884, "grad_norm": 0.33984375, "learning_rate": 1.5742498878902296e-05, "loss": 1.9714, "step": 15550 }, { "epoch": 0.5017351903869848, "grad_norm": 0.3359375, "learning_rate": 1.5740927070052682e-05, "loss": 1.952, "step": 15551 }, { "epoch": 0.5017674542407811, "grad_norm": 0.3359375, "learning_rate": 1.5739355253047472e-05, "loss": 1.9579, "step": 15552 }, { "epoch": 0.5017997180945775, "grad_norm": 0.333984375, "learning_rate": 1.5737783427903973e-05, "loss": 1.945, "step": 15553 }, { "epoch": 0.5018319819483738, "grad_norm": 0.3359375, "learning_rate": 1.573621159463948e-05, "loss": 1.9887, "step": 15554 }, { "epoch": 0.5018642458021702, "grad_norm": 0.34375, "learning_rate": 1.5734639753271296e-05, "loss": 1.9976, "step": 15555 }, { "epoch": 0.5018965096559664, "grad_norm": 0.337890625, "learning_rate": 1.5733067903816728e-05, "loss": 2.0005, "step": 15556 }, { "epoch": 0.5019287735097628, "grad_norm": 0.345703125, "learning_rate": 1.5731496046293074e-05, "loss": 1.9707, "step": 15557 }, { "epoch": 0.5019610373635592, "grad_norm": 0.341796875, "learning_rate": 1.5729924180717636e-05, "loss": 1.9885, "step": 15558 }, { "epoch": 0.5019933012173555, "grad_norm": 0.357421875, "learning_rate": 1.5728352307107718e-05, "loss": 1.9939, "step": 15559 }, { "epoch": 0.5020255650711519, "grad_norm": 0.34765625, "learning_rate": 1.5726780425480616e-05, "loss": 1.9746, "step": 15560 }, { "epoch": 0.5020578289249482, "grad_norm": 0.349609375, "learning_rate": 1.5725208535853644e-05, "loss": 1.9691, "step": 15561 }, { "epoch": 0.5020900927787446, "grad_norm": 0.34765625, "learning_rate": 1.572363663824409e-05, "loss": 1.9463, "step": 15562 }, { "epoch": 0.5021223566325409, "grad_norm": 0.361328125, "learning_rate": 1.5722064732669264e-05, "loss": 1.9894, "step": 15563 }, { "epoch": 0.5021546204863373, "grad_norm": 0.349609375, "learning_rate": 1.572049281914647e-05, "loss": 1.9807, "step": 15564 }, { "epoch": 0.5021868843401336, "grad_norm": 0.349609375, "learning_rate": 1.5718920897693003e-05, "loss": 1.9927, "step": 15565 }, { "epoch": 0.50221914819393, "grad_norm": 0.357421875, "learning_rate": 1.5717348968326175e-05, "loss": 1.9701, "step": 15566 }, { "epoch": 0.5022514120477263, "grad_norm": 0.3515625, "learning_rate": 1.5715777031063283e-05, "loss": 1.9767, "step": 15567 }, { "epoch": 0.5022836759015227, "grad_norm": 0.34375, "learning_rate": 1.571420508592163e-05, "loss": 1.981, "step": 15568 }, { "epoch": 0.502315939755319, "grad_norm": 0.3515625, "learning_rate": 1.5712633132918517e-05, "loss": 1.9606, "step": 15569 }, { "epoch": 0.5023482036091154, "grad_norm": 0.333984375, "learning_rate": 1.5711061172071256e-05, "loss": 1.9939, "step": 15570 }, { "epoch": 0.5023804674629117, "grad_norm": 0.33984375, "learning_rate": 1.570948920339714e-05, "loss": 1.971, "step": 15571 }, { "epoch": 0.502412731316708, "grad_norm": 0.349609375, "learning_rate": 1.5707917226913478e-05, "loss": 1.9863, "step": 15572 }, { "epoch": 0.5024449951705043, "grad_norm": 0.349609375, "learning_rate": 1.570634524263757e-05, "loss": 1.9842, "step": 15573 }, { "epoch": 0.5024772590243007, "grad_norm": 0.34375, "learning_rate": 1.570477325058672e-05, "loss": 1.9833, "step": 15574 }, { "epoch": 0.502509522878097, "grad_norm": 0.345703125, "learning_rate": 1.5703201250778234e-05, "loss": 1.9878, "step": 15575 }, { "epoch": 0.5025417867318934, "grad_norm": 0.349609375, "learning_rate": 1.570162924322941e-05, "loss": 1.9649, "step": 15576 }, { "epoch": 0.5025740505856898, "grad_norm": 0.3671875, "learning_rate": 1.5700057227957554e-05, "loss": 1.946, "step": 15577 }, { "epoch": 0.5026063144394861, "grad_norm": 0.349609375, "learning_rate": 1.5698485204979975e-05, "loss": 1.9872, "step": 15578 }, { "epoch": 0.5026385782932825, "grad_norm": 0.34765625, "learning_rate": 1.5696913174313967e-05, "loss": 1.9462, "step": 15579 }, { "epoch": 0.5026708421470788, "grad_norm": 0.359375, "learning_rate": 1.5695341135976836e-05, "loss": 1.9763, "step": 15580 }, { "epoch": 0.5027031060008752, "grad_norm": 0.375, "learning_rate": 1.5693769089985898e-05, "loss": 1.972, "step": 15581 }, { "epoch": 0.5027353698546715, "grad_norm": 0.3515625, "learning_rate": 1.569219703635844e-05, "loss": 1.9644, "step": 15582 }, { "epoch": 0.5027676337084679, "grad_norm": 0.3515625, "learning_rate": 1.5690624975111777e-05, "loss": 1.9987, "step": 15583 }, { "epoch": 0.5027998975622642, "grad_norm": 0.376953125, "learning_rate": 1.568905290626321e-05, "loss": 1.9616, "step": 15584 }, { "epoch": 0.5028321614160606, "grad_norm": 0.34765625, "learning_rate": 1.5687480829830044e-05, "loss": 1.9756, "step": 15585 }, { "epoch": 0.5028644252698569, "grad_norm": 0.3359375, "learning_rate": 1.5685908745829576e-05, "loss": 2.0028, "step": 15586 }, { "epoch": 0.5028966891236533, "grad_norm": 0.361328125, "learning_rate": 1.568433665427912e-05, "loss": 1.959, "step": 15587 }, { "epoch": 0.5029289529774496, "grad_norm": 0.36328125, "learning_rate": 1.5682764555195978e-05, "loss": 1.9916, "step": 15588 }, { "epoch": 0.502961216831246, "grad_norm": 0.349609375, "learning_rate": 1.5681192448597458e-05, "loss": 1.9402, "step": 15589 }, { "epoch": 0.5029934806850422, "grad_norm": 0.345703125, "learning_rate": 1.5679620334500852e-05, "loss": 1.971, "step": 15590 }, { "epoch": 0.5030257445388386, "grad_norm": 0.353515625, "learning_rate": 1.567804821292348e-05, "loss": 1.963, "step": 15591 }, { "epoch": 0.5030580083926349, "grad_norm": 0.337890625, "learning_rate": 1.5676476083882637e-05, "loss": 1.9503, "step": 15592 }, { "epoch": 0.5030902722464313, "grad_norm": 0.36328125, "learning_rate": 1.5674903947395628e-05, "loss": 1.9972, "step": 15593 }, { "epoch": 0.5031225361002276, "grad_norm": 0.3515625, "learning_rate": 1.5673331803479763e-05, "loss": 1.9746, "step": 15594 }, { "epoch": 0.503154799954024, "grad_norm": 0.3515625, "learning_rate": 1.5671759652152342e-05, "loss": 1.9822, "step": 15595 }, { "epoch": 0.5031870638078203, "grad_norm": 0.376953125, "learning_rate": 1.5670187493430674e-05, "loss": 1.9973, "step": 15596 }, { "epoch": 0.5032193276616167, "grad_norm": 0.34765625, "learning_rate": 1.5668615327332063e-05, "loss": 1.9955, "step": 15597 }, { "epoch": 0.5032515915154131, "grad_norm": 0.3515625, "learning_rate": 1.5667043153873816e-05, "loss": 1.9706, "step": 15598 }, { "epoch": 0.5032838553692094, "grad_norm": 0.3671875, "learning_rate": 1.5665470973073238e-05, "loss": 1.9599, "step": 15599 }, { "epoch": 0.5033161192230058, "grad_norm": 0.353515625, "learning_rate": 1.5663898784947623e-05, "loss": 1.9953, "step": 15600 }, { "epoch": 0.5033483830768021, "grad_norm": 0.36328125, "learning_rate": 1.56623265895143e-05, "loss": 1.9912, "step": 15601 }, { "epoch": 0.5033806469305985, "grad_norm": 0.359375, "learning_rate": 1.566075438679055e-05, "loss": 1.9862, "step": 15602 }, { "epoch": 0.5034129107843948, "grad_norm": 0.337890625, "learning_rate": 1.5659182176793694e-05, "loss": 1.9859, "step": 15603 }, { "epoch": 0.5034451746381912, "grad_norm": 0.375, "learning_rate": 1.5657609959541036e-05, "loss": 1.962, "step": 15604 }, { "epoch": 0.5034774384919875, "grad_norm": 0.359375, "learning_rate": 1.5656037735049877e-05, "loss": 1.9835, "step": 15605 }, { "epoch": 0.5035097023457838, "grad_norm": 0.37109375, "learning_rate": 1.565446550333753e-05, "loss": 1.9792, "step": 15606 }, { "epoch": 0.5035419661995801, "grad_norm": 0.345703125, "learning_rate": 1.5652893264421285e-05, "loss": 1.9664, "step": 15607 }, { "epoch": 0.5035742300533765, "grad_norm": 0.337890625, "learning_rate": 1.5651321018318468e-05, "loss": 1.9899, "step": 15608 }, { "epoch": 0.5036064939071728, "grad_norm": 0.3359375, "learning_rate": 1.5649748765046373e-05, "loss": 1.9784, "step": 15609 }, { "epoch": 0.5036387577609692, "grad_norm": 0.36328125, "learning_rate": 1.5648176504622312e-05, "loss": 1.9714, "step": 15610 }, { "epoch": 0.5036710216147655, "grad_norm": 0.341796875, "learning_rate": 1.5646604237063588e-05, "loss": 1.983, "step": 15611 }, { "epoch": 0.5037032854685619, "grad_norm": 0.35546875, "learning_rate": 1.564503196238751e-05, "loss": 1.977, "step": 15612 }, { "epoch": 0.5037355493223582, "grad_norm": 0.333984375, "learning_rate": 1.564345968061138e-05, "loss": 1.9984, "step": 15613 }, { "epoch": 0.5037678131761546, "grad_norm": 0.392578125, "learning_rate": 1.564188739175251e-05, "loss": 2.0003, "step": 15614 }, { "epoch": 0.5038000770299509, "grad_norm": 0.349609375, "learning_rate": 1.5640315095828203e-05, "loss": 1.9715, "step": 15615 }, { "epoch": 0.5038323408837473, "grad_norm": 0.33984375, "learning_rate": 1.563874279285577e-05, "loss": 1.97, "step": 15616 }, { "epoch": 0.5038646047375437, "grad_norm": 0.34375, "learning_rate": 1.5637170482852505e-05, "loss": 1.9761, "step": 15617 }, { "epoch": 0.50389686859134, "grad_norm": 0.3515625, "learning_rate": 1.5635598165835733e-05, "loss": 1.9801, "step": 15618 }, { "epoch": 0.5039291324451364, "grad_norm": 0.34765625, "learning_rate": 1.5634025841822747e-05, "loss": 2.0038, "step": 15619 }, { "epoch": 0.5039613962989327, "grad_norm": 0.34765625, "learning_rate": 1.5632453510830867e-05, "loss": 1.9471, "step": 15620 }, { "epoch": 0.5039936601527291, "grad_norm": 0.36328125, "learning_rate": 1.5630881172877386e-05, "loss": 1.981, "step": 15621 }, { "epoch": 0.5040259240065253, "grad_norm": 0.357421875, "learning_rate": 1.562930882797962e-05, "loss": 1.9522, "step": 15622 }, { "epoch": 0.5040581878603217, "grad_norm": 0.353515625, "learning_rate": 1.5627736476154877e-05, "loss": 1.9732, "step": 15623 }, { "epoch": 0.504090451714118, "grad_norm": 0.3671875, "learning_rate": 1.5626164117420455e-05, "loss": 1.9902, "step": 15624 }, { "epoch": 0.5041227155679144, "grad_norm": 0.337890625, "learning_rate": 1.5624591751793672e-05, "loss": 1.9888, "step": 15625 }, { "epoch": 0.5041549794217107, "grad_norm": 0.369140625, "learning_rate": 1.562301937929183e-05, "loss": 1.9834, "step": 15626 }, { "epoch": 0.5041872432755071, "grad_norm": 0.345703125, "learning_rate": 1.5621446999932235e-05, "loss": 1.994, "step": 15627 }, { "epoch": 0.5042195071293034, "grad_norm": 0.35546875, "learning_rate": 1.5619874613732198e-05, "loss": 1.9332, "step": 15628 }, { "epoch": 0.5042517709830998, "grad_norm": 0.35546875, "learning_rate": 1.5618302220709032e-05, "loss": 1.962, "step": 15629 }, { "epoch": 0.5042840348368961, "grad_norm": 0.349609375, "learning_rate": 1.561672982088003e-05, "loss": 1.9612, "step": 15630 }, { "epoch": 0.5043162986906925, "grad_norm": 0.34765625, "learning_rate": 1.561515741426251e-05, "loss": 1.9715, "step": 15631 }, { "epoch": 0.5043485625444888, "grad_norm": 0.361328125, "learning_rate": 1.5613585000873787e-05, "loss": 1.969, "step": 15632 }, { "epoch": 0.5043808263982852, "grad_norm": 0.322265625, "learning_rate": 1.561201258073115e-05, "loss": 1.984, "step": 15633 }, { "epoch": 0.5044130902520815, "grad_norm": 0.412109375, "learning_rate": 1.5610440153851925e-05, "loss": 1.9764, "step": 15634 }, { "epoch": 0.5044453541058779, "grad_norm": 0.33984375, "learning_rate": 1.560886772025341e-05, "loss": 1.9579, "step": 15635 }, { "epoch": 0.5044776179596742, "grad_norm": 0.35546875, "learning_rate": 1.5607295279952912e-05, "loss": 1.9785, "step": 15636 }, { "epoch": 0.5045098818134706, "grad_norm": 0.345703125, "learning_rate": 1.560572283296775e-05, "loss": 2.0064, "step": 15637 }, { "epoch": 0.504542145667267, "grad_norm": 0.330078125, "learning_rate": 1.560415037931522e-05, "loss": 1.9718, "step": 15638 }, { "epoch": 0.5045744095210632, "grad_norm": 0.421875, "learning_rate": 1.560257791901264e-05, "loss": 1.9808, "step": 15639 }, { "epoch": 0.5046066733748596, "grad_norm": 0.341796875, "learning_rate": 1.5601005452077312e-05, "loss": 1.9629, "step": 15640 }, { "epoch": 0.5046389372286559, "grad_norm": 0.34375, "learning_rate": 1.559943297852655e-05, "loss": 1.9987, "step": 15641 }, { "epoch": 0.5046712010824523, "grad_norm": 0.3359375, "learning_rate": 1.5597860498377658e-05, "loss": 1.9954, "step": 15642 }, { "epoch": 0.5047034649362486, "grad_norm": 0.369140625, "learning_rate": 1.559628801164795e-05, "loss": 1.9861, "step": 15643 }, { "epoch": 0.504735728790045, "grad_norm": 0.345703125, "learning_rate": 1.5594715518354724e-05, "loss": 1.9802, "step": 15644 }, { "epoch": 0.5047679926438413, "grad_norm": 0.34765625, "learning_rate": 1.55931430185153e-05, "loss": 1.9724, "step": 15645 }, { "epoch": 0.5048002564976377, "grad_norm": 0.341796875, "learning_rate": 1.5591570512146985e-05, "loss": 1.9975, "step": 15646 }, { "epoch": 0.504832520351434, "grad_norm": 0.34375, "learning_rate": 1.5589997999267085e-05, "loss": 1.9375, "step": 15647 }, { "epoch": 0.5048647842052304, "grad_norm": 0.345703125, "learning_rate": 1.5588425479892906e-05, "loss": 1.9922, "step": 15648 }, { "epoch": 0.5048970480590267, "grad_norm": 0.365234375, "learning_rate": 1.558685295404177e-05, "loss": 1.9712, "step": 15649 }, { "epoch": 0.5049293119128231, "grad_norm": 0.326171875, "learning_rate": 1.5585280421730972e-05, "loss": 1.9893, "step": 15650 }, { "epoch": 0.5049615757666194, "grad_norm": 0.34765625, "learning_rate": 1.5583707882977828e-05, "loss": 1.9627, "step": 15651 }, { "epoch": 0.5049938396204158, "grad_norm": 0.33984375, "learning_rate": 1.5582135337799647e-05, "loss": 2.0232, "step": 15652 }, { "epoch": 0.5050261034742121, "grad_norm": 0.34375, "learning_rate": 1.5580562786213736e-05, "loss": 1.9716, "step": 15653 }, { "epoch": 0.5050583673280085, "grad_norm": 0.3515625, "learning_rate": 1.557899022823741e-05, "loss": 1.9592, "step": 15654 }, { "epoch": 0.5050906311818048, "grad_norm": 0.34765625, "learning_rate": 1.557741766388797e-05, "loss": 1.9683, "step": 15655 }, { "epoch": 0.5051228950356011, "grad_norm": 0.349609375, "learning_rate": 1.557584509318273e-05, "loss": 1.9964, "step": 15656 }, { "epoch": 0.5051551588893974, "grad_norm": 0.3359375, "learning_rate": 1.5574272516139006e-05, "loss": 1.9811, "step": 15657 }, { "epoch": 0.5051874227431938, "grad_norm": 0.349609375, "learning_rate": 1.55726999327741e-05, "loss": 1.9925, "step": 15658 }, { "epoch": 0.5052196865969902, "grad_norm": 0.328125, "learning_rate": 1.557112734310532e-05, "loss": 1.9399, "step": 15659 }, { "epoch": 0.5052519504507865, "grad_norm": 0.3359375, "learning_rate": 1.5569554747149987e-05, "loss": 2.0072, "step": 15660 }, { "epoch": 0.5052842143045829, "grad_norm": 0.337890625, "learning_rate": 1.55679821449254e-05, "loss": 1.9793, "step": 15661 }, { "epoch": 0.5053164781583792, "grad_norm": 0.33984375, "learning_rate": 1.556640953644887e-05, "loss": 2.0051, "step": 15662 }, { "epoch": 0.5053487420121756, "grad_norm": 0.33984375, "learning_rate": 1.5564836921737718e-05, "loss": 2.0215, "step": 15663 }, { "epoch": 0.5053810058659719, "grad_norm": 0.34765625, "learning_rate": 1.5563264300809242e-05, "loss": 1.9744, "step": 15664 }, { "epoch": 0.5054132697197683, "grad_norm": 0.326171875, "learning_rate": 1.5561691673680755e-05, "loss": 1.9952, "step": 15665 }, { "epoch": 0.5054455335735646, "grad_norm": 0.345703125, "learning_rate": 1.556011904036957e-05, "loss": 1.9745, "step": 15666 }, { "epoch": 0.505477797427361, "grad_norm": 0.333984375, "learning_rate": 1.5558546400892995e-05, "loss": 1.9709, "step": 15667 }, { "epoch": 0.5055100612811573, "grad_norm": 0.341796875, "learning_rate": 1.555697375526835e-05, "loss": 1.9725, "step": 15668 }, { "epoch": 0.5055423251349537, "grad_norm": 0.33203125, "learning_rate": 1.5555401103512928e-05, "loss": 1.9603, "step": 15669 }, { "epoch": 0.50557458898875, "grad_norm": 0.345703125, "learning_rate": 1.555382844564405e-05, "loss": 1.9968, "step": 15670 }, { "epoch": 0.5056068528425464, "grad_norm": 0.326171875, "learning_rate": 1.5552255781679033e-05, "loss": 1.9677, "step": 15671 }, { "epoch": 0.5056391166963426, "grad_norm": 0.333984375, "learning_rate": 1.5550683111635175e-05, "loss": 1.9677, "step": 15672 }, { "epoch": 0.505671380550139, "grad_norm": 0.3359375, "learning_rate": 1.554911043552979e-05, "loss": 1.9522, "step": 15673 }, { "epoch": 0.5057036444039353, "grad_norm": 0.3359375, "learning_rate": 1.5547537753380198e-05, "loss": 1.9852, "step": 15674 }, { "epoch": 0.5057359082577317, "grad_norm": 0.369140625, "learning_rate": 1.5545965065203703e-05, "loss": 1.9814, "step": 15675 }, { "epoch": 0.505768172111528, "grad_norm": 0.353515625, "learning_rate": 1.5544392371017607e-05, "loss": 1.9417, "step": 15676 }, { "epoch": 0.5058004359653244, "grad_norm": 0.365234375, "learning_rate": 1.5542819670839237e-05, "loss": 1.982, "step": 15677 }, { "epoch": 0.5058326998191208, "grad_norm": 0.365234375, "learning_rate": 1.55412469646859e-05, "loss": 1.9779, "step": 15678 }, { "epoch": 0.5058649636729171, "grad_norm": 0.359375, "learning_rate": 1.5539674252574896e-05, "loss": 1.951, "step": 15679 }, { "epoch": 0.5058972275267135, "grad_norm": 0.38671875, "learning_rate": 1.5538101534523552e-05, "loss": 2.0177, "step": 15680 }, { "epoch": 0.5059294913805098, "grad_norm": 0.34765625, "learning_rate": 1.5536528810549174e-05, "loss": 1.9908, "step": 15681 }, { "epoch": 0.5059617552343062, "grad_norm": 0.359375, "learning_rate": 1.553495608066907e-05, "loss": 1.955, "step": 15682 }, { "epoch": 0.5059940190881025, "grad_norm": 0.345703125, "learning_rate": 1.5533383344900547e-05, "loss": 1.9603, "step": 15683 }, { "epoch": 0.5060262829418989, "grad_norm": 0.353515625, "learning_rate": 1.5531810603260927e-05, "loss": 1.988, "step": 15684 }, { "epoch": 0.5060585467956952, "grad_norm": 0.337890625, "learning_rate": 1.553023785576752e-05, "loss": 2.0093, "step": 15685 }, { "epoch": 0.5060908106494916, "grad_norm": 0.34375, "learning_rate": 1.5528665102437634e-05, "loss": 1.9535, "step": 15686 }, { "epoch": 0.5061230745032879, "grad_norm": 0.357421875, "learning_rate": 1.552709234328858e-05, "loss": 2.0231, "step": 15687 }, { "epoch": 0.5061553383570843, "grad_norm": 0.3515625, "learning_rate": 1.5525519578337675e-05, "loss": 1.9523, "step": 15688 }, { "epoch": 0.5061876022108805, "grad_norm": 0.34765625, "learning_rate": 1.5523946807602223e-05, "loss": 1.9889, "step": 15689 }, { "epoch": 0.506219866064677, "grad_norm": 0.353515625, "learning_rate": 1.5522374031099538e-05, "loss": 1.9903, "step": 15690 }, { "epoch": 0.5062521299184732, "grad_norm": 0.333984375, "learning_rate": 1.5520801248846945e-05, "loss": 1.9788, "step": 15691 }, { "epoch": 0.5062843937722696, "grad_norm": 0.353515625, "learning_rate": 1.5519228460861738e-05, "loss": 1.9762, "step": 15692 }, { "epoch": 0.5063166576260659, "grad_norm": 0.34765625, "learning_rate": 1.5517655667161234e-05, "loss": 1.9813, "step": 15693 }, { "epoch": 0.5063489214798623, "grad_norm": 0.33984375, "learning_rate": 1.5516082867762757e-05, "loss": 1.9312, "step": 15694 }, { "epoch": 0.5063811853336586, "grad_norm": 0.345703125, "learning_rate": 1.5514510062683602e-05, "loss": 1.9838, "step": 15695 }, { "epoch": 0.506413449187455, "grad_norm": 0.341796875, "learning_rate": 1.5512937251941093e-05, "loss": 1.9651, "step": 15696 }, { "epoch": 0.5064457130412513, "grad_norm": 0.34765625, "learning_rate": 1.5511364435552537e-05, "loss": 1.991, "step": 15697 }, { "epoch": 0.5064779768950477, "grad_norm": 0.34765625, "learning_rate": 1.550979161353525e-05, "loss": 1.9443, "step": 15698 }, { "epoch": 0.5065102407488441, "grad_norm": 0.35546875, "learning_rate": 1.5508218785906538e-05, "loss": 1.987, "step": 15699 }, { "epoch": 0.5065425046026404, "grad_norm": 0.384765625, "learning_rate": 1.5506645952683724e-05, "loss": 1.9776, "step": 15700 }, { "epoch": 0.5065747684564368, "grad_norm": 0.345703125, "learning_rate": 1.5505073113884113e-05, "loss": 1.9739, "step": 15701 }, { "epoch": 0.5066070323102331, "grad_norm": 0.341796875, "learning_rate": 1.550350026952502e-05, "loss": 1.9607, "step": 15702 }, { "epoch": 0.5066392961640295, "grad_norm": 0.3671875, "learning_rate": 1.5501927419623753e-05, "loss": 1.9696, "step": 15703 }, { "epoch": 0.5066715600178258, "grad_norm": 0.353515625, "learning_rate": 1.550035456419763e-05, "loss": 1.9713, "step": 15704 }, { "epoch": 0.5067038238716222, "grad_norm": 0.3828125, "learning_rate": 1.549878170326397e-05, "loss": 1.9574, "step": 15705 }, { "epoch": 0.5067360877254184, "grad_norm": 0.3671875, "learning_rate": 1.5497208836840078e-05, "loss": 1.9638, "step": 15706 }, { "epoch": 0.5067683515792148, "grad_norm": 0.337890625, "learning_rate": 1.5495635964943258e-05, "loss": 1.989, "step": 15707 }, { "epoch": 0.5068006154330111, "grad_norm": 0.35546875, "learning_rate": 1.549406308759084e-05, "loss": 1.9398, "step": 15708 }, { "epoch": 0.5068328792868075, "grad_norm": 0.337890625, "learning_rate": 1.5492490204800133e-05, "loss": 2.0098, "step": 15709 }, { "epoch": 0.5068651431406038, "grad_norm": 0.37890625, "learning_rate": 1.5490917316588438e-05, "loss": 1.9945, "step": 15710 }, { "epoch": 0.5068974069944002, "grad_norm": 0.34765625, "learning_rate": 1.5489344422973083e-05, "loss": 1.9713, "step": 15711 }, { "epoch": 0.5069296708481965, "grad_norm": 0.359375, "learning_rate": 1.5487771523971378e-05, "loss": 2.0174, "step": 15712 }, { "epoch": 0.5069619347019929, "grad_norm": 0.361328125, "learning_rate": 1.5486198619600625e-05, "loss": 1.9823, "step": 15713 }, { "epoch": 0.5069941985557892, "grad_norm": 0.36328125, "learning_rate": 1.5484625709878153e-05, "loss": 1.9845, "step": 15714 }, { "epoch": 0.5070264624095856, "grad_norm": 0.34765625, "learning_rate": 1.5483052794821274e-05, "loss": 1.9805, "step": 15715 }, { "epoch": 0.5070587262633819, "grad_norm": 0.34765625, "learning_rate": 1.5481479874447287e-05, "loss": 1.9684, "step": 15716 }, { "epoch": 0.5070909901171783, "grad_norm": 0.357421875, "learning_rate": 1.5479906948773517e-05, "loss": 2.0038, "step": 15717 }, { "epoch": 0.5071232539709747, "grad_norm": 0.35546875, "learning_rate": 1.5478334017817277e-05, "loss": 1.9755, "step": 15718 }, { "epoch": 0.507155517824771, "grad_norm": 0.36328125, "learning_rate": 1.547676108159588e-05, "loss": 1.9562, "step": 15719 }, { "epoch": 0.5071877816785674, "grad_norm": 0.3671875, "learning_rate": 1.5475188140126637e-05, "loss": 1.9794, "step": 15720 }, { "epoch": 0.5072200455323637, "grad_norm": 0.36328125, "learning_rate": 1.5473615193426866e-05, "loss": 1.9876, "step": 15721 }, { "epoch": 0.5072523093861601, "grad_norm": 0.365234375, "learning_rate": 1.547204224151388e-05, "loss": 1.9647, "step": 15722 }, { "epoch": 0.5072845732399563, "grad_norm": 0.365234375, "learning_rate": 1.5470469284404992e-05, "loss": 1.9982, "step": 15723 }, { "epoch": 0.5073168370937527, "grad_norm": 0.328125, "learning_rate": 1.5468896322117505e-05, "loss": 1.9826, "step": 15724 }, { "epoch": 0.507349100947549, "grad_norm": 0.349609375, "learning_rate": 1.5467323354668758e-05, "loss": 1.9555, "step": 15725 }, { "epoch": 0.5073813648013454, "grad_norm": 0.341796875, "learning_rate": 1.5465750382076044e-05, "loss": 1.9706, "step": 15726 }, { "epoch": 0.5074136286551417, "grad_norm": 0.341796875, "learning_rate": 1.5464177404356684e-05, "loss": 1.9778, "step": 15727 }, { "epoch": 0.5074458925089381, "grad_norm": 0.3515625, "learning_rate": 1.546260442152799e-05, "loss": 1.986, "step": 15728 }, { "epoch": 0.5074781563627344, "grad_norm": 0.330078125, "learning_rate": 1.5461031433607284e-05, "loss": 1.9817, "step": 15729 }, { "epoch": 0.5075104202165308, "grad_norm": 0.337890625, "learning_rate": 1.5459458440611874e-05, "loss": 1.9948, "step": 15730 }, { "epoch": 0.5075426840703271, "grad_norm": 0.33984375, "learning_rate": 1.545788544255907e-05, "loss": 1.9759, "step": 15731 }, { "epoch": 0.5075749479241235, "grad_norm": 0.337890625, "learning_rate": 1.5456312439466195e-05, "loss": 1.9714, "step": 15732 }, { "epoch": 0.5076072117779198, "grad_norm": 0.341796875, "learning_rate": 1.545473943135056e-05, "loss": 1.9819, "step": 15733 }, { "epoch": 0.5076394756317162, "grad_norm": 0.3515625, "learning_rate": 1.5453166418229477e-05, "loss": 1.9825, "step": 15734 }, { "epoch": 0.5076717394855125, "grad_norm": 0.3515625, "learning_rate": 1.5451593400120265e-05, "loss": 2.0014, "step": 15735 }, { "epoch": 0.5077040033393089, "grad_norm": 0.326171875, "learning_rate": 1.545002037704024e-05, "loss": 1.9945, "step": 15736 }, { "epoch": 0.5077362671931052, "grad_norm": 0.373046875, "learning_rate": 1.544844734900671e-05, "loss": 1.9807, "step": 15737 }, { "epoch": 0.5077685310469016, "grad_norm": 0.33984375, "learning_rate": 1.544687431603699e-05, "loss": 1.9723, "step": 15738 }, { "epoch": 0.507800794900698, "grad_norm": 0.349609375, "learning_rate": 1.5445301278148403e-05, "loss": 1.9766, "step": 15739 }, { "epoch": 0.5078330587544942, "grad_norm": 0.36328125, "learning_rate": 1.544372823535826e-05, "loss": 1.9944, "step": 15740 }, { "epoch": 0.5078653226082906, "grad_norm": 0.353515625, "learning_rate": 1.5442155187683873e-05, "loss": 1.9859, "step": 15741 }, { "epoch": 0.5078975864620869, "grad_norm": 0.3515625, "learning_rate": 1.5440582135142555e-05, "loss": 1.9832, "step": 15742 }, { "epoch": 0.5079298503158833, "grad_norm": 0.353515625, "learning_rate": 1.543900907775163e-05, "loss": 1.9686, "step": 15743 }, { "epoch": 0.5079621141696796, "grad_norm": 0.34765625, "learning_rate": 1.543743601552841e-05, "loss": 1.9721, "step": 15744 }, { "epoch": 0.507994378023476, "grad_norm": 0.35546875, "learning_rate": 1.54358629484902e-05, "loss": 2.0028, "step": 15745 }, { "epoch": 0.5080266418772723, "grad_norm": 0.34765625, "learning_rate": 1.5434289876654332e-05, "loss": 1.9447, "step": 15746 }, { "epoch": 0.5080589057310687, "grad_norm": 0.341796875, "learning_rate": 1.543271680003811e-05, "loss": 1.9799, "step": 15747 }, { "epoch": 0.508091169584865, "grad_norm": 0.357421875, "learning_rate": 1.5431143718658846e-05, "loss": 1.9769, "step": 15748 }, { "epoch": 0.5081234334386614, "grad_norm": 0.34375, "learning_rate": 1.5429570632533868e-05, "loss": 1.9793, "step": 15749 }, { "epoch": 0.5081556972924577, "grad_norm": 0.33203125, "learning_rate": 1.542799754168048e-05, "loss": 1.9915, "step": 15750 }, { "epoch": 0.5081879611462541, "grad_norm": 0.357421875, "learning_rate": 1.542642444611601e-05, "loss": 1.9825, "step": 15751 }, { "epoch": 0.5082202250000504, "grad_norm": 0.3515625, "learning_rate": 1.5424851345857753e-05, "loss": 1.9731, "step": 15752 }, { "epoch": 0.5082524888538468, "grad_norm": 0.33203125, "learning_rate": 1.542327824092305e-05, "loss": 1.9987, "step": 15753 }, { "epoch": 0.5082847527076431, "grad_norm": 0.361328125, "learning_rate": 1.54217051313292e-05, "loss": 1.9955, "step": 15754 }, { "epoch": 0.5083170165614395, "grad_norm": 0.3359375, "learning_rate": 1.5420132017093517e-05, "loss": 1.9771, "step": 15755 }, { "epoch": 0.5083492804152357, "grad_norm": 0.33984375, "learning_rate": 1.5418558898233328e-05, "loss": 1.9801, "step": 15756 }, { "epoch": 0.5083815442690321, "grad_norm": 0.359375, "learning_rate": 1.5416985774765943e-05, "loss": 1.9953, "step": 15757 }, { "epoch": 0.5084138081228284, "grad_norm": 0.357421875, "learning_rate": 1.5415412646708674e-05, "loss": 1.9951, "step": 15758 }, { "epoch": 0.5084460719766248, "grad_norm": 0.3359375, "learning_rate": 1.541383951407884e-05, "loss": 1.9591, "step": 15759 }, { "epoch": 0.5084783358304212, "grad_norm": 0.361328125, "learning_rate": 1.5412266376893763e-05, "loss": 1.9772, "step": 15760 }, { "epoch": 0.5085105996842175, "grad_norm": 0.349609375, "learning_rate": 1.5410693235170748e-05, "loss": 1.9818, "step": 15761 }, { "epoch": 0.5085428635380139, "grad_norm": 0.34765625, "learning_rate": 1.540912008892712e-05, "loss": 2.0026, "step": 15762 }, { "epoch": 0.5085751273918102, "grad_norm": 0.37109375, "learning_rate": 1.540754693818019e-05, "loss": 1.9588, "step": 15763 }, { "epoch": 0.5086073912456066, "grad_norm": 0.337890625, "learning_rate": 1.5405973782947272e-05, "loss": 1.9608, "step": 15764 }, { "epoch": 0.5086396550994029, "grad_norm": 0.380859375, "learning_rate": 1.5404400623245692e-05, "loss": 1.9845, "step": 15765 }, { "epoch": 0.5086719189531993, "grad_norm": 0.337890625, "learning_rate": 1.5402827459092753e-05, "loss": 1.9624, "step": 15766 }, { "epoch": 0.5087041828069956, "grad_norm": 0.345703125, "learning_rate": 1.5401254290505787e-05, "loss": 2.0076, "step": 15767 }, { "epoch": 0.508736446660792, "grad_norm": 0.361328125, "learning_rate": 1.5399681117502103e-05, "loss": 1.9671, "step": 15768 }, { "epoch": 0.5087687105145883, "grad_norm": 0.3671875, "learning_rate": 1.5398107940099005e-05, "loss": 1.9634, "step": 15769 }, { "epoch": 0.5088009743683847, "grad_norm": 0.33984375, "learning_rate": 1.5396534758313826e-05, "loss": 1.995, "step": 15770 }, { "epoch": 0.508833238222181, "grad_norm": 0.365234375, "learning_rate": 1.539496157216388e-05, "loss": 1.9871, "step": 15771 }, { "epoch": 0.5088655020759774, "grad_norm": 0.328125, "learning_rate": 1.5393388381666478e-05, "loss": 1.9541, "step": 15772 }, { "epoch": 0.5088977659297736, "grad_norm": 0.3515625, "learning_rate": 1.5391815186838936e-05, "loss": 1.9797, "step": 15773 }, { "epoch": 0.50893002978357, "grad_norm": 0.341796875, "learning_rate": 1.5390241987698576e-05, "loss": 1.9802, "step": 15774 }, { "epoch": 0.5089622936373663, "grad_norm": 0.353515625, "learning_rate": 1.538866878426271e-05, "loss": 1.987, "step": 15775 }, { "epoch": 0.5089945574911627, "grad_norm": 0.345703125, "learning_rate": 1.538709557654866e-05, "loss": 1.9836, "step": 15776 }, { "epoch": 0.509026821344959, "grad_norm": 0.3359375, "learning_rate": 1.5385522364573742e-05, "loss": 1.9731, "step": 15777 }, { "epoch": 0.5090590851987554, "grad_norm": 0.3359375, "learning_rate": 1.5383949148355264e-05, "loss": 1.9602, "step": 15778 }, { "epoch": 0.5090913490525518, "grad_norm": 0.33984375, "learning_rate": 1.538237592791055e-05, "loss": 1.9528, "step": 15779 }, { "epoch": 0.5091236129063481, "grad_norm": 0.337890625, "learning_rate": 1.5380802703256923e-05, "loss": 1.9919, "step": 15780 }, { "epoch": 0.5091558767601445, "grad_norm": 0.33984375, "learning_rate": 1.5379229474411684e-05, "loss": 1.9788, "step": 15781 }, { "epoch": 0.5091881406139408, "grad_norm": 0.34375, "learning_rate": 1.5377656241392165e-05, "loss": 1.9921, "step": 15782 }, { "epoch": 0.5092204044677372, "grad_norm": 0.33203125, "learning_rate": 1.537608300421567e-05, "loss": 1.9838, "step": 15783 }, { "epoch": 0.5092526683215335, "grad_norm": 0.3515625, "learning_rate": 1.537450976289953e-05, "loss": 2.0003, "step": 15784 }, { "epoch": 0.5092849321753299, "grad_norm": 0.34375, "learning_rate": 1.5372936517461055e-05, "loss": 1.9738, "step": 15785 }, { "epoch": 0.5093171960291262, "grad_norm": 0.34765625, "learning_rate": 1.537136326791756e-05, "loss": 1.9664, "step": 15786 }, { "epoch": 0.5093494598829226, "grad_norm": 0.33984375, "learning_rate": 1.5369790014286365e-05, "loss": 1.9684, "step": 15787 }, { "epoch": 0.5093817237367189, "grad_norm": 0.33203125, "learning_rate": 1.536821675658479e-05, "loss": 1.9423, "step": 15788 }, { "epoch": 0.5094139875905153, "grad_norm": 0.34765625, "learning_rate": 1.5366643494830146e-05, "loss": 1.9825, "step": 15789 }, { "epoch": 0.5094462514443115, "grad_norm": 0.337890625, "learning_rate": 1.536507022903975e-05, "loss": 1.9439, "step": 15790 }, { "epoch": 0.509478515298108, "grad_norm": 0.345703125, "learning_rate": 1.536349695923093e-05, "loss": 1.9798, "step": 15791 }, { "epoch": 0.5095107791519042, "grad_norm": 0.3359375, "learning_rate": 1.536192368542099e-05, "loss": 1.9589, "step": 15792 }, { "epoch": 0.5095430430057006, "grad_norm": 0.341796875, "learning_rate": 1.536035040762726e-05, "loss": 1.9667, "step": 15793 }, { "epoch": 0.5095753068594969, "grad_norm": 0.328125, "learning_rate": 1.5358777125867046e-05, "loss": 1.9585, "step": 15794 }, { "epoch": 0.5096075707132933, "grad_norm": 0.33984375, "learning_rate": 1.535720384015767e-05, "loss": 1.9775, "step": 15795 }, { "epoch": 0.5096398345670896, "grad_norm": 0.326171875, "learning_rate": 1.5355630550516455e-05, "loss": 1.9552, "step": 15796 }, { "epoch": 0.509672098420886, "grad_norm": 0.341796875, "learning_rate": 1.5354057256960714e-05, "loss": 1.9684, "step": 15797 }, { "epoch": 0.5097043622746823, "grad_norm": 0.341796875, "learning_rate": 1.535248395950776e-05, "loss": 2.0142, "step": 15798 }, { "epoch": 0.5097366261284787, "grad_norm": 0.33984375, "learning_rate": 1.535091065817492e-05, "loss": 1.9541, "step": 15799 }, { "epoch": 0.5097688899822751, "grad_norm": 0.341796875, "learning_rate": 1.53493373529795e-05, "loss": 1.971, "step": 15800 }, { "epoch": 0.5098011538360714, "grad_norm": 0.33984375, "learning_rate": 1.5347764043938832e-05, "loss": 1.9593, "step": 15801 }, { "epoch": 0.5098334176898678, "grad_norm": 0.3515625, "learning_rate": 1.534619073107023e-05, "loss": 1.9935, "step": 15802 }, { "epoch": 0.5098656815436641, "grad_norm": 0.33984375, "learning_rate": 1.5344617414391003e-05, "loss": 2.0033, "step": 15803 }, { "epoch": 0.5098979453974605, "grad_norm": 0.357421875, "learning_rate": 1.5343044093918473e-05, "loss": 1.9902, "step": 15804 }, { "epoch": 0.5099302092512568, "grad_norm": 0.33984375, "learning_rate": 1.5341470769669962e-05, "loss": 1.967, "step": 15805 }, { "epoch": 0.5099624731050532, "grad_norm": 0.373046875, "learning_rate": 1.5339897441662786e-05, "loss": 1.9839, "step": 15806 }, { "epoch": 0.5099947369588494, "grad_norm": 0.3359375, "learning_rate": 1.5338324109914262e-05, "loss": 1.9833, "step": 15807 }, { "epoch": 0.5100270008126458, "grad_norm": 0.34765625, "learning_rate": 1.5336750774441714e-05, "loss": 1.9904, "step": 15808 }, { "epoch": 0.5100592646664421, "grad_norm": 0.375, "learning_rate": 1.533517743526245e-05, "loss": 1.9713, "step": 15809 }, { "epoch": 0.5100915285202385, "grad_norm": 0.33203125, "learning_rate": 1.533360409239379e-05, "loss": 1.9613, "step": 15810 }, { "epoch": 0.5101237923740348, "grad_norm": 0.35546875, "learning_rate": 1.5332030745853063e-05, "loss": 1.9878, "step": 15811 }, { "epoch": 0.5101560562278312, "grad_norm": 0.361328125, "learning_rate": 1.5330457395657578e-05, "loss": 1.9811, "step": 15812 }, { "epoch": 0.5101883200816275, "grad_norm": 0.33984375, "learning_rate": 1.5328884041824652e-05, "loss": 1.9758, "step": 15813 }, { "epoch": 0.5102205839354239, "grad_norm": 0.392578125, "learning_rate": 1.532731068437161e-05, "loss": 1.9757, "step": 15814 }, { "epoch": 0.5102528477892202, "grad_norm": 0.349609375, "learning_rate": 1.532573732331576e-05, "loss": 2.006, "step": 15815 }, { "epoch": 0.5102851116430166, "grad_norm": 0.345703125, "learning_rate": 1.5324163958674434e-05, "loss": 1.9477, "step": 15816 }, { "epoch": 0.5103173754968129, "grad_norm": 0.3515625, "learning_rate": 1.532259059046494e-05, "loss": 2.0024, "step": 15817 }, { "epoch": 0.5103496393506093, "grad_norm": 0.353515625, "learning_rate": 1.53210172187046e-05, "loss": 1.9759, "step": 15818 }, { "epoch": 0.5103819032044057, "grad_norm": 0.353515625, "learning_rate": 1.5319443843410738e-05, "loss": 1.9555, "step": 15819 }, { "epoch": 0.510414167058202, "grad_norm": 0.33984375, "learning_rate": 1.531787046460066e-05, "loss": 1.9873, "step": 15820 }, { "epoch": 0.5104464309119984, "grad_norm": 0.35546875, "learning_rate": 1.53162970822917e-05, "loss": 1.998, "step": 15821 }, { "epoch": 0.5104786947657947, "grad_norm": 0.349609375, "learning_rate": 1.5314723696501166e-05, "loss": 1.9672, "step": 15822 }, { "epoch": 0.5105109586195911, "grad_norm": 0.3359375, "learning_rate": 1.5313150307246376e-05, "loss": 1.9432, "step": 15823 }, { "epoch": 0.5105432224733873, "grad_norm": 0.359375, "learning_rate": 1.5311576914544653e-05, "loss": 1.9547, "step": 15824 }, { "epoch": 0.5105754863271837, "grad_norm": 0.34375, "learning_rate": 1.5310003518413318e-05, "loss": 1.9294, "step": 15825 }, { "epoch": 0.51060775018098, "grad_norm": 0.361328125, "learning_rate": 1.530843011886968e-05, "loss": 1.9963, "step": 15826 }, { "epoch": 0.5106400140347764, "grad_norm": 0.376953125, "learning_rate": 1.5306856715931072e-05, "loss": 1.9888, "step": 15827 }, { "epoch": 0.5106722778885727, "grad_norm": 0.36328125, "learning_rate": 1.5305283309614802e-05, "loss": 2.0126, "step": 15828 }, { "epoch": 0.5107045417423691, "grad_norm": 0.33984375, "learning_rate": 1.530370989993819e-05, "loss": 1.9928, "step": 15829 }, { "epoch": 0.5107368055961654, "grad_norm": 0.361328125, "learning_rate": 1.5302136486918566e-05, "loss": 1.9741, "step": 15830 }, { "epoch": 0.5107690694499618, "grad_norm": 0.3515625, "learning_rate": 1.5300563070573233e-05, "loss": 1.9996, "step": 15831 }, { "epoch": 0.5108013333037581, "grad_norm": 0.376953125, "learning_rate": 1.5298989650919514e-05, "loss": 1.9808, "step": 15832 }, { "epoch": 0.5108335971575545, "grad_norm": 0.35546875, "learning_rate": 1.5297416227974738e-05, "loss": 1.9732, "step": 15833 }, { "epoch": 0.5108658610113508, "grad_norm": 0.34765625, "learning_rate": 1.5295842801756216e-05, "loss": 1.9692, "step": 15834 }, { "epoch": 0.5108981248651472, "grad_norm": 0.40625, "learning_rate": 1.5294269372281265e-05, "loss": 2.0038, "step": 15835 }, { "epoch": 0.5109303887189435, "grad_norm": 0.375, "learning_rate": 1.5292695939567217e-05, "loss": 1.9747, "step": 15836 }, { "epoch": 0.5109626525727399, "grad_norm": 0.365234375, "learning_rate": 1.5291122503631374e-05, "loss": 1.9795, "step": 15837 }, { "epoch": 0.5109949164265362, "grad_norm": 0.369140625, "learning_rate": 1.5289549064491063e-05, "loss": 1.9533, "step": 15838 }, { "epoch": 0.5110271802803326, "grad_norm": 0.36328125, "learning_rate": 1.5287975622163608e-05, "loss": 1.9976, "step": 15839 }, { "epoch": 0.511059444134129, "grad_norm": 0.36328125, "learning_rate": 1.528640217666632e-05, "loss": 1.987, "step": 15840 }, { "epoch": 0.5110917079879252, "grad_norm": 0.3515625, "learning_rate": 1.5284828728016524e-05, "loss": 2.0198, "step": 15841 }, { "epoch": 0.5111239718417216, "grad_norm": 0.361328125, "learning_rate": 1.528325527623154e-05, "loss": 1.9958, "step": 15842 }, { "epoch": 0.5111562356955179, "grad_norm": 0.357421875, "learning_rate": 1.5281681821328683e-05, "loss": 1.994, "step": 15843 }, { "epoch": 0.5111884995493143, "grad_norm": 0.3828125, "learning_rate": 1.5280108363325277e-05, "loss": 1.9779, "step": 15844 }, { "epoch": 0.5112207634031106, "grad_norm": 0.3671875, "learning_rate": 1.5278534902238637e-05, "loss": 1.9602, "step": 15845 }, { "epoch": 0.511253027256907, "grad_norm": 0.353515625, "learning_rate": 1.527696143808608e-05, "loss": 1.975, "step": 15846 }, { "epoch": 0.5112852911107033, "grad_norm": 0.369140625, "learning_rate": 1.527538797088494e-05, "loss": 1.9766, "step": 15847 }, { "epoch": 0.5113175549644997, "grad_norm": 0.3515625, "learning_rate": 1.5273814500652516e-05, "loss": 1.9862, "step": 15848 }, { "epoch": 0.511349818818296, "grad_norm": 0.333984375, "learning_rate": 1.5272241027406145e-05, "loss": 1.9549, "step": 15849 }, { "epoch": 0.5113820826720924, "grad_norm": 0.359375, "learning_rate": 1.5270667551163142e-05, "loss": 1.9609, "step": 15850 }, { "epoch": 0.5114143465258887, "grad_norm": 0.33984375, "learning_rate": 1.526909407194082e-05, "loss": 1.9866, "step": 15851 }, { "epoch": 0.5114466103796851, "grad_norm": 0.4375, "learning_rate": 1.5267520589756505e-05, "loss": 1.9737, "step": 15852 }, { "epoch": 0.5114788742334814, "grad_norm": 0.3515625, "learning_rate": 1.5265947104627516e-05, "loss": 1.9946, "step": 15853 }, { "epoch": 0.5115111380872778, "grad_norm": 0.32421875, "learning_rate": 1.5264373616571168e-05, "loss": 1.9905, "step": 15854 }, { "epoch": 0.5115434019410741, "grad_norm": 0.365234375, "learning_rate": 1.526280012560479e-05, "loss": 1.9843, "step": 15855 }, { "epoch": 0.5115756657948705, "grad_norm": 0.337890625, "learning_rate": 1.5261226631745693e-05, "loss": 1.9811, "step": 15856 }, { "epoch": 0.5116079296486667, "grad_norm": 0.35546875, "learning_rate": 1.5259653135011204e-05, "loss": 1.9906, "step": 15857 }, { "epoch": 0.5116401935024631, "grad_norm": 0.35546875, "learning_rate": 1.5258079635418641e-05, "loss": 1.9539, "step": 15858 }, { "epoch": 0.5116724573562594, "grad_norm": 0.33984375, "learning_rate": 1.5256506132985318e-05, "loss": 1.9836, "step": 15859 }, { "epoch": 0.5117047212100558, "grad_norm": 0.34765625, "learning_rate": 1.5254932627728561e-05, "loss": 1.985, "step": 15860 }, { "epoch": 0.5117369850638522, "grad_norm": 0.34375, "learning_rate": 1.525335911966569e-05, "loss": 1.9881, "step": 15861 }, { "epoch": 0.5117692489176485, "grad_norm": 0.3515625, "learning_rate": 1.5251785608814022e-05, "loss": 1.9839, "step": 15862 }, { "epoch": 0.5118015127714449, "grad_norm": 0.34375, "learning_rate": 1.525021209519088e-05, "loss": 1.969, "step": 15863 }, { "epoch": 0.5118337766252412, "grad_norm": 0.333984375, "learning_rate": 1.5248638578813584e-05, "loss": 1.9511, "step": 15864 }, { "epoch": 0.5118660404790376, "grad_norm": 0.453125, "learning_rate": 1.5247065059699447e-05, "loss": 1.9866, "step": 15865 }, { "epoch": 0.5118983043328339, "grad_norm": 0.341796875, "learning_rate": 1.52454915378658e-05, "loss": 1.9772, "step": 15866 }, { "epoch": 0.5119305681866303, "grad_norm": 0.349609375, "learning_rate": 1.5243918013329956e-05, "loss": 1.9676, "step": 15867 }, { "epoch": 0.5119628320404266, "grad_norm": 0.341796875, "learning_rate": 1.5242344486109239e-05, "loss": 1.9679, "step": 15868 }, { "epoch": 0.511995095894223, "grad_norm": 0.33203125, "learning_rate": 1.5240770956220967e-05, "loss": 1.996, "step": 15869 }, { "epoch": 0.5120273597480193, "grad_norm": 0.34375, "learning_rate": 1.5239197423682463e-05, "loss": 1.9791, "step": 15870 }, { "epoch": 0.5120596236018157, "grad_norm": 0.35546875, "learning_rate": 1.523762388851104e-05, "loss": 1.9887, "step": 15871 }, { "epoch": 0.512091887455612, "grad_norm": 0.345703125, "learning_rate": 1.5236050350724027e-05, "loss": 1.9773, "step": 15872 }, { "epoch": 0.5121241513094084, "grad_norm": 0.34765625, "learning_rate": 1.5234476810338747e-05, "loss": 1.9764, "step": 15873 }, { "epoch": 0.5121564151632046, "grad_norm": 0.345703125, "learning_rate": 1.5232903267372507e-05, "loss": 2.0114, "step": 15874 }, { "epoch": 0.512188679017001, "grad_norm": 0.353515625, "learning_rate": 1.5231329721842637e-05, "loss": 2.0099, "step": 15875 }, { "epoch": 0.5122209428707973, "grad_norm": 0.341796875, "learning_rate": 1.5229756173766455e-05, "loss": 1.9654, "step": 15876 }, { "epoch": 0.5122532067245937, "grad_norm": 0.345703125, "learning_rate": 1.5228182623161282e-05, "loss": 1.981, "step": 15877 }, { "epoch": 0.51228547057839, "grad_norm": 0.33203125, "learning_rate": 1.5226609070044441e-05, "loss": 1.9729, "step": 15878 }, { "epoch": 0.5123177344321864, "grad_norm": 0.353515625, "learning_rate": 1.5225035514433246e-05, "loss": 1.9364, "step": 15879 }, { "epoch": 0.5123499982859828, "grad_norm": 0.337890625, "learning_rate": 1.522346195634502e-05, "loss": 1.9623, "step": 15880 }, { "epoch": 0.5123822621397791, "grad_norm": 0.341796875, "learning_rate": 1.522188839579709e-05, "loss": 1.9382, "step": 15881 }, { "epoch": 0.5124145259935755, "grad_norm": 0.462890625, "learning_rate": 1.522031483280677e-05, "loss": 1.9559, "step": 15882 }, { "epoch": 0.5124467898473718, "grad_norm": 0.345703125, "learning_rate": 1.5218741267391382e-05, "loss": 1.9673, "step": 15883 }, { "epoch": 0.5124790537011682, "grad_norm": 0.345703125, "learning_rate": 1.5217167699568249e-05, "loss": 1.9871, "step": 15884 }, { "epoch": 0.5125113175549645, "grad_norm": 0.34375, "learning_rate": 1.5215594129354688e-05, "loss": 1.9833, "step": 15885 }, { "epoch": 0.5125435814087609, "grad_norm": 0.3515625, "learning_rate": 1.5214020556768017e-05, "loss": 1.9401, "step": 15886 }, { "epoch": 0.5125758452625572, "grad_norm": 0.349609375, "learning_rate": 1.521244698182557e-05, "loss": 1.9806, "step": 15887 }, { "epoch": 0.5126081091163536, "grad_norm": 0.353515625, "learning_rate": 1.5210873404544652e-05, "loss": 1.9775, "step": 15888 }, { "epoch": 0.5126403729701499, "grad_norm": 0.337890625, "learning_rate": 1.5209299824942598e-05, "loss": 1.9861, "step": 15889 }, { "epoch": 0.5126726368239463, "grad_norm": 0.341796875, "learning_rate": 1.5207726243036715e-05, "loss": 1.9451, "step": 15890 }, { "epoch": 0.5127049006777425, "grad_norm": 0.337890625, "learning_rate": 1.520615265884433e-05, "loss": 1.9598, "step": 15891 }, { "epoch": 0.512737164531539, "grad_norm": 0.337890625, "learning_rate": 1.520457907238277e-05, "loss": 1.9876, "step": 15892 }, { "epoch": 0.5127694283853352, "grad_norm": 0.337890625, "learning_rate": 1.5203005483669347e-05, "loss": 1.977, "step": 15893 }, { "epoch": 0.5128016922391316, "grad_norm": 0.34765625, "learning_rate": 1.5201431892721384e-05, "loss": 1.9994, "step": 15894 }, { "epoch": 0.5128339560929279, "grad_norm": 0.330078125, "learning_rate": 1.5199858299556207e-05, "loss": 1.9547, "step": 15895 }, { "epoch": 0.5128662199467243, "grad_norm": 0.369140625, "learning_rate": 1.5198284704191132e-05, "loss": 1.9437, "step": 15896 }, { "epoch": 0.5128984838005206, "grad_norm": 0.341796875, "learning_rate": 1.5196711106643479e-05, "loss": 1.9741, "step": 15897 }, { "epoch": 0.512930747654317, "grad_norm": 0.34375, "learning_rate": 1.5195137506930575e-05, "loss": 1.9628, "step": 15898 }, { "epoch": 0.5129630115081133, "grad_norm": 0.361328125, "learning_rate": 1.5193563905069732e-05, "loss": 1.957, "step": 15899 }, { "epoch": 0.5129952753619097, "grad_norm": 0.349609375, "learning_rate": 1.519199030107828e-05, "loss": 2.0066, "step": 15900 }, { "epoch": 0.5130275392157061, "grad_norm": 0.3515625, "learning_rate": 1.5190416694973538e-05, "loss": 1.9884, "step": 15901 }, { "epoch": 0.5130598030695024, "grad_norm": 0.345703125, "learning_rate": 1.5188843086772822e-05, "loss": 2.0087, "step": 15902 }, { "epoch": 0.5130920669232988, "grad_norm": 0.33984375, "learning_rate": 1.518726947649346e-05, "loss": 1.9942, "step": 15903 }, { "epoch": 0.5131243307770951, "grad_norm": 0.373046875, "learning_rate": 1.5185695864152766e-05, "loss": 1.9947, "step": 15904 }, { "epoch": 0.5131565946308915, "grad_norm": 0.33984375, "learning_rate": 1.5184122249768069e-05, "loss": 1.9652, "step": 15905 }, { "epoch": 0.5131888584846878, "grad_norm": 0.37890625, "learning_rate": 1.5182548633356686e-05, "loss": 1.9796, "step": 15906 }, { "epoch": 0.5132211223384842, "grad_norm": 0.36328125, "learning_rate": 1.5180975014935936e-05, "loss": 1.9802, "step": 15907 }, { "epoch": 0.5132533861922804, "grad_norm": 0.369140625, "learning_rate": 1.5179401394523144e-05, "loss": 1.9889, "step": 15908 }, { "epoch": 0.5132856500460768, "grad_norm": 0.3828125, "learning_rate": 1.517782777213563e-05, "loss": 2.0024, "step": 15909 }, { "epoch": 0.5133179138998731, "grad_norm": 0.38671875, "learning_rate": 1.5176254147790717e-05, "loss": 2.0278, "step": 15910 }, { "epoch": 0.5133501777536695, "grad_norm": 0.341796875, "learning_rate": 1.5174680521505722e-05, "loss": 1.9868, "step": 15911 }, { "epoch": 0.5133824416074658, "grad_norm": 0.353515625, "learning_rate": 1.5173106893297975e-05, "loss": 1.9884, "step": 15912 }, { "epoch": 0.5134147054612622, "grad_norm": 0.333984375, "learning_rate": 1.5171533263184784e-05, "loss": 1.9663, "step": 15913 }, { "epoch": 0.5134469693150585, "grad_norm": 0.365234375, "learning_rate": 1.5169959631183479e-05, "loss": 1.9763, "step": 15914 }, { "epoch": 0.5134792331688549, "grad_norm": 0.3515625, "learning_rate": 1.5168385997311386e-05, "loss": 1.9808, "step": 15915 }, { "epoch": 0.5135114970226512, "grad_norm": 0.35546875, "learning_rate": 1.5166812361585815e-05, "loss": 1.9824, "step": 15916 }, { "epoch": 0.5135437608764476, "grad_norm": 0.353515625, "learning_rate": 1.5165238724024094e-05, "loss": 1.9914, "step": 15917 }, { "epoch": 0.5135760247302439, "grad_norm": 0.34375, "learning_rate": 1.5163665084643548e-05, "loss": 1.972, "step": 15918 }, { "epoch": 0.5136082885840403, "grad_norm": 0.359375, "learning_rate": 1.5162091443461493e-05, "loss": 1.9862, "step": 15919 }, { "epoch": 0.5136405524378366, "grad_norm": 0.35546875, "learning_rate": 1.5160517800495251e-05, "loss": 1.9776, "step": 15920 }, { "epoch": 0.513672816291633, "grad_norm": 0.33984375, "learning_rate": 1.5158944155762142e-05, "loss": 1.9864, "step": 15921 }, { "epoch": 0.5137050801454294, "grad_norm": 0.359375, "learning_rate": 1.515737050927949e-05, "loss": 1.9992, "step": 15922 }, { "epoch": 0.5137373439992257, "grad_norm": 0.353515625, "learning_rate": 1.5155796861064618e-05, "loss": 1.9807, "step": 15923 }, { "epoch": 0.5137696078530221, "grad_norm": 0.35546875, "learning_rate": 1.5154223211134845e-05, "loss": 1.9739, "step": 15924 }, { "epoch": 0.5138018717068183, "grad_norm": 0.34765625, "learning_rate": 1.5152649559507492e-05, "loss": 1.9822, "step": 15925 }, { "epoch": 0.5138341355606147, "grad_norm": 0.341796875, "learning_rate": 1.5151075906199886e-05, "loss": 1.9586, "step": 15926 }, { "epoch": 0.513866399414411, "grad_norm": 0.34375, "learning_rate": 1.514950225122934e-05, "loss": 1.9683, "step": 15927 }, { "epoch": 0.5138986632682074, "grad_norm": 0.34375, "learning_rate": 1.5147928594613185e-05, "loss": 2.0054, "step": 15928 }, { "epoch": 0.5139309271220037, "grad_norm": 0.341796875, "learning_rate": 1.5146354936368737e-05, "loss": 1.9978, "step": 15929 }, { "epoch": 0.5139631909758001, "grad_norm": 0.33984375, "learning_rate": 1.5144781276513319e-05, "loss": 1.9758, "step": 15930 }, { "epoch": 0.5139954548295964, "grad_norm": 0.333984375, "learning_rate": 1.5143207615064249e-05, "loss": 1.961, "step": 15931 }, { "epoch": 0.5140277186833928, "grad_norm": 0.341796875, "learning_rate": 1.5141633952038855e-05, "loss": 1.9889, "step": 15932 }, { "epoch": 0.5140599825371891, "grad_norm": 0.337890625, "learning_rate": 1.5140060287454457e-05, "loss": 1.9498, "step": 15933 }, { "epoch": 0.5140922463909855, "grad_norm": 0.349609375, "learning_rate": 1.5138486621328378e-05, "loss": 1.9845, "step": 15934 }, { "epoch": 0.5141245102447818, "grad_norm": 0.33984375, "learning_rate": 1.5136912953677931e-05, "loss": 2.005, "step": 15935 }, { "epoch": 0.5141567740985782, "grad_norm": 0.34765625, "learning_rate": 1.5135339284520448e-05, "loss": 1.971, "step": 15936 }, { "epoch": 0.5141890379523745, "grad_norm": 0.36328125, "learning_rate": 1.5133765613873249e-05, "loss": 1.9908, "step": 15937 }, { "epoch": 0.5142213018061709, "grad_norm": 0.361328125, "learning_rate": 1.5132191941753651e-05, "loss": 1.9904, "step": 15938 }, { "epoch": 0.5142535656599672, "grad_norm": 0.345703125, "learning_rate": 1.5130618268178979e-05, "loss": 1.9728, "step": 15939 }, { "epoch": 0.5142858295137636, "grad_norm": 0.34765625, "learning_rate": 1.5129044593166559e-05, "loss": 1.9573, "step": 15940 }, { "epoch": 0.51431809336756, "grad_norm": 0.34765625, "learning_rate": 1.5127470916733705e-05, "loss": 1.9687, "step": 15941 }, { "epoch": 0.5143503572213562, "grad_norm": 0.400390625, "learning_rate": 1.5125897238897742e-05, "loss": 1.9791, "step": 15942 }, { "epoch": 0.5143826210751526, "grad_norm": 0.341796875, "learning_rate": 1.5124323559675995e-05, "loss": 2.0063, "step": 15943 }, { "epoch": 0.5144148849289489, "grad_norm": 0.369140625, "learning_rate": 1.5122749879085783e-05, "loss": 1.9777, "step": 15944 }, { "epoch": 0.5144471487827453, "grad_norm": 0.3359375, "learning_rate": 1.5121176197144424e-05, "loss": 1.966, "step": 15945 }, { "epoch": 0.5144794126365416, "grad_norm": 0.345703125, "learning_rate": 1.511960251386925e-05, "loss": 1.9865, "step": 15946 }, { "epoch": 0.514511676490338, "grad_norm": 0.33984375, "learning_rate": 1.511802882927758e-05, "loss": 1.952, "step": 15947 }, { "epoch": 0.5145439403441343, "grad_norm": 0.33203125, "learning_rate": 1.5116455143386724e-05, "loss": 1.9426, "step": 15948 }, { "epoch": 0.5145762041979307, "grad_norm": 0.33984375, "learning_rate": 1.511488145621402e-05, "loss": 2.0021, "step": 15949 }, { "epoch": 0.514608468051727, "grad_norm": 0.345703125, "learning_rate": 1.5113307767776778e-05, "loss": 1.9692, "step": 15950 }, { "epoch": 0.5146407319055234, "grad_norm": 0.337890625, "learning_rate": 1.511173407809233e-05, "loss": 1.9232, "step": 15951 }, { "epoch": 0.5146729957593197, "grad_norm": 0.373046875, "learning_rate": 1.5110160387177993e-05, "loss": 1.9786, "step": 15952 }, { "epoch": 0.5147052596131161, "grad_norm": 0.341796875, "learning_rate": 1.5108586695051087e-05, "loss": 2.0065, "step": 15953 }, { "epoch": 0.5147375234669124, "grad_norm": 0.34765625, "learning_rate": 1.5107013001728942e-05, "loss": 2.0033, "step": 15954 }, { "epoch": 0.5147697873207088, "grad_norm": 0.361328125, "learning_rate": 1.510543930722887e-05, "loss": 1.9643, "step": 15955 }, { "epoch": 0.5148020511745051, "grad_norm": 0.361328125, "learning_rate": 1.5103865611568197e-05, "loss": 1.9423, "step": 15956 }, { "epoch": 0.5148343150283015, "grad_norm": 0.384765625, "learning_rate": 1.510229191476425e-05, "loss": 1.9772, "step": 15957 }, { "epoch": 0.5148665788820977, "grad_norm": 0.353515625, "learning_rate": 1.5100718216834341e-05, "loss": 1.9679, "step": 15958 }, { "epoch": 0.5148988427358941, "grad_norm": 0.353515625, "learning_rate": 1.5099144517795804e-05, "loss": 1.9862, "step": 15959 }, { "epoch": 0.5149311065896904, "grad_norm": 0.376953125, "learning_rate": 1.5097570817665955e-05, "loss": 1.9743, "step": 15960 }, { "epoch": 0.5149633704434868, "grad_norm": 0.349609375, "learning_rate": 1.5095997116462115e-05, "loss": 1.9778, "step": 15961 }, { "epoch": 0.5149956342972832, "grad_norm": 0.390625, "learning_rate": 1.50944234142016e-05, "loss": 1.9741, "step": 15962 }, { "epoch": 0.5150278981510795, "grad_norm": 0.365234375, "learning_rate": 1.509284971090175e-05, "loss": 2.0306, "step": 15963 }, { "epoch": 0.5150601620048759, "grad_norm": 0.376953125, "learning_rate": 1.5091276006579879e-05, "loss": 1.9873, "step": 15964 }, { "epoch": 0.5150924258586722, "grad_norm": 0.373046875, "learning_rate": 1.50897023012533e-05, "loss": 1.996, "step": 15965 }, { "epoch": 0.5151246897124686, "grad_norm": 0.3515625, "learning_rate": 1.508812859493934e-05, "loss": 1.9818, "step": 15966 }, { "epoch": 0.5151569535662649, "grad_norm": 0.357421875, "learning_rate": 1.5086554887655329e-05, "loss": 1.9825, "step": 15967 }, { "epoch": 0.5151892174200613, "grad_norm": 0.345703125, "learning_rate": 1.5084981179418585e-05, "loss": 1.974, "step": 15968 }, { "epoch": 0.5152214812738576, "grad_norm": 0.35546875, "learning_rate": 1.5083407470246425e-05, "loss": 1.9669, "step": 15969 }, { "epoch": 0.515253745127654, "grad_norm": 0.357421875, "learning_rate": 1.5081833760156177e-05, "loss": 2.0008, "step": 15970 }, { "epoch": 0.5152860089814503, "grad_norm": 0.341796875, "learning_rate": 1.5080260049165163e-05, "loss": 1.9627, "step": 15971 }, { "epoch": 0.5153182728352467, "grad_norm": 0.33984375, "learning_rate": 1.5078686337290702e-05, "loss": 1.9659, "step": 15972 }, { "epoch": 0.515350536689043, "grad_norm": 0.35546875, "learning_rate": 1.5077112624550117e-05, "loss": 1.9835, "step": 15973 }, { "epoch": 0.5153828005428394, "grad_norm": 0.3515625, "learning_rate": 1.5075538910960738e-05, "loss": 1.9696, "step": 15974 }, { "epoch": 0.5154150643966356, "grad_norm": 0.359375, "learning_rate": 1.5073965196539878e-05, "loss": 1.9732, "step": 15975 }, { "epoch": 0.515447328250432, "grad_norm": 0.353515625, "learning_rate": 1.5072391481304856e-05, "loss": 1.9613, "step": 15976 }, { "epoch": 0.5154795921042283, "grad_norm": 0.357421875, "learning_rate": 1.5070817765273007e-05, "loss": 1.9733, "step": 15977 }, { "epoch": 0.5155118559580247, "grad_norm": 0.369140625, "learning_rate": 1.5069244048461648e-05, "loss": 1.9945, "step": 15978 }, { "epoch": 0.515544119811821, "grad_norm": 0.359375, "learning_rate": 1.5067670330888097e-05, "loss": 1.9913, "step": 15979 }, { "epoch": 0.5155763836656174, "grad_norm": 0.37109375, "learning_rate": 1.5066096612569678e-05, "loss": 1.9841, "step": 15980 }, { "epoch": 0.5156086475194138, "grad_norm": 0.353515625, "learning_rate": 1.5064522893523717e-05, "loss": 1.9454, "step": 15981 }, { "epoch": 0.5156409113732101, "grad_norm": 0.34375, "learning_rate": 1.5062949173767537e-05, "loss": 1.9587, "step": 15982 }, { "epoch": 0.5156731752270065, "grad_norm": 0.361328125, "learning_rate": 1.5061375453318451e-05, "loss": 1.9483, "step": 15983 }, { "epoch": 0.5157054390808028, "grad_norm": 0.345703125, "learning_rate": 1.5059801732193791e-05, "loss": 1.9533, "step": 15984 }, { "epoch": 0.5157377029345992, "grad_norm": 0.34375, "learning_rate": 1.505822801041088e-05, "loss": 1.9534, "step": 15985 }, { "epoch": 0.5157699667883955, "grad_norm": 0.380859375, "learning_rate": 1.5056654287987033e-05, "loss": 1.9326, "step": 15986 }, { "epoch": 0.5158022306421919, "grad_norm": 0.33984375, "learning_rate": 1.5055080564939578e-05, "loss": 1.9976, "step": 15987 }, { "epoch": 0.5158344944959882, "grad_norm": 0.36328125, "learning_rate": 1.5053506841285836e-05, "loss": 1.9677, "step": 15988 }, { "epoch": 0.5158667583497846, "grad_norm": 0.37890625, "learning_rate": 1.5051933117043129e-05, "loss": 1.9874, "step": 15989 }, { "epoch": 0.5158990222035809, "grad_norm": 0.345703125, "learning_rate": 1.5050359392228776e-05, "loss": 1.9583, "step": 15990 }, { "epoch": 0.5159312860573773, "grad_norm": 0.365234375, "learning_rate": 1.5048785666860108e-05, "loss": 1.9597, "step": 15991 }, { "epoch": 0.5159635499111735, "grad_norm": 0.3515625, "learning_rate": 1.5047211940954441e-05, "loss": 1.9823, "step": 15992 }, { "epoch": 0.51599581376497, "grad_norm": 0.337890625, "learning_rate": 1.5045638214529094e-05, "loss": 1.9628, "step": 15993 }, { "epoch": 0.5160280776187662, "grad_norm": 0.345703125, "learning_rate": 1.5044064487601401e-05, "loss": 1.9636, "step": 15994 }, { "epoch": 0.5160603414725626, "grad_norm": 0.353515625, "learning_rate": 1.5042490760188677e-05, "loss": 1.9963, "step": 15995 }, { "epoch": 0.5160926053263589, "grad_norm": 0.341796875, "learning_rate": 1.5040917032308239e-05, "loss": 1.9605, "step": 15996 }, { "epoch": 0.5161248691801553, "grad_norm": 0.3515625, "learning_rate": 1.5039343303977419e-05, "loss": 1.9939, "step": 15997 }, { "epoch": 0.5161571330339516, "grad_norm": 0.35546875, "learning_rate": 1.5037769575213538e-05, "loss": 1.9488, "step": 15998 }, { "epoch": 0.516189396887748, "grad_norm": 0.357421875, "learning_rate": 1.5036195846033917e-05, "loss": 1.9722, "step": 15999 }, { "epoch": 0.5162216607415443, "grad_norm": 0.3515625, "learning_rate": 1.5034622116455873e-05, "loss": 1.9732, "step": 16000 }, { "epoch": 0.5162539245953407, "grad_norm": 0.35546875, "learning_rate": 1.5033048386496736e-05, "loss": 1.9683, "step": 16001 }, { "epoch": 0.5162861884491371, "grad_norm": 0.36328125, "learning_rate": 1.503147465617383e-05, "loss": 1.9512, "step": 16002 }, { "epoch": 0.5163184523029334, "grad_norm": 0.34765625, "learning_rate": 1.502990092550447e-05, "loss": 1.9539, "step": 16003 }, { "epoch": 0.5163507161567298, "grad_norm": 0.34765625, "learning_rate": 1.5028327194505984e-05, "loss": 1.9835, "step": 16004 }, { "epoch": 0.5163829800105261, "grad_norm": 0.337890625, "learning_rate": 1.502675346319569e-05, "loss": 1.9757, "step": 16005 }, { "epoch": 0.5164152438643225, "grad_norm": 0.33203125, "learning_rate": 1.5025179731590917e-05, "loss": 1.9644, "step": 16006 }, { "epoch": 0.5164475077181188, "grad_norm": 0.3515625, "learning_rate": 1.5023605999708977e-05, "loss": 1.9701, "step": 16007 }, { "epoch": 0.5164797715719152, "grad_norm": 0.345703125, "learning_rate": 1.5022032267567207e-05, "loss": 2.0042, "step": 16008 }, { "epoch": 0.5165120354257114, "grad_norm": 0.34375, "learning_rate": 1.5020458535182919e-05, "loss": 1.9896, "step": 16009 }, { "epoch": 0.5165442992795078, "grad_norm": 0.345703125, "learning_rate": 1.5018884802573434e-05, "loss": 1.961, "step": 16010 }, { "epoch": 0.5165765631333041, "grad_norm": 0.341796875, "learning_rate": 1.5017311069756082e-05, "loss": 1.9745, "step": 16011 }, { "epoch": 0.5166088269871005, "grad_norm": 0.33984375, "learning_rate": 1.5015737336748182e-05, "loss": 1.9875, "step": 16012 }, { "epoch": 0.5166410908408968, "grad_norm": 0.341796875, "learning_rate": 1.5014163603567057e-05, "loss": 1.9691, "step": 16013 }, { "epoch": 0.5166733546946932, "grad_norm": 0.35546875, "learning_rate": 1.5012589870230026e-05, "loss": 1.9708, "step": 16014 }, { "epoch": 0.5167056185484895, "grad_norm": 0.34765625, "learning_rate": 1.5011016136754416e-05, "loss": 1.9921, "step": 16015 }, { "epoch": 0.5167378824022859, "grad_norm": 0.330078125, "learning_rate": 1.5009442403157551e-05, "loss": 1.9724, "step": 16016 }, { "epoch": 0.5167701462560822, "grad_norm": 0.35546875, "learning_rate": 1.5007868669456749e-05, "loss": 1.9947, "step": 16017 }, { "epoch": 0.5168024101098786, "grad_norm": 0.353515625, "learning_rate": 1.5006294935669334e-05, "loss": 1.9943, "step": 16018 }, { "epoch": 0.5168346739636749, "grad_norm": 0.330078125, "learning_rate": 1.500472120181263e-05, "loss": 1.9772, "step": 16019 }, { "epoch": 0.5168669378174713, "grad_norm": 0.36328125, "learning_rate": 1.5003147467903962e-05, "loss": 2.0059, "step": 16020 }, { "epoch": 0.5168992016712676, "grad_norm": 0.337890625, "learning_rate": 1.5001573733960637e-05, "loss": 1.9643, "step": 16021 }, { "epoch": 0.516931465525064, "grad_norm": 0.361328125, "learning_rate": 1.5e-05, "loss": 1.9475, "step": 16022 }, { "epoch": 0.5169637293788604, "grad_norm": 0.341796875, "learning_rate": 1.499842626603936e-05, "loss": 1.9717, "step": 16023 }, { "epoch": 0.5169959932326567, "grad_norm": 0.36328125, "learning_rate": 1.4996852532096046e-05, "loss": 1.9759, "step": 16024 }, { "epoch": 0.5170282570864531, "grad_norm": 0.33984375, "learning_rate": 1.4995278798187373e-05, "loss": 1.9355, "step": 16025 }, { "epoch": 0.5170605209402493, "grad_norm": 0.35546875, "learning_rate": 1.4993705064330665e-05, "loss": 1.9758, "step": 16026 }, { "epoch": 0.5170927847940457, "grad_norm": 0.349609375, "learning_rate": 1.4992131330543254e-05, "loss": 1.9838, "step": 16027 }, { "epoch": 0.517125048647842, "grad_norm": 0.337890625, "learning_rate": 1.4990557596842451e-05, "loss": 1.9782, "step": 16028 }, { "epoch": 0.5171573125016384, "grad_norm": 0.345703125, "learning_rate": 1.4988983863245583e-05, "loss": 1.9816, "step": 16029 }, { "epoch": 0.5171895763554347, "grad_norm": 0.419921875, "learning_rate": 1.4987410129769981e-05, "loss": 1.9915, "step": 16030 }, { "epoch": 0.5172218402092311, "grad_norm": 0.34375, "learning_rate": 1.4985836396432946e-05, "loss": 1.9451, "step": 16031 }, { "epoch": 0.5172541040630274, "grad_norm": 0.345703125, "learning_rate": 1.4984262663251822e-05, "loss": 1.9608, "step": 16032 }, { "epoch": 0.5172863679168238, "grad_norm": 0.330078125, "learning_rate": 1.4982688930243924e-05, "loss": 1.9434, "step": 16033 }, { "epoch": 0.5173186317706201, "grad_norm": 0.34765625, "learning_rate": 1.4981115197426569e-05, "loss": 2.0165, "step": 16034 }, { "epoch": 0.5173508956244165, "grad_norm": 0.326171875, "learning_rate": 1.4979541464817084e-05, "loss": 1.9857, "step": 16035 }, { "epoch": 0.5173831594782128, "grad_norm": 0.341796875, "learning_rate": 1.49779677324328e-05, "loss": 1.9911, "step": 16036 }, { "epoch": 0.5174154233320092, "grad_norm": 0.35546875, "learning_rate": 1.4976394000291027e-05, "loss": 1.9875, "step": 16037 }, { "epoch": 0.5174476871858055, "grad_norm": 0.341796875, "learning_rate": 1.4974820268409085e-05, "loss": 1.9924, "step": 16038 }, { "epoch": 0.5174799510396019, "grad_norm": 0.333984375, "learning_rate": 1.4973246536804308e-05, "loss": 1.9591, "step": 16039 }, { "epoch": 0.5175122148933982, "grad_norm": 0.33984375, "learning_rate": 1.497167280549402e-05, "loss": 1.9663, "step": 16040 }, { "epoch": 0.5175444787471946, "grad_norm": 0.34375, "learning_rate": 1.4970099074495533e-05, "loss": 1.984, "step": 16041 }, { "epoch": 0.517576742600991, "grad_norm": 0.33984375, "learning_rate": 1.4968525343826168e-05, "loss": 1.9944, "step": 16042 }, { "epoch": 0.5176090064547872, "grad_norm": 0.333984375, "learning_rate": 1.4966951613503265e-05, "loss": 1.9887, "step": 16043 }, { "epoch": 0.5176412703085836, "grad_norm": 0.333984375, "learning_rate": 1.4965377883544129e-05, "loss": 1.9926, "step": 16044 }, { "epoch": 0.5176735341623799, "grad_norm": 0.3359375, "learning_rate": 1.4963804153966085e-05, "loss": 2.0025, "step": 16045 }, { "epoch": 0.5177057980161763, "grad_norm": 0.337890625, "learning_rate": 1.4962230424786467e-05, "loss": 1.9841, "step": 16046 }, { "epoch": 0.5177380618699726, "grad_norm": 0.337890625, "learning_rate": 1.4960656696022582e-05, "loss": 1.986, "step": 16047 }, { "epoch": 0.517770325723769, "grad_norm": 0.318359375, "learning_rate": 1.4959082967691762e-05, "loss": 1.9495, "step": 16048 }, { "epoch": 0.5178025895775653, "grad_norm": 0.33203125, "learning_rate": 1.495750923981133e-05, "loss": 1.947, "step": 16049 }, { "epoch": 0.5178348534313617, "grad_norm": 0.333984375, "learning_rate": 1.4955935512398603e-05, "loss": 1.9638, "step": 16050 }, { "epoch": 0.517867117285158, "grad_norm": 0.337890625, "learning_rate": 1.4954361785470905e-05, "loss": 1.9491, "step": 16051 }, { "epoch": 0.5178993811389544, "grad_norm": 0.34375, "learning_rate": 1.4952788059045565e-05, "loss": 1.9612, "step": 16052 }, { "epoch": 0.5179316449927507, "grad_norm": 0.337890625, "learning_rate": 1.4951214333139895e-05, "loss": 1.9557, "step": 16053 }, { "epoch": 0.5179639088465471, "grad_norm": 0.341796875, "learning_rate": 1.4949640607771223e-05, "loss": 1.9779, "step": 16054 }, { "epoch": 0.5179961727003434, "grad_norm": 0.337890625, "learning_rate": 1.4948066882956875e-05, "loss": 1.9776, "step": 16055 }, { "epoch": 0.5180284365541398, "grad_norm": 0.337890625, "learning_rate": 1.4946493158714166e-05, "loss": 1.9585, "step": 16056 }, { "epoch": 0.5180607004079361, "grad_norm": 0.345703125, "learning_rate": 1.4944919435060422e-05, "loss": 1.9652, "step": 16057 }, { "epoch": 0.5180929642617325, "grad_norm": 0.326171875, "learning_rate": 1.494334571201297e-05, "loss": 1.9982, "step": 16058 }, { "epoch": 0.5181252281155287, "grad_norm": 0.341796875, "learning_rate": 1.4941771989589121e-05, "loss": 1.9797, "step": 16059 }, { "epoch": 0.5181574919693251, "grad_norm": 0.345703125, "learning_rate": 1.4940198267806206e-05, "loss": 1.9825, "step": 16060 }, { "epoch": 0.5181897558231214, "grad_norm": 0.337890625, "learning_rate": 1.4938624546681554e-05, "loss": 1.9506, "step": 16061 }, { "epoch": 0.5182220196769178, "grad_norm": 0.3515625, "learning_rate": 1.4937050826232467e-05, "loss": 1.9858, "step": 16062 }, { "epoch": 0.5182542835307142, "grad_norm": 0.345703125, "learning_rate": 1.4935477106476282e-05, "loss": 1.9801, "step": 16063 }, { "epoch": 0.5182865473845105, "grad_norm": 0.3515625, "learning_rate": 1.4933903387430327e-05, "loss": 2.0003, "step": 16064 }, { "epoch": 0.5183188112383069, "grad_norm": 0.3203125, "learning_rate": 1.4932329669111905e-05, "loss": 1.9199, "step": 16065 }, { "epoch": 0.5183510750921032, "grad_norm": 0.3515625, "learning_rate": 1.4930755951538354e-05, "loss": 1.9885, "step": 16066 }, { "epoch": 0.5183833389458996, "grad_norm": 0.34765625, "learning_rate": 1.4929182234726997e-05, "loss": 1.9571, "step": 16067 }, { "epoch": 0.5184156027996959, "grad_norm": 0.337890625, "learning_rate": 1.4927608518695144e-05, "loss": 1.9913, "step": 16068 }, { "epoch": 0.5184478666534923, "grad_norm": 0.357421875, "learning_rate": 1.4926034803460124e-05, "loss": 1.9524, "step": 16069 }, { "epoch": 0.5184801305072886, "grad_norm": 0.333984375, "learning_rate": 1.4924461089039269e-05, "loss": 1.9922, "step": 16070 }, { "epoch": 0.518512394361085, "grad_norm": 0.33984375, "learning_rate": 1.4922887375449885e-05, "loss": 1.9771, "step": 16071 }, { "epoch": 0.5185446582148813, "grad_norm": 0.330078125, "learning_rate": 1.49213136627093e-05, "loss": 1.9704, "step": 16072 }, { "epoch": 0.5185769220686777, "grad_norm": 0.333984375, "learning_rate": 1.4919739950834836e-05, "loss": 1.9829, "step": 16073 }, { "epoch": 0.518609185922474, "grad_norm": 0.337890625, "learning_rate": 1.4918166239843828e-05, "loss": 2.0059, "step": 16074 }, { "epoch": 0.5186414497762704, "grad_norm": 0.330078125, "learning_rate": 1.4916592529753577e-05, "loss": 2.0253, "step": 16075 }, { "epoch": 0.5186737136300666, "grad_norm": 0.328125, "learning_rate": 1.4915018820581417e-05, "loss": 1.989, "step": 16076 }, { "epoch": 0.518705977483863, "grad_norm": 0.326171875, "learning_rate": 1.4913445112344673e-05, "loss": 1.9893, "step": 16077 }, { "epoch": 0.5187382413376593, "grad_norm": 0.349609375, "learning_rate": 1.4911871405060662e-05, "loss": 1.9912, "step": 16078 }, { "epoch": 0.5187705051914557, "grad_norm": 0.330078125, "learning_rate": 1.4910297698746705e-05, "loss": 1.9632, "step": 16079 }, { "epoch": 0.518802769045252, "grad_norm": 0.341796875, "learning_rate": 1.4908723993420129e-05, "loss": 2.0071, "step": 16080 }, { "epoch": 0.5188350328990484, "grad_norm": 0.33984375, "learning_rate": 1.4907150289098252e-05, "loss": 1.9877, "step": 16081 }, { "epoch": 0.5188672967528448, "grad_norm": 0.33203125, "learning_rate": 1.4905576585798397e-05, "loss": 1.9934, "step": 16082 }, { "epoch": 0.5188995606066411, "grad_norm": 0.34375, "learning_rate": 1.490400288353789e-05, "loss": 2.0025, "step": 16083 }, { "epoch": 0.5189318244604375, "grad_norm": 0.337890625, "learning_rate": 1.490242918233405e-05, "loss": 1.94, "step": 16084 }, { "epoch": 0.5189640883142338, "grad_norm": 0.33984375, "learning_rate": 1.4900855482204197e-05, "loss": 1.996, "step": 16085 }, { "epoch": 0.5189963521680302, "grad_norm": 0.337890625, "learning_rate": 1.4899281783165661e-05, "loss": 2.0147, "step": 16086 }, { "epoch": 0.5190286160218265, "grad_norm": 0.328125, "learning_rate": 1.4897708085235753e-05, "loss": 1.9678, "step": 16087 }, { "epoch": 0.5190608798756229, "grad_norm": 0.333984375, "learning_rate": 1.4896134388431802e-05, "loss": 1.9672, "step": 16088 }, { "epoch": 0.5190931437294192, "grad_norm": 0.33203125, "learning_rate": 1.4894560692771134e-05, "loss": 1.9744, "step": 16089 }, { "epoch": 0.5191254075832156, "grad_norm": 0.328125, "learning_rate": 1.4892986998271062e-05, "loss": 2.0003, "step": 16090 }, { "epoch": 0.5191576714370119, "grad_norm": 0.330078125, "learning_rate": 1.4891413304948912e-05, "loss": 1.9936, "step": 16091 }, { "epoch": 0.5191899352908083, "grad_norm": 0.333984375, "learning_rate": 1.4889839612822013e-05, "loss": 1.9789, "step": 16092 }, { "epoch": 0.5192221991446045, "grad_norm": 0.33984375, "learning_rate": 1.488826592190767e-05, "loss": 1.9686, "step": 16093 }, { "epoch": 0.519254462998401, "grad_norm": 0.3359375, "learning_rate": 1.488669223222322e-05, "loss": 1.9634, "step": 16094 }, { "epoch": 0.5192867268521972, "grad_norm": 0.330078125, "learning_rate": 1.4885118543785987e-05, "loss": 1.9876, "step": 16095 }, { "epoch": 0.5193189907059936, "grad_norm": 0.37890625, "learning_rate": 1.488354485661328e-05, "loss": 1.991, "step": 16096 }, { "epoch": 0.5193512545597899, "grad_norm": 0.337890625, "learning_rate": 1.4881971170722423e-05, "loss": 1.9904, "step": 16097 }, { "epoch": 0.5193835184135863, "grad_norm": 0.353515625, "learning_rate": 1.4880397486130756e-05, "loss": 1.9703, "step": 16098 }, { "epoch": 0.5194157822673826, "grad_norm": 0.35546875, "learning_rate": 1.4878823802855578e-05, "loss": 1.9771, "step": 16099 }, { "epoch": 0.519448046121179, "grad_norm": 0.322265625, "learning_rate": 1.4877250120914218e-05, "loss": 1.9874, "step": 16100 }, { "epoch": 0.5194803099749753, "grad_norm": 0.341796875, "learning_rate": 1.4875676440324011e-05, "loss": 1.9904, "step": 16101 }, { "epoch": 0.5195125738287717, "grad_norm": 0.357421875, "learning_rate": 1.4874102761102262e-05, "loss": 2.0016, "step": 16102 }, { "epoch": 0.5195448376825681, "grad_norm": 0.345703125, "learning_rate": 1.4872529083266298e-05, "loss": 1.9646, "step": 16103 }, { "epoch": 0.5195771015363644, "grad_norm": 0.345703125, "learning_rate": 1.4870955406833447e-05, "loss": 1.9748, "step": 16104 }, { "epoch": 0.5196093653901608, "grad_norm": 0.345703125, "learning_rate": 1.4869381731821023e-05, "loss": 1.9672, "step": 16105 }, { "epoch": 0.5196416292439571, "grad_norm": 0.34375, "learning_rate": 1.4867808058246351e-05, "loss": 1.992, "step": 16106 }, { "epoch": 0.5196738930977535, "grad_norm": 0.33203125, "learning_rate": 1.4866234386126753e-05, "loss": 1.9367, "step": 16107 }, { "epoch": 0.5197061569515498, "grad_norm": 0.380859375, "learning_rate": 1.4864660715479555e-05, "loss": 1.9376, "step": 16108 }, { "epoch": 0.5197384208053462, "grad_norm": 0.3359375, "learning_rate": 1.4863087046322071e-05, "loss": 1.9923, "step": 16109 }, { "epoch": 0.5197706846591424, "grad_norm": 0.33984375, "learning_rate": 1.4861513378671626e-05, "loss": 1.9358, "step": 16110 }, { "epoch": 0.5198029485129388, "grad_norm": 0.341796875, "learning_rate": 1.4859939712545547e-05, "loss": 1.976, "step": 16111 }, { "epoch": 0.5198352123667351, "grad_norm": 0.337890625, "learning_rate": 1.4858366047961147e-05, "loss": 1.9443, "step": 16112 }, { "epoch": 0.5198674762205315, "grad_norm": 0.353515625, "learning_rate": 1.485679238493575e-05, "loss": 2.0072, "step": 16113 }, { "epoch": 0.5198997400743278, "grad_norm": 0.337890625, "learning_rate": 1.4855218723486686e-05, "loss": 1.9963, "step": 16114 }, { "epoch": 0.5199320039281242, "grad_norm": 0.349609375, "learning_rate": 1.4853645063631266e-05, "loss": 1.9701, "step": 16115 }, { "epoch": 0.5199642677819205, "grad_norm": 0.34765625, "learning_rate": 1.4852071405386816e-05, "loss": 1.9887, "step": 16116 }, { "epoch": 0.5199965316357169, "grad_norm": 0.3515625, "learning_rate": 1.4850497748770662e-05, "loss": 1.9898, "step": 16117 }, { "epoch": 0.5200287954895132, "grad_norm": 0.36328125, "learning_rate": 1.4848924093800117e-05, "loss": 1.9378, "step": 16118 }, { "epoch": 0.5200610593433096, "grad_norm": 0.341796875, "learning_rate": 1.4847350440492507e-05, "loss": 1.9796, "step": 16119 }, { "epoch": 0.5200933231971059, "grad_norm": 0.353515625, "learning_rate": 1.4845776788865159e-05, "loss": 1.9971, "step": 16120 }, { "epoch": 0.5201255870509023, "grad_norm": 0.337890625, "learning_rate": 1.4844203138935383e-05, "loss": 1.9634, "step": 16121 }, { "epoch": 0.5201578509046986, "grad_norm": 0.341796875, "learning_rate": 1.484262949072051e-05, "loss": 1.9971, "step": 16122 }, { "epoch": 0.520190114758495, "grad_norm": 0.349609375, "learning_rate": 1.4841055844237865e-05, "loss": 2.0001, "step": 16123 }, { "epoch": 0.5202223786122914, "grad_norm": 0.333984375, "learning_rate": 1.4839482199504753e-05, "loss": 1.9651, "step": 16124 }, { "epoch": 0.5202546424660877, "grad_norm": 0.341796875, "learning_rate": 1.4837908556538508e-05, "loss": 1.983, "step": 16125 }, { "epoch": 0.5202869063198841, "grad_norm": 0.32421875, "learning_rate": 1.4836334915356458e-05, "loss": 1.962, "step": 16126 }, { "epoch": 0.5203191701736803, "grad_norm": 0.50390625, "learning_rate": 1.4834761275975907e-05, "loss": 1.9945, "step": 16127 }, { "epoch": 0.5203514340274767, "grad_norm": 0.333984375, "learning_rate": 1.4833187638414184e-05, "loss": 1.9728, "step": 16128 }, { "epoch": 0.520383697881273, "grad_norm": 0.33203125, "learning_rate": 1.483161400268862e-05, "loss": 1.9901, "step": 16129 }, { "epoch": 0.5204159617350694, "grad_norm": 0.33984375, "learning_rate": 1.4830040368816522e-05, "loss": 1.978, "step": 16130 }, { "epoch": 0.5204482255888657, "grad_norm": 0.33203125, "learning_rate": 1.4828466736815218e-05, "loss": 1.9708, "step": 16131 }, { "epoch": 0.5204804894426621, "grad_norm": 0.34375, "learning_rate": 1.4826893106702032e-05, "loss": 1.9785, "step": 16132 }, { "epoch": 0.5205127532964584, "grad_norm": 0.326171875, "learning_rate": 1.4825319478494281e-05, "loss": 1.983, "step": 16133 }, { "epoch": 0.5205450171502548, "grad_norm": 0.3359375, "learning_rate": 1.4823745852209286e-05, "loss": 1.9773, "step": 16134 }, { "epoch": 0.5205772810040511, "grad_norm": 0.349609375, "learning_rate": 1.4822172227864374e-05, "loss": 1.979, "step": 16135 }, { "epoch": 0.5206095448578475, "grad_norm": 0.34765625, "learning_rate": 1.4820598605476858e-05, "loss": 2.0202, "step": 16136 }, { "epoch": 0.5206418087116438, "grad_norm": 0.353515625, "learning_rate": 1.4819024985064066e-05, "loss": 1.9757, "step": 16137 }, { "epoch": 0.5206740725654402, "grad_norm": 0.337890625, "learning_rate": 1.4817451366643316e-05, "loss": 2.0058, "step": 16138 }, { "epoch": 0.5207063364192365, "grad_norm": 0.333984375, "learning_rate": 1.4815877750231935e-05, "loss": 1.9307, "step": 16139 }, { "epoch": 0.5207386002730329, "grad_norm": 0.357421875, "learning_rate": 1.4814304135847236e-05, "loss": 1.9943, "step": 16140 }, { "epoch": 0.5207708641268292, "grad_norm": 0.3359375, "learning_rate": 1.4812730523506543e-05, "loss": 1.9907, "step": 16141 }, { "epoch": 0.5208031279806256, "grad_norm": 0.33984375, "learning_rate": 1.481115691322718e-05, "loss": 1.9981, "step": 16142 }, { "epoch": 0.520835391834422, "grad_norm": 0.34765625, "learning_rate": 1.4809583305026466e-05, "loss": 1.9883, "step": 16143 }, { "epoch": 0.5208676556882182, "grad_norm": 0.330078125, "learning_rate": 1.480800969892172e-05, "loss": 1.9311, "step": 16144 }, { "epoch": 0.5208999195420146, "grad_norm": 0.337890625, "learning_rate": 1.4806436094930268e-05, "loss": 2.0141, "step": 16145 }, { "epoch": 0.5209321833958109, "grad_norm": 0.361328125, "learning_rate": 1.4804862493069428e-05, "loss": 1.9716, "step": 16146 }, { "epoch": 0.5209644472496073, "grad_norm": 0.337890625, "learning_rate": 1.480328889335652e-05, "loss": 1.9699, "step": 16147 }, { "epoch": 0.5209967111034036, "grad_norm": 0.34765625, "learning_rate": 1.4801715295808871e-05, "loss": 1.9611, "step": 16148 }, { "epoch": 0.5210289749572, "grad_norm": 0.34375, "learning_rate": 1.4800141700443794e-05, "loss": 1.9229, "step": 16149 }, { "epoch": 0.5210612388109963, "grad_norm": 0.35546875, "learning_rate": 1.4798568107278615e-05, "loss": 1.9509, "step": 16150 }, { "epoch": 0.5210935026647927, "grad_norm": 0.3515625, "learning_rate": 1.4796994516330655e-05, "loss": 1.9873, "step": 16151 }, { "epoch": 0.521125766518589, "grad_norm": 0.328125, "learning_rate": 1.4795420927617231e-05, "loss": 1.9761, "step": 16152 }, { "epoch": 0.5211580303723854, "grad_norm": 0.3828125, "learning_rate": 1.4793847341155668e-05, "loss": 1.9828, "step": 16153 }, { "epoch": 0.5211902942261817, "grad_norm": 0.32421875, "learning_rate": 1.4792273756963292e-05, "loss": 1.9394, "step": 16154 }, { "epoch": 0.5212225580799781, "grad_norm": 0.353515625, "learning_rate": 1.4790700175057406e-05, "loss": 1.9764, "step": 16155 }, { "epoch": 0.5212548219337744, "grad_norm": 0.330078125, "learning_rate": 1.4789126595455348e-05, "loss": 1.9849, "step": 16156 }, { "epoch": 0.5212870857875708, "grad_norm": 0.34375, "learning_rate": 1.4787553018174436e-05, "loss": 1.9581, "step": 16157 }, { "epoch": 0.5213193496413671, "grad_norm": 0.357421875, "learning_rate": 1.4785979443231985e-05, "loss": 1.962, "step": 16158 }, { "epoch": 0.5213516134951635, "grad_norm": 0.349609375, "learning_rate": 1.4784405870645313e-05, "loss": 1.969, "step": 16159 }, { "epoch": 0.5213838773489597, "grad_norm": 0.333984375, "learning_rate": 1.4782832300431757e-05, "loss": 1.9715, "step": 16160 }, { "epoch": 0.5214161412027561, "grad_norm": 0.3515625, "learning_rate": 1.4781258732608622e-05, "loss": 1.9596, "step": 16161 }, { "epoch": 0.5214484050565524, "grad_norm": 0.345703125, "learning_rate": 1.4779685167193234e-05, "loss": 1.9916, "step": 16162 }, { "epoch": 0.5214806689103488, "grad_norm": 0.341796875, "learning_rate": 1.4778111604202915e-05, "loss": 1.9969, "step": 16163 }, { "epoch": 0.5215129327641452, "grad_norm": 0.353515625, "learning_rate": 1.4776538043654981e-05, "loss": 1.9688, "step": 16164 }, { "epoch": 0.5215451966179415, "grad_norm": 0.337890625, "learning_rate": 1.4774964485566758e-05, "loss": 1.9549, "step": 16165 }, { "epoch": 0.5215774604717379, "grad_norm": 0.34765625, "learning_rate": 1.4773390929955566e-05, "loss": 1.9666, "step": 16166 }, { "epoch": 0.5216097243255342, "grad_norm": 0.33203125, "learning_rate": 1.477181737683872e-05, "loss": 1.994, "step": 16167 }, { "epoch": 0.5216419881793306, "grad_norm": 0.328125, "learning_rate": 1.4770243826233547e-05, "loss": 1.9349, "step": 16168 }, { "epoch": 0.5216742520331269, "grad_norm": 0.33203125, "learning_rate": 1.4768670278157369e-05, "loss": 1.971, "step": 16169 }, { "epoch": 0.5217065158869233, "grad_norm": 0.345703125, "learning_rate": 1.4767096732627496e-05, "loss": 1.9869, "step": 16170 }, { "epoch": 0.5217387797407196, "grad_norm": 0.333984375, "learning_rate": 1.4765523189661257e-05, "loss": 1.9735, "step": 16171 }, { "epoch": 0.521771043594516, "grad_norm": 0.33984375, "learning_rate": 1.4763949649275972e-05, "loss": 1.9868, "step": 16172 }, { "epoch": 0.5218033074483123, "grad_norm": 0.330078125, "learning_rate": 1.476237611148896e-05, "loss": 1.9519, "step": 16173 }, { "epoch": 0.5218355713021087, "grad_norm": 0.341796875, "learning_rate": 1.476080257631754e-05, "loss": 1.9717, "step": 16174 }, { "epoch": 0.521867835155905, "grad_norm": 0.333984375, "learning_rate": 1.4759229043779034e-05, "loss": 1.9787, "step": 16175 }, { "epoch": 0.5219000990097014, "grad_norm": 0.34765625, "learning_rate": 1.4757655513890764e-05, "loss": 2.0002, "step": 16176 }, { "epoch": 0.5219323628634976, "grad_norm": 0.333984375, "learning_rate": 1.4756081986670046e-05, "loss": 1.9725, "step": 16177 }, { "epoch": 0.521964626717294, "grad_norm": 0.341796875, "learning_rate": 1.4754508462134201e-05, "loss": 1.976, "step": 16178 }, { "epoch": 0.5219968905710903, "grad_norm": 0.34375, "learning_rate": 1.4752934940300555e-05, "loss": 1.9972, "step": 16179 }, { "epoch": 0.5220291544248867, "grad_norm": 0.34375, "learning_rate": 1.475136142118642e-05, "loss": 1.9589, "step": 16180 }, { "epoch": 0.522061418278683, "grad_norm": 0.34375, "learning_rate": 1.474978790480912e-05, "loss": 1.9843, "step": 16181 }, { "epoch": 0.5220936821324794, "grad_norm": 0.373046875, "learning_rate": 1.4748214391185984e-05, "loss": 1.9741, "step": 16182 }, { "epoch": 0.5221259459862757, "grad_norm": 0.33984375, "learning_rate": 1.4746640880334312e-05, "loss": 1.9787, "step": 16183 }, { "epoch": 0.5221582098400721, "grad_norm": 0.341796875, "learning_rate": 1.4745067372271438e-05, "loss": 1.9683, "step": 16184 }, { "epoch": 0.5221904736938685, "grad_norm": 0.349609375, "learning_rate": 1.4743493867014688e-05, "loss": 1.9986, "step": 16185 }, { "epoch": 0.5222227375476648, "grad_norm": 0.345703125, "learning_rate": 1.4741920364581361e-05, "loss": 1.9982, "step": 16186 }, { "epoch": 0.5222550014014612, "grad_norm": 0.330078125, "learning_rate": 1.4740346864988795e-05, "loss": 1.9644, "step": 16187 }, { "epoch": 0.5222872652552575, "grad_norm": 0.3515625, "learning_rate": 1.4738773368254309e-05, "loss": 1.9762, "step": 16188 }, { "epoch": 0.5223195291090539, "grad_norm": 0.345703125, "learning_rate": 1.4737199874395214e-05, "loss": 1.9716, "step": 16189 }, { "epoch": 0.5223517929628502, "grad_norm": 0.353515625, "learning_rate": 1.473562638342883e-05, "loss": 1.9699, "step": 16190 }, { "epoch": 0.5223840568166466, "grad_norm": 0.337890625, "learning_rate": 1.473405289537249e-05, "loss": 1.9813, "step": 16191 }, { "epoch": 0.5224163206704429, "grad_norm": 0.337890625, "learning_rate": 1.47324794102435e-05, "loss": 1.9979, "step": 16192 }, { "epoch": 0.5224485845242393, "grad_norm": 0.3359375, "learning_rate": 1.4730905928059182e-05, "loss": 2.0088, "step": 16193 }, { "epoch": 0.5224808483780355, "grad_norm": 0.322265625, "learning_rate": 1.4729332448836864e-05, "loss": 1.9947, "step": 16194 }, { "epoch": 0.5225131122318319, "grad_norm": 0.337890625, "learning_rate": 1.4727758972593859e-05, "loss": 2.0011, "step": 16195 }, { "epoch": 0.5225453760856282, "grad_norm": 0.341796875, "learning_rate": 1.4726185499347485e-05, "loss": 1.9545, "step": 16196 }, { "epoch": 0.5225776399394246, "grad_norm": 0.341796875, "learning_rate": 1.472461202911507e-05, "loss": 1.9803, "step": 16197 }, { "epoch": 0.5226099037932209, "grad_norm": 0.349609375, "learning_rate": 1.472303856191392e-05, "loss": 1.9487, "step": 16198 }, { "epoch": 0.5226421676470173, "grad_norm": 0.341796875, "learning_rate": 1.4721465097761369e-05, "loss": 1.9497, "step": 16199 }, { "epoch": 0.5226744315008136, "grad_norm": 0.337890625, "learning_rate": 1.471989163667473e-05, "loss": 2.006, "step": 16200 }, { "epoch": 0.52270669535461, "grad_norm": 0.341796875, "learning_rate": 1.471831817867132e-05, "loss": 1.9686, "step": 16201 }, { "epoch": 0.5227389592084063, "grad_norm": 0.349609375, "learning_rate": 1.4716744723768462e-05, "loss": 1.9774, "step": 16202 }, { "epoch": 0.5227712230622027, "grad_norm": 0.353515625, "learning_rate": 1.4715171271983479e-05, "loss": 1.9931, "step": 16203 }, { "epoch": 0.5228034869159991, "grad_norm": 0.333984375, "learning_rate": 1.4713597823333681e-05, "loss": 1.9237, "step": 16204 }, { "epoch": 0.5228357507697954, "grad_norm": 0.35546875, "learning_rate": 1.4712024377836395e-05, "loss": 1.9823, "step": 16205 }, { "epoch": 0.5228680146235918, "grad_norm": 0.33203125, "learning_rate": 1.4710450935508938e-05, "loss": 1.9587, "step": 16206 }, { "epoch": 0.5229002784773881, "grad_norm": 0.345703125, "learning_rate": 1.470887749636863e-05, "loss": 1.9735, "step": 16207 }, { "epoch": 0.5229325423311845, "grad_norm": 0.337890625, "learning_rate": 1.4707304060432789e-05, "loss": 1.963, "step": 16208 }, { "epoch": 0.5229648061849808, "grad_norm": 0.333984375, "learning_rate": 1.4705730627718734e-05, "loss": 1.9464, "step": 16209 }, { "epoch": 0.5229970700387772, "grad_norm": 0.341796875, "learning_rate": 1.4704157198243787e-05, "loss": 2.0111, "step": 16210 }, { "epoch": 0.5230293338925734, "grad_norm": 0.333984375, "learning_rate": 1.4702583772025263e-05, "loss": 1.9466, "step": 16211 }, { "epoch": 0.5230615977463698, "grad_norm": 0.333984375, "learning_rate": 1.4701010349080485e-05, "loss": 1.981, "step": 16212 }, { "epoch": 0.5230938616001661, "grad_norm": 0.35546875, "learning_rate": 1.4699436929426777e-05, "loss": 1.9851, "step": 16213 }, { "epoch": 0.5231261254539625, "grad_norm": 0.3359375, "learning_rate": 1.469786351308144e-05, "loss": 1.9918, "step": 16214 }, { "epoch": 0.5231583893077588, "grad_norm": 0.345703125, "learning_rate": 1.4696290100061807e-05, "loss": 1.9775, "step": 16215 }, { "epoch": 0.5231906531615552, "grad_norm": 0.337890625, "learning_rate": 1.4694716690385204e-05, "loss": 1.9785, "step": 16216 }, { "epoch": 0.5232229170153515, "grad_norm": 0.34765625, "learning_rate": 1.469314328406893e-05, "loss": 1.981, "step": 16217 }, { "epoch": 0.5232551808691479, "grad_norm": 0.359375, "learning_rate": 1.4691569881130316e-05, "loss": 1.9771, "step": 16218 }, { "epoch": 0.5232874447229442, "grad_norm": 0.37890625, "learning_rate": 1.4689996481586688e-05, "loss": 1.9824, "step": 16219 }, { "epoch": 0.5233197085767406, "grad_norm": 0.34375, "learning_rate": 1.4688423085455351e-05, "loss": 1.962, "step": 16220 }, { "epoch": 0.5233519724305369, "grad_norm": 0.365234375, "learning_rate": 1.4686849692753625e-05, "loss": 1.9962, "step": 16221 }, { "epoch": 0.5233842362843333, "grad_norm": 0.34375, "learning_rate": 1.4685276303498841e-05, "loss": 1.9752, "step": 16222 }, { "epoch": 0.5234165001381296, "grad_norm": 0.353515625, "learning_rate": 1.4683702917708304e-05, "loss": 1.9634, "step": 16223 }, { "epoch": 0.523448763991926, "grad_norm": 0.341796875, "learning_rate": 1.4682129535399338e-05, "loss": 1.9715, "step": 16224 }, { "epoch": 0.5234810278457224, "grad_norm": 0.337890625, "learning_rate": 1.4680556156589268e-05, "loss": 1.9711, "step": 16225 }, { "epoch": 0.5235132916995187, "grad_norm": 0.345703125, "learning_rate": 1.46789827812954e-05, "loss": 1.9619, "step": 16226 }, { "epoch": 0.5235455555533151, "grad_norm": 0.341796875, "learning_rate": 1.467740940953506e-05, "loss": 1.9712, "step": 16227 }, { "epoch": 0.5235778194071113, "grad_norm": 0.35546875, "learning_rate": 1.467583604132557e-05, "loss": 1.9745, "step": 16228 }, { "epoch": 0.5236100832609077, "grad_norm": 0.337890625, "learning_rate": 1.4674262676684243e-05, "loss": 1.9771, "step": 16229 }, { "epoch": 0.523642347114704, "grad_norm": 0.34765625, "learning_rate": 1.4672689315628396e-05, "loss": 1.9497, "step": 16230 }, { "epoch": 0.5236746109685004, "grad_norm": 0.361328125, "learning_rate": 1.4671115958175354e-05, "loss": 1.9895, "step": 16231 }, { "epoch": 0.5237068748222967, "grad_norm": 0.337890625, "learning_rate": 1.4669542604342428e-05, "loss": 1.9901, "step": 16232 }, { "epoch": 0.5237391386760931, "grad_norm": 0.36328125, "learning_rate": 1.466796925414694e-05, "loss": 1.9921, "step": 16233 }, { "epoch": 0.5237714025298894, "grad_norm": 0.34375, "learning_rate": 1.4666395907606212e-05, "loss": 1.9822, "step": 16234 }, { "epoch": 0.5238036663836858, "grad_norm": 0.384765625, "learning_rate": 1.4664822564737555e-05, "loss": 1.9616, "step": 16235 }, { "epoch": 0.5238359302374821, "grad_norm": 0.365234375, "learning_rate": 1.466324922555829e-05, "loss": 1.9921, "step": 16236 }, { "epoch": 0.5238681940912785, "grad_norm": 0.328125, "learning_rate": 1.466167589008574e-05, "loss": 1.9804, "step": 16237 }, { "epoch": 0.5239004579450748, "grad_norm": 0.353515625, "learning_rate": 1.4660102558337216e-05, "loss": 1.9745, "step": 16238 }, { "epoch": 0.5239327217988712, "grad_norm": 0.365234375, "learning_rate": 1.4658529230330037e-05, "loss": 1.9736, "step": 16239 }, { "epoch": 0.5239649856526675, "grad_norm": 0.341796875, "learning_rate": 1.4656955906081527e-05, "loss": 1.9527, "step": 16240 }, { "epoch": 0.5239972495064639, "grad_norm": 0.34765625, "learning_rate": 1.4655382585609002e-05, "loss": 1.9592, "step": 16241 }, { "epoch": 0.5240295133602602, "grad_norm": 0.33203125, "learning_rate": 1.4653809268929774e-05, "loss": 1.9787, "step": 16242 }, { "epoch": 0.5240617772140566, "grad_norm": 0.35546875, "learning_rate": 1.4652235956061167e-05, "loss": 1.9618, "step": 16243 }, { "epoch": 0.524094041067853, "grad_norm": 0.341796875, "learning_rate": 1.4650662647020502e-05, "loss": 1.9712, "step": 16244 }, { "epoch": 0.5241263049216492, "grad_norm": 0.33984375, "learning_rate": 1.4649089341825082e-05, "loss": 1.9689, "step": 16245 }, { "epoch": 0.5241585687754456, "grad_norm": 0.333984375, "learning_rate": 1.464751604049224e-05, "loss": 1.9732, "step": 16246 }, { "epoch": 0.5241908326292419, "grad_norm": 0.330078125, "learning_rate": 1.4645942743039293e-05, "loss": 1.9569, "step": 16247 }, { "epoch": 0.5242230964830383, "grad_norm": 0.353515625, "learning_rate": 1.464436944948355e-05, "loss": 1.9588, "step": 16248 }, { "epoch": 0.5242553603368346, "grad_norm": 0.33984375, "learning_rate": 1.4642796159842328e-05, "loss": 1.966, "step": 16249 }, { "epoch": 0.524287624190631, "grad_norm": 0.333984375, "learning_rate": 1.4641222874132958e-05, "loss": 1.9976, "step": 16250 }, { "epoch": 0.5243198880444273, "grad_norm": 0.34375, "learning_rate": 1.4639649592372746e-05, "loss": 1.9485, "step": 16251 }, { "epoch": 0.5243521518982237, "grad_norm": 0.341796875, "learning_rate": 1.4638076314579008e-05, "loss": 1.9638, "step": 16252 }, { "epoch": 0.52438441575202, "grad_norm": 0.34375, "learning_rate": 1.4636503040769076e-05, "loss": 1.9639, "step": 16253 }, { "epoch": 0.5244166796058164, "grad_norm": 0.34375, "learning_rate": 1.4634929770960251e-05, "loss": 1.9804, "step": 16254 }, { "epoch": 0.5244489434596127, "grad_norm": 0.333984375, "learning_rate": 1.4633356505169858e-05, "loss": 1.9762, "step": 16255 }, { "epoch": 0.5244812073134091, "grad_norm": 0.330078125, "learning_rate": 1.4631783243415217e-05, "loss": 1.9682, "step": 16256 }, { "epoch": 0.5245134711672054, "grad_norm": 0.330078125, "learning_rate": 1.4630209985713637e-05, "loss": 1.9747, "step": 16257 }, { "epoch": 0.5245457350210018, "grad_norm": 0.337890625, "learning_rate": 1.462863673208244e-05, "loss": 1.9492, "step": 16258 }, { "epoch": 0.5245779988747981, "grad_norm": 0.357421875, "learning_rate": 1.4627063482538949e-05, "loss": 1.9721, "step": 16259 }, { "epoch": 0.5246102627285945, "grad_norm": 0.34375, "learning_rate": 1.4625490237100472e-05, "loss": 1.9902, "step": 16260 }, { "epoch": 0.5246425265823907, "grad_norm": 0.3515625, "learning_rate": 1.4623916995784328e-05, "loss": 1.9845, "step": 16261 }, { "epoch": 0.5246747904361871, "grad_norm": 0.33203125, "learning_rate": 1.462234375860784e-05, "loss": 1.9753, "step": 16262 }, { "epoch": 0.5247070542899834, "grad_norm": 0.380859375, "learning_rate": 1.4620770525588318e-05, "loss": 1.9915, "step": 16263 }, { "epoch": 0.5247393181437798, "grad_norm": 0.357421875, "learning_rate": 1.4619197296743083e-05, "loss": 1.9639, "step": 16264 }, { "epoch": 0.5247715819975762, "grad_norm": 0.35546875, "learning_rate": 1.4617624072089452e-05, "loss": 1.9922, "step": 16265 }, { "epoch": 0.5248038458513725, "grad_norm": 0.37109375, "learning_rate": 1.4616050851644739e-05, "loss": 1.9955, "step": 16266 }, { "epoch": 0.5248361097051689, "grad_norm": 0.361328125, "learning_rate": 1.4614477635426262e-05, "loss": 1.9952, "step": 16267 }, { "epoch": 0.5248683735589652, "grad_norm": 0.365234375, "learning_rate": 1.4612904423451343e-05, "loss": 2.0005, "step": 16268 }, { "epoch": 0.5249006374127616, "grad_norm": 0.388671875, "learning_rate": 1.4611331215737291e-05, "loss": 1.9981, "step": 16269 }, { "epoch": 0.5249329012665579, "grad_norm": 0.357421875, "learning_rate": 1.4609758012301426e-05, "loss": 1.9849, "step": 16270 }, { "epoch": 0.5249651651203543, "grad_norm": 0.353515625, "learning_rate": 1.4608184813161063e-05, "loss": 1.965, "step": 16271 }, { "epoch": 0.5249974289741506, "grad_norm": 0.34375, "learning_rate": 1.4606611618333526e-05, "loss": 1.9616, "step": 16272 }, { "epoch": 0.525029692827947, "grad_norm": 0.353515625, "learning_rate": 1.4605038427836122e-05, "loss": 1.9654, "step": 16273 }, { "epoch": 0.5250619566817433, "grad_norm": 0.34765625, "learning_rate": 1.4603465241686172e-05, "loss": 1.9663, "step": 16274 }, { "epoch": 0.5250942205355397, "grad_norm": 0.345703125, "learning_rate": 1.4601892059900999e-05, "loss": 2.0105, "step": 16275 }, { "epoch": 0.525126484389336, "grad_norm": 0.333984375, "learning_rate": 1.4600318882497902e-05, "loss": 2.0026, "step": 16276 }, { "epoch": 0.5251587482431324, "grad_norm": 0.33984375, "learning_rate": 1.459874570949421e-05, "loss": 1.975, "step": 16277 }, { "epoch": 0.5251910120969286, "grad_norm": 0.3359375, "learning_rate": 1.4597172540907247e-05, "loss": 1.979, "step": 16278 }, { "epoch": 0.525223275950725, "grad_norm": 0.341796875, "learning_rate": 1.4595599376754312e-05, "loss": 1.9241, "step": 16279 }, { "epoch": 0.5252555398045213, "grad_norm": 0.341796875, "learning_rate": 1.4594026217052724e-05, "loss": 1.9792, "step": 16280 }, { "epoch": 0.5252878036583177, "grad_norm": 0.341796875, "learning_rate": 1.4592453061819816e-05, "loss": 1.944, "step": 16281 }, { "epoch": 0.525320067512114, "grad_norm": 0.349609375, "learning_rate": 1.4590879911072885e-05, "loss": 1.9465, "step": 16282 }, { "epoch": 0.5253523313659104, "grad_norm": 0.361328125, "learning_rate": 1.4589306764829254e-05, "loss": 1.982, "step": 16283 }, { "epoch": 0.5253845952197067, "grad_norm": 0.3671875, "learning_rate": 1.4587733623106243e-05, "loss": 1.9754, "step": 16284 }, { "epoch": 0.5254168590735031, "grad_norm": 0.36328125, "learning_rate": 1.4586160485921162e-05, "loss": 1.9849, "step": 16285 }, { "epoch": 0.5254491229272995, "grad_norm": 0.34375, "learning_rate": 1.458458735329133e-05, "loss": 1.9954, "step": 16286 }, { "epoch": 0.5254813867810958, "grad_norm": 0.33984375, "learning_rate": 1.4583014225234064e-05, "loss": 1.9606, "step": 16287 }, { "epoch": 0.5255136506348922, "grad_norm": 0.34375, "learning_rate": 1.4581441101766676e-05, "loss": 1.9999, "step": 16288 }, { "epoch": 0.5255459144886885, "grad_norm": 0.333984375, "learning_rate": 1.4579867982906484e-05, "loss": 1.9656, "step": 16289 }, { "epoch": 0.5255781783424849, "grad_norm": 0.333984375, "learning_rate": 1.4578294868670806e-05, "loss": 1.9717, "step": 16290 }, { "epoch": 0.5256104421962812, "grad_norm": 0.34765625, "learning_rate": 1.4576721759076954e-05, "loss": 1.9749, "step": 16291 }, { "epoch": 0.5256427060500776, "grad_norm": 0.345703125, "learning_rate": 1.4575148654142242e-05, "loss": 2.0073, "step": 16292 }, { "epoch": 0.5256749699038739, "grad_norm": 0.328125, "learning_rate": 1.4573575553883996e-05, "loss": 1.9305, "step": 16293 }, { "epoch": 0.5257072337576703, "grad_norm": 0.34765625, "learning_rate": 1.4572002458319518e-05, "loss": 1.9977, "step": 16294 }, { "epoch": 0.5257394976114665, "grad_norm": 0.37109375, "learning_rate": 1.4570429367466133e-05, "loss": 2.0056, "step": 16295 }, { "epoch": 0.5257717614652629, "grad_norm": 0.33984375, "learning_rate": 1.4568856281341155e-05, "loss": 1.9807, "step": 16296 }, { "epoch": 0.5258040253190592, "grad_norm": 0.365234375, "learning_rate": 1.4567283199961894e-05, "loss": 1.9831, "step": 16297 }, { "epoch": 0.5258362891728556, "grad_norm": 0.3671875, "learning_rate": 1.456571012334567e-05, "loss": 1.9802, "step": 16298 }, { "epoch": 0.5258685530266519, "grad_norm": 0.337890625, "learning_rate": 1.4564137051509804e-05, "loss": 1.984, "step": 16299 }, { "epoch": 0.5259008168804483, "grad_norm": 0.34765625, "learning_rate": 1.4562563984471594e-05, "loss": 1.9695, "step": 16300 }, { "epoch": 0.5259330807342446, "grad_norm": 0.3828125, "learning_rate": 1.456099092224837e-05, "loss": 1.9825, "step": 16301 }, { "epoch": 0.525965344588041, "grad_norm": 0.333984375, "learning_rate": 1.4559417864857448e-05, "loss": 1.9569, "step": 16302 }, { "epoch": 0.5259976084418373, "grad_norm": 0.37109375, "learning_rate": 1.4557844812316131e-05, "loss": 1.985, "step": 16303 }, { "epoch": 0.5260298722956337, "grad_norm": 0.369140625, "learning_rate": 1.4556271764641741e-05, "loss": 1.9766, "step": 16304 }, { "epoch": 0.5260621361494301, "grad_norm": 0.33984375, "learning_rate": 1.4554698721851596e-05, "loss": 1.9787, "step": 16305 }, { "epoch": 0.5260944000032264, "grad_norm": 0.380859375, "learning_rate": 1.4553125683963013e-05, "loss": 1.9859, "step": 16306 }, { "epoch": 0.5261266638570228, "grad_norm": 0.345703125, "learning_rate": 1.4551552650993292e-05, "loss": 1.9759, "step": 16307 }, { "epoch": 0.5261589277108191, "grad_norm": 0.34765625, "learning_rate": 1.4549979622959762e-05, "loss": 1.9514, "step": 16308 }, { "epoch": 0.5261911915646155, "grad_norm": 0.34765625, "learning_rate": 1.4548406599879739e-05, "loss": 1.9567, "step": 16309 }, { "epoch": 0.5262234554184118, "grad_norm": 0.330078125, "learning_rate": 1.4546833581770527e-05, "loss": 1.997, "step": 16310 }, { "epoch": 0.5262557192722082, "grad_norm": 0.345703125, "learning_rate": 1.4545260568649442e-05, "loss": 1.9699, "step": 16311 }, { "epoch": 0.5262879831260044, "grad_norm": 0.330078125, "learning_rate": 1.454368756053381e-05, "loss": 1.9899, "step": 16312 }, { "epoch": 0.5263202469798008, "grad_norm": 0.3359375, "learning_rate": 1.4542114557440933e-05, "loss": 1.99, "step": 16313 }, { "epoch": 0.5263525108335971, "grad_norm": 0.3359375, "learning_rate": 1.4540541559388132e-05, "loss": 1.9806, "step": 16314 }, { "epoch": 0.5263847746873935, "grad_norm": 0.345703125, "learning_rate": 1.453896856639272e-05, "loss": 1.9686, "step": 16315 }, { "epoch": 0.5264170385411898, "grad_norm": 0.33984375, "learning_rate": 1.4537395578472012e-05, "loss": 1.9805, "step": 16316 }, { "epoch": 0.5264493023949862, "grad_norm": 0.337890625, "learning_rate": 1.4535822595643319e-05, "loss": 1.9816, "step": 16317 }, { "epoch": 0.5264815662487825, "grad_norm": 0.341796875, "learning_rate": 1.453424961792396e-05, "loss": 1.9864, "step": 16318 }, { "epoch": 0.5265138301025789, "grad_norm": 0.333984375, "learning_rate": 1.4532676645331248e-05, "loss": 1.9368, "step": 16319 }, { "epoch": 0.5265460939563752, "grad_norm": 0.341796875, "learning_rate": 1.4531103677882492e-05, "loss": 1.9286, "step": 16320 }, { "epoch": 0.5265783578101716, "grad_norm": 0.333984375, "learning_rate": 1.4529530715595017e-05, "loss": 1.9484, "step": 16321 }, { "epoch": 0.5266106216639679, "grad_norm": 0.330078125, "learning_rate": 1.4527957758486123e-05, "loss": 1.9811, "step": 16322 }, { "epoch": 0.5266428855177643, "grad_norm": 0.33203125, "learning_rate": 1.4526384806573136e-05, "loss": 1.9809, "step": 16323 }, { "epoch": 0.5266751493715606, "grad_norm": 0.341796875, "learning_rate": 1.4524811859873364e-05, "loss": 1.9822, "step": 16324 }, { "epoch": 0.526707413225357, "grad_norm": 0.66015625, "learning_rate": 1.4523238918404122e-05, "loss": 1.9577, "step": 16325 }, { "epoch": 0.5267396770791534, "grad_norm": 0.337890625, "learning_rate": 1.4521665982182724e-05, "loss": 1.944, "step": 16326 }, { "epoch": 0.5267719409329497, "grad_norm": 0.337890625, "learning_rate": 1.4520093051226485e-05, "loss": 1.9709, "step": 16327 }, { "epoch": 0.526804204786746, "grad_norm": 0.341796875, "learning_rate": 1.4518520125552715e-05, "loss": 1.9961, "step": 16328 }, { "epoch": 0.5268364686405423, "grad_norm": 0.345703125, "learning_rate": 1.451694720517873e-05, "loss": 1.998, "step": 16329 }, { "epoch": 0.5268687324943387, "grad_norm": 0.32421875, "learning_rate": 1.4515374290121851e-05, "loss": 1.9592, "step": 16330 }, { "epoch": 0.526900996348135, "grad_norm": 0.333984375, "learning_rate": 1.4513801380399374e-05, "loss": 1.9903, "step": 16331 }, { "epoch": 0.5269332602019314, "grad_norm": 0.3359375, "learning_rate": 1.4512228476028625e-05, "loss": 1.9788, "step": 16332 }, { "epoch": 0.5269655240557277, "grad_norm": 0.32421875, "learning_rate": 1.4510655577026925e-05, "loss": 1.944, "step": 16333 }, { "epoch": 0.5269977879095241, "grad_norm": 0.345703125, "learning_rate": 1.4509082683411563e-05, "loss": 1.9685, "step": 16334 }, { "epoch": 0.5270300517633204, "grad_norm": 0.337890625, "learning_rate": 1.4507509795199873e-05, "loss": 1.9411, "step": 16335 }, { "epoch": 0.5270623156171168, "grad_norm": 0.34375, "learning_rate": 1.4505936912409167e-05, "loss": 1.9663, "step": 16336 }, { "epoch": 0.5270945794709131, "grad_norm": 0.33984375, "learning_rate": 1.4504364035056747e-05, "loss": 1.9441, "step": 16337 }, { "epoch": 0.5271268433247095, "grad_norm": 0.373046875, "learning_rate": 1.4502791163159928e-05, "loss": 1.9599, "step": 16338 }, { "epoch": 0.5271591071785058, "grad_norm": 0.333984375, "learning_rate": 1.450121829673603e-05, "loss": 1.9816, "step": 16339 }, { "epoch": 0.5271913710323022, "grad_norm": 0.34375, "learning_rate": 1.449964543580237e-05, "loss": 1.9887, "step": 16340 }, { "epoch": 0.5272236348860985, "grad_norm": 0.337890625, "learning_rate": 1.4498072580376248e-05, "loss": 1.9955, "step": 16341 }, { "epoch": 0.5272558987398949, "grad_norm": 0.333984375, "learning_rate": 1.4496499730474981e-05, "loss": 1.9474, "step": 16342 }, { "epoch": 0.5272881625936912, "grad_norm": 0.3359375, "learning_rate": 1.4494926886115893e-05, "loss": 2.004, "step": 16343 }, { "epoch": 0.5273204264474876, "grad_norm": 0.337890625, "learning_rate": 1.4493354047316279e-05, "loss": 1.9866, "step": 16344 }, { "epoch": 0.527352690301284, "grad_norm": 0.33203125, "learning_rate": 1.4491781214093461e-05, "loss": 1.9751, "step": 16345 }, { "epoch": 0.5273849541550802, "grad_norm": 0.349609375, "learning_rate": 1.4490208386464756e-05, "loss": 1.9806, "step": 16346 }, { "epoch": 0.5274172180088766, "grad_norm": 0.35546875, "learning_rate": 1.4488635564447467e-05, "loss": 1.9977, "step": 16347 }, { "epoch": 0.5274494818626729, "grad_norm": 0.33984375, "learning_rate": 1.448706274805891e-05, "loss": 1.9523, "step": 16348 }, { "epoch": 0.5274817457164693, "grad_norm": 0.35546875, "learning_rate": 1.4485489937316402e-05, "loss": 2.0111, "step": 16349 }, { "epoch": 0.5275140095702656, "grad_norm": 0.330078125, "learning_rate": 1.4483917132237249e-05, "loss": 2.0105, "step": 16350 }, { "epoch": 0.527546273424062, "grad_norm": 0.333984375, "learning_rate": 1.4482344332838765e-05, "loss": 1.957, "step": 16351 }, { "epoch": 0.5275785372778583, "grad_norm": 0.345703125, "learning_rate": 1.4480771539138268e-05, "loss": 1.9839, "step": 16352 }, { "epoch": 0.5276108011316547, "grad_norm": 0.3515625, "learning_rate": 1.447919875115306e-05, "loss": 1.9705, "step": 16353 }, { "epoch": 0.527643064985451, "grad_norm": 0.359375, "learning_rate": 1.4477625968900458e-05, "loss": 1.9741, "step": 16354 }, { "epoch": 0.5276753288392474, "grad_norm": 0.337890625, "learning_rate": 1.4476053192397781e-05, "loss": 1.9547, "step": 16355 }, { "epoch": 0.5277075926930437, "grad_norm": 0.357421875, "learning_rate": 1.4474480421662329e-05, "loss": 1.9453, "step": 16356 }, { "epoch": 0.5277398565468401, "grad_norm": 0.333984375, "learning_rate": 1.4472907656711421e-05, "loss": 1.9497, "step": 16357 }, { "epoch": 0.5277721204006364, "grad_norm": 0.365234375, "learning_rate": 1.447133489756237e-05, "loss": 1.9948, "step": 16358 }, { "epoch": 0.5278043842544328, "grad_norm": 0.353515625, "learning_rate": 1.4469762144232482e-05, "loss": 2.0118, "step": 16359 }, { "epoch": 0.5278366481082291, "grad_norm": 0.345703125, "learning_rate": 1.4468189396739072e-05, "loss": 1.9971, "step": 16360 }, { "epoch": 0.5278689119620255, "grad_norm": 0.34765625, "learning_rate": 1.4466616655099457e-05, "loss": 1.9791, "step": 16361 }, { "epoch": 0.5279011758158217, "grad_norm": 0.3359375, "learning_rate": 1.4465043919330935e-05, "loss": 1.9764, "step": 16362 }, { "epoch": 0.5279334396696181, "grad_norm": 0.341796875, "learning_rate": 1.4463471189450828e-05, "loss": 1.973, "step": 16363 }, { "epoch": 0.5279657035234144, "grad_norm": 0.337890625, "learning_rate": 1.4461898465476452e-05, "loss": 1.9699, "step": 16364 }, { "epoch": 0.5279979673772108, "grad_norm": 0.3359375, "learning_rate": 1.4460325747425106e-05, "loss": 1.9616, "step": 16365 }, { "epoch": 0.5280302312310072, "grad_norm": 0.341796875, "learning_rate": 1.4458753035314102e-05, "loss": 1.9555, "step": 16366 }, { "epoch": 0.5280624950848035, "grad_norm": 0.341796875, "learning_rate": 1.4457180329160767e-05, "loss": 1.9985, "step": 16367 }, { "epoch": 0.5280947589385999, "grad_norm": 0.3359375, "learning_rate": 1.4455607628982396e-05, "loss": 1.9326, "step": 16368 }, { "epoch": 0.5281270227923962, "grad_norm": 0.337890625, "learning_rate": 1.4454034934796301e-05, "loss": 1.9719, "step": 16369 }, { "epoch": 0.5281592866461926, "grad_norm": 0.32421875, "learning_rate": 1.4452462246619803e-05, "loss": 1.9474, "step": 16370 }, { "epoch": 0.5281915504999889, "grad_norm": 0.33984375, "learning_rate": 1.4450889564470212e-05, "loss": 1.9942, "step": 16371 }, { "epoch": 0.5282238143537853, "grad_norm": 0.34765625, "learning_rate": 1.4449316888364827e-05, "loss": 1.9872, "step": 16372 }, { "epoch": 0.5282560782075816, "grad_norm": 0.33984375, "learning_rate": 1.4447744218320968e-05, "loss": 1.9772, "step": 16373 }, { "epoch": 0.528288342061378, "grad_norm": 0.33984375, "learning_rate": 1.444617155435595e-05, "loss": 1.9823, "step": 16374 }, { "epoch": 0.5283206059151743, "grad_norm": 0.341796875, "learning_rate": 1.4444598896487074e-05, "loss": 1.972, "step": 16375 }, { "epoch": 0.5283528697689707, "grad_norm": 0.34375, "learning_rate": 1.4443026244731654e-05, "loss": 1.9722, "step": 16376 }, { "epoch": 0.528385133622767, "grad_norm": 0.33984375, "learning_rate": 1.4441453599107005e-05, "loss": 1.9787, "step": 16377 }, { "epoch": 0.5284173974765634, "grad_norm": 0.333984375, "learning_rate": 1.443988095963043e-05, "loss": 1.9941, "step": 16378 }, { "epoch": 0.5284496613303596, "grad_norm": 0.34375, "learning_rate": 1.4438308326319245e-05, "loss": 1.9811, "step": 16379 }, { "epoch": 0.528481925184156, "grad_norm": 0.333984375, "learning_rate": 1.4436735699190762e-05, "loss": 2.0047, "step": 16380 }, { "epoch": 0.5285141890379523, "grad_norm": 0.3359375, "learning_rate": 1.4435163078262286e-05, "loss": 1.9555, "step": 16381 }, { "epoch": 0.5285464528917487, "grad_norm": 0.349609375, "learning_rate": 1.4433590463551128e-05, "loss": 1.9703, "step": 16382 }, { "epoch": 0.528578716745545, "grad_norm": 0.337890625, "learning_rate": 1.4432017855074603e-05, "loss": 1.9762, "step": 16383 }, { "epoch": 0.5286109805993414, "grad_norm": 0.3359375, "learning_rate": 1.4430445252850016e-05, "loss": 1.971, "step": 16384 }, { "epoch": 0.5286432444531377, "grad_norm": 0.3359375, "learning_rate": 1.4428872656894678e-05, "loss": 1.9753, "step": 16385 }, { "epoch": 0.5286755083069341, "grad_norm": 0.33203125, "learning_rate": 1.4427300067225903e-05, "loss": 1.956, "step": 16386 }, { "epoch": 0.5287077721607305, "grad_norm": 0.34375, "learning_rate": 1.4425727483860995e-05, "loss": 1.9961, "step": 16387 }, { "epoch": 0.5287400360145268, "grad_norm": 0.3359375, "learning_rate": 1.4424154906817267e-05, "loss": 1.9786, "step": 16388 }, { "epoch": 0.5287722998683232, "grad_norm": 0.33203125, "learning_rate": 1.4422582336112032e-05, "loss": 1.9526, "step": 16389 }, { "epoch": 0.5288045637221195, "grad_norm": 0.341796875, "learning_rate": 1.4421009771762595e-05, "loss": 1.9568, "step": 16390 }, { "epoch": 0.5288368275759159, "grad_norm": 0.349609375, "learning_rate": 1.4419437213786265e-05, "loss": 1.9823, "step": 16391 }, { "epoch": 0.5288690914297122, "grad_norm": 0.3359375, "learning_rate": 1.4417864662200361e-05, "loss": 1.9745, "step": 16392 }, { "epoch": 0.5289013552835086, "grad_norm": 0.34765625, "learning_rate": 1.4416292117022175e-05, "loss": 1.989, "step": 16393 }, { "epoch": 0.5289336191373049, "grad_norm": 0.337890625, "learning_rate": 1.4414719578269029e-05, "loss": 1.9838, "step": 16394 }, { "epoch": 0.5289658829911013, "grad_norm": 0.34375, "learning_rate": 1.4413147045958239e-05, "loss": 1.9906, "step": 16395 }, { "epoch": 0.5289981468448975, "grad_norm": 0.333984375, "learning_rate": 1.4411574520107095e-05, "loss": 1.9734, "step": 16396 }, { "epoch": 0.5290304106986939, "grad_norm": 0.33203125, "learning_rate": 1.4410002000732917e-05, "loss": 2.0, "step": 16397 }, { "epoch": 0.5290626745524902, "grad_norm": 0.353515625, "learning_rate": 1.440842948785302e-05, "loss": 1.9599, "step": 16398 }, { "epoch": 0.5290949384062866, "grad_norm": 0.34375, "learning_rate": 1.4406856981484703e-05, "loss": 1.9768, "step": 16399 }, { "epoch": 0.5291272022600829, "grad_norm": 0.3359375, "learning_rate": 1.4405284481645279e-05, "loss": 1.9661, "step": 16400 }, { "epoch": 0.5291594661138793, "grad_norm": 0.3359375, "learning_rate": 1.4403711988352059e-05, "loss": 1.9819, "step": 16401 }, { "epoch": 0.5291917299676756, "grad_norm": 0.34375, "learning_rate": 1.4402139501622346e-05, "loss": 2.0002, "step": 16402 }, { "epoch": 0.529223993821472, "grad_norm": 0.341796875, "learning_rate": 1.4400567021473453e-05, "loss": 1.9729, "step": 16403 }, { "epoch": 0.5292562576752683, "grad_norm": 0.353515625, "learning_rate": 1.4398994547922685e-05, "loss": 1.9957, "step": 16404 }, { "epoch": 0.5292885215290647, "grad_norm": 0.34765625, "learning_rate": 1.4397422080987363e-05, "loss": 1.966, "step": 16405 }, { "epoch": 0.5293207853828611, "grad_norm": 0.341796875, "learning_rate": 1.439584962068478e-05, "loss": 1.9657, "step": 16406 }, { "epoch": 0.5293530492366574, "grad_norm": 0.349609375, "learning_rate": 1.4394277167032252e-05, "loss": 1.9967, "step": 16407 }, { "epoch": 0.5293853130904538, "grad_norm": 0.34375, "learning_rate": 1.4392704720047089e-05, "loss": 1.9641, "step": 16408 }, { "epoch": 0.5294175769442501, "grad_norm": 0.341796875, "learning_rate": 1.4391132279746594e-05, "loss": 1.9813, "step": 16409 }, { "epoch": 0.5294498407980465, "grad_norm": 0.345703125, "learning_rate": 1.4389559846148079e-05, "loss": 1.9852, "step": 16410 }, { "epoch": 0.5294821046518428, "grad_norm": 0.341796875, "learning_rate": 1.438798741926885e-05, "loss": 1.9602, "step": 16411 }, { "epoch": 0.5295143685056392, "grad_norm": 0.357421875, "learning_rate": 1.4386414999126219e-05, "loss": 1.9556, "step": 16412 }, { "epoch": 0.5295466323594354, "grad_norm": 0.34375, "learning_rate": 1.4384842585737488e-05, "loss": 1.9645, "step": 16413 }, { "epoch": 0.5295788962132318, "grad_norm": 0.349609375, "learning_rate": 1.4383270179119972e-05, "loss": 1.9916, "step": 16414 }, { "epoch": 0.5296111600670281, "grad_norm": 0.34765625, "learning_rate": 1.4381697779290974e-05, "loss": 1.9659, "step": 16415 }, { "epoch": 0.5296434239208245, "grad_norm": 0.34765625, "learning_rate": 1.43801253862678e-05, "loss": 1.9878, "step": 16416 }, { "epoch": 0.5296756877746208, "grad_norm": 0.33984375, "learning_rate": 1.4378553000067769e-05, "loss": 1.9791, "step": 16417 }, { "epoch": 0.5297079516284172, "grad_norm": 0.341796875, "learning_rate": 1.4376980620708175e-05, "loss": 1.9915, "step": 16418 }, { "epoch": 0.5297402154822135, "grad_norm": 0.33984375, "learning_rate": 1.437540824820633e-05, "loss": 1.9657, "step": 16419 }, { "epoch": 0.5297724793360099, "grad_norm": 0.341796875, "learning_rate": 1.4373835882579547e-05, "loss": 1.9845, "step": 16420 }, { "epoch": 0.5298047431898062, "grad_norm": 0.328125, "learning_rate": 1.4372263523845127e-05, "loss": 1.9693, "step": 16421 }, { "epoch": 0.5298370070436026, "grad_norm": 0.34375, "learning_rate": 1.4370691172020379e-05, "loss": 2.0181, "step": 16422 }, { "epoch": 0.5298692708973989, "grad_norm": 0.34375, "learning_rate": 1.4369118827122618e-05, "loss": 1.9819, "step": 16423 }, { "epoch": 0.5299015347511953, "grad_norm": 0.353515625, "learning_rate": 1.4367546489169135e-05, "loss": 1.9737, "step": 16424 }, { "epoch": 0.5299337986049916, "grad_norm": 0.3359375, "learning_rate": 1.4365974158177248e-05, "loss": 1.9517, "step": 16425 }, { "epoch": 0.529966062458788, "grad_norm": 0.337890625, "learning_rate": 1.4364401834164272e-05, "loss": 1.9464, "step": 16426 }, { "epoch": 0.5299983263125844, "grad_norm": 0.357421875, "learning_rate": 1.4362829517147497e-05, "loss": 1.9805, "step": 16427 }, { "epoch": 0.5300305901663807, "grad_norm": 0.3671875, "learning_rate": 1.4361257207144233e-05, "loss": 1.9879, "step": 16428 }, { "epoch": 0.530062854020177, "grad_norm": 0.3359375, "learning_rate": 1.4359684904171803e-05, "loss": 1.9694, "step": 16429 }, { "epoch": 0.5300951178739733, "grad_norm": 0.3828125, "learning_rate": 1.4358112608247495e-05, "loss": 1.9879, "step": 16430 }, { "epoch": 0.5301273817277697, "grad_norm": 0.3359375, "learning_rate": 1.4356540319388623e-05, "loss": 2.004, "step": 16431 }, { "epoch": 0.530159645581566, "grad_norm": 0.3984375, "learning_rate": 1.4354968037612496e-05, "loss": 2.0033, "step": 16432 }, { "epoch": 0.5301919094353624, "grad_norm": 0.353515625, "learning_rate": 1.4353395762936415e-05, "loss": 1.9849, "step": 16433 }, { "epoch": 0.5302241732891587, "grad_norm": 0.35546875, "learning_rate": 1.435182349537769e-05, "loss": 1.9738, "step": 16434 }, { "epoch": 0.5302564371429551, "grad_norm": 0.361328125, "learning_rate": 1.4350251234953629e-05, "loss": 1.9762, "step": 16435 }, { "epoch": 0.5302887009967514, "grad_norm": 0.376953125, "learning_rate": 1.4348678981681537e-05, "loss": 1.9979, "step": 16436 }, { "epoch": 0.5303209648505478, "grad_norm": 0.349609375, "learning_rate": 1.4347106735578714e-05, "loss": 1.9721, "step": 16437 }, { "epoch": 0.5303532287043441, "grad_norm": 0.359375, "learning_rate": 1.4345534496662476e-05, "loss": 2.0073, "step": 16438 }, { "epoch": 0.5303854925581405, "grad_norm": 0.341796875, "learning_rate": 1.4343962264950127e-05, "loss": 1.9809, "step": 16439 }, { "epoch": 0.5304177564119368, "grad_norm": 0.36328125, "learning_rate": 1.4342390040458965e-05, "loss": 1.9665, "step": 16440 }, { "epoch": 0.5304500202657332, "grad_norm": 0.341796875, "learning_rate": 1.4340817823206305e-05, "loss": 1.9805, "step": 16441 }, { "epoch": 0.5304822841195295, "grad_norm": 0.337890625, "learning_rate": 1.4339245613209451e-05, "loss": 2.0069, "step": 16442 }, { "epoch": 0.5305145479733259, "grad_norm": 0.353515625, "learning_rate": 1.4337673410485705e-05, "loss": 1.9551, "step": 16443 }, { "epoch": 0.5305468118271222, "grad_norm": 0.337890625, "learning_rate": 1.4336101215052373e-05, "loss": 1.9803, "step": 16444 }, { "epoch": 0.5305790756809186, "grad_norm": 0.353515625, "learning_rate": 1.4334529026926768e-05, "loss": 1.9866, "step": 16445 }, { "epoch": 0.5306113395347148, "grad_norm": 0.3359375, "learning_rate": 1.4332956846126186e-05, "loss": 1.9675, "step": 16446 }, { "epoch": 0.5306436033885112, "grad_norm": 0.33984375, "learning_rate": 1.4331384672667936e-05, "loss": 1.9688, "step": 16447 }, { "epoch": 0.5306758672423076, "grad_norm": 0.3515625, "learning_rate": 1.4329812506569329e-05, "loss": 1.9733, "step": 16448 }, { "epoch": 0.5307081310961039, "grad_norm": 0.3515625, "learning_rate": 1.4328240347847658e-05, "loss": 1.9848, "step": 16449 }, { "epoch": 0.5307403949499003, "grad_norm": 0.337890625, "learning_rate": 1.4326668196520238e-05, "loss": 1.9912, "step": 16450 }, { "epoch": 0.5307726588036966, "grad_norm": 0.357421875, "learning_rate": 1.4325096052604378e-05, "loss": 2.0018, "step": 16451 }, { "epoch": 0.530804922657493, "grad_norm": 0.353515625, "learning_rate": 1.4323523916117367e-05, "loss": 2.0237, "step": 16452 }, { "epoch": 0.5308371865112893, "grad_norm": 0.33984375, "learning_rate": 1.4321951787076522e-05, "loss": 1.9814, "step": 16453 }, { "epoch": 0.5308694503650857, "grad_norm": 0.34765625, "learning_rate": 1.4320379665499152e-05, "loss": 1.9758, "step": 16454 }, { "epoch": 0.530901714218882, "grad_norm": 0.341796875, "learning_rate": 1.4318807551402545e-05, "loss": 1.9934, "step": 16455 }, { "epoch": 0.5309339780726784, "grad_norm": 0.33984375, "learning_rate": 1.4317235444804019e-05, "loss": 1.986, "step": 16456 }, { "epoch": 0.5309662419264747, "grad_norm": 0.349609375, "learning_rate": 1.4315663345720881e-05, "loss": 2.0042, "step": 16457 }, { "epoch": 0.5309985057802711, "grad_norm": 0.341796875, "learning_rate": 1.4314091254170426e-05, "loss": 1.969, "step": 16458 }, { "epoch": 0.5310307696340674, "grad_norm": 0.333984375, "learning_rate": 1.4312519170169957e-05, "loss": 1.9714, "step": 16459 }, { "epoch": 0.5310630334878638, "grad_norm": 0.34375, "learning_rate": 1.4310947093736795e-05, "loss": 1.9723, "step": 16460 }, { "epoch": 0.53109529734166, "grad_norm": 0.34375, "learning_rate": 1.4309375024888226e-05, "loss": 1.9476, "step": 16461 }, { "epoch": 0.5311275611954565, "grad_norm": 0.33984375, "learning_rate": 1.430780296364156e-05, "loss": 1.9581, "step": 16462 }, { "epoch": 0.5311598250492527, "grad_norm": 0.3359375, "learning_rate": 1.4306230910014108e-05, "loss": 1.9615, "step": 16463 }, { "epoch": 0.5311920889030491, "grad_norm": 0.3359375, "learning_rate": 1.4304658864023163e-05, "loss": 1.9922, "step": 16464 }, { "epoch": 0.5312243527568454, "grad_norm": 0.33203125, "learning_rate": 1.4303086825686037e-05, "loss": 1.9835, "step": 16465 }, { "epoch": 0.5312566166106418, "grad_norm": 0.333984375, "learning_rate": 1.4301514795020032e-05, "loss": 1.9611, "step": 16466 }, { "epoch": 0.5312888804644382, "grad_norm": 0.3359375, "learning_rate": 1.4299942772042449e-05, "loss": 1.9985, "step": 16467 }, { "epoch": 0.5313211443182345, "grad_norm": 0.353515625, "learning_rate": 1.4298370756770592e-05, "loss": 1.9541, "step": 16468 }, { "epoch": 0.5313534081720309, "grad_norm": 0.34765625, "learning_rate": 1.4296798749221772e-05, "loss": 1.9572, "step": 16469 }, { "epoch": 0.5313856720258272, "grad_norm": 0.3359375, "learning_rate": 1.4295226749413284e-05, "loss": 1.9922, "step": 16470 }, { "epoch": 0.5314179358796236, "grad_norm": 0.34765625, "learning_rate": 1.4293654757362432e-05, "loss": 1.9678, "step": 16471 }, { "epoch": 0.5314501997334199, "grad_norm": 0.337890625, "learning_rate": 1.4292082773086523e-05, "loss": 1.9787, "step": 16472 }, { "epoch": 0.5314824635872163, "grad_norm": 0.34765625, "learning_rate": 1.4290510796602863e-05, "loss": 1.9987, "step": 16473 }, { "epoch": 0.5315147274410126, "grad_norm": 0.333984375, "learning_rate": 1.4288938827928745e-05, "loss": 1.9778, "step": 16474 }, { "epoch": 0.531546991294809, "grad_norm": 0.337890625, "learning_rate": 1.428736686708148e-05, "loss": 1.9768, "step": 16475 }, { "epoch": 0.5315792551486053, "grad_norm": 0.341796875, "learning_rate": 1.4285794914078375e-05, "loss": 1.9524, "step": 16476 }, { "epoch": 0.5316115190024017, "grad_norm": 0.3515625, "learning_rate": 1.428422296893672e-05, "loss": 2.0053, "step": 16477 }, { "epoch": 0.531643782856198, "grad_norm": 0.341796875, "learning_rate": 1.4282651031673827e-05, "loss": 1.9701, "step": 16478 }, { "epoch": 0.5316760467099944, "grad_norm": 0.34375, "learning_rate": 1.4281079102307e-05, "loss": 1.9755, "step": 16479 }, { "epoch": 0.5317083105637906, "grad_norm": 0.46484375, "learning_rate": 1.4279507180853535e-05, "loss": 1.984, "step": 16480 }, { "epoch": 0.531740574417587, "grad_norm": 0.373046875, "learning_rate": 1.4277935267330737e-05, "loss": 1.9989, "step": 16481 }, { "epoch": 0.5317728382713833, "grad_norm": 0.375, "learning_rate": 1.4276363361755918e-05, "loss": 1.9465, "step": 16482 }, { "epoch": 0.5318051021251797, "grad_norm": 0.380859375, "learning_rate": 1.4274791464146362e-05, "loss": 1.9903, "step": 16483 }, { "epoch": 0.531837365978976, "grad_norm": 0.34375, "learning_rate": 1.4273219574519383e-05, "loss": 1.9679, "step": 16484 }, { "epoch": 0.5318696298327724, "grad_norm": 0.375, "learning_rate": 1.4271647692892288e-05, "loss": 1.9908, "step": 16485 }, { "epoch": 0.5319018936865687, "grad_norm": 0.33984375, "learning_rate": 1.4270075819282365e-05, "loss": 1.9924, "step": 16486 }, { "epoch": 0.5319341575403651, "grad_norm": 0.341796875, "learning_rate": 1.4268503953706925e-05, "loss": 1.9746, "step": 16487 }, { "epoch": 0.5319664213941615, "grad_norm": 0.35546875, "learning_rate": 1.4266932096183276e-05, "loss": 1.9673, "step": 16488 }, { "epoch": 0.5319986852479578, "grad_norm": 0.34375, "learning_rate": 1.4265360246728708e-05, "loss": 1.9894, "step": 16489 }, { "epoch": 0.5320309491017542, "grad_norm": 0.365234375, "learning_rate": 1.4263788405360522e-05, "loss": 1.9811, "step": 16490 }, { "epoch": 0.5320632129555505, "grad_norm": 0.330078125, "learning_rate": 1.4262216572096035e-05, "loss": 1.9748, "step": 16491 }, { "epoch": 0.5320954768093469, "grad_norm": 0.341796875, "learning_rate": 1.426064474695253e-05, "loss": 2.0144, "step": 16492 }, { "epoch": 0.5321277406631432, "grad_norm": 0.3515625, "learning_rate": 1.4259072929947322e-05, "loss": 1.9766, "step": 16493 }, { "epoch": 0.5321600045169396, "grad_norm": 0.337890625, "learning_rate": 1.4257501121097707e-05, "loss": 1.9803, "step": 16494 }, { "epoch": 0.5321922683707359, "grad_norm": 0.3671875, "learning_rate": 1.4255929320420986e-05, "loss": 1.978, "step": 16495 }, { "epoch": 0.5322245322245323, "grad_norm": 0.333984375, "learning_rate": 1.4254357527934461e-05, "loss": 1.9607, "step": 16496 }, { "epoch": 0.5322567960783285, "grad_norm": 0.3359375, "learning_rate": 1.4252785743655435e-05, "loss": 1.9874, "step": 16497 }, { "epoch": 0.5322890599321249, "grad_norm": 0.34375, "learning_rate": 1.4251213967601205e-05, "loss": 1.9691, "step": 16498 }, { "epoch": 0.5323213237859212, "grad_norm": 0.333984375, "learning_rate": 1.4249642199789075e-05, "loss": 1.9722, "step": 16499 }, { "epoch": 0.5323535876397176, "grad_norm": 0.33203125, "learning_rate": 1.4248070440236348e-05, "loss": 1.9801, "step": 16500 }, { "epoch": 0.5323858514935139, "grad_norm": 0.369140625, "learning_rate": 1.424649868896032e-05, "loss": 1.9815, "step": 16501 }, { "epoch": 0.5324181153473103, "grad_norm": 0.3359375, "learning_rate": 1.4244926945978294e-05, "loss": 1.97, "step": 16502 }, { "epoch": 0.5324503792011066, "grad_norm": 0.328125, "learning_rate": 1.4243355211307568e-05, "loss": 1.9875, "step": 16503 }, { "epoch": 0.532482643054903, "grad_norm": 0.3515625, "learning_rate": 1.4241783484965451e-05, "loss": 1.9668, "step": 16504 }, { "epoch": 0.5325149069086993, "grad_norm": 0.337890625, "learning_rate": 1.4240211766969233e-05, "loss": 1.9746, "step": 16505 }, { "epoch": 0.5325471707624957, "grad_norm": 0.333984375, "learning_rate": 1.4238640057336218e-05, "loss": 1.9782, "step": 16506 }, { "epoch": 0.5325794346162921, "grad_norm": 0.33984375, "learning_rate": 1.4237068356083712e-05, "loss": 1.9763, "step": 16507 }, { "epoch": 0.5326116984700884, "grad_norm": 0.328125, "learning_rate": 1.4235496663229007e-05, "loss": 1.9811, "step": 16508 }, { "epoch": 0.5326439623238848, "grad_norm": 0.33203125, "learning_rate": 1.4233924978789405e-05, "loss": 1.9943, "step": 16509 }, { "epoch": 0.5326762261776811, "grad_norm": 0.341796875, "learning_rate": 1.423235330278221e-05, "loss": 1.9812, "step": 16510 }, { "epoch": 0.5327084900314775, "grad_norm": 0.326171875, "learning_rate": 1.4230781635224716e-05, "loss": 1.9325, "step": 16511 }, { "epoch": 0.5327407538852738, "grad_norm": 0.322265625, "learning_rate": 1.4229209976134228e-05, "loss": 1.9552, "step": 16512 }, { "epoch": 0.5327730177390702, "grad_norm": 0.322265625, "learning_rate": 1.422763832552805e-05, "loss": 1.9981, "step": 16513 }, { "epoch": 0.5328052815928664, "grad_norm": 0.322265625, "learning_rate": 1.4226066683423464e-05, "loss": 1.982, "step": 16514 }, { "epoch": 0.5328375454466628, "grad_norm": 0.337890625, "learning_rate": 1.4224495049837784e-05, "loss": 1.9825, "step": 16515 }, { "epoch": 0.5328698093004591, "grad_norm": 0.32421875, "learning_rate": 1.4222923424788312e-05, "loss": 2.0054, "step": 16516 }, { "epoch": 0.5329020731542555, "grad_norm": 0.345703125, "learning_rate": 1.4221351808292337e-05, "loss": 1.9699, "step": 16517 }, { "epoch": 0.5329343370080518, "grad_norm": 0.341796875, "learning_rate": 1.4219780200367159e-05, "loss": 1.9707, "step": 16518 }, { "epoch": 0.5329666008618482, "grad_norm": 0.345703125, "learning_rate": 1.4218208601030089e-05, "loss": 1.9481, "step": 16519 }, { "epoch": 0.5329988647156445, "grad_norm": 0.37890625, "learning_rate": 1.4216637010298413e-05, "loss": 1.9551, "step": 16520 }, { "epoch": 0.5330311285694409, "grad_norm": 0.33203125, "learning_rate": 1.421506542818943e-05, "loss": 2.0003, "step": 16521 }, { "epoch": 0.5330633924232372, "grad_norm": 0.345703125, "learning_rate": 1.4213493854720455e-05, "loss": 1.9884, "step": 16522 }, { "epoch": 0.5330956562770336, "grad_norm": 0.33984375, "learning_rate": 1.4211922289908768e-05, "loss": 1.9703, "step": 16523 }, { "epoch": 0.5331279201308299, "grad_norm": 0.33203125, "learning_rate": 1.4210350733771674e-05, "loss": 1.9301, "step": 16524 }, { "epoch": 0.5331601839846263, "grad_norm": 0.357421875, "learning_rate": 1.4208779186326479e-05, "loss": 1.981, "step": 16525 }, { "epoch": 0.5331924478384226, "grad_norm": 0.322265625, "learning_rate": 1.420720764759047e-05, "loss": 1.9872, "step": 16526 }, { "epoch": 0.533224711692219, "grad_norm": 0.3515625, "learning_rate": 1.420563611758095e-05, "loss": 1.9739, "step": 16527 }, { "epoch": 0.5332569755460154, "grad_norm": 0.333984375, "learning_rate": 1.4204064596315222e-05, "loss": 1.9927, "step": 16528 }, { "epoch": 0.5332892393998117, "grad_norm": 0.341796875, "learning_rate": 1.4202493083810575e-05, "loss": 1.947, "step": 16529 }, { "epoch": 0.533321503253608, "grad_norm": 0.33984375, "learning_rate": 1.4200921580084314e-05, "loss": 1.9753, "step": 16530 }, { "epoch": 0.5333537671074043, "grad_norm": 0.3515625, "learning_rate": 1.4199350085153738e-05, "loss": 1.9904, "step": 16531 }, { "epoch": 0.5333860309612007, "grad_norm": 0.349609375, "learning_rate": 1.4197778599036138e-05, "loss": 1.9959, "step": 16532 }, { "epoch": 0.533418294814997, "grad_norm": 0.34375, "learning_rate": 1.4196207121748817e-05, "loss": 1.9901, "step": 16533 }, { "epoch": 0.5334505586687934, "grad_norm": 0.3515625, "learning_rate": 1.4194635653309072e-05, "loss": 1.9982, "step": 16534 }, { "epoch": 0.5334828225225897, "grad_norm": 0.359375, "learning_rate": 1.4193064193734197e-05, "loss": 1.9792, "step": 16535 }, { "epoch": 0.5335150863763861, "grad_norm": 0.34765625, "learning_rate": 1.4191492743041494e-05, "loss": 2.0, "step": 16536 }, { "epoch": 0.5335473502301824, "grad_norm": 0.357421875, "learning_rate": 1.4189921301248257e-05, "loss": 1.9602, "step": 16537 }, { "epoch": 0.5335796140839788, "grad_norm": 0.337890625, "learning_rate": 1.4188349868371791e-05, "loss": 1.9665, "step": 16538 }, { "epoch": 0.5336118779377751, "grad_norm": 0.361328125, "learning_rate": 1.4186778444429384e-05, "loss": 1.9993, "step": 16539 }, { "epoch": 0.5336441417915715, "grad_norm": 0.3515625, "learning_rate": 1.4185207029438334e-05, "loss": 2.0013, "step": 16540 }, { "epoch": 0.5336764056453678, "grad_norm": 0.35546875, "learning_rate": 1.4183635623415946e-05, "loss": 1.9782, "step": 16541 }, { "epoch": 0.5337086694991642, "grad_norm": 0.353515625, "learning_rate": 1.4182064226379507e-05, "loss": 1.9715, "step": 16542 }, { "epoch": 0.5337409333529605, "grad_norm": 0.345703125, "learning_rate": 1.4180492838346318e-05, "loss": 1.9929, "step": 16543 }, { "epoch": 0.5337731972067569, "grad_norm": 0.349609375, "learning_rate": 1.4178921459333683e-05, "loss": 1.9753, "step": 16544 }, { "epoch": 0.5338054610605532, "grad_norm": 0.359375, "learning_rate": 1.4177350089358882e-05, "loss": 1.9617, "step": 16545 }, { "epoch": 0.5338377249143496, "grad_norm": 0.33203125, "learning_rate": 1.4175778728439224e-05, "loss": 1.9595, "step": 16546 }, { "epoch": 0.5338699887681458, "grad_norm": 0.3515625, "learning_rate": 1.417420737659201e-05, "loss": 1.97, "step": 16547 }, { "epoch": 0.5339022526219422, "grad_norm": 0.357421875, "learning_rate": 1.417263603383452e-05, "loss": 1.9932, "step": 16548 }, { "epoch": 0.5339345164757386, "grad_norm": 0.3359375, "learning_rate": 1.4171064700184057e-05, "loss": 1.9737, "step": 16549 }, { "epoch": 0.5339667803295349, "grad_norm": 0.341796875, "learning_rate": 1.416949337565793e-05, "loss": 1.9616, "step": 16550 }, { "epoch": 0.5339990441833313, "grad_norm": 0.345703125, "learning_rate": 1.4167922060273417e-05, "loss": 1.9857, "step": 16551 }, { "epoch": 0.5340313080371276, "grad_norm": 0.341796875, "learning_rate": 1.4166350754047819e-05, "loss": 1.9441, "step": 16552 }, { "epoch": 0.534063571890924, "grad_norm": 0.337890625, "learning_rate": 1.4164779456998439e-05, "loss": 1.9732, "step": 16553 }, { "epoch": 0.5340958357447203, "grad_norm": 0.337890625, "learning_rate": 1.4163208169142563e-05, "loss": 1.9707, "step": 16554 }, { "epoch": 0.5341280995985167, "grad_norm": 0.326171875, "learning_rate": 1.4161636890497491e-05, "loss": 2.0, "step": 16555 }, { "epoch": 0.534160363452313, "grad_norm": 0.333984375, "learning_rate": 1.4160065621080525e-05, "loss": 1.9523, "step": 16556 }, { "epoch": 0.5341926273061094, "grad_norm": 0.33984375, "learning_rate": 1.4158494360908948e-05, "loss": 1.9342, "step": 16557 }, { "epoch": 0.5342248911599057, "grad_norm": 0.333984375, "learning_rate": 1.4156923110000064e-05, "loss": 1.9471, "step": 16558 }, { "epoch": 0.5342571550137021, "grad_norm": 0.35546875, "learning_rate": 1.4155351868371165e-05, "loss": 1.9867, "step": 16559 }, { "epoch": 0.5342894188674984, "grad_norm": 0.349609375, "learning_rate": 1.4153780636039546e-05, "loss": 1.9883, "step": 16560 }, { "epoch": 0.5343216827212948, "grad_norm": 0.341796875, "learning_rate": 1.4152209413022502e-05, "loss": 1.977, "step": 16561 }, { "epoch": 0.534353946575091, "grad_norm": 0.337890625, "learning_rate": 1.415063819933733e-05, "loss": 1.9764, "step": 16562 }, { "epoch": 0.5343862104288875, "grad_norm": 0.33984375, "learning_rate": 1.4149066995001324e-05, "loss": 1.9772, "step": 16563 }, { "epoch": 0.5344184742826837, "grad_norm": 0.333984375, "learning_rate": 1.4147495800031776e-05, "loss": 1.9403, "step": 16564 }, { "epoch": 0.5344507381364801, "grad_norm": 0.345703125, "learning_rate": 1.4145924614445987e-05, "loss": 1.9521, "step": 16565 }, { "epoch": 0.5344830019902764, "grad_norm": 0.333984375, "learning_rate": 1.4144353438261243e-05, "loss": 1.9193, "step": 16566 }, { "epoch": 0.5345152658440728, "grad_norm": 0.34375, "learning_rate": 1.4142782271494841e-05, "loss": 1.9718, "step": 16567 }, { "epoch": 0.5345475296978692, "grad_norm": 0.330078125, "learning_rate": 1.4141211114164086e-05, "loss": 1.9647, "step": 16568 }, { "epoch": 0.5345797935516655, "grad_norm": 0.337890625, "learning_rate": 1.4139639966286251e-05, "loss": 1.973, "step": 16569 }, { "epoch": 0.5346120574054619, "grad_norm": 0.337890625, "learning_rate": 1.4138068827878647e-05, "loss": 1.9794, "step": 16570 }, { "epoch": 0.5346443212592582, "grad_norm": 0.33984375, "learning_rate": 1.4136497698958562e-05, "loss": 1.9647, "step": 16571 }, { "epoch": 0.5346765851130546, "grad_norm": 0.33203125, "learning_rate": 1.4134926579543295e-05, "loss": 1.9826, "step": 16572 }, { "epoch": 0.5347088489668509, "grad_norm": 0.3359375, "learning_rate": 1.4133355469650131e-05, "loss": 1.9816, "step": 16573 }, { "epoch": 0.5347411128206473, "grad_norm": 0.35546875, "learning_rate": 1.4131784369296369e-05, "loss": 1.9355, "step": 16574 }, { "epoch": 0.5347733766744436, "grad_norm": 0.359375, "learning_rate": 1.4130213278499308e-05, "loss": 1.9724, "step": 16575 }, { "epoch": 0.53480564052824, "grad_norm": 0.357421875, "learning_rate": 1.4128642197276225e-05, "loss": 1.9757, "step": 16576 }, { "epoch": 0.5348379043820363, "grad_norm": 0.333984375, "learning_rate": 1.4127071125644427e-05, "loss": 1.9865, "step": 16577 }, { "epoch": 0.5348701682358327, "grad_norm": 0.353515625, "learning_rate": 1.4125500063621211e-05, "loss": 1.9973, "step": 16578 }, { "epoch": 0.534902432089629, "grad_norm": 0.333984375, "learning_rate": 1.4123929011223857e-05, "loss": 1.9889, "step": 16579 }, { "epoch": 0.5349346959434254, "grad_norm": 0.341796875, "learning_rate": 1.412235796846966e-05, "loss": 1.9689, "step": 16580 }, { "epoch": 0.5349669597972216, "grad_norm": 0.357421875, "learning_rate": 1.4120786935375926e-05, "loss": 1.9927, "step": 16581 }, { "epoch": 0.534999223651018, "grad_norm": 0.337890625, "learning_rate": 1.4119215911959933e-05, "loss": 2.0059, "step": 16582 }, { "epoch": 0.5350314875048143, "grad_norm": 0.33984375, "learning_rate": 1.411764489823898e-05, "loss": 1.9593, "step": 16583 }, { "epoch": 0.5350637513586107, "grad_norm": 0.3359375, "learning_rate": 1.4116073894230363e-05, "loss": 1.9836, "step": 16584 }, { "epoch": 0.535096015212407, "grad_norm": 0.3359375, "learning_rate": 1.4114502899951363e-05, "loss": 1.9695, "step": 16585 }, { "epoch": 0.5351282790662034, "grad_norm": 0.333984375, "learning_rate": 1.4112931915419284e-05, "loss": 1.9764, "step": 16586 }, { "epoch": 0.5351605429199997, "grad_norm": 0.337890625, "learning_rate": 1.4111360940651415e-05, "loss": 1.9579, "step": 16587 }, { "epoch": 0.5351928067737961, "grad_norm": 0.33984375, "learning_rate": 1.4109789975665044e-05, "loss": 1.9645, "step": 16588 }, { "epoch": 0.5352250706275925, "grad_norm": 0.341796875, "learning_rate": 1.4108219020477467e-05, "loss": 1.9735, "step": 16589 }, { "epoch": 0.5352573344813888, "grad_norm": 0.330078125, "learning_rate": 1.410664807510598e-05, "loss": 1.9866, "step": 16590 }, { "epoch": 0.5352895983351852, "grad_norm": 0.375, "learning_rate": 1.4105077139567865e-05, "loss": 1.9851, "step": 16591 }, { "epoch": 0.5353218621889815, "grad_norm": 0.330078125, "learning_rate": 1.410350621388042e-05, "loss": 1.9844, "step": 16592 }, { "epoch": 0.5353541260427779, "grad_norm": 0.3671875, "learning_rate": 1.4101935298060938e-05, "loss": 1.9395, "step": 16593 }, { "epoch": 0.5353863898965742, "grad_norm": 0.34765625, "learning_rate": 1.4100364392126704e-05, "loss": 1.9513, "step": 16594 }, { "epoch": 0.5354186537503706, "grad_norm": 0.357421875, "learning_rate": 1.4098793496095014e-05, "loss": 1.9824, "step": 16595 }, { "epoch": 0.5354509176041669, "grad_norm": 0.37109375, "learning_rate": 1.4097222609983162e-05, "loss": 1.9947, "step": 16596 }, { "epoch": 0.5354831814579633, "grad_norm": 0.328125, "learning_rate": 1.4095651733808433e-05, "loss": 1.959, "step": 16597 }, { "epoch": 0.5355154453117595, "grad_norm": 0.361328125, "learning_rate": 1.409408086758812e-05, "loss": 1.9767, "step": 16598 }, { "epoch": 0.5355477091655559, "grad_norm": 0.39453125, "learning_rate": 1.4092510011339523e-05, "loss": 1.9754, "step": 16599 }, { "epoch": 0.5355799730193522, "grad_norm": 0.3515625, "learning_rate": 1.4090939165079914e-05, "loss": 1.9686, "step": 16600 }, { "epoch": 0.5356122368731486, "grad_norm": 0.388671875, "learning_rate": 1.4089368328826598e-05, "loss": 1.973, "step": 16601 }, { "epoch": 0.5356445007269449, "grad_norm": 0.36328125, "learning_rate": 1.4087797502596861e-05, "loss": 1.9948, "step": 16602 }, { "epoch": 0.5356767645807413, "grad_norm": 0.345703125, "learning_rate": 1.4086226686408003e-05, "loss": 1.9643, "step": 16603 }, { "epoch": 0.5357090284345376, "grad_norm": 0.365234375, "learning_rate": 1.4084655880277294e-05, "loss": 1.9527, "step": 16604 }, { "epoch": 0.535741292288334, "grad_norm": 0.369140625, "learning_rate": 1.408308508422204e-05, "loss": 2.0052, "step": 16605 }, { "epoch": 0.5357735561421303, "grad_norm": 0.375, "learning_rate": 1.4081514298259536e-05, "loss": 1.975, "step": 16606 }, { "epoch": 0.5358058199959267, "grad_norm": 0.36328125, "learning_rate": 1.4079943522407055e-05, "loss": 1.9566, "step": 16607 }, { "epoch": 0.5358380838497231, "grad_norm": 0.359375, "learning_rate": 1.4078372756681897e-05, "loss": 1.9952, "step": 16608 }, { "epoch": 0.5358703477035194, "grad_norm": 0.37109375, "learning_rate": 1.4076802001101356e-05, "loss": 1.9785, "step": 16609 }, { "epoch": 0.5359026115573158, "grad_norm": 0.369140625, "learning_rate": 1.4075231255682711e-05, "loss": 1.9884, "step": 16610 }, { "epoch": 0.5359348754111121, "grad_norm": 0.337890625, "learning_rate": 1.4073660520443253e-05, "loss": 1.9682, "step": 16611 }, { "epoch": 0.5359671392649085, "grad_norm": 0.357421875, "learning_rate": 1.4072089795400286e-05, "loss": 1.9711, "step": 16612 }, { "epoch": 0.5359994031187048, "grad_norm": 0.345703125, "learning_rate": 1.4070519080571082e-05, "loss": 1.9735, "step": 16613 }, { "epoch": 0.5360316669725012, "grad_norm": 0.35546875, "learning_rate": 1.4068948375972936e-05, "loss": 2.0117, "step": 16614 }, { "epoch": 0.5360639308262974, "grad_norm": 0.33984375, "learning_rate": 1.4067377681623144e-05, "loss": 1.9698, "step": 16615 }, { "epoch": 0.5360961946800938, "grad_norm": 0.3671875, "learning_rate": 1.4065806997538986e-05, "loss": 1.9948, "step": 16616 }, { "epoch": 0.5361284585338901, "grad_norm": 0.3359375, "learning_rate": 1.4064236323737753e-05, "loss": 1.9534, "step": 16617 }, { "epoch": 0.5361607223876865, "grad_norm": 0.345703125, "learning_rate": 1.4062665660236739e-05, "loss": 2.0003, "step": 16618 }, { "epoch": 0.5361929862414828, "grad_norm": 0.33203125, "learning_rate": 1.4061095007053226e-05, "loss": 1.9708, "step": 16619 }, { "epoch": 0.5362252500952792, "grad_norm": 0.345703125, "learning_rate": 1.4059524364204503e-05, "loss": 1.9877, "step": 16620 }, { "epoch": 0.5362575139490755, "grad_norm": 0.345703125, "learning_rate": 1.4057953731707866e-05, "loss": 1.9953, "step": 16621 }, { "epoch": 0.5362897778028719, "grad_norm": 0.3359375, "learning_rate": 1.4056383109580596e-05, "loss": 1.9534, "step": 16622 }, { "epoch": 0.5363220416566682, "grad_norm": 0.337890625, "learning_rate": 1.4054812497839985e-05, "loss": 1.9822, "step": 16623 }, { "epoch": 0.5363543055104646, "grad_norm": 0.333984375, "learning_rate": 1.405324189650332e-05, "loss": 1.9686, "step": 16624 }, { "epoch": 0.5363865693642609, "grad_norm": 0.341796875, "learning_rate": 1.4051671305587888e-05, "loss": 1.9949, "step": 16625 }, { "epoch": 0.5364188332180573, "grad_norm": 0.34375, "learning_rate": 1.4050100725110977e-05, "loss": 1.9824, "step": 16626 }, { "epoch": 0.5364510970718536, "grad_norm": 0.34375, "learning_rate": 1.404853015508988e-05, "loss": 1.9795, "step": 16627 }, { "epoch": 0.53648336092565, "grad_norm": 0.337890625, "learning_rate": 1.4046959595541876e-05, "loss": 1.9342, "step": 16628 }, { "epoch": 0.5365156247794464, "grad_norm": 0.33203125, "learning_rate": 1.404538904648426e-05, "loss": 1.9718, "step": 16629 }, { "epoch": 0.5365478886332427, "grad_norm": 0.328125, "learning_rate": 1.404381850793432e-05, "loss": 1.9347, "step": 16630 }, { "epoch": 0.536580152487039, "grad_norm": 0.32421875, "learning_rate": 1.404224797990933e-05, "loss": 1.978, "step": 16631 }, { "epoch": 0.5366124163408353, "grad_norm": 0.328125, "learning_rate": 1.4040677462426592e-05, "loss": 1.9738, "step": 16632 }, { "epoch": 0.5366446801946317, "grad_norm": 0.34375, "learning_rate": 1.4039106955503394e-05, "loss": 1.976, "step": 16633 }, { "epoch": 0.536676944048428, "grad_norm": 0.3359375, "learning_rate": 1.403753645915701e-05, "loss": 1.9236, "step": 16634 }, { "epoch": 0.5367092079022244, "grad_norm": 0.330078125, "learning_rate": 1.4035965973404732e-05, "loss": 1.9857, "step": 16635 }, { "epoch": 0.5367414717560207, "grad_norm": 0.333984375, "learning_rate": 1.4034395498263852e-05, "loss": 1.9611, "step": 16636 }, { "epoch": 0.5367737356098171, "grad_norm": 0.330078125, "learning_rate": 1.403282503375166e-05, "loss": 1.9707, "step": 16637 }, { "epoch": 0.5368059994636134, "grad_norm": 0.341796875, "learning_rate": 1.4031254579885427e-05, "loss": 1.9613, "step": 16638 }, { "epoch": 0.5368382633174098, "grad_norm": 0.349609375, "learning_rate": 1.402968413668245e-05, "loss": 1.9872, "step": 16639 }, { "epoch": 0.5368705271712061, "grad_norm": 0.326171875, "learning_rate": 1.4028113704160023e-05, "loss": 1.9718, "step": 16640 }, { "epoch": 0.5369027910250025, "grad_norm": 0.375, "learning_rate": 1.4026543282335417e-05, "loss": 1.9781, "step": 16641 }, { "epoch": 0.5369350548787988, "grad_norm": 0.357421875, "learning_rate": 1.4024972871225918e-05, "loss": 1.9857, "step": 16642 }, { "epoch": 0.5369673187325952, "grad_norm": 0.341796875, "learning_rate": 1.402340247084883e-05, "loss": 1.9613, "step": 16643 }, { "epoch": 0.5369995825863915, "grad_norm": 0.3671875, "learning_rate": 1.402183208122142e-05, "loss": 1.9789, "step": 16644 }, { "epoch": 0.5370318464401879, "grad_norm": 0.369140625, "learning_rate": 1.402026170236098e-05, "loss": 1.9764, "step": 16645 }, { "epoch": 0.5370641102939842, "grad_norm": 0.33984375, "learning_rate": 1.4018691334284799e-05, "loss": 1.9772, "step": 16646 }, { "epoch": 0.5370963741477806, "grad_norm": 0.3828125, "learning_rate": 1.4017120977010158e-05, "loss": 1.999, "step": 16647 }, { "epoch": 0.5371286380015768, "grad_norm": 0.3359375, "learning_rate": 1.4015550630554345e-05, "loss": 1.9727, "step": 16648 }, { "epoch": 0.5371609018553732, "grad_norm": 0.361328125, "learning_rate": 1.4013980294934647e-05, "loss": 1.9862, "step": 16649 }, { "epoch": 0.5371931657091696, "grad_norm": 0.341796875, "learning_rate": 1.401240997016834e-05, "loss": 1.9719, "step": 16650 }, { "epoch": 0.5372254295629659, "grad_norm": 0.337890625, "learning_rate": 1.401083965627272e-05, "loss": 1.9616, "step": 16651 }, { "epoch": 0.5372576934167623, "grad_norm": 0.375, "learning_rate": 1.4009269353265069e-05, "loss": 1.9741, "step": 16652 }, { "epoch": 0.5372899572705586, "grad_norm": 0.3359375, "learning_rate": 1.4007699061162667e-05, "loss": 1.9503, "step": 16653 }, { "epoch": 0.537322221124355, "grad_norm": 0.33984375, "learning_rate": 1.4006128779982803e-05, "loss": 1.9811, "step": 16654 }, { "epoch": 0.5373544849781513, "grad_norm": 0.337890625, "learning_rate": 1.400455850974276e-05, "loss": 1.9943, "step": 16655 }, { "epoch": 0.5373867488319477, "grad_norm": 0.3203125, "learning_rate": 1.4002988250459823e-05, "loss": 1.9624, "step": 16656 }, { "epoch": 0.537419012685744, "grad_norm": 0.35546875, "learning_rate": 1.4001418002151276e-05, "loss": 2.0068, "step": 16657 }, { "epoch": 0.5374512765395404, "grad_norm": 0.333984375, "learning_rate": 1.3999847764834405e-05, "loss": 1.9592, "step": 16658 }, { "epoch": 0.5374835403933367, "grad_norm": 0.35546875, "learning_rate": 1.399827753852649e-05, "loss": 1.9911, "step": 16659 }, { "epoch": 0.5375158042471331, "grad_norm": 0.36328125, "learning_rate": 1.3996707323244817e-05, "loss": 2.0034, "step": 16660 }, { "epoch": 0.5375480681009294, "grad_norm": 0.37109375, "learning_rate": 1.3995137119006676e-05, "loss": 1.9985, "step": 16661 }, { "epoch": 0.5375803319547258, "grad_norm": 0.36328125, "learning_rate": 1.3993566925829337e-05, "loss": 1.9692, "step": 16662 }, { "epoch": 0.537612595808522, "grad_norm": 0.380859375, "learning_rate": 1.3991996743730091e-05, "loss": 1.9828, "step": 16663 }, { "epoch": 0.5376448596623185, "grad_norm": 0.353515625, "learning_rate": 1.399042657272623e-05, "loss": 2.0044, "step": 16664 }, { "epoch": 0.5376771235161147, "grad_norm": 0.36328125, "learning_rate": 1.3988856412835024e-05, "loss": 1.967, "step": 16665 }, { "epoch": 0.5377093873699111, "grad_norm": 0.349609375, "learning_rate": 1.3987286264073755e-05, "loss": 1.9669, "step": 16666 }, { "epoch": 0.5377416512237074, "grad_norm": 0.330078125, "learning_rate": 1.3985716126459725e-05, "loss": 1.989, "step": 16667 }, { "epoch": 0.5377739150775038, "grad_norm": 0.369140625, "learning_rate": 1.3984146000010196e-05, "loss": 1.9948, "step": 16668 }, { "epoch": 0.5378061789313002, "grad_norm": 0.33203125, "learning_rate": 1.3982575884742458e-05, "loss": 1.9959, "step": 16669 }, { "epoch": 0.5378384427850965, "grad_norm": 0.3515625, "learning_rate": 1.3981005780673793e-05, "loss": 1.986, "step": 16670 }, { "epoch": 0.5378707066388929, "grad_norm": 0.34765625, "learning_rate": 1.3979435687821496e-05, "loss": 1.984, "step": 16671 }, { "epoch": 0.5379029704926892, "grad_norm": 0.3359375, "learning_rate": 1.3977865606202832e-05, "loss": 2.0079, "step": 16672 }, { "epoch": 0.5379352343464856, "grad_norm": 0.373046875, "learning_rate": 1.3976295535835085e-05, "loss": 1.9763, "step": 16673 }, { "epoch": 0.5379674982002819, "grad_norm": 0.33203125, "learning_rate": 1.3974725476735554e-05, "loss": 1.9955, "step": 16674 }, { "epoch": 0.5379997620540783, "grad_norm": 0.373046875, "learning_rate": 1.3973155428921503e-05, "loss": 1.9681, "step": 16675 }, { "epoch": 0.5380320259078746, "grad_norm": 0.337890625, "learning_rate": 1.397158539241022e-05, "loss": 1.9793, "step": 16676 }, { "epoch": 0.538064289761671, "grad_norm": 0.357421875, "learning_rate": 1.3970015367218989e-05, "loss": 1.9818, "step": 16677 }, { "epoch": 0.5380965536154673, "grad_norm": 0.330078125, "learning_rate": 1.3968445353365088e-05, "loss": 1.9707, "step": 16678 }, { "epoch": 0.5381288174692637, "grad_norm": 0.341796875, "learning_rate": 1.39668753508658e-05, "loss": 1.9608, "step": 16679 }, { "epoch": 0.53816108132306, "grad_norm": 0.33984375, "learning_rate": 1.3965305359738412e-05, "loss": 1.9676, "step": 16680 }, { "epoch": 0.5381933451768564, "grad_norm": 0.33984375, "learning_rate": 1.3963735380000194e-05, "loss": 1.9631, "step": 16681 }, { "epoch": 0.5382256090306526, "grad_norm": 0.357421875, "learning_rate": 1.3962165411668435e-05, "loss": 1.9854, "step": 16682 }, { "epoch": 0.538257872884449, "grad_norm": 0.337890625, "learning_rate": 1.396059545476042e-05, "loss": 1.9629, "step": 16683 }, { "epoch": 0.5382901367382453, "grad_norm": 0.357421875, "learning_rate": 1.3959025509293418e-05, "loss": 1.9963, "step": 16684 }, { "epoch": 0.5383224005920417, "grad_norm": 0.33984375, "learning_rate": 1.3957455575284717e-05, "loss": 1.9657, "step": 16685 }, { "epoch": 0.538354664445838, "grad_norm": 0.345703125, "learning_rate": 1.3955885652751603e-05, "loss": 2.0042, "step": 16686 }, { "epoch": 0.5383869282996344, "grad_norm": 0.34765625, "learning_rate": 1.3954315741711343e-05, "loss": 1.9632, "step": 16687 }, { "epoch": 0.5384191921534307, "grad_norm": 0.359375, "learning_rate": 1.3952745842181228e-05, "loss": 1.9382, "step": 16688 }, { "epoch": 0.5384514560072271, "grad_norm": 0.3515625, "learning_rate": 1.3951175954178539e-05, "loss": 1.9519, "step": 16689 }, { "epoch": 0.5384837198610235, "grad_norm": 0.33203125, "learning_rate": 1.3949606077720549e-05, "loss": 2.0005, "step": 16690 }, { "epoch": 0.5385159837148198, "grad_norm": 0.35546875, "learning_rate": 1.3948036212824542e-05, "loss": 1.9795, "step": 16691 }, { "epoch": 0.5385482475686162, "grad_norm": 0.33984375, "learning_rate": 1.3946466359507803e-05, "loss": 1.9932, "step": 16692 }, { "epoch": 0.5385805114224125, "grad_norm": 0.349609375, "learning_rate": 1.3944896517787597e-05, "loss": 2.0075, "step": 16693 }, { "epoch": 0.5386127752762089, "grad_norm": 0.3359375, "learning_rate": 1.3943326687681217e-05, "loss": 1.9371, "step": 16694 }, { "epoch": 0.5386450391300052, "grad_norm": 0.3359375, "learning_rate": 1.3941756869205946e-05, "loss": 1.9447, "step": 16695 }, { "epoch": 0.5386773029838016, "grad_norm": 0.33984375, "learning_rate": 1.3940187062379049e-05, "loss": 1.9828, "step": 16696 }, { "epoch": 0.5387095668375979, "grad_norm": 0.33203125, "learning_rate": 1.393861726721781e-05, "loss": 1.9851, "step": 16697 }, { "epoch": 0.5387418306913943, "grad_norm": 0.34765625, "learning_rate": 1.3937047483739517e-05, "loss": 2.0115, "step": 16698 }, { "epoch": 0.5387740945451905, "grad_norm": 0.33984375, "learning_rate": 1.3935477711961439e-05, "loss": 1.9766, "step": 16699 }, { "epoch": 0.5388063583989869, "grad_norm": 0.34375, "learning_rate": 1.393390795190086e-05, "loss": 1.9646, "step": 16700 }, { "epoch": 0.5388386222527832, "grad_norm": 0.3359375, "learning_rate": 1.393233820357506e-05, "loss": 1.9962, "step": 16701 }, { "epoch": 0.5388708861065796, "grad_norm": 0.35546875, "learning_rate": 1.393076846700131e-05, "loss": 1.966, "step": 16702 }, { "epoch": 0.5389031499603759, "grad_norm": 0.35546875, "learning_rate": 1.3929198742196894e-05, "loss": 1.9488, "step": 16703 }, { "epoch": 0.5389354138141723, "grad_norm": 0.341796875, "learning_rate": 1.3927629029179092e-05, "loss": 1.9896, "step": 16704 }, { "epoch": 0.5389676776679686, "grad_norm": 0.345703125, "learning_rate": 1.3926059327965182e-05, "loss": 1.9701, "step": 16705 }, { "epoch": 0.538999941521765, "grad_norm": 0.353515625, "learning_rate": 1.3924489638572439e-05, "loss": 2.0173, "step": 16706 }, { "epoch": 0.5390322053755613, "grad_norm": 0.34765625, "learning_rate": 1.3922919961018141e-05, "loss": 1.9755, "step": 16707 }, { "epoch": 0.5390644692293577, "grad_norm": 0.337890625, "learning_rate": 1.3921350295319569e-05, "loss": 1.942, "step": 16708 }, { "epoch": 0.539096733083154, "grad_norm": 0.3359375, "learning_rate": 1.3919780641493998e-05, "loss": 1.9878, "step": 16709 }, { "epoch": 0.5391289969369504, "grad_norm": 0.333984375, "learning_rate": 1.3918210999558706e-05, "loss": 1.9499, "step": 16710 }, { "epoch": 0.5391612607907468, "grad_norm": 0.34375, "learning_rate": 1.3916641369530975e-05, "loss": 1.9718, "step": 16711 }, { "epoch": 0.5391935246445431, "grad_norm": 0.341796875, "learning_rate": 1.3915071751428075e-05, "loss": 1.9925, "step": 16712 }, { "epoch": 0.5392257884983395, "grad_norm": 0.330078125, "learning_rate": 1.3913502145267288e-05, "loss": 1.9838, "step": 16713 }, { "epoch": 0.5392580523521358, "grad_norm": 0.34375, "learning_rate": 1.391193255106589e-05, "loss": 1.9863, "step": 16714 }, { "epoch": 0.5392903162059322, "grad_norm": 0.33203125, "learning_rate": 1.3910362968841158e-05, "loss": 1.9767, "step": 16715 }, { "epoch": 0.5393225800597284, "grad_norm": 0.3359375, "learning_rate": 1.3908793398610368e-05, "loss": 1.9963, "step": 16716 }, { "epoch": 0.5393548439135248, "grad_norm": 0.349609375, "learning_rate": 1.39072238403908e-05, "loss": 1.9912, "step": 16717 }, { "epoch": 0.5393871077673211, "grad_norm": 0.341796875, "learning_rate": 1.3905654294199724e-05, "loss": 1.9722, "step": 16718 }, { "epoch": 0.5394193716211175, "grad_norm": 0.365234375, "learning_rate": 1.3904084760054423e-05, "loss": 1.9839, "step": 16719 }, { "epoch": 0.5394516354749138, "grad_norm": 0.353515625, "learning_rate": 1.3902515237972176e-05, "loss": 1.9832, "step": 16720 }, { "epoch": 0.5394838993287102, "grad_norm": 0.333984375, "learning_rate": 1.3900945727970243e-05, "loss": 1.9738, "step": 16721 }, { "epoch": 0.5395161631825065, "grad_norm": 0.353515625, "learning_rate": 1.3899376230065914e-05, "loss": 1.9769, "step": 16722 }, { "epoch": 0.5395484270363029, "grad_norm": 0.3359375, "learning_rate": 1.389780674427647e-05, "loss": 1.9778, "step": 16723 }, { "epoch": 0.5395806908900992, "grad_norm": 0.34375, "learning_rate": 1.389623727061917e-05, "loss": 2.0009, "step": 16724 }, { "epoch": 0.5396129547438956, "grad_norm": 0.34765625, "learning_rate": 1.3894667809111298e-05, "loss": 1.9705, "step": 16725 }, { "epoch": 0.5396452185976919, "grad_norm": 0.34765625, "learning_rate": 1.389309835977014e-05, "loss": 1.974, "step": 16726 }, { "epoch": 0.5396774824514883, "grad_norm": 0.345703125, "learning_rate": 1.3891528922612952e-05, "loss": 1.9748, "step": 16727 }, { "epoch": 0.5397097463052846, "grad_norm": 0.34375, "learning_rate": 1.3889959497657017e-05, "loss": 2.0039, "step": 16728 }, { "epoch": 0.539742010159081, "grad_norm": 0.34765625, "learning_rate": 1.388839008491962e-05, "loss": 1.9723, "step": 16729 }, { "epoch": 0.5397742740128774, "grad_norm": 0.330078125, "learning_rate": 1.3886820684418022e-05, "loss": 2.0059, "step": 16730 }, { "epoch": 0.5398065378666737, "grad_norm": 0.3359375, "learning_rate": 1.3885251296169502e-05, "loss": 1.9766, "step": 16731 }, { "epoch": 0.53983880172047, "grad_norm": 0.345703125, "learning_rate": 1.3883681920191339e-05, "loss": 1.961, "step": 16732 }, { "epoch": 0.5398710655742663, "grad_norm": 0.33984375, "learning_rate": 1.3882112556500804e-05, "loss": 1.9844, "step": 16733 }, { "epoch": 0.5399033294280627, "grad_norm": 0.337890625, "learning_rate": 1.3880543205115169e-05, "loss": 1.9776, "step": 16734 }, { "epoch": 0.539935593281859, "grad_norm": 0.34375, "learning_rate": 1.3878973866051713e-05, "loss": 1.9754, "step": 16735 }, { "epoch": 0.5399678571356554, "grad_norm": 0.3203125, "learning_rate": 1.387740453932771e-05, "loss": 1.9677, "step": 16736 }, { "epoch": 0.5400001209894517, "grad_norm": 0.33984375, "learning_rate": 1.3875835224960432e-05, "loss": 1.9816, "step": 16737 }, { "epoch": 0.5400323848432481, "grad_norm": 0.341796875, "learning_rate": 1.3874265922967152e-05, "loss": 2.0062, "step": 16738 }, { "epoch": 0.5400646486970444, "grad_norm": 0.357421875, "learning_rate": 1.3872696633365148e-05, "loss": 1.9891, "step": 16739 }, { "epoch": 0.5400969125508408, "grad_norm": 0.337890625, "learning_rate": 1.3871127356171687e-05, "loss": 1.9794, "step": 16740 }, { "epoch": 0.5401291764046371, "grad_norm": 0.337890625, "learning_rate": 1.3869558091404047e-05, "loss": 1.9873, "step": 16741 }, { "epoch": 0.5401614402584335, "grad_norm": 0.33203125, "learning_rate": 1.3867988839079503e-05, "loss": 1.9668, "step": 16742 }, { "epoch": 0.5401937041122298, "grad_norm": 0.361328125, "learning_rate": 1.3866419599215324e-05, "loss": 2.0045, "step": 16743 }, { "epoch": 0.5402259679660262, "grad_norm": 0.345703125, "learning_rate": 1.3864850371828783e-05, "loss": 1.9779, "step": 16744 }, { "epoch": 0.5402582318198225, "grad_norm": 0.333984375, "learning_rate": 1.3863281156937161e-05, "loss": 1.9354, "step": 16745 }, { "epoch": 0.5402904956736189, "grad_norm": 0.349609375, "learning_rate": 1.3861711954557718e-05, "loss": 1.9843, "step": 16746 }, { "epoch": 0.5403227595274152, "grad_norm": 0.33984375, "learning_rate": 1.3860142764707734e-05, "loss": 1.9649, "step": 16747 }, { "epoch": 0.5403550233812116, "grad_norm": 0.34375, "learning_rate": 1.3858573587404487e-05, "loss": 1.9593, "step": 16748 }, { "epoch": 0.5403872872350078, "grad_norm": 0.357421875, "learning_rate": 1.3857004422665237e-05, "loss": 1.9913, "step": 16749 }, { "epoch": 0.5404195510888042, "grad_norm": 0.34765625, "learning_rate": 1.3855435270507262e-05, "loss": 1.9223, "step": 16750 }, { "epoch": 0.5404518149426006, "grad_norm": 0.345703125, "learning_rate": 1.3853866130947841e-05, "loss": 1.9875, "step": 16751 }, { "epoch": 0.5404840787963969, "grad_norm": 0.37109375, "learning_rate": 1.3852297004004231e-05, "loss": 1.9722, "step": 16752 }, { "epoch": 0.5405163426501933, "grad_norm": 0.349609375, "learning_rate": 1.3850727889693714e-05, "loss": 1.9668, "step": 16753 }, { "epoch": 0.5405486065039896, "grad_norm": 0.337890625, "learning_rate": 1.3849158788033567e-05, "loss": 1.9451, "step": 16754 }, { "epoch": 0.540580870357786, "grad_norm": 0.3515625, "learning_rate": 1.3847589699041049e-05, "loss": 1.9816, "step": 16755 }, { "epoch": 0.5406131342115823, "grad_norm": 0.35546875, "learning_rate": 1.3846020622733431e-05, "loss": 1.9521, "step": 16756 }, { "epoch": 0.5406453980653787, "grad_norm": 0.341796875, "learning_rate": 1.3844451559128004e-05, "loss": 1.9779, "step": 16757 }, { "epoch": 0.540677661919175, "grad_norm": 0.349609375, "learning_rate": 1.3842882508242015e-05, "loss": 2.0167, "step": 16758 }, { "epoch": 0.5407099257729714, "grad_norm": 0.345703125, "learning_rate": 1.3841313470092745e-05, "loss": 2.0006, "step": 16759 }, { "epoch": 0.5407421896267677, "grad_norm": 0.33203125, "learning_rate": 1.3839744444697472e-05, "loss": 1.9814, "step": 16760 }, { "epoch": 0.5407744534805641, "grad_norm": 0.35546875, "learning_rate": 1.3838175432073455e-05, "loss": 1.987, "step": 16761 }, { "epoch": 0.5408067173343604, "grad_norm": 0.349609375, "learning_rate": 1.3836606432237967e-05, "loss": 1.9918, "step": 16762 }, { "epoch": 0.5408389811881568, "grad_norm": 0.341796875, "learning_rate": 1.3835037445208288e-05, "loss": 1.9949, "step": 16763 }, { "epoch": 0.540871245041953, "grad_norm": 0.34375, "learning_rate": 1.3833468471001676e-05, "loss": 1.9706, "step": 16764 }, { "epoch": 0.5409035088957495, "grad_norm": 0.337890625, "learning_rate": 1.3831899509635404e-05, "loss": 1.9519, "step": 16765 }, { "epoch": 0.5409357727495457, "grad_norm": 0.328125, "learning_rate": 1.383033056112675e-05, "loss": 1.9878, "step": 16766 }, { "epoch": 0.5409680366033421, "grad_norm": 0.341796875, "learning_rate": 1.3828761625492975e-05, "loss": 1.9815, "step": 16767 }, { "epoch": 0.5410003004571384, "grad_norm": 0.33203125, "learning_rate": 1.382719270275135e-05, "loss": 1.9645, "step": 16768 }, { "epoch": 0.5410325643109348, "grad_norm": 0.33984375, "learning_rate": 1.3825623792919148e-05, "loss": 1.9988, "step": 16769 }, { "epoch": 0.5410648281647312, "grad_norm": 0.34375, "learning_rate": 1.3824054896013638e-05, "loss": 1.9752, "step": 16770 }, { "epoch": 0.5410970920185275, "grad_norm": 0.337890625, "learning_rate": 1.3822486012052086e-05, "loss": 1.9758, "step": 16771 }, { "epoch": 0.5411293558723239, "grad_norm": 0.333984375, "learning_rate": 1.3820917141051765e-05, "loss": 1.9767, "step": 16772 }, { "epoch": 0.5411616197261202, "grad_norm": 0.3515625, "learning_rate": 1.3819348283029943e-05, "loss": 1.9928, "step": 16773 }, { "epoch": 0.5411938835799166, "grad_norm": 0.34375, "learning_rate": 1.3817779438003886e-05, "loss": 1.9776, "step": 16774 }, { "epoch": 0.5412261474337129, "grad_norm": 0.34375, "learning_rate": 1.3816210605990864e-05, "loss": 1.9499, "step": 16775 }, { "epoch": 0.5412584112875093, "grad_norm": 0.33984375, "learning_rate": 1.3814641787008151e-05, "loss": 1.9689, "step": 16776 }, { "epoch": 0.5412906751413056, "grad_norm": 0.3515625, "learning_rate": 1.3813072981073006e-05, "loss": 1.9821, "step": 16777 }, { "epoch": 0.541322938995102, "grad_norm": 0.357421875, "learning_rate": 1.3811504188202702e-05, "loss": 1.9822, "step": 16778 }, { "epoch": 0.5413552028488983, "grad_norm": 0.34375, "learning_rate": 1.3809935408414514e-05, "loss": 1.9501, "step": 16779 }, { "epoch": 0.5413874667026947, "grad_norm": 0.357421875, "learning_rate": 1.3808366641725697e-05, "loss": 1.9623, "step": 16780 }, { "epoch": 0.541419730556491, "grad_norm": 0.330078125, "learning_rate": 1.3806797888153525e-05, "loss": 1.9692, "step": 16781 }, { "epoch": 0.5414519944102874, "grad_norm": 0.369140625, "learning_rate": 1.3805229147715274e-05, "loss": 1.9558, "step": 16782 }, { "epoch": 0.5414842582640836, "grad_norm": 0.337890625, "learning_rate": 1.3803660420428192e-05, "loss": 2.0133, "step": 16783 }, { "epoch": 0.54151652211788, "grad_norm": 0.36328125, "learning_rate": 1.3802091706309561e-05, "loss": 1.9839, "step": 16784 }, { "epoch": 0.5415487859716763, "grad_norm": 0.333984375, "learning_rate": 1.3800523005376652e-05, "loss": 1.9617, "step": 16785 }, { "epoch": 0.5415810498254727, "grad_norm": 0.341796875, "learning_rate": 1.3798954317646719e-05, "loss": 1.9652, "step": 16786 }, { "epoch": 0.541613313679269, "grad_norm": 0.34375, "learning_rate": 1.3797385643137033e-05, "loss": 1.9773, "step": 16787 }, { "epoch": 0.5416455775330654, "grad_norm": 0.3359375, "learning_rate": 1.3795816981864871e-05, "loss": 2.0072, "step": 16788 }, { "epoch": 0.5416778413868617, "grad_norm": 0.337890625, "learning_rate": 1.3794248333847488e-05, "loss": 1.975, "step": 16789 }, { "epoch": 0.5417101052406581, "grad_norm": 0.333984375, "learning_rate": 1.3792679699102148e-05, "loss": 1.9833, "step": 16790 }, { "epoch": 0.5417423690944545, "grad_norm": 0.34375, "learning_rate": 1.3791111077646134e-05, "loss": 1.948, "step": 16791 }, { "epoch": 0.5417746329482508, "grad_norm": 0.34765625, "learning_rate": 1.3789542469496697e-05, "loss": 1.9513, "step": 16792 }, { "epoch": 0.5418068968020472, "grad_norm": 0.337890625, "learning_rate": 1.3787973874671107e-05, "loss": 1.9674, "step": 16793 }, { "epoch": 0.5418391606558435, "grad_norm": 0.34375, "learning_rate": 1.3786405293186636e-05, "loss": 1.9839, "step": 16794 }, { "epoch": 0.5418714245096399, "grad_norm": 0.337890625, "learning_rate": 1.3784836725060539e-05, "loss": 1.9719, "step": 16795 }, { "epoch": 0.5419036883634362, "grad_norm": 0.33984375, "learning_rate": 1.3783268170310089e-05, "loss": 1.9564, "step": 16796 }, { "epoch": 0.5419359522172326, "grad_norm": 0.328125, "learning_rate": 1.3781699628952553e-05, "loss": 1.9467, "step": 16797 }, { "epoch": 0.5419682160710289, "grad_norm": 0.337890625, "learning_rate": 1.3780131101005191e-05, "loss": 1.979, "step": 16798 }, { "epoch": 0.5420004799248253, "grad_norm": 0.330078125, "learning_rate": 1.377856258648527e-05, "loss": 1.9749, "step": 16799 }, { "epoch": 0.5420327437786215, "grad_norm": 0.349609375, "learning_rate": 1.3776994085410061e-05, "loss": 1.9445, "step": 16800 }, { "epoch": 0.5420650076324179, "grad_norm": 0.3359375, "learning_rate": 1.3775425597796818e-05, "loss": 1.9522, "step": 16801 }, { "epoch": 0.5420972714862142, "grad_norm": 0.3359375, "learning_rate": 1.3773857123662812e-05, "loss": 1.9686, "step": 16802 }, { "epoch": 0.5421295353400106, "grad_norm": 0.34375, "learning_rate": 1.3772288663025308e-05, "loss": 1.94, "step": 16803 }, { "epoch": 0.5421617991938069, "grad_norm": 0.349609375, "learning_rate": 1.3770720215901571e-05, "loss": 1.9749, "step": 16804 }, { "epoch": 0.5421940630476033, "grad_norm": 0.34765625, "learning_rate": 1.3769151782308862e-05, "loss": 1.921, "step": 16805 }, { "epoch": 0.5422263269013996, "grad_norm": 0.337890625, "learning_rate": 1.3767583362264447e-05, "loss": 1.9579, "step": 16806 }, { "epoch": 0.542258590755196, "grad_norm": 0.34375, "learning_rate": 1.3766014955785594e-05, "loss": 1.9872, "step": 16807 }, { "epoch": 0.5422908546089923, "grad_norm": 0.341796875, "learning_rate": 1.3764446562889559e-05, "loss": 1.9521, "step": 16808 }, { "epoch": 0.5423231184627887, "grad_norm": 0.34765625, "learning_rate": 1.3762878183593612e-05, "loss": 1.997, "step": 16809 }, { "epoch": 0.542355382316585, "grad_norm": 0.35546875, "learning_rate": 1.3761309817915017e-05, "loss": 1.9793, "step": 16810 }, { "epoch": 0.5423876461703814, "grad_norm": 0.32421875, "learning_rate": 1.375974146587103e-05, "loss": 1.993, "step": 16811 }, { "epoch": 0.5424199100241778, "grad_norm": 0.33984375, "learning_rate": 1.375817312747892e-05, "loss": 1.9356, "step": 16812 }, { "epoch": 0.5424521738779741, "grad_norm": 0.34765625, "learning_rate": 1.3756604802755956e-05, "loss": 1.9967, "step": 16813 }, { "epoch": 0.5424844377317705, "grad_norm": 0.341796875, "learning_rate": 1.3755036491719383e-05, "loss": 1.9836, "step": 16814 }, { "epoch": 0.5425167015855668, "grad_norm": 0.341796875, "learning_rate": 1.3753468194386481e-05, "loss": 1.9669, "step": 16815 }, { "epoch": 0.5425489654393632, "grad_norm": 0.33203125, "learning_rate": 1.3751899910774511e-05, "loss": 1.9828, "step": 16816 }, { "epoch": 0.5425812292931594, "grad_norm": 0.341796875, "learning_rate": 1.3750331640900726e-05, "loss": 1.9817, "step": 16817 }, { "epoch": 0.5426134931469558, "grad_norm": 0.32421875, "learning_rate": 1.3748763384782392e-05, "loss": 1.9911, "step": 16818 }, { "epoch": 0.5426457570007521, "grad_norm": 0.337890625, "learning_rate": 1.3747195142436782e-05, "loss": 1.9621, "step": 16819 }, { "epoch": 0.5426780208545485, "grad_norm": 0.33203125, "learning_rate": 1.3745626913881142e-05, "loss": 1.9819, "step": 16820 }, { "epoch": 0.5427102847083448, "grad_norm": 0.328125, "learning_rate": 1.374405869913274e-05, "loss": 1.9985, "step": 16821 }, { "epoch": 0.5427425485621412, "grad_norm": 0.34765625, "learning_rate": 1.3742490498208844e-05, "loss": 1.9934, "step": 16822 }, { "epoch": 0.5427748124159375, "grad_norm": 0.341796875, "learning_rate": 1.3740922311126705e-05, "loss": 1.9906, "step": 16823 }, { "epoch": 0.5428070762697339, "grad_norm": 0.328125, "learning_rate": 1.3739354137903591e-05, "loss": 1.9809, "step": 16824 }, { "epoch": 0.5428393401235302, "grad_norm": 0.3515625, "learning_rate": 1.3737785978556767e-05, "loss": 1.9985, "step": 16825 }, { "epoch": 0.5428716039773266, "grad_norm": 0.333984375, "learning_rate": 1.3736217833103484e-05, "loss": 1.9884, "step": 16826 }, { "epoch": 0.5429038678311229, "grad_norm": 0.33984375, "learning_rate": 1.3734649701561011e-05, "loss": 1.9896, "step": 16827 }, { "epoch": 0.5429361316849193, "grad_norm": 0.33203125, "learning_rate": 1.3733081583946606e-05, "loss": 1.9596, "step": 16828 }, { "epoch": 0.5429683955387156, "grad_norm": 0.345703125, "learning_rate": 1.373151348027753e-05, "loss": 1.9701, "step": 16829 }, { "epoch": 0.543000659392512, "grad_norm": 0.345703125, "learning_rate": 1.3729945390571041e-05, "loss": 1.9809, "step": 16830 }, { "epoch": 0.5430329232463084, "grad_norm": 0.33984375, "learning_rate": 1.3728377314844407e-05, "loss": 1.9746, "step": 16831 }, { "epoch": 0.5430651871001047, "grad_norm": 0.326171875, "learning_rate": 1.372680925311488e-05, "loss": 1.9229, "step": 16832 }, { "epoch": 0.543097450953901, "grad_norm": 0.341796875, "learning_rate": 1.3725241205399722e-05, "loss": 1.9669, "step": 16833 }, { "epoch": 0.5431297148076973, "grad_norm": 0.322265625, "learning_rate": 1.3723673171716196e-05, "loss": 1.9382, "step": 16834 }, { "epoch": 0.5431619786614937, "grad_norm": 0.345703125, "learning_rate": 1.3722105152081563e-05, "loss": 1.9799, "step": 16835 }, { "epoch": 0.54319424251529, "grad_norm": 0.328125, "learning_rate": 1.3720537146513078e-05, "loss": 1.9723, "step": 16836 }, { "epoch": 0.5432265063690864, "grad_norm": 0.359375, "learning_rate": 1.3718969155027999e-05, "loss": 1.969, "step": 16837 }, { "epoch": 0.5432587702228827, "grad_norm": 0.33203125, "learning_rate": 1.3717401177643591e-05, "loss": 1.9573, "step": 16838 }, { "epoch": 0.5432910340766791, "grad_norm": 0.328125, "learning_rate": 1.371583321437711e-05, "loss": 1.9634, "step": 16839 }, { "epoch": 0.5433232979304754, "grad_norm": 0.333984375, "learning_rate": 1.3714265265245816e-05, "loss": 1.9557, "step": 16840 }, { "epoch": 0.5433555617842718, "grad_norm": 0.34375, "learning_rate": 1.371269733026697e-05, "loss": 1.9873, "step": 16841 }, { "epoch": 0.5433878256380681, "grad_norm": 0.337890625, "learning_rate": 1.3711129409457825e-05, "loss": 1.9442, "step": 16842 }, { "epoch": 0.5434200894918645, "grad_norm": 0.34375, "learning_rate": 1.3709561502835644e-05, "loss": 2.0028, "step": 16843 }, { "epoch": 0.5434523533456608, "grad_norm": 0.35546875, "learning_rate": 1.3707993610417689e-05, "loss": 1.9946, "step": 16844 }, { "epoch": 0.5434846171994572, "grad_norm": 0.328125, "learning_rate": 1.3706425732221204e-05, "loss": 1.9835, "step": 16845 }, { "epoch": 0.5435168810532535, "grad_norm": 0.33984375, "learning_rate": 1.3704857868263461e-05, "loss": 1.9879, "step": 16846 }, { "epoch": 0.5435491449070499, "grad_norm": 0.333984375, "learning_rate": 1.370329001856172e-05, "loss": 1.936, "step": 16847 }, { "epoch": 0.5435814087608462, "grad_norm": 0.341796875, "learning_rate": 1.3701722183133224e-05, "loss": 1.9728, "step": 16848 }, { "epoch": 0.5436136726146426, "grad_norm": 0.328125, "learning_rate": 1.3700154361995236e-05, "loss": 1.9757, "step": 16849 }, { "epoch": 0.5436459364684388, "grad_norm": 0.345703125, "learning_rate": 1.3698586555165027e-05, "loss": 1.9591, "step": 16850 }, { "epoch": 0.5436782003222352, "grad_norm": 0.33984375, "learning_rate": 1.3697018762659837e-05, "loss": 1.9705, "step": 16851 }, { "epoch": 0.5437104641760316, "grad_norm": 0.3359375, "learning_rate": 1.369545098449693e-05, "loss": 1.9663, "step": 16852 }, { "epoch": 0.5437427280298279, "grad_norm": 0.353515625, "learning_rate": 1.3693883220693565e-05, "loss": 1.9908, "step": 16853 }, { "epoch": 0.5437749918836243, "grad_norm": 0.33984375, "learning_rate": 1.3692315471266992e-05, "loss": 1.9805, "step": 16854 }, { "epoch": 0.5438072557374206, "grad_norm": 0.341796875, "learning_rate": 1.3690747736234474e-05, "loss": 1.9957, "step": 16855 }, { "epoch": 0.543839519591217, "grad_norm": 0.349609375, "learning_rate": 1.368918001561327e-05, "loss": 1.9444, "step": 16856 }, { "epoch": 0.5438717834450133, "grad_norm": 0.341796875, "learning_rate": 1.3687612309420624e-05, "loss": 1.9385, "step": 16857 }, { "epoch": 0.5439040472988097, "grad_norm": 0.33203125, "learning_rate": 1.3686044617673806e-05, "loss": 1.9719, "step": 16858 }, { "epoch": 0.543936311152606, "grad_norm": 0.341796875, "learning_rate": 1.3684476940390068e-05, "loss": 1.995, "step": 16859 }, { "epoch": 0.5439685750064024, "grad_norm": 0.330078125, "learning_rate": 1.3682909277586656e-05, "loss": 1.9448, "step": 16860 }, { "epoch": 0.5440008388601987, "grad_norm": 0.369140625, "learning_rate": 1.368134162928084e-05, "loss": 1.9986, "step": 16861 }, { "epoch": 0.5440331027139951, "grad_norm": 0.326171875, "learning_rate": 1.367977399548987e-05, "loss": 1.9514, "step": 16862 }, { "epoch": 0.5440653665677914, "grad_norm": 0.345703125, "learning_rate": 1.3678206376230997e-05, "loss": 1.9367, "step": 16863 }, { "epoch": 0.5440976304215878, "grad_norm": 0.341796875, "learning_rate": 1.367663877152148e-05, "loss": 1.9933, "step": 16864 }, { "epoch": 0.544129894275384, "grad_norm": 0.349609375, "learning_rate": 1.367507118137858e-05, "loss": 1.9835, "step": 16865 }, { "epoch": 0.5441621581291805, "grad_norm": 0.328125, "learning_rate": 1.367350360581954e-05, "loss": 1.9908, "step": 16866 }, { "epoch": 0.5441944219829767, "grad_norm": 0.34375, "learning_rate": 1.3671936044861622e-05, "loss": 2.0002, "step": 16867 }, { "epoch": 0.5442266858367731, "grad_norm": 0.349609375, "learning_rate": 1.3670368498522078e-05, "loss": 1.9778, "step": 16868 }, { "epoch": 0.5442589496905694, "grad_norm": 0.33203125, "learning_rate": 1.3668800966818167e-05, "loss": 2.0044, "step": 16869 }, { "epoch": 0.5442912135443658, "grad_norm": 0.3515625, "learning_rate": 1.3667233449767136e-05, "loss": 1.9814, "step": 16870 }, { "epoch": 0.5443234773981622, "grad_norm": 0.337890625, "learning_rate": 1.3665665947386244e-05, "loss": 1.9664, "step": 16871 }, { "epoch": 0.5443557412519585, "grad_norm": 0.3671875, "learning_rate": 1.366409845969275e-05, "loss": 1.9689, "step": 16872 }, { "epoch": 0.5443880051057549, "grad_norm": 0.330078125, "learning_rate": 1.3662530986703893e-05, "loss": 1.9976, "step": 16873 }, { "epoch": 0.5444202689595512, "grad_norm": 0.376953125, "learning_rate": 1.366096352843694e-05, "loss": 1.9488, "step": 16874 }, { "epoch": 0.5444525328133476, "grad_norm": 0.33203125, "learning_rate": 1.3659396084909145e-05, "loss": 1.9494, "step": 16875 }, { "epoch": 0.5444847966671439, "grad_norm": 0.333984375, "learning_rate": 1.3657828656137747e-05, "loss": 1.9731, "step": 16876 }, { "epoch": 0.5445170605209403, "grad_norm": 0.3671875, "learning_rate": 1.365626124214001e-05, "loss": 1.9882, "step": 16877 }, { "epoch": 0.5445493243747366, "grad_norm": 0.333984375, "learning_rate": 1.3654693842933192e-05, "loss": 2.0127, "step": 16878 }, { "epoch": 0.544581588228533, "grad_norm": 0.349609375, "learning_rate": 1.3653126458534533e-05, "loss": 1.9365, "step": 16879 }, { "epoch": 0.5446138520823293, "grad_norm": 0.3203125, "learning_rate": 1.3651559088961288e-05, "loss": 1.9436, "step": 16880 }, { "epoch": 0.5446461159361257, "grad_norm": 0.33203125, "learning_rate": 1.3649991734230724e-05, "loss": 1.9794, "step": 16881 }, { "epoch": 0.544678379789922, "grad_norm": 0.33203125, "learning_rate": 1.3648424394360075e-05, "loss": 1.9939, "step": 16882 }, { "epoch": 0.5447106436437184, "grad_norm": 0.345703125, "learning_rate": 1.36468570693666e-05, "loss": 2.019, "step": 16883 }, { "epoch": 0.5447429074975146, "grad_norm": 0.328125, "learning_rate": 1.3645289759267558e-05, "loss": 1.9808, "step": 16884 }, { "epoch": 0.544775171351311, "grad_norm": 0.341796875, "learning_rate": 1.3643722464080188e-05, "loss": 1.9768, "step": 16885 }, { "epoch": 0.5448074352051073, "grad_norm": 0.34765625, "learning_rate": 1.364215518382175e-05, "loss": 1.9913, "step": 16886 }, { "epoch": 0.5448396990589037, "grad_norm": 0.375, "learning_rate": 1.3640587918509496e-05, "loss": 1.9934, "step": 16887 }, { "epoch": 0.5448719629127, "grad_norm": 0.3515625, "learning_rate": 1.3639020668160672e-05, "loss": 2.0009, "step": 16888 }, { "epoch": 0.5449042267664964, "grad_norm": 0.349609375, "learning_rate": 1.3637453432792532e-05, "loss": 1.9563, "step": 16889 }, { "epoch": 0.5449364906202927, "grad_norm": 0.333984375, "learning_rate": 1.363588621242233e-05, "loss": 2.0107, "step": 16890 }, { "epoch": 0.5449687544740891, "grad_norm": 0.361328125, "learning_rate": 1.3634319007067312e-05, "loss": 1.9621, "step": 16891 }, { "epoch": 0.5450010183278855, "grad_norm": 0.345703125, "learning_rate": 1.363275181674473e-05, "loss": 1.9976, "step": 16892 }, { "epoch": 0.5450332821816818, "grad_norm": 0.361328125, "learning_rate": 1.3631184641471836e-05, "loss": 1.9594, "step": 16893 }, { "epoch": 0.5450655460354782, "grad_norm": 0.341796875, "learning_rate": 1.362961748126588e-05, "loss": 1.9625, "step": 16894 }, { "epoch": 0.5450978098892745, "grad_norm": 0.345703125, "learning_rate": 1.362805033614411e-05, "loss": 2.0093, "step": 16895 }, { "epoch": 0.5451300737430709, "grad_norm": 0.341796875, "learning_rate": 1.362648320612378e-05, "loss": 1.9702, "step": 16896 }, { "epoch": 0.5451623375968672, "grad_norm": 0.341796875, "learning_rate": 1.3624916091222136e-05, "loss": 1.9924, "step": 16897 }, { "epoch": 0.5451946014506636, "grad_norm": 0.333984375, "learning_rate": 1.3623348991456427e-05, "loss": 1.9566, "step": 16898 }, { "epoch": 0.5452268653044599, "grad_norm": 0.333984375, "learning_rate": 1.3621781906843914e-05, "loss": 1.9688, "step": 16899 }, { "epoch": 0.5452591291582563, "grad_norm": 0.357421875, "learning_rate": 1.3620214837401826e-05, "loss": 2.0253, "step": 16900 }, { "epoch": 0.5452913930120525, "grad_norm": 0.318359375, "learning_rate": 1.3618647783147426e-05, "loss": 1.956, "step": 16901 }, { "epoch": 0.5453236568658489, "grad_norm": 0.33984375, "learning_rate": 1.361708074409796e-05, "loss": 1.9607, "step": 16902 }, { "epoch": 0.5453559207196452, "grad_norm": 0.34765625, "learning_rate": 1.3615513720270685e-05, "loss": 1.9457, "step": 16903 }, { "epoch": 0.5453881845734416, "grad_norm": 0.337890625, "learning_rate": 1.3613946711682831e-05, "loss": 1.9792, "step": 16904 }, { "epoch": 0.5454204484272379, "grad_norm": 0.3359375, "learning_rate": 1.361237971835166e-05, "loss": 1.9711, "step": 16905 }, { "epoch": 0.5454527122810343, "grad_norm": 0.32421875, "learning_rate": 1.3610812740294426e-05, "loss": 2.0037, "step": 16906 }, { "epoch": 0.5454849761348306, "grad_norm": 0.330078125, "learning_rate": 1.360924577752836e-05, "loss": 1.9687, "step": 16907 }, { "epoch": 0.545517239988627, "grad_norm": 0.326171875, "learning_rate": 1.3607678830070716e-05, "loss": 1.9434, "step": 16908 }, { "epoch": 0.5455495038424233, "grad_norm": 0.328125, "learning_rate": 1.3606111897938755e-05, "loss": 1.9533, "step": 16909 }, { "epoch": 0.5455817676962197, "grad_norm": 0.341796875, "learning_rate": 1.3604544981149707e-05, "loss": 1.9927, "step": 16910 }, { "epoch": 0.545614031550016, "grad_norm": 0.353515625, "learning_rate": 1.3602978079720825e-05, "loss": 1.9976, "step": 16911 }, { "epoch": 0.5456462954038124, "grad_norm": 0.359375, "learning_rate": 1.3601411193669367e-05, "loss": 1.9872, "step": 16912 }, { "epoch": 0.5456785592576088, "grad_norm": 0.3515625, "learning_rate": 1.3599844323012564e-05, "loss": 1.9529, "step": 16913 }, { "epoch": 0.5457108231114051, "grad_norm": 0.345703125, "learning_rate": 1.359827746776767e-05, "loss": 1.9759, "step": 16914 }, { "epoch": 0.5457430869652015, "grad_norm": 0.345703125, "learning_rate": 1.3596710627951937e-05, "loss": 1.9801, "step": 16915 }, { "epoch": 0.5457753508189978, "grad_norm": 0.359375, "learning_rate": 1.3595143803582602e-05, "loss": 1.9915, "step": 16916 }, { "epoch": 0.5458076146727942, "grad_norm": 0.341796875, "learning_rate": 1.3593576994676918e-05, "loss": 1.9726, "step": 16917 }, { "epoch": 0.5458398785265904, "grad_norm": 0.36328125, "learning_rate": 1.359201020125213e-05, "loss": 1.9878, "step": 16918 }, { "epoch": 0.5458721423803868, "grad_norm": 0.35546875, "learning_rate": 1.3590443423325482e-05, "loss": 1.9827, "step": 16919 }, { "epoch": 0.5459044062341831, "grad_norm": 0.34375, "learning_rate": 1.358887666091422e-05, "loss": 1.958, "step": 16920 }, { "epoch": 0.5459366700879795, "grad_norm": 0.3671875, "learning_rate": 1.3587309914035597e-05, "loss": 1.9798, "step": 16921 }, { "epoch": 0.5459689339417758, "grad_norm": 0.3515625, "learning_rate": 1.358574318270685e-05, "loss": 1.9349, "step": 16922 }, { "epoch": 0.5460011977955722, "grad_norm": 0.341796875, "learning_rate": 1.3584176466945226e-05, "loss": 1.9783, "step": 16923 }, { "epoch": 0.5460334616493685, "grad_norm": 0.361328125, "learning_rate": 1.3582609766767975e-05, "loss": 1.9922, "step": 16924 }, { "epoch": 0.5460657255031649, "grad_norm": 0.3515625, "learning_rate": 1.3581043082192337e-05, "loss": 1.951, "step": 16925 }, { "epoch": 0.5460979893569612, "grad_norm": 0.359375, "learning_rate": 1.3579476413235557e-05, "loss": 1.9607, "step": 16926 }, { "epoch": 0.5461302532107576, "grad_norm": 0.345703125, "learning_rate": 1.3577909759914886e-05, "loss": 1.9796, "step": 16927 }, { "epoch": 0.5461625170645539, "grad_norm": 0.341796875, "learning_rate": 1.3576343122247561e-05, "loss": 1.9942, "step": 16928 }, { "epoch": 0.5461947809183503, "grad_norm": 0.357421875, "learning_rate": 1.3574776500250828e-05, "loss": 2.0108, "step": 16929 }, { "epoch": 0.5462270447721466, "grad_norm": 0.34375, "learning_rate": 1.3573209893941939e-05, "loss": 1.9917, "step": 16930 }, { "epoch": 0.546259308625943, "grad_norm": 0.341796875, "learning_rate": 1.3571643303338124e-05, "loss": 1.9804, "step": 16931 }, { "epoch": 0.5462915724797394, "grad_norm": 0.359375, "learning_rate": 1.3570076728456637e-05, "loss": 1.9556, "step": 16932 }, { "epoch": 0.5463238363335357, "grad_norm": 0.36328125, "learning_rate": 1.3568510169314722e-05, "loss": 1.9641, "step": 16933 }, { "epoch": 0.546356100187332, "grad_norm": 0.341796875, "learning_rate": 1.3566943625929623e-05, "loss": 1.9609, "step": 16934 }, { "epoch": 0.5463883640411283, "grad_norm": 0.345703125, "learning_rate": 1.3565377098318573e-05, "loss": 1.943, "step": 16935 }, { "epoch": 0.5464206278949247, "grad_norm": 0.3359375, "learning_rate": 1.3563810586498824e-05, "loss": 1.9489, "step": 16936 }, { "epoch": 0.546452891748721, "grad_norm": 0.34375, "learning_rate": 1.3562244090487628e-05, "loss": 1.9636, "step": 16937 }, { "epoch": 0.5464851556025174, "grad_norm": 0.333984375, "learning_rate": 1.3560677610302208e-05, "loss": 1.976, "step": 16938 }, { "epoch": 0.5465174194563137, "grad_norm": 0.3359375, "learning_rate": 1.3559111145959813e-05, "loss": 1.9877, "step": 16939 }, { "epoch": 0.5465496833101101, "grad_norm": 0.341796875, "learning_rate": 1.35575446974777e-05, "loss": 1.9786, "step": 16940 }, { "epoch": 0.5465819471639064, "grad_norm": 0.34375, "learning_rate": 1.3555978264873093e-05, "loss": 1.9557, "step": 16941 }, { "epoch": 0.5466142110177028, "grad_norm": 0.341796875, "learning_rate": 1.3554411848163238e-05, "loss": 1.9951, "step": 16942 }, { "epoch": 0.5466464748714991, "grad_norm": 0.345703125, "learning_rate": 1.355284544736539e-05, "loss": 1.978, "step": 16943 }, { "epoch": 0.5466787387252955, "grad_norm": 0.3515625, "learning_rate": 1.3551279062496775e-05, "loss": 2.0002, "step": 16944 }, { "epoch": 0.5467110025790918, "grad_norm": 0.333984375, "learning_rate": 1.3549712693574641e-05, "loss": 1.9742, "step": 16945 }, { "epoch": 0.5467432664328882, "grad_norm": 0.337890625, "learning_rate": 1.3548146340616232e-05, "loss": 1.9693, "step": 16946 }, { "epoch": 0.5467755302866845, "grad_norm": 0.35546875, "learning_rate": 1.3546580003638784e-05, "loss": 1.9813, "step": 16947 }, { "epoch": 0.5468077941404809, "grad_norm": 0.34765625, "learning_rate": 1.354501368265954e-05, "loss": 2.0075, "step": 16948 }, { "epoch": 0.5468400579942772, "grad_norm": 0.3359375, "learning_rate": 1.3543447377695744e-05, "loss": 1.956, "step": 16949 }, { "epoch": 0.5468723218480736, "grad_norm": 0.353515625, "learning_rate": 1.3541881088764633e-05, "loss": 1.9809, "step": 16950 }, { "epoch": 0.5469045857018698, "grad_norm": 0.333984375, "learning_rate": 1.3540314815883446e-05, "loss": 1.963, "step": 16951 }, { "epoch": 0.5469368495556662, "grad_norm": 0.326171875, "learning_rate": 1.3538748559069432e-05, "loss": 2.0066, "step": 16952 }, { "epoch": 0.5469691134094626, "grad_norm": 0.337890625, "learning_rate": 1.3537182318339822e-05, "loss": 1.942, "step": 16953 }, { "epoch": 0.5470013772632589, "grad_norm": 0.328125, "learning_rate": 1.353561609371186e-05, "loss": 1.983, "step": 16954 }, { "epoch": 0.5470336411170553, "grad_norm": 0.33984375, "learning_rate": 1.3534049885202786e-05, "loss": 1.9791, "step": 16955 }, { "epoch": 0.5470659049708516, "grad_norm": 0.345703125, "learning_rate": 1.353248369282984e-05, "loss": 2.001, "step": 16956 }, { "epoch": 0.547098168824648, "grad_norm": 0.32421875, "learning_rate": 1.353091751661026e-05, "loss": 1.9659, "step": 16957 }, { "epoch": 0.5471304326784443, "grad_norm": 0.33203125, "learning_rate": 1.3529351356561286e-05, "loss": 1.972, "step": 16958 }, { "epoch": 0.5471626965322407, "grad_norm": 0.337890625, "learning_rate": 1.3527785212700156e-05, "loss": 1.9648, "step": 16959 }, { "epoch": 0.547194960386037, "grad_norm": 0.322265625, "learning_rate": 1.3526219085044112e-05, "loss": 1.9559, "step": 16960 }, { "epoch": 0.5472272242398334, "grad_norm": 0.345703125, "learning_rate": 1.3524652973610395e-05, "loss": 1.9838, "step": 16961 }, { "epoch": 0.5472594880936297, "grad_norm": 0.330078125, "learning_rate": 1.352308687841623e-05, "loss": 1.9908, "step": 16962 }, { "epoch": 0.5472917519474261, "grad_norm": 0.33203125, "learning_rate": 1.352152079947887e-05, "loss": 1.9677, "step": 16963 }, { "epoch": 0.5473240158012224, "grad_norm": 0.359375, "learning_rate": 1.3519954736815553e-05, "loss": 1.9643, "step": 16964 }, { "epoch": 0.5473562796550188, "grad_norm": 0.341796875, "learning_rate": 1.3518388690443506e-05, "loss": 1.9985, "step": 16965 }, { "epoch": 0.547388543508815, "grad_norm": 0.365234375, "learning_rate": 1.3516822660379972e-05, "loss": 2.0146, "step": 16966 }, { "epoch": 0.5474208073626115, "grad_norm": 0.3515625, "learning_rate": 1.3515256646642193e-05, "loss": 1.995, "step": 16967 }, { "epoch": 0.5474530712164077, "grad_norm": 0.353515625, "learning_rate": 1.351369064924741e-05, "loss": 1.9998, "step": 16968 }, { "epoch": 0.5474853350702041, "grad_norm": 0.345703125, "learning_rate": 1.3512124668212847e-05, "loss": 1.962, "step": 16969 }, { "epoch": 0.5475175989240004, "grad_norm": 0.345703125, "learning_rate": 1.3510558703555745e-05, "loss": 1.9789, "step": 16970 }, { "epoch": 0.5475498627777968, "grad_norm": 0.328125, "learning_rate": 1.3508992755293354e-05, "loss": 1.9517, "step": 16971 }, { "epoch": 0.5475821266315931, "grad_norm": 0.341796875, "learning_rate": 1.3507426823442895e-05, "loss": 1.9811, "step": 16972 }, { "epoch": 0.5476143904853895, "grad_norm": 0.341796875, "learning_rate": 1.350586090802161e-05, "loss": 1.9959, "step": 16973 }, { "epoch": 0.5476466543391859, "grad_norm": 0.333984375, "learning_rate": 1.350429500904674e-05, "loss": 1.9619, "step": 16974 }, { "epoch": 0.5476789181929822, "grad_norm": 0.3515625, "learning_rate": 1.3502729126535514e-05, "loss": 1.9889, "step": 16975 }, { "epoch": 0.5477111820467786, "grad_norm": 0.3359375, "learning_rate": 1.3501163260505172e-05, "loss": 1.9825, "step": 16976 }, { "epoch": 0.5477434459005749, "grad_norm": 0.33984375, "learning_rate": 1.3499597410972953e-05, "loss": 1.9845, "step": 16977 }, { "epoch": 0.5477757097543713, "grad_norm": 0.341796875, "learning_rate": 1.3498031577956086e-05, "loss": 1.9936, "step": 16978 }, { "epoch": 0.5478079736081676, "grad_norm": 0.333984375, "learning_rate": 1.349646576147181e-05, "loss": 1.9845, "step": 16979 }, { "epoch": 0.547840237461964, "grad_norm": 0.33984375, "learning_rate": 1.3494899961537362e-05, "loss": 1.9653, "step": 16980 }, { "epoch": 0.5478725013157603, "grad_norm": 0.337890625, "learning_rate": 1.3493334178169975e-05, "loss": 1.9645, "step": 16981 }, { "epoch": 0.5479047651695567, "grad_norm": 0.32421875, "learning_rate": 1.3491768411386881e-05, "loss": 1.9652, "step": 16982 }, { "epoch": 0.547937029023353, "grad_norm": 0.33203125, "learning_rate": 1.3490202661205324e-05, "loss": 1.9861, "step": 16983 }, { "epoch": 0.5479692928771493, "grad_norm": 0.333984375, "learning_rate": 1.348863692764253e-05, "loss": 1.9745, "step": 16984 }, { "epoch": 0.5480015567309456, "grad_norm": 0.328125, "learning_rate": 1.3487071210715736e-05, "loss": 1.9524, "step": 16985 }, { "epoch": 0.548033820584742, "grad_norm": 0.330078125, "learning_rate": 1.348550551044218e-05, "loss": 1.9864, "step": 16986 }, { "epoch": 0.5480660844385383, "grad_norm": 0.3359375, "learning_rate": 1.3483939826839089e-05, "loss": 1.9876, "step": 16987 }, { "epoch": 0.5480983482923347, "grad_norm": 0.337890625, "learning_rate": 1.3482374159923702e-05, "loss": 2.003, "step": 16988 }, { "epoch": 0.548130612146131, "grad_norm": 0.333984375, "learning_rate": 1.3480808509713255e-05, "loss": 1.9656, "step": 16989 }, { "epoch": 0.5481628759999274, "grad_norm": 0.33203125, "learning_rate": 1.3479242876224972e-05, "loss": 1.9846, "step": 16990 }, { "epoch": 0.5481951398537237, "grad_norm": 0.33203125, "learning_rate": 1.3477677259476094e-05, "loss": 1.9762, "step": 16991 }, { "epoch": 0.5482274037075201, "grad_norm": 0.33203125, "learning_rate": 1.3476111659483859e-05, "loss": 1.9951, "step": 16992 }, { "epoch": 0.5482596675613165, "grad_norm": 0.3359375, "learning_rate": 1.3474546076265483e-05, "loss": 2.0076, "step": 16993 }, { "epoch": 0.5482919314151128, "grad_norm": 0.33203125, "learning_rate": 1.3472980509838214e-05, "loss": 1.9293, "step": 16994 }, { "epoch": 0.5483241952689092, "grad_norm": 0.3359375, "learning_rate": 1.3471414960219286e-05, "loss": 1.965, "step": 16995 }, { "epoch": 0.5483564591227055, "grad_norm": 0.3359375, "learning_rate": 1.346984942742592e-05, "loss": 1.9812, "step": 16996 }, { "epoch": 0.5483887229765019, "grad_norm": 0.333984375, "learning_rate": 1.346828391147535e-05, "loss": 2.0, "step": 16997 }, { "epoch": 0.5484209868302982, "grad_norm": 0.34375, "learning_rate": 1.3466718412384821e-05, "loss": 1.9647, "step": 16998 }, { "epoch": 0.5484532506840946, "grad_norm": 0.328125, "learning_rate": 1.346515293017155e-05, "loss": 1.9916, "step": 16999 }, { "epoch": 0.5484855145378909, "grad_norm": 0.34765625, "learning_rate": 1.3463587464852773e-05, "loss": 1.952, "step": 17000 }, { "epoch": 0.5485177783916872, "grad_norm": 0.330078125, "learning_rate": 1.3462022016445722e-05, "loss": 1.9855, "step": 17001 }, { "epoch": 0.5485500422454835, "grad_norm": 0.345703125, "learning_rate": 1.3460456584967637e-05, "loss": 1.9648, "step": 17002 }, { "epoch": 0.5485823060992799, "grad_norm": 0.337890625, "learning_rate": 1.3458891170435736e-05, "loss": 1.9805, "step": 17003 }, { "epoch": 0.5486145699530762, "grad_norm": 0.3359375, "learning_rate": 1.3457325772867256e-05, "loss": 1.9938, "step": 17004 }, { "epoch": 0.5486468338068726, "grad_norm": 0.333984375, "learning_rate": 1.345576039227943e-05, "loss": 1.9866, "step": 17005 }, { "epoch": 0.5486790976606689, "grad_norm": 0.3515625, "learning_rate": 1.3454195028689482e-05, "loss": 1.9517, "step": 17006 }, { "epoch": 0.5487113615144653, "grad_norm": 0.330078125, "learning_rate": 1.3452629682114646e-05, "loss": 1.966, "step": 17007 }, { "epoch": 0.5487436253682616, "grad_norm": 0.353515625, "learning_rate": 1.3451064352572157e-05, "loss": 1.9942, "step": 17008 }, { "epoch": 0.548775889222058, "grad_norm": 0.3359375, "learning_rate": 1.3449499040079235e-05, "loss": 1.9957, "step": 17009 }, { "epoch": 0.5488081530758543, "grad_norm": 0.328125, "learning_rate": 1.3447933744653117e-05, "loss": 1.9782, "step": 17010 }, { "epoch": 0.5488404169296507, "grad_norm": 0.33203125, "learning_rate": 1.3446368466311034e-05, "loss": 1.9678, "step": 17011 }, { "epoch": 0.548872680783447, "grad_norm": 0.328125, "learning_rate": 1.3444803205070207e-05, "loss": 1.9865, "step": 17012 }, { "epoch": 0.5489049446372434, "grad_norm": 0.32421875, "learning_rate": 1.3443237960947874e-05, "loss": 1.9416, "step": 17013 }, { "epoch": 0.5489372084910398, "grad_norm": 0.318359375, "learning_rate": 1.3441672733961261e-05, "loss": 1.9234, "step": 17014 }, { "epoch": 0.5489694723448361, "grad_norm": 0.326171875, "learning_rate": 1.3440107524127595e-05, "loss": 1.9917, "step": 17015 }, { "epoch": 0.5490017361986325, "grad_norm": 0.330078125, "learning_rate": 1.3438542331464106e-05, "loss": 1.989, "step": 17016 }, { "epoch": 0.5490340000524288, "grad_norm": 0.328125, "learning_rate": 1.3436977155988028e-05, "loss": 1.9622, "step": 17017 }, { "epoch": 0.5490662639062251, "grad_norm": 0.326171875, "learning_rate": 1.3435411997716577e-05, "loss": 1.9993, "step": 17018 }, { "epoch": 0.5490985277600214, "grad_norm": 0.35546875, "learning_rate": 1.343384685666699e-05, "loss": 1.9395, "step": 17019 }, { "epoch": 0.5491307916138178, "grad_norm": 0.3359375, "learning_rate": 1.34322817328565e-05, "loss": 1.9806, "step": 17020 }, { "epoch": 0.5491630554676141, "grad_norm": 0.33984375, "learning_rate": 1.343071662630232e-05, "loss": 1.9676, "step": 17021 }, { "epoch": 0.5491953193214105, "grad_norm": 0.337890625, "learning_rate": 1.3429151537021687e-05, "loss": 1.9843, "step": 17022 }, { "epoch": 0.5492275831752068, "grad_norm": 0.37890625, "learning_rate": 1.3427586465031834e-05, "loss": 2.0111, "step": 17023 }, { "epoch": 0.5492598470290032, "grad_norm": 0.333984375, "learning_rate": 1.3426021410349974e-05, "loss": 1.9725, "step": 17024 }, { "epoch": 0.5492921108827995, "grad_norm": 0.34765625, "learning_rate": 1.3424456372993336e-05, "loss": 1.9755, "step": 17025 }, { "epoch": 0.5493243747365959, "grad_norm": 0.34375, "learning_rate": 1.3422891352979165e-05, "loss": 1.9641, "step": 17026 }, { "epoch": 0.5493566385903922, "grad_norm": 0.337890625, "learning_rate": 1.3421326350324668e-05, "loss": 1.9844, "step": 17027 }, { "epoch": 0.5493889024441886, "grad_norm": 0.349609375, "learning_rate": 1.3419761365047071e-05, "loss": 1.9548, "step": 17028 }, { "epoch": 0.5494211662979849, "grad_norm": 0.330078125, "learning_rate": 1.341819639716362e-05, "loss": 1.9614, "step": 17029 }, { "epoch": 0.5494534301517813, "grad_norm": 0.349609375, "learning_rate": 1.3416631446691521e-05, "loss": 2.0162, "step": 17030 }, { "epoch": 0.5494856940055776, "grad_norm": 0.3359375, "learning_rate": 1.3415066513648008e-05, "loss": 1.9469, "step": 17031 }, { "epoch": 0.549517957859374, "grad_norm": 0.33984375, "learning_rate": 1.3413501598050308e-05, "loss": 1.9752, "step": 17032 }, { "epoch": 0.5495502217131704, "grad_norm": 0.341796875, "learning_rate": 1.3411936699915643e-05, "loss": 1.9713, "step": 17033 }, { "epoch": 0.5495824855669666, "grad_norm": 0.33984375, "learning_rate": 1.3410371819261238e-05, "loss": 1.9844, "step": 17034 }, { "epoch": 0.549614749420763, "grad_norm": 0.330078125, "learning_rate": 1.3408806956104319e-05, "loss": 1.9627, "step": 17035 }, { "epoch": 0.5496470132745593, "grad_norm": 0.33984375, "learning_rate": 1.3407242110462116e-05, "loss": 1.9853, "step": 17036 }, { "epoch": 0.5496792771283557, "grad_norm": 0.349609375, "learning_rate": 1.3405677282351845e-05, "loss": 1.9855, "step": 17037 }, { "epoch": 0.549711540982152, "grad_norm": 0.33984375, "learning_rate": 1.3404112471790735e-05, "loss": 1.9814, "step": 17038 }, { "epoch": 0.5497438048359484, "grad_norm": 0.337890625, "learning_rate": 1.3402547678796015e-05, "loss": 1.977, "step": 17039 }, { "epoch": 0.5497760686897447, "grad_norm": 0.333984375, "learning_rate": 1.3400982903384897e-05, "loss": 1.9919, "step": 17040 }, { "epoch": 0.5498083325435411, "grad_norm": 0.33203125, "learning_rate": 1.3399418145574616e-05, "loss": 1.9825, "step": 17041 }, { "epoch": 0.5498405963973374, "grad_norm": 0.345703125, "learning_rate": 1.3397853405382392e-05, "loss": 1.9942, "step": 17042 }, { "epoch": 0.5498728602511338, "grad_norm": 0.333984375, "learning_rate": 1.3396288682825447e-05, "loss": 1.9731, "step": 17043 }, { "epoch": 0.5499051241049301, "grad_norm": 0.3515625, "learning_rate": 1.3394723977921005e-05, "loss": 1.9728, "step": 17044 }, { "epoch": 0.5499373879587265, "grad_norm": 0.3515625, "learning_rate": 1.3393159290686293e-05, "loss": 1.9798, "step": 17045 }, { "epoch": 0.5499696518125228, "grad_norm": 0.33984375, "learning_rate": 1.339159462113853e-05, "loss": 1.9731, "step": 17046 }, { "epoch": 0.5500019156663192, "grad_norm": 0.34375, "learning_rate": 1.3390029969294935e-05, "loss": 1.9725, "step": 17047 }, { "epoch": 0.5500341795201155, "grad_norm": 0.341796875, "learning_rate": 1.3388465335172741e-05, "loss": 1.9461, "step": 17048 }, { "epoch": 0.5500664433739119, "grad_norm": 0.33203125, "learning_rate": 1.3386900718789161e-05, "loss": 1.9674, "step": 17049 }, { "epoch": 0.5500987072277082, "grad_norm": 0.33984375, "learning_rate": 1.3385336120161421e-05, "loss": 1.9699, "step": 17050 }, { "epoch": 0.5501309710815045, "grad_norm": 0.333984375, "learning_rate": 1.3383771539306748e-05, "loss": 1.9769, "step": 17051 }, { "epoch": 0.5501632349353008, "grad_norm": 0.32421875, "learning_rate": 1.338220697624235e-05, "loss": 1.938, "step": 17052 }, { "epoch": 0.5501954987890972, "grad_norm": 0.341796875, "learning_rate": 1.338064243098546e-05, "loss": 2.0064, "step": 17053 }, { "epoch": 0.5502277626428936, "grad_norm": 0.328125, "learning_rate": 1.3379077903553302e-05, "loss": 1.9817, "step": 17054 }, { "epoch": 0.5502600264966899, "grad_norm": 0.333984375, "learning_rate": 1.3377513393963087e-05, "loss": 1.958, "step": 17055 }, { "epoch": 0.5502922903504863, "grad_norm": 0.33203125, "learning_rate": 1.3375948902232034e-05, "loss": 1.9814, "step": 17056 }, { "epoch": 0.5503245542042826, "grad_norm": 0.345703125, "learning_rate": 1.3374384428377382e-05, "loss": 1.97, "step": 17057 }, { "epoch": 0.550356818058079, "grad_norm": 0.341796875, "learning_rate": 1.3372819972416335e-05, "loss": 1.9817, "step": 17058 }, { "epoch": 0.5503890819118753, "grad_norm": 0.326171875, "learning_rate": 1.3371255534366114e-05, "loss": 1.9885, "step": 17059 }, { "epoch": 0.5504213457656717, "grad_norm": 0.349609375, "learning_rate": 1.3369691114243952e-05, "loss": 1.9593, "step": 17060 }, { "epoch": 0.550453609619468, "grad_norm": 0.337890625, "learning_rate": 1.3368126712067056e-05, "loss": 1.9881, "step": 17061 }, { "epoch": 0.5504858734732644, "grad_norm": 0.361328125, "learning_rate": 1.336656232785265e-05, "loss": 1.9773, "step": 17062 }, { "epoch": 0.5505181373270607, "grad_norm": 0.349609375, "learning_rate": 1.3364997961617957e-05, "loss": 1.9749, "step": 17063 }, { "epoch": 0.5505504011808571, "grad_norm": 0.337890625, "learning_rate": 1.336343361338019e-05, "loss": 1.9362, "step": 17064 }, { "epoch": 0.5505826650346534, "grad_norm": 0.3671875, "learning_rate": 1.3361869283156572e-05, "loss": 1.9643, "step": 17065 }, { "epoch": 0.5506149288884498, "grad_norm": 0.349609375, "learning_rate": 1.3360304970964322e-05, "loss": 1.9683, "step": 17066 }, { "epoch": 0.550647192742246, "grad_norm": 0.349609375, "learning_rate": 1.3358740676820661e-05, "loss": 1.9495, "step": 17067 }, { "epoch": 0.5506794565960424, "grad_norm": 0.357421875, "learning_rate": 1.3357176400742803e-05, "loss": 1.9443, "step": 17068 }, { "epoch": 0.5507117204498387, "grad_norm": 0.330078125, "learning_rate": 1.3355612142747968e-05, "loss": 1.9825, "step": 17069 }, { "epoch": 0.5507439843036351, "grad_norm": 0.37109375, "learning_rate": 1.3354047902853377e-05, "loss": 1.9594, "step": 17070 }, { "epoch": 0.5507762481574314, "grad_norm": 0.349609375, "learning_rate": 1.3352483681076245e-05, "loss": 1.9824, "step": 17071 }, { "epoch": 0.5508085120112278, "grad_norm": 0.333984375, "learning_rate": 1.3350919477433788e-05, "loss": 2.0119, "step": 17072 }, { "epoch": 0.5508407758650241, "grad_norm": 0.384765625, "learning_rate": 1.3349355291943234e-05, "loss": 1.9795, "step": 17073 }, { "epoch": 0.5508730397188205, "grad_norm": 0.3515625, "learning_rate": 1.3347791124621786e-05, "loss": 1.9865, "step": 17074 }, { "epoch": 0.5509053035726169, "grad_norm": 0.3359375, "learning_rate": 1.3346226975486668e-05, "loss": 1.9586, "step": 17075 }, { "epoch": 0.5509375674264132, "grad_norm": 0.3515625, "learning_rate": 1.3344662844555102e-05, "loss": 2.0026, "step": 17076 }, { "epoch": 0.5509698312802096, "grad_norm": 0.361328125, "learning_rate": 1.3343098731844295e-05, "loss": 1.9887, "step": 17077 }, { "epoch": 0.5510020951340059, "grad_norm": 0.341796875, "learning_rate": 1.334153463737147e-05, "loss": 1.9202, "step": 17078 }, { "epoch": 0.5510343589878023, "grad_norm": 0.349609375, "learning_rate": 1.3339970561153844e-05, "loss": 1.9899, "step": 17079 }, { "epoch": 0.5510666228415986, "grad_norm": 0.33984375, "learning_rate": 1.3338406503208627e-05, "loss": 2.0073, "step": 17080 }, { "epoch": 0.551098886695395, "grad_norm": 0.3359375, "learning_rate": 1.3336842463553042e-05, "loss": 1.9486, "step": 17081 }, { "epoch": 0.5511311505491913, "grad_norm": 0.349609375, "learning_rate": 1.3335278442204305e-05, "loss": 1.9949, "step": 17082 }, { "epoch": 0.5511634144029877, "grad_norm": 0.322265625, "learning_rate": 1.3333714439179623e-05, "loss": 2.0009, "step": 17083 }, { "epoch": 0.551195678256784, "grad_norm": 0.357421875, "learning_rate": 1.3332150454496218e-05, "loss": 1.9667, "step": 17084 }, { "epoch": 0.5512279421105803, "grad_norm": 0.34765625, "learning_rate": 1.333058648817131e-05, "loss": 2.0303, "step": 17085 }, { "epoch": 0.5512602059643766, "grad_norm": 0.34375, "learning_rate": 1.3329022540222104e-05, "loss": 1.9502, "step": 17086 }, { "epoch": 0.551292469818173, "grad_norm": 0.35546875, "learning_rate": 1.3327458610665812e-05, "loss": 1.986, "step": 17087 }, { "epoch": 0.5513247336719693, "grad_norm": 0.3359375, "learning_rate": 1.332589469951967e-05, "loss": 1.9658, "step": 17088 }, { "epoch": 0.5513569975257657, "grad_norm": 0.3359375, "learning_rate": 1.3324330806800869e-05, "loss": 1.9804, "step": 17089 }, { "epoch": 0.551389261379562, "grad_norm": 0.365234375, "learning_rate": 1.3322766932526635e-05, "loss": 1.9636, "step": 17090 }, { "epoch": 0.5514215252333584, "grad_norm": 0.341796875, "learning_rate": 1.332120307671418e-05, "loss": 2.0072, "step": 17091 }, { "epoch": 0.5514537890871547, "grad_norm": 0.384765625, "learning_rate": 1.3319639239380713e-05, "loss": 1.959, "step": 17092 }, { "epoch": 0.5514860529409511, "grad_norm": 0.34375, "learning_rate": 1.3318075420543454e-05, "loss": 1.9404, "step": 17093 }, { "epoch": 0.5515183167947475, "grad_norm": 0.326171875, "learning_rate": 1.3316511620219617e-05, "loss": 1.9803, "step": 17094 }, { "epoch": 0.5515505806485438, "grad_norm": 0.35546875, "learning_rate": 1.3314947838426409e-05, "loss": 1.991, "step": 17095 }, { "epoch": 0.5515828445023402, "grad_norm": 0.369140625, "learning_rate": 1.3313384075181047e-05, "loss": 1.9749, "step": 17096 }, { "epoch": 0.5516151083561365, "grad_norm": 0.326171875, "learning_rate": 1.3311820330500745e-05, "loss": 1.9595, "step": 17097 }, { "epoch": 0.5516473722099329, "grad_norm": 0.34375, "learning_rate": 1.331025660440271e-05, "loss": 1.9462, "step": 17098 }, { "epoch": 0.5516796360637292, "grad_norm": 0.34765625, "learning_rate": 1.330869289690416e-05, "loss": 1.969, "step": 17099 }, { "epoch": 0.5517118999175256, "grad_norm": 0.34765625, "learning_rate": 1.3307129208022306e-05, "loss": 1.9754, "step": 17100 }, { "epoch": 0.5517441637713218, "grad_norm": 0.33203125, "learning_rate": 1.330556553777436e-05, "loss": 1.9962, "step": 17101 }, { "epoch": 0.5517764276251182, "grad_norm": 0.345703125, "learning_rate": 1.330400188617753e-05, "loss": 1.9991, "step": 17102 }, { "epoch": 0.5518086914789145, "grad_norm": 0.3359375, "learning_rate": 1.3302438253249032e-05, "loss": 1.9848, "step": 17103 }, { "epoch": 0.5518409553327109, "grad_norm": 0.328125, "learning_rate": 1.330087463900608e-05, "loss": 1.9522, "step": 17104 }, { "epoch": 0.5518732191865072, "grad_norm": 0.34765625, "learning_rate": 1.3299311043465875e-05, "loss": 1.9778, "step": 17105 }, { "epoch": 0.5519054830403036, "grad_norm": 0.3359375, "learning_rate": 1.3297747466645633e-05, "loss": 1.9755, "step": 17106 }, { "epoch": 0.5519377468940999, "grad_norm": 0.349609375, "learning_rate": 1.3296183908562573e-05, "loss": 2.0029, "step": 17107 }, { "epoch": 0.5519700107478963, "grad_norm": 0.3515625, "learning_rate": 1.3294620369233892e-05, "loss": 2.0126, "step": 17108 }, { "epoch": 0.5520022746016926, "grad_norm": 0.357421875, "learning_rate": 1.3293056848676807e-05, "loss": 1.9688, "step": 17109 }, { "epoch": 0.552034538455489, "grad_norm": 0.361328125, "learning_rate": 1.3291493346908533e-05, "loss": 1.983, "step": 17110 }, { "epoch": 0.5520668023092853, "grad_norm": 0.33203125, "learning_rate": 1.328992986394627e-05, "loss": 1.987, "step": 17111 }, { "epoch": 0.5520990661630817, "grad_norm": 0.349609375, "learning_rate": 1.328836639980723e-05, "loss": 1.986, "step": 17112 }, { "epoch": 0.552131330016878, "grad_norm": 0.328125, "learning_rate": 1.3286802954508635e-05, "loss": 2.0057, "step": 17113 }, { "epoch": 0.5521635938706744, "grad_norm": 0.33984375, "learning_rate": 1.328523952806767e-05, "loss": 1.9737, "step": 17114 }, { "epoch": 0.5521958577244708, "grad_norm": 0.333984375, "learning_rate": 1.3283676120501566e-05, "loss": 1.9265, "step": 17115 }, { "epoch": 0.5522281215782671, "grad_norm": 0.365234375, "learning_rate": 1.3282112731827527e-05, "loss": 1.9704, "step": 17116 }, { "epoch": 0.5522603854320635, "grad_norm": 0.341796875, "learning_rate": 1.3280549362062754e-05, "loss": 1.9711, "step": 17117 }, { "epoch": 0.5522926492858597, "grad_norm": 0.34765625, "learning_rate": 1.3278986011224457e-05, "loss": 1.9925, "step": 17118 }, { "epoch": 0.5523249131396561, "grad_norm": 0.333984375, "learning_rate": 1.3277422679329857e-05, "loss": 1.9454, "step": 17119 }, { "epoch": 0.5523571769934524, "grad_norm": 0.3359375, "learning_rate": 1.3275859366396145e-05, "loss": 1.9197, "step": 17120 }, { "epoch": 0.5523894408472488, "grad_norm": 0.33984375, "learning_rate": 1.3274296072440536e-05, "loss": 1.9492, "step": 17121 }, { "epoch": 0.5524217047010451, "grad_norm": 0.33203125, "learning_rate": 1.3272732797480243e-05, "loss": 1.9885, "step": 17122 }, { "epoch": 0.5524539685548415, "grad_norm": 0.3515625, "learning_rate": 1.3271169541532462e-05, "loss": 1.9718, "step": 17123 }, { "epoch": 0.5524862324086378, "grad_norm": 0.33203125, "learning_rate": 1.3269606304614409e-05, "loss": 1.9235, "step": 17124 }, { "epoch": 0.5525184962624342, "grad_norm": 0.337890625, "learning_rate": 1.3268043086743289e-05, "loss": 1.9799, "step": 17125 }, { "epoch": 0.5525507601162305, "grad_norm": 0.33984375, "learning_rate": 1.3266479887936307e-05, "loss": 1.9746, "step": 17126 }, { "epoch": 0.5525830239700269, "grad_norm": 0.333984375, "learning_rate": 1.326491670821067e-05, "loss": 1.964, "step": 17127 }, { "epoch": 0.5526152878238232, "grad_norm": 0.333984375, "learning_rate": 1.3263353547583589e-05, "loss": 1.9788, "step": 17128 }, { "epoch": 0.5526475516776196, "grad_norm": 0.34765625, "learning_rate": 1.3261790406072264e-05, "loss": 1.9754, "step": 17129 }, { "epoch": 0.5526798155314159, "grad_norm": 0.345703125, "learning_rate": 1.3260227283693902e-05, "loss": 1.9744, "step": 17130 }, { "epoch": 0.5527120793852123, "grad_norm": 0.337890625, "learning_rate": 1.3258664180465713e-05, "loss": 1.9925, "step": 17131 }, { "epoch": 0.5527443432390086, "grad_norm": 0.365234375, "learning_rate": 1.3257101096404897e-05, "loss": 2.0051, "step": 17132 }, { "epoch": 0.552776607092805, "grad_norm": 0.341796875, "learning_rate": 1.3255538031528661e-05, "loss": 1.9723, "step": 17133 }, { "epoch": 0.5528088709466014, "grad_norm": 0.35546875, "learning_rate": 1.3253974985854211e-05, "loss": 1.9979, "step": 17134 }, { "epoch": 0.5528411348003976, "grad_norm": 0.33984375, "learning_rate": 1.3252411959398755e-05, "loss": 1.9612, "step": 17135 }, { "epoch": 0.552873398654194, "grad_norm": 0.3359375, "learning_rate": 1.3250848952179492e-05, "loss": 1.9742, "step": 17136 }, { "epoch": 0.5529056625079903, "grad_norm": 0.333984375, "learning_rate": 1.3249285964213627e-05, "loss": 1.9836, "step": 17137 }, { "epoch": 0.5529379263617867, "grad_norm": 0.33984375, "learning_rate": 1.324772299551837e-05, "loss": 1.97, "step": 17138 }, { "epoch": 0.552970190215583, "grad_norm": 0.333984375, "learning_rate": 1.324616004611092e-05, "loss": 1.9843, "step": 17139 }, { "epoch": 0.5530024540693794, "grad_norm": 0.3359375, "learning_rate": 1.324459711600848e-05, "loss": 1.9893, "step": 17140 }, { "epoch": 0.5530347179231757, "grad_norm": 0.341796875, "learning_rate": 1.3243034205228261e-05, "loss": 1.9997, "step": 17141 }, { "epoch": 0.5530669817769721, "grad_norm": 0.341796875, "learning_rate": 1.3241471313787456e-05, "loss": 1.9497, "step": 17142 }, { "epoch": 0.5530992456307684, "grad_norm": 0.34765625, "learning_rate": 1.3239908441703272e-05, "loss": 1.9595, "step": 17143 }, { "epoch": 0.5531315094845648, "grad_norm": 0.33203125, "learning_rate": 1.3238345588992921e-05, "loss": 1.9778, "step": 17144 }, { "epoch": 0.5531637733383611, "grad_norm": 0.3359375, "learning_rate": 1.3236782755673589e-05, "loss": 1.9736, "step": 17145 }, { "epoch": 0.5531960371921575, "grad_norm": 0.34765625, "learning_rate": 1.323521994176249e-05, "loss": 1.9971, "step": 17146 }, { "epoch": 0.5532283010459538, "grad_norm": 0.345703125, "learning_rate": 1.323365714727683e-05, "loss": 2.0015, "step": 17147 }, { "epoch": 0.5532605648997502, "grad_norm": 0.333984375, "learning_rate": 1.3232094372233799e-05, "loss": 1.977, "step": 17148 }, { "epoch": 0.5532928287535465, "grad_norm": 0.3359375, "learning_rate": 1.3230531616650602e-05, "loss": 1.935, "step": 17149 }, { "epoch": 0.5533250926073429, "grad_norm": 0.341796875, "learning_rate": 1.3228968880544453e-05, "loss": 1.9624, "step": 17150 }, { "epoch": 0.5533573564611391, "grad_norm": 0.337890625, "learning_rate": 1.3227406163932537e-05, "loss": 1.9683, "step": 17151 }, { "epoch": 0.5533896203149355, "grad_norm": 0.328125, "learning_rate": 1.3225843466832062e-05, "loss": 1.9733, "step": 17152 }, { "epoch": 0.5534218841687318, "grad_norm": 0.349609375, "learning_rate": 1.3224280789260232e-05, "loss": 1.9692, "step": 17153 }, { "epoch": 0.5534541480225282, "grad_norm": 0.333984375, "learning_rate": 1.3222718131234246e-05, "loss": 1.9928, "step": 17154 }, { "epoch": 0.5534864118763246, "grad_norm": 0.3359375, "learning_rate": 1.3221155492771299e-05, "loss": 1.9745, "step": 17155 }, { "epoch": 0.5535186757301209, "grad_norm": 0.3203125, "learning_rate": 1.32195928738886e-05, "loss": 1.9456, "step": 17156 }, { "epoch": 0.5535509395839173, "grad_norm": 0.333984375, "learning_rate": 1.3218030274603345e-05, "loss": 1.9599, "step": 17157 }, { "epoch": 0.5535832034377136, "grad_norm": 0.330078125, "learning_rate": 1.3216467694932733e-05, "loss": 1.9532, "step": 17158 }, { "epoch": 0.55361546729151, "grad_norm": 0.326171875, "learning_rate": 1.3214905134893968e-05, "loss": 1.9838, "step": 17159 }, { "epoch": 0.5536477311453063, "grad_norm": 0.32421875, "learning_rate": 1.3213342594504242e-05, "loss": 1.9822, "step": 17160 }, { "epoch": 0.5536799949991027, "grad_norm": 0.34765625, "learning_rate": 1.3211780073780762e-05, "loss": 1.9566, "step": 17161 }, { "epoch": 0.553712258852899, "grad_norm": 0.330078125, "learning_rate": 1.3210217572740727e-05, "loss": 1.9618, "step": 17162 }, { "epoch": 0.5537445227066954, "grad_norm": 0.345703125, "learning_rate": 1.3208655091401328e-05, "loss": 1.969, "step": 17163 }, { "epoch": 0.5537767865604917, "grad_norm": 0.341796875, "learning_rate": 1.320709262977977e-05, "loss": 1.9617, "step": 17164 }, { "epoch": 0.5538090504142881, "grad_norm": 0.32421875, "learning_rate": 1.3205530187893249e-05, "loss": 2.0036, "step": 17165 }, { "epoch": 0.5538413142680844, "grad_norm": 0.345703125, "learning_rate": 1.3203967765758968e-05, "loss": 1.9931, "step": 17166 }, { "epoch": 0.5538735781218808, "grad_norm": 0.328125, "learning_rate": 1.3202405363394119e-05, "loss": 1.9651, "step": 17167 }, { "epoch": 0.553905841975677, "grad_norm": 0.34375, "learning_rate": 1.32008429808159e-05, "loss": 2.0004, "step": 17168 }, { "epoch": 0.5539381058294734, "grad_norm": 0.359375, "learning_rate": 1.3199280618041516e-05, "loss": 1.9545, "step": 17169 }, { "epoch": 0.5539703696832697, "grad_norm": 0.328125, "learning_rate": 1.3197718275088154e-05, "loss": 1.9925, "step": 17170 }, { "epoch": 0.5540026335370661, "grad_norm": 0.345703125, "learning_rate": 1.3196155951973017e-05, "loss": 1.9703, "step": 17171 }, { "epoch": 0.5540348973908624, "grad_norm": 0.326171875, "learning_rate": 1.319459364871331e-05, "loss": 1.9902, "step": 17172 }, { "epoch": 0.5540671612446588, "grad_norm": 0.3515625, "learning_rate": 1.3193031365326208e-05, "loss": 1.9839, "step": 17173 }, { "epoch": 0.5540994250984551, "grad_norm": 0.330078125, "learning_rate": 1.3191469101828925e-05, "loss": 1.9603, "step": 17174 }, { "epoch": 0.5541316889522515, "grad_norm": 0.3359375, "learning_rate": 1.318990685823866e-05, "loss": 1.9674, "step": 17175 }, { "epoch": 0.5541639528060479, "grad_norm": 0.33203125, "learning_rate": 1.3188344634572594e-05, "loss": 1.9716, "step": 17176 }, { "epoch": 0.5541962166598442, "grad_norm": 0.341796875, "learning_rate": 1.3186782430847928e-05, "loss": 1.9929, "step": 17177 }, { "epoch": 0.5542284805136406, "grad_norm": 0.32421875, "learning_rate": 1.3185220247081869e-05, "loss": 1.9552, "step": 17178 }, { "epoch": 0.5542607443674369, "grad_norm": 0.337890625, "learning_rate": 1.3183658083291598e-05, "loss": 1.9896, "step": 17179 }, { "epoch": 0.5542930082212333, "grad_norm": 0.326171875, "learning_rate": 1.3182095939494313e-05, "loss": 1.9511, "step": 17180 }, { "epoch": 0.5543252720750296, "grad_norm": 0.3671875, "learning_rate": 1.318053381570722e-05, "loss": 1.9563, "step": 17181 }, { "epoch": 0.554357535928826, "grad_norm": 0.34375, "learning_rate": 1.31789717119475e-05, "loss": 2.0027, "step": 17182 }, { "epoch": 0.5543897997826223, "grad_norm": 0.345703125, "learning_rate": 1.3177409628232353e-05, "loss": 1.98, "step": 17183 }, { "epoch": 0.5544220636364187, "grad_norm": 0.3359375, "learning_rate": 1.3175847564578978e-05, "loss": 1.9972, "step": 17184 }, { "epoch": 0.554454327490215, "grad_norm": 0.337890625, "learning_rate": 1.3174285521004558e-05, "loss": 2.0048, "step": 17185 }, { "epoch": 0.5544865913440113, "grad_norm": 0.326171875, "learning_rate": 1.3172723497526294e-05, "loss": 1.9488, "step": 17186 }, { "epoch": 0.5545188551978076, "grad_norm": 0.337890625, "learning_rate": 1.3171161494161383e-05, "loss": 1.9631, "step": 17187 }, { "epoch": 0.554551119051604, "grad_norm": 0.359375, "learning_rate": 1.316959951092701e-05, "loss": 1.9969, "step": 17188 }, { "epoch": 0.5545833829054003, "grad_norm": 0.3359375, "learning_rate": 1.3168037547840374e-05, "loss": 1.9924, "step": 17189 }, { "epoch": 0.5546156467591967, "grad_norm": 0.361328125, "learning_rate": 1.316647560491867e-05, "loss": 1.9715, "step": 17190 }, { "epoch": 0.554647910612993, "grad_norm": 0.333984375, "learning_rate": 1.3164913682179082e-05, "loss": 1.9935, "step": 17191 }, { "epoch": 0.5546801744667894, "grad_norm": 0.345703125, "learning_rate": 1.3163351779638809e-05, "loss": 1.9793, "step": 17192 }, { "epoch": 0.5547124383205857, "grad_norm": 0.326171875, "learning_rate": 1.3161789897315045e-05, "loss": 1.964, "step": 17193 }, { "epoch": 0.5547447021743821, "grad_norm": 0.3359375, "learning_rate": 1.3160228035224976e-05, "loss": 1.9584, "step": 17194 }, { "epoch": 0.5547769660281785, "grad_norm": 0.33203125, "learning_rate": 1.3158666193385796e-05, "loss": 1.9879, "step": 17195 }, { "epoch": 0.5548092298819748, "grad_norm": 0.322265625, "learning_rate": 1.3157104371814701e-05, "loss": 1.9807, "step": 17196 }, { "epoch": 0.5548414937357712, "grad_norm": 0.3359375, "learning_rate": 1.3155542570528877e-05, "loss": 1.9343, "step": 17197 }, { "epoch": 0.5548737575895675, "grad_norm": 0.33203125, "learning_rate": 1.3153980789545517e-05, "loss": 1.9663, "step": 17198 }, { "epoch": 0.5549060214433639, "grad_norm": 0.337890625, "learning_rate": 1.3152419028881811e-05, "loss": 1.9821, "step": 17199 }, { "epoch": 0.5549382852971602, "grad_norm": 0.3359375, "learning_rate": 1.3150857288554954e-05, "loss": 1.9987, "step": 17200 }, { "epoch": 0.5549705491509566, "grad_norm": 0.322265625, "learning_rate": 1.3149295568582132e-05, "loss": 1.9609, "step": 17201 }, { "epoch": 0.5550028130047528, "grad_norm": 0.32421875, "learning_rate": 1.3147733868980535e-05, "loss": 1.9505, "step": 17202 }, { "epoch": 0.5550350768585492, "grad_norm": 0.34765625, "learning_rate": 1.3146172189767362e-05, "loss": 1.9881, "step": 17203 }, { "epoch": 0.5550673407123455, "grad_norm": 0.322265625, "learning_rate": 1.3144610530959784e-05, "loss": 1.9477, "step": 17204 }, { "epoch": 0.5550996045661419, "grad_norm": 0.33203125, "learning_rate": 1.314304889257501e-05, "loss": 1.975, "step": 17205 }, { "epoch": 0.5551318684199382, "grad_norm": 0.32421875, "learning_rate": 1.3141487274630224e-05, "loss": 1.9874, "step": 17206 }, { "epoch": 0.5551641322737346, "grad_norm": 0.333984375, "learning_rate": 1.3139925677142607e-05, "loss": 1.998, "step": 17207 }, { "epoch": 0.5551963961275309, "grad_norm": 0.333984375, "learning_rate": 1.313836410012935e-05, "loss": 1.9791, "step": 17208 }, { "epoch": 0.5552286599813273, "grad_norm": 0.337890625, "learning_rate": 1.3136802543607658e-05, "loss": 1.9779, "step": 17209 }, { "epoch": 0.5552609238351236, "grad_norm": 0.333984375, "learning_rate": 1.3135241007594697e-05, "loss": 1.9998, "step": 17210 }, { "epoch": 0.55529318768892, "grad_norm": 0.333984375, "learning_rate": 1.3133679492107665e-05, "loss": 1.9958, "step": 17211 }, { "epoch": 0.5553254515427163, "grad_norm": 0.349609375, "learning_rate": 1.3132117997163758e-05, "loss": 1.9876, "step": 17212 }, { "epoch": 0.5553577153965127, "grad_norm": 0.3359375, "learning_rate": 1.313055652278015e-05, "loss": 1.9748, "step": 17213 }, { "epoch": 0.555389979250309, "grad_norm": 0.32421875, "learning_rate": 1.3128995068974036e-05, "loss": 1.9841, "step": 17214 }, { "epoch": 0.5554222431041054, "grad_norm": 0.32421875, "learning_rate": 1.3127433635762602e-05, "loss": 1.9549, "step": 17215 }, { "epoch": 0.5554545069579018, "grad_norm": 0.33203125, "learning_rate": 1.3125872223163035e-05, "loss": 1.9928, "step": 17216 }, { "epoch": 0.5554867708116981, "grad_norm": 0.337890625, "learning_rate": 1.3124310831192521e-05, "loss": 2.0057, "step": 17217 }, { "epoch": 0.5555190346654945, "grad_norm": 0.326171875, "learning_rate": 1.312274945986825e-05, "loss": 1.9587, "step": 17218 }, { "epoch": 0.5555512985192907, "grad_norm": 0.3359375, "learning_rate": 1.3121188109207405e-05, "loss": 1.9888, "step": 17219 }, { "epoch": 0.5555835623730871, "grad_norm": 0.328125, "learning_rate": 1.3119626779227172e-05, "loss": 1.9683, "step": 17220 }, { "epoch": 0.5556158262268834, "grad_norm": 0.34375, "learning_rate": 1.3118065469944743e-05, "loss": 2.0071, "step": 17221 }, { "epoch": 0.5556480900806798, "grad_norm": 0.330078125, "learning_rate": 1.3116504181377296e-05, "loss": 1.9521, "step": 17222 }, { "epoch": 0.5556803539344761, "grad_norm": 0.341796875, "learning_rate": 1.3114942913542017e-05, "loss": 1.9914, "step": 17223 }, { "epoch": 0.5557126177882725, "grad_norm": 0.33984375, "learning_rate": 1.31133816664561e-05, "loss": 1.9839, "step": 17224 }, { "epoch": 0.5557448816420688, "grad_norm": 0.34375, "learning_rate": 1.311182044013672e-05, "loss": 1.98, "step": 17225 }, { "epoch": 0.5557771454958652, "grad_norm": 0.33984375, "learning_rate": 1.3110259234601067e-05, "loss": 2.0072, "step": 17226 }, { "epoch": 0.5558094093496615, "grad_norm": 0.330078125, "learning_rate": 1.3108698049866329e-05, "loss": 1.9762, "step": 17227 }, { "epoch": 0.5558416732034579, "grad_norm": 0.341796875, "learning_rate": 1.3107136885949678e-05, "loss": 1.9637, "step": 17228 }, { "epoch": 0.5558739370572542, "grad_norm": 0.326171875, "learning_rate": 1.310557574286831e-05, "loss": 1.9571, "step": 17229 }, { "epoch": 0.5559062009110506, "grad_norm": 0.34765625, "learning_rate": 1.310401462063941e-05, "loss": 2.0084, "step": 17230 }, { "epoch": 0.5559384647648469, "grad_norm": 0.33984375, "learning_rate": 1.3102453519280148e-05, "loss": 1.9452, "step": 17231 }, { "epoch": 0.5559707286186433, "grad_norm": 0.333984375, "learning_rate": 1.310089243880772e-05, "loss": 1.9714, "step": 17232 }, { "epoch": 0.5560029924724396, "grad_norm": 0.34765625, "learning_rate": 1.3099331379239305e-05, "loss": 2.0029, "step": 17233 }, { "epoch": 0.556035256326236, "grad_norm": 0.333984375, "learning_rate": 1.3097770340592092e-05, "loss": 1.9887, "step": 17234 }, { "epoch": 0.5560675201800322, "grad_norm": 0.37890625, "learning_rate": 1.309620932288325e-05, "loss": 1.9657, "step": 17235 }, { "epoch": 0.5560997840338286, "grad_norm": 0.337890625, "learning_rate": 1.3094648326129973e-05, "loss": 1.9714, "step": 17236 }, { "epoch": 0.556132047887625, "grad_norm": 0.337890625, "learning_rate": 1.3093087350349447e-05, "loss": 1.9712, "step": 17237 }, { "epoch": 0.5561643117414213, "grad_norm": 0.33984375, "learning_rate": 1.3091526395558843e-05, "loss": 1.9785, "step": 17238 }, { "epoch": 0.5561965755952177, "grad_norm": 0.349609375, "learning_rate": 1.308996546177534e-05, "loss": 1.9878, "step": 17239 }, { "epoch": 0.556228839449014, "grad_norm": 0.3359375, "learning_rate": 1.308840454901614e-05, "loss": 1.971, "step": 17240 }, { "epoch": 0.5562611033028104, "grad_norm": 0.345703125, "learning_rate": 1.3086843657298403e-05, "loss": 1.9408, "step": 17241 }, { "epoch": 0.5562933671566067, "grad_norm": 0.333984375, "learning_rate": 1.308528278663932e-05, "loss": 1.9837, "step": 17242 }, { "epoch": 0.5563256310104031, "grad_norm": 0.337890625, "learning_rate": 1.3083721937056074e-05, "loss": 1.9925, "step": 17243 }, { "epoch": 0.5563578948641994, "grad_norm": 0.359375, "learning_rate": 1.3082161108565837e-05, "loss": 1.9841, "step": 17244 }, { "epoch": 0.5563901587179958, "grad_norm": 0.33203125, "learning_rate": 1.3080600301185795e-05, "loss": 1.9902, "step": 17245 }, { "epoch": 0.5564224225717921, "grad_norm": 0.328125, "learning_rate": 1.3079039514933131e-05, "loss": 1.9596, "step": 17246 }, { "epoch": 0.5564546864255885, "grad_norm": 0.361328125, "learning_rate": 1.307747874982502e-05, "loss": 1.977, "step": 17247 }, { "epoch": 0.5564869502793848, "grad_norm": 0.333984375, "learning_rate": 1.3075918005878643e-05, "loss": 1.9798, "step": 17248 }, { "epoch": 0.5565192141331812, "grad_norm": 0.34375, "learning_rate": 1.3074357283111183e-05, "loss": 1.9762, "step": 17249 }, { "epoch": 0.5565514779869775, "grad_norm": 0.34765625, "learning_rate": 1.3072796581539813e-05, "loss": 1.9667, "step": 17250 }, { "epoch": 0.5565837418407739, "grad_norm": 0.76953125, "learning_rate": 1.3071235901181717e-05, "loss": 1.9736, "step": 17251 }, { "epoch": 0.5566160056945701, "grad_norm": 0.359375, "learning_rate": 1.3069675242054076e-05, "loss": 1.9898, "step": 17252 }, { "epoch": 0.5566482695483665, "grad_norm": 0.365234375, "learning_rate": 1.306811460417406e-05, "loss": 1.9749, "step": 17253 }, { "epoch": 0.5566805334021628, "grad_norm": 0.35546875, "learning_rate": 1.3066553987558853e-05, "loss": 1.9954, "step": 17254 }, { "epoch": 0.5567127972559592, "grad_norm": 0.365234375, "learning_rate": 1.3064993392225637e-05, "loss": 1.9416, "step": 17255 }, { "epoch": 0.5567450611097556, "grad_norm": 0.34765625, "learning_rate": 1.3063432818191582e-05, "loss": 1.9725, "step": 17256 }, { "epoch": 0.5567773249635519, "grad_norm": 0.34765625, "learning_rate": 1.3061872265473869e-05, "loss": 1.9872, "step": 17257 }, { "epoch": 0.5568095888173483, "grad_norm": 0.35546875, "learning_rate": 1.3060311734089682e-05, "loss": 1.9897, "step": 17258 }, { "epoch": 0.5568418526711446, "grad_norm": 0.333984375, "learning_rate": 1.3058751224056185e-05, "loss": 1.9391, "step": 17259 }, { "epoch": 0.556874116524941, "grad_norm": 0.361328125, "learning_rate": 1.3057190735390561e-05, "loss": 1.9689, "step": 17260 }, { "epoch": 0.5569063803787373, "grad_norm": 0.3359375, "learning_rate": 1.3055630268109995e-05, "loss": 1.9781, "step": 17261 }, { "epoch": 0.5569386442325337, "grad_norm": 0.33984375, "learning_rate": 1.3054069822231648e-05, "loss": 1.959, "step": 17262 }, { "epoch": 0.55697090808633, "grad_norm": 0.32421875, "learning_rate": 1.3052509397772707e-05, "loss": 1.9759, "step": 17263 }, { "epoch": 0.5570031719401264, "grad_norm": 0.333984375, "learning_rate": 1.3050948994750352e-05, "loss": 1.9997, "step": 17264 }, { "epoch": 0.5570354357939227, "grad_norm": 0.33203125, "learning_rate": 1.3049388613181745e-05, "loss": 1.9983, "step": 17265 }, { "epoch": 0.5570676996477191, "grad_norm": 0.341796875, "learning_rate": 1.3047828253084065e-05, "loss": 1.9844, "step": 17266 }, { "epoch": 0.5570999635015154, "grad_norm": 0.333984375, "learning_rate": 1.3046267914474495e-05, "loss": 1.9747, "step": 17267 }, { "epoch": 0.5571322273553118, "grad_norm": 0.3359375, "learning_rate": 1.3044707597370213e-05, "loss": 1.9706, "step": 17268 }, { "epoch": 0.557164491209108, "grad_norm": 0.326171875, "learning_rate": 1.3043147301788382e-05, "loss": 1.9648, "step": 17269 }, { "epoch": 0.5571967550629044, "grad_norm": 0.33984375, "learning_rate": 1.3041587027746175e-05, "loss": 1.9835, "step": 17270 }, { "epoch": 0.5572290189167007, "grad_norm": 0.333984375, "learning_rate": 1.3040026775260784e-05, "loss": 1.9837, "step": 17271 }, { "epoch": 0.5572612827704971, "grad_norm": 0.345703125, "learning_rate": 1.3038466544349368e-05, "loss": 1.9723, "step": 17272 }, { "epoch": 0.5572935466242934, "grad_norm": 0.330078125, "learning_rate": 1.3036906335029103e-05, "loss": 1.9843, "step": 17273 }, { "epoch": 0.5573258104780898, "grad_norm": 0.337890625, "learning_rate": 1.3035346147317169e-05, "loss": 1.974, "step": 17274 }, { "epoch": 0.5573580743318861, "grad_norm": 0.33203125, "learning_rate": 1.3033785981230731e-05, "loss": 1.9912, "step": 17275 }, { "epoch": 0.5573903381856825, "grad_norm": 0.3359375, "learning_rate": 1.3032225836786966e-05, "loss": 1.9425, "step": 17276 }, { "epoch": 0.5574226020394789, "grad_norm": 0.330078125, "learning_rate": 1.3030665714003051e-05, "loss": 1.9375, "step": 17277 }, { "epoch": 0.5574548658932752, "grad_norm": 0.330078125, "learning_rate": 1.3029105612896153e-05, "loss": 1.9643, "step": 17278 }, { "epoch": 0.5574871297470716, "grad_norm": 0.33203125, "learning_rate": 1.3027545533483446e-05, "loss": 1.9828, "step": 17279 }, { "epoch": 0.5575193936008679, "grad_norm": 0.3359375, "learning_rate": 1.3025985475782106e-05, "loss": 1.9623, "step": 17280 }, { "epoch": 0.5575516574546643, "grad_norm": 0.361328125, "learning_rate": 1.3024425439809298e-05, "loss": 1.96, "step": 17281 }, { "epoch": 0.5575839213084606, "grad_norm": 0.3671875, "learning_rate": 1.3022865425582197e-05, "loss": 1.9823, "step": 17282 }, { "epoch": 0.557616185162257, "grad_norm": 0.349609375, "learning_rate": 1.3021305433117979e-05, "loss": 1.9615, "step": 17283 }, { "epoch": 0.5576484490160533, "grad_norm": 0.330078125, "learning_rate": 1.3019745462433807e-05, "loss": 1.9698, "step": 17284 }, { "epoch": 0.5576807128698497, "grad_norm": 0.33203125, "learning_rate": 1.3018185513546858e-05, "loss": 1.9688, "step": 17285 }, { "epoch": 0.557712976723646, "grad_norm": 0.341796875, "learning_rate": 1.3016625586474303e-05, "loss": 1.9904, "step": 17286 }, { "epoch": 0.5577452405774423, "grad_norm": 0.3359375, "learning_rate": 1.3015065681233308e-05, "loss": 1.9769, "step": 17287 }, { "epoch": 0.5577775044312386, "grad_norm": 0.322265625, "learning_rate": 1.3013505797841047e-05, "loss": 1.9678, "step": 17288 }, { "epoch": 0.557809768285035, "grad_norm": 0.345703125, "learning_rate": 1.3011945936314694e-05, "loss": 1.9747, "step": 17289 }, { "epoch": 0.5578420321388313, "grad_norm": 0.3359375, "learning_rate": 1.3010386096671404e-05, "loss": 1.9642, "step": 17290 }, { "epoch": 0.5578742959926277, "grad_norm": 0.33203125, "learning_rate": 1.3008826278928362e-05, "loss": 1.9867, "step": 17291 }, { "epoch": 0.557906559846424, "grad_norm": 0.3359375, "learning_rate": 1.3007266483102736e-05, "loss": 1.9696, "step": 17292 }, { "epoch": 0.5579388237002204, "grad_norm": 0.33203125, "learning_rate": 1.3005706709211684e-05, "loss": 1.9533, "step": 17293 }, { "epoch": 0.5579710875540167, "grad_norm": 0.333984375, "learning_rate": 1.3004146957272382e-05, "loss": 1.987, "step": 17294 }, { "epoch": 0.5580033514078131, "grad_norm": 0.3515625, "learning_rate": 1.3002587227302006e-05, "loss": 1.939, "step": 17295 }, { "epoch": 0.5580356152616095, "grad_norm": 0.337890625, "learning_rate": 1.300102751931771e-05, "loss": 1.9913, "step": 17296 }, { "epoch": 0.5580678791154058, "grad_norm": 0.341796875, "learning_rate": 1.2999467833336665e-05, "loss": 1.9699, "step": 17297 }, { "epoch": 0.5581001429692022, "grad_norm": 0.34765625, "learning_rate": 1.2997908169376047e-05, "loss": 2.0062, "step": 17298 }, { "epoch": 0.5581324068229985, "grad_norm": 0.345703125, "learning_rate": 1.2996348527453028e-05, "loss": 1.9896, "step": 17299 }, { "epoch": 0.5581646706767949, "grad_norm": 0.337890625, "learning_rate": 1.2994788907584758e-05, "loss": 1.9593, "step": 17300 }, { "epoch": 0.5581969345305912, "grad_norm": 0.357421875, "learning_rate": 1.2993229309788411e-05, "loss": 1.955, "step": 17301 }, { "epoch": 0.5582291983843876, "grad_norm": 0.365234375, "learning_rate": 1.2991669734081165e-05, "loss": 1.9931, "step": 17302 }, { "epoch": 0.5582614622381838, "grad_norm": 0.341796875, "learning_rate": 1.2990110180480173e-05, "loss": 1.9608, "step": 17303 }, { "epoch": 0.5582937260919802, "grad_norm": 0.3515625, "learning_rate": 1.2988550649002605e-05, "loss": 1.9907, "step": 17304 }, { "epoch": 0.5583259899457765, "grad_norm": 0.34765625, "learning_rate": 1.2986991139665633e-05, "loss": 1.9715, "step": 17305 }, { "epoch": 0.5583582537995729, "grad_norm": 0.34765625, "learning_rate": 1.2985431652486414e-05, "loss": 1.9863, "step": 17306 }, { "epoch": 0.5583905176533692, "grad_norm": 0.34375, "learning_rate": 1.2983872187482116e-05, "loss": 1.9542, "step": 17307 }, { "epoch": 0.5584227815071656, "grad_norm": 0.33203125, "learning_rate": 1.2982312744669914e-05, "loss": 1.9726, "step": 17308 }, { "epoch": 0.5584550453609619, "grad_norm": 0.33984375, "learning_rate": 1.298075332406696e-05, "loss": 1.9622, "step": 17309 }, { "epoch": 0.5584873092147583, "grad_norm": 0.341796875, "learning_rate": 1.2979193925690426e-05, "loss": 1.9304, "step": 17310 }, { "epoch": 0.5585195730685546, "grad_norm": 0.33203125, "learning_rate": 1.2977634549557479e-05, "loss": 1.9754, "step": 17311 }, { "epoch": 0.558551836922351, "grad_norm": 0.345703125, "learning_rate": 1.2976075195685276e-05, "loss": 1.9446, "step": 17312 }, { "epoch": 0.5585841007761473, "grad_norm": 0.34375, "learning_rate": 1.2974515864090985e-05, "loss": 1.9837, "step": 17313 }, { "epoch": 0.5586163646299437, "grad_norm": 0.33203125, "learning_rate": 1.2972956554791773e-05, "loss": 1.984, "step": 17314 }, { "epoch": 0.55864862848374, "grad_norm": 0.361328125, "learning_rate": 1.29713972678048e-05, "loss": 1.9655, "step": 17315 }, { "epoch": 0.5586808923375364, "grad_norm": 0.337890625, "learning_rate": 1.2969838003147229e-05, "loss": 1.9896, "step": 17316 }, { "epoch": 0.5587131561913328, "grad_norm": 0.357421875, "learning_rate": 1.2968278760836227e-05, "loss": 1.9556, "step": 17317 }, { "epoch": 0.5587454200451291, "grad_norm": 0.34375, "learning_rate": 1.2966719540888953e-05, "loss": 1.9892, "step": 17318 }, { "epoch": 0.5587776838989255, "grad_norm": 0.33203125, "learning_rate": 1.2965160343322572e-05, "loss": 1.9556, "step": 17319 }, { "epoch": 0.5588099477527217, "grad_norm": 0.330078125, "learning_rate": 1.2963601168154251e-05, "loss": 1.9653, "step": 17320 }, { "epoch": 0.5588422116065181, "grad_norm": 0.3515625, "learning_rate": 1.296204201540114e-05, "loss": 1.9406, "step": 17321 }, { "epoch": 0.5588744754603144, "grad_norm": 0.333984375, "learning_rate": 1.296048288508041e-05, "loss": 2.0022, "step": 17322 }, { "epoch": 0.5589067393141108, "grad_norm": 0.330078125, "learning_rate": 1.295892377720923e-05, "loss": 1.9756, "step": 17323 }, { "epoch": 0.5589390031679071, "grad_norm": 0.341796875, "learning_rate": 1.2957364691804745e-05, "loss": 1.9242, "step": 17324 }, { "epoch": 0.5589712670217035, "grad_norm": 0.326171875, "learning_rate": 1.295580562888412e-05, "loss": 1.9685, "step": 17325 }, { "epoch": 0.5590035308754998, "grad_norm": 0.330078125, "learning_rate": 1.295424658846453e-05, "loss": 1.9764, "step": 17326 }, { "epoch": 0.5590357947292962, "grad_norm": 0.33203125, "learning_rate": 1.295268757056312e-05, "loss": 1.9787, "step": 17327 }, { "epoch": 0.5590680585830925, "grad_norm": 0.33984375, "learning_rate": 1.2951128575197056e-05, "loss": 1.9671, "step": 17328 }, { "epoch": 0.5591003224368889, "grad_norm": 0.328125, "learning_rate": 1.2949569602383504e-05, "loss": 1.9791, "step": 17329 }, { "epoch": 0.5591325862906852, "grad_norm": 0.33203125, "learning_rate": 1.2948010652139612e-05, "loss": 1.998, "step": 17330 }, { "epoch": 0.5591648501444816, "grad_norm": 0.341796875, "learning_rate": 1.2946451724482548e-05, "loss": 1.9809, "step": 17331 }, { "epoch": 0.5591971139982779, "grad_norm": 0.337890625, "learning_rate": 1.2944892819429468e-05, "loss": 1.9758, "step": 17332 }, { "epoch": 0.5592293778520743, "grad_norm": 0.333984375, "learning_rate": 1.2943333936997544e-05, "loss": 1.9799, "step": 17333 }, { "epoch": 0.5592616417058706, "grad_norm": 0.353515625, "learning_rate": 1.2941775077203916e-05, "loss": 1.9475, "step": 17334 }, { "epoch": 0.559293905559667, "grad_norm": 0.328125, "learning_rate": 1.2940216240065753e-05, "loss": 1.9936, "step": 17335 }, { "epoch": 0.5593261694134632, "grad_norm": 0.3359375, "learning_rate": 1.2938657425600213e-05, "loss": 2.0075, "step": 17336 }, { "epoch": 0.5593584332672596, "grad_norm": 0.345703125, "learning_rate": 1.2937098633824453e-05, "loss": 1.9932, "step": 17337 }, { "epoch": 0.559390697121056, "grad_norm": 0.3359375, "learning_rate": 1.293553986475563e-05, "loss": 1.959, "step": 17338 }, { "epoch": 0.5594229609748523, "grad_norm": 0.337890625, "learning_rate": 1.2933981118410906e-05, "loss": 1.9512, "step": 17339 }, { "epoch": 0.5594552248286487, "grad_norm": 0.36328125, "learning_rate": 1.2932422394807432e-05, "loss": 1.9503, "step": 17340 }, { "epoch": 0.559487488682445, "grad_norm": 0.345703125, "learning_rate": 1.293086369396237e-05, "loss": 1.9614, "step": 17341 }, { "epoch": 0.5595197525362414, "grad_norm": 0.341796875, "learning_rate": 1.2929305015892878e-05, "loss": 2.0012, "step": 17342 }, { "epoch": 0.5595520163900377, "grad_norm": 0.345703125, "learning_rate": 1.292774636061611e-05, "loss": 1.9841, "step": 17343 }, { "epoch": 0.5595842802438341, "grad_norm": 0.353515625, "learning_rate": 1.2926187728149221e-05, "loss": 1.986, "step": 17344 }, { "epoch": 0.5596165440976304, "grad_norm": 0.341796875, "learning_rate": 1.2924629118509373e-05, "loss": 1.9486, "step": 17345 }, { "epoch": 0.5596488079514268, "grad_norm": 0.357421875, "learning_rate": 1.2923070531713718e-05, "loss": 1.9971, "step": 17346 }, { "epoch": 0.5596810718052231, "grad_norm": 0.345703125, "learning_rate": 1.292151196777941e-05, "loss": 1.9814, "step": 17347 }, { "epoch": 0.5597133356590195, "grad_norm": 0.341796875, "learning_rate": 1.2919953426723613e-05, "loss": 1.9305, "step": 17348 }, { "epoch": 0.5597455995128158, "grad_norm": 0.349609375, "learning_rate": 1.291839490856347e-05, "loss": 1.983, "step": 17349 }, { "epoch": 0.5597778633666122, "grad_norm": 0.33203125, "learning_rate": 1.2916836413316144e-05, "loss": 1.9984, "step": 17350 }, { "epoch": 0.5598101272204085, "grad_norm": 0.330078125, "learning_rate": 1.2915277940998797e-05, "loss": 1.939, "step": 17351 }, { "epoch": 0.5598423910742049, "grad_norm": 0.34765625, "learning_rate": 1.2913719491628563e-05, "loss": 1.9812, "step": 17352 }, { "epoch": 0.5598746549280011, "grad_norm": 0.33984375, "learning_rate": 1.291216106522261e-05, "loss": 1.9786, "step": 17353 }, { "epoch": 0.5599069187817975, "grad_norm": 0.318359375, "learning_rate": 1.29106026617981e-05, "loss": 1.9848, "step": 17354 }, { "epoch": 0.5599391826355938, "grad_norm": 0.328125, "learning_rate": 1.2909044281372168e-05, "loss": 1.9781, "step": 17355 }, { "epoch": 0.5599714464893902, "grad_norm": 0.333984375, "learning_rate": 1.2907485923961974e-05, "loss": 1.9578, "step": 17356 }, { "epoch": 0.5600037103431866, "grad_norm": 0.328125, "learning_rate": 1.2905927589584684e-05, "loss": 1.9695, "step": 17357 }, { "epoch": 0.5600359741969829, "grad_norm": 0.333984375, "learning_rate": 1.2904369278257435e-05, "loss": 1.9949, "step": 17358 }, { "epoch": 0.5600682380507793, "grad_norm": 0.318359375, "learning_rate": 1.2902810989997382e-05, "loss": 1.9625, "step": 17359 }, { "epoch": 0.5601005019045756, "grad_norm": 0.33203125, "learning_rate": 1.2901252724821689e-05, "loss": 1.9893, "step": 17360 }, { "epoch": 0.560132765758372, "grad_norm": 0.3359375, "learning_rate": 1.2899694482747493e-05, "loss": 1.9715, "step": 17361 }, { "epoch": 0.5601650296121683, "grad_norm": 0.333984375, "learning_rate": 1.2898136263791955e-05, "loss": 1.9589, "step": 17362 }, { "epoch": 0.5601972934659647, "grad_norm": 0.33984375, "learning_rate": 1.2896578067972228e-05, "loss": 1.973, "step": 17363 }, { "epoch": 0.560229557319761, "grad_norm": 0.357421875, "learning_rate": 1.2895019895305458e-05, "loss": 1.9756, "step": 17364 }, { "epoch": 0.5602618211735574, "grad_norm": 0.349609375, "learning_rate": 1.2893461745808801e-05, "loss": 1.9551, "step": 17365 }, { "epoch": 0.5602940850273537, "grad_norm": 0.349609375, "learning_rate": 1.2891903619499404e-05, "loss": 1.9749, "step": 17366 }, { "epoch": 0.5603263488811501, "grad_norm": 0.35546875, "learning_rate": 1.2890345516394421e-05, "loss": 1.9821, "step": 17367 }, { "epoch": 0.5603586127349464, "grad_norm": 0.3515625, "learning_rate": 1.2888787436511001e-05, "loss": 1.999, "step": 17368 }, { "epoch": 0.5603908765887428, "grad_norm": 0.341796875, "learning_rate": 1.2887229379866293e-05, "loss": 1.9688, "step": 17369 }, { "epoch": 0.560423140442539, "grad_norm": 0.349609375, "learning_rate": 1.288567134647745e-05, "loss": 1.9799, "step": 17370 }, { "epoch": 0.5604554042963354, "grad_norm": 0.326171875, "learning_rate": 1.288411333636162e-05, "loss": 1.9593, "step": 17371 }, { "epoch": 0.5604876681501317, "grad_norm": 0.349609375, "learning_rate": 1.288255534953595e-05, "loss": 1.9598, "step": 17372 }, { "epoch": 0.5605199320039281, "grad_norm": 0.34375, "learning_rate": 1.2880997386017594e-05, "loss": 1.9903, "step": 17373 }, { "epoch": 0.5605521958577244, "grad_norm": 0.330078125, "learning_rate": 1.2879439445823697e-05, "loss": 1.9768, "step": 17374 }, { "epoch": 0.5605844597115208, "grad_norm": 0.353515625, "learning_rate": 1.287788152897141e-05, "loss": 1.9704, "step": 17375 }, { "epoch": 0.5606167235653171, "grad_norm": 0.326171875, "learning_rate": 1.2876323635477885e-05, "loss": 1.9785, "step": 17376 }, { "epoch": 0.5606489874191135, "grad_norm": 0.345703125, "learning_rate": 1.287476576536026e-05, "loss": 1.9831, "step": 17377 }, { "epoch": 0.5606812512729099, "grad_norm": 0.3359375, "learning_rate": 1.287320791863569e-05, "loss": 1.9841, "step": 17378 }, { "epoch": 0.5607135151267062, "grad_norm": 0.3359375, "learning_rate": 1.2871650095321326e-05, "loss": 1.9612, "step": 17379 }, { "epoch": 0.5607457789805026, "grad_norm": 0.349609375, "learning_rate": 1.2870092295434303e-05, "loss": 1.9698, "step": 17380 }, { "epoch": 0.5607780428342989, "grad_norm": 0.333984375, "learning_rate": 1.286853451899178e-05, "loss": 1.9736, "step": 17381 }, { "epoch": 0.5608103066880953, "grad_norm": 0.322265625, "learning_rate": 1.2866976766010905e-05, "loss": 1.9991, "step": 17382 }, { "epoch": 0.5608425705418916, "grad_norm": 0.357421875, "learning_rate": 1.2865419036508812e-05, "loss": 1.9525, "step": 17383 }, { "epoch": 0.560874834395688, "grad_norm": 0.330078125, "learning_rate": 1.2863861330502655e-05, "loss": 1.9822, "step": 17384 }, { "epoch": 0.5609070982494843, "grad_norm": 0.330078125, "learning_rate": 1.2862303648009587e-05, "loss": 1.9873, "step": 17385 }, { "epoch": 0.5609393621032807, "grad_norm": 0.33203125, "learning_rate": 1.2860745989046742e-05, "loss": 1.9575, "step": 17386 }, { "epoch": 0.560971625957077, "grad_norm": 0.326171875, "learning_rate": 1.2859188353631266e-05, "loss": 1.966, "step": 17387 }, { "epoch": 0.5610038898108733, "grad_norm": 0.3359375, "learning_rate": 1.2857630741780319e-05, "loss": 1.9615, "step": 17388 }, { "epoch": 0.5610361536646696, "grad_norm": 0.349609375, "learning_rate": 1.2856073153511028e-05, "loss": 1.9752, "step": 17389 }, { "epoch": 0.561068417518466, "grad_norm": 0.33203125, "learning_rate": 1.2854515588840549e-05, "loss": 1.9934, "step": 17390 }, { "epoch": 0.5611006813722623, "grad_norm": 0.337890625, "learning_rate": 1.2852958047786025e-05, "loss": 1.9793, "step": 17391 }, { "epoch": 0.5611329452260587, "grad_norm": 0.330078125, "learning_rate": 1.2851400530364593e-05, "loss": 1.9557, "step": 17392 }, { "epoch": 0.561165209079855, "grad_norm": 0.326171875, "learning_rate": 1.2849843036593405e-05, "loss": 1.973, "step": 17393 }, { "epoch": 0.5611974729336514, "grad_norm": 0.322265625, "learning_rate": 1.2848285566489605e-05, "loss": 1.9415, "step": 17394 }, { "epoch": 0.5612297367874477, "grad_norm": 0.337890625, "learning_rate": 1.284672812007033e-05, "loss": 1.9472, "step": 17395 }, { "epoch": 0.5612620006412441, "grad_norm": 0.328125, "learning_rate": 1.2845170697352728e-05, "loss": 1.9804, "step": 17396 }, { "epoch": 0.5612942644950405, "grad_norm": 0.34375, "learning_rate": 1.284361329835394e-05, "loss": 1.9668, "step": 17397 }, { "epoch": 0.5613265283488368, "grad_norm": 0.337890625, "learning_rate": 1.2842055923091114e-05, "loss": 1.9663, "step": 17398 }, { "epoch": 0.5613587922026332, "grad_norm": 0.341796875, "learning_rate": 1.2840498571581385e-05, "loss": 1.983, "step": 17399 }, { "epoch": 0.5613910560564295, "grad_norm": 0.326171875, "learning_rate": 1.2838941243841899e-05, "loss": 1.9752, "step": 17400 }, { "epoch": 0.5614233199102259, "grad_norm": 0.341796875, "learning_rate": 1.28373839398898e-05, "loss": 1.9736, "step": 17401 }, { "epoch": 0.5614555837640222, "grad_norm": 0.3359375, "learning_rate": 1.2835826659742224e-05, "loss": 1.9373, "step": 17402 }, { "epoch": 0.5614878476178186, "grad_norm": 0.328125, "learning_rate": 1.2834269403416316e-05, "loss": 1.9526, "step": 17403 }, { "epoch": 0.5615201114716148, "grad_norm": 0.3515625, "learning_rate": 1.2832712170929219e-05, "loss": 1.9879, "step": 17404 }, { "epoch": 0.5615523753254112, "grad_norm": 0.345703125, "learning_rate": 1.283115496229807e-05, "loss": 1.9923, "step": 17405 }, { "epoch": 0.5615846391792075, "grad_norm": 0.34375, "learning_rate": 1.282959777754001e-05, "loss": 1.9698, "step": 17406 }, { "epoch": 0.5616169030330039, "grad_norm": 0.337890625, "learning_rate": 1.2828040616672184e-05, "loss": 1.9465, "step": 17407 }, { "epoch": 0.5616491668868002, "grad_norm": 0.333984375, "learning_rate": 1.2826483479711724e-05, "loss": 1.9841, "step": 17408 }, { "epoch": 0.5616814307405966, "grad_norm": 0.353515625, "learning_rate": 1.2824926366675776e-05, "loss": 1.9645, "step": 17409 }, { "epoch": 0.5617136945943929, "grad_norm": 0.333984375, "learning_rate": 1.2823369277581486e-05, "loss": 1.9594, "step": 17410 }, { "epoch": 0.5617459584481893, "grad_norm": 0.380859375, "learning_rate": 1.2821812212445974e-05, "loss": 1.9833, "step": 17411 }, { "epoch": 0.5617782223019856, "grad_norm": 0.34375, "learning_rate": 1.2820255171286395e-05, "loss": 1.9856, "step": 17412 }, { "epoch": 0.561810486155782, "grad_norm": 0.35546875, "learning_rate": 1.2818698154119889e-05, "loss": 2.0021, "step": 17413 }, { "epoch": 0.5618427500095783, "grad_norm": 0.341796875, "learning_rate": 1.2817141160963579e-05, "loss": 1.988, "step": 17414 }, { "epoch": 0.5618750138633747, "grad_norm": 0.333984375, "learning_rate": 1.2815584191834615e-05, "loss": 1.9692, "step": 17415 }, { "epoch": 0.561907277717171, "grad_norm": 0.3515625, "learning_rate": 1.2814027246750142e-05, "loss": 1.9637, "step": 17416 }, { "epoch": 0.5619395415709674, "grad_norm": 0.349609375, "learning_rate": 1.2812470325727283e-05, "loss": 1.9542, "step": 17417 }, { "epoch": 0.5619718054247638, "grad_norm": 0.333984375, "learning_rate": 1.2810913428783174e-05, "loss": 1.974, "step": 17418 }, { "epoch": 0.5620040692785601, "grad_norm": 0.34765625, "learning_rate": 1.2809356555934972e-05, "loss": 2.0044, "step": 17419 }, { "epoch": 0.5620363331323565, "grad_norm": 0.353515625, "learning_rate": 1.2807799707199797e-05, "loss": 1.9764, "step": 17420 }, { "epoch": 0.5620685969861527, "grad_norm": 0.330078125, "learning_rate": 1.2806242882594786e-05, "loss": 1.9367, "step": 17421 }, { "epoch": 0.5621008608399491, "grad_norm": 0.345703125, "learning_rate": 1.2804686082137086e-05, "loss": 1.9787, "step": 17422 }, { "epoch": 0.5621331246937454, "grad_norm": 0.33203125, "learning_rate": 1.2803129305843821e-05, "loss": 1.935, "step": 17423 }, { "epoch": 0.5621653885475418, "grad_norm": 0.345703125, "learning_rate": 1.2801572553732134e-05, "loss": 1.9677, "step": 17424 }, { "epoch": 0.5621976524013381, "grad_norm": 0.333984375, "learning_rate": 1.2800015825819163e-05, "loss": 1.9649, "step": 17425 }, { "epoch": 0.5622299162551345, "grad_norm": 0.333984375, "learning_rate": 1.2798459122122035e-05, "loss": 1.9611, "step": 17426 }, { "epoch": 0.5622621801089308, "grad_norm": 0.328125, "learning_rate": 1.2796902442657892e-05, "loss": 1.9814, "step": 17427 }, { "epoch": 0.5622944439627272, "grad_norm": 0.34375, "learning_rate": 1.2795345787443867e-05, "loss": 1.9733, "step": 17428 }, { "epoch": 0.5623267078165235, "grad_norm": 0.32421875, "learning_rate": 1.2793789156497092e-05, "loss": 1.9482, "step": 17429 }, { "epoch": 0.5623589716703199, "grad_norm": 0.32421875, "learning_rate": 1.2792232549834702e-05, "loss": 1.9581, "step": 17430 }, { "epoch": 0.5623912355241162, "grad_norm": 0.333984375, "learning_rate": 1.2790675967473832e-05, "loss": 1.9566, "step": 17431 }, { "epoch": 0.5624234993779126, "grad_norm": 0.322265625, "learning_rate": 1.2789119409431621e-05, "loss": 1.9642, "step": 17432 }, { "epoch": 0.5624557632317089, "grad_norm": 0.333984375, "learning_rate": 1.2787562875725194e-05, "loss": 1.9444, "step": 17433 }, { "epoch": 0.5624880270855053, "grad_norm": 0.33203125, "learning_rate": 1.2786006366371688e-05, "loss": 1.983, "step": 17434 }, { "epoch": 0.5625202909393016, "grad_norm": 0.318359375, "learning_rate": 1.2784449881388237e-05, "loss": 1.9755, "step": 17435 }, { "epoch": 0.562552554793098, "grad_norm": 0.326171875, "learning_rate": 1.2782893420791968e-05, "loss": 1.9412, "step": 17436 }, { "epoch": 0.5625848186468942, "grad_norm": 0.33984375, "learning_rate": 1.2781336984600021e-05, "loss": 1.9745, "step": 17437 }, { "epoch": 0.5626170825006906, "grad_norm": 0.345703125, "learning_rate": 1.2779780572829528e-05, "loss": 1.9934, "step": 17438 }, { "epoch": 0.562649346354487, "grad_norm": 0.3359375, "learning_rate": 1.2778224185497613e-05, "loss": 1.9994, "step": 17439 }, { "epoch": 0.5626816102082833, "grad_norm": 0.333984375, "learning_rate": 1.2776667822621413e-05, "loss": 1.9677, "step": 17440 }, { "epoch": 0.5627138740620797, "grad_norm": 0.326171875, "learning_rate": 1.2775111484218062e-05, "loss": 2.0003, "step": 17441 }, { "epoch": 0.562746137915876, "grad_norm": 0.322265625, "learning_rate": 1.2773555170304683e-05, "loss": 1.9355, "step": 17442 }, { "epoch": 0.5627784017696724, "grad_norm": 0.33984375, "learning_rate": 1.277199888089841e-05, "loss": 1.9818, "step": 17443 }, { "epoch": 0.5628106656234687, "grad_norm": 0.35546875, "learning_rate": 1.2770442616016385e-05, "loss": 2.0062, "step": 17444 }, { "epoch": 0.5628429294772651, "grad_norm": 0.3515625, "learning_rate": 1.276888637567572e-05, "loss": 1.987, "step": 17445 }, { "epoch": 0.5628751933310614, "grad_norm": 0.34765625, "learning_rate": 1.2767330159893552e-05, "loss": 2.0024, "step": 17446 }, { "epoch": 0.5629074571848578, "grad_norm": 0.345703125, "learning_rate": 1.2765773968687018e-05, "loss": 1.9589, "step": 17447 }, { "epoch": 0.5629397210386541, "grad_norm": 0.357421875, "learning_rate": 1.2764217802073237e-05, "loss": 1.9756, "step": 17448 }, { "epoch": 0.5629719848924505, "grad_norm": 0.337890625, "learning_rate": 1.2762661660069337e-05, "loss": 1.9882, "step": 17449 }, { "epoch": 0.5630042487462468, "grad_norm": 0.3359375, "learning_rate": 1.2761105542692467e-05, "loss": 1.9453, "step": 17450 }, { "epoch": 0.5630365126000432, "grad_norm": 0.326171875, "learning_rate": 1.275954944995973e-05, "loss": 1.9648, "step": 17451 }, { "epoch": 0.5630687764538395, "grad_norm": 0.34765625, "learning_rate": 1.2757993381888267e-05, "loss": 1.9969, "step": 17452 }, { "epoch": 0.5631010403076359, "grad_norm": 0.337890625, "learning_rate": 1.2756437338495208e-05, "loss": 1.9806, "step": 17453 }, { "epoch": 0.5631333041614321, "grad_norm": 0.337890625, "learning_rate": 1.2754881319797674e-05, "loss": 1.9574, "step": 17454 }, { "epoch": 0.5631655680152285, "grad_norm": 0.34375, "learning_rate": 1.2753325325812796e-05, "loss": 1.9905, "step": 17455 }, { "epoch": 0.5631978318690248, "grad_norm": 0.33984375, "learning_rate": 1.2751769356557703e-05, "loss": 1.9594, "step": 17456 }, { "epoch": 0.5632300957228212, "grad_norm": 0.328125, "learning_rate": 1.2750213412049517e-05, "loss": 1.9936, "step": 17457 }, { "epoch": 0.5632623595766176, "grad_norm": 0.353515625, "learning_rate": 1.2748657492305369e-05, "loss": 1.9619, "step": 17458 }, { "epoch": 0.5632946234304139, "grad_norm": 0.34375, "learning_rate": 1.2747101597342386e-05, "loss": 1.952, "step": 17459 }, { "epoch": 0.5633268872842103, "grad_norm": 0.3515625, "learning_rate": 1.274554572717769e-05, "loss": 1.9845, "step": 17460 }, { "epoch": 0.5633591511380066, "grad_norm": 0.3359375, "learning_rate": 1.274398988182841e-05, "loss": 1.9762, "step": 17461 }, { "epoch": 0.563391414991803, "grad_norm": 0.341796875, "learning_rate": 1.2742434061311673e-05, "loss": 1.9756, "step": 17462 }, { "epoch": 0.5634236788455993, "grad_norm": 0.33984375, "learning_rate": 1.27408782656446e-05, "loss": 1.9468, "step": 17463 }, { "epoch": 0.5634559426993957, "grad_norm": 0.33984375, "learning_rate": 1.2739322494844317e-05, "loss": 1.9847, "step": 17464 }, { "epoch": 0.563488206553192, "grad_norm": 0.330078125, "learning_rate": 1.273776674892795e-05, "loss": 1.9935, "step": 17465 }, { "epoch": 0.5635204704069884, "grad_norm": 0.33203125, "learning_rate": 1.273621102791263e-05, "loss": 1.9814, "step": 17466 }, { "epoch": 0.5635527342607847, "grad_norm": 0.330078125, "learning_rate": 1.273465533181547e-05, "loss": 1.9335, "step": 17467 }, { "epoch": 0.5635849981145811, "grad_norm": 0.32421875, "learning_rate": 1.2733099660653597e-05, "loss": 1.9391, "step": 17468 }, { "epoch": 0.5636172619683774, "grad_norm": 0.33984375, "learning_rate": 1.2731544014444142e-05, "loss": 1.9704, "step": 17469 }, { "epoch": 0.5636495258221738, "grad_norm": 0.322265625, "learning_rate": 1.272998839320422e-05, "loss": 1.9592, "step": 17470 }, { "epoch": 0.56368178967597, "grad_norm": 0.3203125, "learning_rate": 1.2728432796950955e-05, "loss": 1.9677, "step": 17471 }, { "epoch": 0.5637140535297664, "grad_norm": 0.33203125, "learning_rate": 1.2726877225701482e-05, "loss": 1.9537, "step": 17472 }, { "epoch": 0.5637463173835627, "grad_norm": 0.3359375, "learning_rate": 1.2725321679472902e-05, "loss": 1.9269, "step": 17473 }, { "epoch": 0.5637785812373591, "grad_norm": 0.3359375, "learning_rate": 1.2723766158282352e-05, "loss": 1.9585, "step": 17474 }, { "epoch": 0.5638108450911554, "grad_norm": 0.33203125, "learning_rate": 1.2722210662146958e-05, "loss": 1.9846, "step": 17475 }, { "epoch": 0.5638431089449518, "grad_norm": 0.3359375, "learning_rate": 1.2720655191083828e-05, "loss": 1.9816, "step": 17476 }, { "epoch": 0.5638753727987481, "grad_norm": 0.322265625, "learning_rate": 1.271909974511009e-05, "loss": 2.0011, "step": 17477 }, { "epoch": 0.5639076366525445, "grad_norm": 0.337890625, "learning_rate": 1.2717544324242872e-05, "loss": 1.989, "step": 17478 }, { "epoch": 0.5639399005063409, "grad_norm": 0.345703125, "learning_rate": 1.2715988928499284e-05, "loss": 1.9947, "step": 17479 }, { "epoch": 0.5639721643601372, "grad_norm": 0.3359375, "learning_rate": 1.2714433557896452e-05, "loss": 1.955, "step": 17480 }, { "epoch": 0.5640044282139336, "grad_norm": 0.326171875, "learning_rate": 1.2712878212451496e-05, "loss": 1.994, "step": 17481 }, { "epoch": 0.5640366920677299, "grad_norm": 0.337890625, "learning_rate": 1.2711322892181535e-05, "loss": 1.9945, "step": 17482 }, { "epoch": 0.5640689559215263, "grad_norm": 0.33984375, "learning_rate": 1.270976759710369e-05, "loss": 1.9663, "step": 17483 }, { "epoch": 0.5641012197753226, "grad_norm": 0.333984375, "learning_rate": 1.2708212327235083e-05, "loss": 1.9856, "step": 17484 }, { "epoch": 0.564133483629119, "grad_norm": 0.33203125, "learning_rate": 1.2706657082592826e-05, "loss": 1.9498, "step": 17485 }, { "epoch": 0.5641657474829153, "grad_norm": 0.326171875, "learning_rate": 1.2705101863194045e-05, "loss": 1.9916, "step": 17486 }, { "epoch": 0.5641980113367117, "grad_norm": 0.33984375, "learning_rate": 1.2703546669055859e-05, "loss": 1.9621, "step": 17487 }, { "epoch": 0.564230275190508, "grad_norm": 0.33203125, "learning_rate": 1.2701991500195378e-05, "loss": 1.9997, "step": 17488 }, { "epoch": 0.5642625390443043, "grad_norm": 0.322265625, "learning_rate": 1.2700436356629729e-05, "loss": 1.9582, "step": 17489 }, { "epoch": 0.5642948028981006, "grad_norm": 0.330078125, "learning_rate": 1.269888123837603e-05, "loss": 1.9731, "step": 17490 }, { "epoch": 0.564327066751897, "grad_norm": 0.33984375, "learning_rate": 1.269732614545139e-05, "loss": 1.9963, "step": 17491 }, { "epoch": 0.5643593306056933, "grad_norm": 0.361328125, "learning_rate": 1.2695771077872933e-05, "loss": 1.9742, "step": 17492 }, { "epoch": 0.5643915944594897, "grad_norm": 0.326171875, "learning_rate": 1.2694216035657778e-05, "loss": 1.9768, "step": 17493 }, { "epoch": 0.564423858313286, "grad_norm": 0.345703125, "learning_rate": 1.2692661018823037e-05, "loss": 1.9731, "step": 17494 }, { "epoch": 0.5644561221670824, "grad_norm": 0.33203125, "learning_rate": 1.2691106027385826e-05, "loss": 1.9892, "step": 17495 }, { "epoch": 0.5644883860208787, "grad_norm": 0.337890625, "learning_rate": 1.268955106136327e-05, "loss": 1.9832, "step": 17496 }, { "epoch": 0.5645206498746751, "grad_norm": 0.322265625, "learning_rate": 1.268799612077247e-05, "loss": 1.9822, "step": 17497 }, { "epoch": 0.5645529137284715, "grad_norm": 0.3359375, "learning_rate": 1.2686441205630553e-05, "loss": 1.9425, "step": 17498 }, { "epoch": 0.5645851775822678, "grad_norm": 0.337890625, "learning_rate": 1.268488631595463e-05, "loss": 1.9935, "step": 17499 }, { "epoch": 0.5646174414360642, "grad_norm": 0.328125, "learning_rate": 1.2683331451761825e-05, "loss": 1.9593, "step": 17500 }, { "epoch": 0.5646497052898605, "grad_norm": 0.330078125, "learning_rate": 1.2681776613069237e-05, "loss": 1.9625, "step": 17501 }, { "epoch": 0.5646819691436569, "grad_norm": 0.322265625, "learning_rate": 1.2680221799893992e-05, "loss": 1.9905, "step": 17502 }, { "epoch": 0.5647142329974532, "grad_norm": 0.337890625, "learning_rate": 1.2678667012253207e-05, "loss": 1.9846, "step": 17503 }, { "epoch": 0.5647464968512496, "grad_norm": 0.337890625, "learning_rate": 1.267711225016398e-05, "loss": 1.9673, "step": 17504 }, { "epoch": 0.5647787607050458, "grad_norm": 0.33203125, "learning_rate": 1.2675557513643439e-05, "loss": 1.9519, "step": 17505 }, { "epoch": 0.5648110245588422, "grad_norm": 0.337890625, "learning_rate": 1.26740028027087e-05, "loss": 1.9977, "step": 17506 }, { "epoch": 0.5648432884126385, "grad_norm": 0.349609375, "learning_rate": 1.2672448117376863e-05, "loss": 1.9749, "step": 17507 }, { "epoch": 0.5648755522664349, "grad_norm": 0.328125, "learning_rate": 1.2670893457665042e-05, "loss": 1.9688, "step": 17508 }, { "epoch": 0.5649078161202312, "grad_norm": 0.33203125, "learning_rate": 1.2669338823590368e-05, "loss": 1.9722, "step": 17509 }, { "epoch": 0.5649400799740276, "grad_norm": 0.33203125, "learning_rate": 1.2667784215169932e-05, "loss": 1.973, "step": 17510 }, { "epoch": 0.5649723438278239, "grad_norm": 0.34765625, "learning_rate": 1.2666229632420855e-05, "loss": 1.9497, "step": 17511 }, { "epoch": 0.5650046076816203, "grad_norm": 0.345703125, "learning_rate": 1.266467507536025e-05, "loss": 1.95, "step": 17512 }, { "epoch": 0.5650368715354166, "grad_norm": 0.36328125, "learning_rate": 1.2663120544005222e-05, "loss": 1.9602, "step": 17513 }, { "epoch": 0.565069135389213, "grad_norm": 0.35546875, "learning_rate": 1.2661566038372887e-05, "loss": 1.9859, "step": 17514 }, { "epoch": 0.5651013992430093, "grad_norm": 0.345703125, "learning_rate": 1.2660011558480361e-05, "loss": 1.9654, "step": 17515 }, { "epoch": 0.5651336630968057, "grad_norm": 0.361328125, "learning_rate": 1.2658457104344745e-05, "loss": 1.9798, "step": 17516 }, { "epoch": 0.565165926950602, "grad_norm": 0.33203125, "learning_rate": 1.2656902675983152e-05, "loss": 1.9935, "step": 17517 }, { "epoch": 0.5651981908043984, "grad_norm": 0.341796875, "learning_rate": 1.26553482734127e-05, "loss": 1.9772, "step": 17518 }, { "epoch": 0.5652304546581948, "grad_norm": 0.3359375, "learning_rate": 1.2653793896650485e-05, "loss": 1.9611, "step": 17519 }, { "epoch": 0.5652627185119911, "grad_norm": 0.3671875, "learning_rate": 1.2652239545713626e-05, "loss": 1.9731, "step": 17520 }, { "epoch": 0.5652949823657875, "grad_norm": 0.341796875, "learning_rate": 1.2650685220619233e-05, "loss": 1.9373, "step": 17521 }, { "epoch": 0.5653272462195837, "grad_norm": 0.380859375, "learning_rate": 1.2649130921384406e-05, "loss": 1.9787, "step": 17522 }, { "epoch": 0.5653595100733801, "grad_norm": 0.35546875, "learning_rate": 1.2647576648026263e-05, "loss": 1.9716, "step": 17523 }, { "epoch": 0.5653917739271764, "grad_norm": 0.35546875, "learning_rate": 1.2646022400561909e-05, "loss": 1.9712, "step": 17524 }, { "epoch": 0.5654240377809728, "grad_norm": 0.359375, "learning_rate": 1.264446817900845e-05, "loss": 1.9888, "step": 17525 }, { "epoch": 0.5654563016347691, "grad_norm": 0.337890625, "learning_rate": 1.2642913983382995e-05, "loss": 1.9779, "step": 17526 }, { "epoch": 0.5654885654885655, "grad_norm": 0.359375, "learning_rate": 1.2641359813702658e-05, "loss": 1.9798, "step": 17527 }, { "epoch": 0.5655208293423618, "grad_norm": 0.337890625, "learning_rate": 1.263980566998453e-05, "loss": 1.9576, "step": 17528 }, { "epoch": 0.5655530931961582, "grad_norm": 0.33984375, "learning_rate": 1.2638251552245732e-05, "loss": 1.9645, "step": 17529 }, { "epoch": 0.5655853570499545, "grad_norm": 0.359375, "learning_rate": 1.2636697460503366e-05, "loss": 1.9929, "step": 17530 }, { "epoch": 0.5656176209037509, "grad_norm": 0.33203125, "learning_rate": 1.2635143394774543e-05, "loss": 1.9478, "step": 17531 }, { "epoch": 0.5656498847575472, "grad_norm": 0.345703125, "learning_rate": 1.2633589355076363e-05, "loss": 1.9745, "step": 17532 }, { "epoch": 0.5656821486113436, "grad_norm": 0.33984375, "learning_rate": 1.2632035341425931e-05, "loss": 1.9511, "step": 17533 }, { "epoch": 0.5657144124651399, "grad_norm": 0.3359375, "learning_rate": 1.2630481353840362e-05, "loss": 1.9914, "step": 17534 }, { "epoch": 0.5657466763189363, "grad_norm": 0.35546875, "learning_rate": 1.2628927392336746e-05, "loss": 1.9681, "step": 17535 }, { "epoch": 0.5657789401727326, "grad_norm": 0.333984375, "learning_rate": 1.26273734569322e-05, "loss": 1.9848, "step": 17536 }, { "epoch": 0.565811204026529, "grad_norm": 0.337890625, "learning_rate": 1.262581954764383e-05, "loss": 1.9663, "step": 17537 }, { "epoch": 0.5658434678803252, "grad_norm": 0.333984375, "learning_rate": 1.2624265664488729e-05, "loss": 1.9597, "step": 17538 }, { "epoch": 0.5658757317341216, "grad_norm": 0.328125, "learning_rate": 1.2622711807484002e-05, "loss": 1.9599, "step": 17539 }, { "epoch": 0.565907995587918, "grad_norm": 0.33984375, "learning_rate": 1.262115797664677e-05, "loss": 1.9556, "step": 17540 }, { "epoch": 0.5659402594417143, "grad_norm": 0.328125, "learning_rate": 1.2619604171994119e-05, "loss": 1.9688, "step": 17541 }, { "epoch": 0.5659725232955107, "grad_norm": 0.326171875, "learning_rate": 1.2618050393543154e-05, "loss": 1.953, "step": 17542 }, { "epoch": 0.566004787149307, "grad_norm": 0.33203125, "learning_rate": 1.2616496641310988e-05, "loss": 1.9399, "step": 17543 }, { "epoch": 0.5660370510031034, "grad_norm": 0.333984375, "learning_rate": 1.2614942915314713e-05, "loss": 1.9821, "step": 17544 }, { "epoch": 0.5660693148568997, "grad_norm": 0.326171875, "learning_rate": 1.2613389215571434e-05, "loss": 1.9896, "step": 17545 }, { "epoch": 0.5661015787106961, "grad_norm": 0.333984375, "learning_rate": 1.261183554209826e-05, "loss": 1.9379, "step": 17546 }, { "epoch": 0.5661338425644924, "grad_norm": 0.326171875, "learning_rate": 1.2610281894912282e-05, "loss": 1.9799, "step": 17547 }, { "epoch": 0.5661661064182888, "grad_norm": 0.3359375, "learning_rate": 1.2608728274030607e-05, "loss": 1.9981, "step": 17548 }, { "epoch": 0.5661983702720851, "grad_norm": 0.34375, "learning_rate": 1.2607174679470339e-05, "loss": 1.9938, "step": 17549 }, { "epoch": 0.5662306341258815, "grad_norm": 0.330078125, "learning_rate": 1.2605621111248573e-05, "loss": 2.0004, "step": 17550 }, { "epoch": 0.5662628979796778, "grad_norm": 0.353515625, "learning_rate": 1.260406756938241e-05, "loss": 1.9818, "step": 17551 }, { "epoch": 0.5662951618334742, "grad_norm": 0.326171875, "learning_rate": 1.2602514053888958e-05, "loss": 1.9781, "step": 17552 }, { "epoch": 0.5663274256872705, "grad_norm": 0.322265625, "learning_rate": 1.2600960564785305e-05, "loss": 1.9323, "step": 17553 }, { "epoch": 0.5663596895410669, "grad_norm": 0.357421875, "learning_rate": 1.259940710208856e-05, "loss": 1.9833, "step": 17554 }, { "epoch": 0.5663919533948631, "grad_norm": 0.333984375, "learning_rate": 1.2597853665815822e-05, "loss": 1.9423, "step": 17555 }, { "epoch": 0.5664242172486595, "grad_norm": 0.33203125, "learning_rate": 1.2596300255984185e-05, "loss": 1.9901, "step": 17556 }, { "epoch": 0.5664564811024558, "grad_norm": 0.326171875, "learning_rate": 1.2594746872610748e-05, "loss": 1.9625, "step": 17557 }, { "epoch": 0.5664887449562522, "grad_norm": 0.35546875, "learning_rate": 1.259319351571262e-05, "loss": 2.0103, "step": 17558 }, { "epoch": 0.5665210088100486, "grad_norm": 0.3359375, "learning_rate": 1.2591640185306881e-05, "loss": 1.952, "step": 17559 }, { "epoch": 0.5665532726638449, "grad_norm": 0.33203125, "learning_rate": 1.2590086881410642e-05, "loss": 1.9828, "step": 17560 }, { "epoch": 0.5665855365176413, "grad_norm": 0.337890625, "learning_rate": 1.2588533604041006e-05, "loss": 1.9606, "step": 17561 }, { "epoch": 0.5666178003714376, "grad_norm": 0.337890625, "learning_rate": 1.2586980353215053e-05, "loss": 2.0001, "step": 17562 }, { "epoch": 0.566650064225234, "grad_norm": 0.33203125, "learning_rate": 1.2585427128949889e-05, "loss": 1.9423, "step": 17563 }, { "epoch": 0.5666823280790303, "grad_norm": 0.3515625, "learning_rate": 1.2583873931262613e-05, "loss": 1.9949, "step": 17564 }, { "epoch": 0.5667145919328267, "grad_norm": 0.333984375, "learning_rate": 1.2582320760170324e-05, "loss": 1.9794, "step": 17565 }, { "epoch": 0.566746855786623, "grad_norm": 0.33203125, "learning_rate": 1.2580767615690106e-05, "loss": 1.9857, "step": 17566 }, { "epoch": 0.5667791196404194, "grad_norm": 0.34765625, "learning_rate": 1.2579214497839064e-05, "loss": 1.9747, "step": 17567 }, { "epoch": 0.5668113834942157, "grad_norm": 0.328125, "learning_rate": 1.25776614066343e-05, "loss": 1.9314, "step": 17568 }, { "epoch": 0.5668436473480121, "grad_norm": 0.345703125, "learning_rate": 1.2576108342092894e-05, "loss": 1.9497, "step": 17569 }, { "epoch": 0.5668759112018084, "grad_norm": 0.330078125, "learning_rate": 1.2574555304231946e-05, "loss": 1.9834, "step": 17570 }, { "epoch": 0.5669081750556048, "grad_norm": 0.357421875, "learning_rate": 1.2573002293068565e-05, "loss": 1.9909, "step": 17571 }, { "epoch": 0.566940438909401, "grad_norm": 0.33984375, "learning_rate": 1.2571449308619824e-05, "loss": 1.9768, "step": 17572 }, { "epoch": 0.5669727027631974, "grad_norm": 0.34375, "learning_rate": 1.2569896350902828e-05, "loss": 2.0002, "step": 17573 }, { "epoch": 0.5670049666169937, "grad_norm": 0.341796875, "learning_rate": 1.2568343419934673e-05, "loss": 1.9581, "step": 17574 }, { "epoch": 0.5670372304707901, "grad_norm": 0.349609375, "learning_rate": 1.2566790515732446e-05, "loss": 1.9716, "step": 17575 }, { "epoch": 0.5670694943245864, "grad_norm": 0.34375, "learning_rate": 1.2565237638313242e-05, "loss": 1.9701, "step": 17576 }, { "epoch": 0.5671017581783828, "grad_norm": 0.33984375, "learning_rate": 1.256368478769416e-05, "loss": 1.97, "step": 17577 }, { "epoch": 0.5671340220321791, "grad_norm": 0.34375, "learning_rate": 1.2562131963892284e-05, "loss": 1.9493, "step": 17578 }, { "epoch": 0.5671662858859755, "grad_norm": 0.333984375, "learning_rate": 1.2560579166924712e-05, "loss": 1.9608, "step": 17579 }, { "epoch": 0.5671985497397719, "grad_norm": 0.341796875, "learning_rate": 1.2559026396808534e-05, "loss": 1.9787, "step": 17580 }, { "epoch": 0.5672308135935682, "grad_norm": 0.337890625, "learning_rate": 1.2557473653560845e-05, "loss": 1.9877, "step": 17581 }, { "epoch": 0.5672630774473646, "grad_norm": 0.333984375, "learning_rate": 1.255592093719873e-05, "loss": 1.9877, "step": 17582 }, { "epoch": 0.5672953413011609, "grad_norm": 0.328125, "learning_rate": 1.2554368247739287e-05, "loss": 1.9958, "step": 17583 }, { "epoch": 0.5673276051549573, "grad_norm": 0.3515625, "learning_rate": 1.2552815585199602e-05, "loss": 1.9926, "step": 17584 }, { "epoch": 0.5673598690087536, "grad_norm": 0.328125, "learning_rate": 1.2551262949596766e-05, "loss": 1.9022, "step": 17585 }, { "epoch": 0.56739213286255, "grad_norm": 0.326171875, "learning_rate": 1.2549710340947875e-05, "loss": 1.969, "step": 17586 }, { "epoch": 0.5674243967163463, "grad_norm": 0.369140625, "learning_rate": 1.2548157759270012e-05, "loss": 1.9725, "step": 17587 }, { "epoch": 0.5674566605701427, "grad_norm": 0.328125, "learning_rate": 1.2546605204580266e-05, "loss": 1.9174, "step": 17588 }, { "epoch": 0.567488924423939, "grad_norm": 0.33984375, "learning_rate": 1.254505267689574e-05, "loss": 1.9644, "step": 17589 }, { "epoch": 0.5675211882777353, "grad_norm": 0.326171875, "learning_rate": 1.2543500176233502e-05, "loss": 1.9734, "step": 17590 }, { "epoch": 0.5675534521315316, "grad_norm": 0.330078125, "learning_rate": 1.2541947702610656e-05, "loss": 1.9987, "step": 17591 }, { "epoch": 0.567585715985328, "grad_norm": 0.328125, "learning_rate": 1.2540395256044291e-05, "loss": 1.9566, "step": 17592 }, { "epoch": 0.5676179798391243, "grad_norm": 0.33203125, "learning_rate": 1.2538842836551485e-05, "loss": 1.9659, "step": 17593 }, { "epoch": 0.5676502436929207, "grad_norm": 0.3203125, "learning_rate": 1.2537290444149328e-05, "loss": 1.9681, "step": 17594 }, { "epoch": 0.567682507546717, "grad_norm": 0.33984375, "learning_rate": 1.253573807885492e-05, "loss": 1.9657, "step": 17595 }, { "epoch": 0.5677147714005134, "grad_norm": 0.337890625, "learning_rate": 1.2534185740685335e-05, "loss": 1.9782, "step": 17596 }, { "epoch": 0.5677470352543097, "grad_norm": 0.33203125, "learning_rate": 1.2532633429657664e-05, "loss": 1.9575, "step": 17597 }, { "epoch": 0.5677792991081061, "grad_norm": 0.32421875, "learning_rate": 1.2531081145788989e-05, "loss": 1.9612, "step": 17598 }, { "epoch": 0.5678115629619024, "grad_norm": 0.326171875, "learning_rate": 1.2529528889096413e-05, "loss": 1.9671, "step": 17599 }, { "epoch": 0.5678438268156988, "grad_norm": 0.328125, "learning_rate": 1.2527976659597003e-05, "loss": 1.9911, "step": 17600 }, { "epoch": 0.5678760906694952, "grad_norm": 0.33984375, "learning_rate": 1.252642445730785e-05, "loss": 1.9978, "step": 17601 }, { "epoch": 0.5679083545232915, "grad_norm": 0.4921875, "learning_rate": 1.2524872282246051e-05, "loss": 1.9728, "step": 17602 }, { "epoch": 0.5679406183770879, "grad_norm": 0.34375, "learning_rate": 1.2523320134428676e-05, "loss": 1.9778, "step": 17603 }, { "epoch": 0.5679728822308842, "grad_norm": 0.333984375, "learning_rate": 1.252176801387282e-05, "loss": 1.981, "step": 17604 }, { "epoch": 0.5680051460846806, "grad_norm": 0.345703125, "learning_rate": 1.2520215920595562e-05, "loss": 1.9633, "step": 17605 }, { "epoch": 0.5680374099384768, "grad_norm": 0.3515625, "learning_rate": 1.2518663854613987e-05, "loss": 1.9622, "step": 17606 }, { "epoch": 0.5680696737922732, "grad_norm": 0.3359375, "learning_rate": 1.2517111815945179e-05, "loss": 1.9867, "step": 17607 }, { "epoch": 0.5681019376460695, "grad_norm": 0.326171875, "learning_rate": 1.2515559804606226e-05, "loss": 1.98, "step": 17608 }, { "epoch": 0.5681342014998659, "grad_norm": 0.333984375, "learning_rate": 1.2514007820614206e-05, "loss": 1.9607, "step": 17609 }, { "epoch": 0.5681664653536622, "grad_norm": 0.34375, "learning_rate": 1.2512455863986204e-05, "loss": 1.9971, "step": 17610 }, { "epoch": 0.5681987292074586, "grad_norm": 0.326171875, "learning_rate": 1.2510903934739308e-05, "loss": 1.9666, "step": 17611 }, { "epoch": 0.5682309930612549, "grad_norm": 0.345703125, "learning_rate": 1.2509352032890592e-05, "loss": 1.9977, "step": 17612 }, { "epoch": 0.5682632569150513, "grad_norm": 0.33203125, "learning_rate": 1.2507800158457141e-05, "loss": 1.9733, "step": 17613 }, { "epoch": 0.5682955207688476, "grad_norm": 0.34375, "learning_rate": 1.2506248311456043e-05, "loss": 1.9344, "step": 17614 }, { "epoch": 0.568327784622644, "grad_norm": 0.34375, "learning_rate": 1.2504696491904369e-05, "loss": 1.9642, "step": 17615 }, { "epoch": 0.5683600484764403, "grad_norm": 0.349609375, "learning_rate": 1.2503144699819206e-05, "loss": 1.9745, "step": 17616 }, { "epoch": 0.5683923123302367, "grad_norm": 0.34765625, "learning_rate": 1.2501592935217641e-05, "loss": 1.9736, "step": 17617 }, { "epoch": 0.568424576184033, "grad_norm": 0.36328125, "learning_rate": 1.2500041198116743e-05, "loss": 2.0041, "step": 17618 }, { "epoch": 0.5684568400378294, "grad_norm": 0.33203125, "learning_rate": 1.2498489488533598e-05, "loss": 1.965, "step": 17619 }, { "epoch": 0.5684891038916258, "grad_norm": 0.33984375, "learning_rate": 1.2496937806485294e-05, "loss": 1.9796, "step": 17620 }, { "epoch": 0.5685213677454221, "grad_norm": 0.361328125, "learning_rate": 1.2495386151988892e-05, "loss": 1.9955, "step": 17621 }, { "epoch": 0.5685536315992185, "grad_norm": 0.33203125, "learning_rate": 1.2493834525061487e-05, "loss": 1.972, "step": 17622 }, { "epoch": 0.5685858954530147, "grad_norm": 0.353515625, "learning_rate": 1.2492282925720158e-05, "loss": 1.9871, "step": 17623 }, { "epoch": 0.5686181593068111, "grad_norm": 0.35546875, "learning_rate": 1.2490731353981976e-05, "loss": 1.9746, "step": 17624 }, { "epoch": 0.5686504231606074, "grad_norm": 0.345703125, "learning_rate": 1.2489179809864017e-05, "loss": 1.9548, "step": 17625 }, { "epoch": 0.5686826870144038, "grad_norm": 0.341796875, "learning_rate": 1.2487628293383377e-05, "loss": 1.9716, "step": 17626 }, { "epoch": 0.5687149508682001, "grad_norm": 0.34765625, "learning_rate": 1.2486076804557115e-05, "loss": 1.9818, "step": 17627 }, { "epoch": 0.5687472147219965, "grad_norm": 0.34375, "learning_rate": 1.2484525343402316e-05, "loss": 1.9878, "step": 17628 }, { "epoch": 0.5687794785757928, "grad_norm": 0.33203125, "learning_rate": 1.2482973909936055e-05, "loss": 2.0185, "step": 17629 }, { "epoch": 0.5688117424295892, "grad_norm": 0.34375, "learning_rate": 1.248142250417542e-05, "loss": 1.9965, "step": 17630 }, { "epoch": 0.5688440062833855, "grad_norm": 0.3359375, "learning_rate": 1.2479871126137473e-05, "loss": 1.9667, "step": 17631 }, { "epoch": 0.5688762701371819, "grad_norm": 0.341796875, "learning_rate": 1.2478319775839297e-05, "loss": 1.9658, "step": 17632 }, { "epoch": 0.5689085339909782, "grad_norm": 0.330078125, "learning_rate": 1.2476768453297972e-05, "loss": 1.9771, "step": 17633 }, { "epoch": 0.5689407978447746, "grad_norm": 0.322265625, "learning_rate": 1.2475217158530566e-05, "loss": 1.9689, "step": 17634 }, { "epoch": 0.5689730616985709, "grad_norm": 0.337890625, "learning_rate": 1.2473665891554157e-05, "loss": 1.984, "step": 17635 }, { "epoch": 0.5690053255523673, "grad_norm": 0.3359375, "learning_rate": 1.2472114652385827e-05, "loss": 1.9825, "step": 17636 }, { "epoch": 0.5690375894061636, "grad_norm": 0.33203125, "learning_rate": 1.247056344104264e-05, "loss": 1.979, "step": 17637 }, { "epoch": 0.56906985325996, "grad_norm": 0.341796875, "learning_rate": 1.2469012257541675e-05, "loss": 1.9614, "step": 17638 }, { "epoch": 0.5691021171137562, "grad_norm": 0.33984375, "learning_rate": 1.2467461101900014e-05, "loss": 1.9816, "step": 17639 }, { "epoch": 0.5691343809675526, "grad_norm": 0.322265625, "learning_rate": 1.246590997413472e-05, "loss": 1.9635, "step": 17640 }, { "epoch": 0.569166644821349, "grad_norm": 0.333984375, "learning_rate": 1.246435887426287e-05, "loss": 1.9673, "step": 17641 }, { "epoch": 0.5691989086751453, "grad_norm": 0.337890625, "learning_rate": 1.2462807802301542e-05, "loss": 1.9646, "step": 17642 }, { "epoch": 0.5692311725289417, "grad_norm": 0.326171875, "learning_rate": 1.2461256758267804e-05, "loss": 1.9498, "step": 17643 }, { "epoch": 0.569263436382738, "grad_norm": 0.3359375, "learning_rate": 1.2459705742178728e-05, "loss": 1.9996, "step": 17644 }, { "epoch": 0.5692957002365344, "grad_norm": 0.333984375, "learning_rate": 1.2458154754051392e-05, "loss": 1.9767, "step": 17645 }, { "epoch": 0.5693279640903307, "grad_norm": 0.349609375, "learning_rate": 1.2456603793902863e-05, "loss": 1.959, "step": 17646 }, { "epoch": 0.5693602279441271, "grad_norm": 0.333984375, "learning_rate": 1.2455052861750215e-05, "loss": 1.9582, "step": 17647 }, { "epoch": 0.5693924917979234, "grad_norm": 0.3203125, "learning_rate": 1.2453501957610525e-05, "loss": 1.9589, "step": 17648 }, { "epoch": 0.5694247556517198, "grad_norm": 0.337890625, "learning_rate": 1.245195108150085e-05, "loss": 1.9734, "step": 17649 }, { "epoch": 0.5694570195055161, "grad_norm": 0.341796875, "learning_rate": 1.245040023343827e-05, "loss": 1.9873, "step": 17650 }, { "epoch": 0.5694892833593125, "grad_norm": 0.318359375, "learning_rate": 1.2448849413439866e-05, "loss": 1.9547, "step": 17651 }, { "epoch": 0.5695215472131088, "grad_norm": 0.33984375, "learning_rate": 1.2447298621522685e-05, "loss": 1.9828, "step": 17652 }, { "epoch": 0.5695538110669052, "grad_norm": 0.345703125, "learning_rate": 1.2445747857703813e-05, "loss": 1.977, "step": 17653 }, { "epoch": 0.5695860749207015, "grad_norm": 0.326171875, "learning_rate": 1.244419712200032e-05, "loss": 1.982, "step": 17654 }, { "epoch": 0.5696183387744979, "grad_norm": 0.326171875, "learning_rate": 1.244264641442927e-05, "loss": 1.9632, "step": 17655 }, { "epoch": 0.5696506026282941, "grad_norm": 0.333984375, "learning_rate": 1.2441095735007729e-05, "loss": 1.9515, "step": 17656 }, { "epoch": 0.5696828664820905, "grad_norm": 0.333984375, "learning_rate": 1.2439545083752778e-05, "loss": 1.9893, "step": 17657 }, { "epoch": 0.5697151303358868, "grad_norm": 0.330078125, "learning_rate": 1.2437994460681473e-05, "loss": 1.9729, "step": 17658 }, { "epoch": 0.5697473941896832, "grad_norm": 0.333984375, "learning_rate": 1.2436443865810886e-05, "loss": 1.9642, "step": 17659 }, { "epoch": 0.5697796580434796, "grad_norm": 0.3359375, "learning_rate": 1.243489329915809e-05, "loss": 1.992, "step": 17660 }, { "epoch": 0.5698119218972759, "grad_norm": 0.337890625, "learning_rate": 1.2433342760740146e-05, "loss": 2.0019, "step": 17661 }, { "epoch": 0.5698441857510723, "grad_norm": 0.330078125, "learning_rate": 1.2431792250574121e-05, "loss": 1.9476, "step": 17662 }, { "epoch": 0.5698764496048686, "grad_norm": 0.3203125, "learning_rate": 1.2430241768677088e-05, "loss": 1.9714, "step": 17663 }, { "epoch": 0.569908713458665, "grad_norm": 0.326171875, "learning_rate": 1.242869131506611e-05, "loss": 1.9824, "step": 17664 }, { "epoch": 0.5699409773124613, "grad_norm": 0.34375, "learning_rate": 1.242714088975825e-05, "loss": 1.9532, "step": 17665 }, { "epoch": 0.5699732411662577, "grad_norm": 0.373046875, "learning_rate": 1.2425590492770581e-05, "loss": 1.9362, "step": 17666 }, { "epoch": 0.570005505020054, "grad_norm": 0.333984375, "learning_rate": 1.2424040124120164e-05, "loss": 2.007, "step": 17667 }, { "epoch": 0.5700377688738504, "grad_norm": 0.34765625, "learning_rate": 1.2422489783824066e-05, "loss": 1.9723, "step": 17668 }, { "epoch": 0.5700700327276467, "grad_norm": 0.333984375, "learning_rate": 1.2420939471899348e-05, "loss": 1.9392, "step": 17669 }, { "epoch": 0.5701022965814431, "grad_norm": 0.328125, "learning_rate": 1.2419389188363083e-05, "loss": 1.9926, "step": 17670 }, { "epoch": 0.5701345604352394, "grad_norm": 0.33203125, "learning_rate": 1.2417838933232326e-05, "loss": 1.9577, "step": 17671 }, { "epoch": 0.5701668242890358, "grad_norm": 0.3359375, "learning_rate": 1.2416288706524148e-05, "loss": 1.9753, "step": 17672 }, { "epoch": 0.570199088142832, "grad_norm": 0.341796875, "learning_rate": 1.241473850825561e-05, "loss": 1.9178, "step": 17673 }, { "epoch": 0.5702313519966284, "grad_norm": 0.33203125, "learning_rate": 1.2413188338443776e-05, "loss": 1.9852, "step": 17674 }, { "epoch": 0.5702636158504247, "grad_norm": 0.345703125, "learning_rate": 1.2411638197105707e-05, "loss": 1.956, "step": 17675 }, { "epoch": 0.5702958797042211, "grad_norm": 0.33984375, "learning_rate": 1.2410088084258472e-05, "loss": 1.9507, "step": 17676 }, { "epoch": 0.5703281435580174, "grad_norm": 0.3359375, "learning_rate": 1.2408537999919127e-05, "loss": 1.9992, "step": 17677 }, { "epoch": 0.5703604074118138, "grad_norm": 0.333984375, "learning_rate": 1.2406987944104735e-05, "loss": 1.9991, "step": 17678 }, { "epoch": 0.5703926712656101, "grad_norm": 0.34765625, "learning_rate": 1.2405437916832366e-05, "loss": 1.9866, "step": 17679 }, { "epoch": 0.5704249351194065, "grad_norm": 0.328125, "learning_rate": 1.2403887918119067e-05, "loss": 1.997, "step": 17680 }, { "epoch": 0.5704571989732029, "grad_norm": 0.357421875, "learning_rate": 1.2402337947981908e-05, "loss": 1.9906, "step": 17681 }, { "epoch": 0.5704894628269992, "grad_norm": 0.328125, "learning_rate": 1.2400788006437959e-05, "loss": 1.9848, "step": 17682 }, { "epoch": 0.5705217266807956, "grad_norm": 0.330078125, "learning_rate": 1.2399238093504259e-05, "loss": 2.0071, "step": 17683 }, { "epoch": 0.5705539905345919, "grad_norm": 0.359375, "learning_rate": 1.2397688209197884e-05, "loss": 1.9686, "step": 17684 }, { "epoch": 0.5705862543883883, "grad_norm": 0.32421875, "learning_rate": 1.2396138353535896e-05, "loss": 2.0108, "step": 17685 }, { "epoch": 0.5706185182421846, "grad_norm": 0.328125, "learning_rate": 1.2394588526535345e-05, "loss": 1.9999, "step": 17686 }, { "epoch": 0.570650782095981, "grad_norm": 0.34375, "learning_rate": 1.2393038728213288e-05, "loss": 1.9841, "step": 17687 }, { "epoch": 0.5706830459497773, "grad_norm": 0.341796875, "learning_rate": 1.2391488958586802e-05, "loss": 1.9707, "step": 17688 }, { "epoch": 0.5707153098035737, "grad_norm": 0.353515625, "learning_rate": 1.2389939217672929e-05, "loss": 2.0304, "step": 17689 }, { "epoch": 0.5707475736573699, "grad_norm": 0.33203125, "learning_rate": 1.2388389505488729e-05, "loss": 1.9894, "step": 17690 }, { "epoch": 0.5707798375111663, "grad_norm": 0.34765625, "learning_rate": 1.238683982205127e-05, "loss": 1.9747, "step": 17691 }, { "epoch": 0.5708121013649626, "grad_norm": 0.33203125, "learning_rate": 1.2385290167377599e-05, "loss": 1.9495, "step": 17692 }, { "epoch": 0.570844365218759, "grad_norm": 0.333984375, "learning_rate": 1.2383740541484779e-05, "loss": 1.9644, "step": 17693 }, { "epoch": 0.5708766290725553, "grad_norm": 0.34765625, "learning_rate": 1.2382190944389868e-05, "loss": 1.9908, "step": 17694 }, { "epoch": 0.5709088929263517, "grad_norm": 0.3359375, "learning_rate": 1.238064137610992e-05, "loss": 1.9783, "step": 17695 }, { "epoch": 0.570941156780148, "grad_norm": 0.345703125, "learning_rate": 1.2379091836661991e-05, "loss": 1.9558, "step": 17696 }, { "epoch": 0.5709734206339444, "grad_norm": 0.34375, "learning_rate": 1.2377542326063139e-05, "loss": 1.9852, "step": 17697 }, { "epoch": 0.5710056844877407, "grad_norm": 0.33203125, "learning_rate": 1.2375992844330422e-05, "loss": 2.0181, "step": 17698 }, { "epoch": 0.5710379483415371, "grad_norm": 0.34375, "learning_rate": 1.2374443391480892e-05, "loss": 1.9843, "step": 17699 }, { "epoch": 0.5710702121953334, "grad_norm": 0.345703125, "learning_rate": 1.2372893967531604e-05, "loss": 1.9798, "step": 17700 }, { "epoch": 0.5711024760491298, "grad_norm": 0.341796875, "learning_rate": 1.237134457249962e-05, "loss": 1.9604, "step": 17701 }, { "epoch": 0.5711347399029262, "grad_norm": 0.353515625, "learning_rate": 1.2369795206401984e-05, "loss": 1.9866, "step": 17702 }, { "epoch": 0.5711670037567225, "grad_norm": 0.34765625, "learning_rate": 1.2368245869255756e-05, "loss": 1.9852, "step": 17703 }, { "epoch": 0.5711992676105189, "grad_norm": 0.322265625, "learning_rate": 1.236669656107799e-05, "loss": 1.9678, "step": 17704 }, { "epoch": 0.5712315314643152, "grad_norm": 0.330078125, "learning_rate": 1.2365147281885741e-05, "loss": 1.9965, "step": 17705 }, { "epoch": 0.5712637953181116, "grad_norm": 0.337890625, "learning_rate": 1.2363598031696058e-05, "loss": 1.9555, "step": 17706 }, { "epoch": 0.5712960591719078, "grad_norm": 0.330078125, "learning_rate": 1.2362048810526e-05, "loss": 1.9477, "step": 17707 }, { "epoch": 0.5713283230257042, "grad_norm": 0.337890625, "learning_rate": 1.2360499618392612e-05, "loss": 1.9969, "step": 17708 }, { "epoch": 0.5713605868795005, "grad_norm": 0.359375, "learning_rate": 1.2358950455312952e-05, "loss": 1.9999, "step": 17709 }, { "epoch": 0.5713928507332969, "grad_norm": 0.337890625, "learning_rate": 1.2357401321304077e-05, "loss": 1.9873, "step": 17710 }, { "epoch": 0.5714251145870932, "grad_norm": 0.330078125, "learning_rate": 1.2355852216383022e-05, "loss": 1.9599, "step": 17711 }, { "epoch": 0.5714573784408896, "grad_norm": 0.349609375, "learning_rate": 1.2354303140566852e-05, "loss": 1.9843, "step": 17712 }, { "epoch": 0.5714896422946859, "grad_norm": 0.333984375, "learning_rate": 1.2352754093872622e-05, "loss": 1.9763, "step": 17713 }, { "epoch": 0.5715219061484823, "grad_norm": 0.345703125, "learning_rate": 1.235120507631737e-05, "loss": 1.9844, "step": 17714 }, { "epoch": 0.5715541700022786, "grad_norm": 0.359375, "learning_rate": 1.2349656087918147e-05, "loss": 1.9822, "step": 17715 }, { "epoch": 0.571586433856075, "grad_norm": 0.330078125, "learning_rate": 1.234810712869202e-05, "loss": 2.003, "step": 17716 }, { "epoch": 0.5716186977098713, "grad_norm": 0.3359375, "learning_rate": 1.2346558198656021e-05, "loss": 1.9899, "step": 17717 }, { "epoch": 0.5716509615636677, "grad_norm": 0.330078125, "learning_rate": 1.2345009297827202e-05, "loss": 1.9851, "step": 17718 }, { "epoch": 0.571683225417464, "grad_norm": 0.33984375, "learning_rate": 1.2343460426222626e-05, "loss": 1.9839, "step": 17719 }, { "epoch": 0.5717154892712604, "grad_norm": 0.333984375, "learning_rate": 1.2341911583859322e-05, "loss": 1.902, "step": 17720 }, { "epoch": 0.5717477531250568, "grad_norm": 0.326171875, "learning_rate": 1.2340362770754353e-05, "loss": 1.9813, "step": 17721 }, { "epoch": 0.5717800169788531, "grad_norm": 0.328125, "learning_rate": 1.2338813986924764e-05, "loss": 1.9806, "step": 17722 }, { "epoch": 0.5718122808326495, "grad_norm": 0.328125, "learning_rate": 1.2337265232387598e-05, "loss": 1.9423, "step": 17723 }, { "epoch": 0.5718445446864457, "grad_norm": 0.328125, "learning_rate": 1.2335716507159906e-05, "loss": 1.9892, "step": 17724 }, { "epoch": 0.5718768085402421, "grad_norm": 0.3203125, "learning_rate": 1.233416781125874e-05, "loss": 1.9831, "step": 17725 }, { "epoch": 0.5719090723940384, "grad_norm": 0.3359375, "learning_rate": 1.2332619144701137e-05, "loss": 1.9807, "step": 17726 }, { "epoch": 0.5719413362478348, "grad_norm": 0.3359375, "learning_rate": 1.233107050750415e-05, "loss": 1.9777, "step": 17727 }, { "epoch": 0.5719736001016311, "grad_norm": 0.322265625, "learning_rate": 1.2329521899684824e-05, "loss": 1.9764, "step": 17728 }, { "epoch": 0.5720058639554275, "grad_norm": 0.3359375, "learning_rate": 1.2327973321260209e-05, "loss": 1.9793, "step": 17729 }, { "epoch": 0.5720381278092238, "grad_norm": 0.33984375, "learning_rate": 1.2326424772247343e-05, "loss": 1.9507, "step": 17730 }, { "epoch": 0.5720703916630202, "grad_norm": 0.3359375, "learning_rate": 1.2324876252663274e-05, "loss": 1.9709, "step": 17731 }, { "epoch": 0.5721026555168165, "grad_norm": 0.326171875, "learning_rate": 1.2323327762525053e-05, "loss": 1.9978, "step": 17732 }, { "epoch": 0.5721349193706129, "grad_norm": 0.361328125, "learning_rate": 1.2321779301849717e-05, "loss": 1.9667, "step": 17733 }, { "epoch": 0.5721671832244092, "grad_norm": 0.330078125, "learning_rate": 1.232023087065431e-05, "loss": 1.9777, "step": 17734 }, { "epoch": 0.5721994470782056, "grad_norm": 0.3515625, "learning_rate": 1.2318682468955885e-05, "loss": 1.971, "step": 17735 }, { "epoch": 0.5722317109320019, "grad_norm": 0.34375, "learning_rate": 1.2317134096771477e-05, "loss": 1.9843, "step": 17736 }, { "epoch": 0.5722639747857983, "grad_norm": 0.34375, "learning_rate": 1.231558575411813e-05, "loss": 1.9423, "step": 17737 }, { "epoch": 0.5722962386395946, "grad_norm": 0.33984375, "learning_rate": 1.2314037441012895e-05, "loss": 1.9548, "step": 17738 }, { "epoch": 0.572328502493391, "grad_norm": 0.337890625, "learning_rate": 1.2312489157472804e-05, "loss": 1.9617, "step": 17739 }, { "epoch": 0.5723607663471872, "grad_norm": 0.326171875, "learning_rate": 1.2310940903514904e-05, "loss": 1.9671, "step": 17740 }, { "epoch": 0.5723930302009836, "grad_norm": 0.328125, "learning_rate": 1.2309392679156244e-05, "loss": 1.965, "step": 17741 }, { "epoch": 0.57242529405478, "grad_norm": 0.333984375, "learning_rate": 1.2307844484413851e-05, "loss": 1.967, "step": 17742 }, { "epoch": 0.5724575579085763, "grad_norm": 0.322265625, "learning_rate": 1.2306296319304777e-05, "loss": 1.9516, "step": 17743 }, { "epoch": 0.5724898217623727, "grad_norm": 0.32421875, "learning_rate": 1.2304748183846067e-05, "loss": 1.9527, "step": 17744 }, { "epoch": 0.572522085616169, "grad_norm": 0.330078125, "learning_rate": 1.2303200078054748e-05, "loss": 1.9609, "step": 17745 }, { "epoch": 0.5725543494699654, "grad_norm": 0.32421875, "learning_rate": 1.2301652001947866e-05, "loss": 1.9691, "step": 17746 }, { "epoch": 0.5725866133237617, "grad_norm": 0.328125, "learning_rate": 1.2300103955542471e-05, "loss": 1.9907, "step": 17747 }, { "epoch": 0.5726188771775581, "grad_norm": 0.3203125, "learning_rate": 1.2298555938855591e-05, "loss": 1.9874, "step": 17748 }, { "epoch": 0.5726511410313544, "grad_norm": 0.328125, "learning_rate": 1.2297007951904266e-05, "loss": 1.98, "step": 17749 }, { "epoch": 0.5726834048851508, "grad_norm": 0.3359375, "learning_rate": 1.2295459994705544e-05, "loss": 1.9944, "step": 17750 }, { "epoch": 0.5727156687389471, "grad_norm": 0.3203125, "learning_rate": 1.2293912067276455e-05, "loss": 1.9707, "step": 17751 }, { "epoch": 0.5727479325927435, "grad_norm": 0.326171875, "learning_rate": 1.229236416963404e-05, "loss": 1.9644, "step": 17752 }, { "epoch": 0.5727801964465398, "grad_norm": 0.33984375, "learning_rate": 1.2290816301795342e-05, "loss": 1.9751, "step": 17753 }, { "epoch": 0.5728124603003362, "grad_norm": 0.32421875, "learning_rate": 1.2289268463777388e-05, "loss": 1.9823, "step": 17754 }, { "epoch": 0.5728447241541325, "grad_norm": 0.34375, "learning_rate": 1.2287720655597225e-05, "loss": 1.9598, "step": 17755 }, { "epoch": 0.5728769880079289, "grad_norm": 0.34765625, "learning_rate": 1.228617287727189e-05, "loss": 1.9677, "step": 17756 }, { "epoch": 0.5729092518617251, "grad_norm": 0.330078125, "learning_rate": 1.2284625128818413e-05, "loss": 1.9452, "step": 17757 }, { "epoch": 0.5729415157155215, "grad_norm": 0.328125, "learning_rate": 1.2283077410253835e-05, "loss": 1.9811, "step": 17758 }, { "epoch": 0.5729737795693178, "grad_norm": 0.333984375, "learning_rate": 1.2281529721595195e-05, "loss": 1.9951, "step": 17759 }, { "epoch": 0.5730060434231142, "grad_norm": 0.341796875, "learning_rate": 1.2279982062859525e-05, "loss": 1.9793, "step": 17760 }, { "epoch": 0.5730383072769106, "grad_norm": 0.34375, "learning_rate": 1.2278434434063857e-05, "loss": 1.9857, "step": 17761 }, { "epoch": 0.5730705711307069, "grad_norm": 0.328125, "learning_rate": 1.2276886835225231e-05, "loss": 1.9591, "step": 17762 }, { "epoch": 0.5731028349845033, "grad_norm": 0.337890625, "learning_rate": 1.2275339266360685e-05, "loss": 2.0018, "step": 17763 }, { "epoch": 0.5731350988382996, "grad_norm": 0.337890625, "learning_rate": 1.2273791727487248e-05, "loss": 1.9938, "step": 17764 }, { "epoch": 0.573167362692096, "grad_norm": 0.328125, "learning_rate": 1.2272244218621954e-05, "loss": 1.9932, "step": 17765 }, { "epoch": 0.5731996265458923, "grad_norm": 0.3359375, "learning_rate": 1.2270696739781844e-05, "loss": 1.9941, "step": 17766 }, { "epoch": 0.5732318903996887, "grad_norm": 0.33203125, "learning_rate": 1.2269149290983942e-05, "loss": 1.9751, "step": 17767 }, { "epoch": 0.573264154253485, "grad_norm": 0.345703125, "learning_rate": 1.2267601872245285e-05, "loss": 1.9765, "step": 17768 }, { "epoch": 0.5732964181072814, "grad_norm": 0.34765625, "learning_rate": 1.2266054483582909e-05, "loss": 1.9827, "step": 17769 }, { "epoch": 0.5733286819610777, "grad_norm": 0.35546875, "learning_rate": 1.226450712501384e-05, "loss": 1.9782, "step": 17770 }, { "epoch": 0.5733609458148741, "grad_norm": 0.353515625, "learning_rate": 1.2262959796555117e-05, "loss": 1.9594, "step": 17771 }, { "epoch": 0.5733932096686704, "grad_norm": 0.35546875, "learning_rate": 1.2261412498223774e-05, "loss": 1.9992, "step": 17772 }, { "epoch": 0.5734254735224668, "grad_norm": 0.34375, "learning_rate": 1.2259865230036829e-05, "loss": 1.9953, "step": 17773 }, { "epoch": 0.573457737376263, "grad_norm": 0.361328125, "learning_rate": 1.2258317992011324e-05, "loss": 1.9631, "step": 17774 }, { "epoch": 0.5734900012300594, "grad_norm": 0.33984375, "learning_rate": 1.2256770784164294e-05, "loss": 1.9878, "step": 17775 }, { "epoch": 0.5735222650838557, "grad_norm": 0.357421875, "learning_rate": 1.2255223606512757e-05, "loss": 2.0186, "step": 17776 }, { "epoch": 0.5735545289376521, "grad_norm": 0.33984375, "learning_rate": 1.2253676459073746e-05, "loss": 1.9467, "step": 17777 }, { "epoch": 0.5735867927914484, "grad_norm": 0.337890625, "learning_rate": 1.2252129341864304e-05, "loss": 1.9914, "step": 17778 }, { "epoch": 0.5736190566452448, "grad_norm": 0.373046875, "learning_rate": 1.2250582254901445e-05, "loss": 1.9824, "step": 17779 }, { "epoch": 0.5736513204990411, "grad_norm": 0.322265625, "learning_rate": 1.2249035198202203e-05, "loss": 1.9534, "step": 17780 }, { "epoch": 0.5736835843528375, "grad_norm": 0.345703125, "learning_rate": 1.2247488171783613e-05, "loss": 1.9773, "step": 17781 }, { "epoch": 0.5737158482066339, "grad_norm": 0.359375, "learning_rate": 1.2245941175662693e-05, "loss": 1.9818, "step": 17782 }, { "epoch": 0.5737481120604302, "grad_norm": 0.326171875, "learning_rate": 1.2244394209856478e-05, "loss": 1.9635, "step": 17783 }, { "epoch": 0.5737803759142266, "grad_norm": 0.3671875, "learning_rate": 1.2242847274382e-05, "loss": 1.9712, "step": 17784 }, { "epoch": 0.5738126397680229, "grad_norm": 0.341796875, "learning_rate": 1.2241300369256276e-05, "loss": 1.9872, "step": 17785 }, { "epoch": 0.5738449036218193, "grad_norm": 0.33984375, "learning_rate": 1.2239753494496336e-05, "loss": 1.9841, "step": 17786 }, { "epoch": 0.5738771674756156, "grad_norm": 0.33984375, "learning_rate": 1.2238206650119216e-05, "loss": 2.0038, "step": 17787 }, { "epoch": 0.573909431329412, "grad_norm": 0.33984375, "learning_rate": 1.2236659836141932e-05, "loss": 1.9695, "step": 17788 }, { "epoch": 0.5739416951832083, "grad_norm": 0.318359375, "learning_rate": 1.2235113052581513e-05, "loss": 1.9951, "step": 17789 }, { "epoch": 0.5739739590370047, "grad_norm": 0.337890625, "learning_rate": 1.2233566299454991e-05, "loss": 1.9956, "step": 17790 }, { "epoch": 0.5740062228908009, "grad_norm": 0.34375, "learning_rate": 1.2232019576779382e-05, "loss": 1.973, "step": 17791 }, { "epoch": 0.5740384867445973, "grad_norm": 0.328125, "learning_rate": 1.2230472884571715e-05, "loss": 1.9862, "step": 17792 }, { "epoch": 0.5740707505983936, "grad_norm": 0.3359375, "learning_rate": 1.222892622284902e-05, "loss": 1.9816, "step": 17793 }, { "epoch": 0.57410301445219, "grad_norm": 0.345703125, "learning_rate": 1.2227379591628314e-05, "loss": 1.9407, "step": 17794 }, { "epoch": 0.5741352783059863, "grad_norm": 0.33203125, "learning_rate": 1.2225832990926625e-05, "loss": 1.9643, "step": 17795 }, { "epoch": 0.5741675421597827, "grad_norm": 0.333984375, "learning_rate": 1.2224286420760975e-05, "loss": 1.9871, "step": 17796 }, { "epoch": 0.574199806013579, "grad_norm": 0.337890625, "learning_rate": 1.2222739881148393e-05, "loss": 1.9697, "step": 17797 }, { "epoch": 0.5742320698673754, "grad_norm": 0.3359375, "learning_rate": 1.2221193372105894e-05, "loss": 1.9531, "step": 17798 }, { "epoch": 0.5742643337211717, "grad_norm": 0.330078125, "learning_rate": 1.2219646893650505e-05, "loss": 1.9408, "step": 17799 }, { "epoch": 0.5742965975749681, "grad_norm": 0.330078125, "learning_rate": 1.2218100445799255e-05, "loss": 1.9795, "step": 17800 }, { "epoch": 0.5743288614287644, "grad_norm": 0.330078125, "learning_rate": 1.221655402856915e-05, "loss": 1.9766, "step": 17801 }, { "epoch": 0.5743611252825608, "grad_norm": 0.328125, "learning_rate": 1.2215007641977224e-05, "loss": 1.9947, "step": 17802 }, { "epoch": 0.5743933891363572, "grad_norm": 0.326171875, "learning_rate": 1.2213461286040505e-05, "loss": 2.0076, "step": 17803 }, { "epoch": 0.5744256529901535, "grad_norm": 0.328125, "learning_rate": 1.2211914960775991e-05, "loss": 1.9884, "step": 17804 }, { "epoch": 0.5744579168439499, "grad_norm": 0.326171875, "learning_rate": 1.2210368666200724e-05, "loss": 1.9577, "step": 17805 }, { "epoch": 0.5744901806977462, "grad_norm": 0.34375, "learning_rate": 1.2208822402331723e-05, "loss": 2.0067, "step": 17806 }, { "epoch": 0.5745224445515426, "grad_norm": 0.337890625, "learning_rate": 1.2207276169185995e-05, "loss": 1.9508, "step": 17807 }, { "epoch": 0.5745547084053388, "grad_norm": 0.345703125, "learning_rate": 1.2205729966780565e-05, "loss": 2.0048, "step": 17808 }, { "epoch": 0.5745869722591352, "grad_norm": 0.337890625, "learning_rate": 1.2204183795132463e-05, "loss": 1.9848, "step": 17809 }, { "epoch": 0.5746192361129315, "grad_norm": 0.330078125, "learning_rate": 1.2202637654258698e-05, "loss": 1.9846, "step": 17810 }, { "epoch": 0.5746514999667279, "grad_norm": 0.33203125, "learning_rate": 1.2201091544176284e-05, "loss": 1.9681, "step": 17811 }, { "epoch": 0.5746837638205242, "grad_norm": 0.341796875, "learning_rate": 1.2199545464902255e-05, "loss": 1.9984, "step": 17812 }, { "epoch": 0.5747160276743206, "grad_norm": 0.33984375, "learning_rate": 1.2197999416453616e-05, "loss": 1.9893, "step": 17813 }, { "epoch": 0.5747482915281169, "grad_norm": 0.333984375, "learning_rate": 1.2196453398847388e-05, "loss": 1.9912, "step": 17814 }, { "epoch": 0.5747805553819133, "grad_norm": 0.349609375, "learning_rate": 1.2194907412100595e-05, "loss": 1.9656, "step": 17815 }, { "epoch": 0.5748128192357096, "grad_norm": 0.328125, "learning_rate": 1.2193361456230246e-05, "loss": 1.9914, "step": 17816 }, { "epoch": 0.574845083089506, "grad_norm": 0.333984375, "learning_rate": 1.2191815531253359e-05, "loss": 1.9384, "step": 17817 }, { "epoch": 0.5748773469433023, "grad_norm": 0.33203125, "learning_rate": 1.2190269637186954e-05, "loss": 1.9869, "step": 17818 }, { "epoch": 0.5749096107970987, "grad_norm": 0.333984375, "learning_rate": 1.2188723774048044e-05, "loss": 1.9782, "step": 17819 }, { "epoch": 0.574941874650895, "grad_norm": 0.3359375, "learning_rate": 1.2187177941853645e-05, "loss": 1.9784, "step": 17820 }, { "epoch": 0.5749741385046914, "grad_norm": 0.326171875, "learning_rate": 1.2185632140620776e-05, "loss": 1.9499, "step": 17821 }, { "epoch": 0.5750064023584878, "grad_norm": 0.333984375, "learning_rate": 1.2184086370366448e-05, "loss": 1.9728, "step": 17822 }, { "epoch": 0.575038666212284, "grad_norm": 0.330078125, "learning_rate": 1.2182540631107676e-05, "loss": 1.9751, "step": 17823 }, { "epoch": 0.5750709300660805, "grad_norm": 0.337890625, "learning_rate": 1.2180994922861477e-05, "loss": 1.9844, "step": 17824 }, { "epoch": 0.5751031939198767, "grad_norm": 0.34375, "learning_rate": 1.2179449245644864e-05, "loss": 2.004, "step": 17825 }, { "epoch": 0.5751354577736731, "grad_norm": 0.326171875, "learning_rate": 1.2177903599474848e-05, "loss": 1.9662, "step": 17826 }, { "epoch": 0.5751677216274694, "grad_norm": 0.326171875, "learning_rate": 1.2176357984368452e-05, "loss": 1.9661, "step": 17827 }, { "epoch": 0.5751999854812658, "grad_norm": 0.3671875, "learning_rate": 1.2174812400342672e-05, "loss": 1.9791, "step": 17828 }, { "epoch": 0.5752322493350621, "grad_norm": 0.33984375, "learning_rate": 1.2173266847414532e-05, "loss": 1.9737, "step": 17829 }, { "epoch": 0.5752645131888585, "grad_norm": 0.341796875, "learning_rate": 1.2171721325601044e-05, "loss": 1.9515, "step": 17830 }, { "epoch": 0.5752967770426548, "grad_norm": 0.361328125, "learning_rate": 1.2170175834919223e-05, "loss": 1.9719, "step": 17831 }, { "epoch": 0.5753290408964512, "grad_norm": 0.33984375, "learning_rate": 1.2168630375386069e-05, "loss": 1.9757, "step": 17832 }, { "epoch": 0.5753613047502475, "grad_norm": 0.337890625, "learning_rate": 1.2167084947018604e-05, "loss": 1.9708, "step": 17833 }, { "epoch": 0.5753935686040439, "grad_norm": 0.35546875, "learning_rate": 1.216553954983384e-05, "loss": 1.9334, "step": 17834 }, { "epoch": 0.5754258324578402, "grad_norm": 0.34375, "learning_rate": 1.2163994183848775e-05, "loss": 1.9526, "step": 17835 }, { "epoch": 0.5754580963116366, "grad_norm": 0.337890625, "learning_rate": 1.2162448849080431e-05, "loss": 1.9675, "step": 17836 }, { "epoch": 0.5754903601654329, "grad_norm": 0.3359375, "learning_rate": 1.2160903545545819e-05, "loss": 1.9654, "step": 17837 }, { "epoch": 0.5755226240192293, "grad_norm": 0.353515625, "learning_rate": 1.215935827326194e-05, "loss": 1.9534, "step": 17838 }, { "epoch": 0.5755548878730256, "grad_norm": 0.330078125, "learning_rate": 1.2157813032245804e-05, "loss": 1.9757, "step": 17839 }, { "epoch": 0.575587151726822, "grad_norm": 0.388671875, "learning_rate": 1.215626782251443e-05, "loss": 1.9871, "step": 17840 }, { "epoch": 0.5756194155806182, "grad_norm": 0.357421875, "learning_rate": 1.2154722644084816e-05, "loss": 1.9817, "step": 17841 }, { "epoch": 0.5756516794344146, "grad_norm": 0.33984375, "learning_rate": 1.2153177496973972e-05, "loss": 1.9818, "step": 17842 }, { "epoch": 0.575683943288211, "grad_norm": 0.36328125, "learning_rate": 1.2151632381198912e-05, "loss": 1.9647, "step": 17843 }, { "epoch": 0.5757162071420073, "grad_norm": 0.34375, "learning_rate": 1.2150087296776636e-05, "loss": 1.9787, "step": 17844 }, { "epoch": 0.5757484709958037, "grad_norm": 0.341796875, "learning_rate": 1.2148542243724155e-05, "loss": 1.9719, "step": 17845 }, { "epoch": 0.5757807348496, "grad_norm": 0.333984375, "learning_rate": 1.2146997222058479e-05, "loss": 1.9785, "step": 17846 }, { "epoch": 0.5758129987033964, "grad_norm": 0.34375, "learning_rate": 1.2145452231796605e-05, "loss": 1.989, "step": 17847 }, { "epoch": 0.5758452625571927, "grad_norm": 0.365234375, "learning_rate": 1.2143907272955549e-05, "loss": 2.005, "step": 17848 }, { "epoch": 0.5758775264109891, "grad_norm": 0.341796875, "learning_rate": 1.2142362345552314e-05, "loss": 1.9523, "step": 17849 }, { "epoch": 0.5759097902647854, "grad_norm": 0.349609375, "learning_rate": 1.21408174496039e-05, "loss": 1.9965, "step": 17850 }, { "epoch": 0.5759420541185818, "grad_norm": 0.33984375, "learning_rate": 1.2139272585127317e-05, "loss": 1.9228, "step": 17851 }, { "epoch": 0.5759743179723781, "grad_norm": 0.341796875, "learning_rate": 1.2137727752139573e-05, "loss": 1.9728, "step": 17852 }, { "epoch": 0.5760065818261745, "grad_norm": 0.361328125, "learning_rate": 1.2136182950657664e-05, "loss": 1.9721, "step": 17853 }, { "epoch": 0.5760388456799708, "grad_norm": 0.34375, "learning_rate": 1.2134638180698599e-05, "loss": 1.9942, "step": 17854 }, { "epoch": 0.5760711095337672, "grad_norm": 0.32421875, "learning_rate": 1.2133093442279385e-05, "loss": 1.9667, "step": 17855 }, { "epoch": 0.5761033733875635, "grad_norm": 0.345703125, "learning_rate": 1.2131548735417019e-05, "loss": 1.9709, "step": 17856 }, { "epoch": 0.5761356372413599, "grad_norm": 0.33984375, "learning_rate": 1.2130004060128507e-05, "loss": 1.9621, "step": 17857 }, { "epoch": 0.5761679010951561, "grad_norm": 0.328125, "learning_rate": 1.2128459416430857e-05, "loss": 1.9801, "step": 17858 }, { "epoch": 0.5762001649489525, "grad_norm": 0.34765625, "learning_rate": 1.2126914804341054e-05, "loss": 1.9872, "step": 17859 }, { "epoch": 0.5762324288027488, "grad_norm": 0.326171875, "learning_rate": 1.2125370223876116e-05, "loss": 1.9917, "step": 17860 }, { "epoch": 0.5762646926565452, "grad_norm": 0.333984375, "learning_rate": 1.2123825675053044e-05, "loss": 1.965, "step": 17861 }, { "epoch": 0.5762969565103415, "grad_norm": 0.33984375, "learning_rate": 1.2122281157888838e-05, "loss": 1.9815, "step": 17862 }, { "epoch": 0.5763292203641379, "grad_norm": 0.34375, "learning_rate": 1.2120736672400489e-05, "loss": 1.9884, "step": 17863 }, { "epoch": 0.5763614842179343, "grad_norm": 0.34375, "learning_rate": 1.2119192218605008e-05, "loss": 1.9632, "step": 17864 }, { "epoch": 0.5763937480717306, "grad_norm": 0.330078125, "learning_rate": 1.2117647796519398e-05, "loss": 1.9528, "step": 17865 }, { "epoch": 0.576426011925527, "grad_norm": 0.341796875, "learning_rate": 1.2116103406160646e-05, "loss": 1.9657, "step": 17866 }, { "epoch": 0.5764582757793233, "grad_norm": 0.333984375, "learning_rate": 1.2114559047545757e-05, "loss": 1.9554, "step": 17867 }, { "epoch": 0.5764905396331197, "grad_norm": 0.3359375, "learning_rate": 1.2113014720691741e-05, "loss": 1.9794, "step": 17868 }, { "epoch": 0.576522803486916, "grad_norm": 0.333984375, "learning_rate": 1.2111470425615583e-05, "loss": 1.9694, "step": 17869 }, { "epoch": 0.5765550673407124, "grad_norm": 0.3359375, "learning_rate": 1.2109926162334281e-05, "loss": 1.9395, "step": 17870 }, { "epoch": 0.5765873311945087, "grad_norm": 0.34765625, "learning_rate": 1.2108381930864849e-05, "loss": 1.985, "step": 17871 }, { "epoch": 0.5766195950483051, "grad_norm": 0.333984375, "learning_rate": 1.2106837731224269e-05, "loss": 1.9524, "step": 17872 }, { "epoch": 0.5766518589021014, "grad_norm": 0.341796875, "learning_rate": 1.2105293563429542e-05, "loss": 1.9762, "step": 17873 }, { "epoch": 0.5766841227558978, "grad_norm": 0.3359375, "learning_rate": 1.2103749427497669e-05, "loss": 1.9815, "step": 17874 }, { "epoch": 0.576716386609694, "grad_norm": 0.330078125, "learning_rate": 1.2102205323445644e-05, "loss": 1.932, "step": 17875 }, { "epoch": 0.5767486504634904, "grad_norm": 0.326171875, "learning_rate": 1.2100661251290461e-05, "loss": 1.9529, "step": 17876 }, { "epoch": 0.5767809143172867, "grad_norm": 0.333984375, "learning_rate": 1.2099117211049125e-05, "loss": 1.949, "step": 17877 }, { "epoch": 0.5768131781710831, "grad_norm": 0.328125, "learning_rate": 1.209757320273862e-05, "loss": 1.9633, "step": 17878 }, { "epoch": 0.5768454420248794, "grad_norm": 0.353515625, "learning_rate": 1.209602922637595e-05, "loss": 1.9624, "step": 17879 }, { "epoch": 0.5768777058786758, "grad_norm": 0.326171875, "learning_rate": 1.209448528197811e-05, "loss": 1.9647, "step": 17880 }, { "epoch": 0.5769099697324721, "grad_norm": 0.33203125, "learning_rate": 1.2092941369562085e-05, "loss": 1.9867, "step": 17881 }, { "epoch": 0.5769422335862685, "grad_norm": 0.34375, "learning_rate": 1.2091397489144879e-05, "loss": 1.9829, "step": 17882 }, { "epoch": 0.5769744974400649, "grad_norm": 0.3359375, "learning_rate": 1.2089853640743483e-05, "loss": 1.9554, "step": 17883 }, { "epoch": 0.5770067612938612, "grad_norm": 0.330078125, "learning_rate": 1.2088309824374891e-05, "loss": 1.9724, "step": 17884 }, { "epoch": 0.5770390251476576, "grad_norm": 0.333984375, "learning_rate": 1.2086766040056093e-05, "loss": 1.9726, "step": 17885 }, { "epoch": 0.5770712890014539, "grad_norm": 0.333984375, "learning_rate": 1.2085222287804088e-05, "loss": 1.9768, "step": 17886 }, { "epoch": 0.5771035528552503, "grad_norm": 0.328125, "learning_rate": 1.2083678567635862e-05, "loss": 1.9538, "step": 17887 }, { "epoch": 0.5771358167090466, "grad_norm": 0.33203125, "learning_rate": 1.2082134879568412e-05, "loss": 1.9749, "step": 17888 }, { "epoch": 0.577168080562843, "grad_norm": 0.326171875, "learning_rate": 1.208059122361873e-05, "loss": 1.9901, "step": 17889 }, { "epoch": 0.5772003444166393, "grad_norm": 0.328125, "learning_rate": 1.20790475998038e-05, "loss": 1.9544, "step": 17890 }, { "epoch": 0.5772326082704357, "grad_norm": 0.330078125, "learning_rate": 1.2077504008140622e-05, "loss": 1.9508, "step": 17891 }, { "epoch": 0.5772648721242319, "grad_norm": 0.330078125, "learning_rate": 1.2075960448646188e-05, "loss": 1.9687, "step": 17892 }, { "epoch": 0.5772971359780283, "grad_norm": 0.328125, "learning_rate": 1.2074416921337477e-05, "loss": 1.958, "step": 17893 }, { "epoch": 0.5773293998318246, "grad_norm": 0.32421875, "learning_rate": 1.2072873426231482e-05, "loss": 1.9755, "step": 17894 }, { "epoch": 0.577361663685621, "grad_norm": 0.341796875, "learning_rate": 1.20713299633452e-05, "loss": 1.9504, "step": 17895 }, { "epoch": 0.5773939275394173, "grad_norm": 0.33984375, "learning_rate": 1.2069786532695623e-05, "loss": 1.9591, "step": 17896 }, { "epoch": 0.5774261913932137, "grad_norm": 0.3359375, "learning_rate": 1.2068243134299728e-05, "loss": 1.9698, "step": 17897 }, { "epoch": 0.57745845524701, "grad_norm": 0.349609375, "learning_rate": 1.2066699768174507e-05, "loss": 1.9709, "step": 17898 }, { "epoch": 0.5774907191008064, "grad_norm": 0.361328125, "learning_rate": 1.206515643433696e-05, "loss": 1.9655, "step": 17899 }, { "epoch": 0.5775229829546027, "grad_norm": 0.333984375, "learning_rate": 1.2063613132804058e-05, "loss": 1.9627, "step": 17900 }, { "epoch": 0.5775552468083991, "grad_norm": 0.36328125, "learning_rate": 1.2062069863592797e-05, "loss": 1.9727, "step": 17901 }, { "epoch": 0.5775875106621954, "grad_norm": 0.400390625, "learning_rate": 1.2060526626720164e-05, "loss": 1.9678, "step": 17902 }, { "epoch": 0.5776197745159918, "grad_norm": 0.33984375, "learning_rate": 1.2058983422203145e-05, "loss": 2.0033, "step": 17903 }, { "epoch": 0.5776520383697882, "grad_norm": 0.333984375, "learning_rate": 1.2057440250058726e-05, "loss": 1.988, "step": 17904 }, { "epoch": 0.5776843022235845, "grad_norm": 0.357421875, "learning_rate": 1.2055897110303896e-05, "loss": 1.9506, "step": 17905 }, { "epoch": 0.5777165660773809, "grad_norm": 0.337890625, "learning_rate": 1.2054354002955635e-05, "loss": 1.9545, "step": 17906 }, { "epoch": 0.5777488299311772, "grad_norm": 0.357421875, "learning_rate": 1.2052810928030934e-05, "loss": 1.9646, "step": 17907 }, { "epoch": 0.5777810937849736, "grad_norm": 0.36328125, "learning_rate": 1.2051267885546777e-05, "loss": 1.9556, "step": 17908 }, { "epoch": 0.5778133576387698, "grad_norm": 0.337890625, "learning_rate": 1.2049724875520146e-05, "loss": 1.9737, "step": 17909 }, { "epoch": 0.5778456214925662, "grad_norm": 0.33203125, "learning_rate": 1.2048181897968027e-05, "loss": 1.9466, "step": 17910 }, { "epoch": 0.5778778853463625, "grad_norm": 0.333984375, "learning_rate": 1.2046638952907407e-05, "loss": 1.9778, "step": 17911 }, { "epoch": 0.5779101492001589, "grad_norm": 0.3359375, "learning_rate": 1.2045096040355262e-05, "loss": 1.9479, "step": 17912 }, { "epoch": 0.5779424130539552, "grad_norm": 0.328125, "learning_rate": 1.204355316032858e-05, "loss": 1.9669, "step": 17913 }, { "epoch": 0.5779746769077516, "grad_norm": 0.33203125, "learning_rate": 1.2042010312844348e-05, "loss": 1.9815, "step": 17914 }, { "epoch": 0.5780069407615479, "grad_norm": 0.328125, "learning_rate": 1.2040467497919544e-05, "loss": 1.9717, "step": 17915 }, { "epoch": 0.5780392046153443, "grad_norm": 0.326171875, "learning_rate": 1.2038924715571145e-05, "loss": 1.968, "step": 17916 }, { "epoch": 0.5780714684691406, "grad_norm": 0.34765625, "learning_rate": 1.2037381965816149e-05, "loss": 1.9907, "step": 17917 }, { "epoch": 0.578103732322937, "grad_norm": 0.326171875, "learning_rate": 1.2035839248671517e-05, "loss": 1.9962, "step": 17918 }, { "epoch": 0.5781359961767333, "grad_norm": 0.333984375, "learning_rate": 1.2034296564154241e-05, "loss": 1.994, "step": 17919 }, { "epoch": 0.5781682600305297, "grad_norm": 0.3359375, "learning_rate": 1.2032753912281309e-05, "loss": 1.9607, "step": 17920 }, { "epoch": 0.578200523884326, "grad_norm": 0.337890625, "learning_rate": 1.2031211293069686e-05, "loss": 1.9789, "step": 17921 }, { "epoch": 0.5782327877381224, "grad_norm": 0.33203125, "learning_rate": 1.2029668706536358e-05, "loss": 1.97, "step": 17922 }, { "epoch": 0.5782650515919188, "grad_norm": 0.33984375, "learning_rate": 1.2028126152698314e-05, "loss": 1.9946, "step": 17923 }, { "epoch": 0.578297315445715, "grad_norm": 0.3203125, "learning_rate": 1.2026583631572519e-05, "loss": 1.9627, "step": 17924 }, { "epoch": 0.5783295792995115, "grad_norm": 0.337890625, "learning_rate": 1.2025041143175956e-05, "loss": 1.9928, "step": 17925 }, { "epoch": 0.5783618431533077, "grad_norm": 0.33984375, "learning_rate": 1.2023498687525614e-05, "loss": 1.9758, "step": 17926 }, { "epoch": 0.5783941070071041, "grad_norm": 0.33984375, "learning_rate": 1.2021956264638459e-05, "loss": 1.975, "step": 17927 }, { "epoch": 0.5784263708609004, "grad_norm": 0.333984375, "learning_rate": 1.202041387453147e-05, "loss": 1.9649, "step": 17928 }, { "epoch": 0.5784586347146968, "grad_norm": 0.345703125, "learning_rate": 1.2018871517221626e-05, "loss": 1.9867, "step": 17929 }, { "epoch": 0.5784908985684931, "grad_norm": 0.33203125, "learning_rate": 1.2017329192725915e-05, "loss": 1.9403, "step": 17930 }, { "epoch": 0.5785231624222895, "grad_norm": 0.32421875, "learning_rate": 1.2015786901061297e-05, "loss": 1.9731, "step": 17931 }, { "epoch": 0.5785554262760858, "grad_norm": 0.345703125, "learning_rate": 1.2014244642244758e-05, "loss": 1.9863, "step": 17932 }, { "epoch": 0.5785876901298822, "grad_norm": 0.328125, "learning_rate": 1.2012702416293273e-05, "loss": 2.0004, "step": 17933 }, { "epoch": 0.5786199539836785, "grad_norm": 0.33203125, "learning_rate": 1.2011160223223815e-05, "loss": 1.9932, "step": 17934 }, { "epoch": 0.5786522178374749, "grad_norm": 0.328125, "learning_rate": 1.2009618063053362e-05, "loss": 1.9684, "step": 17935 }, { "epoch": 0.5786844816912712, "grad_norm": 0.328125, "learning_rate": 1.200807593579889e-05, "loss": 1.9731, "step": 17936 }, { "epoch": 0.5787167455450676, "grad_norm": 0.330078125, "learning_rate": 1.2006533841477368e-05, "loss": 1.977, "step": 17937 }, { "epoch": 0.5787490093988639, "grad_norm": 0.3359375, "learning_rate": 1.2004991780105775e-05, "loss": 1.9761, "step": 17938 }, { "epoch": 0.5787812732526603, "grad_norm": 0.326171875, "learning_rate": 1.2003449751701085e-05, "loss": 1.9857, "step": 17939 }, { "epoch": 0.5788135371064566, "grad_norm": 0.322265625, "learning_rate": 1.200190775628027e-05, "loss": 2.0029, "step": 17940 }, { "epoch": 0.578845800960253, "grad_norm": 0.32421875, "learning_rate": 1.2000365793860304e-05, "loss": 2.0086, "step": 17941 }, { "epoch": 0.5788780648140492, "grad_norm": 0.322265625, "learning_rate": 1.1998823864458159e-05, "loss": 1.9796, "step": 17942 }, { "epoch": 0.5789103286678456, "grad_norm": 0.337890625, "learning_rate": 1.1997281968090807e-05, "loss": 2.0187, "step": 17943 }, { "epoch": 0.578942592521642, "grad_norm": 0.33984375, "learning_rate": 1.199574010477522e-05, "loss": 1.9824, "step": 17944 }, { "epoch": 0.5789748563754383, "grad_norm": 0.328125, "learning_rate": 1.1994198274528376e-05, "loss": 2.0077, "step": 17945 }, { "epoch": 0.5790071202292347, "grad_norm": 0.32421875, "learning_rate": 1.1992656477367236e-05, "loss": 1.9544, "step": 17946 }, { "epoch": 0.579039384083031, "grad_norm": 0.330078125, "learning_rate": 1.1991114713308777e-05, "loss": 2.0067, "step": 17947 }, { "epoch": 0.5790716479368274, "grad_norm": 0.33203125, "learning_rate": 1.1989572982369974e-05, "loss": 1.96, "step": 17948 }, { "epoch": 0.5791039117906237, "grad_norm": 0.328125, "learning_rate": 1.1988031284567782e-05, "loss": 1.967, "step": 17949 }, { "epoch": 0.5791361756444201, "grad_norm": 0.326171875, "learning_rate": 1.1986489619919187e-05, "loss": 1.9471, "step": 17950 }, { "epoch": 0.5791684394982164, "grad_norm": 0.318359375, "learning_rate": 1.1984947988441155e-05, "loss": 1.9689, "step": 17951 }, { "epoch": 0.5792007033520128, "grad_norm": 0.32421875, "learning_rate": 1.1983406390150647e-05, "loss": 1.9525, "step": 17952 }, { "epoch": 0.5792329672058091, "grad_norm": 0.318359375, "learning_rate": 1.1981864825064636e-05, "loss": 1.9267, "step": 17953 }, { "epoch": 0.5792652310596055, "grad_norm": 0.31640625, "learning_rate": 1.1980323293200101e-05, "loss": 1.9433, "step": 17954 }, { "epoch": 0.5792974949134018, "grad_norm": 0.33984375, "learning_rate": 1.1978781794573994e-05, "loss": 1.9747, "step": 17955 }, { "epoch": 0.5793297587671982, "grad_norm": 0.3203125, "learning_rate": 1.1977240329203286e-05, "loss": 1.9793, "step": 17956 }, { "epoch": 0.5793620226209945, "grad_norm": 0.333984375, "learning_rate": 1.1975698897104958e-05, "loss": 1.9613, "step": 17957 }, { "epoch": 0.5793942864747909, "grad_norm": 0.326171875, "learning_rate": 1.197415749829596e-05, "loss": 1.9812, "step": 17958 }, { "epoch": 0.5794265503285871, "grad_norm": 0.33203125, "learning_rate": 1.1972616132793267e-05, "loss": 1.9545, "step": 17959 }, { "epoch": 0.5794588141823835, "grad_norm": 0.318359375, "learning_rate": 1.1971074800613838e-05, "loss": 1.957, "step": 17960 }, { "epoch": 0.5794910780361798, "grad_norm": 0.333984375, "learning_rate": 1.1969533501774655e-05, "loss": 1.9846, "step": 17961 }, { "epoch": 0.5795233418899762, "grad_norm": 0.32421875, "learning_rate": 1.1967992236292667e-05, "loss": 1.9596, "step": 17962 }, { "epoch": 0.5795556057437725, "grad_norm": 0.33203125, "learning_rate": 1.1966451004184847e-05, "loss": 2.0247, "step": 17963 }, { "epoch": 0.5795878695975689, "grad_norm": 0.32421875, "learning_rate": 1.196490980546816e-05, "loss": 1.9895, "step": 17964 }, { "epoch": 0.5796201334513653, "grad_norm": 0.33203125, "learning_rate": 1.1963368640159565e-05, "loss": 1.9747, "step": 17965 }, { "epoch": 0.5796523973051616, "grad_norm": 0.326171875, "learning_rate": 1.196182750827603e-05, "loss": 1.9737, "step": 17966 }, { "epoch": 0.579684661158958, "grad_norm": 0.328125, "learning_rate": 1.1960286409834521e-05, "loss": 1.9721, "step": 17967 }, { "epoch": 0.5797169250127543, "grad_norm": 0.345703125, "learning_rate": 1.1958745344851996e-05, "loss": 1.9532, "step": 17968 }, { "epoch": 0.5797491888665507, "grad_norm": 0.328125, "learning_rate": 1.195720431334542e-05, "loss": 1.9824, "step": 17969 }, { "epoch": 0.579781452720347, "grad_norm": 0.3359375, "learning_rate": 1.1955663315331759e-05, "loss": 1.9697, "step": 17970 }, { "epoch": 0.5798137165741434, "grad_norm": 0.328125, "learning_rate": 1.195412235082797e-05, "loss": 1.9893, "step": 17971 }, { "epoch": 0.5798459804279397, "grad_norm": 0.328125, "learning_rate": 1.1952581419851016e-05, "loss": 1.9789, "step": 17972 }, { "epoch": 0.5798782442817361, "grad_norm": 0.333984375, "learning_rate": 1.1951040522417862e-05, "loss": 1.9979, "step": 17973 }, { "epoch": 0.5799105081355324, "grad_norm": 0.3359375, "learning_rate": 1.1949499658545465e-05, "loss": 1.9766, "step": 17974 }, { "epoch": 0.5799427719893288, "grad_norm": 0.326171875, "learning_rate": 1.1947958828250786e-05, "loss": 1.9701, "step": 17975 }, { "epoch": 0.579975035843125, "grad_norm": 0.33203125, "learning_rate": 1.1946418031550792e-05, "loss": 1.973, "step": 17976 }, { "epoch": 0.5800072996969214, "grad_norm": 0.3359375, "learning_rate": 1.1944877268462434e-05, "loss": 1.965, "step": 17977 }, { "epoch": 0.5800395635507177, "grad_norm": 0.328125, "learning_rate": 1.1943336539002675e-05, "loss": 1.9569, "step": 17978 }, { "epoch": 0.5800718274045141, "grad_norm": 0.33984375, "learning_rate": 1.194179584318848e-05, "loss": 1.998, "step": 17979 }, { "epoch": 0.5801040912583104, "grad_norm": 0.33203125, "learning_rate": 1.1940255181036792e-05, "loss": 1.9822, "step": 17980 }, { "epoch": 0.5801363551121068, "grad_norm": 0.322265625, "learning_rate": 1.1938714552564586e-05, "loss": 1.9627, "step": 17981 }, { "epoch": 0.5801686189659031, "grad_norm": 0.328125, "learning_rate": 1.1937173957788818e-05, "loss": 1.988, "step": 17982 }, { "epoch": 0.5802008828196995, "grad_norm": 0.33203125, "learning_rate": 1.1935633396726436e-05, "loss": 1.9549, "step": 17983 }, { "epoch": 0.5802331466734959, "grad_norm": 0.33203125, "learning_rate": 1.19340928693944e-05, "loss": 2.0027, "step": 17984 }, { "epoch": 0.5802654105272922, "grad_norm": 0.3359375, "learning_rate": 1.1932552375809681e-05, "loss": 1.9672, "step": 17985 }, { "epoch": 0.5802976743810886, "grad_norm": 0.330078125, "learning_rate": 1.1931011915989217e-05, "loss": 1.9646, "step": 17986 }, { "epoch": 0.5803299382348849, "grad_norm": 0.330078125, "learning_rate": 1.192947148994997e-05, "loss": 1.9846, "step": 17987 }, { "epoch": 0.5803622020886813, "grad_norm": 0.32421875, "learning_rate": 1.1927931097708906e-05, "loss": 1.9888, "step": 17988 }, { "epoch": 0.5803944659424776, "grad_norm": 0.318359375, "learning_rate": 1.1926390739282968e-05, "loss": 1.9564, "step": 17989 }, { "epoch": 0.580426729796274, "grad_norm": 0.337890625, "learning_rate": 1.1924850414689115e-05, "loss": 1.9722, "step": 17990 }, { "epoch": 0.5804589936500703, "grad_norm": 0.322265625, "learning_rate": 1.1923310123944306e-05, "loss": 1.9762, "step": 17991 }, { "epoch": 0.5804912575038667, "grad_norm": 0.3203125, "learning_rate": 1.1921769867065487e-05, "loss": 1.961, "step": 17992 }, { "epoch": 0.5805235213576629, "grad_norm": 0.322265625, "learning_rate": 1.192022964406962e-05, "loss": 1.9396, "step": 17993 }, { "epoch": 0.5805557852114593, "grad_norm": 0.32421875, "learning_rate": 1.1918689454973654e-05, "loss": 1.9245, "step": 17994 }, { "epoch": 0.5805880490652556, "grad_norm": 0.3203125, "learning_rate": 1.1917149299794547e-05, "loss": 1.9982, "step": 17995 }, { "epoch": 0.580620312919052, "grad_norm": 0.326171875, "learning_rate": 1.1915609178549246e-05, "loss": 1.9594, "step": 17996 }, { "epoch": 0.5806525767728483, "grad_norm": 0.333984375, "learning_rate": 1.1914069091254707e-05, "loss": 1.9687, "step": 17997 }, { "epoch": 0.5806848406266447, "grad_norm": 0.33203125, "learning_rate": 1.1912529037927885e-05, "loss": 1.9318, "step": 17998 }, { "epoch": 0.580717104480441, "grad_norm": 0.330078125, "learning_rate": 1.1910989018585722e-05, "loss": 1.9812, "step": 17999 }, { "epoch": 0.5807493683342374, "grad_norm": 0.330078125, "learning_rate": 1.190944903324518e-05, "loss": 1.9801, "step": 18000 }, { "epoch": 0.5807816321880337, "grad_norm": 0.318359375, "learning_rate": 1.1907909081923207e-05, "loss": 1.9893, "step": 18001 }, { "epoch": 0.5808138960418301, "grad_norm": 0.333984375, "learning_rate": 1.1906369164636749e-05, "loss": 1.973, "step": 18002 }, { "epoch": 0.5808461598956264, "grad_norm": 0.333984375, "learning_rate": 1.190482928140276e-05, "loss": 1.9697, "step": 18003 }, { "epoch": 0.5808784237494228, "grad_norm": 0.337890625, "learning_rate": 1.1903289432238194e-05, "loss": 1.9587, "step": 18004 }, { "epoch": 0.5809106876032192, "grad_norm": 0.330078125, "learning_rate": 1.1901749617159993e-05, "loss": 1.9775, "step": 18005 }, { "epoch": 0.5809429514570155, "grad_norm": 0.326171875, "learning_rate": 1.190020983618511e-05, "loss": 1.9702, "step": 18006 }, { "epoch": 0.5809752153108119, "grad_norm": 0.32421875, "learning_rate": 1.1898670089330495e-05, "loss": 1.9848, "step": 18007 }, { "epoch": 0.5810074791646082, "grad_norm": 0.326171875, "learning_rate": 1.1897130376613094e-05, "loss": 1.9846, "step": 18008 }, { "epoch": 0.5810397430184046, "grad_norm": 0.33984375, "learning_rate": 1.1895590698049853e-05, "loss": 1.9786, "step": 18009 }, { "epoch": 0.5810720068722008, "grad_norm": 0.333984375, "learning_rate": 1.189405105365773e-05, "loss": 2.0022, "step": 18010 }, { "epoch": 0.5811042707259972, "grad_norm": 0.345703125, "learning_rate": 1.1892511443453656e-05, "loss": 1.9659, "step": 18011 }, { "epoch": 0.5811365345797935, "grad_norm": 0.322265625, "learning_rate": 1.189097186745459e-05, "loss": 1.9938, "step": 18012 }, { "epoch": 0.5811687984335899, "grad_norm": 0.3359375, "learning_rate": 1.1889432325677481e-05, "loss": 1.9606, "step": 18013 }, { "epoch": 0.5812010622873862, "grad_norm": 0.3359375, "learning_rate": 1.1887892818139263e-05, "loss": 1.9239, "step": 18014 }, { "epoch": 0.5812333261411826, "grad_norm": 0.34375, "learning_rate": 1.1886353344856884e-05, "loss": 2.0023, "step": 18015 }, { "epoch": 0.5812655899949789, "grad_norm": 0.333984375, "learning_rate": 1.1884813905847306e-05, "loss": 1.9771, "step": 18016 }, { "epoch": 0.5812978538487753, "grad_norm": 0.341796875, "learning_rate": 1.1883274501127452e-05, "loss": 1.9709, "step": 18017 }, { "epoch": 0.5813301177025716, "grad_norm": 0.3203125, "learning_rate": 1.188173513071428e-05, "loss": 1.9724, "step": 18018 }, { "epoch": 0.581362381556368, "grad_norm": 0.35546875, "learning_rate": 1.1880195794624732e-05, "loss": 1.9776, "step": 18019 }, { "epoch": 0.5813946454101643, "grad_norm": 0.3515625, "learning_rate": 1.1878656492875746e-05, "loss": 1.9787, "step": 18020 }, { "epoch": 0.5814269092639607, "grad_norm": 0.337890625, "learning_rate": 1.187711722548427e-05, "loss": 1.9834, "step": 18021 }, { "epoch": 0.581459173117757, "grad_norm": 0.34765625, "learning_rate": 1.1875577992467253e-05, "loss": 1.9907, "step": 18022 }, { "epoch": 0.5814914369715534, "grad_norm": 0.3515625, "learning_rate": 1.1874038793841627e-05, "loss": 1.9758, "step": 18023 }, { "epoch": 0.5815237008253498, "grad_norm": 0.3359375, "learning_rate": 1.187249962962434e-05, "loss": 1.9631, "step": 18024 }, { "epoch": 0.581555964679146, "grad_norm": 0.3359375, "learning_rate": 1.1870960499832337e-05, "loss": 1.9719, "step": 18025 }, { "epoch": 0.5815882285329425, "grad_norm": 0.35546875, "learning_rate": 1.1869421404482552e-05, "loss": 1.9822, "step": 18026 }, { "epoch": 0.5816204923867387, "grad_norm": 0.318359375, "learning_rate": 1.1867882343591932e-05, "loss": 1.9789, "step": 18027 }, { "epoch": 0.5816527562405351, "grad_norm": 0.34375, "learning_rate": 1.1866343317177412e-05, "loss": 1.9872, "step": 18028 }, { "epoch": 0.5816850200943314, "grad_norm": 0.333984375, "learning_rate": 1.1864804325255945e-05, "loss": 1.9863, "step": 18029 }, { "epoch": 0.5817172839481278, "grad_norm": 0.3359375, "learning_rate": 1.1863265367844457e-05, "loss": 1.9888, "step": 18030 }, { "epoch": 0.5817495478019241, "grad_norm": 0.337890625, "learning_rate": 1.1861726444959893e-05, "loss": 1.9861, "step": 18031 }, { "epoch": 0.5817818116557205, "grad_norm": 0.3515625, "learning_rate": 1.1860187556619199e-05, "loss": 1.9754, "step": 18032 }, { "epoch": 0.5818140755095168, "grad_norm": 0.345703125, "learning_rate": 1.1858648702839301e-05, "loss": 1.9626, "step": 18033 }, { "epoch": 0.5818463393633132, "grad_norm": 0.328125, "learning_rate": 1.1857109883637146e-05, "loss": 1.9871, "step": 18034 }, { "epoch": 0.5818786032171095, "grad_norm": 0.357421875, "learning_rate": 1.1855571099029674e-05, "loss": 1.9635, "step": 18035 }, { "epoch": 0.5819108670709059, "grad_norm": 0.34375, "learning_rate": 1.1854032349033817e-05, "loss": 1.9669, "step": 18036 }, { "epoch": 0.5819431309247022, "grad_norm": 0.33203125, "learning_rate": 1.1852493633666515e-05, "loss": 1.981, "step": 18037 }, { "epoch": 0.5819753947784986, "grad_norm": 0.337890625, "learning_rate": 1.1850954952944708e-05, "loss": 1.9792, "step": 18038 }, { "epoch": 0.5820076586322949, "grad_norm": 0.359375, "learning_rate": 1.1849416306885325e-05, "loss": 1.9679, "step": 18039 }, { "epoch": 0.5820399224860913, "grad_norm": 0.31640625, "learning_rate": 1.184787769550531e-05, "loss": 1.9849, "step": 18040 }, { "epoch": 0.5820721863398876, "grad_norm": 0.3359375, "learning_rate": 1.1846339118821601e-05, "loss": 1.9912, "step": 18041 }, { "epoch": 0.582104450193684, "grad_norm": 0.35546875, "learning_rate": 1.1844800576851117e-05, "loss": 1.9862, "step": 18042 }, { "epoch": 0.5821367140474802, "grad_norm": 0.330078125, "learning_rate": 1.184326206961081e-05, "loss": 1.967, "step": 18043 }, { "epoch": 0.5821689779012766, "grad_norm": 0.33984375, "learning_rate": 1.1841723597117616e-05, "loss": 1.9783, "step": 18044 }, { "epoch": 0.582201241755073, "grad_norm": 0.3671875, "learning_rate": 1.1840185159388458e-05, "loss": 1.984, "step": 18045 }, { "epoch": 0.5822335056088693, "grad_norm": 0.353515625, "learning_rate": 1.1838646756440268e-05, "loss": 1.9684, "step": 18046 }, { "epoch": 0.5822657694626657, "grad_norm": 0.34375, "learning_rate": 1.1837108388289997e-05, "loss": 1.9504, "step": 18047 }, { "epoch": 0.582298033316462, "grad_norm": 0.345703125, "learning_rate": 1.1835570054954563e-05, "loss": 2.0129, "step": 18048 }, { "epoch": 0.5823302971702584, "grad_norm": 0.357421875, "learning_rate": 1.1834031756450904e-05, "loss": 2.0053, "step": 18049 }, { "epoch": 0.5823625610240547, "grad_norm": 0.365234375, "learning_rate": 1.1832493492795954e-05, "loss": 1.9868, "step": 18050 }, { "epoch": 0.5823948248778511, "grad_norm": 0.3515625, "learning_rate": 1.183095526400664e-05, "loss": 1.9782, "step": 18051 }, { "epoch": 0.5824270887316474, "grad_norm": 0.359375, "learning_rate": 1.1829417070099898e-05, "loss": 1.9959, "step": 18052 }, { "epoch": 0.5824593525854438, "grad_norm": 0.34375, "learning_rate": 1.182787891109266e-05, "loss": 1.9374, "step": 18053 }, { "epoch": 0.5824916164392401, "grad_norm": 0.369140625, "learning_rate": 1.1826340787001854e-05, "loss": 1.9815, "step": 18054 }, { "epoch": 0.5825238802930365, "grad_norm": 0.32421875, "learning_rate": 1.182480269784441e-05, "loss": 1.99, "step": 18055 }, { "epoch": 0.5825561441468328, "grad_norm": 0.33203125, "learning_rate": 1.1823264643637264e-05, "loss": 1.9563, "step": 18056 }, { "epoch": 0.5825884080006292, "grad_norm": 0.349609375, "learning_rate": 1.1821726624397339e-05, "loss": 1.9203, "step": 18057 }, { "epoch": 0.5826206718544255, "grad_norm": 0.357421875, "learning_rate": 1.1820188640141567e-05, "loss": 1.9342, "step": 18058 }, { "epoch": 0.5826529357082219, "grad_norm": 0.3359375, "learning_rate": 1.181865069088688e-05, "loss": 1.9758, "step": 18059 }, { "epoch": 0.5826851995620181, "grad_norm": 0.33203125, "learning_rate": 1.1817112776650202e-05, "loss": 1.9848, "step": 18060 }, { "epoch": 0.5827174634158145, "grad_norm": 0.3671875, "learning_rate": 1.1815574897448461e-05, "loss": 1.9611, "step": 18061 }, { "epoch": 0.5827497272696108, "grad_norm": 0.32421875, "learning_rate": 1.181403705329859e-05, "loss": 1.9744, "step": 18062 }, { "epoch": 0.5827819911234072, "grad_norm": 0.345703125, "learning_rate": 1.1812499244217515e-05, "loss": 1.981, "step": 18063 }, { "epoch": 0.5828142549772035, "grad_norm": 0.3359375, "learning_rate": 1.1810961470222158e-05, "loss": 1.9728, "step": 18064 }, { "epoch": 0.5828465188309999, "grad_norm": 0.33203125, "learning_rate": 1.180942373132945e-05, "loss": 1.9932, "step": 18065 }, { "epoch": 0.5828787826847963, "grad_norm": 0.33203125, "learning_rate": 1.1807886027556319e-05, "loss": 1.9744, "step": 18066 }, { "epoch": 0.5829110465385926, "grad_norm": 0.326171875, "learning_rate": 1.1806348358919687e-05, "loss": 1.9412, "step": 18067 }, { "epoch": 0.582943310392389, "grad_norm": 0.330078125, "learning_rate": 1.180481072543648e-05, "loss": 1.9591, "step": 18068 }, { "epoch": 0.5829755742461853, "grad_norm": 0.337890625, "learning_rate": 1.180327312712363e-05, "loss": 1.9881, "step": 18069 }, { "epoch": 0.5830078380999817, "grad_norm": 0.33203125, "learning_rate": 1.1801735563998047e-05, "loss": 1.9722, "step": 18070 }, { "epoch": 0.583040101953778, "grad_norm": 0.337890625, "learning_rate": 1.1800198036076669e-05, "loss": 1.9854, "step": 18071 }, { "epoch": 0.5830723658075744, "grad_norm": 0.3359375, "learning_rate": 1.1798660543376421e-05, "loss": 1.9461, "step": 18072 }, { "epoch": 0.5831046296613707, "grad_norm": 0.34375, "learning_rate": 1.179712308591421e-05, "loss": 1.9671, "step": 18073 }, { "epoch": 0.5831368935151671, "grad_norm": 0.330078125, "learning_rate": 1.1795585663706977e-05, "loss": 1.9874, "step": 18074 }, { "epoch": 0.5831691573689634, "grad_norm": 0.322265625, "learning_rate": 1.1794048276771643e-05, "loss": 1.9742, "step": 18075 }, { "epoch": 0.5832014212227598, "grad_norm": 0.330078125, "learning_rate": 1.1792510925125118e-05, "loss": 1.9524, "step": 18076 }, { "epoch": 0.583233685076556, "grad_norm": 0.330078125, "learning_rate": 1.179097360878433e-05, "loss": 1.995, "step": 18077 }, { "epoch": 0.5832659489303524, "grad_norm": 0.326171875, "learning_rate": 1.178943632776621e-05, "loss": 1.9995, "step": 18078 }, { "epoch": 0.5832982127841487, "grad_norm": 0.326171875, "learning_rate": 1.1787899082087667e-05, "loss": 1.9805, "step": 18079 }, { "epoch": 0.5833304766379451, "grad_norm": 0.337890625, "learning_rate": 1.1786361871765626e-05, "loss": 1.9802, "step": 18080 }, { "epoch": 0.5833627404917414, "grad_norm": 0.330078125, "learning_rate": 1.178482469681701e-05, "loss": 1.9979, "step": 18081 }, { "epoch": 0.5833950043455378, "grad_norm": 0.337890625, "learning_rate": 1.1783287557258736e-05, "loss": 1.9648, "step": 18082 }, { "epoch": 0.5834272681993341, "grad_norm": 0.328125, "learning_rate": 1.1781750453107725e-05, "loss": 1.9761, "step": 18083 }, { "epoch": 0.5834595320531305, "grad_norm": 0.33203125, "learning_rate": 1.1780213384380899e-05, "loss": 1.9413, "step": 18084 }, { "epoch": 0.5834917959069269, "grad_norm": 0.333984375, "learning_rate": 1.177867635109517e-05, "loss": 1.9902, "step": 18085 }, { "epoch": 0.5835240597607232, "grad_norm": 0.3359375, "learning_rate": 1.1777139353267462e-05, "loss": 1.9699, "step": 18086 }, { "epoch": 0.5835563236145196, "grad_norm": 0.326171875, "learning_rate": 1.1775602390914694e-05, "loss": 1.9802, "step": 18087 }, { "epoch": 0.5835885874683159, "grad_norm": 0.328125, "learning_rate": 1.177406546405378e-05, "loss": 1.9343, "step": 18088 }, { "epoch": 0.5836208513221123, "grad_norm": 0.333984375, "learning_rate": 1.1772528572701637e-05, "loss": 1.959, "step": 18089 }, { "epoch": 0.5836531151759086, "grad_norm": 0.32421875, "learning_rate": 1.177099171687519e-05, "loss": 1.9296, "step": 18090 }, { "epoch": 0.583685379029705, "grad_norm": 0.328125, "learning_rate": 1.1769454896591345e-05, "loss": 1.9827, "step": 18091 }, { "epoch": 0.5837176428835013, "grad_norm": 0.32421875, "learning_rate": 1.1767918111867022e-05, "loss": 2.0023, "step": 18092 }, { "epoch": 0.5837499067372977, "grad_norm": 0.34375, "learning_rate": 1.176638136271914e-05, "loss": 1.9738, "step": 18093 }, { "epoch": 0.5837821705910939, "grad_norm": 0.330078125, "learning_rate": 1.1764844649164612e-05, "loss": 1.9848, "step": 18094 }, { "epoch": 0.5838144344448903, "grad_norm": 0.326171875, "learning_rate": 1.1763307971220353e-05, "loss": 1.9743, "step": 18095 }, { "epoch": 0.5838466982986866, "grad_norm": 0.326171875, "learning_rate": 1.1761771328903277e-05, "loss": 1.9634, "step": 18096 }, { "epoch": 0.583878962152483, "grad_norm": 0.328125, "learning_rate": 1.1760234722230302e-05, "loss": 1.9853, "step": 18097 }, { "epoch": 0.5839112260062793, "grad_norm": 0.328125, "learning_rate": 1.1758698151218337e-05, "loss": 1.9824, "step": 18098 }, { "epoch": 0.5839434898600757, "grad_norm": 0.33203125, "learning_rate": 1.1757161615884297e-05, "loss": 1.9656, "step": 18099 }, { "epoch": 0.583975753713872, "grad_norm": 0.345703125, "learning_rate": 1.17556251162451e-05, "loss": 1.9651, "step": 18100 }, { "epoch": 0.5840080175676684, "grad_norm": 0.337890625, "learning_rate": 1.1754088652317647e-05, "loss": 1.9778, "step": 18101 }, { "epoch": 0.5840402814214647, "grad_norm": 0.33984375, "learning_rate": 1.175255222411886e-05, "loss": 1.9943, "step": 18102 }, { "epoch": 0.5840725452752611, "grad_norm": 0.33984375, "learning_rate": 1.1751015831665653e-05, "loss": 1.9903, "step": 18103 }, { "epoch": 0.5841048091290574, "grad_norm": 0.34765625, "learning_rate": 1.1749479474974929e-05, "loss": 1.9814, "step": 18104 }, { "epoch": 0.5841370729828538, "grad_norm": 0.349609375, "learning_rate": 1.1747943154063598e-05, "loss": 1.9859, "step": 18105 }, { "epoch": 0.5841693368366502, "grad_norm": 0.33984375, "learning_rate": 1.1746406868948586e-05, "loss": 1.9732, "step": 18106 }, { "epoch": 0.5842016006904465, "grad_norm": 0.326171875, "learning_rate": 1.1744870619646788e-05, "loss": 1.9897, "step": 18107 }, { "epoch": 0.5842338645442429, "grad_norm": 0.33203125, "learning_rate": 1.1743334406175113e-05, "loss": 1.9149, "step": 18108 }, { "epoch": 0.5842661283980392, "grad_norm": 0.396484375, "learning_rate": 1.174179822855049e-05, "loss": 1.9489, "step": 18109 }, { "epoch": 0.5842983922518356, "grad_norm": 0.3359375, "learning_rate": 1.1740262086789806e-05, "loss": 1.9708, "step": 18110 }, { "epoch": 0.5843306561056318, "grad_norm": 0.361328125, "learning_rate": 1.1738725980909977e-05, "loss": 1.9695, "step": 18111 }, { "epoch": 0.5843629199594282, "grad_norm": 0.33203125, "learning_rate": 1.1737189910927917e-05, "loss": 1.956, "step": 18112 }, { "epoch": 0.5843951838132245, "grad_norm": 0.33984375, "learning_rate": 1.1735653876860526e-05, "loss": 1.9897, "step": 18113 }, { "epoch": 0.5844274476670209, "grad_norm": 0.345703125, "learning_rate": 1.1734117878724719e-05, "loss": 2.0196, "step": 18114 }, { "epoch": 0.5844597115208172, "grad_norm": 0.33203125, "learning_rate": 1.17325819165374e-05, "loss": 1.9815, "step": 18115 }, { "epoch": 0.5844919753746136, "grad_norm": 0.333984375, "learning_rate": 1.1731045990315472e-05, "loss": 1.9455, "step": 18116 }, { "epoch": 0.5845242392284099, "grad_norm": 0.349609375, "learning_rate": 1.1729510100075842e-05, "loss": 1.9921, "step": 18117 }, { "epoch": 0.5845565030822063, "grad_norm": 0.34375, "learning_rate": 1.1727974245835427e-05, "loss": 1.989, "step": 18118 }, { "epoch": 0.5845887669360026, "grad_norm": 0.3359375, "learning_rate": 1.1726438427611116e-05, "loss": 1.9998, "step": 18119 }, { "epoch": 0.584621030789799, "grad_norm": 0.359375, "learning_rate": 1.1724902645419826e-05, "loss": 1.9817, "step": 18120 }, { "epoch": 0.5846532946435953, "grad_norm": 0.333984375, "learning_rate": 1.172336689927846e-05, "loss": 1.9699, "step": 18121 }, { "epoch": 0.5846855584973917, "grad_norm": 0.330078125, "learning_rate": 1.1721831189203916e-05, "loss": 1.9769, "step": 18122 }, { "epoch": 0.584717822351188, "grad_norm": 0.341796875, "learning_rate": 1.1720295515213106e-05, "loss": 1.9609, "step": 18123 }, { "epoch": 0.5847500862049844, "grad_norm": 0.33984375, "learning_rate": 1.171875987732293e-05, "loss": 1.9631, "step": 18124 }, { "epoch": 0.5847823500587807, "grad_norm": 0.3515625, "learning_rate": 1.171722427555029e-05, "loss": 1.9677, "step": 18125 }, { "epoch": 0.584814613912577, "grad_norm": 0.361328125, "learning_rate": 1.1715688709912088e-05, "loss": 1.9724, "step": 18126 }, { "epoch": 0.5848468777663735, "grad_norm": 0.32421875, "learning_rate": 1.171415318042523e-05, "loss": 1.9744, "step": 18127 }, { "epoch": 0.5848791416201697, "grad_norm": 0.35546875, "learning_rate": 1.1712617687106622e-05, "loss": 1.9573, "step": 18128 }, { "epoch": 0.5849114054739661, "grad_norm": 0.341796875, "learning_rate": 1.1711082229973153e-05, "loss": 1.9893, "step": 18129 }, { "epoch": 0.5849436693277624, "grad_norm": 0.32421875, "learning_rate": 1.1709546809041732e-05, "loss": 1.9766, "step": 18130 }, { "epoch": 0.5849759331815588, "grad_norm": 0.33203125, "learning_rate": 1.1708011424329267e-05, "loss": 1.9743, "step": 18131 }, { "epoch": 0.5850081970353551, "grad_norm": 0.341796875, "learning_rate": 1.170647607585264e-05, "loss": 1.9947, "step": 18132 }, { "epoch": 0.5850404608891515, "grad_norm": 0.337890625, "learning_rate": 1.1704940763628765e-05, "loss": 1.9667, "step": 18133 }, { "epoch": 0.5850727247429478, "grad_norm": 0.330078125, "learning_rate": 1.1703405487674546e-05, "loss": 1.967, "step": 18134 }, { "epoch": 0.5851049885967442, "grad_norm": 0.36328125, "learning_rate": 1.1701870248006868e-05, "loss": 1.9861, "step": 18135 }, { "epoch": 0.5851372524505405, "grad_norm": 0.333984375, "learning_rate": 1.170033504464263e-05, "loss": 1.9734, "step": 18136 }, { "epoch": 0.5851695163043369, "grad_norm": 0.34375, "learning_rate": 1.169879987759875e-05, "loss": 1.9824, "step": 18137 }, { "epoch": 0.5852017801581332, "grad_norm": 0.330078125, "learning_rate": 1.1697264746892103e-05, "loss": 2.0149, "step": 18138 }, { "epoch": 0.5852340440119296, "grad_norm": 0.34375, "learning_rate": 1.1695729652539596e-05, "loss": 1.9822, "step": 18139 }, { "epoch": 0.5852663078657259, "grad_norm": 0.3359375, "learning_rate": 1.1694194594558134e-05, "loss": 1.9628, "step": 18140 }, { "epoch": 0.5852985717195223, "grad_norm": 0.33203125, "learning_rate": 1.16926595729646e-05, "loss": 1.9883, "step": 18141 }, { "epoch": 0.5853308355733186, "grad_norm": 0.341796875, "learning_rate": 1.1691124587775898e-05, "loss": 1.9882, "step": 18142 }, { "epoch": 0.585363099427115, "grad_norm": 0.341796875, "learning_rate": 1.1689589639008925e-05, "loss": 1.9649, "step": 18143 }, { "epoch": 0.5853953632809112, "grad_norm": 0.33203125, "learning_rate": 1.1688054726680573e-05, "loss": 1.9831, "step": 18144 }, { "epoch": 0.5854276271347076, "grad_norm": 0.33984375, "learning_rate": 1.1686519850807737e-05, "loss": 1.975, "step": 18145 }, { "epoch": 0.585459890988504, "grad_norm": 0.326171875, "learning_rate": 1.1684985011407316e-05, "loss": 1.987, "step": 18146 }, { "epoch": 0.5854921548423003, "grad_norm": 0.337890625, "learning_rate": 1.1683450208496199e-05, "loss": 1.9638, "step": 18147 }, { "epoch": 0.5855244186960967, "grad_norm": 0.33203125, "learning_rate": 1.1681915442091283e-05, "loss": 1.9848, "step": 18148 }, { "epoch": 0.585556682549893, "grad_norm": 0.328125, "learning_rate": 1.1680380712209464e-05, "loss": 1.9703, "step": 18149 }, { "epoch": 0.5855889464036894, "grad_norm": 0.34765625, "learning_rate": 1.1678846018867632e-05, "loss": 1.9648, "step": 18150 }, { "epoch": 0.5856212102574857, "grad_norm": 0.330078125, "learning_rate": 1.1677311362082678e-05, "loss": 1.9632, "step": 18151 }, { "epoch": 0.5856534741112821, "grad_norm": 0.33984375, "learning_rate": 1.16757767418715e-05, "loss": 1.9979, "step": 18152 }, { "epoch": 0.5856857379650784, "grad_norm": 0.330078125, "learning_rate": 1.1674242158250985e-05, "loss": 1.9849, "step": 18153 }, { "epoch": 0.5857180018188748, "grad_norm": 0.3359375, "learning_rate": 1.1672707611238025e-05, "loss": 1.9847, "step": 18154 }, { "epoch": 0.5857502656726711, "grad_norm": 0.345703125, "learning_rate": 1.1671173100849519e-05, "loss": 1.9847, "step": 18155 }, { "epoch": 0.5857825295264675, "grad_norm": 0.333984375, "learning_rate": 1.1669638627102345e-05, "loss": 1.9947, "step": 18156 }, { "epoch": 0.5858147933802638, "grad_norm": 0.33203125, "learning_rate": 1.1668104190013399e-05, "loss": 1.9788, "step": 18157 }, { "epoch": 0.5858470572340602, "grad_norm": 0.32421875, "learning_rate": 1.166656978959958e-05, "loss": 1.9763, "step": 18158 }, { "epoch": 0.5858793210878565, "grad_norm": 0.330078125, "learning_rate": 1.166503542587776e-05, "loss": 1.9797, "step": 18159 }, { "epoch": 0.5859115849416529, "grad_norm": 0.330078125, "learning_rate": 1.166350109886484e-05, "loss": 1.9777, "step": 18160 }, { "epoch": 0.5859438487954491, "grad_norm": 0.330078125, "learning_rate": 1.1661966808577707e-05, "loss": 1.9648, "step": 18161 }, { "epoch": 0.5859761126492455, "grad_norm": 0.330078125, "learning_rate": 1.1660432555033253e-05, "loss": 1.9476, "step": 18162 }, { "epoch": 0.5860083765030418, "grad_norm": 0.349609375, "learning_rate": 1.1658898338248354e-05, "loss": 1.9897, "step": 18163 }, { "epoch": 0.5860406403568382, "grad_norm": 0.341796875, "learning_rate": 1.1657364158239908e-05, "loss": 1.9958, "step": 18164 }, { "epoch": 0.5860729042106345, "grad_norm": 0.326171875, "learning_rate": 1.1655830015024806e-05, "loss": 1.9808, "step": 18165 }, { "epoch": 0.5861051680644309, "grad_norm": 0.345703125, "learning_rate": 1.1654295908619923e-05, "loss": 2.0059, "step": 18166 }, { "epoch": 0.5861374319182273, "grad_norm": 0.3359375, "learning_rate": 1.1652761839042146e-05, "loss": 1.9727, "step": 18167 }, { "epoch": 0.5861696957720236, "grad_norm": 0.3515625, "learning_rate": 1.1651227806308374e-05, "loss": 1.9781, "step": 18168 }, { "epoch": 0.58620195962582, "grad_norm": 0.3359375, "learning_rate": 1.1649693810435478e-05, "loss": 1.9704, "step": 18169 }, { "epoch": 0.5862342234796163, "grad_norm": 0.33203125, "learning_rate": 1.1648159851440349e-05, "loss": 1.9896, "step": 18170 }, { "epoch": 0.5862664873334127, "grad_norm": 0.337890625, "learning_rate": 1.1646625929339875e-05, "loss": 1.9551, "step": 18171 }, { "epoch": 0.586298751187209, "grad_norm": 0.326171875, "learning_rate": 1.1645092044150935e-05, "loss": 1.9672, "step": 18172 }, { "epoch": 0.5863310150410054, "grad_norm": 0.333984375, "learning_rate": 1.1643558195890413e-05, "loss": 1.9941, "step": 18173 }, { "epoch": 0.5863632788948017, "grad_norm": 0.33984375, "learning_rate": 1.16420243845752e-05, "loss": 1.9715, "step": 18174 }, { "epoch": 0.5863955427485981, "grad_norm": 0.3359375, "learning_rate": 1.1640490610222168e-05, "loss": 1.9765, "step": 18175 }, { "epoch": 0.5864278066023944, "grad_norm": 0.32421875, "learning_rate": 1.1638956872848207e-05, "loss": 1.9796, "step": 18176 }, { "epoch": 0.5864600704561908, "grad_norm": 0.337890625, "learning_rate": 1.1637423172470202e-05, "loss": 1.9684, "step": 18177 }, { "epoch": 0.586492334309987, "grad_norm": 0.33203125, "learning_rate": 1.1635889509105024e-05, "loss": 1.9796, "step": 18178 }, { "epoch": 0.5865245981637834, "grad_norm": 0.330078125, "learning_rate": 1.1634355882769562e-05, "loss": 1.9752, "step": 18179 }, { "epoch": 0.5865568620175797, "grad_norm": 0.337890625, "learning_rate": 1.16328222934807e-05, "loss": 1.989, "step": 18180 }, { "epoch": 0.5865891258713761, "grad_norm": 0.33203125, "learning_rate": 1.163128874125531e-05, "loss": 1.9728, "step": 18181 }, { "epoch": 0.5866213897251724, "grad_norm": 0.333984375, "learning_rate": 1.1629755226110279e-05, "loss": 1.9526, "step": 18182 }, { "epoch": 0.5866536535789688, "grad_norm": 0.33984375, "learning_rate": 1.1628221748062486e-05, "loss": 1.989, "step": 18183 }, { "epoch": 0.5866859174327651, "grad_norm": 0.328125, "learning_rate": 1.1626688307128806e-05, "loss": 1.9788, "step": 18184 }, { "epoch": 0.5867181812865615, "grad_norm": 0.330078125, "learning_rate": 1.1625154903326121e-05, "loss": 1.9686, "step": 18185 }, { "epoch": 0.5867504451403579, "grad_norm": 0.341796875, "learning_rate": 1.1623621536671318e-05, "loss": 1.9913, "step": 18186 }, { "epoch": 0.5867827089941542, "grad_norm": 0.322265625, "learning_rate": 1.1622088207181256e-05, "loss": 1.9421, "step": 18187 }, { "epoch": 0.5868149728479506, "grad_norm": 0.33984375, "learning_rate": 1.1620554914872825e-05, "loss": 1.9963, "step": 18188 }, { "epoch": 0.5868472367017469, "grad_norm": 0.330078125, "learning_rate": 1.1619021659762912e-05, "loss": 1.9816, "step": 18189 }, { "epoch": 0.5868795005555433, "grad_norm": 0.34375, "learning_rate": 1.161748844186837e-05, "loss": 1.964, "step": 18190 }, { "epoch": 0.5869117644093396, "grad_norm": 0.333984375, "learning_rate": 1.1615955261206093e-05, "loss": 2.0048, "step": 18191 }, { "epoch": 0.586944028263136, "grad_norm": 0.326171875, "learning_rate": 1.1614422117792952e-05, "loss": 1.9297, "step": 18192 }, { "epoch": 0.5869762921169323, "grad_norm": 0.33203125, "learning_rate": 1.1612889011645829e-05, "loss": 1.9651, "step": 18193 }, { "epoch": 0.5870085559707287, "grad_norm": 0.3203125, "learning_rate": 1.1611355942781586e-05, "loss": 1.955, "step": 18194 }, { "epoch": 0.5870408198245249, "grad_norm": 0.330078125, "learning_rate": 1.160982291121711e-05, "loss": 1.9623, "step": 18195 }, { "epoch": 0.5870730836783213, "grad_norm": 0.314453125, "learning_rate": 1.1608289916969276e-05, "loss": 1.9709, "step": 18196 }, { "epoch": 0.5871053475321176, "grad_norm": 0.32421875, "learning_rate": 1.1606756960054947e-05, "loss": 1.9627, "step": 18197 }, { "epoch": 0.587137611385914, "grad_norm": 0.32421875, "learning_rate": 1.1605224040491e-05, "loss": 1.9721, "step": 18198 }, { "epoch": 0.5871698752397103, "grad_norm": 0.318359375, "learning_rate": 1.1603691158294321e-05, "loss": 2.0046, "step": 18199 }, { "epoch": 0.5872021390935067, "grad_norm": 0.32421875, "learning_rate": 1.1602158313481766e-05, "loss": 1.9853, "step": 18200 }, { "epoch": 0.587234402947303, "grad_norm": 0.33984375, "learning_rate": 1.1600625506070216e-05, "loss": 1.9669, "step": 18201 }, { "epoch": 0.5872666668010994, "grad_norm": 0.330078125, "learning_rate": 1.1599092736076544e-05, "loss": 1.9825, "step": 18202 }, { "epoch": 0.5872989306548957, "grad_norm": 0.328125, "learning_rate": 1.1597560003517617e-05, "loss": 1.9465, "step": 18203 }, { "epoch": 0.5873311945086921, "grad_norm": 0.32421875, "learning_rate": 1.1596027308410308e-05, "loss": 1.9633, "step": 18204 }, { "epoch": 0.5873634583624884, "grad_norm": 0.33203125, "learning_rate": 1.1594494650771492e-05, "loss": 1.9753, "step": 18205 }, { "epoch": 0.5873957222162848, "grad_norm": 0.326171875, "learning_rate": 1.1592962030618034e-05, "loss": 1.9609, "step": 18206 }, { "epoch": 0.5874279860700812, "grad_norm": 0.333984375, "learning_rate": 1.1591429447966805e-05, "loss": 1.9799, "step": 18207 }, { "epoch": 0.5874602499238775, "grad_norm": 0.318359375, "learning_rate": 1.1589896902834676e-05, "loss": 1.9496, "step": 18208 }, { "epoch": 0.5874925137776739, "grad_norm": 0.326171875, "learning_rate": 1.1588364395238515e-05, "loss": 2.0065, "step": 18209 }, { "epoch": 0.5875247776314702, "grad_norm": 0.333984375, "learning_rate": 1.158683192519519e-05, "loss": 1.9932, "step": 18210 }, { "epoch": 0.5875570414852666, "grad_norm": 0.3359375, "learning_rate": 1.1585299492721572e-05, "loss": 1.9155, "step": 18211 }, { "epoch": 0.5875893053390628, "grad_norm": 0.3515625, "learning_rate": 1.1583767097834529e-05, "loss": 1.9929, "step": 18212 }, { "epoch": 0.5876215691928592, "grad_norm": 0.33203125, "learning_rate": 1.1582234740550925e-05, "loss": 2.0028, "step": 18213 }, { "epoch": 0.5876538330466555, "grad_norm": 0.333984375, "learning_rate": 1.158070242088763e-05, "loss": 1.9639, "step": 18214 }, { "epoch": 0.5876860969004519, "grad_norm": 0.328125, "learning_rate": 1.1579170138861508e-05, "loss": 1.9446, "step": 18215 }, { "epoch": 0.5877183607542482, "grad_norm": 0.328125, "learning_rate": 1.1577637894489429e-05, "loss": 1.9345, "step": 18216 }, { "epoch": 0.5877506246080446, "grad_norm": 0.34375, "learning_rate": 1.157610568778826e-05, "loss": 1.9677, "step": 18217 }, { "epoch": 0.5877828884618409, "grad_norm": 0.337890625, "learning_rate": 1.1574573518774856e-05, "loss": 1.9818, "step": 18218 }, { "epoch": 0.5878151523156373, "grad_norm": 0.3359375, "learning_rate": 1.157304138746609e-05, "loss": 1.9696, "step": 18219 }, { "epoch": 0.5878474161694336, "grad_norm": 0.333984375, "learning_rate": 1.1571509293878835e-05, "loss": 1.9802, "step": 18220 }, { "epoch": 0.58787968002323, "grad_norm": 0.337890625, "learning_rate": 1.156997723802994e-05, "loss": 1.9634, "step": 18221 }, { "epoch": 0.5879119438770263, "grad_norm": 0.345703125, "learning_rate": 1.1568445219936268e-05, "loss": 1.9935, "step": 18222 }, { "epoch": 0.5879442077308227, "grad_norm": 0.376953125, "learning_rate": 1.1566913239614702e-05, "loss": 1.9452, "step": 18223 }, { "epoch": 0.587976471584619, "grad_norm": 0.357421875, "learning_rate": 1.1565381297082086e-05, "loss": 1.958, "step": 18224 }, { "epoch": 0.5880087354384154, "grad_norm": 0.349609375, "learning_rate": 1.1563849392355283e-05, "loss": 1.9691, "step": 18225 }, { "epoch": 0.5880409992922117, "grad_norm": 0.3359375, "learning_rate": 1.1562317525451167e-05, "loss": 1.9545, "step": 18226 }, { "epoch": 0.588073263146008, "grad_norm": 0.353515625, "learning_rate": 1.1560785696386598e-05, "loss": 1.9798, "step": 18227 }, { "epoch": 0.5881055269998045, "grad_norm": 0.365234375, "learning_rate": 1.1559253905178426e-05, "loss": 1.9714, "step": 18228 }, { "epoch": 0.5881377908536007, "grad_norm": 0.337890625, "learning_rate": 1.1557722151843517e-05, "loss": 1.9567, "step": 18229 }, { "epoch": 0.5881700547073971, "grad_norm": 0.345703125, "learning_rate": 1.1556190436398742e-05, "loss": 1.9527, "step": 18230 }, { "epoch": 0.5882023185611934, "grad_norm": 0.375, "learning_rate": 1.1554658758860944e-05, "loss": 1.9907, "step": 18231 }, { "epoch": 0.5882345824149898, "grad_norm": 0.3671875, "learning_rate": 1.1553127119246992e-05, "loss": 1.9778, "step": 18232 }, { "epoch": 0.5882668462687861, "grad_norm": 0.326171875, "learning_rate": 1.1551595517573749e-05, "loss": 1.9817, "step": 18233 }, { "epoch": 0.5882991101225825, "grad_norm": 0.37109375, "learning_rate": 1.1550063953858063e-05, "loss": 1.9459, "step": 18234 }, { "epoch": 0.5883313739763788, "grad_norm": 0.341796875, "learning_rate": 1.15485324281168e-05, "loss": 1.9901, "step": 18235 }, { "epoch": 0.5883636378301752, "grad_norm": 0.33203125, "learning_rate": 1.1547000940366816e-05, "loss": 1.9686, "step": 18236 }, { "epoch": 0.5883959016839715, "grad_norm": 0.3515625, "learning_rate": 1.1545469490624968e-05, "loss": 1.9914, "step": 18237 }, { "epoch": 0.5884281655377679, "grad_norm": 0.359375, "learning_rate": 1.1543938078908114e-05, "loss": 2.0045, "step": 18238 }, { "epoch": 0.5884604293915642, "grad_norm": 0.361328125, "learning_rate": 1.1542406705233113e-05, "loss": 1.9834, "step": 18239 }, { "epoch": 0.5884926932453606, "grad_norm": 0.330078125, "learning_rate": 1.1540875369616813e-05, "loss": 1.9645, "step": 18240 }, { "epoch": 0.5885249570991569, "grad_norm": 0.345703125, "learning_rate": 1.1539344072076077e-05, "loss": 1.9567, "step": 18241 }, { "epoch": 0.5885572209529533, "grad_norm": 0.3359375, "learning_rate": 1.1537812812627763e-05, "loss": 1.9728, "step": 18242 }, { "epoch": 0.5885894848067496, "grad_norm": 0.349609375, "learning_rate": 1.1536281591288718e-05, "loss": 2.0122, "step": 18243 }, { "epoch": 0.588621748660546, "grad_norm": 0.349609375, "learning_rate": 1.1534750408075798e-05, "loss": 1.979, "step": 18244 }, { "epoch": 0.5886540125143422, "grad_norm": 0.35546875, "learning_rate": 1.1533219263005867e-05, "loss": 1.9808, "step": 18245 }, { "epoch": 0.5886862763681386, "grad_norm": 0.341796875, "learning_rate": 1.1531688156095765e-05, "loss": 1.9601, "step": 18246 }, { "epoch": 0.588718540221935, "grad_norm": 0.328125, "learning_rate": 1.1530157087362352e-05, "loss": 1.9793, "step": 18247 }, { "epoch": 0.5887508040757313, "grad_norm": 0.36328125, "learning_rate": 1.1528626056822486e-05, "loss": 1.9851, "step": 18248 }, { "epoch": 0.5887830679295277, "grad_norm": 0.333984375, "learning_rate": 1.1527095064493007e-05, "loss": 2.0017, "step": 18249 }, { "epoch": 0.588815331783324, "grad_norm": 0.341796875, "learning_rate": 1.1525564110390777e-05, "loss": 1.9887, "step": 18250 }, { "epoch": 0.5888475956371204, "grad_norm": 0.361328125, "learning_rate": 1.152403319453265e-05, "loss": 2.006, "step": 18251 }, { "epoch": 0.5888798594909167, "grad_norm": 0.349609375, "learning_rate": 1.1522502316935465e-05, "loss": 1.9813, "step": 18252 }, { "epoch": 0.5889121233447131, "grad_norm": 0.345703125, "learning_rate": 1.1520971477616078e-05, "loss": 1.9649, "step": 18253 }, { "epoch": 0.5889443871985094, "grad_norm": 0.34765625, "learning_rate": 1.1519440676591351e-05, "loss": 1.9799, "step": 18254 }, { "epoch": 0.5889766510523058, "grad_norm": 0.37890625, "learning_rate": 1.1517909913878116e-05, "loss": 1.981, "step": 18255 }, { "epoch": 0.5890089149061021, "grad_norm": 0.328125, "learning_rate": 1.151637918949323e-05, "loss": 1.9751, "step": 18256 }, { "epoch": 0.5890411787598985, "grad_norm": 0.33984375, "learning_rate": 1.1514848503453548e-05, "loss": 1.9373, "step": 18257 }, { "epoch": 0.5890734426136948, "grad_norm": 0.357421875, "learning_rate": 1.151331785577591e-05, "loss": 1.9521, "step": 18258 }, { "epoch": 0.5891057064674912, "grad_norm": 0.451171875, "learning_rate": 1.1511787246477167e-05, "loss": 1.9764, "step": 18259 }, { "epoch": 0.5891379703212875, "grad_norm": 0.337890625, "learning_rate": 1.1510256675574163e-05, "loss": 1.9861, "step": 18260 }, { "epoch": 0.5891702341750839, "grad_norm": 0.34765625, "learning_rate": 1.1508726143083763e-05, "loss": 1.9787, "step": 18261 }, { "epoch": 0.5892024980288801, "grad_norm": 0.326171875, "learning_rate": 1.150719564902279e-05, "loss": 1.9338, "step": 18262 }, { "epoch": 0.5892347618826765, "grad_norm": 0.33203125, "learning_rate": 1.1505665193408106e-05, "loss": 1.9394, "step": 18263 }, { "epoch": 0.5892670257364728, "grad_norm": 0.341796875, "learning_rate": 1.1504134776256555e-05, "loss": 1.9708, "step": 18264 }, { "epoch": 0.5892992895902692, "grad_norm": 0.3515625, "learning_rate": 1.1502604397584977e-05, "loss": 1.9876, "step": 18265 }, { "epoch": 0.5893315534440655, "grad_norm": 0.33203125, "learning_rate": 1.150107405741022e-05, "loss": 1.9678, "step": 18266 }, { "epoch": 0.5893638172978619, "grad_norm": 0.330078125, "learning_rate": 1.1499543755749133e-05, "loss": 1.9667, "step": 18267 }, { "epoch": 0.5893960811516583, "grad_norm": 0.337890625, "learning_rate": 1.1498013492618554e-05, "loss": 2.0262, "step": 18268 }, { "epoch": 0.5894283450054546, "grad_norm": 0.33203125, "learning_rate": 1.1496483268035329e-05, "loss": 1.9839, "step": 18269 }, { "epoch": 0.589460608859251, "grad_norm": 0.337890625, "learning_rate": 1.1494953082016309e-05, "loss": 1.9909, "step": 18270 }, { "epoch": 0.5894928727130473, "grad_norm": 0.337890625, "learning_rate": 1.1493422934578326e-05, "loss": 1.9695, "step": 18271 }, { "epoch": 0.5895251365668437, "grad_norm": 0.318359375, "learning_rate": 1.1491892825738226e-05, "loss": 1.9815, "step": 18272 }, { "epoch": 0.58955740042064, "grad_norm": 0.33203125, "learning_rate": 1.1490362755512859e-05, "loss": 1.9934, "step": 18273 }, { "epoch": 0.5895896642744364, "grad_norm": 0.33984375, "learning_rate": 1.1488832723919056e-05, "loss": 1.9818, "step": 18274 }, { "epoch": 0.5896219281282327, "grad_norm": 0.322265625, "learning_rate": 1.1487302730973665e-05, "loss": 2.011, "step": 18275 }, { "epoch": 0.5896541919820291, "grad_norm": 0.328125, "learning_rate": 1.1485772776693527e-05, "loss": 1.9836, "step": 18276 }, { "epoch": 0.5896864558358254, "grad_norm": 0.333984375, "learning_rate": 1.1484242861095479e-05, "loss": 1.9898, "step": 18277 }, { "epoch": 0.5897187196896218, "grad_norm": 0.333984375, "learning_rate": 1.1482712984196364e-05, "loss": 1.9784, "step": 18278 }, { "epoch": 0.589750983543418, "grad_norm": 0.3203125, "learning_rate": 1.1481183146013026e-05, "loss": 1.9807, "step": 18279 }, { "epoch": 0.5897832473972144, "grad_norm": 0.330078125, "learning_rate": 1.1479653346562291e-05, "loss": 1.9926, "step": 18280 }, { "epoch": 0.5898155112510107, "grad_norm": 0.330078125, "learning_rate": 1.147812358586101e-05, "loss": 1.9874, "step": 18281 }, { "epoch": 0.5898477751048071, "grad_norm": 0.326171875, "learning_rate": 1.1476593863926024e-05, "loss": 2.0099, "step": 18282 }, { "epoch": 0.5898800389586034, "grad_norm": 0.3203125, "learning_rate": 1.1475064180774159e-05, "loss": 1.9922, "step": 18283 }, { "epoch": 0.5899123028123998, "grad_norm": 0.326171875, "learning_rate": 1.1473534536422255e-05, "loss": 1.9609, "step": 18284 }, { "epoch": 0.5899445666661961, "grad_norm": 0.333984375, "learning_rate": 1.1472004930887163e-05, "loss": 1.9921, "step": 18285 }, { "epoch": 0.5899768305199925, "grad_norm": 0.333984375, "learning_rate": 1.1470475364185703e-05, "loss": 1.9816, "step": 18286 }, { "epoch": 0.5900090943737889, "grad_norm": 0.341796875, "learning_rate": 1.1468945836334717e-05, "loss": 1.9869, "step": 18287 }, { "epoch": 0.5900413582275852, "grad_norm": 0.32421875, "learning_rate": 1.1467416347351048e-05, "loss": 1.9736, "step": 18288 }, { "epoch": 0.5900736220813816, "grad_norm": 0.318359375, "learning_rate": 1.146588689725152e-05, "loss": 1.9996, "step": 18289 }, { "epoch": 0.5901058859351779, "grad_norm": 0.3359375, "learning_rate": 1.1464357486052973e-05, "loss": 1.9692, "step": 18290 }, { "epoch": 0.5901381497889743, "grad_norm": 0.328125, "learning_rate": 1.1462828113772242e-05, "loss": 1.9632, "step": 18291 }, { "epoch": 0.5901704136427706, "grad_norm": 0.34375, "learning_rate": 1.1461298780426169e-05, "loss": 1.9879, "step": 18292 }, { "epoch": 0.590202677496567, "grad_norm": 0.333984375, "learning_rate": 1.1459769486031576e-05, "loss": 1.9508, "step": 18293 }, { "epoch": 0.5902349413503633, "grad_norm": 0.330078125, "learning_rate": 1.1458240230605299e-05, "loss": 2.0158, "step": 18294 }, { "epoch": 0.5902672052041597, "grad_norm": 0.333984375, "learning_rate": 1.1456711014164176e-05, "loss": 1.9791, "step": 18295 }, { "epoch": 0.5902994690579559, "grad_norm": 0.3359375, "learning_rate": 1.1455181836725034e-05, "loss": 1.9731, "step": 18296 }, { "epoch": 0.5903317329117523, "grad_norm": 0.345703125, "learning_rate": 1.1453652698304706e-05, "loss": 1.9627, "step": 18297 }, { "epoch": 0.5903639967655486, "grad_norm": 0.33203125, "learning_rate": 1.1452123598920029e-05, "loss": 1.9868, "step": 18298 }, { "epoch": 0.590396260619345, "grad_norm": 0.33984375, "learning_rate": 1.1450594538587826e-05, "loss": 1.9901, "step": 18299 }, { "epoch": 0.5904285244731413, "grad_norm": 0.349609375, "learning_rate": 1.1449065517324933e-05, "loss": 1.9646, "step": 18300 }, { "epoch": 0.5904607883269377, "grad_norm": 0.333984375, "learning_rate": 1.1447536535148183e-05, "loss": 1.9952, "step": 18301 }, { "epoch": 0.590493052180734, "grad_norm": 0.34375, "learning_rate": 1.1446007592074398e-05, "loss": 1.9548, "step": 18302 }, { "epoch": 0.5905253160345304, "grad_norm": 0.3359375, "learning_rate": 1.1444478688120412e-05, "loss": 1.9947, "step": 18303 }, { "epoch": 0.5905575798883267, "grad_norm": 0.337890625, "learning_rate": 1.1442949823303058e-05, "loss": 2.0019, "step": 18304 }, { "epoch": 0.5905898437421231, "grad_norm": 0.333984375, "learning_rate": 1.1441420997639156e-05, "loss": 1.9601, "step": 18305 }, { "epoch": 0.5906221075959194, "grad_norm": 0.33203125, "learning_rate": 1.1439892211145539e-05, "loss": 1.9691, "step": 18306 }, { "epoch": 0.5906543714497158, "grad_norm": 0.330078125, "learning_rate": 1.143836346383904e-05, "loss": 1.9768, "step": 18307 }, { "epoch": 0.5906866353035122, "grad_norm": 0.330078125, "learning_rate": 1.1436834755736473e-05, "loss": 1.9756, "step": 18308 }, { "epoch": 0.5907188991573085, "grad_norm": 0.328125, "learning_rate": 1.1435306086854677e-05, "loss": 1.9489, "step": 18309 }, { "epoch": 0.5907511630111049, "grad_norm": 0.330078125, "learning_rate": 1.1433777457210478e-05, "loss": 1.9765, "step": 18310 }, { "epoch": 0.5907834268649012, "grad_norm": 0.322265625, "learning_rate": 1.143224886682069e-05, "loss": 1.9558, "step": 18311 }, { "epoch": 0.5908156907186976, "grad_norm": 0.33203125, "learning_rate": 1.1430720315702153e-05, "loss": 2.0108, "step": 18312 }, { "epoch": 0.5908479545724938, "grad_norm": 0.33203125, "learning_rate": 1.142919180387169e-05, "loss": 1.9923, "step": 18313 }, { "epoch": 0.5908802184262902, "grad_norm": 0.318359375, "learning_rate": 1.1427663331346114e-05, "loss": 1.9217, "step": 18314 }, { "epoch": 0.5909124822800865, "grad_norm": 0.328125, "learning_rate": 1.1426134898142255e-05, "loss": 1.9988, "step": 18315 }, { "epoch": 0.5909447461338829, "grad_norm": 0.33203125, "learning_rate": 1.1424606504276949e-05, "loss": 1.9962, "step": 18316 }, { "epoch": 0.5909770099876792, "grad_norm": 0.32421875, "learning_rate": 1.1423078149767005e-05, "loss": 1.9768, "step": 18317 }, { "epoch": 0.5910092738414756, "grad_norm": 0.326171875, "learning_rate": 1.1421549834629247e-05, "loss": 1.9753, "step": 18318 }, { "epoch": 0.5910415376952719, "grad_norm": 0.3359375, "learning_rate": 1.1420021558880507e-05, "loss": 1.9761, "step": 18319 }, { "epoch": 0.5910738015490683, "grad_norm": 0.330078125, "learning_rate": 1.1418493322537598e-05, "loss": 1.967, "step": 18320 }, { "epoch": 0.5911060654028646, "grad_norm": 0.32421875, "learning_rate": 1.1416965125617344e-05, "loss": 1.9716, "step": 18321 }, { "epoch": 0.591138329256661, "grad_norm": 0.33984375, "learning_rate": 1.1415436968136572e-05, "loss": 1.9783, "step": 18322 }, { "epoch": 0.5911705931104573, "grad_norm": 0.322265625, "learning_rate": 1.1413908850112094e-05, "loss": 1.9518, "step": 18323 }, { "epoch": 0.5912028569642537, "grad_norm": 0.328125, "learning_rate": 1.1412380771560736e-05, "loss": 1.9651, "step": 18324 }, { "epoch": 0.59123512081805, "grad_norm": 0.337890625, "learning_rate": 1.1410852732499317e-05, "loss": 1.9766, "step": 18325 }, { "epoch": 0.5912673846718464, "grad_norm": 0.3359375, "learning_rate": 1.1409324732944657e-05, "loss": 1.9383, "step": 18326 }, { "epoch": 0.5912996485256427, "grad_norm": 0.322265625, "learning_rate": 1.1407796772913573e-05, "loss": 1.9756, "step": 18327 }, { "epoch": 0.591331912379439, "grad_norm": 0.326171875, "learning_rate": 1.1406268852422884e-05, "loss": 1.9941, "step": 18328 }, { "epoch": 0.5913641762332355, "grad_norm": 0.349609375, "learning_rate": 1.1404740971489415e-05, "loss": 1.999, "step": 18329 }, { "epoch": 0.5913964400870317, "grad_norm": 0.328125, "learning_rate": 1.1403213130129973e-05, "loss": 1.9429, "step": 18330 }, { "epoch": 0.5914287039408281, "grad_norm": 0.326171875, "learning_rate": 1.140168532836138e-05, "loss": 1.9576, "step": 18331 }, { "epoch": 0.5914609677946244, "grad_norm": 0.3359375, "learning_rate": 1.1400157566200458e-05, "loss": 1.9918, "step": 18332 }, { "epoch": 0.5914932316484208, "grad_norm": 0.333984375, "learning_rate": 1.1398629843664015e-05, "loss": 1.9861, "step": 18333 }, { "epoch": 0.5915254955022171, "grad_norm": 0.32421875, "learning_rate": 1.1397102160768871e-05, "loss": 1.983, "step": 18334 }, { "epoch": 0.5915577593560135, "grad_norm": 0.333984375, "learning_rate": 1.1395574517531846e-05, "loss": 1.9458, "step": 18335 }, { "epoch": 0.5915900232098098, "grad_norm": 0.333984375, "learning_rate": 1.1394046913969747e-05, "loss": 1.9756, "step": 18336 }, { "epoch": 0.5916222870636062, "grad_norm": 0.330078125, "learning_rate": 1.1392519350099392e-05, "loss": 1.969, "step": 18337 }, { "epoch": 0.5916545509174025, "grad_norm": 0.333984375, "learning_rate": 1.1390991825937604e-05, "loss": 1.9723, "step": 18338 }, { "epoch": 0.5916868147711989, "grad_norm": 0.333984375, "learning_rate": 1.1389464341501178e-05, "loss": 1.9612, "step": 18339 }, { "epoch": 0.5917190786249952, "grad_norm": 0.33203125, "learning_rate": 1.1387936896806943e-05, "loss": 1.9696, "step": 18340 }, { "epoch": 0.5917513424787916, "grad_norm": 0.33203125, "learning_rate": 1.1386409491871712e-05, "loss": 1.9856, "step": 18341 }, { "epoch": 0.5917836063325879, "grad_norm": 0.32421875, "learning_rate": 1.1384882126712284e-05, "loss": 1.9576, "step": 18342 }, { "epoch": 0.5918158701863843, "grad_norm": 0.341796875, "learning_rate": 1.1383354801345482e-05, "loss": 1.9497, "step": 18343 }, { "epoch": 0.5918481340401806, "grad_norm": 0.330078125, "learning_rate": 1.1381827515788123e-05, "loss": 1.9782, "step": 18344 }, { "epoch": 0.591880397893977, "grad_norm": 0.326171875, "learning_rate": 1.1380300270057006e-05, "loss": 1.9687, "step": 18345 }, { "epoch": 0.5919126617477732, "grad_norm": 0.326171875, "learning_rate": 1.1378773064168942e-05, "loss": 1.9549, "step": 18346 }, { "epoch": 0.5919449256015696, "grad_norm": 0.330078125, "learning_rate": 1.1377245898140759e-05, "loss": 1.9557, "step": 18347 }, { "epoch": 0.591977189455366, "grad_norm": 0.326171875, "learning_rate": 1.1375718771989244e-05, "loss": 1.9612, "step": 18348 }, { "epoch": 0.5920094533091623, "grad_norm": 0.32421875, "learning_rate": 1.1374191685731219e-05, "loss": 1.9951, "step": 18349 }, { "epoch": 0.5920417171629587, "grad_norm": 0.333984375, "learning_rate": 1.1372664639383493e-05, "loss": 1.9635, "step": 18350 }, { "epoch": 0.592073981016755, "grad_norm": 0.353515625, "learning_rate": 1.137113763296287e-05, "loss": 1.9673, "step": 18351 }, { "epoch": 0.5921062448705514, "grad_norm": 0.33203125, "learning_rate": 1.136961066648616e-05, "loss": 1.9641, "step": 18352 }, { "epoch": 0.5921385087243477, "grad_norm": 0.34375, "learning_rate": 1.1368083739970174e-05, "loss": 1.9947, "step": 18353 }, { "epoch": 0.5921707725781441, "grad_norm": 0.32421875, "learning_rate": 1.1366556853431714e-05, "loss": 1.9738, "step": 18354 }, { "epoch": 0.5922030364319404, "grad_norm": 0.33203125, "learning_rate": 1.1365030006887588e-05, "loss": 1.9845, "step": 18355 }, { "epoch": 0.5922353002857368, "grad_norm": 0.33984375, "learning_rate": 1.1363503200354609e-05, "loss": 1.946, "step": 18356 }, { "epoch": 0.5922675641395331, "grad_norm": 0.326171875, "learning_rate": 1.1361976433849576e-05, "loss": 1.9719, "step": 18357 }, { "epoch": 0.5922998279933295, "grad_norm": 0.326171875, "learning_rate": 1.1360449707389294e-05, "loss": 1.9591, "step": 18358 }, { "epoch": 0.5923320918471258, "grad_norm": 0.33984375, "learning_rate": 1.1358923020990571e-05, "loss": 1.9761, "step": 18359 }, { "epoch": 0.5923643557009222, "grad_norm": 0.3359375, "learning_rate": 1.1357396374670214e-05, "loss": 1.9726, "step": 18360 }, { "epoch": 0.5923966195547185, "grad_norm": 0.33203125, "learning_rate": 1.1355869768445023e-05, "loss": 2.0214, "step": 18361 }, { "epoch": 0.5924288834085149, "grad_norm": 0.322265625, "learning_rate": 1.1354343202331802e-05, "loss": 1.9377, "step": 18362 }, { "epoch": 0.5924611472623111, "grad_norm": 0.33984375, "learning_rate": 1.1352816676347361e-05, "loss": 1.9466, "step": 18363 }, { "epoch": 0.5924934111161075, "grad_norm": 0.337890625, "learning_rate": 1.1351290190508493e-05, "loss": 1.9653, "step": 18364 }, { "epoch": 0.5925256749699038, "grad_norm": 0.3515625, "learning_rate": 1.1349763744832003e-05, "loss": 2.0008, "step": 18365 }, { "epoch": 0.5925579388237002, "grad_norm": 0.32421875, "learning_rate": 1.1348237339334701e-05, "loss": 1.9763, "step": 18366 }, { "epoch": 0.5925902026774965, "grad_norm": 0.33203125, "learning_rate": 1.134671097403338e-05, "loss": 1.9602, "step": 18367 }, { "epoch": 0.5926224665312929, "grad_norm": 0.326171875, "learning_rate": 1.134518464894484e-05, "loss": 1.9785, "step": 18368 }, { "epoch": 0.5926547303850893, "grad_norm": 0.322265625, "learning_rate": 1.1343658364085896e-05, "loss": 2.006, "step": 18369 }, { "epoch": 0.5926869942388856, "grad_norm": 0.330078125, "learning_rate": 1.1342132119473325e-05, "loss": 1.9431, "step": 18370 }, { "epoch": 0.592719258092682, "grad_norm": 0.3359375, "learning_rate": 1.1340605915123945e-05, "loss": 1.9626, "step": 18371 }, { "epoch": 0.5927515219464783, "grad_norm": 0.330078125, "learning_rate": 1.1339079751054553e-05, "loss": 1.9738, "step": 18372 }, { "epoch": 0.5927837858002747, "grad_norm": 0.330078125, "learning_rate": 1.1337553627281943e-05, "loss": 1.9687, "step": 18373 }, { "epoch": 0.592816049654071, "grad_norm": 0.33203125, "learning_rate": 1.1336027543822908e-05, "loss": 1.9686, "step": 18374 }, { "epoch": 0.5928483135078674, "grad_norm": 0.330078125, "learning_rate": 1.1334501500694264e-05, "loss": 1.9655, "step": 18375 }, { "epoch": 0.5928805773616637, "grad_norm": 0.32421875, "learning_rate": 1.1332975497912792e-05, "loss": 1.9456, "step": 18376 }, { "epoch": 0.5929128412154601, "grad_norm": 0.333984375, "learning_rate": 1.1331449535495292e-05, "loss": 1.952, "step": 18377 }, { "epoch": 0.5929451050692564, "grad_norm": 0.3203125, "learning_rate": 1.1329923613458573e-05, "loss": 1.9364, "step": 18378 }, { "epoch": 0.5929773689230528, "grad_norm": 0.33203125, "learning_rate": 1.1328397731819414e-05, "loss": 1.9953, "step": 18379 }, { "epoch": 0.593009632776849, "grad_norm": 0.328125, "learning_rate": 1.132687189059462e-05, "loss": 1.9805, "step": 18380 }, { "epoch": 0.5930418966306454, "grad_norm": 0.326171875, "learning_rate": 1.132534608980099e-05, "loss": 1.9858, "step": 18381 }, { "epoch": 0.5930741604844417, "grad_norm": 0.328125, "learning_rate": 1.132382032945531e-05, "loss": 1.9681, "step": 18382 }, { "epoch": 0.5931064243382381, "grad_norm": 0.328125, "learning_rate": 1.1322294609574378e-05, "loss": 1.9502, "step": 18383 }, { "epoch": 0.5931386881920344, "grad_norm": 0.33203125, "learning_rate": 1.1320768930174992e-05, "loss": 1.9533, "step": 18384 }, { "epoch": 0.5931709520458308, "grad_norm": 0.328125, "learning_rate": 1.1319243291273938e-05, "loss": 1.9857, "step": 18385 }, { "epoch": 0.5932032158996271, "grad_norm": 0.33984375, "learning_rate": 1.1317717692888014e-05, "loss": 1.9534, "step": 18386 }, { "epoch": 0.5932354797534235, "grad_norm": 0.333984375, "learning_rate": 1.1316192135034014e-05, "loss": 1.9552, "step": 18387 }, { "epoch": 0.5932677436072198, "grad_norm": 0.33984375, "learning_rate": 1.1314666617728727e-05, "loss": 1.942, "step": 18388 }, { "epoch": 0.5933000074610162, "grad_norm": 0.33984375, "learning_rate": 1.1313141140988945e-05, "loss": 1.9786, "step": 18389 }, { "epoch": 0.5933322713148126, "grad_norm": 0.337890625, "learning_rate": 1.1311615704831465e-05, "loss": 1.9767, "step": 18390 }, { "epoch": 0.5933645351686089, "grad_norm": 0.34375, "learning_rate": 1.1310090309273067e-05, "loss": 1.9749, "step": 18391 }, { "epoch": 0.5933967990224053, "grad_norm": 0.3515625, "learning_rate": 1.130856495433055e-05, "loss": 2.0021, "step": 18392 }, { "epoch": 0.5934290628762016, "grad_norm": 0.328125, "learning_rate": 1.13070396400207e-05, "loss": 1.9691, "step": 18393 }, { "epoch": 0.593461326729998, "grad_norm": 0.34765625, "learning_rate": 1.1305514366360315e-05, "loss": 1.9982, "step": 18394 }, { "epoch": 0.5934935905837943, "grad_norm": 0.3359375, "learning_rate": 1.1303989133366171e-05, "loss": 1.9958, "step": 18395 }, { "epoch": 0.5935258544375906, "grad_norm": 0.333984375, "learning_rate": 1.1302463941055064e-05, "loss": 1.9527, "step": 18396 }, { "epoch": 0.5935581182913869, "grad_norm": 0.333984375, "learning_rate": 1.1300938789443786e-05, "loss": 1.9636, "step": 18397 }, { "epoch": 0.5935903821451833, "grad_norm": 0.3359375, "learning_rate": 1.1299413678549117e-05, "loss": 1.9349, "step": 18398 }, { "epoch": 0.5936226459989796, "grad_norm": 0.33203125, "learning_rate": 1.1297888608387845e-05, "loss": 1.983, "step": 18399 }, { "epoch": 0.593654909852776, "grad_norm": 0.33203125, "learning_rate": 1.1296363578976767e-05, "loss": 2.0007, "step": 18400 }, { "epoch": 0.5936871737065723, "grad_norm": 0.328125, "learning_rate": 1.1294838590332651e-05, "loss": 1.9685, "step": 18401 }, { "epoch": 0.5937194375603687, "grad_norm": 0.328125, "learning_rate": 1.1293313642472298e-05, "loss": 1.9724, "step": 18402 }, { "epoch": 0.593751701414165, "grad_norm": 0.328125, "learning_rate": 1.1291788735412495e-05, "loss": 1.9672, "step": 18403 }, { "epoch": 0.5937839652679614, "grad_norm": 0.345703125, "learning_rate": 1.1290263869170016e-05, "loss": 1.9828, "step": 18404 }, { "epoch": 0.5938162291217577, "grad_norm": 0.328125, "learning_rate": 1.1288739043761648e-05, "loss": 1.9726, "step": 18405 }, { "epoch": 0.5938484929755541, "grad_norm": 0.328125, "learning_rate": 1.1287214259204187e-05, "loss": 1.964, "step": 18406 }, { "epoch": 0.5938807568293504, "grad_norm": 0.337890625, "learning_rate": 1.1285689515514402e-05, "loss": 1.9805, "step": 18407 }, { "epoch": 0.5939130206831468, "grad_norm": 0.322265625, "learning_rate": 1.1284164812709078e-05, "loss": 2.0141, "step": 18408 }, { "epoch": 0.5939452845369432, "grad_norm": 0.3359375, "learning_rate": 1.1282640150805011e-05, "loss": 1.997, "step": 18409 }, { "epoch": 0.5939775483907395, "grad_norm": 0.345703125, "learning_rate": 1.128111552981897e-05, "loss": 1.9713, "step": 18410 }, { "epoch": 0.5940098122445359, "grad_norm": 0.3359375, "learning_rate": 1.1279590949767741e-05, "loss": 1.9724, "step": 18411 }, { "epoch": 0.5940420760983322, "grad_norm": 0.333984375, "learning_rate": 1.1278066410668109e-05, "loss": 1.9851, "step": 18412 }, { "epoch": 0.5940743399521285, "grad_norm": 0.33984375, "learning_rate": 1.1276541912536846e-05, "loss": 1.9574, "step": 18413 }, { "epoch": 0.5941066038059248, "grad_norm": 0.318359375, "learning_rate": 1.1275017455390741e-05, "loss": 1.9671, "step": 18414 }, { "epoch": 0.5941388676597212, "grad_norm": 0.326171875, "learning_rate": 1.1273493039246574e-05, "loss": 1.9618, "step": 18415 }, { "epoch": 0.5941711315135175, "grad_norm": 0.322265625, "learning_rate": 1.127196866412112e-05, "loss": 1.9638, "step": 18416 }, { "epoch": 0.5942033953673139, "grad_norm": 0.326171875, "learning_rate": 1.127044433003116e-05, "loss": 1.9642, "step": 18417 }, { "epoch": 0.5942356592211102, "grad_norm": 0.341796875, "learning_rate": 1.1268920036993477e-05, "loss": 1.974, "step": 18418 }, { "epoch": 0.5942679230749066, "grad_norm": 0.326171875, "learning_rate": 1.1267395785024841e-05, "loss": 1.9653, "step": 18419 }, { "epoch": 0.5943001869287029, "grad_norm": 0.33203125, "learning_rate": 1.1265871574142036e-05, "loss": 1.9575, "step": 18420 }, { "epoch": 0.5943324507824993, "grad_norm": 0.328125, "learning_rate": 1.126434740436184e-05, "loss": 1.9899, "step": 18421 }, { "epoch": 0.5943647146362956, "grad_norm": 0.345703125, "learning_rate": 1.1262823275701025e-05, "loss": 1.9338, "step": 18422 }, { "epoch": 0.594396978490092, "grad_norm": 0.32421875, "learning_rate": 1.126129918817637e-05, "loss": 1.9584, "step": 18423 }, { "epoch": 0.5944292423438883, "grad_norm": 0.341796875, "learning_rate": 1.125977514180465e-05, "loss": 1.9574, "step": 18424 }, { "epoch": 0.5944615061976847, "grad_norm": 0.33984375, "learning_rate": 1.1258251136602646e-05, "loss": 1.9947, "step": 18425 }, { "epoch": 0.594493770051481, "grad_norm": 0.33984375, "learning_rate": 1.1256727172587128e-05, "loss": 1.9913, "step": 18426 }, { "epoch": 0.5945260339052774, "grad_norm": 0.3359375, "learning_rate": 1.125520324977487e-05, "loss": 1.9638, "step": 18427 }, { "epoch": 0.5945582977590737, "grad_norm": 0.32421875, "learning_rate": 1.1253679368182653e-05, "loss": 1.9885, "step": 18428 }, { "epoch": 0.59459056161287, "grad_norm": 0.337890625, "learning_rate": 1.125215552782724e-05, "loss": 1.9805, "step": 18429 }, { "epoch": 0.5946228254666664, "grad_norm": 0.326171875, "learning_rate": 1.1250631728725413e-05, "loss": 1.9404, "step": 18430 }, { "epoch": 0.5946550893204627, "grad_norm": 0.322265625, "learning_rate": 1.1249107970893946e-05, "loss": 1.9587, "step": 18431 }, { "epoch": 0.5946873531742591, "grad_norm": 0.330078125, "learning_rate": 1.1247584254349601e-05, "loss": 1.9643, "step": 18432 }, { "epoch": 0.5947196170280554, "grad_norm": 0.33203125, "learning_rate": 1.1246060579109156e-05, "loss": 1.9962, "step": 18433 }, { "epoch": 0.5947518808818518, "grad_norm": 0.3359375, "learning_rate": 1.1244536945189392e-05, "loss": 1.9824, "step": 18434 }, { "epoch": 0.5947841447356481, "grad_norm": 0.330078125, "learning_rate": 1.1243013352607064e-05, "loss": 1.9882, "step": 18435 }, { "epoch": 0.5948164085894445, "grad_norm": 0.330078125, "learning_rate": 1.1241489801378948e-05, "loss": 1.986, "step": 18436 }, { "epoch": 0.5948486724432408, "grad_norm": 0.33203125, "learning_rate": 1.1239966291521826e-05, "loss": 1.9775, "step": 18437 }, { "epoch": 0.5948809362970372, "grad_norm": 0.3359375, "learning_rate": 1.1238442823052449e-05, "loss": 1.9462, "step": 18438 }, { "epoch": 0.5949132001508335, "grad_norm": 0.333984375, "learning_rate": 1.1236919395987596e-05, "loss": 1.996, "step": 18439 }, { "epoch": 0.5949454640046299, "grad_norm": 0.33203125, "learning_rate": 1.1235396010344036e-05, "loss": 1.9668, "step": 18440 }, { "epoch": 0.5949777278584262, "grad_norm": 0.330078125, "learning_rate": 1.1233872666138534e-05, "loss": 1.9648, "step": 18441 }, { "epoch": 0.5950099917122226, "grad_norm": 0.330078125, "learning_rate": 1.1232349363387861e-05, "loss": 1.9741, "step": 18442 }, { "epoch": 0.5950422555660189, "grad_norm": 0.3359375, "learning_rate": 1.1230826102108785e-05, "loss": 1.9944, "step": 18443 }, { "epoch": 0.5950745194198153, "grad_norm": 0.341796875, "learning_rate": 1.122930288231807e-05, "loss": 2.001, "step": 18444 }, { "epoch": 0.5951067832736116, "grad_norm": 0.326171875, "learning_rate": 1.1227779704032482e-05, "loss": 1.9674, "step": 18445 }, { "epoch": 0.595139047127408, "grad_norm": 0.328125, "learning_rate": 1.1226256567268794e-05, "loss": 2.0022, "step": 18446 }, { "epoch": 0.5951713109812042, "grad_norm": 0.328125, "learning_rate": 1.122473347204376e-05, "loss": 1.9635, "step": 18447 }, { "epoch": 0.5952035748350006, "grad_norm": 0.34765625, "learning_rate": 1.1223210418374154e-05, "loss": 1.9892, "step": 18448 }, { "epoch": 0.595235838688797, "grad_norm": 0.330078125, "learning_rate": 1.122168740627674e-05, "loss": 1.9734, "step": 18449 }, { "epoch": 0.5952681025425933, "grad_norm": 0.357421875, "learning_rate": 1.1220164435768278e-05, "loss": 1.9861, "step": 18450 }, { "epoch": 0.5953003663963897, "grad_norm": 0.337890625, "learning_rate": 1.1218641506865534e-05, "loss": 1.9374, "step": 18451 }, { "epoch": 0.595332630250186, "grad_norm": 0.3359375, "learning_rate": 1.1217118619585276e-05, "loss": 1.9762, "step": 18452 }, { "epoch": 0.5953648941039824, "grad_norm": 0.341796875, "learning_rate": 1.1215595773944258e-05, "loss": 1.9545, "step": 18453 }, { "epoch": 0.5953971579577787, "grad_norm": 0.322265625, "learning_rate": 1.1214072969959246e-05, "loss": 1.9419, "step": 18454 }, { "epoch": 0.5954294218115751, "grad_norm": 0.326171875, "learning_rate": 1.1212550207647012e-05, "loss": 1.9683, "step": 18455 }, { "epoch": 0.5954616856653714, "grad_norm": 0.33203125, "learning_rate": 1.1211027487024294e-05, "loss": 1.981, "step": 18456 }, { "epoch": 0.5954939495191678, "grad_norm": 0.328125, "learning_rate": 1.1209504808107874e-05, "loss": 1.9878, "step": 18457 }, { "epoch": 0.5955262133729641, "grad_norm": 0.330078125, "learning_rate": 1.1207982170914507e-05, "loss": 1.9422, "step": 18458 }, { "epoch": 0.5955584772267605, "grad_norm": 0.322265625, "learning_rate": 1.1206459575460952e-05, "loss": 1.9719, "step": 18459 }, { "epoch": 0.5955907410805568, "grad_norm": 0.326171875, "learning_rate": 1.1204937021763966e-05, "loss": 1.9742, "step": 18460 }, { "epoch": 0.5956230049343532, "grad_norm": 0.33203125, "learning_rate": 1.120341450984031e-05, "loss": 1.9727, "step": 18461 }, { "epoch": 0.5956552687881495, "grad_norm": 0.33984375, "learning_rate": 1.1201892039706753e-05, "loss": 1.9799, "step": 18462 }, { "epoch": 0.5956875326419458, "grad_norm": 0.337890625, "learning_rate": 1.1200369611380032e-05, "loss": 1.9747, "step": 18463 }, { "epoch": 0.5957197964957421, "grad_norm": 0.33984375, "learning_rate": 1.1198847224876921e-05, "loss": 2.0003, "step": 18464 }, { "epoch": 0.5957520603495385, "grad_norm": 0.3203125, "learning_rate": 1.1197324880214176e-05, "loss": 1.9565, "step": 18465 }, { "epoch": 0.5957843242033348, "grad_norm": 0.341796875, "learning_rate": 1.1195802577408546e-05, "loss": 1.9546, "step": 18466 }, { "epoch": 0.5958165880571312, "grad_norm": 0.32421875, "learning_rate": 1.119428031647679e-05, "loss": 1.9684, "step": 18467 }, { "epoch": 0.5958488519109275, "grad_norm": 0.32421875, "learning_rate": 1.1192758097435673e-05, "loss": 1.9849, "step": 18468 }, { "epoch": 0.5958811157647239, "grad_norm": 0.341796875, "learning_rate": 1.119123592030194e-05, "loss": 1.9693, "step": 18469 }, { "epoch": 0.5959133796185203, "grad_norm": 0.33203125, "learning_rate": 1.118971378509235e-05, "loss": 1.9642, "step": 18470 }, { "epoch": 0.5959456434723166, "grad_norm": 0.326171875, "learning_rate": 1.1188191691823658e-05, "loss": 1.9583, "step": 18471 }, { "epoch": 0.595977907326113, "grad_norm": 0.35546875, "learning_rate": 1.1186669640512614e-05, "loss": 1.9745, "step": 18472 }, { "epoch": 0.5960101711799093, "grad_norm": 0.328125, "learning_rate": 1.1185147631175977e-05, "loss": 1.9693, "step": 18473 }, { "epoch": 0.5960424350337057, "grad_norm": 0.345703125, "learning_rate": 1.11836256638305e-05, "loss": 1.978, "step": 18474 }, { "epoch": 0.596074698887502, "grad_norm": 0.34375, "learning_rate": 1.118210373849293e-05, "loss": 1.9766, "step": 18475 }, { "epoch": 0.5961069627412984, "grad_norm": 0.341796875, "learning_rate": 1.1180581855180024e-05, "loss": 1.9374, "step": 18476 }, { "epoch": 0.5961392265950947, "grad_norm": 0.359375, "learning_rate": 1.1179060013908535e-05, "loss": 1.9622, "step": 18477 }, { "epoch": 0.5961714904488911, "grad_norm": 0.3359375, "learning_rate": 1.1177538214695212e-05, "loss": 1.9643, "step": 18478 }, { "epoch": 0.5962037543026873, "grad_norm": 0.322265625, "learning_rate": 1.1176016457556803e-05, "loss": 1.9499, "step": 18479 }, { "epoch": 0.5962360181564837, "grad_norm": 0.337890625, "learning_rate": 1.1174494742510066e-05, "loss": 1.9835, "step": 18480 }, { "epoch": 0.59626828201028, "grad_norm": 0.341796875, "learning_rate": 1.1172973069571745e-05, "loss": 1.9996, "step": 18481 }, { "epoch": 0.5963005458640764, "grad_norm": 0.33203125, "learning_rate": 1.1171451438758588e-05, "loss": 1.9766, "step": 18482 }, { "epoch": 0.5963328097178727, "grad_norm": 0.337890625, "learning_rate": 1.116992985008735e-05, "loss": 1.9629, "step": 18483 }, { "epoch": 0.5963650735716691, "grad_norm": 0.326171875, "learning_rate": 1.1168408303574777e-05, "loss": 1.9866, "step": 18484 }, { "epoch": 0.5963973374254654, "grad_norm": 0.33203125, "learning_rate": 1.1166886799237612e-05, "loss": 1.9607, "step": 18485 }, { "epoch": 0.5964296012792618, "grad_norm": 0.349609375, "learning_rate": 1.1165365337092619e-05, "loss": 1.9886, "step": 18486 }, { "epoch": 0.5964618651330581, "grad_norm": 0.33203125, "learning_rate": 1.1163843917156519e-05, "loss": 1.9719, "step": 18487 }, { "epoch": 0.5964941289868545, "grad_norm": 0.3359375, "learning_rate": 1.1162322539446078e-05, "loss": 1.9962, "step": 18488 }, { "epoch": 0.5965263928406508, "grad_norm": 0.333984375, "learning_rate": 1.1160801203978042e-05, "loss": 1.9511, "step": 18489 }, { "epoch": 0.5965586566944472, "grad_norm": 0.333984375, "learning_rate": 1.1159279910769148e-05, "loss": 1.9867, "step": 18490 }, { "epoch": 0.5965909205482436, "grad_norm": 0.341796875, "learning_rate": 1.1157758659836142e-05, "loss": 1.9735, "step": 18491 }, { "epoch": 0.5966231844020399, "grad_norm": 0.35546875, "learning_rate": 1.1156237451195777e-05, "loss": 1.9686, "step": 18492 }, { "epoch": 0.5966554482558363, "grad_norm": 0.3359375, "learning_rate": 1.1154716284864796e-05, "loss": 1.9886, "step": 18493 }, { "epoch": 0.5966877121096326, "grad_norm": 0.326171875, "learning_rate": 1.115319516085993e-05, "loss": 1.9636, "step": 18494 }, { "epoch": 0.596719975963429, "grad_norm": 0.32421875, "learning_rate": 1.1151674079197937e-05, "loss": 1.9725, "step": 18495 }, { "epoch": 0.5967522398172252, "grad_norm": 0.33984375, "learning_rate": 1.115015303989556e-05, "loss": 1.9962, "step": 18496 }, { "epoch": 0.5967845036710216, "grad_norm": 0.31640625, "learning_rate": 1.1148632042969532e-05, "loss": 1.9563, "step": 18497 }, { "epoch": 0.5968167675248179, "grad_norm": 0.328125, "learning_rate": 1.1147111088436595e-05, "loss": 1.9566, "step": 18498 }, { "epoch": 0.5968490313786143, "grad_norm": 0.333984375, "learning_rate": 1.1145590176313504e-05, "loss": 1.9787, "step": 18499 }, { "epoch": 0.5968812952324106, "grad_norm": 0.32421875, "learning_rate": 1.1144069306616986e-05, "loss": 1.9709, "step": 18500 }, { "epoch": 0.596913559086207, "grad_norm": 0.361328125, "learning_rate": 1.1142548479363785e-05, "loss": 1.9556, "step": 18501 }, { "epoch": 0.5969458229400033, "grad_norm": 0.33203125, "learning_rate": 1.1141027694570648e-05, "loss": 1.9688, "step": 18502 }, { "epoch": 0.5969780867937997, "grad_norm": 0.33203125, "learning_rate": 1.1139506952254308e-05, "loss": 1.997, "step": 18503 }, { "epoch": 0.597010350647596, "grad_norm": 0.34375, "learning_rate": 1.1137986252431503e-05, "loss": 1.9468, "step": 18504 }, { "epoch": 0.5970426145013924, "grad_norm": 0.33203125, "learning_rate": 1.113646559511898e-05, "loss": 1.9812, "step": 18505 }, { "epoch": 0.5970748783551887, "grad_norm": 0.33984375, "learning_rate": 1.1134944980333468e-05, "loss": 1.958, "step": 18506 }, { "epoch": 0.5971071422089851, "grad_norm": 0.34765625, "learning_rate": 1.1133424408091707e-05, "loss": 1.9431, "step": 18507 }, { "epoch": 0.5971394060627814, "grad_norm": 0.341796875, "learning_rate": 1.1131903878410442e-05, "loss": 1.9771, "step": 18508 }, { "epoch": 0.5971716699165778, "grad_norm": 0.333984375, "learning_rate": 1.1130383391306399e-05, "loss": 1.9705, "step": 18509 }, { "epoch": 0.5972039337703742, "grad_norm": 0.333984375, "learning_rate": 1.112886294679632e-05, "loss": 1.9497, "step": 18510 }, { "epoch": 0.5972361976241705, "grad_norm": 0.326171875, "learning_rate": 1.1127342544896946e-05, "loss": 1.9747, "step": 18511 }, { "epoch": 0.5972684614779669, "grad_norm": 0.3359375, "learning_rate": 1.1125822185625003e-05, "loss": 1.9598, "step": 18512 }, { "epoch": 0.5973007253317631, "grad_norm": 0.34765625, "learning_rate": 1.112430186899723e-05, "loss": 1.9525, "step": 18513 }, { "epoch": 0.5973329891855595, "grad_norm": 0.322265625, "learning_rate": 1.1122781595030363e-05, "loss": 1.9855, "step": 18514 }, { "epoch": 0.5973652530393558, "grad_norm": 0.322265625, "learning_rate": 1.1121261363741133e-05, "loss": 1.9954, "step": 18515 }, { "epoch": 0.5973975168931522, "grad_norm": 0.328125, "learning_rate": 1.1119741175146275e-05, "loss": 1.9795, "step": 18516 }, { "epoch": 0.5974297807469485, "grad_norm": 0.322265625, "learning_rate": 1.1118221029262527e-05, "loss": 1.9843, "step": 18517 }, { "epoch": 0.5974620446007449, "grad_norm": 0.326171875, "learning_rate": 1.1116700926106612e-05, "loss": 1.9828, "step": 18518 }, { "epoch": 0.5974943084545412, "grad_norm": 0.33203125, "learning_rate": 1.1115180865695265e-05, "loss": 1.956, "step": 18519 }, { "epoch": 0.5975265723083376, "grad_norm": 0.333984375, "learning_rate": 1.1113660848045232e-05, "loss": 1.9579, "step": 18520 }, { "epoch": 0.5975588361621339, "grad_norm": 0.328125, "learning_rate": 1.1112140873173222e-05, "loss": 1.9547, "step": 18521 }, { "epoch": 0.5975911000159303, "grad_norm": 0.322265625, "learning_rate": 1.1110620941095974e-05, "loss": 1.9737, "step": 18522 }, { "epoch": 0.5976233638697266, "grad_norm": 0.333984375, "learning_rate": 1.1109101051830225e-05, "loss": 1.9856, "step": 18523 }, { "epoch": 0.597655627723523, "grad_norm": 0.31640625, "learning_rate": 1.1107581205392706e-05, "loss": 1.9525, "step": 18524 }, { "epoch": 0.5976878915773193, "grad_norm": 0.330078125, "learning_rate": 1.1106061401800133e-05, "loss": 1.9611, "step": 18525 }, { "epoch": 0.5977201554311157, "grad_norm": 0.330078125, "learning_rate": 1.110454164106924e-05, "loss": 1.9325, "step": 18526 }, { "epoch": 0.597752419284912, "grad_norm": 0.337890625, "learning_rate": 1.1103021923216769e-05, "loss": 1.9756, "step": 18527 }, { "epoch": 0.5977846831387084, "grad_norm": 0.322265625, "learning_rate": 1.1101502248259428e-05, "loss": 1.9812, "step": 18528 }, { "epoch": 0.5978169469925046, "grad_norm": 0.353515625, "learning_rate": 1.1099982616213953e-05, "loss": 1.9625, "step": 18529 }, { "epoch": 0.597849210846301, "grad_norm": 0.33203125, "learning_rate": 1.109846302709708e-05, "loss": 1.9903, "step": 18530 }, { "epoch": 0.5978814747000974, "grad_norm": 0.318359375, "learning_rate": 1.109694348092552e-05, "loss": 1.9693, "step": 18531 }, { "epoch": 0.5979137385538937, "grad_norm": 0.33203125, "learning_rate": 1.1095423977716006e-05, "loss": 1.983, "step": 18532 }, { "epoch": 0.5979460024076901, "grad_norm": 0.33984375, "learning_rate": 1.1093904517485269e-05, "loss": 1.9666, "step": 18533 }, { "epoch": 0.5979782662614864, "grad_norm": 0.333984375, "learning_rate": 1.1092385100250023e-05, "loss": 1.9744, "step": 18534 }, { "epoch": 0.5980105301152828, "grad_norm": 0.33203125, "learning_rate": 1.1090865726027001e-05, "loss": 1.9628, "step": 18535 }, { "epoch": 0.5980427939690791, "grad_norm": 0.330078125, "learning_rate": 1.1089346394832926e-05, "loss": 1.9821, "step": 18536 }, { "epoch": 0.5980750578228755, "grad_norm": 0.3359375, "learning_rate": 1.1087827106684518e-05, "loss": 1.9984, "step": 18537 }, { "epoch": 0.5981073216766718, "grad_norm": 0.32421875, "learning_rate": 1.1086307861598502e-05, "loss": 1.9445, "step": 18538 }, { "epoch": 0.5981395855304682, "grad_norm": 0.32421875, "learning_rate": 1.1084788659591605e-05, "loss": 1.9851, "step": 18539 }, { "epoch": 0.5981718493842645, "grad_norm": 0.326171875, "learning_rate": 1.1083269500680543e-05, "loss": 1.976, "step": 18540 }, { "epoch": 0.5982041132380609, "grad_norm": 0.322265625, "learning_rate": 1.108175038488204e-05, "loss": 1.9361, "step": 18541 }, { "epoch": 0.5982363770918572, "grad_norm": 0.33203125, "learning_rate": 1.108023131221282e-05, "loss": 1.9593, "step": 18542 }, { "epoch": 0.5982686409456536, "grad_norm": 0.326171875, "learning_rate": 1.10787122826896e-05, "loss": 1.9906, "step": 18543 }, { "epoch": 0.5983009047994499, "grad_norm": 0.32421875, "learning_rate": 1.10771932963291e-05, "loss": 1.9773, "step": 18544 }, { "epoch": 0.5983331686532463, "grad_norm": 0.3203125, "learning_rate": 1.1075674353148048e-05, "loss": 1.9511, "step": 18545 }, { "epoch": 0.5983654325070425, "grad_norm": 0.333984375, "learning_rate": 1.1074155453163152e-05, "loss": 1.9861, "step": 18546 }, { "epoch": 0.598397696360839, "grad_norm": 0.328125, "learning_rate": 1.1072636596391138e-05, "loss": 1.9907, "step": 18547 }, { "epoch": 0.5984299602146352, "grad_norm": 0.3203125, "learning_rate": 1.1071117782848727e-05, "loss": 1.9875, "step": 18548 }, { "epoch": 0.5984622240684316, "grad_norm": 0.32421875, "learning_rate": 1.1069599012552627e-05, "loss": 1.9749, "step": 18549 }, { "epoch": 0.598494487922228, "grad_norm": 0.328125, "learning_rate": 1.1068080285519564e-05, "loss": 1.9813, "step": 18550 }, { "epoch": 0.5985267517760243, "grad_norm": 0.328125, "learning_rate": 1.1066561601766255e-05, "loss": 1.9723, "step": 18551 }, { "epoch": 0.5985590156298207, "grad_norm": 0.333984375, "learning_rate": 1.1065042961309411e-05, "loss": 1.9746, "step": 18552 }, { "epoch": 0.598591279483617, "grad_norm": 0.328125, "learning_rate": 1.1063524364165746e-05, "loss": 1.9879, "step": 18553 }, { "epoch": 0.5986235433374134, "grad_norm": 0.33203125, "learning_rate": 1.1062005810351991e-05, "loss": 1.9707, "step": 18554 }, { "epoch": 0.5986558071912097, "grad_norm": 0.326171875, "learning_rate": 1.1060487299884845e-05, "loss": 1.9687, "step": 18555 }, { "epoch": 0.5986880710450061, "grad_norm": 0.326171875, "learning_rate": 1.105896883278103e-05, "loss": 1.9608, "step": 18556 }, { "epoch": 0.5987203348988024, "grad_norm": 0.34375, "learning_rate": 1.1057450409057252e-05, "loss": 1.99, "step": 18557 }, { "epoch": 0.5987525987525988, "grad_norm": 0.326171875, "learning_rate": 1.1055932028730244e-05, "loss": 1.9766, "step": 18558 }, { "epoch": 0.5987848626063951, "grad_norm": 0.33203125, "learning_rate": 1.10544136918167e-05, "loss": 1.9811, "step": 18559 }, { "epoch": 0.5988171264601915, "grad_norm": 0.326171875, "learning_rate": 1.1052895398333337e-05, "loss": 1.9824, "step": 18560 }, { "epoch": 0.5988493903139878, "grad_norm": 0.330078125, "learning_rate": 1.1051377148296876e-05, "loss": 1.9737, "step": 18561 }, { "epoch": 0.5988816541677842, "grad_norm": 0.318359375, "learning_rate": 1.104985894172402e-05, "loss": 1.9686, "step": 18562 }, { "epoch": 0.5989139180215804, "grad_norm": 0.33203125, "learning_rate": 1.1048340778631481e-05, "loss": 1.9559, "step": 18563 }, { "epoch": 0.5989461818753768, "grad_norm": 0.3203125, "learning_rate": 1.1046822659035976e-05, "loss": 1.9828, "step": 18564 }, { "epoch": 0.5989784457291731, "grad_norm": 0.330078125, "learning_rate": 1.1045304582954207e-05, "loss": 1.9418, "step": 18565 }, { "epoch": 0.5990107095829695, "grad_norm": 0.326171875, "learning_rate": 1.104378655040289e-05, "loss": 1.973, "step": 18566 }, { "epoch": 0.5990429734367658, "grad_norm": 0.32421875, "learning_rate": 1.1042268561398733e-05, "loss": 1.9948, "step": 18567 }, { "epoch": 0.5990752372905622, "grad_norm": 0.33203125, "learning_rate": 1.1040750615958442e-05, "loss": 1.9779, "step": 18568 }, { "epoch": 0.5991075011443585, "grad_norm": 0.318359375, "learning_rate": 1.1039232714098728e-05, "loss": 1.9558, "step": 18569 }, { "epoch": 0.5991397649981549, "grad_norm": 0.3359375, "learning_rate": 1.1037714855836302e-05, "loss": 1.9695, "step": 18570 }, { "epoch": 0.5991720288519513, "grad_norm": 0.33203125, "learning_rate": 1.1036197041187866e-05, "loss": 1.985, "step": 18571 }, { "epoch": 0.5992042927057476, "grad_norm": 0.3203125, "learning_rate": 1.1034679270170129e-05, "loss": 2.0051, "step": 18572 }, { "epoch": 0.599236556559544, "grad_norm": 0.32421875, "learning_rate": 1.1033161542799802e-05, "loss": 1.9669, "step": 18573 }, { "epoch": 0.5992688204133403, "grad_norm": 0.328125, "learning_rate": 1.1031643859093582e-05, "loss": 1.9777, "step": 18574 }, { "epoch": 0.5993010842671367, "grad_norm": 0.333984375, "learning_rate": 1.103012621906818e-05, "loss": 1.9556, "step": 18575 }, { "epoch": 0.599333348120933, "grad_norm": 0.330078125, "learning_rate": 1.1028608622740307e-05, "loss": 1.9804, "step": 18576 }, { "epoch": 0.5993656119747294, "grad_norm": 0.322265625, "learning_rate": 1.102709107012665e-05, "loss": 1.9687, "step": 18577 }, { "epoch": 0.5993978758285257, "grad_norm": 0.326171875, "learning_rate": 1.1025573561243932e-05, "loss": 1.9726, "step": 18578 }, { "epoch": 0.5994301396823221, "grad_norm": 0.328125, "learning_rate": 1.1024056096108852e-05, "loss": 1.9914, "step": 18579 }, { "epoch": 0.5994624035361183, "grad_norm": 0.33984375, "learning_rate": 1.1022538674738101e-05, "loss": 1.9798, "step": 18580 }, { "epoch": 0.5994946673899147, "grad_norm": 0.330078125, "learning_rate": 1.1021021297148393e-05, "loss": 1.9764, "step": 18581 }, { "epoch": 0.599526931243711, "grad_norm": 0.33203125, "learning_rate": 1.1019503963356438e-05, "loss": 1.9819, "step": 18582 }, { "epoch": 0.5995591950975074, "grad_norm": 0.328125, "learning_rate": 1.101798667337892e-05, "loss": 1.9831, "step": 18583 }, { "epoch": 0.5995914589513037, "grad_norm": 0.330078125, "learning_rate": 1.1016469427232542e-05, "loss": 1.9677, "step": 18584 }, { "epoch": 0.5996237228051001, "grad_norm": 0.33203125, "learning_rate": 1.1014952224934023e-05, "loss": 1.9736, "step": 18585 }, { "epoch": 0.5996559866588964, "grad_norm": 0.328125, "learning_rate": 1.1013435066500043e-05, "loss": 1.9324, "step": 18586 }, { "epoch": 0.5996882505126928, "grad_norm": 0.341796875, "learning_rate": 1.101191795194731e-05, "loss": 1.95, "step": 18587 }, { "epoch": 0.5997205143664891, "grad_norm": 0.33203125, "learning_rate": 1.1010400881292529e-05, "loss": 1.9809, "step": 18588 }, { "epoch": 0.5997527782202855, "grad_norm": 0.33984375, "learning_rate": 1.1008883854552388e-05, "loss": 1.9724, "step": 18589 }, { "epoch": 0.5997850420740818, "grad_norm": 0.32421875, "learning_rate": 1.100736687174359e-05, "loss": 1.967, "step": 18590 }, { "epoch": 0.5998173059278782, "grad_norm": 0.349609375, "learning_rate": 1.1005849932882835e-05, "loss": 1.9868, "step": 18591 }, { "epoch": 0.5998495697816746, "grad_norm": 0.32421875, "learning_rate": 1.100433303798682e-05, "loss": 2.0027, "step": 18592 }, { "epoch": 0.5998818336354709, "grad_norm": 0.328125, "learning_rate": 1.1002816187072235e-05, "loss": 2.0028, "step": 18593 }, { "epoch": 0.5999140974892673, "grad_norm": 0.32421875, "learning_rate": 1.1001299380155786e-05, "loss": 1.9756, "step": 18594 }, { "epoch": 0.5999463613430636, "grad_norm": 0.328125, "learning_rate": 1.0999782617254166e-05, "loss": 1.9489, "step": 18595 }, { "epoch": 0.59997862519686, "grad_norm": 0.33203125, "learning_rate": 1.0998265898384066e-05, "loss": 1.9796, "step": 18596 }, { "epoch": 0.6000108890506562, "grad_norm": 0.322265625, "learning_rate": 1.0996749223562185e-05, "loss": 1.9955, "step": 18597 }, { "epoch": 0.6000431529044526, "grad_norm": 0.330078125, "learning_rate": 1.0995232592805217e-05, "loss": 1.9713, "step": 18598 }, { "epoch": 0.6000754167582489, "grad_norm": 0.33984375, "learning_rate": 1.0993716006129857e-05, "loss": 1.9865, "step": 18599 }, { "epoch": 0.6001076806120453, "grad_norm": 0.3359375, "learning_rate": 1.0992199463552794e-05, "loss": 1.9556, "step": 18600 }, { "epoch": 0.6001399444658416, "grad_norm": 0.32421875, "learning_rate": 1.0990682965090728e-05, "loss": 1.9504, "step": 18601 }, { "epoch": 0.600172208319638, "grad_norm": 0.31640625, "learning_rate": 1.0989166510760346e-05, "loss": 1.9693, "step": 18602 }, { "epoch": 0.6002044721734343, "grad_norm": 0.328125, "learning_rate": 1.0987650100578339e-05, "loss": 1.984, "step": 18603 }, { "epoch": 0.6002367360272307, "grad_norm": 0.333984375, "learning_rate": 1.0986133734561404e-05, "loss": 1.9596, "step": 18604 }, { "epoch": 0.600268999881027, "grad_norm": 0.33984375, "learning_rate": 1.0984617412726229e-05, "loss": 1.9921, "step": 18605 }, { "epoch": 0.6003012637348234, "grad_norm": 0.318359375, "learning_rate": 1.0983101135089503e-05, "loss": 1.936, "step": 18606 }, { "epoch": 0.6003335275886197, "grad_norm": 0.34375, "learning_rate": 1.0981584901667925e-05, "loss": 1.9574, "step": 18607 }, { "epoch": 0.6003657914424161, "grad_norm": 0.361328125, "learning_rate": 1.098006871247817e-05, "loss": 1.9899, "step": 18608 }, { "epoch": 0.6003980552962124, "grad_norm": 0.337890625, "learning_rate": 1.0978552567536936e-05, "loss": 1.9826, "step": 18609 }, { "epoch": 0.6004303191500088, "grad_norm": 0.341796875, "learning_rate": 1.0977036466860915e-05, "loss": 1.9682, "step": 18610 }, { "epoch": 0.6004625830038052, "grad_norm": 0.3359375, "learning_rate": 1.0975520410466784e-05, "loss": 2.0022, "step": 18611 }, { "epoch": 0.6004948468576015, "grad_norm": 0.326171875, "learning_rate": 1.0974004398371238e-05, "loss": 1.9722, "step": 18612 }, { "epoch": 0.6005271107113979, "grad_norm": 0.337890625, "learning_rate": 1.0972488430590972e-05, "loss": 1.9575, "step": 18613 }, { "epoch": 0.6005593745651941, "grad_norm": 0.328125, "learning_rate": 1.0970972507142656e-05, "loss": 1.9856, "step": 18614 }, { "epoch": 0.6005916384189905, "grad_norm": 0.337890625, "learning_rate": 1.0969456628042982e-05, "loss": 1.9903, "step": 18615 }, { "epoch": 0.6006239022727868, "grad_norm": 0.33203125, "learning_rate": 1.0967940793308646e-05, "loss": 1.9702, "step": 18616 }, { "epoch": 0.6006561661265832, "grad_norm": 0.33203125, "learning_rate": 1.096642500295632e-05, "loss": 1.9866, "step": 18617 }, { "epoch": 0.6006884299803795, "grad_norm": 0.341796875, "learning_rate": 1.0964909257002693e-05, "loss": 1.9505, "step": 18618 }, { "epoch": 0.6007206938341759, "grad_norm": 0.32421875, "learning_rate": 1.0963393555464454e-05, "loss": 1.9563, "step": 18619 }, { "epoch": 0.6007529576879722, "grad_norm": 0.326171875, "learning_rate": 1.0961877898358281e-05, "loss": 1.9966, "step": 18620 }, { "epoch": 0.6007852215417686, "grad_norm": 0.345703125, "learning_rate": 1.0960362285700858e-05, "loss": 1.981, "step": 18621 }, { "epoch": 0.6008174853955649, "grad_norm": 0.32421875, "learning_rate": 1.095884671750887e-05, "loss": 1.9683, "step": 18622 }, { "epoch": 0.6008497492493613, "grad_norm": 0.337890625, "learning_rate": 1.0957331193798998e-05, "loss": 1.9742, "step": 18623 }, { "epoch": 0.6008820131031576, "grad_norm": 0.326171875, "learning_rate": 1.0955815714587922e-05, "loss": 1.974, "step": 18624 }, { "epoch": 0.600914276956954, "grad_norm": 0.3203125, "learning_rate": 1.0954300279892328e-05, "loss": 1.9761, "step": 18625 }, { "epoch": 0.6009465408107503, "grad_norm": 0.3359375, "learning_rate": 1.0952784889728894e-05, "loss": 1.9904, "step": 18626 }, { "epoch": 0.6009788046645467, "grad_norm": 0.326171875, "learning_rate": 1.09512695441143e-05, "loss": 1.9748, "step": 18627 }, { "epoch": 0.601011068518343, "grad_norm": 0.322265625, "learning_rate": 1.0949754243065224e-05, "loss": 1.9872, "step": 18628 }, { "epoch": 0.6010433323721394, "grad_norm": 0.3125, "learning_rate": 1.0948238986598354e-05, "loss": 1.9399, "step": 18629 }, { "epoch": 0.6010755962259356, "grad_norm": 0.328125, "learning_rate": 1.0946723774730356e-05, "loss": 1.9587, "step": 18630 }, { "epoch": 0.601107860079732, "grad_norm": 0.326171875, "learning_rate": 1.0945208607477917e-05, "loss": 2.0086, "step": 18631 }, { "epoch": 0.6011401239335284, "grad_norm": 0.314453125, "learning_rate": 1.0943693484857716e-05, "loss": 1.9677, "step": 18632 }, { "epoch": 0.6011723877873247, "grad_norm": 0.333984375, "learning_rate": 1.0942178406886422e-05, "loss": 1.9703, "step": 18633 }, { "epoch": 0.6012046516411211, "grad_norm": 0.322265625, "learning_rate": 1.0940663373580717e-05, "loss": 1.9755, "step": 18634 }, { "epoch": 0.6012369154949174, "grad_norm": 0.330078125, "learning_rate": 1.0939148384957282e-05, "loss": 1.9982, "step": 18635 }, { "epoch": 0.6012691793487138, "grad_norm": 0.33203125, "learning_rate": 1.0937633441032787e-05, "loss": 1.9784, "step": 18636 }, { "epoch": 0.6013014432025101, "grad_norm": 0.43359375, "learning_rate": 1.0936118541823906e-05, "loss": 1.981, "step": 18637 }, { "epoch": 0.6013337070563065, "grad_norm": 0.32421875, "learning_rate": 1.0934603687347323e-05, "loss": 1.9626, "step": 18638 }, { "epoch": 0.6013659709101028, "grad_norm": 0.318359375, "learning_rate": 1.09330888776197e-05, "loss": 1.9508, "step": 18639 }, { "epoch": 0.6013982347638992, "grad_norm": 0.314453125, "learning_rate": 1.0931574112657716e-05, "loss": 1.9804, "step": 18640 }, { "epoch": 0.6014304986176955, "grad_norm": 0.330078125, "learning_rate": 1.0930059392478054e-05, "loss": 1.9566, "step": 18641 }, { "epoch": 0.6014627624714919, "grad_norm": 0.32421875, "learning_rate": 1.092854471709737e-05, "loss": 2.0008, "step": 18642 }, { "epoch": 0.6014950263252882, "grad_norm": 0.33203125, "learning_rate": 1.0927030086532345e-05, "loss": 1.9697, "step": 18643 }, { "epoch": 0.6015272901790846, "grad_norm": 0.33203125, "learning_rate": 1.0925515500799658e-05, "loss": 1.9844, "step": 18644 }, { "epoch": 0.6015595540328809, "grad_norm": 0.32421875, "learning_rate": 1.0924000959915968e-05, "loss": 1.9902, "step": 18645 }, { "epoch": 0.6015918178866773, "grad_norm": 0.31640625, "learning_rate": 1.0922486463897947e-05, "loss": 1.9514, "step": 18646 }, { "epoch": 0.6016240817404735, "grad_norm": 0.3203125, "learning_rate": 1.092097201276228e-05, "loss": 1.9703, "step": 18647 }, { "epoch": 0.60165634559427, "grad_norm": 0.33203125, "learning_rate": 1.0919457606525623e-05, "loss": 1.9225, "step": 18648 }, { "epoch": 0.6016886094480662, "grad_norm": 0.337890625, "learning_rate": 1.0917943245204646e-05, "loss": 2.0032, "step": 18649 }, { "epoch": 0.6017208733018626, "grad_norm": 0.33984375, "learning_rate": 1.0916428928816026e-05, "loss": 1.9848, "step": 18650 }, { "epoch": 0.6017531371556589, "grad_norm": 0.333984375, "learning_rate": 1.0914914657376422e-05, "loss": 1.9661, "step": 18651 }, { "epoch": 0.6017854010094553, "grad_norm": 0.330078125, "learning_rate": 1.0913400430902507e-05, "loss": 1.9788, "step": 18652 }, { "epoch": 0.6018176648632517, "grad_norm": 0.326171875, "learning_rate": 1.0911886249410953e-05, "loss": 1.9943, "step": 18653 }, { "epoch": 0.601849928717048, "grad_norm": 0.337890625, "learning_rate": 1.0910372112918418e-05, "loss": 1.94, "step": 18654 }, { "epoch": 0.6018821925708444, "grad_norm": 0.328125, "learning_rate": 1.0908858021441575e-05, "loss": 1.9908, "step": 18655 }, { "epoch": 0.6019144564246407, "grad_norm": 0.337890625, "learning_rate": 1.0907343974997083e-05, "loss": 1.9592, "step": 18656 }, { "epoch": 0.6019467202784371, "grad_norm": 0.3359375, "learning_rate": 1.090582997360162e-05, "loss": 1.9619, "step": 18657 }, { "epoch": 0.6019789841322334, "grad_norm": 0.328125, "learning_rate": 1.0904316017271839e-05, "loss": 1.9673, "step": 18658 }, { "epoch": 0.6020112479860298, "grad_norm": 0.333984375, "learning_rate": 1.090280210602441e-05, "loss": 1.9867, "step": 18659 }, { "epoch": 0.6020435118398261, "grad_norm": 0.333984375, "learning_rate": 1.0901288239876e-05, "loss": 1.976, "step": 18660 }, { "epoch": 0.6020757756936225, "grad_norm": 0.33203125, "learning_rate": 1.0899774418843263e-05, "loss": 1.9989, "step": 18661 }, { "epoch": 0.6021080395474188, "grad_norm": 0.326171875, "learning_rate": 1.089826064294287e-05, "loss": 1.9919, "step": 18662 }, { "epoch": 0.6021403034012152, "grad_norm": 0.35546875, "learning_rate": 1.0896746912191481e-05, "loss": 1.9867, "step": 18663 }, { "epoch": 0.6021725672550114, "grad_norm": 0.330078125, "learning_rate": 1.089523322660576e-05, "loss": 1.9941, "step": 18664 }, { "epoch": 0.6022048311088078, "grad_norm": 0.34765625, "learning_rate": 1.0893719586202364e-05, "loss": 1.9676, "step": 18665 }, { "epoch": 0.6022370949626041, "grad_norm": 0.328125, "learning_rate": 1.089220599099796e-05, "loss": 1.99, "step": 18666 }, { "epoch": 0.6022693588164005, "grad_norm": 0.32421875, "learning_rate": 1.0890692441009202e-05, "loss": 1.9865, "step": 18667 }, { "epoch": 0.6023016226701968, "grad_norm": 0.35546875, "learning_rate": 1.0889178936252754e-05, "loss": 1.9469, "step": 18668 }, { "epoch": 0.6023338865239932, "grad_norm": 0.333984375, "learning_rate": 1.0887665476745283e-05, "loss": 1.913, "step": 18669 }, { "epoch": 0.6023661503777895, "grad_norm": 0.341796875, "learning_rate": 1.088615206250343e-05, "loss": 1.979, "step": 18670 }, { "epoch": 0.6023984142315859, "grad_norm": 0.365234375, "learning_rate": 1.0884638693543867e-05, "loss": 1.9954, "step": 18671 }, { "epoch": 0.6024306780853823, "grad_norm": 0.32421875, "learning_rate": 1.0883125369883255e-05, "loss": 1.9948, "step": 18672 }, { "epoch": 0.6024629419391786, "grad_norm": 0.345703125, "learning_rate": 1.088161209153824e-05, "loss": 1.9893, "step": 18673 }, { "epoch": 0.602495205792975, "grad_norm": 0.3359375, "learning_rate": 1.0880098858525479e-05, "loss": 1.9643, "step": 18674 }, { "epoch": 0.6025274696467713, "grad_norm": 0.32421875, "learning_rate": 1.0878585670861646e-05, "loss": 1.947, "step": 18675 }, { "epoch": 0.6025597335005677, "grad_norm": 0.341796875, "learning_rate": 1.0877072528563377e-05, "loss": 1.9611, "step": 18676 }, { "epoch": 0.602591997354364, "grad_norm": 0.3359375, "learning_rate": 1.0875559431647337e-05, "loss": 1.9702, "step": 18677 }, { "epoch": 0.6026242612081604, "grad_norm": 0.33203125, "learning_rate": 1.0874046380130183e-05, "loss": 1.9575, "step": 18678 }, { "epoch": 0.6026565250619567, "grad_norm": 0.326171875, "learning_rate": 1.0872533374028563e-05, "loss": 1.9967, "step": 18679 }, { "epoch": 0.6026887889157531, "grad_norm": 0.333984375, "learning_rate": 1.0871020413359135e-05, "loss": 1.9577, "step": 18680 }, { "epoch": 0.6027210527695493, "grad_norm": 0.388671875, "learning_rate": 1.0869507498138556e-05, "loss": 1.9772, "step": 18681 }, { "epoch": 0.6027533166233457, "grad_norm": 0.341796875, "learning_rate": 1.086799462838347e-05, "loss": 1.9735, "step": 18682 }, { "epoch": 0.602785580477142, "grad_norm": 0.376953125, "learning_rate": 1.0866481804110537e-05, "loss": 1.9611, "step": 18683 }, { "epoch": 0.6028178443309384, "grad_norm": 0.37109375, "learning_rate": 1.086496902533641e-05, "loss": 1.9966, "step": 18684 }, { "epoch": 0.6028501081847347, "grad_norm": 0.328125, "learning_rate": 1.0863456292077733e-05, "loss": 1.9335, "step": 18685 }, { "epoch": 0.6028823720385311, "grad_norm": 0.328125, "learning_rate": 1.0861943604351162e-05, "loss": 1.9939, "step": 18686 }, { "epoch": 0.6029146358923274, "grad_norm": 0.345703125, "learning_rate": 1.0860430962173352e-05, "loss": 1.9636, "step": 18687 }, { "epoch": 0.6029468997461238, "grad_norm": 0.33203125, "learning_rate": 1.085891836556094e-05, "loss": 2.0171, "step": 18688 }, { "epoch": 0.6029791635999201, "grad_norm": 0.32421875, "learning_rate": 1.085740581453059e-05, "loss": 1.9687, "step": 18689 }, { "epoch": 0.6030114274537165, "grad_norm": 0.337890625, "learning_rate": 1.0855893309098943e-05, "loss": 1.9768, "step": 18690 }, { "epoch": 0.6030436913075128, "grad_norm": 0.345703125, "learning_rate": 1.085438084928265e-05, "loss": 1.9826, "step": 18691 }, { "epoch": 0.6030759551613092, "grad_norm": 0.333984375, "learning_rate": 1.0852868435098358e-05, "loss": 1.9685, "step": 18692 }, { "epoch": 0.6031082190151056, "grad_norm": 0.330078125, "learning_rate": 1.0851356066562716e-05, "loss": 1.9773, "step": 18693 }, { "epoch": 0.6031404828689019, "grad_norm": 0.330078125, "learning_rate": 1.0849843743692371e-05, "loss": 1.9209, "step": 18694 }, { "epoch": 0.6031727467226983, "grad_norm": 0.33203125, "learning_rate": 1.0848331466503966e-05, "loss": 1.9732, "step": 18695 }, { "epoch": 0.6032050105764946, "grad_norm": 0.326171875, "learning_rate": 1.0846819235014151e-05, "loss": 1.9919, "step": 18696 }, { "epoch": 0.603237274430291, "grad_norm": 0.328125, "learning_rate": 1.0845307049239572e-05, "loss": 1.9741, "step": 18697 }, { "epoch": 0.6032695382840872, "grad_norm": 0.34765625, "learning_rate": 1.0843794909196871e-05, "loss": 1.9604, "step": 18698 }, { "epoch": 0.6033018021378836, "grad_norm": 0.3359375, "learning_rate": 1.0842282814902692e-05, "loss": 1.9808, "step": 18699 }, { "epoch": 0.6033340659916799, "grad_norm": 0.322265625, "learning_rate": 1.0840770766373689e-05, "loss": 1.9713, "step": 18700 }, { "epoch": 0.6033663298454763, "grad_norm": 0.337890625, "learning_rate": 1.0839258763626489e-05, "loss": 1.9792, "step": 18701 }, { "epoch": 0.6033985936992726, "grad_norm": 0.326171875, "learning_rate": 1.0837746806677743e-05, "loss": 1.9728, "step": 18702 }, { "epoch": 0.603430857553069, "grad_norm": 0.3203125, "learning_rate": 1.0836234895544104e-05, "loss": 1.968, "step": 18703 }, { "epoch": 0.6034631214068653, "grad_norm": 0.3203125, "learning_rate": 1.0834723030242198e-05, "loss": 1.9707, "step": 18704 }, { "epoch": 0.6034953852606617, "grad_norm": 0.33203125, "learning_rate": 1.0833211210788669e-05, "loss": 1.9783, "step": 18705 }, { "epoch": 0.603527649114458, "grad_norm": 0.326171875, "learning_rate": 1.083169943720017e-05, "loss": 1.9806, "step": 18706 }, { "epoch": 0.6035599129682544, "grad_norm": 0.333984375, "learning_rate": 1.0830187709493326e-05, "loss": 1.9865, "step": 18707 }, { "epoch": 0.6035921768220507, "grad_norm": 0.31640625, "learning_rate": 1.0828676027684787e-05, "loss": 1.9685, "step": 18708 }, { "epoch": 0.6036244406758471, "grad_norm": 0.333984375, "learning_rate": 1.082716439179119e-05, "loss": 1.9736, "step": 18709 }, { "epoch": 0.6036567045296434, "grad_norm": 0.31640625, "learning_rate": 1.0825652801829173e-05, "loss": 1.96, "step": 18710 }, { "epoch": 0.6036889683834398, "grad_norm": 0.33203125, "learning_rate": 1.0824141257815375e-05, "loss": 1.9949, "step": 18711 }, { "epoch": 0.6037212322372362, "grad_norm": 0.33203125, "learning_rate": 1.0822629759766437e-05, "loss": 1.9808, "step": 18712 }, { "epoch": 0.6037534960910325, "grad_norm": 0.326171875, "learning_rate": 1.0821118307698992e-05, "loss": 1.9762, "step": 18713 }, { "epoch": 0.6037857599448289, "grad_norm": 0.330078125, "learning_rate": 1.0819606901629676e-05, "loss": 1.9901, "step": 18714 }, { "epoch": 0.6038180237986251, "grad_norm": 0.318359375, "learning_rate": 1.0818095541575133e-05, "loss": 1.9857, "step": 18715 }, { "epoch": 0.6038502876524215, "grad_norm": 0.328125, "learning_rate": 1.0816584227551991e-05, "loss": 1.9361, "step": 18716 }, { "epoch": 0.6038825515062178, "grad_norm": 0.33203125, "learning_rate": 1.0815072959576888e-05, "loss": 1.9784, "step": 18717 }, { "epoch": 0.6039148153600142, "grad_norm": 0.345703125, "learning_rate": 1.0813561737666465e-05, "loss": 1.9749, "step": 18718 }, { "epoch": 0.6039470792138105, "grad_norm": 0.33984375, "learning_rate": 1.0812050561837346e-05, "loss": 1.9832, "step": 18719 }, { "epoch": 0.6039793430676069, "grad_norm": 0.333984375, "learning_rate": 1.081053943210617e-05, "loss": 1.955, "step": 18720 }, { "epoch": 0.6040116069214032, "grad_norm": 0.359375, "learning_rate": 1.0809028348489572e-05, "loss": 1.9862, "step": 18721 }, { "epoch": 0.6040438707751996, "grad_norm": 0.35546875, "learning_rate": 1.0807517311004182e-05, "loss": 1.9679, "step": 18722 }, { "epoch": 0.6040761346289959, "grad_norm": 0.341796875, "learning_rate": 1.0806006319666635e-05, "loss": 1.9705, "step": 18723 }, { "epoch": 0.6041083984827923, "grad_norm": 0.349609375, "learning_rate": 1.0804495374493562e-05, "loss": 1.9842, "step": 18724 }, { "epoch": 0.6041406623365886, "grad_norm": 0.357421875, "learning_rate": 1.0802984475501594e-05, "loss": 1.9882, "step": 18725 }, { "epoch": 0.604172926190385, "grad_norm": 0.337890625, "learning_rate": 1.080147362270736e-05, "loss": 1.9443, "step": 18726 }, { "epoch": 0.6042051900441813, "grad_norm": 0.3359375, "learning_rate": 1.0799962816127494e-05, "loss": 1.9937, "step": 18727 }, { "epoch": 0.6042374538979777, "grad_norm": 0.33984375, "learning_rate": 1.079845205577863e-05, "loss": 1.9545, "step": 18728 }, { "epoch": 0.604269717751774, "grad_norm": 0.3359375, "learning_rate": 1.079694134167738e-05, "loss": 1.9482, "step": 18729 }, { "epoch": 0.6043019816055704, "grad_norm": 0.341796875, "learning_rate": 1.0795430673840389e-05, "loss": 1.9765, "step": 18730 }, { "epoch": 0.6043342454593666, "grad_norm": 0.345703125, "learning_rate": 1.0793920052284288e-05, "loss": 1.9814, "step": 18731 }, { "epoch": 0.604366509313163, "grad_norm": 0.328125, "learning_rate": 1.0792409477025688e-05, "loss": 1.9922, "step": 18732 }, { "epoch": 0.6043987731669594, "grad_norm": 0.353515625, "learning_rate": 1.0790898948081228e-05, "loss": 1.9985, "step": 18733 }, { "epoch": 0.6044310370207557, "grad_norm": 0.341796875, "learning_rate": 1.0789388465467538e-05, "loss": 1.9679, "step": 18734 }, { "epoch": 0.6044633008745521, "grad_norm": 0.35546875, "learning_rate": 1.0787878029201236e-05, "loss": 1.9671, "step": 18735 }, { "epoch": 0.6044955647283484, "grad_norm": 0.337890625, "learning_rate": 1.0786367639298946e-05, "loss": 1.9773, "step": 18736 }, { "epoch": 0.6045278285821448, "grad_norm": 0.33984375, "learning_rate": 1.0784857295777307e-05, "loss": 1.9585, "step": 18737 }, { "epoch": 0.6045600924359411, "grad_norm": 0.345703125, "learning_rate": 1.078334699865293e-05, "loss": 1.9443, "step": 18738 }, { "epoch": 0.6045923562897375, "grad_norm": 0.333984375, "learning_rate": 1.0781836747942443e-05, "loss": 1.9499, "step": 18739 }, { "epoch": 0.6046246201435338, "grad_norm": 0.32421875, "learning_rate": 1.0780326543662475e-05, "loss": 1.9664, "step": 18740 }, { "epoch": 0.6046568839973302, "grad_norm": 0.318359375, "learning_rate": 1.0778816385829641e-05, "loss": 1.9851, "step": 18741 }, { "epoch": 0.6046891478511265, "grad_norm": 0.333984375, "learning_rate": 1.0777306274460568e-05, "loss": 1.9936, "step": 18742 }, { "epoch": 0.6047214117049229, "grad_norm": 0.322265625, "learning_rate": 1.0775796209571884e-05, "loss": 1.9763, "step": 18743 }, { "epoch": 0.6047536755587192, "grad_norm": 0.322265625, "learning_rate": 1.0774286191180201e-05, "loss": 1.9704, "step": 18744 }, { "epoch": 0.6047859394125156, "grad_norm": 0.330078125, "learning_rate": 1.077277621930214e-05, "loss": 1.9522, "step": 18745 }, { "epoch": 0.6048182032663119, "grad_norm": 0.326171875, "learning_rate": 1.0771266293954334e-05, "loss": 1.9301, "step": 18746 }, { "epoch": 0.6048504671201083, "grad_norm": 0.326171875, "learning_rate": 1.076975641515339e-05, "loss": 1.9883, "step": 18747 }, { "epoch": 0.6048827309739045, "grad_norm": 0.328125, "learning_rate": 1.0768246582915932e-05, "loss": 1.9754, "step": 18748 }, { "epoch": 0.604914994827701, "grad_norm": 0.322265625, "learning_rate": 1.0766736797258582e-05, "loss": 1.9744, "step": 18749 }, { "epoch": 0.6049472586814972, "grad_norm": 0.330078125, "learning_rate": 1.0765227058197954e-05, "loss": 1.9765, "step": 18750 }, { "epoch": 0.6049795225352936, "grad_norm": 0.322265625, "learning_rate": 1.076371736575067e-05, "loss": 1.984, "step": 18751 }, { "epoch": 0.6050117863890899, "grad_norm": 0.333984375, "learning_rate": 1.0762207719933349e-05, "loss": 1.9756, "step": 18752 }, { "epoch": 0.6050440502428863, "grad_norm": 0.322265625, "learning_rate": 1.07606981207626e-05, "loss": 1.982, "step": 18753 }, { "epoch": 0.6050763140966827, "grad_norm": 0.34375, "learning_rate": 1.0759188568255047e-05, "loss": 1.9721, "step": 18754 }, { "epoch": 0.605108577950479, "grad_norm": 0.357421875, "learning_rate": 1.0757679062427304e-05, "loss": 2.0049, "step": 18755 }, { "epoch": 0.6051408418042754, "grad_norm": 0.328125, "learning_rate": 1.0756169603295988e-05, "loss": 1.9765, "step": 18756 }, { "epoch": 0.6051731056580717, "grad_norm": 0.34375, "learning_rate": 1.075466019087771e-05, "loss": 1.9563, "step": 18757 }, { "epoch": 0.6052053695118681, "grad_norm": 0.33203125, "learning_rate": 1.0753150825189086e-05, "loss": 1.964, "step": 18758 }, { "epoch": 0.6052376333656644, "grad_norm": 0.32421875, "learning_rate": 1.0751641506246738e-05, "loss": 1.9807, "step": 18759 }, { "epoch": 0.6052698972194608, "grad_norm": 0.330078125, "learning_rate": 1.0750132234067265e-05, "loss": 1.9574, "step": 18760 }, { "epoch": 0.6053021610732571, "grad_norm": 0.33203125, "learning_rate": 1.0748623008667289e-05, "loss": 1.9908, "step": 18761 }, { "epoch": 0.6053344249270535, "grad_norm": 0.33984375, "learning_rate": 1.0747113830063428e-05, "loss": 1.9863, "step": 18762 }, { "epoch": 0.6053666887808498, "grad_norm": 0.330078125, "learning_rate": 1.0745604698272276e-05, "loss": 1.9397, "step": 18763 }, { "epoch": 0.6053989526346462, "grad_norm": 0.3359375, "learning_rate": 1.074409561331046e-05, "loss": 2.011, "step": 18764 }, { "epoch": 0.6054312164884424, "grad_norm": 0.33984375, "learning_rate": 1.0742586575194594e-05, "loss": 1.9733, "step": 18765 }, { "epoch": 0.6054634803422388, "grad_norm": 0.3203125, "learning_rate": 1.0741077583941271e-05, "loss": 1.9426, "step": 18766 }, { "epoch": 0.6054957441960351, "grad_norm": 0.330078125, "learning_rate": 1.0739568639567108e-05, "loss": 1.9785, "step": 18767 }, { "epoch": 0.6055280080498315, "grad_norm": 0.34375, "learning_rate": 1.0738059742088729e-05, "loss": 2.0, "step": 18768 }, { "epoch": 0.6055602719036278, "grad_norm": 0.337890625, "learning_rate": 1.0736550891522722e-05, "loss": 1.9569, "step": 18769 }, { "epoch": 0.6055925357574242, "grad_norm": 0.326171875, "learning_rate": 1.0735042087885706e-05, "loss": 1.9545, "step": 18770 }, { "epoch": 0.6056247996112205, "grad_norm": 0.326171875, "learning_rate": 1.0733533331194289e-05, "loss": 1.9957, "step": 18771 }, { "epoch": 0.6056570634650169, "grad_norm": 0.330078125, "learning_rate": 1.0732024621465074e-05, "loss": 1.9729, "step": 18772 }, { "epoch": 0.6056893273188133, "grad_norm": 0.330078125, "learning_rate": 1.073051595871467e-05, "loss": 1.9689, "step": 18773 }, { "epoch": 0.6057215911726096, "grad_norm": 0.3203125, "learning_rate": 1.0729007342959684e-05, "loss": 1.9872, "step": 18774 }, { "epoch": 0.605753855026406, "grad_norm": 0.3359375, "learning_rate": 1.0727498774216723e-05, "loss": 1.9765, "step": 18775 }, { "epoch": 0.6057861188802023, "grad_norm": 0.33984375, "learning_rate": 1.0725990252502386e-05, "loss": 1.9631, "step": 18776 }, { "epoch": 0.6058183827339987, "grad_norm": 0.31640625, "learning_rate": 1.0724481777833288e-05, "loss": 1.9732, "step": 18777 }, { "epoch": 0.605850646587795, "grad_norm": 0.328125, "learning_rate": 1.0722973350226023e-05, "loss": 1.9661, "step": 18778 }, { "epoch": 0.6058829104415914, "grad_norm": 0.32421875, "learning_rate": 1.07214649696972e-05, "loss": 1.9494, "step": 18779 }, { "epoch": 0.6059151742953877, "grad_norm": 0.322265625, "learning_rate": 1.0719956636263424e-05, "loss": 1.9778, "step": 18780 }, { "epoch": 0.6059474381491841, "grad_norm": 0.330078125, "learning_rate": 1.071844834994129e-05, "loss": 1.9832, "step": 18781 }, { "epoch": 0.6059797020029803, "grad_norm": 0.35546875, "learning_rate": 1.0716940110747405e-05, "loss": 1.9426, "step": 18782 }, { "epoch": 0.6060119658567767, "grad_norm": 0.326171875, "learning_rate": 1.0715431918698376e-05, "loss": 1.9702, "step": 18783 }, { "epoch": 0.606044229710573, "grad_norm": 0.328125, "learning_rate": 1.0713923773810795e-05, "loss": 1.9653, "step": 18784 }, { "epoch": 0.6060764935643694, "grad_norm": 0.333984375, "learning_rate": 1.0712415676101265e-05, "loss": 1.9719, "step": 18785 }, { "epoch": 0.6061087574181657, "grad_norm": 0.328125, "learning_rate": 1.0710907625586395e-05, "loss": 1.948, "step": 18786 }, { "epoch": 0.6061410212719621, "grad_norm": 0.318359375, "learning_rate": 1.0709399622282768e-05, "loss": 1.9927, "step": 18787 }, { "epoch": 0.6061732851257584, "grad_norm": 0.3359375, "learning_rate": 1.0707891666206993e-05, "loss": 1.9749, "step": 18788 }, { "epoch": 0.6062055489795548, "grad_norm": 0.333984375, "learning_rate": 1.0706383757375668e-05, "loss": 1.9844, "step": 18789 }, { "epoch": 0.6062378128333511, "grad_norm": 0.330078125, "learning_rate": 1.0704875895805397e-05, "loss": 2.004, "step": 18790 }, { "epoch": 0.6062700766871475, "grad_norm": 0.314453125, "learning_rate": 1.0703368081512763e-05, "loss": 1.9611, "step": 18791 }, { "epoch": 0.6063023405409438, "grad_norm": 0.318359375, "learning_rate": 1.0701860314514372e-05, "loss": 1.9724, "step": 18792 }, { "epoch": 0.6063346043947402, "grad_norm": 0.333984375, "learning_rate": 1.0700352594826826e-05, "loss": 1.9798, "step": 18793 }, { "epoch": 0.6063668682485366, "grad_norm": 0.33984375, "learning_rate": 1.0698844922466707e-05, "loss": 1.9677, "step": 18794 }, { "epoch": 0.6063991321023329, "grad_norm": 0.33203125, "learning_rate": 1.0697337297450615e-05, "loss": 1.991, "step": 18795 }, { "epoch": 0.6064313959561293, "grad_norm": 0.333984375, "learning_rate": 1.0695829719795157e-05, "loss": 1.9723, "step": 18796 }, { "epoch": 0.6064636598099256, "grad_norm": 0.322265625, "learning_rate": 1.069432218951691e-05, "loss": 1.9559, "step": 18797 }, { "epoch": 0.606495923663722, "grad_norm": 0.333984375, "learning_rate": 1.0692814706632475e-05, "loss": 1.9786, "step": 18798 }, { "epoch": 0.6065281875175182, "grad_norm": 0.326171875, "learning_rate": 1.0691307271158453e-05, "loss": 1.9745, "step": 18799 }, { "epoch": 0.6065604513713146, "grad_norm": 0.32421875, "learning_rate": 1.0689799883111424e-05, "loss": 1.9743, "step": 18800 }, { "epoch": 0.6065927152251109, "grad_norm": 0.32421875, "learning_rate": 1.0688292542507986e-05, "loss": 1.9672, "step": 18801 }, { "epoch": 0.6066249790789073, "grad_norm": 0.326171875, "learning_rate": 1.068678524936473e-05, "loss": 1.9796, "step": 18802 }, { "epoch": 0.6066572429327036, "grad_norm": 0.326171875, "learning_rate": 1.0685278003698252e-05, "loss": 1.9785, "step": 18803 }, { "epoch": 0.6066895067865, "grad_norm": 0.322265625, "learning_rate": 1.0683770805525132e-05, "loss": 1.9804, "step": 18804 }, { "epoch": 0.6067217706402963, "grad_norm": 0.328125, "learning_rate": 1.0682263654861973e-05, "loss": 1.9611, "step": 18805 }, { "epoch": 0.6067540344940927, "grad_norm": 0.31640625, "learning_rate": 1.0680756551725353e-05, "loss": 1.9653, "step": 18806 }, { "epoch": 0.606786298347889, "grad_norm": 0.333984375, "learning_rate": 1.0679249496131868e-05, "loss": 1.9893, "step": 18807 }, { "epoch": 0.6068185622016854, "grad_norm": 0.32421875, "learning_rate": 1.0677742488098105e-05, "loss": 1.9653, "step": 18808 }, { "epoch": 0.6068508260554817, "grad_norm": 0.361328125, "learning_rate": 1.067623552764065e-05, "loss": 1.9775, "step": 18809 }, { "epoch": 0.6068830899092781, "grad_norm": 0.318359375, "learning_rate": 1.0674728614776094e-05, "loss": 1.9373, "step": 18810 }, { "epoch": 0.6069153537630744, "grad_norm": 0.32421875, "learning_rate": 1.0673221749521026e-05, "loss": 1.9644, "step": 18811 }, { "epoch": 0.6069476176168708, "grad_norm": 0.337890625, "learning_rate": 1.0671714931892025e-05, "loss": 1.9494, "step": 18812 }, { "epoch": 0.6069798814706672, "grad_norm": 0.32421875, "learning_rate": 1.0670208161905681e-05, "loss": 1.9671, "step": 18813 }, { "epoch": 0.6070121453244635, "grad_norm": 0.328125, "learning_rate": 1.0668701439578583e-05, "loss": 2.0, "step": 18814 }, { "epoch": 0.6070444091782599, "grad_norm": 0.337890625, "learning_rate": 1.0667194764927306e-05, "loss": 1.9431, "step": 18815 }, { "epoch": 0.6070766730320561, "grad_norm": 0.328125, "learning_rate": 1.0665688137968442e-05, "loss": 1.9568, "step": 18816 }, { "epoch": 0.6071089368858525, "grad_norm": 0.318359375, "learning_rate": 1.0664181558718582e-05, "loss": 1.9738, "step": 18817 }, { "epoch": 0.6071412007396488, "grad_norm": 0.32421875, "learning_rate": 1.066267502719429e-05, "loss": 1.9335, "step": 18818 }, { "epoch": 0.6071734645934452, "grad_norm": 0.33203125, "learning_rate": 1.0661168543412164e-05, "loss": 1.9599, "step": 18819 }, { "epoch": 0.6072057284472415, "grad_norm": 0.330078125, "learning_rate": 1.0659662107388789e-05, "loss": 1.9831, "step": 18820 }, { "epoch": 0.6072379923010379, "grad_norm": 0.330078125, "learning_rate": 1.0658155719140731e-05, "loss": 1.9667, "step": 18821 }, { "epoch": 0.6072702561548342, "grad_norm": 0.32421875, "learning_rate": 1.0656649378684578e-05, "loss": 1.9693, "step": 18822 }, { "epoch": 0.6073025200086306, "grad_norm": 0.326171875, "learning_rate": 1.0655143086036914e-05, "loss": 1.9537, "step": 18823 }, { "epoch": 0.6073347838624269, "grad_norm": 0.328125, "learning_rate": 1.0653636841214327e-05, "loss": 1.9875, "step": 18824 }, { "epoch": 0.6073670477162233, "grad_norm": 0.3203125, "learning_rate": 1.065213064423338e-05, "loss": 1.9721, "step": 18825 }, { "epoch": 0.6073993115700196, "grad_norm": 0.33203125, "learning_rate": 1.0650624495110656e-05, "loss": 1.9625, "step": 18826 }, { "epoch": 0.607431575423816, "grad_norm": 0.326171875, "learning_rate": 1.0649118393862747e-05, "loss": 1.9531, "step": 18827 }, { "epoch": 0.6074638392776123, "grad_norm": 0.328125, "learning_rate": 1.0647612340506217e-05, "loss": 1.9619, "step": 18828 }, { "epoch": 0.6074961031314087, "grad_norm": 0.330078125, "learning_rate": 1.0646106335057646e-05, "loss": 1.973, "step": 18829 }, { "epoch": 0.607528366985205, "grad_norm": 0.328125, "learning_rate": 1.0644600377533618e-05, "loss": 1.9566, "step": 18830 }, { "epoch": 0.6075606308390014, "grad_norm": 0.333984375, "learning_rate": 1.0643094467950699e-05, "loss": 1.9544, "step": 18831 }, { "epoch": 0.6075928946927976, "grad_norm": 0.33203125, "learning_rate": 1.0641588606325471e-05, "loss": 2.0, "step": 18832 }, { "epoch": 0.607625158546594, "grad_norm": 0.3359375, "learning_rate": 1.0640082792674514e-05, "loss": 1.9794, "step": 18833 }, { "epoch": 0.6076574224003904, "grad_norm": 0.337890625, "learning_rate": 1.0638577027014394e-05, "loss": 2.0046, "step": 18834 }, { "epoch": 0.6076896862541867, "grad_norm": 0.322265625, "learning_rate": 1.0637071309361687e-05, "loss": 1.9784, "step": 18835 }, { "epoch": 0.6077219501079831, "grad_norm": 0.33203125, "learning_rate": 1.0635565639732973e-05, "loss": 1.9533, "step": 18836 }, { "epoch": 0.6077542139617794, "grad_norm": 0.3359375, "learning_rate": 1.0634060018144819e-05, "loss": 1.9847, "step": 18837 }, { "epoch": 0.6077864778155758, "grad_norm": 0.328125, "learning_rate": 1.06325544446138e-05, "loss": 1.9688, "step": 18838 }, { "epoch": 0.6078187416693721, "grad_norm": 0.3359375, "learning_rate": 1.0631048919156493e-05, "loss": 1.9945, "step": 18839 }, { "epoch": 0.6078510055231685, "grad_norm": 0.337890625, "learning_rate": 1.0629543441789461e-05, "loss": 1.9635, "step": 18840 }, { "epoch": 0.6078832693769648, "grad_norm": 0.322265625, "learning_rate": 1.062803801252928e-05, "loss": 1.9984, "step": 18841 }, { "epoch": 0.6079155332307612, "grad_norm": 0.341796875, "learning_rate": 1.0626532631392523e-05, "loss": 1.9585, "step": 18842 }, { "epoch": 0.6079477970845575, "grad_norm": 0.337890625, "learning_rate": 1.0625027298395753e-05, "loss": 2.0005, "step": 18843 }, { "epoch": 0.6079800609383539, "grad_norm": 0.341796875, "learning_rate": 1.0623522013555544e-05, "loss": 1.9627, "step": 18844 }, { "epoch": 0.6080123247921502, "grad_norm": 0.326171875, "learning_rate": 1.0622016776888471e-05, "loss": 1.9841, "step": 18845 }, { "epoch": 0.6080445886459466, "grad_norm": 0.328125, "learning_rate": 1.062051158841109e-05, "loss": 1.9615, "step": 18846 }, { "epoch": 0.6080768524997429, "grad_norm": 0.330078125, "learning_rate": 1.0619006448139975e-05, "loss": 1.9884, "step": 18847 }, { "epoch": 0.6081091163535393, "grad_norm": 0.341796875, "learning_rate": 1.0617501356091703e-05, "loss": 1.9652, "step": 18848 }, { "epoch": 0.6081413802073355, "grad_norm": 0.322265625, "learning_rate": 1.0615996312282822e-05, "loss": 1.9496, "step": 18849 }, { "epoch": 0.608173644061132, "grad_norm": 0.32421875, "learning_rate": 1.061449131672991e-05, "loss": 1.9623, "step": 18850 }, { "epoch": 0.6082059079149282, "grad_norm": 0.34765625, "learning_rate": 1.0612986369449542e-05, "loss": 1.9486, "step": 18851 }, { "epoch": 0.6082381717687246, "grad_norm": 0.357421875, "learning_rate": 1.0611481470458263e-05, "loss": 1.9954, "step": 18852 }, { "epoch": 0.6082704356225209, "grad_norm": 0.34765625, "learning_rate": 1.0609976619772645e-05, "loss": 1.9581, "step": 18853 }, { "epoch": 0.6083026994763173, "grad_norm": 0.359375, "learning_rate": 1.0608471817409265e-05, "loss": 1.9773, "step": 18854 }, { "epoch": 0.6083349633301137, "grad_norm": 0.341796875, "learning_rate": 1.060696706338467e-05, "loss": 1.9949, "step": 18855 }, { "epoch": 0.60836722718391, "grad_norm": 0.333984375, "learning_rate": 1.060546235771543e-05, "loss": 1.9727, "step": 18856 }, { "epoch": 0.6083994910377064, "grad_norm": 0.36328125, "learning_rate": 1.0603957700418105e-05, "loss": 1.9716, "step": 18857 }, { "epoch": 0.6084317548915027, "grad_norm": 0.3515625, "learning_rate": 1.0602453091509269e-05, "loss": 2.0083, "step": 18858 }, { "epoch": 0.6084640187452991, "grad_norm": 0.34765625, "learning_rate": 1.0600948531005468e-05, "loss": 1.9626, "step": 18859 }, { "epoch": 0.6084962825990954, "grad_norm": 0.32421875, "learning_rate": 1.0599444018923271e-05, "loss": 1.9876, "step": 18860 }, { "epoch": 0.6085285464528918, "grad_norm": 0.34765625, "learning_rate": 1.059793955527924e-05, "loss": 1.9816, "step": 18861 }, { "epoch": 0.6085608103066881, "grad_norm": 0.32421875, "learning_rate": 1.059643514008993e-05, "loss": 1.9687, "step": 18862 }, { "epoch": 0.6085930741604845, "grad_norm": 0.337890625, "learning_rate": 1.0594930773371899e-05, "loss": 1.9469, "step": 18863 }, { "epoch": 0.6086253380142808, "grad_norm": 0.349609375, "learning_rate": 1.0593426455141716e-05, "loss": 1.9628, "step": 18864 }, { "epoch": 0.6086576018680772, "grad_norm": 0.341796875, "learning_rate": 1.059192218541593e-05, "loss": 1.9543, "step": 18865 }, { "epoch": 0.6086898657218734, "grad_norm": 0.3359375, "learning_rate": 1.0590417964211103e-05, "loss": 2.0121, "step": 18866 }, { "epoch": 0.6087221295756698, "grad_norm": 0.33203125, "learning_rate": 1.0588913791543794e-05, "loss": 1.9752, "step": 18867 }, { "epoch": 0.6087543934294661, "grad_norm": 0.33203125, "learning_rate": 1.0587409667430554e-05, "loss": 1.9584, "step": 18868 }, { "epoch": 0.6087866572832625, "grad_norm": 0.3359375, "learning_rate": 1.0585905591887941e-05, "loss": 1.9388, "step": 18869 }, { "epoch": 0.6088189211370588, "grad_norm": 0.326171875, "learning_rate": 1.058440156493252e-05, "loss": 1.9519, "step": 18870 }, { "epoch": 0.6088511849908552, "grad_norm": 0.341796875, "learning_rate": 1.0582897586580833e-05, "loss": 1.9705, "step": 18871 }, { "epoch": 0.6088834488446515, "grad_norm": 0.333984375, "learning_rate": 1.0581393656849442e-05, "loss": 1.946, "step": 18872 }, { "epoch": 0.6089157126984479, "grad_norm": 0.326171875, "learning_rate": 1.0579889775754901e-05, "loss": 1.969, "step": 18873 }, { "epoch": 0.6089479765522443, "grad_norm": 0.337890625, "learning_rate": 1.057838594331376e-05, "loss": 1.9564, "step": 18874 }, { "epoch": 0.6089802404060406, "grad_norm": 0.330078125, "learning_rate": 1.0576882159542575e-05, "loss": 1.9508, "step": 18875 }, { "epoch": 0.609012504259837, "grad_norm": 0.33203125, "learning_rate": 1.0575378424457904e-05, "loss": 1.94, "step": 18876 }, { "epoch": 0.6090447681136333, "grad_norm": 0.326171875, "learning_rate": 1.0573874738076285e-05, "loss": 1.9344, "step": 18877 }, { "epoch": 0.6090770319674297, "grad_norm": 0.345703125, "learning_rate": 1.0572371100414278e-05, "loss": 1.9903, "step": 18878 }, { "epoch": 0.609109295821226, "grad_norm": 0.333984375, "learning_rate": 1.057086751148844e-05, "loss": 1.9749, "step": 18879 }, { "epoch": 0.6091415596750224, "grad_norm": 0.330078125, "learning_rate": 1.056936397131531e-05, "loss": 1.993, "step": 18880 }, { "epoch": 0.6091738235288187, "grad_norm": 0.326171875, "learning_rate": 1.056786047991144e-05, "loss": 1.9704, "step": 18881 }, { "epoch": 0.6092060873826151, "grad_norm": 0.328125, "learning_rate": 1.0566357037293392e-05, "loss": 1.9614, "step": 18882 }, { "epoch": 0.6092383512364113, "grad_norm": 0.318359375, "learning_rate": 1.0564853643477697e-05, "loss": 1.9298, "step": 18883 }, { "epoch": 0.6092706150902077, "grad_norm": 0.326171875, "learning_rate": 1.056335029848091e-05, "loss": 1.9803, "step": 18884 }, { "epoch": 0.609302878944004, "grad_norm": 0.330078125, "learning_rate": 1.0561847002319588e-05, "loss": 1.9828, "step": 18885 }, { "epoch": 0.6093351427978004, "grad_norm": 0.3203125, "learning_rate": 1.0560343755010267e-05, "loss": 1.9721, "step": 18886 }, { "epoch": 0.6093674066515967, "grad_norm": 0.32421875, "learning_rate": 1.0558840556569494e-05, "loss": 1.9627, "step": 18887 }, { "epoch": 0.6093996705053931, "grad_norm": 0.328125, "learning_rate": 1.0557337407013815e-05, "loss": 1.9603, "step": 18888 }, { "epoch": 0.6094319343591894, "grad_norm": 0.3203125, "learning_rate": 1.0555834306359789e-05, "loss": 1.9759, "step": 18889 }, { "epoch": 0.6094641982129858, "grad_norm": 0.328125, "learning_rate": 1.0554331254623945e-05, "loss": 1.9993, "step": 18890 }, { "epoch": 0.6094964620667821, "grad_norm": 0.330078125, "learning_rate": 1.055282825182283e-05, "loss": 1.9609, "step": 18891 }, { "epoch": 0.6095287259205785, "grad_norm": 0.326171875, "learning_rate": 1.0551325297973e-05, "loss": 1.9623, "step": 18892 }, { "epoch": 0.6095609897743748, "grad_norm": 0.318359375, "learning_rate": 1.0549822393090981e-05, "loss": 1.9726, "step": 18893 }, { "epoch": 0.6095932536281712, "grad_norm": 0.322265625, "learning_rate": 1.0548319537193325e-05, "loss": 1.9748, "step": 18894 }, { "epoch": 0.6096255174819676, "grad_norm": 0.333984375, "learning_rate": 1.0546816730296582e-05, "loss": 1.9506, "step": 18895 }, { "epoch": 0.6096577813357639, "grad_norm": 0.3203125, "learning_rate": 1.054531397241728e-05, "loss": 1.9782, "step": 18896 }, { "epoch": 0.6096900451895603, "grad_norm": 0.322265625, "learning_rate": 1.0543811263571965e-05, "loss": 1.9659, "step": 18897 }, { "epoch": 0.6097223090433566, "grad_norm": 0.32421875, "learning_rate": 1.0542308603777181e-05, "loss": 1.9711, "step": 18898 }, { "epoch": 0.609754572897153, "grad_norm": 0.326171875, "learning_rate": 1.0540805993049463e-05, "loss": 1.9764, "step": 18899 }, { "epoch": 0.6097868367509492, "grad_norm": 0.32421875, "learning_rate": 1.0539303431405356e-05, "loss": 1.9722, "step": 18900 }, { "epoch": 0.6098191006047456, "grad_norm": 0.318359375, "learning_rate": 1.05378009188614e-05, "loss": 1.9712, "step": 18901 }, { "epoch": 0.6098513644585419, "grad_norm": 0.341796875, "learning_rate": 1.0536298455434126e-05, "loss": 1.9774, "step": 18902 }, { "epoch": 0.6098836283123383, "grad_norm": 0.318359375, "learning_rate": 1.0534796041140075e-05, "loss": 1.9498, "step": 18903 }, { "epoch": 0.6099158921661346, "grad_norm": 0.322265625, "learning_rate": 1.0533293675995791e-05, "loss": 1.9519, "step": 18904 }, { "epoch": 0.609948156019931, "grad_norm": 0.34765625, "learning_rate": 1.0531791360017802e-05, "loss": 1.9669, "step": 18905 }, { "epoch": 0.6099804198737273, "grad_norm": 0.322265625, "learning_rate": 1.0530289093222647e-05, "loss": 1.98, "step": 18906 }, { "epoch": 0.6100126837275237, "grad_norm": 0.33984375, "learning_rate": 1.052878687562687e-05, "loss": 1.9425, "step": 18907 }, { "epoch": 0.61004494758132, "grad_norm": 0.345703125, "learning_rate": 1.0527284707246991e-05, "loss": 1.9949, "step": 18908 }, { "epoch": 0.6100772114351164, "grad_norm": 0.333984375, "learning_rate": 1.0525782588099557e-05, "loss": 1.9833, "step": 18909 }, { "epoch": 0.6101094752889127, "grad_norm": 0.3515625, "learning_rate": 1.0524280518201103e-05, "loss": 1.9852, "step": 18910 }, { "epoch": 0.6101417391427091, "grad_norm": 0.330078125, "learning_rate": 1.0522778497568155e-05, "loss": 1.9884, "step": 18911 }, { "epoch": 0.6101740029965054, "grad_norm": 0.33203125, "learning_rate": 1.0521276526217241e-05, "loss": 1.9851, "step": 18912 }, { "epoch": 0.6102062668503018, "grad_norm": 0.328125, "learning_rate": 1.0519774604164914e-05, "loss": 1.9524, "step": 18913 }, { "epoch": 0.6102385307040981, "grad_norm": 0.31640625, "learning_rate": 1.0518272731427689e-05, "loss": 1.987, "step": 18914 }, { "epoch": 0.6102707945578945, "grad_norm": 0.328125, "learning_rate": 1.0516770908022098e-05, "loss": 1.9712, "step": 18915 }, { "epoch": 0.6103030584116909, "grad_norm": 0.3359375, "learning_rate": 1.0515269133964685e-05, "loss": 1.9676, "step": 18916 }, { "epoch": 0.6103353222654871, "grad_norm": 0.322265625, "learning_rate": 1.0513767409271969e-05, "loss": 2.0107, "step": 18917 }, { "epoch": 0.6103675861192835, "grad_norm": 0.330078125, "learning_rate": 1.0512265733960481e-05, "loss": 1.999, "step": 18918 }, { "epoch": 0.6103998499730798, "grad_norm": 0.318359375, "learning_rate": 1.0510764108046756e-05, "loss": 1.9678, "step": 18919 }, { "epoch": 0.6104321138268762, "grad_norm": 0.333984375, "learning_rate": 1.0509262531547315e-05, "loss": 1.9293, "step": 18920 }, { "epoch": 0.6104643776806725, "grad_norm": 0.33203125, "learning_rate": 1.0507761004478691e-05, "loss": 2.011, "step": 18921 }, { "epoch": 0.6104966415344689, "grad_norm": 0.326171875, "learning_rate": 1.050625952685741e-05, "loss": 1.9785, "step": 18922 }, { "epoch": 0.6105289053882652, "grad_norm": 0.326171875, "learning_rate": 1.0504758098700004e-05, "loss": 1.9653, "step": 18923 }, { "epoch": 0.6105611692420616, "grad_norm": 0.333984375, "learning_rate": 1.0503256720022996e-05, "loss": 1.963, "step": 18924 }, { "epoch": 0.6105934330958579, "grad_norm": 0.326171875, "learning_rate": 1.050175539084291e-05, "loss": 1.9369, "step": 18925 }, { "epoch": 0.6106256969496543, "grad_norm": 0.3203125, "learning_rate": 1.0500254111176274e-05, "loss": 1.944, "step": 18926 }, { "epoch": 0.6106579608034506, "grad_norm": 0.330078125, "learning_rate": 1.0498752881039613e-05, "loss": 1.9653, "step": 18927 }, { "epoch": 0.610690224657247, "grad_norm": 0.33984375, "learning_rate": 1.049725170044945e-05, "loss": 1.9408, "step": 18928 }, { "epoch": 0.6107224885110433, "grad_norm": 0.337890625, "learning_rate": 1.0495750569422312e-05, "loss": 1.9443, "step": 18929 }, { "epoch": 0.6107547523648397, "grad_norm": 0.31640625, "learning_rate": 1.049424948797472e-05, "loss": 1.9997, "step": 18930 }, { "epoch": 0.610787016218636, "grad_norm": 0.32421875, "learning_rate": 1.0492748456123195e-05, "loss": 1.9751, "step": 18931 }, { "epoch": 0.6108192800724324, "grad_norm": 0.328125, "learning_rate": 1.0491247473884265e-05, "loss": 1.9898, "step": 18932 }, { "epoch": 0.6108515439262286, "grad_norm": 0.3203125, "learning_rate": 1.0489746541274446e-05, "loss": 1.9959, "step": 18933 }, { "epoch": 0.610883807780025, "grad_norm": 0.337890625, "learning_rate": 1.048824565831026e-05, "loss": 1.9984, "step": 18934 }, { "epoch": 0.6109160716338214, "grad_norm": 0.318359375, "learning_rate": 1.0486744825008233e-05, "loss": 1.9936, "step": 18935 }, { "epoch": 0.6109483354876177, "grad_norm": 0.33203125, "learning_rate": 1.0485244041384879e-05, "loss": 1.9658, "step": 18936 }, { "epoch": 0.6109805993414141, "grad_norm": 0.328125, "learning_rate": 1.0483743307456717e-05, "loss": 1.9779, "step": 18937 }, { "epoch": 0.6110128631952104, "grad_norm": 0.345703125, "learning_rate": 1.0482242623240277e-05, "loss": 1.9821, "step": 18938 }, { "epoch": 0.6110451270490068, "grad_norm": 0.337890625, "learning_rate": 1.048074198875206e-05, "loss": 1.9876, "step": 18939 }, { "epoch": 0.6110773909028031, "grad_norm": 0.31640625, "learning_rate": 1.0479241404008595e-05, "loss": 1.9724, "step": 18940 }, { "epoch": 0.6111096547565995, "grad_norm": 0.33203125, "learning_rate": 1.0477740869026404e-05, "loss": 1.9548, "step": 18941 }, { "epoch": 0.6111419186103958, "grad_norm": 0.333984375, "learning_rate": 1.0476240383821993e-05, "loss": 1.9774, "step": 18942 }, { "epoch": 0.6111741824641922, "grad_norm": 0.330078125, "learning_rate": 1.0474739948411878e-05, "loss": 1.9393, "step": 18943 }, { "epoch": 0.6112064463179885, "grad_norm": 0.326171875, "learning_rate": 1.0473239562812587e-05, "loss": 1.9682, "step": 18944 }, { "epoch": 0.6112387101717849, "grad_norm": 0.337890625, "learning_rate": 1.0471739227040624e-05, "loss": 1.9799, "step": 18945 }, { "epoch": 0.6112709740255812, "grad_norm": 0.33203125, "learning_rate": 1.0470238941112506e-05, "loss": 1.9571, "step": 18946 }, { "epoch": 0.6113032378793776, "grad_norm": 0.330078125, "learning_rate": 1.046873870504475e-05, "loss": 1.9678, "step": 18947 }, { "epoch": 0.6113355017331739, "grad_norm": 0.330078125, "learning_rate": 1.0467238518853865e-05, "loss": 1.9773, "step": 18948 }, { "epoch": 0.6113677655869703, "grad_norm": 0.34765625, "learning_rate": 1.0465738382556367e-05, "loss": 1.9881, "step": 18949 }, { "epoch": 0.6114000294407665, "grad_norm": 0.318359375, "learning_rate": 1.0464238296168771e-05, "loss": 1.9651, "step": 18950 }, { "epoch": 0.611432293294563, "grad_norm": 0.3203125, "learning_rate": 1.0462738259707582e-05, "loss": 1.9415, "step": 18951 }, { "epoch": 0.6114645571483592, "grad_norm": 0.34765625, "learning_rate": 1.0461238273189316e-05, "loss": 1.9658, "step": 18952 }, { "epoch": 0.6114968210021556, "grad_norm": 0.328125, "learning_rate": 1.0459738336630485e-05, "loss": 1.9572, "step": 18953 }, { "epoch": 0.6115290848559519, "grad_norm": 0.333984375, "learning_rate": 1.0458238450047599e-05, "loss": 1.9682, "step": 18954 }, { "epoch": 0.6115613487097483, "grad_norm": 0.318359375, "learning_rate": 1.0456738613457161e-05, "loss": 1.9742, "step": 18955 }, { "epoch": 0.6115936125635447, "grad_norm": 0.326171875, "learning_rate": 1.0455238826875686e-05, "loss": 1.9693, "step": 18956 }, { "epoch": 0.611625876417341, "grad_norm": 0.3984375, "learning_rate": 1.0453739090319683e-05, "loss": 1.9471, "step": 18957 }, { "epoch": 0.6116581402711374, "grad_norm": 0.328125, "learning_rate": 1.0452239403805657e-05, "loss": 1.9711, "step": 18958 }, { "epoch": 0.6116904041249337, "grad_norm": 0.32421875, "learning_rate": 1.045073976735012e-05, "loss": 1.9773, "step": 18959 }, { "epoch": 0.6117226679787301, "grad_norm": 0.32421875, "learning_rate": 1.0449240180969574e-05, "loss": 1.9821, "step": 18960 }, { "epoch": 0.6117549318325264, "grad_norm": 0.3359375, "learning_rate": 1.0447740644680528e-05, "loss": 1.9773, "step": 18961 }, { "epoch": 0.6117871956863228, "grad_norm": 0.3359375, "learning_rate": 1.0446241158499486e-05, "loss": 1.9891, "step": 18962 }, { "epoch": 0.6118194595401191, "grad_norm": 0.333984375, "learning_rate": 1.0444741722442958e-05, "loss": 1.9981, "step": 18963 }, { "epoch": 0.6118517233939155, "grad_norm": 0.55859375, "learning_rate": 1.044324233652744e-05, "loss": 2.001, "step": 18964 }, { "epoch": 0.6118839872477118, "grad_norm": 0.32421875, "learning_rate": 1.0441743000769446e-05, "loss": 1.9915, "step": 18965 }, { "epoch": 0.6119162511015082, "grad_norm": 0.361328125, "learning_rate": 1.0440243715185474e-05, "loss": 2.009, "step": 18966 }, { "epoch": 0.6119485149553044, "grad_norm": 0.5078125, "learning_rate": 1.0438744479792027e-05, "loss": 2.1146, "step": 18967 }, { "epoch": 0.6119807788091008, "grad_norm": 0.45703125, "learning_rate": 1.0437245294605608e-05, "loss": 2.1344, "step": 18968 }, { "epoch": 0.6120130426628971, "grad_norm": 0.419921875, "learning_rate": 1.0435746159642724e-05, "loss": 2.1377, "step": 18969 }, { "epoch": 0.6120453065166935, "grad_norm": 0.478515625, "learning_rate": 1.0434247074919864e-05, "loss": 2.1131, "step": 18970 }, { "epoch": 0.6120775703704898, "grad_norm": 0.57421875, "learning_rate": 1.043274804045354e-05, "loss": 2.144, "step": 18971 }, { "epoch": 0.6121098342242862, "grad_norm": 0.4921875, "learning_rate": 1.0431249056260255e-05, "loss": 2.1229, "step": 18972 }, { "epoch": 0.6121420980780825, "grad_norm": 0.466796875, "learning_rate": 1.0429750122356497e-05, "loss": 2.123, "step": 18973 }, { "epoch": 0.6121743619318789, "grad_norm": 0.4921875, "learning_rate": 1.0428251238758768e-05, "loss": 2.1197, "step": 18974 }, { "epoch": 0.6122066257856753, "grad_norm": 0.46875, "learning_rate": 1.0426752405483578e-05, "loss": 2.1087, "step": 18975 }, { "epoch": 0.6122388896394716, "grad_norm": 0.5234375, "learning_rate": 1.0425253622547412e-05, "loss": 2.0826, "step": 18976 }, { "epoch": 0.612271153493268, "grad_norm": 0.85546875, "learning_rate": 1.042375488996677e-05, "loss": 2.0777, "step": 18977 }, { "epoch": 0.6123034173470643, "grad_norm": 0.6484375, "learning_rate": 1.0422256207758154e-05, "loss": 2.0469, "step": 18978 }, { "epoch": 0.6123356812008607, "grad_norm": 0.5703125, "learning_rate": 1.0420757575938056e-05, "loss": 2.0419, "step": 18979 }, { "epoch": 0.612367945054657, "grad_norm": 0.5546875, "learning_rate": 1.0419258994522973e-05, "loss": 2.0688, "step": 18980 }, { "epoch": 0.6124002089084534, "grad_norm": 0.58203125, "learning_rate": 1.0417760463529406e-05, "loss": 2.0736, "step": 18981 }, { "epoch": 0.6124324727622497, "grad_norm": 0.5859375, "learning_rate": 1.0416261982973838e-05, "loss": 2.0592, "step": 18982 }, { "epoch": 0.6124647366160461, "grad_norm": 0.54296875, "learning_rate": 1.0414763552872771e-05, "loss": 2.0723, "step": 18983 }, { "epoch": 0.6124970004698423, "grad_norm": 0.486328125, "learning_rate": 1.0413265173242703e-05, "loss": 2.0341, "step": 18984 }, { "epoch": 0.6125292643236387, "grad_norm": 0.55078125, "learning_rate": 1.0411766844100115e-05, "loss": 2.0003, "step": 18985 }, { "epoch": 0.612561528177435, "grad_norm": 0.53515625, "learning_rate": 1.0410268565461508e-05, "loss": 1.9876, "step": 18986 }, { "epoch": 0.6125937920312314, "grad_norm": 0.486328125, "learning_rate": 1.040877033734337e-05, "loss": 1.9533, "step": 18987 }, { "epoch": 0.6126260558850277, "grad_norm": 0.451171875, "learning_rate": 1.04072721597622e-05, "loss": 1.9795, "step": 18988 }, { "epoch": 0.6126583197388241, "grad_norm": 0.44140625, "learning_rate": 1.0405774032734478e-05, "loss": 1.9805, "step": 18989 }, { "epoch": 0.6126905835926204, "grad_norm": 0.423828125, "learning_rate": 1.04042759562767e-05, "loss": 1.9837, "step": 18990 }, { "epoch": 0.6127228474464168, "grad_norm": 0.40625, "learning_rate": 1.040277793040536e-05, "loss": 1.9838, "step": 18991 }, { "epoch": 0.6127551113002131, "grad_norm": 0.40625, "learning_rate": 1.0401279955136937e-05, "loss": 1.9704, "step": 18992 }, { "epoch": 0.6127873751540095, "grad_norm": 0.37890625, "learning_rate": 1.0399782030487928e-05, "loss": 1.9428, "step": 18993 }, { "epoch": 0.6128196390078058, "grad_norm": 0.3828125, "learning_rate": 1.0398284156474818e-05, "loss": 1.9552, "step": 18994 }, { "epoch": 0.6128519028616022, "grad_norm": 0.396484375, "learning_rate": 1.0396786333114094e-05, "loss": 1.9858, "step": 18995 }, { "epoch": 0.6128841667153986, "grad_norm": 0.365234375, "learning_rate": 1.0395288560422244e-05, "loss": 1.963, "step": 18996 }, { "epoch": 0.6129164305691949, "grad_norm": 0.353515625, "learning_rate": 1.0393790838415759e-05, "loss": 1.9883, "step": 18997 }, { "epoch": 0.6129486944229913, "grad_norm": 0.35546875, "learning_rate": 1.0392293167111111e-05, "loss": 1.9852, "step": 18998 }, { "epoch": 0.6129809582767876, "grad_norm": 0.369140625, "learning_rate": 1.0390795546524797e-05, "loss": 1.9678, "step": 18999 }, { "epoch": 0.613013222130584, "grad_norm": 0.349609375, "learning_rate": 1.0389297976673306e-05, "loss": 1.976, "step": 19000 }, { "epoch": 0.6130454859843802, "grad_norm": 0.34765625, "learning_rate": 1.0387800457573104e-05, "loss": 1.9702, "step": 19001 }, { "epoch": 0.6130777498381766, "grad_norm": 0.345703125, "learning_rate": 1.0386302989240692e-05, "loss": 1.9681, "step": 19002 }, { "epoch": 0.6131100136919729, "grad_norm": 0.341796875, "learning_rate": 1.0384805571692551e-05, "loss": 1.9749, "step": 19003 }, { "epoch": 0.6131422775457693, "grad_norm": 0.34375, "learning_rate": 1.0383308204945154e-05, "loss": 2.0145, "step": 19004 }, { "epoch": 0.6131745413995656, "grad_norm": 0.33203125, "learning_rate": 1.0381810889014985e-05, "loss": 1.9694, "step": 19005 }, { "epoch": 0.613206805253362, "grad_norm": 0.330078125, "learning_rate": 1.0380313623918538e-05, "loss": 1.9676, "step": 19006 }, { "epoch": 0.6132390691071583, "grad_norm": 0.33984375, "learning_rate": 1.0378816409672276e-05, "loss": 1.973, "step": 19007 }, { "epoch": 0.6132713329609547, "grad_norm": 0.61328125, "learning_rate": 1.0377319246292692e-05, "loss": 1.9672, "step": 19008 }, { "epoch": 0.613303596814751, "grad_norm": 0.734375, "learning_rate": 1.037582213379626e-05, "loss": 2.0121, "step": 19009 }, { "epoch": 0.6133358606685474, "grad_norm": 0.89453125, "learning_rate": 1.037432507219946e-05, "loss": 1.979, "step": 19010 }, { "epoch": 0.6133681245223437, "grad_norm": 0.82421875, "learning_rate": 1.0372828061518771e-05, "loss": 2.0346, "step": 19011 }, { "epoch": 0.6134003883761401, "grad_norm": 0.59765625, "learning_rate": 1.0371331101770675e-05, "loss": 2.1591, "step": 19012 }, { "epoch": 0.6134326522299364, "grad_norm": 0.51953125, "learning_rate": 1.036983419297164e-05, "loss": 2.1346, "step": 19013 }, { "epoch": 0.6134649160837328, "grad_norm": 0.46484375, "learning_rate": 1.036833733513815e-05, "loss": 2.1373, "step": 19014 }, { "epoch": 0.6134971799375291, "grad_norm": 0.51953125, "learning_rate": 1.0366840528286685e-05, "loss": 2.1618, "step": 19015 }, { "epoch": 0.6135294437913255, "grad_norm": 0.69921875, "learning_rate": 1.036534377243371e-05, "loss": 2.0314, "step": 19016 }, { "epoch": 0.6135617076451219, "grad_norm": 0.71875, "learning_rate": 1.0363847067595707e-05, "loss": 2.0248, "step": 19017 }, { "epoch": 0.6135939714989181, "grad_norm": 0.59765625, "learning_rate": 1.0362350413789153e-05, "loss": 1.9461, "step": 19018 }, { "epoch": 0.6136262353527145, "grad_norm": 0.58984375, "learning_rate": 1.0360853811030516e-05, "loss": 1.9632, "step": 19019 }, { "epoch": 0.6136584992065108, "grad_norm": 0.56640625, "learning_rate": 1.0359357259336269e-05, "loss": 1.9721, "step": 19020 }, { "epoch": 0.6136907630603072, "grad_norm": 0.5546875, "learning_rate": 1.035786075872289e-05, "loss": 1.954, "step": 19021 }, { "epoch": 0.6137230269141035, "grad_norm": 0.5234375, "learning_rate": 1.0356364309206853e-05, "loss": 1.9478, "step": 19022 }, { "epoch": 0.6137552907678999, "grad_norm": 0.498046875, "learning_rate": 1.0354867910804622e-05, "loss": 1.9864, "step": 19023 }, { "epoch": 0.6137875546216962, "grad_norm": 0.48828125, "learning_rate": 1.0353371563532674e-05, "loss": 2.121, "step": 19024 }, { "epoch": 0.6138198184754926, "grad_norm": 0.71484375, "learning_rate": 1.0351875267407481e-05, "loss": 2.2082, "step": 19025 }, { "epoch": 0.6138520823292889, "grad_norm": 0.8046875, "learning_rate": 1.0350379022445507e-05, "loss": 2.1159, "step": 19026 }, { "epoch": 0.6138843461830853, "grad_norm": 0.80078125, "learning_rate": 1.0348882828663226e-05, "loss": 2.0496, "step": 19027 }, { "epoch": 0.6139166100368816, "grad_norm": 0.57421875, "learning_rate": 1.0347386686077111e-05, "loss": 2.0872, "step": 19028 }, { "epoch": 0.613948873890678, "grad_norm": 0.578125, "learning_rate": 1.0345890594703617e-05, "loss": 2.0759, "step": 19029 }, { "epoch": 0.6139811377444743, "grad_norm": 0.546875, "learning_rate": 1.0344394554559224e-05, "loss": 2.0651, "step": 19030 }, { "epoch": 0.6140134015982707, "grad_norm": 0.52734375, "learning_rate": 1.0342898565660403e-05, "loss": 2.0297, "step": 19031 }, { "epoch": 0.614045665452067, "grad_norm": 0.5234375, "learning_rate": 1.0341402628023601e-05, "loss": 2.06, "step": 19032 }, { "epoch": 0.6140779293058634, "grad_norm": 0.5078125, "learning_rate": 1.0339906741665303e-05, "loss": 2.0808, "step": 19033 }, { "epoch": 0.6141101931596596, "grad_norm": 0.45703125, "learning_rate": 1.0338410906601974e-05, "loss": 2.0473, "step": 19034 }, { "epoch": 0.614142457013456, "grad_norm": 0.43359375, "learning_rate": 1.0336915122850066e-05, "loss": 2.0363, "step": 19035 }, { "epoch": 0.6141747208672524, "grad_norm": 0.435546875, "learning_rate": 1.0335419390426049e-05, "loss": 2.0544, "step": 19036 }, { "epoch": 0.6142069847210487, "grad_norm": 0.4375, "learning_rate": 1.0333923709346397e-05, "loss": 2.0562, "step": 19037 }, { "epoch": 0.6142392485748451, "grad_norm": 0.42578125, "learning_rate": 1.0332428079627561e-05, "loss": 2.0451, "step": 19038 }, { "epoch": 0.6142715124286414, "grad_norm": 0.447265625, "learning_rate": 1.0330932501286006e-05, "loss": 2.1109, "step": 19039 }, { "epoch": 0.6143037762824378, "grad_norm": 0.41015625, "learning_rate": 1.0329436974338203e-05, "loss": 2.085, "step": 19040 }, { "epoch": 0.6143360401362341, "grad_norm": 0.41015625, "learning_rate": 1.0327941498800601e-05, "loss": 2.054, "step": 19041 }, { "epoch": 0.6143683039900305, "grad_norm": 0.421875, "learning_rate": 1.0326446074689669e-05, "loss": 2.0466, "step": 19042 }, { "epoch": 0.6144005678438268, "grad_norm": 0.375, "learning_rate": 1.0324950702021869e-05, "loss": 2.0465, "step": 19043 }, { "epoch": 0.6144328316976232, "grad_norm": 0.384765625, "learning_rate": 1.0323455380813654e-05, "loss": 2.0812, "step": 19044 }, { "epoch": 0.6144650955514195, "grad_norm": 0.380859375, "learning_rate": 1.0321960111081487e-05, "loss": 2.0824, "step": 19045 }, { "epoch": 0.6144973594052159, "grad_norm": 0.365234375, "learning_rate": 1.032046489284183e-05, "loss": 2.0632, "step": 19046 }, { "epoch": 0.6145296232590122, "grad_norm": 0.37109375, "learning_rate": 1.0318969726111136e-05, "loss": 2.0547, "step": 19047 }, { "epoch": 0.6145618871128086, "grad_norm": 0.376953125, "learning_rate": 1.0317474610905864e-05, "loss": 2.1215, "step": 19048 }, { "epoch": 0.6145941509666049, "grad_norm": 0.57421875, "learning_rate": 1.0315979547242476e-05, "loss": 2.1358, "step": 19049 }, { "epoch": 0.6146264148204013, "grad_norm": 1.359375, "learning_rate": 1.0314484535137422e-05, "loss": 2.2418, "step": 19050 }, { "epoch": 0.6146586786741975, "grad_norm": 0.59765625, "learning_rate": 1.031298957460716e-05, "loss": 2.1724, "step": 19051 }, { "epoch": 0.6146909425279939, "grad_norm": 0.625, "learning_rate": 1.0311494665668151e-05, "loss": 2.1852, "step": 19052 }, { "epoch": 0.6147232063817902, "grad_norm": 0.546875, "learning_rate": 1.0309999808336842e-05, "loss": 2.2028, "step": 19053 }, { "epoch": 0.6147554702355866, "grad_norm": 0.49609375, "learning_rate": 1.0308505002629688e-05, "loss": 2.1679, "step": 19054 }, { "epoch": 0.6147877340893829, "grad_norm": 0.5234375, "learning_rate": 1.0307010248563149e-05, "loss": 2.1317, "step": 19055 }, { "epoch": 0.6148199979431793, "grad_norm": 0.5078125, "learning_rate": 1.0305515546153676e-05, "loss": 2.2114, "step": 19056 }, { "epoch": 0.6148522617969757, "grad_norm": 0.5078125, "learning_rate": 1.0304020895417713e-05, "loss": 2.1685, "step": 19057 }, { "epoch": 0.614884525650772, "grad_norm": 0.482421875, "learning_rate": 1.0302526296371725e-05, "loss": 2.1748, "step": 19058 }, { "epoch": 0.6149167895045684, "grad_norm": 0.48046875, "learning_rate": 1.0301031749032162e-05, "loss": 2.2159, "step": 19059 }, { "epoch": 0.6149490533583647, "grad_norm": 0.498046875, "learning_rate": 1.029953725341546e-05, "loss": 2.1452, "step": 19060 }, { "epoch": 0.6149813172121611, "grad_norm": 0.765625, "learning_rate": 1.0298042809538084e-05, "loss": 2.1979, "step": 19061 }, { "epoch": 0.6150135810659574, "grad_norm": 0.52734375, "learning_rate": 1.0296548417416485e-05, "loss": 2.1318, "step": 19062 }, { "epoch": 0.6150458449197538, "grad_norm": 0.47265625, "learning_rate": 1.02950540770671e-05, "loss": 2.1821, "step": 19063 }, { "epoch": 0.6150781087735501, "grad_norm": 0.48828125, "learning_rate": 1.0293559788506381e-05, "loss": 2.1483, "step": 19064 }, { "epoch": 0.6151103726273465, "grad_norm": 0.50390625, "learning_rate": 1.029206555175079e-05, "loss": 2.1905, "step": 19065 }, { "epoch": 0.6151426364811428, "grad_norm": 0.470703125, "learning_rate": 1.0290571366816759e-05, "loss": 2.183, "step": 19066 }, { "epoch": 0.6151749003349392, "grad_norm": 0.44921875, "learning_rate": 1.0289077233720735e-05, "loss": 2.1251, "step": 19067 }, { "epoch": 0.6152071641887354, "grad_norm": 0.494140625, "learning_rate": 1.0287583152479178e-05, "loss": 2.1684, "step": 19068 }, { "epoch": 0.6152394280425318, "grad_norm": 0.71484375, "learning_rate": 1.0286089123108522e-05, "loss": 2.164, "step": 19069 }, { "epoch": 0.6152716918963281, "grad_norm": 0.86328125, "learning_rate": 1.0284595145625212e-05, "loss": 2.1424, "step": 19070 }, { "epoch": 0.6153039557501245, "grad_norm": 1.03125, "learning_rate": 1.0283101220045698e-05, "loss": 2.1815, "step": 19071 }, { "epoch": 0.6153362196039208, "grad_norm": 0.66796875, "learning_rate": 1.0281607346386423e-05, "loss": 2.2772, "step": 19072 }, { "epoch": 0.6153684834577172, "grad_norm": 0.6171875, "learning_rate": 1.0280113524663826e-05, "loss": 2.1538, "step": 19073 }, { "epoch": 0.6154007473115135, "grad_norm": 0.61328125, "learning_rate": 1.0278619754894359e-05, "loss": 2.1538, "step": 19074 }, { "epoch": 0.6154330111653099, "grad_norm": 0.58984375, "learning_rate": 1.0277126037094452e-05, "loss": 2.1706, "step": 19075 }, { "epoch": 0.6154652750191063, "grad_norm": 0.57421875, "learning_rate": 1.0275632371280556e-05, "loss": 2.1872, "step": 19076 }, { "epoch": 0.6154975388729026, "grad_norm": 0.5, "learning_rate": 1.0274138757469113e-05, "loss": 2.1043, "step": 19077 }, { "epoch": 0.615529802726699, "grad_norm": 0.51171875, "learning_rate": 1.0272645195676554e-05, "loss": 2.1465, "step": 19078 }, { "epoch": 0.6155620665804953, "grad_norm": 0.53125, "learning_rate": 1.0271151685919329e-05, "loss": 2.1649, "step": 19079 }, { "epoch": 0.6155943304342917, "grad_norm": 0.515625, "learning_rate": 1.0269658228213874e-05, "loss": 2.1593, "step": 19080 }, { "epoch": 0.615626594288088, "grad_norm": 0.515625, "learning_rate": 1.0268164822576625e-05, "loss": 2.1759, "step": 19081 }, { "epoch": 0.6156588581418844, "grad_norm": 0.54296875, "learning_rate": 1.0266671469024025e-05, "loss": 2.1606, "step": 19082 }, { "epoch": 0.6156911219956807, "grad_norm": 0.6015625, "learning_rate": 1.0265178167572512e-05, "loss": 2.1836, "step": 19083 }, { "epoch": 0.6157233858494771, "grad_norm": 0.60546875, "learning_rate": 1.0263684918238517e-05, "loss": 2.1511, "step": 19084 }, { "epoch": 0.6157556497032733, "grad_norm": 0.478515625, "learning_rate": 1.0262191721038482e-05, "loss": 2.1636, "step": 19085 }, { "epoch": 0.6157879135570697, "grad_norm": 0.462890625, "learning_rate": 1.0260698575988841e-05, "loss": 2.1432, "step": 19086 }, { "epoch": 0.615820177410866, "grad_norm": 0.466796875, "learning_rate": 1.0259205483106033e-05, "loss": 2.1319, "step": 19087 }, { "epoch": 0.6158524412646624, "grad_norm": 0.50390625, "learning_rate": 1.0257712442406487e-05, "loss": 2.188, "step": 19088 }, { "epoch": 0.6158847051184587, "grad_norm": 0.486328125, "learning_rate": 1.025621945390664e-05, "loss": 2.1275, "step": 19089 }, { "epoch": 0.6159169689722551, "grad_norm": 0.462890625, "learning_rate": 1.0254726517622933e-05, "loss": 2.1316, "step": 19090 }, { "epoch": 0.6159492328260514, "grad_norm": 0.462890625, "learning_rate": 1.0253233633571782e-05, "loss": 2.1331, "step": 19091 }, { "epoch": 0.6159814966798478, "grad_norm": 0.5, "learning_rate": 1.0251740801769634e-05, "loss": 2.1397, "step": 19092 }, { "epoch": 0.6160137605336441, "grad_norm": 0.439453125, "learning_rate": 1.0250248022232924e-05, "loss": 2.0284, "step": 19093 }, { "epoch": 0.6160460243874405, "grad_norm": 0.427734375, "learning_rate": 1.0248755294978068e-05, "loss": 2.0201, "step": 19094 }, { "epoch": 0.6160782882412368, "grad_norm": 0.435546875, "learning_rate": 1.0247262620021504e-05, "loss": 2.003, "step": 19095 }, { "epoch": 0.6161105520950332, "grad_norm": 0.419921875, "learning_rate": 1.0245769997379672e-05, "loss": 2.0151, "step": 19096 }, { "epoch": 0.6161428159488296, "grad_norm": 0.38671875, "learning_rate": 1.0244277427068986e-05, "loss": 2.0378, "step": 19097 }, { "epoch": 0.6161750798026259, "grad_norm": 0.404296875, "learning_rate": 1.0242784909105882e-05, "loss": 2.0305, "step": 19098 }, { "epoch": 0.6162073436564223, "grad_norm": 0.43359375, "learning_rate": 1.0241292443506795e-05, "loss": 2.0614, "step": 19099 }, { "epoch": 0.6162396075102186, "grad_norm": 0.400390625, "learning_rate": 1.0239800030288141e-05, "loss": 2.0566, "step": 19100 }, { "epoch": 0.616271871364015, "grad_norm": 0.45703125, "learning_rate": 1.0238307669466355e-05, "loss": 2.0293, "step": 19101 }, { "epoch": 0.6163041352178112, "grad_norm": 0.490234375, "learning_rate": 1.0236815361057863e-05, "loss": 2.0038, "step": 19102 }, { "epoch": 0.6163363990716076, "grad_norm": 0.419921875, "learning_rate": 1.0235323105079089e-05, "loss": 2.0325, "step": 19103 }, { "epoch": 0.6163686629254039, "grad_norm": 0.3984375, "learning_rate": 1.0233830901546459e-05, "loss": 2.0673, "step": 19104 }, { "epoch": 0.6164009267792003, "grad_norm": 0.44140625, "learning_rate": 1.0232338750476403e-05, "loss": 2.0397, "step": 19105 }, { "epoch": 0.6164331906329966, "grad_norm": 0.431640625, "learning_rate": 1.023084665188534e-05, "loss": 2.0402, "step": 19106 }, { "epoch": 0.616465454486793, "grad_norm": 0.404296875, "learning_rate": 1.0229354605789695e-05, "loss": 2.0501, "step": 19107 }, { "epoch": 0.6164977183405893, "grad_norm": 0.3984375, "learning_rate": 1.0227862612205894e-05, "loss": 2.0425, "step": 19108 }, { "epoch": 0.6165299821943857, "grad_norm": 0.40625, "learning_rate": 1.0226370671150354e-05, "loss": 2.06, "step": 19109 }, { "epoch": 0.616562246048182, "grad_norm": 0.4921875, "learning_rate": 1.0224878782639504e-05, "loss": 1.9755, "step": 19110 }, { "epoch": 0.6165945099019784, "grad_norm": 0.48046875, "learning_rate": 1.0223386946689763e-05, "loss": 1.949, "step": 19111 }, { "epoch": 0.6166267737557747, "grad_norm": 0.474609375, "learning_rate": 1.022189516331755e-05, "loss": 1.9795, "step": 19112 }, { "epoch": 0.6166590376095711, "grad_norm": 0.490234375, "learning_rate": 1.0220403432539287e-05, "loss": 1.9586, "step": 19113 }, { "epoch": 0.6166913014633674, "grad_norm": 0.431640625, "learning_rate": 1.02189117543714e-05, "loss": 1.9489, "step": 19114 }, { "epoch": 0.6167235653171638, "grad_norm": 0.466796875, "learning_rate": 1.0217420128830294e-05, "loss": 1.9886, "step": 19115 }, { "epoch": 0.6167558291709601, "grad_norm": 0.53125, "learning_rate": 1.0215928555932399e-05, "loss": 2.0618, "step": 19116 }, { "epoch": 0.6167880930247565, "grad_norm": 0.50390625, "learning_rate": 1.0214437035694137e-05, "loss": 1.9922, "step": 19117 }, { "epoch": 0.6168203568785529, "grad_norm": 0.46875, "learning_rate": 1.0212945568131908e-05, "loss": 1.9479, "step": 19118 }, { "epoch": 0.6168526207323491, "grad_norm": 0.46484375, "learning_rate": 1.0211454153262144e-05, "loss": 1.9443, "step": 19119 }, { "epoch": 0.6168848845861455, "grad_norm": 0.49609375, "learning_rate": 1.020996279110126e-05, "loss": 1.9758, "step": 19120 }, { "epoch": 0.6169171484399418, "grad_norm": 0.466796875, "learning_rate": 1.0208471481665671e-05, "loss": 1.9373, "step": 19121 }, { "epoch": 0.6169494122937382, "grad_norm": 0.44921875, "learning_rate": 1.0206980224971784e-05, "loss": 1.9497, "step": 19122 }, { "epoch": 0.6169816761475345, "grad_norm": 0.48046875, "learning_rate": 1.0205489021036022e-05, "loss": 1.9275, "step": 19123 }, { "epoch": 0.6170139400013309, "grad_norm": 0.4375, "learning_rate": 1.0203997869874804e-05, "loss": 1.9427, "step": 19124 }, { "epoch": 0.6170462038551272, "grad_norm": 0.43359375, "learning_rate": 1.0202506771504531e-05, "loss": 1.9299, "step": 19125 }, { "epoch": 0.6170784677089236, "grad_norm": 0.44921875, "learning_rate": 1.0201015725941617e-05, "loss": 1.9794, "step": 19126 }, { "epoch": 0.6171107315627199, "grad_norm": 0.41796875, "learning_rate": 1.019952473320249e-05, "loss": 1.9545, "step": 19127 }, { "epoch": 0.6171429954165163, "grad_norm": 0.49609375, "learning_rate": 1.0198033793303545e-05, "loss": 1.9381, "step": 19128 }, { "epoch": 0.6171752592703126, "grad_norm": 0.44921875, "learning_rate": 1.0196542906261199e-05, "loss": 1.9698, "step": 19129 }, { "epoch": 0.617207523124109, "grad_norm": 0.435546875, "learning_rate": 1.0195052072091866e-05, "loss": 1.9789, "step": 19130 }, { "epoch": 0.6172397869779053, "grad_norm": 0.48828125, "learning_rate": 1.0193561290811947e-05, "loss": 2.0413, "step": 19131 }, { "epoch": 0.6172720508317017, "grad_norm": 0.419921875, "learning_rate": 1.019207056243786e-05, "loss": 2.0461, "step": 19132 }, { "epoch": 0.617304314685498, "grad_norm": 0.89453125, "learning_rate": 1.0190579886986014e-05, "loss": 1.9946, "step": 19133 }, { "epoch": 0.6173365785392944, "grad_norm": 0.640625, "learning_rate": 1.018908926447281e-05, "loss": 2.0184, "step": 19134 }, { "epoch": 0.6173688423930906, "grad_norm": 0.474609375, "learning_rate": 1.018759869491466e-05, "loss": 2.0217, "step": 19135 }, { "epoch": 0.617401106246887, "grad_norm": 0.5, "learning_rate": 1.0186108178327975e-05, "loss": 2.0402, "step": 19136 }, { "epoch": 0.6174333701006834, "grad_norm": 0.54296875, "learning_rate": 1.0184617714729156e-05, "loss": 2.06, "step": 19137 }, { "epoch": 0.6174656339544797, "grad_norm": 0.52734375, "learning_rate": 1.0183127304134608e-05, "loss": 2.055, "step": 19138 }, { "epoch": 0.6174978978082761, "grad_norm": 0.51953125, "learning_rate": 1.0181636946560743e-05, "loss": 2.079, "step": 19139 }, { "epoch": 0.6175301616620724, "grad_norm": 0.52734375, "learning_rate": 1.0180146642023959e-05, "loss": 2.0583, "step": 19140 }, { "epoch": 0.6175624255158688, "grad_norm": 0.4921875, "learning_rate": 1.0178656390540662e-05, "loss": 2.0889, "step": 19141 }, { "epoch": 0.6175946893696651, "grad_norm": 0.45703125, "learning_rate": 1.0177166192127262e-05, "loss": 2.0512, "step": 19142 }, { "epoch": 0.6176269532234615, "grad_norm": 0.4765625, "learning_rate": 1.0175676046800152e-05, "loss": 2.0833, "step": 19143 }, { "epoch": 0.6176592170772578, "grad_norm": 0.51953125, "learning_rate": 1.0174185954575739e-05, "loss": 2.2298, "step": 19144 }, { "epoch": 0.6176914809310542, "grad_norm": 0.53125, "learning_rate": 1.017269591547043e-05, "loss": 2.2244, "step": 19145 }, { "epoch": 0.6177237447848505, "grad_norm": 0.5234375, "learning_rate": 1.0171205929500614e-05, "loss": 2.229, "step": 19146 }, { "epoch": 0.6177560086386469, "grad_norm": 0.546875, "learning_rate": 1.0169715996682703e-05, "loss": 2.2284, "step": 19147 }, { "epoch": 0.6177882724924432, "grad_norm": 0.54296875, "learning_rate": 1.0168226117033097e-05, "loss": 2.2166, "step": 19148 }, { "epoch": 0.6178205363462396, "grad_norm": 0.474609375, "learning_rate": 1.0166736290568185e-05, "loss": 2.2084, "step": 19149 }, { "epoch": 0.6178528002000359, "grad_norm": 0.47265625, "learning_rate": 1.0165246517304373e-05, "loss": 2.2479, "step": 19150 }, { "epoch": 0.6178850640538323, "grad_norm": 0.52734375, "learning_rate": 1.0163756797258065e-05, "loss": 2.2274, "step": 19151 }, { "epoch": 0.6179173279076285, "grad_norm": 0.50390625, "learning_rate": 1.0162267130445648e-05, "loss": 2.2367, "step": 19152 }, { "epoch": 0.6179495917614249, "grad_norm": 0.47265625, "learning_rate": 1.016077751688352e-05, "loss": 2.2789, "step": 19153 }, { "epoch": 0.6179818556152212, "grad_norm": 0.4921875, "learning_rate": 1.0159287956588084e-05, "loss": 2.2666, "step": 19154 }, { "epoch": 0.6180141194690176, "grad_norm": 0.52734375, "learning_rate": 1.0157798449575741e-05, "loss": 2.2257, "step": 19155 }, { "epoch": 0.6180463833228139, "grad_norm": 0.6953125, "learning_rate": 1.015630899586287e-05, "loss": 2.2182, "step": 19156 }, { "epoch": 0.6180786471766103, "grad_norm": 0.65625, "learning_rate": 1.0154819595465875e-05, "loss": 2.2096, "step": 19157 }, { "epoch": 0.6181109110304067, "grad_norm": 0.71484375, "learning_rate": 1.0153330248401157e-05, "loss": 2.2329, "step": 19158 }, { "epoch": 0.618143174884203, "grad_norm": 1.0625, "learning_rate": 1.0151840954685098e-05, "loss": 2.2274, "step": 19159 }, { "epoch": 0.6181754387379994, "grad_norm": 1.078125, "learning_rate": 1.0150351714334095e-05, "loss": 2.2086, "step": 19160 }, { "epoch": 0.6182077025917957, "grad_norm": 0.546875, "learning_rate": 1.0148862527364544e-05, "loss": 2.1855, "step": 19161 }, { "epoch": 0.6182399664455921, "grad_norm": 0.8046875, "learning_rate": 1.0147373393792832e-05, "loss": 2.211, "step": 19162 }, { "epoch": 0.6182722302993884, "grad_norm": 0.609375, "learning_rate": 1.014588431363535e-05, "loss": 2.2117, "step": 19163 }, { "epoch": 0.6183044941531848, "grad_norm": 0.71484375, "learning_rate": 1.0144395286908498e-05, "loss": 2.2079, "step": 19164 }, { "epoch": 0.6183367580069811, "grad_norm": 0.55078125, "learning_rate": 1.0142906313628655e-05, "loss": 2.2256, "step": 19165 }, { "epoch": 0.6183690218607775, "grad_norm": 0.703125, "learning_rate": 1.0141417393812213e-05, "loss": 2.2085, "step": 19166 }, { "epoch": 0.6184012857145738, "grad_norm": 0.5625, "learning_rate": 1.0139928527475567e-05, "loss": 2.2133, "step": 19167 }, { "epoch": 0.6184335495683702, "grad_norm": 0.65234375, "learning_rate": 1.0138439714635097e-05, "loss": 2.2071, "step": 19168 }, { "epoch": 0.6184658134221664, "grad_norm": 0.6328125, "learning_rate": 1.0136950955307197e-05, "loss": 2.2016, "step": 19169 }, { "epoch": 0.6184980772759628, "grad_norm": 0.66015625, "learning_rate": 1.0135462249508253e-05, "loss": 2.209, "step": 19170 }, { "epoch": 0.6185303411297591, "grad_norm": 0.6953125, "learning_rate": 1.013397359725465e-05, "loss": 2.2366, "step": 19171 }, { "epoch": 0.6185626049835555, "grad_norm": 0.62890625, "learning_rate": 1.0132484998562774e-05, "loss": 2.1906, "step": 19172 }, { "epoch": 0.6185948688373518, "grad_norm": 0.60546875, "learning_rate": 1.0130996453449015e-05, "loss": 2.1698, "step": 19173 }, { "epoch": 0.6186271326911482, "grad_norm": 0.66015625, "learning_rate": 1.0129507961929749e-05, "loss": 2.1788, "step": 19174 }, { "epoch": 0.6186593965449445, "grad_norm": 0.5703125, "learning_rate": 1.0128019524021365e-05, "loss": 2.1802, "step": 19175 }, { "epoch": 0.6186916603987409, "grad_norm": 0.6328125, "learning_rate": 1.0126531139740253e-05, "loss": 2.2201, "step": 19176 }, { "epoch": 0.6187239242525373, "grad_norm": 0.56640625, "learning_rate": 1.0125042809102783e-05, "loss": 2.2313, "step": 19177 }, { "epoch": 0.6187561881063336, "grad_norm": 0.60546875, "learning_rate": 1.0123554532125347e-05, "loss": 2.2187, "step": 19178 }, { "epoch": 0.61878845196013, "grad_norm": 0.56640625, "learning_rate": 1.0122066308824331e-05, "loss": 2.1902, "step": 19179 }, { "epoch": 0.6188207158139263, "grad_norm": 0.59765625, "learning_rate": 1.0120578139216102e-05, "loss": 2.2057, "step": 19180 }, { "epoch": 0.6188529796677227, "grad_norm": 0.52734375, "learning_rate": 1.0119090023317045e-05, "loss": 2.1943, "step": 19181 }, { "epoch": 0.618885243521519, "grad_norm": 0.6328125, "learning_rate": 1.0117601961143554e-05, "loss": 2.2145, "step": 19182 }, { "epoch": 0.6189175073753154, "grad_norm": 0.5234375, "learning_rate": 1.0116113952711992e-05, "loss": 2.1895, "step": 19183 }, { "epoch": 0.6189497712291117, "grad_norm": 0.65234375, "learning_rate": 1.0114625998038742e-05, "loss": 2.2054, "step": 19184 }, { "epoch": 0.618982035082908, "grad_norm": 0.6953125, "learning_rate": 1.011313809714019e-05, "loss": 2.2906, "step": 19185 }, { "epoch": 0.6190142989367043, "grad_norm": 0.62890625, "learning_rate": 1.0111650250032705e-05, "loss": 2.3354, "step": 19186 }, { "epoch": 0.6190465627905007, "grad_norm": 0.6875, "learning_rate": 1.0110162456732665e-05, "loss": 2.4146, "step": 19187 }, { "epoch": 0.619078826644297, "grad_norm": 0.96484375, "learning_rate": 1.0108674717256445e-05, "loss": 2.4046, "step": 19188 }, { "epoch": 0.6191110904980934, "grad_norm": 0.88671875, "learning_rate": 1.0107187031620438e-05, "loss": 2.4082, "step": 19189 }, { "epoch": 0.6191433543518897, "grad_norm": 0.59375, "learning_rate": 1.0105699399840996e-05, "loss": 2.403, "step": 19190 }, { "epoch": 0.6191756182056861, "grad_norm": 0.85546875, "learning_rate": 1.0104211821934505e-05, "loss": 2.3988, "step": 19191 }, { "epoch": 0.6192078820594824, "grad_norm": 0.78515625, "learning_rate": 1.0102724297917342e-05, "loss": 2.4039, "step": 19192 }, { "epoch": 0.6192401459132788, "grad_norm": 0.59765625, "learning_rate": 1.0101236827805871e-05, "loss": 2.3853, "step": 19193 }, { "epoch": 0.6192724097670751, "grad_norm": 0.828125, "learning_rate": 1.0099749411616474e-05, "loss": 2.3981, "step": 19194 }, { "epoch": 0.6193046736208715, "grad_norm": 0.6171875, "learning_rate": 1.0098262049365521e-05, "loss": 2.3892, "step": 19195 }, { "epoch": 0.6193369374746678, "grad_norm": 0.74609375, "learning_rate": 1.0096774741069378e-05, "loss": 2.3975, "step": 19196 }, { "epoch": 0.6193692013284642, "grad_norm": 0.79296875, "learning_rate": 1.0095287486744424e-05, "loss": 2.4178, "step": 19197 }, { "epoch": 0.6194014651822606, "grad_norm": 0.60546875, "learning_rate": 1.0093800286407032e-05, "loss": 2.4095, "step": 19198 }, { "epoch": 0.6194337290360569, "grad_norm": 0.8828125, "learning_rate": 1.009231314007356e-05, "loss": 2.4059, "step": 19199 }, { "epoch": 0.6194659928898533, "grad_norm": 1.21875, "learning_rate": 1.0090826047760386e-05, "loss": 2.4345, "step": 19200 }, { "epoch": 0.6194982567436496, "grad_norm": 0.9140625, "learning_rate": 1.0089339009483877e-05, "loss": 2.3953, "step": 19201 }, { "epoch": 0.619530520597446, "grad_norm": 1.34375, "learning_rate": 1.00878520252604e-05, "loss": 2.3995, "step": 19202 }, { "epoch": 0.6195627844512422, "grad_norm": 0.80859375, "learning_rate": 1.0086365095106326e-05, "loss": 2.392, "step": 19203 }, { "epoch": 0.6195950483050386, "grad_norm": 1.078125, "learning_rate": 1.0084878219038021e-05, "loss": 2.3798, "step": 19204 }, { "epoch": 0.6196273121588349, "grad_norm": 1.046875, "learning_rate": 1.0083391397071849e-05, "loss": 2.3704, "step": 19205 }, { "epoch": 0.6196595760126313, "grad_norm": 0.6796875, "learning_rate": 1.0081904629224176e-05, "loss": 2.38, "step": 19206 }, { "epoch": 0.6196918398664276, "grad_norm": 0.91796875, "learning_rate": 1.0080417915511374e-05, "loss": 2.366, "step": 19207 }, { "epoch": 0.619724103720224, "grad_norm": 0.6953125, "learning_rate": 1.0078931255949793e-05, "loss": 2.3936, "step": 19208 }, { "epoch": 0.6197563675740203, "grad_norm": 0.828125, "learning_rate": 1.0077444650555811e-05, "loss": 2.3867, "step": 19209 }, { "epoch": 0.6197886314278167, "grad_norm": 0.77734375, "learning_rate": 1.0075958099345794e-05, "loss": 2.3789, "step": 19210 }, { "epoch": 0.619820895281613, "grad_norm": 0.63671875, "learning_rate": 1.007447160233609e-05, "loss": 2.3898, "step": 19211 }, { "epoch": 0.6198531591354094, "grad_norm": 0.7734375, "learning_rate": 1.0072985159543064e-05, "loss": 2.3743, "step": 19212 }, { "epoch": 0.6198854229892057, "grad_norm": 0.65625, "learning_rate": 1.0071498770983095e-05, "loss": 2.3879, "step": 19213 }, { "epoch": 0.6199176868430021, "grad_norm": 0.63671875, "learning_rate": 1.0070012436672526e-05, "loss": 2.3698, "step": 19214 }, { "epoch": 0.6199499506967984, "grad_norm": 0.734375, "learning_rate": 1.0068526156627722e-05, "loss": 2.3953, "step": 19215 }, { "epoch": 0.6199822145505948, "grad_norm": 0.62109375, "learning_rate": 1.0067039930865047e-05, "loss": 2.3672, "step": 19216 }, { "epoch": 0.6200144784043911, "grad_norm": 0.6796875, "learning_rate": 1.0065553759400855e-05, "loss": 2.3554, "step": 19217 }, { "epoch": 0.6200467422581875, "grad_norm": 0.83203125, "learning_rate": 1.0064067642251508e-05, "loss": 2.3593, "step": 19218 }, { "epoch": 0.6200790061119839, "grad_norm": 0.69921875, "learning_rate": 1.0062581579433358e-05, "loss": 2.3446, "step": 19219 }, { "epoch": 0.6201112699657801, "grad_norm": 0.578125, "learning_rate": 1.0061095570962779e-05, "loss": 2.3618, "step": 19220 }, { "epoch": 0.6201435338195765, "grad_norm": 0.6875, "learning_rate": 1.005960961685611e-05, "loss": 2.374, "step": 19221 }, { "epoch": 0.6201757976733728, "grad_norm": 0.75, "learning_rate": 1.0058123717129714e-05, "loss": 2.359, "step": 19222 }, { "epoch": 0.6202080615271692, "grad_norm": 0.58203125, "learning_rate": 1.0056637871799951e-05, "loss": 2.3432, "step": 19223 }, { "epoch": 0.6202403253809655, "grad_norm": 0.69140625, "learning_rate": 1.0055152080883166e-05, "loss": 2.3574, "step": 19224 }, { "epoch": 0.6202725892347619, "grad_norm": 0.74609375, "learning_rate": 1.0053666344395723e-05, "loss": 2.3694, "step": 19225 }, { "epoch": 0.6203048530885582, "grad_norm": 0.578125, "learning_rate": 1.0052180662353975e-05, "loss": 2.3262, "step": 19226 }, { "epoch": 0.6203371169423546, "grad_norm": 0.66796875, "learning_rate": 1.0050695034774267e-05, "loss": 2.3542, "step": 19227 }, { "epoch": 0.6203693807961509, "grad_norm": 0.8515625, "learning_rate": 1.004920946167296e-05, "loss": 2.3512, "step": 19228 }, { "epoch": 0.6204016446499473, "grad_norm": 0.74609375, "learning_rate": 1.0047723943066405e-05, "loss": 2.3694, "step": 19229 }, { "epoch": 0.6204339085037436, "grad_norm": 0.5234375, "learning_rate": 1.0046238478970951e-05, "loss": 2.3545, "step": 19230 }, { "epoch": 0.62046617235754, "grad_norm": 0.6796875, "learning_rate": 1.0044753069402946e-05, "loss": 2.3555, "step": 19231 }, { "epoch": 0.6204984362113363, "grad_norm": 0.79296875, "learning_rate": 1.0043267714378752e-05, "loss": 2.3594, "step": 19232 }, { "epoch": 0.6205307000651327, "grad_norm": 0.79296875, "learning_rate": 1.0041782413914706e-05, "loss": 2.3696, "step": 19233 }, { "epoch": 0.620562963918929, "grad_norm": 0.67578125, "learning_rate": 1.0040297168027162e-05, "loss": 2.3853, "step": 19234 }, { "epoch": 0.6205952277727254, "grad_norm": 1.09375, "learning_rate": 1.0038811976732471e-05, "loss": 2.3861, "step": 19235 }, { "epoch": 0.6206274916265216, "grad_norm": 1.109375, "learning_rate": 1.0037326840046978e-05, "loss": 2.4083, "step": 19236 }, { "epoch": 0.620659755480318, "grad_norm": 0.56640625, "learning_rate": 1.003584175798703e-05, "loss": 2.3983, "step": 19237 }, { "epoch": 0.6206920193341144, "grad_norm": 0.95703125, "learning_rate": 1.0034356730568981e-05, "loss": 2.3992, "step": 19238 }, { "epoch": 0.6207242831879107, "grad_norm": 1.125, "learning_rate": 1.003287175780916e-05, "loss": 2.3788, "step": 19239 }, { "epoch": 0.6207565470417071, "grad_norm": 0.59765625, "learning_rate": 1.0031386839723929e-05, "loss": 2.404, "step": 19240 }, { "epoch": 0.6207888108955034, "grad_norm": 0.953125, "learning_rate": 1.0029901976329631e-05, "loss": 2.3944, "step": 19241 }, { "epoch": 0.6208210747492998, "grad_norm": 0.703125, "learning_rate": 1.00284171676426e-05, "loss": 2.3986, "step": 19242 }, { "epoch": 0.6208533386030961, "grad_norm": 0.68359375, "learning_rate": 1.0026932413679185e-05, "loss": 2.3909, "step": 19243 }, { "epoch": 0.6208856024568925, "grad_norm": 0.82421875, "learning_rate": 1.0025447714455737e-05, "loss": 2.4191, "step": 19244 }, { "epoch": 0.6209178663106888, "grad_norm": 0.546875, "learning_rate": 1.0023963069988589e-05, "loss": 2.4959, "step": 19245 }, { "epoch": 0.6209501301644852, "grad_norm": 0.97265625, "learning_rate": 1.0022478480294084e-05, "loss": 2.5474, "step": 19246 }, { "epoch": 0.6209823940182815, "grad_norm": 0.92578125, "learning_rate": 1.0020993945388567e-05, "loss": 2.5366, "step": 19247 }, { "epoch": 0.6210146578720779, "grad_norm": 0.58203125, "learning_rate": 1.0019509465288375e-05, "loss": 2.5317, "step": 19248 }, { "epoch": 0.6210469217258742, "grad_norm": 0.8203125, "learning_rate": 1.001802504000985e-05, "loss": 2.562, "step": 19249 }, { "epoch": 0.6210791855796706, "grad_norm": 0.75, "learning_rate": 1.0016540669569336e-05, "loss": 2.5633, "step": 19250 }, { "epoch": 0.6211114494334669, "grad_norm": 0.5859375, "learning_rate": 1.001505635398316e-05, "loss": 2.5954, "step": 19251 }, { "epoch": 0.6211437132872633, "grad_norm": 0.6953125, "learning_rate": 1.001357209326767e-05, "loss": 2.5531, "step": 19252 }, { "epoch": 0.6211759771410595, "grad_norm": 0.73046875, "learning_rate": 1.0012087887439202e-05, "loss": 2.564, "step": 19253 }, { "epoch": 0.6212082409948559, "grad_norm": 0.66015625, "learning_rate": 1.0010603736514093e-05, "loss": 2.559, "step": 19254 }, { "epoch": 0.6212405048486522, "grad_norm": 0.640625, "learning_rate": 1.0009119640508677e-05, "loss": 2.5463, "step": 19255 }, { "epoch": 0.6212727687024486, "grad_norm": 0.6015625, "learning_rate": 1.000763559943929e-05, "loss": 2.5431, "step": 19256 }, { "epoch": 0.6213050325562449, "grad_norm": 0.5859375, "learning_rate": 1.0006151613322275e-05, "loss": 2.5438, "step": 19257 }, { "epoch": 0.6213372964100413, "grad_norm": 0.65234375, "learning_rate": 1.0004667682173953e-05, "loss": 2.5315, "step": 19258 }, { "epoch": 0.6213695602638377, "grad_norm": 0.73046875, "learning_rate": 1.000318380601067e-05, "loss": 2.542, "step": 19259 }, { "epoch": 0.621401824117634, "grad_norm": 0.71484375, "learning_rate": 1.0001699984848755e-05, "loss": 2.5421, "step": 19260 }, { "epoch": 0.6214340879714304, "grad_norm": 0.76953125, "learning_rate": 1.0000216218704539e-05, "loss": 2.5288, "step": 19261 }, { "epoch": 0.6214663518252267, "grad_norm": 0.78515625, "learning_rate": 9.998732507594355e-06, "loss": 2.5329, "step": 19262 }, { "epoch": 0.6214986156790231, "grad_norm": 0.8515625, "learning_rate": 9.99724885153454e-06, "loss": 2.521, "step": 19263 }, { "epoch": 0.6215308795328194, "grad_norm": 1.0234375, "learning_rate": 9.995765250541418e-06, "loss": 2.5535, "step": 19264 }, { "epoch": 0.6215631433866158, "grad_norm": 1.2109375, "learning_rate": 9.994281704631321e-06, "loss": 2.5228, "step": 19265 }, { "epoch": 0.6215954072404121, "grad_norm": 0.67578125, "learning_rate": 9.992798213820586e-06, "loss": 2.5321, "step": 19266 }, { "epoch": 0.6216276710942085, "grad_norm": 0.75, "learning_rate": 9.991314778125528e-06, "loss": 2.5282, "step": 19267 }, { "epoch": 0.6216599349480048, "grad_norm": 1.2421875, "learning_rate": 9.989831397562488e-06, "loss": 2.5566, "step": 19268 }, { "epoch": 0.6216921988018012, "grad_norm": 0.7578125, "learning_rate": 9.988348072147793e-06, "loss": 2.5524, "step": 19269 }, { "epoch": 0.6217244626555974, "grad_norm": 0.76953125, "learning_rate": 9.986864801897759e-06, "loss": 2.5377, "step": 19270 }, { "epoch": 0.6217567265093938, "grad_norm": 1.3828125, "learning_rate": 9.985381586828723e-06, "loss": 2.531, "step": 19271 }, { "epoch": 0.6217889903631901, "grad_norm": 0.66796875, "learning_rate": 9.983898426957017e-06, "loss": 2.5221, "step": 19272 }, { "epoch": 0.6218212542169865, "grad_norm": 1.15625, "learning_rate": 9.982415322298949e-06, "loss": 2.5599, "step": 19273 }, { "epoch": 0.6218535180707828, "grad_norm": 1.1328125, "learning_rate": 9.980932272870853e-06, "loss": 2.5357, "step": 19274 }, { "epoch": 0.6218857819245792, "grad_norm": 0.73828125, "learning_rate": 9.979449278689065e-06, "loss": 2.5426, "step": 19275 }, { "epoch": 0.6219180457783755, "grad_norm": 1.8828125, "learning_rate": 9.977966339769888e-06, "loss": 2.5447, "step": 19276 }, { "epoch": 0.6219503096321719, "grad_norm": 0.75, "learning_rate": 9.976483456129656e-06, "loss": 2.5474, "step": 19277 }, { "epoch": 0.6219825734859682, "grad_norm": 1.8828125, "learning_rate": 9.975000627784693e-06, "loss": 2.5577, "step": 19278 }, { "epoch": 0.6220148373397646, "grad_norm": 1.4140625, "learning_rate": 9.973517854751315e-06, "loss": 2.578, "step": 19279 }, { "epoch": 0.622047101193561, "grad_norm": 2.484375, "learning_rate": 9.972035137045848e-06, "loss": 2.5507, "step": 19280 }, { "epoch": 0.6220793650473573, "grad_norm": 2.421875, "learning_rate": 9.970552474684612e-06, "loss": 2.5331, "step": 19281 }, { "epoch": 0.6221116289011537, "grad_norm": 0.6875, "learning_rate": 9.969069867683927e-06, "loss": 2.5461, "step": 19282 }, { "epoch": 0.62214389275495, "grad_norm": 1.890625, "learning_rate": 9.967587316060107e-06, "loss": 2.5319, "step": 19283 }, { "epoch": 0.6221761566087464, "grad_norm": 1.5546875, "learning_rate": 9.966104819829482e-06, "loss": 2.5612, "step": 19284 }, { "epoch": 0.6222084204625427, "grad_norm": 1.125, "learning_rate": 9.96462237900836e-06, "loss": 2.5296, "step": 19285 }, { "epoch": 0.622240684316339, "grad_norm": 1.34375, "learning_rate": 9.96313999361306e-06, "loss": 2.5294, "step": 19286 }, { "epoch": 0.6222729481701353, "grad_norm": 0.62890625, "learning_rate": 9.961657663659903e-06, "loss": 2.529, "step": 19287 }, { "epoch": 0.6223052120239317, "grad_norm": 1.0703125, "learning_rate": 9.960175389165208e-06, "loss": 2.5568, "step": 19288 }, { "epoch": 0.622337475877728, "grad_norm": 0.65234375, "learning_rate": 9.958693170145284e-06, "loss": 2.5442, "step": 19289 }, { "epoch": 0.6223697397315244, "grad_norm": 1.171875, "learning_rate": 9.957211006616449e-06, "loss": 2.5386, "step": 19290 }, { "epoch": 0.6224020035853207, "grad_norm": 0.625, "learning_rate": 9.955728898595018e-06, "loss": 2.5397, "step": 19291 }, { "epoch": 0.6224342674391171, "grad_norm": 1.1796875, "learning_rate": 9.954246846097304e-06, "loss": 2.5205, "step": 19292 }, { "epoch": 0.6224665312929134, "grad_norm": 0.7890625, "learning_rate": 9.952764849139619e-06, "loss": 2.5315, "step": 19293 }, { "epoch": 0.6224987951467098, "grad_norm": 1.5703125, "learning_rate": 9.951282907738282e-06, "loss": 2.5145, "step": 19294 }, { "epoch": 0.6225310590005061, "grad_norm": 1.078125, "learning_rate": 9.949801021909598e-06, "loss": 2.5387, "step": 19295 }, { "epoch": 0.6225633228543025, "grad_norm": 1.9140625, "learning_rate": 9.948319191669883e-06, "loss": 2.5342, "step": 19296 }, { "epoch": 0.6225955867080988, "grad_norm": 1.9765625, "learning_rate": 9.946837417035449e-06, "loss": 2.5232, "step": 19297 }, { "epoch": 0.6226278505618952, "grad_norm": 0.6015625, "learning_rate": 9.945355698022596e-06, "loss": 2.5221, "step": 19298 }, { "epoch": 0.6226601144156916, "grad_norm": 1.8515625, "learning_rate": 9.943874034647646e-06, "loss": 2.5153, "step": 19299 }, { "epoch": 0.6226923782694879, "grad_norm": 1.2890625, "learning_rate": 9.942392426926907e-06, "loss": 2.528, "step": 19300 }, { "epoch": 0.6227246421232843, "grad_norm": 1.53125, "learning_rate": 9.940910874876678e-06, "loss": 2.5615, "step": 19301 }, { "epoch": 0.6227569059770806, "grad_norm": 1.5703125, "learning_rate": 9.93942937851327e-06, "loss": 2.5235, "step": 19302 }, { "epoch": 0.622789169830877, "grad_norm": 0.5859375, "learning_rate": 9.937947937853001e-06, "loss": 2.5186, "step": 19303 }, { "epoch": 0.6228214336846732, "grad_norm": 1.71875, "learning_rate": 9.936466552912165e-06, "loss": 2.499, "step": 19304 }, { "epoch": 0.6228536975384696, "grad_norm": 1.5703125, "learning_rate": 9.93498522370707e-06, "loss": 2.522, "step": 19305 }, { "epoch": 0.6228859613922659, "grad_norm": 0.6328125, "learning_rate": 9.933503950254033e-06, "loss": 2.523, "step": 19306 }, { "epoch": 0.6229182252460623, "grad_norm": 1.3203125, "learning_rate": 9.932022732569342e-06, "loss": 2.5133, "step": 19307 }, { "epoch": 0.6229504890998586, "grad_norm": 0.98046875, "learning_rate": 9.930541570669312e-06, "loss": 2.5394, "step": 19308 }, { "epoch": 0.622982752953655, "grad_norm": 1.3515625, "learning_rate": 9.929060464570243e-06, "loss": 2.5208, "step": 19309 }, { "epoch": 0.6230150168074513, "grad_norm": 1.390625, "learning_rate": 9.927579414288438e-06, "loss": 2.5288, "step": 19310 }, { "epoch": 0.6230472806612477, "grad_norm": 0.6328125, "learning_rate": 9.926098419840199e-06, "loss": 2.5251, "step": 19311 }, { "epoch": 0.623079544515044, "grad_norm": 1.1640625, "learning_rate": 9.924617481241831e-06, "loss": 2.5086, "step": 19312 }, { "epoch": 0.6231118083688404, "grad_norm": 0.88671875, "learning_rate": 9.923136598509632e-06, "loss": 2.5521, "step": 19313 }, { "epoch": 0.6231440722226367, "grad_norm": 1.2734375, "learning_rate": 9.9216557716599e-06, "loss": 2.5399, "step": 19314 }, { "epoch": 0.6231763360764331, "grad_norm": 1.171875, "learning_rate": 9.920175000708945e-06, "loss": 2.5459, "step": 19315 }, { "epoch": 0.6232085999302294, "grad_norm": 0.95703125, "learning_rate": 9.918694285673053e-06, "loss": 2.5416, "step": 19316 }, { "epoch": 0.6232408637840258, "grad_norm": 1.015625, "learning_rate": 9.91721362656853e-06, "loss": 2.5422, "step": 19317 }, { "epoch": 0.623273127637822, "grad_norm": 0.7734375, "learning_rate": 9.915733023411673e-06, "loss": 2.54, "step": 19318 }, { "epoch": 0.6233053914916185, "grad_norm": 1.0390625, "learning_rate": 9.914252476218782e-06, "loss": 2.5527, "step": 19319 }, { "epoch": 0.6233376553454149, "grad_norm": 0.71484375, "learning_rate": 9.912771985006149e-06, "loss": 2.5327, "step": 19320 }, { "epoch": 0.6233699191992111, "grad_norm": 0.91015625, "learning_rate": 9.911291549790073e-06, "loss": 2.5432, "step": 19321 }, { "epoch": 0.6234021830530075, "grad_norm": 0.69140625, "learning_rate": 9.909811170586852e-06, "loss": 2.5105, "step": 19322 }, { "epoch": 0.6234344469068038, "grad_norm": 0.859375, "learning_rate": 9.908330847412774e-06, "loss": 2.5265, "step": 19323 }, { "epoch": 0.6234667107606002, "grad_norm": 0.73828125, "learning_rate": 9.906850580284137e-06, "loss": 2.5081, "step": 19324 }, { "epoch": 0.6234989746143965, "grad_norm": 0.72265625, "learning_rate": 9.905370369217236e-06, "loss": 2.5282, "step": 19325 }, { "epoch": 0.6235312384681929, "grad_norm": 0.80078125, "learning_rate": 9.903890214228366e-06, "loss": 2.5284, "step": 19326 }, { "epoch": 0.6235635023219892, "grad_norm": 0.6015625, "learning_rate": 9.902410115333811e-06, "loss": 2.5153, "step": 19327 }, { "epoch": 0.6235957661757856, "grad_norm": 0.82421875, "learning_rate": 9.900930072549879e-06, "loss": 2.5197, "step": 19328 }, { "epoch": 0.6236280300295819, "grad_norm": 0.61328125, "learning_rate": 9.899450085892838e-06, "loss": 2.5332, "step": 19329 }, { "epoch": 0.6236602938833783, "grad_norm": 0.65234375, "learning_rate": 9.897970155378994e-06, "loss": 2.5278, "step": 19330 }, { "epoch": 0.6236925577371746, "grad_norm": 0.64453125, "learning_rate": 9.896490281024643e-06, "loss": 2.5319, "step": 19331 }, { "epoch": 0.623724821590971, "grad_norm": 0.69140625, "learning_rate": 9.89501046284606e-06, "loss": 2.5173, "step": 19332 }, { "epoch": 0.6237570854447673, "grad_norm": 0.859375, "learning_rate": 9.893530700859535e-06, "loss": 2.5144, "step": 19333 }, { "epoch": 0.6237893492985637, "grad_norm": 0.6953125, "learning_rate": 9.892050995081368e-06, "loss": 2.4497, "step": 19334 }, { "epoch": 0.62382161315236, "grad_norm": 0.6953125, "learning_rate": 9.890571345527835e-06, "loss": 2.4727, "step": 19335 }, { "epoch": 0.6238538770061564, "grad_norm": 0.87109375, "learning_rate": 9.889091752215226e-06, "loss": 2.4616, "step": 19336 }, { "epoch": 0.6238861408599526, "grad_norm": 0.8046875, "learning_rate": 9.887612215159834e-06, "loss": 2.4581, "step": 19337 }, { "epoch": 0.623918404713749, "grad_norm": 0.56640625, "learning_rate": 9.886132734377936e-06, "loss": 2.4653, "step": 19338 }, { "epoch": 0.6239506685675454, "grad_norm": 0.95703125, "learning_rate": 9.88465330988582e-06, "loss": 2.451, "step": 19339 }, { "epoch": 0.6239829324213417, "grad_norm": 1.1953125, "learning_rate": 9.883173941699772e-06, "loss": 2.4629, "step": 19340 }, { "epoch": 0.6240151962751381, "grad_norm": 0.5703125, "learning_rate": 9.881694629836072e-06, "loss": 2.4491, "step": 19341 }, { "epoch": 0.6240474601289344, "grad_norm": 1.0625, "learning_rate": 9.880215374311007e-06, "loss": 2.4496, "step": 19342 }, { "epoch": 0.6240797239827308, "grad_norm": 1.265625, "learning_rate": 9.87873617514086e-06, "loss": 2.4592, "step": 19343 }, { "epoch": 0.6241119878365271, "grad_norm": 0.58984375, "learning_rate": 9.877257032341907e-06, "loss": 2.4506, "step": 19344 }, { "epoch": 0.6241442516903235, "grad_norm": 1.1640625, "learning_rate": 9.875777945930436e-06, "loss": 2.4612, "step": 19345 }, { "epoch": 0.6241765155441198, "grad_norm": 0.91015625, "learning_rate": 9.874298915922726e-06, "loss": 2.4526, "step": 19346 }, { "epoch": 0.6242087793979162, "grad_norm": 0.625, "learning_rate": 9.872819942335053e-06, "loss": 2.4351, "step": 19347 }, { "epoch": 0.6242410432517125, "grad_norm": 0.99609375, "learning_rate": 9.871341025183703e-06, "loss": 2.4696, "step": 19348 }, { "epoch": 0.6242733071055089, "grad_norm": 0.9453125, "learning_rate": 9.869862164484955e-06, "loss": 2.451, "step": 19349 }, { "epoch": 0.6243055709593052, "grad_norm": 0.546875, "learning_rate": 9.868383360255076e-06, "loss": 2.4503, "step": 19350 }, { "epoch": 0.6243378348131016, "grad_norm": 1.109375, "learning_rate": 9.866904612510356e-06, "loss": 2.4444, "step": 19351 }, { "epoch": 0.6243700986668979, "grad_norm": 0.98046875, "learning_rate": 9.865425921267064e-06, "loss": 2.4606, "step": 19352 }, { "epoch": 0.6244023625206943, "grad_norm": 0.494140625, "learning_rate": 9.863947286541486e-06, "loss": 2.4655, "step": 19353 }, { "epoch": 0.6244346263744905, "grad_norm": 0.953125, "learning_rate": 9.86246870834989e-06, "loss": 2.3961, "step": 19354 }, { "epoch": 0.6244668902282869, "grad_norm": 0.90234375, "learning_rate": 9.860990186708547e-06, "loss": 2.4055, "step": 19355 }, { "epoch": 0.6244991540820832, "grad_norm": 0.50390625, "learning_rate": 9.859511721633746e-06, "loss": 2.3884, "step": 19356 }, { "epoch": 0.6245314179358796, "grad_norm": 0.734375, "learning_rate": 9.858033313141746e-06, "loss": 2.3789, "step": 19357 }, { "epoch": 0.6245636817896759, "grad_norm": 0.55859375, "learning_rate": 9.856554961248827e-06, "loss": 2.4055, "step": 19358 }, { "epoch": 0.6245959456434723, "grad_norm": 0.5703125, "learning_rate": 9.855076665971268e-06, "loss": 2.3978, "step": 19359 }, { "epoch": 0.6246282094972687, "grad_norm": 0.765625, "learning_rate": 9.853598427325324e-06, "loss": 2.3788, "step": 19360 }, { "epoch": 0.624660473351065, "grad_norm": 0.59375, "learning_rate": 9.85212024532728e-06, "loss": 2.3839, "step": 19361 }, { "epoch": 0.6246927372048614, "grad_norm": 0.5546875, "learning_rate": 9.850642119993411e-06, "loss": 2.3928, "step": 19362 }, { "epoch": 0.6247250010586577, "grad_norm": 0.73046875, "learning_rate": 9.84916405133997e-06, "loss": 2.3703, "step": 19363 }, { "epoch": 0.6247572649124541, "grad_norm": 0.69921875, "learning_rate": 9.847686039383236e-06, "loss": 2.3757, "step": 19364 }, { "epoch": 0.6247895287662504, "grad_norm": 0.51171875, "learning_rate": 9.846208084139485e-06, "loss": 2.333, "step": 19365 }, { "epoch": 0.6248217926200468, "grad_norm": 0.5859375, "learning_rate": 9.844730185624973e-06, "loss": 2.3201, "step": 19366 }, { "epoch": 0.6248540564738431, "grad_norm": 0.55859375, "learning_rate": 9.843252343855972e-06, "loss": 2.3257, "step": 19367 }, { "epoch": 0.6248863203276395, "grad_norm": 0.50390625, "learning_rate": 9.841774558848756e-06, "loss": 2.307, "step": 19368 }, { "epoch": 0.6249185841814358, "grad_norm": 0.65234375, "learning_rate": 9.840296830619577e-06, "loss": 2.3124, "step": 19369 }, { "epoch": 0.6249508480352322, "grad_norm": 0.54296875, "learning_rate": 9.838819159184711e-06, "loss": 2.3194, "step": 19370 }, { "epoch": 0.6249831118890284, "grad_norm": 0.498046875, "learning_rate": 9.837341544560425e-06, "loss": 2.316, "step": 19371 }, { "epoch": 0.6250153757428248, "grad_norm": 0.71484375, "learning_rate": 9.835863986762976e-06, "loss": 2.3192, "step": 19372 }, { "epoch": 0.6250476395966211, "grad_norm": 0.67578125, "learning_rate": 9.834386485808632e-06, "loss": 2.3107, "step": 19373 }, { "epoch": 0.6250799034504175, "grad_norm": 0.482421875, "learning_rate": 9.832909041713657e-06, "loss": 2.325, "step": 19374 }, { "epoch": 0.6251121673042138, "grad_norm": 0.53515625, "learning_rate": 9.831431654494312e-06, "loss": 2.3109, "step": 19375 }, { "epoch": 0.6251444311580102, "grad_norm": 0.56640625, "learning_rate": 9.829954324166858e-06, "loss": 2.3016, "step": 19376 }, { "epoch": 0.6251766950118065, "grad_norm": 0.546875, "learning_rate": 9.82847705074756e-06, "loss": 2.3188, "step": 19377 }, { "epoch": 0.6252089588656029, "grad_norm": 0.5078125, "learning_rate": 9.826999834252675e-06, "loss": 2.3042, "step": 19378 }, { "epoch": 0.6252412227193992, "grad_norm": 0.484375, "learning_rate": 9.825522674698463e-06, "loss": 2.3162, "step": 19379 }, { "epoch": 0.6252734865731956, "grad_norm": 0.625, "learning_rate": 9.82404557210119e-06, "loss": 2.3266, "step": 19380 }, { "epoch": 0.625305750426992, "grad_norm": 0.7109375, "learning_rate": 9.822568526477107e-06, "loss": 2.315, "step": 19381 }, { "epoch": 0.6253380142807883, "grad_norm": 0.7578125, "learning_rate": 9.821091537842474e-06, "loss": 2.2975, "step": 19382 }, { "epoch": 0.6253702781345847, "grad_norm": 0.8125, "learning_rate": 9.819614606213557e-06, "loss": 2.3123, "step": 19383 }, { "epoch": 0.625402541988381, "grad_norm": 0.890625, "learning_rate": 9.818137731606598e-06, "loss": 2.3311, "step": 19384 }, { "epoch": 0.6254348058421774, "grad_norm": 0.859375, "learning_rate": 9.816660914037862e-06, "loss": 2.3125, "step": 19385 }, { "epoch": 0.6254670696959737, "grad_norm": 0.609375, "learning_rate": 9.815184153523607e-06, "loss": 2.297, "step": 19386 }, { "epoch": 0.62549933354977, "grad_norm": 0.515625, "learning_rate": 9.813707450080086e-06, "loss": 2.3315, "step": 19387 }, { "epoch": 0.6255315974035663, "grad_norm": 0.84765625, "learning_rate": 9.812230803723552e-06, "loss": 2.3153, "step": 19388 }, { "epoch": 0.6255638612573627, "grad_norm": 1.0078125, "learning_rate": 9.810754214470257e-06, "loss": 2.3046, "step": 19389 }, { "epoch": 0.625596125111159, "grad_norm": 0.7421875, "learning_rate": 9.809277682336464e-06, "loss": 2.304, "step": 19390 }, { "epoch": 0.6256283889649554, "grad_norm": 0.5078125, "learning_rate": 9.807801207338411e-06, "loss": 2.3225, "step": 19391 }, { "epoch": 0.6256606528187517, "grad_norm": 0.84765625, "learning_rate": 9.806324789492356e-06, "loss": 2.3112, "step": 19392 }, { "epoch": 0.6256929166725481, "grad_norm": 0.82421875, "learning_rate": 9.804848428814564e-06, "loss": 2.3381, "step": 19393 }, { "epoch": 0.6257251805263444, "grad_norm": 0.53125, "learning_rate": 9.803372125321264e-06, "loss": 2.3297, "step": 19394 }, { "epoch": 0.6257574443801408, "grad_norm": 1.0625, "learning_rate": 9.801895879028712e-06, "loss": 2.3162, "step": 19395 }, { "epoch": 0.6257897082339371, "grad_norm": 1.1328125, "learning_rate": 9.800419689953171e-06, "loss": 2.2767, "step": 19396 }, { "epoch": 0.6258219720877335, "grad_norm": 0.5625, "learning_rate": 9.798943558110874e-06, "loss": 2.2493, "step": 19397 }, { "epoch": 0.6258542359415298, "grad_norm": 1.375, "learning_rate": 9.797467483518076e-06, "loss": 2.2469, "step": 19398 }, { "epoch": 0.6258864997953262, "grad_norm": 0.5546875, "learning_rate": 9.795991466191025e-06, "loss": 2.261, "step": 19399 }, { "epoch": 0.6259187636491226, "grad_norm": 1.2109375, "learning_rate": 9.794515506145964e-06, "loss": 2.2726, "step": 19400 }, { "epoch": 0.6259510275029189, "grad_norm": 0.6875, "learning_rate": 9.79303960339914e-06, "loss": 2.2567, "step": 19401 }, { "epoch": 0.6259832913567153, "grad_norm": 1.0859375, "learning_rate": 9.791563757966808e-06, "loss": 2.2242, "step": 19402 }, { "epoch": 0.6260155552105116, "grad_norm": 0.6953125, "learning_rate": 9.7900879698652e-06, "loss": 2.2219, "step": 19403 }, { "epoch": 0.626047819064308, "grad_norm": 0.546875, "learning_rate": 9.788612239110566e-06, "loss": 2.2233, "step": 19404 }, { "epoch": 0.6260800829181042, "grad_norm": 0.69921875, "learning_rate": 9.787136565719153e-06, "loss": 2.1887, "step": 19405 }, { "epoch": 0.6261123467719006, "grad_norm": 0.455078125, "learning_rate": 9.785660949707199e-06, "loss": 2.1975, "step": 19406 }, { "epoch": 0.6261446106256969, "grad_norm": 0.55859375, "learning_rate": 9.784185391090947e-06, "loss": 2.2053, "step": 19407 }, { "epoch": 0.6261768744794933, "grad_norm": 0.67578125, "learning_rate": 9.782709889886645e-06, "loss": 2.1845, "step": 19408 }, { "epoch": 0.6262091383332896, "grad_norm": 0.4453125, "learning_rate": 9.781234446110524e-06, "loss": 2.197, "step": 19409 }, { "epoch": 0.626241402187086, "grad_norm": 0.59765625, "learning_rate": 9.779759059778832e-06, "loss": 2.1786, "step": 19410 }, { "epoch": 0.6262736660408823, "grad_norm": 0.5390625, "learning_rate": 9.778283730907811e-06, "loss": 2.1803, "step": 19411 }, { "epoch": 0.6263059298946787, "grad_norm": 0.57421875, "learning_rate": 9.776808459513695e-06, "loss": 2.1934, "step": 19412 }, { "epoch": 0.626338193748475, "grad_norm": 0.640625, "learning_rate": 9.77533324561272e-06, "loss": 2.2143, "step": 19413 }, { "epoch": 0.6263704576022714, "grad_norm": 0.5546875, "learning_rate": 9.773858089221138e-06, "loss": 2.2054, "step": 19414 }, { "epoch": 0.6264027214560677, "grad_norm": 0.5859375, "learning_rate": 9.772382990355166e-06, "loss": 2.2063, "step": 19415 }, { "epoch": 0.6264349853098641, "grad_norm": 0.4765625, "learning_rate": 9.770907949031056e-06, "loss": 2.1965, "step": 19416 }, { "epoch": 0.6264672491636604, "grad_norm": 0.7890625, "learning_rate": 9.769432965265046e-06, "loss": 2.2593, "step": 19417 }, { "epoch": 0.6264995130174568, "grad_norm": 0.6328125, "learning_rate": 9.76795803907336e-06, "loss": 2.2741, "step": 19418 }, { "epoch": 0.626531776871253, "grad_norm": 0.60546875, "learning_rate": 9.766483170472232e-06, "loss": 2.2737, "step": 19419 }, { "epoch": 0.6265640407250495, "grad_norm": 0.78125, "learning_rate": 9.765008359477909e-06, "loss": 2.2591, "step": 19420 }, { "epoch": 0.6265963045788459, "grad_norm": 0.484375, "learning_rate": 9.763533606106625e-06, "loss": 2.2671, "step": 19421 }, { "epoch": 0.6266285684326421, "grad_norm": 0.640625, "learning_rate": 9.762058910374596e-06, "loss": 2.2282, "step": 19422 }, { "epoch": 0.6266608322864385, "grad_norm": 0.458984375, "learning_rate": 9.760584272298067e-06, "loss": 2.1834, "step": 19423 }, { "epoch": 0.6266930961402348, "grad_norm": 0.49609375, "learning_rate": 9.759109691893275e-06, "loss": 2.1919, "step": 19424 }, { "epoch": 0.6267253599940312, "grad_norm": 0.435546875, "learning_rate": 9.757635169176436e-06, "loss": 2.1773, "step": 19425 }, { "epoch": 0.6267576238478275, "grad_norm": 0.51171875, "learning_rate": 9.756160704163789e-06, "loss": 2.2088, "step": 19426 }, { "epoch": 0.6267898877016239, "grad_norm": 0.51171875, "learning_rate": 9.754686296871568e-06, "loss": 2.2219, "step": 19427 }, { "epoch": 0.6268221515554202, "grad_norm": 0.478515625, "learning_rate": 9.753211947315994e-06, "loss": 2.1988, "step": 19428 }, { "epoch": 0.6268544154092166, "grad_norm": 0.466796875, "learning_rate": 9.751737655513295e-06, "loss": 2.1689, "step": 19429 }, { "epoch": 0.6268866792630129, "grad_norm": 0.4609375, "learning_rate": 9.750263421479708e-06, "loss": 2.1982, "step": 19430 }, { "epoch": 0.6269189431168093, "grad_norm": 0.5, "learning_rate": 9.748789245231453e-06, "loss": 2.1746, "step": 19431 }, { "epoch": 0.6269512069706056, "grad_norm": 0.45703125, "learning_rate": 9.747315126784759e-06, "loss": 2.1905, "step": 19432 }, { "epoch": 0.626983470824402, "grad_norm": 0.5234375, "learning_rate": 9.745841066155855e-06, "loss": 2.1583, "step": 19433 }, { "epoch": 0.6270157346781983, "grad_norm": 0.423828125, "learning_rate": 9.744367063360959e-06, "loss": 2.198, "step": 19434 }, { "epoch": 0.6270479985319947, "grad_norm": 0.5234375, "learning_rate": 9.742893118416299e-06, "loss": 2.1661, "step": 19435 }, { "epoch": 0.627080262385791, "grad_norm": 0.53515625, "learning_rate": 9.741419231338105e-06, "loss": 2.1968, "step": 19436 }, { "epoch": 0.6271125262395874, "grad_norm": 0.421875, "learning_rate": 9.739945402142591e-06, "loss": 2.1921, "step": 19437 }, { "epoch": 0.6271447900933836, "grad_norm": 0.6171875, "learning_rate": 9.738471630845988e-06, "loss": 2.1723, "step": 19438 }, { "epoch": 0.62717705394718, "grad_norm": 0.50390625, "learning_rate": 9.736997917464515e-06, "loss": 2.1914, "step": 19439 }, { "epoch": 0.6272093178009764, "grad_norm": 0.451171875, "learning_rate": 9.73552426201439e-06, "loss": 2.2053, "step": 19440 }, { "epoch": 0.6272415816547727, "grad_norm": 0.53515625, "learning_rate": 9.734050664511837e-06, "loss": 2.1757, "step": 19441 }, { "epoch": 0.6272738455085691, "grad_norm": 0.546875, "learning_rate": 9.73257712497308e-06, "loss": 2.2195, "step": 19442 }, { "epoch": 0.6273061093623654, "grad_norm": 0.44921875, "learning_rate": 9.731103643414332e-06, "loss": 2.1494, "step": 19443 }, { "epoch": 0.6273383732161618, "grad_norm": 0.54296875, "learning_rate": 9.729630219851816e-06, "loss": 2.1547, "step": 19444 }, { "epoch": 0.6273706370699581, "grad_norm": 0.5390625, "learning_rate": 9.728156854301754e-06, "loss": 2.1868, "step": 19445 }, { "epoch": 0.6274029009237545, "grad_norm": 0.48828125, "learning_rate": 9.726683546780353e-06, "loss": 2.2099, "step": 19446 }, { "epoch": 0.6274351647775508, "grad_norm": 0.474609375, "learning_rate": 9.72521029730384e-06, "loss": 2.1823, "step": 19447 }, { "epoch": 0.6274674286313472, "grad_norm": 0.61328125, "learning_rate": 9.72373710588843e-06, "loss": 2.1936, "step": 19448 }, { "epoch": 0.6274996924851435, "grad_norm": 0.67578125, "learning_rate": 9.722263972550334e-06, "loss": 2.1863, "step": 19449 }, { "epoch": 0.6275319563389399, "grad_norm": 0.5625, "learning_rate": 9.720790897305763e-06, "loss": 2.126, "step": 19450 }, { "epoch": 0.6275642201927362, "grad_norm": 0.48046875, "learning_rate": 9.719317880170945e-06, "loss": 2.1409, "step": 19451 }, { "epoch": 0.6275964840465326, "grad_norm": 0.640625, "learning_rate": 9.717844921162095e-06, "loss": 2.1197, "step": 19452 }, { "epoch": 0.6276287479003289, "grad_norm": 0.439453125, "learning_rate": 9.716372020295408e-06, "loss": 2.1024, "step": 19453 }, { "epoch": 0.6276610117541253, "grad_norm": 0.546875, "learning_rate": 9.714899177587108e-06, "loss": 2.0886, "step": 19454 }, { "epoch": 0.6276932756079215, "grad_norm": 0.4296875, "learning_rate": 9.713426393053413e-06, "loss": 2.089, "step": 19455 }, { "epoch": 0.6277255394617179, "grad_norm": 0.49609375, "learning_rate": 9.711953666710523e-06, "loss": 2.1101, "step": 19456 }, { "epoch": 0.6277578033155142, "grad_norm": 0.4140625, "learning_rate": 9.71048099857465e-06, "loss": 2.1154, "step": 19457 }, { "epoch": 0.6277900671693106, "grad_norm": 0.455078125, "learning_rate": 9.709008388662016e-06, "loss": 2.1415, "step": 19458 }, { "epoch": 0.6278223310231069, "grad_norm": 0.44140625, "learning_rate": 9.707535836988818e-06, "loss": 2.1133, "step": 19459 }, { "epoch": 0.6278545948769033, "grad_norm": 0.44921875, "learning_rate": 9.706063343571265e-06, "loss": 2.0953, "step": 19460 }, { "epoch": 0.6278868587306997, "grad_norm": 0.408203125, "learning_rate": 9.704590908425575e-06, "loss": 2.1643, "step": 19461 }, { "epoch": 0.627919122584496, "grad_norm": 0.44921875, "learning_rate": 9.703118531567945e-06, "loss": 2.1227, "step": 19462 }, { "epoch": 0.6279513864382924, "grad_norm": 0.47265625, "learning_rate": 9.701646213014586e-06, "loss": 2.1455, "step": 19463 }, { "epoch": 0.6279836502920887, "grad_norm": 0.4375, "learning_rate": 9.700173952781711e-06, "loss": 2.1047, "step": 19464 }, { "epoch": 0.6280159141458851, "grad_norm": 0.421875, "learning_rate": 9.698701750885513e-06, "loss": 2.1271, "step": 19465 }, { "epoch": 0.6280481779996814, "grad_norm": 0.4375, "learning_rate": 9.6972296073422e-06, "loss": 2.1129, "step": 19466 }, { "epoch": 0.6280804418534778, "grad_norm": 0.48828125, "learning_rate": 9.695757522167987e-06, "loss": 2.0872, "step": 19467 }, { "epoch": 0.6281127057072741, "grad_norm": 0.447265625, "learning_rate": 9.694285495379066e-06, "loss": 2.1509, "step": 19468 }, { "epoch": 0.6281449695610705, "grad_norm": 0.439453125, "learning_rate": 9.692813526991642e-06, "loss": 2.0791, "step": 19469 }, { "epoch": 0.6281772334148668, "grad_norm": 0.51171875, "learning_rate": 9.691341617021925e-06, "loss": 2.0913, "step": 19470 }, { "epoch": 0.6282094972686632, "grad_norm": 0.431640625, "learning_rate": 9.689869765486106e-06, "loss": 2.1143, "step": 19471 }, { "epoch": 0.6282417611224594, "grad_norm": 0.421875, "learning_rate": 9.688397972400392e-06, "loss": 2.077, "step": 19472 }, { "epoch": 0.6282740249762558, "grad_norm": 0.494140625, "learning_rate": 9.686926237780986e-06, "loss": 2.1189, "step": 19473 }, { "epoch": 0.6283062888300521, "grad_norm": 0.447265625, "learning_rate": 9.68545456164408e-06, "loss": 2.1001, "step": 19474 }, { "epoch": 0.6283385526838485, "grad_norm": 0.4375, "learning_rate": 9.683982944005878e-06, "loss": 2.0818, "step": 19475 }, { "epoch": 0.6283708165376448, "grad_norm": 0.62109375, "learning_rate": 9.682511384882587e-06, "loss": 2.1233, "step": 19476 }, { "epoch": 0.6284030803914412, "grad_norm": 0.625, "learning_rate": 9.681039884290384e-06, "loss": 2.1167, "step": 19477 }, { "epoch": 0.6284353442452375, "grad_norm": 0.400390625, "learning_rate": 9.67956844224548e-06, "loss": 2.0798, "step": 19478 }, { "epoch": 0.6284676080990339, "grad_norm": 0.703125, "learning_rate": 9.678097058764078e-06, "loss": 2.1132, "step": 19479 }, { "epoch": 0.6284998719528302, "grad_norm": 0.640625, "learning_rate": 9.676625733862359e-06, "loss": 2.1183, "step": 19480 }, { "epoch": 0.6285321358066266, "grad_norm": 0.453125, "learning_rate": 9.675154467556521e-06, "loss": 2.0948, "step": 19481 }, { "epoch": 0.628564399660423, "grad_norm": 0.796875, "learning_rate": 9.673683259862771e-06, "loss": 2.0849, "step": 19482 }, { "epoch": 0.6285966635142193, "grad_norm": 0.4375, "learning_rate": 9.672212110797291e-06, "loss": 2.1066, "step": 19483 }, { "epoch": 0.6286289273680157, "grad_norm": 0.62890625, "learning_rate": 9.670741020376278e-06, "loss": 2.0502, "step": 19484 }, { "epoch": 0.628661191221812, "grad_norm": 0.40234375, "learning_rate": 9.66926998861592e-06, "loss": 2.1354, "step": 19485 }, { "epoch": 0.6286934550756084, "grad_norm": 0.5703125, "learning_rate": 9.667799015532424e-06, "loss": 2.1106, "step": 19486 }, { "epoch": 0.6287257189294047, "grad_norm": 0.4140625, "learning_rate": 9.666328101141964e-06, "loss": 2.1036, "step": 19487 }, { "epoch": 0.628757982783201, "grad_norm": 0.4609375, "learning_rate": 9.664857245460736e-06, "loss": 2.0978, "step": 19488 }, { "epoch": 0.6287902466369973, "grad_norm": 0.384765625, "learning_rate": 9.66338644850494e-06, "loss": 2.1214, "step": 19489 }, { "epoch": 0.6288225104907937, "grad_norm": 0.5078125, "learning_rate": 9.661915710290753e-06, "loss": 2.1207, "step": 19490 }, { "epoch": 0.62885477434459, "grad_norm": 0.390625, "learning_rate": 9.660445030834366e-06, "loss": 2.1112, "step": 19491 }, { "epoch": 0.6288870381983864, "grad_norm": 0.474609375, "learning_rate": 9.658974410151975e-06, "loss": 2.0854, "step": 19492 }, { "epoch": 0.6289193020521827, "grad_norm": 0.384765625, "learning_rate": 9.657503848259756e-06, "loss": 2.0967, "step": 19493 }, { "epoch": 0.6289515659059791, "grad_norm": 0.486328125, "learning_rate": 9.656033345173904e-06, "loss": 2.0634, "step": 19494 }, { "epoch": 0.6289838297597754, "grad_norm": 0.39453125, "learning_rate": 9.654562900910606e-06, "loss": 2.1003, "step": 19495 }, { "epoch": 0.6290160936135718, "grad_norm": 0.443359375, "learning_rate": 9.653092515486041e-06, "loss": 2.1174, "step": 19496 }, { "epoch": 0.6290483574673681, "grad_norm": 0.4140625, "learning_rate": 9.651622188916402e-06, "loss": 2.0929, "step": 19497 }, { "epoch": 0.6290806213211645, "grad_norm": 0.431640625, "learning_rate": 9.650151921217865e-06, "loss": 2.1633, "step": 19498 }, { "epoch": 0.6291128851749608, "grad_norm": 0.60546875, "learning_rate": 9.648681712406621e-06, "loss": 2.172, "step": 19499 }, { "epoch": 0.6291451490287572, "grad_norm": 0.49609375, "learning_rate": 9.647211562498847e-06, "loss": 2.143, "step": 19500 }, { "epoch": 0.6291774128825536, "grad_norm": 0.474609375, "learning_rate": 9.645741471510731e-06, "loss": 2.1544, "step": 19501 }, { "epoch": 0.6292096767363499, "grad_norm": 0.78515625, "learning_rate": 9.644271439458448e-06, "loss": 2.1623, "step": 19502 }, { "epoch": 0.6292419405901463, "grad_norm": 1.3203125, "learning_rate": 9.642801466358185e-06, "loss": 2.1537, "step": 19503 }, { "epoch": 0.6292742044439426, "grad_norm": 0.79296875, "learning_rate": 9.641331552226122e-06, "loss": 2.2129, "step": 19504 }, { "epoch": 0.629306468297739, "grad_norm": 0.466796875, "learning_rate": 9.639861697078433e-06, "loss": 2.129, "step": 19505 }, { "epoch": 0.6293387321515352, "grad_norm": 0.609375, "learning_rate": 9.638391900931301e-06, "loss": 2.0987, "step": 19506 }, { "epoch": 0.6293709960053316, "grad_norm": 0.515625, "learning_rate": 9.636922163800911e-06, "loss": 2.0854, "step": 19507 }, { "epoch": 0.6294032598591279, "grad_norm": 0.56640625, "learning_rate": 9.635452485703427e-06, "loss": 2.0835, "step": 19508 }, { "epoch": 0.6294355237129243, "grad_norm": 0.490234375, "learning_rate": 9.633982866655033e-06, "loss": 2.0864, "step": 19509 }, { "epoch": 0.6294677875667206, "grad_norm": 0.439453125, "learning_rate": 9.632513306671916e-06, "loss": 2.0958, "step": 19510 }, { "epoch": 0.629500051420517, "grad_norm": 0.578125, "learning_rate": 9.631043805770233e-06, "loss": 2.1146, "step": 19511 }, { "epoch": 0.6295323152743133, "grad_norm": 0.53125, "learning_rate": 9.629574363966167e-06, "loss": 2.0994, "step": 19512 }, { "epoch": 0.6295645791281097, "grad_norm": 0.54296875, "learning_rate": 9.628104981275902e-06, "loss": 2.1132, "step": 19513 }, { "epoch": 0.629596842981906, "grad_norm": 0.421875, "learning_rate": 9.626635657715598e-06, "loss": 2.096, "step": 19514 }, { "epoch": 0.6296291068357024, "grad_norm": 0.490234375, "learning_rate": 9.625166393301434e-06, "loss": 2.0988, "step": 19515 }, { "epoch": 0.6296613706894987, "grad_norm": 0.4296875, "learning_rate": 9.623697188049586e-06, "loss": 2.1196, "step": 19516 }, { "epoch": 0.6296936345432951, "grad_norm": 0.51171875, "learning_rate": 9.622228041976216e-06, "loss": 2.0813, "step": 19517 }, { "epoch": 0.6297258983970914, "grad_norm": 0.435546875, "learning_rate": 9.620758955097505e-06, "loss": 2.0765, "step": 19518 }, { "epoch": 0.6297581622508878, "grad_norm": 0.427734375, "learning_rate": 9.61928992742962e-06, "loss": 2.1114, "step": 19519 }, { "epoch": 0.629790426104684, "grad_norm": 0.396484375, "learning_rate": 9.617820958988734e-06, "loss": 2.1207, "step": 19520 }, { "epoch": 0.6298226899584805, "grad_norm": 0.4296875, "learning_rate": 9.61635204979101e-06, "loss": 2.0621, "step": 19521 }, { "epoch": 0.6298549538122769, "grad_norm": 0.3984375, "learning_rate": 9.61488319985262e-06, "loss": 2.1047, "step": 19522 }, { "epoch": 0.6298872176660731, "grad_norm": 0.419921875, "learning_rate": 9.613414409189736e-06, "loss": 2.1245, "step": 19523 }, { "epoch": 0.6299194815198695, "grad_norm": 0.416015625, "learning_rate": 9.611945677818519e-06, "loss": 2.0736, "step": 19524 }, { "epoch": 0.6299517453736658, "grad_norm": 0.3828125, "learning_rate": 9.61047700575514e-06, "loss": 2.0977, "step": 19525 }, { "epoch": 0.6299840092274622, "grad_norm": 0.39453125, "learning_rate": 9.609008393015765e-06, "loss": 2.1151, "step": 19526 }, { "epoch": 0.6300162730812585, "grad_norm": 0.462890625, "learning_rate": 9.607539839616555e-06, "loss": 2.0763, "step": 19527 }, { "epoch": 0.6300485369350549, "grad_norm": 0.40625, "learning_rate": 9.606071345573681e-06, "loss": 2.1045, "step": 19528 }, { "epoch": 0.6300808007888512, "grad_norm": 0.4453125, "learning_rate": 9.604602910903304e-06, "loss": 2.0795, "step": 19529 }, { "epoch": 0.6301130646426476, "grad_norm": 0.412109375, "learning_rate": 9.603134535621585e-06, "loss": 2.0199, "step": 19530 }, { "epoch": 0.6301453284964439, "grad_norm": 0.396484375, "learning_rate": 9.601666219744689e-06, "loss": 2.024, "step": 19531 }, { "epoch": 0.6301775923502403, "grad_norm": 0.384765625, "learning_rate": 9.60019796328878e-06, "loss": 2.0204, "step": 19532 }, { "epoch": 0.6302098562040366, "grad_norm": 0.359375, "learning_rate": 9.598729766270019e-06, "loss": 2.0058, "step": 19533 }, { "epoch": 0.630242120057833, "grad_norm": 0.365234375, "learning_rate": 9.597261628704563e-06, "loss": 1.9783, "step": 19534 }, { "epoch": 0.6302743839116293, "grad_norm": 0.3828125, "learning_rate": 9.595793550608583e-06, "loss": 1.9391, "step": 19535 }, { "epoch": 0.6303066477654257, "grad_norm": 0.373046875, "learning_rate": 9.59432553199822e-06, "loss": 1.9376, "step": 19536 }, { "epoch": 0.630338911619222, "grad_norm": 0.349609375, "learning_rate": 9.592857572889648e-06, "loss": 1.9584, "step": 19537 }, { "epoch": 0.6303711754730184, "grad_norm": 0.326171875, "learning_rate": 9.591389673299025e-06, "loss": 1.9728, "step": 19538 }, { "epoch": 0.6304034393268146, "grad_norm": 0.345703125, "learning_rate": 9.589921833242497e-06, "loss": 1.9598, "step": 19539 }, { "epoch": 0.630435703180611, "grad_norm": 0.3359375, "learning_rate": 9.58845405273623e-06, "loss": 1.9219, "step": 19540 }, { "epoch": 0.6304679670344073, "grad_norm": 0.345703125, "learning_rate": 9.586986331796385e-06, "loss": 1.8797, "step": 19541 }, { "epoch": 0.6305002308882037, "grad_norm": 0.328125, "learning_rate": 9.585518670439105e-06, "loss": 1.9256, "step": 19542 }, { "epoch": 0.6305324947420001, "grad_norm": 0.34375, "learning_rate": 9.584051068680548e-06, "loss": 1.935, "step": 19543 }, { "epoch": 0.6305647585957964, "grad_norm": 0.326171875, "learning_rate": 9.582583526536882e-06, "loss": 1.973, "step": 19544 }, { "epoch": 0.6305970224495928, "grad_norm": 0.341796875, "learning_rate": 9.581116044024242e-06, "loss": 1.9419, "step": 19545 }, { "epoch": 0.6306292863033891, "grad_norm": 0.33984375, "learning_rate": 9.579648621158791e-06, "loss": 1.965, "step": 19546 }, { "epoch": 0.6306615501571855, "grad_norm": 0.322265625, "learning_rate": 9.578181257956681e-06, "loss": 1.9559, "step": 19547 }, { "epoch": 0.6306938140109818, "grad_norm": 0.330078125, "learning_rate": 9.576713954434062e-06, "loss": 1.9741, "step": 19548 }, { "epoch": 0.6307260778647782, "grad_norm": 0.341796875, "learning_rate": 9.575246710607082e-06, "loss": 1.9619, "step": 19549 }, { "epoch": 0.6307583417185745, "grad_norm": 0.345703125, "learning_rate": 9.573779526491894e-06, "loss": 1.9402, "step": 19550 }, { "epoch": 0.6307906055723709, "grad_norm": 0.333984375, "learning_rate": 9.572312402104652e-06, "loss": 1.9441, "step": 19551 }, { "epoch": 0.6308228694261672, "grad_norm": 0.333984375, "learning_rate": 9.570845337461499e-06, "loss": 1.9419, "step": 19552 }, { "epoch": 0.6308551332799636, "grad_norm": 0.330078125, "learning_rate": 9.569378332578584e-06, "loss": 1.9756, "step": 19553 }, { "epoch": 0.6308873971337599, "grad_norm": 0.35546875, "learning_rate": 9.567911387472064e-06, "loss": 1.9794, "step": 19554 }, { "epoch": 0.6309196609875563, "grad_norm": 0.365234375, "learning_rate": 9.566444502158071e-06, "loss": 1.9758, "step": 19555 }, { "epoch": 0.6309519248413525, "grad_norm": 0.33984375, "learning_rate": 9.56497767665276e-06, "loss": 1.9445, "step": 19556 }, { "epoch": 0.6309841886951489, "grad_norm": 0.32421875, "learning_rate": 9.563510910972278e-06, "loss": 1.9413, "step": 19557 }, { "epoch": 0.6310164525489452, "grad_norm": 0.341796875, "learning_rate": 9.562044205132767e-06, "loss": 1.9422, "step": 19558 }, { "epoch": 0.6310487164027416, "grad_norm": 0.330078125, "learning_rate": 9.56057755915037e-06, "loss": 1.9283, "step": 19559 }, { "epoch": 0.6310809802565379, "grad_norm": 0.32421875, "learning_rate": 9.559110973041237e-06, "loss": 1.9407, "step": 19560 }, { "epoch": 0.6311132441103343, "grad_norm": 0.322265625, "learning_rate": 9.557644446821504e-06, "loss": 1.9305, "step": 19561 }, { "epoch": 0.6311455079641307, "grad_norm": 0.330078125, "learning_rate": 9.556177980507315e-06, "loss": 1.9611, "step": 19562 }, { "epoch": 0.631177771817927, "grad_norm": 0.396484375, "learning_rate": 9.55471157411482e-06, "loss": 1.9423, "step": 19563 }, { "epoch": 0.6312100356717234, "grad_norm": 0.376953125, "learning_rate": 9.553245227660149e-06, "loss": 1.9802, "step": 19564 }, { "epoch": 0.6312422995255197, "grad_norm": 0.34375, "learning_rate": 9.551778941159447e-06, "loss": 1.9794, "step": 19565 }, { "epoch": 0.6312745633793161, "grad_norm": 0.34375, "learning_rate": 9.55031271462886e-06, "loss": 1.9525, "step": 19566 }, { "epoch": 0.6313068272331124, "grad_norm": 0.3671875, "learning_rate": 9.54884654808451e-06, "loss": 1.9577, "step": 19567 }, { "epoch": 0.6313390910869088, "grad_norm": 0.345703125, "learning_rate": 9.54738044154255e-06, "loss": 1.9393, "step": 19568 }, { "epoch": 0.6313713549407051, "grad_norm": 0.337890625, "learning_rate": 9.545914395019124e-06, "loss": 1.941, "step": 19569 }, { "epoch": 0.6314036187945015, "grad_norm": 0.359375, "learning_rate": 9.544448408530351e-06, "loss": 1.9409, "step": 19570 }, { "epoch": 0.6314358826482978, "grad_norm": 0.357421875, "learning_rate": 9.542982482092372e-06, "loss": 1.9387, "step": 19571 }, { "epoch": 0.6314681465020942, "grad_norm": 0.337890625, "learning_rate": 9.541516615721338e-06, "loss": 1.9666, "step": 19572 }, { "epoch": 0.6315004103558904, "grad_norm": 0.361328125, "learning_rate": 9.540050809433367e-06, "loss": 1.969, "step": 19573 }, { "epoch": 0.6315326742096868, "grad_norm": 0.45703125, "learning_rate": 9.538585063244595e-06, "loss": 1.9578, "step": 19574 }, { "epoch": 0.6315649380634831, "grad_norm": 0.333984375, "learning_rate": 9.53711937717117e-06, "loss": 1.9351, "step": 19575 }, { "epoch": 0.6315972019172795, "grad_norm": 0.3828125, "learning_rate": 9.53565375122921e-06, "loss": 1.9527, "step": 19576 }, { "epoch": 0.6316294657710758, "grad_norm": 0.33984375, "learning_rate": 9.534188185434855e-06, "loss": 1.9119, "step": 19577 }, { "epoch": 0.6316617296248722, "grad_norm": 0.328125, "learning_rate": 9.532722679804238e-06, "loss": 1.9483, "step": 19578 }, { "epoch": 0.6316939934786685, "grad_norm": 0.3359375, "learning_rate": 9.531257234353485e-06, "loss": 1.954, "step": 19579 }, { "epoch": 0.6317262573324649, "grad_norm": 0.33203125, "learning_rate": 9.529791849098727e-06, "loss": 1.9415, "step": 19580 }, { "epoch": 0.6317585211862612, "grad_norm": 0.34765625, "learning_rate": 9.5283265240561e-06, "loss": 2.0078, "step": 19581 }, { "epoch": 0.6317907850400576, "grad_norm": 0.3359375, "learning_rate": 9.526861259241725e-06, "loss": 1.9631, "step": 19582 }, { "epoch": 0.631823048893854, "grad_norm": 0.33984375, "learning_rate": 9.525396054671738e-06, "loss": 1.9516, "step": 19583 }, { "epoch": 0.6318553127476503, "grad_norm": 0.333984375, "learning_rate": 9.523930910362259e-06, "loss": 1.9206, "step": 19584 }, { "epoch": 0.6318875766014467, "grad_norm": 0.33984375, "learning_rate": 9.522465826329427e-06, "loss": 1.9112, "step": 19585 }, { "epoch": 0.631919840455243, "grad_norm": 0.369140625, "learning_rate": 9.52100080258936e-06, "loss": 1.9993, "step": 19586 }, { "epoch": 0.6319521043090394, "grad_norm": 0.33984375, "learning_rate": 9.519535839158182e-06, "loss": 1.9351, "step": 19587 }, { "epoch": 0.6319843681628357, "grad_norm": 0.330078125, "learning_rate": 9.518070936052025e-06, "loss": 1.9626, "step": 19588 }, { "epoch": 0.632016632016632, "grad_norm": 0.380859375, "learning_rate": 9.516606093287009e-06, "loss": 1.9627, "step": 19589 }, { "epoch": 0.6320488958704283, "grad_norm": 0.359375, "learning_rate": 9.515141310879257e-06, "loss": 1.9981, "step": 19590 }, { "epoch": 0.6320811597242247, "grad_norm": 0.330078125, "learning_rate": 9.5136765888449e-06, "loss": 1.9428, "step": 19591 }, { "epoch": 0.632113423578021, "grad_norm": 0.388671875, "learning_rate": 9.51221192720005e-06, "loss": 1.9041, "step": 19592 }, { "epoch": 0.6321456874318174, "grad_norm": 0.341796875, "learning_rate": 9.510747325960837e-06, "loss": 1.9536, "step": 19593 }, { "epoch": 0.6321779512856137, "grad_norm": 0.34375, "learning_rate": 9.50928278514338e-06, "loss": 1.9457, "step": 19594 }, { "epoch": 0.6322102151394101, "grad_norm": 0.33984375, "learning_rate": 9.5078183047638e-06, "loss": 1.955, "step": 19595 }, { "epoch": 0.6322424789932064, "grad_norm": 0.337890625, "learning_rate": 9.506353884838212e-06, "loss": 1.9641, "step": 19596 }, { "epoch": 0.6322747428470028, "grad_norm": 0.341796875, "learning_rate": 9.504889525382747e-06, "loss": 1.9059, "step": 19597 }, { "epoch": 0.6323070067007991, "grad_norm": 0.36328125, "learning_rate": 9.503425226413507e-06, "loss": 1.957, "step": 19598 }, { "epoch": 0.6323392705545955, "grad_norm": 0.3203125, "learning_rate": 9.50196098794662e-06, "loss": 1.9543, "step": 19599 }, { "epoch": 0.6323715344083918, "grad_norm": 0.392578125, "learning_rate": 9.500496809998211e-06, "loss": 1.9105, "step": 19600 }, { "epoch": 0.6324037982621882, "grad_norm": 0.373046875, "learning_rate": 9.49903269258438e-06, "loss": 1.9907, "step": 19601 }, { "epoch": 0.6324360621159846, "grad_norm": 0.32421875, "learning_rate": 9.497568635721247e-06, "loss": 1.9632, "step": 19602 }, { "epoch": 0.6324683259697809, "grad_norm": 0.3515625, "learning_rate": 9.496104639424941e-06, "loss": 1.9525, "step": 19603 }, { "epoch": 0.6325005898235773, "grad_norm": 0.357421875, "learning_rate": 9.494640703711562e-06, "loss": 1.9314, "step": 19604 }, { "epoch": 0.6325328536773736, "grad_norm": 0.33203125, "learning_rate": 9.493176828597225e-06, "loss": 1.9355, "step": 19605 }, { "epoch": 0.63256511753117, "grad_norm": 0.365234375, "learning_rate": 9.491713014098055e-06, "loss": 1.9846, "step": 19606 }, { "epoch": 0.6325973813849662, "grad_norm": 0.326171875, "learning_rate": 9.490249260230151e-06, "loss": 1.9389, "step": 19607 }, { "epoch": 0.6326296452387626, "grad_norm": 0.33984375, "learning_rate": 9.488785567009629e-06, "loss": 1.9401, "step": 19608 }, { "epoch": 0.6326619090925589, "grad_norm": 0.3359375, "learning_rate": 9.487321934452606e-06, "loss": 1.9497, "step": 19609 }, { "epoch": 0.6326941729463553, "grad_norm": 0.33984375, "learning_rate": 9.485858362575186e-06, "loss": 1.9238, "step": 19610 }, { "epoch": 0.6327264368001516, "grad_norm": 0.333984375, "learning_rate": 9.48439485139348e-06, "loss": 1.9595, "step": 19611 }, { "epoch": 0.632758700653948, "grad_norm": 0.33203125, "learning_rate": 9.482931400923603e-06, "loss": 1.9299, "step": 19612 }, { "epoch": 0.6327909645077443, "grad_norm": 0.341796875, "learning_rate": 9.481468011181655e-06, "loss": 1.9367, "step": 19613 }, { "epoch": 0.6328232283615407, "grad_norm": 0.33984375, "learning_rate": 9.480004682183747e-06, "loss": 1.9527, "step": 19614 }, { "epoch": 0.632855492215337, "grad_norm": 0.359375, "learning_rate": 9.478541413945994e-06, "loss": 1.9193, "step": 19615 }, { "epoch": 0.6328877560691334, "grad_norm": 0.34765625, "learning_rate": 9.47707820648449e-06, "loss": 1.9262, "step": 19616 }, { "epoch": 0.6329200199229297, "grad_norm": 0.333984375, "learning_rate": 9.475615059815348e-06, "loss": 1.9233, "step": 19617 }, { "epoch": 0.6329522837767261, "grad_norm": 0.337890625, "learning_rate": 9.474151973954674e-06, "loss": 1.9378, "step": 19618 }, { "epoch": 0.6329845476305224, "grad_norm": 0.3359375, "learning_rate": 9.47268894891857e-06, "loss": 1.9568, "step": 19619 }, { "epoch": 0.6330168114843188, "grad_norm": 0.33984375, "learning_rate": 9.47122598472314e-06, "loss": 1.9442, "step": 19620 }, { "epoch": 0.633049075338115, "grad_norm": 0.34375, "learning_rate": 9.469763081384488e-06, "loss": 1.9453, "step": 19621 }, { "epoch": 0.6330813391919115, "grad_norm": 0.337890625, "learning_rate": 9.46830023891872e-06, "loss": 1.9501, "step": 19622 }, { "epoch": 0.6331136030457079, "grad_norm": 0.353515625, "learning_rate": 9.46683745734193e-06, "loss": 1.9605, "step": 19623 }, { "epoch": 0.6331458668995041, "grad_norm": 0.3515625, "learning_rate": 9.465374736670226e-06, "loss": 1.9797, "step": 19624 }, { "epoch": 0.6331781307533005, "grad_norm": 0.34765625, "learning_rate": 9.463912076919708e-06, "loss": 1.9344, "step": 19625 }, { "epoch": 0.6332103946070968, "grad_norm": 0.34375, "learning_rate": 9.462449478106472e-06, "loss": 1.9433, "step": 19626 }, { "epoch": 0.6332426584608932, "grad_norm": 0.328125, "learning_rate": 9.460986940246619e-06, "loss": 1.964, "step": 19627 }, { "epoch": 0.6332749223146895, "grad_norm": 0.3515625, "learning_rate": 9.459524463356255e-06, "loss": 1.9378, "step": 19628 }, { "epoch": 0.6333071861684859, "grad_norm": 0.33984375, "learning_rate": 9.458062047451461e-06, "loss": 1.9716, "step": 19629 }, { "epoch": 0.6333394500222822, "grad_norm": 0.3515625, "learning_rate": 9.456599692548348e-06, "loss": 1.9538, "step": 19630 }, { "epoch": 0.6333717138760786, "grad_norm": 0.3515625, "learning_rate": 9.455137398663014e-06, "loss": 1.9364, "step": 19631 }, { "epoch": 0.6334039777298749, "grad_norm": 0.345703125, "learning_rate": 9.453675165811543e-06, "loss": 1.8719, "step": 19632 }, { "epoch": 0.6334362415836713, "grad_norm": 0.35546875, "learning_rate": 9.452212994010035e-06, "loss": 1.9303, "step": 19633 }, { "epoch": 0.6334685054374676, "grad_norm": 0.34765625, "learning_rate": 9.450750883274593e-06, "loss": 1.9538, "step": 19634 }, { "epoch": 0.633500769291264, "grad_norm": 0.328125, "learning_rate": 9.449288833621302e-06, "loss": 1.9586, "step": 19635 }, { "epoch": 0.6335330331450603, "grad_norm": 0.33203125, "learning_rate": 9.447826845066256e-06, "loss": 1.9343, "step": 19636 }, { "epoch": 0.6335652969988567, "grad_norm": 0.326171875, "learning_rate": 9.446364917625552e-06, "loss": 1.9598, "step": 19637 }, { "epoch": 0.633597560852653, "grad_norm": 0.326171875, "learning_rate": 9.444903051315275e-06, "loss": 1.963, "step": 19638 }, { "epoch": 0.6336298247064494, "grad_norm": 0.34375, "learning_rate": 9.44344124615152e-06, "loss": 1.9251, "step": 19639 }, { "epoch": 0.6336620885602456, "grad_norm": 0.326171875, "learning_rate": 9.441979502150382e-06, "loss": 1.9101, "step": 19640 }, { "epoch": 0.633694352414042, "grad_norm": 0.333984375, "learning_rate": 9.440517819327943e-06, "loss": 1.9048, "step": 19641 }, { "epoch": 0.6337266162678383, "grad_norm": 0.345703125, "learning_rate": 9.439056197700292e-06, "loss": 1.9725, "step": 19642 }, { "epoch": 0.6337588801216347, "grad_norm": 0.3203125, "learning_rate": 9.43759463728353e-06, "loss": 1.951, "step": 19643 }, { "epoch": 0.6337911439754311, "grad_norm": 0.33203125, "learning_rate": 9.43613313809373e-06, "loss": 1.9239, "step": 19644 }, { "epoch": 0.6338234078292274, "grad_norm": 0.326171875, "learning_rate": 9.434671700146984e-06, "loss": 1.9603, "step": 19645 }, { "epoch": 0.6338556716830238, "grad_norm": 0.34765625, "learning_rate": 9.433210323459382e-06, "loss": 1.9327, "step": 19646 }, { "epoch": 0.6338879355368201, "grad_norm": 0.33203125, "learning_rate": 9.431749008047007e-06, "loss": 1.9287, "step": 19647 }, { "epoch": 0.6339201993906165, "grad_norm": 0.33984375, "learning_rate": 9.430287753925942e-06, "loss": 1.9313, "step": 19648 }, { "epoch": 0.6339524632444128, "grad_norm": 0.345703125, "learning_rate": 9.428826561112276e-06, "loss": 1.9146, "step": 19649 }, { "epoch": 0.6339847270982092, "grad_norm": 0.33984375, "learning_rate": 9.427365429622089e-06, "loss": 1.9729, "step": 19650 }, { "epoch": 0.6340169909520055, "grad_norm": 0.375, "learning_rate": 9.425904359471464e-06, "loss": 1.9849, "step": 19651 }, { "epoch": 0.6340492548058019, "grad_norm": 0.34375, "learning_rate": 9.424443350676485e-06, "loss": 1.9117, "step": 19652 }, { "epoch": 0.6340815186595982, "grad_norm": 0.380859375, "learning_rate": 9.422982403253238e-06, "loss": 1.9332, "step": 19653 }, { "epoch": 0.6341137825133946, "grad_norm": 0.357421875, "learning_rate": 9.4215215172178e-06, "loss": 1.9286, "step": 19654 }, { "epoch": 0.6341460463671909, "grad_norm": 0.349609375, "learning_rate": 9.420060692586246e-06, "loss": 1.9499, "step": 19655 }, { "epoch": 0.6341783102209873, "grad_norm": 0.349609375, "learning_rate": 9.418599929374666e-06, "loss": 1.902, "step": 19656 }, { "epoch": 0.6342105740747835, "grad_norm": 0.3671875, "learning_rate": 9.41713922759913e-06, "loss": 1.9558, "step": 19657 }, { "epoch": 0.6342428379285799, "grad_norm": 0.345703125, "learning_rate": 9.41567858727572e-06, "loss": 1.955, "step": 19658 }, { "epoch": 0.6342751017823762, "grad_norm": 0.35546875, "learning_rate": 9.414218008420522e-06, "loss": 1.9467, "step": 19659 }, { "epoch": 0.6343073656361726, "grad_norm": 0.349609375, "learning_rate": 9.412757491049595e-06, "loss": 1.953, "step": 19660 }, { "epoch": 0.6343396294899689, "grad_norm": 0.34765625, "learning_rate": 9.411297035179026e-06, "loss": 1.9571, "step": 19661 }, { "epoch": 0.6343718933437653, "grad_norm": 0.33984375, "learning_rate": 9.4098366408249e-06, "loss": 1.9185, "step": 19662 }, { "epoch": 0.6344041571975617, "grad_norm": 0.345703125, "learning_rate": 9.408376308003273e-06, "loss": 1.9177, "step": 19663 }, { "epoch": 0.634436421051358, "grad_norm": 0.369140625, "learning_rate": 9.406916036730227e-06, "loss": 1.9496, "step": 19664 }, { "epoch": 0.6344686849051544, "grad_norm": 0.33984375, "learning_rate": 9.405455827021843e-06, "loss": 1.8935, "step": 19665 }, { "epoch": 0.6345009487589507, "grad_norm": 0.34765625, "learning_rate": 9.403995678894185e-06, "loss": 1.9039, "step": 19666 }, { "epoch": 0.6345332126127471, "grad_norm": 0.33984375, "learning_rate": 9.402535592363326e-06, "loss": 1.9264, "step": 19667 }, { "epoch": 0.6345654764665434, "grad_norm": 0.33984375, "learning_rate": 9.401075567445342e-06, "loss": 1.8631, "step": 19668 }, { "epoch": 0.6345977403203398, "grad_norm": 0.341796875, "learning_rate": 9.399615604156299e-06, "loss": 1.9413, "step": 19669 }, { "epoch": 0.6346300041741361, "grad_norm": 0.333984375, "learning_rate": 9.398155702512272e-06, "loss": 1.9118, "step": 19670 }, { "epoch": 0.6346622680279325, "grad_norm": 0.349609375, "learning_rate": 9.39669586252933e-06, "loss": 1.9372, "step": 19671 }, { "epoch": 0.6346945318817288, "grad_norm": 0.34765625, "learning_rate": 9.395236084223535e-06, "loss": 1.9606, "step": 19672 }, { "epoch": 0.6347267957355252, "grad_norm": 0.337890625, "learning_rate": 9.393776367610963e-06, "loss": 1.9143, "step": 19673 }, { "epoch": 0.6347590595893214, "grad_norm": 0.345703125, "learning_rate": 9.392316712707681e-06, "loss": 1.9626, "step": 19674 }, { "epoch": 0.6347913234431178, "grad_norm": 0.34765625, "learning_rate": 9.39085711952975e-06, "loss": 1.9, "step": 19675 }, { "epoch": 0.6348235872969141, "grad_norm": 0.333984375, "learning_rate": 9.389397588093241e-06, "loss": 1.9249, "step": 19676 }, { "epoch": 0.6348558511507105, "grad_norm": 0.337890625, "learning_rate": 9.38793811841422e-06, "loss": 1.9477, "step": 19677 }, { "epoch": 0.6348881150045068, "grad_norm": 0.353515625, "learning_rate": 9.38647871050875e-06, "loss": 1.9691, "step": 19678 }, { "epoch": 0.6349203788583032, "grad_norm": 0.337890625, "learning_rate": 9.385019364392892e-06, "loss": 1.9004, "step": 19679 }, { "epoch": 0.6349526427120995, "grad_norm": 0.353515625, "learning_rate": 9.383560080082716e-06, "loss": 1.9466, "step": 19680 }, { "epoch": 0.6349849065658959, "grad_norm": 0.34375, "learning_rate": 9.382100857594279e-06, "loss": 1.944, "step": 19681 }, { "epoch": 0.6350171704196922, "grad_norm": 0.341796875, "learning_rate": 9.380641696943646e-06, "loss": 1.9451, "step": 19682 }, { "epoch": 0.6350494342734886, "grad_norm": 0.345703125, "learning_rate": 9.379182598146877e-06, "loss": 1.9405, "step": 19683 }, { "epoch": 0.635081698127285, "grad_norm": 0.365234375, "learning_rate": 9.377723561220037e-06, "loss": 1.9414, "step": 19684 }, { "epoch": 0.6351139619810813, "grad_norm": 0.349609375, "learning_rate": 9.37626458617918e-06, "loss": 1.9257, "step": 19685 }, { "epoch": 0.6351462258348777, "grad_norm": 0.341796875, "learning_rate": 9.37480567304037e-06, "loss": 1.9737, "step": 19686 }, { "epoch": 0.635178489688674, "grad_norm": 0.373046875, "learning_rate": 9.373346821819668e-06, "loss": 1.9438, "step": 19687 }, { "epoch": 0.6352107535424704, "grad_norm": 0.3359375, "learning_rate": 9.371888032533116e-06, "loss": 1.9192, "step": 19688 }, { "epoch": 0.6352430173962667, "grad_norm": 0.37890625, "learning_rate": 9.37042930519679e-06, "loss": 1.9565, "step": 19689 }, { "epoch": 0.635275281250063, "grad_norm": 0.330078125, "learning_rate": 9.368970639826741e-06, "loss": 1.9232, "step": 19690 }, { "epoch": 0.6353075451038593, "grad_norm": 0.345703125, "learning_rate": 9.367512036439018e-06, "loss": 1.94, "step": 19691 }, { "epoch": 0.6353398089576557, "grad_norm": 0.337890625, "learning_rate": 9.366053495049685e-06, "loss": 1.9311, "step": 19692 }, { "epoch": 0.635372072811452, "grad_norm": 0.34765625, "learning_rate": 9.364595015674797e-06, "loss": 1.9575, "step": 19693 }, { "epoch": 0.6354043366652484, "grad_norm": 0.353515625, "learning_rate": 9.3631365983304e-06, "loss": 1.9253, "step": 19694 }, { "epoch": 0.6354366005190447, "grad_norm": 0.357421875, "learning_rate": 9.361678243032549e-06, "loss": 1.9226, "step": 19695 }, { "epoch": 0.6354688643728411, "grad_norm": 0.33984375, "learning_rate": 9.360219949797306e-06, "loss": 1.9426, "step": 19696 }, { "epoch": 0.6355011282266374, "grad_norm": 0.3359375, "learning_rate": 9.358761718640712e-06, "loss": 1.9412, "step": 19697 }, { "epoch": 0.6355333920804338, "grad_norm": 0.345703125, "learning_rate": 9.357303549578818e-06, "loss": 1.9297, "step": 19698 }, { "epoch": 0.6355656559342301, "grad_norm": 0.345703125, "learning_rate": 9.355845442627686e-06, "loss": 1.8776, "step": 19699 }, { "epoch": 0.6355979197880265, "grad_norm": 0.36328125, "learning_rate": 9.354387397803353e-06, "loss": 1.9221, "step": 19700 }, { "epoch": 0.6356301836418228, "grad_norm": 0.3359375, "learning_rate": 9.352929415121873e-06, "loss": 1.9188, "step": 19701 }, { "epoch": 0.6356624474956192, "grad_norm": 0.341796875, "learning_rate": 9.3514714945993e-06, "loss": 1.9298, "step": 19702 }, { "epoch": 0.6356947113494156, "grad_norm": 0.38671875, "learning_rate": 9.350013636251672e-06, "loss": 1.8853, "step": 19703 }, { "epoch": 0.6357269752032119, "grad_norm": 0.337890625, "learning_rate": 9.348555840095042e-06, "loss": 1.9278, "step": 19704 }, { "epoch": 0.6357592390570083, "grad_norm": 0.33203125, "learning_rate": 9.347098106145456e-06, "loss": 1.939, "step": 19705 }, { "epoch": 0.6357915029108046, "grad_norm": 0.333984375, "learning_rate": 9.345640434418957e-06, "loss": 1.9536, "step": 19706 }, { "epoch": 0.635823766764601, "grad_norm": 0.349609375, "learning_rate": 9.344182824931591e-06, "loss": 1.9433, "step": 19707 }, { "epoch": 0.6358560306183972, "grad_norm": 0.328125, "learning_rate": 9.342725277699406e-06, "loss": 1.9249, "step": 19708 }, { "epoch": 0.6358882944721936, "grad_norm": 0.33984375, "learning_rate": 9.34126779273844e-06, "loss": 1.922, "step": 19709 }, { "epoch": 0.6359205583259899, "grad_norm": 0.34765625, "learning_rate": 9.339810370064738e-06, "loss": 1.946, "step": 19710 }, { "epoch": 0.6359528221797863, "grad_norm": 0.353515625, "learning_rate": 9.338353009694346e-06, "loss": 1.9061, "step": 19711 }, { "epoch": 0.6359850860335826, "grad_norm": 0.337890625, "learning_rate": 9.336895711643301e-06, "loss": 1.9376, "step": 19712 }, { "epoch": 0.636017349887379, "grad_norm": 0.359375, "learning_rate": 9.335438475927643e-06, "loss": 1.9453, "step": 19713 }, { "epoch": 0.6360496137411753, "grad_norm": 0.328125, "learning_rate": 9.333981302563422e-06, "loss": 1.9546, "step": 19714 }, { "epoch": 0.6360818775949717, "grad_norm": 0.373046875, "learning_rate": 9.33252419156666e-06, "loss": 1.8764, "step": 19715 }, { "epoch": 0.636114141448768, "grad_norm": 0.328125, "learning_rate": 9.33106714295341e-06, "loss": 1.9437, "step": 19716 }, { "epoch": 0.6361464053025644, "grad_norm": 0.349609375, "learning_rate": 9.329610156739705e-06, "loss": 1.9308, "step": 19717 }, { "epoch": 0.6361786691563607, "grad_norm": 0.333984375, "learning_rate": 9.32815323294159e-06, "loss": 1.8969, "step": 19718 }, { "epoch": 0.6362109330101571, "grad_norm": 0.345703125, "learning_rate": 9.326696371575086e-06, "loss": 1.9485, "step": 19719 }, { "epoch": 0.6362431968639534, "grad_norm": 0.34765625, "learning_rate": 9.325239572656243e-06, "loss": 1.9381, "step": 19720 }, { "epoch": 0.6362754607177498, "grad_norm": 0.337890625, "learning_rate": 9.323782836201096e-06, "loss": 1.9485, "step": 19721 }, { "epoch": 0.636307724571546, "grad_norm": 0.369140625, "learning_rate": 9.32232616222567e-06, "loss": 1.9162, "step": 19722 }, { "epoch": 0.6363399884253425, "grad_norm": 0.345703125, "learning_rate": 9.320869550746e-06, "loss": 1.9248, "step": 19723 }, { "epoch": 0.6363722522791389, "grad_norm": 0.328125, "learning_rate": 9.319413001778134e-06, "loss": 1.9302, "step": 19724 }, { "epoch": 0.6364045161329351, "grad_norm": 0.373046875, "learning_rate": 9.317956515338088e-06, "loss": 1.9057, "step": 19725 }, { "epoch": 0.6364367799867315, "grad_norm": 0.35546875, "learning_rate": 9.316500091441897e-06, "loss": 1.901, "step": 19726 }, { "epoch": 0.6364690438405278, "grad_norm": 0.357421875, "learning_rate": 9.315043730105606e-06, "loss": 1.8901, "step": 19727 }, { "epoch": 0.6365013076943242, "grad_norm": 0.375, "learning_rate": 9.31358743134523e-06, "loss": 1.9176, "step": 19728 }, { "epoch": 0.6365335715481205, "grad_norm": 0.33984375, "learning_rate": 9.312131195176801e-06, "loss": 1.9211, "step": 19729 }, { "epoch": 0.6365658354019169, "grad_norm": 0.3671875, "learning_rate": 9.310675021616357e-06, "loss": 1.9288, "step": 19730 }, { "epoch": 0.6365980992557132, "grad_norm": 0.337890625, "learning_rate": 9.309218910679918e-06, "loss": 1.9285, "step": 19731 }, { "epoch": 0.6366303631095096, "grad_norm": 0.33984375, "learning_rate": 9.307762862383512e-06, "loss": 1.9422, "step": 19732 }, { "epoch": 0.6366626269633059, "grad_norm": 0.37109375, "learning_rate": 9.306306876743174e-06, "loss": 1.9442, "step": 19733 }, { "epoch": 0.6366948908171023, "grad_norm": 0.345703125, "learning_rate": 9.304850953774922e-06, "loss": 1.9274, "step": 19734 }, { "epoch": 0.6367271546708986, "grad_norm": 0.3515625, "learning_rate": 9.303395093494785e-06, "loss": 1.9014, "step": 19735 }, { "epoch": 0.636759418524695, "grad_norm": 0.33203125, "learning_rate": 9.30193929591879e-06, "loss": 1.9063, "step": 19736 }, { "epoch": 0.6367916823784913, "grad_norm": 0.34375, "learning_rate": 9.300483561062958e-06, "loss": 1.8902, "step": 19737 }, { "epoch": 0.6368239462322877, "grad_norm": 0.33984375, "learning_rate": 9.299027888943313e-06, "loss": 1.9463, "step": 19738 }, { "epoch": 0.636856210086084, "grad_norm": 0.33203125, "learning_rate": 9.297572279575883e-06, "loss": 1.9195, "step": 19739 }, { "epoch": 0.6368884739398804, "grad_norm": 0.3515625, "learning_rate": 9.296116732976684e-06, "loss": 1.9474, "step": 19740 }, { "epoch": 0.6369207377936766, "grad_norm": 0.3515625, "learning_rate": 9.294661249161739e-06, "loss": 1.9789, "step": 19741 }, { "epoch": 0.636953001647473, "grad_norm": 0.349609375, "learning_rate": 9.293205828147072e-06, "loss": 1.925, "step": 19742 }, { "epoch": 0.6369852655012693, "grad_norm": 0.3515625, "learning_rate": 9.2917504699487e-06, "loss": 1.9242, "step": 19743 }, { "epoch": 0.6370175293550657, "grad_norm": 0.328125, "learning_rate": 9.290295174582643e-06, "loss": 1.9385, "step": 19744 }, { "epoch": 0.6370497932088621, "grad_norm": 0.337890625, "learning_rate": 9.288839942064928e-06, "loss": 1.9246, "step": 19745 }, { "epoch": 0.6370820570626584, "grad_norm": 0.359375, "learning_rate": 9.287384772411552e-06, "loss": 1.8581, "step": 19746 }, { "epoch": 0.6371143209164548, "grad_norm": 0.337890625, "learning_rate": 9.285929665638554e-06, "loss": 1.951, "step": 19747 }, { "epoch": 0.6371465847702511, "grad_norm": 0.337890625, "learning_rate": 9.284474621761948e-06, "loss": 1.9181, "step": 19748 }, { "epoch": 0.6371788486240475, "grad_norm": 0.33984375, "learning_rate": 9.283019640797739e-06, "loss": 1.9396, "step": 19749 }, { "epoch": 0.6372111124778438, "grad_norm": 0.34765625, "learning_rate": 9.281564722761944e-06, "loss": 1.9208, "step": 19750 }, { "epoch": 0.6372433763316402, "grad_norm": 0.3359375, "learning_rate": 9.280109867670586e-06, "loss": 1.9301, "step": 19751 }, { "epoch": 0.6372756401854365, "grad_norm": 0.35546875, "learning_rate": 9.278655075539682e-06, "loss": 1.9229, "step": 19752 }, { "epoch": 0.6373079040392329, "grad_norm": 0.34375, "learning_rate": 9.277200346385233e-06, "loss": 1.9185, "step": 19753 }, { "epoch": 0.6373401678930292, "grad_norm": 0.3359375, "learning_rate": 9.275745680223252e-06, "loss": 1.9173, "step": 19754 }, { "epoch": 0.6373724317468256, "grad_norm": 0.33984375, "learning_rate": 9.274291077069765e-06, "loss": 1.9442, "step": 19755 }, { "epoch": 0.6374046956006219, "grad_norm": 0.337890625, "learning_rate": 9.272836536940769e-06, "loss": 1.8876, "step": 19756 }, { "epoch": 0.6374369594544183, "grad_norm": 0.36328125, "learning_rate": 9.271382059852276e-06, "loss": 1.9195, "step": 19757 }, { "epoch": 0.6374692233082145, "grad_norm": 0.357421875, "learning_rate": 9.269927645820311e-06, "loss": 1.911, "step": 19758 }, { "epoch": 0.6375014871620109, "grad_norm": 0.337890625, "learning_rate": 9.268473294860865e-06, "loss": 1.9559, "step": 19759 }, { "epoch": 0.6375337510158072, "grad_norm": 0.3515625, "learning_rate": 9.267019006989951e-06, "loss": 1.9154, "step": 19760 }, { "epoch": 0.6375660148696036, "grad_norm": 0.345703125, "learning_rate": 9.265564782223585e-06, "loss": 1.9393, "step": 19761 }, { "epoch": 0.6375982787233999, "grad_norm": 0.388671875, "learning_rate": 9.264110620577763e-06, "loss": 1.9244, "step": 19762 }, { "epoch": 0.6376305425771963, "grad_norm": 0.357421875, "learning_rate": 9.262656522068496e-06, "loss": 1.8853, "step": 19763 }, { "epoch": 0.6376628064309927, "grad_norm": 0.384765625, "learning_rate": 9.261202486711793e-06, "loss": 1.9168, "step": 19764 }, { "epoch": 0.637695070284789, "grad_norm": 0.33984375, "learning_rate": 9.259748514523654e-06, "loss": 1.911, "step": 19765 }, { "epoch": 0.6377273341385854, "grad_norm": 0.34375, "learning_rate": 9.258294605520086e-06, "loss": 1.8669, "step": 19766 }, { "epoch": 0.6377595979923817, "grad_norm": 0.3515625, "learning_rate": 9.256840759717093e-06, "loss": 1.901, "step": 19767 }, { "epoch": 0.6377918618461781, "grad_norm": 0.34375, "learning_rate": 9.255386977130677e-06, "loss": 1.8977, "step": 19768 }, { "epoch": 0.6378241256999744, "grad_norm": 0.357421875, "learning_rate": 9.253933257776835e-06, "loss": 1.895, "step": 19769 }, { "epoch": 0.6378563895537708, "grad_norm": 0.353515625, "learning_rate": 9.252479601671581e-06, "loss": 1.8911, "step": 19770 }, { "epoch": 0.6378886534075671, "grad_norm": 0.373046875, "learning_rate": 9.251026008830902e-06, "loss": 1.8927, "step": 19771 }, { "epoch": 0.6379209172613635, "grad_norm": 0.357421875, "learning_rate": 9.249572479270807e-06, "loss": 1.9202, "step": 19772 }, { "epoch": 0.6379531811151598, "grad_norm": 0.35546875, "learning_rate": 9.248119013007298e-06, "loss": 1.8868, "step": 19773 }, { "epoch": 0.6379854449689562, "grad_norm": 0.345703125, "learning_rate": 9.24666561005636e-06, "loss": 1.92, "step": 19774 }, { "epoch": 0.6380177088227524, "grad_norm": 0.36328125, "learning_rate": 9.245212270434002e-06, "loss": 1.9107, "step": 19775 }, { "epoch": 0.6380499726765488, "grad_norm": 0.34375, "learning_rate": 9.243758994156224e-06, "loss": 1.9103, "step": 19776 }, { "epoch": 0.6380822365303451, "grad_norm": 0.33984375, "learning_rate": 9.242305781239009e-06, "loss": 1.8595, "step": 19777 }, { "epoch": 0.6381145003841415, "grad_norm": 0.333984375, "learning_rate": 9.240852631698365e-06, "loss": 1.8371, "step": 19778 }, { "epoch": 0.6381467642379378, "grad_norm": 0.341796875, "learning_rate": 9.23939954555029e-06, "loss": 1.9261, "step": 19779 }, { "epoch": 0.6381790280917342, "grad_norm": 0.33984375, "learning_rate": 9.237946522810766e-06, "loss": 1.8634, "step": 19780 }, { "epoch": 0.6382112919455305, "grad_norm": 0.33984375, "learning_rate": 9.236493563495788e-06, "loss": 1.8998, "step": 19781 }, { "epoch": 0.6382435557993269, "grad_norm": 0.33984375, "learning_rate": 9.235040667621359e-06, "loss": 1.9173, "step": 19782 }, { "epoch": 0.6382758196531232, "grad_norm": 0.359375, "learning_rate": 9.233587835203472e-06, "loss": 1.9353, "step": 19783 }, { "epoch": 0.6383080835069196, "grad_norm": 0.330078125, "learning_rate": 9.232135066258107e-06, "loss": 1.889, "step": 19784 }, { "epoch": 0.638340347360716, "grad_norm": 0.34375, "learning_rate": 9.230682360801258e-06, "loss": 1.9104, "step": 19785 }, { "epoch": 0.6383726112145123, "grad_norm": 0.40234375, "learning_rate": 9.22922971884893e-06, "loss": 1.9004, "step": 19786 }, { "epoch": 0.6384048750683087, "grad_norm": 0.3515625, "learning_rate": 9.227777140417092e-06, "loss": 1.8806, "step": 19787 }, { "epoch": 0.638437138922105, "grad_norm": 0.37109375, "learning_rate": 9.226324625521743e-06, "loss": 1.8907, "step": 19788 }, { "epoch": 0.6384694027759014, "grad_norm": 0.376953125, "learning_rate": 9.224872174178875e-06, "loss": 1.9279, "step": 19789 }, { "epoch": 0.6385016666296977, "grad_norm": 0.431640625, "learning_rate": 9.223419786404468e-06, "loss": 1.9264, "step": 19790 }, { "epoch": 0.638533930483494, "grad_norm": 0.625, "learning_rate": 9.221967462214511e-06, "loss": 2.0086, "step": 19791 }, { "epoch": 0.6385661943372903, "grad_norm": 0.58203125, "learning_rate": 9.220515201624996e-06, "loss": 1.9517, "step": 19792 }, { "epoch": 0.6385984581910867, "grad_norm": 0.609375, "learning_rate": 9.2190630046519e-06, "loss": 1.9753, "step": 19793 }, { "epoch": 0.638630722044883, "grad_norm": 0.51953125, "learning_rate": 9.217610871311209e-06, "loss": 1.9767, "step": 19794 }, { "epoch": 0.6386629858986794, "grad_norm": 0.59765625, "learning_rate": 9.216158801618912e-06, "loss": 1.9657, "step": 19795 }, { "epoch": 0.6386952497524757, "grad_norm": 0.5234375, "learning_rate": 9.21470679559099e-06, "loss": 2.0074, "step": 19796 }, { "epoch": 0.6387275136062721, "grad_norm": 0.482421875, "learning_rate": 9.213254853243423e-06, "loss": 1.9689, "step": 19797 }, { "epoch": 0.6387597774600684, "grad_norm": 0.515625, "learning_rate": 9.2118029745922e-06, "loss": 1.9792, "step": 19798 }, { "epoch": 0.6387920413138648, "grad_norm": 0.396484375, "learning_rate": 9.210351159653291e-06, "loss": 1.9589, "step": 19799 }, { "epoch": 0.6388243051676611, "grad_norm": 0.451171875, "learning_rate": 9.208899408442685e-06, "loss": 1.9789, "step": 19800 }, { "epoch": 0.6388565690214575, "grad_norm": 0.478515625, "learning_rate": 9.207447720976362e-06, "loss": 1.9774, "step": 19801 }, { "epoch": 0.6388888328752538, "grad_norm": 0.4765625, "learning_rate": 9.205996097270296e-06, "loss": 1.9844, "step": 19802 }, { "epoch": 0.6389210967290502, "grad_norm": 0.400390625, "learning_rate": 9.20454453734047e-06, "loss": 2.0205, "step": 19803 }, { "epoch": 0.6389533605828465, "grad_norm": 0.443359375, "learning_rate": 9.203093041202865e-06, "loss": 1.9937, "step": 19804 }, { "epoch": 0.6389856244366429, "grad_norm": 0.4453125, "learning_rate": 9.201641608873443e-06, "loss": 1.9711, "step": 19805 }, { "epoch": 0.6390178882904393, "grad_norm": 0.4140625, "learning_rate": 9.200190240368194e-06, "loss": 1.9765, "step": 19806 }, { "epoch": 0.6390501521442356, "grad_norm": 0.41796875, "learning_rate": 9.1987389357031e-06, "loss": 1.9698, "step": 19807 }, { "epoch": 0.639082415998032, "grad_norm": 0.498046875, "learning_rate": 9.19728769489411e-06, "loss": 2.0135, "step": 19808 }, { "epoch": 0.6391146798518282, "grad_norm": 0.396484375, "learning_rate": 9.195836517957223e-06, "loss": 1.9577, "step": 19809 }, { "epoch": 0.6391469437056246, "grad_norm": 0.419921875, "learning_rate": 9.194385404908407e-06, "loss": 1.9936, "step": 19810 }, { "epoch": 0.6391792075594209, "grad_norm": 0.462890625, "learning_rate": 9.192934355763627e-06, "loss": 2.0148, "step": 19811 }, { "epoch": 0.6392114714132173, "grad_norm": 0.431640625, "learning_rate": 9.191483370538856e-06, "loss": 1.9867, "step": 19812 }, { "epoch": 0.6392437352670136, "grad_norm": 0.43359375, "learning_rate": 9.190032449250078e-06, "loss": 1.9718, "step": 19813 }, { "epoch": 0.63927599912081, "grad_norm": 0.44921875, "learning_rate": 9.188581591913246e-06, "loss": 2.05, "step": 19814 }, { "epoch": 0.6393082629746063, "grad_norm": 0.400390625, "learning_rate": 9.187130798544344e-06, "loss": 1.9851, "step": 19815 }, { "epoch": 0.6393405268284027, "grad_norm": 0.388671875, "learning_rate": 9.18568006915933e-06, "loss": 1.9834, "step": 19816 }, { "epoch": 0.639372790682199, "grad_norm": 0.4375, "learning_rate": 9.184229403774187e-06, "loss": 1.9585, "step": 19817 }, { "epoch": 0.6394050545359954, "grad_norm": 0.416015625, "learning_rate": 9.18277880240487e-06, "loss": 2.0245, "step": 19818 }, { "epoch": 0.6394373183897917, "grad_norm": 0.47265625, "learning_rate": 9.181328265067351e-06, "loss": 1.9761, "step": 19819 }, { "epoch": 0.6394695822435881, "grad_norm": 0.44921875, "learning_rate": 9.179877791777597e-06, "loss": 1.9403, "step": 19820 }, { "epoch": 0.6395018460973844, "grad_norm": 0.376953125, "learning_rate": 9.17842738255157e-06, "loss": 1.9671, "step": 19821 }, { "epoch": 0.6395341099511808, "grad_norm": 0.3984375, "learning_rate": 9.17697703740524e-06, "loss": 1.8768, "step": 19822 }, { "epoch": 0.639566373804977, "grad_norm": 0.400390625, "learning_rate": 9.17552675635457e-06, "loss": 1.9246, "step": 19823 }, { "epoch": 0.6395986376587735, "grad_norm": 0.3359375, "learning_rate": 9.174076539415524e-06, "loss": 1.9206, "step": 19824 }, { "epoch": 0.6396309015125698, "grad_norm": 0.37890625, "learning_rate": 9.172626386604062e-06, "loss": 1.8982, "step": 19825 }, { "epoch": 0.6396631653663661, "grad_norm": 0.373046875, "learning_rate": 9.171176297936149e-06, "loss": 1.9224, "step": 19826 }, { "epoch": 0.6396954292201625, "grad_norm": 0.380859375, "learning_rate": 9.169726273427744e-06, "loss": 1.9082, "step": 19827 }, { "epoch": 0.6397276930739588, "grad_norm": 0.36328125, "learning_rate": 9.168276313094812e-06, "loss": 1.8924, "step": 19828 }, { "epoch": 0.6397599569277552, "grad_norm": 0.37109375, "learning_rate": 9.16682641695331e-06, "loss": 1.8971, "step": 19829 }, { "epoch": 0.6397922207815515, "grad_norm": 0.345703125, "learning_rate": 9.165376585019198e-06, "loss": 1.9061, "step": 19830 }, { "epoch": 0.6398244846353479, "grad_norm": 0.35546875, "learning_rate": 9.163926817308433e-06, "loss": 1.9044, "step": 19831 }, { "epoch": 0.6398567484891442, "grad_norm": 0.36328125, "learning_rate": 9.162477113836979e-06, "loss": 1.9274, "step": 19832 }, { "epoch": 0.6398890123429406, "grad_norm": 0.341796875, "learning_rate": 9.161027474620785e-06, "loss": 1.8581, "step": 19833 }, { "epoch": 0.6399212761967369, "grad_norm": 0.37109375, "learning_rate": 9.159577899675813e-06, "loss": 1.9267, "step": 19834 }, { "epoch": 0.6399535400505333, "grad_norm": 0.341796875, "learning_rate": 9.158128389018022e-06, "loss": 1.891, "step": 19835 }, { "epoch": 0.6399858039043296, "grad_norm": 0.3515625, "learning_rate": 9.156678942663355e-06, "loss": 1.9129, "step": 19836 }, { "epoch": 0.640018067758126, "grad_norm": 0.388671875, "learning_rate": 9.155229560627776e-06, "loss": 1.9484, "step": 19837 }, { "epoch": 0.6400503316119223, "grad_norm": 0.3515625, "learning_rate": 9.153780242927246e-06, "loss": 1.9121, "step": 19838 }, { "epoch": 0.6400825954657187, "grad_norm": 0.71875, "learning_rate": 9.152330989577699e-06, "loss": 1.9543, "step": 19839 }, { "epoch": 0.640114859319515, "grad_norm": 0.3984375, "learning_rate": 9.150881800595097e-06, "loss": 1.9656, "step": 19840 }, { "epoch": 0.6401471231733113, "grad_norm": 0.75390625, "learning_rate": 9.149432675995401e-06, "loss": 1.9793, "step": 19841 }, { "epoch": 0.6401793870271076, "grad_norm": 0.609375, "learning_rate": 9.147983615794545e-06, "loss": 1.927, "step": 19842 }, { "epoch": 0.640211650880904, "grad_norm": 0.46875, "learning_rate": 9.146534620008486e-06, "loss": 1.9694, "step": 19843 }, { "epoch": 0.6402439147347003, "grad_norm": 0.5, "learning_rate": 9.145085688653182e-06, "loss": 1.9657, "step": 19844 }, { "epoch": 0.6402761785884967, "grad_norm": 0.50390625, "learning_rate": 9.14363682174457e-06, "loss": 1.9519, "step": 19845 }, { "epoch": 0.6403084424422931, "grad_norm": 0.462890625, "learning_rate": 9.142188019298601e-06, "loss": 1.9693, "step": 19846 }, { "epoch": 0.6403407062960894, "grad_norm": 0.38671875, "learning_rate": 9.140739281331228e-06, "loss": 1.9871, "step": 19847 }, { "epoch": 0.6403729701498858, "grad_norm": 0.396484375, "learning_rate": 9.13929060785839e-06, "loss": 1.9995, "step": 19848 }, { "epoch": 0.6404052340036821, "grad_norm": 0.439453125, "learning_rate": 9.137841998896036e-06, "loss": 1.9414, "step": 19849 }, { "epoch": 0.6404374978574785, "grad_norm": 0.44140625, "learning_rate": 9.136393454460112e-06, "loss": 1.9688, "step": 19850 }, { "epoch": 0.6404697617112748, "grad_norm": 0.416015625, "learning_rate": 9.134944974566566e-06, "loss": 1.9472, "step": 19851 }, { "epoch": 0.6405020255650712, "grad_norm": 0.375, "learning_rate": 9.133496559231337e-06, "loss": 1.9679, "step": 19852 }, { "epoch": 0.6405342894188675, "grad_norm": 0.3671875, "learning_rate": 9.132048208470366e-06, "loss": 1.9517, "step": 19853 }, { "epoch": 0.6405665532726639, "grad_norm": 0.40625, "learning_rate": 9.130599922299605e-06, "loss": 1.982, "step": 19854 }, { "epoch": 0.6405988171264602, "grad_norm": 0.392578125, "learning_rate": 9.129151700734985e-06, "loss": 1.9755, "step": 19855 }, { "epoch": 0.6406310809802566, "grad_norm": 0.42578125, "learning_rate": 9.127703543792449e-06, "loss": 1.9596, "step": 19856 }, { "epoch": 0.6406633448340529, "grad_norm": 0.375, "learning_rate": 9.126255451487944e-06, "loss": 1.9926, "step": 19857 }, { "epoch": 0.6406956086878492, "grad_norm": 0.396484375, "learning_rate": 9.124807423837403e-06, "loss": 1.99, "step": 19858 }, { "epoch": 0.6407278725416455, "grad_norm": 0.390625, "learning_rate": 9.123359460856767e-06, "loss": 1.9818, "step": 19859 }, { "epoch": 0.6407601363954419, "grad_norm": 0.408203125, "learning_rate": 9.121911562561975e-06, "loss": 1.9633, "step": 19860 }, { "epoch": 0.6407924002492382, "grad_norm": 0.400390625, "learning_rate": 9.120463728968962e-06, "loss": 1.9946, "step": 19861 }, { "epoch": 0.6408246641030346, "grad_norm": 0.396484375, "learning_rate": 9.119015960093668e-06, "loss": 1.8918, "step": 19862 }, { "epoch": 0.6408569279568309, "grad_norm": 0.40625, "learning_rate": 9.11756825595203e-06, "loss": 1.9591, "step": 19863 }, { "epoch": 0.6408891918106273, "grad_norm": 0.37109375, "learning_rate": 9.116120616559975e-06, "loss": 1.9586, "step": 19864 }, { "epoch": 0.6409214556644237, "grad_norm": 0.412109375, "learning_rate": 9.114673041933444e-06, "loss": 1.9768, "step": 19865 }, { "epoch": 0.64095371951822, "grad_norm": 0.392578125, "learning_rate": 9.113225532088377e-06, "loss": 1.9625, "step": 19866 }, { "epoch": 0.6409859833720164, "grad_norm": 0.37890625, "learning_rate": 9.111778087040692e-06, "loss": 1.9602, "step": 19867 }, { "epoch": 0.6410182472258127, "grad_norm": 0.380859375, "learning_rate": 9.110330706806333e-06, "loss": 1.9611, "step": 19868 }, { "epoch": 0.6410505110796091, "grad_norm": 0.37890625, "learning_rate": 9.108883391401233e-06, "loss": 1.9704, "step": 19869 }, { "epoch": 0.6410827749334054, "grad_norm": 0.3828125, "learning_rate": 9.107436140841312e-06, "loss": 1.9775, "step": 19870 }, { "epoch": 0.6411150387872018, "grad_norm": 0.3828125, "learning_rate": 9.105988955142503e-06, "loss": 2.0036, "step": 19871 }, { "epoch": 0.6411473026409981, "grad_norm": 0.4140625, "learning_rate": 9.104541834320749e-06, "loss": 1.9633, "step": 19872 }, { "epoch": 0.6411795664947945, "grad_norm": 0.365234375, "learning_rate": 9.103094778391965e-06, "loss": 1.978, "step": 19873 }, { "epoch": 0.6412118303485908, "grad_norm": 0.3984375, "learning_rate": 9.101647787372081e-06, "loss": 1.9518, "step": 19874 }, { "epoch": 0.6412440942023871, "grad_norm": 0.375, "learning_rate": 9.100200861277029e-06, "loss": 2.0119, "step": 19875 }, { "epoch": 0.6412763580561834, "grad_norm": 0.36328125, "learning_rate": 9.098754000122733e-06, "loss": 1.9448, "step": 19876 }, { "epoch": 0.6413086219099798, "grad_norm": 0.390625, "learning_rate": 9.097307203925118e-06, "loss": 1.9702, "step": 19877 }, { "epoch": 0.6413408857637761, "grad_norm": 0.38671875, "learning_rate": 9.095860472700114e-06, "loss": 1.9769, "step": 19878 }, { "epoch": 0.6413731496175725, "grad_norm": 0.3515625, "learning_rate": 9.09441380646364e-06, "loss": 1.977, "step": 19879 }, { "epoch": 0.6414054134713688, "grad_norm": 0.361328125, "learning_rate": 9.09296720523162e-06, "loss": 1.9831, "step": 19880 }, { "epoch": 0.6414376773251652, "grad_norm": 0.3671875, "learning_rate": 9.09152066901998e-06, "loss": 1.9818, "step": 19881 }, { "epoch": 0.6414699411789615, "grad_norm": 0.369140625, "learning_rate": 9.090074197844647e-06, "loss": 2.0018, "step": 19882 }, { "epoch": 0.6415022050327579, "grad_norm": 0.369140625, "learning_rate": 9.088627791721532e-06, "loss": 1.9933, "step": 19883 }, { "epoch": 0.6415344688865542, "grad_norm": 0.37890625, "learning_rate": 9.087181450666561e-06, "loss": 1.9699, "step": 19884 }, { "epoch": 0.6415667327403506, "grad_norm": 0.396484375, "learning_rate": 9.085735174695659e-06, "loss": 2.0063, "step": 19885 }, { "epoch": 0.641598996594147, "grad_norm": 0.51171875, "learning_rate": 9.08428896382474e-06, "loss": 2.0411, "step": 19886 }, { "epoch": 0.6416312604479433, "grad_norm": 0.439453125, "learning_rate": 9.082842818069722e-06, "loss": 2.057, "step": 19887 }, { "epoch": 0.6416635243017397, "grad_norm": 0.45703125, "learning_rate": 9.081396737446528e-06, "loss": 2.1239, "step": 19888 }, { "epoch": 0.641695788155536, "grad_norm": 0.53515625, "learning_rate": 9.079950721971071e-06, "loss": 2.1384, "step": 19889 }, { "epoch": 0.6417280520093324, "grad_norm": 0.51171875, "learning_rate": 9.078504771659269e-06, "loss": 2.1103, "step": 19890 }, { "epoch": 0.6417603158631286, "grad_norm": 0.474609375, "learning_rate": 9.077058886527041e-06, "loss": 2.1053, "step": 19891 }, { "epoch": 0.641792579716925, "grad_norm": 0.53125, "learning_rate": 9.075613066590297e-06, "loss": 2.1388, "step": 19892 }, { "epoch": 0.6418248435707213, "grad_norm": 0.48046875, "learning_rate": 9.074167311864956e-06, "loss": 2.1389, "step": 19893 }, { "epoch": 0.6418571074245177, "grad_norm": 0.486328125, "learning_rate": 9.07272162236693e-06, "loss": 2.1521, "step": 19894 }, { "epoch": 0.641889371278314, "grad_norm": 0.546875, "learning_rate": 9.071275998112131e-06, "loss": 2.1369, "step": 19895 }, { "epoch": 0.6419216351321104, "grad_norm": 0.578125, "learning_rate": 9.069830439116474e-06, "loss": 2.1037, "step": 19896 }, { "epoch": 0.6419538989859067, "grad_norm": 0.65625, "learning_rate": 9.068384945395872e-06, "loss": 2.1423, "step": 19897 }, { "epoch": 0.6419861628397031, "grad_norm": 0.65234375, "learning_rate": 9.066939516966227e-06, "loss": 2.1202, "step": 19898 }, { "epoch": 0.6420184266934994, "grad_norm": 0.50390625, "learning_rate": 9.065494153843455e-06, "loss": 2.1369, "step": 19899 }, { "epoch": 0.6420506905472958, "grad_norm": 0.59375, "learning_rate": 9.064048856043475e-06, "loss": 2.1494, "step": 19900 }, { "epoch": 0.6420829544010921, "grad_norm": 0.6953125, "learning_rate": 9.06260362358218e-06, "loss": 2.1319, "step": 19901 }, { "epoch": 0.6421152182548885, "grad_norm": 0.447265625, "learning_rate": 9.06115845647548e-06, "loss": 2.1241, "step": 19902 }, { "epoch": 0.6421474821086848, "grad_norm": 0.6875, "learning_rate": 9.059713354739297e-06, "loss": 2.1121, "step": 19903 }, { "epoch": 0.6421797459624812, "grad_norm": 0.6796875, "learning_rate": 9.058268318389524e-06, "loss": 2.1434, "step": 19904 }, { "epoch": 0.6422120098162775, "grad_norm": 0.55078125, "learning_rate": 9.05682334744207e-06, "loss": 2.1386, "step": 19905 }, { "epoch": 0.6422442736700739, "grad_norm": 0.79296875, "learning_rate": 9.055378441912843e-06, "loss": 2.1348, "step": 19906 }, { "epoch": 0.6422765375238703, "grad_norm": 0.5, "learning_rate": 9.053933601817743e-06, "loss": 2.1408, "step": 19907 }, { "epoch": 0.6423088013776665, "grad_norm": 0.65234375, "learning_rate": 9.052488827172677e-06, "loss": 2.0958, "step": 19908 }, { "epoch": 0.642341065231463, "grad_norm": 0.462890625, "learning_rate": 9.051044117993547e-06, "loss": 2.1031, "step": 19909 }, { "epoch": 0.6423733290852592, "grad_norm": 0.51953125, "learning_rate": 9.049599474296256e-06, "loss": 2.045, "step": 19910 }, { "epoch": 0.6424055929390556, "grad_norm": 0.44140625, "learning_rate": 9.048154896096704e-06, "loss": 2.0434, "step": 19911 }, { "epoch": 0.6424378567928519, "grad_norm": 0.458984375, "learning_rate": 9.046710383410795e-06, "loss": 2.0597, "step": 19912 }, { "epoch": 0.6424701206466483, "grad_norm": 0.498046875, "learning_rate": 9.045265936254426e-06, "loss": 2.0299, "step": 19913 }, { "epoch": 0.6425023845004446, "grad_norm": 0.4375, "learning_rate": 9.043821554643497e-06, "loss": 2.0754, "step": 19914 }, { "epoch": 0.642534648354241, "grad_norm": 0.52734375, "learning_rate": 9.042377238593908e-06, "loss": 2.0146, "step": 19915 }, { "epoch": 0.6425669122080373, "grad_norm": 0.4296875, "learning_rate": 9.040932988121558e-06, "loss": 2.048, "step": 19916 }, { "epoch": 0.6425991760618337, "grad_norm": 0.4765625, "learning_rate": 9.039488803242341e-06, "loss": 2.0249, "step": 19917 }, { "epoch": 0.64263143991563, "grad_norm": 0.4375, "learning_rate": 9.038044683972155e-06, "loss": 2.0533, "step": 19918 }, { "epoch": 0.6426637037694264, "grad_norm": 0.423828125, "learning_rate": 9.036600630326899e-06, "loss": 2.0479, "step": 19919 }, { "epoch": 0.6426959676232227, "grad_norm": 0.41015625, "learning_rate": 9.03515664232246e-06, "loss": 2.0421, "step": 19920 }, { "epoch": 0.6427282314770191, "grad_norm": 0.453125, "learning_rate": 9.03371271997474e-06, "loss": 2.1229, "step": 19921 }, { "epoch": 0.6427604953308154, "grad_norm": 0.4453125, "learning_rate": 9.032268863299631e-06, "loss": 2.066, "step": 19922 }, { "epoch": 0.6427927591846118, "grad_norm": 0.41796875, "learning_rate": 9.030825072313026e-06, "loss": 2.053, "step": 19923 }, { "epoch": 0.642825023038408, "grad_norm": 0.443359375, "learning_rate": 9.029381347030813e-06, "loss": 2.119, "step": 19924 }, { "epoch": 0.6428572868922044, "grad_norm": 0.58984375, "learning_rate": 9.027937687468894e-06, "loss": 2.1189, "step": 19925 }, { "epoch": 0.6428895507460008, "grad_norm": 0.546875, "learning_rate": 9.026494093643144e-06, "loss": 2.1045, "step": 19926 }, { "epoch": 0.6429218145997971, "grad_norm": 0.439453125, "learning_rate": 9.025050565569463e-06, "loss": 2.1181, "step": 19927 }, { "epoch": 0.6429540784535935, "grad_norm": 0.50390625, "learning_rate": 9.023607103263747e-06, "loss": 2.1211, "step": 19928 }, { "epoch": 0.6429863423073898, "grad_norm": 0.5, "learning_rate": 9.022163706741867e-06, "loss": 2.1229, "step": 19929 }, { "epoch": 0.6430186061611862, "grad_norm": 0.451171875, "learning_rate": 9.020720376019722e-06, "loss": 2.1383, "step": 19930 }, { "epoch": 0.6430508700149825, "grad_norm": 0.61328125, "learning_rate": 9.019277111113205e-06, "loss": 2.1634, "step": 19931 }, { "epoch": 0.6430831338687789, "grad_norm": 0.6171875, "learning_rate": 9.01783391203819e-06, "loss": 2.1159, "step": 19932 }, { "epoch": 0.6431153977225752, "grad_norm": 0.54296875, "learning_rate": 9.016390778810562e-06, "loss": 2.13, "step": 19933 }, { "epoch": 0.6431476615763716, "grad_norm": 0.458984375, "learning_rate": 9.014947711446221e-06, "loss": 2.131, "step": 19934 }, { "epoch": 0.6431799254301679, "grad_norm": 0.640625, "learning_rate": 9.013504709961038e-06, "loss": 2.1668, "step": 19935 }, { "epoch": 0.6432121892839643, "grad_norm": 0.83203125, "learning_rate": 9.012061774370898e-06, "loss": 2.1405, "step": 19936 }, { "epoch": 0.6432444531377606, "grad_norm": 0.73046875, "learning_rate": 9.010618904691692e-06, "loss": 2.139, "step": 19937 }, { "epoch": 0.643276716991557, "grad_norm": 0.5, "learning_rate": 9.009176100939291e-06, "loss": 2.1102, "step": 19938 }, { "epoch": 0.6433089808453533, "grad_norm": 0.62109375, "learning_rate": 9.007733363129584e-06, "loss": 2.1434, "step": 19939 }, { "epoch": 0.6433412446991497, "grad_norm": 0.484375, "learning_rate": 9.00629069127845e-06, "loss": 2.1321, "step": 19940 }, { "epoch": 0.643373508552946, "grad_norm": 0.54296875, "learning_rate": 9.004848085401767e-06, "loss": 2.1676, "step": 19941 }, { "epoch": 0.6434057724067423, "grad_norm": 0.5625, "learning_rate": 9.003405545515415e-06, "loss": 2.1259, "step": 19942 }, { "epoch": 0.6434380362605386, "grad_norm": 0.45703125, "learning_rate": 9.001963071635275e-06, "loss": 2.1606, "step": 19943 }, { "epoch": 0.643470300114335, "grad_norm": 0.55078125, "learning_rate": 9.00052066377722e-06, "loss": 2.1452, "step": 19944 }, { "epoch": 0.6435025639681313, "grad_norm": 0.421875, "learning_rate": 8.999078321957128e-06, "loss": 2.1162, "step": 19945 }, { "epoch": 0.6435348278219277, "grad_norm": 0.6015625, "learning_rate": 8.997636046190882e-06, "loss": 2.1184, "step": 19946 }, { "epoch": 0.6435670916757241, "grad_norm": 0.5234375, "learning_rate": 8.996193836494348e-06, "loss": 2.1681, "step": 19947 }, { "epoch": 0.6435993555295204, "grad_norm": 0.51953125, "learning_rate": 8.994751692883407e-06, "loss": 2.1569, "step": 19948 }, { "epoch": 0.6436316193833168, "grad_norm": 0.625, "learning_rate": 8.993309615373928e-06, "loss": 2.135, "step": 19949 }, { "epoch": 0.6436638832371131, "grad_norm": 0.470703125, "learning_rate": 8.991867603981794e-06, "loss": 2.1335, "step": 19950 }, { "epoch": 0.6436961470909095, "grad_norm": 0.5546875, "learning_rate": 8.990425658722866e-06, "loss": 2.1651, "step": 19951 }, { "epoch": 0.6437284109447058, "grad_norm": 0.640625, "learning_rate": 8.988983779613023e-06, "loss": 2.1001, "step": 19952 }, { "epoch": 0.6437606747985022, "grad_norm": 0.435546875, "learning_rate": 8.987541966668135e-06, "loss": 2.0963, "step": 19953 }, { "epoch": 0.6437929386522985, "grad_norm": 0.55859375, "learning_rate": 8.986100219904068e-06, "loss": 2.0114, "step": 19954 }, { "epoch": 0.6438252025060949, "grad_norm": 0.453125, "learning_rate": 8.984658539336698e-06, "loss": 2.0434, "step": 19955 }, { "epoch": 0.6438574663598912, "grad_norm": 0.55078125, "learning_rate": 8.983216924981897e-06, "loss": 2.0823, "step": 19956 }, { "epoch": 0.6438897302136876, "grad_norm": 0.4609375, "learning_rate": 8.981775376855518e-06, "loss": 2.0826, "step": 19957 }, { "epoch": 0.6439219940674838, "grad_norm": 0.494140625, "learning_rate": 8.980333894973441e-06, "loss": 2.0938, "step": 19958 }, { "epoch": 0.6439542579212802, "grad_norm": 0.40625, "learning_rate": 8.978892479351538e-06, "loss": 2.0753, "step": 19959 }, { "epoch": 0.6439865217750765, "grad_norm": 0.40234375, "learning_rate": 8.977451130005657e-06, "loss": 1.997, "step": 19960 }, { "epoch": 0.6440187856288729, "grad_norm": 0.4140625, "learning_rate": 8.976009846951674e-06, "loss": 1.9934, "step": 19961 }, { "epoch": 0.6440510494826692, "grad_norm": 0.41796875, "learning_rate": 8.974568630205462e-06, "loss": 2.006, "step": 19962 }, { "epoch": 0.6440833133364656, "grad_norm": 0.396484375, "learning_rate": 8.973127479782868e-06, "loss": 2.0225, "step": 19963 }, { "epoch": 0.6441155771902619, "grad_norm": 0.404296875, "learning_rate": 8.971686395699762e-06, "loss": 2.0192, "step": 19964 }, { "epoch": 0.6441478410440583, "grad_norm": 0.47265625, "learning_rate": 8.970245377972014e-06, "loss": 1.9957, "step": 19965 }, { "epoch": 0.6441801048978547, "grad_norm": 0.3828125, "learning_rate": 8.968804426615475e-06, "loss": 1.9714, "step": 19966 }, { "epoch": 0.644212368751651, "grad_norm": 0.34765625, "learning_rate": 8.967363541646008e-06, "loss": 1.9375, "step": 19967 }, { "epoch": 0.6442446326054474, "grad_norm": 0.40234375, "learning_rate": 8.965922723079483e-06, "loss": 1.8836, "step": 19968 }, { "epoch": 0.6442768964592437, "grad_norm": 0.392578125, "learning_rate": 8.964481970931745e-06, "loss": 1.929, "step": 19969 }, { "epoch": 0.6443091603130401, "grad_norm": 0.349609375, "learning_rate": 8.96304128521866e-06, "loss": 1.901, "step": 19970 }, { "epoch": 0.6443414241668364, "grad_norm": 0.365234375, "learning_rate": 8.961600665956088e-06, "loss": 1.9205, "step": 19971 }, { "epoch": 0.6443736880206328, "grad_norm": 0.357421875, "learning_rate": 8.960160113159883e-06, "loss": 1.901, "step": 19972 }, { "epoch": 0.6444059518744291, "grad_norm": 0.341796875, "learning_rate": 8.9587196268459e-06, "loss": 1.9299, "step": 19973 }, { "epoch": 0.6444382157282255, "grad_norm": 0.353515625, "learning_rate": 8.957279207030003e-06, "loss": 1.9029, "step": 19974 }, { "epoch": 0.6444704795820217, "grad_norm": 0.3984375, "learning_rate": 8.955838853728037e-06, "loss": 1.9315, "step": 19975 }, { "epoch": 0.6445027434358181, "grad_norm": 0.35546875, "learning_rate": 8.954398566955862e-06, "loss": 1.8893, "step": 19976 }, { "epoch": 0.6445350072896144, "grad_norm": 0.34375, "learning_rate": 8.952958346729331e-06, "loss": 1.9499, "step": 19977 }, { "epoch": 0.6445672711434108, "grad_norm": 0.359375, "learning_rate": 8.951518193064296e-06, "loss": 1.9647, "step": 19978 }, { "epoch": 0.6445995349972071, "grad_norm": 0.45703125, "learning_rate": 8.950078105976608e-06, "loss": 1.9833, "step": 19979 }, { "epoch": 0.6446317988510035, "grad_norm": 0.46875, "learning_rate": 8.948638085482123e-06, "loss": 1.973, "step": 19980 }, { "epoch": 0.6446640627047998, "grad_norm": 0.67578125, "learning_rate": 8.947198131596686e-06, "loss": 2.0216, "step": 19981 }, { "epoch": 0.6446963265585962, "grad_norm": 0.5234375, "learning_rate": 8.945758244336149e-06, "loss": 2.0267, "step": 19982 }, { "epoch": 0.6447285904123925, "grad_norm": 0.44140625, "learning_rate": 8.944318423716366e-06, "loss": 2.0457, "step": 19983 }, { "epoch": 0.6447608542661889, "grad_norm": 0.49609375, "learning_rate": 8.94287866975318e-06, "loss": 1.9938, "step": 19984 }, { "epoch": 0.6447931181199852, "grad_norm": 0.447265625, "learning_rate": 8.941438982462439e-06, "loss": 2.0289, "step": 19985 }, { "epoch": 0.6448253819737816, "grad_norm": 0.4375, "learning_rate": 8.939999361859991e-06, "loss": 1.9474, "step": 19986 }, { "epoch": 0.644857645827578, "grad_norm": 0.462890625, "learning_rate": 8.938559807961688e-06, "loss": 2.0275, "step": 19987 }, { "epoch": 0.6448899096813743, "grad_norm": 0.486328125, "learning_rate": 8.937120320783362e-06, "loss": 2.038, "step": 19988 }, { "epoch": 0.6449221735351707, "grad_norm": 0.5703125, "learning_rate": 8.93568090034087e-06, "loss": 2.0976, "step": 19989 }, { "epoch": 0.644954437388967, "grad_norm": 0.5078125, "learning_rate": 8.934241546650055e-06, "loss": 2.0811, "step": 19990 }, { "epoch": 0.6449867012427634, "grad_norm": 0.431640625, "learning_rate": 8.932802259726754e-06, "loss": 2.1037, "step": 19991 }, { "epoch": 0.6450189650965596, "grad_norm": 0.55859375, "learning_rate": 8.93136303958681e-06, "loss": 2.0805, "step": 19992 }, { "epoch": 0.645051228950356, "grad_norm": 0.412109375, "learning_rate": 8.929923886246077e-06, "loss": 2.1035, "step": 19993 }, { "epoch": 0.6450834928041523, "grad_norm": 0.494140625, "learning_rate": 8.92848479972038e-06, "loss": 2.06, "step": 19994 }, { "epoch": 0.6451157566579487, "grad_norm": 0.4375, "learning_rate": 8.927045780025564e-06, "loss": 2.0661, "step": 19995 }, { "epoch": 0.645148020511745, "grad_norm": 0.51171875, "learning_rate": 8.92560682717748e-06, "loss": 2.0639, "step": 19996 }, { "epoch": 0.6451802843655414, "grad_norm": 0.4453125, "learning_rate": 8.924167941191953e-06, "loss": 2.0794, "step": 19997 }, { "epoch": 0.6452125482193377, "grad_norm": 0.47265625, "learning_rate": 8.922729122084827e-06, "loss": 2.0912, "step": 19998 }, { "epoch": 0.6452448120731341, "grad_norm": 0.5078125, "learning_rate": 8.92129036987194e-06, "loss": 2.0703, "step": 19999 }, { "epoch": 0.6452770759269304, "grad_norm": 0.5078125, "learning_rate": 8.919851684569126e-06, "loss": 2.1063, "step": 20000 }, { "epoch": 0.6453093397807268, "grad_norm": 0.54296875, "learning_rate": 8.918413066192225e-06, "loss": 2.0725, "step": 20001 }, { "epoch": 0.6453416036345231, "grad_norm": 0.5234375, "learning_rate": 8.916974514757072e-06, "loss": 2.0822, "step": 20002 }, { "epoch": 0.6453738674883195, "grad_norm": 0.49609375, "learning_rate": 8.915536030279495e-06, "loss": 2.1005, "step": 20003 }, { "epoch": 0.6454061313421158, "grad_norm": 0.51953125, "learning_rate": 8.914097612775332e-06, "loss": 2.0937, "step": 20004 }, { "epoch": 0.6454383951959122, "grad_norm": 0.5390625, "learning_rate": 8.912659262260421e-06, "loss": 2.0905, "step": 20005 }, { "epoch": 0.6454706590497085, "grad_norm": 0.5859375, "learning_rate": 8.911220978750586e-06, "loss": 2.0646, "step": 20006 }, { "epoch": 0.6455029229035049, "grad_norm": 0.4609375, "learning_rate": 8.909782762261663e-06, "loss": 2.056, "step": 20007 }, { "epoch": 0.6455351867573013, "grad_norm": 0.40234375, "learning_rate": 8.908344612809485e-06, "loss": 1.9933, "step": 20008 }, { "epoch": 0.6455674506110975, "grad_norm": 0.453125, "learning_rate": 8.906906530409878e-06, "loss": 2.02, "step": 20009 }, { "epoch": 0.645599714464894, "grad_norm": 0.46875, "learning_rate": 8.905468515078669e-06, "loss": 2.0097, "step": 20010 }, { "epoch": 0.6456319783186902, "grad_norm": 0.40625, "learning_rate": 8.904030566831696e-06, "loss": 2.0115, "step": 20011 }, { "epoch": 0.6456642421724866, "grad_norm": 0.478515625, "learning_rate": 8.902592685684778e-06, "loss": 1.9972, "step": 20012 }, { "epoch": 0.6456965060262829, "grad_norm": 0.419921875, "learning_rate": 8.901154871653743e-06, "loss": 1.9608, "step": 20013 }, { "epoch": 0.6457287698800793, "grad_norm": 0.365234375, "learning_rate": 8.899717124754425e-06, "loss": 1.9703, "step": 20014 }, { "epoch": 0.6457610337338756, "grad_norm": 0.4296875, "learning_rate": 8.898279445002643e-06, "loss": 2.0578, "step": 20015 }, { "epoch": 0.645793297587672, "grad_norm": 0.4140625, "learning_rate": 8.896841832414224e-06, "loss": 1.9745, "step": 20016 }, { "epoch": 0.6458255614414683, "grad_norm": 0.380859375, "learning_rate": 8.89540428700499e-06, "loss": 1.9882, "step": 20017 }, { "epoch": 0.6458578252952647, "grad_norm": 0.40234375, "learning_rate": 8.893966808790772e-06, "loss": 1.9907, "step": 20018 }, { "epoch": 0.645890089149061, "grad_norm": 0.390625, "learning_rate": 8.892529397787378e-06, "loss": 2.0122, "step": 20019 }, { "epoch": 0.6459223530028574, "grad_norm": 0.39453125, "learning_rate": 8.891092054010641e-06, "loss": 1.9802, "step": 20020 }, { "epoch": 0.6459546168566537, "grad_norm": 0.3984375, "learning_rate": 8.889654777476387e-06, "loss": 2.0063, "step": 20021 }, { "epoch": 0.6459868807104501, "grad_norm": 0.400390625, "learning_rate": 8.888217568200423e-06, "loss": 2.0338, "step": 20022 }, { "epoch": 0.6460191445642464, "grad_norm": 0.375, "learning_rate": 8.886780426198573e-06, "loss": 2.0088, "step": 20023 }, { "epoch": 0.6460514084180428, "grad_norm": 0.390625, "learning_rate": 8.88534335148667e-06, "loss": 1.9809, "step": 20024 }, { "epoch": 0.646083672271839, "grad_norm": 0.3828125, "learning_rate": 8.883906344080512e-06, "loss": 2.0098, "step": 20025 }, { "epoch": 0.6461159361256354, "grad_norm": 0.361328125, "learning_rate": 8.882469403995924e-06, "loss": 2.0021, "step": 20026 }, { "epoch": 0.6461481999794318, "grad_norm": 0.365234375, "learning_rate": 8.88103253124873e-06, "loss": 2.0186, "step": 20027 }, { "epoch": 0.6461804638332281, "grad_norm": 0.373046875, "learning_rate": 8.879595725854737e-06, "loss": 1.9694, "step": 20028 }, { "epoch": 0.6462127276870245, "grad_norm": 0.37109375, "learning_rate": 8.878158987829761e-06, "loss": 1.9806, "step": 20029 }, { "epoch": 0.6462449915408208, "grad_norm": 0.392578125, "learning_rate": 8.876722317189623e-06, "loss": 1.9969, "step": 20030 }, { "epoch": 0.6462772553946172, "grad_norm": 0.375, "learning_rate": 8.875285713950131e-06, "loss": 1.9955, "step": 20031 }, { "epoch": 0.6463095192484135, "grad_norm": 0.36328125, "learning_rate": 8.873849178127099e-06, "loss": 2.0158, "step": 20032 }, { "epoch": 0.6463417831022099, "grad_norm": 0.37890625, "learning_rate": 8.872412709736345e-06, "loss": 1.9682, "step": 20033 }, { "epoch": 0.6463740469560062, "grad_norm": 0.37109375, "learning_rate": 8.870976308793672e-06, "loss": 2.0074, "step": 20034 }, { "epoch": 0.6464063108098026, "grad_norm": 0.375, "learning_rate": 8.869539975314895e-06, "loss": 1.9552, "step": 20035 }, { "epoch": 0.6464385746635989, "grad_norm": 0.390625, "learning_rate": 8.868103709315824e-06, "loss": 1.9708, "step": 20036 }, { "epoch": 0.6464708385173953, "grad_norm": 0.38671875, "learning_rate": 8.866667510812271e-06, "loss": 2.0024, "step": 20037 }, { "epoch": 0.6465031023711916, "grad_norm": 0.365234375, "learning_rate": 8.865231379820037e-06, "loss": 2.0007, "step": 20038 }, { "epoch": 0.646535366224988, "grad_norm": 0.392578125, "learning_rate": 8.86379531635494e-06, "loss": 1.9452, "step": 20039 }, { "epoch": 0.6465676300787843, "grad_norm": 0.6953125, "learning_rate": 8.86235932043278e-06, "loss": 1.9413, "step": 20040 }, { "epoch": 0.6465998939325807, "grad_norm": 0.390625, "learning_rate": 8.860923392069364e-06, "loss": 1.9669, "step": 20041 }, { "epoch": 0.646632157786377, "grad_norm": 0.376953125, "learning_rate": 8.859487531280506e-06, "loss": 1.9792, "step": 20042 }, { "epoch": 0.6466644216401733, "grad_norm": 0.376953125, "learning_rate": 8.858051738081994e-06, "loss": 1.9752, "step": 20043 }, { "epoch": 0.6466966854939696, "grad_norm": 0.400390625, "learning_rate": 8.856616012489646e-06, "loss": 2.0133, "step": 20044 }, { "epoch": 0.646728949347766, "grad_norm": 0.375, "learning_rate": 8.855180354519269e-06, "loss": 1.9863, "step": 20045 }, { "epoch": 0.6467612132015623, "grad_norm": 0.3828125, "learning_rate": 8.853744764186647e-06, "loss": 1.9786, "step": 20046 }, { "epoch": 0.6467934770553587, "grad_norm": 0.412109375, "learning_rate": 8.852309241507597e-06, "loss": 2.0021, "step": 20047 }, { "epoch": 0.6468257409091551, "grad_norm": 0.373046875, "learning_rate": 8.850873786497918e-06, "loss": 2.0004, "step": 20048 }, { "epoch": 0.6468580047629514, "grad_norm": 0.37890625, "learning_rate": 8.849438399173413e-06, "loss": 1.9624, "step": 20049 }, { "epoch": 0.6468902686167478, "grad_norm": 0.41015625, "learning_rate": 8.84800307954987e-06, "loss": 1.9522, "step": 20050 }, { "epoch": 0.6469225324705441, "grad_norm": 0.375, "learning_rate": 8.846567827643097e-06, "loss": 1.9811, "step": 20051 }, { "epoch": 0.6469547963243405, "grad_norm": 0.392578125, "learning_rate": 8.845132643468899e-06, "loss": 1.9494, "step": 20052 }, { "epoch": 0.6469870601781368, "grad_norm": 0.376953125, "learning_rate": 8.843697527043058e-06, "loss": 1.9578, "step": 20053 }, { "epoch": 0.6470193240319332, "grad_norm": 0.384765625, "learning_rate": 8.842262478381375e-06, "loss": 1.9359, "step": 20054 }, { "epoch": 0.6470515878857295, "grad_norm": 0.408203125, "learning_rate": 8.84082749749966e-06, "loss": 1.963, "step": 20055 }, { "epoch": 0.6470838517395259, "grad_norm": 0.3671875, "learning_rate": 8.83939258441369e-06, "loss": 1.9885, "step": 20056 }, { "epoch": 0.6471161155933222, "grad_norm": 0.392578125, "learning_rate": 8.837957739139267e-06, "loss": 1.9956, "step": 20057 }, { "epoch": 0.6471483794471186, "grad_norm": 0.3984375, "learning_rate": 8.836522961692187e-06, "loss": 1.9685, "step": 20058 }, { "epoch": 0.6471806433009148, "grad_norm": 0.3984375, "learning_rate": 8.835088252088238e-06, "loss": 2.0027, "step": 20059 }, { "epoch": 0.6472129071547112, "grad_norm": 0.39453125, "learning_rate": 8.833653610343215e-06, "loss": 1.9841, "step": 20060 }, { "epoch": 0.6472451710085075, "grad_norm": 0.423828125, "learning_rate": 8.832219036472912e-06, "loss": 1.9664, "step": 20061 }, { "epoch": 0.6472774348623039, "grad_norm": 0.384765625, "learning_rate": 8.830784530493114e-06, "loss": 1.9764, "step": 20062 }, { "epoch": 0.6473096987161002, "grad_norm": 0.390625, "learning_rate": 8.829350092419615e-06, "loss": 1.9323, "step": 20063 }, { "epoch": 0.6473419625698966, "grad_norm": 0.427734375, "learning_rate": 8.827915722268208e-06, "loss": 1.9932, "step": 20064 }, { "epoch": 0.6473742264236929, "grad_norm": 0.369140625, "learning_rate": 8.82648142005467e-06, "loss": 1.9546, "step": 20065 }, { "epoch": 0.6474064902774893, "grad_norm": 0.3671875, "learning_rate": 8.825047185794798e-06, "loss": 1.9801, "step": 20066 }, { "epoch": 0.6474387541312856, "grad_norm": 0.359375, "learning_rate": 8.823613019504377e-06, "loss": 1.9627, "step": 20067 }, { "epoch": 0.647471017985082, "grad_norm": 0.3828125, "learning_rate": 8.822178921199194e-06, "loss": 1.8636, "step": 20068 }, { "epoch": 0.6475032818388784, "grad_norm": 0.37109375, "learning_rate": 8.820744890895031e-06, "loss": 1.8885, "step": 20069 }, { "epoch": 0.6475355456926747, "grad_norm": 0.353515625, "learning_rate": 8.81931092860768e-06, "loss": 1.8897, "step": 20070 }, { "epoch": 0.6475678095464711, "grad_norm": 0.3671875, "learning_rate": 8.817877034352913e-06, "loss": 1.8737, "step": 20071 }, { "epoch": 0.6476000734002674, "grad_norm": 0.36328125, "learning_rate": 8.816443208146526e-06, "loss": 1.8856, "step": 20072 }, { "epoch": 0.6476323372540638, "grad_norm": 0.37109375, "learning_rate": 8.8150094500043e-06, "loss": 1.8733, "step": 20073 }, { "epoch": 0.6476646011078601, "grad_norm": 0.44140625, "learning_rate": 8.813575759942002e-06, "loss": 1.8962, "step": 20074 }, { "epoch": 0.6476968649616565, "grad_norm": 0.369140625, "learning_rate": 8.81214213797543e-06, "loss": 1.8664, "step": 20075 }, { "epoch": 0.6477291288154527, "grad_norm": 0.373046875, "learning_rate": 8.810708584120362e-06, "loss": 1.9354, "step": 20076 }, { "epoch": 0.6477613926692491, "grad_norm": 0.384765625, "learning_rate": 8.809275098392566e-06, "loss": 1.8644, "step": 20077 }, { "epoch": 0.6477936565230454, "grad_norm": 0.3671875, "learning_rate": 8.80784168080783e-06, "loss": 1.8692, "step": 20078 }, { "epoch": 0.6478259203768418, "grad_norm": 0.369140625, "learning_rate": 8.806408331381936e-06, "loss": 1.911, "step": 20079 }, { "epoch": 0.6478581842306381, "grad_norm": 0.34375, "learning_rate": 8.804975050130652e-06, "loss": 1.912, "step": 20080 }, { "epoch": 0.6478904480844345, "grad_norm": 0.3984375, "learning_rate": 8.803541837069755e-06, "loss": 1.875, "step": 20081 }, { "epoch": 0.6479227119382308, "grad_norm": 0.34375, "learning_rate": 8.802108692215028e-06, "loss": 1.9129, "step": 20082 }, { "epoch": 0.6479549757920272, "grad_norm": 0.357421875, "learning_rate": 8.800675615582247e-06, "loss": 1.9031, "step": 20083 }, { "epoch": 0.6479872396458235, "grad_norm": 0.36328125, "learning_rate": 8.799242607187174e-06, "loss": 1.8911, "step": 20084 }, { "epoch": 0.6480195034996199, "grad_norm": 0.345703125, "learning_rate": 8.79780966704559e-06, "loss": 1.89, "step": 20085 }, { "epoch": 0.6480517673534162, "grad_norm": 0.33984375, "learning_rate": 8.796376795173274e-06, "loss": 1.9182, "step": 20086 }, { "epoch": 0.6480840312072126, "grad_norm": 0.376953125, "learning_rate": 8.794943991585988e-06, "loss": 1.8508, "step": 20087 }, { "epoch": 0.648116295061009, "grad_norm": 0.361328125, "learning_rate": 8.793511256299505e-06, "loss": 1.8674, "step": 20088 }, { "epoch": 0.6481485589148053, "grad_norm": 0.443359375, "learning_rate": 8.792078589329602e-06, "loss": 1.9067, "step": 20089 }, { "epoch": 0.6481808227686017, "grad_norm": 0.34765625, "learning_rate": 8.79064599069204e-06, "loss": 1.8993, "step": 20090 }, { "epoch": 0.648213086622398, "grad_norm": 0.384765625, "learning_rate": 8.789213460402595e-06, "loss": 1.8804, "step": 20091 }, { "epoch": 0.6482453504761944, "grad_norm": 0.34765625, "learning_rate": 8.787780998477032e-06, "loss": 1.8842, "step": 20092 }, { "epoch": 0.6482776143299906, "grad_norm": 0.373046875, "learning_rate": 8.786348604931121e-06, "loss": 1.854, "step": 20093 }, { "epoch": 0.648309878183787, "grad_norm": 0.373046875, "learning_rate": 8.784916279780622e-06, "loss": 1.9191, "step": 20094 }, { "epoch": 0.6483421420375833, "grad_norm": 0.3359375, "learning_rate": 8.783484023041312e-06, "loss": 1.8577, "step": 20095 }, { "epoch": 0.6483744058913797, "grad_norm": 0.384765625, "learning_rate": 8.782051834728944e-06, "loss": 1.8977, "step": 20096 }, { "epoch": 0.648406669745176, "grad_norm": 0.35546875, "learning_rate": 8.78061971485929e-06, "loss": 1.8998, "step": 20097 }, { "epoch": 0.6484389335989724, "grad_norm": 0.42578125, "learning_rate": 8.779187663448117e-06, "loss": 1.884, "step": 20098 }, { "epoch": 0.6484711974527687, "grad_norm": 0.3515625, "learning_rate": 8.777755680511179e-06, "loss": 1.9124, "step": 20099 }, { "epoch": 0.6485034613065651, "grad_norm": 0.341796875, "learning_rate": 8.776323766064245e-06, "loss": 1.9069, "step": 20100 }, { "epoch": 0.6485357251603614, "grad_norm": 0.396484375, "learning_rate": 8.774891920123075e-06, "loss": 1.8538, "step": 20101 }, { "epoch": 0.6485679890141578, "grad_norm": 0.33984375, "learning_rate": 8.773460142703427e-06, "loss": 1.8849, "step": 20102 }, { "epoch": 0.6486002528679541, "grad_norm": 0.5234375, "learning_rate": 8.772028433821061e-06, "loss": 1.8593, "step": 20103 }, { "epoch": 0.6486325167217505, "grad_norm": 0.34375, "learning_rate": 8.770596793491745e-06, "loss": 1.8298, "step": 20104 }, { "epoch": 0.6486647805755468, "grad_norm": 0.353515625, "learning_rate": 8.769165221731221e-06, "loss": 1.8869, "step": 20105 }, { "epoch": 0.6486970444293432, "grad_norm": 0.33984375, "learning_rate": 8.76773371855526e-06, "loss": 1.914, "step": 20106 }, { "epoch": 0.6487293082831395, "grad_norm": 0.345703125, "learning_rate": 8.766302283979622e-06, "loss": 1.8437, "step": 20107 }, { "epoch": 0.6487615721369359, "grad_norm": 0.353515625, "learning_rate": 8.764870918020049e-06, "loss": 1.8765, "step": 20108 }, { "epoch": 0.6487938359907323, "grad_norm": 0.341796875, "learning_rate": 8.763439620692301e-06, "loss": 1.8933, "step": 20109 }, { "epoch": 0.6488260998445285, "grad_norm": 0.392578125, "learning_rate": 8.762008392012146e-06, "loss": 1.8917, "step": 20110 }, { "epoch": 0.648858363698325, "grad_norm": 0.365234375, "learning_rate": 8.76057723199532e-06, "loss": 1.8597, "step": 20111 }, { "epoch": 0.6488906275521212, "grad_norm": 0.341796875, "learning_rate": 8.75914614065758e-06, "loss": 1.8621, "step": 20112 }, { "epoch": 0.6489228914059176, "grad_norm": 0.345703125, "learning_rate": 8.75771511801469e-06, "loss": 1.8832, "step": 20113 }, { "epoch": 0.6489551552597139, "grad_norm": 0.376953125, "learning_rate": 8.756284164082394e-06, "loss": 1.8337, "step": 20114 }, { "epoch": 0.6489874191135103, "grad_norm": 0.357421875, "learning_rate": 8.754853278876434e-06, "loss": 1.8078, "step": 20115 }, { "epoch": 0.6490196829673066, "grad_norm": 0.345703125, "learning_rate": 8.753422462412577e-06, "loss": 1.9062, "step": 20116 }, { "epoch": 0.649051946821103, "grad_norm": 0.3671875, "learning_rate": 8.751991714706564e-06, "loss": 1.8979, "step": 20117 }, { "epoch": 0.6490842106748993, "grad_norm": 0.33984375, "learning_rate": 8.750561035774137e-06, "loss": 1.8952, "step": 20118 }, { "epoch": 0.6491164745286957, "grad_norm": 0.365234375, "learning_rate": 8.749130425631059e-06, "loss": 1.8668, "step": 20119 }, { "epoch": 0.649148738382492, "grad_norm": 0.349609375, "learning_rate": 8.747699884293066e-06, "loss": 1.9018, "step": 20120 }, { "epoch": 0.6491810022362884, "grad_norm": 0.3359375, "learning_rate": 8.746269411775904e-06, "loss": 1.8835, "step": 20121 }, { "epoch": 0.6492132660900847, "grad_norm": 0.365234375, "learning_rate": 8.744839008095327e-06, "loss": 1.874, "step": 20122 }, { "epoch": 0.6492455299438811, "grad_norm": 0.333984375, "learning_rate": 8.743408673267074e-06, "loss": 1.854, "step": 20123 }, { "epoch": 0.6492777937976774, "grad_norm": 0.3515625, "learning_rate": 8.741978407306886e-06, "loss": 1.9019, "step": 20124 }, { "epoch": 0.6493100576514738, "grad_norm": 0.341796875, "learning_rate": 8.740548210230513e-06, "loss": 1.8868, "step": 20125 }, { "epoch": 0.64934232150527, "grad_norm": 0.34765625, "learning_rate": 8.739118082053696e-06, "loss": 1.8989, "step": 20126 }, { "epoch": 0.6493745853590664, "grad_norm": 0.33984375, "learning_rate": 8.737688022792176e-06, "loss": 1.8794, "step": 20127 }, { "epoch": 0.6494068492128628, "grad_norm": 0.365234375, "learning_rate": 8.736258032461689e-06, "loss": 1.8865, "step": 20128 }, { "epoch": 0.6494391130666591, "grad_norm": 0.375, "learning_rate": 8.734828111077983e-06, "loss": 1.8722, "step": 20129 }, { "epoch": 0.6494713769204555, "grad_norm": 0.34765625, "learning_rate": 8.733398258656797e-06, "loss": 1.9257, "step": 20130 }, { "epoch": 0.6495036407742518, "grad_norm": 0.359375, "learning_rate": 8.73196847521386e-06, "loss": 1.8874, "step": 20131 }, { "epoch": 0.6495359046280482, "grad_norm": 0.380859375, "learning_rate": 8.730538760764925e-06, "loss": 1.8276, "step": 20132 }, { "epoch": 0.6495681684818445, "grad_norm": 0.345703125, "learning_rate": 8.72910911532572e-06, "loss": 1.8628, "step": 20133 }, { "epoch": 0.6496004323356409, "grad_norm": 0.458984375, "learning_rate": 8.727679538911975e-06, "loss": 1.8979, "step": 20134 }, { "epoch": 0.6496326961894372, "grad_norm": 0.345703125, "learning_rate": 8.726250031539444e-06, "loss": 1.8687, "step": 20135 }, { "epoch": 0.6496649600432336, "grad_norm": 0.390625, "learning_rate": 8.72482059322385e-06, "loss": 1.871, "step": 20136 }, { "epoch": 0.6496972238970299, "grad_norm": 0.3515625, "learning_rate": 8.723391223980923e-06, "loss": 1.9556, "step": 20137 }, { "epoch": 0.6497294877508263, "grad_norm": 0.341796875, "learning_rate": 8.721961923826412e-06, "loss": 1.8837, "step": 20138 }, { "epoch": 0.6497617516046226, "grad_norm": 0.349609375, "learning_rate": 8.720532692776035e-06, "loss": 1.8777, "step": 20139 }, { "epoch": 0.649794015458419, "grad_norm": 0.38671875, "learning_rate": 8.719103530845525e-06, "loss": 1.835, "step": 20140 }, { "epoch": 0.6498262793122153, "grad_norm": 0.341796875, "learning_rate": 8.717674438050626e-06, "loss": 1.8939, "step": 20141 }, { "epoch": 0.6498585431660117, "grad_norm": 0.353515625, "learning_rate": 8.716245414407057e-06, "loss": 1.8825, "step": 20142 }, { "epoch": 0.649890807019808, "grad_norm": 0.34375, "learning_rate": 8.714816459930548e-06, "loss": 1.8739, "step": 20143 }, { "epoch": 0.6499230708736043, "grad_norm": 0.341796875, "learning_rate": 8.713387574636831e-06, "loss": 1.8553, "step": 20144 }, { "epoch": 0.6499553347274006, "grad_norm": 0.37890625, "learning_rate": 8.711958758541637e-06, "loss": 1.8771, "step": 20145 }, { "epoch": 0.649987598581197, "grad_norm": 0.349609375, "learning_rate": 8.710530011660684e-06, "loss": 1.8564, "step": 20146 }, { "epoch": 0.6500198624349933, "grad_norm": 0.359375, "learning_rate": 8.70910133400971e-06, "loss": 1.8538, "step": 20147 }, { "epoch": 0.6500521262887897, "grad_norm": 0.34765625, "learning_rate": 8.707672725604435e-06, "loss": 1.8667, "step": 20148 }, { "epoch": 0.6500843901425861, "grad_norm": 0.34765625, "learning_rate": 8.70624418646058e-06, "loss": 1.8795, "step": 20149 }, { "epoch": 0.6501166539963824, "grad_norm": 0.37890625, "learning_rate": 8.704815716593878e-06, "loss": 1.8584, "step": 20150 }, { "epoch": 0.6501489178501788, "grad_norm": 0.3359375, "learning_rate": 8.703387316020049e-06, "loss": 1.8996, "step": 20151 }, { "epoch": 0.6501811817039751, "grad_norm": 0.369140625, "learning_rate": 8.70195898475481e-06, "loss": 1.8785, "step": 20152 }, { "epoch": 0.6502134455577715, "grad_norm": 0.376953125, "learning_rate": 8.700530722813895e-06, "loss": 1.8811, "step": 20153 }, { "epoch": 0.6502457094115678, "grad_norm": 0.376953125, "learning_rate": 8.699102530213016e-06, "loss": 1.9143, "step": 20154 }, { "epoch": 0.6502779732653642, "grad_norm": 0.357421875, "learning_rate": 8.697674406967893e-06, "loss": 1.9542, "step": 20155 }, { "epoch": 0.6503102371191605, "grad_norm": 0.384765625, "learning_rate": 8.696246353094253e-06, "loss": 1.9365, "step": 20156 }, { "epoch": 0.6503425009729569, "grad_norm": 0.392578125, "learning_rate": 8.69481836860781e-06, "loss": 1.9424, "step": 20157 }, { "epoch": 0.6503747648267532, "grad_norm": 0.369140625, "learning_rate": 8.693390453524284e-06, "loss": 1.8795, "step": 20158 }, { "epoch": 0.6504070286805496, "grad_norm": 0.392578125, "learning_rate": 8.691962607859386e-06, "loss": 1.8999, "step": 20159 }, { "epoch": 0.6504392925343458, "grad_norm": 0.353515625, "learning_rate": 8.690534831628844e-06, "loss": 1.8817, "step": 20160 }, { "epoch": 0.6504715563881422, "grad_norm": 0.369140625, "learning_rate": 8.689107124848367e-06, "loss": 1.9228, "step": 20161 }, { "epoch": 0.6505038202419385, "grad_norm": 0.390625, "learning_rate": 8.687679487533668e-06, "loss": 1.8974, "step": 20162 }, { "epoch": 0.6505360840957349, "grad_norm": 0.369140625, "learning_rate": 8.686251919700469e-06, "loss": 1.8926, "step": 20163 }, { "epoch": 0.6505683479495312, "grad_norm": 0.3515625, "learning_rate": 8.68482442136448e-06, "loss": 1.9173, "step": 20164 }, { "epoch": 0.6506006118033276, "grad_norm": 0.36328125, "learning_rate": 8.683396992541406e-06, "loss": 1.8675, "step": 20165 }, { "epoch": 0.6506328756571239, "grad_norm": 0.375, "learning_rate": 8.681969633246974e-06, "loss": 1.8861, "step": 20166 }, { "epoch": 0.6506651395109203, "grad_norm": 0.376953125, "learning_rate": 8.680542343496889e-06, "loss": 1.9155, "step": 20167 }, { "epoch": 0.6506974033647166, "grad_norm": 0.57421875, "learning_rate": 8.67911512330685e-06, "loss": 1.9928, "step": 20168 }, { "epoch": 0.650729667218513, "grad_norm": 0.7265625, "learning_rate": 8.677687972692588e-06, "loss": 1.9628, "step": 20169 }, { "epoch": 0.6507619310723094, "grad_norm": 0.53515625, "learning_rate": 8.676260891669799e-06, "loss": 1.9718, "step": 20170 }, { "epoch": 0.6507941949261057, "grad_norm": 0.5859375, "learning_rate": 8.674833880254188e-06, "loss": 1.9671, "step": 20171 }, { "epoch": 0.6508264587799021, "grad_norm": 0.62109375, "learning_rate": 8.673406938461472e-06, "loss": 1.9622, "step": 20172 }, { "epoch": 0.6508587226336984, "grad_norm": 0.390625, "learning_rate": 8.671980066307355e-06, "loss": 2.0022, "step": 20173 }, { "epoch": 0.6508909864874948, "grad_norm": 0.4765625, "learning_rate": 8.670553263807535e-06, "loss": 1.991, "step": 20174 }, { "epoch": 0.6509232503412911, "grad_norm": 0.515625, "learning_rate": 8.669126530977734e-06, "loss": 1.9627, "step": 20175 }, { "epoch": 0.6509555141950875, "grad_norm": 0.421875, "learning_rate": 8.667699867833641e-06, "loss": 1.9878, "step": 20176 }, { "epoch": 0.6509877780488837, "grad_norm": 0.408203125, "learning_rate": 8.666273274390961e-06, "loss": 1.9989, "step": 20177 }, { "epoch": 0.6510200419026801, "grad_norm": 0.45703125, "learning_rate": 8.664846750665414e-06, "loss": 1.9904, "step": 20178 }, { "epoch": 0.6510523057564764, "grad_norm": 0.369140625, "learning_rate": 8.663420296672677e-06, "loss": 1.9109, "step": 20179 }, { "epoch": 0.6510845696102728, "grad_norm": 0.384765625, "learning_rate": 8.661993912428462e-06, "loss": 1.8944, "step": 20180 }, { "epoch": 0.6511168334640691, "grad_norm": 0.373046875, "learning_rate": 8.660567597948479e-06, "loss": 1.8773, "step": 20181 }, { "epoch": 0.6511490973178655, "grad_norm": 0.37109375, "learning_rate": 8.659141353248418e-06, "loss": 1.8866, "step": 20182 }, { "epoch": 0.6511813611716618, "grad_norm": 0.359375, "learning_rate": 8.657715178343976e-06, "loss": 1.8691, "step": 20183 }, { "epoch": 0.6512136250254582, "grad_norm": 0.36328125, "learning_rate": 8.656289073250859e-06, "loss": 1.8804, "step": 20184 }, { "epoch": 0.6512458888792545, "grad_norm": 0.373046875, "learning_rate": 8.654863037984763e-06, "loss": 1.9494, "step": 20185 }, { "epoch": 0.6512781527330509, "grad_norm": 0.388671875, "learning_rate": 8.653437072561383e-06, "loss": 1.9283, "step": 20186 }, { "epoch": 0.6513104165868472, "grad_norm": 0.359375, "learning_rate": 8.652011176996409e-06, "loss": 1.8738, "step": 20187 }, { "epoch": 0.6513426804406436, "grad_norm": 0.390625, "learning_rate": 8.650585351305547e-06, "loss": 1.9097, "step": 20188 }, { "epoch": 0.65137494429444, "grad_norm": 0.359375, "learning_rate": 8.649159595504485e-06, "loss": 1.8657, "step": 20189 }, { "epoch": 0.6514072081482363, "grad_norm": 0.369140625, "learning_rate": 8.647733909608913e-06, "loss": 1.8926, "step": 20190 }, { "epoch": 0.6514394720020327, "grad_norm": 0.3671875, "learning_rate": 8.646308293634534e-06, "loss": 1.9003, "step": 20191 }, { "epoch": 0.651471735855829, "grad_norm": 0.38671875, "learning_rate": 8.644882747597037e-06, "loss": 1.9232, "step": 20192 }, { "epoch": 0.6515039997096254, "grad_norm": 0.36328125, "learning_rate": 8.643457271512107e-06, "loss": 1.8791, "step": 20193 }, { "epoch": 0.6515362635634216, "grad_norm": 0.349609375, "learning_rate": 8.642031865395444e-06, "loss": 1.8846, "step": 20194 }, { "epoch": 0.651568527417218, "grad_norm": 0.359375, "learning_rate": 8.640606529262731e-06, "loss": 1.9077, "step": 20195 }, { "epoch": 0.6516007912710143, "grad_norm": 0.36328125, "learning_rate": 8.639181263129653e-06, "loss": 1.8733, "step": 20196 }, { "epoch": 0.6516330551248107, "grad_norm": 0.35546875, "learning_rate": 8.637756067011912e-06, "loss": 1.9596, "step": 20197 }, { "epoch": 0.651665318978607, "grad_norm": 0.38671875, "learning_rate": 8.636330940925188e-06, "loss": 1.8777, "step": 20198 }, { "epoch": 0.6516975828324034, "grad_norm": 0.361328125, "learning_rate": 8.634905884885162e-06, "loss": 1.8929, "step": 20199 }, { "epoch": 0.6517298466861997, "grad_norm": 0.404296875, "learning_rate": 8.63348089890753e-06, "loss": 1.9335, "step": 20200 }, { "epoch": 0.6517621105399961, "grad_norm": 0.41796875, "learning_rate": 8.632055983007975e-06, "loss": 1.9676, "step": 20201 }, { "epoch": 0.6517943743937924, "grad_norm": 0.380859375, "learning_rate": 8.630631137202173e-06, "loss": 1.9855, "step": 20202 }, { "epoch": 0.6518266382475888, "grad_norm": 0.423828125, "learning_rate": 8.629206361505818e-06, "loss": 2.0257, "step": 20203 }, { "epoch": 0.6518589021013851, "grad_norm": 0.46484375, "learning_rate": 8.627781655934593e-06, "loss": 2.0373, "step": 20204 }, { "epoch": 0.6518911659551815, "grad_norm": 0.39453125, "learning_rate": 8.626357020504167e-06, "loss": 1.9945, "step": 20205 }, { "epoch": 0.6519234298089778, "grad_norm": 0.431640625, "learning_rate": 8.624932455230238e-06, "loss": 1.9682, "step": 20206 }, { "epoch": 0.6519556936627742, "grad_norm": 0.4140625, "learning_rate": 8.623507960128479e-06, "loss": 1.9732, "step": 20207 }, { "epoch": 0.6519879575165705, "grad_norm": 0.375, "learning_rate": 8.622083535214563e-06, "loss": 1.9848, "step": 20208 }, { "epoch": 0.6520202213703669, "grad_norm": 0.41796875, "learning_rate": 8.620659180504188e-06, "loss": 1.953, "step": 20209 }, { "epoch": 0.6520524852241633, "grad_norm": 0.4140625, "learning_rate": 8.619234896013007e-06, "loss": 1.9895, "step": 20210 }, { "epoch": 0.6520847490779595, "grad_norm": 1.1640625, "learning_rate": 8.617810681756714e-06, "loss": 1.983, "step": 20211 }, { "epoch": 0.652117012931756, "grad_norm": 0.4609375, "learning_rate": 8.61638653775099e-06, "loss": 1.9599, "step": 20212 }, { "epoch": 0.6521492767855522, "grad_norm": 0.3984375, "learning_rate": 8.614962464011497e-06, "loss": 1.9828, "step": 20213 }, { "epoch": 0.6521815406393486, "grad_norm": 0.4296875, "learning_rate": 8.613538460553913e-06, "loss": 2.0078, "step": 20214 }, { "epoch": 0.6522138044931449, "grad_norm": 0.408203125, "learning_rate": 8.612114527393923e-06, "loss": 1.9963, "step": 20215 }, { "epoch": 0.6522460683469413, "grad_norm": 0.423828125, "learning_rate": 8.610690664547191e-06, "loss": 1.9713, "step": 20216 }, { "epoch": 0.6522783322007376, "grad_norm": 0.43359375, "learning_rate": 8.609266872029397e-06, "loss": 1.9747, "step": 20217 }, { "epoch": 0.652310596054534, "grad_norm": 0.3828125, "learning_rate": 8.607843149856198e-06, "loss": 1.9838, "step": 20218 }, { "epoch": 0.6523428599083303, "grad_norm": 0.408203125, "learning_rate": 8.606419498043284e-06, "loss": 1.9702, "step": 20219 }, { "epoch": 0.6523751237621267, "grad_norm": 0.45703125, "learning_rate": 8.604995916606319e-06, "loss": 1.9073, "step": 20220 }, { "epoch": 0.652407387615923, "grad_norm": 0.384765625, "learning_rate": 8.603572405560963e-06, "loss": 1.991, "step": 20221 }, { "epoch": 0.6524396514697194, "grad_norm": 0.404296875, "learning_rate": 8.602148964922901e-06, "loss": 1.9696, "step": 20222 }, { "epoch": 0.6524719153235157, "grad_norm": 0.404296875, "learning_rate": 8.600725594707793e-06, "loss": 1.9575, "step": 20223 }, { "epoch": 0.6525041791773121, "grad_norm": 0.39453125, "learning_rate": 8.5993022949313e-06, "loss": 1.9707, "step": 20224 }, { "epoch": 0.6525364430311084, "grad_norm": 0.3984375, "learning_rate": 8.5978790656091e-06, "loss": 1.9811, "step": 20225 }, { "epoch": 0.6525687068849048, "grad_norm": 0.4375, "learning_rate": 8.596455906756856e-06, "loss": 1.9637, "step": 20226 }, { "epoch": 0.652600970738701, "grad_norm": 0.365234375, "learning_rate": 8.595032818390227e-06, "loss": 1.9933, "step": 20227 }, { "epoch": 0.6526332345924974, "grad_norm": 0.4140625, "learning_rate": 8.593609800524885e-06, "loss": 1.951, "step": 20228 }, { "epoch": 0.6526654984462938, "grad_norm": 0.380859375, "learning_rate": 8.592186853176491e-06, "loss": 1.9553, "step": 20229 }, { "epoch": 0.6526977623000901, "grad_norm": 0.38671875, "learning_rate": 8.590763976360704e-06, "loss": 1.9449, "step": 20230 }, { "epoch": 0.6527300261538865, "grad_norm": 0.400390625, "learning_rate": 8.589341170093194e-06, "loss": 1.9366, "step": 20231 }, { "epoch": 0.6527622900076828, "grad_norm": 0.451171875, "learning_rate": 8.587918434389614e-06, "loss": 1.9633, "step": 20232 }, { "epoch": 0.6527945538614792, "grad_norm": 0.369140625, "learning_rate": 8.586495769265625e-06, "loss": 1.9671, "step": 20233 }, { "epoch": 0.6528268177152755, "grad_norm": 0.41015625, "learning_rate": 8.585073174736896e-06, "loss": 1.9685, "step": 20234 }, { "epoch": 0.6528590815690719, "grad_norm": 0.380859375, "learning_rate": 8.583650650819077e-06, "loss": 1.9691, "step": 20235 }, { "epoch": 0.6528913454228682, "grad_norm": 0.431640625, "learning_rate": 8.582228197527825e-06, "loss": 1.9381, "step": 20236 }, { "epoch": 0.6529236092766646, "grad_norm": 0.365234375, "learning_rate": 8.580805814878812e-06, "loss": 1.8689, "step": 20237 }, { "epoch": 0.6529558731304609, "grad_norm": 0.3515625, "learning_rate": 8.57938350288767e-06, "loss": 1.8685, "step": 20238 }, { "epoch": 0.6529881369842573, "grad_norm": 0.41015625, "learning_rate": 8.57796126157007e-06, "loss": 1.8552, "step": 20239 }, { "epoch": 0.6530204008380536, "grad_norm": 0.3828125, "learning_rate": 8.576539090941679e-06, "loss": 1.9202, "step": 20240 }, { "epoch": 0.65305266469185, "grad_norm": 0.39453125, "learning_rate": 8.575116991018122e-06, "loss": 1.9124, "step": 20241 }, { "epoch": 0.6530849285456463, "grad_norm": 0.349609375, "learning_rate": 8.57369496181507e-06, "loss": 1.8593, "step": 20242 }, { "epoch": 0.6531171923994427, "grad_norm": 0.365234375, "learning_rate": 8.572273003348184e-06, "loss": 1.8633, "step": 20243 }, { "epoch": 0.653149456253239, "grad_norm": 0.345703125, "learning_rate": 8.570851115633095e-06, "loss": 1.8571, "step": 20244 }, { "epoch": 0.6531817201070353, "grad_norm": 0.4453125, "learning_rate": 8.569429298685466e-06, "loss": 1.9332, "step": 20245 }, { "epoch": 0.6532139839608316, "grad_norm": 0.341796875, "learning_rate": 8.56800755252095e-06, "loss": 1.8605, "step": 20246 }, { "epoch": 0.653246247814628, "grad_norm": 0.376953125, "learning_rate": 8.566585877155193e-06, "loss": 1.8633, "step": 20247 }, { "epoch": 0.6532785116684243, "grad_norm": 0.390625, "learning_rate": 8.565164272603843e-06, "loss": 1.8604, "step": 20248 }, { "epoch": 0.6533107755222207, "grad_norm": 0.3828125, "learning_rate": 8.563742738882541e-06, "loss": 1.8649, "step": 20249 }, { "epoch": 0.6533430393760171, "grad_norm": 0.435546875, "learning_rate": 8.56232127600695e-06, "loss": 1.9172, "step": 20250 }, { "epoch": 0.6533753032298134, "grad_norm": 0.41015625, "learning_rate": 8.560899883992707e-06, "loss": 1.9109, "step": 20251 }, { "epoch": 0.6534075670836098, "grad_norm": 0.390625, "learning_rate": 8.559478562855453e-06, "loss": 1.8877, "step": 20252 }, { "epoch": 0.6534398309374061, "grad_norm": 0.3671875, "learning_rate": 8.558057312610844e-06, "loss": 1.8922, "step": 20253 }, { "epoch": 0.6534720947912025, "grad_norm": 0.384765625, "learning_rate": 8.556636133274521e-06, "loss": 1.8631, "step": 20254 }, { "epoch": 0.6535043586449988, "grad_norm": 0.35546875, "learning_rate": 8.555215024862116e-06, "loss": 1.8196, "step": 20255 }, { "epoch": 0.6535366224987952, "grad_norm": 0.39453125, "learning_rate": 8.553793987389287e-06, "loss": 1.8356, "step": 20256 }, { "epoch": 0.6535688863525915, "grad_norm": 0.353515625, "learning_rate": 8.55237302087167e-06, "loss": 1.8912, "step": 20257 }, { "epoch": 0.6536011502063879, "grad_norm": 0.35546875, "learning_rate": 8.5509521253249e-06, "loss": 1.8396, "step": 20258 }, { "epoch": 0.6536334140601842, "grad_norm": 0.353515625, "learning_rate": 8.549531300764628e-06, "loss": 1.8577, "step": 20259 }, { "epoch": 0.6536656779139806, "grad_norm": 0.359375, "learning_rate": 8.548110547206487e-06, "loss": 1.8506, "step": 20260 }, { "epoch": 0.6536979417677768, "grad_norm": 0.357421875, "learning_rate": 8.54668986466611e-06, "loss": 1.8917, "step": 20261 }, { "epoch": 0.6537302056215732, "grad_norm": 0.365234375, "learning_rate": 8.54526925315915e-06, "loss": 1.8495, "step": 20262 }, { "epoch": 0.6537624694753695, "grad_norm": 0.34765625, "learning_rate": 8.543848712701234e-06, "loss": 1.887, "step": 20263 }, { "epoch": 0.6537947333291659, "grad_norm": 0.361328125, "learning_rate": 8.542428243307993e-06, "loss": 1.8809, "step": 20264 }, { "epoch": 0.6538269971829622, "grad_norm": 0.365234375, "learning_rate": 8.541007844995075e-06, "loss": 1.8666, "step": 20265 }, { "epoch": 0.6538592610367586, "grad_norm": 0.3828125, "learning_rate": 8.53958751777811e-06, "loss": 1.9083, "step": 20266 }, { "epoch": 0.6538915248905549, "grad_norm": 0.3359375, "learning_rate": 8.538167261672725e-06, "loss": 1.8595, "step": 20267 }, { "epoch": 0.6539237887443513, "grad_norm": 0.34765625, "learning_rate": 8.536747076694572e-06, "loss": 1.888, "step": 20268 }, { "epoch": 0.6539560525981476, "grad_norm": 0.373046875, "learning_rate": 8.535326962859257e-06, "loss": 1.8548, "step": 20269 }, { "epoch": 0.653988316451944, "grad_norm": 0.365234375, "learning_rate": 8.533906920182426e-06, "loss": 1.8764, "step": 20270 }, { "epoch": 0.6540205803057404, "grad_norm": 0.357421875, "learning_rate": 8.53248694867972e-06, "loss": 1.9094, "step": 20271 }, { "epoch": 0.6540528441595367, "grad_norm": 0.3828125, "learning_rate": 8.531067048366745e-06, "loss": 1.9124, "step": 20272 }, { "epoch": 0.6540851080133331, "grad_norm": 0.36328125, "learning_rate": 8.529647219259146e-06, "loss": 1.8801, "step": 20273 }, { "epoch": 0.6541173718671294, "grad_norm": 0.37890625, "learning_rate": 8.528227461372558e-06, "loss": 1.8903, "step": 20274 }, { "epoch": 0.6541496357209258, "grad_norm": 0.34375, "learning_rate": 8.526807774722588e-06, "loss": 1.8839, "step": 20275 }, { "epoch": 0.6541818995747221, "grad_norm": 0.373046875, "learning_rate": 8.525388159324877e-06, "loss": 1.8283, "step": 20276 }, { "epoch": 0.6542141634285185, "grad_norm": 0.34765625, "learning_rate": 8.523968615195055e-06, "loss": 1.8601, "step": 20277 }, { "epoch": 0.6542464272823147, "grad_norm": 0.369140625, "learning_rate": 8.522549142348731e-06, "loss": 1.8778, "step": 20278 }, { "epoch": 0.6542786911361111, "grad_norm": 0.35546875, "learning_rate": 8.521129740801546e-06, "loss": 1.9257, "step": 20279 }, { "epoch": 0.6543109549899074, "grad_norm": 0.359375, "learning_rate": 8.51971041056911e-06, "loss": 1.8876, "step": 20280 }, { "epoch": 0.6543432188437038, "grad_norm": 0.361328125, "learning_rate": 8.51829115166706e-06, "loss": 1.8855, "step": 20281 }, { "epoch": 0.6543754826975001, "grad_norm": 0.349609375, "learning_rate": 8.51687196411101e-06, "loss": 1.8577, "step": 20282 }, { "epoch": 0.6544077465512965, "grad_norm": 0.359375, "learning_rate": 8.515452847916578e-06, "loss": 1.8867, "step": 20283 }, { "epoch": 0.6544400104050928, "grad_norm": 0.384765625, "learning_rate": 8.514033803099393e-06, "loss": 1.8911, "step": 20284 }, { "epoch": 0.6544722742588892, "grad_norm": 0.369140625, "learning_rate": 8.51261482967507e-06, "loss": 1.8994, "step": 20285 }, { "epoch": 0.6545045381126855, "grad_norm": 0.44140625, "learning_rate": 8.511195927659225e-06, "loss": 1.9983, "step": 20286 }, { "epoch": 0.6545368019664819, "grad_norm": 0.62109375, "learning_rate": 8.509777097067487e-06, "loss": 1.9753, "step": 20287 }, { "epoch": 0.6545690658202782, "grad_norm": 0.44921875, "learning_rate": 8.508358337915466e-06, "loss": 1.9888, "step": 20288 }, { "epoch": 0.6546013296740746, "grad_norm": 0.4921875, "learning_rate": 8.506939650218772e-06, "loss": 1.9683, "step": 20289 }, { "epoch": 0.654633593527871, "grad_norm": 0.4921875, "learning_rate": 8.505521033993034e-06, "loss": 1.9744, "step": 20290 }, { "epoch": 0.6546658573816673, "grad_norm": 0.45703125, "learning_rate": 8.504102489253863e-06, "loss": 1.9619, "step": 20291 }, { "epoch": 0.6546981212354637, "grad_norm": 0.546875, "learning_rate": 8.502684016016863e-06, "loss": 1.9499, "step": 20292 }, { "epoch": 0.65473038508926, "grad_norm": 0.43359375, "learning_rate": 8.501265614297667e-06, "loss": 1.9295, "step": 20293 }, { "epoch": 0.6547626489430564, "grad_norm": 0.439453125, "learning_rate": 8.499847284111872e-06, "loss": 1.9668, "step": 20294 }, { "epoch": 0.6547949127968526, "grad_norm": 0.443359375, "learning_rate": 8.498429025475093e-06, "loss": 1.9544, "step": 20295 }, { "epoch": 0.654827176650649, "grad_norm": 0.400390625, "learning_rate": 8.497010838402946e-06, "loss": 1.9704, "step": 20296 }, { "epoch": 0.6548594405044453, "grad_norm": 0.40234375, "learning_rate": 8.49559272291104e-06, "loss": 1.9579, "step": 20297 }, { "epoch": 0.6548917043582417, "grad_norm": 0.443359375, "learning_rate": 8.494174679014976e-06, "loss": 1.93, "step": 20298 }, { "epoch": 0.654923968212038, "grad_norm": 0.37109375, "learning_rate": 8.492756706730383e-06, "loss": 1.9564, "step": 20299 }, { "epoch": 0.6549562320658344, "grad_norm": 0.408203125, "learning_rate": 8.491338806072843e-06, "loss": 1.9658, "step": 20300 }, { "epoch": 0.6549884959196307, "grad_norm": 0.427734375, "learning_rate": 8.489920977057976e-06, "loss": 1.9651, "step": 20301 }, { "epoch": 0.6550207597734271, "grad_norm": 0.388671875, "learning_rate": 8.4885032197014e-06, "loss": 1.9516, "step": 20302 }, { "epoch": 0.6550530236272234, "grad_norm": 0.392578125, "learning_rate": 8.487085534018699e-06, "loss": 1.9478, "step": 20303 }, { "epoch": 0.6550852874810198, "grad_norm": 0.412109375, "learning_rate": 8.485667920025489e-06, "loss": 1.9885, "step": 20304 }, { "epoch": 0.6551175513348161, "grad_norm": 0.396484375, "learning_rate": 8.484250377737381e-06, "loss": 1.9642, "step": 20305 }, { "epoch": 0.6551498151886125, "grad_norm": 0.390625, "learning_rate": 8.482832907169963e-06, "loss": 1.9234, "step": 20306 }, { "epoch": 0.6551820790424088, "grad_norm": 0.380859375, "learning_rate": 8.481415508338843e-06, "loss": 1.9357, "step": 20307 }, { "epoch": 0.6552143428962052, "grad_norm": 0.43359375, "learning_rate": 8.479998181259638e-06, "loss": 1.9829, "step": 20308 }, { "epoch": 0.6552466067500015, "grad_norm": 0.37109375, "learning_rate": 8.47858092594792e-06, "loss": 1.9786, "step": 20309 }, { "epoch": 0.6552788706037979, "grad_norm": 0.412109375, "learning_rate": 8.477163742419313e-06, "loss": 1.9781, "step": 20310 }, { "epoch": 0.6553111344575943, "grad_norm": 0.3984375, "learning_rate": 8.475746630689408e-06, "loss": 1.9847, "step": 20311 }, { "epoch": 0.6553433983113905, "grad_norm": 0.384765625, "learning_rate": 8.474329590773797e-06, "loss": 1.9466, "step": 20312 }, { "epoch": 0.655375662165187, "grad_norm": 0.38671875, "learning_rate": 8.472912622688092e-06, "loss": 1.9733, "step": 20313 }, { "epoch": 0.6554079260189832, "grad_norm": 0.396484375, "learning_rate": 8.471495726447875e-06, "loss": 2.0115, "step": 20314 }, { "epoch": 0.6554401898727796, "grad_norm": 0.408203125, "learning_rate": 8.470078902068756e-06, "loss": 2.0007, "step": 20315 }, { "epoch": 0.6554724537265759, "grad_norm": 0.392578125, "learning_rate": 8.468662149566323e-06, "loss": 1.958, "step": 20316 }, { "epoch": 0.6555047175803723, "grad_norm": 0.3828125, "learning_rate": 8.467245468956167e-06, "loss": 1.9868, "step": 20317 }, { "epoch": 0.6555369814341686, "grad_norm": 0.40625, "learning_rate": 8.465828860253893e-06, "loss": 1.9869, "step": 20318 }, { "epoch": 0.655569245287965, "grad_norm": 0.447265625, "learning_rate": 8.464412323475084e-06, "loss": 1.9644, "step": 20319 }, { "epoch": 0.6556015091417613, "grad_norm": 0.408203125, "learning_rate": 8.462995858635333e-06, "loss": 2.0093, "step": 20320 }, { "epoch": 0.6556337729955577, "grad_norm": 0.404296875, "learning_rate": 8.461579465750237e-06, "loss": 1.9816, "step": 20321 }, { "epoch": 0.655666036849354, "grad_norm": 0.396484375, "learning_rate": 8.460163144835386e-06, "loss": 1.9927, "step": 20322 }, { "epoch": 0.6556983007031504, "grad_norm": 0.451171875, "learning_rate": 8.45874689590636e-06, "loss": 1.9517, "step": 20323 }, { "epoch": 0.6557305645569467, "grad_norm": 0.408203125, "learning_rate": 8.45733071897876e-06, "loss": 1.9648, "step": 20324 }, { "epoch": 0.6557628284107431, "grad_norm": 0.453125, "learning_rate": 8.455914614068173e-06, "loss": 1.9844, "step": 20325 }, { "epoch": 0.6557950922645394, "grad_norm": 0.421875, "learning_rate": 8.454498581190176e-06, "loss": 1.9706, "step": 20326 }, { "epoch": 0.6558273561183358, "grad_norm": 0.37890625, "learning_rate": 8.453082620360369e-06, "loss": 1.9822, "step": 20327 }, { "epoch": 0.655859619972132, "grad_norm": 0.427734375, "learning_rate": 8.451666731594331e-06, "loss": 2.0093, "step": 20328 }, { "epoch": 0.6558918838259284, "grad_norm": 0.421875, "learning_rate": 8.450250914907643e-06, "loss": 1.9466, "step": 20329 }, { "epoch": 0.6559241476797247, "grad_norm": 0.37890625, "learning_rate": 8.448835170315905e-06, "loss": 1.9657, "step": 20330 }, { "epoch": 0.6559564115335211, "grad_norm": 0.396484375, "learning_rate": 8.447419497834679e-06, "loss": 1.9821, "step": 20331 }, { "epoch": 0.6559886753873175, "grad_norm": 0.400390625, "learning_rate": 8.446003897479556e-06, "loss": 1.9687, "step": 20332 }, { "epoch": 0.6560209392411138, "grad_norm": 0.43359375, "learning_rate": 8.444588369266134e-06, "loss": 1.9693, "step": 20333 }, { "epoch": 0.6560532030949102, "grad_norm": 0.392578125, "learning_rate": 8.44317291320997e-06, "loss": 1.9854, "step": 20334 }, { "epoch": 0.6560854669487065, "grad_norm": 0.39453125, "learning_rate": 8.441757529326652e-06, "loss": 1.9744, "step": 20335 }, { "epoch": 0.6561177308025029, "grad_norm": 0.40234375, "learning_rate": 8.440342217631776e-06, "loss": 1.9841, "step": 20336 }, { "epoch": 0.6561499946562992, "grad_norm": 0.3828125, "learning_rate": 8.438926978140894e-06, "loss": 1.9534, "step": 20337 }, { "epoch": 0.6561822585100956, "grad_norm": 0.412109375, "learning_rate": 8.437511810869603e-06, "loss": 1.9909, "step": 20338 }, { "epoch": 0.6562145223638919, "grad_norm": 0.38671875, "learning_rate": 8.436096715833474e-06, "loss": 1.9987, "step": 20339 }, { "epoch": 0.6562467862176883, "grad_norm": 0.421875, "learning_rate": 8.434681693048077e-06, "loss": 1.9618, "step": 20340 }, { "epoch": 0.6562790500714846, "grad_norm": 0.376953125, "learning_rate": 8.433266742529002e-06, "loss": 1.9324, "step": 20341 }, { "epoch": 0.656311313925281, "grad_norm": 0.4296875, "learning_rate": 8.431851864291813e-06, "loss": 1.9789, "step": 20342 }, { "epoch": 0.6563435777790773, "grad_norm": 0.3984375, "learning_rate": 8.430437058352084e-06, "loss": 1.9841, "step": 20343 }, { "epoch": 0.6563758416328737, "grad_norm": 0.38671875, "learning_rate": 8.429022324725393e-06, "loss": 1.955, "step": 20344 }, { "epoch": 0.65640810548667, "grad_norm": 0.44140625, "learning_rate": 8.427607663427309e-06, "loss": 1.9409, "step": 20345 }, { "epoch": 0.6564403693404663, "grad_norm": 0.384765625, "learning_rate": 8.426193074473407e-06, "loss": 1.9712, "step": 20346 }, { "epoch": 0.6564726331942626, "grad_norm": 0.390625, "learning_rate": 8.424778557879254e-06, "loss": 1.998, "step": 20347 }, { "epoch": 0.656504897048059, "grad_norm": 0.43359375, "learning_rate": 8.423364113660422e-06, "loss": 1.9648, "step": 20348 }, { "epoch": 0.6565371609018553, "grad_norm": 0.369140625, "learning_rate": 8.42194974183248e-06, "loss": 1.9655, "step": 20349 }, { "epoch": 0.6565694247556517, "grad_norm": 0.36328125, "learning_rate": 8.420535442410998e-06, "loss": 1.9269, "step": 20350 }, { "epoch": 0.6566016886094481, "grad_norm": 0.41796875, "learning_rate": 8.419121215411537e-06, "loss": 1.8602, "step": 20351 }, { "epoch": 0.6566339524632444, "grad_norm": 0.357421875, "learning_rate": 8.417707060849673e-06, "loss": 1.8969, "step": 20352 }, { "epoch": 0.6566662163170408, "grad_norm": 0.37109375, "learning_rate": 8.416292978740967e-06, "loss": 1.882, "step": 20353 }, { "epoch": 0.6566984801708371, "grad_norm": 0.380859375, "learning_rate": 8.414878969100979e-06, "loss": 1.889, "step": 20354 }, { "epoch": 0.6567307440246335, "grad_norm": 0.35546875, "learning_rate": 8.413465031945288e-06, "loss": 1.9066, "step": 20355 }, { "epoch": 0.6567630078784298, "grad_norm": 0.419921875, "learning_rate": 8.412051167289447e-06, "loss": 1.883, "step": 20356 }, { "epoch": 0.6567952717322262, "grad_norm": 0.375, "learning_rate": 8.410637375149014e-06, "loss": 1.9111, "step": 20357 }, { "epoch": 0.6568275355860225, "grad_norm": 0.37109375, "learning_rate": 8.409223655539564e-06, "loss": 1.9113, "step": 20358 }, { "epoch": 0.6568597994398189, "grad_norm": 0.474609375, "learning_rate": 8.407810008476652e-06, "loss": 1.8227, "step": 20359 }, { "epoch": 0.6568920632936152, "grad_norm": 0.349609375, "learning_rate": 8.40639643397583e-06, "loss": 1.8494, "step": 20360 }, { "epoch": 0.6569243271474116, "grad_norm": 0.388671875, "learning_rate": 8.404982932052682e-06, "loss": 1.884, "step": 20361 }, { "epoch": 0.6569565910012078, "grad_norm": 0.376953125, "learning_rate": 8.403569502722735e-06, "loss": 1.8774, "step": 20362 }, { "epoch": 0.6569888548550042, "grad_norm": 0.36328125, "learning_rate": 8.402156146001565e-06, "loss": 1.8761, "step": 20363 }, { "epoch": 0.6570211187088005, "grad_norm": 0.373046875, "learning_rate": 8.400742861904737e-06, "loss": 1.8526, "step": 20364 }, { "epoch": 0.6570533825625969, "grad_norm": 0.361328125, "learning_rate": 8.399329650447786e-06, "loss": 1.8668, "step": 20365 }, { "epoch": 0.6570856464163932, "grad_norm": 0.357421875, "learning_rate": 8.397916511646281e-06, "loss": 1.8855, "step": 20366 }, { "epoch": 0.6571179102701896, "grad_norm": 0.365234375, "learning_rate": 8.396503445515784e-06, "loss": 1.8608, "step": 20367 }, { "epoch": 0.6571501741239859, "grad_norm": 0.353515625, "learning_rate": 8.395090452071828e-06, "loss": 1.9294, "step": 20368 }, { "epoch": 0.6571824379777823, "grad_norm": 0.34765625, "learning_rate": 8.393677531329986e-06, "loss": 1.926, "step": 20369 }, { "epoch": 0.6572147018315786, "grad_norm": 0.396484375, "learning_rate": 8.3922646833058e-06, "loss": 1.86, "step": 20370 }, { "epoch": 0.657246965685375, "grad_norm": 0.3828125, "learning_rate": 8.390851908014819e-06, "loss": 1.8663, "step": 20371 }, { "epoch": 0.6572792295391714, "grad_norm": 0.357421875, "learning_rate": 8.389439205472603e-06, "loss": 1.8749, "step": 20372 }, { "epoch": 0.6573114933929677, "grad_norm": 0.365234375, "learning_rate": 8.3880265756947e-06, "loss": 1.864, "step": 20373 }, { "epoch": 0.6573437572467641, "grad_norm": 0.375, "learning_rate": 8.386614018696652e-06, "loss": 1.8526, "step": 20374 }, { "epoch": 0.6573760211005604, "grad_norm": 0.37890625, "learning_rate": 8.385201534494015e-06, "loss": 1.8553, "step": 20375 }, { "epoch": 0.6574082849543568, "grad_norm": 0.380859375, "learning_rate": 8.383789123102338e-06, "loss": 1.928, "step": 20376 }, { "epoch": 0.6574405488081531, "grad_norm": 0.361328125, "learning_rate": 8.382376784537156e-06, "loss": 1.9013, "step": 20377 }, { "epoch": 0.6574728126619495, "grad_norm": 0.384765625, "learning_rate": 8.38096451881403e-06, "loss": 1.8799, "step": 20378 }, { "epoch": 0.6575050765157457, "grad_norm": 0.359375, "learning_rate": 8.37955232594849e-06, "loss": 1.8747, "step": 20379 }, { "epoch": 0.6575373403695421, "grad_norm": 0.3828125, "learning_rate": 8.378140205956095e-06, "loss": 1.8919, "step": 20380 }, { "epoch": 0.6575696042233384, "grad_norm": 0.392578125, "learning_rate": 8.376728158852383e-06, "loss": 1.8741, "step": 20381 }, { "epoch": 0.6576018680771348, "grad_norm": 0.345703125, "learning_rate": 8.37531618465289e-06, "loss": 1.888, "step": 20382 }, { "epoch": 0.6576341319309311, "grad_norm": 0.40625, "learning_rate": 8.373904283373169e-06, "loss": 1.9407, "step": 20383 }, { "epoch": 0.6576663957847275, "grad_norm": 0.353515625, "learning_rate": 8.372492455028757e-06, "loss": 1.886, "step": 20384 }, { "epoch": 0.6576986596385238, "grad_norm": 0.337890625, "learning_rate": 8.371080699635186e-06, "loss": 1.8668, "step": 20385 }, { "epoch": 0.6577309234923202, "grad_norm": 0.36328125, "learning_rate": 8.36966901720801e-06, "loss": 1.861, "step": 20386 }, { "epoch": 0.6577631873461165, "grad_norm": 0.3828125, "learning_rate": 8.368257407762764e-06, "loss": 1.8627, "step": 20387 }, { "epoch": 0.6577954511999129, "grad_norm": 0.376953125, "learning_rate": 8.366845871314972e-06, "loss": 1.8866, "step": 20388 }, { "epoch": 0.6578277150537092, "grad_norm": 0.40625, "learning_rate": 8.365434407880189e-06, "loss": 1.8579, "step": 20389 }, { "epoch": 0.6578599789075056, "grad_norm": 0.357421875, "learning_rate": 8.364023017473945e-06, "loss": 1.8957, "step": 20390 }, { "epoch": 0.657892242761302, "grad_norm": 0.39453125, "learning_rate": 8.362611700111771e-06, "loss": 1.9086, "step": 20391 }, { "epoch": 0.6579245066150983, "grad_norm": 0.365234375, "learning_rate": 8.361200455809216e-06, "loss": 1.8799, "step": 20392 }, { "epoch": 0.6579567704688947, "grad_norm": 0.365234375, "learning_rate": 8.359789284581792e-06, "loss": 1.8906, "step": 20393 }, { "epoch": 0.657989034322691, "grad_norm": 0.39453125, "learning_rate": 8.358378186445043e-06, "loss": 1.8473, "step": 20394 }, { "epoch": 0.6580212981764874, "grad_norm": 0.365234375, "learning_rate": 8.356967161414515e-06, "loss": 1.8523, "step": 20395 }, { "epoch": 0.6580535620302836, "grad_norm": 0.392578125, "learning_rate": 8.355556209505715e-06, "loss": 1.8468, "step": 20396 }, { "epoch": 0.65808582588408, "grad_norm": 0.384765625, "learning_rate": 8.354145330734187e-06, "loss": 1.8757, "step": 20397 }, { "epoch": 0.6581180897378763, "grad_norm": 0.416015625, "learning_rate": 8.352734525115467e-06, "loss": 1.8827, "step": 20398 }, { "epoch": 0.6581503535916727, "grad_norm": 0.35546875, "learning_rate": 8.35132379266507e-06, "loss": 1.8636, "step": 20399 }, { "epoch": 0.658182617445469, "grad_norm": 0.35546875, "learning_rate": 8.349913133398533e-06, "loss": 1.8379, "step": 20400 }, { "epoch": 0.6582148812992654, "grad_norm": 0.416015625, "learning_rate": 8.348502547331383e-06, "loss": 1.8751, "step": 20401 }, { "epoch": 0.6582471451530617, "grad_norm": 0.359375, "learning_rate": 8.347092034479138e-06, "loss": 1.8755, "step": 20402 }, { "epoch": 0.6582794090068581, "grad_norm": 0.36328125, "learning_rate": 8.345681594857339e-06, "loss": 1.8938, "step": 20403 }, { "epoch": 0.6583116728606544, "grad_norm": 0.353515625, "learning_rate": 8.344271228481501e-06, "loss": 1.8821, "step": 20404 }, { "epoch": 0.6583439367144508, "grad_norm": 0.353515625, "learning_rate": 8.342860935367149e-06, "loss": 1.8212, "step": 20405 }, { "epoch": 0.6583762005682471, "grad_norm": 0.3984375, "learning_rate": 8.34145071552981e-06, "loss": 1.8482, "step": 20406 }, { "epoch": 0.6584084644220435, "grad_norm": 0.42578125, "learning_rate": 8.340040568985005e-06, "loss": 1.8301, "step": 20407 }, { "epoch": 0.6584407282758398, "grad_norm": 0.388671875, "learning_rate": 8.33863049574825e-06, "loss": 1.9011, "step": 20408 }, { "epoch": 0.6584729921296362, "grad_norm": 0.416015625, "learning_rate": 8.337220495835078e-06, "loss": 1.8983, "step": 20409 }, { "epoch": 0.6585052559834325, "grad_norm": 0.404296875, "learning_rate": 8.335810569261003e-06, "loss": 1.8901, "step": 20410 }, { "epoch": 0.6585375198372289, "grad_norm": 0.419921875, "learning_rate": 8.334400716041537e-06, "loss": 1.8902, "step": 20411 }, { "epoch": 0.6585697836910253, "grad_norm": 0.59375, "learning_rate": 8.332990936192214e-06, "loss": 1.9044, "step": 20412 }, { "epoch": 0.6586020475448215, "grad_norm": 0.3671875, "learning_rate": 8.331581229728537e-06, "loss": 1.9133, "step": 20413 }, { "epoch": 0.6586343113986179, "grad_norm": 0.474609375, "learning_rate": 8.330171596666035e-06, "loss": 1.8903, "step": 20414 }, { "epoch": 0.6586665752524142, "grad_norm": 0.478515625, "learning_rate": 8.32876203702022e-06, "loss": 1.8696, "step": 20415 }, { "epoch": 0.6586988391062106, "grad_norm": 0.345703125, "learning_rate": 8.327352550806603e-06, "loss": 1.8938, "step": 20416 }, { "epoch": 0.6587311029600069, "grad_norm": 0.453125, "learning_rate": 8.325943138040704e-06, "loss": 1.8922, "step": 20417 }, { "epoch": 0.6587633668138033, "grad_norm": 0.53125, "learning_rate": 8.324533798738037e-06, "loss": 1.8557, "step": 20418 }, { "epoch": 0.6587956306675996, "grad_norm": 0.3828125, "learning_rate": 8.323124532914107e-06, "loss": 1.8581, "step": 20419 }, { "epoch": 0.658827894521396, "grad_norm": 0.421875, "learning_rate": 8.321715340584443e-06, "loss": 1.8714, "step": 20420 }, { "epoch": 0.6588601583751923, "grad_norm": 0.43359375, "learning_rate": 8.320306221764536e-06, "loss": 1.9138, "step": 20421 }, { "epoch": 0.6588924222289887, "grad_norm": 0.36328125, "learning_rate": 8.318897176469907e-06, "loss": 1.9003, "step": 20422 }, { "epoch": 0.658924686082785, "grad_norm": 0.37890625, "learning_rate": 8.317488204716073e-06, "loss": 1.8812, "step": 20423 }, { "epoch": 0.6589569499365814, "grad_norm": 0.38671875, "learning_rate": 8.316079306518524e-06, "loss": 1.8728, "step": 20424 }, { "epoch": 0.6589892137903777, "grad_norm": 0.376953125, "learning_rate": 8.31467048189278e-06, "loss": 1.8821, "step": 20425 }, { "epoch": 0.6590214776441741, "grad_norm": 0.40234375, "learning_rate": 8.313261730854358e-06, "loss": 1.8958, "step": 20426 }, { "epoch": 0.6590537414979704, "grad_norm": 0.3984375, "learning_rate": 8.311853053418743e-06, "loss": 1.8819, "step": 20427 }, { "epoch": 0.6590860053517668, "grad_norm": 0.345703125, "learning_rate": 8.31044444960145e-06, "loss": 1.8781, "step": 20428 }, { "epoch": 0.659118269205563, "grad_norm": 0.36328125, "learning_rate": 8.309035919418e-06, "loss": 1.9619, "step": 20429 }, { "epoch": 0.6591505330593594, "grad_norm": 0.37890625, "learning_rate": 8.307627462883868e-06, "loss": 1.881, "step": 20430 }, { "epoch": 0.6591827969131557, "grad_norm": 0.34375, "learning_rate": 8.30621908001458e-06, "loss": 1.8906, "step": 20431 }, { "epoch": 0.6592150607669521, "grad_norm": 0.361328125, "learning_rate": 8.304810770825628e-06, "loss": 1.8815, "step": 20432 }, { "epoch": 0.6592473246207485, "grad_norm": 0.3671875, "learning_rate": 8.303402535332512e-06, "loss": 1.8477, "step": 20433 }, { "epoch": 0.6592795884745448, "grad_norm": 0.35546875, "learning_rate": 8.30199437355074e-06, "loss": 1.8634, "step": 20434 }, { "epoch": 0.6593118523283412, "grad_norm": 0.341796875, "learning_rate": 8.30058628549581e-06, "loss": 1.8853, "step": 20435 }, { "epoch": 0.6593441161821375, "grad_norm": 0.380859375, "learning_rate": 8.299178271183215e-06, "loss": 1.8636, "step": 20436 }, { "epoch": 0.6593763800359339, "grad_norm": 0.34765625, "learning_rate": 8.297770330628464e-06, "loss": 1.8637, "step": 20437 }, { "epoch": 0.6594086438897302, "grad_norm": 0.357421875, "learning_rate": 8.29636246384705e-06, "loss": 1.8899, "step": 20438 }, { "epoch": 0.6594409077435266, "grad_norm": 0.349609375, "learning_rate": 8.294954670854462e-06, "loss": 1.9033, "step": 20439 }, { "epoch": 0.6594731715973229, "grad_norm": 0.35546875, "learning_rate": 8.293546951666208e-06, "loss": 1.8956, "step": 20440 }, { "epoch": 0.6595054354511193, "grad_norm": 0.36328125, "learning_rate": 8.29213930629778e-06, "loss": 1.9092, "step": 20441 }, { "epoch": 0.6595376993049156, "grad_norm": 0.35546875, "learning_rate": 8.290731734764663e-06, "loss": 1.8962, "step": 20442 }, { "epoch": 0.659569963158712, "grad_norm": 0.375, "learning_rate": 8.289324237082363e-06, "loss": 1.8795, "step": 20443 }, { "epoch": 0.6596022270125083, "grad_norm": 0.361328125, "learning_rate": 8.287916813266368e-06, "loss": 1.9077, "step": 20444 }, { "epoch": 0.6596344908663047, "grad_norm": 0.359375, "learning_rate": 8.286509463332164e-06, "loss": 1.8793, "step": 20445 }, { "epoch": 0.659666754720101, "grad_norm": 0.33984375, "learning_rate": 8.285102187295255e-06, "loss": 1.8847, "step": 20446 }, { "epoch": 0.6596990185738973, "grad_norm": 0.36328125, "learning_rate": 8.283694985171116e-06, "loss": 1.8809, "step": 20447 }, { "epoch": 0.6597312824276936, "grad_norm": 0.375, "learning_rate": 8.28228785697525e-06, "loss": 1.8764, "step": 20448 }, { "epoch": 0.65976354628149, "grad_norm": 0.3828125, "learning_rate": 8.280880802723142e-06, "loss": 1.8961, "step": 20449 }, { "epoch": 0.6597958101352863, "grad_norm": 0.3515625, "learning_rate": 8.27947382243027e-06, "loss": 1.8621, "step": 20450 }, { "epoch": 0.6598280739890827, "grad_norm": 0.37109375, "learning_rate": 8.278066916112141e-06, "loss": 1.8562, "step": 20451 }, { "epoch": 0.6598603378428791, "grad_norm": 0.388671875, "learning_rate": 8.276660083784217e-06, "loss": 1.9355, "step": 20452 }, { "epoch": 0.6598926016966754, "grad_norm": 0.349609375, "learning_rate": 8.275253325461995e-06, "loss": 1.8854, "step": 20453 }, { "epoch": 0.6599248655504718, "grad_norm": 0.353515625, "learning_rate": 8.273846641160974e-06, "loss": 1.8606, "step": 20454 }, { "epoch": 0.6599571294042681, "grad_norm": 0.373046875, "learning_rate": 8.272440030896613e-06, "loss": 1.9121, "step": 20455 }, { "epoch": 0.6599893932580645, "grad_norm": 0.365234375, "learning_rate": 8.271033494684403e-06, "loss": 1.8928, "step": 20456 }, { "epoch": 0.6600216571118608, "grad_norm": 0.357421875, "learning_rate": 8.269627032539841e-06, "loss": 1.8748, "step": 20457 }, { "epoch": 0.6600539209656572, "grad_norm": 0.3515625, "learning_rate": 8.268220644478385e-06, "loss": 1.8829, "step": 20458 }, { "epoch": 0.6600861848194535, "grad_norm": 0.359375, "learning_rate": 8.266814330515527e-06, "loss": 1.8327, "step": 20459 }, { "epoch": 0.6601184486732499, "grad_norm": 0.357421875, "learning_rate": 8.265408090666758e-06, "loss": 1.8727, "step": 20460 }, { "epoch": 0.6601507125270462, "grad_norm": 0.359375, "learning_rate": 8.264001924947534e-06, "loss": 1.8941, "step": 20461 }, { "epoch": 0.6601829763808426, "grad_norm": 0.353515625, "learning_rate": 8.26259583337335e-06, "loss": 1.8862, "step": 20462 }, { "epoch": 0.6602152402346388, "grad_norm": 0.36328125, "learning_rate": 8.261189815959679e-06, "loss": 1.9136, "step": 20463 }, { "epoch": 0.6602475040884352, "grad_norm": 0.36328125, "learning_rate": 8.259783872721988e-06, "loss": 1.8784, "step": 20464 }, { "epoch": 0.6602797679422315, "grad_norm": 0.345703125, "learning_rate": 8.258378003675767e-06, "loss": 1.904, "step": 20465 }, { "epoch": 0.6603120317960279, "grad_norm": 0.359375, "learning_rate": 8.256972208836486e-06, "loss": 1.9012, "step": 20466 }, { "epoch": 0.6603442956498242, "grad_norm": 0.359375, "learning_rate": 8.25556648821961e-06, "loss": 1.8907, "step": 20467 }, { "epoch": 0.6603765595036206, "grad_norm": 0.375, "learning_rate": 8.254160841840627e-06, "loss": 1.8962, "step": 20468 }, { "epoch": 0.6604088233574169, "grad_norm": 0.365234375, "learning_rate": 8.252755269715e-06, "loss": 1.8898, "step": 20469 }, { "epoch": 0.6604410872112133, "grad_norm": 0.345703125, "learning_rate": 8.251349771858198e-06, "loss": 1.8744, "step": 20470 }, { "epoch": 0.6604733510650096, "grad_norm": 0.353515625, "learning_rate": 8.249944348285702e-06, "loss": 1.8624, "step": 20471 }, { "epoch": 0.660505614918806, "grad_norm": 0.349609375, "learning_rate": 8.248538999012977e-06, "loss": 1.8746, "step": 20472 }, { "epoch": 0.6605378787726024, "grad_norm": 0.349609375, "learning_rate": 8.247133724055484e-06, "loss": 1.8423, "step": 20473 }, { "epoch": 0.6605701426263987, "grad_norm": 0.361328125, "learning_rate": 8.245728523428705e-06, "loss": 1.9253, "step": 20474 }, { "epoch": 0.6606024064801951, "grad_norm": 0.34765625, "learning_rate": 8.2443233971481e-06, "loss": 1.9195, "step": 20475 }, { "epoch": 0.6606346703339914, "grad_norm": 0.34375, "learning_rate": 8.242918345229133e-06, "loss": 1.9, "step": 20476 }, { "epoch": 0.6606669341877878, "grad_norm": 0.34375, "learning_rate": 8.241513367687278e-06, "loss": 1.8825, "step": 20477 }, { "epoch": 0.6606991980415841, "grad_norm": 0.36328125, "learning_rate": 8.240108464537987e-06, "loss": 1.8658, "step": 20478 }, { "epoch": 0.6607314618953805, "grad_norm": 0.349609375, "learning_rate": 8.23870363579674e-06, "loss": 1.9212, "step": 20479 }, { "epoch": 0.6607637257491767, "grad_norm": 0.353515625, "learning_rate": 8.237298881478995e-06, "loss": 1.8516, "step": 20480 }, { "epoch": 0.6607959896029731, "grad_norm": 0.365234375, "learning_rate": 8.235894201600205e-06, "loss": 1.8648, "step": 20481 }, { "epoch": 0.6608282534567694, "grad_norm": 0.361328125, "learning_rate": 8.23448959617585e-06, "loss": 1.924, "step": 20482 }, { "epoch": 0.6608605173105658, "grad_norm": 0.361328125, "learning_rate": 8.233085065221369e-06, "loss": 1.9042, "step": 20483 }, { "epoch": 0.6608927811643621, "grad_norm": 0.357421875, "learning_rate": 8.231680608752233e-06, "loss": 1.883, "step": 20484 }, { "epoch": 0.6609250450181585, "grad_norm": 0.3515625, "learning_rate": 8.230276226783913e-06, "loss": 1.8828, "step": 20485 }, { "epoch": 0.6609573088719548, "grad_norm": 0.349609375, "learning_rate": 8.228871919331845e-06, "loss": 1.8627, "step": 20486 }, { "epoch": 0.6609895727257512, "grad_norm": 0.357421875, "learning_rate": 8.227467686411496e-06, "loss": 1.8933, "step": 20487 }, { "epoch": 0.6610218365795475, "grad_norm": 0.36328125, "learning_rate": 8.226063528038336e-06, "loss": 1.8635, "step": 20488 }, { "epoch": 0.6610541004333439, "grad_norm": 0.373046875, "learning_rate": 8.224659444227799e-06, "loss": 1.8761, "step": 20489 }, { "epoch": 0.6610863642871402, "grad_norm": 0.373046875, "learning_rate": 8.223255434995352e-06, "loss": 1.882, "step": 20490 }, { "epoch": 0.6611186281409366, "grad_norm": 0.3515625, "learning_rate": 8.22185150035645e-06, "loss": 1.8705, "step": 20491 }, { "epoch": 0.661150891994733, "grad_norm": 0.40625, "learning_rate": 8.220447640326539e-06, "loss": 1.8506, "step": 20492 }, { "epoch": 0.6611831558485293, "grad_norm": 0.36328125, "learning_rate": 8.219043854921082e-06, "loss": 1.8571, "step": 20493 }, { "epoch": 0.6612154197023257, "grad_norm": 0.375, "learning_rate": 8.217640144155524e-06, "loss": 1.8547, "step": 20494 }, { "epoch": 0.661247683556122, "grad_norm": 0.35546875, "learning_rate": 8.216236508045315e-06, "loss": 1.8515, "step": 20495 }, { "epoch": 0.6612799474099184, "grad_norm": 0.3671875, "learning_rate": 8.214832946605913e-06, "loss": 1.8762, "step": 20496 }, { "epoch": 0.6613122112637146, "grad_norm": 0.375, "learning_rate": 8.21342945985276e-06, "loss": 1.8918, "step": 20497 }, { "epoch": 0.661344475117511, "grad_norm": 0.400390625, "learning_rate": 8.212026047801303e-06, "loss": 1.8935, "step": 20498 }, { "epoch": 0.6613767389713073, "grad_norm": 0.384765625, "learning_rate": 8.210622710466998e-06, "loss": 1.8502, "step": 20499 }, { "epoch": 0.6614090028251037, "grad_norm": 0.408203125, "learning_rate": 8.209219447865287e-06, "loss": 1.8592, "step": 20500 }, { "epoch": 0.6614412666789, "grad_norm": 0.365234375, "learning_rate": 8.207816260011613e-06, "loss": 1.8589, "step": 20501 }, { "epoch": 0.6614735305326964, "grad_norm": 0.3671875, "learning_rate": 8.20641314692143e-06, "loss": 1.8607, "step": 20502 }, { "epoch": 0.6615057943864927, "grad_norm": 0.416015625, "learning_rate": 8.205010108610179e-06, "loss": 1.8854, "step": 20503 }, { "epoch": 0.6615380582402891, "grad_norm": 0.388671875, "learning_rate": 8.203607145093294e-06, "loss": 1.8728, "step": 20504 }, { "epoch": 0.6615703220940854, "grad_norm": 0.447265625, "learning_rate": 8.202204256386231e-06, "loss": 1.8398, "step": 20505 }, { "epoch": 0.6616025859478818, "grad_norm": 0.353515625, "learning_rate": 8.200801442504427e-06, "loss": 1.8772, "step": 20506 }, { "epoch": 0.6616348498016781, "grad_norm": 0.498046875, "learning_rate": 8.199398703463318e-06, "loss": 1.8789, "step": 20507 }, { "epoch": 0.6616671136554745, "grad_norm": 0.373046875, "learning_rate": 8.197996039278355e-06, "loss": 1.858, "step": 20508 }, { "epoch": 0.6616993775092708, "grad_norm": 0.42578125, "learning_rate": 8.196593449964974e-06, "loss": 1.8796, "step": 20509 }, { "epoch": 0.6617316413630672, "grad_norm": 0.359375, "learning_rate": 8.195190935538603e-06, "loss": 1.9015, "step": 20510 }, { "epoch": 0.6617639052168635, "grad_norm": 0.404296875, "learning_rate": 8.193788496014694e-06, "loss": 1.874, "step": 20511 }, { "epoch": 0.6617961690706599, "grad_norm": 0.361328125, "learning_rate": 8.192386131408674e-06, "loss": 1.8549, "step": 20512 }, { "epoch": 0.6618284329244563, "grad_norm": 0.39453125, "learning_rate": 8.190983841735996e-06, "loss": 1.9123, "step": 20513 }, { "epoch": 0.6618606967782525, "grad_norm": 0.369140625, "learning_rate": 8.18958162701207e-06, "loss": 1.8292, "step": 20514 }, { "epoch": 0.6618929606320489, "grad_norm": 0.353515625, "learning_rate": 8.188179487252345e-06, "loss": 1.8348, "step": 20515 }, { "epoch": 0.6619252244858452, "grad_norm": 0.365234375, "learning_rate": 8.186777422472263e-06, "loss": 1.841, "step": 20516 }, { "epoch": 0.6619574883396416, "grad_norm": 0.390625, "learning_rate": 8.185375432687236e-06, "loss": 1.8533, "step": 20517 }, { "epoch": 0.6619897521934379, "grad_norm": 0.36328125, "learning_rate": 8.18397351791271e-06, "loss": 1.8781, "step": 20518 }, { "epoch": 0.6620220160472343, "grad_norm": 0.361328125, "learning_rate": 8.182571678164125e-06, "loss": 1.8799, "step": 20519 }, { "epoch": 0.6620542799010306, "grad_norm": 0.359375, "learning_rate": 8.181169913456885e-06, "loss": 1.8358, "step": 20520 }, { "epoch": 0.662086543754827, "grad_norm": 0.37109375, "learning_rate": 8.179768223806444e-06, "loss": 1.8352, "step": 20521 }, { "epoch": 0.6621188076086233, "grad_norm": 0.365234375, "learning_rate": 8.17836660922822e-06, "loss": 1.8704, "step": 20522 }, { "epoch": 0.6621510714624197, "grad_norm": 0.37109375, "learning_rate": 8.176965069737636e-06, "loss": 1.866, "step": 20523 }, { "epoch": 0.662183335316216, "grad_norm": 0.392578125, "learning_rate": 8.175563605350135e-06, "loss": 1.8513, "step": 20524 }, { "epoch": 0.6622155991700124, "grad_norm": 0.353515625, "learning_rate": 8.17416221608113e-06, "loss": 1.8174, "step": 20525 }, { "epoch": 0.6622478630238087, "grad_norm": 0.431640625, "learning_rate": 8.172760901946047e-06, "loss": 1.8149, "step": 20526 }, { "epoch": 0.6622801268776051, "grad_norm": 0.359375, "learning_rate": 8.17135966296032e-06, "loss": 1.815, "step": 20527 }, { "epoch": 0.6623123907314014, "grad_norm": 0.388671875, "learning_rate": 8.169958499139365e-06, "loss": 1.8227, "step": 20528 }, { "epoch": 0.6623446545851978, "grad_norm": 0.392578125, "learning_rate": 8.168557410498603e-06, "loss": 1.8953, "step": 20529 }, { "epoch": 0.662376918438994, "grad_norm": 0.357421875, "learning_rate": 8.167156397053467e-06, "loss": 1.8221, "step": 20530 }, { "epoch": 0.6624091822927904, "grad_norm": 0.396484375, "learning_rate": 8.165755458819368e-06, "loss": 1.8637, "step": 20531 }, { "epoch": 0.6624414461465867, "grad_norm": 0.33984375, "learning_rate": 8.164354595811727e-06, "loss": 1.8616, "step": 20532 }, { "epoch": 0.6624737100003831, "grad_norm": 0.357421875, "learning_rate": 8.162953808045972e-06, "loss": 1.8502, "step": 20533 }, { "epoch": 0.6625059738541795, "grad_norm": 0.404296875, "learning_rate": 8.161553095537517e-06, "loss": 1.8836, "step": 20534 }, { "epoch": 0.6625382377079758, "grad_norm": 0.359375, "learning_rate": 8.160152458301774e-06, "loss": 1.8797, "step": 20535 }, { "epoch": 0.6625705015617722, "grad_norm": 0.4453125, "learning_rate": 8.15875189635417e-06, "loss": 1.8417, "step": 20536 }, { "epoch": 0.6626027654155685, "grad_norm": 0.384765625, "learning_rate": 8.157351409710118e-06, "loss": 1.8993, "step": 20537 }, { "epoch": 0.6626350292693649, "grad_norm": 0.376953125, "learning_rate": 8.155950998385027e-06, "loss": 1.9059, "step": 20538 }, { "epoch": 0.6626672931231612, "grad_norm": 0.36328125, "learning_rate": 8.154550662394325e-06, "loss": 1.855, "step": 20539 }, { "epoch": 0.6626995569769576, "grad_norm": 0.3671875, "learning_rate": 8.153150401753416e-06, "loss": 1.8672, "step": 20540 }, { "epoch": 0.6627318208307539, "grad_norm": 0.412109375, "learning_rate": 8.15175021647771e-06, "loss": 1.8285, "step": 20541 }, { "epoch": 0.6627640846845503, "grad_norm": 0.38671875, "learning_rate": 8.15035010658263e-06, "loss": 1.916, "step": 20542 }, { "epoch": 0.6627963485383466, "grad_norm": 0.451171875, "learning_rate": 8.148950072083584e-06, "loss": 1.89, "step": 20543 }, { "epoch": 0.662828612392143, "grad_norm": 0.369140625, "learning_rate": 8.14755011299598e-06, "loss": 1.8694, "step": 20544 }, { "epoch": 0.6628608762459393, "grad_norm": 0.408203125, "learning_rate": 8.146150229335222e-06, "loss": 1.844, "step": 20545 }, { "epoch": 0.6628931400997357, "grad_norm": 0.40234375, "learning_rate": 8.144750421116726e-06, "loss": 1.8906, "step": 20546 }, { "epoch": 0.6629254039535319, "grad_norm": 0.3671875, "learning_rate": 8.143350688355911e-06, "loss": 1.8916, "step": 20547 }, { "epoch": 0.6629576678073283, "grad_norm": 0.416015625, "learning_rate": 8.14195103106816e-06, "loss": 1.8793, "step": 20548 }, { "epoch": 0.6629899316611246, "grad_norm": 0.37890625, "learning_rate": 8.140551449268893e-06, "loss": 1.8949, "step": 20549 }, { "epoch": 0.663022195514921, "grad_norm": 0.373046875, "learning_rate": 8.139151942973526e-06, "loss": 1.8527, "step": 20550 }, { "epoch": 0.6630544593687173, "grad_norm": 0.447265625, "learning_rate": 8.137752512197442e-06, "loss": 1.8768, "step": 20551 }, { "epoch": 0.6630867232225137, "grad_norm": 0.375, "learning_rate": 8.136353156956061e-06, "loss": 1.8936, "step": 20552 }, { "epoch": 0.6631189870763101, "grad_norm": 0.376953125, "learning_rate": 8.134953877264779e-06, "loss": 1.8761, "step": 20553 }, { "epoch": 0.6631512509301064, "grad_norm": 0.48828125, "learning_rate": 8.133554673138994e-06, "loss": 1.9026, "step": 20554 }, { "epoch": 0.6631835147839028, "grad_norm": 0.38671875, "learning_rate": 8.132155544594123e-06, "loss": 1.8771, "step": 20555 }, { "epoch": 0.6632157786376991, "grad_norm": 0.470703125, "learning_rate": 8.130756491645553e-06, "loss": 1.8804, "step": 20556 }, { "epoch": 0.6632480424914955, "grad_norm": 0.392578125, "learning_rate": 8.129357514308681e-06, "loss": 1.8542, "step": 20557 }, { "epoch": 0.6632803063452918, "grad_norm": 0.435546875, "learning_rate": 8.12795861259892e-06, "loss": 1.8358, "step": 20558 }, { "epoch": 0.6633125701990882, "grad_norm": 0.447265625, "learning_rate": 8.12655978653166e-06, "loss": 1.8586, "step": 20559 }, { "epoch": 0.6633448340528845, "grad_norm": 0.3671875, "learning_rate": 8.125161036122292e-06, "loss": 1.9053, "step": 20560 }, { "epoch": 0.6633770979066809, "grad_norm": 0.443359375, "learning_rate": 8.123762361386229e-06, "loss": 1.87, "step": 20561 }, { "epoch": 0.6634093617604772, "grad_norm": 0.408203125, "learning_rate": 8.122363762338852e-06, "loss": 1.8755, "step": 20562 }, { "epoch": 0.6634416256142736, "grad_norm": 0.376953125, "learning_rate": 8.120965238995557e-06, "loss": 1.9051, "step": 20563 }, { "epoch": 0.6634738894680698, "grad_norm": 0.38671875, "learning_rate": 8.119566791371749e-06, "loss": 1.8809, "step": 20564 }, { "epoch": 0.6635061533218662, "grad_norm": 0.419921875, "learning_rate": 8.118168419482812e-06, "loss": 1.8966, "step": 20565 }, { "epoch": 0.6635384171756625, "grad_norm": 0.35546875, "learning_rate": 8.116770123344134e-06, "loss": 1.8602, "step": 20566 }, { "epoch": 0.6635706810294589, "grad_norm": 0.375, "learning_rate": 8.11537190297112e-06, "loss": 1.8871, "step": 20567 }, { "epoch": 0.6636029448832552, "grad_norm": 0.373046875, "learning_rate": 8.113973758379154e-06, "loss": 1.8319, "step": 20568 }, { "epoch": 0.6636352087370516, "grad_norm": 0.40625, "learning_rate": 8.112575689583614e-06, "loss": 1.8737, "step": 20569 }, { "epoch": 0.6636674725908479, "grad_norm": 0.40234375, "learning_rate": 8.111177696599911e-06, "loss": 1.8332, "step": 20570 }, { "epoch": 0.6636997364446443, "grad_norm": 0.375, "learning_rate": 8.109779779443419e-06, "loss": 1.8993, "step": 20571 }, { "epoch": 0.6637320002984406, "grad_norm": 0.390625, "learning_rate": 8.10838193812953e-06, "loss": 1.8491, "step": 20572 }, { "epoch": 0.663764264152237, "grad_norm": 0.359375, "learning_rate": 8.106984172673621e-06, "loss": 1.8881, "step": 20573 }, { "epoch": 0.6637965280060334, "grad_norm": 0.361328125, "learning_rate": 8.105586483091092e-06, "loss": 1.8638, "step": 20574 }, { "epoch": 0.6638287918598297, "grad_norm": 0.37890625, "learning_rate": 8.104188869397319e-06, "loss": 1.9328, "step": 20575 }, { "epoch": 0.6638610557136261, "grad_norm": 0.361328125, "learning_rate": 8.102791331607685e-06, "loss": 1.9081, "step": 20576 }, { "epoch": 0.6638933195674224, "grad_norm": 0.3515625, "learning_rate": 8.101393869737573e-06, "loss": 1.856, "step": 20577 }, { "epoch": 0.6639255834212188, "grad_norm": 0.357421875, "learning_rate": 8.099996483802383e-06, "loss": 1.9048, "step": 20578 }, { "epoch": 0.6639578472750151, "grad_norm": 0.388671875, "learning_rate": 8.098599173817467e-06, "loss": 1.8437, "step": 20579 }, { "epoch": 0.6639901111288115, "grad_norm": 0.359375, "learning_rate": 8.097201939798222e-06, "loss": 1.8732, "step": 20580 }, { "epoch": 0.6640223749826077, "grad_norm": 0.416015625, "learning_rate": 8.095804781760038e-06, "loss": 1.8957, "step": 20581 }, { "epoch": 0.6640546388364041, "grad_norm": 0.359375, "learning_rate": 8.09440769971827e-06, "loss": 1.8621, "step": 20582 }, { "epoch": 0.6640869026902004, "grad_norm": 0.37109375, "learning_rate": 8.093010693688313e-06, "loss": 1.8845, "step": 20583 }, { "epoch": 0.6641191665439968, "grad_norm": 0.36328125, "learning_rate": 8.09161376368554e-06, "loss": 1.8746, "step": 20584 }, { "epoch": 0.6641514303977931, "grad_norm": 0.3515625, "learning_rate": 8.09021690972532e-06, "loss": 1.8388, "step": 20585 }, { "epoch": 0.6641836942515895, "grad_norm": 0.3671875, "learning_rate": 8.088820131823042e-06, "loss": 1.9272, "step": 20586 }, { "epoch": 0.6642159581053858, "grad_norm": 0.361328125, "learning_rate": 8.087423429994073e-06, "loss": 1.8782, "step": 20587 }, { "epoch": 0.6642482219591822, "grad_norm": 0.37109375, "learning_rate": 8.086026804253782e-06, "loss": 1.89, "step": 20588 }, { "epoch": 0.6642804858129785, "grad_norm": 0.36328125, "learning_rate": 8.084630254617554e-06, "loss": 1.8619, "step": 20589 }, { "epoch": 0.6643127496667749, "grad_norm": 0.359375, "learning_rate": 8.083233781100758e-06, "loss": 1.8539, "step": 20590 }, { "epoch": 0.6643450135205712, "grad_norm": 0.357421875, "learning_rate": 8.081837383718752e-06, "loss": 1.8804, "step": 20591 }, { "epoch": 0.6643772773743676, "grad_norm": 0.35546875, "learning_rate": 8.080441062486927e-06, "loss": 1.8793, "step": 20592 }, { "epoch": 0.6644095412281639, "grad_norm": 0.357421875, "learning_rate": 8.079044817420639e-06, "loss": 1.895, "step": 20593 }, { "epoch": 0.6644418050819603, "grad_norm": 0.353515625, "learning_rate": 8.077648648535255e-06, "loss": 1.8747, "step": 20594 }, { "epoch": 0.6644740689357567, "grad_norm": 0.349609375, "learning_rate": 8.076252555846157e-06, "loss": 1.8583, "step": 20595 }, { "epoch": 0.664506332789553, "grad_norm": 0.373046875, "learning_rate": 8.074856539368703e-06, "loss": 1.9118, "step": 20596 }, { "epoch": 0.6645385966433494, "grad_norm": 0.375, "learning_rate": 8.073460599118252e-06, "loss": 1.9086, "step": 20597 }, { "epoch": 0.6645708604971456, "grad_norm": 0.361328125, "learning_rate": 8.072064735110186e-06, "loss": 1.8802, "step": 20598 }, { "epoch": 0.664603124350942, "grad_norm": 0.380859375, "learning_rate": 8.070668947359861e-06, "loss": 1.838, "step": 20599 }, { "epoch": 0.6646353882047383, "grad_norm": 0.34765625, "learning_rate": 8.069273235882633e-06, "loss": 1.9342, "step": 20600 }, { "epoch": 0.6646676520585347, "grad_norm": 0.3515625, "learning_rate": 8.067877600693884e-06, "loss": 1.872, "step": 20601 }, { "epoch": 0.664699915912331, "grad_norm": 0.390625, "learning_rate": 8.06648204180896e-06, "loss": 1.9074, "step": 20602 }, { "epoch": 0.6647321797661274, "grad_norm": 0.345703125, "learning_rate": 8.06508655924323e-06, "loss": 1.8939, "step": 20603 }, { "epoch": 0.6647644436199237, "grad_norm": 0.34375, "learning_rate": 8.063691153012047e-06, "loss": 1.8925, "step": 20604 }, { "epoch": 0.6647967074737201, "grad_norm": 0.3828125, "learning_rate": 8.062295823130779e-06, "loss": 1.9334, "step": 20605 }, { "epoch": 0.6648289713275164, "grad_norm": 0.34375, "learning_rate": 8.060900569614782e-06, "loss": 1.8801, "step": 20606 }, { "epoch": 0.6648612351813128, "grad_norm": 0.361328125, "learning_rate": 8.059505392479411e-06, "loss": 1.8776, "step": 20607 }, { "epoch": 0.6648934990351091, "grad_norm": 0.349609375, "learning_rate": 8.05811029174003e-06, "loss": 1.8959, "step": 20608 }, { "epoch": 0.6649257628889055, "grad_norm": 0.390625, "learning_rate": 8.056715267411987e-06, "loss": 1.9021, "step": 20609 }, { "epoch": 0.6649580267427018, "grad_norm": 0.353515625, "learning_rate": 8.05532031951064e-06, "loss": 1.9018, "step": 20610 }, { "epoch": 0.6649902905964982, "grad_norm": 0.369140625, "learning_rate": 8.053925448051343e-06, "loss": 1.9129, "step": 20611 }, { "epoch": 0.6650225544502945, "grad_norm": 0.361328125, "learning_rate": 8.052530653049463e-06, "loss": 1.8977, "step": 20612 }, { "epoch": 0.6650548183040909, "grad_norm": 0.353515625, "learning_rate": 8.05113593452033e-06, "loss": 1.9189, "step": 20613 }, { "epoch": 0.6650870821578873, "grad_norm": 0.361328125, "learning_rate": 8.049741292479314e-06, "loss": 1.935, "step": 20614 }, { "epoch": 0.6651193460116835, "grad_norm": 0.375, "learning_rate": 8.04834672694176e-06, "loss": 1.8984, "step": 20615 }, { "epoch": 0.6651516098654799, "grad_norm": 0.34765625, "learning_rate": 8.04695223792301e-06, "loss": 1.9064, "step": 20616 }, { "epoch": 0.6651838737192762, "grad_norm": 0.408203125, "learning_rate": 8.045557825438429e-06, "loss": 1.8819, "step": 20617 }, { "epoch": 0.6652161375730726, "grad_norm": 0.33984375, "learning_rate": 8.04416348950336e-06, "loss": 1.8724, "step": 20618 }, { "epoch": 0.6652484014268689, "grad_norm": 0.353515625, "learning_rate": 8.042769230133142e-06, "loss": 1.8655, "step": 20619 }, { "epoch": 0.6652806652806653, "grad_norm": 0.47265625, "learning_rate": 8.041375047343132e-06, "loss": 1.9642, "step": 20620 }, { "epoch": 0.6653129291344616, "grad_norm": 0.4453125, "learning_rate": 8.039980941148677e-06, "loss": 1.9196, "step": 20621 }, { "epoch": 0.665345192988258, "grad_norm": 0.447265625, "learning_rate": 8.038586911565112e-06, "loss": 1.9871, "step": 20622 }, { "epoch": 0.6653774568420543, "grad_norm": 0.5234375, "learning_rate": 8.037192958607794e-06, "loss": 1.9964, "step": 20623 }, { "epoch": 0.6654097206958507, "grad_norm": 0.48828125, "learning_rate": 8.035799082292062e-06, "loss": 2.0423, "step": 20624 }, { "epoch": 0.665441984549647, "grad_norm": 0.76171875, "learning_rate": 8.034405282633252e-06, "loss": 2.0545, "step": 20625 }, { "epoch": 0.6654742484034434, "grad_norm": 0.52734375, "learning_rate": 8.033011559646714e-06, "loss": 2.0543, "step": 20626 }, { "epoch": 0.6655065122572397, "grad_norm": 0.5234375, "learning_rate": 8.031617913347791e-06, "loss": 2.0434, "step": 20627 }, { "epoch": 0.6655387761110361, "grad_norm": 0.490234375, "learning_rate": 8.030224343751808e-06, "loss": 2.038, "step": 20628 }, { "epoch": 0.6655710399648324, "grad_norm": 0.5546875, "learning_rate": 8.028830850874125e-06, "loss": 2.0344, "step": 20629 }, { "epoch": 0.6656033038186288, "grad_norm": 0.50390625, "learning_rate": 8.02743743473007e-06, "loss": 2.0386, "step": 20630 }, { "epoch": 0.665635567672425, "grad_norm": 0.486328125, "learning_rate": 8.026044095334976e-06, "loss": 2.0411, "step": 20631 }, { "epoch": 0.6656678315262214, "grad_norm": 0.43359375, "learning_rate": 8.02465083270419e-06, "loss": 2.0305, "step": 20632 }, { "epoch": 0.6657000953800177, "grad_norm": 0.50390625, "learning_rate": 8.023257646853045e-06, "loss": 2.0077, "step": 20633 }, { "epoch": 0.6657323592338141, "grad_norm": 0.416015625, "learning_rate": 8.021864537796875e-06, "loss": 1.971, "step": 20634 }, { "epoch": 0.6657646230876105, "grad_norm": 0.431640625, "learning_rate": 8.020471505551008e-06, "loss": 1.9581, "step": 20635 }, { "epoch": 0.6657968869414068, "grad_norm": 0.404296875, "learning_rate": 8.019078550130788e-06, "loss": 1.9008, "step": 20636 }, { "epoch": 0.6658291507952032, "grad_norm": 0.380859375, "learning_rate": 8.017685671551543e-06, "loss": 1.9493, "step": 20637 }, { "epoch": 0.6658614146489995, "grad_norm": 0.416015625, "learning_rate": 8.016292869828602e-06, "loss": 1.9859, "step": 20638 }, { "epoch": 0.6658936785027959, "grad_norm": 0.435546875, "learning_rate": 8.014900144977302e-06, "loss": 1.9758, "step": 20639 }, { "epoch": 0.6659259423565922, "grad_norm": 0.37890625, "learning_rate": 8.013507497012973e-06, "loss": 1.9822, "step": 20640 }, { "epoch": 0.6659582062103886, "grad_norm": 0.44140625, "learning_rate": 8.012114925950933e-06, "loss": 1.9845, "step": 20641 }, { "epoch": 0.6659904700641849, "grad_norm": 0.404296875, "learning_rate": 8.010722431806524e-06, "loss": 2.0011, "step": 20642 }, { "epoch": 0.6660227339179813, "grad_norm": 0.41015625, "learning_rate": 8.009330014595072e-06, "loss": 1.9045, "step": 20643 }, { "epoch": 0.6660549977717776, "grad_norm": 0.419921875, "learning_rate": 8.00793767433189e-06, "loss": 1.9738, "step": 20644 }, { "epoch": 0.666087261625574, "grad_norm": 0.435546875, "learning_rate": 8.006545411032323e-06, "loss": 1.9488, "step": 20645 }, { "epoch": 0.6661195254793703, "grad_norm": 0.419921875, "learning_rate": 8.005153224711687e-06, "loss": 1.9712, "step": 20646 }, { "epoch": 0.6661517893331667, "grad_norm": 0.515625, "learning_rate": 8.0037611153853e-06, "loss": 1.9622, "step": 20647 }, { "epoch": 0.6661840531869629, "grad_norm": 0.443359375, "learning_rate": 8.002369083068496e-06, "loss": 1.9561, "step": 20648 }, { "epoch": 0.6662163170407593, "grad_norm": 0.416015625, "learning_rate": 8.000977127776593e-06, "loss": 1.9468, "step": 20649 }, { "epoch": 0.6662485808945556, "grad_norm": 0.58984375, "learning_rate": 7.999585249524908e-06, "loss": 2.027, "step": 20650 }, { "epoch": 0.666280844748352, "grad_norm": 0.4765625, "learning_rate": 7.99819344832877e-06, "loss": 2.0068, "step": 20651 }, { "epoch": 0.6663131086021483, "grad_norm": 0.48828125, "learning_rate": 7.9968017242035e-06, "loss": 2.0478, "step": 20652 }, { "epoch": 0.6663453724559447, "grad_norm": 0.486328125, "learning_rate": 7.995410077164404e-06, "loss": 2.0689, "step": 20653 }, { "epoch": 0.6663776363097411, "grad_norm": 0.482421875, "learning_rate": 7.994018507226815e-06, "loss": 2.0359, "step": 20654 }, { "epoch": 0.6664099001635374, "grad_norm": 0.486328125, "learning_rate": 7.992627014406044e-06, "loss": 2.0661, "step": 20655 }, { "epoch": 0.6664421640173338, "grad_norm": 0.482421875, "learning_rate": 7.991235598717402e-06, "loss": 2.029, "step": 20656 }, { "epoch": 0.6664744278711301, "grad_norm": 0.435546875, "learning_rate": 7.989844260176215e-06, "loss": 2.0439, "step": 20657 }, { "epoch": 0.6665066917249265, "grad_norm": 0.4296875, "learning_rate": 7.988452998797796e-06, "loss": 2.047, "step": 20658 }, { "epoch": 0.6665389555787228, "grad_norm": 0.453125, "learning_rate": 7.987061814597449e-06, "loss": 2.0568, "step": 20659 }, { "epoch": 0.6665712194325192, "grad_norm": 0.408203125, "learning_rate": 7.9856707075905e-06, "loss": 2.0384, "step": 20660 }, { "epoch": 0.6666034832863155, "grad_norm": 0.400390625, "learning_rate": 7.984279677792256e-06, "loss": 2.0677, "step": 20661 }, { "epoch": 0.6666357471401119, "grad_norm": 0.4296875, "learning_rate": 7.982888725218024e-06, "loss": 2.0571, "step": 20662 }, { "epoch": 0.6666680109939082, "grad_norm": 0.400390625, "learning_rate": 7.981497849883123e-06, "loss": 2.0112, "step": 20663 }, { "epoch": 0.6667002748477046, "grad_norm": 0.4140625, "learning_rate": 7.980107051802857e-06, "loss": 2.0575, "step": 20664 }, { "epoch": 0.6667325387015008, "grad_norm": 0.4453125, "learning_rate": 7.97871633099254e-06, "loss": 2.0631, "step": 20665 }, { "epoch": 0.6667648025552972, "grad_norm": 0.3984375, "learning_rate": 7.977325687467469e-06, "loss": 2.0251, "step": 20666 }, { "epoch": 0.6667970664090935, "grad_norm": 0.43359375, "learning_rate": 7.975935121242965e-06, "loss": 2.0454, "step": 20667 }, { "epoch": 0.6668293302628899, "grad_norm": 0.39453125, "learning_rate": 7.974544632334329e-06, "loss": 2.0762, "step": 20668 }, { "epoch": 0.6668615941166862, "grad_norm": 0.412109375, "learning_rate": 7.973154220756857e-06, "loss": 2.0388, "step": 20669 }, { "epoch": 0.6668938579704826, "grad_norm": 0.41015625, "learning_rate": 7.97176388652587e-06, "loss": 2.0549, "step": 20670 }, { "epoch": 0.6669261218242789, "grad_norm": 0.431640625, "learning_rate": 7.970373629656665e-06, "loss": 2.0327, "step": 20671 }, { "epoch": 0.6669583856780753, "grad_norm": 0.39453125, "learning_rate": 7.968983450164536e-06, "loss": 2.0516, "step": 20672 }, { "epoch": 0.6669906495318716, "grad_norm": 0.408203125, "learning_rate": 7.9675933480648e-06, "loss": 2.0544, "step": 20673 }, { "epoch": 0.667022913385668, "grad_norm": 0.41015625, "learning_rate": 7.966203323372752e-06, "loss": 2.0474, "step": 20674 }, { "epoch": 0.6670551772394644, "grad_norm": 0.396484375, "learning_rate": 7.964813376103685e-06, "loss": 2.0424, "step": 20675 }, { "epoch": 0.6670874410932607, "grad_norm": 0.490234375, "learning_rate": 7.963423506272913e-06, "loss": 2.0307, "step": 20676 }, { "epoch": 0.6671197049470571, "grad_norm": 0.396484375, "learning_rate": 7.962033713895724e-06, "loss": 2.0432, "step": 20677 }, { "epoch": 0.6671519688008534, "grad_norm": 0.408203125, "learning_rate": 7.960643998987416e-06, "loss": 2.0043, "step": 20678 }, { "epoch": 0.6671842326546498, "grad_norm": 0.388671875, "learning_rate": 7.959254361563294e-06, "loss": 2.0129, "step": 20679 }, { "epoch": 0.667216496508446, "grad_norm": 0.396484375, "learning_rate": 7.957864801638646e-06, "loss": 2.0192, "step": 20680 }, { "epoch": 0.6672487603622425, "grad_norm": 0.392578125, "learning_rate": 7.95647531922877e-06, "loss": 2.0158, "step": 20681 }, { "epoch": 0.6672810242160387, "grad_norm": 0.41796875, "learning_rate": 7.955085914348961e-06, "loss": 2.0237, "step": 20682 }, { "epoch": 0.6673132880698351, "grad_norm": 0.412109375, "learning_rate": 7.953696587014516e-06, "loss": 1.9802, "step": 20683 }, { "epoch": 0.6673455519236314, "grad_norm": 0.416015625, "learning_rate": 7.952307337240717e-06, "loss": 2.0187, "step": 20684 }, { "epoch": 0.6673778157774278, "grad_norm": 0.48046875, "learning_rate": 7.950918165042868e-06, "loss": 2.0175, "step": 20685 }, { "epoch": 0.6674100796312241, "grad_norm": 0.4453125, "learning_rate": 7.949529070436256e-06, "loss": 2.0313, "step": 20686 }, { "epoch": 0.6674423434850205, "grad_norm": 0.423828125, "learning_rate": 7.948140053436163e-06, "loss": 2.0383, "step": 20687 }, { "epoch": 0.6674746073388168, "grad_norm": 0.431640625, "learning_rate": 7.946751114057892e-06, "loss": 1.9576, "step": 20688 }, { "epoch": 0.6675068711926132, "grad_norm": 0.44140625, "learning_rate": 7.945362252316725e-06, "loss": 2.0221, "step": 20689 }, { "epoch": 0.6675391350464095, "grad_norm": 0.435546875, "learning_rate": 7.943973468227941e-06, "loss": 2.0006, "step": 20690 }, { "epoch": 0.6675713989002059, "grad_norm": 0.4140625, "learning_rate": 7.942584761806844e-06, "loss": 1.9936, "step": 20691 }, { "epoch": 0.6676036627540022, "grad_norm": 0.47265625, "learning_rate": 7.94119613306871e-06, "loss": 2.0637, "step": 20692 }, { "epoch": 0.6676359266077986, "grad_norm": 0.546875, "learning_rate": 7.939807582028819e-06, "loss": 1.9886, "step": 20693 }, { "epoch": 0.6676681904615949, "grad_norm": 0.64453125, "learning_rate": 7.938419108702464e-06, "loss": 2.0777, "step": 20694 }, { "epoch": 0.6677004543153913, "grad_norm": 0.546875, "learning_rate": 7.937030713104932e-06, "loss": 2.0398, "step": 20695 }, { "epoch": 0.6677327181691877, "grad_norm": 0.462890625, "learning_rate": 7.935642395251495e-06, "loss": 2.0391, "step": 20696 }, { "epoch": 0.667764982022984, "grad_norm": 0.6875, "learning_rate": 7.934254155157432e-06, "loss": 1.9984, "step": 20697 }, { "epoch": 0.6677972458767804, "grad_norm": 0.447265625, "learning_rate": 7.932865992838037e-06, "loss": 2.0087, "step": 20698 }, { "epoch": 0.6678295097305766, "grad_norm": 0.6015625, "learning_rate": 7.931477908308584e-06, "loss": 2.0045, "step": 20699 }, { "epoch": 0.667861773584373, "grad_norm": 0.54296875, "learning_rate": 7.930089901584345e-06, "loss": 2.0024, "step": 20700 }, { "epoch": 0.6678940374381693, "grad_norm": 0.443359375, "learning_rate": 7.92870197268061e-06, "loss": 2.0293, "step": 20701 }, { "epoch": 0.6679263012919657, "grad_norm": 0.5234375, "learning_rate": 7.927314121612651e-06, "loss": 2.0401, "step": 20702 }, { "epoch": 0.667958565145762, "grad_norm": 0.44140625, "learning_rate": 7.925926348395738e-06, "loss": 2.0335, "step": 20703 }, { "epoch": 0.6679908289995584, "grad_norm": 0.515625, "learning_rate": 7.92453865304516e-06, "loss": 2.0309, "step": 20704 }, { "epoch": 0.6680230928533547, "grad_norm": 0.4375, "learning_rate": 7.923151035576183e-06, "loss": 2.0521, "step": 20705 }, { "epoch": 0.6680553567071511, "grad_norm": 0.52734375, "learning_rate": 7.921763496004076e-06, "loss": 1.9894, "step": 20706 }, { "epoch": 0.6680876205609474, "grad_norm": 0.392578125, "learning_rate": 7.920376034344127e-06, "loss": 2.0219, "step": 20707 }, { "epoch": 0.6681198844147438, "grad_norm": 0.478515625, "learning_rate": 7.918988650611598e-06, "loss": 2.0327, "step": 20708 }, { "epoch": 0.6681521482685401, "grad_norm": 0.41015625, "learning_rate": 7.917601344821756e-06, "loss": 1.9319, "step": 20709 }, { "epoch": 0.6681844121223365, "grad_norm": 0.443359375, "learning_rate": 7.916214116989884e-06, "loss": 1.9566, "step": 20710 }, { "epoch": 0.6682166759761328, "grad_norm": 0.421875, "learning_rate": 7.914826967131245e-06, "loss": 1.9566, "step": 20711 }, { "epoch": 0.6682489398299292, "grad_norm": 0.419921875, "learning_rate": 7.913439895261102e-06, "loss": 1.9986, "step": 20712 }, { "epoch": 0.6682812036837255, "grad_norm": 0.42578125, "learning_rate": 7.912052901394736e-06, "loss": 1.9184, "step": 20713 }, { "epoch": 0.6683134675375219, "grad_norm": 0.423828125, "learning_rate": 7.910665985547404e-06, "loss": 1.9258, "step": 20714 }, { "epoch": 0.6683457313913183, "grad_norm": 0.423828125, "learning_rate": 7.90927914773437e-06, "loss": 1.9542, "step": 20715 }, { "epoch": 0.6683779952451145, "grad_norm": 0.41015625, "learning_rate": 7.907892387970911e-06, "loss": 1.933, "step": 20716 }, { "epoch": 0.6684102590989109, "grad_norm": 0.400390625, "learning_rate": 7.906505706272285e-06, "loss": 1.9265, "step": 20717 }, { "epoch": 0.6684425229527072, "grad_norm": 0.5703125, "learning_rate": 7.905119102653748e-06, "loss": 1.92, "step": 20718 }, { "epoch": 0.6684747868065036, "grad_norm": 0.48046875, "learning_rate": 7.903732577130577e-06, "loss": 1.9682, "step": 20719 }, { "epoch": 0.6685070506602999, "grad_norm": 0.3828125, "learning_rate": 7.902346129718027e-06, "loss": 1.9369, "step": 20720 }, { "epoch": 0.6685393145140963, "grad_norm": 0.40234375, "learning_rate": 7.900959760431352e-06, "loss": 1.8816, "step": 20721 }, { "epoch": 0.6685715783678926, "grad_norm": 0.40234375, "learning_rate": 7.899573469285829e-06, "loss": 1.8987, "step": 20722 }, { "epoch": 0.668603842221689, "grad_norm": 0.380859375, "learning_rate": 7.898187256296702e-06, "loss": 1.8498, "step": 20723 }, { "epoch": 0.6686361060754853, "grad_norm": 0.38671875, "learning_rate": 7.896801121479237e-06, "loss": 1.8325, "step": 20724 }, { "epoch": 0.6686683699292817, "grad_norm": 0.390625, "learning_rate": 7.895415064848685e-06, "loss": 1.8864, "step": 20725 }, { "epoch": 0.668700633783078, "grad_norm": 0.375, "learning_rate": 7.89402908642031e-06, "loss": 1.8387, "step": 20726 }, { "epoch": 0.6687328976368744, "grad_norm": 0.376953125, "learning_rate": 7.892643186209367e-06, "loss": 1.8504, "step": 20727 }, { "epoch": 0.6687651614906707, "grad_norm": 0.361328125, "learning_rate": 7.891257364231104e-06, "loss": 1.8412, "step": 20728 }, { "epoch": 0.6687974253444671, "grad_norm": 0.373046875, "learning_rate": 7.889871620500785e-06, "loss": 1.8752, "step": 20729 }, { "epoch": 0.6688296891982634, "grad_norm": 0.3828125, "learning_rate": 7.888485955033658e-06, "loss": 1.8616, "step": 20730 }, { "epoch": 0.6688619530520598, "grad_norm": 0.36328125, "learning_rate": 7.88710036784497e-06, "loss": 1.8813, "step": 20731 }, { "epoch": 0.668894216905856, "grad_norm": 0.35546875, "learning_rate": 7.885714858949984e-06, "loss": 1.8488, "step": 20732 }, { "epoch": 0.6689264807596524, "grad_norm": 0.37109375, "learning_rate": 7.884329428363946e-06, "loss": 1.8768, "step": 20733 }, { "epoch": 0.6689587446134487, "grad_norm": 0.37890625, "learning_rate": 7.8829440761021e-06, "loss": 1.8589, "step": 20734 }, { "epoch": 0.6689910084672451, "grad_norm": 0.373046875, "learning_rate": 7.881558802179704e-06, "loss": 1.8472, "step": 20735 }, { "epoch": 0.6690232723210415, "grad_norm": 0.384765625, "learning_rate": 7.880173606612e-06, "loss": 1.8448, "step": 20736 }, { "epoch": 0.6690555361748378, "grad_norm": 0.376953125, "learning_rate": 7.878788489414236e-06, "loss": 1.8865, "step": 20737 }, { "epoch": 0.6690878000286342, "grad_norm": 0.369140625, "learning_rate": 7.877403450601662e-06, "loss": 1.8636, "step": 20738 }, { "epoch": 0.6691200638824305, "grad_norm": 0.6953125, "learning_rate": 7.876018490189522e-06, "loss": 1.9758, "step": 20739 }, { "epoch": 0.6691523277362269, "grad_norm": 0.4609375, "learning_rate": 7.874633608193054e-06, "loss": 1.9831, "step": 20740 }, { "epoch": 0.6691845915900232, "grad_norm": 0.796875, "learning_rate": 7.873248804627512e-06, "loss": 1.9794, "step": 20741 }, { "epoch": 0.6692168554438196, "grad_norm": 0.54296875, "learning_rate": 7.871864079508138e-06, "loss": 1.9275, "step": 20742 }, { "epoch": 0.6692491192976159, "grad_norm": 0.609375, "learning_rate": 7.870479432850163e-06, "loss": 1.9184, "step": 20743 }, { "epoch": 0.6692813831514123, "grad_norm": 0.65234375, "learning_rate": 7.86909486466884e-06, "loss": 1.9421, "step": 20744 }, { "epoch": 0.6693136470052086, "grad_norm": 0.50390625, "learning_rate": 7.867710374979407e-06, "loss": 1.9606, "step": 20745 }, { "epoch": 0.669345910859005, "grad_norm": 0.5546875, "learning_rate": 7.866325963797094e-06, "loss": 1.9499, "step": 20746 }, { "epoch": 0.6693781747128013, "grad_norm": 0.546875, "learning_rate": 7.864941631137153e-06, "loss": 1.9203, "step": 20747 }, { "epoch": 0.6694104385665977, "grad_norm": 0.515625, "learning_rate": 7.863557377014818e-06, "loss": 1.9531, "step": 20748 }, { "epoch": 0.6694427024203939, "grad_norm": 0.3984375, "learning_rate": 7.862173201445316e-06, "loss": 1.9698, "step": 20749 }, { "epoch": 0.6694749662741903, "grad_norm": 0.46875, "learning_rate": 7.860789104443897e-06, "loss": 1.9318, "step": 20750 }, { "epoch": 0.6695072301279866, "grad_norm": 0.5546875, "learning_rate": 7.859405086025791e-06, "loss": 1.9518, "step": 20751 }, { "epoch": 0.669539493981783, "grad_norm": 0.455078125, "learning_rate": 7.858021146206224e-06, "loss": 1.9917, "step": 20752 }, { "epoch": 0.6695717578355793, "grad_norm": 0.390625, "learning_rate": 7.856637285000442e-06, "loss": 1.9477, "step": 20753 }, { "epoch": 0.6696040216893757, "grad_norm": 0.435546875, "learning_rate": 7.855253502423673e-06, "loss": 1.9404, "step": 20754 }, { "epoch": 0.6696362855431721, "grad_norm": 0.423828125, "learning_rate": 7.853869798491147e-06, "loss": 1.9604, "step": 20755 }, { "epoch": 0.6696685493969684, "grad_norm": 0.443359375, "learning_rate": 7.852486173218092e-06, "loss": 1.9765, "step": 20756 }, { "epoch": 0.6697008132507648, "grad_norm": 0.41015625, "learning_rate": 7.851102626619745e-06, "loss": 1.993, "step": 20757 }, { "epoch": 0.6697330771045611, "grad_norm": 0.43359375, "learning_rate": 7.849719158711334e-06, "loss": 1.9442, "step": 20758 }, { "epoch": 0.6697653409583575, "grad_norm": 0.44921875, "learning_rate": 7.848335769508075e-06, "loss": 1.9543, "step": 20759 }, { "epoch": 0.6697976048121538, "grad_norm": 0.44140625, "learning_rate": 7.846952459025215e-06, "loss": 1.9676, "step": 20760 }, { "epoch": 0.6698298686659502, "grad_norm": 0.40234375, "learning_rate": 7.84556922727797e-06, "loss": 1.9907, "step": 20761 }, { "epoch": 0.6698621325197465, "grad_norm": 0.41015625, "learning_rate": 7.84418607428156e-06, "loss": 1.9506, "step": 20762 }, { "epoch": 0.6698943963735429, "grad_norm": 0.5, "learning_rate": 7.842803000051222e-06, "loss": 1.9822, "step": 20763 }, { "epoch": 0.6699266602273392, "grad_norm": 0.4140625, "learning_rate": 7.841420004602173e-06, "loss": 1.9487, "step": 20764 }, { "epoch": 0.6699589240811356, "grad_norm": 0.408203125, "learning_rate": 7.840037087949634e-06, "loss": 1.9496, "step": 20765 }, { "epoch": 0.6699911879349318, "grad_norm": 0.396484375, "learning_rate": 7.838654250108833e-06, "loss": 1.924, "step": 20766 }, { "epoch": 0.6700234517887282, "grad_norm": 0.404296875, "learning_rate": 7.837271491094991e-06, "loss": 2.0028, "step": 20767 }, { "epoch": 0.6700557156425245, "grad_norm": 0.39453125, "learning_rate": 7.835888810923317e-06, "loss": 1.9058, "step": 20768 }, { "epoch": 0.6700879794963209, "grad_norm": 0.37109375, "learning_rate": 7.834506209609049e-06, "loss": 1.8882, "step": 20769 }, { "epoch": 0.6701202433501172, "grad_norm": 0.37109375, "learning_rate": 7.833123687167395e-06, "loss": 1.8294, "step": 20770 }, { "epoch": 0.6701525072039136, "grad_norm": 0.373046875, "learning_rate": 7.831741243613566e-06, "loss": 1.8872, "step": 20771 }, { "epoch": 0.6701847710577099, "grad_norm": 0.365234375, "learning_rate": 7.830358878962795e-06, "loss": 1.8823, "step": 20772 }, { "epoch": 0.6702170349115063, "grad_norm": 0.357421875, "learning_rate": 7.828976593230289e-06, "loss": 1.8898, "step": 20773 }, { "epoch": 0.6702492987653026, "grad_norm": 0.376953125, "learning_rate": 7.827594386431258e-06, "loss": 1.9165, "step": 20774 }, { "epoch": 0.670281562619099, "grad_norm": 0.388671875, "learning_rate": 7.826212258580937e-06, "loss": 1.8407, "step": 20775 }, { "epoch": 0.6703138264728954, "grad_norm": 0.34765625, "learning_rate": 7.82483020969451e-06, "loss": 1.8543, "step": 20776 }, { "epoch": 0.6703460903266917, "grad_norm": 0.392578125, "learning_rate": 7.823448239787206e-06, "loss": 1.867, "step": 20777 }, { "epoch": 0.6703783541804881, "grad_norm": 0.373046875, "learning_rate": 7.82206634887424e-06, "loss": 1.8909, "step": 20778 }, { "epoch": 0.6704106180342844, "grad_norm": 0.365234375, "learning_rate": 7.820684536970819e-06, "loss": 1.8488, "step": 20779 }, { "epoch": 0.6704428818880808, "grad_norm": 0.35546875, "learning_rate": 7.819302804092145e-06, "loss": 1.8838, "step": 20780 }, { "epoch": 0.670475145741877, "grad_norm": 0.35546875, "learning_rate": 7.817921150253442e-06, "loss": 1.8627, "step": 20781 }, { "epoch": 0.6705074095956735, "grad_norm": 0.36328125, "learning_rate": 7.816539575469909e-06, "loss": 1.8884, "step": 20782 }, { "epoch": 0.6705396734494697, "grad_norm": 0.40234375, "learning_rate": 7.815158079756752e-06, "loss": 1.8361, "step": 20783 }, { "epoch": 0.6705719373032661, "grad_norm": 0.41796875, "learning_rate": 7.813776663129185e-06, "loss": 1.8188, "step": 20784 }, { "epoch": 0.6706042011570624, "grad_norm": 0.361328125, "learning_rate": 7.812395325602409e-06, "loss": 1.9008, "step": 20785 }, { "epoch": 0.6706364650108588, "grad_norm": 0.3515625, "learning_rate": 7.811014067191627e-06, "loss": 1.8948, "step": 20786 }, { "epoch": 0.6706687288646551, "grad_norm": 0.384765625, "learning_rate": 7.809632887912041e-06, "loss": 1.8541, "step": 20787 }, { "epoch": 0.6707009927184515, "grad_norm": 0.359375, "learning_rate": 7.808251787778863e-06, "loss": 1.8585, "step": 20788 }, { "epoch": 0.6707332565722478, "grad_norm": 0.349609375, "learning_rate": 7.806870766807292e-06, "loss": 1.8845, "step": 20789 }, { "epoch": 0.6707655204260442, "grad_norm": 0.357421875, "learning_rate": 7.805489825012518e-06, "loss": 1.8422, "step": 20790 }, { "epoch": 0.6707977842798405, "grad_norm": 0.357421875, "learning_rate": 7.804108962409759e-06, "loss": 1.8964, "step": 20791 }, { "epoch": 0.6708300481336369, "grad_norm": 0.392578125, "learning_rate": 7.802728179014205e-06, "loss": 1.9029, "step": 20792 }, { "epoch": 0.6708623119874332, "grad_norm": 0.361328125, "learning_rate": 7.80134747484105e-06, "loss": 1.8281, "step": 20793 }, { "epoch": 0.6708945758412296, "grad_norm": 0.376953125, "learning_rate": 7.799966849905504e-06, "loss": 1.8725, "step": 20794 }, { "epoch": 0.6709268396950259, "grad_norm": 0.369140625, "learning_rate": 7.798586304222759e-06, "loss": 1.8668, "step": 20795 }, { "epoch": 0.6709591035488223, "grad_norm": 0.380859375, "learning_rate": 7.797205837808002e-06, "loss": 1.8824, "step": 20796 }, { "epoch": 0.6709913674026187, "grad_norm": 0.3828125, "learning_rate": 7.795825450676442e-06, "loss": 1.8436, "step": 20797 }, { "epoch": 0.671023631256415, "grad_norm": 0.359375, "learning_rate": 7.794445142843268e-06, "loss": 1.8422, "step": 20798 }, { "epoch": 0.6710558951102114, "grad_norm": 0.3515625, "learning_rate": 7.793064914323664e-06, "loss": 1.8805, "step": 20799 }, { "epoch": 0.6710881589640076, "grad_norm": 0.40625, "learning_rate": 7.791684765132842e-06, "loss": 1.8678, "step": 20800 }, { "epoch": 0.671120422817804, "grad_norm": 0.359375, "learning_rate": 7.790304695285978e-06, "loss": 1.8927, "step": 20801 }, { "epoch": 0.6711526866716003, "grad_norm": 0.4453125, "learning_rate": 7.788924704798265e-06, "loss": 1.896, "step": 20802 }, { "epoch": 0.6711849505253967, "grad_norm": 0.453125, "learning_rate": 7.7875447936849e-06, "loss": 1.8945, "step": 20803 }, { "epoch": 0.671217214379193, "grad_norm": 0.404296875, "learning_rate": 7.786164961961068e-06, "loss": 1.8819, "step": 20804 }, { "epoch": 0.6712494782329894, "grad_norm": 0.396484375, "learning_rate": 7.784785209641953e-06, "loss": 1.8718, "step": 20805 }, { "epoch": 0.6712817420867857, "grad_norm": 0.365234375, "learning_rate": 7.783405536742754e-06, "loss": 1.8647, "step": 20806 }, { "epoch": 0.6713140059405821, "grad_norm": 0.400390625, "learning_rate": 7.78202594327864e-06, "loss": 1.8614, "step": 20807 }, { "epoch": 0.6713462697943784, "grad_norm": 0.466796875, "learning_rate": 7.780646429264806e-06, "loss": 1.8423, "step": 20808 }, { "epoch": 0.6713785336481748, "grad_norm": 0.3515625, "learning_rate": 7.779266994716446e-06, "loss": 1.8863, "step": 20809 }, { "epoch": 0.6714107975019711, "grad_norm": 0.380859375, "learning_rate": 7.77788763964873e-06, "loss": 1.89, "step": 20810 }, { "epoch": 0.6714430613557675, "grad_norm": 0.404296875, "learning_rate": 7.77650836407684e-06, "loss": 1.8734, "step": 20811 }, { "epoch": 0.6714753252095638, "grad_norm": 0.3515625, "learning_rate": 7.775129168015974e-06, "loss": 1.8082, "step": 20812 }, { "epoch": 0.6715075890633602, "grad_norm": 0.37109375, "learning_rate": 7.7737500514813e-06, "loss": 1.8671, "step": 20813 }, { "epoch": 0.6715398529171565, "grad_norm": 0.470703125, "learning_rate": 7.772371014487995e-06, "loss": 1.8714, "step": 20814 }, { "epoch": 0.6715721167709529, "grad_norm": 0.390625, "learning_rate": 7.770992057051254e-06, "loss": 1.8586, "step": 20815 }, { "epoch": 0.6716043806247493, "grad_norm": 0.42578125, "learning_rate": 7.769613179186244e-06, "loss": 1.8304, "step": 20816 }, { "epoch": 0.6716366444785455, "grad_norm": 0.47265625, "learning_rate": 7.768234380908144e-06, "loss": 1.8721, "step": 20817 }, { "epoch": 0.6716689083323419, "grad_norm": 0.349609375, "learning_rate": 7.766855662232126e-06, "loss": 1.8644, "step": 20818 }, { "epoch": 0.6717011721861382, "grad_norm": 0.4375, "learning_rate": 7.765477023173379e-06, "loss": 1.8087, "step": 20819 }, { "epoch": 0.6717334360399346, "grad_norm": 0.42578125, "learning_rate": 7.764098463747071e-06, "loss": 1.8265, "step": 20820 }, { "epoch": 0.6717656998937309, "grad_norm": 0.365234375, "learning_rate": 7.76271998396837e-06, "loss": 1.8652, "step": 20821 }, { "epoch": 0.6717979637475273, "grad_norm": 0.48828125, "learning_rate": 7.761341583852462e-06, "loss": 1.803, "step": 20822 }, { "epoch": 0.6718302276013236, "grad_norm": 0.3828125, "learning_rate": 7.759963263414513e-06, "loss": 1.8493, "step": 20823 }, { "epoch": 0.67186249145512, "grad_norm": 0.39453125, "learning_rate": 7.758585022669686e-06, "loss": 1.871, "step": 20824 }, { "epoch": 0.6718947553089163, "grad_norm": 0.42578125, "learning_rate": 7.757206861633168e-06, "loss": 1.8316, "step": 20825 }, { "epoch": 0.6719270191627127, "grad_norm": 0.36328125, "learning_rate": 7.755828780320122e-06, "loss": 1.8173, "step": 20826 }, { "epoch": 0.671959283016509, "grad_norm": 0.36328125, "learning_rate": 7.754450778745708e-06, "loss": 1.8424, "step": 20827 }, { "epoch": 0.6719915468703054, "grad_norm": 0.44140625, "learning_rate": 7.753072856925108e-06, "loss": 1.8603, "step": 20828 }, { "epoch": 0.6720238107241017, "grad_norm": 0.384765625, "learning_rate": 7.751695014873484e-06, "loss": 1.8928, "step": 20829 }, { "epoch": 0.6720560745778981, "grad_norm": 0.416015625, "learning_rate": 7.750317252605995e-06, "loss": 1.8861, "step": 20830 }, { "epoch": 0.6720883384316944, "grad_norm": 0.416015625, "learning_rate": 7.748939570137819e-06, "loss": 1.8384, "step": 20831 }, { "epoch": 0.6721206022854908, "grad_norm": 0.369140625, "learning_rate": 7.747561967484114e-06, "loss": 1.8902, "step": 20832 }, { "epoch": 0.672152866139287, "grad_norm": 0.45703125, "learning_rate": 7.746184444660037e-06, "loss": 1.8787, "step": 20833 }, { "epoch": 0.6721851299930834, "grad_norm": 0.37890625, "learning_rate": 7.744807001680764e-06, "loss": 1.9014, "step": 20834 }, { "epoch": 0.6722173938468797, "grad_norm": 0.365234375, "learning_rate": 7.743429638561451e-06, "loss": 1.8862, "step": 20835 }, { "epoch": 0.6722496577006761, "grad_norm": 0.36328125, "learning_rate": 7.742052355317255e-06, "loss": 1.7848, "step": 20836 }, { "epoch": 0.6722819215544725, "grad_norm": 0.396484375, "learning_rate": 7.740675151963349e-06, "loss": 1.8547, "step": 20837 }, { "epoch": 0.6723141854082688, "grad_norm": 0.365234375, "learning_rate": 7.739298028514871e-06, "loss": 1.8555, "step": 20838 }, { "epoch": 0.6723464492620652, "grad_norm": 0.400390625, "learning_rate": 7.737920984986993e-06, "loss": 1.8048, "step": 20839 }, { "epoch": 0.6723787131158615, "grad_norm": 0.373046875, "learning_rate": 7.736544021394883e-06, "loss": 1.8713, "step": 20840 }, { "epoch": 0.6724109769696579, "grad_norm": 0.41796875, "learning_rate": 7.735167137753671e-06, "loss": 1.8448, "step": 20841 }, { "epoch": 0.6724432408234542, "grad_norm": 0.361328125, "learning_rate": 7.733790334078532e-06, "loss": 1.8477, "step": 20842 }, { "epoch": 0.6724755046772506, "grad_norm": 0.361328125, "learning_rate": 7.73241361038462e-06, "loss": 1.8728, "step": 20843 }, { "epoch": 0.6725077685310469, "grad_norm": 0.408203125, "learning_rate": 7.731036966687082e-06, "loss": 1.7917, "step": 20844 }, { "epoch": 0.6725400323848433, "grad_norm": 0.369140625, "learning_rate": 7.729660403001074e-06, "loss": 1.8089, "step": 20845 }, { "epoch": 0.6725722962386396, "grad_norm": 0.400390625, "learning_rate": 7.728283919341752e-06, "loss": 1.8403, "step": 20846 }, { "epoch": 0.672604560092436, "grad_norm": 0.3671875, "learning_rate": 7.726907515724263e-06, "loss": 1.8263, "step": 20847 }, { "epoch": 0.6726368239462323, "grad_norm": 0.34765625, "learning_rate": 7.725531192163758e-06, "loss": 1.8908, "step": 20848 }, { "epoch": 0.6726690878000287, "grad_norm": 0.4375, "learning_rate": 7.724154948675382e-06, "loss": 1.8216, "step": 20849 }, { "epoch": 0.6727013516538249, "grad_norm": 0.41796875, "learning_rate": 7.722778785274294e-06, "loss": 1.83, "step": 20850 }, { "epoch": 0.6727336155076213, "grad_norm": 0.40234375, "learning_rate": 7.721402701975639e-06, "loss": 1.8638, "step": 20851 }, { "epoch": 0.6727658793614176, "grad_norm": 0.39453125, "learning_rate": 7.720026698794555e-06, "loss": 1.892, "step": 20852 }, { "epoch": 0.672798143215214, "grad_norm": 0.353515625, "learning_rate": 7.718650775746198e-06, "loss": 1.8589, "step": 20853 }, { "epoch": 0.6728304070690103, "grad_norm": 0.419921875, "learning_rate": 7.71727493284571e-06, "loss": 1.8599, "step": 20854 }, { "epoch": 0.6728626709228067, "grad_norm": 0.390625, "learning_rate": 7.71589917010823e-06, "loss": 1.8632, "step": 20855 }, { "epoch": 0.6728949347766031, "grad_norm": 0.41796875, "learning_rate": 7.714523487548912e-06, "loss": 1.8778, "step": 20856 }, { "epoch": 0.6729271986303994, "grad_norm": 0.375, "learning_rate": 7.713147885182894e-06, "loss": 1.8167, "step": 20857 }, { "epoch": 0.6729594624841958, "grad_norm": 0.361328125, "learning_rate": 7.711772363025309e-06, "loss": 1.8649, "step": 20858 }, { "epoch": 0.6729917263379921, "grad_norm": 0.3515625, "learning_rate": 7.710396921091312e-06, "loss": 1.8765, "step": 20859 }, { "epoch": 0.6730239901917885, "grad_norm": 0.36328125, "learning_rate": 7.709021559396036e-06, "loss": 1.8393, "step": 20860 }, { "epoch": 0.6730562540455848, "grad_norm": 0.37890625, "learning_rate": 7.707646277954613e-06, "loss": 1.8531, "step": 20861 }, { "epoch": 0.6730885178993812, "grad_norm": 0.369140625, "learning_rate": 7.706271076782195e-06, "loss": 1.8889, "step": 20862 }, { "epoch": 0.6731207817531775, "grad_norm": 0.384765625, "learning_rate": 7.704895955893914e-06, "loss": 1.8753, "step": 20863 }, { "epoch": 0.6731530456069739, "grad_norm": 0.375, "learning_rate": 7.703520915304895e-06, "loss": 1.841, "step": 20864 }, { "epoch": 0.6731853094607702, "grad_norm": 0.37890625, "learning_rate": 7.702145955030294e-06, "loss": 1.8327, "step": 20865 }, { "epoch": 0.6732175733145666, "grad_norm": 0.40625, "learning_rate": 7.700771075085232e-06, "loss": 1.8914, "step": 20866 }, { "epoch": 0.6732498371683628, "grad_norm": 0.380859375, "learning_rate": 7.69939627548484e-06, "loss": 1.8349, "step": 20867 }, { "epoch": 0.6732821010221592, "grad_norm": 0.3515625, "learning_rate": 7.698021556244269e-06, "loss": 1.8844, "step": 20868 }, { "epoch": 0.6733143648759555, "grad_norm": 0.40625, "learning_rate": 7.696646917378625e-06, "loss": 1.86, "step": 20869 }, { "epoch": 0.6733466287297519, "grad_norm": 0.361328125, "learning_rate": 7.695272358903055e-06, "loss": 1.846, "step": 20870 }, { "epoch": 0.6733788925835482, "grad_norm": 0.3828125, "learning_rate": 7.693897880832697e-06, "loss": 1.8688, "step": 20871 }, { "epoch": 0.6734111564373446, "grad_norm": 0.376953125, "learning_rate": 7.692523483182658e-06, "loss": 1.8502, "step": 20872 }, { "epoch": 0.6734434202911409, "grad_norm": 0.373046875, "learning_rate": 7.691149165968077e-06, "loss": 1.8919, "step": 20873 }, { "epoch": 0.6734756841449373, "grad_norm": 0.359375, "learning_rate": 7.689774929204096e-06, "loss": 1.9123, "step": 20874 }, { "epoch": 0.6735079479987336, "grad_norm": 0.373046875, "learning_rate": 7.688400772905815e-06, "loss": 1.882, "step": 20875 }, { "epoch": 0.67354021185253, "grad_norm": 0.359375, "learning_rate": 7.68702669708838e-06, "loss": 1.8619, "step": 20876 }, { "epoch": 0.6735724757063264, "grad_norm": 0.3515625, "learning_rate": 7.6856527017669e-06, "loss": 1.9194, "step": 20877 }, { "epoch": 0.6736047395601227, "grad_norm": 0.37890625, "learning_rate": 7.684278786956517e-06, "loss": 1.8756, "step": 20878 }, { "epoch": 0.6736370034139191, "grad_norm": 0.359375, "learning_rate": 7.682904952672343e-06, "loss": 1.8615, "step": 20879 }, { "epoch": 0.6736692672677154, "grad_norm": 0.40625, "learning_rate": 7.681531198929497e-06, "loss": 1.8444, "step": 20880 }, { "epoch": 0.6737015311215118, "grad_norm": 0.365234375, "learning_rate": 7.680157525743112e-06, "loss": 1.8298, "step": 20881 }, { "epoch": 0.673733794975308, "grad_norm": 0.388671875, "learning_rate": 7.678783933128302e-06, "loss": 1.8499, "step": 20882 }, { "epoch": 0.6737660588291045, "grad_norm": 0.3671875, "learning_rate": 7.67741042110018e-06, "loss": 1.8458, "step": 20883 }, { "epoch": 0.6737983226829007, "grad_norm": 0.359375, "learning_rate": 7.676036989673874e-06, "loss": 1.877, "step": 20884 }, { "epoch": 0.6738305865366971, "grad_norm": 0.359375, "learning_rate": 7.6746636388645e-06, "loss": 1.9133, "step": 20885 }, { "epoch": 0.6738628503904934, "grad_norm": 0.365234375, "learning_rate": 7.67329036868717e-06, "loss": 1.8935, "step": 20886 }, { "epoch": 0.6738951142442898, "grad_norm": 0.369140625, "learning_rate": 7.671917179157008e-06, "loss": 1.8723, "step": 20887 }, { "epoch": 0.6739273780980861, "grad_norm": 0.361328125, "learning_rate": 7.670544070289122e-06, "loss": 1.8937, "step": 20888 }, { "epoch": 0.6739596419518825, "grad_norm": 0.439453125, "learning_rate": 7.669171042098627e-06, "loss": 1.8433, "step": 20889 }, { "epoch": 0.6739919058056788, "grad_norm": 0.3515625, "learning_rate": 7.667798094600642e-06, "loss": 1.8792, "step": 20890 }, { "epoch": 0.6740241696594752, "grad_norm": 0.41015625, "learning_rate": 7.666425227810275e-06, "loss": 1.8399, "step": 20891 }, { "epoch": 0.6740564335132715, "grad_norm": 0.376953125, "learning_rate": 7.665052441742631e-06, "loss": 1.8804, "step": 20892 }, { "epoch": 0.6740886973670679, "grad_norm": 0.3671875, "learning_rate": 7.663679736412835e-06, "loss": 1.8973, "step": 20893 }, { "epoch": 0.6741209612208642, "grad_norm": 0.353515625, "learning_rate": 7.662307111835991e-06, "loss": 1.8942, "step": 20894 }, { "epoch": 0.6741532250746606, "grad_norm": 0.375, "learning_rate": 7.660934568027199e-06, "loss": 1.9213, "step": 20895 }, { "epoch": 0.6741854889284569, "grad_norm": 0.3828125, "learning_rate": 7.65956210500158e-06, "loss": 1.8871, "step": 20896 }, { "epoch": 0.6742177527822533, "grad_norm": 0.361328125, "learning_rate": 7.658189722774236e-06, "loss": 1.8326, "step": 20897 }, { "epoch": 0.6742500166360497, "grad_norm": 0.3671875, "learning_rate": 7.656817421360266e-06, "loss": 1.8649, "step": 20898 }, { "epoch": 0.674282280489846, "grad_norm": 0.42578125, "learning_rate": 7.655445200774792e-06, "loss": 1.8689, "step": 20899 }, { "epoch": 0.6743145443436424, "grad_norm": 0.376953125, "learning_rate": 7.654073061032901e-06, "loss": 1.9297, "step": 20900 }, { "epoch": 0.6743468081974386, "grad_norm": 0.4140625, "learning_rate": 7.6527010021497e-06, "loss": 1.8928, "step": 20901 }, { "epoch": 0.674379072051235, "grad_norm": 0.396484375, "learning_rate": 7.651329024140307e-06, "loss": 1.8785, "step": 20902 }, { "epoch": 0.6744113359050313, "grad_norm": 0.40234375, "learning_rate": 7.6499571270198e-06, "loss": 1.9162, "step": 20903 }, { "epoch": 0.6744435997588277, "grad_norm": 0.50390625, "learning_rate": 7.648585310803296e-06, "loss": 1.8752, "step": 20904 }, { "epoch": 0.674475863612624, "grad_norm": 0.361328125, "learning_rate": 7.6472135755059e-06, "loss": 1.9415, "step": 20905 }, { "epoch": 0.6745081274664204, "grad_norm": 0.34765625, "learning_rate": 7.64584192114269e-06, "loss": 1.8995, "step": 20906 }, { "epoch": 0.6745403913202167, "grad_norm": 0.361328125, "learning_rate": 7.644470347728784e-06, "loss": 1.9001, "step": 20907 }, { "epoch": 0.6745726551740131, "grad_norm": 0.34765625, "learning_rate": 7.643098855279263e-06, "loss": 1.844, "step": 20908 }, { "epoch": 0.6746049190278094, "grad_norm": 0.36328125, "learning_rate": 7.64172744380924e-06, "loss": 1.8511, "step": 20909 }, { "epoch": 0.6746371828816058, "grad_norm": 0.357421875, "learning_rate": 7.640356113333803e-06, "loss": 1.8911, "step": 20910 }, { "epoch": 0.6746694467354021, "grad_norm": 0.353515625, "learning_rate": 7.638984863868041e-06, "loss": 1.8512, "step": 20911 }, { "epoch": 0.6747017105891985, "grad_norm": 0.353515625, "learning_rate": 7.637613695427055e-06, "loss": 1.9271, "step": 20912 }, { "epoch": 0.6747339744429948, "grad_norm": 0.34375, "learning_rate": 7.636242608025938e-06, "loss": 1.8593, "step": 20913 }, { "epoch": 0.6747662382967912, "grad_norm": 0.36328125, "learning_rate": 7.634871601679772e-06, "loss": 1.8897, "step": 20914 }, { "epoch": 0.6747985021505875, "grad_norm": 0.359375, "learning_rate": 7.633500676403665e-06, "loss": 1.8649, "step": 20915 }, { "epoch": 0.6748307660043839, "grad_norm": 0.349609375, "learning_rate": 7.632129832212695e-06, "loss": 1.8992, "step": 20916 }, { "epoch": 0.6748630298581803, "grad_norm": 0.37890625, "learning_rate": 7.630759069121949e-06, "loss": 1.8909, "step": 20917 }, { "epoch": 0.6748952937119765, "grad_norm": 0.35546875, "learning_rate": 7.629388387146526e-06, "loss": 1.871, "step": 20918 }, { "epoch": 0.6749275575657729, "grad_norm": 0.375, "learning_rate": 7.628017786301508e-06, "loss": 1.9056, "step": 20919 }, { "epoch": 0.6749598214195692, "grad_norm": 0.349609375, "learning_rate": 7.626647266601976e-06, "loss": 1.8956, "step": 20920 }, { "epoch": 0.6749920852733656, "grad_norm": 0.3828125, "learning_rate": 7.625276828063029e-06, "loss": 1.9427, "step": 20921 }, { "epoch": 0.6750243491271619, "grad_norm": 0.384765625, "learning_rate": 7.6239064706997415e-06, "loss": 1.9186, "step": 20922 }, { "epoch": 0.6750566129809583, "grad_norm": 0.4375, "learning_rate": 7.622536194527195e-06, "loss": 1.8396, "step": 20923 }, { "epoch": 0.6750888768347546, "grad_norm": 0.365234375, "learning_rate": 7.621165999560484e-06, "loss": 1.8871, "step": 20924 }, { "epoch": 0.675121140688551, "grad_norm": 0.3671875, "learning_rate": 7.6197958858146836e-06, "loss": 1.8827, "step": 20925 }, { "epoch": 0.6751534045423473, "grad_norm": 0.3828125, "learning_rate": 7.618425853304869e-06, "loss": 1.9221, "step": 20926 }, { "epoch": 0.6751856683961437, "grad_norm": 0.353515625, "learning_rate": 7.617055902046139e-06, "loss": 1.8455, "step": 20927 }, { "epoch": 0.67521793224994, "grad_norm": 0.349609375, "learning_rate": 7.615686032053549e-06, "loss": 1.9385, "step": 20928 }, { "epoch": 0.6752501961037364, "grad_norm": 0.388671875, "learning_rate": 7.614316243342191e-06, "loss": 1.9022, "step": 20929 }, { "epoch": 0.6752824599575327, "grad_norm": 0.421875, "learning_rate": 7.6129465359271524e-06, "loss": 1.8967, "step": 20930 }, { "epoch": 0.6753147238113291, "grad_norm": 0.35546875, "learning_rate": 7.611576909823488e-06, "loss": 1.8849, "step": 20931 }, { "epoch": 0.6753469876651254, "grad_norm": 0.388671875, "learning_rate": 7.610207365046282e-06, "loss": 1.9183, "step": 20932 }, { "epoch": 0.6753792515189218, "grad_norm": 0.37109375, "learning_rate": 7.608837901610627e-06, "loss": 1.8848, "step": 20933 }, { "epoch": 0.675411515372718, "grad_norm": 0.3671875, "learning_rate": 7.607468519531568e-06, "loss": 1.8984, "step": 20934 }, { "epoch": 0.6754437792265144, "grad_norm": 0.4140625, "learning_rate": 7.606099218824192e-06, "loss": 1.8707, "step": 20935 }, { "epoch": 0.6754760430803107, "grad_norm": 0.38671875, "learning_rate": 7.604729999503584e-06, "loss": 1.8267, "step": 20936 }, { "epoch": 0.6755083069341071, "grad_norm": 0.376953125, "learning_rate": 7.6033608615847925e-06, "loss": 1.9004, "step": 20937 }, { "epoch": 0.6755405707879035, "grad_norm": 0.431640625, "learning_rate": 7.6019918050829025e-06, "loss": 1.8447, "step": 20938 }, { "epoch": 0.6755728346416998, "grad_norm": 0.375, "learning_rate": 7.60062283001298e-06, "loss": 1.8529, "step": 20939 }, { "epoch": 0.6756050984954962, "grad_norm": 0.40625, "learning_rate": 7.599253936390087e-06, "loss": 1.8461, "step": 20940 }, { "epoch": 0.6756373623492925, "grad_norm": 0.390625, "learning_rate": 7.597885124229303e-06, "loss": 1.8747, "step": 20941 }, { "epoch": 0.6756696262030889, "grad_norm": 0.373046875, "learning_rate": 7.596516393545684e-06, "loss": 1.8762, "step": 20942 }, { "epoch": 0.6757018900568852, "grad_norm": 0.392578125, "learning_rate": 7.595147744354305e-06, "loss": 1.8275, "step": 20943 }, { "epoch": 0.6757341539106816, "grad_norm": 0.4140625, "learning_rate": 7.59377917667023e-06, "loss": 1.8641, "step": 20944 }, { "epoch": 0.6757664177644779, "grad_norm": 0.365234375, "learning_rate": 7.5924106905085145e-06, "loss": 1.8651, "step": 20945 }, { "epoch": 0.6757986816182743, "grad_norm": 0.486328125, "learning_rate": 7.591042285884233e-06, "loss": 1.8587, "step": 20946 }, { "epoch": 0.6758309454720706, "grad_norm": 0.384765625, "learning_rate": 7.589673962812442e-06, "loss": 1.8711, "step": 20947 }, { "epoch": 0.675863209325867, "grad_norm": 0.412109375, "learning_rate": 7.5883057213081994e-06, "loss": 1.8433, "step": 20948 }, { "epoch": 0.6758954731796633, "grad_norm": 0.439453125, "learning_rate": 7.586937561386576e-06, "loss": 1.8593, "step": 20949 }, { "epoch": 0.6759277370334597, "grad_norm": 0.3671875, "learning_rate": 7.585569483062626e-06, "loss": 1.8996, "step": 20950 }, { "epoch": 0.6759600008872559, "grad_norm": 0.42578125, "learning_rate": 7.584201486351401e-06, "loss": 1.8807, "step": 20951 }, { "epoch": 0.6759922647410523, "grad_norm": 0.37890625, "learning_rate": 7.582833571267972e-06, "loss": 1.8453, "step": 20952 }, { "epoch": 0.6760245285948486, "grad_norm": 0.37109375, "learning_rate": 7.581465737827392e-06, "loss": 1.8597, "step": 20953 }, { "epoch": 0.676056792448645, "grad_norm": 0.369140625, "learning_rate": 7.580097986044708e-06, "loss": 1.8583, "step": 20954 }, { "epoch": 0.6760890563024413, "grad_norm": 0.384765625, "learning_rate": 7.578730315934989e-06, "loss": 1.871, "step": 20955 }, { "epoch": 0.6761213201562377, "grad_norm": 0.375, "learning_rate": 7.5773627275132804e-06, "loss": 1.9031, "step": 20956 }, { "epoch": 0.676153584010034, "grad_norm": 0.41015625, "learning_rate": 7.575995220794634e-06, "loss": 1.8527, "step": 20957 }, { "epoch": 0.6761858478638304, "grad_norm": 0.4140625, "learning_rate": 7.5746277957941175e-06, "loss": 1.83, "step": 20958 }, { "epoch": 0.6762181117176268, "grad_norm": 0.3671875, "learning_rate": 7.573260452526758e-06, "loss": 1.8921, "step": 20959 }, { "epoch": 0.6762503755714231, "grad_norm": 0.392578125, "learning_rate": 7.5718931910076214e-06, "loss": 1.8922, "step": 20960 }, { "epoch": 0.6762826394252195, "grad_norm": 0.359375, "learning_rate": 7.570526011251768e-06, "loss": 1.8195, "step": 20961 }, { "epoch": 0.6763149032790158, "grad_norm": 0.353515625, "learning_rate": 7.5691589132742225e-06, "loss": 1.8746, "step": 20962 }, { "epoch": 0.6763471671328122, "grad_norm": 0.384765625, "learning_rate": 7.567791897090042e-06, "loss": 1.8896, "step": 20963 }, { "epoch": 0.6763794309866085, "grad_norm": 0.359375, "learning_rate": 7.566424962714291e-06, "loss": 1.8945, "step": 20964 }, { "epoch": 0.6764116948404049, "grad_norm": 0.36328125, "learning_rate": 7.565058110161989e-06, "loss": 1.8827, "step": 20965 }, { "epoch": 0.6764439586942012, "grad_norm": 0.361328125, "learning_rate": 7.563691339448192e-06, "loss": 1.8684, "step": 20966 }, { "epoch": 0.6764762225479976, "grad_norm": 0.400390625, "learning_rate": 7.562324650587959e-06, "loss": 1.9198, "step": 20967 }, { "epoch": 0.6765084864017938, "grad_norm": 0.349609375, "learning_rate": 7.560958043596309e-06, "loss": 1.8581, "step": 20968 }, { "epoch": 0.6765407502555902, "grad_norm": 0.4375, "learning_rate": 7.559591518488301e-06, "loss": 1.9054, "step": 20969 }, { "epoch": 0.6765730141093865, "grad_norm": 0.3515625, "learning_rate": 7.5582250752789715e-06, "loss": 1.8268, "step": 20970 }, { "epoch": 0.6766052779631829, "grad_norm": 0.3828125, "learning_rate": 7.556858713983358e-06, "loss": 1.9013, "step": 20971 }, { "epoch": 0.6766375418169792, "grad_norm": 0.423828125, "learning_rate": 7.555492434616508e-06, "loss": 1.8654, "step": 20972 }, { "epoch": 0.6766698056707756, "grad_norm": 0.392578125, "learning_rate": 7.554126237193458e-06, "loss": 1.8626, "step": 20973 }, { "epoch": 0.6767020695245719, "grad_norm": 0.38671875, "learning_rate": 7.552760121729237e-06, "loss": 1.8278, "step": 20974 }, { "epoch": 0.6767343333783683, "grad_norm": 0.36328125, "learning_rate": 7.551394088238898e-06, "loss": 1.8364, "step": 20975 }, { "epoch": 0.6767665972321646, "grad_norm": 0.3515625, "learning_rate": 7.550028136737461e-06, "loss": 1.8347, "step": 20976 }, { "epoch": 0.676798861085961, "grad_norm": 0.390625, "learning_rate": 7.548662267239978e-06, "loss": 1.8342, "step": 20977 }, { "epoch": 0.6768311249397574, "grad_norm": 0.37890625, "learning_rate": 7.547296479761474e-06, "loss": 1.8442, "step": 20978 }, { "epoch": 0.6768633887935537, "grad_norm": 0.388671875, "learning_rate": 7.545930774316979e-06, "loss": 1.8462, "step": 20979 }, { "epoch": 0.6768956526473501, "grad_norm": 0.3984375, "learning_rate": 7.544565150921534e-06, "loss": 1.8495, "step": 20980 }, { "epoch": 0.6769279165011464, "grad_norm": 0.365234375, "learning_rate": 7.543199609590169e-06, "loss": 1.8708, "step": 20981 }, { "epoch": 0.6769601803549428, "grad_norm": 0.431640625, "learning_rate": 7.5418341503379085e-06, "loss": 1.8129, "step": 20982 }, { "epoch": 0.676992444208739, "grad_norm": 0.357421875, "learning_rate": 7.54046877317979e-06, "loss": 1.8597, "step": 20983 }, { "epoch": 0.6770247080625355, "grad_norm": 0.435546875, "learning_rate": 7.539103478130843e-06, "loss": 1.858, "step": 20984 }, { "epoch": 0.6770569719163317, "grad_norm": 0.375, "learning_rate": 7.537738265206085e-06, "loss": 1.8356, "step": 20985 }, { "epoch": 0.6770892357701281, "grad_norm": 0.4609375, "learning_rate": 7.536373134420556e-06, "loss": 1.877, "step": 20986 }, { "epoch": 0.6771214996239244, "grad_norm": 0.404296875, "learning_rate": 7.5350080857892774e-06, "loss": 1.8323, "step": 20987 }, { "epoch": 0.6771537634777208, "grad_norm": 0.423828125, "learning_rate": 7.533643119327269e-06, "loss": 1.8481, "step": 20988 }, { "epoch": 0.6771860273315171, "grad_norm": 0.4609375, "learning_rate": 7.532278235049573e-06, "loss": 1.8674, "step": 20989 }, { "epoch": 0.6772182911853135, "grad_norm": 0.3671875, "learning_rate": 7.530913432971188e-06, "loss": 1.879, "step": 20990 }, { "epoch": 0.6772505550391098, "grad_norm": 0.453125, "learning_rate": 7.529548713107152e-06, "loss": 1.8722, "step": 20991 }, { "epoch": 0.6772828188929062, "grad_norm": 0.4296875, "learning_rate": 7.528184075472494e-06, "loss": 1.8413, "step": 20992 }, { "epoch": 0.6773150827467025, "grad_norm": 0.40625, "learning_rate": 7.526819520082213e-06, "loss": 1.896, "step": 20993 }, { "epoch": 0.6773473466004989, "grad_norm": 0.404296875, "learning_rate": 7.525455046951344e-06, "loss": 1.8834, "step": 20994 }, { "epoch": 0.6773796104542952, "grad_norm": 0.53125, "learning_rate": 7.524090656094914e-06, "loss": 1.8913, "step": 20995 }, { "epoch": 0.6774118743080916, "grad_norm": 0.3984375, "learning_rate": 7.522726347527918e-06, "loss": 1.8675, "step": 20996 }, { "epoch": 0.6774441381618879, "grad_norm": 0.4765625, "learning_rate": 7.521362121265388e-06, "loss": 1.8607, "step": 20997 }, { "epoch": 0.6774764020156843, "grad_norm": 0.462890625, "learning_rate": 7.519997977322349e-06, "loss": 1.8268, "step": 20998 }, { "epoch": 0.6775086658694807, "grad_norm": 0.39453125, "learning_rate": 7.518633915713796e-06, "loss": 1.8898, "step": 20999 }, { "epoch": 0.677540929723277, "grad_norm": 0.439453125, "learning_rate": 7.517269936454759e-06, "loss": 1.8504, "step": 21000 }, { "epoch": 0.6775731935770734, "grad_norm": 0.3828125, "learning_rate": 7.515906039560246e-06, "loss": 1.8757, "step": 21001 }, { "epoch": 0.6776054574308696, "grad_norm": 0.373046875, "learning_rate": 7.514542225045264e-06, "loss": 1.872, "step": 21002 }, { "epoch": 0.677637721284666, "grad_norm": 0.41796875, "learning_rate": 7.513178492924838e-06, "loss": 1.8718, "step": 21003 }, { "epoch": 0.6776699851384623, "grad_norm": 0.38671875, "learning_rate": 7.511814843213972e-06, "loss": 1.8339, "step": 21004 }, { "epoch": 0.6777022489922587, "grad_norm": 0.384765625, "learning_rate": 7.510451275927669e-06, "loss": 1.8876, "step": 21005 }, { "epoch": 0.677734512846055, "grad_norm": 0.376953125, "learning_rate": 7.5090877910809544e-06, "loss": 1.8114, "step": 21006 }, { "epoch": 0.6777667766998514, "grad_norm": 0.365234375, "learning_rate": 7.507724388688824e-06, "loss": 1.8767, "step": 21007 }, { "epoch": 0.6777990405536477, "grad_norm": 0.388671875, "learning_rate": 7.506361068766284e-06, "loss": 1.8669, "step": 21008 }, { "epoch": 0.6778313044074441, "grad_norm": 0.392578125, "learning_rate": 7.504997831328351e-06, "loss": 1.8837, "step": 21009 }, { "epoch": 0.6778635682612404, "grad_norm": 0.3671875, "learning_rate": 7.50363467639002e-06, "loss": 1.8781, "step": 21010 }, { "epoch": 0.6778958321150368, "grad_norm": 0.37109375, "learning_rate": 7.502271603966306e-06, "loss": 1.8716, "step": 21011 }, { "epoch": 0.6779280959688331, "grad_norm": 0.373046875, "learning_rate": 7.5009086140722085e-06, "loss": 1.8423, "step": 21012 }, { "epoch": 0.6779603598226295, "grad_norm": 0.40234375, "learning_rate": 7.49954570672272e-06, "loss": 1.8456, "step": 21013 }, { "epoch": 0.6779926236764258, "grad_norm": 0.3671875, "learning_rate": 7.49818288193286e-06, "loss": 1.8782, "step": 21014 }, { "epoch": 0.6780248875302222, "grad_norm": 0.380859375, "learning_rate": 7.496820139717621e-06, "loss": 1.846, "step": 21015 }, { "epoch": 0.6780571513840185, "grad_norm": 0.392578125, "learning_rate": 7.495457480091996e-06, "loss": 1.8551, "step": 21016 }, { "epoch": 0.6780894152378149, "grad_norm": 0.36328125, "learning_rate": 7.494094903070999e-06, "loss": 1.8862, "step": 21017 }, { "epoch": 0.6781216790916113, "grad_norm": 0.365234375, "learning_rate": 7.492732408669619e-06, "loss": 1.8387, "step": 21018 }, { "epoch": 0.6781539429454075, "grad_norm": 0.4140625, "learning_rate": 7.491369996902851e-06, "loss": 1.8321, "step": 21019 }, { "epoch": 0.6781862067992039, "grad_norm": 0.357421875, "learning_rate": 7.490007667785705e-06, "loss": 1.9016, "step": 21020 }, { "epoch": 0.6782184706530002, "grad_norm": 0.416015625, "learning_rate": 7.488645421333157e-06, "loss": 1.8871, "step": 21021 }, { "epoch": 0.6782507345067966, "grad_norm": 0.40234375, "learning_rate": 7.487283257560209e-06, "loss": 1.8825, "step": 21022 }, { "epoch": 0.6782829983605929, "grad_norm": 0.388671875, "learning_rate": 7.485921176481871e-06, "loss": 1.8807, "step": 21023 }, { "epoch": 0.6783152622143893, "grad_norm": 0.392578125, "learning_rate": 7.4845591781131105e-06, "loss": 1.8487, "step": 21024 }, { "epoch": 0.6783475260681856, "grad_norm": 0.375, "learning_rate": 7.4831972624689295e-06, "loss": 1.8979, "step": 21025 }, { "epoch": 0.678379789921982, "grad_norm": 0.486328125, "learning_rate": 7.481835429564335e-06, "loss": 1.8638, "step": 21026 }, { "epoch": 0.6784120537757783, "grad_norm": 0.373046875, "learning_rate": 7.480473679414288e-06, "loss": 1.8559, "step": 21027 }, { "epoch": 0.6784443176295747, "grad_norm": 0.408203125, "learning_rate": 7.479112012033799e-06, "loss": 1.8463, "step": 21028 }, { "epoch": 0.678476581483371, "grad_norm": 0.439453125, "learning_rate": 7.477750427437851e-06, "loss": 1.8698, "step": 21029 }, { "epoch": 0.6785088453371674, "grad_norm": 0.376953125, "learning_rate": 7.476388925641422e-06, "loss": 1.8706, "step": 21030 }, { "epoch": 0.6785411091909637, "grad_norm": 0.3984375, "learning_rate": 7.4750275066595135e-06, "loss": 1.8217, "step": 21031 }, { "epoch": 0.6785733730447601, "grad_norm": 0.37890625, "learning_rate": 7.4736661705071025e-06, "loss": 1.8728, "step": 21032 }, { "epoch": 0.6786056368985564, "grad_norm": 0.37109375, "learning_rate": 7.47230491719917e-06, "loss": 1.8486, "step": 21033 }, { "epoch": 0.6786379007523528, "grad_norm": 0.353515625, "learning_rate": 7.470943746750712e-06, "loss": 1.9017, "step": 21034 }, { "epoch": 0.678670164606149, "grad_norm": 0.373046875, "learning_rate": 7.4695826591767e-06, "loss": 1.8606, "step": 21035 }, { "epoch": 0.6787024284599454, "grad_norm": 0.41015625, "learning_rate": 7.468221654492118e-06, "loss": 1.8922, "step": 21036 }, { "epoch": 0.6787346923137417, "grad_norm": 0.3828125, "learning_rate": 7.466860732711952e-06, "loss": 1.8953, "step": 21037 }, { "epoch": 0.6787669561675381, "grad_norm": 0.3984375, "learning_rate": 7.4654998938511795e-06, "loss": 1.9151, "step": 21038 }, { "epoch": 0.6787992200213345, "grad_norm": 0.365234375, "learning_rate": 7.464139137924772e-06, "loss": 1.9077, "step": 21039 }, { "epoch": 0.6788314838751308, "grad_norm": 0.37109375, "learning_rate": 7.462778464947723e-06, "loss": 1.869, "step": 21040 }, { "epoch": 0.6788637477289272, "grad_norm": 0.40625, "learning_rate": 7.4614178749349944e-06, "loss": 1.8769, "step": 21041 }, { "epoch": 0.6788960115827235, "grad_norm": 0.40625, "learning_rate": 7.460057367901575e-06, "loss": 1.8699, "step": 21042 }, { "epoch": 0.6789282754365199, "grad_norm": 0.419921875, "learning_rate": 7.458696943862434e-06, "loss": 1.8838, "step": 21043 }, { "epoch": 0.6789605392903162, "grad_norm": 0.4375, "learning_rate": 7.457336602832544e-06, "loss": 1.8736, "step": 21044 }, { "epoch": 0.6789928031441126, "grad_norm": 0.349609375, "learning_rate": 7.455976344826886e-06, "loss": 1.8489, "step": 21045 }, { "epoch": 0.6790250669979089, "grad_norm": 0.38671875, "learning_rate": 7.454616169860428e-06, "loss": 1.8587, "step": 21046 }, { "epoch": 0.6790573308517053, "grad_norm": 0.458984375, "learning_rate": 7.453256077948137e-06, "loss": 1.8731, "step": 21047 }, { "epoch": 0.6790895947055016, "grad_norm": 0.365234375, "learning_rate": 7.451896069104994e-06, "loss": 1.8489, "step": 21048 }, { "epoch": 0.679121858559298, "grad_norm": 0.416015625, "learning_rate": 7.450536143345965e-06, "loss": 1.8492, "step": 21049 }, { "epoch": 0.6791541224130943, "grad_norm": 0.4765625, "learning_rate": 7.449176300686013e-06, "loss": 1.8879, "step": 21050 }, { "epoch": 0.6791863862668907, "grad_norm": 0.353515625, "learning_rate": 7.44781654114012e-06, "loss": 1.8402, "step": 21051 }, { "epoch": 0.6792186501206869, "grad_norm": 0.419921875, "learning_rate": 7.446456864723236e-06, "loss": 1.8666, "step": 21052 }, { "epoch": 0.6792509139744833, "grad_norm": 0.431640625, "learning_rate": 7.445097271450334e-06, "loss": 1.8592, "step": 21053 }, { "epoch": 0.6792831778282796, "grad_norm": 0.435546875, "learning_rate": 7.443737761336394e-06, "loss": 1.8937, "step": 21054 }, { "epoch": 0.679315441682076, "grad_norm": 0.375, "learning_rate": 7.442378334396356e-06, "loss": 1.8387, "step": 21055 }, { "epoch": 0.6793477055358723, "grad_norm": 0.369140625, "learning_rate": 7.441018990645196e-06, "loss": 1.8193, "step": 21056 }, { "epoch": 0.6793799693896687, "grad_norm": 0.3515625, "learning_rate": 7.439659730097887e-06, "loss": 1.8884, "step": 21057 }, { "epoch": 0.679412233243465, "grad_norm": 0.431640625, "learning_rate": 7.438300552769369e-06, "loss": 1.8607, "step": 21058 }, { "epoch": 0.6794444970972614, "grad_norm": 0.365234375, "learning_rate": 7.4369414586746195e-06, "loss": 1.8677, "step": 21059 }, { "epoch": 0.6794767609510578, "grad_norm": 0.4296875, "learning_rate": 7.435582447828591e-06, "loss": 1.8651, "step": 21060 }, { "epoch": 0.6795090248048541, "grad_norm": 0.392578125, "learning_rate": 7.4342235202462415e-06, "loss": 1.8903, "step": 21061 }, { "epoch": 0.6795412886586505, "grad_norm": 0.353515625, "learning_rate": 7.432864675942536e-06, "loss": 1.8331, "step": 21062 }, { "epoch": 0.6795735525124468, "grad_norm": 0.3671875, "learning_rate": 7.4315059149324266e-06, "loss": 1.8696, "step": 21063 }, { "epoch": 0.6796058163662432, "grad_norm": 0.416015625, "learning_rate": 7.430147237230865e-06, "loss": 1.8719, "step": 21064 }, { "epoch": 0.6796380802200395, "grad_norm": 0.388671875, "learning_rate": 7.428788642852819e-06, "loss": 1.8549, "step": 21065 }, { "epoch": 0.6796703440738359, "grad_norm": 0.49609375, "learning_rate": 7.427430131813235e-06, "loss": 1.8814, "step": 21066 }, { "epoch": 0.6797026079276322, "grad_norm": 0.48828125, "learning_rate": 7.426071704127062e-06, "loss": 1.8864, "step": 21067 }, { "epoch": 0.6797348717814286, "grad_norm": 0.34765625, "learning_rate": 7.4247133598092656e-06, "loss": 1.8483, "step": 21068 }, { "epoch": 0.6797671356352248, "grad_norm": 0.44140625, "learning_rate": 7.423355098874788e-06, "loss": 1.8416, "step": 21069 }, { "epoch": 0.6797993994890212, "grad_norm": 0.451171875, "learning_rate": 7.4219969213385764e-06, "loss": 1.8542, "step": 21070 }, { "epoch": 0.6798316633428175, "grad_norm": 0.427734375, "learning_rate": 7.420638827215592e-06, "loss": 1.8159, "step": 21071 }, { "epoch": 0.6798639271966139, "grad_norm": 0.486328125, "learning_rate": 7.419280816520779e-06, "loss": 1.8349, "step": 21072 }, { "epoch": 0.6798961910504102, "grad_norm": 0.44921875, "learning_rate": 7.417922889269079e-06, "loss": 1.8639, "step": 21073 }, { "epoch": 0.6799284549042066, "grad_norm": 0.375, "learning_rate": 7.4165650454754485e-06, "loss": 1.828, "step": 21074 }, { "epoch": 0.6799607187580029, "grad_norm": 0.380859375, "learning_rate": 7.4152072851548256e-06, "loss": 1.8095, "step": 21075 }, { "epoch": 0.6799929826117993, "grad_norm": 0.38671875, "learning_rate": 7.4138496083221635e-06, "loss": 1.8423, "step": 21076 }, { "epoch": 0.6800252464655956, "grad_norm": 0.3671875, "learning_rate": 7.412492014992405e-06, "loss": 1.7984, "step": 21077 }, { "epoch": 0.680057510319392, "grad_norm": 0.380859375, "learning_rate": 7.4111345051804834e-06, "loss": 1.8226, "step": 21078 }, { "epoch": 0.6800897741731884, "grad_norm": 0.353515625, "learning_rate": 7.4097770789013615e-06, "loss": 1.806, "step": 21079 }, { "epoch": 0.6801220380269847, "grad_norm": 0.353515625, "learning_rate": 7.408419736169956e-06, "loss": 1.8467, "step": 21080 }, { "epoch": 0.6801543018807811, "grad_norm": 0.37890625, "learning_rate": 7.40706247700122e-06, "loss": 1.8661, "step": 21081 }, { "epoch": 0.6801865657345774, "grad_norm": 0.396484375, "learning_rate": 7.405705301410104e-06, "loss": 1.8279, "step": 21082 }, { "epoch": 0.6802188295883738, "grad_norm": 0.35546875, "learning_rate": 7.404348209411525e-06, "loss": 1.8577, "step": 21083 }, { "epoch": 0.68025109344217, "grad_norm": 0.35546875, "learning_rate": 7.402991201020433e-06, "loss": 1.8557, "step": 21084 }, { "epoch": 0.6802833572959665, "grad_norm": 0.380859375, "learning_rate": 7.401634276251774e-06, "loss": 1.8191, "step": 21085 }, { "epoch": 0.6803156211497627, "grad_norm": 0.35546875, "learning_rate": 7.400277435120463e-06, "loss": 1.8288, "step": 21086 }, { "epoch": 0.6803478850035591, "grad_norm": 0.376953125, "learning_rate": 7.3989206776414444e-06, "loss": 1.8443, "step": 21087 }, { "epoch": 0.6803801488573554, "grad_norm": 0.384765625, "learning_rate": 7.397564003829668e-06, "loss": 1.8675, "step": 21088 }, { "epoch": 0.6804124127111518, "grad_norm": 0.380859375, "learning_rate": 7.39620741370004e-06, "loss": 1.8517, "step": 21089 }, { "epoch": 0.6804446765649481, "grad_norm": 0.380859375, "learning_rate": 7.394850907267512e-06, "loss": 1.8551, "step": 21090 }, { "epoch": 0.6804769404187445, "grad_norm": 0.37109375, "learning_rate": 7.393494484547009e-06, "loss": 1.8361, "step": 21091 }, { "epoch": 0.6805092042725408, "grad_norm": 0.3671875, "learning_rate": 7.392138145553457e-06, "loss": 1.8616, "step": 21092 }, { "epoch": 0.6805414681263372, "grad_norm": 0.375, "learning_rate": 7.390781890301795e-06, "loss": 1.8644, "step": 21093 }, { "epoch": 0.6805737319801335, "grad_norm": 0.375, "learning_rate": 7.389425718806947e-06, "loss": 1.8289, "step": 21094 }, { "epoch": 0.6806059958339299, "grad_norm": 0.365234375, "learning_rate": 7.388069631083836e-06, "loss": 1.8418, "step": 21095 }, { "epoch": 0.6806382596877262, "grad_norm": 0.39453125, "learning_rate": 7.386713627147399e-06, "loss": 1.8524, "step": 21096 }, { "epoch": 0.6806705235415226, "grad_norm": 0.361328125, "learning_rate": 7.385357707012559e-06, "loss": 1.8442, "step": 21097 }, { "epoch": 0.6807027873953189, "grad_norm": 0.36328125, "learning_rate": 7.384001870694229e-06, "loss": 1.8701, "step": 21098 }, { "epoch": 0.6807350512491153, "grad_norm": 0.3671875, "learning_rate": 7.38264611820735e-06, "loss": 1.8731, "step": 21099 }, { "epoch": 0.6807673151029117, "grad_norm": 0.36328125, "learning_rate": 7.381290449566839e-06, "loss": 1.8502, "step": 21100 }, { "epoch": 0.680799578956708, "grad_norm": 0.39453125, "learning_rate": 7.37993486478761e-06, "loss": 1.8436, "step": 21101 }, { "epoch": 0.6808318428105044, "grad_norm": 0.359375, "learning_rate": 7.378579363884598e-06, "loss": 1.8769, "step": 21102 }, { "epoch": 0.6808641066643006, "grad_norm": 0.384765625, "learning_rate": 7.377223946872716e-06, "loss": 1.8623, "step": 21103 }, { "epoch": 0.680896370518097, "grad_norm": 0.375, "learning_rate": 7.3758686137668794e-06, "loss": 1.8301, "step": 21104 }, { "epoch": 0.6809286343718933, "grad_norm": 0.39453125, "learning_rate": 7.374513364582017e-06, "loss": 1.8535, "step": 21105 }, { "epoch": 0.6809608982256897, "grad_norm": 0.365234375, "learning_rate": 7.3731581993330395e-06, "loss": 1.8479, "step": 21106 }, { "epoch": 0.680993162079486, "grad_norm": 0.369140625, "learning_rate": 7.371803118034861e-06, "loss": 1.8224, "step": 21107 }, { "epoch": 0.6810254259332824, "grad_norm": 0.388671875, "learning_rate": 7.370448120702407e-06, "loss": 1.8975, "step": 21108 }, { "epoch": 0.6810576897870787, "grad_norm": 0.384765625, "learning_rate": 7.369093207350581e-06, "loss": 1.8613, "step": 21109 }, { "epoch": 0.6810899536408751, "grad_norm": 0.375, "learning_rate": 7.367738377994314e-06, "loss": 1.7951, "step": 21110 }, { "epoch": 0.6811222174946714, "grad_norm": 0.37890625, "learning_rate": 7.3663836326484954e-06, "loss": 1.8196, "step": 21111 }, { "epoch": 0.6811544813484678, "grad_norm": 0.3828125, "learning_rate": 7.365028971328048e-06, "loss": 1.8049, "step": 21112 }, { "epoch": 0.6811867452022641, "grad_norm": 0.38671875, "learning_rate": 7.3636743940478976e-06, "loss": 1.8552, "step": 21113 }, { "epoch": 0.6812190090560605, "grad_norm": 0.3828125, "learning_rate": 7.36231990082293e-06, "loss": 1.8471, "step": 21114 }, { "epoch": 0.6812512729098568, "grad_norm": 0.455078125, "learning_rate": 7.3609654916680665e-06, "loss": 1.8154, "step": 21115 }, { "epoch": 0.6812835367636532, "grad_norm": 0.41015625, "learning_rate": 7.359611166598224e-06, "loss": 1.8384, "step": 21116 }, { "epoch": 0.6813158006174495, "grad_norm": 0.38671875, "learning_rate": 7.35825692562829e-06, "loss": 1.8883, "step": 21117 }, { "epoch": 0.6813480644712459, "grad_norm": 0.3828125, "learning_rate": 7.356902768773182e-06, "loss": 1.8663, "step": 21118 }, { "epoch": 0.6813803283250423, "grad_norm": 0.41796875, "learning_rate": 7.355548696047817e-06, "loss": 1.8552, "step": 21119 }, { "epoch": 0.6814125921788385, "grad_norm": 0.396484375, "learning_rate": 7.354194707467075e-06, "loss": 1.8555, "step": 21120 }, { "epoch": 0.6814448560326349, "grad_norm": 0.384765625, "learning_rate": 7.35284080304588e-06, "loss": 1.8408, "step": 21121 }, { "epoch": 0.6814771198864312, "grad_norm": 0.369140625, "learning_rate": 7.351486982799126e-06, "loss": 1.8603, "step": 21122 }, { "epoch": 0.6815093837402276, "grad_norm": 0.408203125, "learning_rate": 7.350133246741712e-06, "loss": 1.8485, "step": 21123 }, { "epoch": 0.6815416475940239, "grad_norm": 0.384765625, "learning_rate": 7.348779594888547e-06, "loss": 1.8684, "step": 21124 }, { "epoch": 0.6815739114478203, "grad_norm": 0.38671875, "learning_rate": 7.347426027254528e-06, "loss": 1.8231, "step": 21125 }, { "epoch": 0.6816061753016166, "grad_norm": 0.375, "learning_rate": 7.3460725438545486e-06, "loss": 1.8404, "step": 21126 }, { "epoch": 0.681638439155413, "grad_norm": 0.3828125, "learning_rate": 7.344719144703516e-06, "loss": 1.8483, "step": 21127 }, { "epoch": 0.6816707030092093, "grad_norm": 0.390625, "learning_rate": 7.343365829816324e-06, "loss": 1.8672, "step": 21128 }, { "epoch": 0.6817029668630057, "grad_norm": 0.36328125, "learning_rate": 7.3420125992078626e-06, "loss": 1.8293, "step": 21129 }, { "epoch": 0.681735230716802, "grad_norm": 0.37890625, "learning_rate": 7.3406594528930384e-06, "loss": 1.8217, "step": 21130 }, { "epoch": 0.6817674945705984, "grad_norm": 0.3828125, "learning_rate": 7.33930639088674e-06, "loss": 1.8715, "step": 21131 }, { "epoch": 0.6817997584243947, "grad_norm": 0.376953125, "learning_rate": 7.337953413203856e-06, "loss": 1.9103, "step": 21132 }, { "epoch": 0.6818320222781911, "grad_norm": 0.376953125, "learning_rate": 7.336600519859288e-06, "loss": 1.8414, "step": 21133 }, { "epoch": 0.6818642861319874, "grad_norm": 0.380859375, "learning_rate": 7.335247710867924e-06, "loss": 1.8743, "step": 21134 }, { "epoch": 0.6818965499857838, "grad_norm": 0.421875, "learning_rate": 7.333894986244651e-06, "loss": 1.8832, "step": 21135 }, { "epoch": 0.68192881383958, "grad_norm": 0.384765625, "learning_rate": 7.332542346004367e-06, "loss": 1.8323, "step": 21136 }, { "epoch": 0.6819610776933764, "grad_norm": 0.455078125, "learning_rate": 7.331189790161955e-06, "loss": 1.8875, "step": 21137 }, { "epoch": 0.6819933415471727, "grad_norm": 0.4140625, "learning_rate": 7.3298373187323e-06, "loss": 1.8418, "step": 21138 }, { "epoch": 0.6820256054009691, "grad_norm": 0.427734375, "learning_rate": 7.328484931730299e-06, "loss": 1.8811, "step": 21139 }, { "epoch": 0.6820578692547655, "grad_norm": 0.412109375, "learning_rate": 7.327132629170826e-06, "loss": 1.8696, "step": 21140 }, { "epoch": 0.6820901331085618, "grad_norm": 0.439453125, "learning_rate": 7.325780411068783e-06, "loss": 1.8724, "step": 21141 }, { "epoch": 0.6821223969623582, "grad_norm": 0.404296875, "learning_rate": 7.324428277439034e-06, "loss": 1.8616, "step": 21142 }, { "epoch": 0.6821546608161545, "grad_norm": 0.369140625, "learning_rate": 7.323076228296472e-06, "loss": 1.8784, "step": 21143 }, { "epoch": 0.6821869246699509, "grad_norm": 0.71875, "learning_rate": 7.321724263655989e-06, "loss": 1.9996, "step": 21144 }, { "epoch": 0.6822191885237472, "grad_norm": 0.4921875, "learning_rate": 7.320372383532448e-06, "loss": 1.9796, "step": 21145 }, { "epoch": 0.6822514523775436, "grad_norm": 0.51171875, "learning_rate": 7.319020587940738e-06, "loss": 1.9269, "step": 21146 }, { "epoch": 0.6822837162313399, "grad_norm": 0.56640625, "learning_rate": 7.317668876895748e-06, "loss": 1.937, "step": 21147 }, { "epoch": 0.6823159800851363, "grad_norm": 0.4296875, "learning_rate": 7.316317250412339e-06, "loss": 1.9417, "step": 21148 }, { "epoch": 0.6823482439389326, "grad_norm": 0.423828125, "learning_rate": 7.3149657085053966e-06, "loss": 1.9306, "step": 21149 }, { "epoch": 0.682380507792729, "grad_norm": 0.56640625, "learning_rate": 7.31361425118981e-06, "loss": 1.964, "step": 21150 }, { "epoch": 0.6824127716465253, "grad_norm": 0.484375, "learning_rate": 7.312262878480432e-06, "loss": 1.9262, "step": 21151 }, { "epoch": 0.6824450355003217, "grad_norm": 0.48828125, "learning_rate": 7.310911590392155e-06, "loss": 1.9716, "step": 21152 }, { "epoch": 0.6824772993541179, "grad_norm": 0.42578125, "learning_rate": 7.309560386939846e-06, "loss": 2.001, "step": 21153 }, { "epoch": 0.6825095632079143, "grad_norm": 0.431640625, "learning_rate": 7.308209268138375e-06, "loss": 1.9743, "step": 21154 }, { "epoch": 0.6825418270617106, "grad_norm": 0.46484375, "learning_rate": 7.306858234002622e-06, "loss": 1.9305, "step": 21155 }, { "epoch": 0.682574090915507, "grad_norm": 0.490234375, "learning_rate": 7.305507284547457e-06, "loss": 1.9338, "step": 21156 }, { "epoch": 0.6826063547693033, "grad_norm": 0.376953125, "learning_rate": 7.3041564197877405e-06, "loss": 1.9343, "step": 21157 }, { "epoch": 0.6826386186230997, "grad_norm": 0.427734375, "learning_rate": 7.302805639738356e-06, "loss": 1.9333, "step": 21158 }, { "epoch": 0.682670882476896, "grad_norm": 0.392578125, "learning_rate": 7.301454944414163e-06, "loss": 1.8525, "step": 21159 }, { "epoch": 0.6827031463306924, "grad_norm": 0.375, "learning_rate": 7.300104333830028e-06, "loss": 1.8609, "step": 21160 }, { "epoch": 0.6827354101844888, "grad_norm": 0.3828125, "learning_rate": 7.298753808000824e-06, "loss": 1.8597, "step": 21161 }, { "epoch": 0.6827676740382851, "grad_norm": 0.390625, "learning_rate": 7.297403366941414e-06, "loss": 1.8452, "step": 21162 }, { "epoch": 0.6827999378920815, "grad_norm": 0.359375, "learning_rate": 7.296053010666657e-06, "loss": 1.8481, "step": 21163 }, { "epoch": 0.6828322017458778, "grad_norm": 0.400390625, "learning_rate": 7.294702739191427e-06, "loss": 1.8593, "step": 21164 }, { "epoch": 0.6828644655996742, "grad_norm": 0.3671875, "learning_rate": 7.293352552530581e-06, "loss": 1.8514, "step": 21165 }, { "epoch": 0.6828967294534705, "grad_norm": 0.359375, "learning_rate": 7.292002450698975e-06, "loss": 1.8856, "step": 21166 }, { "epoch": 0.6829289933072669, "grad_norm": 0.4375, "learning_rate": 7.290652433711483e-06, "loss": 1.8533, "step": 21167 }, { "epoch": 0.6829612571610632, "grad_norm": 0.376953125, "learning_rate": 7.28930250158296e-06, "loss": 1.8636, "step": 21168 }, { "epoch": 0.6829935210148596, "grad_norm": 0.412109375, "learning_rate": 7.287952654328256e-06, "loss": 1.8697, "step": 21169 }, { "epoch": 0.6830257848686558, "grad_norm": 0.46484375, "learning_rate": 7.286602891962242e-06, "loss": 1.8338, "step": 21170 }, { "epoch": 0.6830580487224522, "grad_norm": 0.359375, "learning_rate": 7.28525321449977e-06, "loss": 1.9001, "step": 21171 }, { "epoch": 0.6830903125762485, "grad_norm": 0.41796875, "learning_rate": 7.283903621955697e-06, "loss": 1.8612, "step": 21172 }, { "epoch": 0.6831225764300449, "grad_norm": 0.375, "learning_rate": 7.282554114344871e-06, "loss": 1.8591, "step": 21173 }, { "epoch": 0.6831548402838412, "grad_norm": 0.365234375, "learning_rate": 7.281204691682152e-06, "loss": 1.8352, "step": 21174 }, { "epoch": 0.6831871041376376, "grad_norm": 0.41015625, "learning_rate": 7.279855353982406e-06, "loss": 1.9286, "step": 21175 }, { "epoch": 0.6832193679914339, "grad_norm": 0.3828125, "learning_rate": 7.278506101260463e-06, "loss": 1.8954, "step": 21176 }, { "epoch": 0.6832516318452303, "grad_norm": 0.369140625, "learning_rate": 7.277156933531187e-06, "loss": 1.8428, "step": 21177 }, { "epoch": 0.6832838956990266, "grad_norm": 0.357421875, "learning_rate": 7.275807850809436e-06, "loss": 1.8901, "step": 21178 }, { "epoch": 0.683316159552823, "grad_norm": 0.39453125, "learning_rate": 7.27445885311004e-06, "loss": 1.8836, "step": 21179 }, { "epoch": 0.6833484234066194, "grad_norm": 0.349609375, "learning_rate": 7.273109940447866e-06, "loss": 1.9056, "step": 21180 }, { "epoch": 0.6833806872604157, "grad_norm": 0.359375, "learning_rate": 7.271761112837752e-06, "loss": 1.8649, "step": 21181 }, { "epoch": 0.6834129511142121, "grad_norm": 0.37890625, "learning_rate": 7.2704123702945435e-06, "loss": 1.8907, "step": 21182 }, { "epoch": 0.6834452149680084, "grad_norm": 0.365234375, "learning_rate": 7.269063712833097e-06, "loss": 1.86, "step": 21183 }, { "epoch": 0.6834774788218048, "grad_norm": 0.431640625, "learning_rate": 7.267715140468249e-06, "loss": 1.9301, "step": 21184 }, { "epoch": 0.683509742675601, "grad_norm": 0.55078125, "learning_rate": 7.26636665321484e-06, "loss": 1.9978, "step": 21185 }, { "epoch": 0.6835420065293975, "grad_norm": 0.49609375, "learning_rate": 7.265018251087726e-06, "loss": 1.9775, "step": 21186 }, { "epoch": 0.6835742703831937, "grad_norm": 0.396484375, "learning_rate": 7.263669934101741e-06, "loss": 1.9435, "step": 21187 }, { "epoch": 0.6836065342369901, "grad_norm": 0.47265625, "learning_rate": 7.2623217022717215e-06, "loss": 1.9501, "step": 21188 }, { "epoch": 0.6836387980907864, "grad_norm": 0.53125, "learning_rate": 7.26097355561252e-06, "loss": 1.9825, "step": 21189 }, { "epoch": 0.6836710619445828, "grad_norm": 0.51953125, "learning_rate": 7.259625494138969e-06, "loss": 1.9421, "step": 21190 }, { "epoch": 0.6837033257983791, "grad_norm": 0.455078125, "learning_rate": 7.258277517865902e-06, "loss": 1.9974, "step": 21191 }, { "epoch": 0.6837355896521755, "grad_norm": 0.45703125, "learning_rate": 7.25692962680817e-06, "loss": 1.943, "step": 21192 }, { "epoch": 0.6837678535059718, "grad_norm": 0.427734375, "learning_rate": 7.2555818209806e-06, "loss": 1.9857, "step": 21193 }, { "epoch": 0.6838001173597682, "grad_norm": 0.447265625, "learning_rate": 7.254234100398022e-06, "loss": 1.9334, "step": 21194 }, { "epoch": 0.6838323812135645, "grad_norm": 0.453125, "learning_rate": 7.252886465075288e-06, "loss": 1.942, "step": 21195 }, { "epoch": 0.6838646450673609, "grad_norm": 0.474609375, "learning_rate": 7.251538915027218e-06, "loss": 1.9478, "step": 21196 }, { "epoch": 0.6838969089211572, "grad_norm": 0.546875, "learning_rate": 7.250191450268646e-06, "loss": 1.9092, "step": 21197 }, { "epoch": 0.6839291727749536, "grad_norm": 0.423828125, "learning_rate": 7.248844070814411e-06, "loss": 1.9451, "step": 21198 }, { "epoch": 0.6839614366287499, "grad_norm": 0.40625, "learning_rate": 7.2474967766793406e-06, "loss": 1.9641, "step": 21199 }, { "epoch": 0.6839937004825463, "grad_norm": 0.4375, "learning_rate": 7.246149567878258e-06, "loss": 1.9765, "step": 21200 }, { "epoch": 0.6840259643363427, "grad_norm": 0.4296875, "learning_rate": 7.244802444426004e-06, "loss": 1.952, "step": 21201 }, { "epoch": 0.684058228190139, "grad_norm": 0.4296875, "learning_rate": 7.243455406337401e-06, "loss": 1.9904, "step": 21202 }, { "epoch": 0.6840904920439353, "grad_norm": 0.427734375, "learning_rate": 7.242108453627278e-06, "loss": 1.9495, "step": 21203 }, { "epoch": 0.6841227558977316, "grad_norm": 0.39453125, "learning_rate": 7.2407615863104515e-06, "loss": 1.9803, "step": 21204 }, { "epoch": 0.684155019751528, "grad_norm": 0.416015625, "learning_rate": 7.2394148044017626e-06, "loss": 2.0127, "step": 21205 }, { "epoch": 0.6841872836053243, "grad_norm": 0.380859375, "learning_rate": 7.238068107916026e-06, "loss": 1.9568, "step": 21206 }, { "epoch": 0.6842195474591207, "grad_norm": 0.392578125, "learning_rate": 7.2367214968680645e-06, "loss": 1.9808, "step": 21207 }, { "epoch": 0.684251811312917, "grad_norm": 0.40625, "learning_rate": 7.2353749712727016e-06, "loss": 1.9303, "step": 21208 }, { "epoch": 0.6842840751667134, "grad_norm": 0.3984375, "learning_rate": 7.2340285311447714e-06, "loss": 1.933, "step": 21209 }, { "epoch": 0.6843163390205097, "grad_norm": 0.40234375, "learning_rate": 7.232682176499074e-06, "loss": 1.9055, "step": 21210 }, { "epoch": 0.6843486028743061, "grad_norm": 0.369140625, "learning_rate": 7.231335907350443e-06, "loss": 1.9565, "step": 21211 }, { "epoch": 0.6843808667281024, "grad_norm": 0.380859375, "learning_rate": 7.229989723713695e-06, "loss": 1.9758, "step": 21212 }, { "epoch": 0.6844131305818988, "grad_norm": 0.390625, "learning_rate": 7.2286436256036404e-06, "loss": 1.907, "step": 21213 }, { "epoch": 0.6844453944356951, "grad_norm": 0.3984375, "learning_rate": 7.227297613035105e-06, "loss": 1.9467, "step": 21214 }, { "epoch": 0.6844776582894915, "grad_norm": 0.37890625, "learning_rate": 7.225951686022903e-06, "loss": 1.9647, "step": 21215 }, { "epoch": 0.6845099221432878, "grad_norm": 0.376953125, "learning_rate": 7.224605844581841e-06, "loss": 1.972, "step": 21216 }, { "epoch": 0.6845421859970842, "grad_norm": 0.412109375, "learning_rate": 7.223260088726747e-06, "loss": 1.928, "step": 21217 }, { "epoch": 0.6845744498508805, "grad_norm": 0.3828125, "learning_rate": 7.2219144184724265e-06, "loss": 1.956, "step": 21218 }, { "epoch": 0.6846067137046769, "grad_norm": 0.41015625, "learning_rate": 7.220568833833685e-06, "loss": 2.0117, "step": 21219 }, { "epoch": 0.6846389775584731, "grad_norm": 0.416015625, "learning_rate": 7.219223334825348e-06, "loss": 1.9672, "step": 21220 }, { "epoch": 0.6846712414122695, "grad_norm": 0.3671875, "learning_rate": 7.217877921462219e-06, "loss": 1.9333, "step": 21221 }, { "epoch": 0.6847035052660659, "grad_norm": 0.412109375, "learning_rate": 7.2165325937591e-06, "loss": 1.9267, "step": 21222 }, { "epoch": 0.6847357691198622, "grad_norm": 0.416015625, "learning_rate": 7.215187351730813e-06, "loss": 1.9145, "step": 21223 }, { "epoch": 0.6847680329736586, "grad_norm": 0.40625, "learning_rate": 7.2138421953921595e-06, "loss": 1.9777, "step": 21224 }, { "epoch": 0.6848002968274549, "grad_norm": 0.388671875, "learning_rate": 7.212497124757937e-06, "loss": 1.9886, "step": 21225 }, { "epoch": 0.6848325606812513, "grad_norm": 0.37890625, "learning_rate": 7.211152139842968e-06, "loss": 1.9651, "step": 21226 }, { "epoch": 0.6848648245350476, "grad_norm": 0.3828125, "learning_rate": 7.209807240662046e-06, "loss": 1.9734, "step": 21227 }, { "epoch": 0.684897088388844, "grad_norm": 0.43359375, "learning_rate": 7.2084624272299735e-06, "loss": 1.9357, "step": 21228 }, { "epoch": 0.6849293522426403, "grad_norm": 0.419921875, "learning_rate": 7.207117699561562e-06, "loss": 1.9361, "step": 21229 }, { "epoch": 0.6849616160964367, "grad_norm": 0.376953125, "learning_rate": 7.205773057671608e-06, "loss": 1.906, "step": 21230 }, { "epoch": 0.684993879950233, "grad_norm": 0.39453125, "learning_rate": 7.2044285015749125e-06, "loss": 1.944, "step": 21231 }, { "epoch": 0.6850261438040294, "grad_norm": 0.443359375, "learning_rate": 7.203084031286271e-06, "loss": 1.9654, "step": 21232 }, { "epoch": 0.6850584076578257, "grad_norm": 0.4296875, "learning_rate": 7.201739646820491e-06, "loss": 1.9184, "step": 21233 }, { "epoch": 0.6850906715116221, "grad_norm": 0.447265625, "learning_rate": 7.200395348192368e-06, "loss": 1.9397, "step": 21234 }, { "epoch": 0.6851229353654184, "grad_norm": 0.45703125, "learning_rate": 7.199051135416691e-06, "loss": 1.9254, "step": 21235 }, { "epoch": 0.6851551992192147, "grad_norm": 0.375, "learning_rate": 7.197707008508269e-06, "loss": 1.915, "step": 21236 }, { "epoch": 0.685187463073011, "grad_norm": 0.48046875, "learning_rate": 7.196362967481891e-06, "loss": 1.9632, "step": 21237 }, { "epoch": 0.6852197269268074, "grad_norm": 0.412109375, "learning_rate": 7.195019012352344e-06, "loss": 1.9581, "step": 21238 }, { "epoch": 0.6852519907806037, "grad_norm": 0.400390625, "learning_rate": 7.193675143134436e-06, "loss": 1.9641, "step": 21239 }, { "epoch": 0.6852842546344001, "grad_norm": 0.443359375, "learning_rate": 7.192331359842952e-06, "loss": 1.939, "step": 21240 }, { "epoch": 0.6853165184881965, "grad_norm": 0.400390625, "learning_rate": 7.190987662492675e-06, "loss": 1.9451, "step": 21241 }, { "epoch": 0.6853487823419928, "grad_norm": 0.443359375, "learning_rate": 7.18964405109841e-06, "loss": 1.947, "step": 21242 }, { "epoch": 0.6853810461957892, "grad_norm": 0.40625, "learning_rate": 7.188300525674942e-06, "loss": 1.9329, "step": 21243 }, { "epoch": 0.6854133100495855, "grad_norm": 0.3984375, "learning_rate": 7.186957086237052e-06, "loss": 1.9194, "step": 21244 }, { "epoch": 0.6854455739033819, "grad_norm": 0.408203125, "learning_rate": 7.185613732799538e-06, "loss": 1.9518, "step": 21245 }, { "epoch": 0.6854778377571782, "grad_norm": 0.408203125, "learning_rate": 7.184270465377181e-06, "loss": 1.9336, "step": 21246 }, { "epoch": 0.6855101016109746, "grad_norm": 0.416015625, "learning_rate": 7.182927283984764e-06, "loss": 1.9513, "step": 21247 }, { "epoch": 0.6855423654647709, "grad_norm": 0.40625, "learning_rate": 7.18158418863708e-06, "loss": 1.9299, "step": 21248 }, { "epoch": 0.6855746293185673, "grad_norm": 0.419921875, "learning_rate": 7.180241179348909e-06, "loss": 1.93, "step": 21249 }, { "epoch": 0.6856068931723636, "grad_norm": 0.38671875, "learning_rate": 7.178898256135028e-06, "loss": 1.9437, "step": 21250 }, { "epoch": 0.68563915702616, "grad_norm": 0.37109375, "learning_rate": 7.177555419010229e-06, "loss": 1.9639, "step": 21251 }, { "epoch": 0.6856714208799563, "grad_norm": 0.396484375, "learning_rate": 7.176212667989288e-06, "loss": 1.9669, "step": 21252 }, { "epoch": 0.6857036847337526, "grad_norm": 0.412109375, "learning_rate": 7.17487000308698e-06, "loss": 1.9495, "step": 21253 }, { "epoch": 0.6857359485875489, "grad_norm": 0.396484375, "learning_rate": 7.173527424318094e-06, "loss": 1.8903, "step": 21254 }, { "epoch": 0.6857682124413453, "grad_norm": 0.41015625, "learning_rate": 7.172184931697404e-06, "loss": 1.9613, "step": 21255 }, { "epoch": 0.6858004762951416, "grad_norm": 0.396484375, "learning_rate": 7.1708425252396815e-06, "loss": 1.9161, "step": 21256 }, { "epoch": 0.685832740148938, "grad_norm": 0.392578125, "learning_rate": 7.169500204959712e-06, "loss": 1.9696, "step": 21257 }, { "epoch": 0.6858650040027343, "grad_norm": 0.435546875, "learning_rate": 7.168157970872269e-06, "loss": 1.9121, "step": 21258 }, { "epoch": 0.6858972678565307, "grad_norm": 0.42578125, "learning_rate": 7.166815822992117e-06, "loss": 1.9232, "step": 21259 }, { "epoch": 0.685929531710327, "grad_norm": 0.423828125, "learning_rate": 7.165473761334044e-06, "loss": 1.9116, "step": 21260 }, { "epoch": 0.6859617955641234, "grad_norm": 0.3828125, "learning_rate": 7.1641317859128145e-06, "loss": 1.9354, "step": 21261 }, { "epoch": 0.6859940594179198, "grad_norm": 0.380859375, "learning_rate": 7.1627898967431995e-06, "loss": 1.8966, "step": 21262 }, { "epoch": 0.6860263232717161, "grad_norm": 0.388671875, "learning_rate": 7.1614480938399674e-06, "loss": 1.9146, "step": 21263 }, { "epoch": 0.6860585871255125, "grad_norm": 0.37890625, "learning_rate": 7.160106377217897e-06, "loss": 1.9496, "step": 21264 }, { "epoch": 0.6860908509793088, "grad_norm": 0.37890625, "learning_rate": 7.15876474689175e-06, "loss": 1.9034, "step": 21265 }, { "epoch": 0.6861231148331052, "grad_norm": 0.408203125, "learning_rate": 7.157423202876291e-06, "loss": 1.8779, "step": 21266 }, { "epoch": 0.6861553786869015, "grad_norm": 0.380859375, "learning_rate": 7.1560817451862966e-06, "loss": 1.9569, "step": 21267 }, { "epoch": 0.6861876425406979, "grad_norm": 0.376953125, "learning_rate": 7.154740373836526e-06, "loss": 1.9558, "step": 21268 }, { "epoch": 0.6862199063944942, "grad_norm": 0.44140625, "learning_rate": 7.153399088841741e-06, "loss": 1.9287, "step": 21269 }, { "epoch": 0.6862521702482905, "grad_norm": 0.408203125, "learning_rate": 7.152057890216714e-06, "loss": 1.8759, "step": 21270 }, { "epoch": 0.6862844341020868, "grad_norm": 0.44140625, "learning_rate": 7.150716777976203e-06, "loss": 1.8738, "step": 21271 }, { "epoch": 0.6863166979558832, "grad_norm": 0.5703125, "learning_rate": 7.149375752134967e-06, "loss": 1.8089, "step": 21272 }, { "epoch": 0.6863489618096795, "grad_norm": 0.37890625, "learning_rate": 7.148034812707775e-06, "loss": 1.8326, "step": 21273 }, { "epoch": 0.6863812256634759, "grad_norm": 0.48828125, "learning_rate": 7.1466939597093845e-06, "loss": 1.8728, "step": 21274 }, { "epoch": 0.6864134895172722, "grad_norm": 0.443359375, "learning_rate": 7.1453531931545456e-06, "loss": 1.8272, "step": 21275 }, { "epoch": 0.6864457533710686, "grad_norm": 0.50390625, "learning_rate": 7.144012513058031e-06, "loss": 1.8572, "step": 21276 }, { "epoch": 0.6864780172248649, "grad_norm": 0.439453125, "learning_rate": 7.1426719194345894e-06, "loss": 1.831, "step": 21277 }, { "epoch": 0.6865102810786613, "grad_norm": 0.365234375, "learning_rate": 7.1413314122989736e-06, "loss": 1.8609, "step": 21278 }, { "epoch": 0.6865425449324576, "grad_norm": 0.435546875, "learning_rate": 7.1399909916659494e-06, "loss": 1.8076, "step": 21279 }, { "epoch": 0.686574808786254, "grad_norm": 0.4296875, "learning_rate": 7.138650657550266e-06, "loss": 1.8509, "step": 21280 }, { "epoch": 0.6866070726400504, "grad_norm": 0.37109375, "learning_rate": 7.137310409966669e-06, "loss": 1.8562, "step": 21281 }, { "epoch": 0.6866393364938467, "grad_norm": 0.4453125, "learning_rate": 7.135970248929926e-06, "loss": 1.8644, "step": 21282 }, { "epoch": 0.6866716003476431, "grad_norm": 0.41015625, "learning_rate": 7.1346301744547794e-06, "loss": 1.7972, "step": 21283 }, { "epoch": 0.6867038642014394, "grad_norm": 0.38671875, "learning_rate": 7.133290186555977e-06, "loss": 1.8414, "step": 21284 }, { "epoch": 0.6867361280552358, "grad_norm": 0.3671875, "learning_rate": 7.131950285248277e-06, "loss": 1.8351, "step": 21285 }, { "epoch": 0.686768391909032, "grad_norm": 0.41796875, "learning_rate": 7.130610470546424e-06, "loss": 1.8138, "step": 21286 }, { "epoch": 0.6868006557628284, "grad_norm": 0.373046875, "learning_rate": 7.12927074246516e-06, "loss": 1.865, "step": 21287 }, { "epoch": 0.6868329196166247, "grad_norm": 0.439453125, "learning_rate": 7.127931101019244e-06, "loss": 1.868, "step": 21288 }, { "epoch": 0.6868651834704211, "grad_norm": 0.46875, "learning_rate": 7.1265915462234135e-06, "loss": 1.816, "step": 21289 }, { "epoch": 0.6868974473242174, "grad_norm": 0.365234375, "learning_rate": 7.12525207809241e-06, "loss": 1.858, "step": 21290 }, { "epoch": 0.6869297111780138, "grad_norm": 0.49609375, "learning_rate": 7.123912696640988e-06, "loss": 1.8353, "step": 21291 }, { "epoch": 0.6869619750318101, "grad_norm": 0.3984375, "learning_rate": 7.1225734018838846e-06, "loss": 1.8567, "step": 21292 }, { "epoch": 0.6869942388856065, "grad_norm": 0.419921875, "learning_rate": 7.1212341938358415e-06, "loss": 1.8467, "step": 21293 }, { "epoch": 0.6870265027394028, "grad_norm": 0.43359375, "learning_rate": 7.119895072511596e-06, "loss": 1.82, "step": 21294 }, { "epoch": 0.6870587665931992, "grad_norm": 0.369140625, "learning_rate": 7.118556037925897e-06, "loss": 1.8241, "step": 21295 }, { "epoch": 0.6870910304469955, "grad_norm": 0.40625, "learning_rate": 7.117217090093477e-06, "loss": 1.7979, "step": 21296 }, { "epoch": 0.6871232943007919, "grad_norm": 0.365234375, "learning_rate": 7.115878229029074e-06, "loss": 1.8721, "step": 21297 }, { "epoch": 0.6871555581545882, "grad_norm": 0.380859375, "learning_rate": 7.114539454747432e-06, "loss": 1.8537, "step": 21298 }, { "epoch": 0.6871878220083846, "grad_norm": 0.380859375, "learning_rate": 7.113200767263282e-06, "loss": 1.8466, "step": 21299 }, { "epoch": 0.6872200858621809, "grad_norm": 0.3984375, "learning_rate": 7.111862166591355e-06, "loss": 1.837, "step": 21300 }, { "epoch": 0.6872523497159773, "grad_norm": 0.419921875, "learning_rate": 7.110523652746394e-06, "loss": 1.8372, "step": 21301 }, { "epoch": 0.6872846135697737, "grad_norm": 0.439453125, "learning_rate": 7.109185225743131e-06, "loss": 1.822, "step": 21302 }, { "epoch": 0.68731687742357, "grad_norm": 0.41796875, "learning_rate": 7.107846885596289e-06, "loss": 1.859, "step": 21303 }, { "epoch": 0.6873491412773663, "grad_norm": 0.375, "learning_rate": 7.1065086323206134e-06, "loss": 1.7751, "step": 21304 }, { "epoch": 0.6873814051311626, "grad_norm": 0.48046875, "learning_rate": 7.105170465930828e-06, "loss": 1.8172, "step": 21305 }, { "epoch": 0.687413668984959, "grad_norm": 0.458984375, "learning_rate": 7.103832386441657e-06, "loss": 1.8481, "step": 21306 }, { "epoch": 0.6874459328387553, "grad_norm": 0.33984375, "learning_rate": 7.102494393867841e-06, "loss": 1.841, "step": 21307 }, { "epoch": 0.6874781966925517, "grad_norm": 0.421875, "learning_rate": 7.101156488224098e-06, "loss": 1.8099, "step": 21308 }, { "epoch": 0.687510460546348, "grad_norm": 0.400390625, "learning_rate": 7.099818669525154e-06, "loss": 1.8635, "step": 21309 }, { "epoch": 0.6875427244001444, "grad_norm": 0.453125, "learning_rate": 7.098480937785744e-06, "loss": 1.9106, "step": 21310 }, { "epoch": 0.6875749882539407, "grad_norm": 0.458984375, "learning_rate": 7.097143293020587e-06, "loss": 1.9487, "step": 21311 }, { "epoch": 0.6876072521077371, "grad_norm": 0.435546875, "learning_rate": 7.095805735244403e-06, "loss": 1.8884, "step": 21312 }, { "epoch": 0.6876395159615334, "grad_norm": 0.39453125, "learning_rate": 7.094468264471922e-06, "loss": 1.9257, "step": 21313 }, { "epoch": 0.6876717798153298, "grad_norm": 0.4140625, "learning_rate": 7.093130880717866e-06, "loss": 1.9341, "step": 21314 }, { "epoch": 0.6877040436691261, "grad_norm": 0.53125, "learning_rate": 7.091793583996945e-06, "loss": 1.9295, "step": 21315 }, { "epoch": 0.6877363075229225, "grad_norm": 0.4375, "learning_rate": 7.090456374323892e-06, "loss": 1.9308, "step": 21316 }, { "epoch": 0.6877685713767188, "grad_norm": 0.44140625, "learning_rate": 7.089119251713423e-06, "loss": 1.9826, "step": 21317 }, { "epoch": 0.6878008352305152, "grad_norm": 0.421875, "learning_rate": 7.087782216180248e-06, "loss": 1.9309, "step": 21318 }, { "epoch": 0.6878330990843115, "grad_norm": 0.419921875, "learning_rate": 7.086445267739094e-06, "loss": 1.9232, "step": 21319 }, { "epoch": 0.6878653629381078, "grad_norm": 0.396484375, "learning_rate": 7.0851084064046754e-06, "loss": 1.9064, "step": 21320 }, { "epoch": 0.6878976267919041, "grad_norm": 0.41796875, "learning_rate": 7.083771632191699e-06, "loss": 1.9439, "step": 21321 }, { "epoch": 0.6879298906457005, "grad_norm": 0.45703125, "learning_rate": 7.082434945114891e-06, "loss": 1.9373, "step": 21322 }, { "epoch": 0.6879621544994969, "grad_norm": 0.392578125, "learning_rate": 7.081098345188959e-06, "loss": 1.9363, "step": 21323 }, { "epoch": 0.6879944183532932, "grad_norm": 0.42578125, "learning_rate": 7.079761832428615e-06, "loss": 1.9433, "step": 21324 }, { "epoch": 0.6880266822070896, "grad_norm": 0.484375, "learning_rate": 7.078425406848566e-06, "loss": 1.9406, "step": 21325 }, { "epoch": 0.6880589460608859, "grad_norm": 0.408203125, "learning_rate": 7.077089068463533e-06, "loss": 1.92, "step": 21326 }, { "epoch": 0.6880912099146823, "grad_norm": 0.42578125, "learning_rate": 7.075752817288219e-06, "loss": 1.9206, "step": 21327 }, { "epoch": 0.6881234737684786, "grad_norm": 0.40234375, "learning_rate": 7.074416653337327e-06, "loss": 1.9481, "step": 21328 }, { "epoch": 0.688155737622275, "grad_norm": 0.50390625, "learning_rate": 7.073080576625577e-06, "loss": 1.9327, "step": 21329 }, { "epoch": 0.6881880014760713, "grad_norm": 0.44921875, "learning_rate": 7.07174458716767e-06, "loss": 1.9152, "step": 21330 }, { "epoch": 0.6882202653298677, "grad_norm": 0.474609375, "learning_rate": 7.070408684978305e-06, "loss": 1.9272, "step": 21331 }, { "epoch": 0.688252529183664, "grad_norm": 0.412109375, "learning_rate": 7.0690728700721975e-06, "loss": 1.8865, "step": 21332 }, { "epoch": 0.6882847930374604, "grad_norm": 0.435546875, "learning_rate": 7.067737142464045e-06, "loss": 1.8812, "step": 21333 }, { "epoch": 0.6883170568912567, "grad_norm": 0.408203125, "learning_rate": 7.066401502168549e-06, "loss": 1.9524, "step": 21334 }, { "epoch": 0.6883493207450531, "grad_norm": 0.41796875, "learning_rate": 7.065065949200417e-06, "loss": 1.9387, "step": 21335 }, { "epoch": 0.6883815845988493, "grad_norm": 0.416015625, "learning_rate": 7.063730483574348e-06, "loss": 1.9452, "step": 21336 }, { "epoch": 0.6884138484526457, "grad_norm": 0.4296875, "learning_rate": 7.062395105305033e-06, "loss": 1.9122, "step": 21337 }, { "epoch": 0.688446112306442, "grad_norm": 0.4375, "learning_rate": 7.061059814407184e-06, "loss": 1.9189, "step": 21338 }, { "epoch": 0.6884783761602384, "grad_norm": 0.419921875, "learning_rate": 7.059724610895495e-06, "loss": 1.9442, "step": 21339 }, { "epoch": 0.6885106400140347, "grad_norm": 0.439453125, "learning_rate": 7.058389494784654e-06, "loss": 1.9198, "step": 21340 }, { "epoch": 0.6885429038678311, "grad_norm": 0.41796875, "learning_rate": 7.057054466089371e-06, "loss": 1.9205, "step": 21341 }, { "epoch": 0.6885751677216275, "grad_norm": 0.40234375, "learning_rate": 7.055719524824333e-06, "loss": 1.9557, "step": 21342 }, { "epoch": 0.6886074315754238, "grad_norm": 0.41796875, "learning_rate": 7.054384671004229e-06, "loss": 1.9093, "step": 21343 }, { "epoch": 0.6886396954292202, "grad_norm": 0.3984375, "learning_rate": 7.053049904643765e-06, "loss": 1.9211, "step": 21344 }, { "epoch": 0.6886719592830165, "grad_norm": 0.3984375, "learning_rate": 7.051715225757627e-06, "loss": 1.9367, "step": 21345 }, { "epoch": 0.6887042231368129, "grad_norm": 0.40234375, "learning_rate": 7.050380634360501e-06, "loss": 1.9228, "step": 21346 }, { "epoch": 0.6887364869906092, "grad_norm": 0.4375, "learning_rate": 7.049046130467086e-06, "loss": 1.9432, "step": 21347 }, { "epoch": 0.6887687508444056, "grad_norm": 0.38671875, "learning_rate": 7.047711714092068e-06, "loss": 1.9258, "step": 21348 }, { "epoch": 0.6888010146982019, "grad_norm": 0.390625, "learning_rate": 7.046377385250127e-06, "loss": 1.9122, "step": 21349 }, { "epoch": 0.6888332785519983, "grad_norm": 0.396484375, "learning_rate": 7.0450431439559646e-06, "loss": 1.9567, "step": 21350 }, { "epoch": 0.6888655424057946, "grad_norm": 0.380859375, "learning_rate": 7.043708990224262e-06, "loss": 1.9537, "step": 21351 }, { "epoch": 0.688897806259591, "grad_norm": 0.3984375, "learning_rate": 7.042374924069696e-06, "loss": 1.9385, "step": 21352 }, { "epoch": 0.6889300701133872, "grad_norm": 0.38671875, "learning_rate": 7.0410409455069625e-06, "loss": 1.9408, "step": 21353 }, { "epoch": 0.6889623339671836, "grad_norm": 0.419921875, "learning_rate": 7.039707054550743e-06, "loss": 1.9493, "step": 21354 }, { "epoch": 0.6889945978209799, "grad_norm": 0.390625, "learning_rate": 7.038373251215716e-06, "loss": 1.9691, "step": 21355 }, { "epoch": 0.6890268616747763, "grad_norm": 0.384765625, "learning_rate": 7.0370395355165585e-06, "loss": 1.9346, "step": 21356 }, { "epoch": 0.6890591255285726, "grad_norm": 0.4140625, "learning_rate": 7.035705907467964e-06, "loss": 1.98, "step": 21357 }, { "epoch": 0.689091389382369, "grad_norm": 0.53125, "learning_rate": 7.034372367084605e-06, "loss": 2.0233, "step": 21358 }, { "epoch": 0.6891236532361653, "grad_norm": 0.50390625, "learning_rate": 7.033038914381156e-06, "loss": 2.0249, "step": 21359 }, { "epoch": 0.6891559170899617, "grad_norm": 0.41015625, "learning_rate": 7.031705549372303e-06, "loss": 1.9959, "step": 21360 }, { "epoch": 0.689188180943758, "grad_norm": 0.4921875, "learning_rate": 7.0303722720727185e-06, "loss": 2.0251, "step": 21361 }, { "epoch": 0.6892204447975544, "grad_norm": 0.54296875, "learning_rate": 7.029039082497074e-06, "loss": 1.9663, "step": 21362 }, { "epoch": 0.6892527086513508, "grad_norm": 0.478515625, "learning_rate": 7.027705980660056e-06, "loss": 2.0588, "step": 21363 }, { "epoch": 0.6892849725051471, "grad_norm": 0.47265625, "learning_rate": 7.026372966576329e-06, "loss": 2.0388, "step": 21364 }, { "epoch": 0.6893172363589435, "grad_norm": 0.412109375, "learning_rate": 7.025040040260563e-06, "loss": 2.0148, "step": 21365 }, { "epoch": 0.6893495002127398, "grad_norm": 0.48828125, "learning_rate": 7.023707201727442e-06, "loss": 2.0108, "step": 21366 }, { "epoch": 0.6893817640665362, "grad_norm": 0.421875, "learning_rate": 7.022374450991628e-06, "loss": 2.0045, "step": 21367 }, { "epoch": 0.6894140279203325, "grad_norm": 0.50390625, "learning_rate": 7.021041788067788e-06, "loss": 1.9902, "step": 21368 }, { "epoch": 0.6894462917741289, "grad_norm": 0.4140625, "learning_rate": 7.019709212970602e-06, "loss": 2.0286, "step": 21369 }, { "epoch": 0.6894785556279251, "grad_norm": 0.482421875, "learning_rate": 7.018376725714732e-06, "loss": 2.0249, "step": 21370 }, { "epoch": 0.6895108194817215, "grad_norm": 0.416015625, "learning_rate": 7.017044326314839e-06, "loss": 2.0135, "step": 21371 }, { "epoch": 0.6895430833355178, "grad_norm": 0.455078125, "learning_rate": 7.0157120147855995e-06, "loss": 2.0211, "step": 21372 }, { "epoch": 0.6895753471893142, "grad_norm": 0.392578125, "learning_rate": 7.014379791141677e-06, "loss": 1.9826, "step": 21373 }, { "epoch": 0.6896076110431105, "grad_norm": 0.435546875, "learning_rate": 7.013047655397725e-06, "loss": 1.9429, "step": 21374 }, { "epoch": 0.6896398748969069, "grad_norm": 0.400390625, "learning_rate": 7.0117156075684215e-06, "loss": 1.9335, "step": 21375 }, { "epoch": 0.6896721387507032, "grad_norm": 0.412109375, "learning_rate": 7.010383647668421e-06, "loss": 1.9321, "step": 21376 }, { "epoch": 0.6897044026044996, "grad_norm": 0.4140625, "learning_rate": 7.00905177571238e-06, "loss": 1.9331, "step": 21377 }, { "epoch": 0.6897366664582959, "grad_norm": 0.390625, "learning_rate": 7.007719991714971e-06, "loss": 1.9245, "step": 21378 }, { "epoch": 0.6897689303120923, "grad_norm": 0.490234375, "learning_rate": 7.006388295690845e-06, "loss": 1.9264, "step": 21379 }, { "epoch": 0.6898011941658886, "grad_norm": 0.421875, "learning_rate": 7.005056687654658e-06, "loss": 1.9213, "step": 21380 }, { "epoch": 0.689833458019685, "grad_norm": 0.4140625, "learning_rate": 7.003725167621077e-06, "loss": 1.9578, "step": 21381 }, { "epoch": 0.6898657218734814, "grad_norm": 0.40234375, "learning_rate": 7.002393735604752e-06, "loss": 1.9382, "step": 21382 }, { "epoch": 0.6898979857272777, "grad_norm": 0.408203125, "learning_rate": 7.00106239162034e-06, "loss": 1.9124, "step": 21383 }, { "epoch": 0.6899302495810741, "grad_norm": 0.41015625, "learning_rate": 6.999731135682489e-06, "loss": 1.952, "step": 21384 }, { "epoch": 0.6899625134348704, "grad_norm": 0.41015625, "learning_rate": 6.998399967805862e-06, "loss": 1.9265, "step": 21385 }, { "epoch": 0.6899947772886668, "grad_norm": 0.42578125, "learning_rate": 6.997068888005111e-06, "loss": 1.9699, "step": 21386 }, { "epoch": 0.690027041142463, "grad_norm": 0.37109375, "learning_rate": 6.995737896294878e-06, "loss": 1.966, "step": 21387 }, { "epoch": 0.6900593049962594, "grad_norm": 0.421875, "learning_rate": 6.994406992689824e-06, "loss": 1.9262, "step": 21388 }, { "epoch": 0.6900915688500557, "grad_norm": 0.392578125, "learning_rate": 6.993076177204595e-06, "loss": 1.8866, "step": 21389 }, { "epoch": 0.6901238327038521, "grad_norm": 0.38671875, "learning_rate": 6.991745449853835e-06, "loss": 1.9552, "step": 21390 }, { "epoch": 0.6901560965576484, "grad_norm": 0.412109375, "learning_rate": 6.9904148106522e-06, "loss": 1.9503, "step": 21391 }, { "epoch": 0.6901883604114448, "grad_norm": 0.44921875, "learning_rate": 6.989084259614334e-06, "loss": 1.8581, "step": 21392 }, { "epoch": 0.6902206242652411, "grad_norm": 0.380859375, "learning_rate": 6.987753796754876e-06, "loss": 1.9113, "step": 21393 }, { "epoch": 0.6902528881190375, "grad_norm": 0.404296875, "learning_rate": 6.986423422088481e-06, "loss": 1.919, "step": 21394 }, { "epoch": 0.6902851519728338, "grad_norm": 0.388671875, "learning_rate": 6.985093135629789e-06, "loss": 1.8566, "step": 21395 }, { "epoch": 0.6903174158266302, "grad_norm": 0.40234375, "learning_rate": 6.9837629373934354e-06, "loss": 1.8166, "step": 21396 }, { "epoch": 0.6903496796804265, "grad_norm": 0.37890625, "learning_rate": 6.982432827394073e-06, "loss": 1.8259, "step": 21397 }, { "epoch": 0.6903819435342229, "grad_norm": 0.447265625, "learning_rate": 6.98110280564634e-06, "loss": 1.7863, "step": 21398 }, { "epoch": 0.6904142073880192, "grad_norm": 0.375, "learning_rate": 6.979772872164869e-06, "loss": 1.8072, "step": 21399 }, { "epoch": 0.6904464712418156, "grad_norm": 0.431640625, "learning_rate": 6.97844302696431e-06, "loss": 1.8469, "step": 21400 }, { "epoch": 0.6904787350956119, "grad_norm": 0.3828125, "learning_rate": 6.9771132700592946e-06, "loss": 1.8605, "step": 21401 }, { "epoch": 0.6905109989494083, "grad_norm": 0.375, "learning_rate": 6.975783601464456e-06, "loss": 1.8068, "step": 21402 }, { "epoch": 0.6905432628032047, "grad_norm": 0.37890625, "learning_rate": 6.9744540211944405e-06, "loss": 1.79, "step": 21403 }, { "epoch": 0.690575526657001, "grad_norm": 0.41796875, "learning_rate": 6.973124529263878e-06, "loss": 1.9015, "step": 21404 }, { "epoch": 0.6906077905107973, "grad_norm": 0.365234375, "learning_rate": 6.971795125687395e-06, "loss": 1.859, "step": 21405 }, { "epoch": 0.6906400543645936, "grad_norm": 0.39453125, "learning_rate": 6.970465810479639e-06, "loss": 1.8439, "step": 21406 }, { "epoch": 0.69067231821839, "grad_norm": 0.392578125, "learning_rate": 6.969136583655234e-06, "loss": 1.8262, "step": 21407 }, { "epoch": 0.6907045820721863, "grad_norm": 0.36328125, "learning_rate": 6.9678074452288076e-06, "loss": 1.8419, "step": 21408 }, { "epoch": 0.6907368459259827, "grad_norm": 0.400390625, "learning_rate": 6.966478395215e-06, "loss": 1.8415, "step": 21409 }, { "epoch": 0.690769109779779, "grad_norm": 0.40234375, "learning_rate": 6.9651494336284354e-06, "loss": 1.8042, "step": 21410 }, { "epoch": 0.6908013736335754, "grad_norm": 0.392578125, "learning_rate": 6.963820560483735e-06, "loss": 1.8405, "step": 21411 }, { "epoch": 0.6908336374873717, "grad_norm": 0.423828125, "learning_rate": 6.962491775795539e-06, "loss": 1.8585, "step": 21412 }, { "epoch": 0.6908659013411681, "grad_norm": 0.37109375, "learning_rate": 6.961163079578466e-06, "loss": 1.8342, "step": 21413 }, { "epoch": 0.6908981651949644, "grad_norm": 0.400390625, "learning_rate": 6.959834471847145e-06, "loss": 1.8451, "step": 21414 }, { "epoch": 0.6909304290487608, "grad_norm": 0.380859375, "learning_rate": 6.958505952616192e-06, "loss": 1.8538, "step": 21415 }, { "epoch": 0.6909626929025571, "grad_norm": 0.375, "learning_rate": 6.957177521900241e-06, "loss": 1.8226, "step": 21416 }, { "epoch": 0.6909949567563535, "grad_norm": 0.39453125, "learning_rate": 6.95584917971391e-06, "loss": 1.8719, "step": 21417 }, { "epoch": 0.6910272206101498, "grad_norm": 0.41796875, "learning_rate": 6.954520926071816e-06, "loss": 1.8514, "step": 21418 }, { "epoch": 0.6910594844639462, "grad_norm": 0.396484375, "learning_rate": 6.953192760988588e-06, "loss": 1.8516, "step": 21419 }, { "epoch": 0.6910917483177424, "grad_norm": 0.447265625, "learning_rate": 6.951864684478843e-06, "loss": 1.7912, "step": 21420 }, { "epoch": 0.6911240121715388, "grad_norm": 0.361328125, "learning_rate": 6.95053669655719e-06, "loss": 1.8287, "step": 21421 }, { "epoch": 0.6911562760253351, "grad_norm": 0.369140625, "learning_rate": 6.949208797238262e-06, "loss": 1.8399, "step": 21422 }, { "epoch": 0.6911885398791315, "grad_norm": 0.431640625, "learning_rate": 6.9478809865366685e-06, "loss": 1.8343, "step": 21423 }, { "epoch": 0.6912208037329279, "grad_norm": 0.3671875, "learning_rate": 6.946553264467016e-06, "loss": 1.835, "step": 21424 }, { "epoch": 0.6912530675867242, "grad_norm": 0.404296875, "learning_rate": 6.945225631043937e-06, "loss": 1.8591, "step": 21425 }, { "epoch": 0.6912853314405206, "grad_norm": 0.384765625, "learning_rate": 6.943898086282034e-06, "loss": 1.7987, "step": 21426 }, { "epoch": 0.6913175952943169, "grad_norm": 0.37109375, "learning_rate": 6.9425706301959134e-06, "loss": 1.8595, "step": 21427 }, { "epoch": 0.6913498591481133, "grad_norm": 0.369140625, "learning_rate": 6.941243262800204e-06, "loss": 1.8318, "step": 21428 }, { "epoch": 0.6913821230019096, "grad_norm": 0.375, "learning_rate": 6.939915984109506e-06, "loss": 1.8769, "step": 21429 }, { "epoch": 0.691414386855706, "grad_norm": 0.3671875, "learning_rate": 6.938588794138424e-06, "loss": 1.8742, "step": 21430 }, { "epoch": 0.6914466507095023, "grad_norm": 0.392578125, "learning_rate": 6.937261692901581e-06, "loss": 1.8209, "step": 21431 }, { "epoch": 0.6914789145632987, "grad_norm": 0.384765625, "learning_rate": 6.935934680413576e-06, "loss": 1.8241, "step": 21432 }, { "epoch": 0.691511178417095, "grad_norm": 0.353515625, "learning_rate": 6.934607756689011e-06, "loss": 1.7818, "step": 21433 }, { "epoch": 0.6915434422708914, "grad_norm": 0.37890625, "learning_rate": 6.933280921742509e-06, "loss": 1.9033, "step": 21434 }, { "epoch": 0.6915757061246877, "grad_norm": 0.361328125, "learning_rate": 6.931954175588654e-06, "loss": 1.8496, "step": 21435 }, { "epoch": 0.6916079699784841, "grad_norm": 0.36328125, "learning_rate": 6.930627518242057e-06, "loss": 1.794, "step": 21436 }, { "epoch": 0.6916402338322803, "grad_norm": 0.380859375, "learning_rate": 6.929300949717337e-06, "loss": 1.8838, "step": 21437 }, { "epoch": 0.6916724976860767, "grad_norm": 0.365234375, "learning_rate": 6.92797447002907e-06, "loss": 1.8689, "step": 21438 }, { "epoch": 0.691704761539873, "grad_norm": 0.3984375, "learning_rate": 6.9266480791918685e-06, "loss": 1.8644, "step": 21439 }, { "epoch": 0.6917370253936694, "grad_norm": 0.361328125, "learning_rate": 6.925321777220339e-06, "loss": 1.8918, "step": 21440 }, { "epoch": 0.6917692892474657, "grad_norm": 0.380859375, "learning_rate": 6.923995564129075e-06, "loss": 1.8764, "step": 21441 }, { "epoch": 0.6918015531012621, "grad_norm": 0.375, "learning_rate": 6.922669439932668e-06, "loss": 1.8578, "step": 21442 }, { "epoch": 0.6918338169550585, "grad_norm": 0.359375, "learning_rate": 6.921343404645727e-06, "loss": 1.8603, "step": 21443 }, { "epoch": 0.6918660808088548, "grad_norm": 0.396484375, "learning_rate": 6.920017458282843e-06, "loss": 1.8453, "step": 21444 }, { "epoch": 0.6918983446626512, "grad_norm": 0.373046875, "learning_rate": 6.91869160085861e-06, "loss": 1.8683, "step": 21445 }, { "epoch": 0.6919306085164475, "grad_norm": 0.369140625, "learning_rate": 6.917365832387616e-06, "loss": 1.9123, "step": 21446 }, { "epoch": 0.6919628723702439, "grad_norm": 0.369140625, "learning_rate": 6.916040152884465e-06, "loss": 1.8633, "step": 21447 }, { "epoch": 0.6919951362240402, "grad_norm": 0.384765625, "learning_rate": 6.9147145623637474e-06, "loss": 1.8646, "step": 21448 }, { "epoch": 0.6920274000778366, "grad_norm": 0.376953125, "learning_rate": 6.913389060840044e-06, "loss": 1.8697, "step": 21449 }, { "epoch": 0.6920596639316329, "grad_norm": 0.3984375, "learning_rate": 6.912063648327958e-06, "loss": 1.8431, "step": 21450 }, { "epoch": 0.6920919277854293, "grad_norm": 0.373046875, "learning_rate": 6.910738324842075e-06, "loss": 1.877, "step": 21451 }, { "epoch": 0.6921241916392256, "grad_norm": 0.3671875, "learning_rate": 6.909413090396974e-06, "loss": 1.8718, "step": 21452 }, { "epoch": 0.692156455493022, "grad_norm": 0.384765625, "learning_rate": 6.9080879450072554e-06, "loss": 1.8532, "step": 21453 }, { "epoch": 0.6921887193468182, "grad_norm": 0.35546875, "learning_rate": 6.906762888687499e-06, "loss": 1.901, "step": 21454 }, { "epoch": 0.6922209832006146, "grad_norm": 0.380859375, "learning_rate": 6.905437921452287e-06, "loss": 1.8755, "step": 21455 }, { "epoch": 0.6922532470544109, "grad_norm": 0.46484375, "learning_rate": 6.904113043316212e-06, "loss": 1.8694, "step": 21456 }, { "epoch": 0.6922855109082073, "grad_norm": 0.380859375, "learning_rate": 6.902788254293852e-06, "loss": 1.8705, "step": 21457 }, { "epoch": 0.6923177747620036, "grad_norm": 0.412109375, "learning_rate": 6.901463554399785e-06, "loss": 1.9012, "step": 21458 }, { "epoch": 0.6923500386158, "grad_norm": 0.380859375, "learning_rate": 6.9001389436486025e-06, "loss": 1.8718, "step": 21459 }, { "epoch": 0.6923823024695963, "grad_norm": 0.376953125, "learning_rate": 6.89881442205488e-06, "loss": 1.8899, "step": 21460 }, { "epoch": 0.6924145663233927, "grad_norm": 0.421875, "learning_rate": 6.8974899896331915e-06, "loss": 1.8899, "step": 21461 }, { "epoch": 0.692446830177189, "grad_norm": 0.376953125, "learning_rate": 6.896165646398125e-06, "loss": 1.8662, "step": 21462 }, { "epoch": 0.6924790940309854, "grad_norm": 0.490234375, "learning_rate": 6.894841392364254e-06, "loss": 1.9122, "step": 21463 }, { "epoch": 0.6925113578847818, "grad_norm": 0.47265625, "learning_rate": 6.893517227546148e-06, "loss": 1.8846, "step": 21464 }, { "epoch": 0.6925436217385781, "grad_norm": 0.404296875, "learning_rate": 6.892193151958402e-06, "loss": 1.8539, "step": 21465 }, { "epoch": 0.6925758855923745, "grad_norm": 0.36328125, "learning_rate": 6.8908691656155645e-06, "loss": 1.8698, "step": 21466 }, { "epoch": 0.6926081494461708, "grad_norm": 0.4453125, "learning_rate": 6.889545268532222e-06, "loss": 1.9289, "step": 21467 }, { "epoch": 0.6926404132999672, "grad_norm": 0.40234375, "learning_rate": 6.888221460722958e-06, "loss": 1.8895, "step": 21468 }, { "epoch": 0.6926726771537635, "grad_norm": 0.373046875, "learning_rate": 6.88689774220232e-06, "loss": 1.8306, "step": 21469 }, { "epoch": 0.6927049410075599, "grad_norm": 0.419921875, "learning_rate": 6.885574112984893e-06, "loss": 1.8796, "step": 21470 }, { "epoch": 0.6927372048613561, "grad_norm": 0.392578125, "learning_rate": 6.884250573085251e-06, "loss": 1.9003, "step": 21471 }, { "epoch": 0.6927694687151525, "grad_norm": 0.35546875, "learning_rate": 6.882927122517956e-06, "loss": 1.8642, "step": 21472 }, { "epoch": 0.6928017325689488, "grad_norm": 0.400390625, "learning_rate": 6.881603761297571e-06, "loss": 1.8914, "step": 21473 }, { "epoch": 0.6928339964227452, "grad_norm": 0.388671875, "learning_rate": 6.880280489438672e-06, "loss": 1.9064, "step": 21474 }, { "epoch": 0.6928662602765415, "grad_norm": 0.357421875, "learning_rate": 6.87895730695582e-06, "loss": 1.9335, "step": 21475 }, { "epoch": 0.6928985241303379, "grad_norm": 0.380859375, "learning_rate": 6.877634213863582e-06, "loss": 1.9287, "step": 21476 }, { "epoch": 0.6929307879841342, "grad_norm": 0.37890625, "learning_rate": 6.876311210176511e-06, "loss": 1.8992, "step": 21477 }, { "epoch": 0.6929630518379306, "grad_norm": 0.38671875, "learning_rate": 6.874988295909187e-06, "loss": 1.9133, "step": 21478 }, { "epoch": 0.6929953156917269, "grad_norm": 0.36328125, "learning_rate": 6.873665471076162e-06, "loss": 1.9082, "step": 21479 }, { "epoch": 0.6930275795455233, "grad_norm": 0.41015625, "learning_rate": 6.872342735691991e-06, "loss": 1.9352, "step": 21480 }, { "epoch": 0.6930598433993196, "grad_norm": 0.373046875, "learning_rate": 6.8710200897712475e-06, "loss": 1.9013, "step": 21481 }, { "epoch": 0.693092107253116, "grad_norm": 0.361328125, "learning_rate": 6.869697533328483e-06, "loss": 1.8962, "step": 21482 }, { "epoch": 0.6931243711069123, "grad_norm": 0.37109375, "learning_rate": 6.868375066378249e-06, "loss": 1.8922, "step": 21483 }, { "epoch": 0.6931566349607087, "grad_norm": 0.419921875, "learning_rate": 6.8670526889351135e-06, "loss": 1.8851, "step": 21484 }, { "epoch": 0.6931888988145051, "grad_norm": 0.4140625, "learning_rate": 6.8657304010136294e-06, "loss": 1.8999, "step": 21485 }, { "epoch": 0.6932211626683014, "grad_norm": 0.453125, "learning_rate": 6.864408202628343e-06, "loss": 1.9159, "step": 21486 }, { "epoch": 0.6932534265220978, "grad_norm": 0.466796875, "learning_rate": 6.86308609379382e-06, "loss": 1.9055, "step": 21487 }, { "epoch": 0.693285690375894, "grad_norm": 0.361328125, "learning_rate": 6.861764074524608e-06, "loss": 1.8766, "step": 21488 }, { "epoch": 0.6933179542296904, "grad_norm": 0.384765625, "learning_rate": 6.860442144835253e-06, "loss": 1.8761, "step": 21489 }, { "epoch": 0.6933502180834867, "grad_norm": 0.37890625, "learning_rate": 6.859120304740317e-06, "loss": 1.8896, "step": 21490 }, { "epoch": 0.6933824819372831, "grad_norm": 0.3828125, "learning_rate": 6.8577985542543465e-06, "loss": 1.8863, "step": 21491 }, { "epoch": 0.6934147457910794, "grad_norm": 0.361328125, "learning_rate": 6.856476893391881e-06, "loss": 1.9307, "step": 21492 }, { "epoch": 0.6934470096448758, "grad_norm": 0.384765625, "learning_rate": 6.855155322167481e-06, "loss": 1.8933, "step": 21493 }, { "epoch": 0.6934792734986721, "grad_norm": 0.365234375, "learning_rate": 6.853833840595689e-06, "loss": 1.8812, "step": 21494 }, { "epoch": 0.6935115373524685, "grad_norm": 0.365234375, "learning_rate": 6.852512448691043e-06, "loss": 1.924, "step": 21495 }, { "epoch": 0.6935438012062648, "grad_norm": 0.36328125, "learning_rate": 6.851191146468107e-06, "loss": 1.8839, "step": 21496 }, { "epoch": 0.6935760650600612, "grad_norm": 0.357421875, "learning_rate": 6.849869933941402e-06, "loss": 1.877, "step": 21497 }, { "epoch": 0.6936083289138575, "grad_norm": 0.376953125, "learning_rate": 6.848548811125482e-06, "loss": 1.9195, "step": 21498 }, { "epoch": 0.6936405927676539, "grad_norm": 0.36328125, "learning_rate": 6.8472277780349e-06, "loss": 1.9016, "step": 21499 }, { "epoch": 0.6936728566214502, "grad_norm": 0.484375, "learning_rate": 6.845906834684176e-06, "loss": 1.8522, "step": 21500 }, { "epoch": 0.6937051204752466, "grad_norm": 0.443359375, "learning_rate": 6.844585981087857e-06, "loss": 1.8908, "step": 21501 }, { "epoch": 0.6937373843290429, "grad_norm": 0.416015625, "learning_rate": 6.843265217260498e-06, "loss": 1.8455, "step": 21502 }, { "epoch": 0.6937696481828393, "grad_norm": 0.3671875, "learning_rate": 6.8419445432166126e-06, "loss": 1.8723, "step": 21503 }, { "epoch": 0.6938019120366357, "grad_norm": 0.4296875, "learning_rate": 6.840623958970747e-06, "loss": 1.8959, "step": 21504 }, { "epoch": 0.693834175890432, "grad_norm": 0.421875, "learning_rate": 6.839303464537447e-06, "loss": 1.8991, "step": 21505 }, { "epoch": 0.6938664397442283, "grad_norm": 0.359375, "learning_rate": 6.837983059931241e-06, "loss": 1.8653, "step": 21506 }, { "epoch": 0.6938987035980246, "grad_norm": 0.39453125, "learning_rate": 6.83666274516666e-06, "loss": 1.8919, "step": 21507 }, { "epoch": 0.693930967451821, "grad_norm": 0.400390625, "learning_rate": 6.835342520258233e-06, "loss": 1.8754, "step": 21508 }, { "epoch": 0.6939632313056173, "grad_norm": 0.380859375, "learning_rate": 6.834022385220506e-06, "loss": 1.9207, "step": 21509 }, { "epoch": 0.6939954951594137, "grad_norm": 0.373046875, "learning_rate": 6.832702340068001e-06, "loss": 1.8996, "step": 21510 }, { "epoch": 0.69402775901321, "grad_norm": 0.4375, "learning_rate": 6.831382384815244e-06, "loss": 1.9073, "step": 21511 }, { "epoch": 0.6940600228670064, "grad_norm": 0.396484375, "learning_rate": 6.830062519476775e-06, "loss": 1.9223, "step": 21512 }, { "epoch": 0.6940922867208027, "grad_norm": 0.357421875, "learning_rate": 6.828742744067118e-06, "loss": 1.9165, "step": 21513 }, { "epoch": 0.6941245505745991, "grad_norm": 0.396484375, "learning_rate": 6.8274230586007925e-06, "loss": 1.9195, "step": 21514 }, { "epoch": 0.6941568144283954, "grad_norm": 0.474609375, "learning_rate": 6.826103463092336e-06, "loss": 1.8615, "step": 21515 }, { "epoch": 0.6941890782821918, "grad_norm": 0.365234375, "learning_rate": 6.824783957556268e-06, "loss": 1.8843, "step": 21516 }, { "epoch": 0.6942213421359881, "grad_norm": 0.376953125, "learning_rate": 6.8234645420071095e-06, "loss": 1.8638, "step": 21517 }, { "epoch": 0.6942536059897845, "grad_norm": 0.41796875, "learning_rate": 6.8221452164593925e-06, "loss": 1.8803, "step": 21518 }, { "epoch": 0.6942858698435808, "grad_norm": 0.447265625, "learning_rate": 6.820825980927635e-06, "loss": 1.8842, "step": 21519 }, { "epoch": 0.6943181336973772, "grad_norm": 0.365234375, "learning_rate": 6.81950683542635e-06, "loss": 1.8952, "step": 21520 }, { "epoch": 0.6943503975511734, "grad_norm": 0.455078125, "learning_rate": 6.818187779970072e-06, "loss": 1.8139, "step": 21521 }, { "epoch": 0.6943826614049698, "grad_norm": 0.419921875, "learning_rate": 6.816868814573312e-06, "loss": 1.9057, "step": 21522 }, { "epoch": 0.6944149252587661, "grad_norm": 0.375, "learning_rate": 6.815549939250586e-06, "loss": 1.8911, "step": 21523 }, { "epoch": 0.6944471891125625, "grad_norm": 0.41015625, "learning_rate": 6.814231154016419e-06, "loss": 1.8874, "step": 21524 }, { "epoch": 0.6944794529663589, "grad_norm": 0.42578125, "learning_rate": 6.812912458885322e-06, "loss": 1.9056, "step": 21525 }, { "epoch": 0.6945117168201552, "grad_norm": 0.37109375, "learning_rate": 6.811593853871807e-06, "loss": 1.8844, "step": 21526 }, { "epoch": 0.6945439806739516, "grad_norm": 0.3671875, "learning_rate": 6.810275338990403e-06, "loss": 1.8702, "step": 21527 }, { "epoch": 0.6945762445277479, "grad_norm": 0.4140625, "learning_rate": 6.808956914255602e-06, "loss": 1.8808, "step": 21528 }, { "epoch": 0.6946085083815443, "grad_norm": 0.46484375, "learning_rate": 6.807638579681928e-06, "loss": 1.8856, "step": 21529 }, { "epoch": 0.6946407722353406, "grad_norm": 0.361328125, "learning_rate": 6.806320335283902e-06, "loss": 1.8847, "step": 21530 }, { "epoch": 0.694673036089137, "grad_norm": 0.427734375, "learning_rate": 6.805002181076011e-06, "loss": 1.8662, "step": 21531 }, { "epoch": 0.6947052999429333, "grad_norm": 0.392578125, "learning_rate": 6.803684117072778e-06, "loss": 1.8822, "step": 21532 }, { "epoch": 0.6947375637967297, "grad_norm": 0.369140625, "learning_rate": 6.802366143288722e-06, "loss": 1.8609, "step": 21533 }, { "epoch": 0.694769827650526, "grad_norm": 0.416015625, "learning_rate": 6.801048259738328e-06, "loss": 1.8807, "step": 21534 }, { "epoch": 0.6948020915043224, "grad_norm": 0.39453125, "learning_rate": 6.799730466436116e-06, "loss": 1.8807, "step": 21535 }, { "epoch": 0.6948343553581187, "grad_norm": 0.357421875, "learning_rate": 6.79841276339659e-06, "loss": 1.8963, "step": 21536 }, { "epoch": 0.6948666192119151, "grad_norm": 0.384765625, "learning_rate": 6.797095150634246e-06, "loss": 1.8637, "step": 21537 }, { "epoch": 0.6948988830657113, "grad_norm": 0.39453125, "learning_rate": 6.795777628163599e-06, "loss": 1.9142, "step": 21538 }, { "epoch": 0.6949311469195077, "grad_norm": 0.373046875, "learning_rate": 6.794460195999143e-06, "loss": 1.8642, "step": 21539 }, { "epoch": 0.694963410773304, "grad_norm": 0.40625, "learning_rate": 6.7931428541553865e-06, "loss": 1.8848, "step": 21540 }, { "epoch": 0.6949956746271004, "grad_norm": 0.5859375, "learning_rate": 6.791825602646824e-06, "loss": 1.8599, "step": 21541 }, { "epoch": 0.6950279384808967, "grad_norm": 0.35546875, "learning_rate": 6.790508441487954e-06, "loss": 1.8838, "step": 21542 }, { "epoch": 0.6950602023346931, "grad_norm": 0.53125, "learning_rate": 6.789191370693282e-06, "loss": 1.8472, "step": 21543 }, { "epoch": 0.6950924661884895, "grad_norm": 0.40234375, "learning_rate": 6.787874390277302e-06, "loss": 1.8502, "step": 21544 }, { "epoch": 0.6951247300422858, "grad_norm": 0.369140625, "learning_rate": 6.786557500254503e-06, "loss": 1.8419, "step": 21545 }, { "epoch": 0.6951569938960822, "grad_norm": 0.474609375, "learning_rate": 6.785240700639393e-06, "loss": 1.8265, "step": 21546 }, { "epoch": 0.6951892577498785, "grad_norm": 0.462890625, "learning_rate": 6.783923991446458e-06, "loss": 1.8332, "step": 21547 }, { "epoch": 0.6952215216036749, "grad_norm": 0.357421875, "learning_rate": 6.78260737269019e-06, "loss": 1.8973, "step": 21548 }, { "epoch": 0.6952537854574712, "grad_norm": 0.41015625, "learning_rate": 6.78129084438509e-06, "loss": 1.8643, "step": 21549 }, { "epoch": 0.6952860493112676, "grad_norm": 0.56640625, "learning_rate": 6.779974406545643e-06, "loss": 1.8348, "step": 21550 }, { "epoch": 0.6953183131650639, "grad_norm": 0.375, "learning_rate": 6.7786580591863375e-06, "loss": 1.8316, "step": 21551 }, { "epoch": 0.6953505770188603, "grad_norm": 0.439453125, "learning_rate": 6.777341802321671e-06, "loss": 1.8255, "step": 21552 }, { "epoch": 0.6953828408726566, "grad_norm": 0.51171875, "learning_rate": 6.7760256359661266e-06, "loss": 1.8398, "step": 21553 }, { "epoch": 0.695415104726453, "grad_norm": 0.41015625, "learning_rate": 6.774709560134186e-06, "loss": 1.8345, "step": 21554 }, { "epoch": 0.6954473685802492, "grad_norm": 0.419921875, "learning_rate": 6.773393574840349e-06, "loss": 1.8212, "step": 21555 }, { "epoch": 0.6954796324340456, "grad_norm": 0.5390625, "learning_rate": 6.772077680099092e-06, "loss": 1.8607, "step": 21556 }, { "epoch": 0.6955118962878419, "grad_norm": 0.439453125, "learning_rate": 6.770761875924896e-06, "loss": 1.8035, "step": 21557 }, { "epoch": 0.6955441601416383, "grad_norm": 0.53515625, "learning_rate": 6.76944616233226e-06, "loss": 1.8159, "step": 21558 }, { "epoch": 0.6955764239954346, "grad_norm": 0.55859375, "learning_rate": 6.7681305393356465e-06, "loss": 1.8247, "step": 21559 }, { "epoch": 0.695608687849231, "grad_norm": 0.37109375, "learning_rate": 6.766815006949546e-06, "loss": 1.8688, "step": 21560 }, { "epoch": 0.6956409517030273, "grad_norm": 0.39453125, "learning_rate": 6.765499565188451e-06, "loss": 1.8661, "step": 21561 }, { "epoch": 0.6956732155568237, "grad_norm": 0.466796875, "learning_rate": 6.764184214066818e-06, "loss": 1.8284, "step": 21562 }, { "epoch": 0.69570547941062, "grad_norm": 0.494140625, "learning_rate": 6.762868953599137e-06, "loss": 1.8519, "step": 21563 }, { "epoch": 0.6957377432644164, "grad_norm": 0.390625, "learning_rate": 6.761553783799895e-06, "loss": 1.861, "step": 21564 }, { "epoch": 0.6957700071182128, "grad_norm": 0.47265625, "learning_rate": 6.7602387046835495e-06, "loss": 1.8347, "step": 21565 }, { "epoch": 0.6958022709720091, "grad_norm": 0.404296875, "learning_rate": 6.75892371626459e-06, "loss": 1.8702, "step": 21566 }, { "epoch": 0.6958345348258055, "grad_norm": 0.39453125, "learning_rate": 6.757608818557486e-06, "loss": 1.8281, "step": 21567 }, { "epoch": 0.6958667986796018, "grad_norm": 0.388671875, "learning_rate": 6.756294011576705e-06, "loss": 1.8043, "step": 21568 }, { "epoch": 0.6958990625333982, "grad_norm": 0.376953125, "learning_rate": 6.754979295336732e-06, "loss": 1.8083, "step": 21569 }, { "epoch": 0.6959313263871945, "grad_norm": 0.3671875, "learning_rate": 6.753664669852031e-06, "loss": 1.8289, "step": 21570 }, { "epoch": 0.6959635902409909, "grad_norm": 0.4296875, "learning_rate": 6.752350135137068e-06, "loss": 1.7868, "step": 21571 }, { "epoch": 0.6959958540947871, "grad_norm": 0.3984375, "learning_rate": 6.751035691206325e-06, "loss": 1.8037, "step": 21572 }, { "epoch": 0.6960281179485835, "grad_norm": 0.357421875, "learning_rate": 6.749721338074255e-06, "loss": 1.816, "step": 21573 }, { "epoch": 0.6960603818023798, "grad_norm": 0.39453125, "learning_rate": 6.7484070757553425e-06, "loss": 1.8364, "step": 21574 }, { "epoch": 0.6960926456561762, "grad_norm": 0.38671875, "learning_rate": 6.747092904264044e-06, "loss": 1.8247, "step": 21575 }, { "epoch": 0.6961249095099725, "grad_norm": 0.3671875, "learning_rate": 6.7457788236148195e-06, "loss": 1.8686, "step": 21576 }, { "epoch": 0.6961571733637689, "grad_norm": 0.375, "learning_rate": 6.744464833822146e-06, "loss": 1.8455, "step": 21577 }, { "epoch": 0.6961894372175652, "grad_norm": 0.37109375, "learning_rate": 6.743150934900482e-06, "loss": 1.819, "step": 21578 }, { "epoch": 0.6962217010713616, "grad_norm": 0.369140625, "learning_rate": 6.741837126864283e-06, "loss": 1.8602, "step": 21579 }, { "epoch": 0.6962539649251579, "grad_norm": 0.3984375, "learning_rate": 6.740523409728021e-06, "loss": 1.8498, "step": 21580 }, { "epoch": 0.6962862287789543, "grad_norm": 0.38671875, "learning_rate": 6.739209783506154e-06, "loss": 1.8086, "step": 21581 }, { "epoch": 0.6963184926327506, "grad_norm": 0.376953125, "learning_rate": 6.737896248213134e-06, "loss": 1.8283, "step": 21582 }, { "epoch": 0.696350756486547, "grad_norm": 0.373046875, "learning_rate": 6.7365828038634285e-06, "loss": 1.8377, "step": 21583 }, { "epoch": 0.6963830203403433, "grad_norm": 0.390625, "learning_rate": 6.735269450471493e-06, "loss": 1.884, "step": 21584 }, { "epoch": 0.6964152841941397, "grad_norm": 0.3828125, "learning_rate": 6.733956188051775e-06, "loss": 1.8088, "step": 21585 }, { "epoch": 0.6964475480479361, "grad_norm": 0.37890625, "learning_rate": 6.732643016618744e-06, "loss": 1.8147, "step": 21586 }, { "epoch": 0.6964798119017324, "grad_norm": 0.40234375, "learning_rate": 6.731329936186848e-06, "loss": 1.819, "step": 21587 }, { "epoch": 0.6965120757555288, "grad_norm": 0.375, "learning_rate": 6.730016946770535e-06, "loss": 1.7878, "step": 21588 }, { "epoch": 0.696544339609325, "grad_norm": 0.39453125, "learning_rate": 6.7287040483842725e-06, "loss": 1.8433, "step": 21589 }, { "epoch": 0.6965766034631214, "grad_norm": 0.416015625, "learning_rate": 6.727391241042492e-06, "loss": 1.9016, "step": 21590 }, { "epoch": 0.6966088673169177, "grad_norm": 0.376953125, "learning_rate": 6.726078524759652e-06, "loss": 1.821, "step": 21591 }, { "epoch": 0.6966411311707141, "grad_norm": 0.494140625, "learning_rate": 6.724765899550217e-06, "loss": 1.8233, "step": 21592 }, { "epoch": 0.6966733950245104, "grad_norm": 0.400390625, "learning_rate": 6.723453365428612e-06, "loss": 1.8422, "step": 21593 }, { "epoch": 0.6967056588783068, "grad_norm": 0.3671875, "learning_rate": 6.722140922409291e-06, "loss": 1.8252, "step": 21594 }, { "epoch": 0.6967379227321031, "grad_norm": 0.40234375, "learning_rate": 6.720828570506718e-06, "loss": 1.8719, "step": 21595 }, { "epoch": 0.6967701865858995, "grad_norm": 0.39453125, "learning_rate": 6.719516309735313e-06, "loss": 1.8363, "step": 21596 }, { "epoch": 0.6968024504396958, "grad_norm": 0.38671875, "learning_rate": 6.718204140109538e-06, "loss": 1.8118, "step": 21597 }, { "epoch": 0.6968347142934922, "grad_norm": 0.359375, "learning_rate": 6.71689206164383e-06, "loss": 1.8509, "step": 21598 }, { "epoch": 0.6968669781472885, "grad_norm": 0.412109375, "learning_rate": 6.715580074352627e-06, "loss": 1.8273, "step": 21599 }, { "epoch": 0.6968992420010849, "grad_norm": 0.390625, "learning_rate": 6.714268178250382e-06, "loss": 1.8168, "step": 21600 }, { "epoch": 0.6969315058548812, "grad_norm": 0.39453125, "learning_rate": 6.712956373351526e-06, "loss": 1.8528, "step": 21601 }, { "epoch": 0.6969637697086776, "grad_norm": 0.38671875, "learning_rate": 6.7116446596704975e-06, "loss": 1.837, "step": 21602 }, { "epoch": 0.6969960335624739, "grad_norm": 0.375, "learning_rate": 6.7103330372217425e-06, "loss": 1.8249, "step": 21603 }, { "epoch": 0.6970282974162703, "grad_norm": 0.388671875, "learning_rate": 6.7090215060196905e-06, "loss": 1.813, "step": 21604 }, { "epoch": 0.6970605612700667, "grad_norm": 0.40625, "learning_rate": 6.707710066078788e-06, "loss": 1.7934, "step": 21605 }, { "epoch": 0.697092825123863, "grad_norm": 0.369140625, "learning_rate": 6.7063987174134615e-06, "loss": 1.8304, "step": 21606 }, { "epoch": 0.6971250889776593, "grad_norm": 0.3671875, "learning_rate": 6.705087460038145e-06, "loss": 1.8324, "step": 21607 }, { "epoch": 0.6971573528314556, "grad_norm": 0.365234375, "learning_rate": 6.70377629396728e-06, "loss": 1.8285, "step": 21608 }, { "epoch": 0.697189616685252, "grad_norm": 0.38671875, "learning_rate": 6.702465219215293e-06, "loss": 1.827, "step": 21609 }, { "epoch": 0.6972218805390483, "grad_norm": 0.359375, "learning_rate": 6.701154235796611e-06, "loss": 1.8117, "step": 21610 }, { "epoch": 0.6972541443928447, "grad_norm": 0.38671875, "learning_rate": 6.699843343725676e-06, "loss": 1.8066, "step": 21611 }, { "epoch": 0.697286408246641, "grad_norm": 0.36328125, "learning_rate": 6.69853254301691e-06, "loss": 1.7866, "step": 21612 }, { "epoch": 0.6973186721004374, "grad_norm": 0.375, "learning_rate": 6.697221833684737e-06, "loss": 1.8352, "step": 21613 }, { "epoch": 0.6973509359542337, "grad_norm": 0.3828125, "learning_rate": 6.695911215743595e-06, "loss": 1.8092, "step": 21614 }, { "epoch": 0.6973831998080301, "grad_norm": 0.40234375, "learning_rate": 6.694600689207906e-06, "loss": 1.8516, "step": 21615 }, { "epoch": 0.6974154636618264, "grad_norm": 0.400390625, "learning_rate": 6.693290254092087e-06, "loss": 1.8131, "step": 21616 }, { "epoch": 0.6974477275156228, "grad_norm": 0.36328125, "learning_rate": 6.691979910410582e-06, "loss": 1.8538, "step": 21617 }, { "epoch": 0.6974799913694191, "grad_norm": 0.390625, "learning_rate": 6.690669658177791e-06, "loss": 1.8421, "step": 21618 }, { "epoch": 0.6975122552232155, "grad_norm": 0.37109375, "learning_rate": 6.689359497408147e-06, "loss": 1.8511, "step": 21619 }, { "epoch": 0.6975445190770118, "grad_norm": 0.375, "learning_rate": 6.688049428116081e-06, "loss": 1.8498, "step": 21620 }, { "epoch": 0.6975767829308082, "grad_norm": 0.36328125, "learning_rate": 6.686739450315995e-06, "loss": 1.8477, "step": 21621 }, { "epoch": 0.6976090467846044, "grad_norm": 0.3671875, "learning_rate": 6.685429564022318e-06, "loss": 1.8484, "step": 21622 }, { "epoch": 0.6976413106384008, "grad_norm": 0.3671875, "learning_rate": 6.684119769249477e-06, "loss": 1.8398, "step": 21623 }, { "epoch": 0.6976735744921971, "grad_norm": 0.361328125, "learning_rate": 6.68281006601187e-06, "loss": 1.7975, "step": 21624 }, { "epoch": 0.6977058383459935, "grad_norm": 0.365234375, "learning_rate": 6.681500454323922e-06, "loss": 1.8136, "step": 21625 }, { "epoch": 0.6977381021997899, "grad_norm": 0.3984375, "learning_rate": 6.680190934200059e-06, "loss": 1.8343, "step": 21626 }, { "epoch": 0.6977703660535862, "grad_norm": 0.380859375, "learning_rate": 6.678881505654677e-06, "loss": 1.8126, "step": 21627 }, { "epoch": 0.6978026299073826, "grad_norm": 0.3984375, "learning_rate": 6.677572168702203e-06, "loss": 1.8473, "step": 21628 }, { "epoch": 0.6978348937611789, "grad_norm": 0.390625, "learning_rate": 6.676262923357044e-06, "loss": 1.8168, "step": 21629 }, { "epoch": 0.6978671576149753, "grad_norm": 0.3671875, "learning_rate": 6.674953769633604e-06, "loss": 1.8471, "step": 21630 }, { "epoch": 0.6978994214687716, "grad_norm": 0.365234375, "learning_rate": 6.673644707546309e-06, "loss": 1.841, "step": 21631 }, { "epoch": 0.697931685322568, "grad_norm": 0.357421875, "learning_rate": 6.672335737109557e-06, "loss": 1.8363, "step": 21632 }, { "epoch": 0.6979639491763643, "grad_norm": 0.3671875, "learning_rate": 6.671026858337755e-06, "loss": 1.773, "step": 21633 }, { "epoch": 0.6979962130301607, "grad_norm": 0.400390625, "learning_rate": 6.66971807124532e-06, "loss": 1.851, "step": 21634 }, { "epoch": 0.698028476883957, "grad_norm": 0.384765625, "learning_rate": 6.66840937584665e-06, "loss": 1.8523, "step": 21635 }, { "epoch": 0.6980607407377534, "grad_norm": 0.42578125, "learning_rate": 6.66710077215615e-06, "loss": 1.8755, "step": 21636 }, { "epoch": 0.6980930045915497, "grad_norm": 0.37109375, "learning_rate": 6.665792260188231e-06, "loss": 1.8452, "step": 21637 }, { "epoch": 0.6981252684453461, "grad_norm": 0.376953125, "learning_rate": 6.664483839957287e-06, "loss": 1.8486, "step": 21638 }, { "epoch": 0.6981575322991423, "grad_norm": 0.41796875, "learning_rate": 6.663175511477729e-06, "loss": 1.8249, "step": 21639 }, { "epoch": 0.6981897961529387, "grad_norm": 0.37890625, "learning_rate": 6.6618672747639545e-06, "loss": 1.8119, "step": 21640 }, { "epoch": 0.698222060006735, "grad_norm": 0.478515625, "learning_rate": 6.660559129830358e-06, "loss": 1.8352, "step": 21641 }, { "epoch": 0.6982543238605314, "grad_norm": 0.4453125, "learning_rate": 6.659251076691351e-06, "loss": 1.8088, "step": 21642 }, { "epoch": 0.6982865877143277, "grad_norm": 0.392578125, "learning_rate": 6.6579431153613214e-06, "loss": 1.8072, "step": 21643 }, { "epoch": 0.6983188515681241, "grad_norm": 0.453125, "learning_rate": 6.656635245854667e-06, "loss": 1.7849, "step": 21644 }, { "epoch": 0.6983511154219205, "grad_norm": 0.365234375, "learning_rate": 6.65532746818579e-06, "loss": 1.8845, "step": 21645 }, { "epoch": 0.6983833792757168, "grad_norm": 0.359375, "learning_rate": 6.654019782369083e-06, "loss": 1.8176, "step": 21646 }, { "epoch": 0.6984156431295132, "grad_norm": 0.44140625, "learning_rate": 6.652712188418933e-06, "loss": 1.8503, "step": 21647 }, { "epoch": 0.6984479069833095, "grad_norm": 0.400390625, "learning_rate": 6.651404686349748e-06, "loss": 1.8224, "step": 21648 }, { "epoch": 0.6984801708371059, "grad_norm": 0.365234375, "learning_rate": 6.6500972761759016e-06, "loss": 1.8527, "step": 21649 }, { "epoch": 0.6985124346909022, "grad_norm": 0.482421875, "learning_rate": 6.648789957911795e-06, "loss": 1.8342, "step": 21650 }, { "epoch": 0.6985446985446986, "grad_norm": 0.373046875, "learning_rate": 6.6474827315718265e-06, "loss": 1.8503, "step": 21651 }, { "epoch": 0.6985769623984949, "grad_norm": 0.42578125, "learning_rate": 6.646175597170367e-06, "loss": 1.7593, "step": 21652 }, { "epoch": 0.6986092262522913, "grad_norm": 0.412109375, "learning_rate": 6.6448685547218124e-06, "loss": 1.8552, "step": 21653 }, { "epoch": 0.6986414901060876, "grad_norm": 0.375, "learning_rate": 6.643561604240562e-06, "loss": 1.8767, "step": 21654 }, { "epoch": 0.698673753959884, "grad_norm": 0.396484375, "learning_rate": 6.64225474574098e-06, "loss": 1.8236, "step": 21655 }, { "epoch": 0.6987060178136802, "grad_norm": 0.408203125, "learning_rate": 6.6409479792374605e-06, "loss": 1.8329, "step": 21656 }, { "epoch": 0.6987382816674766, "grad_norm": 0.3828125, "learning_rate": 6.6396413047444e-06, "loss": 1.847, "step": 21657 }, { "epoch": 0.6987705455212729, "grad_norm": 0.369140625, "learning_rate": 6.6383347222761615e-06, "loss": 1.8572, "step": 21658 }, { "epoch": 0.6988028093750693, "grad_norm": 0.384765625, "learning_rate": 6.63702823184714e-06, "loss": 1.8266, "step": 21659 }, { "epoch": 0.6988350732288656, "grad_norm": 0.390625, "learning_rate": 6.635721833471712e-06, "loss": 1.8256, "step": 21660 }, { "epoch": 0.698867337082662, "grad_norm": 0.376953125, "learning_rate": 6.634415527164251e-06, "loss": 1.8581, "step": 21661 }, { "epoch": 0.6988996009364583, "grad_norm": 0.376953125, "learning_rate": 6.6331093129391494e-06, "loss": 1.8371, "step": 21662 }, { "epoch": 0.6989318647902547, "grad_norm": 0.42578125, "learning_rate": 6.631803190810779e-06, "loss": 1.8565, "step": 21663 }, { "epoch": 0.698964128644051, "grad_norm": 0.375, "learning_rate": 6.630497160793507e-06, "loss": 1.8544, "step": 21664 }, { "epoch": 0.6989963924978474, "grad_norm": 0.44140625, "learning_rate": 6.6291912229017256e-06, "loss": 1.8395, "step": 21665 }, { "epoch": 0.6990286563516438, "grad_norm": 0.40625, "learning_rate": 6.627885377149803e-06, "loss": 1.8406, "step": 21666 }, { "epoch": 0.6990609202054401, "grad_norm": 0.416015625, "learning_rate": 6.626579623552105e-06, "loss": 1.8458, "step": 21667 }, { "epoch": 0.6990931840592365, "grad_norm": 0.50390625, "learning_rate": 6.625273962123017e-06, "loss": 1.8365, "step": 21668 }, { "epoch": 0.6991254479130328, "grad_norm": 0.39453125, "learning_rate": 6.623968392876906e-06, "loss": 1.8856, "step": 21669 }, { "epoch": 0.6991577117668292, "grad_norm": 0.41015625, "learning_rate": 6.622662915828137e-06, "loss": 1.8406, "step": 21670 }, { "epoch": 0.6991899756206255, "grad_norm": 0.3984375, "learning_rate": 6.621357530991088e-06, "loss": 1.7775, "step": 21671 }, { "epoch": 0.6992222394744219, "grad_norm": 0.3828125, "learning_rate": 6.620052238380122e-06, "loss": 1.8489, "step": 21672 }, { "epoch": 0.6992545033282181, "grad_norm": 0.3828125, "learning_rate": 6.6187470380096126e-06, "loss": 1.849, "step": 21673 }, { "epoch": 0.6992867671820145, "grad_norm": 0.380859375, "learning_rate": 6.617441929893923e-06, "loss": 1.8568, "step": 21674 }, { "epoch": 0.6993190310358108, "grad_norm": 0.376953125, "learning_rate": 6.6161369140474134e-06, "loss": 1.8377, "step": 21675 }, { "epoch": 0.6993512948896072, "grad_norm": 0.36328125, "learning_rate": 6.614831990484461e-06, "loss": 1.8743, "step": 21676 }, { "epoch": 0.6993835587434035, "grad_norm": 0.421875, "learning_rate": 6.613527159219423e-06, "loss": 1.8438, "step": 21677 }, { "epoch": 0.6994158225971999, "grad_norm": 0.44140625, "learning_rate": 6.612222420266654e-06, "loss": 1.9019, "step": 21678 }, { "epoch": 0.6994480864509962, "grad_norm": 0.35546875, "learning_rate": 6.610917773640535e-06, "loss": 1.8422, "step": 21679 }, { "epoch": 0.6994803503047926, "grad_norm": 0.388671875, "learning_rate": 6.609613219355404e-06, "loss": 1.8695, "step": 21680 }, { "epoch": 0.6995126141585889, "grad_norm": 0.39453125, "learning_rate": 6.608308757425632e-06, "loss": 1.8764, "step": 21681 }, { "epoch": 0.6995448780123853, "grad_norm": 0.357421875, "learning_rate": 6.6070043878655876e-06, "loss": 1.8663, "step": 21682 }, { "epoch": 0.6995771418661816, "grad_norm": 0.36328125, "learning_rate": 6.605700110689606e-06, "loss": 1.8674, "step": 21683 }, { "epoch": 0.699609405719978, "grad_norm": 0.37109375, "learning_rate": 6.6043959259120565e-06, "loss": 1.8359, "step": 21684 }, { "epoch": 0.6996416695737743, "grad_norm": 0.447265625, "learning_rate": 6.603091833547304e-06, "loss": 1.8725, "step": 21685 }, { "epoch": 0.6996739334275707, "grad_norm": 0.384765625, "learning_rate": 6.601787833609683e-06, "loss": 1.8164, "step": 21686 }, { "epoch": 0.6997061972813671, "grad_norm": 0.384765625, "learning_rate": 6.600483926113561e-06, "loss": 1.8356, "step": 21687 }, { "epoch": 0.6997384611351634, "grad_norm": 0.40234375, "learning_rate": 6.599180111073287e-06, "loss": 1.8203, "step": 21688 }, { "epoch": 0.6997707249889598, "grad_norm": 0.376953125, "learning_rate": 6.597876388503205e-06, "loss": 1.8725, "step": 21689 }, { "epoch": 0.699802988842756, "grad_norm": 0.4375, "learning_rate": 6.596572758417677e-06, "loss": 1.8497, "step": 21690 }, { "epoch": 0.6998352526965524, "grad_norm": 0.388671875, "learning_rate": 6.59526922083105e-06, "loss": 1.8568, "step": 21691 }, { "epoch": 0.6998675165503487, "grad_norm": 0.365234375, "learning_rate": 6.593965775757662e-06, "loss": 1.8609, "step": 21692 }, { "epoch": 0.6998997804041451, "grad_norm": 0.408203125, "learning_rate": 6.592662423211875e-06, "loss": 1.8182, "step": 21693 }, { "epoch": 0.6999320442579414, "grad_norm": 0.39453125, "learning_rate": 6.591359163208028e-06, "loss": 1.8397, "step": 21694 }, { "epoch": 0.6999643081117378, "grad_norm": 0.380859375, "learning_rate": 6.590055995760462e-06, "loss": 1.874, "step": 21695 }, { "epoch": 0.6999965719655341, "grad_norm": 0.36328125, "learning_rate": 6.588752920883532e-06, "loss": 1.8553, "step": 21696 }, { "epoch": 0.7000288358193305, "grad_norm": 0.373046875, "learning_rate": 6.587449938591576e-06, "loss": 1.8533, "step": 21697 }, { "epoch": 0.7000610996731268, "grad_norm": 0.3671875, "learning_rate": 6.586147048898928e-06, "loss": 1.8504, "step": 21698 }, { "epoch": 0.7000933635269232, "grad_norm": 0.3515625, "learning_rate": 6.5848442518199446e-06, "loss": 1.818, "step": 21699 }, { "epoch": 0.7001256273807195, "grad_norm": 0.359375, "learning_rate": 6.58354154736896e-06, "loss": 1.8382, "step": 21700 }, { "epoch": 0.7001578912345159, "grad_norm": 0.384765625, "learning_rate": 6.582238935560305e-06, "loss": 1.8524, "step": 21701 }, { "epoch": 0.7001901550883122, "grad_norm": 0.365234375, "learning_rate": 6.58093641640833e-06, "loss": 1.8281, "step": 21702 }, { "epoch": 0.7002224189421086, "grad_norm": 0.373046875, "learning_rate": 6.579633989927361e-06, "loss": 1.8911, "step": 21703 }, { "epoch": 0.7002546827959049, "grad_norm": 0.373046875, "learning_rate": 6.578331656131748e-06, "loss": 1.8428, "step": 21704 }, { "epoch": 0.7002869466497013, "grad_norm": 0.365234375, "learning_rate": 6.577029415035817e-06, "loss": 1.8683, "step": 21705 }, { "epoch": 0.7003192105034977, "grad_norm": 0.396484375, "learning_rate": 6.5757272666539e-06, "loss": 1.9115, "step": 21706 }, { "epoch": 0.700351474357294, "grad_norm": 0.421875, "learning_rate": 6.574425211000337e-06, "loss": 1.9433, "step": 21707 }, { "epoch": 0.7003837382110903, "grad_norm": 0.427734375, "learning_rate": 6.573123248089458e-06, "loss": 1.9563, "step": 21708 }, { "epoch": 0.7004160020648866, "grad_norm": 0.419921875, "learning_rate": 6.571821377935586e-06, "loss": 1.9574, "step": 21709 }, { "epoch": 0.700448265918683, "grad_norm": 0.388671875, "learning_rate": 6.57051960055307e-06, "loss": 2.0129, "step": 21710 }, { "epoch": 0.7004805297724793, "grad_norm": 0.4296875, "learning_rate": 6.569217915956216e-06, "loss": 1.9236, "step": 21711 }, { "epoch": 0.7005127936262757, "grad_norm": 0.38671875, "learning_rate": 6.567916324159363e-06, "loss": 1.9401, "step": 21712 }, { "epoch": 0.700545057480072, "grad_norm": 0.423828125, "learning_rate": 6.5666148251768485e-06, "loss": 1.9851, "step": 21713 }, { "epoch": 0.7005773213338684, "grad_norm": 0.423828125, "learning_rate": 6.565313419022978e-06, "loss": 1.9657, "step": 21714 }, { "epoch": 0.7006095851876647, "grad_norm": 0.40625, "learning_rate": 6.564012105712084e-06, "loss": 1.9568, "step": 21715 }, { "epoch": 0.7006418490414611, "grad_norm": 0.40234375, "learning_rate": 6.562710885258506e-06, "loss": 1.8858, "step": 21716 }, { "epoch": 0.7006741128952574, "grad_norm": 0.380859375, "learning_rate": 6.561409757676541e-06, "loss": 1.9653, "step": 21717 }, { "epoch": 0.7007063767490538, "grad_norm": 0.49609375, "learning_rate": 6.560108722980531e-06, "loss": 2.0036, "step": 21718 }, { "epoch": 0.7007386406028501, "grad_norm": 0.3984375, "learning_rate": 6.558807781184789e-06, "loss": 1.9706, "step": 21719 }, { "epoch": 0.7007709044566465, "grad_norm": 0.416015625, "learning_rate": 6.557506932303629e-06, "loss": 1.9943, "step": 21720 }, { "epoch": 0.7008031683104428, "grad_norm": 0.37890625, "learning_rate": 6.556206176351381e-06, "loss": 1.9767, "step": 21721 }, { "epoch": 0.7008354321642392, "grad_norm": 0.421875, "learning_rate": 6.554905513342359e-06, "loss": 1.9603, "step": 21722 }, { "epoch": 0.7008676960180354, "grad_norm": 0.40234375, "learning_rate": 6.553604943290874e-06, "loss": 1.9363, "step": 21723 }, { "epoch": 0.7008999598718318, "grad_norm": 0.40234375, "learning_rate": 6.552304466211249e-06, "loss": 1.9557, "step": 21724 }, { "epoch": 0.7009322237256281, "grad_norm": 0.3984375, "learning_rate": 6.551004082117798e-06, "loss": 1.9515, "step": 21725 }, { "epoch": 0.7009644875794245, "grad_norm": 0.416015625, "learning_rate": 6.5497037910248266e-06, "loss": 1.9206, "step": 21726 }, { "epoch": 0.7009967514332209, "grad_norm": 0.3828125, "learning_rate": 6.5484035929466594e-06, "loss": 1.9674, "step": 21727 }, { "epoch": 0.7010290152870172, "grad_norm": 0.400390625, "learning_rate": 6.547103487897603e-06, "loss": 1.9347, "step": 21728 }, { "epoch": 0.7010612791408136, "grad_norm": 0.37890625, "learning_rate": 6.545803475891961e-06, "loss": 1.9487, "step": 21729 }, { "epoch": 0.7010935429946099, "grad_norm": 0.390625, "learning_rate": 6.544503556944055e-06, "loss": 1.9388, "step": 21730 }, { "epoch": 0.7011258068484063, "grad_norm": 0.39453125, "learning_rate": 6.543203731068188e-06, "loss": 1.9201, "step": 21731 }, { "epoch": 0.7011580707022026, "grad_norm": 0.40625, "learning_rate": 6.5419039982786614e-06, "loss": 1.9023, "step": 21732 }, { "epoch": 0.701190334555999, "grad_norm": 0.423828125, "learning_rate": 6.540604358589794e-06, "loss": 1.9769, "step": 21733 }, { "epoch": 0.7012225984097953, "grad_norm": 0.4140625, "learning_rate": 6.5393048120158856e-06, "loss": 1.9878, "step": 21734 }, { "epoch": 0.7012548622635917, "grad_norm": 0.3828125, "learning_rate": 6.538005358571235e-06, "loss": 1.945, "step": 21735 }, { "epoch": 0.701287126117388, "grad_norm": 0.3984375, "learning_rate": 6.536705998270155e-06, "loss": 1.9245, "step": 21736 }, { "epoch": 0.7013193899711844, "grad_norm": 0.388671875, "learning_rate": 6.535406731126938e-06, "loss": 1.9462, "step": 21737 }, { "epoch": 0.7013516538249807, "grad_norm": 0.380859375, "learning_rate": 6.5341075571558964e-06, "loss": 1.9212, "step": 21738 }, { "epoch": 0.7013839176787771, "grad_norm": 0.3984375, "learning_rate": 6.532808476371327e-06, "loss": 1.912, "step": 21739 }, { "epoch": 0.7014161815325733, "grad_norm": 0.40234375, "learning_rate": 6.531509488787521e-06, "loss": 1.9266, "step": 21740 }, { "epoch": 0.7014484453863697, "grad_norm": 0.37890625, "learning_rate": 6.530210594418796e-06, "loss": 1.9449, "step": 21741 }, { "epoch": 0.701480709240166, "grad_norm": 0.40625, "learning_rate": 6.528911793279423e-06, "loss": 1.9158, "step": 21742 }, { "epoch": 0.7015129730939624, "grad_norm": 0.447265625, "learning_rate": 6.527613085383713e-06, "loss": 1.8552, "step": 21743 }, { "epoch": 0.7015452369477587, "grad_norm": 0.40234375, "learning_rate": 6.526314470745971e-06, "loss": 1.9149, "step": 21744 }, { "epoch": 0.7015775008015551, "grad_norm": 0.482421875, "learning_rate": 6.525015949380471e-06, "loss": 1.932, "step": 21745 }, { "epoch": 0.7016097646553514, "grad_norm": 0.404296875, "learning_rate": 6.523717521301512e-06, "loss": 1.9358, "step": 21746 }, { "epoch": 0.7016420285091478, "grad_norm": 0.412109375, "learning_rate": 6.522419186523404e-06, "loss": 1.9211, "step": 21747 }, { "epoch": 0.7016742923629442, "grad_norm": 0.423828125, "learning_rate": 6.5211209450604114e-06, "loss": 1.8836, "step": 21748 }, { "epoch": 0.7017065562167405, "grad_norm": 0.412109375, "learning_rate": 6.519822796926841e-06, "loss": 1.8864, "step": 21749 }, { "epoch": 0.7017388200705369, "grad_norm": 0.384765625, "learning_rate": 6.518524742136979e-06, "loss": 1.942, "step": 21750 }, { "epoch": 0.7017710839243332, "grad_norm": 0.431640625, "learning_rate": 6.517226780705107e-06, "loss": 1.9081, "step": 21751 }, { "epoch": 0.7018033477781296, "grad_norm": 0.3984375, "learning_rate": 6.5159289126455225e-06, "loss": 1.9042, "step": 21752 }, { "epoch": 0.7018356116319259, "grad_norm": 0.41015625, "learning_rate": 6.514631137972505e-06, "loss": 1.9376, "step": 21753 }, { "epoch": 0.7018678754857223, "grad_norm": 0.40625, "learning_rate": 6.513333456700336e-06, "loss": 1.9062, "step": 21754 }, { "epoch": 0.7019001393395186, "grad_norm": 0.40625, "learning_rate": 6.512035868843308e-06, "loss": 1.9136, "step": 21755 }, { "epoch": 0.701932403193315, "grad_norm": 0.41015625, "learning_rate": 6.5107383744157024e-06, "loss": 1.9194, "step": 21756 }, { "epoch": 0.7019646670471112, "grad_norm": 0.439453125, "learning_rate": 6.509440973431791e-06, "loss": 1.9184, "step": 21757 }, { "epoch": 0.7019969309009076, "grad_norm": 0.400390625, "learning_rate": 6.508143665905868e-06, "loss": 1.9216, "step": 21758 }, { "epoch": 0.7020291947547039, "grad_norm": 0.400390625, "learning_rate": 6.5068464518522085e-06, "loss": 1.9131, "step": 21759 }, { "epoch": 0.7020614586085003, "grad_norm": 0.4140625, "learning_rate": 6.505549331285085e-06, "loss": 1.9412, "step": 21760 }, { "epoch": 0.7020937224622966, "grad_norm": 0.412109375, "learning_rate": 6.504252304218784e-06, "loss": 1.9154, "step": 21761 }, { "epoch": 0.702125986316093, "grad_norm": 0.400390625, "learning_rate": 6.5029553706675805e-06, "loss": 1.8849, "step": 21762 }, { "epoch": 0.7021582501698893, "grad_norm": 0.47265625, "learning_rate": 6.501658530645742e-06, "loss": 1.9106, "step": 21763 }, { "epoch": 0.7021905140236857, "grad_norm": 0.38671875, "learning_rate": 6.5003617841675574e-06, "loss": 1.9084, "step": 21764 }, { "epoch": 0.702222777877482, "grad_norm": 0.40234375, "learning_rate": 6.499065131247291e-06, "loss": 1.9252, "step": 21765 }, { "epoch": 0.7022550417312784, "grad_norm": 0.462890625, "learning_rate": 6.4977685718992105e-06, "loss": 1.8941, "step": 21766 }, { "epoch": 0.7022873055850748, "grad_norm": 0.388671875, "learning_rate": 6.496472106137603e-06, "loss": 1.9538, "step": 21767 }, { "epoch": 0.7023195694388711, "grad_norm": 0.40625, "learning_rate": 6.4951757339767264e-06, "loss": 1.9115, "step": 21768 }, { "epoch": 0.7023518332926675, "grad_norm": 0.486328125, "learning_rate": 6.493879455430856e-06, "loss": 1.9002, "step": 21769 }, { "epoch": 0.7023840971464638, "grad_norm": 0.4765625, "learning_rate": 6.4925832705142515e-06, "loss": 1.969, "step": 21770 }, { "epoch": 0.7024163610002602, "grad_norm": 0.416015625, "learning_rate": 6.491287179241189e-06, "loss": 2.0045, "step": 21771 }, { "epoch": 0.7024486248540565, "grad_norm": 0.5390625, "learning_rate": 6.489991181625944e-06, "loss": 2.0884, "step": 21772 }, { "epoch": 0.7024808887078529, "grad_norm": 0.453125, "learning_rate": 6.488695277682759e-06, "loss": 1.9978, "step": 21773 }, { "epoch": 0.7025131525616491, "grad_norm": 0.51953125, "learning_rate": 6.48739946742591e-06, "loss": 1.9881, "step": 21774 }, { "epoch": 0.7025454164154455, "grad_norm": 0.5546875, "learning_rate": 6.486103750869672e-06, "loss": 2.007, "step": 21775 }, { "epoch": 0.7025776802692418, "grad_norm": 0.423828125, "learning_rate": 6.484808128028286e-06, "loss": 2.0543, "step": 21776 }, { "epoch": 0.7026099441230382, "grad_norm": 0.482421875, "learning_rate": 6.483512598916023e-06, "loss": 2.0386, "step": 21777 }, { "epoch": 0.7026422079768345, "grad_norm": 0.439453125, "learning_rate": 6.482217163547154e-06, "loss": 1.9734, "step": 21778 }, { "epoch": 0.7026744718306309, "grad_norm": 0.486328125, "learning_rate": 6.480921821935917e-06, "loss": 1.9718, "step": 21779 }, { "epoch": 0.7027067356844272, "grad_norm": 0.431640625, "learning_rate": 6.479626574096586e-06, "loss": 2.0398, "step": 21780 }, { "epoch": 0.7027389995382236, "grad_norm": 0.5390625, "learning_rate": 6.478331420043415e-06, "loss": 2.0455, "step": 21781 }, { "epoch": 0.7027712633920199, "grad_norm": 0.451171875, "learning_rate": 6.477036359790651e-06, "loss": 2.0514, "step": 21782 }, { "epoch": 0.7028035272458163, "grad_norm": 0.5, "learning_rate": 6.475741393352563e-06, "loss": 2.0353, "step": 21783 }, { "epoch": 0.7028357910996126, "grad_norm": 0.44921875, "learning_rate": 6.474446520743398e-06, "loss": 2.0187, "step": 21784 }, { "epoch": 0.702868054953409, "grad_norm": 0.427734375, "learning_rate": 6.4731517419774035e-06, "loss": 1.9512, "step": 21785 }, { "epoch": 0.7029003188072053, "grad_norm": 0.39453125, "learning_rate": 6.471857057068844e-06, "loss": 1.9409, "step": 21786 }, { "epoch": 0.7029325826610017, "grad_norm": 0.443359375, "learning_rate": 6.470562466031964e-06, "loss": 1.9362, "step": 21787 }, { "epoch": 0.7029648465147981, "grad_norm": 0.42578125, "learning_rate": 6.469267968881007e-06, "loss": 1.9467, "step": 21788 }, { "epoch": 0.7029971103685944, "grad_norm": 0.498046875, "learning_rate": 6.467973565630234e-06, "loss": 1.8275, "step": 21789 }, { "epoch": 0.7030293742223908, "grad_norm": 0.419921875, "learning_rate": 6.466679256293888e-06, "loss": 1.929, "step": 21790 }, { "epoch": 0.703061638076187, "grad_norm": 0.38671875, "learning_rate": 6.465385040886207e-06, "loss": 1.9214, "step": 21791 }, { "epoch": 0.7030939019299834, "grad_norm": 0.388671875, "learning_rate": 6.4640909194214525e-06, "loss": 1.9472, "step": 21792 }, { "epoch": 0.7031261657837797, "grad_norm": 0.453125, "learning_rate": 6.462796891913861e-06, "loss": 1.9393, "step": 21793 }, { "epoch": 0.7031584296375761, "grad_norm": 0.38671875, "learning_rate": 6.461502958377672e-06, "loss": 1.8119, "step": 21794 }, { "epoch": 0.7031906934913724, "grad_norm": 0.44140625, "learning_rate": 6.460209118827138e-06, "loss": 1.8304, "step": 21795 }, { "epoch": 0.7032229573451688, "grad_norm": 0.37890625, "learning_rate": 6.458915373276494e-06, "loss": 1.8526, "step": 21796 }, { "epoch": 0.7032552211989651, "grad_norm": 0.400390625, "learning_rate": 6.457621721739979e-06, "loss": 1.8831, "step": 21797 }, { "epoch": 0.7032874850527615, "grad_norm": 0.408203125, "learning_rate": 6.456328164231841e-06, "loss": 1.8123, "step": 21798 }, { "epoch": 0.7033197489065578, "grad_norm": 0.376953125, "learning_rate": 6.4550347007663125e-06, "loss": 1.8035, "step": 21799 }, { "epoch": 0.7033520127603542, "grad_norm": 0.435546875, "learning_rate": 6.4537413313576315e-06, "loss": 1.831, "step": 21800 }, { "epoch": 0.7033842766141505, "grad_norm": 0.3671875, "learning_rate": 6.45244805602003e-06, "loss": 1.865, "step": 21801 }, { "epoch": 0.7034165404679469, "grad_norm": 0.392578125, "learning_rate": 6.451154874767755e-06, "loss": 1.7928, "step": 21802 }, { "epoch": 0.7034488043217432, "grad_norm": 0.37890625, "learning_rate": 6.449861787615033e-06, "loss": 1.816, "step": 21803 }, { "epoch": 0.7034810681755396, "grad_norm": 0.365234375, "learning_rate": 6.448568794576092e-06, "loss": 1.8277, "step": 21804 }, { "epoch": 0.7035133320293359, "grad_norm": 0.404296875, "learning_rate": 6.447275895665172e-06, "loss": 1.7624, "step": 21805 }, { "epoch": 0.7035455958831323, "grad_norm": 0.388671875, "learning_rate": 6.445983090896514e-06, "loss": 1.8177, "step": 21806 }, { "epoch": 0.7035778597369287, "grad_norm": 0.384765625, "learning_rate": 6.4446903802843275e-06, "loss": 1.8438, "step": 21807 }, { "epoch": 0.703610123590725, "grad_norm": 0.388671875, "learning_rate": 6.443397763842852e-06, "loss": 1.8553, "step": 21808 }, { "epoch": 0.7036423874445213, "grad_norm": 0.3828125, "learning_rate": 6.442105241586324e-06, "loss": 1.883, "step": 21809 }, { "epoch": 0.7036746512983176, "grad_norm": 0.376953125, "learning_rate": 6.4408128135289535e-06, "loss": 1.8397, "step": 21810 }, { "epoch": 0.703706915152114, "grad_norm": 0.3671875, "learning_rate": 6.43952047968498e-06, "loss": 1.8713, "step": 21811 }, { "epoch": 0.7037391790059103, "grad_norm": 0.365234375, "learning_rate": 6.438228240068622e-06, "loss": 1.8305, "step": 21812 }, { "epoch": 0.7037714428597067, "grad_norm": 0.37890625, "learning_rate": 6.436936094694102e-06, "loss": 1.8473, "step": 21813 }, { "epoch": 0.703803706713503, "grad_norm": 0.3671875, "learning_rate": 6.435644043575652e-06, "loss": 1.8539, "step": 21814 }, { "epoch": 0.7038359705672994, "grad_norm": 0.3671875, "learning_rate": 6.434352086727488e-06, "loss": 1.8145, "step": 21815 }, { "epoch": 0.7038682344210957, "grad_norm": 0.36328125, "learning_rate": 6.4330602241638255e-06, "loss": 1.831, "step": 21816 }, { "epoch": 0.7039004982748921, "grad_norm": 0.396484375, "learning_rate": 6.431768455898894e-06, "loss": 1.8438, "step": 21817 }, { "epoch": 0.7039327621286884, "grad_norm": 0.37890625, "learning_rate": 6.430476781946911e-06, "loss": 1.8715, "step": 21818 }, { "epoch": 0.7039650259824848, "grad_norm": 0.369140625, "learning_rate": 6.429185202322085e-06, "loss": 1.8638, "step": 21819 }, { "epoch": 0.7039972898362811, "grad_norm": 0.369140625, "learning_rate": 6.427893717038645e-06, "loss": 1.8548, "step": 21820 }, { "epoch": 0.7040295536900775, "grad_norm": 0.357421875, "learning_rate": 6.4266023261108035e-06, "loss": 1.8535, "step": 21821 }, { "epoch": 0.7040618175438738, "grad_norm": 0.37890625, "learning_rate": 6.4253110295527655e-06, "loss": 1.8716, "step": 21822 }, { "epoch": 0.7040940813976702, "grad_norm": 0.380859375, "learning_rate": 6.424019827378757e-06, "loss": 1.8654, "step": 21823 }, { "epoch": 0.7041263452514664, "grad_norm": 0.384765625, "learning_rate": 6.4227287196029875e-06, "loss": 1.8834, "step": 21824 }, { "epoch": 0.7041586091052628, "grad_norm": 0.400390625, "learning_rate": 6.421437706239661e-06, "loss": 1.8509, "step": 21825 }, { "epoch": 0.7041908729590591, "grad_norm": 0.3984375, "learning_rate": 6.420146787303e-06, "loss": 1.8338, "step": 21826 }, { "epoch": 0.7042231368128555, "grad_norm": 0.380859375, "learning_rate": 6.418855962807206e-06, "loss": 1.855, "step": 21827 }, { "epoch": 0.7042554006666519, "grad_norm": 0.388671875, "learning_rate": 6.417565232766486e-06, "loss": 1.8361, "step": 21828 }, { "epoch": 0.7042876645204482, "grad_norm": 0.37890625, "learning_rate": 6.4162745971950555e-06, "loss": 1.818, "step": 21829 }, { "epoch": 0.7043199283742446, "grad_norm": 0.37890625, "learning_rate": 6.414984056107117e-06, "loss": 1.7998, "step": 21830 }, { "epoch": 0.7043521922280409, "grad_norm": 0.408203125, "learning_rate": 6.413693609516875e-06, "loss": 1.8886, "step": 21831 }, { "epoch": 0.7043844560818373, "grad_norm": 0.3671875, "learning_rate": 6.4124032574385285e-06, "loss": 1.8329, "step": 21832 }, { "epoch": 0.7044167199356336, "grad_norm": 0.408203125, "learning_rate": 6.411112999886292e-06, "loss": 1.9016, "step": 21833 }, { "epoch": 0.70444898378943, "grad_norm": 0.400390625, "learning_rate": 6.40982283687436e-06, "loss": 1.83, "step": 21834 }, { "epoch": 0.7044812476432263, "grad_norm": 0.404296875, "learning_rate": 6.408532768416932e-06, "loss": 1.8408, "step": 21835 }, { "epoch": 0.7045135114970227, "grad_norm": 0.39453125, "learning_rate": 6.407242794528212e-06, "loss": 1.8828, "step": 21836 }, { "epoch": 0.704545775350819, "grad_norm": 0.3828125, "learning_rate": 6.4059529152224085e-06, "loss": 1.8573, "step": 21837 }, { "epoch": 0.7045780392046154, "grad_norm": 0.390625, "learning_rate": 6.4046631305137e-06, "loss": 1.8461, "step": 21838 }, { "epoch": 0.7046103030584117, "grad_norm": 0.38671875, "learning_rate": 6.403373440416293e-06, "loss": 1.8961, "step": 21839 }, { "epoch": 0.7046425669122081, "grad_norm": 0.359375, "learning_rate": 6.4020838449443955e-06, "loss": 1.8852, "step": 21840 }, { "epoch": 0.7046748307660043, "grad_norm": 0.36328125, "learning_rate": 6.400794344112181e-06, "loss": 1.8246, "step": 21841 }, { "epoch": 0.7047070946198007, "grad_norm": 0.388671875, "learning_rate": 6.399504937933856e-06, "loss": 1.8806, "step": 21842 }, { "epoch": 0.704739358473597, "grad_norm": 0.359375, "learning_rate": 6.398215626423611e-06, "loss": 1.8316, "step": 21843 }, { "epoch": 0.7047716223273934, "grad_norm": 0.369140625, "learning_rate": 6.3969264095956345e-06, "loss": 1.8314, "step": 21844 }, { "epoch": 0.7048038861811897, "grad_norm": 0.37109375, "learning_rate": 6.395637287464124e-06, "loss": 1.833, "step": 21845 }, { "epoch": 0.7048361500349861, "grad_norm": 0.3671875, "learning_rate": 6.394348260043266e-06, "loss": 1.8545, "step": 21846 }, { "epoch": 0.7048684138887824, "grad_norm": 0.390625, "learning_rate": 6.393059327347242e-06, "loss": 1.8394, "step": 21847 }, { "epoch": 0.7049006777425788, "grad_norm": 0.365234375, "learning_rate": 6.391770489390254e-06, "loss": 1.8612, "step": 21848 }, { "epoch": 0.7049329415963752, "grad_norm": 0.365234375, "learning_rate": 6.39048174618648e-06, "loss": 1.8446, "step": 21849 }, { "epoch": 0.7049652054501715, "grad_norm": 0.376953125, "learning_rate": 6.389193097750101e-06, "loss": 1.8615, "step": 21850 }, { "epoch": 0.7049974693039679, "grad_norm": 0.36328125, "learning_rate": 6.387904544095312e-06, "loss": 1.8702, "step": 21851 }, { "epoch": 0.7050297331577642, "grad_norm": 0.361328125, "learning_rate": 6.3866160852362915e-06, "loss": 1.8878, "step": 21852 }, { "epoch": 0.7050619970115606, "grad_norm": 0.36328125, "learning_rate": 6.385327721187216e-06, "loss": 1.8654, "step": 21853 }, { "epoch": 0.7050942608653569, "grad_norm": 0.380859375, "learning_rate": 6.3840394519622805e-06, "loss": 1.9268, "step": 21854 }, { "epoch": 0.7051265247191533, "grad_norm": 0.365234375, "learning_rate": 6.382751277575655e-06, "loss": 1.8466, "step": 21855 }, { "epoch": 0.7051587885729496, "grad_norm": 0.357421875, "learning_rate": 6.381463198041518e-06, "loss": 1.8031, "step": 21856 }, { "epoch": 0.705191052426746, "grad_norm": 0.369140625, "learning_rate": 6.380175213374055e-06, "loss": 1.8372, "step": 21857 }, { "epoch": 0.7052233162805422, "grad_norm": 0.3515625, "learning_rate": 6.37888732358744e-06, "loss": 1.7925, "step": 21858 }, { "epoch": 0.7052555801343386, "grad_norm": 0.359375, "learning_rate": 6.377599528695844e-06, "loss": 1.8254, "step": 21859 }, { "epoch": 0.7052878439881349, "grad_norm": 0.388671875, "learning_rate": 6.376311828713451e-06, "loss": 1.9085, "step": 21860 }, { "epoch": 0.7053201078419313, "grad_norm": 0.384765625, "learning_rate": 6.3750242236544315e-06, "loss": 1.8769, "step": 21861 }, { "epoch": 0.7053523716957276, "grad_norm": 0.3984375, "learning_rate": 6.3737367135329575e-06, "loss": 1.8528, "step": 21862 }, { "epoch": 0.705384635549524, "grad_norm": 0.37109375, "learning_rate": 6.372449298363195e-06, "loss": 1.8619, "step": 21863 }, { "epoch": 0.7054168994033203, "grad_norm": 0.412109375, "learning_rate": 6.371161978159329e-06, "loss": 1.8558, "step": 21864 }, { "epoch": 0.7054491632571167, "grad_norm": 0.369140625, "learning_rate": 6.36987475293552e-06, "loss": 1.8399, "step": 21865 }, { "epoch": 0.705481427110913, "grad_norm": 0.458984375, "learning_rate": 6.368587622705932e-06, "loss": 1.8789, "step": 21866 }, { "epoch": 0.7055136909647094, "grad_norm": 0.365234375, "learning_rate": 6.367300587484748e-06, "loss": 1.8532, "step": 21867 }, { "epoch": 0.7055459548185058, "grad_norm": 0.376953125, "learning_rate": 6.366013647286123e-06, "loss": 1.8717, "step": 21868 }, { "epoch": 0.7055782186723021, "grad_norm": 0.4375, "learning_rate": 6.3647268021242215e-06, "loss": 1.8381, "step": 21869 }, { "epoch": 0.7056104825260985, "grad_norm": 0.369140625, "learning_rate": 6.363440052013217e-06, "loss": 1.8427, "step": 21870 }, { "epoch": 0.7056427463798948, "grad_norm": 0.45703125, "learning_rate": 6.362153396967271e-06, "loss": 1.8333, "step": 21871 }, { "epoch": 0.7056750102336912, "grad_norm": 0.392578125, "learning_rate": 6.360866837000536e-06, "loss": 1.9004, "step": 21872 }, { "epoch": 0.7057072740874875, "grad_norm": 0.375, "learning_rate": 6.3595803721271875e-06, "loss": 1.827, "step": 21873 }, { "epoch": 0.7057395379412839, "grad_norm": 0.392578125, "learning_rate": 6.358294002361379e-06, "loss": 1.8671, "step": 21874 }, { "epoch": 0.7057718017950801, "grad_norm": 0.431640625, "learning_rate": 6.357007727717266e-06, "loss": 1.8999, "step": 21875 }, { "epoch": 0.7058040656488765, "grad_norm": 0.373046875, "learning_rate": 6.355721548209016e-06, "loss": 1.8501, "step": 21876 }, { "epoch": 0.7058363295026728, "grad_norm": 0.455078125, "learning_rate": 6.354435463850784e-06, "loss": 1.8842, "step": 21877 }, { "epoch": 0.7058685933564692, "grad_norm": 0.404296875, "learning_rate": 6.3531494746567155e-06, "loss": 1.8474, "step": 21878 }, { "epoch": 0.7059008572102655, "grad_norm": 0.357421875, "learning_rate": 6.351863580640981e-06, "loss": 1.8357, "step": 21879 }, { "epoch": 0.7059331210640619, "grad_norm": 0.3671875, "learning_rate": 6.3505777818177286e-06, "loss": 1.8594, "step": 21880 }, { "epoch": 0.7059653849178582, "grad_norm": 0.421875, "learning_rate": 6.349292078201106e-06, "loss": 1.8453, "step": 21881 }, { "epoch": 0.7059976487716546, "grad_norm": 0.421875, "learning_rate": 6.348006469805274e-06, "loss": 1.8015, "step": 21882 }, { "epoch": 0.7060299126254509, "grad_norm": 0.421875, "learning_rate": 6.34672095664438e-06, "loss": 1.858, "step": 21883 }, { "epoch": 0.7060621764792473, "grad_norm": 0.421875, "learning_rate": 6.34543553873257e-06, "loss": 1.8497, "step": 21884 }, { "epoch": 0.7060944403330436, "grad_norm": 0.3984375, "learning_rate": 6.344150216084001e-06, "loss": 1.8675, "step": 21885 }, { "epoch": 0.70612670418684, "grad_norm": 0.423828125, "learning_rate": 6.3428649887128175e-06, "loss": 1.8432, "step": 21886 }, { "epoch": 0.7061589680406363, "grad_norm": 0.38671875, "learning_rate": 6.341579856633159e-06, "loss": 1.858, "step": 21887 }, { "epoch": 0.7061912318944327, "grad_norm": 0.365234375, "learning_rate": 6.340294819859185e-06, "loss": 1.8402, "step": 21888 }, { "epoch": 0.7062234957482291, "grad_norm": 0.37109375, "learning_rate": 6.339009878405032e-06, "loss": 1.8134, "step": 21889 }, { "epoch": 0.7062557596020254, "grad_norm": 0.447265625, "learning_rate": 6.33772503228484e-06, "loss": 1.8046, "step": 21890 }, { "epoch": 0.7062880234558218, "grad_norm": 0.37109375, "learning_rate": 6.3364402815127615e-06, "loss": 1.872, "step": 21891 }, { "epoch": 0.706320287309618, "grad_norm": 0.45703125, "learning_rate": 6.335155626102933e-06, "loss": 1.8726, "step": 21892 }, { "epoch": 0.7063525511634144, "grad_norm": 0.388671875, "learning_rate": 6.333871066069497e-06, "loss": 1.8537, "step": 21893 }, { "epoch": 0.7063848150172107, "grad_norm": 0.380859375, "learning_rate": 6.332586601426584e-06, "loss": 1.8325, "step": 21894 }, { "epoch": 0.7064170788710071, "grad_norm": 0.39453125, "learning_rate": 6.331302232188345e-06, "loss": 1.8615, "step": 21895 }, { "epoch": 0.7064493427248034, "grad_norm": 0.435546875, "learning_rate": 6.330017958368913e-06, "loss": 1.8584, "step": 21896 }, { "epoch": 0.7064816065785998, "grad_norm": 0.3671875, "learning_rate": 6.328733779982418e-06, "loss": 1.8251, "step": 21897 }, { "epoch": 0.7065138704323961, "grad_norm": 0.38671875, "learning_rate": 6.327449697043006e-06, "loss": 1.837, "step": 21898 }, { "epoch": 0.7065461342861925, "grad_norm": 0.384765625, "learning_rate": 6.326165709564806e-06, "loss": 1.8245, "step": 21899 }, { "epoch": 0.7065783981399888, "grad_norm": 0.37890625, "learning_rate": 6.324881817561945e-06, "loss": 1.8339, "step": 21900 }, { "epoch": 0.7066106619937852, "grad_norm": 0.388671875, "learning_rate": 6.323598021048569e-06, "loss": 1.8247, "step": 21901 }, { "epoch": 0.7066429258475815, "grad_norm": 0.369140625, "learning_rate": 6.3223143200388e-06, "loss": 1.8204, "step": 21902 }, { "epoch": 0.7066751897013779, "grad_norm": 0.384765625, "learning_rate": 6.321030714546766e-06, "loss": 1.8062, "step": 21903 }, { "epoch": 0.7067074535551742, "grad_norm": 0.37109375, "learning_rate": 6.319747204586603e-06, "loss": 1.8295, "step": 21904 }, { "epoch": 0.7067397174089706, "grad_norm": 0.375, "learning_rate": 6.3184637901724355e-06, "loss": 1.8867, "step": 21905 }, { "epoch": 0.7067719812627669, "grad_norm": 0.3671875, "learning_rate": 6.317180471318386e-06, "loss": 1.8259, "step": 21906 }, { "epoch": 0.7068042451165633, "grad_norm": 0.37890625, "learning_rate": 6.315897248038591e-06, "loss": 1.8521, "step": 21907 }, { "epoch": 0.7068365089703597, "grad_norm": 0.380859375, "learning_rate": 6.314614120347168e-06, "loss": 1.8317, "step": 21908 }, { "epoch": 0.7068687728241559, "grad_norm": 0.369140625, "learning_rate": 6.313331088258237e-06, "loss": 1.8177, "step": 21909 }, { "epoch": 0.7069010366779523, "grad_norm": 0.392578125, "learning_rate": 6.312048151785929e-06, "loss": 1.8572, "step": 21910 }, { "epoch": 0.7069333005317486, "grad_norm": 0.400390625, "learning_rate": 6.310765310944365e-06, "loss": 1.8539, "step": 21911 }, { "epoch": 0.706965564385545, "grad_norm": 0.380859375, "learning_rate": 6.309482565747656e-06, "loss": 1.8814, "step": 21912 }, { "epoch": 0.7069978282393413, "grad_norm": 0.396484375, "learning_rate": 6.308199916209935e-06, "loss": 1.8268, "step": 21913 }, { "epoch": 0.7070300920931377, "grad_norm": 0.3828125, "learning_rate": 6.306917362345312e-06, "loss": 1.8624, "step": 21914 }, { "epoch": 0.707062355946934, "grad_norm": 0.361328125, "learning_rate": 6.305634904167904e-06, "loss": 1.8464, "step": 21915 }, { "epoch": 0.7070946198007304, "grad_norm": 0.4453125, "learning_rate": 6.304352541691831e-06, "loss": 1.8497, "step": 21916 }, { "epoch": 0.7071268836545267, "grad_norm": 0.3671875, "learning_rate": 6.30307027493121e-06, "loss": 1.8343, "step": 21917 }, { "epoch": 0.7071591475083231, "grad_norm": 0.376953125, "learning_rate": 6.301788103900145e-06, "loss": 1.8502, "step": 21918 }, { "epoch": 0.7071914113621194, "grad_norm": 0.388671875, "learning_rate": 6.3005060286127635e-06, "loss": 1.8233, "step": 21919 }, { "epoch": 0.7072236752159158, "grad_norm": 0.435546875, "learning_rate": 6.2992240490831685e-06, "loss": 1.8073, "step": 21920 }, { "epoch": 0.7072559390697121, "grad_norm": 0.37109375, "learning_rate": 6.297942165325475e-06, "loss": 1.8355, "step": 21921 }, { "epoch": 0.7072882029235085, "grad_norm": 0.41796875, "learning_rate": 6.296660377353787e-06, "loss": 1.835, "step": 21922 }, { "epoch": 0.7073204667773048, "grad_norm": 0.458984375, "learning_rate": 6.295378685182222e-06, "loss": 1.8403, "step": 21923 }, { "epoch": 0.7073527306311012, "grad_norm": 0.369140625, "learning_rate": 6.294097088824883e-06, "loss": 1.847, "step": 21924 }, { "epoch": 0.7073849944848974, "grad_norm": 0.404296875, "learning_rate": 6.292815588295872e-06, "loss": 1.8527, "step": 21925 }, { "epoch": 0.7074172583386938, "grad_norm": 0.4296875, "learning_rate": 6.291534183609306e-06, "loss": 1.8526, "step": 21926 }, { "epoch": 0.7074495221924901, "grad_norm": 0.392578125, "learning_rate": 6.290252874779286e-06, "loss": 1.8758, "step": 21927 }, { "epoch": 0.7074817860462865, "grad_norm": 0.390625, "learning_rate": 6.288971661819907e-06, "loss": 1.8694, "step": 21928 }, { "epoch": 0.7075140499000829, "grad_norm": 0.40234375, "learning_rate": 6.287690544745284e-06, "loss": 1.8349, "step": 21929 }, { "epoch": 0.7075463137538792, "grad_norm": 0.396484375, "learning_rate": 6.286409523569515e-06, "loss": 1.848, "step": 21930 }, { "epoch": 0.7075785776076756, "grad_norm": 0.392578125, "learning_rate": 6.285128598306693e-06, "loss": 1.8536, "step": 21931 }, { "epoch": 0.7076108414614719, "grad_norm": 0.35546875, "learning_rate": 6.283847768970927e-06, "loss": 1.8636, "step": 21932 }, { "epoch": 0.7076431053152683, "grad_norm": 0.3828125, "learning_rate": 6.2825670355763125e-06, "loss": 1.8383, "step": 21933 }, { "epoch": 0.7076753691690646, "grad_norm": 0.375, "learning_rate": 6.281286398136943e-06, "loss": 1.8335, "step": 21934 }, { "epoch": 0.707707633022861, "grad_norm": 0.361328125, "learning_rate": 6.280005856666921e-06, "loss": 1.8659, "step": 21935 }, { "epoch": 0.7077398968766573, "grad_norm": 0.3671875, "learning_rate": 6.27872541118034e-06, "loss": 1.8607, "step": 21936 }, { "epoch": 0.7077721607304537, "grad_norm": 0.375, "learning_rate": 6.277445061691286e-06, "loss": 1.8647, "step": 21937 }, { "epoch": 0.70780442458425, "grad_norm": 0.373046875, "learning_rate": 6.276164808213866e-06, "loss": 1.8302, "step": 21938 }, { "epoch": 0.7078366884380464, "grad_norm": 0.36328125, "learning_rate": 6.274884650762165e-06, "loss": 1.8108, "step": 21939 }, { "epoch": 0.7078689522918427, "grad_norm": 0.3671875, "learning_rate": 6.273604589350269e-06, "loss": 1.8723, "step": 21940 }, { "epoch": 0.7079012161456391, "grad_norm": 0.392578125, "learning_rate": 6.27232462399228e-06, "loss": 1.8403, "step": 21941 }, { "epoch": 0.7079334799994353, "grad_norm": 0.388671875, "learning_rate": 6.271044754702279e-06, "loss": 1.8419, "step": 21942 }, { "epoch": 0.7079657438532317, "grad_norm": 0.369140625, "learning_rate": 6.269764981494351e-06, "loss": 1.8503, "step": 21943 }, { "epoch": 0.707998007707028, "grad_norm": 0.390625, "learning_rate": 6.2684853043825915e-06, "loss": 1.814, "step": 21944 }, { "epoch": 0.7080302715608244, "grad_norm": 0.388671875, "learning_rate": 6.267205723381081e-06, "loss": 1.8983, "step": 21945 }, { "epoch": 0.7080625354146207, "grad_norm": 0.3671875, "learning_rate": 6.2659262385039e-06, "loss": 1.8517, "step": 21946 }, { "epoch": 0.7080947992684171, "grad_norm": 0.37109375, "learning_rate": 6.2646468497651445e-06, "loss": 1.8606, "step": 21947 }, { "epoch": 0.7081270631222134, "grad_norm": 0.375, "learning_rate": 6.263367557178888e-06, "loss": 1.8306, "step": 21948 }, { "epoch": 0.7081593269760098, "grad_norm": 0.365234375, "learning_rate": 6.262088360759207e-06, "loss": 1.8567, "step": 21949 }, { "epoch": 0.7081915908298062, "grad_norm": 0.3828125, "learning_rate": 6.260809260520196e-06, "loss": 1.8691, "step": 21950 }, { "epoch": 0.7082238546836025, "grad_norm": 0.416015625, "learning_rate": 6.259530256475928e-06, "loss": 1.8169, "step": 21951 }, { "epoch": 0.7082561185373989, "grad_norm": 0.3671875, "learning_rate": 6.25825134864048e-06, "loss": 1.8448, "step": 21952 }, { "epoch": 0.7082883823911952, "grad_norm": 0.392578125, "learning_rate": 6.256972537027924e-06, "loss": 1.866, "step": 21953 }, { "epoch": 0.7083206462449916, "grad_norm": 0.3828125, "learning_rate": 6.255693821652347e-06, "loss": 1.8687, "step": 21954 }, { "epoch": 0.7083529100987879, "grad_norm": 0.404296875, "learning_rate": 6.25441520252782e-06, "loss": 1.8504, "step": 21955 }, { "epoch": 0.7083851739525843, "grad_norm": 0.388671875, "learning_rate": 6.253136679668411e-06, "loss": 1.892, "step": 21956 }, { "epoch": 0.7084174378063806, "grad_norm": 0.39453125, "learning_rate": 6.251858253088203e-06, "loss": 1.843, "step": 21957 }, { "epoch": 0.708449701660177, "grad_norm": 0.376953125, "learning_rate": 6.250579922801265e-06, "loss": 1.8678, "step": 21958 }, { "epoch": 0.7084819655139732, "grad_norm": 0.353515625, "learning_rate": 6.249301688821659e-06, "loss": 1.8537, "step": 21959 }, { "epoch": 0.7085142293677696, "grad_norm": 0.39453125, "learning_rate": 6.2480235511634694e-06, "loss": 1.848, "step": 21960 }, { "epoch": 0.7085464932215659, "grad_norm": 0.3828125, "learning_rate": 6.246745509840757e-06, "loss": 1.859, "step": 21961 }, { "epoch": 0.7085787570753623, "grad_norm": 0.3671875, "learning_rate": 6.2454675648675865e-06, "loss": 1.8593, "step": 21962 }, { "epoch": 0.7086110209291586, "grad_norm": 0.37890625, "learning_rate": 6.244189716258032e-06, "loss": 1.8488, "step": 21963 }, { "epoch": 0.708643284782955, "grad_norm": 0.4140625, "learning_rate": 6.242911964026158e-06, "loss": 1.8428, "step": 21964 }, { "epoch": 0.7086755486367513, "grad_norm": 0.373046875, "learning_rate": 6.241634308186019e-06, "loss": 1.8122, "step": 21965 }, { "epoch": 0.7087078124905477, "grad_norm": 0.421875, "learning_rate": 6.240356748751692e-06, "loss": 1.8527, "step": 21966 }, { "epoch": 0.708740076344344, "grad_norm": 0.419921875, "learning_rate": 6.239079285737235e-06, "loss": 1.8744, "step": 21967 }, { "epoch": 0.7087723401981404, "grad_norm": 0.3671875, "learning_rate": 6.237801919156703e-06, "loss": 1.8163, "step": 21968 }, { "epoch": 0.7088046040519368, "grad_norm": 0.427734375, "learning_rate": 6.236524649024166e-06, "loss": 1.8571, "step": 21969 }, { "epoch": 0.7088368679057331, "grad_norm": 0.390625, "learning_rate": 6.235247475353679e-06, "loss": 1.848, "step": 21970 }, { "epoch": 0.7088691317595295, "grad_norm": 0.35546875, "learning_rate": 6.233970398159294e-06, "loss": 1.8323, "step": 21971 }, { "epoch": 0.7089013956133258, "grad_norm": 0.5078125, "learning_rate": 6.23269341745508e-06, "loss": 1.82, "step": 21972 }, { "epoch": 0.7089336594671222, "grad_norm": 0.484375, "learning_rate": 6.231416533255087e-06, "loss": 1.8473, "step": 21973 }, { "epoch": 0.7089659233209185, "grad_norm": 0.384765625, "learning_rate": 6.230139745573365e-06, "loss": 1.8295, "step": 21974 }, { "epoch": 0.7089981871747149, "grad_norm": 0.380859375, "learning_rate": 6.22886305442398e-06, "loss": 1.8708, "step": 21975 }, { "epoch": 0.7090304510285111, "grad_norm": 0.45703125, "learning_rate": 6.227586459820976e-06, "loss": 1.8195, "step": 21976 }, { "epoch": 0.7090627148823075, "grad_norm": 0.369140625, "learning_rate": 6.226309961778401e-06, "loss": 1.8772, "step": 21977 }, { "epoch": 0.7090949787361038, "grad_norm": 0.39453125, "learning_rate": 6.22503356031032e-06, "loss": 1.855, "step": 21978 }, { "epoch": 0.7091272425899002, "grad_norm": 0.390625, "learning_rate": 6.223757255430772e-06, "loss": 1.8886, "step": 21979 }, { "epoch": 0.7091595064436965, "grad_norm": 0.369140625, "learning_rate": 6.222481047153804e-06, "loss": 1.8164, "step": 21980 }, { "epoch": 0.7091917702974929, "grad_norm": 0.365234375, "learning_rate": 6.221204935493473e-06, "loss": 1.846, "step": 21981 }, { "epoch": 0.7092240341512892, "grad_norm": 0.37109375, "learning_rate": 6.219928920463819e-06, "loss": 1.8391, "step": 21982 }, { "epoch": 0.7092562980050856, "grad_norm": 0.384765625, "learning_rate": 6.21865300207889e-06, "loss": 1.8698, "step": 21983 }, { "epoch": 0.7092885618588819, "grad_norm": 0.400390625, "learning_rate": 6.217377180352724e-06, "loss": 1.8545, "step": 21984 }, { "epoch": 0.7093208257126783, "grad_norm": 0.361328125, "learning_rate": 6.216101455299373e-06, "loss": 1.831, "step": 21985 }, { "epoch": 0.7093530895664746, "grad_norm": 0.380859375, "learning_rate": 6.214825826932876e-06, "loss": 1.8483, "step": 21986 }, { "epoch": 0.709385353420271, "grad_norm": 0.46875, "learning_rate": 6.213550295267269e-06, "loss": 1.8606, "step": 21987 }, { "epoch": 0.7094176172740673, "grad_norm": 0.369140625, "learning_rate": 6.212274860316603e-06, "loss": 1.8622, "step": 21988 }, { "epoch": 0.7094498811278637, "grad_norm": 0.42578125, "learning_rate": 6.21099952209491e-06, "loss": 1.8504, "step": 21989 }, { "epoch": 0.7094821449816601, "grad_norm": 0.41796875, "learning_rate": 6.209724280616225e-06, "loss": 1.8578, "step": 21990 }, { "epoch": 0.7095144088354564, "grad_norm": 0.388671875, "learning_rate": 6.208449135894594e-06, "loss": 1.8513, "step": 21991 }, { "epoch": 0.7095466726892528, "grad_norm": 0.40234375, "learning_rate": 6.207174087944046e-06, "loss": 1.8079, "step": 21992 }, { "epoch": 0.709578936543049, "grad_norm": 0.47265625, "learning_rate": 6.205899136778614e-06, "loss": 1.817, "step": 21993 }, { "epoch": 0.7096112003968454, "grad_norm": 0.396484375, "learning_rate": 6.204624282412341e-06, "loss": 1.836, "step": 21994 }, { "epoch": 0.7096434642506417, "grad_norm": 0.37109375, "learning_rate": 6.203349524859254e-06, "loss": 1.8247, "step": 21995 }, { "epoch": 0.7096757281044381, "grad_norm": 0.5078125, "learning_rate": 6.202074864133378e-06, "loss": 1.784, "step": 21996 }, { "epoch": 0.7097079919582344, "grad_norm": 0.36328125, "learning_rate": 6.2008003002487564e-06, "loss": 1.8253, "step": 21997 }, { "epoch": 0.7097402558120308, "grad_norm": 0.50390625, "learning_rate": 6.199525833219413e-06, "loss": 1.8589, "step": 21998 }, { "epoch": 0.7097725196658271, "grad_norm": 0.376953125, "learning_rate": 6.1982514630593705e-06, "loss": 1.8287, "step": 21999 }, { "epoch": 0.7098047835196235, "grad_norm": 0.384765625, "learning_rate": 6.196977189782667e-06, "loss": 1.829, "step": 22000 }, { "epoch": 0.7098370473734198, "grad_norm": 0.3828125, "learning_rate": 6.195703013403322e-06, "loss": 1.808, "step": 22001 }, { "epoch": 0.7098693112272162, "grad_norm": 0.37890625, "learning_rate": 6.1944289339353575e-06, "loss": 1.8045, "step": 22002 }, { "epoch": 0.7099015750810125, "grad_norm": 0.36328125, "learning_rate": 6.193154951392809e-06, "loss": 1.8011, "step": 22003 }, { "epoch": 0.7099338389348089, "grad_norm": 0.3671875, "learning_rate": 6.191881065789691e-06, "loss": 1.825, "step": 22004 }, { "epoch": 0.7099661027886052, "grad_norm": 0.404296875, "learning_rate": 6.190607277140023e-06, "loss": 1.8371, "step": 22005 }, { "epoch": 0.7099983666424016, "grad_norm": 0.373046875, "learning_rate": 6.189333585457834e-06, "loss": 1.8573, "step": 22006 }, { "epoch": 0.7100306304961979, "grad_norm": 0.36328125, "learning_rate": 6.188059990757142e-06, "loss": 1.8418, "step": 22007 }, { "epoch": 0.7100628943499943, "grad_norm": 0.416015625, "learning_rate": 6.1867864930519585e-06, "loss": 1.8471, "step": 22008 }, { "epoch": 0.7100951582037905, "grad_norm": 0.396484375, "learning_rate": 6.185513092356311e-06, "loss": 1.823, "step": 22009 }, { "epoch": 0.7101274220575869, "grad_norm": 0.37109375, "learning_rate": 6.184239788684212e-06, "loss": 1.7726, "step": 22010 }, { "epoch": 0.7101596859113833, "grad_norm": 0.3828125, "learning_rate": 6.182966582049672e-06, "loss": 1.8119, "step": 22011 }, { "epoch": 0.7101919497651796, "grad_norm": 0.458984375, "learning_rate": 6.1816934724667165e-06, "loss": 1.8602, "step": 22012 }, { "epoch": 0.710224213618976, "grad_norm": 0.443359375, "learning_rate": 6.180420459949351e-06, "loss": 1.8499, "step": 22013 }, { "epoch": 0.7102564774727723, "grad_norm": 0.498046875, "learning_rate": 6.179147544511592e-06, "loss": 1.8106, "step": 22014 }, { "epoch": 0.7102887413265687, "grad_norm": 0.5390625, "learning_rate": 6.177874726167444e-06, "loss": 1.849, "step": 22015 }, { "epoch": 0.710321005180365, "grad_norm": 0.369140625, "learning_rate": 6.176602004930924e-06, "loss": 1.8563, "step": 22016 }, { "epoch": 0.7103532690341614, "grad_norm": 0.40234375, "learning_rate": 6.175329380816044e-06, "loss": 1.8106, "step": 22017 }, { "epoch": 0.7103855328879577, "grad_norm": 0.419921875, "learning_rate": 6.174056853836798e-06, "loss": 1.8195, "step": 22018 }, { "epoch": 0.7104177967417541, "grad_norm": 0.380859375, "learning_rate": 6.17278442400721e-06, "loss": 1.8155, "step": 22019 }, { "epoch": 0.7104500605955504, "grad_norm": 0.357421875, "learning_rate": 6.171512091341276e-06, "loss": 1.8274, "step": 22020 }, { "epoch": 0.7104823244493468, "grad_norm": 0.384765625, "learning_rate": 6.170239855853001e-06, "loss": 1.8271, "step": 22021 }, { "epoch": 0.7105145883031431, "grad_norm": 0.40625, "learning_rate": 6.168967717556395e-06, "loss": 1.8326, "step": 22022 }, { "epoch": 0.7105468521569395, "grad_norm": 0.375, "learning_rate": 6.1676956764654576e-06, "loss": 1.8488, "step": 22023 }, { "epoch": 0.7105791160107358, "grad_norm": 0.427734375, "learning_rate": 6.166423732594183e-06, "loss": 1.8601, "step": 22024 }, { "epoch": 0.7106113798645322, "grad_norm": 0.416015625, "learning_rate": 6.165151885956586e-06, "loss": 1.8599, "step": 22025 }, { "epoch": 0.7106436437183284, "grad_norm": 0.392578125, "learning_rate": 6.163880136566659e-06, "loss": 1.8744, "step": 22026 }, { "epoch": 0.7106759075721248, "grad_norm": 0.373046875, "learning_rate": 6.162608484438394e-06, "loss": 1.8759, "step": 22027 }, { "epoch": 0.7107081714259211, "grad_norm": 0.390625, "learning_rate": 6.161336929585801e-06, "loss": 1.8678, "step": 22028 }, { "epoch": 0.7107404352797175, "grad_norm": 0.37890625, "learning_rate": 6.16006547202287e-06, "loss": 1.8448, "step": 22029 }, { "epoch": 0.7107726991335139, "grad_norm": 0.373046875, "learning_rate": 6.15879411176359e-06, "loss": 1.8478, "step": 22030 }, { "epoch": 0.7108049629873102, "grad_norm": 0.416015625, "learning_rate": 6.157522848821969e-06, "loss": 1.8631, "step": 22031 }, { "epoch": 0.7108372268411066, "grad_norm": 0.3671875, "learning_rate": 6.156251683211991e-06, "loss": 1.8317, "step": 22032 }, { "epoch": 0.7108694906949029, "grad_norm": 0.359375, "learning_rate": 6.154980614947647e-06, "loss": 1.8276, "step": 22033 }, { "epoch": 0.7109017545486993, "grad_norm": 0.3828125, "learning_rate": 6.153709644042936e-06, "loss": 1.8248, "step": 22034 }, { "epoch": 0.7109340184024956, "grad_norm": 0.38671875, "learning_rate": 6.15243877051184e-06, "loss": 1.8676, "step": 22035 }, { "epoch": 0.710966282256292, "grad_norm": 0.361328125, "learning_rate": 6.151167994368349e-06, "loss": 1.856, "step": 22036 }, { "epoch": 0.7109985461100883, "grad_norm": 0.376953125, "learning_rate": 6.149897315626456e-06, "loss": 1.8234, "step": 22037 }, { "epoch": 0.7110308099638847, "grad_norm": 0.4765625, "learning_rate": 6.148626734300145e-06, "loss": 1.8595, "step": 22038 }, { "epoch": 0.711063073817681, "grad_norm": 0.388671875, "learning_rate": 6.1473562504033945e-06, "loss": 1.8348, "step": 22039 }, { "epoch": 0.7110953376714774, "grad_norm": 0.4609375, "learning_rate": 6.146085863950203e-06, "loss": 1.8284, "step": 22040 }, { "epoch": 0.7111276015252737, "grad_norm": 0.396484375, "learning_rate": 6.144815574954544e-06, "loss": 1.8476, "step": 22041 }, { "epoch": 0.71115986537907, "grad_norm": 0.36328125, "learning_rate": 6.1435453834304e-06, "loss": 1.8604, "step": 22042 }, { "epoch": 0.7111921292328663, "grad_norm": 0.431640625, "learning_rate": 6.142275289391757e-06, "loss": 1.8651, "step": 22043 }, { "epoch": 0.7112243930866627, "grad_norm": 0.380859375, "learning_rate": 6.1410052928525966e-06, "loss": 1.8285, "step": 22044 }, { "epoch": 0.711256656940459, "grad_norm": 0.375, "learning_rate": 6.139735393826892e-06, "loss": 1.8884, "step": 22045 }, { "epoch": 0.7112889207942554, "grad_norm": 0.423828125, "learning_rate": 6.138465592328618e-06, "loss": 1.8697, "step": 22046 }, { "epoch": 0.7113211846480517, "grad_norm": 0.396484375, "learning_rate": 6.137195888371765e-06, "loss": 1.8369, "step": 22047 }, { "epoch": 0.7113534485018481, "grad_norm": 0.37890625, "learning_rate": 6.1359262819703e-06, "loss": 1.825, "step": 22048 }, { "epoch": 0.7113857123556444, "grad_norm": 0.380859375, "learning_rate": 6.134656773138196e-06, "loss": 1.8543, "step": 22049 }, { "epoch": 0.7114179762094408, "grad_norm": 0.37890625, "learning_rate": 6.1333873618894335e-06, "loss": 1.8385, "step": 22050 }, { "epoch": 0.7114502400632372, "grad_norm": 0.361328125, "learning_rate": 6.132118048237983e-06, "loss": 1.8735, "step": 22051 }, { "epoch": 0.7114825039170335, "grad_norm": 0.353515625, "learning_rate": 6.130848832197808e-06, "loss": 1.8501, "step": 22052 }, { "epoch": 0.7115147677708299, "grad_norm": 0.380859375, "learning_rate": 6.129579713782894e-06, "loss": 1.8805, "step": 22053 }, { "epoch": 0.7115470316246262, "grad_norm": 0.373046875, "learning_rate": 6.128310693007203e-06, "loss": 1.8584, "step": 22054 }, { "epoch": 0.7115792954784226, "grad_norm": 0.37109375, "learning_rate": 6.127041769884697e-06, "loss": 1.8548, "step": 22055 }, { "epoch": 0.7116115593322189, "grad_norm": 0.3828125, "learning_rate": 6.125772944429355e-06, "loss": 1.898, "step": 22056 }, { "epoch": 0.7116438231860153, "grad_norm": 0.38671875, "learning_rate": 6.124504216655138e-06, "loss": 1.8597, "step": 22057 }, { "epoch": 0.7116760870398116, "grad_norm": 0.361328125, "learning_rate": 6.123235586576007e-06, "loss": 1.9129, "step": 22058 }, { "epoch": 0.711708350893608, "grad_norm": 0.376953125, "learning_rate": 6.121967054205936e-06, "loss": 1.9168, "step": 22059 }, { "epoch": 0.7117406147474042, "grad_norm": 0.373046875, "learning_rate": 6.120698619558882e-06, "loss": 1.8908, "step": 22060 }, { "epoch": 0.7117728786012006, "grad_norm": 0.365234375, "learning_rate": 6.119430282648802e-06, "loss": 1.9016, "step": 22061 }, { "epoch": 0.7118051424549969, "grad_norm": 0.375, "learning_rate": 6.118162043489667e-06, "loss": 1.8504, "step": 22062 }, { "epoch": 0.7118374063087933, "grad_norm": 0.35546875, "learning_rate": 6.116893902095434e-06, "loss": 1.8825, "step": 22063 }, { "epoch": 0.7118696701625896, "grad_norm": 0.37109375, "learning_rate": 6.115625858480054e-06, "loss": 1.916, "step": 22064 }, { "epoch": 0.711901934016386, "grad_norm": 0.36328125, "learning_rate": 6.114357912657503e-06, "loss": 1.8679, "step": 22065 }, { "epoch": 0.7119341978701823, "grad_norm": 0.400390625, "learning_rate": 6.1130900646417125e-06, "loss": 1.896, "step": 22066 }, { "epoch": 0.7119664617239787, "grad_norm": 0.37109375, "learning_rate": 6.111822314446654e-06, "loss": 1.9232, "step": 22067 }, { "epoch": 0.711998725577775, "grad_norm": 0.38671875, "learning_rate": 6.110554662086283e-06, "loss": 1.8775, "step": 22068 }, { "epoch": 0.7120309894315714, "grad_norm": 0.37109375, "learning_rate": 6.109287107574548e-06, "loss": 1.8946, "step": 22069 }, { "epoch": 0.7120632532853678, "grad_norm": 0.357421875, "learning_rate": 6.1080196509253985e-06, "loss": 1.9279, "step": 22070 }, { "epoch": 0.7120955171391641, "grad_norm": 0.369140625, "learning_rate": 6.1067522921527944e-06, "loss": 1.9009, "step": 22071 }, { "epoch": 0.7121277809929605, "grad_norm": 0.36328125, "learning_rate": 6.105485031270681e-06, "loss": 1.8922, "step": 22072 }, { "epoch": 0.7121600448467568, "grad_norm": 0.369140625, "learning_rate": 6.104217868293007e-06, "loss": 1.8999, "step": 22073 }, { "epoch": 0.7121923087005532, "grad_norm": 0.359375, "learning_rate": 6.102950803233717e-06, "loss": 1.9207, "step": 22074 }, { "epoch": 0.7122245725543495, "grad_norm": 0.431640625, "learning_rate": 6.101683836106765e-06, "loss": 1.8875, "step": 22075 }, { "epoch": 0.7122568364081459, "grad_norm": 0.373046875, "learning_rate": 6.100416966926096e-06, "loss": 1.896, "step": 22076 }, { "epoch": 0.7122891002619421, "grad_norm": 0.359375, "learning_rate": 6.099150195705645e-06, "loss": 1.8898, "step": 22077 }, { "epoch": 0.7123213641157385, "grad_norm": 0.45703125, "learning_rate": 6.09788352245937e-06, "loss": 1.8613, "step": 22078 }, { "epoch": 0.7123536279695348, "grad_norm": 0.361328125, "learning_rate": 6.0966169472012076e-06, "loss": 1.8797, "step": 22079 }, { "epoch": 0.7123858918233312, "grad_norm": 0.36328125, "learning_rate": 6.095350469945092e-06, "loss": 1.8808, "step": 22080 }, { "epoch": 0.7124181556771275, "grad_norm": 0.357421875, "learning_rate": 6.094084090704976e-06, "loss": 1.89, "step": 22081 }, { "epoch": 0.7124504195309239, "grad_norm": 0.38671875, "learning_rate": 6.092817809494793e-06, "loss": 1.8855, "step": 22082 }, { "epoch": 0.7124826833847202, "grad_norm": 0.4140625, "learning_rate": 6.091551626328476e-06, "loss": 1.895, "step": 22083 }, { "epoch": 0.7125149472385166, "grad_norm": 0.36328125, "learning_rate": 6.090285541219973e-06, "loss": 1.8885, "step": 22084 }, { "epoch": 0.7125472110923129, "grad_norm": 0.427734375, "learning_rate": 6.089019554183215e-06, "loss": 1.8987, "step": 22085 }, { "epoch": 0.7125794749461093, "grad_norm": 0.361328125, "learning_rate": 6.087753665232132e-06, "loss": 1.8721, "step": 22086 }, { "epoch": 0.7126117387999056, "grad_norm": 0.38671875, "learning_rate": 6.086487874380669e-06, "loss": 1.9254, "step": 22087 }, { "epoch": 0.712644002653702, "grad_norm": 0.384765625, "learning_rate": 6.085222181642752e-06, "loss": 1.8928, "step": 22088 }, { "epoch": 0.7126762665074983, "grad_norm": 0.361328125, "learning_rate": 6.083956587032309e-06, "loss": 1.8533, "step": 22089 }, { "epoch": 0.7127085303612947, "grad_norm": 0.359375, "learning_rate": 6.082691090563281e-06, "loss": 1.8934, "step": 22090 }, { "epoch": 0.7127407942150911, "grad_norm": 0.37890625, "learning_rate": 6.081425692249592e-06, "loss": 1.8909, "step": 22091 }, { "epoch": 0.7127730580688874, "grad_norm": 0.37109375, "learning_rate": 6.080160392105166e-06, "loss": 1.8797, "step": 22092 }, { "epoch": 0.7128053219226838, "grad_norm": 0.35546875, "learning_rate": 6.07889519014394e-06, "loss": 1.8904, "step": 22093 }, { "epoch": 0.71283758577648, "grad_norm": 0.37109375, "learning_rate": 6.077630086379835e-06, "loss": 1.8762, "step": 22094 }, { "epoch": 0.7128698496302764, "grad_norm": 0.373046875, "learning_rate": 6.076365080826773e-06, "loss": 1.8739, "step": 22095 }, { "epoch": 0.7129021134840727, "grad_norm": 0.357421875, "learning_rate": 6.075100173498693e-06, "loss": 1.8758, "step": 22096 }, { "epoch": 0.7129343773378691, "grad_norm": 0.34765625, "learning_rate": 6.0738353644094965e-06, "loss": 1.8881, "step": 22097 }, { "epoch": 0.7129666411916654, "grad_norm": 0.365234375, "learning_rate": 6.072570653573116e-06, "loss": 1.9244, "step": 22098 }, { "epoch": 0.7129989050454618, "grad_norm": 0.35546875, "learning_rate": 6.0713060410034855e-06, "loss": 1.8911, "step": 22099 }, { "epoch": 0.7130311688992581, "grad_norm": 0.361328125, "learning_rate": 6.0700415267145015e-06, "loss": 1.9019, "step": 22100 }, { "epoch": 0.7130634327530545, "grad_norm": 0.359375, "learning_rate": 6.068777110720094e-06, "loss": 1.922, "step": 22101 }, { "epoch": 0.7130956966068508, "grad_norm": 0.35546875, "learning_rate": 6.0675127930341855e-06, "loss": 1.8372, "step": 22102 }, { "epoch": 0.7131279604606472, "grad_norm": 0.359375, "learning_rate": 6.066248573670689e-06, "loss": 1.8863, "step": 22103 }, { "epoch": 0.7131602243144435, "grad_norm": 0.35546875, "learning_rate": 6.0649844526435175e-06, "loss": 1.8945, "step": 22104 }, { "epoch": 0.7131924881682399, "grad_norm": 0.376953125, "learning_rate": 6.0637204299665836e-06, "loss": 1.8736, "step": 22105 }, { "epoch": 0.7132247520220362, "grad_norm": 0.388671875, "learning_rate": 6.062456505653809e-06, "loss": 1.847, "step": 22106 }, { "epoch": 0.7132570158758326, "grad_norm": 0.37109375, "learning_rate": 6.061192679719102e-06, "loss": 1.8666, "step": 22107 }, { "epoch": 0.7132892797296289, "grad_norm": 0.4375, "learning_rate": 6.059928952176368e-06, "loss": 1.857, "step": 22108 }, { "epoch": 0.7133215435834253, "grad_norm": 0.373046875, "learning_rate": 6.0586653230395274e-06, "loss": 1.8636, "step": 22109 }, { "epoch": 0.7133538074372215, "grad_norm": 0.369140625, "learning_rate": 6.057401792322484e-06, "loss": 1.8524, "step": 22110 }, { "epoch": 0.7133860712910179, "grad_norm": 0.359375, "learning_rate": 6.056138360039142e-06, "loss": 1.81, "step": 22111 }, { "epoch": 0.7134183351448143, "grad_norm": 0.373046875, "learning_rate": 6.054875026203417e-06, "loss": 1.8473, "step": 22112 }, { "epoch": 0.7134505989986106, "grad_norm": 0.37890625, "learning_rate": 6.05361179082921e-06, "loss": 1.8666, "step": 22113 }, { "epoch": 0.713482862852407, "grad_norm": 0.365234375, "learning_rate": 6.052348653930423e-06, "loss": 1.8545, "step": 22114 }, { "epoch": 0.7135151267062033, "grad_norm": 0.3671875, "learning_rate": 6.051085615520966e-06, "loss": 1.8506, "step": 22115 }, { "epoch": 0.7135473905599997, "grad_norm": 0.404296875, "learning_rate": 6.0498226756147395e-06, "loss": 1.8856, "step": 22116 }, { "epoch": 0.713579654413796, "grad_norm": 0.365234375, "learning_rate": 6.048559834225639e-06, "loss": 1.8578, "step": 22117 }, { "epoch": 0.7136119182675924, "grad_norm": 0.361328125, "learning_rate": 6.047297091367575e-06, "loss": 1.8587, "step": 22118 }, { "epoch": 0.7136441821213887, "grad_norm": 0.396484375, "learning_rate": 6.046034447054444e-06, "loss": 1.865, "step": 22119 }, { "epoch": 0.7136764459751851, "grad_norm": 0.3515625, "learning_rate": 6.044771901300136e-06, "loss": 1.8406, "step": 22120 }, { "epoch": 0.7137087098289814, "grad_norm": 0.376953125, "learning_rate": 6.043509454118559e-06, "loss": 1.8625, "step": 22121 }, { "epoch": 0.7137409736827778, "grad_norm": 0.359375, "learning_rate": 6.042247105523604e-06, "loss": 1.878, "step": 22122 }, { "epoch": 0.7137732375365741, "grad_norm": 0.365234375, "learning_rate": 6.040984855529162e-06, "loss": 1.8614, "step": 22123 }, { "epoch": 0.7138055013903705, "grad_norm": 0.369140625, "learning_rate": 6.039722704149143e-06, "loss": 1.8329, "step": 22124 }, { "epoch": 0.7138377652441668, "grad_norm": 0.359375, "learning_rate": 6.038460651397416e-06, "loss": 1.8369, "step": 22125 }, { "epoch": 0.7138700290979632, "grad_norm": 0.353515625, "learning_rate": 6.037198697287886e-06, "loss": 1.8518, "step": 22126 }, { "epoch": 0.7139022929517594, "grad_norm": 0.369140625, "learning_rate": 6.035936841834454e-06, "loss": 1.8091, "step": 22127 }, { "epoch": 0.7139345568055558, "grad_norm": 0.375, "learning_rate": 6.034675085050987e-06, "loss": 1.8069, "step": 22128 }, { "epoch": 0.7139668206593521, "grad_norm": 0.359375, "learning_rate": 6.0334134269513865e-06, "loss": 1.8403, "step": 22129 }, { "epoch": 0.7139990845131485, "grad_norm": 0.375, "learning_rate": 6.032151867549548e-06, "loss": 1.8442, "step": 22130 }, { "epoch": 0.7140313483669449, "grad_norm": 0.361328125, "learning_rate": 6.030890406859338e-06, "loss": 1.87, "step": 22131 }, { "epoch": 0.7140636122207412, "grad_norm": 0.373046875, "learning_rate": 6.02962904489465e-06, "loss": 1.8248, "step": 22132 }, { "epoch": 0.7140958760745376, "grad_norm": 0.37890625, "learning_rate": 6.0283677816693835e-06, "loss": 1.843, "step": 22133 }, { "epoch": 0.7141281399283339, "grad_norm": 0.369140625, "learning_rate": 6.027106617197397e-06, "loss": 1.8515, "step": 22134 }, { "epoch": 0.7141604037821303, "grad_norm": 0.35546875, "learning_rate": 6.0258455514925875e-06, "loss": 1.8495, "step": 22135 }, { "epoch": 0.7141926676359266, "grad_norm": 0.37890625, "learning_rate": 6.024584584568828e-06, "loss": 1.8371, "step": 22136 }, { "epoch": 0.714224931489723, "grad_norm": 0.400390625, "learning_rate": 6.023323716440007e-06, "loss": 1.818, "step": 22137 }, { "epoch": 0.7142571953435193, "grad_norm": 0.400390625, "learning_rate": 6.02206294712e-06, "loss": 1.8367, "step": 22138 }, { "epoch": 0.7142894591973157, "grad_norm": 0.376953125, "learning_rate": 6.020802276622677e-06, "loss": 1.8284, "step": 22139 }, { "epoch": 0.714321723051112, "grad_norm": 0.3828125, "learning_rate": 6.0195417049619256e-06, "loss": 1.8194, "step": 22140 }, { "epoch": 0.7143539869049084, "grad_norm": 0.423828125, "learning_rate": 6.018281232151618e-06, "loss": 1.8276, "step": 22141 }, { "epoch": 0.7143862507587047, "grad_norm": 0.3828125, "learning_rate": 6.017020858205622e-06, "loss": 1.8038, "step": 22142 }, { "epoch": 0.714418514612501, "grad_norm": 0.44140625, "learning_rate": 6.01576058313782e-06, "loss": 1.8319, "step": 22143 }, { "epoch": 0.7144507784662973, "grad_norm": 0.357421875, "learning_rate": 6.014500406962081e-06, "loss": 1.7784, "step": 22144 }, { "epoch": 0.7144830423200937, "grad_norm": 0.369140625, "learning_rate": 6.013240329692269e-06, "loss": 1.8415, "step": 22145 }, { "epoch": 0.71451530617389, "grad_norm": 0.390625, "learning_rate": 6.011980351342268e-06, "loss": 1.8191, "step": 22146 }, { "epoch": 0.7145475700276864, "grad_norm": 0.373046875, "learning_rate": 6.010720471925937e-06, "loss": 1.839, "step": 22147 }, { "epoch": 0.7145798338814827, "grad_norm": 0.375, "learning_rate": 6.0094606914571425e-06, "loss": 1.7883, "step": 22148 }, { "epoch": 0.7146120977352791, "grad_norm": 0.380859375, "learning_rate": 6.0082010099497595e-06, "loss": 1.8053, "step": 22149 }, { "epoch": 0.7146443615890754, "grad_norm": 0.380859375, "learning_rate": 6.006941427417649e-06, "loss": 1.8186, "step": 22150 }, { "epoch": 0.7146766254428718, "grad_norm": 0.376953125, "learning_rate": 6.005681943874669e-06, "loss": 1.8322, "step": 22151 }, { "epoch": 0.7147088892966682, "grad_norm": 0.42578125, "learning_rate": 6.004422559334698e-06, "loss": 1.7978, "step": 22152 }, { "epoch": 0.7147411531504645, "grad_norm": 0.3828125, "learning_rate": 6.003163273811588e-06, "loss": 1.7984, "step": 22153 }, { "epoch": 0.7147734170042609, "grad_norm": 0.365234375, "learning_rate": 6.001904087319198e-06, "loss": 1.8198, "step": 22154 }, { "epoch": 0.7148056808580572, "grad_norm": 0.400390625, "learning_rate": 6.000644999871402e-06, "loss": 1.8299, "step": 22155 }, { "epoch": 0.7148379447118536, "grad_norm": 0.388671875, "learning_rate": 5.999386011482041e-06, "loss": 1.867, "step": 22156 }, { "epoch": 0.7148702085656499, "grad_norm": 0.3828125, "learning_rate": 5.99812712216498e-06, "loss": 1.8325, "step": 22157 }, { "epoch": 0.7149024724194463, "grad_norm": 0.3828125, "learning_rate": 5.996868331934089e-06, "loss": 1.8342, "step": 22158 }, { "epoch": 0.7149347362732426, "grad_norm": 0.36328125, "learning_rate": 5.9956096408032024e-06, "loss": 1.8262, "step": 22159 }, { "epoch": 0.714967000127039, "grad_norm": 0.369140625, "learning_rate": 5.994351048786185e-06, "loss": 1.8163, "step": 22160 }, { "epoch": 0.7149992639808352, "grad_norm": 0.396484375, "learning_rate": 5.9930925558969e-06, "loss": 1.8266, "step": 22161 }, { "epoch": 0.7150315278346316, "grad_norm": 0.35546875, "learning_rate": 5.991834162149181e-06, "loss": 1.8552, "step": 22162 }, { "epoch": 0.7150637916884279, "grad_norm": 0.388671875, "learning_rate": 5.99057586755689e-06, "loss": 1.825, "step": 22163 }, { "epoch": 0.7150960555422243, "grad_norm": 0.37890625, "learning_rate": 5.989317672133885e-06, "loss": 1.8038, "step": 22164 }, { "epoch": 0.7151283193960206, "grad_norm": 0.404296875, "learning_rate": 5.988059575893998e-06, "loss": 1.7995, "step": 22165 }, { "epoch": 0.715160583249817, "grad_norm": 0.37890625, "learning_rate": 5.986801578851091e-06, "loss": 1.7888, "step": 22166 }, { "epoch": 0.7151928471036133, "grad_norm": 0.40234375, "learning_rate": 5.985543681019001e-06, "loss": 1.8082, "step": 22167 }, { "epoch": 0.7152251109574097, "grad_norm": 0.3671875, "learning_rate": 5.984285882411584e-06, "loss": 1.7798, "step": 22168 }, { "epoch": 0.715257374811206, "grad_norm": 0.380859375, "learning_rate": 5.98302818304268e-06, "loss": 1.8235, "step": 22169 }, { "epoch": 0.7152896386650024, "grad_norm": 0.369140625, "learning_rate": 5.9817705829261275e-06, "loss": 1.8048, "step": 22170 }, { "epoch": 0.7153219025187988, "grad_norm": 0.373046875, "learning_rate": 5.980513082075782e-06, "loss": 1.7996, "step": 22171 }, { "epoch": 0.7153541663725951, "grad_norm": 0.44921875, "learning_rate": 5.979255680505475e-06, "loss": 1.8064, "step": 22172 }, { "epoch": 0.7153864302263915, "grad_norm": 0.40234375, "learning_rate": 5.977998378229047e-06, "loss": 1.827, "step": 22173 }, { "epoch": 0.7154186940801878, "grad_norm": 0.3828125, "learning_rate": 5.976741175260346e-06, "loss": 1.8862, "step": 22174 }, { "epoch": 0.7154509579339842, "grad_norm": 0.3984375, "learning_rate": 5.975484071613204e-06, "loss": 1.8489, "step": 22175 }, { "epoch": 0.7154832217877805, "grad_norm": 0.388671875, "learning_rate": 5.9742270673014535e-06, "loss": 1.838, "step": 22176 }, { "epoch": 0.7155154856415769, "grad_norm": 0.38671875, "learning_rate": 5.972970162338941e-06, "loss": 1.8724, "step": 22177 }, { "epoch": 0.7155477494953731, "grad_norm": 0.373046875, "learning_rate": 5.9717133567394976e-06, "loss": 1.8467, "step": 22178 }, { "epoch": 0.7155800133491695, "grad_norm": 0.361328125, "learning_rate": 5.970456650516953e-06, "loss": 1.8125, "step": 22179 }, { "epoch": 0.7156122772029658, "grad_norm": 0.396484375, "learning_rate": 5.969200043685147e-06, "loss": 1.8008, "step": 22180 }, { "epoch": 0.7156445410567622, "grad_norm": 0.373046875, "learning_rate": 5.96794353625791e-06, "loss": 1.805, "step": 22181 }, { "epoch": 0.7156768049105585, "grad_norm": 0.37890625, "learning_rate": 5.966687128249066e-06, "loss": 1.8272, "step": 22182 }, { "epoch": 0.7157090687643549, "grad_norm": 0.36328125, "learning_rate": 5.9654308196724524e-06, "loss": 1.8581, "step": 22183 }, { "epoch": 0.7157413326181512, "grad_norm": 0.390625, "learning_rate": 5.9641746105418965e-06, "loss": 1.8174, "step": 22184 }, { "epoch": 0.7157735964719476, "grad_norm": 0.37109375, "learning_rate": 5.962918500871219e-06, "loss": 1.8032, "step": 22185 }, { "epoch": 0.7158058603257439, "grad_norm": 0.369140625, "learning_rate": 5.961662490674262e-06, "loss": 1.8253, "step": 22186 }, { "epoch": 0.7158381241795403, "grad_norm": 0.388671875, "learning_rate": 5.960406579964829e-06, "loss": 1.8399, "step": 22187 }, { "epoch": 0.7158703880333366, "grad_norm": 0.36328125, "learning_rate": 5.959150768756756e-06, "loss": 1.8605, "step": 22188 }, { "epoch": 0.715902651887133, "grad_norm": 0.3828125, "learning_rate": 5.957895057063876e-06, "loss": 1.8167, "step": 22189 }, { "epoch": 0.7159349157409293, "grad_norm": 0.359375, "learning_rate": 5.956639444899989e-06, "loss": 1.8107, "step": 22190 }, { "epoch": 0.7159671795947257, "grad_norm": 0.384765625, "learning_rate": 5.9553839322789275e-06, "loss": 1.8347, "step": 22191 }, { "epoch": 0.7159994434485221, "grad_norm": 0.376953125, "learning_rate": 5.954128519214523e-06, "loss": 1.8509, "step": 22192 }, { "epoch": 0.7160317073023184, "grad_norm": 0.365234375, "learning_rate": 5.952873205720571e-06, "loss": 1.8148, "step": 22193 }, { "epoch": 0.7160639711561148, "grad_norm": 0.390625, "learning_rate": 5.951617991810901e-06, "loss": 1.7833, "step": 22194 }, { "epoch": 0.716096235009911, "grad_norm": 0.4296875, "learning_rate": 5.950362877499338e-06, "loss": 1.8321, "step": 22195 }, { "epoch": 0.7161284988637074, "grad_norm": 0.376953125, "learning_rate": 5.949107862799678e-06, "loss": 1.817, "step": 22196 }, { "epoch": 0.7161607627175037, "grad_norm": 0.4375, "learning_rate": 5.947852947725752e-06, "loss": 1.8406, "step": 22197 }, { "epoch": 0.7161930265713001, "grad_norm": 0.404296875, "learning_rate": 5.946598132291366e-06, "loss": 1.8298, "step": 22198 }, { "epoch": 0.7162252904250964, "grad_norm": 0.421875, "learning_rate": 5.945343416510327e-06, "loss": 1.8121, "step": 22199 }, { "epoch": 0.7162575542788928, "grad_norm": 0.416015625, "learning_rate": 5.944088800396458e-06, "loss": 1.8232, "step": 22200 }, { "epoch": 0.7162898181326891, "grad_norm": 0.365234375, "learning_rate": 5.9428342839635585e-06, "loss": 1.8568, "step": 22201 }, { "epoch": 0.7163220819864855, "grad_norm": 0.57421875, "learning_rate": 5.941579867225445e-06, "loss": 1.7935, "step": 22202 }, { "epoch": 0.7163543458402818, "grad_norm": 0.384765625, "learning_rate": 5.940325550195923e-06, "loss": 1.8213, "step": 22203 }, { "epoch": 0.7163866096940782, "grad_norm": 0.40234375, "learning_rate": 5.939071332888791e-06, "loss": 1.8083, "step": 22204 }, { "epoch": 0.7164188735478745, "grad_norm": 0.359375, "learning_rate": 5.937817215317869e-06, "loss": 1.8364, "step": 22205 }, { "epoch": 0.7164511374016709, "grad_norm": 0.37109375, "learning_rate": 5.936563197496953e-06, "loss": 1.8443, "step": 22206 }, { "epoch": 0.7164834012554672, "grad_norm": 0.365234375, "learning_rate": 5.935309279439841e-06, "loss": 1.8156, "step": 22207 }, { "epoch": 0.7165156651092636, "grad_norm": 0.380859375, "learning_rate": 5.934055461160349e-06, "loss": 1.8855, "step": 22208 }, { "epoch": 0.7165479289630599, "grad_norm": 0.40625, "learning_rate": 5.932801742672268e-06, "loss": 1.8508, "step": 22209 }, { "epoch": 0.7165801928168563, "grad_norm": 0.3671875, "learning_rate": 5.931548123989398e-06, "loss": 1.854, "step": 22210 }, { "epoch": 0.7166124566706525, "grad_norm": 0.37890625, "learning_rate": 5.930294605125543e-06, "loss": 1.8322, "step": 22211 }, { "epoch": 0.7166447205244489, "grad_norm": 0.375, "learning_rate": 5.929041186094501e-06, "loss": 1.7932, "step": 22212 }, { "epoch": 0.7166769843782453, "grad_norm": 0.376953125, "learning_rate": 5.927787866910059e-06, "loss": 1.8189, "step": 22213 }, { "epoch": 0.7167092482320416, "grad_norm": 0.384765625, "learning_rate": 5.926534647586026e-06, "loss": 1.7942, "step": 22214 }, { "epoch": 0.716741512085838, "grad_norm": 0.3671875, "learning_rate": 5.925281528136191e-06, "loss": 1.8238, "step": 22215 }, { "epoch": 0.7167737759396343, "grad_norm": 0.384765625, "learning_rate": 5.92402850857434e-06, "loss": 1.8479, "step": 22216 }, { "epoch": 0.7168060397934307, "grad_norm": 0.357421875, "learning_rate": 5.922775588914283e-06, "loss": 1.8155, "step": 22217 }, { "epoch": 0.716838303647227, "grad_norm": 0.396484375, "learning_rate": 5.92152276916979e-06, "loss": 1.8262, "step": 22218 }, { "epoch": 0.7168705675010234, "grad_norm": 0.369140625, "learning_rate": 5.920270049354663e-06, "loss": 1.8308, "step": 22219 }, { "epoch": 0.7169028313548197, "grad_norm": 0.36328125, "learning_rate": 5.919017429482698e-06, "loss": 1.7962, "step": 22220 }, { "epoch": 0.7169350952086161, "grad_norm": 0.404296875, "learning_rate": 5.917764909567664e-06, "loss": 1.8271, "step": 22221 }, { "epoch": 0.7169673590624124, "grad_norm": 0.375, "learning_rate": 5.9165124896233586e-06, "loss": 1.8791, "step": 22222 }, { "epoch": 0.7169996229162088, "grad_norm": 0.359375, "learning_rate": 5.915260169663578e-06, "loss": 1.8022, "step": 22223 }, { "epoch": 0.7170318867700051, "grad_norm": 0.37109375, "learning_rate": 5.9140079497020865e-06, "loss": 1.8554, "step": 22224 }, { "epoch": 0.7170641506238015, "grad_norm": 0.373046875, "learning_rate": 5.91275582975268e-06, "loss": 1.8279, "step": 22225 }, { "epoch": 0.7170964144775978, "grad_norm": 0.376953125, "learning_rate": 5.911503809829141e-06, "loss": 1.8357, "step": 22226 }, { "epoch": 0.7171286783313942, "grad_norm": 0.38671875, "learning_rate": 5.910251889945241e-06, "loss": 1.831, "step": 22227 }, { "epoch": 0.7171609421851904, "grad_norm": 0.365234375, "learning_rate": 5.909000070114773e-06, "loss": 1.874, "step": 22228 }, { "epoch": 0.7171932060389868, "grad_norm": 0.416015625, "learning_rate": 5.9077483503515115e-06, "loss": 1.8312, "step": 22229 }, { "epoch": 0.7172254698927831, "grad_norm": 0.392578125, "learning_rate": 5.906496730669226e-06, "loss": 1.8242, "step": 22230 }, { "epoch": 0.7172577337465795, "grad_norm": 0.376953125, "learning_rate": 5.905245211081707e-06, "loss": 1.818, "step": 22231 }, { "epoch": 0.7172899976003759, "grad_norm": 0.46875, "learning_rate": 5.903993791602725e-06, "loss": 1.7892, "step": 22232 }, { "epoch": 0.7173222614541722, "grad_norm": 0.38671875, "learning_rate": 5.902742472246048e-06, "loss": 1.8301, "step": 22233 }, { "epoch": 0.7173545253079686, "grad_norm": 0.384765625, "learning_rate": 5.90149125302546e-06, "loss": 1.8381, "step": 22234 }, { "epoch": 0.7173867891617649, "grad_norm": 0.51171875, "learning_rate": 5.900240133954724e-06, "loss": 1.7985, "step": 22235 }, { "epoch": 0.7174190530155613, "grad_norm": 0.376953125, "learning_rate": 5.898989115047622e-06, "loss": 1.8448, "step": 22236 }, { "epoch": 0.7174513168693576, "grad_norm": 0.44140625, "learning_rate": 5.89773819631792e-06, "loss": 1.8361, "step": 22237 }, { "epoch": 0.717483580723154, "grad_norm": 0.44921875, "learning_rate": 5.89648737777938e-06, "loss": 1.8647, "step": 22238 }, { "epoch": 0.7175158445769503, "grad_norm": 0.3828125, "learning_rate": 5.895236659445781e-06, "loss": 1.824, "step": 22239 }, { "epoch": 0.7175481084307467, "grad_norm": 0.44140625, "learning_rate": 5.893986041330885e-06, "loss": 1.8281, "step": 22240 }, { "epoch": 0.717580372284543, "grad_norm": 0.431640625, "learning_rate": 5.892735523448453e-06, "loss": 1.8353, "step": 22241 }, { "epoch": 0.7176126361383394, "grad_norm": 0.39453125, "learning_rate": 5.89148510581226e-06, "loss": 1.838, "step": 22242 }, { "epoch": 0.7176448999921357, "grad_norm": 0.388671875, "learning_rate": 5.890234788436064e-06, "loss": 1.8264, "step": 22243 }, { "epoch": 0.717677163845932, "grad_norm": 0.41015625, "learning_rate": 5.888984571333623e-06, "loss": 1.8365, "step": 22244 }, { "epoch": 0.7177094276997283, "grad_norm": 0.369140625, "learning_rate": 5.8877344545187105e-06, "loss": 1.8537, "step": 22245 }, { "epoch": 0.7177416915535247, "grad_norm": 0.37890625, "learning_rate": 5.886484438005079e-06, "loss": 1.8412, "step": 22246 }, { "epoch": 0.717773955407321, "grad_norm": 0.384765625, "learning_rate": 5.885234521806483e-06, "loss": 1.8621, "step": 22247 }, { "epoch": 0.7178062192611174, "grad_norm": 0.396484375, "learning_rate": 5.883984705936697e-06, "loss": 1.8737, "step": 22248 }, { "epoch": 0.7178384831149137, "grad_norm": 0.392578125, "learning_rate": 5.882734990409458e-06, "loss": 1.8398, "step": 22249 }, { "epoch": 0.7178707469687101, "grad_norm": 0.427734375, "learning_rate": 5.88148537523853e-06, "loss": 1.8847, "step": 22250 }, { "epoch": 0.7179030108225064, "grad_norm": 0.384765625, "learning_rate": 5.880235860437681e-06, "loss": 1.8641, "step": 22251 }, { "epoch": 0.7179352746763028, "grad_norm": 0.3671875, "learning_rate": 5.878986446020643e-06, "loss": 1.8174, "step": 22252 }, { "epoch": 0.7179675385300992, "grad_norm": 0.369140625, "learning_rate": 5.8777371320011775e-06, "loss": 1.85, "step": 22253 }, { "epoch": 0.7179998023838955, "grad_norm": 0.365234375, "learning_rate": 5.8764879183930495e-06, "loss": 1.8518, "step": 22254 }, { "epoch": 0.7180320662376919, "grad_norm": 0.376953125, "learning_rate": 5.8752388052099855e-06, "loss": 1.8643, "step": 22255 }, { "epoch": 0.7180643300914882, "grad_norm": 0.3671875, "learning_rate": 5.873989792465753e-06, "loss": 1.8512, "step": 22256 }, { "epoch": 0.7180965939452846, "grad_norm": 0.37109375, "learning_rate": 5.872740880174093e-06, "loss": 1.8228, "step": 22257 }, { "epoch": 0.7181288577990809, "grad_norm": 0.373046875, "learning_rate": 5.871492068348749e-06, "loss": 1.8337, "step": 22258 }, { "epoch": 0.7181611216528773, "grad_norm": 0.56640625, "learning_rate": 5.870243357003477e-06, "loss": 1.9264, "step": 22259 }, { "epoch": 0.7181933855066736, "grad_norm": 0.412109375, "learning_rate": 5.868994746152015e-06, "loss": 1.9367, "step": 22260 }, { "epoch": 0.71822564936047, "grad_norm": 0.52734375, "learning_rate": 5.867746235808103e-06, "loss": 1.9501, "step": 22261 }, { "epoch": 0.7182579132142662, "grad_norm": 0.490234375, "learning_rate": 5.866497825985497e-06, "loss": 1.9187, "step": 22262 }, { "epoch": 0.7182901770680626, "grad_norm": 0.40625, "learning_rate": 5.865249516697929e-06, "loss": 1.9554, "step": 22263 }, { "epoch": 0.7183224409218589, "grad_norm": 0.419921875, "learning_rate": 5.864001307959135e-06, "loss": 1.9082, "step": 22264 }, { "epoch": 0.7183547047756553, "grad_norm": 0.4609375, "learning_rate": 5.862753199782866e-06, "loss": 1.9435, "step": 22265 }, { "epoch": 0.7183869686294516, "grad_norm": 0.451171875, "learning_rate": 5.86150519218285e-06, "loss": 1.9104, "step": 22266 }, { "epoch": 0.718419232483248, "grad_norm": 0.38671875, "learning_rate": 5.860257285172834e-06, "loss": 1.9227, "step": 22267 }, { "epoch": 0.7184514963370443, "grad_norm": 0.447265625, "learning_rate": 5.859009478766548e-06, "loss": 1.9355, "step": 22268 }, { "epoch": 0.7184837601908407, "grad_norm": 0.4765625, "learning_rate": 5.857761772977724e-06, "loss": 1.9206, "step": 22269 }, { "epoch": 0.718516024044637, "grad_norm": 0.4296875, "learning_rate": 5.856514167820105e-06, "loss": 1.8821, "step": 22270 }, { "epoch": 0.7185482878984334, "grad_norm": 0.43359375, "learning_rate": 5.855266663307417e-06, "loss": 1.9595, "step": 22271 }, { "epoch": 0.7185805517522298, "grad_norm": 0.462890625, "learning_rate": 5.85401925945339e-06, "loss": 1.9256, "step": 22272 }, { "epoch": 0.7186128156060261, "grad_norm": 0.42578125, "learning_rate": 5.8527719562717625e-06, "loss": 1.9306, "step": 22273 }, { "epoch": 0.7186450794598225, "grad_norm": 0.41796875, "learning_rate": 5.8515247537762575e-06, "loss": 1.9471, "step": 22274 }, { "epoch": 0.7186773433136188, "grad_norm": 0.44921875, "learning_rate": 5.850277651980601e-06, "loss": 1.9456, "step": 22275 }, { "epoch": 0.7187096071674152, "grad_norm": 0.447265625, "learning_rate": 5.8490306508985345e-06, "loss": 1.9408, "step": 22276 }, { "epoch": 0.7187418710212115, "grad_norm": 0.447265625, "learning_rate": 5.847783750543764e-06, "loss": 1.9429, "step": 22277 }, { "epoch": 0.7187741348750079, "grad_norm": 0.416015625, "learning_rate": 5.846536950930023e-06, "loss": 1.9374, "step": 22278 }, { "epoch": 0.7188063987288041, "grad_norm": 0.42578125, "learning_rate": 5.845290252071047e-06, "loss": 1.9403, "step": 22279 }, { "epoch": 0.7188386625826005, "grad_norm": 0.423828125, "learning_rate": 5.844043653980538e-06, "loss": 1.9398, "step": 22280 }, { "epoch": 0.7188709264363968, "grad_norm": 0.396484375, "learning_rate": 5.842797156672228e-06, "loss": 1.9355, "step": 22281 }, { "epoch": 0.7189031902901932, "grad_norm": 0.392578125, "learning_rate": 5.841550760159848e-06, "loss": 1.9222, "step": 22282 }, { "epoch": 0.7189354541439895, "grad_norm": 0.41015625, "learning_rate": 5.840304464457098e-06, "loss": 1.9446, "step": 22283 }, { "epoch": 0.7189677179977859, "grad_norm": 0.408203125, "learning_rate": 5.839058269577704e-06, "loss": 1.9288, "step": 22284 }, { "epoch": 0.7189999818515822, "grad_norm": 0.392578125, "learning_rate": 5.837812175535395e-06, "loss": 1.9216, "step": 22285 }, { "epoch": 0.7190322457053786, "grad_norm": 0.404296875, "learning_rate": 5.836566182343867e-06, "loss": 1.93, "step": 22286 }, { "epoch": 0.7190645095591749, "grad_norm": 0.384765625, "learning_rate": 5.835320290016849e-06, "loss": 1.9381, "step": 22287 }, { "epoch": 0.7190967734129713, "grad_norm": 0.419921875, "learning_rate": 5.83407449856805e-06, "loss": 1.9534, "step": 22288 }, { "epoch": 0.7191290372667676, "grad_norm": 0.423828125, "learning_rate": 5.832828808011179e-06, "loss": 1.9307, "step": 22289 }, { "epoch": 0.719161301120564, "grad_norm": 0.390625, "learning_rate": 5.831583218359957e-06, "loss": 1.9261, "step": 22290 }, { "epoch": 0.7191935649743603, "grad_norm": 0.39453125, "learning_rate": 5.83033772962809e-06, "loss": 1.9419, "step": 22291 }, { "epoch": 0.7192258288281567, "grad_norm": 0.3828125, "learning_rate": 5.829092341829281e-06, "loss": 1.9437, "step": 22292 }, { "epoch": 0.7192580926819531, "grad_norm": 0.37890625, "learning_rate": 5.827847054977249e-06, "loss": 1.8801, "step": 22293 }, { "epoch": 0.7192903565357494, "grad_norm": 0.392578125, "learning_rate": 5.826601869085698e-06, "loss": 1.9106, "step": 22294 }, { "epoch": 0.7193226203895458, "grad_norm": 0.4296875, "learning_rate": 5.8253567841683276e-06, "loss": 1.8862, "step": 22295 }, { "epoch": 0.719354884243342, "grad_norm": 0.392578125, "learning_rate": 5.824111800238852e-06, "loss": 1.9309, "step": 22296 }, { "epoch": 0.7193871480971384, "grad_norm": 0.419921875, "learning_rate": 5.822866917310971e-06, "loss": 1.9238, "step": 22297 }, { "epoch": 0.7194194119509347, "grad_norm": 0.40234375, "learning_rate": 5.8216221353983814e-06, "loss": 1.9553, "step": 22298 }, { "epoch": 0.7194516758047311, "grad_norm": 0.400390625, "learning_rate": 5.820377454514798e-06, "loss": 1.9402, "step": 22299 }, { "epoch": 0.7194839396585274, "grad_norm": 0.46875, "learning_rate": 5.819132874673907e-06, "loss": 1.9506, "step": 22300 }, { "epoch": 0.7195162035123238, "grad_norm": 0.412109375, "learning_rate": 5.817888395889422e-06, "loss": 1.9317, "step": 22301 }, { "epoch": 0.7195484673661201, "grad_norm": 0.388671875, "learning_rate": 5.816644018175033e-06, "loss": 1.9146, "step": 22302 }, { "epoch": 0.7195807312199165, "grad_norm": 0.431640625, "learning_rate": 5.815399741544435e-06, "loss": 1.9369, "step": 22303 }, { "epoch": 0.7196129950737128, "grad_norm": 0.421875, "learning_rate": 5.814155566011331e-06, "loss": 1.938, "step": 22304 }, { "epoch": 0.7196452589275092, "grad_norm": 0.4140625, "learning_rate": 5.812911491589414e-06, "loss": 1.9442, "step": 22305 }, { "epoch": 0.7196775227813055, "grad_norm": 0.388671875, "learning_rate": 5.8116675182923686e-06, "loss": 1.9367, "step": 22306 }, { "epoch": 0.7197097866351019, "grad_norm": 0.431640625, "learning_rate": 5.810423646133908e-06, "loss": 1.9357, "step": 22307 }, { "epoch": 0.7197420504888982, "grad_norm": 0.390625, "learning_rate": 5.809179875127702e-06, "loss": 1.8956, "step": 22308 }, { "epoch": 0.7197743143426946, "grad_norm": 0.384765625, "learning_rate": 5.807936205287449e-06, "loss": 1.9214, "step": 22309 }, { "epoch": 0.7198065781964909, "grad_norm": 0.390625, "learning_rate": 5.80669263662685e-06, "loss": 1.953, "step": 22310 }, { "epoch": 0.7198388420502873, "grad_norm": 0.423828125, "learning_rate": 5.805449169159574e-06, "loss": 1.9343, "step": 22311 }, { "epoch": 0.7198711059040835, "grad_norm": 0.458984375, "learning_rate": 5.804205802899316e-06, "loss": 1.9871, "step": 22312 }, { "epoch": 0.7199033697578799, "grad_norm": 0.380859375, "learning_rate": 5.802962537859776e-06, "loss": 1.9444, "step": 22313 }, { "epoch": 0.7199356336116763, "grad_norm": 0.40234375, "learning_rate": 5.801719374054614e-06, "loss": 1.9332, "step": 22314 }, { "epoch": 0.7199678974654726, "grad_norm": 0.3671875, "learning_rate": 5.800476311497526e-06, "loss": 1.8848, "step": 22315 }, { "epoch": 0.720000161319269, "grad_norm": 0.416015625, "learning_rate": 5.7992333502022075e-06, "loss": 1.9473, "step": 22316 }, { "epoch": 0.7200324251730653, "grad_norm": 0.40234375, "learning_rate": 5.7979904901823165e-06, "loss": 1.8817, "step": 22317 }, { "epoch": 0.7200646890268617, "grad_norm": 0.4296875, "learning_rate": 5.7967477314515486e-06, "loss": 1.956, "step": 22318 }, { "epoch": 0.720096952880658, "grad_norm": 0.44140625, "learning_rate": 5.795505074023581e-06, "loss": 1.9391, "step": 22319 }, { "epoch": 0.7201292167344544, "grad_norm": 0.423828125, "learning_rate": 5.794262517912085e-06, "loss": 1.9493, "step": 22320 }, { "epoch": 0.7201614805882507, "grad_norm": 0.48046875, "learning_rate": 5.793020063130747e-06, "loss": 2.0045, "step": 22321 }, { "epoch": 0.7201937444420471, "grad_norm": 0.5078125, "learning_rate": 5.79177770969324e-06, "loss": 2.0186, "step": 22322 }, { "epoch": 0.7202260082958434, "grad_norm": 0.5078125, "learning_rate": 5.7905354576132324e-06, "loss": 1.9826, "step": 22323 }, { "epoch": 0.7202582721496398, "grad_norm": 0.4609375, "learning_rate": 5.789293306904408e-06, "loss": 2.0036, "step": 22324 }, { "epoch": 0.7202905360034361, "grad_norm": 0.458984375, "learning_rate": 5.788051257580437e-06, "loss": 2.0057, "step": 22325 }, { "epoch": 0.7203227998572325, "grad_norm": 0.455078125, "learning_rate": 5.786809309654983e-06, "loss": 1.9971, "step": 22326 }, { "epoch": 0.7203550637110288, "grad_norm": 0.4296875, "learning_rate": 5.785567463141727e-06, "loss": 1.9936, "step": 22327 }, { "epoch": 0.7203873275648252, "grad_norm": 0.404296875, "learning_rate": 5.784325718054336e-06, "loss": 1.9846, "step": 22328 }, { "epoch": 0.7204195914186214, "grad_norm": 0.470703125, "learning_rate": 5.783084074406469e-06, "loss": 2.0089, "step": 22329 }, { "epoch": 0.7204518552724178, "grad_norm": 0.40234375, "learning_rate": 5.781842532211807e-06, "loss": 1.9929, "step": 22330 }, { "epoch": 0.7204841191262141, "grad_norm": 0.44140625, "learning_rate": 5.780601091484008e-06, "loss": 2.0306, "step": 22331 }, { "epoch": 0.7205163829800105, "grad_norm": 0.416015625, "learning_rate": 5.779359752236734e-06, "loss": 1.9702, "step": 22332 }, { "epoch": 0.7205486468338069, "grad_norm": 0.427734375, "learning_rate": 5.778118514483658e-06, "loss": 2.0408, "step": 22333 }, { "epoch": 0.7205809106876032, "grad_norm": 0.423828125, "learning_rate": 5.776877378238432e-06, "loss": 2.024, "step": 22334 }, { "epoch": 0.7206131745413996, "grad_norm": 0.4453125, "learning_rate": 5.775636343514729e-06, "loss": 1.9935, "step": 22335 }, { "epoch": 0.7206454383951959, "grad_norm": 0.44140625, "learning_rate": 5.774395410326204e-06, "loss": 2.0402, "step": 22336 }, { "epoch": 0.7206777022489923, "grad_norm": 0.455078125, "learning_rate": 5.773154578686508e-06, "loss": 2.0389, "step": 22337 }, { "epoch": 0.7207099661027886, "grad_norm": 0.453125, "learning_rate": 5.771913848609321e-06, "loss": 2.0372, "step": 22338 }, { "epoch": 0.720742229956585, "grad_norm": 0.435546875, "learning_rate": 5.770673220108273e-06, "loss": 2.0001, "step": 22339 }, { "epoch": 0.7207744938103813, "grad_norm": 0.447265625, "learning_rate": 5.769432693197035e-06, "loss": 2.0018, "step": 22340 }, { "epoch": 0.7208067576641777, "grad_norm": 0.41015625, "learning_rate": 5.768192267889269e-06, "loss": 1.9993, "step": 22341 }, { "epoch": 0.720839021517974, "grad_norm": 0.466796875, "learning_rate": 5.766951944198612e-06, "loss": 1.9987, "step": 22342 }, { "epoch": 0.7208712853717704, "grad_norm": 0.40234375, "learning_rate": 5.765711722138721e-06, "loss": 1.9957, "step": 22343 }, { "epoch": 0.7209035492255667, "grad_norm": 0.494140625, "learning_rate": 5.764471601723261e-06, "loss": 2.0125, "step": 22344 }, { "epoch": 0.720935813079363, "grad_norm": 0.443359375, "learning_rate": 5.763231582965864e-06, "loss": 2.0113, "step": 22345 }, { "epoch": 0.7209680769331593, "grad_norm": 0.447265625, "learning_rate": 5.761991665880186e-06, "loss": 2.0407, "step": 22346 }, { "epoch": 0.7210003407869557, "grad_norm": 0.421875, "learning_rate": 5.760751850479888e-06, "loss": 2.0294, "step": 22347 }, { "epoch": 0.721032604640752, "grad_norm": 0.453125, "learning_rate": 5.759512136778592e-06, "loss": 2.0018, "step": 22348 }, { "epoch": 0.7210648684945484, "grad_norm": 0.404296875, "learning_rate": 5.758272524789965e-06, "loss": 2.0297, "step": 22349 }, { "epoch": 0.7210971323483447, "grad_norm": 0.48046875, "learning_rate": 5.757033014527644e-06, "loss": 1.9962, "step": 22350 }, { "epoch": 0.7211293962021411, "grad_norm": 0.486328125, "learning_rate": 5.755793606005268e-06, "loss": 2.0062, "step": 22351 }, { "epoch": 0.7211616600559374, "grad_norm": 0.478515625, "learning_rate": 5.754554299236488e-06, "loss": 1.9954, "step": 22352 }, { "epoch": 0.7211939239097338, "grad_norm": 0.4296875, "learning_rate": 5.753315094234942e-06, "loss": 2.0481, "step": 22353 }, { "epoch": 0.7212261877635302, "grad_norm": 0.52734375, "learning_rate": 5.752075991014266e-06, "loss": 2.0066, "step": 22354 }, { "epoch": 0.7212584516173265, "grad_norm": 0.48828125, "learning_rate": 5.7508369895881054e-06, "loss": 2.0041, "step": 22355 }, { "epoch": 0.7212907154711229, "grad_norm": 0.486328125, "learning_rate": 5.749598089970098e-06, "loss": 1.9976, "step": 22356 }, { "epoch": 0.7213229793249192, "grad_norm": 0.62890625, "learning_rate": 5.748359292173872e-06, "loss": 2.0246, "step": 22357 }, { "epoch": 0.7213552431787156, "grad_norm": 0.57421875, "learning_rate": 5.747120596213077e-06, "loss": 2.0037, "step": 22358 }, { "epoch": 0.7213875070325119, "grad_norm": 0.4765625, "learning_rate": 5.74588200210134e-06, "loss": 1.9984, "step": 22359 }, { "epoch": 0.7214197708863083, "grad_norm": 0.466796875, "learning_rate": 5.744643509852289e-06, "loss": 1.9938, "step": 22360 }, { "epoch": 0.7214520347401046, "grad_norm": 0.51171875, "learning_rate": 5.7434051194795684e-06, "loss": 2.0002, "step": 22361 }, { "epoch": 0.721484298593901, "grad_norm": 0.478515625, "learning_rate": 5.742166830996803e-06, "loss": 1.9987, "step": 22362 }, { "epoch": 0.7215165624476972, "grad_norm": 0.486328125, "learning_rate": 5.74092864441762e-06, "loss": 2.0501, "step": 22363 }, { "epoch": 0.7215488263014936, "grad_norm": 0.53515625, "learning_rate": 5.739690559755655e-06, "loss": 1.9896, "step": 22364 }, { "epoch": 0.7215810901552899, "grad_norm": 0.482421875, "learning_rate": 5.738452577024535e-06, "loss": 1.993, "step": 22365 }, { "epoch": 0.7216133540090863, "grad_norm": 0.427734375, "learning_rate": 5.737214696237877e-06, "loss": 1.979, "step": 22366 }, { "epoch": 0.7216456178628826, "grad_norm": 0.455078125, "learning_rate": 5.735976917409321e-06, "loss": 1.9681, "step": 22367 }, { "epoch": 0.721677881716679, "grad_norm": 0.4375, "learning_rate": 5.73473924055248e-06, "loss": 2.0098, "step": 22368 }, { "epoch": 0.7217101455704753, "grad_norm": 0.46484375, "learning_rate": 5.733501665680993e-06, "loss": 2.0079, "step": 22369 }, { "epoch": 0.7217424094242717, "grad_norm": 0.43359375, "learning_rate": 5.7322641928084615e-06, "loss": 2.0116, "step": 22370 }, { "epoch": 0.721774673278068, "grad_norm": 0.490234375, "learning_rate": 5.731026821948517e-06, "loss": 2.0612, "step": 22371 }, { "epoch": 0.7218069371318644, "grad_norm": 0.470703125, "learning_rate": 5.729789553114792e-06, "loss": 2.003, "step": 22372 }, { "epoch": 0.7218392009856607, "grad_norm": 0.474609375, "learning_rate": 5.72855238632088e-06, "loss": 2.0377, "step": 22373 }, { "epoch": 0.7218714648394571, "grad_norm": 0.439453125, "learning_rate": 5.7273153215804134e-06, "loss": 2.0192, "step": 22374 }, { "epoch": 0.7219037286932535, "grad_norm": 0.50390625, "learning_rate": 5.726078358907019e-06, "loss": 2.0052, "step": 22375 }, { "epoch": 0.7219359925470498, "grad_norm": 0.47265625, "learning_rate": 5.7248414983142875e-06, "loss": 2.0573, "step": 22376 }, { "epoch": 0.7219682564008462, "grad_norm": 0.51953125, "learning_rate": 5.723604739815855e-06, "loss": 2.0478, "step": 22377 }, { "epoch": 0.7220005202546425, "grad_norm": 0.4296875, "learning_rate": 5.722368083425326e-06, "loss": 2.0193, "step": 22378 }, { "epoch": 0.7220327841084389, "grad_norm": 0.470703125, "learning_rate": 5.721131529156306e-06, "loss": 2.0021, "step": 22379 }, { "epoch": 0.7220650479622351, "grad_norm": 0.439453125, "learning_rate": 5.719895077022422e-06, "loss": 2.0214, "step": 22380 }, { "epoch": 0.7220973118160315, "grad_norm": 0.423828125, "learning_rate": 5.718658727037272e-06, "loss": 1.9771, "step": 22381 }, { "epoch": 0.7221295756698278, "grad_norm": 0.431640625, "learning_rate": 5.717422479214466e-06, "loss": 1.9582, "step": 22382 }, { "epoch": 0.7221618395236242, "grad_norm": 0.412109375, "learning_rate": 5.716186333567617e-06, "loss": 1.8984, "step": 22383 }, { "epoch": 0.7221941033774205, "grad_norm": 0.42578125, "learning_rate": 5.7149502901103305e-06, "loss": 1.8614, "step": 22384 }, { "epoch": 0.7222263672312169, "grad_norm": 0.4765625, "learning_rate": 5.713714348856205e-06, "loss": 1.9273, "step": 22385 }, { "epoch": 0.7222586310850132, "grad_norm": 0.486328125, "learning_rate": 5.7124785098188545e-06, "loss": 1.9447, "step": 22386 }, { "epoch": 0.7222908949388096, "grad_norm": 0.470703125, "learning_rate": 5.711242773011878e-06, "loss": 1.9393, "step": 22387 }, { "epoch": 0.7223231587926059, "grad_norm": 0.435546875, "learning_rate": 5.710007138448871e-06, "loss": 1.9196, "step": 22388 }, { "epoch": 0.7223554226464023, "grad_norm": 0.416015625, "learning_rate": 5.708771606143447e-06, "loss": 1.9159, "step": 22389 }, { "epoch": 0.7223876865001986, "grad_norm": 0.45703125, "learning_rate": 5.707536176109199e-06, "loss": 1.898, "step": 22390 }, { "epoch": 0.722419950353995, "grad_norm": 0.451171875, "learning_rate": 5.706300848359723e-06, "loss": 1.9279, "step": 22391 }, { "epoch": 0.7224522142077913, "grad_norm": 0.421875, "learning_rate": 5.705065622908623e-06, "loss": 1.9102, "step": 22392 }, { "epoch": 0.7224844780615877, "grad_norm": 0.390625, "learning_rate": 5.703830499769493e-06, "loss": 1.9205, "step": 22393 }, { "epoch": 0.7225167419153841, "grad_norm": 0.42578125, "learning_rate": 5.7025954789559234e-06, "loss": 1.9181, "step": 22394 }, { "epoch": 0.7225490057691804, "grad_norm": 0.46484375, "learning_rate": 5.7013605604815165e-06, "loss": 1.9298, "step": 22395 }, { "epoch": 0.7225812696229768, "grad_norm": 0.390625, "learning_rate": 5.700125744359863e-06, "loss": 1.9029, "step": 22396 }, { "epoch": 0.722613533476773, "grad_norm": 0.462890625, "learning_rate": 5.698891030604548e-06, "loss": 1.8856, "step": 22397 }, { "epoch": 0.7226457973305694, "grad_norm": 0.41015625, "learning_rate": 5.697656419229175e-06, "loss": 1.9187, "step": 22398 }, { "epoch": 0.7226780611843657, "grad_norm": 0.404296875, "learning_rate": 5.6964219102473195e-06, "loss": 1.9247, "step": 22399 }, { "epoch": 0.7227103250381621, "grad_norm": 0.40234375, "learning_rate": 5.6951875036725876e-06, "loss": 1.9052, "step": 22400 }, { "epoch": 0.7227425888919584, "grad_norm": 0.41796875, "learning_rate": 5.693953199518545e-06, "loss": 1.9219, "step": 22401 }, { "epoch": 0.7227748527457548, "grad_norm": 0.41796875, "learning_rate": 5.6927189977987935e-06, "loss": 1.8988, "step": 22402 }, { "epoch": 0.7228071165995511, "grad_norm": 0.38671875, "learning_rate": 5.691484898526921e-06, "loss": 1.9057, "step": 22403 }, { "epoch": 0.7228393804533475, "grad_norm": 0.384765625, "learning_rate": 5.690250901716496e-06, "loss": 1.8801, "step": 22404 }, { "epoch": 0.7228716443071438, "grad_norm": 0.4140625, "learning_rate": 5.68901700738111e-06, "loss": 1.8594, "step": 22405 }, { "epoch": 0.7229039081609402, "grad_norm": 0.416015625, "learning_rate": 5.687783215534355e-06, "loss": 1.8348, "step": 22406 }, { "epoch": 0.7229361720147365, "grad_norm": 0.380859375, "learning_rate": 5.686549526189791e-06, "loss": 1.8319, "step": 22407 }, { "epoch": 0.7229684358685329, "grad_norm": 0.384765625, "learning_rate": 5.685315939361015e-06, "loss": 1.8186, "step": 22408 }, { "epoch": 0.7230006997223292, "grad_norm": 0.41015625, "learning_rate": 5.684082455061597e-06, "loss": 1.8128, "step": 22409 }, { "epoch": 0.7230329635761256, "grad_norm": 0.431640625, "learning_rate": 5.682849073305112e-06, "loss": 1.879, "step": 22410 }, { "epoch": 0.7230652274299219, "grad_norm": 0.400390625, "learning_rate": 5.681615794105145e-06, "loss": 1.8132, "step": 22411 }, { "epoch": 0.7230974912837183, "grad_norm": 0.400390625, "learning_rate": 5.6803826174752674e-06, "loss": 1.8338, "step": 22412 }, { "epoch": 0.7231297551375145, "grad_norm": 0.47265625, "learning_rate": 5.679149543429046e-06, "loss": 1.8356, "step": 22413 }, { "epoch": 0.7231620189913109, "grad_norm": 0.37890625, "learning_rate": 5.677916571980065e-06, "loss": 1.8395, "step": 22414 }, { "epoch": 0.7231942828451073, "grad_norm": 0.40234375, "learning_rate": 5.676683703141891e-06, "loss": 1.8479, "step": 22415 }, { "epoch": 0.7232265466989036, "grad_norm": 0.42578125, "learning_rate": 5.675450936928089e-06, "loss": 1.8191, "step": 22416 }, { "epoch": 0.7232588105527, "grad_norm": 0.412109375, "learning_rate": 5.674218273352239e-06, "loss": 1.8339, "step": 22417 }, { "epoch": 0.7232910744064963, "grad_norm": 0.396484375, "learning_rate": 5.6729857124279045e-06, "loss": 1.9095, "step": 22418 }, { "epoch": 0.7233233382602927, "grad_norm": 0.43359375, "learning_rate": 5.6717532541686465e-06, "loss": 1.9072, "step": 22419 }, { "epoch": 0.723355602114089, "grad_norm": 0.400390625, "learning_rate": 5.670520898588042e-06, "loss": 1.8304, "step": 22420 }, { "epoch": 0.7233878659678854, "grad_norm": 0.376953125, "learning_rate": 5.669288645699649e-06, "loss": 1.8141, "step": 22421 }, { "epoch": 0.7234201298216817, "grad_norm": 0.51171875, "learning_rate": 5.668056495517028e-06, "loss": 1.8411, "step": 22422 }, { "epoch": 0.7234523936754781, "grad_norm": 0.470703125, "learning_rate": 5.666824448053754e-06, "loss": 1.8509, "step": 22423 }, { "epoch": 0.7234846575292744, "grad_norm": 0.419921875, "learning_rate": 5.665592503323379e-06, "loss": 1.9084, "step": 22424 }, { "epoch": 0.7235169213830708, "grad_norm": 0.37890625, "learning_rate": 5.664360661339459e-06, "loss": 1.8415, "step": 22425 }, { "epoch": 0.7235491852368671, "grad_norm": 0.408203125, "learning_rate": 5.6631289221155655e-06, "loss": 1.8405, "step": 22426 }, { "epoch": 0.7235814490906635, "grad_norm": 0.443359375, "learning_rate": 5.661897285665251e-06, "loss": 1.8414, "step": 22427 }, { "epoch": 0.7236137129444598, "grad_norm": 0.37890625, "learning_rate": 5.660665752002074e-06, "loss": 1.8226, "step": 22428 }, { "epoch": 0.7236459767982562, "grad_norm": 0.365234375, "learning_rate": 5.659434321139579e-06, "loss": 1.8546, "step": 22429 }, { "epoch": 0.7236782406520524, "grad_norm": 0.4765625, "learning_rate": 5.658202993091339e-06, "loss": 1.8167, "step": 22430 }, { "epoch": 0.7237105045058488, "grad_norm": 0.416015625, "learning_rate": 5.656971767870897e-06, "loss": 1.8715, "step": 22431 }, { "epoch": 0.7237427683596451, "grad_norm": 0.3671875, "learning_rate": 5.6557406454918024e-06, "loss": 1.829, "step": 22432 }, { "epoch": 0.7237750322134415, "grad_norm": 0.423828125, "learning_rate": 5.65450962596761e-06, "loss": 1.8325, "step": 22433 }, { "epoch": 0.7238072960672379, "grad_norm": 0.44140625, "learning_rate": 5.653278709311883e-06, "loss": 1.8474, "step": 22434 }, { "epoch": 0.7238395599210342, "grad_norm": 0.375, "learning_rate": 5.652047895538148e-06, "loss": 1.8598, "step": 22435 }, { "epoch": 0.7238718237748306, "grad_norm": 0.447265625, "learning_rate": 5.650817184659963e-06, "loss": 1.8059, "step": 22436 }, { "epoch": 0.7239040876286269, "grad_norm": 0.4765625, "learning_rate": 5.6495865766908855e-06, "loss": 1.8407, "step": 22437 }, { "epoch": 0.7239363514824233, "grad_norm": 0.375, "learning_rate": 5.648356071644442e-06, "loss": 1.833, "step": 22438 }, { "epoch": 0.7239686153362196, "grad_norm": 0.388671875, "learning_rate": 5.64712566953419e-06, "loss": 1.8636, "step": 22439 }, { "epoch": 0.724000879190016, "grad_norm": 0.470703125, "learning_rate": 5.64589537037367e-06, "loss": 1.8125, "step": 22440 }, { "epoch": 0.7240331430438123, "grad_norm": 0.3984375, "learning_rate": 5.644665174176418e-06, "loss": 1.8613, "step": 22441 }, { "epoch": 0.7240654068976087, "grad_norm": 0.373046875, "learning_rate": 5.6434350809559845e-06, "loss": 1.8546, "step": 22442 }, { "epoch": 0.724097670751405, "grad_norm": 0.439453125, "learning_rate": 5.642205090725905e-06, "loss": 1.846, "step": 22443 }, { "epoch": 0.7241299346052014, "grad_norm": 0.40234375, "learning_rate": 5.640975203499714e-06, "loss": 1.8371, "step": 22444 }, { "epoch": 0.7241621984589977, "grad_norm": 0.373046875, "learning_rate": 5.6397454192909584e-06, "loss": 1.8448, "step": 22445 }, { "epoch": 0.724194462312794, "grad_norm": 0.390625, "learning_rate": 5.638515738113172e-06, "loss": 1.8596, "step": 22446 }, { "epoch": 0.7242267261665903, "grad_norm": 0.4140625, "learning_rate": 5.63728615997988e-06, "loss": 1.7856, "step": 22447 }, { "epoch": 0.7242589900203867, "grad_norm": 0.390625, "learning_rate": 5.636056684904634e-06, "loss": 1.8115, "step": 22448 }, { "epoch": 0.724291253874183, "grad_norm": 0.365234375, "learning_rate": 5.634827312900956e-06, "loss": 1.8394, "step": 22449 }, { "epoch": 0.7243235177279794, "grad_norm": 0.357421875, "learning_rate": 5.633598043982376e-06, "loss": 1.8515, "step": 22450 }, { "epoch": 0.7243557815817757, "grad_norm": 0.39453125, "learning_rate": 5.632368878162435e-06, "loss": 1.8327, "step": 22451 }, { "epoch": 0.7243880454355721, "grad_norm": 0.384765625, "learning_rate": 5.631139815454657e-06, "loss": 1.8764, "step": 22452 }, { "epoch": 0.7244203092893684, "grad_norm": 0.37109375, "learning_rate": 5.629910855872565e-06, "loss": 1.8388, "step": 22453 }, { "epoch": 0.7244525731431648, "grad_norm": 0.37109375, "learning_rate": 5.628681999429697e-06, "loss": 1.8504, "step": 22454 }, { "epoch": 0.7244848369969612, "grad_norm": 0.380859375, "learning_rate": 5.627453246139575e-06, "loss": 1.8347, "step": 22455 }, { "epoch": 0.7245171008507575, "grad_norm": 0.365234375, "learning_rate": 5.626224596015719e-06, "loss": 1.8723, "step": 22456 }, { "epoch": 0.7245493647045539, "grad_norm": 0.369140625, "learning_rate": 5.624996049071662e-06, "loss": 1.8365, "step": 22457 }, { "epoch": 0.7245816285583502, "grad_norm": 0.369140625, "learning_rate": 5.623767605320923e-06, "loss": 1.8501, "step": 22458 }, { "epoch": 0.7246138924121466, "grad_norm": 0.384765625, "learning_rate": 5.622539264777025e-06, "loss": 1.8511, "step": 22459 }, { "epoch": 0.7246461562659429, "grad_norm": 0.4140625, "learning_rate": 5.621311027453481e-06, "loss": 1.8188, "step": 22460 }, { "epoch": 0.7246784201197393, "grad_norm": 0.359375, "learning_rate": 5.620082893363824e-06, "loss": 1.8396, "step": 22461 }, { "epoch": 0.7247106839735356, "grad_norm": 0.373046875, "learning_rate": 5.6188548625215636e-06, "loss": 1.7934, "step": 22462 }, { "epoch": 0.724742947827332, "grad_norm": 0.353515625, "learning_rate": 5.617626934940215e-06, "loss": 1.8322, "step": 22463 }, { "epoch": 0.7247752116811282, "grad_norm": 0.361328125, "learning_rate": 5.616399110633302e-06, "loss": 1.8407, "step": 22464 }, { "epoch": 0.7248074755349246, "grad_norm": 0.4296875, "learning_rate": 5.615171389614337e-06, "loss": 1.9586, "step": 22465 }, { "epoch": 0.7248397393887209, "grad_norm": 0.43359375, "learning_rate": 5.613943771896826e-06, "loss": 1.9657, "step": 22466 }, { "epoch": 0.7248720032425173, "grad_norm": 0.431640625, "learning_rate": 5.61271625749429e-06, "loss": 1.9205, "step": 22467 }, { "epoch": 0.7249042670963136, "grad_norm": 0.37890625, "learning_rate": 5.611488846420249e-06, "loss": 1.927, "step": 22468 }, { "epoch": 0.72493653095011, "grad_norm": 0.439453125, "learning_rate": 5.6102615386881934e-06, "loss": 1.9452, "step": 22469 }, { "epoch": 0.7249687948039063, "grad_norm": 0.421875, "learning_rate": 5.609034334311647e-06, "loss": 1.9188, "step": 22470 }, { "epoch": 0.7250010586577027, "grad_norm": 0.41015625, "learning_rate": 5.607807233304115e-06, "loss": 1.9406, "step": 22471 }, { "epoch": 0.725033322511499, "grad_norm": 0.431640625, "learning_rate": 5.606580235679096e-06, "loss": 1.9537, "step": 22472 }, { "epoch": 0.7250655863652954, "grad_norm": 0.400390625, "learning_rate": 5.605353341450108e-06, "loss": 1.936, "step": 22473 }, { "epoch": 0.7250978502190917, "grad_norm": 0.388671875, "learning_rate": 5.604126550630652e-06, "loss": 1.9511, "step": 22474 }, { "epoch": 0.7251301140728881, "grad_norm": 0.416015625, "learning_rate": 5.602899863234225e-06, "loss": 1.9684, "step": 22475 }, { "epoch": 0.7251623779266845, "grad_norm": 0.3984375, "learning_rate": 5.601673279274341e-06, "loss": 1.9368, "step": 22476 }, { "epoch": 0.7251946417804808, "grad_norm": 0.40234375, "learning_rate": 5.6004467987644925e-06, "loss": 1.9227, "step": 22477 }, { "epoch": 0.7252269056342772, "grad_norm": 0.408203125, "learning_rate": 5.599220421718179e-06, "loss": 1.9252, "step": 22478 }, { "epoch": 0.7252591694880735, "grad_norm": 0.412109375, "learning_rate": 5.597994148148909e-06, "loss": 1.9235, "step": 22479 }, { "epoch": 0.7252914333418699, "grad_norm": 0.388671875, "learning_rate": 5.5967679780701725e-06, "loss": 1.9321, "step": 22480 }, { "epoch": 0.7253236971956661, "grad_norm": 0.392578125, "learning_rate": 5.5955419114954644e-06, "loss": 1.9224, "step": 22481 }, { "epoch": 0.7253559610494625, "grad_norm": 0.3984375, "learning_rate": 5.5943159484382885e-06, "loss": 1.9149, "step": 22482 }, { "epoch": 0.7253882249032588, "grad_norm": 0.3984375, "learning_rate": 5.593090088912137e-06, "loss": 1.958, "step": 22483 }, { "epoch": 0.7254204887570552, "grad_norm": 0.396484375, "learning_rate": 5.591864332930493e-06, "loss": 1.9194, "step": 22484 }, { "epoch": 0.7254527526108515, "grad_norm": 0.388671875, "learning_rate": 5.590638680506865e-06, "loss": 1.9453, "step": 22485 }, { "epoch": 0.7254850164646479, "grad_norm": 0.41015625, "learning_rate": 5.589413131654735e-06, "loss": 2.0012, "step": 22486 }, { "epoch": 0.7255172803184442, "grad_norm": 0.404296875, "learning_rate": 5.588187686387589e-06, "loss": 1.9668, "step": 22487 }, { "epoch": 0.7255495441722406, "grad_norm": 0.384765625, "learning_rate": 5.586962344718926e-06, "loss": 1.921, "step": 22488 }, { "epoch": 0.7255818080260369, "grad_norm": 0.37109375, "learning_rate": 5.5857371066622284e-06, "loss": 1.9175, "step": 22489 }, { "epoch": 0.7256140718798333, "grad_norm": 0.388671875, "learning_rate": 5.5845119722309845e-06, "loss": 1.9015, "step": 22490 }, { "epoch": 0.7256463357336296, "grad_norm": 0.38671875, "learning_rate": 5.583286941438671e-06, "loss": 1.9029, "step": 22491 }, { "epoch": 0.725678599587426, "grad_norm": 0.37109375, "learning_rate": 5.582062014298786e-06, "loss": 1.8859, "step": 22492 }, { "epoch": 0.7257108634412223, "grad_norm": 0.400390625, "learning_rate": 5.5808371908248055e-06, "loss": 1.9374, "step": 22493 }, { "epoch": 0.7257431272950187, "grad_norm": 0.39453125, "learning_rate": 5.579612471030206e-06, "loss": 1.932, "step": 22494 }, { "epoch": 0.7257753911488151, "grad_norm": 0.447265625, "learning_rate": 5.578387854928479e-06, "loss": 1.9295, "step": 22495 }, { "epoch": 0.7258076550026114, "grad_norm": 0.38671875, "learning_rate": 5.577163342533101e-06, "loss": 1.9531, "step": 22496 }, { "epoch": 0.7258399188564078, "grad_norm": 0.40234375, "learning_rate": 5.5759389338575446e-06, "loss": 1.9317, "step": 22497 }, { "epoch": 0.725872182710204, "grad_norm": 0.458984375, "learning_rate": 5.57471462891529e-06, "loss": 1.8919, "step": 22498 }, { "epoch": 0.7259044465640004, "grad_norm": 0.396484375, "learning_rate": 5.573490427719826e-06, "loss": 1.9207, "step": 22499 }, { "epoch": 0.7259367104177967, "grad_norm": 0.390625, "learning_rate": 5.572266330284605e-06, "loss": 1.9456, "step": 22500 }, { "epoch": 0.7259689742715931, "grad_norm": 0.373046875, "learning_rate": 5.57104233662312e-06, "loss": 1.9815, "step": 22501 }, { "epoch": 0.7260012381253894, "grad_norm": 0.384765625, "learning_rate": 5.569818446748835e-06, "loss": 1.9667, "step": 22502 }, { "epoch": 0.7260335019791858, "grad_norm": 0.38671875, "learning_rate": 5.56859466067522e-06, "loss": 1.9495, "step": 22503 }, { "epoch": 0.7260657658329821, "grad_norm": 0.41796875, "learning_rate": 5.567370978415753e-06, "loss": 1.9512, "step": 22504 }, { "epoch": 0.7260980296867785, "grad_norm": 0.392578125, "learning_rate": 5.566147399983899e-06, "loss": 1.9207, "step": 22505 }, { "epoch": 0.7261302935405748, "grad_norm": 0.3984375, "learning_rate": 5.5649239253931225e-06, "loss": 1.9569, "step": 22506 }, { "epoch": 0.7261625573943712, "grad_norm": 0.376953125, "learning_rate": 5.5637005546568985e-06, "loss": 1.9043, "step": 22507 }, { "epoch": 0.7261948212481675, "grad_norm": 0.376953125, "learning_rate": 5.56247728778869e-06, "loss": 1.9242, "step": 22508 }, { "epoch": 0.7262270851019639, "grad_norm": 0.3984375, "learning_rate": 5.561254124801955e-06, "loss": 1.9362, "step": 22509 }, { "epoch": 0.7262593489557602, "grad_norm": 0.41015625, "learning_rate": 5.56003106571017e-06, "loss": 1.8599, "step": 22510 }, { "epoch": 0.7262916128095566, "grad_norm": 0.388671875, "learning_rate": 5.558808110526791e-06, "loss": 1.8852, "step": 22511 }, { "epoch": 0.7263238766633529, "grad_norm": 0.412109375, "learning_rate": 5.557585259265274e-06, "loss": 1.9057, "step": 22512 }, { "epoch": 0.7263561405171493, "grad_norm": 0.37109375, "learning_rate": 5.556362511939088e-06, "loss": 1.9275, "step": 22513 }, { "epoch": 0.7263884043709455, "grad_norm": 0.384765625, "learning_rate": 5.5551398685616885e-06, "loss": 1.9099, "step": 22514 }, { "epoch": 0.7264206682247419, "grad_norm": 0.380859375, "learning_rate": 5.55391732914653e-06, "loss": 1.9377, "step": 22515 }, { "epoch": 0.7264529320785383, "grad_norm": 0.375, "learning_rate": 5.552694893707078e-06, "loss": 1.9218, "step": 22516 }, { "epoch": 0.7264851959323346, "grad_norm": 0.380859375, "learning_rate": 5.551472562256783e-06, "loss": 1.9153, "step": 22517 }, { "epoch": 0.726517459786131, "grad_norm": 0.40234375, "learning_rate": 5.550250334809094e-06, "loss": 2.0086, "step": 22518 }, { "epoch": 0.7265497236399273, "grad_norm": 0.43359375, "learning_rate": 5.549028211377476e-06, "loss": 2.0324, "step": 22519 }, { "epoch": 0.7265819874937237, "grad_norm": 0.423828125, "learning_rate": 5.547806191975375e-06, "loss": 2.0051, "step": 22520 }, { "epoch": 0.72661425134752, "grad_norm": 0.458984375, "learning_rate": 5.546584276616241e-06, "loss": 2.0094, "step": 22521 }, { "epoch": 0.7266465152013164, "grad_norm": 0.443359375, "learning_rate": 5.5453624653135225e-06, "loss": 2.0287, "step": 22522 }, { "epoch": 0.7266787790551127, "grad_norm": 0.4609375, "learning_rate": 5.5441407580806755e-06, "loss": 2.0411, "step": 22523 }, { "epoch": 0.7267110429089091, "grad_norm": 0.5390625, "learning_rate": 5.542919154931143e-06, "loss": 2.0099, "step": 22524 }, { "epoch": 0.7267433067627054, "grad_norm": 0.416015625, "learning_rate": 5.5416976558783675e-06, "loss": 2.0243, "step": 22525 }, { "epoch": 0.7267755706165018, "grad_norm": 0.484375, "learning_rate": 5.540476260935803e-06, "loss": 2.043, "step": 22526 }, { "epoch": 0.7268078344702981, "grad_norm": 0.435546875, "learning_rate": 5.539254970116891e-06, "loss": 2.0469, "step": 22527 }, { "epoch": 0.7268400983240945, "grad_norm": 0.5625, "learning_rate": 5.538033783435068e-06, "loss": 2.0053, "step": 22528 }, { "epoch": 0.7268723621778908, "grad_norm": 0.435546875, "learning_rate": 5.536812700903786e-06, "loss": 1.9912, "step": 22529 }, { "epoch": 0.7269046260316872, "grad_norm": 0.5, "learning_rate": 5.5355917225364796e-06, "loss": 2.032, "step": 22530 }, { "epoch": 0.7269368898854834, "grad_norm": 0.423828125, "learning_rate": 5.5343708483465864e-06, "loss": 1.9738, "step": 22531 }, { "epoch": 0.7269691537392798, "grad_norm": 0.5, "learning_rate": 5.533150078347551e-06, "loss": 2.0344, "step": 22532 }, { "epoch": 0.7270014175930761, "grad_norm": 0.48828125, "learning_rate": 5.531929412552812e-06, "loss": 2.0253, "step": 22533 }, { "epoch": 0.7270336814468725, "grad_norm": 0.50390625, "learning_rate": 5.530708850975792e-06, "loss": 2.0112, "step": 22534 }, { "epoch": 0.7270659453006689, "grad_norm": 0.455078125, "learning_rate": 5.529488393629944e-06, "loss": 2.0184, "step": 22535 }, { "epoch": 0.7270982091544652, "grad_norm": 0.404296875, "learning_rate": 5.528268040528694e-06, "loss": 2.0259, "step": 22536 }, { "epoch": 0.7271304730082616, "grad_norm": 0.478515625, "learning_rate": 5.5270477916854665e-06, "loss": 1.953, "step": 22537 }, { "epoch": 0.7271627368620579, "grad_norm": 0.3984375, "learning_rate": 5.525827647113708e-06, "loss": 1.9248, "step": 22538 }, { "epoch": 0.7271950007158543, "grad_norm": 0.44140625, "learning_rate": 5.524607606826842e-06, "loss": 1.9205, "step": 22539 }, { "epoch": 0.7272272645696506, "grad_norm": 0.40625, "learning_rate": 5.523387670838292e-06, "loss": 1.9517, "step": 22540 }, { "epoch": 0.727259528423447, "grad_norm": 0.392578125, "learning_rate": 5.522167839161497e-06, "loss": 1.9349, "step": 22541 }, { "epoch": 0.7272917922772433, "grad_norm": 0.421875, "learning_rate": 5.52094811180988e-06, "loss": 1.89, "step": 22542 }, { "epoch": 0.7273240561310397, "grad_norm": 0.41015625, "learning_rate": 5.519728488796861e-06, "loss": 1.9323, "step": 22543 }, { "epoch": 0.727356319984836, "grad_norm": 0.404296875, "learning_rate": 5.518508970135874e-06, "loss": 1.935, "step": 22544 }, { "epoch": 0.7273885838386324, "grad_norm": 0.390625, "learning_rate": 5.517289555840339e-06, "loss": 1.9233, "step": 22545 }, { "epoch": 0.7274208476924287, "grad_norm": 0.400390625, "learning_rate": 5.516070245923672e-06, "loss": 1.9316, "step": 22546 }, { "epoch": 0.727453111546225, "grad_norm": 0.376953125, "learning_rate": 5.514851040399305e-06, "loss": 1.8386, "step": 22547 }, { "epoch": 0.7274853754000213, "grad_norm": 0.36328125, "learning_rate": 5.513631939280653e-06, "loss": 1.8553, "step": 22548 }, { "epoch": 0.7275176392538177, "grad_norm": 0.373046875, "learning_rate": 5.51241294258113e-06, "loss": 1.8395, "step": 22549 }, { "epoch": 0.727549903107614, "grad_norm": 0.421875, "learning_rate": 5.511194050314165e-06, "loss": 1.8465, "step": 22550 }, { "epoch": 0.7275821669614104, "grad_norm": 0.42578125, "learning_rate": 5.509975262493168e-06, "loss": 1.861, "step": 22551 }, { "epoch": 0.7276144308152067, "grad_norm": 0.39453125, "learning_rate": 5.508756579131556e-06, "loss": 1.8634, "step": 22552 }, { "epoch": 0.7276466946690031, "grad_norm": 0.37109375, "learning_rate": 5.507538000242736e-06, "loss": 1.8656, "step": 22553 }, { "epoch": 0.7276789585227994, "grad_norm": 0.388671875, "learning_rate": 5.5063195258401336e-06, "loss": 1.8731, "step": 22554 }, { "epoch": 0.7277112223765958, "grad_norm": 0.421875, "learning_rate": 5.505101155937155e-06, "loss": 1.88, "step": 22555 }, { "epoch": 0.7277434862303922, "grad_norm": 0.361328125, "learning_rate": 5.5038828905472065e-06, "loss": 1.8378, "step": 22556 }, { "epoch": 0.7277757500841885, "grad_norm": 0.400390625, "learning_rate": 5.502664729683707e-06, "loss": 1.8373, "step": 22557 }, { "epoch": 0.7278080139379849, "grad_norm": 0.3984375, "learning_rate": 5.501446673360062e-06, "loss": 1.8741, "step": 22558 }, { "epoch": 0.7278402777917812, "grad_norm": 0.37890625, "learning_rate": 5.500228721589672e-06, "loss": 1.8408, "step": 22559 }, { "epoch": 0.7278725416455776, "grad_norm": 0.3671875, "learning_rate": 5.4990108743859544e-06, "loss": 1.8306, "step": 22560 }, { "epoch": 0.7279048054993739, "grad_norm": 0.37890625, "learning_rate": 5.497793131762309e-06, "loss": 1.8373, "step": 22561 }, { "epoch": 0.7279370693531703, "grad_norm": 0.375, "learning_rate": 5.496575493732135e-06, "loss": 1.8108, "step": 22562 }, { "epoch": 0.7279693332069666, "grad_norm": 0.369140625, "learning_rate": 5.495357960308846e-06, "loss": 1.8561, "step": 22563 }, { "epoch": 0.728001597060763, "grad_norm": 0.376953125, "learning_rate": 5.4941405315058364e-06, "loss": 1.8632, "step": 22564 }, { "epoch": 0.7280338609145592, "grad_norm": 0.365234375, "learning_rate": 5.4929232073365046e-06, "loss": 1.8301, "step": 22565 }, { "epoch": 0.7280661247683556, "grad_norm": 0.419921875, "learning_rate": 5.491705987814257e-06, "loss": 1.8306, "step": 22566 }, { "epoch": 0.7280983886221519, "grad_norm": 0.380859375, "learning_rate": 5.490488872952492e-06, "loss": 1.8444, "step": 22567 }, { "epoch": 0.7281306524759483, "grad_norm": 0.373046875, "learning_rate": 5.489271862764595e-06, "loss": 1.8456, "step": 22568 }, { "epoch": 0.7281629163297446, "grad_norm": 0.3671875, "learning_rate": 5.488054957263976e-06, "loss": 1.8627, "step": 22569 }, { "epoch": 0.728195180183541, "grad_norm": 0.37890625, "learning_rate": 5.4868381564640255e-06, "loss": 1.877, "step": 22570 }, { "epoch": 0.7282274440373373, "grad_norm": 0.392578125, "learning_rate": 5.485621460378131e-06, "loss": 1.8379, "step": 22571 }, { "epoch": 0.7282597078911337, "grad_norm": 0.373046875, "learning_rate": 5.484404869019694e-06, "loss": 1.8361, "step": 22572 }, { "epoch": 0.72829197174493, "grad_norm": 0.3671875, "learning_rate": 5.483188382402102e-06, "loss": 1.879, "step": 22573 }, { "epoch": 0.7283242355987264, "grad_norm": 0.39453125, "learning_rate": 5.4819720005387395e-06, "loss": 1.8795, "step": 22574 }, { "epoch": 0.7283564994525227, "grad_norm": 0.3671875, "learning_rate": 5.480755723443007e-06, "loss": 1.8788, "step": 22575 }, { "epoch": 0.7283887633063191, "grad_norm": 0.373046875, "learning_rate": 5.4795395511282855e-06, "loss": 1.8844, "step": 22576 }, { "epoch": 0.7284210271601155, "grad_norm": 0.37890625, "learning_rate": 5.478323483607958e-06, "loss": 1.804, "step": 22577 }, { "epoch": 0.7284532910139118, "grad_norm": 0.390625, "learning_rate": 5.47710752089542e-06, "loss": 1.8085, "step": 22578 }, { "epoch": 0.7284855548677082, "grad_norm": 0.373046875, "learning_rate": 5.475891663004051e-06, "loss": 1.8323, "step": 22579 }, { "epoch": 0.7285178187215045, "grad_norm": 0.380859375, "learning_rate": 5.474675909947232e-06, "loss": 1.844, "step": 22580 }, { "epoch": 0.7285500825753009, "grad_norm": 0.70703125, "learning_rate": 5.473460261738344e-06, "loss": 1.8115, "step": 22581 }, { "epoch": 0.7285823464290971, "grad_norm": 0.40625, "learning_rate": 5.472244718390775e-06, "loss": 1.8502, "step": 22582 }, { "epoch": 0.7286146102828935, "grad_norm": 0.361328125, "learning_rate": 5.471029279917902e-06, "loss": 1.8885, "step": 22583 }, { "epoch": 0.7286468741366898, "grad_norm": 0.380859375, "learning_rate": 5.4698139463330975e-06, "loss": 1.8002, "step": 22584 }, { "epoch": 0.7286791379904862, "grad_norm": 0.373046875, "learning_rate": 5.468598717649749e-06, "loss": 1.7814, "step": 22585 }, { "epoch": 0.7287114018442825, "grad_norm": 0.390625, "learning_rate": 5.467383593881228e-06, "loss": 1.7871, "step": 22586 }, { "epoch": 0.7287436656980789, "grad_norm": 0.435546875, "learning_rate": 5.466168575040906e-06, "loss": 1.8005, "step": 22587 }, { "epoch": 0.7287759295518752, "grad_norm": 0.37890625, "learning_rate": 5.464953661142163e-06, "loss": 1.8131, "step": 22588 }, { "epoch": 0.7288081934056716, "grad_norm": 0.396484375, "learning_rate": 5.4637388521983715e-06, "loss": 1.8146, "step": 22589 }, { "epoch": 0.7288404572594679, "grad_norm": 0.375, "learning_rate": 5.4625241482228964e-06, "loss": 1.8426, "step": 22590 }, { "epoch": 0.7288727211132643, "grad_norm": 0.462890625, "learning_rate": 5.461309549229118e-06, "loss": 1.7857, "step": 22591 }, { "epoch": 0.7289049849670606, "grad_norm": 0.3671875, "learning_rate": 5.460095055230402e-06, "loss": 1.8328, "step": 22592 }, { "epoch": 0.728937248820857, "grad_norm": 0.376953125, "learning_rate": 5.45888066624011e-06, "loss": 1.879, "step": 22593 }, { "epoch": 0.7289695126746533, "grad_norm": 0.37890625, "learning_rate": 5.4576663822716186e-06, "loss": 1.8246, "step": 22594 }, { "epoch": 0.7290017765284497, "grad_norm": 0.37109375, "learning_rate": 5.456452203338292e-06, "loss": 1.8092, "step": 22595 }, { "epoch": 0.7290340403822461, "grad_norm": 0.40625, "learning_rate": 5.455238129453487e-06, "loss": 1.8359, "step": 22596 }, { "epoch": 0.7290663042360424, "grad_norm": 0.369140625, "learning_rate": 5.454024160630578e-06, "loss": 1.8036, "step": 22597 }, { "epoch": 0.7290985680898387, "grad_norm": 0.361328125, "learning_rate": 5.452810296882922e-06, "loss": 1.8543, "step": 22598 }, { "epoch": 0.729130831943635, "grad_norm": 0.3828125, "learning_rate": 5.451596538223879e-06, "loss": 1.8026, "step": 22599 }, { "epoch": 0.7291630957974314, "grad_norm": 0.36328125, "learning_rate": 5.4503828846668145e-06, "loss": 1.8016, "step": 22600 }, { "epoch": 0.7291953596512277, "grad_norm": 0.4765625, "learning_rate": 5.449169336225083e-06, "loss": 1.8023, "step": 22601 }, { "epoch": 0.7292276235050241, "grad_norm": 0.3671875, "learning_rate": 5.447955892912041e-06, "loss": 1.8167, "step": 22602 }, { "epoch": 0.7292598873588204, "grad_norm": 0.3828125, "learning_rate": 5.446742554741052e-06, "loss": 1.8429, "step": 22603 }, { "epoch": 0.7292921512126168, "grad_norm": 0.3671875, "learning_rate": 5.445529321725467e-06, "loss": 1.7875, "step": 22604 }, { "epoch": 0.7293244150664131, "grad_norm": 0.369140625, "learning_rate": 5.444316193878637e-06, "loss": 1.8243, "step": 22605 }, { "epoch": 0.7293566789202095, "grad_norm": 0.37109375, "learning_rate": 5.443103171213924e-06, "loss": 1.8007, "step": 22606 }, { "epoch": 0.7293889427740058, "grad_norm": 0.384765625, "learning_rate": 5.441890253744675e-06, "loss": 1.8295, "step": 22607 }, { "epoch": 0.7294212066278022, "grad_norm": 0.404296875, "learning_rate": 5.440677441484236e-06, "loss": 1.8476, "step": 22608 }, { "epoch": 0.7294534704815985, "grad_norm": 0.39453125, "learning_rate": 5.439464734445967e-06, "loss": 1.7998, "step": 22609 }, { "epoch": 0.7294857343353949, "grad_norm": 0.359375, "learning_rate": 5.438252132643211e-06, "loss": 1.833, "step": 22610 }, { "epoch": 0.7295179981891912, "grad_norm": 0.38671875, "learning_rate": 5.437039636089316e-06, "loss": 1.8062, "step": 22611 }, { "epoch": 0.7295502620429876, "grad_norm": 0.39453125, "learning_rate": 5.4358272447976245e-06, "loss": 1.808, "step": 22612 }, { "epoch": 0.7295825258967839, "grad_norm": 0.376953125, "learning_rate": 5.4346149587814886e-06, "loss": 1.8392, "step": 22613 }, { "epoch": 0.7296147897505803, "grad_norm": 0.404296875, "learning_rate": 5.433402778054251e-06, "loss": 1.8337, "step": 22614 }, { "epoch": 0.7296470536043765, "grad_norm": 0.423828125, "learning_rate": 5.432190702629246e-06, "loss": 1.8175, "step": 22615 }, { "epoch": 0.7296793174581729, "grad_norm": 0.373046875, "learning_rate": 5.430978732519826e-06, "loss": 1.7813, "step": 22616 }, { "epoch": 0.7297115813119693, "grad_norm": 0.412109375, "learning_rate": 5.429766867739329e-06, "loss": 1.8156, "step": 22617 }, { "epoch": 0.7297438451657656, "grad_norm": 0.412109375, "learning_rate": 5.428555108301087e-06, "loss": 1.802, "step": 22618 }, { "epoch": 0.729776109019562, "grad_norm": 0.37890625, "learning_rate": 5.427343454218449e-06, "loss": 1.8316, "step": 22619 }, { "epoch": 0.7298083728733583, "grad_norm": 0.376953125, "learning_rate": 5.426131905504747e-06, "loss": 1.7936, "step": 22620 }, { "epoch": 0.7298406367271547, "grad_norm": 0.412109375, "learning_rate": 5.4249204621733125e-06, "loss": 1.8211, "step": 22621 }, { "epoch": 0.729872900580951, "grad_norm": 0.435546875, "learning_rate": 5.423709124237489e-06, "loss": 1.8538, "step": 22622 }, { "epoch": 0.7299051644347474, "grad_norm": 0.37890625, "learning_rate": 5.422497891710604e-06, "loss": 1.8048, "step": 22623 }, { "epoch": 0.7299374282885437, "grad_norm": 0.40625, "learning_rate": 5.421286764605987e-06, "loss": 1.846, "step": 22624 }, { "epoch": 0.7299696921423401, "grad_norm": 0.490234375, "learning_rate": 5.4200757429369805e-06, "loss": 1.8058, "step": 22625 }, { "epoch": 0.7300019559961364, "grad_norm": 0.38671875, "learning_rate": 5.418864826716906e-06, "loss": 1.8239, "step": 22626 }, { "epoch": 0.7300342198499328, "grad_norm": 0.470703125, "learning_rate": 5.417654015959089e-06, "loss": 1.8411, "step": 22627 }, { "epoch": 0.7300664837037291, "grad_norm": 0.419921875, "learning_rate": 5.4164433106768675e-06, "loss": 1.8182, "step": 22628 }, { "epoch": 0.7300987475575255, "grad_norm": 0.38671875, "learning_rate": 5.415232710883562e-06, "loss": 1.7783, "step": 22629 }, { "epoch": 0.7301310114113218, "grad_norm": 0.4296875, "learning_rate": 5.414022216592494e-06, "loss": 1.8225, "step": 22630 }, { "epoch": 0.7301632752651182, "grad_norm": 0.447265625, "learning_rate": 5.412811827816998e-06, "loss": 1.8466, "step": 22631 }, { "epoch": 0.7301955391189144, "grad_norm": 0.375, "learning_rate": 5.4116015445703925e-06, "loss": 1.8433, "step": 22632 }, { "epoch": 0.7302278029727108, "grad_norm": 0.3828125, "learning_rate": 5.410391366865992e-06, "loss": 1.815, "step": 22633 }, { "epoch": 0.7302600668265071, "grad_norm": 0.435546875, "learning_rate": 5.409181294717129e-06, "loss": 1.8489, "step": 22634 }, { "epoch": 0.7302923306803035, "grad_norm": 0.380859375, "learning_rate": 5.4079713281371184e-06, "loss": 1.8494, "step": 22635 }, { "epoch": 0.7303245945340998, "grad_norm": 0.384765625, "learning_rate": 5.406761467139272e-06, "loss": 1.8684, "step": 22636 }, { "epoch": 0.7303568583878962, "grad_norm": 0.39453125, "learning_rate": 5.405551711736921e-06, "loss": 1.827, "step": 22637 }, { "epoch": 0.7303891222416926, "grad_norm": 0.48046875, "learning_rate": 5.4043420619433695e-06, "loss": 1.8535, "step": 22638 }, { "epoch": 0.7304213860954889, "grad_norm": 0.369140625, "learning_rate": 5.403132517771935e-06, "loss": 1.8413, "step": 22639 }, { "epoch": 0.7304536499492853, "grad_norm": 0.37109375, "learning_rate": 5.401923079235936e-06, "loss": 1.8776, "step": 22640 }, { "epoch": 0.7304859138030816, "grad_norm": 0.419921875, "learning_rate": 5.400713746348684e-06, "loss": 1.8206, "step": 22641 }, { "epoch": 0.730518177656878, "grad_norm": 0.38671875, "learning_rate": 5.399504519123487e-06, "loss": 1.8574, "step": 22642 }, { "epoch": 0.7305504415106743, "grad_norm": 0.396484375, "learning_rate": 5.398295397573654e-06, "loss": 1.8101, "step": 22643 }, { "epoch": 0.7305827053644707, "grad_norm": 0.39453125, "learning_rate": 5.3970863817124996e-06, "loss": 1.88, "step": 22644 }, { "epoch": 0.730614969218267, "grad_norm": 0.50390625, "learning_rate": 5.39587747155333e-06, "loss": 1.7979, "step": 22645 }, { "epoch": 0.7306472330720634, "grad_norm": 0.423828125, "learning_rate": 5.394668667109446e-06, "loss": 1.828, "step": 22646 }, { "epoch": 0.7306794969258597, "grad_norm": 0.3671875, "learning_rate": 5.393459968394166e-06, "loss": 1.82, "step": 22647 }, { "epoch": 0.730711760779656, "grad_norm": 0.455078125, "learning_rate": 5.3922513754207834e-06, "loss": 1.829, "step": 22648 }, { "epoch": 0.7307440246334523, "grad_norm": 0.5, "learning_rate": 5.391042888202602e-06, "loss": 1.8667, "step": 22649 }, { "epoch": 0.7307762884872487, "grad_norm": 0.37109375, "learning_rate": 5.389834506752931e-06, "loss": 1.8076, "step": 22650 }, { "epoch": 0.730808552341045, "grad_norm": 0.396484375, "learning_rate": 5.388626231085068e-06, "loss": 1.8246, "step": 22651 }, { "epoch": 0.7308408161948414, "grad_norm": 0.484375, "learning_rate": 5.3874180612123084e-06, "loss": 1.8545, "step": 22652 }, { "epoch": 0.7308730800486377, "grad_norm": 0.40234375, "learning_rate": 5.386209997147957e-06, "loss": 1.868, "step": 22653 }, { "epoch": 0.7309053439024341, "grad_norm": 0.37890625, "learning_rate": 5.3850020389053114e-06, "loss": 1.7981, "step": 22654 }, { "epoch": 0.7309376077562304, "grad_norm": 0.4140625, "learning_rate": 5.3837941864976606e-06, "loss": 1.8385, "step": 22655 }, { "epoch": 0.7309698716100268, "grad_norm": 0.392578125, "learning_rate": 5.382586439938309e-06, "loss": 1.8733, "step": 22656 }, { "epoch": 0.7310021354638232, "grad_norm": 0.416015625, "learning_rate": 5.381378799240547e-06, "loss": 1.8334, "step": 22657 }, { "epoch": 0.7310343993176195, "grad_norm": 0.392578125, "learning_rate": 5.38017126441766e-06, "loss": 1.8455, "step": 22658 }, { "epoch": 0.7310666631714159, "grad_norm": 0.421875, "learning_rate": 5.378963835482953e-06, "loss": 1.8584, "step": 22659 }, { "epoch": 0.7310989270252122, "grad_norm": 0.3671875, "learning_rate": 5.37775651244971e-06, "loss": 1.8551, "step": 22660 }, { "epoch": 0.7311311908790086, "grad_norm": 0.400390625, "learning_rate": 5.376549295331214e-06, "loss": 1.9536, "step": 22661 }, { "epoch": 0.7311634547328049, "grad_norm": 0.416015625, "learning_rate": 5.3753421841407715e-06, "loss": 1.9852, "step": 22662 }, { "epoch": 0.7311957185866013, "grad_norm": 0.390625, "learning_rate": 5.3741351788916464e-06, "loss": 1.9294, "step": 22663 }, { "epoch": 0.7312279824403976, "grad_norm": 0.390625, "learning_rate": 5.372928279597137e-06, "loss": 1.9347, "step": 22664 }, { "epoch": 0.731260246294194, "grad_norm": 0.37109375, "learning_rate": 5.371721486270529e-06, "loss": 1.9471, "step": 22665 }, { "epoch": 0.7312925101479902, "grad_norm": 0.38671875, "learning_rate": 5.370514798925105e-06, "loss": 1.9648, "step": 22666 }, { "epoch": 0.7313247740017866, "grad_norm": 0.375, "learning_rate": 5.369308217574142e-06, "loss": 1.9096, "step": 22667 }, { "epoch": 0.7313570378555829, "grad_norm": 0.439453125, "learning_rate": 5.368101742230928e-06, "loss": 1.9443, "step": 22668 }, { "epoch": 0.7313893017093793, "grad_norm": 0.404296875, "learning_rate": 5.366895372908743e-06, "loss": 1.9572, "step": 22669 }, { "epoch": 0.7314215655631756, "grad_norm": 0.4296875, "learning_rate": 5.365689109620855e-06, "loss": 1.9262, "step": 22670 }, { "epoch": 0.731453829416972, "grad_norm": 0.39453125, "learning_rate": 5.364482952380556e-06, "loss": 1.9503, "step": 22671 }, { "epoch": 0.7314860932707683, "grad_norm": 0.392578125, "learning_rate": 5.363276901201116e-06, "loss": 1.9211, "step": 22672 }, { "epoch": 0.7315183571245647, "grad_norm": 0.376953125, "learning_rate": 5.362070956095811e-06, "loss": 1.9059, "step": 22673 }, { "epoch": 0.731550620978361, "grad_norm": 0.423828125, "learning_rate": 5.3608651170779094e-06, "loss": 1.924, "step": 22674 }, { "epoch": 0.7315828848321574, "grad_norm": 0.40234375, "learning_rate": 5.359659384160694e-06, "loss": 1.9542, "step": 22675 }, { "epoch": 0.7316151486859537, "grad_norm": 0.396484375, "learning_rate": 5.358453757357433e-06, "loss": 1.9257, "step": 22676 }, { "epoch": 0.7316474125397501, "grad_norm": 0.390625, "learning_rate": 5.35724823668139e-06, "loss": 1.9247, "step": 22677 }, { "epoch": 0.7316796763935465, "grad_norm": 0.380859375, "learning_rate": 5.356042822145848e-06, "loss": 1.9453, "step": 22678 }, { "epoch": 0.7317119402473428, "grad_norm": 0.384765625, "learning_rate": 5.354837513764065e-06, "loss": 1.9214, "step": 22679 }, { "epoch": 0.7317442041011392, "grad_norm": 0.39453125, "learning_rate": 5.353632311549308e-06, "loss": 1.9817, "step": 22680 }, { "epoch": 0.7317764679549355, "grad_norm": 0.41015625, "learning_rate": 5.352427215514851e-06, "loss": 1.9332, "step": 22681 }, { "epoch": 0.7318087318087318, "grad_norm": 0.3828125, "learning_rate": 5.351222225673955e-06, "loss": 1.9476, "step": 22682 }, { "epoch": 0.7318409956625281, "grad_norm": 0.392578125, "learning_rate": 5.350017342039877e-06, "loss": 1.9422, "step": 22683 }, { "epoch": 0.7318732595163245, "grad_norm": 0.400390625, "learning_rate": 5.348812564625891e-06, "loss": 1.9554, "step": 22684 }, { "epoch": 0.7319055233701208, "grad_norm": 0.384765625, "learning_rate": 5.3476078934452505e-06, "loss": 1.9775, "step": 22685 }, { "epoch": 0.7319377872239172, "grad_norm": 0.390625, "learning_rate": 5.3464033285112136e-06, "loss": 2.0041, "step": 22686 }, { "epoch": 0.7319700510777135, "grad_norm": 0.373046875, "learning_rate": 5.345198869837049e-06, "loss": 1.956, "step": 22687 }, { "epoch": 0.7320023149315099, "grad_norm": 0.41796875, "learning_rate": 5.343994517436008e-06, "loss": 1.9274, "step": 22688 }, { "epoch": 0.7320345787853062, "grad_norm": 0.373046875, "learning_rate": 5.342790271321344e-06, "loss": 1.9755, "step": 22689 }, { "epoch": 0.7320668426391026, "grad_norm": 0.40625, "learning_rate": 5.341586131506322e-06, "loss": 1.944, "step": 22690 }, { "epoch": 0.7320991064928989, "grad_norm": 0.375, "learning_rate": 5.34038209800419e-06, "loss": 2.0166, "step": 22691 }, { "epoch": 0.7321313703466953, "grad_norm": 0.380859375, "learning_rate": 5.339178170828198e-06, "loss": 1.9526, "step": 22692 }, { "epoch": 0.7321636342004916, "grad_norm": 0.39453125, "learning_rate": 5.337974349991611e-06, "loss": 1.9634, "step": 22693 }, { "epoch": 0.732195898054288, "grad_norm": 0.400390625, "learning_rate": 5.336770635507661e-06, "loss": 1.9388, "step": 22694 }, { "epoch": 0.7322281619080843, "grad_norm": 0.380859375, "learning_rate": 5.335567027389609e-06, "loss": 1.9491, "step": 22695 }, { "epoch": 0.7322604257618807, "grad_norm": 0.400390625, "learning_rate": 5.334363525650711e-06, "loss": 1.9376, "step": 22696 }, { "epoch": 0.7322926896156771, "grad_norm": 0.373046875, "learning_rate": 5.333160130304194e-06, "loss": 1.9786, "step": 22697 }, { "epoch": 0.7323249534694733, "grad_norm": 0.38671875, "learning_rate": 5.3319568413633165e-06, "loss": 1.9358, "step": 22698 }, { "epoch": 0.7323572173232697, "grad_norm": 0.375, "learning_rate": 5.3307536588413274e-06, "loss": 1.978, "step": 22699 }, { "epoch": 0.732389481177066, "grad_norm": 0.384765625, "learning_rate": 5.329550582751465e-06, "loss": 1.9393, "step": 22700 }, { "epoch": 0.7324217450308624, "grad_norm": 0.37109375, "learning_rate": 5.328347613106967e-06, "loss": 1.9388, "step": 22701 }, { "epoch": 0.7324540088846587, "grad_norm": 0.384765625, "learning_rate": 5.327144749921084e-06, "loss": 1.9635, "step": 22702 }, { "epoch": 0.7324862727384551, "grad_norm": 0.373046875, "learning_rate": 5.325941993207054e-06, "loss": 1.9603, "step": 22703 }, { "epoch": 0.7325185365922514, "grad_norm": 0.37890625, "learning_rate": 5.324739342978115e-06, "loss": 1.9771, "step": 22704 }, { "epoch": 0.7325508004460478, "grad_norm": 0.390625, "learning_rate": 5.323536799247497e-06, "loss": 1.9604, "step": 22705 }, { "epoch": 0.7325830642998441, "grad_norm": 0.4140625, "learning_rate": 5.322334362028452e-06, "loss": 1.9201, "step": 22706 }, { "epoch": 0.7326153281536405, "grad_norm": 0.404296875, "learning_rate": 5.321132031334204e-06, "loss": 1.9701, "step": 22707 }, { "epoch": 0.7326475920074368, "grad_norm": 0.39453125, "learning_rate": 5.319929807177989e-06, "loss": 1.9523, "step": 22708 }, { "epoch": 0.7326798558612332, "grad_norm": 0.3828125, "learning_rate": 5.318727689573046e-06, "loss": 1.9719, "step": 22709 }, { "epoch": 0.7327121197150295, "grad_norm": 0.396484375, "learning_rate": 5.317525678532605e-06, "loss": 2.0297, "step": 22710 }, { "epoch": 0.7327443835688259, "grad_norm": 0.388671875, "learning_rate": 5.316323774069887e-06, "loss": 1.9611, "step": 22711 }, { "epoch": 0.7327766474226222, "grad_norm": 0.396484375, "learning_rate": 5.3151219761981365e-06, "loss": 1.9966, "step": 22712 }, { "epoch": 0.7328089112764186, "grad_norm": 0.390625, "learning_rate": 5.313920284930575e-06, "loss": 1.9618, "step": 22713 }, { "epoch": 0.7328411751302149, "grad_norm": 0.384765625, "learning_rate": 5.312718700280427e-06, "loss": 1.9101, "step": 22714 }, { "epoch": 0.7328734389840112, "grad_norm": 0.41015625, "learning_rate": 5.311517222260925e-06, "loss": 1.9482, "step": 22715 }, { "epoch": 0.7329057028378075, "grad_norm": 0.376953125, "learning_rate": 5.310315850885291e-06, "loss": 1.943, "step": 22716 }, { "epoch": 0.7329379666916039, "grad_norm": 0.412109375, "learning_rate": 5.309114586166745e-06, "loss": 1.9605, "step": 22717 }, { "epoch": 0.7329702305454003, "grad_norm": 0.41796875, "learning_rate": 5.3079134281185165e-06, "loss": 1.9658, "step": 22718 }, { "epoch": 0.7330024943991966, "grad_norm": 0.38671875, "learning_rate": 5.306712376753825e-06, "loss": 1.9136, "step": 22719 }, { "epoch": 0.733034758252993, "grad_norm": 0.3828125, "learning_rate": 5.305511432085885e-06, "loss": 1.9568, "step": 22720 }, { "epoch": 0.7330670221067893, "grad_norm": 0.3828125, "learning_rate": 5.304310594127926e-06, "loss": 1.9404, "step": 22721 }, { "epoch": 0.7330992859605857, "grad_norm": 0.396484375, "learning_rate": 5.303109862893159e-06, "loss": 1.9508, "step": 22722 }, { "epoch": 0.733131549814382, "grad_norm": 0.384765625, "learning_rate": 5.301909238394798e-06, "loss": 1.9458, "step": 22723 }, { "epoch": 0.7331638136681784, "grad_norm": 0.39453125, "learning_rate": 5.300708720646073e-06, "loss": 1.9584, "step": 22724 }, { "epoch": 0.7331960775219747, "grad_norm": 0.396484375, "learning_rate": 5.299508309660176e-06, "loss": 1.9444, "step": 22725 }, { "epoch": 0.7332283413757711, "grad_norm": 0.384765625, "learning_rate": 5.298308005450334e-06, "loss": 1.9491, "step": 22726 }, { "epoch": 0.7332606052295674, "grad_norm": 0.400390625, "learning_rate": 5.297107808029768e-06, "loss": 1.9581, "step": 22727 }, { "epoch": 0.7332928690833638, "grad_norm": 0.404296875, "learning_rate": 5.295907717411667e-06, "loss": 1.9639, "step": 22728 }, { "epoch": 0.7333251329371601, "grad_norm": 0.3828125, "learning_rate": 5.2947077336092525e-06, "loss": 1.975, "step": 22729 }, { "epoch": 0.7333573967909565, "grad_norm": 0.388671875, "learning_rate": 5.293507856635739e-06, "loss": 1.9796, "step": 22730 }, { "epoch": 0.7333896606447527, "grad_norm": 0.390625, "learning_rate": 5.292308086504327e-06, "loss": 1.9274, "step": 22731 }, { "epoch": 0.7334219244985491, "grad_norm": 0.41015625, "learning_rate": 5.291108423228223e-06, "loss": 1.9706, "step": 22732 }, { "epoch": 0.7334541883523454, "grad_norm": 0.39453125, "learning_rate": 5.289908866820627e-06, "loss": 1.9262, "step": 22733 }, { "epoch": 0.7334864522061418, "grad_norm": 0.412109375, "learning_rate": 5.288709417294754e-06, "loss": 1.9353, "step": 22734 }, { "epoch": 0.7335187160599381, "grad_norm": 0.408203125, "learning_rate": 5.287510074663801e-06, "loss": 1.937, "step": 22735 }, { "epoch": 0.7335509799137345, "grad_norm": 0.404296875, "learning_rate": 5.286310838940964e-06, "loss": 1.9175, "step": 22736 }, { "epoch": 0.7335832437675308, "grad_norm": 0.39453125, "learning_rate": 5.285111710139453e-06, "loss": 1.9199, "step": 22737 }, { "epoch": 0.7336155076213272, "grad_norm": 0.390625, "learning_rate": 5.283912688272463e-06, "loss": 1.9182, "step": 22738 }, { "epoch": 0.7336477714751236, "grad_norm": 0.390625, "learning_rate": 5.282713773353189e-06, "loss": 1.9284, "step": 22739 }, { "epoch": 0.7336800353289199, "grad_norm": 0.400390625, "learning_rate": 5.281514965394833e-06, "loss": 1.9269, "step": 22740 }, { "epoch": 0.7337122991827163, "grad_norm": 0.400390625, "learning_rate": 5.280316264410589e-06, "loss": 1.9607, "step": 22741 }, { "epoch": 0.7337445630365126, "grad_norm": 0.404296875, "learning_rate": 5.279117670413646e-06, "loss": 1.9548, "step": 22742 }, { "epoch": 0.733776826890309, "grad_norm": 0.380859375, "learning_rate": 5.2779191834172075e-06, "loss": 1.9527, "step": 22743 }, { "epoch": 0.7338090907441053, "grad_norm": 0.3984375, "learning_rate": 5.2767208034344584e-06, "loss": 1.9623, "step": 22744 }, { "epoch": 0.7338413545979017, "grad_norm": 0.42578125, "learning_rate": 5.275522530478588e-06, "loss": 1.9251, "step": 22745 }, { "epoch": 0.733873618451698, "grad_norm": 0.37890625, "learning_rate": 5.274324364562793e-06, "loss": 1.9329, "step": 22746 }, { "epoch": 0.7339058823054944, "grad_norm": 0.443359375, "learning_rate": 5.2731263057002595e-06, "loss": 1.9145, "step": 22747 }, { "epoch": 0.7339381461592906, "grad_norm": 0.4296875, "learning_rate": 5.271928353904168e-06, "loss": 1.82, "step": 22748 }, { "epoch": 0.733970410013087, "grad_norm": 0.3828125, "learning_rate": 5.270730509187715e-06, "loss": 1.8722, "step": 22749 }, { "epoch": 0.7340026738668833, "grad_norm": 0.416015625, "learning_rate": 5.26953277156408e-06, "loss": 1.81, "step": 22750 }, { "epoch": 0.7340349377206797, "grad_norm": 0.37109375, "learning_rate": 5.268335141046444e-06, "loss": 1.8796, "step": 22751 }, { "epoch": 0.734067201574476, "grad_norm": 0.408203125, "learning_rate": 5.267137617647998e-06, "loss": 1.8867, "step": 22752 }, { "epoch": 0.7340994654282724, "grad_norm": 0.373046875, "learning_rate": 5.265940201381919e-06, "loss": 1.8908, "step": 22753 }, { "epoch": 0.7341317292820687, "grad_norm": 0.400390625, "learning_rate": 5.2647428922613824e-06, "loss": 1.845, "step": 22754 }, { "epoch": 0.7341639931358651, "grad_norm": 0.396484375, "learning_rate": 5.26354569029958e-06, "loss": 1.8019, "step": 22755 }, { "epoch": 0.7341962569896614, "grad_norm": 0.36328125, "learning_rate": 5.262348595509673e-06, "loss": 1.805, "step": 22756 }, { "epoch": 0.7342285208434578, "grad_norm": 0.3671875, "learning_rate": 5.261151607904847e-06, "loss": 1.8574, "step": 22757 }, { "epoch": 0.7342607846972542, "grad_norm": 0.3984375, "learning_rate": 5.259954727498288e-06, "loss": 1.8162, "step": 22758 }, { "epoch": 0.7342930485510505, "grad_norm": 0.361328125, "learning_rate": 5.25875795430315e-06, "loss": 1.8066, "step": 22759 }, { "epoch": 0.7343253124048469, "grad_norm": 0.40234375, "learning_rate": 5.257561288332614e-06, "loss": 1.8427, "step": 22760 }, { "epoch": 0.7343575762586432, "grad_norm": 0.373046875, "learning_rate": 5.256364729599867e-06, "loss": 1.8044, "step": 22761 }, { "epoch": 0.7343898401124396, "grad_norm": 0.376953125, "learning_rate": 5.255168278118054e-06, "loss": 1.8464, "step": 22762 }, { "epoch": 0.7344221039662359, "grad_norm": 0.408203125, "learning_rate": 5.253971933900365e-06, "loss": 1.8262, "step": 22763 }, { "epoch": 0.7344543678200323, "grad_norm": 0.41015625, "learning_rate": 5.252775696959956e-06, "loss": 1.7984, "step": 22764 }, { "epoch": 0.7344866316738285, "grad_norm": 0.427734375, "learning_rate": 5.251579567310004e-06, "loss": 1.8458, "step": 22765 }, { "epoch": 0.734518895527625, "grad_norm": 0.49609375, "learning_rate": 5.25038354496367e-06, "loss": 1.8418, "step": 22766 }, { "epoch": 0.7345511593814212, "grad_norm": 0.37109375, "learning_rate": 5.249187629934114e-06, "loss": 1.8408, "step": 22767 }, { "epoch": 0.7345834232352176, "grad_norm": 0.36328125, "learning_rate": 5.2479918222345135e-06, "loss": 1.8275, "step": 22768 }, { "epoch": 0.7346156870890139, "grad_norm": 0.4453125, "learning_rate": 5.24679612187802e-06, "loss": 1.8226, "step": 22769 }, { "epoch": 0.7346479509428103, "grad_norm": 0.451171875, "learning_rate": 5.245600528877794e-06, "loss": 1.833, "step": 22770 }, { "epoch": 0.7346802147966066, "grad_norm": 0.388671875, "learning_rate": 5.244405043247005e-06, "loss": 1.8453, "step": 22771 }, { "epoch": 0.734712478650403, "grad_norm": 0.55859375, "learning_rate": 5.243209664998806e-06, "loss": 1.8158, "step": 22772 }, { "epoch": 0.7347447425041993, "grad_norm": 0.390625, "learning_rate": 5.242014394146351e-06, "loss": 1.8848, "step": 22773 }, { "epoch": 0.7347770063579957, "grad_norm": 0.390625, "learning_rate": 5.240819230702805e-06, "loss": 1.9163, "step": 22774 }, { "epoch": 0.734809270211792, "grad_norm": 0.412109375, "learning_rate": 5.2396241746813215e-06, "loss": 1.9175, "step": 22775 }, { "epoch": 0.7348415340655884, "grad_norm": 0.453125, "learning_rate": 5.238429226095046e-06, "loss": 1.9184, "step": 22776 }, { "epoch": 0.7348737979193847, "grad_norm": 0.39453125, "learning_rate": 5.237234384957146e-06, "loss": 1.9313, "step": 22777 }, { "epoch": 0.7349060617731811, "grad_norm": 0.40234375, "learning_rate": 5.236039651280764e-06, "loss": 1.9071, "step": 22778 }, { "epoch": 0.7349383256269775, "grad_norm": 0.44140625, "learning_rate": 5.234845025079049e-06, "loss": 1.9167, "step": 22779 }, { "epoch": 0.7349705894807738, "grad_norm": 0.408203125, "learning_rate": 5.233650506365159e-06, "loss": 1.8797, "step": 22780 }, { "epoch": 0.7350028533345702, "grad_norm": 0.39453125, "learning_rate": 5.232456095152238e-06, "loss": 1.95, "step": 22781 }, { "epoch": 0.7350351171883664, "grad_norm": 0.416015625, "learning_rate": 5.231261791453429e-06, "loss": 1.9435, "step": 22782 }, { "epoch": 0.7350673810421628, "grad_norm": 0.40625, "learning_rate": 5.230067595281885e-06, "loss": 1.9387, "step": 22783 }, { "epoch": 0.7350996448959591, "grad_norm": 0.404296875, "learning_rate": 5.22887350665075e-06, "loss": 1.9063, "step": 22784 }, { "epoch": 0.7351319087497555, "grad_norm": 0.396484375, "learning_rate": 5.227679525573159e-06, "loss": 1.8975, "step": 22785 }, { "epoch": 0.7351641726035518, "grad_norm": 0.39453125, "learning_rate": 5.226485652062273e-06, "loss": 1.9292, "step": 22786 }, { "epoch": 0.7351964364573482, "grad_norm": 0.396484375, "learning_rate": 5.225291886131211e-06, "loss": 1.9014, "step": 22787 }, { "epoch": 0.7352287003111445, "grad_norm": 0.3828125, "learning_rate": 5.224098227793122e-06, "loss": 1.9308, "step": 22788 }, { "epoch": 0.7352609641649409, "grad_norm": 0.41015625, "learning_rate": 5.222904677061159e-06, "loss": 1.9138, "step": 22789 }, { "epoch": 0.7352932280187372, "grad_norm": 0.375, "learning_rate": 5.221711233948435e-06, "loss": 1.8765, "step": 22790 }, { "epoch": 0.7353254918725336, "grad_norm": 0.400390625, "learning_rate": 5.220517898468101e-06, "loss": 1.8575, "step": 22791 }, { "epoch": 0.7353577557263299, "grad_norm": 0.419921875, "learning_rate": 5.219324670633298e-06, "loss": 1.8363, "step": 22792 }, { "epoch": 0.7353900195801263, "grad_norm": 0.396484375, "learning_rate": 5.218131550457142e-06, "loss": 1.7697, "step": 22793 }, { "epoch": 0.7354222834339226, "grad_norm": 0.384765625, "learning_rate": 5.216938537952784e-06, "loss": 1.8239, "step": 22794 }, { "epoch": 0.735454547287719, "grad_norm": 0.37109375, "learning_rate": 5.215745633133347e-06, "loss": 1.843, "step": 22795 }, { "epoch": 0.7354868111415153, "grad_norm": 0.3671875, "learning_rate": 5.214552836011958e-06, "loss": 1.7972, "step": 22796 }, { "epoch": 0.7355190749953117, "grad_norm": 0.37109375, "learning_rate": 5.213360146601757e-06, "loss": 1.8074, "step": 22797 }, { "epoch": 0.7355513388491081, "grad_norm": 0.375, "learning_rate": 5.21216756491586e-06, "loss": 1.7955, "step": 22798 }, { "epoch": 0.7355836027029043, "grad_norm": 0.359375, "learning_rate": 5.210975090967406e-06, "loss": 1.825, "step": 22799 }, { "epoch": 0.7356158665567007, "grad_norm": 0.40234375, "learning_rate": 5.209782724769517e-06, "loss": 1.8233, "step": 22800 }, { "epoch": 0.735648130410497, "grad_norm": 0.384765625, "learning_rate": 5.208590466335309e-06, "loss": 1.8011, "step": 22801 }, { "epoch": 0.7356803942642934, "grad_norm": 0.380859375, "learning_rate": 5.2073983156779205e-06, "loss": 1.8077, "step": 22802 }, { "epoch": 0.7357126581180897, "grad_norm": 0.373046875, "learning_rate": 5.206206272810464e-06, "loss": 1.8197, "step": 22803 }, { "epoch": 0.7357449219718861, "grad_norm": 0.36328125, "learning_rate": 5.205014337746058e-06, "loss": 1.8195, "step": 22804 }, { "epoch": 0.7357771858256824, "grad_norm": 0.373046875, "learning_rate": 5.203822510497834e-06, "loss": 1.8416, "step": 22805 }, { "epoch": 0.7358094496794788, "grad_norm": 0.384765625, "learning_rate": 5.202630791078902e-06, "loss": 1.8526, "step": 22806 }, { "epoch": 0.7358417135332751, "grad_norm": 0.435546875, "learning_rate": 5.2014391795023784e-06, "loss": 1.8075, "step": 22807 }, { "epoch": 0.7358739773870715, "grad_norm": 0.390625, "learning_rate": 5.200247675781386e-06, "loss": 1.8645, "step": 22808 }, { "epoch": 0.7359062412408678, "grad_norm": 0.408203125, "learning_rate": 5.1990562799290395e-06, "loss": 1.7824, "step": 22809 }, { "epoch": 0.7359385050946642, "grad_norm": 0.392578125, "learning_rate": 5.197864991958443e-06, "loss": 1.7934, "step": 22810 }, { "epoch": 0.7359707689484605, "grad_norm": 0.392578125, "learning_rate": 5.196673811882722e-06, "loss": 1.8198, "step": 22811 }, { "epoch": 0.7360030328022569, "grad_norm": 0.4375, "learning_rate": 5.195482739714983e-06, "loss": 1.8164, "step": 22812 }, { "epoch": 0.7360352966560532, "grad_norm": 0.47265625, "learning_rate": 5.194291775468332e-06, "loss": 1.8034, "step": 22813 }, { "epoch": 0.7360675605098496, "grad_norm": 0.388671875, "learning_rate": 5.193100919155891e-06, "loss": 1.8205, "step": 22814 }, { "epoch": 0.7360998243636458, "grad_norm": 0.412109375, "learning_rate": 5.1919101707907515e-06, "loss": 1.8339, "step": 22815 }, { "epoch": 0.7361320882174422, "grad_norm": 0.46875, "learning_rate": 5.190719530386028e-06, "loss": 1.8732, "step": 22816 }, { "epoch": 0.7361643520712385, "grad_norm": 0.37890625, "learning_rate": 5.1895289979548375e-06, "loss": 1.8023, "step": 22817 }, { "epoch": 0.7361966159250349, "grad_norm": 0.474609375, "learning_rate": 5.188338573510261e-06, "loss": 1.803, "step": 22818 }, { "epoch": 0.7362288797788313, "grad_norm": 0.400390625, "learning_rate": 5.187148257065418e-06, "loss": 1.7871, "step": 22819 }, { "epoch": 0.7362611436326276, "grad_norm": 0.380859375, "learning_rate": 5.185958048633415e-06, "loss": 1.7935, "step": 22820 }, { "epoch": 0.736293407486424, "grad_norm": 0.48046875, "learning_rate": 5.184767948227334e-06, "loss": 1.8507, "step": 22821 }, { "epoch": 0.7363256713402203, "grad_norm": 0.400390625, "learning_rate": 5.183577955860289e-06, "loss": 1.8592, "step": 22822 }, { "epoch": 0.7363579351940167, "grad_norm": 0.384765625, "learning_rate": 5.182388071545384e-06, "loss": 1.8204, "step": 22823 }, { "epoch": 0.736390199047813, "grad_norm": 0.37109375, "learning_rate": 5.181198295295699e-06, "loss": 1.7927, "step": 22824 }, { "epoch": 0.7364224629016094, "grad_norm": 0.384765625, "learning_rate": 5.180008627124344e-06, "loss": 1.8468, "step": 22825 }, { "epoch": 0.7364547267554057, "grad_norm": 0.380859375, "learning_rate": 5.17881906704441e-06, "loss": 1.8348, "step": 22826 }, { "epoch": 0.7364869906092021, "grad_norm": 0.3671875, "learning_rate": 5.177629615068982e-06, "loss": 1.8219, "step": 22827 }, { "epoch": 0.7365192544629984, "grad_norm": 0.365234375, "learning_rate": 5.176440271211168e-06, "loss": 1.8519, "step": 22828 }, { "epoch": 0.7365515183167948, "grad_norm": 0.373046875, "learning_rate": 5.175251035484047e-06, "loss": 1.8401, "step": 22829 }, { "epoch": 0.7365837821705911, "grad_norm": 0.412109375, "learning_rate": 5.174061907900719e-06, "loss": 1.8316, "step": 22830 }, { "epoch": 0.7366160460243875, "grad_norm": 0.37109375, "learning_rate": 5.172872888474268e-06, "loss": 1.8125, "step": 22831 }, { "epoch": 0.7366483098781837, "grad_norm": 0.40234375, "learning_rate": 5.171683977217779e-06, "loss": 1.8424, "step": 22832 }, { "epoch": 0.7366805737319801, "grad_norm": 0.38671875, "learning_rate": 5.170495174144345e-06, "loss": 1.7813, "step": 22833 }, { "epoch": 0.7367128375857764, "grad_norm": 0.380859375, "learning_rate": 5.169306479267048e-06, "loss": 1.8212, "step": 22834 }, { "epoch": 0.7367451014395728, "grad_norm": 0.37109375, "learning_rate": 5.168117892598971e-06, "loss": 1.8199, "step": 22835 }, { "epoch": 0.7367773652933691, "grad_norm": 0.39453125, "learning_rate": 5.166929414153202e-06, "loss": 1.8012, "step": 22836 }, { "epoch": 0.7368096291471655, "grad_norm": 0.392578125, "learning_rate": 5.165741043942818e-06, "loss": 1.8535, "step": 22837 }, { "epoch": 0.7368418930009618, "grad_norm": 0.396484375, "learning_rate": 5.164552781980898e-06, "loss": 1.7719, "step": 22838 }, { "epoch": 0.7368741568547582, "grad_norm": 0.37890625, "learning_rate": 5.16336462828053e-06, "loss": 1.7994, "step": 22839 }, { "epoch": 0.7369064207085546, "grad_norm": 0.37890625, "learning_rate": 5.162176582854788e-06, "loss": 1.7775, "step": 22840 }, { "epoch": 0.7369386845623509, "grad_norm": 0.376953125, "learning_rate": 5.160988645716743e-06, "loss": 1.9408, "step": 22841 }, { "epoch": 0.7369709484161473, "grad_norm": 0.431640625, "learning_rate": 5.1598008168794825e-06, "loss": 1.9091, "step": 22842 }, { "epoch": 0.7370032122699436, "grad_norm": 0.431640625, "learning_rate": 5.158613096356073e-06, "loss": 1.9023, "step": 22843 }, { "epoch": 0.73703547612374, "grad_norm": 0.4453125, "learning_rate": 5.157425484159587e-06, "loss": 1.9042, "step": 22844 }, { "epoch": 0.7370677399775363, "grad_norm": 0.4453125, "learning_rate": 5.156237980303111e-06, "loss": 1.8649, "step": 22845 }, { "epoch": 0.7371000038313327, "grad_norm": 0.4453125, "learning_rate": 5.155050584799694e-06, "loss": 1.8738, "step": 22846 }, { "epoch": 0.737132267685129, "grad_norm": 0.408203125, "learning_rate": 5.153863297662417e-06, "loss": 1.9231, "step": 22847 }, { "epoch": 0.7371645315389254, "grad_norm": 0.43359375, "learning_rate": 5.152676118904361e-06, "loss": 1.9105, "step": 22848 }, { "epoch": 0.7371967953927216, "grad_norm": 0.421875, "learning_rate": 5.151489048538571e-06, "loss": 1.9041, "step": 22849 }, { "epoch": 0.737229059246518, "grad_norm": 0.40625, "learning_rate": 5.150302086578124e-06, "loss": 1.9052, "step": 22850 }, { "epoch": 0.7372613231003143, "grad_norm": 0.44921875, "learning_rate": 5.1491152330360955e-06, "loss": 1.9094, "step": 22851 }, { "epoch": 0.7372935869541107, "grad_norm": 0.484375, "learning_rate": 5.147928487925531e-06, "loss": 1.9325, "step": 22852 }, { "epoch": 0.737325850807907, "grad_norm": 0.4296875, "learning_rate": 5.146741851259499e-06, "loss": 1.9178, "step": 22853 }, { "epoch": 0.7373581146617034, "grad_norm": 0.435546875, "learning_rate": 5.145555323051076e-06, "loss": 1.859, "step": 22854 }, { "epoch": 0.7373903785154997, "grad_norm": 0.396484375, "learning_rate": 5.1443689033133e-06, "loss": 1.9037, "step": 22855 }, { "epoch": 0.7374226423692961, "grad_norm": 0.451171875, "learning_rate": 5.143182592059245e-06, "loss": 1.9322, "step": 22856 }, { "epoch": 0.7374549062230924, "grad_norm": 0.396484375, "learning_rate": 5.141996389301966e-06, "loss": 1.9065, "step": 22857 }, { "epoch": 0.7374871700768888, "grad_norm": 0.44921875, "learning_rate": 5.140810295054513e-06, "loss": 1.9237, "step": 22858 }, { "epoch": 0.7375194339306852, "grad_norm": 0.4921875, "learning_rate": 5.139624309329953e-06, "loss": 1.8799, "step": 22859 }, { "epoch": 0.7375516977844815, "grad_norm": 0.419921875, "learning_rate": 5.1384384321413345e-06, "loss": 1.9103, "step": 22860 }, { "epoch": 0.7375839616382779, "grad_norm": 0.416015625, "learning_rate": 5.137252663501705e-06, "loss": 1.896, "step": 22861 }, { "epoch": 0.7376162254920742, "grad_norm": 0.419921875, "learning_rate": 5.136067003424129e-06, "loss": 1.9091, "step": 22862 }, { "epoch": 0.7376484893458706, "grad_norm": 0.408203125, "learning_rate": 5.134881451921645e-06, "loss": 1.9034, "step": 22863 }, { "epoch": 0.7376807531996669, "grad_norm": 0.392578125, "learning_rate": 5.133696009007314e-06, "loss": 1.8999, "step": 22864 }, { "epoch": 0.7377130170534633, "grad_norm": 0.443359375, "learning_rate": 5.1325106746941816e-06, "loss": 1.896, "step": 22865 }, { "epoch": 0.7377452809072595, "grad_norm": 0.41015625, "learning_rate": 5.131325448995284e-06, "loss": 1.923, "step": 22866 }, { "epoch": 0.737777544761056, "grad_norm": 0.392578125, "learning_rate": 5.1301403319236844e-06, "loss": 1.9222, "step": 22867 }, { "epoch": 0.7378098086148522, "grad_norm": 0.400390625, "learning_rate": 5.128955323492418e-06, "loss": 1.9328, "step": 22868 }, { "epoch": 0.7378420724686486, "grad_norm": 0.41796875, "learning_rate": 5.127770423714524e-06, "loss": 1.9382, "step": 22869 }, { "epoch": 0.7378743363224449, "grad_norm": 0.390625, "learning_rate": 5.126585632603057e-06, "loss": 1.909, "step": 22870 }, { "epoch": 0.7379066001762413, "grad_norm": 0.396484375, "learning_rate": 5.125400950171051e-06, "loss": 1.9641, "step": 22871 }, { "epoch": 0.7379388640300376, "grad_norm": 0.380859375, "learning_rate": 5.124216376431544e-06, "loss": 1.9028, "step": 22872 }, { "epoch": 0.737971127883834, "grad_norm": 0.388671875, "learning_rate": 5.123031911397583e-06, "loss": 1.9226, "step": 22873 }, { "epoch": 0.7380033917376303, "grad_norm": 0.41796875, "learning_rate": 5.1218475550822e-06, "loss": 1.9056, "step": 22874 }, { "epoch": 0.7380356555914267, "grad_norm": 0.41015625, "learning_rate": 5.120663307498428e-06, "loss": 1.9176, "step": 22875 }, { "epoch": 0.738067919445223, "grad_norm": 0.42578125, "learning_rate": 5.119479168659318e-06, "loss": 1.9716, "step": 22876 }, { "epoch": 0.7381001832990194, "grad_norm": 0.40234375, "learning_rate": 5.118295138577881e-06, "loss": 1.9196, "step": 22877 }, { "epoch": 0.7381324471528157, "grad_norm": 0.3984375, "learning_rate": 5.1171112172671635e-06, "loss": 1.902, "step": 22878 }, { "epoch": 0.7381647110066121, "grad_norm": 0.416015625, "learning_rate": 5.115927404740206e-06, "loss": 1.8881, "step": 22879 }, { "epoch": 0.7381969748604085, "grad_norm": 0.41796875, "learning_rate": 5.114743701010018e-06, "loss": 1.8607, "step": 22880 }, { "epoch": 0.7382292387142048, "grad_norm": 0.392578125, "learning_rate": 5.11356010608964e-06, "loss": 1.9122, "step": 22881 }, { "epoch": 0.7382615025680012, "grad_norm": 0.396484375, "learning_rate": 5.11237661999211e-06, "loss": 1.8957, "step": 22882 }, { "epoch": 0.7382937664217974, "grad_norm": 0.408203125, "learning_rate": 5.111193242730434e-06, "loss": 1.9411, "step": 22883 }, { "epoch": 0.7383260302755938, "grad_norm": 0.390625, "learning_rate": 5.110009974317654e-06, "loss": 1.9489, "step": 22884 }, { "epoch": 0.7383582941293901, "grad_norm": 0.390625, "learning_rate": 5.1088268147667895e-06, "loss": 1.9156, "step": 22885 }, { "epoch": 0.7383905579831865, "grad_norm": 0.4296875, "learning_rate": 5.1076437640908595e-06, "loss": 1.9058, "step": 22886 }, { "epoch": 0.7384228218369828, "grad_norm": 0.4453125, "learning_rate": 5.106460822302893e-06, "loss": 1.8385, "step": 22887 }, { "epoch": 0.7384550856907792, "grad_norm": 0.419921875, "learning_rate": 5.105277989415909e-06, "loss": 1.9439, "step": 22888 }, { "epoch": 0.7384873495445755, "grad_norm": 0.38671875, "learning_rate": 5.104095265442922e-06, "loss": 1.9067, "step": 22889 }, { "epoch": 0.7385196133983719, "grad_norm": 0.458984375, "learning_rate": 5.102912650396958e-06, "loss": 1.9184, "step": 22890 }, { "epoch": 0.7385518772521682, "grad_norm": 0.43359375, "learning_rate": 5.1017301442910345e-06, "loss": 1.9606, "step": 22891 }, { "epoch": 0.7385841411059646, "grad_norm": 0.404296875, "learning_rate": 5.100547747138158e-06, "loss": 1.9201, "step": 22892 }, { "epoch": 0.7386164049597609, "grad_norm": 0.44140625, "learning_rate": 5.099365458951354e-06, "loss": 1.8637, "step": 22893 }, { "epoch": 0.7386486688135573, "grad_norm": 0.462890625, "learning_rate": 5.098183279743635e-06, "loss": 1.8314, "step": 22894 }, { "epoch": 0.7386809326673536, "grad_norm": 0.384765625, "learning_rate": 5.097001209528004e-06, "loss": 1.8162, "step": 22895 }, { "epoch": 0.73871319652115, "grad_norm": 0.384765625, "learning_rate": 5.095819248317486e-06, "loss": 1.8029, "step": 22896 }, { "epoch": 0.7387454603749463, "grad_norm": 0.396484375, "learning_rate": 5.094637396125077e-06, "loss": 1.8431, "step": 22897 }, { "epoch": 0.7387777242287427, "grad_norm": 0.44921875, "learning_rate": 5.093455652963801e-06, "loss": 1.8217, "step": 22898 }, { "epoch": 0.738809988082539, "grad_norm": 0.43359375, "learning_rate": 5.092274018846655e-06, "loss": 1.8155, "step": 22899 }, { "epoch": 0.7388422519363353, "grad_norm": 0.404296875, "learning_rate": 5.091092493786647e-06, "loss": 1.8137, "step": 22900 }, { "epoch": 0.7388745157901317, "grad_norm": 0.392578125, "learning_rate": 5.089911077796788e-06, "loss": 1.8559, "step": 22901 }, { "epoch": 0.738906779643928, "grad_norm": 0.376953125, "learning_rate": 5.088729770890079e-06, "loss": 1.8834, "step": 22902 }, { "epoch": 0.7389390434977244, "grad_norm": 0.400390625, "learning_rate": 5.087548573079516e-06, "loss": 1.834, "step": 22903 }, { "epoch": 0.7389713073515207, "grad_norm": 0.37890625, "learning_rate": 5.0863674843781125e-06, "loss": 1.8202, "step": 22904 }, { "epoch": 0.7390035712053171, "grad_norm": 0.470703125, "learning_rate": 5.085186504798864e-06, "loss": 1.8992, "step": 22905 }, { "epoch": 0.7390358350591134, "grad_norm": 0.4140625, "learning_rate": 5.084005634354766e-06, "loss": 1.9481, "step": 22906 }, { "epoch": 0.7390680989129098, "grad_norm": 0.43359375, "learning_rate": 5.082824873058828e-06, "loss": 1.9282, "step": 22907 }, { "epoch": 0.7391003627667061, "grad_norm": 0.42578125, "learning_rate": 5.08164422092403e-06, "loss": 1.9134, "step": 22908 }, { "epoch": 0.7391326266205025, "grad_norm": 0.423828125, "learning_rate": 5.080463677963378e-06, "loss": 1.8915, "step": 22909 }, { "epoch": 0.7391648904742988, "grad_norm": 0.439453125, "learning_rate": 5.079283244189875e-06, "loss": 1.871, "step": 22910 }, { "epoch": 0.7391971543280952, "grad_norm": 0.439453125, "learning_rate": 5.078102919616495e-06, "loss": 1.9221, "step": 22911 }, { "epoch": 0.7392294181818915, "grad_norm": 0.416015625, "learning_rate": 5.07692270425624e-06, "loss": 1.9207, "step": 22912 }, { "epoch": 0.7392616820356879, "grad_norm": 0.44921875, "learning_rate": 5.07574259812211e-06, "loss": 1.9157, "step": 22913 }, { "epoch": 0.7392939458894842, "grad_norm": 0.41796875, "learning_rate": 5.0745626012270775e-06, "loss": 1.8823, "step": 22914 }, { "epoch": 0.7393262097432806, "grad_norm": 0.427734375, "learning_rate": 5.0733827135841434e-06, "loss": 1.963, "step": 22915 }, { "epoch": 0.7393584735970768, "grad_norm": 0.451171875, "learning_rate": 5.072202935206292e-06, "loss": 2.0053, "step": 22916 }, { "epoch": 0.7393907374508732, "grad_norm": 0.435546875, "learning_rate": 5.071023266106502e-06, "loss": 1.9725, "step": 22917 }, { "epoch": 0.7394230013046695, "grad_norm": 0.443359375, "learning_rate": 5.069843706297771e-06, "loss": 1.9893, "step": 22918 }, { "epoch": 0.7394552651584659, "grad_norm": 0.41796875, "learning_rate": 5.068664255793077e-06, "loss": 2.0192, "step": 22919 }, { "epoch": 0.7394875290122623, "grad_norm": 0.490234375, "learning_rate": 5.067484914605397e-06, "loss": 1.9603, "step": 22920 }, { "epoch": 0.7395197928660586, "grad_norm": 0.451171875, "learning_rate": 5.066305682747721e-06, "loss": 1.9911, "step": 22921 }, { "epoch": 0.739552056719855, "grad_norm": 0.47265625, "learning_rate": 5.065126560233027e-06, "loss": 2.0346, "step": 22922 }, { "epoch": 0.7395843205736513, "grad_norm": 0.439453125, "learning_rate": 5.063947547074287e-06, "loss": 2.0134, "step": 22923 }, { "epoch": 0.7396165844274477, "grad_norm": 0.419921875, "learning_rate": 5.06276864328449e-06, "loss": 1.9932, "step": 22924 }, { "epoch": 0.739648848281244, "grad_norm": 0.41796875, "learning_rate": 5.0615898488766075e-06, "loss": 1.998, "step": 22925 }, { "epoch": 0.7396811121350404, "grad_norm": 0.43359375, "learning_rate": 5.060411163863607e-06, "loss": 2.0263, "step": 22926 }, { "epoch": 0.7397133759888367, "grad_norm": 0.4140625, "learning_rate": 5.059232588258477e-06, "loss": 1.961, "step": 22927 }, { "epoch": 0.7397456398426331, "grad_norm": 0.447265625, "learning_rate": 5.058054122074182e-06, "loss": 2.0305, "step": 22928 }, { "epoch": 0.7397779036964294, "grad_norm": 0.388671875, "learning_rate": 5.056875765323689e-06, "loss": 2.014, "step": 22929 }, { "epoch": 0.7398101675502258, "grad_norm": 0.546875, "learning_rate": 5.055697518019981e-06, "loss": 2.1495, "step": 22930 }, { "epoch": 0.7398424314040221, "grad_norm": 0.4921875, "learning_rate": 5.054519380176015e-06, "loss": 2.191, "step": 22931 }, { "epoch": 0.7398746952578185, "grad_norm": 0.4921875, "learning_rate": 5.05334135180477e-06, "loss": 2.1881, "step": 22932 }, { "epoch": 0.7399069591116147, "grad_norm": 0.58984375, "learning_rate": 5.052163432919208e-06, "loss": 2.1475, "step": 22933 }, { "epoch": 0.7399392229654111, "grad_norm": 0.59765625, "learning_rate": 5.050985623532289e-06, "loss": 2.1762, "step": 22934 }, { "epoch": 0.7399714868192074, "grad_norm": 0.4375, "learning_rate": 5.049807923656988e-06, "loss": 2.1442, "step": 22935 }, { "epoch": 0.7400037506730038, "grad_norm": 0.478515625, "learning_rate": 5.048630333306263e-06, "loss": 2.1591, "step": 22936 }, { "epoch": 0.7400360145268001, "grad_norm": 0.466796875, "learning_rate": 5.047452852493071e-06, "loss": 2.2012, "step": 22937 }, { "epoch": 0.7400682783805965, "grad_norm": 0.515625, "learning_rate": 5.046275481230388e-06, "loss": 2.1532, "step": 22938 }, { "epoch": 0.7401005422343928, "grad_norm": 0.54296875, "learning_rate": 5.045098219531154e-06, "loss": 2.1553, "step": 22939 }, { "epoch": 0.7401328060881892, "grad_norm": 0.466796875, "learning_rate": 5.043921067408338e-06, "loss": 2.1642, "step": 22940 }, { "epoch": 0.7401650699419856, "grad_norm": 0.56640625, "learning_rate": 5.042744024874904e-06, "loss": 2.1472, "step": 22941 }, { "epoch": 0.7401973337957819, "grad_norm": 0.51171875, "learning_rate": 5.041567091943793e-06, "loss": 2.1975, "step": 22942 }, { "epoch": 0.7402295976495783, "grad_norm": 0.455078125, "learning_rate": 5.040390268627967e-06, "loss": 2.2028, "step": 22943 }, { "epoch": 0.7402618615033746, "grad_norm": 0.451171875, "learning_rate": 5.039213554940389e-06, "loss": 2.2062, "step": 22944 }, { "epoch": 0.740294125357171, "grad_norm": 0.455078125, "learning_rate": 5.038036950893993e-06, "loss": 2.1989, "step": 22945 }, { "epoch": 0.7403263892109673, "grad_norm": 0.490234375, "learning_rate": 5.036860456501743e-06, "loss": 2.1617, "step": 22946 }, { "epoch": 0.7403586530647637, "grad_norm": 0.4375, "learning_rate": 5.035684071776588e-06, "loss": 2.1476, "step": 22947 }, { "epoch": 0.74039091691856, "grad_norm": 0.57421875, "learning_rate": 5.034507796731468e-06, "loss": 2.2117, "step": 22948 }, { "epoch": 0.7404231807723564, "grad_norm": 0.64453125, "learning_rate": 5.033331631379342e-06, "loss": 2.189, "step": 22949 }, { "epoch": 0.7404554446261526, "grad_norm": 0.4375, "learning_rate": 5.032155575733151e-06, "loss": 2.2025, "step": 22950 }, { "epoch": 0.740487708479949, "grad_norm": 0.75390625, "learning_rate": 5.030979629805836e-06, "loss": 2.2014, "step": 22951 }, { "epoch": 0.7405199723337453, "grad_norm": 0.62109375, "learning_rate": 5.02980379361035e-06, "loss": 2.1503, "step": 22952 }, { "epoch": 0.7405522361875417, "grad_norm": 0.51953125, "learning_rate": 5.028628067159633e-06, "loss": 2.0912, "step": 22953 }, { "epoch": 0.740584500041338, "grad_norm": 0.478515625, "learning_rate": 5.027452450466618e-06, "loss": 2.1276, "step": 22954 }, { "epoch": 0.7406167638951344, "grad_norm": 0.59375, "learning_rate": 5.026276943544259e-06, "loss": 2.1083, "step": 22955 }, { "epoch": 0.7406490277489307, "grad_norm": 0.474609375, "learning_rate": 5.0251015464054855e-06, "loss": 2.1028, "step": 22956 }, { "epoch": 0.7406812916027271, "grad_norm": 0.5625, "learning_rate": 5.023926259063234e-06, "loss": 2.1166, "step": 22957 }, { "epoch": 0.7407135554565234, "grad_norm": 0.455078125, "learning_rate": 5.022751081530451e-06, "loss": 2.1287, "step": 22958 }, { "epoch": 0.7407458193103198, "grad_norm": 0.52734375, "learning_rate": 5.021576013820068e-06, "loss": 2.1195, "step": 22959 }, { "epoch": 0.7407780831641162, "grad_norm": 0.482421875, "learning_rate": 5.0204010559450105e-06, "loss": 2.1123, "step": 22960 }, { "epoch": 0.7408103470179125, "grad_norm": 0.5078125, "learning_rate": 5.019226207918225e-06, "loss": 2.1109, "step": 22961 }, { "epoch": 0.7408426108717089, "grad_norm": 0.470703125, "learning_rate": 5.018051469752632e-06, "loss": 2.1117, "step": 22962 }, { "epoch": 0.7408748747255052, "grad_norm": 0.455078125, "learning_rate": 5.016876841461173e-06, "loss": 2.1065, "step": 22963 }, { "epoch": 0.7409071385793016, "grad_norm": 0.51171875, "learning_rate": 5.015702323056771e-06, "loss": 2.0932, "step": 22964 }, { "epoch": 0.7409394024330979, "grad_norm": 0.4296875, "learning_rate": 5.0145279145523505e-06, "loss": 2.0716, "step": 22965 }, { "epoch": 0.7409716662868943, "grad_norm": 0.47265625, "learning_rate": 5.013353615960852e-06, "loss": 2.093, "step": 22966 }, { "epoch": 0.7410039301406905, "grad_norm": 0.4296875, "learning_rate": 5.012179427295184e-06, "loss": 2.1356, "step": 22967 }, { "epoch": 0.741036193994487, "grad_norm": 0.43359375, "learning_rate": 5.01100534856828e-06, "loss": 2.0061, "step": 22968 }, { "epoch": 0.7410684578482832, "grad_norm": 0.416015625, "learning_rate": 5.009831379793072e-06, "loss": 1.9524, "step": 22969 }, { "epoch": 0.7411007217020796, "grad_norm": 0.41796875, "learning_rate": 5.0086575209824626e-06, "loss": 1.9379, "step": 22970 }, { "epoch": 0.7411329855558759, "grad_norm": 0.38671875, "learning_rate": 5.007483772149385e-06, "loss": 1.9393, "step": 22971 }, { "epoch": 0.7411652494096723, "grad_norm": 0.396484375, "learning_rate": 5.006310133306766e-06, "loss": 1.9398, "step": 22972 }, { "epoch": 0.7411975132634686, "grad_norm": 0.380859375, "learning_rate": 5.005136604467506e-06, "loss": 1.8799, "step": 22973 }, { "epoch": 0.741229777117265, "grad_norm": 0.3828125, "learning_rate": 5.003963185644532e-06, "loss": 1.8588, "step": 22974 }, { "epoch": 0.7412620409710613, "grad_norm": 0.39453125, "learning_rate": 5.002789876850772e-06, "loss": 1.8431, "step": 22975 }, { "epoch": 0.7412943048248577, "grad_norm": 0.40234375, "learning_rate": 5.001616678099117e-06, "loss": 1.9199, "step": 22976 }, { "epoch": 0.741326568678654, "grad_norm": 0.388671875, "learning_rate": 5.000443589402497e-06, "loss": 1.9059, "step": 22977 }, { "epoch": 0.7413588325324504, "grad_norm": 0.396484375, "learning_rate": 4.9992706107738235e-06, "loss": 1.9084, "step": 22978 }, { "epoch": 0.7413910963862467, "grad_norm": 0.39453125, "learning_rate": 4.998097742225997e-06, "loss": 1.9205, "step": 22979 }, { "epoch": 0.7414233602400431, "grad_norm": 0.40625, "learning_rate": 4.996924983771942e-06, "loss": 1.8784, "step": 22980 }, { "epoch": 0.7414556240938395, "grad_norm": 0.375, "learning_rate": 4.995752335424558e-06, "loss": 1.8773, "step": 22981 }, { "epoch": 0.7414878879476358, "grad_norm": 0.36328125, "learning_rate": 4.994579797196751e-06, "loss": 1.8778, "step": 22982 }, { "epoch": 0.7415201518014322, "grad_norm": 0.3984375, "learning_rate": 4.993407369101438e-06, "loss": 1.8935, "step": 22983 }, { "epoch": 0.7415524156552284, "grad_norm": 0.36328125, "learning_rate": 4.992235051151519e-06, "loss": 1.8686, "step": 22984 }, { "epoch": 0.7415846795090248, "grad_norm": 0.384765625, "learning_rate": 4.9910628433598894e-06, "loss": 1.8832, "step": 22985 }, { "epoch": 0.7416169433628211, "grad_norm": 0.40234375, "learning_rate": 4.989890745739467e-06, "loss": 1.8662, "step": 22986 }, { "epoch": 0.7416492072166175, "grad_norm": 0.400390625, "learning_rate": 4.9887187583031445e-06, "loss": 1.9249, "step": 22987 }, { "epoch": 0.7416814710704138, "grad_norm": 0.369140625, "learning_rate": 4.987546881063818e-06, "loss": 1.8737, "step": 22988 }, { "epoch": 0.7417137349242102, "grad_norm": 0.376953125, "learning_rate": 4.9863751140344e-06, "loss": 1.8917, "step": 22989 }, { "epoch": 0.7417459987780065, "grad_norm": 0.423828125, "learning_rate": 4.985203457227779e-06, "loss": 1.8966, "step": 22990 }, { "epoch": 0.7417782626318029, "grad_norm": 0.361328125, "learning_rate": 4.9840319106568495e-06, "loss": 1.8937, "step": 22991 }, { "epoch": 0.7418105264855992, "grad_norm": 0.369140625, "learning_rate": 4.982860474334516e-06, "loss": 1.8852, "step": 22992 }, { "epoch": 0.7418427903393956, "grad_norm": 0.37109375, "learning_rate": 4.981689148273668e-06, "loss": 1.907, "step": 22993 }, { "epoch": 0.7418750541931919, "grad_norm": 0.37109375, "learning_rate": 4.980517932487195e-06, "loss": 1.8825, "step": 22994 }, { "epoch": 0.7419073180469883, "grad_norm": 0.373046875, "learning_rate": 4.979346826987997e-06, "loss": 1.8653, "step": 22995 }, { "epoch": 0.7419395819007846, "grad_norm": 0.369140625, "learning_rate": 4.9781758317889554e-06, "loss": 1.8822, "step": 22996 }, { "epoch": 0.741971845754581, "grad_norm": 0.380859375, "learning_rate": 4.9770049469029734e-06, "loss": 1.8627, "step": 22997 }, { "epoch": 0.7420041096083773, "grad_norm": 0.388671875, "learning_rate": 4.975834172342923e-06, "loss": 1.87, "step": 22998 }, { "epoch": 0.7420363734621737, "grad_norm": 0.361328125, "learning_rate": 4.974663508121696e-06, "loss": 1.839, "step": 22999 }, { "epoch": 0.74206863731597, "grad_norm": 0.3515625, "learning_rate": 4.973492954252192e-06, "loss": 1.8755, "step": 23000 }, { "epoch": 0.7421009011697663, "grad_norm": 0.41015625, "learning_rate": 4.972322510747276e-06, "loss": 1.9989, "step": 23001 }, { "epoch": 0.7421331650235627, "grad_norm": 0.37109375, "learning_rate": 4.971152177619838e-06, "loss": 1.9639, "step": 23002 }, { "epoch": 0.742165428877359, "grad_norm": 0.39453125, "learning_rate": 4.969981954882772e-06, "loss": 2.0031, "step": 23003 }, { "epoch": 0.7421976927311554, "grad_norm": 0.40625, "learning_rate": 4.968811842548941e-06, "loss": 2.0082, "step": 23004 }, { "epoch": 0.7422299565849517, "grad_norm": 0.38671875, "learning_rate": 4.967641840631232e-06, "loss": 1.9643, "step": 23005 }, { "epoch": 0.7422622204387481, "grad_norm": 0.38671875, "learning_rate": 4.966471949142534e-06, "loss": 1.9199, "step": 23006 }, { "epoch": 0.7422944842925444, "grad_norm": 0.39453125, "learning_rate": 4.9653021680957045e-06, "loss": 1.9956, "step": 23007 }, { "epoch": 0.7423267481463408, "grad_norm": 0.41796875, "learning_rate": 4.964132497503635e-06, "loss": 1.9647, "step": 23008 }, { "epoch": 0.7423590120001371, "grad_norm": 0.408203125, "learning_rate": 4.962962937379196e-06, "loss": 2.0183, "step": 23009 }, { "epoch": 0.7423912758539335, "grad_norm": 0.4140625, "learning_rate": 4.961793487735256e-06, "loss": 1.9835, "step": 23010 }, { "epoch": 0.7424235397077298, "grad_norm": 0.396484375, "learning_rate": 4.960624148584696e-06, "loss": 1.9789, "step": 23011 }, { "epoch": 0.7424558035615262, "grad_norm": 0.37890625, "learning_rate": 4.959454919940383e-06, "loss": 1.9519, "step": 23012 }, { "epoch": 0.7424880674153225, "grad_norm": 0.40625, "learning_rate": 4.958285801815183e-06, "loss": 1.9793, "step": 23013 }, { "epoch": 0.7425203312691189, "grad_norm": 0.40234375, "learning_rate": 4.957116794221973e-06, "loss": 1.9871, "step": 23014 }, { "epoch": 0.7425525951229152, "grad_norm": 0.40234375, "learning_rate": 4.955947897173617e-06, "loss": 1.9774, "step": 23015 }, { "epoch": 0.7425848589767116, "grad_norm": 0.3984375, "learning_rate": 4.9547791106829764e-06, "loss": 1.9908, "step": 23016 }, { "epoch": 0.7426171228305078, "grad_norm": 0.41796875, "learning_rate": 4.953610434762925e-06, "loss": 1.9953, "step": 23017 }, { "epoch": 0.7426493866843042, "grad_norm": 0.431640625, "learning_rate": 4.952441869426323e-06, "loss": 1.9454, "step": 23018 }, { "epoch": 0.7426816505381005, "grad_norm": 0.431640625, "learning_rate": 4.951273414686028e-06, "loss": 1.9543, "step": 23019 }, { "epoch": 0.7427139143918969, "grad_norm": 0.396484375, "learning_rate": 4.950105070554911e-06, "loss": 1.9733, "step": 23020 }, { "epoch": 0.7427461782456933, "grad_norm": 0.400390625, "learning_rate": 4.948936837045828e-06, "loss": 1.9831, "step": 23021 }, { "epoch": 0.7427784420994896, "grad_norm": 0.3984375, "learning_rate": 4.9477687141716335e-06, "loss": 2.0048, "step": 23022 }, { "epoch": 0.742810705953286, "grad_norm": 0.39453125, "learning_rate": 4.946600701945192e-06, "loss": 1.9532, "step": 23023 }, { "epoch": 0.7428429698070823, "grad_norm": 0.384765625, "learning_rate": 4.94543280037936e-06, "loss": 1.9738, "step": 23024 }, { "epoch": 0.7428752336608787, "grad_norm": 0.419921875, "learning_rate": 4.944265009486985e-06, "loss": 1.9534, "step": 23025 }, { "epoch": 0.742907497514675, "grad_norm": 0.388671875, "learning_rate": 4.943097329280932e-06, "loss": 1.9641, "step": 23026 }, { "epoch": 0.7429397613684714, "grad_norm": 0.392578125, "learning_rate": 4.94192975977405e-06, "loss": 1.9648, "step": 23027 }, { "epoch": 0.7429720252222677, "grad_norm": 0.439453125, "learning_rate": 4.940762300979187e-06, "loss": 1.9269, "step": 23028 }, { "epoch": 0.7430042890760641, "grad_norm": 0.408203125, "learning_rate": 4.939594952909193e-06, "loss": 1.9499, "step": 23029 }, { "epoch": 0.7430365529298604, "grad_norm": 0.423828125, "learning_rate": 4.93842771557692e-06, "loss": 1.9575, "step": 23030 }, { "epoch": 0.7430688167836568, "grad_norm": 0.4140625, "learning_rate": 4.937260588995226e-06, "loss": 1.9333, "step": 23031 }, { "epoch": 0.7431010806374531, "grad_norm": 0.40234375, "learning_rate": 4.936093573176938e-06, "loss": 1.9397, "step": 23032 }, { "epoch": 0.7431333444912495, "grad_norm": 0.421875, "learning_rate": 4.934926668134914e-06, "loss": 1.9479, "step": 23033 }, { "epoch": 0.7431656083450457, "grad_norm": 0.435546875, "learning_rate": 4.933759873882005e-06, "loss": 1.9592, "step": 23034 }, { "epoch": 0.7431978721988421, "grad_norm": 0.3828125, "learning_rate": 4.9325931904310365e-06, "loss": 1.9493, "step": 23035 }, { "epoch": 0.7432301360526384, "grad_norm": 0.376953125, "learning_rate": 4.931426617794859e-06, "loss": 1.9498, "step": 23036 }, { "epoch": 0.7432623999064348, "grad_norm": 0.392578125, "learning_rate": 4.9302601559863254e-06, "loss": 1.971, "step": 23037 }, { "epoch": 0.7432946637602311, "grad_norm": 0.419921875, "learning_rate": 4.929093805018253e-06, "loss": 1.9232, "step": 23038 }, { "epoch": 0.7433269276140275, "grad_norm": 0.400390625, "learning_rate": 4.927927564903497e-06, "loss": 1.9337, "step": 23039 }, { "epoch": 0.7433591914678238, "grad_norm": 0.39453125, "learning_rate": 4.9267614356548886e-06, "loss": 1.9819, "step": 23040 }, { "epoch": 0.7433914553216202, "grad_norm": 0.462890625, "learning_rate": 4.925595417285258e-06, "loss": 1.967, "step": 23041 }, { "epoch": 0.7434237191754166, "grad_norm": 0.384765625, "learning_rate": 4.924429509807451e-06, "loss": 1.9234, "step": 23042 }, { "epoch": 0.7434559830292129, "grad_norm": 0.44140625, "learning_rate": 4.923263713234295e-06, "loss": 1.9399, "step": 23043 }, { "epoch": 0.7434882468830093, "grad_norm": 0.4609375, "learning_rate": 4.9220980275786205e-06, "loss": 1.9414, "step": 23044 }, { "epoch": 0.7435205107368056, "grad_norm": 0.41796875, "learning_rate": 4.920932452853265e-06, "loss": 1.9743, "step": 23045 }, { "epoch": 0.743552774590602, "grad_norm": 0.390625, "learning_rate": 4.919766989071053e-06, "loss": 1.9495, "step": 23046 }, { "epoch": 0.7435850384443983, "grad_norm": 0.40625, "learning_rate": 4.91860163624481e-06, "loss": 1.9472, "step": 23047 }, { "epoch": 0.7436173022981947, "grad_norm": 0.46875, "learning_rate": 4.917436394387375e-06, "loss": 1.9475, "step": 23048 }, { "epoch": 0.743649566151991, "grad_norm": 0.37890625, "learning_rate": 4.916271263511565e-06, "loss": 1.9861, "step": 23049 }, { "epoch": 0.7436818300057874, "grad_norm": 0.40234375, "learning_rate": 4.915106243630201e-06, "loss": 1.9582, "step": 23050 }, { "epoch": 0.7437140938595836, "grad_norm": 0.396484375, "learning_rate": 4.913941334756119e-06, "loss": 1.9335, "step": 23051 }, { "epoch": 0.74374635771338, "grad_norm": 0.447265625, "learning_rate": 4.912776536902134e-06, "loss": 1.9921, "step": 23052 }, { "epoch": 0.7437786215671763, "grad_norm": 0.41015625, "learning_rate": 4.9116118500810646e-06, "loss": 1.9303, "step": 23053 }, { "epoch": 0.7438108854209727, "grad_norm": 0.37890625, "learning_rate": 4.910447274305738e-06, "loss": 1.9657, "step": 23054 }, { "epoch": 0.743843149274769, "grad_norm": 0.443359375, "learning_rate": 4.909282809588971e-06, "loss": 1.9946, "step": 23055 }, { "epoch": 0.7438754131285654, "grad_norm": 0.4140625, "learning_rate": 4.908118455943575e-06, "loss": 1.9389, "step": 23056 }, { "epoch": 0.7439076769823617, "grad_norm": 0.404296875, "learning_rate": 4.906954213382376e-06, "loss": 1.945, "step": 23057 }, { "epoch": 0.7439399408361581, "grad_norm": 0.41015625, "learning_rate": 4.905790081918184e-06, "loss": 1.93, "step": 23058 }, { "epoch": 0.7439722046899544, "grad_norm": 0.447265625, "learning_rate": 4.904626061563812e-06, "loss": 1.9528, "step": 23059 }, { "epoch": 0.7440044685437508, "grad_norm": 0.39453125, "learning_rate": 4.90346215233207e-06, "loss": 1.9361, "step": 23060 }, { "epoch": 0.7440367323975472, "grad_norm": 0.388671875, "learning_rate": 4.9022983542357735e-06, "loss": 1.9673, "step": 23061 }, { "epoch": 0.7440689962513435, "grad_norm": 0.412109375, "learning_rate": 4.901134667287743e-06, "loss": 1.9347, "step": 23062 }, { "epoch": 0.7441012601051399, "grad_norm": 0.39453125, "learning_rate": 4.8999710915007675e-06, "loss": 1.9378, "step": 23063 }, { "epoch": 0.7441335239589362, "grad_norm": 0.4140625, "learning_rate": 4.898807626887663e-06, "loss": 1.8769, "step": 23064 }, { "epoch": 0.7441657878127326, "grad_norm": 0.390625, "learning_rate": 4.897644273461248e-06, "loss": 1.8731, "step": 23065 }, { "epoch": 0.7441980516665289, "grad_norm": 0.396484375, "learning_rate": 4.896481031234307e-06, "loss": 1.971, "step": 23066 }, { "epoch": 0.7442303155203253, "grad_norm": 0.373046875, "learning_rate": 4.895317900219661e-06, "loss": 1.8816, "step": 23067 }, { "epoch": 0.7442625793741215, "grad_norm": 0.390625, "learning_rate": 4.894154880430107e-06, "loss": 1.9221, "step": 23068 }, { "epoch": 0.744294843227918, "grad_norm": 0.41796875, "learning_rate": 4.892991971878439e-06, "loss": 1.8724, "step": 23069 }, { "epoch": 0.7443271070817142, "grad_norm": 0.38671875, "learning_rate": 4.89182917457747e-06, "loss": 1.9146, "step": 23070 }, { "epoch": 0.7443593709355106, "grad_norm": 0.390625, "learning_rate": 4.890666488539995e-06, "loss": 1.8913, "step": 23071 }, { "epoch": 0.7443916347893069, "grad_norm": 0.37890625, "learning_rate": 4.889503913778806e-06, "loss": 1.9017, "step": 23072 }, { "epoch": 0.7444238986431033, "grad_norm": 0.3828125, "learning_rate": 4.888341450306709e-06, "loss": 1.8987, "step": 23073 }, { "epoch": 0.7444561624968996, "grad_norm": 0.3984375, "learning_rate": 4.887179098136494e-06, "loss": 1.9421, "step": 23074 }, { "epoch": 0.744488426350696, "grad_norm": 0.40234375, "learning_rate": 4.886016857280954e-06, "loss": 1.8851, "step": 23075 }, { "epoch": 0.7445206902044923, "grad_norm": 0.439453125, "learning_rate": 4.884854727752888e-06, "loss": 1.9255, "step": 23076 }, { "epoch": 0.7445529540582887, "grad_norm": 0.380859375, "learning_rate": 4.883692709565085e-06, "loss": 1.9607, "step": 23077 }, { "epoch": 0.744585217912085, "grad_norm": 0.384765625, "learning_rate": 4.882530802730332e-06, "loss": 1.9248, "step": 23078 }, { "epoch": 0.7446174817658814, "grad_norm": 0.390625, "learning_rate": 4.881369007261424e-06, "loss": 1.8938, "step": 23079 }, { "epoch": 0.7446497456196777, "grad_norm": 0.390625, "learning_rate": 4.880207323171148e-06, "loss": 1.8878, "step": 23080 }, { "epoch": 0.7446820094734741, "grad_norm": 0.40234375, "learning_rate": 4.879045750472285e-06, "loss": 1.8731, "step": 23081 }, { "epoch": 0.7447142733272705, "grad_norm": 0.384765625, "learning_rate": 4.87788428917763e-06, "loss": 1.8532, "step": 23082 }, { "epoch": 0.7447465371810668, "grad_norm": 0.52734375, "learning_rate": 4.876722939299964e-06, "loss": 1.8245, "step": 23083 }, { "epoch": 0.7447788010348632, "grad_norm": 0.3828125, "learning_rate": 4.875561700852063e-06, "loss": 1.866, "step": 23084 }, { "epoch": 0.7448110648886594, "grad_norm": 0.396484375, "learning_rate": 4.874400573846723e-06, "loss": 1.9014, "step": 23085 }, { "epoch": 0.7448433287424558, "grad_norm": 0.384765625, "learning_rate": 4.8732395582967156e-06, "loss": 1.8168, "step": 23086 }, { "epoch": 0.7448755925962521, "grad_norm": 0.380859375, "learning_rate": 4.872078654214818e-06, "loss": 1.8715, "step": 23087 }, { "epoch": 0.7449078564500485, "grad_norm": 0.384765625, "learning_rate": 4.870917861613817e-06, "loss": 1.8532, "step": 23088 }, { "epoch": 0.7449401203038448, "grad_norm": 0.3828125, "learning_rate": 4.869757180506488e-06, "loss": 1.8485, "step": 23089 }, { "epoch": 0.7449723841576412, "grad_norm": 0.70703125, "learning_rate": 4.868596610905602e-06, "loss": 1.7732, "step": 23090 }, { "epoch": 0.7450046480114375, "grad_norm": 0.376953125, "learning_rate": 4.867436152823933e-06, "loss": 1.8267, "step": 23091 }, { "epoch": 0.7450369118652339, "grad_norm": 0.384765625, "learning_rate": 4.866275806274262e-06, "loss": 1.8771, "step": 23092 }, { "epoch": 0.7450691757190302, "grad_norm": 0.388671875, "learning_rate": 4.865115571269355e-06, "loss": 1.8304, "step": 23093 }, { "epoch": 0.7451014395728266, "grad_norm": 0.400390625, "learning_rate": 4.863955447821982e-06, "loss": 1.8743, "step": 23094 }, { "epoch": 0.7451337034266229, "grad_norm": 0.380859375, "learning_rate": 4.862795435944916e-06, "loss": 1.8489, "step": 23095 }, { "epoch": 0.7451659672804193, "grad_norm": 0.38671875, "learning_rate": 4.8616355356509355e-06, "loss": 1.8669, "step": 23096 }, { "epoch": 0.7451982311342156, "grad_norm": 0.515625, "learning_rate": 4.860475746952787e-06, "loss": 1.8675, "step": 23097 }, { "epoch": 0.745230494988012, "grad_norm": 0.376953125, "learning_rate": 4.859316069863254e-06, "loss": 1.837, "step": 23098 }, { "epoch": 0.7452627588418083, "grad_norm": 0.4296875, "learning_rate": 4.8581565043950935e-06, "loss": 1.792, "step": 23099 }, { "epoch": 0.7452950226956047, "grad_norm": 0.61328125, "learning_rate": 4.856997050561065e-06, "loss": 1.8288, "step": 23100 }, { "epoch": 0.745327286549401, "grad_norm": 0.384765625, "learning_rate": 4.855837708373942e-06, "loss": 1.8224, "step": 23101 }, { "epoch": 0.7453595504031973, "grad_norm": 0.3984375, "learning_rate": 4.854678477846481e-06, "loss": 1.828, "step": 23102 }, { "epoch": 0.7453918142569937, "grad_norm": 0.38671875, "learning_rate": 4.8535193589914355e-06, "loss": 1.8146, "step": 23103 }, { "epoch": 0.74542407811079, "grad_norm": 0.376953125, "learning_rate": 4.852360351821574e-06, "loss": 1.8248, "step": 23104 }, { "epoch": 0.7454563419645864, "grad_norm": 0.36328125, "learning_rate": 4.85120145634965e-06, "loss": 1.8385, "step": 23105 }, { "epoch": 0.7454886058183827, "grad_norm": 0.375, "learning_rate": 4.850042672588415e-06, "loss": 1.9025, "step": 23106 }, { "epoch": 0.7455208696721791, "grad_norm": 0.373046875, "learning_rate": 4.848884000550635e-06, "loss": 1.8585, "step": 23107 }, { "epoch": 0.7455531335259754, "grad_norm": 0.375, "learning_rate": 4.847725440249054e-06, "loss": 1.819, "step": 23108 }, { "epoch": 0.7455853973797718, "grad_norm": 0.3984375, "learning_rate": 4.846566991696426e-06, "loss": 1.8204, "step": 23109 }, { "epoch": 0.7456176612335681, "grad_norm": 0.396484375, "learning_rate": 4.845408654905509e-06, "loss": 1.8253, "step": 23110 }, { "epoch": 0.7456499250873645, "grad_norm": 0.376953125, "learning_rate": 4.844250429889049e-06, "loss": 1.8702, "step": 23111 }, { "epoch": 0.7456821889411608, "grad_norm": 0.36328125, "learning_rate": 4.8430923166597895e-06, "loss": 1.906, "step": 23112 }, { "epoch": 0.7457144527949572, "grad_norm": 0.369140625, "learning_rate": 4.841934315230488e-06, "loss": 1.8435, "step": 23113 }, { "epoch": 0.7457467166487535, "grad_norm": 0.392578125, "learning_rate": 4.840776425613887e-06, "loss": 1.849, "step": 23114 }, { "epoch": 0.7457789805025499, "grad_norm": 0.404296875, "learning_rate": 4.839618647822725e-06, "loss": 1.8306, "step": 23115 }, { "epoch": 0.7458112443563462, "grad_norm": 0.37109375, "learning_rate": 4.8384609818697565e-06, "loss": 1.8595, "step": 23116 }, { "epoch": 0.7458435082101426, "grad_norm": 0.38671875, "learning_rate": 4.83730342776772e-06, "loss": 1.8447, "step": 23117 }, { "epoch": 0.7458757720639388, "grad_norm": 0.380859375, "learning_rate": 4.836145985529357e-06, "loss": 1.8639, "step": 23118 }, { "epoch": 0.7459080359177352, "grad_norm": 0.369140625, "learning_rate": 4.834988655167402e-06, "loss": 1.8433, "step": 23119 }, { "epoch": 0.7459402997715315, "grad_norm": 0.37109375, "learning_rate": 4.8338314366946054e-06, "loss": 1.8159, "step": 23120 }, { "epoch": 0.7459725636253279, "grad_norm": 0.361328125, "learning_rate": 4.8326743301236975e-06, "loss": 1.8908, "step": 23121 }, { "epoch": 0.7460048274791243, "grad_norm": 0.392578125, "learning_rate": 4.831517335467414e-06, "loss": 1.8672, "step": 23122 }, { "epoch": 0.7460370913329206, "grad_norm": 0.369140625, "learning_rate": 4.830360452738496e-06, "loss": 1.8615, "step": 23123 }, { "epoch": 0.746069355186717, "grad_norm": 0.380859375, "learning_rate": 4.829203681949674e-06, "loss": 1.8676, "step": 23124 }, { "epoch": 0.7461016190405133, "grad_norm": 0.3828125, "learning_rate": 4.828047023113677e-06, "loss": 1.8607, "step": 23125 }, { "epoch": 0.7461338828943097, "grad_norm": 0.369140625, "learning_rate": 4.826890476243248e-06, "loss": 1.8601, "step": 23126 }, { "epoch": 0.746166146748106, "grad_norm": 0.376953125, "learning_rate": 4.825734041351107e-06, "loss": 1.8421, "step": 23127 }, { "epoch": 0.7461984106019024, "grad_norm": 0.390625, "learning_rate": 4.824577718449986e-06, "loss": 1.8438, "step": 23128 }, { "epoch": 0.7462306744556987, "grad_norm": 0.380859375, "learning_rate": 4.823421507552615e-06, "loss": 1.8283, "step": 23129 }, { "epoch": 0.7462629383094951, "grad_norm": 0.365234375, "learning_rate": 4.82226540867172e-06, "loss": 1.8588, "step": 23130 }, { "epoch": 0.7462952021632914, "grad_norm": 0.369140625, "learning_rate": 4.8211094218200216e-06, "loss": 1.8533, "step": 23131 }, { "epoch": 0.7463274660170878, "grad_norm": 0.375, "learning_rate": 4.819953547010254e-06, "loss": 1.8029, "step": 23132 }, { "epoch": 0.7463597298708841, "grad_norm": 0.375, "learning_rate": 4.818797784255133e-06, "loss": 1.8545, "step": 23133 }, { "epoch": 0.7463919937246805, "grad_norm": 0.41015625, "learning_rate": 4.817642133567378e-06, "loss": 1.82, "step": 23134 }, { "epoch": 0.7464242575784767, "grad_norm": 0.390625, "learning_rate": 4.8164865949597195e-06, "loss": 1.831, "step": 23135 }, { "epoch": 0.7464565214322731, "grad_norm": 0.419921875, "learning_rate": 4.81533116844487e-06, "loss": 1.9137, "step": 23136 }, { "epoch": 0.7464887852860694, "grad_norm": 0.373046875, "learning_rate": 4.814175854035543e-06, "loss": 1.8017, "step": 23137 }, { "epoch": 0.7465210491398658, "grad_norm": 0.375, "learning_rate": 4.813020651744467e-06, "loss": 1.8779, "step": 23138 }, { "epoch": 0.7465533129936621, "grad_norm": 0.38671875, "learning_rate": 4.81186556158435e-06, "loss": 1.817, "step": 23139 }, { "epoch": 0.7465855768474585, "grad_norm": 0.4296875, "learning_rate": 4.810710583567905e-06, "loss": 1.8972, "step": 23140 }, { "epoch": 0.7466178407012548, "grad_norm": 0.375, "learning_rate": 4.809555717707853e-06, "loss": 1.8408, "step": 23141 }, { "epoch": 0.7466501045550512, "grad_norm": 0.39453125, "learning_rate": 4.808400964016899e-06, "loss": 1.8263, "step": 23142 }, { "epoch": 0.7466823684088476, "grad_norm": 0.416015625, "learning_rate": 4.807246322507751e-06, "loss": 1.8359, "step": 23143 }, { "epoch": 0.7467146322626439, "grad_norm": 0.380859375, "learning_rate": 4.80609179319313e-06, "loss": 1.8607, "step": 23144 }, { "epoch": 0.7467468961164403, "grad_norm": 0.38671875, "learning_rate": 4.804937376085736e-06, "loss": 1.8489, "step": 23145 }, { "epoch": 0.7467791599702366, "grad_norm": 0.431640625, "learning_rate": 4.803783071198274e-06, "loss": 1.8265, "step": 23146 }, { "epoch": 0.746811423824033, "grad_norm": 0.392578125, "learning_rate": 4.802628878543457e-06, "loss": 1.8334, "step": 23147 }, { "epoch": 0.7468436876778293, "grad_norm": 0.3671875, "learning_rate": 4.801474798133985e-06, "loss": 1.8461, "step": 23148 }, { "epoch": 0.7468759515316257, "grad_norm": 0.39453125, "learning_rate": 4.800320829982564e-06, "loss": 1.8501, "step": 23149 }, { "epoch": 0.746908215385422, "grad_norm": 0.412109375, "learning_rate": 4.7991669741018875e-06, "loss": 1.8959, "step": 23150 }, { "epoch": 0.7469404792392184, "grad_norm": 0.404296875, "learning_rate": 4.798013230504669e-06, "loss": 1.8573, "step": 23151 }, { "epoch": 0.7469727430930146, "grad_norm": 0.396484375, "learning_rate": 4.7968595992036024e-06, "loss": 1.8719, "step": 23152 }, { "epoch": 0.747005006946811, "grad_norm": 0.431640625, "learning_rate": 4.795706080211379e-06, "loss": 1.8298, "step": 23153 }, { "epoch": 0.7470372708006073, "grad_norm": 0.37890625, "learning_rate": 4.794552673540709e-06, "loss": 1.8149, "step": 23154 }, { "epoch": 0.7470695346544037, "grad_norm": 0.375, "learning_rate": 4.793399379204282e-06, "loss": 1.8224, "step": 23155 }, { "epoch": 0.7471017985082, "grad_norm": 0.376953125, "learning_rate": 4.792246197214785e-06, "loss": 1.8482, "step": 23156 }, { "epoch": 0.7471340623619964, "grad_norm": 0.427734375, "learning_rate": 4.791093127584925e-06, "loss": 1.8597, "step": 23157 }, { "epoch": 0.7471663262157927, "grad_norm": 0.408203125, "learning_rate": 4.7899401703273896e-06, "loss": 1.8771, "step": 23158 }, { "epoch": 0.7471985900695891, "grad_norm": 0.384765625, "learning_rate": 4.788787325454861e-06, "loss": 1.8507, "step": 23159 }, { "epoch": 0.7472308539233854, "grad_norm": 0.388671875, "learning_rate": 4.7876345929800415e-06, "loss": 1.8295, "step": 23160 }, { "epoch": 0.7472631177771818, "grad_norm": 0.376953125, "learning_rate": 4.786481972915616e-06, "loss": 1.8051, "step": 23161 }, { "epoch": 0.7472953816309781, "grad_norm": 0.427734375, "learning_rate": 4.7853294652742636e-06, "loss": 1.8666, "step": 23162 }, { "epoch": 0.7473276454847745, "grad_norm": 0.44140625, "learning_rate": 4.784177070068681e-06, "loss": 1.8635, "step": 23163 }, { "epoch": 0.7473599093385709, "grad_norm": 0.38671875, "learning_rate": 4.783024787311548e-06, "loss": 1.8061, "step": 23164 }, { "epoch": 0.7473921731923672, "grad_norm": 0.412109375, "learning_rate": 4.781872617015544e-06, "loss": 1.8204, "step": 23165 }, { "epoch": 0.7474244370461636, "grad_norm": 0.427734375, "learning_rate": 4.78072055919336e-06, "loss": 1.8327, "step": 23166 }, { "epoch": 0.7474567008999599, "grad_norm": 0.42578125, "learning_rate": 4.779568613857675e-06, "loss": 1.8251, "step": 23167 }, { "epoch": 0.7474889647537563, "grad_norm": 0.396484375, "learning_rate": 4.77841678102116e-06, "loss": 1.819, "step": 23168 }, { "epoch": 0.7475212286075525, "grad_norm": 0.46875, "learning_rate": 4.777265060696505e-06, "loss": 1.8155, "step": 23169 }, { "epoch": 0.747553492461349, "grad_norm": 0.4296875, "learning_rate": 4.7761134528963845e-06, "loss": 1.8153, "step": 23170 }, { "epoch": 0.7475857563151452, "grad_norm": 0.37890625, "learning_rate": 4.7749619576334654e-06, "loss": 1.7972, "step": 23171 }, { "epoch": 0.7476180201689416, "grad_norm": 0.376953125, "learning_rate": 4.7738105749204355e-06, "loss": 1.7719, "step": 23172 }, { "epoch": 0.7476502840227379, "grad_norm": 0.64453125, "learning_rate": 4.772659304769961e-06, "loss": 1.8046, "step": 23173 }, { "epoch": 0.7476825478765343, "grad_norm": 0.36328125, "learning_rate": 4.771508147194713e-06, "loss": 1.8134, "step": 23174 }, { "epoch": 0.7477148117303306, "grad_norm": 0.474609375, "learning_rate": 4.770357102207369e-06, "loss": 1.8388, "step": 23175 }, { "epoch": 0.747747075584127, "grad_norm": 0.47265625, "learning_rate": 4.769206169820597e-06, "loss": 1.8002, "step": 23176 }, { "epoch": 0.7477793394379233, "grad_norm": 0.376953125, "learning_rate": 4.768055350047057e-06, "loss": 1.823, "step": 23177 }, { "epoch": 0.7478116032917197, "grad_norm": 0.421875, "learning_rate": 4.766904642899429e-06, "loss": 1.8301, "step": 23178 }, { "epoch": 0.747843867145516, "grad_norm": 0.412109375, "learning_rate": 4.765754048390374e-06, "loss": 1.8063, "step": 23179 }, { "epoch": 0.7478761309993124, "grad_norm": 0.404296875, "learning_rate": 4.764603566532555e-06, "loss": 1.8386, "step": 23180 }, { "epoch": 0.7479083948531087, "grad_norm": 0.396484375, "learning_rate": 4.763453197338633e-06, "loss": 1.8438, "step": 23181 }, { "epoch": 0.7479406587069051, "grad_norm": 0.37890625, "learning_rate": 4.76230294082128e-06, "loss": 1.8211, "step": 23182 }, { "epoch": 0.7479729225607015, "grad_norm": 0.376953125, "learning_rate": 4.761152796993152e-06, "loss": 1.8627, "step": 23183 }, { "epoch": 0.7480051864144978, "grad_norm": 0.3828125, "learning_rate": 4.7600027658669026e-06, "loss": 1.8282, "step": 23184 }, { "epoch": 0.7480374502682942, "grad_norm": 0.3828125, "learning_rate": 4.758852847455203e-06, "loss": 1.8469, "step": 23185 }, { "epoch": 0.7480697141220904, "grad_norm": 0.37890625, "learning_rate": 4.757703041770702e-06, "loss": 1.8266, "step": 23186 }, { "epoch": 0.7481019779758868, "grad_norm": 0.375, "learning_rate": 4.756553348826056e-06, "loss": 1.7985, "step": 23187 }, { "epoch": 0.7481342418296831, "grad_norm": 0.4140625, "learning_rate": 4.755403768633925e-06, "loss": 1.7806, "step": 23188 }, { "epoch": 0.7481665056834795, "grad_norm": 0.369140625, "learning_rate": 4.75425430120696e-06, "loss": 1.8443, "step": 23189 }, { "epoch": 0.7481987695372758, "grad_norm": 0.376953125, "learning_rate": 4.75310494655781e-06, "loss": 1.8572, "step": 23190 }, { "epoch": 0.7482310333910722, "grad_norm": 0.3984375, "learning_rate": 4.751955704699132e-06, "loss": 1.8242, "step": 23191 }, { "epoch": 0.7482632972448685, "grad_norm": 0.376953125, "learning_rate": 4.750806575643577e-06, "loss": 1.8614, "step": 23192 }, { "epoch": 0.7482955610986649, "grad_norm": 0.388671875, "learning_rate": 4.749657559403782e-06, "loss": 1.8607, "step": 23193 }, { "epoch": 0.7483278249524612, "grad_norm": 0.376953125, "learning_rate": 4.748508655992411e-06, "loss": 1.8573, "step": 23194 }, { "epoch": 0.7483600888062576, "grad_norm": 0.380859375, "learning_rate": 4.7473598654221e-06, "loss": 1.8517, "step": 23195 }, { "epoch": 0.7483923526600539, "grad_norm": 0.39453125, "learning_rate": 4.746211187705493e-06, "loss": 1.8189, "step": 23196 }, { "epoch": 0.7484246165138503, "grad_norm": 0.390625, "learning_rate": 4.745062622855242e-06, "loss": 1.8369, "step": 23197 }, { "epoch": 0.7484568803676466, "grad_norm": 0.3828125, "learning_rate": 4.743914170883985e-06, "loss": 1.8434, "step": 23198 }, { "epoch": 0.748489144221443, "grad_norm": 0.369140625, "learning_rate": 4.742765831804357e-06, "loss": 1.8403, "step": 23199 }, { "epoch": 0.7485214080752393, "grad_norm": 0.37109375, "learning_rate": 4.741617605629009e-06, "loss": 1.8156, "step": 23200 }, { "epoch": 0.7485536719290357, "grad_norm": 0.3828125, "learning_rate": 4.7404694923705765e-06, "loss": 1.843, "step": 23201 }, { "epoch": 0.748585935782832, "grad_norm": 0.380859375, "learning_rate": 4.739321492041689e-06, "loss": 1.8011, "step": 23202 }, { "epoch": 0.7486181996366283, "grad_norm": 0.369140625, "learning_rate": 4.738173604654996e-06, "loss": 1.842, "step": 23203 }, { "epoch": 0.7486504634904247, "grad_norm": 0.388671875, "learning_rate": 4.737025830223127e-06, "loss": 1.8387, "step": 23204 }, { "epoch": 0.748682727344221, "grad_norm": 0.36328125, "learning_rate": 4.735878168758708e-06, "loss": 1.85, "step": 23205 }, { "epoch": 0.7487149911980174, "grad_norm": 0.37890625, "learning_rate": 4.734730620274384e-06, "loss": 1.846, "step": 23206 }, { "epoch": 0.7487472550518137, "grad_norm": 0.380859375, "learning_rate": 4.733583184782782e-06, "loss": 1.7932, "step": 23207 }, { "epoch": 0.7487795189056101, "grad_norm": 0.36328125, "learning_rate": 4.7324358622965256e-06, "loss": 1.8287, "step": 23208 }, { "epoch": 0.7488117827594064, "grad_norm": 0.40234375, "learning_rate": 4.731288652828255e-06, "loss": 1.8399, "step": 23209 }, { "epoch": 0.7488440466132028, "grad_norm": 0.37109375, "learning_rate": 4.730141556390592e-06, "loss": 1.8348, "step": 23210 }, { "epoch": 0.7488763104669991, "grad_norm": 0.38671875, "learning_rate": 4.728994572996161e-06, "loss": 1.7924, "step": 23211 }, { "epoch": 0.7489085743207955, "grad_norm": 0.38671875, "learning_rate": 4.727847702657587e-06, "loss": 1.861, "step": 23212 }, { "epoch": 0.7489408381745918, "grad_norm": 0.46875, "learning_rate": 4.7267009453875e-06, "loss": 1.8751, "step": 23213 }, { "epoch": 0.7489731020283882, "grad_norm": 0.4453125, "learning_rate": 4.7255543011985185e-06, "loss": 1.8214, "step": 23214 }, { "epoch": 0.7490053658821845, "grad_norm": 0.423828125, "learning_rate": 4.724407770103261e-06, "loss": 1.8217, "step": 23215 }, { "epoch": 0.7490376297359809, "grad_norm": 0.37109375, "learning_rate": 4.723261352114355e-06, "loss": 1.8164, "step": 23216 }, { "epoch": 0.7490698935897772, "grad_norm": 0.39453125, "learning_rate": 4.722115047244416e-06, "loss": 1.8203, "step": 23217 }, { "epoch": 0.7491021574435736, "grad_norm": 0.37890625, "learning_rate": 4.720968855506056e-06, "loss": 1.833, "step": 23218 }, { "epoch": 0.7491344212973698, "grad_norm": 0.3828125, "learning_rate": 4.719822776911902e-06, "loss": 1.8299, "step": 23219 }, { "epoch": 0.7491666851511662, "grad_norm": 0.369140625, "learning_rate": 4.718676811474563e-06, "loss": 1.8414, "step": 23220 }, { "epoch": 0.7491989490049625, "grad_norm": 0.375, "learning_rate": 4.71753095920665e-06, "loss": 1.791, "step": 23221 }, { "epoch": 0.7492312128587589, "grad_norm": 0.380859375, "learning_rate": 4.716385220120784e-06, "loss": 1.848, "step": 23222 }, { "epoch": 0.7492634767125553, "grad_norm": 0.37109375, "learning_rate": 4.715239594229572e-06, "loss": 1.8423, "step": 23223 }, { "epoch": 0.7492957405663516, "grad_norm": 0.373046875, "learning_rate": 4.71409408154562e-06, "loss": 1.8374, "step": 23224 }, { "epoch": 0.749328004420148, "grad_norm": 0.3671875, "learning_rate": 4.712948682081546e-06, "loss": 1.893, "step": 23225 }, { "epoch": 0.7493602682739443, "grad_norm": 0.369140625, "learning_rate": 4.711803395849953e-06, "loss": 1.8885, "step": 23226 }, { "epoch": 0.7493925321277407, "grad_norm": 0.421875, "learning_rate": 4.7106582228634424e-06, "loss": 1.8617, "step": 23227 }, { "epoch": 0.749424795981537, "grad_norm": 0.373046875, "learning_rate": 4.709513163134628e-06, "loss": 1.8912, "step": 23228 }, { "epoch": 0.7494570598353334, "grad_norm": 0.474609375, "learning_rate": 4.708368216676113e-06, "loss": 1.9017, "step": 23229 }, { "epoch": 0.7494893236891297, "grad_norm": 0.384765625, "learning_rate": 4.70722338350049e-06, "loss": 1.8785, "step": 23230 }, { "epoch": 0.7495215875429261, "grad_norm": 0.39453125, "learning_rate": 4.706078663620375e-06, "loss": 1.8759, "step": 23231 }, { "epoch": 0.7495538513967224, "grad_norm": 0.3984375, "learning_rate": 4.70493405704836e-06, "loss": 1.8872, "step": 23232 }, { "epoch": 0.7495861152505188, "grad_norm": 0.3828125, "learning_rate": 4.703789563797042e-06, "loss": 1.8153, "step": 23233 }, { "epoch": 0.7496183791043151, "grad_norm": 0.373046875, "learning_rate": 4.702645183879024e-06, "loss": 1.9157, "step": 23234 }, { "epoch": 0.7496506429581115, "grad_norm": 0.373046875, "learning_rate": 4.7015009173069025e-06, "loss": 1.8675, "step": 23235 }, { "epoch": 0.7496829068119077, "grad_norm": 0.408203125, "learning_rate": 4.700356764093266e-06, "loss": 1.8716, "step": 23236 }, { "epoch": 0.7497151706657041, "grad_norm": 0.400390625, "learning_rate": 4.699212724250718e-06, "loss": 1.8822, "step": 23237 }, { "epoch": 0.7497474345195004, "grad_norm": 0.37890625, "learning_rate": 4.6980687977918455e-06, "loss": 1.8789, "step": 23238 }, { "epoch": 0.7497796983732968, "grad_norm": 0.447265625, "learning_rate": 4.696924984729238e-06, "loss": 1.8767, "step": 23239 }, { "epoch": 0.7498119622270931, "grad_norm": 0.392578125, "learning_rate": 4.6957812850754924e-06, "loss": 1.918, "step": 23240 }, { "epoch": 0.7498442260808895, "grad_norm": 0.39453125, "learning_rate": 4.694637698843195e-06, "loss": 1.8415, "step": 23241 }, { "epoch": 0.7498764899346858, "grad_norm": 0.3984375, "learning_rate": 4.693494226044932e-06, "loss": 1.8796, "step": 23242 }, { "epoch": 0.7499087537884822, "grad_norm": 0.38671875, "learning_rate": 4.692350866693288e-06, "loss": 1.8892, "step": 23243 }, { "epoch": 0.7499410176422786, "grad_norm": 0.37109375, "learning_rate": 4.691207620800853e-06, "loss": 1.9099, "step": 23244 }, { "epoch": 0.7499732814960749, "grad_norm": 0.37890625, "learning_rate": 4.69006448838021e-06, "loss": 1.8725, "step": 23245 }, { "epoch": 0.7500055453498713, "grad_norm": 0.375, "learning_rate": 4.688921469443935e-06, "loss": 1.8476, "step": 23246 }, { "epoch": 0.7500378092036676, "grad_norm": 0.384765625, "learning_rate": 4.687778564004621e-06, "loss": 1.9014, "step": 23247 }, { "epoch": 0.750070073057464, "grad_norm": 0.373046875, "learning_rate": 4.686635772074845e-06, "loss": 1.8793, "step": 23248 }, { "epoch": 0.7501023369112603, "grad_norm": 0.36328125, "learning_rate": 4.685493093667176e-06, "loss": 1.8765, "step": 23249 }, { "epoch": 0.7501346007650567, "grad_norm": 0.357421875, "learning_rate": 4.684350528794205e-06, "loss": 1.8606, "step": 23250 }, { "epoch": 0.750166864618853, "grad_norm": 0.36328125, "learning_rate": 4.683208077468502e-06, "loss": 1.908, "step": 23251 }, { "epoch": 0.7501991284726494, "grad_norm": 0.37109375, "learning_rate": 4.682065739702641e-06, "loss": 1.9074, "step": 23252 }, { "epoch": 0.7502313923264456, "grad_norm": 0.408203125, "learning_rate": 4.6809235155092e-06, "loss": 1.9035, "step": 23253 }, { "epoch": 0.750263656180242, "grad_norm": 0.37890625, "learning_rate": 4.679781404900752e-06, "loss": 1.901, "step": 23254 }, { "epoch": 0.7502959200340383, "grad_norm": 0.361328125, "learning_rate": 4.678639407889861e-06, "loss": 1.8495, "step": 23255 }, { "epoch": 0.7503281838878347, "grad_norm": 0.376953125, "learning_rate": 4.677497524489109e-06, "loss": 1.9041, "step": 23256 }, { "epoch": 0.750360447741631, "grad_norm": 0.39453125, "learning_rate": 4.676355754711056e-06, "loss": 1.8271, "step": 23257 }, { "epoch": 0.7503927115954274, "grad_norm": 0.3671875, "learning_rate": 4.675214098568271e-06, "loss": 1.9027, "step": 23258 }, { "epoch": 0.7504249754492237, "grad_norm": 0.380859375, "learning_rate": 4.674072556073326e-06, "loss": 1.8829, "step": 23259 }, { "epoch": 0.7504572393030201, "grad_norm": 0.3671875, "learning_rate": 4.6729311272387825e-06, "loss": 1.874, "step": 23260 }, { "epoch": 0.7504895031568164, "grad_norm": 0.390625, "learning_rate": 4.671789812077198e-06, "loss": 1.9153, "step": 23261 }, { "epoch": 0.7505217670106128, "grad_norm": 0.373046875, "learning_rate": 4.670648610601148e-06, "loss": 1.89, "step": 23262 }, { "epoch": 0.7505540308644091, "grad_norm": 0.3671875, "learning_rate": 4.66950752282319e-06, "loss": 1.8835, "step": 23263 }, { "epoch": 0.7505862947182055, "grad_norm": 0.396484375, "learning_rate": 4.6683665487558755e-06, "loss": 1.8784, "step": 23264 }, { "epoch": 0.7506185585720019, "grad_norm": 0.361328125, "learning_rate": 4.667225688411775e-06, "loss": 1.9283, "step": 23265 }, { "epoch": 0.7506508224257982, "grad_norm": 0.36328125, "learning_rate": 4.66608494180344e-06, "loss": 1.9164, "step": 23266 }, { "epoch": 0.7506830862795946, "grad_norm": 0.365234375, "learning_rate": 4.664944308943425e-06, "loss": 1.8575, "step": 23267 }, { "epoch": 0.7507153501333909, "grad_norm": 0.373046875, "learning_rate": 4.663803789844293e-06, "loss": 1.8901, "step": 23268 }, { "epoch": 0.7507476139871873, "grad_norm": 0.369140625, "learning_rate": 4.662663384518594e-06, "loss": 1.8213, "step": 23269 }, { "epoch": 0.7507798778409835, "grad_norm": 0.384765625, "learning_rate": 4.66152309297888e-06, "loss": 1.8557, "step": 23270 }, { "epoch": 0.7508121416947799, "grad_norm": 0.37109375, "learning_rate": 4.660382915237699e-06, "loss": 1.8816, "step": 23271 }, { "epoch": 0.7508444055485762, "grad_norm": 0.3671875, "learning_rate": 4.65924285130761e-06, "loss": 1.848, "step": 23272 }, { "epoch": 0.7508766694023726, "grad_norm": 0.4140625, "learning_rate": 4.658102901201158e-06, "loss": 1.8978, "step": 23273 }, { "epoch": 0.7509089332561689, "grad_norm": 0.375, "learning_rate": 4.656963064930884e-06, "loss": 1.8729, "step": 23274 }, { "epoch": 0.7509411971099653, "grad_norm": 0.37890625, "learning_rate": 4.655823342509346e-06, "loss": 1.8587, "step": 23275 }, { "epoch": 0.7509734609637616, "grad_norm": 0.37109375, "learning_rate": 4.654683733949084e-06, "loss": 1.8809, "step": 23276 }, { "epoch": 0.751005724817558, "grad_norm": 0.447265625, "learning_rate": 4.653544239262636e-06, "loss": 1.8649, "step": 23277 }, { "epoch": 0.7510379886713543, "grad_norm": 0.36328125, "learning_rate": 4.6524048584625556e-06, "loss": 1.8416, "step": 23278 }, { "epoch": 0.7510702525251507, "grad_norm": 0.369140625, "learning_rate": 4.65126559156138e-06, "loss": 1.8947, "step": 23279 }, { "epoch": 0.751102516378947, "grad_norm": 0.361328125, "learning_rate": 4.6501264385716415e-06, "loss": 1.8859, "step": 23280 }, { "epoch": 0.7511347802327434, "grad_norm": 0.4296875, "learning_rate": 4.6489873995058926e-06, "loss": 1.8529, "step": 23281 }, { "epoch": 0.7511670440865397, "grad_norm": 0.380859375, "learning_rate": 4.6478484743766655e-06, "loss": 1.8529, "step": 23282 }, { "epoch": 0.7511993079403361, "grad_norm": 0.3671875, "learning_rate": 4.64670966319649e-06, "loss": 1.8295, "step": 23283 }, { "epoch": 0.7512315717941325, "grad_norm": 0.375, "learning_rate": 4.645570965977912e-06, "loss": 1.8375, "step": 23284 }, { "epoch": 0.7512638356479288, "grad_norm": 0.396484375, "learning_rate": 4.644432382733461e-06, "loss": 1.8483, "step": 23285 }, { "epoch": 0.7512960995017252, "grad_norm": 0.37109375, "learning_rate": 4.643293913475664e-06, "loss": 1.8297, "step": 23286 }, { "epoch": 0.7513283633555214, "grad_norm": 0.376953125, "learning_rate": 4.6421555582170625e-06, "loss": 1.8813, "step": 23287 }, { "epoch": 0.7513606272093178, "grad_norm": 0.40625, "learning_rate": 4.641017316970181e-06, "loss": 1.8655, "step": 23288 }, { "epoch": 0.7513928910631141, "grad_norm": 0.390625, "learning_rate": 4.639879189747546e-06, "loss": 1.8703, "step": 23289 }, { "epoch": 0.7514251549169105, "grad_norm": 0.3671875, "learning_rate": 4.638741176561694e-06, "loss": 1.8607, "step": 23290 }, { "epoch": 0.7514574187707068, "grad_norm": 0.384765625, "learning_rate": 4.637603277425144e-06, "loss": 1.8641, "step": 23291 }, { "epoch": 0.7514896826245032, "grad_norm": 0.390625, "learning_rate": 4.636465492350419e-06, "loss": 1.8696, "step": 23292 }, { "epoch": 0.7515219464782995, "grad_norm": 0.365234375, "learning_rate": 4.635327821350054e-06, "loss": 1.859, "step": 23293 }, { "epoch": 0.7515542103320959, "grad_norm": 0.361328125, "learning_rate": 4.6341902644365615e-06, "loss": 1.881, "step": 23294 }, { "epoch": 0.7515864741858922, "grad_norm": 0.380859375, "learning_rate": 4.633052821622464e-06, "loss": 1.8454, "step": 23295 }, { "epoch": 0.7516187380396886, "grad_norm": 0.35546875, "learning_rate": 4.631915492920287e-06, "loss": 1.8836, "step": 23296 }, { "epoch": 0.7516510018934849, "grad_norm": 0.375, "learning_rate": 4.630778278342546e-06, "loss": 1.8642, "step": 23297 }, { "epoch": 0.7516832657472813, "grad_norm": 0.37109375, "learning_rate": 4.629641177901756e-06, "loss": 1.884, "step": 23298 }, { "epoch": 0.7517155296010776, "grad_norm": 0.3671875, "learning_rate": 4.628504191610438e-06, "loss": 1.8323, "step": 23299 }, { "epoch": 0.751747793454874, "grad_norm": 0.375, "learning_rate": 4.6273673194811084e-06, "loss": 1.8559, "step": 23300 }, { "epoch": 0.7517800573086703, "grad_norm": 0.376953125, "learning_rate": 4.626230561526276e-06, "loss": 1.825, "step": 23301 }, { "epoch": 0.7518123211624667, "grad_norm": 0.41796875, "learning_rate": 4.625093917758453e-06, "loss": 1.8562, "step": 23302 }, { "epoch": 0.751844585016263, "grad_norm": 0.384765625, "learning_rate": 4.623957388190155e-06, "loss": 1.8921, "step": 23303 }, { "epoch": 0.7518768488700593, "grad_norm": 0.36328125, "learning_rate": 4.622820972833892e-06, "loss": 1.9031, "step": 23304 }, { "epoch": 0.7519091127238557, "grad_norm": 0.376953125, "learning_rate": 4.621684671702166e-06, "loss": 1.879, "step": 23305 }, { "epoch": 0.751941376577652, "grad_norm": 0.380859375, "learning_rate": 4.6205484848074945e-06, "loss": 1.832, "step": 23306 }, { "epoch": 0.7519736404314484, "grad_norm": 0.3984375, "learning_rate": 4.6194124121623795e-06, "loss": 1.82, "step": 23307 }, { "epoch": 0.7520059042852447, "grad_norm": 0.388671875, "learning_rate": 4.6182764537793215e-06, "loss": 1.8331, "step": 23308 }, { "epoch": 0.7520381681390411, "grad_norm": 0.4375, "learning_rate": 4.617140609670833e-06, "loss": 1.831, "step": 23309 }, { "epoch": 0.7520704319928374, "grad_norm": 0.376953125, "learning_rate": 4.6160048798494105e-06, "loss": 1.8219, "step": 23310 }, { "epoch": 0.7521026958466338, "grad_norm": 0.380859375, "learning_rate": 4.6148692643275535e-06, "loss": 1.8075, "step": 23311 }, { "epoch": 0.7521349597004301, "grad_norm": 0.39453125, "learning_rate": 4.61373376311777e-06, "loss": 1.793, "step": 23312 }, { "epoch": 0.7521672235542265, "grad_norm": 0.640625, "learning_rate": 4.6125983762325535e-06, "loss": 1.8001, "step": 23313 }, { "epoch": 0.7521994874080228, "grad_norm": 0.37890625, "learning_rate": 4.611463103684397e-06, "loss": 1.8253, "step": 23314 }, { "epoch": 0.7522317512618192, "grad_norm": 0.38671875, "learning_rate": 4.61032794548581e-06, "loss": 1.7802, "step": 23315 }, { "epoch": 0.7522640151156155, "grad_norm": 0.392578125, "learning_rate": 4.609192901649276e-06, "loss": 1.8159, "step": 23316 }, { "epoch": 0.7522962789694119, "grad_norm": 0.427734375, "learning_rate": 4.608057972187288e-06, "loss": 1.8183, "step": 23317 }, { "epoch": 0.7523285428232082, "grad_norm": 0.41015625, "learning_rate": 4.606923157112348e-06, "loss": 1.8179, "step": 23318 }, { "epoch": 0.7523608066770046, "grad_norm": 0.38671875, "learning_rate": 4.6057884564369424e-06, "loss": 1.8112, "step": 23319 }, { "epoch": 0.7523930705308008, "grad_norm": 0.40625, "learning_rate": 4.604653870173555e-06, "loss": 1.8544, "step": 23320 }, { "epoch": 0.7524253343845972, "grad_norm": 0.373046875, "learning_rate": 4.603519398334689e-06, "loss": 1.8079, "step": 23321 }, { "epoch": 0.7524575982383935, "grad_norm": 0.40234375, "learning_rate": 4.6023850409328155e-06, "loss": 1.7905, "step": 23322 }, { "epoch": 0.7524898620921899, "grad_norm": 0.400390625, "learning_rate": 4.601250797980428e-06, "loss": 1.8268, "step": 23323 }, { "epoch": 0.7525221259459863, "grad_norm": 0.384765625, "learning_rate": 4.60011666949002e-06, "loss": 1.8189, "step": 23324 }, { "epoch": 0.7525543897997826, "grad_norm": 0.38671875, "learning_rate": 4.598982655474058e-06, "loss": 1.8101, "step": 23325 }, { "epoch": 0.752586653653579, "grad_norm": 0.3828125, "learning_rate": 4.597848755945033e-06, "loss": 1.8812, "step": 23326 }, { "epoch": 0.7526189175073753, "grad_norm": 0.361328125, "learning_rate": 4.596714970915432e-06, "loss": 1.8248, "step": 23327 }, { "epoch": 0.7526511813611717, "grad_norm": 0.375, "learning_rate": 4.595581300397728e-06, "loss": 1.8607, "step": 23328 }, { "epoch": 0.752683445214968, "grad_norm": 0.380859375, "learning_rate": 4.594447744404398e-06, "loss": 1.8083, "step": 23329 }, { "epoch": 0.7527157090687644, "grad_norm": 0.376953125, "learning_rate": 4.593314302947925e-06, "loss": 1.8441, "step": 23330 }, { "epoch": 0.7527479729225607, "grad_norm": 0.3828125, "learning_rate": 4.592180976040784e-06, "loss": 1.8165, "step": 23331 }, { "epoch": 0.7527802367763571, "grad_norm": 0.447265625, "learning_rate": 4.591047763695448e-06, "loss": 1.8239, "step": 23332 }, { "epoch": 0.7528125006301534, "grad_norm": 0.37109375, "learning_rate": 4.589914665924385e-06, "loss": 1.8442, "step": 23333 }, { "epoch": 0.7528447644839498, "grad_norm": 0.3828125, "learning_rate": 4.588781682740079e-06, "loss": 1.8409, "step": 23334 }, { "epoch": 0.7528770283377461, "grad_norm": 0.412109375, "learning_rate": 4.587648814154995e-06, "loss": 1.8056, "step": 23335 }, { "epoch": 0.7529092921915425, "grad_norm": 0.384765625, "learning_rate": 4.586516060181597e-06, "loss": 1.7947, "step": 23336 }, { "epoch": 0.7529415560453387, "grad_norm": 0.37109375, "learning_rate": 4.585383420832367e-06, "loss": 1.8416, "step": 23337 }, { "epoch": 0.7529738198991351, "grad_norm": 0.39453125, "learning_rate": 4.584250896119763e-06, "loss": 1.7456, "step": 23338 }, { "epoch": 0.7530060837529314, "grad_norm": 0.365234375, "learning_rate": 4.5831184860562495e-06, "loss": 1.8183, "step": 23339 }, { "epoch": 0.7530383476067278, "grad_norm": 0.392578125, "learning_rate": 4.581986190654298e-06, "loss": 1.8336, "step": 23340 }, { "epoch": 0.7530706114605241, "grad_norm": 0.3984375, "learning_rate": 4.58085400992637e-06, "loss": 1.7898, "step": 23341 }, { "epoch": 0.7531028753143205, "grad_norm": 0.373046875, "learning_rate": 4.579721943884921e-06, "loss": 1.8331, "step": 23342 }, { "epoch": 0.7531351391681168, "grad_norm": 0.408203125, "learning_rate": 4.578589992542422e-06, "loss": 1.8725, "step": 23343 }, { "epoch": 0.7531674030219132, "grad_norm": 0.39453125, "learning_rate": 4.577458155911328e-06, "loss": 1.782, "step": 23344 }, { "epoch": 0.7531996668757096, "grad_norm": 0.369140625, "learning_rate": 4.576326434004093e-06, "loss": 1.794, "step": 23345 }, { "epoch": 0.7532319307295059, "grad_norm": 0.390625, "learning_rate": 4.575194826833183e-06, "loss": 1.8041, "step": 23346 }, { "epoch": 0.7532641945833023, "grad_norm": 0.408203125, "learning_rate": 4.57406333441105e-06, "loss": 1.814, "step": 23347 }, { "epoch": 0.7532964584370986, "grad_norm": 0.37890625, "learning_rate": 4.5729319567501436e-06, "loss": 1.8127, "step": 23348 }, { "epoch": 0.753328722290895, "grad_norm": 0.353515625, "learning_rate": 4.571800693862927e-06, "loss": 1.8372, "step": 23349 }, { "epoch": 0.7533609861446913, "grad_norm": 0.380859375, "learning_rate": 4.570669545761846e-06, "loss": 1.8316, "step": 23350 }, { "epoch": 0.7533932499984877, "grad_norm": 0.38671875, "learning_rate": 4.569538512459348e-06, "loss": 1.8352, "step": 23351 }, { "epoch": 0.753425513852284, "grad_norm": 0.373046875, "learning_rate": 4.568407593967899e-06, "loss": 1.8391, "step": 23352 }, { "epoch": 0.7534577777060804, "grad_norm": 0.369140625, "learning_rate": 4.5672767902999235e-06, "loss": 1.8338, "step": 23353 }, { "epoch": 0.7534900415598766, "grad_norm": 0.37109375, "learning_rate": 4.566146101467882e-06, "loss": 1.7923, "step": 23354 }, { "epoch": 0.753522305413673, "grad_norm": 0.38671875, "learning_rate": 4.565015527484229e-06, "loss": 1.7757, "step": 23355 }, { "epoch": 0.7535545692674693, "grad_norm": 0.3984375, "learning_rate": 4.5638850683613875e-06, "loss": 1.8149, "step": 23356 }, { "epoch": 0.7535868331212657, "grad_norm": 0.39453125, "learning_rate": 4.562754724111814e-06, "loss": 1.8743, "step": 23357 }, { "epoch": 0.753619096975062, "grad_norm": 0.369140625, "learning_rate": 4.561624494747958e-06, "loss": 1.839, "step": 23358 }, { "epoch": 0.7536513608288584, "grad_norm": 0.423828125, "learning_rate": 4.560494380282242e-06, "loss": 1.8049, "step": 23359 }, { "epoch": 0.7536836246826547, "grad_norm": 0.3984375, "learning_rate": 4.559364380727114e-06, "loss": 1.8122, "step": 23360 }, { "epoch": 0.7537158885364511, "grad_norm": 0.380859375, "learning_rate": 4.558234496095018e-06, "loss": 1.7705, "step": 23361 }, { "epoch": 0.7537481523902474, "grad_norm": 0.3828125, "learning_rate": 4.5571047263983845e-06, "loss": 1.8036, "step": 23362 }, { "epoch": 0.7537804162440438, "grad_norm": 0.421875, "learning_rate": 4.555975071649652e-06, "loss": 1.8288, "step": 23363 }, { "epoch": 0.7538126800978401, "grad_norm": 0.375, "learning_rate": 4.554845531861248e-06, "loss": 1.8522, "step": 23364 }, { "epoch": 0.7538449439516365, "grad_norm": 0.380859375, "learning_rate": 4.5537161070456155e-06, "loss": 1.8091, "step": 23365 }, { "epoch": 0.7538772078054329, "grad_norm": 0.421875, "learning_rate": 4.552586797215183e-06, "loss": 1.8242, "step": 23366 }, { "epoch": 0.7539094716592292, "grad_norm": 0.365234375, "learning_rate": 4.551457602382375e-06, "loss": 1.847, "step": 23367 }, { "epoch": 0.7539417355130256, "grad_norm": 0.380859375, "learning_rate": 4.55032852255963e-06, "loss": 1.8413, "step": 23368 }, { "epoch": 0.7539739993668219, "grad_norm": 0.380859375, "learning_rate": 4.549199557759372e-06, "loss": 1.7736, "step": 23369 }, { "epoch": 0.7540062632206183, "grad_norm": 0.380859375, "learning_rate": 4.548070707994024e-06, "loss": 1.8157, "step": 23370 }, { "epoch": 0.7540385270744145, "grad_norm": 0.3671875, "learning_rate": 4.546941973276019e-06, "loss": 1.8298, "step": 23371 }, { "epoch": 0.7540707909282109, "grad_norm": 0.380859375, "learning_rate": 4.5458133536177795e-06, "loss": 1.7795, "step": 23372 }, { "epoch": 0.7541030547820072, "grad_norm": 0.388671875, "learning_rate": 4.544684849031721e-06, "loss": 1.8638, "step": 23373 }, { "epoch": 0.7541353186358036, "grad_norm": 0.41015625, "learning_rate": 4.543556459530277e-06, "loss": 1.7805, "step": 23374 }, { "epoch": 0.7541675824895999, "grad_norm": 0.373046875, "learning_rate": 4.542428185125861e-06, "loss": 1.7763, "step": 23375 }, { "epoch": 0.7541998463433963, "grad_norm": 0.392578125, "learning_rate": 4.541300025830887e-06, "loss": 1.8628, "step": 23376 }, { "epoch": 0.7542321101971926, "grad_norm": 0.359375, "learning_rate": 4.540171981657786e-06, "loss": 1.8035, "step": 23377 }, { "epoch": 0.754264374050989, "grad_norm": 0.3671875, "learning_rate": 4.539044052618968e-06, "loss": 1.8152, "step": 23378 }, { "epoch": 0.7542966379047853, "grad_norm": 0.369140625, "learning_rate": 4.537916238726844e-06, "loss": 1.7872, "step": 23379 }, { "epoch": 0.7543289017585817, "grad_norm": 0.37890625, "learning_rate": 4.536788539993836e-06, "loss": 1.772, "step": 23380 }, { "epoch": 0.754361165612378, "grad_norm": 0.37890625, "learning_rate": 4.535660956432354e-06, "loss": 1.8173, "step": 23381 }, { "epoch": 0.7543934294661744, "grad_norm": 0.365234375, "learning_rate": 4.534533488054805e-06, "loss": 1.8509, "step": 23382 }, { "epoch": 0.7544256933199707, "grad_norm": 0.38671875, "learning_rate": 4.533406134873612e-06, "loss": 1.8338, "step": 23383 }, { "epoch": 0.7544579571737671, "grad_norm": 0.3828125, "learning_rate": 4.5322788969011654e-06, "loss": 1.8337, "step": 23384 }, { "epoch": 0.7544902210275635, "grad_norm": 0.365234375, "learning_rate": 4.5311517741498855e-06, "loss": 1.8485, "step": 23385 }, { "epoch": 0.7545224848813598, "grad_norm": 0.396484375, "learning_rate": 4.530024766632184e-06, "loss": 1.7934, "step": 23386 }, { "epoch": 0.7545547487351562, "grad_norm": 0.37890625, "learning_rate": 4.528897874360451e-06, "loss": 1.8174, "step": 23387 }, { "epoch": 0.7545870125889524, "grad_norm": 0.369140625, "learning_rate": 4.527771097347097e-06, "loss": 1.8697, "step": 23388 }, { "epoch": 0.7546192764427488, "grad_norm": 0.408203125, "learning_rate": 4.526644435604536e-06, "loss": 1.7899, "step": 23389 }, { "epoch": 0.7546515402965451, "grad_norm": 0.38671875, "learning_rate": 4.52551788914515e-06, "loss": 1.8325, "step": 23390 }, { "epoch": 0.7546838041503415, "grad_norm": 0.375, "learning_rate": 4.524391457981349e-06, "loss": 1.8056, "step": 23391 }, { "epoch": 0.7547160680041378, "grad_norm": 0.36328125, "learning_rate": 4.52326514212554e-06, "loss": 1.8169, "step": 23392 }, { "epoch": 0.7547483318579342, "grad_norm": 0.392578125, "learning_rate": 4.522138941590103e-06, "loss": 1.8247, "step": 23393 }, { "epoch": 0.7547805957117305, "grad_norm": 0.42578125, "learning_rate": 4.5210128563874494e-06, "loss": 1.9227, "step": 23394 }, { "epoch": 0.7548128595655269, "grad_norm": 0.423828125, "learning_rate": 4.5198868865299646e-06, "loss": 1.9275, "step": 23395 }, { "epoch": 0.7548451234193232, "grad_norm": 0.41796875, "learning_rate": 4.51876103203005e-06, "loss": 1.9008, "step": 23396 }, { "epoch": 0.7548773872731196, "grad_norm": 0.404296875, "learning_rate": 4.517635292900096e-06, "loss": 1.8911, "step": 23397 }, { "epoch": 0.7549096511269159, "grad_norm": 0.3828125, "learning_rate": 4.516509669152486e-06, "loss": 1.9212, "step": 23398 }, { "epoch": 0.7549419149807123, "grad_norm": 0.45703125, "learning_rate": 4.515384160799622e-06, "loss": 1.8643, "step": 23399 }, { "epoch": 0.7549741788345086, "grad_norm": 0.412109375, "learning_rate": 4.514258767853889e-06, "loss": 1.9256, "step": 23400 }, { "epoch": 0.755006442688305, "grad_norm": 0.423828125, "learning_rate": 4.513133490327666e-06, "loss": 1.914, "step": 23401 }, { "epoch": 0.7550387065421013, "grad_norm": 0.400390625, "learning_rate": 4.512008328233352e-06, "loss": 1.917, "step": 23402 }, { "epoch": 0.7550709703958977, "grad_norm": 0.390625, "learning_rate": 4.510883281583328e-06, "loss": 1.8838, "step": 23403 }, { "epoch": 0.7551032342496939, "grad_norm": 0.390625, "learning_rate": 4.509758350389969e-06, "loss": 1.8898, "step": 23404 }, { "epoch": 0.7551354981034903, "grad_norm": 0.408203125, "learning_rate": 4.50863353466567e-06, "loss": 1.9237, "step": 23405 }, { "epoch": 0.7551677619572867, "grad_norm": 0.40234375, "learning_rate": 4.507508834422807e-06, "loss": 1.9459, "step": 23406 }, { "epoch": 0.755200025811083, "grad_norm": 0.404296875, "learning_rate": 4.506384249673754e-06, "loss": 1.9124, "step": 23407 }, { "epoch": 0.7552322896648794, "grad_norm": 0.416015625, "learning_rate": 4.5052597804309e-06, "loss": 1.9548, "step": 23408 }, { "epoch": 0.7552645535186757, "grad_norm": 0.39453125, "learning_rate": 4.504135426706618e-06, "loss": 1.9317, "step": 23409 }, { "epoch": 0.7552968173724721, "grad_norm": 0.453125, "learning_rate": 4.503011188513278e-06, "loss": 1.947, "step": 23410 }, { "epoch": 0.7553290812262684, "grad_norm": 0.3828125, "learning_rate": 4.501887065863265e-06, "loss": 1.9461, "step": 23411 }, { "epoch": 0.7553613450800648, "grad_norm": 0.3984375, "learning_rate": 4.500763058768948e-06, "loss": 1.9404, "step": 23412 }, { "epoch": 0.7553936089338611, "grad_norm": 0.384765625, "learning_rate": 4.499639167242696e-06, "loss": 1.9151, "step": 23413 }, { "epoch": 0.7554258727876575, "grad_norm": 0.396484375, "learning_rate": 4.498515391296892e-06, "loss": 1.8761, "step": 23414 }, { "epoch": 0.7554581366414538, "grad_norm": 0.3828125, "learning_rate": 4.497391730943886e-06, "loss": 1.9266, "step": 23415 }, { "epoch": 0.7554904004952502, "grad_norm": 0.400390625, "learning_rate": 4.496268186196057e-06, "loss": 1.9766, "step": 23416 }, { "epoch": 0.7555226643490465, "grad_norm": 0.390625, "learning_rate": 4.495144757065784e-06, "loss": 1.9095, "step": 23417 }, { "epoch": 0.7555549282028429, "grad_norm": 0.392578125, "learning_rate": 4.494021443565409e-06, "loss": 1.9545, "step": 23418 }, { "epoch": 0.7555871920566392, "grad_norm": 0.38671875, "learning_rate": 4.4928982457073116e-06, "loss": 1.8875, "step": 23419 }, { "epoch": 0.7556194559104356, "grad_norm": 0.3828125, "learning_rate": 4.491775163503861e-06, "loss": 1.9373, "step": 23420 }, { "epoch": 0.7556517197642318, "grad_norm": 0.416015625, "learning_rate": 4.4906521969674004e-06, "loss": 1.948, "step": 23421 }, { "epoch": 0.7556839836180282, "grad_norm": 0.41015625, "learning_rate": 4.4895293461103086e-06, "loss": 1.959, "step": 23422 }, { "epoch": 0.7557162474718245, "grad_norm": 0.416015625, "learning_rate": 4.488406610944935e-06, "loss": 1.9865, "step": 23423 }, { "epoch": 0.7557485113256209, "grad_norm": 0.421875, "learning_rate": 4.487283991483637e-06, "loss": 1.9054, "step": 23424 }, { "epoch": 0.7557807751794172, "grad_norm": 0.41015625, "learning_rate": 4.486161487738779e-06, "loss": 1.912, "step": 23425 }, { "epoch": 0.7558130390332136, "grad_norm": 0.3984375, "learning_rate": 4.485039099722709e-06, "loss": 1.9426, "step": 23426 }, { "epoch": 0.75584530288701, "grad_norm": 0.42578125, "learning_rate": 4.483916827447789e-06, "loss": 1.9043, "step": 23427 }, { "epoch": 0.7558775667408063, "grad_norm": 0.400390625, "learning_rate": 4.482794670926369e-06, "loss": 1.9344, "step": 23428 }, { "epoch": 0.7559098305946027, "grad_norm": 0.388671875, "learning_rate": 4.481672630170795e-06, "loss": 1.9135, "step": 23429 }, { "epoch": 0.755942094448399, "grad_norm": 0.38671875, "learning_rate": 4.480550705193427e-06, "loss": 1.9248, "step": 23430 }, { "epoch": 0.7559743583021954, "grad_norm": 0.423828125, "learning_rate": 4.4794288960066116e-06, "loss": 1.9266, "step": 23431 }, { "epoch": 0.7560066221559917, "grad_norm": 0.453125, "learning_rate": 4.47830720262269e-06, "loss": 1.9583, "step": 23432 }, { "epoch": 0.7560388860097881, "grad_norm": 0.388671875, "learning_rate": 4.4771856250540195e-06, "loss": 1.9296, "step": 23433 }, { "epoch": 0.7560711498635844, "grad_norm": 0.435546875, "learning_rate": 4.47606416331294e-06, "loss": 1.9217, "step": 23434 }, { "epoch": 0.7561034137173808, "grad_norm": 0.435546875, "learning_rate": 4.474942817411793e-06, "loss": 1.9736, "step": 23435 }, { "epoch": 0.7561356775711771, "grad_norm": 0.4296875, "learning_rate": 4.473821587362928e-06, "loss": 1.9016, "step": 23436 }, { "epoch": 0.7561679414249735, "grad_norm": 0.44140625, "learning_rate": 4.472700473178685e-06, "loss": 1.9197, "step": 23437 }, { "epoch": 0.7562002052787697, "grad_norm": 0.482421875, "learning_rate": 4.4715794748713965e-06, "loss": 1.8966, "step": 23438 }, { "epoch": 0.7562324691325661, "grad_norm": 0.39453125, "learning_rate": 4.4704585924534146e-06, "loss": 1.8972, "step": 23439 }, { "epoch": 0.7562647329863624, "grad_norm": 0.37890625, "learning_rate": 4.469337825937068e-06, "loss": 1.952, "step": 23440 }, { "epoch": 0.7562969968401588, "grad_norm": 0.451171875, "learning_rate": 4.468217175334693e-06, "loss": 1.8724, "step": 23441 }, { "epoch": 0.7563292606939551, "grad_norm": 0.447265625, "learning_rate": 4.467096640658633e-06, "loss": 1.9338, "step": 23442 }, { "epoch": 0.7563615245477515, "grad_norm": 0.419921875, "learning_rate": 4.465976221921217e-06, "loss": 1.9376, "step": 23443 }, { "epoch": 0.7563937884015478, "grad_norm": 0.421875, "learning_rate": 4.464855919134772e-06, "loss": 1.9471, "step": 23444 }, { "epoch": 0.7564260522553442, "grad_norm": 0.44140625, "learning_rate": 4.463735732311646e-06, "loss": 1.8947, "step": 23445 }, { "epoch": 0.7564583161091406, "grad_norm": 0.388671875, "learning_rate": 4.462615661464146e-06, "loss": 1.9567, "step": 23446 }, { "epoch": 0.7564905799629369, "grad_norm": 0.396484375, "learning_rate": 4.461495706604616e-06, "loss": 1.9272, "step": 23447 }, { "epoch": 0.7565228438167333, "grad_norm": 0.408203125, "learning_rate": 4.46037586774539e-06, "loss": 1.8793, "step": 23448 }, { "epoch": 0.7565551076705296, "grad_norm": 0.392578125, "learning_rate": 4.459256144898775e-06, "loss": 1.9048, "step": 23449 }, { "epoch": 0.756587371524326, "grad_norm": 0.39453125, "learning_rate": 4.458136538077107e-06, "loss": 1.8987, "step": 23450 }, { "epoch": 0.7566196353781223, "grad_norm": 0.40625, "learning_rate": 4.45701704729272e-06, "loss": 1.9543, "step": 23451 }, { "epoch": 0.7566518992319187, "grad_norm": 0.392578125, "learning_rate": 4.455897672557914e-06, "loss": 1.9677, "step": 23452 }, { "epoch": 0.756684163085715, "grad_norm": 0.427734375, "learning_rate": 4.454778413885027e-06, "loss": 1.9401, "step": 23453 }, { "epoch": 0.7567164269395114, "grad_norm": 0.458984375, "learning_rate": 4.4536592712863755e-06, "loss": 2.0285, "step": 23454 }, { "epoch": 0.7567486907933076, "grad_norm": 0.43359375, "learning_rate": 4.452540244774273e-06, "loss": 2.0052, "step": 23455 }, { "epoch": 0.756780954647104, "grad_norm": 0.439453125, "learning_rate": 4.4514213343610435e-06, "loss": 2.0513, "step": 23456 }, { "epoch": 0.7568132185009003, "grad_norm": 0.439453125, "learning_rate": 4.450302540059002e-06, "loss": 1.9933, "step": 23457 }, { "epoch": 0.7568454823546967, "grad_norm": 0.427734375, "learning_rate": 4.4491838618804555e-06, "loss": 2.0019, "step": 23458 }, { "epoch": 0.756877746208493, "grad_norm": 0.43359375, "learning_rate": 4.4480652998377285e-06, "loss": 2.0154, "step": 23459 }, { "epoch": 0.7569100100622894, "grad_norm": 0.416015625, "learning_rate": 4.4469468539431265e-06, "loss": 1.9861, "step": 23460 }, { "epoch": 0.7569422739160857, "grad_norm": 0.47265625, "learning_rate": 4.445828524208965e-06, "loss": 1.9943, "step": 23461 }, { "epoch": 0.7569745377698821, "grad_norm": 0.447265625, "learning_rate": 4.444710310647552e-06, "loss": 1.9789, "step": 23462 }, { "epoch": 0.7570068016236784, "grad_norm": 0.478515625, "learning_rate": 4.4435922132711926e-06, "loss": 1.9665, "step": 23463 }, { "epoch": 0.7570390654774748, "grad_norm": 0.439453125, "learning_rate": 4.4424742320921995e-06, "loss": 2.0308, "step": 23464 }, { "epoch": 0.7570713293312711, "grad_norm": 0.478515625, "learning_rate": 4.441356367122877e-06, "loss": 2.0058, "step": 23465 }, { "epoch": 0.7571035931850675, "grad_norm": 0.474609375, "learning_rate": 4.440238618375525e-06, "loss": 2.0252, "step": 23466 }, { "epoch": 0.7571358570388639, "grad_norm": 0.42578125, "learning_rate": 4.439120985862455e-06, "loss": 2.0171, "step": 23467 }, { "epoch": 0.7571681208926602, "grad_norm": 0.439453125, "learning_rate": 4.438003469595963e-06, "loss": 2.002, "step": 23468 }, { "epoch": 0.7572003847464566, "grad_norm": 0.4765625, "learning_rate": 4.43688606958835e-06, "loss": 1.9662, "step": 23469 }, { "epoch": 0.7572326486002529, "grad_norm": 0.396484375, "learning_rate": 4.43576878585192e-06, "loss": 2.0297, "step": 23470 }, { "epoch": 0.7572649124540493, "grad_norm": 0.46484375, "learning_rate": 4.434651618398969e-06, "loss": 1.9876, "step": 23471 }, { "epoch": 0.7572971763078455, "grad_norm": 0.4140625, "learning_rate": 4.433534567241787e-06, "loss": 2.0109, "step": 23472 }, { "epoch": 0.7573294401616419, "grad_norm": 0.51171875, "learning_rate": 4.432417632392688e-06, "loss": 1.9956, "step": 23473 }, { "epoch": 0.7573617040154382, "grad_norm": 0.490234375, "learning_rate": 4.4313008138639445e-06, "loss": 2.0475, "step": 23474 }, { "epoch": 0.7573939678692346, "grad_norm": 0.5, "learning_rate": 4.43018411166786e-06, "loss": 1.9676, "step": 23475 }, { "epoch": 0.7574262317230309, "grad_norm": 0.482421875, "learning_rate": 4.429067525816733e-06, "loss": 2.0491, "step": 23476 }, { "epoch": 0.7574584955768273, "grad_norm": 0.482421875, "learning_rate": 4.427951056322841e-06, "loss": 2.0089, "step": 23477 }, { "epoch": 0.7574907594306236, "grad_norm": 0.462890625, "learning_rate": 4.4268347031984775e-06, "loss": 2.002, "step": 23478 }, { "epoch": 0.75752302328442, "grad_norm": 0.435546875, "learning_rate": 4.425718466455943e-06, "loss": 2.0158, "step": 23479 }, { "epoch": 0.7575552871382163, "grad_norm": 0.484375, "learning_rate": 4.424602346107503e-06, "loss": 1.9808, "step": 23480 }, { "epoch": 0.7575875509920127, "grad_norm": 0.4765625, "learning_rate": 4.423486342165454e-06, "loss": 1.9768, "step": 23481 }, { "epoch": 0.757619814845809, "grad_norm": 0.490234375, "learning_rate": 4.42237045464209e-06, "loss": 2.0231, "step": 23482 }, { "epoch": 0.7576520786996054, "grad_norm": 0.45703125, "learning_rate": 4.421254683549674e-06, "loss": 1.9677, "step": 23483 }, { "epoch": 0.7576843425534017, "grad_norm": 0.4921875, "learning_rate": 4.420139028900502e-06, "loss": 1.9842, "step": 23484 }, { "epoch": 0.7577166064071981, "grad_norm": 0.4453125, "learning_rate": 4.41902349070685e-06, "loss": 1.9263, "step": 23485 }, { "epoch": 0.7577488702609945, "grad_norm": 0.47265625, "learning_rate": 4.417908068980989e-06, "loss": 1.9111, "step": 23486 }, { "epoch": 0.7577811341147908, "grad_norm": 0.4140625, "learning_rate": 4.416792763735213e-06, "loss": 1.9206, "step": 23487 }, { "epoch": 0.7578133979685872, "grad_norm": 0.40234375, "learning_rate": 4.4156775749817875e-06, "loss": 1.939, "step": 23488 }, { "epoch": 0.7578456618223834, "grad_norm": 0.443359375, "learning_rate": 4.4145625027329865e-06, "loss": 1.9166, "step": 23489 }, { "epoch": 0.7578779256761798, "grad_norm": 0.462890625, "learning_rate": 4.413447547001091e-06, "loss": 1.9235, "step": 23490 }, { "epoch": 0.7579101895299761, "grad_norm": 0.46875, "learning_rate": 4.412332707798372e-06, "loss": 1.9085, "step": 23491 }, { "epoch": 0.7579424533837725, "grad_norm": 0.439453125, "learning_rate": 4.411217985137093e-06, "loss": 1.8622, "step": 23492 }, { "epoch": 0.7579747172375688, "grad_norm": 0.453125, "learning_rate": 4.410103379029534e-06, "loss": 1.8949, "step": 23493 }, { "epoch": 0.7580069810913652, "grad_norm": 0.41015625, "learning_rate": 4.408988889487957e-06, "loss": 1.9312, "step": 23494 }, { "epoch": 0.7580392449451615, "grad_norm": 0.421875, "learning_rate": 4.407874516524637e-06, "loss": 1.923, "step": 23495 }, { "epoch": 0.7580715087989579, "grad_norm": 0.41796875, "learning_rate": 4.406760260151834e-06, "loss": 1.9067, "step": 23496 }, { "epoch": 0.7581037726527542, "grad_norm": 0.41015625, "learning_rate": 4.4056461203818105e-06, "loss": 1.9015, "step": 23497 }, { "epoch": 0.7581360365065506, "grad_norm": 0.421875, "learning_rate": 4.4045320972268394e-06, "loss": 1.9121, "step": 23498 }, { "epoch": 0.7581683003603469, "grad_norm": 0.38671875, "learning_rate": 4.403418190699178e-06, "loss": 1.9627, "step": 23499 }, { "epoch": 0.7582005642141433, "grad_norm": 0.40234375, "learning_rate": 4.402304400811081e-06, "loss": 1.9052, "step": 23500 }, { "epoch": 0.7582328280679396, "grad_norm": 0.40234375, "learning_rate": 4.4011907275748205e-06, "loss": 1.8945, "step": 23501 }, { "epoch": 0.758265091921736, "grad_norm": 0.396484375, "learning_rate": 4.4000771710026486e-06, "loss": 1.947, "step": 23502 }, { "epoch": 0.7582973557755323, "grad_norm": 0.4375, "learning_rate": 4.398963731106816e-06, "loss": 1.9563, "step": 23503 }, { "epoch": 0.7583296196293287, "grad_norm": 0.48828125, "learning_rate": 4.397850407899598e-06, "loss": 1.9942, "step": 23504 }, { "epoch": 0.7583618834831249, "grad_norm": 0.43359375, "learning_rate": 4.396737201393226e-06, "loss": 1.9652, "step": 23505 }, { "epoch": 0.7583941473369213, "grad_norm": 0.421875, "learning_rate": 4.395624111599964e-06, "loss": 1.9765, "step": 23506 }, { "epoch": 0.7584264111907177, "grad_norm": 0.4296875, "learning_rate": 4.394511138532074e-06, "loss": 2.0032, "step": 23507 }, { "epoch": 0.758458675044514, "grad_norm": 0.404296875, "learning_rate": 4.393398282201788e-06, "loss": 2.0196, "step": 23508 }, { "epoch": 0.7584909388983104, "grad_norm": 0.41015625, "learning_rate": 4.392285542621364e-06, "loss": 1.9893, "step": 23509 }, { "epoch": 0.7585232027521067, "grad_norm": 0.419921875, "learning_rate": 4.391172919803061e-06, "loss": 1.981, "step": 23510 }, { "epoch": 0.7585554666059031, "grad_norm": 0.4296875, "learning_rate": 4.390060413759105e-06, "loss": 2.0111, "step": 23511 }, { "epoch": 0.7585877304596994, "grad_norm": 0.412109375, "learning_rate": 4.3889480245017545e-06, "loss": 2.0118, "step": 23512 }, { "epoch": 0.7586199943134958, "grad_norm": 0.423828125, "learning_rate": 4.3878357520432595e-06, "loss": 2.0112, "step": 23513 }, { "epoch": 0.7586522581672921, "grad_norm": 0.41796875, "learning_rate": 4.386723596395845e-06, "loss": 1.9616, "step": 23514 }, { "epoch": 0.7586845220210885, "grad_norm": 0.412109375, "learning_rate": 4.38561155757177e-06, "loss": 1.9881, "step": 23515 }, { "epoch": 0.7587167858748848, "grad_norm": 0.416015625, "learning_rate": 4.384499635583268e-06, "loss": 2.0246, "step": 23516 }, { "epoch": 0.7587490497286812, "grad_norm": 0.41796875, "learning_rate": 4.3833878304425725e-06, "loss": 2.0072, "step": 23517 }, { "epoch": 0.7587813135824775, "grad_norm": 0.3828125, "learning_rate": 4.382276142161934e-06, "loss": 1.9844, "step": 23518 }, { "epoch": 0.7588135774362739, "grad_norm": 0.40234375, "learning_rate": 4.381164570753582e-06, "loss": 1.9518, "step": 23519 }, { "epoch": 0.7588458412900702, "grad_norm": 0.4296875, "learning_rate": 4.380053116229747e-06, "loss": 1.9699, "step": 23520 }, { "epoch": 0.7588781051438666, "grad_norm": 0.447265625, "learning_rate": 4.378941778602673e-06, "loss": 1.9405, "step": 23521 }, { "epoch": 0.7589103689976628, "grad_norm": 0.400390625, "learning_rate": 4.377830557884589e-06, "loss": 1.9009, "step": 23522 }, { "epoch": 0.7589426328514592, "grad_norm": 0.40625, "learning_rate": 4.376719454087722e-06, "loss": 1.9348, "step": 23523 }, { "epoch": 0.7589748967052555, "grad_norm": 0.427734375, "learning_rate": 4.37560846722431e-06, "loss": 1.8994, "step": 23524 }, { "epoch": 0.7590071605590519, "grad_norm": 0.400390625, "learning_rate": 4.374497597306574e-06, "loss": 1.8747, "step": 23525 }, { "epoch": 0.7590394244128482, "grad_norm": 0.392578125, "learning_rate": 4.373386844346749e-06, "loss": 1.9089, "step": 23526 }, { "epoch": 0.7590716882666446, "grad_norm": 0.412109375, "learning_rate": 4.37227620835706e-06, "loss": 1.9091, "step": 23527 }, { "epoch": 0.759103952120441, "grad_norm": 0.419921875, "learning_rate": 4.371165689349724e-06, "loss": 1.8916, "step": 23528 }, { "epoch": 0.7591362159742373, "grad_norm": 0.408203125, "learning_rate": 4.370055287336975e-06, "loss": 1.9318, "step": 23529 }, { "epoch": 0.7591684798280337, "grad_norm": 0.380859375, "learning_rate": 4.3689450023310325e-06, "loss": 1.9637, "step": 23530 }, { "epoch": 0.75920074368183, "grad_norm": 0.404296875, "learning_rate": 4.367834834344113e-06, "loss": 1.8867, "step": 23531 }, { "epoch": 0.7592330075356264, "grad_norm": 0.451171875, "learning_rate": 4.366724783388445e-06, "loss": 1.8699, "step": 23532 }, { "epoch": 0.7592652713894227, "grad_norm": 0.400390625, "learning_rate": 4.365614849476242e-06, "loss": 1.8549, "step": 23533 }, { "epoch": 0.7592975352432191, "grad_norm": 0.41015625, "learning_rate": 4.364505032619715e-06, "loss": 1.8602, "step": 23534 }, { "epoch": 0.7593297990970154, "grad_norm": 0.4453125, "learning_rate": 4.363395332831097e-06, "loss": 1.9089, "step": 23535 }, { "epoch": 0.7593620629508118, "grad_norm": 0.396484375, "learning_rate": 4.362285750122585e-06, "loss": 1.9359, "step": 23536 }, { "epoch": 0.759394326804608, "grad_norm": 0.400390625, "learning_rate": 4.3611762845063984e-06, "loss": 1.9172, "step": 23537 }, { "epoch": 0.7594265906584045, "grad_norm": 0.376953125, "learning_rate": 4.36006693599476e-06, "loss": 1.8962, "step": 23538 }, { "epoch": 0.7594588545122007, "grad_norm": 0.451171875, "learning_rate": 4.358957704599863e-06, "loss": 1.8697, "step": 23539 }, { "epoch": 0.7594911183659971, "grad_norm": 0.412109375, "learning_rate": 4.357848590333927e-06, "loss": 1.9245, "step": 23540 }, { "epoch": 0.7595233822197934, "grad_norm": 0.404296875, "learning_rate": 4.3567395932091656e-06, "loss": 1.8828, "step": 23541 }, { "epoch": 0.7595556460735898, "grad_norm": 0.419921875, "learning_rate": 4.355630713237772e-06, "loss": 1.9171, "step": 23542 }, { "epoch": 0.7595879099273861, "grad_norm": 0.427734375, "learning_rate": 4.354521950431959e-06, "loss": 1.8748, "step": 23543 }, { "epoch": 0.7596201737811825, "grad_norm": 0.384765625, "learning_rate": 4.35341330480394e-06, "loss": 1.9047, "step": 23544 }, { "epoch": 0.7596524376349788, "grad_norm": 0.3984375, "learning_rate": 4.3523047763659e-06, "loss": 1.9217, "step": 23545 }, { "epoch": 0.7596847014887752, "grad_norm": 0.4609375, "learning_rate": 4.351196365130057e-06, "loss": 1.8764, "step": 23546 }, { "epoch": 0.7597169653425716, "grad_norm": 0.4296875, "learning_rate": 4.350088071108604e-06, "loss": 1.9159, "step": 23547 }, { "epoch": 0.7597492291963679, "grad_norm": 0.3984375, "learning_rate": 4.3489798943137346e-06, "loss": 1.9199, "step": 23548 }, { "epoch": 0.7597814930501643, "grad_norm": 0.466796875, "learning_rate": 4.34787183475766e-06, "loss": 1.8616, "step": 23549 }, { "epoch": 0.7598137569039606, "grad_norm": 0.47265625, "learning_rate": 4.346763892452571e-06, "loss": 1.8543, "step": 23550 }, { "epoch": 0.759846020757757, "grad_norm": 0.3828125, "learning_rate": 4.345656067410657e-06, "loss": 1.8904, "step": 23551 }, { "epoch": 0.7598782846115533, "grad_norm": 0.400390625, "learning_rate": 4.344548359644122e-06, "loss": 1.9551, "step": 23552 }, { "epoch": 0.7599105484653497, "grad_norm": 0.404296875, "learning_rate": 4.343440769165154e-06, "loss": 1.8415, "step": 23553 }, { "epoch": 0.759942812319146, "grad_norm": 0.390625, "learning_rate": 4.342333295985942e-06, "loss": 1.8893, "step": 23554 }, { "epoch": 0.7599750761729424, "grad_norm": 0.392578125, "learning_rate": 4.341225940118684e-06, "loss": 1.8963, "step": 23555 }, { "epoch": 0.7600073400267386, "grad_norm": 0.38671875, "learning_rate": 4.340118701575564e-06, "loss": 1.9044, "step": 23556 }, { "epoch": 0.760039603880535, "grad_norm": 0.40625, "learning_rate": 4.339011580368764e-06, "loss": 1.8883, "step": 23557 }, { "epoch": 0.7600718677343313, "grad_norm": 0.392578125, "learning_rate": 4.337904576510482e-06, "loss": 1.9279, "step": 23558 }, { "epoch": 0.7601041315881277, "grad_norm": 0.384765625, "learning_rate": 4.336797690012892e-06, "loss": 1.9006, "step": 23559 }, { "epoch": 0.760136395441924, "grad_norm": 0.388671875, "learning_rate": 4.335690920888189e-06, "loss": 1.8942, "step": 23560 }, { "epoch": 0.7601686592957204, "grad_norm": 0.384765625, "learning_rate": 4.33458426914855e-06, "loss": 1.8493, "step": 23561 }, { "epoch": 0.7602009231495167, "grad_norm": 0.375, "learning_rate": 4.333477734806151e-06, "loss": 1.8272, "step": 23562 }, { "epoch": 0.7602331870033131, "grad_norm": 0.369140625, "learning_rate": 4.33237131787318e-06, "loss": 1.8272, "step": 23563 }, { "epoch": 0.7602654508571094, "grad_norm": 0.384765625, "learning_rate": 4.331265018361814e-06, "loss": 1.8263, "step": 23564 }, { "epoch": 0.7602977147109058, "grad_norm": 0.37109375, "learning_rate": 4.330158836284225e-06, "loss": 1.8237, "step": 23565 }, { "epoch": 0.7603299785647021, "grad_norm": 0.376953125, "learning_rate": 4.3290527716526e-06, "loss": 1.7892, "step": 23566 }, { "epoch": 0.7603622424184985, "grad_norm": 0.423828125, "learning_rate": 4.3279468244791e-06, "loss": 1.8969, "step": 23567 }, { "epoch": 0.7603945062722949, "grad_norm": 0.400390625, "learning_rate": 4.326840994775904e-06, "loss": 1.9621, "step": 23568 }, { "epoch": 0.7604267701260912, "grad_norm": 0.39453125, "learning_rate": 4.325735282555195e-06, "loss": 1.9568, "step": 23569 }, { "epoch": 0.7604590339798876, "grad_norm": 0.408203125, "learning_rate": 4.324629687829126e-06, "loss": 1.8887, "step": 23570 }, { "epoch": 0.7604912978336839, "grad_norm": 0.416015625, "learning_rate": 4.323524210609874e-06, "loss": 1.9287, "step": 23571 }, { "epoch": 0.7605235616874803, "grad_norm": 0.416015625, "learning_rate": 4.322418850909617e-06, "loss": 1.9339, "step": 23572 }, { "epoch": 0.7605558255412765, "grad_norm": 0.40625, "learning_rate": 4.321313608740506e-06, "loss": 1.9053, "step": 23573 }, { "epoch": 0.7605880893950729, "grad_norm": 0.388671875, "learning_rate": 4.320208484114716e-06, "loss": 1.9569, "step": 23574 }, { "epoch": 0.7606203532488692, "grad_norm": 0.400390625, "learning_rate": 4.3191034770444104e-06, "loss": 1.906, "step": 23575 }, { "epoch": 0.7606526171026656, "grad_norm": 0.388671875, "learning_rate": 4.317998587541745e-06, "loss": 1.8975, "step": 23576 }, { "epoch": 0.7606848809564619, "grad_norm": 0.392578125, "learning_rate": 4.316893815618895e-06, "loss": 1.8597, "step": 23577 }, { "epoch": 0.7607171448102583, "grad_norm": 0.4296875, "learning_rate": 4.3157891612880134e-06, "loss": 1.9197, "step": 23578 }, { "epoch": 0.7607494086640546, "grad_norm": 0.41015625, "learning_rate": 4.3146846245612556e-06, "loss": 1.9605, "step": 23579 }, { "epoch": 0.760781672517851, "grad_norm": 0.458984375, "learning_rate": 4.313580205450787e-06, "loss": 1.9438, "step": 23580 }, { "epoch": 0.7608139363716473, "grad_norm": 0.4296875, "learning_rate": 4.312475903968761e-06, "loss": 1.9084, "step": 23581 }, { "epoch": 0.7608462002254437, "grad_norm": 0.40625, "learning_rate": 4.311371720127331e-06, "loss": 1.8792, "step": 23582 }, { "epoch": 0.76087846407924, "grad_norm": 0.462890625, "learning_rate": 4.310267653938656e-06, "loss": 1.9233, "step": 23583 }, { "epoch": 0.7609107279330364, "grad_norm": 0.40234375, "learning_rate": 4.309163705414886e-06, "loss": 1.9493, "step": 23584 }, { "epoch": 0.7609429917868327, "grad_norm": 0.451171875, "learning_rate": 4.308059874568167e-06, "loss": 1.9302, "step": 23585 }, { "epoch": 0.7609752556406291, "grad_norm": 0.40234375, "learning_rate": 4.30695616141066e-06, "loss": 1.9437, "step": 23586 }, { "epoch": 0.7610075194944255, "grad_norm": 0.40625, "learning_rate": 4.305852565954508e-06, "loss": 1.8812, "step": 23587 }, { "epoch": 0.7610397833482218, "grad_norm": 0.3984375, "learning_rate": 4.304749088211854e-06, "loss": 1.8426, "step": 23588 }, { "epoch": 0.7610720472020182, "grad_norm": 0.384765625, "learning_rate": 4.303645728194855e-06, "loss": 1.8071, "step": 23589 }, { "epoch": 0.7611043110558144, "grad_norm": 0.3671875, "learning_rate": 4.30254248591565e-06, "loss": 1.8625, "step": 23590 }, { "epoch": 0.7611365749096108, "grad_norm": 0.39453125, "learning_rate": 4.3014393613863774e-06, "loss": 1.8712, "step": 23591 }, { "epoch": 0.7611688387634071, "grad_norm": 0.373046875, "learning_rate": 4.300336354619192e-06, "loss": 1.8279, "step": 23592 }, { "epoch": 0.7612011026172035, "grad_norm": 0.392578125, "learning_rate": 4.299233465626221e-06, "loss": 1.7941, "step": 23593 }, { "epoch": 0.7612333664709998, "grad_norm": 0.400390625, "learning_rate": 4.298130694419616e-06, "loss": 1.8294, "step": 23594 }, { "epoch": 0.7612656303247962, "grad_norm": 0.404296875, "learning_rate": 4.297028041011513e-06, "loss": 1.8188, "step": 23595 }, { "epoch": 0.7612978941785925, "grad_norm": 0.37109375, "learning_rate": 4.295925505414042e-06, "loss": 1.7995, "step": 23596 }, { "epoch": 0.7613301580323889, "grad_norm": 0.37109375, "learning_rate": 4.294823087639353e-06, "loss": 1.8701, "step": 23597 }, { "epoch": 0.7613624218861852, "grad_norm": 0.39453125, "learning_rate": 4.29372078769956e-06, "loss": 1.8332, "step": 23598 }, { "epoch": 0.7613946857399816, "grad_norm": 0.37109375, "learning_rate": 4.292618605606809e-06, "loss": 1.8682, "step": 23599 }, { "epoch": 0.7614269495937779, "grad_norm": 0.369140625, "learning_rate": 4.291516541373242e-06, "loss": 1.8591, "step": 23600 }, { "epoch": 0.7614592134475743, "grad_norm": 0.404296875, "learning_rate": 4.290414595010968e-06, "loss": 1.826, "step": 23601 }, { "epoch": 0.7614914773013706, "grad_norm": 0.416015625, "learning_rate": 4.289312766532128e-06, "loss": 1.855, "step": 23602 }, { "epoch": 0.761523741155167, "grad_norm": 0.390625, "learning_rate": 4.288211055948859e-06, "loss": 1.8711, "step": 23603 }, { "epoch": 0.7615560050089633, "grad_norm": 0.392578125, "learning_rate": 4.2871094632732665e-06, "loss": 1.8414, "step": 23604 }, { "epoch": 0.7615882688627597, "grad_norm": 0.38671875, "learning_rate": 4.286007988517494e-06, "loss": 1.8522, "step": 23605 }, { "epoch": 0.7616205327165559, "grad_norm": 0.380859375, "learning_rate": 4.2849066316936584e-06, "loss": 1.8634, "step": 23606 }, { "epoch": 0.7616527965703523, "grad_norm": 0.4375, "learning_rate": 4.2838053928138805e-06, "loss": 1.8933, "step": 23607 }, { "epoch": 0.7616850604241487, "grad_norm": 0.4140625, "learning_rate": 4.282704271890288e-06, "loss": 1.9174, "step": 23608 }, { "epoch": 0.761717324277945, "grad_norm": 0.4375, "learning_rate": 4.281603268935e-06, "loss": 1.9369, "step": 23609 }, { "epoch": 0.7617495881317414, "grad_norm": 0.44921875, "learning_rate": 4.280502383960128e-06, "loss": 1.9162, "step": 23610 }, { "epoch": 0.7617818519855377, "grad_norm": 0.40625, "learning_rate": 4.2794016169778e-06, "loss": 1.9233, "step": 23611 }, { "epoch": 0.7618141158393341, "grad_norm": 0.41015625, "learning_rate": 4.278300968000129e-06, "loss": 1.9403, "step": 23612 }, { "epoch": 0.7618463796931304, "grad_norm": 0.423828125, "learning_rate": 4.2772004370392245e-06, "loss": 1.9041, "step": 23613 }, { "epoch": 0.7618786435469268, "grad_norm": 0.3671875, "learning_rate": 4.276100024107209e-06, "loss": 1.8851, "step": 23614 }, { "epoch": 0.7619109074007231, "grad_norm": 0.40625, "learning_rate": 4.2749997292161915e-06, "loss": 1.9547, "step": 23615 }, { "epoch": 0.7619431712545195, "grad_norm": 0.421875, "learning_rate": 4.273899552378279e-06, "loss": 1.8897, "step": 23616 }, { "epoch": 0.7619754351083158, "grad_norm": 0.38671875, "learning_rate": 4.2727994936055885e-06, "loss": 1.9543, "step": 23617 }, { "epoch": 0.7620076989621122, "grad_norm": 0.390625, "learning_rate": 4.271699552910227e-06, "loss": 1.9087, "step": 23618 }, { "epoch": 0.7620399628159085, "grad_norm": 0.416015625, "learning_rate": 4.270599730304294e-06, "loss": 1.9896, "step": 23619 }, { "epoch": 0.7620722266697049, "grad_norm": 0.412109375, "learning_rate": 4.269500025799908e-06, "loss": 1.95, "step": 23620 }, { "epoch": 0.7621044905235012, "grad_norm": 0.390625, "learning_rate": 4.268400439409168e-06, "loss": 1.9275, "step": 23621 }, { "epoch": 0.7621367543772976, "grad_norm": 0.384765625, "learning_rate": 4.267300971144172e-06, "loss": 1.9181, "step": 23622 }, { "epoch": 0.7621690182310938, "grad_norm": 0.40234375, "learning_rate": 4.266201621017032e-06, "loss": 1.9303, "step": 23623 }, { "epoch": 0.7622012820848902, "grad_norm": 0.384765625, "learning_rate": 4.265102389039839e-06, "loss": 1.939, "step": 23624 }, { "epoch": 0.7622335459386865, "grad_norm": 0.3984375, "learning_rate": 4.264003275224706e-06, "loss": 1.9266, "step": 23625 }, { "epoch": 0.7622658097924829, "grad_norm": 0.52734375, "learning_rate": 4.262904279583715e-06, "loss": 1.9502, "step": 23626 }, { "epoch": 0.7622980736462792, "grad_norm": 0.490234375, "learning_rate": 4.261805402128971e-06, "loss": 1.929, "step": 23627 }, { "epoch": 0.7623303375000756, "grad_norm": 0.46484375, "learning_rate": 4.26070664287258e-06, "loss": 1.9159, "step": 23628 }, { "epoch": 0.762362601353872, "grad_norm": 0.427734375, "learning_rate": 4.259608001826616e-06, "loss": 1.9045, "step": 23629 }, { "epoch": 0.7623948652076683, "grad_norm": 0.40234375, "learning_rate": 4.258509479003182e-06, "loss": 1.9585, "step": 23630 }, { "epoch": 0.7624271290614647, "grad_norm": 0.4296875, "learning_rate": 4.257411074414377e-06, "loss": 1.9296, "step": 23631 }, { "epoch": 0.762459392915261, "grad_norm": 0.4296875, "learning_rate": 4.256312788072278e-06, "loss": 1.9234, "step": 23632 }, { "epoch": 0.7624916567690574, "grad_norm": 0.408203125, "learning_rate": 4.255214619988977e-06, "loss": 1.9277, "step": 23633 }, { "epoch": 0.7625239206228537, "grad_norm": 0.439453125, "learning_rate": 4.254116570176576e-06, "loss": 1.9448, "step": 23634 }, { "epoch": 0.7625561844766501, "grad_norm": 0.47265625, "learning_rate": 4.253018638647142e-06, "loss": 1.939, "step": 23635 }, { "epoch": 0.7625884483304464, "grad_norm": 0.408203125, "learning_rate": 4.251920825412773e-06, "loss": 1.9152, "step": 23636 }, { "epoch": 0.7626207121842428, "grad_norm": 0.388671875, "learning_rate": 4.2508231304855505e-06, "loss": 1.908, "step": 23637 }, { "epoch": 0.762652976038039, "grad_norm": 0.40234375, "learning_rate": 4.249725553877549e-06, "loss": 1.9342, "step": 23638 }, { "epoch": 0.7626852398918355, "grad_norm": 0.3828125, "learning_rate": 4.248628095600862e-06, "loss": 1.9377, "step": 23639 }, { "epoch": 0.7627175037456317, "grad_norm": 0.427734375, "learning_rate": 4.247530755667563e-06, "loss": 1.9177, "step": 23640 }, { "epoch": 0.7627497675994281, "grad_norm": 0.376953125, "learning_rate": 4.246433534089728e-06, "loss": 1.8856, "step": 23641 }, { "epoch": 0.7627820314532244, "grad_norm": 0.38671875, "learning_rate": 4.245336430879442e-06, "loss": 1.9334, "step": 23642 }, { "epoch": 0.7628142953070208, "grad_norm": 0.419921875, "learning_rate": 4.244239446048777e-06, "loss": 1.9777, "step": 23643 }, { "epoch": 0.7628465591608171, "grad_norm": 0.3984375, "learning_rate": 4.243142579609804e-06, "loss": 1.9083, "step": 23644 }, { "epoch": 0.7628788230146135, "grad_norm": 0.404296875, "learning_rate": 4.242045831574604e-06, "loss": 1.8996, "step": 23645 }, { "epoch": 0.7629110868684098, "grad_norm": 0.375, "learning_rate": 4.240949201955249e-06, "loss": 1.9375, "step": 23646 }, { "epoch": 0.7629433507222062, "grad_norm": 0.431640625, "learning_rate": 4.239852690763798e-06, "loss": 1.9157, "step": 23647 }, { "epoch": 0.7629756145760026, "grad_norm": 0.400390625, "learning_rate": 4.238756298012336e-06, "loss": 1.9513, "step": 23648 }, { "epoch": 0.7630078784297989, "grad_norm": 0.408203125, "learning_rate": 4.237660023712925e-06, "loss": 1.9618, "step": 23649 }, { "epoch": 0.7630401422835953, "grad_norm": 0.39453125, "learning_rate": 4.236563867877627e-06, "loss": 1.9326, "step": 23650 }, { "epoch": 0.7630724061373916, "grad_norm": 0.3984375, "learning_rate": 4.235467830518516e-06, "loss": 1.9269, "step": 23651 }, { "epoch": 0.763104669991188, "grad_norm": 0.40625, "learning_rate": 4.234371911647652e-06, "loss": 1.9534, "step": 23652 }, { "epoch": 0.7631369338449843, "grad_norm": 0.455078125, "learning_rate": 4.233276111277096e-06, "loss": 1.919, "step": 23653 }, { "epoch": 0.7631691976987807, "grad_norm": 0.39453125, "learning_rate": 4.232180429418916e-06, "loss": 1.9519, "step": 23654 }, { "epoch": 0.763201461552577, "grad_norm": 0.39453125, "learning_rate": 4.23108486608517e-06, "loss": 1.9455, "step": 23655 }, { "epoch": 0.7632337254063734, "grad_norm": 0.42578125, "learning_rate": 4.229989421287914e-06, "loss": 1.9031, "step": 23656 }, { "epoch": 0.7632659892601696, "grad_norm": 0.400390625, "learning_rate": 4.228894095039206e-06, "loss": 1.9205, "step": 23657 }, { "epoch": 0.763298253113966, "grad_norm": 0.423828125, "learning_rate": 4.227798887351103e-06, "loss": 1.9264, "step": 23658 }, { "epoch": 0.7633305169677623, "grad_norm": 0.423828125, "learning_rate": 4.226703798235672e-06, "loss": 1.9255, "step": 23659 }, { "epoch": 0.7633627808215587, "grad_norm": 0.4609375, "learning_rate": 4.225608827704947e-06, "loss": 1.9304, "step": 23660 }, { "epoch": 0.763395044675355, "grad_norm": 0.447265625, "learning_rate": 4.224513975770992e-06, "loss": 1.9624, "step": 23661 }, { "epoch": 0.7634273085291514, "grad_norm": 0.380859375, "learning_rate": 4.223419242445864e-06, "loss": 1.9174, "step": 23662 }, { "epoch": 0.7634595723829477, "grad_norm": 0.412109375, "learning_rate": 4.2223246277415975e-06, "loss": 1.9356, "step": 23663 }, { "epoch": 0.7634918362367441, "grad_norm": 0.404296875, "learning_rate": 4.22123013167025e-06, "loss": 1.9396, "step": 23664 }, { "epoch": 0.7635241000905404, "grad_norm": 0.3984375, "learning_rate": 4.220135754243878e-06, "loss": 1.9228, "step": 23665 }, { "epoch": 0.7635563639443368, "grad_norm": 0.423828125, "learning_rate": 4.219041495474508e-06, "loss": 1.9515, "step": 23666 }, { "epoch": 0.7635886277981331, "grad_norm": 0.396484375, "learning_rate": 4.2179473553741995e-06, "loss": 1.9346, "step": 23667 }, { "epoch": 0.7636208916519295, "grad_norm": 0.427734375, "learning_rate": 4.2168533339549935e-06, "loss": 1.9345, "step": 23668 }, { "epoch": 0.7636531555057259, "grad_norm": 0.4296875, "learning_rate": 4.215759431228924e-06, "loss": 1.8665, "step": 23669 }, { "epoch": 0.7636854193595222, "grad_norm": 0.427734375, "learning_rate": 4.214665647208042e-06, "loss": 1.9165, "step": 23670 }, { "epoch": 0.7637176832133186, "grad_norm": 0.416015625, "learning_rate": 4.213571981904384e-06, "loss": 1.9312, "step": 23671 }, { "epoch": 0.7637499470671149, "grad_norm": 0.41015625, "learning_rate": 4.2124784353299826e-06, "loss": 1.9134, "step": 23672 }, { "epoch": 0.7637822109209113, "grad_norm": 0.44921875, "learning_rate": 4.211385007496884e-06, "loss": 1.9476, "step": 23673 }, { "epoch": 0.7638144747747075, "grad_norm": 0.400390625, "learning_rate": 4.210291698417122e-06, "loss": 1.9302, "step": 23674 }, { "epoch": 0.7638467386285039, "grad_norm": 0.412109375, "learning_rate": 4.209198508102722e-06, "loss": 1.9673, "step": 23675 }, { "epoch": 0.7638790024823002, "grad_norm": 0.396484375, "learning_rate": 4.208105436565728e-06, "loss": 1.9412, "step": 23676 }, { "epoch": 0.7639112663360966, "grad_norm": 0.380859375, "learning_rate": 4.207012483818167e-06, "loss": 1.9653, "step": 23677 }, { "epoch": 0.7639435301898929, "grad_norm": 0.40625, "learning_rate": 4.205919649872066e-06, "loss": 1.9325, "step": 23678 }, { "epoch": 0.7639757940436893, "grad_norm": 0.408203125, "learning_rate": 4.204826934739461e-06, "loss": 1.9478, "step": 23679 }, { "epoch": 0.7640080578974856, "grad_norm": 0.43359375, "learning_rate": 4.203734338432377e-06, "loss": 2.0491, "step": 23680 }, { "epoch": 0.764040321751282, "grad_norm": 0.384765625, "learning_rate": 4.202641860962837e-06, "loss": 2.0209, "step": 23681 }, { "epoch": 0.7640725856050783, "grad_norm": 0.408203125, "learning_rate": 4.2015495023428736e-06, "loss": 2.0033, "step": 23682 }, { "epoch": 0.7641048494588747, "grad_norm": 0.41015625, "learning_rate": 4.200457262584505e-06, "loss": 2.0515, "step": 23683 }, { "epoch": 0.764137113312671, "grad_norm": 0.38671875, "learning_rate": 4.199365141699752e-06, "loss": 2.0163, "step": 23684 }, { "epoch": 0.7641693771664674, "grad_norm": 0.419921875, "learning_rate": 4.198273139700645e-06, "loss": 2.0528, "step": 23685 }, { "epoch": 0.7642016410202637, "grad_norm": 0.40625, "learning_rate": 4.197181256599195e-06, "loss": 2.0317, "step": 23686 }, { "epoch": 0.7642339048740601, "grad_norm": 0.453125, "learning_rate": 4.196089492407427e-06, "loss": 2.0073, "step": 23687 }, { "epoch": 0.7642661687278564, "grad_norm": 0.416015625, "learning_rate": 4.194997847137348e-06, "loss": 2.0201, "step": 23688 }, { "epoch": 0.7642984325816528, "grad_norm": 0.443359375, "learning_rate": 4.193906320800988e-06, "loss": 1.955, "step": 23689 }, { "epoch": 0.7643306964354492, "grad_norm": 0.404296875, "learning_rate": 4.192814913410354e-06, "loss": 2.0159, "step": 23690 }, { "epoch": 0.7643629602892454, "grad_norm": 0.408203125, "learning_rate": 4.191723624977455e-06, "loss": 2.0381, "step": 23691 }, { "epoch": 0.7643952241430418, "grad_norm": 0.388671875, "learning_rate": 4.19063245551431e-06, "loss": 2.0527, "step": 23692 }, { "epoch": 0.7644274879968381, "grad_norm": 0.423828125, "learning_rate": 4.189541405032936e-06, "loss": 1.9482, "step": 23693 }, { "epoch": 0.7644597518506345, "grad_norm": 0.38671875, "learning_rate": 4.188450473545326e-06, "loss": 1.9076, "step": 23694 }, { "epoch": 0.7644920157044308, "grad_norm": 0.376953125, "learning_rate": 4.187359661063499e-06, "loss": 1.8963, "step": 23695 }, { "epoch": 0.7645242795582272, "grad_norm": 0.3828125, "learning_rate": 4.186268967599466e-06, "loss": 1.8998, "step": 23696 }, { "epoch": 0.7645565434120235, "grad_norm": 0.40234375, "learning_rate": 4.185178393165219e-06, "loss": 1.9137, "step": 23697 }, { "epoch": 0.7645888072658199, "grad_norm": 0.43359375, "learning_rate": 4.1840879377727745e-06, "loss": 1.9696, "step": 23698 }, { "epoch": 0.7646210711196162, "grad_norm": 0.376953125, "learning_rate": 4.1829976014341295e-06, "loss": 1.9548, "step": 23699 }, { "epoch": 0.7646533349734126, "grad_norm": 0.3984375, "learning_rate": 4.181907384161285e-06, "loss": 1.9558, "step": 23700 }, { "epoch": 0.7646855988272089, "grad_norm": 0.412109375, "learning_rate": 4.180817285966245e-06, "loss": 1.9084, "step": 23701 }, { "epoch": 0.7647178626810053, "grad_norm": 0.392578125, "learning_rate": 4.179727306861006e-06, "loss": 1.8872, "step": 23702 }, { "epoch": 0.7647501265348016, "grad_norm": 0.37890625, "learning_rate": 4.178637446857565e-06, "loss": 1.9213, "step": 23703 }, { "epoch": 0.764782390388598, "grad_norm": 0.439453125, "learning_rate": 4.177547705967922e-06, "loss": 1.9432, "step": 23704 }, { "epoch": 0.7648146542423943, "grad_norm": 0.447265625, "learning_rate": 4.17645808420407e-06, "loss": 1.9225, "step": 23705 }, { "epoch": 0.7648469180961907, "grad_norm": 0.404296875, "learning_rate": 4.1753685815779985e-06, "loss": 1.9351, "step": 23706 }, { "epoch": 0.7648791819499869, "grad_norm": 0.373046875, "learning_rate": 4.1742791981017085e-06, "loss": 1.9184, "step": 23707 }, { "epoch": 0.7649114458037833, "grad_norm": 0.419921875, "learning_rate": 4.173189933787187e-06, "loss": 1.9241, "step": 23708 }, { "epoch": 0.7649437096575797, "grad_norm": 0.40625, "learning_rate": 4.172100788646419e-06, "loss": 1.9375, "step": 23709 }, { "epoch": 0.764975973511376, "grad_norm": 0.44140625, "learning_rate": 4.171011762691403e-06, "loss": 1.888, "step": 23710 }, { "epoch": 0.7650082373651724, "grad_norm": 0.373046875, "learning_rate": 4.169922855934118e-06, "loss": 1.923, "step": 23711 }, { "epoch": 0.7650405012189687, "grad_norm": 0.40234375, "learning_rate": 4.168834068386551e-06, "loss": 1.904, "step": 23712 }, { "epoch": 0.7650727650727651, "grad_norm": 0.4140625, "learning_rate": 4.167745400060693e-06, "loss": 1.9701, "step": 23713 }, { "epoch": 0.7651050289265614, "grad_norm": 0.41796875, "learning_rate": 4.1666568509685214e-06, "loss": 1.95, "step": 23714 }, { "epoch": 0.7651372927803578, "grad_norm": 0.369140625, "learning_rate": 4.165568421122014e-06, "loss": 1.9806, "step": 23715 }, { "epoch": 0.7651695566341541, "grad_norm": 0.40625, "learning_rate": 4.164480110533163e-06, "loss": 1.9011, "step": 23716 }, { "epoch": 0.7652018204879505, "grad_norm": 0.388671875, "learning_rate": 4.16339191921394e-06, "loss": 1.8961, "step": 23717 }, { "epoch": 0.7652340843417468, "grad_norm": 0.40234375, "learning_rate": 4.162303847176326e-06, "loss": 1.9256, "step": 23718 }, { "epoch": 0.7652663481955432, "grad_norm": 0.37890625, "learning_rate": 4.161215894432291e-06, "loss": 1.8928, "step": 23719 }, { "epoch": 0.7652986120493395, "grad_norm": 0.369140625, "learning_rate": 4.160128060993821e-06, "loss": 1.8984, "step": 23720 }, { "epoch": 0.7653308759031359, "grad_norm": 0.369140625, "learning_rate": 4.159040346872884e-06, "loss": 1.9409, "step": 23721 }, { "epoch": 0.7653631397569322, "grad_norm": 0.412109375, "learning_rate": 4.157952752081451e-06, "loss": 1.9142, "step": 23722 }, { "epoch": 0.7653954036107286, "grad_norm": 0.369140625, "learning_rate": 4.156865276631499e-06, "loss": 1.9164, "step": 23723 }, { "epoch": 0.7654276674645248, "grad_norm": 0.376953125, "learning_rate": 4.155777920534994e-06, "loss": 1.9322, "step": 23724 }, { "epoch": 0.7654599313183212, "grad_norm": 0.376953125, "learning_rate": 4.154690683803904e-06, "loss": 1.9124, "step": 23725 }, { "epoch": 0.7654921951721175, "grad_norm": 0.396484375, "learning_rate": 4.153603566450201e-06, "loss": 1.9504, "step": 23726 }, { "epoch": 0.7655244590259139, "grad_norm": 0.388671875, "learning_rate": 4.152516568485851e-06, "loss": 1.9358, "step": 23727 }, { "epoch": 0.7655567228797102, "grad_norm": 0.376953125, "learning_rate": 4.15142968992281e-06, "loss": 1.9574, "step": 23728 }, { "epoch": 0.7655889867335066, "grad_norm": 0.37890625, "learning_rate": 4.150342930773053e-06, "loss": 1.954, "step": 23729 }, { "epoch": 0.765621250587303, "grad_norm": 0.375, "learning_rate": 4.1492562910485386e-06, "loss": 1.9552, "step": 23730 }, { "epoch": 0.7656535144410993, "grad_norm": 0.396484375, "learning_rate": 4.148169770761222e-06, "loss": 1.9298, "step": 23731 }, { "epoch": 0.7656857782948957, "grad_norm": 0.38671875, "learning_rate": 4.147083369923072e-06, "loss": 1.8886, "step": 23732 }, { "epoch": 0.765718042148692, "grad_norm": 0.380859375, "learning_rate": 4.145997088546042e-06, "loss": 1.9211, "step": 23733 }, { "epoch": 0.7657503060024884, "grad_norm": 0.390625, "learning_rate": 4.144910926642084e-06, "loss": 1.9297, "step": 23734 }, { "epoch": 0.7657825698562847, "grad_norm": 0.390625, "learning_rate": 4.1438248842231645e-06, "loss": 1.9974, "step": 23735 }, { "epoch": 0.7658148337100811, "grad_norm": 0.376953125, "learning_rate": 4.142738961301233e-06, "loss": 1.9297, "step": 23736 }, { "epoch": 0.7658470975638774, "grad_norm": 0.369140625, "learning_rate": 4.141653157888237e-06, "loss": 1.8961, "step": 23737 }, { "epoch": 0.7658793614176738, "grad_norm": 0.37890625, "learning_rate": 4.140567473996138e-06, "loss": 1.8178, "step": 23738 }, { "epoch": 0.76591162527147, "grad_norm": 0.37109375, "learning_rate": 4.139481909636881e-06, "loss": 1.8218, "step": 23739 }, { "epoch": 0.7659438891252665, "grad_norm": 0.384765625, "learning_rate": 4.138396464822413e-06, "loss": 1.8552, "step": 23740 }, { "epoch": 0.7659761529790627, "grad_norm": 0.37890625, "learning_rate": 4.137311139564686e-06, "loss": 1.867, "step": 23741 }, { "epoch": 0.7660084168328591, "grad_norm": 0.375, "learning_rate": 4.136225933875648e-06, "loss": 1.8397, "step": 23742 }, { "epoch": 0.7660406806866554, "grad_norm": 0.376953125, "learning_rate": 4.135140847767237e-06, "loss": 1.8484, "step": 23743 }, { "epoch": 0.7660729445404518, "grad_norm": 0.3671875, "learning_rate": 4.134055881251404e-06, "loss": 1.8695, "step": 23744 }, { "epoch": 0.7661052083942481, "grad_norm": 0.365234375, "learning_rate": 4.132971034340089e-06, "loss": 1.836, "step": 23745 }, { "epoch": 0.7661374722480445, "grad_norm": 0.3671875, "learning_rate": 4.131886307045229e-06, "loss": 1.8326, "step": 23746 }, { "epoch": 0.7661697361018408, "grad_norm": 0.369140625, "learning_rate": 4.130801699378771e-06, "loss": 1.7997, "step": 23747 }, { "epoch": 0.7662019999556372, "grad_norm": 0.3671875, "learning_rate": 4.129717211352652e-06, "loss": 1.8284, "step": 23748 }, { "epoch": 0.7662342638094336, "grad_norm": 0.376953125, "learning_rate": 4.128632842978808e-06, "loss": 1.826, "step": 23749 }, { "epoch": 0.7662665276632299, "grad_norm": 0.369140625, "learning_rate": 4.127548594269167e-06, "loss": 1.8331, "step": 23750 }, { "epoch": 0.7662987915170263, "grad_norm": 0.373046875, "learning_rate": 4.126464465235679e-06, "loss": 1.8499, "step": 23751 }, { "epoch": 0.7663310553708226, "grad_norm": 0.384765625, "learning_rate": 4.1253804558902675e-06, "loss": 1.8481, "step": 23752 }, { "epoch": 0.766363319224619, "grad_norm": 0.375, "learning_rate": 4.124296566244862e-06, "loss": 1.8057, "step": 23753 }, { "epoch": 0.7663955830784153, "grad_norm": 0.37890625, "learning_rate": 4.123212796311404e-06, "loss": 1.8438, "step": 23754 }, { "epoch": 0.7664278469322117, "grad_norm": 0.359375, "learning_rate": 4.1221291461018154e-06, "loss": 1.8253, "step": 23755 }, { "epoch": 0.766460110786008, "grad_norm": 0.373046875, "learning_rate": 4.121045615628021e-06, "loss": 1.8185, "step": 23756 }, { "epoch": 0.7664923746398044, "grad_norm": 0.412109375, "learning_rate": 4.119962204901956e-06, "loss": 1.8524, "step": 23757 }, { "epoch": 0.7665246384936006, "grad_norm": 0.404296875, "learning_rate": 4.1188789139355425e-06, "loss": 1.9039, "step": 23758 }, { "epoch": 0.766556902347397, "grad_norm": 0.388671875, "learning_rate": 4.1177957427407e-06, "loss": 1.9087, "step": 23759 }, { "epoch": 0.7665891662011933, "grad_norm": 0.400390625, "learning_rate": 4.116712691329358e-06, "loss": 1.9066, "step": 23760 }, { "epoch": 0.7666214300549897, "grad_norm": 0.427734375, "learning_rate": 4.115629759713436e-06, "loss": 1.9109, "step": 23761 }, { "epoch": 0.766653693908786, "grad_norm": 0.38671875, "learning_rate": 4.114546947904848e-06, "loss": 1.9047, "step": 23762 }, { "epoch": 0.7666859577625824, "grad_norm": 0.390625, "learning_rate": 4.1134642559155225e-06, "loss": 1.9481, "step": 23763 }, { "epoch": 0.7667182216163787, "grad_norm": 0.4375, "learning_rate": 4.112381683757373e-06, "loss": 1.9094, "step": 23764 }, { "epoch": 0.7667504854701751, "grad_norm": 0.412109375, "learning_rate": 4.1112992314423115e-06, "loss": 1.9327, "step": 23765 }, { "epoch": 0.7667827493239714, "grad_norm": 0.375, "learning_rate": 4.11021689898226e-06, "loss": 1.9095, "step": 23766 }, { "epoch": 0.7668150131777678, "grad_norm": 0.400390625, "learning_rate": 4.109134686389129e-06, "loss": 1.904, "step": 23767 }, { "epoch": 0.7668472770315641, "grad_norm": 0.46484375, "learning_rate": 4.108052593674824e-06, "loss": 1.8993, "step": 23768 }, { "epoch": 0.7668795408853605, "grad_norm": 0.37890625, "learning_rate": 4.1069706208512685e-06, "loss": 1.9144, "step": 23769 }, { "epoch": 0.7669118047391569, "grad_norm": 0.380859375, "learning_rate": 4.105888767930366e-06, "loss": 1.8685, "step": 23770 }, { "epoch": 0.7669440685929532, "grad_norm": 0.380859375, "learning_rate": 4.10480703492402e-06, "loss": 1.9135, "step": 23771 }, { "epoch": 0.7669763324467496, "grad_norm": 0.37890625, "learning_rate": 4.103725421844145e-06, "loss": 1.8995, "step": 23772 }, { "epoch": 0.7670085963005459, "grad_norm": 0.419921875, "learning_rate": 4.102643928702646e-06, "loss": 1.9287, "step": 23773 }, { "epoch": 0.7670408601543423, "grad_norm": 0.41796875, "learning_rate": 4.101562555511419e-06, "loss": 1.9434, "step": 23774 }, { "epoch": 0.7670731240081385, "grad_norm": 0.396484375, "learning_rate": 4.100481302282377e-06, "loss": 1.9099, "step": 23775 }, { "epoch": 0.7671053878619349, "grad_norm": 0.419921875, "learning_rate": 4.099400169027418e-06, "loss": 1.8737, "step": 23776 }, { "epoch": 0.7671376517157312, "grad_norm": 0.453125, "learning_rate": 4.098319155758441e-06, "loss": 1.8654, "step": 23777 }, { "epoch": 0.7671699155695276, "grad_norm": 0.423828125, "learning_rate": 4.097238262487343e-06, "loss": 1.9144, "step": 23778 }, { "epoch": 0.7672021794233239, "grad_norm": 0.38671875, "learning_rate": 4.0961574892260275e-06, "loss": 1.8766, "step": 23779 }, { "epoch": 0.7672344432771203, "grad_norm": 0.3828125, "learning_rate": 4.095076835986388e-06, "loss": 1.9065, "step": 23780 }, { "epoch": 0.7672667071309166, "grad_norm": 0.416015625, "learning_rate": 4.093996302780317e-06, "loss": 1.9133, "step": 23781 }, { "epoch": 0.767298970984713, "grad_norm": 0.447265625, "learning_rate": 4.092915889619714e-06, "loss": 1.9191, "step": 23782 }, { "epoch": 0.7673312348385093, "grad_norm": 0.3984375, "learning_rate": 4.0918355965164685e-06, "loss": 1.9251, "step": 23783 }, { "epoch": 0.7673634986923057, "grad_norm": 0.380859375, "learning_rate": 4.090755423482465e-06, "loss": 1.9567, "step": 23784 }, { "epoch": 0.767395762546102, "grad_norm": 0.46484375, "learning_rate": 4.0896753705296076e-06, "loss": 1.9189, "step": 23785 }, { "epoch": 0.7674280263998984, "grad_norm": 0.474609375, "learning_rate": 4.088595437669775e-06, "loss": 1.8959, "step": 23786 }, { "epoch": 0.7674602902536947, "grad_norm": 0.51171875, "learning_rate": 4.08751562491485e-06, "loss": 1.9513, "step": 23787 }, { "epoch": 0.7674925541074911, "grad_norm": 0.431640625, "learning_rate": 4.08643593227673e-06, "loss": 1.9398, "step": 23788 }, { "epoch": 0.7675248179612874, "grad_norm": 0.46875, "learning_rate": 4.085356359767295e-06, "loss": 1.9042, "step": 23789 }, { "epoch": 0.7675570818150838, "grad_norm": 0.4375, "learning_rate": 4.084276907398421e-06, "loss": 1.9369, "step": 23790 }, { "epoch": 0.7675893456688802, "grad_norm": 0.380859375, "learning_rate": 4.083197575182e-06, "loss": 1.8859, "step": 23791 }, { "epoch": 0.7676216095226764, "grad_norm": 0.4296875, "learning_rate": 4.082118363129911e-06, "loss": 1.8651, "step": 23792 }, { "epoch": 0.7676538733764728, "grad_norm": 0.447265625, "learning_rate": 4.081039271254024e-06, "loss": 1.9326, "step": 23793 }, { "epoch": 0.7676861372302691, "grad_norm": 0.49609375, "learning_rate": 4.079960299566228e-06, "loss": 1.874, "step": 23794 }, { "epoch": 0.7677184010840655, "grad_norm": 0.462890625, "learning_rate": 4.078881448078396e-06, "loss": 1.9279, "step": 23795 }, { "epoch": 0.7677506649378618, "grad_norm": 0.421875, "learning_rate": 4.077802716802397e-06, "loss": 1.9013, "step": 23796 }, { "epoch": 0.7677829287916582, "grad_norm": 0.47265625, "learning_rate": 4.0767241057501145e-06, "loss": 1.8631, "step": 23797 }, { "epoch": 0.7678151926454545, "grad_norm": 0.43359375, "learning_rate": 4.075645614933417e-06, "loss": 1.9191, "step": 23798 }, { "epoch": 0.7678474564992509, "grad_norm": 0.40625, "learning_rate": 4.074567244364171e-06, "loss": 1.9162, "step": 23799 }, { "epoch": 0.7678797203530472, "grad_norm": 0.412109375, "learning_rate": 4.073488994054256e-06, "loss": 1.9101, "step": 23800 }, { "epoch": 0.7679119842068436, "grad_norm": 0.45703125, "learning_rate": 4.072410864015534e-06, "loss": 1.9203, "step": 23801 }, { "epoch": 0.7679442480606399, "grad_norm": 0.423828125, "learning_rate": 4.071332854259871e-06, "loss": 1.9284, "step": 23802 }, { "epoch": 0.7679765119144363, "grad_norm": 0.404296875, "learning_rate": 4.07025496479914e-06, "loss": 1.8963, "step": 23803 }, { "epoch": 0.7680087757682326, "grad_norm": 0.439453125, "learning_rate": 4.0691771956452e-06, "loss": 1.8853, "step": 23804 }, { "epoch": 0.768041039622029, "grad_norm": 0.3828125, "learning_rate": 4.068099546809912e-06, "loss": 1.8219, "step": 23805 }, { "epoch": 0.7680733034758253, "grad_norm": 0.392578125, "learning_rate": 4.067022018305146e-06, "loss": 1.8832, "step": 23806 }, { "epoch": 0.7681055673296217, "grad_norm": 0.4609375, "learning_rate": 4.065944610142759e-06, "loss": 1.8468, "step": 23807 }, { "epoch": 0.7681378311834179, "grad_norm": 0.447265625, "learning_rate": 4.06486732233461e-06, "loss": 1.8235, "step": 23808 }, { "epoch": 0.7681700950372143, "grad_norm": 0.388671875, "learning_rate": 4.0637901548925496e-06, "loss": 1.8141, "step": 23809 }, { "epoch": 0.7682023588910107, "grad_norm": 0.375, "learning_rate": 4.062713107828449e-06, "loss": 1.832, "step": 23810 }, { "epoch": 0.768234622744807, "grad_norm": 0.458984375, "learning_rate": 4.061636181154154e-06, "loss": 1.8849, "step": 23811 }, { "epoch": 0.7682668865986034, "grad_norm": 0.431640625, "learning_rate": 4.060559374881519e-06, "loss": 1.8316, "step": 23812 }, { "epoch": 0.7682991504523997, "grad_norm": 0.40625, "learning_rate": 4.0594826890224015e-06, "loss": 1.8187, "step": 23813 }, { "epoch": 0.7683314143061961, "grad_norm": 0.37890625, "learning_rate": 4.058406123588648e-06, "loss": 1.826, "step": 23814 }, { "epoch": 0.7683636781599924, "grad_norm": 0.408203125, "learning_rate": 4.057329678592109e-06, "loss": 1.8583, "step": 23815 }, { "epoch": 0.7683959420137888, "grad_norm": 0.45703125, "learning_rate": 4.0562533540446365e-06, "loss": 1.8896, "step": 23816 }, { "epoch": 0.7684282058675851, "grad_norm": 0.37890625, "learning_rate": 4.055177149958078e-06, "loss": 1.7986, "step": 23817 }, { "epoch": 0.7684604697213815, "grad_norm": 0.373046875, "learning_rate": 4.054101066344272e-06, "loss": 1.8709, "step": 23818 }, { "epoch": 0.7684927335751778, "grad_norm": 0.3828125, "learning_rate": 4.053025103215074e-06, "loss": 1.8585, "step": 23819 }, { "epoch": 0.7685249974289742, "grad_norm": 0.392578125, "learning_rate": 4.051949260582323e-06, "loss": 1.8329, "step": 23820 }, { "epoch": 0.7685572612827705, "grad_norm": 0.458984375, "learning_rate": 4.0508735384578535e-06, "loss": 1.7951, "step": 23821 }, { "epoch": 0.7685895251365669, "grad_norm": 0.4296875, "learning_rate": 4.04979793685352e-06, "loss": 1.7897, "step": 23822 }, { "epoch": 0.7686217889903632, "grad_norm": 0.384765625, "learning_rate": 4.048722455781155e-06, "loss": 1.8018, "step": 23823 }, { "epoch": 0.7686540528441596, "grad_norm": 0.380859375, "learning_rate": 4.047647095252591e-06, "loss": 1.8361, "step": 23824 }, { "epoch": 0.7686863166979558, "grad_norm": 0.373046875, "learning_rate": 4.046571855279677e-06, "loss": 1.8431, "step": 23825 }, { "epoch": 0.7687185805517522, "grad_norm": 0.427734375, "learning_rate": 4.0454967358742415e-06, "loss": 1.8343, "step": 23826 }, { "epoch": 0.7687508444055485, "grad_norm": 0.376953125, "learning_rate": 4.044421737048113e-06, "loss": 1.8291, "step": 23827 }, { "epoch": 0.7687831082593449, "grad_norm": 0.376953125, "learning_rate": 4.043346858813136e-06, "loss": 1.805, "step": 23828 }, { "epoch": 0.7688153721131412, "grad_norm": 0.380859375, "learning_rate": 4.042272101181136e-06, "loss": 1.8566, "step": 23829 }, { "epoch": 0.7688476359669376, "grad_norm": 0.4375, "learning_rate": 4.041197464163941e-06, "loss": 1.7952, "step": 23830 }, { "epoch": 0.768879899820734, "grad_norm": 0.42578125, "learning_rate": 4.040122947773385e-06, "loss": 1.8503, "step": 23831 }, { "epoch": 0.7689121636745303, "grad_norm": 0.37109375, "learning_rate": 4.039048552021295e-06, "loss": 1.7946, "step": 23832 }, { "epoch": 0.7689444275283267, "grad_norm": 0.41796875, "learning_rate": 4.037974276919489e-06, "loss": 1.849, "step": 23833 }, { "epoch": 0.768976691382123, "grad_norm": 0.3984375, "learning_rate": 4.036900122479802e-06, "loss": 1.8665, "step": 23834 }, { "epoch": 0.7690089552359194, "grad_norm": 0.396484375, "learning_rate": 4.0358260887140566e-06, "loss": 1.859, "step": 23835 }, { "epoch": 0.7690412190897157, "grad_norm": 0.44921875, "learning_rate": 4.034752175634065e-06, "loss": 1.8922, "step": 23836 }, { "epoch": 0.7690734829435121, "grad_norm": 0.36328125, "learning_rate": 4.03367838325166e-06, "loss": 1.8643, "step": 23837 }, { "epoch": 0.7691057467973084, "grad_norm": 0.3828125, "learning_rate": 4.0326047115786565e-06, "loss": 1.8459, "step": 23838 }, { "epoch": 0.7691380106511048, "grad_norm": 0.37890625, "learning_rate": 4.031531160626871e-06, "loss": 1.8394, "step": 23839 }, { "epoch": 0.769170274504901, "grad_norm": 0.390625, "learning_rate": 4.0304577304081195e-06, "loss": 1.8251, "step": 23840 }, { "epoch": 0.7692025383586975, "grad_norm": 0.375, "learning_rate": 4.029384420934223e-06, "loss": 1.8145, "step": 23841 }, { "epoch": 0.7692348022124937, "grad_norm": 0.373046875, "learning_rate": 4.028311232216991e-06, "loss": 1.847, "step": 23842 }, { "epoch": 0.7692670660662901, "grad_norm": 0.376953125, "learning_rate": 4.027238164268237e-06, "loss": 1.872, "step": 23843 }, { "epoch": 0.7692993299200864, "grad_norm": 0.41015625, "learning_rate": 4.0261652170997746e-06, "loss": 1.862, "step": 23844 }, { "epoch": 0.7693315937738828, "grad_norm": 0.41015625, "learning_rate": 4.025092390723415e-06, "loss": 1.8596, "step": 23845 }, { "epoch": 0.7693638576276791, "grad_norm": 0.400390625, "learning_rate": 4.024019685150959e-06, "loss": 1.8667, "step": 23846 }, { "epoch": 0.7693961214814755, "grad_norm": 0.373046875, "learning_rate": 4.0229471003942255e-06, "loss": 1.837, "step": 23847 }, { "epoch": 0.7694283853352718, "grad_norm": 0.380859375, "learning_rate": 4.0218746364650155e-06, "loss": 1.8724, "step": 23848 }, { "epoch": 0.7694606491890682, "grad_norm": 0.388671875, "learning_rate": 4.020802293375129e-06, "loss": 1.843, "step": 23849 }, { "epoch": 0.7694929130428646, "grad_norm": 0.396484375, "learning_rate": 4.019730071136379e-06, "loss": 1.8398, "step": 23850 }, { "epoch": 0.7695251768966609, "grad_norm": 0.388671875, "learning_rate": 4.018657969760563e-06, "loss": 1.8563, "step": 23851 }, { "epoch": 0.7695574407504573, "grad_norm": 0.39453125, "learning_rate": 4.0175859892594786e-06, "loss": 1.8059, "step": 23852 }, { "epoch": 0.7695897046042536, "grad_norm": 0.375, "learning_rate": 4.016514129644931e-06, "loss": 1.8538, "step": 23853 }, { "epoch": 0.76962196845805, "grad_norm": 0.375, "learning_rate": 4.0154423909287195e-06, "loss": 1.8337, "step": 23854 }, { "epoch": 0.7696542323118463, "grad_norm": 0.384765625, "learning_rate": 4.014370773122632e-06, "loss": 1.8312, "step": 23855 }, { "epoch": 0.7696864961656427, "grad_norm": 0.37109375, "learning_rate": 4.013299276238474e-06, "loss": 1.8411, "step": 23856 }, { "epoch": 0.769718760019439, "grad_norm": 0.361328125, "learning_rate": 4.0122279002880355e-06, "loss": 1.8829, "step": 23857 }, { "epoch": 0.7697510238732354, "grad_norm": 0.41796875, "learning_rate": 4.011156645283106e-06, "loss": 1.7993, "step": 23858 }, { "epoch": 0.7697832877270316, "grad_norm": 0.376953125, "learning_rate": 4.010085511235484e-06, "loss": 1.8179, "step": 23859 }, { "epoch": 0.769815551580828, "grad_norm": 0.380859375, "learning_rate": 4.0090144981569585e-06, "loss": 1.8644, "step": 23860 }, { "epoch": 0.7698478154346243, "grad_norm": 0.373046875, "learning_rate": 4.007943606059311e-06, "loss": 1.8599, "step": 23861 }, { "epoch": 0.7698800792884207, "grad_norm": 0.37109375, "learning_rate": 4.0068728349543385e-06, "loss": 1.9292, "step": 23862 }, { "epoch": 0.769912343142217, "grad_norm": 0.369140625, "learning_rate": 4.0058021848538254e-06, "loss": 1.888, "step": 23863 }, { "epoch": 0.7699446069960134, "grad_norm": 0.3984375, "learning_rate": 4.004731655769547e-06, "loss": 1.83, "step": 23864 }, { "epoch": 0.7699768708498097, "grad_norm": 0.390625, "learning_rate": 4.0036612477133026e-06, "loss": 1.8352, "step": 23865 }, { "epoch": 0.7700091347036061, "grad_norm": 0.384765625, "learning_rate": 4.002590960696864e-06, "loss": 1.8284, "step": 23866 }, { "epoch": 0.7700413985574024, "grad_norm": 0.369140625, "learning_rate": 4.0015207947320105e-06, "loss": 1.8512, "step": 23867 }, { "epoch": 0.7700736624111988, "grad_norm": 0.3828125, "learning_rate": 4.000450749830531e-06, "loss": 1.8983, "step": 23868 }, { "epoch": 0.7701059262649951, "grad_norm": 0.373046875, "learning_rate": 3.999380826004198e-06, "loss": 1.859, "step": 23869 }, { "epoch": 0.7701381901187915, "grad_norm": 0.37109375, "learning_rate": 3.998311023264789e-06, "loss": 1.8685, "step": 23870 }, { "epoch": 0.7701704539725879, "grad_norm": 0.35546875, "learning_rate": 3.997241341624075e-06, "loss": 1.8473, "step": 23871 }, { "epoch": 0.7702027178263842, "grad_norm": 0.478515625, "learning_rate": 3.996171781093841e-06, "loss": 1.8015, "step": 23872 }, { "epoch": 0.7702349816801806, "grad_norm": 0.375, "learning_rate": 3.995102341685852e-06, "loss": 1.86, "step": 23873 }, { "epoch": 0.7702672455339769, "grad_norm": 0.359375, "learning_rate": 3.9940330234118785e-06, "loss": 1.8542, "step": 23874 }, { "epoch": 0.7702995093877733, "grad_norm": 0.375, "learning_rate": 3.992963826283698e-06, "loss": 1.877, "step": 23875 }, { "epoch": 0.7703317732415695, "grad_norm": 0.369140625, "learning_rate": 3.991894750313074e-06, "loss": 1.8585, "step": 23876 }, { "epoch": 0.7703640370953659, "grad_norm": 0.37890625, "learning_rate": 3.990825795511772e-06, "loss": 1.868, "step": 23877 }, { "epoch": 0.7703963009491622, "grad_norm": 0.37109375, "learning_rate": 3.989756961891567e-06, "loss": 1.8861, "step": 23878 }, { "epoch": 0.7704285648029586, "grad_norm": 0.373046875, "learning_rate": 3.988688249464218e-06, "loss": 1.8431, "step": 23879 }, { "epoch": 0.7704608286567549, "grad_norm": 0.361328125, "learning_rate": 3.9876196582414845e-06, "loss": 1.8438, "step": 23880 }, { "epoch": 0.7704930925105513, "grad_norm": 0.373046875, "learning_rate": 3.986551188235138e-06, "loss": 1.8827, "step": 23881 }, { "epoch": 0.7705253563643476, "grad_norm": 0.3984375, "learning_rate": 3.985482839456934e-06, "loss": 1.9045, "step": 23882 }, { "epoch": 0.770557620218144, "grad_norm": 0.375, "learning_rate": 3.98441461191863e-06, "loss": 1.8179, "step": 23883 }, { "epoch": 0.7705898840719403, "grad_norm": 0.431640625, "learning_rate": 3.983346505631992e-06, "loss": 1.8437, "step": 23884 }, { "epoch": 0.7706221479257367, "grad_norm": 0.375, "learning_rate": 3.982278520608773e-06, "loss": 1.837, "step": 23885 }, { "epoch": 0.770654411779533, "grad_norm": 0.400390625, "learning_rate": 3.981210656860722e-06, "loss": 1.8533, "step": 23886 }, { "epoch": 0.7706866756333294, "grad_norm": 0.369140625, "learning_rate": 3.980142914399603e-06, "loss": 1.8291, "step": 23887 }, { "epoch": 0.7707189394871257, "grad_norm": 0.37109375, "learning_rate": 3.9790752932371665e-06, "loss": 1.8527, "step": 23888 }, { "epoch": 0.7707512033409221, "grad_norm": 0.3671875, "learning_rate": 3.978007793385159e-06, "loss": 1.8181, "step": 23889 }, { "epoch": 0.7707834671947184, "grad_norm": 0.373046875, "learning_rate": 3.976940414855338e-06, "loss": 1.8274, "step": 23890 }, { "epoch": 0.7708157310485148, "grad_norm": 0.380859375, "learning_rate": 3.975873157659449e-06, "loss": 1.8302, "step": 23891 }, { "epoch": 0.7708479949023112, "grad_norm": 0.365234375, "learning_rate": 3.974806021809235e-06, "loss": 1.823, "step": 23892 }, { "epoch": 0.7708802587561074, "grad_norm": 0.361328125, "learning_rate": 3.973739007316452e-06, "loss": 1.8435, "step": 23893 }, { "epoch": 0.7709125226099038, "grad_norm": 0.3671875, "learning_rate": 3.97267211419284e-06, "loss": 1.8262, "step": 23894 }, { "epoch": 0.7709447864637001, "grad_norm": 0.40625, "learning_rate": 3.971605342450137e-06, "loss": 1.8639, "step": 23895 }, { "epoch": 0.7709770503174965, "grad_norm": 0.37109375, "learning_rate": 3.970538692100094e-06, "loss": 1.8489, "step": 23896 }, { "epoch": 0.7710093141712928, "grad_norm": 0.3828125, "learning_rate": 3.969472163154452e-06, "loss": 1.7886, "step": 23897 }, { "epoch": 0.7710415780250892, "grad_norm": 0.376953125, "learning_rate": 3.968405755624941e-06, "loss": 1.8112, "step": 23898 }, { "epoch": 0.7710738418788855, "grad_norm": 0.375, "learning_rate": 3.96733946952331e-06, "loss": 1.8475, "step": 23899 }, { "epoch": 0.7711061057326819, "grad_norm": 0.37109375, "learning_rate": 3.96627330486129e-06, "loss": 1.8524, "step": 23900 }, { "epoch": 0.7711383695864782, "grad_norm": 0.375, "learning_rate": 3.9652072616506184e-06, "loss": 1.8414, "step": 23901 }, { "epoch": 0.7711706334402746, "grad_norm": 0.369140625, "learning_rate": 3.964141339903026e-06, "loss": 1.8819, "step": 23902 }, { "epoch": 0.7712028972940709, "grad_norm": 0.408203125, "learning_rate": 3.963075539630251e-06, "loss": 1.8154, "step": 23903 }, { "epoch": 0.7712351611478673, "grad_norm": 0.3671875, "learning_rate": 3.962009860844024e-06, "loss": 1.8427, "step": 23904 }, { "epoch": 0.7712674250016636, "grad_norm": 0.375, "learning_rate": 3.96094430355607e-06, "loss": 1.846, "step": 23905 }, { "epoch": 0.77129968885546, "grad_norm": 0.365234375, "learning_rate": 3.959878867778124e-06, "loss": 1.8117, "step": 23906 }, { "epoch": 0.7713319527092563, "grad_norm": 0.3671875, "learning_rate": 3.958813553521914e-06, "loss": 1.8313, "step": 23907 }, { "epoch": 0.7713642165630527, "grad_norm": 0.396484375, "learning_rate": 3.957748360799157e-06, "loss": 1.8398, "step": 23908 }, { "epoch": 0.7713964804168489, "grad_norm": 0.37109375, "learning_rate": 3.95668328962159e-06, "loss": 1.7875, "step": 23909 }, { "epoch": 0.7714287442706453, "grad_norm": 0.388671875, "learning_rate": 3.955618340000929e-06, "loss": 1.8396, "step": 23910 }, { "epoch": 0.7714610081244417, "grad_norm": 0.37109375, "learning_rate": 3.954553511948894e-06, "loss": 1.889, "step": 23911 }, { "epoch": 0.771493271978238, "grad_norm": 0.373046875, "learning_rate": 3.953488805477217e-06, "loss": 1.8415, "step": 23912 }, { "epoch": 0.7715255358320344, "grad_norm": 0.375, "learning_rate": 3.952424220597608e-06, "loss": 1.854, "step": 23913 }, { "epoch": 0.7715577996858307, "grad_norm": 0.361328125, "learning_rate": 3.951359757321784e-06, "loss": 1.8404, "step": 23914 }, { "epoch": 0.7715900635396271, "grad_norm": 0.37109375, "learning_rate": 3.950295415661471e-06, "loss": 1.8111, "step": 23915 }, { "epoch": 0.7716223273934234, "grad_norm": 0.36328125, "learning_rate": 3.949231195628378e-06, "loss": 1.8697, "step": 23916 }, { "epoch": 0.7716545912472198, "grad_norm": 0.375, "learning_rate": 3.948167097234216e-06, "loss": 1.8393, "step": 23917 }, { "epoch": 0.7716868551010161, "grad_norm": 0.37109375, "learning_rate": 3.947103120490707e-06, "loss": 1.8172, "step": 23918 }, { "epoch": 0.7717191189548125, "grad_norm": 0.37890625, "learning_rate": 3.946039265409555e-06, "loss": 1.8707, "step": 23919 }, { "epoch": 0.7717513828086088, "grad_norm": 0.41015625, "learning_rate": 3.944975532002471e-06, "loss": 1.8466, "step": 23920 }, { "epoch": 0.7717836466624052, "grad_norm": 0.384765625, "learning_rate": 3.943911920281174e-06, "loss": 1.8415, "step": 23921 }, { "epoch": 0.7718159105162015, "grad_norm": 0.37890625, "learning_rate": 3.9428484302573535e-06, "loss": 1.8308, "step": 23922 }, { "epoch": 0.7718481743699979, "grad_norm": 0.37109375, "learning_rate": 3.941785061942725e-06, "loss": 1.7985, "step": 23923 }, { "epoch": 0.7718804382237942, "grad_norm": 0.39453125, "learning_rate": 3.940721815348997e-06, "loss": 1.8202, "step": 23924 }, { "epoch": 0.7719127020775906, "grad_norm": 0.3984375, "learning_rate": 3.939658690487871e-06, "loss": 1.7968, "step": 23925 }, { "epoch": 0.7719449659313868, "grad_norm": 0.384765625, "learning_rate": 3.938595687371043e-06, "loss": 1.8754, "step": 23926 }, { "epoch": 0.7719772297851832, "grad_norm": 0.39453125, "learning_rate": 3.937532806010221e-06, "loss": 1.8212, "step": 23927 }, { "epoch": 0.7720094936389795, "grad_norm": 0.4140625, "learning_rate": 3.936470046417104e-06, "loss": 1.8408, "step": 23928 }, { "epoch": 0.7720417574927759, "grad_norm": 0.359375, "learning_rate": 3.935407408603387e-06, "loss": 1.8393, "step": 23929 }, { "epoch": 0.7720740213465722, "grad_norm": 0.380859375, "learning_rate": 3.934344892580761e-06, "loss": 1.8391, "step": 23930 }, { "epoch": 0.7721062852003686, "grad_norm": 0.38671875, "learning_rate": 3.933282498360936e-06, "loss": 1.8364, "step": 23931 }, { "epoch": 0.772138549054165, "grad_norm": 0.419921875, "learning_rate": 3.932220225955595e-06, "loss": 1.8005, "step": 23932 }, { "epoch": 0.7721708129079613, "grad_norm": 0.404296875, "learning_rate": 3.93115807537643e-06, "loss": 1.8361, "step": 23933 }, { "epoch": 0.7722030767617577, "grad_norm": 0.37890625, "learning_rate": 3.930096046635142e-06, "loss": 1.8432, "step": 23934 }, { "epoch": 0.772235340615554, "grad_norm": 0.38671875, "learning_rate": 3.929034139743414e-06, "loss": 1.8283, "step": 23935 }, { "epoch": 0.7722676044693504, "grad_norm": 0.49609375, "learning_rate": 3.927972354712932e-06, "loss": 1.7875, "step": 23936 }, { "epoch": 0.7722998683231467, "grad_norm": 0.408203125, "learning_rate": 3.926910691555391e-06, "loss": 1.7768, "step": 23937 }, { "epoch": 0.7723321321769431, "grad_norm": 0.375, "learning_rate": 3.925849150282475e-06, "loss": 1.8378, "step": 23938 }, { "epoch": 0.7723643960307394, "grad_norm": 0.390625, "learning_rate": 3.9247877309058605e-06, "loss": 1.8621, "step": 23939 }, { "epoch": 0.7723966598845358, "grad_norm": 0.40625, "learning_rate": 3.923726433437243e-06, "loss": 1.8061, "step": 23940 }, { "epoch": 0.772428923738332, "grad_norm": 0.38671875, "learning_rate": 3.922665257888298e-06, "loss": 1.8791, "step": 23941 }, { "epoch": 0.7724611875921285, "grad_norm": 0.421875, "learning_rate": 3.9216042042707035e-06, "loss": 1.8369, "step": 23942 }, { "epoch": 0.7724934514459247, "grad_norm": 0.37890625, "learning_rate": 3.920543272596146e-06, "loss": 1.8537, "step": 23943 }, { "epoch": 0.7725257152997211, "grad_norm": 0.357421875, "learning_rate": 3.9194824628763e-06, "loss": 1.7911, "step": 23944 }, { "epoch": 0.7725579791535174, "grad_norm": 0.404296875, "learning_rate": 3.918421775122837e-06, "loss": 1.8604, "step": 23945 }, { "epoch": 0.7725902430073138, "grad_norm": 0.4453125, "learning_rate": 3.917361209347443e-06, "loss": 1.7838, "step": 23946 }, { "epoch": 0.7726225068611101, "grad_norm": 0.431640625, "learning_rate": 3.916300765561785e-06, "loss": 1.8094, "step": 23947 }, { "epoch": 0.7726547707149065, "grad_norm": 0.36328125, "learning_rate": 3.9152404437775335e-06, "loss": 1.8066, "step": 23948 }, { "epoch": 0.7726870345687028, "grad_norm": 0.400390625, "learning_rate": 3.914180244006366e-06, "loss": 1.8225, "step": 23949 }, { "epoch": 0.7727192984224992, "grad_norm": 0.40625, "learning_rate": 3.91312016625995e-06, "loss": 1.8146, "step": 23950 }, { "epoch": 0.7727515622762956, "grad_norm": 0.421875, "learning_rate": 3.91206021054995e-06, "loss": 1.823, "step": 23951 }, { "epoch": 0.7727838261300919, "grad_norm": 0.390625, "learning_rate": 3.911000376888043e-06, "loss": 1.8084, "step": 23952 }, { "epoch": 0.7728160899838883, "grad_norm": 0.37109375, "learning_rate": 3.909940665285882e-06, "loss": 1.8288, "step": 23953 }, { "epoch": 0.7728483538376846, "grad_norm": 0.416015625, "learning_rate": 3.9088810757551375e-06, "loss": 1.8177, "step": 23954 }, { "epoch": 0.772880617691481, "grad_norm": 0.40234375, "learning_rate": 3.907821608307483e-06, "loss": 1.8116, "step": 23955 }, { "epoch": 0.7729128815452773, "grad_norm": 0.4375, "learning_rate": 3.906762262954561e-06, "loss": 1.8244, "step": 23956 }, { "epoch": 0.7729451453990737, "grad_norm": 0.373046875, "learning_rate": 3.905703039708043e-06, "loss": 1.8533, "step": 23957 }, { "epoch": 0.77297740925287, "grad_norm": 0.39453125, "learning_rate": 3.90464393857959e-06, "loss": 1.8224, "step": 23958 }, { "epoch": 0.7730096731066664, "grad_norm": 0.41015625, "learning_rate": 3.9035849595808575e-06, "loss": 1.8044, "step": 23959 }, { "epoch": 0.7730419369604626, "grad_norm": 0.376953125, "learning_rate": 3.902526102723501e-06, "loss": 1.7926, "step": 23960 }, { "epoch": 0.773074200814259, "grad_norm": 0.375, "learning_rate": 3.901467368019171e-06, "loss": 1.8115, "step": 23961 }, { "epoch": 0.7731064646680553, "grad_norm": 0.3828125, "learning_rate": 3.900408755479531e-06, "loss": 1.8036, "step": 23962 }, { "epoch": 0.7731387285218517, "grad_norm": 0.38671875, "learning_rate": 3.899350265116228e-06, "loss": 1.8231, "step": 23963 }, { "epoch": 0.773170992375648, "grad_norm": 0.40234375, "learning_rate": 3.89829189694091e-06, "loss": 1.8094, "step": 23964 }, { "epoch": 0.7732032562294444, "grad_norm": 0.365234375, "learning_rate": 3.897233650965234e-06, "loss": 1.7842, "step": 23965 }, { "epoch": 0.7732355200832407, "grad_norm": 0.369140625, "learning_rate": 3.896175527200847e-06, "loss": 1.8154, "step": 23966 }, { "epoch": 0.7732677839370371, "grad_norm": 0.40625, "learning_rate": 3.8951175256593865e-06, "loss": 1.8228, "step": 23967 }, { "epoch": 0.7733000477908334, "grad_norm": 0.361328125, "learning_rate": 3.894059646352513e-06, "loss": 1.8863, "step": 23968 }, { "epoch": 0.7733323116446298, "grad_norm": 0.369140625, "learning_rate": 3.893001889291862e-06, "loss": 1.7704, "step": 23969 }, { "epoch": 0.7733645754984261, "grad_norm": 0.375, "learning_rate": 3.891944254489074e-06, "loss": 1.8147, "step": 23970 }, { "epoch": 0.7733968393522225, "grad_norm": 0.39453125, "learning_rate": 3.8908867419558e-06, "loss": 1.876, "step": 23971 }, { "epoch": 0.7734291032060189, "grad_norm": 0.3671875, "learning_rate": 3.8898293517036725e-06, "loss": 1.8159, "step": 23972 }, { "epoch": 0.7734613670598152, "grad_norm": 0.36328125, "learning_rate": 3.8887720837443315e-06, "loss": 1.8434, "step": 23973 }, { "epoch": 0.7734936309136116, "grad_norm": 0.369140625, "learning_rate": 3.8877149380894186e-06, "loss": 1.8224, "step": 23974 }, { "epoch": 0.7735258947674079, "grad_norm": 0.416015625, "learning_rate": 3.8866579147505694e-06, "loss": 1.8006, "step": 23975 }, { "epoch": 0.7735581586212043, "grad_norm": 0.37890625, "learning_rate": 3.885601013739411e-06, "loss": 1.8163, "step": 23976 }, { "epoch": 0.7735904224750005, "grad_norm": 0.36328125, "learning_rate": 3.884544235067589e-06, "loss": 1.7992, "step": 23977 }, { "epoch": 0.7736226863287969, "grad_norm": 0.37109375, "learning_rate": 3.88348757874673e-06, "loss": 1.8209, "step": 23978 }, { "epoch": 0.7736549501825932, "grad_norm": 0.37109375, "learning_rate": 3.88243104478846e-06, "loss": 1.8094, "step": 23979 }, { "epoch": 0.7736872140363896, "grad_norm": 0.390625, "learning_rate": 3.8813746332044225e-06, "loss": 1.7989, "step": 23980 }, { "epoch": 0.7737194778901859, "grad_norm": 0.369140625, "learning_rate": 3.880318344006227e-06, "loss": 1.8212, "step": 23981 }, { "epoch": 0.7737517417439823, "grad_norm": 0.392578125, "learning_rate": 3.8792621772055094e-06, "loss": 1.8016, "step": 23982 }, { "epoch": 0.7737840055977786, "grad_norm": 0.37109375, "learning_rate": 3.878206132813906e-06, "loss": 1.8018, "step": 23983 }, { "epoch": 0.773816269451575, "grad_norm": 0.408203125, "learning_rate": 3.877150210843022e-06, "loss": 1.8235, "step": 23984 }, { "epoch": 0.7738485333053713, "grad_norm": 0.361328125, "learning_rate": 3.876094411304489e-06, "loss": 1.7807, "step": 23985 }, { "epoch": 0.7738807971591677, "grad_norm": 0.376953125, "learning_rate": 3.875038734209936e-06, "loss": 1.7642, "step": 23986 }, { "epoch": 0.773913061012964, "grad_norm": 0.369140625, "learning_rate": 3.873983179570967e-06, "loss": 1.8251, "step": 23987 }, { "epoch": 0.7739453248667604, "grad_norm": 0.384765625, "learning_rate": 3.872927747399209e-06, "loss": 1.8022, "step": 23988 }, { "epoch": 0.7739775887205567, "grad_norm": 0.3671875, "learning_rate": 3.871872437706285e-06, "loss": 1.7987, "step": 23989 }, { "epoch": 0.7740098525743531, "grad_norm": 0.384765625, "learning_rate": 3.8708172505038075e-06, "loss": 1.8116, "step": 23990 }, { "epoch": 0.7740421164281494, "grad_norm": 0.400390625, "learning_rate": 3.869762185803388e-06, "loss": 1.7521, "step": 23991 }, { "epoch": 0.7740743802819458, "grad_norm": 0.396484375, "learning_rate": 3.868707243616637e-06, "loss": 1.7984, "step": 23992 }, { "epoch": 0.7741066441357422, "grad_norm": 0.373046875, "learning_rate": 3.867652423955176e-06, "loss": 1.7681, "step": 23993 }, { "epoch": 0.7741389079895384, "grad_norm": 0.361328125, "learning_rate": 3.86659772683061e-06, "loss": 1.8094, "step": 23994 }, { "epoch": 0.7741711718433348, "grad_norm": 0.36328125, "learning_rate": 3.865543152254546e-06, "loss": 1.8249, "step": 23995 }, { "epoch": 0.7742034356971311, "grad_norm": 0.3828125, "learning_rate": 3.864488700238599e-06, "loss": 1.8463, "step": 23996 }, { "epoch": 0.7742356995509275, "grad_norm": 0.373046875, "learning_rate": 3.863434370794371e-06, "loss": 1.7946, "step": 23997 }, { "epoch": 0.7742679634047238, "grad_norm": 0.37109375, "learning_rate": 3.862380163933464e-06, "loss": 1.795, "step": 23998 }, { "epoch": 0.7743002272585202, "grad_norm": 0.37890625, "learning_rate": 3.8613260796674925e-06, "loss": 1.8162, "step": 23999 }, { "epoch": 0.7743324911123165, "grad_norm": 0.37890625, "learning_rate": 3.860272118008049e-06, "loss": 1.8465, "step": 24000 }, { "epoch": 0.7743647549661129, "grad_norm": 0.380859375, "learning_rate": 3.859218278966738e-06, "loss": 1.8078, "step": 24001 }, { "epoch": 0.7743970188199092, "grad_norm": 0.41796875, "learning_rate": 3.858164562555163e-06, "loss": 1.84, "step": 24002 }, { "epoch": 0.7744292826737056, "grad_norm": 0.369140625, "learning_rate": 3.857110968784917e-06, "loss": 1.8159, "step": 24003 }, { "epoch": 0.7744615465275019, "grad_norm": 0.3984375, "learning_rate": 3.856057497667597e-06, "loss": 1.7835, "step": 24004 }, { "epoch": 0.7744938103812983, "grad_norm": 0.392578125, "learning_rate": 3.855004149214807e-06, "loss": 1.808, "step": 24005 }, { "epoch": 0.7745260742350946, "grad_norm": 0.38671875, "learning_rate": 3.853950923438134e-06, "loss": 1.8134, "step": 24006 }, { "epoch": 0.774558338088891, "grad_norm": 0.390625, "learning_rate": 3.852897820349168e-06, "loss": 1.8066, "step": 24007 }, { "epoch": 0.7745906019426873, "grad_norm": 0.365234375, "learning_rate": 3.851844839959513e-06, "loss": 1.8248, "step": 24008 }, { "epoch": 0.7746228657964837, "grad_norm": 0.384765625, "learning_rate": 3.850791982280751e-06, "loss": 1.7912, "step": 24009 }, { "epoch": 0.7746551296502799, "grad_norm": 0.40234375, "learning_rate": 3.849739247324467e-06, "loss": 1.754, "step": 24010 }, { "epoch": 0.7746873935040763, "grad_norm": 0.3671875, "learning_rate": 3.848686635102266e-06, "loss": 1.8134, "step": 24011 }, { "epoch": 0.7747196573578727, "grad_norm": 0.39453125, "learning_rate": 3.84763414562571e-06, "loss": 1.8362, "step": 24012 }, { "epoch": 0.774751921211669, "grad_norm": 0.392578125, "learning_rate": 3.846581778906399e-06, "loss": 1.8393, "step": 24013 }, { "epoch": 0.7747841850654654, "grad_norm": 0.37890625, "learning_rate": 3.845529534955922e-06, "loss": 1.7825, "step": 24014 }, { "epoch": 0.7748164489192617, "grad_norm": 0.376953125, "learning_rate": 3.844477413785845e-06, "loss": 1.7922, "step": 24015 }, { "epoch": 0.7748487127730581, "grad_norm": 0.373046875, "learning_rate": 3.843425415407759e-06, "loss": 1.7842, "step": 24016 }, { "epoch": 0.7748809766268544, "grad_norm": 0.373046875, "learning_rate": 3.842373539833251e-06, "loss": 1.7923, "step": 24017 }, { "epoch": 0.7749132404806508, "grad_norm": 0.38671875, "learning_rate": 3.84132178707388e-06, "loss": 1.7793, "step": 24018 }, { "epoch": 0.7749455043344471, "grad_norm": 0.400390625, "learning_rate": 3.840270157141236e-06, "loss": 1.7759, "step": 24019 }, { "epoch": 0.7749777681882435, "grad_norm": 0.380859375, "learning_rate": 3.8392186500469e-06, "loss": 1.8428, "step": 24020 }, { "epoch": 0.7750100320420398, "grad_norm": 0.380859375, "learning_rate": 3.838167265802429e-06, "loss": 1.8193, "step": 24021 }, { "epoch": 0.7750422958958362, "grad_norm": 0.384765625, "learning_rate": 3.837116004419409e-06, "loss": 1.8502, "step": 24022 }, { "epoch": 0.7750745597496325, "grad_norm": 0.515625, "learning_rate": 3.8360648659094046e-06, "loss": 1.7498, "step": 24023 }, { "epoch": 0.7751068236034289, "grad_norm": 0.384765625, "learning_rate": 3.835013850283994e-06, "loss": 1.8432, "step": 24024 }, { "epoch": 0.7751390874572252, "grad_norm": 0.3828125, "learning_rate": 3.833962957554742e-06, "loss": 1.7985, "step": 24025 }, { "epoch": 0.7751713513110216, "grad_norm": 0.39453125, "learning_rate": 3.832912187733208e-06, "loss": 1.8309, "step": 24026 }, { "epoch": 0.7752036151648178, "grad_norm": 0.37890625, "learning_rate": 3.831861540830971e-06, "loss": 1.8497, "step": 24027 }, { "epoch": 0.7752358790186142, "grad_norm": 0.373046875, "learning_rate": 3.830811016859591e-06, "loss": 1.8057, "step": 24028 }, { "epoch": 0.7752681428724105, "grad_norm": 0.373046875, "learning_rate": 3.829760615830626e-06, "loss": 1.8373, "step": 24029 }, { "epoch": 0.7753004067262069, "grad_norm": 0.375, "learning_rate": 3.828710337755646e-06, "loss": 1.8, "step": 24030 }, { "epoch": 0.7753326705800032, "grad_norm": 0.373046875, "learning_rate": 3.827660182646209e-06, "loss": 1.8378, "step": 24031 }, { "epoch": 0.7753649344337996, "grad_norm": 0.380859375, "learning_rate": 3.826610150513868e-06, "loss": 1.7843, "step": 24032 }, { "epoch": 0.775397198287596, "grad_norm": 0.384765625, "learning_rate": 3.825560241370194e-06, "loss": 1.7799, "step": 24033 }, { "epoch": 0.7754294621413923, "grad_norm": 0.37109375, "learning_rate": 3.8245104552267345e-06, "loss": 1.8341, "step": 24034 }, { "epoch": 0.7754617259951887, "grad_norm": 0.380859375, "learning_rate": 3.823460792095043e-06, "loss": 1.7908, "step": 24035 }, { "epoch": 0.775493989848985, "grad_norm": 0.37109375, "learning_rate": 3.82241125198668e-06, "loss": 1.8096, "step": 24036 }, { "epoch": 0.7755262537027814, "grad_norm": 0.38671875, "learning_rate": 3.821361834913197e-06, "loss": 1.7803, "step": 24037 }, { "epoch": 0.7755585175565777, "grad_norm": 0.380859375, "learning_rate": 3.820312540886137e-06, "loss": 1.8174, "step": 24038 }, { "epoch": 0.7755907814103741, "grad_norm": 0.384765625, "learning_rate": 3.819263369917063e-06, "loss": 1.8254, "step": 24039 }, { "epoch": 0.7756230452641704, "grad_norm": 0.380859375, "learning_rate": 3.818214322017516e-06, "loss": 1.7828, "step": 24040 }, { "epoch": 0.7756553091179668, "grad_norm": 0.365234375, "learning_rate": 3.817165397199038e-06, "loss": 1.7945, "step": 24041 }, { "epoch": 0.775687572971763, "grad_norm": 0.38671875, "learning_rate": 3.816116595473191e-06, "loss": 1.7985, "step": 24042 }, { "epoch": 0.7757198368255595, "grad_norm": 0.37890625, "learning_rate": 3.8150679168515e-06, "loss": 1.8691, "step": 24043 }, { "epoch": 0.7757521006793557, "grad_norm": 0.396484375, "learning_rate": 3.8140193613455184e-06, "loss": 1.8042, "step": 24044 }, { "epoch": 0.7757843645331521, "grad_norm": 0.384765625, "learning_rate": 3.8129709289667943e-06, "loss": 1.779, "step": 24045 }, { "epoch": 0.7758166283869484, "grad_norm": 0.3984375, "learning_rate": 3.8119226197268524e-06, "loss": 1.7399, "step": 24046 }, { "epoch": 0.7758488922407448, "grad_norm": 0.400390625, "learning_rate": 3.810874433637241e-06, "loss": 1.7973, "step": 24047 }, { "epoch": 0.7758811560945411, "grad_norm": 0.39453125, "learning_rate": 3.8098263707095033e-06, "loss": 1.8433, "step": 24048 }, { "epoch": 0.7759134199483375, "grad_norm": 0.376953125, "learning_rate": 3.808778430955162e-06, "loss": 1.8351, "step": 24049 }, { "epoch": 0.7759456838021338, "grad_norm": 0.39453125, "learning_rate": 3.807730614385758e-06, "loss": 1.8436, "step": 24050 }, { "epoch": 0.7759779476559302, "grad_norm": 0.427734375, "learning_rate": 3.8066829210128373e-06, "loss": 1.8615, "step": 24051 }, { "epoch": 0.7760102115097265, "grad_norm": 0.376953125, "learning_rate": 3.80563535084791e-06, "loss": 1.8281, "step": 24052 }, { "epoch": 0.7760424753635229, "grad_norm": 0.388671875, "learning_rate": 3.8045879039025227e-06, "loss": 1.8887, "step": 24053 }, { "epoch": 0.7760747392173193, "grad_norm": 0.38671875, "learning_rate": 3.8035405801882005e-06, "loss": 1.8251, "step": 24054 }, { "epoch": 0.7761070030711156, "grad_norm": 0.3984375, "learning_rate": 3.8024933797164665e-06, "loss": 1.8528, "step": 24055 }, { "epoch": 0.776139266924912, "grad_norm": 0.408203125, "learning_rate": 3.8014463024988573e-06, "loss": 1.8697, "step": 24056 }, { "epoch": 0.7761715307787083, "grad_norm": 0.373046875, "learning_rate": 3.800399348546889e-06, "loss": 1.878, "step": 24057 }, { "epoch": 0.7762037946325047, "grad_norm": 0.3828125, "learning_rate": 3.7993525178720946e-06, "loss": 1.8445, "step": 24058 }, { "epoch": 0.776236058486301, "grad_norm": 0.37890625, "learning_rate": 3.798305810485991e-06, "loss": 1.8126, "step": 24059 }, { "epoch": 0.7762683223400973, "grad_norm": 0.38671875, "learning_rate": 3.7972592264000987e-06, "loss": 1.9127, "step": 24060 }, { "epoch": 0.7763005861938936, "grad_norm": 0.419921875, "learning_rate": 3.7962127656259427e-06, "loss": 1.8544, "step": 24061 }, { "epoch": 0.77633285004769, "grad_norm": 0.375, "learning_rate": 3.7951664281750397e-06, "loss": 1.9174, "step": 24062 }, { "epoch": 0.7763651139014863, "grad_norm": 0.380859375, "learning_rate": 3.794120214058901e-06, "loss": 1.8952, "step": 24063 }, { "epoch": 0.7763973777552827, "grad_norm": 0.380859375, "learning_rate": 3.793074123289054e-06, "loss": 1.8588, "step": 24064 }, { "epoch": 0.776429641609079, "grad_norm": 0.392578125, "learning_rate": 3.792028155877007e-06, "loss": 1.831, "step": 24065 }, { "epoch": 0.7764619054628754, "grad_norm": 0.384765625, "learning_rate": 3.7909823118342683e-06, "loss": 1.8505, "step": 24066 }, { "epoch": 0.7764941693166717, "grad_norm": 0.37890625, "learning_rate": 3.7899365911723587e-06, "loss": 1.865, "step": 24067 }, { "epoch": 0.7765264331704681, "grad_norm": 0.375, "learning_rate": 3.7888909939027873e-06, "loss": 1.8734, "step": 24068 }, { "epoch": 0.7765586970242644, "grad_norm": 0.384765625, "learning_rate": 3.7878455200370542e-06, "loss": 1.8634, "step": 24069 }, { "epoch": 0.7765909608780608, "grad_norm": 0.3671875, "learning_rate": 3.7868001695866796e-06, "loss": 1.8389, "step": 24070 }, { "epoch": 0.7766232247318571, "grad_norm": 0.412109375, "learning_rate": 3.7857549425631647e-06, "loss": 1.9075, "step": 24071 }, { "epoch": 0.7766554885856535, "grad_norm": 0.396484375, "learning_rate": 3.78470983897801e-06, "loss": 1.8574, "step": 24072 }, { "epoch": 0.7766877524394499, "grad_norm": 0.373046875, "learning_rate": 3.783664858842732e-06, "loss": 1.8745, "step": 24073 }, { "epoch": 0.7767200162932462, "grad_norm": 0.37890625, "learning_rate": 3.7826200021688153e-06, "loss": 1.8306, "step": 24074 }, { "epoch": 0.7767522801470426, "grad_norm": 0.443359375, "learning_rate": 3.7815752689677703e-06, "loss": 1.8911, "step": 24075 }, { "epoch": 0.7767845440008389, "grad_norm": 0.390625, "learning_rate": 3.7805306592511063e-06, "loss": 1.8651, "step": 24076 }, { "epoch": 0.7768168078546352, "grad_norm": 0.41015625, "learning_rate": 3.7794861730303027e-06, "loss": 1.8773, "step": 24077 }, { "epoch": 0.7768490717084315, "grad_norm": 0.36328125, "learning_rate": 3.7784418103168665e-06, "loss": 1.8443, "step": 24078 }, { "epoch": 0.7768813355622279, "grad_norm": 0.37890625, "learning_rate": 3.7773975711223008e-06, "loss": 1.843, "step": 24079 }, { "epoch": 0.7769135994160242, "grad_norm": 0.384765625, "learning_rate": 3.7763534554580824e-06, "loss": 1.8747, "step": 24080 }, { "epoch": 0.7769458632698206, "grad_norm": 0.392578125, "learning_rate": 3.7753094633357175e-06, "loss": 1.8856, "step": 24081 }, { "epoch": 0.7769781271236169, "grad_norm": 0.38671875, "learning_rate": 3.7742655947666956e-06, "loss": 1.8629, "step": 24082 }, { "epoch": 0.7770103909774133, "grad_norm": 0.359375, "learning_rate": 3.7732218497624983e-06, "loss": 1.8575, "step": 24083 }, { "epoch": 0.7770426548312096, "grad_norm": 0.435546875, "learning_rate": 3.7721782283346253e-06, "loss": 1.8647, "step": 24084 }, { "epoch": 0.777074918685006, "grad_norm": 0.392578125, "learning_rate": 3.7711347304945603e-06, "loss": 1.8574, "step": 24085 }, { "epoch": 0.7771071825388023, "grad_norm": 0.36328125, "learning_rate": 3.770091356253783e-06, "loss": 1.8675, "step": 24086 }, { "epoch": 0.7771394463925987, "grad_norm": 0.373046875, "learning_rate": 3.769048105623789e-06, "loss": 1.8744, "step": 24087 }, { "epoch": 0.777171710246395, "grad_norm": 0.37109375, "learning_rate": 3.7680049786160524e-06, "loss": 1.8334, "step": 24088 }, { "epoch": 0.7772039741001914, "grad_norm": 0.37890625, "learning_rate": 3.766961975242064e-06, "loss": 1.8251, "step": 24089 }, { "epoch": 0.7772362379539877, "grad_norm": 0.380859375, "learning_rate": 3.7659190955132983e-06, "loss": 1.8487, "step": 24090 }, { "epoch": 0.7772685018077841, "grad_norm": 0.384765625, "learning_rate": 3.764876339441232e-06, "loss": 1.8592, "step": 24091 }, { "epoch": 0.7773007656615804, "grad_norm": 0.412109375, "learning_rate": 3.763833707037352e-06, "loss": 1.8275, "step": 24092 }, { "epoch": 0.7773330295153767, "grad_norm": 0.384765625, "learning_rate": 3.762791198313128e-06, "loss": 1.8437, "step": 24093 }, { "epoch": 0.7773652933691731, "grad_norm": 0.388671875, "learning_rate": 3.761748813280035e-06, "loss": 1.8937, "step": 24094 }, { "epoch": 0.7773975572229694, "grad_norm": 0.392578125, "learning_rate": 3.7607065519495515e-06, "loss": 1.8507, "step": 24095 }, { "epoch": 0.7774298210767658, "grad_norm": 0.4140625, "learning_rate": 3.759664414333147e-06, "loss": 1.8466, "step": 24096 }, { "epoch": 0.7774620849305621, "grad_norm": 0.388671875, "learning_rate": 3.758622400442289e-06, "loss": 1.8731, "step": 24097 }, { "epoch": 0.7774943487843585, "grad_norm": 0.375, "learning_rate": 3.7575805102884554e-06, "loss": 1.8782, "step": 24098 }, { "epoch": 0.7775266126381548, "grad_norm": 0.3828125, "learning_rate": 3.756538743883111e-06, "loss": 1.8572, "step": 24099 }, { "epoch": 0.7775588764919512, "grad_norm": 0.375, "learning_rate": 3.7554971012377175e-06, "loss": 1.8937, "step": 24100 }, { "epoch": 0.7775911403457475, "grad_norm": 0.400390625, "learning_rate": 3.754455582363749e-06, "loss": 1.9034, "step": 24101 }, { "epoch": 0.7776234041995439, "grad_norm": 0.400390625, "learning_rate": 3.7534141872726657e-06, "loss": 1.8506, "step": 24102 }, { "epoch": 0.7776556680533402, "grad_norm": 0.357421875, "learning_rate": 3.752372915975927e-06, "loss": 1.8389, "step": 24103 }, { "epoch": 0.7776879319071366, "grad_norm": 0.38671875, "learning_rate": 3.7513317684850064e-06, "loss": 1.8869, "step": 24104 }, { "epoch": 0.7777201957609329, "grad_norm": 0.37890625, "learning_rate": 3.750290744811348e-06, "loss": 1.8797, "step": 24105 }, { "epoch": 0.7777524596147293, "grad_norm": 0.4296875, "learning_rate": 3.7492498449664176e-06, "loss": 1.8104, "step": 24106 }, { "epoch": 0.7777847234685256, "grad_norm": 0.369140625, "learning_rate": 3.7482090689616833e-06, "loss": 1.8806, "step": 24107 }, { "epoch": 0.777816987322322, "grad_norm": 0.37890625, "learning_rate": 3.747168416808581e-06, "loss": 1.8327, "step": 24108 }, { "epoch": 0.7778492511761183, "grad_norm": 0.36328125, "learning_rate": 3.746127888518579e-06, "loss": 1.8517, "step": 24109 }, { "epoch": 0.7778815150299146, "grad_norm": 0.4140625, "learning_rate": 3.745087484103134e-06, "loss": 1.8788, "step": 24110 }, { "epoch": 0.7779137788837109, "grad_norm": 0.369140625, "learning_rate": 3.744047203573684e-06, "loss": 1.864, "step": 24111 }, { "epoch": 0.7779460427375073, "grad_norm": 0.40234375, "learning_rate": 3.743007046941691e-06, "loss": 1.8288, "step": 24112 }, { "epoch": 0.7779783065913037, "grad_norm": 0.3828125, "learning_rate": 3.741967014218603e-06, "loss": 1.8691, "step": 24113 }, { "epoch": 0.7780105704451, "grad_norm": 0.375, "learning_rate": 3.740927105415859e-06, "loss": 1.8717, "step": 24114 }, { "epoch": 0.7780428342988964, "grad_norm": 0.419921875, "learning_rate": 3.7398873205449163e-06, "loss": 1.873, "step": 24115 }, { "epoch": 0.7780750981526927, "grad_norm": 0.373046875, "learning_rate": 3.738847659617218e-06, "loss": 1.829, "step": 24116 }, { "epoch": 0.7781073620064891, "grad_norm": 0.408203125, "learning_rate": 3.737808122644201e-06, "loss": 1.8894, "step": 24117 }, { "epoch": 0.7781396258602854, "grad_norm": 0.412109375, "learning_rate": 3.736768709637316e-06, "loss": 1.8937, "step": 24118 }, { "epoch": 0.7781718897140818, "grad_norm": 0.38671875, "learning_rate": 3.735729420608001e-06, "loss": 1.9149, "step": 24119 }, { "epoch": 0.7782041535678781, "grad_norm": 0.431640625, "learning_rate": 3.734690255567692e-06, "loss": 1.8592, "step": 24120 }, { "epoch": 0.7782364174216745, "grad_norm": 0.392578125, "learning_rate": 3.7336512145278347e-06, "loss": 1.8812, "step": 24121 }, { "epoch": 0.7782686812754708, "grad_norm": 0.43359375, "learning_rate": 3.732612297499858e-06, "loss": 1.8734, "step": 24122 }, { "epoch": 0.7783009451292672, "grad_norm": 0.3671875, "learning_rate": 3.731573504495206e-06, "loss": 1.8017, "step": 24123 }, { "epoch": 0.7783332089830635, "grad_norm": 0.3671875, "learning_rate": 3.73053483552531e-06, "loss": 1.8148, "step": 24124 }, { "epoch": 0.7783654728368599, "grad_norm": 0.369140625, "learning_rate": 3.7294962906015974e-06, "loss": 1.8731, "step": 24125 }, { "epoch": 0.7783977366906562, "grad_norm": 0.376953125, "learning_rate": 3.728457869735507e-06, "loss": 1.8573, "step": 24126 }, { "epoch": 0.7784300005444525, "grad_norm": 0.36328125, "learning_rate": 3.7274195729384684e-06, "loss": 1.8456, "step": 24127 }, { "epoch": 0.7784622643982488, "grad_norm": 0.431640625, "learning_rate": 3.7263814002219026e-06, "loss": 1.8358, "step": 24128 }, { "epoch": 0.7784945282520452, "grad_norm": 0.39453125, "learning_rate": 3.7253433515972467e-06, "loss": 1.8297, "step": 24129 }, { "epoch": 0.7785267921058415, "grad_norm": 0.369140625, "learning_rate": 3.7243054270759238e-06, "loss": 1.8838, "step": 24130 }, { "epoch": 0.7785590559596379, "grad_norm": 0.423828125, "learning_rate": 3.723267626669352e-06, "loss": 1.8626, "step": 24131 }, { "epoch": 0.7785913198134342, "grad_norm": 0.41796875, "learning_rate": 3.722229950388965e-06, "loss": 1.8801, "step": 24132 }, { "epoch": 0.7786235836672306, "grad_norm": 0.369140625, "learning_rate": 3.7211923982461814e-06, "loss": 1.8132, "step": 24133 }, { "epoch": 0.778655847521027, "grad_norm": 0.369140625, "learning_rate": 3.7201549702524157e-06, "loss": 1.8357, "step": 24134 }, { "epoch": 0.7786881113748233, "grad_norm": 0.373046875, "learning_rate": 3.7191176664191e-06, "loss": 1.82, "step": 24135 }, { "epoch": 0.7787203752286197, "grad_norm": 0.376953125, "learning_rate": 3.7180804867576356e-06, "loss": 1.8267, "step": 24136 }, { "epoch": 0.778752639082416, "grad_norm": 0.400390625, "learning_rate": 3.7170434312794466e-06, "loss": 1.843, "step": 24137 }, { "epoch": 0.7787849029362124, "grad_norm": 0.375, "learning_rate": 3.7160064999959604e-06, "loss": 1.8419, "step": 24138 }, { "epoch": 0.7788171667900087, "grad_norm": 0.380859375, "learning_rate": 3.7149696929185676e-06, "loss": 1.7488, "step": 24139 }, { "epoch": 0.7788494306438051, "grad_norm": 0.41015625, "learning_rate": 3.713933010058695e-06, "loss": 1.7757, "step": 24140 }, { "epoch": 0.7788816944976014, "grad_norm": 0.390625, "learning_rate": 3.7128964514277593e-06, "loss": 1.8198, "step": 24141 }, { "epoch": 0.7789139583513978, "grad_norm": 0.416015625, "learning_rate": 3.711860017037153e-06, "loss": 1.8209, "step": 24142 }, { "epoch": 0.778946222205194, "grad_norm": 0.453125, "learning_rate": 3.710823706898299e-06, "loss": 1.7773, "step": 24143 }, { "epoch": 0.7789784860589904, "grad_norm": 0.37890625, "learning_rate": 3.7097875210225984e-06, "loss": 1.8701, "step": 24144 }, { "epoch": 0.7790107499127867, "grad_norm": 0.404296875, "learning_rate": 3.7087514594214516e-06, "loss": 1.8491, "step": 24145 }, { "epoch": 0.7790430137665831, "grad_norm": 0.375, "learning_rate": 3.7077155221062745e-06, "loss": 1.8425, "step": 24146 }, { "epoch": 0.7790752776203794, "grad_norm": 0.376953125, "learning_rate": 3.706679709088462e-06, "loss": 1.8229, "step": 24147 }, { "epoch": 0.7791075414741758, "grad_norm": 0.3828125, "learning_rate": 3.7056440203794154e-06, "loss": 1.8105, "step": 24148 }, { "epoch": 0.7791398053279721, "grad_norm": 0.380859375, "learning_rate": 3.70460845599054e-06, "loss": 1.7993, "step": 24149 }, { "epoch": 0.7791720691817685, "grad_norm": 0.38671875, "learning_rate": 3.7035730159332313e-06, "loss": 1.8212, "step": 24150 }, { "epoch": 0.7792043330355648, "grad_norm": 0.38671875, "learning_rate": 3.702537700218882e-06, "loss": 1.8148, "step": 24151 }, { "epoch": 0.7792365968893612, "grad_norm": 0.392578125, "learning_rate": 3.7015025088588976e-06, "loss": 1.8189, "step": 24152 }, { "epoch": 0.7792688607431575, "grad_norm": 0.3984375, "learning_rate": 3.7004674418646694e-06, "loss": 1.8135, "step": 24153 }, { "epoch": 0.7793011245969539, "grad_norm": 0.3984375, "learning_rate": 3.699432499247583e-06, "loss": 1.8266, "step": 24154 }, { "epoch": 0.7793333884507503, "grad_norm": 0.380859375, "learning_rate": 3.698397681019043e-06, "loss": 1.8179, "step": 24155 }, { "epoch": 0.7793656523045466, "grad_norm": 0.384765625, "learning_rate": 3.697362987190428e-06, "loss": 1.8042, "step": 24156 }, { "epoch": 0.779397916158343, "grad_norm": 0.435546875, "learning_rate": 3.6963284177731375e-06, "loss": 1.864, "step": 24157 }, { "epoch": 0.7794301800121393, "grad_norm": 0.384765625, "learning_rate": 3.6952939727785543e-06, "loss": 1.7654, "step": 24158 }, { "epoch": 0.7794624438659357, "grad_norm": 0.439453125, "learning_rate": 3.694259652218061e-06, "loss": 1.8237, "step": 24159 }, { "epoch": 0.779494707719732, "grad_norm": 0.380859375, "learning_rate": 3.6932254561030514e-06, "loss": 1.8124, "step": 24160 }, { "epoch": 0.7795269715735283, "grad_norm": 0.396484375, "learning_rate": 3.6921913844449033e-06, "loss": 1.8025, "step": 24161 }, { "epoch": 0.7795592354273246, "grad_norm": 0.423828125, "learning_rate": 3.6911574372549963e-06, "loss": 1.8137, "step": 24162 }, { "epoch": 0.779591499281121, "grad_norm": 0.3671875, "learning_rate": 3.6901236145447256e-06, "loss": 1.8128, "step": 24163 }, { "epoch": 0.7796237631349173, "grad_norm": 0.3828125, "learning_rate": 3.6890899163254506e-06, "loss": 1.7992, "step": 24164 }, { "epoch": 0.7796560269887137, "grad_norm": 0.37890625, "learning_rate": 3.688056342608559e-06, "loss": 1.8173, "step": 24165 }, { "epoch": 0.77968829084251, "grad_norm": 0.4296875, "learning_rate": 3.687022893405437e-06, "loss": 1.7861, "step": 24166 }, { "epoch": 0.7797205546963064, "grad_norm": 0.41796875, "learning_rate": 3.6859895687274414e-06, "loss": 1.8691, "step": 24167 }, { "epoch": 0.7797528185501027, "grad_norm": 0.384765625, "learning_rate": 3.684956368585957e-06, "loss": 1.8186, "step": 24168 }, { "epoch": 0.7797850824038991, "grad_norm": 0.400390625, "learning_rate": 3.683923292992365e-06, "loss": 1.8448, "step": 24169 }, { "epoch": 0.7798173462576954, "grad_norm": 0.369140625, "learning_rate": 3.6828903419580175e-06, "loss": 1.805, "step": 24170 }, { "epoch": 0.7798496101114918, "grad_norm": 0.384765625, "learning_rate": 3.681857515494295e-06, "loss": 1.8437, "step": 24171 }, { "epoch": 0.7798818739652881, "grad_norm": 0.3671875, "learning_rate": 3.6808248136125743e-06, "loss": 1.7529, "step": 24172 }, { "epoch": 0.7799141378190845, "grad_norm": 0.392578125, "learning_rate": 3.679792236324204e-06, "loss": 1.8091, "step": 24173 }, { "epoch": 0.7799464016728809, "grad_norm": 0.373046875, "learning_rate": 3.678759783640564e-06, "loss": 1.7971, "step": 24174 }, { "epoch": 0.7799786655266772, "grad_norm": 0.375, "learning_rate": 3.6777274555730145e-06, "loss": 1.822, "step": 24175 }, { "epoch": 0.7800109293804736, "grad_norm": 0.3671875, "learning_rate": 3.676695252132915e-06, "loss": 1.8089, "step": 24176 }, { "epoch": 0.7800431932342698, "grad_norm": 0.37890625, "learning_rate": 3.675663173331635e-06, "loss": 1.8513, "step": 24177 }, { "epoch": 0.7800754570880662, "grad_norm": 0.40234375, "learning_rate": 3.6746312191805313e-06, "loss": 1.8972, "step": 24178 }, { "epoch": 0.7801077209418625, "grad_norm": 0.435546875, "learning_rate": 3.6735993896909575e-06, "loss": 1.931, "step": 24179 }, { "epoch": 0.7801399847956589, "grad_norm": 0.44921875, "learning_rate": 3.672567684874279e-06, "loss": 1.8949, "step": 24180 }, { "epoch": 0.7801722486494552, "grad_norm": 0.4609375, "learning_rate": 3.671536104741852e-06, "loss": 1.9048, "step": 24181 }, { "epoch": 0.7802045125032516, "grad_norm": 0.423828125, "learning_rate": 3.6705046493050213e-06, "loss": 1.9376, "step": 24182 }, { "epoch": 0.7802367763570479, "grad_norm": 0.388671875, "learning_rate": 3.6694733185751546e-06, "loss": 1.8686, "step": 24183 }, { "epoch": 0.7802690402108443, "grad_norm": 0.408203125, "learning_rate": 3.6684421125635947e-06, "loss": 1.8354, "step": 24184 }, { "epoch": 0.7803013040646406, "grad_norm": 0.38671875, "learning_rate": 3.6674110312816915e-06, "loss": 1.824, "step": 24185 }, { "epoch": 0.780333567918437, "grad_norm": 0.376953125, "learning_rate": 3.666380074740803e-06, "loss": 1.8061, "step": 24186 }, { "epoch": 0.7803658317722333, "grad_norm": 0.39453125, "learning_rate": 3.6653492429522704e-06, "loss": 1.7994, "step": 24187 }, { "epoch": 0.7803980956260297, "grad_norm": 0.388671875, "learning_rate": 3.6643185359274378e-06, "loss": 1.8619, "step": 24188 }, { "epoch": 0.780430359479826, "grad_norm": 0.380859375, "learning_rate": 3.66328795367766e-06, "loss": 1.8202, "step": 24189 }, { "epoch": 0.7804626233336224, "grad_norm": 0.380859375, "learning_rate": 3.6622574962142706e-06, "loss": 1.859, "step": 24190 }, { "epoch": 0.7804948871874187, "grad_norm": 0.390625, "learning_rate": 3.6612271635486217e-06, "loss": 1.822, "step": 24191 }, { "epoch": 0.7805271510412151, "grad_norm": 0.40234375, "learning_rate": 3.660196955692049e-06, "loss": 1.7709, "step": 24192 }, { "epoch": 0.7805594148950113, "grad_norm": 0.38671875, "learning_rate": 3.6591668726558887e-06, "loss": 1.7862, "step": 24193 }, { "epoch": 0.7805916787488077, "grad_norm": 0.373046875, "learning_rate": 3.658136914451493e-06, "loss": 1.7998, "step": 24194 }, { "epoch": 0.7806239426026041, "grad_norm": 0.38671875, "learning_rate": 3.65710708109018e-06, "loss": 1.823, "step": 24195 }, { "epoch": 0.7806562064564004, "grad_norm": 0.396484375, "learning_rate": 3.656077372583296e-06, "loss": 1.8323, "step": 24196 }, { "epoch": 0.7806884703101968, "grad_norm": 0.388671875, "learning_rate": 3.6550477889421826e-06, "loss": 1.8179, "step": 24197 }, { "epoch": 0.7807207341639931, "grad_norm": 0.388671875, "learning_rate": 3.654018330178154e-06, "loss": 1.8193, "step": 24198 }, { "epoch": 0.7807529980177895, "grad_norm": 0.3828125, "learning_rate": 3.652988996302554e-06, "loss": 1.8315, "step": 24199 }, { "epoch": 0.7807852618715858, "grad_norm": 0.373046875, "learning_rate": 3.6519597873267195e-06, "loss": 1.8005, "step": 24200 }, { "epoch": 0.7808175257253822, "grad_norm": 0.3828125, "learning_rate": 3.6509307032619616e-06, "loss": 1.8471, "step": 24201 }, { "epoch": 0.7808497895791785, "grad_norm": 0.40234375, "learning_rate": 3.649901744119616e-06, "loss": 1.8119, "step": 24202 }, { "epoch": 0.7808820534329749, "grad_norm": 0.37109375, "learning_rate": 3.6488729099110185e-06, "loss": 1.827, "step": 24203 }, { "epoch": 0.7809143172867712, "grad_norm": 0.375, "learning_rate": 3.6478442006474757e-06, "loss": 1.823, "step": 24204 }, { "epoch": 0.7809465811405676, "grad_norm": 0.384765625, "learning_rate": 3.646815616340324e-06, "loss": 1.818, "step": 24205 }, { "epoch": 0.7809788449943639, "grad_norm": 0.859375, "learning_rate": 3.645787157000883e-06, "loss": 1.8401, "step": 24206 }, { "epoch": 0.7810111088481603, "grad_norm": 0.84375, "learning_rate": 3.6447588226404668e-06, "loss": 1.8915, "step": 24207 }, { "epoch": 0.7810433727019566, "grad_norm": 0.39453125, "learning_rate": 3.6437306132704016e-06, "loss": 1.9098, "step": 24208 }, { "epoch": 0.781075636555753, "grad_norm": 0.6875, "learning_rate": 3.6427025289020043e-06, "loss": 1.8951, "step": 24209 }, { "epoch": 0.7811079004095492, "grad_norm": 0.8203125, "learning_rate": 3.6416745695465855e-06, "loss": 1.9191, "step": 24210 }, { "epoch": 0.7811401642633456, "grad_norm": 0.79296875, "learning_rate": 3.640646735215469e-06, "loss": 1.8815, "step": 24211 }, { "epoch": 0.7811724281171419, "grad_norm": 0.61328125, "learning_rate": 3.639619025919964e-06, "loss": 1.8872, "step": 24212 }, { "epoch": 0.7812046919709383, "grad_norm": 0.421875, "learning_rate": 3.638591441671379e-06, "loss": 1.9167, "step": 24213 }, { "epoch": 0.7812369558247347, "grad_norm": 0.421875, "learning_rate": 3.6375639824810327e-06, "loss": 1.9263, "step": 24214 }, { "epoch": 0.781269219678531, "grad_norm": 0.6015625, "learning_rate": 3.6365366483602293e-06, "loss": 1.9283, "step": 24215 }, { "epoch": 0.7813014835323274, "grad_norm": 0.640625, "learning_rate": 3.635509439320275e-06, "loss": 1.8553, "step": 24216 }, { "epoch": 0.7813337473861237, "grad_norm": 0.56640625, "learning_rate": 3.6344823553724854e-06, "loss": 1.9031, "step": 24217 }, { "epoch": 0.7813660112399201, "grad_norm": 0.5625, "learning_rate": 3.6334553965281585e-06, "loss": 1.9216, "step": 24218 }, { "epoch": 0.7813982750937164, "grad_norm": 0.435546875, "learning_rate": 3.6324285627985985e-06, "loss": 1.941, "step": 24219 }, { "epoch": 0.7814305389475128, "grad_norm": 0.369140625, "learning_rate": 3.6314018541951117e-06, "loss": 1.8664, "step": 24220 }, { "epoch": 0.7814628028013091, "grad_norm": 0.458984375, "learning_rate": 3.630375270728995e-06, "loss": 1.9358, "step": 24221 }, { "epoch": 0.7814950666551055, "grad_norm": 0.4765625, "learning_rate": 3.629348812411553e-06, "loss": 1.8877, "step": 24222 }, { "epoch": 0.7815273305089018, "grad_norm": 0.53515625, "learning_rate": 3.6283224792540836e-06, "loss": 1.8568, "step": 24223 }, { "epoch": 0.7815595943626982, "grad_norm": 0.5859375, "learning_rate": 3.6272962712678777e-06, "loss": 1.8914, "step": 24224 }, { "epoch": 0.7815918582164945, "grad_norm": 0.50390625, "learning_rate": 3.626270188464244e-06, "loss": 1.8784, "step": 24225 }, { "epoch": 0.7816241220702909, "grad_norm": 0.451171875, "learning_rate": 3.6252442308544608e-06, "loss": 1.9483, "step": 24226 }, { "epoch": 0.7816563859240871, "grad_norm": 0.427734375, "learning_rate": 3.624218398449829e-06, "loss": 1.8936, "step": 24227 }, { "epoch": 0.7816886497778835, "grad_norm": 0.408203125, "learning_rate": 3.6231926912616474e-06, "loss": 1.8955, "step": 24228 }, { "epoch": 0.7817209136316798, "grad_norm": 0.43359375, "learning_rate": 3.622167109301191e-06, "loss": 1.912, "step": 24229 }, { "epoch": 0.7817531774854762, "grad_norm": 0.4765625, "learning_rate": 3.621141652579757e-06, "loss": 1.8549, "step": 24230 }, { "epoch": 0.7817854413392725, "grad_norm": 0.46484375, "learning_rate": 3.6201163211086414e-06, "loss": 1.8887, "step": 24231 }, { "epoch": 0.7818177051930689, "grad_norm": 0.44140625, "learning_rate": 3.619091114899112e-06, "loss": 1.8823, "step": 24232 }, { "epoch": 0.7818499690468652, "grad_norm": 0.41796875, "learning_rate": 3.6180660339624675e-06, "loss": 1.9474, "step": 24233 }, { "epoch": 0.7818822329006616, "grad_norm": 0.470703125, "learning_rate": 3.6170410783099865e-06, "loss": 1.9293, "step": 24234 }, { "epoch": 0.781914496754458, "grad_norm": 0.4453125, "learning_rate": 3.616016247952948e-06, "loss": 1.9195, "step": 24235 }, { "epoch": 0.7819467606082543, "grad_norm": 0.42578125, "learning_rate": 3.614991542902639e-06, "loss": 1.9016, "step": 24236 }, { "epoch": 0.7819790244620507, "grad_norm": 0.4140625, "learning_rate": 3.6139669631703347e-06, "loss": 1.9069, "step": 24237 }, { "epoch": 0.782011288315847, "grad_norm": 0.41015625, "learning_rate": 3.612942508767311e-06, "loss": 1.9103, "step": 24238 }, { "epoch": 0.7820435521696434, "grad_norm": 0.4453125, "learning_rate": 3.6119181797048513e-06, "loss": 1.912, "step": 24239 }, { "epoch": 0.7820758160234397, "grad_norm": 0.404296875, "learning_rate": 3.6108939759942255e-06, "loss": 1.9101, "step": 24240 }, { "epoch": 0.7821080798772361, "grad_norm": 0.404296875, "learning_rate": 3.609869897646705e-06, "loss": 1.8857, "step": 24241 }, { "epoch": 0.7821403437310324, "grad_norm": 0.4453125, "learning_rate": 3.6088459446735698e-06, "loss": 1.8888, "step": 24242 }, { "epoch": 0.7821726075848288, "grad_norm": 0.439453125, "learning_rate": 3.607822117086087e-06, "loss": 1.9098, "step": 24243 }, { "epoch": 0.782204871438625, "grad_norm": 0.439453125, "learning_rate": 3.6067984148955204e-06, "loss": 1.9526, "step": 24244 }, { "epoch": 0.7822371352924214, "grad_norm": 0.400390625, "learning_rate": 3.6057748381131474e-06, "loss": 1.9051, "step": 24245 }, { "epoch": 0.7822693991462177, "grad_norm": 0.400390625, "learning_rate": 3.604751386750231e-06, "loss": 1.9189, "step": 24246 }, { "epoch": 0.7823016630000141, "grad_norm": 0.4140625, "learning_rate": 3.603728060818032e-06, "loss": 1.8882, "step": 24247 }, { "epoch": 0.7823339268538104, "grad_norm": 0.404296875, "learning_rate": 3.602704860327823e-06, "loss": 1.9166, "step": 24248 }, { "epoch": 0.7823661907076068, "grad_norm": 0.49609375, "learning_rate": 3.6016817852908636e-06, "loss": 1.9248, "step": 24249 }, { "epoch": 0.7823984545614031, "grad_norm": 0.4765625, "learning_rate": 3.6006588357184097e-06, "loss": 1.9818, "step": 24250 }, { "epoch": 0.7824307184151995, "grad_norm": 0.46484375, "learning_rate": 3.5996360116217286e-06, "loss": 1.9872, "step": 24251 }, { "epoch": 0.7824629822689958, "grad_norm": 0.43359375, "learning_rate": 3.5986133130120754e-06, "loss": 2.0303, "step": 24252 }, { "epoch": 0.7824952461227922, "grad_norm": 0.451171875, "learning_rate": 3.5975907399007035e-06, "loss": 1.988, "step": 24253 }, { "epoch": 0.7825275099765885, "grad_norm": 0.515625, "learning_rate": 3.596568292298878e-06, "loss": 2.0237, "step": 24254 }, { "epoch": 0.7825597738303849, "grad_norm": 0.43359375, "learning_rate": 3.5955459702178427e-06, "loss": 1.9702, "step": 24255 }, { "epoch": 0.7825920376841813, "grad_norm": 0.421875, "learning_rate": 3.5945237736688657e-06, "loss": 2.004, "step": 24256 }, { "epoch": 0.7826243015379776, "grad_norm": 0.4765625, "learning_rate": 3.5935017026631794e-06, "loss": 2.0106, "step": 24257 }, { "epoch": 0.782656565391774, "grad_norm": 0.486328125, "learning_rate": 3.5924797572120433e-06, "loss": 2.0177, "step": 24258 }, { "epoch": 0.7826888292455703, "grad_norm": 0.453125, "learning_rate": 3.5914579373267146e-06, "loss": 1.9543, "step": 24259 }, { "epoch": 0.7827210930993667, "grad_norm": 0.41796875, "learning_rate": 3.590436243018425e-06, "loss": 1.9962, "step": 24260 }, { "epoch": 0.782753356953163, "grad_norm": 0.474609375, "learning_rate": 3.589414674298427e-06, "loss": 1.9677, "step": 24261 }, { "epoch": 0.7827856208069593, "grad_norm": 0.416015625, "learning_rate": 3.5883932311779743e-06, "loss": 1.9958, "step": 24262 }, { "epoch": 0.7828178846607556, "grad_norm": 0.412109375, "learning_rate": 3.5873719136682953e-06, "loss": 2.0189, "step": 24263 }, { "epoch": 0.782850148514552, "grad_norm": 0.42578125, "learning_rate": 3.5863507217806424e-06, "loss": 2.031, "step": 24264 }, { "epoch": 0.7828824123683483, "grad_norm": 0.41015625, "learning_rate": 3.5853296555262534e-06, "loss": 2.0279, "step": 24265 }, { "epoch": 0.7829146762221447, "grad_norm": 0.447265625, "learning_rate": 3.5843087149163607e-06, "loss": 2.0042, "step": 24266 }, { "epoch": 0.782946940075941, "grad_norm": 0.423828125, "learning_rate": 3.5832878999622137e-06, "loss": 1.9907, "step": 24267 }, { "epoch": 0.7829792039297374, "grad_norm": 0.38671875, "learning_rate": 3.582267210675042e-06, "loss": 2.0139, "step": 24268 }, { "epoch": 0.7830114677835337, "grad_norm": 0.408203125, "learning_rate": 3.5812466470660783e-06, "loss": 2.0109, "step": 24269 }, { "epoch": 0.7830437316373301, "grad_norm": 0.41796875, "learning_rate": 3.580226209146565e-06, "loss": 1.9974, "step": 24270 }, { "epoch": 0.7830759954911264, "grad_norm": 0.462890625, "learning_rate": 3.579205896927728e-06, "loss": 2.0504, "step": 24271 }, { "epoch": 0.7831082593449228, "grad_norm": 0.396484375, "learning_rate": 3.5781857104207953e-06, "loss": 2.0142, "step": 24272 }, { "epoch": 0.7831405231987191, "grad_norm": 0.392578125, "learning_rate": 3.577165649637005e-06, "loss": 2.0141, "step": 24273 }, { "epoch": 0.7831727870525155, "grad_norm": 0.39453125, "learning_rate": 3.5761457145875787e-06, "loss": 2.0377, "step": 24274 }, { "epoch": 0.7832050509063119, "grad_norm": 0.431640625, "learning_rate": 3.5751259052837414e-06, "loss": 2.0, "step": 24275 }, { "epoch": 0.7832373147601082, "grad_norm": 0.3984375, "learning_rate": 3.5741062217367275e-06, "loss": 2.0228, "step": 24276 }, { "epoch": 0.7832695786139046, "grad_norm": 0.41015625, "learning_rate": 3.5730866639577547e-06, "loss": 2.0191, "step": 24277 }, { "epoch": 0.7833018424677008, "grad_norm": 0.4140625, "learning_rate": 3.572067231958042e-06, "loss": 2.0627, "step": 24278 }, { "epoch": 0.7833341063214972, "grad_norm": 0.421875, "learning_rate": 3.5710479257488194e-06, "loss": 2.0603, "step": 24279 }, { "epoch": 0.7833663701752935, "grad_norm": 0.408203125, "learning_rate": 3.5700287453413006e-06, "loss": 2.0094, "step": 24280 }, { "epoch": 0.7833986340290899, "grad_norm": 0.443359375, "learning_rate": 3.5690096907467034e-06, "loss": 1.9762, "step": 24281 }, { "epoch": 0.7834308978828862, "grad_norm": 0.4140625, "learning_rate": 3.567990761976251e-06, "loss": 2.0402, "step": 24282 }, { "epoch": 0.7834631617366826, "grad_norm": 0.412109375, "learning_rate": 3.566971959041154e-06, "loss": 2.0328, "step": 24283 }, { "epoch": 0.7834954255904789, "grad_norm": 0.404296875, "learning_rate": 3.565953281952623e-06, "loss": 2.0018, "step": 24284 }, { "epoch": 0.7835276894442753, "grad_norm": 0.390625, "learning_rate": 3.56493473072188e-06, "loss": 1.9986, "step": 24285 }, { "epoch": 0.7835599532980716, "grad_norm": 0.431640625, "learning_rate": 3.563916305360134e-06, "loss": 2.0273, "step": 24286 }, { "epoch": 0.783592217151868, "grad_norm": 0.396484375, "learning_rate": 3.5628980058785905e-06, "loss": 2.0043, "step": 24287 }, { "epoch": 0.7836244810056643, "grad_norm": 0.416015625, "learning_rate": 3.5618798322884578e-06, "loss": 2.0398, "step": 24288 }, { "epoch": 0.7836567448594607, "grad_norm": 0.416015625, "learning_rate": 3.5608617846009465e-06, "loss": 2.0251, "step": 24289 }, { "epoch": 0.783689008713257, "grad_norm": 0.423828125, "learning_rate": 3.55984386282727e-06, "loss": 2.0151, "step": 24290 }, { "epoch": 0.7837212725670534, "grad_norm": 0.40625, "learning_rate": 3.558826066978617e-06, "loss": 2.0305, "step": 24291 }, { "epoch": 0.7837535364208497, "grad_norm": 0.455078125, "learning_rate": 3.557808397066199e-06, "loss": 2.0391, "step": 24292 }, { "epoch": 0.7837858002746461, "grad_norm": 0.412109375, "learning_rate": 3.556790853101225e-06, "loss": 2.0054, "step": 24293 }, { "epoch": 0.7838180641284423, "grad_norm": 0.404296875, "learning_rate": 3.555773435094881e-06, "loss": 1.9672, "step": 24294 }, { "epoch": 0.7838503279822387, "grad_norm": 0.4140625, "learning_rate": 3.5547561430583768e-06, "loss": 1.9304, "step": 24295 }, { "epoch": 0.7838825918360351, "grad_norm": 0.435546875, "learning_rate": 3.553738977002905e-06, "loss": 1.9461, "step": 24296 }, { "epoch": 0.7839148556898314, "grad_norm": 0.412109375, "learning_rate": 3.5527219369396596e-06, "loss": 1.9076, "step": 24297 }, { "epoch": 0.7839471195436278, "grad_norm": 0.419921875, "learning_rate": 3.5517050228798436e-06, "loss": 1.9144, "step": 24298 }, { "epoch": 0.7839793833974241, "grad_norm": 0.40625, "learning_rate": 3.5506882348346447e-06, "loss": 1.9166, "step": 24299 }, { "epoch": 0.7840116472512205, "grad_norm": 0.388671875, "learning_rate": 3.549671572815254e-06, "loss": 1.9567, "step": 24300 }, { "epoch": 0.7840439111050168, "grad_norm": 0.427734375, "learning_rate": 3.548655036832866e-06, "loss": 1.9608, "step": 24301 }, { "epoch": 0.7840761749588132, "grad_norm": 0.38671875, "learning_rate": 3.547638626898669e-06, "loss": 1.9423, "step": 24302 }, { "epoch": 0.7841084388126095, "grad_norm": 0.396484375, "learning_rate": 3.546622343023846e-06, "loss": 1.9115, "step": 24303 }, { "epoch": 0.7841407026664059, "grad_norm": 0.380859375, "learning_rate": 3.5456061852195915e-06, "loss": 1.8687, "step": 24304 }, { "epoch": 0.7841729665202022, "grad_norm": 0.38671875, "learning_rate": 3.544590153497087e-06, "loss": 1.8324, "step": 24305 }, { "epoch": 0.7842052303739986, "grad_norm": 0.388671875, "learning_rate": 3.543574247867512e-06, "loss": 1.8343, "step": 24306 }, { "epoch": 0.7842374942277949, "grad_norm": 0.38671875, "learning_rate": 3.5425584683420576e-06, "loss": 1.8537, "step": 24307 }, { "epoch": 0.7842697580815913, "grad_norm": 0.388671875, "learning_rate": 3.5415428149318997e-06, "loss": 1.8488, "step": 24308 }, { "epoch": 0.7843020219353876, "grad_norm": 0.37109375, "learning_rate": 3.5405272876482125e-06, "loss": 1.8974, "step": 24309 }, { "epoch": 0.784334285789184, "grad_norm": 0.369140625, "learning_rate": 3.539511886502186e-06, "loss": 1.8176, "step": 24310 }, { "epoch": 0.7843665496429802, "grad_norm": 0.376953125, "learning_rate": 3.5384966115049906e-06, "loss": 1.8722, "step": 24311 }, { "epoch": 0.7843988134967766, "grad_norm": 0.3828125, "learning_rate": 3.5374814626677977e-06, "loss": 1.8895, "step": 24312 }, { "epoch": 0.7844310773505729, "grad_norm": 0.38671875, "learning_rate": 3.5364664400017883e-06, "loss": 1.8282, "step": 24313 }, { "epoch": 0.7844633412043693, "grad_norm": 0.388671875, "learning_rate": 3.535451543518135e-06, "loss": 1.8722, "step": 24314 }, { "epoch": 0.7844956050581656, "grad_norm": 0.365234375, "learning_rate": 3.534436773228004e-06, "loss": 1.8533, "step": 24315 }, { "epoch": 0.784527868911962, "grad_norm": 0.3984375, "learning_rate": 3.533422129142565e-06, "loss": 1.8543, "step": 24316 }, { "epoch": 0.7845601327657584, "grad_norm": 0.375, "learning_rate": 3.5324076112729937e-06, "loss": 1.8883, "step": 24317 }, { "epoch": 0.7845923966195547, "grad_norm": 0.37109375, "learning_rate": 3.531393219630453e-06, "loss": 1.903, "step": 24318 }, { "epoch": 0.7846246604733511, "grad_norm": 0.37109375, "learning_rate": 3.5303789542261017e-06, "loss": 1.87, "step": 24319 }, { "epoch": 0.7846569243271474, "grad_norm": 0.3828125, "learning_rate": 3.529364815071112e-06, "loss": 1.8643, "step": 24320 }, { "epoch": 0.7846891881809438, "grad_norm": 0.365234375, "learning_rate": 3.528350802176654e-06, "loss": 1.86, "step": 24321 }, { "epoch": 0.7847214520347401, "grad_norm": 0.384765625, "learning_rate": 3.527336915553872e-06, "loss": 1.8247, "step": 24322 }, { "epoch": 0.7847537158885365, "grad_norm": 0.384765625, "learning_rate": 3.5263231552139345e-06, "loss": 1.8733, "step": 24323 }, { "epoch": 0.7847859797423328, "grad_norm": 0.482421875, "learning_rate": 3.525309521168009e-06, "loss": 1.82, "step": 24324 }, { "epoch": 0.7848182435961292, "grad_norm": 0.3671875, "learning_rate": 3.5242960134272368e-06, "loss": 1.8199, "step": 24325 }, { "epoch": 0.7848505074499255, "grad_norm": 0.369140625, "learning_rate": 3.523282632002785e-06, "loss": 1.8787, "step": 24326 }, { "epoch": 0.7848827713037219, "grad_norm": 0.435546875, "learning_rate": 3.5222693769058035e-06, "loss": 1.8416, "step": 24327 }, { "epoch": 0.7849150351575181, "grad_norm": 0.369140625, "learning_rate": 3.521256248147443e-06, "loss": 1.8817, "step": 24328 }, { "epoch": 0.7849472990113145, "grad_norm": 0.373046875, "learning_rate": 3.520243245738865e-06, "loss": 1.8606, "step": 24329 }, { "epoch": 0.7849795628651108, "grad_norm": 0.359375, "learning_rate": 3.519230369691212e-06, "loss": 1.8231, "step": 24330 }, { "epoch": 0.7850118267189072, "grad_norm": 0.369140625, "learning_rate": 3.5182176200156303e-06, "loss": 1.8518, "step": 24331 }, { "epoch": 0.7850440905727035, "grad_norm": 0.3671875, "learning_rate": 3.517204996723277e-06, "loss": 1.8852, "step": 24332 }, { "epoch": 0.7850763544264999, "grad_norm": 0.380859375, "learning_rate": 3.516192499825294e-06, "loss": 1.8445, "step": 24333 }, { "epoch": 0.7851086182802962, "grad_norm": 0.380859375, "learning_rate": 3.5151801293328205e-06, "loss": 1.8819, "step": 24334 }, { "epoch": 0.7851408821340926, "grad_norm": 0.373046875, "learning_rate": 3.5141678852570093e-06, "loss": 1.8387, "step": 24335 }, { "epoch": 0.785173145987889, "grad_norm": 0.376953125, "learning_rate": 3.5131557676089975e-06, "loss": 1.8683, "step": 24336 }, { "epoch": 0.7852054098416853, "grad_norm": 0.37890625, "learning_rate": 3.512143776399924e-06, "loss": 1.8347, "step": 24337 }, { "epoch": 0.7852376736954817, "grad_norm": 0.396484375, "learning_rate": 3.5111319116409337e-06, "loss": 1.851, "step": 24338 }, { "epoch": 0.785269937549278, "grad_norm": 0.384765625, "learning_rate": 3.51012017334316e-06, "loss": 1.9076, "step": 24339 }, { "epoch": 0.7853022014030744, "grad_norm": 0.3828125, "learning_rate": 3.509108561517737e-06, "loss": 1.8208, "step": 24340 }, { "epoch": 0.7853344652568707, "grad_norm": 0.404296875, "learning_rate": 3.5080970761758096e-06, "loss": 1.8963, "step": 24341 }, { "epoch": 0.7853667291106671, "grad_norm": 0.37109375, "learning_rate": 3.5070857173285035e-06, "loss": 1.8624, "step": 24342 }, { "epoch": 0.7853989929644634, "grad_norm": 0.369140625, "learning_rate": 3.5060744849869488e-06, "loss": 1.8509, "step": 24343 }, { "epoch": 0.7854312568182598, "grad_norm": 0.36328125, "learning_rate": 3.505063379162284e-06, "loss": 1.8317, "step": 24344 }, { "epoch": 0.785463520672056, "grad_norm": 0.361328125, "learning_rate": 3.504052399865637e-06, "loss": 1.8486, "step": 24345 }, { "epoch": 0.7854957845258524, "grad_norm": 0.37109375, "learning_rate": 3.503041547108133e-06, "loss": 1.8479, "step": 24346 }, { "epoch": 0.7855280483796487, "grad_norm": 0.38671875, "learning_rate": 3.5020308209008954e-06, "loss": 1.8168, "step": 24347 }, { "epoch": 0.7855603122334451, "grad_norm": 0.376953125, "learning_rate": 3.501020221255058e-06, "loss": 1.9049, "step": 24348 }, { "epoch": 0.7855925760872414, "grad_norm": 0.380859375, "learning_rate": 3.5000097481817412e-06, "loss": 1.7837, "step": 24349 }, { "epoch": 0.7856248399410378, "grad_norm": 0.3671875, "learning_rate": 3.498999401692061e-06, "loss": 1.8911, "step": 24350 }, { "epoch": 0.7856571037948341, "grad_norm": 0.39453125, "learning_rate": 3.497989181797151e-06, "loss": 1.838, "step": 24351 }, { "epoch": 0.7856893676486305, "grad_norm": 0.396484375, "learning_rate": 3.4969790885081233e-06, "loss": 1.8887, "step": 24352 }, { "epoch": 0.7857216315024268, "grad_norm": 0.384765625, "learning_rate": 3.495969121836094e-06, "loss": 1.8288, "step": 24353 }, { "epoch": 0.7857538953562232, "grad_norm": 0.416015625, "learning_rate": 3.4949592817921846e-06, "loss": 1.8217, "step": 24354 }, { "epoch": 0.7857861592100195, "grad_norm": 0.361328125, "learning_rate": 3.493949568387517e-06, "loss": 1.8738, "step": 24355 }, { "epoch": 0.7858184230638159, "grad_norm": 0.384765625, "learning_rate": 3.4929399816331903e-06, "loss": 1.8523, "step": 24356 }, { "epoch": 0.7858506869176123, "grad_norm": 0.392578125, "learning_rate": 3.491930521540328e-06, "loss": 1.9121, "step": 24357 }, { "epoch": 0.7858829507714086, "grad_norm": 0.359375, "learning_rate": 3.49092118812004e-06, "loss": 1.8238, "step": 24358 }, { "epoch": 0.785915214625205, "grad_norm": 0.392578125, "learning_rate": 3.48991198138343e-06, "loss": 1.8636, "step": 24359 }, { "epoch": 0.7859474784790013, "grad_norm": 0.375, "learning_rate": 3.488902901341617e-06, "loss": 1.879, "step": 24360 }, { "epoch": 0.7859797423327977, "grad_norm": 0.37890625, "learning_rate": 3.487893948005701e-06, "loss": 1.8947, "step": 24361 }, { "epoch": 0.786012006186594, "grad_norm": 0.4296875, "learning_rate": 3.4868851213867867e-06, "loss": 1.9624, "step": 24362 }, { "epoch": 0.7860442700403903, "grad_norm": 0.39453125, "learning_rate": 3.485876421495986e-06, "loss": 1.9644, "step": 24363 }, { "epoch": 0.7860765338941866, "grad_norm": 0.384765625, "learning_rate": 3.4848678483443974e-06, "loss": 1.9378, "step": 24364 }, { "epoch": 0.786108797747983, "grad_norm": 0.40234375, "learning_rate": 3.4838594019431176e-06, "loss": 1.9559, "step": 24365 }, { "epoch": 0.7861410616017793, "grad_norm": 0.3984375, "learning_rate": 3.4828510823032555e-06, "loss": 1.9256, "step": 24366 }, { "epoch": 0.7861733254555757, "grad_norm": 0.41015625, "learning_rate": 3.4818428894359076e-06, "loss": 1.9418, "step": 24367 }, { "epoch": 0.786205589309372, "grad_norm": 0.3828125, "learning_rate": 3.4808348233521654e-06, "loss": 1.938, "step": 24368 }, { "epoch": 0.7862378531631684, "grad_norm": 0.43359375, "learning_rate": 3.4798268840631343e-06, "loss": 1.9732, "step": 24369 }, { "epoch": 0.7862701170169647, "grad_norm": 0.373046875, "learning_rate": 3.478819071579904e-06, "loss": 1.9723, "step": 24370 }, { "epoch": 0.7863023808707611, "grad_norm": 0.439453125, "learning_rate": 3.477811385913564e-06, "loss": 1.9773, "step": 24371 }, { "epoch": 0.7863346447245574, "grad_norm": 0.40625, "learning_rate": 3.476803827075214e-06, "loss": 1.9708, "step": 24372 }, { "epoch": 0.7863669085783538, "grad_norm": 0.384765625, "learning_rate": 3.4757963950759416e-06, "loss": 1.9772, "step": 24373 }, { "epoch": 0.7863991724321501, "grad_norm": 0.404296875, "learning_rate": 3.474789089926831e-06, "loss": 1.9449, "step": 24374 }, { "epoch": 0.7864314362859465, "grad_norm": 0.384765625, "learning_rate": 3.473781911638977e-06, "loss": 1.9505, "step": 24375 }, { "epoch": 0.7864637001397429, "grad_norm": 0.39453125, "learning_rate": 3.4727748602234637e-06, "loss": 1.868, "step": 24376 }, { "epoch": 0.7864959639935392, "grad_norm": 0.380859375, "learning_rate": 3.4717679356913743e-06, "loss": 1.8612, "step": 24377 }, { "epoch": 0.7865282278473356, "grad_norm": 0.3828125, "learning_rate": 3.47076113805379e-06, "loss": 1.8576, "step": 24378 }, { "epoch": 0.7865604917011318, "grad_norm": 0.380859375, "learning_rate": 3.4697544673218e-06, "loss": 1.8892, "step": 24379 }, { "epoch": 0.7865927555549282, "grad_norm": 0.384765625, "learning_rate": 3.4687479235064806e-06, "loss": 1.9175, "step": 24380 }, { "epoch": 0.7866250194087245, "grad_norm": 0.376953125, "learning_rate": 3.4677415066189078e-06, "loss": 1.854, "step": 24381 }, { "epoch": 0.7866572832625209, "grad_norm": 0.373046875, "learning_rate": 3.466735216670166e-06, "loss": 1.8961, "step": 24382 }, { "epoch": 0.7866895471163172, "grad_norm": 0.37890625, "learning_rate": 3.4657290536713316e-06, "loss": 1.8775, "step": 24383 }, { "epoch": 0.7867218109701136, "grad_norm": 0.37109375, "learning_rate": 3.4647230176334705e-06, "loss": 1.8327, "step": 24384 }, { "epoch": 0.7867540748239099, "grad_norm": 0.388671875, "learning_rate": 3.463717108567669e-06, "loss": 1.8614, "step": 24385 }, { "epoch": 0.7867863386777063, "grad_norm": 0.400390625, "learning_rate": 3.462711326484993e-06, "loss": 1.8447, "step": 24386 }, { "epoch": 0.7868186025315026, "grad_norm": 0.3984375, "learning_rate": 3.461705671396509e-06, "loss": 1.8441, "step": 24387 }, { "epoch": 0.786850866385299, "grad_norm": 0.40234375, "learning_rate": 3.4607001433132963e-06, "loss": 1.8336, "step": 24388 }, { "epoch": 0.7868831302390953, "grad_norm": 0.384765625, "learning_rate": 3.4596947422464174e-06, "loss": 1.8249, "step": 24389 }, { "epoch": 0.7869153940928917, "grad_norm": 0.373046875, "learning_rate": 3.458689468206936e-06, "loss": 1.8342, "step": 24390 }, { "epoch": 0.786947657946688, "grad_norm": 0.3671875, "learning_rate": 3.4576843212059257e-06, "loss": 1.8123, "step": 24391 }, { "epoch": 0.7869799218004844, "grad_norm": 0.390625, "learning_rate": 3.4566793012544462e-06, "loss": 1.8116, "step": 24392 }, { "epoch": 0.7870121856542807, "grad_norm": 0.373046875, "learning_rate": 3.4556744083635556e-06, "loss": 1.8162, "step": 24393 }, { "epoch": 0.7870444495080771, "grad_norm": 0.5546875, "learning_rate": 3.454669642544323e-06, "loss": 1.9296, "step": 24394 }, { "epoch": 0.7870767133618733, "grad_norm": 0.47265625, "learning_rate": 3.453665003807805e-06, "loss": 1.9232, "step": 24395 }, { "epoch": 0.7871089772156697, "grad_norm": 0.4140625, "learning_rate": 3.452660492165054e-06, "loss": 1.9158, "step": 24396 }, { "epoch": 0.7871412410694661, "grad_norm": 0.46875, "learning_rate": 3.451656107627138e-06, "loss": 1.8901, "step": 24397 }, { "epoch": 0.7871735049232624, "grad_norm": 0.5703125, "learning_rate": 3.4506518502051047e-06, "loss": 1.8675, "step": 24398 }, { "epoch": 0.7872057687770588, "grad_norm": 0.470703125, "learning_rate": 3.449647719910009e-06, "loss": 1.9083, "step": 24399 }, { "epoch": 0.7872380326308551, "grad_norm": 0.3984375, "learning_rate": 3.4486437167529065e-06, "loss": 1.9492, "step": 24400 }, { "epoch": 0.7872702964846515, "grad_norm": 0.4296875, "learning_rate": 3.447639840744849e-06, "loss": 1.9279, "step": 24401 }, { "epoch": 0.7873025603384478, "grad_norm": 0.65625, "learning_rate": 3.4466360918968786e-06, "loss": 1.9293, "step": 24402 }, { "epoch": 0.7873348241922442, "grad_norm": 0.4765625, "learning_rate": 3.4456324702200553e-06, "loss": 1.9177, "step": 24403 }, { "epoch": 0.7873670880460405, "grad_norm": 0.46484375, "learning_rate": 3.44462897572542e-06, "loss": 1.9196, "step": 24404 }, { "epoch": 0.7873993518998369, "grad_norm": 0.396484375, "learning_rate": 3.443625608424016e-06, "loss": 1.9356, "step": 24405 }, { "epoch": 0.7874316157536332, "grad_norm": 0.384765625, "learning_rate": 3.442622368326894e-06, "loss": 1.9036, "step": 24406 }, { "epoch": 0.7874638796074296, "grad_norm": 0.439453125, "learning_rate": 3.4416192554450932e-06, "loss": 1.8747, "step": 24407 }, { "epoch": 0.7874961434612259, "grad_norm": 0.427734375, "learning_rate": 3.4406162697896572e-06, "loss": 1.9306, "step": 24408 }, { "epoch": 0.7875284073150223, "grad_norm": 0.43359375, "learning_rate": 3.4396134113716197e-06, "loss": 1.9249, "step": 24409 }, { "epoch": 0.7875606711688186, "grad_norm": 0.390625, "learning_rate": 3.4386106802020277e-06, "loss": 1.8914, "step": 24410 }, { "epoch": 0.787592935022615, "grad_norm": 0.423828125, "learning_rate": 3.437608076291917e-06, "loss": 1.9708, "step": 24411 }, { "epoch": 0.7876251988764112, "grad_norm": 0.5, "learning_rate": 3.436605599652317e-06, "loss": 1.9968, "step": 24412 }, { "epoch": 0.7876574627302076, "grad_norm": 0.458984375, "learning_rate": 3.435603250294271e-06, "loss": 2.0448, "step": 24413 }, { "epoch": 0.7876897265840039, "grad_norm": 0.41015625, "learning_rate": 3.4346010282288078e-06, "loss": 1.9735, "step": 24414 }, { "epoch": 0.7877219904378003, "grad_norm": 0.423828125, "learning_rate": 3.4335989334669556e-06, "loss": 1.9892, "step": 24415 }, { "epoch": 0.7877542542915966, "grad_norm": 0.435546875, "learning_rate": 3.432596966019752e-06, "loss": 2.0063, "step": 24416 }, { "epoch": 0.787786518145393, "grad_norm": 0.43359375, "learning_rate": 3.4315951258982237e-06, "loss": 2.0195, "step": 24417 }, { "epoch": 0.7878187819991894, "grad_norm": 0.416015625, "learning_rate": 3.430593413113393e-06, "loss": 1.9962, "step": 24418 }, { "epoch": 0.7878510458529857, "grad_norm": 0.431640625, "learning_rate": 3.4295918276762934e-06, "loss": 2.0099, "step": 24419 }, { "epoch": 0.7878833097067821, "grad_norm": 0.45703125, "learning_rate": 3.428590369597947e-06, "loss": 1.9845, "step": 24420 }, { "epoch": 0.7879155735605784, "grad_norm": 0.423828125, "learning_rate": 3.427589038889372e-06, "loss": 2.0532, "step": 24421 }, { "epoch": 0.7879478374143748, "grad_norm": 0.44921875, "learning_rate": 3.4265878355615997e-06, "loss": 2.0492, "step": 24422 }, { "epoch": 0.7879801012681711, "grad_norm": 0.4609375, "learning_rate": 3.4255867596256446e-06, "loss": 2.0135, "step": 24423 }, { "epoch": 0.7880123651219675, "grad_norm": 0.412109375, "learning_rate": 3.4245858110925245e-06, "loss": 1.9695, "step": 24424 }, { "epoch": 0.7880446289757638, "grad_norm": 0.40234375, "learning_rate": 3.423584989973262e-06, "loss": 2.0121, "step": 24425 }, { "epoch": 0.7880768928295602, "grad_norm": 0.431640625, "learning_rate": 3.422584296278873e-06, "loss": 2.0181, "step": 24426 }, { "epoch": 0.7881091566833565, "grad_norm": 0.458984375, "learning_rate": 3.4215837300203653e-06, "loss": 2.0194, "step": 24427 }, { "epoch": 0.7881414205371529, "grad_norm": 0.41015625, "learning_rate": 3.4205832912087614e-06, "loss": 2.0166, "step": 24428 }, { "epoch": 0.7881736843909491, "grad_norm": 0.455078125, "learning_rate": 3.4195829798550705e-06, "loss": 2.0074, "step": 24429 }, { "epoch": 0.7882059482447455, "grad_norm": 0.474609375, "learning_rate": 3.4185827959702963e-06, "loss": 2.0116, "step": 24430 }, { "epoch": 0.7882382120985418, "grad_norm": 0.435546875, "learning_rate": 3.417582739565461e-06, "loss": 1.9739, "step": 24431 }, { "epoch": 0.7882704759523382, "grad_norm": 0.4375, "learning_rate": 3.416582810651563e-06, "loss": 2.0361, "step": 24432 }, { "epoch": 0.7883027398061345, "grad_norm": 0.435546875, "learning_rate": 3.415583009239609e-06, "loss": 2.0187, "step": 24433 }, { "epoch": 0.7883350036599309, "grad_norm": 0.408203125, "learning_rate": 3.4145833353406103e-06, "loss": 1.992, "step": 24434 }, { "epoch": 0.7883672675137272, "grad_norm": 0.412109375, "learning_rate": 3.413583788965568e-06, "loss": 2.0138, "step": 24435 }, { "epoch": 0.7883995313675236, "grad_norm": 0.42578125, "learning_rate": 3.412584370125478e-06, "loss": 1.9466, "step": 24436 }, { "epoch": 0.78843179522132, "grad_norm": 0.4375, "learning_rate": 3.411585078831351e-06, "loss": 1.9887, "step": 24437 }, { "epoch": 0.7884640590751163, "grad_norm": 0.3828125, "learning_rate": 3.4105859150941814e-06, "loss": 1.9936, "step": 24438 }, { "epoch": 0.7884963229289127, "grad_norm": 0.45703125, "learning_rate": 3.4095868789249684e-06, "loss": 2.0114, "step": 24439 }, { "epoch": 0.788528586782709, "grad_norm": 0.419921875, "learning_rate": 3.408587970334703e-06, "loss": 2.0154, "step": 24440 }, { "epoch": 0.7885608506365054, "grad_norm": 0.390625, "learning_rate": 3.4075891893343907e-06, "loss": 1.9999, "step": 24441 }, { "epoch": 0.7885931144903017, "grad_norm": 0.4453125, "learning_rate": 3.4065905359350204e-06, "loss": 1.9698, "step": 24442 }, { "epoch": 0.7886253783440981, "grad_norm": 0.4453125, "learning_rate": 3.4055920101475808e-06, "loss": 1.9881, "step": 24443 }, { "epoch": 0.7886576421978944, "grad_norm": 0.41796875, "learning_rate": 3.4045936119830683e-06, "loss": 1.9927, "step": 24444 }, { "epoch": 0.7886899060516908, "grad_norm": 0.41796875, "learning_rate": 3.403595341452473e-06, "loss": 1.9794, "step": 24445 }, { "epoch": 0.788722169905487, "grad_norm": 0.412109375, "learning_rate": 3.402597198566777e-06, "loss": 1.9974, "step": 24446 }, { "epoch": 0.7887544337592834, "grad_norm": 0.427734375, "learning_rate": 3.4015991833369737e-06, "loss": 2.0106, "step": 24447 }, { "epoch": 0.7887866976130797, "grad_norm": 0.400390625, "learning_rate": 3.4006012957740473e-06, "loss": 1.9977, "step": 24448 }, { "epoch": 0.7888189614668761, "grad_norm": 0.447265625, "learning_rate": 3.3996035358889745e-06, "loss": 1.9705, "step": 24449 }, { "epoch": 0.7888512253206724, "grad_norm": 0.44140625, "learning_rate": 3.398605903692749e-06, "loss": 2.024, "step": 24450 }, { "epoch": 0.7888834891744688, "grad_norm": 0.39453125, "learning_rate": 3.397608399196348e-06, "loss": 1.9462, "step": 24451 }, { "epoch": 0.7889157530282651, "grad_norm": 0.39453125, "learning_rate": 3.3966110224107455e-06, "loss": 1.9245, "step": 24452 }, { "epoch": 0.7889480168820615, "grad_norm": 0.408203125, "learning_rate": 3.3956137733469278e-06, "loss": 1.9262, "step": 24453 }, { "epoch": 0.7889802807358578, "grad_norm": 0.451171875, "learning_rate": 3.3946166520158695e-06, "loss": 1.9223, "step": 24454 }, { "epoch": 0.7890125445896542, "grad_norm": 0.400390625, "learning_rate": 3.393619658428542e-06, "loss": 1.888, "step": 24455 }, { "epoch": 0.7890448084434505, "grad_norm": 0.40625, "learning_rate": 3.392622792595926e-06, "loss": 1.9318, "step": 24456 }, { "epoch": 0.7890770722972469, "grad_norm": 0.40234375, "learning_rate": 3.3916260545289913e-06, "loss": 1.9235, "step": 24457 }, { "epoch": 0.7891093361510433, "grad_norm": 0.40234375, "learning_rate": 3.3906294442387043e-06, "loss": 1.896, "step": 24458 }, { "epoch": 0.7891416000048396, "grad_norm": 0.380859375, "learning_rate": 3.389632961736046e-06, "loss": 1.9247, "step": 24459 }, { "epoch": 0.789173863858636, "grad_norm": 0.4140625, "learning_rate": 3.3886366070319763e-06, "loss": 1.9089, "step": 24460 }, { "epoch": 0.7892061277124323, "grad_norm": 0.380859375, "learning_rate": 3.3876403801374628e-06, "loss": 1.9775, "step": 24461 }, { "epoch": 0.7892383915662287, "grad_norm": 0.40234375, "learning_rate": 3.386644281063477e-06, "loss": 1.9216, "step": 24462 }, { "epoch": 0.789270655420025, "grad_norm": 0.4296875, "learning_rate": 3.3856483098209795e-06, "loss": 1.915, "step": 24463 }, { "epoch": 0.7893029192738213, "grad_norm": 0.373046875, "learning_rate": 3.384652466420929e-06, "loss": 1.8527, "step": 24464 }, { "epoch": 0.7893351831276176, "grad_norm": 0.435546875, "learning_rate": 3.383656750874294e-06, "loss": 1.835, "step": 24465 }, { "epoch": 0.789367446981414, "grad_norm": 0.388671875, "learning_rate": 3.382661163192034e-06, "loss": 1.7933, "step": 24466 }, { "epoch": 0.7893997108352103, "grad_norm": 0.3984375, "learning_rate": 3.3816657033851056e-06, "loss": 1.8209, "step": 24467 }, { "epoch": 0.7894319746890067, "grad_norm": 0.38671875, "learning_rate": 3.380670371464462e-06, "loss": 1.7932, "step": 24468 }, { "epoch": 0.789464238542803, "grad_norm": 0.384765625, "learning_rate": 3.3796751674410664e-06, "loss": 1.7906, "step": 24469 }, { "epoch": 0.7894965023965994, "grad_norm": 0.388671875, "learning_rate": 3.3786800913258716e-06, "loss": 1.7941, "step": 24470 }, { "epoch": 0.7895287662503957, "grad_norm": 0.435546875, "learning_rate": 3.3776851431298233e-06, "loss": 1.864, "step": 24471 }, { "epoch": 0.7895610301041921, "grad_norm": 0.39453125, "learning_rate": 3.376690322863885e-06, "loss": 1.8934, "step": 24472 }, { "epoch": 0.7895932939579884, "grad_norm": 0.384765625, "learning_rate": 3.375695630539e-06, "loss": 1.8792, "step": 24473 }, { "epoch": 0.7896255578117848, "grad_norm": 0.40234375, "learning_rate": 3.374701066166115e-06, "loss": 1.8917, "step": 24474 }, { "epoch": 0.7896578216655811, "grad_norm": 0.375, "learning_rate": 3.373706629756184e-06, "loss": 1.9229, "step": 24475 }, { "epoch": 0.7896900855193775, "grad_norm": 0.4296875, "learning_rate": 3.372712321320149e-06, "loss": 1.8984, "step": 24476 }, { "epoch": 0.7897223493731739, "grad_norm": 0.365234375, "learning_rate": 3.3717181408689533e-06, "loss": 1.9122, "step": 24477 }, { "epoch": 0.7897546132269702, "grad_norm": 0.388671875, "learning_rate": 3.3707240884135455e-06, "loss": 1.8896, "step": 24478 }, { "epoch": 0.7897868770807666, "grad_norm": 0.390625, "learning_rate": 3.369730163964864e-06, "loss": 1.8969, "step": 24479 }, { "epoch": 0.7898191409345628, "grad_norm": 0.396484375, "learning_rate": 3.3687363675338446e-06, "loss": 1.8555, "step": 24480 }, { "epoch": 0.7898514047883592, "grad_norm": 0.380859375, "learning_rate": 3.3677426991314357e-06, "loss": 1.9, "step": 24481 }, { "epoch": 0.7898836686421555, "grad_norm": 0.4375, "learning_rate": 3.3667491587685694e-06, "loss": 1.9198, "step": 24482 }, { "epoch": 0.7899159324959519, "grad_norm": 0.380859375, "learning_rate": 3.36575574645618e-06, "loss": 1.9199, "step": 24483 }, { "epoch": 0.7899481963497482, "grad_norm": 0.375, "learning_rate": 3.3647624622052085e-06, "loss": 1.9161, "step": 24484 }, { "epoch": 0.7899804602035446, "grad_norm": 0.3828125, "learning_rate": 3.363769306026585e-06, "loss": 1.9249, "step": 24485 }, { "epoch": 0.7900127240573409, "grad_norm": 0.396484375, "learning_rate": 3.362776277931237e-06, "loss": 1.9314, "step": 24486 }, { "epoch": 0.7900449879111373, "grad_norm": 0.375, "learning_rate": 3.3617833779301026e-06, "loss": 1.8963, "step": 24487 }, { "epoch": 0.7900772517649336, "grad_norm": 0.38671875, "learning_rate": 3.360790606034108e-06, "loss": 1.9142, "step": 24488 }, { "epoch": 0.79010951561873, "grad_norm": 0.408203125, "learning_rate": 3.3597979622541764e-06, "loss": 1.9076, "step": 24489 }, { "epoch": 0.7901417794725263, "grad_norm": 0.3828125, "learning_rate": 3.3588054466012435e-06, "loss": 1.9434, "step": 24490 }, { "epoch": 0.7901740433263227, "grad_norm": 0.3828125, "learning_rate": 3.357813059086228e-06, "loss": 1.8841, "step": 24491 }, { "epoch": 0.790206307180119, "grad_norm": 0.384765625, "learning_rate": 3.3568207997200483e-06, "loss": 1.9251, "step": 24492 }, { "epoch": 0.7902385710339154, "grad_norm": 0.384765625, "learning_rate": 3.3558286685136386e-06, "loss": 1.8987, "step": 24493 }, { "epoch": 0.7902708348877117, "grad_norm": 0.396484375, "learning_rate": 3.354836665477913e-06, "loss": 1.9301, "step": 24494 }, { "epoch": 0.7903030987415081, "grad_norm": 0.390625, "learning_rate": 3.3538447906237873e-06, "loss": 1.9326, "step": 24495 }, { "epoch": 0.7903353625953043, "grad_norm": 0.453125, "learning_rate": 3.352853043962187e-06, "loss": 1.9145, "step": 24496 }, { "epoch": 0.7903676264491007, "grad_norm": 0.439453125, "learning_rate": 3.351861425504026e-06, "loss": 1.9187, "step": 24497 }, { "epoch": 0.7903998903028971, "grad_norm": 0.431640625, "learning_rate": 3.3508699352602156e-06, "loss": 1.8664, "step": 24498 }, { "epoch": 0.7904321541566934, "grad_norm": 0.421875, "learning_rate": 3.349878573241669e-06, "loss": 1.879, "step": 24499 }, { "epoch": 0.7904644180104898, "grad_norm": 0.412109375, "learning_rate": 3.3488873394593063e-06, "loss": 1.906, "step": 24500 }, { "epoch": 0.7904966818642861, "grad_norm": 0.416015625, "learning_rate": 3.347896233924033e-06, "loss": 1.8704, "step": 24501 }, { "epoch": 0.7905289457180825, "grad_norm": 0.404296875, "learning_rate": 3.3469052566467534e-06, "loss": 1.8934, "step": 24502 }, { "epoch": 0.7905612095718788, "grad_norm": 0.388671875, "learning_rate": 3.345914407638386e-06, "loss": 1.9203, "step": 24503 }, { "epoch": 0.7905934734256752, "grad_norm": 0.419921875, "learning_rate": 3.3449236869098325e-06, "loss": 1.9206, "step": 24504 }, { "epoch": 0.7906257372794715, "grad_norm": 0.427734375, "learning_rate": 3.3439330944719937e-06, "loss": 1.9192, "step": 24505 }, { "epoch": 0.7906580011332679, "grad_norm": 0.38671875, "learning_rate": 3.342942630335781e-06, "loss": 1.8807, "step": 24506 }, { "epoch": 0.7906902649870642, "grad_norm": 0.40234375, "learning_rate": 3.341952294512092e-06, "loss": 1.8645, "step": 24507 }, { "epoch": 0.7907225288408606, "grad_norm": 0.41015625, "learning_rate": 3.3409620870118264e-06, "loss": 1.8849, "step": 24508 }, { "epoch": 0.7907547926946569, "grad_norm": 0.3984375, "learning_rate": 3.339972007845889e-06, "loss": 1.9065, "step": 24509 }, { "epoch": 0.7907870565484533, "grad_norm": 0.39453125, "learning_rate": 3.3389820570251744e-06, "loss": 1.8847, "step": 24510 }, { "epoch": 0.7908193204022496, "grad_norm": 0.400390625, "learning_rate": 3.3379922345605775e-06, "loss": 1.9173, "step": 24511 }, { "epoch": 0.790851584256046, "grad_norm": 0.3984375, "learning_rate": 3.337002540462999e-06, "loss": 1.9054, "step": 24512 }, { "epoch": 0.7908838481098422, "grad_norm": 0.416015625, "learning_rate": 3.3360129747433288e-06, "loss": 1.9428, "step": 24513 }, { "epoch": 0.7909161119636386, "grad_norm": 0.40625, "learning_rate": 3.3350235374124567e-06, "loss": 1.895, "step": 24514 }, { "epoch": 0.7909483758174349, "grad_norm": 0.42578125, "learning_rate": 3.33403422848128e-06, "loss": 1.8617, "step": 24515 }, { "epoch": 0.7909806396712313, "grad_norm": 0.390625, "learning_rate": 3.3330450479606863e-06, "loss": 1.9363, "step": 24516 }, { "epoch": 0.7910129035250276, "grad_norm": 0.392578125, "learning_rate": 3.3320559958615587e-06, "loss": 1.904, "step": 24517 }, { "epoch": 0.791045167378824, "grad_norm": 0.447265625, "learning_rate": 3.331067072194798e-06, "loss": 1.91, "step": 24518 }, { "epoch": 0.7910774312326204, "grad_norm": 0.380859375, "learning_rate": 3.3300782769712695e-06, "loss": 1.8791, "step": 24519 }, { "epoch": 0.7911096950864167, "grad_norm": 0.38671875, "learning_rate": 3.329089610201867e-06, "loss": 1.9057, "step": 24520 }, { "epoch": 0.7911419589402131, "grad_norm": 0.412109375, "learning_rate": 3.328101071897478e-06, "loss": 1.9054, "step": 24521 }, { "epoch": 0.7911742227940094, "grad_norm": 0.44140625, "learning_rate": 3.3271126620689805e-06, "loss": 1.9176, "step": 24522 }, { "epoch": 0.7912064866478058, "grad_norm": 0.40234375, "learning_rate": 3.326124380727246e-06, "loss": 1.8894, "step": 24523 }, { "epoch": 0.7912387505016021, "grad_norm": 0.416015625, "learning_rate": 3.325136227883166e-06, "loss": 1.9052, "step": 24524 }, { "epoch": 0.7912710143553985, "grad_norm": 0.41015625, "learning_rate": 3.3241482035476095e-06, "loss": 1.9093, "step": 24525 }, { "epoch": 0.7913032782091948, "grad_norm": 0.388671875, "learning_rate": 3.3231603077314497e-06, "loss": 1.9206, "step": 24526 }, { "epoch": 0.7913355420629912, "grad_norm": 0.37890625, "learning_rate": 3.322172540445568e-06, "loss": 1.8119, "step": 24527 }, { "epoch": 0.7913678059167875, "grad_norm": 0.37890625, "learning_rate": 3.3211849017008345e-06, "loss": 1.821, "step": 24528 }, { "epoch": 0.7914000697705839, "grad_norm": 0.396484375, "learning_rate": 3.320197391508118e-06, "loss": 1.7976, "step": 24529 }, { "epoch": 0.7914323336243801, "grad_norm": 0.400390625, "learning_rate": 3.319210009878287e-06, "loss": 1.8068, "step": 24530 }, { "epoch": 0.7914645974781765, "grad_norm": 0.37890625, "learning_rate": 3.3182227568222155e-06, "loss": 1.8368, "step": 24531 }, { "epoch": 0.7914968613319728, "grad_norm": 0.412109375, "learning_rate": 3.3172356323507676e-06, "loss": 1.8129, "step": 24532 }, { "epoch": 0.7915291251857692, "grad_norm": 0.412109375, "learning_rate": 3.3162486364748034e-06, "loss": 1.8336, "step": 24533 }, { "epoch": 0.7915613890395655, "grad_norm": 0.40234375, "learning_rate": 3.3152617692051984e-06, "loss": 1.7993, "step": 24534 }, { "epoch": 0.7915936528933619, "grad_norm": 0.3828125, "learning_rate": 3.3142750305528085e-06, "loss": 1.8313, "step": 24535 }, { "epoch": 0.7916259167471582, "grad_norm": 0.4140625, "learning_rate": 3.31328842052849e-06, "loss": 1.824, "step": 24536 }, { "epoch": 0.7916581806009546, "grad_norm": 0.400390625, "learning_rate": 3.3123019391431147e-06, "loss": 1.852, "step": 24537 }, { "epoch": 0.791690444454751, "grad_norm": 0.41015625, "learning_rate": 3.311315586407535e-06, "loss": 1.8421, "step": 24538 }, { "epoch": 0.7917227083085473, "grad_norm": 0.455078125, "learning_rate": 3.3103293623326026e-06, "loss": 1.8723, "step": 24539 }, { "epoch": 0.7917549721623437, "grad_norm": 0.38671875, "learning_rate": 3.3093432669291836e-06, "loss": 1.8729, "step": 24540 }, { "epoch": 0.79178723601614, "grad_norm": 0.412109375, "learning_rate": 3.308357300208126e-06, "loss": 1.8557, "step": 24541 }, { "epoch": 0.7918194998699364, "grad_norm": 0.41796875, "learning_rate": 3.3073714621802807e-06, "loss": 1.83, "step": 24542 }, { "epoch": 0.7918517637237327, "grad_norm": 0.388671875, "learning_rate": 3.306385752856505e-06, "loss": 1.8604, "step": 24543 }, { "epoch": 0.7918840275775291, "grad_norm": 0.373046875, "learning_rate": 3.305400172247646e-06, "loss": 1.8304, "step": 24544 }, { "epoch": 0.7919162914313254, "grad_norm": 0.390625, "learning_rate": 3.304414720364549e-06, "loss": 1.8039, "step": 24545 }, { "epoch": 0.7919485552851218, "grad_norm": 0.47265625, "learning_rate": 3.3034293972180684e-06, "loss": 1.8354, "step": 24546 }, { "epoch": 0.791980819138918, "grad_norm": 0.380859375, "learning_rate": 3.302444202819047e-06, "loss": 1.8232, "step": 24547 }, { "epoch": 0.7920130829927144, "grad_norm": 0.42578125, "learning_rate": 3.3014591371783216e-06, "loss": 1.7885, "step": 24548 }, { "epoch": 0.7920453468465107, "grad_norm": 0.373046875, "learning_rate": 3.3004742003067513e-06, "loss": 1.8739, "step": 24549 }, { "epoch": 0.7920776107003071, "grad_norm": 0.3828125, "learning_rate": 3.299489392215159e-06, "loss": 1.8268, "step": 24550 }, { "epoch": 0.7921098745541034, "grad_norm": 0.384765625, "learning_rate": 3.2985047129143937e-06, "loss": 1.8003, "step": 24551 }, { "epoch": 0.7921421384078998, "grad_norm": 0.37890625, "learning_rate": 3.297520162415297e-06, "loss": 1.8596, "step": 24552 }, { "epoch": 0.7921744022616961, "grad_norm": 0.380859375, "learning_rate": 3.296535740728705e-06, "loss": 1.888, "step": 24553 }, { "epoch": 0.7922066661154925, "grad_norm": 0.39453125, "learning_rate": 3.295551447865446e-06, "loss": 1.8295, "step": 24554 }, { "epoch": 0.7922389299692888, "grad_norm": 0.380859375, "learning_rate": 3.2945672838363643e-06, "loss": 1.8564, "step": 24555 }, { "epoch": 0.7922711938230852, "grad_norm": 0.3671875, "learning_rate": 3.293583248652289e-06, "loss": 1.8422, "step": 24556 }, { "epoch": 0.7923034576768815, "grad_norm": 0.373046875, "learning_rate": 3.2925993423240477e-06, "loss": 1.8468, "step": 24557 }, { "epoch": 0.7923357215306779, "grad_norm": 0.400390625, "learning_rate": 3.291615564862477e-06, "loss": 1.9076, "step": 24558 }, { "epoch": 0.7923679853844743, "grad_norm": 0.376953125, "learning_rate": 3.2906319162784016e-06, "loss": 1.7615, "step": 24559 }, { "epoch": 0.7924002492382706, "grad_norm": 0.38671875, "learning_rate": 3.289648396582651e-06, "loss": 1.838, "step": 24560 }, { "epoch": 0.792432513092067, "grad_norm": 0.37890625, "learning_rate": 3.2886650057860457e-06, "loss": 1.8644, "step": 24561 }, { "epoch": 0.7924647769458633, "grad_norm": 0.37109375, "learning_rate": 3.287681743899418e-06, "loss": 1.8094, "step": 24562 }, { "epoch": 0.7924970407996597, "grad_norm": 0.375, "learning_rate": 3.2866986109335888e-06, "loss": 1.8535, "step": 24563 }, { "epoch": 0.792529304653456, "grad_norm": 0.37109375, "learning_rate": 3.285715606899372e-06, "loss": 1.8633, "step": 24564 }, { "epoch": 0.7925615685072523, "grad_norm": 0.404296875, "learning_rate": 3.284732731807599e-06, "loss": 1.8409, "step": 24565 }, { "epoch": 0.7925938323610486, "grad_norm": 0.380859375, "learning_rate": 3.283749985669085e-06, "loss": 1.8517, "step": 24566 }, { "epoch": 0.792626096214845, "grad_norm": 0.373046875, "learning_rate": 3.2827673684946392e-06, "loss": 1.856, "step": 24567 }, { "epoch": 0.7926583600686413, "grad_norm": 0.41796875, "learning_rate": 3.2817848802950905e-06, "loss": 1.8254, "step": 24568 }, { "epoch": 0.7926906239224377, "grad_norm": 0.396484375, "learning_rate": 3.280802521081246e-06, "loss": 1.8451, "step": 24569 }, { "epoch": 0.792722887776234, "grad_norm": 0.373046875, "learning_rate": 3.2798202908639165e-06, "loss": 1.8704, "step": 24570 }, { "epoch": 0.7927551516300304, "grad_norm": 0.373046875, "learning_rate": 3.2788381896539215e-06, "loss": 1.8328, "step": 24571 }, { "epoch": 0.7927874154838267, "grad_norm": 0.388671875, "learning_rate": 3.2778562174620665e-06, "loss": 1.8484, "step": 24572 }, { "epoch": 0.7928196793376231, "grad_norm": 0.384765625, "learning_rate": 3.276874374299157e-06, "loss": 1.8499, "step": 24573 }, { "epoch": 0.7928519431914194, "grad_norm": 0.37109375, "learning_rate": 3.2758926601760083e-06, "loss": 1.8119, "step": 24574 }, { "epoch": 0.7928842070452158, "grad_norm": 0.376953125, "learning_rate": 3.274911075103423e-06, "loss": 1.8662, "step": 24575 }, { "epoch": 0.7929164708990121, "grad_norm": 0.384765625, "learning_rate": 3.273929619092199e-06, "loss": 1.8516, "step": 24576 }, { "epoch": 0.7929487347528085, "grad_norm": 0.37890625, "learning_rate": 3.272948292153152e-06, "loss": 1.7955, "step": 24577 }, { "epoch": 0.7929809986066048, "grad_norm": 0.376953125, "learning_rate": 3.2719670942970752e-06, "loss": 1.8081, "step": 24578 }, { "epoch": 0.7930132624604012, "grad_norm": 0.3671875, "learning_rate": 3.270986025534768e-06, "loss": 1.8596, "step": 24579 }, { "epoch": 0.7930455263141976, "grad_norm": 0.376953125, "learning_rate": 3.270005085877041e-06, "loss": 1.8165, "step": 24580 }, { "epoch": 0.7930777901679938, "grad_norm": 0.38671875, "learning_rate": 3.2690242753346738e-06, "loss": 1.8545, "step": 24581 }, { "epoch": 0.7931100540217902, "grad_norm": 0.373046875, "learning_rate": 3.2680435939184716e-06, "loss": 1.8379, "step": 24582 }, { "epoch": 0.7931423178755865, "grad_norm": 0.376953125, "learning_rate": 3.2670630416392372e-06, "loss": 1.8121, "step": 24583 }, { "epoch": 0.7931745817293829, "grad_norm": 0.384765625, "learning_rate": 3.2660826185077486e-06, "loss": 1.8775, "step": 24584 }, { "epoch": 0.7932068455831792, "grad_norm": 0.439453125, "learning_rate": 3.2651023245348037e-06, "loss": 1.9276, "step": 24585 }, { "epoch": 0.7932391094369756, "grad_norm": 0.421875, "learning_rate": 3.2641221597311976e-06, "loss": 1.9182, "step": 24586 }, { "epoch": 0.7932713732907719, "grad_norm": 0.439453125, "learning_rate": 3.2631421241077164e-06, "loss": 1.9123, "step": 24587 }, { "epoch": 0.7933036371445683, "grad_norm": 0.419921875, "learning_rate": 3.2621622176751415e-06, "loss": 1.903, "step": 24588 }, { "epoch": 0.7933359009983646, "grad_norm": 0.4453125, "learning_rate": 3.2611824404442693e-06, "loss": 1.9325, "step": 24589 }, { "epoch": 0.793368164852161, "grad_norm": 0.431640625, "learning_rate": 3.2602027924258794e-06, "loss": 1.892, "step": 24590 }, { "epoch": 0.7934004287059573, "grad_norm": 0.412109375, "learning_rate": 3.2592232736307546e-06, "loss": 1.8842, "step": 24591 }, { "epoch": 0.7934326925597537, "grad_norm": 0.39453125, "learning_rate": 3.258243884069672e-06, "loss": 1.8989, "step": 24592 }, { "epoch": 0.79346495641355, "grad_norm": 0.40234375, "learning_rate": 3.257264623753424e-06, "loss": 1.8741, "step": 24593 }, { "epoch": 0.7934972202673464, "grad_norm": 0.404296875, "learning_rate": 3.256285492692781e-06, "loss": 1.9381, "step": 24594 }, { "epoch": 0.7935294841211427, "grad_norm": 0.38671875, "learning_rate": 3.2553064908985186e-06, "loss": 1.9123, "step": 24595 }, { "epoch": 0.7935617479749391, "grad_norm": 0.40234375, "learning_rate": 3.25432761838142e-06, "loss": 1.8978, "step": 24596 }, { "epoch": 0.7935940118287353, "grad_norm": 0.390625, "learning_rate": 3.253348875152259e-06, "loss": 1.8929, "step": 24597 }, { "epoch": 0.7936262756825317, "grad_norm": 0.392578125, "learning_rate": 3.252370261221801e-06, "loss": 1.8931, "step": 24598 }, { "epoch": 0.7936585395363281, "grad_norm": 0.3828125, "learning_rate": 3.2513917766008277e-06, "loss": 1.9253, "step": 24599 }, { "epoch": 0.7936908033901244, "grad_norm": 0.37890625, "learning_rate": 3.250413421300104e-06, "loss": 1.8598, "step": 24600 }, { "epoch": 0.7937230672439208, "grad_norm": 0.37890625, "learning_rate": 3.249435195330398e-06, "loss": 1.9072, "step": 24601 }, { "epoch": 0.7937553310977171, "grad_norm": 0.3828125, "learning_rate": 3.2484570987024825e-06, "loss": 1.9041, "step": 24602 }, { "epoch": 0.7937875949515135, "grad_norm": 0.38671875, "learning_rate": 3.2474791314271223e-06, "loss": 1.9002, "step": 24603 }, { "epoch": 0.7938198588053098, "grad_norm": 0.38671875, "learning_rate": 3.246501293515074e-06, "loss": 1.9282, "step": 24604 }, { "epoch": 0.7938521226591062, "grad_norm": 0.380859375, "learning_rate": 3.2455235849771136e-06, "loss": 1.9064, "step": 24605 }, { "epoch": 0.7938843865129025, "grad_norm": 0.380859375, "learning_rate": 3.244546005823996e-06, "loss": 1.893, "step": 24606 }, { "epoch": 0.7939166503666989, "grad_norm": 0.373046875, "learning_rate": 3.2435685560664794e-06, "loss": 1.9184, "step": 24607 }, { "epoch": 0.7939489142204952, "grad_norm": 0.484375, "learning_rate": 3.2425912357153305e-06, "loss": 1.9213, "step": 24608 }, { "epoch": 0.7939811780742916, "grad_norm": 0.384765625, "learning_rate": 3.241614044781301e-06, "loss": 1.9493, "step": 24609 }, { "epoch": 0.7940134419280879, "grad_norm": 0.388671875, "learning_rate": 3.240636983275146e-06, "loss": 1.9031, "step": 24610 }, { "epoch": 0.7940457057818843, "grad_norm": 0.38671875, "learning_rate": 3.2396600512076303e-06, "loss": 1.9017, "step": 24611 }, { "epoch": 0.7940779696356806, "grad_norm": 0.38671875, "learning_rate": 3.2386832485894924e-06, "loss": 1.8946, "step": 24612 }, { "epoch": 0.794110233489477, "grad_norm": 0.37109375, "learning_rate": 3.2377065754314906e-06, "loss": 1.9097, "step": 24613 }, { "epoch": 0.7941424973432732, "grad_norm": 0.38671875, "learning_rate": 3.236730031744386e-06, "loss": 1.8984, "step": 24614 }, { "epoch": 0.7941747611970696, "grad_norm": 0.37890625, "learning_rate": 3.23575361753891e-06, "loss": 1.8996, "step": 24615 }, { "epoch": 0.7942070250508659, "grad_norm": 0.3671875, "learning_rate": 3.234777332825818e-06, "loss": 1.946, "step": 24616 }, { "epoch": 0.7942392889046623, "grad_norm": 0.373046875, "learning_rate": 3.2338011776158656e-06, "loss": 1.9283, "step": 24617 }, { "epoch": 0.7942715527584586, "grad_norm": 0.416015625, "learning_rate": 3.232825151919779e-06, "loss": 1.8633, "step": 24618 }, { "epoch": 0.794303816612255, "grad_norm": 0.376953125, "learning_rate": 3.2318492557483174e-06, "loss": 1.9209, "step": 24619 }, { "epoch": 0.7943360804660514, "grad_norm": 0.365234375, "learning_rate": 3.2308734891122117e-06, "loss": 1.8498, "step": 24620 }, { "epoch": 0.7943683443198477, "grad_norm": 0.421875, "learning_rate": 3.229897852022211e-06, "loss": 1.8236, "step": 24621 }, { "epoch": 0.7944006081736441, "grad_norm": 0.396484375, "learning_rate": 3.2289223444890525e-06, "loss": 1.8215, "step": 24622 }, { "epoch": 0.7944328720274404, "grad_norm": 0.431640625, "learning_rate": 3.227946966523467e-06, "loss": 1.8276, "step": 24623 }, { "epoch": 0.7944651358812368, "grad_norm": 0.384765625, "learning_rate": 3.2269717181362002e-06, "loss": 1.8274, "step": 24624 }, { "epoch": 0.7944973997350331, "grad_norm": 0.396484375, "learning_rate": 3.2259965993379824e-06, "loss": 1.7829, "step": 24625 }, { "epoch": 0.7945296635888295, "grad_norm": 0.388671875, "learning_rate": 3.2250216101395456e-06, "loss": 1.8189, "step": 24626 }, { "epoch": 0.7945619274426258, "grad_norm": 0.376953125, "learning_rate": 3.2240467505516246e-06, "loss": 1.8117, "step": 24627 }, { "epoch": 0.7945941912964222, "grad_norm": 0.40234375, "learning_rate": 3.2230720205849506e-06, "loss": 1.8358, "step": 24628 }, { "epoch": 0.7946264551502185, "grad_norm": 0.41015625, "learning_rate": 3.222097420250247e-06, "loss": 1.7802, "step": 24629 }, { "epoch": 0.7946587190040149, "grad_norm": 0.40234375, "learning_rate": 3.2211229495582494e-06, "loss": 1.7826, "step": 24630 }, { "epoch": 0.7946909828578111, "grad_norm": 0.373046875, "learning_rate": 3.22014860851968e-06, "loss": 1.7977, "step": 24631 }, { "epoch": 0.7947232467116075, "grad_norm": 0.380859375, "learning_rate": 3.2191743971452604e-06, "loss": 1.799, "step": 24632 }, { "epoch": 0.7947555105654038, "grad_norm": 0.41015625, "learning_rate": 3.2182003154457214e-06, "loss": 1.7905, "step": 24633 }, { "epoch": 0.7947877744192002, "grad_norm": 0.404296875, "learning_rate": 3.2172263634317813e-06, "loss": 1.8139, "step": 24634 }, { "epoch": 0.7948200382729965, "grad_norm": 0.38671875, "learning_rate": 3.2162525411141565e-06, "loss": 1.8505, "step": 24635 }, { "epoch": 0.7948523021267929, "grad_norm": 0.3984375, "learning_rate": 3.2152788485035732e-06, "loss": 1.8491, "step": 24636 }, { "epoch": 0.7948845659805892, "grad_norm": 0.373046875, "learning_rate": 3.214305285610747e-06, "loss": 1.8099, "step": 24637 }, { "epoch": 0.7949168298343856, "grad_norm": 0.400390625, "learning_rate": 3.213331852446388e-06, "loss": 1.833, "step": 24638 }, { "epoch": 0.794949093688182, "grad_norm": 0.423828125, "learning_rate": 3.212358549021221e-06, "loss": 1.8175, "step": 24639 }, { "epoch": 0.7949813575419783, "grad_norm": 0.390625, "learning_rate": 3.211385375345956e-06, "loss": 1.8778, "step": 24640 }, { "epoch": 0.7950136213957747, "grad_norm": 0.3671875, "learning_rate": 3.2104123314312983e-06, "loss": 1.8549, "step": 24641 }, { "epoch": 0.795045885249571, "grad_norm": 0.3671875, "learning_rate": 3.209439417287971e-06, "loss": 1.8286, "step": 24642 }, { "epoch": 0.7950781491033674, "grad_norm": 0.423828125, "learning_rate": 3.208466632926669e-06, "loss": 1.8046, "step": 24643 }, { "epoch": 0.7951104129571637, "grad_norm": 0.419921875, "learning_rate": 3.207493978358108e-06, "loss": 1.8439, "step": 24644 }, { "epoch": 0.7951426768109601, "grad_norm": 0.376953125, "learning_rate": 3.2065214535930005e-06, "loss": 1.8404, "step": 24645 }, { "epoch": 0.7951749406647564, "grad_norm": 0.375, "learning_rate": 3.2055490586420357e-06, "loss": 1.8178, "step": 24646 }, { "epoch": 0.7952072045185528, "grad_norm": 0.4140625, "learning_rate": 3.2045767935159263e-06, "loss": 1.7781, "step": 24647 }, { "epoch": 0.795239468372349, "grad_norm": 0.373046875, "learning_rate": 3.2036046582253814e-06, "loss": 1.8596, "step": 24648 }, { "epoch": 0.7952717322261454, "grad_norm": 0.431640625, "learning_rate": 3.202632652781085e-06, "loss": 1.8129, "step": 24649 }, { "epoch": 0.7953039960799417, "grad_norm": 0.41796875, "learning_rate": 3.201660777193749e-06, "loss": 1.8657, "step": 24650 }, { "epoch": 0.7953362599337381, "grad_norm": 0.380859375, "learning_rate": 3.2006890314740643e-06, "loss": 1.8033, "step": 24651 }, { "epoch": 0.7953685237875344, "grad_norm": 0.373046875, "learning_rate": 3.199717415632733e-06, "loss": 1.8128, "step": 24652 }, { "epoch": 0.7954007876413308, "grad_norm": 0.4140625, "learning_rate": 3.198745929680448e-06, "loss": 1.8231, "step": 24653 }, { "epoch": 0.7954330514951271, "grad_norm": 0.40234375, "learning_rate": 3.1977745736278967e-06, "loss": 1.8318, "step": 24654 }, { "epoch": 0.7954653153489235, "grad_norm": 0.40234375, "learning_rate": 3.1968033474857798e-06, "loss": 1.8178, "step": 24655 }, { "epoch": 0.7954975792027198, "grad_norm": 0.375, "learning_rate": 3.195832251264785e-06, "loss": 1.8478, "step": 24656 }, { "epoch": 0.7955298430565162, "grad_norm": 0.3828125, "learning_rate": 3.194861284975596e-06, "loss": 1.8466, "step": 24657 }, { "epoch": 0.7955621069103125, "grad_norm": 0.40625, "learning_rate": 3.1938904486289085e-06, "loss": 1.7905, "step": 24658 }, { "epoch": 0.7955943707641089, "grad_norm": 0.380859375, "learning_rate": 3.1929197422354055e-06, "loss": 1.8262, "step": 24659 }, { "epoch": 0.7956266346179053, "grad_norm": 0.376953125, "learning_rate": 3.1919491658057674e-06, "loss": 1.7999, "step": 24660 }, { "epoch": 0.7956588984717016, "grad_norm": 0.373046875, "learning_rate": 3.1909787193506873e-06, "loss": 1.8019, "step": 24661 }, { "epoch": 0.795691162325498, "grad_norm": 0.375, "learning_rate": 3.1900084028808395e-06, "loss": 1.8192, "step": 24662 }, { "epoch": 0.7957234261792943, "grad_norm": 0.40625, "learning_rate": 3.1890382164069045e-06, "loss": 1.8268, "step": 24663 }, { "epoch": 0.7957556900330907, "grad_norm": 0.376953125, "learning_rate": 3.188068159939566e-06, "loss": 1.7636, "step": 24664 }, { "epoch": 0.795787953886887, "grad_norm": 0.3828125, "learning_rate": 3.1870982334895014e-06, "loss": 1.8562, "step": 24665 }, { "epoch": 0.7958202177406833, "grad_norm": 0.390625, "learning_rate": 3.1861284370673806e-06, "loss": 1.8298, "step": 24666 }, { "epoch": 0.7958524815944796, "grad_norm": 0.37890625, "learning_rate": 3.1851587706838857e-06, "loss": 1.7745, "step": 24667 }, { "epoch": 0.795884745448276, "grad_norm": 0.435546875, "learning_rate": 3.1841892343496874e-06, "loss": 1.8186, "step": 24668 }, { "epoch": 0.7959170093020723, "grad_norm": 0.376953125, "learning_rate": 3.183219828075454e-06, "loss": 1.8178, "step": 24669 }, { "epoch": 0.7959492731558687, "grad_norm": 0.37109375, "learning_rate": 3.1822505518718657e-06, "loss": 1.8331, "step": 24670 }, { "epoch": 0.795981537009665, "grad_norm": 0.392578125, "learning_rate": 3.1812814057495786e-06, "loss": 1.8187, "step": 24671 }, { "epoch": 0.7960138008634614, "grad_norm": 0.376953125, "learning_rate": 3.180312389719267e-06, "loss": 1.8226, "step": 24672 }, { "epoch": 0.7960460647172577, "grad_norm": 0.38671875, "learning_rate": 3.179343503791606e-06, "loss": 1.86, "step": 24673 }, { "epoch": 0.7960783285710541, "grad_norm": 0.396484375, "learning_rate": 3.1783747479772413e-06, "loss": 1.793, "step": 24674 }, { "epoch": 0.7961105924248504, "grad_norm": 0.38671875, "learning_rate": 3.1774061222868484e-06, "loss": 1.8384, "step": 24675 }, { "epoch": 0.7961428562786468, "grad_norm": 0.37109375, "learning_rate": 3.176437626731095e-06, "loss": 1.8042, "step": 24676 }, { "epoch": 0.7961751201324431, "grad_norm": 0.380859375, "learning_rate": 3.1754692613206267e-06, "loss": 1.8429, "step": 24677 }, { "epoch": 0.7962073839862395, "grad_norm": 0.37890625, "learning_rate": 3.1745010260661088e-06, "loss": 1.8468, "step": 24678 }, { "epoch": 0.7962396478400358, "grad_norm": 0.3828125, "learning_rate": 3.173532920978209e-06, "loss": 1.8635, "step": 24679 }, { "epoch": 0.7962719116938322, "grad_norm": 0.3984375, "learning_rate": 3.172564946067566e-06, "loss": 1.857, "step": 24680 }, { "epoch": 0.7963041755476286, "grad_norm": 0.41015625, "learning_rate": 3.171597101344847e-06, "loss": 1.8234, "step": 24681 }, { "epoch": 0.7963364394014248, "grad_norm": 0.380859375, "learning_rate": 3.1706293868207015e-06, "loss": 1.805, "step": 24682 }, { "epoch": 0.7963687032552212, "grad_norm": 0.365234375, "learning_rate": 3.169661802505778e-06, "loss": 1.8543, "step": 24683 }, { "epoch": 0.7964009671090175, "grad_norm": 0.412109375, "learning_rate": 3.1686943484107323e-06, "loss": 1.853, "step": 24684 }, { "epoch": 0.7964332309628139, "grad_norm": 0.40625, "learning_rate": 3.1677270245462096e-06, "loss": 1.8532, "step": 24685 }, { "epoch": 0.7964654948166102, "grad_norm": 0.388671875, "learning_rate": 3.1667598309228637e-06, "loss": 1.866, "step": 24686 }, { "epoch": 0.7964977586704066, "grad_norm": 0.375, "learning_rate": 3.165792767551335e-06, "loss": 1.8667, "step": 24687 }, { "epoch": 0.7965300225242029, "grad_norm": 0.369140625, "learning_rate": 3.164825834442265e-06, "loss": 1.8741, "step": 24688 }, { "epoch": 0.7965622863779993, "grad_norm": 0.375, "learning_rate": 3.1638590316063083e-06, "loss": 1.8277, "step": 24689 }, { "epoch": 0.7965945502317956, "grad_norm": 0.3984375, "learning_rate": 3.162892359054098e-06, "loss": 1.8985, "step": 24690 }, { "epoch": 0.796626814085592, "grad_norm": 0.375, "learning_rate": 3.1619258167962734e-06, "loss": 1.8668, "step": 24691 }, { "epoch": 0.7966590779393883, "grad_norm": 0.390625, "learning_rate": 3.1609594048434805e-06, "loss": 1.8371, "step": 24692 }, { "epoch": 0.7966913417931847, "grad_norm": 0.376953125, "learning_rate": 3.159993123206354e-06, "loss": 1.7834, "step": 24693 }, { "epoch": 0.796723605646981, "grad_norm": 0.390625, "learning_rate": 3.1590269718955237e-06, "loss": 1.8053, "step": 24694 }, { "epoch": 0.7967558695007774, "grad_norm": 0.408203125, "learning_rate": 3.1580609509216353e-06, "loss": 1.8056, "step": 24695 }, { "epoch": 0.7967881333545737, "grad_norm": 0.416015625, "learning_rate": 3.1570950602953156e-06, "loss": 1.8388, "step": 24696 }, { "epoch": 0.7968203972083701, "grad_norm": 0.41015625, "learning_rate": 3.156129300027194e-06, "loss": 1.7983, "step": 24697 }, { "epoch": 0.7968526610621663, "grad_norm": 0.41796875, "learning_rate": 3.1551636701279096e-06, "loss": 1.7687, "step": 24698 }, { "epoch": 0.7968849249159627, "grad_norm": 0.376953125, "learning_rate": 3.1541981706080843e-06, "loss": 1.8094, "step": 24699 }, { "epoch": 0.7969171887697591, "grad_norm": 0.404296875, "learning_rate": 3.153232801478344e-06, "loss": 1.7952, "step": 24700 }, { "epoch": 0.7969494526235554, "grad_norm": 0.40625, "learning_rate": 3.1522675627493257e-06, "loss": 1.8173, "step": 24701 }, { "epoch": 0.7969817164773518, "grad_norm": 0.3828125, "learning_rate": 3.1513024544316395e-06, "loss": 1.7988, "step": 24702 }, { "epoch": 0.7970139803311481, "grad_norm": 0.37890625, "learning_rate": 3.1503374765359145e-06, "loss": 1.8204, "step": 24703 }, { "epoch": 0.7970462441849445, "grad_norm": 0.380859375, "learning_rate": 3.1493726290727826e-06, "loss": 1.7683, "step": 24704 }, { "epoch": 0.7970785080387408, "grad_norm": 0.390625, "learning_rate": 3.1484079120528452e-06, "loss": 1.8476, "step": 24705 }, { "epoch": 0.7971107718925372, "grad_norm": 0.380859375, "learning_rate": 3.147443325486734e-06, "loss": 1.8131, "step": 24706 }, { "epoch": 0.7971430357463335, "grad_norm": 0.392578125, "learning_rate": 3.1464788693850703e-06, "loss": 1.7907, "step": 24707 }, { "epoch": 0.7971752996001299, "grad_norm": 0.376953125, "learning_rate": 3.145514543758456e-06, "loss": 1.8085, "step": 24708 }, { "epoch": 0.7972075634539262, "grad_norm": 0.396484375, "learning_rate": 3.144550348617512e-06, "loss": 1.7649, "step": 24709 }, { "epoch": 0.7972398273077226, "grad_norm": 0.380859375, "learning_rate": 3.143586283972862e-06, "loss": 1.8317, "step": 24710 }, { "epoch": 0.7972720911615189, "grad_norm": 0.388671875, "learning_rate": 3.1426223498351002e-06, "loss": 1.8329, "step": 24711 }, { "epoch": 0.7973043550153153, "grad_norm": 0.37890625, "learning_rate": 3.1416585462148487e-06, "loss": 1.7751, "step": 24712 }, { "epoch": 0.7973366188691116, "grad_norm": 0.37890625, "learning_rate": 3.1406948731227124e-06, "loss": 1.8413, "step": 24713 }, { "epoch": 0.797368882722908, "grad_norm": 0.375, "learning_rate": 3.139731330569296e-06, "loss": 1.8387, "step": 24714 }, { "epoch": 0.7974011465767042, "grad_norm": 0.384765625, "learning_rate": 3.1387679185652126e-06, "loss": 1.8098, "step": 24715 }, { "epoch": 0.7974334104305006, "grad_norm": 0.38671875, "learning_rate": 3.137804637121062e-06, "loss": 1.8073, "step": 24716 }, { "epoch": 0.7974656742842969, "grad_norm": 0.375, "learning_rate": 3.1368414862474455e-06, "loss": 1.8231, "step": 24717 }, { "epoch": 0.7974979381380933, "grad_norm": 0.380859375, "learning_rate": 3.13587846595497e-06, "loss": 1.8114, "step": 24718 }, { "epoch": 0.7975302019918896, "grad_norm": 0.3671875, "learning_rate": 3.1349155762542285e-06, "loss": 1.8442, "step": 24719 }, { "epoch": 0.797562465845686, "grad_norm": 0.3828125, "learning_rate": 3.1339528171558297e-06, "loss": 1.8393, "step": 24720 }, { "epoch": 0.7975947296994824, "grad_norm": 0.373046875, "learning_rate": 3.132990188670366e-06, "loss": 1.7982, "step": 24721 }, { "epoch": 0.7976269935532787, "grad_norm": 0.375, "learning_rate": 3.132027690808428e-06, "loss": 1.7339, "step": 24722 }, { "epoch": 0.7976592574070751, "grad_norm": 0.3671875, "learning_rate": 3.131065323580618e-06, "loss": 1.8202, "step": 24723 }, { "epoch": 0.7976915212608714, "grad_norm": 0.37109375, "learning_rate": 3.1301030869975285e-06, "loss": 1.7734, "step": 24724 }, { "epoch": 0.7977237851146678, "grad_norm": 0.39453125, "learning_rate": 3.1291409810697436e-06, "loss": 1.8379, "step": 24725 }, { "epoch": 0.7977560489684641, "grad_norm": 0.388671875, "learning_rate": 3.128179005807862e-06, "loss": 1.8876, "step": 24726 }, { "epoch": 0.7977883128222605, "grad_norm": 0.4453125, "learning_rate": 3.12721716122247e-06, "loss": 1.9167, "step": 24727 }, { "epoch": 0.7978205766760568, "grad_norm": 0.4140625, "learning_rate": 3.1262554473241505e-06, "loss": 1.8658, "step": 24728 }, { "epoch": 0.7978528405298532, "grad_norm": 0.384765625, "learning_rate": 3.125293864123496e-06, "loss": 1.8819, "step": 24729 }, { "epoch": 0.7978851043836495, "grad_norm": 0.408203125, "learning_rate": 3.1243324116310885e-06, "loss": 1.872, "step": 24730 }, { "epoch": 0.7979173682374459, "grad_norm": 0.408203125, "learning_rate": 3.1233710898575053e-06, "loss": 1.9138, "step": 24731 }, { "epoch": 0.7979496320912421, "grad_norm": 0.39453125, "learning_rate": 3.122409898813341e-06, "loss": 1.8745, "step": 24732 }, { "epoch": 0.7979818959450385, "grad_norm": 0.400390625, "learning_rate": 3.1214488385091606e-06, "loss": 1.8823, "step": 24733 }, { "epoch": 0.7980141597988348, "grad_norm": 0.3828125, "learning_rate": 3.1204879089555487e-06, "loss": 1.8723, "step": 24734 }, { "epoch": 0.7980464236526312, "grad_norm": 0.3828125, "learning_rate": 3.119527110163092e-06, "loss": 1.8954, "step": 24735 }, { "epoch": 0.7980786875064275, "grad_norm": 0.37109375, "learning_rate": 3.1185664421423495e-06, "loss": 1.9097, "step": 24736 }, { "epoch": 0.7981109513602239, "grad_norm": 0.37109375, "learning_rate": 3.1176059049039047e-06, "loss": 1.896, "step": 24737 }, { "epoch": 0.7981432152140202, "grad_norm": 0.388671875, "learning_rate": 3.1166454984583363e-06, "loss": 1.93, "step": 24738 }, { "epoch": 0.7981754790678166, "grad_norm": 0.36328125, "learning_rate": 3.115685222816203e-06, "loss": 1.9403, "step": 24739 }, { "epoch": 0.798207742921613, "grad_norm": 0.369140625, "learning_rate": 3.1147250779880794e-06, "loss": 1.8886, "step": 24740 }, { "epoch": 0.7982400067754093, "grad_norm": 0.369140625, "learning_rate": 3.113765063984545e-06, "loss": 1.9389, "step": 24741 }, { "epoch": 0.7982722706292057, "grad_norm": 0.375, "learning_rate": 3.1128051808161482e-06, "loss": 1.9423, "step": 24742 }, { "epoch": 0.798304534483002, "grad_norm": 0.37890625, "learning_rate": 3.1118454284934696e-06, "loss": 1.874, "step": 24743 }, { "epoch": 0.7983367983367984, "grad_norm": 0.361328125, "learning_rate": 3.110885807027066e-06, "loss": 1.8973, "step": 24744 }, { "epoch": 0.7983690621905947, "grad_norm": 0.37890625, "learning_rate": 3.1099263164275003e-06, "loss": 1.9021, "step": 24745 }, { "epoch": 0.7984013260443911, "grad_norm": 0.380859375, "learning_rate": 3.1089669567053385e-06, "loss": 1.9151, "step": 24746 }, { "epoch": 0.7984335898981874, "grad_norm": 0.376953125, "learning_rate": 3.108007727871139e-06, "loss": 1.9145, "step": 24747 }, { "epoch": 0.7984658537519838, "grad_norm": 0.376953125, "learning_rate": 3.1070486299354545e-06, "loss": 1.9124, "step": 24748 }, { "epoch": 0.79849811760578, "grad_norm": 0.3671875, "learning_rate": 3.1060896629088505e-06, "loss": 1.8831, "step": 24749 }, { "epoch": 0.7985303814595764, "grad_norm": 0.37109375, "learning_rate": 3.1051308268018797e-06, "loss": 1.9027, "step": 24750 }, { "epoch": 0.7985626453133727, "grad_norm": 0.3984375, "learning_rate": 3.1041721216250907e-06, "loss": 1.9482, "step": 24751 }, { "epoch": 0.7985949091671691, "grad_norm": 0.404296875, "learning_rate": 3.103213547389045e-06, "loss": 1.9466, "step": 24752 }, { "epoch": 0.7986271730209654, "grad_norm": 0.369140625, "learning_rate": 3.1022551041042855e-06, "loss": 1.9278, "step": 24753 }, { "epoch": 0.7986594368747618, "grad_norm": 0.3671875, "learning_rate": 3.101296791781371e-06, "loss": 1.9201, "step": 24754 }, { "epoch": 0.7986917007285581, "grad_norm": 0.376953125, "learning_rate": 3.100338610430846e-06, "loss": 1.9176, "step": 24755 }, { "epoch": 0.7987239645823545, "grad_norm": 0.380859375, "learning_rate": 3.0993805600632506e-06, "loss": 1.9081, "step": 24756 }, { "epoch": 0.7987562284361508, "grad_norm": 0.365234375, "learning_rate": 3.098422640689143e-06, "loss": 1.9033, "step": 24757 }, { "epoch": 0.7987884922899472, "grad_norm": 0.357421875, "learning_rate": 3.0974648523190595e-06, "loss": 1.8987, "step": 24758 }, { "epoch": 0.7988207561437435, "grad_norm": 0.375, "learning_rate": 3.0965071949635406e-06, "loss": 1.9135, "step": 24759 }, { "epoch": 0.7988530199975399, "grad_norm": 0.388671875, "learning_rate": 3.0955496686331334e-06, "loss": 1.897, "step": 24760 }, { "epoch": 0.7988852838513363, "grad_norm": 0.373046875, "learning_rate": 3.0945922733383776e-06, "loss": 1.9288, "step": 24761 }, { "epoch": 0.7989175477051326, "grad_norm": 0.40625, "learning_rate": 3.0936350090898034e-06, "loss": 1.9211, "step": 24762 }, { "epoch": 0.798949811558929, "grad_norm": 0.375, "learning_rate": 3.0926778758979616e-06, "loss": 1.8828, "step": 24763 }, { "epoch": 0.7989820754127253, "grad_norm": 0.365234375, "learning_rate": 3.091720873773372e-06, "loss": 1.9267, "step": 24764 }, { "epoch": 0.7990143392665217, "grad_norm": 0.3828125, "learning_rate": 3.090764002726576e-06, "loss": 1.9144, "step": 24765 }, { "epoch": 0.7990466031203179, "grad_norm": 0.392578125, "learning_rate": 3.0898072627681152e-06, "loss": 1.8906, "step": 24766 }, { "epoch": 0.7990788669741143, "grad_norm": 0.388671875, "learning_rate": 3.0888506539085028e-06, "loss": 1.9431, "step": 24767 }, { "epoch": 0.7991111308279106, "grad_norm": 0.365234375, "learning_rate": 3.0878941761582767e-06, "loss": 1.9252, "step": 24768 }, { "epoch": 0.799143394681707, "grad_norm": 0.41015625, "learning_rate": 3.086937829527974e-06, "loss": 1.9059, "step": 24769 }, { "epoch": 0.7991756585355033, "grad_norm": 0.388671875, "learning_rate": 3.085981614028105e-06, "loss": 1.9314, "step": 24770 }, { "epoch": 0.7992079223892997, "grad_norm": 0.361328125, "learning_rate": 3.085025529669209e-06, "loss": 1.8581, "step": 24771 }, { "epoch": 0.799240186243096, "grad_norm": 0.384765625, "learning_rate": 3.084069576461802e-06, "loss": 1.9229, "step": 24772 }, { "epoch": 0.7992724500968924, "grad_norm": 0.392578125, "learning_rate": 3.0831137544164068e-06, "loss": 1.8902, "step": 24773 }, { "epoch": 0.7993047139506887, "grad_norm": 0.361328125, "learning_rate": 3.0821580635435494e-06, "loss": 1.9175, "step": 24774 }, { "epoch": 0.7993369778044851, "grad_norm": 0.384765625, "learning_rate": 3.0812025038537462e-06, "loss": 1.9395, "step": 24775 }, { "epoch": 0.7993692416582814, "grad_norm": 0.38671875, "learning_rate": 3.0802470753575105e-06, "loss": 1.8794, "step": 24776 }, { "epoch": 0.7994015055120778, "grad_norm": 0.404296875, "learning_rate": 3.079291778065368e-06, "loss": 1.9073, "step": 24777 }, { "epoch": 0.7994337693658741, "grad_norm": 0.3515625, "learning_rate": 3.0783366119878307e-06, "loss": 1.9141, "step": 24778 }, { "epoch": 0.7994660332196705, "grad_norm": 0.357421875, "learning_rate": 3.077381577135406e-06, "loss": 1.8536, "step": 24779 }, { "epoch": 0.7994982970734668, "grad_norm": 0.369140625, "learning_rate": 3.0764266735186175e-06, "loss": 1.9247, "step": 24780 }, { "epoch": 0.7995305609272632, "grad_norm": 0.365234375, "learning_rate": 3.075471901147969e-06, "loss": 1.8674, "step": 24781 }, { "epoch": 0.7995628247810596, "grad_norm": 0.384765625, "learning_rate": 3.0745172600339684e-06, "loss": 1.8824, "step": 24782 }, { "epoch": 0.7995950886348558, "grad_norm": 0.365234375, "learning_rate": 3.0735627501871293e-06, "loss": 1.9252, "step": 24783 }, { "epoch": 0.7996273524886522, "grad_norm": 0.365234375, "learning_rate": 3.0726083716179508e-06, "loss": 1.923, "step": 24784 }, { "epoch": 0.7996596163424485, "grad_norm": 0.369140625, "learning_rate": 3.0716541243369478e-06, "loss": 1.9351, "step": 24785 }, { "epoch": 0.7996918801962449, "grad_norm": 0.376953125, "learning_rate": 3.0707000083546198e-06, "loss": 1.8883, "step": 24786 }, { "epoch": 0.7997241440500412, "grad_norm": 0.3671875, "learning_rate": 3.0697460236814624e-06, "loss": 1.931, "step": 24787 }, { "epoch": 0.7997564079038376, "grad_norm": 0.38671875, "learning_rate": 3.0687921703279863e-06, "loss": 1.9099, "step": 24788 }, { "epoch": 0.7997886717576339, "grad_norm": 0.37109375, "learning_rate": 3.0678384483046873e-06, "loss": 1.8789, "step": 24789 }, { "epoch": 0.7998209356114303, "grad_norm": 0.369140625, "learning_rate": 3.0668848576220583e-06, "loss": 1.9239, "step": 24790 }, { "epoch": 0.7998531994652266, "grad_norm": 0.357421875, "learning_rate": 3.065931398290604e-06, "loss": 1.892, "step": 24791 }, { "epoch": 0.799885463319023, "grad_norm": 0.38671875, "learning_rate": 3.0649780703208147e-06, "loss": 1.9046, "step": 24792 }, { "epoch": 0.7999177271728193, "grad_norm": 0.357421875, "learning_rate": 3.064024873723182e-06, "loss": 1.9555, "step": 24793 }, { "epoch": 0.7999499910266157, "grad_norm": 0.35546875, "learning_rate": 3.0630718085082072e-06, "loss": 1.9032, "step": 24794 }, { "epoch": 0.799982254880412, "grad_norm": 0.400390625, "learning_rate": 3.0621188746863673e-06, "loss": 1.9309, "step": 24795 }, { "epoch": 0.8000145187342084, "grad_norm": 0.37890625, "learning_rate": 3.0611660722681585e-06, "loss": 1.8731, "step": 24796 }, { "epoch": 0.8000467825880047, "grad_norm": 0.365234375, "learning_rate": 3.060213401264076e-06, "loss": 1.9289, "step": 24797 }, { "epoch": 0.8000790464418011, "grad_norm": 0.3671875, "learning_rate": 3.0592608616845914e-06, "loss": 1.9219, "step": 24798 }, { "epoch": 0.8001113102955973, "grad_norm": 0.400390625, "learning_rate": 3.058308453540198e-06, "loss": 1.897, "step": 24799 }, { "epoch": 0.8001435741493937, "grad_norm": 0.40234375, "learning_rate": 3.0573561768413837e-06, "loss": 1.8973, "step": 24800 }, { "epoch": 0.8001758380031901, "grad_norm": 0.396484375, "learning_rate": 3.0564040315986193e-06, "loss": 1.9096, "step": 24801 }, { "epoch": 0.8002081018569864, "grad_norm": 0.376953125, "learning_rate": 3.0554520178223937e-06, "loss": 1.8916, "step": 24802 }, { "epoch": 0.8002403657107828, "grad_norm": 0.375, "learning_rate": 3.0545001355231846e-06, "loss": 1.8503, "step": 24803 }, { "epoch": 0.8002726295645791, "grad_norm": 0.423828125, "learning_rate": 3.0535483847114626e-06, "loss": 1.8881, "step": 24804 }, { "epoch": 0.8003048934183755, "grad_norm": 0.412109375, "learning_rate": 3.052596765397715e-06, "loss": 1.9737, "step": 24805 }, { "epoch": 0.8003371572721718, "grad_norm": 0.40625, "learning_rate": 3.0516452775924114e-06, "loss": 1.9131, "step": 24806 }, { "epoch": 0.8003694211259682, "grad_norm": 0.36328125, "learning_rate": 3.0506939213060204e-06, "loss": 1.8871, "step": 24807 }, { "epoch": 0.8004016849797645, "grad_norm": 0.3984375, "learning_rate": 3.0497426965490214e-06, "loss": 1.7816, "step": 24808 }, { "epoch": 0.8004339488335609, "grad_norm": 0.421875, "learning_rate": 3.048791603331882e-06, "loss": 1.8381, "step": 24809 }, { "epoch": 0.8004662126873572, "grad_norm": 0.376953125, "learning_rate": 3.0478406416650676e-06, "loss": 1.8858, "step": 24810 }, { "epoch": 0.8004984765411536, "grad_norm": 0.37890625, "learning_rate": 3.0468898115590527e-06, "loss": 1.8497, "step": 24811 }, { "epoch": 0.8005307403949499, "grad_norm": 0.3828125, "learning_rate": 3.0459391130243e-06, "loss": 1.8323, "step": 24812 }, { "epoch": 0.8005630042487463, "grad_norm": 0.400390625, "learning_rate": 3.044988546071269e-06, "loss": 1.7688, "step": 24813 }, { "epoch": 0.8005952681025426, "grad_norm": 0.40234375, "learning_rate": 3.044038110710432e-06, "loss": 1.7906, "step": 24814 }, { "epoch": 0.800627531956339, "grad_norm": 0.41015625, "learning_rate": 3.0430878069522473e-06, "loss": 1.8036, "step": 24815 }, { "epoch": 0.8006597958101352, "grad_norm": 0.39453125, "learning_rate": 3.0421376348071678e-06, "loss": 1.8118, "step": 24816 }, { "epoch": 0.8006920596639316, "grad_norm": 0.39453125, "learning_rate": 3.041187594285664e-06, "loss": 1.8026, "step": 24817 }, { "epoch": 0.8007243235177279, "grad_norm": 0.373046875, "learning_rate": 3.040237685398185e-06, "loss": 1.8406, "step": 24818 }, { "epoch": 0.8007565873715243, "grad_norm": 0.375, "learning_rate": 3.0392879081551917e-06, "loss": 1.8371, "step": 24819 }, { "epoch": 0.8007888512253206, "grad_norm": 0.3671875, "learning_rate": 3.0383382625671384e-06, "loss": 1.8251, "step": 24820 }, { "epoch": 0.800821115079117, "grad_norm": 0.37890625, "learning_rate": 3.0373887486444707e-06, "loss": 1.829, "step": 24821 }, { "epoch": 0.8008533789329134, "grad_norm": 0.408203125, "learning_rate": 3.0364393663976546e-06, "loss": 1.9126, "step": 24822 }, { "epoch": 0.8008856427867097, "grad_norm": 0.4140625, "learning_rate": 3.0354901158371225e-06, "loss": 1.7548, "step": 24823 }, { "epoch": 0.8009179066405061, "grad_norm": 0.400390625, "learning_rate": 3.0345409969733336e-06, "loss": 1.8613, "step": 24824 }, { "epoch": 0.8009501704943024, "grad_norm": 0.375, "learning_rate": 3.03359200981674e-06, "loss": 1.8113, "step": 24825 }, { "epoch": 0.8009824343480988, "grad_norm": 0.392578125, "learning_rate": 3.032643154377773e-06, "loss": 1.8097, "step": 24826 }, { "epoch": 0.8010146982018951, "grad_norm": 0.369140625, "learning_rate": 3.0316944306668843e-06, "loss": 1.8039, "step": 24827 }, { "epoch": 0.8010469620556915, "grad_norm": 0.392578125, "learning_rate": 3.0307458386945256e-06, "loss": 1.8333, "step": 24828 }, { "epoch": 0.8010792259094878, "grad_norm": 0.3671875, "learning_rate": 3.0297973784711225e-06, "loss": 1.8105, "step": 24829 }, { "epoch": 0.8011114897632842, "grad_norm": 0.380859375, "learning_rate": 3.0288490500071227e-06, "loss": 1.8511, "step": 24830 }, { "epoch": 0.8011437536170805, "grad_norm": 0.416015625, "learning_rate": 3.0279008533129726e-06, "loss": 1.7729, "step": 24831 }, { "epoch": 0.8011760174708769, "grad_norm": 0.373046875, "learning_rate": 3.0269527883990925e-06, "loss": 1.807, "step": 24832 }, { "epoch": 0.8012082813246731, "grad_norm": 0.3671875, "learning_rate": 3.026004855275931e-06, "loss": 1.8083, "step": 24833 }, { "epoch": 0.8012405451784695, "grad_norm": 0.376953125, "learning_rate": 3.025057053953918e-06, "loss": 1.7884, "step": 24834 }, { "epoch": 0.8012728090322658, "grad_norm": 0.373046875, "learning_rate": 3.0241093844434816e-06, "loss": 1.7821, "step": 24835 }, { "epoch": 0.8013050728860622, "grad_norm": 0.3671875, "learning_rate": 3.0231618467550613e-06, "loss": 1.8076, "step": 24836 }, { "epoch": 0.8013373367398585, "grad_norm": 0.37890625, "learning_rate": 3.0222144408990847e-06, "loss": 1.8034, "step": 24837 }, { "epoch": 0.8013696005936549, "grad_norm": 0.408203125, "learning_rate": 3.021267166885973e-06, "loss": 1.8302, "step": 24838 }, { "epoch": 0.8014018644474512, "grad_norm": 0.400390625, "learning_rate": 3.0203200247261646e-06, "loss": 1.8223, "step": 24839 }, { "epoch": 0.8014341283012476, "grad_norm": 0.37890625, "learning_rate": 3.019373014430078e-06, "loss": 1.8595, "step": 24840 }, { "epoch": 0.8014663921550439, "grad_norm": 0.384765625, "learning_rate": 3.0184261360081346e-06, "loss": 1.8203, "step": 24841 }, { "epoch": 0.8014986560088403, "grad_norm": 0.423828125, "learning_rate": 3.0174793894707657e-06, "loss": 1.7505, "step": 24842 }, { "epoch": 0.8015309198626367, "grad_norm": 0.392578125, "learning_rate": 3.016532774828387e-06, "loss": 1.8195, "step": 24843 }, { "epoch": 0.801563183716433, "grad_norm": 0.388671875, "learning_rate": 3.0155862920914133e-06, "loss": 1.8076, "step": 24844 }, { "epoch": 0.8015954475702294, "grad_norm": 0.375, "learning_rate": 3.014639941270273e-06, "loss": 1.7853, "step": 24845 }, { "epoch": 0.8016277114240257, "grad_norm": 0.388671875, "learning_rate": 3.013693722375379e-06, "loss": 1.8089, "step": 24846 }, { "epoch": 0.8016599752778221, "grad_norm": 0.37890625, "learning_rate": 3.01274763541714e-06, "loss": 1.8223, "step": 24847 }, { "epoch": 0.8016922391316184, "grad_norm": 0.37109375, "learning_rate": 3.0118016804059794e-06, "loss": 1.8263, "step": 24848 }, { "epoch": 0.8017245029854148, "grad_norm": 0.376953125, "learning_rate": 3.010855857352307e-06, "loss": 1.8547, "step": 24849 }, { "epoch": 0.801756766839211, "grad_norm": 0.404296875, "learning_rate": 3.009910166266528e-06, "loss": 1.8505, "step": 24850 }, { "epoch": 0.8017890306930074, "grad_norm": 0.412109375, "learning_rate": 3.0089646071590588e-06, "loss": 1.8785, "step": 24851 }, { "epoch": 0.8018212945468037, "grad_norm": 0.421875, "learning_rate": 3.008019180040303e-06, "loss": 1.9386, "step": 24852 }, { "epoch": 0.8018535584006001, "grad_norm": 0.4140625, "learning_rate": 3.0070738849206754e-06, "loss": 1.9321, "step": 24853 }, { "epoch": 0.8018858222543964, "grad_norm": 0.421875, "learning_rate": 3.0061287218105675e-06, "loss": 1.9046, "step": 24854 }, { "epoch": 0.8019180861081928, "grad_norm": 0.400390625, "learning_rate": 3.00518369072039e-06, "loss": 1.8972, "step": 24855 }, { "epoch": 0.8019503499619891, "grad_norm": 0.40234375, "learning_rate": 3.004238791660554e-06, "loss": 1.8756, "step": 24856 }, { "epoch": 0.8019826138157855, "grad_norm": 0.447265625, "learning_rate": 3.0032940246414433e-06, "loss": 1.8921, "step": 24857 }, { "epoch": 0.8020148776695818, "grad_norm": 0.396484375, "learning_rate": 3.0023493896734657e-06, "loss": 1.9448, "step": 24858 }, { "epoch": 0.8020471415233782, "grad_norm": 0.3828125, "learning_rate": 3.0014048867670264e-06, "loss": 1.9273, "step": 24859 }, { "epoch": 0.8020794053771745, "grad_norm": 0.388671875, "learning_rate": 3.0004605159325053e-06, "loss": 1.9082, "step": 24860 }, { "epoch": 0.8021116692309709, "grad_norm": 0.40234375, "learning_rate": 2.9995162771803082e-06, "loss": 1.8637, "step": 24861 }, { "epoch": 0.8021439330847673, "grad_norm": 0.384765625, "learning_rate": 2.9985721705208342e-06, "loss": 1.8996, "step": 24862 }, { "epoch": 0.8021761969385636, "grad_norm": 0.392578125, "learning_rate": 2.9976281959644603e-06, "loss": 1.8937, "step": 24863 }, { "epoch": 0.80220846079236, "grad_norm": 0.37890625, "learning_rate": 2.9966843535215884e-06, "loss": 1.9134, "step": 24864 }, { "epoch": 0.8022407246461563, "grad_norm": 0.4140625, "learning_rate": 2.9957406432026045e-06, "loss": 1.8579, "step": 24865 }, { "epoch": 0.8022729884999527, "grad_norm": 0.41796875, "learning_rate": 2.9947970650178914e-06, "loss": 1.8452, "step": 24866 }, { "epoch": 0.8023052523537489, "grad_norm": 0.390625, "learning_rate": 2.993853618977843e-06, "loss": 1.8306, "step": 24867 }, { "epoch": 0.8023375162075453, "grad_norm": 0.376953125, "learning_rate": 2.9929103050928418e-06, "loss": 1.8184, "step": 24868 }, { "epoch": 0.8023697800613416, "grad_norm": 0.37109375, "learning_rate": 2.991967123373266e-06, "loss": 1.8155, "step": 24869 }, { "epoch": 0.802402043915138, "grad_norm": 0.3828125, "learning_rate": 2.991024073829505e-06, "loss": 1.8325, "step": 24870 }, { "epoch": 0.8024343077689343, "grad_norm": 0.384765625, "learning_rate": 2.9900811564719365e-06, "loss": 1.8437, "step": 24871 }, { "epoch": 0.8024665716227307, "grad_norm": 0.384765625, "learning_rate": 2.989138371310935e-06, "loss": 1.8035, "step": 24872 }, { "epoch": 0.802498835476527, "grad_norm": 0.390625, "learning_rate": 2.9881957183568854e-06, "loss": 1.8665, "step": 24873 }, { "epoch": 0.8025310993303234, "grad_norm": 0.38671875, "learning_rate": 2.9872531976201595e-06, "loss": 1.8131, "step": 24874 }, { "epoch": 0.8025633631841197, "grad_norm": 0.37890625, "learning_rate": 2.9863108091111293e-06, "loss": 1.8044, "step": 24875 }, { "epoch": 0.8025956270379161, "grad_norm": 0.37109375, "learning_rate": 2.985368552840174e-06, "loss": 1.7926, "step": 24876 }, { "epoch": 0.8026278908917124, "grad_norm": 0.3671875, "learning_rate": 2.9844264288176633e-06, "loss": 1.8089, "step": 24877 }, { "epoch": 0.8026601547455088, "grad_norm": 0.3671875, "learning_rate": 2.983484437053963e-06, "loss": 1.8284, "step": 24878 }, { "epoch": 0.8026924185993051, "grad_norm": 0.37890625, "learning_rate": 2.982542577559448e-06, "loss": 1.8469, "step": 24879 }, { "epoch": 0.8027246824531015, "grad_norm": 0.376953125, "learning_rate": 2.9816008503444835e-06, "loss": 1.8473, "step": 24880 }, { "epoch": 0.8027569463068978, "grad_norm": 0.3671875, "learning_rate": 2.980659255419432e-06, "loss": 1.8203, "step": 24881 }, { "epoch": 0.8027892101606942, "grad_norm": 0.388671875, "learning_rate": 2.979717792794664e-06, "loss": 1.8474, "step": 24882 }, { "epoch": 0.8028214740144906, "grad_norm": 0.369140625, "learning_rate": 2.978776462480535e-06, "loss": 1.8725, "step": 24883 }, { "epoch": 0.8028537378682868, "grad_norm": 0.365234375, "learning_rate": 2.977835264487419e-06, "loss": 1.8491, "step": 24884 }, { "epoch": 0.8028860017220832, "grad_norm": 0.376953125, "learning_rate": 2.9768941988256587e-06, "loss": 1.8143, "step": 24885 }, { "epoch": 0.8029182655758795, "grad_norm": 0.390625, "learning_rate": 2.9759532655056225e-06, "loss": 1.7789, "step": 24886 }, { "epoch": 0.8029505294296759, "grad_norm": 0.375, "learning_rate": 2.975012464537676e-06, "loss": 1.8298, "step": 24887 }, { "epoch": 0.8029827932834722, "grad_norm": 0.384765625, "learning_rate": 2.9740717959321567e-06, "loss": 1.8189, "step": 24888 }, { "epoch": 0.8030150571372686, "grad_norm": 0.390625, "learning_rate": 2.973131259699427e-06, "loss": 1.91, "step": 24889 }, { "epoch": 0.8030473209910649, "grad_norm": 0.4609375, "learning_rate": 2.9721908558498486e-06, "loss": 1.9312, "step": 24890 }, { "epoch": 0.8030795848448613, "grad_norm": 0.392578125, "learning_rate": 2.971250584393758e-06, "loss": 1.8865, "step": 24891 }, { "epoch": 0.8031118486986576, "grad_norm": 0.396484375, "learning_rate": 2.9703104453415095e-06, "loss": 1.9098, "step": 24892 }, { "epoch": 0.803144112552454, "grad_norm": 0.404296875, "learning_rate": 2.9693704387034613e-06, "loss": 1.9191, "step": 24893 }, { "epoch": 0.8031763764062503, "grad_norm": 0.419921875, "learning_rate": 2.968430564489946e-06, "loss": 1.9033, "step": 24894 }, { "epoch": 0.8032086402600467, "grad_norm": 0.40625, "learning_rate": 2.967490822711321e-06, "loss": 1.9421, "step": 24895 }, { "epoch": 0.803240904113843, "grad_norm": 0.384765625, "learning_rate": 2.9665512133779216e-06, "loss": 1.893, "step": 24896 }, { "epoch": 0.8032731679676394, "grad_norm": 0.404296875, "learning_rate": 2.9656117365000923e-06, "loss": 1.883, "step": 24897 }, { "epoch": 0.8033054318214357, "grad_norm": 0.404296875, "learning_rate": 2.964672392088178e-06, "loss": 1.831, "step": 24898 }, { "epoch": 0.803337695675232, "grad_norm": 0.396484375, "learning_rate": 2.963733180152517e-06, "loss": 1.8345, "step": 24899 }, { "epoch": 0.8033699595290283, "grad_norm": 0.373046875, "learning_rate": 2.962794100703441e-06, "loss": 1.8285, "step": 24900 }, { "epoch": 0.8034022233828247, "grad_norm": 0.37109375, "learning_rate": 2.961855153751298e-06, "loss": 1.8536, "step": 24901 }, { "epoch": 0.8034344872366211, "grad_norm": 0.384765625, "learning_rate": 2.960916339306416e-06, "loss": 1.8468, "step": 24902 }, { "epoch": 0.8034667510904174, "grad_norm": 0.380859375, "learning_rate": 2.959977657379127e-06, "loss": 1.8409, "step": 24903 }, { "epoch": 0.8034990149442138, "grad_norm": 0.37890625, "learning_rate": 2.9590391079797686e-06, "loss": 1.8284, "step": 24904 }, { "epoch": 0.8035312787980101, "grad_norm": 0.375, "learning_rate": 2.958100691118672e-06, "loss": 1.8377, "step": 24905 }, { "epoch": 0.8035635426518065, "grad_norm": 0.37890625, "learning_rate": 2.9571624068061594e-06, "loss": 1.8561, "step": 24906 }, { "epoch": 0.8035958065056028, "grad_norm": 0.404296875, "learning_rate": 2.956224255052567e-06, "loss": 1.8192, "step": 24907 }, { "epoch": 0.8036280703593992, "grad_norm": 0.376953125, "learning_rate": 2.955286235868217e-06, "loss": 1.8455, "step": 24908 }, { "epoch": 0.8036603342131955, "grad_norm": 0.3671875, "learning_rate": 2.9543483492634325e-06, "loss": 1.8499, "step": 24909 }, { "epoch": 0.8036925980669919, "grad_norm": 0.40234375, "learning_rate": 2.953410595248544e-06, "loss": 1.8124, "step": 24910 }, { "epoch": 0.8037248619207882, "grad_norm": 0.392578125, "learning_rate": 2.9524729738338694e-06, "loss": 1.815, "step": 24911 }, { "epoch": 0.8037571257745846, "grad_norm": 0.388671875, "learning_rate": 2.9515354850297244e-06, "loss": 1.825, "step": 24912 }, { "epoch": 0.8037893896283809, "grad_norm": 0.369140625, "learning_rate": 2.9505981288464385e-06, "loss": 1.8703, "step": 24913 }, { "epoch": 0.8038216534821773, "grad_norm": 0.376953125, "learning_rate": 2.9496609052943235e-06, "loss": 1.8272, "step": 24914 }, { "epoch": 0.8038539173359736, "grad_norm": 0.390625, "learning_rate": 2.9487238143836966e-06, "loss": 1.8295, "step": 24915 }, { "epoch": 0.80388618118977, "grad_norm": 0.40234375, "learning_rate": 2.947786856124869e-06, "loss": 1.7965, "step": 24916 }, { "epoch": 0.8039184450435662, "grad_norm": 0.380859375, "learning_rate": 2.946850030528156e-06, "loss": 1.8447, "step": 24917 }, { "epoch": 0.8039507088973626, "grad_norm": 0.38671875, "learning_rate": 2.9459133376038795e-06, "loss": 1.828, "step": 24918 }, { "epoch": 0.8039829727511589, "grad_norm": 0.376953125, "learning_rate": 2.944976777362334e-06, "loss": 1.8175, "step": 24919 }, { "epoch": 0.8040152366049553, "grad_norm": 0.412109375, "learning_rate": 2.944040349813836e-06, "loss": 1.7902, "step": 24920 }, { "epoch": 0.8040475004587516, "grad_norm": 0.375, "learning_rate": 2.9431040549687007e-06, "loss": 1.8408, "step": 24921 }, { "epoch": 0.804079764312548, "grad_norm": 0.388671875, "learning_rate": 2.9421678928372188e-06, "loss": 1.8066, "step": 24922 }, { "epoch": 0.8041120281663444, "grad_norm": 0.369140625, "learning_rate": 2.9412318634297053e-06, "loss": 1.8455, "step": 24923 }, { "epoch": 0.8041442920201407, "grad_norm": 0.3828125, "learning_rate": 2.9402959667564606e-06, "loss": 1.8194, "step": 24924 }, { "epoch": 0.8041765558739371, "grad_norm": 0.365234375, "learning_rate": 2.939360202827783e-06, "loss": 1.8624, "step": 24925 }, { "epoch": 0.8042088197277334, "grad_norm": 0.388671875, "learning_rate": 2.9384245716539806e-06, "loss": 1.8072, "step": 24926 }, { "epoch": 0.8042410835815298, "grad_norm": 0.388671875, "learning_rate": 2.937489073245347e-06, "loss": 1.8017, "step": 24927 }, { "epoch": 0.8042733474353261, "grad_norm": 0.3828125, "learning_rate": 2.9365537076121756e-06, "loss": 1.8019, "step": 24928 }, { "epoch": 0.8043056112891225, "grad_norm": 0.380859375, "learning_rate": 2.93561847476477e-06, "loss": 1.807, "step": 24929 }, { "epoch": 0.8043378751429188, "grad_norm": 0.37890625, "learning_rate": 2.9346833747134237e-06, "loss": 1.8171, "step": 24930 }, { "epoch": 0.8043701389967152, "grad_norm": 0.392578125, "learning_rate": 2.933748407468422e-06, "loss": 1.7755, "step": 24931 }, { "epoch": 0.8044024028505115, "grad_norm": 0.38671875, "learning_rate": 2.932813573040064e-06, "loss": 1.8101, "step": 24932 }, { "epoch": 0.8044346667043079, "grad_norm": 0.375, "learning_rate": 2.931878871438639e-06, "loss": 1.7585, "step": 24933 }, { "epoch": 0.8044669305581041, "grad_norm": 0.390625, "learning_rate": 2.9309443026744302e-06, "loss": 1.7945, "step": 24934 }, { "epoch": 0.8044991944119005, "grad_norm": 0.380859375, "learning_rate": 2.9300098667577314e-06, "loss": 1.8626, "step": 24935 }, { "epoch": 0.8045314582656968, "grad_norm": 0.404296875, "learning_rate": 2.929075563698824e-06, "loss": 1.8693, "step": 24936 }, { "epoch": 0.8045637221194932, "grad_norm": 0.380859375, "learning_rate": 2.9281413935079907e-06, "loss": 1.7876, "step": 24937 }, { "epoch": 0.8045959859732895, "grad_norm": 0.390625, "learning_rate": 2.92720735619552e-06, "loss": 1.7964, "step": 24938 }, { "epoch": 0.8046282498270859, "grad_norm": 0.39453125, "learning_rate": 2.9262734517716887e-06, "loss": 1.8065, "step": 24939 }, { "epoch": 0.8046605136808822, "grad_norm": 0.376953125, "learning_rate": 2.9253396802467747e-06, "loss": 1.8094, "step": 24940 }, { "epoch": 0.8046927775346786, "grad_norm": 0.392578125, "learning_rate": 2.9244060416310642e-06, "loss": 1.8023, "step": 24941 }, { "epoch": 0.8047250413884749, "grad_norm": 0.369140625, "learning_rate": 2.9234725359348275e-06, "loss": 1.8416, "step": 24942 }, { "epoch": 0.8047573052422713, "grad_norm": 0.384765625, "learning_rate": 2.922539163168338e-06, "loss": 1.7507, "step": 24943 }, { "epoch": 0.8047895690960677, "grad_norm": 0.376953125, "learning_rate": 2.9216059233418774e-06, "loss": 1.7983, "step": 24944 }, { "epoch": 0.804821832949864, "grad_norm": 0.400390625, "learning_rate": 2.9206728164657126e-06, "loss": 1.7984, "step": 24945 }, { "epoch": 0.8048540968036604, "grad_norm": 0.373046875, "learning_rate": 2.919739842550117e-06, "loss": 1.8164, "step": 24946 }, { "epoch": 0.8048863606574567, "grad_norm": 0.400390625, "learning_rate": 2.9188070016053543e-06, "loss": 1.7787, "step": 24947 }, { "epoch": 0.8049186245112531, "grad_norm": 0.365234375, "learning_rate": 2.9178742936417006e-06, "loss": 1.7978, "step": 24948 }, { "epoch": 0.8049508883650494, "grad_norm": 0.392578125, "learning_rate": 2.91694171866942e-06, "loss": 1.8095, "step": 24949 }, { "epoch": 0.8049831522188458, "grad_norm": 0.40234375, "learning_rate": 2.916009276698773e-06, "loss": 1.8321, "step": 24950 }, { "epoch": 0.805015416072642, "grad_norm": 0.369140625, "learning_rate": 2.915076967740025e-06, "loss": 1.8035, "step": 24951 }, { "epoch": 0.8050476799264384, "grad_norm": 0.37109375, "learning_rate": 2.914144791803449e-06, "loss": 1.783, "step": 24952 }, { "epoch": 0.8050799437802347, "grad_norm": 0.373046875, "learning_rate": 2.9132127488992898e-06, "loss": 1.8063, "step": 24953 }, { "epoch": 0.8051122076340311, "grad_norm": 0.400390625, "learning_rate": 2.912280839037816e-06, "loss": 1.8713, "step": 24954 }, { "epoch": 0.8051444714878274, "grad_norm": 0.3828125, "learning_rate": 2.9113490622292836e-06, "loss": 1.8026, "step": 24955 }, { "epoch": 0.8051767353416238, "grad_norm": 0.384765625, "learning_rate": 2.9104174184839455e-06, "loss": 1.8198, "step": 24956 }, { "epoch": 0.8052089991954201, "grad_norm": 0.39453125, "learning_rate": 2.909485907812061e-06, "loss": 1.7897, "step": 24957 }, { "epoch": 0.8052412630492165, "grad_norm": 0.392578125, "learning_rate": 2.9085545302238844e-06, "loss": 1.8283, "step": 24958 }, { "epoch": 0.8052735269030128, "grad_norm": 0.435546875, "learning_rate": 2.9076232857296596e-06, "loss": 1.8489, "step": 24959 }, { "epoch": 0.8053057907568092, "grad_norm": 0.380859375, "learning_rate": 2.9066921743396478e-06, "loss": 1.8476, "step": 24960 }, { "epoch": 0.8053380546106055, "grad_norm": 0.376953125, "learning_rate": 2.9057611960640925e-06, "loss": 1.7946, "step": 24961 }, { "epoch": 0.8053703184644019, "grad_norm": 0.400390625, "learning_rate": 2.904830350913236e-06, "loss": 1.7902, "step": 24962 }, { "epoch": 0.8054025823181983, "grad_norm": 0.3828125, "learning_rate": 2.9038996388973366e-06, "loss": 1.8254, "step": 24963 }, { "epoch": 0.8054348461719946, "grad_norm": 0.390625, "learning_rate": 2.9029690600266303e-06, "loss": 1.7931, "step": 24964 }, { "epoch": 0.805467110025791, "grad_norm": 0.376953125, "learning_rate": 2.9020386143113587e-06, "loss": 1.8137, "step": 24965 }, { "epoch": 0.8054993738795873, "grad_norm": 0.3984375, "learning_rate": 2.9011083017617706e-06, "loss": 1.8642, "step": 24966 }, { "epoch": 0.8055316377333837, "grad_norm": 0.39453125, "learning_rate": 2.900178122388103e-06, "loss": 1.9082, "step": 24967 }, { "epoch": 0.8055639015871799, "grad_norm": 0.3984375, "learning_rate": 2.8992480762005906e-06, "loss": 1.9108, "step": 24968 }, { "epoch": 0.8055961654409763, "grad_norm": 0.392578125, "learning_rate": 2.898318163209479e-06, "loss": 1.869, "step": 24969 }, { "epoch": 0.8056284292947726, "grad_norm": 0.384765625, "learning_rate": 2.8973883834249982e-06, "loss": 1.8671, "step": 24970 }, { "epoch": 0.805660693148569, "grad_norm": 0.39453125, "learning_rate": 2.89645873685738e-06, "loss": 1.9311, "step": 24971 }, { "epoch": 0.8056929570023653, "grad_norm": 0.45703125, "learning_rate": 2.8955292235168657e-06, "loss": 1.9439, "step": 24972 }, { "epoch": 0.8057252208561617, "grad_norm": 0.427734375, "learning_rate": 2.8945998434136813e-06, "loss": 1.9141, "step": 24973 }, { "epoch": 0.805757484709958, "grad_norm": 0.421875, "learning_rate": 2.893670596558056e-06, "loss": 1.9902, "step": 24974 }, { "epoch": 0.8057897485637544, "grad_norm": 0.5234375, "learning_rate": 2.892741482960218e-06, "loss": 2.0317, "step": 24975 }, { "epoch": 0.8058220124175507, "grad_norm": 0.5234375, "learning_rate": 2.8918125026303994e-06, "loss": 2.0381, "step": 24976 }, { "epoch": 0.8058542762713471, "grad_norm": 0.419921875, "learning_rate": 2.890883655578821e-06, "loss": 2.0419, "step": 24977 }, { "epoch": 0.8058865401251434, "grad_norm": 0.462890625, "learning_rate": 2.889954941815706e-06, "loss": 1.9887, "step": 24978 }, { "epoch": 0.8059188039789398, "grad_norm": 0.5546875, "learning_rate": 2.8890263613512836e-06, "loss": 2.0404, "step": 24979 }, { "epoch": 0.8059510678327361, "grad_norm": 0.5234375, "learning_rate": 2.88809791419577e-06, "loss": 2.0519, "step": 24980 }, { "epoch": 0.8059833316865325, "grad_norm": 0.423828125, "learning_rate": 2.88716960035938e-06, "loss": 1.9801, "step": 24981 }, { "epoch": 0.8060155955403288, "grad_norm": 0.39453125, "learning_rate": 2.886241419852343e-06, "loss": 2.0617, "step": 24982 }, { "epoch": 0.8060478593941252, "grad_norm": 0.40625, "learning_rate": 2.88531337268487e-06, "loss": 2.0177, "step": 24983 }, { "epoch": 0.8060801232479216, "grad_norm": 0.494140625, "learning_rate": 2.884385458867172e-06, "loss": 2.0209, "step": 24984 }, { "epoch": 0.8061123871017178, "grad_norm": 0.431640625, "learning_rate": 2.88345767840947e-06, "loss": 2.0891, "step": 24985 }, { "epoch": 0.8061446509555142, "grad_norm": 0.44921875, "learning_rate": 2.8825300313219747e-06, "loss": 2.0415, "step": 24986 }, { "epoch": 0.8061769148093105, "grad_norm": 0.390625, "learning_rate": 2.881602517614893e-06, "loss": 2.0575, "step": 24987 }, { "epoch": 0.8062091786631069, "grad_norm": 0.3984375, "learning_rate": 2.8806751372984395e-06, "loss": 2.0311, "step": 24988 }, { "epoch": 0.8062414425169032, "grad_norm": 0.41015625, "learning_rate": 2.87974789038282e-06, "loss": 2.0474, "step": 24989 }, { "epoch": 0.8062737063706996, "grad_norm": 0.4453125, "learning_rate": 2.8788207768782375e-06, "loss": 2.0421, "step": 24990 }, { "epoch": 0.8063059702244959, "grad_norm": 0.421875, "learning_rate": 2.8778937967949027e-06, "loss": 2.026, "step": 24991 }, { "epoch": 0.8063382340782923, "grad_norm": 0.44140625, "learning_rate": 2.8769669501430185e-06, "loss": 2.0615, "step": 24992 }, { "epoch": 0.8063704979320886, "grad_norm": 0.388671875, "learning_rate": 2.876040236932779e-06, "loss": 2.1085, "step": 24993 }, { "epoch": 0.806402761785885, "grad_norm": 0.435546875, "learning_rate": 2.875113657174397e-06, "loss": 2.018, "step": 24994 }, { "epoch": 0.8064350256396813, "grad_norm": 0.388671875, "learning_rate": 2.874187210878063e-06, "loss": 2.0314, "step": 24995 }, { "epoch": 0.8064672894934777, "grad_norm": 0.42578125, "learning_rate": 2.873260898053976e-06, "loss": 2.0482, "step": 24996 }, { "epoch": 0.806499553347274, "grad_norm": 0.44921875, "learning_rate": 2.8723347187123356e-06, "loss": 1.9867, "step": 24997 }, { "epoch": 0.8065318172010704, "grad_norm": 0.39453125, "learning_rate": 2.871408672863334e-06, "loss": 2.0425, "step": 24998 }, { "epoch": 0.8065640810548667, "grad_norm": 0.380859375, "learning_rate": 2.8704827605171606e-06, "loss": 2.0201, "step": 24999 }, { "epoch": 0.806596344908663, "grad_norm": 0.373046875, "learning_rate": 2.8695569816840165e-06, "loss": 2.0531, "step": 25000 }, { "epoch": 0.8066286087624593, "grad_norm": 0.380859375, "learning_rate": 2.8686313363740867e-06, "loss": 2.0332, "step": 25001 }, { "epoch": 0.8066608726162557, "grad_norm": 0.396484375, "learning_rate": 2.8677058245975564e-06, "loss": 2.0096, "step": 25002 }, { "epoch": 0.8066931364700521, "grad_norm": 0.392578125, "learning_rate": 2.8667804463646187e-06, "loss": 2.0191, "step": 25003 }, { "epoch": 0.8067254003238484, "grad_norm": 0.408203125, "learning_rate": 2.865855201685459e-06, "loss": 2.0084, "step": 25004 }, { "epoch": 0.8067576641776448, "grad_norm": 0.373046875, "learning_rate": 2.8649300905702607e-06, "loss": 2.0719, "step": 25005 }, { "epoch": 0.8067899280314411, "grad_norm": 0.376953125, "learning_rate": 2.8640051130292013e-06, "loss": 2.0512, "step": 25006 }, { "epoch": 0.8068221918852375, "grad_norm": 0.380859375, "learning_rate": 2.8630802690724718e-06, "loss": 2.0497, "step": 25007 }, { "epoch": 0.8068544557390338, "grad_norm": 0.400390625, "learning_rate": 2.8621555587102475e-06, "loss": 2.0312, "step": 25008 }, { "epoch": 0.8068867195928302, "grad_norm": 0.396484375, "learning_rate": 2.8612309819527027e-06, "loss": 2.0517, "step": 25009 }, { "epoch": 0.8069189834466265, "grad_norm": 0.37109375, "learning_rate": 2.8603065388100237e-06, "loss": 2.0389, "step": 25010 }, { "epoch": 0.8069512473004229, "grad_norm": 0.365234375, "learning_rate": 2.859382229292382e-06, "loss": 2.051, "step": 25011 }, { "epoch": 0.8069835111542192, "grad_norm": 0.376953125, "learning_rate": 2.8584580534099453e-06, "loss": 2.053, "step": 25012 }, { "epoch": 0.8070157750080156, "grad_norm": 0.376953125, "learning_rate": 2.8575340111728963e-06, "loss": 2.0345, "step": 25013 }, { "epoch": 0.8070480388618119, "grad_norm": 0.375, "learning_rate": 2.8566101025914025e-06, "loss": 2.0719, "step": 25014 }, { "epoch": 0.8070803027156083, "grad_norm": 0.3828125, "learning_rate": 2.855686327675631e-06, "loss": 2.0669, "step": 25015 }, { "epoch": 0.8071125665694046, "grad_norm": 0.380859375, "learning_rate": 2.854762686435754e-06, "loss": 2.0425, "step": 25016 }, { "epoch": 0.807144830423201, "grad_norm": 0.376953125, "learning_rate": 2.8538391788819363e-06, "loss": 2.0592, "step": 25017 }, { "epoch": 0.8071770942769972, "grad_norm": 0.375, "learning_rate": 2.8529158050243394e-06, "loss": 2.0554, "step": 25018 }, { "epoch": 0.8072093581307936, "grad_norm": 0.380859375, "learning_rate": 2.851992564873135e-06, "loss": 2.037, "step": 25019 }, { "epoch": 0.8072416219845899, "grad_norm": 0.37890625, "learning_rate": 2.8510694584384815e-06, "loss": 2.0509, "step": 25020 }, { "epoch": 0.8072738858383863, "grad_norm": 0.412109375, "learning_rate": 2.8501464857305367e-06, "loss": 2.0666, "step": 25021 }, { "epoch": 0.8073061496921826, "grad_norm": 0.37109375, "learning_rate": 2.849223646759466e-06, "loss": 2.0684, "step": 25022 }, { "epoch": 0.807338413545979, "grad_norm": 0.380859375, "learning_rate": 2.8483009415354263e-06, "loss": 2.0399, "step": 25023 }, { "epoch": 0.8073706773997754, "grad_norm": 0.390625, "learning_rate": 2.847378370068566e-06, "loss": 2.0482, "step": 25024 }, { "epoch": 0.8074029412535717, "grad_norm": 0.384765625, "learning_rate": 2.846455932369052e-06, "loss": 2.0121, "step": 25025 }, { "epoch": 0.8074352051073681, "grad_norm": 0.365234375, "learning_rate": 2.8455336284470302e-06, "loss": 2.045, "step": 25026 }, { "epoch": 0.8074674689611644, "grad_norm": 0.376953125, "learning_rate": 2.8446114583126527e-06, "loss": 2.0439, "step": 25027 }, { "epoch": 0.8074997328149608, "grad_norm": 0.38671875, "learning_rate": 2.8436894219760746e-06, "loss": 2.0361, "step": 25028 }, { "epoch": 0.8075319966687571, "grad_norm": 0.38671875, "learning_rate": 2.8427675194474428e-06, "loss": 2.0142, "step": 25029 }, { "epoch": 0.8075642605225535, "grad_norm": 0.40234375, "learning_rate": 2.841845750736901e-06, "loss": 2.0176, "step": 25030 }, { "epoch": 0.8075965243763498, "grad_norm": 0.3828125, "learning_rate": 2.840924115854603e-06, "loss": 2.0443, "step": 25031 }, { "epoch": 0.8076287882301462, "grad_norm": 0.357421875, "learning_rate": 2.84000261481069e-06, "loss": 2.0559, "step": 25032 }, { "epoch": 0.8076610520839425, "grad_norm": 0.375, "learning_rate": 2.8390812476152995e-06, "loss": 2.0293, "step": 25033 }, { "epoch": 0.8076933159377389, "grad_norm": 0.392578125, "learning_rate": 2.838160014278583e-06, "loss": 2.0449, "step": 25034 }, { "epoch": 0.8077255797915351, "grad_norm": 0.38671875, "learning_rate": 2.837238914810676e-06, "loss": 2.0263, "step": 25035 }, { "epoch": 0.8077578436453315, "grad_norm": 0.3828125, "learning_rate": 2.836317949221717e-06, "loss": 2.0864, "step": 25036 }, { "epoch": 0.8077901074991278, "grad_norm": 0.373046875, "learning_rate": 2.835397117521841e-06, "loss": 2.0451, "step": 25037 }, { "epoch": 0.8078223713529242, "grad_norm": 0.365234375, "learning_rate": 2.83447641972119e-06, "loss": 2.0675, "step": 25038 }, { "epoch": 0.8078546352067205, "grad_norm": 0.353515625, "learning_rate": 2.833555855829894e-06, "loss": 2.0748, "step": 25039 }, { "epoch": 0.8078868990605169, "grad_norm": 0.3671875, "learning_rate": 2.832635425858084e-06, "loss": 2.0211, "step": 25040 }, { "epoch": 0.8079191629143132, "grad_norm": 0.37890625, "learning_rate": 2.8317151298158995e-06, "loss": 2.0707, "step": 25041 }, { "epoch": 0.8079514267681096, "grad_norm": 0.375, "learning_rate": 2.8307949677134645e-06, "loss": 2.0218, "step": 25042 }, { "epoch": 0.8079836906219059, "grad_norm": 0.36328125, "learning_rate": 2.8298749395609036e-06, "loss": 1.985, "step": 25043 }, { "epoch": 0.8080159544757023, "grad_norm": 0.361328125, "learning_rate": 2.828955045368353e-06, "loss": 2.0227, "step": 25044 }, { "epoch": 0.8080482183294987, "grad_norm": 0.365234375, "learning_rate": 2.8280352851459335e-06, "loss": 2.016, "step": 25045 }, { "epoch": 0.808080482183295, "grad_norm": 0.37109375, "learning_rate": 2.827115658903766e-06, "loss": 2.0446, "step": 25046 }, { "epoch": 0.8081127460370914, "grad_norm": 0.35546875, "learning_rate": 2.8261961666519807e-06, "loss": 2.0523, "step": 25047 }, { "epoch": 0.8081450098908877, "grad_norm": 0.376953125, "learning_rate": 2.825276808400695e-06, "loss": 2.0347, "step": 25048 }, { "epoch": 0.8081772737446841, "grad_norm": 0.3984375, "learning_rate": 2.8243575841600226e-06, "loss": 2.0312, "step": 25049 }, { "epoch": 0.8082095375984804, "grad_norm": 0.349609375, "learning_rate": 2.823438493940092e-06, "loss": 2.0032, "step": 25050 }, { "epoch": 0.8082418014522768, "grad_norm": 0.439453125, "learning_rate": 2.8225195377510143e-06, "loss": 2.1163, "step": 25051 }, { "epoch": 0.808274065306073, "grad_norm": 0.43359375, "learning_rate": 2.8216007156029015e-06, "loss": 2.05, "step": 25052 }, { "epoch": 0.8083063291598694, "grad_norm": 0.421875, "learning_rate": 2.820682027505876e-06, "loss": 2.0519, "step": 25053 }, { "epoch": 0.8083385930136657, "grad_norm": 0.419921875, "learning_rate": 2.8197634734700457e-06, "loss": 2.057, "step": 25054 }, { "epoch": 0.8083708568674621, "grad_norm": 0.431640625, "learning_rate": 2.818845053505516e-06, "loss": 2.0743, "step": 25055 }, { "epoch": 0.8084031207212584, "grad_norm": 0.396484375, "learning_rate": 2.8179267676224057e-06, "loss": 2.0599, "step": 25056 }, { "epoch": 0.8084353845750548, "grad_norm": 0.400390625, "learning_rate": 2.8170086158308185e-06, "loss": 2.0647, "step": 25057 }, { "epoch": 0.8084676484288511, "grad_norm": 0.419921875, "learning_rate": 2.8160905981408557e-06, "loss": 2.0375, "step": 25058 }, { "epoch": 0.8084999122826475, "grad_norm": 0.392578125, "learning_rate": 2.8151727145626316e-06, "loss": 2.0519, "step": 25059 }, { "epoch": 0.8085321761364438, "grad_norm": 0.392578125, "learning_rate": 2.8142549651062445e-06, "loss": 2.0275, "step": 25060 }, { "epoch": 0.8085644399902402, "grad_norm": 0.419921875, "learning_rate": 2.813337349781793e-06, "loss": 2.0743, "step": 25061 }, { "epoch": 0.8085967038440365, "grad_norm": 0.421875, "learning_rate": 2.812419868599385e-06, "loss": 2.051, "step": 25062 }, { "epoch": 0.8086289676978329, "grad_norm": 0.38671875, "learning_rate": 2.8115025215691153e-06, "loss": 2.0444, "step": 25063 }, { "epoch": 0.8086612315516293, "grad_norm": 0.39453125, "learning_rate": 2.8105853087010785e-06, "loss": 2.0123, "step": 25064 }, { "epoch": 0.8086934954054256, "grad_norm": 0.390625, "learning_rate": 2.8096682300053784e-06, "loss": 1.9241, "step": 25065 }, { "epoch": 0.808725759259222, "grad_norm": 0.375, "learning_rate": 2.808751285492105e-06, "loss": 1.913, "step": 25066 }, { "epoch": 0.8087580231130183, "grad_norm": 0.373046875, "learning_rate": 2.8078344751713504e-06, "loss": 1.8982, "step": 25067 }, { "epoch": 0.8087902869668147, "grad_norm": 0.41015625, "learning_rate": 2.8069177990532047e-06, "loss": 1.8859, "step": 25068 }, { "epoch": 0.8088225508206109, "grad_norm": 0.427734375, "learning_rate": 2.8060012571477633e-06, "loss": 1.8809, "step": 25069 }, { "epoch": 0.8088548146744073, "grad_norm": 0.373046875, "learning_rate": 2.805084849465113e-06, "loss": 1.8809, "step": 25070 }, { "epoch": 0.8088870785282036, "grad_norm": 0.3828125, "learning_rate": 2.804168576015336e-06, "loss": 1.9461, "step": 25071 }, { "epoch": 0.808919342382, "grad_norm": 0.3984375, "learning_rate": 2.803252436808526e-06, "loss": 1.8321, "step": 25072 }, { "epoch": 0.8089516062357963, "grad_norm": 0.3671875, "learning_rate": 2.802336431854763e-06, "loss": 1.8437, "step": 25073 }, { "epoch": 0.8089838700895927, "grad_norm": 0.359375, "learning_rate": 2.8014205611641253e-06, "loss": 1.9099, "step": 25074 }, { "epoch": 0.809016133943389, "grad_norm": 0.380859375, "learning_rate": 2.8005048247467034e-06, "loss": 1.9477, "step": 25075 }, { "epoch": 0.8090483977971854, "grad_norm": 0.37109375, "learning_rate": 2.799589222612573e-06, "loss": 1.9253, "step": 25076 }, { "epoch": 0.8090806616509817, "grad_norm": 0.37109375, "learning_rate": 2.7986737547718074e-06, "loss": 1.9008, "step": 25077 }, { "epoch": 0.8091129255047781, "grad_norm": 0.37890625, "learning_rate": 2.797758421234493e-06, "loss": 1.8835, "step": 25078 }, { "epoch": 0.8091451893585744, "grad_norm": 0.392578125, "learning_rate": 2.7968432220106975e-06, "loss": 1.8699, "step": 25079 }, { "epoch": 0.8091774532123708, "grad_norm": 0.369140625, "learning_rate": 2.795928157110496e-06, "loss": 1.8353, "step": 25080 }, { "epoch": 0.8092097170661671, "grad_norm": 0.390625, "learning_rate": 2.7950132265439652e-06, "loss": 1.908, "step": 25081 }, { "epoch": 0.8092419809199635, "grad_norm": 0.384765625, "learning_rate": 2.7940984303211725e-06, "loss": 1.8523, "step": 25082 }, { "epoch": 0.8092742447737598, "grad_norm": 0.37109375, "learning_rate": 2.7931837684521845e-06, "loss": 1.9247, "step": 25083 }, { "epoch": 0.8093065086275562, "grad_norm": 0.380859375, "learning_rate": 2.792269240947076e-06, "loss": 1.9651, "step": 25084 }, { "epoch": 0.8093387724813526, "grad_norm": 0.384765625, "learning_rate": 2.7913548478159103e-06, "loss": 1.9217, "step": 25085 }, { "epoch": 0.8093710363351488, "grad_norm": 0.37890625, "learning_rate": 2.790440589068748e-06, "loss": 1.9143, "step": 25086 }, { "epoch": 0.8094033001889452, "grad_norm": 0.3671875, "learning_rate": 2.78952646471566e-06, "loss": 1.9126, "step": 25087 }, { "epoch": 0.8094355640427415, "grad_norm": 0.41015625, "learning_rate": 2.7886124747667046e-06, "loss": 1.9174, "step": 25088 }, { "epoch": 0.8094678278965379, "grad_norm": 0.376953125, "learning_rate": 2.787698619231941e-06, "loss": 1.8947, "step": 25089 }, { "epoch": 0.8095000917503342, "grad_norm": 0.37890625, "learning_rate": 2.786784898121432e-06, "loss": 1.9071, "step": 25090 }, { "epoch": 0.8095323556041306, "grad_norm": 0.396484375, "learning_rate": 2.7858713114452324e-06, "loss": 1.8862, "step": 25091 }, { "epoch": 0.8095646194579269, "grad_norm": 0.359375, "learning_rate": 2.784957859213396e-06, "loss": 1.8616, "step": 25092 }, { "epoch": 0.8095968833117233, "grad_norm": 0.4609375, "learning_rate": 2.7840445414359844e-06, "loss": 1.8537, "step": 25093 }, { "epoch": 0.8096291471655196, "grad_norm": 0.361328125, "learning_rate": 2.7831313581230473e-06, "loss": 1.9095, "step": 25094 }, { "epoch": 0.809661411019316, "grad_norm": 0.37890625, "learning_rate": 2.7822183092846315e-06, "loss": 1.8858, "step": 25095 }, { "epoch": 0.8096936748731123, "grad_norm": 0.375, "learning_rate": 2.781305394930796e-06, "loss": 1.9041, "step": 25096 }, { "epoch": 0.8097259387269087, "grad_norm": 0.361328125, "learning_rate": 2.780392615071584e-06, "loss": 1.9141, "step": 25097 }, { "epoch": 0.809758202580705, "grad_norm": 0.35546875, "learning_rate": 2.7794799697170446e-06, "loss": 1.9196, "step": 25098 }, { "epoch": 0.8097904664345014, "grad_norm": 0.4140625, "learning_rate": 2.778567458877218e-06, "loss": 1.9707, "step": 25099 }, { "epoch": 0.8098227302882977, "grad_norm": 0.3671875, "learning_rate": 2.7776550825621576e-06, "loss": 1.9135, "step": 25100 }, { "epoch": 0.809854994142094, "grad_norm": 0.361328125, "learning_rate": 2.7767428407819025e-06, "loss": 1.8987, "step": 25101 }, { "epoch": 0.8098872579958903, "grad_norm": 0.3671875, "learning_rate": 2.775830733546489e-06, "loss": 1.9175, "step": 25102 }, { "epoch": 0.8099195218496867, "grad_norm": 0.37890625, "learning_rate": 2.7749187608659643e-06, "loss": 1.9144, "step": 25103 }, { "epoch": 0.809951785703483, "grad_norm": 0.38671875, "learning_rate": 2.774006922750364e-06, "loss": 1.9623, "step": 25104 }, { "epoch": 0.8099840495572794, "grad_norm": 0.361328125, "learning_rate": 2.7730952192097214e-06, "loss": 1.9414, "step": 25105 }, { "epoch": 0.8100163134110758, "grad_norm": 0.390625, "learning_rate": 2.772183650254077e-06, "loss": 1.9231, "step": 25106 }, { "epoch": 0.8100485772648721, "grad_norm": 0.3515625, "learning_rate": 2.7712722158934645e-06, "loss": 1.9443, "step": 25107 }, { "epoch": 0.8100808411186685, "grad_norm": 0.361328125, "learning_rate": 2.770360916137911e-06, "loss": 1.9155, "step": 25108 }, { "epoch": 0.8101131049724648, "grad_norm": 0.357421875, "learning_rate": 2.769449750997454e-06, "loss": 1.9355, "step": 25109 }, { "epoch": 0.8101453688262612, "grad_norm": 0.365234375, "learning_rate": 2.7685387204821207e-06, "loss": 1.9212, "step": 25110 }, { "epoch": 0.8101776326800575, "grad_norm": 0.40234375, "learning_rate": 2.767627824601933e-06, "loss": 1.9714, "step": 25111 }, { "epoch": 0.8102098965338539, "grad_norm": 0.384765625, "learning_rate": 2.766717063366929e-06, "loss": 2.0164, "step": 25112 }, { "epoch": 0.8102421603876502, "grad_norm": 0.376953125, "learning_rate": 2.7658064367871256e-06, "loss": 1.9809, "step": 25113 }, { "epoch": 0.8102744242414466, "grad_norm": 0.357421875, "learning_rate": 2.7648959448725457e-06, "loss": 2.0143, "step": 25114 }, { "epoch": 0.8103066880952429, "grad_norm": 0.349609375, "learning_rate": 2.763985587633215e-06, "loss": 1.9878, "step": 25115 }, { "epoch": 0.8103389519490393, "grad_norm": 0.482421875, "learning_rate": 2.7630753650791562e-06, "loss": 1.9892, "step": 25116 }, { "epoch": 0.8103712158028356, "grad_norm": 0.35546875, "learning_rate": 2.7621652772203786e-06, "loss": 1.9806, "step": 25117 }, { "epoch": 0.810403479656632, "grad_norm": 0.361328125, "learning_rate": 2.7612553240669112e-06, "loss": 1.9952, "step": 25118 }, { "epoch": 0.8104357435104282, "grad_norm": 0.357421875, "learning_rate": 2.7603455056287664e-06, "loss": 1.9639, "step": 25119 }, { "epoch": 0.8104680073642246, "grad_norm": 0.359375, "learning_rate": 2.7594358219159522e-06, "loss": 1.9607, "step": 25120 }, { "epoch": 0.8105002712180209, "grad_norm": 0.359375, "learning_rate": 2.758526272938491e-06, "loss": 1.9775, "step": 25121 }, { "epoch": 0.8105325350718173, "grad_norm": 0.36328125, "learning_rate": 2.757616858706391e-06, "loss": 1.9607, "step": 25122 }, { "epoch": 0.8105647989256136, "grad_norm": 0.365234375, "learning_rate": 2.756707579229658e-06, "loss": 1.9622, "step": 25123 }, { "epoch": 0.81059706277941, "grad_norm": 0.423828125, "learning_rate": 2.7557984345183075e-06, "loss": 2.0022, "step": 25124 }, { "epoch": 0.8106293266332064, "grad_norm": 0.359375, "learning_rate": 2.754889424582346e-06, "loss": 1.9991, "step": 25125 }, { "epoch": 0.8106615904870027, "grad_norm": 0.35546875, "learning_rate": 2.7539805494317753e-06, "loss": 1.9728, "step": 25126 }, { "epoch": 0.8106938543407991, "grad_norm": 0.359375, "learning_rate": 2.753071809076598e-06, "loss": 1.9894, "step": 25127 }, { "epoch": 0.8107261181945954, "grad_norm": 0.3671875, "learning_rate": 2.7521632035268245e-06, "loss": 2.008, "step": 25128 }, { "epoch": 0.8107583820483918, "grad_norm": 0.38671875, "learning_rate": 2.7512547327924513e-06, "loss": 1.9798, "step": 25129 }, { "epoch": 0.8107906459021881, "grad_norm": 0.3515625, "learning_rate": 2.7503463968834756e-06, "loss": 2.0301, "step": 25130 }, { "epoch": 0.8108229097559845, "grad_norm": 0.34765625, "learning_rate": 2.749438195809903e-06, "loss": 2.0055, "step": 25131 }, { "epoch": 0.8108551736097808, "grad_norm": 0.357421875, "learning_rate": 2.7485301295817265e-06, "loss": 2.0267, "step": 25132 }, { "epoch": 0.8108874374635772, "grad_norm": 0.357421875, "learning_rate": 2.747622198208937e-06, "loss": 1.997, "step": 25133 }, { "epoch": 0.8109197013173735, "grad_norm": 0.365234375, "learning_rate": 2.746714401701535e-06, "loss": 1.9893, "step": 25134 }, { "epoch": 0.8109519651711699, "grad_norm": 0.35546875, "learning_rate": 2.745806740069512e-06, "loss": 2.0324, "step": 25135 }, { "epoch": 0.8109842290249661, "grad_norm": 0.353515625, "learning_rate": 2.7448992133228536e-06, "loss": 2.0141, "step": 25136 }, { "epoch": 0.8110164928787625, "grad_norm": 0.365234375, "learning_rate": 2.743991821471555e-06, "loss": 2.0495, "step": 25137 }, { "epoch": 0.8110487567325588, "grad_norm": 0.3515625, "learning_rate": 2.743084564525603e-06, "loss": 2.0253, "step": 25138 }, { "epoch": 0.8110810205863552, "grad_norm": 0.35546875, "learning_rate": 2.7421774424949784e-06, "loss": 1.9759, "step": 25139 }, { "epoch": 0.8111132844401515, "grad_norm": 0.4140625, "learning_rate": 2.7412704553896767e-06, "loss": 2.0528, "step": 25140 }, { "epoch": 0.8111455482939479, "grad_norm": 0.375, "learning_rate": 2.7403636032196745e-06, "loss": 2.0144, "step": 25141 }, { "epoch": 0.8111778121477442, "grad_norm": 0.353515625, "learning_rate": 2.7394568859949503e-06, "loss": 2.0685, "step": 25142 }, { "epoch": 0.8112100760015406, "grad_norm": 0.35546875, "learning_rate": 2.738550303725493e-06, "loss": 2.0844, "step": 25143 }, { "epoch": 0.8112423398553369, "grad_norm": 0.34765625, "learning_rate": 2.737643856421277e-06, "loss": 2.0372, "step": 25144 }, { "epoch": 0.8112746037091333, "grad_norm": 0.353515625, "learning_rate": 2.736737544092278e-06, "loss": 2.0723, "step": 25145 }, { "epoch": 0.8113068675629297, "grad_norm": 0.34765625, "learning_rate": 2.7358313667484775e-06, "loss": 2.0627, "step": 25146 }, { "epoch": 0.811339131416726, "grad_norm": 0.353515625, "learning_rate": 2.734925324399847e-06, "loss": 2.0514, "step": 25147 }, { "epoch": 0.8113713952705224, "grad_norm": 0.357421875, "learning_rate": 2.7340194170563565e-06, "loss": 2.1701, "step": 25148 }, { "epoch": 0.8114036591243187, "grad_norm": 0.36328125, "learning_rate": 2.733113644727984e-06, "loss": 2.1269, "step": 25149 }, { "epoch": 0.8114359229781151, "grad_norm": 0.353515625, "learning_rate": 2.732208007424697e-06, "loss": 2.1492, "step": 25150 }, { "epoch": 0.8114681868319114, "grad_norm": 0.361328125, "learning_rate": 2.731302505156458e-06, "loss": 2.1483, "step": 25151 }, { "epoch": 0.8115004506857078, "grad_norm": 0.35546875, "learning_rate": 2.730397137933245e-06, "loss": 2.0936, "step": 25152 }, { "epoch": 0.811532714539504, "grad_norm": 0.359375, "learning_rate": 2.729491905765017e-06, "loss": 2.1371, "step": 25153 }, { "epoch": 0.8115649783933004, "grad_norm": 0.35546875, "learning_rate": 2.7285868086617348e-06, "loss": 2.1328, "step": 25154 }, { "epoch": 0.8115972422470967, "grad_norm": 0.373046875, "learning_rate": 2.7276818466333707e-06, "loss": 2.1616, "step": 25155 }, { "epoch": 0.8116295061008931, "grad_norm": 0.3515625, "learning_rate": 2.72677701968988e-06, "loss": 2.142, "step": 25156 }, { "epoch": 0.8116617699546894, "grad_norm": 0.345703125, "learning_rate": 2.7258723278412245e-06, "loss": 2.1687, "step": 25157 }, { "epoch": 0.8116940338084858, "grad_norm": 0.349609375, "learning_rate": 2.724967771097355e-06, "loss": 2.1132, "step": 25158 }, { "epoch": 0.8117262976622821, "grad_norm": 0.349609375, "learning_rate": 2.724063349468239e-06, "loss": 2.1449, "step": 25159 }, { "epoch": 0.8117585615160785, "grad_norm": 0.359375, "learning_rate": 2.723159062963827e-06, "loss": 2.1821, "step": 25160 }, { "epoch": 0.8117908253698748, "grad_norm": 0.35546875, "learning_rate": 2.7222549115940697e-06, "loss": 2.1343, "step": 25161 }, { "epoch": 0.8118230892236712, "grad_norm": 0.35546875, "learning_rate": 2.721350895368925e-06, "loss": 2.1494, "step": 25162 }, { "epoch": 0.8118553530774675, "grad_norm": 0.33984375, "learning_rate": 2.7204470142983406e-06, "loss": 2.1382, "step": 25163 }, { "epoch": 0.8118876169312639, "grad_norm": 0.341796875, "learning_rate": 2.7195432683922646e-06, "loss": 2.1063, "step": 25164 }, { "epoch": 0.8119198807850603, "grad_norm": 0.34375, "learning_rate": 2.7186396576606483e-06, "loss": 2.1189, "step": 25165 }, { "epoch": 0.8119521446388566, "grad_norm": 0.375, "learning_rate": 2.7177361821134355e-06, "loss": 2.1188, "step": 25166 }, { "epoch": 0.811984408492653, "grad_norm": 0.337890625, "learning_rate": 2.7168328417605687e-06, "loss": 2.1124, "step": 25167 }, { "epoch": 0.8120166723464493, "grad_norm": 0.34765625, "learning_rate": 2.715929636611998e-06, "loss": 2.1506, "step": 25168 }, { "epoch": 0.8120489362002457, "grad_norm": 0.34765625, "learning_rate": 2.71502656667766e-06, "loss": 2.1009, "step": 25169 }, { "epoch": 0.8120812000540419, "grad_norm": 0.34765625, "learning_rate": 2.7141236319674944e-06, "loss": 2.1486, "step": 25170 }, { "epoch": 0.8121134639078383, "grad_norm": 0.353515625, "learning_rate": 2.713220832491446e-06, "loss": 2.1826, "step": 25171 }, { "epoch": 0.8121457277616346, "grad_norm": 0.3515625, "learning_rate": 2.7123181682594467e-06, "loss": 2.1392, "step": 25172 }, { "epoch": 0.812177991615431, "grad_norm": 0.34765625, "learning_rate": 2.7114156392814306e-06, "loss": 2.1018, "step": 25173 }, { "epoch": 0.8122102554692273, "grad_norm": 0.349609375, "learning_rate": 2.710513245567339e-06, "loss": 2.1593, "step": 25174 }, { "epoch": 0.8122425193230237, "grad_norm": 0.337890625, "learning_rate": 2.7096109871271003e-06, "loss": 2.1562, "step": 25175 }, { "epoch": 0.81227478317682, "grad_norm": 0.337890625, "learning_rate": 2.708708863970645e-06, "loss": 2.1184, "step": 25176 }, { "epoch": 0.8123070470306164, "grad_norm": 0.349609375, "learning_rate": 2.7078068761079117e-06, "loss": 2.1901, "step": 25177 }, { "epoch": 0.8123393108844127, "grad_norm": 0.345703125, "learning_rate": 2.706905023548813e-06, "loss": 2.1745, "step": 25178 }, { "epoch": 0.8123715747382091, "grad_norm": 0.34375, "learning_rate": 2.7060033063032864e-06, "loss": 2.2202, "step": 25179 }, { "epoch": 0.8124038385920054, "grad_norm": 0.353515625, "learning_rate": 2.705101724381262e-06, "loss": 2.1395, "step": 25180 }, { "epoch": 0.8124361024458018, "grad_norm": 0.345703125, "learning_rate": 2.7042002777926504e-06, "loss": 2.1789, "step": 25181 }, { "epoch": 0.8124683662995981, "grad_norm": 0.35546875, "learning_rate": 2.7032989665473805e-06, "loss": 2.148, "step": 25182 }, { "epoch": 0.8125006301533945, "grad_norm": 0.357421875, "learning_rate": 2.7023977906553794e-06, "loss": 2.1603, "step": 25183 }, { "epoch": 0.8125328940071908, "grad_norm": 0.3515625, "learning_rate": 2.7014967501265604e-06, "loss": 2.2118, "step": 25184 }, { "epoch": 0.8125651578609872, "grad_norm": 0.349609375, "learning_rate": 2.700595844970839e-06, "loss": 2.2184, "step": 25185 }, { "epoch": 0.8125974217147836, "grad_norm": 0.34375, "learning_rate": 2.6996950751981377e-06, "loss": 2.1944, "step": 25186 }, { "epoch": 0.8126296855685798, "grad_norm": 0.349609375, "learning_rate": 2.6987944408183694e-06, "loss": 2.1973, "step": 25187 }, { "epoch": 0.8126619494223762, "grad_norm": 0.361328125, "learning_rate": 2.697893941841447e-06, "loss": 2.2302, "step": 25188 }, { "epoch": 0.8126942132761725, "grad_norm": 0.392578125, "learning_rate": 2.696993578277279e-06, "loss": 2.2396, "step": 25189 }, { "epoch": 0.8127264771299689, "grad_norm": 0.337890625, "learning_rate": 2.6960933501357825e-06, "loss": 2.2243, "step": 25190 }, { "epoch": 0.8127587409837652, "grad_norm": 0.359375, "learning_rate": 2.6951932574268644e-06, "loss": 2.2366, "step": 25191 }, { "epoch": 0.8127910048375616, "grad_norm": 0.349609375, "learning_rate": 2.694293300160427e-06, "loss": 2.2484, "step": 25192 }, { "epoch": 0.8128232686913579, "grad_norm": 0.349609375, "learning_rate": 2.6933934783463854e-06, "loss": 2.2582, "step": 25193 }, { "epoch": 0.8128555325451543, "grad_norm": 0.33984375, "learning_rate": 2.6924937919946404e-06, "loss": 2.2248, "step": 25194 }, { "epoch": 0.8128877963989506, "grad_norm": 0.345703125, "learning_rate": 2.6915942411150884e-06, "loss": 2.2229, "step": 25195 }, { "epoch": 0.812920060252747, "grad_norm": 0.357421875, "learning_rate": 2.6906948257176424e-06, "loss": 2.2141, "step": 25196 }, { "epoch": 0.8129523241065433, "grad_norm": 0.349609375, "learning_rate": 2.689795545812197e-06, "loss": 2.1773, "step": 25197 }, { "epoch": 0.8129845879603397, "grad_norm": 0.34375, "learning_rate": 2.6888964014086466e-06, "loss": 2.2298, "step": 25198 }, { "epoch": 0.813016851814136, "grad_norm": 0.34765625, "learning_rate": 2.687997392516896e-06, "loss": 2.2163, "step": 25199 }, { "epoch": 0.8130491156679324, "grad_norm": 0.33984375, "learning_rate": 2.6870985191468377e-06, "loss": 2.2136, "step": 25200 }, { "epoch": 0.8130813795217287, "grad_norm": 0.34375, "learning_rate": 2.6861997813083638e-06, "loss": 2.2453, "step": 25201 }, { "epoch": 0.813113643375525, "grad_norm": 0.33203125, "learning_rate": 2.685301179011371e-06, "loss": 2.237, "step": 25202 }, { "epoch": 0.8131459072293213, "grad_norm": 0.35546875, "learning_rate": 2.6844027122657487e-06, "loss": 2.2518, "step": 25203 }, { "epoch": 0.8131781710831177, "grad_norm": 0.337890625, "learning_rate": 2.683504381081382e-06, "loss": 2.2597, "step": 25204 }, { "epoch": 0.813210434936914, "grad_norm": 0.33203125, "learning_rate": 2.682606185468166e-06, "loss": 2.2554, "step": 25205 }, { "epoch": 0.8132426987907104, "grad_norm": 0.3359375, "learning_rate": 2.6817081254359867e-06, "loss": 2.2813, "step": 25206 }, { "epoch": 0.8132749626445068, "grad_norm": 0.35546875, "learning_rate": 2.680810200994722e-06, "loss": 2.2171, "step": 25207 }, { "epoch": 0.8133072264983031, "grad_norm": 0.353515625, "learning_rate": 2.67991241215427e-06, "loss": 2.1965, "step": 25208 }, { "epoch": 0.8133394903520995, "grad_norm": 0.33984375, "learning_rate": 2.679014758924494e-06, "loss": 2.2588, "step": 25209 }, { "epoch": 0.8133717542058958, "grad_norm": 0.33984375, "learning_rate": 2.678117241315286e-06, "loss": 2.2559, "step": 25210 }, { "epoch": 0.8134040180596922, "grad_norm": 0.34375, "learning_rate": 2.6772198593365314e-06, "loss": 2.2233, "step": 25211 }, { "epoch": 0.8134362819134885, "grad_norm": 0.333984375, "learning_rate": 2.6763226129980928e-06, "loss": 2.2568, "step": 25212 }, { "epoch": 0.8134685457672849, "grad_norm": 0.345703125, "learning_rate": 2.6754255023098533e-06, "loss": 2.2201, "step": 25213 }, { "epoch": 0.8135008096210812, "grad_norm": 0.33984375, "learning_rate": 2.674528527281692e-06, "loss": 2.2455, "step": 25214 }, { "epoch": 0.8135330734748776, "grad_norm": 0.341796875, "learning_rate": 2.673631687923478e-06, "loss": 2.2469, "step": 25215 }, { "epoch": 0.8135653373286739, "grad_norm": 0.333984375, "learning_rate": 2.672734984245081e-06, "loss": 2.2793, "step": 25216 }, { "epoch": 0.8135976011824703, "grad_norm": 0.33984375, "learning_rate": 2.671838416256377e-06, "loss": 2.2058, "step": 25217 }, { "epoch": 0.8136298650362666, "grad_norm": 0.337890625, "learning_rate": 2.6709419839672312e-06, "loss": 2.2923, "step": 25218 }, { "epoch": 0.813662128890063, "grad_norm": 0.333984375, "learning_rate": 2.6700456873875105e-06, "loss": 2.2811, "step": 25219 }, { "epoch": 0.8136943927438592, "grad_norm": 0.330078125, "learning_rate": 2.6691495265270792e-06, "loss": 2.2257, "step": 25220 }, { "epoch": 0.8137266565976556, "grad_norm": 0.33203125, "learning_rate": 2.668253501395806e-06, "loss": 2.2373, "step": 25221 }, { "epoch": 0.8137589204514519, "grad_norm": 0.333984375, "learning_rate": 2.6673576120035525e-06, "loss": 2.2613, "step": 25222 }, { "epoch": 0.8137911843052483, "grad_norm": 0.345703125, "learning_rate": 2.6664618583601763e-06, "loss": 2.1578, "step": 25223 }, { "epoch": 0.8138234481590446, "grad_norm": 0.3359375, "learning_rate": 2.665566240475541e-06, "loss": 2.2069, "step": 25224 }, { "epoch": 0.813855712012841, "grad_norm": 0.349609375, "learning_rate": 2.6646707583595054e-06, "loss": 2.1888, "step": 25225 }, { "epoch": 0.8138879758666374, "grad_norm": 0.353515625, "learning_rate": 2.6637754120219216e-06, "loss": 2.1629, "step": 25226 }, { "epoch": 0.8139202397204337, "grad_norm": 0.345703125, "learning_rate": 2.6628802014726516e-06, "loss": 2.1937, "step": 25227 }, { "epoch": 0.8139525035742301, "grad_norm": 0.53125, "learning_rate": 2.6619851267215443e-06, "loss": 2.1742, "step": 25228 }, { "epoch": 0.8139847674280264, "grad_norm": 0.34765625, "learning_rate": 2.6610901877784503e-06, "loss": 2.1758, "step": 25229 }, { "epoch": 0.8140170312818228, "grad_norm": 0.3515625, "learning_rate": 2.6601953846532286e-06, "loss": 2.1816, "step": 25230 }, { "epoch": 0.8140492951356191, "grad_norm": 0.34375, "learning_rate": 2.659300717355722e-06, "loss": 2.17, "step": 25231 }, { "epoch": 0.8140815589894155, "grad_norm": 0.353515625, "learning_rate": 2.6584061858957786e-06, "loss": 2.1976, "step": 25232 }, { "epoch": 0.8141138228432118, "grad_norm": 0.34375, "learning_rate": 2.6575117902832467e-06, "loss": 2.1859, "step": 25233 }, { "epoch": 0.8141460866970082, "grad_norm": 0.34375, "learning_rate": 2.656617530527972e-06, "loss": 2.1667, "step": 25234 }, { "epoch": 0.8141783505508045, "grad_norm": 0.341796875, "learning_rate": 2.6557234066397935e-06, "loss": 2.1787, "step": 25235 }, { "epoch": 0.8142106144046009, "grad_norm": 0.341796875, "learning_rate": 2.654829418628559e-06, "loss": 2.1525, "step": 25236 }, { "epoch": 0.8142428782583971, "grad_norm": 0.33984375, "learning_rate": 2.6539355665041055e-06, "loss": 2.2075, "step": 25237 }, { "epoch": 0.8142751421121935, "grad_norm": 0.33984375, "learning_rate": 2.653041850276269e-06, "loss": 2.2072, "step": 25238 }, { "epoch": 0.8143074059659898, "grad_norm": 0.337890625, "learning_rate": 2.6521482699548967e-06, "loss": 2.1823, "step": 25239 }, { "epoch": 0.8143396698197862, "grad_norm": 0.35546875, "learning_rate": 2.6512548255498127e-06, "loss": 2.1492, "step": 25240 }, { "epoch": 0.8143719336735825, "grad_norm": 0.322265625, "learning_rate": 2.650361517070854e-06, "loss": 2.1748, "step": 25241 }, { "epoch": 0.8144041975273789, "grad_norm": 0.34765625, "learning_rate": 2.6494683445278643e-06, "loss": 2.2261, "step": 25242 }, { "epoch": 0.8144364613811752, "grad_norm": 0.3359375, "learning_rate": 2.648575307930661e-06, "loss": 2.1835, "step": 25243 }, { "epoch": 0.8144687252349716, "grad_norm": 0.330078125, "learning_rate": 2.6476824072890783e-06, "loss": 2.18, "step": 25244 }, { "epoch": 0.8145009890887679, "grad_norm": 0.359375, "learning_rate": 2.646789642612954e-06, "loss": 2.2022, "step": 25245 }, { "epoch": 0.8145332529425643, "grad_norm": 0.349609375, "learning_rate": 2.645897013912099e-06, "loss": 2.1248, "step": 25246 }, { "epoch": 0.8145655167963607, "grad_norm": 0.353515625, "learning_rate": 2.6450045211963476e-06, "loss": 2.192, "step": 25247 }, { "epoch": 0.814597780650157, "grad_norm": 0.328125, "learning_rate": 2.6441121644755263e-06, "loss": 2.1787, "step": 25248 }, { "epoch": 0.8146300445039534, "grad_norm": 0.33203125, "learning_rate": 2.6432199437594556e-06, "loss": 2.1708, "step": 25249 }, { "epoch": 0.8146623083577497, "grad_norm": 0.337890625, "learning_rate": 2.642327859057953e-06, "loss": 2.1502, "step": 25250 }, { "epoch": 0.8146945722115461, "grad_norm": 0.640625, "learning_rate": 2.6414359103808373e-06, "loss": 2.1594, "step": 25251 }, { "epoch": 0.8147268360653424, "grad_norm": 0.3359375, "learning_rate": 2.6405440977379336e-06, "loss": 2.187, "step": 25252 }, { "epoch": 0.8147590999191388, "grad_norm": 0.34765625, "learning_rate": 2.6396524211390527e-06, "loss": 2.1957, "step": 25253 }, { "epoch": 0.814791363772935, "grad_norm": 0.328125, "learning_rate": 2.638760880594007e-06, "loss": 2.1822, "step": 25254 }, { "epoch": 0.8148236276267314, "grad_norm": 0.349609375, "learning_rate": 2.6378694761126177e-06, "loss": 2.2376, "step": 25255 }, { "epoch": 0.8148558914805277, "grad_norm": 0.33203125, "learning_rate": 2.6369782077046926e-06, "loss": 2.2042, "step": 25256 }, { "epoch": 0.8148881553343241, "grad_norm": 0.353515625, "learning_rate": 2.6360870753800377e-06, "loss": 2.2324, "step": 25257 }, { "epoch": 0.8149204191881204, "grad_norm": 0.34375, "learning_rate": 2.635196079148471e-06, "loss": 2.1585, "step": 25258 }, { "epoch": 0.8149526830419168, "grad_norm": 0.3359375, "learning_rate": 2.6343052190197963e-06, "loss": 2.2127, "step": 25259 }, { "epoch": 0.8149849468957131, "grad_norm": 0.33984375, "learning_rate": 2.633414495003813e-06, "loss": 2.1999, "step": 25260 }, { "epoch": 0.8150172107495095, "grad_norm": 0.36328125, "learning_rate": 2.6325239071103364e-06, "loss": 2.218, "step": 25261 }, { "epoch": 0.8150494746033058, "grad_norm": 0.3515625, "learning_rate": 2.6316334553491633e-06, "loss": 2.2054, "step": 25262 }, { "epoch": 0.8150817384571022, "grad_norm": 0.33984375, "learning_rate": 2.630743139730093e-06, "loss": 2.1789, "step": 25263 }, { "epoch": 0.8151140023108985, "grad_norm": 0.33984375, "learning_rate": 2.6298529602629305e-06, "loss": 2.2031, "step": 25264 }, { "epoch": 0.8151462661646949, "grad_norm": 0.341796875, "learning_rate": 2.6289629169574753e-06, "loss": 2.211, "step": 25265 }, { "epoch": 0.8151785300184913, "grad_norm": 0.3359375, "learning_rate": 2.6280730098235156e-06, "loss": 2.2052, "step": 25266 }, { "epoch": 0.8152107938722876, "grad_norm": 0.34765625, "learning_rate": 2.627183238870857e-06, "loss": 2.1812, "step": 25267 }, { "epoch": 0.815243057726084, "grad_norm": 0.34765625, "learning_rate": 2.6262936041092887e-06, "loss": 2.1738, "step": 25268 }, { "epoch": 0.8152753215798803, "grad_norm": 0.3359375, "learning_rate": 2.6254041055485994e-06, "loss": 2.1823, "step": 25269 }, { "epoch": 0.8153075854336767, "grad_norm": 0.345703125, "learning_rate": 2.6245147431985926e-06, "loss": 2.2039, "step": 25270 }, { "epoch": 0.8153398492874729, "grad_norm": 0.337890625, "learning_rate": 2.623625517069043e-06, "loss": 2.1901, "step": 25271 }, { "epoch": 0.8153721131412693, "grad_norm": 0.349609375, "learning_rate": 2.622736427169743e-06, "loss": 2.0895, "step": 25272 }, { "epoch": 0.8154043769950656, "grad_norm": 0.3515625, "learning_rate": 2.621847473510489e-06, "loss": 2.095, "step": 25273 }, { "epoch": 0.815436640848862, "grad_norm": 0.345703125, "learning_rate": 2.62095865610105e-06, "loss": 2.1075, "step": 25274 }, { "epoch": 0.8154689047026583, "grad_norm": 0.353515625, "learning_rate": 2.620069974951218e-06, "loss": 2.0579, "step": 25275 }, { "epoch": 0.8155011685564547, "grad_norm": 0.34375, "learning_rate": 2.6191814300707815e-06, "loss": 2.075, "step": 25276 }, { "epoch": 0.815533432410251, "grad_norm": 0.37109375, "learning_rate": 2.6182930214695065e-06, "loss": 2.0492, "step": 25277 }, { "epoch": 0.8155656962640474, "grad_norm": 0.34765625, "learning_rate": 2.6174047491571834e-06, "loss": 2.0775, "step": 25278 }, { "epoch": 0.8155979601178437, "grad_norm": 0.34375, "learning_rate": 2.616516613143586e-06, "loss": 2.094, "step": 25279 }, { "epoch": 0.8156302239716401, "grad_norm": 0.341796875, "learning_rate": 2.615628613438486e-06, "loss": 2.0978, "step": 25280 }, { "epoch": 0.8156624878254364, "grad_norm": 0.337890625, "learning_rate": 2.6147407500516647e-06, "loss": 2.1311, "step": 25281 }, { "epoch": 0.8156947516792328, "grad_norm": 0.345703125, "learning_rate": 2.6138530229928888e-06, "loss": 2.1211, "step": 25282 }, { "epoch": 0.8157270155330291, "grad_norm": 0.357421875, "learning_rate": 2.612965432271937e-06, "loss": 2.1376, "step": 25283 }, { "epoch": 0.8157592793868255, "grad_norm": 0.36328125, "learning_rate": 2.6120779778985736e-06, "loss": 2.1016, "step": 25284 }, { "epoch": 0.8157915432406218, "grad_norm": 0.35546875, "learning_rate": 2.6111906598825674e-06, "loss": 2.1025, "step": 25285 }, { "epoch": 0.8158238070944182, "grad_norm": 0.337890625, "learning_rate": 2.6103034782336887e-06, "loss": 2.1824, "step": 25286 }, { "epoch": 0.8158560709482146, "grad_norm": 0.39453125, "learning_rate": 2.6094164329617016e-06, "loss": 2.2677, "step": 25287 }, { "epoch": 0.8158883348020108, "grad_norm": 0.33984375, "learning_rate": 2.6085295240763664e-06, "loss": 2.2797, "step": 25288 }, { "epoch": 0.8159205986558072, "grad_norm": 0.33984375, "learning_rate": 2.6076427515874516e-06, "loss": 2.2002, "step": 25289 }, { "epoch": 0.8159528625096035, "grad_norm": 0.333984375, "learning_rate": 2.6067561155047155e-06, "loss": 2.2543, "step": 25290 }, { "epoch": 0.8159851263633999, "grad_norm": 0.33984375, "learning_rate": 2.6058696158379124e-06, "loss": 2.2351, "step": 25291 }, { "epoch": 0.8160173902171962, "grad_norm": 0.33203125, "learning_rate": 2.6049832525968104e-06, "loss": 2.2671, "step": 25292 }, { "epoch": 0.8160496540709926, "grad_norm": 0.337890625, "learning_rate": 2.6040970257911596e-06, "loss": 2.2882, "step": 25293 }, { "epoch": 0.8160819179247889, "grad_norm": 0.333984375, "learning_rate": 2.603210935430712e-06, "loss": 2.2579, "step": 25294 }, { "epoch": 0.8161141817785853, "grad_norm": 0.322265625, "learning_rate": 2.6023249815252302e-06, "loss": 2.2296, "step": 25295 }, { "epoch": 0.8161464456323816, "grad_norm": 0.34375, "learning_rate": 2.6014391640844597e-06, "loss": 2.2572, "step": 25296 }, { "epoch": 0.816178709486178, "grad_norm": 0.3203125, "learning_rate": 2.6005534831181476e-06, "loss": 2.2195, "step": 25297 }, { "epoch": 0.8162109733399743, "grad_norm": 0.3359375, "learning_rate": 2.599667938636053e-06, "loss": 2.1993, "step": 25298 }, { "epoch": 0.8162432371937707, "grad_norm": 0.337890625, "learning_rate": 2.5987825306479175e-06, "loss": 2.2061, "step": 25299 }, { "epoch": 0.816275501047567, "grad_norm": 0.33203125, "learning_rate": 2.597897259163482e-06, "loss": 2.2421, "step": 25300 }, { "epoch": 0.8163077649013634, "grad_norm": 0.330078125, "learning_rate": 2.5970121241925054e-06, "loss": 2.2372, "step": 25301 }, { "epoch": 0.8163400287551597, "grad_norm": 0.326171875, "learning_rate": 2.5961271257447135e-06, "loss": 2.2376, "step": 25302 }, { "epoch": 0.816372292608956, "grad_norm": 0.353515625, "learning_rate": 2.5952422638298562e-06, "loss": 2.2025, "step": 25303 }, { "epoch": 0.8164045564627523, "grad_norm": 0.3203125, "learning_rate": 2.5943575384576794e-06, "loss": 2.2763, "step": 25304 }, { "epoch": 0.8164368203165487, "grad_norm": 0.33203125, "learning_rate": 2.593472949637907e-06, "loss": 2.2449, "step": 25305 }, { "epoch": 0.816469084170345, "grad_norm": 0.3359375, "learning_rate": 2.5925884973802865e-06, "loss": 2.2422, "step": 25306 }, { "epoch": 0.8165013480241414, "grad_norm": 0.34375, "learning_rate": 2.5917041816945563e-06, "loss": 2.2563, "step": 25307 }, { "epoch": 0.8165336118779378, "grad_norm": 0.345703125, "learning_rate": 2.590820002590437e-06, "loss": 2.2525, "step": 25308 }, { "epoch": 0.8165658757317341, "grad_norm": 0.32421875, "learning_rate": 2.5899359600776746e-06, "loss": 2.2633, "step": 25309 }, { "epoch": 0.8165981395855305, "grad_norm": 0.330078125, "learning_rate": 2.5890520541659925e-06, "loss": 2.2603, "step": 25310 }, { "epoch": 0.8166304034393268, "grad_norm": 0.322265625, "learning_rate": 2.5881682848651177e-06, "loss": 2.2165, "step": 25311 }, { "epoch": 0.8166626672931232, "grad_norm": 0.33984375, "learning_rate": 2.5872846521847866e-06, "loss": 2.2127, "step": 25312 }, { "epoch": 0.8166949311469195, "grad_norm": 0.3203125, "learning_rate": 2.586401156134722e-06, "loss": 2.2149, "step": 25313 }, { "epoch": 0.8167271950007159, "grad_norm": 0.3359375, "learning_rate": 2.585517796724644e-06, "loss": 2.2571, "step": 25314 }, { "epoch": 0.8167594588545122, "grad_norm": 0.33203125, "learning_rate": 2.5846345739642824e-06, "loss": 2.2778, "step": 25315 }, { "epoch": 0.8167917227083086, "grad_norm": 0.361328125, "learning_rate": 2.5837514878633545e-06, "loss": 2.2347, "step": 25316 }, { "epoch": 0.8168239865621049, "grad_norm": 0.341796875, "learning_rate": 2.582868538431586e-06, "loss": 2.2605, "step": 25317 }, { "epoch": 0.8168562504159013, "grad_norm": 0.341796875, "learning_rate": 2.581985725678692e-06, "loss": 2.3045, "step": 25318 }, { "epoch": 0.8168885142696976, "grad_norm": 0.333984375, "learning_rate": 2.5811030496143874e-06, "loss": 2.2725, "step": 25319 }, { "epoch": 0.816920778123494, "grad_norm": 0.345703125, "learning_rate": 2.580220510248395e-06, "loss": 2.2547, "step": 25320 }, { "epoch": 0.8169530419772902, "grad_norm": 0.380859375, "learning_rate": 2.579338107590424e-06, "loss": 2.2044, "step": 25321 }, { "epoch": 0.8169853058310866, "grad_norm": 0.361328125, "learning_rate": 2.5784558416501873e-06, "loss": 2.1836, "step": 25322 }, { "epoch": 0.8170175696848829, "grad_norm": 0.36328125, "learning_rate": 2.5775737124374e-06, "loss": 2.2289, "step": 25323 }, { "epoch": 0.8170498335386793, "grad_norm": 0.396484375, "learning_rate": 2.576691719961769e-06, "loss": 2.2466, "step": 25324 }, { "epoch": 0.8170820973924756, "grad_norm": 0.34765625, "learning_rate": 2.5758098642329986e-06, "loss": 2.2619, "step": 25325 }, { "epoch": 0.817114361246272, "grad_norm": 0.359375, "learning_rate": 2.5749281452608057e-06, "loss": 2.2273, "step": 25326 }, { "epoch": 0.8171466251000684, "grad_norm": 0.3515625, "learning_rate": 2.5740465630548897e-06, "loss": 2.2119, "step": 25327 }, { "epoch": 0.8171788889538647, "grad_norm": 0.33984375, "learning_rate": 2.573165117624951e-06, "loss": 2.237, "step": 25328 }, { "epoch": 0.8172111528076611, "grad_norm": 0.498046875, "learning_rate": 2.572283808980698e-06, "loss": 2.2154, "step": 25329 }, { "epoch": 0.8172434166614574, "grad_norm": 0.34765625, "learning_rate": 2.5714026371318307e-06, "loss": 2.2371, "step": 25330 }, { "epoch": 0.8172756805152538, "grad_norm": 0.39453125, "learning_rate": 2.5705216020880436e-06, "loss": 2.2141, "step": 25331 }, { "epoch": 0.8173079443690501, "grad_norm": 0.333984375, "learning_rate": 2.569640703859044e-06, "loss": 2.2273, "step": 25332 }, { "epoch": 0.8173402082228465, "grad_norm": 0.330078125, "learning_rate": 2.568759942454515e-06, "loss": 2.2601, "step": 25333 }, { "epoch": 0.8173724720766428, "grad_norm": 0.3359375, "learning_rate": 2.5678793178841582e-06, "loss": 2.1859, "step": 25334 }, { "epoch": 0.8174047359304392, "grad_norm": 0.3359375, "learning_rate": 2.566998830157676e-06, "loss": 2.2238, "step": 25335 }, { "epoch": 0.8174369997842355, "grad_norm": 0.341796875, "learning_rate": 2.566118479284742e-06, "loss": 2.2218, "step": 25336 }, { "epoch": 0.8174692636380319, "grad_norm": 0.35546875, "learning_rate": 2.5652382652750572e-06, "loss": 2.1795, "step": 25337 }, { "epoch": 0.8175015274918281, "grad_norm": 0.416015625, "learning_rate": 2.5643581881383153e-06, "loss": 2.1665, "step": 25338 }, { "epoch": 0.8175337913456245, "grad_norm": 0.35546875, "learning_rate": 2.563478247884189e-06, "loss": 2.1702, "step": 25339 }, { "epoch": 0.8175660551994208, "grad_norm": 0.357421875, "learning_rate": 2.562598444522378e-06, "loss": 2.1373, "step": 25340 }, { "epoch": 0.8175983190532172, "grad_norm": 0.349609375, "learning_rate": 2.561718778062559e-06, "loss": 2.1565, "step": 25341 }, { "epoch": 0.8176305829070135, "grad_norm": 0.34375, "learning_rate": 2.5608392485144135e-06, "loss": 2.1559, "step": 25342 }, { "epoch": 0.8176628467608099, "grad_norm": 0.34375, "learning_rate": 2.55995985588763e-06, "loss": 2.1423, "step": 25343 }, { "epoch": 0.8176951106146062, "grad_norm": 0.341796875, "learning_rate": 2.5590806001918836e-06, "loss": 2.1568, "step": 25344 }, { "epoch": 0.8177273744684026, "grad_norm": 0.337890625, "learning_rate": 2.5582014814368493e-06, "loss": 2.1318, "step": 25345 }, { "epoch": 0.8177596383221989, "grad_norm": 0.345703125, "learning_rate": 2.557322499632211e-06, "loss": 2.1177, "step": 25346 }, { "epoch": 0.8177919021759953, "grad_norm": 0.341796875, "learning_rate": 2.5564436547876356e-06, "loss": 2.1823, "step": 25347 }, { "epoch": 0.8178241660297917, "grad_norm": 0.353515625, "learning_rate": 2.555564946912806e-06, "loss": 2.1669, "step": 25348 }, { "epoch": 0.817856429883588, "grad_norm": 0.359375, "learning_rate": 2.5546863760173913e-06, "loss": 2.1925, "step": 25349 }, { "epoch": 0.8178886937373844, "grad_norm": 0.34375, "learning_rate": 2.5538079421110543e-06, "loss": 2.1728, "step": 25350 }, { "epoch": 0.8179209575911807, "grad_norm": 0.384765625, "learning_rate": 2.552929645203477e-06, "loss": 2.0092, "step": 25351 }, { "epoch": 0.8179532214449771, "grad_norm": 0.376953125, "learning_rate": 2.552051485304319e-06, "loss": 2.0316, "step": 25352 }, { "epoch": 0.8179854852987734, "grad_norm": 0.396484375, "learning_rate": 2.551173462423244e-06, "loss": 2.0438, "step": 25353 }, { "epoch": 0.8180177491525698, "grad_norm": 0.3984375, "learning_rate": 2.5502955765699253e-06, "loss": 2.0172, "step": 25354 }, { "epoch": 0.818050013006366, "grad_norm": 0.3828125, "learning_rate": 2.5494178277540216e-06, "loss": 2.0362, "step": 25355 }, { "epoch": 0.8180822768601624, "grad_norm": 0.37890625, "learning_rate": 2.5485402159851896e-06, "loss": 2.0116, "step": 25356 }, { "epoch": 0.8181145407139587, "grad_norm": 0.365234375, "learning_rate": 2.5476627412731e-06, "loss": 2.0544, "step": 25357 }, { "epoch": 0.8181468045677551, "grad_norm": 0.37109375, "learning_rate": 2.546785403627404e-06, "loss": 2.0182, "step": 25358 }, { "epoch": 0.8181790684215514, "grad_norm": 0.369140625, "learning_rate": 2.545908203057757e-06, "loss": 2.0167, "step": 25359 }, { "epoch": 0.8182113322753478, "grad_norm": 0.36328125, "learning_rate": 2.5450311395738256e-06, "loss": 2.056, "step": 25360 }, { "epoch": 0.8182435961291441, "grad_norm": 0.380859375, "learning_rate": 2.5441542131852495e-06, "loss": 1.9513, "step": 25361 }, { "epoch": 0.8182758599829405, "grad_norm": 0.37109375, "learning_rate": 2.5432774239016876e-06, "loss": 1.9993, "step": 25362 }, { "epoch": 0.8183081238367368, "grad_norm": 0.396484375, "learning_rate": 2.5424007717327986e-06, "loss": 1.9519, "step": 25363 }, { "epoch": 0.8183403876905332, "grad_norm": 0.365234375, "learning_rate": 2.541524256688218e-06, "loss": 2.0178, "step": 25364 }, { "epoch": 0.8183726515443295, "grad_norm": 0.376953125, "learning_rate": 2.5406478787776e-06, "loss": 2.0215, "step": 25365 }, { "epoch": 0.8184049153981259, "grad_norm": 0.369140625, "learning_rate": 2.5397716380105985e-06, "loss": 2.0128, "step": 25366 }, { "epoch": 0.8184371792519222, "grad_norm": 0.359375, "learning_rate": 2.5388955343968456e-06, "loss": 1.9734, "step": 25367 }, { "epoch": 0.8184694431057186, "grad_norm": 0.365234375, "learning_rate": 2.5380195679459906e-06, "loss": 1.9815, "step": 25368 }, { "epoch": 0.818501706959515, "grad_norm": 0.365234375, "learning_rate": 2.5371437386676838e-06, "loss": 1.9785, "step": 25369 }, { "epoch": 0.8185339708133113, "grad_norm": 0.396484375, "learning_rate": 2.536268046571551e-06, "loss": 1.9814, "step": 25370 }, { "epoch": 0.8185662346671077, "grad_norm": 0.357421875, "learning_rate": 2.535392491667241e-06, "loss": 1.9944, "step": 25371 }, { "epoch": 0.8185984985209039, "grad_norm": 0.36328125, "learning_rate": 2.53451707396439e-06, "loss": 2.0064, "step": 25372 }, { "epoch": 0.8186307623747003, "grad_norm": 0.3515625, "learning_rate": 2.533641793472626e-06, "loss": 1.9974, "step": 25373 }, { "epoch": 0.8186630262284966, "grad_norm": 0.37109375, "learning_rate": 2.532766650201595e-06, "loss": 1.997, "step": 25374 }, { "epoch": 0.818695290082293, "grad_norm": 0.357421875, "learning_rate": 2.531891644160925e-06, "loss": 2.0674, "step": 25375 }, { "epoch": 0.8187275539360893, "grad_norm": 0.359375, "learning_rate": 2.531016775360242e-06, "loss": 2.0751, "step": 25376 }, { "epoch": 0.8187598177898857, "grad_norm": 0.35546875, "learning_rate": 2.530142043809186e-06, "loss": 2.0282, "step": 25377 }, { "epoch": 0.818792081643682, "grad_norm": 0.36328125, "learning_rate": 2.5292674495173805e-06, "loss": 2.0709, "step": 25378 }, { "epoch": 0.8188243454974784, "grad_norm": 0.337890625, "learning_rate": 2.5283929924944498e-06, "loss": 2.0412, "step": 25379 }, { "epoch": 0.8188566093512747, "grad_norm": 0.361328125, "learning_rate": 2.5275186727500243e-06, "loss": 2.0318, "step": 25380 }, { "epoch": 0.8188888732050711, "grad_norm": 0.357421875, "learning_rate": 2.5266444902937213e-06, "loss": 2.0586, "step": 25381 }, { "epoch": 0.8189211370588674, "grad_norm": 0.36328125, "learning_rate": 2.525770445135173e-06, "loss": 2.0183, "step": 25382 }, { "epoch": 0.8189534009126638, "grad_norm": 0.357421875, "learning_rate": 2.524896537283992e-06, "loss": 2.0408, "step": 25383 }, { "epoch": 0.8189856647664601, "grad_norm": 0.3515625, "learning_rate": 2.5240227667497987e-06, "loss": 2.0649, "step": 25384 }, { "epoch": 0.8190179286202565, "grad_norm": 0.357421875, "learning_rate": 2.5231491335422157e-06, "loss": 2.0271, "step": 25385 }, { "epoch": 0.8190501924740528, "grad_norm": 0.3515625, "learning_rate": 2.522275637670857e-06, "loss": 2.0578, "step": 25386 }, { "epoch": 0.8190824563278492, "grad_norm": 0.345703125, "learning_rate": 2.5214022791453322e-06, "loss": 2.0827, "step": 25387 }, { "epoch": 0.8191147201816456, "grad_norm": 0.34375, "learning_rate": 2.5205290579752616e-06, "loss": 2.0725, "step": 25388 }, { "epoch": 0.8191469840354418, "grad_norm": 0.3515625, "learning_rate": 2.5196559741702553e-06, "loss": 2.065, "step": 25389 }, { "epoch": 0.8191792478892382, "grad_norm": 0.341796875, "learning_rate": 2.5187830277399204e-06, "loss": 2.0627, "step": 25390 }, { "epoch": 0.8192115117430345, "grad_norm": 0.34375, "learning_rate": 2.517910218693873e-06, "loss": 2.0399, "step": 25391 }, { "epoch": 0.8192437755968309, "grad_norm": 0.345703125, "learning_rate": 2.5170375470417085e-06, "loss": 2.0645, "step": 25392 }, { "epoch": 0.8192760394506272, "grad_norm": 0.33203125, "learning_rate": 2.516165012793041e-06, "loss": 2.0316, "step": 25393 }, { "epoch": 0.8193083033044236, "grad_norm": 0.33203125, "learning_rate": 2.515292615957479e-06, "loss": 2.0392, "step": 25394 }, { "epoch": 0.8193405671582199, "grad_norm": 0.353515625, "learning_rate": 2.5144203565446123e-06, "loss": 2.0793, "step": 25395 }, { "epoch": 0.8193728310120163, "grad_norm": 0.33984375, "learning_rate": 2.5135482345640494e-06, "loss": 2.03, "step": 25396 }, { "epoch": 0.8194050948658126, "grad_norm": 0.353515625, "learning_rate": 2.5126762500253976e-06, "loss": 2.0521, "step": 25397 }, { "epoch": 0.819437358719609, "grad_norm": 0.34765625, "learning_rate": 2.5118044029382392e-06, "loss": 2.0754, "step": 25398 }, { "epoch": 0.8194696225734053, "grad_norm": 0.36328125, "learning_rate": 2.5109326933121785e-06, "loss": 2.041, "step": 25399 }, { "epoch": 0.8195018864272017, "grad_norm": 0.349609375, "learning_rate": 2.510061121156819e-06, "loss": 2.0978, "step": 25400 }, { "epoch": 0.819534150280998, "grad_norm": 0.337890625, "learning_rate": 2.5091896864817387e-06, "loss": 2.1326, "step": 25401 }, { "epoch": 0.8195664141347944, "grad_norm": 0.33984375, "learning_rate": 2.5083183892965427e-06, "loss": 2.0887, "step": 25402 }, { "epoch": 0.8195986779885907, "grad_norm": 0.421875, "learning_rate": 2.5074472296108154e-06, "loss": 2.1025, "step": 25403 }, { "epoch": 0.819630941842387, "grad_norm": 0.34765625, "learning_rate": 2.506576207434142e-06, "loss": 2.1369, "step": 25404 }, { "epoch": 0.8196632056961833, "grad_norm": 0.353515625, "learning_rate": 2.5057053227761202e-06, "loss": 2.1524, "step": 25405 }, { "epoch": 0.8196954695499797, "grad_norm": 0.375, "learning_rate": 2.5048345756463305e-06, "loss": 2.2144, "step": 25406 }, { "epoch": 0.819727733403776, "grad_norm": 0.34765625, "learning_rate": 2.5039639660543533e-06, "loss": 2.2149, "step": 25407 }, { "epoch": 0.8197599972575724, "grad_norm": 0.337890625, "learning_rate": 2.5030934940097817e-06, "loss": 2.2079, "step": 25408 }, { "epoch": 0.8197922611113688, "grad_norm": 0.337890625, "learning_rate": 2.5022231595221905e-06, "loss": 2.1574, "step": 25409 }, { "epoch": 0.8198245249651651, "grad_norm": 0.337890625, "learning_rate": 2.501352962601156e-06, "loss": 2.1417, "step": 25410 }, { "epoch": 0.8198567888189615, "grad_norm": 0.330078125, "learning_rate": 2.5004829032562672e-06, "loss": 2.1538, "step": 25411 }, { "epoch": 0.8198890526727578, "grad_norm": 0.330078125, "learning_rate": 2.499612981497094e-06, "loss": 2.157, "step": 25412 }, { "epoch": 0.8199213165265542, "grad_norm": 0.341796875, "learning_rate": 2.4987431973332115e-06, "loss": 2.1549, "step": 25413 }, { "epoch": 0.8199535803803505, "grad_norm": 0.34375, "learning_rate": 2.4978735507741977e-06, "loss": 2.1082, "step": 25414 }, { "epoch": 0.8199858442341469, "grad_norm": 0.349609375, "learning_rate": 2.4970040418296194e-06, "loss": 2.0396, "step": 25415 }, { "epoch": 0.8200181080879432, "grad_norm": 0.359375, "learning_rate": 2.4961346705090544e-06, "loss": 2.1235, "step": 25416 }, { "epoch": 0.8200503719417396, "grad_norm": 0.3515625, "learning_rate": 2.495265436822069e-06, "loss": 2.1125, "step": 25417 }, { "epoch": 0.8200826357955359, "grad_norm": 0.35546875, "learning_rate": 2.494396340778229e-06, "loss": 2.0908, "step": 25418 }, { "epoch": 0.8201148996493323, "grad_norm": 0.345703125, "learning_rate": 2.493527382387103e-06, "loss": 2.0883, "step": 25419 }, { "epoch": 0.8201471635031286, "grad_norm": 0.34375, "learning_rate": 2.492658561658257e-06, "loss": 2.0954, "step": 25420 }, { "epoch": 0.820179427356925, "grad_norm": 0.3515625, "learning_rate": 2.4917898786012497e-06, "loss": 2.1469, "step": 25421 }, { "epoch": 0.8202116912107212, "grad_norm": 0.341796875, "learning_rate": 2.4909213332256525e-06, "loss": 2.1552, "step": 25422 }, { "epoch": 0.8202439550645176, "grad_norm": 0.33203125, "learning_rate": 2.490052925541014e-06, "loss": 2.1428, "step": 25423 }, { "epoch": 0.8202762189183139, "grad_norm": 0.333984375, "learning_rate": 2.489184655556896e-06, "loss": 2.1578, "step": 25424 }, { "epoch": 0.8203084827721103, "grad_norm": 0.322265625, "learning_rate": 2.4883165232828665e-06, "loss": 2.1715, "step": 25425 }, { "epoch": 0.8203407466259066, "grad_norm": 0.328125, "learning_rate": 2.4874485287284644e-06, "loss": 2.1388, "step": 25426 }, { "epoch": 0.820373010479703, "grad_norm": 0.33203125, "learning_rate": 2.4865806719032534e-06, "loss": 2.1291, "step": 25427 }, { "epoch": 0.8204052743334994, "grad_norm": 0.34375, "learning_rate": 2.4857129528167915e-06, "loss": 2.0913, "step": 25428 }, { "epoch": 0.8204375381872957, "grad_norm": 0.33203125, "learning_rate": 2.4848453714786175e-06, "loss": 2.1718, "step": 25429 }, { "epoch": 0.8204698020410921, "grad_norm": 0.3515625, "learning_rate": 2.4839779278982915e-06, "loss": 2.1642, "step": 25430 }, { "epoch": 0.8205020658948884, "grad_norm": 0.349609375, "learning_rate": 2.4831106220853566e-06, "loss": 2.1448, "step": 25431 }, { "epoch": 0.8205343297486848, "grad_norm": 0.357421875, "learning_rate": 2.4822434540493566e-06, "loss": 2.1634, "step": 25432 }, { "epoch": 0.8205665936024811, "grad_norm": 0.341796875, "learning_rate": 2.4813764237998454e-06, "loss": 2.1405, "step": 25433 }, { "epoch": 0.8205988574562775, "grad_norm": 0.34765625, "learning_rate": 2.4805095313463604e-06, "loss": 2.1198, "step": 25434 }, { "epoch": 0.8206311213100738, "grad_norm": 0.33984375, "learning_rate": 2.479642776698441e-06, "loss": 2.1537, "step": 25435 }, { "epoch": 0.8206633851638702, "grad_norm": 0.345703125, "learning_rate": 2.478776159865637e-06, "loss": 2.0912, "step": 25436 }, { "epoch": 0.8206956490176665, "grad_norm": 0.33984375, "learning_rate": 2.4779096808574802e-06, "loss": 2.1764, "step": 25437 }, { "epoch": 0.8207279128714629, "grad_norm": 0.33203125, "learning_rate": 2.477043339683509e-06, "loss": 2.145, "step": 25438 }, { "epoch": 0.8207601767252591, "grad_norm": 0.32421875, "learning_rate": 2.4761771363532625e-06, "loss": 2.1285, "step": 25439 }, { "epoch": 0.8207924405790555, "grad_norm": 0.337890625, "learning_rate": 2.475311070876275e-06, "loss": 2.1617, "step": 25440 }, { "epoch": 0.8208247044328518, "grad_norm": 0.337890625, "learning_rate": 2.4744451432620736e-06, "loss": 2.1613, "step": 25441 }, { "epoch": 0.8208569682866482, "grad_norm": 0.482421875, "learning_rate": 2.473579353520199e-06, "loss": 2.1284, "step": 25442 }, { "epoch": 0.8208892321404445, "grad_norm": 0.3671875, "learning_rate": 2.4727137016601755e-06, "loss": 2.1603, "step": 25443 }, { "epoch": 0.8209214959942409, "grad_norm": 0.3515625, "learning_rate": 2.4718481876915285e-06, "loss": 2.1547, "step": 25444 }, { "epoch": 0.8209537598480372, "grad_norm": 0.333984375, "learning_rate": 2.4709828116237938e-06, "loss": 2.1546, "step": 25445 }, { "epoch": 0.8209860237018336, "grad_norm": 0.333984375, "learning_rate": 2.4701175734664873e-06, "loss": 2.1228, "step": 25446 }, { "epoch": 0.8210182875556299, "grad_norm": 0.369140625, "learning_rate": 2.469252473229143e-06, "loss": 2.2194, "step": 25447 }, { "epoch": 0.8210505514094263, "grad_norm": 0.341796875, "learning_rate": 2.468387510921276e-06, "loss": 2.1524, "step": 25448 }, { "epoch": 0.8210828152632227, "grad_norm": 0.34375, "learning_rate": 2.4675226865524065e-06, "loss": 2.1755, "step": 25449 }, { "epoch": 0.821115079117019, "grad_norm": 0.345703125, "learning_rate": 2.4666580001320594e-06, "loss": 2.1628, "step": 25450 }, { "epoch": 0.8211473429708154, "grad_norm": 0.337890625, "learning_rate": 2.4657934516697505e-06, "loss": 2.1792, "step": 25451 }, { "epoch": 0.8211796068246117, "grad_norm": 0.33984375, "learning_rate": 2.4649290411749908e-06, "loss": 2.1927, "step": 25452 }, { "epoch": 0.8212118706784081, "grad_norm": 0.3359375, "learning_rate": 2.4640647686573077e-06, "loss": 2.1659, "step": 25453 }, { "epoch": 0.8212441345322044, "grad_norm": 0.341796875, "learning_rate": 2.463200634126198e-06, "loss": 2.1655, "step": 25454 }, { "epoch": 0.8212763983860007, "grad_norm": 0.359375, "learning_rate": 2.462336637591183e-06, "loss": 2.1763, "step": 25455 }, { "epoch": 0.821308662239797, "grad_norm": 0.341796875, "learning_rate": 2.4614727790617785e-06, "loss": 2.1563, "step": 25456 }, { "epoch": 0.8213409260935934, "grad_norm": 0.3515625, "learning_rate": 2.46060905854748e-06, "loss": 2.1089, "step": 25457 }, { "epoch": 0.8213731899473897, "grad_norm": 0.34375, "learning_rate": 2.4597454760577996e-06, "loss": 2.1036, "step": 25458 }, { "epoch": 0.8214054538011861, "grad_norm": 0.353515625, "learning_rate": 2.4588820316022536e-06, "loss": 2.1551, "step": 25459 }, { "epoch": 0.8214377176549824, "grad_norm": 0.3359375, "learning_rate": 2.4580187251903292e-06, "loss": 2.1791, "step": 25460 }, { "epoch": 0.8214699815087788, "grad_norm": 0.33984375, "learning_rate": 2.4571555568315417e-06, "loss": 2.1561, "step": 25461 }, { "epoch": 0.8215022453625751, "grad_norm": 0.328125, "learning_rate": 2.4562925265353857e-06, "loss": 2.2186, "step": 25462 }, { "epoch": 0.8215345092163715, "grad_norm": 0.32421875, "learning_rate": 2.4554296343113604e-06, "loss": 2.1662, "step": 25463 }, { "epoch": 0.8215667730701678, "grad_norm": 0.34765625, "learning_rate": 2.4545668801689707e-06, "loss": 2.1381, "step": 25464 }, { "epoch": 0.8215990369239642, "grad_norm": 0.33203125, "learning_rate": 2.4537042641177064e-06, "loss": 2.1745, "step": 25465 }, { "epoch": 0.8216313007777605, "grad_norm": 0.33984375, "learning_rate": 2.452841786167063e-06, "loss": 2.1818, "step": 25466 }, { "epoch": 0.8216635646315569, "grad_norm": 0.3203125, "learning_rate": 2.45197944632654e-06, "loss": 2.1556, "step": 25467 }, { "epoch": 0.8216958284853532, "grad_norm": 0.34375, "learning_rate": 2.451117244605624e-06, "loss": 2.1677, "step": 25468 }, { "epoch": 0.8217280923391496, "grad_norm": 0.337890625, "learning_rate": 2.4502551810138045e-06, "loss": 2.104, "step": 25469 }, { "epoch": 0.821760356192946, "grad_norm": 0.34765625, "learning_rate": 2.4493932555605757e-06, "loss": 2.0997, "step": 25470 }, { "epoch": 0.8217926200467423, "grad_norm": 0.345703125, "learning_rate": 2.4485314682554217e-06, "loss": 2.1735, "step": 25471 }, { "epoch": 0.8218248839005386, "grad_norm": 0.326171875, "learning_rate": 2.4476698191078262e-06, "loss": 2.1333, "step": 25472 }, { "epoch": 0.8218571477543349, "grad_norm": 0.3359375, "learning_rate": 2.4468083081272786e-06, "loss": 2.1913, "step": 25473 }, { "epoch": 0.8218894116081313, "grad_norm": 0.330078125, "learning_rate": 2.445946935323261e-06, "loss": 2.2287, "step": 25474 }, { "epoch": 0.8219216754619276, "grad_norm": 0.384765625, "learning_rate": 2.4450857007052497e-06, "loss": 2.207, "step": 25475 }, { "epoch": 0.821953939315724, "grad_norm": 0.33203125, "learning_rate": 2.44422460428273e-06, "loss": 2.1561, "step": 25476 }, { "epoch": 0.8219862031695203, "grad_norm": 0.33203125, "learning_rate": 2.443363646065181e-06, "loss": 2.1923, "step": 25477 }, { "epoch": 0.8220184670233167, "grad_norm": 0.33203125, "learning_rate": 2.442502826062072e-06, "loss": 2.1587, "step": 25478 }, { "epoch": 0.822050730877113, "grad_norm": 0.326171875, "learning_rate": 2.4416421442828853e-06, "loss": 2.168, "step": 25479 }, { "epoch": 0.8220829947309094, "grad_norm": 0.322265625, "learning_rate": 2.4407816007370916e-06, "loss": 2.2041, "step": 25480 }, { "epoch": 0.8221152585847057, "grad_norm": 0.333984375, "learning_rate": 2.439921195434165e-06, "loss": 2.1534, "step": 25481 }, { "epoch": 0.8221475224385021, "grad_norm": 0.333984375, "learning_rate": 2.439060928383578e-06, "loss": 2.2104, "step": 25482 }, { "epoch": 0.8221797862922984, "grad_norm": 0.328125, "learning_rate": 2.4382007995947915e-06, "loss": 2.1578, "step": 25483 }, { "epoch": 0.8222120501460948, "grad_norm": 0.326171875, "learning_rate": 2.4373408090772876e-06, "loss": 2.1705, "step": 25484 }, { "epoch": 0.8222443139998911, "grad_norm": 0.322265625, "learning_rate": 2.436480956840514e-06, "loss": 2.2083, "step": 25485 }, { "epoch": 0.8222765778536875, "grad_norm": 0.32421875, "learning_rate": 2.4356212428939478e-06, "loss": 2.174, "step": 25486 }, { "epoch": 0.8223088417074838, "grad_norm": 0.330078125, "learning_rate": 2.4347616672470553e-06, "loss": 2.1352, "step": 25487 }, { "epoch": 0.8223411055612802, "grad_norm": 0.33984375, "learning_rate": 2.433902229909285e-06, "loss": 2.1809, "step": 25488 }, { "epoch": 0.8223733694150765, "grad_norm": 0.33984375, "learning_rate": 2.4330429308901032e-06, "loss": 2.1701, "step": 25489 }, { "epoch": 0.8224056332688728, "grad_norm": 0.333984375, "learning_rate": 2.432183770198977e-06, "loss": 2.1462, "step": 25490 }, { "epoch": 0.8224378971226692, "grad_norm": 0.345703125, "learning_rate": 2.4313247478453486e-06, "loss": 2.1688, "step": 25491 }, { "epoch": 0.8224701609764655, "grad_norm": 0.33203125, "learning_rate": 2.4304658638386844e-06, "loss": 2.147, "step": 25492 }, { "epoch": 0.8225024248302619, "grad_norm": 0.33203125, "learning_rate": 2.429607118188435e-06, "loss": 2.1668, "step": 25493 }, { "epoch": 0.8225346886840582, "grad_norm": 0.33984375, "learning_rate": 2.428748510904049e-06, "loss": 2.1553, "step": 25494 }, { "epoch": 0.8225669525378546, "grad_norm": 0.33984375, "learning_rate": 2.4278900419949845e-06, "loss": 2.2108, "step": 25495 }, { "epoch": 0.8225992163916509, "grad_norm": 0.34765625, "learning_rate": 2.427031711470687e-06, "loss": 2.1914, "step": 25496 }, { "epoch": 0.8226314802454473, "grad_norm": 0.333984375, "learning_rate": 2.4261735193406003e-06, "loss": 2.1796, "step": 25497 }, { "epoch": 0.8226637440992436, "grad_norm": 0.337890625, "learning_rate": 2.425315465614179e-06, "loss": 2.1981, "step": 25498 }, { "epoch": 0.82269600795304, "grad_norm": 0.337890625, "learning_rate": 2.4244575503008638e-06, "loss": 2.1628, "step": 25499 }, { "epoch": 0.8227282718068363, "grad_norm": 0.326171875, "learning_rate": 2.423599773410095e-06, "loss": 2.1585, "step": 25500 }, { "epoch": 0.8227605356606327, "grad_norm": 0.341796875, "learning_rate": 2.4227421349513208e-06, "loss": 2.1477, "step": 25501 }, { "epoch": 0.822792799514429, "grad_norm": 0.3359375, "learning_rate": 2.4218846349339796e-06, "loss": 2.1669, "step": 25502 }, { "epoch": 0.8228250633682254, "grad_norm": 0.33984375, "learning_rate": 2.4210272733675042e-06, "loss": 2.1336, "step": 25503 }, { "epoch": 0.8228573272220217, "grad_norm": 0.333984375, "learning_rate": 2.4201700502613407e-06, "loss": 2.1468, "step": 25504 }, { "epoch": 0.822889591075818, "grad_norm": 0.333984375, "learning_rate": 2.419312965624919e-06, "loss": 2.1862, "step": 25505 }, { "epoch": 0.8229218549296143, "grad_norm": 0.328125, "learning_rate": 2.4184560194676723e-06, "loss": 2.1648, "step": 25506 }, { "epoch": 0.8229541187834107, "grad_norm": 0.328125, "learning_rate": 2.41759921179904e-06, "loss": 2.1675, "step": 25507 }, { "epoch": 0.822986382637207, "grad_norm": 0.337890625, "learning_rate": 2.4167425426284485e-06, "loss": 2.1347, "step": 25508 }, { "epoch": 0.8230186464910034, "grad_norm": 0.341796875, "learning_rate": 2.4158860119653226e-06, "loss": 2.177, "step": 25509 }, { "epoch": 0.8230509103447998, "grad_norm": 0.326171875, "learning_rate": 2.4150296198191013e-06, "loss": 2.1889, "step": 25510 }, { "epoch": 0.8230831741985961, "grad_norm": 0.35546875, "learning_rate": 2.4141733661992055e-06, "loss": 2.1982, "step": 25511 }, { "epoch": 0.8231154380523925, "grad_norm": 0.427734375, "learning_rate": 2.413317251115059e-06, "loss": 2.1326, "step": 25512 }, { "epoch": 0.8231477019061888, "grad_norm": 0.330078125, "learning_rate": 2.412461274576085e-06, "loss": 2.1806, "step": 25513 }, { "epoch": 0.8231799657599852, "grad_norm": 0.33984375, "learning_rate": 2.411605436591705e-06, "loss": 2.1936, "step": 25514 }, { "epoch": 0.8232122296137815, "grad_norm": 0.33984375, "learning_rate": 2.410749737171349e-06, "loss": 2.1776, "step": 25515 }, { "epoch": 0.8232444934675779, "grad_norm": 0.328125, "learning_rate": 2.409894176324422e-06, "loss": 2.202, "step": 25516 }, { "epoch": 0.8232767573213742, "grad_norm": 0.388671875, "learning_rate": 2.409038754060347e-06, "loss": 2.1566, "step": 25517 }, { "epoch": 0.8233090211751706, "grad_norm": 0.376953125, "learning_rate": 2.408183470388548e-06, "loss": 2.1198, "step": 25518 }, { "epoch": 0.8233412850289669, "grad_norm": 0.333984375, "learning_rate": 2.407328325318426e-06, "loss": 2.175, "step": 25519 }, { "epoch": 0.8233735488827633, "grad_norm": 0.337890625, "learning_rate": 2.4064733188593998e-06, "loss": 2.1715, "step": 25520 }, { "epoch": 0.8234058127365596, "grad_norm": 0.33984375, "learning_rate": 2.4056184510208866e-06, "loss": 2.1954, "step": 25521 }, { "epoch": 0.823438076590356, "grad_norm": 0.341796875, "learning_rate": 2.4047637218122833e-06, "loss": 2.1684, "step": 25522 }, { "epoch": 0.8234703404441522, "grad_norm": 0.345703125, "learning_rate": 2.4039091312430097e-06, "loss": 2.1923, "step": 25523 }, { "epoch": 0.8235026042979486, "grad_norm": 0.35546875, "learning_rate": 2.4030546793224667e-06, "loss": 2.1421, "step": 25524 }, { "epoch": 0.8235348681517449, "grad_norm": 0.37890625, "learning_rate": 2.4022003660600588e-06, "loss": 2.0502, "step": 25525 }, { "epoch": 0.8235671320055413, "grad_norm": 0.36328125, "learning_rate": 2.4013461914651936e-06, "loss": 2.0628, "step": 25526 }, { "epoch": 0.8235993958593376, "grad_norm": 0.361328125, "learning_rate": 2.4004921555472715e-06, "loss": 1.9952, "step": 25527 }, { "epoch": 0.823631659713134, "grad_norm": 0.3671875, "learning_rate": 2.39963825831569e-06, "loss": 2.047, "step": 25528 }, { "epoch": 0.8236639235669304, "grad_norm": 0.35546875, "learning_rate": 2.398784499779855e-06, "loss": 2.0594, "step": 25529 }, { "epoch": 0.8236961874207267, "grad_norm": 0.35546875, "learning_rate": 2.3979308799491595e-06, "loss": 2.0804, "step": 25530 }, { "epoch": 0.8237284512745231, "grad_norm": 0.33984375, "learning_rate": 2.397077398832997e-06, "loss": 2.0663, "step": 25531 }, { "epoch": 0.8237607151283194, "grad_norm": 0.349609375, "learning_rate": 2.3962240564407685e-06, "loss": 2.0805, "step": 25532 }, { "epoch": 0.8237929789821158, "grad_norm": 0.36328125, "learning_rate": 2.3953708527818628e-06, "loss": 2.0716, "step": 25533 }, { "epoch": 0.8238252428359121, "grad_norm": 0.349609375, "learning_rate": 2.3945177878656697e-06, "loss": 2.0519, "step": 25534 }, { "epoch": 0.8238575066897085, "grad_norm": 0.37109375, "learning_rate": 2.3936648617015868e-06, "loss": 2.03, "step": 25535 }, { "epoch": 0.8238897705435048, "grad_norm": 0.384765625, "learning_rate": 2.392812074298994e-06, "loss": 1.9998, "step": 25536 }, { "epoch": 0.8239220343973012, "grad_norm": 0.376953125, "learning_rate": 2.391959425667281e-06, "loss": 1.9947, "step": 25537 }, { "epoch": 0.8239542982510975, "grad_norm": 0.380859375, "learning_rate": 2.3911069158158355e-06, "loss": 1.9709, "step": 25538 }, { "epoch": 0.8239865621048938, "grad_norm": 0.373046875, "learning_rate": 2.39025454475404e-06, "loss": 2.033, "step": 25539 }, { "epoch": 0.8240188259586901, "grad_norm": 0.357421875, "learning_rate": 2.389402312491273e-06, "loss": 1.9801, "step": 25540 }, { "epoch": 0.8240510898124865, "grad_norm": 0.375, "learning_rate": 2.3885502190369207e-06, "loss": 1.9546, "step": 25541 }, { "epoch": 0.8240833536662828, "grad_norm": 0.408203125, "learning_rate": 2.387698264400361e-06, "loss": 1.995, "step": 25542 }, { "epoch": 0.8241156175200792, "grad_norm": 0.369140625, "learning_rate": 2.3868464485909708e-06, "loss": 1.9718, "step": 25543 }, { "epoch": 0.8241478813738755, "grad_norm": 0.361328125, "learning_rate": 2.385994771618123e-06, "loss": 1.9601, "step": 25544 }, { "epoch": 0.8241801452276719, "grad_norm": 0.392578125, "learning_rate": 2.3851432334911998e-06, "loss": 1.9933, "step": 25545 }, { "epoch": 0.8242124090814682, "grad_norm": 0.390625, "learning_rate": 2.384291834219569e-06, "loss": 2.0124, "step": 25546 }, { "epoch": 0.8242446729352646, "grad_norm": 0.357421875, "learning_rate": 2.3834405738126e-06, "loss": 1.9832, "step": 25547 }, { "epoch": 0.8242769367890609, "grad_norm": 0.37890625, "learning_rate": 2.382589452279664e-06, "loss": 1.9734, "step": 25548 }, { "epoch": 0.8243092006428573, "grad_norm": 0.396484375, "learning_rate": 2.381738469630142e-06, "loss": 1.9638, "step": 25549 }, { "epoch": 0.8243414644966537, "grad_norm": 0.400390625, "learning_rate": 2.380887625873382e-06, "loss": 1.9798, "step": 25550 }, { "epoch": 0.82437372835045, "grad_norm": 0.365234375, "learning_rate": 2.3800369210187585e-06, "loss": 1.9711, "step": 25551 }, { "epoch": 0.8244059922042464, "grad_norm": 0.3671875, "learning_rate": 2.379186355075642e-06, "loss": 1.9792, "step": 25552 }, { "epoch": 0.8244382560580427, "grad_norm": 0.396484375, "learning_rate": 2.3783359280533805e-06, "loss": 1.9579, "step": 25553 }, { "epoch": 0.8244705199118391, "grad_norm": 0.380859375, "learning_rate": 2.3774856399613466e-06, "loss": 1.9123, "step": 25554 }, { "epoch": 0.8245027837656353, "grad_norm": 0.375, "learning_rate": 2.376635490808894e-06, "loss": 1.9567, "step": 25555 }, { "epoch": 0.8245350476194317, "grad_norm": 0.365234375, "learning_rate": 2.3757854806053804e-06, "loss": 1.9241, "step": 25556 }, { "epoch": 0.824567311473228, "grad_norm": 0.373046875, "learning_rate": 2.374935609360165e-06, "loss": 1.898, "step": 25557 }, { "epoch": 0.8245995753270244, "grad_norm": 0.384765625, "learning_rate": 2.374085877082602e-06, "loss": 1.9547, "step": 25558 }, { "epoch": 0.8246318391808207, "grad_norm": 0.380859375, "learning_rate": 2.3732362837820422e-06, "loss": 1.8999, "step": 25559 }, { "epoch": 0.8246641030346171, "grad_norm": 0.396484375, "learning_rate": 2.3723868294678395e-06, "loss": 1.9734, "step": 25560 }, { "epoch": 0.8246963668884134, "grad_norm": 0.37109375, "learning_rate": 2.3715375141493455e-06, "loss": 1.9935, "step": 25561 }, { "epoch": 0.8247286307422098, "grad_norm": 0.392578125, "learning_rate": 2.370688337835904e-06, "loss": 2.0103, "step": 25562 }, { "epoch": 0.8247608945960061, "grad_norm": 0.3671875, "learning_rate": 2.3698393005368673e-06, "loss": 1.968, "step": 25563 }, { "epoch": 0.8247931584498025, "grad_norm": 0.353515625, "learning_rate": 2.3689904022615784e-06, "loss": 1.9651, "step": 25564 }, { "epoch": 0.8248254223035988, "grad_norm": 0.361328125, "learning_rate": 2.368141643019379e-06, "loss": 1.963, "step": 25565 }, { "epoch": 0.8248576861573952, "grad_norm": 0.3671875, "learning_rate": 2.3672930228196175e-06, "loss": 1.982, "step": 25566 }, { "epoch": 0.8248899500111915, "grad_norm": 0.37109375, "learning_rate": 2.3664445416716328e-06, "loss": 1.9891, "step": 25567 }, { "epoch": 0.8249222138649879, "grad_norm": 0.390625, "learning_rate": 2.3655961995847574e-06, "loss": 1.939, "step": 25568 }, { "epoch": 0.8249544777187842, "grad_norm": 0.396484375, "learning_rate": 2.3647479965683405e-06, "loss": 1.9539, "step": 25569 }, { "epoch": 0.8249867415725806, "grad_norm": 0.365234375, "learning_rate": 2.363899932631713e-06, "loss": 1.9329, "step": 25570 }, { "epoch": 0.825019005426377, "grad_norm": 0.435546875, "learning_rate": 2.363052007784207e-06, "loss": 1.8884, "step": 25571 }, { "epoch": 0.8250512692801732, "grad_norm": 0.40234375, "learning_rate": 2.3622042220351606e-06, "loss": 1.8718, "step": 25572 }, { "epoch": 0.8250835331339696, "grad_norm": 0.392578125, "learning_rate": 2.3613565753939047e-06, "loss": 1.927, "step": 25573 }, { "epoch": 0.8251157969877659, "grad_norm": 0.3671875, "learning_rate": 2.36050906786977e-06, "loss": 1.9563, "step": 25574 }, { "epoch": 0.8251480608415623, "grad_norm": 0.359375, "learning_rate": 2.3596616994720787e-06, "loss": 1.9499, "step": 25575 }, { "epoch": 0.8251803246953586, "grad_norm": 0.361328125, "learning_rate": 2.358814470210167e-06, "loss": 1.9924, "step": 25576 }, { "epoch": 0.825212588549155, "grad_norm": 0.3515625, "learning_rate": 2.357967380093358e-06, "loss": 2.0083, "step": 25577 }, { "epoch": 0.8252448524029513, "grad_norm": 0.3515625, "learning_rate": 2.3571204291309697e-06, "loss": 1.9518, "step": 25578 }, { "epoch": 0.8252771162567477, "grad_norm": 0.357421875, "learning_rate": 2.35627361733233e-06, "loss": 1.9862, "step": 25579 }, { "epoch": 0.825309380110544, "grad_norm": 0.375, "learning_rate": 2.355426944706767e-06, "loss": 2.0052, "step": 25580 }, { "epoch": 0.8253416439643404, "grad_norm": 0.357421875, "learning_rate": 2.3545804112635866e-06, "loss": 2.0135, "step": 25581 }, { "epoch": 0.8253739078181367, "grad_norm": 0.349609375, "learning_rate": 2.3537340170121126e-06, "loss": 1.9594, "step": 25582 }, { "epoch": 0.8254061716719331, "grad_norm": 0.359375, "learning_rate": 2.3528877619616707e-06, "loss": 2.0115, "step": 25583 }, { "epoch": 0.8254384355257294, "grad_norm": 0.341796875, "learning_rate": 2.3520416461215587e-06, "loss": 1.9952, "step": 25584 }, { "epoch": 0.8254706993795258, "grad_norm": 0.361328125, "learning_rate": 2.351195669501103e-06, "loss": 1.9808, "step": 25585 }, { "epoch": 0.8255029632333221, "grad_norm": 0.353515625, "learning_rate": 2.35034983210961e-06, "loss": 1.973, "step": 25586 }, { "epoch": 0.8255352270871185, "grad_norm": 0.357421875, "learning_rate": 2.3495041339563885e-06, "loss": 1.978, "step": 25587 }, { "epoch": 0.8255674909409147, "grad_norm": 0.3515625, "learning_rate": 2.348658575050753e-06, "loss": 1.9758, "step": 25588 }, { "epoch": 0.8255997547947111, "grad_norm": 0.361328125, "learning_rate": 2.3478131554020075e-06, "loss": 1.9392, "step": 25589 }, { "epoch": 0.8256320186485075, "grad_norm": 0.353515625, "learning_rate": 2.3469678750194557e-06, "loss": 1.9003, "step": 25590 }, { "epoch": 0.8256642825023038, "grad_norm": 0.37109375, "learning_rate": 2.346122733912408e-06, "loss": 1.9281, "step": 25591 }, { "epoch": 0.8256965463561002, "grad_norm": 0.3671875, "learning_rate": 2.3452777320901623e-06, "loss": 1.9317, "step": 25592 }, { "epoch": 0.8257288102098965, "grad_norm": 0.384765625, "learning_rate": 2.3444328695620183e-06, "loss": 1.9421, "step": 25593 }, { "epoch": 0.8257610740636929, "grad_norm": 0.369140625, "learning_rate": 2.343588146337281e-06, "loss": 1.9399, "step": 25594 }, { "epoch": 0.8257933379174892, "grad_norm": 0.396484375, "learning_rate": 2.342743562425247e-06, "loss": 1.9084, "step": 25595 }, { "epoch": 0.8258256017712856, "grad_norm": 0.37109375, "learning_rate": 2.3418991178352067e-06, "loss": 1.9612, "step": 25596 }, { "epoch": 0.8258578656250819, "grad_norm": 0.365234375, "learning_rate": 2.3410548125764647e-06, "loss": 1.9565, "step": 25597 }, { "epoch": 0.8258901294788783, "grad_norm": 0.37890625, "learning_rate": 2.3402106466583084e-06, "loss": 1.9075, "step": 25598 }, { "epoch": 0.8259223933326746, "grad_norm": 0.400390625, "learning_rate": 2.3393666200900286e-06, "loss": 1.8697, "step": 25599 }, { "epoch": 0.825954657186471, "grad_norm": 0.357421875, "learning_rate": 2.3385227328809215e-06, "loss": 1.9351, "step": 25600 }, { "epoch": 0.8259869210402673, "grad_norm": 0.373046875, "learning_rate": 2.337678985040273e-06, "loss": 1.9108, "step": 25601 }, { "epoch": 0.8260191848940637, "grad_norm": 0.37890625, "learning_rate": 2.336835376577367e-06, "loss": 1.9667, "step": 25602 }, { "epoch": 0.82605144874786, "grad_norm": 0.373046875, "learning_rate": 2.335991907501496e-06, "loss": 1.9286, "step": 25603 }, { "epoch": 0.8260837126016564, "grad_norm": 0.365234375, "learning_rate": 2.3351485778219415e-06, "loss": 1.9283, "step": 25604 }, { "epoch": 0.8261159764554526, "grad_norm": 0.375, "learning_rate": 2.334305387547984e-06, "loss": 1.9224, "step": 25605 }, { "epoch": 0.826148240309249, "grad_norm": 0.36328125, "learning_rate": 2.3334623366889045e-06, "loss": 1.9077, "step": 25606 }, { "epoch": 0.8261805041630453, "grad_norm": 0.359375, "learning_rate": 2.332619425253989e-06, "loss": 1.9004, "step": 25607 }, { "epoch": 0.8262127680168417, "grad_norm": 0.359375, "learning_rate": 2.3317766532525096e-06, "loss": 1.9799, "step": 25608 }, { "epoch": 0.826245031870638, "grad_norm": 0.35546875, "learning_rate": 2.3309340206937413e-06, "loss": 2.02, "step": 25609 }, { "epoch": 0.8262772957244344, "grad_norm": 0.375, "learning_rate": 2.330091527586966e-06, "loss": 1.9392, "step": 25610 }, { "epoch": 0.8263095595782308, "grad_norm": 0.359375, "learning_rate": 2.329249173941455e-06, "loss": 1.9946, "step": 25611 }, { "epoch": 0.8263418234320271, "grad_norm": 0.353515625, "learning_rate": 2.328406959766474e-06, "loss": 2.0219, "step": 25612 }, { "epoch": 0.8263740872858235, "grad_norm": 0.349609375, "learning_rate": 2.3275648850713045e-06, "loss": 2.0077, "step": 25613 }, { "epoch": 0.8264063511396198, "grad_norm": 0.36328125, "learning_rate": 2.326722949865208e-06, "loss": 2.008, "step": 25614 }, { "epoch": 0.8264386149934162, "grad_norm": 0.345703125, "learning_rate": 2.32588115415745e-06, "loss": 2.0487, "step": 25615 }, { "epoch": 0.8264708788472125, "grad_norm": 0.353515625, "learning_rate": 2.3250394979573037e-06, "loss": 2.0305, "step": 25616 }, { "epoch": 0.8265031427010089, "grad_norm": 0.359375, "learning_rate": 2.324197981274029e-06, "loss": 2.0369, "step": 25617 }, { "epoch": 0.8265354065548052, "grad_norm": 0.359375, "learning_rate": 2.3233566041168865e-06, "loss": 1.9967, "step": 25618 }, { "epoch": 0.8265676704086016, "grad_norm": 0.357421875, "learning_rate": 2.3225153664951435e-06, "loss": 2.0613, "step": 25619 }, { "epoch": 0.8265999342623979, "grad_norm": 0.3515625, "learning_rate": 2.321674268418057e-06, "loss": 2.05, "step": 25620 }, { "epoch": 0.8266321981161943, "grad_norm": 0.35546875, "learning_rate": 2.3208333098948805e-06, "loss": 2.034, "step": 25621 }, { "epoch": 0.8266644619699905, "grad_norm": 0.35546875, "learning_rate": 2.3199924909348778e-06, "loss": 2.0575, "step": 25622 }, { "epoch": 0.826696725823787, "grad_norm": 0.353515625, "learning_rate": 2.3191518115473027e-06, "loss": 2.0901, "step": 25623 }, { "epoch": 0.8267289896775832, "grad_norm": 0.34375, "learning_rate": 2.3183112717414035e-06, "loss": 2.0722, "step": 25624 }, { "epoch": 0.8267612535313796, "grad_norm": 0.345703125, "learning_rate": 2.3174708715264396e-06, "loss": 2.0701, "step": 25625 }, { "epoch": 0.8267935173851759, "grad_norm": 0.369140625, "learning_rate": 2.3166306109116576e-06, "loss": 2.0095, "step": 25626 }, { "epoch": 0.8268257812389723, "grad_norm": 0.376953125, "learning_rate": 2.3157904899063024e-06, "loss": 1.9965, "step": 25627 }, { "epoch": 0.8268580450927686, "grad_norm": 0.359375, "learning_rate": 2.314950508519631e-06, "loss": 1.9668, "step": 25628 }, { "epoch": 0.826890308946565, "grad_norm": 0.39453125, "learning_rate": 2.3141106667608837e-06, "loss": 1.9247, "step": 25629 }, { "epoch": 0.8269225728003614, "grad_norm": 0.369140625, "learning_rate": 2.313270964639301e-06, "loss": 1.9565, "step": 25630 }, { "epoch": 0.8269548366541577, "grad_norm": 0.38671875, "learning_rate": 2.3124314021641354e-06, "loss": 1.947, "step": 25631 }, { "epoch": 0.8269871005079541, "grad_norm": 0.380859375, "learning_rate": 2.311591979344621e-06, "loss": 1.9385, "step": 25632 }, { "epoch": 0.8270193643617504, "grad_norm": 0.365234375, "learning_rate": 2.3107526961899966e-06, "loss": 1.9308, "step": 25633 }, { "epoch": 0.8270516282155468, "grad_norm": 0.380859375, "learning_rate": 2.309913552709508e-06, "loss": 1.9481, "step": 25634 }, { "epoch": 0.8270838920693431, "grad_norm": 0.369140625, "learning_rate": 2.3090745489123854e-06, "loss": 1.9946, "step": 25635 }, { "epoch": 0.8271161559231395, "grad_norm": 0.38671875, "learning_rate": 2.3082356848078658e-06, "loss": 1.9455, "step": 25636 }, { "epoch": 0.8271484197769358, "grad_norm": 0.3671875, "learning_rate": 2.307396960405181e-06, "loss": 1.951, "step": 25637 }, { "epoch": 0.8271806836307322, "grad_norm": 0.361328125, "learning_rate": 2.306558375713567e-06, "loss": 1.9603, "step": 25638 }, { "epoch": 0.8272129474845284, "grad_norm": 0.384765625, "learning_rate": 2.3057199307422523e-06, "loss": 1.9356, "step": 25639 }, { "epoch": 0.8272452113383248, "grad_norm": 0.36328125, "learning_rate": 2.3048816255004617e-06, "loss": 1.9611, "step": 25640 }, { "epoch": 0.8272774751921211, "grad_norm": 0.376953125, "learning_rate": 2.30404345999743e-06, "loss": 1.9627, "step": 25641 }, { "epoch": 0.8273097390459175, "grad_norm": 0.361328125, "learning_rate": 2.303205434242382e-06, "loss": 1.9241, "step": 25642 }, { "epoch": 0.8273420028997138, "grad_norm": 0.34765625, "learning_rate": 2.3023675482445344e-06, "loss": 1.9942, "step": 25643 }, { "epoch": 0.8273742667535102, "grad_norm": 0.36328125, "learning_rate": 2.3015298020131203e-06, "loss": 1.9703, "step": 25644 }, { "epoch": 0.8274065306073065, "grad_norm": 0.373046875, "learning_rate": 2.3006921955573557e-06, "loss": 1.9441, "step": 25645 }, { "epoch": 0.8274387944611029, "grad_norm": 0.3671875, "learning_rate": 2.299854728886458e-06, "loss": 1.9667, "step": 25646 }, { "epoch": 0.8274710583148992, "grad_norm": 0.376953125, "learning_rate": 2.2990174020096517e-06, "loss": 1.9942, "step": 25647 }, { "epoch": 0.8275033221686956, "grad_norm": 0.369140625, "learning_rate": 2.298180214936149e-06, "loss": 1.9752, "step": 25648 }, { "epoch": 0.8275355860224919, "grad_norm": 0.359375, "learning_rate": 2.297343167675165e-06, "loss": 1.9973, "step": 25649 }, { "epoch": 0.8275678498762883, "grad_norm": 0.37890625, "learning_rate": 2.2965062602359167e-06, "loss": 1.9512, "step": 25650 }, { "epoch": 0.8276001137300847, "grad_norm": 0.357421875, "learning_rate": 2.2956694926276135e-06, "loss": 1.953, "step": 25651 }, { "epoch": 0.827632377583881, "grad_norm": 0.37109375, "learning_rate": 2.294832864859463e-06, "loss": 1.9588, "step": 25652 }, { "epoch": 0.8276646414376774, "grad_norm": 0.359375, "learning_rate": 2.293996376940683e-06, "loss": 1.9813, "step": 25653 }, { "epoch": 0.8276969052914737, "grad_norm": 0.35546875, "learning_rate": 2.2931600288804744e-06, "loss": 1.9516, "step": 25654 }, { "epoch": 0.8277291691452701, "grad_norm": 0.455078125, "learning_rate": 2.29232382068804e-06, "loss": 1.8671, "step": 25655 }, { "epoch": 0.8277614329990663, "grad_norm": 0.3828125, "learning_rate": 2.2914877523725937e-06, "loss": 1.9208, "step": 25656 }, { "epoch": 0.8277936968528627, "grad_norm": 0.359375, "learning_rate": 2.2906518239433315e-06, "loss": 1.9587, "step": 25657 }, { "epoch": 0.827825960706659, "grad_norm": 0.361328125, "learning_rate": 2.289816035409455e-06, "loss": 1.978, "step": 25658 }, { "epoch": 0.8278582245604554, "grad_norm": 0.34765625, "learning_rate": 2.2889803867801663e-06, "loss": 1.9905, "step": 25659 }, { "epoch": 0.8278904884142517, "grad_norm": 0.365234375, "learning_rate": 2.2881448780646647e-06, "loss": 2.0342, "step": 25660 }, { "epoch": 0.8279227522680481, "grad_norm": 0.42578125, "learning_rate": 2.2873095092721396e-06, "loss": 1.962, "step": 25661 }, { "epoch": 0.8279550161218444, "grad_norm": 0.453125, "learning_rate": 2.2864742804117973e-06, "loss": 2.0255, "step": 25662 }, { "epoch": 0.8279872799756408, "grad_norm": 0.384765625, "learning_rate": 2.285639191492823e-06, "loss": 2.0061, "step": 25663 }, { "epoch": 0.8280195438294371, "grad_norm": 0.35546875, "learning_rate": 2.284804242524412e-06, "loss": 2.0299, "step": 25664 }, { "epoch": 0.8280518076832335, "grad_norm": 0.34765625, "learning_rate": 2.283969433515751e-06, "loss": 2.0185, "step": 25665 }, { "epoch": 0.8280840715370298, "grad_norm": 0.375, "learning_rate": 2.283134764476035e-06, "loss": 2.0007, "step": 25666 }, { "epoch": 0.8281163353908262, "grad_norm": 0.359375, "learning_rate": 2.28230023541445e-06, "loss": 1.9909, "step": 25667 }, { "epoch": 0.8281485992446225, "grad_norm": 0.388671875, "learning_rate": 2.281465846340175e-06, "loss": 2.0811, "step": 25668 }, { "epoch": 0.8281808630984189, "grad_norm": 0.373046875, "learning_rate": 2.2806315972624044e-06, "loss": 2.0398, "step": 25669 }, { "epoch": 0.8282131269522152, "grad_norm": 0.357421875, "learning_rate": 2.2797974881903156e-06, "loss": 2.0386, "step": 25670 }, { "epoch": 0.8282453908060116, "grad_norm": 0.33984375, "learning_rate": 2.2789635191330884e-06, "loss": 2.1044, "step": 25671 }, { "epoch": 0.828277654659808, "grad_norm": 0.349609375, "learning_rate": 2.2781296900999064e-06, "loss": 2.089, "step": 25672 }, { "epoch": 0.8283099185136042, "grad_norm": 0.365234375, "learning_rate": 2.2772960010999465e-06, "loss": 2.0896, "step": 25673 }, { "epoch": 0.8283421823674006, "grad_norm": 0.35546875, "learning_rate": 2.2764624521423827e-06, "loss": 2.0889, "step": 25674 }, { "epoch": 0.8283744462211969, "grad_norm": 0.345703125, "learning_rate": 2.2756290432363958e-06, "loss": 2.1775, "step": 25675 }, { "epoch": 0.8284067100749933, "grad_norm": 0.345703125, "learning_rate": 2.274795774391155e-06, "loss": 2.1374, "step": 25676 }, { "epoch": 0.8284389739287896, "grad_norm": 0.380859375, "learning_rate": 2.2739626456158303e-06, "loss": 2.0996, "step": 25677 }, { "epoch": 0.828471237782586, "grad_norm": 0.337890625, "learning_rate": 2.2731296569195986e-06, "loss": 2.1474, "step": 25678 }, { "epoch": 0.8285035016363823, "grad_norm": 0.345703125, "learning_rate": 2.272296808311625e-06, "loss": 2.1506, "step": 25679 }, { "epoch": 0.8285357654901787, "grad_norm": 0.349609375, "learning_rate": 2.2714640998010743e-06, "loss": 2.0843, "step": 25680 }, { "epoch": 0.828568029343975, "grad_norm": 0.349609375, "learning_rate": 2.2706315313971187e-06, "loss": 2.0374, "step": 25681 }, { "epoch": 0.8286002931977714, "grad_norm": 0.333984375, "learning_rate": 2.2697991031089188e-06, "loss": 2.0698, "step": 25682 }, { "epoch": 0.8286325570515677, "grad_norm": 0.33984375, "learning_rate": 2.268966814945634e-06, "loss": 2.0615, "step": 25683 }, { "epoch": 0.8286648209053641, "grad_norm": 0.34765625, "learning_rate": 2.2681346669164347e-06, "loss": 2.0798, "step": 25684 }, { "epoch": 0.8286970847591604, "grad_norm": 0.34375, "learning_rate": 2.267302659030473e-06, "loss": 2.0854, "step": 25685 }, { "epoch": 0.8287293486129568, "grad_norm": 0.349609375, "learning_rate": 2.2664707912969073e-06, "loss": 2.1592, "step": 25686 }, { "epoch": 0.8287616124667531, "grad_norm": 0.349609375, "learning_rate": 2.265639063724899e-06, "loss": 2.0769, "step": 25687 }, { "epoch": 0.8287938763205495, "grad_norm": 0.33984375, "learning_rate": 2.264807476323599e-06, "loss": 2.0603, "step": 25688 }, { "epoch": 0.8288261401743457, "grad_norm": 0.34765625, "learning_rate": 2.2639760291021607e-06, "loss": 2.1205, "step": 25689 }, { "epoch": 0.8288584040281421, "grad_norm": 0.357421875, "learning_rate": 2.2631447220697403e-06, "loss": 2.1214, "step": 25690 }, { "epoch": 0.8288906678819385, "grad_norm": 0.341796875, "learning_rate": 2.262313555235485e-06, "loss": 2.0689, "step": 25691 }, { "epoch": 0.8289229317357348, "grad_norm": 0.34765625, "learning_rate": 2.2614825286085424e-06, "loss": 2.1212, "step": 25692 }, { "epoch": 0.8289551955895312, "grad_norm": 0.33984375, "learning_rate": 2.260651642198063e-06, "loss": 2.1088, "step": 25693 }, { "epoch": 0.8289874594433275, "grad_norm": 0.341796875, "learning_rate": 2.259820896013191e-06, "loss": 2.0448, "step": 25694 }, { "epoch": 0.8290197232971239, "grad_norm": 0.345703125, "learning_rate": 2.2589902900630714e-06, "loss": 2.1167, "step": 25695 }, { "epoch": 0.8290519871509202, "grad_norm": 0.333984375, "learning_rate": 2.2581598243568437e-06, "loss": 2.1506, "step": 25696 }, { "epoch": 0.8290842510047166, "grad_norm": 0.337890625, "learning_rate": 2.2573294989036546e-06, "loss": 2.1033, "step": 25697 }, { "epoch": 0.8291165148585129, "grad_norm": 0.333984375, "learning_rate": 2.256499313712641e-06, "loss": 2.098, "step": 25698 }, { "epoch": 0.8291487787123093, "grad_norm": 0.34375, "learning_rate": 2.255669268792937e-06, "loss": 2.1278, "step": 25699 }, { "epoch": 0.8291810425661056, "grad_norm": 0.35546875, "learning_rate": 2.254839364153687e-06, "loss": 2.0656, "step": 25700 }, { "epoch": 0.829213306419902, "grad_norm": 0.3515625, "learning_rate": 2.25400959980402e-06, "loss": 2.0361, "step": 25701 }, { "epoch": 0.8292455702736983, "grad_norm": 0.349609375, "learning_rate": 2.2531799757530692e-06, "loss": 2.0435, "step": 25702 }, { "epoch": 0.8292778341274947, "grad_norm": 0.37109375, "learning_rate": 2.2523504920099736e-06, "loss": 2.054, "step": 25703 }, { "epoch": 0.829310097981291, "grad_norm": 0.498046875, "learning_rate": 2.251521148583856e-06, "loss": 1.9692, "step": 25704 }, { "epoch": 0.8293423618350874, "grad_norm": 0.353515625, "learning_rate": 2.2506919454838458e-06, "loss": 2.0214, "step": 25705 }, { "epoch": 0.8293746256888836, "grad_norm": 0.35546875, "learning_rate": 2.249862882719076e-06, "loss": 2.0226, "step": 25706 }, { "epoch": 0.82940688954268, "grad_norm": 0.34765625, "learning_rate": 2.249033960298669e-06, "loss": 2.0255, "step": 25707 }, { "epoch": 0.8294391533964763, "grad_norm": 0.349609375, "learning_rate": 2.248205178231744e-06, "loss": 2.0614, "step": 25708 }, { "epoch": 0.8294714172502727, "grad_norm": 0.34765625, "learning_rate": 2.2473765365274333e-06, "loss": 2.0157, "step": 25709 }, { "epoch": 0.829503681104069, "grad_norm": 0.37109375, "learning_rate": 2.246548035194851e-06, "loss": 2.0777, "step": 25710 }, { "epoch": 0.8295359449578654, "grad_norm": 0.373046875, "learning_rate": 2.245719674243117e-06, "loss": 1.9992, "step": 25711 }, { "epoch": 0.8295682088116618, "grad_norm": 0.361328125, "learning_rate": 2.2448914536813543e-06, "loss": 2.0108, "step": 25712 }, { "epoch": 0.8296004726654581, "grad_norm": 0.357421875, "learning_rate": 2.2440633735186737e-06, "loss": 2.0347, "step": 25713 }, { "epoch": 0.8296327365192545, "grad_norm": 0.361328125, "learning_rate": 2.243235433764192e-06, "loss": 2.0017, "step": 25714 }, { "epoch": 0.8296650003730508, "grad_norm": 0.341796875, "learning_rate": 2.2424076344270243e-06, "loss": 2.0293, "step": 25715 }, { "epoch": 0.8296972642268472, "grad_norm": 0.3671875, "learning_rate": 2.2415799755162824e-06, "loss": 2.0057, "step": 25716 }, { "epoch": 0.8297295280806435, "grad_norm": 0.345703125, "learning_rate": 2.2407524570410705e-06, "loss": 1.9886, "step": 25717 }, { "epoch": 0.8297617919344399, "grad_norm": 0.359375, "learning_rate": 2.2399250790105074e-06, "loss": 2.0203, "step": 25718 }, { "epoch": 0.8297940557882362, "grad_norm": 0.40625, "learning_rate": 2.239097841433693e-06, "loss": 1.9888, "step": 25719 }, { "epoch": 0.8298263196420326, "grad_norm": 0.3828125, "learning_rate": 2.238270744319732e-06, "loss": 1.9223, "step": 25720 }, { "epoch": 0.8298585834958289, "grad_norm": 0.357421875, "learning_rate": 2.2374437876777366e-06, "loss": 1.902, "step": 25721 }, { "epoch": 0.8298908473496253, "grad_norm": 0.37109375, "learning_rate": 2.236616971516803e-06, "loss": 1.939, "step": 25722 }, { "epoch": 0.8299231112034215, "grad_norm": 0.369140625, "learning_rate": 2.2357902958460287e-06, "loss": 1.9572, "step": 25723 }, { "epoch": 0.829955375057218, "grad_norm": 0.357421875, "learning_rate": 2.2349637606745233e-06, "loss": 1.9159, "step": 25724 }, { "epoch": 0.8299876389110142, "grad_norm": 0.373046875, "learning_rate": 2.234137366011378e-06, "loss": 1.9327, "step": 25725 }, { "epoch": 0.8300199027648106, "grad_norm": 0.373046875, "learning_rate": 2.23331111186569e-06, "loss": 1.9315, "step": 25726 }, { "epoch": 0.8300521666186069, "grad_norm": 0.3671875, "learning_rate": 2.232484998246553e-06, "loss": 1.983, "step": 25727 }, { "epoch": 0.8300844304724033, "grad_norm": 0.353515625, "learning_rate": 2.2316590251630637e-06, "loss": 1.9658, "step": 25728 }, { "epoch": 0.8301166943261996, "grad_norm": 0.37109375, "learning_rate": 2.230833192624312e-06, "loss": 1.9257, "step": 25729 }, { "epoch": 0.830148958179996, "grad_norm": 0.357421875, "learning_rate": 2.230007500639384e-06, "loss": 1.957, "step": 25730 }, { "epoch": 0.8301812220337923, "grad_norm": 0.357421875, "learning_rate": 2.229181949217377e-06, "loss": 1.945, "step": 25731 }, { "epoch": 0.8302134858875887, "grad_norm": 0.376953125, "learning_rate": 2.228356538367373e-06, "loss": 1.9772, "step": 25732 }, { "epoch": 0.8302457497413851, "grad_norm": 0.376953125, "learning_rate": 2.2275312680984534e-06, "loss": 1.9651, "step": 25733 }, { "epoch": 0.8302780135951814, "grad_norm": 0.375, "learning_rate": 2.22670613841971e-06, "loss": 1.9652, "step": 25734 }, { "epoch": 0.8303102774489778, "grad_norm": 0.35546875, "learning_rate": 2.225881149340224e-06, "loss": 1.9944, "step": 25735 }, { "epoch": 0.8303425413027741, "grad_norm": 0.375, "learning_rate": 2.2250563008690685e-06, "loss": 2.0123, "step": 25736 }, { "epoch": 0.8303748051565705, "grad_norm": 0.3671875, "learning_rate": 2.2242315930153334e-06, "loss": 1.9798, "step": 25737 }, { "epoch": 0.8304070690103668, "grad_norm": 0.349609375, "learning_rate": 2.2234070257880906e-06, "loss": 1.9746, "step": 25738 }, { "epoch": 0.8304393328641632, "grad_norm": 0.359375, "learning_rate": 2.2225825991964144e-06, "loss": 1.925, "step": 25739 }, { "epoch": 0.8304715967179594, "grad_norm": 0.412109375, "learning_rate": 2.221758313249387e-06, "loss": 1.9237, "step": 25740 }, { "epoch": 0.8305038605717558, "grad_norm": 0.37109375, "learning_rate": 2.220934167956076e-06, "loss": 1.9125, "step": 25741 }, { "epoch": 0.8305361244255521, "grad_norm": 0.39453125, "learning_rate": 2.2201101633255506e-06, "loss": 1.9096, "step": 25742 }, { "epoch": 0.8305683882793485, "grad_norm": 0.388671875, "learning_rate": 2.2192862993668877e-06, "loss": 1.9049, "step": 25743 }, { "epoch": 0.8306006521331448, "grad_norm": 0.373046875, "learning_rate": 2.218462576089152e-06, "loss": 1.8669, "step": 25744 }, { "epoch": 0.8306329159869412, "grad_norm": 0.41015625, "learning_rate": 2.2176389935014076e-06, "loss": 1.8902, "step": 25745 }, { "epoch": 0.8306651798407375, "grad_norm": 0.359375, "learning_rate": 2.216815551612727e-06, "loss": 1.8337, "step": 25746 }, { "epoch": 0.8306974436945339, "grad_norm": 0.373046875, "learning_rate": 2.21599225043217e-06, "loss": 1.851, "step": 25747 }, { "epoch": 0.8307297075483302, "grad_norm": 0.369140625, "learning_rate": 2.215169089968796e-06, "loss": 1.874, "step": 25748 }, { "epoch": 0.8307619714021266, "grad_norm": 0.376953125, "learning_rate": 2.2143460702316726e-06, "loss": 1.8998, "step": 25749 }, { "epoch": 0.8307942352559229, "grad_norm": 0.375, "learning_rate": 2.2135231912298543e-06, "loss": 1.8921, "step": 25750 }, { "epoch": 0.8308264991097193, "grad_norm": 0.376953125, "learning_rate": 2.2127004529723955e-06, "loss": 1.8937, "step": 25751 }, { "epoch": 0.8308587629635157, "grad_norm": 0.65625, "learning_rate": 2.211877855468362e-06, "loss": 1.9363, "step": 25752 }, { "epoch": 0.830891026817312, "grad_norm": 0.38671875, "learning_rate": 2.2110553987268022e-06, "loss": 1.884, "step": 25753 }, { "epoch": 0.8309232906711084, "grad_norm": 0.392578125, "learning_rate": 2.210233082756765e-06, "loss": 1.8341, "step": 25754 }, { "epoch": 0.8309555545249047, "grad_norm": 0.35546875, "learning_rate": 2.209410907567312e-06, "loss": 1.8518, "step": 25755 }, { "epoch": 0.8309878183787011, "grad_norm": 0.373046875, "learning_rate": 2.208588873167487e-06, "loss": 1.9039, "step": 25756 }, { "epoch": 0.8310200822324973, "grad_norm": 0.365234375, "learning_rate": 2.20776697956634e-06, "loss": 1.934, "step": 25757 }, { "epoch": 0.8310523460862937, "grad_norm": 0.36328125, "learning_rate": 2.206945226772911e-06, "loss": 1.8748, "step": 25758 }, { "epoch": 0.83108460994009, "grad_norm": 0.41015625, "learning_rate": 2.206123614796258e-06, "loss": 1.8766, "step": 25759 }, { "epoch": 0.8311168737938864, "grad_norm": 0.37890625, "learning_rate": 2.205302143645415e-06, "loss": 1.8853, "step": 25760 }, { "epoch": 0.8311491376476827, "grad_norm": 0.404296875, "learning_rate": 2.2044808133294263e-06, "loss": 1.8377, "step": 25761 }, { "epoch": 0.8311814015014791, "grad_norm": 0.375, "learning_rate": 2.2036596238573358e-06, "loss": 1.9083, "step": 25762 }, { "epoch": 0.8312136653552754, "grad_norm": 0.3671875, "learning_rate": 2.2028385752381795e-06, "loss": 1.8766, "step": 25763 }, { "epoch": 0.8312459292090718, "grad_norm": 0.369140625, "learning_rate": 2.2020176674809924e-06, "loss": 1.9696, "step": 25764 }, { "epoch": 0.8312781930628681, "grad_norm": 0.37109375, "learning_rate": 2.201196900594818e-06, "loss": 1.957, "step": 25765 }, { "epoch": 0.8313104569166645, "grad_norm": 0.37109375, "learning_rate": 2.2003762745886848e-06, "loss": 1.9735, "step": 25766 }, { "epoch": 0.8313427207704608, "grad_norm": 0.373046875, "learning_rate": 2.1995557894716234e-06, "loss": 1.9064, "step": 25767 }, { "epoch": 0.8313749846242572, "grad_norm": 0.357421875, "learning_rate": 2.1987354452526737e-06, "loss": 1.9756, "step": 25768 }, { "epoch": 0.8314072484780535, "grad_norm": 0.37109375, "learning_rate": 2.19791524194086e-06, "loss": 1.9234, "step": 25769 }, { "epoch": 0.8314395123318499, "grad_norm": 0.359375, "learning_rate": 2.197095179545208e-06, "loss": 1.9404, "step": 25770 }, { "epoch": 0.8314717761856462, "grad_norm": 0.3671875, "learning_rate": 2.1962752580747514e-06, "loss": 1.9748, "step": 25771 }, { "epoch": 0.8315040400394426, "grad_norm": 0.3671875, "learning_rate": 2.1954554775385107e-06, "loss": 1.9567, "step": 25772 }, { "epoch": 0.831536303893239, "grad_norm": 0.373046875, "learning_rate": 2.194635837945507e-06, "loss": 1.9637, "step": 25773 }, { "epoch": 0.8315685677470352, "grad_norm": 0.36328125, "learning_rate": 2.1938163393047677e-06, "loss": 2.0064, "step": 25774 }, { "epoch": 0.8316008316008316, "grad_norm": 0.36328125, "learning_rate": 2.192996981625313e-06, "loss": 2.0001, "step": 25775 }, { "epoch": 0.8316330954546279, "grad_norm": 0.35546875, "learning_rate": 2.1921777649161557e-06, "loss": 2.0067, "step": 25776 }, { "epoch": 0.8316653593084243, "grad_norm": 0.37109375, "learning_rate": 2.1913586891863196e-06, "loss": 1.9761, "step": 25777 }, { "epoch": 0.8316976231622206, "grad_norm": 0.357421875, "learning_rate": 2.19053975444482e-06, "loss": 1.9701, "step": 25778 }, { "epoch": 0.831729887016017, "grad_norm": 0.359375, "learning_rate": 2.1897209607006637e-06, "loss": 1.9702, "step": 25779 }, { "epoch": 0.8317621508698133, "grad_norm": 0.3515625, "learning_rate": 2.188902307962874e-06, "loss": 2.0022, "step": 25780 }, { "epoch": 0.8317944147236097, "grad_norm": 0.365234375, "learning_rate": 2.188083796240457e-06, "loss": 1.9792, "step": 25781 }, { "epoch": 0.831826678577406, "grad_norm": 0.3515625, "learning_rate": 2.187265425542419e-06, "loss": 2.0177, "step": 25782 }, { "epoch": 0.8318589424312024, "grad_norm": 0.349609375, "learning_rate": 2.1864471958777745e-06, "loss": 1.9911, "step": 25783 }, { "epoch": 0.8318912062849987, "grad_norm": 0.361328125, "learning_rate": 2.185629107255526e-06, "loss": 2.0023, "step": 25784 }, { "epoch": 0.8319234701387951, "grad_norm": 0.373046875, "learning_rate": 2.184811159684676e-06, "loss": 1.9774, "step": 25785 }, { "epoch": 0.8319557339925914, "grad_norm": 0.353515625, "learning_rate": 2.183993353174236e-06, "loss": 1.9604, "step": 25786 }, { "epoch": 0.8319879978463878, "grad_norm": 0.373046875, "learning_rate": 2.1831756877332024e-06, "loss": 1.9715, "step": 25787 }, { "epoch": 0.8320202617001841, "grad_norm": 0.369140625, "learning_rate": 2.1823581633705765e-06, "loss": 1.9927, "step": 25788 }, { "epoch": 0.8320525255539805, "grad_norm": 0.3515625, "learning_rate": 2.1815407800953517e-06, "loss": 2.0078, "step": 25789 }, { "epoch": 0.8320847894077767, "grad_norm": 0.36328125, "learning_rate": 2.1807235379165353e-06, "loss": 2.0009, "step": 25790 }, { "epoch": 0.8321170532615731, "grad_norm": 0.353515625, "learning_rate": 2.179906436843116e-06, "loss": 2.0222, "step": 25791 }, { "epoch": 0.8321493171153695, "grad_norm": 0.376953125, "learning_rate": 2.179089476884087e-06, "loss": 1.9853, "step": 25792 }, { "epoch": 0.8321815809691658, "grad_norm": 0.345703125, "learning_rate": 2.1782726580484457e-06, "loss": 1.9626, "step": 25793 }, { "epoch": 0.8322138448229622, "grad_norm": 0.345703125, "learning_rate": 2.177455980345182e-06, "loss": 2.0164, "step": 25794 }, { "epoch": 0.8322461086767585, "grad_norm": 0.361328125, "learning_rate": 2.1766394437832803e-06, "loss": 2.0648, "step": 25795 }, { "epoch": 0.8322783725305549, "grad_norm": 0.359375, "learning_rate": 2.1758230483717367e-06, "loss": 2.0105, "step": 25796 }, { "epoch": 0.8323106363843512, "grad_norm": 0.341796875, "learning_rate": 2.1750067941195305e-06, "loss": 2.0808, "step": 25797 }, { "epoch": 0.8323429002381476, "grad_norm": 0.353515625, "learning_rate": 2.174190681035648e-06, "loss": 2.0109, "step": 25798 }, { "epoch": 0.8323751640919439, "grad_norm": 0.373046875, "learning_rate": 2.173374709129076e-06, "loss": 2.0092, "step": 25799 }, { "epoch": 0.8324074279457403, "grad_norm": 0.341796875, "learning_rate": 2.1725588784087924e-06, "loss": 2.11, "step": 25800 }, { "epoch": 0.8324396917995366, "grad_norm": 0.337890625, "learning_rate": 2.1717431888837746e-06, "loss": 1.9995, "step": 25801 }, { "epoch": 0.832471955653333, "grad_norm": 0.34765625, "learning_rate": 2.1709276405630102e-06, "loss": 2.0834, "step": 25802 }, { "epoch": 0.8325042195071293, "grad_norm": 0.353515625, "learning_rate": 2.1701122334554687e-06, "loss": 2.0544, "step": 25803 }, { "epoch": 0.8325364833609257, "grad_norm": 0.345703125, "learning_rate": 2.169296967570126e-06, "loss": 2.1304, "step": 25804 }, { "epoch": 0.832568747214722, "grad_norm": 0.34765625, "learning_rate": 2.16848184291596e-06, "loss": 2.058, "step": 25805 }, { "epoch": 0.8326010110685184, "grad_norm": 0.388671875, "learning_rate": 2.16766685950194e-06, "loss": 2.043, "step": 25806 }, { "epoch": 0.8326332749223146, "grad_norm": 0.33984375, "learning_rate": 2.1668520173370343e-06, "loss": 2.0848, "step": 25807 }, { "epoch": 0.832665538776111, "grad_norm": 0.35546875, "learning_rate": 2.1660373164302224e-06, "loss": 2.1285, "step": 25808 }, { "epoch": 0.8326978026299073, "grad_norm": 0.33984375, "learning_rate": 2.165222756790458e-06, "loss": 2.0455, "step": 25809 }, { "epoch": 0.8327300664837037, "grad_norm": 0.333984375, "learning_rate": 2.164408338426713e-06, "loss": 2.0669, "step": 25810 }, { "epoch": 0.8327623303375, "grad_norm": 0.349609375, "learning_rate": 2.163594061347957e-06, "loss": 2.072, "step": 25811 }, { "epoch": 0.8327945941912964, "grad_norm": 0.34375, "learning_rate": 2.162779925563149e-06, "loss": 2.0859, "step": 25812 }, { "epoch": 0.8328268580450928, "grad_norm": 0.3515625, "learning_rate": 2.1619659310812474e-06, "loss": 2.0215, "step": 25813 }, { "epoch": 0.8328591218988891, "grad_norm": 0.37109375, "learning_rate": 2.161152077911216e-06, "loss": 2.0027, "step": 25814 }, { "epoch": 0.8328913857526855, "grad_norm": 0.35546875, "learning_rate": 2.1603383660620146e-06, "loss": 2.0239, "step": 25815 }, { "epoch": 0.8329236496064818, "grad_norm": 0.345703125, "learning_rate": 2.1595247955425946e-06, "loss": 2.017, "step": 25816 }, { "epoch": 0.8329559134602782, "grad_norm": 0.357421875, "learning_rate": 2.158711366361912e-06, "loss": 2.0902, "step": 25817 }, { "epoch": 0.8329881773140745, "grad_norm": 0.345703125, "learning_rate": 2.157898078528926e-06, "loss": 2.0221, "step": 25818 }, { "epoch": 0.8330204411678709, "grad_norm": 0.39453125, "learning_rate": 2.1570849320525847e-06, "loss": 2.0144, "step": 25819 }, { "epoch": 0.8330527050216672, "grad_norm": 0.34765625, "learning_rate": 2.1562719269418363e-06, "loss": 2.0133, "step": 25820 }, { "epoch": 0.8330849688754636, "grad_norm": 0.34375, "learning_rate": 2.155459063205635e-06, "loss": 2.085, "step": 25821 }, { "epoch": 0.8331172327292599, "grad_norm": 0.353515625, "learning_rate": 2.154646340852925e-06, "loss": 1.9852, "step": 25822 }, { "epoch": 0.8331494965830563, "grad_norm": 0.357421875, "learning_rate": 2.153833759892651e-06, "loss": 2.0348, "step": 25823 }, { "epoch": 0.8331817604368525, "grad_norm": 0.3671875, "learning_rate": 2.1530213203337613e-06, "loss": 2.0795, "step": 25824 }, { "epoch": 0.833214024290649, "grad_norm": 0.353515625, "learning_rate": 2.152209022185198e-06, "loss": 2.0318, "step": 25825 }, { "epoch": 0.8332462881444452, "grad_norm": 0.34375, "learning_rate": 2.1513968654558962e-06, "loss": 2.0101, "step": 25826 }, { "epoch": 0.8332785519982416, "grad_norm": 0.3515625, "learning_rate": 2.150584850154804e-06, "loss": 2.0495, "step": 25827 }, { "epoch": 0.8333108158520379, "grad_norm": 0.36328125, "learning_rate": 2.1497729762908562e-06, "loss": 2.0475, "step": 25828 }, { "epoch": 0.8333430797058343, "grad_norm": 0.349609375, "learning_rate": 2.1489612438729837e-06, "loss": 2.0423, "step": 25829 }, { "epoch": 0.8333753435596306, "grad_norm": 0.345703125, "learning_rate": 2.148149652910131e-06, "loss": 2.0639, "step": 25830 }, { "epoch": 0.833407607413427, "grad_norm": 0.3671875, "learning_rate": 2.147338203411228e-06, "loss": 2.0451, "step": 25831 }, { "epoch": 0.8334398712672233, "grad_norm": 0.349609375, "learning_rate": 2.146526895385201e-06, "loss": 1.9683, "step": 25832 }, { "epoch": 0.8334721351210197, "grad_norm": 0.359375, "learning_rate": 2.145715728840988e-06, "loss": 2.0417, "step": 25833 }, { "epoch": 0.8335043989748161, "grad_norm": 0.36328125, "learning_rate": 2.144904703787516e-06, "loss": 1.9676, "step": 25834 }, { "epoch": 0.8335366628286124, "grad_norm": 0.357421875, "learning_rate": 2.1440938202337057e-06, "loss": 2.0251, "step": 25835 }, { "epoch": 0.8335689266824088, "grad_norm": 0.357421875, "learning_rate": 2.143283078188492e-06, "loss": 1.9609, "step": 25836 }, { "epoch": 0.8336011905362051, "grad_norm": 0.35546875, "learning_rate": 2.1424724776607944e-06, "loss": 1.9409, "step": 25837 }, { "epoch": 0.8336334543900015, "grad_norm": 0.357421875, "learning_rate": 2.1416620186595344e-06, "loss": 2.0036, "step": 25838 }, { "epoch": 0.8336657182437978, "grad_norm": 0.369140625, "learning_rate": 2.140851701193639e-06, "loss": 1.9596, "step": 25839 }, { "epoch": 0.8336979820975942, "grad_norm": 0.37109375, "learning_rate": 2.1400415252720166e-06, "loss": 1.8902, "step": 25840 }, { "epoch": 0.8337302459513904, "grad_norm": 0.46484375, "learning_rate": 2.139231490903592e-06, "loss": 1.9165, "step": 25841 }, { "epoch": 0.8337625098051868, "grad_norm": 0.380859375, "learning_rate": 2.1384215980972876e-06, "loss": 1.935, "step": 25842 }, { "epoch": 0.8337947736589831, "grad_norm": 0.376953125, "learning_rate": 2.137611846862003e-06, "loss": 1.9395, "step": 25843 }, { "epoch": 0.8338270375127795, "grad_norm": 0.39453125, "learning_rate": 2.1368022372066616e-06, "loss": 1.9128, "step": 25844 }, { "epoch": 0.8338593013665758, "grad_norm": 0.3828125, "learning_rate": 2.135992769140177e-06, "loss": 1.9416, "step": 25845 }, { "epoch": 0.8338915652203722, "grad_norm": 0.40625, "learning_rate": 2.1351834426714546e-06, "loss": 1.9195, "step": 25846 }, { "epoch": 0.8339238290741685, "grad_norm": 0.375, "learning_rate": 2.1343742578094035e-06, "loss": 1.9562, "step": 25847 }, { "epoch": 0.8339560929279649, "grad_norm": 0.3828125, "learning_rate": 2.1335652145629263e-06, "loss": 1.9578, "step": 25848 }, { "epoch": 0.8339883567817612, "grad_norm": 0.42578125, "learning_rate": 2.1327563129409385e-06, "loss": 1.9355, "step": 25849 }, { "epoch": 0.8340206206355576, "grad_norm": 0.36328125, "learning_rate": 2.131947552952337e-06, "loss": 1.9405, "step": 25850 }, { "epoch": 0.8340528844893539, "grad_norm": 0.359375, "learning_rate": 2.131138934606022e-06, "loss": 1.9535, "step": 25851 }, { "epoch": 0.8340851483431503, "grad_norm": 0.365234375, "learning_rate": 2.1303304579109005e-06, "loss": 1.9462, "step": 25852 }, { "epoch": 0.8341174121969467, "grad_norm": 0.37109375, "learning_rate": 2.12952212287587e-06, "loss": 1.9181, "step": 25853 }, { "epoch": 0.834149676050743, "grad_norm": 0.373046875, "learning_rate": 2.1287139295098224e-06, "loss": 1.9817, "step": 25854 }, { "epoch": 0.8341819399045394, "grad_norm": 0.3671875, "learning_rate": 2.127905877821661e-06, "loss": 1.8854, "step": 25855 }, { "epoch": 0.8342142037583357, "grad_norm": 0.373046875, "learning_rate": 2.1270979678202784e-06, "loss": 1.8708, "step": 25856 }, { "epoch": 0.8342464676121321, "grad_norm": 0.357421875, "learning_rate": 2.126290199514561e-06, "loss": 1.8885, "step": 25857 }, { "epoch": 0.8342787314659283, "grad_norm": 0.37890625, "learning_rate": 2.1254825729134104e-06, "loss": 1.8713, "step": 25858 }, { "epoch": 0.8343109953197247, "grad_norm": 0.384765625, "learning_rate": 2.124675088025712e-06, "loss": 1.8886, "step": 25859 }, { "epoch": 0.834343259173521, "grad_norm": 0.3671875, "learning_rate": 2.1238677448603496e-06, "loss": 1.8742, "step": 25860 }, { "epoch": 0.8343755230273174, "grad_norm": 0.361328125, "learning_rate": 2.123060543426217e-06, "loss": 1.8817, "step": 25861 }, { "epoch": 0.8344077868811137, "grad_norm": 0.3671875, "learning_rate": 2.1222534837321955e-06, "loss": 1.8615, "step": 25862 }, { "epoch": 0.8344400507349101, "grad_norm": 0.375, "learning_rate": 2.1214465657871667e-06, "loss": 1.8861, "step": 25863 }, { "epoch": 0.8344723145887064, "grad_norm": 0.384765625, "learning_rate": 2.1206397896000185e-06, "loss": 1.8944, "step": 25864 }, { "epoch": 0.8345045784425028, "grad_norm": 0.369140625, "learning_rate": 2.1198331551796287e-06, "loss": 1.9887, "step": 25865 }, { "epoch": 0.8345368422962991, "grad_norm": 0.369140625, "learning_rate": 2.11902666253487e-06, "loss": 1.9487, "step": 25866 }, { "epoch": 0.8345691061500955, "grad_norm": 0.369140625, "learning_rate": 2.1182203116746334e-06, "loss": 1.9173, "step": 25867 }, { "epoch": 0.8346013700038918, "grad_norm": 0.369140625, "learning_rate": 2.1174141026077774e-06, "loss": 1.9569, "step": 25868 }, { "epoch": 0.8346336338576882, "grad_norm": 0.37109375, "learning_rate": 2.1166080353431866e-06, "loss": 1.9595, "step": 25869 }, { "epoch": 0.8346658977114845, "grad_norm": 0.361328125, "learning_rate": 2.11580210988974e-06, "loss": 1.9701, "step": 25870 }, { "epoch": 0.8346981615652809, "grad_norm": 0.3671875, "learning_rate": 2.114996326256291e-06, "loss": 1.9987, "step": 25871 }, { "epoch": 0.8347304254190772, "grad_norm": 0.40234375, "learning_rate": 2.1141906844517207e-06, "loss": 1.8664, "step": 25872 }, { "epoch": 0.8347626892728736, "grad_norm": 0.375, "learning_rate": 2.113385184484902e-06, "loss": 1.9279, "step": 25873 }, { "epoch": 0.83479495312667, "grad_norm": 0.66796875, "learning_rate": 2.1125798263646873e-06, "loss": 1.93, "step": 25874 }, { "epoch": 0.8348272169804662, "grad_norm": 0.357421875, "learning_rate": 2.1117746100999485e-06, "loss": 1.9238, "step": 25875 }, { "epoch": 0.8348594808342626, "grad_norm": 0.37890625, "learning_rate": 2.110969535699555e-06, "loss": 1.9975, "step": 25876 }, { "epoch": 0.8348917446880589, "grad_norm": 0.369140625, "learning_rate": 2.1101646031723566e-06, "loss": 1.9947, "step": 25877 }, { "epoch": 0.8349240085418553, "grad_norm": 0.359375, "learning_rate": 2.109359812527221e-06, "loss": 1.9635, "step": 25878 }, { "epoch": 0.8349562723956516, "grad_norm": 0.419921875, "learning_rate": 2.1085551637730024e-06, "loss": 1.9214, "step": 25879 }, { "epoch": 0.834988536249448, "grad_norm": 0.361328125, "learning_rate": 2.107750656918564e-06, "loss": 1.9412, "step": 25880 }, { "epoch": 0.8350208001032443, "grad_norm": 0.359375, "learning_rate": 2.1069462919727555e-06, "loss": 1.9921, "step": 25881 }, { "epoch": 0.8350530639570407, "grad_norm": 0.35546875, "learning_rate": 2.1061420689444318e-06, "loss": 1.9011, "step": 25882 }, { "epoch": 0.835085327810837, "grad_norm": 0.384765625, "learning_rate": 2.105337987842448e-06, "loss": 1.901, "step": 25883 }, { "epoch": 0.8351175916646334, "grad_norm": 0.43359375, "learning_rate": 2.104534048675654e-06, "loss": 1.9334, "step": 25884 }, { "epoch": 0.8351498555184297, "grad_norm": 0.39453125, "learning_rate": 2.1037302514528954e-06, "loss": 1.9794, "step": 25885 }, { "epoch": 0.8351821193722261, "grad_norm": 0.42578125, "learning_rate": 2.1029265961830236e-06, "loss": 1.9422, "step": 25886 }, { "epoch": 0.8352143832260224, "grad_norm": 0.361328125, "learning_rate": 2.1021230828748853e-06, "loss": 1.9439, "step": 25887 }, { "epoch": 0.8352466470798188, "grad_norm": 0.375, "learning_rate": 2.1013197115373193e-06, "loss": 1.9824, "step": 25888 }, { "epoch": 0.8352789109336151, "grad_norm": 0.353515625, "learning_rate": 2.1005164821791766e-06, "loss": 1.9732, "step": 25889 }, { "epoch": 0.8353111747874115, "grad_norm": 0.388671875, "learning_rate": 2.0997133948092935e-06, "loss": 1.9694, "step": 25890 }, { "epoch": 0.8353434386412077, "grad_norm": 0.37890625, "learning_rate": 2.0989104494365075e-06, "loss": 1.9796, "step": 25891 }, { "epoch": 0.8353757024950041, "grad_norm": 0.369140625, "learning_rate": 2.0981076460696637e-06, "loss": 1.9336, "step": 25892 }, { "epoch": 0.8354079663488005, "grad_norm": 0.3671875, "learning_rate": 2.0973049847175946e-06, "loss": 1.9716, "step": 25893 }, { "epoch": 0.8354402302025968, "grad_norm": 0.36328125, "learning_rate": 2.0965024653891317e-06, "loss": 1.9834, "step": 25894 }, { "epoch": 0.8354724940563932, "grad_norm": 0.388671875, "learning_rate": 2.095700088093118e-06, "loss": 1.9494, "step": 25895 }, { "epoch": 0.8355047579101895, "grad_norm": 0.390625, "learning_rate": 2.0948978528383783e-06, "loss": 1.9338, "step": 25896 }, { "epoch": 0.8355370217639859, "grad_norm": 0.3515625, "learning_rate": 2.0940957596337432e-06, "loss": 1.926, "step": 25897 }, { "epoch": 0.8355692856177822, "grad_norm": 0.35546875, "learning_rate": 2.0932938084880497e-06, "loss": 1.9389, "step": 25898 }, { "epoch": 0.8356015494715786, "grad_norm": 0.373046875, "learning_rate": 2.0924919994101124e-06, "loss": 1.9294, "step": 25899 }, { "epoch": 0.8356338133253749, "grad_norm": 0.365234375, "learning_rate": 2.0916903324087615e-06, "loss": 1.9464, "step": 25900 }, { "epoch": 0.8356660771791713, "grad_norm": 0.380859375, "learning_rate": 2.090888807492831e-06, "loss": 1.8731, "step": 25901 }, { "epoch": 0.8356983410329676, "grad_norm": 0.3671875, "learning_rate": 2.0900874246711286e-06, "loss": 1.9392, "step": 25902 }, { "epoch": 0.835730604886764, "grad_norm": 0.390625, "learning_rate": 2.089286183952484e-06, "loss": 1.9606, "step": 25903 }, { "epoch": 0.8357628687405603, "grad_norm": 0.361328125, "learning_rate": 2.08848508534572e-06, "loss": 1.9616, "step": 25904 }, { "epoch": 0.8357951325943567, "grad_norm": 0.3671875, "learning_rate": 2.0876841288596427e-06, "loss": 2.009, "step": 25905 }, { "epoch": 0.835827396448153, "grad_norm": 0.376953125, "learning_rate": 2.0868833145030754e-06, "loss": 1.9239, "step": 25906 }, { "epoch": 0.8358596603019494, "grad_norm": 0.373046875, "learning_rate": 2.0860826422848404e-06, "loss": 1.9751, "step": 25907 }, { "epoch": 0.8358919241557456, "grad_norm": 0.349609375, "learning_rate": 2.085282112213735e-06, "loss": 1.9932, "step": 25908 }, { "epoch": 0.835924188009542, "grad_norm": 0.3984375, "learning_rate": 2.0844817242985832e-06, "loss": 2.0185, "step": 25909 }, { "epoch": 0.8359564518633383, "grad_norm": 0.365234375, "learning_rate": 2.083681478548188e-06, "loss": 1.9517, "step": 25910 }, { "epoch": 0.8359887157171347, "grad_norm": 0.373046875, "learning_rate": 2.082881374971364e-06, "loss": 1.9953, "step": 25911 }, { "epoch": 0.836020979570931, "grad_norm": 0.36328125, "learning_rate": 2.0820814135769156e-06, "loss": 1.938, "step": 25912 }, { "epoch": 0.8360532434247274, "grad_norm": 0.373046875, "learning_rate": 2.081281594373643e-06, "loss": 1.9744, "step": 25913 }, { "epoch": 0.8360855072785238, "grad_norm": 0.37109375, "learning_rate": 2.0804819173703603e-06, "loss": 1.8815, "step": 25914 }, { "epoch": 0.8361177711323201, "grad_norm": 0.39453125, "learning_rate": 2.0796823825758626e-06, "loss": 1.9325, "step": 25915 }, { "epoch": 0.8361500349861165, "grad_norm": 0.388671875, "learning_rate": 2.0788829899989493e-06, "loss": 1.9226, "step": 25916 }, { "epoch": 0.8361822988399128, "grad_norm": 0.359375, "learning_rate": 2.0780837396484266e-06, "loss": 1.9028, "step": 25917 }, { "epoch": 0.8362145626937092, "grad_norm": 0.3828125, "learning_rate": 2.0772846315330855e-06, "loss": 1.8921, "step": 25918 }, { "epoch": 0.8362468265475055, "grad_norm": 0.392578125, "learning_rate": 2.0764856656617232e-06, "loss": 1.9263, "step": 25919 }, { "epoch": 0.8362790904013019, "grad_norm": 0.36328125, "learning_rate": 2.0756868420431383e-06, "loss": 1.9214, "step": 25920 }, { "epoch": 0.8363113542550982, "grad_norm": 0.36328125, "learning_rate": 2.0748881606861213e-06, "loss": 1.9155, "step": 25921 }, { "epoch": 0.8363436181088946, "grad_norm": 0.390625, "learning_rate": 2.0740896215994597e-06, "loss": 1.876, "step": 25922 }, { "epoch": 0.8363758819626909, "grad_norm": 0.37109375, "learning_rate": 2.0732912247919483e-06, "loss": 1.9057, "step": 25923 }, { "epoch": 0.8364081458164873, "grad_norm": 0.37890625, "learning_rate": 2.072492970272375e-06, "loss": 1.8413, "step": 25924 }, { "epoch": 0.8364404096702835, "grad_norm": 0.375, "learning_rate": 2.071694858049521e-06, "loss": 1.8797, "step": 25925 }, { "epoch": 0.83647267352408, "grad_norm": 0.365234375, "learning_rate": 2.070896888132179e-06, "loss": 1.8632, "step": 25926 }, { "epoch": 0.8365049373778762, "grad_norm": 0.373046875, "learning_rate": 2.0700990605291282e-06, "loss": 1.8828, "step": 25927 }, { "epoch": 0.8365372012316726, "grad_norm": 0.376953125, "learning_rate": 2.0693013752491486e-06, "loss": 1.774, "step": 25928 }, { "epoch": 0.8365694650854689, "grad_norm": 0.384765625, "learning_rate": 2.0685038323010284e-06, "loss": 1.8594, "step": 25929 }, { "epoch": 0.8366017289392653, "grad_norm": 0.380859375, "learning_rate": 2.0677064316935364e-06, "loss": 1.8451, "step": 25930 }, { "epoch": 0.8366339927930616, "grad_norm": 0.3828125, "learning_rate": 2.0669091734354526e-06, "loss": 1.8063, "step": 25931 }, { "epoch": 0.836666256646858, "grad_norm": 0.392578125, "learning_rate": 2.066112057535561e-06, "loss": 1.826, "step": 25932 }, { "epoch": 0.8366985205006543, "grad_norm": 0.380859375, "learning_rate": 2.065315084002623e-06, "loss": 1.8185, "step": 25933 }, { "epoch": 0.8367307843544507, "grad_norm": 0.3984375, "learning_rate": 2.0645182528454177e-06, "loss": 1.7527, "step": 25934 }, { "epoch": 0.8367630482082471, "grad_norm": 0.392578125, "learning_rate": 2.0637215640727213e-06, "loss": 1.8074, "step": 25935 }, { "epoch": 0.8367953120620434, "grad_norm": 0.423828125, "learning_rate": 2.0629250176932924e-06, "loss": 1.843, "step": 25936 }, { "epoch": 0.8368275759158398, "grad_norm": 0.38671875, "learning_rate": 2.0621286137159017e-06, "loss": 1.8334, "step": 25937 }, { "epoch": 0.8368598397696361, "grad_norm": 0.380859375, "learning_rate": 2.061332352149326e-06, "loss": 1.7978, "step": 25938 }, { "epoch": 0.8368921036234325, "grad_norm": 0.3828125, "learning_rate": 2.0605362330023146e-06, "loss": 1.8056, "step": 25939 }, { "epoch": 0.8369243674772288, "grad_norm": 0.375, "learning_rate": 2.059740256283641e-06, "loss": 1.8273, "step": 25940 }, { "epoch": 0.8369566313310252, "grad_norm": 0.4140625, "learning_rate": 2.0589444220020624e-06, "loss": 1.789, "step": 25941 }, { "epoch": 0.8369888951848214, "grad_norm": 0.384765625, "learning_rate": 2.0581487301663376e-06, "loss": 1.8487, "step": 25942 }, { "epoch": 0.8370211590386178, "grad_norm": 0.3984375, "learning_rate": 2.057353180785231e-06, "loss": 1.8624, "step": 25943 }, { "epoch": 0.8370534228924141, "grad_norm": 0.384765625, "learning_rate": 2.05655777386749e-06, "loss": 1.8451, "step": 25944 }, { "epoch": 0.8370856867462105, "grad_norm": 0.421875, "learning_rate": 2.05576250942188e-06, "loss": 1.8063, "step": 25945 }, { "epoch": 0.8371179506000068, "grad_norm": 0.376953125, "learning_rate": 2.0549673874571507e-06, "loss": 1.8142, "step": 25946 }, { "epoch": 0.8371502144538032, "grad_norm": 0.384765625, "learning_rate": 2.0541724079820497e-06, "loss": 1.8408, "step": 25947 }, { "epoch": 0.8371824783075995, "grad_norm": 0.37890625, "learning_rate": 2.0533775710053348e-06, "loss": 1.8243, "step": 25948 }, { "epoch": 0.8372147421613959, "grad_norm": 0.3828125, "learning_rate": 2.052582876535751e-06, "loss": 1.816, "step": 25949 }, { "epoch": 0.8372470060151922, "grad_norm": 0.380859375, "learning_rate": 2.051788324582045e-06, "loss": 1.8176, "step": 25950 }, { "epoch": 0.8372792698689886, "grad_norm": 0.380859375, "learning_rate": 2.050993915152966e-06, "loss": 1.7866, "step": 25951 }, { "epoch": 0.8373115337227849, "grad_norm": 0.392578125, "learning_rate": 2.0501996482572573e-06, "loss": 1.8241, "step": 25952 }, { "epoch": 0.8373437975765813, "grad_norm": 0.37109375, "learning_rate": 2.0494055239036563e-06, "loss": 1.8345, "step": 25953 }, { "epoch": 0.8373760614303777, "grad_norm": 0.41796875, "learning_rate": 2.0486115421009133e-06, "loss": 1.8673, "step": 25954 }, { "epoch": 0.837408325284174, "grad_norm": 0.404296875, "learning_rate": 2.047817702857763e-06, "loss": 1.9033, "step": 25955 }, { "epoch": 0.8374405891379704, "grad_norm": 0.376953125, "learning_rate": 2.0470240061829393e-06, "loss": 1.9278, "step": 25956 }, { "epoch": 0.8374728529917667, "grad_norm": 0.384765625, "learning_rate": 2.046230452085186e-06, "loss": 1.8614, "step": 25957 }, { "epoch": 0.8375051168455631, "grad_norm": 0.400390625, "learning_rate": 2.045437040573236e-06, "loss": 1.9276, "step": 25958 }, { "epoch": 0.8375373806993593, "grad_norm": 0.4140625, "learning_rate": 2.044643771655817e-06, "loss": 1.9311, "step": 25959 }, { "epoch": 0.8375696445531557, "grad_norm": 0.392578125, "learning_rate": 2.0438506453416724e-06, "loss": 1.9012, "step": 25960 }, { "epoch": 0.837601908406952, "grad_norm": 0.375, "learning_rate": 2.0430576616395196e-06, "loss": 1.8614, "step": 25961 }, { "epoch": 0.8376341722607484, "grad_norm": 0.36328125, "learning_rate": 2.0422648205580907e-06, "loss": 1.9169, "step": 25962 }, { "epoch": 0.8376664361145447, "grad_norm": 0.390625, "learning_rate": 2.0414721221061232e-06, "loss": 1.8562, "step": 25963 }, { "epoch": 0.8376986999683411, "grad_norm": 0.380859375, "learning_rate": 2.040679566292327e-06, "loss": 1.8822, "step": 25964 }, { "epoch": 0.8377309638221374, "grad_norm": 0.35546875, "learning_rate": 2.0398871531254338e-06, "loss": 1.9218, "step": 25965 }, { "epoch": 0.8377632276759338, "grad_norm": 0.376953125, "learning_rate": 2.039094882614171e-06, "loss": 1.938, "step": 25966 }, { "epoch": 0.8377954915297301, "grad_norm": 0.3828125, "learning_rate": 2.0383027547672468e-06, "loss": 1.8822, "step": 25967 }, { "epoch": 0.8378277553835265, "grad_norm": 0.36328125, "learning_rate": 2.037510769593391e-06, "loss": 1.948, "step": 25968 }, { "epoch": 0.8378600192373228, "grad_norm": 0.3515625, "learning_rate": 2.0367189271013183e-06, "loss": 1.8728, "step": 25969 }, { "epoch": 0.8378922830911192, "grad_norm": 0.357421875, "learning_rate": 2.0359272272997383e-06, "loss": 1.9085, "step": 25970 }, { "epoch": 0.8379245469449155, "grad_norm": 0.3671875, "learning_rate": 2.0351356701973754e-06, "loss": 1.9665, "step": 25971 }, { "epoch": 0.8379568107987119, "grad_norm": 0.375, "learning_rate": 2.0343442558029384e-06, "loss": 1.9321, "step": 25972 }, { "epoch": 0.8379890746525082, "grad_norm": 0.37109375, "learning_rate": 2.0335529841251347e-06, "loss": 1.9952, "step": 25973 }, { "epoch": 0.8380213385063046, "grad_norm": 0.35546875, "learning_rate": 2.0327618551726813e-06, "loss": 1.9301, "step": 25974 }, { "epoch": 0.838053602360101, "grad_norm": 0.62109375, "learning_rate": 2.031970868954282e-06, "loss": 1.841, "step": 25975 }, { "epoch": 0.8380858662138972, "grad_norm": 0.361328125, "learning_rate": 2.0311800254786422e-06, "loss": 1.9977, "step": 25976 }, { "epoch": 0.8381181300676936, "grad_norm": 0.357421875, "learning_rate": 2.030389324754471e-06, "loss": 1.9697, "step": 25977 }, { "epoch": 0.8381503939214899, "grad_norm": 0.365234375, "learning_rate": 2.0295987667904676e-06, "loss": 1.9084, "step": 25978 }, { "epoch": 0.8381826577752863, "grad_norm": 0.35546875, "learning_rate": 2.0288083515953387e-06, "loss": 1.9462, "step": 25979 }, { "epoch": 0.8382149216290826, "grad_norm": 0.3515625, "learning_rate": 2.0280180791777816e-06, "loss": 1.9531, "step": 25980 }, { "epoch": 0.838247185482879, "grad_norm": 0.353515625, "learning_rate": 2.027227949546492e-06, "loss": 1.9595, "step": 25981 }, { "epoch": 0.8382794493366753, "grad_norm": 0.361328125, "learning_rate": 2.026437962710175e-06, "loss": 1.8545, "step": 25982 }, { "epoch": 0.8383117131904717, "grad_norm": 0.359375, "learning_rate": 2.02564811867752e-06, "loss": 1.8983, "step": 25983 }, { "epoch": 0.838343977044268, "grad_norm": 0.365234375, "learning_rate": 2.0248584174572205e-06, "loss": 1.915, "step": 25984 }, { "epoch": 0.8383762408980644, "grad_norm": 0.361328125, "learning_rate": 2.0240688590579744e-06, "loss": 1.9178, "step": 25985 }, { "epoch": 0.8384085047518607, "grad_norm": 0.359375, "learning_rate": 2.0232794434884693e-06, "loss": 1.884, "step": 25986 }, { "epoch": 0.8384407686056571, "grad_norm": 0.369140625, "learning_rate": 2.0224901707573904e-06, "loss": 1.8727, "step": 25987 }, { "epoch": 0.8384730324594534, "grad_norm": 0.361328125, "learning_rate": 2.021701040873434e-06, "loss": 1.9399, "step": 25988 }, { "epoch": 0.8385052963132498, "grad_norm": 0.390625, "learning_rate": 2.0209120538452834e-06, "loss": 1.9143, "step": 25989 }, { "epoch": 0.8385375601670461, "grad_norm": 0.376953125, "learning_rate": 2.0201232096816165e-06, "loss": 1.8903, "step": 25990 }, { "epoch": 0.8385698240208425, "grad_norm": 0.36328125, "learning_rate": 2.019334508391129e-06, "loss": 1.8907, "step": 25991 }, { "epoch": 0.8386020878746387, "grad_norm": 0.376953125, "learning_rate": 2.0185459499824883e-06, "loss": 1.9317, "step": 25992 }, { "epoch": 0.8386343517284351, "grad_norm": 0.353515625, "learning_rate": 2.01775753446438e-06, "loss": 1.9558, "step": 25993 }, { "epoch": 0.8386666155822314, "grad_norm": 0.3828125, "learning_rate": 2.016969261845491e-06, "loss": 1.9661, "step": 25994 }, { "epoch": 0.8386988794360278, "grad_norm": 0.376953125, "learning_rate": 2.0161811321344826e-06, "loss": 1.9621, "step": 25995 }, { "epoch": 0.8387311432898242, "grad_norm": 0.36328125, "learning_rate": 2.015393145340039e-06, "loss": 1.8821, "step": 25996 }, { "epoch": 0.8387634071436205, "grad_norm": 0.37890625, "learning_rate": 2.0146053014708375e-06, "loss": 1.9134, "step": 25997 }, { "epoch": 0.8387956709974169, "grad_norm": 0.38671875, "learning_rate": 2.013817600535541e-06, "loss": 1.9294, "step": 25998 }, { "epoch": 0.8388279348512132, "grad_norm": 0.37890625, "learning_rate": 2.013030042542825e-06, "loss": 1.9635, "step": 25999 }, { "epoch": 0.8388601987050096, "grad_norm": 0.361328125, "learning_rate": 2.01224262750136e-06, "loss": 1.9514, "step": 26000 }, { "epoch": 0.8388924625588059, "grad_norm": 0.3984375, "learning_rate": 2.0114553554198057e-06, "loss": 1.9426, "step": 26001 }, { "epoch": 0.8389247264126023, "grad_norm": 0.361328125, "learning_rate": 2.0106682263068366e-06, "loss": 1.8571, "step": 26002 }, { "epoch": 0.8389569902663986, "grad_norm": 0.37890625, "learning_rate": 2.0098812401711127e-06, "loss": 1.8398, "step": 26003 }, { "epoch": 0.838989254120195, "grad_norm": 0.36328125, "learning_rate": 2.0090943970212934e-06, "loss": 1.8638, "step": 26004 }, { "epoch": 0.8390215179739913, "grad_norm": 0.359375, "learning_rate": 2.0083076968660487e-06, "loss": 1.8447, "step": 26005 }, { "epoch": 0.8390537818277877, "grad_norm": 0.357421875, "learning_rate": 2.0075211397140314e-06, "loss": 1.9393, "step": 26006 }, { "epoch": 0.839086045681584, "grad_norm": 0.365234375, "learning_rate": 2.0067347255738964e-06, "loss": 1.9368, "step": 26007 }, { "epoch": 0.8391183095353804, "grad_norm": 0.361328125, "learning_rate": 2.005948454454309e-06, "loss": 1.8823, "step": 26008 }, { "epoch": 0.8391505733891766, "grad_norm": 0.38671875, "learning_rate": 2.005162326363915e-06, "loss": 1.8695, "step": 26009 }, { "epoch": 0.839182837242973, "grad_norm": 0.365234375, "learning_rate": 2.004376341311375e-06, "loss": 1.8765, "step": 26010 }, { "epoch": 0.8392151010967693, "grad_norm": 0.390625, "learning_rate": 2.0035904993053373e-06, "loss": 1.8965, "step": 26011 }, { "epoch": 0.8392473649505657, "grad_norm": 0.369140625, "learning_rate": 2.0028048003544487e-06, "loss": 1.8613, "step": 26012 }, { "epoch": 0.839279628804362, "grad_norm": 0.36328125, "learning_rate": 2.002019244467364e-06, "loss": 1.883, "step": 26013 }, { "epoch": 0.8393118926581584, "grad_norm": 0.376953125, "learning_rate": 2.001233831652727e-06, "loss": 1.8912, "step": 26014 }, { "epoch": 0.8393441565119548, "grad_norm": 0.376953125, "learning_rate": 2.000448561919178e-06, "loss": 1.899, "step": 26015 }, { "epoch": 0.8393764203657511, "grad_norm": 0.375, "learning_rate": 1.9996634352753697e-06, "loss": 1.8815, "step": 26016 }, { "epoch": 0.8394086842195475, "grad_norm": 0.361328125, "learning_rate": 1.9988784517299407e-06, "loss": 1.872, "step": 26017 }, { "epoch": 0.8394409480733438, "grad_norm": 0.37890625, "learning_rate": 1.9980936112915265e-06, "loss": 1.9116, "step": 26018 }, { "epoch": 0.8394732119271402, "grad_norm": 0.373046875, "learning_rate": 1.9973089139687765e-06, "loss": 1.8856, "step": 26019 }, { "epoch": 0.8395054757809365, "grad_norm": 0.353515625, "learning_rate": 1.9965243597703157e-06, "loss": 1.9115, "step": 26020 }, { "epoch": 0.8395377396347329, "grad_norm": 0.384765625, "learning_rate": 1.995739948704787e-06, "loss": 1.8696, "step": 26021 }, { "epoch": 0.8395700034885292, "grad_norm": 0.3671875, "learning_rate": 1.994955680780829e-06, "loss": 1.8626, "step": 26022 }, { "epoch": 0.8396022673423256, "grad_norm": 0.373046875, "learning_rate": 1.994171556007064e-06, "loss": 1.883, "step": 26023 }, { "epoch": 0.8396345311961219, "grad_norm": 0.359375, "learning_rate": 1.993387574392127e-06, "loss": 1.8799, "step": 26024 }, { "epoch": 0.8396667950499183, "grad_norm": 0.40234375, "learning_rate": 1.9926037359446546e-06, "loss": 1.9258, "step": 26025 }, { "epoch": 0.8396990589037145, "grad_norm": 0.357421875, "learning_rate": 1.9918200406732633e-06, "loss": 1.9527, "step": 26026 }, { "epoch": 0.839731322757511, "grad_norm": 0.37890625, "learning_rate": 1.9910364885865863e-06, "loss": 1.8495, "step": 26027 }, { "epoch": 0.8397635866113072, "grad_norm": 0.36328125, "learning_rate": 1.990253079693251e-06, "loss": 1.9135, "step": 26028 }, { "epoch": 0.8397958504651036, "grad_norm": 0.359375, "learning_rate": 1.9894698140018713e-06, "loss": 1.8237, "step": 26029 }, { "epoch": 0.8398281143188999, "grad_norm": 0.408203125, "learning_rate": 1.9886866915210784e-06, "loss": 1.8942, "step": 26030 }, { "epoch": 0.8398603781726963, "grad_norm": 0.365234375, "learning_rate": 1.987903712259487e-06, "loss": 1.8816, "step": 26031 }, { "epoch": 0.8398926420264926, "grad_norm": 0.375, "learning_rate": 1.987120876225713e-06, "loss": 1.8906, "step": 26032 }, { "epoch": 0.839924905880289, "grad_norm": 0.37109375, "learning_rate": 1.9863381834283806e-06, "loss": 1.8854, "step": 26033 }, { "epoch": 0.8399571697340853, "grad_norm": 0.376953125, "learning_rate": 1.9855556338761023e-06, "loss": 1.8551, "step": 26034 }, { "epoch": 0.8399894335878817, "grad_norm": 0.361328125, "learning_rate": 1.984773227577486e-06, "loss": 1.8606, "step": 26035 }, { "epoch": 0.8400216974416781, "grad_norm": 0.369140625, "learning_rate": 1.9839909645411535e-06, "loss": 1.8801, "step": 26036 }, { "epoch": 0.8400539612954744, "grad_norm": 0.384765625, "learning_rate": 1.9832088447757113e-06, "loss": 1.853, "step": 26037 }, { "epoch": 0.8400862251492708, "grad_norm": 0.3828125, "learning_rate": 1.982426868289764e-06, "loss": 1.8596, "step": 26038 }, { "epoch": 0.8401184890030671, "grad_norm": 0.37109375, "learning_rate": 1.981645035091926e-06, "loss": 1.8293, "step": 26039 }, { "epoch": 0.8401507528568635, "grad_norm": 0.390625, "learning_rate": 1.9808633451908e-06, "loss": 1.8418, "step": 26040 }, { "epoch": 0.8401830167106598, "grad_norm": 0.388671875, "learning_rate": 1.9800817985949874e-06, "loss": 1.8338, "step": 26041 }, { "epoch": 0.8402152805644562, "grad_norm": 0.376953125, "learning_rate": 1.9793003953130978e-06, "loss": 1.8177, "step": 26042 }, { "epoch": 0.8402475444182524, "grad_norm": 0.38671875, "learning_rate": 1.978519135353725e-06, "loss": 1.8273, "step": 26043 }, { "epoch": 0.8402798082720488, "grad_norm": 0.392578125, "learning_rate": 1.9777380187254746e-06, "loss": 1.8902, "step": 26044 }, { "epoch": 0.8403120721258451, "grad_norm": 0.359375, "learning_rate": 1.976957045436942e-06, "loss": 1.9402, "step": 26045 }, { "epoch": 0.8403443359796415, "grad_norm": 0.357421875, "learning_rate": 1.9761762154967218e-06, "loss": 1.9486, "step": 26046 }, { "epoch": 0.8403765998334378, "grad_norm": 0.365234375, "learning_rate": 1.9753955289134124e-06, "loss": 1.9101, "step": 26047 }, { "epoch": 0.8404088636872342, "grad_norm": 0.3671875, "learning_rate": 1.9746149856956044e-06, "loss": 1.9058, "step": 26048 }, { "epoch": 0.8404411275410305, "grad_norm": 0.373046875, "learning_rate": 1.9738345858518886e-06, "loss": 1.9135, "step": 26049 }, { "epoch": 0.8404733913948269, "grad_norm": 0.3515625, "learning_rate": 1.973054329390862e-06, "loss": 1.9194, "step": 26050 }, { "epoch": 0.8405056552486232, "grad_norm": 0.376953125, "learning_rate": 1.972274216321103e-06, "loss": 1.9621, "step": 26051 }, { "epoch": 0.8405379191024196, "grad_norm": 0.3671875, "learning_rate": 1.971494246651202e-06, "loss": 1.9642, "step": 26052 }, { "epoch": 0.8405701829562159, "grad_norm": 0.40625, "learning_rate": 1.970714420389753e-06, "loss": 1.9486, "step": 26053 }, { "epoch": 0.8406024468100123, "grad_norm": 0.357421875, "learning_rate": 1.969934737545326e-06, "loss": 1.9396, "step": 26054 }, { "epoch": 0.8406347106638087, "grad_norm": 0.35546875, "learning_rate": 1.9691551981265082e-06, "loss": 1.9355, "step": 26055 }, { "epoch": 0.840666974517605, "grad_norm": 0.365234375, "learning_rate": 1.96837580214189e-06, "loss": 1.9301, "step": 26056 }, { "epoch": 0.8406992383714014, "grad_norm": 0.380859375, "learning_rate": 1.967596549600035e-06, "loss": 2.0419, "step": 26057 }, { "epoch": 0.8407315022251977, "grad_norm": 0.36328125, "learning_rate": 1.966817440509527e-06, "loss": 2.0024, "step": 26058 }, { "epoch": 0.8407637660789941, "grad_norm": 0.365234375, "learning_rate": 1.9660384748789495e-06, "loss": 1.9701, "step": 26059 }, { "epoch": 0.8407960299327903, "grad_norm": 0.388671875, "learning_rate": 1.965259652716864e-06, "loss": 1.9528, "step": 26060 }, { "epoch": 0.8408282937865867, "grad_norm": 0.35546875, "learning_rate": 1.9644809740318525e-06, "loss": 2.0282, "step": 26061 }, { "epoch": 0.840860557640383, "grad_norm": 0.35546875, "learning_rate": 1.9637024388324827e-06, "loss": 2.0046, "step": 26062 }, { "epoch": 0.8408928214941794, "grad_norm": 0.36328125, "learning_rate": 1.962924047127321e-06, "loss": 1.9767, "step": 26063 }, { "epoch": 0.8409250853479757, "grad_norm": 0.357421875, "learning_rate": 1.962145798924942e-06, "loss": 1.965, "step": 26064 }, { "epoch": 0.8409573492017721, "grad_norm": 0.35546875, "learning_rate": 1.9613676942339076e-06, "loss": 2.0593, "step": 26065 }, { "epoch": 0.8409896130555684, "grad_norm": 0.353515625, "learning_rate": 1.960589733062781e-06, "loss": 2.0865, "step": 26066 }, { "epoch": 0.8410218769093648, "grad_norm": 0.359375, "learning_rate": 1.9598119154201334e-06, "loss": 2.0663, "step": 26067 }, { "epoch": 0.8410541407631611, "grad_norm": 0.375, "learning_rate": 1.959034241314518e-06, "loss": 2.0187, "step": 26068 }, { "epoch": 0.8410864046169575, "grad_norm": 0.375, "learning_rate": 1.958256710754496e-06, "loss": 2.1142, "step": 26069 }, { "epoch": 0.8411186684707538, "grad_norm": 0.34375, "learning_rate": 1.957479323748633e-06, "loss": 2.0499, "step": 26070 }, { "epoch": 0.8411509323245502, "grad_norm": 0.3515625, "learning_rate": 1.95670208030548e-06, "loss": 2.0363, "step": 26071 }, { "epoch": 0.8411831961783465, "grad_norm": 0.380859375, "learning_rate": 1.9559249804335887e-06, "loss": 2.0574, "step": 26072 }, { "epoch": 0.8412154600321429, "grad_norm": 0.345703125, "learning_rate": 1.955148024141522e-06, "loss": 2.0431, "step": 26073 }, { "epoch": 0.8412477238859392, "grad_norm": 0.34765625, "learning_rate": 1.954371211437828e-06, "loss": 2.0726, "step": 26074 }, { "epoch": 0.8412799877397356, "grad_norm": 0.35546875, "learning_rate": 1.953594542331052e-06, "loss": 2.1013, "step": 26075 }, { "epoch": 0.841312251593532, "grad_norm": 0.390625, "learning_rate": 1.9528180168297517e-06, "loss": 2.0363, "step": 26076 }, { "epoch": 0.8413445154473282, "grad_norm": 0.345703125, "learning_rate": 1.9520416349424687e-06, "loss": 2.0235, "step": 26077 }, { "epoch": 0.8413767793011246, "grad_norm": 0.361328125, "learning_rate": 1.9512653966777527e-06, "loss": 2.0668, "step": 26078 }, { "epoch": 0.8414090431549209, "grad_norm": 0.349609375, "learning_rate": 1.9504893020441454e-06, "loss": 2.0602, "step": 26079 }, { "epoch": 0.8414413070087173, "grad_norm": 0.357421875, "learning_rate": 1.9497133510501884e-06, "loss": 2.0508, "step": 26080 }, { "epoch": 0.8414735708625136, "grad_norm": 0.357421875, "learning_rate": 1.94893754370443e-06, "loss": 2.0757, "step": 26081 }, { "epoch": 0.84150583471631, "grad_norm": 0.33984375, "learning_rate": 1.948161880015398e-06, "loss": 2.0695, "step": 26082 }, { "epoch": 0.8415380985701063, "grad_norm": 0.359375, "learning_rate": 1.947386359991637e-06, "loss": 2.0382, "step": 26083 }, { "epoch": 0.8415703624239027, "grad_norm": 0.4765625, "learning_rate": 1.946610983641691e-06, "loss": 2.0055, "step": 26084 }, { "epoch": 0.841602626277699, "grad_norm": 0.34765625, "learning_rate": 1.9458357509740785e-06, "loss": 2.0446, "step": 26085 }, { "epoch": 0.8416348901314954, "grad_norm": 0.373046875, "learning_rate": 1.945060661997341e-06, "loss": 2.0634, "step": 26086 }, { "epoch": 0.8416671539852917, "grad_norm": 0.373046875, "learning_rate": 1.9442857167200174e-06, "loss": 1.9264, "step": 26087 }, { "epoch": 0.8416994178390881, "grad_norm": 0.37109375, "learning_rate": 1.943510915150624e-06, "loss": 1.9523, "step": 26088 }, { "epoch": 0.8417316816928844, "grad_norm": 0.35546875, "learning_rate": 1.9427362572976946e-06, "loss": 1.9468, "step": 26089 }, { "epoch": 0.8417639455466808, "grad_norm": 0.357421875, "learning_rate": 1.9419617431697658e-06, "loss": 1.9691, "step": 26090 }, { "epoch": 0.8417962094004771, "grad_norm": 0.384765625, "learning_rate": 1.9411873727753465e-06, "loss": 1.897, "step": 26091 }, { "epoch": 0.8418284732542735, "grad_norm": 0.390625, "learning_rate": 1.9404131461229707e-06, "loss": 1.9026, "step": 26092 }, { "epoch": 0.8418607371080697, "grad_norm": 0.359375, "learning_rate": 1.93963906322116e-06, "loss": 1.9067, "step": 26093 }, { "epoch": 0.8418930009618661, "grad_norm": 0.36328125, "learning_rate": 1.9388651240784284e-06, "loss": 1.9181, "step": 26094 }, { "epoch": 0.8419252648156624, "grad_norm": 0.349609375, "learning_rate": 1.938091328703303e-06, "loss": 1.9153, "step": 26095 }, { "epoch": 0.8419575286694588, "grad_norm": 0.361328125, "learning_rate": 1.937317677104297e-06, "loss": 1.9811, "step": 26096 }, { "epoch": 0.8419897925232552, "grad_norm": 0.361328125, "learning_rate": 1.9365441692899254e-06, "loss": 1.9476, "step": 26097 }, { "epoch": 0.8420220563770515, "grad_norm": 0.3671875, "learning_rate": 1.935770805268706e-06, "loss": 1.9883, "step": 26098 }, { "epoch": 0.8420543202308479, "grad_norm": 0.353515625, "learning_rate": 1.9349975850491492e-06, "loss": 1.9877, "step": 26099 }, { "epoch": 0.8420865840846442, "grad_norm": 0.353515625, "learning_rate": 1.934224508639764e-06, "loss": 2.0036, "step": 26100 }, { "epoch": 0.8421188479384406, "grad_norm": 0.40234375, "learning_rate": 1.9334515760490647e-06, "loss": 1.9931, "step": 26101 }, { "epoch": 0.8421511117922369, "grad_norm": 0.9765625, "learning_rate": 1.932678787285555e-06, "loss": 1.9921, "step": 26102 }, { "epoch": 0.8421833756460333, "grad_norm": 0.373046875, "learning_rate": 1.9319061423577424e-06, "loss": 1.9799, "step": 26103 }, { "epoch": 0.8422156394998296, "grad_norm": 0.359375, "learning_rate": 1.9311336412741325e-06, "loss": 1.9931, "step": 26104 }, { "epoch": 0.842247903353626, "grad_norm": 0.349609375, "learning_rate": 1.9303612840432295e-06, "loss": 2.0078, "step": 26105 }, { "epoch": 0.8422801672074223, "grad_norm": 0.3671875, "learning_rate": 1.929589070673531e-06, "loss": 2.0347, "step": 26106 }, { "epoch": 0.8423124310612187, "grad_norm": 0.380859375, "learning_rate": 1.92881700117354e-06, "loss": 2.0085, "step": 26107 }, { "epoch": 0.842344694915015, "grad_norm": 0.35546875, "learning_rate": 1.9280450755517553e-06, "loss": 2.0466, "step": 26108 }, { "epoch": 0.8423769587688114, "grad_norm": 0.345703125, "learning_rate": 1.92727329381667e-06, "loss": 2.0329, "step": 26109 }, { "epoch": 0.8424092226226076, "grad_norm": 0.3515625, "learning_rate": 1.926501655976785e-06, "loss": 2.0352, "step": 26110 }, { "epoch": 0.842441486476404, "grad_norm": 0.353515625, "learning_rate": 1.9257301620405877e-06, "loss": 2.0481, "step": 26111 }, { "epoch": 0.8424737503302003, "grad_norm": 0.35546875, "learning_rate": 1.9249588120165805e-06, "loss": 2.0301, "step": 26112 }, { "epoch": 0.8425060141839967, "grad_norm": 0.357421875, "learning_rate": 1.924187605913239e-06, "loss": 2.0602, "step": 26113 }, { "epoch": 0.842538278037793, "grad_norm": 0.34765625, "learning_rate": 1.9234165437390596e-06, "loss": 2.0502, "step": 26114 }, { "epoch": 0.8425705418915894, "grad_norm": 0.3515625, "learning_rate": 1.9226456255025364e-06, "loss": 2.0411, "step": 26115 }, { "epoch": 0.8426028057453858, "grad_norm": 0.359375, "learning_rate": 1.9218748512121438e-06, "loss": 2.0225, "step": 26116 }, { "epoch": 0.8426350695991821, "grad_norm": 0.392578125, "learning_rate": 1.921104220876369e-06, "loss": 2.0111, "step": 26117 }, { "epoch": 0.8426673334529785, "grad_norm": 0.36328125, "learning_rate": 1.9203337345037024e-06, "loss": 1.9804, "step": 26118 }, { "epoch": 0.8426995973067748, "grad_norm": 0.337890625, "learning_rate": 1.9195633921026125e-06, "loss": 2.043, "step": 26119 }, { "epoch": 0.8427318611605712, "grad_norm": 0.357421875, "learning_rate": 1.9187931936815883e-06, "loss": 2.0131, "step": 26120 }, { "epoch": 0.8427641250143675, "grad_norm": 0.33984375, "learning_rate": 1.9180231392491055e-06, "loss": 2.0275, "step": 26121 }, { "epoch": 0.8427963888681639, "grad_norm": 0.404296875, "learning_rate": 1.9172532288136336e-06, "loss": 2.0219, "step": 26122 }, { "epoch": 0.8428286527219602, "grad_norm": 0.49609375, "learning_rate": 1.9164834623836574e-06, "loss": 1.9912, "step": 26123 }, { "epoch": 0.8428609165757566, "grad_norm": 0.3515625, "learning_rate": 1.9157138399676437e-06, "loss": 2.0604, "step": 26124 }, { "epoch": 0.8428931804295529, "grad_norm": 0.349609375, "learning_rate": 1.9149443615740643e-06, "loss": 2.0277, "step": 26125 }, { "epoch": 0.8429254442833493, "grad_norm": 0.34765625, "learning_rate": 1.914175027211392e-06, "loss": 2.0456, "step": 26126 }, { "epoch": 0.8429577081371455, "grad_norm": 0.376953125, "learning_rate": 1.913405836888093e-06, "loss": 2.0586, "step": 26127 }, { "epoch": 0.8429899719909419, "grad_norm": 0.3359375, "learning_rate": 1.9126367906126314e-06, "loss": 2.1104, "step": 26128 }, { "epoch": 0.8430222358447382, "grad_norm": 0.392578125, "learning_rate": 1.9118678883934797e-06, "loss": 2.1294, "step": 26129 }, { "epoch": 0.8430544996985346, "grad_norm": 0.384765625, "learning_rate": 1.911099130239095e-06, "loss": 2.0548, "step": 26130 }, { "epoch": 0.8430867635523309, "grad_norm": 0.34765625, "learning_rate": 1.910330516157939e-06, "loss": 2.1065, "step": 26131 }, { "epoch": 0.8431190274061273, "grad_norm": 0.341796875, "learning_rate": 1.9095620461584774e-06, "loss": 2.1101, "step": 26132 }, { "epoch": 0.8431512912599236, "grad_norm": 0.357421875, "learning_rate": 1.9087937202491657e-06, "loss": 2.1013, "step": 26133 }, { "epoch": 0.84318355511372, "grad_norm": 0.36328125, "learning_rate": 1.908025538438457e-06, "loss": 2.119, "step": 26134 }, { "epoch": 0.8432158189675163, "grad_norm": 0.40234375, "learning_rate": 1.9072575007348148e-06, "loss": 2.1506, "step": 26135 }, { "epoch": 0.8432480828213127, "grad_norm": 0.34375, "learning_rate": 1.9064896071466902e-06, "loss": 2.1245, "step": 26136 }, { "epoch": 0.8432803466751091, "grad_norm": 0.341796875, "learning_rate": 1.9057218576825303e-06, "loss": 2.0998, "step": 26137 }, { "epoch": 0.8433126105289054, "grad_norm": 0.345703125, "learning_rate": 1.9049542523507945e-06, "loss": 2.0784, "step": 26138 }, { "epoch": 0.8433448743827018, "grad_norm": 0.345703125, "learning_rate": 1.9041867911599263e-06, "loss": 2.0862, "step": 26139 }, { "epoch": 0.8433771382364981, "grad_norm": 0.349609375, "learning_rate": 1.903419474118374e-06, "loss": 2.0689, "step": 26140 }, { "epoch": 0.8434094020902945, "grad_norm": 0.349609375, "learning_rate": 1.9026523012345876e-06, "loss": 2.0552, "step": 26141 }, { "epoch": 0.8434416659440908, "grad_norm": 0.349609375, "learning_rate": 1.9018852725170049e-06, "loss": 2.1144, "step": 26142 }, { "epoch": 0.8434739297978872, "grad_norm": 0.400390625, "learning_rate": 1.901118387974078e-06, "loss": 2.0428, "step": 26143 }, { "epoch": 0.8435061936516834, "grad_norm": 0.34375, "learning_rate": 1.9003516476142379e-06, "loss": 2.1169, "step": 26144 }, { "epoch": 0.8435384575054798, "grad_norm": 0.345703125, "learning_rate": 1.8995850514459284e-06, "loss": 2.1335, "step": 26145 }, { "epoch": 0.8435707213592761, "grad_norm": 0.337890625, "learning_rate": 1.8988185994775942e-06, "loss": 2.1622, "step": 26146 }, { "epoch": 0.8436029852130725, "grad_norm": 0.337890625, "learning_rate": 1.8980522917176617e-06, "loss": 2.0703, "step": 26147 }, { "epoch": 0.8436352490668688, "grad_norm": 0.34375, "learning_rate": 1.8972861281745679e-06, "loss": 2.063, "step": 26148 }, { "epoch": 0.8436675129206652, "grad_norm": 0.353515625, "learning_rate": 1.8965201088567574e-06, "loss": 2.0493, "step": 26149 }, { "epoch": 0.8436997767744615, "grad_norm": 0.345703125, "learning_rate": 1.8957542337726453e-06, "loss": 2.044, "step": 26150 }, { "epoch": 0.8437320406282579, "grad_norm": 0.369140625, "learning_rate": 1.8949885029306729e-06, "loss": 2.0315, "step": 26151 }, { "epoch": 0.8437643044820542, "grad_norm": 0.345703125, "learning_rate": 1.8942229163392654e-06, "loss": 2.0249, "step": 26152 }, { "epoch": 0.8437965683358506, "grad_norm": 0.380859375, "learning_rate": 1.8934574740068477e-06, "loss": 2.034, "step": 26153 }, { "epoch": 0.8438288321896469, "grad_norm": 0.357421875, "learning_rate": 1.8926921759418498e-06, "loss": 2.0675, "step": 26154 }, { "epoch": 0.8438610960434433, "grad_norm": 0.35546875, "learning_rate": 1.891927022152693e-06, "loss": 2.0212, "step": 26155 }, { "epoch": 0.8438933598972397, "grad_norm": 0.361328125, "learning_rate": 1.8911620126477975e-06, "loss": 2.0449, "step": 26156 }, { "epoch": 0.843925623751036, "grad_norm": 0.353515625, "learning_rate": 1.8903971474355897e-06, "loss": 2.0318, "step": 26157 }, { "epoch": 0.8439578876048324, "grad_norm": 0.33984375, "learning_rate": 1.8896324265244847e-06, "loss": 2.0347, "step": 26158 }, { "epoch": 0.8439901514586287, "grad_norm": 0.345703125, "learning_rate": 1.8888678499228972e-06, "loss": 2.0335, "step": 26159 }, { "epoch": 0.8440224153124251, "grad_norm": 0.37890625, "learning_rate": 1.8881034176392509e-06, "loss": 2.0354, "step": 26160 }, { "epoch": 0.8440546791662213, "grad_norm": 0.341796875, "learning_rate": 1.8873391296819554e-06, "loss": 2.0433, "step": 26161 }, { "epoch": 0.8440869430200177, "grad_norm": 0.3359375, "learning_rate": 1.8865749860594206e-06, "loss": 2.0202, "step": 26162 }, { "epoch": 0.844119206873814, "grad_norm": 0.34765625, "learning_rate": 1.8858109867800632e-06, "loss": 2.0662, "step": 26163 }, { "epoch": 0.8441514707276104, "grad_norm": 0.34765625, "learning_rate": 1.88504713185229e-06, "loss": 2.0511, "step": 26164 }, { "epoch": 0.8441837345814067, "grad_norm": 0.41796875, "learning_rate": 1.8842834212845074e-06, "loss": 2.0801, "step": 26165 }, { "epoch": 0.8442159984352031, "grad_norm": 0.375, "learning_rate": 1.8835198550851268e-06, "loss": 2.0412, "step": 26166 }, { "epoch": 0.8442482622889994, "grad_norm": 0.345703125, "learning_rate": 1.8827564332625485e-06, "loss": 2.0655, "step": 26167 }, { "epoch": 0.8442805261427958, "grad_norm": 0.3359375, "learning_rate": 1.8819931558251757e-06, "loss": 2.058, "step": 26168 }, { "epoch": 0.8443127899965921, "grad_norm": 0.341796875, "learning_rate": 1.8812300227814132e-06, "loss": 2.1019, "step": 26169 }, { "epoch": 0.8443450538503885, "grad_norm": 0.34375, "learning_rate": 1.8804670341396612e-06, "loss": 2.1386, "step": 26170 }, { "epoch": 0.8443773177041848, "grad_norm": 0.359375, "learning_rate": 1.8797041899083145e-06, "loss": 2.0884, "step": 26171 }, { "epoch": 0.8444095815579812, "grad_norm": 0.345703125, "learning_rate": 1.878941490095768e-06, "loss": 2.0778, "step": 26172 }, { "epoch": 0.8444418454117775, "grad_norm": 0.3828125, "learning_rate": 1.878178934710425e-06, "loss": 2.1442, "step": 26173 }, { "epoch": 0.8444741092655739, "grad_norm": 0.345703125, "learning_rate": 1.8774165237606738e-06, "loss": 2.1552, "step": 26174 }, { "epoch": 0.8445063731193702, "grad_norm": 0.337890625, "learning_rate": 1.876654257254906e-06, "loss": 2.1284, "step": 26175 }, { "epoch": 0.8445386369731666, "grad_norm": 0.328125, "learning_rate": 1.8758921352015117e-06, "loss": 2.1136, "step": 26176 }, { "epoch": 0.844570900826963, "grad_norm": 0.359375, "learning_rate": 1.875130157608889e-06, "loss": 2.1663, "step": 26177 }, { "epoch": 0.8446031646807592, "grad_norm": 0.341796875, "learning_rate": 1.8743683244854099e-06, "loss": 2.1408, "step": 26178 }, { "epoch": 0.8446354285345556, "grad_norm": 0.34375, "learning_rate": 1.8736066358394687e-06, "loss": 2.1523, "step": 26179 }, { "epoch": 0.8446676923883519, "grad_norm": 0.34375, "learning_rate": 1.872845091679456e-06, "loss": 2.1069, "step": 26180 }, { "epoch": 0.8446999562421483, "grad_norm": 0.3359375, "learning_rate": 1.8720836920137414e-06, "loss": 2.0546, "step": 26181 }, { "epoch": 0.8447322200959446, "grad_norm": 0.33203125, "learning_rate": 1.8713224368507131e-06, "loss": 2.0918, "step": 26182 }, { "epoch": 0.844764483949741, "grad_norm": 0.341796875, "learning_rate": 1.8705613261987498e-06, "loss": 2.1427, "step": 26183 }, { "epoch": 0.8447967478035373, "grad_norm": 0.369140625, "learning_rate": 1.8698003600662245e-06, "loss": 2.1078, "step": 26184 }, { "epoch": 0.8448290116573337, "grad_norm": 0.376953125, "learning_rate": 1.8690395384615206e-06, "loss": 2.0725, "step": 26185 }, { "epoch": 0.84486127551113, "grad_norm": 0.365234375, "learning_rate": 1.8682788613930096e-06, "loss": 2.1066, "step": 26186 }, { "epoch": 0.8448935393649264, "grad_norm": 0.33984375, "learning_rate": 1.8675183288690616e-06, "loss": 2.1289, "step": 26187 }, { "epoch": 0.8449258032187227, "grad_norm": 0.373046875, "learning_rate": 1.8667579408980533e-06, "loss": 2.0211, "step": 26188 }, { "epoch": 0.8449580670725191, "grad_norm": 0.345703125, "learning_rate": 1.8659976974883514e-06, "loss": 2.0663, "step": 26189 }, { "epoch": 0.8449903309263154, "grad_norm": 0.337890625, "learning_rate": 1.8652375986483222e-06, "loss": 2.1282, "step": 26190 }, { "epoch": 0.8450225947801118, "grad_norm": 0.35546875, "learning_rate": 1.8644776443863376e-06, "loss": 2.1293, "step": 26191 }, { "epoch": 0.8450548586339081, "grad_norm": 0.33984375, "learning_rate": 1.8637178347107593e-06, "loss": 2.0509, "step": 26192 }, { "epoch": 0.8450871224877045, "grad_norm": 0.373046875, "learning_rate": 1.8629581696299485e-06, "loss": 2.1411, "step": 26193 }, { "epoch": 0.8451193863415007, "grad_norm": 0.3359375, "learning_rate": 1.8621986491522724e-06, "loss": 2.0326, "step": 26194 }, { "epoch": 0.8451516501952971, "grad_norm": 0.3515625, "learning_rate": 1.8614392732860891e-06, "loss": 2.0951, "step": 26195 }, { "epoch": 0.8451839140490934, "grad_norm": 0.333984375, "learning_rate": 1.8606800420397517e-06, "loss": 2.1273, "step": 26196 }, { "epoch": 0.8452161779028898, "grad_norm": 0.337890625, "learning_rate": 1.8599209554216272e-06, "loss": 2.1322, "step": 26197 }, { "epoch": 0.8452484417566862, "grad_norm": 0.330078125, "learning_rate": 1.8591620134400672e-06, "loss": 2.102, "step": 26198 }, { "epoch": 0.8452807056104825, "grad_norm": 0.330078125, "learning_rate": 1.8584032161034198e-06, "loss": 2.0976, "step": 26199 }, { "epoch": 0.8453129694642789, "grad_norm": 0.388671875, "learning_rate": 1.8576445634200451e-06, "loss": 2.1567, "step": 26200 }, { "epoch": 0.8453452333180752, "grad_norm": 0.388671875, "learning_rate": 1.8568860553982913e-06, "loss": 2.1149, "step": 26201 }, { "epoch": 0.8453774971718716, "grad_norm": 0.326171875, "learning_rate": 1.856127692046507e-06, "loss": 2.1827, "step": 26202 }, { "epoch": 0.8454097610256679, "grad_norm": 0.34375, "learning_rate": 1.8553694733730369e-06, "loss": 2.1616, "step": 26203 }, { "epoch": 0.8454420248794643, "grad_norm": 0.330078125, "learning_rate": 1.8546113993862313e-06, "loss": 2.1971, "step": 26204 }, { "epoch": 0.8454742887332606, "grad_norm": 0.328125, "learning_rate": 1.8538534700944348e-06, "loss": 2.2092, "step": 26205 }, { "epoch": 0.845506552587057, "grad_norm": 0.330078125, "learning_rate": 1.8530956855059845e-06, "loss": 2.1503, "step": 26206 }, { "epoch": 0.8455388164408533, "grad_norm": 0.330078125, "learning_rate": 1.8523380456292282e-06, "loss": 2.2071, "step": 26207 }, { "epoch": 0.8455710802946497, "grad_norm": 0.3671875, "learning_rate": 1.851580550472503e-06, "loss": 2.1262, "step": 26208 }, { "epoch": 0.845603344148446, "grad_norm": 0.345703125, "learning_rate": 1.8508232000441423e-06, "loss": 2.0782, "step": 26209 }, { "epoch": 0.8456356080022424, "grad_norm": 0.3359375, "learning_rate": 1.8500659943524872e-06, "loss": 2.1228, "step": 26210 }, { "epoch": 0.8456678718560386, "grad_norm": 0.373046875, "learning_rate": 1.8493089334058782e-06, "loss": 2.0739, "step": 26211 }, { "epoch": 0.845700135709835, "grad_norm": 0.33203125, "learning_rate": 1.848552017212637e-06, "loss": 2.1907, "step": 26212 }, { "epoch": 0.8457323995636313, "grad_norm": 0.333984375, "learning_rate": 1.8477952457811032e-06, "loss": 2.1563, "step": 26213 }, { "epoch": 0.8457646634174277, "grad_norm": 0.34375, "learning_rate": 1.847038619119602e-06, "loss": 2.2259, "step": 26214 }, { "epoch": 0.845796927271224, "grad_norm": 0.326171875, "learning_rate": 1.8462821372364635e-06, "loss": 2.2043, "step": 26215 }, { "epoch": 0.8458291911250204, "grad_norm": 0.34375, "learning_rate": 1.845525800140016e-06, "loss": 2.1526, "step": 26216 }, { "epoch": 0.8458614549788168, "grad_norm": 0.328125, "learning_rate": 1.8447696078385845e-06, "loss": 2.1931, "step": 26217 }, { "epoch": 0.8458937188326131, "grad_norm": 0.337890625, "learning_rate": 1.8440135603404872e-06, "loss": 2.1945, "step": 26218 }, { "epoch": 0.8459259826864095, "grad_norm": 0.3515625, "learning_rate": 1.8432576576540543e-06, "loss": 2.2516, "step": 26219 }, { "epoch": 0.8459582465402058, "grad_norm": 0.349609375, "learning_rate": 1.8425018997876025e-06, "loss": 2.1664, "step": 26220 }, { "epoch": 0.8459905103940022, "grad_norm": 0.3515625, "learning_rate": 1.8417462867494483e-06, "loss": 2.1556, "step": 26221 }, { "epoch": 0.8460227742477985, "grad_norm": 0.341796875, "learning_rate": 1.8409908185479136e-06, "loss": 2.218, "step": 26222 }, { "epoch": 0.8460550381015949, "grad_norm": 0.3359375, "learning_rate": 1.840235495191313e-06, "loss": 2.1873, "step": 26223 }, { "epoch": 0.8460873019553912, "grad_norm": 0.33203125, "learning_rate": 1.8394803166879554e-06, "loss": 2.1701, "step": 26224 }, { "epoch": 0.8461195658091876, "grad_norm": 0.369140625, "learning_rate": 1.8387252830461586e-06, "loss": 2.0953, "step": 26225 }, { "epoch": 0.8461518296629839, "grad_norm": 0.361328125, "learning_rate": 1.837970394274235e-06, "loss": 2.1533, "step": 26226 }, { "epoch": 0.8461840935167803, "grad_norm": 0.34375, "learning_rate": 1.8372156503804855e-06, "loss": 2.1417, "step": 26227 }, { "epoch": 0.8462163573705765, "grad_norm": 0.37890625, "learning_rate": 1.8364610513732272e-06, "loss": 2.1042, "step": 26228 }, { "epoch": 0.8462486212243729, "grad_norm": 0.345703125, "learning_rate": 1.8357065972607618e-06, "loss": 2.155, "step": 26229 }, { "epoch": 0.8462808850781692, "grad_norm": 0.3515625, "learning_rate": 1.834952288051391e-06, "loss": 2.1171, "step": 26230 }, { "epoch": 0.8463131489319656, "grad_norm": 0.36328125, "learning_rate": 1.8341981237534244e-06, "loss": 2.019, "step": 26231 }, { "epoch": 0.8463454127857619, "grad_norm": 0.349609375, "learning_rate": 1.8334441043751594e-06, "loss": 2.1296, "step": 26232 }, { "epoch": 0.8463776766395583, "grad_norm": 0.3515625, "learning_rate": 1.8326902299248972e-06, "loss": 2.0816, "step": 26233 }, { "epoch": 0.8464099404933546, "grad_norm": 0.357421875, "learning_rate": 1.8319365004109296e-06, "loss": 1.9964, "step": 26234 }, { "epoch": 0.846442204347151, "grad_norm": 0.35546875, "learning_rate": 1.8311829158415633e-06, "loss": 2.0905, "step": 26235 }, { "epoch": 0.8464744682009473, "grad_norm": 0.349609375, "learning_rate": 1.8304294762250868e-06, "loss": 2.0451, "step": 26236 }, { "epoch": 0.8465067320547437, "grad_norm": 0.3515625, "learning_rate": 1.8296761815697916e-06, "loss": 2.0596, "step": 26237 }, { "epoch": 0.8465389959085401, "grad_norm": 0.357421875, "learning_rate": 1.828923031883976e-06, "loss": 2.083, "step": 26238 }, { "epoch": 0.8465712597623364, "grad_norm": 0.3671875, "learning_rate": 1.8281700271759289e-06, "loss": 2.0462, "step": 26239 }, { "epoch": 0.8466035236161328, "grad_norm": 0.361328125, "learning_rate": 1.8274171674539313e-06, "loss": 2.1149, "step": 26240 }, { "epoch": 0.8466357874699291, "grad_norm": 0.361328125, "learning_rate": 1.8266644527262755e-06, "loss": 2.0128, "step": 26241 }, { "epoch": 0.8466680513237255, "grad_norm": 0.35546875, "learning_rate": 1.8259118830012544e-06, "loss": 2.0699, "step": 26242 }, { "epoch": 0.8467003151775218, "grad_norm": 0.349609375, "learning_rate": 1.8251594582871368e-06, "loss": 2.0509, "step": 26243 }, { "epoch": 0.8467325790313182, "grad_norm": 0.33984375, "learning_rate": 1.8244071785922156e-06, "loss": 2.067, "step": 26244 }, { "epoch": 0.8467648428851144, "grad_norm": 0.341796875, "learning_rate": 1.8236550439247695e-06, "loss": 2.0778, "step": 26245 }, { "epoch": 0.8467971067389108, "grad_norm": 0.353515625, "learning_rate": 1.822903054293072e-06, "loss": 2.1084, "step": 26246 }, { "epoch": 0.8468293705927071, "grad_norm": 0.34765625, "learning_rate": 1.8221512097054061e-06, "loss": 2.0176, "step": 26247 }, { "epoch": 0.8468616344465035, "grad_norm": 0.326171875, "learning_rate": 1.8213995101700471e-06, "loss": 2.0726, "step": 26248 }, { "epoch": 0.8468938983002998, "grad_norm": 0.345703125, "learning_rate": 1.820647955695265e-06, "loss": 2.1051, "step": 26249 }, { "epoch": 0.8469261621540962, "grad_norm": 0.33984375, "learning_rate": 1.8198965462893396e-06, "loss": 2.0984, "step": 26250 }, { "epoch": 0.8469584260078925, "grad_norm": 0.357421875, "learning_rate": 1.8191452819605363e-06, "loss": 2.0784, "step": 26251 }, { "epoch": 0.8469906898616889, "grad_norm": 0.396484375, "learning_rate": 1.8183941627171235e-06, "loss": 2.0695, "step": 26252 }, { "epoch": 0.8470229537154852, "grad_norm": 0.34765625, "learning_rate": 1.8176431885673727e-06, "loss": 2.1154, "step": 26253 }, { "epoch": 0.8470552175692816, "grad_norm": 0.3359375, "learning_rate": 1.8168923595195508e-06, "loss": 2.053, "step": 26254 }, { "epoch": 0.8470874814230779, "grad_norm": 0.341796875, "learning_rate": 1.816141675581916e-06, "loss": 2.0646, "step": 26255 }, { "epoch": 0.8471197452768743, "grad_norm": 0.369140625, "learning_rate": 1.8153911367627384e-06, "loss": 2.1096, "step": 26256 }, { "epoch": 0.8471520091306706, "grad_norm": 0.33984375, "learning_rate": 1.8146407430702784e-06, "loss": 2.0536, "step": 26257 }, { "epoch": 0.847184272984467, "grad_norm": 0.33984375, "learning_rate": 1.8138904945127888e-06, "loss": 2.0745, "step": 26258 }, { "epoch": 0.8472165368382634, "grad_norm": 0.353515625, "learning_rate": 1.8131403910985368e-06, "loss": 2.0308, "step": 26259 }, { "epoch": 0.8472488006920597, "grad_norm": 0.3515625, "learning_rate": 1.8123904328357755e-06, "loss": 2.0719, "step": 26260 }, { "epoch": 0.847281064545856, "grad_norm": 0.357421875, "learning_rate": 1.811640619732755e-06, "loss": 2.0366, "step": 26261 }, { "epoch": 0.8473133283996523, "grad_norm": 0.353515625, "learning_rate": 1.810890951797739e-06, "loss": 2.0756, "step": 26262 }, { "epoch": 0.8473455922534487, "grad_norm": 0.34765625, "learning_rate": 1.8101414290389706e-06, "loss": 2.025, "step": 26263 }, { "epoch": 0.847377856107245, "grad_norm": 0.357421875, "learning_rate": 1.809392051464705e-06, "loss": 2.0486, "step": 26264 }, { "epoch": 0.8474101199610414, "grad_norm": 0.38671875, "learning_rate": 1.8086428190831856e-06, "loss": 1.9856, "step": 26265 }, { "epoch": 0.8474423838148377, "grad_norm": 0.37109375, "learning_rate": 1.8078937319026655e-06, "loss": 2.0326, "step": 26266 }, { "epoch": 0.8474746476686341, "grad_norm": 0.365234375, "learning_rate": 1.807144789931387e-06, "loss": 2.0246, "step": 26267 }, { "epoch": 0.8475069115224304, "grad_norm": 0.380859375, "learning_rate": 1.8063959931775914e-06, "loss": 1.9907, "step": 26268 }, { "epoch": 0.8475391753762268, "grad_norm": 0.357421875, "learning_rate": 1.8056473416495256e-06, "loss": 2.014, "step": 26269 }, { "epoch": 0.8475714392300231, "grad_norm": 0.369140625, "learning_rate": 1.8048988353554296e-06, "loss": 1.9145, "step": 26270 }, { "epoch": 0.8476037030838195, "grad_norm": 0.36328125, "learning_rate": 1.804150474303537e-06, "loss": 1.9621, "step": 26271 }, { "epoch": 0.8476359669376158, "grad_norm": 0.36328125, "learning_rate": 1.8034022585020927e-06, "loss": 1.9876, "step": 26272 }, { "epoch": 0.8476682307914122, "grad_norm": 0.380859375, "learning_rate": 1.80265418795933e-06, "loss": 1.9461, "step": 26273 }, { "epoch": 0.8477004946452085, "grad_norm": 0.392578125, "learning_rate": 1.8019062626834776e-06, "loss": 1.9491, "step": 26274 }, { "epoch": 0.8477327584990049, "grad_norm": 0.36328125, "learning_rate": 1.8011584826827771e-06, "loss": 1.9232, "step": 26275 }, { "epoch": 0.8477650223528012, "grad_norm": 0.3671875, "learning_rate": 1.8004108479654552e-06, "loss": 1.9695, "step": 26276 }, { "epoch": 0.8477972862065976, "grad_norm": 0.37109375, "learning_rate": 1.7996633585397387e-06, "loss": 1.9916, "step": 26277 }, { "epoch": 0.847829550060394, "grad_norm": 0.3671875, "learning_rate": 1.7989160144138594e-06, "loss": 1.991, "step": 26278 }, { "epoch": 0.8478618139141902, "grad_norm": 0.361328125, "learning_rate": 1.798168815596044e-06, "loss": 1.9373, "step": 26279 }, { "epoch": 0.8478940777679866, "grad_norm": 0.359375, "learning_rate": 1.797421762094511e-06, "loss": 1.9631, "step": 26280 }, { "epoch": 0.8479263416217829, "grad_norm": 0.35546875, "learning_rate": 1.7966748539174905e-06, "loss": 1.9508, "step": 26281 }, { "epoch": 0.8479586054755793, "grad_norm": 0.353515625, "learning_rate": 1.7959280910732023e-06, "loss": 1.9623, "step": 26282 }, { "epoch": 0.8479908693293756, "grad_norm": 0.357421875, "learning_rate": 1.7951814735698618e-06, "loss": 1.9265, "step": 26283 }, { "epoch": 0.848023133183172, "grad_norm": 0.37109375, "learning_rate": 1.794435001415694e-06, "loss": 1.9711, "step": 26284 }, { "epoch": 0.8480553970369683, "grad_norm": 0.357421875, "learning_rate": 1.7936886746189107e-06, "loss": 1.9662, "step": 26285 }, { "epoch": 0.8480876608907647, "grad_norm": 0.36328125, "learning_rate": 1.792942493187727e-06, "loss": 1.9771, "step": 26286 }, { "epoch": 0.848119924744561, "grad_norm": 0.369140625, "learning_rate": 1.7921964571303596e-06, "loss": 1.9987, "step": 26287 }, { "epoch": 0.8481521885983574, "grad_norm": 0.37109375, "learning_rate": 1.7914505664550184e-06, "loss": 1.974, "step": 26288 }, { "epoch": 0.8481844524521537, "grad_norm": 0.35546875, "learning_rate": 1.7907048211699107e-06, "loss": 2.0095, "step": 26289 }, { "epoch": 0.8482167163059501, "grad_norm": 0.361328125, "learning_rate": 1.789959221283251e-06, "loss": 2.0048, "step": 26290 }, { "epoch": 0.8482489801597464, "grad_norm": 0.359375, "learning_rate": 1.7892137668032432e-06, "loss": 1.9885, "step": 26291 }, { "epoch": 0.8482812440135428, "grad_norm": 0.353515625, "learning_rate": 1.7884684577380905e-06, "loss": 1.9824, "step": 26292 }, { "epoch": 0.8483135078673391, "grad_norm": 0.37109375, "learning_rate": 1.7877232940960015e-06, "loss": 1.9747, "step": 26293 }, { "epoch": 0.8483457717211355, "grad_norm": 0.41796875, "learning_rate": 1.786978275885176e-06, "loss": 1.9151, "step": 26294 }, { "epoch": 0.8483780355749317, "grad_norm": 0.365234375, "learning_rate": 1.7862334031138161e-06, "loss": 1.9896, "step": 26295 }, { "epoch": 0.8484102994287281, "grad_norm": 0.349609375, "learning_rate": 1.785488675790115e-06, "loss": 1.9723, "step": 26296 }, { "epoch": 0.8484425632825244, "grad_norm": 0.392578125, "learning_rate": 1.784744093922278e-06, "loss": 1.904, "step": 26297 }, { "epoch": 0.8484748271363208, "grad_norm": 0.36328125, "learning_rate": 1.783999657518497e-06, "loss": 1.9259, "step": 26298 }, { "epoch": 0.8485070909901172, "grad_norm": 0.41015625, "learning_rate": 1.7832553665869634e-06, "loss": 1.9218, "step": 26299 }, { "epoch": 0.8485393548439135, "grad_norm": 0.3828125, "learning_rate": 1.7825112211358758e-06, "loss": 1.8827, "step": 26300 }, { "epoch": 0.8485716186977099, "grad_norm": 0.3671875, "learning_rate": 1.7817672211734243e-06, "loss": 1.9048, "step": 26301 }, { "epoch": 0.8486038825515062, "grad_norm": 0.396484375, "learning_rate": 1.7810233667077907e-06, "loss": 1.9112, "step": 26302 }, { "epoch": 0.8486361464053026, "grad_norm": 0.365234375, "learning_rate": 1.7802796577471736e-06, "loss": 1.995, "step": 26303 }, { "epoch": 0.8486684102590989, "grad_norm": 0.384765625, "learning_rate": 1.7795360942997545e-06, "loss": 1.9415, "step": 26304 }, { "epoch": 0.8487006741128953, "grad_norm": 0.353515625, "learning_rate": 1.7787926763737138e-06, "loss": 1.936, "step": 26305 }, { "epoch": 0.8487329379666916, "grad_norm": 0.388671875, "learning_rate": 1.7780494039772415e-06, "loss": 1.9072, "step": 26306 }, { "epoch": 0.848765201820488, "grad_norm": 0.36328125, "learning_rate": 1.7773062771185178e-06, "loss": 1.963, "step": 26307 }, { "epoch": 0.8487974656742843, "grad_norm": 0.36328125, "learning_rate": 1.776563295805716e-06, "loss": 1.9396, "step": 26308 }, { "epoch": 0.8488297295280807, "grad_norm": 0.375, "learning_rate": 1.7758204600470212e-06, "loss": 1.9761, "step": 26309 }, { "epoch": 0.848861993381877, "grad_norm": 0.361328125, "learning_rate": 1.7750777698506088e-06, "loss": 1.946, "step": 26310 }, { "epoch": 0.8488942572356734, "grad_norm": 0.447265625, "learning_rate": 1.7743352252246504e-06, "loss": 1.8711, "step": 26311 }, { "epoch": 0.8489265210894696, "grad_norm": 0.369140625, "learning_rate": 1.7735928261773243e-06, "loss": 1.9032, "step": 26312 }, { "epoch": 0.848958784943266, "grad_norm": 0.412109375, "learning_rate": 1.7728505727167994e-06, "loss": 2.0384, "step": 26313 }, { "epoch": 0.8489910487970623, "grad_norm": 0.369140625, "learning_rate": 1.7721084648512437e-06, "loss": 1.9436, "step": 26314 }, { "epoch": 0.8490233126508587, "grad_norm": 0.400390625, "learning_rate": 1.771366502588831e-06, "loss": 1.9566, "step": 26315 }, { "epoch": 0.849055576504655, "grad_norm": 0.35546875, "learning_rate": 1.7706246859377262e-06, "loss": 1.9684, "step": 26316 }, { "epoch": 0.8490878403584514, "grad_norm": 0.36328125, "learning_rate": 1.7698830149060896e-06, "loss": 1.9303, "step": 26317 }, { "epoch": 0.8491201042122478, "grad_norm": 0.373046875, "learning_rate": 1.7691414895020946e-06, "loss": 1.9393, "step": 26318 }, { "epoch": 0.8491523680660441, "grad_norm": 0.369140625, "learning_rate": 1.7684001097338964e-06, "loss": 1.9782, "step": 26319 }, { "epoch": 0.8491846319198405, "grad_norm": 0.34765625, "learning_rate": 1.7676588756096569e-06, "loss": 1.9699, "step": 26320 }, { "epoch": 0.8492168957736368, "grad_norm": 0.373046875, "learning_rate": 1.7669177871375357e-06, "loss": 1.9674, "step": 26321 }, { "epoch": 0.8492491596274332, "grad_norm": 0.3515625, "learning_rate": 1.766176844325692e-06, "loss": 1.9695, "step": 26322 }, { "epoch": 0.8492814234812295, "grad_norm": 0.357421875, "learning_rate": 1.7654360471822805e-06, "loss": 1.9291, "step": 26323 }, { "epoch": 0.8493136873350259, "grad_norm": 0.419921875, "learning_rate": 1.76469539571545e-06, "loss": 1.8716, "step": 26324 }, { "epoch": 0.8493459511888222, "grad_norm": 0.375, "learning_rate": 1.7639548899333601e-06, "loss": 1.9376, "step": 26325 }, { "epoch": 0.8493782150426186, "grad_norm": 0.388671875, "learning_rate": 1.7632145298441615e-06, "loss": 1.8721, "step": 26326 }, { "epoch": 0.8494104788964149, "grad_norm": 0.388671875, "learning_rate": 1.7624743154559957e-06, "loss": 1.9389, "step": 26327 }, { "epoch": 0.8494427427502113, "grad_norm": 0.37109375, "learning_rate": 1.7617342467770214e-06, "loss": 1.9349, "step": 26328 }, { "epoch": 0.8494750066040075, "grad_norm": 0.3515625, "learning_rate": 1.7609943238153787e-06, "loss": 1.9604, "step": 26329 }, { "epoch": 0.8495072704578039, "grad_norm": 0.373046875, "learning_rate": 1.7602545465792092e-06, "loss": 1.8973, "step": 26330 }, { "epoch": 0.8495395343116002, "grad_norm": 0.37890625, "learning_rate": 1.759514915076663e-06, "loss": 1.9471, "step": 26331 }, { "epoch": 0.8495717981653966, "grad_norm": 0.37890625, "learning_rate": 1.7587754293158776e-06, "loss": 1.9437, "step": 26332 }, { "epoch": 0.8496040620191929, "grad_norm": 0.376953125, "learning_rate": 1.7580360893049906e-06, "loss": 1.9213, "step": 26333 }, { "epoch": 0.8496363258729893, "grad_norm": 0.365234375, "learning_rate": 1.7572968950521446e-06, "loss": 1.982, "step": 26334 }, { "epoch": 0.8496685897267856, "grad_norm": 0.384765625, "learning_rate": 1.7565578465654758e-06, "loss": 1.9542, "step": 26335 }, { "epoch": 0.849700853580582, "grad_norm": 0.357421875, "learning_rate": 1.755818943853113e-06, "loss": 2.0048, "step": 26336 }, { "epoch": 0.8497331174343783, "grad_norm": 0.36328125, "learning_rate": 1.7550801869231963e-06, "loss": 1.9499, "step": 26337 }, { "epoch": 0.8497653812881747, "grad_norm": 0.361328125, "learning_rate": 1.754341575783856e-06, "loss": 1.9318, "step": 26338 }, { "epoch": 0.8497976451419711, "grad_norm": 0.369140625, "learning_rate": 1.7536031104432188e-06, "loss": 1.9515, "step": 26339 }, { "epoch": 0.8498299089957674, "grad_norm": 0.37890625, "learning_rate": 1.7528647909094181e-06, "loss": 1.9876, "step": 26340 }, { "epoch": 0.8498621728495638, "grad_norm": 0.36328125, "learning_rate": 1.7521266171905793e-06, "loss": 1.8921, "step": 26341 }, { "epoch": 0.8498944367033601, "grad_norm": 0.37109375, "learning_rate": 1.7513885892948222e-06, "loss": 1.9169, "step": 26342 }, { "epoch": 0.8499267005571565, "grad_norm": 0.390625, "learning_rate": 1.750650707230279e-06, "loss": 1.9467, "step": 26343 }, { "epoch": 0.8499589644109528, "grad_norm": 0.365234375, "learning_rate": 1.7499129710050664e-06, "loss": 1.9591, "step": 26344 }, { "epoch": 0.8499912282647492, "grad_norm": 0.34765625, "learning_rate": 1.7491753806273047e-06, "loss": 1.982, "step": 26345 }, { "epoch": 0.8500234921185454, "grad_norm": 0.359375, "learning_rate": 1.7484379361051172e-06, "loss": 1.9019, "step": 26346 }, { "epoch": 0.8500557559723418, "grad_norm": 0.361328125, "learning_rate": 1.7477006374466175e-06, "loss": 1.9721, "step": 26347 }, { "epoch": 0.8500880198261381, "grad_norm": 0.353515625, "learning_rate": 1.7469634846599192e-06, "loss": 1.9628, "step": 26348 }, { "epoch": 0.8501202836799345, "grad_norm": 0.3671875, "learning_rate": 1.7462264777531422e-06, "loss": 1.9107, "step": 26349 }, { "epoch": 0.8501525475337308, "grad_norm": 0.361328125, "learning_rate": 1.745489616734397e-06, "loss": 1.9732, "step": 26350 }, { "epoch": 0.8501848113875272, "grad_norm": 0.35546875, "learning_rate": 1.7447529016117885e-06, "loss": 1.9314, "step": 26351 }, { "epoch": 0.8502170752413235, "grad_norm": 0.353515625, "learning_rate": 1.7440163323934355e-06, "loss": 1.8405, "step": 26352 }, { "epoch": 0.8502493390951199, "grad_norm": 0.37109375, "learning_rate": 1.7432799090874396e-06, "loss": 1.8407, "step": 26353 }, { "epoch": 0.8502816029489162, "grad_norm": 0.37890625, "learning_rate": 1.7425436317019095e-06, "loss": 1.893, "step": 26354 }, { "epoch": 0.8503138668027126, "grad_norm": 0.384765625, "learning_rate": 1.7418075002449436e-06, "loss": 1.8571, "step": 26355 }, { "epoch": 0.8503461306565089, "grad_norm": 0.42578125, "learning_rate": 1.7410715147246536e-06, "loss": 1.878, "step": 26356 }, { "epoch": 0.8503783945103053, "grad_norm": 0.3984375, "learning_rate": 1.740335675149135e-06, "loss": 1.845, "step": 26357 }, { "epoch": 0.8504106583641016, "grad_norm": 0.365234375, "learning_rate": 1.739599981526488e-06, "loss": 1.9018, "step": 26358 }, { "epoch": 0.850442922217898, "grad_norm": 0.390625, "learning_rate": 1.7388644338648124e-06, "loss": 1.8539, "step": 26359 }, { "epoch": 0.8504751860716944, "grad_norm": 0.384765625, "learning_rate": 1.7381290321722054e-06, "loss": 1.8687, "step": 26360 }, { "epoch": 0.8505074499254907, "grad_norm": 0.373046875, "learning_rate": 1.7373937764567555e-06, "loss": 1.9114, "step": 26361 }, { "epoch": 0.850539713779287, "grad_norm": 0.365234375, "learning_rate": 1.7366586667265643e-06, "loss": 1.904, "step": 26362 }, { "epoch": 0.8505719776330833, "grad_norm": 0.373046875, "learning_rate": 1.735923702989719e-06, "loss": 1.9041, "step": 26363 }, { "epoch": 0.8506042414868797, "grad_norm": 0.37109375, "learning_rate": 1.7351888852543063e-06, "loss": 1.8689, "step": 26364 }, { "epoch": 0.850636505340676, "grad_norm": 0.375, "learning_rate": 1.7344542135284214e-06, "loss": 1.8827, "step": 26365 }, { "epoch": 0.8506687691944724, "grad_norm": 0.353515625, "learning_rate": 1.7337196878201494e-06, "loss": 1.9227, "step": 26366 }, { "epoch": 0.8507010330482687, "grad_norm": 0.3671875, "learning_rate": 1.732985308137569e-06, "loss": 1.9553, "step": 26367 }, { "epoch": 0.8507332969020651, "grad_norm": 0.40625, "learning_rate": 1.7322510744887737e-06, "loss": 1.8454, "step": 26368 }, { "epoch": 0.8507655607558614, "grad_norm": 0.361328125, "learning_rate": 1.7315169868818387e-06, "loss": 1.9773, "step": 26369 }, { "epoch": 0.8507978246096578, "grad_norm": 0.3671875, "learning_rate": 1.7307830453248425e-06, "loss": 1.9233, "step": 26370 }, { "epoch": 0.8508300884634541, "grad_norm": 0.375, "learning_rate": 1.730049249825872e-06, "loss": 1.8835, "step": 26371 }, { "epoch": 0.8508623523172505, "grad_norm": 0.37109375, "learning_rate": 1.7293156003929989e-06, "loss": 1.9283, "step": 26372 }, { "epoch": 0.8508946161710468, "grad_norm": 0.392578125, "learning_rate": 1.7285820970342953e-06, "loss": 1.8888, "step": 26373 }, { "epoch": 0.8509268800248432, "grad_norm": 0.39453125, "learning_rate": 1.727848739757843e-06, "loss": 1.8666, "step": 26374 }, { "epoch": 0.8509591438786395, "grad_norm": 0.380859375, "learning_rate": 1.7271155285717106e-06, "loss": 1.843, "step": 26375 }, { "epoch": 0.8509914077324359, "grad_norm": 0.375, "learning_rate": 1.7263824634839647e-06, "loss": 1.8788, "step": 26376 }, { "epoch": 0.8510236715862322, "grad_norm": 0.3671875, "learning_rate": 1.7256495445026827e-06, "loss": 1.9139, "step": 26377 }, { "epoch": 0.8510559354400286, "grad_norm": 0.376953125, "learning_rate": 1.724916771635926e-06, "loss": 1.9019, "step": 26378 }, { "epoch": 0.851088199293825, "grad_norm": 0.375, "learning_rate": 1.7241841448917584e-06, "loss": 1.8542, "step": 26379 }, { "epoch": 0.8511204631476212, "grad_norm": 0.419921875, "learning_rate": 1.7234516642782534e-06, "loss": 1.854, "step": 26380 }, { "epoch": 0.8511527270014176, "grad_norm": 0.369140625, "learning_rate": 1.722719329803466e-06, "loss": 1.8332, "step": 26381 }, { "epoch": 0.8511849908552139, "grad_norm": 0.37109375, "learning_rate": 1.721987141475455e-06, "loss": 1.8744, "step": 26382 }, { "epoch": 0.8512172547090103, "grad_norm": 0.39453125, "learning_rate": 1.7212550993022891e-06, "loss": 1.841, "step": 26383 }, { "epoch": 0.8512495185628066, "grad_norm": 0.388671875, "learning_rate": 1.7205232032920198e-06, "loss": 1.8549, "step": 26384 }, { "epoch": 0.851281782416603, "grad_norm": 0.3828125, "learning_rate": 1.719791453452706e-06, "loss": 1.9138, "step": 26385 }, { "epoch": 0.8513140462703993, "grad_norm": 0.373046875, "learning_rate": 1.7190598497923959e-06, "loss": 1.8908, "step": 26386 }, { "epoch": 0.8513463101241957, "grad_norm": 0.349609375, "learning_rate": 1.7183283923191516e-06, "loss": 1.9929, "step": 26387 }, { "epoch": 0.851378573977992, "grad_norm": 0.36328125, "learning_rate": 1.7175970810410186e-06, "loss": 1.951, "step": 26388 }, { "epoch": 0.8514108378317884, "grad_norm": 0.408203125, "learning_rate": 1.7168659159660465e-06, "loss": 1.9626, "step": 26389 }, { "epoch": 0.8514431016855847, "grad_norm": 0.375, "learning_rate": 1.7161348971022877e-06, "loss": 1.9854, "step": 26390 }, { "epoch": 0.8514753655393811, "grad_norm": 0.3671875, "learning_rate": 1.7154040244577856e-06, "loss": 1.9762, "step": 26391 }, { "epoch": 0.8515076293931774, "grad_norm": 0.353515625, "learning_rate": 1.7146732980405839e-06, "loss": 1.97, "step": 26392 }, { "epoch": 0.8515398932469738, "grad_norm": 0.380859375, "learning_rate": 1.713942717858729e-06, "loss": 1.9686, "step": 26393 }, { "epoch": 0.85157215710077, "grad_norm": 0.369140625, "learning_rate": 1.7132122839202619e-06, "loss": 2.0414, "step": 26394 }, { "epoch": 0.8516044209545665, "grad_norm": 0.357421875, "learning_rate": 1.7124819962332206e-06, "loss": 1.9654, "step": 26395 }, { "epoch": 0.8516366848083627, "grad_norm": 0.3515625, "learning_rate": 1.7117518548056453e-06, "loss": 2.0174, "step": 26396 }, { "epoch": 0.8516689486621591, "grad_norm": 0.361328125, "learning_rate": 1.711021859645575e-06, "loss": 1.9852, "step": 26397 }, { "epoch": 0.8517012125159554, "grad_norm": 0.392578125, "learning_rate": 1.7102920107610381e-06, "loss": 2.0022, "step": 26398 }, { "epoch": 0.8517334763697518, "grad_norm": 0.353515625, "learning_rate": 1.7095623081600765e-06, "loss": 2.0108, "step": 26399 }, { "epoch": 0.8517657402235482, "grad_norm": 0.365234375, "learning_rate": 1.7088327518507186e-06, "loss": 1.978, "step": 26400 }, { "epoch": 0.8517980040773445, "grad_norm": 0.375, "learning_rate": 1.7081033418409914e-06, "loss": 1.9634, "step": 26401 }, { "epoch": 0.8518302679311409, "grad_norm": 0.3828125, "learning_rate": 1.7073740781389303e-06, "loss": 2.0241, "step": 26402 }, { "epoch": 0.8518625317849372, "grad_norm": 0.3671875, "learning_rate": 1.7066449607525603e-06, "loss": 1.9971, "step": 26403 }, { "epoch": 0.8518947956387336, "grad_norm": 0.37109375, "learning_rate": 1.705915989689902e-06, "loss": 1.9965, "step": 26404 }, { "epoch": 0.8519270594925299, "grad_norm": 0.34765625, "learning_rate": 1.7051871649589885e-06, "loss": 2.0023, "step": 26405 }, { "epoch": 0.8519593233463263, "grad_norm": 0.34765625, "learning_rate": 1.704458486567832e-06, "loss": 2.0132, "step": 26406 }, { "epoch": 0.8519915872001226, "grad_norm": 0.37109375, "learning_rate": 1.7037299545244577e-06, "loss": 1.9445, "step": 26407 }, { "epoch": 0.852023851053919, "grad_norm": 0.349609375, "learning_rate": 1.7030015688368878e-06, "loss": 2.0216, "step": 26408 }, { "epoch": 0.8520561149077153, "grad_norm": 0.3515625, "learning_rate": 1.7022733295131388e-06, "loss": 2.0443, "step": 26409 }, { "epoch": 0.8520883787615117, "grad_norm": 0.3671875, "learning_rate": 1.701545236561221e-06, "loss": 2.0046, "step": 26410 }, { "epoch": 0.852120642615308, "grad_norm": 0.3515625, "learning_rate": 1.7008172899891566e-06, "loss": 2.0047, "step": 26411 }, { "epoch": 0.8521529064691044, "grad_norm": 0.35546875, "learning_rate": 1.7000894898049557e-06, "loss": 2.011, "step": 26412 }, { "epoch": 0.8521851703229006, "grad_norm": 0.369140625, "learning_rate": 1.6993618360166235e-06, "loss": 2.0746, "step": 26413 }, { "epoch": 0.852217434176697, "grad_norm": 0.359375, "learning_rate": 1.6986343286321787e-06, "loss": 1.9573, "step": 26414 }, { "epoch": 0.8522496980304933, "grad_norm": 0.3515625, "learning_rate": 1.6979069676596248e-06, "loss": 2.0087, "step": 26415 }, { "epoch": 0.8522819618842897, "grad_norm": 0.359375, "learning_rate": 1.697179753106967e-06, "loss": 2.0217, "step": 26416 }, { "epoch": 0.852314225738086, "grad_norm": 0.365234375, "learning_rate": 1.6964526849822093e-06, "loss": 2.0525, "step": 26417 }, { "epoch": 0.8523464895918824, "grad_norm": 0.453125, "learning_rate": 1.6957257632933598e-06, "loss": 2.062, "step": 26418 }, { "epoch": 0.8523787534456788, "grad_norm": 0.474609375, "learning_rate": 1.6949989880484158e-06, "loss": 2.0233, "step": 26419 }, { "epoch": 0.8524110172994751, "grad_norm": 0.48828125, "learning_rate": 1.6942723592553739e-06, "loss": 2.063, "step": 26420 }, { "epoch": 0.8524432811532715, "grad_norm": 0.455078125, "learning_rate": 1.6935458769222412e-06, "loss": 2.0567, "step": 26421 }, { "epoch": 0.8524755450070678, "grad_norm": 0.453125, "learning_rate": 1.6928195410570097e-06, "loss": 2.0426, "step": 26422 }, { "epoch": 0.8525078088608642, "grad_norm": 0.4375, "learning_rate": 1.6920933516676695e-06, "loss": 2.0766, "step": 26423 }, { "epoch": 0.8525400727146605, "grad_norm": 0.54296875, "learning_rate": 1.6913673087622227e-06, "loss": 2.1318, "step": 26424 }, { "epoch": 0.8525723365684569, "grad_norm": 0.61328125, "learning_rate": 1.6906414123486564e-06, "loss": 2.165, "step": 26425 }, { "epoch": 0.8526046004222532, "grad_norm": 0.578125, "learning_rate": 1.6899156624349588e-06, "loss": 2.162, "step": 26426 }, { "epoch": 0.8526368642760496, "grad_norm": 0.56640625, "learning_rate": 1.6891900590291236e-06, "loss": 2.0909, "step": 26427 }, { "epoch": 0.8526691281298459, "grad_norm": 0.5546875, "learning_rate": 1.6884646021391348e-06, "loss": 2.1753, "step": 26428 }, { "epoch": 0.8527013919836423, "grad_norm": 0.53515625, "learning_rate": 1.6877392917729757e-06, "loss": 2.141, "step": 26429 }, { "epoch": 0.8527336558374385, "grad_norm": 0.546875, "learning_rate": 1.6870141279386347e-06, "loss": 2.1925, "step": 26430 }, { "epoch": 0.8527659196912349, "grad_norm": 0.5078125, "learning_rate": 1.6862891106440925e-06, "loss": 2.1569, "step": 26431 }, { "epoch": 0.8527981835450312, "grad_norm": 0.49609375, "learning_rate": 1.685564239897326e-06, "loss": 2.1839, "step": 26432 }, { "epoch": 0.8528304473988276, "grad_norm": 0.474609375, "learning_rate": 1.6848395157063184e-06, "loss": 2.1666, "step": 26433 }, { "epoch": 0.8528627112526239, "grad_norm": 0.48828125, "learning_rate": 1.6841149380790454e-06, "loss": 2.1861, "step": 26434 }, { "epoch": 0.8528949751064203, "grad_norm": 0.466796875, "learning_rate": 1.683390507023479e-06, "loss": 2.0987, "step": 26435 }, { "epoch": 0.8529272389602166, "grad_norm": 0.458984375, "learning_rate": 1.6826662225476024e-06, "loss": 2.1515, "step": 26436 }, { "epoch": 0.852959502814013, "grad_norm": 0.443359375, "learning_rate": 1.6819420846593763e-06, "loss": 2.1389, "step": 26437 }, { "epoch": 0.8529917666678093, "grad_norm": 0.44921875, "learning_rate": 1.6812180933667775e-06, "loss": 2.1142, "step": 26438 }, { "epoch": 0.8530240305216057, "grad_norm": 0.427734375, "learning_rate": 1.680494248677783e-06, "loss": 2.1327, "step": 26439 }, { "epoch": 0.8530562943754021, "grad_norm": 0.4375, "learning_rate": 1.6797705506003447e-06, "loss": 2.1133, "step": 26440 }, { "epoch": 0.8530885582291984, "grad_norm": 0.431640625, "learning_rate": 1.6790469991424363e-06, "loss": 2.1435, "step": 26441 }, { "epoch": 0.8531208220829948, "grad_norm": 0.41015625, "learning_rate": 1.6783235943120246e-06, "loss": 2.1122, "step": 26442 }, { "epoch": 0.8531530859367911, "grad_norm": 0.4140625, "learning_rate": 1.67760033611707e-06, "loss": 2.0825, "step": 26443 }, { "epoch": 0.8531853497905875, "grad_norm": 0.4296875, "learning_rate": 1.6768772245655312e-06, "loss": 2.1062, "step": 26444 }, { "epoch": 0.8532176136443838, "grad_norm": 0.40234375, "learning_rate": 1.6761542596653733e-06, "loss": 2.1139, "step": 26445 }, { "epoch": 0.8532498774981802, "grad_norm": 0.416015625, "learning_rate": 1.6754314414245504e-06, "loss": 2.1421, "step": 26446 }, { "epoch": 0.8532821413519764, "grad_norm": 0.427734375, "learning_rate": 1.6747087698510188e-06, "loss": 2.1405, "step": 26447 }, { "epoch": 0.8533144052057728, "grad_norm": 0.447265625, "learning_rate": 1.673986244952731e-06, "loss": 2.0772, "step": 26448 }, { "epoch": 0.8533466690595691, "grad_norm": 0.458984375, "learning_rate": 1.6732638667376453e-06, "loss": 2.0491, "step": 26449 }, { "epoch": 0.8533789329133655, "grad_norm": 0.470703125, "learning_rate": 1.6725416352137107e-06, "loss": 2.0238, "step": 26450 }, { "epoch": 0.8534111967671618, "grad_norm": 0.443359375, "learning_rate": 1.6718195503888722e-06, "loss": 1.99, "step": 26451 }, { "epoch": 0.8534434606209582, "grad_norm": 0.44140625, "learning_rate": 1.671097612271087e-06, "loss": 1.9572, "step": 26452 }, { "epoch": 0.8534757244747545, "grad_norm": 0.421875, "learning_rate": 1.6703758208682967e-06, "loss": 2.0169, "step": 26453 }, { "epoch": 0.8535079883285509, "grad_norm": 0.43359375, "learning_rate": 1.6696541761884437e-06, "loss": 2.013, "step": 26454 }, { "epoch": 0.8535402521823472, "grad_norm": 0.416015625, "learning_rate": 1.6689326782394782e-06, "loss": 1.9621, "step": 26455 }, { "epoch": 0.8535725160361436, "grad_norm": 0.41796875, "learning_rate": 1.668211327029337e-06, "loss": 1.9934, "step": 26456 }, { "epoch": 0.8536047798899399, "grad_norm": 0.41796875, "learning_rate": 1.667490122565959e-06, "loss": 1.9688, "step": 26457 }, { "epoch": 0.8536370437437363, "grad_norm": 0.404296875, "learning_rate": 1.6667690648572874e-06, "loss": 2.0035, "step": 26458 }, { "epoch": 0.8536693075975326, "grad_norm": 0.40625, "learning_rate": 1.6660481539112583e-06, "loss": 1.975, "step": 26459 }, { "epoch": 0.853701571451329, "grad_norm": 0.41015625, "learning_rate": 1.6653273897358013e-06, "loss": 1.9945, "step": 26460 }, { "epoch": 0.8537338353051254, "grad_norm": 0.40234375, "learning_rate": 1.664606772338857e-06, "loss": 2.0039, "step": 26461 }, { "epoch": 0.8537660991589217, "grad_norm": 0.404296875, "learning_rate": 1.663886301728354e-06, "loss": 2.0036, "step": 26462 }, { "epoch": 0.853798363012718, "grad_norm": 0.41796875, "learning_rate": 1.663165977912221e-06, "loss": 1.9917, "step": 26463 }, { "epoch": 0.8538306268665143, "grad_norm": 0.384765625, "learning_rate": 1.6624458008983918e-06, "loss": 2.0002, "step": 26464 }, { "epoch": 0.8538628907203107, "grad_norm": 0.392578125, "learning_rate": 1.6617257706947913e-06, "loss": 1.9736, "step": 26465 }, { "epoch": 0.853895154574107, "grad_norm": 0.400390625, "learning_rate": 1.661005887309342e-06, "loss": 1.9755, "step": 26466 }, { "epoch": 0.8539274184279034, "grad_norm": 0.408203125, "learning_rate": 1.6602861507499756e-06, "loss": 1.938, "step": 26467 }, { "epoch": 0.8539596822816997, "grad_norm": 0.404296875, "learning_rate": 1.659566561024604e-06, "loss": 1.9257, "step": 26468 }, { "epoch": 0.8539919461354961, "grad_norm": 0.390625, "learning_rate": 1.6588471181411529e-06, "loss": 1.9296, "step": 26469 }, { "epoch": 0.8540242099892924, "grad_norm": 0.392578125, "learning_rate": 1.658127822107549e-06, "loss": 1.9478, "step": 26470 }, { "epoch": 0.8540564738430888, "grad_norm": 0.392578125, "learning_rate": 1.6574086729316961e-06, "loss": 1.8857, "step": 26471 }, { "epoch": 0.8540887376968851, "grad_norm": 0.416015625, "learning_rate": 1.6566896706215163e-06, "loss": 1.9125, "step": 26472 }, { "epoch": 0.8541210015506815, "grad_norm": 0.39453125, "learning_rate": 1.6559708151849262e-06, "loss": 1.9529, "step": 26473 }, { "epoch": 0.8541532654044778, "grad_norm": 0.3984375, "learning_rate": 1.6552521066298382e-06, "loss": 1.9172, "step": 26474 }, { "epoch": 0.8541855292582742, "grad_norm": 0.404296875, "learning_rate": 1.6545335449641608e-06, "loss": 1.952, "step": 26475 }, { "epoch": 0.8542177931120705, "grad_norm": 0.380859375, "learning_rate": 1.653815130195801e-06, "loss": 1.9208, "step": 26476 }, { "epoch": 0.8542500569658669, "grad_norm": 0.3828125, "learning_rate": 1.6530968623326726e-06, "loss": 1.8982, "step": 26477 }, { "epoch": 0.8542823208196632, "grad_norm": 0.388671875, "learning_rate": 1.652378741382679e-06, "loss": 1.9511, "step": 26478 }, { "epoch": 0.8543145846734596, "grad_norm": 0.5078125, "learning_rate": 1.651660767353721e-06, "loss": 1.9245, "step": 26479 }, { "epoch": 0.854346848527256, "grad_norm": 0.498046875, "learning_rate": 1.6509429402537085e-06, "loss": 1.892, "step": 26480 }, { "epoch": 0.8543791123810522, "grad_norm": 0.55859375, "learning_rate": 1.6502252600905404e-06, "loss": 1.9273, "step": 26481 }, { "epoch": 0.8544113762348486, "grad_norm": 0.51171875, "learning_rate": 1.649507726872112e-06, "loss": 1.908, "step": 26482 }, { "epoch": 0.8544436400886449, "grad_norm": 0.5703125, "learning_rate": 1.6487903406063287e-06, "loss": 1.9219, "step": 26483 }, { "epoch": 0.8544759039424413, "grad_norm": 0.515625, "learning_rate": 1.6480731013010824e-06, "loss": 1.9782, "step": 26484 }, { "epoch": 0.8545081677962376, "grad_norm": 0.466796875, "learning_rate": 1.6473560089642652e-06, "loss": 1.9229, "step": 26485 }, { "epoch": 0.854540431650034, "grad_norm": 0.490234375, "learning_rate": 1.6466390636037775e-06, "loss": 1.9134, "step": 26486 }, { "epoch": 0.8545726955038303, "grad_norm": 0.494140625, "learning_rate": 1.645922265227508e-06, "loss": 1.929, "step": 26487 }, { "epoch": 0.8546049593576267, "grad_norm": 0.494140625, "learning_rate": 1.6452056138433419e-06, "loss": 1.9535, "step": 26488 }, { "epoch": 0.854637223211423, "grad_norm": 0.5234375, "learning_rate": 1.6444891094591762e-06, "loss": 1.9036, "step": 26489 }, { "epoch": 0.8546694870652194, "grad_norm": 0.51171875, "learning_rate": 1.6437727520828916e-06, "loss": 1.893, "step": 26490 }, { "epoch": 0.8547017509190157, "grad_norm": 0.486328125, "learning_rate": 1.643056541722373e-06, "loss": 1.9091, "step": 26491 }, { "epoch": 0.8547340147728121, "grad_norm": 0.490234375, "learning_rate": 1.6423404783855078e-06, "loss": 1.924, "step": 26492 }, { "epoch": 0.8547662786266084, "grad_norm": 0.470703125, "learning_rate": 1.6416245620801762e-06, "loss": 1.8824, "step": 26493 }, { "epoch": 0.8547985424804048, "grad_norm": 0.4921875, "learning_rate": 1.6409087928142553e-06, "loss": 1.9045, "step": 26494 }, { "epoch": 0.854830806334201, "grad_norm": 0.51171875, "learning_rate": 1.6401931705956302e-06, "loss": 1.8498, "step": 26495 }, { "epoch": 0.8548630701879975, "grad_norm": 0.478515625, "learning_rate": 1.6394776954321734e-06, "loss": 1.9134, "step": 26496 }, { "epoch": 0.8548953340417937, "grad_norm": 0.470703125, "learning_rate": 1.638762367331758e-06, "loss": 1.8983, "step": 26497 }, { "epoch": 0.8549275978955901, "grad_norm": 0.515625, "learning_rate": 1.6380471863022683e-06, "loss": 1.8804, "step": 26498 }, { "epoch": 0.8549598617493864, "grad_norm": 0.470703125, "learning_rate": 1.6373321523515626e-06, "loss": 1.9046, "step": 26499 }, { "epoch": 0.8549921256031828, "grad_norm": 0.515625, "learning_rate": 1.63661726548752e-06, "loss": 1.9419, "step": 26500 }, { "epoch": 0.8550243894569792, "grad_norm": 0.4375, "learning_rate": 1.635902525718012e-06, "loss": 1.8153, "step": 26501 }, { "epoch": 0.8550566533107755, "grad_norm": 0.44140625, "learning_rate": 1.6351879330508946e-06, "loss": 1.852, "step": 26502 }, { "epoch": 0.8550889171645719, "grad_norm": 0.4609375, "learning_rate": 1.6344734874940426e-06, "loss": 1.836, "step": 26503 }, { "epoch": 0.8551211810183682, "grad_norm": 0.4453125, "learning_rate": 1.6337591890553233e-06, "loss": 1.8563, "step": 26504 }, { "epoch": 0.8551534448721646, "grad_norm": 0.447265625, "learning_rate": 1.6330450377425888e-06, "loss": 1.813, "step": 26505 }, { "epoch": 0.8551857087259609, "grad_norm": 0.439453125, "learning_rate": 1.632331033563706e-06, "loss": 1.8389, "step": 26506 }, { "epoch": 0.8552179725797573, "grad_norm": 0.45703125, "learning_rate": 1.631617176526532e-06, "loss": 1.8086, "step": 26507 }, { "epoch": 0.8552502364335536, "grad_norm": 0.439453125, "learning_rate": 1.6309034666389288e-06, "loss": 1.8297, "step": 26508 }, { "epoch": 0.85528250028735, "grad_norm": 0.42578125, "learning_rate": 1.6301899039087486e-06, "loss": 1.8438, "step": 26509 }, { "epoch": 0.8553147641411463, "grad_norm": 0.431640625, "learning_rate": 1.6294764883438434e-06, "loss": 1.8, "step": 26510 }, { "epoch": 0.8553470279949427, "grad_norm": 0.4140625, "learning_rate": 1.6287632199520719e-06, "loss": 1.8128, "step": 26511 }, { "epoch": 0.855379291848739, "grad_norm": 0.443359375, "learning_rate": 1.6280500987412827e-06, "loss": 1.8259, "step": 26512 }, { "epoch": 0.8554115557025354, "grad_norm": 0.455078125, "learning_rate": 1.6273371247193214e-06, "loss": 1.8288, "step": 26513 }, { "epoch": 0.8554438195563316, "grad_norm": 0.41015625, "learning_rate": 1.6266242978940432e-06, "loss": 1.7985, "step": 26514 }, { "epoch": 0.855476083410128, "grad_norm": 0.416015625, "learning_rate": 1.625911618273292e-06, "loss": 1.7554, "step": 26515 }, { "epoch": 0.8555083472639243, "grad_norm": 0.4140625, "learning_rate": 1.6251990858649063e-06, "loss": 1.8248, "step": 26516 }, { "epoch": 0.8555406111177207, "grad_norm": 0.416015625, "learning_rate": 1.6244867006767367e-06, "loss": 1.7697, "step": 26517 }, { "epoch": 0.855572874971517, "grad_norm": 0.421875, "learning_rate": 1.6237744627166235e-06, "loss": 1.8328, "step": 26518 }, { "epoch": 0.8556051388253134, "grad_norm": 0.404296875, "learning_rate": 1.623062371992402e-06, "loss": 1.8079, "step": 26519 }, { "epoch": 0.8556374026791097, "grad_norm": 0.40625, "learning_rate": 1.6223504285119144e-06, "loss": 1.7872, "step": 26520 }, { "epoch": 0.8556696665329061, "grad_norm": 0.41015625, "learning_rate": 1.621638632282998e-06, "loss": 1.8142, "step": 26521 }, { "epoch": 0.8557019303867025, "grad_norm": 0.39453125, "learning_rate": 1.6209269833134826e-06, "loss": 1.8005, "step": 26522 }, { "epoch": 0.8557341942404988, "grad_norm": 0.41015625, "learning_rate": 1.6202154816112074e-06, "loss": 1.7874, "step": 26523 }, { "epoch": 0.8557664580942952, "grad_norm": 0.400390625, "learning_rate": 1.6195041271840011e-06, "loss": 1.8075, "step": 26524 }, { "epoch": 0.8557987219480915, "grad_norm": 0.39453125, "learning_rate": 1.6187929200396922e-06, "loss": 1.8107, "step": 26525 }, { "epoch": 0.8558309858018879, "grad_norm": 0.3828125, "learning_rate": 1.6180818601861148e-06, "loss": 1.8004, "step": 26526 }, { "epoch": 0.8558632496556842, "grad_norm": 0.390625, "learning_rate": 1.6173709476310905e-06, "loss": 1.8289, "step": 26527 }, { "epoch": 0.8558955135094806, "grad_norm": 0.400390625, "learning_rate": 1.6166601823824433e-06, "loss": 1.8589, "step": 26528 }, { "epoch": 0.8559277773632769, "grad_norm": 0.3828125, "learning_rate": 1.615949564448007e-06, "loss": 1.8056, "step": 26529 }, { "epoch": 0.8559600412170733, "grad_norm": 0.3984375, "learning_rate": 1.6152390938355887e-06, "loss": 1.8217, "step": 26530 }, { "epoch": 0.8559923050708695, "grad_norm": 0.380859375, "learning_rate": 1.6145287705530171e-06, "loss": 1.7825, "step": 26531 }, { "epoch": 0.8560245689246659, "grad_norm": 0.3984375, "learning_rate": 1.6138185946081157e-06, "loss": 1.8105, "step": 26532 }, { "epoch": 0.8560568327784622, "grad_norm": 0.400390625, "learning_rate": 1.6131085660086904e-06, "loss": 1.8708, "step": 26533 }, { "epoch": 0.8560890966322586, "grad_norm": 0.412109375, "learning_rate": 1.6123986847625627e-06, "loss": 1.8701, "step": 26534 }, { "epoch": 0.8561213604860549, "grad_norm": 0.439453125, "learning_rate": 1.6116889508775517e-06, "loss": 1.8034, "step": 26535 }, { "epoch": 0.8561536243398513, "grad_norm": 0.423828125, "learning_rate": 1.6109793643614578e-06, "loss": 1.8014, "step": 26536 }, { "epoch": 0.8561858881936476, "grad_norm": 0.419921875, "learning_rate": 1.6102699252221015e-06, "loss": 1.8427, "step": 26537 }, { "epoch": 0.856218152047444, "grad_norm": 0.44140625, "learning_rate": 1.6095606334672863e-06, "loss": 1.8366, "step": 26538 }, { "epoch": 0.8562504159012403, "grad_norm": 0.427734375, "learning_rate": 1.6088514891048195e-06, "loss": 1.8583, "step": 26539 }, { "epoch": 0.8562826797550367, "grad_norm": 0.40234375, "learning_rate": 1.6081424921425132e-06, "loss": 1.8656, "step": 26540 }, { "epoch": 0.8563149436088331, "grad_norm": 0.392578125, "learning_rate": 1.6074336425881624e-06, "loss": 1.8368, "step": 26541 }, { "epoch": 0.8563472074626294, "grad_norm": 0.41015625, "learning_rate": 1.6067249404495766e-06, "loss": 1.8659, "step": 26542 }, { "epoch": 0.8563794713164258, "grad_norm": 0.400390625, "learning_rate": 1.6060163857345556e-06, "loss": 1.8379, "step": 26543 }, { "epoch": 0.8564117351702221, "grad_norm": 0.388671875, "learning_rate": 1.605307978450895e-06, "loss": 1.837, "step": 26544 }, { "epoch": 0.8564439990240185, "grad_norm": 0.40234375, "learning_rate": 1.6045997186063971e-06, "loss": 1.8547, "step": 26545 }, { "epoch": 0.8564762628778148, "grad_norm": 0.390625, "learning_rate": 1.6038916062088555e-06, "loss": 1.8861, "step": 26546 }, { "epoch": 0.8565085267316112, "grad_norm": 0.400390625, "learning_rate": 1.6031836412660605e-06, "loss": 1.8595, "step": 26547 }, { "epoch": 0.8565407905854074, "grad_norm": 0.384765625, "learning_rate": 1.6024758237858145e-06, "loss": 1.8663, "step": 26548 }, { "epoch": 0.8565730544392038, "grad_norm": 0.380859375, "learning_rate": 1.6017681537759028e-06, "loss": 1.8346, "step": 26549 }, { "epoch": 0.8566053182930001, "grad_norm": 0.380859375, "learning_rate": 1.6010606312441107e-06, "loss": 1.8653, "step": 26550 }, { "epoch": 0.8566375821467965, "grad_norm": 0.408203125, "learning_rate": 1.6003532561982354e-06, "loss": 1.9114, "step": 26551 }, { "epoch": 0.8566698460005928, "grad_norm": 0.408203125, "learning_rate": 1.599646028646059e-06, "loss": 1.8732, "step": 26552 }, { "epoch": 0.8567021098543892, "grad_norm": 0.40234375, "learning_rate": 1.5989389485953603e-06, "loss": 1.9104, "step": 26553 }, { "epoch": 0.8567343737081855, "grad_norm": 0.40625, "learning_rate": 1.5982320160539332e-06, "loss": 1.8956, "step": 26554 }, { "epoch": 0.8567666375619819, "grad_norm": 0.408203125, "learning_rate": 1.5975252310295529e-06, "loss": 1.9161, "step": 26555 }, { "epoch": 0.8567989014157782, "grad_norm": 0.40234375, "learning_rate": 1.5968185935299967e-06, "loss": 1.8936, "step": 26556 }, { "epoch": 0.8568311652695746, "grad_norm": 0.388671875, "learning_rate": 1.5961121035630533e-06, "loss": 1.885, "step": 26557 }, { "epoch": 0.8568634291233709, "grad_norm": 0.38671875, "learning_rate": 1.5954057611364848e-06, "loss": 1.8853, "step": 26558 }, { "epoch": 0.8568956929771673, "grad_norm": 0.46875, "learning_rate": 1.5946995662580732e-06, "loss": 1.9508, "step": 26559 }, { "epoch": 0.8569279568309636, "grad_norm": 0.439453125, "learning_rate": 1.5939935189355975e-06, "loss": 1.9571, "step": 26560 }, { "epoch": 0.85696022068476, "grad_norm": 0.46875, "learning_rate": 1.593287619176818e-06, "loss": 1.9506, "step": 26561 }, { "epoch": 0.8569924845385564, "grad_norm": 0.447265625, "learning_rate": 1.5925818669895103e-06, "loss": 1.9256, "step": 26562 }, { "epoch": 0.8570247483923527, "grad_norm": 0.44921875, "learning_rate": 1.5918762623814499e-06, "loss": 1.989, "step": 26563 }, { "epoch": 0.857057012246149, "grad_norm": 0.478515625, "learning_rate": 1.5911708053603903e-06, "loss": 2.0229, "step": 26564 }, { "epoch": 0.8570892760999453, "grad_norm": 0.4296875, "learning_rate": 1.5904654959341036e-06, "loss": 2.0221, "step": 26565 }, { "epoch": 0.8571215399537417, "grad_norm": 0.48046875, "learning_rate": 1.5897603341103573e-06, "loss": 2.0308, "step": 26566 }, { "epoch": 0.857153803807538, "grad_norm": 0.4375, "learning_rate": 1.5890553198969033e-06, "loss": 2.011, "step": 26567 }, { "epoch": 0.8571860676613344, "grad_norm": 0.412109375, "learning_rate": 1.5883504533015103e-06, "loss": 1.9716, "step": 26568 }, { "epoch": 0.8572183315151307, "grad_norm": 0.412109375, "learning_rate": 1.5876457343319356e-06, "loss": 2.012, "step": 26569 }, { "epoch": 0.8572505953689271, "grad_norm": 0.427734375, "learning_rate": 1.5869411629959297e-06, "loss": 2.0109, "step": 26570 }, { "epoch": 0.8572828592227234, "grad_norm": 0.42578125, "learning_rate": 1.586236739301256e-06, "loss": 1.9902, "step": 26571 }, { "epoch": 0.8573151230765198, "grad_norm": 0.404296875, "learning_rate": 1.585532463255664e-06, "loss": 2.0161, "step": 26572 }, { "epoch": 0.8573473869303161, "grad_norm": 0.439453125, "learning_rate": 1.5848283348669085e-06, "loss": 2.0343, "step": 26573 }, { "epoch": 0.8573796507841125, "grad_norm": 0.396484375, "learning_rate": 1.5841243541427403e-06, "loss": 1.9845, "step": 26574 }, { "epoch": 0.8574119146379088, "grad_norm": 0.44140625, "learning_rate": 1.5834205210909014e-06, "loss": 2.0505, "step": 26575 }, { "epoch": 0.8574441784917052, "grad_norm": 0.490234375, "learning_rate": 1.582716835719149e-06, "loss": 2.0491, "step": 26576 }, { "epoch": 0.8574764423455015, "grad_norm": 0.44921875, "learning_rate": 1.5820132980352237e-06, "loss": 2.0288, "step": 26577 }, { "epoch": 0.8575087061992979, "grad_norm": 0.458984375, "learning_rate": 1.5813099080468673e-06, "loss": 2.009, "step": 26578 }, { "epoch": 0.8575409700530942, "grad_norm": 0.462890625, "learning_rate": 1.5806066657618272e-06, "loss": 2.0207, "step": 26579 }, { "epoch": 0.8575732339068906, "grad_norm": 0.455078125, "learning_rate": 1.5799035711878423e-06, "loss": 2.0206, "step": 26580 }, { "epoch": 0.857605497760687, "grad_norm": 0.447265625, "learning_rate": 1.5792006243326495e-06, "loss": 2.0867, "step": 26581 }, { "epoch": 0.8576377616144832, "grad_norm": 0.4453125, "learning_rate": 1.5784978252039895e-06, "loss": 1.9989, "step": 26582 }, { "epoch": 0.8576700254682796, "grad_norm": 0.462890625, "learning_rate": 1.5777951738095974e-06, "loss": 1.9319, "step": 26583 }, { "epoch": 0.8577022893220759, "grad_norm": 0.64453125, "learning_rate": 1.5770926701572042e-06, "loss": 1.8731, "step": 26584 }, { "epoch": 0.8577345531758723, "grad_norm": 0.67578125, "learning_rate": 1.5763903142545483e-06, "loss": 1.8228, "step": 26585 }, { "epoch": 0.8577668170296686, "grad_norm": 0.609375, "learning_rate": 1.5756881061093586e-06, "loss": 1.8721, "step": 26586 }, { "epoch": 0.857799080883465, "grad_norm": 0.5703125, "learning_rate": 1.5749860457293592e-06, "loss": 1.895, "step": 26587 }, { "epoch": 0.8578313447372613, "grad_norm": 0.5859375, "learning_rate": 1.5742841331222885e-06, "loss": 1.8514, "step": 26588 }, { "epoch": 0.8578636085910577, "grad_norm": 0.546875, "learning_rate": 1.5735823682958623e-06, "loss": 1.8613, "step": 26589 }, { "epoch": 0.857895872444854, "grad_norm": 0.5390625, "learning_rate": 1.5728807512578076e-06, "loss": 1.8633, "step": 26590 }, { "epoch": 0.8579281362986504, "grad_norm": 0.515625, "learning_rate": 1.5721792820158547e-06, "loss": 1.8671, "step": 26591 }, { "epoch": 0.8579604001524467, "grad_norm": 0.49609375, "learning_rate": 1.5714779605777146e-06, "loss": 1.8816, "step": 26592 }, { "epoch": 0.8579926640062431, "grad_norm": 0.478515625, "learning_rate": 1.5707767869511108e-06, "loss": 1.8868, "step": 26593 }, { "epoch": 0.8580249278600394, "grad_norm": 0.482421875, "learning_rate": 1.5700757611437672e-06, "loss": 1.835, "step": 26594 }, { "epoch": 0.8580571917138358, "grad_norm": 0.490234375, "learning_rate": 1.5693748831633908e-06, "loss": 1.8829, "step": 26595 }, { "epoch": 0.858089455567632, "grad_norm": 0.478515625, "learning_rate": 1.568674153017699e-06, "loss": 1.8646, "step": 26596 }, { "epoch": 0.8581217194214285, "grad_norm": 0.484375, "learning_rate": 1.5679735707144138e-06, "loss": 1.8889, "step": 26597 }, { "epoch": 0.8581539832752247, "grad_norm": 0.47265625, "learning_rate": 1.5672731362612326e-06, "loss": 1.8983, "step": 26598 }, { "epoch": 0.8581862471290211, "grad_norm": 0.48828125, "learning_rate": 1.5665728496658755e-06, "loss": 1.8956, "step": 26599 }, { "epoch": 0.8582185109828174, "grad_norm": 0.486328125, "learning_rate": 1.5658727109360466e-06, "loss": 1.9008, "step": 26600 }, { "epoch": 0.8582507748366138, "grad_norm": 0.458984375, "learning_rate": 1.5651727200794513e-06, "loss": 1.9128, "step": 26601 }, { "epoch": 0.8582830386904102, "grad_norm": 0.74609375, "learning_rate": 1.5644728771037986e-06, "loss": 1.9343, "step": 26602 }, { "epoch": 0.8583153025442065, "grad_norm": 0.8671875, "learning_rate": 1.5637731820167906e-06, "loss": 1.9036, "step": 26603 }, { "epoch": 0.8583475663980029, "grad_norm": 0.9140625, "learning_rate": 1.563073634826126e-06, "loss": 1.7947, "step": 26604 }, { "epoch": 0.8583798302517992, "grad_norm": 0.87890625, "learning_rate": 1.5623742355395088e-06, "loss": 1.7109, "step": 26605 }, { "epoch": 0.8584120941055956, "grad_norm": 0.80859375, "learning_rate": 1.5616749841646328e-06, "loss": 1.8552, "step": 26606 }, { "epoch": 0.8584443579593919, "grad_norm": 0.87109375, "learning_rate": 1.5609758807092018e-06, "loss": 1.8505, "step": 26607 }, { "epoch": 0.8584766218131883, "grad_norm": 0.82421875, "learning_rate": 1.560276925180908e-06, "loss": 1.8846, "step": 26608 }, { "epoch": 0.8585088856669846, "grad_norm": 0.71484375, "learning_rate": 1.5595781175874402e-06, "loss": 1.8417, "step": 26609 }, { "epoch": 0.858541149520781, "grad_norm": 0.63671875, "learning_rate": 1.5588794579364973e-06, "loss": 1.8239, "step": 26610 }, { "epoch": 0.8585734133745773, "grad_norm": 0.671875, "learning_rate": 1.5581809462357666e-06, "loss": 1.8482, "step": 26611 }, { "epoch": 0.8586056772283737, "grad_norm": 0.65234375, "learning_rate": 1.5574825824929334e-06, "loss": 1.8146, "step": 26612 }, { "epoch": 0.85863794108217, "grad_norm": 0.78515625, "learning_rate": 1.5567843667156934e-06, "loss": 1.6547, "step": 26613 }, { "epoch": 0.8586702049359664, "grad_norm": 0.7890625, "learning_rate": 1.5560862989117252e-06, "loss": 1.6588, "step": 26614 }, { "epoch": 0.8587024687897626, "grad_norm": 0.875, "learning_rate": 1.5553883790887113e-06, "loss": 1.7057, "step": 26615 }, { "epoch": 0.858734732643559, "grad_norm": 0.8125, "learning_rate": 1.5546906072543405e-06, "loss": 1.6799, "step": 26616 }, { "epoch": 0.8587669964973553, "grad_norm": 0.87109375, "learning_rate": 1.5539929834162897e-06, "loss": 1.5944, "step": 26617 }, { "epoch": 0.8587992603511517, "grad_norm": 0.7890625, "learning_rate": 1.5532955075822364e-06, "loss": 1.8074, "step": 26618 }, { "epoch": 0.858831524204948, "grad_norm": 0.765625, "learning_rate": 1.5525981797598644e-06, "loss": 1.8156, "step": 26619 }, { "epoch": 0.8588637880587444, "grad_norm": 0.78125, "learning_rate": 1.5519009999568373e-06, "loss": 1.8461, "step": 26620 }, { "epoch": 0.8588960519125407, "grad_norm": 0.78515625, "learning_rate": 1.5512039681808376e-06, "loss": 1.8133, "step": 26621 }, { "epoch": 0.8589283157663371, "grad_norm": 0.83203125, "learning_rate": 1.5505070844395424e-06, "loss": 1.7262, "step": 26622 }, { "epoch": 0.8589605796201335, "grad_norm": 0.7734375, "learning_rate": 1.5498103487406107e-06, "loss": 1.6334, "step": 26623 }, { "epoch": 0.8589928434739298, "grad_norm": 0.71484375, "learning_rate": 1.5491137610917178e-06, "loss": 1.7764, "step": 26624 }, { "epoch": 0.8590251073277262, "grad_norm": 0.74609375, "learning_rate": 1.548417321500536e-06, "loss": 1.7065, "step": 26625 }, { "epoch": 0.8590573711815225, "grad_norm": 0.70703125, "learning_rate": 1.5477210299747208e-06, "loss": 1.8574, "step": 26626 }, { "epoch": 0.8590896350353189, "grad_norm": 0.703125, "learning_rate": 1.547024886521946e-06, "loss": 1.8174, "step": 26627 }, { "epoch": 0.8591218988891152, "grad_norm": 0.78515625, "learning_rate": 1.5463288911498691e-06, "loss": 1.8099, "step": 26628 }, { "epoch": 0.8591541627429116, "grad_norm": 0.8125, "learning_rate": 1.5456330438661488e-06, "loss": 1.8001, "step": 26629 }, { "epoch": 0.8591864265967079, "grad_norm": 0.82421875, "learning_rate": 1.5449373446784505e-06, "loss": 1.7819, "step": 26630 }, { "epoch": 0.8592186904505043, "grad_norm": 0.85546875, "learning_rate": 1.5442417935944298e-06, "loss": 1.7655, "step": 26631 }, { "epoch": 0.8592509543043005, "grad_norm": 0.9140625, "learning_rate": 1.5435463906217407e-06, "loss": 1.7372, "step": 26632 }, { "epoch": 0.8592832181580969, "grad_norm": 0.90234375, "learning_rate": 1.5428511357680403e-06, "loss": 1.7328, "step": 26633 }, { "epoch": 0.8593154820118932, "grad_norm": 0.78125, "learning_rate": 1.5421560290409808e-06, "loss": 1.7389, "step": 26634 }, { "epoch": 0.8593477458656896, "grad_norm": 0.90234375, "learning_rate": 1.5414610704482113e-06, "loss": 1.8024, "step": 26635 }, { "epoch": 0.8593800097194859, "grad_norm": 0.85546875, "learning_rate": 1.5407662599973854e-06, "loss": 1.7889, "step": 26636 }, { "epoch": 0.8594122735732823, "grad_norm": 0.86328125, "learning_rate": 1.5400715976961487e-06, "loss": 1.7719, "step": 26637 }, { "epoch": 0.8594445374270786, "grad_norm": 0.83984375, "learning_rate": 1.5393770835521454e-06, "loss": 1.8022, "step": 26638 }, { "epoch": 0.859476801280875, "grad_norm": 0.93359375, "learning_rate": 1.538682717573024e-06, "loss": 1.8001, "step": 26639 }, { "epoch": 0.8595090651346713, "grad_norm": 0.953125, "learning_rate": 1.537988499766425e-06, "loss": 1.829, "step": 26640 }, { "epoch": 0.8595413289884677, "grad_norm": 0.875, "learning_rate": 1.5372944301399927e-06, "loss": 1.8762, "step": 26641 }, { "epoch": 0.8595735928422641, "grad_norm": 0.95703125, "learning_rate": 1.5366005087013657e-06, "loss": 1.8956, "step": 26642 }, { "epoch": 0.8596058566960604, "grad_norm": 0.9296875, "learning_rate": 1.5359067354581796e-06, "loss": 1.9528, "step": 26643 }, { "epoch": 0.8596381205498568, "grad_norm": 0.97265625, "learning_rate": 1.535213110418075e-06, "loss": 1.9458, "step": 26644 }, { "epoch": 0.8596703844036531, "grad_norm": 0.99609375, "learning_rate": 1.5345196335886858e-06, "loss": 1.8276, "step": 26645 }, { "epoch": 0.8597026482574495, "grad_norm": 0.9609375, "learning_rate": 1.5338263049776407e-06, "loss": 1.8344, "step": 26646 }, { "epoch": 0.8597349121112458, "grad_norm": 0.9296875, "learning_rate": 1.5331331245925773e-06, "loss": 1.8356, "step": 26647 }, { "epoch": 0.8597671759650422, "grad_norm": 0.9375, "learning_rate": 1.532440092441124e-06, "loss": 1.8676, "step": 26648 }, { "epoch": 0.8597994398188384, "grad_norm": 0.83203125, "learning_rate": 1.5317472085309052e-06, "loss": 1.7951, "step": 26649 }, { "epoch": 0.8598317036726348, "grad_norm": 0.9375, "learning_rate": 1.531054472869558e-06, "loss": 1.8192, "step": 26650 }, { "epoch": 0.8598639675264311, "grad_norm": 0.94140625, "learning_rate": 1.5303618854646945e-06, "loss": 1.7381, "step": 26651 }, { "epoch": 0.8598962313802275, "grad_norm": 0.84765625, "learning_rate": 1.5296694463239436e-06, "loss": 1.7821, "step": 26652 }, { "epoch": 0.8599284952340238, "grad_norm": 0.82421875, "learning_rate": 1.5289771554549341e-06, "loss": 1.7702, "step": 26653 }, { "epoch": 0.8599607590878202, "grad_norm": 0.82421875, "learning_rate": 1.5282850128652737e-06, "loss": 1.7967, "step": 26654 }, { "epoch": 0.8599930229416165, "grad_norm": 0.86328125, "learning_rate": 1.5275930185625875e-06, "loss": 1.788, "step": 26655 }, { "epoch": 0.8600252867954129, "grad_norm": 0.86328125, "learning_rate": 1.5269011725544995e-06, "loss": 1.7366, "step": 26656 }, { "epoch": 0.8600575506492092, "grad_norm": 0.921875, "learning_rate": 1.5262094748486105e-06, "loss": 1.7868, "step": 26657 }, { "epoch": 0.8600898145030056, "grad_norm": 0.85546875, "learning_rate": 1.5255179254525442e-06, "loss": 1.9053, "step": 26658 }, { "epoch": 0.8601220783568019, "grad_norm": 0.88671875, "learning_rate": 1.5248265243739113e-06, "loss": 1.8302, "step": 26659 }, { "epoch": 0.8601543422105983, "grad_norm": 0.80078125, "learning_rate": 1.5241352716203173e-06, "loss": 1.9488, "step": 26660 }, { "epoch": 0.8601866060643946, "grad_norm": 0.82421875, "learning_rate": 1.5234441671993777e-06, "loss": 1.9543, "step": 26661 }, { "epoch": 0.860218869918191, "grad_norm": 0.796875, "learning_rate": 1.5227532111186965e-06, "loss": 1.8719, "step": 26662 }, { "epoch": 0.8602511337719874, "grad_norm": 0.70703125, "learning_rate": 1.5220624033858776e-06, "loss": 1.9114, "step": 26663 }, { "epoch": 0.8602833976257837, "grad_norm": 0.7578125, "learning_rate": 1.5213717440085285e-06, "loss": 1.9472, "step": 26664 }, { "epoch": 0.86031566147958, "grad_norm": 0.734375, "learning_rate": 1.5206812329942494e-06, "loss": 1.911, "step": 26665 }, { "epoch": 0.8603479253333763, "grad_norm": 0.83984375, "learning_rate": 1.5199908703506394e-06, "loss": 1.9078, "step": 26666 }, { "epoch": 0.8603801891871727, "grad_norm": 0.83203125, "learning_rate": 1.5193006560853007e-06, "loss": 1.852, "step": 26667 }, { "epoch": 0.860412453040969, "grad_norm": 0.8828125, "learning_rate": 1.5186105902058305e-06, "loss": 1.8479, "step": 26668 }, { "epoch": 0.8604447168947654, "grad_norm": 0.8203125, "learning_rate": 1.5179206727198214e-06, "loss": 1.9174, "step": 26669 }, { "epoch": 0.8604769807485617, "grad_norm": 0.80859375, "learning_rate": 1.5172309036348703e-06, "loss": 1.8371, "step": 26670 }, { "epoch": 0.8605092446023581, "grad_norm": 0.7734375, "learning_rate": 1.5165412829585711e-06, "loss": 1.8947, "step": 26671 }, { "epoch": 0.8605415084561544, "grad_norm": 0.828125, "learning_rate": 1.5158518106985081e-06, "loss": 1.9906, "step": 26672 }, { "epoch": 0.8605737723099508, "grad_norm": 0.82421875, "learning_rate": 1.5151624868622783e-06, "loss": 1.9134, "step": 26673 }, { "epoch": 0.8606060361637471, "grad_norm": 0.8828125, "learning_rate": 1.5144733114574638e-06, "loss": 1.9104, "step": 26674 }, { "epoch": 0.8606383000175435, "grad_norm": 0.94921875, "learning_rate": 1.5137842844916539e-06, "loss": 2.004, "step": 26675 }, { "epoch": 0.8606705638713398, "grad_norm": 0.82421875, "learning_rate": 1.5130954059724323e-06, "loss": 2.0088, "step": 26676 }, { "epoch": 0.8607028277251362, "grad_norm": 0.76171875, "learning_rate": 1.512406675907378e-06, "loss": 1.8518, "step": 26677 }, { "epoch": 0.8607350915789325, "grad_norm": 0.84765625, "learning_rate": 1.5117180943040781e-06, "loss": 2.0016, "step": 26678 }, { "epoch": 0.8607673554327289, "grad_norm": 0.796875, "learning_rate": 1.5110296611701085e-06, "loss": 1.9788, "step": 26679 }, { "epoch": 0.8607996192865252, "grad_norm": 0.8359375, "learning_rate": 1.5103413765130463e-06, "loss": 1.8171, "step": 26680 }, { "epoch": 0.8608318831403216, "grad_norm": 0.8046875, "learning_rate": 1.5096532403404722e-06, "loss": 1.7789, "step": 26681 }, { "epoch": 0.860864146994118, "grad_norm": 0.85546875, "learning_rate": 1.5089652526599535e-06, "loss": 1.9735, "step": 26682 }, { "epoch": 0.8608964108479142, "grad_norm": 0.78125, "learning_rate": 1.5082774134790655e-06, "loss": 1.914, "step": 26683 }, { "epoch": 0.8609286747017106, "grad_norm": 0.72265625, "learning_rate": 1.5075897228053875e-06, "loss": 1.8296, "step": 26684 }, { "epoch": 0.8609609385555069, "grad_norm": 1.015625, "learning_rate": 1.5069021806464767e-06, "loss": 1.6756, "step": 26685 }, { "epoch": 0.8609932024093033, "grad_norm": 1.0546875, "learning_rate": 1.5062147870099052e-06, "loss": 1.728, "step": 26686 }, { "epoch": 0.8610254662630996, "grad_norm": 0.875, "learning_rate": 1.505527541903249e-06, "loss": 1.9053, "step": 26687 }, { "epoch": 0.861057730116896, "grad_norm": 0.87109375, "learning_rate": 1.5048404453340565e-06, "loss": 1.8199, "step": 26688 }, { "epoch": 0.8610899939706923, "grad_norm": 0.828125, "learning_rate": 1.5041534973099036e-06, "loss": 1.8272, "step": 26689 }, { "epoch": 0.8611222578244887, "grad_norm": 0.79296875, "learning_rate": 1.503466697838346e-06, "loss": 1.8898, "step": 26690 }, { "epoch": 0.861154521678285, "grad_norm": 0.84375, "learning_rate": 1.5027800469269425e-06, "loss": 1.7979, "step": 26691 }, { "epoch": 0.8611867855320814, "grad_norm": 0.78125, "learning_rate": 1.5020935445832572e-06, "loss": 1.9729, "step": 26692 }, { "epoch": 0.8612190493858777, "grad_norm": 0.8671875, "learning_rate": 1.5014071908148408e-06, "loss": 1.984, "step": 26693 }, { "epoch": 0.8612513132396741, "grad_norm": 0.96484375, "learning_rate": 1.5007209856292486e-06, "loss": 1.9482, "step": 26694 }, { "epoch": 0.8612835770934704, "grad_norm": 0.91015625, "learning_rate": 1.5000349290340382e-06, "loss": 1.9929, "step": 26695 }, { "epoch": 0.8613158409472668, "grad_norm": 0.8984375, "learning_rate": 1.4993490210367583e-06, "loss": 1.9544, "step": 26696 }, { "epoch": 0.861348104801063, "grad_norm": 0.89453125, "learning_rate": 1.4986632616449563e-06, "loss": 1.9399, "step": 26697 }, { "epoch": 0.8613803686548595, "grad_norm": 0.85546875, "learning_rate": 1.4979776508661864e-06, "loss": 1.9941, "step": 26698 }, { "epoch": 0.8614126325086557, "grad_norm": 0.8203125, "learning_rate": 1.4972921887079905e-06, "loss": 1.9558, "step": 26699 }, { "epoch": 0.8614448963624521, "grad_norm": 0.9296875, "learning_rate": 1.4966068751779145e-06, "loss": 2.0281, "step": 26700 }, { "epoch": 0.8614771602162484, "grad_norm": 0.91796875, "learning_rate": 1.4959217102835054e-06, "loss": 2.0372, "step": 26701 }, { "epoch": 0.8615094240700448, "grad_norm": 1.0234375, "learning_rate": 1.495236694032301e-06, "loss": 2.0852, "step": 26702 }, { "epoch": 0.8615416879238412, "grad_norm": 0.828125, "learning_rate": 1.4945518264318413e-06, "loss": 1.9979, "step": 26703 }, { "epoch": 0.8615739517776375, "grad_norm": 0.8203125, "learning_rate": 1.4938671074896676e-06, "loss": 1.8343, "step": 26704 }, { "epoch": 0.8616062156314339, "grad_norm": 0.78125, "learning_rate": 1.4931825372133151e-06, "loss": 1.9623, "step": 26705 }, { "epoch": 0.8616384794852302, "grad_norm": 0.7578125, "learning_rate": 1.4924981156103196e-06, "loss": 2.0896, "step": 26706 }, { "epoch": 0.8616707433390266, "grad_norm": 0.7734375, "learning_rate": 1.4918138426882167e-06, "loss": 2.098, "step": 26707 }, { "epoch": 0.8617030071928229, "grad_norm": 0.75, "learning_rate": 1.4911297184545319e-06, "loss": 2.0493, "step": 26708 }, { "epoch": 0.8617352710466193, "grad_norm": 0.71875, "learning_rate": 1.4904457429168078e-06, "loss": 2.0522, "step": 26709 }, { "epoch": 0.8617675349004156, "grad_norm": 0.7265625, "learning_rate": 1.4897619160825582e-06, "loss": 2.0828, "step": 26710 }, { "epoch": 0.861799798754212, "grad_norm": 0.87109375, "learning_rate": 1.4890782379593171e-06, "loss": 2.2336, "step": 26711 }, { "epoch": 0.8618320626080083, "grad_norm": 0.6875, "learning_rate": 1.488394708554615e-06, "loss": 2.1552, "step": 26712 }, { "epoch": 0.8618643264618047, "grad_norm": 0.6484375, "learning_rate": 1.4877113278759664e-06, "loss": 2.0703, "step": 26713 }, { "epoch": 0.861896590315601, "grad_norm": 0.6796875, "learning_rate": 1.487028095930898e-06, "loss": 2.0907, "step": 26714 }, { "epoch": 0.8619288541693974, "grad_norm": 0.72265625, "learning_rate": 1.486345012726934e-06, "loss": 2.1084, "step": 26715 }, { "epoch": 0.8619611180231936, "grad_norm": 0.71875, "learning_rate": 1.4856620782715851e-06, "loss": 2.1183, "step": 26716 }, { "epoch": 0.86199338187699, "grad_norm": 0.6875, "learning_rate": 1.4849792925723736e-06, "loss": 2.0039, "step": 26717 }, { "epoch": 0.8620256457307863, "grad_norm": 0.95703125, "learning_rate": 1.4842966556368188e-06, "loss": 1.9983, "step": 26718 }, { "epoch": 0.8620579095845827, "grad_norm": 0.78515625, "learning_rate": 1.4836141674724258e-06, "loss": 1.9248, "step": 26719 }, { "epoch": 0.862090173438379, "grad_norm": 0.703125, "learning_rate": 1.4829318280867138e-06, "loss": 1.9687, "step": 26720 }, { "epoch": 0.8621224372921754, "grad_norm": 0.80078125, "learning_rate": 1.4822496374871902e-06, "loss": 1.9655, "step": 26721 }, { "epoch": 0.8621547011459717, "grad_norm": 0.7578125, "learning_rate": 1.4815675956813624e-06, "loss": 2.0765, "step": 26722 }, { "epoch": 0.8621869649997681, "grad_norm": 0.69921875, "learning_rate": 1.4808857026767443e-06, "loss": 1.8884, "step": 26723 }, { "epoch": 0.8622192288535645, "grad_norm": 0.7578125, "learning_rate": 1.4802039584808362e-06, "loss": 1.9205, "step": 26724 }, { "epoch": 0.8622514927073608, "grad_norm": 0.63671875, "learning_rate": 1.4795223631011428e-06, "loss": 1.9273, "step": 26725 }, { "epoch": 0.8622837565611572, "grad_norm": 0.71484375, "learning_rate": 1.4788409165451694e-06, "loss": 1.9147, "step": 26726 }, { "epoch": 0.8623160204149535, "grad_norm": 0.640625, "learning_rate": 1.4781596188204166e-06, "loss": 1.9446, "step": 26727 }, { "epoch": 0.8623482842687499, "grad_norm": 0.73828125, "learning_rate": 1.4774784699343785e-06, "loss": 1.7378, "step": 26728 }, { "epoch": 0.8623805481225462, "grad_norm": 0.84375, "learning_rate": 1.4767974698945574e-06, "loss": 1.7044, "step": 26729 }, { "epoch": 0.8624128119763426, "grad_norm": 0.7734375, "learning_rate": 1.4761166187084508e-06, "loss": 1.8727, "step": 26730 }, { "epoch": 0.8624450758301389, "grad_norm": 1.0546875, "learning_rate": 1.4754359163835458e-06, "loss": 2.065, "step": 26731 }, { "epoch": 0.8624773396839353, "grad_norm": 0.67578125, "learning_rate": 1.4747553629273447e-06, "loss": 2.0212, "step": 26732 }, { "epoch": 0.8625096035377315, "grad_norm": 0.70703125, "learning_rate": 1.4740749583473318e-06, "loss": 1.8145, "step": 26733 }, { "epoch": 0.8625418673915279, "grad_norm": 0.71875, "learning_rate": 1.4733947026509959e-06, "loss": 1.8943, "step": 26734 }, { "epoch": 0.8625741312453242, "grad_norm": 0.6171875, "learning_rate": 1.472714595845831e-06, "loss": 1.9634, "step": 26735 }, { "epoch": 0.8626063950991206, "grad_norm": 0.73046875, "learning_rate": 1.4720346379393178e-06, "loss": 2.0075, "step": 26736 }, { "epoch": 0.8626386589529169, "grad_norm": 0.65234375, "learning_rate": 1.4713548289389405e-06, "loss": 1.8545, "step": 26737 }, { "epoch": 0.8626709228067133, "grad_norm": 0.58984375, "learning_rate": 1.4706751688521864e-06, "loss": 1.8486, "step": 26738 }, { "epoch": 0.8627031866605096, "grad_norm": 0.5859375, "learning_rate": 1.469995657686531e-06, "loss": 2.0044, "step": 26739 }, { "epoch": 0.862735450514306, "grad_norm": 0.609375, "learning_rate": 1.4693162954494632e-06, "loss": 1.8958, "step": 26740 }, { "epoch": 0.8627677143681023, "grad_norm": 0.6328125, "learning_rate": 1.4686370821484491e-06, "loss": 1.8602, "step": 26741 }, { "epoch": 0.8627999782218987, "grad_norm": 0.66015625, "learning_rate": 1.4679580177909706e-06, "loss": 2.089, "step": 26742 }, { "epoch": 0.8628322420756951, "grad_norm": 0.5859375, "learning_rate": 1.4672791023845073e-06, "loss": 1.9575, "step": 26743 }, { "epoch": 0.8628645059294914, "grad_norm": 0.60546875, "learning_rate": 1.4666003359365227e-06, "loss": 2.0153, "step": 26744 }, { "epoch": 0.8628967697832878, "grad_norm": 0.7578125, "learning_rate": 1.4659217184544927e-06, "loss": 1.9085, "step": 26745 }, { "epoch": 0.8629290336370841, "grad_norm": 0.62890625, "learning_rate": 1.4652432499458913e-06, "loss": 1.9908, "step": 26746 }, { "epoch": 0.8629612974908805, "grad_norm": 0.625, "learning_rate": 1.464564930418179e-06, "loss": 1.8353, "step": 26747 }, { "epoch": 0.8629935613446768, "grad_norm": 0.62890625, "learning_rate": 1.4638867598788236e-06, "loss": 1.8132, "step": 26748 }, { "epoch": 0.8630258251984732, "grad_norm": 0.55859375, "learning_rate": 1.4632087383352988e-06, "loss": 1.9308, "step": 26749 }, { "epoch": 0.8630580890522694, "grad_norm": 0.60546875, "learning_rate": 1.4625308657950554e-06, "loss": 2.0174, "step": 26750 }, { "epoch": 0.8630903529060658, "grad_norm": 0.640625, "learning_rate": 1.4618531422655624e-06, "loss": 1.8643, "step": 26751 }, { "epoch": 0.8631226167598621, "grad_norm": 0.65234375, "learning_rate": 1.4611755677542787e-06, "loss": 1.7333, "step": 26752 }, { "epoch": 0.8631548806136585, "grad_norm": 0.65625, "learning_rate": 1.4604981422686586e-06, "loss": 1.7637, "step": 26753 }, { "epoch": 0.8631871444674548, "grad_norm": 0.65625, "learning_rate": 1.4598208658161643e-06, "loss": 1.7734, "step": 26754 }, { "epoch": 0.8632194083212512, "grad_norm": 0.59765625, "learning_rate": 1.4591437384042484e-06, "loss": 1.8467, "step": 26755 }, { "epoch": 0.8632516721750475, "grad_norm": 0.515625, "learning_rate": 1.4584667600403611e-06, "loss": 1.9611, "step": 26756 }, { "epoch": 0.8632839360288439, "grad_norm": 0.55078125, "learning_rate": 1.4577899307319603e-06, "loss": 1.9576, "step": 26757 }, { "epoch": 0.8633161998826402, "grad_norm": 0.5625, "learning_rate": 1.4571132504864931e-06, "loss": 1.9025, "step": 26758 }, { "epoch": 0.8633484637364366, "grad_norm": 0.57421875, "learning_rate": 1.4564367193114037e-06, "loss": 1.8785, "step": 26759 }, { "epoch": 0.8633807275902329, "grad_norm": 0.54296875, "learning_rate": 1.455760337214146e-06, "loss": 1.8801, "step": 26760 }, { "epoch": 0.8634129914440293, "grad_norm": 0.578125, "learning_rate": 1.4550841042021624e-06, "loss": 1.9825, "step": 26761 }, { "epoch": 0.8634452552978256, "grad_norm": 0.56640625, "learning_rate": 1.454408020282892e-06, "loss": 1.7041, "step": 26762 }, { "epoch": 0.863477519151622, "grad_norm": 0.58203125, "learning_rate": 1.4537320854637838e-06, "loss": 1.8104, "step": 26763 }, { "epoch": 0.8635097830054184, "grad_norm": 0.78515625, "learning_rate": 1.453056299752275e-06, "loss": 1.728, "step": 26764 }, { "epoch": 0.8635420468592147, "grad_norm": 0.7109375, "learning_rate": 1.4523806631558017e-06, "loss": 1.7664, "step": 26765 }, { "epoch": 0.863574310713011, "grad_norm": 0.58203125, "learning_rate": 1.451705175681806e-06, "loss": 1.9903, "step": 26766 }, { "epoch": 0.8636065745668073, "grad_norm": 0.5859375, "learning_rate": 1.451029837337719e-06, "loss": 2.0575, "step": 26767 }, { "epoch": 0.8636388384206037, "grad_norm": 0.61328125, "learning_rate": 1.4503546481309726e-06, "loss": 1.9708, "step": 26768 }, { "epoch": 0.8636711022744, "grad_norm": 0.49609375, "learning_rate": 1.449679608069006e-06, "loss": 1.979, "step": 26769 }, { "epoch": 0.8637033661281964, "grad_norm": 0.53515625, "learning_rate": 1.4490047171592435e-06, "loss": 1.9856, "step": 26770 }, { "epoch": 0.8637356299819927, "grad_norm": 0.58203125, "learning_rate": 1.4483299754091173e-06, "loss": 1.856, "step": 26771 }, { "epoch": 0.8637678938357891, "grad_norm": 0.62109375, "learning_rate": 1.4476553828260497e-06, "loss": 1.8041, "step": 26772 }, { "epoch": 0.8638001576895854, "grad_norm": 0.5625, "learning_rate": 1.4469809394174683e-06, "loss": 1.8741, "step": 26773 }, { "epoch": 0.8638324215433818, "grad_norm": 0.515625, "learning_rate": 1.4463066451908036e-06, "loss": 1.9801, "step": 26774 }, { "epoch": 0.8638646853971781, "grad_norm": 0.578125, "learning_rate": 1.445632500153468e-06, "loss": 2.0144, "step": 26775 }, { "epoch": 0.8638969492509745, "grad_norm": 0.59375, "learning_rate": 1.4449585043128844e-06, "loss": 2.0359, "step": 26776 }, { "epoch": 0.8639292131047708, "grad_norm": 0.734375, "learning_rate": 1.4442846576764795e-06, "loss": 2.0228, "step": 26777 }, { "epoch": 0.8639614769585672, "grad_norm": 0.5625, "learning_rate": 1.443610960251658e-06, "loss": 1.9202, "step": 26778 }, { "epoch": 0.8639937408123635, "grad_norm": 0.5390625, "learning_rate": 1.4429374120458416e-06, "loss": 2.0122, "step": 26779 }, { "epoch": 0.8640260046661599, "grad_norm": 0.5546875, "learning_rate": 1.4422640130664516e-06, "loss": 1.9811, "step": 26780 }, { "epoch": 0.8640582685199562, "grad_norm": 0.5546875, "learning_rate": 1.4415907633208852e-06, "loss": 2.0182, "step": 26781 }, { "epoch": 0.8640905323737526, "grad_norm": 0.578125, "learning_rate": 1.440917662816565e-06, "loss": 1.8728, "step": 26782 }, { "epoch": 0.8641227962275488, "grad_norm": 0.58984375, "learning_rate": 1.4402447115608964e-06, "loss": 2.0377, "step": 26783 }, { "epoch": 0.8641550600813452, "grad_norm": 0.57421875, "learning_rate": 1.439571909561282e-06, "loss": 1.9773, "step": 26784 }, { "epoch": 0.8641873239351416, "grad_norm": 0.57421875, "learning_rate": 1.4388992568251358e-06, "loss": 1.9773, "step": 26785 }, { "epoch": 0.8642195877889379, "grad_norm": 0.6015625, "learning_rate": 1.438226753359857e-06, "loss": 1.8969, "step": 26786 }, { "epoch": 0.8642518516427343, "grad_norm": 0.57421875, "learning_rate": 1.437554399172848e-06, "loss": 1.9091, "step": 26787 }, { "epoch": 0.8642841154965306, "grad_norm": 0.54296875, "learning_rate": 1.436882194271511e-06, "loss": 1.9836, "step": 26788 }, { "epoch": 0.864316379350327, "grad_norm": 0.5625, "learning_rate": 1.4362101386632453e-06, "loss": 1.9876, "step": 26789 }, { "epoch": 0.8643486432041233, "grad_norm": 0.5234375, "learning_rate": 1.4355382323554466e-06, "loss": 1.9277, "step": 26790 }, { "epoch": 0.8643809070579197, "grad_norm": 0.5703125, "learning_rate": 1.4348664753555125e-06, "loss": 1.9017, "step": 26791 }, { "epoch": 0.864413170911716, "grad_norm": 0.490234375, "learning_rate": 1.4341948676708383e-06, "loss": 1.9262, "step": 26792 }, { "epoch": 0.8644454347655124, "grad_norm": 0.5, "learning_rate": 1.4335234093088117e-06, "loss": 1.8726, "step": 26793 }, { "epoch": 0.8644776986193087, "grad_norm": 0.58203125, "learning_rate": 1.4328521002768285e-06, "loss": 1.9086, "step": 26794 }, { "epoch": 0.8645099624731051, "grad_norm": 0.53125, "learning_rate": 1.4321809405822777e-06, "loss": 1.8795, "step": 26795 }, { "epoch": 0.8645422263269014, "grad_norm": 0.58203125, "learning_rate": 1.4315099302325419e-06, "loss": 1.8771, "step": 26796 }, { "epoch": 0.8645744901806978, "grad_norm": 0.5390625, "learning_rate": 1.4308390692350149e-06, "loss": 1.9621, "step": 26797 }, { "epoch": 0.864606754034494, "grad_norm": 0.54296875, "learning_rate": 1.4301683575970758e-06, "loss": 1.9376, "step": 26798 }, { "epoch": 0.8646390178882905, "grad_norm": 0.5703125, "learning_rate": 1.4294977953261058e-06, "loss": 1.9038, "step": 26799 }, { "epoch": 0.8646712817420867, "grad_norm": 0.5546875, "learning_rate": 1.4288273824294917e-06, "loss": 1.9815, "step": 26800 }, { "epoch": 0.8647035455958831, "grad_norm": 0.53125, "learning_rate": 1.4281571189146097e-06, "loss": 1.9203, "step": 26801 }, { "epoch": 0.8647358094496794, "grad_norm": 0.515625, "learning_rate": 1.4274870047888373e-06, "loss": 1.8917, "step": 26802 }, { "epoch": 0.8647680733034758, "grad_norm": 0.51171875, "learning_rate": 1.4268170400595498e-06, "loss": 1.8845, "step": 26803 }, { "epoch": 0.8648003371572722, "grad_norm": 0.58203125, "learning_rate": 1.4261472247341217e-06, "loss": 1.9088, "step": 26804 }, { "epoch": 0.8648326010110685, "grad_norm": 0.515625, "learning_rate": 1.425477558819932e-06, "loss": 1.9262, "step": 26805 }, { "epoch": 0.8648648648648649, "grad_norm": 0.5234375, "learning_rate": 1.4248080423243432e-06, "loss": 1.9321, "step": 26806 }, { "epoch": 0.8648971287186612, "grad_norm": 0.51171875, "learning_rate": 1.4241386752547274e-06, "loss": 1.9409, "step": 26807 }, { "epoch": 0.8649293925724576, "grad_norm": 0.515625, "learning_rate": 1.423469457618461e-06, "loss": 1.9337, "step": 26808 }, { "epoch": 0.8649616564262539, "grad_norm": 0.4765625, "learning_rate": 1.4228003894228958e-06, "loss": 1.8806, "step": 26809 }, { "epoch": 0.8649939202800503, "grad_norm": 0.4609375, "learning_rate": 1.4221314706754062e-06, "loss": 1.8656, "step": 26810 }, { "epoch": 0.8650261841338466, "grad_norm": 0.46484375, "learning_rate": 1.4214627013833531e-06, "loss": 1.9039, "step": 26811 }, { "epoch": 0.865058447987643, "grad_norm": 0.46484375, "learning_rate": 1.4207940815540953e-06, "loss": 1.8937, "step": 26812 }, { "epoch": 0.8650907118414393, "grad_norm": 0.458984375, "learning_rate": 1.4201256111949974e-06, "loss": 1.9323, "step": 26813 }, { "epoch": 0.8651229756952357, "grad_norm": 0.46875, "learning_rate": 1.4194572903134145e-06, "loss": 1.9147, "step": 26814 }, { "epoch": 0.865155239549032, "grad_norm": 0.439453125, "learning_rate": 1.4187891189166997e-06, "loss": 1.9297, "step": 26815 }, { "epoch": 0.8651875034028284, "grad_norm": 0.4609375, "learning_rate": 1.4181210970122132e-06, "loss": 1.9155, "step": 26816 }, { "epoch": 0.8652197672566246, "grad_norm": 0.48046875, "learning_rate": 1.417453224607308e-06, "loss": 1.9315, "step": 26817 }, { "epoch": 0.865252031110421, "grad_norm": 0.50390625, "learning_rate": 1.4167855017093295e-06, "loss": 1.9933, "step": 26818 }, { "epoch": 0.8652842949642173, "grad_norm": 0.55859375, "learning_rate": 1.4161179283256354e-06, "loss": 2.0623, "step": 26819 }, { "epoch": 0.8653165588180137, "grad_norm": 0.5234375, "learning_rate": 1.4154505044635696e-06, "loss": 2.0614, "step": 26820 }, { "epoch": 0.86534882267181, "grad_norm": 0.5546875, "learning_rate": 1.4147832301304765e-06, "loss": 1.9559, "step": 26821 }, { "epoch": 0.8653810865256064, "grad_norm": 0.640625, "learning_rate": 1.4141161053337047e-06, "loss": 1.9586, "step": 26822 }, { "epoch": 0.8654133503794027, "grad_norm": 0.6328125, "learning_rate": 1.413449130080599e-06, "loss": 1.9674, "step": 26823 }, { "epoch": 0.8654456142331991, "grad_norm": 0.58984375, "learning_rate": 1.4127823043784933e-06, "loss": 1.9732, "step": 26824 }, { "epoch": 0.8654778780869955, "grad_norm": 0.546875, "learning_rate": 1.4121156282347365e-06, "loss": 1.9727, "step": 26825 }, { "epoch": 0.8655101419407918, "grad_norm": 0.62109375, "learning_rate": 1.4114491016566628e-06, "loss": 1.9092, "step": 26826 }, { "epoch": 0.8655424057945882, "grad_norm": 0.578125, "learning_rate": 1.4107827246516065e-06, "loss": 1.9171, "step": 26827 }, { "epoch": 0.8655746696483845, "grad_norm": 0.55859375, "learning_rate": 1.4101164972269081e-06, "loss": 1.9489, "step": 26828 }, { "epoch": 0.8656069335021809, "grad_norm": 0.5546875, "learning_rate": 1.4094504193898971e-06, "loss": 1.9426, "step": 26829 }, { "epoch": 0.8656391973559772, "grad_norm": 0.5078125, "learning_rate": 1.4087844911479042e-06, "loss": 1.9226, "step": 26830 }, { "epoch": 0.8656714612097736, "grad_norm": 0.52734375, "learning_rate": 1.4081187125082634e-06, "loss": 1.9146, "step": 26831 }, { "epoch": 0.8657037250635699, "grad_norm": 0.482421875, "learning_rate": 1.4074530834783007e-06, "loss": 1.9295, "step": 26832 }, { "epoch": 0.8657359889173663, "grad_norm": 0.498046875, "learning_rate": 1.406787604065345e-06, "loss": 1.9795, "step": 26833 }, { "epoch": 0.8657682527711625, "grad_norm": 0.49609375, "learning_rate": 1.4061222742767154e-06, "loss": 1.9802, "step": 26834 }, { "epoch": 0.8658005166249589, "grad_norm": 0.498046875, "learning_rate": 1.405457094119743e-06, "loss": 1.954, "step": 26835 }, { "epoch": 0.8658327804787552, "grad_norm": 0.48046875, "learning_rate": 1.4047920636017453e-06, "loss": 1.9582, "step": 26836 }, { "epoch": 0.8658650443325516, "grad_norm": 0.470703125, "learning_rate": 1.4041271827300411e-06, "loss": 1.9809, "step": 26837 }, { "epoch": 0.8658973081863479, "grad_norm": 0.4609375, "learning_rate": 1.4034624515119514e-06, "loss": 2.0111, "step": 26838 }, { "epoch": 0.8659295720401443, "grad_norm": 0.4921875, "learning_rate": 1.4027978699547988e-06, "loss": 1.9785, "step": 26839 }, { "epoch": 0.8659618358939406, "grad_norm": 0.48046875, "learning_rate": 1.4021334380658857e-06, "loss": 1.9899, "step": 26840 }, { "epoch": 0.865994099747737, "grad_norm": 0.48828125, "learning_rate": 1.4014691558525362e-06, "loss": 1.9407, "step": 26841 }, { "epoch": 0.8660263636015333, "grad_norm": 0.453125, "learning_rate": 1.4008050233220592e-06, "loss": 1.9342, "step": 26842 }, { "epoch": 0.8660586274553297, "grad_norm": 0.458984375, "learning_rate": 1.4001410404817612e-06, "loss": 1.9747, "step": 26843 }, { "epoch": 0.8660908913091261, "grad_norm": 0.486328125, "learning_rate": 1.3994772073389573e-06, "loss": 1.9936, "step": 26844 }, { "epoch": 0.8661231551629224, "grad_norm": 0.466796875, "learning_rate": 1.3988135239009503e-06, "loss": 1.9881, "step": 26845 }, { "epoch": 0.8661554190167188, "grad_norm": 0.44921875, "learning_rate": 1.398149990175046e-06, "loss": 1.9592, "step": 26846 }, { "epoch": 0.8661876828705151, "grad_norm": 0.423828125, "learning_rate": 1.3974866061685488e-06, "loss": 1.939, "step": 26847 }, { "epoch": 0.8662199467243115, "grad_norm": 0.423828125, "learning_rate": 1.3968233718887623e-06, "loss": 1.9778, "step": 26848 }, { "epoch": 0.8662522105781078, "grad_norm": 0.41796875, "learning_rate": 1.396160287342983e-06, "loss": 1.9559, "step": 26849 }, { "epoch": 0.8662844744319042, "grad_norm": 0.431640625, "learning_rate": 1.3954973525385128e-06, "loss": 1.9368, "step": 26850 }, { "epoch": 0.8663167382857004, "grad_norm": 0.41796875, "learning_rate": 1.3948345674826496e-06, "loss": 1.9657, "step": 26851 }, { "epoch": 0.8663490021394968, "grad_norm": 0.4296875, "learning_rate": 1.394171932182684e-06, "loss": 1.9289, "step": 26852 }, { "epoch": 0.8663812659932931, "grad_norm": 0.408203125, "learning_rate": 1.3935094466459152e-06, "loss": 1.9269, "step": 26853 }, { "epoch": 0.8664135298470895, "grad_norm": 0.421875, "learning_rate": 1.3928471108796325e-06, "loss": 1.9318, "step": 26854 }, { "epoch": 0.8664457937008858, "grad_norm": 0.404296875, "learning_rate": 1.392184924891125e-06, "loss": 1.9242, "step": 26855 }, { "epoch": 0.8664780575546822, "grad_norm": 0.41015625, "learning_rate": 1.391522888687687e-06, "loss": 1.9416, "step": 26856 }, { "epoch": 0.8665103214084785, "grad_norm": 0.4140625, "learning_rate": 1.390861002276602e-06, "loss": 1.9521, "step": 26857 }, { "epoch": 0.8665425852622749, "grad_norm": 0.40625, "learning_rate": 1.3901992656651536e-06, "loss": 1.9486, "step": 26858 }, { "epoch": 0.8665748491160712, "grad_norm": 0.400390625, "learning_rate": 1.38953767886063e-06, "loss": 1.9129, "step": 26859 }, { "epoch": 0.8666071129698676, "grad_norm": 0.390625, "learning_rate": 1.388876241870311e-06, "loss": 1.9463, "step": 26860 }, { "epoch": 0.8666393768236639, "grad_norm": 0.40234375, "learning_rate": 1.388214954701479e-06, "loss": 1.9267, "step": 26861 }, { "epoch": 0.8666716406774603, "grad_norm": 0.416015625, "learning_rate": 1.3875538173614094e-06, "loss": 1.9547, "step": 26862 }, { "epoch": 0.8667039045312566, "grad_norm": 0.400390625, "learning_rate": 1.3868928298573836e-06, "loss": 1.9426, "step": 26863 }, { "epoch": 0.866736168385053, "grad_norm": 0.396484375, "learning_rate": 1.3862319921966753e-06, "loss": 1.936, "step": 26864 }, { "epoch": 0.8667684322388494, "grad_norm": 0.392578125, "learning_rate": 1.385571304386556e-06, "loss": 1.9811, "step": 26865 }, { "epoch": 0.8668006960926457, "grad_norm": 0.408203125, "learning_rate": 1.3849107664343042e-06, "loss": 1.9696, "step": 26866 }, { "epoch": 0.866832959946442, "grad_norm": 0.384765625, "learning_rate": 1.3842503783471876e-06, "loss": 2.0075, "step": 26867 }, { "epoch": 0.8668652238002383, "grad_norm": 0.421875, "learning_rate": 1.3835901401324723e-06, "loss": 2.0152, "step": 26868 }, { "epoch": 0.8668974876540347, "grad_norm": 0.4140625, "learning_rate": 1.3829300517974292e-06, "loss": 2.0003, "step": 26869 }, { "epoch": 0.866929751507831, "grad_norm": 0.431640625, "learning_rate": 1.3822701133493255e-06, "loss": 1.9883, "step": 26870 }, { "epoch": 0.8669620153616274, "grad_norm": 0.416015625, "learning_rate": 1.3816103247954188e-06, "loss": 1.9968, "step": 26871 }, { "epoch": 0.8669942792154237, "grad_norm": 0.404296875, "learning_rate": 1.3809506861429798e-06, "loss": 1.9721, "step": 26872 }, { "epoch": 0.8670265430692201, "grad_norm": 0.39453125, "learning_rate": 1.380291197399265e-06, "loss": 2.0091, "step": 26873 }, { "epoch": 0.8670588069230164, "grad_norm": 0.404296875, "learning_rate": 1.3796318585715296e-06, "loss": 2.0087, "step": 26874 }, { "epoch": 0.8670910707768128, "grad_norm": 0.392578125, "learning_rate": 1.3789726696670396e-06, "loss": 1.9896, "step": 26875 }, { "epoch": 0.8671233346306091, "grad_norm": 0.396484375, "learning_rate": 1.3783136306930478e-06, "loss": 1.9731, "step": 26876 }, { "epoch": 0.8671555984844055, "grad_norm": 0.39453125, "learning_rate": 1.3776547416568031e-06, "loss": 1.9432, "step": 26877 }, { "epoch": 0.8671878623382018, "grad_norm": 0.390625, "learning_rate": 1.3769960025655665e-06, "loss": 1.9902, "step": 26878 }, { "epoch": 0.8672201261919982, "grad_norm": 0.40234375, "learning_rate": 1.3763374134265837e-06, "loss": 1.9697, "step": 26879 }, { "epoch": 0.8672523900457945, "grad_norm": 0.390625, "learning_rate": 1.3756789742471026e-06, "loss": 1.997, "step": 26880 }, { "epoch": 0.8672846538995909, "grad_norm": 0.400390625, "learning_rate": 1.3750206850343755e-06, "loss": 1.9959, "step": 26881 }, { "epoch": 0.8673169177533872, "grad_norm": 0.38671875, "learning_rate": 1.3743625457956465e-06, "loss": 2.021, "step": 26882 }, { "epoch": 0.8673491816071836, "grad_norm": 0.390625, "learning_rate": 1.3737045565381568e-06, "loss": 2.0316, "step": 26883 }, { "epoch": 0.8673814454609798, "grad_norm": 0.384765625, "learning_rate": 1.3730467172691553e-06, "loss": 1.9816, "step": 26884 }, { "epoch": 0.8674137093147762, "grad_norm": 0.376953125, "learning_rate": 1.3723890279958796e-06, "loss": 2.0045, "step": 26885 }, { "epoch": 0.8674459731685726, "grad_norm": 0.390625, "learning_rate": 1.3717314887255655e-06, "loss": 1.991, "step": 26886 }, { "epoch": 0.8674782370223689, "grad_norm": 0.376953125, "learning_rate": 1.3710740994654576e-06, "loss": 1.9249, "step": 26887 }, { "epoch": 0.8675105008761653, "grad_norm": 0.376953125, "learning_rate": 1.3704168602227879e-06, "loss": 1.9922, "step": 26888 }, { "epoch": 0.8675427647299616, "grad_norm": 0.37109375, "learning_rate": 1.3697597710047894e-06, "loss": 1.9707, "step": 26889 }, { "epoch": 0.867575028583758, "grad_norm": 0.369140625, "learning_rate": 1.3691028318186994e-06, "loss": 2.012, "step": 26890 }, { "epoch": 0.8676072924375543, "grad_norm": 0.37109375, "learning_rate": 1.3684460426717471e-06, "loss": 1.9718, "step": 26891 }, { "epoch": 0.8676395562913507, "grad_norm": 0.3671875, "learning_rate": 1.367789403571162e-06, "loss": 2.0077, "step": 26892 }, { "epoch": 0.867671820145147, "grad_norm": 0.3671875, "learning_rate": 1.3671329145241679e-06, "loss": 2.0095, "step": 26893 }, { "epoch": 0.8677040839989434, "grad_norm": 0.3671875, "learning_rate": 1.3664765755379976e-06, "loss": 1.9701, "step": 26894 }, { "epoch": 0.8677363478527397, "grad_norm": 0.373046875, "learning_rate": 1.3658203866198737e-06, "loss": 2.0123, "step": 26895 }, { "epoch": 0.8677686117065361, "grad_norm": 0.390625, "learning_rate": 1.3651643477770136e-06, "loss": 2.033, "step": 26896 }, { "epoch": 0.8678008755603324, "grad_norm": 0.369140625, "learning_rate": 1.3645084590166452e-06, "loss": 2.0156, "step": 26897 }, { "epoch": 0.8678331394141288, "grad_norm": 0.359375, "learning_rate": 1.3638527203459871e-06, "loss": 2.0171, "step": 26898 }, { "epoch": 0.867865403267925, "grad_norm": 0.37109375, "learning_rate": 1.3631971317722507e-06, "loss": 2.0099, "step": 26899 }, { "epoch": 0.8678976671217215, "grad_norm": 0.353515625, "learning_rate": 1.3625416933026618e-06, "loss": 1.9577, "step": 26900 }, { "epoch": 0.8679299309755177, "grad_norm": 0.357421875, "learning_rate": 1.3618864049444312e-06, "loss": 2.0015, "step": 26901 }, { "epoch": 0.8679621948293141, "grad_norm": 0.357421875, "learning_rate": 1.3612312667047667e-06, "loss": 1.9995, "step": 26902 }, { "epoch": 0.8679944586831104, "grad_norm": 0.369140625, "learning_rate": 1.360576278590887e-06, "loss": 2.0195, "step": 26903 }, { "epoch": 0.8680267225369068, "grad_norm": 0.353515625, "learning_rate": 1.3599214406099986e-06, "loss": 1.9811, "step": 26904 }, { "epoch": 0.8680589863907032, "grad_norm": 0.3671875, "learning_rate": 1.3592667527693087e-06, "loss": 1.9843, "step": 26905 }, { "epoch": 0.8680912502444995, "grad_norm": 0.3515625, "learning_rate": 1.358612215076025e-06, "loss": 1.9963, "step": 26906 }, { "epoch": 0.8681235140982959, "grad_norm": 0.349609375, "learning_rate": 1.3579578275373532e-06, "loss": 1.9912, "step": 26907 }, { "epoch": 0.8681557779520922, "grad_norm": 0.359375, "learning_rate": 1.3573035901604914e-06, "loss": 1.9847, "step": 26908 }, { "epoch": 0.8681880418058886, "grad_norm": 0.359375, "learning_rate": 1.3566495029526466e-06, "loss": 2.0037, "step": 26909 }, { "epoch": 0.8682203056596849, "grad_norm": 0.361328125, "learning_rate": 1.3559955659210182e-06, "loss": 1.9987, "step": 26910 }, { "epoch": 0.8682525695134813, "grad_norm": 0.40234375, "learning_rate": 1.3553417790727989e-06, "loss": 2.0289, "step": 26911 }, { "epoch": 0.8682848333672776, "grad_norm": 0.349609375, "learning_rate": 1.3546881424151913e-06, "loss": 1.9867, "step": 26912 }, { "epoch": 0.868317097221074, "grad_norm": 0.357421875, "learning_rate": 1.3540346559553862e-06, "loss": 2.0049, "step": 26913 }, { "epoch": 0.8683493610748703, "grad_norm": 0.34765625, "learning_rate": 1.353381319700578e-06, "loss": 2.0041, "step": 26914 }, { "epoch": 0.8683816249286667, "grad_norm": 0.357421875, "learning_rate": 1.352728133657959e-06, "loss": 1.9979, "step": 26915 }, { "epoch": 0.868413888782463, "grad_norm": 0.36328125, "learning_rate": 1.3520750978347185e-06, "loss": 2.0004, "step": 26916 }, { "epoch": 0.8684461526362593, "grad_norm": 0.3515625, "learning_rate": 1.351422212238041e-06, "loss": 2.0001, "step": 26917 }, { "epoch": 0.8684784164900556, "grad_norm": 0.361328125, "learning_rate": 1.3507694768751206e-06, "loss": 2.0032, "step": 26918 }, { "epoch": 0.868510680343852, "grad_norm": 0.34765625, "learning_rate": 1.3501168917531366e-06, "loss": 1.9801, "step": 26919 }, { "epoch": 0.8685429441976483, "grad_norm": 0.3515625, "learning_rate": 1.3494644568792714e-06, "loss": 1.9831, "step": 26920 }, { "epoch": 0.8685752080514447, "grad_norm": 0.359375, "learning_rate": 1.348812172260711e-06, "loss": 2.0, "step": 26921 }, { "epoch": 0.868607471905241, "grad_norm": 0.33984375, "learning_rate": 1.3481600379046334e-06, "loss": 1.954, "step": 26922 }, { "epoch": 0.8686397357590374, "grad_norm": 0.357421875, "learning_rate": 1.3475080538182155e-06, "loss": 1.9619, "step": 26923 }, { "epoch": 0.8686719996128337, "grad_norm": 0.3515625, "learning_rate": 1.346856220008632e-06, "loss": 1.9729, "step": 26924 }, { "epoch": 0.8687042634666301, "grad_norm": 0.34375, "learning_rate": 1.3462045364830638e-06, "loss": 1.9972, "step": 26925 }, { "epoch": 0.8687365273204265, "grad_norm": 0.35546875, "learning_rate": 1.3455530032486818e-06, "loss": 1.9827, "step": 26926 }, { "epoch": 0.8687687911742228, "grad_norm": 0.3515625, "learning_rate": 1.344901620312652e-06, "loss": 1.9953, "step": 26927 }, { "epoch": 0.8688010550280192, "grad_norm": 0.34375, "learning_rate": 1.3442503876821532e-06, "loss": 1.9919, "step": 26928 }, { "epoch": 0.8688333188818155, "grad_norm": 0.353515625, "learning_rate": 1.3435993053643485e-06, "loss": 2.0071, "step": 26929 }, { "epoch": 0.8688655827356119, "grad_norm": 0.349609375, "learning_rate": 1.342948373366404e-06, "loss": 1.9968, "step": 26930 }, { "epoch": 0.8688978465894082, "grad_norm": 0.341796875, "learning_rate": 1.3422975916954883e-06, "loss": 1.9813, "step": 26931 }, { "epoch": 0.8689301104432046, "grad_norm": 0.3515625, "learning_rate": 1.3416469603587628e-06, "loss": 2.015, "step": 26932 }, { "epoch": 0.8689623742970009, "grad_norm": 0.34765625, "learning_rate": 1.3409964793633867e-06, "loss": 1.9939, "step": 26933 }, { "epoch": 0.8689946381507972, "grad_norm": 0.353515625, "learning_rate": 1.3403461487165242e-06, "loss": 1.9836, "step": 26934 }, { "epoch": 0.8690269020045935, "grad_norm": 0.353515625, "learning_rate": 1.339695968425333e-06, "loss": 1.99, "step": 26935 }, { "epoch": 0.8690591658583899, "grad_norm": 0.34375, "learning_rate": 1.3390459384969655e-06, "loss": 1.9857, "step": 26936 }, { "epoch": 0.8690914297121862, "grad_norm": 0.357421875, "learning_rate": 1.3383960589385829e-06, "loss": 1.9636, "step": 26937 }, { "epoch": 0.8691236935659826, "grad_norm": 0.34375, "learning_rate": 1.3377463297573344e-06, "loss": 1.9874, "step": 26938 }, { "epoch": 0.8691559574197789, "grad_norm": 0.3515625, "learning_rate": 1.3370967509603727e-06, "loss": 2.0093, "step": 26939 }, { "epoch": 0.8691882212735753, "grad_norm": 0.34765625, "learning_rate": 1.3364473225548485e-06, "loss": 1.9995, "step": 26940 }, { "epoch": 0.8692204851273716, "grad_norm": 0.349609375, "learning_rate": 1.3357980445479112e-06, "loss": 2.0101, "step": 26941 }, { "epoch": 0.869252748981168, "grad_norm": 0.345703125, "learning_rate": 1.3351489169467034e-06, "loss": 1.9986, "step": 26942 }, { "epoch": 0.8692850128349643, "grad_norm": 0.345703125, "learning_rate": 1.3344999397583763e-06, "loss": 1.975, "step": 26943 }, { "epoch": 0.8693172766887607, "grad_norm": 0.353515625, "learning_rate": 1.3338511129900705e-06, "loss": 1.9603, "step": 26944 }, { "epoch": 0.8693495405425571, "grad_norm": 0.3515625, "learning_rate": 1.3332024366489254e-06, "loss": 1.9804, "step": 26945 }, { "epoch": 0.8693818043963534, "grad_norm": 0.345703125, "learning_rate": 1.3325539107420837e-06, "loss": 1.991, "step": 26946 }, { "epoch": 0.8694140682501498, "grad_norm": 0.345703125, "learning_rate": 1.3319055352766862e-06, "loss": 2.0193, "step": 26947 }, { "epoch": 0.8694463321039461, "grad_norm": 0.341796875, "learning_rate": 1.3312573102598625e-06, "loss": 1.9707, "step": 26948 }, { "epoch": 0.8694785959577425, "grad_norm": 0.34375, "learning_rate": 1.3306092356987564e-06, "loss": 2.0067, "step": 26949 }, { "epoch": 0.8695108598115387, "grad_norm": 0.341796875, "learning_rate": 1.329961311600496e-06, "loss": 1.9988, "step": 26950 }, { "epoch": 0.8695431236653351, "grad_norm": 0.357421875, "learning_rate": 1.3293135379722138e-06, "loss": 1.9667, "step": 26951 }, { "epoch": 0.8695753875191314, "grad_norm": 0.3515625, "learning_rate": 1.3286659148210423e-06, "loss": 1.9989, "step": 26952 }, { "epoch": 0.8696076513729278, "grad_norm": 0.34375, "learning_rate": 1.3280184421541092e-06, "loss": 2.0206, "step": 26953 }, { "epoch": 0.8696399152267241, "grad_norm": 0.349609375, "learning_rate": 1.3273711199785405e-06, "loss": 1.996, "step": 26954 }, { "epoch": 0.8696721790805205, "grad_norm": 0.353515625, "learning_rate": 1.3267239483014588e-06, "loss": 1.9914, "step": 26955 }, { "epoch": 0.8697044429343168, "grad_norm": 0.34375, "learning_rate": 1.326076927129995e-06, "loss": 1.9856, "step": 26956 }, { "epoch": 0.8697367067881132, "grad_norm": 0.341796875, "learning_rate": 1.3254300564712652e-06, "loss": 2.0177, "step": 26957 }, { "epoch": 0.8697689706419095, "grad_norm": 0.341796875, "learning_rate": 1.3247833363323886e-06, "loss": 1.9709, "step": 26958 }, { "epoch": 0.8698012344957059, "grad_norm": 0.337890625, "learning_rate": 1.3241367667204895e-06, "loss": 1.9993, "step": 26959 }, { "epoch": 0.8698334983495022, "grad_norm": 0.34765625, "learning_rate": 1.3234903476426824e-06, "loss": 1.988, "step": 26960 }, { "epoch": 0.8698657622032986, "grad_norm": 0.345703125, "learning_rate": 1.3228440791060782e-06, "loss": 1.9987, "step": 26961 }, { "epoch": 0.8698980260570949, "grad_norm": 0.35546875, "learning_rate": 1.3221979611177976e-06, "loss": 1.9895, "step": 26962 }, { "epoch": 0.8699302899108913, "grad_norm": 0.34375, "learning_rate": 1.3215519936849502e-06, "loss": 2.0247, "step": 26963 }, { "epoch": 0.8699625537646876, "grad_norm": 0.33984375, "learning_rate": 1.3209061768146419e-06, "loss": 2.0215, "step": 26964 }, { "epoch": 0.869994817618484, "grad_norm": 0.345703125, "learning_rate": 1.3202605105139887e-06, "loss": 1.9589, "step": 26965 }, { "epoch": 0.8700270814722804, "grad_norm": 0.404296875, "learning_rate": 1.3196149947900933e-06, "loss": 2.0035, "step": 26966 }, { "epoch": 0.8700593453260766, "grad_norm": 0.33984375, "learning_rate": 1.318969629650058e-06, "loss": 1.9913, "step": 26967 }, { "epoch": 0.870091609179873, "grad_norm": 0.34375, "learning_rate": 1.3183244151009944e-06, "loss": 2.0229, "step": 26968 }, { "epoch": 0.8701238730336693, "grad_norm": 0.34375, "learning_rate": 1.3176793511500013e-06, "loss": 1.9799, "step": 26969 }, { "epoch": 0.8701561368874657, "grad_norm": 0.345703125, "learning_rate": 1.3170344378041732e-06, "loss": 1.9792, "step": 26970 }, { "epoch": 0.870188400741262, "grad_norm": 0.349609375, "learning_rate": 1.3163896750706177e-06, "loss": 2.0039, "step": 26971 }, { "epoch": 0.8702206645950584, "grad_norm": 0.341796875, "learning_rate": 1.3157450629564293e-06, "loss": 2.0152, "step": 26972 }, { "epoch": 0.8702529284488547, "grad_norm": 0.3515625, "learning_rate": 1.3151006014686973e-06, "loss": 1.9686, "step": 26973 }, { "epoch": 0.8702851923026511, "grad_norm": 0.34765625, "learning_rate": 1.314456290614524e-06, "loss": 1.9674, "step": 26974 }, { "epoch": 0.8703174561564474, "grad_norm": 0.6640625, "learning_rate": 1.3138121304009991e-06, "loss": 1.8426, "step": 26975 }, { "epoch": 0.8703497200102438, "grad_norm": 0.66796875, "learning_rate": 1.3131681208352087e-06, "loss": 1.8621, "step": 26976 }, { "epoch": 0.8703819838640401, "grad_norm": 0.67578125, "learning_rate": 1.3125242619242467e-06, "loss": 1.8598, "step": 26977 }, { "epoch": 0.8704142477178365, "grad_norm": 0.6484375, "learning_rate": 1.3118805536751977e-06, "loss": 1.8655, "step": 26978 }, { "epoch": 0.8704465115716328, "grad_norm": 0.64453125, "learning_rate": 1.311236996095146e-06, "loss": 1.8502, "step": 26979 }, { "epoch": 0.8704787754254292, "grad_norm": 0.6328125, "learning_rate": 1.310593589191179e-06, "loss": 1.8511, "step": 26980 }, { "epoch": 0.8705110392792255, "grad_norm": 0.50390625, "learning_rate": 1.3099503329703783e-06, "loss": 1.8759, "step": 26981 }, { "epoch": 0.8705433031330219, "grad_norm": 0.462890625, "learning_rate": 1.3093072274398193e-06, "loss": 1.9858, "step": 26982 }, { "epoch": 0.8705755669868182, "grad_norm": 0.412109375, "learning_rate": 1.3086642726065883e-06, "loss": 1.9385, "step": 26983 }, { "epoch": 0.8706078308406145, "grad_norm": 0.419921875, "learning_rate": 1.308021468477758e-06, "loss": 1.9616, "step": 26984 }, { "epoch": 0.8706400946944108, "grad_norm": 0.416015625, "learning_rate": 1.307378815060406e-06, "loss": 1.9684, "step": 26985 }, { "epoch": 0.8706723585482072, "grad_norm": 0.44140625, "learning_rate": 1.3067363123616017e-06, "loss": 1.9821, "step": 26986 }, { "epoch": 0.8707046224020036, "grad_norm": 0.427734375, "learning_rate": 1.3060939603884226e-06, "loss": 1.9196, "step": 26987 }, { "epoch": 0.8707368862557999, "grad_norm": 0.43359375, "learning_rate": 1.3054517591479382e-06, "loss": 1.9619, "step": 26988 }, { "epoch": 0.8707691501095963, "grad_norm": 0.4296875, "learning_rate": 1.3048097086472126e-06, "loss": 1.9269, "step": 26989 }, { "epoch": 0.8708014139633926, "grad_norm": 0.421875, "learning_rate": 1.3041678088933206e-06, "loss": 1.9603, "step": 26990 }, { "epoch": 0.870833677817189, "grad_norm": 0.419921875, "learning_rate": 1.303526059893323e-06, "loss": 1.9499, "step": 26991 }, { "epoch": 0.8708659416709853, "grad_norm": 0.419921875, "learning_rate": 1.3028844616542822e-06, "loss": 1.9307, "step": 26992 }, { "epoch": 0.8708982055247817, "grad_norm": 0.421875, "learning_rate": 1.3022430141832663e-06, "loss": 1.9212, "step": 26993 }, { "epoch": 0.870930469378578, "grad_norm": 0.431640625, "learning_rate": 1.301601717487333e-06, "loss": 1.973, "step": 26994 }, { "epoch": 0.8709627332323744, "grad_norm": 0.412109375, "learning_rate": 1.3009605715735378e-06, "loss": 1.9609, "step": 26995 }, { "epoch": 0.8709949970861707, "grad_norm": 0.40625, "learning_rate": 1.3003195764489422e-06, "loss": 1.9413, "step": 26996 }, { "epoch": 0.8710272609399671, "grad_norm": 0.40625, "learning_rate": 1.2996787321206022e-06, "loss": 1.9685, "step": 26997 }, { "epoch": 0.8710595247937634, "grad_norm": 0.416015625, "learning_rate": 1.299038038595567e-06, "loss": 1.9731, "step": 26998 }, { "epoch": 0.8710917886475598, "grad_norm": 0.392578125, "learning_rate": 1.2983974958808942e-06, "loss": 1.9634, "step": 26999 }, { "epoch": 0.871124052501356, "grad_norm": 0.3984375, "learning_rate": 1.2977571039836333e-06, "loss": 1.9699, "step": 27000 }, { "epoch": 0.8711563163551524, "grad_norm": 0.41015625, "learning_rate": 1.2971168629108287e-06, "loss": 1.944, "step": 27001 }, { "epoch": 0.8711885802089487, "grad_norm": 0.390625, "learning_rate": 1.2964767726695349e-06, "loss": 1.9804, "step": 27002 }, { "epoch": 0.8712208440627451, "grad_norm": 0.384765625, "learning_rate": 1.2958368332667925e-06, "loss": 1.947, "step": 27003 }, { "epoch": 0.8712531079165414, "grad_norm": 0.388671875, "learning_rate": 1.295197044709646e-06, "loss": 1.9649, "step": 27004 }, { "epoch": 0.8712853717703378, "grad_norm": 0.37890625, "learning_rate": 1.2945574070051403e-06, "loss": 1.9519, "step": 27005 }, { "epoch": 0.8713176356241342, "grad_norm": 0.392578125, "learning_rate": 1.2939179201603141e-06, "loss": 1.9582, "step": 27006 }, { "epoch": 0.8713498994779305, "grad_norm": 0.37890625, "learning_rate": 1.2932785841822053e-06, "loss": 1.9522, "step": 27007 }, { "epoch": 0.8713821633317269, "grad_norm": 0.37109375, "learning_rate": 1.2926393990778533e-06, "loss": 1.959, "step": 27008 }, { "epoch": 0.8714144271855232, "grad_norm": 0.380859375, "learning_rate": 1.2920003648542945e-06, "loss": 1.9301, "step": 27009 }, { "epoch": 0.8714466910393196, "grad_norm": 0.37890625, "learning_rate": 1.2913614815185593e-06, "loss": 1.9292, "step": 27010 }, { "epoch": 0.8714789548931159, "grad_norm": 0.365234375, "learning_rate": 1.290722749077684e-06, "loss": 1.9584, "step": 27011 }, { "epoch": 0.8715112187469123, "grad_norm": 0.376953125, "learning_rate": 1.2900841675386981e-06, "loss": 1.9499, "step": 27012 }, { "epoch": 0.8715434826007086, "grad_norm": 0.373046875, "learning_rate": 1.289445736908631e-06, "loss": 1.9694, "step": 27013 }, { "epoch": 0.871575746454505, "grad_norm": 0.37109375, "learning_rate": 1.2888074571945051e-06, "loss": 1.9561, "step": 27014 }, { "epoch": 0.8716080103083013, "grad_norm": 0.369140625, "learning_rate": 1.2881693284033553e-06, "loss": 1.9454, "step": 27015 }, { "epoch": 0.8716402741620977, "grad_norm": 0.3671875, "learning_rate": 1.2875313505421987e-06, "loss": 1.9397, "step": 27016 }, { "epoch": 0.871672538015894, "grad_norm": 0.3671875, "learning_rate": 1.2868935236180584e-06, "loss": 1.9514, "step": 27017 }, { "epoch": 0.8717048018696903, "grad_norm": 0.369140625, "learning_rate": 1.2862558476379572e-06, "loss": 1.9634, "step": 27018 }, { "epoch": 0.8717370657234866, "grad_norm": 0.361328125, "learning_rate": 1.2856183226089157e-06, "loss": 1.9569, "step": 27019 }, { "epoch": 0.871769329577283, "grad_norm": 0.365234375, "learning_rate": 1.2849809485379454e-06, "loss": 1.9063, "step": 27020 }, { "epoch": 0.8718015934310793, "grad_norm": 0.37109375, "learning_rate": 1.2843437254320673e-06, "loss": 1.9169, "step": 27021 }, { "epoch": 0.8718338572848757, "grad_norm": 0.359375, "learning_rate": 1.2837066532982956e-06, "loss": 1.9671, "step": 27022 }, { "epoch": 0.871866121138672, "grad_norm": 0.3671875, "learning_rate": 1.2830697321436379e-06, "loss": 1.9606, "step": 27023 }, { "epoch": 0.8718983849924684, "grad_norm": 0.359375, "learning_rate": 1.2824329619751107e-06, "loss": 1.9666, "step": 27024 }, { "epoch": 0.8719306488462647, "grad_norm": 0.361328125, "learning_rate": 1.2817963427997198e-06, "loss": 1.9356, "step": 27025 }, { "epoch": 0.8719629127000611, "grad_norm": 0.361328125, "learning_rate": 1.2811598746244713e-06, "loss": 1.9562, "step": 27026 }, { "epoch": 0.8719951765538575, "grad_norm": 0.361328125, "learning_rate": 1.2805235574563761e-06, "loss": 1.9533, "step": 27027 }, { "epoch": 0.8720274404076538, "grad_norm": 0.365234375, "learning_rate": 1.2798873913024339e-06, "loss": 1.9545, "step": 27028 }, { "epoch": 0.8720597042614502, "grad_norm": 0.3671875, "learning_rate": 1.2792513761696473e-06, "loss": 1.9491, "step": 27029 }, { "epoch": 0.8720919681152465, "grad_norm": 0.36328125, "learning_rate": 1.2786155120650207e-06, "loss": 1.9081, "step": 27030 }, { "epoch": 0.8721242319690429, "grad_norm": 0.36328125, "learning_rate": 1.2779797989955499e-06, "loss": 1.9663, "step": 27031 }, { "epoch": 0.8721564958228392, "grad_norm": 0.361328125, "learning_rate": 1.2773442369682315e-06, "loss": 1.9354, "step": 27032 }, { "epoch": 0.8721887596766356, "grad_norm": 0.359375, "learning_rate": 1.276708825990066e-06, "loss": 1.9383, "step": 27033 }, { "epoch": 0.8722210235304318, "grad_norm": 0.357421875, "learning_rate": 1.276073566068045e-06, "loss": 1.9474, "step": 27034 }, { "epoch": 0.8722532873842282, "grad_norm": 0.392578125, "learning_rate": 1.2754384572091576e-06, "loss": 2.0583, "step": 27035 }, { "epoch": 0.8722855512380245, "grad_norm": 0.3984375, "learning_rate": 1.2748034994204e-06, "loss": 2.044, "step": 27036 }, { "epoch": 0.8723178150918209, "grad_norm": 0.396484375, "learning_rate": 1.2741686927087598e-06, "loss": 2.0691, "step": 27037 }, { "epoch": 0.8723500789456172, "grad_norm": 0.412109375, "learning_rate": 1.27353403708122e-06, "loss": 2.0672, "step": 27038 }, { "epoch": 0.8723823427994136, "grad_norm": 0.400390625, "learning_rate": 1.272899532544773e-06, "loss": 2.0379, "step": 27039 }, { "epoch": 0.8724146066532099, "grad_norm": 0.404296875, "learning_rate": 1.2722651791064016e-06, "loss": 2.0433, "step": 27040 }, { "epoch": 0.8724468705070063, "grad_norm": 0.384765625, "learning_rate": 1.271630976773082e-06, "loss": 2.059, "step": 27041 }, { "epoch": 0.8724791343608026, "grad_norm": 0.37109375, "learning_rate": 1.270996925551804e-06, "loss": 2.0429, "step": 27042 }, { "epoch": 0.872511398214599, "grad_norm": 0.361328125, "learning_rate": 1.270363025449543e-06, "loss": 1.9851, "step": 27043 }, { "epoch": 0.8725436620683953, "grad_norm": 0.361328125, "learning_rate": 1.2697292764732754e-06, "loss": 2.0048, "step": 27044 }, { "epoch": 0.8725759259221917, "grad_norm": 0.3515625, "learning_rate": 1.2690956786299757e-06, "loss": 2.0103, "step": 27045 }, { "epoch": 0.872608189775988, "grad_norm": 0.357421875, "learning_rate": 1.2684622319266232e-06, "loss": 1.974, "step": 27046 }, { "epoch": 0.8726404536297844, "grad_norm": 0.361328125, "learning_rate": 1.2678289363701857e-06, "loss": 1.9362, "step": 27047 }, { "epoch": 0.8726727174835808, "grad_norm": 0.353515625, "learning_rate": 1.2671957919676358e-06, "loss": 2.031, "step": 27048 }, { "epoch": 0.8727049813373771, "grad_norm": 0.359375, "learning_rate": 1.2665627987259433e-06, "loss": 2.0116, "step": 27049 }, { "epoch": 0.8727372451911735, "grad_norm": 0.349609375, "learning_rate": 1.2659299566520755e-06, "loss": 1.9774, "step": 27050 }, { "epoch": 0.8727695090449697, "grad_norm": 0.35546875, "learning_rate": 1.2652972657529972e-06, "loss": 1.9801, "step": 27051 }, { "epoch": 0.8728017728987661, "grad_norm": 0.3515625, "learning_rate": 1.2646647260356742e-06, "loss": 2.0206, "step": 27052 }, { "epoch": 0.8728340367525624, "grad_norm": 0.353515625, "learning_rate": 1.2640323375070678e-06, "loss": 1.9795, "step": 27053 }, { "epoch": 0.8728663006063588, "grad_norm": 0.341796875, "learning_rate": 1.2634001001741375e-06, "loss": 2.002, "step": 27054 }, { "epoch": 0.8728985644601551, "grad_norm": 0.349609375, "learning_rate": 1.2627680140438475e-06, "loss": 1.9787, "step": 27055 }, { "epoch": 0.8729308283139515, "grad_norm": 0.34765625, "learning_rate": 1.2621360791231523e-06, "loss": 1.9754, "step": 27056 }, { "epoch": 0.8729630921677478, "grad_norm": 0.349609375, "learning_rate": 1.2615042954190032e-06, "loss": 2.0149, "step": 27057 }, { "epoch": 0.8729953560215442, "grad_norm": 0.34375, "learning_rate": 1.2608726629383626e-06, "loss": 1.9916, "step": 27058 }, { "epoch": 0.8730276198753405, "grad_norm": 0.357421875, "learning_rate": 1.2602411816881804e-06, "loss": 2.0062, "step": 27059 }, { "epoch": 0.8730598837291369, "grad_norm": 0.341796875, "learning_rate": 1.2596098516754023e-06, "loss": 1.9794, "step": 27060 }, { "epoch": 0.8730921475829332, "grad_norm": 0.345703125, "learning_rate": 1.2589786729069847e-06, "loss": 1.9851, "step": 27061 }, { "epoch": 0.8731244114367296, "grad_norm": 0.34375, "learning_rate": 1.2583476453898717e-06, "loss": 1.9871, "step": 27062 }, { "epoch": 0.8731566752905259, "grad_norm": 0.37109375, "learning_rate": 1.2577167691310066e-06, "loss": 2.004, "step": 27063 }, { "epoch": 0.8731889391443223, "grad_norm": 0.349609375, "learning_rate": 1.2570860441373432e-06, "loss": 2.0123, "step": 27064 }, { "epoch": 0.8732212029981186, "grad_norm": 0.34765625, "learning_rate": 1.2564554704158116e-06, "loss": 1.9759, "step": 27065 }, { "epoch": 0.873253466851915, "grad_norm": 0.337890625, "learning_rate": 1.255825047973359e-06, "loss": 1.9887, "step": 27066 }, { "epoch": 0.8732857307057114, "grad_norm": 0.34375, "learning_rate": 1.2551947768169287e-06, "loss": 1.983, "step": 27067 }, { "epoch": 0.8733179945595076, "grad_norm": 0.34375, "learning_rate": 1.2545646569534496e-06, "loss": 1.9811, "step": 27068 }, { "epoch": 0.873350258413304, "grad_norm": 0.345703125, "learning_rate": 1.25393468838986e-06, "loss": 2.0261, "step": 27069 }, { "epoch": 0.8733825222671003, "grad_norm": 0.3359375, "learning_rate": 1.2533048711331008e-06, "loss": 1.9529, "step": 27070 }, { "epoch": 0.8734147861208967, "grad_norm": 0.337890625, "learning_rate": 1.252675205190098e-06, "loss": 2.0159, "step": 27071 }, { "epoch": 0.873447049974693, "grad_norm": 0.33984375, "learning_rate": 1.2520456905677829e-06, "loss": 1.9762, "step": 27072 }, { "epoch": 0.8734793138284894, "grad_norm": 0.349609375, "learning_rate": 1.2514163272730883e-06, "loss": 1.9847, "step": 27073 }, { "epoch": 0.8735115776822857, "grad_norm": 0.33984375, "learning_rate": 1.2507871153129402e-06, "loss": 1.9703, "step": 27074 }, { "epoch": 0.8735438415360821, "grad_norm": 0.33984375, "learning_rate": 1.2501580546942631e-06, "loss": 1.9931, "step": 27075 }, { "epoch": 0.8735761053898784, "grad_norm": 0.337890625, "learning_rate": 1.2495291454239781e-06, "loss": 1.9814, "step": 27076 }, { "epoch": 0.8736083692436748, "grad_norm": 0.34765625, "learning_rate": 1.2489003875090167e-06, "loss": 2.0214, "step": 27077 }, { "epoch": 0.8736406330974711, "grad_norm": 0.337890625, "learning_rate": 1.2482717809562944e-06, "loss": 1.984, "step": 27078 }, { "epoch": 0.8736728969512675, "grad_norm": 0.345703125, "learning_rate": 1.2476433257727277e-06, "loss": 1.9715, "step": 27079 }, { "epoch": 0.8737051608050638, "grad_norm": 0.34765625, "learning_rate": 1.2470150219652394e-06, "loss": 1.9992, "step": 27080 }, { "epoch": 0.8737374246588602, "grad_norm": 0.333984375, "learning_rate": 1.2463868695407437e-06, "loss": 2.0012, "step": 27081 }, { "epoch": 0.8737696885126565, "grad_norm": 0.337890625, "learning_rate": 1.2457588685061522e-06, "loss": 1.9966, "step": 27082 }, { "epoch": 0.8738019523664529, "grad_norm": 0.341796875, "learning_rate": 1.2451310188683822e-06, "loss": 1.9789, "step": 27083 }, { "epoch": 0.8738342162202491, "grad_norm": 0.33984375, "learning_rate": 1.2445033206343437e-06, "loss": 1.9924, "step": 27084 }, { "epoch": 0.8738664800740455, "grad_norm": 0.349609375, "learning_rate": 1.2438757738109408e-06, "loss": 2.0196, "step": 27085 }, { "epoch": 0.8738987439278418, "grad_norm": 0.349609375, "learning_rate": 1.2432483784050863e-06, "loss": 1.9998, "step": 27086 }, { "epoch": 0.8739310077816382, "grad_norm": 0.34375, "learning_rate": 1.2426211344236865e-06, "loss": 1.9877, "step": 27087 }, { "epoch": 0.8739632716354346, "grad_norm": 0.33984375, "learning_rate": 1.2419940418736391e-06, "loss": 2.001, "step": 27088 }, { "epoch": 0.8739955354892309, "grad_norm": 0.33984375, "learning_rate": 1.2413671007618555e-06, "loss": 2.0176, "step": 27089 }, { "epoch": 0.8740277993430273, "grad_norm": 0.3515625, "learning_rate": 1.240740311095233e-06, "loss": 2.0129, "step": 27090 }, { "epoch": 0.8740600631968236, "grad_norm": 0.333984375, "learning_rate": 1.2401136728806668e-06, "loss": 1.9951, "step": 27091 }, { "epoch": 0.87409232705062, "grad_norm": 0.333984375, "learning_rate": 1.2394871861250607e-06, "loss": 1.9934, "step": 27092 }, { "epoch": 0.8741245909044163, "grad_norm": 0.341796875, "learning_rate": 1.2388608508353077e-06, "loss": 2.0267, "step": 27093 }, { "epoch": 0.8741568547582127, "grad_norm": 0.337890625, "learning_rate": 1.2382346670182992e-06, "loss": 1.9745, "step": 27094 }, { "epoch": 0.874189118612009, "grad_norm": 0.33984375, "learning_rate": 1.2376086346809361e-06, "loss": 2.0049, "step": 27095 }, { "epoch": 0.8742213824658054, "grad_norm": 0.33984375, "learning_rate": 1.2369827538301e-06, "loss": 1.9842, "step": 27096 }, { "epoch": 0.8742536463196017, "grad_norm": 0.3359375, "learning_rate": 1.236357024472683e-06, "loss": 1.9796, "step": 27097 }, { "epoch": 0.8742859101733981, "grad_norm": 0.345703125, "learning_rate": 1.23573144661558e-06, "loss": 1.9966, "step": 27098 }, { "epoch": 0.8743181740271944, "grad_norm": 0.33984375, "learning_rate": 1.2351060202656656e-06, "loss": 1.9835, "step": 27099 }, { "epoch": 0.8743504378809908, "grad_norm": 0.35546875, "learning_rate": 1.2344807454298274e-06, "loss": 2.0335, "step": 27100 }, { "epoch": 0.874382701734787, "grad_norm": 0.33984375, "learning_rate": 1.2338556221149567e-06, "loss": 1.9664, "step": 27101 }, { "epoch": 0.8744149655885834, "grad_norm": 0.341796875, "learning_rate": 1.2332306503279228e-06, "loss": 1.9889, "step": 27102 }, { "epoch": 0.8744472294423797, "grad_norm": 0.341796875, "learning_rate": 1.2326058300756087e-06, "loss": 1.9988, "step": 27103 }, { "epoch": 0.8744794932961761, "grad_norm": 0.357421875, "learning_rate": 1.2319811613648956e-06, "loss": 1.9511, "step": 27104 }, { "epoch": 0.8745117571499724, "grad_norm": 0.337890625, "learning_rate": 1.2313566442026563e-06, "loss": 1.9905, "step": 27105 }, { "epoch": 0.8745440210037688, "grad_norm": 0.3359375, "learning_rate": 1.2307322785957669e-06, "loss": 1.9752, "step": 27106 }, { "epoch": 0.8745762848575652, "grad_norm": 0.341796875, "learning_rate": 1.2301080645510954e-06, "loss": 1.9993, "step": 27107 }, { "epoch": 0.8746085487113615, "grad_norm": 0.3515625, "learning_rate": 1.2294840020755177e-06, "loss": 1.9985, "step": 27108 }, { "epoch": 0.8746408125651579, "grad_norm": 0.345703125, "learning_rate": 1.2288600911759018e-06, "loss": 2.0109, "step": 27109 }, { "epoch": 0.8746730764189542, "grad_norm": 0.345703125, "learning_rate": 1.2282363318591122e-06, "loss": 2.0003, "step": 27110 }, { "epoch": 0.8747053402727506, "grad_norm": 0.337890625, "learning_rate": 1.2276127241320184e-06, "loss": 1.9957, "step": 27111 }, { "epoch": 0.8747376041265469, "grad_norm": 0.33984375, "learning_rate": 1.226989268001485e-06, "loss": 2.0227, "step": 27112 }, { "epoch": 0.8747698679803433, "grad_norm": 0.341796875, "learning_rate": 1.2263659634743695e-06, "loss": 1.9966, "step": 27113 }, { "epoch": 0.8748021318341396, "grad_norm": 0.3359375, "learning_rate": 1.2257428105575402e-06, "loss": 1.9573, "step": 27114 }, { "epoch": 0.874834395687936, "grad_norm": 0.337890625, "learning_rate": 1.2251198092578513e-06, "loss": 1.9664, "step": 27115 }, { "epoch": 0.8748666595417323, "grad_norm": 0.333984375, "learning_rate": 1.2244969595821591e-06, "loss": 1.9932, "step": 27116 }, { "epoch": 0.8748989233955287, "grad_norm": 0.337890625, "learning_rate": 1.223874261537325e-06, "loss": 1.9848, "step": 27117 }, { "epoch": 0.874931187249325, "grad_norm": 0.33984375, "learning_rate": 1.2232517151301996e-06, "loss": 1.9607, "step": 27118 }, { "epoch": 0.8749634511031213, "grad_norm": 0.341796875, "learning_rate": 1.222629320367633e-06, "loss": 1.9779, "step": 27119 }, { "epoch": 0.8749957149569176, "grad_norm": 0.341796875, "learning_rate": 1.2220070772564812e-06, "loss": 1.9706, "step": 27120 }, { "epoch": 0.875027978810714, "grad_norm": 0.333984375, "learning_rate": 1.2213849858035936e-06, "loss": 1.9783, "step": 27121 }, { "epoch": 0.8750602426645103, "grad_norm": 0.337890625, "learning_rate": 1.2207630460158099e-06, "loss": 1.9775, "step": 27122 }, { "epoch": 0.8750925065183067, "grad_norm": 0.333984375, "learning_rate": 1.220141257899986e-06, "loss": 2.0265, "step": 27123 }, { "epoch": 0.875124770372103, "grad_norm": 0.337890625, "learning_rate": 1.21951962146296e-06, "loss": 1.9828, "step": 27124 }, { "epoch": 0.8751570342258994, "grad_norm": 0.3359375, "learning_rate": 1.2188981367115747e-06, "loss": 2.0083, "step": 27125 }, { "epoch": 0.8751892980796957, "grad_norm": 0.33984375, "learning_rate": 1.2182768036526764e-06, "loss": 1.9784, "step": 27126 }, { "epoch": 0.8752215619334921, "grad_norm": 0.349609375, "learning_rate": 1.2176556222930945e-06, "loss": 1.9924, "step": 27127 }, { "epoch": 0.8752538257872885, "grad_norm": 0.333984375, "learning_rate": 1.2170345926396736e-06, "loss": 2.02, "step": 27128 }, { "epoch": 0.8752860896410848, "grad_norm": 0.3359375, "learning_rate": 1.2164137146992531e-06, "loss": 1.9896, "step": 27129 }, { "epoch": 0.8753183534948812, "grad_norm": 0.333984375, "learning_rate": 1.2157929884786562e-06, "loss": 1.9808, "step": 27130 }, { "epoch": 0.8753506173486775, "grad_norm": 0.337890625, "learning_rate": 1.2151724139847203e-06, "loss": 1.9966, "step": 27131 }, { "epoch": 0.8753828812024739, "grad_norm": 0.337890625, "learning_rate": 1.2145519912242835e-06, "loss": 2.0223, "step": 27132 }, { "epoch": 0.8754151450562702, "grad_norm": 0.34375, "learning_rate": 1.213931720204164e-06, "loss": 2.0039, "step": 27133 }, { "epoch": 0.8754474089100666, "grad_norm": 0.33984375, "learning_rate": 1.213311600931194e-06, "loss": 1.9952, "step": 27134 }, { "epoch": 0.8754796727638628, "grad_norm": 0.34375, "learning_rate": 1.212691633412205e-06, "loss": 1.967, "step": 27135 }, { "epoch": 0.8755119366176592, "grad_norm": 0.337890625, "learning_rate": 1.2120718176540085e-06, "loss": 2.0095, "step": 27136 }, { "epoch": 0.8755442004714555, "grad_norm": 0.333984375, "learning_rate": 1.2114521536634387e-06, "loss": 2.0015, "step": 27137 }, { "epoch": 0.8755764643252519, "grad_norm": 0.3359375, "learning_rate": 1.210832641447307e-06, "loss": 1.9869, "step": 27138 }, { "epoch": 0.8756087281790482, "grad_norm": 0.341796875, "learning_rate": 1.210213281012441e-06, "loss": 1.9817, "step": 27139 }, { "epoch": 0.8756409920328446, "grad_norm": 0.33984375, "learning_rate": 1.2095940723656556e-06, "loss": 2.0118, "step": 27140 }, { "epoch": 0.8756732558866409, "grad_norm": 0.345703125, "learning_rate": 1.2089750155137618e-06, "loss": 1.9687, "step": 27141 }, { "epoch": 0.8757055197404373, "grad_norm": 0.34765625, "learning_rate": 1.2083561104635792e-06, "loss": 1.9949, "step": 27142 }, { "epoch": 0.8757377835942336, "grad_norm": 0.333984375, "learning_rate": 1.2077373572219192e-06, "loss": 1.974, "step": 27143 }, { "epoch": 0.87577004744803, "grad_norm": 0.3359375, "learning_rate": 1.2071187557955893e-06, "loss": 1.9506, "step": 27144 }, { "epoch": 0.8758023113018263, "grad_norm": 0.345703125, "learning_rate": 1.2065003061914043e-06, "loss": 1.985, "step": 27145 }, { "epoch": 0.8758345751556227, "grad_norm": 0.458984375, "learning_rate": 1.2058820084161671e-06, "loss": 1.9983, "step": 27146 }, { "epoch": 0.875866839009419, "grad_norm": 0.333984375, "learning_rate": 1.2052638624766838e-06, "loss": 1.9569, "step": 27147 }, { "epoch": 0.8758991028632154, "grad_norm": 0.34375, "learning_rate": 1.2046458683797607e-06, "loss": 1.9882, "step": 27148 }, { "epoch": 0.8759313667170118, "grad_norm": 0.359375, "learning_rate": 1.2040280261322007e-06, "loss": 1.9736, "step": 27149 }, { "epoch": 0.8759636305708081, "grad_norm": 0.33984375, "learning_rate": 1.2034103357407982e-06, "loss": 2.0108, "step": 27150 }, { "epoch": 0.8759958944246045, "grad_norm": 0.4921875, "learning_rate": 1.2027927972123615e-06, "loss": 1.9628, "step": 27151 }, { "epoch": 0.8760281582784007, "grad_norm": 0.77734375, "learning_rate": 1.202175410553683e-06, "loss": 1.9489, "step": 27152 }, { "epoch": 0.8760604221321971, "grad_norm": 0.78125, "learning_rate": 1.2015581757715577e-06, "loss": 1.9102, "step": 27153 }, { "epoch": 0.8760926859859934, "grad_norm": 0.765625, "learning_rate": 1.2009410928727815e-06, "loss": 1.9281, "step": 27154 }, { "epoch": 0.8761249498397898, "grad_norm": 0.7734375, "learning_rate": 1.2003241618641475e-06, "loss": 1.9221, "step": 27155 }, { "epoch": 0.8761572136935861, "grad_norm": 0.76171875, "learning_rate": 1.1997073827524435e-06, "loss": 1.9083, "step": 27156 }, { "epoch": 0.8761894775473825, "grad_norm": 0.7421875, "learning_rate": 1.1990907555444642e-06, "loss": 1.9067, "step": 27157 }, { "epoch": 0.8762217414011788, "grad_norm": 0.73828125, "learning_rate": 1.198474280246989e-06, "loss": 1.9285, "step": 27158 }, { "epoch": 0.8762540052549752, "grad_norm": 0.69921875, "learning_rate": 1.1978579568668075e-06, "loss": 1.9004, "step": 27159 }, { "epoch": 0.8762862691087715, "grad_norm": 0.7109375, "learning_rate": 1.1972417854107077e-06, "loss": 1.9374, "step": 27160 }, { "epoch": 0.8763185329625679, "grad_norm": 0.6796875, "learning_rate": 1.1966257658854657e-06, "loss": 1.9074, "step": 27161 }, { "epoch": 0.8763507968163642, "grad_norm": 0.6796875, "learning_rate": 1.196009898297863e-06, "loss": 1.9122, "step": 27162 }, { "epoch": 0.8763830606701606, "grad_norm": 0.66015625, "learning_rate": 1.1953941826546854e-06, "loss": 1.9186, "step": 27163 }, { "epoch": 0.8764153245239569, "grad_norm": 0.65234375, "learning_rate": 1.1947786189627014e-06, "loss": 1.9315, "step": 27164 }, { "epoch": 0.8764475883777533, "grad_norm": 0.63671875, "learning_rate": 1.1941632072286917e-06, "loss": 1.9238, "step": 27165 }, { "epoch": 0.8764798522315496, "grad_norm": 0.625, "learning_rate": 1.1935479474594297e-06, "loss": 1.9029, "step": 27166 }, { "epoch": 0.876512116085346, "grad_norm": 0.625, "learning_rate": 1.1929328396616846e-06, "loss": 1.8725, "step": 27167 }, { "epoch": 0.8765443799391424, "grad_norm": 0.62109375, "learning_rate": 1.1923178838422328e-06, "loss": 1.8831, "step": 27168 }, { "epoch": 0.8765766437929386, "grad_norm": 0.53515625, "learning_rate": 1.191703080007837e-06, "loss": 1.9011, "step": 27169 }, { "epoch": 0.876608907646735, "grad_norm": 0.5, "learning_rate": 1.1910884281652706e-06, "loss": 1.8662, "step": 27170 }, { "epoch": 0.8766411715005313, "grad_norm": 0.49609375, "learning_rate": 1.1904739283212946e-06, "loss": 1.8755, "step": 27171 }, { "epoch": 0.8766734353543277, "grad_norm": 0.48828125, "learning_rate": 1.1898595804826734e-06, "loss": 1.8894, "step": 27172 }, { "epoch": 0.876705699208124, "grad_norm": 0.494140625, "learning_rate": 1.1892453846561734e-06, "loss": 1.8629, "step": 27173 }, { "epoch": 0.8767379630619204, "grad_norm": 0.482421875, "learning_rate": 1.1886313408485526e-06, "loss": 1.8705, "step": 27174 }, { "epoch": 0.8767702269157167, "grad_norm": 0.47265625, "learning_rate": 1.1880174490665674e-06, "loss": 1.8907, "step": 27175 }, { "epoch": 0.8768024907695131, "grad_norm": 0.46875, "learning_rate": 1.1874037093169786e-06, "loss": 1.9129, "step": 27176 }, { "epoch": 0.8768347546233094, "grad_norm": 0.46484375, "learning_rate": 1.186790121606543e-06, "loss": 1.8846, "step": 27177 }, { "epoch": 0.8768670184771058, "grad_norm": 0.4609375, "learning_rate": 1.1861766859420082e-06, "loss": 1.8637, "step": 27178 }, { "epoch": 0.8768992823309021, "grad_norm": 0.466796875, "learning_rate": 1.185563402330132e-06, "loss": 1.8773, "step": 27179 }, { "epoch": 0.8769315461846985, "grad_norm": 0.45703125, "learning_rate": 1.1849502707776644e-06, "loss": 1.8977, "step": 27180 }, { "epoch": 0.8769638100384948, "grad_norm": 0.447265625, "learning_rate": 1.1843372912913513e-06, "loss": 1.8836, "step": 27181 }, { "epoch": 0.8769960738922912, "grad_norm": 0.443359375, "learning_rate": 1.1837244638779443e-06, "loss": 1.8824, "step": 27182 }, { "epoch": 0.8770283377460875, "grad_norm": 0.431640625, "learning_rate": 1.1831117885441862e-06, "loss": 1.8957, "step": 27183 }, { "epoch": 0.8770606015998839, "grad_norm": 0.43359375, "learning_rate": 1.182499265296818e-06, "loss": 1.9069, "step": 27184 }, { "epoch": 0.8770928654536801, "grad_norm": 0.431640625, "learning_rate": 1.1818868941425897e-06, "loss": 1.8873, "step": 27185 }, { "epoch": 0.8771251293074765, "grad_norm": 0.435546875, "learning_rate": 1.1812746750882358e-06, "loss": 1.911, "step": 27186 }, { "epoch": 0.8771573931612728, "grad_norm": 0.43359375, "learning_rate": 1.180662608140496e-06, "loss": 1.8978, "step": 27187 }, { "epoch": 0.8771896570150692, "grad_norm": 0.4375, "learning_rate": 1.180050693306111e-06, "loss": 1.8744, "step": 27188 }, { "epoch": 0.8772219208688656, "grad_norm": 0.423828125, "learning_rate": 1.1794389305918097e-06, "loss": 1.8162, "step": 27189 }, { "epoch": 0.8772541847226619, "grad_norm": 0.419921875, "learning_rate": 1.178827320004331e-06, "loss": 1.8931, "step": 27190 }, { "epoch": 0.8772864485764583, "grad_norm": 0.462890625, "learning_rate": 1.1782158615504097e-06, "loss": 1.8773, "step": 27191 }, { "epoch": 0.8773187124302546, "grad_norm": 0.50390625, "learning_rate": 1.177604555236767e-06, "loss": 1.9015, "step": 27192 }, { "epoch": 0.877350976284051, "grad_norm": 0.5078125, "learning_rate": 1.1769934010701377e-06, "loss": 1.8537, "step": 27193 }, { "epoch": 0.8773832401378473, "grad_norm": 0.51953125, "learning_rate": 1.1763823990572548e-06, "loss": 1.9138, "step": 27194 }, { "epoch": 0.8774155039916437, "grad_norm": 0.50390625, "learning_rate": 1.175771549204831e-06, "loss": 1.9056, "step": 27195 }, { "epoch": 0.87744776784544, "grad_norm": 0.458984375, "learning_rate": 1.1751608515195977e-06, "loss": 1.8982, "step": 27196 }, { "epoch": 0.8774800316992364, "grad_norm": 0.462890625, "learning_rate": 1.174550306008278e-06, "loss": 1.9021, "step": 27197 }, { "epoch": 0.8775122955530327, "grad_norm": 0.455078125, "learning_rate": 1.1739399126775879e-06, "loss": 1.9201, "step": 27198 }, { "epoch": 0.8775445594068291, "grad_norm": 0.453125, "learning_rate": 1.1733296715342489e-06, "loss": 1.8954, "step": 27199 }, { "epoch": 0.8775768232606254, "grad_norm": 0.44921875, "learning_rate": 1.172719582584979e-06, "loss": 1.9097, "step": 27200 }, { "epoch": 0.8776090871144218, "grad_norm": 0.4453125, "learning_rate": 1.1721096458364894e-06, "loss": 1.8909, "step": 27201 }, { "epoch": 0.877641350968218, "grad_norm": 0.447265625, "learning_rate": 1.1714998612954997e-06, "loss": 1.8695, "step": 27202 }, { "epoch": 0.8776736148220144, "grad_norm": 0.439453125, "learning_rate": 1.1708902289687162e-06, "loss": 1.8838, "step": 27203 }, { "epoch": 0.8777058786758107, "grad_norm": 0.4453125, "learning_rate": 1.1702807488628554e-06, "loss": 1.9193, "step": 27204 }, { "epoch": 0.8777381425296071, "grad_norm": 0.43359375, "learning_rate": 1.1696714209846215e-06, "loss": 1.9166, "step": 27205 }, { "epoch": 0.8777704063834034, "grad_norm": 0.431640625, "learning_rate": 1.1690622453407212e-06, "loss": 1.9024, "step": 27206 }, { "epoch": 0.8778026702371998, "grad_norm": 0.42578125, "learning_rate": 1.168453221937864e-06, "loss": 1.8981, "step": 27207 }, { "epoch": 0.8778349340909962, "grad_norm": 0.43359375, "learning_rate": 1.1678443507827497e-06, "loss": 1.9255, "step": 27208 }, { "epoch": 0.8778671979447925, "grad_norm": 0.419921875, "learning_rate": 1.1672356318820808e-06, "loss": 1.9264, "step": 27209 }, { "epoch": 0.8778994617985889, "grad_norm": 0.42578125, "learning_rate": 1.1666270652425593e-06, "loss": 1.8907, "step": 27210 }, { "epoch": 0.8779317256523852, "grad_norm": 0.41796875, "learning_rate": 1.1660186508708842e-06, "loss": 1.9238, "step": 27211 }, { "epoch": 0.8779639895061816, "grad_norm": 0.40625, "learning_rate": 1.1654103887737488e-06, "loss": 1.8736, "step": 27212 }, { "epoch": 0.8779962533599779, "grad_norm": 0.4140625, "learning_rate": 1.1648022789578527e-06, "loss": 1.8538, "step": 27213 }, { "epoch": 0.8780285172137743, "grad_norm": 0.4140625, "learning_rate": 1.164194321429889e-06, "loss": 1.8518, "step": 27214 }, { "epoch": 0.8780607810675706, "grad_norm": 0.423828125, "learning_rate": 1.1635865161965453e-06, "loss": 1.8598, "step": 27215 }, { "epoch": 0.878093044921367, "grad_norm": 0.41796875, "learning_rate": 1.16297886326452e-06, "loss": 1.8589, "step": 27216 }, { "epoch": 0.8781253087751633, "grad_norm": 0.41015625, "learning_rate": 1.162371362640491e-06, "loss": 1.8722, "step": 27217 }, { "epoch": 0.8781575726289597, "grad_norm": 0.41796875, "learning_rate": 1.1617640143311526e-06, "loss": 1.8622, "step": 27218 }, { "epoch": 0.878189836482756, "grad_norm": 0.40625, "learning_rate": 1.1611568183431913e-06, "loss": 1.8794, "step": 27219 }, { "epoch": 0.8782221003365523, "grad_norm": 0.4140625, "learning_rate": 1.1605497746832838e-06, "loss": 1.8885, "step": 27220 }, { "epoch": 0.8782543641903486, "grad_norm": 0.400390625, "learning_rate": 1.1599428833581161e-06, "loss": 1.8737, "step": 27221 }, { "epoch": 0.878286628044145, "grad_norm": 0.404296875, "learning_rate": 1.1593361443743745e-06, "loss": 1.8639, "step": 27222 }, { "epoch": 0.8783188918979413, "grad_norm": 0.40234375, "learning_rate": 1.1587295577387252e-06, "loss": 1.8459, "step": 27223 }, { "epoch": 0.8783511557517377, "grad_norm": 0.39453125, "learning_rate": 1.1581231234578515e-06, "loss": 1.865, "step": 27224 }, { "epoch": 0.878383419605534, "grad_norm": 0.392578125, "learning_rate": 1.157516841538433e-06, "loss": 1.8409, "step": 27225 }, { "epoch": 0.8784156834593304, "grad_norm": 0.3828125, "learning_rate": 1.1569107119871342e-06, "loss": 1.8604, "step": 27226 }, { "epoch": 0.8784479473131267, "grad_norm": 0.380859375, "learning_rate": 1.156304734810633e-06, "loss": 1.8149, "step": 27227 }, { "epoch": 0.8784802111669231, "grad_norm": 0.349609375, "learning_rate": 1.155698910015598e-06, "loss": 1.9367, "step": 27228 }, { "epoch": 0.8785124750207195, "grad_norm": 0.3671875, "learning_rate": 1.1550932376086947e-06, "loss": 2.014, "step": 27229 }, { "epoch": 0.8785447388745158, "grad_norm": 0.365234375, "learning_rate": 1.154487717596595e-06, "loss": 1.9791, "step": 27230 }, { "epoch": 0.8785770027283122, "grad_norm": 0.369140625, "learning_rate": 1.1538823499859636e-06, "loss": 2.0151, "step": 27231 }, { "epoch": 0.8786092665821085, "grad_norm": 0.37109375, "learning_rate": 1.1532771347834581e-06, "loss": 2.0048, "step": 27232 }, { "epoch": 0.8786415304359049, "grad_norm": 0.36328125, "learning_rate": 1.1526720719957467e-06, "loss": 1.9758, "step": 27233 }, { "epoch": 0.8786737942897012, "grad_norm": 0.369140625, "learning_rate": 1.1520671616294892e-06, "loss": 2.0153, "step": 27234 }, { "epoch": 0.8787060581434976, "grad_norm": 0.357421875, "learning_rate": 1.1514624036913369e-06, "loss": 1.9869, "step": 27235 }, { "epoch": 0.8787383219972938, "grad_norm": 0.36328125, "learning_rate": 1.1508577981879558e-06, "loss": 1.9856, "step": 27236 }, { "epoch": 0.8787705858510902, "grad_norm": 0.375, "learning_rate": 1.1502533451259928e-06, "loss": 2.0069, "step": 27237 }, { "epoch": 0.8788028497048865, "grad_norm": 0.361328125, "learning_rate": 1.149649044512109e-06, "loss": 2.0084, "step": 27238 }, { "epoch": 0.8788351135586829, "grad_norm": 0.37890625, "learning_rate": 1.1490448963529503e-06, "loss": 1.9942, "step": 27239 }, { "epoch": 0.8788673774124792, "grad_norm": 0.357421875, "learning_rate": 1.1484409006551688e-06, "loss": 1.9674, "step": 27240 }, { "epoch": 0.8788996412662756, "grad_norm": 0.353515625, "learning_rate": 1.1478370574254137e-06, "loss": 1.9667, "step": 27241 }, { "epoch": 0.8789319051200719, "grad_norm": 0.359375, "learning_rate": 1.1472333666703316e-06, "loss": 2.0064, "step": 27242 }, { "epoch": 0.8789641689738683, "grad_norm": 0.357421875, "learning_rate": 1.1466298283965636e-06, "loss": 2.0129, "step": 27243 }, { "epoch": 0.8789964328276646, "grad_norm": 0.3515625, "learning_rate": 1.146026442610758e-06, "loss": 1.9779, "step": 27244 }, { "epoch": 0.879028696681461, "grad_norm": 0.361328125, "learning_rate": 1.145423209319556e-06, "loss": 1.9869, "step": 27245 }, { "epoch": 0.8790609605352573, "grad_norm": 0.353515625, "learning_rate": 1.1448201285295923e-06, "loss": 1.9767, "step": 27246 }, { "epoch": 0.8790932243890537, "grad_norm": 0.345703125, "learning_rate": 1.1442172002475148e-06, "loss": 1.9819, "step": 27247 }, { "epoch": 0.87912548824285, "grad_norm": 0.353515625, "learning_rate": 1.1436144244799485e-06, "loss": 1.968, "step": 27248 }, { "epoch": 0.8791577520966464, "grad_norm": 0.34375, "learning_rate": 1.1430118012335344e-06, "loss": 1.9835, "step": 27249 }, { "epoch": 0.8791900159504428, "grad_norm": 0.34765625, "learning_rate": 1.1424093305149107e-06, "loss": 1.9969, "step": 27250 }, { "epoch": 0.8792222798042391, "grad_norm": 0.35546875, "learning_rate": 1.1418070123306989e-06, "loss": 1.9663, "step": 27251 }, { "epoch": 0.8792545436580355, "grad_norm": 0.353515625, "learning_rate": 1.1412048466875337e-06, "loss": 1.9682, "step": 27252 }, { "epoch": 0.8792868075118317, "grad_norm": 0.34765625, "learning_rate": 1.1406028335920476e-06, "loss": 1.9981, "step": 27253 }, { "epoch": 0.8793190713656281, "grad_norm": 0.353515625, "learning_rate": 1.1400009730508592e-06, "loss": 1.9501, "step": 27254 }, { "epoch": 0.8793513352194244, "grad_norm": 0.345703125, "learning_rate": 1.1393992650705947e-06, "loss": 1.9834, "step": 27255 }, { "epoch": 0.8793835990732208, "grad_norm": 0.345703125, "learning_rate": 1.1387977096578872e-06, "loss": 1.9838, "step": 27256 }, { "epoch": 0.8794158629270171, "grad_norm": 0.34375, "learning_rate": 1.138196306819343e-06, "loss": 1.9823, "step": 27257 }, { "epoch": 0.8794481267808135, "grad_norm": 0.341796875, "learning_rate": 1.1375950565615934e-06, "loss": 1.9822, "step": 27258 }, { "epoch": 0.8794803906346098, "grad_norm": 0.345703125, "learning_rate": 1.1369939588912531e-06, "loss": 1.9682, "step": 27259 }, { "epoch": 0.8795126544884062, "grad_norm": 0.33984375, "learning_rate": 1.1363930138149336e-06, "loss": 1.9511, "step": 27260 }, { "epoch": 0.8795449183422025, "grad_norm": 0.341796875, "learning_rate": 1.135792221339258e-06, "loss": 1.9545, "step": 27261 }, { "epoch": 0.8795771821959989, "grad_norm": 0.345703125, "learning_rate": 1.1351915814708358e-06, "loss": 2.0081, "step": 27262 }, { "epoch": 0.8796094460497952, "grad_norm": 0.33984375, "learning_rate": 1.1345910942162736e-06, "loss": 1.9752, "step": 27263 }, { "epoch": 0.8796417099035916, "grad_norm": 0.34375, "learning_rate": 1.1339907595821908e-06, "loss": 1.9846, "step": 27264 }, { "epoch": 0.8796739737573879, "grad_norm": 0.34375, "learning_rate": 1.1333905775751873e-06, "loss": 1.9909, "step": 27265 }, { "epoch": 0.8797062376111843, "grad_norm": 0.341796875, "learning_rate": 1.132790548201873e-06, "loss": 1.9845, "step": 27266 }, { "epoch": 0.8797385014649806, "grad_norm": 0.3359375, "learning_rate": 1.1321906714688523e-06, "loss": 1.9779, "step": 27267 }, { "epoch": 0.879770765318777, "grad_norm": 0.349609375, "learning_rate": 1.1315909473827267e-06, "loss": 2.0191, "step": 27268 }, { "epoch": 0.8798030291725734, "grad_norm": 0.34375, "learning_rate": 1.1309913759501012e-06, "loss": 1.99, "step": 27269 }, { "epoch": 0.8798352930263696, "grad_norm": 0.337890625, "learning_rate": 1.1303919571775717e-06, "loss": 1.9683, "step": 27270 }, { "epoch": 0.879867556880166, "grad_norm": 0.337890625, "learning_rate": 1.1297926910717365e-06, "loss": 2.0129, "step": 27271 }, { "epoch": 0.8798998207339623, "grad_norm": 0.341796875, "learning_rate": 1.1291935776391954e-06, "loss": 1.986, "step": 27272 }, { "epoch": 0.8799320845877587, "grad_norm": 0.34375, "learning_rate": 1.1285946168865397e-06, "loss": 1.9937, "step": 27273 }, { "epoch": 0.879964348441555, "grad_norm": 0.345703125, "learning_rate": 1.1279958088203606e-06, "loss": 1.9924, "step": 27274 }, { "epoch": 0.8799966122953514, "grad_norm": 0.341796875, "learning_rate": 1.1273971534472566e-06, "loss": 1.9865, "step": 27275 }, { "epoch": 0.8800288761491477, "grad_norm": 0.33984375, "learning_rate": 1.1267986507738105e-06, "loss": 1.9941, "step": 27276 }, { "epoch": 0.8800611400029441, "grad_norm": 0.34375, "learning_rate": 1.1262003008066101e-06, "loss": 1.9884, "step": 27277 }, { "epoch": 0.8800934038567404, "grad_norm": 0.33984375, "learning_rate": 1.125602103552249e-06, "loss": 1.9484, "step": 27278 }, { "epoch": 0.8801256677105368, "grad_norm": 0.337890625, "learning_rate": 1.1250040590173e-06, "loss": 1.9784, "step": 27279 }, { "epoch": 0.8801579315643331, "grad_norm": 0.341796875, "learning_rate": 1.1244061672083544e-06, "loss": 1.9998, "step": 27280 }, { "epoch": 0.8801901954181295, "grad_norm": 0.34765625, "learning_rate": 1.1238084281319955e-06, "loss": 1.9946, "step": 27281 }, { "epoch": 0.8802224592719258, "grad_norm": 0.34375, "learning_rate": 1.1232108417947945e-06, "loss": 1.9993, "step": 27282 }, { "epoch": 0.8802547231257222, "grad_norm": 0.3515625, "learning_rate": 1.122613408203333e-06, "loss": 1.9702, "step": 27283 }, { "epoch": 0.8802869869795185, "grad_norm": 0.35546875, "learning_rate": 1.1220161273641922e-06, "loss": 1.9914, "step": 27284 }, { "epoch": 0.8803192508333149, "grad_norm": 0.341796875, "learning_rate": 1.1214189992839368e-06, "loss": 1.9529, "step": 27285 }, { "epoch": 0.8803515146871111, "grad_norm": 0.349609375, "learning_rate": 1.1208220239691436e-06, "loss": 2.0001, "step": 27286 }, { "epoch": 0.8803837785409075, "grad_norm": 0.34375, "learning_rate": 1.1202252014263919e-06, "loss": 1.945, "step": 27287 }, { "epoch": 0.8804160423947038, "grad_norm": 0.3359375, "learning_rate": 1.1196285316622367e-06, "loss": 1.9647, "step": 27288 }, { "epoch": 0.8804483062485002, "grad_norm": 0.35546875, "learning_rate": 1.1190320146832577e-06, "loss": 2.0018, "step": 27289 }, { "epoch": 0.8804805701022966, "grad_norm": 0.34765625, "learning_rate": 1.1184356504960148e-06, "loss": 1.9951, "step": 27290 }, { "epoch": 0.8805128339560929, "grad_norm": 0.337890625, "learning_rate": 1.1178394391070706e-06, "loss": 2.004, "step": 27291 }, { "epoch": 0.8805450978098893, "grad_norm": 0.3515625, "learning_rate": 1.1172433805229936e-06, "loss": 1.9914, "step": 27292 }, { "epoch": 0.8805773616636856, "grad_norm": 0.34375, "learning_rate": 1.1166474747503436e-06, "loss": 1.9705, "step": 27293 }, { "epoch": 0.880609625517482, "grad_norm": 0.3359375, "learning_rate": 1.116051721795675e-06, "loss": 1.9815, "step": 27294 }, { "epoch": 0.8806418893712783, "grad_norm": 0.3359375, "learning_rate": 1.1154561216655495e-06, "loss": 1.9833, "step": 27295 }, { "epoch": 0.8806741532250747, "grad_norm": 0.333984375, "learning_rate": 1.1148606743665234e-06, "loss": 1.9633, "step": 27296 }, { "epoch": 0.880706417078871, "grad_norm": 0.349609375, "learning_rate": 1.1142653799051483e-06, "loss": 1.9885, "step": 27297 }, { "epoch": 0.8807386809326674, "grad_norm": 0.33984375, "learning_rate": 1.1136702382879805e-06, "loss": 2.0043, "step": 27298 }, { "epoch": 0.8807709447864637, "grad_norm": 0.34375, "learning_rate": 1.1130752495215678e-06, "loss": 1.9354, "step": 27299 }, { "epoch": 0.8808032086402601, "grad_norm": 0.349609375, "learning_rate": 1.112480413612459e-06, "loss": 2.0065, "step": 27300 }, { "epoch": 0.8808354724940564, "grad_norm": 0.33984375, "learning_rate": 1.1118857305672048e-06, "loss": 1.9963, "step": 27301 }, { "epoch": 0.8808677363478528, "grad_norm": 0.349609375, "learning_rate": 1.111291200392347e-06, "loss": 2.0131, "step": 27302 }, { "epoch": 0.880900000201649, "grad_norm": 0.341796875, "learning_rate": 1.1106968230944336e-06, "loss": 1.9887, "step": 27303 }, { "epoch": 0.8809322640554454, "grad_norm": 0.34765625, "learning_rate": 1.1101025986800062e-06, "loss": 1.9934, "step": 27304 }, { "epoch": 0.8809645279092417, "grad_norm": 0.337890625, "learning_rate": 1.1095085271556027e-06, "loss": 1.9793, "step": 27305 }, { "epoch": 0.8809967917630381, "grad_norm": 0.3359375, "learning_rate": 1.1089146085277646e-06, "loss": 2.0132, "step": 27306 }, { "epoch": 0.8810290556168344, "grad_norm": 0.3359375, "learning_rate": 1.1083208428030318e-06, "loss": 1.9883, "step": 27307 }, { "epoch": 0.8810613194706308, "grad_norm": 0.333984375, "learning_rate": 1.1077272299879337e-06, "loss": 2.0038, "step": 27308 }, { "epoch": 0.8810935833244272, "grad_norm": 0.330078125, "learning_rate": 1.1071337700890122e-06, "loss": 2.0047, "step": 27309 }, { "epoch": 0.8811258471782235, "grad_norm": 0.3515625, "learning_rate": 1.1065404631127918e-06, "loss": 1.9722, "step": 27310 }, { "epoch": 0.8811581110320199, "grad_norm": 0.34765625, "learning_rate": 1.1059473090658056e-06, "loss": 1.991, "step": 27311 }, { "epoch": 0.8811903748858162, "grad_norm": 0.337890625, "learning_rate": 1.1053543079545902e-06, "loss": 1.9898, "step": 27312 }, { "epoch": 0.8812226387396126, "grad_norm": 0.353515625, "learning_rate": 1.1047614597856604e-06, "loss": 1.9853, "step": 27313 }, { "epoch": 0.8812549025934089, "grad_norm": 0.349609375, "learning_rate": 1.104168764565549e-06, "loss": 1.9787, "step": 27314 }, { "epoch": 0.8812871664472053, "grad_norm": 0.34375, "learning_rate": 1.1035762223007844e-06, "loss": 1.9676, "step": 27315 }, { "epoch": 0.8813194303010016, "grad_norm": 0.34375, "learning_rate": 1.1029838329978765e-06, "loss": 1.9453, "step": 27316 }, { "epoch": 0.881351694154798, "grad_norm": 0.337890625, "learning_rate": 1.102391596663358e-06, "loss": 1.9827, "step": 27317 }, { "epoch": 0.8813839580085943, "grad_norm": 0.33984375, "learning_rate": 1.1017995133037405e-06, "loss": 1.992, "step": 27318 }, { "epoch": 0.8814162218623907, "grad_norm": 0.34375, "learning_rate": 1.1012075829255424e-06, "loss": 1.9909, "step": 27319 }, { "epoch": 0.881448485716187, "grad_norm": 0.34765625, "learning_rate": 1.1006158055352816e-06, "loss": 1.9734, "step": 27320 }, { "epoch": 0.8814807495699833, "grad_norm": 0.35546875, "learning_rate": 1.1000241811394712e-06, "loss": 1.9895, "step": 27321 }, { "epoch": 0.8815130134237796, "grad_norm": 0.341796875, "learning_rate": 1.0994327097446194e-06, "loss": 2.0048, "step": 27322 }, { "epoch": 0.881545277277576, "grad_norm": 0.359375, "learning_rate": 1.0988413913572443e-06, "loss": 1.9775, "step": 27323 }, { "epoch": 0.8815775411313723, "grad_norm": 0.359375, "learning_rate": 1.098250225983849e-06, "loss": 2.0125, "step": 27324 }, { "epoch": 0.8816098049851687, "grad_norm": 0.3359375, "learning_rate": 1.0976592136309416e-06, "loss": 2.0009, "step": 27325 }, { "epoch": 0.881642068838965, "grad_norm": 0.36328125, "learning_rate": 1.0970683543050287e-06, "loss": 1.9582, "step": 27326 }, { "epoch": 0.8816743326927614, "grad_norm": 0.34375, "learning_rate": 1.0964776480126149e-06, "loss": 1.9823, "step": 27327 }, { "epoch": 0.8817065965465577, "grad_norm": 0.357421875, "learning_rate": 1.0958870947601968e-06, "loss": 1.9313, "step": 27328 }, { "epoch": 0.8817388604003541, "grad_norm": 0.35546875, "learning_rate": 1.0952966945542825e-06, "loss": 2.0018, "step": 27329 }, { "epoch": 0.8817711242541505, "grad_norm": 0.341796875, "learning_rate": 1.0947064474013651e-06, "loss": 1.9776, "step": 27330 }, { "epoch": 0.8818033881079468, "grad_norm": 0.34765625, "learning_rate": 1.0941163533079428e-06, "loss": 1.935, "step": 27331 }, { "epoch": 0.8818356519617432, "grad_norm": 0.337890625, "learning_rate": 1.0935264122805139e-06, "loss": 2.0128, "step": 27332 }, { "epoch": 0.8818679158155395, "grad_norm": 0.34375, "learning_rate": 1.0929366243255694e-06, "loss": 1.9711, "step": 27333 }, { "epoch": 0.8819001796693359, "grad_norm": 0.345703125, "learning_rate": 1.0923469894495996e-06, "loss": 1.9677, "step": 27334 }, { "epoch": 0.8819324435231322, "grad_norm": 0.337890625, "learning_rate": 1.0917575076590992e-06, "loss": 1.9731, "step": 27335 }, { "epoch": 0.8819647073769286, "grad_norm": 0.345703125, "learning_rate": 1.091168178960551e-06, "loss": 1.992, "step": 27336 }, { "epoch": 0.8819969712307248, "grad_norm": 0.33203125, "learning_rate": 1.0905790033604485e-06, "loss": 1.9985, "step": 27337 }, { "epoch": 0.8820292350845212, "grad_norm": 0.345703125, "learning_rate": 1.0899899808652747e-06, "loss": 1.9635, "step": 27338 }, { "epoch": 0.8820614989383175, "grad_norm": 0.33203125, "learning_rate": 1.0894011114815094e-06, "loss": 2.0039, "step": 27339 }, { "epoch": 0.8820937627921139, "grad_norm": 0.337890625, "learning_rate": 1.0888123952156408e-06, "loss": 1.986, "step": 27340 }, { "epoch": 0.8821260266459102, "grad_norm": 0.33984375, "learning_rate": 1.0882238320741421e-06, "loss": 2.0031, "step": 27341 }, { "epoch": 0.8821582904997066, "grad_norm": 0.333984375, "learning_rate": 1.0876354220634948e-06, "loss": 1.9904, "step": 27342 }, { "epoch": 0.8821905543535029, "grad_norm": 0.337890625, "learning_rate": 1.0870471651901803e-06, "loss": 1.9859, "step": 27343 }, { "epoch": 0.8822228182072993, "grad_norm": 0.33203125, "learning_rate": 1.0864590614606652e-06, "loss": 1.9959, "step": 27344 }, { "epoch": 0.8822550820610956, "grad_norm": 0.345703125, "learning_rate": 1.0858711108814272e-06, "loss": 1.9896, "step": 27345 }, { "epoch": 0.882287345914892, "grad_norm": 0.3359375, "learning_rate": 1.0852833134589419e-06, "loss": 1.9872, "step": 27346 }, { "epoch": 0.8823196097686883, "grad_norm": 0.337890625, "learning_rate": 1.0846956691996717e-06, "loss": 2.0104, "step": 27347 }, { "epoch": 0.8823518736224847, "grad_norm": 0.349609375, "learning_rate": 1.084108178110092e-06, "loss": 1.9907, "step": 27348 }, { "epoch": 0.882384137476281, "grad_norm": 0.33984375, "learning_rate": 1.083520840196664e-06, "loss": 1.9338, "step": 27349 }, { "epoch": 0.8824164013300774, "grad_norm": 0.34765625, "learning_rate": 1.0829336554658542e-06, "loss": 2.0035, "step": 27350 }, { "epoch": 0.8824486651838738, "grad_norm": 0.341796875, "learning_rate": 1.0823466239241293e-06, "loss": 1.972, "step": 27351 }, { "epoch": 0.8824809290376701, "grad_norm": 0.33203125, "learning_rate": 1.081759745577947e-06, "loss": 1.9744, "step": 27352 }, { "epoch": 0.8825131928914665, "grad_norm": 0.33984375, "learning_rate": 1.0811730204337678e-06, "loss": 1.9888, "step": 27353 }, { "epoch": 0.8825454567452627, "grad_norm": 0.333984375, "learning_rate": 1.080586448498051e-06, "loss": 2.0073, "step": 27354 }, { "epoch": 0.8825777205990591, "grad_norm": 0.33984375, "learning_rate": 1.080000029777255e-06, "loss": 1.9835, "step": 27355 }, { "epoch": 0.8826099844528554, "grad_norm": 0.34375, "learning_rate": 1.0794137642778278e-06, "loss": 1.9516, "step": 27356 }, { "epoch": 0.8826422483066518, "grad_norm": 0.3359375, "learning_rate": 1.0788276520062313e-06, "loss": 1.9807, "step": 27357 }, { "epoch": 0.8826745121604481, "grad_norm": 0.33984375, "learning_rate": 1.078241692968913e-06, "loss": 2.003, "step": 27358 }, { "epoch": 0.8827067760142445, "grad_norm": 0.33203125, "learning_rate": 1.0776558871723202e-06, "loss": 1.9944, "step": 27359 }, { "epoch": 0.8827390398680408, "grad_norm": 0.33203125, "learning_rate": 1.0770702346229055e-06, "loss": 2.0053, "step": 27360 }, { "epoch": 0.8827713037218372, "grad_norm": 0.345703125, "learning_rate": 1.076484735327114e-06, "loss": 1.9916, "step": 27361 }, { "epoch": 0.8828035675756335, "grad_norm": 0.337890625, "learning_rate": 1.0758993892913871e-06, "loss": 1.9664, "step": 27362 }, { "epoch": 0.8828358314294299, "grad_norm": 0.3359375, "learning_rate": 1.075314196522173e-06, "loss": 2.0027, "step": 27363 }, { "epoch": 0.8828680952832262, "grad_norm": 0.341796875, "learning_rate": 1.07472915702591e-06, "loss": 1.9655, "step": 27364 }, { "epoch": 0.8829003591370226, "grad_norm": 0.3359375, "learning_rate": 1.074144270809036e-06, "loss": 1.9835, "step": 27365 }, { "epoch": 0.8829326229908189, "grad_norm": 0.34375, "learning_rate": 1.0735595378779944e-06, "loss": 1.9542, "step": 27366 }, { "epoch": 0.8829648868446153, "grad_norm": 0.34765625, "learning_rate": 1.0729749582392169e-06, "loss": 1.9857, "step": 27367 }, { "epoch": 0.8829971506984116, "grad_norm": 0.341796875, "learning_rate": 1.0723905318991445e-06, "loss": 1.9796, "step": 27368 }, { "epoch": 0.883029414552208, "grad_norm": 0.3359375, "learning_rate": 1.0718062588642007e-06, "loss": 1.9728, "step": 27369 }, { "epoch": 0.8830616784060044, "grad_norm": 0.3359375, "learning_rate": 1.0712221391408207e-06, "loss": 1.992, "step": 27370 }, { "epoch": 0.8830939422598006, "grad_norm": 0.328125, "learning_rate": 1.0706381727354403e-06, "loss": 1.9788, "step": 27371 }, { "epoch": 0.883126206113597, "grad_norm": 0.34765625, "learning_rate": 1.070054359654478e-06, "loss": 1.9746, "step": 27372 }, { "epoch": 0.8831584699673933, "grad_norm": 0.337890625, "learning_rate": 1.0694706999043625e-06, "loss": 1.9653, "step": 27373 }, { "epoch": 0.8831907338211897, "grad_norm": 0.330078125, "learning_rate": 1.068887193491526e-06, "loss": 1.9836, "step": 27374 }, { "epoch": 0.883222997674986, "grad_norm": 0.3359375, "learning_rate": 1.0683038404223793e-06, "loss": 1.9846, "step": 27375 }, { "epoch": 0.8832552615287824, "grad_norm": 0.341796875, "learning_rate": 1.0677206407033502e-06, "loss": 2.0035, "step": 27376 }, { "epoch": 0.8832875253825787, "grad_norm": 0.35546875, "learning_rate": 1.0671375943408619e-06, "loss": 2.0025, "step": 27377 }, { "epoch": 0.8833197892363751, "grad_norm": 0.33203125, "learning_rate": 1.0665547013413208e-06, "loss": 1.9966, "step": 27378 }, { "epoch": 0.8833520530901714, "grad_norm": 0.33984375, "learning_rate": 1.0659719617111552e-06, "loss": 1.9509, "step": 27379 }, { "epoch": 0.8833843169439678, "grad_norm": 0.33984375, "learning_rate": 1.0653893754567717e-06, "loss": 1.9798, "step": 27380 }, { "epoch": 0.8834165807977641, "grad_norm": 0.345703125, "learning_rate": 1.0648069425845835e-06, "loss": 2.0096, "step": 27381 }, { "epoch": 0.8834488446515605, "grad_norm": 0.3359375, "learning_rate": 1.064224663101007e-06, "loss": 2.0094, "step": 27382 }, { "epoch": 0.8834811085053568, "grad_norm": 0.337890625, "learning_rate": 1.0636425370124475e-06, "loss": 1.9953, "step": 27383 }, { "epoch": 0.8835133723591532, "grad_norm": 0.34375, "learning_rate": 1.0630605643253095e-06, "loss": 1.983, "step": 27384 }, { "epoch": 0.8835456362129495, "grad_norm": 0.3359375, "learning_rate": 1.062478745046006e-06, "loss": 1.9874, "step": 27385 }, { "epoch": 0.8835779000667459, "grad_norm": 0.34375, "learning_rate": 1.0618970791809373e-06, "loss": 1.9566, "step": 27386 }, { "epoch": 0.8836101639205421, "grad_norm": 0.337890625, "learning_rate": 1.0613155667365032e-06, "loss": 1.9928, "step": 27387 }, { "epoch": 0.8836424277743385, "grad_norm": 0.337890625, "learning_rate": 1.0607342077191119e-06, "loss": 1.9711, "step": 27388 }, { "epoch": 0.8836746916281348, "grad_norm": 0.3359375, "learning_rate": 1.0601530021351579e-06, "loss": 2.0133, "step": 27389 }, { "epoch": 0.8837069554819312, "grad_norm": 0.3359375, "learning_rate": 1.0595719499910366e-06, "loss": 1.992, "step": 27390 }, { "epoch": 0.8837392193357276, "grad_norm": 0.333984375, "learning_rate": 1.0589910512931477e-06, "loss": 2.0077, "step": 27391 }, { "epoch": 0.8837714831895239, "grad_norm": 0.34375, "learning_rate": 1.0584103060478862e-06, "loss": 2.0164, "step": 27392 }, { "epoch": 0.8838037470433203, "grad_norm": 0.359375, "learning_rate": 1.0578297142616384e-06, "loss": 1.9649, "step": 27393 }, { "epoch": 0.8838360108971166, "grad_norm": 0.337890625, "learning_rate": 1.0572492759408026e-06, "loss": 1.99, "step": 27394 }, { "epoch": 0.883868274750913, "grad_norm": 0.33984375, "learning_rate": 1.0566689910917638e-06, "loss": 1.9857, "step": 27395 }, { "epoch": 0.8839005386047093, "grad_norm": 0.33984375, "learning_rate": 1.0560888597209066e-06, "loss": 1.9935, "step": 27396 }, { "epoch": 0.8839328024585057, "grad_norm": 0.337890625, "learning_rate": 1.055508881834623e-06, "loss": 2.006, "step": 27397 }, { "epoch": 0.883965066312302, "grad_norm": 0.34765625, "learning_rate": 1.0549290574392945e-06, "loss": 1.9711, "step": 27398 }, { "epoch": 0.8839973301660984, "grad_norm": 0.328125, "learning_rate": 1.0543493865413023e-06, "loss": 1.9957, "step": 27399 }, { "epoch": 0.8840295940198947, "grad_norm": 0.33203125, "learning_rate": 1.0537698691470248e-06, "loss": 1.9635, "step": 27400 }, { "epoch": 0.8840618578736911, "grad_norm": 0.345703125, "learning_rate": 1.0531905052628437e-06, "loss": 1.9993, "step": 27401 }, { "epoch": 0.8840941217274874, "grad_norm": 0.34375, "learning_rate": 1.0526112948951406e-06, "loss": 1.99, "step": 27402 }, { "epoch": 0.8841263855812838, "grad_norm": 0.345703125, "learning_rate": 1.0520322380502834e-06, "loss": 1.9883, "step": 27403 }, { "epoch": 0.88415864943508, "grad_norm": 0.33984375, "learning_rate": 1.0514533347346473e-06, "loss": 1.9907, "step": 27404 }, { "epoch": 0.8841909132888764, "grad_norm": 0.333984375, "learning_rate": 1.050874584954612e-06, "loss": 1.9827, "step": 27405 }, { "epoch": 0.8842231771426727, "grad_norm": 0.3515625, "learning_rate": 1.0502959887165359e-06, "loss": 1.9622, "step": 27406 }, { "epoch": 0.8842554409964691, "grad_norm": 0.333984375, "learning_rate": 1.0497175460267955e-06, "loss": 1.9955, "step": 27407 }, { "epoch": 0.8842877048502654, "grad_norm": 0.337890625, "learning_rate": 1.0491392568917607e-06, "loss": 2.0098, "step": 27408 }, { "epoch": 0.8843199687040618, "grad_norm": 0.34375, "learning_rate": 1.0485611213177882e-06, "loss": 1.9801, "step": 27409 }, { "epoch": 0.8843522325578581, "grad_norm": 0.34765625, "learning_rate": 1.0479831393112493e-06, "loss": 1.997, "step": 27410 }, { "epoch": 0.8843844964116545, "grad_norm": 0.333984375, "learning_rate": 1.0474053108785025e-06, "loss": 1.9828, "step": 27411 }, { "epoch": 0.8844167602654509, "grad_norm": 0.333984375, "learning_rate": 1.0468276360259043e-06, "loss": 1.9862, "step": 27412 }, { "epoch": 0.8844490241192472, "grad_norm": 0.341796875, "learning_rate": 1.0462501147598213e-06, "loss": 2.0076, "step": 27413 }, { "epoch": 0.8844812879730436, "grad_norm": 0.341796875, "learning_rate": 1.0456727470866067e-06, "loss": 1.9888, "step": 27414 }, { "epoch": 0.8845135518268399, "grad_norm": 0.34375, "learning_rate": 1.0450955330126138e-06, "loss": 2.0141, "step": 27415 }, { "epoch": 0.8845458156806363, "grad_norm": 0.341796875, "learning_rate": 1.0445184725441992e-06, "loss": 1.9688, "step": 27416 }, { "epoch": 0.8845780795344326, "grad_norm": 0.3359375, "learning_rate": 1.0439415656877143e-06, "loss": 2.0017, "step": 27417 }, { "epoch": 0.884610343388229, "grad_norm": 0.337890625, "learning_rate": 1.0433648124495076e-06, "loss": 2.0238, "step": 27418 }, { "epoch": 0.8846426072420253, "grad_norm": 0.33203125, "learning_rate": 1.042788212835929e-06, "loss": 1.9906, "step": 27419 }, { "epoch": 0.8846748710958217, "grad_norm": 0.3359375, "learning_rate": 1.0422117668533248e-06, "loss": 1.9777, "step": 27420 }, { "epoch": 0.884707134949618, "grad_norm": 0.33203125, "learning_rate": 1.0416354745080386e-06, "loss": 1.9815, "step": 27421 }, { "epoch": 0.8847393988034143, "grad_norm": 0.333984375, "learning_rate": 1.0410593358064186e-06, "loss": 1.9919, "step": 27422 }, { "epoch": 0.8847716626572106, "grad_norm": 0.328125, "learning_rate": 1.040483350754803e-06, "loss": 1.9686, "step": 27423 }, { "epoch": 0.884803926511007, "grad_norm": 0.326171875, "learning_rate": 1.039907519359528e-06, "loss": 1.9906, "step": 27424 }, { "epoch": 0.8848361903648033, "grad_norm": 0.345703125, "learning_rate": 1.039331841626941e-06, "loss": 1.9679, "step": 27425 }, { "epoch": 0.8848684542185997, "grad_norm": 0.345703125, "learning_rate": 1.0387563175633713e-06, "loss": 2.0056, "step": 27426 }, { "epoch": 0.884900718072396, "grad_norm": 0.333984375, "learning_rate": 1.0381809471751557e-06, "loss": 1.9959, "step": 27427 }, { "epoch": 0.8849329819261924, "grad_norm": 0.3359375, "learning_rate": 1.0376057304686293e-06, "loss": 1.9795, "step": 27428 }, { "epoch": 0.8849652457799887, "grad_norm": 0.337890625, "learning_rate": 1.0370306674501234e-06, "loss": 1.9788, "step": 27429 }, { "epoch": 0.8849975096337851, "grad_norm": 0.341796875, "learning_rate": 1.0364557581259665e-06, "loss": 1.9747, "step": 27430 }, { "epoch": 0.8850297734875815, "grad_norm": 0.333984375, "learning_rate": 1.0358810025024852e-06, "loss": 1.9659, "step": 27431 }, { "epoch": 0.8850620373413778, "grad_norm": 0.345703125, "learning_rate": 1.0353064005860091e-06, "loss": 1.9891, "step": 27432 }, { "epoch": 0.8850943011951742, "grad_norm": 0.330078125, "learning_rate": 1.0347319523828636e-06, "loss": 1.9605, "step": 27433 }, { "epoch": 0.8851265650489705, "grad_norm": 0.337890625, "learning_rate": 1.0341576578993667e-06, "loss": 1.9896, "step": 27434 }, { "epoch": 0.8851588289027669, "grad_norm": 0.33203125, "learning_rate": 1.0335835171418435e-06, "loss": 1.9938, "step": 27435 }, { "epoch": 0.8851910927565632, "grad_norm": 0.3359375, "learning_rate": 1.0330095301166186e-06, "loss": 1.9778, "step": 27436 }, { "epoch": 0.8852233566103596, "grad_norm": 0.337890625, "learning_rate": 1.032435696829999e-06, "loss": 2.0091, "step": 27437 }, { "epoch": 0.8852556204641558, "grad_norm": 0.333984375, "learning_rate": 1.0318620172883082e-06, "loss": 1.9947, "step": 27438 }, { "epoch": 0.8852878843179522, "grad_norm": 0.3515625, "learning_rate": 1.0312884914978638e-06, "loss": 1.9982, "step": 27439 }, { "epoch": 0.8853201481717485, "grad_norm": 0.34375, "learning_rate": 1.0307151194649695e-06, "loss": 1.9822, "step": 27440 }, { "epoch": 0.8853524120255449, "grad_norm": 0.333984375, "learning_rate": 1.0301419011959451e-06, "loss": 2.0122, "step": 27441 }, { "epoch": 0.8853846758793412, "grad_norm": 0.330078125, "learning_rate": 1.0295688366970957e-06, "loss": 1.9473, "step": 27442 }, { "epoch": 0.8854169397331376, "grad_norm": 0.33984375, "learning_rate": 1.0289959259747295e-06, "loss": 2.0008, "step": 27443 }, { "epoch": 0.8854492035869339, "grad_norm": 0.333984375, "learning_rate": 1.0284231690351532e-06, "loss": 1.9981, "step": 27444 }, { "epoch": 0.8854814674407303, "grad_norm": 0.330078125, "learning_rate": 1.0278505658846733e-06, "loss": 1.9847, "step": 27445 }, { "epoch": 0.8855137312945266, "grad_norm": 0.34375, "learning_rate": 1.027278116529588e-06, "loss": 1.979, "step": 27446 }, { "epoch": 0.885545995148323, "grad_norm": 0.337890625, "learning_rate": 1.0267058209762042e-06, "loss": 1.9886, "step": 27447 }, { "epoch": 0.8855782590021193, "grad_norm": 0.33203125, "learning_rate": 1.0261336792308168e-06, "loss": 2.0013, "step": 27448 }, { "epoch": 0.8856105228559157, "grad_norm": 0.337890625, "learning_rate": 1.025561691299724e-06, "loss": 1.9705, "step": 27449 }, { "epoch": 0.885642786709712, "grad_norm": 0.34375, "learning_rate": 1.024989857189224e-06, "loss": 1.9713, "step": 27450 }, { "epoch": 0.8856750505635084, "grad_norm": 0.330078125, "learning_rate": 1.0244181769056105e-06, "loss": 1.9938, "step": 27451 }, { "epoch": 0.8857073144173048, "grad_norm": 0.3515625, "learning_rate": 1.0238466504551746e-06, "loss": 1.9732, "step": 27452 }, { "epoch": 0.8857395782711011, "grad_norm": 0.333984375, "learning_rate": 1.02327527784421e-06, "loss": 1.9926, "step": 27453 }, { "epoch": 0.8857718421248975, "grad_norm": 0.337890625, "learning_rate": 1.0227040590790032e-06, "loss": 2.0006, "step": 27454 }, { "epoch": 0.8858041059786937, "grad_norm": 0.33984375, "learning_rate": 1.0221329941658408e-06, "loss": 1.9851, "step": 27455 }, { "epoch": 0.8858363698324901, "grad_norm": 0.349609375, "learning_rate": 1.0215620831110146e-06, "loss": 1.9865, "step": 27456 }, { "epoch": 0.8858686336862864, "grad_norm": 0.33203125, "learning_rate": 1.0209913259208026e-06, "loss": 1.9958, "step": 27457 }, { "epoch": 0.8859008975400828, "grad_norm": 0.337890625, "learning_rate": 1.0204207226014883e-06, "loss": 1.9668, "step": 27458 }, { "epoch": 0.8859331613938791, "grad_norm": 0.337890625, "learning_rate": 1.0198502731593567e-06, "loss": 2.0006, "step": 27459 }, { "epoch": 0.8859654252476755, "grad_norm": 0.341796875, "learning_rate": 1.0192799776006845e-06, "loss": 1.9982, "step": 27460 }, { "epoch": 0.8859976891014718, "grad_norm": 0.3359375, "learning_rate": 1.0187098359317465e-06, "loss": 1.9569, "step": 27461 }, { "epoch": 0.8860299529552682, "grad_norm": 0.337890625, "learning_rate": 1.0181398481588195e-06, "loss": 1.9771, "step": 27462 }, { "epoch": 0.8860622168090645, "grad_norm": 0.341796875, "learning_rate": 1.01757001428818e-06, "loss": 2.0115, "step": 27463 }, { "epoch": 0.8860944806628609, "grad_norm": 0.330078125, "learning_rate": 1.0170003343260998e-06, "loss": 2.0012, "step": 27464 }, { "epoch": 0.8861267445166572, "grad_norm": 0.3359375, "learning_rate": 1.0164308082788471e-06, "loss": 1.9979, "step": 27465 }, { "epoch": 0.8861590083704536, "grad_norm": 0.35546875, "learning_rate": 1.0158614361526918e-06, "loss": 1.9708, "step": 27466 }, { "epoch": 0.8861912722242499, "grad_norm": 0.333984375, "learning_rate": 1.0152922179539042e-06, "loss": 2.012, "step": 27467 }, { "epoch": 0.8862235360780463, "grad_norm": 0.33984375, "learning_rate": 1.0147231536887441e-06, "loss": 1.9971, "step": 27468 }, { "epoch": 0.8862557999318426, "grad_norm": 0.34375, "learning_rate": 1.01415424336348e-06, "loss": 1.9995, "step": 27469 }, { "epoch": 0.886288063785639, "grad_norm": 0.349609375, "learning_rate": 1.013585486984373e-06, "loss": 1.9781, "step": 27470 }, { "epoch": 0.8863203276394354, "grad_norm": 0.337890625, "learning_rate": 1.013016884557682e-06, "loss": 1.9945, "step": 27471 }, { "epoch": 0.8863525914932316, "grad_norm": 0.341796875, "learning_rate": 1.0124484360896686e-06, "loss": 1.9771, "step": 27472 }, { "epoch": 0.886384855347028, "grad_norm": 0.333984375, "learning_rate": 1.0118801415865875e-06, "loss": 1.9921, "step": 27473 }, { "epoch": 0.8864171192008243, "grad_norm": 0.341796875, "learning_rate": 1.0113120010546923e-06, "loss": 1.964, "step": 27474 }, { "epoch": 0.8864493830546207, "grad_norm": 0.341796875, "learning_rate": 1.0107440145002427e-06, "loss": 1.9547, "step": 27475 }, { "epoch": 0.886481646908417, "grad_norm": 0.333984375, "learning_rate": 1.0101761819294876e-06, "loss": 2.0276, "step": 27476 }, { "epoch": 0.8865139107622134, "grad_norm": 0.337890625, "learning_rate": 1.009608503348673e-06, "loss": 1.9806, "step": 27477 }, { "epoch": 0.8865461746160097, "grad_norm": 0.33984375, "learning_rate": 1.0090409787640542e-06, "loss": 2.0091, "step": 27478 }, { "epoch": 0.8865784384698061, "grad_norm": 0.341796875, "learning_rate": 1.0084736081818746e-06, "loss": 1.9955, "step": 27479 }, { "epoch": 0.8866107023236024, "grad_norm": 0.345703125, "learning_rate": 1.0079063916083775e-06, "loss": 1.9919, "step": 27480 }, { "epoch": 0.8866429661773988, "grad_norm": 0.33203125, "learning_rate": 1.0073393290498129e-06, "loss": 1.9624, "step": 27481 }, { "epoch": 0.8866752300311951, "grad_norm": 0.337890625, "learning_rate": 1.0067724205124174e-06, "loss": 2.0082, "step": 27482 }, { "epoch": 0.8867074938849915, "grad_norm": 0.330078125, "learning_rate": 1.006205666002431e-06, "loss": 1.972, "step": 27483 }, { "epoch": 0.8867397577387878, "grad_norm": 0.3359375, "learning_rate": 1.0056390655260954e-06, "loss": 2.0099, "step": 27484 }, { "epoch": 0.8867720215925842, "grad_norm": 0.333984375, "learning_rate": 1.0050726190896443e-06, "loss": 1.9785, "step": 27485 }, { "epoch": 0.8868042854463805, "grad_norm": 0.33984375, "learning_rate": 1.0045063266993137e-06, "loss": 2.0111, "step": 27486 }, { "epoch": 0.8868365493001769, "grad_norm": 0.349609375, "learning_rate": 1.0039401883613374e-06, "loss": 1.9693, "step": 27487 }, { "epoch": 0.8868688131539731, "grad_norm": 0.33984375, "learning_rate": 1.0033742040819488e-06, "loss": 2.0096, "step": 27488 }, { "epoch": 0.8869010770077695, "grad_norm": 0.333984375, "learning_rate": 1.0028083738673727e-06, "loss": 1.9679, "step": 27489 }, { "epoch": 0.8869333408615658, "grad_norm": 0.33203125, "learning_rate": 1.0022426977238425e-06, "loss": 1.9937, "step": 27490 }, { "epoch": 0.8869656047153622, "grad_norm": 0.3359375, "learning_rate": 1.0016771756575815e-06, "loss": 1.9933, "step": 27491 }, { "epoch": 0.8869978685691586, "grad_norm": 0.341796875, "learning_rate": 1.0011118076748182e-06, "loss": 1.9736, "step": 27492 }, { "epoch": 0.8870301324229549, "grad_norm": 0.345703125, "learning_rate": 1.0005465937817692e-06, "loss": 1.9618, "step": 27493 }, { "epoch": 0.8870623962767513, "grad_norm": 0.326171875, "learning_rate": 9.999815339846629e-07, "loss": 2.0049, "step": 27494 }, { "epoch": 0.8870946601305476, "grad_norm": 0.34375, "learning_rate": 9.994166282897178e-07, "loss": 1.9773, "step": 27495 }, { "epoch": 0.887126923984344, "grad_norm": 0.337890625, "learning_rate": 9.988518767031468e-07, "loss": 2.0126, "step": 27496 }, { "epoch": 0.8871591878381403, "grad_norm": 0.330078125, "learning_rate": 9.98287279231172e-07, "loss": 2.0217, "step": 27497 }, { "epoch": 0.8871914516919367, "grad_norm": 0.33203125, "learning_rate": 9.977228358800066e-07, "loss": 1.9794, "step": 27498 }, { "epoch": 0.887223715545733, "grad_norm": 0.3359375, "learning_rate": 9.971585466558607e-07, "loss": 1.9882, "step": 27499 }, { "epoch": 0.8872559793995294, "grad_norm": 0.3359375, "learning_rate": 9.965944115649494e-07, "loss": 2.0233, "step": 27500 }, { "epoch": 0.8872882432533257, "grad_norm": 0.34375, "learning_rate": 9.960304306134826e-07, "loss": 1.9864, "step": 27501 }, { "epoch": 0.8873205071071221, "grad_norm": 0.33984375, "learning_rate": 9.954666038076621e-07, "loss": 1.9436, "step": 27502 }, { "epoch": 0.8873527709609184, "grad_norm": 0.345703125, "learning_rate": 9.949029311537027e-07, "loss": 2.006, "step": 27503 }, { "epoch": 0.8873850348147148, "grad_norm": 0.337890625, "learning_rate": 9.943394126578032e-07, "loss": 1.9952, "step": 27504 }, { "epoch": 0.887417298668511, "grad_norm": 0.345703125, "learning_rate": 9.937760483261666e-07, "loss": 1.9915, "step": 27505 }, { "epoch": 0.8874495625223074, "grad_norm": 0.3359375, "learning_rate": 9.932128381649964e-07, "loss": 1.9853, "step": 27506 }, { "epoch": 0.8874818263761037, "grad_norm": 0.33203125, "learning_rate": 9.926497821804909e-07, "loss": 1.9877, "step": 27507 }, { "epoch": 0.8875140902299001, "grad_norm": 0.326171875, "learning_rate": 9.920868803788453e-07, "loss": 1.9841, "step": 27508 }, { "epoch": 0.8875463540836964, "grad_norm": 0.341796875, "learning_rate": 9.91524132766261e-07, "loss": 1.9608, "step": 27509 }, { "epoch": 0.8875786179374928, "grad_norm": 0.3359375, "learning_rate": 9.909615393489302e-07, "loss": 2.0045, "step": 27510 }, { "epoch": 0.8876108817912891, "grad_norm": 0.341796875, "learning_rate": 9.90399100133041e-07, "loss": 2.0021, "step": 27511 }, { "epoch": 0.8876431456450855, "grad_norm": 0.34375, "learning_rate": 9.898368151247915e-07, "loss": 1.9671, "step": 27512 }, { "epoch": 0.8876754094988819, "grad_norm": 0.341796875, "learning_rate": 9.892746843303675e-07, "loss": 2.003, "step": 27513 }, { "epoch": 0.8877076733526782, "grad_norm": 0.33984375, "learning_rate": 9.887127077559532e-07, "loss": 2.0192, "step": 27514 }, { "epoch": 0.8877399372064746, "grad_norm": 0.330078125, "learning_rate": 9.881508854077409e-07, "loss": 2.0048, "step": 27515 }, { "epoch": 0.8877722010602709, "grad_norm": 0.3359375, "learning_rate": 9.875892172919105e-07, "loss": 1.9766, "step": 27516 }, { "epoch": 0.8878044649140673, "grad_norm": 0.333984375, "learning_rate": 9.870277034146436e-07, "loss": 1.9611, "step": 27517 }, { "epoch": 0.8878367287678636, "grad_norm": 0.345703125, "learning_rate": 9.86466343782127e-07, "loss": 2.003, "step": 27518 }, { "epoch": 0.88786899262166, "grad_norm": 0.3515625, "learning_rate": 9.85905138400534e-07, "loss": 2.0006, "step": 27519 }, { "epoch": 0.8879012564754563, "grad_norm": 0.3359375, "learning_rate": 9.853440872760432e-07, "loss": 1.9795, "step": 27520 }, { "epoch": 0.8879335203292527, "grad_norm": 0.341796875, "learning_rate": 9.84783190414828e-07, "loss": 1.9962, "step": 27521 }, { "epoch": 0.887965784183049, "grad_norm": 0.33203125, "learning_rate": 9.842224478230683e-07, "loss": 1.9974, "step": 27522 }, { "epoch": 0.8879980480368453, "grad_norm": 0.330078125, "learning_rate": 9.83661859506934e-07, "loss": 1.9813, "step": 27523 }, { "epoch": 0.8880303118906416, "grad_norm": 0.337890625, "learning_rate": 9.831014254725905e-07, "loss": 2.0002, "step": 27524 }, { "epoch": 0.888062575744438, "grad_norm": 0.333984375, "learning_rate": 9.825411457262145e-07, "loss": 1.9806, "step": 27525 }, { "epoch": 0.8880948395982343, "grad_norm": 0.330078125, "learning_rate": 9.819810202739676e-07, "loss": 2.0014, "step": 27526 }, { "epoch": 0.8881271034520307, "grad_norm": 0.330078125, "learning_rate": 9.814210491220166e-07, "loss": 2.0041, "step": 27527 }, { "epoch": 0.888159367305827, "grad_norm": 0.333984375, "learning_rate": 9.80861232276528e-07, "loss": 1.9995, "step": 27528 }, { "epoch": 0.8881916311596234, "grad_norm": 0.333984375, "learning_rate": 9.803015697436607e-07, "loss": 2.0125, "step": 27529 }, { "epoch": 0.8882238950134197, "grad_norm": 0.330078125, "learning_rate": 9.797420615295744e-07, "loss": 1.9917, "step": 27530 }, { "epoch": 0.8882561588672161, "grad_norm": 0.33203125, "learning_rate": 9.791827076404324e-07, "loss": 1.9782, "step": 27531 }, { "epoch": 0.8882884227210125, "grad_norm": 0.33203125, "learning_rate": 9.786235080823868e-07, "loss": 1.9885, "step": 27532 }, { "epoch": 0.8883206865748088, "grad_norm": 0.345703125, "learning_rate": 9.780644628615941e-07, "loss": 1.9898, "step": 27533 }, { "epoch": 0.8883529504286052, "grad_norm": 0.341796875, "learning_rate": 9.775055719842097e-07, "loss": 1.9896, "step": 27534 }, { "epoch": 0.8883852142824015, "grad_norm": 0.337890625, "learning_rate": 9.769468354563848e-07, "loss": 1.9626, "step": 27535 }, { "epoch": 0.8884174781361979, "grad_norm": 0.333984375, "learning_rate": 9.763882532842665e-07, "loss": 1.9691, "step": 27536 }, { "epoch": 0.8884497419899942, "grad_norm": 0.341796875, "learning_rate": 9.758298254740082e-07, "loss": 1.9905, "step": 27537 }, { "epoch": 0.8884820058437906, "grad_norm": 0.36328125, "learning_rate": 9.752715520317552e-07, "loss": 1.9689, "step": 27538 }, { "epoch": 0.8885142696975868, "grad_norm": 0.337890625, "learning_rate": 9.747134329636486e-07, "loss": 1.9966, "step": 27539 }, { "epoch": 0.8885465335513832, "grad_norm": 0.337890625, "learning_rate": 9.741554682758375e-07, "loss": 1.9906, "step": 27540 }, { "epoch": 0.8885787974051795, "grad_norm": 0.337890625, "learning_rate": 9.7359765797446e-07, "loss": 2.0239, "step": 27541 }, { "epoch": 0.8886110612589759, "grad_norm": 0.333984375, "learning_rate": 9.730400020656543e-07, "loss": 2.005, "step": 27542 }, { "epoch": 0.8886433251127722, "grad_norm": 0.341796875, "learning_rate": 9.724825005555643e-07, "loss": 1.9484, "step": 27543 }, { "epoch": 0.8886755889665686, "grad_norm": 0.333984375, "learning_rate": 9.71925153450325e-07, "loss": 1.9972, "step": 27544 }, { "epoch": 0.8887078528203649, "grad_norm": 0.341796875, "learning_rate": 9.71367960756066e-07, "loss": 1.9923, "step": 27545 }, { "epoch": 0.8887401166741613, "grad_norm": 0.34375, "learning_rate": 9.708109224789263e-07, "loss": 1.9906, "step": 27546 }, { "epoch": 0.8887723805279576, "grad_norm": 0.330078125, "learning_rate": 9.702540386250374e-07, "loss": 2.0107, "step": 27547 }, { "epoch": 0.888804644381754, "grad_norm": 0.337890625, "learning_rate": 9.696973092005229e-07, "loss": 1.9708, "step": 27548 }, { "epoch": 0.8888369082355503, "grad_norm": 0.33203125, "learning_rate": 9.691407342115193e-07, "loss": 1.9888, "step": 27549 }, { "epoch": 0.8888691720893467, "grad_norm": 0.333984375, "learning_rate": 9.68584313664147e-07, "loss": 2.0191, "step": 27550 }, { "epoch": 0.888901435943143, "grad_norm": 0.341796875, "learning_rate": 9.680280475645342e-07, "loss": 1.9515, "step": 27551 }, { "epoch": 0.8889336997969394, "grad_norm": 0.330078125, "learning_rate": 9.674719359187978e-07, "loss": 1.989, "step": 27552 }, { "epoch": 0.8889659636507358, "grad_norm": 0.33984375, "learning_rate": 9.669159787330678e-07, "loss": 2.0118, "step": 27553 }, { "epoch": 0.8889982275045321, "grad_norm": 0.328125, "learning_rate": 9.663601760134578e-07, "loss": 1.9981, "step": 27554 }, { "epoch": 0.8890304913583285, "grad_norm": 0.33984375, "learning_rate": 9.65804527766086e-07, "loss": 1.988, "step": 27555 }, { "epoch": 0.8890627552121247, "grad_norm": 0.33984375, "learning_rate": 9.652490339970727e-07, "loss": 2.0151, "step": 27556 }, { "epoch": 0.8890950190659211, "grad_norm": 0.330078125, "learning_rate": 9.64693694712528e-07, "loss": 1.9816, "step": 27557 }, { "epoch": 0.8891272829197174, "grad_norm": 0.333984375, "learning_rate": 9.641385099185651e-07, "loss": 1.98, "step": 27558 }, { "epoch": 0.8891595467735138, "grad_norm": 0.337890625, "learning_rate": 9.635834796212977e-07, "loss": 2.0089, "step": 27559 }, { "epoch": 0.8891918106273101, "grad_norm": 0.333984375, "learning_rate": 9.630286038268344e-07, "loss": 1.9687, "step": 27560 }, { "epoch": 0.8892240744811065, "grad_norm": 0.3359375, "learning_rate": 9.624738825412782e-07, "loss": 1.9845, "step": 27561 }, { "epoch": 0.8892563383349028, "grad_norm": 0.34375, "learning_rate": 9.61919315770743e-07, "loss": 1.9867, "step": 27562 }, { "epoch": 0.8892886021886992, "grad_norm": 0.34375, "learning_rate": 9.613649035213285e-07, "loss": 1.9727, "step": 27563 }, { "epoch": 0.8893208660424955, "grad_norm": 0.326171875, "learning_rate": 9.60810645799135e-07, "loss": 1.9905, "step": 27564 }, { "epoch": 0.8893531298962919, "grad_norm": 0.33984375, "learning_rate": 9.602565426102695e-07, "loss": 1.9748, "step": 27565 }, { "epoch": 0.8893853937500882, "grad_norm": 0.328125, "learning_rate": 9.597025939608283e-07, "loss": 1.993, "step": 27566 }, { "epoch": 0.8894176576038846, "grad_norm": 0.3359375, "learning_rate": 9.591487998569055e-07, "loss": 1.9897, "step": 27567 }, { "epoch": 0.8894499214576809, "grad_norm": 0.34765625, "learning_rate": 9.585951603046022e-07, "loss": 2.0293, "step": 27568 }, { "epoch": 0.8894821853114773, "grad_norm": 0.330078125, "learning_rate": 9.580416753100107e-07, "loss": 1.9713, "step": 27569 }, { "epoch": 0.8895144491652736, "grad_norm": 0.33984375, "learning_rate": 9.574883448792194e-07, "loss": 2.0032, "step": 27570 }, { "epoch": 0.88954671301907, "grad_norm": 0.333984375, "learning_rate": 9.569351690183248e-07, "loss": 2.002, "step": 27571 }, { "epoch": 0.8895789768728664, "grad_norm": 0.3359375, "learning_rate": 9.563821477334139e-07, "loss": 1.9955, "step": 27572 }, { "epoch": 0.8896112407266626, "grad_norm": 0.3359375, "learning_rate": 9.558292810305718e-07, "loss": 1.9759, "step": 27573 }, { "epoch": 0.889643504580459, "grad_norm": 0.333984375, "learning_rate": 9.55276568915887e-07, "loss": 2.006, "step": 27574 }, { "epoch": 0.8896757684342553, "grad_norm": 0.337890625, "learning_rate": 9.547240113954415e-07, "loss": 1.9864, "step": 27575 }, { "epoch": 0.8897080322880517, "grad_norm": 0.34375, "learning_rate": 9.541716084753166e-07, "loss": 2.02, "step": 27576 }, { "epoch": 0.889740296141848, "grad_norm": 0.337890625, "learning_rate": 9.536193601615961e-07, "loss": 1.9667, "step": 27577 }, { "epoch": 0.8897725599956444, "grad_norm": 0.337890625, "learning_rate": 9.530672664603585e-07, "loss": 1.9847, "step": 27578 }, { "epoch": 0.8898048238494407, "grad_norm": 0.333984375, "learning_rate": 9.525153273776755e-07, "loss": 1.9924, "step": 27579 }, { "epoch": 0.8898370877032371, "grad_norm": 0.337890625, "learning_rate": 9.519635429196288e-07, "loss": 1.9447, "step": 27580 }, { "epoch": 0.8898693515570334, "grad_norm": 0.3359375, "learning_rate": 9.514119130922888e-07, "loss": 1.9664, "step": 27581 }, { "epoch": 0.8899016154108298, "grad_norm": 0.337890625, "learning_rate": 9.508604379017289e-07, "loss": 2.0058, "step": 27582 }, { "epoch": 0.8899338792646261, "grad_norm": 0.341796875, "learning_rate": 9.503091173540157e-07, "loss": 1.9915, "step": 27583 }, { "epoch": 0.8899661431184225, "grad_norm": 0.3359375, "learning_rate": 9.497579514552213e-07, "loss": 2.0077, "step": 27584 }, { "epoch": 0.8899984069722188, "grad_norm": 0.3359375, "learning_rate": 9.49206940211414e-07, "loss": 1.9493, "step": 27585 }, { "epoch": 0.8900306708260152, "grad_norm": 0.35546875, "learning_rate": 9.486560836286539e-07, "loss": 1.9513, "step": 27586 }, { "epoch": 0.8900629346798115, "grad_norm": 0.34375, "learning_rate": 9.481053817130098e-07, "loss": 1.9992, "step": 27587 }, { "epoch": 0.8900951985336079, "grad_norm": 0.3359375, "learning_rate": 9.475548344705398e-07, "loss": 2.0033, "step": 27588 }, { "epoch": 0.8901274623874041, "grad_norm": 0.33984375, "learning_rate": 9.470044419073026e-07, "loss": 1.9796, "step": 27589 }, { "epoch": 0.8901597262412005, "grad_norm": 0.33203125, "learning_rate": 9.464542040293633e-07, "loss": 1.9242, "step": 27590 }, { "epoch": 0.8901919900949968, "grad_norm": 0.345703125, "learning_rate": 9.459041208427721e-07, "loss": 1.9832, "step": 27591 }, { "epoch": 0.8902242539487932, "grad_norm": 0.337890625, "learning_rate": 9.453541923535841e-07, "loss": 1.9841, "step": 27592 }, { "epoch": 0.8902565178025896, "grad_norm": 0.337890625, "learning_rate": 9.448044185678579e-07, "loss": 2.0234, "step": 27593 }, { "epoch": 0.8902887816563859, "grad_norm": 0.328125, "learning_rate": 9.442547994916401e-07, "loss": 1.963, "step": 27594 }, { "epoch": 0.8903210455101823, "grad_norm": 0.328125, "learning_rate": 9.437053351309793e-07, "loss": 1.9893, "step": 27595 }, { "epoch": 0.8903533093639786, "grad_norm": 0.333984375, "learning_rate": 9.431560254919308e-07, "loss": 2.0051, "step": 27596 }, { "epoch": 0.890385573217775, "grad_norm": 0.3515625, "learning_rate": 9.426068705805346e-07, "loss": 1.987, "step": 27597 }, { "epoch": 0.8904178370715713, "grad_norm": 0.3359375, "learning_rate": 9.420578704028343e-07, "loss": 2.0039, "step": 27598 }, { "epoch": 0.8904501009253677, "grad_norm": 0.330078125, "learning_rate": 9.4150902496488e-07, "loss": 1.9483, "step": 27599 }, { "epoch": 0.890482364779164, "grad_norm": 0.341796875, "learning_rate": 9.409603342727086e-07, "loss": 1.9994, "step": 27600 }, { "epoch": 0.8905146286329604, "grad_norm": 0.328125, "learning_rate": 9.404117983323568e-07, "loss": 1.9764, "step": 27601 }, { "epoch": 0.8905468924867567, "grad_norm": 0.33984375, "learning_rate": 9.398634171498682e-07, "loss": 2.0037, "step": 27602 }, { "epoch": 0.8905791563405531, "grad_norm": 0.341796875, "learning_rate": 9.393151907312764e-07, "loss": 1.9644, "step": 27603 }, { "epoch": 0.8906114201943494, "grad_norm": 0.3359375, "learning_rate": 9.38767119082613e-07, "loss": 1.9889, "step": 27604 }, { "epoch": 0.8906436840481458, "grad_norm": 0.33203125, "learning_rate": 9.382192022099167e-07, "loss": 2.0011, "step": 27605 }, { "epoch": 0.890675947901942, "grad_norm": 0.3515625, "learning_rate": 9.376714401192144e-07, "loss": 1.9792, "step": 27606 }, { "epoch": 0.8907082117557384, "grad_norm": 0.3359375, "learning_rate": 9.37123832816536e-07, "loss": 1.9832, "step": 27607 }, { "epoch": 0.8907404756095347, "grad_norm": 0.35546875, "learning_rate": 9.365763803079103e-07, "loss": 1.9841, "step": 27608 }, { "epoch": 0.8907727394633311, "grad_norm": 0.333984375, "learning_rate": 9.36029082599364e-07, "loss": 1.9838, "step": 27609 }, { "epoch": 0.8908050033171274, "grad_norm": 0.33984375, "learning_rate": 9.354819396969189e-07, "loss": 1.9818, "step": 27610 }, { "epoch": 0.8908372671709238, "grad_norm": 0.33203125, "learning_rate": 9.349349516065986e-07, "loss": 1.9783, "step": 27611 }, { "epoch": 0.8908695310247201, "grad_norm": 0.3515625, "learning_rate": 9.343881183344249e-07, "loss": 1.9804, "step": 27612 }, { "epoch": 0.8909017948785165, "grad_norm": 0.333984375, "learning_rate": 9.338414398864164e-07, "loss": 1.9983, "step": 27613 }, { "epoch": 0.8909340587323129, "grad_norm": 0.34375, "learning_rate": 9.332949162685883e-07, "loss": 1.9803, "step": 27614 }, { "epoch": 0.8909663225861092, "grad_norm": 0.33203125, "learning_rate": 9.327485474869607e-07, "loss": 1.9782, "step": 27615 }, { "epoch": 0.8909985864399056, "grad_norm": 0.333984375, "learning_rate": 9.322023335475454e-07, "loss": 2.0044, "step": 27616 }, { "epoch": 0.8910308502937019, "grad_norm": 0.33984375, "learning_rate": 9.316562744563528e-07, "loss": 1.984, "step": 27617 }, { "epoch": 0.8910631141474983, "grad_norm": 0.333984375, "learning_rate": 9.311103702193962e-07, "loss": 1.9528, "step": 27618 }, { "epoch": 0.8910953780012946, "grad_norm": 0.33203125, "learning_rate": 9.305646208426843e-07, "loss": 1.9748, "step": 27619 }, { "epoch": 0.891127641855091, "grad_norm": 0.333984375, "learning_rate": 9.300190263322206e-07, "loss": 1.9907, "step": 27620 }, { "epoch": 0.8911599057088873, "grad_norm": 0.33203125, "learning_rate": 9.294735866940168e-07, "loss": 2.015, "step": 27621 }, { "epoch": 0.8911921695626837, "grad_norm": 0.33203125, "learning_rate": 9.28928301934075e-07, "loss": 2.0004, "step": 27622 }, { "epoch": 0.8912244334164799, "grad_norm": 0.3359375, "learning_rate": 9.283831720583918e-07, "loss": 1.9894, "step": 27623 }, { "epoch": 0.8912566972702763, "grad_norm": 0.337890625, "learning_rate": 9.278381970729743e-07, "loss": 1.9813, "step": 27624 }, { "epoch": 0.8912889611240726, "grad_norm": 0.3359375, "learning_rate": 9.272933769838193e-07, "loss": 1.9859, "step": 27625 }, { "epoch": 0.891321224977869, "grad_norm": 0.3359375, "learning_rate": 9.267487117969187e-07, "loss": 1.9866, "step": 27626 }, { "epoch": 0.8913534888316653, "grad_norm": 0.33984375, "learning_rate": 9.262042015182759e-07, "loss": 1.9519, "step": 27627 }, { "epoch": 0.8913857526854617, "grad_norm": 0.330078125, "learning_rate": 9.256598461538812e-07, "loss": 1.9657, "step": 27628 }, { "epoch": 0.891418016539258, "grad_norm": 0.33984375, "learning_rate": 9.251156457097231e-07, "loss": 1.9702, "step": 27629 }, { "epoch": 0.8914502803930544, "grad_norm": 0.328125, "learning_rate": 9.245716001917987e-07, "loss": 1.9734, "step": 27630 }, { "epoch": 0.8914825442468507, "grad_norm": 0.3359375, "learning_rate": 9.240277096060896e-07, "loss": 1.9642, "step": 27631 }, { "epoch": 0.8915148081006471, "grad_norm": 0.34375, "learning_rate": 9.234839739585843e-07, "loss": 2.006, "step": 27632 }, { "epoch": 0.8915470719544435, "grad_norm": 0.33203125, "learning_rate": 9.229403932552716e-07, "loss": 2.0046, "step": 27633 }, { "epoch": 0.8915793358082398, "grad_norm": 0.33203125, "learning_rate": 9.223969675021332e-07, "loss": 1.9724, "step": 27634 }, { "epoch": 0.8916115996620362, "grad_norm": 0.337890625, "learning_rate": 9.218536967051461e-07, "loss": 2.0194, "step": 27635 }, { "epoch": 0.8916438635158325, "grad_norm": 0.33203125, "learning_rate": 9.213105808702954e-07, "loss": 1.9545, "step": 27636 }, { "epoch": 0.8916761273696289, "grad_norm": 0.337890625, "learning_rate": 9.207676200035598e-07, "loss": 1.978, "step": 27637 }, { "epoch": 0.8917083912234252, "grad_norm": 0.345703125, "learning_rate": 9.202248141109109e-07, "loss": 2.0039, "step": 27638 }, { "epoch": 0.8917406550772216, "grad_norm": 0.337890625, "learning_rate": 9.196821631983276e-07, "loss": 1.986, "step": 27639 }, { "epoch": 0.8917729189310178, "grad_norm": 0.337890625, "learning_rate": 9.191396672717834e-07, "loss": 1.9796, "step": 27640 }, { "epoch": 0.8918051827848142, "grad_norm": 0.333984375, "learning_rate": 9.185973263372449e-07, "loss": 1.9861, "step": 27641 }, { "epoch": 0.8918374466386105, "grad_norm": 0.341796875, "learning_rate": 9.180551404006876e-07, "loss": 2.0167, "step": 27642 }, { "epoch": 0.8918697104924069, "grad_norm": 0.337890625, "learning_rate": 9.175131094680767e-07, "loss": 1.9912, "step": 27643 }, { "epoch": 0.8919019743462032, "grad_norm": 0.3359375, "learning_rate": 9.169712335453789e-07, "loss": 1.9805, "step": 27644 }, { "epoch": 0.8919342381999996, "grad_norm": 0.337890625, "learning_rate": 9.164295126385564e-07, "loss": 2.0271, "step": 27645 }, { "epoch": 0.8919665020537959, "grad_norm": 0.337890625, "learning_rate": 9.158879467535775e-07, "loss": 1.9693, "step": 27646 }, { "epoch": 0.8919987659075923, "grad_norm": 0.3359375, "learning_rate": 9.153465358963992e-07, "loss": 1.9965, "step": 27647 }, { "epoch": 0.8920310297613886, "grad_norm": 0.337890625, "learning_rate": 9.1480528007298e-07, "loss": 1.9949, "step": 27648 }, { "epoch": 0.892063293615185, "grad_norm": 0.3359375, "learning_rate": 9.142641792892819e-07, "loss": 1.9959, "step": 27649 }, { "epoch": 0.8920955574689813, "grad_norm": 0.3359375, "learning_rate": 9.137232335512585e-07, "loss": 1.9976, "step": 27650 }, { "epoch": 0.8921278213227777, "grad_norm": 0.33203125, "learning_rate": 9.131824428648617e-07, "loss": 1.9924, "step": 27651 }, { "epoch": 0.892160085176574, "grad_norm": 0.345703125, "learning_rate": 9.126418072360499e-07, "loss": 1.9381, "step": 27652 }, { "epoch": 0.8921923490303704, "grad_norm": 0.33203125, "learning_rate": 9.121013266707701e-07, "loss": 1.9722, "step": 27653 }, { "epoch": 0.8922246128841668, "grad_norm": 0.3359375, "learning_rate": 9.115610011749692e-07, "loss": 2.0063, "step": 27654 }, { "epoch": 0.8922568767379631, "grad_norm": 0.33984375, "learning_rate": 9.110208307546025e-07, "loss": 1.9431, "step": 27655 }, { "epoch": 0.8922891405917595, "grad_norm": 0.3359375, "learning_rate": 9.104808154156086e-07, "loss": 1.9968, "step": 27656 }, { "epoch": 0.8923214044455557, "grad_norm": 0.33203125, "learning_rate": 9.099409551639326e-07, "loss": 1.9943, "step": 27657 }, { "epoch": 0.8923536682993521, "grad_norm": 0.337890625, "learning_rate": 9.094012500055215e-07, "loss": 2.0027, "step": 27658 }, { "epoch": 0.8923859321531484, "grad_norm": 0.337890625, "learning_rate": 9.088616999463123e-07, "loss": 1.977, "step": 27659 }, { "epoch": 0.8924181960069448, "grad_norm": 0.345703125, "learning_rate": 9.083223049922435e-07, "loss": 1.9771, "step": 27660 }, { "epoch": 0.8924504598607411, "grad_norm": 0.33203125, "learning_rate": 9.077830651492536e-07, "loss": 2.0026, "step": 27661 }, { "epoch": 0.8924827237145375, "grad_norm": 0.33203125, "learning_rate": 9.072439804232796e-07, "loss": 1.9933, "step": 27662 }, { "epoch": 0.8925149875683338, "grad_norm": 0.3359375, "learning_rate": 9.067050508202501e-07, "loss": 2.0174, "step": 27663 }, { "epoch": 0.8925472514221302, "grad_norm": 0.34375, "learning_rate": 9.06166276346107e-07, "loss": 1.9842, "step": 27664 }, { "epoch": 0.8925795152759265, "grad_norm": 0.341796875, "learning_rate": 9.05627657006769e-07, "loss": 2.0017, "step": 27665 }, { "epoch": 0.8926117791297229, "grad_norm": 0.333984375, "learning_rate": 9.050891928081712e-07, "loss": 1.9636, "step": 27666 }, { "epoch": 0.8926440429835192, "grad_norm": 0.3515625, "learning_rate": 9.045508837562406e-07, "loss": 1.9671, "step": 27667 }, { "epoch": 0.8926763068373156, "grad_norm": 0.333984375, "learning_rate": 9.04012729856904e-07, "loss": 1.9599, "step": 27668 }, { "epoch": 0.8927085706911119, "grad_norm": 0.3359375, "learning_rate": 9.034747311160802e-07, "loss": 1.9518, "step": 27669 }, { "epoch": 0.8927408345449083, "grad_norm": 0.33203125, "learning_rate": 9.029368875396943e-07, "loss": 1.9962, "step": 27670 }, { "epoch": 0.8927730983987046, "grad_norm": 0.33984375, "learning_rate": 9.023991991336684e-07, "loss": 1.9575, "step": 27671 }, { "epoch": 0.892805362252501, "grad_norm": 0.333984375, "learning_rate": 9.018616659039158e-07, "loss": 1.9934, "step": 27672 }, { "epoch": 0.8928376261062972, "grad_norm": 0.34375, "learning_rate": 9.013242878563555e-07, "loss": 1.9959, "step": 27673 }, { "epoch": 0.8928698899600936, "grad_norm": 0.333984375, "learning_rate": 9.007870649969058e-07, "loss": 1.9896, "step": 27674 }, { "epoch": 0.89290215381389, "grad_norm": 0.337890625, "learning_rate": 9.002499973314754e-07, "loss": 2.0037, "step": 27675 }, { "epoch": 0.8929344176676863, "grad_norm": 0.330078125, "learning_rate": 8.99713084865978e-07, "loss": 1.9473, "step": 27676 }, { "epoch": 0.8929666815214827, "grad_norm": 0.345703125, "learning_rate": 8.991763276063237e-07, "loss": 1.9815, "step": 27677 }, { "epoch": 0.892998945375279, "grad_norm": 0.330078125, "learning_rate": 8.986397255584227e-07, "loss": 1.99, "step": 27678 }, { "epoch": 0.8930312092290754, "grad_norm": 0.326171875, "learning_rate": 8.981032787281757e-07, "loss": 1.9853, "step": 27679 }, { "epoch": 0.8930634730828717, "grad_norm": 0.33203125, "learning_rate": 8.975669871214926e-07, "loss": 1.9468, "step": 27680 }, { "epoch": 0.8930957369366681, "grad_norm": 0.333984375, "learning_rate": 8.970308507442754e-07, "loss": 1.9672, "step": 27681 }, { "epoch": 0.8931280007904644, "grad_norm": 0.33203125, "learning_rate": 8.964948696024245e-07, "loss": 1.9746, "step": 27682 }, { "epoch": 0.8931602646442608, "grad_norm": 0.32421875, "learning_rate": 8.959590437018417e-07, "loss": 2.0168, "step": 27683 }, { "epoch": 0.8931925284980571, "grad_norm": 0.3359375, "learning_rate": 8.954233730484223e-07, "loss": 1.9773, "step": 27684 }, { "epoch": 0.8932247923518535, "grad_norm": 0.3359375, "learning_rate": 8.948878576480634e-07, "loss": 2.003, "step": 27685 }, { "epoch": 0.8932570562056498, "grad_norm": 0.328125, "learning_rate": 8.943524975066619e-07, "loss": 1.9781, "step": 27686 }, { "epoch": 0.8932893200594462, "grad_norm": 0.3359375, "learning_rate": 8.93817292630108e-07, "loss": 1.9966, "step": 27687 }, { "epoch": 0.8933215839132425, "grad_norm": 0.337890625, "learning_rate": 8.932822430242904e-07, "loss": 1.9895, "step": 27688 }, { "epoch": 0.8933538477670389, "grad_norm": 0.33203125, "learning_rate": 8.92747348695106e-07, "loss": 2.0084, "step": 27689 }, { "epoch": 0.8933861116208351, "grad_norm": 0.33984375, "learning_rate": 8.922126096484368e-07, "loss": 1.9624, "step": 27690 }, { "epoch": 0.8934183754746315, "grad_norm": 0.341796875, "learning_rate": 8.916780258901697e-07, "loss": 2.0056, "step": 27691 }, { "epoch": 0.8934506393284278, "grad_norm": 0.330078125, "learning_rate": 8.911435974261917e-07, "loss": 1.9871, "step": 27692 }, { "epoch": 0.8934829031822242, "grad_norm": 0.341796875, "learning_rate": 8.906093242623814e-07, "loss": 1.979, "step": 27693 }, { "epoch": 0.8935151670360206, "grad_norm": 0.361328125, "learning_rate": 8.900752064046208e-07, "loss": 2.0201, "step": 27694 }, { "epoch": 0.8935474308898169, "grad_norm": 0.345703125, "learning_rate": 8.895412438587952e-07, "loss": 1.9973, "step": 27695 }, { "epoch": 0.8935796947436133, "grad_norm": 0.3359375, "learning_rate": 8.890074366307699e-07, "loss": 1.9607, "step": 27696 }, { "epoch": 0.8936119585974096, "grad_norm": 0.333984375, "learning_rate": 8.884737847264302e-07, "loss": 2.0255, "step": 27697 }, { "epoch": 0.893644222451206, "grad_norm": 0.33984375, "learning_rate": 8.879402881516496e-07, "loss": 2.0022, "step": 27698 }, { "epoch": 0.8936764863050023, "grad_norm": 0.3359375, "learning_rate": 8.874069469122953e-07, "loss": 1.978, "step": 27699 }, { "epoch": 0.8937087501587987, "grad_norm": 0.3359375, "learning_rate": 8.868737610142408e-07, "loss": 1.998, "step": 27700 }, { "epoch": 0.893741014012595, "grad_norm": 0.330078125, "learning_rate": 8.863407304633581e-07, "loss": 1.9941, "step": 27701 }, { "epoch": 0.8937732778663914, "grad_norm": 0.3359375, "learning_rate": 8.858078552655108e-07, "loss": 1.9627, "step": 27702 }, { "epoch": 0.8938055417201877, "grad_norm": 0.3359375, "learning_rate": 8.852751354265642e-07, "loss": 1.9824, "step": 27703 }, { "epoch": 0.8938378055739841, "grad_norm": 0.349609375, "learning_rate": 8.847425709523821e-07, "loss": 2.0034, "step": 27704 }, { "epoch": 0.8938700694277804, "grad_norm": 0.34765625, "learning_rate": 8.842101618488297e-07, "loss": 1.9884, "step": 27705 }, { "epoch": 0.8939023332815768, "grad_norm": 0.33984375, "learning_rate": 8.836779081217656e-07, "loss": 1.9701, "step": 27706 }, { "epoch": 0.893934597135373, "grad_norm": 0.3359375, "learning_rate": 8.831458097770451e-07, "loss": 1.9886, "step": 27707 }, { "epoch": 0.8939668609891694, "grad_norm": 0.357421875, "learning_rate": 8.826138668205303e-07, "loss": 1.9661, "step": 27708 }, { "epoch": 0.8939991248429657, "grad_norm": 0.328125, "learning_rate": 8.820820792580747e-07, "loss": 1.9914, "step": 27709 }, { "epoch": 0.8940313886967621, "grad_norm": 0.3359375, "learning_rate": 8.815504470955288e-07, "loss": 1.9707, "step": 27710 }, { "epoch": 0.8940636525505584, "grad_norm": 0.330078125, "learning_rate": 8.810189703387494e-07, "loss": 1.9864, "step": 27711 }, { "epoch": 0.8940959164043548, "grad_norm": 0.3359375, "learning_rate": 8.804876489935837e-07, "loss": 2.013, "step": 27712 }, { "epoch": 0.8941281802581511, "grad_norm": 0.345703125, "learning_rate": 8.799564830658785e-07, "loss": 1.9586, "step": 27713 }, { "epoch": 0.8941604441119475, "grad_norm": 0.330078125, "learning_rate": 8.794254725614858e-07, "loss": 1.9638, "step": 27714 }, { "epoch": 0.8941927079657439, "grad_norm": 0.3359375, "learning_rate": 8.788946174862478e-07, "loss": 1.9782, "step": 27715 }, { "epoch": 0.8942249718195402, "grad_norm": 0.33984375, "learning_rate": 8.783639178460045e-07, "loss": 1.9824, "step": 27716 }, { "epoch": 0.8942572356733366, "grad_norm": 0.326171875, "learning_rate": 8.778333736466016e-07, "loss": 1.955, "step": 27717 }, { "epoch": 0.8942894995271329, "grad_norm": 0.337890625, "learning_rate": 8.77302984893879e-07, "loss": 1.9633, "step": 27718 }, { "epoch": 0.8943217633809293, "grad_norm": 0.33984375, "learning_rate": 8.767727515936707e-07, "loss": 1.9826, "step": 27719 }, { "epoch": 0.8943540272347256, "grad_norm": 0.33203125, "learning_rate": 8.762426737518187e-07, "loss": 1.9894, "step": 27720 }, { "epoch": 0.894386291088522, "grad_norm": 0.341796875, "learning_rate": 8.757127513741547e-07, "loss": 1.9861, "step": 27721 }, { "epoch": 0.8944185549423183, "grad_norm": 0.328125, "learning_rate": 8.75182984466511e-07, "loss": 1.9606, "step": 27722 }, { "epoch": 0.8944508187961147, "grad_norm": 0.3359375, "learning_rate": 8.746533730347229e-07, "loss": 1.9717, "step": 27723 }, { "epoch": 0.8944830826499109, "grad_norm": 0.337890625, "learning_rate": 8.741239170846121e-07, "loss": 2.0227, "step": 27724 }, { "epoch": 0.8945153465037073, "grad_norm": 0.3359375, "learning_rate": 8.735946166220127e-07, "loss": 1.993, "step": 27725 }, { "epoch": 0.8945476103575036, "grad_norm": 0.3515625, "learning_rate": 8.730654716527531e-07, "loss": 1.9903, "step": 27726 }, { "epoch": 0.8945798742113, "grad_norm": 0.341796875, "learning_rate": 8.725364821826504e-07, "loss": 1.9576, "step": 27727 }, { "epoch": 0.8946121380650963, "grad_norm": 0.333984375, "learning_rate": 8.720076482175316e-07, "loss": 2.011, "step": 27728 }, { "epoch": 0.8946444019188927, "grad_norm": 0.3359375, "learning_rate": 8.714789697632203e-07, "loss": 1.9988, "step": 27729 }, { "epoch": 0.894676665772689, "grad_norm": 0.341796875, "learning_rate": 8.709504468255286e-07, "loss": 1.9471, "step": 27730 }, { "epoch": 0.8947089296264854, "grad_norm": 0.330078125, "learning_rate": 8.704220794102785e-07, "loss": 1.9875, "step": 27731 }, { "epoch": 0.8947411934802817, "grad_norm": 0.326171875, "learning_rate": 8.69893867523287e-07, "loss": 1.9732, "step": 27732 }, { "epoch": 0.8947734573340781, "grad_norm": 0.3359375, "learning_rate": 8.693658111703678e-07, "loss": 2.0202, "step": 27733 }, { "epoch": 0.8948057211878745, "grad_norm": 0.337890625, "learning_rate": 8.68837910357333e-07, "loss": 1.9984, "step": 27734 }, { "epoch": 0.8948379850416708, "grad_norm": 0.337890625, "learning_rate": 8.683101650899894e-07, "loss": 2.0127, "step": 27735 }, { "epoch": 0.8948702488954672, "grad_norm": 0.333984375, "learning_rate": 8.677825753741525e-07, "loss": 1.9998, "step": 27736 }, { "epoch": 0.8949025127492635, "grad_norm": 0.333984375, "learning_rate": 8.672551412156276e-07, "loss": 2.0063, "step": 27737 }, { "epoch": 0.8949347766030599, "grad_norm": 0.33203125, "learning_rate": 8.667278626202152e-07, "loss": 1.9528, "step": 27738 }, { "epoch": 0.8949670404568562, "grad_norm": 0.3359375, "learning_rate": 8.662007395937271e-07, "loss": 1.9928, "step": 27739 }, { "epoch": 0.8949993043106526, "grad_norm": 0.33203125, "learning_rate": 8.656737721419606e-07, "loss": 1.9631, "step": 27740 }, { "epoch": 0.8950315681644488, "grad_norm": 0.328125, "learning_rate": 8.651469602707157e-07, "loss": 1.9849, "step": 27741 }, { "epoch": 0.8950638320182452, "grad_norm": 0.337890625, "learning_rate": 8.646203039857947e-07, "loss": 1.9926, "step": 27742 }, { "epoch": 0.8950960958720415, "grad_norm": 0.330078125, "learning_rate": 8.640938032929912e-07, "loss": 1.956, "step": 27743 }, { "epoch": 0.8951283597258379, "grad_norm": 0.330078125, "learning_rate": 8.635674581981024e-07, "loss": 1.9889, "step": 27744 }, { "epoch": 0.8951606235796342, "grad_norm": 0.333984375, "learning_rate": 8.630412687069217e-07, "loss": 1.9814, "step": 27745 }, { "epoch": 0.8951928874334306, "grad_norm": 0.330078125, "learning_rate": 8.62515234825243e-07, "loss": 1.9886, "step": 27746 }, { "epoch": 0.8952251512872269, "grad_norm": 0.328125, "learning_rate": 8.619893565588499e-07, "loss": 1.9842, "step": 27747 }, { "epoch": 0.8952574151410233, "grad_norm": 0.330078125, "learning_rate": 8.614636339135396e-07, "loss": 1.9812, "step": 27748 }, { "epoch": 0.8952896789948196, "grad_norm": 0.328125, "learning_rate": 8.609380668950939e-07, "loss": 2.0037, "step": 27749 }, { "epoch": 0.895321942848616, "grad_norm": 0.33203125, "learning_rate": 8.604126555092967e-07, "loss": 2.0031, "step": 27750 }, { "epoch": 0.8953542067024123, "grad_norm": 0.3359375, "learning_rate": 8.59887399761935e-07, "loss": 1.9866, "step": 27751 }, { "epoch": 0.8953864705562087, "grad_norm": 0.3359375, "learning_rate": 8.593622996587891e-07, "loss": 1.9564, "step": 27752 }, { "epoch": 0.895418734410005, "grad_norm": 0.3359375, "learning_rate": 8.588373552056361e-07, "loss": 2.0028, "step": 27753 }, { "epoch": 0.8954509982638014, "grad_norm": 0.337890625, "learning_rate": 8.583125664082614e-07, "loss": 1.9946, "step": 27754 }, { "epoch": 0.8954832621175978, "grad_norm": 0.33203125, "learning_rate": 8.57787933272432e-07, "loss": 1.999, "step": 27755 }, { "epoch": 0.895515525971394, "grad_norm": 0.3359375, "learning_rate": 8.572634558039266e-07, "loss": 2.001, "step": 27756 }, { "epoch": 0.8955477898251905, "grad_norm": 0.333984375, "learning_rate": 8.567391340085257e-07, "loss": 1.9799, "step": 27757 }, { "epoch": 0.8955800536789867, "grad_norm": 0.337890625, "learning_rate": 8.562149678919879e-07, "loss": 1.9992, "step": 27758 }, { "epoch": 0.8956123175327831, "grad_norm": 0.330078125, "learning_rate": 8.556909574600902e-07, "loss": 1.9906, "step": 27759 }, { "epoch": 0.8956445813865794, "grad_norm": 0.33984375, "learning_rate": 8.551671027186031e-07, "loss": 1.9689, "step": 27760 }, { "epoch": 0.8956768452403758, "grad_norm": 0.330078125, "learning_rate": 8.546434036732837e-07, "loss": 1.985, "step": 27761 }, { "epoch": 0.8957091090941721, "grad_norm": 0.330078125, "learning_rate": 8.541198603299038e-07, "loss": 1.9821, "step": 27762 }, { "epoch": 0.8957413729479685, "grad_norm": 0.34375, "learning_rate": 8.535964726942274e-07, "loss": 2.0017, "step": 27763 }, { "epoch": 0.8957736368017648, "grad_norm": 0.33203125, "learning_rate": 8.530732407720082e-07, "loss": 1.9668, "step": 27764 }, { "epoch": 0.8958059006555612, "grad_norm": 0.333984375, "learning_rate": 8.525501645690132e-07, "loss": 1.9987, "step": 27765 }, { "epoch": 0.8958381645093575, "grad_norm": 0.33203125, "learning_rate": 8.520272440909926e-07, "loss": 1.9709, "step": 27766 }, { "epoch": 0.8958704283631539, "grad_norm": 0.326171875, "learning_rate": 8.515044793437088e-07, "loss": 1.9925, "step": 27767 }, { "epoch": 0.8959026922169502, "grad_norm": 0.326171875, "learning_rate": 8.509818703329153e-07, "loss": 2.0118, "step": 27768 }, { "epoch": 0.8959349560707466, "grad_norm": 0.333984375, "learning_rate": 8.504594170643593e-07, "loss": 2.0091, "step": 27769 }, { "epoch": 0.8959672199245429, "grad_norm": 0.3359375, "learning_rate": 8.499371195437977e-07, "loss": 1.9921, "step": 27770 }, { "epoch": 0.8959994837783393, "grad_norm": 0.34765625, "learning_rate": 8.494149777769777e-07, "loss": 1.9438, "step": 27771 }, { "epoch": 0.8960317476321356, "grad_norm": 0.3359375, "learning_rate": 8.488929917696431e-07, "loss": 1.9769, "step": 27772 }, { "epoch": 0.896064011485932, "grad_norm": 0.337890625, "learning_rate": 8.483711615275441e-07, "loss": 1.9927, "step": 27773 }, { "epoch": 0.8960962753397282, "grad_norm": 0.341796875, "learning_rate": 8.478494870564246e-07, "loss": 1.9769, "step": 27774 }, { "epoch": 0.8961285391935246, "grad_norm": 0.337890625, "learning_rate": 8.473279683620216e-07, "loss": 1.9836, "step": 27775 }, { "epoch": 0.896160803047321, "grad_norm": 0.33984375, "learning_rate": 8.468066054500823e-07, "loss": 2.0022, "step": 27776 }, { "epoch": 0.8961930669011173, "grad_norm": 0.36328125, "learning_rate": 8.462853983263419e-07, "loss": 1.9935, "step": 27777 }, { "epoch": 0.8962253307549137, "grad_norm": 0.345703125, "learning_rate": 8.457643469965359e-07, "loss": 1.9901, "step": 27778 }, { "epoch": 0.89625759460871, "grad_norm": 0.32421875, "learning_rate": 8.452434514664048e-07, "loss": 1.9769, "step": 27779 }, { "epoch": 0.8962898584625064, "grad_norm": 0.341796875, "learning_rate": 8.447227117416789e-07, "loss": 1.9954, "step": 27780 }, { "epoch": 0.8963221223163027, "grad_norm": 0.328125, "learning_rate": 8.44202127828087e-07, "loss": 1.9426, "step": 27781 }, { "epoch": 0.8963543861700991, "grad_norm": 0.34375, "learning_rate": 8.436816997313662e-07, "loss": 1.9836, "step": 27782 }, { "epoch": 0.8963866500238954, "grad_norm": 0.328125, "learning_rate": 8.431614274572419e-07, "loss": 2.013, "step": 27783 }, { "epoch": 0.8964189138776918, "grad_norm": 0.3359375, "learning_rate": 8.426413110114378e-07, "loss": 1.9908, "step": 27784 }, { "epoch": 0.8964511777314881, "grad_norm": 0.33984375, "learning_rate": 8.42121350399686e-07, "loss": 1.9858, "step": 27785 }, { "epoch": 0.8964834415852845, "grad_norm": 0.33203125, "learning_rate": 8.416015456277004e-07, "loss": 1.9929, "step": 27786 }, { "epoch": 0.8965157054390808, "grad_norm": 0.330078125, "learning_rate": 8.410818967012079e-07, "loss": 1.9955, "step": 27787 }, { "epoch": 0.8965479692928772, "grad_norm": 0.337890625, "learning_rate": 8.405624036259341e-07, "loss": 2.0031, "step": 27788 }, { "epoch": 0.8965802331466735, "grad_norm": 0.333984375, "learning_rate": 8.400430664075858e-07, "loss": 1.9821, "step": 27789 }, { "epoch": 0.8966124970004699, "grad_norm": 0.345703125, "learning_rate": 8.395238850518855e-07, "loss": 1.9632, "step": 27790 }, { "epoch": 0.8966447608542661, "grad_norm": 0.333984375, "learning_rate": 8.390048595645517e-07, "loss": 1.9801, "step": 27791 }, { "epoch": 0.8966770247080625, "grad_norm": 0.3359375, "learning_rate": 8.384859899512898e-07, "loss": 1.965, "step": 27792 }, { "epoch": 0.8967092885618588, "grad_norm": 0.337890625, "learning_rate": 8.379672762178136e-07, "loss": 1.988, "step": 27793 }, { "epoch": 0.8967415524156552, "grad_norm": 0.333984375, "learning_rate": 8.374487183698388e-07, "loss": 1.9751, "step": 27794 }, { "epoch": 0.8967738162694516, "grad_norm": 0.337890625, "learning_rate": 8.369303164130654e-07, "loss": 2.0042, "step": 27795 }, { "epoch": 0.8968060801232479, "grad_norm": 0.337890625, "learning_rate": 8.364120703532025e-07, "loss": 1.9709, "step": 27796 }, { "epoch": 0.8968383439770443, "grad_norm": 0.33984375, "learning_rate": 8.358939801959553e-07, "loss": 1.995, "step": 27797 }, { "epoch": 0.8968706078308406, "grad_norm": 0.33203125, "learning_rate": 8.353760459470245e-07, "loss": 1.9801, "step": 27798 }, { "epoch": 0.896902871684637, "grad_norm": 0.337890625, "learning_rate": 8.348582676121136e-07, "loss": 1.9945, "step": 27799 }, { "epoch": 0.8969351355384333, "grad_norm": 0.333984375, "learning_rate": 8.343406451969182e-07, "loss": 1.9991, "step": 27800 }, { "epoch": 0.8969673993922297, "grad_norm": 0.345703125, "learning_rate": 8.33823178707142e-07, "loss": 1.9598, "step": 27801 }, { "epoch": 0.896999663246026, "grad_norm": 0.337890625, "learning_rate": 8.333058681484773e-07, "loss": 1.9733, "step": 27802 }, { "epoch": 0.8970319270998224, "grad_norm": 0.333984375, "learning_rate": 8.32788713526616e-07, "loss": 1.9908, "step": 27803 }, { "epoch": 0.8970641909536187, "grad_norm": 0.333984375, "learning_rate": 8.322717148472552e-07, "loss": 1.9623, "step": 27804 }, { "epoch": 0.8970964548074151, "grad_norm": 0.34375, "learning_rate": 8.31754872116084e-07, "loss": 2.0001, "step": 27805 }, { "epoch": 0.8971287186612114, "grad_norm": 0.33203125, "learning_rate": 8.312381853387873e-07, "loss": 1.9829, "step": 27806 }, { "epoch": 0.8971609825150078, "grad_norm": 0.33203125, "learning_rate": 8.307216545210594e-07, "loss": 1.986, "step": 27807 }, { "epoch": 0.897193246368804, "grad_norm": 0.337890625, "learning_rate": 8.302052796685822e-07, "loss": 1.9962, "step": 27808 }, { "epoch": 0.8972255102226004, "grad_norm": 0.333984375, "learning_rate": 8.296890607870394e-07, "loss": 1.9818, "step": 27809 }, { "epoch": 0.8972577740763967, "grad_norm": 0.333984375, "learning_rate": 8.291729978821133e-07, "loss": 1.9891, "step": 27810 }, { "epoch": 0.8972900379301931, "grad_norm": 0.3359375, "learning_rate": 8.28657090959486e-07, "loss": 1.9944, "step": 27811 }, { "epoch": 0.8973223017839894, "grad_norm": 0.33203125, "learning_rate": 8.281413400248328e-07, "loss": 1.9673, "step": 27812 }, { "epoch": 0.8973545656377858, "grad_norm": 0.32421875, "learning_rate": 8.276257450838343e-07, "loss": 1.9956, "step": 27813 }, { "epoch": 0.8973868294915821, "grad_norm": 0.333984375, "learning_rate": 8.271103061421659e-07, "loss": 1.9991, "step": 27814 }, { "epoch": 0.8974190933453785, "grad_norm": 0.330078125, "learning_rate": 8.265950232054964e-07, "loss": 2.0055, "step": 27815 }, { "epoch": 0.8974513571991749, "grad_norm": 0.330078125, "learning_rate": 8.260798962795046e-07, "loss": 1.9892, "step": 27816 }, { "epoch": 0.8974836210529712, "grad_norm": 0.33203125, "learning_rate": 8.255649253698527e-07, "loss": 1.9764, "step": 27817 }, { "epoch": 0.8975158849067676, "grad_norm": 0.330078125, "learning_rate": 8.250501104822145e-07, "loss": 1.9681, "step": 27818 }, { "epoch": 0.8975481487605639, "grad_norm": 0.333984375, "learning_rate": 8.245354516222603e-07, "loss": 1.9823, "step": 27819 }, { "epoch": 0.8975804126143603, "grad_norm": 0.330078125, "learning_rate": 8.240209487956457e-07, "loss": 1.9635, "step": 27820 }, { "epoch": 0.8976126764681566, "grad_norm": 0.333984375, "learning_rate": 8.235066020080379e-07, "loss": 1.9831, "step": 27821 }, { "epoch": 0.897644940321953, "grad_norm": 0.33203125, "learning_rate": 8.229924112651038e-07, "loss": 2.0067, "step": 27822 }, { "epoch": 0.8976772041757493, "grad_norm": 0.345703125, "learning_rate": 8.224783765724942e-07, "loss": 1.9821, "step": 27823 }, { "epoch": 0.8977094680295457, "grad_norm": 0.341796875, "learning_rate": 8.219644979358742e-07, "loss": 1.9647, "step": 27824 }, { "epoch": 0.8977417318833419, "grad_norm": 0.326171875, "learning_rate": 8.214507753608979e-07, "loss": 1.9674, "step": 27825 }, { "epoch": 0.8977739957371383, "grad_norm": 0.33203125, "learning_rate": 8.209372088532175e-07, "loss": 2.0193, "step": 27826 }, { "epoch": 0.8978062595909346, "grad_norm": 0.345703125, "learning_rate": 8.204237984184915e-07, "loss": 1.9788, "step": 27827 }, { "epoch": 0.897838523444731, "grad_norm": 0.33203125, "learning_rate": 8.199105440623656e-07, "loss": 1.993, "step": 27828 }, { "epoch": 0.8978707872985273, "grad_norm": 0.3359375, "learning_rate": 8.193974457904918e-07, "loss": 1.9869, "step": 27829 }, { "epoch": 0.8979030511523237, "grad_norm": 0.337890625, "learning_rate": 8.188845036085191e-07, "loss": 2.0105, "step": 27830 }, { "epoch": 0.89793531500612, "grad_norm": 0.3359375, "learning_rate": 8.183717175220895e-07, "loss": 1.9745, "step": 27831 }, { "epoch": 0.8979675788599164, "grad_norm": 0.345703125, "learning_rate": 8.178590875368536e-07, "loss": 1.9799, "step": 27832 }, { "epoch": 0.8979998427137127, "grad_norm": 0.337890625, "learning_rate": 8.173466136584485e-07, "loss": 1.9976, "step": 27833 }, { "epoch": 0.8980321065675091, "grad_norm": 0.337890625, "learning_rate": 8.168342958925179e-07, "loss": 1.9618, "step": 27834 }, { "epoch": 0.8980643704213055, "grad_norm": 0.33203125, "learning_rate": 8.163221342447008e-07, "loss": 1.9983, "step": 27835 }, { "epoch": 0.8980966342751018, "grad_norm": 0.33203125, "learning_rate": 8.158101287206343e-07, "loss": 1.9971, "step": 27836 }, { "epoch": 0.8981288981288982, "grad_norm": 0.33203125, "learning_rate": 8.152982793259522e-07, "loss": 1.9862, "step": 27837 }, { "epoch": 0.8981611619826945, "grad_norm": 0.328125, "learning_rate": 8.147865860662934e-07, "loss": 2.0183, "step": 27838 }, { "epoch": 0.8981934258364909, "grad_norm": 0.3359375, "learning_rate": 8.142750489472867e-07, "loss": 1.9551, "step": 27839 }, { "epoch": 0.8982256896902872, "grad_norm": 0.330078125, "learning_rate": 8.137636679745625e-07, "loss": 1.9928, "step": 27840 }, { "epoch": 0.8982579535440836, "grad_norm": 0.330078125, "learning_rate": 8.132524431537514e-07, "loss": 1.9712, "step": 27841 }, { "epoch": 0.8982902173978798, "grad_norm": 0.333984375, "learning_rate": 8.127413744904805e-07, "loss": 1.9966, "step": 27842 }, { "epoch": 0.8983224812516762, "grad_norm": 0.3359375, "learning_rate": 8.12230461990372e-07, "loss": 2.0017, "step": 27843 }, { "epoch": 0.8983547451054725, "grad_norm": 0.33984375, "learning_rate": 8.117197056590547e-07, "loss": 2.0057, "step": 27844 }, { "epoch": 0.8983870089592689, "grad_norm": 0.337890625, "learning_rate": 8.112091055021492e-07, "loss": 1.9807, "step": 27845 }, { "epoch": 0.8984192728130652, "grad_norm": 0.328125, "learning_rate": 8.106986615252726e-07, "loss": 1.9865, "step": 27846 }, { "epoch": 0.8984515366668616, "grad_norm": 0.341796875, "learning_rate": 8.101883737340488e-07, "loss": 2.0265, "step": 27847 }, { "epoch": 0.8984838005206579, "grad_norm": 0.330078125, "learning_rate": 8.096782421340882e-07, "loss": 1.953, "step": 27848 }, { "epoch": 0.8985160643744543, "grad_norm": 0.330078125, "learning_rate": 8.091682667310097e-07, "loss": 1.9809, "step": 27849 }, { "epoch": 0.8985483282282506, "grad_norm": 0.33203125, "learning_rate": 8.086584475304304e-07, "loss": 1.9685, "step": 27850 }, { "epoch": 0.898580592082047, "grad_norm": 0.33203125, "learning_rate": 8.081487845379526e-07, "loss": 1.9882, "step": 27851 }, { "epoch": 0.8986128559358433, "grad_norm": 0.33984375, "learning_rate": 8.076392777591935e-07, "loss": 1.9856, "step": 27852 }, { "epoch": 0.8986451197896397, "grad_norm": 0.3671875, "learning_rate": 8.071299271997634e-07, "loss": 1.9631, "step": 27853 }, { "epoch": 0.898677383643436, "grad_norm": 0.333984375, "learning_rate": 8.066207328652614e-07, "loss": 1.9961, "step": 27854 }, { "epoch": 0.8987096474972324, "grad_norm": 0.345703125, "learning_rate": 8.061116947612962e-07, "loss": 1.9584, "step": 27855 }, { "epoch": 0.8987419113510288, "grad_norm": 0.357421875, "learning_rate": 8.056028128934733e-07, "loss": 1.9945, "step": 27856 }, { "epoch": 0.898774175204825, "grad_norm": 0.345703125, "learning_rate": 8.050940872673885e-07, "loss": 1.9966, "step": 27857 }, { "epoch": 0.8988064390586215, "grad_norm": 0.337890625, "learning_rate": 8.045855178886469e-07, "loss": 1.9785, "step": 27858 }, { "epoch": 0.8988387029124177, "grad_norm": 0.34765625, "learning_rate": 8.040771047628425e-07, "loss": 2.0115, "step": 27859 }, { "epoch": 0.8988709667662141, "grad_norm": 0.333984375, "learning_rate": 8.035688478955727e-07, "loss": 1.9941, "step": 27860 }, { "epoch": 0.8989032306200104, "grad_norm": 0.328125, "learning_rate": 8.030607472924345e-07, "loss": 1.9842, "step": 27861 }, { "epoch": 0.8989354944738068, "grad_norm": 0.333984375, "learning_rate": 8.025528029590185e-07, "loss": 1.9806, "step": 27862 }, { "epoch": 0.8989677583276031, "grad_norm": 0.33984375, "learning_rate": 8.020450149009151e-07, "loss": 1.9655, "step": 27863 }, { "epoch": 0.8990000221813995, "grad_norm": 0.3359375, "learning_rate": 8.015373831237149e-07, "loss": 1.9819, "step": 27864 }, { "epoch": 0.8990322860351958, "grad_norm": 0.333984375, "learning_rate": 8.010299076330052e-07, "loss": 2.0136, "step": 27865 }, { "epoch": 0.8990645498889922, "grad_norm": 0.3359375, "learning_rate": 8.005225884343731e-07, "loss": 1.9721, "step": 27866 }, { "epoch": 0.8990968137427885, "grad_norm": 0.33984375, "learning_rate": 8.000154255334008e-07, "loss": 1.9869, "step": 27867 }, { "epoch": 0.8991290775965849, "grad_norm": 0.333984375, "learning_rate": 7.995084189356705e-07, "loss": 1.9651, "step": 27868 }, { "epoch": 0.8991613414503812, "grad_norm": 0.333984375, "learning_rate": 7.990015686467661e-07, "loss": 1.9766, "step": 27869 }, { "epoch": 0.8991936053041776, "grad_norm": 0.330078125, "learning_rate": 7.984948746722647e-07, "loss": 1.9508, "step": 27870 }, { "epoch": 0.8992258691579739, "grad_norm": 0.328125, "learning_rate": 7.979883370177404e-07, "loss": 2.0025, "step": 27871 }, { "epoch": 0.8992581330117703, "grad_norm": 0.33203125, "learning_rate": 7.974819556887753e-07, "loss": 2.0071, "step": 27872 }, { "epoch": 0.8992903968655666, "grad_norm": 0.333984375, "learning_rate": 7.969757306909398e-07, "loss": 2.0002, "step": 27873 }, { "epoch": 0.899322660719363, "grad_norm": 0.3359375, "learning_rate": 7.964696620298045e-07, "loss": 1.9747, "step": 27874 }, { "epoch": 0.8993549245731592, "grad_norm": 0.345703125, "learning_rate": 7.959637497109434e-07, "loss": 1.926, "step": 27875 }, { "epoch": 0.8993871884269556, "grad_norm": 0.333984375, "learning_rate": 7.954579937399236e-07, "loss": 1.9927, "step": 27876 }, { "epoch": 0.899419452280752, "grad_norm": 0.333984375, "learning_rate": 7.949523941223091e-07, "loss": 1.972, "step": 27877 }, { "epoch": 0.8994517161345483, "grad_norm": 0.328125, "learning_rate": 7.94446950863672e-07, "loss": 1.9412, "step": 27878 }, { "epoch": 0.8994839799883447, "grad_norm": 0.341796875, "learning_rate": 7.939416639695696e-07, "loss": 1.9813, "step": 27879 }, { "epoch": 0.899516243842141, "grad_norm": 0.33203125, "learning_rate": 7.93436533445564e-07, "loss": 1.9714, "step": 27880 }, { "epoch": 0.8995485076959374, "grad_norm": 0.3359375, "learning_rate": 7.929315592972225e-07, "loss": 2.0431, "step": 27881 }, { "epoch": 0.8995807715497337, "grad_norm": 0.3359375, "learning_rate": 7.92426741530094e-07, "loss": 1.9815, "step": 27882 }, { "epoch": 0.8996130354035301, "grad_norm": 0.3359375, "learning_rate": 7.91922080149739e-07, "loss": 1.9817, "step": 27883 }, { "epoch": 0.8996452992573264, "grad_norm": 0.33203125, "learning_rate": 7.914175751617165e-07, "loss": 1.986, "step": 27884 }, { "epoch": 0.8996775631111228, "grad_norm": 0.337890625, "learning_rate": 7.909132265715735e-07, "loss": 2.0072, "step": 27885 }, { "epoch": 0.8997098269649191, "grad_norm": 0.326171875, "learning_rate": 7.904090343848658e-07, "loss": 1.998, "step": 27886 }, { "epoch": 0.8997420908187155, "grad_norm": 0.37890625, "learning_rate": 7.899049986071405e-07, "loss": 1.9984, "step": 27887 }, { "epoch": 0.8997743546725118, "grad_norm": 0.333984375, "learning_rate": 7.894011192439449e-07, "loss": 1.9966, "step": 27888 }, { "epoch": 0.8998066185263082, "grad_norm": 0.33203125, "learning_rate": 7.888973963008294e-07, "loss": 1.988, "step": 27889 }, { "epoch": 0.8998388823801045, "grad_norm": 0.34375, "learning_rate": 7.883938297833382e-07, "loss": 1.9839, "step": 27890 }, { "epoch": 0.8998711462339009, "grad_norm": 0.33203125, "learning_rate": 7.878904196970083e-07, "loss": 1.995, "step": 27891 }, { "epoch": 0.8999034100876971, "grad_norm": 0.328125, "learning_rate": 7.873871660473869e-07, "loss": 1.9564, "step": 27892 }, { "epoch": 0.8999356739414935, "grad_norm": 0.330078125, "learning_rate": 7.868840688400114e-07, "loss": 1.9905, "step": 27893 }, { "epoch": 0.8999679377952898, "grad_norm": 0.33984375, "learning_rate": 7.863811280804189e-07, "loss": 1.9811, "step": 27894 }, { "epoch": 0.9000002016490862, "grad_norm": 0.3359375, "learning_rate": 7.858783437741468e-07, "loss": 1.9901, "step": 27895 }, { "epoch": 0.9000324655028826, "grad_norm": 0.33203125, "learning_rate": 7.853757159267305e-07, "loss": 1.9725, "step": 27896 }, { "epoch": 0.9000647293566789, "grad_norm": 0.3359375, "learning_rate": 7.848732445436973e-07, "loss": 2.0045, "step": 27897 }, { "epoch": 0.9000969932104753, "grad_norm": 0.419921875, "learning_rate": 7.843709296305828e-07, "loss": 1.9864, "step": 27898 }, { "epoch": 0.9001292570642716, "grad_norm": 0.349609375, "learning_rate": 7.838687711929143e-07, "loss": 1.9591, "step": 27899 }, { "epoch": 0.900161520918068, "grad_norm": 0.341796875, "learning_rate": 7.833667692362223e-07, "loss": 1.9734, "step": 27900 }, { "epoch": 0.9001937847718643, "grad_norm": 0.328125, "learning_rate": 7.82864923766029e-07, "loss": 2.0188, "step": 27901 }, { "epoch": 0.9002260486256607, "grad_norm": 0.349609375, "learning_rate": 7.823632347878567e-07, "loss": 1.9977, "step": 27902 }, { "epoch": 0.900258312479457, "grad_norm": 0.341796875, "learning_rate": 7.818617023072328e-07, "loss": 1.9862, "step": 27903 }, { "epoch": 0.9002905763332534, "grad_norm": 0.333984375, "learning_rate": 7.813603263296759e-07, "loss": 1.9857, "step": 27904 }, { "epoch": 0.9003228401870497, "grad_norm": 0.353515625, "learning_rate": 7.808591068607018e-07, "loss": 1.9824, "step": 27905 }, { "epoch": 0.9003551040408461, "grad_norm": 0.333984375, "learning_rate": 7.803580439058327e-07, "loss": 1.9964, "step": 27906 }, { "epoch": 0.9003873678946424, "grad_norm": 0.369140625, "learning_rate": 7.798571374705776e-07, "loss": 1.9664, "step": 27907 }, { "epoch": 0.9004196317484388, "grad_norm": 0.33984375, "learning_rate": 7.793563875604553e-07, "loss": 1.9659, "step": 27908 }, { "epoch": 0.900451895602235, "grad_norm": 0.337890625, "learning_rate": 7.788557941809781e-07, "loss": 1.9761, "step": 27909 }, { "epoch": 0.9004841594560314, "grad_norm": 0.341796875, "learning_rate": 7.783553573376501e-07, "loss": 1.9965, "step": 27910 }, { "epoch": 0.9005164233098277, "grad_norm": 0.345703125, "learning_rate": 7.778550770359849e-07, "loss": 1.998, "step": 27911 }, { "epoch": 0.9005486871636241, "grad_norm": 0.3359375, "learning_rate": 7.773549532814916e-07, "loss": 2.0121, "step": 27912 }, { "epoch": 0.9005809510174204, "grad_norm": 0.33984375, "learning_rate": 7.768549860796676e-07, "loss": 2.0169, "step": 27913 }, { "epoch": 0.9006132148712168, "grad_norm": 0.341796875, "learning_rate": 7.763551754360216e-07, "loss": 1.9744, "step": 27914 }, { "epoch": 0.9006454787250131, "grad_norm": 0.333984375, "learning_rate": 7.758555213560559e-07, "loss": 1.9832, "step": 27915 }, { "epoch": 0.9006777425788095, "grad_norm": 0.33203125, "learning_rate": 7.753560238452645e-07, "loss": 2.0004, "step": 27916 }, { "epoch": 0.9007100064326059, "grad_norm": 0.34375, "learning_rate": 7.74856682909153e-07, "loss": 1.9714, "step": 27917 }, { "epoch": 0.9007422702864022, "grad_norm": 0.341796875, "learning_rate": 7.743574985532121e-07, "loss": 1.9655, "step": 27918 }, { "epoch": 0.9007745341401986, "grad_norm": 0.333984375, "learning_rate": 7.738584707829371e-07, "loss": 1.9996, "step": 27919 }, { "epoch": 0.9008067979939949, "grad_norm": 0.330078125, "learning_rate": 7.733595996038239e-07, "loss": 1.996, "step": 27920 }, { "epoch": 0.9008390618477913, "grad_norm": 0.333984375, "learning_rate": 7.72860885021363e-07, "loss": 1.9676, "step": 27921 }, { "epoch": 0.9008713257015876, "grad_norm": 0.345703125, "learning_rate": 7.723623270410402e-07, "loss": 1.9872, "step": 27922 }, { "epoch": 0.900903589555384, "grad_norm": 0.33984375, "learning_rate": 7.718639256683491e-07, "loss": 1.9367, "step": 27923 }, { "epoch": 0.9009358534091803, "grad_norm": 0.333984375, "learning_rate": 7.713656809087722e-07, "loss": 1.9924, "step": 27924 }, { "epoch": 0.9009681172629767, "grad_norm": 0.33203125, "learning_rate": 7.708675927677916e-07, "loss": 1.962, "step": 27925 }, { "epoch": 0.9010003811167729, "grad_norm": 0.337890625, "learning_rate": 7.703696612508948e-07, "loss": 1.9768, "step": 27926 }, { "epoch": 0.9010326449705693, "grad_norm": 0.34375, "learning_rate": 7.698718863635606e-07, "loss": 1.985, "step": 27927 }, { "epoch": 0.9010649088243656, "grad_norm": 0.330078125, "learning_rate": 7.693742681112664e-07, "loss": 1.9953, "step": 27928 }, { "epoch": 0.901097172678162, "grad_norm": 0.3359375, "learning_rate": 7.688768064994928e-07, "loss": 1.992, "step": 27929 }, { "epoch": 0.9011294365319583, "grad_norm": 0.3515625, "learning_rate": 7.683795015337137e-07, "loss": 1.9762, "step": 27930 }, { "epoch": 0.9011617003857547, "grad_norm": 0.333984375, "learning_rate": 7.678823532194013e-07, "loss": 1.9812, "step": 27931 }, { "epoch": 0.901193964239551, "grad_norm": 0.34375, "learning_rate": 7.673853615620329e-07, "loss": 1.9562, "step": 27932 }, { "epoch": 0.9012262280933474, "grad_norm": 0.33984375, "learning_rate": 7.668885265670728e-07, "loss": 2.0074, "step": 27933 }, { "epoch": 0.9012584919471437, "grad_norm": 0.3359375, "learning_rate": 7.663918482399961e-07, "loss": 1.9825, "step": 27934 }, { "epoch": 0.9012907558009401, "grad_norm": 0.328125, "learning_rate": 7.658953265862672e-07, "loss": 2.0091, "step": 27935 }, { "epoch": 0.9013230196547364, "grad_norm": 0.333984375, "learning_rate": 7.653989616113483e-07, "loss": 1.9772, "step": 27936 }, { "epoch": 0.9013552835085328, "grad_norm": 0.337890625, "learning_rate": 7.649027533207082e-07, "loss": 1.9749, "step": 27937 }, { "epoch": 0.9013875473623292, "grad_norm": 0.341796875, "learning_rate": 7.644067017198041e-07, "loss": 2.0067, "step": 27938 }, { "epoch": 0.9014198112161255, "grad_norm": 0.337890625, "learning_rate": 7.639108068140987e-07, "loss": 1.9622, "step": 27939 }, { "epoch": 0.9014520750699219, "grad_norm": 0.34765625, "learning_rate": 7.634150686090541e-07, "loss": 1.9832, "step": 27940 }, { "epoch": 0.9014843389237182, "grad_norm": 0.3359375, "learning_rate": 7.629194871101175e-07, "loss": 2.0036, "step": 27941 }, { "epoch": 0.9015166027775146, "grad_norm": 0.33984375, "learning_rate": 7.624240623227514e-07, "loss": 2.0223, "step": 27942 }, { "epoch": 0.9015488666313108, "grad_norm": 0.337890625, "learning_rate": 7.619287942524112e-07, "loss": 1.9674, "step": 27943 }, { "epoch": 0.9015811304851072, "grad_norm": 0.330078125, "learning_rate": 7.614336829045393e-07, "loss": 1.9914, "step": 27944 }, { "epoch": 0.9016133943389035, "grad_norm": 0.337890625, "learning_rate": 7.609387282845914e-07, "loss": 2.0034, "step": 27945 }, { "epoch": 0.9016456581926999, "grad_norm": 0.337890625, "learning_rate": 7.604439303980182e-07, "loss": 1.9554, "step": 27946 }, { "epoch": 0.9016779220464962, "grad_norm": 0.328125, "learning_rate": 7.599492892502602e-07, "loss": 1.997, "step": 27947 }, { "epoch": 0.9017101859002926, "grad_norm": 0.353515625, "learning_rate": 7.594548048467648e-07, "loss": 1.9865, "step": 27948 }, { "epoch": 0.9017424497540889, "grad_norm": 0.34375, "learning_rate": 7.589604771929759e-07, "loss": 2.007, "step": 27949 }, { "epoch": 0.9017747136078853, "grad_norm": 0.330078125, "learning_rate": 7.584663062943326e-07, "loss": 2.013, "step": 27950 }, { "epoch": 0.9018069774616816, "grad_norm": 0.330078125, "learning_rate": 7.579722921562754e-07, "loss": 1.9838, "step": 27951 }, { "epoch": 0.901839241315478, "grad_norm": 0.33203125, "learning_rate": 7.574784347842434e-07, "loss": 2.0104, "step": 27952 }, { "epoch": 0.9018715051692743, "grad_norm": 0.337890625, "learning_rate": 7.569847341836688e-07, "loss": 2.008, "step": 27953 }, { "epoch": 0.9019037690230707, "grad_norm": 0.33203125, "learning_rate": 7.564911903599908e-07, "loss": 1.9717, "step": 27954 }, { "epoch": 0.901936032876867, "grad_norm": 0.330078125, "learning_rate": 7.559978033186382e-07, "loss": 1.977, "step": 27955 }, { "epoch": 0.9019682967306634, "grad_norm": 0.33203125, "learning_rate": 7.555045730650417e-07, "loss": 1.9895, "step": 27956 }, { "epoch": 0.9020005605844598, "grad_norm": 0.333984375, "learning_rate": 7.550114996046354e-07, "loss": 2.0244, "step": 27957 }, { "epoch": 0.902032824438256, "grad_norm": 0.3359375, "learning_rate": 7.545185829428414e-07, "loss": 1.9876, "step": 27958 }, { "epoch": 0.9020650882920525, "grad_norm": 0.33984375, "learning_rate": 7.540258230850855e-07, "loss": 1.9638, "step": 27959 }, { "epoch": 0.9020973521458487, "grad_norm": 0.333984375, "learning_rate": 7.535332200367967e-07, "loss": 1.9844, "step": 27960 }, { "epoch": 0.9021296159996451, "grad_norm": 0.333984375, "learning_rate": 7.530407738033924e-07, "loss": 2.0137, "step": 27961 }, { "epoch": 0.9021618798534414, "grad_norm": 0.333984375, "learning_rate": 7.52548484390293e-07, "loss": 1.9811, "step": 27962 }, { "epoch": 0.9021941437072378, "grad_norm": 0.34375, "learning_rate": 7.520563518029211e-07, "loss": 1.9857, "step": 27963 }, { "epoch": 0.9022264075610341, "grad_norm": 0.33203125, "learning_rate": 7.51564376046689e-07, "loss": 1.9971, "step": 27964 }, { "epoch": 0.9022586714148305, "grad_norm": 0.32421875, "learning_rate": 7.510725571270155e-07, "loss": 2.0204, "step": 27965 }, { "epoch": 0.9022909352686268, "grad_norm": 0.328125, "learning_rate": 7.505808950493148e-07, "loss": 2.0102, "step": 27966 }, { "epoch": 0.9023231991224232, "grad_norm": 0.330078125, "learning_rate": 7.500893898189942e-07, "loss": 1.9682, "step": 27967 }, { "epoch": 0.9023554629762195, "grad_norm": 0.3359375, "learning_rate": 7.49598041441471e-07, "loss": 1.9836, "step": 27968 }, { "epoch": 0.9023877268300159, "grad_norm": 0.333984375, "learning_rate": 7.491068499221459e-07, "loss": 2.0139, "step": 27969 }, { "epoch": 0.9024199906838122, "grad_norm": 0.32421875, "learning_rate": 7.48615815266428e-07, "loss": 1.9856, "step": 27970 }, { "epoch": 0.9024522545376086, "grad_norm": 0.328125, "learning_rate": 7.481249374797277e-07, "loss": 1.9574, "step": 27971 }, { "epoch": 0.9024845183914049, "grad_norm": 0.326171875, "learning_rate": 7.47634216567441e-07, "loss": 1.9703, "step": 27972 }, { "epoch": 0.9025167822452013, "grad_norm": 0.333984375, "learning_rate": 7.471436525349701e-07, "loss": 1.9569, "step": 27973 }, { "epoch": 0.9025490460989976, "grad_norm": 0.333984375, "learning_rate": 7.466532453877222e-07, "loss": 1.9855, "step": 27974 }, { "epoch": 0.902581309952794, "grad_norm": 0.330078125, "learning_rate": 7.461629951310867e-07, "loss": 1.9671, "step": 27975 }, { "epoch": 0.9026135738065902, "grad_norm": 0.345703125, "learning_rate": 7.456729017704639e-07, "loss": 2.0032, "step": 27976 }, { "epoch": 0.9026458376603866, "grad_norm": 0.337890625, "learning_rate": 7.45182965311248e-07, "loss": 1.9888, "step": 27977 }, { "epoch": 0.902678101514183, "grad_norm": 0.341796875, "learning_rate": 7.446931857588313e-07, "loss": 2.0263, "step": 27978 }, { "epoch": 0.9027103653679793, "grad_norm": 0.333984375, "learning_rate": 7.442035631186061e-07, "loss": 2.0001, "step": 27979 }, { "epoch": 0.9027426292217757, "grad_norm": 0.349609375, "learning_rate": 7.437140973959599e-07, "loss": 1.9827, "step": 27980 }, { "epoch": 0.902774893075572, "grad_norm": 0.341796875, "learning_rate": 7.432247885962817e-07, "loss": 1.9766, "step": 27981 }, { "epoch": 0.9028071569293684, "grad_norm": 0.337890625, "learning_rate": 7.427356367249572e-07, "loss": 1.9872, "step": 27982 }, { "epoch": 0.9028394207831647, "grad_norm": 0.328125, "learning_rate": 7.422466417873719e-07, "loss": 2.004, "step": 27983 }, { "epoch": 0.9028716846369611, "grad_norm": 0.337890625, "learning_rate": 7.41757803788905e-07, "loss": 2.0038, "step": 27984 }, { "epoch": 0.9029039484907574, "grad_norm": 0.34375, "learning_rate": 7.412691227349405e-07, "loss": 2.0017, "step": 27985 }, { "epoch": 0.9029362123445538, "grad_norm": 0.345703125, "learning_rate": 7.407805986308574e-07, "loss": 2.0063, "step": 27986 }, { "epoch": 0.9029684761983501, "grad_norm": 0.3359375, "learning_rate": 7.402922314820298e-07, "loss": 2.0069, "step": 27987 }, { "epoch": 0.9030007400521465, "grad_norm": 0.33984375, "learning_rate": 7.398040212938368e-07, "loss": 2.0193, "step": 27988 }, { "epoch": 0.9030330039059428, "grad_norm": 0.34765625, "learning_rate": 7.393159680716522e-07, "loss": 1.9901, "step": 27989 }, { "epoch": 0.9030652677597392, "grad_norm": 0.3359375, "learning_rate": 7.388280718208434e-07, "loss": 1.994, "step": 27990 }, { "epoch": 0.9030975316135355, "grad_norm": 0.33203125, "learning_rate": 7.383403325467863e-07, "loss": 1.9846, "step": 27991 }, { "epoch": 0.9031297954673319, "grad_norm": 0.33203125, "learning_rate": 7.378527502548483e-07, "loss": 2.004, "step": 27992 }, { "epoch": 0.9031620593211281, "grad_norm": 0.33984375, "learning_rate": 7.373653249503931e-07, "loss": 1.9778, "step": 27993 }, { "epoch": 0.9031943231749245, "grad_norm": 0.33203125, "learning_rate": 7.368780566387917e-07, "loss": 1.9867, "step": 27994 }, { "epoch": 0.9032265870287208, "grad_norm": 0.333984375, "learning_rate": 7.363909453254031e-07, "loss": 2.003, "step": 27995 }, { "epoch": 0.9032588508825172, "grad_norm": 0.3359375, "learning_rate": 7.359039910155879e-07, "loss": 1.9969, "step": 27996 }, { "epoch": 0.9032911147363136, "grad_norm": 0.345703125, "learning_rate": 7.35417193714712e-07, "loss": 2.0076, "step": 27997 }, { "epoch": 0.9033233785901099, "grad_norm": 0.333984375, "learning_rate": 7.349305534281275e-07, "loss": 1.9625, "step": 27998 }, { "epoch": 0.9033556424439063, "grad_norm": 0.333984375, "learning_rate": 7.34444070161197e-07, "loss": 1.9829, "step": 27999 }, { "epoch": 0.9033879062977026, "grad_norm": 0.337890625, "learning_rate": 7.339577439192696e-07, "loss": 2.0011, "step": 28000 }, { "epoch": 0.903420170151499, "grad_norm": 0.328125, "learning_rate": 7.334715747076992e-07, "loss": 1.9829, "step": 28001 }, { "epoch": 0.9034524340052953, "grad_norm": 0.33203125, "learning_rate": 7.329855625318449e-07, "loss": 1.9848, "step": 28002 }, { "epoch": 0.9034846978590917, "grad_norm": 0.333984375, "learning_rate": 7.324997073970457e-07, "loss": 2.0079, "step": 28003 }, { "epoch": 0.903516961712888, "grad_norm": 0.341796875, "learning_rate": 7.320140093086542e-07, "loss": 1.9958, "step": 28004 }, { "epoch": 0.9035492255666844, "grad_norm": 0.326171875, "learning_rate": 7.315284682720192e-07, "loss": 1.9556, "step": 28005 }, { "epoch": 0.9035814894204807, "grad_norm": 0.3359375, "learning_rate": 7.310430842924798e-07, "loss": 2.0108, "step": 28006 }, { "epoch": 0.9036137532742771, "grad_norm": 0.328125, "learning_rate": 7.305578573753852e-07, "loss": 1.9996, "step": 28007 }, { "epoch": 0.9036460171280734, "grad_norm": 0.3359375, "learning_rate": 7.30072787526071e-07, "loss": 1.9914, "step": 28008 }, { "epoch": 0.9036782809818698, "grad_norm": 0.33203125, "learning_rate": 7.295878747498763e-07, "loss": 1.976, "step": 28009 }, { "epoch": 0.903710544835666, "grad_norm": 0.3359375, "learning_rate": 7.291031190521436e-07, "loss": 2.0194, "step": 28010 }, { "epoch": 0.9037428086894624, "grad_norm": 0.3359375, "learning_rate": 7.286185204382051e-07, "loss": 1.984, "step": 28011 }, { "epoch": 0.9037750725432587, "grad_norm": 0.326171875, "learning_rate": 7.281340789133933e-07, "loss": 2.0023, "step": 28012 }, { "epoch": 0.9038073363970551, "grad_norm": 0.326171875, "learning_rate": 7.276497944830474e-07, "loss": 1.9976, "step": 28013 }, { "epoch": 0.9038396002508514, "grad_norm": 0.333984375, "learning_rate": 7.271656671524912e-07, "loss": 2.0186, "step": 28014 }, { "epoch": 0.9038718641046478, "grad_norm": 0.333984375, "learning_rate": 7.26681696927054e-07, "loss": 1.9952, "step": 28015 }, { "epoch": 0.9039041279584441, "grad_norm": 0.333984375, "learning_rate": 7.26197883812068e-07, "loss": 1.9728, "step": 28016 }, { "epoch": 0.9039363918122405, "grad_norm": 0.345703125, "learning_rate": 7.25714227812856e-07, "loss": 1.9871, "step": 28017 }, { "epoch": 0.9039686556660369, "grad_norm": 0.33203125, "learning_rate": 7.252307289347382e-07, "loss": 1.9412, "step": 28018 }, { "epoch": 0.9040009195198332, "grad_norm": 0.33203125, "learning_rate": 7.24747387183044e-07, "loss": 2.0085, "step": 28019 }, { "epoch": 0.9040331833736296, "grad_norm": 0.333984375, "learning_rate": 7.242642025630874e-07, "loss": 1.999, "step": 28020 }, { "epoch": 0.9040654472274259, "grad_norm": 0.3359375, "learning_rate": 7.237811750801876e-07, "loss": 2.0148, "step": 28021 }, { "epoch": 0.9040977110812223, "grad_norm": 0.333984375, "learning_rate": 7.232983047396652e-07, "loss": 1.9677, "step": 28022 }, { "epoch": 0.9041299749350186, "grad_norm": 0.333984375, "learning_rate": 7.228155915468343e-07, "loss": 1.9973, "step": 28023 }, { "epoch": 0.904162238788815, "grad_norm": 0.328125, "learning_rate": 7.223330355070024e-07, "loss": 2.01, "step": 28024 }, { "epoch": 0.9041945026426113, "grad_norm": 0.33203125, "learning_rate": 7.218506366254901e-07, "loss": 1.961, "step": 28025 }, { "epoch": 0.9042267664964077, "grad_norm": 0.330078125, "learning_rate": 7.213683949076016e-07, "loss": 1.9859, "step": 28026 }, { "epoch": 0.9042590303502039, "grad_norm": 0.333984375, "learning_rate": 7.208863103586444e-07, "loss": 2.015, "step": 28027 }, { "epoch": 0.9042912942040003, "grad_norm": 0.328125, "learning_rate": 7.204043829839291e-07, "loss": 1.9904, "step": 28028 }, { "epoch": 0.9043235580577966, "grad_norm": 0.33203125, "learning_rate": 7.199226127887582e-07, "loss": 1.9587, "step": 28029 }, { "epoch": 0.904355821911593, "grad_norm": 0.33984375, "learning_rate": 7.19440999778434e-07, "loss": 1.9577, "step": 28030 }, { "epoch": 0.9043880857653893, "grad_norm": 0.326171875, "learning_rate": 7.189595439582575e-07, "loss": 2.0, "step": 28031 }, { "epoch": 0.9044203496191857, "grad_norm": 0.33984375, "learning_rate": 7.184782453335275e-07, "loss": 2.0131, "step": 28032 }, { "epoch": 0.904452613472982, "grad_norm": 0.328125, "learning_rate": 7.179971039095484e-07, "loss": 1.9367, "step": 28033 }, { "epoch": 0.9044848773267784, "grad_norm": 0.333984375, "learning_rate": 7.175161196916091e-07, "loss": 2.0217, "step": 28034 }, { "epoch": 0.9045171411805747, "grad_norm": 0.328125, "learning_rate": 7.170352926850038e-07, "loss": 1.9756, "step": 28035 }, { "epoch": 0.9045494050343711, "grad_norm": 0.357421875, "learning_rate": 7.165546228950331e-07, "loss": 2.0027, "step": 28036 }, { "epoch": 0.9045816688881674, "grad_norm": 0.330078125, "learning_rate": 7.160741103269763e-07, "loss": 1.999, "step": 28037 }, { "epoch": 0.9046139327419638, "grad_norm": 0.33203125, "learning_rate": 7.155937549861325e-07, "loss": 1.9718, "step": 28038 }, { "epoch": 0.9046461965957602, "grad_norm": 0.34765625, "learning_rate": 7.151135568777839e-07, "loss": 1.9957, "step": 28039 }, { "epoch": 0.9046784604495565, "grad_norm": 0.37109375, "learning_rate": 7.146335160072149e-07, "loss": 1.9859, "step": 28040 }, { "epoch": 0.9047107243033529, "grad_norm": 0.33203125, "learning_rate": 7.141536323797143e-07, "loss": 1.978, "step": 28041 }, { "epoch": 0.9047429881571492, "grad_norm": 0.337890625, "learning_rate": 7.136739060005631e-07, "loss": 1.9926, "step": 28042 }, { "epoch": 0.9047752520109456, "grad_norm": 0.333984375, "learning_rate": 7.131943368750371e-07, "loss": 1.992, "step": 28043 }, { "epoch": 0.9048075158647418, "grad_norm": 0.33984375, "learning_rate": 7.127149250084203e-07, "loss": 2.0034, "step": 28044 }, { "epoch": 0.9048397797185382, "grad_norm": 0.333984375, "learning_rate": 7.122356704059884e-07, "loss": 2.0066, "step": 28045 }, { "epoch": 0.9048720435723345, "grad_norm": 0.3359375, "learning_rate": 7.117565730730157e-07, "loss": 1.9889, "step": 28046 }, { "epoch": 0.9049043074261309, "grad_norm": 0.33203125, "learning_rate": 7.112776330147763e-07, "loss": 1.9754, "step": 28047 }, { "epoch": 0.9049365712799272, "grad_norm": 0.3359375, "learning_rate": 7.107988502365425e-07, "loss": 1.974, "step": 28048 }, { "epoch": 0.9049688351337236, "grad_norm": 0.3359375, "learning_rate": 7.103202247435819e-07, "loss": 2.0371, "step": 28049 }, { "epoch": 0.9050010989875199, "grad_norm": 0.3359375, "learning_rate": 7.098417565411685e-07, "loss": 1.9889, "step": 28050 }, { "epoch": 0.9050333628413163, "grad_norm": 0.3359375, "learning_rate": 7.093634456345632e-07, "loss": 1.9879, "step": 28051 }, { "epoch": 0.9050656266951126, "grad_norm": 0.33203125, "learning_rate": 7.088852920290317e-07, "loss": 1.9901, "step": 28052 }, { "epoch": 0.905097890548909, "grad_norm": 0.328125, "learning_rate": 7.084072957298416e-07, "loss": 1.9941, "step": 28053 }, { "epoch": 0.9051301544027053, "grad_norm": 0.33984375, "learning_rate": 7.0792945674225e-07, "loss": 2.0076, "step": 28054 }, { "epoch": 0.9051624182565017, "grad_norm": 0.333984375, "learning_rate": 7.074517750715165e-07, "loss": 1.9951, "step": 28055 }, { "epoch": 0.905194682110298, "grad_norm": 0.341796875, "learning_rate": 7.069742507229016e-07, "loss": 2.0098, "step": 28056 }, { "epoch": 0.9052269459640944, "grad_norm": 0.337890625, "learning_rate": 7.064968837016611e-07, "loss": 1.9873, "step": 28057 }, { "epoch": 0.9052592098178908, "grad_norm": 0.33984375, "learning_rate": 7.060196740130493e-07, "loss": 2.0139, "step": 28058 }, { "epoch": 0.905291473671687, "grad_norm": 0.337890625, "learning_rate": 7.055426216623151e-07, "loss": 1.9774, "step": 28059 }, { "epoch": 0.9053237375254835, "grad_norm": 0.33203125, "learning_rate": 7.050657266547161e-07, "loss": 1.9864, "step": 28060 }, { "epoch": 0.9053560013792797, "grad_norm": 0.34375, "learning_rate": 7.045889889954982e-07, "loss": 2.0074, "step": 28061 }, { "epoch": 0.9053882652330761, "grad_norm": 0.337890625, "learning_rate": 7.041124086899053e-07, "loss": 1.9647, "step": 28062 }, { "epoch": 0.9054205290868724, "grad_norm": 0.333984375, "learning_rate": 7.036359857431901e-07, "loss": 1.9612, "step": 28063 }, { "epoch": 0.9054527929406688, "grad_norm": 0.326171875, "learning_rate": 7.031597201605966e-07, "loss": 1.9478, "step": 28064 }, { "epoch": 0.9054850567944651, "grad_norm": 0.3359375, "learning_rate": 7.026836119473589e-07, "loss": 1.9823, "step": 28065 }, { "epoch": 0.9055173206482615, "grad_norm": 0.32421875, "learning_rate": 7.022076611087247e-07, "loss": 2.0124, "step": 28066 }, { "epoch": 0.9055495845020578, "grad_norm": 0.330078125, "learning_rate": 7.017318676499363e-07, "loss": 1.9657, "step": 28067 }, { "epoch": 0.9055818483558542, "grad_norm": 0.333984375, "learning_rate": 7.012562315762211e-07, "loss": 1.9989, "step": 28068 }, { "epoch": 0.9056141122096505, "grad_norm": 0.328125, "learning_rate": 7.0078075289282e-07, "loss": 2.0077, "step": 28069 }, { "epoch": 0.9056463760634469, "grad_norm": 0.3359375, "learning_rate": 7.003054316049673e-07, "loss": 1.9841, "step": 28070 }, { "epoch": 0.9056786399172432, "grad_norm": 0.3359375, "learning_rate": 6.998302677178919e-07, "loss": 1.9993, "step": 28071 }, { "epoch": 0.9057109037710396, "grad_norm": 0.33203125, "learning_rate": 6.993552612368281e-07, "loss": 1.9716, "step": 28072 }, { "epoch": 0.9057431676248359, "grad_norm": 0.337890625, "learning_rate": 6.988804121670034e-07, "loss": 1.9913, "step": 28073 }, { "epoch": 0.9057754314786323, "grad_norm": 0.341796875, "learning_rate": 6.984057205136402e-07, "loss": 2.002, "step": 28074 }, { "epoch": 0.9058076953324286, "grad_norm": 0.328125, "learning_rate": 6.979311862819693e-07, "loss": 1.9988, "step": 28075 }, { "epoch": 0.905839959186225, "grad_norm": 0.34375, "learning_rate": 6.974568094772116e-07, "loss": 1.9821, "step": 28076 }, { "epoch": 0.9058722230400212, "grad_norm": 0.333984375, "learning_rate": 6.969825901045862e-07, "loss": 1.9654, "step": 28077 }, { "epoch": 0.9059044868938176, "grad_norm": 0.33203125, "learning_rate": 6.965085281693173e-07, "loss": 1.9722, "step": 28078 }, { "epoch": 0.905936750747614, "grad_norm": 0.3359375, "learning_rate": 6.960346236766224e-07, "loss": 1.975, "step": 28079 }, { "epoch": 0.9059690146014103, "grad_norm": 0.33984375, "learning_rate": 6.95560876631714e-07, "loss": 1.9854, "step": 28080 }, { "epoch": 0.9060012784552067, "grad_norm": 0.33984375, "learning_rate": 6.950872870398111e-07, "loss": 1.9992, "step": 28081 }, { "epoch": 0.906033542309003, "grad_norm": 0.3359375, "learning_rate": 6.946138549061265e-07, "loss": 1.9878, "step": 28082 }, { "epoch": 0.9060658061627994, "grad_norm": 0.3359375, "learning_rate": 6.941405802358674e-07, "loss": 1.9811, "step": 28083 }, { "epoch": 0.9060980700165957, "grad_norm": 0.34375, "learning_rate": 6.936674630342465e-07, "loss": 1.9817, "step": 28084 }, { "epoch": 0.9061303338703921, "grad_norm": 0.341796875, "learning_rate": 6.93194503306473e-07, "loss": 1.9788, "step": 28085 }, { "epoch": 0.9061625977241884, "grad_norm": 0.33203125, "learning_rate": 6.927217010577474e-07, "loss": 1.9799, "step": 28086 }, { "epoch": 0.9061948615779848, "grad_norm": 0.33203125, "learning_rate": 6.922490562932793e-07, "loss": 2.0021, "step": 28087 }, { "epoch": 0.9062271254317811, "grad_norm": 0.3359375, "learning_rate": 6.917765690182692e-07, "loss": 1.988, "step": 28088 }, { "epoch": 0.9062593892855775, "grad_norm": 0.33203125, "learning_rate": 6.913042392379182e-07, "loss": 1.9689, "step": 28089 }, { "epoch": 0.9062916531393738, "grad_norm": 0.33984375, "learning_rate": 6.908320669574237e-07, "loss": 1.9873, "step": 28090 }, { "epoch": 0.9063239169931702, "grad_norm": 0.33203125, "learning_rate": 6.903600521819864e-07, "loss": 1.987, "step": 28091 }, { "epoch": 0.9063561808469665, "grad_norm": 0.33203125, "learning_rate": 6.89888194916799e-07, "loss": 1.9842, "step": 28092 }, { "epoch": 0.9063884447007629, "grad_norm": 0.3359375, "learning_rate": 6.89416495167054e-07, "loss": 2.0145, "step": 28093 }, { "epoch": 0.9064207085545591, "grad_norm": 0.345703125, "learning_rate": 6.889449529379488e-07, "loss": 1.9792, "step": 28094 }, { "epoch": 0.9064529724083555, "grad_norm": 0.33203125, "learning_rate": 6.88473568234671e-07, "loss": 1.9672, "step": 28095 }, { "epoch": 0.9064852362621518, "grad_norm": 0.34375, "learning_rate": 6.880023410624065e-07, "loss": 1.9958, "step": 28096 }, { "epoch": 0.9065175001159482, "grad_norm": 0.34375, "learning_rate": 6.87531271426346e-07, "loss": 1.9789, "step": 28097 }, { "epoch": 0.9065497639697446, "grad_norm": 0.33203125, "learning_rate": 6.870603593316755e-07, "loss": 1.99, "step": 28098 }, { "epoch": 0.9065820278235409, "grad_norm": 0.328125, "learning_rate": 6.86589604783574e-07, "loss": 1.9954, "step": 28099 }, { "epoch": 0.9066142916773373, "grad_norm": 0.333984375, "learning_rate": 6.861190077872276e-07, "loss": 1.9825, "step": 28100 }, { "epoch": 0.9066465555311336, "grad_norm": 0.33203125, "learning_rate": 6.856485683478153e-07, "loss": 1.983, "step": 28101 }, { "epoch": 0.90667881938493, "grad_norm": 0.3359375, "learning_rate": 6.851782864705114e-07, "loss": 1.9884, "step": 28102 }, { "epoch": 0.9067110832387263, "grad_norm": 0.341796875, "learning_rate": 6.847081621604967e-07, "loss": 2.0129, "step": 28103 }, { "epoch": 0.9067433470925227, "grad_norm": 0.341796875, "learning_rate": 6.842381954229454e-07, "loss": 2.0022, "step": 28104 }, { "epoch": 0.906775610946319, "grad_norm": 0.341796875, "learning_rate": 6.837683862630284e-07, "loss": 1.9515, "step": 28105 }, { "epoch": 0.9068078748001154, "grad_norm": 0.34375, "learning_rate": 6.832987346859198e-07, "loss": 1.9973, "step": 28106 }, { "epoch": 0.9068401386539117, "grad_norm": 0.337890625, "learning_rate": 6.82829240696789e-07, "loss": 1.9804, "step": 28107 }, { "epoch": 0.9068724025077081, "grad_norm": 0.337890625, "learning_rate": 6.823599043008017e-07, "loss": 2.0076, "step": 28108 }, { "epoch": 0.9069046663615044, "grad_norm": 0.337890625, "learning_rate": 6.818907255031254e-07, "loss": 1.9972, "step": 28109 }, { "epoch": 0.9069369302153008, "grad_norm": 0.33984375, "learning_rate": 6.814217043089244e-07, "loss": 2.0179, "step": 28110 }, { "epoch": 0.906969194069097, "grad_norm": 0.330078125, "learning_rate": 6.809528407233595e-07, "loss": 1.9868, "step": 28111 }, { "epoch": 0.9070014579228934, "grad_norm": 0.35546875, "learning_rate": 6.80484134751595e-07, "loss": 1.9749, "step": 28112 }, { "epoch": 0.9070337217766897, "grad_norm": 0.33984375, "learning_rate": 6.800155863987884e-07, "loss": 2.011, "step": 28113 }, { "epoch": 0.9070659856304861, "grad_norm": 0.330078125, "learning_rate": 6.795471956700955e-07, "loss": 1.9686, "step": 28114 }, { "epoch": 0.9070982494842824, "grad_norm": 0.318359375, "learning_rate": 6.790789625706755e-07, "loss": 1.9483, "step": 28115 }, { "epoch": 0.9071305133380788, "grad_norm": 0.333984375, "learning_rate": 6.786108871056812e-07, "loss": 2.0076, "step": 28116 }, { "epoch": 0.9071627771918751, "grad_norm": 0.333984375, "learning_rate": 6.781429692802598e-07, "loss": 1.994, "step": 28117 }, { "epoch": 0.9071950410456715, "grad_norm": 0.3359375, "learning_rate": 6.776752090995708e-07, "loss": 1.9916, "step": 28118 }, { "epoch": 0.9072273048994679, "grad_norm": 0.326171875, "learning_rate": 6.772076065687566e-07, "loss": 1.9979, "step": 28119 }, { "epoch": 0.9072595687532642, "grad_norm": 0.33984375, "learning_rate": 6.767401616929663e-07, "loss": 2.0163, "step": 28120 }, { "epoch": 0.9072918326070606, "grad_norm": 0.333984375, "learning_rate": 6.762728744773427e-07, "loss": 1.9945, "step": 28121 }, { "epoch": 0.9073240964608569, "grad_norm": 0.33203125, "learning_rate": 6.758057449270349e-07, "loss": 1.9685, "step": 28122 }, { "epoch": 0.9073563603146533, "grad_norm": 0.341796875, "learning_rate": 6.753387730471805e-07, "loss": 2.022, "step": 28123 }, { "epoch": 0.9073886241684496, "grad_norm": 0.337890625, "learning_rate": 6.748719588429186e-07, "loss": 2.0097, "step": 28124 }, { "epoch": 0.907420888022246, "grad_norm": 0.33203125, "learning_rate": 6.744053023193902e-07, "loss": 2.0048, "step": 28125 }, { "epoch": 0.9074531518760423, "grad_norm": 0.337890625, "learning_rate": 6.739388034817312e-07, "loss": 2.0318, "step": 28126 }, { "epoch": 0.9074854157298387, "grad_norm": 0.328125, "learning_rate": 6.734724623350758e-07, "loss": 1.9931, "step": 28127 }, { "epoch": 0.9075176795836349, "grad_norm": 0.333984375, "learning_rate": 6.730062788845598e-07, "loss": 2.0147, "step": 28128 }, { "epoch": 0.9075499434374313, "grad_norm": 0.333984375, "learning_rate": 6.725402531353109e-07, "loss": 2.0011, "step": 28129 }, { "epoch": 0.9075822072912276, "grad_norm": 0.330078125, "learning_rate": 6.720743850924599e-07, "loss": 1.976, "step": 28130 }, { "epoch": 0.907614471145024, "grad_norm": 0.330078125, "learning_rate": 6.71608674761136e-07, "loss": 1.968, "step": 28131 }, { "epoch": 0.9076467349988203, "grad_norm": 0.333984375, "learning_rate": 6.711431221464653e-07, "loss": 2.0075, "step": 28132 }, { "epoch": 0.9076789988526167, "grad_norm": 0.33203125, "learning_rate": 6.706777272535702e-07, "loss": 1.9809, "step": 28133 }, { "epoch": 0.907711262706413, "grad_norm": 0.333984375, "learning_rate": 6.70212490087575e-07, "loss": 2.0018, "step": 28134 }, { "epoch": 0.9077435265602094, "grad_norm": 0.333984375, "learning_rate": 6.697474106536022e-07, "loss": 1.9952, "step": 28135 }, { "epoch": 0.9077757904140057, "grad_norm": 0.3359375, "learning_rate": 6.692824889567678e-07, "loss": 1.9753, "step": 28136 }, { "epoch": 0.9078080542678021, "grad_norm": 0.3359375, "learning_rate": 6.688177250021926e-07, "loss": 2.0022, "step": 28137 }, { "epoch": 0.9078403181215984, "grad_norm": 0.330078125, "learning_rate": 6.683531187949892e-07, "loss": 1.948, "step": 28138 }, { "epoch": 0.9078725819753948, "grad_norm": 0.341796875, "learning_rate": 6.678886703402721e-07, "loss": 1.9856, "step": 28139 }, { "epoch": 0.9079048458291912, "grad_norm": 0.328125, "learning_rate": 6.674243796431583e-07, "loss": 1.9846, "step": 28140 }, { "epoch": 0.9079371096829875, "grad_norm": 0.330078125, "learning_rate": 6.669602467087526e-07, "loss": 1.9647, "step": 28141 }, { "epoch": 0.9079693735367839, "grad_norm": 0.35546875, "learning_rate": 6.664962715421658e-07, "loss": 1.9809, "step": 28142 }, { "epoch": 0.9080016373905802, "grad_norm": 0.3359375, "learning_rate": 6.660324541485053e-07, "loss": 1.9911, "step": 28143 }, { "epoch": 0.9080339012443766, "grad_norm": 0.330078125, "learning_rate": 6.65568794532877e-07, "loss": 1.9641, "step": 28144 }, { "epoch": 0.9080661650981728, "grad_norm": 0.384765625, "learning_rate": 6.651052927003837e-07, "loss": 1.9914, "step": 28145 }, { "epoch": 0.9080984289519692, "grad_norm": 0.341796875, "learning_rate": 6.646419486561278e-07, "loss": 1.9768, "step": 28146 }, { "epoch": 0.9081306928057655, "grad_norm": 0.33984375, "learning_rate": 6.641787624052087e-07, "loss": 1.967, "step": 28147 }, { "epoch": 0.9081629566595619, "grad_norm": 0.333984375, "learning_rate": 6.637157339527239e-07, "loss": 1.9856, "step": 28148 }, { "epoch": 0.9081952205133582, "grad_norm": 0.3359375, "learning_rate": 6.632528633037744e-07, "loss": 1.9796, "step": 28149 }, { "epoch": 0.9082274843671546, "grad_norm": 0.3359375, "learning_rate": 6.627901504634509e-07, "loss": 1.9868, "step": 28150 }, { "epoch": 0.9082597482209509, "grad_norm": 0.330078125, "learning_rate": 6.623275954368497e-07, "loss": 1.9932, "step": 28151 }, { "epoch": 0.9082920120747473, "grad_norm": 0.353515625, "learning_rate": 6.618651982290564e-07, "loss": 2.0013, "step": 28152 }, { "epoch": 0.9083242759285436, "grad_norm": 0.33203125, "learning_rate": 6.614029588451687e-07, "loss": 2.0049, "step": 28153 }, { "epoch": 0.90835653978234, "grad_norm": 0.3359375, "learning_rate": 6.609408772902692e-07, "loss": 2.0089, "step": 28154 }, { "epoch": 0.9083888036361363, "grad_norm": 0.341796875, "learning_rate": 6.604789535694456e-07, "loss": 2.0041, "step": 28155 }, { "epoch": 0.9084210674899327, "grad_norm": 0.333984375, "learning_rate": 6.600171876877837e-07, "loss": 1.9918, "step": 28156 }, { "epoch": 0.908453331343729, "grad_norm": 0.3359375, "learning_rate": 6.595555796503644e-07, "loss": 2.0083, "step": 28157 }, { "epoch": 0.9084855951975254, "grad_norm": 0.337890625, "learning_rate": 6.590941294622671e-07, "loss": 1.9662, "step": 28158 }, { "epoch": 0.9085178590513218, "grad_norm": 0.337890625, "learning_rate": 6.586328371285777e-07, "loss": 1.9704, "step": 28159 }, { "epoch": 0.908550122905118, "grad_norm": 0.33203125, "learning_rate": 6.581717026543671e-07, "loss": 1.975, "step": 28160 }, { "epoch": 0.9085823867589145, "grad_norm": 0.333984375, "learning_rate": 6.57710726044713e-07, "loss": 1.9843, "step": 28161 }, { "epoch": 0.9086146506127107, "grad_norm": 0.33203125, "learning_rate": 6.572499073046928e-07, "loss": 2.0012, "step": 28162 }, { "epoch": 0.9086469144665071, "grad_norm": 0.345703125, "learning_rate": 6.567892464393744e-07, "loss": 1.9781, "step": 28163 }, { "epoch": 0.9086791783203034, "grad_norm": 0.33984375, "learning_rate": 6.563287434538301e-07, "loss": 2.0186, "step": 28164 }, { "epoch": 0.9087114421740998, "grad_norm": 0.33203125, "learning_rate": 6.558683983531294e-07, "loss": 1.9958, "step": 28165 }, { "epoch": 0.9087437060278961, "grad_norm": 0.330078125, "learning_rate": 6.554082111423398e-07, "loss": 1.9944, "step": 28166 }, { "epoch": 0.9087759698816925, "grad_norm": 0.33984375, "learning_rate": 6.549481818265224e-07, "loss": 2.0088, "step": 28167 }, { "epoch": 0.9088082337354888, "grad_norm": 0.328125, "learning_rate": 6.54488310410748e-07, "loss": 1.997, "step": 28168 }, { "epoch": 0.9088404975892852, "grad_norm": 0.333984375, "learning_rate": 6.540285969000742e-07, "loss": 2.0141, "step": 28169 }, { "epoch": 0.9088727614430815, "grad_norm": 0.337890625, "learning_rate": 6.535690412995587e-07, "loss": 1.9985, "step": 28170 }, { "epoch": 0.9089050252968779, "grad_norm": 0.330078125, "learning_rate": 6.531096436142675e-07, "loss": 1.9912, "step": 28171 }, { "epoch": 0.9089372891506742, "grad_norm": 0.353515625, "learning_rate": 6.526504038492514e-07, "loss": 1.9888, "step": 28172 }, { "epoch": 0.9089695530044706, "grad_norm": 0.330078125, "learning_rate": 6.521913220095649e-07, "loss": 1.9504, "step": 28173 }, { "epoch": 0.9090018168582669, "grad_norm": 0.34375, "learning_rate": 6.517323981002655e-07, "loss": 1.991, "step": 28174 }, { "epoch": 0.9090340807120633, "grad_norm": 0.333984375, "learning_rate": 6.512736321264024e-07, "loss": 1.9643, "step": 28175 }, { "epoch": 0.9090663445658596, "grad_norm": 0.333984375, "learning_rate": 6.508150240930233e-07, "loss": 2.0137, "step": 28176 }, { "epoch": 0.909098608419656, "grad_norm": 0.330078125, "learning_rate": 6.503565740051808e-07, "loss": 1.9824, "step": 28177 }, { "epoch": 0.9091308722734522, "grad_norm": 0.34375, "learning_rate": 6.498982818679194e-07, "loss": 1.9894, "step": 28178 }, { "epoch": 0.9091631361272486, "grad_norm": 0.3515625, "learning_rate": 6.494401476862799e-07, "loss": 1.9714, "step": 28179 }, { "epoch": 0.909195399981045, "grad_norm": 0.3359375, "learning_rate": 6.489821714653099e-07, "loss": 1.983, "step": 28180 }, { "epoch": 0.9092276638348413, "grad_norm": 0.333984375, "learning_rate": 6.485243532100488e-07, "loss": 1.9654, "step": 28181 }, { "epoch": 0.9092599276886377, "grad_norm": 0.3359375, "learning_rate": 6.480666929255374e-07, "loss": 1.9921, "step": 28182 }, { "epoch": 0.909292191542434, "grad_norm": 0.33203125, "learning_rate": 6.476091906168086e-07, "loss": 1.9485, "step": 28183 }, { "epoch": 0.9093244553962304, "grad_norm": 0.330078125, "learning_rate": 6.471518462889031e-07, "loss": 1.982, "step": 28184 }, { "epoch": 0.9093567192500267, "grad_norm": 0.337890625, "learning_rate": 6.466946599468538e-07, "loss": 2.0069, "step": 28185 }, { "epoch": 0.9093889831038231, "grad_norm": 0.33203125, "learning_rate": 6.462376315956914e-07, "loss": 1.974, "step": 28186 }, { "epoch": 0.9094212469576194, "grad_norm": 0.333984375, "learning_rate": 6.457807612404487e-07, "loss": 1.9885, "step": 28187 }, { "epoch": 0.9094535108114158, "grad_norm": 0.3359375, "learning_rate": 6.45324048886155e-07, "loss": 1.9772, "step": 28188 }, { "epoch": 0.9094857746652121, "grad_norm": 0.333984375, "learning_rate": 6.44867494537833e-07, "loss": 1.981, "step": 28189 }, { "epoch": 0.9095180385190085, "grad_norm": 0.349609375, "learning_rate": 6.444110982005136e-07, "loss": 1.9705, "step": 28190 }, { "epoch": 0.9095503023728048, "grad_norm": 0.33203125, "learning_rate": 6.439548598792194e-07, "loss": 2.0046, "step": 28191 }, { "epoch": 0.9095825662266012, "grad_norm": 0.33984375, "learning_rate": 6.434987795789682e-07, "loss": 1.9632, "step": 28192 }, { "epoch": 0.9096148300803975, "grad_norm": 0.3359375, "learning_rate": 6.430428573047842e-07, "loss": 2.0089, "step": 28193 }, { "epoch": 0.9096470939341939, "grad_norm": 0.333984375, "learning_rate": 6.42587093061685e-07, "loss": 2.001, "step": 28194 }, { "epoch": 0.9096793577879901, "grad_norm": 0.333984375, "learning_rate": 6.421314868546868e-07, "loss": 1.9919, "step": 28195 }, { "epoch": 0.9097116216417865, "grad_norm": 0.3359375, "learning_rate": 6.416760386888054e-07, "loss": 1.988, "step": 28196 }, { "epoch": 0.9097438854955828, "grad_norm": 0.3359375, "learning_rate": 6.412207485690535e-07, "loss": 2.022, "step": 28197 }, { "epoch": 0.9097761493493792, "grad_norm": 0.333984375, "learning_rate": 6.407656165004405e-07, "loss": 1.977, "step": 28198 }, { "epoch": 0.9098084132031755, "grad_norm": 0.33203125, "learning_rate": 6.403106424879823e-07, "loss": 1.9987, "step": 28199 }, { "epoch": 0.9098406770569719, "grad_norm": 0.33203125, "learning_rate": 6.398558265366816e-07, "loss": 2.0029, "step": 28200 }, { "epoch": 0.9098729409107683, "grad_norm": 0.337890625, "learning_rate": 6.394011686515428e-07, "loss": 1.9966, "step": 28201 }, { "epoch": 0.9099052047645646, "grad_norm": 0.328125, "learning_rate": 6.389466688375767e-07, "loss": 1.994, "step": 28202 }, { "epoch": 0.909937468618361, "grad_norm": 0.337890625, "learning_rate": 6.384923270997844e-07, "loss": 1.9967, "step": 28203 }, { "epoch": 0.9099697324721573, "grad_norm": 0.345703125, "learning_rate": 6.38038143443162e-07, "loss": 1.9821, "step": 28204 }, { "epoch": 0.9100019963259537, "grad_norm": 0.341796875, "learning_rate": 6.375841178727154e-07, "loss": 1.9901, "step": 28205 }, { "epoch": 0.91003426017975, "grad_norm": 0.333984375, "learning_rate": 6.37130250393439e-07, "loss": 1.9889, "step": 28206 }, { "epoch": 0.9100665240335464, "grad_norm": 0.328125, "learning_rate": 6.36676541010327e-07, "loss": 1.9756, "step": 28207 }, { "epoch": 0.9100987878873427, "grad_norm": 0.333984375, "learning_rate": 6.362229897283772e-07, "loss": 1.9925, "step": 28208 }, { "epoch": 0.9101310517411391, "grad_norm": 0.337890625, "learning_rate": 6.357695965525789e-07, "loss": 1.975, "step": 28209 }, { "epoch": 0.9101633155949354, "grad_norm": 0.333984375, "learning_rate": 6.353163614879265e-07, "loss": 2.0028, "step": 28210 }, { "epoch": 0.9101955794487318, "grad_norm": 0.337890625, "learning_rate": 6.348632845394026e-07, "loss": 2.0069, "step": 28211 }, { "epoch": 0.910227843302528, "grad_norm": 0.330078125, "learning_rate": 6.344103657120015e-07, "loss": 1.9717, "step": 28212 }, { "epoch": 0.9102601071563244, "grad_norm": 0.33984375, "learning_rate": 6.339576050107043e-07, "loss": 1.9857, "step": 28213 }, { "epoch": 0.9102923710101207, "grad_norm": 0.333984375, "learning_rate": 6.335050024404936e-07, "loss": 2.0093, "step": 28214 }, { "epoch": 0.9103246348639171, "grad_norm": 0.353515625, "learning_rate": 6.330525580063557e-07, "loss": 1.9895, "step": 28215 }, { "epoch": 0.9103568987177134, "grad_norm": 0.337890625, "learning_rate": 6.326002717132678e-07, "loss": 1.9795, "step": 28216 }, { "epoch": 0.9103891625715098, "grad_norm": 0.333984375, "learning_rate": 6.32148143566208e-07, "loss": 1.9752, "step": 28217 }, { "epoch": 0.9104214264253061, "grad_norm": 0.33984375, "learning_rate": 6.316961735701554e-07, "loss": 1.9666, "step": 28218 }, { "epoch": 0.9104536902791025, "grad_norm": 0.35546875, "learning_rate": 6.312443617300845e-07, "loss": 1.9728, "step": 28219 }, { "epoch": 0.9104859541328989, "grad_norm": 0.3359375, "learning_rate": 6.307927080509646e-07, "loss": 2.0107, "step": 28220 }, { "epoch": 0.9105182179866952, "grad_norm": 0.337890625, "learning_rate": 6.303412125377717e-07, "loss": 1.9846, "step": 28221 }, { "epoch": 0.9105504818404916, "grad_norm": 0.341796875, "learning_rate": 6.298898751954751e-07, "loss": 1.9824, "step": 28222 }, { "epoch": 0.9105827456942879, "grad_norm": 0.33984375, "learning_rate": 6.294386960290394e-07, "loss": 2.0177, "step": 28223 }, { "epoch": 0.9106150095480843, "grad_norm": 0.33203125, "learning_rate": 6.289876750434353e-07, "loss": 2.0023, "step": 28224 }, { "epoch": 0.9106472734018806, "grad_norm": 0.33984375, "learning_rate": 6.285368122436242e-07, "loss": 2.0059, "step": 28225 }, { "epoch": 0.910679537255677, "grad_norm": 0.330078125, "learning_rate": 6.280861076345685e-07, "loss": 1.9956, "step": 28226 }, { "epoch": 0.9107118011094733, "grad_norm": 0.326171875, "learning_rate": 6.276355612212326e-07, "loss": 1.971, "step": 28227 }, { "epoch": 0.9107440649632697, "grad_norm": 0.333984375, "learning_rate": 6.271851730085726e-07, "loss": 1.9698, "step": 28228 }, { "epoch": 0.9107763288170659, "grad_norm": 0.3359375, "learning_rate": 6.267349430015462e-07, "loss": 1.9872, "step": 28229 }, { "epoch": 0.9108085926708623, "grad_norm": 0.33984375, "learning_rate": 6.262848712051128e-07, "loss": 1.9888, "step": 28230 }, { "epoch": 0.9108408565246586, "grad_norm": 0.337890625, "learning_rate": 6.258349576242233e-07, "loss": 1.9742, "step": 28231 }, { "epoch": 0.910873120378455, "grad_norm": 0.34375, "learning_rate": 6.25385202263829e-07, "loss": 1.9752, "step": 28232 }, { "epoch": 0.9109053842322513, "grad_norm": 0.33203125, "learning_rate": 6.24935605128884e-07, "loss": 1.9791, "step": 28233 }, { "epoch": 0.9109376480860477, "grad_norm": 0.3359375, "learning_rate": 6.244861662243345e-07, "loss": 1.9871, "step": 28234 }, { "epoch": 0.910969911939844, "grad_norm": 0.337890625, "learning_rate": 6.240368855551265e-07, "loss": 1.9634, "step": 28235 }, { "epoch": 0.9110021757936404, "grad_norm": 0.333984375, "learning_rate": 6.235877631262093e-07, "loss": 1.9907, "step": 28236 }, { "epoch": 0.9110344396474367, "grad_norm": 0.33984375, "learning_rate": 6.231387989425241e-07, "loss": 1.9952, "step": 28237 }, { "epoch": 0.9110667035012331, "grad_norm": 0.328125, "learning_rate": 6.226899930090119e-07, "loss": 1.9534, "step": 28238 }, { "epoch": 0.9110989673550294, "grad_norm": 0.33984375, "learning_rate": 6.22241345330617e-07, "loss": 1.981, "step": 28239 }, { "epoch": 0.9111312312088258, "grad_norm": 0.333984375, "learning_rate": 6.217928559122721e-07, "loss": 1.9907, "step": 28240 }, { "epoch": 0.9111634950626222, "grad_norm": 0.330078125, "learning_rate": 6.213445247589184e-07, "loss": 1.9811, "step": 28241 }, { "epoch": 0.9111957589164185, "grad_norm": 0.33203125, "learning_rate": 6.208963518754867e-07, "loss": 1.9873, "step": 28242 }, { "epoch": 0.9112280227702149, "grad_norm": 0.337890625, "learning_rate": 6.204483372669135e-07, "loss": 1.9987, "step": 28243 }, { "epoch": 0.9112602866240112, "grad_norm": 0.3359375, "learning_rate": 6.20000480938131e-07, "loss": 1.9826, "step": 28244 }, { "epoch": 0.9112925504778076, "grad_norm": 0.341796875, "learning_rate": 6.195527828940639e-07, "loss": 1.9976, "step": 28245 }, { "epoch": 0.9113248143316038, "grad_norm": 0.326171875, "learning_rate": 6.191052431396449e-07, "loss": 1.9861, "step": 28246 }, { "epoch": 0.9113570781854002, "grad_norm": 0.333984375, "learning_rate": 6.186578616797983e-07, "loss": 1.9729, "step": 28247 }, { "epoch": 0.9113893420391965, "grad_norm": 0.337890625, "learning_rate": 6.182106385194486e-07, "loss": 1.9913, "step": 28248 }, { "epoch": 0.9114216058929929, "grad_norm": 0.33984375, "learning_rate": 6.177635736635184e-07, "loss": 1.9857, "step": 28249 }, { "epoch": 0.9114538697467892, "grad_norm": 0.33203125, "learning_rate": 6.173166671169306e-07, "loss": 1.9869, "step": 28250 }, { "epoch": 0.9114861336005856, "grad_norm": 0.333984375, "learning_rate": 6.168699188846011e-07, "loss": 2.0107, "step": 28251 }, { "epoch": 0.9115183974543819, "grad_norm": 0.333984375, "learning_rate": 6.16423328971451e-07, "loss": 1.971, "step": 28252 }, { "epoch": 0.9115506613081783, "grad_norm": 0.326171875, "learning_rate": 6.159768973823932e-07, "loss": 1.9942, "step": 28253 }, { "epoch": 0.9115829251619746, "grad_norm": 0.34375, "learning_rate": 6.155306241223418e-07, "loss": 1.9921, "step": 28254 }, { "epoch": 0.911615189015771, "grad_norm": 0.349609375, "learning_rate": 6.150845091962099e-07, "loss": 1.9631, "step": 28255 }, { "epoch": 0.9116474528695673, "grad_norm": 0.345703125, "learning_rate": 6.146385526089098e-07, "loss": 2.0049, "step": 28256 }, { "epoch": 0.9116797167233637, "grad_norm": 0.330078125, "learning_rate": 6.141927543653464e-07, "loss": 1.9715, "step": 28257 }, { "epoch": 0.91171198057716, "grad_norm": 0.33203125, "learning_rate": 6.137471144704304e-07, "loss": 2.0066, "step": 28258 }, { "epoch": 0.9117442444309564, "grad_norm": 0.337890625, "learning_rate": 6.133016329290664e-07, "loss": 1.9783, "step": 28259 }, { "epoch": 0.9117765082847528, "grad_norm": 0.337890625, "learning_rate": 6.128563097461537e-07, "loss": 1.9818, "step": 28260 }, { "epoch": 0.911808772138549, "grad_norm": 0.333984375, "learning_rate": 6.124111449266018e-07, "loss": 1.989, "step": 28261 }, { "epoch": 0.9118410359923455, "grad_norm": 0.337890625, "learning_rate": 6.119661384753033e-07, "loss": 2.0321, "step": 28262 }, { "epoch": 0.9118732998461417, "grad_norm": 0.33203125, "learning_rate": 6.115212903971579e-07, "loss": 1.9911, "step": 28263 }, { "epoch": 0.9119055636999381, "grad_norm": 0.33984375, "learning_rate": 6.110766006970681e-07, "loss": 2.0042, "step": 28264 }, { "epoch": 0.9119378275537344, "grad_norm": 0.333984375, "learning_rate": 6.106320693799217e-07, "loss": 1.9826, "step": 28265 }, { "epoch": 0.9119700914075308, "grad_norm": 0.326171875, "learning_rate": 6.101876964506148e-07, "loss": 1.9275, "step": 28266 }, { "epoch": 0.9120023552613271, "grad_norm": 0.345703125, "learning_rate": 6.097434819140402e-07, "loss": 1.9916, "step": 28267 }, { "epoch": 0.9120346191151235, "grad_norm": 0.337890625, "learning_rate": 6.092994257750856e-07, "loss": 1.9781, "step": 28268 }, { "epoch": 0.9120668829689198, "grad_norm": 0.337890625, "learning_rate": 6.088555280386371e-07, "loss": 2.0132, "step": 28269 }, { "epoch": 0.9120991468227162, "grad_norm": 0.326171875, "learning_rate": 6.084117887095841e-07, "loss": 1.9791, "step": 28270 }, { "epoch": 0.9121314106765125, "grad_norm": 0.33984375, "learning_rate": 6.079682077928111e-07, "loss": 1.9948, "step": 28271 }, { "epoch": 0.9121636745303089, "grad_norm": 0.333984375, "learning_rate": 6.075247852931975e-07, "loss": 2.0029, "step": 28272 }, { "epoch": 0.9121959383841052, "grad_norm": 0.337890625, "learning_rate": 6.070815212156261e-07, "loss": 2.0024, "step": 28273 }, { "epoch": 0.9122282022379016, "grad_norm": 0.341796875, "learning_rate": 6.066384155649762e-07, "loss": 1.9925, "step": 28274 }, { "epoch": 0.9122604660916979, "grad_norm": 0.337890625, "learning_rate": 6.061954683461257e-07, "loss": 1.9706, "step": 28275 }, { "epoch": 0.9122927299454943, "grad_norm": 0.33203125, "learning_rate": 6.057526795639473e-07, "loss": 2.0113, "step": 28276 }, { "epoch": 0.9123249937992906, "grad_norm": 0.341796875, "learning_rate": 6.053100492233187e-07, "loss": 1.987, "step": 28277 }, { "epoch": 0.912357257653087, "grad_norm": 0.326171875, "learning_rate": 6.048675773291096e-07, "loss": 1.9919, "step": 28278 }, { "epoch": 0.9123895215068832, "grad_norm": 0.33203125, "learning_rate": 6.044252638861908e-07, "loss": 1.9797, "step": 28279 }, { "epoch": 0.9124217853606796, "grad_norm": 0.33203125, "learning_rate": 6.039831088994319e-07, "loss": 1.9856, "step": 28280 }, { "epoch": 0.912454049214476, "grad_norm": 0.33203125, "learning_rate": 6.035411123737005e-07, "loss": 1.9817, "step": 28281 }, { "epoch": 0.9124863130682723, "grad_norm": 0.337890625, "learning_rate": 6.03099274313858e-07, "loss": 1.9848, "step": 28282 }, { "epoch": 0.9125185769220687, "grad_norm": 0.3359375, "learning_rate": 6.02657594724772e-07, "loss": 1.9724, "step": 28283 }, { "epoch": 0.912550840775865, "grad_norm": 0.34375, "learning_rate": 6.022160736113036e-07, "loss": 1.9454, "step": 28284 }, { "epoch": 0.9125831046296614, "grad_norm": 0.3359375, "learning_rate": 6.017747109783089e-07, "loss": 1.9956, "step": 28285 }, { "epoch": 0.9126153684834577, "grad_norm": 0.34765625, "learning_rate": 6.013335068306508e-07, "loss": 1.9666, "step": 28286 }, { "epoch": 0.9126476323372541, "grad_norm": 0.32421875, "learning_rate": 6.008924611731836e-07, "loss": 1.9526, "step": 28287 }, { "epoch": 0.9126798961910504, "grad_norm": 0.33203125, "learning_rate": 6.004515740107619e-07, "loss": 1.9389, "step": 28288 }, { "epoch": 0.9127121600448468, "grad_norm": 0.349609375, "learning_rate": 6.0001084534824e-07, "loss": 1.9763, "step": 28289 }, { "epoch": 0.9127444238986431, "grad_norm": 0.337890625, "learning_rate": 5.995702751904691e-07, "loss": 1.996, "step": 28290 }, { "epoch": 0.9127766877524395, "grad_norm": 0.32421875, "learning_rate": 5.991298635422937e-07, "loss": 1.9571, "step": 28291 }, { "epoch": 0.9128089516062358, "grad_norm": 0.3359375, "learning_rate": 5.986896104085715e-07, "loss": 1.963, "step": 28292 }, { "epoch": 0.9128412154600322, "grad_norm": 0.333984375, "learning_rate": 5.982495157941387e-07, "loss": 2.016, "step": 28293 }, { "epoch": 0.9128734793138285, "grad_norm": 0.3359375, "learning_rate": 5.97809579703843e-07, "loss": 2.0116, "step": 28294 }, { "epoch": 0.9129057431676249, "grad_norm": 0.328125, "learning_rate": 5.973698021425289e-07, "loss": 1.9674, "step": 28295 }, { "epoch": 0.9129380070214211, "grad_norm": 0.33203125, "learning_rate": 5.969301831150359e-07, "loss": 1.978, "step": 28296 }, { "epoch": 0.9129702708752175, "grad_norm": 0.3359375, "learning_rate": 5.964907226262001e-07, "loss": 1.9927, "step": 28297 }, { "epoch": 0.9130025347290138, "grad_norm": 0.328125, "learning_rate": 5.960514206808643e-07, "loss": 1.9687, "step": 28298 }, { "epoch": 0.9130347985828102, "grad_norm": 0.330078125, "learning_rate": 5.956122772838612e-07, "loss": 2.0054, "step": 28299 }, { "epoch": 0.9130670624366065, "grad_norm": 0.33984375, "learning_rate": 5.951732924400222e-07, "loss": 1.9858, "step": 28300 }, { "epoch": 0.9130993262904029, "grad_norm": 0.3359375, "learning_rate": 5.947344661541832e-07, "loss": 2.001, "step": 28301 }, { "epoch": 0.9131315901441993, "grad_norm": 0.345703125, "learning_rate": 5.942957984311736e-07, "loss": 1.9976, "step": 28302 }, { "epoch": 0.9131638539979956, "grad_norm": 0.33203125, "learning_rate": 5.938572892758215e-07, "loss": 1.992, "step": 28303 }, { "epoch": 0.913196117851792, "grad_norm": 0.330078125, "learning_rate": 5.934189386929495e-07, "loss": 1.9898, "step": 28304 }, { "epoch": 0.9132283817055883, "grad_norm": 0.341796875, "learning_rate": 5.929807466873904e-07, "loss": 1.9865, "step": 28305 }, { "epoch": 0.9132606455593847, "grad_norm": 0.333984375, "learning_rate": 5.925427132639638e-07, "loss": 1.9582, "step": 28306 }, { "epoch": 0.913292909413181, "grad_norm": 0.333984375, "learning_rate": 5.921048384274875e-07, "loss": 2.0016, "step": 28307 }, { "epoch": 0.9133251732669774, "grad_norm": 0.341796875, "learning_rate": 5.91667122182789e-07, "loss": 1.991, "step": 28308 }, { "epoch": 0.9133574371207737, "grad_norm": 0.345703125, "learning_rate": 5.912295645346815e-07, "loss": 1.9833, "step": 28309 }, { "epoch": 0.9133897009745701, "grad_norm": 0.330078125, "learning_rate": 5.907921654879795e-07, "loss": 1.9828, "step": 28310 }, { "epoch": 0.9134219648283664, "grad_norm": 0.326171875, "learning_rate": 5.903549250475021e-07, "loss": 1.9701, "step": 28311 }, { "epoch": 0.9134542286821627, "grad_norm": 0.33203125, "learning_rate": 5.899178432180608e-07, "loss": 1.9639, "step": 28312 }, { "epoch": 0.913486492535959, "grad_norm": 0.34765625, "learning_rate": 5.894809200044649e-07, "loss": 1.9806, "step": 28313 }, { "epoch": 0.9135187563897554, "grad_norm": 0.330078125, "learning_rate": 5.890441554115255e-07, "loss": 1.9944, "step": 28314 }, { "epoch": 0.9135510202435517, "grad_norm": 0.330078125, "learning_rate": 5.886075494440507e-07, "loss": 2.0066, "step": 28315 }, { "epoch": 0.9135832840973481, "grad_norm": 0.328125, "learning_rate": 5.881711021068431e-07, "loss": 1.9976, "step": 28316 }, { "epoch": 0.9136155479511444, "grad_norm": 0.33984375, "learning_rate": 5.877348134047122e-07, "loss": 1.9752, "step": 28317 }, { "epoch": 0.9136478118049408, "grad_norm": 0.337890625, "learning_rate": 5.872986833424543e-07, "loss": 2.0148, "step": 28318 }, { "epoch": 0.9136800756587371, "grad_norm": 0.333984375, "learning_rate": 5.868627119248738e-07, "loss": 1.954, "step": 28319 }, { "epoch": 0.9137123395125335, "grad_norm": 0.341796875, "learning_rate": 5.864268991567684e-07, "loss": 2.0127, "step": 28320 }, { "epoch": 0.9137446033663299, "grad_norm": 0.333984375, "learning_rate": 5.859912450429361e-07, "loss": 1.9671, "step": 28321 }, { "epoch": 0.9137768672201262, "grad_norm": 0.32421875, "learning_rate": 5.855557495881697e-07, "loss": 1.9752, "step": 28322 }, { "epoch": 0.9138091310739226, "grad_norm": 0.34765625, "learning_rate": 5.851204127972686e-07, "loss": 1.9853, "step": 28323 }, { "epoch": 0.9138413949277189, "grad_norm": 0.3359375, "learning_rate": 5.846852346750176e-07, "loss": 1.977, "step": 28324 }, { "epoch": 0.9138736587815153, "grad_norm": 0.33203125, "learning_rate": 5.842502152262108e-07, "loss": 1.9935, "step": 28325 }, { "epoch": 0.9139059226353116, "grad_norm": 0.3359375, "learning_rate": 5.838153544556379e-07, "loss": 1.9884, "step": 28326 }, { "epoch": 0.913938186489108, "grad_norm": 0.3359375, "learning_rate": 5.833806523680802e-07, "loss": 2.0003, "step": 28327 }, { "epoch": 0.9139704503429043, "grad_norm": 0.33984375, "learning_rate": 5.829461089683252e-07, "loss": 2.0179, "step": 28328 }, { "epoch": 0.9140027141967006, "grad_norm": 0.33203125, "learning_rate": 5.825117242611594e-07, "loss": 1.9852, "step": 28329 }, { "epoch": 0.9140349780504969, "grad_norm": 0.333984375, "learning_rate": 5.820774982513621e-07, "loss": 1.9606, "step": 28330 }, { "epoch": 0.9140672419042933, "grad_norm": 0.326171875, "learning_rate": 5.816434309437096e-07, "loss": 1.9992, "step": 28331 }, { "epoch": 0.9140995057580896, "grad_norm": 0.3359375, "learning_rate": 5.812095223429831e-07, "loss": 1.9846, "step": 28332 }, { "epoch": 0.914131769611886, "grad_norm": 0.328125, "learning_rate": 5.807757724539603e-07, "loss": 1.9852, "step": 28333 }, { "epoch": 0.9141640334656823, "grad_norm": 0.33203125, "learning_rate": 5.803421812814125e-07, "loss": 2.0248, "step": 28334 }, { "epoch": 0.9141962973194787, "grad_norm": 0.337890625, "learning_rate": 5.799087488301108e-07, "loss": 1.9791, "step": 28335 }, { "epoch": 0.914228561173275, "grad_norm": 0.328125, "learning_rate": 5.794754751048314e-07, "loss": 1.9831, "step": 28336 }, { "epoch": 0.9142608250270714, "grad_norm": 0.333984375, "learning_rate": 5.790423601103407e-07, "loss": 1.9793, "step": 28337 }, { "epoch": 0.9142930888808677, "grad_norm": 0.326171875, "learning_rate": 5.786094038514045e-07, "loss": 2.0167, "step": 28338 }, { "epoch": 0.9143253527346641, "grad_norm": 0.337890625, "learning_rate": 5.781766063327926e-07, "loss": 1.9637, "step": 28339 }, { "epoch": 0.9143576165884604, "grad_norm": 0.333984375, "learning_rate": 5.77743967559266e-07, "loss": 1.9733, "step": 28340 }, { "epoch": 0.9143898804422568, "grad_norm": 0.330078125, "learning_rate": 5.773114875355845e-07, "loss": 1.9639, "step": 28341 }, { "epoch": 0.9144221442960532, "grad_norm": 0.333984375, "learning_rate": 5.76879166266514e-07, "loss": 1.9692, "step": 28342 }, { "epoch": 0.9144544081498495, "grad_norm": 0.34765625, "learning_rate": 5.764470037568093e-07, "loss": 1.9671, "step": 28343 }, { "epoch": 0.9144866720036459, "grad_norm": 0.33203125, "learning_rate": 5.76015000011228e-07, "loss": 1.9984, "step": 28344 }, { "epoch": 0.9145189358574422, "grad_norm": 0.349609375, "learning_rate": 5.755831550345281e-07, "loss": 2.0018, "step": 28345 }, { "epoch": 0.9145511997112385, "grad_norm": 0.333984375, "learning_rate": 5.751514688314591e-07, "loss": 1.9989, "step": 28346 }, { "epoch": 0.9145834635650348, "grad_norm": 0.3359375, "learning_rate": 5.747199414067739e-07, "loss": 1.967, "step": 28347 }, { "epoch": 0.9146157274188312, "grad_norm": 0.34765625, "learning_rate": 5.742885727652252e-07, "loss": 1.9933, "step": 28348 }, { "epoch": 0.9146479912726275, "grad_norm": 0.330078125, "learning_rate": 5.738573629115578e-07, "loss": 2.0076, "step": 28349 }, { "epoch": 0.9146802551264239, "grad_norm": 0.3359375, "learning_rate": 5.734263118505179e-07, "loss": 1.9784, "step": 28350 }, { "epoch": 0.9147125189802202, "grad_norm": 0.328125, "learning_rate": 5.729954195868531e-07, "loss": 1.9627, "step": 28351 }, { "epoch": 0.9147447828340166, "grad_norm": 0.3359375, "learning_rate": 5.725646861253048e-07, "loss": 2.0083, "step": 28352 }, { "epoch": 0.9147770466878129, "grad_norm": 0.34375, "learning_rate": 5.721341114706141e-07, "loss": 1.9676, "step": 28353 }, { "epoch": 0.9148093105416093, "grad_norm": 0.333984375, "learning_rate": 5.717036956275224e-07, "loss": 1.9698, "step": 28354 }, { "epoch": 0.9148415743954056, "grad_norm": 0.33203125, "learning_rate": 5.712734386007623e-07, "loss": 1.975, "step": 28355 }, { "epoch": 0.914873838249202, "grad_norm": 0.33984375, "learning_rate": 5.708433403950736e-07, "loss": 1.978, "step": 28356 }, { "epoch": 0.9149061021029983, "grad_norm": 0.3359375, "learning_rate": 5.704134010151924e-07, "loss": 2.0032, "step": 28357 }, { "epoch": 0.9149383659567947, "grad_norm": 0.330078125, "learning_rate": 5.699836204658465e-07, "loss": 1.9952, "step": 28358 }, { "epoch": 0.914970629810591, "grad_norm": 0.33984375, "learning_rate": 5.695539987517673e-07, "loss": 1.9837, "step": 28359 }, { "epoch": 0.9150028936643874, "grad_norm": 0.3359375, "learning_rate": 5.691245358776908e-07, "loss": 1.9525, "step": 28360 }, { "epoch": 0.9150351575181838, "grad_norm": 0.333984375, "learning_rate": 5.686952318483335e-07, "loss": 1.9716, "step": 28361 }, { "epoch": 0.91506742137198, "grad_norm": 0.33203125, "learning_rate": 5.68266086668428e-07, "loss": 1.9662, "step": 28362 }, { "epoch": 0.9150996852257764, "grad_norm": 0.3359375, "learning_rate": 5.678371003426958e-07, "loss": 1.9894, "step": 28363 }, { "epoch": 0.9151319490795727, "grad_norm": 0.33203125, "learning_rate": 5.674082728758595e-07, "loss": 1.9973, "step": 28364 }, { "epoch": 0.9151642129333691, "grad_norm": 0.337890625, "learning_rate": 5.669796042726405e-07, "loss": 2.0122, "step": 28365 }, { "epoch": 0.9151964767871654, "grad_norm": 0.337890625, "learning_rate": 5.665510945377533e-07, "loss": 1.9839, "step": 28366 }, { "epoch": 0.9152287406409618, "grad_norm": 0.328125, "learning_rate": 5.661227436759193e-07, "loss": 1.9835, "step": 28367 }, { "epoch": 0.9152610044947581, "grad_norm": 0.33203125, "learning_rate": 5.656945516918494e-07, "loss": 1.9793, "step": 28368 }, { "epoch": 0.9152932683485545, "grad_norm": 0.333984375, "learning_rate": 5.6526651859026e-07, "loss": 1.9812, "step": 28369 }, { "epoch": 0.9153255322023508, "grad_norm": 0.322265625, "learning_rate": 5.648386443758608e-07, "loss": 1.99, "step": 28370 }, { "epoch": 0.9153577960561472, "grad_norm": 0.337890625, "learning_rate": 5.644109290533627e-07, "loss": 1.991, "step": 28371 }, { "epoch": 0.9153900599099435, "grad_norm": 0.34375, "learning_rate": 5.639833726274707e-07, "loss": 2.0168, "step": 28372 }, { "epoch": 0.9154223237637399, "grad_norm": 0.337890625, "learning_rate": 5.635559751028973e-07, "loss": 2.0238, "step": 28373 }, { "epoch": 0.9154545876175362, "grad_norm": 0.337890625, "learning_rate": 5.631287364843408e-07, "loss": 1.9856, "step": 28374 }, { "epoch": 0.9154868514713326, "grad_norm": 0.330078125, "learning_rate": 5.627016567765053e-07, "loss": 1.9819, "step": 28375 }, { "epoch": 0.9155191153251289, "grad_norm": 0.333984375, "learning_rate": 5.62274735984094e-07, "loss": 1.9606, "step": 28376 }, { "epoch": 0.9155513791789253, "grad_norm": 0.326171875, "learning_rate": 5.618479741118049e-07, "loss": 1.9691, "step": 28377 }, { "epoch": 0.9155836430327216, "grad_norm": 0.341796875, "learning_rate": 5.61421371164334e-07, "loss": 2.0097, "step": 28378 }, { "epoch": 0.915615906886518, "grad_norm": 0.3359375, "learning_rate": 5.60994927146381e-07, "loss": 1.9923, "step": 28379 }, { "epoch": 0.9156481707403142, "grad_norm": 0.326171875, "learning_rate": 5.605686420626372e-07, "loss": 1.9712, "step": 28380 }, { "epoch": 0.9156804345941106, "grad_norm": 0.337890625, "learning_rate": 5.60142515917792e-07, "loss": 2.0261, "step": 28381 }, { "epoch": 0.915712698447907, "grad_norm": 0.33203125, "learning_rate": 5.597165487165401e-07, "loss": 1.9667, "step": 28382 }, { "epoch": 0.9157449623017033, "grad_norm": 0.33203125, "learning_rate": 5.592907404635711e-07, "loss": 2.0023, "step": 28383 }, { "epoch": 0.9157772261554997, "grad_norm": 0.330078125, "learning_rate": 5.588650911635663e-07, "loss": 1.9796, "step": 28384 }, { "epoch": 0.915809490009296, "grad_norm": 0.33984375, "learning_rate": 5.5843960082122e-07, "loss": 1.9929, "step": 28385 }, { "epoch": 0.9158417538630924, "grad_norm": 0.328125, "learning_rate": 5.580142694412055e-07, "loss": 2.0059, "step": 28386 }, { "epoch": 0.9158740177168887, "grad_norm": 0.33203125, "learning_rate": 5.575890970282089e-07, "loss": 1.9892, "step": 28387 }, { "epoch": 0.9159062815706851, "grad_norm": 0.33203125, "learning_rate": 5.571640835869146e-07, "loss": 1.9719, "step": 28388 }, { "epoch": 0.9159385454244814, "grad_norm": 0.333984375, "learning_rate": 5.567392291219941e-07, "loss": 2.0015, "step": 28389 }, { "epoch": 0.9159708092782778, "grad_norm": 0.330078125, "learning_rate": 5.563145336381254e-07, "loss": 2.001, "step": 28390 }, { "epoch": 0.9160030731320741, "grad_norm": 0.328125, "learning_rate": 5.558899971399877e-07, "loss": 1.9883, "step": 28391 }, { "epoch": 0.9160353369858705, "grad_norm": 0.33203125, "learning_rate": 5.554656196322478e-07, "loss": 1.9833, "step": 28392 }, { "epoch": 0.9160676008396668, "grad_norm": 0.333984375, "learning_rate": 5.550414011195831e-07, "loss": 2.0075, "step": 28393 }, { "epoch": 0.9160998646934632, "grad_norm": 0.3359375, "learning_rate": 5.546173416066569e-07, "loss": 2.016, "step": 28394 }, { "epoch": 0.9161321285472595, "grad_norm": 0.33984375, "learning_rate": 5.541934410981403e-07, "loss": 1.9767, "step": 28395 }, { "epoch": 0.9161643924010558, "grad_norm": 0.33203125, "learning_rate": 5.537696995987013e-07, "loss": 1.9987, "step": 28396 }, { "epoch": 0.9161966562548521, "grad_norm": 0.330078125, "learning_rate": 5.533461171129978e-07, "loss": 1.988, "step": 28397 }, { "epoch": 0.9162289201086485, "grad_norm": 0.33203125, "learning_rate": 5.529226936456993e-07, "loss": 1.9915, "step": 28398 }, { "epoch": 0.9162611839624448, "grad_norm": 0.328125, "learning_rate": 5.52499429201464e-07, "loss": 1.9839, "step": 28399 }, { "epoch": 0.9162934478162412, "grad_norm": 0.326171875, "learning_rate": 5.52076323784948e-07, "loss": 1.9838, "step": 28400 }, { "epoch": 0.9163257116700375, "grad_norm": 0.33203125, "learning_rate": 5.516533774008126e-07, "loss": 1.9759, "step": 28401 }, { "epoch": 0.9163579755238339, "grad_norm": 0.33203125, "learning_rate": 5.512305900537124e-07, "loss": 1.9653, "step": 28402 }, { "epoch": 0.9163902393776303, "grad_norm": 0.341796875, "learning_rate": 5.508079617482969e-07, "loss": 1.9683, "step": 28403 }, { "epoch": 0.9164225032314266, "grad_norm": 0.326171875, "learning_rate": 5.503854924892244e-07, "loss": 1.9851, "step": 28404 }, { "epoch": 0.916454767085223, "grad_norm": 0.337890625, "learning_rate": 5.499631822811424e-07, "loss": 1.9832, "step": 28405 }, { "epoch": 0.9164870309390193, "grad_norm": 0.330078125, "learning_rate": 5.495410311286975e-07, "loss": 1.9695, "step": 28406 }, { "epoch": 0.9165192947928157, "grad_norm": 0.32421875, "learning_rate": 5.491190390365392e-07, "loss": 2.007, "step": 28407 }, { "epoch": 0.916551558646612, "grad_norm": 0.3359375, "learning_rate": 5.48697206009312e-07, "loss": 1.9627, "step": 28408 }, { "epoch": 0.9165838225004084, "grad_norm": 0.33203125, "learning_rate": 5.482755320516574e-07, "loss": 2.012, "step": 28409 }, { "epoch": 0.9166160863542047, "grad_norm": 0.33203125, "learning_rate": 5.4785401716822e-07, "loss": 1.9954, "step": 28410 }, { "epoch": 0.9166483502080011, "grad_norm": 0.3359375, "learning_rate": 5.47432661363636e-07, "loss": 2.0086, "step": 28411 }, { "epoch": 0.9166806140617973, "grad_norm": 0.330078125, "learning_rate": 5.47011464642545e-07, "loss": 1.9909, "step": 28412 }, { "epoch": 0.9167128779155937, "grad_norm": 0.33203125, "learning_rate": 5.465904270095867e-07, "loss": 1.9671, "step": 28413 }, { "epoch": 0.91674514176939, "grad_norm": 0.333984375, "learning_rate": 5.461695484693874e-07, "loss": 1.9688, "step": 28414 }, { "epoch": 0.9167774056231864, "grad_norm": 0.330078125, "learning_rate": 5.45748829026585e-07, "loss": 2.0014, "step": 28415 }, { "epoch": 0.9168096694769827, "grad_norm": 0.33203125, "learning_rate": 5.453282686858141e-07, "loss": 1.9636, "step": 28416 }, { "epoch": 0.9168419333307791, "grad_norm": 0.330078125, "learning_rate": 5.449078674516961e-07, "loss": 1.9583, "step": 28417 }, { "epoch": 0.9168741971845754, "grad_norm": 0.326171875, "learning_rate": 5.444876253288622e-07, "loss": 1.9997, "step": 28418 }, { "epoch": 0.9169064610383718, "grad_norm": 0.330078125, "learning_rate": 5.440675423219421e-07, "loss": 2.0087, "step": 28419 }, { "epoch": 0.9169387248921681, "grad_norm": 0.330078125, "learning_rate": 5.436476184355521e-07, "loss": 1.9981, "step": 28420 }, { "epoch": 0.9169709887459645, "grad_norm": 0.32421875, "learning_rate": 5.432278536743185e-07, "loss": 1.9936, "step": 28421 }, { "epoch": 0.9170032525997609, "grad_norm": 0.33984375, "learning_rate": 5.428082480428642e-07, "loss": 2.0056, "step": 28422 }, { "epoch": 0.9170355164535572, "grad_norm": 0.330078125, "learning_rate": 5.423888015458023e-07, "loss": 2.0016, "step": 28423 }, { "epoch": 0.9170677803073536, "grad_norm": 0.33203125, "learning_rate": 5.41969514187754e-07, "loss": 1.9964, "step": 28424 }, { "epoch": 0.9171000441611499, "grad_norm": 0.328125, "learning_rate": 5.415503859733323e-07, "loss": 1.9969, "step": 28425 }, { "epoch": 0.9171323080149463, "grad_norm": 0.40234375, "learning_rate": 5.411314169071502e-07, "loss": 1.9909, "step": 28426 }, { "epoch": 0.9171645718687426, "grad_norm": 0.35546875, "learning_rate": 5.407126069938239e-07, "loss": 2.0014, "step": 28427 }, { "epoch": 0.917196835722539, "grad_norm": 0.337890625, "learning_rate": 5.402939562379567e-07, "loss": 1.9576, "step": 28428 }, { "epoch": 0.9172290995763352, "grad_norm": 0.32421875, "learning_rate": 5.398754646441628e-07, "loss": 2.0218, "step": 28429 }, { "epoch": 0.9172613634301316, "grad_norm": 0.3359375, "learning_rate": 5.394571322170472e-07, "loss": 2.0031, "step": 28430 }, { "epoch": 0.9172936272839279, "grad_norm": 0.330078125, "learning_rate": 5.390389589612094e-07, "loss": 1.98, "step": 28431 }, { "epoch": 0.9173258911377243, "grad_norm": 0.328125, "learning_rate": 5.386209448812607e-07, "loss": 2.0017, "step": 28432 }, { "epoch": 0.9173581549915206, "grad_norm": 0.33203125, "learning_rate": 5.382030899817958e-07, "loss": 1.9839, "step": 28433 }, { "epoch": 0.917390418845317, "grad_norm": 0.330078125, "learning_rate": 5.37785394267416e-07, "loss": 1.9724, "step": 28434 }, { "epoch": 0.9174226826991133, "grad_norm": 0.33984375, "learning_rate": 5.373678577427211e-07, "loss": 1.978, "step": 28435 }, { "epoch": 0.9174549465529097, "grad_norm": 0.34765625, "learning_rate": 5.369504804123054e-07, "loss": 1.9887, "step": 28436 }, { "epoch": 0.917487210406706, "grad_norm": 0.33203125, "learning_rate": 5.365332622807606e-07, "loss": 1.9758, "step": 28437 }, { "epoch": 0.9175194742605024, "grad_norm": 0.353515625, "learning_rate": 5.361162033526846e-07, "loss": 1.9943, "step": 28438 }, { "epoch": 0.9175517381142987, "grad_norm": 0.3359375, "learning_rate": 5.356993036326651e-07, "loss": 1.9813, "step": 28439 }, { "epoch": 0.9175840019680951, "grad_norm": 0.33203125, "learning_rate": 5.352825631252889e-07, "loss": 2.0183, "step": 28440 }, { "epoch": 0.9176162658218914, "grad_norm": 0.3359375, "learning_rate": 5.348659818351454e-07, "loss": 2.0209, "step": 28441 }, { "epoch": 0.9176485296756878, "grad_norm": 0.337890625, "learning_rate": 5.344495597668225e-07, "loss": 1.9916, "step": 28442 }, { "epoch": 0.9176807935294842, "grad_norm": 0.33203125, "learning_rate": 5.340332969248967e-07, "loss": 2.0148, "step": 28443 }, { "epoch": 0.9177130573832805, "grad_norm": 0.322265625, "learning_rate": 5.336171933139611e-07, "loss": 1.9477, "step": 28444 }, { "epoch": 0.9177453212370769, "grad_norm": 0.33203125, "learning_rate": 5.332012489385835e-07, "loss": 1.9814, "step": 28445 }, { "epoch": 0.9177775850908731, "grad_norm": 0.337890625, "learning_rate": 5.327854638033486e-07, "loss": 2.0099, "step": 28446 }, { "epoch": 0.9178098489446695, "grad_norm": 0.32421875, "learning_rate": 5.323698379128378e-07, "loss": 2.0138, "step": 28447 }, { "epoch": 0.9178421127984658, "grad_norm": 0.3359375, "learning_rate": 5.319543712716157e-07, "loss": 1.9467, "step": 28448 }, { "epoch": 0.9178743766522622, "grad_norm": 0.32421875, "learning_rate": 5.315390638842604e-07, "loss": 1.9731, "step": 28449 }, { "epoch": 0.9179066405060585, "grad_norm": 0.333984375, "learning_rate": 5.311239157553466e-07, "loss": 2.0246, "step": 28450 }, { "epoch": 0.9179389043598549, "grad_norm": 0.33203125, "learning_rate": 5.307089268894388e-07, "loss": 1.959, "step": 28451 }, { "epoch": 0.9179711682136512, "grad_norm": 0.326171875, "learning_rate": 5.302940972911052e-07, "loss": 1.9548, "step": 28452 }, { "epoch": 0.9180034320674476, "grad_norm": 0.333984375, "learning_rate": 5.298794269649188e-07, "loss": 1.9864, "step": 28453 }, { "epoch": 0.9180356959212439, "grad_norm": 0.34375, "learning_rate": 5.294649159154341e-07, "loss": 1.9718, "step": 28454 }, { "epoch": 0.9180679597750403, "grad_norm": 0.333984375, "learning_rate": 5.29050564147221e-07, "loss": 2.0142, "step": 28455 }, { "epoch": 0.9181002236288366, "grad_norm": 0.330078125, "learning_rate": 5.286363716648373e-07, "loss": 1.9875, "step": 28456 }, { "epoch": 0.918132487482633, "grad_norm": 0.333984375, "learning_rate": 5.282223384728429e-07, "loss": 1.9819, "step": 28457 }, { "epoch": 0.9181647513364293, "grad_norm": 0.322265625, "learning_rate": 5.278084645757941e-07, "loss": 1.9832, "step": 28458 }, { "epoch": 0.9181970151902257, "grad_norm": 0.341796875, "learning_rate": 5.273947499782489e-07, "loss": 1.9973, "step": 28459 }, { "epoch": 0.918229279044022, "grad_norm": 0.34375, "learning_rate": 5.26981194684757e-07, "loss": 1.9661, "step": 28460 }, { "epoch": 0.9182615428978184, "grad_norm": 0.3359375, "learning_rate": 5.265677986998763e-07, "loss": 2.0128, "step": 28461 }, { "epoch": 0.9182938067516146, "grad_norm": 0.330078125, "learning_rate": 5.261545620281516e-07, "loss": 1.996, "step": 28462 }, { "epoch": 0.918326070605411, "grad_norm": 0.333984375, "learning_rate": 5.25741484674136e-07, "loss": 2.0132, "step": 28463 }, { "epoch": 0.9183583344592074, "grad_norm": 0.328125, "learning_rate": 5.253285666423724e-07, "loss": 1.9732, "step": 28464 }, { "epoch": 0.9183905983130037, "grad_norm": 0.3203125, "learning_rate": 5.249158079374089e-07, "loss": 1.9271, "step": 28465 }, { "epoch": 0.9184228621668001, "grad_norm": 0.34375, "learning_rate": 5.245032085637885e-07, "loss": 1.981, "step": 28466 }, { "epoch": 0.9184551260205964, "grad_norm": 0.328125, "learning_rate": 5.240907685260509e-07, "loss": 1.9979, "step": 28467 }, { "epoch": 0.9184873898743928, "grad_norm": 0.328125, "learning_rate": 5.236784878287376e-07, "loss": 1.9864, "step": 28468 }, { "epoch": 0.9185196537281891, "grad_norm": 0.333984375, "learning_rate": 5.232663664763865e-07, "loss": 1.9696, "step": 28469 }, { "epoch": 0.9185519175819855, "grad_norm": 0.33203125, "learning_rate": 5.22854404473534e-07, "loss": 1.9872, "step": 28470 }, { "epoch": 0.9185841814357818, "grad_norm": 0.33203125, "learning_rate": 5.22442601824713e-07, "loss": 1.9939, "step": 28471 }, { "epoch": 0.9186164452895782, "grad_norm": 0.330078125, "learning_rate": 5.220309585344585e-07, "loss": 1.9842, "step": 28472 }, { "epoch": 0.9186487091433745, "grad_norm": 0.333984375, "learning_rate": 5.216194746073016e-07, "loss": 1.9673, "step": 28473 }, { "epoch": 0.9186809729971709, "grad_norm": 0.328125, "learning_rate": 5.212081500477689e-07, "loss": 1.9941, "step": 28474 }, { "epoch": 0.9187132368509672, "grad_norm": 0.330078125, "learning_rate": 5.207969848603933e-07, "loss": 2.0132, "step": 28475 }, { "epoch": 0.9187455007047636, "grad_norm": 0.33203125, "learning_rate": 5.203859790496929e-07, "loss": 2.0032, "step": 28476 }, { "epoch": 0.9187777645585599, "grad_norm": 0.33203125, "learning_rate": 5.199751326201957e-07, "loss": 1.9779, "step": 28477 }, { "epoch": 0.9188100284123563, "grad_norm": 0.3359375, "learning_rate": 5.195644455764298e-07, "loss": 2.0028, "step": 28478 }, { "epoch": 0.9188422922661525, "grad_norm": 0.330078125, "learning_rate": 5.191539179229049e-07, "loss": 1.9884, "step": 28479 }, { "epoch": 0.918874556119949, "grad_norm": 0.33203125, "learning_rate": 5.187435496641457e-07, "loss": 2.0099, "step": 28480 }, { "epoch": 0.9189068199737452, "grad_norm": 0.33984375, "learning_rate": 5.183333408046737e-07, "loss": 1.9438, "step": 28481 }, { "epoch": 0.9189390838275416, "grad_norm": 0.3359375, "learning_rate": 5.179232913489934e-07, "loss": 2.0038, "step": 28482 }, { "epoch": 0.918971347681338, "grad_norm": 0.328125, "learning_rate": 5.175134013016264e-07, "loss": 2.0303, "step": 28483 }, { "epoch": 0.9190036115351343, "grad_norm": 0.337890625, "learning_rate": 5.171036706670839e-07, "loss": 1.9879, "step": 28484 }, { "epoch": 0.9190358753889307, "grad_norm": 0.333984375, "learning_rate": 5.166940994498709e-07, "loss": 1.9946, "step": 28485 }, { "epoch": 0.919068139242727, "grad_norm": 0.33984375, "learning_rate": 5.162846876545019e-07, "loss": 1.9726, "step": 28486 }, { "epoch": 0.9191004030965234, "grad_norm": 0.33984375, "learning_rate": 5.158754352854783e-07, "loss": 2.0117, "step": 28487 }, { "epoch": 0.9191326669503197, "grad_norm": 0.337890625, "learning_rate": 5.154663423473082e-07, "loss": 1.9763, "step": 28488 }, { "epoch": 0.9191649308041161, "grad_norm": 0.328125, "learning_rate": 5.150574088444931e-07, "loss": 1.9939, "step": 28489 }, { "epoch": 0.9191971946579124, "grad_norm": 0.333984375, "learning_rate": 5.146486347815344e-07, "loss": 1.9846, "step": 28490 }, { "epoch": 0.9192294585117088, "grad_norm": 0.330078125, "learning_rate": 5.142400201629315e-07, "loss": 1.9865, "step": 28491 }, { "epoch": 0.9192617223655051, "grad_norm": 0.328125, "learning_rate": 5.13831564993183e-07, "loss": 1.9794, "step": 28492 }, { "epoch": 0.9192939862193015, "grad_norm": 0.326171875, "learning_rate": 5.134232692767848e-07, "loss": 1.9904, "step": 28493 }, { "epoch": 0.9193262500730978, "grad_norm": 0.337890625, "learning_rate": 5.130151330182286e-07, "loss": 2.011, "step": 28494 }, { "epoch": 0.9193585139268942, "grad_norm": 0.333984375, "learning_rate": 5.126071562220108e-07, "loss": 1.9938, "step": 28495 }, { "epoch": 0.9193907777806904, "grad_norm": 0.349609375, "learning_rate": 5.121993388926194e-07, "loss": 1.9824, "step": 28496 }, { "epoch": 0.9194230416344868, "grad_norm": 0.333984375, "learning_rate": 5.117916810345441e-07, "loss": 1.9882, "step": 28497 }, { "epoch": 0.9194553054882831, "grad_norm": 0.33203125, "learning_rate": 5.113841826522731e-07, "loss": 1.968, "step": 28498 }, { "epoch": 0.9194875693420795, "grad_norm": 0.337890625, "learning_rate": 5.109768437502893e-07, "loss": 1.9666, "step": 28499 }, { "epoch": 0.9195198331958758, "grad_norm": 0.34375, "learning_rate": 5.10569664333081e-07, "loss": 1.9831, "step": 28500 }, { "epoch": 0.9195520970496722, "grad_norm": 0.33984375, "learning_rate": 5.101626444051243e-07, "loss": 1.9652, "step": 28501 }, { "epoch": 0.9195843609034685, "grad_norm": 0.328125, "learning_rate": 5.097557839709027e-07, "loss": 1.9932, "step": 28502 }, { "epoch": 0.9196166247572649, "grad_norm": 0.326171875, "learning_rate": 5.093490830348956e-07, "loss": 1.9673, "step": 28503 }, { "epoch": 0.9196488886110613, "grad_norm": 0.330078125, "learning_rate": 5.08942541601578e-07, "loss": 1.9855, "step": 28504 }, { "epoch": 0.9196811524648576, "grad_norm": 0.333984375, "learning_rate": 5.085361596754229e-07, "loss": 1.9604, "step": 28505 }, { "epoch": 0.919713416318654, "grad_norm": 0.326171875, "learning_rate": 5.0812993726091e-07, "loss": 1.9721, "step": 28506 }, { "epoch": 0.9197456801724503, "grad_norm": 0.33203125, "learning_rate": 5.077238743625024e-07, "loss": 1.9748, "step": 28507 }, { "epoch": 0.9197779440262467, "grad_norm": 0.33203125, "learning_rate": 5.073179709846731e-07, "loss": 1.9867, "step": 28508 }, { "epoch": 0.919810207880043, "grad_norm": 0.328125, "learning_rate": 5.069122271318938e-07, "loss": 1.9917, "step": 28509 }, { "epoch": 0.9198424717338394, "grad_norm": 0.328125, "learning_rate": 5.065066428086257e-07, "loss": 1.9552, "step": 28510 }, { "epoch": 0.9198747355876357, "grad_norm": 0.326171875, "learning_rate": 5.061012180193336e-07, "loss": 2.0051, "step": 28511 }, { "epoch": 0.9199069994414321, "grad_norm": 0.328125, "learning_rate": 5.05695952768484e-07, "loss": 2.0005, "step": 28512 }, { "epoch": 0.9199392632952283, "grad_norm": 0.33984375, "learning_rate": 5.052908470605333e-07, "loss": 1.9922, "step": 28513 }, { "epoch": 0.9199715271490247, "grad_norm": 0.330078125, "learning_rate": 5.048859008999446e-07, "loss": 2.0195, "step": 28514 }, { "epoch": 0.920003791002821, "grad_norm": 0.328125, "learning_rate": 5.04481114291171e-07, "loss": 2.0043, "step": 28515 }, { "epoch": 0.9200360548566174, "grad_norm": 0.333984375, "learning_rate": 5.040764872386705e-07, "loss": 2.0096, "step": 28516 }, { "epoch": 0.9200683187104137, "grad_norm": 0.333984375, "learning_rate": 5.036720197468981e-07, "loss": 1.9727, "step": 28517 }, { "epoch": 0.9201005825642101, "grad_norm": 0.34375, "learning_rate": 5.032677118203033e-07, "loss": 1.9995, "step": 28518 }, { "epoch": 0.9201328464180064, "grad_norm": 0.326171875, "learning_rate": 5.028635634633378e-07, "loss": 1.973, "step": 28519 }, { "epoch": 0.9201651102718028, "grad_norm": 0.330078125, "learning_rate": 5.024595746804495e-07, "loss": 1.992, "step": 28520 }, { "epoch": 0.9201973741255991, "grad_norm": 0.330078125, "learning_rate": 5.020557454760865e-07, "loss": 2.0035, "step": 28521 }, { "epoch": 0.9202296379793955, "grad_norm": 0.333984375, "learning_rate": 5.016520758546921e-07, "loss": 1.9797, "step": 28522 }, { "epoch": 0.9202619018331919, "grad_norm": 0.333984375, "learning_rate": 5.012485658207127e-07, "loss": 1.9934, "step": 28523 }, { "epoch": 0.9202941656869882, "grad_norm": 0.328125, "learning_rate": 5.008452153785864e-07, "loss": 1.9682, "step": 28524 }, { "epoch": 0.9203264295407846, "grad_norm": 0.333984375, "learning_rate": 5.004420245327529e-07, "loss": 2.0202, "step": 28525 }, { "epoch": 0.9203586933945809, "grad_norm": 0.330078125, "learning_rate": 5.000389932876536e-07, "loss": 1.9584, "step": 28526 }, { "epoch": 0.9203909572483773, "grad_norm": 0.330078125, "learning_rate": 4.996361216477219e-07, "loss": 2.0007, "step": 28527 }, { "epoch": 0.9204232211021736, "grad_norm": 0.337890625, "learning_rate": 4.992334096173939e-07, "loss": 1.9866, "step": 28528 }, { "epoch": 0.92045548495597, "grad_norm": 0.326171875, "learning_rate": 4.988308572011013e-07, "loss": 1.9734, "step": 28529 }, { "epoch": 0.9204877488097662, "grad_norm": 0.333984375, "learning_rate": 4.984284644032755e-07, "loss": 1.9978, "step": 28530 }, { "epoch": 0.9205200126635626, "grad_norm": 0.326171875, "learning_rate": 4.980262312283462e-07, "loss": 1.9889, "step": 28531 }, { "epoch": 0.9205522765173589, "grad_norm": 0.3359375, "learning_rate": 4.9762415768074e-07, "loss": 2.0036, "step": 28532 }, { "epoch": 0.9205845403711553, "grad_norm": 0.33203125, "learning_rate": 4.972222437648832e-07, "loss": 2.0276, "step": 28533 }, { "epoch": 0.9206168042249516, "grad_norm": 0.330078125, "learning_rate": 4.968204894852007e-07, "loss": 2.0052, "step": 28534 }, { "epoch": 0.920649068078748, "grad_norm": 0.349609375, "learning_rate": 4.964188948461124e-07, "loss": 1.983, "step": 28535 }, { "epoch": 0.9206813319325443, "grad_norm": 0.3359375, "learning_rate": 4.960174598520395e-07, "loss": 1.986, "step": 28536 }, { "epoch": 0.9207135957863407, "grad_norm": 0.3203125, "learning_rate": 4.956161845074054e-07, "loss": 1.9723, "step": 28537 }, { "epoch": 0.920745859640137, "grad_norm": 0.328125, "learning_rate": 4.95215068816618e-07, "loss": 1.9538, "step": 28538 }, { "epoch": 0.9207781234939334, "grad_norm": 0.330078125, "learning_rate": 4.948141127840988e-07, "loss": 1.9618, "step": 28539 }, { "epoch": 0.9208103873477297, "grad_norm": 0.33984375, "learning_rate": 4.944133164142627e-07, "loss": 1.992, "step": 28540 }, { "epoch": 0.9208426512015261, "grad_norm": 0.328125, "learning_rate": 4.940126797115147e-07, "loss": 1.984, "step": 28541 }, { "epoch": 0.9208749150553224, "grad_norm": 0.326171875, "learning_rate": 4.936122026802692e-07, "loss": 1.9829, "step": 28542 }, { "epoch": 0.9209071789091188, "grad_norm": 0.333984375, "learning_rate": 4.932118853249363e-07, "loss": 1.9746, "step": 28543 }, { "epoch": 0.9209394427629152, "grad_norm": 0.333984375, "learning_rate": 4.928117276499172e-07, "loss": 2.0187, "step": 28544 }, { "epoch": 0.9209717066167115, "grad_norm": 0.328125, "learning_rate": 4.924117296596204e-07, "loss": 2.019, "step": 28545 }, { "epoch": 0.9210039704705079, "grad_norm": 0.333984375, "learning_rate": 4.920118913584487e-07, "loss": 2.0175, "step": 28546 }, { "epoch": 0.9210362343243041, "grad_norm": 0.333984375, "learning_rate": 4.916122127507988e-07, "loss": 1.9822, "step": 28547 }, { "epoch": 0.9210684981781005, "grad_norm": 0.359375, "learning_rate": 4.912126938410755e-07, "loss": 2.0176, "step": 28548 }, { "epoch": 0.9211007620318968, "grad_norm": 0.326171875, "learning_rate": 4.908133346336752e-07, "loss": 1.9657, "step": 28549 }, { "epoch": 0.9211330258856932, "grad_norm": 0.326171875, "learning_rate": 4.904141351329911e-07, "loss": 1.9728, "step": 28550 }, { "epoch": 0.9211652897394895, "grad_norm": 0.328125, "learning_rate": 4.900150953434196e-07, "loss": 2.0127, "step": 28551 }, { "epoch": 0.9211975535932859, "grad_norm": 0.33203125, "learning_rate": 4.896162152693539e-07, "loss": 2.0101, "step": 28552 }, { "epoch": 0.9212298174470822, "grad_norm": 0.337890625, "learning_rate": 4.892174949151806e-07, "loss": 1.9715, "step": 28553 }, { "epoch": 0.9212620813008786, "grad_norm": 0.322265625, "learning_rate": 4.888189342852928e-07, "loss": 1.9658, "step": 28554 }, { "epoch": 0.9212943451546749, "grad_norm": 0.33203125, "learning_rate": 4.884205333840769e-07, "loss": 1.9923, "step": 28555 }, { "epoch": 0.9213266090084713, "grad_norm": 0.33203125, "learning_rate": 4.880222922159144e-07, "loss": 2.0053, "step": 28556 }, { "epoch": 0.9213588728622676, "grad_norm": 0.333984375, "learning_rate": 4.876242107851936e-07, "loss": 1.9748, "step": 28557 }, { "epoch": 0.921391136716064, "grad_norm": 0.328125, "learning_rate": 4.872262890962941e-07, "loss": 1.9844, "step": 28558 }, { "epoch": 0.9214234005698603, "grad_norm": 0.326171875, "learning_rate": 4.868285271535944e-07, "loss": 2.0219, "step": 28559 }, { "epoch": 0.9214556644236567, "grad_norm": 0.33203125, "learning_rate": 4.864309249614773e-07, "loss": 2.0334, "step": 28560 }, { "epoch": 0.921487928277453, "grad_norm": 0.333984375, "learning_rate": 4.860334825243146e-07, "loss": 2.0064, "step": 28561 }, { "epoch": 0.9215201921312494, "grad_norm": 0.330078125, "learning_rate": 4.856361998464843e-07, "loss": 1.9808, "step": 28562 }, { "epoch": 0.9215524559850456, "grad_norm": 0.34375, "learning_rate": 4.852390769323578e-07, "loss": 1.9973, "step": 28563 }, { "epoch": 0.921584719838842, "grad_norm": 0.326171875, "learning_rate": 4.848421137863068e-07, "loss": 1.9959, "step": 28564 }, { "epoch": 0.9216169836926384, "grad_norm": 0.322265625, "learning_rate": 4.844453104127028e-07, "loss": 2.0094, "step": 28565 }, { "epoch": 0.9216492475464347, "grad_norm": 0.326171875, "learning_rate": 4.840486668159072e-07, "loss": 1.9708, "step": 28566 }, { "epoch": 0.9216815114002311, "grad_norm": 0.330078125, "learning_rate": 4.836521830002916e-07, "loss": 2.0092, "step": 28567 }, { "epoch": 0.9217137752540274, "grad_norm": 0.3359375, "learning_rate": 4.832558589702207e-07, "loss": 1.9809, "step": 28568 }, { "epoch": 0.9217460391078238, "grad_norm": 0.33203125, "learning_rate": 4.828596947300529e-07, "loss": 1.9701, "step": 28569 }, { "epoch": 0.9217783029616201, "grad_norm": 0.33203125, "learning_rate": 4.824636902841511e-07, "loss": 2.0146, "step": 28570 }, { "epoch": 0.9218105668154165, "grad_norm": 0.328125, "learning_rate": 4.820678456368771e-07, "loss": 1.9665, "step": 28571 }, { "epoch": 0.9218428306692128, "grad_norm": 0.3359375, "learning_rate": 4.816721607925806e-07, "loss": 1.9795, "step": 28572 }, { "epoch": 0.9218750945230092, "grad_norm": 0.34375, "learning_rate": 4.812766357556231e-07, "loss": 1.9566, "step": 28573 }, { "epoch": 0.9219073583768055, "grad_norm": 0.33203125, "learning_rate": 4.808812705303595e-07, "loss": 1.9759, "step": 28574 }, { "epoch": 0.9219396222306019, "grad_norm": 0.33984375, "learning_rate": 4.804860651211362e-07, "loss": 2.0073, "step": 28575 }, { "epoch": 0.9219718860843982, "grad_norm": 0.3359375, "learning_rate": 4.800910195323066e-07, "loss": 2.0305, "step": 28576 }, { "epoch": 0.9220041499381946, "grad_norm": 0.3359375, "learning_rate": 4.796961337682204e-07, "loss": 2.0232, "step": 28577 }, { "epoch": 0.9220364137919909, "grad_norm": 0.341796875, "learning_rate": 4.793014078332191e-07, "loss": 1.9923, "step": 28578 }, { "epoch": 0.9220686776457873, "grad_norm": 0.326171875, "learning_rate": 4.789068417316528e-07, "loss": 1.9999, "step": 28579 }, { "epoch": 0.9221009414995835, "grad_norm": 0.330078125, "learning_rate": 4.785124354678627e-07, "loss": 2.0192, "step": 28580 }, { "epoch": 0.92213320535338, "grad_norm": 0.33984375, "learning_rate": 4.781181890461872e-07, "loss": 1.9654, "step": 28581 }, { "epoch": 0.9221654692071762, "grad_norm": 0.330078125, "learning_rate": 4.77724102470971e-07, "loss": 1.974, "step": 28582 }, { "epoch": 0.9221977330609726, "grad_norm": 0.33203125, "learning_rate": 4.77330175746551e-07, "loss": 2.0076, "step": 28583 }, { "epoch": 0.922229996914769, "grad_norm": 0.330078125, "learning_rate": 4.769364088772598e-07, "loss": 1.9544, "step": 28584 }, { "epoch": 0.9222622607685653, "grad_norm": 0.3359375, "learning_rate": 4.765428018674345e-07, "loss": 2.0003, "step": 28585 }, { "epoch": 0.9222945246223617, "grad_norm": 0.328125, "learning_rate": 4.76149354721408e-07, "loss": 1.9847, "step": 28586 }, { "epoch": 0.922326788476158, "grad_norm": 0.333984375, "learning_rate": 4.7575606744350687e-07, "loss": 1.9745, "step": 28587 }, { "epoch": 0.9223590523299544, "grad_norm": 0.330078125, "learning_rate": 4.7536294003806604e-07, "loss": 1.9946, "step": 28588 }, { "epoch": 0.9223913161837507, "grad_norm": 0.33984375, "learning_rate": 4.749699725094103e-07, "loss": 1.9618, "step": 28589 }, { "epoch": 0.9224235800375471, "grad_norm": 0.341796875, "learning_rate": 4.74577164861863e-07, "loss": 1.9958, "step": 28590 }, { "epoch": 0.9224558438913434, "grad_norm": 0.330078125, "learning_rate": 4.741845170997505e-07, "loss": 2.0079, "step": 28591 }, { "epoch": 0.9224881077451398, "grad_norm": 0.3359375, "learning_rate": 4.7379202922739453e-07, "loss": 2.0039, "step": 28592 }, { "epoch": 0.9225203715989361, "grad_norm": 0.33203125, "learning_rate": 4.733997012491148e-07, "loss": 1.9836, "step": 28593 }, { "epoch": 0.9225526354527325, "grad_norm": 0.326171875, "learning_rate": 4.7300753316922964e-07, "loss": 1.9948, "step": 28594 }, { "epoch": 0.9225848993065288, "grad_norm": 0.33984375, "learning_rate": 4.726155249920555e-07, "loss": 1.9833, "step": 28595 }, { "epoch": 0.9226171631603252, "grad_norm": 0.33984375, "learning_rate": 4.7222367672190905e-07, "loss": 1.9954, "step": 28596 }, { "epoch": 0.9226494270141214, "grad_norm": 0.330078125, "learning_rate": 4.718319883631e-07, "loss": 1.9682, "step": 28597 }, { "epoch": 0.9226816908679178, "grad_norm": 0.328125, "learning_rate": 4.7144045991994344e-07, "loss": 1.9733, "step": 28598 }, { "epoch": 0.9227139547217141, "grad_norm": 0.326171875, "learning_rate": 4.7104909139674913e-07, "loss": 1.9995, "step": 28599 }, { "epoch": 0.9227462185755105, "grad_norm": 0.33203125, "learning_rate": 4.706578827978203e-07, "loss": 1.9887, "step": 28600 }, { "epoch": 0.9227784824293068, "grad_norm": 0.33203125, "learning_rate": 4.7026683412746686e-07, "loss": 1.9884, "step": 28601 }, { "epoch": 0.9228107462831032, "grad_norm": 0.33203125, "learning_rate": 4.6987594538999533e-07, "loss": 1.9822, "step": 28602 }, { "epoch": 0.9228430101368995, "grad_norm": 0.33203125, "learning_rate": 4.6948521658970234e-07, "loss": 2.0073, "step": 28603 }, { "epoch": 0.9228752739906959, "grad_norm": 0.337890625, "learning_rate": 4.6909464773089107e-07, "loss": 1.9872, "step": 28604 }, { "epoch": 0.9229075378444923, "grad_norm": 0.328125, "learning_rate": 4.687042388178664e-07, "loss": 2.0014, "step": 28605 }, { "epoch": 0.9229398016982886, "grad_norm": 0.33984375, "learning_rate": 4.6831398985491655e-07, "loss": 1.9897, "step": 28606 }, { "epoch": 0.922972065552085, "grad_norm": 0.361328125, "learning_rate": 4.679239008463432e-07, "loss": 1.97, "step": 28607 }, { "epoch": 0.9230043294058813, "grad_norm": 0.330078125, "learning_rate": 4.675339717964378e-07, "loss": 1.9792, "step": 28608 }, { "epoch": 0.9230365932596777, "grad_norm": 0.328125, "learning_rate": 4.671442027094919e-07, "loss": 1.9849, "step": 28609 }, { "epoch": 0.923068857113474, "grad_norm": 0.33203125, "learning_rate": 4.667545935897988e-07, "loss": 1.9822, "step": 28610 }, { "epoch": 0.9231011209672704, "grad_norm": 0.33203125, "learning_rate": 4.663651444416434e-07, "loss": 1.9769, "step": 28611 }, { "epoch": 0.9231333848210667, "grad_norm": 0.33203125, "learning_rate": 4.6597585526931385e-07, "loss": 2.0075, "step": 28612 }, { "epoch": 0.9231656486748631, "grad_norm": 0.330078125, "learning_rate": 4.6558672607709684e-07, "loss": 1.9991, "step": 28613 }, { "epoch": 0.9231979125286593, "grad_norm": 0.330078125, "learning_rate": 4.651977568692739e-07, "loss": 1.9603, "step": 28614 }, { "epoch": 0.9232301763824557, "grad_norm": 0.330078125, "learning_rate": 4.6480894765012493e-07, "loss": 1.9862, "step": 28615 }, { "epoch": 0.923262440236252, "grad_norm": 0.337890625, "learning_rate": 4.644202984239332e-07, "loss": 1.9868, "step": 28616 }, { "epoch": 0.9232947040900484, "grad_norm": 0.330078125, "learning_rate": 4.6403180919497524e-07, "loss": 1.9941, "step": 28617 }, { "epoch": 0.9233269679438447, "grad_norm": 0.330078125, "learning_rate": 4.63643479967526e-07, "loss": 2.0113, "step": 28618 }, { "epoch": 0.9233592317976411, "grad_norm": 0.3359375, "learning_rate": 4.63255310745862e-07, "loss": 1.9603, "step": 28619 }, { "epoch": 0.9233914956514374, "grad_norm": 0.33203125, "learning_rate": 4.6286730153425493e-07, "loss": 1.98, "step": 28620 }, { "epoch": 0.9234237595052338, "grad_norm": 0.333984375, "learning_rate": 4.6247945233697466e-07, "loss": 1.9927, "step": 28621 }, { "epoch": 0.9234560233590301, "grad_norm": 0.33203125, "learning_rate": 4.620917631582927e-07, "loss": 1.9988, "step": 28622 }, { "epoch": 0.9234882872128265, "grad_norm": 0.330078125, "learning_rate": 4.6170423400247574e-07, "loss": 1.9756, "step": 28623 }, { "epoch": 0.9235205510666229, "grad_norm": 0.33984375, "learning_rate": 4.6131686487378697e-07, "loss": 2.0014, "step": 28624 }, { "epoch": 0.9235528149204192, "grad_norm": 0.326171875, "learning_rate": 4.6092965577649305e-07, "loss": 1.9842, "step": 28625 }, { "epoch": 0.9235850787742156, "grad_norm": 0.337890625, "learning_rate": 4.605426067148538e-07, "loss": 2.0, "step": 28626 }, { "epoch": 0.9236173426280119, "grad_norm": 0.3359375, "learning_rate": 4.6015571769313425e-07, "loss": 1.9488, "step": 28627 }, { "epoch": 0.9236496064818083, "grad_norm": 0.33203125, "learning_rate": 4.597689887155859e-07, "loss": 1.9771, "step": 28628 }, { "epoch": 0.9236818703356046, "grad_norm": 0.33203125, "learning_rate": 4.593824197864704e-07, "loss": 1.9788, "step": 28629 }, { "epoch": 0.923714134189401, "grad_norm": 0.333984375, "learning_rate": 4.589960109100444e-07, "loss": 1.97, "step": 28630 }, { "epoch": 0.9237463980431972, "grad_norm": 0.328125, "learning_rate": 4.5860976209055607e-07, "loss": 1.9733, "step": 28631 }, { "epoch": 0.9237786618969936, "grad_norm": 0.33203125, "learning_rate": 4.5822367333226034e-07, "loss": 1.9887, "step": 28632 }, { "epoch": 0.9238109257507899, "grad_norm": 0.326171875, "learning_rate": 4.5783774463940717e-07, "loss": 1.9721, "step": 28633 }, { "epoch": 0.9238431896045863, "grad_norm": 0.33203125, "learning_rate": 4.5745197601624157e-07, "loss": 2.0043, "step": 28634 }, { "epoch": 0.9238754534583826, "grad_norm": 0.33203125, "learning_rate": 4.570663674670117e-07, "loss": 1.9556, "step": 28635 }, { "epoch": 0.923907717312179, "grad_norm": 0.341796875, "learning_rate": 4.566809189959659e-07, "loss": 1.9871, "step": 28636 }, { "epoch": 0.9239399811659753, "grad_norm": 0.345703125, "learning_rate": 4.5629563060733904e-07, "loss": 1.9872, "step": 28637 }, { "epoch": 0.9239722450197717, "grad_norm": 0.337890625, "learning_rate": 4.5591050230537944e-07, "loss": 1.9859, "step": 28638 }, { "epoch": 0.924004508873568, "grad_norm": 0.333984375, "learning_rate": 4.555255340943221e-07, "loss": 2.0003, "step": 28639 }, { "epoch": 0.9240367727273644, "grad_norm": 0.330078125, "learning_rate": 4.551407259784035e-07, "loss": 1.9867, "step": 28640 }, { "epoch": 0.9240690365811607, "grad_norm": 0.3359375, "learning_rate": 4.547560779618637e-07, "loss": 1.9354, "step": 28641 }, { "epoch": 0.9241013004349571, "grad_norm": 0.326171875, "learning_rate": 4.5437159004893427e-07, "loss": 1.9802, "step": 28642 }, { "epoch": 0.9241335642887534, "grad_norm": 0.337890625, "learning_rate": 4.5398726224384514e-07, "loss": 2.0037, "step": 28643 }, { "epoch": 0.9241658281425498, "grad_norm": 0.326171875, "learning_rate": 4.536030945508313e-07, "loss": 1.9619, "step": 28644 }, { "epoch": 0.9241980919963462, "grad_norm": 0.345703125, "learning_rate": 4.5321908697411764e-07, "loss": 2.0133, "step": 28645 }, { "epoch": 0.9242303558501425, "grad_norm": 0.326171875, "learning_rate": 4.528352395179325e-07, "loss": 1.9978, "step": 28646 }, { "epoch": 0.9242626197039389, "grad_norm": 0.328125, "learning_rate": 4.524515521865025e-07, "loss": 1.983, "step": 28647 }, { "epoch": 0.9242948835577351, "grad_norm": 0.33984375, "learning_rate": 4.5206802498404754e-07, "loss": 1.9749, "step": 28648 }, { "epoch": 0.9243271474115315, "grad_norm": 0.330078125, "learning_rate": 4.5168465791479096e-07, "loss": 2.0043, "step": 28649 }, { "epoch": 0.9243594112653278, "grad_norm": 0.33203125, "learning_rate": 4.513014509829544e-07, "loss": 2.0007, "step": 28650 }, { "epoch": 0.9243916751191242, "grad_norm": 0.337890625, "learning_rate": 4.509184041927544e-07, "loss": 2.0135, "step": 28651 }, { "epoch": 0.9244239389729205, "grad_norm": 0.3359375, "learning_rate": 4.5053551754840603e-07, "loss": 2.0031, "step": 28652 }, { "epoch": 0.9244562028267169, "grad_norm": 0.326171875, "learning_rate": 4.501527910541259e-07, "loss": 1.9865, "step": 28653 }, { "epoch": 0.9244884666805132, "grad_norm": 0.341796875, "learning_rate": 4.4977022471412554e-07, "loss": 1.9871, "step": 28654 }, { "epoch": 0.9245207305343096, "grad_norm": 0.337890625, "learning_rate": 4.49387818532615e-07, "loss": 1.9934, "step": 28655 }, { "epoch": 0.9245529943881059, "grad_norm": 0.33203125, "learning_rate": 4.490055725138059e-07, "loss": 1.9653, "step": 28656 }, { "epoch": 0.9245852582419023, "grad_norm": 0.330078125, "learning_rate": 4.486234866619049e-07, "loss": 1.9988, "step": 28657 }, { "epoch": 0.9246175220956986, "grad_norm": 0.337890625, "learning_rate": 4.4824156098111693e-07, "loss": 1.967, "step": 28658 }, { "epoch": 0.924649785949495, "grad_norm": 0.333984375, "learning_rate": 4.4785979547564525e-07, "loss": 1.9374, "step": 28659 }, { "epoch": 0.9246820498032913, "grad_norm": 0.333984375, "learning_rate": 4.4747819014969325e-07, "loss": 1.9775, "step": 28660 }, { "epoch": 0.9247143136570877, "grad_norm": 0.333984375, "learning_rate": 4.470967450074659e-07, "loss": 1.9833, "step": 28661 }, { "epoch": 0.924746577510884, "grad_norm": 0.333984375, "learning_rate": 4.4671546005315313e-07, "loss": 1.9837, "step": 28662 }, { "epoch": 0.9247788413646804, "grad_norm": 0.330078125, "learning_rate": 4.463343352909549e-07, "loss": 1.9641, "step": 28663 }, { "epoch": 0.9248111052184766, "grad_norm": 0.326171875, "learning_rate": 4.459533707250729e-07, "loss": 1.9794, "step": 28664 }, { "epoch": 0.924843369072273, "grad_norm": 0.326171875, "learning_rate": 4.4557256635969045e-07, "loss": 2.0155, "step": 28665 }, { "epoch": 0.9248756329260694, "grad_norm": 0.33203125, "learning_rate": 4.4519192219900585e-07, "loss": 1.9786, "step": 28666 }, { "epoch": 0.9249078967798657, "grad_norm": 0.328125, "learning_rate": 4.4481143824720736e-07, "loss": 1.9876, "step": 28667 }, { "epoch": 0.9249401606336621, "grad_norm": 0.33984375, "learning_rate": 4.4443111450848005e-07, "loss": 1.9903, "step": 28668 }, { "epoch": 0.9249724244874584, "grad_norm": 0.33203125, "learning_rate": 4.4405095098701553e-07, "loss": 1.9977, "step": 28669 }, { "epoch": 0.9250046883412548, "grad_norm": 0.330078125, "learning_rate": 4.4367094768699544e-07, "loss": 2.0022, "step": 28670 }, { "epoch": 0.9250369521950511, "grad_norm": 0.341796875, "learning_rate": 4.4329110461260147e-07, "loss": 1.9975, "step": 28671 }, { "epoch": 0.9250692160488475, "grad_norm": 0.330078125, "learning_rate": 4.429114217680186e-07, "loss": 2.0069, "step": 28672 }, { "epoch": 0.9251014799026438, "grad_norm": 0.337890625, "learning_rate": 4.4253189915742175e-07, "loss": 1.9917, "step": 28673 }, { "epoch": 0.9251337437564402, "grad_norm": 0.330078125, "learning_rate": 4.4215253678498933e-07, "loss": 1.9852, "step": 28674 }, { "epoch": 0.9251660076102365, "grad_norm": 0.333984375, "learning_rate": 4.4177333465490135e-07, "loss": 2.0058, "step": 28675 }, { "epoch": 0.9251982714640329, "grad_norm": 0.337890625, "learning_rate": 4.4139429277132606e-07, "loss": 1.9801, "step": 28676 }, { "epoch": 0.9252305353178292, "grad_norm": 0.326171875, "learning_rate": 4.410154111384368e-07, "loss": 1.9958, "step": 28677 }, { "epoch": 0.9252627991716256, "grad_norm": 0.333984375, "learning_rate": 4.406366897604086e-07, "loss": 1.9722, "step": 28678 }, { "epoch": 0.9252950630254219, "grad_norm": 0.326171875, "learning_rate": 4.402581286414048e-07, "loss": 2.003, "step": 28679 }, { "epoch": 0.9253273268792183, "grad_norm": 0.328125, "learning_rate": 4.398797277855937e-07, "loss": 1.9542, "step": 28680 }, { "epoch": 0.9253595907330145, "grad_norm": 0.328125, "learning_rate": 4.3950148719714367e-07, "loss": 2.0031, "step": 28681 }, { "epoch": 0.925391854586811, "grad_norm": 0.330078125, "learning_rate": 4.3912340688021467e-07, "loss": 2.0125, "step": 28682 }, { "epoch": 0.9254241184406072, "grad_norm": 0.326171875, "learning_rate": 4.387454868389684e-07, "loss": 2.003, "step": 28683 }, { "epoch": 0.9254563822944036, "grad_norm": 0.3359375, "learning_rate": 4.3836772707756655e-07, "loss": 1.9959, "step": 28684 }, { "epoch": 0.9254886461482, "grad_norm": 0.330078125, "learning_rate": 4.379901276001658e-07, "loss": 1.9835, "step": 28685 }, { "epoch": 0.9255209100019963, "grad_norm": 0.34375, "learning_rate": 4.376126884109227e-07, "loss": 1.9728, "step": 28686 }, { "epoch": 0.9255531738557927, "grad_norm": 0.330078125, "learning_rate": 4.3723540951399244e-07, "loss": 1.9903, "step": 28687 }, { "epoch": 0.925585437709589, "grad_norm": 0.33203125, "learning_rate": 4.3685829091352826e-07, "loss": 2.0033, "step": 28688 }, { "epoch": 0.9256177015633854, "grad_norm": 0.328125, "learning_rate": 4.3648133261368015e-07, "loss": 1.983, "step": 28689 }, { "epoch": 0.9256499654171817, "grad_norm": 0.328125, "learning_rate": 4.3610453461859656e-07, "loss": 1.9585, "step": 28690 }, { "epoch": 0.9256822292709781, "grad_norm": 0.333984375, "learning_rate": 4.357278969324274e-07, "loss": 1.989, "step": 28691 }, { "epoch": 0.9257144931247744, "grad_norm": 0.32421875, "learning_rate": 4.3535141955931613e-07, "loss": 1.9552, "step": 28692 }, { "epoch": 0.9257467569785708, "grad_norm": 0.333984375, "learning_rate": 4.349751025034077e-07, "loss": 1.9913, "step": 28693 }, { "epoch": 0.9257790208323671, "grad_norm": 0.33203125, "learning_rate": 4.3459894576884373e-07, "loss": 1.9963, "step": 28694 }, { "epoch": 0.9258112846861635, "grad_norm": 0.33203125, "learning_rate": 4.3422294935976934e-07, "loss": 1.9776, "step": 28695 }, { "epoch": 0.9258435485399598, "grad_norm": 0.341796875, "learning_rate": 4.338471132803146e-07, "loss": 1.9831, "step": 28696 }, { "epoch": 0.9258758123937562, "grad_norm": 0.33203125, "learning_rate": 4.334714375346244e-07, "loss": 1.9911, "step": 28697 }, { "epoch": 0.9259080762475524, "grad_norm": 0.3359375, "learning_rate": 4.330959221268305e-07, "loss": 1.9703, "step": 28698 }, { "epoch": 0.9259403401013488, "grad_norm": 0.330078125, "learning_rate": 4.3272056706106453e-07, "loss": 1.983, "step": 28699 }, { "epoch": 0.9259726039551451, "grad_norm": 0.33984375, "learning_rate": 4.3234537234145997e-07, "loss": 1.9768, "step": 28700 }, { "epoch": 0.9260048678089415, "grad_norm": 0.326171875, "learning_rate": 4.319703379721485e-07, "loss": 1.9521, "step": 28701 }, { "epoch": 0.9260371316627378, "grad_norm": 0.341796875, "learning_rate": 4.31595463957255e-07, "loss": 2.001, "step": 28702 }, { "epoch": 0.9260693955165342, "grad_norm": 0.3359375, "learning_rate": 4.31220750300908e-07, "loss": 1.9934, "step": 28703 }, { "epoch": 0.9261016593703305, "grad_norm": 0.337890625, "learning_rate": 4.308461970072325e-07, "loss": 1.9908, "step": 28704 }, { "epoch": 0.9261339232241269, "grad_norm": 0.3359375, "learning_rate": 4.304718040803485e-07, "loss": 1.9727, "step": 28705 }, { "epoch": 0.9261661870779233, "grad_norm": 0.326171875, "learning_rate": 4.3009757152437935e-07, "loss": 1.9944, "step": 28706 }, { "epoch": 0.9261984509317196, "grad_norm": 0.33203125, "learning_rate": 4.297234993434435e-07, "loss": 1.9988, "step": 28707 }, { "epoch": 0.926230714785516, "grad_norm": 0.33203125, "learning_rate": 4.2934958754165756e-07, "loss": 1.9962, "step": 28708 }, { "epoch": 0.9262629786393123, "grad_norm": 0.333984375, "learning_rate": 4.2897583612313996e-07, "loss": 2.0088, "step": 28709 }, { "epoch": 0.9262952424931087, "grad_norm": 0.328125, "learning_rate": 4.286022450920041e-07, "loss": 2.0013, "step": 28710 }, { "epoch": 0.926327506346905, "grad_norm": 0.330078125, "learning_rate": 4.2822881445235827e-07, "loss": 1.9698, "step": 28711 }, { "epoch": 0.9263597702007014, "grad_norm": 0.33203125, "learning_rate": 4.278555442083193e-07, "loss": 1.9785, "step": 28712 }, { "epoch": 0.9263920340544977, "grad_norm": 0.333984375, "learning_rate": 4.274824343639905e-07, "loss": 1.9712, "step": 28713 }, { "epoch": 0.9264242979082941, "grad_norm": 0.3359375, "learning_rate": 4.271094849234802e-07, "loss": 2.0117, "step": 28714 }, { "epoch": 0.9264565617620903, "grad_norm": 0.33203125, "learning_rate": 4.267366958908953e-07, "loss": 1.9902, "step": 28715 }, { "epoch": 0.9264888256158867, "grad_norm": 0.333984375, "learning_rate": 4.2636406727033893e-07, "loss": 1.9814, "step": 28716 }, { "epoch": 0.926521089469683, "grad_norm": 0.326171875, "learning_rate": 4.2599159906591133e-07, "loss": 2.025, "step": 28717 }, { "epoch": 0.9265533533234794, "grad_norm": 0.333984375, "learning_rate": 4.2561929128171087e-07, "loss": 1.992, "step": 28718 }, { "epoch": 0.9265856171772757, "grad_norm": 0.326171875, "learning_rate": 4.252471439218392e-07, "loss": 2.0051, "step": 28719 }, { "epoch": 0.9266178810310721, "grad_norm": 0.32421875, "learning_rate": 4.2487515699039135e-07, "loss": 1.9663, "step": 28720 }, { "epoch": 0.9266501448848684, "grad_norm": 0.33203125, "learning_rate": 4.2450333049146083e-07, "loss": 1.9892, "step": 28721 }, { "epoch": 0.9266824087386648, "grad_norm": 0.326171875, "learning_rate": 4.2413166442914265e-07, "loss": 1.9669, "step": 28722 }, { "epoch": 0.9267146725924611, "grad_norm": 0.35546875, "learning_rate": 4.237601588075252e-07, "loss": 1.9898, "step": 28723 }, { "epoch": 0.9267469364462575, "grad_norm": 0.32421875, "learning_rate": 4.2338881363069847e-07, "loss": 2.0008, "step": 28724 }, { "epoch": 0.9267792003000538, "grad_norm": 0.3359375, "learning_rate": 4.23017628902751e-07, "loss": 1.962, "step": 28725 }, { "epoch": 0.9268114641538502, "grad_norm": 0.333984375, "learning_rate": 4.2264660462776937e-07, "loss": 2.0404, "step": 28726 }, { "epoch": 0.9268437280076466, "grad_norm": 0.337890625, "learning_rate": 4.222757408098338e-07, "loss": 1.9927, "step": 28727 }, { "epoch": 0.9268759918614429, "grad_norm": 0.333984375, "learning_rate": 4.219050374530309e-07, "loss": 1.9912, "step": 28728 }, { "epoch": 0.9269082557152393, "grad_norm": 0.330078125, "learning_rate": 4.2153449456143746e-07, "loss": 1.9575, "step": 28729 }, { "epoch": 0.9269405195690356, "grad_norm": 0.328125, "learning_rate": 4.211641121391335e-07, "loss": 2.0107, "step": 28730 }, { "epoch": 0.926972783422832, "grad_norm": 0.3359375, "learning_rate": 4.207938901901975e-07, "loss": 1.9765, "step": 28731 }, { "epoch": 0.9270050472766282, "grad_norm": 0.333984375, "learning_rate": 4.2042382871870456e-07, "loss": 1.998, "step": 28732 }, { "epoch": 0.9270373111304246, "grad_norm": 0.353515625, "learning_rate": 4.20053927728723e-07, "loss": 1.9891, "step": 28733 }, { "epoch": 0.9270695749842209, "grad_norm": 0.330078125, "learning_rate": 4.1968418722433123e-07, "loss": 1.9861, "step": 28734 }, { "epoch": 0.9271018388380173, "grad_norm": 0.328125, "learning_rate": 4.19314607209596e-07, "loss": 1.9977, "step": 28735 }, { "epoch": 0.9271341026918136, "grad_norm": 0.33203125, "learning_rate": 4.1894518768858245e-07, "loss": 1.9899, "step": 28736 }, { "epoch": 0.92716636654561, "grad_norm": 0.328125, "learning_rate": 4.185759286653623e-07, "loss": 2.0109, "step": 28737 }, { "epoch": 0.9271986303994063, "grad_norm": 0.333984375, "learning_rate": 4.1820683014399896e-07, "loss": 2.0006, "step": 28738 }, { "epoch": 0.9272308942532027, "grad_norm": 0.326171875, "learning_rate": 4.178378921285508e-07, "loss": 1.9986, "step": 28739 }, { "epoch": 0.927263158106999, "grad_norm": 0.345703125, "learning_rate": 4.174691146230847e-07, "loss": 2.008, "step": 28740 }, { "epoch": 0.9272954219607954, "grad_norm": 0.328125, "learning_rate": 4.171004976316556e-07, "loss": 1.9884, "step": 28741 }, { "epoch": 0.9273276858145917, "grad_norm": 0.328125, "learning_rate": 4.16732041158322e-07, "loss": 1.9762, "step": 28742 }, { "epoch": 0.9273599496683881, "grad_norm": 0.326171875, "learning_rate": 4.163637452071406e-07, "loss": 2.002, "step": 28743 }, { "epoch": 0.9273922135221844, "grad_norm": 0.3359375, "learning_rate": 4.1599560978216655e-07, "loss": 1.9555, "step": 28744 }, { "epoch": 0.9274244773759808, "grad_norm": 0.326171875, "learning_rate": 4.156276348874466e-07, "loss": 1.9882, "step": 28745 }, { "epoch": 0.9274567412297772, "grad_norm": 0.341796875, "learning_rate": 4.1525982052703746e-07, "loss": 1.9936, "step": 28746 }, { "epoch": 0.9274890050835735, "grad_norm": 0.328125, "learning_rate": 4.14892166704986e-07, "loss": 2.0245, "step": 28747 }, { "epoch": 0.9275212689373699, "grad_norm": 0.337890625, "learning_rate": 4.1452467342533716e-07, "loss": 1.9978, "step": 28748 }, { "epoch": 0.9275535327911661, "grad_norm": 0.33203125, "learning_rate": 4.141573406921362e-07, "loss": 1.9914, "step": 28749 }, { "epoch": 0.9275857966449625, "grad_norm": 0.333984375, "learning_rate": 4.137901685094281e-07, "loss": 2.0005, "step": 28750 }, { "epoch": 0.9276180604987588, "grad_norm": 0.330078125, "learning_rate": 4.1342315688125463e-07, "loss": 1.9859, "step": 28751 }, { "epoch": 0.9276503243525552, "grad_norm": 0.333984375, "learning_rate": 4.1305630581165266e-07, "loss": 1.9604, "step": 28752 }, { "epoch": 0.9276825882063515, "grad_norm": 0.33203125, "learning_rate": 4.126896153046639e-07, "loss": 2.001, "step": 28753 }, { "epoch": 0.9277148520601479, "grad_norm": 0.33203125, "learning_rate": 4.1232308536432337e-07, "loss": 2.0134, "step": 28754 }, { "epoch": 0.9277471159139442, "grad_norm": 0.333984375, "learning_rate": 4.11956715994663e-07, "loss": 1.9864, "step": 28755 }, { "epoch": 0.9277793797677406, "grad_norm": 0.33203125, "learning_rate": 4.1159050719971944e-07, "loss": 1.9567, "step": 28756 }, { "epoch": 0.9278116436215369, "grad_norm": 0.330078125, "learning_rate": 4.112244589835229e-07, "loss": 2.0148, "step": 28757 }, { "epoch": 0.9278439074753333, "grad_norm": 0.328125, "learning_rate": 4.1085857135009843e-07, "loss": 1.9839, "step": 28758 }, { "epoch": 0.9278761713291296, "grad_norm": 0.33203125, "learning_rate": 4.104928443034794e-07, "loss": 2.0058, "step": 28759 }, { "epoch": 0.927908435182926, "grad_norm": 0.326171875, "learning_rate": 4.1012727784768776e-07, "loss": 1.9956, "step": 28760 }, { "epoch": 0.9279406990367223, "grad_norm": 0.333984375, "learning_rate": 4.0976187198674685e-07, "loss": 1.9857, "step": 28761 }, { "epoch": 0.9279729628905187, "grad_norm": 0.33203125, "learning_rate": 4.0939662672468183e-07, "loss": 1.9859, "step": 28762 }, { "epoch": 0.928005226744315, "grad_norm": 0.359375, "learning_rate": 4.0903154206551274e-07, "loss": 1.9942, "step": 28763 }, { "epoch": 0.9280374905981114, "grad_norm": 0.33203125, "learning_rate": 4.086666180132548e-07, "loss": 2.0104, "step": 28764 }, { "epoch": 0.9280697544519076, "grad_norm": 0.328125, "learning_rate": 4.0830185457192636e-07, "loss": 1.9773, "step": 28765 }, { "epoch": 0.928102018305704, "grad_norm": 0.330078125, "learning_rate": 4.07937251745546e-07, "loss": 2.0045, "step": 28766 }, { "epoch": 0.9281342821595004, "grad_norm": 0.337890625, "learning_rate": 4.0757280953812036e-07, "loss": 2.0132, "step": 28767 }, { "epoch": 0.9281665460132967, "grad_norm": 0.330078125, "learning_rate": 4.0720852795366805e-07, "loss": 1.9907, "step": 28768 }, { "epoch": 0.9281988098670931, "grad_norm": 0.333984375, "learning_rate": 4.068444069961941e-07, "loss": 1.9645, "step": 28769 }, { "epoch": 0.9282310737208894, "grad_norm": 0.333984375, "learning_rate": 4.0648044666970697e-07, "loss": 1.997, "step": 28770 }, { "epoch": 0.9282633375746858, "grad_norm": 0.33203125, "learning_rate": 4.0611664697821516e-07, "loss": 1.9958, "step": 28771 }, { "epoch": 0.9282956014284821, "grad_norm": 0.330078125, "learning_rate": 4.0575300792572045e-07, "loss": 1.9973, "step": 28772 }, { "epoch": 0.9283278652822785, "grad_norm": 0.33203125, "learning_rate": 4.0538952951622634e-07, "loss": 1.9902, "step": 28773 }, { "epoch": 0.9283601291360748, "grad_norm": 0.32421875, "learning_rate": 4.050262117537362e-07, "loss": 1.9601, "step": 28774 }, { "epoch": 0.9283923929898712, "grad_norm": 0.341796875, "learning_rate": 4.0466305464224694e-07, "loss": 1.9875, "step": 28775 }, { "epoch": 0.9284246568436675, "grad_norm": 0.333984375, "learning_rate": 4.043000581857537e-07, "loss": 1.9942, "step": 28776 }, { "epoch": 0.9284569206974639, "grad_norm": 0.333984375, "learning_rate": 4.039372223882565e-07, "loss": 1.9773, "step": 28777 }, { "epoch": 0.9284891845512602, "grad_norm": 0.328125, "learning_rate": 4.0357454725374723e-07, "loss": 1.9873, "step": 28778 }, { "epoch": 0.9285214484050566, "grad_norm": 0.326171875, "learning_rate": 4.0321203278621766e-07, "loss": 1.9959, "step": 28779 }, { "epoch": 0.9285537122588529, "grad_norm": 0.3359375, "learning_rate": 4.02849678989658e-07, "loss": 1.9915, "step": 28780 }, { "epoch": 0.9285859761126493, "grad_norm": 0.330078125, "learning_rate": 4.024874858680583e-07, "loss": 1.9809, "step": 28781 }, { "epoch": 0.9286182399664455, "grad_norm": 0.333984375, "learning_rate": 4.0212545342540206e-07, "loss": 2.0003, "step": 28782 }, { "epoch": 0.928650503820242, "grad_norm": 0.3359375, "learning_rate": 4.0176358166567783e-07, "loss": 2.0137, "step": 28783 }, { "epoch": 0.9286827676740382, "grad_norm": 0.33984375, "learning_rate": 4.0140187059286736e-07, "loss": 2.0014, "step": 28784 }, { "epoch": 0.9287150315278346, "grad_norm": 0.330078125, "learning_rate": 4.010403202109525e-07, "loss": 1.9827, "step": 28785 }, { "epoch": 0.928747295381631, "grad_norm": 0.3203125, "learning_rate": 4.0067893052391004e-07, "loss": 1.976, "step": 28786 }, { "epoch": 0.9287795592354273, "grad_norm": 0.33203125, "learning_rate": 4.0031770153572176e-07, "loss": 1.9978, "step": 28787 }, { "epoch": 0.9288118230892237, "grad_norm": 0.32421875, "learning_rate": 3.999566332503646e-07, "loss": 1.9913, "step": 28788 }, { "epoch": 0.92884408694302, "grad_norm": 0.326171875, "learning_rate": 3.9959572567180693e-07, "loss": 1.981, "step": 28789 }, { "epoch": 0.9288763507968164, "grad_norm": 0.333984375, "learning_rate": 3.9923497880402894e-07, "loss": 2.0236, "step": 28790 }, { "epoch": 0.9289086146506127, "grad_norm": 0.333984375, "learning_rate": 3.9887439265099577e-07, "loss": 1.9877, "step": 28791 }, { "epoch": 0.9289408785044091, "grad_norm": 0.333984375, "learning_rate": 3.985139672166777e-07, "loss": 1.9933, "step": 28792 }, { "epoch": 0.9289731423582054, "grad_norm": 0.333984375, "learning_rate": 3.981537025050447e-07, "loss": 2.0057, "step": 28793 }, { "epoch": 0.9290054062120018, "grad_norm": 0.328125, "learning_rate": 3.977935985200604e-07, "loss": 2.0028, "step": 28794 }, { "epoch": 0.9290376700657981, "grad_norm": 0.330078125, "learning_rate": 3.9743365526568655e-07, "loss": 1.9904, "step": 28795 }, { "epoch": 0.9290699339195945, "grad_norm": 0.3359375, "learning_rate": 3.9707387274588833e-07, "loss": 2.0039, "step": 28796 }, { "epoch": 0.9291021977733908, "grad_norm": 0.33203125, "learning_rate": 3.9671425096462597e-07, "loss": 2.0069, "step": 28797 }, { "epoch": 0.9291344616271872, "grad_norm": 0.33203125, "learning_rate": 3.963547899258546e-07, "loss": 2.006, "step": 28798 }, { "epoch": 0.9291667254809834, "grad_norm": 0.333984375, "learning_rate": 3.9599548963353437e-07, "loss": 1.9929, "step": 28799 }, { "epoch": 0.9291989893347798, "grad_norm": 0.33203125, "learning_rate": 3.956363500916205e-07, "loss": 1.9536, "step": 28800 }, { "epoch": 0.9292312531885761, "grad_norm": 0.333984375, "learning_rate": 3.952773713040614e-07, "loss": 1.968, "step": 28801 }, { "epoch": 0.9292635170423725, "grad_norm": 0.326171875, "learning_rate": 3.9491855327481395e-07, "loss": 1.987, "step": 28802 }, { "epoch": 0.9292957808961688, "grad_norm": 0.3359375, "learning_rate": 3.9455989600782505e-07, "loss": 1.9774, "step": 28803 }, { "epoch": 0.9293280447499652, "grad_norm": 0.333984375, "learning_rate": 3.942013995070415e-07, "loss": 1.9921, "step": 28804 }, { "epoch": 0.9293603086037615, "grad_norm": 0.33203125, "learning_rate": 3.9384306377641177e-07, "loss": 1.9732, "step": 28805 }, { "epoch": 0.9293925724575579, "grad_norm": 0.333984375, "learning_rate": 3.9348488881987945e-07, "loss": 1.9831, "step": 28806 }, { "epoch": 0.9294248363113543, "grad_norm": 0.328125, "learning_rate": 3.931268746413863e-07, "loss": 1.9639, "step": 28807 }, { "epoch": 0.9294571001651506, "grad_norm": 0.365234375, "learning_rate": 3.9276902124487425e-07, "loss": 1.9752, "step": 28808 }, { "epoch": 0.929489364018947, "grad_norm": 0.328125, "learning_rate": 3.9241132863428173e-07, "loss": 1.9786, "step": 28809 }, { "epoch": 0.9295216278727433, "grad_norm": 0.337890625, "learning_rate": 3.920537968135457e-07, "loss": 1.9735, "step": 28810 }, { "epoch": 0.9295538917265397, "grad_norm": 0.328125, "learning_rate": 3.916964257865996e-07, "loss": 1.9856, "step": 28811 }, { "epoch": 0.929586155580336, "grad_norm": 0.330078125, "learning_rate": 3.9133921555738195e-07, "loss": 1.9997, "step": 28812 }, { "epoch": 0.9296184194341324, "grad_norm": 0.333984375, "learning_rate": 3.909821661298213e-07, "loss": 1.9657, "step": 28813 }, { "epoch": 0.9296506832879287, "grad_norm": 0.326171875, "learning_rate": 3.9062527750784783e-07, "loss": 1.9966, "step": 28814 }, { "epoch": 0.9296829471417251, "grad_norm": 0.34375, "learning_rate": 3.902685496953917e-07, "loss": 2.0222, "step": 28815 }, { "epoch": 0.9297152109955213, "grad_norm": 0.33203125, "learning_rate": 3.899119826963782e-07, "loss": 1.9891, "step": 28816 }, { "epoch": 0.9297474748493177, "grad_norm": 0.330078125, "learning_rate": 3.8955557651473075e-07, "loss": 1.9759, "step": 28817 }, { "epoch": 0.929779738703114, "grad_norm": 0.330078125, "learning_rate": 3.8919933115437623e-07, "loss": 2.008, "step": 28818 }, { "epoch": 0.9298120025569104, "grad_norm": 0.33203125, "learning_rate": 3.8884324661923323e-07, "loss": 2.0108, "step": 28819 }, { "epoch": 0.9298442664107067, "grad_norm": 0.326171875, "learning_rate": 3.884873229132202e-07, "loss": 1.9971, "step": 28820 }, { "epoch": 0.9298765302645031, "grad_norm": 0.328125, "learning_rate": 3.8813156004025743e-07, "loss": 1.9844, "step": 28821 }, { "epoch": 0.9299087941182994, "grad_norm": 0.3359375, "learning_rate": 3.8777595800426004e-07, "loss": 1.9717, "step": 28822 }, { "epoch": 0.9299410579720958, "grad_norm": 0.333984375, "learning_rate": 3.874205168091416e-07, "loss": 1.9747, "step": 28823 }, { "epoch": 0.9299733218258921, "grad_norm": 0.33984375, "learning_rate": 3.8706523645881563e-07, "loss": 1.9776, "step": 28824 }, { "epoch": 0.9300055856796885, "grad_norm": 0.328125, "learning_rate": 3.867101169571924e-07, "loss": 1.9951, "step": 28825 }, { "epoch": 0.9300378495334848, "grad_norm": 0.333984375, "learning_rate": 3.863551583081804e-07, "loss": 2.0202, "step": 28826 }, { "epoch": 0.9300701133872812, "grad_norm": 0.345703125, "learning_rate": 3.860003605156881e-07, "loss": 2.0448, "step": 28827 }, { "epoch": 0.9301023772410776, "grad_norm": 0.33984375, "learning_rate": 3.8564572358361916e-07, "loss": 1.9628, "step": 28828 }, { "epoch": 0.9301346410948739, "grad_norm": 0.330078125, "learning_rate": 3.852912475158754e-07, "loss": 1.9863, "step": 28829 }, { "epoch": 0.9301669049486703, "grad_norm": 0.3359375, "learning_rate": 3.8493693231636373e-07, "loss": 1.9968, "step": 28830 }, { "epoch": 0.9301991688024666, "grad_norm": 0.337890625, "learning_rate": 3.845827779889827e-07, "loss": 2.013, "step": 28831 }, { "epoch": 0.930231432656263, "grad_norm": 0.32421875, "learning_rate": 3.8422878453762576e-07, "loss": 2.0165, "step": 28832 }, { "epoch": 0.9302636965100592, "grad_norm": 0.328125, "learning_rate": 3.8387495196619494e-07, "loss": 2.0064, "step": 28833 }, { "epoch": 0.9302959603638556, "grad_norm": 0.33203125, "learning_rate": 3.835212802785837e-07, "loss": 1.9947, "step": 28834 }, { "epoch": 0.9303282242176519, "grad_norm": 0.33203125, "learning_rate": 3.831677694786839e-07, "loss": 1.9941, "step": 28835 }, { "epoch": 0.9303604880714483, "grad_norm": 0.326171875, "learning_rate": 3.8281441957038755e-07, "loss": 1.9901, "step": 28836 }, { "epoch": 0.9303927519252446, "grad_norm": 0.322265625, "learning_rate": 3.8246123055758306e-07, "loss": 1.9833, "step": 28837 }, { "epoch": 0.930425015779041, "grad_norm": 0.34375, "learning_rate": 3.8210820244415746e-07, "loss": 1.9713, "step": 28838 }, { "epoch": 0.9304572796328373, "grad_norm": 0.33203125, "learning_rate": 3.8175533523400086e-07, "loss": 1.9993, "step": 28839 }, { "epoch": 0.9304895434866337, "grad_norm": 0.3359375, "learning_rate": 3.814026289309935e-07, "loss": 1.9449, "step": 28840 }, { "epoch": 0.93052180734043, "grad_norm": 0.33203125, "learning_rate": 3.81050083539019e-07, "loss": 1.9906, "step": 28841 }, { "epoch": 0.9305540711942264, "grad_norm": 0.330078125, "learning_rate": 3.806976990619543e-07, "loss": 1.9935, "step": 28842 }, { "epoch": 0.9305863350480227, "grad_norm": 0.33203125, "learning_rate": 3.803454755036845e-07, "loss": 1.9817, "step": 28843 }, { "epoch": 0.9306185989018191, "grad_norm": 0.330078125, "learning_rate": 3.799934128680849e-07, "loss": 1.9916, "step": 28844 }, { "epoch": 0.9306508627556154, "grad_norm": 0.328125, "learning_rate": 3.796415111590257e-07, "loss": 2.0118, "step": 28845 }, { "epoch": 0.9306831266094118, "grad_norm": 0.330078125, "learning_rate": 3.7928977038038716e-07, "loss": 1.9859, "step": 28846 }, { "epoch": 0.9307153904632082, "grad_norm": 0.3359375, "learning_rate": 3.7893819053603784e-07, "loss": 1.9953, "step": 28847 }, { "epoch": 0.9307476543170045, "grad_norm": 0.328125, "learning_rate": 3.785867716298447e-07, "loss": 2.0015, "step": 28848 }, { "epoch": 0.9307799181708009, "grad_norm": 0.330078125, "learning_rate": 3.782355136656812e-07, "loss": 1.9687, "step": 28849 }, { "epoch": 0.9308121820245971, "grad_norm": 0.330078125, "learning_rate": 3.7788441664741266e-07, "loss": 1.9965, "step": 28850 }, { "epoch": 0.9308444458783935, "grad_norm": 0.3359375, "learning_rate": 3.77533480578901e-07, "loss": 1.9806, "step": 28851 }, { "epoch": 0.9308767097321898, "grad_norm": 0.3359375, "learning_rate": 3.7718270546401144e-07, "loss": 2.0054, "step": 28852 }, { "epoch": 0.9309089735859862, "grad_norm": 0.328125, "learning_rate": 3.7683209130660426e-07, "loss": 1.9999, "step": 28853 }, { "epoch": 0.9309412374397825, "grad_norm": 0.337890625, "learning_rate": 3.764816381105379e-07, "loss": 2.0162, "step": 28854 }, { "epoch": 0.9309735012935789, "grad_norm": 0.33203125, "learning_rate": 3.761313458796711e-07, "loss": 1.9656, "step": 28855 }, { "epoch": 0.9310057651473752, "grad_norm": 0.333984375, "learning_rate": 3.757812146178607e-07, "loss": 1.9868, "step": 28856 }, { "epoch": 0.9310380290011716, "grad_norm": 0.333984375, "learning_rate": 3.7543124432895703e-07, "loss": 2.0079, "step": 28857 }, { "epoch": 0.9310702928549679, "grad_norm": 0.35546875, "learning_rate": 3.750814350168169e-07, "loss": 1.9805, "step": 28858 }, { "epoch": 0.9311025567087643, "grad_norm": 0.3359375, "learning_rate": 3.747317866852873e-07, "loss": 2.0168, "step": 28859 }, { "epoch": 0.9311348205625606, "grad_norm": 0.337890625, "learning_rate": 3.743822993382184e-07, "loss": 1.9641, "step": 28860 }, { "epoch": 0.931167084416357, "grad_norm": 0.328125, "learning_rate": 3.740329729794556e-07, "loss": 1.99, "step": 28861 }, { "epoch": 0.9311993482701533, "grad_norm": 0.3359375, "learning_rate": 3.736838076128474e-07, "loss": 1.9959, "step": 28862 }, { "epoch": 0.9312316121239497, "grad_norm": 0.33203125, "learning_rate": 3.7333480324223234e-07, "loss": 1.9903, "step": 28863 }, { "epoch": 0.931263875977746, "grad_norm": 0.330078125, "learning_rate": 3.7298595987145747e-07, "loss": 1.986, "step": 28864 }, { "epoch": 0.9312961398315424, "grad_norm": 0.333984375, "learning_rate": 3.7263727750435794e-07, "loss": 1.9807, "step": 28865 }, { "epoch": 0.9313284036853386, "grad_norm": 0.330078125, "learning_rate": 3.7228875614477407e-07, "loss": 1.9961, "step": 28866 }, { "epoch": 0.931360667539135, "grad_norm": 0.326171875, "learning_rate": 3.7194039579654114e-07, "loss": 1.9784, "step": 28867 }, { "epoch": 0.9313929313929314, "grad_norm": 0.330078125, "learning_rate": 3.715921964634961e-07, "loss": 1.9856, "step": 28868 }, { "epoch": 0.9314251952467277, "grad_norm": 0.337890625, "learning_rate": 3.712441581494691e-07, "loss": 2.0007, "step": 28869 }, { "epoch": 0.9314574591005241, "grad_norm": 0.32421875, "learning_rate": 3.708962808582889e-07, "loss": 1.9925, "step": 28870 }, { "epoch": 0.9314897229543204, "grad_norm": 0.333984375, "learning_rate": 3.705485645937906e-07, "loss": 2.0099, "step": 28871 }, { "epoch": 0.9315219868081168, "grad_norm": 0.328125, "learning_rate": 3.7020100935979797e-07, "loss": 2.0078, "step": 28872 }, { "epoch": 0.9315542506619131, "grad_norm": 0.3359375, "learning_rate": 3.6985361516013616e-07, "loss": 2.0088, "step": 28873 }, { "epoch": 0.9315865145157095, "grad_norm": 0.333984375, "learning_rate": 3.695063819986322e-07, "loss": 1.9654, "step": 28874 }, { "epoch": 0.9316187783695058, "grad_norm": 0.330078125, "learning_rate": 3.691593098791046e-07, "loss": 1.9779, "step": 28875 }, { "epoch": 0.9316510422233022, "grad_norm": 0.330078125, "learning_rate": 3.6881239880537544e-07, "loss": 2.0036, "step": 28876 }, { "epoch": 0.9316833060770985, "grad_norm": 0.33203125, "learning_rate": 3.6846564878126323e-07, "loss": 1.9973, "step": 28877 }, { "epoch": 0.9317155699308949, "grad_norm": 0.330078125, "learning_rate": 3.681190598105849e-07, "loss": 1.9922, "step": 28878 }, { "epoch": 0.9317478337846912, "grad_norm": 0.330078125, "learning_rate": 3.6777263189715414e-07, "loss": 1.9966, "step": 28879 }, { "epoch": 0.9317800976384876, "grad_norm": 0.3359375, "learning_rate": 3.6742636504478623e-07, "loss": 1.9759, "step": 28880 }, { "epoch": 0.9318123614922839, "grad_norm": 0.326171875, "learning_rate": 3.670802592572914e-07, "loss": 1.9972, "step": 28881 }, { "epoch": 0.9318446253460803, "grad_norm": 0.328125, "learning_rate": 3.667343145384766e-07, "loss": 1.9669, "step": 28882 }, { "epoch": 0.9318768891998765, "grad_norm": 0.33984375, "learning_rate": 3.663885308921555e-07, "loss": 1.9571, "step": 28883 }, { "epoch": 0.931909153053673, "grad_norm": 0.33203125, "learning_rate": 3.6604290832213174e-07, "loss": 2.0107, "step": 28884 }, { "epoch": 0.9319414169074692, "grad_norm": 0.33203125, "learning_rate": 3.6569744683220717e-07, "loss": 1.9734, "step": 28885 }, { "epoch": 0.9319736807612656, "grad_norm": 0.328125, "learning_rate": 3.6535214642618884e-07, "loss": 2.0089, "step": 28886 }, { "epoch": 0.932005944615062, "grad_norm": 0.33203125, "learning_rate": 3.6500700710787373e-07, "loss": 1.9965, "step": 28887 }, { "epoch": 0.9320382084688583, "grad_norm": 0.326171875, "learning_rate": 3.6466202888106205e-07, "loss": 1.9848, "step": 28888 }, { "epoch": 0.9320704723226547, "grad_norm": 0.341796875, "learning_rate": 3.6431721174955244e-07, "loss": 1.991, "step": 28889 }, { "epoch": 0.932102736176451, "grad_norm": 0.330078125, "learning_rate": 3.639725557171403e-07, "loss": 1.968, "step": 28890 }, { "epoch": 0.9321350000302474, "grad_norm": 0.33984375, "learning_rate": 3.636280607876158e-07, "loss": 1.9767, "step": 28891 }, { "epoch": 0.9321672638840437, "grad_norm": 0.333984375, "learning_rate": 3.6328372696477595e-07, "loss": 1.9859, "step": 28892 }, { "epoch": 0.9321995277378401, "grad_norm": 0.328125, "learning_rate": 3.629395542524078e-07, "loss": 1.998, "step": 28893 }, { "epoch": 0.9322317915916364, "grad_norm": 0.333984375, "learning_rate": 3.625955426542982e-07, "loss": 1.9819, "step": 28894 }, { "epoch": 0.9322640554454328, "grad_norm": 0.330078125, "learning_rate": 3.622516921742391e-07, "loss": 2.0055, "step": 28895 }, { "epoch": 0.9322963192992291, "grad_norm": 0.328125, "learning_rate": 3.61908002816011e-07, "loss": 2.0006, "step": 28896 }, { "epoch": 0.9323285831530255, "grad_norm": 0.33203125, "learning_rate": 3.615644745833974e-07, "loss": 1.9974, "step": 28897 }, { "epoch": 0.9323608470068218, "grad_norm": 0.333984375, "learning_rate": 3.612211074801819e-07, "loss": 1.9801, "step": 28898 }, { "epoch": 0.9323931108606182, "grad_norm": 0.3359375, "learning_rate": 3.608779015101432e-07, "loss": 1.9947, "step": 28899 }, { "epoch": 0.9324253747144144, "grad_norm": 0.345703125, "learning_rate": 3.605348566770583e-07, "loss": 1.9775, "step": 28900 }, { "epoch": 0.9324576385682108, "grad_norm": 0.337890625, "learning_rate": 3.6019197298470085e-07, "loss": 1.9677, "step": 28901 }, { "epoch": 0.9324899024220071, "grad_norm": 0.3359375, "learning_rate": 3.598492504368511e-07, "loss": 2.0195, "step": 28902 }, { "epoch": 0.9325221662758035, "grad_norm": 0.33203125, "learning_rate": 3.5950668903727615e-07, "loss": 1.9882, "step": 28903 }, { "epoch": 0.9325544301295998, "grad_norm": 0.328125, "learning_rate": 3.5916428878974783e-07, "loss": 1.9748, "step": 28904 }, { "epoch": 0.9325866939833962, "grad_norm": 0.33984375, "learning_rate": 3.5882204969803657e-07, "loss": 1.9761, "step": 28905 }, { "epoch": 0.9326189578371925, "grad_norm": 0.326171875, "learning_rate": 3.584799717659093e-07, "loss": 2.0099, "step": 28906 }, { "epoch": 0.9326512216909889, "grad_norm": 0.359375, "learning_rate": 3.581380549971297e-07, "loss": 2.0091, "step": 28907 }, { "epoch": 0.9326834855447853, "grad_norm": 0.333984375, "learning_rate": 3.577962993954631e-07, "loss": 1.9889, "step": 28908 }, { "epoch": 0.9327157493985816, "grad_norm": 0.328125, "learning_rate": 3.574547049646698e-07, "loss": 1.9904, "step": 28909 }, { "epoch": 0.932748013252378, "grad_norm": 0.337890625, "learning_rate": 3.5711327170851014e-07, "loss": 2.0015, "step": 28910 }, { "epoch": 0.9327802771061743, "grad_norm": 0.32421875, "learning_rate": 3.567719996307445e-07, "loss": 2.0116, "step": 28911 }, { "epoch": 0.9328125409599707, "grad_norm": 0.3359375, "learning_rate": 3.564308887351281e-07, "loss": 1.999, "step": 28912 }, { "epoch": 0.932844804813767, "grad_norm": 0.326171875, "learning_rate": 3.56089939025413e-07, "loss": 1.9959, "step": 28913 }, { "epoch": 0.9328770686675634, "grad_norm": 0.33203125, "learning_rate": 3.557491505053545e-07, "loss": 1.9859, "step": 28914 }, { "epoch": 0.9329093325213597, "grad_norm": 0.328125, "learning_rate": 3.5540852317870465e-07, "loss": 1.9809, "step": 28915 }, { "epoch": 0.9329415963751561, "grad_norm": 0.33203125, "learning_rate": 3.550680570492121e-07, "loss": 1.9426, "step": 28916 }, { "epoch": 0.9329738602289523, "grad_norm": 0.337890625, "learning_rate": 3.547277521206238e-07, "loss": 1.9998, "step": 28917 }, { "epoch": 0.9330061240827487, "grad_norm": 0.33203125, "learning_rate": 3.5438760839668516e-07, "loss": 1.9873, "step": 28918 }, { "epoch": 0.933038387936545, "grad_norm": 0.3359375, "learning_rate": 3.5404762588114147e-07, "loss": 1.954, "step": 28919 }, { "epoch": 0.9330706517903414, "grad_norm": 0.33984375, "learning_rate": 3.5370780457773643e-07, "loss": 1.9845, "step": 28920 }, { "epoch": 0.9331029156441377, "grad_norm": 0.33203125, "learning_rate": 3.533681444902054e-07, "loss": 2.0033, "step": 28921 }, { "epoch": 0.9331351794979341, "grad_norm": 0.34375, "learning_rate": 3.5302864562229196e-07, "loss": 1.9918, "step": 28922 }, { "epoch": 0.9331674433517304, "grad_norm": 0.328125, "learning_rate": 3.5268930797773325e-07, "loss": 2.0187, "step": 28923 }, { "epoch": 0.9331997072055268, "grad_norm": 0.3359375, "learning_rate": 3.523501315602595e-07, "loss": 1.9847, "step": 28924 }, { "epoch": 0.9332319710593231, "grad_norm": 0.328125, "learning_rate": 3.520111163736078e-07, "loss": 2.0208, "step": 28925 }, { "epoch": 0.9332642349131195, "grad_norm": 0.333984375, "learning_rate": 3.516722624215102e-07, "loss": 1.9924, "step": 28926 }, { "epoch": 0.9332964987669158, "grad_norm": 0.328125, "learning_rate": 3.513335697076969e-07, "loss": 1.9695, "step": 28927 }, { "epoch": 0.9333287626207122, "grad_norm": 0.337890625, "learning_rate": 3.5099503823589175e-07, "loss": 1.9935, "step": 28928 }, { "epoch": 0.9333610264745086, "grad_norm": 0.349609375, "learning_rate": 3.5065666800982667e-07, "loss": 1.9911, "step": 28929 }, { "epoch": 0.9333932903283049, "grad_norm": 0.326171875, "learning_rate": 3.503184590332237e-07, "loss": 2.0154, "step": 28930 }, { "epoch": 0.9334255541821013, "grad_norm": 0.330078125, "learning_rate": 3.4998041130980486e-07, "loss": 1.9893, "step": 28931 }, { "epoch": 0.9334578180358976, "grad_norm": 0.326171875, "learning_rate": 3.496425248432905e-07, "loss": 1.9658, "step": 28932 }, { "epoch": 0.933490081889694, "grad_norm": 0.333984375, "learning_rate": 3.493047996374027e-07, "loss": 2.0174, "step": 28933 }, { "epoch": 0.9335223457434902, "grad_norm": 0.32421875, "learning_rate": 3.4896723569585674e-07, "loss": 1.9697, "step": 28934 }, { "epoch": 0.9335546095972866, "grad_norm": 0.33203125, "learning_rate": 3.4862983302236807e-07, "loss": 1.985, "step": 28935 }, { "epoch": 0.9335868734510829, "grad_norm": 0.328125, "learning_rate": 3.48292591620652e-07, "loss": 1.9949, "step": 28936 }, { "epoch": 0.9336191373048793, "grad_norm": 0.33984375, "learning_rate": 3.479555114944205e-07, "loss": 1.9572, "step": 28937 }, { "epoch": 0.9336514011586756, "grad_norm": 0.33984375, "learning_rate": 3.4761859264738237e-07, "loss": 2.0185, "step": 28938 }, { "epoch": 0.933683665012472, "grad_norm": 0.326171875, "learning_rate": 3.4728183508324794e-07, "loss": 1.978, "step": 28939 }, { "epoch": 0.9337159288662683, "grad_norm": 0.33203125, "learning_rate": 3.4694523880572427e-07, "loss": 1.9852, "step": 28940 }, { "epoch": 0.9337481927200647, "grad_norm": 0.337890625, "learning_rate": 3.4660880381851335e-07, "loss": 1.997, "step": 28941 }, { "epoch": 0.933780456573861, "grad_norm": 0.345703125, "learning_rate": 3.4627253012532224e-07, "loss": 2.0262, "step": 28942 }, { "epoch": 0.9338127204276574, "grad_norm": 0.330078125, "learning_rate": 3.459364177298496e-07, "loss": 1.9655, "step": 28943 }, { "epoch": 0.9338449842814537, "grad_norm": 0.34375, "learning_rate": 3.456004666357943e-07, "loss": 1.9586, "step": 28944 }, { "epoch": 0.9338772481352501, "grad_norm": 0.330078125, "learning_rate": 3.452646768468598e-07, "loss": 2.0046, "step": 28945 }, { "epoch": 0.9339095119890464, "grad_norm": 0.33984375, "learning_rate": 3.4492904836673667e-07, "loss": 1.9796, "step": 28946 }, { "epoch": 0.9339417758428428, "grad_norm": 0.328125, "learning_rate": 3.445935811991202e-07, "loss": 1.9912, "step": 28947 }, { "epoch": 0.9339740396966392, "grad_norm": 0.33203125, "learning_rate": 3.4425827534770414e-07, "loss": 1.9618, "step": 28948 }, { "epoch": 0.9340063035504355, "grad_norm": 0.33203125, "learning_rate": 3.4392313081618056e-07, "loss": 1.9955, "step": 28949 }, { "epoch": 0.9340385674042319, "grad_norm": 0.3359375, "learning_rate": 3.4358814760823474e-07, "loss": 1.9865, "step": 28950 }, { "epoch": 0.9340708312580281, "grad_norm": 0.33203125, "learning_rate": 3.4325332572755884e-07, "loss": 1.9866, "step": 28951 }, { "epoch": 0.9341030951118245, "grad_norm": 0.33203125, "learning_rate": 3.429186651778332e-07, "loss": 1.9739, "step": 28952 }, { "epoch": 0.9341353589656208, "grad_norm": 0.33203125, "learning_rate": 3.425841659627432e-07, "loss": 1.9987, "step": 28953 }, { "epoch": 0.9341676228194172, "grad_norm": 0.328125, "learning_rate": 3.4224982808597596e-07, "loss": 1.9844, "step": 28954 }, { "epoch": 0.9341998866732135, "grad_norm": 0.34375, "learning_rate": 3.4191565155120175e-07, "loss": 2.0007, "step": 28955 }, { "epoch": 0.9342321505270099, "grad_norm": 0.3359375, "learning_rate": 3.4158163636210603e-07, "loss": 1.9755, "step": 28956 }, { "epoch": 0.9342644143808062, "grad_norm": 0.337890625, "learning_rate": 3.4124778252236587e-07, "loss": 2.0124, "step": 28957 }, { "epoch": 0.9342966782346026, "grad_norm": 0.32421875, "learning_rate": 3.409140900356533e-07, "loss": 2.0082, "step": 28958 }, { "epoch": 0.9343289420883989, "grad_norm": 0.341796875, "learning_rate": 3.4058055890564044e-07, "loss": 2.0101, "step": 28959 }, { "epoch": 0.9343612059421953, "grad_norm": 0.32421875, "learning_rate": 3.4024718913600095e-07, "loss": 1.954, "step": 28960 }, { "epoch": 0.9343934697959916, "grad_norm": 0.322265625, "learning_rate": 3.3991398073040526e-07, "loss": 1.9842, "step": 28961 }, { "epoch": 0.934425733649788, "grad_norm": 0.3359375, "learning_rate": 3.3958093369251876e-07, "loss": 1.9916, "step": 28962 }, { "epoch": 0.9344579975035843, "grad_norm": 0.33203125, "learning_rate": 3.3924804802600517e-07, "loss": 1.9924, "step": 28963 }, { "epoch": 0.9344902613573807, "grad_norm": 0.33203125, "learning_rate": 3.3891532373453326e-07, "loss": 1.9881, "step": 28964 }, { "epoch": 0.934522525211177, "grad_norm": 0.333984375, "learning_rate": 3.3858276082176507e-07, "loss": 1.9278, "step": 28965 }, { "epoch": 0.9345547890649734, "grad_norm": 0.337890625, "learning_rate": 3.382503592913577e-07, "loss": 1.9785, "step": 28966 }, { "epoch": 0.9345870529187696, "grad_norm": 0.333984375, "learning_rate": 3.3791811914697324e-07, "loss": 1.9864, "step": 28967 }, { "epoch": 0.934619316772566, "grad_norm": 0.337890625, "learning_rate": 3.375860403922687e-07, "loss": 1.9857, "step": 28968 }, { "epoch": 0.9346515806263624, "grad_norm": 0.337890625, "learning_rate": 3.3725412303089453e-07, "loss": 2.0049, "step": 28969 }, { "epoch": 0.9346838444801587, "grad_norm": 0.330078125, "learning_rate": 3.3692236706651113e-07, "loss": 1.9759, "step": 28970 }, { "epoch": 0.9347161083339551, "grad_norm": 0.328125, "learning_rate": 3.365907725027673e-07, "loss": 1.9843, "step": 28971 }, { "epoch": 0.9347483721877514, "grad_norm": 0.33203125, "learning_rate": 3.3625933934331e-07, "loss": 1.9627, "step": 28972 }, { "epoch": 0.9347806360415478, "grad_norm": 0.330078125, "learning_rate": 3.3592806759179306e-07, "loss": 1.9976, "step": 28973 }, { "epoch": 0.9348128998953441, "grad_norm": 0.3359375, "learning_rate": 3.3559695725185856e-07, "loss": 1.9953, "step": 28974 }, { "epoch": 0.9348451637491405, "grad_norm": 0.330078125, "learning_rate": 3.3526600832715195e-07, "loss": 2.0008, "step": 28975 }, { "epoch": 0.9348774276029368, "grad_norm": 0.330078125, "learning_rate": 3.3493522082131854e-07, "loss": 1.9869, "step": 28976 }, { "epoch": 0.9349096914567332, "grad_norm": 0.34765625, "learning_rate": 3.346045947379972e-07, "loss": 1.9532, "step": 28977 }, { "epoch": 0.9349419553105295, "grad_norm": 0.341796875, "learning_rate": 3.34274130080825e-07, "loss": 1.9788, "step": 28978 }, { "epoch": 0.9349742191643259, "grad_norm": 0.33203125, "learning_rate": 3.339438268534439e-07, "loss": 1.9916, "step": 28979 }, { "epoch": 0.9350064830181222, "grad_norm": 0.333984375, "learning_rate": 3.336136850594895e-07, "loss": 2.0249, "step": 28980 }, { "epoch": 0.9350387468719186, "grad_norm": 0.33984375, "learning_rate": 3.332837047025905e-07, "loss": 1.9801, "step": 28981 }, { "epoch": 0.9350710107257149, "grad_norm": 0.33203125, "learning_rate": 3.329538857863873e-07, "loss": 1.985, "step": 28982 }, { "epoch": 0.9351032745795113, "grad_norm": 0.330078125, "learning_rate": 3.32624228314502e-07, "loss": 1.9755, "step": 28983 }, { "epoch": 0.9351355384333075, "grad_norm": 0.326171875, "learning_rate": 3.3229473229056674e-07, "loss": 1.9871, "step": 28984 }, { "epoch": 0.9351678022871039, "grad_norm": 0.33984375, "learning_rate": 3.319653977182102e-07, "loss": 1.9934, "step": 28985 }, { "epoch": 0.9352000661409002, "grad_norm": 0.33203125, "learning_rate": 3.3163622460105456e-07, "loss": 1.9783, "step": 28986 }, { "epoch": 0.9352323299946966, "grad_norm": 0.34765625, "learning_rate": 3.313072129427236e-07, "loss": 2.0011, "step": 28987 }, { "epoch": 0.935264593848493, "grad_norm": 0.330078125, "learning_rate": 3.3097836274684266e-07, "loss": 1.957, "step": 28988 }, { "epoch": 0.9352968577022893, "grad_norm": 0.3359375, "learning_rate": 3.306496740170256e-07, "loss": 1.9753, "step": 28989 }, { "epoch": 0.9353291215560857, "grad_norm": 0.330078125, "learning_rate": 3.3032114675689285e-07, "loss": 1.9947, "step": 28990 }, { "epoch": 0.935361385409882, "grad_norm": 0.337890625, "learning_rate": 3.2999278097006314e-07, "loss": 1.9893, "step": 28991 }, { "epoch": 0.9353936492636784, "grad_norm": 0.3359375, "learning_rate": 3.296645766601469e-07, "loss": 1.9939, "step": 28992 }, { "epoch": 0.9354259131174747, "grad_norm": 0.349609375, "learning_rate": 3.293365338307597e-07, "loss": 1.9888, "step": 28993 }, { "epoch": 0.9354581769712711, "grad_norm": 0.330078125, "learning_rate": 3.2900865248551017e-07, "loss": 1.9539, "step": 28994 }, { "epoch": 0.9354904408250674, "grad_norm": 0.328125, "learning_rate": 3.2868093262801045e-07, "loss": 1.9576, "step": 28995 }, { "epoch": 0.9355227046788638, "grad_norm": 0.330078125, "learning_rate": 3.2835337426186443e-07, "loss": 1.9962, "step": 28996 }, { "epoch": 0.9355549685326601, "grad_norm": 0.33203125, "learning_rate": 3.280259773906774e-07, "loss": 1.9969, "step": 28997 }, { "epoch": 0.9355872323864565, "grad_norm": 0.333984375, "learning_rate": 3.2769874201805826e-07, "loss": 1.9889, "step": 28998 }, { "epoch": 0.9356194962402528, "grad_norm": 0.33203125, "learning_rate": 3.273716681476041e-07, "loss": 1.9948, "step": 28999 }, { "epoch": 0.9356517600940492, "grad_norm": 0.33203125, "learning_rate": 3.270447557829154e-07, "loss": 1.9921, "step": 29000 }, { "epoch": 0.9356840239478454, "grad_norm": 0.333984375, "learning_rate": 3.267180049275925e-07, "loss": 2.0084, "step": 29001 }, { "epoch": 0.9357162878016418, "grad_norm": 0.33203125, "learning_rate": 3.26391415585231e-07, "loss": 2.016, "step": 29002 }, { "epoch": 0.9357485516554381, "grad_norm": 0.333984375, "learning_rate": 3.2606498775942627e-07, "loss": 1.9973, "step": 29003 }, { "epoch": 0.9357808155092345, "grad_norm": 0.341796875, "learning_rate": 3.257387214537705e-07, "loss": 1.9522, "step": 29004 }, { "epoch": 0.9358130793630308, "grad_norm": 0.33203125, "learning_rate": 3.254126166718574e-07, "loss": 2.0114, "step": 29005 }, { "epoch": 0.9358453432168272, "grad_norm": 0.33203125, "learning_rate": 3.250866734172725e-07, "loss": 1.9758, "step": 29006 }, { "epoch": 0.9358776070706235, "grad_norm": 0.330078125, "learning_rate": 3.247608916936079e-07, "loss": 2.0175, "step": 29007 }, { "epoch": 0.9359098709244199, "grad_norm": 0.337890625, "learning_rate": 3.244352715044457e-07, "loss": 2.0147, "step": 29008 }, { "epoch": 0.9359421347782163, "grad_norm": 0.328125, "learning_rate": 3.2410981285337147e-07, "loss": 1.9538, "step": 29009 }, { "epoch": 0.9359743986320126, "grad_norm": 0.330078125, "learning_rate": 3.237845157439706e-07, "loss": 1.987, "step": 29010 }, { "epoch": 0.936006662485809, "grad_norm": 0.3359375, "learning_rate": 3.2345938017981857e-07, "loss": 2.0132, "step": 29011 }, { "epoch": 0.9360389263396053, "grad_norm": 0.341796875, "learning_rate": 3.231344061644975e-07, "loss": 2.0047, "step": 29012 }, { "epoch": 0.9360711901934017, "grad_norm": 0.3359375, "learning_rate": 3.2280959370158624e-07, "loss": 1.9681, "step": 29013 }, { "epoch": 0.936103454047198, "grad_norm": 0.330078125, "learning_rate": 3.224849427946536e-07, "loss": 2.0226, "step": 29014 }, { "epoch": 0.9361357179009944, "grad_norm": 0.333984375, "learning_rate": 3.221604534472783e-07, "loss": 1.9712, "step": 29015 }, { "epoch": 0.9361679817547907, "grad_norm": 0.328125, "learning_rate": 3.2183612566303256e-07, "loss": 2.0059, "step": 29016 }, { "epoch": 0.9362002456085871, "grad_norm": 0.333984375, "learning_rate": 3.215119594454835e-07, "loss": 1.9839, "step": 29017 }, { "epoch": 0.9362325094623833, "grad_norm": 0.333984375, "learning_rate": 3.211879547981983e-07, "loss": 1.9886, "step": 29018 }, { "epoch": 0.9362647733161797, "grad_norm": 0.330078125, "learning_rate": 3.2086411172475073e-07, "loss": 1.9909, "step": 29019 }, { "epoch": 0.936297037169976, "grad_norm": 0.328125, "learning_rate": 3.2054043022869626e-07, "loss": 1.9954, "step": 29020 }, { "epoch": 0.9363293010237724, "grad_norm": 0.33984375, "learning_rate": 3.202169103136021e-07, "loss": 1.941, "step": 29021 }, { "epoch": 0.9363615648775687, "grad_norm": 0.337890625, "learning_rate": 3.198935519830304e-07, "loss": 2.004, "step": 29022 }, { "epoch": 0.9363938287313651, "grad_norm": 0.33203125, "learning_rate": 3.1957035524053826e-07, "loss": 1.9849, "step": 29023 }, { "epoch": 0.9364260925851614, "grad_norm": 0.337890625, "learning_rate": 3.192473200896828e-07, "loss": 1.9903, "step": 29024 }, { "epoch": 0.9364583564389578, "grad_norm": 0.33203125, "learning_rate": 3.189244465340213e-07, "loss": 1.9776, "step": 29025 }, { "epoch": 0.9364906202927541, "grad_norm": 0.32421875, "learning_rate": 3.1860173457710917e-07, "loss": 2.021, "step": 29026 }, { "epoch": 0.9365228841465505, "grad_norm": 0.3359375, "learning_rate": 3.1827918422249525e-07, "loss": 1.9665, "step": 29027 }, { "epoch": 0.9365551480003468, "grad_norm": 0.330078125, "learning_rate": 3.179567954737317e-07, "loss": 2.0014, "step": 29028 }, { "epoch": 0.9365874118541432, "grad_norm": 0.330078125, "learning_rate": 3.1763456833436733e-07, "loss": 2.007, "step": 29029 }, { "epoch": 0.9366196757079396, "grad_norm": 0.3359375, "learning_rate": 3.173125028079493e-07, "loss": 1.9544, "step": 29030 }, { "epoch": 0.9366519395617359, "grad_norm": 0.33203125, "learning_rate": 3.1699059889801986e-07, "loss": 1.9695, "step": 29031 }, { "epoch": 0.9366842034155323, "grad_norm": 0.326171875, "learning_rate": 3.1666885660812606e-07, "loss": 1.9932, "step": 29032 }, { "epoch": 0.9367164672693286, "grad_norm": 0.337890625, "learning_rate": 3.163472759418068e-07, "loss": 1.9521, "step": 29033 }, { "epoch": 0.936748731123125, "grad_norm": 0.33984375, "learning_rate": 3.1602585690260255e-07, "loss": 1.9624, "step": 29034 }, { "epoch": 0.9367809949769212, "grad_norm": 0.337890625, "learning_rate": 3.1570459949405214e-07, "loss": 1.9698, "step": 29035 }, { "epoch": 0.9368132588307176, "grad_norm": 0.330078125, "learning_rate": 3.1538350371969114e-07, "loss": 1.9673, "step": 29036 }, { "epoch": 0.9368455226845139, "grad_norm": 0.333984375, "learning_rate": 3.150625695830517e-07, "loss": 1.9603, "step": 29037 }, { "epoch": 0.9368777865383103, "grad_norm": 0.33203125, "learning_rate": 3.147417970876709e-07, "loss": 1.9852, "step": 29038 }, { "epoch": 0.9369100503921066, "grad_norm": 0.333984375, "learning_rate": 3.1442118623707774e-07, "loss": 1.9744, "step": 29039 }, { "epoch": 0.936942314245903, "grad_norm": 0.341796875, "learning_rate": 3.1410073703479756e-07, "loss": 1.9851, "step": 29040 }, { "epoch": 0.9369745780996993, "grad_norm": 0.326171875, "learning_rate": 3.1378044948436434e-07, "loss": 2.0124, "step": 29041 }, { "epoch": 0.9370068419534957, "grad_norm": 0.32421875, "learning_rate": 3.1346032358929857e-07, "loss": 2.0071, "step": 29042 }, { "epoch": 0.937039105807292, "grad_norm": 0.328125, "learning_rate": 3.13140359353124e-07, "loss": 1.9895, "step": 29043 }, { "epoch": 0.9370713696610884, "grad_norm": 0.33203125, "learning_rate": 3.1282055677936796e-07, "loss": 2.0092, "step": 29044 }, { "epoch": 0.9371036335148847, "grad_norm": 0.341796875, "learning_rate": 3.125009158715442e-07, "loss": 1.9701, "step": 29045 }, { "epoch": 0.9371358973686811, "grad_norm": 0.349609375, "learning_rate": 3.121814366331716e-07, "loss": 2.0067, "step": 29046 }, { "epoch": 0.9371681612224774, "grad_norm": 0.33203125, "learning_rate": 3.1186211906777396e-07, "loss": 1.9937, "step": 29047 }, { "epoch": 0.9372004250762738, "grad_norm": 0.337890625, "learning_rate": 3.115429631788569e-07, "loss": 1.981, "step": 29048 }, { "epoch": 0.9372326889300702, "grad_norm": 0.33203125, "learning_rate": 3.1122396896993753e-07, "loss": 1.971, "step": 29049 }, { "epoch": 0.9372649527838665, "grad_norm": 0.330078125, "learning_rate": 3.1090513644452814e-07, "loss": 1.9724, "step": 29050 }, { "epoch": 0.9372972166376629, "grad_norm": 0.33984375, "learning_rate": 3.105864656061358e-07, "loss": 1.9356, "step": 29051 }, { "epoch": 0.9373294804914591, "grad_norm": 0.330078125, "learning_rate": 3.102679564582711e-07, "loss": 1.981, "step": 29052 }, { "epoch": 0.9373617443452555, "grad_norm": 0.330078125, "learning_rate": 3.0994960900443957e-07, "loss": 2.0041, "step": 29053 }, { "epoch": 0.9373940081990518, "grad_norm": 0.333984375, "learning_rate": 3.096314232481401e-07, "loss": 2.0168, "step": 29054 }, { "epoch": 0.9374262720528482, "grad_norm": 0.326171875, "learning_rate": 3.0931339919288317e-07, "loss": 2.0047, "step": 29055 }, { "epoch": 0.9374585359066445, "grad_norm": 0.330078125, "learning_rate": 3.0899553684216265e-07, "loss": 2.0004, "step": 29056 }, { "epoch": 0.9374907997604409, "grad_norm": 0.330078125, "learning_rate": 3.086778361994808e-07, "loss": 1.988, "step": 29057 }, { "epoch": 0.9375230636142372, "grad_norm": 0.32421875, "learning_rate": 3.083602972683347e-07, "loss": 1.9653, "step": 29058 }, { "epoch": 0.9375553274680336, "grad_norm": 0.330078125, "learning_rate": 3.0804292005221667e-07, "loss": 2.0069, "step": 29059 }, { "epoch": 0.9375875913218299, "grad_norm": 0.330078125, "learning_rate": 3.077257045546239e-07, "loss": 2.0055, "step": 29060 }, { "epoch": 0.9376198551756263, "grad_norm": 0.33203125, "learning_rate": 3.074086507790452e-07, "loss": 2.0139, "step": 29061 }, { "epoch": 0.9376521190294226, "grad_norm": 0.3359375, "learning_rate": 3.070917587289712e-07, "loss": 1.9835, "step": 29062 }, { "epoch": 0.937684382883219, "grad_norm": 0.330078125, "learning_rate": 3.067750284078924e-07, "loss": 1.9674, "step": 29063 }, { "epoch": 0.9377166467370153, "grad_norm": 0.330078125, "learning_rate": 3.0645845981929265e-07, "loss": 2.001, "step": 29064 }, { "epoch": 0.9377489105908117, "grad_norm": 0.33984375, "learning_rate": 3.0614205296665586e-07, "loss": 2.0041, "step": 29065 }, { "epoch": 0.937781174444608, "grad_norm": 0.33203125, "learning_rate": 3.058258078534676e-07, "loss": 1.9801, "step": 29066 }, { "epoch": 0.9378134382984044, "grad_norm": 0.328125, "learning_rate": 3.0550972448320667e-07, "loss": 1.99, "step": 29067 }, { "epoch": 0.9378457021522006, "grad_norm": 0.345703125, "learning_rate": 3.0519380285935206e-07, "loss": 1.9787, "step": 29068 }, { "epoch": 0.937877966005997, "grad_norm": 0.3359375, "learning_rate": 3.0487804298538424e-07, "loss": 1.9779, "step": 29069 }, { "epoch": 0.9379102298597934, "grad_norm": 0.341796875, "learning_rate": 3.045624448647755e-07, "loss": 1.9802, "step": 29070 }, { "epoch": 0.9379424937135897, "grad_norm": 0.33203125, "learning_rate": 3.0424700850099965e-07, "loss": 1.975, "step": 29071 }, { "epoch": 0.9379747575673861, "grad_norm": 0.326171875, "learning_rate": 3.0393173389753224e-07, "loss": 2.0003, "step": 29072 }, { "epoch": 0.9380070214211824, "grad_norm": 0.326171875, "learning_rate": 3.036166210578406e-07, "loss": 1.9378, "step": 29073 }, { "epoch": 0.9380392852749788, "grad_norm": 0.337890625, "learning_rate": 3.033016699853919e-07, "loss": 1.987, "step": 29074 }, { "epoch": 0.9380715491287751, "grad_norm": 0.326171875, "learning_rate": 3.0298688068366e-07, "loss": 1.9895, "step": 29075 }, { "epoch": 0.9381038129825715, "grad_norm": 0.3359375, "learning_rate": 3.0267225315610046e-07, "loss": 1.9887, "step": 29076 }, { "epoch": 0.9381360768363678, "grad_norm": 0.328125, "learning_rate": 3.023577874061806e-07, "loss": 2.0108, "step": 29077 }, { "epoch": 0.9381683406901642, "grad_norm": 0.33203125, "learning_rate": 3.020434834373659e-07, "loss": 2.0032, "step": 29078 }, { "epoch": 0.9382006045439605, "grad_norm": 0.326171875, "learning_rate": 3.0172934125311026e-07, "loss": 1.9875, "step": 29079 }, { "epoch": 0.9382328683977569, "grad_norm": 0.337890625, "learning_rate": 3.014153608568726e-07, "loss": 1.9961, "step": 29080 }, { "epoch": 0.9382651322515532, "grad_norm": 0.333984375, "learning_rate": 3.0110154225211184e-07, "loss": 1.9845, "step": 29081 }, { "epoch": 0.9382973961053496, "grad_norm": 0.33203125, "learning_rate": 3.0078788544227856e-07, "loss": 1.96, "step": 29082 }, { "epoch": 0.9383296599591459, "grad_norm": 0.3359375, "learning_rate": 3.0047439043082825e-07, "loss": 2.0088, "step": 29083 }, { "epoch": 0.9383619238129423, "grad_norm": 0.33203125, "learning_rate": 3.001610572212116e-07, "loss": 1.9801, "step": 29084 }, { "epoch": 0.9383941876667385, "grad_norm": 0.333984375, "learning_rate": 2.9984788581687405e-07, "loss": 2.0111, "step": 29085 }, { "epoch": 0.9384264515205349, "grad_norm": 0.33203125, "learning_rate": 2.995348762212663e-07, "loss": 2.0031, "step": 29086 }, { "epoch": 0.9384587153743312, "grad_norm": 0.328125, "learning_rate": 2.992220284378322e-07, "loss": 1.9897, "step": 29087 }, { "epoch": 0.9384909792281276, "grad_norm": 0.33203125, "learning_rate": 2.98909342470014e-07, "loss": 2.0054, "step": 29088 }, { "epoch": 0.9385232430819239, "grad_norm": 0.341796875, "learning_rate": 2.985968183212573e-07, "loss": 2.007, "step": 29089 }, { "epoch": 0.9385555069357203, "grad_norm": 0.330078125, "learning_rate": 2.9828445599499934e-07, "loss": 1.959, "step": 29090 }, { "epoch": 0.9385877707895167, "grad_norm": 0.341796875, "learning_rate": 2.979722554946807e-07, "loss": 1.9847, "step": 29091 }, { "epoch": 0.938620034643313, "grad_norm": 0.32421875, "learning_rate": 2.976602168237352e-07, "loss": 2.0041, "step": 29092 }, { "epoch": 0.9386522984971094, "grad_norm": 0.333984375, "learning_rate": 2.9734833998559696e-07, "loss": 2.0126, "step": 29093 }, { "epoch": 0.9386845623509057, "grad_norm": 0.33203125, "learning_rate": 2.970366249837031e-07, "loss": 1.9796, "step": 29094 }, { "epoch": 0.9387168262047021, "grad_norm": 0.3359375, "learning_rate": 2.9672507182148257e-07, "loss": 1.9854, "step": 29095 }, { "epoch": 0.9387490900584984, "grad_norm": 0.333984375, "learning_rate": 2.9641368050236095e-07, "loss": 1.9775, "step": 29096 }, { "epoch": 0.9387813539122948, "grad_norm": 0.3359375, "learning_rate": 2.9610245102977216e-07, "loss": 1.9982, "step": 29097 }, { "epoch": 0.9388136177660911, "grad_norm": 0.326171875, "learning_rate": 2.9579138340713853e-07, "loss": 2.0014, "step": 29098 }, { "epoch": 0.9388458816198875, "grad_norm": 0.333984375, "learning_rate": 2.9548047763788387e-07, "loss": 2.0054, "step": 29099 }, { "epoch": 0.9388781454736838, "grad_norm": 0.337890625, "learning_rate": 2.951697337254322e-07, "loss": 1.9967, "step": 29100 }, { "epoch": 0.9389104093274802, "grad_norm": 0.330078125, "learning_rate": 2.9485915167320244e-07, "loss": 1.98, "step": 29101 }, { "epoch": 0.9389426731812764, "grad_norm": 0.32421875, "learning_rate": 2.945487314846118e-07, "loss": 2.0173, "step": 29102 }, { "epoch": 0.9389749370350728, "grad_norm": 0.33203125, "learning_rate": 2.9423847316308263e-07, "loss": 1.9481, "step": 29103 }, { "epoch": 0.9390072008888691, "grad_norm": 0.328125, "learning_rate": 2.9392837671202375e-07, "loss": 1.9867, "step": 29104 }, { "epoch": 0.9390394647426655, "grad_norm": 0.33984375, "learning_rate": 2.936184421348526e-07, "loss": 2.0073, "step": 29105 }, { "epoch": 0.9390717285964618, "grad_norm": 0.330078125, "learning_rate": 2.933086694349796e-07, "loss": 2.0012, "step": 29106 }, { "epoch": 0.9391039924502582, "grad_norm": 0.330078125, "learning_rate": 2.9299905861581377e-07, "loss": 1.9774, "step": 29107 }, { "epoch": 0.9391362563040545, "grad_norm": 0.33984375, "learning_rate": 2.926896096807624e-07, "loss": 1.9927, "step": 29108 }, { "epoch": 0.9391685201578509, "grad_norm": 0.337890625, "learning_rate": 2.9238032263323445e-07, "loss": 2.0183, "step": 29109 }, { "epoch": 0.9392007840116473, "grad_norm": 0.330078125, "learning_rate": 2.9207119747663046e-07, "loss": 1.9597, "step": 29110 }, { "epoch": 0.9392330478654436, "grad_norm": 0.328125, "learning_rate": 2.917622342143561e-07, "loss": 1.9997, "step": 29111 }, { "epoch": 0.93926531171924, "grad_norm": 0.326171875, "learning_rate": 2.914534328498153e-07, "loss": 2.0131, "step": 29112 }, { "epoch": 0.9392975755730363, "grad_norm": 0.333984375, "learning_rate": 2.9114479338639866e-07, "loss": 1.9968, "step": 29113 }, { "epoch": 0.9393298394268327, "grad_norm": 0.33203125, "learning_rate": 2.908363158275101e-07, "loss": 2.0056, "step": 29114 }, { "epoch": 0.939362103280629, "grad_norm": 0.333984375, "learning_rate": 2.9052800017654204e-07, "loss": 1.9936, "step": 29115 }, { "epoch": 0.9393943671344254, "grad_norm": 0.33203125, "learning_rate": 2.9021984643688825e-07, "loss": 2.0253, "step": 29116 }, { "epoch": 0.9394266309882217, "grad_norm": 0.3359375, "learning_rate": 2.899118546119445e-07, "loss": 1.9643, "step": 29117 }, { "epoch": 0.939458894842018, "grad_norm": 0.333984375, "learning_rate": 2.8960402470509626e-07, "loss": 1.9756, "step": 29118 }, { "epoch": 0.9394911586958143, "grad_norm": 0.333984375, "learning_rate": 2.892963567197343e-07, "loss": 1.9788, "step": 29119 }, { "epoch": 0.9395234225496107, "grad_norm": 0.333984375, "learning_rate": 2.8898885065924416e-07, "loss": 1.9794, "step": 29120 }, { "epoch": 0.939555686403407, "grad_norm": 0.328125, "learning_rate": 2.8868150652701154e-07, "loss": 1.9823, "step": 29121 }, { "epoch": 0.9395879502572034, "grad_norm": 0.337890625, "learning_rate": 2.8837432432641865e-07, "loss": 1.9739, "step": 29122 }, { "epoch": 0.9396202141109997, "grad_norm": 0.3359375, "learning_rate": 2.880673040608478e-07, "loss": 1.969, "step": 29123 }, { "epoch": 0.9396524779647961, "grad_norm": 0.330078125, "learning_rate": 2.8776044573367633e-07, "loss": 1.959, "step": 29124 }, { "epoch": 0.9396847418185924, "grad_norm": 0.32421875, "learning_rate": 2.8745374934828485e-07, "loss": 2.0134, "step": 29125 }, { "epoch": 0.9397170056723888, "grad_norm": 0.3359375, "learning_rate": 2.871472149080473e-07, "loss": 1.9676, "step": 29126 }, { "epoch": 0.9397492695261851, "grad_norm": 0.37890625, "learning_rate": 2.86840842416336e-07, "loss": 1.9948, "step": 29127 }, { "epoch": 0.9397815333799815, "grad_norm": 0.34375, "learning_rate": 2.865346318765283e-07, "loss": 2.0074, "step": 29128 }, { "epoch": 0.9398137972337778, "grad_norm": 0.330078125, "learning_rate": 2.862285832919931e-07, "loss": 1.9776, "step": 29129 }, { "epoch": 0.9398460610875742, "grad_norm": 0.330078125, "learning_rate": 2.8592269666609437e-07, "loss": 2.0247, "step": 29130 }, { "epoch": 0.9398783249413706, "grad_norm": 0.328125, "learning_rate": 2.8561697200220616e-07, "loss": 2.0031, "step": 29131 }, { "epoch": 0.9399105887951669, "grad_norm": 0.37109375, "learning_rate": 2.8531140930368903e-07, "loss": 2.0091, "step": 29132 }, { "epoch": 0.9399428526489633, "grad_norm": 0.330078125, "learning_rate": 2.8500600857390533e-07, "loss": 1.9881, "step": 29133 }, { "epoch": 0.9399751165027596, "grad_norm": 0.328125, "learning_rate": 2.8470076981622407e-07, "loss": 2.0032, "step": 29134 }, { "epoch": 0.940007380356556, "grad_norm": 0.333984375, "learning_rate": 2.843956930339958e-07, "loss": 1.9754, "step": 29135 }, { "epoch": 0.9400396442103522, "grad_norm": 0.328125, "learning_rate": 2.840907782305829e-07, "loss": 2.0174, "step": 29136 }, { "epoch": 0.9400719080641486, "grad_norm": 0.33203125, "learning_rate": 2.837860254093444e-07, "loss": 1.9587, "step": 29137 }, { "epoch": 0.9401041719179449, "grad_norm": 0.333984375, "learning_rate": 2.834814345736292e-07, "loss": 1.9722, "step": 29138 }, { "epoch": 0.9401364357717413, "grad_norm": 0.330078125, "learning_rate": 2.8317700572679293e-07, "loss": 1.9792, "step": 29139 }, { "epoch": 0.9401686996255376, "grad_norm": 0.3359375, "learning_rate": 2.828727388721897e-07, "loss": 2.0175, "step": 29140 }, { "epoch": 0.940200963479334, "grad_norm": 0.326171875, "learning_rate": 2.8256863401316166e-07, "loss": 1.9839, "step": 29141 }, { "epoch": 0.9402332273331303, "grad_norm": 0.330078125, "learning_rate": 2.8226469115305966e-07, "loss": 2.0093, "step": 29142 }, { "epoch": 0.9402654911869267, "grad_norm": 0.326171875, "learning_rate": 2.819609102952325e-07, "loss": 1.983, "step": 29143 }, { "epoch": 0.940297755040723, "grad_norm": 0.330078125, "learning_rate": 2.816572914430193e-07, "loss": 2.0023, "step": 29144 }, { "epoch": 0.9403300188945194, "grad_norm": 0.33203125, "learning_rate": 2.8135383459976407e-07, "loss": 2.0066, "step": 29145 }, { "epoch": 0.9403622827483157, "grad_norm": 0.330078125, "learning_rate": 2.810505397688073e-07, "loss": 1.9924, "step": 29146 }, { "epoch": 0.9403945466021121, "grad_norm": 0.333984375, "learning_rate": 2.807474069534849e-07, "loss": 1.9934, "step": 29147 }, { "epoch": 0.9404268104559084, "grad_norm": 0.33203125, "learning_rate": 2.804444361571373e-07, "loss": 2.0157, "step": 29148 }, { "epoch": 0.9404590743097048, "grad_norm": 0.333984375, "learning_rate": 2.80141627383097e-07, "loss": 2.0208, "step": 29149 }, { "epoch": 0.9404913381635012, "grad_norm": 0.333984375, "learning_rate": 2.7983898063469624e-07, "loss": 1.9691, "step": 29150 }, { "epoch": 0.9405236020172975, "grad_norm": 0.33203125, "learning_rate": 2.7953649591527073e-07, "loss": 1.9852, "step": 29151 }, { "epoch": 0.9405558658710939, "grad_norm": 0.337890625, "learning_rate": 2.7923417322814614e-07, "loss": 1.9943, "step": 29152 }, { "epoch": 0.9405881297248901, "grad_norm": 0.33203125, "learning_rate": 2.789320125766498e-07, "loss": 2.0014, "step": 29153 }, { "epoch": 0.9406203935786865, "grad_norm": 0.32421875, "learning_rate": 2.7863001396410904e-07, "loss": 2.0085, "step": 29154 }, { "epoch": 0.9406526574324828, "grad_norm": 0.3359375, "learning_rate": 2.7832817739384955e-07, "loss": 2.0087, "step": 29155 }, { "epoch": 0.9406849212862792, "grad_norm": 0.328125, "learning_rate": 2.780265028691903e-07, "loss": 1.9657, "step": 29156 }, { "epoch": 0.9407171851400755, "grad_norm": 0.3359375, "learning_rate": 2.7772499039345535e-07, "loss": 1.9994, "step": 29157 }, { "epoch": 0.9407494489938719, "grad_norm": 0.333984375, "learning_rate": 2.774236399699603e-07, "loss": 2.0211, "step": 29158 }, { "epoch": 0.9407817128476682, "grad_norm": 0.349609375, "learning_rate": 2.77122451602026e-07, "loss": 1.9954, "step": 29159 }, { "epoch": 0.9408139767014646, "grad_norm": 0.33984375, "learning_rate": 2.768214252929646e-07, "loss": 1.9588, "step": 29160 }, { "epoch": 0.9408462405552609, "grad_norm": 0.373046875, "learning_rate": 2.7652056104609025e-07, "loss": 1.9852, "step": 29161 }, { "epoch": 0.9408785044090573, "grad_norm": 0.326171875, "learning_rate": 2.762198588647152e-07, "loss": 1.9557, "step": 29162 }, { "epoch": 0.9409107682628536, "grad_norm": 0.326171875, "learning_rate": 2.759193187521503e-07, "loss": 1.9775, "step": 29163 }, { "epoch": 0.94094303211665, "grad_norm": 0.333984375, "learning_rate": 2.75618940711701e-07, "loss": 2.0156, "step": 29164 }, { "epoch": 0.9409752959704463, "grad_norm": 0.32421875, "learning_rate": 2.753187247466765e-07, "loss": 1.9813, "step": 29165 }, { "epoch": 0.9410075598242427, "grad_norm": 0.3359375, "learning_rate": 2.7501867086037915e-07, "loss": 1.9434, "step": 29166 }, { "epoch": 0.941039823678039, "grad_norm": 0.33203125, "learning_rate": 2.747187790561112e-07, "loss": 1.9986, "step": 29167 }, { "epoch": 0.9410720875318354, "grad_norm": 0.33984375, "learning_rate": 2.7441904933717844e-07, "loss": 1.9976, "step": 29168 }, { "epoch": 0.9411043513856316, "grad_norm": 0.333984375, "learning_rate": 2.741194817068748e-07, "loss": 1.967, "step": 29169 }, { "epoch": 0.941136615239428, "grad_norm": 0.333984375, "learning_rate": 2.738200761684978e-07, "loss": 1.9944, "step": 29170 }, { "epoch": 0.9411688790932244, "grad_norm": 0.33203125, "learning_rate": 2.7352083272534965e-07, "loss": 1.9625, "step": 29171 }, { "epoch": 0.9412011429470207, "grad_norm": 0.337890625, "learning_rate": 2.7322175138071617e-07, "loss": 1.9891, "step": 29172 }, { "epoch": 0.9412334068008171, "grad_norm": 0.3359375, "learning_rate": 2.7292283213789293e-07, "loss": 1.9649, "step": 29173 }, { "epoch": 0.9412656706546134, "grad_norm": 0.337890625, "learning_rate": 2.726240750001707e-07, "loss": 1.9805, "step": 29174 }, { "epoch": 0.9412979345084098, "grad_norm": 0.326171875, "learning_rate": 2.7232547997083525e-07, "loss": 2.0068, "step": 29175 }, { "epoch": 0.9413301983622061, "grad_norm": 0.349609375, "learning_rate": 2.7202704705317717e-07, "loss": 1.98, "step": 29176 }, { "epoch": 0.9413624622160025, "grad_norm": 0.3359375, "learning_rate": 2.717287762504805e-07, "loss": 2.0146, "step": 29177 }, { "epoch": 0.9413947260697988, "grad_norm": 0.328125, "learning_rate": 2.714306675660244e-07, "loss": 1.9561, "step": 29178 }, { "epoch": 0.9414269899235952, "grad_norm": 0.34375, "learning_rate": 2.7113272100309616e-07, "loss": 1.9839, "step": 29179 }, { "epoch": 0.9414592537773915, "grad_norm": 0.330078125, "learning_rate": 2.708349365649715e-07, "loss": 1.9943, "step": 29180 }, { "epoch": 0.9414915176311879, "grad_norm": 0.330078125, "learning_rate": 2.705373142549278e-07, "loss": 1.995, "step": 29181 }, { "epoch": 0.9415237814849842, "grad_norm": 0.333984375, "learning_rate": 2.702398540762441e-07, "loss": 1.9981, "step": 29182 }, { "epoch": 0.9415560453387806, "grad_norm": 0.333984375, "learning_rate": 2.6994255603219276e-07, "loss": 1.9543, "step": 29183 }, { "epoch": 0.9415883091925769, "grad_norm": 0.333984375, "learning_rate": 2.696454201260462e-07, "loss": 1.9833, "step": 29184 }, { "epoch": 0.9416205730463733, "grad_norm": 0.33203125, "learning_rate": 2.6934844636107515e-07, "loss": 1.9978, "step": 29185 }, { "epoch": 0.9416528369001695, "grad_norm": 0.330078125, "learning_rate": 2.6905163474055025e-07, "loss": 1.9711, "step": 29186 }, { "epoch": 0.9416851007539659, "grad_norm": 0.330078125, "learning_rate": 2.68754985267734e-07, "loss": 1.9919, "step": 29187 }, { "epoch": 0.9417173646077622, "grad_norm": 0.34375, "learning_rate": 2.6845849794589706e-07, "loss": 1.9729, "step": 29188 }, { "epoch": 0.9417496284615586, "grad_norm": 0.32421875, "learning_rate": 2.681621727783001e-07, "loss": 1.9979, "step": 29189 }, { "epoch": 0.9417818923153549, "grad_norm": 0.333984375, "learning_rate": 2.6786600976820563e-07, "loss": 1.9997, "step": 29190 }, { "epoch": 0.9418141561691513, "grad_norm": 0.333984375, "learning_rate": 2.675700089188726e-07, "loss": 1.9582, "step": 29191 }, { "epoch": 0.9418464200229477, "grad_norm": 0.326171875, "learning_rate": 2.6727417023356015e-07, "loss": 1.9744, "step": 29192 }, { "epoch": 0.941878683876744, "grad_norm": 0.33203125, "learning_rate": 2.6697849371552395e-07, "loss": 1.9595, "step": 29193 }, { "epoch": 0.9419109477305404, "grad_norm": 0.3359375, "learning_rate": 2.666829793680214e-07, "loss": 1.9998, "step": 29194 }, { "epoch": 0.9419432115843367, "grad_norm": 0.33203125, "learning_rate": 2.663876271943e-07, "loss": 1.9945, "step": 29195 }, { "epoch": 0.9419754754381331, "grad_norm": 0.3359375, "learning_rate": 2.66092437197617e-07, "loss": 2.0045, "step": 29196 }, { "epoch": 0.9420077392919294, "grad_norm": 0.3359375, "learning_rate": 2.657974093812149e-07, "loss": 1.9447, "step": 29197 }, { "epoch": 0.9420400031457258, "grad_norm": 0.3359375, "learning_rate": 2.6550254374834604e-07, "loss": 1.958, "step": 29198 }, { "epoch": 0.9420722669995221, "grad_norm": 0.326171875, "learning_rate": 2.652078403022562e-07, "loss": 1.9954, "step": 29199 }, { "epoch": 0.9421045308533185, "grad_norm": 0.328125, "learning_rate": 2.649132990461861e-07, "loss": 1.9866, "step": 29200 }, { "epoch": 0.9421367947071148, "grad_norm": 0.333984375, "learning_rate": 2.646189199833782e-07, "loss": 1.9964, "step": 29201 }, { "epoch": 0.9421690585609112, "grad_norm": 0.333984375, "learning_rate": 2.6432470311707815e-07, "loss": 1.9459, "step": 29202 }, { "epoch": 0.9422013224147074, "grad_norm": 0.330078125, "learning_rate": 2.6403064845051674e-07, "loss": 1.9974, "step": 29203 }, { "epoch": 0.9422335862685038, "grad_norm": 0.3359375, "learning_rate": 2.6373675598693636e-07, "loss": 1.9546, "step": 29204 }, { "epoch": 0.9422658501223001, "grad_norm": 0.33203125, "learning_rate": 2.634430257295695e-07, "loss": 1.9998, "step": 29205 }, { "epoch": 0.9422981139760965, "grad_norm": 0.326171875, "learning_rate": 2.6314945768164845e-07, "loss": 1.9714, "step": 29206 }, { "epoch": 0.9423303778298928, "grad_norm": 0.33203125, "learning_rate": 2.628560518464074e-07, "loss": 1.9932, "step": 29207 }, { "epoch": 0.9423626416836892, "grad_norm": 0.33203125, "learning_rate": 2.6256280822707536e-07, "loss": 1.9911, "step": 29208 }, { "epoch": 0.9423949055374855, "grad_norm": 0.34375, "learning_rate": 2.6226972682687647e-07, "loss": 1.9733, "step": 29209 }, { "epoch": 0.9424271693912819, "grad_norm": 0.3359375, "learning_rate": 2.619768076490431e-07, "loss": 1.9702, "step": 29210 }, { "epoch": 0.9424594332450783, "grad_norm": 0.33203125, "learning_rate": 2.6168405069679437e-07, "loss": 1.9564, "step": 29211 }, { "epoch": 0.9424916970988746, "grad_norm": 0.330078125, "learning_rate": 2.613914559733527e-07, "loss": 1.9973, "step": 29212 }, { "epoch": 0.942523960952671, "grad_norm": 0.33984375, "learning_rate": 2.6109902348194383e-07, "loss": 1.9811, "step": 29213 }, { "epoch": 0.9425562248064673, "grad_norm": 0.330078125, "learning_rate": 2.6080675322578186e-07, "loss": 1.9992, "step": 29214 }, { "epoch": 0.9425884886602637, "grad_norm": 0.33203125, "learning_rate": 2.605146452080842e-07, "loss": 1.987, "step": 29215 }, { "epoch": 0.94262075251406, "grad_norm": 0.33203125, "learning_rate": 2.602226994320683e-07, "loss": 1.9904, "step": 29216 }, { "epoch": 0.9426530163678564, "grad_norm": 0.3359375, "learning_rate": 2.5993091590094666e-07, "loss": 1.9866, "step": 29217 }, { "epoch": 0.9426852802216527, "grad_norm": 0.3359375, "learning_rate": 2.596392946179299e-07, "loss": 1.972, "step": 29218 }, { "epoch": 0.942717544075449, "grad_norm": 0.33203125, "learning_rate": 2.5934783558623053e-07, "loss": 1.9694, "step": 29219 }, { "epoch": 0.9427498079292453, "grad_norm": 0.34765625, "learning_rate": 2.59056538809056e-07, "loss": 2.0002, "step": 29220 }, { "epoch": 0.9427820717830417, "grad_norm": 0.33203125, "learning_rate": 2.587654042896087e-07, "loss": 2.0249, "step": 29221 }, { "epoch": 0.942814335636838, "grad_norm": 0.32421875, "learning_rate": 2.5847443203109944e-07, "loss": 1.9838, "step": 29222 }, { "epoch": 0.9428465994906344, "grad_norm": 0.353515625, "learning_rate": 2.581836220367273e-07, "loss": 1.9735, "step": 29223 }, { "epoch": 0.9428788633444307, "grad_norm": 0.333984375, "learning_rate": 2.5789297430969306e-07, "loss": 1.9612, "step": 29224 }, { "epoch": 0.9429111271982271, "grad_norm": 0.333984375, "learning_rate": 2.5760248885319914e-07, "loss": 1.9665, "step": 29225 }, { "epoch": 0.9429433910520234, "grad_norm": 0.330078125, "learning_rate": 2.5731216567043805e-07, "loss": 1.9613, "step": 29226 }, { "epoch": 0.9429756549058198, "grad_norm": 0.3359375, "learning_rate": 2.570220047646121e-07, "loss": 1.9985, "step": 29227 }, { "epoch": 0.9430079187596161, "grad_norm": 0.341796875, "learning_rate": 2.567320061389089e-07, "loss": 2.0028, "step": 29228 }, { "epoch": 0.9430401826134125, "grad_norm": 0.3515625, "learning_rate": 2.564421697965225e-07, "loss": 1.9901, "step": 29229 }, { "epoch": 0.9430724464672088, "grad_norm": 0.33203125, "learning_rate": 2.5615249574064526e-07, "loss": 1.9593, "step": 29230 }, { "epoch": 0.9431047103210052, "grad_norm": 0.341796875, "learning_rate": 2.5586298397446306e-07, "loss": 1.9903, "step": 29231 }, { "epoch": 0.9431369741748016, "grad_norm": 0.3359375, "learning_rate": 2.5557363450116335e-07, "loss": 1.9617, "step": 29232 }, { "epoch": 0.9431692380285979, "grad_norm": 0.333984375, "learning_rate": 2.5528444732393354e-07, "loss": 1.9404, "step": 29233 }, { "epoch": 0.9432015018823943, "grad_norm": 0.328125, "learning_rate": 2.5499542244595275e-07, "loss": 2.0, "step": 29234 }, { "epoch": 0.9432337657361906, "grad_norm": 0.33203125, "learning_rate": 2.547065598704051e-07, "loss": 1.9805, "step": 29235 }, { "epoch": 0.943266029589987, "grad_norm": 0.345703125, "learning_rate": 2.544178596004698e-07, "loss": 1.9846, "step": 29236 }, { "epoch": 0.9432982934437832, "grad_norm": 0.33203125, "learning_rate": 2.5412932163932425e-07, "loss": 1.9935, "step": 29237 }, { "epoch": 0.9433305572975796, "grad_norm": 0.341796875, "learning_rate": 2.538409459901442e-07, "loss": 1.9733, "step": 29238 }, { "epoch": 0.9433628211513759, "grad_norm": 0.33984375, "learning_rate": 2.535527326561038e-07, "loss": 1.9899, "step": 29239 }, { "epoch": 0.9433950850051723, "grad_norm": 0.33203125, "learning_rate": 2.532646816403755e-07, "loss": 1.9915, "step": 29240 }, { "epoch": 0.9434273488589686, "grad_norm": 0.326171875, "learning_rate": 2.529767929461302e-07, "loss": 2.0027, "step": 29241 }, { "epoch": 0.943459612712765, "grad_norm": 0.328125, "learning_rate": 2.5268906657653856e-07, "loss": 2.0105, "step": 29242 }, { "epoch": 0.9434918765665613, "grad_norm": 0.333984375, "learning_rate": 2.524015025347648e-07, "loss": 1.9535, "step": 29243 }, { "epoch": 0.9435241404203577, "grad_norm": 0.33984375, "learning_rate": 2.521141008239747e-07, "loss": 1.9999, "step": 29244 }, { "epoch": 0.943556404274154, "grad_norm": 0.328125, "learning_rate": 2.5182686144733403e-07, "loss": 2.0153, "step": 29245 }, { "epoch": 0.9435886681279504, "grad_norm": 0.330078125, "learning_rate": 2.5153978440800195e-07, "loss": 1.9782, "step": 29246 }, { "epoch": 0.9436209319817467, "grad_norm": 0.33203125, "learning_rate": 2.5125286970914097e-07, "loss": 1.9755, "step": 29247 }, { "epoch": 0.9436531958355431, "grad_norm": 0.33984375, "learning_rate": 2.5096611735390686e-07, "loss": 1.9861, "step": 29248 }, { "epoch": 0.9436854596893394, "grad_norm": 0.322265625, "learning_rate": 2.506795273454554e-07, "loss": 1.9925, "step": 29249 }, { "epoch": 0.9437177235431358, "grad_norm": 0.3359375, "learning_rate": 2.503930996869441e-07, "loss": 2.0067, "step": 29250 }, { "epoch": 0.9437499873969322, "grad_norm": 0.337890625, "learning_rate": 2.501068343815238e-07, "loss": 2.0073, "step": 29251 }, { "epoch": 0.9437822512507285, "grad_norm": 0.3359375, "learning_rate": 2.498207314323453e-07, "loss": 2.0058, "step": 29252 }, { "epoch": 0.9438145151045249, "grad_norm": 0.333984375, "learning_rate": 2.495347908425594e-07, "loss": 1.9685, "step": 29253 }, { "epoch": 0.9438467789583211, "grad_norm": 0.333984375, "learning_rate": 2.4924901261531184e-07, "loss": 2.0075, "step": 29254 }, { "epoch": 0.9438790428121175, "grad_norm": 0.328125, "learning_rate": 2.489633967537502e-07, "loss": 1.9742, "step": 29255 }, { "epoch": 0.9439113066659138, "grad_norm": 0.33203125, "learning_rate": 2.4867794326101535e-07, "loss": 1.9941, "step": 29256 }, { "epoch": 0.9439435705197102, "grad_norm": 0.357421875, "learning_rate": 2.483926521402513e-07, "loss": 1.9817, "step": 29257 }, { "epoch": 0.9439758343735065, "grad_norm": 0.3359375, "learning_rate": 2.4810752339460065e-07, "loss": 1.9876, "step": 29258 }, { "epoch": 0.9440080982273029, "grad_norm": 0.333984375, "learning_rate": 2.4782255702719745e-07, "loss": 1.9852, "step": 29259 }, { "epoch": 0.9440403620810992, "grad_norm": 0.32421875, "learning_rate": 2.47537753041181e-07, "loss": 1.9805, "step": 29260 }, { "epoch": 0.9440726259348956, "grad_norm": 0.326171875, "learning_rate": 2.4725311143968697e-07, "loss": 2.0014, "step": 29261 }, { "epoch": 0.9441048897886919, "grad_norm": 0.380859375, "learning_rate": 2.4696863222584467e-07, "loss": 2.01, "step": 29262 }, { "epoch": 0.9441371536424883, "grad_norm": 0.337890625, "learning_rate": 2.4668431540278647e-07, "loss": 1.9781, "step": 29263 }, { "epoch": 0.9441694174962846, "grad_norm": 0.337890625, "learning_rate": 2.4640016097364826e-07, "loss": 1.9935, "step": 29264 }, { "epoch": 0.944201681350081, "grad_norm": 0.33984375, "learning_rate": 2.461161689415492e-07, "loss": 1.9962, "step": 29265 }, { "epoch": 0.9442339452038773, "grad_norm": 0.34375, "learning_rate": 2.458323393096201e-07, "loss": 1.9723, "step": 29266 }, { "epoch": 0.9442662090576737, "grad_norm": 0.3359375, "learning_rate": 2.455486720809852e-07, "loss": 1.9718, "step": 29267 }, { "epoch": 0.94429847291147, "grad_norm": 0.33203125, "learning_rate": 2.4526516725876355e-07, "loss": 1.9831, "step": 29268 }, { "epoch": 0.9443307367652664, "grad_norm": 0.333984375, "learning_rate": 2.4498182484607777e-07, "loss": 1.9996, "step": 29269 }, { "epoch": 0.9443630006190626, "grad_norm": 0.33203125, "learning_rate": 2.4469864484604866e-07, "loss": 2.021, "step": 29270 }, { "epoch": 0.944395264472859, "grad_norm": 0.330078125, "learning_rate": 2.4441562726179034e-07, "loss": 2.0108, "step": 29271 }, { "epoch": 0.9444275283266554, "grad_norm": 0.330078125, "learning_rate": 2.4413277209642035e-07, "loss": 2.0192, "step": 29272 }, { "epoch": 0.9444597921804517, "grad_norm": 0.330078125, "learning_rate": 2.438500793530496e-07, "loss": 1.9806, "step": 29273 }, { "epoch": 0.9444920560342481, "grad_norm": 0.333984375, "learning_rate": 2.435675490347905e-07, "loss": 2.0016, "step": 29274 }, { "epoch": 0.9445243198880444, "grad_norm": 0.337890625, "learning_rate": 2.432851811447556e-07, "loss": 2.0133, "step": 29275 }, { "epoch": 0.9445565837418408, "grad_norm": 0.337890625, "learning_rate": 2.4300297568605077e-07, "loss": 1.9924, "step": 29276 }, { "epoch": 0.9445888475956371, "grad_norm": 0.337890625, "learning_rate": 2.4272093266178185e-07, "loss": 2.0018, "step": 29277 }, { "epoch": 0.9446211114494335, "grad_norm": 0.330078125, "learning_rate": 2.424390520750547e-07, "loss": 1.9996, "step": 29278 }, { "epoch": 0.9446533753032298, "grad_norm": 0.330078125, "learning_rate": 2.4215733392897174e-07, "loss": 1.9665, "step": 29279 }, { "epoch": 0.9446856391570262, "grad_norm": 0.34375, "learning_rate": 2.4187577822663063e-07, "loss": 1.9914, "step": 29280 }, { "epoch": 0.9447179030108225, "grad_norm": 0.3359375, "learning_rate": 2.4159438497113715e-07, "loss": 2.0091, "step": 29281 }, { "epoch": 0.9447501668646189, "grad_norm": 0.33984375, "learning_rate": 2.4131315416558386e-07, "loss": 1.98, "step": 29282 }, { "epoch": 0.9447824307184152, "grad_norm": 0.3359375, "learning_rate": 2.410320858130666e-07, "loss": 1.983, "step": 29283 }, { "epoch": 0.9448146945722116, "grad_norm": 0.3359375, "learning_rate": 2.4075117991668115e-07, "loss": 1.988, "step": 29284 }, { "epoch": 0.9448469584260079, "grad_norm": 0.326171875, "learning_rate": 2.404704364795185e-07, "loss": 1.9921, "step": 29285 }, { "epoch": 0.9448792222798043, "grad_norm": 0.337890625, "learning_rate": 2.401898555046694e-07, "loss": 1.9684, "step": 29286 }, { "epoch": 0.9449114861336005, "grad_norm": 0.341796875, "learning_rate": 2.3990943699521983e-07, "loss": 2.016, "step": 29287 }, { "epoch": 0.9449437499873969, "grad_norm": 0.328125, "learning_rate": 2.396291809542589e-07, "loss": 1.9923, "step": 29288 }, { "epoch": 0.9449760138411932, "grad_norm": 0.328125, "learning_rate": 2.393490873848725e-07, "loss": 1.9734, "step": 29289 }, { "epoch": 0.9450082776949896, "grad_norm": 0.333984375, "learning_rate": 2.390691562901398e-07, "loss": 2.0044, "step": 29290 }, { "epoch": 0.9450405415487859, "grad_norm": 0.33203125, "learning_rate": 2.3878938767314506e-07, "loss": 2.0116, "step": 29291 }, { "epoch": 0.9450728054025823, "grad_norm": 0.33203125, "learning_rate": 2.385097815369691e-07, "loss": 2.0278, "step": 29292 }, { "epoch": 0.9451050692563787, "grad_norm": 0.337890625, "learning_rate": 2.382303378846845e-07, "loss": 1.9599, "step": 29293 }, { "epoch": 0.945137333110175, "grad_norm": 0.3359375, "learning_rate": 2.3795105671937045e-07, "loss": 2.0104, "step": 29294 }, { "epoch": 0.9451695969639714, "grad_norm": 0.32421875, "learning_rate": 2.3767193804410448e-07, "loss": 1.9738, "step": 29295 }, { "epoch": 0.9452018608177677, "grad_norm": 0.326171875, "learning_rate": 2.3739298186195245e-07, "loss": 2.0098, "step": 29296 }, { "epoch": 0.9452341246715641, "grad_norm": 0.337890625, "learning_rate": 2.3711418817598863e-07, "loss": 1.9657, "step": 29297 }, { "epoch": 0.9452663885253604, "grad_norm": 0.33203125, "learning_rate": 2.3683555698928217e-07, "loss": 1.9749, "step": 29298 }, { "epoch": 0.9452986523791568, "grad_norm": 0.341796875, "learning_rate": 2.3655708830489564e-07, "loss": 2.01, "step": 29299 }, { "epoch": 0.9453309162329531, "grad_norm": 0.326171875, "learning_rate": 2.3627878212589993e-07, "loss": 1.9473, "step": 29300 }, { "epoch": 0.9453631800867495, "grad_norm": 0.33203125, "learning_rate": 2.3600063845535592e-07, "loss": 1.9668, "step": 29301 }, { "epoch": 0.9453954439405458, "grad_norm": 0.330078125, "learning_rate": 2.3572265729632447e-07, "loss": 2.0061, "step": 29302 }, { "epoch": 0.9454277077943422, "grad_norm": 0.333984375, "learning_rate": 2.354448386518665e-07, "loss": 1.9631, "step": 29303 }, { "epoch": 0.9454599716481384, "grad_norm": 0.341796875, "learning_rate": 2.3516718252503954e-07, "loss": 1.9969, "step": 29304 }, { "epoch": 0.9454922355019348, "grad_norm": 0.330078125, "learning_rate": 2.348896889188995e-07, "loss": 1.9911, "step": 29305 }, { "epoch": 0.9455244993557311, "grad_norm": 0.328125, "learning_rate": 2.3461235783650224e-07, "loss": 2.0067, "step": 29306 }, { "epoch": 0.9455567632095275, "grad_norm": 0.34375, "learning_rate": 2.3433518928090036e-07, "loss": 2.0082, "step": 29307 }, { "epoch": 0.9455890270633238, "grad_norm": 0.33984375, "learning_rate": 2.3405818325514138e-07, "loss": 1.9701, "step": 29308 }, { "epoch": 0.9456212909171202, "grad_norm": 0.326171875, "learning_rate": 2.3378133976227788e-07, "loss": 1.9701, "step": 29309 }, { "epoch": 0.9456535547709165, "grad_norm": 0.333984375, "learning_rate": 2.3350465880535742e-07, "loss": 1.9743, "step": 29310 }, { "epoch": 0.9456858186247129, "grad_norm": 0.330078125, "learning_rate": 2.3322814038742258e-07, "loss": 1.9486, "step": 29311 }, { "epoch": 0.9457180824785093, "grad_norm": 0.337890625, "learning_rate": 2.3295178451151922e-07, "loss": 1.9772, "step": 29312 }, { "epoch": 0.9457503463323056, "grad_norm": 0.33984375, "learning_rate": 2.3267559118068993e-07, "loss": 1.9515, "step": 29313 }, { "epoch": 0.945782610186102, "grad_norm": 0.328125, "learning_rate": 2.3239956039797227e-07, "loss": 2.0021, "step": 29314 }, { "epoch": 0.9458148740398983, "grad_norm": 0.328125, "learning_rate": 2.3212369216640716e-07, "loss": 2.0165, "step": 29315 }, { "epoch": 0.9458471378936947, "grad_norm": 0.326171875, "learning_rate": 2.3184798648902884e-07, "loss": 1.9846, "step": 29316 }, { "epoch": 0.945879401747491, "grad_norm": 0.33203125, "learning_rate": 2.3157244336887485e-07, "loss": 1.9752, "step": 29317 }, { "epoch": 0.9459116656012874, "grad_norm": 0.33203125, "learning_rate": 2.3129706280897446e-07, "loss": 1.9996, "step": 29318 }, { "epoch": 0.9459439294550837, "grad_norm": 0.330078125, "learning_rate": 2.3102184481236188e-07, "loss": 1.9987, "step": 29319 }, { "epoch": 0.94597619330888, "grad_norm": 0.330078125, "learning_rate": 2.3074678938206638e-07, "loss": 1.9749, "step": 29320 }, { "epoch": 0.9460084571626763, "grad_norm": 0.328125, "learning_rate": 2.3047189652111388e-07, "loss": 2.0112, "step": 29321 }, { "epoch": 0.9460407210164727, "grad_norm": 0.328125, "learning_rate": 2.301971662325303e-07, "loss": 2.0085, "step": 29322 }, { "epoch": 0.946072984870269, "grad_norm": 0.32421875, "learning_rate": 2.2992259851934317e-07, "loss": 1.9828, "step": 29323 }, { "epoch": 0.9461052487240654, "grad_norm": 0.333984375, "learning_rate": 2.2964819338456845e-07, "loss": 1.996, "step": 29324 }, { "epoch": 0.9461375125778617, "grad_norm": 0.33203125, "learning_rate": 2.2937395083123203e-07, "loss": 2.0016, "step": 29325 }, { "epoch": 0.9461697764316581, "grad_norm": 0.328125, "learning_rate": 2.290998708623532e-07, "loss": 1.9633, "step": 29326 }, { "epoch": 0.9462020402854544, "grad_norm": 0.328125, "learning_rate": 2.2882595348094282e-07, "loss": 1.9985, "step": 29327 }, { "epoch": 0.9462343041392508, "grad_norm": 0.330078125, "learning_rate": 2.2855219869002186e-07, "loss": 1.9982, "step": 29328 }, { "epoch": 0.9462665679930471, "grad_norm": 0.322265625, "learning_rate": 2.2827860649259958e-07, "loss": 2.0058, "step": 29329 }, { "epoch": 0.9462988318468435, "grad_norm": 0.33203125, "learning_rate": 2.280051768916902e-07, "loss": 1.9979, "step": 29330 }, { "epoch": 0.9463310957006398, "grad_norm": 0.326171875, "learning_rate": 2.2773190989030302e-07, "loss": 1.9823, "step": 29331 }, { "epoch": 0.9463633595544362, "grad_norm": 0.333984375, "learning_rate": 2.2745880549144394e-07, "loss": 2.01, "step": 29332 }, { "epoch": 0.9463956234082326, "grad_norm": 0.33203125, "learning_rate": 2.2718586369812054e-07, "loss": 2.0155, "step": 29333 }, { "epoch": 0.9464278872620289, "grad_norm": 0.32421875, "learning_rate": 2.2691308451333881e-07, "loss": 1.9947, "step": 29334 }, { "epoch": 0.9464601511158253, "grad_norm": 0.333984375, "learning_rate": 2.2664046794009796e-07, "loss": 1.9925, "step": 29335 }, { "epoch": 0.9464924149696216, "grad_norm": 0.333984375, "learning_rate": 2.2636801398140062e-07, "loss": 1.9841, "step": 29336 }, { "epoch": 0.946524678823418, "grad_norm": 0.333984375, "learning_rate": 2.2609572264024768e-07, "loss": 1.9829, "step": 29337 }, { "epoch": 0.9465569426772142, "grad_norm": 0.32421875, "learning_rate": 2.2582359391963347e-07, "loss": 1.9957, "step": 29338 }, { "epoch": 0.9465892065310106, "grad_norm": 0.326171875, "learning_rate": 2.2555162782255224e-07, "loss": 1.9963, "step": 29339 }, { "epoch": 0.9466214703848069, "grad_norm": 0.333984375, "learning_rate": 2.2527982435200157e-07, "loss": 2.0006, "step": 29340 }, { "epoch": 0.9466537342386033, "grad_norm": 0.328125, "learning_rate": 2.2500818351097075e-07, "loss": 2.0075, "step": 29341 }, { "epoch": 0.9466859980923996, "grad_norm": 0.326171875, "learning_rate": 2.2473670530244905e-07, "loss": 1.9933, "step": 29342 }, { "epoch": 0.946718261946196, "grad_norm": 0.33203125, "learning_rate": 2.2446538972942743e-07, "loss": 1.9627, "step": 29343 }, { "epoch": 0.9467505257999923, "grad_norm": 0.328125, "learning_rate": 2.2419423679489014e-07, "loss": 1.9988, "step": 29344 }, { "epoch": 0.9467827896537887, "grad_norm": 0.328125, "learning_rate": 2.2392324650182317e-07, "loss": 1.9897, "step": 29345 }, { "epoch": 0.946815053507585, "grad_norm": 0.330078125, "learning_rate": 2.236524188532091e-07, "loss": 1.9985, "step": 29346 }, { "epoch": 0.9468473173613814, "grad_norm": 0.3359375, "learning_rate": 2.233817538520272e-07, "loss": 1.9717, "step": 29347 }, { "epoch": 0.9468795812151777, "grad_norm": 0.333984375, "learning_rate": 2.231112515012601e-07, "loss": 1.9914, "step": 29348 }, { "epoch": 0.9469118450689741, "grad_norm": 0.326171875, "learning_rate": 2.228409118038821e-07, "loss": 1.9868, "step": 29349 }, { "epoch": 0.9469441089227704, "grad_norm": 0.33203125, "learning_rate": 2.2257073476287083e-07, "loss": 1.99, "step": 29350 }, { "epoch": 0.9469763727765668, "grad_norm": 0.330078125, "learning_rate": 2.2230072038120052e-07, "loss": 2.006, "step": 29351 }, { "epoch": 0.947008636630363, "grad_norm": 0.3359375, "learning_rate": 2.2203086866184053e-07, "loss": 2.0015, "step": 29352 }, { "epoch": 0.9470409004841595, "grad_norm": 0.328125, "learning_rate": 2.2176117960776343e-07, "loss": 1.9837, "step": 29353 }, { "epoch": 0.9470731643379559, "grad_norm": 0.33203125, "learning_rate": 2.2149165322193853e-07, "loss": 2.0053, "step": 29354 }, { "epoch": 0.9471054281917521, "grad_norm": 0.326171875, "learning_rate": 2.2122228950733016e-07, "loss": 1.977, "step": 29355 }, { "epoch": 0.9471376920455485, "grad_norm": 0.3359375, "learning_rate": 2.2095308846690588e-07, "loss": 2.0106, "step": 29356 }, { "epoch": 0.9471699558993448, "grad_norm": 0.328125, "learning_rate": 2.206840501036267e-07, "loss": 2.0055, "step": 29357 }, { "epoch": 0.9472022197531412, "grad_norm": 0.326171875, "learning_rate": 2.2041517442045355e-07, "loss": 2.002, "step": 29358 }, { "epoch": 0.9472344836069375, "grad_norm": 0.330078125, "learning_rate": 2.2014646142035076e-07, "loss": 2.005, "step": 29359 }, { "epoch": 0.9472667474607339, "grad_norm": 0.322265625, "learning_rate": 2.1987791110627098e-07, "loss": 2.0089, "step": 29360 }, { "epoch": 0.9472990113145302, "grad_norm": 0.345703125, "learning_rate": 2.1960952348117015e-07, "loss": 1.9886, "step": 29361 }, { "epoch": 0.9473312751683266, "grad_norm": 0.337890625, "learning_rate": 2.1934129854800589e-07, "loss": 2.0253, "step": 29362 }, { "epoch": 0.9473635390221229, "grad_norm": 0.3359375, "learning_rate": 2.190732363097292e-07, "loss": 1.9341, "step": 29363 }, { "epoch": 0.9473958028759193, "grad_norm": 0.337890625, "learning_rate": 2.1880533676929105e-07, "loss": 2.012, "step": 29364 }, { "epoch": 0.9474280667297156, "grad_norm": 0.330078125, "learning_rate": 2.1853759992963906e-07, "loss": 1.9749, "step": 29365 }, { "epoch": 0.947460330583512, "grad_norm": 0.330078125, "learning_rate": 2.182700257937209e-07, "loss": 1.9891, "step": 29366 }, { "epoch": 0.9474925944373083, "grad_norm": 0.328125, "learning_rate": 2.1800261436448087e-07, "loss": 1.9781, "step": 29367 }, { "epoch": 0.9475248582911047, "grad_norm": 0.337890625, "learning_rate": 2.1773536564486496e-07, "loss": 1.9979, "step": 29368 }, { "epoch": 0.947557122144901, "grad_norm": 0.32421875, "learning_rate": 2.1746827963781412e-07, "loss": 1.9714, "step": 29369 }, { "epoch": 0.9475893859986974, "grad_norm": 0.326171875, "learning_rate": 2.1720135634626604e-07, "loss": 1.9886, "step": 29370 }, { "epoch": 0.9476216498524936, "grad_norm": 0.326171875, "learning_rate": 2.1693459577316e-07, "loss": 2.0183, "step": 29371 }, { "epoch": 0.94765391370629, "grad_norm": 0.33203125, "learning_rate": 2.1666799792143366e-07, "loss": 1.9496, "step": 29372 }, { "epoch": 0.9476861775600864, "grad_norm": 0.328125, "learning_rate": 2.1640156279401968e-07, "loss": 1.9796, "step": 29373 }, { "epoch": 0.9477184414138827, "grad_norm": 0.328125, "learning_rate": 2.161352903938524e-07, "loss": 1.996, "step": 29374 }, { "epoch": 0.9477507052676791, "grad_norm": 0.32421875, "learning_rate": 2.1586918072386274e-07, "loss": 2.0031, "step": 29375 }, { "epoch": 0.9477829691214754, "grad_norm": 0.3359375, "learning_rate": 2.1560323378697678e-07, "loss": 1.9852, "step": 29376 }, { "epoch": 0.9478152329752718, "grad_norm": 0.33203125, "learning_rate": 2.1533744958612544e-07, "loss": 1.9611, "step": 29377 }, { "epoch": 0.9478474968290681, "grad_norm": 0.3203125, "learning_rate": 2.150718281242331e-07, "loss": 1.997, "step": 29378 }, { "epoch": 0.9478797606828645, "grad_norm": 0.333984375, "learning_rate": 2.1480636940422404e-07, "loss": 2.0025, "step": 29379 }, { "epoch": 0.9479120245366608, "grad_norm": 0.3359375, "learning_rate": 2.1454107342901762e-07, "loss": 1.9984, "step": 29380 }, { "epoch": 0.9479442883904572, "grad_norm": 0.33203125, "learning_rate": 2.142759402015365e-07, "loss": 2.0041, "step": 29381 }, { "epoch": 0.9479765522442535, "grad_norm": 0.333984375, "learning_rate": 2.1401096972470002e-07, "loss": 2.011, "step": 29382 }, { "epoch": 0.9480088160980499, "grad_norm": 0.326171875, "learning_rate": 2.137461620014225e-07, "loss": 2.0235, "step": 29383 }, { "epoch": 0.9480410799518462, "grad_norm": 0.3359375, "learning_rate": 2.1348151703461993e-07, "loss": 1.9917, "step": 29384 }, { "epoch": 0.9480733438056426, "grad_norm": 0.326171875, "learning_rate": 2.13217034827205e-07, "loss": 1.9831, "step": 29385 }, { "epoch": 0.9481056076594389, "grad_norm": 0.328125, "learning_rate": 2.1295271538208705e-07, "loss": 2.0041, "step": 29386 }, { "epoch": 0.9481378715132353, "grad_norm": 0.32421875, "learning_rate": 2.126885587021804e-07, "loss": 1.9504, "step": 29387 }, { "epoch": 0.9481701353670315, "grad_norm": 0.3359375, "learning_rate": 2.1242456479038776e-07, "loss": 1.9701, "step": 29388 }, { "epoch": 0.9482023992208279, "grad_norm": 0.326171875, "learning_rate": 2.1216073364961675e-07, "loss": 2.0081, "step": 29389 }, { "epoch": 0.9482346630746242, "grad_norm": 0.326171875, "learning_rate": 2.118970652827734e-07, "loss": 1.997, "step": 29390 }, { "epoch": 0.9482669269284206, "grad_norm": 0.333984375, "learning_rate": 2.116335596927571e-07, "loss": 1.9612, "step": 29391 }, { "epoch": 0.9482991907822169, "grad_norm": 0.33203125, "learning_rate": 2.1137021688246882e-07, "loss": 1.9825, "step": 29392 }, { "epoch": 0.9483314546360133, "grad_norm": 0.330078125, "learning_rate": 2.111070368548096e-07, "loss": 1.973, "step": 29393 }, { "epoch": 0.9483637184898097, "grad_norm": 0.333984375, "learning_rate": 2.1084401961267375e-07, "loss": 1.9938, "step": 29394 }, { "epoch": 0.948395982343606, "grad_norm": 0.333984375, "learning_rate": 2.1058116515895731e-07, "loss": 1.9663, "step": 29395 }, { "epoch": 0.9484282461974024, "grad_norm": 0.3359375, "learning_rate": 2.10318473496553e-07, "loss": 1.9835, "step": 29396 }, { "epoch": 0.9484605100511987, "grad_norm": 0.328125, "learning_rate": 2.100559446283534e-07, "loss": 2.0261, "step": 29397 }, { "epoch": 0.9484927739049951, "grad_norm": 0.33203125, "learning_rate": 2.09793578557248e-07, "loss": 2.0071, "step": 29398 }, { "epoch": 0.9485250377587914, "grad_norm": 0.337890625, "learning_rate": 2.0953137528612443e-07, "loss": 1.9749, "step": 29399 }, { "epoch": 0.9485573016125878, "grad_norm": 0.32421875, "learning_rate": 2.092693348178687e-07, "loss": 1.9776, "step": 29400 }, { "epoch": 0.9485895654663841, "grad_norm": 0.330078125, "learning_rate": 2.090074571553635e-07, "loss": 1.9794, "step": 29401 }, { "epoch": 0.9486218293201805, "grad_norm": 0.333984375, "learning_rate": 2.087457423014949e-07, "loss": 1.9773, "step": 29402 }, { "epoch": 0.9486540931739768, "grad_norm": 0.333984375, "learning_rate": 2.0848419025914222e-07, "loss": 2.0194, "step": 29403 }, { "epoch": 0.9486863570277732, "grad_norm": 0.328125, "learning_rate": 2.082228010311832e-07, "loss": 2.0067, "step": 29404 }, { "epoch": 0.9487186208815694, "grad_norm": 0.33203125, "learning_rate": 2.0796157462049714e-07, "loss": 1.9968, "step": 29405 }, { "epoch": 0.9487508847353658, "grad_norm": 0.3359375, "learning_rate": 2.0770051102995846e-07, "loss": 1.9401, "step": 29406 }, { "epoch": 0.9487831485891621, "grad_norm": 0.333984375, "learning_rate": 2.0743961026243984e-07, "loss": 2.0073, "step": 29407 }, { "epoch": 0.9488154124429585, "grad_norm": 0.333984375, "learning_rate": 2.0717887232081234e-07, "loss": 1.976, "step": 29408 }, { "epoch": 0.9488476762967548, "grad_norm": 0.333984375, "learning_rate": 2.0691829720795031e-07, "loss": 2.0041, "step": 29409 }, { "epoch": 0.9488799401505512, "grad_norm": 0.33203125, "learning_rate": 2.066578849267181e-07, "loss": 1.9792, "step": 29410 }, { "epoch": 0.9489122040043475, "grad_norm": 0.328125, "learning_rate": 2.063976354799818e-07, "loss": 1.9633, "step": 29411 }, { "epoch": 0.9489444678581439, "grad_norm": 0.328125, "learning_rate": 2.061375488706091e-07, "loss": 1.9931, "step": 29412 }, { "epoch": 0.9489767317119403, "grad_norm": 0.35546875, "learning_rate": 2.0587762510146102e-07, "loss": 1.9927, "step": 29413 }, { "epoch": 0.9490089955657366, "grad_norm": 0.33984375, "learning_rate": 2.0561786417539862e-07, "loss": 1.9679, "step": 29414 }, { "epoch": 0.949041259419533, "grad_norm": 0.328125, "learning_rate": 2.053582660952813e-07, "loss": 1.9827, "step": 29415 }, { "epoch": 0.9490735232733293, "grad_norm": 0.326171875, "learning_rate": 2.0509883086396675e-07, "loss": 1.9724, "step": 29416 }, { "epoch": 0.9491057871271257, "grad_norm": 0.333984375, "learning_rate": 2.0483955848430936e-07, "loss": 1.996, "step": 29417 }, { "epoch": 0.949138050980922, "grad_norm": 0.357421875, "learning_rate": 2.0458044895916516e-07, "loss": 2.0096, "step": 29418 }, { "epoch": 0.9491703148347184, "grad_norm": 0.333984375, "learning_rate": 2.0432150229138525e-07, "loss": 2.0159, "step": 29419 }, { "epoch": 0.9492025786885147, "grad_norm": 0.328125, "learning_rate": 2.0406271848381897e-07, "loss": 2.0041, "step": 29420 }, { "epoch": 0.949234842542311, "grad_norm": 0.33203125, "learning_rate": 2.0380409753931572e-07, "loss": 1.9912, "step": 29421 }, { "epoch": 0.9492671063961073, "grad_norm": 0.365234375, "learning_rate": 2.0354563946072325e-07, "loss": 1.983, "step": 29422 }, { "epoch": 0.9492993702499037, "grad_norm": 0.36328125, "learning_rate": 2.0328734425088424e-07, "loss": 1.9386, "step": 29423 }, { "epoch": 0.9493316341037, "grad_norm": 0.328125, "learning_rate": 2.030292119126448e-07, "loss": 1.9739, "step": 29424 }, { "epoch": 0.9493638979574964, "grad_norm": 0.330078125, "learning_rate": 2.027712424488426e-07, "loss": 1.984, "step": 29425 }, { "epoch": 0.9493961618112927, "grad_norm": 0.33984375, "learning_rate": 2.025134358623204e-07, "loss": 2.0295, "step": 29426 }, { "epoch": 0.9494284256650891, "grad_norm": 0.3359375, "learning_rate": 2.0225579215591428e-07, "loss": 1.987, "step": 29427 }, { "epoch": 0.9494606895188854, "grad_norm": 0.326171875, "learning_rate": 2.0199831133246027e-07, "loss": 1.9439, "step": 29428 }, { "epoch": 0.9494929533726818, "grad_norm": 0.326171875, "learning_rate": 2.017409933947928e-07, "loss": 1.9849, "step": 29429 }, { "epoch": 0.9495252172264781, "grad_norm": 0.328125, "learning_rate": 2.0148383834574458e-07, "loss": 2.0111, "step": 29430 }, { "epoch": 0.9495574810802745, "grad_norm": 0.341796875, "learning_rate": 2.0122684618814503e-07, "loss": 1.9709, "step": 29431 }, { "epoch": 0.9495897449340708, "grad_norm": 0.328125, "learning_rate": 2.009700169248252e-07, "loss": 1.9662, "step": 29432 }, { "epoch": 0.9496220087878672, "grad_norm": 0.330078125, "learning_rate": 2.007133505586095e-07, "loss": 2.0031, "step": 29433 }, { "epoch": 0.9496542726416636, "grad_norm": 0.328125, "learning_rate": 2.004568470923257e-07, "loss": 1.988, "step": 29434 }, { "epoch": 0.9496865364954599, "grad_norm": 0.35546875, "learning_rate": 2.0020050652879318e-07, "loss": 2.0013, "step": 29435 }, { "epoch": 0.9497188003492563, "grad_norm": 0.3359375, "learning_rate": 1.9994432887083802e-07, "loss": 2.0267, "step": 29436 }, { "epoch": 0.9497510642030526, "grad_norm": 0.328125, "learning_rate": 1.9968831412127797e-07, "loss": 1.9743, "step": 29437 }, { "epoch": 0.949783328056849, "grad_norm": 0.3359375, "learning_rate": 1.9943246228293245e-07, "loss": 1.9957, "step": 29438 }, { "epoch": 0.9498155919106452, "grad_norm": 0.326171875, "learning_rate": 1.9917677335861417e-07, "loss": 2.0047, "step": 29439 }, { "epoch": 0.9498478557644416, "grad_norm": 0.328125, "learning_rate": 1.9892124735114093e-07, "loss": 1.9821, "step": 29440 }, { "epoch": 0.9498801196182379, "grad_norm": 0.349609375, "learning_rate": 1.9866588426332543e-07, "loss": 1.9772, "step": 29441 }, { "epoch": 0.9499123834720343, "grad_norm": 0.33984375, "learning_rate": 1.9841068409797546e-07, "loss": 1.9708, "step": 29442 }, { "epoch": 0.9499446473258306, "grad_norm": 0.330078125, "learning_rate": 1.981556468579021e-07, "loss": 1.9959, "step": 29443 }, { "epoch": 0.949976911179627, "grad_norm": 0.337890625, "learning_rate": 1.9790077254591476e-07, "loss": 1.9822, "step": 29444 }, { "epoch": 0.9500091750334233, "grad_norm": 0.337890625, "learning_rate": 1.9764606116481454e-07, "loss": 1.9716, "step": 29445 }, { "epoch": 0.9500414388872197, "grad_norm": 0.322265625, "learning_rate": 1.9739151271740753e-07, "loss": 1.9819, "step": 29446 }, { "epoch": 0.950073702741016, "grad_norm": 0.333984375, "learning_rate": 1.9713712720649645e-07, "loss": 1.9918, "step": 29447 }, { "epoch": 0.9501059665948124, "grad_norm": 0.333984375, "learning_rate": 1.9688290463487912e-07, "loss": 1.9964, "step": 29448 }, { "epoch": 0.9501382304486087, "grad_norm": 0.326171875, "learning_rate": 1.966288450053566e-07, "loss": 1.9845, "step": 29449 }, { "epoch": 0.9501704943024051, "grad_norm": 0.330078125, "learning_rate": 1.9637494832072334e-07, "loss": 2.0152, "step": 29450 }, { "epoch": 0.9502027581562014, "grad_norm": 0.3359375, "learning_rate": 1.9612121458377375e-07, "loss": 1.9941, "step": 29451 }, { "epoch": 0.9502350220099978, "grad_norm": 0.337890625, "learning_rate": 1.9586764379730393e-07, "loss": 1.9799, "step": 29452 }, { "epoch": 0.950267285863794, "grad_norm": 0.333984375, "learning_rate": 1.956142359641e-07, "loss": 1.9665, "step": 29453 }, { "epoch": 0.9502995497175905, "grad_norm": 0.34375, "learning_rate": 1.953609910869547e-07, "loss": 2.0007, "step": 29454 }, { "epoch": 0.9503318135713869, "grad_norm": 0.33203125, "learning_rate": 1.9510790916865584e-07, "loss": 1.9848, "step": 29455 }, { "epoch": 0.9503640774251831, "grad_norm": 0.33984375, "learning_rate": 1.9485499021198783e-07, "loss": 1.9774, "step": 29456 }, { "epoch": 0.9503963412789795, "grad_norm": 0.33203125, "learning_rate": 1.9460223421973344e-07, "loss": 1.9564, "step": 29457 }, { "epoch": 0.9504286051327758, "grad_norm": 0.3359375, "learning_rate": 1.9434964119467714e-07, "loss": 1.9471, "step": 29458 }, { "epoch": 0.9504608689865722, "grad_norm": 0.330078125, "learning_rate": 1.9409721113959833e-07, "loss": 1.9639, "step": 29459 }, { "epoch": 0.9504931328403685, "grad_norm": 0.328125, "learning_rate": 1.9384494405727481e-07, "loss": 1.9758, "step": 29460 }, { "epoch": 0.9505253966941649, "grad_norm": 0.3359375, "learning_rate": 1.9359283995048603e-07, "loss": 1.9589, "step": 29461 }, { "epoch": 0.9505576605479612, "grad_norm": 0.32421875, "learning_rate": 1.9334089882200478e-07, "loss": 1.9649, "step": 29462 }, { "epoch": 0.9505899244017576, "grad_norm": 0.333984375, "learning_rate": 1.9308912067460383e-07, "loss": 1.9653, "step": 29463 }, { "epoch": 0.9506221882555539, "grad_norm": 0.326171875, "learning_rate": 1.9283750551105762e-07, "loss": 1.9485, "step": 29464 }, { "epoch": 0.9506544521093503, "grad_norm": 0.330078125, "learning_rate": 1.9258605333413227e-07, "loss": 1.9494, "step": 29465 }, { "epoch": 0.9506867159631466, "grad_norm": 0.33984375, "learning_rate": 1.9233476414659557e-07, "loss": 1.9738, "step": 29466 }, { "epoch": 0.950718979816943, "grad_norm": 0.33203125, "learning_rate": 1.920836379512153e-07, "loss": 1.9908, "step": 29467 }, { "epoch": 0.9507512436707393, "grad_norm": 0.326171875, "learning_rate": 1.9183267475075593e-07, "loss": 1.9709, "step": 29468 }, { "epoch": 0.9507835075245357, "grad_norm": 0.33203125, "learning_rate": 1.915818745479786e-07, "loss": 2.0057, "step": 29469 }, { "epoch": 0.950815771378332, "grad_norm": 0.330078125, "learning_rate": 1.913312373456444e-07, "loss": 1.9809, "step": 29470 }, { "epoch": 0.9508480352321284, "grad_norm": 0.326171875, "learning_rate": 1.910807631465128e-07, "loss": 1.9684, "step": 29471 }, { "epoch": 0.9508802990859246, "grad_norm": 0.330078125, "learning_rate": 1.9083045195334158e-07, "loss": 2.0032, "step": 29472 }, { "epoch": 0.950912562939721, "grad_norm": 0.328125, "learning_rate": 1.9058030376888192e-07, "loss": 1.9937, "step": 29473 }, { "epoch": 0.9509448267935174, "grad_norm": 0.330078125, "learning_rate": 1.9033031859589157e-07, "loss": 1.9861, "step": 29474 }, { "epoch": 0.9509770906473137, "grad_norm": 0.349609375, "learning_rate": 1.9008049643712e-07, "loss": 1.9871, "step": 29475 }, { "epoch": 0.9510093545011101, "grad_norm": 0.33984375, "learning_rate": 1.8983083729531837e-07, "loss": 1.9809, "step": 29476 }, { "epoch": 0.9510416183549064, "grad_norm": 0.33984375, "learning_rate": 1.8958134117323444e-07, "loss": 1.9982, "step": 29477 }, { "epoch": 0.9510738822087028, "grad_norm": 0.3203125, "learning_rate": 1.8933200807361272e-07, "loss": 1.9665, "step": 29478 }, { "epoch": 0.9511061460624991, "grad_norm": 0.333984375, "learning_rate": 1.8908283799919934e-07, "loss": 1.9874, "step": 29479 }, { "epoch": 0.9511384099162955, "grad_norm": 0.3359375, "learning_rate": 1.8883383095273877e-07, "loss": 2.0199, "step": 29480 }, { "epoch": 0.9511706737700918, "grad_norm": 0.328125, "learning_rate": 1.885849869369688e-07, "loss": 1.9523, "step": 29481 }, { "epoch": 0.9512029376238882, "grad_norm": 0.333984375, "learning_rate": 1.8833630595462892e-07, "loss": 1.9909, "step": 29482 }, { "epoch": 0.9512352014776845, "grad_norm": 0.330078125, "learning_rate": 1.8808778800845694e-07, "loss": 2.0238, "step": 29483 }, { "epoch": 0.9512674653314809, "grad_norm": 0.33203125, "learning_rate": 1.8783943310119066e-07, "loss": 1.9602, "step": 29484 }, { "epoch": 0.9512997291852772, "grad_norm": 0.33203125, "learning_rate": 1.875912412355596e-07, "loss": 1.9819, "step": 29485 }, { "epoch": 0.9513319930390736, "grad_norm": 0.34375, "learning_rate": 1.873432124142982e-07, "loss": 1.935, "step": 29486 }, { "epoch": 0.9513642568928699, "grad_norm": 0.33984375, "learning_rate": 1.8709534664013594e-07, "loss": 2.0063, "step": 29487 }, { "epoch": 0.9513965207466663, "grad_norm": 0.333984375, "learning_rate": 1.8684764391580234e-07, "loss": 1.9708, "step": 29488 }, { "epoch": 0.9514287846004625, "grad_norm": 0.333984375, "learning_rate": 1.8660010424402185e-07, "loss": 2.0124, "step": 29489 }, { "epoch": 0.9514610484542589, "grad_norm": 0.328125, "learning_rate": 1.863527276275223e-07, "loss": 1.9899, "step": 29490 }, { "epoch": 0.9514933123080552, "grad_norm": 0.330078125, "learning_rate": 1.8610551406902153e-07, "loss": 1.9828, "step": 29491 }, { "epoch": 0.9515255761618516, "grad_norm": 0.333984375, "learning_rate": 1.858584635712457e-07, "loss": 1.9755, "step": 29492 }, { "epoch": 0.9515578400156479, "grad_norm": 0.330078125, "learning_rate": 1.8561157613691258e-07, "loss": 1.9423, "step": 29493 }, { "epoch": 0.9515901038694443, "grad_norm": 0.330078125, "learning_rate": 1.8536485176873842e-07, "loss": 1.965, "step": 29494 }, { "epoch": 0.9516223677232407, "grad_norm": 0.34375, "learning_rate": 1.8511829046944096e-07, "loss": 1.9816, "step": 29495 }, { "epoch": 0.951654631577037, "grad_norm": 0.32421875, "learning_rate": 1.8487189224173306e-07, "loss": 1.9471, "step": 29496 }, { "epoch": 0.9516868954308334, "grad_norm": 0.33203125, "learning_rate": 1.8462565708832756e-07, "loss": 1.9669, "step": 29497 }, { "epoch": 0.9517191592846297, "grad_norm": 0.326171875, "learning_rate": 1.8437958501193397e-07, "loss": 2.0037, "step": 29498 }, { "epoch": 0.9517514231384261, "grad_norm": 0.326171875, "learning_rate": 1.841336760152601e-07, "loss": 1.9706, "step": 29499 }, { "epoch": 0.9517836869922224, "grad_norm": 0.328125, "learning_rate": 1.8388793010101545e-07, "loss": 2.0245, "step": 29500 }, { "epoch": 0.9518159508460188, "grad_norm": 0.3359375, "learning_rate": 1.8364234727190121e-07, "loss": 1.9576, "step": 29501 }, { "epoch": 0.9518482146998151, "grad_norm": 0.3359375, "learning_rate": 1.833969275306252e-07, "loss": 2.0234, "step": 29502 }, { "epoch": 0.9518804785536115, "grad_norm": 0.333984375, "learning_rate": 1.8315167087988527e-07, "loss": 1.9996, "step": 29503 }, { "epoch": 0.9519127424074078, "grad_norm": 0.337890625, "learning_rate": 1.8290657732238258e-07, "loss": 1.9898, "step": 29504 }, { "epoch": 0.9519450062612042, "grad_norm": 0.326171875, "learning_rate": 1.8266164686081334e-07, "loss": 1.9991, "step": 29505 }, { "epoch": 0.9519772701150004, "grad_norm": 0.328125, "learning_rate": 1.82416879497877e-07, "loss": 1.9988, "step": 29506 }, { "epoch": 0.9520095339687968, "grad_norm": 0.328125, "learning_rate": 1.8217227523626313e-07, "loss": 1.9964, "step": 29507 }, { "epoch": 0.9520417978225931, "grad_norm": 0.33203125, "learning_rate": 1.8192783407866786e-07, "loss": 2.0343, "step": 29508 }, { "epoch": 0.9520740616763895, "grad_norm": 0.32421875, "learning_rate": 1.8168355602778075e-07, "loss": 1.9956, "step": 29509 }, { "epoch": 0.9521063255301858, "grad_norm": 0.33203125, "learning_rate": 1.8143944108628963e-07, "loss": 2.0209, "step": 29510 }, { "epoch": 0.9521385893839822, "grad_norm": 0.33203125, "learning_rate": 1.81195489256884e-07, "loss": 2.0018, "step": 29511 }, { "epoch": 0.9521708532377785, "grad_norm": 0.33203125, "learning_rate": 1.8095170054224674e-07, "loss": 1.9923, "step": 29512 }, { "epoch": 0.9522031170915749, "grad_norm": 0.33203125, "learning_rate": 1.807080749450607e-07, "loss": 2.014, "step": 29513 }, { "epoch": 0.9522353809453713, "grad_norm": 0.328125, "learning_rate": 1.804646124680104e-07, "loss": 1.9962, "step": 29514 }, { "epoch": 0.9522676447991676, "grad_norm": 0.33203125, "learning_rate": 1.8022131311377366e-07, "loss": 1.9565, "step": 29515 }, { "epoch": 0.952299908652964, "grad_norm": 0.337890625, "learning_rate": 1.7997817688502839e-07, "loss": 1.9803, "step": 29516 }, { "epoch": 0.9523321725067603, "grad_norm": 0.322265625, "learning_rate": 1.7973520378445242e-07, "loss": 1.9708, "step": 29517 }, { "epoch": 0.9523644363605567, "grad_norm": 0.34375, "learning_rate": 1.7949239381471861e-07, "loss": 1.9847, "step": 29518 }, { "epoch": 0.952396700214353, "grad_norm": 0.32421875, "learning_rate": 1.7924974697849982e-07, "loss": 1.972, "step": 29519 }, { "epoch": 0.9524289640681494, "grad_norm": 0.341796875, "learning_rate": 1.7900726327846895e-07, "loss": 2.0046, "step": 29520 }, { "epoch": 0.9524612279219457, "grad_norm": 0.337890625, "learning_rate": 1.787649427172938e-07, "loss": 1.9958, "step": 29521 }, { "epoch": 0.952493491775742, "grad_norm": 0.32421875, "learning_rate": 1.78522785297639e-07, "loss": 2.012, "step": 29522 }, { "epoch": 0.9525257556295383, "grad_norm": 0.330078125, "learning_rate": 1.78280791022174e-07, "loss": 1.9831, "step": 29523 }, { "epoch": 0.9525580194833347, "grad_norm": 0.333984375, "learning_rate": 1.7803895989356167e-07, "loss": 1.9874, "step": 29524 }, { "epoch": 0.952590283337131, "grad_norm": 0.326171875, "learning_rate": 1.777972919144616e-07, "loss": 1.9845, "step": 29525 }, { "epoch": 0.9526225471909274, "grad_norm": 0.333984375, "learning_rate": 1.775557870875383e-07, "loss": 2.0005, "step": 29526 }, { "epoch": 0.9526548110447237, "grad_norm": 0.333984375, "learning_rate": 1.7731444541544462e-07, "loss": 1.9474, "step": 29527 }, { "epoch": 0.9526870748985201, "grad_norm": 0.33203125, "learning_rate": 1.7707326690084013e-07, "loss": 1.9761, "step": 29528 }, { "epoch": 0.9527193387523164, "grad_norm": 0.330078125, "learning_rate": 1.7683225154638105e-07, "loss": 1.9612, "step": 29529 }, { "epoch": 0.9527516026061128, "grad_norm": 0.33203125, "learning_rate": 1.765913993547169e-07, "loss": 2.0057, "step": 29530 }, { "epoch": 0.9527838664599091, "grad_norm": 0.333984375, "learning_rate": 1.7635071032850057e-07, "loss": 1.9849, "step": 29531 }, { "epoch": 0.9528161303137055, "grad_norm": 0.33203125, "learning_rate": 1.7611018447038163e-07, "loss": 1.9928, "step": 29532 }, { "epoch": 0.9528483941675018, "grad_norm": 0.330078125, "learning_rate": 1.7586982178300627e-07, "loss": 1.9924, "step": 29533 }, { "epoch": 0.9528806580212982, "grad_norm": 0.33984375, "learning_rate": 1.7562962226902235e-07, "loss": 1.9613, "step": 29534 }, { "epoch": 0.9529129218750946, "grad_norm": 0.330078125, "learning_rate": 1.7538958593107112e-07, "loss": 1.9827, "step": 29535 }, { "epoch": 0.9529451857288909, "grad_norm": 0.33203125, "learning_rate": 1.7514971277179714e-07, "loss": 1.9752, "step": 29536 }, { "epoch": 0.9529774495826873, "grad_norm": 0.330078125, "learning_rate": 1.7491000279383994e-07, "loss": 1.9917, "step": 29537 }, { "epoch": 0.9530097134364836, "grad_norm": 0.326171875, "learning_rate": 1.7467045599983745e-07, "loss": 1.9824, "step": 29538 }, { "epoch": 0.95304197729028, "grad_norm": 0.333984375, "learning_rate": 1.744310723924275e-07, "loss": 1.9905, "step": 29539 }, { "epoch": 0.9530742411440762, "grad_norm": 0.33203125, "learning_rate": 1.741918519742447e-07, "loss": 1.9897, "step": 29540 }, { "epoch": 0.9531065049978726, "grad_norm": 0.3359375, "learning_rate": 1.7395279474792026e-07, "loss": 1.9726, "step": 29541 }, { "epoch": 0.9531387688516689, "grad_norm": 0.328125, "learning_rate": 1.7371390071608872e-07, "loss": 1.98, "step": 29542 }, { "epoch": 0.9531710327054653, "grad_norm": 0.328125, "learning_rate": 1.7347516988137967e-07, "loss": 1.9659, "step": 29543 }, { "epoch": 0.9532032965592616, "grad_norm": 0.337890625, "learning_rate": 1.73236602246416e-07, "loss": 1.9758, "step": 29544 }, { "epoch": 0.953235560413058, "grad_norm": 0.33203125, "learning_rate": 1.7299819781382896e-07, "loss": 1.9967, "step": 29545 }, { "epoch": 0.9532678242668543, "grad_norm": 0.322265625, "learning_rate": 1.7275995658624145e-07, "loss": 1.9763, "step": 29546 }, { "epoch": 0.9533000881206507, "grad_norm": 0.357421875, "learning_rate": 1.7252187856627466e-07, "loss": 1.9697, "step": 29547 }, { "epoch": 0.953332351974447, "grad_norm": 0.328125, "learning_rate": 1.7228396375654988e-07, "loss": 1.966, "step": 29548 }, { "epoch": 0.9533646158282434, "grad_norm": 0.333984375, "learning_rate": 1.7204621215968663e-07, "loss": 1.971, "step": 29549 }, { "epoch": 0.9533968796820397, "grad_norm": 0.3359375, "learning_rate": 1.7180862377829954e-07, "loss": 2.0179, "step": 29550 }, { "epoch": 0.9534291435358361, "grad_norm": 0.330078125, "learning_rate": 1.7157119861500815e-07, "loss": 1.986, "step": 29551 }, { "epoch": 0.9534614073896324, "grad_norm": 0.33203125, "learning_rate": 1.7133393667242036e-07, "loss": 1.9893, "step": 29552 }, { "epoch": 0.9534936712434288, "grad_norm": 0.337890625, "learning_rate": 1.7109683795315077e-07, "loss": 1.9813, "step": 29553 }, { "epoch": 0.953525935097225, "grad_norm": 0.33203125, "learning_rate": 1.7085990245981064e-07, "loss": 1.9453, "step": 29554 }, { "epoch": 0.9535581989510215, "grad_norm": 0.326171875, "learning_rate": 1.7062313019500453e-07, "loss": 1.9804, "step": 29555 }, { "epoch": 0.9535904628048179, "grad_norm": 0.341796875, "learning_rate": 1.7038652116134036e-07, "loss": 1.9692, "step": 29556 }, { "epoch": 0.9536227266586141, "grad_norm": 0.328125, "learning_rate": 1.7015007536142436e-07, "loss": 2.0126, "step": 29557 }, { "epoch": 0.9536549905124105, "grad_norm": 0.333984375, "learning_rate": 1.699137927978578e-07, "loss": 1.9895, "step": 29558 }, { "epoch": 0.9536872543662068, "grad_norm": 0.33203125, "learning_rate": 1.6967767347324027e-07, "loss": 1.9906, "step": 29559 }, { "epoch": 0.9537195182200032, "grad_norm": 0.33203125, "learning_rate": 1.6944171739016968e-07, "loss": 1.9416, "step": 29560 }, { "epoch": 0.9537517820737995, "grad_norm": 0.326171875, "learning_rate": 1.6920592455124727e-07, "loss": 1.9777, "step": 29561 }, { "epoch": 0.9537840459275959, "grad_norm": 0.33203125, "learning_rate": 1.6897029495906768e-07, "loss": 1.9617, "step": 29562 }, { "epoch": 0.9538163097813922, "grad_norm": 0.337890625, "learning_rate": 1.6873482861622046e-07, "loss": 1.9921, "step": 29563 }, { "epoch": 0.9538485736351886, "grad_norm": 0.328125, "learning_rate": 1.684995255253019e-07, "loss": 2.004, "step": 29564 }, { "epoch": 0.9538808374889849, "grad_norm": 0.328125, "learning_rate": 1.6826438568890155e-07, "loss": 1.9725, "step": 29565 }, { "epoch": 0.9539131013427813, "grad_norm": 0.328125, "learning_rate": 1.6802940910960408e-07, "loss": 2.0093, "step": 29566 }, { "epoch": 0.9539453651965776, "grad_norm": 0.33203125, "learning_rate": 1.6779459579000067e-07, "loss": 1.968, "step": 29567 }, { "epoch": 0.953977629050374, "grad_norm": 0.3359375, "learning_rate": 1.6755994573267263e-07, "loss": 1.9521, "step": 29568 }, { "epoch": 0.9540098929041703, "grad_norm": 0.361328125, "learning_rate": 1.6732545894020458e-07, "loss": 1.9985, "step": 29569 }, { "epoch": 0.9540421567579667, "grad_norm": 0.33984375, "learning_rate": 1.6709113541517607e-07, "loss": 1.9761, "step": 29570 }, { "epoch": 0.954074420611763, "grad_norm": 0.326171875, "learning_rate": 1.668569751601684e-07, "loss": 1.9791, "step": 29571 }, { "epoch": 0.9541066844655594, "grad_norm": 0.33984375, "learning_rate": 1.6662297817775619e-07, "loss": 2.0135, "step": 29572 }, { "epoch": 0.9541389483193556, "grad_norm": 0.333984375, "learning_rate": 1.66389144470519e-07, "loss": 1.9798, "step": 29573 }, { "epoch": 0.954171212173152, "grad_norm": 0.333984375, "learning_rate": 1.6615547404102815e-07, "loss": 1.9896, "step": 29574 }, { "epoch": 0.9542034760269484, "grad_norm": 0.3359375, "learning_rate": 1.659219668918549e-07, "loss": 1.9768, "step": 29575 }, { "epoch": 0.9542357398807447, "grad_norm": 0.33203125, "learning_rate": 1.6568862302557053e-07, "loss": 2.0183, "step": 29576 }, { "epoch": 0.9542680037345411, "grad_norm": 0.33984375, "learning_rate": 1.6545544244474464e-07, "loss": 1.9895, "step": 29577 }, { "epoch": 0.9543002675883374, "grad_norm": 0.328125, "learning_rate": 1.6522242515194186e-07, "loss": 1.9911, "step": 29578 }, { "epoch": 0.9543325314421338, "grad_norm": 0.333984375, "learning_rate": 1.6498957114973012e-07, "loss": 2.014, "step": 29579 }, { "epoch": 0.9543647952959301, "grad_norm": 0.333984375, "learning_rate": 1.6475688044066906e-07, "loss": 2.0099, "step": 29580 }, { "epoch": 0.9543970591497265, "grad_norm": 0.328125, "learning_rate": 1.645243530273216e-07, "loss": 2.0035, "step": 29581 }, { "epoch": 0.9544293230035228, "grad_norm": 0.328125, "learning_rate": 1.6429198891224907e-07, "loss": 1.9393, "step": 29582 }, { "epoch": 0.9544615868573192, "grad_norm": 0.33203125, "learning_rate": 1.6405978809800436e-07, "loss": 2.0057, "step": 29583 }, { "epoch": 0.9544938507111155, "grad_norm": 0.328125, "learning_rate": 1.6382775058714717e-07, "loss": 1.9772, "step": 29584 }, { "epoch": 0.9545261145649119, "grad_norm": 0.337890625, "learning_rate": 1.6359587638223207e-07, "loss": 2.0083, "step": 29585 }, { "epoch": 0.9545583784187082, "grad_norm": 0.333984375, "learning_rate": 1.6336416548580868e-07, "loss": 1.9894, "step": 29586 }, { "epoch": 0.9545906422725046, "grad_norm": 0.330078125, "learning_rate": 1.6313261790042834e-07, "loss": 1.9456, "step": 29587 }, { "epoch": 0.9546229061263009, "grad_norm": 0.328125, "learning_rate": 1.629012336286423e-07, "loss": 1.9936, "step": 29588 }, { "epoch": 0.9546551699800973, "grad_norm": 0.328125, "learning_rate": 1.6267001267299354e-07, "loss": 1.9799, "step": 29589 }, { "epoch": 0.9546874338338935, "grad_norm": 0.400390625, "learning_rate": 1.6243895503603001e-07, "loss": 1.9435, "step": 29590 }, { "epoch": 0.9547196976876899, "grad_norm": 0.32421875, "learning_rate": 1.6220806072029305e-07, "loss": 1.9907, "step": 29591 }, { "epoch": 0.9547519615414862, "grad_norm": 0.326171875, "learning_rate": 1.6197732972832557e-07, "loss": 2.0009, "step": 29592 }, { "epoch": 0.9547842253952826, "grad_norm": 0.326171875, "learning_rate": 1.6174676206266727e-07, "loss": 1.9894, "step": 29593 }, { "epoch": 0.9548164892490789, "grad_norm": 0.33203125, "learning_rate": 1.6151635772585604e-07, "loss": 1.9891, "step": 29594 }, { "epoch": 0.9548487531028753, "grad_norm": 0.341796875, "learning_rate": 1.612861167204266e-07, "loss": 1.989, "step": 29595 }, { "epoch": 0.9548810169566717, "grad_norm": 0.330078125, "learning_rate": 1.610560390489152e-07, "loss": 1.9956, "step": 29596 }, { "epoch": 0.954913280810468, "grad_norm": 0.33984375, "learning_rate": 1.6082612471385315e-07, "loss": 1.9887, "step": 29597 }, { "epoch": 0.9549455446642644, "grad_norm": 0.326171875, "learning_rate": 1.605963737177718e-07, "loss": 1.9949, "step": 29598 }, { "epoch": 0.9549778085180607, "grad_norm": 0.330078125, "learning_rate": 1.603667860631991e-07, "loss": 1.9847, "step": 29599 }, { "epoch": 0.9550100723718571, "grad_norm": 0.333984375, "learning_rate": 1.6013736175266303e-07, "loss": 2.0099, "step": 29600 }, { "epoch": 0.9550423362256534, "grad_norm": 0.33984375, "learning_rate": 1.599081007886899e-07, "loss": 1.9775, "step": 29601 }, { "epoch": 0.9550746000794498, "grad_norm": 0.33203125, "learning_rate": 1.5967900317380102e-07, "loss": 2.011, "step": 29602 }, { "epoch": 0.9551068639332461, "grad_norm": 0.328125, "learning_rate": 1.594500689105194e-07, "loss": 1.9755, "step": 29603 }, { "epoch": 0.9551391277870425, "grad_norm": 0.330078125, "learning_rate": 1.5922129800136464e-07, "loss": 1.9682, "step": 29604 }, { "epoch": 0.9551713916408388, "grad_norm": 0.3359375, "learning_rate": 1.5899269044885646e-07, "loss": 1.9749, "step": 29605 }, { "epoch": 0.9552036554946352, "grad_norm": 0.33203125, "learning_rate": 1.5876424625550778e-07, "loss": 2.0336, "step": 29606 }, { "epoch": 0.9552359193484314, "grad_norm": 0.333984375, "learning_rate": 1.5853596542383664e-07, "loss": 1.9957, "step": 29607 }, { "epoch": 0.9552681832022278, "grad_norm": 0.32421875, "learning_rate": 1.5830784795635435e-07, "loss": 1.999, "step": 29608 }, { "epoch": 0.9553004470560241, "grad_norm": 0.33984375, "learning_rate": 1.5807989385557054e-07, "loss": 1.9798, "step": 29609 }, { "epoch": 0.9553327109098205, "grad_norm": 0.333984375, "learning_rate": 1.5785210312399822e-07, "loss": 2.0072, "step": 29610 }, { "epoch": 0.9553649747636168, "grad_norm": 0.3359375, "learning_rate": 1.576244757641404e-07, "loss": 1.9714, "step": 29611 }, { "epoch": 0.9553972386174132, "grad_norm": 0.326171875, "learning_rate": 1.5739701177850507e-07, "loss": 2.0078, "step": 29612 }, { "epoch": 0.9554295024712095, "grad_norm": 0.326171875, "learning_rate": 1.571697111695969e-07, "loss": 2.0, "step": 29613 }, { "epoch": 0.9554617663250059, "grad_norm": 0.33203125, "learning_rate": 1.5694257393991386e-07, "loss": 2.0217, "step": 29614 }, { "epoch": 0.9554940301788022, "grad_norm": 0.330078125, "learning_rate": 1.5671560009195894e-07, "loss": 1.9889, "step": 29615 }, { "epoch": 0.9555262940325986, "grad_norm": 0.322265625, "learning_rate": 1.5648878962823186e-07, "loss": 1.9686, "step": 29616 }, { "epoch": 0.955558557886395, "grad_norm": 0.337890625, "learning_rate": 1.5626214255122563e-07, "loss": 1.9765, "step": 29617 }, { "epoch": 0.9555908217401913, "grad_norm": 0.328125, "learning_rate": 1.5603565886343652e-07, "loss": 1.9646, "step": 29618 }, { "epoch": 0.9556230855939877, "grad_norm": 0.328125, "learning_rate": 1.5580933856735924e-07, "loss": 1.987, "step": 29619 }, { "epoch": 0.955655349447784, "grad_norm": 0.333984375, "learning_rate": 1.5558318166548347e-07, "loss": 1.9956, "step": 29620 }, { "epoch": 0.9556876133015804, "grad_norm": 0.337890625, "learning_rate": 1.5535718816029887e-07, "loss": 2.0049, "step": 29621 }, { "epoch": 0.9557198771553767, "grad_norm": 0.3359375, "learning_rate": 1.5513135805429178e-07, "loss": 1.9868, "step": 29622 }, { "epoch": 0.955752141009173, "grad_norm": 0.328125, "learning_rate": 1.5490569134995025e-07, "loss": 2.0011, "step": 29623 }, { "epoch": 0.9557844048629693, "grad_norm": 0.33984375, "learning_rate": 1.546801880497556e-07, "loss": 2.0111, "step": 29624 }, { "epoch": 0.9558166687167657, "grad_norm": 0.33203125, "learning_rate": 1.5445484815619247e-07, "loss": 1.9579, "step": 29625 }, { "epoch": 0.955848932570562, "grad_norm": 0.337890625, "learning_rate": 1.5422967167174061e-07, "loss": 1.9905, "step": 29626 }, { "epoch": 0.9558811964243584, "grad_norm": 0.33203125, "learning_rate": 1.5400465859887803e-07, "loss": 1.9898, "step": 29627 }, { "epoch": 0.9559134602781547, "grad_norm": 0.328125, "learning_rate": 1.5377980894008103e-07, "loss": 2.0096, "step": 29628 }, { "epoch": 0.9559457241319511, "grad_norm": 0.32421875, "learning_rate": 1.5355512269782435e-07, "loss": 2.0092, "step": 29629 }, { "epoch": 0.9559779879857474, "grad_norm": 0.33203125, "learning_rate": 1.5333059987458432e-07, "loss": 1.987, "step": 29630 }, { "epoch": 0.9560102518395438, "grad_norm": 0.328125, "learning_rate": 1.5310624047282895e-07, "loss": 1.9739, "step": 29631 }, { "epoch": 0.9560425156933401, "grad_norm": 0.328125, "learning_rate": 1.5288204449502962e-07, "loss": 1.9932, "step": 29632 }, { "epoch": 0.9560747795471365, "grad_norm": 0.333984375, "learning_rate": 1.526580119436527e-07, "loss": 1.9754, "step": 29633 }, { "epoch": 0.9561070434009328, "grad_norm": 0.330078125, "learning_rate": 1.5243414282116452e-07, "loss": 1.9612, "step": 29634 }, { "epoch": 0.9561393072547292, "grad_norm": 0.3359375, "learning_rate": 1.5221043713003148e-07, "loss": 1.9782, "step": 29635 }, { "epoch": 0.9561715711085256, "grad_norm": 0.33984375, "learning_rate": 1.5198689487271322e-07, "loss": 1.9958, "step": 29636 }, { "epoch": 0.9562038349623219, "grad_norm": 0.33203125, "learning_rate": 1.5176351605166949e-07, "loss": 2.0135, "step": 29637 }, { "epoch": 0.9562360988161183, "grad_norm": 0.330078125, "learning_rate": 1.515403006693633e-07, "loss": 1.9908, "step": 29638 }, { "epoch": 0.9562683626699146, "grad_norm": 0.328125, "learning_rate": 1.5131724872824937e-07, "loss": 2.0099, "step": 29639 }, { "epoch": 0.956300626523711, "grad_norm": 0.33203125, "learning_rate": 1.5109436023078072e-07, "loss": 1.9738, "step": 29640 }, { "epoch": 0.9563328903775072, "grad_norm": 0.333984375, "learning_rate": 1.5087163517941537e-07, "loss": 1.9881, "step": 29641 }, { "epoch": 0.9563651542313036, "grad_norm": 0.326171875, "learning_rate": 1.5064907357659975e-07, "loss": 2.0176, "step": 29642 }, { "epoch": 0.9563974180850999, "grad_norm": 0.333984375, "learning_rate": 1.5042667542478682e-07, "loss": 1.9674, "step": 29643 }, { "epoch": 0.9564296819388963, "grad_norm": 0.330078125, "learning_rate": 1.5020444072642636e-07, "loss": 1.9699, "step": 29644 }, { "epoch": 0.9564619457926926, "grad_norm": 0.330078125, "learning_rate": 1.4998236948395973e-07, "loss": 1.9766, "step": 29645 }, { "epoch": 0.956494209646489, "grad_norm": 0.330078125, "learning_rate": 1.4976046169983325e-07, "loss": 1.9693, "step": 29646 }, { "epoch": 0.9565264735002853, "grad_norm": 0.337890625, "learning_rate": 1.495387173764934e-07, "loss": 1.9426, "step": 29647 }, { "epoch": 0.9565587373540817, "grad_norm": 0.333984375, "learning_rate": 1.493171365163748e-07, "loss": 2.0039, "step": 29648 }, { "epoch": 0.956591001207878, "grad_norm": 0.32421875, "learning_rate": 1.4909571912192056e-07, "loss": 1.9971, "step": 29649 }, { "epoch": 0.9566232650616744, "grad_norm": 0.333984375, "learning_rate": 1.4887446519556702e-07, "loss": 1.9815, "step": 29650 }, { "epoch": 0.9566555289154707, "grad_norm": 0.328125, "learning_rate": 1.4865337473974726e-07, "loss": 1.9925, "step": 29651 }, { "epoch": 0.9566877927692671, "grad_norm": 0.328125, "learning_rate": 1.4843244775689935e-07, "loss": 1.9944, "step": 29652 }, { "epoch": 0.9567200566230634, "grad_norm": 0.3359375, "learning_rate": 1.4821168424944964e-07, "loss": 1.9709, "step": 29653 }, { "epoch": 0.9567523204768598, "grad_norm": 0.3359375, "learning_rate": 1.4799108421983454e-07, "loss": 1.9686, "step": 29654 }, { "epoch": 0.956784584330656, "grad_norm": 0.32421875, "learning_rate": 1.477706476704771e-07, "loss": 1.9663, "step": 29655 }, { "epoch": 0.9568168481844525, "grad_norm": 0.32421875, "learning_rate": 1.475503746038037e-07, "loss": 1.9635, "step": 29656 }, { "epoch": 0.9568491120382489, "grad_norm": 0.333984375, "learning_rate": 1.4733026502224244e-07, "loss": 1.9927, "step": 29657 }, { "epoch": 0.9568813758920451, "grad_norm": 0.333984375, "learning_rate": 1.471103189282147e-07, "loss": 2.0016, "step": 29658 }, { "epoch": 0.9569136397458415, "grad_norm": 0.34375, "learning_rate": 1.4689053632414019e-07, "loss": 1.9907, "step": 29659 }, { "epoch": 0.9569459035996378, "grad_norm": 0.33203125, "learning_rate": 1.466709172124403e-07, "loss": 2.0241, "step": 29660 }, { "epoch": 0.9569781674534342, "grad_norm": 0.33203125, "learning_rate": 1.4645146159553147e-07, "loss": 1.9816, "step": 29661 }, { "epoch": 0.9570104313072305, "grad_norm": 0.33203125, "learning_rate": 1.4623216947582673e-07, "loss": 2.0114, "step": 29662 }, { "epoch": 0.9570426951610269, "grad_norm": 0.33203125, "learning_rate": 1.460130408557442e-07, "loss": 2.02, "step": 29663 }, { "epoch": 0.9570749590148232, "grad_norm": 0.33203125, "learning_rate": 1.4579407573769354e-07, "loss": 1.9953, "step": 29664 }, { "epoch": 0.9571072228686196, "grad_norm": 0.322265625, "learning_rate": 1.4557527412408455e-07, "loss": 1.9964, "step": 29665 }, { "epoch": 0.9571394867224159, "grad_norm": 0.333984375, "learning_rate": 1.4535663601732696e-07, "loss": 1.9734, "step": 29666 }, { "epoch": 0.9571717505762123, "grad_norm": 0.326171875, "learning_rate": 1.4513816141982716e-07, "loss": 1.9662, "step": 29667 }, { "epoch": 0.9572040144300086, "grad_norm": 0.32421875, "learning_rate": 1.4491985033398826e-07, "loss": 1.9889, "step": 29668 }, { "epoch": 0.957236278283805, "grad_norm": 0.328125, "learning_rate": 1.4470170276221663e-07, "loss": 1.9834, "step": 29669 }, { "epoch": 0.9572685421376013, "grad_norm": 0.33984375, "learning_rate": 1.4448371870691035e-07, "loss": 2.0063, "step": 29670 }, { "epoch": 0.9573008059913977, "grad_norm": 0.33203125, "learning_rate": 1.4426589817047086e-07, "loss": 1.993, "step": 29671 }, { "epoch": 0.957333069845194, "grad_norm": 0.333984375, "learning_rate": 1.440482411552946e-07, "loss": 1.9933, "step": 29672 }, { "epoch": 0.9573653336989904, "grad_norm": 0.32421875, "learning_rate": 1.438307476637779e-07, "loss": 2.0007, "step": 29673 }, { "epoch": 0.9573975975527866, "grad_norm": 0.33984375, "learning_rate": 1.4361341769831226e-07, "loss": 1.9876, "step": 29674 }, { "epoch": 0.957429861406583, "grad_norm": 0.33203125, "learning_rate": 1.4339625126129575e-07, "loss": 1.9656, "step": 29675 }, { "epoch": 0.9574621252603794, "grad_norm": 0.33203125, "learning_rate": 1.431792483551131e-07, "loss": 1.9633, "step": 29676 }, { "epoch": 0.9574943891141757, "grad_norm": 0.333984375, "learning_rate": 1.4296240898215406e-07, "loss": 1.995, "step": 29677 }, { "epoch": 0.9575266529679721, "grad_norm": 0.333984375, "learning_rate": 1.427457331448101e-07, "loss": 2.0054, "step": 29678 }, { "epoch": 0.9575589168217684, "grad_norm": 0.337890625, "learning_rate": 1.4252922084545926e-07, "loss": 1.9406, "step": 29679 }, { "epoch": 0.9575911806755648, "grad_norm": 0.333984375, "learning_rate": 1.4231287208648802e-07, "loss": 1.9902, "step": 29680 }, { "epoch": 0.9576234445293611, "grad_norm": 0.326171875, "learning_rate": 1.4209668687027943e-07, "loss": 1.9285, "step": 29681 }, { "epoch": 0.9576557083831575, "grad_norm": 0.328125, "learning_rate": 1.4188066519921162e-07, "loss": 1.9872, "step": 29682 }, { "epoch": 0.9576879722369538, "grad_norm": 0.3359375, "learning_rate": 1.4166480707566099e-07, "loss": 2.0016, "step": 29683 }, { "epoch": 0.9577202360907502, "grad_norm": 0.33984375, "learning_rate": 1.4144911250200564e-07, "loss": 1.9882, "step": 29684 }, { "epoch": 0.9577524999445465, "grad_norm": 0.326171875, "learning_rate": 1.4123358148061872e-07, "loss": 1.9732, "step": 29685 }, { "epoch": 0.9577847637983429, "grad_norm": 0.333984375, "learning_rate": 1.4101821401387327e-07, "loss": 2.0051, "step": 29686 }, { "epoch": 0.9578170276521392, "grad_norm": 0.330078125, "learning_rate": 1.408030101041391e-07, "loss": 1.9934, "step": 29687 }, { "epoch": 0.9578492915059356, "grad_norm": 0.330078125, "learning_rate": 1.405879697537843e-07, "loss": 1.9759, "step": 29688 }, { "epoch": 0.9578815553597319, "grad_norm": 0.33984375, "learning_rate": 1.4037309296517864e-07, "loss": 1.9971, "step": 29689 }, { "epoch": 0.9579138192135283, "grad_norm": 0.328125, "learning_rate": 1.4015837974068524e-07, "loss": 1.9923, "step": 29690 }, { "epoch": 0.9579460830673245, "grad_norm": 0.337890625, "learning_rate": 1.3994383008266887e-07, "loss": 1.9943, "step": 29691 }, { "epoch": 0.9579783469211209, "grad_norm": 0.330078125, "learning_rate": 1.397294439934893e-07, "loss": 1.9773, "step": 29692 }, { "epoch": 0.9580106107749172, "grad_norm": 0.33203125, "learning_rate": 1.3951522147550632e-07, "loss": 1.9943, "step": 29693 }, { "epoch": 0.9580428746287136, "grad_norm": 0.33984375, "learning_rate": 1.393011625310814e-07, "loss": 1.9884, "step": 29694 }, { "epoch": 0.9580751384825099, "grad_norm": 0.333984375, "learning_rate": 1.390872671625676e-07, "loss": 1.9669, "step": 29695 }, { "epoch": 0.9581074023363063, "grad_norm": 0.328125, "learning_rate": 1.3887353537231805e-07, "loss": 2.0006, "step": 29696 }, { "epoch": 0.9581396661901027, "grad_norm": 0.330078125, "learning_rate": 1.3865996716268926e-07, "loss": 2.0014, "step": 29697 }, { "epoch": 0.958171930043899, "grad_norm": 0.333984375, "learning_rate": 1.3844656253602928e-07, "loss": 2.0169, "step": 29698 }, { "epoch": 0.9582041938976954, "grad_norm": 0.33203125, "learning_rate": 1.3823332149468793e-07, "loss": 1.9946, "step": 29699 }, { "epoch": 0.9582364577514917, "grad_norm": 0.33203125, "learning_rate": 1.3802024404101167e-07, "loss": 2.003, "step": 29700 }, { "epoch": 0.9582687216052881, "grad_norm": 0.330078125, "learning_rate": 1.378073301773486e-07, "loss": 2.0068, "step": 29701 }, { "epoch": 0.9583009854590844, "grad_norm": 0.33203125, "learning_rate": 1.3759457990603852e-07, "loss": 1.9876, "step": 29702 }, { "epoch": 0.9583332493128808, "grad_norm": 0.32421875, "learning_rate": 1.3738199322942624e-07, "loss": 1.9857, "step": 29703 }, { "epoch": 0.9583655131666771, "grad_norm": 0.326171875, "learning_rate": 1.3716957014984987e-07, "loss": 1.9692, "step": 29704 }, { "epoch": 0.9583977770204735, "grad_norm": 0.333984375, "learning_rate": 1.3695731066964756e-07, "loss": 1.9965, "step": 29705 }, { "epoch": 0.9584300408742698, "grad_norm": 0.337890625, "learning_rate": 1.367452147911591e-07, "loss": 1.9934, "step": 29706 }, { "epoch": 0.9584623047280662, "grad_norm": 0.33203125, "learning_rate": 1.3653328251671425e-07, "loss": 2.0351, "step": 29707 }, { "epoch": 0.9584945685818624, "grad_norm": 0.33203125, "learning_rate": 1.3632151384864787e-07, "loss": 1.9852, "step": 29708 }, { "epoch": 0.9585268324356588, "grad_norm": 0.330078125, "learning_rate": 1.3610990878929307e-07, "loss": 1.9817, "step": 29709 }, { "epoch": 0.9585590962894551, "grad_norm": 0.3359375, "learning_rate": 1.3589846734097467e-07, "loss": 1.9693, "step": 29710 }, { "epoch": 0.9585913601432515, "grad_norm": 0.326171875, "learning_rate": 1.3568718950602411e-07, "loss": 1.9933, "step": 29711 }, { "epoch": 0.9586236239970478, "grad_norm": 0.328125, "learning_rate": 1.354760752867662e-07, "loss": 1.9917, "step": 29712 }, { "epoch": 0.9586558878508442, "grad_norm": 0.333984375, "learning_rate": 1.3526512468552077e-07, "loss": 2.0065, "step": 29713 }, { "epoch": 0.9586881517046405, "grad_norm": 0.33203125, "learning_rate": 1.3505433770461594e-07, "loss": 1.9837, "step": 29714 }, { "epoch": 0.9587204155584369, "grad_norm": 0.330078125, "learning_rate": 1.3484371434636822e-07, "loss": 1.9765, "step": 29715 }, { "epoch": 0.9587526794122332, "grad_norm": 0.32421875, "learning_rate": 1.3463325461309572e-07, "loss": 1.9729, "step": 29716 }, { "epoch": 0.9587849432660296, "grad_norm": 0.337890625, "learning_rate": 1.344229585071166e-07, "loss": 1.9896, "step": 29717 }, { "epoch": 0.958817207119826, "grad_norm": 0.337890625, "learning_rate": 1.3421282603074403e-07, "loss": 2.0166, "step": 29718 }, { "epoch": 0.9588494709736223, "grad_norm": 0.326171875, "learning_rate": 1.340028571862928e-07, "loss": 1.98, "step": 29719 }, { "epoch": 0.9588817348274187, "grad_norm": 0.33203125, "learning_rate": 1.3379305197607438e-07, "loss": 1.9603, "step": 29720 }, { "epoch": 0.958913998681215, "grad_norm": 0.328125, "learning_rate": 1.335834104023953e-07, "loss": 2.0087, "step": 29721 }, { "epoch": 0.9589462625350114, "grad_norm": 0.33984375, "learning_rate": 1.3337393246756534e-07, "loss": 2.0231, "step": 29722 }, { "epoch": 0.9589785263888077, "grad_norm": 0.3359375, "learning_rate": 1.3316461817389103e-07, "loss": 1.9981, "step": 29723 }, { "epoch": 0.959010790242604, "grad_norm": 0.33984375, "learning_rate": 1.3295546752367382e-07, "loss": 1.9781, "step": 29724 }, { "epoch": 0.9590430540964003, "grad_norm": 0.33203125, "learning_rate": 1.327464805192169e-07, "loss": 1.9656, "step": 29725 }, { "epoch": 0.9590753179501967, "grad_norm": 0.328125, "learning_rate": 1.3253765716282175e-07, "loss": 2.0102, "step": 29726 }, { "epoch": 0.959107581803993, "grad_norm": 0.333984375, "learning_rate": 1.323289974567865e-07, "loss": 1.9928, "step": 29727 }, { "epoch": 0.9591398456577894, "grad_norm": 0.3359375, "learning_rate": 1.321205014034077e-07, "loss": 1.9818, "step": 29728 }, { "epoch": 0.9591721095115857, "grad_norm": 0.33203125, "learning_rate": 1.3191216900498016e-07, "loss": 1.9748, "step": 29729 }, { "epoch": 0.9592043733653821, "grad_norm": 0.337890625, "learning_rate": 1.3170400026379536e-07, "loss": 1.9979, "step": 29730 }, { "epoch": 0.9592366372191784, "grad_norm": 0.333984375, "learning_rate": 1.3149599518214984e-07, "loss": 2.0189, "step": 29731 }, { "epoch": 0.9592689010729748, "grad_norm": 0.33203125, "learning_rate": 1.3128815376232672e-07, "loss": 2.0052, "step": 29732 }, { "epoch": 0.9593011649267711, "grad_norm": 0.3515625, "learning_rate": 1.3108047600661754e-07, "loss": 1.9675, "step": 29733 }, { "epoch": 0.9593334287805675, "grad_norm": 0.33203125, "learning_rate": 1.3087296191731045e-07, "loss": 2.011, "step": 29734 }, { "epoch": 0.9593656926343638, "grad_norm": 0.330078125, "learning_rate": 1.3066561149668366e-07, "loss": 1.9723, "step": 29735 }, { "epoch": 0.9593979564881602, "grad_norm": 0.333984375, "learning_rate": 1.3045842474702196e-07, "loss": 1.9903, "step": 29736 }, { "epoch": 0.9594302203419566, "grad_norm": 0.328125, "learning_rate": 1.3025140167061022e-07, "loss": 1.9831, "step": 29737 }, { "epoch": 0.9594624841957529, "grad_norm": 0.330078125, "learning_rate": 1.3004454226971996e-07, "loss": 1.992, "step": 29738 }, { "epoch": 0.9594947480495493, "grad_norm": 0.330078125, "learning_rate": 1.2983784654663266e-07, "loss": 1.9849, "step": 29739 }, { "epoch": 0.9595270119033456, "grad_norm": 0.328125, "learning_rate": 1.296313145036232e-07, "loss": 2.0033, "step": 29740 }, { "epoch": 0.959559275757142, "grad_norm": 0.328125, "learning_rate": 1.2942494614296473e-07, "loss": 1.995, "step": 29741 }, { "epoch": 0.9595915396109382, "grad_norm": 0.333984375, "learning_rate": 1.2921874146692712e-07, "loss": 2.0254, "step": 29742 }, { "epoch": 0.9596238034647346, "grad_norm": 0.330078125, "learning_rate": 1.2901270047778192e-07, "loss": 2.019, "step": 29743 }, { "epoch": 0.9596560673185309, "grad_norm": 0.330078125, "learning_rate": 1.2880682317779557e-07, "loss": 1.947, "step": 29744 }, { "epoch": 0.9596883311723273, "grad_norm": 0.328125, "learning_rate": 1.2860110956923632e-07, "loss": 1.9908, "step": 29745 }, { "epoch": 0.9597205950261236, "grad_norm": 0.333984375, "learning_rate": 1.2839555965436733e-07, "loss": 2.0005, "step": 29746 }, { "epoch": 0.95975285887992, "grad_norm": 0.330078125, "learning_rate": 1.2819017343545014e-07, "loss": 2.0344, "step": 29747 }, { "epoch": 0.9597851227337163, "grad_norm": 0.33203125, "learning_rate": 1.2798495091474793e-07, "loss": 1.9682, "step": 29748 }, { "epoch": 0.9598173865875127, "grad_norm": 0.333984375, "learning_rate": 1.2777989209451723e-07, "loss": 1.9758, "step": 29749 }, { "epoch": 0.959849650441309, "grad_norm": 0.3359375, "learning_rate": 1.2757499697701626e-07, "loss": 2.0245, "step": 29750 }, { "epoch": 0.9598819142951054, "grad_norm": 0.328125, "learning_rate": 1.273702655645015e-07, "loss": 1.9817, "step": 29751 }, { "epoch": 0.9599141781489017, "grad_norm": 0.337890625, "learning_rate": 1.2716569785922284e-07, "loss": 1.9848, "step": 29752 }, { "epoch": 0.9599464420026981, "grad_norm": 0.341796875, "learning_rate": 1.269612938634368e-07, "loss": 1.959, "step": 29753 }, { "epoch": 0.9599787058564944, "grad_norm": 0.333984375, "learning_rate": 1.2675705357938995e-07, "loss": 1.9734, "step": 29754 }, { "epoch": 0.9600109697102908, "grad_norm": 0.337890625, "learning_rate": 1.265529770093321e-07, "loss": 1.9819, "step": 29755 }, { "epoch": 0.960043233564087, "grad_norm": 0.33203125, "learning_rate": 1.2634906415550816e-07, "loss": 1.9819, "step": 29756 }, { "epoch": 0.9600754974178834, "grad_norm": 0.328125, "learning_rate": 1.26145315020163e-07, "loss": 2.0028, "step": 29757 }, { "epoch": 0.9601077612716798, "grad_norm": 0.32421875, "learning_rate": 1.2594172960553986e-07, "loss": 1.9996, "step": 29758 }, { "epoch": 0.9601400251254761, "grad_norm": 0.328125, "learning_rate": 1.2573830791388186e-07, "loss": 1.9866, "step": 29759 }, { "epoch": 0.9601722889792725, "grad_norm": 0.32421875, "learning_rate": 1.2553504994742394e-07, "loss": 2.0062, "step": 29760 }, { "epoch": 0.9602045528330688, "grad_norm": 0.32421875, "learning_rate": 1.2533195570840428e-07, "loss": 1.9784, "step": 29761 }, { "epoch": 0.9602368166868652, "grad_norm": 0.33203125, "learning_rate": 1.2512902519906277e-07, "loss": 1.9756, "step": 29762 }, { "epoch": 0.9602690805406615, "grad_norm": 0.330078125, "learning_rate": 1.2492625842162597e-07, "loss": 1.9674, "step": 29763 }, { "epoch": 0.9603013443944579, "grad_norm": 0.330078125, "learning_rate": 1.247236553783304e-07, "loss": 2.0012, "step": 29764 }, { "epoch": 0.9603336082482542, "grad_norm": 0.337890625, "learning_rate": 1.2452121607140765e-07, "loss": 1.9836, "step": 29765 }, { "epoch": 0.9603658721020506, "grad_norm": 0.33203125, "learning_rate": 1.243189405030809e-07, "loss": 1.9914, "step": 29766 }, { "epoch": 0.9603981359558469, "grad_norm": 0.33203125, "learning_rate": 1.241168286755817e-07, "loss": 1.9839, "step": 29767 }, { "epoch": 0.9604303998096433, "grad_norm": 0.33203125, "learning_rate": 1.2391488059113166e-07, "loss": 1.9903, "step": 29768 }, { "epoch": 0.9604626636634396, "grad_norm": 0.33984375, "learning_rate": 1.2371309625195392e-07, "loss": 1.9728, "step": 29769 }, { "epoch": 0.960494927517236, "grad_norm": 0.333984375, "learning_rate": 1.235114756602701e-07, "loss": 1.9822, "step": 29770 }, { "epoch": 0.9605271913710323, "grad_norm": 0.33203125, "learning_rate": 1.2331001881830172e-07, "loss": 1.9648, "step": 29771 }, { "epoch": 0.9605594552248287, "grad_norm": 0.3359375, "learning_rate": 1.2310872572826204e-07, "loss": 1.9764, "step": 29772 }, { "epoch": 0.960591719078625, "grad_norm": 0.33203125, "learning_rate": 1.2290759639237093e-07, "loss": 1.9794, "step": 29773 }, { "epoch": 0.9606239829324213, "grad_norm": 0.33203125, "learning_rate": 1.2270663081283828e-07, "loss": 1.9702, "step": 29774 }, { "epoch": 0.9606562467862176, "grad_norm": 0.326171875, "learning_rate": 1.22505828991879e-07, "loss": 2.0004, "step": 29775 }, { "epoch": 0.960688510640014, "grad_norm": 0.330078125, "learning_rate": 1.2230519093170134e-07, "loss": 1.9743, "step": 29776 }, { "epoch": 0.9607207744938104, "grad_norm": 0.328125, "learning_rate": 1.2210471663451682e-07, "loss": 2.0019, "step": 29777 }, { "epoch": 0.9607530383476067, "grad_norm": 0.337890625, "learning_rate": 1.2190440610252873e-07, "loss": 1.9849, "step": 29778 }, { "epoch": 0.9607853022014031, "grad_norm": 0.3359375, "learning_rate": 1.217042593379436e-07, "loss": 1.9645, "step": 29779 }, { "epoch": 0.9608175660551994, "grad_norm": 0.328125, "learning_rate": 1.215042763429647e-07, "loss": 1.989, "step": 29780 }, { "epoch": 0.9608498299089958, "grad_norm": 0.333984375, "learning_rate": 1.2130445711979188e-07, "loss": 1.9712, "step": 29781 }, { "epoch": 0.9608820937627921, "grad_norm": 0.322265625, "learning_rate": 1.2110480167062843e-07, "loss": 1.9653, "step": 29782 }, { "epoch": 0.9609143576165885, "grad_norm": 0.33203125, "learning_rate": 1.2090530999766758e-07, "loss": 1.9701, "step": 29783 }, { "epoch": 0.9609466214703848, "grad_norm": 0.337890625, "learning_rate": 1.2070598210310592e-07, "loss": 2.0012, "step": 29784 }, { "epoch": 0.9609788853241812, "grad_norm": 0.3359375, "learning_rate": 1.2050681798913832e-07, "loss": 2.013, "step": 29785 }, { "epoch": 0.9610111491779775, "grad_norm": 0.326171875, "learning_rate": 1.2030781765795806e-07, "loss": 1.9798, "step": 29786 }, { "epoch": 0.9610434130317739, "grad_norm": 0.328125, "learning_rate": 1.2010898111175506e-07, "loss": 1.9883, "step": 29787 }, { "epoch": 0.9610756768855702, "grad_norm": 0.326171875, "learning_rate": 1.1991030835271754e-07, "loss": 1.9452, "step": 29788 }, { "epoch": 0.9611079407393666, "grad_norm": 0.333984375, "learning_rate": 1.1971179938303213e-07, "loss": 1.9862, "step": 29789 }, { "epoch": 0.9611402045931629, "grad_norm": 0.330078125, "learning_rate": 1.195134542048837e-07, "loss": 1.9948, "step": 29790 }, { "epoch": 0.9611724684469592, "grad_norm": 0.337890625, "learning_rate": 1.1931527282045717e-07, "loss": 2.0172, "step": 29791 }, { "epoch": 0.9612047323007555, "grad_norm": 0.333984375, "learning_rate": 1.1911725523193085e-07, "loss": 1.9823, "step": 29792 }, { "epoch": 0.9612369961545519, "grad_norm": 0.326171875, "learning_rate": 1.189194014414896e-07, "loss": 1.9947, "step": 29793 }, { "epoch": 0.9612692600083482, "grad_norm": 0.32421875, "learning_rate": 1.1872171145130506e-07, "loss": 2.0245, "step": 29794 }, { "epoch": 0.9613015238621446, "grad_norm": 0.32421875, "learning_rate": 1.1852418526355713e-07, "loss": 1.9871, "step": 29795 }, { "epoch": 0.9613337877159409, "grad_norm": 0.330078125, "learning_rate": 1.1832682288041907e-07, "loss": 1.9998, "step": 29796 }, { "epoch": 0.9613660515697373, "grad_norm": 0.328125, "learning_rate": 1.1812962430406249e-07, "loss": 1.9669, "step": 29797 }, { "epoch": 0.9613983154235337, "grad_norm": 0.337890625, "learning_rate": 1.1793258953665897e-07, "loss": 1.9847, "step": 29798 }, { "epoch": 0.96143057927733, "grad_norm": 0.33203125, "learning_rate": 1.1773571858037846e-07, "loss": 1.9761, "step": 29799 }, { "epoch": 0.9614628431311264, "grad_norm": 0.326171875, "learning_rate": 1.1753901143738588e-07, "loss": 1.9939, "step": 29800 }, { "epoch": 0.9614951069849227, "grad_norm": 0.330078125, "learning_rate": 1.1734246810984617e-07, "loss": 2.0045, "step": 29801 }, { "epoch": 0.9615273708387191, "grad_norm": 0.326171875, "learning_rate": 1.1714608859992593e-07, "loss": 1.9714, "step": 29802 }, { "epoch": 0.9615596346925154, "grad_norm": 0.322265625, "learning_rate": 1.1694987290978177e-07, "loss": 1.9882, "step": 29803 }, { "epoch": 0.9615918985463118, "grad_norm": 0.33203125, "learning_rate": 1.1675382104157861e-07, "loss": 1.9938, "step": 29804 }, { "epoch": 0.9616241624001081, "grad_norm": 0.328125, "learning_rate": 1.1655793299747142e-07, "loss": 1.9578, "step": 29805 }, { "epoch": 0.9616564262539045, "grad_norm": 0.333984375, "learning_rate": 1.1636220877961678e-07, "loss": 1.9997, "step": 29806 }, { "epoch": 0.9616886901077007, "grad_norm": 0.3359375, "learning_rate": 1.1616664839017133e-07, "loss": 1.9834, "step": 29807 }, { "epoch": 0.9617209539614971, "grad_norm": 0.326171875, "learning_rate": 1.1597125183128499e-07, "loss": 1.9641, "step": 29808 }, { "epoch": 0.9617532178152934, "grad_norm": 0.33203125, "learning_rate": 1.1577601910510772e-07, "loss": 2.0074, "step": 29809 }, { "epoch": 0.9617854816690898, "grad_norm": 0.333984375, "learning_rate": 1.1558095021379111e-07, "loss": 2.0001, "step": 29810 }, { "epoch": 0.9618177455228861, "grad_norm": 0.328125, "learning_rate": 1.1538604515948181e-07, "loss": 2.0023, "step": 29811 }, { "epoch": 0.9618500093766825, "grad_norm": 0.33984375, "learning_rate": 1.1519130394432476e-07, "loss": 2.0084, "step": 29812 }, { "epoch": 0.9618822732304788, "grad_norm": 0.328125, "learning_rate": 1.1499672657046322e-07, "loss": 2.0241, "step": 29813 }, { "epoch": 0.9619145370842752, "grad_norm": 0.33984375, "learning_rate": 1.1480231304003886e-07, "loss": 2.0205, "step": 29814 }, { "epoch": 0.9619468009380715, "grad_norm": 0.330078125, "learning_rate": 1.1460806335519159e-07, "loss": 1.9848, "step": 29815 }, { "epoch": 0.9619790647918679, "grad_norm": 0.33984375, "learning_rate": 1.1441397751806137e-07, "loss": 1.9746, "step": 29816 }, { "epoch": 0.9620113286456642, "grad_norm": 0.33203125, "learning_rate": 1.1422005553078319e-07, "loss": 1.9987, "step": 29817 }, { "epoch": 0.9620435924994606, "grad_norm": 0.326171875, "learning_rate": 1.1402629739549031e-07, "loss": 1.9773, "step": 29818 }, { "epoch": 0.962075856353257, "grad_norm": 0.333984375, "learning_rate": 1.138327031143177e-07, "loss": 1.9767, "step": 29819 }, { "epoch": 0.9621081202070533, "grad_norm": 0.333984375, "learning_rate": 1.1363927268939533e-07, "loss": 2.0003, "step": 29820 }, { "epoch": 0.9621403840608497, "grad_norm": 0.328125, "learning_rate": 1.1344600612285316e-07, "loss": 1.9773, "step": 29821 }, { "epoch": 0.962172647914646, "grad_norm": 0.328125, "learning_rate": 1.1325290341681616e-07, "loss": 2.0007, "step": 29822 }, { "epoch": 0.9622049117684424, "grad_norm": 0.326171875, "learning_rate": 1.1305996457341261e-07, "loss": 1.9758, "step": 29823 }, { "epoch": 0.9622371756222386, "grad_norm": 0.33203125, "learning_rate": 1.1286718959476583e-07, "loss": 1.9908, "step": 29824 }, { "epoch": 0.962269439476035, "grad_norm": 0.326171875, "learning_rate": 1.1267457848299579e-07, "loss": 1.9784, "step": 29825 }, { "epoch": 0.9623017033298313, "grad_norm": 0.326171875, "learning_rate": 1.124821312402241e-07, "loss": 1.9599, "step": 29826 }, { "epoch": 0.9623339671836277, "grad_norm": 0.33984375, "learning_rate": 1.1228984786856911e-07, "loss": 1.9972, "step": 29827 }, { "epoch": 0.962366231037424, "grad_norm": 0.328125, "learning_rate": 1.1209772837014576e-07, "loss": 1.9979, "step": 29828 }, { "epoch": 0.9623984948912204, "grad_norm": 0.337890625, "learning_rate": 1.119057727470707e-07, "loss": 1.9901, "step": 29829 }, { "epoch": 0.9624307587450167, "grad_norm": 0.32421875, "learning_rate": 1.1171398100145891e-07, "loss": 2.0208, "step": 29830 }, { "epoch": 0.9624630225988131, "grad_norm": 0.330078125, "learning_rate": 1.1152235313541536e-07, "loss": 1.9794, "step": 29831 }, { "epoch": 0.9624952864526094, "grad_norm": 0.326171875, "learning_rate": 1.1133088915105338e-07, "loss": 1.9873, "step": 29832 }, { "epoch": 0.9625275503064058, "grad_norm": 0.33203125, "learning_rate": 1.1113958905048128e-07, "loss": 2.0183, "step": 29833 }, { "epoch": 0.9625598141602021, "grad_norm": 0.33203125, "learning_rate": 1.1094845283580236e-07, "loss": 1.9903, "step": 29834 }, { "epoch": 0.9625920780139985, "grad_norm": 0.330078125, "learning_rate": 1.1075748050912326e-07, "loss": 2.0063, "step": 29835 }, { "epoch": 0.9626243418677948, "grad_norm": 0.337890625, "learning_rate": 1.1056667207254234e-07, "loss": 2.0087, "step": 29836 }, { "epoch": 0.9626566057215912, "grad_norm": 0.33203125, "learning_rate": 1.1037602752816123e-07, "loss": 1.9903, "step": 29837 }, { "epoch": 0.9626888695753876, "grad_norm": 0.33203125, "learning_rate": 1.1018554687808157e-07, "loss": 2.0141, "step": 29838 }, { "epoch": 0.9627211334291839, "grad_norm": 0.33203125, "learning_rate": 1.0999523012439671e-07, "loss": 2.0147, "step": 29839 }, { "epoch": 0.9627533972829803, "grad_norm": 0.34765625, "learning_rate": 1.0980507726919997e-07, "loss": 2.027, "step": 29840 }, { "epoch": 0.9627856611367765, "grad_norm": 0.3359375, "learning_rate": 1.0961508831458966e-07, "loss": 1.9654, "step": 29841 }, { "epoch": 0.962817924990573, "grad_norm": 0.330078125, "learning_rate": 1.0942526326265245e-07, "loss": 1.9841, "step": 29842 }, { "epoch": 0.9628501888443692, "grad_norm": 0.333984375, "learning_rate": 1.0923560211548001e-07, "loss": 1.9592, "step": 29843 }, { "epoch": 0.9628824526981656, "grad_norm": 0.337890625, "learning_rate": 1.0904610487515898e-07, "loss": 2.0182, "step": 29844 }, { "epoch": 0.9629147165519619, "grad_norm": 0.32421875, "learning_rate": 1.088567715437777e-07, "loss": 2.0106, "step": 29845 }, { "epoch": 0.9629469804057583, "grad_norm": 0.330078125, "learning_rate": 1.086676021234162e-07, "loss": 2.0117, "step": 29846 }, { "epoch": 0.9629792442595546, "grad_norm": 0.33203125, "learning_rate": 1.0847859661615945e-07, "loss": 1.9922, "step": 29847 }, { "epoch": 0.963011508113351, "grad_norm": 0.330078125, "learning_rate": 1.0828975502408744e-07, "loss": 1.9847, "step": 29848 }, { "epoch": 0.9630437719671473, "grad_norm": 0.333984375, "learning_rate": 1.0810107734927688e-07, "loss": 1.9827, "step": 29849 }, { "epoch": 0.9630760358209437, "grad_norm": 0.326171875, "learning_rate": 1.0791256359380775e-07, "loss": 1.9794, "step": 29850 }, { "epoch": 0.96310829967474, "grad_norm": 0.3359375, "learning_rate": 1.0772421375975339e-07, "loss": 1.9669, "step": 29851 }, { "epoch": 0.9631405635285364, "grad_norm": 0.337890625, "learning_rate": 1.0753602784918715e-07, "loss": 1.9905, "step": 29852 }, { "epoch": 0.9631728273823327, "grad_norm": 0.3359375, "learning_rate": 1.073480058641807e-07, "loss": 1.9681, "step": 29853 }, { "epoch": 0.9632050912361291, "grad_norm": 0.341796875, "learning_rate": 1.071601478068024e-07, "loss": 1.9544, "step": 29854 }, { "epoch": 0.9632373550899254, "grad_norm": 0.337890625, "learning_rate": 1.0697245367912223e-07, "loss": 1.9862, "step": 29855 }, { "epoch": 0.9632696189437218, "grad_norm": 0.33984375, "learning_rate": 1.0678492348320523e-07, "loss": 1.9688, "step": 29856 }, { "epoch": 0.963301882797518, "grad_norm": 0.328125, "learning_rate": 1.0659755722111476e-07, "loss": 1.9742, "step": 29857 }, { "epoch": 0.9633341466513144, "grad_norm": 0.328125, "learning_rate": 1.0641035489491413e-07, "loss": 2.0183, "step": 29858 }, { "epoch": 0.9633664105051108, "grad_norm": 0.33203125, "learning_rate": 1.0622331650666339e-07, "loss": 1.9596, "step": 29859 }, { "epoch": 0.9633986743589071, "grad_norm": 0.330078125, "learning_rate": 1.0603644205842256e-07, "loss": 1.9662, "step": 29860 }, { "epoch": 0.9634309382127035, "grad_norm": 0.337890625, "learning_rate": 1.058497315522483e-07, "loss": 2.0119, "step": 29861 }, { "epoch": 0.9634632020664998, "grad_norm": 0.328125, "learning_rate": 1.0566318499019235e-07, "loss": 2.0186, "step": 29862 }, { "epoch": 0.9634954659202962, "grad_norm": 0.333984375, "learning_rate": 1.0547680237431301e-07, "loss": 1.9838, "step": 29863 }, { "epoch": 0.9635277297740925, "grad_norm": 0.349609375, "learning_rate": 1.0529058370666034e-07, "loss": 2.0003, "step": 29864 }, { "epoch": 0.9635599936278889, "grad_norm": 0.349609375, "learning_rate": 1.0510452898928102e-07, "loss": 2.0071, "step": 29865 }, { "epoch": 0.9635922574816852, "grad_norm": 0.330078125, "learning_rate": 1.0491863822422676e-07, "loss": 1.9724, "step": 29866 }, { "epoch": 0.9636245213354816, "grad_norm": 0.33984375, "learning_rate": 1.0473291141354425e-07, "loss": 2.0014, "step": 29867 }, { "epoch": 0.9636567851892779, "grad_norm": 0.328125, "learning_rate": 1.0454734855927351e-07, "loss": 1.9851, "step": 29868 }, { "epoch": 0.9636890490430743, "grad_norm": 0.32421875, "learning_rate": 1.0436194966345958e-07, "loss": 1.9737, "step": 29869 }, { "epoch": 0.9637213128968706, "grad_norm": 0.328125, "learning_rate": 1.0417671472814583e-07, "loss": 1.9731, "step": 29870 }, { "epoch": 0.963753576750667, "grad_norm": 0.328125, "learning_rate": 1.0399164375536562e-07, "loss": 1.9718, "step": 29871 }, { "epoch": 0.9637858406044633, "grad_norm": 0.333984375, "learning_rate": 1.0380673674716068e-07, "loss": 2.002, "step": 29872 }, { "epoch": 0.9638181044582597, "grad_norm": 0.326171875, "learning_rate": 1.0362199370556435e-07, "loss": 1.992, "step": 29873 }, { "epoch": 0.963850368312056, "grad_norm": 0.33203125, "learning_rate": 1.0343741463261003e-07, "loss": 1.9614, "step": 29874 }, { "epoch": 0.9638826321658523, "grad_norm": 0.328125, "learning_rate": 1.0325299953033107e-07, "loss": 1.9955, "step": 29875 }, { "epoch": 0.9639148960196486, "grad_norm": 0.328125, "learning_rate": 1.0306874840075586e-07, "loss": 1.9972, "step": 29876 }, { "epoch": 0.963947159873445, "grad_norm": 0.337890625, "learning_rate": 1.0288466124591112e-07, "loss": 1.9836, "step": 29877 }, { "epoch": 0.9639794237272413, "grad_norm": 0.328125, "learning_rate": 1.0270073806782687e-07, "loss": 1.9891, "step": 29878 }, { "epoch": 0.9640116875810377, "grad_norm": 0.3359375, "learning_rate": 1.0251697886852485e-07, "loss": 2.0058, "step": 29879 }, { "epoch": 0.9640439514348341, "grad_norm": 0.341796875, "learning_rate": 1.0233338365002842e-07, "loss": 1.9891, "step": 29880 }, { "epoch": 0.9640762152886304, "grad_norm": 0.341796875, "learning_rate": 1.0214995241435932e-07, "loss": 1.9914, "step": 29881 }, { "epoch": 0.9641084791424268, "grad_norm": 0.333984375, "learning_rate": 1.0196668516353591e-07, "loss": 2.0068, "step": 29882 }, { "epoch": 0.9641407429962231, "grad_norm": 0.330078125, "learning_rate": 1.0178358189957326e-07, "loss": 1.9706, "step": 29883 }, { "epoch": 0.9641730068500195, "grad_norm": 0.326171875, "learning_rate": 1.0160064262449142e-07, "loss": 1.9408, "step": 29884 }, { "epoch": 0.9642052707038158, "grad_norm": 0.326171875, "learning_rate": 1.0141786734030045e-07, "loss": 2.008, "step": 29885 }, { "epoch": 0.9642375345576122, "grad_norm": 0.33984375, "learning_rate": 1.0123525604901374e-07, "loss": 2.0113, "step": 29886 }, { "epoch": 0.9642697984114085, "grad_norm": 0.328125, "learning_rate": 1.0105280875264133e-07, "loss": 1.9833, "step": 29887 }, { "epoch": 0.9643020622652049, "grad_norm": 0.333984375, "learning_rate": 1.0087052545318997e-07, "loss": 1.9891, "step": 29888 }, { "epoch": 0.9643343261190012, "grad_norm": 0.33203125, "learning_rate": 1.0068840615266972e-07, "loss": 1.9611, "step": 29889 }, { "epoch": 0.9643665899727976, "grad_norm": 0.330078125, "learning_rate": 1.0050645085308063e-07, "loss": 2.006, "step": 29890 }, { "epoch": 0.9643988538265938, "grad_norm": 0.330078125, "learning_rate": 1.0032465955642778e-07, "loss": 1.9874, "step": 29891 }, { "epoch": 0.9644311176803902, "grad_norm": 0.337890625, "learning_rate": 1.0014303226471288e-07, "loss": 2.0114, "step": 29892 }, { "epoch": 0.9644633815341865, "grad_norm": 0.3359375, "learning_rate": 9.996156897993436e-08, "loss": 1.9902, "step": 29893 }, { "epoch": 0.9644956453879829, "grad_norm": 0.33203125, "learning_rate": 9.978026970408893e-08, "loss": 1.9836, "step": 29894 }, { "epoch": 0.9645279092417792, "grad_norm": 0.330078125, "learning_rate": 9.959913443917335e-08, "loss": 1.9944, "step": 29895 }, { "epoch": 0.9645601730955756, "grad_norm": 0.333984375, "learning_rate": 9.9418163187181e-08, "loss": 1.971, "step": 29896 }, { "epoch": 0.9645924369493719, "grad_norm": 0.33203125, "learning_rate": 9.923735595010364e-08, "loss": 1.944, "step": 29897 }, { "epoch": 0.9646247008031683, "grad_norm": 0.345703125, "learning_rate": 9.9056712729933e-08, "loss": 2.0204, "step": 29898 }, { "epoch": 0.9646569646569647, "grad_norm": 0.326171875, "learning_rate": 9.887623352865416e-08, "loss": 1.9891, "step": 29899 }, { "epoch": 0.964689228510761, "grad_norm": 0.3359375, "learning_rate": 9.869591834825719e-08, "loss": 1.9872, "step": 29900 }, { "epoch": 0.9647214923645574, "grad_norm": 0.34765625, "learning_rate": 9.851576719072553e-08, "loss": 1.9986, "step": 29901 }, { "epoch": 0.9647537562183537, "grad_norm": 0.345703125, "learning_rate": 9.833578005804089e-08, "loss": 1.9839, "step": 29902 }, { "epoch": 0.9647860200721501, "grad_norm": 0.33984375, "learning_rate": 9.815595695218504e-08, "loss": 1.9555, "step": 29903 }, { "epoch": 0.9648182839259464, "grad_norm": 0.337890625, "learning_rate": 9.797629787513973e-08, "loss": 1.9953, "step": 29904 }, { "epoch": 0.9648505477797428, "grad_norm": 0.330078125, "learning_rate": 9.779680282888003e-08, "loss": 1.9697, "step": 29905 }, { "epoch": 0.9648828116335391, "grad_norm": 0.326171875, "learning_rate": 9.761747181538105e-08, "loss": 1.987, "step": 29906 }, { "epoch": 0.9649150754873355, "grad_norm": 0.32421875, "learning_rate": 9.743830483661953e-08, "loss": 1.9698, "step": 29907 }, { "epoch": 0.9649473393411317, "grad_norm": 0.33203125, "learning_rate": 9.725930189456556e-08, "loss": 1.9878, "step": 29908 }, { "epoch": 0.9649796031949281, "grad_norm": 0.33203125, "learning_rate": 9.708046299119089e-08, "loss": 1.9645, "step": 29909 }, { "epoch": 0.9650118670487244, "grad_norm": 0.333984375, "learning_rate": 9.690178812846229e-08, "loss": 1.9904, "step": 29910 }, { "epoch": 0.9650441309025208, "grad_norm": 0.33984375, "learning_rate": 9.672327730834651e-08, "loss": 1.989, "step": 29911 }, { "epoch": 0.9650763947563171, "grad_norm": 0.3515625, "learning_rate": 9.654493053281033e-08, "loss": 1.9758, "step": 29912 }, { "epoch": 0.9651086586101135, "grad_norm": 0.33203125, "learning_rate": 9.636674780381715e-08, "loss": 1.9933, "step": 29913 }, { "epoch": 0.9651409224639098, "grad_norm": 0.33203125, "learning_rate": 9.618872912332544e-08, "loss": 2.001, "step": 29914 }, { "epoch": 0.9651731863177062, "grad_norm": 0.333984375, "learning_rate": 9.601087449329526e-08, "loss": 2.008, "step": 29915 }, { "epoch": 0.9652054501715025, "grad_norm": 0.33984375, "learning_rate": 9.583318391568674e-08, "loss": 1.9661, "step": 29916 }, { "epoch": 0.9652377140252989, "grad_norm": 0.330078125, "learning_rate": 9.565565739245496e-08, "loss": 2.0002, "step": 29917 }, { "epoch": 0.9652699778790952, "grad_norm": 0.328125, "learning_rate": 9.547829492555171e-08, "loss": 2.0028, "step": 29918 }, { "epoch": 0.9653022417328916, "grad_norm": 0.337890625, "learning_rate": 9.530109651693209e-08, "loss": 1.9981, "step": 29919 }, { "epoch": 0.965334505586688, "grad_norm": 0.33203125, "learning_rate": 9.512406216854452e-08, "loss": 1.9873, "step": 29920 }, { "epoch": 0.9653667694404843, "grad_norm": 0.33203125, "learning_rate": 9.494719188233913e-08, "loss": 2.0174, "step": 29921 }, { "epoch": 0.9653990332942807, "grad_norm": 0.3359375, "learning_rate": 9.477048566026269e-08, "loss": 1.9806, "step": 29922 }, { "epoch": 0.965431297148077, "grad_norm": 0.330078125, "learning_rate": 9.459394350426031e-08, "loss": 2.0099, "step": 29923 }, { "epoch": 0.9654635610018734, "grad_norm": 0.333984375, "learning_rate": 9.441756541627211e-08, "loss": 1.9816, "step": 29924 }, { "epoch": 0.9654958248556696, "grad_norm": 0.33203125, "learning_rate": 9.424135139824485e-08, "loss": 1.9767, "step": 29925 }, { "epoch": 0.965528088709466, "grad_norm": 0.353515625, "learning_rate": 9.406530145211368e-08, "loss": 1.9815, "step": 29926 }, { "epoch": 0.9655603525632623, "grad_norm": 0.326171875, "learning_rate": 9.388941557981867e-08, "loss": 1.9977, "step": 29927 }, { "epoch": 0.9655926164170587, "grad_norm": 0.33984375, "learning_rate": 9.371369378329497e-08, "loss": 1.9938, "step": 29928 }, { "epoch": 0.965624880270855, "grad_norm": 0.34375, "learning_rate": 9.353813606447937e-08, "loss": 1.9869, "step": 29929 }, { "epoch": 0.9656571441246514, "grad_norm": 0.33984375, "learning_rate": 9.336274242529864e-08, "loss": 2.0125, "step": 29930 }, { "epoch": 0.9656894079784477, "grad_norm": 0.3359375, "learning_rate": 9.318751286768956e-08, "loss": 2.0109, "step": 29931 }, { "epoch": 0.9657216718322441, "grad_norm": 0.337890625, "learning_rate": 9.301244739357728e-08, "loss": 1.9856, "step": 29932 }, { "epoch": 0.9657539356860404, "grad_norm": 0.34375, "learning_rate": 9.283754600489025e-08, "loss": 1.9496, "step": 29933 }, { "epoch": 0.9657861995398368, "grad_norm": 0.328125, "learning_rate": 9.266280870355192e-08, "loss": 1.9845, "step": 29934 }, { "epoch": 0.9658184633936331, "grad_norm": 0.3359375, "learning_rate": 9.24882354914891e-08, "loss": 2.0036, "step": 29935 }, { "epoch": 0.9658507272474295, "grad_norm": 0.330078125, "learning_rate": 9.231382637061858e-08, "loss": 1.9683, "step": 29936 }, { "epoch": 0.9658829911012258, "grad_norm": 0.337890625, "learning_rate": 9.213958134286381e-08, "loss": 1.984, "step": 29937 }, { "epoch": 0.9659152549550222, "grad_norm": 0.333984375, "learning_rate": 9.196550041014328e-08, "loss": 1.9782, "step": 29938 }, { "epoch": 0.9659475188088186, "grad_norm": 0.33984375, "learning_rate": 9.179158357436878e-08, "loss": 1.995, "step": 29939 }, { "epoch": 0.9659797826626149, "grad_norm": 0.326171875, "learning_rate": 9.161783083745879e-08, "loss": 2.0057, "step": 29940 }, { "epoch": 0.9660120465164113, "grad_norm": 0.33203125, "learning_rate": 9.144424220132508e-08, "loss": 2.0059, "step": 29941 }, { "epoch": 0.9660443103702075, "grad_norm": 0.326171875, "learning_rate": 9.127081766787614e-08, "loss": 1.9857, "step": 29942 }, { "epoch": 0.966076574224004, "grad_norm": 0.333984375, "learning_rate": 9.10975572390238e-08, "loss": 1.9357, "step": 29943 }, { "epoch": 0.9661088380778002, "grad_norm": 0.33984375, "learning_rate": 9.092446091667483e-08, "loss": 2.0039, "step": 29944 }, { "epoch": 0.9661411019315966, "grad_norm": 0.337890625, "learning_rate": 9.075152870273273e-08, "loss": 1.9926, "step": 29945 }, { "epoch": 0.9661733657853929, "grad_norm": 0.33984375, "learning_rate": 9.057876059910097e-08, "loss": 1.985, "step": 29946 }, { "epoch": 0.9662056296391893, "grad_norm": 0.3359375, "learning_rate": 9.040615660768303e-08, "loss": 2.0036, "step": 29947 }, { "epoch": 0.9662378934929856, "grad_norm": 0.333984375, "learning_rate": 9.023371673037905e-08, "loss": 1.9828, "step": 29948 }, { "epoch": 0.966270157346782, "grad_norm": 0.330078125, "learning_rate": 9.00614409690842e-08, "loss": 1.9685, "step": 29949 }, { "epoch": 0.9663024212005783, "grad_norm": 0.326171875, "learning_rate": 8.98893293256986e-08, "loss": 1.9752, "step": 29950 }, { "epoch": 0.9663346850543747, "grad_norm": 0.337890625, "learning_rate": 8.971738180211409e-08, "loss": 1.9811, "step": 29951 }, { "epoch": 0.966366948908171, "grad_norm": 0.326171875, "learning_rate": 8.954559840022247e-08, "loss": 1.926, "step": 29952 }, { "epoch": 0.9663992127619674, "grad_norm": 0.33984375, "learning_rate": 8.937397912191724e-08, "loss": 1.9665, "step": 29953 }, { "epoch": 0.9664314766157637, "grad_norm": 0.33203125, "learning_rate": 8.920252396908857e-08, "loss": 1.9727, "step": 29954 }, { "epoch": 0.9664637404695601, "grad_norm": 0.330078125, "learning_rate": 8.903123294361825e-08, "loss": 1.9748, "step": 29955 }, { "epoch": 0.9664960043233564, "grad_norm": 0.328125, "learning_rate": 8.886010604739647e-08, "loss": 1.9846, "step": 29956 }, { "epoch": 0.9665282681771528, "grad_norm": 0.330078125, "learning_rate": 8.868914328230504e-08, "loss": 1.9885, "step": 29957 }, { "epoch": 0.966560532030949, "grad_norm": 0.337890625, "learning_rate": 8.851834465022745e-08, "loss": 1.9562, "step": 29958 }, { "epoch": 0.9665927958847454, "grad_norm": 0.33984375, "learning_rate": 8.834771015304055e-08, "loss": 1.9717, "step": 29959 }, { "epoch": 0.9666250597385418, "grad_norm": 0.328125, "learning_rate": 8.817723979262616e-08, "loss": 1.9954, "step": 29960 }, { "epoch": 0.9666573235923381, "grad_norm": 0.330078125, "learning_rate": 8.800693357085776e-08, "loss": 1.9836, "step": 29961 }, { "epoch": 0.9666895874461345, "grad_norm": 0.328125, "learning_rate": 8.78367914896122e-08, "loss": 1.9965, "step": 29962 }, { "epoch": 0.9667218512999308, "grad_norm": 0.328125, "learning_rate": 8.766681355076133e-08, "loss": 1.981, "step": 29963 }, { "epoch": 0.9667541151537272, "grad_norm": 0.33203125, "learning_rate": 8.749699975617531e-08, "loss": 2.0075, "step": 29964 }, { "epoch": 0.9667863790075235, "grad_norm": 0.32421875, "learning_rate": 8.73273501077243e-08, "loss": 1.951, "step": 29965 }, { "epoch": 0.9668186428613199, "grad_norm": 0.337890625, "learning_rate": 8.715786460727682e-08, "loss": 1.9363, "step": 29966 }, { "epoch": 0.9668509067151162, "grad_norm": 0.330078125, "learning_rate": 8.69885432566947e-08, "loss": 2.013, "step": 29967 }, { "epoch": 0.9668831705689126, "grad_norm": 0.328125, "learning_rate": 8.681938605784645e-08, "loss": 1.9855, "step": 29968 }, { "epoch": 0.9669154344227089, "grad_norm": 0.330078125, "learning_rate": 8.665039301259058e-08, "loss": 2.0077, "step": 29969 }, { "epoch": 0.9669476982765053, "grad_norm": 0.33203125, "learning_rate": 8.64815641227873e-08, "loss": 1.9585, "step": 29970 }, { "epoch": 0.9669799621303016, "grad_norm": 0.33203125, "learning_rate": 8.63128993902984e-08, "loss": 1.9891, "step": 29971 }, { "epoch": 0.967012225984098, "grad_norm": 0.345703125, "learning_rate": 8.614439881697745e-08, "loss": 1.9897, "step": 29972 }, { "epoch": 0.9670444898378943, "grad_norm": 0.330078125, "learning_rate": 8.597606240467792e-08, "loss": 1.9963, "step": 29973 }, { "epoch": 0.9670767536916907, "grad_norm": 0.33203125, "learning_rate": 8.58078901552567e-08, "loss": 1.9879, "step": 29974 }, { "epoch": 0.967109017545487, "grad_norm": 0.345703125, "learning_rate": 8.563988207056228e-08, "loss": 1.9659, "step": 29975 }, { "epoch": 0.9671412813992833, "grad_norm": 0.326171875, "learning_rate": 8.547203815244487e-08, "loss": 1.9743, "step": 29976 }, { "epoch": 0.9671735452530796, "grad_norm": 0.33203125, "learning_rate": 8.53043584027513e-08, "loss": 1.9804, "step": 29977 }, { "epoch": 0.967205809106876, "grad_norm": 0.330078125, "learning_rate": 8.513684282332678e-08, "loss": 1.9903, "step": 29978 }, { "epoch": 0.9672380729606723, "grad_norm": 0.326171875, "learning_rate": 8.49694914160165e-08, "loss": 1.9747, "step": 29979 }, { "epoch": 0.9672703368144687, "grad_norm": 0.3359375, "learning_rate": 8.480230418266232e-08, "loss": 2.0381, "step": 29980 }, { "epoch": 0.9673026006682651, "grad_norm": 0.328125, "learning_rate": 8.463528112510444e-08, "loss": 1.9969, "step": 29981 }, { "epoch": 0.9673348645220614, "grad_norm": 0.333984375, "learning_rate": 8.446842224518137e-08, "loss": 1.939, "step": 29982 }, { "epoch": 0.9673671283758578, "grad_norm": 0.330078125, "learning_rate": 8.430172754472832e-08, "loss": 1.9964, "step": 29983 }, { "epoch": 0.9673993922296541, "grad_norm": 0.333984375, "learning_rate": 8.413519702558215e-08, "loss": 1.9859, "step": 29984 }, { "epoch": 0.9674316560834505, "grad_norm": 0.33203125, "learning_rate": 8.39688306895764e-08, "loss": 1.99, "step": 29985 }, { "epoch": 0.9674639199372468, "grad_norm": 0.330078125, "learning_rate": 8.380262853853792e-08, "loss": 2.0062, "step": 29986 }, { "epoch": 0.9674961837910432, "grad_norm": 0.34375, "learning_rate": 8.363659057430195e-08, "loss": 1.9656, "step": 29987 }, { "epoch": 0.9675284476448395, "grad_norm": 0.330078125, "learning_rate": 8.347071679869367e-08, "loss": 2.0035, "step": 29988 }, { "epoch": 0.9675607114986359, "grad_norm": 0.333984375, "learning_rate": 8.330500721353662e-08, "loss": 1.9921, "step": 29989 }, { "epoch": 0.9675929753524322, "grad_norm": 0.326171875, "learning_rate": 8.313946182065768e-08, "loss": 1.9866, "step": 29990 }, { "epoch": 0.9676252392062286, "grad_norm": 0.33984375, "learning_rate": 8.297408062187872e-08, "loss": 1.9886, "step": 29991 }, { "epoch": 0.9676575030600248, "grad_norm": 0.330078125, "learning_rate": 8.28088636190183e-08, "loss": 2.0103, "step": 29992 }, { "epoch": 0.9676897669138212, "grad_norm": 0.34765625, "learning_rate": 8.264381081389827e-08, "loss": 1.9994, "step": 29993 }, { "epoch": 0.9677220307676175, "grad_norm": 0.318359375, "learning_rate": 8.24789222083322e-08, "loss": 2.0104, "step": 29994 }, { "epoch": 0.9677542946214139, "grad_norm": 0.330078125, "learning_rate": 8.231419780413695e-08, "loss": 1.989, "step": 29995 }, { "epoch": 0.9677865584752102, "grad_norm": 0.35546875, "learning_rate": 8.21496376031261e-08, "loss": 2.0137, "step": 29996 }, { "epoch": 0.9678188223290066, "grad_norm": 0.328125, "learning_rate": 8.198524160710818e-08, "loss": 1.9823, "step": 29997 }, { "epoch": 0.9678510861828029, "grad_norm": 0.326171875, "learning_rate": 8.182100981789509e-08, "loss": 1.9736, "step": 29998 }, { "epoch": 0.9678833500365993, "grad_norm": 0.330078125, "learning_rate": 8.16569422372937e-08, "loss": 1.9671, "step": 29999 }, { "epoch": 0.9679156138903957, "grad_norm": 0.33203125, "learning_rate": 8.149303886711257e-08, "loss": 1.9978, "step": 30000 }, { "epoch": 0.967947877744192, "grad_norm": 0.3359375, "learning_rate": 8.132929970915026e-08, "loss": 1.9632, "step": 30001 }, { "epoch": 0.9679801415979884, "grad_norm": 0.330078125, "learning_rate": 8.116572476521533e-08, "loss": 2.0198, "step": 30002 }, { "epoch": 0.9680124054517847, "grad_norm": 0.349609375, "learning_rate": 8.100231403710301e-08, "loss": 1.9989, "step": 30003 }, { "epoch": 0.9680446693055811, "grad_norm": 0.33203125, "learning_rate": 8.083906752661518e-08, "loss": 1.9702, "step": 30004 }, { "epoch": 0.9680769331593774, "grad_norm": 0.322265625, "learning_rate": 8.067598523554876e-08, "loss": 1.9759, "step": 30005 }, { "epoch": 0.9681091970131738, "grad_norm": 0.326171875, "learning_rate": 8.051306716569728e-08, "loss": 1.9982, "step": 30006 }, { "epoch": 0.9681414608669701, "grad_norm": 0.3359375, "learning_rate": 8.035031331885601e-08, "loss": 2.0312, "step": 30007 }, { "epoch": 0.9681737247207665, "grad_norm": 0.3359375, "learning_rate": 8.018772369681348e-08, "loss": 1.9925, "step": 30008 }, { "epoch": 0.9682059885745627, "grad_norm": 0.32421875, "learning_rate": 8.002529830136163e-08, "loss": 1.9808, "step": 30009 }, { "epoch": 0.9682382524283591, "grad_norm": 0.32421875, "learning_rate": 7.986303713428733e-08, "loss": 2.0016, "step": 30010 }, { "epoch": 0.9682705162821554, "grad_norm": 0.328125, "learning_rate": 7.97009401973775e-08, "loss": 1.9707, "step": 30011 }, { "epoch": 0.9683027801359518, "grad_norm": 0.33203125, "learning_rate": 7.953900749241738e-08, "loss": 1.9814, "step": 30012 }, { "epoch": 0.9683350439897481, "grad_norm": 0.333984375, "learning_rate": 7.937723902118554e-08, "loss": 2.0151, "step": 30013 }, { "epoch": 0.9683673078435445, "grad_norm": 0.33203125, "learning_rate": 7.921563478546556e-08, "loss": 1.9931, "step": 30014 }, { "epoch": 0.9683995716973408, "grad_norm": 0.33203125, "learning_rate": 7.905419478703769e-08, "loss": 1.9902, "step": 30015 }, { "epoch": 0.9684318355511372, "grad_norm": 0.33203125, "learning_rate": 7.889291902767548e-08, "loss": 2.0046, "step": 30016 }, { "epoch": 0.9684640994049335, "grad_norm": 0.337890625, "learning_rate": 7.873180750915421e-08, "loss": 1.9917, "step": 30017 }, { "epoch": 0.9684963632587299, "grad_norm": 0.33203125, "learning_rate": 7.857086023325078e-08, "loss": 1.9423, "step": 30018 }, { "epoch": 0.9685286271125262, "grad_norm": 0.326171875, "learning_rate": 7.841007720173543e-08, "loss": 1.9868, "step": 30019 }, { "epoch": 0.9685608909663226, "grad_norm": 0.328125, "learning_rate": 7.82494584163751e-08, "loss": 1.9988, "step": 30020 }, { "epoch": 0.968593154820119, "grad_norm": 0.328125, "learning_rate": 7.808900387894169e-08, "loss": 1.9923, "step": 30021 }, { "epoch": 0.9686254186739153, "grad_norm": 0.33203125, "learning_rate": 7.79287135911988e-08, "loss": 1.9942, "step": 30022 }, { "epoch": 0.9686576825277117, "grad_norm": 0.330078125, "learning_rate": 7.776858755491001e-08, "loss": 1.9819, "step": 30023 }, { "epoch": 0.968689946381508, "grad_norm": 0.3359375, "learning_rate": 7.760862577184225e-08, "loss": 1.9699, "step": 30024 }, { "epoch": 0.9687222102353044, "grad_norm": 0.3359375, "learning_rate": 7.744882824375244e-08, "loss": 1.9561, "step": 30025 }, { "epoch": 0.9687544740891006, "grad_norm": 0.326171875, "learning_rate": 7.728919497239917e-08, "loss": 1.9991, "step": 30026 }, { "epoch": 0.968786737942897, "grad_norm": 0.32421875, "learning_rate": 7.71297259595427e-08, "loss": 1.9562, "step": 30027 }, { "epoch": 0.9688190017966933, "grad_norm": 0.337890625, "learning_rate": 7.697042120693665e-08, "loss": 1.9931, "step": 30028 }, { "epoch": 0.9688512656504897, "grad_norm": 0.333984375, "learning_rate": 7.681128071633293e-08, "loss": 2.0036, "step": 30029 }, { "epoch": 0.968883529504286, "grad_norm": 0.330078125, "learning_rate": 7.665230448948513e-08, "loss": 1.9925, "step": 30030 }, { "epoch": 0.9689157933580824, "grad_norm": 0.333984375, "learning_rate": 7.649349252814353e-08, "loss": 1.9846, "step": 30031 }, { "epoch": 0.9689480572118787, "grad_norm": 0.328125, "learning_rate": 7.633484483405506e-08, "loss": 1.9954, "step": 30032 }, { "epoch": 0.9689803210656751, "grad_norm": 0.328125, "learning_rate": 7.617636140896666e-08, "loss": 1.9989, "step": 30033 }, { "epoch": 0.9690125849194714, "grad_norm": 0.33203125, "learning_rate": 7.601804225462362e-08, "loss": 1.9978, "step": 30034 }, { "epoch": 0.9690448487732678, "grad_norm": 0.328125, "learning_rate": 7.58598873727645e-08, "loss": 1.968, "step": 30035 }, { "epoch": 0.9690771126270641, "grad_norm": 0.333984375, "learning_rate": 7.570189676513627e-08, "loss": 2.0008, "step": 30036 }, { "epoch": 0.9691093764808605, "grad_norm": 0.328125, "learning_rate": 7.554407043347423e-08, "loss": 2.0038, "step": 30037 }, { "epoch": 0.9691416403346568, "grad_norm": 0.326171875, "learning_rate": 7.538640837951526e-08, "loss": 1.9899, "step": 30038 }, { "epoch": 0.9691739041884532, "grad_norm": 0.3359375, "learning_rate": 7.522891060499638e-08, "loss": 1.9845, "step": 30039 }, { "epoch": 0.9692061680422496, "grad_norm": 0.328125, "learning_rate": 7.507157711165113e-08, "loss": 1.9714, "step": 30040 }, { "epoch": 0.9692384318960459, "grad_norm": 0.330078125, "learning_rate": 7.491440790120985e-08, "loss": 1.9981, "step": 30041 }, { "epoch": 0.9692706957498423, "grad_norm": 0.33203125, "learning_rate": 7.475740297540446e-08, "loss": 2.0113, "step": 30042 }, { "epoch": 0.9693029596036385, "grad_norm": 0.33203125, "learning_rate": 7.460056233596191e-08, "loss": 2.011, "step": 30043 }, { "epoch": 0.969335223457435, "grad_norm": 0.32421875, "learning_rate": 7.444388598460916e-08, "loss": 1.9802, "step": 30044 }, { "epoch": 0.9693674873112312, "grad_norm": 0.337890625, "learning_rate": 7.428737392306984e-08, "loss": 2.0088, "step": 30045 }, { "epoch": 0.9693997511650276, "grad_norm": 0.330078125, "learning_rate": 7.413102615306755e-08, "loss": 1.9837, "step": 30046 }, { "epoch": 0.9694320150188239, "grad_norm": 0.33203125, "learning_rate": 7.397484267632426e-08, "loss": 2.0266, "step": 30047 }, { "epoch": 0.9694642788726203, "grad_norm": 0.337890625, "learning_rate": 7.381882349455693e-08, "loss": 1.994, "step": 30048 }, { "epoch": 0.9694965427264166, "grad_norm": 0.330078125, "learning_rate": 7.366296860948418e-08, "loss": 1.9831, "step": 30049 }, { "epoch": 0.969528806580213, "grad_norm": 0.33203125, "learning_rate": 7.350727802282297e-08, "loss": 1.9849, "step": 30050 }, { "epoch": 0.9695610704340093, "grad_norm": 0.333984375, "learning_rate": 7.335175173628362e-08, "loss": 1.9578, "step": 30051 }, { "epoch": 0.9695933342878057, "grad_norm": 0.33203125, "learning_rate": 7.319638975157972e-08, "loss": 2.0326, "step": 30052 }, { "epoch": 0.969625598141602, "grad_norm": 0.32421875, "learning_rate": 7.304119207042325e-08, "loss": 1.9644, "step": 30053 }, { "epoch": 0.9696578619953984, "grad_norm": 0.333984375, "learning_rate": 7.288615869451787e-08, "loss": 1.982, "step": 30054 }, { "epoch": 0.9696901258491947, "grad_norm": 0.33203125, "learning_rate": 7.273128962557552e-08, "loss": 1.9902, "step": 30055 }, { "epoch": 0.9697223897029911, "grad_norm": 0.328125, "learning_rate": 7.257658486529817e-08, "loss": 1.999, "step": 30056 }, { "epoch": 0.9697546535567874, "grad_norm": 0.333984375, "learning_rate": 7.24220444153878e-08, "loss": 1.9548, "step": 30057 }, { "epoch": 0.9697869174105838, "grad_norm": 0.326171875, "learning_rate": 7.226766827754638e-08, "loss": 1.9651, "step": 30058 }, { "epoch": 0.96981918126438, "grad_norm": 0.326171875, "learning_rate": 7.211345645347422e-08, "loss": 2.0062, "step": 30059 }, { "epoch": 0.9698514451181764, "grad_norm": 0.333984375, "learning_rate": 7.19594089448683e-08, "loss": 1.9793, "step": 30060 }, { "epoch": 0.9698837089719728, "grad_norm": 0.337890625, "learning_rate": 7.180552575342392e-08, "loss": 2.0139, "step": 30061 }, { "epoch": 0.9699159728257691, "grad_norm": 0.328125, "learning_rate": 7.165180688083307e-08, "loss": 1.987, "step": 30062 }, { "epoch": 0.9699482366795655, "grad_norm": 0.326171875, "learning_rate": 7.149825232879104e-08, "loss": 1.9392, "step": 30063 }, { "epoch": 0.9699805005333618, "grad_norm": 0.330078125, "learning_rate": 7.134486209898649e-08, "loss": 1.9755, "step": 30064 }, { "epoch": 0.9700127643871582, "grad_norm": 0.326171875, "learning_rate": 7.119163619310809e-08, "loss": 1.9977, "step": 30065 }, { "epoch": 0.9700450282409545, "grad_norm": 0.3359375, "learning_rate": 7.103857461284113e-08, "loss": 1.9855, "step": 30066 }, { "epoch": 0.9700772920947509, "grad_norm": 0.341796875, "learning_rate": 7.088567735987261e-08, "loss": 1.9791, "step": 30067 }, { "epoch": 0.9701095559485472, "grad_norm": 0.337890625, "learning_rate": 7.073294443588452e-08, "loss": 2.0128, "step": 30068 }, { "epoch": 0.9701418198023436, "grad_norm": 0.3359375, "learning_rate": 7.058037584255717e-08, "loss": 2.0163, "step": 30069 }, { "epoch": 0.9701740836561399, "grad_norm": 0.32421875, "learning_rate": 7.042797158157088e-08, "loss": 1.9888, "step": 30070 }, { "epoch": 0.9702063475099363, "grad_norm": 0.328125, "learning_rate": 7.027573165460432e-08, "loss": 1.9594, "step": 30071 }, { "epoch": 0.9702386113637326, "grad_norm": 0.328125, "learning_rate": 7.012365606333115e-08, "loss": 1.9935, "step": 30072 }, { "epoch": 0.970270875217529, "grad_norm": 0.333984375, "learning_rate": 6.997174480942503e-08, "loss": 2.0011, "step": 30073 }, { "epoch": 0.9703031390713253, "grad_norm": 0.330078125, "learning_rate": 6.981999789456128e-08, "loss": 2.0052, "step": 30074 }, { "epoch": 0.9703354029251217, "grad_norm": 0.32421875, "learning_rate": 6.966841532040691e-08, "loss": 2.0031, "step": 30075 }, { "epoch": 0.970367666778918, "grad_norm": 0.33203125, "learning_rate": 6.951699708863058e-08, "loss": 2.0204, "step": 30076 }, { "epoch": 0.9703999306327143, "grad_norm": 0.328125, "learning_rate": 6.93657432009026e-08, "loss": 1.9892, "step": 30077 }, { "epoch": 0.9704321944865106, "grad_norm": 0.333984375, "learning_rate": 6.921465365888336e-08, "loss": 1.975, "step": 30078 }, { "epoch": 0.970464458340307, "grad_norm": 0.345703125, "learning_rate": 6.906372846423815e-08, "loss": 2.0248, "step": 30079 }, { "epoch": 0.9704967221941033, "grad_norm": 0.326171875, "learning_rate": 6.891296761862898e-08, "loss": 1.9615, "step": 30080 }, { "epoch": 0.9705289860478997, "grad_norm": 0.330078125, "learning_rate": 6.876237112371287e-08, "loss": 1.9905, "step": 30081 }, { "epoch": 0.9705612499016961, "grad_norm": 0.328125, "learning_rate": 6.861193898115015e-08, "loss": 2.0028, "step": 30082 }, { "epoch": 0.9705935137554924, "grad_norm": 0.330078125, "learning_rate": 6.84616711925945e-08, "loss": 1.9769, "step": 30083 }, { "epoch": 0.9706257776092888, "grad_norm": 0.349609375, "learning_rate": 6.831156775970292e-08, "loss": 1.9924, "step": 30084 }, { "epoch": 0.9706580414630851, "grad_norm": 0.330078125, "learning_rate": 6.816162868412245e-08, "loss": 1.9842, "step": 30085 }, { "epoch": 0.9706903053168815, "grad_norm": 0.328125, "learning_rate": 6.801185396751008e-08, "loss": 2.019, "step": 30086 }, { "epoch": 0.9707225691706778, "grad_norm": 0.337890625, "learning_rate": 6.78622436115095e-08, "loss": 1.9165, "step": 30087 }, { "epoch": 0.9707548330244742, "grad_norm": 0.3359375, "learning_rate": 6.771279761776771e-08, "loss": 1.9557, "step": 30088 }, { "epoch": 0.9707870968782705, "grad_norm": 0.337890625, "learning_rate": 6.756351598793342e-08, "loss": 1.996, "step": 30089 }, { "epoch": 0.9708193607320669, "grad_norm": 0.330078125, "learning_rate": 6.741439872364697e-08, "loss": 1.9825, "step": 30090 }, { "epoch": 0.9708516245858632, "grad_norm": 0.330078125, "learning_rate": 6.72654458265487e-08, "loss": 1.9683, "step": 30091 }, { "epoch": 0.9708838884396596, "grad_norm": 0.330078125, "learning_rate": 6.711665729828065e-08, "loss": 1.9701, "step": 30092 }, { "epoch": 0.9709161522934558, "grad_norm": 0.337890625, "learning_rate": 6.696803314047984e-08, "loss": 1.9903, "step": 30093 }, { "epoch": 0.9709484161472522, "grad_norm": 0.33203125, "learning_rate": 6.681957335478161e-08, "loss": 2.0007, "step": 30094 }, { "epoch": 0.9709806800010485, "grad_norm": 0.330078125, "learning_rate": 6.667127794282135e-08, "loss": 2.0069, "step": 30095 }, { "epoch": 0.9710129438548449, "grad_norm": 0.328125, "learning_rate": 6.652314690623107e-08, "loss": 1.9772, "step": 30096 }, { "epoch": 0.9710452077086412, "grad_norm": 0.33203125, "learning_rate": 6.637518024663947e-08, "loss": 1.9691, "step": 30097 }, { "epoch": 0.9710774715624376, "grad_norm": 0.33203125, "learning_rate": 6.622737796567691e-08, "loss": 2.0111, "step": 30098 }, { "epoch": 0.9711097354162339, "grad_norm": 0.33203125, "learning_rate": 6.607974006496875e-08, "loss": 1.9626, "step": 30099 }, { "epoch": 0.9711419992700303, "grad_norm": 0.330078125, "learning_rate": 6.59322665461437e-08, "loss": 1.9605, "step": 30100 }, { "epoch": 0.9711742631238267, "grad_norm": 0.34375, "learning_rate": 6.578495741082046e-08, "loss": 1.9956, "step": 30101 }, { "epoch": 0.971206526977623, "grad_norm": 0.328125, "learning_rate": 6.563781266062274e-08, "loss": 2.0057, "step": 30102 }, { "epoch": 0.9712387908314194, "grad_norm": 0.333984375, "learning_rate": 6.54908322971709e-08, "loss": 2.0029, "step": 30103 }, { "epoch": 0.9712710546852157, "grad_norm": 0.337890625, "learning_rate": 6.534401632208197e-08, "loss": 2.0017, "step": 30104 }, { "epoch": 0.9713033185390121, "grad_norm": 0.330078125, "learning_rate": 6.519736473697136e-08, "loss": 1.9951, "step": 30105 }, { "epoch": 0.9713355823928084, "grad_norm": 0.330078125, "learning_rate": 6.505087754345607e-08, "loss": 1.9974, "step": 30106 }, { "epoch": 0.9713678462466048, "grad_norm": 0.330078125, "learning_rate": 6.490455474314316e-08, "loss": 1.9491, "step": 30107 }, { "epoch": 0.9714001101004011, "grad_norm": 0.333984375, "learning_rate": 6.475839633764968e-08, "loss": 2.0025, "step": 30108 }, { "epoch": 0.9714323739541975, "grad_norm": 0.333984375, "learning_rate": 6.461240232857934e-08, "loss": 2.0012, "step": 30109 }, { "epoch": 0.9714646378079937, "grad_norm": 0.333984375, "learning_rate": 6.446657271754087e-08, "loss": 1.9786, "step": 30110 }, { "epoch": 0.9714969016617901, "grad_norm": 0.328125, "learning_rate": 6.432090750614129e-08, "loss": 2.0028, "step": 30111 }, { "epoch": 0.9715291655155864, "grad_norm": 0.333984375, "learning_rate": 6.417540669598099e-08, "loss": 1.9919, "step": 30112 }, { "epoch": 0.9715614293693828, "grad_norm": 0.3359375, "learning_rate": 6.40300702886637e-08, "loss": 1.9952, "step": 30113 }, { "epoch": 0.9715936932231791, "grad_norm": 0.330078125, "learning_rate": 6.388489828578814e-08, "loss": 2.009, "step": 30114 }, { "epoch": 0.9716259570769755, "grad_norm": 0.33203125, "learning_rate": 6.373989068895303e-08, "loss": 1.9884, "step": 30115 }, { "epoch": 0.9716582209307718, "grad_norm": 0.333984375, "learning_rate": 6.359504749975209e-08, "loss": 1.9972, "step": 30116 }, { "epoch": 0.9716904847845682, "grad_norm": 0.33203125, "learning_rate": 6.345036871978238e-08, "loss": 1.965, "step": 30117 }, { "epoch": 0.9717227486383645, "grad_norm": 0.333984375, "learning_rate": 6.330585435063762e-08, "loss": 1.9961, "step": 30118 }, { "epoch": 0.9717550124921609, "grad_norm": 0.328125, "learning_rate": 6.316150439390323e-08, "loss": 1.9456, "step": 30119 }, { "epoch": 0.9717872763459572, "grad_norm": 0.341796875, "learning_rate": 6.30173188511729e-08, "loss": 1.9898, "step": 30120 }, { "epoch": 0.9718195401997536, "grad_norm": 0.322265625, "learning_rate": 6.287329772403372e-08, "loss": 1.9361, "step": 30121 }, { "epoch": 0.97185180405355, "grad_norm": 0.337890625, "learning_rate": 6.272944101406775e-08, "loss": 2.0014, "step": 30122 }, { "epoch": 0.9718840679073463, "grad_norm": 0.33984375, "learning_rate": 6.258574872286038e-08, "loss": 1.9929, "step": 30123 }, { "epoch": 0.9719163317611427, "grad_norm": 0.333984375, "learning_rate": 6.244222085199202e-08, "loss": 1.9798, "step": 30124 }, { "epoch": 0.971948595614939, "grad_norm": 0.337890625, "learning_rate": 6.229885740304475e-08, "loss": 2.0, "step": 30125 }, { "epoch": 0.9719808594687354, "grad_norm": 0.337890625, "learning_rate": 6.215565837759562e-08, "loss": 1.9565, "step": 30126 }, { "epoch": 0.9720131233225316, "grad_norm": 0.349609375, "learning_rate": 6.201262377722005e-08, "loss": 1.9403, "step": 30127 }, { "epoch": 0.972045387176328, "grad_norm": 0.34375, "learning_rate": 6.186975360349345e-08, "loss": 2.0002, "step": 30128 }, { "epoch": 0.9720776510301243, "grad_norm": 0.3359375, "learning_rate": 6.172704785798788e-08, "loss": 1.9672, "step": 30129 }, { "epoch": 0.9721099148839207, "grad_norm": 0.3359375, "learning_rate": 6.158450654227376e-08, "loss": 2.0064, "step": 30130 }, { "epoch": 0.972142178737717, "grad_norm": 0.326171875, "learning_rate": 6.14421296579215e-08, "loss": 1.9882, "step": 30131 }, { "epoch": 0.9721744425915134, "grad_norm": 0.330078125, "learning_rate": 6.129991720649652e-08, "loss": 1.9837, "step": 30132 }, { "epoch": 0.9722067064453097, "grad_norm": 0.337890625, "learning_rate": 6.115786918956589e-08, "loss": 2.0088, "step": 30133 }, { "epoch": 0.9722389702991061, "grad_norm": 0.341796875, "learning_rate": 6.10159856086917e-08, "loss": 1.9889, "step": 30134 }, { "epoch": 0.9722712341529024, "grad_norm": 0.3359375, "learning_rate": 6.087426646543603e-08, "loss": 1.9923, "step": 30135 }, { "epoch": 0.9723034980066988, "grad_norm": 0.33203125, "learning_rate": 6.07327117613593e-08, "loss": 2.0051, "step": 30136 }, { "epoch": 0.9723357618604951, "grad_norm": 0.33984375, "learning_rate": 6.059132149802026e-08, "loss": 2.0051, "step": 30137 }, { "epoch": 0.9723680257142915, "grad_norm": 0.3359375, "learning_rate": 6.045009567697434e-08, "loss": 2.012, "step": 30138 }, { "epoch": 0.9724002895680878, "grad_norm": 0.328125, "learning_rate": 6.03090342997753e-08, "loss": 2.004, "step": 30139 }, { "epoch": 0.9724325534218842, "grad_norm": 0.330078125, "learning_rate": 6.016813736797854e-08, "loss": 2.0054, "step": 30140 }, { "epoch": 0.9724648172756805, "grad_norm": 0.34375, "learning_rate": 6.002740488313118e-08, "loss": 1.9857, "step": 30141 }, { "epoch": 0.9724970811294769, "grad_norm": 0.33203125, "learning_rate": 5.988683684678531e-08, "loss": 1.9838, "step": 30142 }, { "epoch": 0.9725293449832733, "grad_norm": 0.337890625, "learning_rate": 5.974643326048801e-08, "loss": 1.9915, "step": 30143 }, { "epoch": 0.9725616088370695, "grad_norm": 0.3359375, "learning_rate": 5.960619412578139e-08, "loss": 1.9873, "step": 30144 }, { "epoch": 0.972593872690866, "grad_norm": 0.328125, "learning_rate": 5.946611944421254e-08, "loss": 1.9607, "step": 30145 }, { "epoch": 0.9726261365446622, "grad_norm": 0.33203125, "learning_rate": 5.932620921732357e-08, "loss": 1.9964, "step": 30146 }, { "epoch": 0.9726584003984586, "grad_norm": 0.3359375, "learning_rate": 5.9186463446651574e-08, "loss": 1.9703, "step": 30147 }, { "epoch": 0.9726906642522549, "grad_norm": 0.322265625, "learning_rate": 5.9046882133738655e-08, "loss": 1.996, "step": 30148 }, { "epoch": 0.9727229281060513, "grad_norm": 0.357421875, "learning_rate": 5.8907465280116924e-08, "loss": 2.0078, "step": 30149 }, { "epoch": 0.9727551919598476, "grad_norm": 0.333984375, "learning_rate": 5.876821288732515e-08, "loss": 1.9841, "step": 30150 }, { "epoch": 0.972787455813644, "grad_norm": 0.330078125, "learning_rate": 5.8629124956892096e-08, "loss": 1.9808, "step": 30151 }, { "epoch": 0.9728197196674403, "grad_norm": 0.330078125, "learning_rate": 5.849020149035156e-08, "loss": 1.9692, "step": 30152 }, { "epoch": 0.9728519835212367, "grad_norm": 0.33203125, "learning_rate": 5.83514424892323e-08, "loss": 2.014, "step": 30153 }, { "epoch": 0.972884247375033, "grad_norm": 0.326171875, "learning_rate": 5.821284795506143e-08, "loss": 1.9981, "step": 30154 }, { "epoch": 0.9729165112288294, "grad_norm": 0.341796875, "learning_rate": 5.80744178893644e-08, "loss": 1.9947, "step": 30155 }, { "epoch": 0.9729487750826257, "grad_norm": 0.33203125, "learning_rate": 5.793615229366334e-08, "loss": 2.0014, "step": 30156 }, { "epoch": 0.9729810389364221, "grad_norm": 0.333984375, "learning_rate": 5.7798051169482e-08, "loss": 2.0191, "step": 30157 }, { "epoch": 0.9730133027902184, "grad_norm": 0.337890625, "learning_rate": 5.7660114518342525e-08, "loss": 1.9788, "step": 30158 }, { "epoch": 0.9730455666440148, "grad_norm": 0.3359375, "learning_rate": 5.7522342341758683e-08, "loss": 1.9965, "step": 30159 }, { "epoch": 0.973077830497811, "grad_norm": 0.34375, "learning_rate": 5.738473464124927e-08, "loss": 1.9788, "step": 30160 }, { "epoch": 0.9731100943516074, "grad_norm": 0.326171875, "learning_rate": 5.724729141832974e-08, "loss": 1.9998, "step": 30161 }, { "epoch": 0.9731423582054038, "grad_norm": 0.337890625, "learning_rate": 5.711001267451221e-08, "loss": 2.0034, "step": 30162 }, { "epoch": 0.9731746220592001, "grad_norm": 0.333984375, "learning_rate": 5.697289841130715e-08, "loss": 1.9903, "step": 30163 }, { "epoch": 0.9732068859129965, "grad_norm": 0.337890625, "learning_rate": 5.683594863022501e-08, "loss": 2.0223, "step": 30164 }, { "epoch": 0.9732391497667928, "grad_norm": 0.341796875, "learning_rate": 5.669916333277125e-08, "loss": 1.9988, "step": 30165 }, { "epoch": 0.9732714136205892, "grad_norm": 0.328125, "learning_rate": 5.656254252045468e-08, "loss": 1.98, "step": 30166 }, { "epoch": 0.9733036774743855, "grad_norm": 0.330078125, "learning_rate": 5.642608619477574e-08, "loss": 1.991, "step": 30167 }, { "epoch": 0.9733359413281819, "grad_norm": 0.330078125, "learning_rate": 5.628979435723825e-08, "loss": 1.9733, "step": 30168 }, { "epoch": 0.9733682051819782, "grad_norm": 0.3359375, "learning_rate": 5.615366700934266e-08, "loss": 2.0001, "step": 30169 }, { "epoch": 0.9734004690357746, "grad_norm": 0.33203125, "learning_rate": 5.6017704152586114e-08, "loss": 1.9905, "step": 30170 }, { "epoch": 0.9734327328895709, "grad_norm": 0.330078125, "learning_rate": 5.588190578846741e-08, "loss": 2.0201, "step": 30171 }, { "epoch": 0.9734649967433673, "grad_norm": 0.330078125, "learning_rate": 5.574627191847703e-08, "loss": 1.9951, "step": 30172 }, { "epoch": 0.9734972605971636, "grad_norm": 0.33984375, "learning_rate": 5.561080254411377e-08, "loss": 1.9983, "step": 30173 }, { "epoch": 0.97352952445096, "grad_norm": 0.328125, "learning_rate": 5.547549766686477e-08, "loss": 1.9774, "step": 30174 }, { "epoch": 0.9735617883047563, "grad_norm": 0.330078125, "learning_rate": 5.5340357288218845e-08, "loss": 1.9719, "step": 30175 }, { "epoch": 0.9735940521585527, "grad_norm": 0.328125, "learning_rate": 5.520538140966647e-08, "loss": 1.9736, "step": 30176 }, { "epoch": 0.973626316012349, "grad_norm": 0.32421875, "learning_rate": 5.507057003269145e-08, "loss": 1.9761, "step": 30177 }, { "epoch": 0.9736585798661453, "grad_norm": 0.330078125, "learning_rate": 5.49359231587776e-08, "loss": 1.9664, "step": 30178 }, { "epoch": 0.9736908437199416, "grad_norm": 0.3359375, "learning_rate": 5.4801440789410404e-08, "loss": 2.0, "step": 30179 }, { "epoch": 0.973723107573738, "grad_norm": 0.333984375, "learning_rate": 5.4667122926063684e-08, "loss": 2.0085, "step": 30180 }, { "epoch": 0.9737553714275343, "grad_norm": 0.330078125, "learning_rate": 5.4532969570221246e-08, "loss": 1.9848, "step": 30181 }, { "epoch": 0.9737876352813307, "grad_norm": 0.330078125, "learning_rate": 5.439898072335858e-08, "loss": 1.9752, "step": 30182 }, { "epoch": 0.9738198991351271, "grad_norm": 0.328125, "learning_rate": 5.4265156386947844e-08, "loss": 2.0127, "step": 30183 }, { "epoch": 0.9738521629889234, "grad_norm": 0.333984375, "learning_rate": 5.413149656246619e-08, "loss": 1.9919, "step": 30184 }, { "epoch": 0.9738844268427198, "grad_norm": 0.330078125, "learning_rate": 5.399800125138243e-08, "loss": 2.0098, "step": 30185 }, { "epoch": 0.9739166906965161, "grad_norm": 0.3359375, "learning_rate": 5.386467045516708e-08, "loss": 2.0014, "step": 30186 }, { "epoch": 0.9739489545503125, "grad_norm": 0.330078125, "learning_rate": 5.373150417528561e-08, "loss": 1.9559, "step": 30187 }, { "epoch": 0.9739812184041088, "grad_norm": 0.33203125, "learning_rate": 5.3598502413206854e-08, "loss": 1.996, "step": 30188 }, { "epoch": 0.9740134822579052, "grad_norm": 0.34375, "learning_rate": 5.346566517039131e-08, "loss": 1.9701, "step": 30189 }, { "epoch": 0.9740457461117015, "grad_norm": 0.32421875, "learning_rate": 5.333299244830614e-08, "loss": 2.0075, "step": 30190 }, { "epoch": 0.9740780099654979, "grad_norm": 0.328125, "learning_rate": 5.320048424840518e-08, "loss": 1.9803, "step": 30191 }, { "epoch": 0.9741102738192942, "grad_norm": 0.330078125, "learning_rate": 5.3068140572152254e-08, "loss": 1.9796, "step": 30192 }, { "epoch": 0.9741425376730906, "grad_norm": 0.330078125, "learning_rate": 5.293596142100288e-08, "loss": 1.9508, "step": 30193 }, { "epoch": 0.9741748015268868, "grad_norm": 0.328125, "learning_rate": 5.2803946796410874e-08, "loss": 1.9832, "step": 30194 }, { "epoch": 0.9742070653806832, "grad_norm": 0.328125, "learning_rate": 5.2672096699830083e-08, "loss": 1.9752, "step": 30195 }, { "epoch": 0.9742393292344795, "grad_norm": 0.326171875, "learning_rate": 5.254041113271269e-08, "loss": 1.9879, "step": 30196 }, { "epoch": 0.9742715930882759, "grad_norm": 0.333984375, "learning_rate": 5.2408890096505846e-08, "loss": 1.9812, "step": 30197 }, { "epoch": 0.9743038569420722, "grad_norm": 0.328125, "learning_rate": 5.227753359265841e-08, "loss": 1.9779, "step": 30198 }, { "epoch": 0.9743361207958686, "grad_norm": 0.341796875, "learning_rate": 5.214634162261755e-08, "loss": 1.9758, "step": 30199 }, { "epoch": 0.9743683846496649, "grad_norm": 0.330078125, "learning_rate": 5.201531418782379e-08, "loss": 1.9777, "step": 30200 }, { "epoch": 0.9744006485034613, "grad_norm": 0.33203125, "learning_rate": 5.18844512897243e-08, "loss": 1.9958, "step": 30201 }, { "epoch": 0.9744329123572577, "grad_norm": 0.341796875, "learning_rate": 5.175375292975626e-08, "loss": 1.995, "step": 30202 }, { "epoch": 0.974465176211054, "grad_norm": 0.328125, "learning_rate": 5.1623219109358523e-08, "loss": 2.0051, "step": 30203 }, { "epoch": 0.9744974400648504, "grad_norm": 0.33984375, "learning_rate": 5.1492849829969936e-08, "loss": 2.0005, "step": 30204 }, { "epoch": 0.9745297039186467, "grad_norm": 0.33203125, "learning_rate": 5.1362645093022684e-08, "loss": 1.9907, "step": 30205 }, { "epoch": 0.9745619677724431, "grad_norm": 0.322265625, "learning_rate": 5.1232604899952296e-08, "loss": 1.9936, "step": 30206 }, { "epoch": 0.9745942316262394, "grad_norm": 0.330078125, "learning_rate": 5.1102729252189284e-08, "loss": 1.9558, "step": 30207 }, { "epoch": 0.9746264954800358, "grad_norm": 0.36328125, "learning_rate": 5.097301815116251e-08, "loss": 1.9983, "step": 30208 }, { "epoch": 0.9746587593338321, "grad_norm": 0.337890625, "learning_rate": 5.0843471598299163e-08, "loss": 2.0157, "step": 30209 }, { "epoch": 0.9746910231876285, "grad_norm": 0.330078125, "learning_rate": 5.0714089595029765e-08, "loss": 1.9876, "step": 30210 }, { "epoch": 0.9747232870414247, "grad_norm": 0.333984375, "learning_rate": 5.0584872142771524e-08, "loss": 1.9685, "step": 30211 }, { "epoch": 0.9747555508952211, "grad_norm": 0.333984375, "learning_rate": 5.045581924295162e-08, "loss": 1.9709, "step": 30212 }, { "epoch": 0.9747878147490174, "grad_norm": 0.3359375, "learning_rate": 5.032693089699059e-08, "loss": 1.9542, "step": 30213 }, { "epoch": 0.9748200786028138, "grad_norm": 0.33203125, "learning_rate": 5.019820710630396e-08, "loss": 1.9918, "step": 30214 }, { "epoch": 0.9748523424566101, "grad_norm": 0.33203125, "learning_rate": 5.006964787231061e-08, "loss": 1.9706, "step": 30215 }, { "epoch": 0.9748846063104065, "grad_norm": 0.359375, "learning_rate": 4.994125319642773e-08, "loss": 2.0046, "step": 30216 }, { "epoch": 0.9749168701642028, "grad_norm": 0.337890625, "learning_rate": 4.981302308006419e-08, "loss": 1.9457, "step": 30217 }, { "epoch": 0.9749491340179992, "grad_norm": 0.330078125, "learning_rate": 4.968495752463553e-08, "loss": 2.0124, "step": 30218 }, { "epoch": 0.9749813978717955, "grad_norm": 0.341796875, "learning_rate": 4.95570565315473e-08, "loss": 1.9846, "step": 30219 }, { "epoch": 0.9750136617255919, "grad_norm": 0.337890625, "learning_rate": 4.9429320102211686e-08, "loss": 1.9786, "step": 30220 }, { "epoch": 0.9750459255793882, "grad_norm": 0.33203125, "learning_rate": 4.930174823803091e-08, "loss": 1.9226, "step": 30221 }, { "epoch": 0.9750781894331846, "grad_norm": 0.330078125, "learning_rate": 4.917434094041218e-08, "loss": 2.0165, "step": 30222 }, { "epoch": 0.975110453286981, "grad_norm": 0.32421875, "learning_rate": 4.904709821075604e-08, "loss": 2.031, "step": 30223 }, { "epoch": 0.9751427171407773, "grad_norm": 0.3359375, "learning_rate": 4.892002005046303e-08, "loss": 1.9875, "step": 30224 }, { "epoch": 0.9751749809945737, "grad_norm": 0.330078125, "learning_rate": 4.879310646093371e-08, "loss": 1.9934, "step": 30225 }, { "epoch": 0.97520724484837, "grad_norm": 0.328125, "learning_rate": 4.8666357443563624e-08, "loss": 1.964, "step": 30226 }, { "epoch": 0.9752395087021664, "grad_norm": 0.337890625, "learning_rate": 4.853977299974832e-08, "loss": 2.0166, "step": 30227 }, { "epoch": 0.9752717725559626, "grad_norm": 0.328125, "learning_rate": 4.8413353130880025e-08, "loss": 1.9747, "step": 30228 }, { "epoch": 0.975304036409759, "grad_norm": 0.3359375, "learning_rate": 4.828709783835261e-08, "loss": 1.9825, "step": 30229 }, { "epoch": 0.9753363002635553, "grad_norm": 0.330078125, "learning_rate": 4.816100712355498e-08, "loss": 1.9721, "step": 30230 }, { "epoch": 0.9753685641173517, "grad_norm": 0.34765625, "learning_rate": 4.803508098787268e-08, "loss": 1.9865, "step": 30231 }, { "epoch": 0.975400827971148, "grad_norm": 0.333984375, "learning_rate": 4.7909319432694606e-08, "loss": 2.0077, "step": 30232 }, { "epoch": 0.9754330918249444, "grad_norm": 0.33984375, "learning_rate": 4.7783722459404654e-08, "loss": 2.003, "step": 30233 }, { "epoch": 0.9754653556787407, "grad_norm": 0.330078125, "learning_rate": 4.765829006938338e-08, "loss": 2.0004, "step": 30234 }, { "epoch": 0.9754976195325371, "grad_norm": 0.330078125, "learning_rate": 4.7533022264014684e-08, "loss": 1.9977, "step": 30235 }, { "epoch": 0.9755298833863334, "grad_norm": 0.33203125, "learning_rate": 4.740791904467579e-08, "loss": 2.0025, "step": 30236 }, { "epoch": 0.9755621472401298, "grad_norm": 0.34765625, "learning_rate": 4.728298041274226e-08, "loss": 2.002, "step": 30237 }, { "epoch": 0.9755944110939261, "grad_norm": 0.3359375, "learning_rate": 4.7158206369589675e-08, "loss": 1.9763, "step": 30238 }, { "epoch": 0.9756266749477225, "grad_norm": 0.333984375, "learning_rate": 4.7033596916595256e-08, "loss": 2.0004, "step": 30239 }, { "epoch": 0.9756589388015188, "grad_norm": 0.33203125, "learning_rate": 4.690915205512458e-08, "loss": 1.9488, "step": 30240 }, { "epoch": 0.9756912026553152, "grad_norm": 0.330078125, "learning_rate": 4.678487178655322e-08, "loss": 1.9785, "step": 30241 }, { "epoch": 0.9757234665091115, "grad_norm": 0.3359375, "learning_rate": 4.666075611224674e-08, "loss": 2.0237, "step": 30242 }, { "epoch": 0.9757557303629079, "grad_norm": 0.333984375, "learning_rate": 4.653680503356905e-08, "loss": 2.021, "step": 30243 }, { "epoch": 0.9757879942167043, "grad_norm": 0.337890625, "learning_rate": 4.641301855188906e-08, "loss": 1.9841, "step": 30244 }, { "epoch": 0.9758202580705005, "grad_norm": 0.333984375, "learning_rate": 4.6289396668564024e-08, "loss": 1.989, "step": 30245 }, { "epoch": 0.975852521924297, "grad_norm": 0.326171875, "learning_rate": 4.6165939384959496e-08, "loss": 2.0001, "step": 30246 }, { "epoch": 0.9758847857780932, "grad_norm": 0.326171875, "learning_rate": 4.604264670243275e-08, "loss": 1.9972, "step": 30247 }, { "epoch": 0.9759170496318896, "grad_norm": 0.333984375, "learning_rate": 4.591951862233934e-08, "loss": 1.9937, "step": 30248 }, { "epoch": 0.9759493134856859, "grad_norm": 0.326171875, "learning_rate": 4.579655514603654e-08, "loss": 2.0248, "step": 30249 }, { "epoch": 0.9759815773394823, "grad_norm": 0.33203125, "learning_rate": 4.567375627487824e-08, "loss": 1.9856, "step": 30250 }, { "epoch": 0.9760138411932786, "grad_norm": 0.333984375, "learning_rate": 4.5551122010213386e-08, "loss": 1.9793, "step": 30251 }, { "epoch": 0.976046105047075, "grad_norm": 0.32421875, "learning_rate": 4.542865235339422e-08, "loss": 1.992, "step": 30252 }, { "epoch": 0.9760783689008713, "grad_norm": 0.337890625, "learning_rate": 4.530634730576799e-08, "loss": 2.0111, "step": 30253 }, { "epoch": 0.9761106327546677, "grad_norm": 0.33203125, "learning_rate": 4.51842068686803e-08, "loss": 1.9778, "step": 30254 }, { "epoch": 0.976142896608464, "grad_norm": 0.330078125, "learning_rate": 4.5062231043478394e-08, "loss": 1.9758, "step": 30255 }, { "epoch": 0.9761751604622604, "grad_norm": 0.333984375, "learning_rate": 4.494041983150121e-08, "loss": 1.994, "step": 30256 }, { "epoch": 0.9762074243160567, "grad_norm": 0.337890625, "learning_rate": 4.4818773234091005e-08, "loss": 1.9929, "step": 30257 }, { "epoch": 0.9762396881698531, "grad_norm": 0.337890625, "learning_rate": 4.4697291252586704e-08, "loss": 1.9977, "step": 30258 }, { "epoch": 0.9762719520236494, "grad_norm": 0.33203125, "learning_rate": 4.4575973888327236e-08, "loss": 1.9929, "step": 30259 }, { "epoch": 0.9763042158774458, "grad_norm": 0.333984375, "learning_rate": 4.445482114264488e-08, "loss": 1.9938, "step": 30260 }, { "epoch": 0.976336479731242, "grad_norm": 0.345703125, "learning_rate": 4.433383301687688e-08, "loss": 1.9921, "step": 30261 }, { "epoch": 0.9763687435850384, "grad_norm": 0.32421875, "learning_rate": 4.421300951235052e-08, "loss": 1.9463, "step": 30262 }, { "epoch": 0.9764010074388348, "grad_norm": 0.330078125, "learning_rate": 4.409235063039974e-08, "loss": 2.0077, "step": 30263 }, { "epoch": 0.9764332712926311, "grad_norm": 0.33984375, "learning_rate": 4.397185637235013e-08, "loss": 1.9897, "step": 30264 }, { "epoch": 0.9764655351464275, "grad_norm": 0.330078125, "learning_rate": 4.385152673952897e-08, "loss": 1.9899, "step": 30265 }, { "epoch": 0.9764977990002238, "grad_norm": 0.330078125, "learning_rate": 4.37313617332602e-08, "loss": 2.0063, "step": 30266 }, { "epoch": 0.9765300628540202, "grad_norm": 0.328125, "learning_rate": 4.361136135486943e-08, "loss": 1.9689, "step": 30267 }, { "epoch": 0.9765623267078165, "grad_norm": 0.3359375, "learning_rate": 4.3491525605672266e-08, "loss": 1.9825, "step": 30268 }, { "epoch": 0.9765945905616129, "grad_norm": 0.33203125, "learning_rate": 4.337185448699266e-08, "loss": 1.986, "step": 30269 }, { "epoch": 0.9766268544154092, "grad_norm": 0.328125, "learning_rate": 4.3252348000144546e-08, "loss": 1.9713, "step": 30270 }, { "epoch": 0.9766591182692056, "grad_norm": 0.34375, "learning_rate": 4.313300614644522e-08, "loss": 2.0148, "step": 30271 }, { "epoch": 0.9766913821230019, "grad_norm": 0.33203125, "learning_rate": 4.301382892720862e-08, "loss": 1.9829, "step": 30272 }, { "epoch": 0.9767236459767983, "grad_norm": 0.330078125, "learning_rate": 4.28948163437437e-08, "loss": 2.0238, "step": 30273 }, { "epoch": 0.9767559098305946, "grad_norm": 0.328125, "learning_rate": 4.277596839736275e-08, "loss": 2.014, "step": 30274 }, { "epoch": 0.976788173684391, "grad_norm": 0.33203125, "learning_rate": 4.2657285089376386e-08, "loss": 1.9929, "step": 30275 }, { "epoch": 0.9768204375381873, "grad_norm": 0.33203125, "learning_rate": 4.253876642108523e-08, "loss": 2.0011, "step": 30276 }, { "epoch": 0.9768527013919837, "grad_norm": 0.328125, "learning_rate": 4.2420412393798234e-08, "loss": 1.9732, "step": 30277 }, { "epoch": 0.97688496524578, "grad_norm": 0.33203125, "learning_rate": 4.230222300881603e-08, "loss": 1.9498, "step": 30278 }, { "epoch": 0.9769172290995763, "grad_norm": 0.326171875, "learning_rate": 4.218419826744091e-08, "loss": 1.971, "step": 30279 }, { "epoch": 0.9769494929533726, "grad_norm": 0.330078125, "learning_rate": 4.206633817097183e-08, "loss": 2.0049, "step": 30280 }, { "epoch": 0.976981756807169, "grad_norm": 0.328125, "learning_rate": 4.194864272070609e-08, "loss": 2.0139, "step": 30281 }, { "epoch": 0.9770140206609653, "grad_norm": 0.328125, "learning_rate": 4.183111191793765e-08, "loss": 1.9647, "step": 30282 }, { "epoch": 0.9770462845147617, "grad_norm": 0.333984375, "learning_rate": 4.1713745763962143e-08, "loss": 2.0006, "step": 30283 }, { "epoch": 0.9770785483685581, "grad_norm": 0.32421875, "learning_rate": 4.1596544260068535e-08, "loss": 1.9761, "step": 30284 }, { "epoch": 0.9771108122223544, "grad_norm": 0.328125, "learning_rate": 4.147950740755247e-08, "loss": 1.9649, "step": 30285 }, { "epoch": 0.9771430760761508, "grad_norm": 0.3359375, "learning_rate": 4.136263520769623e-08, "loss": 2.0132, "step": 30286 }, { "epoch": 0.9771753399299471, "grad_norm": 0.326171875, "learning_rate": 4.124592766179047e-08, "loss": 1.9739, "step": 30287 }, { "epoch": 0.9772076037837435, "grad_norm": 0.337890625, "learning_rate": 4.1129384771117494e-08, "loss": 2.0162, "step": 30288 }, { "epoch": 0.9772398676375398, "grad_norm": 0.33203125, "learning_rate": 4.101300653696127e-08, "loss": 1.9968, "step": 30289 }, { "epoch": 0.9772721314913362, "grad_norm": 0.337890625, "learning_rate": 4.089679296060078e-08, "loss": 1.9676, "step": 30290 }, { "epoch": 0.9773043953451325, "grad_norm": 0.330078125, "learning_rate": 4.078074404331833e-08, "loss": 2.0182, "step": 30291 }, { "epoch": 0.9773366591989289, "grad_norm": 0.33203125, "learning_rate": 4.066485978638956e-08, "loss": 2.0202, "step": 30292 }, { "epoch": 0.9773689230527252, "grad_norm": 0.33984375, "learning_rate": 4.054914019108846e-08, "loss": 2.0157, "step": 30293 }, { "epoch": 0.9774011869065216, "grad_norm": 0.337890625, "learning_rate": 4.043358525869234e-08, "loss": 1.9914, "step": 30294 }, { "epoch": 0.9774334507603178, "grad_norm": 0.328125, "learning_rate": 4.031819499047185e-08, "loss": 2.0109, "step": 30295 }, { "epoch": 0.9774657146141142, "grad_norm": 0.337890625, "learning_rate": 4.0202969387695966e-08, "loss": 2.0001, "step": 30296 }, { "epoch": 0.9774979784679105, "grad_norm": 0.330078125, "learning_rate": 4.0087908451633684e-08, "loss": 2.0016, "step": 30297 }, { "epoch": 0.9775302423217069, "grad_norm": 0.33203125, "learning_rate": 3.997301218355065e-08, "loss": 1.9752, "step": 30298 }, { "epoch": 0.9775625061755032, "grad_norm": 0.3359375, "learning_rate": 3.985828058471419e-08, "loss": 1.9898, "step": 30299 }, { "epoch": 0.9775947700292996, "grad_norm": 0.3359375, "learning_rate": 3.974371365638496e-08, "loss": 2.0013, "step": 30300 }, { "epoch": 0.9776270338830959, "grad_norm": 0.328125, "learning_rate": 3.962931139982362e-08, "loss": 1.9652, "step": 30301 }, { "epoch": 0.9776592977368923, "grad_norm": 0.330078125, "learning_rate": 3.951507381628916e-08, "loss": 2.0174, "step": 30302 }, { "epoch": 0.9776915615906887, "grad_norm": 0.333984375, "learning_rate": 3.940100090704224e-08, "loss": 2.0165, "step": 30303 }, { "epoch": 0.977723825444485, "grad_norm": 0.326171875, "learning_rate": 3.928709267333519e-08, "loss": 1.9763, "step": 30304 }, { "epoch": 0.9777560892982814, "grad_norm": 0.328125, "learning_rate": 3.917334911642367e-08, "loss": 2.0017, "step": 30305 }, { "epoch": 0.9777883531520777, "grad_norm": 0.33984375, "learning_rate": 3.9059770237560025e-08, "loss": 1.9915, "step": 30306 }, { "epoch": 0.9778206170058741, "grad_norm": 0.328125, "learning_rate": 3.894635603799157e-08, "loss": 1.9994, "step": 30307 }, { "epoch": 0.9778528808596704, "grad_norm": 0.333984375, "learning_rate": 3.883310651896899e-08, "loss": 1.9938, "step": 30308 }, { "epoch": 0.9778851447134668, "grad_norm": 0.33203125, "learning_rate": 3.8720021681741266e-08, "loss": 1.9564, "step": 30309 }, { "epoch": 0.9779174085672631, "grad_norm": 0.3359375, "learning_rate": 3.860710152754743e-08, "loss": 1.9859, "step": 30310 }, { "epoch": 0.9779496724210595, "grad_norm": 0.322265625, "learning_rate": 3.849434605763313e-08, "loss": 1.9639, "step": 30311 }, { "epoch": 0.9779819362748557, "grad_norm": 0.326171875, "learning_rate": 3.838175527324239e-08, "loss": 1.9955, "step": 30312 }, { "epoch": 0.9780142001286521, "grad_norm": 0.328125, "learning_rate": 3.826932917560921e-08, "loss": 1.9709, "step": 30313 }, { "epoch": 0.9780464639824484, "grad_norm": 0.330078125, "learning_rate": 3.815706776597428e-08, "loss": 2.0023, "step": 30314 }, { "epoch": 0.9780787278362448, "grad_norm": 0.337890625, "learning_rate": 3.804497104557325e-08, "loss": 2.0079, "step": 30315 }, { "epoch": 0.9781109916900411, "grad_norm": 0.330078125, "learning_rate": 3.793303901564016e-08, "loss": 1.9889, "step": 30316 }, { "epoch": 0.9781432555438375, "grad_norm": 0.33203125, "learning_rate": 3.7821271677405675e-08, "loss": 2.0126, "step": 30317 }, { "epoch": 0.9781755193976338, "grad_norm": 0.333984375, "learning_rate": 3.770966903210049e-08, "loss": 1.9927, "step": 30318 }, { "epoch": 0.9782077832514302, "grad_norm": 0.330078125, "learning_rate": 3.759823108095361e-08, "loss": 1.9964, "step": 30319 }, { "epoch": 0.9782400471052265, "grad_norm": 0.3359375, "learning_rate": 3.74869578251924e-08, "loss": 2.004, "step": 30320 }, { "epoch": 0.9782723109590229, "grad_norm": 0.328125, "learning_rate": 3.737584926603921e-08, "loss": 1.9882, "step": 30321 }, { "epoch": 0.9783045748128192, "grad_norm": 0.337890625, "learning_rate": 3.7264905404719717e-08, "loss": 1.9995, "step": 30322 }, { "epoch": 0.9783368386666156, "grad_norm": 0.33203125, "learning_rate": 3.7154126242452954e-08, "loss": 1.971, "step": 30323 }, { "epoch": 0.978369102520412, "grad_norm": 0.33203125, "learning_rate": 3.7043511780459616e-08, "loss": 2.0031, "step": 30324 }, { "epoch": 0.9784013663742083, "grad_norm": 0.337890625, "learning_rate": 3.693306201995705e-08, "loss": 2.0083, "step": 30325 }, { "epoch": 0.9784336302280047, "grad_norm": 0.326171875, "learning_rate": 3.6822776962160964e-08, "loss": 1.9651, "step": 30326 }, { "epoch": 0.978465894081801, "grad_norm": 0.33203125, "learning_rate": 3.671265660828371e-08, "loss": 2.006, "step": 30327 }, { "epoch": 0.9784981579355974, "grad_norm": 0.33203125, "learning_rate": 3.660270095954099e-08, "loss": 1.9975, "step": 30328 }, { "epoch": 0.9785304217893936, "grad_norm": 0.33203125, "learning_rate": 3.649291001714017e-08, "loss": 1.9855, "step": 30329 }, { "epoch": 0.97856268564319, "grad_norm": 0.326171875, "learning_rate": 3.638328378229028e-08, "loss": 1.9994, "step": 30330 }, { "epoch": 0.9785949494969863, "grad_norm": 0.33984375, "learning_rate": 3.627382225620035e-08, "loss": 1.9992, "step": 30331 }, { "epoch": 0.9786272133507827, "grad_norm": 0.328125, "learning_rate": 3.6164525440071095e-08, "loss": 1.9785, "step": 30332 }, { "epoch": 0.978659477204579, "grad_norm": 0.328125, "learning_rate": 3.6055393335108214e-08, "loss": 1.9603, "step": 30333 }, { "epoch": 0.9786917410583754, "grad_norm": 0.33203125, "learning_rate": 3.594642594251407e-08, "loss": 1.9422, "step": 30334 }, { "epoch": 0.9787240049121717, "grad_norm": 0.33203125, "learning_rate": 3.583762326348439e-08, "loss": 1.9907, "step": 30335 }, { "epoch": 0.9787562687659681, "grad_norm": 0.3359375, "learning_rate": 3.57289852992182e-08, "loss": 2.0186, "step": 30336 }, { "epoch": 0.9787885326197644, "grad_norm": 0.328125, "learning_rate": 3.5620512050914543e-08, "loss": 1.9942, "step": 30337 }, { "epoch": 0.9788207964735608, "grad_norm": 0.326171875, "learning_rate": 3.551220351976414e-08, "loss": 1.9726, "step": 30338 }, { "epoch": 0.9788530603273571, "grad_norm": 0.328125, "learning_rate": 3.5404059706959366e-08, "loss": 1.9729, "step": 30339 }, { "epoch": 0.9788853241811535, "grad_norm": 0.328125, "learning_rate": 3.529608061369094e-08, "loss": 2.0035, "step": 30340 }, { "epoch": 0.9789175880349498, "grad_norm": 0.328125, "learning_rate": 3.518826624114791e-08, "loss": 1.9555, "step": 30341 }, { "epoch": 0.9789498518887462, "grad_norm": 0.337890625, "learning_rate": 3.5080616590517647e-08, "loss": 1.9963, "step": 30342 }, { "epoch": 0.9789821157425425, "grad_norm": 0.33203125, "learning_rate": 3.497313166298255e-08, "loss": 2.021, "step": 30343 }, { "epoch": 0.9790143795963389, "grad_norm": 0.328125, "learning_rate": 3.4865811459728335e-08, "loss": 1.9719, "step": 30344 }, { "epoch": 0.9790466434501353, "grad_norm": 0.32421875, "learning_rate": 3.475865598193573e-08, "loss": 1.9828, "step": 30345 }, { "epoch": 0.9790789073039315, "grad_norm": 0.3203125, "learning_rate": 3.465166523078378e-08, "loss": 2.0129, "step": 30346 }, { "epoch": 0.9791111711577279, "grad_norm": 0.333984375, "learning_rate": 3.454483920744822e-08, "loss": 1.9923, "step": 30347 }, { "epoch": 0.9791434350115242, "grad_norm": 0.326171875, "learning_rate": 3.443817791310977e-08, "loss": 1.9823, "step": 30348 }, { "epoch": 0.9791756988653206, "grad_norm": 0.3515625, "learning_rate": 3.433168134893749e-08, "loss": 2.0117, "step": 30349 }, { "epoch": 0.9792079627191169, "grad_norm": 0.333984375, "learning_rate": 3.422534951610712e-08, "loss": 1.9912, "step": 30350 }, { "epoch": 0.9792402265729133, "grad_norm": 0.326171875, "learning_rate": 3.411918241578604e-08, "loss": 1.9852, "step": 30351 }, { "epoch": 0.9792724904267096, "grad_norm": 0.33203125, "learning_rate": 3.4013180049144997e-08, "loss": 1.9931, "step": 30352 }, { "epoch": 0.979304754280506, "grad_norm": 0.333984375, "learning_rate": 3.390734241735138e-08, "loss": 2.0078, "step": 30353 }, { "epoch": 0.9793370181343023, "grad_norm": 0.33984375, "learning_rate": 3.380166952156927e-08, "loss": 2.0066, "step": 30354 }, { "epoch": 0.9793692819880987, "grad_norm": 0.328125, "learning_rate": 3.369616136296105e-08, "loss": 1.9925, "step": 30355 }, { "epoch": 0.979401545841895, "grad_norm": 0.330078125, "learning_rate": 3.359081794268748e-08, "loss": 1.9897, "step": 30356 }, { "epoch": 0.9794338096956914, "grad_norm": 0.3359375, "learning_rate": 3.348563926191095e-08, "loss": 2.0308, "step": 30357 }, { "epoch": 0.9794660735494877, "grad_norm": 0.33203125, "learning_rate": 3.3380625321787204e-08, "loss": 1.9955, "step": 30358 }, { "epoch": 0.9794983374032841, "grad_norm": 0.3359375, "learning_rate": 3.327577612347199e-08, "loss": 2.0004, "step": 30359 }, { "epoch": 0.9795306012570804, "grad_norm": 0.3359375, "learning_rate": 3.3171091668121046e-08, "loss": 2.0094, "step": 30360 }, { "epoch": 0.9795628651108768, "grad_norm": 0.326171875, "learning_rate": 3.306657195688512e-08, "loss": 1.9655, "step": 30361 }, { "epoch": 0.979595128964673, "grad_norm": 0.337890625, "learning_rate": 3.296221699091495e-08, "loss": 1.9788, "step": 30362 }, { "epoch": 0.9796273928184694, "grad_norm": 0.326171875, "learning_rate": 3.285802677135963e-08, "loss": 1.9812, "step": 30363 }, { "epoch": 0.9796596566722658, "grad_norm": 0.326171875, "learning_rate": 3.27540012993649e-08, "loss": 1.988, "step": 30364 }, { "epoch": 0.9796919205260621, "grad_norm": 0.34375, "learning_rate": 3.265014057607818e-08, "loss": 2.0207, "step": 30365 }, { "epoch": 0.9797241843798585, "grad_norm": 0.328125, "learning_rate": 3.254644460263856e-08, "loss": 1.9958, "step": 30366 }, { "epoch": 0.9797564482336548, "grad_norm": 0.3359375, "learning_rate": 3.244291338019179e-08, "loss": 1.9816, "step": 30367 }, { "epoch": 0.9797887120874512, "grad_norm": 0.337890625, "learning_rate": 3.233954690987695e-08, "loss": 1.9793, "step": 30368 }, { "epoch": 0.9798209759412475, "grad_norm": 0.333984375, "learning_rate": 3.223634519282814e-08, "loss": 1.98, "step": 30369 }, { "epoch": 0.9798532397950439, "grad_norm": 0.345703125, "learning_rate": 3.213330823018612e-08, "loss": 1.9798, "step": 30370 }, { "epoch": 0.9798855036488402, "grad_norm": 0.330078125, "learning_rate": 3.2030436023081644e-08, "loss": 1.9872, "step": 30371 }, { "epoch": 0.9799177675026366, "grad_norm": 0.328125, "learning_rate": 3.192772857264714e-08, "loss": 1.9911, "step": 30372 }, { "epoch": 0.9799500313564329, "grad_norm": 0.33203125, "learning_rate": 3.1825185880016706e-08, "loss": 1.9706, "step": 30373 }, { "epoch": 0.9799822952102293, "grad_norm": 0.41015625, "learning_rate": 3.1722807946314434e-08, "loss": 1.9955, "step": 30374 }, { "epoch": 0.9800145590640256, "grad_norm": 0.328125, "learning_rate": 3.162059477266943e-08, "loss": 1.9953, "step": 30375 }, { "epoch": 0.980046822917822, "grad_norm": 0.3359375, "learning_rate": 3.151854636020746e-08, "loss": 1.9627, "step": 30376 }, { "epoch": 0.9800790867716183, "grad_norm": 0.33984375, "learning_rate": 3.1416662710050945e-08, "loss": 1.9853, "step": 30377 }, { "epoch": 0.9801113506254147, "grad_norm": 0.3359375, "learning_rate": 3.1314943823320675e-08, "loss": 1.9671, "step": 30378 }, { "epoch": 0.980143614479211, "grad_norm": 0.330078125, "learning_rate": 3.121338970113741e-08, "loss": 1.9633, "step": 30379 }, { "epoch": 0.9801758783330073, "grad_norm": 0.328125, "learning_rate": 3.1112000344618585e-08, "loss": 1.9752, "step": 30380 }, { "epoch": 0.9802081421868036, "grad_norm": 0.33203125, "learning_rate": 3.101077575487998e-08, "loss": 1.9647, "step": 30381 }, { "epoch": 0.9802404060406, "grad_norm": 0.3359375, "learning_rate": 3.090971593303737e-08, "loss": 1.9922, "step": 30382 }, { "epoch": 0.9802726698943963, "grad_norm": 0.328125, "learning_rate": 3.0808820880199876e-08, "loss": 2.0128, "step": 30383 }, { "epoch": 0.9803049337481927, "grad_norm": 0.341796875, "learning_rate": 3.0708090597481584e-08, "loss": 2.0131, "step": 30384 }, { "epoch": 0.9803371976019891, "grad_norm": 0.330078125, "learning_rate": 3.06075250859883e-08, "loss": 1.9826, "step": 30385 }, { "epoch": 0.9803694614557854, "grad_norm": 0.330078125, "learning_rate": 3.050712434682912e-08, "loss": 1.9821, "step": 30386 }, { "epoch": 0.9804017253095818, "grad_norm": 0.333984375, "learning_rate": 3.040688838110817e-08, "loss": 1.9936, "step": 30387 }, { "epoch": 0.9804339891633781, "grad_norm": 0.3203125, "learning_rate": 3.030681718992956e-08, "loss": 1.9807, "step": 30388 }, { "epoch": 0.9804662530171745, "grad_norm": 0.330078125, "learning_rate": 3.020691077439408e-08, "loss": 2.0287, "step": 30389 }, { "epoch": 0.9804985168709708, "grad_norm": 0.330078125, "learning_rate": 3.010716913560085e-08, "loss": 1.9538, "step": 30390 }, { "epoch": 0.9805307807247672, "grad_norm": 0.328125, "learning_rate": 3.000759227465067e-08, "loss": 2.0021, "step": 30391 }, { "epoch": 0.9805630445785635, "grad_norm": 0.326171875, "learning_rate": 2.990818019263597e-08, "loss": 1.9726, "step": 30392 }, { "epoch": 0.9805953084323599, "grad_norm": 0.33203125, "learning_rate": 2.980893289065256e-08, "loss": 1.9667, "step": 30393 }, { "epoch": 0.9806275722861562, "grad_norm": 0.33203125, "learning_rate": 2.97098503697929e-08, "loss": 2.0121, "step": 30394 }, { "epoch": 0.9806598361399526, "grad_norm": 0.333984375, "learning_rate": 2.9610932631147782e-08, "loss": 2.0059, "step": 30395 }, { "epoch": 0.9806920999937488, "grad_norm": 0.330078125, "learning_rate": 2.9512179675806326e-08, "loss": 1.9855, "step": 30396 }, { "epoch": 0.9807243638475452, "grad_norm": 0.330078125, "learning_rate": 2.9413591504852677e-08, "loss": 1.9983, "step": 30397 }, { "epoch": 0.9807566277013415, "grad_norm": 0.330078125, "learning_rate": 2.931516811937596e-08, "loss": 2.0102, "step": 30398 }, { "epoch": 0.9807888915551379, "grad_norm": 0.328125, "learning_rate": 2.9216909520458634e-08, "loss": 2.0009, "step": 30399 }, { "epoch": 0.9808211554089342, "grad_norm": 0.33203125, "learning_rate": 2.9118815709181513e-08, "loss": 1.9769, "step": 30400 }, { "epoch": 0.9808534192627306, "grad_norm": 0.328125, "learning_rate": 2.9020886686623726e-08, "loss": 1.9799, "step": 30401 }, { "epoch": 0.9808856831165269, "grad_norm": 0.330078125, "learning_rate": 2.8923122453864413e-08, "loss": 2.0119, "step": 30402 }, { "epoch": 0.9809179469703233, "grad_norm": 0.33984375, "learning_rate": 2.8825523011977717e-08, "loss": 2.0063, "step": 30403 }, { "epoch": 0.9809502108241196, "grad_norm": 0.330078125, "learning_rate": 2.8728088362041104e-08, "loss": 1.9506, "step": 30404 }, { "epoch": 0.980982474677916, "grad_norm": 0.32421875, "learning_rate": 2.863081850512539e-08, "loss": 2.0181, "step": 30405 }, { "epoch": 0.9810147385317124, "grad_norm": 0.3359375, "learning_rate": 2.8533713442299714e-08, "loss": 2.009, "step": 30406 }, { "epoch": 0.9810470023855087, "grad_norm": 0.330078125, "learning_rate": 2.843677317463489e-08, "loss": 2.01, "step": 30407 }, { "epoch": 0.9810792662393051, "grad_norm": 0.328125, "learning_rate": 2.8339997703198395e-08, "loss": 2.0217, "step": 30408 }, { "epoch": 0.9811115300931014, "grad_norm": 0.328125, "learning_rate": 2.824338702905438e-08, "loss": 1.9918, "step": 30409 }, { "epoch": 0.9811437939468978, "grad_norm": 0.328125, "learning_rate": 2.8146941153265326e-08, "loss": 1.9737, "step": 30410 }, { "epoch": 0.9811760578006941, "grad_norm": 0.345703125, "learning_rate": 2.8050660076895383e-08, "loss": 2.0199, "step": 30411 }, { "epoch": 0.9812083216544905, "grad_norm": 0.328125, "learning_rate": 2.795454380100204e-08, "loss": 1.9816, "step": 30412 }, { "epoch": 0.9812405855082867, "grad_norm": 0.33203125, "learning_rate": 2.785859232664445e-08, "loss": 2.003, "step": 30413 }, { "epoch": 0.9812728493620831, "grad_norm": 0.328125, "learning_rate": 2.776280565487843e-08, "loss": 2.0022, "step": 30414 }, { "epoch": 0.9813051132158794, "grad_norm": 0.330078125, "learning_rate": 2.7667183786758143e-08, "loss": 2.0067, "step": 30415 }, { "epoch": 0.9813373770696758, "grad_norm": 0.3359375, "learning_rate": 2.7571726723337744e-08, "loss": 1.9845, "step": 30416 }, { "epoch": 0.9813696409234721, "grad_norm": 0.330078125, "learning_rate": 2.7476434465664724e-08, "loss": 2.0073, "step": 30417 }, { "epoch": 0.9814019047772685, "grad_norm": 0.330078125, "learning_rate": 2.7381307014791578e-08, "loss": 1.9743, "step": 30418 }, { "epoch": 0.9814341686310648, "grad_norm": 0.33203125, "learning_rate": 2.7286344371762473e-08, "loss": 2.0101, "step": 30419 }, { "epoch": 0.9814664324848612, "grad_norm": 0.328125, "learning_rate": 2.7191546537623234e-08, "loss": 2.0096, "step": 30420 }, { "epoch": 0.9814986963386575, "grad_norm": 0.330078125, "learning_rate": 2.7096913513419697e-08, "loss": 1.9864, "step": 30421 }, { "epoch": 0.9815309601924539, "grad_norm": 0.333984375, "learning_rate": 2.7002445300191026e-08, "loss": 2.0146, "step": 30422 }, { "epoch": 0.9815632240462502, "grad_norm": 0.33203125, "learning_rate": 2.6908141898978057e-08, "loss": 2.0231, "step": 30423 }, { "epoch": 0.9815954879000466, "grad_norm": 0.330078125, "learning_rate": 2.6814003310819956e-08, "loss": 2.0044, "step": 30424 }, { "epoch": 0.981627751753843, "grad_norm": 0.33203125, "learning_rate": 2.6720029536749234e-08, "loss": 2.0111, "step": 30425 }, { "epoch": 0.9816600156076393, "grad_norm": 0.33203125, "learning_rate": 2.6626220577803396e-08, "loss": 1.9769, "step": 30426 }, { "epoch": 0.9816922794614357, "grad_norm": 0.333984375, "learning_rate": 2.6532576435014943e-08, "loss": 2.0208, "step": 30427 }, { "epoch": 0.981724543315232, "grad_norm": 0.333984375, "learning_rate": 2.6439097109413056e-08, "loss": 1.9772, "step": 30428 }, { "epoch": 0.9817568071690284, "grad_norm": 0.33984375, "learning_rate": 2.6345782602028578e-08, "loss": 1.9872, "step": 30429 }, { "epoch": 0.9817890710228246, "grad_norm": 0.326171875, "learning_rate": 2.6252632913887354e-08, "loss": 1.9882, "step": 30430 }, { "epoch": 0.981821334876621, "grad_norm": 0.330078125, "learning_rate": 2.6159648046015228e-08, "loss": 1.9992, "step": 30431 }, { "epoch": 0.9818535987304173, "grad_norm": 0.33203125, "learning_rate": 2.6066827999434717e-08, "loss": 2.0045, "step": 30432 }, { "epoch": 0.9818858625842137, "grad_norm": 0.33203125, "learning_rate": 2.5974172775168336e-08, "loss": 2.0074, "step": 30433 }, { "epoch": 0.98191812643801, "grad_norm": 0.330078125, "learning_rate": 2.5881682374236938e-08, "loss": 1.9648, "step": 30434 }, { "epoch": 0.9819503902918064, "grad_norm": 0.330078125, "learning_rate": 2.578935679765637e-08, "loss": 2.0114, "step": 30435 }, { "epoch": 0.9819826541456027, "grad_norm": 0.33984375, "learning_rate": 2.5697196046445825e-08, "loss": 1.9864, "step": 30436 }, { "epoch": 0.9820149179993991, "grad_norm": 0.330078125, "learning_rate": 2.5605200121616158e-08, "loss": 2.0019, "step": 30437 }, { "epoch": 0.9820471818531954, "grad_norm": 0.3359375, "learning_rate": 2.551336902418322e-08, "loss": 1.9973, "step": 30438 }, { "epoch": 0.9820794457069918, "grad_norm": 0.33984375, "learning_rate": 2.5421702755156205e-08, "loss": 1.9931, "step": 30439 }, { "epoch": 0.9821117095607881, "grad_norm": 0.34375, "learning_rate": 2.5330201315542646e-08, "loss": 2.0172, "step": 30440 }, { "epoch": 0.9821439734145845, "grad_norm": 0.333984375, "learning_rate": 2.5238864706353392e-08, "loss": 2.0091, "step": 30441 }, { "epoch": 0.9821762372683808, "grad_norm": 0.3359375, "learning_rate": 2.5147692928590983e-08, "loss": 2.017, "step": 30442 }, { "epoch": 0.9822085011221772, "grad_norm": 0.33203125, "learning_rate": 2.5056685983259608e-08, "loss": 1.9619, "step": 30443 }, { "epoch": 0.9822407649759735, "grad_norm": 0.33203125, "learning_rate": 2.496584387136347e-08, "loss": 1.9799, "step": 30444 }, { "epoch": 0.9822730288297699, "grad_norm": 0.3359375, "learning_rate": 2.4875166593898434e-08, "loss": 1.9741, "step": 30445 }, { "epoch": 0.9823052926835663, "grad_norm": 0.330078125, "learning_rate": 2.4784654151863705e-08, "loss": 1.997, "step": 30446 }, { "epoch": 0.9823375565373625, "grad_norm": 0.33203125, "learning_rate": 2.4694306546256816e-08, "loss": 2.003, "step": 30447 }, { "epoch": 0.9823698203911589, "grad_norm": 0.333984375, "learning_rate": 2.4604123778071974e-08, "loss": 1.9815, "step": 30448 }, { "epoch": 0.9824020842449552, "grad_norm": 0.337890625, "learning_rate": 2.451410584830338e-08, "loss": 2.0037, "step": 30449 }, { "epoch": 0.9824343480987516, "grad_norm": 0.33203125, "learning_rate": 2.4424252757938583e-08, "loss": 1.9621, "step": 30450 }, { "epoch": 0.9824666119525479, "grad_norm": 0.328125, "learning_rate": 2.433456450796845e-08, "loss": 1.9736, "step": 30451 }, { "epoch": 0.9824988758063443, "grad_norm": 0.330078125, "learning_rate": 2.424504109937886e-08, "loss": 1.9912, "step": 30452 }, { "epoch": 0.9825311396601406, "grad_norm": 0.34765625, "learning_rate": 2.4155682533157364e-08, "loss": 1.9941, "step": 30453 }, { "epoch": 0.982563403513937, "grad_norm": 0.33984375, "learning_rate": 2.4066488810286503e-08, "loss": 1.9958, "step": 30454 }, { "epoch": 0.9825956673677333, "grad_norm": 0.326171875, "learning_rate": 2.3977459931748823e-08, "loss": 1.9917, "step": 30455 }, { "epoch": 0.9826279312215297, "grad_norm": 0.33203125, "learning_rate": 2.388859589852188e-08, "loss": 1.9874, "step": 30456 }, { "epoch": 0.982660195075326, "grad_norm": 0.326171875, "learning_rate": 2.379989671158489e-08, "loss": 1.988, "step": 30457 }, { "epoch": 0.9826924589291224, "grad_norm": 0.330078125, "learning_rate": 2.3711362371917065e-08, "loss": 1.9986, "step": 30458 }, { "epoch": 0.9827247227829187, "grad_norm": 0.337890625, "learning_rate": 2.3622992880489303e-08, "loss": 1.9441, "step": 30459 }, { "epoch": 0.9827569866367151, "grad_norm": 0.333984375, "learning_rate": 2.3534788238275818e-08, "loss": 1.9968, "step": 30460 }, { "epoch": 0.9827892504905114, "grad_norm": 0.330078125, "learning_rate": 2.34467484462475e-08, "loss": 1.9603, "step": 30461 }, { "epoch": 0.9828215143443078, "grad_norm": 0.341796875, "learning_rate": 2.335887350537358e-08, "loss": 1.9526, "step": 30462 }, { "epoch": 0.982853778198104, "grad_norm": 0.337890625, "learning_rate": 2.3271163416619944e-08, "loss": 1.9797, "step": 30463 }, { "epoch": 0.9828860420519004, "grad_norm": 0.33203125, "learning_rate": 2.3183618180954158e-08, "loss": 1.9732, "step": 30464 }, { "epoch": 0.9829183059056968, "grad_norm": 0.330078125, "learning_rate": 2.3096237799337117e-08, "loss": 1.9623, "step": 30465 }, { "epoch": 0.9829505697594931, "grad_norm": 0.337890625, "learning_rate": 2.3009022272734714e-08, "loss": 1.9668, "step": 30466 }, { "epoch": 0.9829828336132895, "grad_norm": 0.333984375, "learning_rate": 2.2921971602102855e-08, "loss": 1.9849, "step": 30467 }, { "epoch": 0.9830150974670858, "grad_norm": 0.33984375, "learning_rate": 2.2835085788400767e-08, "loss": 1.9823, "step": 30468 }, { "epoch": 0.9830473613208822, "grad_norm": 0.330078125, "learning_rate": 2.2748364832587687e-08, "loss": 1.9839, "step": 30469 }, { "epoch": 0.9830796251746785, "grad_norm": 0.333984375, "learning_rate": 2.266180873561452e-08, "loss": 1.9818, "step": 30470 }, { "epoch": 0.9831118890284749, "grad_norm": 0.345703125, "learning_rate": 2.25754174984355e-08, "loss": 1.9766, "step": 30471 }, { "epoch": 0.9831441528822712, "grad_norm": 0.337890625, "learning_rate": 2.248919112200154e-08, "loss": 2.0084, "step": 30472 }, { "epoch": 0.9831764167360676, "grad_norm": 0.328125, "learning_rate": 2.2403129607261875e-08, "loss": 1.9452, "step": 30473 }, { "epoch": 0.9832086805898639, "grad_norm": 0.341796875, "learning_rate": 2.231723295516408e-08, "loss": 2.0013, "step": 30474 }, { "epoch": 0.9832409444436603, "grad_norm": 0.3359375, "learning_rate": 2.2231501166654066e-08, "loss": 1.9575, "step": 30475 }, { "epoch": 0.9832732082974566, "grad_norm": 0.330078125, "learning_rate": 2.214593424267275e-08, "loss": 1.9655, "step": 30476 }, { "epoch": 0.983305472151253, "grad_norm": 0.34375, "learning_rate": 2.2060532184164373e-08, "loss": 1.972, "step": 30477 }, { "epoch": 0.9833377360050493, "grad_norm": 0.34765625, "learning_rate": 2.197529499206985e-08, "loss": 1.9868, "step": 30478 }, { "epoch": 0.9833699998588457, "grad_norm": 0.337890625, "learning_rate": 2.1890222667325098e-08, "loss": 1.9823, "step": 30479 }, { "epoch": 0.9834022637126419, "grad_norm": 0.330078125, "learning_rate": 2.1805315210867704e-08, "loss": 1.9782, "step": 30480 }, { "epoch": 0.9834345275664383, "grad_norm": 0.330078125, "learning_rate": 2.1720572623631917e-08, "loss": 2.0021, "step": 30481 }, { "epoch": 0.9834667914202346, "grad_norm": 0.3359375, "learning_rate": 2.1635994906550327e-08, "loss": 1.971, "step": 30482 }, { "epoch": 0.983499055274031, "grad_norm": 0.33984375, "learning_rate": 2.155158206055552e-08, "loss": 1.9771, "step": 30483 }, { "epoch": 0.9835313191278273, "grad_norm": 0.326171875, "learning_rate": 2.146733408657342e-08, "loss": 2.0144, "step": 30484 }, { "epoch": 0.9835635829816237, "grad_norm": 0.3359375, "learning_rate": 2.138325098553495e-08, "loss": 2.0073, "step": 30485 }, { "epoch": 0.9835958468354201, "grad_norm": 0.333984375, "learning_rate": 2.1299332758364375e-08, "loss": 1.9931, "step": 30486 }, { "epoch": 0.9836281106892164, "grad_norm": 0.33203125, "learning_rate": 2.121557940598262e-08, "loss": 1.9985, "step": 30487 }, { "epoch": 0.9836603745430128, "grad_norm": 0.330078125, "learning_rate": 2.1131990929315614e-08, "loss": 1.9812, "step": 30488 }, { "epoch": 0.9836926383968091, "grad_norm": 0.34375, "learning_rate": 2.104856732928262e-08, "loss": 1.9931, "step": 30489 }, { "epoch": 0.9837249022506055, "grad_norm": 0.33203125, "learning_rate": 2.0965308606801236e-08, "loss": 1.9661, "step": 30490 }, { "epoch": 0.9837571661044018, "grad_norm": 0.330078125, "learning_rate": 2.0882214762785733e-08, "loss": 1.9869, "step": 30491 }, { "epoch": 0.9837894299581982, "grad_norm": 0.333984375, "learning_rate": 2.0799285798155376e-08, "loss": 1.9691, "step": 30492 }, { "epoch": 0.9838216938119945, "grad_norm": 0.3359375, "learning_rate": 2.0716521713819436e-08, "loss": 1.9687, "step": 30493 }, { "epoch": 0.9838539576657909, "grad_norm": 0.328125, "learning_rate": 2.0633922510688852e-08, "loss": 1.9649, "step": 30494 }, { "epoch": 0.9838862215195872, "grad_norm": 0.33203125, "learning_rate": 2.0551488189674562e-08, "loss": 2.0123, "step": 30495 }, { "epoch": 0.9839184853733836, "grad_norm": 0.3359375, "learning_rate": 2.0469218751684172e-08, "loss": 1.9697, "step": 30496 }, { "epoch": 0.9839507492271798, "grad_norm": 0.33984375, "learning_rate": 2.0387114197621958e-08, "loss": 1.9566, "step": 30497 }, { "epoch": 0.9839830130809762, "grad_norm": 0.33203125, "learning_rate": 2.03051745283922e-08, "loss": 1.9637, "step": 30498 }, { "epoch": 0.9840152769347725, "grad_norm": 0.328125, "learning_rate": 2.0223399744895842e-08, "loss": 1.9625, "step": 30499 }, { "epoch": 0.9840475407885689, "grad_norm": 0.322265625, "learning_rate": 2.014178984803383e-08, "loss": 1.9556, "step": 30500 }, { "epoch": 0.9840798046423652, "grad_norm": 0.333984375, "learning_rate": 2.0060344838705448e-08, "loss": 1.988, "step": 30501 }, { "epoch": 0.9841120684961616, "grad_norm": 0.333984375, "learning_rate": 1.9979064717804972e-08, "loss": 1.9511, "step": 30502 }, { "epoch": 0.9841443323499579, "grad_norm": 0.328125, "learning_rate": 1.9897949486230026e-08, "loss": 1.9519, "step": 30503 }, { "epoch": 0.9841765962037543, "grad_norm": 0.330078125, "learning_rate": 1.9816999144869896e-08, "loss": 2.0148, "step": 30504 }, { "epoch": 0.9842088600575506, "grad_norm": 0.33203125, "learning_rate": 1.9736213694618865e-08, "loss": 1.9623, "step": 30505 }, { "epoch": 0.984241123911347, "grad_norm": 0.330078125, "learning_rate": 1.965559313636456e-08, "loss": 1.9834, "step": 30506 }, { "epoch": 0.9842733877651434, "grad_norm": 0.333984375, "learning_rate": 1.9575137470994598e-08, "loss": 2.0062, "step": 30507 }, { "epoch": 0.9843056516189397, "grad_norm": 0.33203125, "learning_rate": 1.9494846699393276e-08, "loss": 1.9879, "step": 30508 }, { "epoch": 0.9843379154727361, "grad_norm": 0.33203125, "learning_rate": 1.941472082244822e-08, "loss": 2.004, "step": 30509 }, { "epoch": 0.9843701793265324, "grad_norm": 0.333984375, "learning_rate": 1.9334759841037053e-08, "loss": 1.9647, "step": 30510 }, { "epoch": 0.9844024431803288, "grad_norm": 0.337890625, "learning_rate": 1.925496375604241e-08, "loss": 1.9873, "step": 30511 }, { "epoch": 0.9844347070341251, "grad_norm": 0.3359375, "learning_rate": 1.917533256834192e-08, "loss": 1.989, "step": 30512 }, { "epoch": 0.9844669708879215, "grad_norm": 0.333984375, "learning_rate": 1.9095866278811547e-08, "loss": 1.9852, "step": 30513 }, { "epoch": 0.9844992347417177, "grad_norm": 0.3359375, "learning_rate": 1.901656488832726e-08, "loss": 2.0066, "step": 30514 }, { "epoch": 0.9845314985955141, "grad_norm": 0.328125, "learning_rate": 1.8937428397761693e-08, "loss": 2.0012, "step": 30515 }, { "epoch": 0.9845637624493104, "grad_norm": 0.3359375, "learning_rate": 1.8858456807984148e-08, "loss": 1.9908, "step": 30516 }, { "epoch": 0.9845960263031068, "grad_norm": 0.33984375, "learning_rate": 1.8779650119867263e-08, "loss": 2.0011, "step": 30517 }, { "epoch": 0.9846282901569031, "grad_norm": 0.330078125, "learning_rate": 1.8701008334275348e-08, "loss": 1.9747, "step": 30518 }, { "epoch": 0.9846605540106995, "grad_norm": 0.328125, "learning_rate": 1.8622531452074375e-08, "loss": 1.9849, "step": 30519 }, { "epoch": 0.9846928178644958, "grad_norm": 0.330078125, "learning_rate": 1.854421947413032e-08, "loss": 1.9283, "step": 30520 }, { "epoch": 0.9847250817182922, "grad_norm": 0.330078125, "learning_rate": 1.8466072401304157e-08, "loss": 2.0158, "step": 30521 }, { "epoch": 0.9847573455720885, "grad_norm": 0.33203125, "learning_rate": 1.83880902344552e-08, "loss": 2.0062, "step": 30522 }, { "epoch": 0.9847896094258849, "grad_norm": 0.330078125, "learning_rate": 1.83102729744411e-08, "loss": 2.005, "step": 30523 }, { "epoch": 0.9848218732796812, "grad_norm": 0.33203125, "learning_rate": 1.8232620622121165e-08, "loss": 2.0245, "step": 30524 }, { "epoch": 0.9848541371334776, "grad_norm": 0.330078125, "learning_rate": 1.815513317834805e-08, "loss": 1.9971, "step": 30525 }, { "epoch": 0.984886400987274, "grad_norm": 0.333984375, "learning_rate": 1.807781064397607e-08, "loss": 1.993, "step": 30526 }, { "epoch": 0.9849186648410703, "grad_norm": 0.3359375, "learning_rate": 1.800065301985454e-08, "loss": 2.0043, "step": 30527 }, { "epoch": 0.9849509286948667, "grad_norm": 0.35546875, "learning_rate": 1.7923660306834456e-08, "loss": 1.9756, "step": 30528 }, { "epoch": 0.984983192548663, "grad_norm": 0.328125, "learning_rate": 1.7846832505763466e-08, "loss": 2.0091, "step": 30529 }, { "epoch": 0.9850154564024594, "grad_norm": 0.333984375, "learning_rate": 1.7770169617484233e-08, "loss": 1.972, "step": 30530 }, { "epoch": 0.9850477202562556, "grad_norm": 0.326171875, "learning_rate": 1.7693671642844413e-08, "loss": 1.9825, "step": 30531 }, { "epoch": 0.985079984110052, "grad_norm": 0.330078125, "learning_rate": 1.7617338582684996e-08, "loss": 2.0092, "step": 30532 }, { "epoch": 0.9851122479638483, "grad_norm": 0.33203125, "learning_rate": 1.7541170437843645e-08, "loss": 1.9637, "step": 30533 }, { "epoch": 0.9851445118176447, "grad_norm": 0.333984375, "learning_rate": 1.7465167209161358e-08, "loss": 1.9683, "step": 30534 }, { "epoch": 0.985176775671441, "grad_norm": 0.3359375, "learning_rate": 1.7389328897474133e-08, "loss": 1.9793, "step": 30535 }, { "epoch": 0.9852090395252374, "grad_norm": 0.326171875, "learning_rate": 1.7313655503617965e-08, "loss": 1.9654, "step": 30536 }, { "epoch": 0.9852413033790337, "grad_norm": 0.33203125, "learning_rate": 1.723814702842219e-08, "loss": 1.9981, "step": 30537 }, { "epoch": 0.9852735672328301, "grad_norm": 0.333984375, "learning_rate": 1.716280347272281e-08, "loss": 1.9817, "step": 30538 }, { "epoch": 0.9853058310866264, "grad_norm": 0.357421875, "learning_rate": 1.7087624837344164e-08, "loss": 1.9769, "step": 30539 }, { "epoch": 0.9853380949404228, "grad_norm": 0.33203125, "learning_rate": 1.7012611123118916e-08, "loss": 1.9979, "step": 30540 }, { "epoch": 0.9853703587942191, "grad_norm": 0.33984375, "learning_rate": 1.693776233086808e-08, "loss": 1.9715, "step": 30541 }, { "epoch": 0.9854026226480155, "grad_norm": 0.3515625, "learning_rate": 1.686307846141766e-08, "loss": 1.9785, "step": 30542 }, { "epoch": 0.9854348865018118, "grad_norm": 0.32421875, "learning_rate": 1.678855951559033e-08, "loss": 1.986, "step": 30543 }, { "epoch": 0.9854671503556082, "grad_norm": 0.34375, "learning_rate": 1.6714205494205435e-08, "loss": 1.9842, "step": 30544 }, { "epoch": 0.9854994142094045, "grad_norm": 0.33203125, "learning_rate": 1.6640016398082325e-08, "loss": 1.9842, "step": 30545 }, { "epoch": 0.9855316780632009, "grad_norm": 0.33984375, "learning_rate": 1.6565992228035343e-08, "loss": 1.9716, "step": 30546 }, { "epoch": 0.9855639419169973, "grad_norm": 0.345703125, "learning_rate": 1.6492132984882168e-08, "loss": 1.9841, "step": 30547 }, { "epoch": 0.9855962057707935, "grad_norm": 0.333984375, "learning_rate": 1.641843866943382e-08, "loss": 1.9763, "step": 30548 }, { "epoch": 0.9856284696245899, "grad_norm": 0.326171875, "learning_rate": 1.6344909282502984e-08, "loss": 1.9655, "step": 30549 }, { "epoch": 0.9856607334783862, "grad_norm": 0.330078125, "learning_rate": 1.6271544824897345e-08, "loss": 2.0018, "step": 30550 }, { "epoch": 0.9856929973321826, "grad_norm": 0.33203125, "learning_rate": 1.619834529742459e-08, "loss": 1.9807, "step": 30551 }, { "epoch": 0.9857252611859789, "grad_norm": 0.3359375, "learning_rate": 1.6125310700892404e-08, "loss": 2.0197, "step": 30552 }, { "epoch": 0.9857575250397753, "grad_norm": 0.341796875, "learning_rate": 1.6052441036101817e-08, "loss": 1.9863, "step": 30553 }, { "epoch": 0.9857897888935716, "grad_norm": 0.333984375, "learning_rate": 1.597973630385885e-08, "loss": 1.9753, "step": 30554 }, { "epoch": 0.985822052747368, "grad_norm": 0.337890625, "learning_rate": 1.5907196504959533e-08, "loss": 2.0147, "step": 30555 }, { "epoch": 0.9858543166011643, "grad_norm": 0.330078125, "learning_rate": 1.5834821640204887e-08, "loss": 1.9852, "step": 30556 }, { "epoch": 0.9858865804549607, "grad_norm": 0.330078125, "learning_rate": 1.576261171039095e-08, "loss": 1.9887, "step": 30557 }, { "epoch": 0.985918844308757, "grad_norm": 0.326171875, "learning_rate": 1.569056671631208e-08, "loss": 2.0105, "step": 30558 }, { "epoch": 0.9859511081625534, "grad_norm": 0.3359375, "learning_rate": 1.5618686658762648e-08, "loss": 1.9861, "step": 30559 }, { "epoch": 0.9859833720163497, "grad_norm": 0.33203125, "learning_rate": 1.5546971538533682e-08, "loss": 1.9764, "step": 30560 }, { "epoch": 0.9860156358701461, "grad_norm": 0.330078125, "learning_rate": 1.5475421356411225e-08, "loss": 1.9292, "step": 30561 }, { "epoch": 0.9860478997239424, "grad_norm": 0.328125, "learning_rate": 1.540403611318797e-08, "loss": 2.0061, "step": 30562 }, { "epoch": 0.9860801635777388, "grad_norm": 0.330078125, "learning_rate": 1.5332815809646627e-08, "loss": 1.9938, "step": 30563 }, { "epoch": 0.986112427431535, "grad_norm": 0.330078125, "learning_rate": 1.5261760446569906e-08, "loss": 1.9879, "step": 30564 }, { "epoch": 0.9861446912853314, "grad_norm": 0.3359375, "learning_rate": 1.5190870024743843e-08, "loss": 2.0211, "step": 30565 }, { "epoch": 0.9861769551391278, "grad_norm": 0.326171875, "learning_rate": 1.5120144544946146e-08, "loss": 2.0148, "step": 30566 }, { "epoch": 0.9862092189929241, "grad_norm": 0.33203125, "learning_rate": 1.504958400795453e-08, "loss": 1.9991, "step": 30567 }, { "epoch": 0.9862414828467205, "grad_norm": 0.333984375, "learning_rate": 1.4979188414548372e-08, "loss": 1.981, "step": 30568 }, { "epoch": 0.9862737467005168, "grad_norm": 0.3359375, "learning_rate": 1.4908957765500386e-08, "loss": 1.9916, "step": 30569 }, { "epoch": 0.9863060105543132, "grad_norm": 0.33984375, "learning_rate": 1.4838892061584952e-08, "loss": 1.9824, "step": 30570 }, { "epoch": 0.9863382744081095, "grad_norm": 0.333984375, "learning_rate": 1.4768991303571455e-08, "loss": 1.9882, "step": 30571 }, { "epoch": 0.9863705382619059, "grad_norm": 0.33203125, "learning_rate": 1.469925549223261e-08, "loss": 1.9747, "step": 30572 }, { "epoch": 0.9864028021157022, "grad_norm": 0.333984375, "learning_rate": 1.4629684628332807e-08, "loss": 2.0089, "step": 30573 }, { "epoch": 0.9864350659694986, "grad_norm": 0.33203125, "learning_rate": 1.4560278712638098e-08, "loss": 1.9716, "step": 30574 }, { "epoch": 0.9864673298232949, "grad_norm": 0.3359375, "learning_rate": 1.4491037745914537e-08, "loss": 1.9945, "step": 30575 }, { "epoch": 0.9864995936770913, "grad_norm": 0.330078125, "learning_rate": 1.4421961728923183e-08, "loss": 1.9801, "step": 30576 }, { "epoch": 0.9865318575308876, "grad_norm": 0.328125, "learning_rate": 1.4353050662423429e-08, "loss": 1.9751, "step": 30577 }, { "epoch": 0.986564121384684, "grad_norm": 0.3359375, "learning_rate": 1.4284304547174665e-08, "loss": 1.9701, "step": 30578 }, { "epoch": 0.9865963852384803, "grad_norm": 0.333984375, "learning_rate": 1.421572338393462e-08, "loss": 1.9931, "step": 30579 }, { "epoch": 0.9866286490922767, "grad_norm": 0.328125, "learning_rate": 1.4147307173456025e-08, "loss": 2.0022, "step": 30580 }, { "epoch": 0.9866609129460729, "grad_norm": 0.33203125, "learning_rate": 1.4079055916493278e-08, "loss": 1.9925, "step": 30581 }, { "epoch": 0.9866931767998693, "grad_norm": 0.330078125, "learning_rate": 1.4010969613797441e-08, "loss": 1.9871, "step": 30582 }, { "epoch": 0.9867254406536656, "grad_norm": 0.33203125, "learning_rate": 1.394304826611792e-08, "loss": 1.9708, "step": 30583 }, { "epoch": 0.986757704507462, "grad_norm": 0.333984375, "learning_rate": 1.3875291874202444e-08, "loss": 1.9995, "step": 30584 }, { "epoch": 0.9867899683612583, "grad_norm": 0.333984375, "learning_rate": 1.380770043879709e-08, "loss": 1.9785, "step": 30585 }, { "epoch": 0.9868222322150547, "grad_norm": 0.333984375, "learning_rate": 1.374027396064459e-08, "loss": 1.9593, "step": 30586 }, { "epoch": 0.9868544960688511, "grad_norm": 0.333984375, "learning_rate": 1.3673012440489351e-08, "loss": 2.0, "step": 30587 }, { "epoch": 0.9868867599226474, "grad_norm": 0.33984375, "learning_rate": 1.360591587906912e-08, "loss": 1.9657, "step": 30588 }, { "epoch": 0.9869190237764438, "grad_norm": 0.333984375, "learning_rate": 1.3538984277124967e-08, "loss": 2.0201, "step": 30589 }, { "epoch": 0.9869512876302401, "grad_norm": 0.333984375, "learning_rate": 1.3472217635392969e-08, "loss": 1.9873, "step": 30590 }, { "epoch": 0.9869835514840365, "grad_norm": 0.3359375, "learning_rate": 1.3405615954605876e-08, "loss": 1.9619, "step": 30591 }, { "epoch": 0.9870158153378328, "grad_norm": 0.330078125, "learning_rate": 1.33391792354981e-08, "loss": 1.9949, "step": 30592 }, { "epoch": 0.9870480791916292, "grad_norm": 0.3359375, "learning_rate": 1.3272907478802388e-08, "loss": 2.0045, "step": 30593 }, { "epoch": 0.9870803430454255, "grad_norm": 0.33203125, "learning_rate": 1.3206800685248155e-08, "loss": 2.0017, "step": 30594 }, { "epoch": 0.9871126068992219, "grad_norm": 0.33203125, "learning_rate": 1.3140858855559822e-08, "loss": 1.9946, "step": 30595 }, { "epoch": 0.9871448707530182, "grad_norm": 0.32421875, "learning_rate": 1.3075081990466809e-08, "loss": 1.9817, "step": 30596 }, { "epoch": 0.9871771346068146, "grad_norm": 0.3359375, "learning_rate": 1.3009470090690202e-08, "loss": 1.9652, "step": 30597 }, { "epoch": 0.9872093984606108, "grad_norm": 0.337890625, "learning_rate": 1.2944023156956086e-08, "loss": 1.9977, "step": 30598 }, { "epoch": 0.9872416623144072, "grad_norm": 0.33203125, "learning_rate": 1.2878741189980559e-08, "loss": 2.0104, "step": 30599 }, { "epoch": 0.9872739261682035, "grad_norm": 0.328125, "learning_rate": 1.2813624190484707e-08, "loss": 1.9871, "step": 30600 }, { "epoch": 0.9873061900219999, "grad_norm": 0.330078125, "learning_rate": 1.2748672159184627e-08, "loss": 1.9901, "step": 30601 }, { "epoch": 0.9873384538757962, "grad_norm": 0.333984375, "learning_rate": 1.2683885096794746e-08, "loss": 2.0212, "step": 30602 }, { "epoch": 0.9873707177295926, "grad_norm": 0.326171875, "learning_rate": 1.2619263004029491e-08, "loss": 1.9745, "step": 30603 }, { "epoch": 0.9874029815833889, "grad_norm": 0.330078125, "learning_rate": 1.2554805881599963e-08, "loss": 1.9901, "step": 30604 }, { "epoch": 0.9874352454371853, "grad_norm": 0.333984375, "learning_rate": 1.2490513730215592e-08, "loss": 1.9822, "step": 30605 }, { "epoch": 0.9874675092909816, "grad_norm": 0.33203125, "learning_rate": 1.242638655058248e-08, "loss": 1.9868, "step": 30606 }, { "epoch": 0.987499773144778, "grad_norm": 0.326171875, "learning_rate": 1.2362424343408397e-08, "loss": 1.9815, "step": 30607 }, { "epoch": 0.9875320369985744, "grad_norm": 0.32421875, "learning_rate": 1.229862710939611e-08, "loss": 2.0195, "step": 30608 }, { "epoch": 0.9875643008523707, "grad_norm": 0.330078125, "learning_rate": 1.2234994849248393e-08, "loss": 2.009, "step": 30609 }, { "epoch": 0.9875965647061671, "grad_norm": 0.333984375, "learning_rate": 1.2171527563664687e-08, "loss": 2.0212, "step": 30610 }, { "epoch": 0.9876288285599634, "grad_norm": 0.33203125, "learning_rate": 1.2108225253346094e-08, "loss": 1.9524, "step": 30611 }, { "epoch": 0.9876610924137598, "grad_norm": 0.33203125, "learning_rate": 1.2045087918987064e-08, "loss": 2.0084, "step": 30612 }, { "epoch": 0.987693356267556, "grad_norm": 0.33984375, "learning_rate": 1.1982115561285368e-08, "loss": 1.9904, "step": 30613 }, { "epoch": 0.9877256201213525, "grad_norm": 0.33203125, "learning_rate": 1.1919308180930454e-08, "loss": 1.9726, "step": 30614 }, { "epoch": 0.9877578839751487, "grad_norm": 0.330078125, "learning_rate": 1.1856665778615106e-08, "loss": 1.9957, "step": 30615 }, { "epoch": 0.9877901478289451, "grad_norm": 0.337890625, "learning_rate": 1.1794188355028768e-08, "loss": 1.9951, "step": 30616 }, { "epoch": 0.9878224116827414, "grad_norm": 0.330078125, "learning_rate": 1.173187591086089e-08, "loss": 1.973, "step": 30617 }, { "epoch": 0.9878546755365378, "grad_norm": 0.3359375, "learning_rate": 1.1669728446795924e-08, "loss": 1.9868, "step": 30618 }, { "epoch": 0.9878869393903341, "grad_norm": 0.32421875, "learning_rate": 1.1607745963516659e-08, "loss": 1.985, "step": 30619 }, { "epoch": 0.9879192032441305, "grad_norm": 0.32421875, "learning_rate": 1.1545928461707545e-08, "loss": 1.9606, "step": 30620 }, { "epoch": 0.9879514670979268, "grad_norm": 0.341796875, "learning_rate": 1.148427594204804e-08, "loss": 2.0006, "step": 30621 }, { "epoch": 0.9879837309517232, "grad_norm": 0.330078125, "learning_rate": 1.1422788405217598e-08, "loss": 1.9975, "step": 30622 }, { "epoch": 0.9880159948055195, "grad_norm": 0.345703125, "learning_rate": 1.136146585189235e-08, "loss": 1.9974, "step": 30623 }, { "epoch": 0.9880482586593159, "grad_norm": 0.3515625, "learning_rate": 1.1300308282746752e-08, "loss": 1.9706, "step": 30624 }, { "epoch": 0.9880805225131122, "grad_norm": 0.333984375, "learning_rate": 1.1239315698453601e-08, "loss": 1.9986, "step": 30625 }, { "epoch": 0.9881127863669086, "grad_norm": 0.3359375, "learning_rate": 1.1178488099687356e-08, "loss": 1.9879, "step": 30626 }, { "epoch": 0.988145050220705, "grad_norm": 0.33984375, "learning_rate": 1.1117825487114152e-08, "loss": 1.9744, "step": 30627 }, { "epoch": 0.9881773140745013, "grad_norm": 0.3359375, "learning_rate": 1.1057327861401789e-08, "loss": 1.97, "step": 30628 }, { "epoch": 0.9882095779282977, "grad_norm": 0.333984375, "learning_rate": 1.0996995223218064e-08, "loss": 1.9967, "step": 30629 }, { "epoch": 0.988241841782094, "grad_norm": 0.33203125, "learning_rate": 1.0936827573227448e-08, "loss": 1.988, "step": 30630 }, { "epoch": 0.9882741056358904, "grad_norm": 0.33984375, "learning_rate": 1.0876824912091077e-08, "loss": 2.0037, "step": 30631 }, { "epoch": 0.9883063694896866, "grad_norm": 0.33203125, "learning_rate": 1.0816987240468423e-08, "loss": 2.002, "step": 30632 }, { "epoch": 0.988338633343483, "grad_norm": 0.328125, "learning_rate": 1.075731455901896e-08, "loss": 1.9803, "step": 30633 }, { "epoch": 0.9883708971972793, "grad_norm": 0.33203125, "learning_rate": 1.0697806868400495e-08, "loss": 1.9953, "step": 30634 }, { "epoch": 0.9884031610510757, "grad_norm": 0.330078125, "learning_rate": 1.0638464169267504e-08, "loss": 1.9911, "step": 30635 }, { "epoch": 0.988435424904872, "grad_norm": 0.3359375, "learning_rate": 1.0579286462272797e-08, "loss": 1.9738, "step": 30636 }, { "epoch": 0.9884676887586684, "grad_norm": 0.33203125, "learning_rate": 1.0520273748067522e-08, "loss": 1.9975, "step": 30637 }, { "epoch": 0.9884999526124647, "grad_norm": 0.330078125, "learning_rate": 1.0461426027301158e-08, "loss": 1.9661, "step": 30638 }, { "epoch": 0.9885322164662611, "grad_norm": 0.326171875, "learning_rate": 1.0402743300623186e-08, "loss": 1.9957, "step": 30639 }, { "epoch": 0.9885644803200574, "grad_norm": 0.3359375, "learning_rate": 1.0344225568678089e-08, "loss": 1.9931, "step": 30640 }, { "epoch": 0.9885967441738538, "grad_norm": 0.333984375, "learning_rate": 1.0285872832110355e-08, "loss": 2.0099, "step": 30641 }, { "epoch": 0.9886290080276501, "grad_norm": 0.328125, "learning_rate": 1.0227685091561134e-08, "loss": 2.0047, "step": 30642 }, { "epoch": 0.9886612718814465, "grad_norm": 0.33984375, "learning_rate": 1.0169662347673247e-08, "loss": 1.9969, "step": 30643 }, { "epoch": 0.9886935357352428, "grad_norm": 0.3359375, "learning_rate": 1.0111804601082853e-08, "loss": 1.9799, "step": 30644 }, { "epoch": 0.9887257995890392, "grad_norm": 0.341796875, "learning_rate": 1.0054111852429438e-08, "loss": 1.9853, "step": 30645 }, { "epoch": 0.9887580634428355, "grad_norm": 0.3359375, "learning_rate": 9.99658410234583e-09, "loss": 1.9946, "step": 30646 }, { "epoch": 0.9887903272966319, "grad_norm": 0.328125, "learning_rate": 9.939221351464855e-09, "loss": 1.9951, "step": 30647 }, { "epoch": 0.9888225911504283, "grad_norm": 0.333984375, "learning_rate": 9.882023600421008e-09, "loss": 1.9675, "step": 30648 }, { "epoch": 0.9888548550042245, "grad_norm": 0.33203125, "learning_rate": 9.824990849840453e-09, "loss": 1.9848, "step": 30649 }, { "epoch": 0.9888871188580209, "grad_norm": 0.328125, "learning_rate": 9.768123100352688e-09, "loss": 1.9998, "step": 30650 }, { "epoch": 0.9889193827118172, "grad_norm": 0.326171875, "learning_rate": 9.711420352583878e-09, "loss": 1.9925, "step": 30651 }, { "epoch": 0.9889516465656136, "grad_norm": 0.333984375, "learning_rate": 9.654882607158522e-09, "loss": 1.9987, "step": 30652 }, { "epoch": 0.9889839104194099, "grad_norm": 0.333984375, "learning_rate": 9.59850986469779e-09, "loss": 2.0007, "step": 30653 }, { "epoch": 0.9890161742732063, "grad_norm": 0.328125, "learning_rate": 9.542302125822856e-09, "loss": 1.9714, "step": 30654 }, { "epoch": 0.9890484381270026, "grad_norm": 0.330078125, "learning_rate": 9.486259391151553e-09, "loss": 1.9836, "step": 30655 }, { "epoch": 0.989080701980799, "grad_norm": 0.330078125, "learning_rate": 9.430381661301723e-09, "loss": 2.0102, "step": 30656 }, { "epoch": 0.9891129658345953, "grad_norm": 0.337890625, "learning_rate": 9.374668936887876e-09, "loss": 1.9629, "step": 30657 }, { "epoch": 0.9891452296883917, "grad_norm": 0.361328125, "learning_rate": 9.319121218522853e-09, "loss": 1.9758, "step": 30658 }, { "epoch": 0.989177493542188, "grad_norm": 0.328125, "learning_rate": 9.263738506819497e-09, "loss": 1.9863, "step": 30659 }, { "epoch": 0.9892097573959844, "grad_norm": 0.328125, "learning_rate": 9.208520802385656e-09, "loss": 1.9616, "step": 30660 }, { "epoch": 0.9892420212497807, "grad_norm": 0.337890625, "learning_rate": 9.153468105830842e-09, "loss": 1.9895, "step": 30661 }, { "epoch": 0.9892742851035771, "grad_norm": 0.330078125, "learning_rate": 9.098580417759572e-09, "loss": 1.9758, "step": 30662 }, { "epoch": 0.9893065489573734, "grad_norm": 0.330078125, "learning_rate": 9.043857738778028e-09, "loss": 1.9949, "step": 30663 }, { "epoch": 0.9893388128111698, "grad_norm": 0.33984375, "learning_rate": 8.989300069485729e-09, "loss": 1.9939, "step": 30664 }, { "epoch": 0.989371076664966, "grad_norm": 0.330078125, "learning_rate": 8.934907410485527e-09, "loss": 1.9973, "step": 30665 }, { "epoch": 0.9894033405187624, "grad_norm": 0.33984375, "learning_rate": 8.880679762375277e-09, "loss": 1.9913, "step": 30666 }, { "epoch": 0.9894356043725588, "grad_norm": 0.337890625, "learning_rate": 8.826617125752835e-09, "loss": 1.9963, "step": 30667 }, { "epoch": 0.9894678682263551, "grad_norm": 0.333984375, "learning_rate": 8.772719501211057e-09, "loss": 2.0024, "step": 30668 }, { "epoch": 0.9895001320801515, "grad_norm": 0.33203125, "learning_rate": 8.718986889344472e-09, "loss": 1.989, "step": 30669 }, { "epoch": 0.9895323959339478, "grad_norm": 0.333984375, "learning_rate": 8.665419290745935e-09, "loss": 2.0016, "step": 30670 }, { "epoch": 0.9895646597877442, "grad_norm": 0.3359375, "learning_rate": 8.612016706001647e-09, "loss": 1.9415, "step": 30671 }, { "epoch": 0.9895969236415405, "grad_norm": 0.33203125, "learning_rate": 8.5587791357028e-09, "loss": 1.9503, "step": 30672 }, { "epoch": 0.9896291874953369, "grad_norm": 0.33984375, "learning_rate": 8.505706580433925e-09, "loss": 1.9736, "step": 30673 }, { "epoch": 0.9896614513491332, "grad_norm": 0.333984375, "learning_rate": 8.452799040779558e-09, "loss": 1.9839, "step": 30674 }, { "epoch": 0.9896937152029296, "grad_norm": 0.333984375, "learning_rate": 8.400056517322562e-09, "loss": 1.9343, "step": 30675 }, { "epoch": 0.9897259790567259, "grad_norm": 0.3359375, "learning_rate": 8.347479010640812e-09, "loss": 1.9799, "step": 30676 }, { "epoch": 0.9897582429105223, "grad_norm": 0.330078125, "learning_rate": 8.295066521317174e-09, "loss": 1.9856, "step": 30677 }, { "epoch": 0.9897905067643186, "grad_norm": 0.34765625, "learning_rate": 8.242819049924522e-09, "loss": 1.9916, "step": 30678 }, { "epoch": 0.989822770618115, "grad_norm": 0.33203125, "learning_rate": 8.190736597040727e-09, "loss": 1.996, "step": 30679 }, { "epoch": 0.9898550344719113, "grad_norm": 0.33203125, "learning_rate": 8.138819163238664e-09, "loss": 1.9624, "step": 30680 }, { "epoch": 0.9898872983257077, "grad_norm": 0.3359375, "learning_rate": 8.087066749089544e-09, "loss": 2.0003, "step": 30681 }, { "epoch": 0.9899195621795039, "grad_norm": 0.333984375, "learning_rate": 8.035479355161247e-09, "loss": 1.9875, "step": 30682 }, { "epoch": 0.9899518260333003, "grad_norm": 0.341796875, "learning_rate": 7.984056982024979e-09, "loss": 1.9632, "step": 30683 }, { "epoch": 0.9899840898870966, "grad_norm": 0.330078125, "learning_rate": 7.932799630243625e-09, "loss": 1.9471, "step": 30684 }, { "epoch": 0.990016353740893, "grad_norm": 0.330078125, "learning_rate": 7.8817073003834e-09, "loss": 1.9672, "step": 30685 }, { "epoch": 0.9900486175946893, "grad_norm": 0.3359375, "learning_rate": 7.830779993007187e-09, "loss": 1.9929, "step": 30686 }, { "epoch": 0.9900808814484857, "grad_norm": 0.345703125, "learning_rate": 7.780017708672871e-09, "loss": 1.9882, "step": 30687 }, { "epoch": 0.9901131453022821, "grad_norm": 0.33203125, "learning_rate": 7.72942044794167e-09, "loss": 1.9405, "step": 30688 }, { "epoch": 0.9901454091560784, "grad_norm": 0.333984375, "learning_rate": 7.678988211368143e-09, "loss": 2.0105, "step": 30689 }, { "epoch": 0.9901776730098748, "grad_norm": 0.33203125, "learning_rate": 7.628720999510175e-09, "loss": 2.0173, "step": 30690 }, { "epoch": 0.9902099368636711, "grad_norm": 0.326171875, "learning_rate": 7.578618812918992e-09, "loss": 1.955, "step": 30691 }, { "epoch": 0.9902422007174675, "grad_norm": 0.328125, "learning_rate": 7.528681652147484e-09, "loss": 1.9781, "step": 30692 }, { "epoch": 0.9902744645712638, "grad_norm": 0.33984375, "learning_rate": 7.478909517745214e-09, "loss": 2.0141, "step": 30693 }, { "epoch": 0.9903067284250602, "grad_norm": 0.33203125, "learning_rate": 7.42930241025841e-09, "loss": 1.9909, "step": 30694 }, { "epoch": 0.9903389922788565, "grad_norm": 0.333984375, "learning_rate": 7.379860330234967e-09, "loss": 2.0103, "step": 30695 }, { "epoch": 0.9903712561326529, "grad_norm": 0.3359375, "learning_rate": 7.330583278217784e-09, "loss": 1.9897, "step": 30696 }, { "epoch": 0.9904035199864492, "grad_norm": 0.333984375, "learning_rate": 7.281471254749761e-09, "loss": 2.0007, "step": 30697 }, { "epoch": 0.9904357838402456, "grad_norm": 0.33203125, "learning_rate": 7.232524260372131e-09, "loss": 1.9987, "step": 30698 }, { "epoch": 0.9904680476940418, "grad_norm": 0.328125, "learning_rate": 7.183742295622797e-09, "loss": 1.9813, "step": 30699 }, { "epoch": 0.9905003115478382, "grad_norm": 0.326171875, "learning_rate": 7.135125361039663e-09, "loss": 2.0075, "step": 30700 }, { "epoch": 0.9905325754016345, "grad_norm": 0.328125, "learning_rate": 7.086673457155634e-09, "loss": 2.0128, "step": 30701 }, { "epoch": 0.9905648392554309, "grad_norm": 0.330078125, "learning_rate": 7.038386584506951e-09, "loss": 1.9917, "step": 30702 }, { "epoch": 0.9905971031092272, "grad_norm": 0.337890625, "learning_rate": 6.990264743623187e-09, "loss": 2.0018, "step": 30703 }, { "epoch": 0.9906293669630236, "grad_norm": 0.322265625, "learning_rate": 6.9423079350355855e-09, "loss": 1.9871, "step": 30704 }, { "epoch": 0.9906616308168199, "grad_norm": 0.33984375, "learning_rate": 6.894516159270392e-09, "loss": 1.9915, "step": 30705 }, { "epoch": 0.9906938946706163, "grad_norm": 0.333984375, "learning_rate": 6.846889416853852e-09, "loss": 1.9863, "step": 30706 }, { "epoch": 0.9907261585244126, "grad_norm": 0.333984375, "learning_rate": 6.7994277083122115e-09, "loss": 1.9418, "step": 30707 }, { "epoch": 0.990758422378209, "grad_norm": 0.3359375, "learning_rate": 6.752131034165054e-09, "loss": 1.9495, "step": 30708 }, { "epoch": 0.9907906862320054, "grad_norm": 0.337890625, "learning_rate": 6.7049993949336305e-09, "loss": 1.9976, "step": 30709 }, { "epoch": 0.9908229500858017, "grad_norm": 0.333984375, "learning_rate": 6.6580327911391905e-09, "loss": 2.0093, "step": 30710 }, { "epoch": 0.9908552139395981, "grad_norm": 0.333984375, "learning_rate": 6.6112312232963215e-09, "loss": 1.9848, "step": 30711 }, { "epoch": 0.9908874777933944, "grad_norm": 0.33984375, "learning_rate": 6.564594691921277e-09, "loss": 1.978, "step": 30712 }, { "epoch": 0.9909197416471908, "grad_norm": 0.34375, "learning_rate": 6.518123197525317e-09, "loss": 1.9772, "step": 30713 }, { "epoch": 0.990952005500987, "grad_norm": 0.33203125, "learning_rate": 6.471816740623026e-09, "loss": 1.989, "step": 30714 }, { "epoch": 0.9909842693547835, "grad_norm": 0.33203125, "learning_rate": 6.425675321722335e-09, "loss": 2.007, "step": 30715 }, { "epoch": 0.9910165332085797, "grad_norm": 0.333984375, "learning_rate": 6.379698941331169e-09, "loss": 2.0096, "step": 30716 }, { "epoch": 0.9910487970623761, "grad_norm": 0.3359375, "learning_rate": 6.333887599957455e-09, "loss": 2.0156, "step": 30717 }, { "epoch": 0.9910810609161724, "grad_norm": 0.326171875, "learning_rate": 6.28824129810246e-09, "loss": 1.98, "step": 30718 }, { "epoch": 0.9911133247699688, "grad_norm": 0.330078125, "learning_rate": 6.2427600362691125e-09, "loss": 1.9713, "step": 30719 }, { "epoch": 0.9911455886237651, "grad_norm": 0.328125, "learning_rate": 6.197443814960346e-09, "loss": 1.9476, "step": 30720 }, { "epoch": 0.9911778524775615, "grad_norm": 0.337890625, "learning_rate": 6.1522926346740945e-09, "loss": 1.9765, "step": 30721 }, { "epoch": 0.9912101163313578, "grad_norm": 0.337890625, "learning_rate": 6.107306495904963e-09, "loss": 1.9706, "step": 30722 }, { "epoch": 0.9912423801851542, "grad_norm": 0.337890625, "learning_rate": 6.062485399152551e-09, "loss": 2.006, "step": 30723 }, { "epoch": 0.9912746440389505, "grad_norm": 0.33203125, "learning_rate": 6.017829344906467e-09, "loss": 1.9864, "step": 30724 }, { "epoch": 0.9913069078927469, "grad_norm": 0.3359375, "learning_rate": 5.973338333657985e-09, "loss": 1.9436, "step": 30725 }, { "epoch": 0.9913391717465432, "grad_norm": 0.337890625, "learning_rate": 5.929012365900044e-09, "loss": 1.9897, "step": 30726 }, { "epoch": 0.9913714356003396, "grad_norm": 0.33203125, "learning_rate": 5.884851442118922e-09, "loss": 2.0177, "step": 30727 }, { "epoch": 0.991403699454136, "grad_norm": 0.333984375, "learning_rate": 5.840855562800895e-09, "loss": 1.9979, "step": 30728 }, { "epoch": 0.9914359633079323, "grad_norm": 0.337890625, "learning_rate": 5.797024728428912e-09, "loss": 1.972, "step": 30729 }, { "epoch": 0.9914682271617287, "grad_norm": 0.333984375, "learning_rate": 5.7533589394859196e-09, "loss": 1.9671, "step": 30730 }, { "epoch": 0.991500491015525, "grad_norm": 0.3359375, "learning_rate": 5.709858196454865e-09, "loss": 1.9452, "step": 30731 }, { "epoch": 0.9915327548693214, "grad_norm": 0.330078125, "learning_rate": 5.666522499812033e-09, "loss": 1.9823, "step": 30732 }, { "epoch": 0.9915650187231176, "grad_norm": 0.32421875, "learning_rate": 5.623351850035374e-09, "loss": 1.9948, "step": 30733 }, { "epoch": 0.991597282576914, "grad_norm": 0.337890625, "learning_rate": 5.580346247599511e-09, "loss": 1.9914, "step": 30734 }, { "epoch": 0.9916295464307103, "grad_norm": 0.328125, "learning_rate": 5.537505692977396e-09, "loss": 1.9848, "step": 30735 }, { "epoch": 0.9916618102845067, "grad_norm": 0.33203125, "learning_rate": 5.494830186643651e-09, "loss": 1.9375, "step": 30736 }, { "epoch": 0.991694074138303, "grad_norm": 0.330078125, "learning_rate": 5.45231972906457e-09, "loss": 1.9845, "step": 30737 }, { "epoch": 0.9917263379920994, "grad_norm": 0.33203125, "learning_rate": 5.409974320709776e-09, "loss": 1.9791, "step": 30738 }, { "epoch": 0.9917586018458957, "grad_norm": 0.337890625, "learning_rate": 5.367793962043899e-09, "loss": 1.9713, "step": 30739 }, { "epoch": 0.9917908656996921, "grad_norm": 0.328125, "learning_rate": 5.325778653533231e-09, "loss": 1.9824, "step": 30740 }, { "epoch": 0.9918231295534884, "grad_norm": 0.330078125, "learning_rate": 5.283928395639071e-09, "loss": 2.0066, "step": 30741 }, { "epoch": 0.9918553934072848, "grad_norm": 0.33203125, "learning_rate": 5.242243188822715e-09, "loss": 2.0007, "step": 30742 }, { "epoch": 0.9918876572610811, "grad_norm": 0.330078125, "learning_rate": 5.2007230335404665e-09, "loss": 2.0243, "step": 30743 }, { "epoch": 0.9919199211148775, "grad_norm": 0.33203125, "learning_rate": 5.159367930253622e-09, "loss": 2.0224, "step": 30744 }, { "epoch": 0.9919521849686738, "grad_norm": 0.326171875, "learning_rate": 5.118177879415153e-09, "loss": 2.0078, "step": 30745 }, { "epoch": 0.9919844488224702, "grad_norm": 0.330078125, "learning_rate": 5.077152881478031e-09, "loss": 1.9889, "step": 30746 }, { "epoch": 0.9920167126762665, "grad_norm": 0.337890625, "learning_rate": 5.036292936893561e-09, "loss": 1.9859, "step": 30747 }, { "epoch": 0.9920489765300629, "grad_norm": 0.333984375, "learning_rate": 4.995598046113048e-09, "loss": 2.0207, "step": 30748 }, { "epoch": 0.9920812403838593, "grad_norm": 0.333984375, "learning_rate": 4.955068209582803e-09, "loss": 2.009, "step": 30749 }, { "epoch": 0.9921135042376555, "grad_norm": 0.330078125, "learning_rate": 4.9147034277508e-09, "loss": 1.9979, "step": 30750 }, { "epoch": 0.9921457680914519, "grad_norm": 0.32421875, "learning_rate": 4.874503701060018e-09, "loss": 2.0166, "step": 30751 }, { "epoch": 0.9921780319452482, "grad_norm": 0.3359375, "learning_rate": 4.834469029953437e-09, "loss": 1.9719, "step": 30752 }, { "epoch": 0.9922102957990446, "grad_norm": 0.3359375, "learning_rate": 4.794599414870704e-09, "loss": 1.9658, "step": 30753 }, { "epoch": 0.9922425596528409, "grad_norm": 0.3359375, "learning_rate": 4.7548948562514686e-09, "loss": 1.9618, "step": 30754 }, { "epoch": 0.9922748235066373, "grad_norm": 0.33984375, "learning_rate": 4.715355354533713e-09, "loss": 1.9973, "step": 30755 }, { "epoch": 0.9923070873604336, "grad_norm": 0.333984375, "learning_rate": 4.675980910152089e-09, "loss": 2.001, "step": 30756 }, { "epoch": 0.99233935121423, "grad_norm": 0.333984375, "learning_rate": 4.636771523537919e-09, "loss": 2.0121, "step": 30757 }, { "epoch": 0.9923716150680263, "grad_norm": 0.33203125, "learning_rate": 4.5977271951258555e-09, "loss": 1.9838, "step": 30758 }, { "epoch": 0.9924038789218227, "grad_norm": 0.322265625, "learning_rate": 4.55884792534389e-09, "loss": 2.0076, "step": 30759 }, { "epoch": 0.992436142775619, "grad_norm": 0.33203125, "learning_rate": 4.520133714620012e-09, "loss": 1.9814, "step": 30760 }, { "epoch": 0.9924684066294154, "grad_norm": 0.326171875, "learning_rate": 4.481584563382213e-09, "loss": 2.0007, "step": 30761 }, { "epoch": 0.9925006704832117, "grad_norm": 0.3359375, "learning_rate": 4.443200472051823e-09, "loss": 1.995, "step": 30762 }, { "epoch": 0.9925329343370081, "grad_norm": 0.33203125, "learning_rate": 4.404981441053502e-09, "loss": 1.953, "step": 30763 }, { "epoch": 0.9925651981908044, "grad_norm": 0.337890625, "learning_rate": 4.366927470808579e-09, "loss": 2.0025, "step": 30764 }, { "epoch": 0.9925974620446008, "grad_norm": 0.341796875, "learning_rate": 4.329038561733389e-09, "loss": 1.9588, "step": 30765 }, { "epoch": 0.992629725898397, "grad_norm": 0.33984375, "learning_rate": 4.291314714245931e-09, "loss": 1.9375, "step": 30766 }, { "epoch": 0.9926619897521934, "grad_norm": 0.33203125, "learning_rate": 4.253755928762537e-09, "loss": 1.9933, "step": 30767 }, { "epoch": 0.9926942536059897, "grad_norm": 0.33203125, "learning_rate": 4.216362205696211e-09, "loss": 1.9677, "step": 30768 }, { "epoch": 0.9927265174597861, "grad_norm": 0.333984375, "learning_rate": 4.179133545458291e-09, "loss": 1.9682, "step": 30769 }, { "epoch": 0.9927587813135825, "grad_norm": 0.337890625, "learning_rate": 4.142069948456784e-09, "loss": 2.0173, "step": 30770 }, { "epoch": 0.9927910451673788, "grad_norm": 0.333984375, "learning_rate": 4.105171415103026e-09, "loss": 1.9902, "step": 30771 }, { "epoch": 0.9928233090211752, "grad_norm": 0.333984375, "learning_rate": 4.0684379458016955e-09, "loss": 1.9898, "step": 30772 }, { "epoch": 0.9928555728749715, "grad_norm": 0.32421875, "learning_rate": 4.0318695409558015e-09, "loss": 2.0269, "step": 30773 }, { "epoch": 0.9928878367287679, "grad_norm": 0.333984375, "learning_rate": 3.9954662009700216e-09, "loss": 1.9899, "step": 30774 }, { "epoch": 0.9929201005825642, "grad_norm": 0.3359375, "learning_rate": 3.959227926244035e-09, "loss": 1.9563, "step": 30775 }, { "epoch": 0.9929523644363606, "grad_norm": 0.330078125, "learning_rate": 3.923154717175859e-09, "loss": 1.9856, "step": 30776 }, { "epoch": 0.9929846282901569, "grad_norm": 0.333984375, "learning_rate": 3.887246574163505e-09, "loss": 1.9556, "step": 30777 }, { "epoch": 0.9930168921439533, "grad_norm": 0.3203125, "learning_rate": 3.851503497603326e-09, "loss": 1.9214, "step": 30778 }, { "epoch": 0.9930491559977496, "grad_norm": 0.328125, "learning_rate": 3.815925487886673e-09, "loss": 2.0027, "step": 30779 }, { "epoch": 0.993081419851546, "grad_norm": 0.33203125, "learning_rate": 3.780512545406567e-09, "loss": 2.0122, "step": 30780 }, { "epoch": 0.9931136837053423, "grad_norm": 0.3359375, "learning_rate": 3.745264670552695e-09, "loss": 1.9827, "step": 30781 }, { "epoch": 0.9931459475591387, "grad_norm": 0.34375, "learning_rate": 3.7101818637114148e-09, "loss": 2.0074, "step": 30782 }, { "epoch": 0.9931782114129349, "grad_norm": 0.3359375, "learning_rate": 3.67526412527075e-09, "loss": 1.96, "step": 30783 }, { "epoch": 0.9932104752667313, "grad_norm": 0.337890625, "learning_rate": 3.6405114556153916e-09, "loss": 1.9665, "step": 30784 }, { "epoch": 0.9932427391205276, "grad_norm": 0.337890625, "learning_rate": 3.6059238551250372e-09, "loss": 1.9771, "step": 30785 }, { "epoch": 0.993275002974324, "grad_norm": 0.330078125, "learning_rate": 3.5715013241843788e-09, "loss": 2.0009, "step": 30786 }, { "epoch": 0.9933072668281203, "grad_norm": 0.33203125, "learning_rate": 3.5372438631681157e-09, "loss": 1.9599, "step": 30787 }, { "epoch": 0.9933395306819167, "grad_norm": 0.3359375, "learning_rate": 3.5031514724576107e-09, "loss": 1.9511, "step": 30788 }, { "epoch": 0.9933717945357131, "grad_norm": 0.337890625, "learning_rate": 3.469224152424233e-09, "loss": 1.9882, "step": 30789 }, { "epoch": 0.9934040583895094, "grad_norm": 0.337890625, "learning_rate": 3.435461903444348e-09, "loss": 1.9612, "step": 30790 }, { "epoch": 0.9934363222433058, "grad_norm": 0.328125, "learning_rate": 3.4018647258876603e-09, "loss": 1.9805, "step": 30791 }, { "epoch": 0.9934685860971021, "grad_norm": 0.33984375, "learning_rate": 3.368432620125539e-09, "loss": 2.0115, "step": 30792 }, { "epoch": 0.9935008499508985, "grad_norm": 0.333984375, "learning_rate": 3.335165586524358e-09, "loss": 1.977, "step": 30793 }, { "epoch": 0.9935331138046948, "grad_norm": 0.337890625, "learning_rate": 3.3020636254521563e-09, "loss": 1.9649, "step": 30794 }, { "epoch": 0.9935653776584912, "grad_norm": 0.341796875, "learning_rate": 3.2691267372703116e-09, "loss": 2.0062, "step": 30795 }, { "epoch": 0.9935976415122875, "grad_norm": 0.34375, "learning_rate": 3.236354922345197e-09, "loss": 2.0075, "step": 30796 }, { "epoch": 0.9936299053660839, "grad_norm": 0.34375, "learning_rate": 3.20374818103486e-09, "loss": 2.003, "step": 30797 }, { "epoch": 0.9936621692198802, "grad_norm": 0.32421875, "learning_rate": 3.1713065136990126e-09, "loss": 1.9821, "step": 30798 }, { "epoch": 0.9936944330736766, "grad_norm": 0.337890625, "learning_rate": 3.139029920695702e-09, "loss": 1.9617, "step": 30799 }, { "epoch": 0.9937266969274728, "grad_norm": 0.345703125, "learning_rate": 3.1069184023779786e-09, "loss": 1.9525, "step": 30800 }, { "epoch": 0.9937589607812692, "grad_norm": 0.333984375, "learning_rate": 3.0749719591005588e-09, "loss": 1.9948, "step": 30801 }, { "epoch": 0.9937912246350655, "grad_norm": 0.333984375, "learning_rate": 3.0431905912148285e-09, "loss": 2.0041, "step": 30802 }, { "epoch": 0.9938234884888619, "grad_norm": 0.33203125, "learning_rate": 3.011574299072173e-09, "loss": 1.98, "step": 30803 }, { "epoch": 0.9938557523426582, "grad_norm": 0.330078125, "learning_rate": 2.9801230830189817e-09, "loss": 1.9715, "step": 30804 }, { "epoch": 0.9938880161964546, "grad_norm": 0.33203125, "learning_rate": 2.948836943401645e-09, "loss": 2.0067, "step": 30805 }, { "epoch": 0.9939202800502509, "grad_norm": 0.32421875, "learning_rate": 2.917715880564886e-09, "loss": 2.0076, "step": 30806 }, { "epoch": 0.9939525439040473, "grad_norm": 0.326171875, "learning_rate": 2.8867598948500997e-09, "loss": 1.9802, "step": 30807 }, { "epoch": 0.9939848077578436, "grad_norm": 0.33984375, "learning_rate": 2.8559689866003434e-09, "loss": 2.0016, "step": 30808 }, { "epoch": 0.99401707161164, "grad_norm": 0.333984375, "learning_rate": 2.825343156152016e-09, "loss": 2.0078, "step": 30809 }, { "epoch": 0.9940493354654364, "grad_norm": 0.330078125, "learning_rate": 2.7948824038431797e-09, "loss": 1.9628, "step": 30810 }, { "epoch": 0.9940815993192327, "grad_norm": 0.32421875, "learning_rate": 2.764586730010232e-09, "loss": 1.9848, "step": 30811 }, { "epoch": 0.9941138631730291, "grad_norm": 0.330078125, "learning_rate": 2.7344561349845755e-09, "loss": 1.9942, "step": 30812 }, { "epoch": 0.9941461270268254, "grad_norm": 0.33203125, "learning_rate": 2.704490619100941e-09, "loss": 1.9893, "step": 30813 }, { "epoch": 0.9941783908806218, "grad_norm": 0.337890625, "learning_rate": 2.674690182685735e-09, "loss": 1.9796, "step": 30814 }, { "epoch": 0.994210654734418, "grad_norm": 0.333984375, "learning_rate": 2.6450548260686933e-09, "loss": 1.9956, "step": 30815 }, { "epoch": 0.9942429185882145, "grad_norm": 0.322265625, "learning_rate": 2.615584549576222e-09, "loss": 1.9783, "step": 30816 }, { "epoch": 0.9942751824420107, "grad_norm": 0.33203125, "learning_rate": 2.586279353533061e-09, "loss": 1.9375, "step": 30817 }, { "epoch": 0.9943074462958071, "grad_norm": 0.326171875, "learning_rate": 2.5571392382606195e-09, "loss": 1.9732, "step": 30818 }, { "epoch": 0.9943397101496034, "grad_norm": 0.3359375, "learning_rate": 2.5281642040786425e-09, "loss": 1.99, "step": 30819 }, { "epoch": 0.9943719740033998, "grad_norm": 0.33984375, "learning_rate": 2.4993542513085387e-09, "loss": 1.958, "step": 30820 }, { "epoch": 0.9944042378571961, "grad_norm": 0.34375, "learning_rate": 2.4707093802667225e-09, "loss": 1.9736, "step": 30821 }, { "epoch": 0.9944365017109925, "grad_norm": 0.330078125, "learning_rate": 2.4422295912679415e-09, "loss": 1.9954, "step": 30822 }, { "epoch": 0.9944687655647888, "grad_norm": 0.32421875, "learning_rate": 2.413914884625279e-09, "loss": 1.9773, "step": 30823 }, { "epoch": 0.9945010294185852, "grad_norm": 0.337890625, "learning_rate": 2.3857652606518176e-09, "loss": 2.0004, "step": 30824 }, { "epoch": 0.9945332932723815, "grad_norm": 0.33203125, "learning_rate": 2.3577807196556443e-09, "loss": 2.0052, "step": 30825 }, { "epoch": 0.9945655571261779, "grad_norm": 0.333984375, "learning_rate": 2.3299612619465114e-09, "loss": 1.9864, "step": 30826 }, { "epoch": 0.9945978209799742, "grad_norm": 0.326171875, "learning_rate": 2.3023068878308405e-09, "loss": 1.9899, "step": 30827 }, { "epoch": 0.9946300848337706, "grad_norm": 0.328125, "learning_rate": 2.274817597610057e-09, "loss": 1.9749, "step": 30828 }, { "epoch": 0.994662348687567, "grad_norm": 0.328125, "learning_rate": 2.2474933915905828e-09, "loss": 1.9985, "step": 30829 }, { "epoch": 0.9946946125413633, "grad_norm": 0.330078125, "learning_rate": 2.2203342700688468e-09, "loss": 1.9807, "step": 30830 }, { "epoch": 0.9947268763951597, "grad_norm": 0.333984375, "learning_rate": 2.19334023334794e-09, "loss": 2.0067, "step": 30831 }, { "epoch": 0.994759140248956, "grad_norm": 0.3359375, "learning_rate": 2.166511281724293e-09, "loss": 1.9867, "step": 30832 }, { "epoch": 0.9947914041027524, "grad_norm": 0.337890625, "learning_rate": 2.1398474154910032e-09, "loss": 2.0011, "step": 30833 }, { "epoch": 0.9948236679565486, "grad_norm": 0.337890625, "learning_rate": 2.1133486349428354e-09, "loss": 2.0049, "step": 30834 }, { "epoch": 0.994855931810345, "grad_norm": 0.330078125, "learning_rate": 2.0870149403728888e-09, "loss": 1.9976, "step": 30835 }, { "epoch": 0.9948881956641413, "grad_norm": 0.33203125, "learning_rate": 2.0608463320676008e-09, "loss": 2.0004, "step": 30836 }, { "epoch": 0.9949204595179377, "grad_norm": 0.33203125, "learning_rate": 2.034842810318405e-09, "loss": 1.9713, "step": 30837 }, { "epoch": 0.994952723371734, "grad_norm": 0.33203125, "learning_rate": 2.009004375410073e-09, "loss": 2.0001, "step": 30838 }, { "epoch": 0.9949849872255304, "grad_norm": 0.33984375, "learning_rate": 1.9833310276273774e-09, "loss": 1.9976, "step": 30839 }, { "epoch": 0.9950172510793267, "grad_norm": 0.3359375, "learning_rate": 1.95782276725176e-09, "loss": 1.9804, "step": 30840 }, { "epoch": 0.9950495149331231, "grad_norm": 0.330078125, "learning_rate": 1.932479594564662e-09, "loss": 1.9478, "step": 30841 }, { "epoch": 0.9950817787869194, "grad_norm": 0.333984375, "learning_rate": 1.90730150984586e-09, "loss": 1.9997, "step": 30842 }, { "epoch": 0.9951140426407158, "grad_norm": 0.341796875, "learning_rate": 1.8822885133717994e-09, "loss": 1.9862, "step": 30843 }, { "epoch": 0.9951463064945121, "grad_norm": 0.33203125, "learning_rate": 1.8574406054189253e-09, "loss": 1.9763, "step": 30844 }, { "epoch": 0.9951785703483085, "grad_norm": 0.33984375, "learning_rate": 1.8327577862570223e-09, "loss": 1.9823, "step": 30845 }, { "epoch": 0.9952108342021048, "grad_norm": 0.33203125, "learning_rate": 1.8082400561625356e-09, "loss": 1.9835, "step": 30846 }, { "epoch": 0.9952430980559012, "grad_norm": 0.341796875, "learning_rate": 1.7838874154035844e-09, "loss": 1.9963, "step": 30847 }, { "epoch": 0.9952753619096975, "grad_norm": 0.33203125, "learning_rate": 1.7596998642466223e-09, "loss": 1.9608, "step": 30848 }, { "epoch": 0.9953076257634939, "grad_norm": 0.337890625, "learning_rate": 1.7356774029597678e-09, "loss": 1.979, "step": 30849 }, { "epoch": 0.9953398896172903, "grad_norm": 0.337890625, "learning_rate": 1.7118200318078091e-09, "loss": 1.9572, "step": 30850 }, { "epoch": 0.9953721534710865, "grad_norm": 0.330078125, "learning_rate": 1.6881277510505388e-09, "loss": 1.9757, "step": 30851 }, { "epoch": 0.9954044173248829, "grad_norm": 0.333984375, "learning_rate": 1.6646005609527448e-09, "loss": 1.9911, "step": 30852 }, { "epoch": 0.9954366811786792, "grad_norm": 0.333984375, "learning_rate": 1.6412384617692233e-09, "loss": 1.9736, "step": 30853 }, { "epoch": 0.9954689450324756, "grad_norm": 0.33203125, "learning_rate": 1.618041453761432e-09, "loss": 2.011, "step": 30854 }, { "epoch": 0.9955012088862719, "grad_norm": 0.33203125, "learning_rate": 1.595009537180836e-09, "loss": 1.9973, "step": 30855 }, { "epoch": 0.9955334727400683, "grad_norm": 0.33203125, "learning_rate": 1.5721427122822318e-09, "loss": 2.0012, "step": 30856 }, { "epoch": 0.9955657365938646, "grad_norm": 0.330078125, "learning_rate": 1.5494409793187504e-09, "loss": 2.0009, "step": 30857 }, { "epoch": 0.995598000447661, "grad_norm": 0.3359375, "learning_rate": 1.5269043385385262e-09, "loss": 1.988, "step": 30858 }, { "epoch": 0.9956302643014573, "grad_norm": 0.3359375, "learning_rate": 1.5045327901896944e-09, "loss": 2.01, "step": 30859 }, { "epoch": 0.9956625281552537, "grad_norm": 0.326171875, "learning_rate": 1.4823263345203896e-09, "loss": 2.0017, "step": 30860 }, { "epoch": 0.99569479200905, "grad_norm": 0.33203125, "learning_rate": 1.460284971773751e-09, "loss": 1.9918, "step": 30861 }, { "epoch": 0.9957270558628464, "grad_norm": 0.3359375, "learning_rate": 1.4384087021912517e-09, "loss": 2.0002, "step": 30862 }, { "epoch": 0.9957593197166427, "grad_norm": 0.33984375, "learning_rate": 1.4166975260160308e-09, "loss": 2.0107, "step": 30863 }, { "epoch": 0.9957915835704391, "grad_norm": 0.328125, "learning_rate": 1.3951514434845658e-09, "loss": 1.9855, "step": 30864 }, { "epoch": 0.9958238474242354, "grad_norm": 0.326171875, "learning_rate": 1.3737704548366647e-09, "loss": 1.9531, "step": 30865 }, { "epoch": 0.9958561112780318, "grad_norm": 0.333984375, "learning_rate": 1.3525545603054746e-09, "loss": 1.9954, "step": 30866 }, { "epoch": 0.995888375131828, "grad_norm": 0.32421875, "learning_rate": 1.3315037601258073e-09, "loss": 1.9764, "step": 30867 }, { "epoch": 0.9959206389856244, "grad_norm": 0.333984375, "learning_rate": 1.3106180545274794e-09, "loss": 1.9568, "step": 30868 }, { "epoch": 0.9959529028394207, "grad_norm": 0.3359375, "learning_rate": 1.2898974437436373e-09, "loss": 1.9816, "step": 30869 }, { "epoch": 0.9959851666932171, "grad_norm": 0.328125, "learning_rate": 1.2693419280007667e-09, "loss": 2.0009, "step": 30870 }, { "epoch": 0.9960174305470135, "grad_norm": 0.330078125, "learning_rate": 1.2489515075236878e-09, "loss": 1.9882, "step": 30871 }, { "epoch": 0.9960496944008098, "grad_norm": 0.328125, "learning_rate": 1.2287261825388862e-09, "loss": 1.9834, "step": 30872 }, { "epoch": 0.9960819582546062, "grad_norm": 0.326171875, "learning_rate": 1.2086659532678513e-09, "loss": 1.9647, "step": 30873 }, { "epoch": 0.9961142221084025, "grad_norm": 0.33984375, "learning_rate": 1.1887708199320723e-09, "loss": 1.9775, "step": 30874 }, { "epoch": 0.9961464859621989, "grad_norm": 0.330078125, "learning_rate": 1.1690407827497085e-09, "loss": 1.9729, "step": 30875 }, { "epoch": 0.9961787498159952, "grad_norm": 0.326171875, "learning_rate": 1.1494758419389185e-09, "loss": 2.0013, "step": 30876 }, { "epoch": 0.9962110136697916, "grad_norm": 0.330078125, "learning_rate": 1.1300759977145302e-09, "loss": 2.0152, "step": 30877 }, { "epoch": 0.9962432775235879, "grad_norm": 0.32421875, "learning_rate": 1.110841250289707e-09, "loss": 1.9712, "step": 30878 }, { "epoch": 0.9962755413773843, "grad_norm": 0.337890625, "learning_rate": 1.0917715998759458e-09, "loss": 1.9544, "step": 30879 }, { "epoch": 0.9963078052311806, "grad_norm": 0.341796875, "learning_rate": 1.0728670466847446e-09, "loss": 1.9793, "step": 30880 }, { "epoch": 0.996340069084977, "grad_norm": 0.3359375, "learning_rate": 1.0541275909226046e-09, "loss": 1.9854, "step": 30881 }, { "epoch": 0.9963723329387733, "grad_norm": 0.333984375, "learning_rate": 1.0355532327976925e-09, "loss": 1.9836, "step": 30882 }, { "epoch": 0.9964045967925697, "grad_norm": 0.328125, "learning_rate": 1.0171439725131792e-09, "loss": 2.0102, "step": 30883 }, { "epoch": 0.9964368606463659, "grad_norm": 0.333984375, "learning_rate": 9.988998102705705e-10, "loss": 2.0024, "step": 30884 }, { "epoch": 0.9964691245001623, "grad_norm": 0.328125, "learning_rate": 9.808207462713715e-10, "loss": 1.9517, "step": 30885 }, { "epoch": 0.9965013883539586, "grad_norm": 0.333984375, "learning_rate": 9.629067807170877e-10, "loss": 1.9861, "step": 30886 }, { "epoch": 0.996533652207755, "grad_norm": 0.333984375, "learning_rate": 9.45157913800898e-10, "loss": 1.9962, "step": 30887 }, { "epoch": 0.9965659160615513, "grad_norm": 0.32421875, "learning_rate": 9.275741457209774e-10, "loss": 2.0016, "step": 30888 }, { "epoch": 0.9965981799153477, "grad_norm": 0.326171875, "learning_rate": 9.101554766705045e-10, "loss": 1.9998, "step": 30889 }, { "epoch": 0.9966304437691441, "grad_norm": 0.337890625, "learning_rate": 8.929019068393274e-10, "loss": 1.993, "step": 30890 }, { "epoch": 0.9966627076229404, "grad_norm": 0.337890625, "learning_rate": 8.758134364206249e-10, "loss": 1.965, "step": 30891 }, { "epoch": 0.9966949714767368, "grad_norm": 0.330078125, "learning_rate": 8.588900655992493e-10, "loss": 1.9643, "step": 30892 }, { "epoch": 0.9967272353305331, "grad_norm": 0.3359375, "learning_rate": 8.421317945633833e-10, "loss": 1.9972, "step": 30893 }, { "epoch": 0.9967594991843295, "grad_norm": 0.33203125, "learning_rate": 8.255386234962137e-10, "loss": 2.0128, "step": 30894 }, { "epoch": 0.9967917630381258, "grad_norm": 0.33984375, "learning_rate": 8.091105525825926e-10, "loss": 2.0233, "step": 30895 }, { "epoch": 0.9968240268919222, "grad_norm": 0.330078125, "learning_rate": 7.928475820007108e-10, "loss": 1.9729, "step": 30896 }, { "epoch": 0.9968562907457185, "grad_norm": 0.3359375, "learning_rate": 7.767497119320899e-10, "loss": 1.9499, "step": 30897 }, { "epoch": 0.9968885545995149, "grad_norm": 0.33984375, "learning_rate": 7.608169425515899e-10, "loss": 1.964, "step": 30898 }, { "epoch": 0.9969208184533112, "grad_norm": 0.349609375, "learning_rate": 7.450492740357362e-10, "loss": 2.0145, "step": 30899 }, { "epoch": 0.9969530823071076, "grad_norm": 0.3359375, "learning_rate": 7.294467065577237e-10, "loss": 2.016, "step": 30900 }, { "epoch": 0.9969853461609038, "grad_norm": 0.333984375, "learning_rate": 7.140092402890819e-10, "loss": 1.9965, "step": 30901 }, { "epoch": 0.9970176100147002, "grad_norm": 0.33984375, "learning_rate": 6.987368754013401e-10, "loss": 2.022, "step": 30902 }, { "epoch": 0.9970498738684965, "grad_norm": 0.3359375, "learning_rate": 6.836296120610319e-10, "loss": 1.993, "step": 30903 }, { "epoch": 0.9970821377222929, "grad_norm": 0.337890625, "learning_rate": 6.686874504346907e-10, "loss": 2.0022, "step": 30904 }, { "epoch": 0.9971144015760892, "grad_norm": 0.353515625, "learning_rate": 6.539103906871846e-10, "loss": 2.0242, "step": 30905 }, { "epoch": 0.9971466654298856, "grad_norm": 0.341796875, "learning_rate": 6.392984329817164e-10, "loss": 2.0021, "step": 30906 }, { "epoch": 0.9971789292836819, "grad_norm": 0.326171875, "learning_rate": 6.248515774764929e-10, "loss": 1.9918, "step": 30907 }, { "epoch": 0.9972111931374783, "grad_norm": 0.330078125, "learning_rate": 6.105698243347169e-10, "loss": 2.011, "step": 30908 }, { "epoch": 0.9972434569912746, "grad_norm": 0.333984375, "learning_rate": 5.964531737095991e-10, "loss": 1.9718, "step": 30909 }, { "epoch": 0.997275720845071, "grad_norm": 0.32421875, "learning_rate": 5.82501625757681e-10, "loss": 1.9887, "step": 30910 }, { "epoch": 0.9973079846988674, "grad_norm": 0.33984375, "learning_rate": 5.687151806338386e-10, "loss": 2.0225, "step": 30911 }, { "epoch": 0.9973402485526637, "grad_norm": 0.330078125, "learning_rate": 5.550938384896176e-10, "loss": 1.9774, "step": 30912 }, { "epoch": 0.9973725124064601, "grad_norm": 0.33203125, "learning_rate": 5.416375994732325e-10, "loss": 2.0091, "step": 30913 }, { "epoch": 0.9974047762602564, "grad_norm": 0.3359375, "learning_rate": 5.283464637345636e-10, "loss": 2.0008, "step": 30914 }, { "epoch": 0.9974370401140528, "grad_norm": 0.337890625, "learning_rate": 5.152204314201603e-10, "loss": 1.9522, "step": 30915 }, { "epoch": 0.997469303967849, "grad_norm": 0.326171875, "learning_rate": 5.022595026732413e-10, "loss": 2.0075, "step": 30916 }, { "epoch": 0.9975015678216455, "grad_norm": 0.33984375, "learning_rate": 4.8946367763536e-10, "loss": 1.9827, "step": 30917 }, { "epoch": 0.9975338316754417, "grad_norm": 0.328125, "learning_rate": 4.768329564497354e-10, "loss": 2.0265, "step": 30918 }, { "epoch": 0.9975660955292381, "grad_norm": 0.33203125, "learning_rate": 4.6436733925459e-10, "loss": 1.9902, "step": 30919 }, { "epoch": 0.9975983593830344, "grad_norm": 0.32421875, "learning_rate": 4.5206682618814666e-10, "loss": 1.9897, "step": 30920 }, { "epoch": 0.9976306232368308, "grad_norm": 0.326171875, "learning_rate": 4.399314173836322e-10, "loss": 1.9655, "step": 30921 }, { "epoch": 0.9976628870906271, "grad_norm": 0.322265625, "learning_rate": 4.2796111297593866e-10, "loss": 1.9984, "step": 30922 }, { "epoch": 0.9976951509444235, "grad_norm": 0.33203125, "learning_rate": 4.1615591309662747e-10, "loss": 1.9745, "step": 30923 }, { "epoch": 0.9977274147982198, "grad_norm": 0.333984375, "learning_rate": 4.045158178755948e-10, "loss": 1.9843, "step": 30924 }, { "epoch": 0.9977596786520162, "grad_norm": 0.33203125, "learning_rate": 3.930408274410713e-10, "loss": 1.9816, "step": 30925 }, { "epoch": 0.9977919425058125, "grad_norm": 0.3359375, "learning_rate": 3.817309419179571e-10, "loss": 1.9934, "step": 30926 }, { "epoch": 0.9978242063596089, "grad_norm": 0.33203125, "learning_rate": 3.7058616143281766e-10, "loss": 2.0039, "step": 30927 }, { "epoch": 0.9978564702134052, "grad_norm": 0.33203125, "learning_rate": 3.596064861088877e-10, "loss": 1.9775, "step": 30928 }, { "epoch": 0.9978887340672016, "grad_norm": 0.32421875, "learning_rate": 3.48791916064406e-10, "loss": 1.9863, "step": 30929 }, { "epoch": 0.997920997920998, "grad_norm": 0.3359375, "learning_rate": 3.381424514209419e-10, "loss": 1.9951, "step": 30930 }, { "epoch": 0.9979532617747943, "grad_norm": 0.328125, "learning_rate": 3.276580922934036e-10, "loss": 1.9723, "step": 30931 }, { "epoch": 0.9979855256285907, "grad_norm": 0.349609375, "learning_rate": 3.1733883879836446e-10, "loss": 1.9863, "step": 30932 }, { "epoch": 0.998017789482387, "grad_norm": 0.330078125, "learning_rate": 3.0718469104906724e-10, "loss": 2.0297, "step": 30933 }, { "epoch": 0.9980500533361834, "grad_norm": 0.3359375, "learning_rate": 2.9719564915875466e-10, "loss": 2.0163, "step": 30934 }, { "epoch": 0.9980823171899796, "grad_norm": 0.33203125, "learning_rate": 2.873717132340081e-10, "loss": 2.0108, "step": 30935 }, { "epoch": 0.998114581043776, "grad_norm": 0.328125, "learning_rate": 2.777128833864051e-10, "loss": 1.9764, "step": 30936 }, { "epoch": 0.9981468448975723, "grad_norm": 0.328125, "learning_rate": 2.68219159722527e-10, "loss": 1.9993, "step": 30937 }, { "epoch": 0.9981791087513687, "grad_norm": 0.326171875, "learning_rate": 2.5889054234395916e-10, "loss": 1.9827, "step": 30938 }, { "epoch": 0.998211372605165, "grad_norm": 0.328125, "learning_rate": 2.4972703135395237e-10, "loss": 2.002, "step": 30939 }, { "epoch": 0.9982436364589614, "grad_norm": 0.33203125, "learning_rate": 2.4072862685409203e-10, "loss": 1.9983, "step": 30940 }, { "epoch": 0.9982759003127577, "grad_norm": 0.330078125, "learning_rate": 2.318953289442982e-10, "loss": 2.0056, "step": 30941 }, { "epoch": 0.9983081641665541, "grad_norm": 0.33203125, "learning_rate": 2.2322713772116031e-10, "loss": 1.9918, "step": 30942 }, { "epoch": 0.9983404280203504, "grad_norm": 0.337890625, "learning_rate": 2.1472405328126776e-10, "loss": 2.0108, "step": 30943 }, { "epoch": 0.9983726918741468, "grad_norm": 0.3359375, "learning_rate": 2.0638607571454859e-10, "loss": 1.9876, "step": 30944 }, { "epoch": 0.9984049557279431, "grad_norm": 0.33203125, "learning_rate": 1.982132051159269e-10, "loss": 1.9982, "step": 30945 }, { "epoch": 0.9984372195817395, "grad_norm": 0.33203125, "learning_rate": 1.902054415753307e-10, "loss": 1.9836, "step": 30946 }, { "epoch": 0.9984694834355358, "grad_norm": 0.33984375, "learning_rate": 1.8236278517935745e-10, "loss": 2.0139, "step": 30947 }, { "epoch": 0.9985017472893322, "grad_norm": 0.3671875, "learning_rate": 1.746852360146045e-10, "loss": 1.9809, "step": 30948 }, { "epoch": 0.9985340111431285, "grad_norm": 0.3359375, "learning_rate": 1.6717279416766928e-10, "loss": 1.9843, "step": 30949 }, { "epoch": 0.9985662749969249, "grad_norm": 0.3359375, "learning_rate": 1.5982545971848784e-10, "loss": 1.9955, "step": 30950 }, { "epoch": 0.9985985388507213, "grad_norm": 0.333984375, "learning_rate": 1.5264323274866153e-10, "loss": 1.9575, "step": 30951 }, { "epoch": 0.9986308027045175, "grad_norm": 0.328125, "learning_rate": 1.4562611333812648e-10, "loss": 1.9443, "step": 30952 }, { "epoch": 0.9986630665583139, "grad_norm": 0.330078125, "learning_rate": 1.3877410156348803e-10, "loss": 2.0033, "step": 30953 }, { "epoch": 0.9986953304121102, "grad_norm": 0.3671875, "learning_rate": 1.3208719750135157e-10, "loss": 1.9655, "step": 30954 }, { "epoch": 0.9987275942659066, "grad_norm": 0.33203125, "learning_rate": 1.255654012233265e-10, "loss": 2.0118, "step": 30955 }, { "epoch": 0.9987598581197029, "grad_norm": 0.33203125, "learning_rate": 1.1920871280268754e-10, "loss": 2.0161, "step": 30956 }, { "epoch": 0.9987921219734993, "grad_norm": 0.33984375, "learning_rate": 1.130171323093787e-10, "loss": 1.9853, "step": 30957 }, { "epoch": 0.9988243858272956, "grad_norm": 0.328125, "learning_rate": 1.0699065981001343e-10, "loss": 2.0055, "step": 30958 }, { "epoch": 0.998856649681092, "grad_norm": 0.333984375, "learning_rate": 1.011292953728704e-10, "loss": 1.9909, "step": 30959 }, { "epoch": 0.9988889135348883, "grad_norm": 0.328125, "learning_rate": 9.543303906123236e-11, "loss": 2.0069, "step": 30960 }, { "epoch": 0.9989211773886847, "grad_norm": 0.33203125, "learning_rate": 8.990189093838197e-11, "loss": 1.9965, "step": 30961 }, { "epoch": 0.998953441242481, "grad_norm": 0.337890625, "learning_rate": 8.453585106593664e-11, "loss": 1.9991, "step": 30962 }, { "epoch": 0.9989857050962774, "grad_norm": 0.33203125, "learning_rate": 7.933491950051775e-11, "loss": 2.0222, "step": 30963 }, { "epoch": 0.9990179689500737, "grad_norm": 0.330078125, "learning_rate": 7.429909630207731e-11, "loss": 1.9885, "step": 30964 }, { "epoch": 0.9990502328038701, "grad_norm": 0.333984375, "learning_rate": 6.942838152390607e-11, "loss": 2.0179, "step": 30965 }, { "epoch": 0.9990824966576664, "grad_norm": 0.3359375, "learning_rate": 6.472277522096004e-11, "loss": 1.9999, "step": 30966 }, { "epoch": 0.9991147605114628, "grad_norm": 0.322265625, "learning_rate": 6.018227744319926e-11, "loss": 1.9711, "step": 30967 }, { "epoch": 0.999147024365259, "grad_norm": 0.3359375, "learning_rate": 5.580688824391444e-11, "loss": 1.9845, "step": 30968 }, { "epoch": 0.9991792882190554, "grad_norm": 0.330078125, "learning_rate": 5.159660766973495e-11, "loss": 1.9743, "step": 30969 }, { "epoch": 0.9992115520728517, "grad_norm": 0.3359375, "learning_rate": 4.755143576562482e-11, "loss": 2.0016, "step": 30970 }, { "epoch": 0.9992438159266481, "grad_norm": 0.330078125, "learning_rate": 4.367137257654807e-11, "loss": 1.9739, "step": 30971 }, { "epoch": 0.9992760797804445, "grad_norm": 0.330078125, "learning_rate": 3.9956418147468755e-11, "loss": 2.0119, "step": 30972 }, { "epoch": 0.9993083436342408, "grad_norm": 0.328125, "learning_rate": 3.6406572516689554e-11, "loss": 1.9744, "step": 30973 }, { "epoch": 0.9993406074880372, "grad_norm": 0.34375, "learning_rate": 3.30218357241785e-11, "loss": 1.9951, "step": 30974 }, { "epoch": 0.9993728713418335, "grad_norm": 0.322265625, "learning_rate": 2.980220780657294e-11, "loss": 1.9921, "step": 30975 }, { "epoch": 0.9994051351956299, "grad_norm": 0.326171875, "learning_rate": 2.6747688800510262e-11, "loss": 1.9726, "step": 30976 }, { "epoch": 0.9994373990494262, "grad_norm": 0.3359375, "learning_rate": 2.3858278739297135e-11, "loss": 1.9783, "step": 30977 }, { "epoch": 0.9994696629032226, "grad_norm": 0.3359375, "learning_rate": 2.1133977654574922e-11, "loss": 1.9893, "step": 30978 }, { "epoch": 0.9995019267570189, "grad_norm": 0.330078125, "learning_rate": 1.857478557631964e-11, "loss": 1.9881, "step": 30979 }, { "epoch": 0.9995341906108153, "grad_norm": 0.3359375, "learning_rate": 1.6180702532841986e-11, "loss": 1.9845, "step": 30980 }, { "epoch": 0.9995664544646116, "grad_norm": 0.33984375, "learning_rate": 1.3951728549121968e-11, "loss": 2.0001, "step": 30981 }, { "epoch": 0.999598718318408, "grad_norm": 0.33203125, "learning_rate": 1.1887863651804942e-11, "loss": 1.9526, "step": 30982 }, { "epoch": 0.9996309821722043, "grad_norm": 0.326171875, "learning_rate": 9.989107862540258e-12, "loss": 1.9711, "step": 30983 }, { "epoch": 0.9996632460260007, "grad_norm": 0.333984375, "learning_rate": 8.255461202977267e-12, "loss": 1.97, "step": 30984 }, { "epoch": 0.9996955098797969, "grad_norm": 0.337890625, "learning_rate": 6.686923691434643e-12, "loss": 2.0205, "step": 30985 }, { "epoch": 0.9997277737335933, "grad_norm": 0.328125, "learning_rate": 5.283495344565736e-12, "loss": 1.994, "step": 30986 }, { "epoch": 0.9997600375873896, "grad_norm": 0.333984375, "learning_rate": 4.045176179023891e-12, "loss": 2.0405, "step": 30987 }, { "epoch": 0.999792301441186, "grad_norm": 0.32421875, "learning_rate": 2.971966208131782e-12, "loss": 1.9603, "step": 30988 }, { "epoch": 0.9998245652949823, "grad_norm": 0.3359375, "learning_rate": 2.0638654418814185e-12, "loss": 1.9974, "step": 30989 }, { "epoch": 0.9998568291487787, "grad_norm": 0.328125, "learning_rate": 1.3208738935954756e-12, "loss": 1.9939, "step": 30990 }, { "epoch": 0.9998890930025751, "grad_norm": 0.33984375, "learning_rate": 7.429915699352918e-13, "loss": 1.9724, "step": 30991 }, { "epoch": 0.9999213568563714, "grad_norm": 0.330078125, "learning_rate": 3.3021847756220523e-13, "loss": 1.9435, "step": 30992 }, { "epoch": 0.9999536207101678, "grad_norm": 0.3359375, "learning_rate": 8.255461980688494e-14, "loss": 1.9939, "step": 30993 }, { "epoch": 0.9999858845639641, "grad_norm": 0.33203125, "learning_rate": 0.0, "loss": 2.0098, "step": 30994 }, { "epoch": 1.0000322638537964, "grad_norm": 0.3359375, "learning_rate": 1.540516451161607e-05, "loss": 2.0265, "step": 30995 }, { "epoch": 1.0000645277075928, "grad_norm": 0.337890625, "learning_rate": 1.540439148445991e-05, "loss": 1.9554, "step": 30996 }, { "epoch": 1.000096791561389, "grad_norm": 0.443359375, "learning_rate": 1.5403618456228963e-05, "loss": 1.9931, "step": 30997 }, { "epoch": 1.0001290554151854, "grad_norm": 0.5546875, "learning_rate": 1.5402845426925274e-05, "loss": 2.01, "step": 30998 }, { "epoch": 1.0001613192689818, "grad_norm": 0.53515625, "learning_rate": 1.54020723965509e-05, "loss": 2.0185, "step": 30999 }, { "epoch": 1.0001935831227782, "grad_norm": 0.408203125, "learning_rate": 1.5401299365107892e-05, "loss": 1.9719, "step": 31000 }, { "epoch": 1.0002258469765744, "grad_norm": 0.48046875, "learning_rate": 1.5400526332598316e-05, "loss": 1.9689, "step": 31001 }, { "epoch": 1.0002581108303708, "grad_norm": 0.4140625, "learning_rate": 1.5399753299024212e-05, "loss": 1.9739, "step": 31002 }, { "epoch": 1.0002903746841671, "grad_norm": 0.44140625, "learning_rate": 1.539898026438764e-05, "loss": 1.9812, "step": 31003 }, { "epoch": 1.0003226385379635, "grad_norm": 0.470703125, "learning_rate": 1.539820722869066e-05, "loss": 1.9997, "step": 31004 }, { "epoch": 1.0003549023917597, "grad_norm": 0.412109375, "learning_rate": 1.5397434191935324e-05, "loss": 1.9812, "step": 31005 }, { "epoch": 1.0003871662455561, "grad_norm": 0.427734375, "learning_rate": 1.539666115412368e-05, "loss": 1.9876, "step": 31006 }, { "epoch": 1.0004194300993525, "grad_norm": 0.369140625, "learning_rate": 1.539588811525779e-05, "loss": 1.9622, "step": 31007 }, { "epoch": 1.000451693953149, "grad_norm": 0.4140625, "learning_rate": 1.5395115075339703e-05, "loss": 1.9873, "step": 31008 }, { "epoch": 1.000483957806945, "grad_norm": 0.37890625, "learning_rate": 1.539434203437148e-05, "loss": 2.0225, "step": 31009 }, { "epoch": 1.0005162216607415, "grad_norm": 0.388671875, "learning_rate": 1.5393568992355168e-05, "loss": 2.0216, "step": 31010 }, { "epoch": 1.000548485514538, "grad_norm": 0.408203125, "learning_rate": 1.539279594929283e-05, "loss": 1.9828, "step": 31011 }, { "epoch": 1.0005807493683343, "grad_norm": 0.369140625, "learning_rate": 1.5392022905186514e-05, "loss": 1.941, "step": 31012 }, { "epoch": 1.0006130132221305, "grad_norm": 0.400390625, "learning_rate": 1.539124986003828e-05, "loss": 1.9547, "step": 31013 }, { "epoch": 1.0006452770759269, "grad_norm": 0.375, "learning_rate": 1.5390476813850173e-05, "loss": 1.9566, "step": 31014 }, { "epoch": 1.0006775409297233, "grad_norm": 0.373046875, "learning_rate": 1.538970376662426e-05, "loss": 1.9968, "step": 31015 }, { "epoch": 1.0007098047835197, "grad_norm": 0.38671875, "learning_rate": 1.5388930718362594e-05, "loss": 1.9673, "step": 31016 }, { "epoch": 1.000742068637316, "grad_norm": 0.359375, "learning_rate": 1.5388157669067217e-05, "loss": 1.9937, "step": 31017 }, { "epoch": 1.0007743324911123, "grad_norm": 0.39453125, "learning_rate": 1.5387384618740194e-05, "loss": 2.0059, "step": 31018 }, { "epoch": 1.0008065963449086, "grad_norm": 0.38671875, "learning_rate": 1.538661156738358e-05, "loss": 1.9791, "step": 31019 }, { "epoch": 1.000838860198705, "grad_norm": 0.396484375, "learning_rate": 1.5385838514999425e-05, "loss": 1.978, "step": 31020 }, { "epoch": 1.0008711240525014, "grad_norm": 0.408203125, "learning_rate": 1.538506546158979e-05, "loss": 1.9716, "step": 31021 }, { "epoch": 1.0009033879062976, "grad_norm": 0.361328125, "learning_rate": 1.5384292407156723e-05, "loss": 1.9636, "step": 31022 }, { "epoch": 1.000935651760094, "grad_norm": 0.384765625, "learning_rate": 1.5383519351702277e-05, "loss": 1.9754, "step": 31023 }, { "epoch": 1.0009679156138904, "grad_norm": 0.390625, "learning_rate": 1.5382746295228516e-05, "loss": 1.9804, "step": 31024 }, { "epoch": 1.0010001794676868, "grad_norm": 0.36328125, "learning_rate": 1.5381973237737485e-05, "loss": 1.9448, "step": 31025 }, { "epoch": 1.001032443321483, "grad_norm": 0.373046875, "learning_rate": 1.5381200179231248e-05, "loss": 1.9738, "step": 31026 }, { "epoch": 1.0010647071752794, "grad_norm": 0.369140625, "learning_rate": 1.538042711971185e-05, "loss": 2.0131, "step": 31027 }, { "epoch": 1.0010969710290758, "grad_norm": 0.37890625, "learning_rate": 1.5379654059181357e-05, "loss": 2.0079, "step": 31028 }, { "epoch": 1.0011292348828722, "grad_norm": 0.3828125, "learning_rate": 1.537888099764181e-05, "loss": 1.9778, "step": 31029 }, { "epoch": 1.0011614987366684, "grad_norm": 0.365234375, "learning_rate": 1.5378107935095273e-05, "loss": 2.0064, "step": 31030 }, { "epoch": 1.0011937625904648, "grad_norm": 0.373046875, "learning_rate": 1.53773348715438e-05, "loss": 1.9878, "step": 31031 }, { "epoch": 1.0012260264442612, "grad_norm": 0.36328125, "learning_rate": 1.5376561806989442e-05, "loss": 2.0104, "step": 31032 }, { "epoch": 1.0012582902980576, "grad_norm": 0.353515625, "learning_rate": 1.5375788741434258e-05, "loss": 1.9795, "step": 31033 }, { "epoch": 1.0012905541518538, "grad_norm": 0.375, "learning_rate": 1.53750156748803e-05, "loss": 1.9621, "step": 31034 }, { "epoch": 1.0013228180056502, "grad_norm": 0.373046875, "learning_rate": 1.5374242607329617e-05, "loss": 2.0138, "step": 31035 }, { "epoch": 1.0013550818594465, "grad_norm": 0.392578125, "learning_rate": 1.5373469538784278e-05, "loss": 1.9702, "step": 31036 }, { "epoch": 1.001387345713243, "grad_norm": 0.361328125, "learning_rate": 1.5372696469246324e-05, "loss": 1.9849, "step": 31037 }, { "epoch": 1.0014196095670393, "grad_norm": 0.38671875, "learning_rate": 1.5371923398717816e-05, "loss": 1.984, "step": 31038 }, { "epoch": 1.0014518734208355, "grad_norm": 0.3828125, "learning_rate": 1.5371150327200807e-05, "loss": 2.0016, "step": 31039 }, { "epoch": 1.001484137274632, "grad_norm": 0.35546875, "learning_rate": 1.5370377254697354e-05, "loss": 2.0152, "step": 31040 }, { "epoch": 1.0015164011284283, "grad_norm": 0.388671875, "learning_rate": 1.536960418120951e-05, "loss": 1.9801, "step": 31041 }, { "epoch": 1.0015486649822247, "grad_norm": 0.392578125, "learning_rate": 1.5368831106739327e-05, "loss": 1.9784, "step": 31042 }, { "epoch": 1.001580928836021, "grad_norm": 0.376953125, "learning_rate": 1.5368058031288866e-05, "loss": 1.9385, "step": 31043 }, { "epoch": 1.0016131926898173, "grad_norm": 0.41796875, "learning_rate": 1.5367284954860174e-05, "loss": 1.9814, "step": 31044 }, { "epoch": 1.0016454565436137, "grad_norm": 0.388671875, "learning_rate": 1.5366511877455314e-05, "loss": 1.9891, "step": 31045 }, { "epoch": 1.00167772039741, "grad_norm": 0.388671875, "learning_rate": 1.5365738799076332e-05, "loss": 1.987, "step": 31046 }, { "epoch": 1.0017099842512063, "grad_norm": 0.376953125, "learning_rate": 1.5364965719725288e-05, "loss": 1.9674, "step": 31047 }, { "epoch": 1.0017422481050027, "grad_norm": 0.388671875, "learning_rate": 1.536419263940424e-05, "loss": 1.9959, "step": 31048 }, { "epoch": 1.001774511958799, "grad_norm": 0.37109375, "learning_rate": 1.5363419558115238e-05, "loss": 1.9884, "step": 31049 }, { "epoch": 1.0018067758125955, "grad_norm": 0.3828125, "learning_rate": 1.536264647586033e-05, "loss": 1.9916, "step": 31050 }, { "epoch": 1.0018390396663917, "grad_norm": 0.365234375, "learning_rate": 1.536187339264158e-05, "loss": 1.9793, "step": 31051 }, { "epoch": 1.001871303520188, "grad_norm": 0.376953125, "learning_rate": 1.5361100308461045e-05, "loss": 1.9867, "step": 31052 }, { "epoch": 1.0019035673739844, "grad_norm": 0.349609375, "learning_rate": 1.5360327223320775e-05, "loss": 1.9987, "step": 31053 }, { "epoch": 1.0019358312277808, "grad_norm": 0.37109375, "learning_rate": 1.5359554137222822e-05, "loss": 1.9519, "step": 31054 }, { "epoch": 1.0019680950815772, "grad_norm": 0.369140625, "learning_rate": 1.5358781050169247e-05, "loss": 1.9807, "step": 31055 }, { "epoch": 1.0020003589353734, "grad_norm": 0.384765625, "learning_rate": 1.5358007962162093e-05, "loss": 1.9559, "step": 31056 }, { "epoch": 1.0020326227891698, "grad_norm": 0.400390625, "learning_rate": 1.5357234873203432e-05, "loss": 1.9761, "step": 31057 }, { "epoch": 1.0020648866429662, "grad_norm": 0.388671875, "learning_rate": 1.535646178329531e-05, "loss": 1.9901, "step": 31058 }, { "epoch": 1.0020971504967626, "grad_norm": 0.38671875, "learning_rate": 1.5355688692439778e-05, "loss": 1.9846, "step": 31059 }, { "epoch": 1.0021294143505588, "grad_norm": 0.373046875, "learning_rate": 1.5354915600638895e-05, "loss": 1.9677, "step": 31060 }, { "epoch": 1.0021616782043552, "grad_norm": 0.388671875, "learning_rate": 1.5354142507894712e-05, "loss": 1.9722, "step": 31061 }, { "epoch": 1.0021939420581516, "grad_norm": 0.369140625, "learning_rate": 1.5353369414209292e-05, "loss": 1.9622, "step": 31062 }, { "epoch": 1.002226205911948, "grad_norm": 0.396484375, "learning_rate": 1.535259631958468e-05, "loss": 1.9925, "step": 31063 }, { "epoch": 1.0022584697657442, "grad_norm": 0.365234375, "learning_rate": 1.5351823224022942e-05, "loss": 1.9925, "step": 31064 }, { "epoch": 1.0022907336195406, "grad_norm": 0.365234375, "learning_rate": 1.535105012752612e-05, "loss": 1.9992, "step": 31065 }, { "epoch": 1.002322997473337, "grad_norm": 0.373046875, "learning_rate": 1.5350277030096275e-05, "loss": 1.9782, "step": 31066 }, { "epoch": 1.0023552613271334, "grad_norm": 0.36328125, "learning_rate": 1.534950393173546e-05, "loss": 2.0096, "step": 31067 }, { "epoch": 1.0023875251809296, "grad_norm": 0.38671875, "learning_rate": 1.5348730832445735e-05, "loss": 1.9497, "step": 31068 }, { "epoch": 1.002419789034726, "grad_norm": 0.36328125, "learning_rate": 1.5347957732229153e-05, "loss": 1.9895, "step": 31069 }, { "epoch": 1.0024520528885223, "grad_norm": 0.359375, "learning_rate": 1.534718463108776e-05, "loss": 1.9779, "step": 31070 }, { "epoch": 1.0024843167423187, "grad_norm": 0.36328125, "learning_rate": 1.5346411529023625e-05, "loss": 1.969, "step": 31071 }, { "epoch": 1.002516580596115, "grad_norm": 0.3515625, "learning_rate": 1.5345638426038787e-05, "loss": 1.9737, "step": 31072 }, { "epoch": 1.0025488444499113, "grad_norm": 0.35546875, "learning_rate": 1.534486532213531e-05, "loss": 2.0071, "step": 31073 }, { "epoch": 1.0025811083037077, "grad_norm": 0.35546875, "learning_rate": 1.5344092217315257e-05, "loss": 1.989, "step": 31074 }, { "epoch": 1.0026133721575041, "grad_norm": 0.369140625, "learning_rate": 1.5343319111580664e-05, "loss": 2.0109, "step": 31075 }, { "epoch": 1.0026456360113005, "grad_norm": 0.373046875, "learning_rate": 1.5342546004933598e-05, "loss": 1.9786, "step": 31076 }, { "epoch": 1.0026778998650967, "grad_norm": 0.361328125, "learning_rate": 1.5341772897376113e-05, "loss": 1.9692, "step": 31077 }, { "epoch": 1.002710163718893, "grad_norm": 0.39453125, "learning_rate": 1.534099978891026e-05, "loss": 1.9554, "step": 31078 }, { "epoch": 1.0027424275726895, "grad_norm": 0.388671875, "learning_rate": 1.5340226679538098e-05, "loss": 1.9679, "step": 31079 }, { "epoch": 1.002774691426486, "grad_norm": 0.3984375, "learning_rate": 1.5339453569261675e-05, "loss": 1.9652, "step": 31080 }, { "epoch": 1.002806955280282, "grad_norm": 0.3828125, "learning_rate": 1.5338680458083053e-05, "loss": 1.9853, "step": 31081 }, { "epoch": 1.0028392191340785, "grad_norm": 0.38671875, "learning_rate": 1.5337907346004285e-05, "loss": 1.9915, "step": 31082 }, { "epoch": 1.0028714829878749, "grad_norm": 0.373046875, "learning_rate": 1.533713423302742e-05, "loss": 1.9721, "step": 31083 }, { "epoch": 1.0029037468416713, "grad_norm": 0.388671875, "learning_rate": 1.5336361119154518e-05, "loss": 1.9635, "step": 31084 }, { "epoch": 1.0029360106954675, "grad_norm": 0.375, "learning_rate": 1.5335588004387637e-05, "loss": 1.9355, "step": 31085 }, { "epoch": 1.0029682745492638, "grad_norm": 0.341796875, "learning_rate": 1.533481488872883e-05, "loss": 2.0046, "step": 31086 }, { "epoch": 1.0030005384030602, "grad_norm": 0.390625, "learning_rate": 1.5334041772180146e-05, "loss": 1.9708, "step": 31087 }, { "epoch": 1.0030328022568566, "grad_norm": 0.36328125, "learning_rate": 1.533326865474364e-05, "loss": 2.0114, "step": 31088 }, { "epoch": 1.0030650661106528, "grad_norm": 0.373046875, "learning_rate": 1.5332495536421377e-05, "loss": 1.9666, "step": 31089 }, { "epoch": 1.0030973299644492, "grad_norm": 0.37890625, "learning_rate": 1.5331722417215403e-05, "loss": 2.0174, "step": 31090 }, { "epoch": 1.0031295938182456, "grad_norm": 0.40625, "learning_rate": 1.5330949297127774e-05, "loss": 1.9773, "step": 31091 }, { "epoch": 1.003161857672042, "grad_norm": 0.3828125, "learning_rate": 1.533017617616055e-05, "loss": 1.9944, "step": 31092 }, { "epoch": 1.0031941215258382, "grad_norm": 0.375, "learning_rate": 1.5329403054315775e-05, "loss": 1.968, "step": 31093 }, { "epoch": 1.0032263853796346, "grad_norm": 0.38671875, "learning_rate": 1.5328629931595513e-05, "loss": 1.9685, "step": 31094 }, { "epoch": 1.003258649233431, "grad_norm": 0.3984375, "learning_rate": 1.532785680800182e-05, "loss": 1.9915, "step": 31095 }, { "epoch": 1.0032909130872274, "grad_norm": 0.388671875, "learning_rate": 1.532708368353674e-05, "loss": 1.9886, "step": 31096 }, { "epoch": 1.0033231769410238, "grad_norm": 0.3984375, "learning_rate": 1.5326310558202342e-05, "loss": 1.9788, "step": 31097 }, { "epoch": 1.00335544079482, "grad_norm": 0.416015625, "learning_rate": 1.532553743200067e-05, "loss": 1.9984, "step": 31098 }, { "epoch": 1.0033877046486164, "grad_norm": 0.41015625, "learning_rate": 1.532476430493378e-05, "loss": 1.9884, "step": 31099 }, { "epoch": 1.0034199685024128, "grad_norm": 0.390625, "learning_rate": 1.5323991177003736e-05, "loss": 1.9559, "step": 31100 }, { "epoch": 1.0034522323562092, "grad_norm": 0.40625, "learning_rate": 1.5323218048212585e-05, "loss": 1.9956, "step": 31101 }, { "epoch": 1.0034844962100054, "grad_norm": 0.361328125, "learning_rate": 1.5322444918562377e-05, "loss": 1.9911, "step": 31102 }, { "epoch": 1.0035167600638017, "grad_norm": 0.40234375, "learning_rate": 1.5321671788055175e-05, "loss": 1.9816, "step": 31103 }, { "epoch": 1.0035490239175981, "grad_norm": 0.376953125, "learning_rate": 1.5320898656693035e-05, "loss": 1.9837, "step": 31104 }, { "epoch": 1.0035812877713945, "grad_norm": 0.38671875, "learning_rate": 1.5320125524478004e-05, "loss": 1.9902, "step": 31105 }, { "epoch": 1.0036135516251907, "grad_norm": 0.38671875, "learning_rate": 1.5319352391412143e-05, "loss": 2.0036, "step": 31106 }, { "epoch": 1.0036458154789871, "grad_norm": 0.373046875, "learning_rate": 1.531857925749751e-05, "loss": 1.998, "step": 31107 }, { "epoch": 1.0036780793327835, "grad_norm": 0.388671875, "learning_rate": 1.531780612273615e-05, "loss": 1.9942, "step": 31108 }, { "epoch": 1.00371034318658, "grad_norm": 0.369140625, "learning_rate": 1.531703298713012e-05, "loss": 1.983, "step": 31109 }, { "epoch": 1.003742607040376, "grad_norm": 0.39453125, "learning_rate": 1.5316259850681483e-05, "loss": 1.9524, "step": 31110 }, { "epoch": 1.0037748708941725, "grad_norm": 0.396484375, "learning_rate": 1.5315486713392288e-05, "loss": 1.9766, "step": 31111 }, { "epoch": 1.003807134747969, "grad_norm": 0.357421875, "learning_rate": 1.531471357526459e-05, "loss": 2.0097, "step": 31112 }, { "epoch": 1.0038393986017653, "grad_norm": 0.423828125, "learning_rate": 1.5313940436300442e-05, "loss": 2.0107, "step": 31113 }, { "epoch": 1.0038716624555615, "grad_norm": 0.34765625, "learning_rate": 1.5313167296501902e-05, "loss": 1.9828, "step": 31114 }, { "epoch": 1.0039039263093579, "grad_norm": 0.37890625, "learning_rate": 1.5312394155871026e-05, "loss": 1.9641, "step": 31115 }, { "epoch": 1.0039361901631543, "grad_norm": 0.37890625, "learning_rate": 1.5311621014409865e-05, "loss": 1.9716, "step": 31116 }, { "epoch": 1.0039684540169507, "grad_norm": 0.3671875, "learning_rate": 1.5310847872120475e-05, "loss": 1.9884, "step": 31117 }, { "epoch": 1.004000717870747, "grad_norm": 0.390625, "learning_rate": 1.5310074729004913e-05, "loss": 2.0009, "step": 31118 }, { "epoch": 1.0040329817245432, "grad_norm": 0.361328125, "learning_rate": 1.5309301585065232e-05, "loss": 2.0004, "step": 31119 }, { "epoch": 1.0040652455783396, "grad_norm": 0.37109375, "learning_rate": 1.5308528440303485e-05, "loss": 1.9867, "step": 31120 }, { "epoch": 1.004097509432136, "grad_norm": 0.357421875, "learning_rate": 1.5307755294721732e-05, "loss": 1.9724, "step": 31121 }, { "epoch": 1.0041297732859324, "grad_norm": 0.36328125, "learning_rate": 1.5306982148322027e-05, "loss": 1.9924, "step": 31122 }, { "epoch": 1.0041620371397286, "grad_norm": 0.376953125, "learning_rate": 1.530620900110642e-05, "loss": 1.9921, "step": 31123 }, { "epoch": 1.004194300993525, "grad_norm": 0.361328125, "learning_rate": 1.5305435853076968e-05, "loss": 1.9965, "step": 31124 }, { "epoch": 1.0042265648473214, "grad_norm": 0.359375, "learning_rate": 1.5304662704235727e-05, "loss": 1.9908, "step": 31125 }, { "epoch": 1.0042588287011178, "grad_norm": 0.349609375, "learning_rate": 1.530388955458475e-05, "loss": 2.0007, "step": 31126 }, { "epoch": 1.004291092554914, "grad_norm": 0.36328125, "learning_rate": 1.5303116404126097e-05, "loss": 1.9626, "step": 31127 }, { "epoch": 1.0043233564087104, "grad_norm": 0.373046875, "learning_rate": 1.530234325286182e-05, "loss": 2.013, "step": 31128 }, { "epoch": 1.0043556202625068, "grad_norm": 0.349609375, "learning_rate": 1.530157010079397e-05, "loss": 1.9884, "step": 31129 }, { "epoch": 1.0043878841163032, "grad_norm": 0.375, "learning_rate": 1.5300796947924604e-05, "loss": 2.004, "step": 31130 }, { "epoch": 1.0044201479700994, "grad_norm": 0.3671875, "learning_rate": 1.530002379425578e-05, "loss": 1.9934, "step": 31131 }, { "epoch": 1.0044524118238958, "grad_norm": 0.39453125, "learning_rate": 1.529925063978955e-05, "loss": 1.976, "step": 31132 }, { "epoch": 1.0044846756776922, "grad_norm": 0.357421875, "learning_rate": 1.5298477484527972e-05, "loss": 1.9961, "step": 31133 }, { "epoch": 1.0045169395314886, "grad_norm": 0.37890625, "learning_rate": 1.5297704328473094e-05, "loss": 1.9943, "step": 31134 }, { "epoch": 1.0045492033852848, "grad_norm": 0.365234375, "learning_rate": 1.529693117162698e-05, "loss": 1.9619, "step": 31135 }, { "epoch": 1.0045814672390811, "grad_norm": 0.349609375, "learning_rate": 1.5296158013991676e-05, "loss": 1.945, "step": 31136 }, { "epoch": 1.0046137310928775, "grad_norm": 0.376953125, "learning_rate": 1.529538485556925e-05, "loss": 1.9774, "step": 31137 }, { "epoch": 1.004645994946674, "grad_norm": 0.380859375, "learning_rate": 1.529461169636174e-05, "loss": 2.0007, "step": 31138 }, { "epoch": 1.0046782588004703, "grad_norm": 0.37109375, "learning_rate": 1.529383853637121e-05, "loss": 1.964, "step": 31139 }, { "epoch": 1.0047105226542665, "grad_norm": 0.365234375, "learning_rate": 1.5293065375599717e-05, "loss": 1.9779, "step": 31140 }, { "epoch": 1.004742786508063, "grad_norm": 0.357421875, "learning_rate": 1.529229221404931e-05, "loss": 1.993, "step": 31141 }, { "epoch": 1.0047750503618593, "grad_norm": 0.373046875, "learning_rate": 1.5291519051722047e-05, "loss": 1.9889, "step": 31142 }, { "epoch": 1.0048073142156557, "grad_norm": 0.35546875, "learning_rate": 1.5290745888619987e-05, "loss": 1.9831, "step": 31143 }, { "epoch": 1.004839578069452, "grad_norm": 0.375, "learning_rate": 1.5289972724745175e-05, "loss": 2.0134, "step": 31144 }, { "epoch": 1.0048718419232483, "grad_norm": 0.423828125, "learning_rate": 1.5289199560099676e-05, "loss": 1.9811, "step": 31145 }, { "epoch": 1.0049041057770447, "grad_norm": 0.376953125, "learning_rate": 1.5288426394685537e-05, "loss": 2.0088, "step": 31146 }, { "epoch": 1.004936369630841, "grad_norm": 0.3828125, "learning_rate": 1.528765322850482e-05, "loss": 1.9769, "step": 31147 }, { "epoch": 1.0049686334846373, "grad_norm": 0.345703125, "learning_rate": 1.5286880061559576e-05, "loss": 1.9957, "step": 31148 }, { "epoch": 1.0050008973384337, "grad_norm": 0.38671875, "learning_rate": 1.528610689385186e-05, "loss": 1.975, "step": 31149 }, { "epoch": 1.00503316119223, "grad_norm": 0.359375, "learning_rate": 1.5285333725383723e-05, "loss": 1.9828, "step": 31150 }, { "epoch": 1.0050654250460265, "grad_norm": 0.3515625, "learning_rate": 1.528456055615723e-05, "loss": 1.9637, "step": 31151 }, { "epoch": 1.0050976888998226, "grad_norm": 0.375, "learning_rate": 1.528378738617443e-05, "loss": 1.9878, "step": 31152 }, { "epoch": 1.005129952753619, "grad_norm": 0.349609375, "learning_rate": 1.5283014215437373e-05, "loss": 1.98, "step": 31153 }, { "epoch": 1.0051622166074154, "grad_norm": 0.359375, "learning_rate": 1.528224104394812e-05, "loss": 1.9861, "step": 31154 }, { "epoch": 1.0051944804612118, "grad_norm": 0.361328125, "learning_rate": 1.5281467871708727e-05, "loss": 1.9929, "step": 31155 }, { "epoch": 1.0052267443150082, "grad_norm": 0.36328125, "learning_rate": 1.5280694698721244e-05, "loss": 1.9853, "step": 31156 }, { "epoch": 1.0052590081688044, "grad_norm": 0.375, "learning_rate": 1.5279921524987734e-05, "loss": 1.9794, "step": 31157 }, { "epoch": 1.0052912720226008, "grad_norm": 0.35546875, "learning_rate": 1.5279148350510245e-05, "loss": 1.9935, "step": 31158 }, { "epoch": 1.0053235358763972, "grad_norm": 0.369140625, "learning_rate": 1.5278375175290833e-05, "loss": 2.0008, "step": 31159 }, { "epoch": 1.0053557997301936, "grad_norm": 0.359375, "learning_rate": 1.5277601999331555e-05, "loss": 1.982, "step": 31160 }, { "epoch": 1.0053880635839898, "grad_norm": 0.36328125, "learning_rate": 1.527682882263446e-05, "loss": 1.961, "step": 31161 }, { "epoch": 1.0054203274377862, "grad_norm": 0.373046875, "learning_rate": 1.527605564520161e-05, "loss": 1.9906, "step": 31162 }, { "epoch": 1.0054525912915826, "grad_norm": 0.3671875, "learning_rate": 1.5275282467035058e-05, "loss": 1.9904, "step": 31163 }, { "epoch": 1.005484855145379, "grad_norm": 0.365234375, "learning_rate": 1.5274509288136863e-05, "loss": 1.9896, "step": 31164 }, { "epoch": 1.0055171189991752, "grad_norm": 0.384765625, "learning_rate": 1.5273736108509072e-05, "loss": 1.978, "step": 31165 }, { "epoch": 1.0055493828529716, "grad_norm": 0.380859375, "learning_rate": 1.527296292815374e-05, "loss": 1.9709, "step": 31166 }, { "epoch": 1.005581646706768, "grad_norm": 0.369140625, "learning_rate": 1.5272189747072928e-05, "loss": 2.0002, "step": 31167 }, { "epoch": 1.0056139105605644, "grad_norm": 0.38671875, "learning_rate": 1.5271416565268685e-05, "loss": 1.965, "step": 31168 }, { "epoch": 1.0056461744143605, "grad_norm": 0.353515625, "learning_rate": 1.5270643382743076e-05, "loss": 1.9554, "step": 31169 }, { "epoch": 1.005678438268157, "grad_norm": 0.392578125, "learning_rate": 1.526987019949815e-05, "loss": 1.968, "step": 31170 }, { "epoch": 1.0057107021219533, "grad_norm": 0.357421875, "learning_rate": 1.5269097015535947e-05, "loss": 1.9883, "step": 31171 }, { "epoch": 1.0057429659757497, "grad_norm": 0.373046875, "learning_rate": 1.526832383085855e-05, "loss": 1.9809, "step": 31172 }, { "epoch": 1.005775229829546, "grad_norm": 0.376953125, "learning_rate": 1.5267550645467996e-05, "loss": 1.9886, "step": 31173 }, { "epoch": 1.0058074936833423, "grad_norm": 0.365234375, "learning_rate": 1.5266777459366348e-05, "loss": 1.9909, "step": 31174 }, { "epoch": 1.0058397575371387, "grad_norm": 0.345703125, "learning_rate": 1.526600427255565e-05, "loss": 2.0122, "step": 31175 }, { "epoch": 1.0058720213909351, "grad_norm": 0.36328125, "learning_rate": 1.526523108503797e-05, "loss": 1.9657, "step": 31176 }, { "epoch": 1.0059042852447315, "grad_norm": 0.353515625, "learning_rate": 1.5264457896815352e-05, "loss": 1.9357, "step": 31177 }, { "epoch": 1.0059365490985277, "grad_norm": 0.361328125, "learning_rate": 1.526368470788986e-05, "loss": 1.9818, "step": 31178 }, { "epoch": 1.005968812952324, "grad_norm": 0.357421875, "learning_rate": 1.5262911518263545e-05, "loss": 1.978, "step": 31179 }, { "epoch": 1.0060010768061205, "grad_norm": 0.349609375, "learning_rate": 1.5262138327938463e-05, "loss": 1.9567, "step": 31180 }, { "epoch": 1.006033340659917, "grad_norm": 0.33984375, "learning_rate": 1.5261365136916666e-05, "loss": 1.9881, "step": 31181 }, { "epoch": 1.006065604513713, "grad_norm": 0.357421875, "learning_rate": 1.5260591945200213e-05, "loss": 1.9747, "step": 31182 }, { "epoch": 1.0060978683675095, "grad_norm": 0.369140625, "learning_rate": 1.5259818752791155e-05, "loss": 1.9244, "step": 31183 }, { "epoch": 1.0061301322213059, "grad_norm": 0.34765625, "learning_rate": 1.525904555969155e-05, "loss": 1.9813, "step": 31184 }, { "epoch": 1.0061623960751023, "grad_norm": 0.369140625, "learning_rate": 1.525827236590345e-05, "loss": 1.9703, "step": 31185 }, { "epoch": 1.0061946599288984, "grad_norm": 0.353515625, "learning_rate": 1.5257499171428914e-05, "loss": 1.9955, "step": 31186 }, { "epoch": 1.0062269237826948, "grad_norm": 0.353515625, "learning_rate": 1.5256725976269991e-05, "loss": 1.9905, "step": 31187 }, { "epoch": 1.0062591876364912, "grad_norm": 0.35546875, "learning_rate": 1.5255952780428746e-05, "loss": 1.9993, "step": 31188 }, { "epoch": 1.0062914514902876, "grad_norm": 0.34375, "learning_rate": 1.5255179583907224e-05, "loss": 1.9544, "step": 31189 }, { "epoch": 1.0063237153440838, "grad_norm": 0.35546875, "learning_rate": 1.5254406386707489e-05, "loss": 2.0096, "step": 31190 }, { "epoch": 1.0063559791978802, "grad_norm": 0.35546875, "learning_rate": 1.5253633188831589e-05, "loss": 1.9685, "step": 31191 }, { "epoch": 1.0063882430516766, "grad_norm": 0.345703125, "learning_rate": 1.5252859990281574e-05, "loss": 1.9961, "step": 31192 }, { "epoch": 1.006420506905473, "grad_norm": 0.33984375, "learning_rate": 1.5252086791059512e-05, "loss": 1.9633, "step": 31193 }, { "epoch": 1.0064527707592692, "grad_norm": 0.34375, "learning_rate": 1.5251313591167457e-05, "loss": 1.9771, "step": 31194 }, { "epoch": 1.0064850346130656, "grad_norm": 0.349609375, "learning_rate": 1.5250540390607451e-05, "loss": 2.0026, "step": 31195 }, { "epoch": 1.006517298466862, "grad_norm": 0.3515625, "learning_rate": 1.524976718938156e-05, "loss": 1.9796, "step": 31196 }, { "epoch": 1.0065495623206584, "grad_norm": 0.3515625, "learning_rate": 1.5248993987491835e-05, "loss": 1.9661, "step": 31197 }, { "epoch": 1.0065818261744548, "grad_norm": 0.359375, "learning_rate": 1.5248220784940334e-05, "loss": 1.9879, "step": 31198 }, { "epoch": 1.006614090028251, "grad_norm": 0.34765625, "learning_rate": 1.5247447581729109e-05, "loss": 1.97, "step": 31199 }, { "epoch": 1.0066463538820474, "grad_norm": 0.38671875, "learning_rate": 1.5246674377860218e-05, "loss": 1.9794, "step": 31200 }, { "epoch": 1.0066786177358438, "grad_norm": 0.34375, "learning_rate": 1.5245901173335715e-05, "loss": 2.0129, "step": 31201 }, { "epoch": 1.0067108815896402, "grad_norm": 0.376953125, "learning_rate": 1.5245127968157654e-05, "loss": 1.9593, "step": 31202 }, { "epoch": 1.0067431454434363, "grad_norm": 0.37109375, "learning_rate": 1.5244354762328084e-05, "loss": 1.992, "step": 31203 }, { "epoch": 1.0067754092972327, "grad_norm": 0.376953125, "learning_rate": 1.5243581555849074e-05, "loss": 1.9758, "step": 31204 }, { "epoch": 1.0068076731510291, "grad_norm": 0.34765625, "learning_rate": 1.524280834872267e-05, "loss": 1.9798, "step": 31205 }, { "epoch": 1.0068399370048255, "grad_norm": 0.357421875, "learning_rate": 1.5242035140950922e-05, "loss": 1.9637, "step": 31206 }, { "epoch": 1.0068722008586217, "grad_norm": 0.3515625, "learning_rate": 1.5241261932535901e-05, "loss": 1.9858, "step": 31207 }, { "epoch": 1.0069044647124181, "grad_norm": 0.349609375, "learning_rate": 1.5240488723479642e-05, "loss": 1.9836, "step": 31208 }, { "epoch": 1.0069367285662145, "grad_norm": 0.34765625, "learning_rate": 1.523971551378422e-05, "loss": 1.9159, "step": 31209 }, { "epoch": 1.006968992420011, "grad_norm": 0.357421875, "learning_rate": 1.5238942303451678e-05, "loss": 1.9986, "step": 31210 }, { "epoch": 1.007001256273807, "grad_norm": 0.357421875, "learning_rate": 1.5238169092484073e-05, "loss": 1.996, "step": 31211 }, { "epoch": 1.0070335201276035, "grad_norm": 0.3515625, "learning_rate": 1.5237395880883459e-05, "loss": 1.9631, "step": 31212 }, { "epoch": 1.0070657839814, "grad_norm": 0.35546875, "learning_rate": 1.5236622668651894e-05, "loss": 1.9435, "step": 31213 }, { "epoch": 1.0070980478351963, "grad_norm": 0.36328125, "learning_rate": 1.5235849455791429e-05, "loss": 1.9827, "step": 31214 }, { "epoch": 1.0071303116889925, "grad_norm": 0.361328125, "learning_rate": 1.5235076242304124e-05, "loss": 1.9887, "step": 31215 }, { "epoch": 1.0071625755427889, "grad_norm": 0.365234375, "learning_rate": 1.5234303028192037e-05, "loss": 1.9512, "step": 31216 }, { "epoch": 1.0071948393965853, "grad_norm": 0.353515625, "learning_rate": 1.523352981345721e-05, "loss": 2.0052, "step": 31217 }, { "epoch": 1.0072271032503817, "grad_norm": 0.353515625, "learning_rate": 1.5232756598101711e-05, "loss": 1.9781, "step": 31218 }, { "epoch": 1.007259367104178, "grad_norm": 0.353515625, "learning_rate": 1.5231983382127587e-05, "loss": 1.9839, "step": 31219 }, { "epoch": 1.0072916309579742, "grad_norm": 0.3671875, "learning_rate": 1.5231210165536896e-05, "loss": 1.9487, "step": 31220 }, { "epoch": 1.0073238948117706, "grad_norm": 0.357421875, "learning_rate": 1.5230436948331695e-05, "loss": 1.9959, "step": 31221 }, { "epoch": 1.007356158665567, "grad_norm": 0.361328125, "learning_rate": 1.522966373051404e-05, "loss": 1.9625, "step": 31222 }, { "epoch": 1.0073884225193634, "grad_norm": 0.35546875, "learning_rate": 1.5228890512085978e-05, "loss": 1.9862, "step": 31223 }, { "epoch": 1.0074206863731596, "grad_norm": 0.3515625, "learning_rate": 1.5228117293049568e-05, "loss": 1.9773, "step": 31224 }, { "epoch": 1.007452950226956, "grad_norm": 0.384765625, "learning_rate": 1.5227344073406871e-05, "loss": 1.9647, "step": 31225 }, { "epoch": 1.0074852140807524, "grad_norm": 0.35546875, "learning_rate": 1.5226570853159935e-05, "loss": 1.977, "step": 31226 }, { "epoch": 1.0075174779345488, "grad_norm": 0.376953125, "learning_rate": 1.522579763231082e-05, "loss": 1.9496, "step": 31227 }, { "epoch": 1.007549741788345, "grad_norm": 0.34375, "learning_rate": 1.5225024410861578e-05, "loss": 1.9818, "step": 31228 }, { "epoch": 1.0075820056421414, "grad_norm": 0.36328125, "learning_rate": 1.5224251188814261e-05, "loss": 1.9974, "step": 31229 }, { "epoch": 1.0076142694959378, "grad_norm": 0.359375, "learning_rate": 1.5223477966170928e-05, "loss": 1.9722, "step": 31230 }, { "epoch": 1.0076465333497342, "grad_norm": 0.376953125, "learning_rate": 1.522270474293364e-05, "loss": 1.9736, "step": 31231 }, { "epoch": 1.0076787972035304, "grad_norm": 0.375, "learning_rate": 1.5221931519104442e-05, "loss": 1.965, "step": 31232 }, { "epoch": 1.0077110610573268, "grad_norm": 0.33203125, "learning_rate": 1.5221158294685393e-05, "loss": 1.9977, "step": 31233 }, { "epoch": 1.0077433249111232, "grad_norm": 0.373046875, "learning_rate": 1.5220385069678546e-05, "loss": 1.9899, "step": 31234 }, { "epoch": 1.0077755887649196, "grad_norm": 0.373046875, "learning_rate": 1.5219611844085963e-05, "loss": 1.969, "step": 31235 }, { "epoch": 1.0078078526187157, "grad_norm": 0.369140625, "learning_rate": 1.5218838617909688e-05, "loss": 1.9607, "step": 31236 }, { "epoch": 1.0078401164725121, "grad_norm": 0.359375, "learning_rate": 1.521806539115179e-05, "loss": 1.9483, "step": 31237 }, { "epoch": 1.0078723803263085, "grad_norm": 0.365234375, "learning_rate": 1.521729216381431e-05, "loss": 1.9936, "step": 31238 }, { "epoch": 1.007904644180105, "grad_norm": 0.349609375, "learning_rate": 1.5216518935899312e-05, "loss": 1.9712, "step": 31239 }, { "epoch": 1.0079369080339013, "grad_norm": 0.37109375, "learning_rate": 1.521574570740885e-05, "loss": 1.9772, "step": 31240 }, { "epoch": 1.0079691718876975, "grad_norm": 0.345703125, "learning_rate": 1.5214972478344975e-05, "loss": 1.9708, "step": 31241 }, { "epoch": 1.008001435741494, "grad_norm": 0.3671875, "learning_rate": 1.5214199248709744e-05, "loss": 1.9804, "step": 31242 }, { "epoch": 1.0080336995952903, "grad_norm": 0.373046875, "learning_rate": 1.5213426018505217e-05, "loss": 1.9943, "step": 31243 }, { "epoch": 1.0080659634490867, "grad_norm": 0.3515625, "learning_rate": 1.5212652787733442e-05, "loss": 1.9992, "step": 31244 }, { "epoch": 1.008098227302883, "grad_norm": 0.353515625, "learning_rate": 1.5211879556396473e-05, "loss": 1.9755, "step": 31245 }, { "epoch": 1.0081304911566793, "grad_norm": 0.3671875, "learning_rate": 1.5211106324496378e-05, "loss": 2.0019, "step": 31246 }, { "epoch": 1.0081627550104757, "grad_norm": 0.359375, "learning_rate": 1.5210333092035198e-05, "loss": 1.9758, "step": 31247 }, { "epoch": 1.008195018864272, "grad_norm": 0.36328125, "learning_rate": 1.5209559859014996e-05, "loss": 1.9789, "step": 31248 }, { "epoch": 1.0082272827180683, "grad_norm": 0.361328125, "learning_rate": 1.5208786625437825e-05, "loss": 1.9875, "step": 31249 }, { "epoch": 1.0082595465718647, "grad_norm": 0.35546875, "learning_rate": 1.5208013391305731e-05, "loss": 1.9342, "step": 31250 }, { "epoch": 1.008291810425661, "grad_norm": 0.359375, "learning_rate": 1.5207240156620785e-05, "loss": 1.9879, "step": 31251 }, { "epoch": 1.0083240742794575, "grad_norm": 0.34765625, "learning_rate": 1.5206466921385036e-05, "loss": 1.9457, "step": 31252 }, { "epoch": 1.0083563381332536, "grad_norm": 0.357421875, "learning_rate": 1.5205693685600536e-05, "loss": 1.9546, "step": 31253 }, { "epoch": 1.00838860198705, "grad_norm": 0.35546875, "learning_rate": 1.5204920449269342e-05, "loss": 1.9792, "step": 31254 }, { "epoch": 1.0084208658408464, "grad_norm": 0.380859375, "learning_rate": 1.5204147212393507e-05, "loss": 1.9782, "step": 31255 }, { "epoch": 1.0084531296946428, "grad_norm": 0.353515625, "learning_rate": 1.5203373974975089e-05, "loss": 1.9553, "step": 31256 }, { "epoch": 1.008485393548439, "grad_norm": 0.39453125, "learning_rate": 1.5202600737016143e-05, "loss": 2.0124, "step": 31257 }, { "epoch": 1.0085176574022354, "grad_norm": 0.380859375, "learning_rate": 1.5201827498518725e-05, "loss": 1.9743, "step": 31258 }, { "epoch": 1.0085499212560318, "grad_norm": 0.365234375, "learning_rate": 1.520105425948489e-05, "loss": 1.9917, "step": 31259 }, { "epoch": 1.0085821851098282, "grad_norm": 0.380859375, "learning_rate": 1.520028101991669e-05, "loss": 1.9954, "step": 31260 }, { "epoch": 1.0086144489636246, "grad_norm": 0.36328125, "learning_rate": 1.5199507779816177e-05, "loss": 1.9601, "step": 31261 }, { "epoch": 1.0086467128174208, "grad_norm": 0.357421875, "learning_rate": 1.5198734539185415e-05, "loss": 1.9725, "step": 31262 }, { "epoch": 1.0086789766712172, "grad_norm": 0.357421875, "learning_rate": 1.5197961298026453e-05, "loss": 1.978, "step": 31263 }, { "epoch": 1.0087112405250136, "grad_norm": 0.34765625, "learning_rate": 1.5197188056341355e-05, "loss": 1.9788, "step": 31264 }, { "epoch": 1.00874350437881, "grad_norm": 0.35546875, "learning_rate": 1.5196414814132162e-05, "loss": 1.993, "step": 31265 }, { "epoch": 1.0087757682326062, "grad_norm": 0.361328125, "learning_rate": 1.5195641571400935e-05, "loss": 1.9984, "step": 31266 }, { "epoch": 1.0088080320864026, "grad_norm": 0.35546875, "learning_rate": 1.5194868328149739e-05, "loss": 1.952, "step": 31267 }, { "epoch": 1.008840295940199, "grad_norm": 0.359375, "learning_rate": 1.5194095084380615e-05, "loss": 1.9627, "step": 31268 }, { "epoch": 1.0088725597939954, "grad_norm": 0.3515625, "learning_rate": 1.5193321840095624e-05, "loss": 1.9949, "step": 31269 }, { "epoch": 1.0089048236477915, "grad_norm": 0.35546875, "learning_rate": 1.5192548595296826e-05, "loss": 1.9729, "step": 31270 }, { "epoch": 1.008937087501588, "grad_norm": 0.36328125, "learning_rate": 1.519177534998626e-05, "loss": 1.9993, "step": 31271 }, { "epoch": 1.0089693513553843, "grad_norm": 0.361328125, "learning_rate": 1.5191002104166e-05, "loss": 1.9617, "step": 31272 }, { "epoch": 1.0090016152091807, "grad_norm": 0.357421875, "learning_rate": 1.5190228857838097e-05, "loss": 1.9781, "step": 31273 }, { "epoch": 1.009033879062977, "grad_norm": 0.365234375, "learning_rate": 1.5189455611004597e-05, "loss": 1.9821, "step": 31274 }, { "epoch": 1.0090661429167733, "grad_norm": 0.349609375, "learning_rate": 1.518868236366756e-05, "loss": 1.954, "step": 31275 }, { "epoch": 1.0090984067705697, "grad_norm": 0.3671875, "learning_rate": 1.5187909115829044e-05, "loss": 1.9834, "step": 31276 }, { "epoch": 1.0091306706243661, "grad_norm": 0.36328125, "learning_rate": 1.5187135867491102e-05, "loss": 1.9798, "step": 31277 }, { "epoch": 1.0091629344781625, "grad_norm": 0.357421875, "learning_rate": 1.5186362618655785e-05, "loss": 1.9813, "step": 31278 }, { "epoch": 1.0091951983319587, "grad_norm": 0.359375, "learning_rate": 1.518558936932516e-05, "loss": 1.9695, "step": 31279 }, { "epoch": 1.009227462185755, "grad_norm": 0.3515625, "learning_rate": 1.518481611950127e-05, "loss": 1.9657, "step": 31280 }, { "epoch": 1.0092597260395515, "grad_norm": 0.37890625, "learning_rate": 1.5184042869186175e-05, "loss": 1.9816, "step": 31281 }, { "epoch": 1.009291989893348, "grad_norm": 0.365234375, "learning_rate": 1.5183269618381927e-05, "loss": 1.9372, "step": 31282 }, { "epoch": 1.009324253747144, "grad_norm": 0.376953125, "learning_rate": 1.5182496367090588e-05, "loss": 1.9863, "step": 31283 }, { "epoch": 1.0093565176009405, "grad_norm": 0.384765625, "learning_rate": 1.5181723115314205e-05, "loss": 1.9528, "step": 31284 }, { "epoch": 1.0093887814547369, "grad_norm": 0.380859375, "learning_rate": 1.5180949863054841e-05, "loss": 2.0143, "step": 31285 }, { "epoch": 1.0094210453085333, "grad_norm": 0.380859375, "learning_rate": 1.5180176610314547e-05, "loss": 1.9879, "step": 31286 }, { "epoch": 1.0094533091623294, "grad_norm": 0.361328125, "learning_rate": 1.5179403357095371e-05, "loss": 1.9357, "step": 31287 }, { "epoch": 1.0094855730161258, "grad_norm": 0.41015625, "learning_rate": 1.5178630103399385e-05, "loss": 1.9814, "step": 31288 }, { "epoch": 1.0095178368699222, "grad_norm": 0.365234375, "learning_rate": 1.517785684922863e-05, "loss": 1.9625, "step": 31289 }, { "epoch": 1.0095501007237186, "grad_norm": 0.404296875, "learning_rate": 1.5177083594585168e-05, "loss": 1.99, "step": 31290 }, { "epoch": 1.0095823645775148, "grad_norm": 0.37890625, "learning_rate": 1.5176310339471052e-05, "loss": 1.9422, "step": 31291 }, { "epoch": 1.0096146284313112, "grad_norm": 0.365234375, "learning_rate": 1.5175537083888334e-05, "loss": 1.9749, "step": 31292 }, { "epoch": 1.0096468922851076, "grad_norm": 0.3828125, "learning_rate": 1.5174763827839077e-05, "loss": 2.0065, "step": 31293 }, { "epoch": 1.009679156138904, "grad_norm": 0.38671875, "learning_rate": 1.517399057132533e-05, "loss": 1.9838, "step": 31294 }, { "epoch": 1.0097114199927002, "grad_norm": 0.37109375, "learning_rate": 1.517321731434915e-05, "loss": 1.9844, "step": 31295 }, { "epoch": 1.0097436838464966, "grad_norm": 0.37890625, "learning_rate": 1.5172444056912589e-05, "loss": 1.9793, "step": 31296 }, { "epoch": 1.009775947700293, "grad_norm": 0.353515625, "learning_rate": 1.5171670799017708e-05, "loss": 1.9427, "step": 31297 }, { "epoch": 1.0098082115540894, "grad_norm": 0.369140625, "learning_rate": 1.5170897540666557e-05, "loss": 1.9681, "step": 31298 }, { "epoch": 1.0098404754078858, "grad_norm": 0.37890625, "learning_rate": 1.5170124281861194e-05, "loss": 1.9827, "step": 31299 }, { "epoch": 1.009872739261682, "grad_norm": 0.373046875, "learning_rate": 1.5169351022603679e-05, "loss": 1.9903, "step": 31300 }, { "epoch": 1.0099050031154784, "grad_norm": 0.35546875, "learning_rate": 1.5168577762896058e-05, "loss": 1.9807, "step": 31301 }, { "epoch": 1.0099372669692748, "grad_norm": 0.36328125, "learning_rate": 1.5167804502740388e-05, "loss": 1.9702, "step": 31302 }, { "epoch": 1.0099695308230712, "grad_norm": 0.3671875, "learning_rate": 1.5167031242138724e-05, "loss": 2.0044, "step": 31303 }, { "epoch": 1.0100017946768673, "grad_norm": 0.361328125, "learning_rate": 1.5166257981093126e-05, "loss": 1.958, "step": 31304 }, { "epoch": 1.0100340585306637, "grad_norm": 0.37109375, "learning_rate": 1.516548471960565e-05, "loss": 1.9962, "step": 31305 }, { "epoch": 1.0100663223844601, "grad_norm": 0.384765625, "learning_rate": 1.5164711457678347e-05, "loss": 1.997, "step": 31306 }, { "epoch": 1.0100985862382565, "grad_norm": 0.357421875, "learning_rate": 1.5163938195313268e-05, "loss": 1.9784, "step": 31307 }, { "epoch": 1.0101308500920527, "grad_norm": 0.37109375, "learning_rate": 1.5163164932512474e-05, "loss": 1.9763, "step": 31308 }, { "epoch": 1.0101631139458491, "grad_norm": 0.35546875, "learning_rate": 1.5162391669278023e-05, "loss": 1.9751, "step": 31309 }, { "epoch": 1.0101953777996455, "grad_norm": 0.359375, "learning_rate": 1.5161618405611964e-05, "loss": 1.9642, "step": 31310 }, { "epoch": 1.010227641653442, "grad_norm": 0.36328125, "learning_rate": 1.5160845141516353e-05, "loss": 1.9774, "step": 31311 }, { "epoch": 1.010259905507238, "grad_norm": 0.369140625, "learning_rate": 1.5160071876993246e-05, "loss": 1.983, "step": 31312 }, { "epoch": 1.0102921693610345, "grad_norm": 0.365234375, "learning_rate": 1.5159298612044703e-05, "loss": 1.9459, "step": 31313 }, { "epoch": 1.010324433214831, "grad_norm": 0.361328125, "learning_rate": 1.5158525346672773e-05, "loss": 1.9897, "step": 31314 }, { "epoch": 1.0103566970686273, "grad_norm": 0.353515625, "learning_rate": 1.5157752080879516e-05, "loss": 1.9853, "step": 31315 }, { "epoch": 1.0103889609224235, "grad_norm": 0.361328125, "learning_rate": 1.5156978814666982e-05, "loss": 1.9943, "step": 31316 }, { "epoch": 1.0104212247762199, "grad_norm": 0.357421875, "learning_rate": 1.5156205548037228e-05, "loss": 1.9498, "step": 31317 }, { "epoch": 1.0104534886300163, "grad_norm": 0.345703125, "learning_rate": 1.5155432280992313e-05, "loss": 1.9788, "step": 31318 }, { "epoch": 1.0104857524838127, "grad_norm": 0.353515625, "learning_rate": 1.5154659013534284e-05, "loss": 2.0064, "step": 31319 }, { "epoch": 1.010518016337609, "grad_norm": 0.369140625, "learning_rate": 1.5153885745665204e-05, "loss": 1.9768, "step": 31320 }, { "epoch": 1.0105502801914052, "grad_norm": 0.3515625, "learning_rate": 1.5153112477387127e-05, "loss": 1.9684, "step": 31321 }, { "epoch": 1.0105825440452016, "grad_norm": 0.36328125, "learning_rate": 1.5152339208702107e-05, "loss": 1.9651, "step": 31322 }, { "epoch": 1.010614807898998, "grad_norm": 0.353515625, "learning_rate": 1.5151565939612195e-05, "loss": 1.9847, "step": 31323 }, { "epoch": 1.0106470717527944, "grad_norm": 0.359375, "learning_rate": 1.5150792670119453e-05, "loss": 1.9925, "step": 31324 }, { "epoch": 1.0106793356065906, "grad_norm": 0.359375, "learning_rate": 1.5150019400225933e-05, "loss": 1.9779, "step": 31325 }, { "epoch": 1.010711599460387, "grad_norm": 0.345703125, "learning_rate": 1.5149246129933688e-05, "loss": 1.9835, "step": 31326 }, { "epoch": 1.0107438633141834, "grad_norm": 0.37109375, "learning_rate": 1.5148472859244778e-05, "loss": 1.9676, "step": 31327 }, { "epoch": 1.0107761271679798, "grad_norm": 0.341796875, "learning_rate": 1.5147699588161255e-05, "loss": 1.9585, "step": 31328 }, { "epoch": 1.010808391021776, "grad_norm": 0.341796875, "learning_rate": 1.5146926316685175e-05, "loss": 1.9876, "step": 31329 }, { "epoch": 1.0108406548755724, "grad_norm": 0.34375, "learning_rate": 1.5146153044818592e-05, "loss": 1.9714, "step": 31330 }, { "epoch": 1.0108729187293688, "grad_norm": 0.345703125, "learning_rate": 1.5145379772563567e-05, "loss": 1.9758, "step": 31331 }, { "epoch": 1.0109051825831652, "grad_norm": 0.36328125, "learning_rate": 1.514460649992215e-05, "loss": 1.98, "step": 31332 }, { "epoch": 1.0109374464369614, "grad_norm": 0.35546875, "learning_rate": 1.5143833226896394e-05, "loss": 1.9969, "step": 31333 }, { "epoch": 1.0109697102907578, "grad_norm": 0.345703125, "learning_rate": 1.5143059953488358e-05, "loss": 1.969, "step": 31334 }, { "epoch": 1.0110019741445542, "grad_norm": 0.3671875, "learning_rate": 1.5142286679700093e-05, "loss": 1.9712, "step": 31335 }, { "epoch": 1.0110342379983506, "grad_norm": 0.34765625, "learning_rate": 1.514151340553366e-05, "loss": 1.9673, "step": 31336 }, { "epoch": 1.0110665018521467, "grad_norm": 0.357421875, "learning_rate": 1.514074013099112e-05, "loss": 1.9677, "step": 31337 }, { "epoch": 1.0110987657059431, "grad_norm": 0.357421875, "learning_rate": 1.513996685607451e-05, "loss": 1.9912, "step": 31338 }, { "epoch": 1.0111310295597395, "grad_norm": 0.341796875, "learning_rate": 1.5139193580785896e-05, "loss": 1.9999, "step": 31339 }, { "epoch": 1.011163293413536, "grad_norm": 0.359375, "learning_rate": 1.5138420305127335e-05, "loss": 1.9715, "step": 31340 }, { "epoch": 1.0111955572673323, "grad_norm": 0.353515625, "learning_rate": 1.513764702910088e-05, "loss": 1.9503, "step": 31341 }, { "epoch": 1.0112278211211285, "grad_norm": 0.361328125, "learning_rate": 1.5136873752708585e-05, "loss": 1.9549, "step": 31342 }, { "epoch": 1.011260084974925, "grad_norm": 0.365234375, "learning_rate": 1.5136100475952506e-05, "loss": 1.9812, "step": 31343 }, { "epoch": 1.0112923488287213, "grad_norm": 0.345703125, "learning_rate": 1.5135327198834695e-05, "loss": 1.9715, "step": 31344 }, { "epoch": 1.0113246126825177, "grad_norm": 0.359375, "learning_rate": 1.5134553921357214e-05, "loss": 1.9886, "step": 31345 }, { "epoch": 1.011356876536314, "grad_norm": 0.380859375, "learning_rate": 1.5133780643522117e-05, "loss": 1.9835, "step": 31346 }, { "epoch": 1.0113891403901103, "grad_norm": 0.37109375, "learning_rate": 1.5133007365331454e-05, "loss": 1.9896, "step": 31347 }, { "epoch": 1.0114214042439067, "grad_norm": 0.3671875, "learning_rate": 1.5132234086787284e-05, "loss": 1.9998, "step": 31348 }, { "epoch": 1.011453668097703, "grad_norm": 0.375, "learning_rate": 1.5131460807891658e-05, "loss": 1.9818, "step": 31349 }, { "epoch": 1.0114859319514993, "grad_norm": 0.35546875, "learning_rate": 1.5130687528646637e-05, "loss": 1.989, "step": 31350 }, { "epoch": 1.0115181958052957, "grad_norm": 0.365234375, "learning_rate": 1.5129914249054275e-05, "loss": 1.9879, "step": 31351 }, { "epoch": 1.011550459659092, "grad_norm": 0.365234375, "learning_rate": 1.5129140969116626e-05, "loss": 2.0126, "step": 31352 }, { "epoch": 1.0115827235128885, "grad_norm": 0.357421875, "learning_rate": 1.5128367688835745e-05, "loss": 1.9611, "step": 31353 }, { "epoch": 1.0116149873666846, "grad_norm": 0.35546875, "learning_rate": 1.5127594408213686e-05, "loss": 1.9998, "step": 31354 }, { "epoch": 1.011647251220481, "grad_norm": 0.3515625, "learning_rate": 1.5126821127252509e-05, "loss": 1.9732, "step": 31355 }, { "epoch": 1.0116795150742774, "grad_norm": 0.390625, "learning_rate": 1.5126047845954262e-05, "loss": 1.9748, "step": 31356 }, { "epoch": 1.0117117789280738, "grad_norm": 0.34375, "learning_rate": 1.5125274564321006e-05, "loss": 1.9867, "step": 31357 }, { "epoch": 1.0117440427818702, "grad_norm": 0.34765625, "learning_rate": 1.51245012823548e-05, "loss": 1.9643, "step": 31358 }, { "epoch": 1.0117763066356664, "grad_norm": 0.365234375, "learning_rate": 1.5123728000057683e-05, "loss": 1.9567, "step": 31359 }, { "epoch": 1.0118085704894628, "grad_norm": 0.36328125, "learning_rate": 1.5122954717431726e-05, "loss": 1.978, "step": 31360 }, { "epoch": 1.0118408343432592, "grad_norm": 0.357421875, "learning_rate": 1.5122181434478981e-05, "loss": 1.983, "step": 31361 }, { "epoch": 1.0118730981970556, "grad_norm": 0.36328125, "learning_rate": 1.5121408151201501e-05, "loss": 1.9794, "step": 31362 }, { "epoch": 1.0119053620508518, "grad_norm": 0.337890625, "learning_rate": 1.5120634867601337e-05, "loss": 1.9812, "step": 31363 }, { "epoch": 1.0119376259046482, "grad_norm": 0.353515625, "learning_rate": 1.5119861583680557e-05, "loss": 1.9833, "step": 31364 }, { "epoch": 1.0119698897584446, "grad_norm": 0.3671875, "learning_rate": 1.51190882994412e-05, "loss": 1.9399, "step": 31365 }, { "epoch": 1.012002153612241, "grad_norm": 0.43359375, "learning_rate": 1.5118315014885332e-05, "loss": 1.9732, "step": 31366 }, { "epoch": 1.0120344174660372, "grad_norm": 0.36328125, "learning_rate": 1.5117541730015008e-05, "loss": 1.9569, "step": 31367 }, { "epoch": 1.0120666813198336, "grad_norm": 0.373046875, "learning_rate": 1.5116768444832278e-05, "loss": 1.9722, "step": 31368 }, { "epoch": 1.01209894517363, "grad_norm": 0.353515625, "learning_rate": 1.5115995159339202e-05, "loss": 1.9508, "step": 31369 }, { "epoch": 1.0121312090274264, "grad_norm": 0.361328125, "learning_rate": 1.511522187353783e-05, "loss": 1.9743, "step": 31370 }, { "epoch": 1.0121634728812225, "grad_norm": 0.353515625, "learning_rate": 1.5114448587430222e-05, "loss": 1.9932, "step": 31371 }, { "epoch": 1.012195736735019, "grad_norm": 0.359375, "learning_rate": 1.5113675301018436e-05, "loss": 1.9976, "step": 31372 }, { "epoch": 1.0122280005888153, "grad_norm": 0.33984375, "learning_rate": 1.511290201430452e-05, "loss": 1.9588, "step": 31373 }, { "epoch": 1.0122602644426117, "grad_norm": 0.349609375, "learning_rate": 1.5112128727290533e-05, "loss": 1.9733, "step": 31374 }, { "epoch": 1.012292528296408, "grad_norm": 0.34375, "learning_rate": 1.5111355439978528e-05, "loss": 1.9675, "step": 31375 }, { "epoch": 1.0123247921502043, "grad_norm": 0.35546875, "learning_rate": 1.5110582152370561e-05, "loss": 1.9141, "step": 31376 }, { "epoch": 1.0123570560040007, "grad_norm": 0.35546875, "learning_rate": 1.510980886446869e-05, "loss": 1.9702, "step": 31377 }, { "epoch": 1.0123893198577971, "grad_norm": 0.349609375, "learning_rate": 1.5109035576274967e-05, "loss": 1.9779, "step": 31378 }, { "epoch": 1.0124215837115935, "grad_norm": 0.3671875, "learning_rate": 1.5108262287791455e-05, "loss": 1.9301, "step": 31379 }, { "epoch": 1.0124538475653897, "grad_norm": 0.37890625, "learning_rate": 1.5107488999020195e-05, "loss": 1.9723, "step": 31380 }, { "epoch": 1.012486111419186, "grad_norm": 0.38671875, "learning_rate": 1.5106715709963247e-05, "loss": 1.9739, "step": 31381 }, { "epoch": 1.0125183752729825, "grad_norm": 0.36328125, "learning_rate": 1.5105942420622679e-05, "loss": 1.9752, "step": 31382 }, { "epoch": 1.012550639126779, "grad_norm": 0.408203125, "learning_rate": 1.510516913100053e-05, "loss": 1.9895, "step": 31383 }, { "epoch": 1.012582902980575, "grad_norm": 0.357421875, "learning_rate": 1.5104395841098865e-05, "loss": 1.9641, "step": 31384 }, { "epoch": 1.0126151668343715, "grad_norm": 0.37890625, "learning_rate": 1.5103622550919738e-05, "loss": 1.9638, "step": 31385 }, { "epoch": 1.0126474306881679, "grad_norm": 0.369140625, "learning_rate": 1.5102849260465194e-05, "loss": 1.9668, "step": 31386 }, { "epoch": 1.0126796945419643, "grad_norm": 0.37109375, "learning_rate": 1.5102075969737303e-05, "loss": 1.9643, "step": 31387 }, { "epoch": 1.0127119583957604, "grad_norm": 0.375, "learning_rate": 1.5101302678738117e-05, "loss": 1.9842, "step": 31388 }, { "epoch": 1.0127442222495568, "grad_norm": 0.353515625, "learning_rate": 1.510052938746968e-05, "loss": 1.946, "step": 31389 }, { "epoch": 1.0127764861033532, "grad_norm": 0.35546875, "learning_rate": 1.5099756095934059e-05, "loss": 1.9638, "step": 31390 }, { "epoch": 1.0128087499571496, "grad_norm": 0.36328125, "learning_rate": 1.5098982804133305e-05, "loss": 1.9835, "step": 31391 }, { "epoch": 1.0128410138109458, "grad_norm": 0.3515625, "learning_rate": 1.5098209512069476e-05, "loss": 1.9623, "step": 31392 }, { "epoch": 1.0128732776647422, "grad_norm": 0.35546875, "learning_rate": 1.5097436219744623e-05, "loss": 1.9723, "step": 31393 }, { "epoch": 1.0129055415185386, "grad_norm": 0.3515625, "learning_rate": 1.5096662927160806e-05, "loss": 1.9767, "step": 31394 }, { "epoch": 1.012937805372335, "grad_norm": 0.353515625, "learning_rate": 1.5095889634320074e-05, "loss": 2.0134, "step": 31395 }, { "epoch": 1.0129700692261312, "grad_norm": 0.3515625, "learning_rate": 1.5095116341224488e-05, "loss": 1.9721, "step": 31396 }, { "epoch": 1.0130023330799276, "grad_norm": 0.345703125, "learning_rate": 1.5094343047876103e-05, "loss": 1.9714, "step": 31397 }, { "epoch": 1.013034596933724, "grad_norm": 0.36328125, "learning_rate": 1.5093569754276969e-05, "loss": 2.0075, "step": 31398 }, { "epoch": 1.0130668607875204, "grad_norm": 0.349609375, "learning_rate": 1.5092796460429146e-05, "loss": 1.9944, "step": 31399 }, { "epoch": 1.0130991246413168, "grad_norm": 0.349609375, "learning_rate": 1.509202316633469e-05, "loss": 1.9491, "step": 31400 }, { "epoch": 1.013131388495113, "grad_norm": 0.349609375, "learning_rate": 1.5091249871995651e-05, "loss": 1.9567, "step": 31401 }, { "epoch": 1.0131636523489094, "grad_norm": 0.35546875, "learning_rate": 1.5090476577414086e-05, "loss": 1.9673, "step": 31402 }, { "epoch": 1.0131959162027058, "grad_norm": 0.35546875, "learning_rate": 1.5089703282592057e-05, "loss": 1.9928, "step": 31403 }, { "epoch": 1.0132281800565022, "grad_norm": 0.345703125, "learning_rate": 1.5088929987531612e-05, "loss": 1.9834, "step": 31404 }, { "epoch": 1.0132604439102983, "grad_norm": 0.361328125, "learning_rate": 1.5088156692234805e-05, "loss": 2.0095, "step": 31405 }, { "epoch": 1.0132927077640947, "grad_norm": 0.3515625, "learning_rate": 1.5087383396703702e-05, "loss": 1.9517, "step": 31406 }, { "epoch": 1.0133249716178911, "grad_norm": 0.36328125, "learning_rate": 1.5086610100940342e-05, "loss": 1.9526, "step": 31407 }, { "epoch": 1.0133572354716875, "grad_norm": 0.36328125, "learning_rate": 1.5085836804946793e-05, "loss": 2.0158, "step": 31408 }, { "epoch": 1.0133894993254837, "grad_norm": 0.3515625, "learning_rate": 1.508506350872511e-05, "loss": 1.9371, "step": 31409 }, { "epoch": 1.0134217631792801, "grad_norm": 0.3671875, "learning_rate": 1.508429021227734e-05, "loss": 1.9716, "step": 31410 }, { "epoch": 1.0134540270330765, "grad_norm": 0.345703125, "learning_rate": 1.5083516915605544e-05, "loss": 1.967, "step": 31411 }, { "epoch": 1.013486290886873, "grad_norm": 0.3515625, "learning_rate": 1.5082743618711775e-05, "loss": 2.0039, "step": 31412 }, { "epoch": 1.013518554740669, "grad_norm": 0.349609375, "learning_rate": 1.5081970321598092e-05, "loss": 1.9901, "step": 31413 }, { "epoch": 1.0135508185944655, "grad_norm": 0.357421875, "learning_rate": 1.5081197024266546e-05, "loss": 1.9583, "step": 31414 }, { "epoch": 1.013583082448262, "grad_norm": 0.353515625, "learning_rate": 1.5080423726719197e-05, "loss": 1.9964, "step": 31415 }, { "epoch": 1.0136153463020583, "grad_norm": 0.359375, "learning_rate": 1.5079650428958095e-05, "loss": 1.9841, "step": 31416 }, { "epoch": 1.0136476101558545, "grad_norm": 0.373046875, "learning_rate": 1.5078877130985296e-05, "loss": 1.9826, "step": 31417 }, { "epoch": 1.0136798740096509, "grad_norm": 0.341796875, "learning_rate": 1.5078103832802857e-05, "loss": 1.9264, "step": 31418 }, { "epoch": 1.0137121378634473, "grad_norm": 0.35546875, "learning_rate": 1.5077330534412835e-05, "loss": 2.0017, "step": 31419 }, { "epoch": 1.0137444017172437, "grad_norm": 0.341796875, "learning_rate": 1.5076557235817284e-05, "loss": 1.9502, "step": 31420 }, { "epoch": 1.01377666557104, "grad_norm": 0.34765625, "learning_rate": 1.5075783937018259e-05, "loss": 1.9576, "step": 31421 }, { "epoch": 1.0138089294248362, "grad_norm": 0.36328125, "learning_rate": 1.5075010638017816e-05, "loss": 1.9938, "step": 31422 }, { "epoch": 1.0138411932786326, "grad_norm": 0.341796875, "learning_rate": 1.5074237338818e-05, "loss": 1.9556, "step": 31423 }, { "epoch": 1.013873457132429, "grad_norm": 0.365234375, "learning_rate": 1.5073464039420888e-05, "loss": 1.9546, "step": 31424 }, { "epoch": 1.0139057209862254, "grad_norm": 0.345703125, "learning_rate": 1.5072690739828514e-05, "loss": 1.9792, "step": 31425 }, { "epoch": 1.0139379848400216, "grad_norm": 0.35546875, "learning_rate": 1.5071917440042946e-05, "loss": 1.9444, "step": 31426 }, { "epoch": 1.013970248693818, "grad_norm": 0.34765625, "learning_rate": 1.5071144140066235e-05, "loss": 1.9703, "step": 31427 }, { "epoch": 1.0140025125476144, "grad_norm": 0.357421875, "learning_rate": 1.5070370839900434e-05, "loss": 1.9809, "step": 31428 }, { "epoch": 1.0140347764014108, "grad_norm": 0.349609375, "learning_rate": 1.5069597539547604e-05, "loss": 1.9576, "step": 31429 }, { "epoch": 1.014067040255207, "grad_norm": 0.357421875, "learning_rate": 1.50688242390098e-05, "loss": 1.9797, "step": 31430 }, { "epoch": 1.0140993041090034, "grad_norm": 0.416015625, "learning_rate": 1.5068050938289069e-05, "loss": 2.0105, "step": 31431 }, { "epoch": 1.0141315679627998, "grad_norm": 0.390625, "learning_rate": 1.5067277637387472e-05, "loss": 2.0028, "step": 31432 }, { "epoch": 1.0141638318165962, "grad_norm": 0.373046875, "learning_rate": 1.5066504336307066e-05, "loss": 1.9917, "step": 31433 }, { "epoch": 1.0141960956703924, "grad_norm": 0.365234375, "learning_rate": 1.5065731035049905e-05, "loss": 1.9872, "step": 31434 }, { "epoch": 1.0142283595241888, "grad_norm": 0.373046875, "learning_rate": 1.5064957733618042e-05, "loss": 1.9827, "step": 31435 }, { "epoch": 1.0142606233779852, "grad_norm": 0.345703125, "learning_rate": 1.5064184432013537e-05, "loss": 1.9614, "step": 31436 }, { "epoch": 1.0142928872317816, "grad_norm": 0.376953125, "learning_rate": 1.506341113023844e-05, "loss": 1.9656, "step": 31437 }, { "epoch": 1.0143251510855777, "grad_norm": 0.359375, "learning_rate": 1.5062637828294808e-05, "loss": 1.9607, "step": 31438 }, { "epoch": 1.0143574149393741, "grad_norm": 0.373046875, "learning_rate": 1.5061864526184697e-05, "loss": 1.9858, "step": 31439 }, { "epoch": 1.0143896787931705, "grad_norm": 0.369140625, "learning_rate": 1.5061091223910162e-05, "loss": 1.9689, "step": 31440 }, { "epoch": 1.014421942646967, "grad_norm": 0.34765625, "learning_rate": 1.5060317921473257e-05, "loss": 1.9412, "step": 31441 }, { "epoch": 1.0144542065007633, "grad_norm": 0.37109375, "learning_rate": 1.5059544618876045e-05, "loss": 1.9452, "step": 31442 }, { "epoch": 1.0144864703545595, "grad_norm": 0.349609375, "learning_rate": 1.5058771316120564e-05, "loss": 1.9169, "step": 31443 }, { "epoch": 1.014518734208356, "grad_norm": 0.369140625, "learning_rate": 1.5057998013208886e-05, "loss": 1.9735, "step": 31444 }, { "epoch": 1.0145509980621523, "grad_norm": 0.3515625, "learning_rate": 1.5057224710143064e-05, "loss": 1.9826, "step": 31445 }, { "epoch": 1.0145832619159487, "grad_norm": 0.37890625, "learning_rate": 1.5056451406925146e-05, "loss": 1.9859, "step": 31446 }, { "epoch": 1.014615525769745, "grad_norm": 0.353515625, "learning_rate": 1.5055678103557193e-05, "loss": 1.9623, "step": 31447 }, { "epoch": 1.0146477896235413, "grad_norm": 0.35546875, "learning_rate": 1.5054904800041255e-05, "loss": 1.9924, "step": 31448 }, { "epoch": 1.0146800534773377, "grad_norm": 0.345703125, "learning_rate": 1.5054131496379393e-05, "loss": 1.9842, "step": 31449 }, { "epoch": 1.014712317331134, "grad_norm": 0.34765625, "learning_rate": 1.505335819257366e-05, "loss": 2.0104, "step": 31450 }, { "epoch": 1.0147445811849303, "grad_norm": 0.34765625, "learning_rate": 1.5052584888626112e-05, "loss": 1.9759, "step": 31451 }, { "epoch": 1.0147768450387267, "grad_norm": 0.349609375, "learning_rate": 1.5051811584538801e-05, "loss": 1.94, "step": 31452 }, { "epoch": 1.014809108892523, "grad_norm": 0.373046875, "learning_rate": 1.5051038280313786e-05, "loss": 1.9695, "step": 31453 }, { "epoch": 1.0148413727463195, "grad_norm": 0.361328125, "learning_rate": 1.5050264975953124e-05, "loss": 2.0009, "step": 31454 }, { "epoch": 1.0148736366001156, "grad_norm": 0.349609375, "learning_rate": 1.5049491671458861e-05, "loss": 1.9855, "step": 31455 }, { "epoch": 1.014905900453912, "grad_norm": 0.34765625, "learning_rate": 1.5048718366833064e-05, "loss": 1.9346, "step": 31456 }, { "epoch": 1.0149381643077084, "grad_norm": 0.37109375, "learning_rate": 1.5047945062077781e-05, "loss": 1.9811, "step": 31457 }, { "epoch": 1.0149704281615048, "grad_norm": 0.3671875, "learning_rate": 1.5047171757195072e-05, "loss": 1.9322, "step": 31458 }, { "epoch": 1.015002692015301, "grad_norm": 0.349609375, "learning_rate": 1.5046398452186984e-05, "loss": 1.9805, "step": 31459 }, { "epoch": 1.0150349558690974, "grad_norm": 0.365234375, "learning_rate": 1.5045625147055581e-05, "loss": 1.9734, "step": 31460 }, { "epoch": 1.0150672197228938, "grad_norm": 0.369140625, "learning_rate": 1.5044851841802918e-05, "loss": 2.005, "step": 31461 }, { "epoch": 1.0150994835766902, "grad_norm": 0.359375, "learning_rate": 1.5044078536431048e-05, "loss": 1.9505, "step": 31462 }, { "epoch": 1.0151317474304866, "grad_norm": 0.349609375, "learning_rate": 1.5043305230942021e-05, "loss": 1.9768, "step": 31463 }, { "epoch": 1.0151640112842828, "grad_norm": 0.380859375, "learning_rate": 1.50425319253379e-05, "loss": 1.9902, "step": 31464 }, { "epoch": 1.0151962751380792, "grad_norm": 0.3515625, "learning_rate": 1.5041758619620734e-05, "loss": 1.9738, "step": 31465 }, { "epoch": 1.0152285389918756, "grad_norm": 0.3671875, "learning_rate": 1.5040985313792582e-05, "loss": 1.9311, "step": 31466 }, { "epoch": 1.015260802845672, "grad_norm": 0.37109375, "learning_rate": 1.5040212007855504e-05, "loss": 1.9888, "step": 31467 }, { "epoch": 1.0152930666994682, "grad_norm": 0.3515625, "learning_rate": 1.5039438701811548e-05, "loss": 1.9777, "step": 31468 }, { "epoch": 1.0153253305532646, "grad_norm": 0.365234375, "learning_rate": 1.5038665395662772e-05, "loss": 1.9917, "step": 31469 }, { "epoch": 1.015357594407061, "grad_norm": 0.3515625, "learning_rate": 1.5037892089411228e-05, "loss": 1.9634, "step": 31470 }, { "epoch": 1.0153898582608574, "grad_norm": 0.359375, "learning_rate": 1.5037118783058976e-05, "loss": 1.9753, "step": 31471 }, { "epoch": 1.0154221221146535, "grad_norm": 0.345703125, "learning_rate": 1.5036345476608067e-05, "loss": 1.9999, "step": 31472 }, { "epoch": 1.01545438596845, "grad_norm": 0.349609375, "learning_rate": 1.5035572170060564e-05, "loss": 2.0048, "step": 31473 }, { "epoch": 1.0154866498222463, "grad_norm": 0.345703125, "learning_rate": 1.5034798863418513e-05, "loss": 1.9705, "step": 31474 }, { "epoch": 1.0155189136760427, "grad_norm": 0.3515625, "learning_rate": 1.5034025556683975e-05, "loss": 1.9752, "step": 31475 }, { "epoch": 1.015551177529839, "grad_norm": 0.34765625, "learning_rate": 1.5033252249859005e-05, "loss": 1.9836, "step": 31476 }, { "epoch": 1.0155834413836353, "grad_norm": 0.3515625, "learning_rate": 1.5032478942945653e-05, "loss": 1.9655, "step": 31477 }, { "epoch": 1.0156157052374317, "grad_norm": 0.337890625, "learning_rate": 1.5031705635945978e-05, "loss": 1.9904, "step": 31478 }, { "epoch": 1.0156479690912281, "grad_norm": 0.353515625, "learning_rate": 1.5030932328862044e-05, "loss": 1.9517, "step": 31479 }, { "epoch": 1.0156802329450243, "grad_norm": 0.3515625, "learning_rate": 1.5030159021695885e-05, "loss": 1.9787, "step": 31480 }, { "epoch": 1.0157124967988207, "grad_norm": 0.3671875, "learning_rate": 1.5029385714449576e-05, "loss": 1.9693, "step": 31481 }, { "epoch": 1.015744760652617, "grad_norm": 0.3515625, "learning_rate": 1.5028612407125168e-05, "loss": 1.9773, "step": 31482 }, { "epoch": 1.0157770245064135, "grad_norm": 0.3671875, "learning_rate": 1.5027839099724711e-05, "loss": 1.9568, "step": 31483 }, { "epoch": 1.01580928836021, "grad_norm": 0.3515625, "learning_rate": 1.5027065792250261e-05, "loss": 1.9927, "step": 31484 }, { "epoch": 1.015841552214006, "grad_norm": 0.34765625, "learning_rate": 1.5026292484703877e-05, "loss": 1.9366, "step": 31485 }, { "epoch": 1.0158738160678025, "grad_norm": 0.353515625, "learning_rate": 1.5025519177087615e-05, "loss": 1.9896, "step": 31486 }, { "epoch": 1.0159060799215989, "grad_norm": 0.3515625, "learning_rate": 1.5024745869403525e-05, "loss": 1.9976, "step": 31487 }, { "epoch": 1.0159383437753953, "grad_norm": 0.369140625, "learning_rate": 1.502397256165367e-05, "loss": 1.9837, "step": 31488 }, { "epoch": 1.0159706076291914, "grad_norm": 0.359375, "learning_rate": 1.5023199253840095e-05, "loss": 2.0092, "step": 31489 }, { "epoch": 1.0160028714829878, "grad_norm": 0.376953125, "learning_rate": 1.5022425945964864e-05, "loss": 1.9889, "step": 31490 }, { "epoch": 1.0160351353367842, "grad_norm": 0.34375, "learning_rate": 1.5021652638030027e-05, "loss": 2.0169, "step": 31491 }, { "epoch": 1.0160673991905806, "grad_norm": 0.40625, "learning_rate": 1.5020879330037643e-05, "loss": 1.9527, "step": 31492 }, { "epoch": 1.0160996630443768, "grad_norm": 0.357421875, "learning_rate": 1.5020106021989765e-05, "loss": 1.9926, "step": 31493 }, { "epoch": 1.0161319268981732, "grad_norm": 0.3671875, "learning_rate": 1.5019332713888451e-05, "loss": 1.9695, "step": 31494 }, { "epoch": 1.0161641907519696, "grad_norm": 0.369140625, "learning_rate": 1.5018559405735753e-05, "loss": 1.9957, "step": 31495 }, { "epoch": 1.016196454605766, "grad_norm": 0.365234375, "learning_rate": 1.5017786097533722e-05, "loss": 1.9714, "step": 31496 }, { "epoch": 1.0162287184595622, "grad_norm": 0.375, "learning_rate": 1.501701278928443e-05, "loss": 1.979, "step": 31497 }, { "epoch": 1.0162609823133586, "grad_norm": 0.36328125, "learning_rate": 1.5016239480989914e-05, "loss": 1.9773, "step": 31498 }, { "epoch": 1.016293246167155, "grad_norm": 0.3828125, "learning_rate": 1.501546617265224e-05, "loss": 1.9914, "step": 31499 }, { "epoch": 1.0163255100209514, "grad_norm": 0.36328125, "learning_rate": 1.5014692864273461e-05, "loss": 1.9852, "step": 31500 }, { "epoch": 1.0163577738747478, "grad_norm": 0.373046875, "learning_rate": 1.5013919555855623e-05, "loss": 1.9849, "step": 31501 }, { "epoch": 1.016390037728544, "grad_norm": 0.349609375, "learning_rate": 1.5013146247400795e-05, "loss": 1.9689, "step": 31502 }, { "epoch": 1.0164223015823404, "grad_norm": 0.34375, "learning_rate": 1.5012372938911028e-05, "loss": 2.0227, "step": 31503 }, { "epoch": 1.0164545654361368, "grad_norm": 0.37109375, "learning_rate": 1.5011599630388375e-05, "loss": 1.9623, "step": 31504 }, { "epoch": 1.0164868292899332, "grad_norm": 0.33203125, "learning_rate": 1.5010826321834893e-05, "loss": 1.9366, "step": 31505 }, { "epoch": 1.0165190931437293, "grad_norm": 0.375, "learning_rate": 1.5010053013252634e-05, "loss": 1.9955, "step": 31506 }, { "epoch": 1.0165513569975257, "grad_norm": 0.333984375, "learning_rate": 1.500927970464366e-05, "loss": 1.941, "step": 31507 }, { "epoch": 1.0165836208513221, "grad_norm": 0.37109375, "learning_rate": 1.5008506396010017e-05, "loss": 1.9351, "step": 31508 }, { "epoch": 1.0166158847051185, "grad_norm": 0.33984375, "learning_rate": 1.5007733087353775e-05, "loss": 1.9961, "step": 31509 }, { "epoch": 1.0166481485589147, "grad_norm": 0.349609375, "learning_rate": 1.500695977867697e-05, "loss": 1.9775, "step": 31510 }, { "epoch": 1.0166804124127111, "grad_norm": 0.349609375, "learning_rate": 1.5006186469981673e-05, "loss": 1.9817, "step": 31511 }, { "epoch": 1.0167126762665075, "grad_norm": 0.337890625, "learning_rate": 1.5005413161269933e-05, "loss": 1.975, "step": 31512 }, { "epoch": 1.016744940120304, "grad_norm": 0.369140625, "learning_rate": 1.5004639852543802e-05, "loss": 1.9982, "step": 31513 }, { "epoch": 1.0167772039741, "grad_norm": 0.359375, "learning_rate": 1.5003866543805341e-05, "loss": 1.9939, "step": 31514 }, { "epoch": 1.0168094678278965, "grad_norm": 0.34765625, "learning_rate": 1.5003093235056608e-05, "loss": 2.0015, "step": 31515 }, { "epoch": 1.016841731681693, "grad_norm": 0.341796875, "learning_rate": 1.500231992629965e-05, "loss": 1.9744, "step": 31516 }, { "epoch": 1.0168739955354893, "grad_norm": 0.35546875, "learning_rate": 1.5001546617536522e-05, "loss": 1.9792, "step": 31517 }, { "epoch": 1.0169062593892855, "grad_norm": 0.33203125, "learning_rate": 1.500077330876929e-05, "loss": 1.9736, "step": 31518 }, { "epoch": 1.0169385232430819, "grad_norm": 0.361328125, "learning_rate": 1.5e-05, "loss": 1.9532, "step": 31519 }, { "epoch": 1.0169707870968783, "grad_norm": 0.361328125, "learning_rate": 1.4999226691230711e-05, "loss": 1.9881, "step": 31520 }, { "epoch": 1.0170030509506747, "grad_norm": 0.3515625, "learning_rate": 1.4998453382463475e-05, "loss": 1.9762, "step": 31521 }, { "epoch": 1.017035314804471, "grad_norm": 0.359375, "learning_rate": 1.4997680073700351e-05, "loss": 1.9864, "step": 31522 }, { "epoch": 1.0170675786582672, "grad_norm": 0.369140625, "learning_rate": 1.4996906764943396e-05, "loss": 1.9858, "step": 31523 }, { "epoch": 1.0170998425120636, "grad_norm": 0.34765625, "learning_rate": 1.499613345619466e-05, "loss": 1.9764, "step": 31524 }, { "epoch": 1.01713210636586, "grad_norm": 0.361328125, "learning_rate": 1.4995360147456201e-05, "loss": 1.9554, "step": 31525 }, { "epoch": 1.0171643702196564, "grad_norm": 0.3515625, "learning_rate": 1.4994586838730072e-05, "loss": 1.9928, "step": 31526 }, { "epoch": 1.0171966340734526, "grad_norm": 0.36328125, "learning_rate": 1.4993813530018328e-05, "loss": 1.9846, "step": 31527 }, { "epoch": 1.017228897927249, "grad_norm": 0.345703125, "learning_rate": 1.4993040221323027e-05, "loss": 1.9694, "step": 31528 }, { "epoch": 1.0172611617810454, "grad_norm": 0.349609375, "learning_rate": 1.4992266912646231e-05, "loss": 1.9718, "step": 31529 }, { "epoch": 1.0172934256348418, "grad_norm": 0.349609375, "learning_rate": 1.4991493603989983e-05, "loss": 1.9757, "step": 31530 }, { "epoch": 1.017325689488638, "grad_norm": 0.359375, "learning_rate": 1.4990720295356343e-05, "loss": 1.931, "step": 31531 }, { "epoch": 1.0173579533424344, "grad_norm": 0.3515625, "learning_rate": 1.4989946986747367e-05, "loss": 1.9721, "step": 31532 }, { "epoch": 1.0173902171962308, "grad_norm": 0.35546875, "learning_rate": 1.4989173678165106e-05, "loss": 1.9457, "step": 31533 }, { "epoch": 1.0174224810500272, "grad_norm": 0.349609375, "learning_rate": 1.4988400369611624e-05, "loss": 2.0, "step": 31534 }, { "epoch": 1.0174547449038234, "grad_norm": 0.33984375, "learning_rate": 1.4987627061088978e-05, "loss": 1.977, "step": 31535 }, { "epoch": 1.0174870087576198, "grad_norm": 0.34375, "learning_rate": 1.498685375259921e-05, "loss": 1.9813, "step": 31536 }, { "epoch": 1.0175192726114162, "grad_norm": 0.34375, "learning_rate": 1.498608044414438e-05, "loss": 1.944, "step": 31537 }, { "epoch": 1.0175515364652126, "grad_norm": 0.3671875, "learning_rate": 1.4985307135726543e-05, "loss": 2.0202, "step": 31538 }, { "epoch": 1.0175838003190087, "grad_norm": 0.35546875, "learning_rate": 1.4984533827347762e-05, "loss": 1.9609, "step": 31539 }, { "epoch": 1.0176160641728051, "grad_norm": 0.341796875, "learning_rate": 1.4983760519010085e-05, "loss": 1.9731, "step": 31540 }, { "epoch": 1.0176483280266015, "grad_norm": 0.333984375, "learning_rate": 1.4982987210715578e-05, "loss": 1.977, "step": 31541 }, { "epoch": 1.017680591880398, "grad_norm": 0.359375, "learning_rate": 1.4982213902466279e-05, "loss": 1.9995, "step": 31542 }, { "epoch": 1.0177128557341943, "grad_norm": 0.33984375, "learning_rate": 1.498144059426425e-05, "loss": 1.9876, "step": 31543 }, { "epoch": 1.0177451195879905, "grad_norm": 0.349609375, "learning_rate": 1.4980667286111551e-05, "loss": 2.0089, "step": 31544 }, { "epoch": 1.017777383441787, "grad_norm": 0.359375, "learning_rate": 1.4979893978010236e-05, "loss": 1.9982, "step": 31545 }, { "epoch": 1.0178096472955833, "grad_norm": 0.353515625, "learning_rate": 1.4979120669962356e-05, "loss": 1.9927, "step": 31546 }, { "epoch": 1.0178419111493797, "grad_norm": 0.3515625, "learning_rate": 1.4978347361969976e-05, "loss": 1.9761, "step": 31547 }, { "epoch": 1.017874175003176, "grad_norm": 0.34375, "learning_rate": 1.4977574054035138e-05, "loss": 1.9632, "step": 31548 }, { "epoch": 1.0179064388569723, "grad_norm": 0.3828125, "learning_rate": 1.4976800746159907e-05, "loss": 1.9721, "step": 31549 }, { "epoch": 1.0179387027107687, "grad_norm": 0.36328125, "learning_rate": 1.4976027438346332e-05, "loss": 1.9866, "step": 31550 }, { "epoch": 1.017970966564565, "grad_norm": 0.376953125, "learning_rate": 1.4975254130596473e-05, "loss": 1.9681, "step": 31551 }, { "epoch": 1.0180032304183613, "grad_norm": 0.341796875, "learning_rate": 1.4974480822912385e-05, "loss": 1.981, "step": 31552 }, { "epoch": 1.0180354942721577, "grad_norm": 0.3671875, "learning_rate": 1.4973707515296123e-05, "loss": 1.943, "step": 31553 }, { "epoch": 1.018067758125954, "grad_norm": 0.36328125, "learning_rate": 1.497293420774974e-05, "loss": 1.9787, "step": 31554 }, { "epoch": 1.0181000219797505, "grad_norm": 0.37890625, "learning_rate": 1.4972160900275291e-05, "loss": 1.9713, "step": 31555 }, { "epoch": 1.0181322858335466, "grad_norm": 0.35546875, "learning_rate": 1.4971387592874834e-05, "loss": 1.9887, "step": 31556 }, { "epoch": 1.018164549687343, "grad_norm": 0.376953125, "learning_rate": 1.4970614285550424e-05, "loss": 1.966, "step": 31557 }, { "epoch": 1.0181968135411394, "grad_norm": 0.365234375, "learning_rate": 1.4969840978304114e-05, "loss": 1.9861, "step": 31558 }, { "epoch": 1.0182290773949358, "grad_norm": 0.361328125, "learning_rate": 1.4969067671137964e-05, "loss": 2.018, "step": 31559 }, { "epoch": 1.0182613412487322, "grad_norm": 0.365234375, "learning_rate": 1.4968294364054023e-05, "loss": 1.9107, "step": 31560 }, { "epoch": 1.0182936051025284, "grad_norm": 0.3671875, "learning_rate": 1.4967521057054351e-05, "loss": 1.9909, "step": 31561 }, { "epoch": 1.0183258689563248, "grad_norm": 0.36328125, "learning_rate": 1.4966747750141e-05, "loss": 1.9835, "step": 31562 }, { "epoch": 1.0183581328101212, "grad_norm": 0.375, "learning_rate": 1.4965974443316027e-05, "loss": 1.9765, "step": 31563 }, { "epoch": 1.0183903966639176, "grad_norm": 0.357421875, "learning_rate": 1.4965201136581487e-05, "loss": 1.9765, "step": 31564 }, { "epoch": 1.0184226605177138, "grad_norm": 0.361328125, "learning_rate": 1.4964427829939442e-05, "loss": 1.9883, "step": 31565 }, { "epoch": 1.0184549243715102, "grad_norm": 0.36328125, "learning_rate": 1.4963654523391936e-05, "loss": 1.9819, "step": 31566 }, { "epoch": 1.0184871882253066, "grad_norm": 0.36328125, "learning_rate": 1.4962881216941026e-05, "loss": 1.9855, "step": 31567 }, { "epoch": 1.018519452079103, "grad_norm": 0.35546875, "learning_rate": 1.4962107910588775e-05, "loss": 1.9716, "step": 31568 }, { "epoch": 1.0185517159328992, "grad_norm": 0.34765625, "learning_rate": 1.4961334604337232e-05, "loss": 1.9633, "step": 31569 }, { "epoch": 1.0185839797866956, "grad_norm": 0.353515625, "learning_rate": 1.496056129818845e-05, "loss": 1.9827, "step": 31570 }, { "epoch": 1.018616243640492, "grad_norm": 0.36328125, "learning_rate": 1.49597879921445e-05, "loss": 1.981, "step": 31571 }, { "epoch": 1.0186485074942884, "grad_norm": 0.34765625, "learning_rate": 1.4959014686207418e-05, "loss": 1.9706, "step": 31572 }, { "epoch": 1.0186807713480845, "grad_norm": 0.365234375, "learning_rate": 1.495824138037927e-05, "loss": 1.9734, "step": 31573 }, { "epoch": 1.018713035201881, "grad_norm": 0.375, "learning_rate": 1.4957468074662105e-05, "loss": 1.966, "step": 31574 }, { "epoch": 1.0187452990556773, "grad_norm": 0.36328125, "learning_rate": 1.4956694769057978e-05, "loss": 1.9756, "step": 31575 }, { "epoch": 1.0187775629094737, "grad_norm": 0.37109375, "learning_rate": 1.4955921463568953e-05, "loss": 1.9416, "step": 31576 }, { "epoch": 1.01880982676327, "grad_norm": 0.36328125, "learning_rate": 1.4955148158197086e-05, "loss": 1.9605, "step": 31577 }, { "epoch": 1.0188420906170663, "grad_norm": 0.357421875, "learning_rate": 1.495437485294442e-05, "loss": 1.9671, "step": 31578 }, { "epoch": 1.0188743544708627, "grad_norm": 0.369140625, "learning_rate": 1.4953601547813017e-05, "loss": 1.9704, "step": 31579 }, { "epoch": 1.0189066183246591, "grad_norm": 0.3671875, "learning_rate": 1.4952828242804929e-05, "loss": 1.9798, "step": 31580 }, { "epoch": 1.0189388821784555, "grad_norm": 0.34765625, "learning_rate": 1.4952054937922216e-05, "loss": 1.9632, "step": 31581 }, { "epoch": 1.0189711460322517, "grad_norm": 0.369140625, "learning_rate": 1.4951281633166935e-05, "loss": 1.9968, "step": 31582 }, { "epoch": 1.019003409886048, "grad_norm": 0.353515625, "learning_rate": 1.4950508328541143e-05, "loss": 1.9931, "step": 31583 }, { "epoch": 1.0190356737398445, "grad_norm": 0.3515625, "learning_rate": 1.494973502404688e-05, "loss": 1.9825, "step": 31584 }, { "epoch": 1.019067937593641, "grad_norm": 0.38671875, "learning_rate": 1.4948961719686216e-05, "loss": 1.9938, "step": 31585 }, { "epoch": 1.019100201447437, "grad_norm": 0.3515625, "learning_rate": 1.49481884154612e-05, "loss": 1.9744, "step": 31586 }, { "epoch": 1.0191324653012335, "grad_norm": 0.3671875, "learning_rate": 1.494741511137389e-05, "loss": 2.0017, "step": 31587 }, { "epoch": 1.0191647291550299, "grad_norm": 0.380859375, "learning_rate": 1.4946641807426341e-05, "loss": 1.9611, "step": 31588 }, { "epoch": 1.0191969930088263, "grad_norm": 0.34765625, "learning_rate": 1.494586850362061e-05, "loss": 1.998, "step": 31589 }, { "epoch": 1.0192292568626224, "grad_norm": 0.361328125, "learning_rate": 1.4945095199958748e-05, "loss": 1.9802, "step": 31590 }, { "epoch": 1.0192615207164188, "grad_norm": 0.361328125, "learning_rate": 1.4944321896442811e-05, "loss": 1.9942, "step": 31591 }, { "epoch": 1.0192937845702152, "grad_norm": 0.34765625, "learning_rate": 1.4943548593074856e-05, "loss": 1.9724, "step": 31592 }, { "epoch": 1.0193260484240116, "grad_norm": 0.388671875, "learning_rate": 1.4942775289856938e-05, "loss": 1.9725, "step": 31593 }, { "epoch": 1.0193583122778078, "grad_norm": 0.3515625, "learning_rate": 1.4942001986791113e-05, "loss": 1.9546, "step": 31594 }, { "epoch": 1.0193905761316042, "grad_norm": 0.35546875, "learning_rate": 1.4941228683879438e-05, "loss": 1.982, "step": 31595 }, { "epoch": 1.0194228399854006, "grad_norm": 0.35546875, "learning_rate": 1.4940455381123963e-05, "loss": 1.9832, "step": 31596 }, { "epoch": 1.019455103839197, "grad_norm": 0.35546875, "learning_rate": 1.4939682078526745e-05, "loss": 1.978, "step": 31597 }, { "epoch": 1.0194873676929932, "grad_norm": 0.384765625, "learning_rate": 1.4938908776089842e-05, "loss": 1.9932, "step": 31598 }, { "epoch": 1.0195196315467896, "grad_norm": 0.353515625, "learning_rate": 1.4938135473815307e-05, "loss": 1.959, "step": 31599 }, { "epoch": 1.019551895400586, "grad_norm": 0.3515625, "learning_rate": 1.4937362171705194e-05, "loss": 1.9773, "step": 31600 }, { "epoch": 1.0195841592543824, "grad_norm": 0.35546875, "learning_rate": 1.4936588869761566e-05, "loss": 1.9701, "step": 31601 }, { "epoch": 1.0196164231081788, "grad_norm": 0.34765625, "learning_rate": 1.4935815567986469e-05, "loss": 1.9994, "step": 31602 }, { "epoch": 1.019648686961975, "grad_norm": 0.341796875, "learning_rate": 1.493504226638196e-05, "loss": 1.9677, "step": 31603 }, { "epoch": 1.0196809508157714, "grad_norm": 0.35546875, "learning_rate": 1.4934268964950097e-05, "loss": 1.9768, "step": 31604 }, { "epoch": 1.0197132146695678, "grad_norm": 0.359375, "learning_rate": 1.4933495663692937e-05, "loss": 1.9761, "step": 31605 }, { "epoch": 1.0197454785233642, "grad_norm": 0.34765625, "learning_rate": 1.493272236261253e-05, "loss": 1.9682, "step": 31606 }, { "epoch": 1.0197777423771603, "grad_norm": 0.349609375, "learning_rate": 1.4931949061710937e-05, "loss": 1.9837, "step": 31607 }, { "epoch": 1.0198100062309567, "grad_norm": 0.35546875, "learning_rate": 1.4931175760990207e-05, "loss": 1.9795, "step": 31608 }, { "epoch": 1.0198422700847531, "grad_norm": 0.357421875, "learning_rate": 1.49304024604524e-05, "loss": 1.9116, "step": 31609 }, { "epoch": 1.0198745339385495, "grad_norm": 0.35546875, "learning_rate": 1.4929629160099567e-05, "loss": 1.9846, "step": 31610 }, { "epoch": 1.0199067977923457, "grad_norm": 0.369140625, "learning_rate": 1.492885585993377e-05, "loss": 2.0006, "step": 31611 }, { "epoch": 1.0199390616461421, "grad_norm": 0.349609375, "learning_rate": 1.4928082559957052e-05, "loss": 1.9757, "step": 31612 }, { "epoch": 1.0199713254999385, "grad_norm": 0.349609375, "learning_rate": 1.492730926017149e-05, "loss": 1.9812, "step": 31613 }, { "epoch": 1.020003589353735, "grad_norm": 0.349609375, "learning_rate": 1.4926535960579119e-05, "loss": 1.98, "step": 31614 }, { "epoch": 1.020035853207531, "grad_norm": 0.357421875, "learning_rate": 1.4925762661182e-05, "loss": 1.9433, "step": 31615 }, { "epoch": 1.0200681170613275, "grad_norm": 0.3515625, "learning_rate": 1.4924989361982188e-05, "loss": 1.9843, "step": 31616 }, { "epoch": 1.020100380915124, "grad_norm": 0.34765625, "learning_rate": 1.4924216062981744e-05, "loss": 1.9905, "step": 31617 }, { "epoch": 1.0201326447689203, "grad_norm": 0.34765625, "learning_rate": 1.4923442764182717e-05, "loss": 1.9894, "step": 31618 }, { "epoch": 1.0201649086227165, "grad_norm": 0.345703125, "learning_rate": 1.492266946558717e-05, "loss": 1.9661, "step": 31619 }, { "epoch": 1.0201971724765129, "grad_norm": 0.35546875, "learning_rate": 1.4921896167197147e-05, "loss": 1.9788, "step": 31620 }, { "epoch": 1.0202294363303093, "grad_norm": 0.357421875, "learning_rate": 1.4921122869014706e-05, "loss": 1.9666, "step": 31621 }, { "epoch": 1.0202617001841057, "grad_norm": 0.34765625, "learning_rate": 1.4920349571041907e-05, "loss": 1.9616, "step": 31622 }, { "epoch": 1.020293964037902, "grad_norm": 0.373046875, "learning_rate": 1.4919576273280806e-05, "loss": 1.9481, "step": 31623 }, { "epoch": 1.0203262278916982, "grad_norm": 0.349609375, "learning_rate": 1.4918802975733455e-05, "loss": 1.9853, "step": 31624 }, { "epoch": 1.0203584917454946, "grad_norm": 0.365234375, "learning_rate": 1.4918029678401916e-05, "loss": 1.9784, "step": 31625 }, { "epoch": 1.020390755599291, "grad_norm": 0.357421875, "learning_rate": 1.4917256381288227e-05, "loss": 1.9874, "step": 31626 }, { "epoch": 1.0204230194530874, "grad_norm": 0.35546875, "learning_rate": 1.4916483084394458e-05, "loss": 1.9565, "step": 31627 }, { "epoch": 1.0204552833068836, "grad_norm": 0.34765625, "learning_rate": 1.4915709787722663e-05, "loss": 1.9958, "step": 31628 }, { "epoch": 1.02048754716068, "grad_norm": 0.359375, "learning_rate": 1.4914936491274895e-05, "loss": 1.991, "step": 31629 }, { "epoch": 1.0205198110144764, "grad_norm": 0.345703125, "learning_rate": 1.4914163195053208e-05, "loss": 1.9684, "step": 31630 }, { "epoch": 1.0205520748682728, "grad_norm": 0.34765625, "learning_rate": 1.4913389899059663e-05, "loss": 1.987, "step": 31631 }, { "epoch": 1.020584338722069, "grad_norm": 0.365234375, "learning_rate": 1.4912616603296305e-05, "loss": 1.9905, "step": 31632 }, { "epoch": 1.0206166025758654, "grad_norm": 0.353515625, "learning_rate": 1.4911843307765196e-05, "loss": 1.9683, "step": 31633 }, { "epoch": 1.0206488664296618, "grad_norm": 0.3515625, "learning_rate": 1.4911070012468391e-05, "loss": 1.9709, "step": 31634 }, { "epoch": 1.0206811302834582, "grad_norm": 0.3828125, "learning_rate": 1.4910296717407946e-05, "loss": 1.9704, "step": 31635 }, { "epoch": 1.0207133941372544, "grad_norm": 0.349609375, "learning_rate": 1.4909523422585914e-05, "loss": 1.9638, "step": 31636 }, { "epoch": 1.0207456579910508, "grad_norm": 0.390625, "learning_rate": 1.4908750128004354e-05, "loss": 1.9897, "step": 31637 }, { "epoch": 1.0207779218448472, "grad_norm": 0.400390625, "learning_rate": 1.4907976833665317e-05, "loss": 2.0017, "step": 31638 }, { "epoch": 1.0208101856986436, "grad_norm": 0.3828125, "learning_rate": 1.4907203539570858e-05, "loss": 1.9651, "step": 31639 }, { "epoch": 1.0208424495524397, "grad_norm": 0.369140625, "learning_rate": 1.4906430245723034e-05, "loss": 1.9443, "step": 31640 }, { "epoch": 1.0208747134062361, "grad_norm": 0.3671875, "learning_rate": 1.4905656952123901e-05, "loss": 1.9715, "step": 31641 }, { "epoch": 1.0209069772600325, "grad_norm": 0.384765625, "learning_rate": 1.4904883658775513e-05, "loss": 1.9841, "step": 31642 }, { "epoch": 1.020939241113829, "grad_norm": 0.376953125, "learning_rate": 1.490411036567993e-05, "loss": 1.9581, "step": 31643 }, { "epoch": 1.0209715049676253, "grad_norm": 0.357421875, "learning_rate": 1.49033370728392e-05, "loss": 1.966, "step": 31644 }, { "epoch": 1.0210037688214215, "grad_norm": 0.392578125, "learning_rate": 1.490256378025538e-05, "loss": 1.9574, "step": 31645 }, { "epoch": 1.021036032675218, "grad_norm": 0.388671875, "learning_rate": 1.4901790487930528e-05, "loss": 1.983, "step": 31646 }, { "epoch": 1.0210682965290143, "grad_norm": 0.359375, "learning_rate": 1.4901017195866698e-05, "loss": 1.957, "step": 31647 }, { "epoch": 1.0211005603828107, "grad_norm": 0.373046875, "learning_rate": 1.4900243904065938e-05, "loss": 1.9766, "step": 31648 }, { "epoch": 1.021132824236607, "grad_norm": 0.353515625, "learning_rate": 1.4899470612530317e-05, "loss": 1.9427, "step": 31649 }, { "epoch": 1.0211650880904033, "grad_norm": 0.388671875, "learning_rate": 1.489869732126189e-05, "loss": 1.9822, "step": 31650 }, { "epoch": 1.0211973519441997, "grad_norm": 0.361328125, "learning_rate": 1.48979240302627e-05, "loss": 1.9859, "step": 31651 }, { "epoch": 1.021229615797996, "grad_norm": 0.349609375, "learning_rate": 1.4897150739534809e-05, "loss": 1.9416, "step": 31652 }, { "epoch": 1.0212618796517923, "grad_norm": 0.359375, "learning_rate": 1.4896377449080264e-05, "loss": 1.9493, "step": 31653 }, { "epoch": 1.0212941435055887, "grad_norm": 0.369140625, "learning_rate": 1.4895604158901134e-05, "loss": 1.983, "step": 31654 }, { "epoch": 1.021326407359385, "grad_norm": 0.349609375, "learning_rate": 1.4894830868999467e-05, "loss": 1.9651, "step": 31655 }, { "epoch": 1.0213586712131815, "grad_norm": 0.357421875, "learning_rate": 1.4894057579377327e-05, "loss": 1.9844, "step": 31656 }, { "epoch": 1.0213909350669776, "grad_norm": 0.3359375, "learning_rate": 1.4893284290036754e-05, "loss": 1.9351, "step": 31657 }, { "epoch": 1.021423198920774, "grad_norm": 0.369140625, "learning_rate": 1.4892511000979807e-05, "loss": 1.942, "step": 31658 }, { "epoch": 1.0214554627745704, "grad_norm": 0.3515625, "learning_rate": 1.489173771220855e-05, "loss": 1.9443, "step": 31659 }, { "epoch": 1.0214877266283668, "grad_norm": 0.375, "learning_rate": 1.4890964423725032e-05, "loss": 1.975, "step": 31660 }, { "epoch": 1.021519990482163, "grad_norm": 0.353515625, "learning_rate": 1.4890191135531308e-05, "loss": 1.9861, "step": 31661 }, { "epoch": 1.0215522543359594, "grad_norm": 0.35546875, "learning_rate": 1.4889417847629443e-05, "loss": 2.0231, "step": 31662 }, { "epoch": 1.0215845181897558, "grad_norm": 0.361328125, "learning_rate": 1.4888644560021475e-05, "loss": 1.9831, "step": 31663 }, { "epoch": 1.0216167820435522, "grad_norm": 0.369140625, "learning_rate": 1.4887871272709468e-05, "loss": 1.9651, "step": 31664 }, { "epoch": 1.0216490458973486, "grad_norm": 0.36328125, "learning_rate": 1.4887097985695482e-05, "loss": 1.9758, "step": 31665 }, { "epoch": 1.0216813097511448, "grad_norm": 0.3828125, "learning_rate": 1.4886324698981565e-05, "loss": 1.9569, "step": 31666 }, { "epoch": 1.0217135736049412, "grad_norm": 0.34375, "learning_rate": 1.4885551412569777e-05, "loss": 1.9751, "step": 31667 }, { "epoch": 1.0217458374587376, "grad_norm": 0.361328125, "learning_rate": 1.4884778126462172e-05, "loss": 1.9793, "step": 31668 }, { "epoch": 1.021778101312534, "grad_norm": 0.357421875, "learning_rate": 1.4884004840660802e-05, "loss": 1.9827, "step": 31669 }, { "epoch": 1.0218103651663302, "grad_norm": 0.33984375, "learning_rate": 1.4883231555167723e-05, "loss": 1.9812, "step": 31670 }, { "epoch": 1.0218426290201266, "grad_norm": 0.35546875, "learning_rate": 1.4882458269984995e-05, "loss": 1.9905, "step": 31671 }, { "epoch": 1.021874892873923, "grad_norm": 0.3515625, "learning_rate": 1.4881684985114667e-05, "loss": 1.967, "step": 31672 }, { "epoch": 1.0219071567277194, "grad_norm": 0.34765625, "learning_rate": 1.4880911700558801e-05, "loss": 1.9859, "step": 31673 }, { "epoch": 1.0219394205815155, "grad_norm": 0.34765625, "learning_rate": 1.488013841631945e-05, "loss": 1.9628, "step": 31674 }, { "epoch": 1.021971684435312, "grad_norm": 0.365234375, "learning_rate": 1.4879365132398666e-05, "loss": 1.9572, "step": 31675 }, { "epoch": 1.0220039482891083, "grad_norm": 0.3515625, "learning_rate": 1.4878591848798503e-05, "loss": 1.9577, "step": 31676 }, { "epoch": 1.0220362121429047, "grad_norm": 0.359375, "learning_rate": 1.4877818565521021e-05, "loss": 1.9952, "step": 31677 }, { "epoch": 1.022068475996701, "grad_norm": 0.353515625, "learning_rate": 1.4877045282568273e-05, "loss": 1.9702, "step": 31678 }, { "epoch": 1.0221007398504973, "grad_norm": 0.361328125, "learning_rate": 1.4876271999942317e-05, "loss": 1.9898, "step": 31679 }, { "epoch": 1.0221330037042937, "grad_norm": 0.341796875, "learning_rate": 1.4875498717645208e-05, "loss": 1.9974, "step": 31680 }, { "epoch": 1.0221652675580901, "grad_norm": 0.337890625, "learning_rate": 1.4874725435678996e-05, "loss": 2.0023, "step": 31681 }, { "epoch": 1.0221975314118863, "grad_norm": 0.34765625, "learning_rate": 1.487395215404574e-05, "loss": 1.9629, "step": 31682 }, { "epoch": 1.0222297952656827, "grad_norm": 0.34375, "learning_rate": 1.4873178872747495e-05, "loss": 1.9989, "step": 31683 }, { "epoch": 1.022262059119479, "grad_norm": 0.37109375, "learning_rate": 1.4872405591786315e-05, "loss": 1.9611, "step": 31684 }, { "epoch": 1.0222943229732755, "grad_norm": 0.36328125, "learning_rate": 1.4871632311164252e-05, "loss": 1.9525, "step": 31685 }, { "epoch": 1.022326586827072, "grad_norm": 0.384765625, "learning_rate": 1.4870859030883378e-05, "loss": 1.9775, "step": 31686 }, { "epoch": 1.022358850680868, "grad_norm": 0.341796875, "learning_rate": 1.4870085750945729e-05, "loss": 1.9384, "step": 31687 }, { "epoch": 1.0223911145346645, "grad_norm": 0.365234375, "learning_rate": 1.4869312471353365e-05, "loss": 1.9548, "step": 31688 }, { "epoch": 1.0224233783884609, "grad_norm": 0.365234375, "learning_rate": 1.4868539192108343e-05, "loss": 1.9901, "step": 31689 }, { "epoch": 1.0224556422422573, "grad_norm": 0.34765625, "learning_rate": 1.4867765913212717e-05, "loss": 1.9966, "step": 31690 }, { "epoch": 1.0224879060960534, "grad_norm": 0.34375, "learning_rate": 1.4866992634668545e-05, "loss": 1.9495, "step": 31691 }, { "epoch": 1.0225201699498498, "grad_norm": 0.3515625, "learning_rate": 1.4866219356477889e-05, "loss": 1.9878, "step": 31692 }, { "epoch": 1.0225524338036462, "grad_norm": 0.33984375, "learning_rate": 1.486544607864279e-05, "loss": 1.9891, "step": 31693 }, { "epoch": 1.0225846976574426, "grad_norm": 0.337890625, "learning_rate": 1.4864672801165307e-05, "loss": 1.9733, "step": 31694 }, { "epoch": 1.0226169615112388, "grad_norm": 0.3515625, "learning_rate": 1.4863899524047496e-05, "loss": 1.9683, "step": 31695 }, { "epoch": 1.0226492253650352, "grad_norm": 0.353515625, "learning_rate": 1.4863126247291416e-05, "loss": 1.9869, "step": 31696 }, { "epoch": 1.0226814892188316, "grad_norm": 0.345703125, "learning_rate": 1.486235297089912e-05, "loss": 1.9871, "step": 31697 }, { "epoch": 1.022713753072628, "grad_norm": 0.349609375, "learning_rate": 1.486157969487267e-05, "loss": 1.9766, "step": 31698 }, { "epoch": 1.0227460169264242, "grad_norm": 0.36328125, "learning_rate": 1.4860806419214109e-05, "loss": 1.9991, "step": 31699 }, { "epoch": 1.0227782807802206, "grad_norm": 0.375, "learning_rate": 1.4860033143925492e-05, "loss": 1.9868, "step": 31700 }, { "epoch": 1.022810544634017, "grad_norm": 0.37890625, "learning_rate": 1.4859259869008885e-05, "loss": 2.0183, "step": 31701 }, { "epoch": 1.0228428084878134, "grad_norm": 0.369140625, "learning_rate": 1.4858486594466337e-05, "loss": 2.0125, "step": 31702 }, { "epoch": 1.0228750723416098, "grad_norm": 0.345703125, "learning_rate": 1.4857713320299904e-05, "loss": 1.9863, "step": 31703 }, { "epoch": 1.022907336195406, "grad_norm": 0.384765625, "learning_rate": 1.4856940046511646e-05, "loss": 2.0076, "step": 31704 }, { "epoch": 1.0229396000492024, "grad_norm": 0.345703125, "learning_rate": 1.485616677310361e-05, "loss": 2.0065, "step": 31705 }, { "epoch": 1.0229718639029988, "grad_norm": 0.375, "learning_rate": 1.4855393500077853e-05, "loss": 1.9858, "step": 31706 }, { "epoch": 1.0230041277567952, "grad_norm": 0.380859375, "learning_rate": 1.4854620227436435e-05, "loss": 1.9823, "step": 31707 }, { "epoch": 1.0230363916105913, "grad_norm": 0.3671875, "learning_rate": 1.4853846955181407e-05, "loss": 1.9616, "step": 31708 }, { "epoch": 1.0230686554643877, "grad_norm": 0.37890625, "learning_rate": 1.4853073683314824e-05, "loss": 1.9493, "step": 31709 }, { "epoch": 1.0231009193181841, "grad_norm": 0.36328125, "learning_rate": 1.4852300411838749e-05, "loss": 1.9843, "step": 31710 }, { "epoch": 1.0231331831719805, "grad_norm": 0.357421875, "learning_rate": 1.4851527140755225e-05, "loss": 1.9652, "step": 31711 }, { "epoch": 1.0231654470257767, "grad_norm": 0.375, "learning_rate": 1.4850753870066314e-05, "loss": 1.9709, "step": 31712 }, { "epoch": 1.0231977108795731, "grad_norm": 0.353515625, "learning_rate": 1.4849980599774071e-05, "loss": 1.9773, "step": 31713 }, { "epoch": 1.0232299747333695, "grad_norm": 0.365234375, "learning_rate": 1.484920732988055e-05, "loss": 1.9776, "step": 31714 }, { "epoch": 1.023262238587166, "grad_norm": 0.353515625, "learning_rate": 1.4848434060387806e-05, "loss": 1.9428, "step": 31715 }, { "epoch": 1.023294502440962, "grad_norm": 0.34375, "learning_rate": 1.48476607912979e-05, "loss": 1.9854, "step": 31716 }, { "epoch": 1.0233267662947585, "grad_norm": 0.376953125, "learning_rate": 1.4846887522612879e-05, "loss": 2.0007, "step": 31717 }, { "epoch": 1.023359030148555, "grad_norm": 0.345703125, "learning_rate": 1.4846114254334799e-05, "loss": 1.9911, "step": 31718 }, { "epoch": 1.0233912940023513, "grad_norm": 0.392578125, "learning_rate": 1.4845340986465717e-05, "loss": 1.991, "step": 31719 }, { "epoch": 1.0234235578561475, "grad_norm": 0.35546875, "learning_rate": 1.4844567719007692e-05, "loss": 1.9938, "step": 31720 }, { "epoch": 1.0234558217099439, "grad_norm": 0.3828125, "learning_rate": 1.4843794451962773e-05, "loss": 1.9841, "step": 31721 }, { "epoch": 1.0234880855637403, "grad_norm": 0.33984375, "learning_rate": 1.4843021185333024e-05, "loss": 1.9681, "step": 31722 }, { "epoch": 1.0235203494175367, "grad_norm": 0.36328125, "learning_rate": 1.484224791912049e-05, "loss": 1.9859, "step": 31723 }, { "epoch": 1.023552613271333, "grad_norm": 0.35546875, "learning_rate": 1.4841474653327231e-05, "loss": 2.0117, "step": 31724 }, { "epoch": 1.0235848771251292, "grad_norm": 0.3671875, "learning_rate": 1.4840701387955299e-05, "loss": 1.9569, "step": 31725 }, { "epoch": 1.0236171409789256, "grad_norm": 0.3515625, "learning_rate": 1.4839928123006755e-05, "loss": 2.0, "step": 31726 }, { "epoch": 1.023649404832722, "grad_norm": 0.353515625, "learning_rate": 1.4839154858483646e-05, "loss": 2.0246, "step": 31727 }, { "epoch": 1.0236816686865184, "grad_norm": 0.357421875, "learning_rate": 1.4838381594388044e-05, "loss": 2.0025, "step": 31728 }, { "epoch": 1.0237139325403146, "grad_norm": 0.35546875, "learning_rate": 1.4837608330721983e-05, "loss": 1.9757, "step": 31729 }, { "epoch": 1.023746196394111, "grad_norm": 0.3515625, "learning_rate": 1.4836835067487529e-05, "loss": 1.9592, "step": 31730 }, { "epoch": 1.0237784602479074, "grad_norm": 0.359375, "learning_rate": 1.4836061804686736e-05, "loss": 1.9905, "step": 31731 }, { "epoch": 1.0238107241017038, "grad_norm": 0.341796875, "learning_rate": 1.4835288542321656e-05, "loss": 1.9721, "step": 31732 }, { "epoch": 1.0238429879555, "grad_norm": 0.3515625, "learning_rate": 1.483451528039435e-05, "loss": 1.9872, "step": 31733 }, { "epoch": 1.0238752518092964, "grad_norm": 0.357421875, "learning_rate": 1.4833742018906878e-05, "loss": 1.9582, "step": 31734 }, { "epoch": 1.0239075156630928, "grad_norm": 0.3515625, "learning_rate": 1.4832968757861279e-05, "loss": 2.0079, "step": 31735 }, { "epoch": 1.0239397795168892, "grad_norm": 0.3515625, "learning_rate": 1.4832195497259614e-05, "loss": 1.9605, "step": 31736 }, { "epoch": 1.0239720433706854, "grad_norm": 0.365234375, "learning_rate": 1.4831422237103946e-05, "loss": 1.9543, "step": 31737 }, { "epoch": 1.0240043072244818, "grad_norm": 0.353515625, "learning_rate": 1.4830648977396323e-05, "loss": 1.944, "step": 31738 }, { "epoch": 1.0240365710782782, "grad_norm": 0.345703125, "learning_rate": 1.4829875718138804e-05, "loss": 1.9818, "step": 31739 }, { "epoch": 1.0240688349320746, "grad_norm": 0.3671875, "learning_rate": 1.4829102459333449e-05, "loss": 1.9915, "step": 31740 }, { "epoch": 1.0241010987858707, "grad_norm": 0.33984375, "learning_rate": 1.4828329200982295e-05, "loss": 1.9883, "step": 31741 }, { "epoch": 1.0241333626396671, "grad_norm": 0.349609375, "learning_rate": 1.4827555943087412e-05, "loss": 1.9598, "step": 31742 }, { "epoch": 1.0241656264934635, "grad_norm": 0.369140625, "learning_rate": 1.4826782685650854e-05, "loss": 1.9715, "step": 31743 }, { "epoch": 1.02419789034726, "grad_norm": 0.345703125, "learning_rate": 1.4826009428674672e-05, "loss": 1.9825, "step": 31744 }, { "epoch": 1.0242301542010563, "grad_norm": 0.34765625, "learning_rate": 1.4825236172160924e-05, "loss": 1.9836, "step": 31745 }, { "epoch": 1.0242624180548525, "grad_norm": 0.3671875, "learning_rate": 1.4824462916111668e-05, "loss": 1.9968, "step": 31746 }, { "epoch": 1.024294681908649, "grad_norm": 0.34765625, "learning_rate": 1.4823689660528952e-05, "loss": 1.9687, "step": 31747 }, { "epoch": 1.0243269457624453, "grad_norm": 0.353515625, "learning_rate": 1.4822916405414834e-05, "loss": 1.9464, "step": 31748 }, { "epoch": 1.0243592096162417, "grad_norm": 0.3515625, "learning_rate": 1.4822143150771373e-05, "loss": 1.9551, "step": 31749 }, { "epoch": 1.024391473470038, "grad_norm": 0.359375, "learning_rate": 1.4821369896600618e-05, "loss": 1.9659, "step": 31750 }, { "epoch": 1.0244237373238343, "grad_norm": 0.369140625, "learning_rate": 1.4820596642904628e-05, "loss": 1.9927, "step": 31751 }, { "epoch": 1.0244560011776307, "grad_norm": 0.3515625, "learning_rate": 1.481982338968546e-05, "loss": 1.9879, "step": 31752 }, { "epoch": 1.024488265031427, "grad_norm": 0.3671875, "learning_rate": 1.4819050136945165e-05, "loss": 2.0116, "step": 31753 }, { "epoch": 1.0245205288852233, "grad_norm": 0.353515625, "learning_rate": 1.4818276884685797e-05, "loss": 2.0055, "step": 31754 }, { "epoch": 1.0245527927390197, "grad_norm": 0.36328125, "learning_rate": 1.4817503632909417e-05, "loss": 1.9865, "step": 31755 }, { "epoch": 1.024585056592816, "grad_norm": 0.349609375, "learning_rate": 1.4816730381618076e-05, "loss": 1.9713, "step": 31756 }, { "epoch": 1.0246173204466125, "grad_norm": 0.375, "learning_rate": 1.4815957130813828e-05, "loss": 1.9573, "step": 31757 }, { "epoch": 1.0246495843004086, "grad_norm": 0.357421875, "learning_rate": 1.4815183880498736e-05, "loss": 1.9688, "step": 31758 }, { "epoch": 1.024681848154205, "grad_norm": 0.353515625, "learning_rate": 1.4814410630674846e-05, "loss": 1.9508, "step": 31759 }, { "epoch": 1.0247141120080014, "grad_norm": 0.39453125, "learning_rate": 1.4813637381344217e-05, "loss": 1.9632, "step": 31760 }, { "epoch": 1.0247463758617978, "grad_norm": 0.359375, "learning_rate": 1.48128641325089e-05, "loss": 1.9815, "step": 31761 }, { "epoch": 1.024778639715594, "grad_norm": 0.365234375, "learning_rate": 1.4812090884170959e-05, "loss": 1.9716, "step": 31762 }, { "epoch": 1.0248109035693904, "grad_norm": 0.361328125, "learning_rate": 1.4811317636332443e-05, "loss": 1.9864, "step": 31763 }, { "epoch": 1.0248431674231868, "grad_norm": 0.34375, "learning_rate": 1.4810544388995409e-05, "loss": 1.9803, "step": 31764 }, { "epoch": 1.0248754312769832, "grad_norm": 0.380859375, "learning_rate": 1.4809771142161911e-05, "loss": 1.9976, "step": 31765 }, { "epoch": 1.0249076951307796, "grad_norm": 0.34765625, "learning_rate": 1.4808997895834002e-05, "loss": 1.9588, "step": 31766 }, { "epoch": 1.0249399589845758, "grad_norm": 0.3671875, "learning_rate": 1.4808224650013742e-05, "loss": 1.9851, "step": 31767 }, { "epoch": 1.0249722228383722, "grad_norm": 0.34765625, "learning_rate": 1.4807451404703178e-05, "loss": 1.9792, "step": 31768 }, { "epoch": 1.0250044866921686, "grad_norm": 0.373046875, "learning_rate": 1.4806678159904376e-05, "loss": 1.9582, "step": 31769 }, { "epoch": 1.025036750545965, "grad_norm": 0.337890625, "learning_rate": 1.4805904915619391e-05, "loss": 1.9523, "step": 31770 }, { "epoch": 1.0250690143997612, "grad_norm": 0.36328125, "learning_rate": 1.4805131671850269e-05, "loss": 1.9761, "step": 31771 }, { "epoch": 1.0251012782535576, "grad_norm": 0.33984375, "learning_rate": 1.4804358428599066e-05, "loss": 1.9785, "step": 31772 }, { "epoch": 1.025133542107354, "grad_norm": 0.35546875, "learning_rate": 1.480358518586784e-05, "loss": 1.9847, "step": 31773 }, { "epoch": 1.0251658059611504, "grad_norm": 0.349609375, "learning_rate": 1.4802811943658648e-05, "loss": 2.004, "step": 31774 }, { "epoch": 1.0251980698149465, "grad_norm": 0.359375, "learning_rate": 1.4802038701973545e-05, "loss": 2.0199, "step": 31775 }, { "epoch": 1.025230333668743, "grad_norm": 0.341796875, "learning_rate": 1.4801265460814591e-05, "loss": 1.9845, "step": 31776 }, { "epoch": 1.0252625975225393, "grad_norm": 0.376953125, "learning_rate": 1.4800492220183827e-05, "loss": 1.9305, "step": 31777 }, { "epoch": 1.0252948613763357, "grad_norm": 0.361328125, "learning_rate": 1.4799718980083315e-05, "loss": 2.0084, "step": 31778 }, { "epoch": 1.025327125230132, "grad_norm": 0.35546875, "learning_rate": 1.4798945740515115e-05, "loss": 1.9823, "step": 31779 }, { "epoch": 1.0253593890839283, "grad_norm": 0.341796875, "learning_rate": 1.4798172501481276e-05, "loss": 1.9541, "step": 31780 }, { "epoch": 1.0253916529377247, "grad_norm": 0.35546875, "learning_rate": 1.4797399262983856e-05, "loss": 1.9706, "step": 31781 }, { "epoch": 1.0254239167915211, "grad_norm": 0.357421875, "learning_rate": 1.479662602502491e-05, "loss": 1.9262, "step": 31782 }, { "epoch": 1.0254561806453175, "grad_norm": 0.375, "learning_rate": 1.4795852787606495e-05, "loss": 1.9914, "step": 31783 }, { "epoch": 1.0254884444991137, "grad_norm": 0.3515625, "learning_rate": 1.4795079550730662e-05, "loss": 1.9707, "step": 31784 }, { "epoch": 1.02552070835291, "grad_norm": 0.3515625, "learning_rate": 1.4794306314399466e-05, "loss": 1.9899, "step": 31785 }, { "epoch": 1.0255529722067065, "grad_norm": 0.37109375, "learning_rate": 1.4793533078614968e-05, "loss": 1.9687, "step": 31786 }, { "epoch": 1.025585236060503, "grad_norm": 0.34765625, "learning_rate": 1.4792759843379214e-05, "loss": 2.0004, "step": 31787 }, { "epoch": 1.025617499914299, "grad_norm": 0.41015625, "learning_rate": 1.4791986608694268e-05, "loss": 2.003, "step": 31788 }, { "epoch": 1.0256497637680955, "grad_norm": 0.35546875, "learning_rate": 1.4791213374562183e-05, "loss": 1.9846, "step": 31789 }, { "epoch": 1.0256820276218919, "grad_norm": 0.34375, "learning_rate": 1.4790440140985008e-05, "loss": 1.991, "step": 31790 }, { "epoch": 1.0257142914756883, "grad_norm": 0.3515625, "learning_rate": 1.4789666907964805e-05, "loss": 1.9487, "step": 31791 }, { "epoch": 1.0257465553294844, "grad_norm": 0.353515625, "learning_rate": 1.4788893675503625e-05, "loss": 1.9802, "step": 31792 }, { "epoch": 1.0257788191832808, "grad_norm": 0.36328125, "learning_rate": 1.4788120443603524e-05, "loss": 1.9454, "step": 31793 }, { "epoch": 1.0258110830370772, "grad_norm": 0.3671875, "learning_rate": 1.478734721226656e-05, "loss": 1.9619, "step": 31794 }, { "epoch": 1.0258433468908736, "grad_norm": 0.359375, "learning_rate": 1.4786573981494787e-05, "loss": 1.9933, "step": 31795 }, { "epoch": 1.0258756107446698, "grad_norm": 0.39453125, "learning_rate": 1.4785800751290257e-05, "loss": 1.9305, "step": 31796 }, { "epoch": 1.0259078745984662, "grad_norm": 0.35546875, "learning_rate": 1.478502752165503e-05, "loss": 1.9582, "step": 31797 }, { "epoch": 1.0259401384522626, "grad_norm": 0.373046875, "learning_rate": 1.4784254292591153e-05, "loss": 1.9726, "step": 31798 }, { "epoch": 1.025972402306059, "grad_norm": 0.36328125, "learning_rate": 1.478348106410069e-05, "loss": 1.9559, "step": 31799 }, { "epoch": 1.0260046661598552, "grad_norm": 0.345703125, "learning_rate": 1.4782707836185692e-05, "loss": 1.9945, "step": 31800 }, { "epoch": 1.0260369300136516, "grad_norm": 0.35546875, "learning_rate": 1.4781934608848215e-05, "loss": 1.9737, "step": 31801 }, { "epoch": 1.026069193867448, "grad_norm": 0.35546875, "learning_rate": 1.4781161382090315e-05, "loss": 1.9908, "step": 31802 }, { "epoch": 1.0261014577212444, "grad_norm": 0.3828125, "learning_rate": 1.4780388155914041e-05, "loss": 1.9665, "step": 31803 }, { "epoch": 1.0261337215750408, "grad_norm": 0.4453125, "learning_rate": 1.4779614930321456e-05, "loss": 1.9781, "step": 31804 }, { "epoch": 1.026165985428837, "grad_norm": 0.34375, "learning_rate": 1.4778841705314607e-05, "loss": 1.9423, "step": 31805 }, { "epoch": 1.0261982492826334, "grad_norm": 0.390625, "learning_rate": 1.4778068480895557e-05, "loss": 1.9813, "step": 31806 }, { "epoch": 1.0262305131364298, "grad_norm": 0.40234375, "learning_rate": 1.4777295257066366e-05, "loss": 1.9551, "step": 31807 }, { "epoch": 1.0262627769902262, "grad_norm": 0.34765625, "learning_rate": 1.4776522033829073e-05, "loss": 1.9905, "step": 31808 }, { "epoch": 1.0262950408440223, "grad_norm": 0.421875, "learning_rate": 1.4775748811185743e-05, "loss": 1.9833, "step": 31809 }, { "epoch": 1.0263273046978187, "grad_norm": 0.375, "learning_rate": 1.4774975589138425e-05, "loss": 1.9915, "step": 31810 }, { "epoch": 1.0263595685516151, "grad_norm": 0.37109375, "learning_rate": 1.4774202367689181e-05, "loss": 1.9677, "step": 31811 }, { "epoch": 1.0263918324054115, "grad_norm": 0.40234375, "learning_rate": 1.4773429146840064e-05, "loss": 1.9781, "step": 31812 }, { "epoch": 1.0264240962592077, "grad_norm": 0.37890625, "learning_rate": 1.4772655926593134e-05, "loss": 1.9496, "step": 31813 }, { "epoch": 1.0264563601130041, "grad_norm": 0.404296875, "learning_rate": 1.4771882706950436e-05, "loss": 1.9987, "step": 31814 }, { "epoch": 1.0264886239668005, "grad_norm": 0.345703125, "learning_rate": 1.4771109487914025e-05, "loss": 1.9763, "step": 31815 }, { "epoch": 1.026520887820597, "grad_norm": 0.3515625, "learning_rate": 1.4770336269485963e-05, "loss": 2.0068, "step": 31816 }, { "epoch": 1.026553151674393, "grad_norm": 0.39453125, "learning_rate": 1.4769563051668306e-05, "loss": 2.0019, "step": 31817 }, { "epoch": 1.0265854155281895, "grad_norm": 0.345703125, "learning_rate": 1.4768789834463102e-05, "loss": 1.9491, "step": 31818 }, { "epoch": 1.026617679381986, "grad_norm": 0.375, "learning_rate": 1.4768016617872419e-05, "loss": 1.9949, "step": 31819 }, { "epoch": 1.0266499432357823, "grad_norm": 0.349609375, "learning_rate": 1.4767243401898293e-05, "loss": 1.9443, "step": 31820 }, { "epoch": 1.0266822070895785, "grad_norm": 0.361328125, "learning_rate": 1.476647018654279e-05, "loss": 1.9916, "step": 31821 }, { "epoch": 1.0267144709433749, "grad_norm": 0.357421875, "learning_rate": 1.4765696971807967e-05, "loss": 1.9684, "step": 31822 }, { "epoch": 1.0267467347971713, "grad_norm": 0.353515625, "learning_rate": 1.4764923757695875e-05, "loss": 1.9819, "step": 31823 }, { "epoch": 1.0267789986509677, "grad_norm": 0.337890625, "learning_rate": 1.4764150544208569e-05, "loss": 1.9741, "step": 31824 }, { "epoch": 1.026811262504764, "grad_norm": 0.357421875, "learning_rate": 1.476337733134811e-05, "loss": 1.9697, "step": 31825 }, { "epoch": 1.0268435263585602, "grad_norm": 0.37109375, "learning_rate": 1.4762604119116545e-05, "loss": 1.9574, "step": 31826 }, { "epoch": 1.0268757902123566, "grad_norm": 0.3515625, "learning_rate": 1.476183090751593e-05, "loss": 1.9781, "step": 31827 }, { "epoch": 1.026908054066153, "grad_norm": 0.33984375, "learning_rate": 1.4761057696548326e-05, "loss": 1.9478, "step": 31828 }, { "epoch": 1.0269403179199494, "grad_norm": 0.359375, "learning_rate": 1.4760284486215781e-05, "loss": 1.9584, "step": 31829 }, { "epoch": 1.0269725817737456, "grad_norm": 0.330078125, "learning_rate": 1.4759511276520357e-05, "loss": 1.9886, "step": 31830 }, { "epoch": 1.027004845627542, "grad_norm": 0.3671875, "learning_rate": 1.4758738067464105e-05, "loss": 1.989, "step": 31831 }, { "epoch": 1.0270371094813384, "grad_norm": 0.375, "learning_rate": 1.475796485904908e-05, "loss": 1.9679, "step": 31832 }, { "epoch": 1.0270693733351348, "grad_norm": 0.37109375, "learning_rate": 1.4757191651277335e-05, "loss": 1.949, "step": 31833 }, { "epoch": 1.027101637188931, "grad_norm": 0.349609375, "learning_rate": 1.475641844415093e-05, "loss": 1.9673, "step": 31834 }, { "epoch": 1.0271339010427274, "grad_norm": 0.365234375, "learning_rate": 1.4755645237671917e-05, "loss": 1.989, "step": 31835 }, { "epoch": 1.0271661648965238, "grad_norm": 0.365234375, "learning_rate": 1.475487203184235e-05, "loss": 1.9919, "step": 31836 }, { "epoch": 1.0271984287503202, "grad_norm": 0.3515625, "learning_rate": 1.4754098826664291e-05, "loss": 1.9727, "step": 31837 }, { "epoch": 1.0272306926041164, "grad_norm": 0.373046875, "learning_rate": 1.4753325622139786e-05, "loss": 1.9725, "step": 31838 }, { "epoch": 1.0272629564579128, "grad_norm": 0.35546875, "learning_rate": 1.4752552418270894e-05, "loss": 1.9783, "step": 31839 }, { "epoch": 1.0272952203117092, "grad_norm": 0.35546875, "learning_rate": 1.4751779215059667e-05, "loss": 1.9763, "step": 31840 }, { "epoch": 1.0273274841655056, "grad_norm": 0.357421875, "learning_rate": 1.4751006012508166e-05, "loss": 1.9619, "step": 31841 }, { "epoch": 1.0273597480193017, "grad_norm": 0.3515625, "learning_rate": 1.4750232810618437e-05, "loss": 1.9918, "step": 31842 }, { "epoch": 1.0273920118730981, "grad_norm": 0.361328125, "learning_rate": 1.4749459609392554e-05, "loss": 1.9681, "step": 31843 }, { "epoch": 1.0274242757268945, "grad_norm": 0.36328125, "learning_rate": 1.4748686408832551e-05, "loss": 1.9678, "step": 31844 }, { "epoch": 1.027456539580691, "grad_norm": 0.353515625, "learning_rate": 1.474791320894049e-05, "loss": 1.9997, "step": 31845 }, { "epoch": 1.0274888034344873, "grad_norm": 0.36328125, "learning_rate": 1.4747140009718427e-05, "loss": 1.9997, "step": 31846 }, { "epoch": 1.0275210672882835, "grad_norm": 0.359375, "learning_rate": 1.4746366811168416e-05, "loss": 1.9392, "step": 31847 }, { "epoch": 1.02755333114208, "grad_norm": 0.353515625, "learning_rate": 1.4745593613292512e-05, "loss": 1.9606, "step": 31848 }, { "epoch": 1.0275855949958763, "grad_norm": 0.369140625, "learning_rate": 1.474482041609278e-05, "loss": 1.9764, "step": 31849 }, { "epoch": 1.0276178588496727, "grad_norm": 0.34765625, "learning_rate": 1.474404721957126e-05, "loss": 1.9727, "step": 31850 }, { "epoch": 1.027650122703469, "grad_norm": 0.390625, "learning_rate": 1.4743274023730013e-05, "loss": 1.9917, "step": 31851 }, { "epoch": 1.0276823865572653, "grad_norm": 0.35546875, "learning_rate": 1.4742500828571088e-05, "loss": 1.9644, "step": 31852 }, { "epoch": 1.0277146504110617, "grad_norm": 0.3828125, "learning_rate": 1.474172763409655e-05, "loss": 1.9633, "step": 31853 }, { "epoch": 1.027746914264858, "grad_norm": 0.34375, "learning_rate": 1.4740954440308452e-05, "loss": 1.9897, "step": 31854 }, { "epoch": 1.0277791781186543, "grad_norm": 0.365234375, "learning_rate": 1.4740181247208854e-05, "loss": 1.9891, "step": 31855 }, { "epoch": 1.0278114419724507, "grad_norm": 0.349609375, "learning_rate": 1.4739408054799793e-05, "loss": 1.9676, "step": 31856 }, { "epoch": 1.027843705826247, "grad_norm": 0.375, "learning_rate": 1.4738634863083337e-05, "loss": 1.9857, "step": 31857 }, { "epoch": 1.0278759696800435, "grad_norm": 0.33984375, "learning_rate": 1.473786167206154e-05, "loss": 1.9641, "step": 31858 }, { "epoch": 1.0279082335338396, "grad_norm": 0.361328125, "learning_rate": 1.4737088481736455e-05, "loss": 1.978, "step": 31859 }, { "epoch": 1.027940497387636, "grad_norm": 0.359375, "learning_rate": 1.473631529211014e-05, "loss": 1.9636, "step": 31860 }, { "epoch": 1.0279727612414324, "grad_norm": 0.345703125, "learning_rate": 1.4735542103184649e-05, "loss": 1.9852, "step": 31861 }, { "epoch": 1.0280050250952288, "grad_norm": 0.34375, "learning_rate": 1.4734768914962033e-05, "loss": 1.9726, "step": 31862 }, { "epoch": 1.028037288949025, "grad_norm": 0.384765625, "learning_rate": 1.4733995727444351e-05, "loss": 1.9797, "step": 31863 }, { "epoch": 1.0280695528028214, "grad_norm": 0.34375, "learning_rate": 1.4733222540633656e-05, "loss": 2.0003, "step": 31864 }, { "epoch": 1.0281018166566178, "grad_norm": 0.359375, "learning_rate": 1.4732449354532003e-05, "loss": 1.9628, "step": 31865 }, { "epoch": 1.0281340805104142, "grad_norm": 0.35546875, "learning_rate": 1.473167616914145e-05, "loss": 1.971, "step": 31866 }, { "epoch": 1.0281663443642106, "grad_norm": 0.333984375, "learning_rate": 1.4730902984464052e-05, "loss": 1.9625, "step": 31867 }, { "epoch": 1.0281986082180068, "grad_norm": 0.353515625, "learning_rate": 1.4730129800501859e-05, "loss": 1.9535, "step": 31868 }, { "epoch": 1.0282308720718032, "grad_norm": 0.34375, "learning_rate": 1.4729356617256928e-05, "loss": 1.9587, "step": 31869 }, { "epoch": 1.0282631359255996, "grad_norm": 0.337890625, "learning_rate": 1.4728583434731315e-05, "loss": 1.9964, "step": 31870 }, { "epoch": 1.028295399779396, "grad_norm": 0.33984375, "learning_rate": 1.4727810252927074e-05, "loss": 1.9723, "step": 31871 }, { "epoch": 1.0283276636331922, "grad_norm": 0.3515625, "learning_rate": 1.4727037071846262e-05, "loss": 1.9513, "step": 31872 }, { "epoch": 1.0283599274869886, "grad_norm": 0.365234375, "learning_rate": 1.4726263891490937e-05, "loss": 1.9638, "step": 31873 }, { "epoch": 1.028392191340785, "grad_norm": 0.3515625, "learning_rate": 1.4725490711863142e-05, "loss": 1.9833, "step": 31874 }, { "epoch": 1.0284244551945814, "grad_norm": 0.361328125, "learning_rate": 1.4724717532964943e-05, "loss": 1.961, "step": 31875 }, { "epoch": 1.0284567190483775, "grad_norm": 0.33984375, "learning_rate": 1.4723944354798392e-05, "loss": 1.9731, "step": 31876 }, { "epoch": 1.028488982902174, "grad_norm": 0.353515625, "learning_rate": 1.4723171177365542e-05, "loss": 1.9836, "step": 31877 }, { "epoch": 1.0285212467559703, "grad_norm": 0.34765625, "learning_rate": 1.4722398000668449e-05, "loss": 1.9415, "step": 31878 }, { "epoch": 1.0285535106097667, "grad_norm": 0.359375, "learning_rate": 1.4721624824709172e-05, "loss": 1.9902, "step": 31879 }, { "epoch": 1.028585774463563, "grad_norm": 0.34765625, "learning_rate": 1.472085164948976e-05, "loss": 1.976, "step": 31880 }, { "epoch": 1.0286180383173593, "grad_norm": 0.345703125, "learning_rate": 1.472007847501227e-05, "loss": 1.9534, "step": 31881 }, { "epoch": 1.0286503021711557, "grad_norm": 0.345703125, "learning_rate": 1.4719305301278755e-05, "loss": 1.9679, "step": 31882 }, { "epoch": 1.0286825660249521, "grad_norm": 0.3515625, "learning_rate": 1.4718532128291275e-05, "loss": 1.9744, "step": 31883 }, { "epoch": 1.0287148298787483, "grad_norm": 0.357421875, "learning_rate": 1.4717758956051878e-05, "loss": 1.9907, "step": 31884 }, { "epoch": 1.0287470937325447, "grad_norm": 0.34765625, "learning_rate": 1.4716985784562633e-05, "loss": 1.954, "step": 31885 }, { "epoch": 1.028779357586341, "grad_norm": 0.3671875, "learning_rate": 1.4716212613825579e-05, "loss": 2.0001, "step": 31886 }, { "epoch": 1.0288116214401375, "grad_norm": 0.34375, "learning_rate": 1.4715439443842776e-05, "loss": 1.9649, "step": 31887 }, { "epoch": 1.028843885293934, "grad_norm": 0.345703125, "learning_rate": 1.471466627461628e-05, "loss": 1.9636, "step": 31888 }, { "epoch": 1.02887614914773, "grad_norm": 0.353515625, "learning_rate": 1.4713893106148142e-05, "loss": 1.9649, "step": 31889 }, { "epoch": 1.0289084130015265, "grad_norm": 0.34375, "learning_rate": 1.4713119938440425e-05, "loss": 1.9888, "step": 31890 }, { "epoch": 1.0289406768553229, "grad_norm": 0.341796875, "learning_rate": 1.4712346771495187e-05, "loss": 1.9937, "step": 31891 }, { "epoch": 1.0289729407091193, "grad_norm": 0.33984375, "learning_rate": 1.4711573605314467e-05, "loss": 1.9662, "step": 31892 }, { "epoch": 1.0290052045629154, "grad_norm": 0.333984375, "learning_rate": 1.4710800439900325e-05, "loss": 1.9946, "step": 31893 }, { "epoch": 1.0290374684167118, "grad_norm": 0.357421875, "learning_rate": 1.4710027275254825e-05, "loss": 1.9745, "step": 31894 }, { "epoch": 1.0290697322705082, "grad_norm": 0.33203125, "learning_rate": 1.4709254111380015e-05, "loss": 1.9814, "step": 31895 }, { "epoch": 1.0291019961243046, "grad_norm": 0.388671875, "learning_rate": 1.4708480948277952e-05, "loss": 1.9629, "step": 31896 }, { "epoch": 1.0291342599781008, "grad_norm": 0.345703125, "learning_rate": 1.4707707785950697e-05, "loss": 1.982, "step": 31897 }, { "epoch": 1.0291665238318972, "grad_norm": 0.373046875, "learning_rate": 1.4706934624400287e-05, "loss": 1.9833, "step": 31898 }, { "epoch": 1.0291987876856936, "grad_norm": 0.37109375, "learning_rate": 1.4706161463628791e-05, "loss": 1.9919, "step": 31899 }, { "epoch": 1.02923105153949, "grad_norm": 0.373046875, "learning_rate": 1.4705388303638262e-05, "loss": 1.9618, "step": 31900 }, { "epoch": 1.0292633153932862, "grad_norm": 0.341796875, "learning_rate": 1.4704615144430755e-05, "loss": 1.9891, "step": 31901 }, { "epoch": 1.0292955792470826, "grad_norm": 0.365234375, "learning_rate": 1.4703841986008321e-05, "loss": 1.9992, "step": 31902 }, { "epoch": 1.029327843100879, "grad_norm": 0.361328125, "learning_rate": 1.4703068828373024e-05, "loss": 1.9801, "step": 31903 }, { "epoch": 1.0293601069546754, "grad_norm": 0.359375, "learning_rate": 1.4702295671526909e-05, "loss": 1.9495, "step": 31904 }, { "epoch": 1.0293923708084718, "grad_norm": 0.369140625, "learning_rate": 1.4701522515472032e-05, "loss": 1.9745, "step": 31905 }, { "epoch": 1.029424634662268, "grad_norm": 0.369140625, "learning_rate": 1.4700749360210451e-05, "loss": 1.9937, "step": 31906 }, { "epoch": 1.0294568985160644, "grad_norm": 0.357421875, "learning_rate": 1.4699976205744222e-05, "loss": 1.9932, "step": 31907 }, { "epoch": 1.0294891623698608, "grad_norm": 0.3515625, "learning_rate": 1.4699203052075397e-05, "loss": 1.9438, "step": 31908 }, { "epoch": 1.0295214262236572, "grad_norm": 0.361328125, "learning_rate": 1.4698429899206032e-05, "loss": 1.9869, "step": 31909 }, { "epoch": 1.0295536900774533, "grad_norm": 0.361328125, "learning_rate": 1.4697656747138186e-05, "loss": 1.9576, "step": 31910 }, { "epoch": 1.0295859539312497, "grad_norm": 0.353515625, "learning_rate": 1.4696883595873906e-05, "loss": 1.9852, "step": 31911 }, { "epoch": 1.0296182177850461, "grad_norm": 0.359375, "learning_rate": 1.469611044541525e-05, "loss": 1.9807, "step": 31912 }, { "epoch": 1.0296504816388425, "grad_norm": 0.365234375, "learning_rate": 1.4695337295764277e-05, "loss": 1.9644, "step": 31913 }, { "epoch": 1.0296827454926387, "grad_norm": 0.34765625, "learning_rate": 1.4694564146923035e-05, "loss": 1.9684, "step": 31914 }, { "epoch": 1.0297150093464351, "grad_norm": 0.349609375, "learning_rate": 1.4693790998893582e-05, "loss": 1.967, "step": 31915 }, { "epoch": 1.0297472732002315, "grad_norm": 0.34375, "learning_rate": 1.4693017851677979e-05, "loss": 1.9362, "step": 31916 }, { "epoch": 1.029779537054028, "grad_norm": 0.349609375, "learning_rate": 1.469224470527827e-05, "loss": 1.9478, "step": 31917 }, { "epoch": 1.029811800907824, "grad_norm": 0.36328125, "learning_rate": 1.4691471559696517e-05, "loss": 1.9634, "step": 31918 }, { "epoch": 1.0298440647616205, "grad_norm": 0.3671875, "learning_rate": 1.4690698414934772e-05, "loss": 2.0052, "step": 31919 }, { "epoch": 1.029876328615417, "grad_norm": 0.34375, "learning_rate": 1.468992527099509e-05, "loss": 1.988, "step": 31920 }, { "epoch": 1.0299085924692133, "grad_norm": 0.353515625, "learning_rate": 1.4689152127879524e-05, "loss": 1.9783, "step": 31921 }, { "epoch": 1.0299408563230095, "grad_norm": 0.36328125, "learning_rate": 1.468837898559014e-05, "loss": 2.0128, "step": 31922 }, { "epoch": 1.0299731201768059, "grad_norm": 0.3359375, "learning_rate": 1.468760584412898e-05, "loss": 1.962, "step": 31923 }, { "epoch": 1.0300053840306023, "grad_norm": 0.361328125, "learning_rate": 1.4686832703498102e-05, "loss": 1.9581, "step": 31924 }, { "epoch": 1.0300376478843987, "grad_norm": 0.365234375, "learning_rate": 1.4686059563699558e-05, "loss": 1.9789, "step": 31925 }, { "epoch": 1.030069911738195, "grad_norm": 0.375, "learning_rate": 1.4685286424735411e-05, "loss": 1.9754, "step": 31926 }, { "epoch": 1.0301021755919912, "grad_norm": 0.384765625, "learning_rate": 1.4684513286607713e-05, "loss": 2.0063, "step": 31927 }, { "epoch": 1.0301344394457876, "grad_norm": 0.33984375, "learning_rate": 1.4683740149318521e-05, "loss": 1.9365, "step": 31928 }, { "epoch": 1.030166703299584, "grad_norm": 0.36328125, "learning_rate": 1.4682967012869883e-05, "loss": 1.9575, "step": 31929 }, { "epoch": 1.0301989671533804, "grad_norm": 0.36328125, "learning_rate": 1.4682193877263852e-05, "loss": 1.9915, "step": 31930 }, { "epoch": 1.0302312310071766, "grad_norm": 0.3515625, "learning_rate": 1.4681420742502494e-05, "loss": 1.9479, "step": 31931 }, { "epoch": 1.030263494860973, "grad_norm": 0.365234375, "learning_rate": 1.4680647608587855e-05, "loss": 1.9916, "step": 31932 }, { "epoch": 1.0302957587147694, "grad_norm": 0.35546875, "learning_rate": 1.4679874475521993e-05, "loss": 1.9766, "step": 31933 }, { "epoch": 1.0303280225685658, "grad_norm": 0.365234375, "learning_rate": 1.4679101343306972e-05, "loss": 1.9931, "step": 31934 }, { "epoch": 1.030360286422362, "grad_norm": 0.34375, "learning_rate": 1.4678328211944826e-05, "loss": 1.9956, "step": 31935 }, { "epoch": 1.0303925502761584, "grad_norm": 0.33984375, "learning_rate": 1.4677555081437625e-05, "loss": 1.9684, "step": 31936 }, { "epoch": 1.0304248141299548, "grad_norm": 0.353515625, "learning_rate": 1.467678195178742e-05, "loss": 1.9829, "step": 31937 }, { "epoch": 1.0304570779837512, "grad_norm": 0.35546875, "learning_rate": 1.4676008822996266e-05, "loss": 1.9867, "step": 31938 }, { "epoch": 1.0304893418375474, "grad_norm": 0.349609375, "learning_rate": 1.467523569506622e-05, "loss": 1.9929, "step": 31939 }, { "epoch": 1.0305216056913438, "grad_norm": 0.3515625, "learning_rate": 1.4674462567999335e-05, "loss": 1.9464, "step": 31940 }, { "epoch": 1.0305538695451402, "grad_norm": 0.34375, "learning_rate": 1.4673689441797662e-05, "loss": 1.958, "step": 31941 }, { "epoch": 1.0305861333989366, "grad_norm": 0.3984375, "learning_rate": 1.467291631646326e-05, "loss": 1.9809, "step": 31942 }, { "epoch": 1.0306183972527327, "grad_norm": 0.34375, "learning_rate": 1.4672143191998184e-05, "loss": 1.9751, "step": 31943 }, { "epoch": 1.0306506611065291, "grad_norm": 0.345703125, "learning_rate": 1.4671370068404488e-05, "loss": 1.9704, "step": 31944 }, { "epoch": 1.0306829249603255, "grad_norm": 0.353515625, "learning_rate": 1.4670596945684226e-05, "loss": 1.997, "step": 31945 }, { "epoch": 1.030715188814122, "grad_norm": 0.35546875, "learning_rate": 1.4669823823839457e-05, "loss": 1.9459, "step": 31946 }, { "epoch": 1.0307474526679183, "grad_norm": 0.3359375, "learning_rate": 1.4669050702872227e-05, "loss": 1.9949, "step": 31947 }, { "epoch": 1.0307797165217145, "grad_norm": 0.353515625, "learning_rate": 1.46682775827846e-05, "loss": 1.9383, "step": 31948 }, { "epoch": 1.030811980375511, "grad_norm": 0.3671875, "learning_rate": 1.4667504463578624e-05, "loss": 1.9768, "step": 31949 }, { "epoch": 1.0308442442293073, "grad_norm": 0.34765625, "learning_rate": 1.4666731345256358e-05, "loss": 1.9623, "step": 31950 }, { "epoch": 1.0308765080831037, "grad_norm": 0.36328125, "learning_rate": 1.4665958227819855e-05, "loss": 1.9298, "step": 31951 }, { "epoch": 1.0309087719369, "grad_norm": 0.34765625, "learning_rate": 1.4665185111271175e-05, "loss": 1.9969, "step": 31952 }, { "epoch": 1.0309410357906963, "grad_norm": 0.353515625, "learning_rate": 1.4664411995612365e-05, "loss": 1.9364, "step": 31953 }, { "epoch": 1.0309732996444927, "grad_norm": 0.365234375, "learning_rate": 1.4663638880845481e-05, "loss": 1.9913, "step": 31954 }, { "epoch": 1.031005563498289, "grad_norm": 0.34765625, "learning_rate": 1.4662865766972583e-05, "loss": 2.0078, "step": 31955 }, { "epoch": 1.0310378273520853, "grad_norm": 0.349609375, "learning_rate": 1.4662092653995719e-05, "loss": 1.9834, "step": 31956 }, { "epoch": 1.0310700912058817, "grad_norm": 0.35546875, "learning_rate": 1.4661319541916946e-05, "loss": 1.9824, "step": 31957 }, { "epoch": 1.031102355059678, "grad_norm": 0.34765625, "learning_rate": 1.4660546430738328e-05, "loss": 2.0027, "step": 31958 }, { "epoch": 1.0311346189134745, "grad_norm": 0.3515625, "learning_rate": 1.4659773320461908e-05, "loss": 1.9472, "step": 31959 }, { "epoch": 1.0311668827672706, "grad_norm": 0.3515625, "learning_rate": 1.4659000211089743e-05, "loss": 1.9805, "step": 31960 }, { "epoch": 1.031199146621067, "grad_norm": 0.359375, "learning_rate": 1.465822710262389e-05, "loss": 1.9915, "step": 31961 }, { "epoch": 1.0312314104748634, "grad_norm": 0.36328125, "learning_rate": 1.4657453995066398e-05, "loss": 1.9837, "step": 31962 }, { "epoch": 1.0312636743286598, "grad_norm": 0.392578125, "learning_rate": 1.4656680888419333e-05, "loss": 1.9574, "step": 31963 }, { "epoch": 1.0312959381824562, "grad_norm": 0.359375, "learning_rate": 1.4655907782684751e-05, "loss": 1.9755, "step": 31964 }, { "epoch": 1.0313282020362524, "grad_norm": 0.376953125, "learning_rate": 1.465513467786469e-05, "loss": 1.9811, "step": 31965 }, { "epoch": 1.0313604658900488, "grad_norm": 0.3515625, "learning_rate": 1.4654361573961217e-05, "loss": 1.9653, "step": 31966 }, { "epoch": 1.0313927297438452, "grad_norm": 0.3515625, "learning_rate": 1.4653588470976379e-05, "loss": 1.9232, "step": 31967 }, { "epoch": 1.0314249935976416, "grad_norm": 0.357421875, "learning_rate": 1.4652815368912238e-05, "loss": 1.9733, "step": 31968 }, { "epoch": 1.0314572574514378, "grad_norm": 0.337890625, "learning_rate": 1.4652042267770848e-05, "loss": 1.9729, "step": 31969 }, { "epoch": 1.0314895213052342, "grad_norm": 0.361328125, "learning_rate": 1.4651269167554269e-05, "loss": 1.972, "step": 31970 }, { "epoch": 1.0315217851590306, "grad_norm": 0.35546875, "learning_rate": 1.4650496068264544e-05, "loss": 1.9848, "step": 31971 }, { "epoch": 1.031554049012827, "grad_norm": 0.35546875, "learning_rate": 1.4649722969903726e-05, "loss": 1.9773, "step": 31972 }, { "epoch": 1.0315863128666232, "grad_norm": 0.33203125, "learning_rate": 1.4648949872473882e-05, "loss": 1.9699, "step": 31973 }, { "epoch": 1.0316185767204196, "grad_norm": 0.3359375, "learning_rate": 1.4648176775977062e-05, "loss": 1.9846, "step": 31974 }, { "epoch": 1.031650840574216, "grad_norm": 0.34765625, "learning_rate": 1.464740368041532e-05, "loss": 1.9672, "step": 31975 }, { "epoch": 1.0316831044280124, "grad_norm": 0.333984375, "learning_rate": 1.4646630585790712e-05, "loss": 1.979, "step": 31976 }, { "epoch": 1.0317153682818085, "grad_norm": 0.33984375, "learning_rate": 1.4645857492105289e-05, "loss": 1.9612, "step": 31977 }, { "epoch": 1.031747632135605, "grad_norm": 0.34375, "learning_rate": 1.464508439936111e-05, "loss": 1.9643, "step": 31978 }, { "epoch": 1.0317798959894013, "grad_norm": 0.35546875, "learning_rate": 1.4644311307560224e-05, "loss": 1.9745, "step": 31979 }, { "epoch": 1.0318121598431977, "grad_norm": 0.3359375, "learning_rate": 1.4643538216704694e-05, "loss": 1.9815, "step": 31980 }, { "epoch": 1.031844423696994, "grad_norm": 0.345703125, "learning_rate": 1.4642765126796569e-05, "loss": 1.9635, "step": 31981 }, { "epoch": 1.0318766875507903, "grad_norm": 0.337890625, "learning_rate": 1.4641992037837907e-05, "loss": 1.9804, "step": 31982 }, { "epoch": 1.0319089514045867, "grad_norm": 0.357421875, "learning_rate": 1.4641218949830759e-05, "loss": 1.9931, "step": 31983 }, { "epoch": 1.0319412152583831, "grad_norm": 0.345703125, "learning_rate": 1.4640445862777182e-05, "loss": 1.9603, "step": 31984 }, { "epoch": 1.0319734791121795, "grad_norm": 0.34375, "learning_rate": 1.4639672776679229e-05, "loss": 1.9786, "step": 31985 }, { "epoch": 1.0320057429659757, "grad_norm": 0.357421875, "learning_rate": 1.4638899691538956e-05, "loss": 1.9736, "step": 31986 }, { "epoch": 1.032038006819772, "grad_norm": 0.341796875, "learning_rate": 1.463812660735842e-05, "loss": 1.9329, "step": 31987 }, { "epoch": 1.0320702706735685, "grad_norm": 0.345703125, "learning_rate": 1.4637353524139674e-05, "loss": 1.992, "step": 31988 }, { "epoch": 1.032102534527365, "grad_norm": 0.341796875, "learning_rate": 1.4636580441884771e-05, "loss": 1.9635, "step": 31989 }, { "epoch": 1.032134798381161, "grad_norm": 0.337890625, "learning_rate": 1.4635807360595765e-05, "loss": 1.9398, "step": 31990 }, { "epoch": 1.0321670622349575, "grad_norm": 0.33984375, "learning_rate": 1.4635034280274713e-05, "loss": 1.9437, "step": 31991 }, { "epoch": 1.0321993260887539, "grad_norm": 0.341796875, "learning_rate": 1.4634261200923669e-05, "loss": 1.9769, "step": 31992 }, { "epoch": 1.0322315899425503, "grad_norm": 0.345703125, "learning_rate": 1.463348812254469e-05, "loss": 1.9628, "step": 31993 }, { "epoch": 1.0322638537963464, "grad_norm": 0.3515625, "learning_rate": 1.463271504513983e-05, "loss": 1.9571, "step": 31994 }, { "epoch": 1.0322961176501428, "grad_norm": 0.337890625, "learning_rate": 1.463194196871114e-05, "loss": 1.9557, "step": 31995 }, { "epoch": 1.0323283815039392, "grad_norm": 0.357421875, "learning_rate": 1.4631168893260676e-05, "loss": 1.9596, "step": 31996 }, { "epoch": 1.0323606453577356, "grad_norm": 0.373046875, "learning_rate": 1.4630395818790493e-05, "loss": 1.97, "step": 31997 }, { "epoch": 1.0323929092115318, "grad_norm": 0.3515625, "learning_rate": 1.4629622745302649e-05, "loss": 1.9848, "step": 31998 }, { "epoch": 1.0324251730653282, "grad_norm": 0.365234375, "learning_rate": 1.462884967279919e-05, "loss": 1.9805, "step": 31999 }, { "epoch": 1.0324574369191246, "grad_norm": 0.3515625, "learning_rate": 1.4628076601282188e-05, "loss": 1.9742, "step": 32000 }, { "epoch": 1.032489700772921, "grad_norm": 0.357421875, "learning_rate": 1.4627303530753682e-05, "loss": 1.9814, "step": 32001 }, { "epoch": 1.0325219646267172, "grad_norm": 0.359375, "learning_rate": 1.4626530461215728e-05, "loss": 1.9408, "step": 32002 }, { "epoch": 1.0325542284805136, "grad_norm": 0.36328125, "learning_rate": 1.4625757392670384e-05, "loss": 1.9761, "step": 32003 }, { "epoch": 1.03258649233431, "grad_norm": 0.35546875, "learning_rate": 1.4624984325119701e-05, "loss": 1.9563, "step": 32004 }, { "epoch": 1.0326187561881064, "grad_norm": 0.359375, "learning_rate": 1.4624211258565743e-05, "loss": 1.9456, "step": 32005 }, { "epoch": 1.0326510200419028, "grad_norm": 0.37890625, "learning_rate": 1.4623438193010562e-05, "loss": 1.978, "step": 32006 }, { "epoch": 1.032683283895699, "grad_norm": 0.36328125, "learning_rate": 1.4622665128456206e-05, "loss": 2.0148, "step": 32007 }, { "epoch": 1.0327155477494954, "grad_norm": 0.453125, "learning_rate": 1.4621892064904731e-05, "loss": 1.9498, "step": 32008 }, { "epoch": 1.0327478116032918, "grad_norm": 0.376953125, "learning_rate": 1.462111900235819e-05, "loss": 1.9953, "step": 32009 }, { "epoch": 1.0327800754570882, "grad_norm": 0.361328125, "learning_rate": 1.4620345940818645e-05, "loss": 1.966, "step": 32010 }, { "epoch": 1.0328123393108843, "grad_norm": 0.37109375, "learning_rate": 1.4619572880288148e-05, "loss": 2.0028, "step": 32011 }, { "epoch": 1.0328446031646807, "grad_norm": 0.359375, "learning_rate": 1.4618799820768758e-05, "loss": 1.9679, "step": 32012 }, { "epoch": 1.0328768670184771, "grad_norm": 0.361328125, "learning_rate": 1.4618026762262516e-05, "loss": 1.9458, "step": 32013 }, { "epoch": 1.0329091308722735, "grad_norm": 0.3671875, "learning_rate": 1.4617253704771486e-05, "loss": 1.9719, "step": 32014 }, { "epoch": 1.0329413947260697, "grad_norm": 0.35546875, "learning_rate": 1.4616480648297723e-05, "loss": 1.984, "step": 32015 }, { "epoch": 1.0329736585798661, "grad_norm": 0.359375, "learning_rate": 1.4615707592843282e-05, "loss": 1.966, "step": 32016 }, { "epoch": 1.0330059224336625, "grad_norm": 0.341796875, "learning_rate": 1.4614934538410212e-05, "loss": 1.9943, "step": 32017 }, { "epoch": 1.033038186287459, "grad_norm": 0.36328125, "learning_rate": 1.4614161485000577e-05, "loss": 1.9835, "step": 32018 }, { "epoch": 1.033070450141255, "grad_norm": 0.353515625, "learning_rate": 1.4613388432616423e-05, "loss": 1.9406, "step": 32019 }, { "epoch": 1.0331027139950515, "grad_norm": 0.353515625, "learning_rate": 1.4612615381259808e-05, "loss": 1.9745, "step": 32020 }, { "epoch": 1.033134977848848, "grad_norm": 0.34765625, "learning_rate": 1.4611842330932784e-05, "loss": 1.9687, "step": 32021 }, { "epoch": 1.0331672417026443, "grad_norm": 0.3515625, "learning_rate": 1.461106928163741e-05, "loss": 1.9631, "step": 32022 }, { "epoch": 1.0331995055564405, "grad_norm": 0.357421875, "learning_rate": 1.4610296233375738e-05, "loss": 1.9901, "step": 32023 }, { "epoch": 1.0332317694102369, "grad_norm": 0.35546875, "learning_rate": 1.4609523186149826e-05, "loss": 1.9968, "step": 32024 }, { "epoch": 1.0332640332640333, "grad_norm": 0.357421875, "learning_rate": 1.4608750139961725e-05, "loss": 1.9928, "step": 32025 }, { "epoch": 1.0332962971178297, "grad_norm": 0.361328125, "learning_rate": 1.4607977094813487e-05, "loss": 1.9702, "step": 32026 }, { "epoch": 1.033328560971626, "grad_norm": 0.345703125, "learning_rate": 1.4607204050707172e-05, "loss": 1.9688, "step": 32027 }, { "epoch": 1.0333608248254222, "grad_norm": 0.369140625, "learning_rate": 1.4606431007644831e-05, "loss": 1.9755, "step": 32028 }, { "epoch": 1.0333930886792186, "grad_norm": 0.341796875, "learning_rate": 1.4605657965628523e-05, "loss": 1.9521, "step": 32029 }, { "epoch": 1.033425352533015, "grad_norm": 0.369140625, "learning_rate": 1.4604884924660302e-05, "loss": 1.9587, "step": 32030 }, { "epoch": 1.0334576163868114, "grad_norm": 0.345703125, "learning_rate": 1.4604111884742215e-05, "loss": 1.9669, "step": 32031 }, { "epoch": 1.0334898802406076, "grad_norm": 0.375, "learning_rate": 1.4603338845876324e-05, "loss": 1.9704, "step": 32032 }, { "epoch": 1.033522144094404, "grad_norm": 0.341796875, "learning_rate": 1.4602565808064682e-05, "loss": 1.9636, "step": 32033 }, { "epoch": 1.0335544079482004, "grad_norm": 0.3828125, "learning_rate": 1.4601792771309341e-05, "loss": 1.9886, "step": 32034 }, { "epoch": 1.0335866718019968, "grad_norm": 0.33984375, "learning_rate": 1.4601019735612358e-05, "loss": 1.9888, "step": 32035 }, { "epoch": 1.033618935655793, "grad_norm": 0.34765625, "learning_rate": 1.4600246700975794e-05, "loss": 1.9627, "step": 32036 }, { "epoch": 1.0336511995095894, "grad_norm": 0.34765625, "learning_rate": 1.459947366740169e-05, "loss": 1.9745, "step": 32037 }, { "epoch": 1.0336834633633858, "grad_norm": 0.369140625, "learning_rate": 1.459870063489211e-05, "loss": 1.9633, "step": 32038 }, { "epoch": 1.0337157272171822, "grad_norm": 0.33984375, "learning_rate": 1.4597927603449104e-05, "loss": 1.9572, "step": 32039 }, { "epoch": 1.0337479910709784, "grad_norm": 0.373046875, "learning_rate": 1.459715457307473e-05, "loss": 1.9716, "step": 32040 }, { "epoch": 1.0337802549247748, "grad_norm": 0.34375, "learning_rate": 1.4596381543771037e-05, "loss": 2.0043, "step": 32041 }, { "epoch": 1.0338125187785712, "grad_norm": 0.357421875, "learning_rate": 1.4595608515540088e-05, "loss": 1.9657, "step": 32042 }, { "epoch": 1.0338447826323676, "grad_norm": 0.34375, "learning_rate": 1.459483548838394e-05, "loss": 1.9813, "step": 32043 }, { "epoch": 1.0338770464861637, "grad_norm": 0.34375, "learning_rate": 1.4594062462304632e-05, "loss": 1.9785, "step": 32044 }, { "epoch": 1.0339093103399601, "grad_norm": 0.34375, "learning_rate": 1.4593289437304225e-05, "loss": 1.9637, "step": 32045 }, { "epoch": 1.0339415741937565, "grad_norm": 0.33203125, "learning_rate": 1.459251641338478e-05, "loss": 1.968, "step": 32046 }, { "epoch": 1.033973838047553, "grad_norm": 0.333984375, "learning_rate": 1.4591743390548346e-05, "loss": 1.9606, "step": 32047 }, { "epoch": 1.0340061019013493, "grad_norm": 0.3671875, "learning_rate": 1.459097036879698e-05, "loss": 2.0013, "step": 32048 }, { "epoch": 1.0340383657551455, "grad_norm": 0.353515625, "learning_rate": 1.4590197348132745e-05, "loss": 1.9322, "step": 32049 }, { "epoch": 1.034070629608942, "grad_norm": 0.34765625, "learning_rate": 1.4589424328557674e-05, "loss": 1.9749, "step": 32050 }, { "epoch": 1.0341028934627383, "grad_norm": 0.36328125, "learning_rate": 1.4588651310073836e-05, "loss": 1.9877, "step": 32051 }, { "epoch": 1.0341351573165347, "grad_norm": 0.373046875, "learning_rate": 1.4587878292683283e-05, "loss": 1.9797, "step": 32052 }, { "epoch": 1.034167421170331, "grad_norm": 0.34765625, "learning_rate": 1.4587105276388072e-05, "loss": 1.98, "step": 32053 }, { "epoch": 1.0341996850241273, "grad_norm": 0.404296875, "learning_rate": 1.4586332261190252e-05, "loss": 1.9566, "step": 32054 }, { "epoch": 1.0342319488779237, "grad_norm": 0.357421875, "learning_rate": 1.4585559247091887e-05, "loss": 1.9892, "step": 32055 }, { "epoch": 1.03426421273172, "grad_norm": 0.4296875, "learning_rate": 1.458478623409502e-05, "loss": 1.9943, "step": 32056 }, { "epoch": 1.0342964765855163, "grad_norm": 0.380859375, "learning_rate": 1.4584013222201713e-05, "loss": 1.9416, "step": 32057 }, { "epoch": 1.0343287404393127, "grad_norm": 0.380859375, "learning_rate": 1.4583240211414019e-05, "loss": 1.9624, "step": 32058 }, { "epoch": 1.034361004293109, "grad_norm": 0.384765625, "learning_rate": 1.4582467201733988e-05, "loss": 1.9815, "step": 32059 }, { "epoch": 1.0343932681469055, "grad_norm": 0.359375, "learning_rate": 1.4581694193163682e-05, "loss": 1.9704, "step": 32060 }, { "epoch": 1.0344255320007016, "grad_norm": 0.37890625, "learning_rate": 1.4580921185705152e-05, "loss": 1.9723, "step": 32061 }, { "epoch": 1.034457795854498, "grad_norm": 0.369140625, "learning_rate": 1.4580148179360452e-05, "loss": 1.9825, "step": 32062 }, { "epoch": 1.0344900597082944, "grad_norm": 0.376953125, "learning_rate": 1.4579375174131635e-05, "loss": 2.0062, "step": 32063 }, { "epoch": 1.0345223235620908, "grad_norm": 0.39453125, "learning_rate": 1.4578602170020758e-05, "loss": 2.0035, "step": 32064 }, { "epoch": 1.034554587415887, "grad_norm": 0.4375, "learning_rate": 1.4577829167029875e-05, "loss": 1.9599, "step": 32065 }, { "epoch": 1.0345868512696834, "grad_norm": 0.369140625, "learning_rate": 1.457705616516104e-05, "loss": 2.0009, "step": 32066 }, { "epoch": 1.0346191151234798, "grad_norm": 0.435546875, "learning_rate": 1.457628316441631e-05, "loss": 1.9543, "step": 32067 }, { "epoch": 1.0346513789772762, "grad_norm": 0.373046875, "learning_rate": 1.4575510164797736e-05, "loss": 1.9936, "step": 32068 }, { "epoch": 1.0346836428310726, "grad_norm": 0.37109375, "learning_rate": 1.4574737166307373e-05, "loss": 1.9646, "step": 32069 }, { "epoch": 1.0347159066848688, "grad_norm": 0.35546875, "learning_rate": 1.4573964168947277e-05, "loss": 1.9686, "step": 32070 }, { "epoch": 1.0347481705386652, "grad_norm": 0.34375, "learning_rate": 1.4573191172719501e-05, "loss": 1.961, "step": 32071 }, { "epoch": 1.0347804343924616, "grad_norm": 0.3671875, "learning_rate": 1.4572418177626101e-05, "loss": 1.9563, "step": 32072 }, { "epoch": 1.034812698246258, "grad_norm": 0.380859375, "learning_rate": 1.4571645183669134e-05, "loss": 1.9725, "step": 32073 }, { "epoch": 1.0348449621000542, "grad_norm": 0.3515625, "learning_rate": 1.4570872190850646e-05, "loss": 1.9202, "step": 32074 }, { "epoch": 1.0348772259538506, "grad_norm": 0.359375, "learning_rate": 1.45700991991727e-05, "loss": 1.964, "step": 32075 }, { "epoch": 1.034909489807647, "grad_norm": 0.361328125, "learning_rate": 1.4569326208637345e-05, "loss": 1.9796, "step": 32076 }, { "epoch": 1.0349417536614434, "grad_norm": 0.349609375, "learning_rate": 1.4568553219246634e-05, "loss": 2.0335, "step": 32077 }, { "epoch": 1.0349740175152395, "grad_norm": 0.361328125, "learning_rate": 1.4567780231002629e-05, "loss": 1.9799, "step": 32078 }, { "epoch": 1.035006281369036, "grad_norm": 0.369140625, "learning_rate": 1.4567007243907386e-05, "loss": 1.9425, "step": 32079 }, { "epoch": 1.0350385452228323, "grad_norm": 0.341796875, "learning_rate": 1.456623425796295e-05, "loss": 2.0049, "step": 32080 }, { "epoch": 1.0350708090766287, "grad_norm": 0.375, "learning_rate": 1.4565461273171378e-05, "loss": 1.9761, "step": 32081 }, { "epoch": 1.035103072930425, "grad_norm": 0.36328125, "learning_rate": 1.4564688289534722e-05, "loss": 1.9804, "step": 32082 }, { "epoch": 1.0351353367842213, "grad_norm": 0.34765625, "learning_rate": 1.4563915307055043e-05, "loss": 1.9153, "step": 32083 }, { "epoch": 1.0351676006380177, "grad_norm": 0.3984375, "learning_rate": 1.4563142325734393e-05, "loss": 1.9704, "step": 32084 }, { "epoch": 1.0351998644918141, "grad_norm": 0.34375, "learning_rate": 1.4562369345574834e-05, "loss": 1.9799, "step": 32085 }, { "epoch": 1.0352321283456103, "grad_norm": 0.3515625, "learning_rate": 1.4561596366578406e-05, "loss": 1.9758, "step": 32086 }, { "epoch": 1.0352643921994067, "grad_norm": 0.349609375, "learning_rate": 1.4560823388747165e-05, "loss": 1.9653, "step": 32087 }, { "epoch": 1.035296656053203, "grad_norm": 0.3515625, "learning_rate": 1.4560050412083175e-05, "loss": 1.9901, "step": 32088 }, { "epoch": 1.0353289199069995, "grad_norm": 0.353515625, "learning_rate": 1.4559277436588486e-05, "loss": 1.9881, "step": 32089 }, { "epoch": 1.035361183760796, "grad_norm": 0.333984375, "learning_rate": 1.4558504462265152e-05, "loss": 1.9578, "step": 32090 }, { "epoch": 1.035393447614592, "grad_norm": 0.337890625, "learning_rate": 1.4557731489115234e-05, "loss": 1.9876, "step": 32091 }, { "epoch": 1.0354257114683885, "grad_norm": 0.341796875, "learning_rate": 1.455695851714077e-05, "loss": 1.9937, "step": 32092 }, { "epoch": 1.0354579753221849, "grad_norm": 0.3359375, "learning_rate": 1.4556185546343829e-05, "loss": 1.9778, "step": 32093 }, { "epoch": 1.0354902391759813, "grad_norm": 0.34375, "learning_rate": 1.4555412576726459e-05, "loss": 1.9841, "step": 32094 }, { "epoch": 1.0355225030297774, "grad_norm": 0.341796875, "learning_rate": 1.4554639608290717e-05, "loss": 1.9467, "step": 32095 }, { "epoch": 1.0355547668835738, "grad_norm": 0.341796875, "learning_rate": 1.455386664103866e-05, "loss": 1.9591, "step": 32096 }, { "epoch": 1.0355870307373702, "grad_norm": 0.349609375, "learning_rate": 1.4553093674972336e-05, "loss": 2.0011, "step": 32097 }, { "epoch": 1.0356192945911666, "grad_norm": 0.34765625, "learning_rate": 1.4552320710093804e-05, "loss": 1.982, "step": 32098 }, { "epoch": 1.0356515584449628, "grad_norm": 0.33984375, "learning_rate": 1.4551547746405116e-05, "loss": 1.9788, "step": 32099 }, { "epoch": 1.0356838222987592, "grad_norm": 0.36328125, "learning_rate": 1.4550774783908327e-05, "loss": 1.9908, "step": 32100 }, { "epoch": 1.0357160861525556, "grad_norm": 0.359375, "learning_rate": 1.4550001822605492e-05, "loss": 2.0162, "step": 32101 }, { "epoch": 1.035748350006352, "grad_norm": 0.3671875, "learning_rate": 1.4549228862498664e-05, "loss": 1.9591, "step": 32102 }, { "epoch": 1.0357806138601482, "grad_norm": 0.357421875, "learning_rate": 1.4548455903589903e-05, "loss": 1.9876, "step": 32103 }, { "epoch": 1.0358128777139446, "grad_norm": 0.35546875, "learning_rate": 1.4547682945881254e-05, "loss": 1.9704, "step": 32104 }, { "epoch": 1.035845141567741, "grad_norm": 0.34765625, "learning_rate": 1.4546909989374778e-05, "loss": 1.9849, "step": 32105 }, { "epoch": 1.0358774054215374, "grad_norm": 0.357421875, "learning_rate": 1.4546137034072526e-05, "loss": 2.0107, "step": 32106 }, { "epoch": 1.0359096692753336, "grad_norm": 0.35546875, "learning_rate": 1.4545364079976556e-05, "loss": 1.9786, "step": 32107 }, { "epoch": 1.03594193312913, "grad_norm": 0.365234375, "learning_rate": 1.4544591127088918e-05, "loss": 1.9842, "step": 32108 }, { "epoch": 1.0359741969829264, "grad_norm": 0.35546875, "learning_rate": 1.4543818175411674e-05, "loss": 1.9773, "step": 32109 }, { "epoch": 1.0360064608367228, "grad_norm": 0.361328125, "learning_rate": 1.4543045224946868e-05, "loss": 1.9902, "step": 32110 }, { "epoch": 1.0360387246905192, "grad_norm": 0.357421875, "learning_rate": 1.454227227569656e-05, "loss": 1.99, "step": 32111 }, { "epoch": 1.0360709885443153, "grad_norm": 0.359375, "learning_rate": 1.4541499327662804e-05, "loss": 1.994, "step": 32112 }, { "epoch": 1.0361032523981117, "grad_norm": 0.3671875, "learning_rate": 1.4540726380847654e-05, "loss": 1.9808, "step": 32113 }, { "epoch": 1.0361355162519081, "grad_norm": 0.384765625, "learning_rate": 1.453995343525316e-05, "loss": 2.0043, "step": 32114 }, { "epoch": 1.0361677801057045, "grad_norm": 0.375, "learning_rate": 1.4539180490881392e-05, "loss": 1.9692, "step": 32115 }, { "epoch": 1.0362000439595007, "grad_norm": 0.38671875, "learning_rate": 1.4538407547734384e-05, "loss": 1.977, "step": 32116 }, { "epoch": 1.0362323078132971, "grad_norm": 0.380859375, "learning_rate": 1.4537634605814202e-05, "loss": 1.9578, "step": 32117 }, { "epoch": 1.0362645716670935, "grad_norm": 0.3828125, "learning_rate": 1.4536861665122898e-05, "loss": 1.9315, "step": 32118 }, { "epoch": 1.03629683552089, "grad_norm": 0.37890625, "learning_rate": 1.4536088725662521e-05, "loss": 1.9498, "step": 32119 }, { "epoch": 1.036329099374686, "grad_norm": 0.388671875, "learning_rate": 1.4535315787435134e-05, "loss": 1.9675, "step": 32120 }, { "epoch": 1.0363613632284825, "grad_norm": 0.365234375, "learning_rate": 1.4534542850442796e-05, "loss": 1.9848, "step": 32121 }, { "epoch": 1.036393627082279, "grad_norm": 0.345703125, "learning_rate": 1.4533769914687544e-05, "loss": 1.956, "step": 32122 }, { "epoch": 1.0364258909360753, "grad_norm": 0.3828125, "learning_rate": 1.4532996980171443e-05, "loss": 1.9902, "step": 32123 }, { "epoch": 1.0364581547898715, "grad_norm": 0.337890625, "learning_rate": 1.4532224046896542e-05, "loss": 1.9728, "step": 32124 }, { "epoch": 1.0364904186436679, "grad_norm": 0.36328125, "learning_rate": 1.4531451114864902e-05, "loss": 1.9628, "step": 32125 }, { "epoch": 1.0365226824974643, "grad_norm": 0.37890625, "learning_rate": 1.4530678184078573e-05, "loss": 1.9514, "step": 32126 }, { "epoch": 1.0365549463512607, "grad_norm": 0.3359375, "learning_rate": 1.452990525453962e-05, "loss": 1.9612, "step": 32127 }, { "epoch": 1.0365872102050568, "grad_norm": 0.3828125, "learning_rate": 1.452913232625008e-05, "loss": 1.9791, "step": 32128 }, { "epoch": 1.0366194740588532, "grad_norm": 0.375, "learning_rate": 1.4528359399212011e-05, "loss": 1.9589, "step": 32129 }, { "epoch": 1.0366517379126496, "grad_norm": 0.359375, "learning_rate": 1.4527586473427476e-05, "loss": 1.944, "step": 32130 }, { "epoch": 1.036684001766446, "grad_norm": 0.40625, "learning_rate": 1.4526813548898524e-05, "loss": 1.9616, "step": 32131 }, { "epoch": 1.0367162656202424, "grad_norm": 0.35546875, "learning_rate": 1.452604062562721e-05, "loss": 1.9939, "step": 32132 }, { "epoch": 1.0367485294740386, "grad_norm": 0.392578125, "learning_rate": 1.4525267703615591e-05, "loss": 1.9676, "step": 32133 }, { "epoch": 1.036780793327835, "grad_norm": 0.333984375, "learning_rate": 1.4524494782865717e-05, "loss": 1.9503, "step": 32134 }, { "epoch": 1.0368130571816314, "grad_norm": 0.3671875, "learning_rate": 1.4523721863379643e-05, "loss": 1.95, "step": 32135 }, { "epoch": 1.0368453210354278, "grad_norm": 0.341796875, "learning_rate": 1.4522948945159423e-05, "loss": 1.951, "step": 32136 }, { "epoch": 1.036877584889224, "grad_norm": 0.345703125, "learning_rate": 1.4522176028207114e-05, "loss": 1.9723, "step": 32137 }, { "epoch": 1.0369098487430204, "grad_norm": 0.357421875, "learning_rate": 1.4521403112524767e-05, "loss": 1.9754, "step": 32138 }, { "epoch": 1.0369421125968168, "grad_norm": 0.345703125, "learning_rate": 1.4520630198114442e-05, "loss": 1.9712, "step": 32139 }, { "epoch": 1.0369743764506132, "grad_norm": 0.35546875, "learning_rate": 1.4519857284978186e-05, "loss": 1.9616, "step": 32140 }, { "epoch": 1.0370066403044094, "grad_norm": 0.3515625, "learning_rate": 1.4519084373118055e-05, "loss": 1.9914, "step": 32141 }, { "epoch": 1.0370389041582058, "grad_norm": 0.375, "learning_rate": 1.4518311462536108e-05, "loss": 1.9789, "step": 32142 }, { "epoch": 1.0370711680120022, "grad_norm": 0.3671875, "learning_rate": 1.4517538553234394e-05, "loss": 1.9829, "step": 32143 }, { "epoch": 1.0371034318657986, "grad_norm": 0.35546875, "learning_rate": 1.4516765645214968e-05, "loss": 1.9839, "step": 32144 }, { "epoch": 1.0371356957195947, "grad_norm": 0.384765625, "learning_rate": 1.4515992738479891e-05, "loss": 1.9665, "step": 32145 }, { "epoch": 1.0371679595733911, "grad_norm": 0.3515625, "learning_rate": 1.4515219833031206e-05, "loss": 1.9885, "step": 32146 }, { "epoch": 1.0372002234271875, "grad_norm": 0.359375, "learning_rate": 1.4514446928870974e-05, "loss": 1.9805, "step": 32147 }, { "epoch": 1.037232487280984, "grad_norm": 0.373046875, "learning_rate": 1.4513674026001247e-05, "loss": 1.9904, "step": 32148 }, { "epoch": 1.0372647511347803, "grad_norm": 0.427734375, "learning_rate": 1.4512901124424082e-05, "loss": 1.9942, "step": 32149 }, { "epoch": 1.0372970149885765, "grad_norm": 0.4140625, "learning_rate": 1.4512128224141531e-05, "loss": 1.9831, "step": 32150 }, { "epoch": 1.037329278842373, "grad_norm": 0.345703125, "learning_rate": 1.4511355325155652e-05, "loss": 1.9599, "step": 32151 }, { "epoch": 1.0373615426961693, "grad_norm": 0.408203125, "learning_rate": 1.4510582427468493e-05, "loss": 1.9425, "step": 32152 }, { "epoch": 1.0373938065499657, "grad_norm": 0.3515625, "learning_rate": 1.450980953108211e-05, "loss": 1.9969, "step": 32153 }, { "epoch": 1.037426070403762, "grad_norm": 0.361328125, "learning_rate": 1.4509036635998559e-05, "loss": 1.9814, "step": 32154 }, { "epoch": 1.0374583342575583, "grad_norm": 0.3828125, "learning_rate": 1.4508263742219893e-05, "loss": 1.9971, "step": 32155 }, { "epoch": 1.0374905981113547, "grad_norm": 0.35546875, "learning_rate": 1.4507490849748164e-05, "loss": 1.9663, "step": 32156 }, { "epoch": 1.037522861965151, "grad_norm": 0.37890625, "learning_rate": 1.450671795858544e-05, "loss": 1.9659, "step": 32157 }, { "epoch": 1.0375551258189473, "grad_norm": 0.33984375, "learning_rate": 1.4505945068733758e-05, "loss": 1.9708, "step": 32158 }, { "epoch": 1.0375873896727437, "grad_norm": 0.34765625, "learning_rate": 1.4505172180195177e-05, "loss": 1.9754, "step": 32159 }, { "epoch": 1.03761965352654, "grad_norm": 0.357421875, "learning_rate": 1.4504399292971751e-05, "loss": 1.9546, "step": 32160 }, { "epoch": 1.0376519173803365, "grad_norm": 0.357421875, "learning_rate": 1.4503626407065534e-05, "loss": 1.9765, "step": 32161 }, { "epoch": 1.0376841812341326, "grad_norm": 0.3671875, "learning_rate": 1.4502853522478587e-05, "loss": 2.0112, "step": 32162 }, { "epoch": 1.037716445087929, "grad_norm": 0.365234375, "learning_rate": 1.4502080639212965e-05, "loss": 2.0202, "step": 32163 }, { "epoch": 1.0377487089417254, "grad_norm": 0.3515625, "learning_rate": 1.4501307757270708e-05, "loss": 1.9934, "step": 32164 }, { "epoch": 1.0377809727955218, "grad_norm": 0.36328125, "learning_rate": 1.4500534876653875e-05, "loss": 1.9671, "step": 32165 }, { "epoch": 1.037813236649318, "grad_norm": 0.345703125, "learning_rate": 1.4499761997364529e-05, "loss": 1.982, "step": 32166 }, { "epoch": 1.0378455005031144, "grad_norm": 0.361328125, "learning_rate": 1.4498989119404718e-05, "loss": 1.9677, "step": 32167 }, { "epoch": 1.0378777643569108, "grad_norm": 0.41015625, "learning_rate": 1.4498216242776495e-05, "loss": 1.9971, "step": 32168 }, { "epoch": 1.0379100282107072, "grad_norm": 0.357421875, "learning_rate": 1.449744336748192e-05, "loss": 1.9653, "step": 32169 }, { "epoch": 1.0379422920645036, "grad_norm": 0.33984375, "learning_rate": 1.4496670493523043e-05, "loss": 1.9692, "step": 32170 }, { "epoch": 1.0379745559182998, "grad_norm": 0.365234375, "learning_rate": 1.4495897620901916e-05, "loss": 1.9908, "step": 32171 }, { "epoch": 1.0380068197720962, "grad_norm": 0.345703125, "learning_rate": 1.4495124749620596e-05, "loss": 1.9726, "step": 32172 }, { "epoch": 1.0380390836258926, "grad_norm": 0.3515625, "learning_rate": 1.4494351879681138e-05, "loss": 1.9573, "step": 32173 }, { "epoch": 1.038071347479689, "grad_norm": 0.349609375, "learning_rate": 1.4493579011085592e-05, "loss": 1.9984, "step": 32174 }, { "epoch": 1.0381036113334852, "grad_norm": 0.341796875, "learning_rate": 1.4492806143836016e-05, "loss": 1.9804, "step": 32175 }, { "epoch": 1.0381358751872816, "grad_norm": 0.34375, "learning_rate": 1.4492033277934468e-05, "loss": 1.9789, "step": 32176 }, { "epoch": 1.038168139041078, "grad_norm": 0.3671875, "learning_rate": 1.4491260413382992e-05, "loss": 1.9684, "step": 32177 }, { "epoch": 1.0382004028948744, "grad_norm": 0.34765625, "learning_rate": 1.4490487550183648e-05, "loss": 1.9627, "step": 32178 }, { "epoch": 1.0382326667486705, "grad_norm": 0.3515625, "learning_rate": 1.448971468833849e-05, "loss": 1.9767, "step": 32179 }, { "epoch": 1.038264930602467, "grad_norm": 0.357421875, "learning_rate": 1.4488941827849572e-05, "loss": 1.9602, "step": 32180 }, { "epoch": 1.0382971944562633, "grad_norm": 0.361328125, "learning_rate": 1.4488168968718946e-05, "loss": 1.9754, "step": 32181 }, { "epoch": 1.0383294583100597, "grad_norm": 0.35546875, "learning_rate": 1.4487396110948674e-05, "loss": 1.9838, "step": 32182 }, { "epoch": 1.038361722163856, "grad_norm": 0.357421875, "learning_rate": 1.4486623254540798e-05, "loss": 1.9897, "step": 32183 }, { "epoch": 1.0383939860176523, "grad_norm": 0.361328125, "learning_rate": 1.4485850399497378e-05, "loss": 1.9959, "step": 32184 }, { "epoch": 1.0384262498714487, "grad_norm": 0.361328125, "learning_rate": 1.4485077545820469e-05, "loss": 1.9926, "step": 32185 }, { "epoch": 1.0384585137252451, "grad_norm": 0.357421875, "learning_rate": 1.4484304693512124e-05, "loss": 1.9664, "step": 32186 }, { "epoch": 1.0384907775790415, "grad_norm": 0.359375, "learning_rate": 1.4483531842574397e-05, "loss": 1.9722, "step": 32187 }, { "epoch": 1.0385230414328377, "grad_norm": 0.376953125, "learning_rate": 1.4482758993009348e-05, "loss": 1.9606, "step": 32188 }, { "epoch": 1.038555305286634, "grad_norm": 0.365234375, "learning_rate": 1.448198614481902e-05, "loss": 1.9543, "step": 32189 }, { "epoch": 1.0385875691404305, "grad_norm": 0.37890625, "learning_rate": 1.4481213298005473e-05, "loss": 1.9993, "step": 32190 }, { "epoch": 1.0386198329942269, "grad_norm": 0.373046875, "learning_rate": 1.448044045257076e-05, "loss": 1.9758, "step": 32191 }, { "epoch": 1.038652096848023, "grad_norm": 0.36328125, "learning_rate": 1.4479667608516937e-05, "loss": 1.9725, "step": 32192 }, { "epoch": 1.0386843607018195, "grad_norm": 0.3515625, "learning_rate": 1.4478894765846052e-05, "loss": 1.97, "step": 32193 }, { "epoch": 1.0387166245556159, "grad_norm": 0.3515625, "learning_rate": 1.4478121924560174e-05, "loss": 1.9654, "step": 32194 }, { "epoch": 1.0387488884094123, "grad_norm": 0.384765625, "learning_rate": 1.4477349084661341e-05, "loss": 1.9679, "step": 32195 }, { "epoch": 1.0387811522632084, "grad_norm": 0.36328125, "learning_rate": 1.4476576246151614e-05, "loss": 2.0023, "step": 32196 }, { "epoch": 1.0388134161170048, "grad_norm": 0.34765625, "learning_rate": 1.4475803409033045e-05, "loss": 1.929, "step": 32197 }, { "epoch": 1.0388456799708012, "grad_norm": 0.361328125, "learning_rate": 1.4475030573307686e-05, "loss": 1.9599, "step": 32198 }, { "epoch": 1.0388779438245976, "grad_norm": 0.353515625, "learning_rate": 1.4474257738977595e-05, "loss": 1.9563, "step": 32199 }, { "epoch": 1.0389102076783938, "grad_norm": 0.353515625, "learning_rate": 1.4473484906044834e-05, "loss": 1.9723, "step": 32200 }, { "epoch": 1.0389424715321902, "grad_norm": 0.392578125, "learning_rate": 1.4472712074511443e-05, "loss": 2.0005, "step": 32201 }, { "epoch": 1.0389747353859866, "grad_norm": 0.359375, "learning_rate": 1.4471939244379476e-05, "loss": 1.9929, "step": 32202 }, { "epoch": 1.039006999239783, "grad_norm": 0.35546875, "learning_rate": 1.4471166415650995e-05, "loss": 2.0048, "step": 32203 }, { "epoch": 1.0390392630935792, "grad_norm": 0.361328125, "learning_rate": 1.4470393588328054e-05, "loss": 2.0203, "step": 32204 }, { "epoch": 1.0390715269473756, "grad_norm": 0.353515625, "learning_rate": 1.4469620762412702e-05, "loss": 1.9743, "step": 32205 }, { "epoch": 1.039103790801172, "grad_norm": 0.37890625, "learning_rate": 1.4468847937907002e-05, "loss": 1.9888, "step": 32206 }, { "epoch": 1.0391360546549684, "grad_norm": 0.33984375, "learning_rate": 1.4468075114812991e-05, "loss": 1.9656, "step": 32207 }, { "epoch": 1.0391683185087648, "grad_norm": 0.3671875, "learning_rate": 1.446730229313274e-05, "loss": 1.9855, "step": 32208 }, { "epoch": 1.039200582362561, "grad_norm": 0.35546875, "learning_rate": 1.4466529472868293e-05, "loss": 1.9663, "step": 32209 }, { "epoch": 1.0392328462163574, "grad_norm": 0.359375, "learning_rate": 1.4465756654021709e-05, "loss": 1.9989, "step": 32210 }, { "epoch": 1.0392651100701538, "grad_norm": 0.353515625, "learning_rate": 1.446498383659504e-05, "loss": 1.9894, "step": 32211 }, { "epoch": 1.0392973739239502, "grad_norm": 0.37109375, "learning_rate": 1.4464211020590344e-05, "loss": 1.973, "step": 32212 }, { "epoch": 1.0393296377777463, "grad_norm": 0.34375, "learning_rate": 1.4463438206009668e-05, "loss": 1.9706, "step": 32213 }, { "epoch": 1.0393619016315427, "grad_norm": 0.359375, "learning_rate": 1.4462665392855069e-05, "loss": 1.9611, "step": 32214 }, { "epoch": 1.0393941654853391, "grad_norm": 0.359375, "learning_rate": 1.4461892581128601e-05, "loss": 1.9665, "step": 32215 }, { "epoch": 1.0394264293391355, "grad_norm": 0.361328125, "learning_rate": 1.446111977083232e-05, "loss": 1.9574, "step": 32216 }, { "epoch": 1.0394586931929317, "grad_norm": 0.365234375, "learning_rate": 1.4460346961968278e-05, "loss": 1.9462, "step": 32217 }, { "epoch": 1.0394909570467281, "grad_norm": 0.341796875, "learning_rate": 1.445957415453853e-05, "loss": 1.9552, "step": 32218 }, { "epoch": 1.0395232209005245, "grad_norm": 0.349609375, "learning_rate": 1.4458801348545129e-05, "loss": 1.9813, "step": 32219 }, { "epoch": 1.039555484754321, "grad_norm": 0.35546875, "learning_rate": 1.4458028543990128e-05, "loss": 1.989, "step": 32220 }, { "epoch": 1.039587748608117, "grad_norm": 0.35546875, "learning_rate": 1.4457255740875583e-05, "loss": 1.9869, "step": 32221 }, { "epoch": 1.0396200124619135, "grad_norm": 0.33984375, "learning_rate": 1.4456482939203546e-05, "loss": 1.9659, "step": 32222 }, { "epoch": 1.0396522763157099, "grad_norm": 0.357421875, "learning_rate": 1.4455710138976074e-05, "loss": 1.9603, "step": 32223 }, { "epoch": 1.0396845401695063, "grad_norm": 0.34765625, "learning_rate": 1.445493734019522e-05, "loss": 1.9935, "step": 32224 }, { "epoch": 1.0397168040233025, "grad_norm": 0.345703125, "learning_rate": 1.4454164542863036e-05, "loss": 1.986, "step": 32225 }, { "epoch": 1.0397490678770989, "grad_norm": 0.35546875, "learning_rate": 1.4453391746981576e-05, "loss": 1.9284, "step": 32226 }, { "epoch": 1.0397813317308953, "grad_norm": 0.35546875, "learning_rate": 1.4452618952552895e-05, "loss": 1.9831, "step": 32227 }, { "epoch": 1.0398135955846917, "grad_norm": 0.33984375, "learning_rate": 1.4451846159579048e-05, "loss": 1.9671, "step": 32228 }, { "epoch": 1.039845859438488, "grad_norm": 0.345703125, "learning_rate": 1.4451073368062085e-05, "loss": 1.9359, "step": 32229 }, { "epoch": 1.0398781232922842, "grad_norm": 0.349609375, "learning_rate": 1.4450300578004068e-05, "loss": 1.9369, "step": 32230 }, { "epoch": 1.0399103871460806, "grad_norm": 0.34765625, "learning_rate": 1.4449527789407041e-05, "loss": 1.9456, "step": 32231 }, { "epoch": 1.039942650999877, "grad_norm": 0.35546875, "learning_rate": 1.4448755002273065e-05, "loss": 1.9626, "step": 32232 }, { "epoch": 1.0399749148536734, "grad_norm": 0.345703125, "learning_rate": 1.444798221660419e-05, "loss": 1.96, "step": 32233 }, { "epoch": 1.0400071787074696, "grad_norm": 0.3515625, "learning_rate": 1.4447209432402468e-05, "loss": 1.9884, "step": 32234 }, { "epoch": 1.040039442561266, "grad_norm": 0.349609375, "learning_rate": 1.4446436649669959e-05, "loss": 1.9788, "step": 32235 }, { "epoch": 1.0400717064150624, "grad_norm": 0.349609375, "learning_rate": 1.4445663868408723e-05, "loss": 1.9717, "step": 32236 }, { "epoch": 1.0401039702688588, "grad_norm": 0.349609375, "learning_rate": 1.4444891088620798e-05, "loss": 1.9437, "step": 32237 }, { "epoch": 1.040136234122655, "grad_norm": 0.34375, "learning_rate": 1.4444118310308244e-05, "loss": 1.9435, "step": 32238 }, { "epoch": 1.0401684979764514, "grad_norm": 0.3671875, "learning_rate": 1.4443345533473113e-05, "loss": 1.9987, "step": 32239 }, { "epoch": 1.0402007618302478, "grad_norm": 0.359375, "learning_rate": 1.4442572758117466e-05, "loss": 1.9369, "step": 32240 }, { "epoch": 1.0402330256840442, "grad_norm": 0.3671875, "learning_rate": 1.4441799984243352e-05, "loss": 1.9977, "step": 32241 }, { "epoch": 1.0402652895378404, "grad_norm": 0.384765625, "learning_rate": 1.4441027211852832e-05, "loss": 1.9699, "step": 32242 }, { "epoch": 1.0402975533916368, "grad_norm": 0.37109375, "learning_rate": 1.4440254440947946e-05, "loss": 1.9821, "step": 32243 }, { "epoch": 1.0403298172454332, "grad_norm": 0.4140625, "learning_rate": 1.4439481671530754e-05, "loss": 1.9603, "step": 32244 }, { "epoch": 1.0403620810992296, "grad_norm": 0.369140625, "learning_rate": 1.4438708903603313e-05, "loss": 1.9777, "step": 32245 }, { "epoch": 1.0403943449530257, "grad_norm": 0.390625, "learning_rate": 1.4437936137167677e-05, "loss": 1.9666, "step": 32246 }, { "epoch": 1.0404266088068221, "grad_norm": 0.357421875, "learning_rate": 1.4437163372225895e-05, "loss": 1.9626, "step": 32247 }, { "epoch": 1.0404588726606185, "grad_norm": 0.376953125, "learning_rate": 1.4436390608780034e-05, "loss": 1.9563, "step": 32248 }, { "epoch": 1.040491136514415, "grad_norm": 0.357421875, "learning_rate": 1.4435617846832125e-05, "loss": 1.9763, "step": 32249 }, { "epoch": 1.0405234003682113, "grad_norm": 0.36328125, "learning_rate": 1.443484508638424e-05, "loss": 1.9656, "step": 32250 }, { "epoch": 1.0405556642220075, "grad_norm": 0.359375, "learning_rate": 1.4434072327438424e-05, "loss": 1.9777, "step": 32251 }, { "epoch": 1.040587928075804, "grad_norm": 0.37109375, "learning_rate": 1.4433299569996738e-05, "loss": 1.9733, "step": 32252 }, { "epoch": 1.0406201919296003, "grad_norm": 0.357421875, "learning_rate": 1.443252681406123e-05, "loss": 1.974, "step": 32253 }, { "epoch": 1.0406524557833967, "grad_norm": 0.369140625, "learning_rate": 1.443175405963396e-05, "loss": 1.9575, "step": 32254 }, { "epoch": 1.040684719637193, "grad_norm": 0.369140625, "learning_rate": 1.4430981306716974e-05, "loss": 1.9402, "step": 32255 }, { "epoch": 1.0407169834909893, "grad_norm": 0.359375, "learning_rate": 1.4430208555312328e-05, "loss": 1.9866, "step": 32256 }, { "epoch": 1.0407492473447857, "grad_norm": 0.3515625, "learning_rate": 1.442943580542208e-05, "loss": 1.9767, "step": 32257 }, { "epoch": 1.040781511198582, "grad_norm": 0.37890625, "learning_rate": 1.442866305704828e-05, "loss": 1.9846, "step": 32258 }, { "epoch": 1.0408137750523783, "grad_norm": 0.365234375, "learning_rate": 1.4427890310192983e-05, "loss": 1.9751, "step": 32259 }, { "epoch": 1.0408460389061747, "grad_norm": 0.359375, "learning_rate": 1.4427117564858245e-05, "loss": 1.9711, "step": 32260 }, { "epoch": 1.040878302759971, "grad_norm": 0.3671875, "learning_rate": 1.4426344821046117e-05, "loss": 1.9683, "step": 32261 }, { "epoch": 1.0409105666137675, "grad_norm": 0.341796875, "learning_rate": 1.4425572078758653e-05, "loss": 1.9839, "step": 32262 }, { "epoch": 1.0409428304675636, "grad_norm": 0.375, "learning_rate": 1.4424799337997905e-05, "loss": 1.9875, "step": 32263 }, { "epoch": 1.04097509432136, "grad_norm": 0.359375, "learning_rate": 1.442402659876593e-05, "loss": 1.9793, "step": 32264 }, { "epoch": 1.0410073581751564, "grad_norm": 0.375, "learning_rate": 1.4423253861064783e-05, "loss": 1.9742, "step": 32265 }, { "epoch": 1.0410396220289528, "grad_norm": 0.37890625, "learning_rate": 1.4422481124896515e-05, "loss": 1.9703, "step": 32266 }, { "epoch": 1.041071885882749, "grad_norm": 0.357421875, "learning_rate": 1.442170839026318e-05, "loss": 1.9586, "step": 32267 }, { "epoch": 1.0411041497365454, "grad_norm": 0.375, "learning_rate": 1.4420935657166831e-05, "loss": 1.9803, "step": 32268 }, { "epoch": 1.0411364135903418, "grad_norm": 0.34375, "learning_rate": 1.4420162925609523e-05, "loss": 1.9641, "step": 32269 }, { "epoch": 1.0411686774441382, "grad_norm": 0.3515625, "learning_rate": 1.441939019559331e-05, "loss": 1.9956, "step": 32270 }, { "epoch": 1.0412009412979346, "grad_norm": 0.34765625, "learning_rate": 1.4418617467120242e-05, "loss": 1.9835, "step": 32271 }, { "epoch": 1.0412332051517308, "grad_norm": 0.369140625, "learning_rate": 1.4417844740192387e-05, "loss": 1.9647, "step": 32272 }, { "epoch": 1.0412654690055272, "grad_norm": 0.357421875, "learning_rate": 1.441707201481178e-05, "loss": 2.0135, "step": 32273 }, { "epoch": 1.0412977328593236, "grad_norm": 0.345703125, "learning_rate": 1.4416299290980483e-05, "loss": 1.9842, "step": 32274 }, { "epoch": 1.04132999671312, "grad_norm": 0.373046875, "learning_rate": 1.441552656870055e-05, "loss": 1.995, "step": 32275 }, { "epoch": 1.0413622605669162, "grad_norm": 0.361328125, "learning_rate": 1.4414753847974032e-05, "loss": 1.97, "step": 32276 }, { "epoch": 1.0413945244207126, "grad_norm": 0.365234375, "learning_rate": 1.4413981128802989e-05, "loss": 1.9528, "step": 32277 }, { "epoch": 1.041426788274509, "grad_norm": 0.357421875, "learning_rate": 1.4413208411189474e-05, "loss": 1.9907, "step": 32278 }, { "epoch": 1.0414590521283054, "grad_norm": 0.34375, "learning_rate": 1.4412435695135534e-05, "loss": 2.0116, "step": 32279 }, { "epoch": 1.0414913159821015, "grad_norm": 0.373046875, "learning_rate": 1.4411662980643224e-05, "loss": 1.9743, "step": 32280 }, { "epoch": 1.041523579835898, "grad_norm": 0.36328125, "learning_rate": 1.4410890267714598e-05, "loss": 1.977, "step": 32281 }, { "epoch": 1.0415558436896943, "grad_norm": 0.349609375, "learning_rate": 1.4410117556351714e-05, "loss": 1.9761, "step": 32282 }, { "epoch": 1.0415881075434907, "grad_norm": 0.3828125, "learning_rate": 1.4409344846556623e-05, "loss": 1.9686, "step": 32283 }, { "epoch": 1.041620371397287, "grad_norm": 0.359375, "learning_rate": 1.4408572138331388e-05, "loss": 1.9702, "step": 32284 }, { "epoch": 1.0416526352510833, "grad_norm": 0.33984375, "learning_rate": 1.4407799431678041e-05, "loss": 1.9887, "step": 32285 }, { "epoch": 1.0416848991048797, "grad_norm": 0.365234375, "learning_rate": 1.4407026726598652e-05, "loss": 1.9781, "step": 32286 }, { "epoch": 1.0417171629586761, "grad_norm": 0.37109375, "learning_rate": 1.4406254023095272e-05, "loss": 1.9728, "step": 32287 }, { "epoch": 1.0417494268124723, "grad_norm": 0.34765625, "learning_rate": 1.4405481321169954e-05, "loss": 1.966, "step": 32288 }, { "epoch": 1.0417816906662687, "grad_norm": 0.35546875, "learning_rate": 1.4404708620824753e-05, "loss": 2.0025, "step": 32289 }, { "epoch": 1.041813954520065, "grad_norm": 0.36328125, "learning_rate": 1.4403935922061723e-05, "loss": 1.9775, "step": 32290 }, { "epoch": 1.0418462183738615, "grad_norm": 0.35546875, "learning_rate": 1.4403163224882912e-05, "loss": 1.9723, "step": 32291 }, { "epoch": 1.0418784822276579, "grad_norm": 0.345703125, "learning_rate": 1.440239052929038e-05, "loss": 1.9973, "step": 32292 }, { "epoch": 1.041910746081454, "grad_norm": 0.34765625, "learning_rate": 1.4401617835286177e-05, "loss": 1.9707, "step": 32293 }, { "epoch": 1.0419430099352505, "grad_norm": 0.33984375, "learning_rate": 1.4400845142872357e-05, "loss": 1.9617, "step": 32294 }, { "epoch": 1.0419752737890469, "grad_norm": 0.357421875, "learning_rate": 1.4400072452050975e-05, "loss": 1.9901, "step": 32295 }, { "epoch": 1.0420075376428433, "grad_norm": 0.3515625, "learning_rate": 1.439929976282409e-05, "loss": 1.985, "step": 32296 }, { "epoch": 1.0420398014966394, "grad_norm": 0.345703125, "learning_rate": 1.4398527075193744e-05, "loss": 1.9927, "step": 32297 }, { "epoch": 1.0420720653504358, "grad_norm": 0.333984375, "learning_rate": 1.4397754389161997e-05, "loss": 2.0046, "step": 32298 }, { "epoch": 1.0421043292042322, "grad_norm": 0.36328125, "learning_rate": 1.4396981704730903e-05, "loss": 1.9176, "step": 32299 }, { "epoch": 1.0421365930580286, "grad_norm": 0.34765625, "learning_rate": 1.4396209021902514e-05, "loss": 1.9603, "step": 32300 }, { "epoch": 1.0421688569118248, "grad_norm": 0.34375, "learning_rate": 1.4395436340678885e-05, "loss": 1.922, "step": 32301 }, { "epoch": 1.0422011207656212, "grad_norm": 0.357421875, "learning_rate": 1.439466366106207e-05, "loss": 1.9831, "step": 32302 }, { "epoch": 1.0422333846194176, "grad_norm": 0.36328125, "learning_rate": 1.4393890983054125e-05, "loss": 1.975, "step": 32303 }, { "epoch": 1.042265648473214, "grad_norm": 0.34375, "learning_rate": 1.4393118306657096e-05, "loss": 1.9963, "step": 32304 }, { "epoch": 1.0422979123270102, "grad_norm": 0.345703125, "learning_rate": 1.4392345631873042e-05, "loss": 1.9925, "step": 32305 }, { "epoch": 1.0423301761808066, "grad_norm": 0.3515625, "learning_rate": 1.4391572958704016e-05, "loss": 1.9439, "step": 32306 }, { "epoch": 1.042362440034603, "grad_norm": 0.34765625, "learning_rate": 1.439080028715207e-05, "loss": 1.9453, "step": 32307 }, { "epoch": 1.0423947038883994, "grad_norm": 0.35546875, "learning_rate": 1.4390027617219258e-05, "loss": 1.9637, "step": 32308 }, { "epoch": 1.0424269677421956, "grad_norm": 0.35546875, "learning_rate": 1.4389254948907642e-05, "loss": 1.9558, "step": 32309 }, { "epoch": 1.042459231595992, "grad_norm": 0.357421875, "learning_rate": 1.4388482282219262e-05, "loss": 2.0091, "step": 32310 }, { "epoch": 1.0424914954497884, "grad_norm": 0.34765625, "learning_rate": 1.4387709617156178e-05, "loss": 1.9979, "step": 32311 }, { "epoch": 1.0425237593035848, "grad_norm": 0.359375, "learning_rate": 1.4386936953720445e-05, "loss": 2.009, "step": 32312 }, { "epoch": 1.0425560231573812, "grad_norm": 0.36328125, "learning_rate": 1.438616429191411e-05, "loss": 1.9883, "step": 32313 }, { "epoch": 1.0425882870111773, "grad_norm": 0.345703125, "learning_rate": 1.4385391631739234e-05, "loss": 1.9759, "step": 32314 }, { "epoch": 1.0426205508649737, "grad_norm": 0.369140625, "learning_rate": 1.4384618973197875e-05, "loss": 1.9832, "step": 32315 }, { "epoch": 1.0426528147187701, "grad_norm": 0.34765625, "learning_rate": 1.4383846316292074e-05, "loss": 1.9931, "step": 32316 }, { "epoch": 1.0426850785725665, "grad_norm": 0.365234375, "learning_rate": 1.4383073661023891e-05, "loss": 1.9757, "step": 32317 }, { "epoch": 1.0427173424263627, "grad_norm": 0.3515625, "learning_rate": 1.4382301007395372e-05, "loss": 1.9835, "step": 32318 }, { "epoch": 1.0427496062801591, "grad_norm": 0.357421875, "learning_rate": 1.4381528355408585e-05, "loss": 1.967, "step": 32319 }, { "epoch": 1.0427818701339555, "grad_norm": 0.345703125, "learning_rate": 1.4380755705065573e-05, "loss": 1.995, "step": 32320 }, { "epoch": 1.042814133987752, "grad_norm": 0.3515625, "learning_rate": 1.43799830563684e-05, "loss": 1.9348, "step": 32321 }, { "epoch": 1.042846397841548, "grad_norm": 0.34375, "learning_rate": 1.4379210409319104e-05, "loss": 1.9708, "step": 32322 }, { "epoch": 1.0428786616953445, "grad_norm": 0.333984375, "learning_rate": 1.4378437763919746e-05, "loss": 2.0106, "step": 32323 }, { "epoch": 1.0429109255491409, "grad_norm": 0.3515625, "learning_rate": 1.4377665120172383e-05, "loss": 1.9792, "step": 32324 }, { "epoch": 1.0429431894029373, "grad_norm": 0.345703125, "learning_rate": 1.4376892478079066e-05, "loss": 1.9935, "step": 32325 }, { "epoch": 1.0429754532567335, "grad_norm": 0.3828125, "learning_rate": 1.4376119837641845e-05, "loss": 1.9882, "step": 32326 }, { "epoch": 1.0430077171105299, "grad_norm": 0.349609375, "learning_rate": 1.4375347198862783e-05, "loss": 1.9824, "step": 32327 }, { "epoch": 1.0430399809643263, "grad_norm": 0.353515625, "learning_rate": 1.4374574561743923e-05, "loss": 1.9654, "step": 32328 }, { "epoch": 1.0430722448181227, "grad_norm": 0.345703125, "learning_rate": 1.4373801926287322e-05, "loss": 1.9828, "step": 32329 }, { "epoch": 1.0431045086719188, "grad_norm": 0.35546875, "learning_rate": 1.4373029292495036e-05, "loss": 1.9596, "step": 32330 }, { "epoch": 1.0431367725257152, "grad_norm": 0.375, "learning_rate": 1.4372256660369116e-05, "loss": 2.0022, "step": 32331 }, { "epoch": 1.0431690363795116, "grad_norm": 0.37109375, "learning_rate": 1.4371484029911616e-05, "loss": 1.9811, "step": 32332 }, { "epoch": 1.043201300233308, "grad_norm": 0.376953125, "learning_rate": 1.4370711401124593e-05, "loss": 1.9815, "step": 32333 }, { "epoch": 1.0432335640871044, "grad_norm": 0.380859375, "learning_rate": 1.4369938774010095e-05, "loss": 1.9655, "step": 32334 }, { "epoch": 1.0432658279409006, "grad_norm": 0.35546875, "learning_rate": 1.4369166148570177e-05, "loss": 2.0023, "step": 32335 }, { "epoch": 1.043298091794697, "grad_norm": 0.39453125, "learning_rate": 1.4368393524806895e-05, "loss": 1.9711, "step": 32336 }, { "epoch": 1.0433303556484934, "grad_norm": 0.34375, "learning_rate": 1.4367620902722296e-05, "loss": 1.9976, "step": 32337 }, { "epoch": 1.0433626195022898, "grad_norm": 0.3828125, "learning_rate": 1.4366848282318444e-05, "loss": 1.9529, "step": 32338 }, { "epoch": 1.043394883356086, "grad_norm": 0.357421875, "learning_rate": 1.4366075663597386e-05, "loss": 1.9892, "step": 32339 }, { "epoch": 1.0434271472098824, "grad_norm": 0.353515625, "learning_rate": 1.4365303046561175e-05, "loss": 1.9853, "step": 32340 }, { "epoch": 1.0434594110636788, "grad_norm": 0.341796875, "learning_rate": 1.4364530431211864e-05, "loss": 1.973, "step": 32341 }, { "epoch": 1.0434916749174752, "grad_norm": 0.341796875, "learning_rate": 1.436375781755151e-05, "loss": 1.9576, "step": 32342 }, { "epoch": 1.0435239387712714, "grad_norm": 0.359375, "learning_rate": 1.4362985205582162e-05, "loss": 1.9471, "step": 32343 }, { "epoch": 1.0435562026250678, "grad_norm": 0.33984375, "learning_rate": 1.4362212595305881e-05, "loss": 1.9564, "step": 32344 }, { "epoch": 1.0435884664788642, "grad_norm": 0.349609375, "learning_rate": 1.4361439986724714e-05, "loss": 1.9828, "step": 32345 }, { "epoch": 1.0436207303326606, "grad_norm": 0.361328125, "learning_rate": 1.4360667379840715e-05, "loss": 1.9916, "step": 32346 }, { "epoch": 1.0436529941864567, "grad_norm": 0.33984375, "learning_rate": 1.4359894774655937e-05, "loss": 1.9751, "step": 32347 }, { "epoch": 1.0436852580402531, "grad_norm": 0.3515625, "learning_rate": 1.4359122171172434e-05, "loss": 1.9968, "step": 32348 }, { "epoch": 1.0437175218940495, "grad_norm": 0.34765625, "learning_rate": 1.4358349569392263e-05, "loss": 1.9695, "step": 32349 }, { "epoch": 1.043749785747846, "grad_norm": 0.330078125, "learning_rate": 1.435757696931747e-05, "loss": 1.9761, "step": 32350 }, { "epoch": 1.0437820496016423, "grad_norm": 0.3515625, "learning_rate": 1.4356804370950121e-05, "loss": 1.9773, "step": 32351 }, { "epoch": 1.0438143134554385, "grad_norm": 0.359375, "learning_rate": 1.4356031774292259e-05, "loss": 1.9655, "step": 32352 }, { "epoch": 1.043846577309235, "grad_norm": 0.365234375, "learning_rate": 1.4355259179345936e-05, "loss": 2.0022, "step": 32353 }, { "epoch": 1.0438788411630313, "grad_norm": 0.37109375, "learning_rate": 1.4354486586113209e-05, "loss": 1.9799, "step": 32354 }, { "epoch": 1.0439111050168277, "grad_norm": 0.365234375, "learning_rate": 1.4353713994596133e-05, "loss": 1.9723, "step": 32355 }, { "epoch": 1.0439433688706239, "grad_norm": 0.33984375, "learning_rate": 1.435294140479676e-05, "loss": 1.972, "step": 32356 }, { "epoch": 1.0439756327244203, "grad_norm": 0.36328125, "learning_rate": 1.4352168816717152e-05, "loss": 2.0113, "step": 32357 }, { "epoch": 1.0440078965782167, "grad_norm": 0.361328125, "learning_rate": 1.4351396230359347e-05, "loss": 1.9813, "step": 32358 }, { "epoch": 1.044040160432013, "grad_norm": 0.34375, "learning_rate": 1.43506236457254e-05, "loss": 2.0003, "step": 32359 }, { "epoch": 1.0440724242858093, "grad_norm": 0.365234375, "learning_rate": 1.4349851062817375e-05, "loss": 1.9789, "step": 32360 }, { "epoch": 1.0441046881396057, "grad_norm": 0.345703125, "learning_rate": 1.434907848163732e-05, "loss": 1.9623, "step": 32361 }, { "epoch": 1.044136951993402, "grad_norm": 0.380859375, "learning_rate": 1.4348305902187286e-05, "loss": 1.958, "step": 32362 }, { "epoch": 1.0441692158471985, "grad_norm": 0.34765625, "learning_rate": 1.4347533324469338e-05, "loss": 1.9941, "step": 32363 }, { "epoch": 1.0442014797009946, "grad_norm": 0.388671875, "learning_rate": 1.4346760748485509e-05, "loss": 1.9888, "step": 32364 }, { "epoch": 1.044233743554791, "grad_norm": 0.359375, "learning_rate": 1.4345988174237867e-05, "loss": 1.9458, "step": 32365 }, { "epoch": 1.0442660074085874, "grad_norm": 0.37890625, "learning_rate": 1.4345215601728462e-05, "loss": 1.9303, "step": 32366 }, { "epoch": 1.0442982712623838, "grad_norm": 0.4140625, "learning_rate": 1.4344443030959348e-05, "loss": 1.9782, "step": 32367 }, { "epoch": 1.04433053511618, "grad_norm": 0.376953125, "learning_rate": 1.4343670461932575e-05, "loss": 1.9459, "step": 32368 }, { "epoch": 1.0443627989699764, "grad_norm": 0.40234375, "learning_rate": 1.4342897894650205e-05, "loss": 1.9803, "step": 32369 }, { "epoch": 1.0443950628237728, "grad_norm": 0.365234375, "learning_rate": 1.4342125329114282e-05, "loss": 2.0062, "step": 32370 }, { "epoch": 1.0444273266775692, "grad_norm": 0.396484375, "learning_rate": 1.434135276532686e-05, "loss": 1.9586, "step": 32371 }, { "epoch": 1.0444595905313656, "grad_norm": 0.39453125, "learning_rate": 1.4340580203289996e-05, "loss": 1.9833, "step": 32372 }, { "epoch": 1.0444918543851618, "grad_norm": 0.373046875, "learning_rate": 1.4339807643005743e-05, "loss": 1.9777, "step": 32373 }, { "epoch": 1.0445241182389582, "grad_norm": 0.353515625, "learning_rate": 1.4339035084476153e-05, "loss": 1.961, "step": 32374 }, { "epoch": 1.0445563820927546, "grad_norm": 0.365234375, "learning_rate": 1.4338262527703284e-05, "loss": 1.9788, "step": 32375 }, { "epoch": 1.044588645946551, "grad_norm": 0.365234375, "learning_rate": 1.433748997268918e-05, "loss": 1.9899, "step": 32376 }, { "epoch": 1.0446209098003472, "grad_norm": 0.375, "learning_rate": 1.43367174194359e-05, "loss": 1.9926, "step": 32377 }, { "epoch": 1.0446531736541436, "grad_norm": 0.349609375, "learning_rate": 1.4335944867945498e-05, "loss": 1.9723, "step": 32378 }, { "epoch": 1.04468543750794, "grad_norm": 0.36328125, "learning_rate": 1.4335172318220025e-05, "loss": 1.988, "step": 32379 }, { "epoch": 1.0447177013617364, "grad_norm": 0.390625, "learning_rate": 1.4334399770261535e-05, "loss": 1.9532, "step": 32380 }, { "epoch": 1.0447499652155325, "grad_norm": 0.365234375, "learning_rate": 1.4333627224072085e-05, "loss": 1.9067, "step": 32381 }, { "epoch": 1.044782229069329, "grad_norm": 0.3828125, "learning_rate": 1.4332854679653723e-05, "loss": 1.9626, "step": 32382 }, { "epoch": 1.0448144929231253, "grad_norm": 0.39453125, "learning_rate": 1.4332082137008503e-05, "loss": 1.9654, "step": 32383 }, { "epoch": 1.0448467567769217, "grad_norm": 0.359375, "learning_rate": 1.4331309596138479e-05, "loss": 1.9605, "step": 32384 }, { "epoch": 1.044879020630718, "grad_norm": 0.3671875, "learning_rate": 1.4330537057045705e-05, "loss": 1.9877, "step": 32385 }, { "epoch": 1.0449112844845143, "grad_norm": 0.365234375, "learning_rate": 1.432976451973223e-05, "loss": 1.9981, "step": 32386 }, { "epoch": 1.0449435483383107, "grad_norm": 0.359375, "learning_rate": 1.4328991984200123e-05, "loss": 1.9873, "step": 32387 }, { "epoch": 1.0449758121921071, "grad_norm": 0.369140625, "learning_rate": 1.4328219450451416e-05, "loss": 1.9859, "step": 32388 }, { "epoch": 1.0450080760459035, "grad_norm": 0.373046875, "learning_rate": 1.4327446918488174e-05, "loss": 1.9595, "step": 32389 }, { "epoch": 1.0450403398996997, "grad_norm": 0.345703125, "learning_rate": 1.4326674388312446e-05, "loss": 1.9501, "step": 32390 }, { "epoch": 1.045072603753496, "grad_norm": 0.353515625, "learning_rate": 1.4325901859926285e-05, "loss": 1.9762, "step": 32391 }, { "epoch": 1.0451048676072925, "grad_norm": 0.34765625, "learning_rate": 1.4325129333331751e-05, "loss": 1.9673, "step": 32392 }, { "epoch": 1.0451371314610889, "grad_norm": 0.35546875, "learning_rate": 1.4324356808530898e-05, "loss": 1.9847, "step": 32393 }, { "epoch": 1.045169395314885, "grad_norm": 0.349609375, "learning_rate": 1.4323584285525768e-05, "loss": 1.9846, "step": 32394 }, { "epoch": 1.0452016591686815, "grad_norm": 0.3359375, "learning_rate": 1.4322811764318418e-05, "loss": 1.9739, "step": 32395 }, { "epoch": 1.0452339230224779, "grad_norm": 0.35546875, "learning_rate": 1.43220392449109e-05, "loss": 1.9608, "step": 32396 }, { "epoch": 1.0452661868762743, "grad_norm": 0.341796875, "learning_rate": 1.4321266727305275e-05, "loss": 1.9851, "step": 32397 }, { "epoch": 1.0452984507300704, "grad_norm": 0.36328125, "learning_rate": 1.4320494211503593e-05, "loss": 1.9907, "step": 32398 }, { "epoch": 1.0453307145838668, "grad_norm": 0.353515625, "learning_rate": 1.4319721697507912e-05, "loss": 1.9723, "step": 32399 }, { "epoch": 1.0453629784376632, "grad_norm": 0.365234375, "learning_rate": 1.4318949185320272e-05, "loss": 1.9757, "step": 32400 }, { "epoch": 1.0453952422914596, "grad_norm": 0.353515625, "learning_rate": 1.4318176674942728e-05, "loss": 1.9914, "step": 32401 }, { "epoch": 1.0454275061452558, "grad_norm": 0.357421875, "learning_rate": 1.4317404166377343e-05, "loss": 1.9654, "step": 32402 }, { "epoch": 1.0454597699990522, "grad_norm": 0.34765625, "learning_rate": 1.4316631659626167e-05, "loss": 1.9811, "step": 32403 }, { "epoch": 1.0454920338528486, "grad_norm": 0.373046875, "learning_rate": 1.431585915469125e-05, "loss": 1.9659, "step": 32404 }, { "epoch": 1.045524297706645, "grad_norm": 0.34375, "learning_rate": 1.4315086651574655e-05, "loss": 1.9881, "step": 32405 }, { "epoch": 1.0455565615604412, "grad_norm": 0.341796875, "learning_rate": 1.4314314150278417e-05, "loss": 1.9462, "step": 32406 }, { "epoch": 1.0455888254142376, "grad_norm": 0.349609375, "learning_rate": 1.4313541650804602e-05, "loss": 2.0005, "step": 32407 }, { "epoch": 1.045621089268034, "grad_norm": 0.345703125, "learning_rate": 1.431276915315526e-05, "loss": 1.9603, "step": 32408 }, { "epoch": 1.0456533531218304, "grad_norm": 0.349609375, "learning_rate": 1.4311996657332445e-05, "loss": 1.9539, "step": 32409 }, { "epoch": 1.0456856169756268, "grad_norm": 0.337890625, "learning_rate": 1.431122416333821e-05, "loss": 1.975, "step": 32410 }, { "epoch": 1.045717880829423, "grad_norm": 0.345703125, "learning_rate": 1.4310451671174612e-05, "loss": 1.989, "step": 32411 }, { "epoch": 1.0457501446832194, "grad_norm": 0.33984375, "learning_rate": 1.4309679180843695e-05, "loss": 1.9619, "step": 32412 }, { "epoch": 1.0457824085370158, "grad_norm": 0.3359375, "learning_rate": 1.4308906692347517e-05, "loss": 1.9894, "step": 32413 }, { "epoch": 1.0458146723908122, "grad_norm": 0.349609375, "learning_rate": 1.4308134205688132e-05, "loss": 2.0009, "step": 32414 }, { "epoch": 1.0458469362446083, "grad_norm": 0.337890625, "learning_rate": 1.4307361720867593e-05, "loss": 1.9587, "step": 32415 }, { "epoch": 1.0458792000984047, "grad_norm": 0.353515625, "learning_rate": 1.4306589237887952e-05, "loss": 1.9739, "step": 32416 }, { "epoch": 1.0459114639522011, "grad_norm": 0.345703125, "learning_rate": 1.4305816756751266e-05, "loss": 1.9733, "step": 32417 }, { "epoch": 1.0459437278059975, "grad_norm": 0.3515625, "learning_rate": 1.430504427745958e-05, "loss": 2.0176, "step": 32418 }, { "epoch": 1.0459759916597937, "grad_norm": 0.3515625, "learning_rate": 1.4304271800014953e-05, "loss": 1.9882, "step": 32419 }, { "epoch": 1.0460082555135901, "grad_norm": 0.34765625, "learning_rate": 1.4303499324419438e-05, "loss": 1.9393, "step": 32420 }, { "epoch": 1.0460405193673865, "grad_norm": 0.357421875, "learning_rate": 1.4302726850675084e-05, "loss": 1.9692, "step": 32421 }, { "epoch": 1.046072783221183, "grad_norm": 0.333984375, "learning_rate": 1.4301954378783951e-05, "loss": 1.9762, "step": 32422 }, { "epoch": 1.046105047074979, "grad_norm": 0.34765625, "learning_rate": 1.4301181908748089e-05, "loss": 1.9606, "step": 32423 }, { "epoch": 1.0461373109287755, "grad_norm": 0.345703125, "learning_rate": 1.4300409440569548e-05, "loss": 1.979, "step": 32424 }, { "epoch": 1.0461695747825719, "grad_norm": 0.341796875, "learning_rate": 1.4299636974250384e-05, "loss": 1.9887, "step": 32425 }, { "epoch": 1.0462018386363683, "grad_norm": 0.35546875, "learning_rate": 1.4298864509792647e-05, "loss": 1.9797, "step": 32426 }, { "epoch": 1.0462341024901645, "grad_norm": 0.341796875, "learning_rate": 1.4298092047198393e-05, "loss": 1.9658, "step": 32427 }, { "epoch": 1.0462663663439609, "grad_norm": 0.33984375, "learning_rate": 1.4297319586469673e-05, "loss": 1.9662, "step": 32428 }, { "epoch": 1.0462986301977573, "grad_norm": 0.353515625, "learning_rate": 1.4296547127608542e-05, "loss": 1.9804, "step": 32429 }, { "epoch": 1.0463308940515537, "grad_norm": 0.34375, "learning_rate": 1.4295774670617062e-05, "loss": 1.9475, "step": 32430 }, { "epoch": 1.04636315790535, "grad_norm": 0.33984375, "learning_rate": 1.4295002215497269e-05, "loss": 1.9675, "step": 32431 }, { "epoch": 1.0463954217591462, "grad_norm": 0.34765625, "learning_rate": 1.4294229762251223e-05, "loss": 1.976, "step": 32432 }, { "epoch": 1.0464276856129426, "grad_norm": 0.341796875, "learning_rate": 1.4293457310880975e-05, "loss": 1.9667, "step": 32433 }, { "epoch": 1.046459949466739, "grad_norm": 0.3515625, "learning_rate": 1.4292684861388583e-05, "loss": 1.9748, "step": 32434 }, { "epoch": 1.0464922133205354, "grad_norm": 0.345703125, "learning_rate": 1.4291912413776099e-05, "loss": 1.9617, "step": 32435 }, { "epoch": 1.0465244771743316, "grad_norm": 0.35546875, "learning_rate": 1.4291139968045581e-05, "loss": 1.9904, "step": 32436 }, { "epoch": 1.046556741028128, "grad_norm": 0.34375, "learning_rate": 1.429036752419907e-05, "loss": 1.9762, "step": 32437 }, { "epoch": 1.0465890048819244, "grad_norm": 0.341796875, "learning_rate": 1.428959508223862e-05, "loss": 1.9909, "step": 32438 }, { "epoch": 1.0466212687357208, "grad_norm": 0.37109375, "learning_rate": 1.4288822642166292e-05, "loss": 1.989, "step": 32439 }, { "epoch": 1.046653532589517, "grad_norm": 0.341796875, "learning_rate": 1.4288050203984137e-05, "loss": 1.9741, "step": 32440 }, { "epoch": 1.0466857964433134, "grad_norm": 0.353515625, "learning_rate": 1.4287277767694207e-05, "loss": 1.9707, "step": 32441 }, { "epoch": 1.0467180602971098, "grad_norm": 0.337890625, "learning_rate": 1.4286505333298557e-05, "loss": 1.9822, "step": 32442 }, { "epoch": 1.0467503241509062, "grad_norm": 0.341796875, "learning_rate": 1.4285732900799233e-05, "loss": 1.9685, "step": 32443 }, { "epoch": 1.0467825880047024, "grad_norm": 0.380859375, "learning_rate": 1.4284960470198296e-05, "loss": 1.9792, "step": 32444 }, { "epoch": 1.0468148518584988, "grad_norm": 0.388671875, "learning_rate": 1.4284188041497794e-05, "loss": 1.9778, "step": 32445 }, { "epoch": 1.0468471157122952, "grad_norm": 0.349609375, "learning_rate": 1.4283415614699783e-05, "loss": 1.9739, "step": 32446 }, { "epoch": 1.0468793795660916, "grad_norm": 0.365234375, "learning_rate": 1.4282643189806311e-05, "loss": 1.9634, "step": 32447 }, { "epoch": 1.0469116434198877, "grad_norm": 0.35546875, "learning_rate": 1.4281870766819441e-05, "loss": 1.9641, "step": 32448 }, { "epoch": 1.0469439072736841, "grad_norm": 0.34375, "learning_rate": 1.4281098345741215e-05, "loss": 1.9675, "step": 32449 }, { "epoch": 1.0469761711274805, "grad_norm": 0.359375, "learning_rate": 1.4280325926573691e-05, "loss": 1.9752, "step": 32450 }, { "epoch": 1.047008434981277, "grad_norm": 0.34765625, "learning_rate": 1.4279553509318922e-05, "loss": 1.9895, "step": 32451 }, { "epoch": 1.0470406988350733, "grad_norm": 0.357421875, "learning_rate": 1.427878109397896e-05, "loss": 2.0016, "step": 32452 }, { "epoch": 1.0470729626888695, "grad_norm": 0.35546875, "learning_rate": 1.4278008680555858e-05, "loss": 1.9847, "step": 32453 }, { "epoch": 1.047105226542666, "grad_norm": 0.349609375, "learning_rate": 1.4277236269051672e-05, "loss": 1.9945, "step": 32454 }, { "epoch": 1.0471374903964623, "grad_norm": 0.34375, "learning_rate": 1.427646385946845e-05, "loss": 1.9905, "step": 32455 }, { "epoch": 1.0471697542502587, "grad_norm": 0.345703125, "learning_rate": 1.4275691451808245e-05, "loss": 1.9426, "step": 32456 }, { "epoch": 1.0472020181040549, "grad_norm": 0.361328125, "learning_rate": 1.4274919046073114e-05, "loss": 1.9822, "step": 32457 }, { "epoch": 1.0472342819578513, "grad_norm": 0.353515625, "learning_rate": 1.4274146642265106e-05, "loss": 1.972, "step": 32458 }, { "epoch": 1.0472665458116477, "grad_norm": 0.349609375, "learning_rate": 1.4273374240386279e-05, "loss": 1.944, "step": 32459 }, { "epoch": 1.047298809665444, "grad_norm": 0.34375, "learning_rate": 1.4272601840438683e-05, "loss": 1.9344, "step": 32460 }, { "epoch": 1.0473310735192403, "grad_norm": 0.337890625, "learning_rate": 1.4271829442424365e-05, "loss": 1.9773, "step": 32461 }, { "epoch": 1.0473633373730367, "grad_norm": 0.34765625, "learning_rate": 1.4271057046345388e-05, "loss": 1.9959, "step": 32462 }, { "epoch": 1.047395601226833, "grad_norm": 0.3515625, "learning_rate": 1.4270284652203798e-05, "loss": 1.981, "step": 32463 }, { "epoch": 1.0474278650806295, "grad_norm": 0.34375, "learning_rate": 1.4269512260001651e-05, "loss": 2.0036, "step": 32464 }, { "epoch": 1.0474601289344256, "grad_norm": 0.33984375, "learning_rate": 1.4268739869740995e-05, "loss": 1.9832, "step": 32465 }, { "epoch": 1.047492392788222, "grad_norm": 0.3515625, "learning_rate": 1.4267967481423899e-05, "loss": 2.0, "step": 32466 }, { "epoch": 1.0475246566420184, "grad_norm": 0.345703125, "learning_rate": 1.4267195095052395e-05, "loss": 1.9737, "step": 32467 }, { "epoch": 1.0475569204958148, "grad_norm": 0.373046875, "learning_rate": 1.4266422710628545e-05, "loss": 1.9757, "step": 32468 }, { "epoch": 1.047589184349611, "grad_norm": 0.3515625, "learning_rate": 1.42656503281544e-05, "loss": 2.0119, "step": 32469 }, { "epoch": 1.0476214482034074, "grad_norm": 0.376953125, "learning_rate": 1.4264877947632015e-05, "loss": 1.9883, "step": 32470 }, { "epoch": 1.0476537120572038, "grad_norm": 0.3515625, "learning_rate": 1.4264105569063442e-05, "loss": 2.0211, "step": 32471 }, { "epoch": 1.0476859759110002, "grad_norm": 0.35546875, "learning_rate": 1.4263333192450742e-05, "loss": 1.9692, "step": 32472 }, { "epoch": 1.0477182397647966, "grad_norm": 0.359375, "learning_rate": 1.4262560817795952e-05, "loss": 1.9863, "step": 32473 }, { "epoch": 1.0477505036185928, "grad_norm": 0.35546875, "learning_rate": 1.4261788445101131e-05, "loss": 1.9593, "step": 32474 }, { "epoch": 1.0477827674723892, "grad_norm": 0.3515625, "learning_rate": 1.4261016074368336e-05, "loss": 1.9823, "step": 32475 }, { "epoch": 1.0478150313261856, "grad_norm": 0.349609375, "learning_rate": 1.4260243705599616e-05, "loss": 1.9551, "step": 32476 }, { "epoch": 1.047847295179982, "grad_norm": 0.34375, "learning_rate": 1.4259471338797026e-05, "loss": 1.9693, "step": 32477 }, { "epoch": 1.0478795590337782, "grad_norm": 0.34375, "learning_rate": 1.4258698973962626e-05, "loss": 1.9598, "step": 32478 }, { "epoch": 1.0479118228875746, "grad_norm": 0.341796875, "learning_rate": 1.4257926611098448e-05, "loss": 1.9461, "step": 32479 }, { "epoch": 1.047944086741371, "grad_norm": 0.353515625, "learning_rate": 1.4257154250206562e-05, "loss": 1.9737, "step": 32480 }, { "epoch": 1.0479763505951674, "grad_norm": 0.33203125, "learning_rate": 1.4256381891289015e-05, "loss": 1.9762, "step": 32481 }, { "epoch": 1.0480086144489635, "grad_norm": 0.349609375, "learning_rate": 1.4255609534347864e-05, "loss": 1.9647, "step": 32482 }, { "epoch": 1.04804087830276, "grad_norm": 0.35546875, "learning_rate": 1.4254837179385157e-05, "loss": 1.9895, "step": 32483 }, { "epoch": 1.0480731421565563, "grad_norm": 0.3359375, "learning_rate": 1.4254064826402951e-05, "loss": 1.9605, "step": 32484 }, { "epoch": 1.0481054060103527, "grad_norm": 0.359375, "learning_rate": 1.4253292475403296e-05, "loss": 1.9783, "step": 32485 }, { "epoch": 1.048137669864149, "grad_norm": 0.337890625, "learning_rate": 1.4252520126388242e-05, "loss": 1.955, "step": 32486 }, { "epoch": 1.0481699337179453, "grad_norm": 0.353515625, "learning_rate": 1.4251747779359846e-05, "loss": 1.9783, "step": 32487 }, { "epoch": 1.0482021975717417, "grad_norm": 0.359375, "learning_rate": 1.425097543432016e-05, "loss": 1.9488, "step": 32488 }, { "epoch": 1.048234461425538, "grad_norm": 0.33984375, "learning_rate": 1.4250203091271235e-05, "loss": 1.9933, "step": 32489 }, { "epoch": 1.0482667252793343, "grad_norm": 0.37890625, "learning_rate": 1.424943075021513e-05, "loss": 1.9597, "step": 32490 }, { "epoch": 1.0482989891331307, "grad_norm": 0.349609375, "learning_rate": 1.4248658411153887e-05, "loss": 1.9748, "step": 32491 }, { "epoch": 1.048331252986927, "grad_norm": 0.40625, "learning_rate": 1.4247886074089568e-05, "loss": 1.9671, "step": 32492 }, { "epoch": 1.0483635168407235, "grad_norm": 0.341796875, "learning_rate": 1.4247113739024219e-05, "loss": 2.0015, "step": 32493 }, { "epoch": 1.0483957806945199, "grad_norm": 0.337890625, "learning_rate": 1.4246341405959898e-05, "loss": 1.9705, "step": 32494 }, { "epoch": 1.048428044548316, "grad_norm": 0.361328125, "learning_rate": 1.4245569074898654e-05, "loss": 1.9819, "step": 32495 }, { "epoch": 1.0484603084021125, "grad_norm": 0.34375, "learning_rate": 1.4244796745842548e-05, "loss": 1.98, "step": 32496 }, { "epoch": 1.0484925722559089, "grad_norm": 0.369140625, "learning_rate": 1.424402441879362e-05, "loss": 1.9812, "step": 32497 }, { "epoch": 1.0485248361097053, "grad_norm": 0.37890625, "learning_rate": 1.4243252093753928e-05, "loss": 1.9186, "step": 32498 }, { "epoch": 1.0485570999635014, "grad_norm": 0.36328125, "learning_rate": 1.4242479770725528e-05, "loss": 1.9695, "step": 32499 }, { "epoch": 1.0485893638172978, "grad_norm": 0.369140625, "learning_rate": 1.4241707449710468e-05, "loss": 1.9793, "step": 32500 }, { "epoch": 1.0486216276710942, "grad_norm": 0.390625, "learning_rate": 1.4240935130710804e-05, "loss": 1.9876, "step": 32501 }, { "epoch": 1.0486538915248906, "grad_norm": 0.337890625, "learning_rate": 1.4240162813728592e-05, "loss": 1.9617, "step": 32502 }, { "epoch": 1.0486861553786868, "grad_norm": 0.388671875, "learning_rate": 1.4239390498765873e-05, "loss": 1.9844, "step": 32503 }, { "epoch": 1.0487184192324832, "grad_norm": 0.353515625, "learning_rate": 1.423861818582471e-05, "loss": 1.9693, "step": 32504 }, { "epoch": 1.0487506830862796, "grad_norm": 0.35546875, "learning_rate": 1.4237845874907152e-05, "loss": 1.9695, "step": 32505 }, { "epoch": 1.048782946940076, "grad_norm": 0.390625, "learning_rate": 1.4237073566015253e-05, "loss": 1.9716, "step": 32506 }, { "epoch": 1.0488152107938722, "grad_norm": 0.357421875, "learning_rate": 1.423630125915106e-05, "loss": 1.9946, "step": 32507 }, { "epoch": 1.0488474746476686, "grad_norm": 0.359375, "learning_rate": 1.423552895431664e-05, "loss": 1.9687, "step": 32508 }, { "epoch": 1.048879738501465, "grad_norm": 0.376953125, "learning_rate": 1.423475665151403e-05, "loss": 1.9611, "step": 32509 }, { "epoch": 1.0489120023552614, "grad_norm": 0.3671875, "learning_rate": 1.423398435074529e-05, "loss": 1.9935, "step": 32510 }, { "epoch": 1.0489442662090576, "grad_norm": 0.353515625, "learning_rate": 1.4233212052012467e-05, "loss": 1.9611, "step": 32511 }, { "epoch": 1.048976530062854, "grad_norm": 0.353515625, "learning_rate": 1.4232439755317622e-05, "loss": 1.9498, "step": 32512 }, { "epoch": 1.0490087939166504, "grad_norm": 0.357421875, "learning_rate": 1.4231667460662803e-05, "loss": 1.9781, "step": 32513 }, { "epoch": 1.0490410577704468, "grad_norm": 0.3671875, "learning_rate": 1.4230895168050069e-05, "loss": 1.9652, "step": 32514 }, { "epoch": 1.0490733216242432, "grad_norm": 0.373046875, "learning_rate": 1.4230122877481462e-05, "loss": 1.994, "step": 32515 }, { "epoch": 1.0491055854780393, "grad_norm": 0.37109375, "learning_rate": 1.4229350588959035e-05, "loss": 1.9358, "step": 32516 }, { "epoch": 1.0491378493318357, "grad_norm": 0.3671875, "learning_rate": 1.4228578302484848e-05, "loss": 1.98, "step": 32517 }, { "epoch": 1.0491701131856321, "grad_norm": 0.365234375, "learning_rate": 1.422780601806095e-05, "loss": 1.9974, "step": 32518 }, { "epoch": 1.0492023770394285, "grad_norm": 0.361328125, "learning_rate": 1.4227033735689395e-05, "loss": 1.9821, "step": 32519 }, { "epoch": 1.0492346408932247, "grad_norm": 0.361328125, "learning_rate": 1.422626145537224e-05, "loss": 1.9667, "step": 32520 }, { "epoch": 1.0492669047470211, "grad_norm": 0.50390625, "learning_rate": 1.4225489177111526e-05, "loss": 1.9455, "step": 32521 }, { "epoch": 1.0492991686008175, "grad_norm": 0.373046875, "learning_rate": 1.4224716900909313e-05, "loss": 1.9666, "step": 32522 }, { "epoch": 1.049331432454614, "grad_norm": 0.3515625, "learning_rate": 1.4223944626767652e-05, "loss": 1.977, "step": 32523 }, { "epoch": 1.04936369630841, "grad_norm": 0.3515625, "learning_rate": 1.4223172354688597e-05, "loss": 1.9588, "step": 32524 }, { "epoch": 1.0493959601622065, "grad_norm": 0.365234375, "learning_rate": 1.4222400084674199e-05, "loss": 1.9466, "step": 32525 }, { "epoch": 1.0494282240160029, "grad_norm": 0.361328125, "learning_rate": 1.4221627816726513e-05, "loss": 1.9707, "step": 32526 }, { "epoch": 1.0494604878697993, "grad_norm": 0.33984375, "learning_rate": 1.4220855550847589e-05, "loss": 1.9506, "step": 32527 }, { "epoch": 1.0494927517235955, "grad_norm": 0.36328125, "learning_rate": 1.4220083287039477e-05, "loss": 1.9466, "step": 32528 }, { "epoch": 1.0495250155773919, "grad_norm": 0.34765625, "learning_rate": 1.4219311025304235e-05, "loss": 1.9631, "step": 32529 }, { "epoch": 1.0495572794311883, "grad_norm": 0.34375, "learning_rate": 1.4218538765643913e-05, "loss": 1.9579, "step": 32530 }, { "epoch": 1.0495895432849847, "grad_norm": 0.3515625, "learning_rate": 1.4217766508060563e-05, "loss": 1.9828, "step": 32531 }, { "epoch": 1.0496218071387808, "grad_norm": 0.3515625, "learning_rate": 1.4216994252556243e-05, "loss": 1.9448, "step": 32532 }, { "epoch": 1.0496540709925772, "grad_norm": 0.33203125, "learning_rate": 1.4216221999132995e-05, "loss": 1.9946, "step": 32533 }, { "epoch": 1.0496863348463736, "grad_norm": 0.36328125, "learning_rate": 1.4215449747792878e-05, "loss": 1.9363, "step": 32534 }, { "epoch": 1.04971859870017, "grad_norm": 0.33984375, "learning_rate": 1.4214677498537945e-05, "loss": 1.9742, "step": 32535 }, { "epoch": 1.0497508625539664, "grad_norm": 0.361328125, "learning_rate": 1.4213905251370247e-05, "loss": 1.9949, "step": 32536 }, { "epoch": 1.0497831264077626, "grad_norm": 0.328125, "learning_rate": 1.4213133006291835e-05, "loss": 1.967, "step": 32537 }, { "epoch": 1.049815390261559, "grad_norm": 0.34765625, "learning_rate": 1.4212360763304767e-05, "loss": 1.9831, "step": 32538 }, { "epoch": 1.0498476541153554, "grad_norm": 0.34765625, "learning_rate": 1.4211588522411089e-05, "loss": 1.9489, "step": 32539 }, { "epoch": 1.0498799179691518, "grad_norm": 0.341796875, "learning_rate": 1.4210816283612857e-05, "loss": 1.9659, "step": 32540 }, { "epoch": 1.049912181822948, "grad_norm": 0.369140625, "learning_rate": 1.4210044046912122e-05, "loss": 1.9511, "step": 32541 }, { "epoch": 1.0499444456767444, "grad_norm": 0.33984375, "learning_rate": 1.4209271812310936e-05, "loss": 1.979, "step": 32542 }, { "epoch": 1.0499767095305408, "grad_norm": 0.35546875, "learning_rate": 1.420849957981135e-05, "loss": 1.9768, "step": 32543 }, { "epoch": 1.0500089733843372, "grad_norm": 0.357421875, "learning_rate": 1.4207727349415428e-05, "loss": 1.9767, "step": 32544 }, { "epoch": 1.0500412372381334, "grad_norm": 0.353515625, "learning_rate": 1.4206955121125208e-05, "loss": 1.9892, "step": 32545 }, { "epoch": 1.0500735010919298, "grad_norm": 0.337890625, "learning_rate": 1.4206182894942747e-05, "loss": 1.9703, "step": 32546 }, { "epoch": 1.0501057649457262, "grad_norm": 0.359375, "learning_rate": 1.4205410670870098e-05, "loss": 1.9949, "step": 32547 }, { "epoch": 1.0501380287995226, "grad_norm": 0.35546875, "learning_rate": 1.4204638448909312e-05, "loss": 1.9856, "step": 32548 }, { "epoch": 1.0501702926533187, "grad_norm": 0.349609375, "learning_rate": 1.4203866229062444e-05, "loss": 1.9796, "step": 32549 }, { "epoch": 1.0502025565071151, "grad_norm": 0.361328125, "learning_rate": 1.4203094011331554e-05, "loss": 1.937, "step": 32550 }, { "epoch": 1.0502348203609115, "grad_norm": 0.365234375, "learning_rate": 1.4202321795718679e-05, "loss": 1.9851, "step": 32551 }, { "epoch": 1.050267084214708, "grad_norm": 0.35546875, "learning_rate": 1.4201549582225877e-05, "loss": 1.9706, "step": 32552 }, { "epoch": 1.0502993480685043, "grad_norm": 0.349609375, "learning_rate": 1.4200777370855199e-05, "loss": 1.9158, "step": 32553 }, { "epoch": 1.0503316119223005, "grad_norm": 0.349609375, "learning_rate": 1.4200005161608704e-05, "loss": 1.982, "step": 32554 }, { "epoch": 1.050363875776097, "grad_norm": 0.34765625, "learning_rate": 1.419923295448844e-05, "loss": 1.9956, "step": 32555 }, { "epoch": 1.0503961396298933, "grad_norm": 0.35546875, "learning_rate": 1.4198460749496467e-05, "loss": 1.9762, "step": 32556 }, { "epoch": 1.0504284034836897, "grad_norm": 0.341796875, "learning_rate": 1.4197688546634822e-05, "loss": 1.9633, "step": 32557 }, { "epoch": 1.0504606673374859, "grad_norm": 0.365234375, "learning_rate": 1.4196916345905564e-05, "loss": 1.9885, "step": 32558 }, { "epoch": 1.0504929311912823, "grad_norm": 0.357421875, "learning_rate": 1.4196144147310749e-05, "loss": 1.9558, "step": 32559 }, { "epoch": 1.0505251950450787, "grad_norm": 0.3359375, "learning_rate": 1.4195371950852427e-05, "loss": 1.9581, "step": 32560 }, { "epoch": 1.050557458898875, "grad_norm": 0.3515625, "learning_rate": 1.4194599756532652e-05, "loss": 1.9791, "step": 32561 }, { "epoch": 1.0505897227526713, "grad_norm": 0.35546875, "learning_rate": 1.4193827564353474e-05, "loss": 1.9772, "step": 32562 }, { "epoch": 1.0506219866064677, "grad_norm": 0.353515625, "learning_rate": 1.4193055374316951e-05, "loss": 2.0001, "step": 32563 }, { "epoch": 1.050654250460264, "grad_norm": 0.365234375, "learning_rate": 1.4192283186425126e-05, "loss": 1.9593, "step": 32564 }, { "epoch": 1.0506865143140605, "grad_norm": 0.369140625, "learning_rate": 1.4191511000680057e-05, "loss": 1.9623, "step": 32565 }, { "epoch": 1.0507187781678566, "grad_norm": 0.369140625, "learning_rate": 1.4190738817083795e-05, "loss": 1.9906, "step": 32566 }, { "epoch": 1.050751042021653, "grad_norm": 0.36328125, "learning_rate": 1.418996663563839e-05, "loss": 1.9587, "step": 32567 }, { "epoch": 1.0507833058754494, "grad_norm": 0.341796875, "learning_rate": 1.41891944563459e-05, "loss": 1.9656, "step": 32568 }, { "epoch": 1.0508155697292458, "grad_norm": 0.359375, "learning_rate": 1.4188422279208378e-05, "loss": 1.9737, "step": 32569 }, { "epoch": 1.050847833583042, "grad_norm": 0.359375, "learning_rate": 1.4187650104227868e-05, "loss": 1.9775, "step": 32570 }, { "epoch": 1.0508800974368384, "grad_norm": 0.34375, "learning_rate": 1.4186877931406424e-05, "loss": 1.9838, "step": 32571 }, { "epoch": 1.0509123612906348, "grad_norm": 0.375, "learning_rate": 1.4186105760746104e-05, "loss": 1.9643, "step": 32572 }, { "epoch": 1.0509446251444312, "grad_norm": 0.35546875, "learning_rate": 1.4185333592248958e-05, "loss": 1.9446, "step": 32573 }, { "epoch": 1.0509768889982276, "grad_norm": 0.36328125, "learning_rate": 1.4184561425917036e-05, "loss": 1.9774, "step": 32574 }, { "epoch": 1.0510091528520238, "grad_norm": 0.361328125, "learning_rate": 1.4183789261752396e-05, "loss": 1.963, "step": 32575 }, { "epoch": 1.0510414167058202, "grad_norm": 0.3515625, "learning_rate": 1.4183017099757084e-05, "loss": 1.9756, "step": 32576 }, { "epoch": 1.0510736805596166, "grad_norm": 0.353515625, "learning_rate": 1.4182244939933152e-05, "loss": 1.959, "step": 32577 }, { "epoch": 1.051105944413413, "grad_norm": 0.34765625, "learning_rate": 1.4181472782282657e-05, "loss": 1.9698, "step": 32578 }, { "epoch": 1.0511382082672092, "grad_norm": 0.34765625, "learning_rate": 1.4180700626807648e-05, "loss": 1.9845, "step": 32579 }, { "epoch": 1.0511704721210056, "grad_norm": 0.349609375, "learning_rate": 1.4179928473510174e-05, "loss": 2.0089, "step": 32580 }, { "epoch": 1.051202735974802, "grad_norm": 0.35546875, "learning_rate": 1.4179156322392301e-05, "loss": 1.9901, "step": 32581 }, { "epoch": 1.0512349998285984, "grad_norm": 0.353515625, "learning_rate": 1.4178384173456067e-05, "loss": 1.986, "step": 32582 }, { "epoch": 1.0512672636823945, "grad_norm": 0.375, "learning_rate": 1.4177612026703528e-05, "loss": 1.9786, "step": 32583 }, { "epoch": 1.051299527536191, "grad_norm": 0.359375, "learning_rate": 1.4176839882136737e-05, "loss": 1.9464, "step": 32584 }, { "epoch": 1.0513317913899873, "grad_norm": 0.34765625, "learning_rate": 1.4176067739757742e-05, "loss": 1.974, "step": 32585 }, { "epoch": 1.0513640552437837, "grad_norm": 0.361328125, "learning_rate": 1.4175295599568603e-05, "loss": 1.9628, "step": 32586 }, { "epoch": 1.05139631909758, "grad_norm": 0.3359375, "learning_rate": 1.4174523461571378e-05, "loss": 1.9612, "step": 32587 }, { "epoch": 1.0514285829513763, "grad_norm": 0.34765625, "learning_rate": 1.41737513257681e-05, "loss": 1.9745, "step": 32588 }, { "epoch": 1.0514608468051727, "grad_norm": 0.34375, "learning_rate": 1.4172979192160832e-05, "loss": 1.9717, "step": 32589 }, { "epoch": 1.051493110658969, "grad_norm": 0.33984375, "learning_rate": 1.4172207060751623e-05, "loss": 1.986, "step": 32590 }, { "epoch": 1.0515253745127655, "grad_norm": 0.349609375, "learning_rate": 1.417143493154253e-05, "loss": 1.9754, "step": 32591 }, { "epoch": 1.0515576383665617, "grad_norm": 0.35546875, "learning_rate": 1.4170662804535603e-05, "loss": 1.9676, "step": 32592 }, { "epoch": 1.051589902220358, "grad_norm": 0.341796875, "learning_rate": 1.41698906797329e-05, "loss": 1.9444, "step": 32593 }, { "epoch": 1.0516221660741545, "grad_norm": 0.34765625, "learning_rate": 1.4169118557136457e-05, "loss": 1.9782, "step": 32594 }, { "epoch": 1.0516544299279509, "grad_norm": 0.404296875, "learning_rate": 1.4168346436748338e-05, "loss": 1.9819, "step": 32595 }, { "epoch": 1.051686693781747, "grad_norm": 0.34765625, "learning_rate": 1.4167574318570594e-05, "loss": 1.9797, "step": 32596 }, { "epoch": 1.0517189576355435, "grad_norm": 0.357421875, "learning_rate": 1.4166802202605277e-05, "loss": 1.9867, "step": 32597 }, { "epoch": 1.0517512214893399, "grad_norm": 0.35546875, "learning_rate": 1.4166030088854439e-05, "loss": 1.9792, "step": 32598 }, { "epoch": 1.0517834853431363, "grad_norm": 0.375, "learning_rate": 1.4165257977320135e-05, "loss": 1.97, "step": 32599 }, { "epoch": 1.0518157491969324, "grad_norm": 0.353515625, "learning_rate": 1.416448586800441e-05, "loss": 1.9706, "step": 32600 }, { "epoch": 1.0518480130507288, "grad_norm": 0.388671875, "learning_rate": 1.4163713760909319e-05, "loss": 1.9744, "step": 32601 }, { "epoch": 1.0518802769045252, "grad_norm": 0.408203125, "learning_rate": 1.4162941656036914e-05, "loss": 1.9659, "step": 32602 }, { "epoch": 1.0519125407583216, "grad_norm": 0.37109375, "learning_rate": 1.4162169553389252e-05, "loss": 1.9692, "step": 32603 }, { "epoch": 1.0519448046121178, "grad_norm": 0.3984375, "learning_rate": 1.4161397452968378e-05, "loss": 1.9632, "step": 32604 }, { "epoch": 1.0519770684659142, "grad_norm": 0.365234375, "learning_rate": 1.416062535477635e-05, "loss": 1.9976, "step": 32605 }, { "epoch": 1.0520093323197106, "grad_norm": 0.404296875, "learning_rate": 1.4159853258815216e-05, "loss": 1.9456, "step": 32606 }, { "epoch": 1.052041596173507, "grad_norm": 0.38671875, "learning_rate": 1.4159081165087029e-05, "loss": 1.9789, "step": 32607 }, { "epoch": 1.0520738600273032, "grad_norm": 0.373046875, "learning_rate": 1.4158309073593842e-05, "loss": 1.9952, "step": 32608 }, { "epoch": 1.0521061238810996, "grad_norm": 0.369140625, "learning_rate": 1.4157536984337706e-05, "loss": 1.9904, "step": 32609 }, { "epoch": 1.052138387734896, "grad_norm": 0.400390625, "learning_rate": 1.4156764897320673e-05, "loss": 1.9699, "step": 32610 }, { "epoch": 1.0521706515886924, "grad_norm": 0.357421875, "learning_rate": 1.4155992812544799e-05, "loss": 1.977, "step": 32611 }, { "epoch": 1.0522029154424888, "grad_norm": 0.3984375, "learning_rate": 1.415522073001213e-05, "loss": 1.9572, "step": 32612 }, { "epoch": 1.052235179296285, "grad_norm": 0.373046875, "learning_rate": 1.4154448649724722e-05, "loss": 1.981, "step": 32613 }, { "epoch": 1.0522674431500814, "grad_norm": 0.384765625, "learning_rate": 1.4153676571684624e-05, "loss": 1.9813, "step": 32614 }, { "epoch": 1.0522997070038778, "grad_norm": 0.3828125, "learning_rate": 1.415290449589389e-05, "loss": 1.9575, "step": 32615 }, { "epoch": 1.0523319708576742, "grad_norm": 0.359375, "learning_rate": 1.4152132422354572e-05, "loss": 1.932, "step": 32616 }, { "epoch": 1.0523642347114703, "grad_norm": 0.3515625, "learning_rate": 1.4151360351068726e-05, "loss": 1.9337, "step": 32617 }, { "epoch": 1.0523964985652667, "grad_norm": 0.375, "learning_rate": 1.4150588282038397e-05, "loss": 2.0054, "step": 32618 }, { "epoch": 1.0524287624190631, "grad_norm": 0.3515625, "learning_rate": 1.4149816215265638e-05, "loss": 1.9874, "step": 32619 }, { "epoch": 1.0524610262728595, "grad_norm": 0.359375, "learning_rate": 1.4149044150752505e-05, "loss": 2.0005, "step": 32620 }, { "epoch": 1.0524932901266557, "grad_norm": 0.353515625, "learning_rate": 1.4148272088501048e-05, "loss": 1.9914, "step": 32621 }, { "epoch": 1.052525553980452, "grad_norm": 0.36328125, "learning_rate": 1.4147500028513314e-05, "loss": 1.9565, "step": 32622 }, { "epoch": 1.0525578178342485, "grad_norm": 0.36328125, "learning_rate": 1.4146727970791371e-05, "loss": 1.9784, "step": 32623 }, { "epoch": 1.052590081688045, "grad_norm": 0.345703125, "learning_rate": 1.4145955915337252e-05, "loss": 1.9653, "step": 32624 }, { "epoch": 1.052622345541841, "grad_norm": 0.34765625, "learning_rate": 1.4145183862153017e-05, "loss": 1.9974, "step": 32625 }, { "epoch": 1.0526546093956375, "grad_norm": 0.33984375, "learning_rate": 1.414441181124072e-05, "loss": 1.9788, "step": 32626 }, { "epoch": 1.0526868732494339, "grad_norm": 0.34765625, "learning_rate": 1.4143639762602407e-05, "loss": 1.9618, "step": 32627 }, { "epoch": 1.0527191371032303, "grad_norm": 0.3515625, "learning_rate": 1.4142867716240135e-05, "loss": 1.993, "step": 32628 }, { "epoch": 1.0527514009570265, "grad_norm": 0.35546875, "learning_rate": 1.4142095672155963e-05, "loss": 1.9768, "step": 32629 }, { "epoch": 1.0527836648108229, "grad_norm": 0.349609375, "learning_rate": 1.4141323630351927e-05, "loss": 1.952, "step": 32630 }, { "epoch": 1.0528159286646193, "grad_norm": 0.345703125, "learning_rate": 1.4140551590830084e-05, "loss": 1.9382, "step": 32631 }, { "epoch": 1.0528481925184157, "grad_norm": 0.359375, "learning_rate": 1.4139779553592491e-05, "loss": 1.9951, "step": 32632 }, { "epoch": 1.052880456372212, "grad_norm": 0.353515625, "learning_rate": 1.4139007518641198e-05, "loss": 1.9741, "step": 32633 }, { "epoch": 1.0529127202260082, "grad_norm": 0.34765625, "learning_rate": 1.4138235485978257e-05, "loss": 1.9746, "step": 32634 }, { "epoch": 1.0529449840798046, "grad_norm": 0.357421875, "learning_rate": 1.4137463455605726e-05, "loss": 1.9841, "step": 32635 }, { "epoch": 1.052977247933601, "grad_norm": 0.349609375, "learning_rate": 1.413669142752564e-05, "loss": 1.9845, "step": 32636 }, { "epoch": 1.0530095117873974, "grad_norm": 0.3359375, "learning_rate": 1.4135919401740065e-05, "loss": 1.9982, "step": 32637 }, { "epoch": 1.0530417756411936, "grad_norm": 0.365234375, "learning_rate": 1.4135147378251046e-05, "loss": 1.9394, "step": 32638 }, { "epoch": 1.05307403949499, "grad_norm": 0.341796875, "learning_rate": 1.4134375357060642e-05, "loss": 1.9558, "step": 32639 }, { "epoch": 1.0531063033487864, "grad_norm": 0.3515625, "learning_rate": 1.4133603338170897e-05, "loss": 1.9777, "step": 32640 }, { "epoch": 1.0531385672025828, "grad_norm": 0.33203125, "learning_rate": 1.4132831321583873e-05, "loss": 1.9619, "step": 32641 }, { "epoch": 1.053170831056379, "grad_norm": 0.345703125, "learning_rate": 1.4132059307301611e-05, "loss": 1.954, "step": 32642 }, { "epoch": 1.0532030949101754, "grad_norm": 0.341796875, "learning_rate": 1.4131287295326166e-05, "loss": 1.9644, "step": 32643 }, { "epoch": 1.0532353587639718, "grad_norm": 0.349609375, "learning_rate": 1.4130515285659594e-05, "loss": 1.9761, "step": 32644 }, { "epoch": 1.0532676226177682, "grad_norm": 0.349609375, "learning_rate": 1.4129743278303941e-05, "loss": 1.9831, "step": 32645 }, { "epoch": 1.0532998864715644, "grad_norm": 0.33984375, "learning_rate": 1.4128971273261266e-05, "loss": 1.9821, "step": 32646 }, { "epoch": 1.0533321503253608, "grad_norm": 0.34375, "learning_rate": 1.4128199270533618e-05, "loss": 1.9696, "step": 32647 }, { "epoch": 1.0533644141791572, "grad_norm": 0.33984375, "learning_rate": 1.4127427270123042e-05, "loss": 1.9187, "step": 32648 }, { "epoch": 1.0533966780329536, "grad_norm": 0.3515625, "learning_rate": 1.4126655272031597e-05, "loss": 1.9838, "step": 32649 }, { "epoch": 1.0534289418867497, "grad_norm": 0.341796875, "learning_rate": 1.4125883276261334e-05, "loss": 1.9925, "step": 32650 }, { "epoch": 1.0534612057405461, "grad_norm": 0.34375, "learning_rate": 1.4125111282814304e-05, "loss": 1.954, "step": 32651 }, { "epoch": 1.0534934695943425, "grad_norm": 0.330078125, "learning_rate": 1.4124339291692558e-05, "loss": 1.9581, "step": 32652 }, { "epoch": 1.053525733448139, "grad_norm": 0.34375, "learning_rate": 1.4123567302898153e-05, "loss": 1.9491, "step": 32653 }, { "epoch": 1.0535579973019353, "grad_norm": 0.3359375, "learning_rate": 1.4122795316433133e-05, "loss": 1.9609, "step": 32654 }, { "epoch": 1.0535902611557315, "grad_norm": 0.34765625, "learning_rate": 1.4122023332299552e-05, "loss": 1.9928, "step": 32655 }, { "epoch": 1.053622525009528, "grad_norm": 0.33984375, "learning_rate": 1.4121251350499466e-05, "loss": 2.013, "step": 32656 }, { "epoch": 1.0536547888633243, "grad_norm": 0.357421875, "learning_rate": 1.4120479371034921e-05, "loss": 1.9571, "step": 32657 }, { "epoch": 1.0536870527171207, "grad_norm": 0.333984375, "learning_rate": 1.4119707393907973e-05, "loss": 1.963, "step": 32658 }, { "epoch": 1.0537193165709169, "grad_norm": 0.349609375, "learning_rate": 1.4118935419120675e-05, "loss": 1.9544, "step": 32659 }, { "epoch": 1.0537515804247133, "grad_norm": 0.349609375, "learning_rate": 1.4118163446675073e-05, "loss": 1.9883, "step": 32660 }, { "epoch": 1.0537838442785097, "grad_norm": 0.353515625, "learning_rate": 1.4117391476573223e-05, "loss": 1.9807, "step": 32661 }, { "epoch": 1.053816108132306, "grad_norm": 0.359375, "learning_rate": 1.4116619508817174e-05, "loss": 1.9673, "step": 32662 }, { "epoch": 1.0538483719861023, "grad_norm": 0.341796875, "learning_rate": 1.4115847543408976e-05, "loss": 1.9645, "step": 32663 }, { "epoch": 1.0538806358398987, "grad_norm": 0.365234375, "learning_rate": 1.411507558035069e-05, "loss": 1.9814, "step": 32664 }, { "epoch": 1.053912899693695, "grad_norm": 0.359375, "learning_rate": 1.4114303619644365e-05, "loss": 1.9663, "step": 32665 }, { "epoch": 1.0539451635474915, "grad_norm": 0.34765625, "learning_rate": 1.4113531661292045e-05, "loss": 1.9839, "step": 32666 }, { "epoch": 1.0539774274012876, "grad_norm": 0.365234375, "learning_rate": 1.4112759705295784e-05, "loss": 1.9627, "step": 32667 }, { "epoch": 1.054009691255084, "grad_norm": 0.34765625, "learning_rate": 1.4111987751657636e-05, "loss": 1.9561, "step": 32668 }, { "epoch": 1.0540419551088804, "grad_norm": 0.337890625, "learning_rate": 1.4111215800379654e-05, "loss": 1.9257, "step": 32669 }, { "epoch": 1.0540742189626768, "grad_norm": 0.36328125, "learning_rate": 1.4110443851463889e-05, "loss": 1.9601, "step": 32670 }, { "epoch": 1.054106482816473, "grad_norm": 0.341796875, "learning_rate": 1.4109671904912398e-05, "loss": 1.9834, "step": 32671 }, { "epoch": 1.0541387466702694, "grad_norm": 0.35546875, "learning_rate": 1.410889996072722e-05, "loss": 1.9877, "step": 32672 }, { "epoch": 1.0541710105240658, "grad_norm": 0.3671875, "learning_rate": 1.4108128018910413e-05, "loss": 1.9939, "step": 32673 }, { "epoch": 1.0542032743778622, "grad_norm": 0.33984375, "learning_rate": 1.410735607946403e-05, "loss": 1.9675, "step": 32674 }, { "epoch": 1.0542355382316586, "grad_norm": 0.375, "learning_rate": 1.410658414239012e-05, "loss": 1.9776, "step": 32675 }, { "epoch": 1.0542678020854548, "grad_norm": 0.3515625, "learning_rate": 1.410581220769074e-05, "loss": 1.9558, "step": 32676 }, { "epoch": 1.0543000659392512, "grad_norm": 0.357421875, "learning_rate": 1.4105040275367944e-05, "loss": 1.9597, "step": 32677 }, { "epoch": 1.0543323297930476, "grad_norm": 0.349609375, "learning_rate": 1.4104268345423765e-05, "loss": 1.9608, "step": 32678 }, { "epoch": 1.054364593646844, "grad_norm": 0.345703125, "learning_rate": 1.4103496417860273e-05, "loss": 2.013, "step": 32679 }, { "epoch": 1.0543968575006402, "grad_norm": 0.34375, "learning_rate": 1.4102724492679513e-05, "loss": 1.9816, "step": 32680 }, { "epoch": 1.0544291213544366, "grad_norm": 0.337890625, "learning_rate": 1.410195256988354e-05, "loss": 1.979, "step": 32681 }, { "epoch": 1.054461385208233, "grad_norm": 0.349609375, "learning_rate": 1.4101180649474401e-05, "loss": 1.927, "step": 32682 }, { "epoch": 1.0544936490620294, "grad_norm": 0.345703125, "learning_rate": 1.4100408731454153e-05, "loss": 1.976, "step": 32683 }, { "epoch": 1.0545259129158255, "grad_norm": 0.34765625, "learning_rate": 1.4099636815824843e-05, "loss": 2.0116, "step": 32684 }, { "epoch": 1.054558176769622, "grad_norm": 0.34375, "learning_rate": 1.4098864902588522e-05, "loss": 1.9833, "step": 32685 }, { "epoch": 1.0545904406234183, "grad_norm": 0.365234375, "learning_rate": 1.4098092991747245e-05, "loss": 1.9578, "step": 32686 }, { "epoch": 1.0546227044772147, "grad_norm": 0.359375, "learning_rate": 1.4097321083303062e-05, "loss": 1.9725, "step": 32687 }, { "epoch": 1.054654968331011, "grad_norm": 0.34375, "learning_rate": 1.4096549177258024e-05, "loss": 1.9784, "step": 32688 }, { "epoch": 1.0546872321848073, "grad_norm": 0.34765625, "learning_rate": 1.4095777273614185e-05, "loss": 1.9787, "step": 32689 }, { "epoch": 1.0547194960386037, "grad_norm": 0.3515625, "learning_rate": 1.4095005372373598e-05, "loss": 2.0039, "step": 32690 }, { "epoch": 1.0547517598924, "grad_norm": 0.359375, "learning_rate": 1.4094233473538308e-05, "loss": 1.986, "step": 32691 }, { "epoch": 1.0547840237461963, "grad_norm": 0.337890625, "learning_rate": 1.4093461577110369e-05, "loss": 1.9625, "step": 32692 }, { "epoch": 1.0548162875999927, "grad_norm": 0.35546875, "learning_rate": 1.4092689683091834e-05, "loss": 1.9719, "step": 32693 }, { "epoch": 1.054848551453789, "grad_norm": 0.345703125, "learning_rate": 1.4091917791484755e-05, "loss": 1.9507, "step": 32694 }, { "epoch": 1.0548808153075855, "grad_norm": 0.357421875, "learning_rate": 1.4091145902291182e-05, "loss": 2.0088, "step": 32695 }, { "epoch": 1.0549130791613819, "grad_norm": 0.341796875, "learning_rate": 1.4090374015513173e-05, "loss": 1.9888, "step": 32696 }, { "epoch": 1.054945343015178, "grad_norm": 0.365234375, "learning_rate": 1.4089602131152767e-05, "loss": 1.995, "step": 32697 }, { "epoch": 1.0549776068689745, "grad_norm": 0.34375, "learning_rate": 1.4088830249212025e-05, "loss": 1.9745, "step": 32698 }, { "epoch": 1.0550098707227709, "grad_norm": 0.34375, "learning_rate": 1.4088058369692995e-05, "loss": 1.973, "step": 32699 }, { "epoch": 1.0550421345765673, "grad_norm": 0.3515625, "learning_rate": 1.4087286492597726e-05, "loss": 1.9807, "step": 32700 }, { "epoch": 1.0550743984303634, "grad_norm": 0.353515625, "learning_rate": 1.4086514617928275e-05, "loss": 1.9897, "step": 32701 }, { "epoch": 1.0551066622841598, "grad_norm": 0.33203125, "learning_rate": 1.40857427456867e-05, "loss": 1.9822, "step": 32702 }, { "epoch": 1.0551389261379562, "grad_norm": 0.353515625, "learning_rate": 1.4084970875875037e-05, "loss": 1.9782, "step": 32703 }, { "epoch": 1.0551711899917526, "grad_norm": 0.341796875, "learning_rate": 1.4084199008495343e-05, "loss": 1.9673, "step": 32704 }, { "epoch": 1.0552034538455488, "grad_norm": 0.34765625, "learning_rate": 1.4083427143549667e-05, "loss": 1.9908, "step": 32705 }, { "epoch": 1.0552357176993452, "grad_norm": 0.345703125, "learning_rate": 1.4082655281040069e-05, "loss": 1.9817, "step": 32706 }, { "epoch": 1.0552679815531416, "grad_norm": 0.341796875, "learning_rate": 1.4081883420968595e-05, "loss": 1.9552, "step": 32707 }, { "epoch": 1.055300245406938, "grad_norm": 0.353515625, "learning_rate": 1.4081111563337305e-05, "loss": 1.9731, "step": 32708 }, { "epoch": 1.0553325092607342, "grad_norm": 0.3671875, "learning_rate": 1.4080339708148234e-05, "loss": 1.9813, "step": 32709 }, { "epoch": 1.0553647731145306, "grad_norm": 0.34765625, "learning_rate": 1.407956785540344e-05, "loss": 1.9516, "step": 32710 }, { "epoch": 1.055397036968327, "grad_norm": 0.3515625, "learning_rate": 1.407879600510498e-05, "loss": 1.9735, "step": 32711 }, { "epoch": 1.0554293008221234, "grad_norm": 0.345703125, "learning_rate": 1.4078024157254901e-05, "loss": 1.9514, "step": 32712 }, { "epoch": 1.0554615646759196, "grad_norm": 0.35546875, "learning_rate": 1.4077252311855256e-05, "loss": 1.9634, "step": 32713 }, { "epoch": 1.055493828529716, "grad_norm": 0.34375, "learning_rate": 1.4076480468908104e-05, "loss": 1.9627, "step": 32714 }, { "epoch": 1.0555260923835124, "grad_norm": 0.353515625, "learning_rate": 1.4075708628415475e-05, "loss": 1.9556, "step": 32715 }, { "epoch": 1.0555583562373088, "grad_norm": 0.361328125, "learning_rate": 1.407493679037944e-05, "loss": 1.9717, "step": 32716 }, { "epoch": 1.0555906200911052, "grad_norm": 0.41796875, "learning_rate": 1.4074164954802042e-05, "loss": 1.9727, "step": 32717 }, { "epoch": 1.0556228839449013, "grad_norm": 0.34765625, "learning_rate": 1.4073393121685333e-05, "loss": 1.9754, "step": 32718 }, { "epoch": 1.0556551477986977, "grad_norm": 0.345703125, "learning_rate": 1.4072621291031368e-05, "loss": 1.9659, "step": 32719 }, { "epoch": 1.0556874116524941, "grad_norm": 0.333984375, "learning_rate": 1.40718494628422e-05, "loss": 1.9514, "step": 32720 }, { "epoch": 1.0557196755062905, "grad_norm": 0.3515625, "learning_rate": 1.4071077637119872e-05, "loss": 1.9935, "step": 32721 }, { "epoch": 1.0557519393600867, "grad_norm": 0.349609375, "learning_rate": 1.407030581386644e-05, "loss": 1.991, "step": 32722 }, { "epoch": 1.055784203213883, "grad_norm": 0.330078125, "learning_rate": 1.4069533993083955e-05, "loss": 1.9619, "step": 32723 }, { "epoch": 1.0558164670676795, "grad_norm": 0.33984375, "learning_rate": 1.4068762174774469e-05, "loss": 1.9601, "step": 32724 }, { "epoch": 1.055848730921476, "grad_norm": 0.353515625, "learning_rate": 1.4067990358940033e-05, "loss": 1.9778, "step": 32725 }, { "epoch": 1.055880994775272, "grad_norm": 0.33203125, "learning_rate": 1.40672185455827e-05, "loss": 1.9894, "step": 32726 }, { "epoch": 1.0559132586290685, "grad_norm": 0.34765625, "learning_rate": 1.4066446734704519e-05, "loss": 1.9606, "step": 32727 }, { "epoch": 1.0559455224828649, "grad_norm": 0.341796875, "learning_rate": 1.4065674926307539e-05, "loss": 1.9708, "step": 32728 }, { "epoch": 1.0559777863366613, "grad_norm": 0.341796875, "learning_rate": 1.4064903120393815e-05, "loss": 1.943, "step": 32729 }, { "epoch": 1.0560100501904575, "grad_norm": 0.345703125, "learning_rate": 1.40641313169654e-05, "loss": 1.9754, "step": 32730 }, { "epoch": 1.0560423140442539, "grad_norm": 0.345703125, "learning_rate": 1.4063359516024341e-05, "loss": 1.9936, "step": 32731 }, { "epoch": 1.0560745778980503, "grad_norm": 0.33203125, "learning_rate": 1.4062587717572693e-05, "loss": 1.9736, "step": 32732 }, { "epoch": 1.0561068417518467, "grad_norm": 0.3515625, "learning_rate": 1.4061815921612505e-05, "loss": 1.9701, "step": 32733 }, { "epoch": 1.0561391056056428, "grad_norm": 0.33203125, "learning_rate": 1.4061044128145826e-05, "loss": 1.9847, "step": 32734 }, { "epoch": 1.0561713694594392, "grad_norm": 0.3515625, "learning_rate": 1.4060272337174712e-05, "loss": 1.9848, "step": 32735 }, { "epoch": 1.0562036333132356, "grad_norm": 0.3515625, "learning_rate": 1.4059500548701213e-05, "loss": 1.9775, "step": 32736 }, { "epoch": 1.056235897167032, "grad_norm": 0.333984375, "learning_rate": 1.4058728762727371e-05, "loss": 1.9512, "step": 32737 }, { "epoch": 1.0562681610208284, "grad_norm": 0.349609375, "learning_rate": 1.405795697925526e-05, "loss": 1.9567, "step": 32738 }, { "epoch": 1.0563004248746246, "grad_norm": 0.345703125, "learning_rate": 1.405718519828691e-05, "loss": 1.994, "step": 32739 }, { "epoch": 1.056332688728421, "grad_norm": 0.3359375, "learning_rate": 1.4056413419824381e-05, "loss": 1.9688, "step": 32740 }, { "epoch": 1.0563649525822174, "grad_norm": 0.33984375, "learning_rate": 1.405564164386972e-05, "loss": 1.9753, "step": 32741 }, { "epoch": 1.0563972164360138, "grad_norm": 0.341796875, "learning_rate": 1.4054869870424977e-05, "loss": 1.9622, "step": 32742 }, { "epoch": 1.05642948028981, "grad_norm": 0.373046875, "learning_rate": 1.4054098099492212e-05, "loss": 1.9665, "step": 32743 }, { "epoch": 1.0564617441436064, "grad_norm": 0.345703125, "learning_rate": 1.4053326331073477e-05, "loss": 1.9865, "step": 32744 }, { "epoch": 1.0564940079974028, "grad_norm": 0.341796875, "learning_rate": 1.405255456517081e-05, "loss": 1.9444, "step": 32745 }, { "epoch": 1.0565262718511992, "grad_norm": 0.34375, "learning_rate": 1.405178280178627e-05, "loss": 1.9677, "step": 32746 }, { "epoch": 1.0565585357049954, "grad_norm": 0.357421875, "learning_rate": 1.4051011040921905e-05, "loss": 1.9579, "step": 32747 }, { "epoch": 1.0565907995587918, "grad_norm": 0.34765625, "learning_rate": 1.4050239282579773e-05, "loss": 2.0017, "step": 32748 }, { "epoch": 1.0566230634125882, "grad_norm": 0.345703125, "learning_rate": 1.404946752676192e-05, "loss": 1.9746, "step": 32749 }, { "epoch": 1.0566553272663846, "grad_norm": 0.333984375, "learning_rate": 1.4048695773470405e-05, "loss": 1.9806, "step": 32750 }, { "epoch": 1.0566875911201807, "grad_norm": 0.3359375, "learning_rate": 1.4047924022707263e-05, "loss": 1.9539, "step": 32751 }, { "epoch": 1.0567198549739771, "grad_norm": 0.3515625, "learning_rate": 1.4047152274474559e-05, "loss": 1.9421, "step": 32752 }, { "epoch": 1.0567521188277735, "grad_norm": 0.341796875, "learning_rate": 1.4046380528774338e-05, "loss": 1.9705, "step": 32753 }, { "epoch": 1.05678438268157, "grad_norm": 0.3359375, "learning_rate": 1.4045608785608654e-05, "loss": 1.9532, "step": 32754 }, { "epoch": 1.056816646535366, "grad_norm": 0.34375, "learning_rate": 1.4044837044979556e-05, "loss": 1.9708, "step": 32755 }, { "epoch": 1.0568489103891625, "grad_norm": 0.34765625, "learning_rate": 1.4044065306889101e-05, "loss": 1.9645, "step": 32756 }, { "epoch": 1.056881174242959, "grad_norm": 0.365234375, "learning_rate": 1.4043293571339333e-05, "loss": 1.9447, "step": 32757 }, { "epoch": 1.0569134380967553, "grad_norm": 0.3359375, "learning_rate": 1.4042521838332303e-05, "loss": 1.9816, "step": 32758 }, { "epoch": 1.0569457019505517, "grad_norm": 0.349609375, "learning_rate": 1.4041750107870066e-05, "loss": 1.9759, "step": 32759 }, { "epoch": 1.0569779658043479, "grad_norm": 0.3515625, "learning_rate": 1.404097837995467e-05, "loss": 1.9776, "step": 32760 }, { "epoch": 1.0570102296581443, "grad_norm": 0.330078125, "learning_rate": 1.4040206654588169e-05, "loss": 1.9916, "step": 32761 }, { "epoch": 1.0570424935119407, "grad_norm": 0.34765625, "learning_rate": 1.4039434931772618e-05, "loss": 1.9341, "step": 32762 }, { "epoch": 1.057074757365737, "grad_norm": 0.349609375, "learning_rate": 1.4038663211510058e-05, "loss": 1.9999, "step": 32763 }, { "epoch": 1.0571070212195333, "grad_norm": 0.34375, "learning_rate": 1.4037891493802546e-05, "loss": 2.0163, "step": 32764 }, { "epoch": 1.0571392850733297, "grad_norm": 0.34765625, "learning_rate": 1.4037119778652132e-05, "loss": 1.9801, "step": 32765 }, { "epoch": 1.057171548927126, "grad_norm": 0.376953125, "learning_rate": 1.4036348066060866e-05, "loss": 1.9624, "step": 32766 }, { "epoch": 1.0572038127809225, "grad_norm": 0.35546875, "learning_rate": 1.4035576356030802e-05, "loss": 1.9873, "step": 32767 }, { "epoch": 1.0572360766347186, "grad_norm": 0.39453125, "learning_rate": 1.403480464856399e-05, "loss": 1.9308, "step": 32768 }, { "epoch": 1.057268340488515, "grad_norm": 0.35546875, "learning_rate": 1.403403294366248e-05, "loss": 2.0021, "step": 32769 }, { "epoch": 1.0573006043423114, "grad_norm": 0.3828125, "learning_rate": 1.4033261241328323e-05, "loss": 1.9577, "step": 32770 }, { "epoch": 1.0573328681961078, "grad_norm": 0.359375, "learning_rate": 1.403248954156357e-05, "loss": 1.9654, "step": 32771 }, { "epoch": 1.057365132049904, "grad_norm": 0.361328125, "learning_rate": 1.4031717844370272e-05, "loss": 1.9765, "step": 32772 }, { "epoch": 1.0573973959037004, "grad_norm": 0.3671875, "learning_rate": 1.403094614975048e-05, "loss": 1.983, "step": 32773 }, { "epoch": 1.0574296597574968, "grad_norm": 0.35546875, "learning_rate": 1.4030174457706253e-05, "loss": 1.995, "step": 32774 }, { "epoch": 1.0574619236112932, "grad_norm": 0.359375, "learning_rate": 1.4029402768239627e-05, "loss": 1.9974, "step": 32775 }, { "epoch": 1.0574941874650896, "grad_norm": 0.349609375, "learning_rate": 1.402863108135266e-05, "loss": 2.0049, "step": 32776 }, { "epoch": 1.0575264513188858, "grad_norm": 0.34765625, "learning_rate": 1.4027859397047407e-05, "loss": 1.949, "step": 32777 }, { "epoch": 1.0575587151726822, "grad_norm": 0.36328125, "learning_rate": 1.4027087715325913e-05, "loss": 1.9827, "step": 32778 }, { "epoch": 1.0575909790264786, "grad_norm": 0.345703125, "learning_rate": 1.402631603619023e-05, "loss": 1.9316, "step": 32779 }, { "epoch": 1.057623242880275, "grad_norm": 0.341796875, "learning_rate": 1.402554435964242e-05, "loss": 1.9528, "step": 32780 }, { "epoch": 1.0576555067340712, "grad_norm": 0.357421875, "learning_rate": 1.4024772685684518e-05, "loss": 1.9763, "step": 32781 }, { "epoch": 1.0576877705878676, "grad_norm": 0.3515625, "learning_rate": 1.402400101431858e-05, "loss": 1.9554, "step": 32782 }, { "epoch": 1.057720034441664, "grad_norm": 0.3359375, "learning_rate": 1.4023229345546656e-05, "loss": 1.9391, "step": 32783 }, { "epoch": 1.0577522982954604, "grad_norm": 0.34375, "learning_rate": 1.4022457679370804e-05, "loss": 1.9843, "step": 32784 }, { "epoch": 1.0577845621492565, "grad_norm": 0.34765625, "learning_rate": 1.4021686015793068e-05, "loss": 1.9762, "step": 32785 }, { "epoch": 1.057816826003053, "grad_norm": 0.353515625, "learning_rate": 1.402091435481551e-05, "loss": 1.9934, "step": 32786 }, { "epoch": 1.0578490898568493, "grad_norm": 0.375, "learning_rate": 1.4020142696440164e-05, "loss": 2.0039, "step": 32787 }, { "epoch": 1.0578813537106457, "grad_norm": 0.337890625, "learning_rate": 1.4019371040669088e-05, "loss": 1.9741, "step": 32788 }, { "epoch": 1.057913617564442, "grad_norm": 0.373046875, "learning_rate": 1.4018599387504335e-05, "loss": 1.9795, "step": 32789 }, { "epoch": 1.0579458814182383, "grad_norm": 0.345703125, "learning_rate": 1.4017827736947955e-05, "loss": 1.9556, "step": 32790 }, { "epoch": 1.0579781452720347, "grad_norm": 0.34765625, "learning_rate": 1.4017056089002e-05, "loss": 1.9647, "step": 32791 }, { "epoch": 1.058010409125831, "grad_norm": 0.34765625, "learning_rate": 1.4016284443668524e-05, "loss": 1.9927, "step": 32792 }, { "epoch": 1.0580426729796273, "grad_norm": 0.341796875, "learning_rate": 1.4015512800949565e-05, "loss": 1.9887, "step": 32793 }, { "epoch": 1.0580749368334237, "grad_norm": 0.361328125, "learning_rate": 1.4014741160847188e-05, "loss": 1.9631, "step": 32794 }, { "epoch": 1.05810720068722, "grad_norm": 0.357421875, "learning_rate": 1.4013969523363435e-05, "loss": 1.993, "step": 32795 }, { "epoch": 1.0581394645410165, "grad_norm": 0.361328125, "learning_rate": 1.4013197888500362e-05, "loss": 1.9818, "step": 32796 }, { "epoch": 1.0581717283948129, "grad_norm": 0.369140625, "learning_rate": 1.4012426256260016e-05, "loss": 2.019, "step": 32797 }, { "epoch": 1.058203992248609, "grad_norm": 0.337890625, "learning_rate": 1.4011654626644457e-05, "loss": 1.9936, "step": 32798 }, { "epoch": 1.0582362561024055, "grad_norm": 0.357421875, "learning_rate": 1.4010882999655722e-05, "loss": 1.9816, "step": 32799 }, { "epoch": 1.0582685199562019, "grad_norm": 0.373046875, "learning_rate": 1.4010111375295869e-05, "loss": 1.9691, "step": 32800 }, { "epoch": 1.0583007838099983, "grad_norm": 0.337890625, "learning_rate": 1.400933975356695e-05, "loss": 1.9777, "step": 32801 }, { "epoch": 1.0583330476637944, "grad_norm": 0.39453125, "learning_rate": 1.4008568134471011e-05, "loss": 2.0056, "step": 32802 }, { "epoch": 1.0583653115175908, "grad_norm": 0.3359375, "learning_rate": 1.4007796518010108e-05, "loss": 1.974, "step": 32803 }, { "epoch": 1.0583975753713872, "grad_norm": 0.361328125, "learning_rate": 1.4007024904186294e-05, "loss": 1.976, "step": 32804 }, { "epoch": 1.0584298392251836, "grad_norm": 0.361328125, "learning_rate": 1.4006253293001614e-05, "loss": 1.9774, "step": 32805 }, { "epoch": 1.0584621030789798, "grad_norm": 0.44921875, "learning_rate": 1.4005481684458117e-05, "loss": 1.9908, "step": 32806 }, { "epoch": 1.0584943669327762, "grad_norm": 0.43359375, "learning_rate": 1.4004710078557858e-05, "loss": 1.9898, "step": 32807 }, { "epoch": 1.0585266307865726, "grad_norm": 0.40625, "learning_rate": 1.4003938475302886e-05, "loss": 2.0043, "step": 32808 }, { "epoch": 1.058558894640369, "grad_norm": 0.41015625, "learning_rate": 1.4003166874695255e-05, "loss": 1.9894, "step": 32809 }, { "epoch": 1.0585911584941652, "grad_norm": 0.42578125, "learning_rate": 1.4002395276737015e-05, "loss": 2.0017, "step": 32810 }, { "epoch": 1.0586234223479616, "grad_norm": 0.419921875, "learning_rate": 1.4001623681430213e-05, "loss": 1.9903, "step": 32811 }, { "epoch": 1.058655686201758, "grad_norm": 0.3828125, "learning_rate": 1.4000852088776902e-05, "loss": 1.9695, "step": 32812 }, { "epoch": 1.0586879500555544, "grad_norm": 0.396484375, "learning_rate": 1.4000080498779132e-05, "loss": 2.0048, "step": 32813 }, { "epoch": 1.0587202139093508, "grad_norm": 0.369140625, "learning_rate": 1.3999308911438956e-05, "loss": 2.0001, "step": 32814 }, { "epoch": 1.058752477763147, "grad_norm": 0.396484375, "learning_rate": 1.3998537326758422e-05, "loss": 1.9722, "step": 32815 }, { "epoch": 1.0587847416169434, "grad_norm": 0.369140625, "learning_rate": 1.3997765744739578e-05, "loss": 1.9547, "step": 32816 }, { "epoch": 1.0588170054707398, "grad_norm": 0.3671875, "learning_rate": 1.3996994165384491e-05, "loss": 2.0017, "step": 32817 }, { "epoch": 1.0588492693245362, "grad_norm": 0.3671875, "learning_rate": 1.399622258869519e-05, "loss": 2.0218, "step": 32818 }, { "epoch": 1.0588815331783323, "grad_norm": 0.3671875, "learning_rate": 1.3995451014673737e-05, "loss": 1.9924, "step": 32819 }, { "epoch": 1.0589137970321287, "grad_norm": 0.3515625, "learning_rate": 1.3994679443322177e-05, "loss": 2.0015, "step": 32820 }, { "epoch": 1.0589460608859251, "grad_norm": 0.365234375, "learning_rate": 1.3993907874642566e-05, "loss": 2.027, "step": 32821 }, { "epoch": 1.0589783247397215, "grad_norm": 0.396484375, "learning_rate": 1.3993136308636956e-05, "loss": 1.9236, "step": 32822 }, { "epoch": 1.0590105885935177, "grad_norm": 0.3515625, "learning_rate": 1.39923647453074e-05, "loss": 1.9509, "step": 32823 }, { "epoch": 1.059042852447314, "grad_norm": 0.375, "learning_rate": 1.3991593184655938e-05, "loss": 1.9371, "step": 32824 }, { "epoch": 1.0590751163011105, "grad_norm": 0.357421875, "learning_rate": 1.3990821626684621e-05, "loss": 1.9683, "step": 32825 }, { "epoch": 1.059107380154907, "grad_norm": 0.38671875, "learning_rate": 1.399005007139551e-05, "loss": 1.9531, "step": 32826 }, { "epoch": 1.059139644008703, "grad_norm": 0.345703125, "learning_rate": 1.398927851879065e-05, "loss": 2.0037, "step": 32827 }, { "epoch": 1.0591719078624995, "grad_norm": 0.361328125, "learning_rate": 1.398850696887209e-05, "loss": 1.9948, "step": 32828 }, { "epoch": 1.0592041717162959, "grad_norm": 0.373046875, "learning_rate": 1.398773542164189e-05, "loss": 2.0278, "step": 32829 }, { "epoch": 1.0592364355700923, "grad_norm": 0.361328125, "learning_rate": 1.3986963877102085e-05, "loss": 1.9526, "step": 32830 }, { "epoch": 1.0592686994238885, "grad_norm": 0.353515625, "learning_rate": 1.3986192335254739e-05, "loss": 1.9675, "step": 32831 }, { "epoch": 1.0593009632776849, "grad_norm": 0.365234375, "learning_rate": 1.3985420796101894e-05, "loss": 2.0227, "step": 32832 }, { "epoch": 1.0593332271314813, "grad_norm": 0.365234375, "learning_rate": 1.3984649259645606e-05, "loss": 1.9723, "step": 32833 }, { "epoch": 1.0593654909852777, "grad_norm": 0.35546875, "learning_rate": 1.3983877725887922e-05, "loss": 1.984, "step": 32834 }, { "epoch": 1.059397754839074, "grad_norm": 0.369140625, "learning_rate": 1.3983106194830898e-05, "loss": 1.9883, "step": 32835 }, { "epoch": 1.0594300186928702, "grad_norm": 0.361328125, "learning_rate": 1.3982334666476578e-05, "loss": 1.9801, "step": 32836 }, { "epoch": 1.0594622825466666, "grad_norm": 0.37890625, "learning_rate": 1.3981563140827016e-05, "loss": 2.0024, "step": 32837 }, { "epoch": 1.059494546400463, "grad_norm": 0.345703125, "learning_rate": 1.3980791617884264e-05, "loss": 1.9938, "step": 32838 }, { "epoch": 1.0595268102542594, "grad_norm": 0.36328125, "learning_rate": 1.3980020097650367e-05, "loss": 1.9672, "step": 32839 }, { "epoch": 1.0595590741080556, "grad_norm": 0.36328125, "learning_rate": 1.397924858012738e-05, "loss": 1.9926, "step": 32840 }, { "epoch": 1.059591337961852, "grad_norm": 0.35546875, "learning_rate": 1.3978477065317357e-05, "loss": 1.9399, "step": 32841 }, { "epoch": 1.0596236018156484, "grad_norm": 0.375, "learning_rate": 1.3977705553222341e-05, "loss": 1.9786, "step": 32842 }, { "epoch": 1.0596558656694448, "grad_norm": 0.388671875, "learning_rate": 1.3976934043844387e-05, "loss": 1.9916, "step": 32843 }, { "epoch": 1.059688129523241, "grad_norm": 0.37109375, "learning_rate": 1.397616253718554e-05, "loss": 1.9839, "step": 32844 }, { "epoch": 1.0597203933770374, "grad_norm": 0.384765625, "learning_rate": 1.3975391033247858e-05, "loss": 1.943, "step": 32845 }, { "epoch": 1.0597526572308338, "grad_norm": 0.36328125, "learning_rate": 1.3974619532033388e-05, "loss": 1.948, "step": 32846 }, { "epoch": 1.0597849210846302, "grad_norm": 0.384765625, "learning_rate": 1.3973848033544185e-05, "loss": 1.9761, "step": 32847 }, { "epoch": 1.0598171849384264, "grad_norm": 0.359375, "learning_rate": 1.397307653778229e-05, "loss": 1.9609, "step": 32848 }, { "epoch": 1.0598494487922228, "grad_norm": 0.392578125, "learning_rate": 1.397230504474976e-05, "loss": 1.9861, "step": 32849 }, { "epoch": 1.0598817126460192, "grad_norm": 0.361328125, "learning_rate": 1.3971533554448645e-05, "loss": 1.966, "step": 32850 }, { "epoch": 1.0599139764998156, "grad_norm": 0.3515625, "learning_rate": 1.3970762066880993e-05, "loss": 2.0194, "step": 32851 }, { "epoch": 1.0599462403536117, "grad_norm": 0.36328125, "learning_rate": 1.3969990582048853e-05, "loss": 1.9844, "step": 32852 }, { "epoch": 1.0599785042074081, "grad_norm": 0.365234375, "learning_rate": 1.3969219099954291e-05, "loss": 1.9991, "step": 32853 }, { "epoch": 1.0600107680612045, "grad_norm": 0.359375, "learning_rate": 1.3968447620599338e-05, "loss": 1.9792, "step": 32854 }, { "epoch": 1.060043031915001, "grad_norm": 0.3671875, "learning_rate": 1.3967676143986048e-05, "loss": 1.962, "step": 32855 }, { "epoch": 1.0600752957687973, "grad_norm": 0.40625, "learning_rate": 1.3966904670116478e-05, "loss": 1.9293, "step": 32856 }, { "epoch": 1.0601075596225935, "grad_norm": 0.375, "learning_rate": 1.3966133198992671e-05, "loss": 1.9887, "step": 32857 }, { "epoch": 1.06013982347639, "grad_norm": 0.359375, "learning_rate": 1.3965361730616687e-05, "loss": 1.9991, "step": 32858 }, { "epoch": 1.0601720873301863, "grad_norm": 0.365234375, "learning_rate": 1.3964590264990576e-05, "loss": 1.9817, "step": 32859 }, { "epoch": 1.0602043511839827, "grad_norm": 0.3671875, "learning_rate": 1.3963818802116378e-05, "loss": 1.9323, "step": 32860 }, { "epoch": 1.0602366150377789, "grad_norm": 0.375, "learning_rate": 1.396304734199615e-05, "loss": 2.0046, "step": 32861 }, { "epoch": 1.0602688788915753, "grad_norm": 0.341796875, "learning_rate": 1.3962275884631935e-05, "loss": 1.9547, "step": 32862 }, { "epoch": 1.0603011427453717, "grad_norm": 0.37109375, "learning_rate": 1.3961504430025796e-05, "loss": 1.9511, "step": 32863 }, { "epoch": 1.060333406599168, "grad_norm": 0.35546875, "learning_rate": 1.3960732978179776e-05, "loss": 1.977, "step": 32864 }, { "epoch": 1.0603656704529643, "grad_norm": 0.359375, "learning_rate": 1.3959961529095934e-05, "loss": 1.9044, "step": 32865 }, { "epoch": 1.0603979343067607, "grad_norm": 0.35546875, "learning_rate": 1.3959190082776306e-05, "loss": 1.9476, "step": 32866 }, { "epoch": 1.060430198160557, "grad_norm": 0.35546875, "learning_rate": 1.3958418639222945e-05, "loss": 1.9815, "step": 32867 }, { "epoch": 1.0604624620143535, "grad_norm": 0.84375, "learning_rate": 1.395764719843791e-05, "loss": 2.0537, "step": 32868 }, { "epoch": 1.0604947258681496, "grad_norm": 0.6171875, "learning_rate": 1.3956875760423247e-05, "loss": 2.0643, "step": 32869 }, { "epoch": 1.060526989721946, "grad_norm": 0.56640625, "learning_rate": 1.3956104325181008e-05, "loss": 1.9994, "step": 32870 }, { "epoch": 1.0605592535757424, "grad_norm": 0.5234375, "learning_rate": 1.3955332892713242e-05, "loss": 1.9826, "step": 32871 }, { "epoch": 1.0605915174295388, "grad_norm": 0.515625, "learning_rate": 1.3954561463021997e-05, "loss": 1.9578, "step": 32872 }, { "epoch": 1.060623781283335, "grad_norm": 0.4765625, "learning_rate": 1.3953790036109326e-05, "loss": 1.9604, "step": 32873 }, { "epoch": 1.0606560451371314, "grad_norm": 0.447265625, "learning_rate": 1.3953018611977277e-05, "loss": 1.9572, "step": 32874 }, { "epoch": 1.0606883089909278, "grad_norm": 0.474609375, "learning_rate": 1.3952247190627902e-05, "loss": 1.9725, "step": 32875 }, { "epoch": 1.0607205728447242, "grad_norm": 0.44140625, "learning_rate": 1.3951475772063252e-05, "loss": 1.9549, "step": 32876 }, { "epoch": 1.0607528366985206, "grad_norm": 0.447265625, "learning_rate": 1.3950704356285381e-05, "loss": 1.9552, "step": 32877 }, { "epoch": 1.0607851005523168, "grad_norm": 0.443359375, "learning_rate": 1.394993294329633e-05, "loss": 1.9853, "step": 32878 }, { "epoch": 1.0608173644061132, "grad_norm": 0.39453125, "learning_rate": 1.3949161533098155e-05, "loss": 1.9895, "step": 32879 }, { "epoch": 1.0608496282599096, "grad_norm": 0.404296875, "learning_rate": 1.3948390125692904e-05, "loss": 1.9822, "step": 32880 }, { "epoch": 1.060881892113706, "grad_norm": 0.412109375, "learning_rate": 1.3947618721082629e-05, "loss": 1.9983, "step": 32881 }, { "epoch": 1.0609141559675022, "grad_norm": 0.390625, "learning_rate": 1.394684731926938e-05, "loss": 1.9978, "step": 32882 }, { "epoch": 1.0609464198212986, "grad_norm": 0.38671875, "learning_rate": 1.3946075920255209e-05, "loss": 1.9949, "step": 32883 }, { "epoch": 1.060978683675095, "grad_norm": 0.349609375, "learning_rate": 1.3945304524042162e-05, "loss": 1.9612, "step": 32884 }, { "epoch": 1.0610109475288914, "grad_norm": 0.375, "learning_rate": 1.3944533130632291e-05, "loss": 2.01, "step": 32885 }, { "epoch": 1.0610432113826875, "grad_norm": 0.365234375, "learning_rate": 1.3943761740027646e-05, "loss": 2.0317, "step": 32886 }, { "epoch": 1.061075475236484, "grad_norm": 0.361328125, "learning_rate": 1.394299035223028e-05, "loss": 1.9821, "step": 32887 }, { "epoch": 1.0611077390902803, "grad_norm": 0.357421875, "learning_rate": 1.394221896724224e-05, "loss": 1.9663, "step": 32888 }, { "epoch": 1.0611400029440767, "grad_norm": 0.34765625, "learning_rate": 1.3941447585065581e-05, "loss": 1.9885, "step": 32889 }, { "epoch": 1.061172266797873, "grad_norm": 0.361328125, "learning_rate": 1.3940676205702344e-05, "loss": 1.9616, "step": 32890 }, { "epoch": 1.0612045306516693, "grad_norm": 0.341796875, "learning_rate": 1.3939904829154588e-05, "loss": 1.9977, "step": 32891 }, { "epoch": 1.0612367945054657, "grad_norm": 0.349609375, "learning_rate": 1.3939133455424358e-05, "loss": 1.9434, "step": 32892 }, { "epoch": 1.061269058359262, "grad_norm": 0.353515625, "learning_rate": 1.3938362084513706e-05, "loss": 1.944, "step": 32893 }, { "epoch": 1.0613013222130583, "grad_norm": 0.349609375, "learning_rate": 1.3937590716424678e-05, "loss": 1.9637, "step": 32894 }, { "epoch": 1.0613335860668547, "grad_norm": 0.34765625, "learning_rate": 1.393681935115934e-05, "loss": 1.9681, "step": 32895 }, { "epoch": 1.061365849920651, "grad_norm": 0.34375, "learning_rate": 1.3936047988719723e-05, "loss": 1.9784, "step": 32896 }, { "epoch": 1.0613981137744475, "grad_norm": 0.37109375, "learning_rate": 1.3935276629107883e-05, "loss": 1.9638, "step": 32897 }, { "epoch": 1.0614303776282439, "grad_norm": 0.341796875, "learning_rate": 1.3934505272325874e-05, "loss": 1.9611, "step": 32898 }, { "epoch": 1.06146264148204, "grad_norm": 0.341796875, "learning_rate": 1.393373391837574e-05, "loss": 1.957, "step": 32899 }, { "epoch": 1.0614949053358365, "grad_norm": 0.3515625, "learning_rate": 1.3932962567259535e-05, "loss": 1.9624, "step": 32900 }, { "epoch": 1.0615271691896329, "grad_norm": 0.33203125, "learning_rate": 1.3932191218979319e-05, "loss": 1.9875, "step": 32901 }, { "epoch": 1.0615594330434293, "grad_norm": 0.3671875, "learning_rate": 1.3931419873537125e-05, "loss": 1.952, "step": 32902 }, { "epoch": 1.0615916968972254, "grad_norm": 0.337890625, "learning_rate": 1.3930648530935006e-05, "loss": 1.9837, "step": 32903 }, { "epoch": 1.0616239607510218, "grad_norm": 0.357421875, "learning_rate": 1.392987719117502e-05, "loss": 1.9493, "step": 32904 }, { "epoch": 1.0616562246048182, "grad_norm": 0.373046875, "learning_rate": 1.3929105854259213e-05, "loss": 1.9471, "step": 32905 }, { "epoch": 1.0616884884586146, "grad_norm": 0.3515625, "learning_rate": 1.3928334520189636e-05, "loss": 1.9621, "step": 32906 }, { "epoch": 1.0617207523124108, "grad_norm": 0.35546875, "learning_rate": 1.3927563188968345e-05, "loss": 1.9629, "step": 32907 }, { "epoch": 1.0617530161662072, "grad_norm": 0.333984375, "learning_rate": 1.3926791860597374e-05, "loss": 1.9559, "step": 32908 }, { "epoch": 1.0617852800200036, "grad_norm": 0.3515625, "learning_rate": 1.3926020535078785e-05, "loss": 1.9754, "step": 32909 }, { "epoch": 1.0618175438738, "grad_norm": 0.35546875, "learning_rate": 1.3925249212414625e-05, "loss": 1.9835, "step": 32910 }, { "epoch": 1.0618498077275962, "grad_norm": 0.341796875, "learning_rate": 1.3924477892606946e-05, "loss": 1.9827, "step": 32911 }, { "epoch": 1.0618820715813926, "grad_norm": 0.349609375, "learning_rate": 1.3923706575657796e-05, "loss": 1.9682, "step": 32912 }, { "epoch": 1.061914335435189, "grad_norm": 0.33984375, "learning_rate": 1.3922935261569228e-05, "loss": 1.9666, "step": 32913 }, { "epoch": 1.0619465992889854, "grad_norm": 0.34765625, "learning_rate": 1.3922163950343287e-05, "loss": 1.9492, "step": 32914 }, { "epoch": 1.0619788631427816, "grad_norm": 0.345703125, "learning_rate": 1.3921392641982027e-05, "loss": 1.971, "step": 32915 }, { "epoch": 1.062011126996578, "grad_norm": 0.375, "learning_rate": 1.3920621336487495e-05, "loss": 1.9936, "step": 32916 }, { "epoch": 1.0620433908503744, "grad_norm": 0.3359375, "learning_rate": 1.3919850033861741e-05, "loss": 1.9661, "step": 32917 }, { "epoch": 1.0620756547041708, "grad_norm": 0.38671875, "learning_rate": 1.391907873410682e-05, "loss": 1.9808, "step": 32918 }, { "epoch": 1.0621079185579672, "grad_norm": 0.341796875, "learning_rate": 1.3918307437224781e-05, "loss": 1.9996, "step": 32919 }, { "epoch": 1.0621401824117633, "grad_norm": 0.33203125, "learning_rate": 1.3917536143217666e-05, "loss": 1.9694, "step": 32920 }, { "epoch": 1.0621724462655597, "grad_norm": 0.349609375, "learning_rate": 1.3916764852087534e-05, "loss": 1.9632, "step": 32921 }, { "epoch": 1.0622047101193561, "grad_norm": 0.337890625, "learning_rate": 1.3915993563836427e-05, "loss": 1.9787, "step": 32922 }, { "epoch": 1.0622369739731525, "grad_norm": 0.33984375, "learning_rate": 1.3915222278466403e-05, "loss": 1.9528, "step": 32923 }, { "epoch": 1.0622692378269487, "grad_norm": 0.3515625, "learning_rate": 1.3914450995979507e-05, "loss": 1.9746, "step": 32924 }, { "epoch": 1.062301501680745, "grad_norm": 0.34375, "learning_rate": 1.3913679716377793e-05, "loss": 1.9916, "step": 32925 }, { "epoch": 1.0623337655345415, "grad_norm": 0.341796875, "learning_rate": 1.3912908439663304e-05, "loss": 1.9802, "step": 32926 }, { "epoch": 1.062366029388338, "grad_norm": 0.357421875, "learning_rate": 1.3912137165838097e-05, "loss": 1.9591, "step": 32927 }, { "epoch": 1.062398293242134, "grad_norm": 0.345703125, "learning_rate": 1.3911365894904215e-05, "loss": 1.9697, "step": 32928 }, { "epoch": 1.0624305570959305, "grad_norm": 0.33984375, "learning_rate": 1.3910594626863714e-05, "loss": 1.9802, "step": 32929 }, { "epoch": 1.0624628209497269, "grad_norm": 0.337890625, "learning_rate": 1.3909823361718642e-05, "loss": 1.9933, "step": 32930 }, { "epoch": 1.0624950848035233, "grad_norm": 0.373046875, "learning_rate": 1.390905209947105e-05, "loss": 1.989, "step": 32931 }, { "epoch": 1.0625273486573195, "grad_norm": 0.359375, "learning_rate": 1.3908280840122985e-05, "loss": 1.9855, "step": 32932 }, { "epoch": 1.0625596125111159, "grad_norm": 0.4296875, "learning_rate": 1.3907509583676497e-05, "loss": 2.0112, "step": 32933 }, { "epoch": 1.0625918763649123, "grad_norm": 0.3359375, "learning_rate": 1.3906738330133638e-05, "loss": 1.9503, "step": 32934 }, { "epoch": 1.0626241402187087, "grad_norm": 0.3515625, "learning_rate": 1.3905967079496456e-05, "loss": 1.9955, "step": 32935 }, { "epoch": 1.0626564040725048, "grad_norm": 0.36328125, "learning_rate": 1.3905195831767e-05, "loss": 1.9568, "step": 32936 }, { "epoch": 1.0626886679263012, "grad_norm": 0.341796875, "learning_rate": 1.3904424586947328e-05, "loss": 1.9634, "step": 32937 }, { "epoch": 1.0627209317800976, "grad_norm": 0.365234375, "learning_rate": 1.3903653345039481e-05, "loss": 1.9661, "step": 32938 }, { "epoch": 1.062753195633894, "grad_norm": 0.3515625, "learning_rate": 1.3902882106045509e-05, "loss": 2.0042, "step": 32939 }, { "epoch": 1.0627854594876904, "grad_norm": 0.34375, "learning_rate": 1.3902110869967461e-05, "loss": 1.9699, "step": 32940 }, { "epoch": 1.0628177233414866, "grad_norm": 0.33984375, "learning_rate": 1.3901339636807393e-05, "loss": 1.9716, "step": 32941 }, { "epoch": 1.062849987195283, "grad_norm": 0.353515625, "learning_rate": 1.3900568406567352e-05, "loss": 2.0208, "step": 32942 }, { "epoch": 1.0628822510490794, "grad_norm": 0.33984375, "learning_rate": 1.3899797179249394e-05, "loss": 1.9756, "step": 32943 }, { "epoch": 1.0629145149028758, "grad_norm": 0.33984375, "learning_rate": 1.3899025954855554e-05, "loss": 1.9863, "step": 32944 }, { "epoch": 1.062946778756672, "grad_norm": 0.353515625, "learning_rate": 1.3898254733387888e-05, "loss": 1.975, "step": 32945 }, { "epoch": 1.0629790426104684, "grad_norm": 0.3359375, "learning_rate": 1.3897483514848452e-05, "loss": 1.9879, "step": 32946 }, { "epoch": 1.0630113064642648, "grad_norm": 0.345703125, "learning_rate": 1.3896712299239291e-05, "loss": 1.9796, "step": 32947 }, { "epoch": 1.0630435703180612, "grad_norm": 0.34375, "learning_rate": 1.3895941086562453e-05, "loss": 1.9855, "step": 32948 }, { "epoch": 1.0630758341718574, "grad_norm": 0.33984375, "learning_rate": 1.3895169876819993e-05, "loss": 1.9715, "step": 32949 }, { "epoch": 1.0631080980256538, "grad_norm": 0.330078125, "learning_rate": 1.3894398670013959e-05, "loss": 2.0007, "step": 32950 }, { "epoch": 1.0631403618794502, "grad_norm": 0.34765625, "learning_rate": 1.3893627466146398e-05, "loss": 1.9856, "step": 32951 }, { "epoch": 1.0631726257332466, "grad_norm": 0.34375, "learning_rate": 1.3892856265219358e-05, "loss": 1.967, "step": 32952 }, { "epoch": 1.0632048895870427, "grad_norm": 0.353515625, "learning_rate": 1.3892085067234893e-05, "loss": 1.9648, "step": 32953 }, { "epoch": 1.0632371534408391, "grad_norm": 0.33984375, "learning_rate": 1.3891313872195053e-05, "loss": 1.9433, "step": 32954 }, { "epoch": 1.0632694172946355, "grad_norm": 0.341796875, "learning_rate": 1.3890542680101885e-05, "loss": 1.9747, "step": 32955 }, { "epoch": 1.063301681148432, "grad_norm": 0.361328125, "learning_rate": 1.3889771490957443e-05, "loss": 1.9642, "step": 32956 }, { "epoch": 1.063333945002228, "grad_norm": 0.34375, "learning_rate": 1.3889000304763771e-05, "loss": 1.9739, "step": 32957 }, { "epoch": 1.0633662088560245, "grad_norm": 0.34375, "learning_rate": 1.3888229121522919e-05, "loss": 1.975, "step": 32958 }, { "epoch": 1.063398472709821, "grad_norm": 0.3515625, "learning_rate": 1.388745794123694e-05, "loss": 1.9544, "step": 32959 }, { "epoch": 1.0634307365636173, "grad_norm": 0.337890625, "learning_rate": 1.3886686763907884e-05, "loss": 1.983, "step": 32960 }, { "epoch": 1.0634630004174137, "grad_norm": 0.35546875, "learning_rate": 1.3885915589537797e-05, "loss": 1.9672, "step": 32961 }, { "epoch": 1.0634952642712099, "grad_norm": 0.341796875, "learning_rate": 1.3885144418128735e-05, "loss": 1.953, "step": 32962 }, { "epoch": 1.0635275281250063, "grad_norm": 0.345703125, "learning_rate": 1.388437324968274e-05, "loss": 1.9973, "step": 32963 }, { "epoch": 1.0635597919788027, "grad_norm": 0.3515625, "learning_rate": 1.3883602084201866e-05, "loss": 2.0008, "step": 32964 }, { "epoch": 1.063592055832599, "grad_norm": 0.341796875, "learning_rate": 1.3882830921688158e-05, "loss": 1.9707, "step": 32965 }, { "epoch": 1.0636243196863953, "grad_norm": 0.341796875, "learning_rate": 1.3882059762143675e-05, "loss": 1.9432, "step": 32966 }, { "epoch": 1.0636565835401917, "grad_norm": 0.33984375, "learning_rate": 1.3881288605570455e-05, "loss": 1.9815, "step": 32967 }, { "epoch": 1.063688847393988, "grad_norm": 0.341796875, "learning_rate": 1.3880517451970559e-05, "loss": 1.9895, "step": 32968 }, { "epoch": 1.0637211112477845, "grad_norm": 0.337890625, "learning_rate": 1.387974630134603e-05, "loss": 1.9568, "step": 32969 }, { "epoch": 1.0637533751015806, "grad_norm": 0.34375, "learning_rate": 1.3878975153698916e-05, "loss": 1.9826, "step": 32970 }, { "epoch": 1.063785638955377, "grad_norm": 0.341796875, "learning_rate": 1.3878204009031268e-05, "loss": 1.9157, "step": 32971 }, { "epoch": 1.0638179028091734, "grad_norm": 0.33984375, "learning_rate": 1.3877432867345136e-05, "loss": 1.9801, "step": 32972 }, { "epoch": 1.0638501666629698, "grad_norm": 0.34375, "learning_rate": 1.387666172864257e-05, "loss": 1.9547, "step": 32973 }, { "epoch": 1.0638824305167662, "grad_norm": 0.3515625, "learning_rate": 1.3875890592925631e-05, "loss": 2.0035, "step": 32974 }, { "epoch": 1.0639146943705624, "grad_norm": 0.33984375, "learning_rate": 1.3875119460196348e-05, "loss": 1.9947, "step": 32975 }, { "epoch": 1.0639469582243588, "grad_norm": 0.3359375, "learning_rate": 1.387434833045678e-05, "loss": 1.9514, "step": 32976 }, { "epoch": 1.0639792220781552, "grad_norm": 0.35546875, "learning_rate": 1.3873577203708972e-05, "loss": 1.9642, "step": 32977 }, { "epoch": 1.0640114859319514, "grad_norm": 0.365234375, "learning_rate": 1.3872806079954982e-05, "loss": 1.9833, "step": 32978 }, { "epoch": 1.0640437497857478, "grad_norm": 0.34765625, "learning_rate": 1.3872034959196855e-05, "loss": 1.9799, "step": 32979 }, { "epoch": 1.0640760136395442, "grad_norm": 0.34375, "learning_rate": 1.3871263841436649e-05, "loss": 1.9862, "step": 32980 }, { "epoch": 1.0641082774933406, "grad_norm": 0.34375, "learning_rate": 1.3870492726676396e-05, "loss": 1.974, "step": 32981 }, { "epoch": 1.064140541347137, "grad_norm": 0.34375, "learning_rate": 1.3869721614918153e-05, "loss": 1.9767, "step": 32982 }, { "epoch": 1.0641728052009332, "grad_norm": 0.34375, "learning_rate": 1.3868950506163974e-05, "loss": 1.964, "step": 32983 }, { "epoch": 1.0642050690547296, "grad_norm": 0.349609375, "learning_rate": 1.3868179400415906e-05, "loss": 1.9789, "step": 32984 }, { "epoch": 1.064237332908526, "grad_norm": 0.3515625, "learning_rate": 1.3867408297675998e-05, "loss": 1.9745, "step": 32985 }, { "epoch": 1.0642695967623224, "grad_norm": 0.3515625, "learning_rate": 1.3866637197946306e-05, "loss": 1.9676, "step": 32986 }, { "epoch": 1.0643018606161185, "grad_norm": 0.341796875, "learning_rate": 1.3865866101228863e-05, "loss": 1.9813, "step": 32987 }, { "epoch": 1.064334124469915, "grad_norm": 0.345703125, "learning_rate": 1.3865095007525732e-05, "loss": 1.9714, "step": 32988 }, { "epoch": 1.0643663883237113, "grad_norm": 0.34375, "learning_rate": 1.3864323916838959e-05, "loss": 1.9509, "step": 32989 }, { "epoch": 1.0643986521775077, "grad_norm": 0.361328125, "learning_rate": 1.3863552829170592e-05, "loss": 1.9672, "step": 32990 }, { "epoch": 1.064430916031304, "grad_norm": 0.3515625, "learning_rate": 1.3862781744522682e-05, "loss": 1.9522, "step": 32991 }, { "epoch": 1.0644631798851003, "grad_norm": 0.34375, "learning_rate": 1.3862010662897283e-05, "loss": 1.9188, "step": 32992 }, { "epoch": 1.0644954437388967, "grad_norm": 0.33984375, "learning_rate": 1.3861239584296434e-05, "loss": 1.981, "step": 32993 }, { "epoch": 1.064527707592693, "grad_norm": 0.34765625, "learning_rate": 1.3860468508722192e-05, "loss": 1.9642, "step": 32994 }, { "epoch": 1.0645599714464895, "grad_norm": 0.3515625, "learning_rate": 1.3859697436176601e-05, "loss": 1.971, "step": 32995 }, { "epoch": 1.0645922353002857, "grad_norm": 0.345703125, "learning_rate": 1.3858926366661718e-05, "loss": 1.943, "step": 32996 }, { "epoch": 1.064624499154082, "grad_norm": 0.33984375, "learning_rate": 1.3858155300179583e-05, "loss": 1.9351, "step": 32997 }, { "epoch": 1.0646567630078785, "grad_norm": 0.333984375, "learning_rate": 1.3857384236732258e-05, "loss": 1.9353, "step": 32998 }, { "epoch": 1.0646890268616747, "grad_norm": 0.353515625, "learning_rate": 1.385661317632178e-05, "loss": 1.9937, "step": 32999 }, { "epoch": 1.064721290715471, "grad_norm": 0.349609375, "learning_rate": 1.3855842118950199e-05, "loss": 1.9765, "step": 33000 }, { "epoch": 1.0647535545692675, "grad_norm": 0.33984375, "learning_rate": 1.3855071064619572e-05, "loss": 1.9792, "step": 33001 }, { "epoch": 1.0647858184230639, "grad_norm": 0.359375, "learning_rate": 1.3854300013331945e-05, "loss": 1.9809, "step": 33002 }, { "epoch": 1.0648180822768603, "grad_norm": 0.345703125, "learning_rate": 1.3853528965089366e-05, "loss": 1.9813, "step": 33003 }, { "epoch": 1.0648503461306564, "grad_norm": 0.349609375, "learning_rate": 1.3852757919893886e-05, "loss": 1.9761, "step": 33004 }, { "epoch": 1.0648826099844528, "grad_norm": 0.337890625, "learning_rate": 1.3851986877747552e-05, "loss": 1.9727, "step": 33005 }, { "epoch": 1.0649148738382492, "grad_norm": 0.34765625, "learning_rate": 1.3851215838652415e-05, "loss": 1.9873, "step": 33006 }, { "epoch": 1.0649471376920456, "grad_norm": 0.34765625, "learning_rate": 1.3850444802610525e-05, "loss": 1.9844, "step": 33007 }, { "epoch": 1.0649794015458418, "grad_norm": 0.337890625, "learning_rate": 1.3849673769623928e-05, "loss": 1.9832, "step": 33008 }, { "epoch": 1.0650116653996382, "grad_norm": 0.34375, "learning_rate": 1.3848902739694673e-05, "loss": 1.9692, "step": 33009 }, { "epoch": 1.0650439292534346, "grad_norm": 0.34765625, "learning_rate": 1.3848131712824822e-05, "loss": 1.9641, "step": 33010 }, { "epoch": 1.065076193107231, "grad_norm": 0.365234375, "learning_rate": 1.3847360689016406e-05, "loss": 1.9834, "step": 33011 }, { "epoch": 1.0651084569610272, "grad_norm": 0.3671875, "learning_rate": 1.3846589668271484e-05, "loss": 1.9886, "step": 33012 }, { "epoch": 1.0651407208148236, "grad_norm": 0.341796875, "learning_rate": 1.3845818650592103e-05, "loss": 1.946, "step": 33013 }, { "epoch": 1.06517298466862, "grad_norm": 0.34375, "learning_rate": 1.384504763598031e-05, "loss": 1.9847, "step": 33014 }, { "epoch": 1.0652052485224164, "grad_norm": 0.39453125, "learning_rate": 1.3844276624438159e-05, "loss": 1.9763, "step": 33015 }, { "epoch": 1.0652375123762128, "grad_norm": 0.34765625, "learning_rate": 1.3843505615967703e-05, "loss": 1.9809, "step": 33016 }, { "epoch": 1.065269776230009, "grad_norm": 0.375, "learning_rate": 1.384273461057098e-05, "loss": 1.9759, "step": 33017 }, { "epoch": 1.0653020400838054, "grad_norm": 0.34765625, "learning_rate": 1.3841963608250044e-05, "loss": 1.9995, "step": 33018 }, { "epoch": 1.0653343039376018, "grad_norm": 0.357421875, "learning_rate": 1.3841192609006942e-05, "loss": 1.9838, "step": 33019 }, { "epoch": 1.0653665677913982, "grad_norm": 0.345703125, "learning_rate": 1.3840421612843728e-05, "loss": 1.9701, "step": 33020 }, { "epoch": 1.0653988316451943, "grad_norm": 0.34765625, "learning_rate": 1.383965061976245e-05, "loss": 1.9954, "step": 33021 }, { "epoch": 1.0654310954989907, "grad_norm": 0.34375, "learning_rate": 1.383887962976516e-05, "loss": 1.956, "step": 33022 }, { "epoch": 1.0654633593527871, "grad_norm": 0.349609375, "learning_rate": 1.3838108642853898e-05, "loss": 1.9573, "step": 33023 }, { "epoch": 1.0654956232065835, "grad_norm": 0.35546875, "learning_rate": 1.3837337659030718e-05, "loss": 1.9547, "step": 33024 }, { "epoch": 1.0655278870603797, "grad_norm": 0.37890625, "learning_rate": 1.3836566678297668e-05, "loss": 1.9644, "step": 33025 }, { "epoch": 1.065560150914176, "grad_norm": 0.369140625, "learning_rate": 1.3835795700656803e-05, "loss": 1.9734, "step": 33026 }, { "epoch": 1.0655924147679725, "grad_norm": 0.37109375, "learning_rate": 1.3835024726110164e-05, "loss": 1.9693, "step": 33027 }, { "epoch": 1.065624678621769, "grad_norm": 0.359375, "learning_rate": 1.383425375465981e-05, "loss": 1.9867, "step": 33028 }, { "epoch": 1.065656942475565, "grad_norm": 0.353515625, "learning_rate": 1.383348278630778e-05, "loss": 1.9816, "step": 33029 }, { "epoch": 1.0656892063293615, "grad_norm": 0.380859375, "learning_rate": 1.3832711821056126e-05, "loss": 2.0039, "step": 33030 }, { "epoch": 1.0657214701831579, "grad_norm": 0.357421875, "learning_rate": 1.3831940858906897e-05, "loss": 1.9649, "step": 33031 }, { "epoch": 1.0657537340369543, "grad_norm": 0.35546875, "learning_rate": 1.3831169899862146e-05, "loss": 1.9704, "step": 33032 }, { "epoch": 1.0657859978907505, "grad_norm": 0.341796875, "learning_rate": 1.3830398943923916e-05, "loss": 1.9769, "step": 33033 }, { "epoch": 1.0658182617445469, "grad_norm": 0.34375, "learning_rate": 1.3829627991094266e-05, "loss": 1.9653, "step": 33034 }, { "epoch": 1.0658505255983433, "grad_norm": 0.349609375, "learning_rate": 1.3828857041375233e-05, "loss": 1.9805, "step": 33035 }, { "epoch": 1.0658827894521397, "grad_norm": 0.35546875, "learning_rate": 1.3828086094768873e-05, "loss": 1.927, "step": 33036 }, { "epoch": 1.065915053305936, "grad_norm": 0.345703125, "learning_rate": 1.3827315151277232e-05, "loss": 1.9531, "step": 33037 }, { "epoch": 1.0659473171597322, "grad_norm": 0.34765625, "learning_rate": 1.3826544210902358e-05, "loss": 2.0027, "step": 33038 }, { "epoch": 1.0659795810135286, "grad_norm": 0.35546875, "learning_rate": 1.3825773273646305e-05, "loss": 1.9422, "step": 33039 }, { "epoch": 1.066011844867325, "grad_norm": 0.361328125, "learning_rate": 1.3825002339511123e-05, "loss": 1.9749, "step": 33040 }, { "epoch": 1.0660441087211214, "grad_norm": 0.35546875, "learning_rate": 1.3824231408498853e-05, "loss": 1.9819, "step": 33041 }, { "epoch": 1.0660763725749176, "grad_norm": 0.365234375, "learning_rate": 1.3823460480611549e-05, "loss": 1.9714, "step": 33042 }, { "epoch": 1.066108636428714, "grad_norm": 0.349609375, "learning_rate": 1.3822689555851259e-05, "loss": 1.9662, "step": 33043 }, { "epoch": 1.0661409002825104, "grad_norm": 0.35546875, "learning_rate": 1.3821918634220034e-05, "loss": 1.9941, "step": 33044 }, { "epoch": 1.0661731641363068, "grad_norm": 0.349609375, "learning_rate": 1.3821147715719917e-05, "loss": 1.9501, "step": 33045 }, { "epoch": 1.066205427990103, "grad_norm": 0.359375, "learning_rate": 1.3820376800352968e-05, "loss": 1.9612, "step": 33046 }, { "epoch": 1.0662376918438994, "grad_norm": 0.36328125, "learning_rate": 1.3819605888121226e-05, "loss": 1.965, "step": 33047 }, { "epoch": 1.0662699556976958, "grad_norm": 0.349609375, "learning_rate": 1.381883497902674e-05, "loss": 1.9994, "step": 33048 }, { "epoch": 1.0663022195514922, "grad_norm": 0.35546875, "learning_rate": 1.3818064073071566e-05, "loss": 1.9717, "step": 33049 }, { "epoch": 1.0663344834052884, "grad_norm": 0.353515625, "learning_rate": 1.3817293170257747e-05, "loss": 1.9749, "step": 33050 }, { "epoch": 1.0663667472590848, "grad_norm": 0.359375, "learning_rate": 1.381652227058733e-05, "loss": 1.968, "step": 33051 }, { "epoch": 1.0663990111128812, "grad_norm": 0.357421875, "learning_rate": 1.3815751374062379e-05, "loss": 1.9612, "step": 33052 }, { "epoch": 1.0664312749666776, "grad_norm": 0.353515625, "learning_rate": 1.3814980480684924e-05, "loss": 1.9515, "step": 33053 }, { "epoch": 1.0664635388204737, "grad_norm": 0.359375, "learning_rate": 1.3814209590457022e-05, "loss": 1.9508, "step": 33054 }, { "epoch": 1.0664958026742701, "grad_norm": 0.357421875, "learning_rate": 1.3813438703380721e-05, "loss": 1.9265, "step": 33055 }, { "epoch": 1.0665280665280665, "grad_norm": 0.353515625, "learning_rate": 1.3812667819458068e-05, "loss": 1.9829, "step": 33056 }, { "epoch": 1.066560330381863, "grad_norm": 0.349609375, "learning_rate": 1.3811896938691116e-05, "loss": 1.9821, "step": 33057 }, { "epoch": 1.0665925942356593, "grad_norm": 0.3671875, "learning_rate": 1.381112606108192e-05, "loss": 1.992, "step": 33058 }, { "epoch": 1.0666248580894555, "grad_norm": 0.33984375, "learning_rate": 1.3810355186632511e-05, "loss": 1.9575, "step": 33059 }, { "epoch": 1.066657121943252, "grad_norm": 0.361328125, "learning_rate": 1.3809584315344949e-05, "loss": 1.9805, "step": 33060 }, { "epoch": 1.0666893857970483, "grad_norm": 0.3515625, "learning_rate": 1.3808813447221283e-05, "loss": 1.956, "step": 33061 }, { "epoch": 1.0667216496508447, "grad_norm": 0.39453125, "learning_rate": 1.380804258226356e-05, "loss": 2.0118, "step": 33062 }, { "epoch": 1.0667539135046409, "grad_norm": 0.345703125, "learning_rate": 1.3807271720473828e-05, "loss": 1.9321, "step": 33063 }, { "epoch": 1.0667861773584373, "grad_norm": 0.35546875, "learning_rate": 1.3806500861854144e-05, "loss": 1.9674, "step": 33064 }, { "epoch": 1.0668184412122337, "grad_norm": 0.345703125, "learning_rate": 1.3805730006406542e-05, "loss": 1.9749, "step": 33065 }, { "epoch": 1.06685070506603, "grad_norm": 0.349609375, "learning_rate": 1.380495915413308e-05, "loss": 1.9702, "step": 33066 }, { "epoch": 1.0668829689198263, "grad_norm": 0.341796875, "learning_rate": 1.3804188305035807e-05, "loss": 1.9864, "step": 33067 }, { "epoch": 1.0669152327736227, "grad_norm": 0.349609375, "learning_rate": 1.3803417459116767e-05, "loss": 2.0008, "step": 33068 }, { "epoch": 1.066947496627419, "grad_norm": 0.36328125, "learning_rate": 1.3802646616378013e-05, "loss": 1.9773, "step": 33069 }, { "epoch": 1.0669797604812155, "grad_norm": 0.3515625, "learning_rate": 1.3801875776821598e-05, "loss": 1.9824, "step": 33070 }, { "epoch": 1.0670120243350116, "grad_norm": 0.34375, "learning_rate": 1.380110494044956e-05, "loss": 1.9683, "step": 33071 }, { "epoch": 1.067044288188808, "grad_norm": 0.34765625, "learning_rate": 1.3800334107263953e-05, "loss": 1.9763, "step": 33072 }, { "epoch": 1.0670765520426044, "grad_norm": 0.337890625, "learning_rate": 1.3799563277266826e-05, "loss": 1.9647, "step": 33073 }, { "epoch": 1.0671088158964008, "grad_norm": 0.33984375, "learning_rate": 1.3798792450460226e-05, "loss": 1.977, "step": 33074 }, { "epoch": 1.067141079750197, "grad_norm": 0.34765625, "learning_rate": 1.3798021626846206e-05, "loss": 1.9973, "step": 33075 }, { "epoch": 1.0671733436039934, "grad_norm": 0.341796875, "learning_rate": 1.379725080642681e-05, "loss": 1.9967, "step": 33076 }, { "epoch": 1.0672056074577898, "grad_norm": 0.341796875, "learning_rate": 1.3796479989204094e-05, "loss": 1.9716, "step": 33077 }, { "epoch": 1.0672378713115862, "grad_norm": 0.34765625, "learning_rate": 1.3795709175180095e-05, "loss": 1.9655, "step": 33078 }, { "epoch": 1.0672701351653826, "grad_norm": 0.357421875, "learning_rate": 1.3794938364356868e-05, "loss": 1.9693, "step": 33079 }, { "epoch": 1.0673023990191788, "grad_norm": 0.3515625, "learning_rate": 1.3794167556736464e-05, "loss": 1.9606, "step": 33080 }, { "epoch": 1.0673346628729752, "grad_norm": 0.34375, "learning_rate": 1.3793396752320928e-05, "loss": 1.9687, "step": 33081 }, { "epoch": 1.0673669267267716, "grad_norm": 0.345703125, "learning_rate": 1.379262595111231e-05, "loss": 1.9824, "step": 33082 }, { "epoch": 1.067399190580568, "grad_norm": 0.33984375, "learning_rate": 1.3791855153112662e-05, "loss": 2.0167, "step": 33083 }, { "epoch": 1.0674314544343642, "grad_norm": 0.341796875, "learning_rate": 1.3791084358324026e-05, "loss": 1.9917, "step": 33084 }, { "epoch": 1.0674637182881606, "grad_norm": 0.3359375, "learning_rate": 1.3790313566748454e-05, "loss": 1.9861, "step": 33085 }, { "epoch": 1.067495982141957, "grad_norm": 0.3359375, "learning_rate": 1.3789542778387993e-05, "loss": 1.9651, "step": 33086 }, { "epoch": 1.0675282459957534, "grad_norm": 0.337890625, "learning_rate": 1.3788771993244695e-05, "loss": 1.9892, "step": 33087 }, { "epoch": 1.0675605098495495, "grad_norm": 0.330078125, "learning_rate": 1.3788001211320602e-05, "loss": 1.9744, "step": 33088 }, { "epoch": 1.067592773703346, "grad_norm": 0.33984375, "learning_rate": 1.3787230432617778e-05, "loss": 1.9533, "step": 33089 }, { "epoch": 1.0676250375571423, "grad_norm": 0.330078125, "learning_rate": 1.3786459657138252e-05, "loss": 1.9811, "step": 33090 }, { "epoch": 1.0676573014109387, "grad_norm": 0.3359375, "learning_rate": 1.3785688884884085e-05, "loss": 1.938, "step": 33091 }, { "epoch": 1.067689565264735, "grad_norm": 0.33984375, "learning_rate": 1.3784918115857316e-05, "loss": 1.9585, "step": 33092 }, { "epoch": 1.0677218291185313, "grad_norm": 0.34375, "learning_rate": 1.3784147350060005e-05, "loss": 1.9667, "step": 33093 }, { "epoch": 1.0677540929723277, "grad_norm": 0.341796875, "learning_rate": 1.3783376587494194e-05, "loss": 1.9614, "step": 33094 }, { "epoch": 1.067786356826124, "grad_norm": 0.365234375, "learning_rate": 1.3782605828161937e-05, "loss": 1.9804, "step": 33095 }, { "epoch": 1.0678186206799203, "grad_norm": 0.341796875, "learning_rate": 1.3781835072065273e-05, "loss": 1.9883, "step": 33096 }, { "epoch": 1.0678508845337167, "grad_norm": 0.33984375, "learning_rate": 1.3781064319206252e-05, "loss": 1.9952, "step": 33097 }, { "epoch": 1.067883148387513, "grad_norm": 0.349609375, "learning_rate": 1.378029356958693e-05, "loss": 1.9652, "step": 33098 }, { "epoch": 1.0679154122413095, "grad_norm": 0.349609375, "learning_rate": 1.3779522823209352e-05, "loss": 1.9844, "step": 33099 }, { "epoch": 1.0679476760951059, "grad_norm": 0.337890625, "learning_rate": 1.3778752080075565e-05, "loss": 2.008, "step": 33100 }, { "epoch": 1.067979939948902, "grad_norm": 0.3515625, "learning_rate": 1.3777981340187626e-05, "loss": 1.9744, "step": 33101 }, { "epoch": 1.0680122038026985, "grad_norm": 0.376953125, "learning_rate": 1.3777210603547567e-05, "loss": 1.9913, "step": 33102 }, { "epoch": 1.0680444676564949, "grad_norm": 0.353515625, "learning_rate": 1.3776439870157447e-05, "loss": 1.9664, "step": 33103 }, { "epoch": 1.0680767315102913, "grad_norm": 0.3359375, "learning_rate": 1.3775669140019314e-05, "loss": 1.9801, "step": 33104 }, { "epoch": 1.0681089953640874, "grad_norm": 0.353515625, "learning_rate": 1.3774898413135215e-05, "loss": 1.9882, "step": 33105 }, { "epoch": 1.0681412592178838, "grad_norm": 0.361328125, "learning_rate": 1.37741276895072e-05, "loss": 1.9959, "step": 33106 }, { "epoch": 1.0681735230716802, "grad_norm": 0.439453125, "learning_rate": 1.3773356969137318e-05, "loss": 1.9994, "step": 33107 }, { "epoch": 1.0682057869254766, "grad_norm": 0.69921875, "learning_rate": 1.3772586252027614e-05, "loss": 2.1349, "step": 33108 }, { "epoch": 1.0682380507792728, "grad_norm": 0.51171875, "learning_rate": 1.3771815538180137e-05, "loss": 2.1338, "step": 33109 }, { "epoch": 1.0682703146330692, "grad_norm": 0.51953125, "learning_rate": 1.3771044827596937e-05, "loss": 2.1492, "step": 33110 }, { "epoch": 1.0683025784868656, "grad_norm": 0.55078125, "learning_rate": 1.3770274120280062e-05, "loss": 2.1129, "step": 33111 }, { "epoch": 1.068334842340662, "grad_norm": 0.54296875, "learning_rate": 1.376950341623156e-05, "loss": 2.1658, "step": 33112 }, { "epoch": 1.0683671061944582, "grad_norm": 0.5546875, "learning_rate": 1.3768732715453484e-05, "loss": 2.1484, "step": 33113 }, { "epoch": 1.0683993700482546, "grad_norm": 0.51171875, "learning_rate": 1.3767962017947875e-05, "loss": 2.1303, "step": 33114 }, { "epoch": 1.068431633902051, "grad_norm": 0.4453125, "learning_rate": 1.3767191323716785e-05, "loss": 2.1463, "step": 33115 }, { "epoch": 1.0684638977558474, "grad_norm": 0.4921875, "learning_rate": 1.3766420632762259e-05, "loss": 2.1219, "step": 33116 }, { "epoch": 1.0684961616096436, "grad_norm": 0.43359375, "learning_rate": 1.376564994508635e-05, "loss": 2.1441, "step": 33117 }, { "epoch": 1.06852842546344, "grad_norm": 0.427734375, "learning_rate": 1.3764879260691105e-05, "loss": 2.1182, "step": 33118 }, { "epoch": 1.0685606893172364, "grad_norm": 0.4140625, "learning_rate": 1.3764108579578575e-05, "loss": 2.1482, "step": 33119 }, { "epoch": 1.0685929531710328, "grad_norm": 0.408203125, "learning_rate": 1.3763337901750802e-05, "loss": 2.1326, "step": 33120 }, { "epoch": 1.0686252170248292, "grad_norm": 0.390625, "learning_rate": 1.3762567227209836e-05, "loss": 2.1331, "step": 33121 }, { "epoch": 1.0686574808786253, "grad_norm": 0.39453125, "learning_rate": 1.3761796555957729e-05, "loss": 2.1324, "step": 33122 }, { "epoch": 1.0686897447324217, "grad_norm": 0.40234375, "learning_rate": 1.3761025887996525e-05, "loss": 2.1579, "step": 33123 }, { "epoch": 1.0687220085862181, "grad_norm": 0.384765625, "learning_rate": 1.3760255223328274e-05, "loss": 2.1444, "step": 33124 }, { "epoch": 1.0687542724400145, "grad_norm": 0.376953125, "learning_rate": 1.3759484561955031e-05, "loss": 2.1355, "step": 33125 }, { "epoch": 1.0687865362938107, "grad_norm": 0.388671875, "learning_rate": 1.3758713903878834e-05, "loss": 2.144, "step": 33126 }, { "epoch": 1.068818800147607, "grad_norm": 0.375, "learning_rate": 1.3757943249101733e-05, "loss": 2.1126, "step": 33127 }, { "epoch": 1.0688510640014035, "grad_norm": 0.384765625, "learning_rate": 1.3757172597625781e-05, "loss": 2.1321, "step": 33128 }, { "epoch": 1.0688833278552, "grad_norm": 0.376953125, "learning_rate": 1.3756401949453019e-05, "loss": 2.1501, "step": 33129 }, { "epoch": 1.068915591708996, "grad_norm": 0.375, "learning_rate": 1.3755631304585503e-05, "loss": 2.1364, "step": 33130 }, { "epoch": 1.0689478555627925, "grad_norm": 0.3671875, "learning_rate": 1.3754860663025287e-05, "loss": 2.1109, "step": 33131 }, { "epoch": 1.0689801194165889, "grad_norm": 0.375, "learning_rate": 1.37540900247744e-05, "loss": 2.1268, "step": 33132 }, { "epoch": 1.0690123832703853, "grad_norm": 0.359375, "learning_rate": 1.3753319389834903e-05, "loss": 2.1289, "step": 33133 }, { "epoch": 1.0690446471241815, "grad_norm": 0.373046875, "learning_rate": 1.3752548758208836e-05, "loss": 2.1205, "step": 33134 }, { "epoch": 1.0690769109779779, "grad_norm": 0.35546875, "learning_rate": 1.375177812989826e-05, "loss": 2.1332, "step": 33135 }, { "epoch": 1.0691091748317743, "grad_norm": 0.369140625, "learning_rate": 1.3751007504905213e-05, "loss": 2.1058, "step": 33136 }, { "epoch": 1.0691414386855707, "grad_norm": 0.359375, "learning_rate": 1.3750236883231755e-05, "loss": 2.1267, "step": 33137 }, { "epoch": 1.0691737025393668, "grad_norm": 0.3671875, "learning_rate": 1.3749466264879918e-05, "loss": 2.1399, "step": 33138 }, { "epoch": 1.0692059663931632, "grad_norm": 0.36328125, "learning_rate": 1.3748695649851755e-05, "loss": 2.149, "step": 33139 }, { "epoch": 1.0692382302469596, "grad_norm": 0.376953125, "learning_rate": 1.374792503814932e-05, "loss": 2.1222, "step": 33140 }, { "epoch": 1.069270494100756, "grad_norm": 0.36328125, "learning_rate": 1.3747154429774657e-05, "loss": 2.1265, "step": 33141 }, { "epoch": 1.0693027579545524, "grad_norm": 0.390625, "learning_rate": 1.3746383824729815e-05, "loss": 2.1526, "step": 33142 }, { "epoch": 1.0693350218083486, "grad_norm": 0.361328125, "learning_rate": 1.374561322301685e-05, "loss": 2.1664, "step": 33143 }, { "epoch": 1.069367285662145, "grad_norm": 0.357421875, "learning_rate": 1.3744842624637792e-05, "loss": 2.1457, "step": 33144 }, { "epoch": 1.0693995495159414, "grad_norm": 0.361328125, "learning_rate": 1.37440720295947e-05, "loss": 2.1027, "step": 33145 }, { "epoch": 1.0694318133697378, "grad_norm": 0.361328125, "learning_rate": 1.3743301437889625e-05, "loss": 2.1196, "step": 33146 }, { "epoch": 1.069464077223534, "grad_norm": 0.3515625, "learning_rate": 1.3742530849524611e-05, "loss": 2.1067, "step": 33147 }, { "epoch": 1.0694963410773304, "grad_norm": 0.361328125, "learning_rate": 1.3741760264501706e-05, "loss": 2.098, "step": 33148 }, { "epoch": 1.0695286049311268, "grad_norm": 0.349609375, "learning_rate": 1.3740989682822962e-05, "loss": 2.1288, "step": 33149 }, { "epoch": 1.0695608687849232, "grad_norm": 0.357421875, "learning_rate": 1.374021910449042e-05, "loss": 2.121, "step": 33150 }, { "epoch": 1.0695931326387194, "grad_norm": 0.3671875, "learning_rate": 1.3739448529506132e-05, "loss": 2.1439, "step": 33151 }, { "epoch": 1.0696253964925158, "grad_norm": 0.3515625, "learning_rate": 1.3738677957872146e-05, "loss": 2.1435, "step": 33152 }, { "epoch": 1.0696576603463122, "grad_norm": 0.3671875, "learning_rate": 1.373790738959051e-05, "loss": 2.0998, "step": 33153 }, { "epoch": 1.0696899242001086, "grad_norm": 0.353515625, "learning_rate": 1.3737136824663272e-05, "loss": 2.1288, "step": 33154 }, { "epoch": 1.0697221880539047, "grad_norm": 0.3515625, "learning_rate": 1.3736366263092484e-05, "loss": 2.1532, "step": 33155 }, { "epoch": 1.0697544519077011, "grad_norm": 0.35546875, "learning_rate": 1.3735595704880186e-05, "loss": 2.1366, "step": 33156 }, { "epoch": 1.0697867157614975, "grad_norm": 0.353515625, "learning_rate": 1.3734825150028428e-05, "loss": 2.1276, "step": 33157 }, { "epoch": 1.069818979615294, "grad_norm": 0.34765625, "learning_rate": 1.3734054598539263e-05, "loss": 2.1237, "step": 33158 }, { "epoch": 1.06985124346909, "grad_norm": 0.33984375, "learning_rate": 1.3733284050414733e-05, "loss": 2.1244, "step": 33159 }, { "epoch": 1.0698835073228865, "grad_norm": 0.3515625, "learning_rate": 1.373251350565689e-05, "loss": 2.1307, "step": 33160 }, { "epoch": 1.069915771176683, "grad_norm": 0.3671875, "learning_rate": 1.3731742964267785e-05, "loss": 2.132, "step": 33161 }, { "epoch": 1.0699480350304793, "grad_norm": 0.35546875, "learning_rate": 1.3730972426249457e-05, "loss": 2.1196, "step": 33162 }, { "epoch": 1.0699802988842757, "grad_norm": 0.37890625, "learning_rate": 1.373020189160396e-05, "loss": 2.1221, "step": 33163 }, { "epoch": 1.0700125627380719, "grad_norm": 0.3671875, "learning_rate": 1.372943136033334e-05, "loss": 2.1348, "step": 33164 }, { "epoch": 1.0700448265918683, "grad_norm": 0.353515625, "learning_rate": 1.3728660832439645e-05, "loss": 2.1107, "step": 33165 }, { "epoch": 1.0700770904456647, "grad_norm": 0.357421875, "learning_rate": 1.372789030792492e-05, "loss": 2.1267, "step": 33166 }, { "epoch": 1.070109354299461, "grad_norm": 0.36328125, "learning_rate": 1.3727119786791229e-05, "loss": 2.1241, "step": 33167 }, { "epoch": 1.0701416181532573, "grad_norm": 0.357421875, "learning_rate": 1.3726349269040597e-05, "loss": 2.1104, "step": 33168 }, { "epoch": 1.0701738820070537, "grad_norm": 0.361328125, "learning_rate": 1.3725578754675085e-05, "loss": 2.1191, "step": 33169 }, { "epoch": 1.07020614586085, "grad_norm": 0.365234375, "learning_rate": 1.3724808243696737e-05, "loss": 2.125, "step": 33170 }, { "epoch": 1.0702384097146465, "grad_norm": 0.35546875, "learning_rate": 1.3724037736107597e-05, "loss": 2.1478, "step": 33171 }, { "epoch": 1.0702706735684426, "grad_norm": 0.53515625, "learning_rate": 1.3723267231909722e-05, "loss": 2.0893, "step": 33172 }, { "epoch": 1.070302937422239, "grad_norm": 0.45703125, "learning_rate": 1.3722496731105164e-05, "loss": 2.0802, "step": 33173 }, { "epoch": 1.0703352012760354, "grad_norm": 0.396484375, "learning_rate": 1.3721726233695954e-05, "loss": 2.077, "step": 33174 }, { "epoch": 1.0703674651298318, "grad_norm": 0.4296875, "learning_rate": 1.3720955739684149e-05, "loss": 2.0626, "step": 33175 }, { "epoch": 1.070399728983628, "grad_norm": 0.4296875, "learning_rate": 1.3720185249071792e-05, "loss": 2.0569, "step": 33176 }, { "epoch": 1.0704319928374244, "grad_norm": 0.41015625, "learning_rate": 1.3719414761860938e-05, "loss": 2.0277, "step": 33177 }, { "epoch": 1.0704642566912208, "grad_norm": 0.3984375, "learning_rate": 1.3718644278053632e-05, "loss": 2.0657, "step": 33178 }, { "epoch": 1.0704965205450172, "grad_norm": 0.376953125, "learning_rate": 1.371787379765193e-05, "loss": 2.028, "step": 33179 }, { "epoch": 1.0705287843988134, "grad_norm": 0.40625, "learning_rate": 1.3717103320657859e-05, "loss": 2.046, "step": 33180 }, { "epoch": 1.0705610482526098, "grad_norm": 0.423828125, "learning_rate": 1.3716332847073483e-05, "loss": 2.0613, "step": 33181 }, { "epoch": 1.0705933121064062, "grad_norm": 0.3828125, "learning_rate": 1.3715562376900846e-05, "loss": 2.024, "step": 33182 }, { "epoch": 1.0706255759602026, "grad_norm": 0.361328125, "learning_rate": 1.3714791910141996e-05, "loss": 2.0562, "step": 33183 }, { "epoch": 1.070657839813999, "grad_norm": 0.3984375, "learning_rate": 1.371402144679898e-05, "loss": 2.0628, "step": 33184 }, { "epoch": 1.0706901036677952, "grad_norm": 0.3984375, "learning_rate": 1.371325098687385e-05, "loss": 2.0672, "step": 33185 }, { "epoch": 1.0707223675215916, "grad_norm": 0.3984375, "learning_rate": 1.3712480530368645e-05, "loss": 2.0589, "step": 33186 }, { "epoch": 1.070754631375388, "grad_norm": 0.3828125, "learning_rate": 1.371171007728542e-05, "loss": 2.0032, "step": 33187 }, { "epoch": 1.0707868952291844, "grad_norm": 0.375, "learning_rate": 1.371093962762622e-05, "loss": 1.9733, "step": 33188 }, { "epoch": 1.0708191590829805, "grad_norm": 0.373046875, "learning_rate": 1.3710169181393091e-05, "loss": 1.9998, "step": 33189 }, { "epoch": 1.070851422936777, "grad_norm": 0.359375, "learning_rate": 1.3709398738588082e-05, "loss": 1.9662, "step": 33190 }, { "epoch": 1.0708836867905733, "grad_norm": 0.3671875, "learning_rate": 1.3708628299213246e-05, "loss": 2.0155, "step": 33191 }, { "epoch": 1.0709159506443697, "grad_norm": 0.353515625, "learning_rate": 1.3707857863270623e-05, "loss": 1.9534, "step": 33192 }, { "epoch": 1.070948214498166, "grad_norm": 0.359375, "learning_rate": 1.3707087430762265e-05, "loss": 1.9743, "step": 33193 }, { "epoch": 1.0709804783519623, "grad_norm": 0.357421875, "learning_rate": 1.3706317001690216e-05, "loss": 1.9562, "step": 33194 }, { "epoch": 1.0710127422057587, "grad_norm": 0.365234375, "learning_rate": 1.3705546576056528e-05, "loss": 2.0014, "step": 33195 }, { "epoch": 1.071045006059555, "grad_norm": 0.345703125, "learning_rate": 1.3704776153863244e-05, "loss": 1.9901, "step": 33196 }, { "epoch": 1.0710772699133515, "grad_norm": 0.375, "learning_rate": 1.370400573511242e-05, "loss": 1.9867, "step": 33197 }, { "epoch": 1.0711095337671477, "grad_norm": 0.33984375, "learning_rate": 1.3703235319806095e-05, "loss": 1.9793, "step": 33198 }, { "epoch": 1.071141797620944, "grad_norm": 0.37109375, "learning_rate": 1.3702464907946317e-05, "loss": 1.9661, "step": 33199 }, { "epoch": 1.0711740614747405, "grad_norm": 0.365234375, "learning_rate": 1.3701694499535138e-05, "loss": 1.9632, "step": 33200 }, { "epoch": 1.0712063253285367, "grad_norm": 0.36328125, "learning_rate": 1.3700924094574603e-05, "loss": 1.9466, "step": 33201 }, { "epoch": 1.071238589182333, "grad_norm": 0.349609375, "learning_rate": 1.3700153693066762e-05, "loss": 1.9276, "step": 33202 }, { "epoch": 1.0712708530361295, "grad_norm": 0.34765625, "learning_rate": 1.3699383295013662e-05, "loss": 1.9877, "step": 33203 }, { "epoch": 1.0713031168899259, "grad_norm": 0.35546875, "learning_rate": 1.3698612900417347e-05, "loss": 1.9601, "step": 33204 }, { "epoch": 1.0713353807437223, "grad_norm": 0.33984375, "learning_rate": 1.3697842509279867e-05, "loss": 1.9687, "step": 33205 }, { "epoch": 1.0713676445975184, "grad_norm": 0.34375, "learning_rate": 1.369707212160327e-05, "loss": 1.9553, "step": 33206 }, { "epoch": 1.0713999084513148, "grad_norm": 0.349609375, "learning_rate": 1.3696301737389603e-05, "loss": 1.9622, "step": 33207 }, { "epoch": 1.0714321723051112, "grad_norm": 0.337890625, "learning_rate": 1.3695531356640911e-05, "loss": 1.9756, "step": 33208 }, { "epoch": 1.0714644361589076, "grad_norm": 0.34375, "learning_rate": 1.3694760979359247e-05, "loss": 1.9697, "step": 33209 }, { "epoch": 1.0714967000127038, "grad_norm": 0.349609375, "learning_rate": 1.369399060554666e-05, "loss": 1.9989, "step": 33210 }, { "epoch": 1.0715289638665002, "grad_norm": 0.35546875, "learning_rate": 1.369322023520519e-05, "loss": 1.9557, "step": 33211 }, { "epoch": 1.0715612277202966, "grad_norm": 0.3515625, "learning_rate": 1.3692449868336883e-05, "loss": 1.9794, "step": 33212 }, { "epoch": 1.071593491574093, "grad_norm": 0.341796875, "learning_rate": 1.3691679504943795e-05, "loss": 2.0035, "step": 33213 }, { "epoch": 1.0716257554278892, "grad_norm": 0.333984375, "learning_rate": 1.3690909145027969e-05, "loss": 1.9324, "step": 33214 }, { "epoch": 1.0716580192816856, "grad_norm": 0.3359375, "learning_rate": 1.3690138788591452e-05, "loss": 1.9696, "step": 33215 }, { "epoch": 1.071690283135482, "grad_norm": 0.34765625, "learning_rate": 1.3689368435636301e-05, "loss": 1.9951, "step": 33216 }, { "epoch": 1.0717225469892784, "grad_norm": 0.357421875, "learning_rate": 1.3688598086164546e-05, "loss": 1.9664, "step": 33217 }, { "epoch": 1.0717548108430748, "grad_norm": 0.3359375, "learning_rate": 1.3687827740178245e-05, "loss": 1.976, "step": 33218 }, { "epoch": 1.071787074696871, "grad_norm": 0.3515625, "learning_rate": 1.3687057397679445e-05, "loss": 2.0145, "step": 33219 }, { "epoch": 1.0718193385506674, "grad_norm": 0.3671875, "learning_rate": 1.3686287058670189e-05, "loss": 2.0073, "step": 33220 }, { "epoch": 1.0718516024044638, "grad_norm": 0.349609375, "learning_rate": 1.368551672315253e-05, "loss": 1.9475, "step": 33221 }, { "epoch": 1.0718838662582602, "grad_norm": 0.357421875, "learning_rate": 1.368474639112852e-05, "loss": 1.9774, "step": 33222 }, { "epoch": 1.0719161301120563, "grad_norm": 0.341796875, "learning_rate": 1.3683976062600191e-05, "loss": 1.9573, "step": 33223 }, { "epoch": 1.0719483939658527, "grad_norm": 0.341796875, "learning_rate": 1.36832057375696e-05, "loss": 1.98, "step": 33224 }, { "epoch": 1.0719806578196491, "grad_norm": 0.341796875, "learning_rate": 1.3682435416038794e-05, "loss": 1.9785, "step": 33225 }, { "epoch": 1.0720129216734455, "grad_norm": 0.341796875, "learning_rate": 1.368166509800982e-05, "loss": 1.9708, "step": 33226 }, { "epoch": 1.0720451855272417, "grad_norm": 0.337890625, "learning_rate": 1.3680894783484727e-05, "loss": 1.9784, "step": 33227 }, { "epoch": 1.072077449381038, "grad_norm": 0.341796875, "learning_rate": 1.3680124472465561e-05, "loss": 1.9292, "step": 33228 }, { "epoch": 1.0721097132348345, "grad_norm": 0.349609375, "learning_rate": 1.3679354164954366e-05, "loss": 1.9881, "step": 33229 }, { "epoch": 1.072141977088631, "grad_norm": 0.34765625, "learning_rate": 1.3678583860953192e-05, "loss": 1.9486, "step": 33230 }, { "epoch": 1.072174240942427, "grad_norm": 0.357421875, "learning_rate": 1.3677813560464087e-05, "loss": 1.933, "step": 33231 }, { "epoch": 1.0722065047962235, "grad_norm": 0.349609375, "learning_rate": 1.3677043263489096e-05, "loss": 1.9778, "step": 33232 }, { "epoch": 1.0722387686500199, "grad_norm": 0.349609375, "learning_rate": 1.367627297003027e-05, "loss": 1.9696, "step": 33233 }, { "epoch": 1.0722710325038163, "grad_norm": 0.337890625, "learning_rate": 1.3675502680089657e-05, "loss": 1.9963, "step": 33234 }, { "epoch": 1.0723032963576125, "grad_norm": 0.359375, "learning_rate": 1.3674732393669296e-05, "loss": 1.9824, "step": 33235 }, { "epoch": 1.0723355602114089, "grad_norm": 0.3984375, "learning_rate": 1.3673962110771243e-05, "loss": 1.9358, "step": 33236 }, { "epoch": 1.0723678240652053, "grad_norm": 0.333984375, "learning_rate": 1.3673191831397541e-05, "loss": 1.9629, "step": 33237 }, { "epoch": 1.0724000879190017, "grad_norm": 0.36328125, "learning_rate": 1.3672421555550237e-05, "loss": 1.9952, "step": 33238 }, { "epoch": 1.072432351772798, "grad_norm": 0.35546875, "learning_rate": 1.3671651283231381e-05, "loss": 1.9831, "step": 33239 }, { "epoch": 1.0724646156265942, "grad_norm": 0.35546875, "learning_rate": 1.3670881014443021e-05, "loss": 1.9827, "step": 33240 }, { "epoch": 1.0724968794803906, "grad_norm": 0.369140625, "learning_rate": 1.36701107491872e-05, "loss": 1.9545, "step": 33241 }, { "epoch": 1.072529143334187, "grad_norm": 0.33203125, "learning_rate": 1.3669340487465966e-05, "loss": 1.9728, "step": 33242 }, { "epoch": 1.0725614071879834, "grad_norm": 0.349609375, "learning_rate": 1.3668570229281368e-05, "loss": 1.9789, "step": 33243 }, { "epoch": 1.0725936710417796, "grad_norm": 0.365234375, "learning_rate": 1.3667799974635453e-05, "loss": 1.9751, "step": 33244 }, { "epoch": 1.072625934895576, "grad_norm": 0.341796875, "learning_rate": 1.3667029723530264e-05, "loss": 1.9565, "step": 33245 }, { "epoch": 1.0726581987493724, "grad_norm": 0.353515625, "learning_rate": 1.3666259475967862e-05, "loss": 1.985, "step": 33246 }, { "epoch": 1.0726904626031688, "grad_norm": 0.337890625, "learning_rate": 1.3665489231950277e-05, "loss": 1.9721, "step": 33247 }, { "epoch": 1.072722726456965, "grad_norm": 0.345703125, "learning_rate": 1.3664718991479565e-05, "loss": 1.9624, "step": 33248 }, { "epoch": 1.0727549903107614, "grad_norm": 0.341796875, "learning_rate": 1.3663948754557765e-05, "loss": 1.9751, "step": 33249 }, { "epoch": 1.0727872541645578, "grad_norm": 0.34375, "learning_rate": 1.3663178521186938e-05, "loss": 1.9797, "step": 33250 }, { "epoch": 1.0728195180183542, "grad_norm": 0.337890625, "learning_rate": 1.3662408291369122e-05, "loss": 1.9677, "step": 33251 }, { "epoch": 1.0728517818721504, "grad_norm": 0.337890625, "learning_rate": 1.3661638065106371e-05, "loss": 1.9807, "step": 33252 }, { "epoch": 1.0728840457259468, "grad_norm": 0.341796875, "learning_rate": 1.3660867842400722e-05, "loss": 1.9533, "step": 33253 }, { "epoch": 1.0729163095797432, "grad_norm": 0.341796875, "learning_rate": 1.3660097623254224e-05, "loss": 1.9632, "step": 33254 }, { "epoch": 1.0729485734335396, "grad_norm": 0.333984375, "learning_rate": 1.3659327407668931e-05, "loss": 1.962, "step": 33255 }, { "epoch": 1.0729808372873357, "grad_norm": 0.353515625, "learning_rate": 1.3658557195646885e-05, "loss": 1.9871, "step": 33256 }, { "epoch": 1.0730131011411321, "grad_norm": 0.341796875, "learning_rate": 1.3657786987190134e-05, "loss": 1.9876, "step": 33257 }, { "epoch": 1.0730453649949285, "grad_norm": 0.330078125, "learning_rate": 1.3657016782300733e-05, "loss": 1.9504, "step": 33258 }, { "epoch": 1.073077628848725, "grad_norm": 0.33984375, "learning_rate": 1.365624658098071e-05, "loss": 1.9651, "step": 33259 }, { "epoch": 1.0731098927025213, "grad_norm": 0.35546875, "learning_rate": 1.3655476383232128e-05, "loss": 1.9561, "step": 33260 }, { "epoch": 1.0731421565563175, "grad_norm": 0.333984375, "learning_rate": 1.365470618905703e-05, "loss": 1.9779, "step": 33261 }, { "epoch": 1.073174420410114, "grad_norm": 0.337890625, "learning_rate": 1.3653935998457463e-05, "loss": 1.9821, "step": 33262 }, { "epoch": 1.0732066842639103, "grad_norm": 0.33984375, "learning_rate": 1.3653165811435472e-05, "loss": 1.9515, "step": 33263 }, { "epoch": 1.0732389481177067, "grad_norm": 0.34375, "learning_rate": 1.365239562799311e-05, "loss": 1.9911, "step": 33264 }, { "epoch": 1.0732712119715029, "grad_norm": 0.333984375, "learning_rate": 1.3651625448132417e-05, "loss": 1.9875, "step": 33265 }, { "epoch": 1.0733034758252993, "grad_norm": 0.337890625, "learning_rate": 1.365085527185544e-05, "loss": 1.9974, "step": 33266 }, { "epoch": 1.0733357396790957, "grad_norm": 0.345703125, "learning_rate": 1.3650085099164231e-05, "loss": 1.9758, "step": 33267 }, { "epoch": 1.073368003532892, "grad_norm": 0.33203125, "learning_rate": 1.3649314930060834e-05, "loss": 1.9471, "step": 33268 }, { "epoch": 1.0734002673866883, "grad_norm": 0.337890625, "learning_rate": 1.3648544764547297e-05, "loss": 1.9737, "step": 33269 }, { "epoch": 1.0734325312404847, "grad_norm": 0.3359375, "learning_rate": 1.3647774602625669e-05, "loss": 1.9589, "step": 33270 }, { "epoch": 1.073464795094281, "grad_norm": 0.3359375, "learning_rate": 1.364700444429799e-05, "loss": 1.9786, "step": 33271 }, { "epoch": 1.0734970589480775, "grad_norm": 0.341796875, "learning_rate": 1.3646234289566312e-05, "loss": 1.9927, "step": 33272 }, { "epoch": 1.0735293228018736, "grad_norm": 0.341796875, "learning_rate": 1.3645464138432682e-05, "loss": 1.9893, "step": 33273 }, { "epoch": 1.07356158665567, "grad_norm": 0.33203125, "learning_rate": 1.3644693990899146e-05, "loss": 1.9735, "step": 33274 }, { "epoch": 1.0735938505094664, "grad_norm": 0.337890625, "learning_rate": 1.364392384696775e-05, "loss": 1.9638, "step": 33275 }, { "epoch": 1.0736261143632628, "grad_norm": 0.330078125, "learning_rate": 1.3643153706640546e-05, "loss": 1.9343, "step": 33276 }, { "epoch": 1.073658378217059, "grad_norm": 0.33984375, "learning_rate": 1.3642383569919575e-05, "loss": 1.9906, "step": 33277 }, { "epoch": 1.0736906420708554, "grad_norm": 0.3359375, "learning_rate": 1.3641613436806885e-05, "loss": 1.969, "step": 33278 }, { "epoch": 1.0737229059246518, "grad_norm": 0.3359375, "learning_rate": 1.3640843307304521e-05, "loss": 1.9788, "step": 33279 }, { "epoch": 1.0737551697784482, "grad_norm": 0.326171875, "learning_rate": 1.3640073181414535e-05, "loss": 1.9856, "step": 33280 }, { "epoch": 1.0737874336322446, "grad_norm": 0.359375, "learning_rate": 1.3639303059138968e-05, "loss": 1.993, "step": 33281 }, { "epoch": 1.0738196974860408, "grad_norm": 0.33203125, "learning_rate": 1.363853294047988e-05, "loss": 1.979, "step": 33282 }, { "epoch": 1.0738519613398372, "grad_norm": 0.34765625, "learning_rate": 1.3637762825439299e-05, "loss": 1.9878, "step": 33283 }, { "epoch": 1.0738842251936336, "grad_norm": 0.337890625, "learning_rate": 1.3636992714019284e-05, "loss": 1.9547, "step": 33284 }, { "epoch": 1.07391648904743, "grad_norm": 0.341796875, "learning_rate": 1.3636222606221877e-05, "loss": 1.9922, "step": 33285 }, { "epoch": 1.0739487529012262, "grad_norm": 0.3359375, "learning_rate": 1.3635452502049122e-05, "loss": 1.9815, "step": 33286 }, { "epoch": 1.0739810167550226, "grad_norm": 0.3359375, "learning_rate": 1.3634682401503076e-05, "loss": 1.9747, "step": 33287 }, { "epoch": 1.074013280608819, "grad_norm": 0.345703125, "learning_rate": 1.3633912304585783e-05, "loss": 1.9659, "step": 33288 }, { "epoch": 1.0740455444626154, "grad_norm": 0.34375, "learning_rate": 1.3633142211299284e-05, "loss": 1.9824, "step": 33289 }, { "epoch": 1.0740778083164115, "grad_norm": 0.33203125, "learning_rate": 1.3632372121645626e-05, "loss": 2.003, "step": 33290 }, { "epoch": 1.074110072170208, "grad_norm": 0.345703125, "learning_rate": 1.3631602035626855e-05, "loss": 1.982, "step": 33291 }, { "epoch": 1.0741423360240043, "grad_norm": 0.349609375, "learning_rate": 1.3630831953245026e-05, "loss": 2.0078, "step": 33292 }, { "epoch": 1.0741745998778007, "grad_norm": 0.36328125, "learning_rate": 1.3630061874502178e-05, "loss": 1.9755, "step": 33293 }, { "epoch": 1.074206863731597, "grad_norm": 0.341796875, "learning_rate": 1.3629291799400368e-05, "loss": 2.0009, "step": 33294 }, { "epoch": 1.0742391275853933, "grad_norm": 0.34375, "learning_rate": 1.362852172794163e-05, "loss": 1.9625, "step": 33295 }, { "epoch": 1.0742713914391897, "grad_norm": 0.34765625, "learning_rate": 1.3627751660128012e-05, "loss": 1.9623, "step": 33296 }, { "epoch": 1.074303655292986, "grad_norm": 0.3671875, "learning_rate": 1.3626981595961567e-05, "loss": 1.9851, "step": 33297 }, { "epoch": 1.0743359191467823, "grad_norm": 0.388671875, "learning_rate": 1.362621153544434e-05, "loss": 1.993, "step": 33298 }, { "epoch": 1.0743681830005787, "grad_norm": 0.341796875, "learning_rate": 1.3625441478578376e-05, "loss": 1.9737, "step": 33299 }, { "epoch": 1.074400446854375, "grad_norm": 0.33984375, "learning_rate": 1.3624671425365727e-05, "loss": 1.9975, "step": 33300 }, { "epoch": 1.0744327107081715, "grad_norm": 0.359375, "learning_rate": 1.362390137580843e-05, "loss": 1.9689, "step": 33301 }, { "epoch": 1.0744649745619679, "grad_norm": 0.359375, "learning_rate": 1.3623131329908538e-05, "loss": 1.9572, "step": 33302 }, { "epoch": 1.074497238415764, "grad_norm": 0.333984375, "learning_rate": 1.3622361287668096e-05, "loss": 1.9547, "step": 33303 }, { "epoch": 1.0745295022695605, "grad_norm": 0.33984375, "learning_rate": 1.3621591249089153e-05, "loss": 1.9579, "step": 33304 }, { "epoch": 1.0745617661233569, "grad_norm": 0.337890625, "learning_rate": 1.362082121417375e-05, "loss": 1.9473, "step": 33305 }, { "epoch": 1.0745940299771533, "grad_norm": 0.33984375, "learning_rate": 1.3620051182923944e-05, "loss": 1.946, "step": 33306 }, { "epoch": 1.0746262938309494, "grad_norm": 0.359375, "learning_rate": 1.3619281155341768e-05, "loss": 1.9649, "step": 33307 }, { "epoch": 1.0746585576847458, "grad_norm": 0.359375, "learning_rate": 1.361851113142928e-05, "loss": 1.9818, "step": 33308 }, { "epoch": 1.0746908215385422, "grad_norm": 0.341796875, "learning_rate": 1.361774111118852e-05, "loss": 1.965, "step": 33309 }, { "epoch": 1.0747230853923386, "grad_norm": 0.3828125, "learning_rate": 1.3616971094621535e-05, "loss": 1.975, "step": 33310 }, { "epoch": 1.0747553492461348, "grad_norm": 0.353515625, "learning_rate": 1.3616201081730375e-05, "loss": 1.9598, "step": 33311 }, { "epoch": 1.0747876130999312, "grad_norm": 0.388671875, "learning_rate": 1.3615431072517086e-05, "loss": 1.9759, "step": 33312 }, { "epoch": 1.0748198769537276, "grad_norm": 0.353515625, "learning_rate": 1.3614661066983711e-05, "loss": 1.9498, "step": 33313 }, { "epoch": 1.074852140807524, "grad_norm": 0.3515625, "learning_rate": 1.3613891065132301e-05, "loss": 1.9341, "step": 33314 }, { "epoch": 1.0748844046613202, "grad_norm": 0.34765625, "learning_rate": 1.3613121066964898e-05, "loss": 1.9923, "step": 33315 }, { "epoch": 1.0749166685151166, "grad_norm": 0.3359375, "learning_rate": 1.3612351072483551e-05, "loss": 1.9683, "step": 33316 }, { "epoch": 1.074948932368913, "grad_norm": 0.36328125, "learning_rate": 1.3611581081690307e-05, "loss": 1.9753, "step": 33317 }, { "epoch": 1.0749811962227094, "grad_norm": 0.373046875, "learning_rate": 1.3610811094587214e-05, "loss": 1.964, "step": 33318 }, { "epoch": 1.0750134600765056, "grad_norm": 0.33984375, "learning_rate": 1.3610041111176314e-05, "loss": 1.9749, "step": 33319 }, { "epoch": 1.075045723930302, "grad_norm": 0.353515625, "learning_rate": 1.3609271131459653e-05, "loss": 1.9795, "step": 33320 }, { "epoch": 1.0750779877840984, "grad_norm": 0.3515625, "learning_rate": 1.3608501155439285e-05, "loss": 1.9996, "step": 33321 }, { "epoch": 1.0751102516378948, "grad_norm": 0.337890625, "learning_rate": 1.360773118311725e-05, "loss": 1.9766, "step": 33322 }, { "epoch": 1.0751425154916912, "grad_norm": 0.345703125, "learning_rate": 1.3606961214495588e-05, "loss": 1.9721, "step": 33323 }, { "epoch": 1.0751747793454873, "grad_norm": 0.34765625, "learning_rate": 1.360619124957637e-05, "loss": 1.9934, "step": 33324 }, { "epoch": 1.0752070431992837, "grad_norm": 0.33984375, "learning_rate": 1.3605421288361616e-05, "loss": 1.9719, "step": 33325 }, { "epoch": 1.0752393070530801, "grad_norm": 0.333984375, "learning_rate": 1.3604651330853383e-05, "loss": 1.9997, "step": 33326 }, { "epoch": 1.0752715709068765, "grad_norm": 0.34375, "learning_rate": 1.3603881377053715e-05, "loss": 1.9587, "step": 33327 }, { "epoch": 1.0753038347606727, "grad_norm": 0.337890625, "learning_rate": 1.3603111426964659e-05, "loss": 1.9881, "step": 33328 }, { "epoch": 1.075336098614469, "grad_norm": 0.341796875, "learning_rate": 1.3602341480588263e-05, "loss": 2.0071, "step": 33329 }, { "epoch": 1.0753683624682655, "grad_norm": 0.345703125, "learning_rate": 1.3601571537926584e-05, "loss": 1.9756, "step": 33330 }, { "epoch": 1.075400626322062, "grad_norm": 0.33984375, "learning_rate": 1.3600801598981647e-05, "loss": 1.9547, "step": 33331 }, { "epoch": 1.075432890175858, "grad_norm": 0.37109375, "learning_rate": 1.3600031663755509e-05, "loss": 1.9824, "step": 33332 }, { "epoch": 1.0754651540296545, "grad_norm": 0.35546875, "learning_rate": 1.3599261732250214e-05, "loss": 1.9665, "step": 33333 }, { "epoch": 1.0754974178834509, "grad_norm": 0.369140625, "learning_rate": 1.3598491804467813e-05, "loss": 1.9727, "step": 33334 }, { "epoch": 1.0755296817372473, "grad_norm": 0.357421875, "learning_rate": 1.3597721880410348e-05, "loss": 1.9917, "step": 33335 }, { "epoch": 1.0755619455910435, "grad_norm": 0.357421875, "learning_rate": 1.3596951960079868e-05, "loss": 1.9537, "step": 33336 }, { "epoch": 1.0755942094448399, "grad_norm": 0.36328125, "learning_rate": 1.3596182043478423e-05, "loss": 1.9298, "step": 33337 }, { "epoch": 1.0756264732986363, "grad_norm": 0.333984375, "learning_rate": 1.3595412130608049e-05, "loss": 1.9799, "step": 33338 }, { "epoch": 1.0756587371524327, "grad_norm": 0.35546875, "learning_rate": 1.35946422214708e-05, "loss": 1.9769, "step": 33339 }, { "epoch": 1.0756910010062288, "grad_norm": 0.353515625, "learning_rate": 1.3593872316068717e-05, "loss": 1.9825, "step": 33340 }, { "epoch": 1.0757232648600252, "grad_norm": 0.3515625, "learning_rate": 1.359310241440385e-05, "loss": 1.9713, "step": 33341 }, { "epoch": 1.0757555287138216, "grad_norm": 0.353515625, "learning_rate": 1.3592332516478244e-05, "loss": 1.9751, "step": 33342 }, { "epoch": 1.075787792567618, "grad_norm": 0.34375, "learning_rate": 1.359156262229395e-05, "loss": 1.9744, "step": 33343 }, { "epoch": 1.0758200564214144, "grad_norm": 0.353515625, "learning_rate": 1.3590792731853009e-05, "loss": 1.9756, "step": 33344 }, { "epoch": 1.0758523202752106, "grad_norm": 0.353515625, "learning_rate": 1.3590022845157464e-05, "loss": 1.974, "step": 33345 }, { "epoch": 1.075884584129007, "grad_norm": 0.345703125, "learning_rate": 1.3589252962209368e-05, "loss": 1.9673, "step": 33346 }, { "epoch": 1.0759168479828034, "grad_norm": 0.349609375, "learning_rate": 1.3588483083010763e-05, "loss": 2.0126, "step": 33347 }, { "epoch": 1.0759491118365998, "grad_norm": 0.341796875, "learning_rate": 1.3587713207563698e-05, "loss": 1.9854, "step": 33348 }, { "epoch": 1.075981375690396, "grad_norm": 0.3515625, "learning_rate": 1.3586943335870222e-05, "loss": 1.9954, "step": 33349 }, { "epoch": 1.0760136395441924, "grad_norm": 0.35546875, "learning_rate": 1.3586173467932372e-05, "loss": 1.9736, "step": 33350 }, { "epoch": 1.0760459033979888, "grad_norm": 0.33984375, "learning_rate": 1.35854036037522e-05, "loss": 1.964, "step": 33351 }, { "epoch": 1.0760781672517852, "grad_norm": 0.3515625, "learning_rate": 1.3584633743331753e-05, "loss": 1.971, "step": 33352 }, { "epoch": 1.0761104311055814, "grad_norm": 0.388671875, "learning_rate": 1.3583863886673074e-05, "loss": 1.9952, "step": 33353 }, { "epoch": 1.0761426949593778, "grad_norm": 0.328125, "learning_rate": 1.358309403377821e-05, "loss": 1.9683, "step": 33354 }, { "epoch": 1.0761749588131742, "grad_norm": 0.357421875, "learning_rate": 1.3582324184649213e-05, "loss": 1.9606, "step": 33355 }, { "epoch": 1.0762072226669706, "grad_norm": 0.35546875, "learning_rate": 1.3581554339288117e-05, "loss": 1.997, "step": 33356 }, { "epoch": 1.0762394865207667, "grad_norm": 0.361328125, "learning_rate": 1.358078449769698e-05, "loss": 2.009, "step": 33357 }, { "epoch": 1.0762717503745631, "grad_norm": 0.35546875, "learning_rate": 1.3580014659877838e-05, "loss": 1.9593, "step": 33358 }, { "epoch": 1.0763040142283595, "grad_norm": 0.361328125, "learning_rate": 1.3579244825832745e-05, "loss": 1.9637, "step": 33359 }, { "epoch": 1.076336278082156, "grad_norm": 0.40234375, "learning_rate": 1.3578474995563742e-05, "loss": 2.0039, "step": 33360 }, { "epoch": 1.076368541935952, "grad_norm": 0.38671875, "learning_rate": 1.3577705169072886e-05, "loss": 1.9861, "step": 33361 }, { "epoch": 1.0764008057897485, "grad_norm": 0.396484375, "learning_rate": 1.3576935346362207e-05, "loss": 2.0061, "step": 33362 }, { "epoch": 1.076433069643545, "grad_norm": 0.412109375, "learning_rate": 1.357616552743376e-05, "loss": 1.9969, "step": 33363 }, { "epoch": 1.0764653334973413, "grad_norm": 0.388671875, "learning_rate": 1.3575395712289589e-05, "loss": 1.9715, "step": 33364 }, { "epoch": 1.0764975973511377, "grad_norm": 0.39453125, "learning_rate": 1.3574625900931738e-05, "loss": 2.0215, "step": 33365 }, { "epoch": 1.0765298612049339, "grad_norm": 0.3984375, "learning_rate": 1.3573856093362257e-05, "loss": 1.9488, "step": 33366 }, { "epoch": 1.0765621250587303, "grad_norm": 0.38671875, "learning_rate": 1.3573086289583198e-05, "loss": 1.9959, "step": 33367 }, { "epoch": 1.0765943889125267, "grad_norm": 0.369140625, "learning_rate": 1.3572316489596593e-05, "loss": 1.9673, "step": 33368 }, { "epoch": 1.076626652766323, "grad_norm": 0.40234375, "learning_rate": 1.357154669340449e-05, "loss": 1.9831, "step": 33369 }, { "epoch": 1.0766589166201193, "grad_norm": 0.400390625, "learning_rate": 1.3570776901008943e-05, "loss": 1.961, "step": 33370 }, { "epoch": 1.0766911804739157, "grad_norm": 0.365234375, "learning_rate": 1.3570007112411995e-05, "loss": 1.9788, "step": 33371 }, { "epoch": 1.076723444327712, "grad_norm": 0.361328125, "learning_rate": 1.3569237327615691e-05, "loss": 1.9662, "step": 33372 }, { "epoch": 1.0767557081815085, "grad_norm": 0.392578125, "learning_rate": 1.3568467546622084e-05, "loss": 1.9489, "step": 33373 }, { "epoch": 1.0767879720353046, "grad_norm": 0.431640625, "learning_rate": 1.3567697769433205e-05, "loss": 1.9766, "step": 33374 }, { "epoch": 1.076820235889101, "grad_norm": 0.37890625, "learning_rate": 1.356692799605111e-05, "loss": 1.9874, "step": 33375 }, { "epoch": 1.0768524997428974, "grad_norm": 0.4140625, "learning_rate": 1.3566158226477842e-05, "loss": 1.9841, "step": 33376 }, { "epoch": 1.0768847635966938, "grad_norm": 0.388671875, "learning_rate": 1.356538846071545e-05, "loss": 1.9641, "step": 33377 }, { "epoch": 1.07691702745049, "grad_norm": 0.3828125, "learning_rate": 1.3564618698765974e-05, "loss": 1.936, "step": 33378 }, { "epoch": 1.0769492913042864, "grad_norm": 0.3828125, "learning_rate": 1.3563848940631472e-05, "loss": 1.9357, "step": 33379 }, { "epoch": 1.0769815551580828, "grad_norm": 0.37109375, "learning_rate": 1.3563079186313975e-05, "loss": 1.9518, "step": 33380 }, { "epoch": 1.0770138190118792, "grad_norm": 0.37890625, "learning_rate": 1.3562309435815536e-05, "loss": 1.9826, "step": 33381 }, { "epoch": 1.0770460828656754, "grad_norm": 0.373046875, "learning_rate": 1.3561539689138202e-05, "loss": 1.9335, "step": 33382 }, { "epoch": 1.0770783467194718, "grad_norm": 0.359375, "learning_rate": 1.3560769946284015e-05, "loss": 1.9318, "step": 33383 }, { "epoch": 1.0771106105732682, "grad_norm": 0.37109375, "learning_rate": 1.3560000207255024e-05, "loss": 1.944, "step": 33384 }, { "epoch": 1.0771428744270646, "grad_norm": 0.36328125, "learning_rate": 1.3559230472053277e-05, "loss": 1.9549, "step": 33385 }, { "epoch": 1.077175138280861, "grad_norm": 0.353515625, "learning_rate": 1.3558460740680811e-05, "loss": 1.9202, "step": 33386 }, { "epoch": 1.0772074021346572, "grad_norm": 0.341796875, "learning_rate": 1.355769101313968e-05, "loss": 1.9712, "step": 33387 }, { "epoch": 1.0772396659884536, "grad_norm": 0.35546875, "learning_rate": 1.3556921289431927e-05, "loss": 1.9397, "step": 33388 }, { "epoch": 1.07727192984225, "grad_norm": 0.359375, "learning_rate": 1.3556151569559596e-05, "loss": 1.8902, "step": 33389 }, { "epoch": 1.0773041936960464, "grad_norm": 0.359375, "learning_rate": 1.3555381853524737e-05, "loss": 1.9572, "step": 33390 }, { "epoch": 1.0773364575498425, "grad_norm": 0.36328125, "learning_rate": 1.3554612141329396e-05, "loss": 1.9584, "step": 33391 }, { "epoch": 1.077368721403639, "grad_norm": 0.353515625, "learning_rate": 1.3553842432975611e-05, "loss": 1.9305, "step": 33392 }, { "epoch": 1.0774009852574353, "grad_norm": 0.34375, "learning_rate": 1.3553072728465437e-05, "loss": 1.9003, "step": 33393 }, { "epoch": 1.0774332491112317, "grad_norm": 0.34765625, "learning_rate": 1.3552303027800913e-05, "loss": 1.9228, "step": 33394 }, { "epoch": 1.077465512965028, "grad_norm": 0.353515625, "learning_rate": 1.3551533330984086e-05, "loss": 1.9316, "step": 33395 }, { "epoch": 1.0774977768188243, "grad_norm": 0.361328125, "learning_rate": 1.3550763638017006e-05, "loss": 1.8635, "step": 33396 }, { "epoch": 1.0775300406726207, "grad_norm": 0.359375, "learning_rate": 1.3549993948901717e-05, "loss": 1.9333, "step": 33397 }, { "epoch": 1.077562304526417, "grad_norm": 0.3984375, "learning_rate": 1.354922426364026e-05, "loss": 1.9249, "step": 33398 }, { "epoch": 1.0775945683802135, "grad_norm": 0.359375, "learning_rate": 1.3548454582234685e-05, "loss": 1.9291, "step": 33399 }, { "epoch": 1.0776268322340097, "grad_norm": 0.373046875, "learning_rate": 1.3547684904687038e-05, "loss": 1.9526, "step": 33400 }, { "epoch": 1.077659096087806, "grad_norm": 0.396484375, "learning_rate": 1.3546915230999358e-05, "loss": 1.9855, "step": 33401 }, { "epoch": 1.0776913599416025, "grad_norm": 0.390625, "learning_rate": 1.35461455611737e-05, "loss": 1.9651, "step": 33402 }, { "epoch": 1.0777236237953987, "grad_norm": 0.36328125, "learning_rate": 1.3545375895212113e-05, "loss": 1.969, "step": 33403 }, { "epoch": 1.077755887649195, "grad_norm": 0.357421875, "learning_rate": 1.3544606233116629e-05, "loss": 1.9562, "step": 33404 }, { "epoch": 1.0777881515029915, "grad_norm": 0.3671875, "learning_rate": 1.3543836574889299e-05, "loss": 1.98, "step": 33405 }, { "epoch": 1.0778204153567879, "grad_norm": 0.359375, "learning_rate": 1.3543066920532167e-05, "loss": 1.9623, "step": 33406 }, { "epoch": 1.0778526792105843, "grad_norm": 0.359375, "learning_rate": 1.3542297270047283e-05, "loss": 1.9981, "step": 33407 }, { "epoch": 1.0778849430643804, "grad_norm": 0.3828125, "learning_rate": 1.3541527623436693e-05, "loss": 1.9498, "step": 33408 }, { "epoch": 1.0779172069181768, "grad_norm": 0.3515625, "learning_rate": 1.3540757980702446e-05, "loss": 1.977, "step": 33409 }, { "epoch": 1.0779494707719732, "grad_norm": 0.36328125, "learning_rate": 1.3539988341846573e-05, "loss": 1.9878, "step": 33410 }, { "epoch": 1.0779817346257696, "grad_norm": 0.3515625, "learning_rate": 1.3539218706871127e-05, "loss": 1.9857, "step": 33411 }, { "epoch": 1.0780139984795658, "grad_norm": 0.359375, "learning_rate": 1.3538449075778159e-05, "loss": 1.9371, "step": 33412 }, { "epoch": 1.0780462623333622, "grad_norm": 0.37890625, "learning_rate": 1.353767944856971e-05, "loss": 1.9368, "step": 33413 }, { "epoch": 1.0780785261871586, "grad_norm": 0.3515625, "learning_rate": 1.3536909825247824e-05, "loss": 1.9732, "step": 33414 }, { "epoch": 1.078110790040955, "grad_norm": 0.369140625, "learning_rate": 1.3536140205814558e-05, "loss": 1.9596, "step": 33415 }, { "epoch": 1.0781430538947512, "grad_norm": 0.365234375, "learning_rate": 1.3535370590271937e-05, "loss": 1.9813, "step": 33416 }, { "epoch": 1.0781753177485476, "grad_norm": 0.359375, "learning_rate": 1.353460097862202e-05, "loss": 2.0203, "step": 33417 }, { "epoch": 1.078207581602344, "grad_norm": 0.376953125, "learning_rate": 1.3533831370866851e-05, "loss": 1.9146, "step": 33418 }, { "epoch": 1.0782398454561404, "grad_norm": 0.365234375, "learning_rate": 1.3533061767008475e-05, "loss": 1.9852, "step": 33419 }, { "epoch": 1.0782721093099368, "grad_norm": 0.37109375, "learning_rate": 1.3532292167048937e-05, "loss": 1.9761, "step": 33420 }, { "epoch": 1.078304373163733, "grad_norm": 0.369140625, "learning_rate": 1.3531522570990284e-05, "loss": 2.0003, "step": 33421 }, { "epoch": 1.0783366370175294, "grad_norm": 0.361328125, "learning_rate": 1.3530752978834557e-05, "loss": 2.0008, "step": 33422 }, { "epoch": 1.0783689008713258, "grad_norm": 0.3515625, "learning_rate": 1.3529983390583805e-05, "loss": 1.962, "step": 33423 }, { "epoch": 1.0784011647251222, "grad_norm": 0.373046875, "learning_rate": 1.3529213806240072e-05, "loss": 1.914, "step": 33424 }, { "epoch": 1.0784334285789183, "grad_norm": 0.3359375, "learning_rate": 1.3528444225805404e-05, "loss": 1.9546, "step": 33425 }, { "epoch": 1.0784656924327147, "grad_norm": 0.359375, "learning_rate": 1.3527674649281847e-05, "loss": 1.9682, "step": 33426 }, { "epoch": 1.0784979562865111, "grad_norm": 0.365234375, "learning_rate": 1.3526905076671448e-05, "loss": 1.9988, "step": 33427 }, { "epoch": 1.0785302201403075, "grad_norm": 0.36328125, "learning_rate": 1.3526135507976248e-05, "loss": 1.9162, "step": 33428 }, { "epoch": 1.0785624839941037, "grad_norm": 0.361328125, "learning_rate": 1.3525365943198294e-05, "loss": 1.9786, "step": 33429 }, { "epoch": 1.0785947478479, "grad_norm": 0.365234375, "learning_rate": 1.3524596382339632e-05, "loss": 1.9665, "step": 33430 }, { "epoch": 1.0786270117016965, "grad_norm": 0.37890625, "learning_rate": 1.3523826825402308e-05, "loss": 1.9548, "step": 33431 }, { "epoch": 1.078659275555493, "grad_norm": 0.365234375, "learning_rate": 1.3523057272388366e-05, "loss": 2.001, "step": 33432 }, { "epoch": 1.078691539409289, "grad_norm": 0.376953125, "learning_rate": 1.3522287723299854e-05, "loss": 1.9522, "step": 33433 }, { "epoch": 1.0787238032630855, "grad_norm": 0.365234375, "learning_rate": 1.3521518178138815e-05, "loss": 1.9876, "step": 33434 }, { "epoch": 1.0787560671168819, "grad_norm": 0.392578125, "learning_rate": 1.352074863690729e-05, "loss": 1.963, "step": 33435 }, { "epoch": 1.0787883309706783, "grad_norm": 0.408203125, "learning_rate": 1.3519979099607334e-05, "loss": 1.9523, "step": 33436 }, { "epoch": 1.0788205948244745, "grad_norm": 0.40234375, "learning_rate": 1.3519209566240982e-05, "loss": 1.9546, "step": 33437 }, { "epoch": 1.0788528586782709, "grad_norm": 0.36328125, "learning_rate": 1.3518440036810285e-05, "loss": 1.9483, "step": 33438 }, { "epoch": 1.0788851225320673, "grad_norm": 0.37890625, "learning_rate": 1.3517670511317296e-05, "loss": 1.9382, "step": 33439 }, { "epoch": 1.0789173863858637, "grad_norm": 0.375, "learning_rate": 1.3516900989764046e-05, "loss": 2.009, "step": 33440 }, { "epoch": 1.07894965023966, "grad_norm": 0.365234375, "learning_rate": 1.3516131472152586e-05, "loss": 2.0013, "step": 33441 }, { "epoch": 1.0789819140934562, "grad_norm": 0.34765625, "learning_rate": 1.351536195848496e-05, "loss": 1.9758, "step": 33442 }, { "epoch": 1.0790141779472526, "grad_norm": 0.3671875, "learning_rate": 1.3514592448763213e-05, "loss": 1.9811, "step": 33443 }, { "epoch": 1.079046441801049, "grad_norm": 0.359375, "learning_rate": 1.3513822942989394e-05, "loss": 1.9692, "step": 33444 }, { "epoch": 1.0790787056548454, "grad_norm": 0.35546875, "learning_rate": 1.3513053441165553e-05, "loss": 1.9588, "step": 33445 }, { "epoch": 1.0791109695086416, "grad_norm": 0.353515625, "learning_rate": 1.3512283943293721e-05, "loss": 1.9807, "step": 33446 }, { "epoch": 1.079143233362438, "grad_norm": 0.6171875, "learning_rate": 1.351151444937595e-05, "loss": 1.9597, "step": 33447 }, { "epoch": 1.0791754972162344, "grad_norm": 0.98046875, "learning_rate": 1.3510744959414283e-05, "loss": 1.9729, "step": 33448 }, { "epoch": 1.0792077610700308, "grad_norm": 0.74609375, "learning_rate": 1.3509975473410771e-05, "loss": 1.9888, "step": 33449 }, { "epoch": 1.079240024923827, "grad_norm": 0.66015625, "learning_rate": 1.3509205991367456e-05, "loss": 1.9445, "step": 33450 }, { "epoch": 1.0792722887776234, "grad_norm": 0.71875, "learning_rate": 1.350843651328639e-05, "loss": 1.9823, "step": 33451 }, { "epoch": 1.0793045526314198, "grad_norm": 0.5859375, "learning_rate": 1.3507667039169605e-05, "loss": 1.9859, "step": 33452 }, { "epoch": 1.0793368164852162, "grad_norm": 0.53515625, "learning_rate": 1.3506897569019147e-05, "loss": 2.0034, "step": 33453 }, { "epoch": 1.0793690803390124, "grad_norm": 0.52734375, "learning_rate": 1.350612810283707e-05, "loss": 1.9905, "step": 33454 }, { "epoch": 1.0794013441928088, "grad_norm": 0.53125, "learning_rate": 1.3505358640625415e-05, "loss": 2.004, "step": 33455 }, { "epoch": 1.0794336080466052, "grad_norm": 0.484375, "learning_rate": 1.3504589182386229e-05, "loss": 1.9834, "step": 33456 }, { "epoch": 1.0794658719004016, "grad_norm": 0.54296875, "learning_rate": 1.3503819728121557e-05, "loss": 1.9553, "step": 33457 }, { "epoch": 1.0794981357541977, "grad_norm": 0.466796875, "learning_rate": 1.350305027783344e-05, "loss": 1.9437, "step": 33458 }, { "epoch": 1.0795303996079941, "grad_norm": 0.458984375, "learning_rate": 1.3502280831523928e-05, "loss": 1.9519, "step": 33459 }, { "epoch": 1.0795626634617905, "grad_norm": 0.44921875, "learning_rate": 1.350151138919506e-05, "loss": 1.9438, "step": 33460 }, { "epoch": 1.079594927315587, "grad_norm": 0.4296875, "learning_rate": 1.3500741950848886e-05, "loss": 1.9684, "step": 33461 }, { "epoch": 1.0796271911693833, "grad_norm": 0.431640625, "learning_rate": 1.349997251648745e-05, "loss": 1.951, "step": 33462 }, { "epoch": 1.0796594550231795, "grad_norm": 0.40234375, "learning_rate": 1.3499203086112801e-05, "loss": 1.9672, "step": 33463 }, { "epoch": 1.079691718876976, "grad_norm": 0.40625, "learning_rate": 1.3498433659726974e-05, "loss": 1.9753, "step": 33464 }, { "epoch": 1.0797239827307723, "grad_norm": 0.427734375, "learning_rate": 1.3497664237332023e-05, "loss": 1.9338, "step": 33465 }, { "epoch": 1.0797562465845687, "grad_norm": 0.390625, "learning_rate": 1.3496894818929988e-05, "loss": 1.9686, "step": 33466 }, { "epoch": 1.0797885104383649, "grad_norm": 0.390625, "learning_rate": 1.3496125404522915e-05, "loss": 1.959, "step": 33467 }, { "epoch": 1.0798207742921613, "grad_norm": 0.3984375, "learning_rate": 1.349535599411285e-05, "loss": 1.9768, "step": 33468 }, { "epoch": 1.0798530381459577, "grad_norm": 0.40234375, "learning_rate": 1.3494586587701838e-05, "loss": 1.9339, "step": 33469 }, { "epoch": 1.079885301999754, "grad_norm": 0.388671875, "learning_rate": 1.3493817185291926e-05, "loss": 1.9238, "step": 33470 }, { "epoch": 1.0799175658535503, "grad_norm": 0.37890625, "learning_rate": 1.3493047786885154e-05, "loss": 1.9266, "step": 33471 }, { "epoch": 1.0799498297073467, "grad_norm": 0.3671875, "learning_rate": 1.349227839248357e-05, "loss": 1.9313, "step": 33472 }, { "epoch": 1.079982093561143, "grad_norm": 0.37890625, "learning_rate": 1.3491509002089216e-05, "loss": 1.9644, "step": 33473 }, { "epoch": 1.0800143574149395, "grad_norm": 0.375, "learning_rate": 1.349073961570414e-05, "loss": 1.9538, "step": 33474 }, { "epoch": 1.0800466212687356, "grad_norm": 0.361328125, "learning_rate": 1.3489970233330383e-05, "loss": 1.9535, "step": 33475 }, { "epoch": 1.080078885122532, "grad_norm": 0.357421875, "learning_rate": 1.3489200854970003e-05, "loss": 1.9455, "step": 33476 }, { "epoch": 1.0801111489763284, "grad_norm": 0.365234375, "learning_rate": 1.3488431480625027e-05, "loss": 1.9288, "step": 33477 }, { "epoch": 1.0801434128301248, "grad_norm": 0.361328125, "learning_rate": 1.3487662110297509e-05, "loss": 1.9419, "step": 33478 }, { "epoch": 1.080175676683921, "grad_norm": 0.369140625, "learning_rate": 1.3486892743989491e-05, "loss": 1.9439, "step": 33479 }, { "epoch": 1.0802079405377174, "grad_norm": 0.3671875, "learning_rate": 1.3486123381703017e-05, "loss": 1.9307, "step": 33480 }, { "epoch": 1.0802402043915138, "grad_norm": 0.380859375, "learning_rate": 1.3485354023440138e-05, "loss": 1.9047, "step": 33481 }, { "epoch": 1.0802724682453102, "grad_norm": 0.3671875, "learning_rate": 1.3484584669202899e-05, "loss": 1.9216, "step": 33482 }, { "epoch": 1.0803047320991066, "grad_norm": 0.37109375, "learning_rate": 1.3483815318993337e-05, "loss": 1.9287, "step": 33483 }, { "epoch": 1.0803369959529028, "grad_norm": 0.37890625, "learning_rate": 1.3483045972813499e-05, "loss": 1.9273, "step": 33484 }, { "epoch": 1.0803692598066992, "grad_norm": 0.36328125, "learning_rate": 1.3482276630665427e-05, "loss": 1.9291, "step": 33485 }, { "epoch": 1.0804015236604956, "grad_norm": 0.373046875, "learning_rate": 1.3481507292551174e-05, "loss": 1.8787, "step": 33486 }, { "epoch": 1.080433787514292, "grad_norm": 0.369140625, "learning_rate": 1.3480737958472782e-05, "loss": 1.9173, "step": 33487 }, { "epoch": 1.0804660513680882, "grad_norm": 0.3671875, "learning_rate": 1.34799686284323e-05, "loss": 1.9179, "step": 33488 }, { "epoch": 1.0804983152218846, "grad_norm": 0.353515625, "learning_rate": 1.3479199302431757e-05, "loss": 1.9546, "step": 33489 }, { "epoch": 1.080530579075681, "grad_norm": 0.361328125, "learning_rate": 1.3478429980473212e-05, "loss": 1.9418, "step": 33490 }, { "epoch": 1.0805628429294774, "grad_norm": 0.35546875, "learning_rate": 1.3477660662558706e-05, "loss": 1.9142, "step": 33491 }, { "epoch": 1.0805951067832735, "grad_norm": 0.34765625, "learning_rate": 1.3476891348690281e-05, "loss": 1.955, "step": 33492 }, { "epoch": 1.08062737063707, "grad_norm": 0.376953125, "learning_rate": 1.3476122038869986e-05, "loss": 1.8846, "step": 33493 }, { "epoch": 1.0806596344908663, "grad_norm": 0.37109375, "learning_rate": 1.3475352733099867e-05, "loss": 1.9058, "step": 33494 }, { "epoch": 1.0806918983446627, "grad_norm": 0.341796875, "learning_rate": 1.3474583431381962e-05, "loss": 1.9445, "step": 33495 }, { "epoch": 1.080724162198459, "grad_norm": 0.349609375, "learning_rate": 1.3473814133718318e-05, "loss": 1.9603, "step": 33496 }, { "epoch": 1.0807564260522553, "grad_norm": 0.357421875, "learning_rate": 1.347304484011098e-05, "loss": 1.938, "step": 33497 }, { "epoch": 1.0807886899060517, "grad_norm": 0.3671875, "learning_rate": 1.3472275550561995e-05, "loss": 1.9453, "step": 33498 }, { "epoch": 1.080820953759848, "grad_norm": 0.3671875, "learning_rate": 1.3471506265073405e-05, "loss": 1.9105, "step": 33499 }, { "epoch": 1.0808532176136443, "grad_norm": 0.38671875, "learning_rate": 1.347073698364726e-05, "loss": 1.9106, "step": 33500 }, { "epoch": 1.0808854814674407, "grad_norm": 0.359375, "learning_rate": 1.3469967706285597e-05, "loss": 1.9227, "step": 33501 }, { "epoch": 1.080917745321237, "grad_norm": 0.373046875, "learning_rate": 1.3469198432990462e-05, "loss": 1.8593, "step": 33502 }, { "epoch": 1.0809500091750335, "grad_norm": 0.38671875, "learning_rate": 1.34684291637639e-05, "loss": 1.9213, "step": 33503 }, { "epoch": 1.0809822730288299, "grad_norm": 0.349609375, "learning_rate": 1.346765989860796e-05, "loss": 1.9149, "step": 33504 }, { "epoch": 1.081014536882626, "grad_norm": 0.36328125, "learning_rate": 1.3466890637524681e-05, "loss": 1.9149, "step": 33505 }, { "epoch": 1.0810468007364225, "grad_norm": 0.36328125, "learning_rate": 1.3466121380516116e-05, "loss": 1.8906, "step": 33506 }, { "epoch": 1.0810790645902189, "grad_norm": 0.35546875, "learning_rate": 1.3465352127584299e-05, "loss": 1.9161, "step": 33507 }, { "epoch": 1.0811113284440153, "grad_norm": 0.365234375, "learning_rate": 1.3464582878731279e-05, "loss": 1.9273, "step": 33508 }, { "epoch": 1.0811435922978114, "grad_norm": 0.404296875, "learning_rate": 1.3463813633959101e-05, "loss": 1.9328, "step": 33509 }, { "epoch": 1.0811758561516078, "grad_norm": 0.345703125, "learning_rate": 1.3463044393269806e-05, "loss": 1.942, "step": 33510 }, { "epoch": 1.0812081200054042, "grad_norm": 0.365234375, "learning_rate": 1.3462275156665445e-05, "loss": 1.9954, "step": 33511 }, { "epoch": 1.0812403838592006, "grad_norm": 0.34765625, "learning_rate": 1.3461505924148062e-05, "loss": 1.979, "step": 33512 }, { "epoch": 1.0812726477129968, "grad_norm": 0.357421875, "learning_rate": 1.3460736695719695e-05, "loss": 1.9769, "step": 33513 }, { "epoch": 1.0813049115667932, "grad_norm": 0.373046875, "learning_rate": 1.345996747138239e-05, "loss": 1.9816, "step": 33514 }, { "epoch": 1.0813371754205896, "grad_norm": 0.33984375, "learning_rate": 1.3459198251138194e-05, "loss": 1.9793, "step": 33515 }, { "epoch": 1.081369439274386, "grad_norm": 0.34765625, "learning_rate": 1.3458429034989153e-05, "loss": 1.9616, "step": 33516 }, { "epoch": 1.0814017031281822, "grad_norm": 0.36328125, "learning_rate": 1.3457659822937306e-05, "loss": 1.9621, "step": 33517 }, { "epoch": 1.0814339669819786, "grad_norm": 0.341796875, "learning_rate": 1.3456890614984708e-05, "loss": 1.9633, "step": 33518 }, { "epoch": 1.081466230835775, "grad_norm": 0.337890625, "learning_rate": 1.3456121411133391e-05, "loss": 1.984, "step": 33519 }, { "epoch": 1.0814984946895714, "grad_norm": 0.38671875, "learning_rate": 1.3455352211385402e-05, "loss": 1.9943, "step": 33520 }, { "epoch": 1.0815307585433676, "grad_norm": 0.34765625, "learning_rate": 1.3454583015742787e-05, "loss": 2.0073, "step": 33521 }, { "epoch": 1.081563022397164, "grad_norm": 0.359375, "learning_rate": 1.3453813824207595e-05, "loss": 1.9689, "step": 33522 }, { "epoch": 1.0815952862509604, "grad_norm": 0.3515625, "learning_rate": 1.3453044636781867e-05, "loss": 1.9703, "step": 33523 }, { "epoch": 1.0816275501047568, "grad_norm": 0.3359375, "learning_rate": 1.3452275453467652e-05, "loss": 1.9467, "step": 33524 }, { "epoch": 1.0816598139585532, "grad_norm": 0.3515625, "learning_rate": 1.3451506274266983e-05, "loss": 1.9571, "step": 33525 }, { "epoch": 1.0816920778123493, "grad_norm": 0.3515625, "learning_rate": 1.3450737099181908e-05, "loss": 1.956, "step": 33526 }, { "epoch": 1.0817243416661457, "grad_norm": 0.34765625, "learning_rate": 1.3449967928214475e-05, "loss": 1.9694, "step": 33527 }, { "epoch": 1.0817566055199421, "grad_norm": 0.357421875, "learning_rate": 1.344919876136673e-05, "loss": 1.9559, "step": 33528 }, { "epoch": 1.0817888693737385, "grad_norm": 0.349609375, "learning_rate": 1.3448429598640713e-05, "loss": 1.9775, "step": 33529 }, { "epoch": 1.0818211332275347, "grad_norm": 0.349609375, "learning_rate": 1.3447660440038477e-05, "loss": 1.9985, "step": 33530 }, { "epoch": 1.081853397081331, "grad_norm": 0.37109375, "learning_rate": 1.3446891285562049e-05, "loss": 2.0017, "step": 33531 }, { "epoch": 1.0818856609351275, "grad_norm": 0.33984375, "learning_rate": 1.3446122135213486e-05, "loss": 1.9993, "step": 33532 }, { "epoch": 1.081917924788924, "grad_norm": 0.33984375, "learning_rate": 1.3445352988994831e-05, "loss": 1.9684, "step": 33533 }, { "epoch": 1.08195018864272, "grad_norm": 0.35546875, "learning_rate": 1.3444583846908127e-05, "loss": 1.9843, "step": 33534 }, { "epoch": 1.0819824524965165, "grad_norm": 0.341796875, "learning_rate": 1.3443814708955418e-05, "loss": 1.9607, "step": 33535 }, { "epoch": 1.0820147163503129, "grad_norm": 0.333984375, "learning_rate": 1.344304557513875e-05, "loss": 1.951, "step": 33536 }, { "epoch": 1.0820469802041093, "grad_norm": 0.384765625, "learning_rate": 1.3442276445460164e-05, "loss": 1.9912, "step": 33537 }, { "epoch": 1.0820792440579055, "grad_norm": 0.34765625, "learning_rate": 1.3441507319921704e-05, "loss": 1.9607, "step": 33538 }, { "epoch": 1.0821115079117019, "grad_norm": 0.3828125, "learning_rate": 1.3440738198525416e-05, "loss": 1.9412, "step": 33539 }, { "epoch": 1.0821437717654983, "grad_norm": 0.34375, "learning_rate": 1.3439969081273345e-05, "loss": 1.9845, "step": 33540 }, { "epoch": 1.0821760356192947, "grad_norm": 0.376953125, "learning_rate": 1.3439199968167535e-05, "loss": 1.9582, "step": 33541 }, { "epoch": 1.0822082994730908, "grad_norm": 0.349609375, "learning_rate": 1.3438430859210032e-05, "loss": 1.9698, "step": 33542 }, { "epoch": 1.0822405633268872, "grad_norm": 0.349609375, "learning_rate": 1.3437661754402874e-05, "loss": 1.9732, "step": 33543 }, { "epoch": 1.0822728271806836, "grad_norm": 0.375, "learning_rate": 1.343689265374811e-05, "loss": 1.9674, "step": 33544 }, { "epoch": 1.08230509103448, "grad_norm": 0.3359375, "learning_rate": 1.3436123557247781e-05, "loss": 1.9711, "step": 33545 }, { "epoch": 1.0823373548882764, "grad_norm": 0.373046875, "learning_rate": 1.3435354464903934e-05, "loss": 1.9907, "step": 33546 }, { "epoch": 1.0823696187420726, "grad_norm": 0.341796875, "learning_rate": 1.343458537671861e-05, "loss": 1.9779, "step": 33547 }, { "epoch": 1.082401882595869, "grad_norm": 0.357421875, "learning_rate": 1.343381629269386e-05, "loss": 1.9628, "step": 33548 }, { "epoch": 1.0824341464496654, "grad_norm": 0.349609375, "learning_rate": 1.343304721283172e-05, "loss": 1.9961, "step": 33549 }, { "epoch": 1.0824664103034618, "grad_norm": 0.35546875, "learning_rate": 1.3432278137134238e-05, "loss": 1.9596, "step": 33550 }, { "epoch": 1.082498674157258, "grad_norm": 0.349609375, "learning_rate": 1.3431509065603455e-05, "loss": 1.9712, "step": 33551 }, { "epoch": 1.0825309380110544, "grad_norm": 0.361328125, "learning_rate": 1.3430739998241418e-05, "loss": 1.9895, "step": 33552 }, { "epoch": 1.0825632018648508, "grad_norm": 0.349609375, "learning_rate": 1.3429970935050172e-05, "loss": 1.9378, "step": 33553 }, { "epoch": 1.0825954657186472, "grad_norm": 0.345703125, "learning_rate": 1.3429201876031762e-05, "loss": 1.9531, "step": 33554 }, { "epoch": 1.0826277295724434, "grad_norm": 0.3515625, "learning_rate": 1.3428432821188225e-05, "loss": 1.9494, "step": 33555 }, { "epoch": 1.0826599934262398, "grad_norm": 0.33203125, "learning_rate": 1.342766377052161e-05, "loss": 1.9834, "step": 33556 }, { "epoch": 1.0826922572800362, "grad_norm": 0.36328125, "learning_rate": 1.342689472403396e-05, "loss": 1.9714, "step": 33557 }, { "epoch": 1.0827245211338326, "grad_norm": 0.34765625, "learning_rate": 1.3426125681727317e-05, "loss": 1.9874, "step": 33558 }, { "epoch": 1.0827567849876287, "grad_norm": 0.341796875, "learning_rate": 1.3425356643603731e-05, "loss": 1.9853, "step": 33559 }, { "epoch": 1.0827890488414251, "grad_norm": 0.373046875, "learning_rate": 1.3424587609665248e-05, "loss": 1.9666, "step": 33560 }, { "epoch": 1.0828213126952215, "grad_norm": 0.373046875, "learning_rate": 1.34238185799139e-05, "loss": 1.9635, "step": 33561 }, { "epoch": 1.082853576549018, "grad_norm": 0.330078125, "learning_rate": 1.3423049554351738e-05, "loss": 1.9477, "step": 33562 }, { "epoch": 1.082885840402814, "grad_norm": 0.3515625, "learning_rate": 1.34222805329808e-05, "loss": 1.9548, "step": 33563 }, { "epoch": 1.0829181042566105, "grad_norm": 0.33203125, "learning_rate": 1.342151151580314e-05, "loss": 1.9508, "step": 33564 }, { "epoch": 1.082950368110407, "grad_norm": 0.34375, "learning_rate": 1.3420742502820796e-05, "loss": 1.9987, "step": 33565 }, { "epoch": 1.0829826319642033, "grad_norm": 0.337890625, "learning_rate": 1.3419973494035817e-05, "loss": 1.9619, "step": 33566 }, { "epoch": 1.0830148958179997, "grad_norm": 0.33984375, "learning_rate": 1.341920448945024e-05, "loss": 1.9763, "step": 33567 }, { "epoch": 1.0830471596717959, "grad_norm": 0.353515625, "learning_rate": 1.3418435489066106e-05, "loss": 1.9999, "step": 33568 }, { "epoch": 1.0830794235255923, "grad_norm": 0.34765625, "learning_rate": 1.3417666492885467e-05, "loss": 1.9675, "step": 33569 }, { "epoch": 1.0831116873793887, "grad_norm": 0.33203125, "learning_rate": 1.3416897500910367e-05, "loss": 1.9877, "step": 33570 }, { "epoch": 1.083143951233185, "grad_norm": 0.34375, "learning_rate": 1.3416128513142846e-05, "loss": 1.9718, "step": 33571 }, { "epoch": 1.0831762150869813, "grad_norm": 0.34375, "learning_rate": 1.3415359529584953e-05, "loss": 1.9484, "step": 33572 }, { "epoch": 1.0832084789407777, "grad_norm": 0.33203125, "learning_rate": 1.3414590550238721e-05, "loss": 1.9494, "step": 33573 }, { "epoch": 1.083240742794574, "grad_norm": 0.345703125, "learning_rate": 1.3413821575106202e-05, "loss": 1.9892, "step": 33574 }, { "epoch": 1.0832730066483705, "grad_norm": 0.345703125, "learning_rate": 1.3413052604189439e-05, "loss": 1.9863, "step": 33575 }, { "epoch": 1.0833052705021666, "grad_norm": 0.337890625, "learning_rate": 1.3412283637490475e-05, "loss": 1.9578, "step": 33576 }, { "epoch": 1.083337534355963, "grad_norm": 0.34765625, "learning_rate": 1.3411514675011354e-05, "loss": 1.992, "step": 33577 }, { "epoch": 1.0833697982097594, "grad_norm": 0.337890625, "learning_rate": 1.3410745716754123e-05, "loss": 2.0004, "step": 33578 }, { "epoch": 1.0834020620635558, "grad_norm": 0.34765625, "learning_rate": 1.3409976762720817e-05, "loss": 2.0045, "step": 33579 }, { "epoch": 1.083434325917352, "grad_norm": 0.34765625, "learning_rate": 1.3409207812913487e-05, "loss": 1.9775, "step": 33580 }, { "epoch": 1.0834665897711484, "grad_norm": 0.68359375, "learning_rate": 1.3408438867334173e-05, "loss": 2.0012, "step": 33581 }, { "epoch": 1.0834988536249448, "grad_norm": 0.373046875, "learning_rate": 1.3407669925984923e-05, "loss": 1.9826, "step": 33582 }, { "epoch": 1.0835311174787412, "grad_norm": 0.3515625, "learning_rate": 1.3406900988867776e-05, "loss": 1.9964, "step": 33583 }, { "epoch": 1.0835633813325374, "grad_norm": 0.361328125, "learning_rate": 1.3406132055984781e-05, "loss": 1.9758, "step": 33584 }, { "epoch": 1.0835956451863338, "grad_norm": 0.3515625, "learning_rate": 1.3405363127337977e-05, "loss": 1.992, "step": 33585 }, { "epoch": 1.0836279090401302, "grad_norm": 0.357421875, "learning_rate": 1.3404594202929408e-05, "loss": 1.9484, "step": 33586 }, { "epoch": 1.0836601728939266, "grad_norm": 0.357421875, "learning_rate": 1.340382528276112e-05, "loss": 1.9577, "step": 33587 }, { "epoch": 1.083692436747723, "grad_norm": 0.353515625, "learning_rate": 1.3403056366835155e-05, "loss": 1.9664, "step": 33588 }, { "epoch": 1.0837247006015192, "grad_norm": 0.35546875, "learning_rate": 1.3402287455153557e-05, "loss": 1.9902, "step": 33589 }, { "epoch": 1.0837569644553156, "grad_norm": 0.337890625, "learning_rate": 1.3401518547718373e-05, "loss": 1.9662, "step": 33590 }, { "epoch": 1.083789228309112, "grad_norm": 0.33984375, "learning_rate": 1.3400749644531638e-05, "loss": 1.9411, "step": 33591 }, { "epoch": 1.0838214921629084, "grad_norm": 0.337890625, "learning_rate": 1.3399980745595404e-05, "loss": 1.9668, "step": 33592 }, { "epoch": 1.0838537560167045, "grad_norm": 0.359375, "learning_rate": 1.339921185091171e-05, "loss": 1.9639, "step": 33593 }, { "epoch": 1.083886019870501, "grad_norm": 0.353515625, "learning_rate": 1.33984429604826e-05, "loss": 1.9741, "step": 33594 }, { "epoch": 1.0839182837242973, "grad_norm": 0.341796875, "learning_rate": 1.3397674074310118e-05, "loss": 1.9555, "step": 33595 }, { "epoch": 1.0839505475780937, "grad_norm": 0.33984375, "learning_rate": 1.339690519239631e-05, "loss": 1.9546, "step": 33596 }, { "epoch": 1.08398281143189, "grad_norm": 0.33984375, "learning_rate": 1.3396136314743225e-05, "loss": 1.9674, "step": 33597 }, { "epoch": 1.0840150752856863, "grad_norm": 0.333984375, "learning_rate": 1.3395367441352892e-05, "loss": 1.9624, "step": 33598 }, { "epoch": 1.0840473391394827, "grad_norm": 0.34765625, "learning_rate": 1.3394598572227361e-05, "loss": 1.962, "step": 33599 }, { "epoch": 1.084079602993279, "grad_norm": 0.357421875, "learning_rate": 1.3393829707368674e-05, "loss": 1.954, "step": 33600 }, { "epoch": 1.0841118668470755, "grad_norm": 0.361328125, "learning_rate": 1.3393060846778881e-05, "loss": 1.9825, "step": 33601 }, { "epoch": 1.0841441307008717, "grad_norm": 0.486328125, "learning_rate": 1.339229199046002e-05, "loss": 1.9733, "step": 33602 }, { "epoch": 1.084176394554668, "grad_norm": 0.369140625, "learning_rate": 1.3391523138414142e-05, "loss": 1.9679, "step": 33603 }, { "epoch": 1.0842086584084645, "grad_norm": 0.349609375, "learning_rate": 1.3390754290643279e-05, "loss": 1.9818, "step": 33604 }, { "epoch": 1.0842409222622607, "grad_norm": 0.359375, "learning_rate": 1.3389985447149475e-05, "loss": 1.9463, "step": 33605 }, { "epoch": 1.084273186116057, "grad_norm": 0.36328125, "learning_rate": 1.3389216607934782e-05, "loss": 2.0036, "step": 33606 }, { "epoch": 1.0843054499698535, "grad_norm": 0.359375, "learning_rate": 1.3388447773001238e-05, "loss": 1.9635, "step": 33607 }, { "epoch": 1.0843377138236499, "grad_norm": 0.353515625, "learning_rate": 1.338767894235089e-05, "loss": 1.9836, "step": 33608 }, { "epoch": 1.0843699776774463, "grad_norm": 0.34765625, "learning_rate": 1.3386910115985783e-05, "loss": 1.9989, "step": 33609 }, { "epoch": 1.0844022415312424, "grad_norm": 0.345703125, "learning_rate": 1.3386141293907952e-05, "loss": 1.9721, "step": 33610 }, { "epoch": 1.0844345053850388, "grad_norm": 0.33984375, "learning_rate": 1.3385372476119445e-05, "loss": 1.9691, "step": 33611 }, { "epoch": 1.0844667692388352, "grad_norm": 0.345703125, "learning_rate": 1.3384603662622306e-05, "loss": 1.9935, "step": 33612 }, { "epoch": 1.0844990330926316, "grad_norm": 0.345703125, "learning_rate": 1.3383834853418578e-05, "loss": 1.9721, "step": 33613 }, { "epoch": 1.0845312969464278, "grad_norm": 0.345703125, "learning_rate": 1.3383066048510303e-05, "loss": 1.9581, "step": 33614 }, { "epoch": 1.0845635608002242, "grad_norm": 0.345703125, "learning_rate": 1.3382297247899529e-05, "loss": 1.9931, "step": 33615 }, { "epoch": 1.0845958246540206, "grad_norm": 0.33984375, "learning_rate": 1.3381528451588295e-05, "loss": 1.9764, "step": 33616 }, { "epoch": 1.084628088507817, "grad_norm": 0.34375, "learning_rate": 1.3380759659578642e-05, "loss": 1.9794, "step": 33617 }, { "epoch": 1.0846603523616132, "grad_norm": 0.357421875, "learning_rate": 1.3379990871872619e-05, "loss": 1.9575, "step": 33618 }, { "epoch": 1.0846926162154096, "grad_norm": 0.341796875, "learning_rate": 1.3379222088472265e-05, "loss": 1.9756, "step": 33619 }, { "epoch": 1.084724880069206, "grad_norm": 0.3515625, "learning_rate": 1.3378453309379626e-05, "loss": 1.9686, "step": 33620 }, { "epoch": 1.0847571439230024, "grad_norm": 0.33984375, "learning_rate": 1.3377684534596746e-05, "loss": 1.9646, "step": 33621 }, { "epoch": 1.0847894077767988, "grad_norm": 0.337890625, "learning_rate": 1.3376915764125666e-05, "loss": 1.9886, "step": 33622 }, { "epoch": 1.084821671630595, "grad_norm": 0.333984375, "learning_rate": 1.3376146997968426e-05, "loss": 1.9649, "step": 33623 }, { "epoch": 1.0848539354843914, "grad_norm": 0.375, "learning_rate": 1.3375378236127077e-05, "loss": 1.9747, "step": 33624 }, { "epoch": 1.0848861993381878, "grad_norm": 0.3515625, "learning_rate": 1.3374609478603655e-05, "loss": 1.9739, "step": 33625 }, { "epoch": 1.084918463191984, "grad_norm": 0.357421875, "learning_rate": 1.3373840725400207e-05, "loss": 1.9242, "step": 33626 }, { "epoch": 1.0849507270457803, "grad_norm": 0.34375, "learning_rate": 1.337307197651878e-05, "loss": 1.9903, "step": 33627 }, { "epoch": 1.0849829908995767, "grad_norm": 0.349609375, "learning_rate": 1.337230323196141e-05, "loss": 1.9371, "step": 33628 }, { "epoch": 1.0850152547533731, "grad_norm": 0.36328125, "learning_rate": 1.3371534491730141e-05, "loss": 2.0033, "step": 33629 }, { "epoch": 1.0850475186071695, "grad_norm": 0.3515625, "learning_rate": 1.337076575582702e-05, "loss": 2.005, "step": 33630 }, { "epoch": 1.0850797824609657, "grad_norm": 0.34375, "learning_rate": 1.3369997024254086e-05, "loss": 1.9669, "step": 33631 }, { "epoch": 1.085112046314762, "grad_norm": 0.33984375, "learning_rate": 1.3369228297013384e-05, "loss": 1.9828, "step": 33632 }, { "epoch": 1.0851443101685585, "grad_norm": 0.34765625, "learning_rate": 1.3368459574106965e-05, "loss": 1.9703, "step": 33633 }, { "epoch": 1.085176574022355, "grad_norm": 0.359375, "learning_rate": 1.336769085553686e-05, "loss": 1.9699, "step": 33634 }, { "epoch": 1.085208837876151, "grad_norm": 0.345703125, "learning_rate": 1.3366922141305115e-05, "loss": 1.9457, "step": 33635 }, { "epoch": 1.0852411017299475, "grad_norm": 0.37890625, "learning_rate": 1.3366153431413777e-05, "loss": 1.9332, "step": 33636 }, { "epoch": 1.0852733655837439, "grad_norm": 0.345703125, "learning_rate": 1.3365384725864883e-05, "loss": 1.9697, "step": 33637 }, { "epoch": 1.0853056294375403, "grad_norm": 0.3359375, "learning_rate": 1.3364616024660483e-05, "loss": 1.9637, "step": 33638 }, { "epoch": 1.0853378932913365, "grad_norm": 0.369140625, "learning_rate": 1.3363847327802622e-05, "loss": 1.9556, "step": 33639 }, { "epoch": 1.0853701571451329, "grad_norm": 0.34375, "learning_rate": 1.3363078635293334e-05, "loss": 1.9729, "step": 33640 }, { "epoch": 1.0854024209989293, "grad_norm": 0.3359375, "learning_rate": 1.3362309947134666e-05, "loss": 1.9564, "step": 33641 }, { "epoch": 1.0854346848527257, "grad_norm": 0.359375, "learning_rate": 1.3361541263328657e-05, "loss": 1.9571, "step": 33642 }, { "epoch": 1.085466948706522, "grad_norm": 0.341796875, "learning_rate": 1.3360772583877357e-05, "loss": 1.9899, "step": 33643 }, { "epoch": 1.0854992125603182, "grad_norm": 0.3359375, "learning_rate": 1.3360003908782807e-05, "loss": 1.9842, "step": 33644 }, { "epoch": 1.0855314764141146, "grad_norm": 0.35546875, "learning_rate": 1.3359235238047056e-05, "loss": 1.9793, "step": 33645 }, { "epoch": 1.085563740267911, "grad_norm": 0.36328125, "learning_rate": 1.335846657167213e-05, "loss": 1.979, "step": 33646 }, { "epoch": 1.0855960041217074, "grad_norm": 0.33984375, "learning_rate": 1.3357697909660084e-05, "loss": 1.9592, "step": 33647 }, { "epoch": 1.0856282679755036, "grad_norm": 0.37890625, "learning_rate": 1.3356929252012961e-05, "loss": 1.9937, "step": 33648 }, { "epoch": 1.0856605318293, "grad_norm": 0.3359375, "learning_rate": 1.3356160598732803e-05, "loss": 1.9729, "step": 33649 }, { "epoch": 1.0856927956830964, "grad_norm": 0.341796875, "learning_rate": 1.335539194982165e-05, "loss": 1.9274, "step": 33650 }, { "epoch": 1.0857250595368928, "grad_norm": 0.365234375, "learning_rate": 1.335462330528155e-05, "loss": 1.9788, "step": 33651 }, { "epoch": 1.085757323390689, "grad_norm": 0.33984375, "learning_rate": 1.3353854665114541e-05, "loss": 1.9342, "step": 33652 }, { "epoch": 1.0857895872444854, "grad_norm": 0.3515625, "learning_rate": 1.3353086029322665e-05, "loss": 1.9437, "step": 33653 }, { "epoch": 1.0858218510982818, "grad_norm": 0.35546875, "learning_rate": 1.3352317397907971e-05, "loss": 1.9616, "step": 33654 }, { "epoch": 1.0858541149520782, "grad_norm": 0.345703125, "learning_rate": 1.3351548770872497e-05, "loss": 1.9352, "step": 33655 }, { "epoch": 1.0858863788058744, "grad_norm": 0.3515625, "learning_rate": 1.3350780148218287e-05, "loss": 1.9747, "step": 33656 }, { "epoch": 1.0859186426596708, "grad_norm": 0.34765625, "learning_rate": 1.3350011529947388e-05, "loss": 1.9895, "step": 33657 }, { "epoch": 1.0859509065134672, "grad_norm": 0.357421875, "learning_rate": 1.3349242916061837e-05, "loss": 1.9486, "step": 33658 }, { "epoch": 1.0859831703672636, "grad_norm": 0.359375, "learning_rate": 1.3348474306563675e-05, "loss": 1.985, "step": 33659 }, { "epoch": 1.0860154342210597, "grad_norm": 0.3515625, "learning_rate": 1.3347705701454953e-05, "loss": 1.9854, "step": 33660 }, { "epoch": 1.0860476980748561, "grad_norm": 0.337890625, "learning_rate": 1.3346937100737705e-05, "loss": 1.9908, "step": 33661 }, { "epoch": 1.0860799619286525, "grad_norm": 0.349609375, "learning_rate": 1.3346168504413982e-05, "loss": 1.9614, "step": 33662 }, { "epoch": 1.086112225782449, "grad_norm": 0.337890625, "learning_rate": 1.3345399912485824e-05, "loss": 1.9737, "step": 33663 }, { "epoch": 1.0861444896362453, "grad_norm": 0.345703125, "learning_rate": 1.3344631324955272e-05, "loss": 1.9828, "step": 33664 }, { "epoch": 1.0861767534900415, "grad_norm": 0.337890625, "learning_rate": 1.3343862741824368e-05, "loss": 1.9739, "step": 33665 }, { "epoch": 1.086209017343838, "grad_norm": 0.34765625, "learning_rate": 1.3343094163095155e-05, "loss": 1.9801, "step": 33666 }, { "epoch": 1.0862412811976343, "grad_norm": 0.3359375, "learning_rate": 1.334232558876968e-05, "loss": 1.9503, "step": 33667 }, { "epoch": 1.0862735450514307, "grad_norm": 0.3515625, "learning_rate": 1.3341557018849981e-05, "loss": 1.9585, "step": 33668 }, { "epoch": 1.0863058089052269, "grad_norm": 0.345703125, "learning_rate": 1.3340788453338105e-05, "loss": 1.9709, "step": 33669 }, { "epoch": 1.0863380727590233, "grad_norm": 0.337890625, "learning_rate": 1.334001989223609e-05, "loss": 1.951, "step": 33670 }, { "epoch": 1.0863703366128197, "grad_norm": 0.33984375, "learning_rate": 1.333925133554598e-05, "loss": 1.9688, "step": 33671 }, { "epoch": 1.086402600466616, "grad_norm": 0.357421875, "learning_rate": 1.333848278326982e-05, "loss": 1.9615, "step": 33672 }, { "epoch": 1.0864348643204123, "grad_norm": 0.33984375, "learning_rate": 1.333771423540965e-05, "loss": 1.9701, "step": 33673 }, { "epoch": 1.0864671281742087, "grad_norm": 0.353515625, "learning_rate": 1.333694569196751e-05, "loss": 1.9666, "step": 33674 }, { "epoch": 1.086499392028005, "grad_norm": 0.349609375, "learning_rate": 1.3336177152945458e-05, "loss": 1.9553, "step": 33675 }, { "epoch": 1.0865316558818015, "grad_norm": 0.359375, "learning_rate": 1.3335408618345518e-05, "loss": 1.9714, "step": 33676 }, { "epoch": 1.0865639197355976, "grad_norm": 0.353515625, "learning_rate": 1.333464008816974e-05, "loss": 1.9867, "step": 33677 }, { "epoch": 1.086596183589394, "grad_norm": 0.365234375, "learning_rate": 1.3333871562420162e-05, "loss": 1.9929, "step": 33678 }, { "epoch": 1.0866284474431904, "grad_norm": 0.359375, "learning_rate": 1.3333103041098834e-05, "loss": 1.9856, "step": 33679 }, { "epoch": 1.0866607112969868, "grad_norm": 0.3515625, "learning_rate": 1.3332334524207797e-05, "loss": 1.9829, "step": 33680 }, { "epoch": 1.086692975150783, "grad_norm": 0.337890625, "learning_rate": 1.3331566011749097e-05, "loss": 2.001, "step": 33681 }, { "epoch": 1.0867252390045794, "grad_norm": 0.341796875, "learning_rate": 1.3330797503724766e-05, "loss": 1.9401, "step": 33682 }, { "epoch": 1.0867575028583758, "grad_norm": 0.35546875, "learning_rate": 1.333002900013685e-05, "loss": 1.9677, "step": 33683 }, { "epoch": 1.0867897667121722, "grad_norm": 0.330078125, "learning_rate": 1.3329260500987396e-05, "loss": 1.9776, "step": 33684 }, { "epoch": 1.0868220305659686, "grad_norm": 0.33984375, "learning_rate": 1.3328492006278443e-05, "loss": 1.937, "step": 33685 }, { "epoch": 1.0868542944197648, "grad_norm": 0.447265625, "learning_rate": 1.3327723516012037e-05, "loss": 1.9578, "step": 33686 }, { "epoch": 1.0868865582735612, "grad_norm": 0.8515625, "learning_rate": 1.3326955030190224e-05, "loss": 1.9037, "step": 33687 }, { "epoch": 1.0869188221273576, "grad_norm": 0.640625, "learning_rate": 1.332618654881503e-05, "loss": 1.9099, "step": 33688 }, { "epoch": 1.086951085981154, "grad_norm": 0.76171875, "learning_rate": 1.3325418071888512e-05, "loss": 1.8982, "step": 33689 }, { "epoch": 1.0869833498349502, "grad_norm": 0.6953125, "learning_rate": 1.332464959941271e-05, "loss": 1.8922, "step": 33690 }, { "epoch": 1.0870156136887466, "grad_norm": 0.66796875, "learning_rate": 1.3323881131389661e-05, "loss": 1.913, "step": 33691 }, { "epoch": 1.087047877542543, "grad_norm": 0.51953125, "learning_rate": 1.3323112667821417e-05, "loss": 1.8954, "step": 33692 }, { "epoch": 1.0870801413963394, "grad_norm": 0.6328125, "learning_rate": 1.3322344208710014e-05, "loss": 1.8876, "step": 33693 }, { "epoch": 1.0871124052501355, "grad_norm": 0.51171875, "learning_rate": 1.3321575754057494e-05, "loss": 1.9302, "step": 33694 }, { "epoch": 1.087144669103932, "grad_norm": 0.48046875, "learning_rate": 1.33208073038659e-05, "loss": 1.9225, "step": 33695 }, { "epoch": 1.0871769329577283, "grad_norm": 0.5, "learning_rate": 1.3320038858137277e-05, "loss": 1.9173, "step": 33696 }, { "epoch": 1.0872091968115247, "grad_norm": 0.443359375, "learning_rate": 1.3319270416873663e-05, "loss": 1.9083, "step": 33697 }, { "epoch": 1.087241460665321, "grad_norm": 0.4140625, "learning_rate": 1.3318501980077104e-05, "loss": 1.9159, "step": 33698 }, { "epoch": 1.0872737245191173, "grad_norm": 0.421875, "learning_rate": 1.3317733547749646e-05, "loss": 1.9237, "step": 33699 }, { "epoch": 1.0873059883729137, "grad_norm": 0.421875, "learning_rate": 1.3316965119893322e-05, "loss": 1.8742, "step": 33700 }, { "epoch": 1.08733825222671, "grad_norm": 0.41015625, "learning_rate": 1.3316196696510178e-05, "loss": 1.8896, "step": 33701 }, { "epoch": 1.0873705160805063, "grad_norm": 0.44140625, "learning_rate": 1.3315428277602258e-05, "loss": 1.9161, "step": 33702 }, { "epoch": 1.0874027799343027, "grad_norm": 0.375, "learning_rate": 1.3314659863171604e-05, "loss": 1.8806, "step": 33703 }, { "epoch": 1.087435043788099, "grad_norm": 0.400390625, "learning_rate": 1.3313891453220259e-05, "loss": 1.8695, "step": 33704 }, { "epoch": 1.0874673076418955, "grad_norm": 0.404296875, "learning_rate": 1.3313123047750265e-05, "loss": 1.8955, "step": 33705 }, { "epoch": 1.0874995714956919, "grad_norm": 0.375, "learning_rate": 1.3312354646763663e-05, "loss": 1.8921, "step": 33706 }, { "epoch": 1.087531835349488, "grad_norm": 0.375, "learning_rate": 1.3311586250262494e-05, "loss": 1.8796, "step": 33707 }, { "epoch": 1.0875640992032845, "grad_norm": 0.37890625, "learning_rate": 1.3310817858248801e-05, "loss": 1.8725, "step": 33708 }, { "epoch": 1.0875963630570809, "grad_norm": 0.3828125, "learning_rate": 1.331004947072463e-05, "loss": 1.8759, "step": 33709 }, { "epoch": 1.0876286269108772, "grad_norm": 0.373046875, "learning_rate": 1.3309281087692016e-05, "loss": 1.8868, "step": 33710 }, { "epoch": 1.0876608907646734, "grad_norm": 0.376953125, "learning_rate": 1.3308512709153013e-05, "loss": 1.8766, "step": 33711 }, { "epoch": 1.0876931546184698, "grad_norm": 0.37890625, "learning_rate": 1.3307744335109651e-05, "loss": 1.8291, "step": 33712 }, { "epoch": 1.0877254184722662, "grad_norm": 0.353515625, "learning_rate": 1.330697596556398e-05, "loss": 1.8546, "step": 33713 }, { "epoch": 1.0877576823260626, "grad_norm": 0.376953125, "learning_rate": 1.3306207600518035e-05, "loss": 1.8566, "step": 33714 }, { "epoch": 1.0877899461798588, "grad_norm": 0.353515625, "learning_rate": 1.3305439239973861e-05, "loss": 1.885, "step": 33715 }, { "epoch": 1.0878222100336552, "grad_norm": 0.357421875, "learning_rate": 1.3304670883933503e-05, "loss": 1.8905, "step": 33716 }, { "epoch": 1.0878544738874516, "grad_norm": 0.36328125, "learning_rate": 1.3303902532399012e-05, "loss": 1.862, "step": 33717 }, { "epoch": 1.087886737741248, "grad_norm": 0.369140625, "learning_rate": 1.3303134185372411e-05, "loss": 1.8924, "step": 33718 }, { "epoch": 1.0879190015950442, "grad_norm": 0.357421875, "learning_rate": 1.3302365842855751e-05, "loss": 1.8926, "step": 33719 }, { "epoch": 1.0879512654488406, "grad_norm": 0.359375, "learning_rate": 1.3301597504851071e-05, "loss": 1.9133, "step": 33720 }, { "epoch": 1.087983529302637, "grad_norm": 0.3671875, "learning_rate": 1.3300829171360419e-05, "loss": 1.8907, "step": 33721 }, { "epoch": 1.0880157931564334, "grad_norm": 0.349609375, "learning_rate": 1.3300060842385834e-05, "loss": 1.9114, "step": 33722 }, { "epoch": 1.0880480570102296, "grad_norm": 0.37109375, "learning_rate": 1.3299292517929363e-05, "loss": 1.9185, "step": 33723 }, { "epoch": 1.088080320864026, "grad_norm": 0.34765625, "learning_rate": 1.329852419799304e-05, "loss": 1.8801, "step": 33724 }, { "epoch": 1.0881125847178224, "grad_norm": 0.333984375, "learning_rate": 1.3297755882578907e-05, "loss": 1.8936, "step": 33725 }, { "epoch": 1.0881448485716188, "grad_norm": 0.384765625, "learning_rate": 1.329698757168901e-05, "loss": 1.8977, "step": 33726 }, { "epoch": 1.0881771124254151, "grad_norm": 0.345703125, "learning_rate": 1.3296219265325393e-05, "loss": 1.8989, "step": 33727 }, { "epoch": 1.0882093762792113, "grad_norm": 0.37890625, "learning_rate": 1.3295450963490094e-05, "loss": 1.8598, "step": 33728 }, { "epoch": 1.0882416401330077, "grad_norm": 0.353515625, "learning_rate": 1.3294682666185158e-05, "loss": 1.8955, "step": 33729 }, { "epoch": 1.0882739039868041, "grad_norm": 0.373046875, "learning_rate": 1.3293914373412626e-05, "loss": 1.8448, "step": 33730 }, { "epoch": 1.0883061678406005, "grad_norm": 0.357421875, "learning_rate": 1.3293146085174539e-05, "loss": 1.8854, "step": 33731 }, { "epoch": 1.0883384316943967, "grad_norm": 0.361328125, "learning_rate": 1.3292377801472938e-05, "loss": 1.8593, "step": 33732 }, { "epoch": 1.088370695548193, "grad_norm": 0.376953125, "learning_rate": 1.3291609522309866e-05, "loss": 1.9094, "step": 33733 }, { "epoch": 1.0884029594019895, "grad_norm": 0.369140625, "learning_rate": 1.3290841247687366e-05, "loss": 1.8836, "step": 33734 }, { "epoch": 1.088435223255786, "grad_norm": 0.36328125, "learning_rate": 1.3290072977607479e-05, "loss": 1.8631, "step": 33735 }, { "epoch": 1.088467487109582, "grad_norm": 0.376953125, "learning_rate": 1.328930471207225e-05, "loss": 1.9134, "step": 33736 }, { "epoch": 1.0884997509633785, "grad_norm": 0.388671875, "learning_rate": 1.3288536451083715e-05, "loss": 1.9683, "step": 33737 }, { "epoch": 1.0885320148171749, "grad_norm": 0.419921875, "learning_rate": 1.3287768194643918e-05, "loss": 1.9831, "step": 33738 }, { "epoch": 1.0885642786709713, "grad_norm": 0.37109375, "learning_rate": 1.3286999942754905e-05, "loss": 1.9529, "step": 33739 }, { "epoch": 1.0885965425247675, "grad_norm": 0.4296875, "learning_rate": 1.328623169541871e-05, "loss": 1.9529, "step": 33740 }, { "epoch": 1.0886288063785639, "grad_norm": 0.3828125, "learning_rate": 1.3285463452637382e-05, "loss": 1.95, "step": 33741 }, { "epoch": 1.0886610702323603, "grad_norm": 0.396484375, "learning_rate": 1.3284695214412963e-05, "loss": 1.9945, "step": 33742 }, { "epoch": 1.0886933340861566, "grad_norm": 0.3984375, "learning_rate": 1.3283926980747489e-05, "loss": 1.9665, "step": 33743 }, { "epoch": 1.0887255979399528, "grad_norm": 0.376953125, "learning_rate": 1.3283158751643006e-05, "loss": 1.9558, "step": 33744 }, { "epoch": 1.0887578617937492, "grad_norm": 0.408203125, "learning_rate": 1.3282390527101553e-05, "loss": 1.9636, "step": 33745 }, { "epoch": 1.0887901256475456, "grad_norm": 0.353515625, "learning_rate": 1.3281622307125173e-05, "loss": 1.9685, "step": 33746 }, { "epoch": 1.088822389501342, "grad_norm": 0.37890625, "learning_rate": 1.3280854091715909e-05, "loss": 1.9942, "step": 33747 }, { "epoch": 1.0888546533551384, "grad_norm": 0.376953125, "learning_rate": 1.3280085880875807e-05, "loss": 2.0175, "step": 33748 }, { "epoch": 1.0888869172089346, "grad_norm": 0.365234375, "learning_rate": 1.32793176746069e-05, "loss": 1.9852, "step": 33749 }, { "epoch": 1.088919181062731, "grad_norm": 0.365234375, "learning_rate": 1.3278549472911235e-05, "loss": 1.9973, "step": 33750 }, { "epoch": 1.0889514449165274, "grad_norm": 0.37890625, "learning_rate": 1.327778127579085e-05, "loss": 1.9504, "step": 33751 }, { "epoch": 1.0889837087703238, "grad_norm": 0.384765625, "learning_rate": 1.3277013083247786e-05, "loss": 1.9634, "step": 33752 }, { "epoch": 1.08901597262412, "grad_norm": 0.375, "learning_rate": 1.327624489528409e-05, "loss": 1.98, "step": 33753 }, { "epoch": 1.0890482364779164, "grad_norm": 0.388671875, "learning_rate": 1.327547671190181e-05, "loss": 1.9532, "step": 33754 }, { "epoch": 1.0890805003317128, "grad_norm": 0.36328125, "learning_rate": 1.3274708533102973e-05, "loss": 1.9641, "step": 33755 }, { "epoch": 1.0891127641855092, "grad_norm": 0.3671875, "learning_rate": 1.3273940358889624e-05, "loss": 1.9644, "step": 33756 }, { "epoch": 1.0891450280393054, "grad_norm": 0.404296875, "learning_rate": 1.3273172189263807e-05, "loss": 1.9609, "step": 33757 }, { "epoch": 1.0891772918931018, "grad_norm": 0.349609375, "learning_rate": 1.3272404024227566e-05, "loss": 1.9732, "step": 33758 }, { "epoch": 1.0892095557468982, "grad_norm": 0.40234375, "learning_rate": 1.3271635863782942e-05, "loss": 1.9619, "step": 33759 }, { "epoch": 1.0892418196006945, "grad_norm": 0.357421875, "learning_rate": 1.327086770793198e-05, "loss": 1.9513, "step": 33760 }, { "epoch": 1.0892740834544907, "grad_norm": 0.365234375, "learning_rate": 1.3270099556676711e-05, "loss": 1.9718, "step": 33761 }, { "epoch": 1.0893063473082871, "grad_norm": 0.3828125, "learning_rate": 1.326933141001918e-05, "loss": 1.9915, "step": 33762 }, { "epoch": 1.0893386111620835, "grad_norm": 0.357421875, "learning_rate": 1.3268563267961435e-05, "loss": 1.9921, "step": 33763 }, { "epoch": 1.08937087501588, "grad_norm": 0.37109375, "learning_rate": 1.3267795130505512e-05, "loss": 1.9828, "step": 33764 }, { "epoch": 1.089403138869676, "grad_norm": 0.37109375, "learning_rate": 1.3267026997653454e-05, "loss": 1.9589, "step": 33765 }, { "epoch": 1.0894354027234725, "grad_norm": 0.373046875, "learning_rate": 1.3266258869407308e-05, "loss": 1.987, "step": 33766 }, { "epoch": 1.089467666577269, "grad_norm": 0.423828125, "learning_rate": 1.3265490745769106e-05, "loss": 1.9637, "step": 33767 }, { "epoch": 1.0894999304310653, "grad_norm": 0.361328125, "learning_rate": 1.3264722626740895e-05, "loss": 1.9795, "step": 33768 }, { "epoch": 1.0895321942848617, "grad_norm": 0.38671875, "learning_rate": 1.3263954512324713e-05, "loss": 1.9823, "step": 33769 }, { "epoch": 1.0895644581386579, "grad_norm": 0.375, "learning_rate": 1.3263186402522607e-05, "loss": 1.9412, "step": 33770 }, { "epoch": 1.0895967219924543, "grad_norm": 0.349609375, "learning_rate": 1.3262418297336614e-05, "loss": 1.9843, "step": 33771 }, { "epoch": 1.0896289858462507, "grad_norm": 0.3671875, "learning_rate": 1.326165019676878e-05, "loss": 1.9209, "step": 33772 }, { "epoch": 1.089661249700047, "grad_norm": 0.3515625, "learning_rate": 1.3260882100821139e-05, "loss": 1.9649, "step": 33773 }, { "epoch": 1.0896935135538433, "grad_norm": 0.34765625, "learning_rate": 1.326011400949574e-05, "loss": 1.9336, "step": 33774 }, { "epoch": 1.0897257774076397, "grad_norm": 0.37109375, "learning_rate": 1.3259345922794618e-05, "loss": 1.9767, "step": 33775 }, { "epoch": 1.089758041261436, "grad_norm": 0.345703125, "learning_rate": 1.3258577840719818e-05, "loss": 1.9609, "step": 33776 }, { "epoch": 1.0897903051152324, "grad_norm": 0.3515625, "learning_rate": 1.3257809763273383e-05, "loss": 1.9597, "step": 33777 }, { "epoch": 1.0898225689690286, "grad_norm": 0.34375, "learning_rate": 1.3257041690457354e-05, "loss": 1.9914, "step": 33778 }, { "epoch": 1.089854832822825, "grad_norm": 0.34765625, "learning_rate": 1.3256273622273768e-05, "loss": 1.9389, "step": 33779 }, { "epoch": 1.0898870966766214, "grad_norm": 0.365234375, "learning_rate": 1.325550555872467e-05, "loss": 1.9566, "step": 33780 }, { "epoch": 1.0899193605304178, "grad_norm": 0.34765625, "learning_rate": 1.3254737499812099e-05, "loss": 1.9969, "step": 33781 }, { "epoch": 1.089951624384214, "grad_norm": 0.361328125, "learning_rate": 1.32539694455381e-05, "loss": 1.9597, "step": 33782 }, { "epoch": 1.0899838882380104, "grad_norm": 0.345703125, "learning_rate": 1.325320139590471e-05, "loss": 1.9862, "step": 33783 }, { "epoch": 1.0900161520918068, "grad_norm": 0.361328125, "learning_rate": 1.3252433350913977e-05, "loss": 1.9611, "step": 33784 }, { "epoch": 1.0900484159456032, "grad_norm": 0.353515625, "learning_rate": 1.3251665310567935e-05, "loss": 1.979, "step": 33785 }, { "epoch": 1.0900806797993994, "grad_norm": 0.361328125, "learning_rate": 1.3250897274868628e-05, "loss": 1.9491, "step": 33786 }, { "epoch": 1.0901129436531958, "grad_norm": 0.337890625, "learning_rate": 1.3250129243818098e-05, "loss": 1.9817, "step": 33787 }, { "epoch": 1.0901452075069922, "grad_norm": 0.3671875, "learning_rate": 1.3249361217418384e-05, "loss": 1.9693, "step": 33788 }, { "epoch": 1.0901774713607886, "grad_norm": 0.359375, "learning_rate": 1.3248593195671529e-05, "loss": 1.9948, "step": 33789 }, { "epoch": 1.090209735214585, "grad_norm": 0.333984375, "learning_rate": 1.3247825178579582e-05, "loss": 1.9191, "step": 33790 }, { "epoch": 1.0902419990683812, "grad_norm": 0.349609375, "learning_rate": 1.324705716614457e-05, "loss": 1.971, "step": 33791 }, { "epoch": 1.0902742629221776, "grad_norm": 0.3671875, "learning_rate": 1.324628915836854e-05, "loss": 1.9583, "step": 33792 }, { "epoch": 1.090306526775974, "grad_norm": 0.357421875, "learning_rate": 1.3245521155253536e-05, "loss": 1.9731, "step": 33793 }, { "epoch": 1.0903387906297703, "grad_norm": 0.369140625, "learning_rate": 1.3244753156801593e-05, "loss": 1.998, "step": 33794 }, { "epoch": 1.0903710544835665, "grad_norm": 0.341796875, "learning_rate": 1.3243985163014758e-05, "loss": 1.9604, "step": 33795 }, { "epoch": 1.090403318337363, "grad_norm": 0.3671875, "learning_rate": 1.324321717389508e-05, "loss": 1.957, "step": 33796 }, { "epoch": 1.0904355821911593, "grad_norm": 0.349609375, "learning_rate": 1.3242449189444583e-05, "loss": 1.9657, "step": 33797 }, { "epoch": 1.0904678460449557, "grad_norm": 0.349609375, "learning_rate": 1.3241681209665312e-05, "loss": 1.9574, "step": 33798 }, { "epoch": 1.090500109898752, "grad_norm": 0.337890625, "learning_rate": 1.3240913234559315e-05, "loss": 1.9712, "step": 33799 }, { "epoch": 1.0905323737525483, "grad_norm": 0.3359375, "learning_rate": 1.3240145264128632e-05, "loss": 1.973, "step": 33800 }, { "epoch": 1.0905646376063447, "grad_norm": 0.34765625, "learning_rate": 1.3239377298375302e-05, "loss": 1.9592, "step": 33801 }, { "epoch": 1.090596901460141, "grad_norm": 0.35546875, "learning_rate": 1.3238609337301373e-05, "loss": 1.9781, "step": 33802 }, { "epoch": 1.0906291653139373, "grad_norm": 0.365234375, "learning_rate": 1.3237841380908868e-05, "loss": 1.9669, "step": 33803 }, { "epoch": 1.0906614291677337, "grad_norm": 0.36328125, "learning_rate": 1.3237073429199843e-05, "loss": 1.9502, "step": 33804 }, { "epoch": 1.09069369302153, "grad_norm": 0.369140625, "learning_rate": 1.3236305482176337e-05, "loss": 1.9793, "step": 33805 }, { "epoch": 1.0907259568753265, "grad_norm": 0.357421875, "learning_rate": 1.3235537539840388e-05, "loss": 1.9828, "step": 33806 }, { "epoch": 1.0907582207291227, "grad_norm": 0.3515625, "learning_rate": 1.323476960219404e-05, "loss": 1.9677, "step": 33807 }, { "epoch": 1.090790484582919, "grad_norm": 0.365234375, "learning_rate": 1.3234001669239337e-05, "loss": 1.9653, "step": 33808 }, { "epoch": 1.0908227484367155, "grad_norm": 0.34375, "learning_rate": 1.3233233740978311e-05, "loss": 1.9717, "step": 33809 }, { "epoch": 1.0908550122905118, "grad_norm": 0.369140625, "learning_rate": 1.3232465817413008e-05, "loss": 1.9706, "step": 33810 }, { "epoch": 1.0908872761443082, "grad_norm": 0.33984375, "learning_rate": 1.3231697898545469e-05, "loss": 1.9635, "step": 33811 }, { "epoch": 1.0909195399981044, "grad_norm": 0.365234375, "learning_rate": 1.3230929984377734e-05, "loss": 1.9886, "step": 33812 }, { "epoch": 1.0909518038519008, "grad_norm": 0.345703125, "learning_rate": 1.3230162074911845e-05, "loss": 1.952, "step": 33813 }, { "epoch": 1.0909840677056972, "grad_norm": 0.376953125, "learning_rate": 1.3229394170149845e-05, "loss": 1.9749, "step": 33814 }, { "epoch": 1.0910163315594936, "grad_norm": 0.353515625, "learning_rate": 1.3228626270093772e-05, "loss": 1.937, "step": 33815 }, { "epoch": 1.0910485954132898, "grad_norm": 0.34765625, "learning_rate": 1.3227858374745664e-05, "loss": 1.9799, "step": 33816 }, { "epoch": 1.0910808592670862, "grad_norm": 0.353515625, "learning_rate": 1.322709048410757e-05, "loss": 1.9547, "step": 33817 }, { "epoch": 1.0911131231208826, "grad_norm": 0.35546875, "learning_rate": 1.3226322598181524e-05, "loss": 1.9888, "step": 33818 }, { "epoch": 1.091145386974679, "grad_norm": 0.36328125, "learning_rate": 1.3225554716969568e-05, "loss": 1.9526, "step": 33819 }, { "epoch": 1.0911776508284752, "grad_norm": 0.3671875, "learning_rate": 1.3224786840473749e-05, "loss": 1.9659, "step": 33820 }, { "epoch": 1.0912099146822716, "grad_norm": 0.3359375, "learning_rate": 1.32240189686961e-05, "loss": 1.9388, "step": 33821 }, { "epoch": 1.091242178536068, "grad_norm": 0.375, "learning_rate": 1.3223251101638663e-05, "loss": 1.9707, "step": 33822 }, { "epoch": 1.0912744423898644, "grad_norm": 0.369140625, "learning_rate": 1.3222483239303482e-05, "loss": 1.9806, "step": 33823 }, { "epoch": 1.0913067062436608, "grad_norm": 0.35546875, "learning_rate": 1.3221715381692596e-05, "loss": 1.9641, "step": 33824 }, { "epoch": 1.091338970097457, "grad_norm": 0.376953125, "learning_rate": 1.3220947528808047e-05, "loss": 1.9755, "step": 33825 }, { "epoch": 1.0913712339512534, "grad_norm": 0.345703125, "learning_rate": 1.3220179680651878e-05, "loss": 1.9604, "step": 33826 }, { "epoch": 1.0914034978050497, "grad_norm": 0.376953125, "learning_rate": 1.3219411837226124e-05, "loss": 1.982, "step": 33827 }, { "epoch": 1.091435761658846, "grad_norm": 0.345703125, "learning_rate": 1.3218643998532828e-05, "loss": 1.9857, "step": 33828 }, { "epoch": 1.0914680255126423, "grad_norm": 0.34375, "learning_rate": 1.3217876164574033e-05, "loss": 1.9866, "step": 33829 }, { "epoch": 1.0915002893664387, "grad_norm": 0.37109375, "learning_rate": 1.3217108335351773e-05, "loss": 1.9587, "step": 33830 }, { "epoch": 1.0915325532202351, "grad_norm": 0.345703125, "learning_rate": 1.3216340510868098e-05, "loss": 1.9478, "step": 33831 }, { "epoch": 1.0915648170740315, "grad_norm": 0.3515625, "learning_rate": 1.3215572691125052e-05, "loss": 1.9802, "step": 33832 }, { "epoch": 1.0915970809278277, "grad_norm": 0.34765625, "learning_rate": 1.3214804876124662e-05, "loss": 1.9831, "step": 33833 }, { "epoch": 1.091629344781624, "grad_norm": 0.333984375, "learning_rate": 1.3214037065868975e-05, "loss": 1.9302, "step": 33834 }, { "epoch": 1.0916616086354205, "grad_norm": 0.376953125, "learning_rate": 1.3213269260360026e-05, "loss": 1.9868, "step": 33835 }, { "epoch": 1.091693872489217, "grad_norm": 0.33984375, "learning_rate": 1.3212501459599867e-05, "loss": 1.9705, "step": 33836 }, { "epoch": 1.091726136343013, "grad_norm": 0.345703125, "learning_rate": 1.3211733663590533e-05, "loss": 1.9957, "step": 33837 }, { "epoch": 1.0917584001968095, "grad_norm": 0.341796875, "learning_rate": 1.321096587233407e-05, "loss": 1.9665, "step": 33838 }, { "epoch": 1.0917906640506059, "grad_norm": 0.3359375, "learning_rate": 1.3210198085832509e-05, "loss": 1.9817, "step": 33839 }, { "epoch": 1.0918229279044023, "grad_norm": 0.330078125, "learning_rate": 1.3209430304087892e-05, "loss": 1.9503, "step": 33840 }, { "epoch": 1.0918551917581985, "grad_norm": 0.34375, "learning_rate": 1.3208662527102264e-05, "loss": 1.981, "step": 33841 }, { "epoch": 1.0918874556119949, "grad_norm": 0.34765625, "learning_rate": 1.3207894754877666e-05, "loss": 1.9616, "step": 33842 }, { "epoch": 1.0919197194657912, "grad_norm": 0.3359375, "learning_rate": 1.3207126987416138e-05, "loss": 1.9717, "step": 33843 }, { "epoch": 1.0919519833195876, "grad_norm": 0.3359375, "learning_rate": 1.3206359224719724e-05, "loss": 1.9927, "step": 33844 }, { "epoch": 1.091984247173384, "grad_norm": 0.34375, "learning_rate": 1.320559146679045e-05, "loss": 2.0127, "step": 33845 }, { "epoch": 1.0920165110271802, "grad_norm": 0.33984375, "learning_rate": 1.320482371363037e-05, "loss": 1.9809, "step": 33846 }, { "epoch": 1.0920487748809766, "grad_norm": 0.33203125, "learning_rate": 1.3204055965241521e-05, "loss": 1.953, "step": 33847 }, { "epoch": 1.092081038734773, "grad_norm": 0.337890625, "learning_rate": 1.3203288221625944e-05, "loss": 1.9875, "step": 33848 }, { "epoch": 1.0921133025885694, "grad_norm": 0.3359375, "learning_rate": 1.3202520482785679e-05, "loss": 1.9679, "step": 33849 }, { "epoch": 1.0921455664423656, "grad_norm": 0.333984375, "learning_rate": 1.320175274872277e-05, "loss": 1.9743, "step": 33850 }, { "epoch": 1.092177830296162, "grad_norm": 0.337890625, "learning_rate": 1.3200985019439252e-05, "loss": 1.9657, "step": 33851 }, { "epoch": 1.0922100941499584, "grad_norm": 0.35546875, "learning_rate": 1.3200217294937166e-05, "loss": 1.9807, "step": 33852 }, { "epoch": 1.0922423580037548, "grad_norm": 0.3359375, "learning_rate": 1.3199449575218555e-05, "loss": 1.9705, "step": 33853 }, { "epoch": 1.092274621857551, "grad_norm": 0.35546875, "learning_rate": 1.3198681860285458e-05, "loss": 1.9705, "step": 33854 }, { "epoch": 1.0923068857113474, "grad_norm": 0.33984375, "learning_rate": 1.3197914150139916e-05, "loss": 1.9932, "step": 33855 }, { "epoch": 1.0923391495651438, "grad_norm": 0.33203125, "learning_rate": 1.319714644478397e-05, "loss": 1.9768, "step": 33856 }, { "epoch": 1.0923714134189402, "grad_norm": 0.34375, "learning_rate": 1.3196378744219662e-05, "loss": 1.9895, "step": 33857 }, { "epoch": 1.0924036772727364, "grad_norm": 0.333984375, "learning_rate": 1.3195611048449029e-05, "loss": 1.9854, "step": 33858 }, { "epoch": 1.0924359411265328, "grad_norm": 0.34765625, "learning_rate": 1.3194843357474113e-05, "loss": 1.9752, "step": 33859 }, { "epoch": 1.0924682049803291, "grad_norm": 0.357421875, "learning_rate": 1.3194075671296951e-05, "loss": 1.9351, "step": 33860 }, { "epoch": 1.0925004688341255, "grad_norm": 0.333984375, "learning_rate": 1.3193307989919588e-05, "loss": 1.9514, "step": 33861 }, { "epoch": 1.0925327326879217, "grad_norm": 0.3359375, "learning_rate": 1.3192540313344063e-05, "loss": 2.0011, "step": 33862 }, { "epoch": 1.0925649965417181, "grad_norm": 0.33203125, "learning_rate": 1.3191772641572419e-05, "loss": 1.9861, "step": 33863 }, { "epoch": 1.0925972603955145, "grad_norm": 0.330078125, "learning_rate": 1.3191004974606687e-05, "loss": 1.9297, "step": 33864 }, { "epoch": 1.092629524249311, "grad_norm": 0.3359375, "learning_rate": 1.3190237312448918e-05, "loss": 1.9936, "step": 33865 }, { "epoch": 1.0926617881031073, "grad_norm": 0.33203125, "learning_rate": 1.3189469655101146e-05, "loss": 1.9611, "step": 33866 }, { "epoch": 1.0926940519569035, "grad_norm": 0.33984375, "learning_rate": 1.3188702002565412e-05, "loss": 1.9765, "step": 33867 }, { "epoch": 1.0927263158107, "grad_norm": 0.33203125, "learning_rate": 1.3187934354843758e-05, "loss": 1.9495, "step": 33868 }, { "epoch": 1.0927585796644963, "grad_norm": 0.33984375, "learning_rate": 1.3187166711938231e-05, "loss": 1.9673, "step": 33869 }, { "epoch": 1.0927908435182927, "grad_norm": 0.345703125, "learning_rate": 1.3186399073850858e-05, "loss": 1.9637, "step": 33870 }, { "epoch": 1.0928231073720889, "grad_norm": 0.33984375, "learning_rate": 1.3185631440583685e-05, "loss": 1.9987, "step": 33871 }, { "epoch": 1.0928553712258853, "grad_norm": 0.33984375, "learning_rate": 1.3184863812138751e-05, "loss": 1.9744, "step": 33872 }, { "epoch": 1.0928876350796817, "grad_norm": 0.3359375, "learning_rate": 1.3184096188518098e-05, "loss": 1.953, "step": 33873 }, { "epoch": 1.092919898933478, "grad_norm": 0.353515625, "learning_rate": 1.3183328569723767e-05, "loss": 1.9804, "step": 33874 }, { "epoch": 1.0929521627872743, "grad_norm": 0.357421875, "learning_rate": 1.3182560955757805e-05, "loss": 1.9711, "step": 33875 }, { "epoch": 1.0929844266410706, "grad_norm": 0.3671875, "learning_rate": 1.3181793346622235e-05, "loss": 1.9434, "step": 33876 }, { "epoch": 1.093016690494867, "grad_norm": 0.349609375, "learning_rate": 1.3181025742319105e-05, "loss": 1.9559, "step": 33877 }, { "epoch": 1.0930489543486634, "grad_norm": 0.36328125, "learning_rate": 1.318025814285046e-05, "loss": 1.9291, "step": 33878 }, { "epoch": 1.0930812182024596, "grad_norm": 0.3671875, "learning_rate": 1.3179490548218337e-05, "loss": 1.9945, "step": 33879 }, { "epoch": 1.093113482056256, "grad_norm": 0.337890625, "learning_rate": 1.3178722958424775e-05, "loss": 1.9608, "step": 33880 }, { "epoch": 1.0931457459100524, "grad_norm": 0.3671875, "learning_rate": 1.317795537347182e-05, "loss": 1.9617, "step": 33881 }, { "epoch": 1.0931780097638488, "grad_norm": 0.34375, "learning_rate": 1.31771877933615e-05, "loss": 1.9819, "step": 33882 }, { "epoch": 1.093210273617645, "grad_norm": 0.341796875, "learning_rate": 1.3176420218095864e-05, "loss": 1.988, "step": 33883 }, { "epoch": 1.0932425374714414, "grad_norm": 0.34375, "learning_rate": 1.3175652647676948e-05, "loss": 1.9887, "step": 33884 }, { "epoch": 1.0932748013252378, "grad_norm": 0.349609375, "learning_rate": 1.3174885082106797e-05, "loss": 1.9995, "step": 33885 }, { "epoch": 1.0933070651790342, "grad_norm": 0.330078125, "learning_rate": 1.3174117521387446e-05, "loss": 1.9531, "step": 33886 }, { "epoch": 1.0933393290328306, "grad_norm": 0.357421875, "learning_rate": 1.3173349965520943e-05, "loss": 2.0035, "step": 33887 }, { "epoch": 1.0933715928866268, "grad_norm": 0.34375, "learning_rate": 1.3172582414509317e-05, "loss": 1.976, "step": 33888 }, { "epoch": 1.0934038567404232, "grad_norm": 0.380859375, "learning_rate": 1.3171814868354615e-05, "loss": 1.9731, "step": 33889 }, { "epoch": 1.0934361205942196, "grad_norm": 0.337890625, "learning_rate": 1.3171047327058874e-05, "loss": 1.9405, "step": 33890 }, { "epoch": 1.093468384448016, "grad_norm": 0.36328125, "learning_rate": 1.3170279790624137e-05, "loss": 1.9493, "step": 33891 }, { "epoch": 1.0935006483018122, "grad_norm": 0.361328125, "learning_rate": 1.316951225905244e-05, "loss": 1.9566, "step": 33892 }, { "epoch": 1.0935329121556085, "grad_norm": 0.34375, "learning_rate": 1.316874473234583e-05, "loss": 1.9743, "step": 33893 }, { "epoch": 1.093565176009405, "grad_norm": 0.36328125, "learning_rate": 1.3167977210506336e-05, "loss": 1.9737, "step": 33894 }, { "epoch": 1.0935974398632013, "grad_norm": 0.349609375, "learning_rate": 1.3167209693536006e-05, "loss": 1.9867, "step": 33895 }, { "epoch": 1.0936297037169975, "grad_norm": 0.341796875, "learning_rate": 1.3166442181436878e-05, "loss": 1.97, "step": 33896 }, { "epoch": 1.093661967570794, "grad_norm": 0.37890625, "learning_rate": 1.3165674674210991e-05, "loss": 1.9885, "step": 33897 }, { "epoch": 1.0936942314245903, "grad_norm": 0.349609375, "learning_rate": 1.3164907171860387e-05, "loss": 1.9714, "step": 33898 }, { "epoch": 1.0937264952783867, "grad_norm": 0.34375, "learning_rate": 1.3164139674387107e-05, "loss": 1.9654, "step": 33899 }, { "epoch": 1.093758759132183, "grad_norm": 0.35546875, "learning_rate": 1.3163372181793186e-05, "loss": 1.963, "step": 33900 }, { "epoch": 1.0937910229859793, "grad_norm": 0.34765625, "learning_rate": 1.3162604694080667e-05, "loss": 1.9779, "step": 33901 }, { "epoch": 1.0938232868397757, "grad_norm": 0.34765625, "learning_rate": 1.3161837211251587e-05, "loss": 1.9688, "step": 33902 }, { "epoch": 1.093855550693572, "grad_norm": 0.41796875, "learning_rate": 1.3161069733307988e-05, "loss": 1.9652, "step": 33903 }, { "epoch": 1.0938878145473683, "grad_norm": 0.3359375, "learning_rate": 1.3160302260251907e-05, "loss": 1.9663, "step": 33904 }, { "epoch": 1.0939200784011647, "grad_norm": 0.37890625, "learning_rate": 1.3159534792085398e-05, "loss": 1.9651, "step": 33905 }, { "epoch": 1.093952342254961, "grad_norm": 0.35546875, "learning_rate": 1.315876732881048e-05, "loss": 1.987, "step": 33906 }, { "epoch": 1.0939846061087575, "grad_norm": 0.357421875, "learning_rate": 1.3157999870429204e-05, "loss": 1.9396, "step": 33907 }, { "epoch": 1.0940168699625539, "grad_norm": 0.369140625, "learning_rate": 1.3157232416943607e-05, "loss": 1.9806, "step": 33908 }, { "epoch": 1.09404913381635, "grad_norm": 0.349609375, "learning_rate": 1.3156464968355726e-05, "loss": 1.9662, "step": 33909 }, { "epoch": 1.0940813976701464, "grad_norm": 0.345703125, "learning_rate": 1.3155697524667609e-05, "loss": 1.9524, "step": 33910 }, { "epoch": 1.0941136615239428, "grad_norm": 0.375, "learning_rate": 1.3154930085881296e-05, "loss": 1.974, "step": 33911 }, { "epoch": 1.0941459253777392, "grad_norm": 0.33984375, "learning_rate": 1.3154162651998817e-05, "loss": 1.965, "step": 33912 }, { "epoch": 1.0941781892315354, "grad_norm": 0.36328125, "learning_rate": 1.3153395223022215e-05, "loss": 1.9865, "step": 33913 }, { "epoch": 1.0942104530853318, "grad_norm": 0.3828125, "learning_rate": 1.3152627798953526e-05, "loss": 1.9611, "step": 33914 }, { "epoch": 1.0942427169391282, "grad_norm": 0.341796875, "learning_rate": 1.3151860379794802e-05, "loss": 1.9798, "step": 33915 }, { "epoch": 1.0942749807929246, "grad_norm": 0.3671875, "learning_rate": 1.3151092965548074e-05, "loss": 1.9424, "step": 33916 }, { "epoch": 1.0943072446467208, "grad_norm": 0.36328125, "learning_rate": 1.3150325556215388e-05, "loss": 1.9828, "step": 33917 }, { "epoch": 1.0943395085005172, "grad_norm": 0.35546875, "learning_rate": 1.314955815179877e-05, "loss": 1.9562, "step": 33918 }, { "epoch": 1.0943717723543136, "grad_norm": 0.3359375, "learning_rate": 1.3148790752300272e-05, "loss": 1.9905, "step": 33919 }, { "epoch": 1.09440403620811, "grad_norm": 0.330078125, "learning_rate": 1.3148023357721929e-05, "loss": 1.9931, "step": 33920 }, { "epoch": 1.0944363000619062, "grad_norm": 0.35546875, "learning_rate": 1.3147255968065782e-05, "loss": 1.9788, "step": 33921 }, { "epoch": 1.0944685639157026, "grad_norm": 0.328125, "learning_rate": 1.314648858333387e-05, "loss": 1.9782, "step": 33922 }, { "epoch": 1.094500827769499, "grad_norm": 0.34765625, "learning_rate": 1.3145721203528235e-05, "loss": 1.9724, "step": 33923 }, { "epoch": 1.0945330916232954, "grad_norm": 0.3359375, "learning_rate": 1.3144953828650913e-05, "loss": 1.9828, "step": 33924 }, { "epoch": 1.0945653554770916, "grad_norm": 0.357421875, "learning_rate": 1.3144186458703944e-05, "loss": 1.9532, "step": 33925 }, { "epoch": 1.094597619330888, "grad_norm": 0.384765625, "learning_rate": 1.3143419093689368e-05, "loss": 1.9425, "step": 33926 }, { "epoch": 1.0946298831846843, "grad_norm": 0.333984375, "learning_rate": 1.3142651733609223e-05, "loss": 1.9395, "step": 33927 }, { "epoch": 1.0946621470384807, "grad_norm": 0.34765625, "learning_rate": 1.3141884378465552e-05, "loss": 2.0059, "step": 33928 }, { "epoch": 1.0946944108922771, "grad_norm": 0.33203125, "learning_rate": 1.3141117028260395e-05, "loss": 1.9295, "step": 33929 }, { "epoch": 1.0947266747460733, "grad_norm": 0.341796875, "learning_rate": 1.3140349682995789e-05, "loss": 1.9771, "step": 33930 }, { "epoch": 1.0947589385998697, "grad_norm": 0.333984375, "learning_rate": 1.313958234267377e-05, "loss": 1.946, "step": 33931 }, { "epoch": 1.0947912024536661, "grad_norm": 0.333984375, "learning_rate": 1.3138815007296382e-05, "loss": 1.943, "step": 33932 }, { "epoch": 1.0948234663074625, "grad_norm": 0.34375, "learning_rate": 1.3138047676865665e-05, "loss": 1.9732, "step": 33933 }, { "epoch": 1.0948557301612587, "grad_norm": 0.33984375, "learning_rate": 1.3137280351383653e-05, "loss": 1.9734, "step": 33934 }, { "epoch": 1.094887994015055, "grad_norm": 0.349609375, "learning_rate": 1.3136513030852397e-05, "loss": 1.9724, "step": 33935 }, { "epoch": 1.0949202578688515, "grad_norm": 0.369140625, "learning_rate": 1.3135745715273923e-05, "loss": 2.0055, "step": 33936 }, { "epoch": 1.094952521722648, "grad_norm": 0.328125, "learning_rate": 1.3134978404650277e-05, "loss": 1.9521, "step": 33937 }, { "epoch": 1.094984785576444, "grad_norm": 0.3671875, "learning_rate": 1.3134211098983497e-05, "loss": 1.9832, "step": 33938 }, { "epoch": 1.0950170494302405, "grad_norm": 0.33203125, "learning_rate": 1.3133443798275623e-05, "loss": 1.982, "step": 33939 }, { "epoch": 1.0950493132840369, "grad_norm": 0.345703125, "learning_rate": 1.3132676502528694e-05, "loss": 1.9852, "step": 33940 }, { "epoch": 1.0950815771378333, "grad_norm": 0.337890625, "learning_rate": 1.3131909211744752e-05, "loss": 1.9347, "step": 33941 }, { "epoch": 1.0951138409916295, "grad_norm": 0.380859375, "learning_rate": 1.313114192592583e-05, "loss": 1.9755, "step": 33942 }, { "epoch": 1.0951461048454258, "grad_norm": 0.357421875, "learning_rate": 1.3130374645073973e-05, "loss": 1.9713, "step": 33943 }, { "epoch": 1.0951783686992222, "grad_norm": 0.353515625, "learning_rate": 1.3129607369191219e-05, "loss": 1.9388, "step": 33944 }, { "epoch": 1.0952106325530186, "grad_norm": 0.384765625, "learning_rate": 1.3128840098279605e-05, "loss": 1.9757, "step": 33945 }, { "epoch": 1.0952428964068148, "grad_norm": 0.359375, "learning_rate": 1.312807283234117e-05, "loss": 1.9828, "step": 33946 }, { "epoch": 1.0952751602606112, "grad_norm": 0.353515625, "learning_rate": 1.3127305571377963e-05, "loss": 1.9694, "step": 33947 }, { "epoch": 1.0953074241144076, "grad_norm": 0.33203125, "learning_rate": 1.312653831539201e-05, "loss": 1.9603, "step": 33948 }, { "epoch": 1.095339687968204, "grad_norm": 0.35546875, "learning_rate": 1.3125771064385354e-05, "loss": 1.9576, "step": 33949 }, { "epoch": 1.0953719518220004, "grad_norm": 0.345703125, "learning_rate": 1.3125003818360039e-05, "loss": 1.9566, "step": 33950 }, { "epoch": 1.0954042156757966, "grad_norm": 0.341796875, "learning_rate": 1.3124236577318094e-05, "loss": 1.9644, "step": 33951 }, { "epoch": 1.095436479529593, "grad_norm": 0.34765625, "learning_rate": 1.312346934126157e-05, "loss": 1.9788, "step": 33952 }, { "epoch": 1.0954687433833894, "grad_norm": 0.349609375, "learning_rate": 1.312270211019251e-05, "loss": 1.9635, "step": 33953 }, { "epoch": 1.0955010072371858, "grad_norm": 0.353515625, "learning_rate": 1.3121934884112934e-05, "loss": 1.9669, "step": 33954 }, { "epoch": 1.095533271090982, "grad_norm": 0.35546875, "learning_rate": 1.3121167663024891e-05, "loss": 2.0, "step": 33955 }, { "epoch": 1.0955655349447784, "grad_norm": 0.349609375, "learning_rate": 1.3120400446930423e-05, "loss": 2.0007, "step": 33956 }, { "epoch": 1.0955977987985748, "grad_norm": 0.3515625, "learning_rate": 1.3119633235831566e-05, "loss": 1.9567, "step": 33957 }, { "epoch": 1.0956300626523712, "grad_norm": 0.345703125, "learning_rate": 1.3118866029730361e-05, "loss": 1.9978, "step": 33958 }, { "epoch": 1.0956623265061674, "grad_norm": 0.365234375, "learning_rate": 1.3118098828628853e-05, "loss": 2.007, "step": 33959 }, { "epoch": 1.0956945903599637, "grad_norm": 0.3515625, "learning_rate": 1.3117331632529062e-05, "loss": 1.9543, "step": 33960 }, { "epoch": 1.0957268542137601, "grad_norm": 0.357421875, "learning_rate": 1.3116564441433046e-05, "loss": 1.9714, "step": 33961 }, { "epoch": 1.0957591180675565, "grad_norm": 0.37109375, "learning_rate": 1.3115797255342835e-05, "loss": 1.9758, "step": 33962 }, { "epoch": 1.0957913819213527, "grad_norm": 0.376953125, "learning_rate": 1.311503007426047e-05, "loss": 2.0091, "step": 33963 }, { "epoch": 1.0958236457751491, "grad_norm": 0.34375, "learning_rate": 1.3114262898187991e-05, "loss": 2.0012, "step": 33964 }, { "epoch": 1.0958559096289455, "grad_norm": 0.37109375, "learning_rate": 1.311349572712744e-05, "loss": 1.9646, "step": 33965 }, { "epoch": 1.095888173482742, "grad_norm": 0.365234375, "learning_rate": 1.3112728561080849e-05, "loss": 1.9651, "step": 33966 }, { "epoch": 1.095920437336538, "grad_norm": 0.37109375, "learning_rate": 1.3111961400050258e-05, "loss": 2.0037, "step": 33967 }, { "epoch": 1.0959527011903345, "grad_norm": 0.361328125, "learning_rate": 1.311119424403771e-05, "loss": 1.9938, "step": 33968 }, { "epoch": 1.095984965044131, "grad_norm": 0.3515625, "learning_rate": 1.3110427093045242e-05, "loss": 2.0086, "step": 33969 }, { "epoch": 1.0960172288979273, "grad_norm": 0.357421875, "learning_rate": 1.3109659947074893e-05, "loss": 2.0075, "step": 33970 }, { "epoch": 1.0960494927517237, "grad_norm": 0.384765625, "learning_rate": 1.3108892806128704e-05, "loss": 1.9722, "step": 33971 }, { "epoch": 1.0960817566055199, "grad_norm": 0.384765625, "learning_rate": 1.3108125670208712e-05, "loss": 1.9268, "step": 33972 }, { "epoch": 1.0961140204593163, "grad_norm": 0.3828125, "learning_rate": 1.3107358539316951e-05, "loss": 1.9458, "step": 33973 }, { "epoch": 1.0961462843131127, "grad_norm": 0.361328125, "learning_rate": 1.3106591413455468e-05, "loss": 1.9521, "step": 33974 }, { "epoch": 1.096178548166909, "grad_norm": 0.34375, "learning_rate": 1.3105824292626298e-05, "loss": 1.9616, "step": 33975 }, { "epoch": 1.0962108120207052, "grad_norm": 0.3515625, "learning_rate": 1.310505717683148e-05, "loss": 1.9347, "step": 33976 }, { "epoch": 1.0962430758745016, "grad_norm": 0.349609375, "learning_rate": 1.3104290066073058e-05, "loss": 1.9712, "step": 33977 }, { "epoch": 1.096275339728298, "grad_norm": 0.3515625, "learning_rate": 1.3103522960353061e-05, "loss": 1.9739, "step": 33978 }, { "epoch": 1.0963076035820944, "grad_norm": 0.35546875, "learning_rate": 1.3102755859673535e-05, "loss": 1.9655, "step": 33979 }, { "epoch": 1.0963398674358906, "grad_norm": 0.359375, "learning_rate": 1.3101988764036513e-05, "loss": 1.9405, "step": 33980 }, { "epoch": 1.096372131289687, "grad_norm": 0.35546875, "learning_rate": 1.3101221673444041e-05, "loss": 1.9448, "step": 33981 }, { "epoch": 1.0964043951434834, "grad_norm": 0.357421875, "learning_rate": 1.3100454587898154e-05, "loss": 1.9605, "step": 33982 }, { "epoch": 1.0964366589972798, "grad_norm": 0.3671875, "learning_rate": 1.3099687507400886e-05, "loss": 1.9321, "step": 33983 }, { "epoch": 1.096468922851076, "grad_norm": 0.349609375, "learning_rate": 1.3098920431954291e-05, "loss": 1.9551, "step": 33984 }, { "epoch": 1.0965011867048724, "grad_norm": 0.373046875, "learning_rate": 1.3098153361560392e-05, "loss": 1.926, "step": 33985 }, { "epoch": 1.0965334505586688, "grad_norm": 0.37109375, "learning_rate": 1.3097386296221234e-05, "loss": 1.9543, "step": 33986 }, { "epoch": 1.0965657144124652, "grad_norm": 0.375, "learning_rate": 1.3096619235938848e-05, "loss": 1.996, "step": 33987 }, { "epoch": 1.0965979782662614, "grad_norm": 0.357421875, "learning_rate": 1.3095852180715287e-05, "loss": 1.9711, "step": 33988 }, { "epoch": 1.0966302421200578, "grad_norm": 0.400390625, "learning_rate": 1.3095085130552582e-05, "loss": 1.9793, "step": 33989 }, { "epoch": 1.0966625059738542, "grad_norm": 0.365234375, "learning_rate": 1.3094318085452778e-05, "loss": 1.9709, "step": 33990 }, { "epoch": 1.0966947698276506, "grad_norm": 0.392578125, "learning_rate": 1.30935510454179e-05, "loss": 1.9819, "step": 33991 }, { "epoch": 1.096727033681447, "grad_norm": 0.38671875, "learning_rate": 1.3092784010449992e-05, "loss": 1.9823, "step": 33992 }, { "epoch": 1.0967592975352431, "grad_norm": 0.3671875, "learning_rate": 1.3092016980551098e-05, "loss": 1.9547, "step": 33993 }, { "epoch": 1.0967915613890395, "grad_norm": 0.361328125, "learning_rate": 1.3091249955723253e-05, "loss": 1.9596, "step": 33994 }, { "epoch": 1.096823825242836, "grad_norm": 0.37890625, "learning_rate": 1.3090482935968497e-05, "loss": 1.9319, "step": 33995 }, { "epoch": 1.0968560890966323, "grad_norm": 0.3671875, "learning_rate": 1.3089715921288874e-05, "loss": 1.9484, "step": 33996 }, { "epoch": 1.0968883529504285, "grad_norm": 0.361328125, "learning_rate": 1.3088948911686407e-05, "loss": 1.9546, "step": 33997 }, { "epoch": 1.096920616804225, "grad_norm": 0.373046875, "learning_rate": 1.3088181907163146e-05, "loss": 1.9392, "step": 33998 }, { "epoch": 1.0969528806580213, "grad_norm": 0.384765625, "learning_rate": 1.3087414907721128e-05, "loss": 1.9442, "step": 33999 }, { "epoch": 1.0969851445118177, "grad_norm": 0.3515625, "learning_rate": 1.3086647913362391e-05, "loss": 1.9738, "step": 34000 }, { "epoch": 1.097017408365614, "grad_norm": 0.365234375, "learning_rate": 1.3085880924088974e-05, "loss": 1.9921, "step": 34001 }, { "epoch": 1.0970496722194103, "grad_norm": 0.361328125, "learning_rate": 1.3085113939902919e-05, "loss": 1.9522, "step": 34002 }, { "epoch": 1.0970819360732067, "grad_norm": 0.353515625, "learning_rate": 1.3084346960806255e-05, "loss": 1.9891, "step": 34003 }, { "epoch": 1.097114199927003, "grad_norm": 0.359375, "learning_rate": 1.3083579986801028e-05, "loss": 1.9687, "step": 34004 }, { "epoch": 1.0971464637807993, "grad_norm": 0.341796875, "learning_rate": 1.3082813017889273e-05, "loss": 1.9753, "step": 34005 }, { "epoch": 1.0971787276345957, "grad_norm": 0.3671875, "learning_rate": 1.3082046054073032e-05, "loss": 1.9816, "step": 34006 }, { "epoch": 1.097210991488392, "grad_norm": 0.345703125, "learning_rate": 1.3081279095354342e-05, "loss": 1.9823, "step": 34007 }, { "epoch": 1.0972432553421885, "grad_norm": 0.361328125, "learning_rate": 1.308051214173524e-05, "loss": 1.9558, "step": 34008 }, { "epoch": 1.0972755191959846, "grad_norm": 0.3671875, "learning_rate": 1.3079745193217764e-05, "loss": 1.9217, "step": 34009 }, { "epoch": 1.097307783049781, "grad_norm": 0.359375, "learning_rate": 1.3078978249803954e-05, "loss": 1.9815, "step": 34010 }, { "epoch": 1.0973400469035774, "grad_norm": 0.396484375, "learning_rate": 1.307821131149585e-05, "loss": 2.0152, "step": 34011 }, { "epoch": 1.0973723107573738, "grad_norm": 0.392578125, "learning_rate": 1.3077444378295485e-05, "loss": 1.9478, "step": 34012 }, { "epoch": 1.0974045746111702, "grad_norm": 0.38671875, "learning_rate": 1.3076677450204903e-05, "loss": 1.8971, "step": 34013 }, { "epoch": 1.0974368384649664, "grad_norm": 0.3828125, "learning_rate": 1.3075910527226143e-05, "loss": 1.9712, "step": 34014 }, { "epoch": 1.0974691023187628, "grad_norm": 0.384765625, "learning_rate": 1.3075143609361235e-05, "loss": 1.9733, "step": 34015 }, { "epoch": 1.0975013661725592, "grad_norm": 0.345703125, "learning_rate": 1.3074376696612224e-05, "loss": 1.9485, "step": 34016 }, { "epoch": 1.0975336300263556, "grad_norm": 0.375, "learning_rate": 1.3073609788981149e-05, "loss": 1.9211, "step": 34017 }, { "epoch": 1.0975658938801518, "grad_norm": 0.34765625, "learning_rate": 1.3072842886470044e-05, "loss": 1.9182, "step": 34018 }, { "epoch": 1.0975981577339482, "grad_norm": 0.357421875, "learning_rate": 1.3072075989080946e-05, "loss": 1.9767, "step": 34019 }, { "epoch": 1.0976304215877446, "grad_norm": 0.361328125, "learning_rate": 1.307130909681591e-05, "loss": 1.9416, "step": 34020 }, { "epoch": 1.097662685441541, "grad_norm": 0.3515625, "learning_rate": 1.307054220967695e-05, "loss": 1.9677, "step": 34021 }, { "epoch": 1.0976949492953372, "grad_norm": 0.369140625, "learning_rate": 1.3069775327666118e-05, "loss": 1.9587, "step": 34022 }, { "epoch": 1.0977272131491336, "grad_norm": 0.359375, "learning_rate": 1.3069008450785449e-05, "loss": 1.9756, "step": 34023 }, { "epoch": 1.09775947700293, "grad_norm": 0.357421875, "learning_rate": 1.306824157903698e-05, "loss": 1.97, "step": 34024 }, { "epoch": 1.0977917408567264, "grad_norm": 0.35546875, "learning_rate": 1.3067474712422753e-05, "loss": 1.9767, "step": 34025 }, { "epoch": 1.0978240047105228, "grad_norm": 0.375, "learning_rate": 1.306670785094481e-05, "loss": 1.9703, "step": 34026 }, { "epoch": 1.097856268564319, "grad_norm": 0.353515625, "learning_rate": 1.3065940994605178e-05, "loss": 1.9654, "step": 34027 }, { "epoch": 1.0978885324181153, "grad_norm": 0.361328125, "learning_rate": 1.30651741434059e-05, "loss": 1.9648, "step": 34028 }, { "epoch": 1.0979207962719117, "grad_norm": 0.361328125, "learning_rate": 1.3064407297349013e-05, "loss": 1.9311, "step": 34029 }, { "epoch": 1.097953060125708, "grad_norm": 0.361328125, "learning_rate": 1.3063640456436559e-05, "loss": 1.9326, "step": 34030 }, { "epoch": 1.0979853239795043, "grad_norm": 0.37109375, "learning_rate": 1.3062873620670573e-05, "loss": 1.9404, "step": 34031 }, { "epoch": 1.0980175878333007, "grad_norm": 0.353515625, "learning_rate": 1.3062106790053101e-05, "loss": 1.9376, "step": 34032 }, { "epoch": 1.0980498516870971, "grad_norm": 0.373046875, "learning_rate": 1.3061339964586168e-05, "loss": 1.9652, "step": 34033 }, { "epoch": 1.0980821155408935, "grad_norm": 0.34765625, "learning_rate": 1.3060573144271813e-05, "loss": 1.9874, "step": 34034 }, { "epoch": 1.0981143793946897, "grad_norm": 0.357421875, "learning_rate": 1.3059806329112085e-05, "loss": 1.9675, "step": 34035 }, { "epoch": 1.098146643248486, "grad_norm": 0.369140625, "learning_rate": 1.3059039519109015e-05, "loss": 1.9648, "step": 34036 }, { "epoch": 1.0981789071022825, "grad_norm": 0.3515625, "learning_rate": 1.3058272714264644e-05, "loss": 1.927, "step": 34037 }, { "epoch": 1.098211170956079, "grad_norm": 0.34375, "learning_rate": 1.305750591458101e-05, "loss": 1.9399, "step": 34038 }, { "epoch": 1.098243434809875, "grad_norm": 0.3515625, "learning_rate": 1.3056739120060146e-05, "loss": 1.9207, "step": 34039 }, { "epoch": 1.0982756986636715, "grad_norm": 0.3359375, "learning_rate": 1.3055972330704094e-05, "loss": 1.9451, "step": 34040 }, { "epoch": 1.0983079625174679, "grad_norm": 0.392578125, "learning_rate": 1.305520554651489e-05, "loss": 1.9418, "step": 34041 }, { "epoch": 1.0983402263712643, "grad_norm": 0.33984375, "learning_rate": 1.3054438767494574e-05, "loss": 1.9486, "step": 34042 }, { "epoch": 1.0983724902250604, "grad_norm": 0.345703125, "learning_rate": 1.3053671993645185e-05, "loss": 1.9693, "step": 34043 }, { "epoch": 1.0984047540788568, "grad_norm": 0.37890625, "learning_rate": 1.3052905224968758e-05, "loss": 1.9705, "step": 34044 }, { "epoch": 1.0984370179326532, "grad_norm": 0.357421875, "learning_rate": 1.3052138461467333e-05, "loss": 1.9712, "step": 34045 }, { "epoch": 1.0984692817864496, "grad_norm": 0.369140625, "learning_rate": 1.3051371703142945e-05, "loss": 1.9114, "step": 34046 }, { "epoch": 1.098501545640246, "grad_norm": 0.359375, "learning_rate": 1.3050604949997635e-05, "loss": 1.9471, "step": 34047 }, { "epoch": 1.0985338094940422, "grad_norm": 0.359375, "learning_rate": 1.3049838202033439e-05, "loss": 1.9462, "step": 34048 }, { "epoch": 1.0985660733478386, "grad_norm": 0.359375, "learning_rate": 1.3049071459252396e-05, "loss": 1.9702, "step": 34049 }, { "epoch": 1.098598337201635, "grad_norm": 0.359375, "learning_rate": 1.3048304721656546e-05, "loss": 1.9374, "step": 34050 }, { "epoch": 1.0986306010554312, "grad_norm": 0.353515625, "learning_rate": 1.3047537989247922e-05, "loss": 1.9846, "step": 34051 }, { "epoch": 1.0986628649092276, "grad_norm": 0.35546875, "learning_rate": 1.3046771262028564e-05, "loss": 1.9597, "step": 34052 }, { "epoch": 1.098695128763024, "grad_norm": 0.3671875, "learning_rate": 1.304600454000051e-05, "loss": 1.9737, "step": 34053 }, { "epoch": 1.0987273926168204, "grad_norm": 0.326171875, "learning_rate": 1.3045237823165796e-05, "loss": 1.9515, "step": 34054 }, { "epoch": 1.0987596564706168, "grad_norm": 0.345703125, "learning_rate": 1.3044471111526462e-05, "loss": 1.9428, "step": 34055 }, { "epoch": 1.098791920324413, "grad_norm": 0.353515625, "learning_rate": 1.3043704405084552e-05, "loss": 1.9606, "step": 34056 }, { "epoch": 1.0988241841782094, "grad_norm": 0.3515625, "learning_rate": 1.304293770384209e-05, "loss": 1.9373, "step": 34057 }, { "epoch": 1.0988564480320058, "grad_norm": 0.373046875, "learning_rate": 1.3042171007801122e-05, "loss": 1.9601, "step": 34058 }, { "epoch": 1.0988887118858022, "grad_norm": 0.330078125, "learning_rate": 1.3041404316963686e-05, "loss": 1.9304, "step": 34059 }, { "epoch": 1.0989209757395983, "grad_norm": 0.37109375, "learning_rate": 1.3040637631331816e-05, "loss": 1.9678, "step": 34060 }, { "epoch": 1.0989532395933947, "grad_norm": 0.361328125, "learning_rate": 1.3039870950907549e-05, "loss": 1.9562, "step": 34061 }, { "epoch": 1.0989855034471911, "grad_norm": 0.369140625, "learning_rate": 1.3039104275692937e-05, "loss": 1.9644, "step": 34062 }, { "epoch": 1.0990177673009875, "grad_norm": 0.357421875, "learning_rate": 1.3038337605689999e-05, "loss": 1.9736, "step": 34063 }, { "epoch": 1.0990500311547837, "grad_norm": 0.359375, "learning_rate": 1.303757094090078e-05, "loss": 1.9661, "step": 34064 }, { "epoch": 1.0990822950085801, "grad_norm": 0.375, "learning_rate": 1.3036804281327316e-05, "loss": 1.9711, "step": 34065 }, { "epoch": 1.0991145588623765, "grad_norm": 0.34375, "learning_rate": 1.3036037626971646e-05, "loss": 1.95, "step": 34066 }, { "epoch": 1.099146822716173, "grad_norm": 0.34375, "learning_rate": 1.3035270977835809e-05, "loss": 1.9779, "step": 34067 }, { "epoch": 1.0991790865699693, "grad_norm": 0.357421875, "learning_rate": 1.3034504333921848e-05, "loss": 1.9265, "step": 34068 }, { "epoch": 1.0992113504237655, "grad_norm": 0.361328125, "learning_rate": 1.3033737695231788e-05, "loss": 1.9098, "step": 34069 }, { "epoch": 1.099243614277562, "grad_norm": 0.36328125, "learning_rate": 1.3032971061767673e-05, "loss": 1.9161, "step": 34070 }, { "epoch": 1.0992758781313583, "grad_norm": 0.34375, "learning_rate": 1.3032204433531537e-05, "loss": 1.964, "step": 34071 }, { "epoch": 1.0993081419851547, "grad_norm": 0.365234375, "learning_rate": 1.3031437810525424e-05, "loss": 1.9539, "step": 34072 }, { "epoch": 1.0993404058389509, "grad_norm": 0.33984375, "learning_rate": 1.303067119275137e-05, "loss": 1.9365, "step": 34073 }, { "epoch": 1.0993726696927473, "grad_norm": 0.384765625, "learning_rate": 1.3029904580211414e-05, "loss": 1.9503, "step": 34074 }, { "epoch": 1.0994049335465437, "grad_norm": 0.353515625, "learning_rate": 1.3029137972907581e-05, "loss": 1.9395, "step": 34075 }, { "epoch": 1.09943719740034, "grad_norm": 0.380859375, "learning_rate": 1.3028371370841923e-05, "loss": 1.9432, "step": 34076 }, { "epoch": 1.0994694612541362, "grad_norm": 0.349609375, "learning_rate": 1.302760477401647e-05, "loss": 1.9557, "step": 34077 }, { "epoch": 1.0995017251079326, "grad_norm": 0.35546875, "learning_rate": 1.3026838182433263e-05, "loss": 1.9675, "step": 34078 }, { "epoch": 1.099533988961729, "grad_norm": 0.408203125, "learning_rate": 1.3026071596094339e-05, "loss": 1.9748, "step": 34079 }, { "epoch": 1.0995662528155254, "grad_norm": 0.390625, "learning_rate": 1.3025305015001737e-05, "loss": 1.9576, "step": 34080 }, { "epoch": 1.0995985166693216, "grad_norm": 0.326171875, "learning_rate": 1.302453843915749e-05, "loss": 1.9511, "step": 34081 }, { "epoch": 1.099630780523118, "grad_norm": 0.388671875, "learning_rate": 1.3023771868563637e-05, "loss": 1.9279, "step": 34082 }, { "epoch": 1.0996630443769144, "grad_norm": 0.3671875, "learning_rate": 1.3023005303222214e-05, "loss": 1.9539, "step": 34083 }, { "epoch": 1.0996953082307108, "grad_norm": 0.349609375, "learning_rate": 1.3022238743135264e-05, "loss": 1.9788, "step": 34084 }, { "epoch": 1.099727572084507, "grad_norm": 0.33984375, "learning_rate": 1.3021472188304817e-05, "loss": 1.9485, "step": 34085 }, { "epoch": 1.0997598359383034, "grad_norm": 0.34765625, "learning_rate": 1.3020705638732918e-05, "loss": 1.9798, "step": 34086 }, { "epoch": 1.0997920997920998, "grad_norm": 0.34375, "learning_rate": 1.3019939094421597e-05, "loss": 2.0002, "step": 34087 }, { "epoch": 1.0998243636458962, "grad_norm": 0.369140625, "learning_rate": 1.3019172555372896e-05, "loss": 1.9288, "step": 34088 }, { "epoch": 1.0998566274996926, "grad_norm": 0.36328125, "learning_rate": 1.301840602158885e-05, "loss": 1.9696, "step": 34089 }, { "epoch": 1.0998888913534888, "grad_norm": 0.341796875, "learning_rate": 1.30176394930715e-05, "loss": 1.9894, "step": 34090 }, { "epoch": 1.0999211552072852, "grad_norm": 0.3828125, "learning_rate": 1.3016872969822876e-05, "loss": 1.9762, "step": 34091 }, { "epoch": 1.0999534190610816, "grad_norm": 0.349609375, "learning_rate": 1.3016106451845027e-05, "loss": 1.9389, "step": 34092 }, { "epoch": 1.099985682914878, "grad_norm": 0.34765625, "learning_rate": 1.3015339939139976e-05, "loss": 1.9587, "step": 34093 }, { "epoch": 1.1000179467686741, "grad_norm": 0.37890625, "learning_rate": 1.3014573431709771e-05, "loss": 1.9369, "step": 34094 }, { "epoch": 1.1000502106224705, "grad_norm": 0.34375, "learning_rate": 1.3013806929556444e-05, "loss": 1.9805, "step": 34095 }, { "epoch": 1.100082474476267, "grad_norm": 0.3671875, "learning_rate": 1.3013040432682034e-05, "loss": 1.943, "step": 34096 }, { "epoch": 1.1001147383300633, "grad_norm": 0.345703125, "learning_rate": 1.3012273941088575e-05, "loss": 1.9707, "step": 34097 }, { "epoch": 1.1001470021838595, "grad_norm": 0.34375, "learning_rate": 1.3011507454778115e-05, "loss": 1.9489, "step": 34098 }, { "epoch": 1.100179266037656, "grad_norm": 0.34765625, "learning_rate": 1.3010740973752678e-05, "loss": 1.9802, "step": 34099 }, { "epoch": 1.1002115298914523, "grad_norm": 0.345703125, "learning_rate": 1.3009974498014307e-05, "loss": 1.9977, "step": 34100 }, { "epoch": 1.1002437937452487, "grad_norm": 0.365234375, "learning_rate": 1.3009208027565038e-05, "loss": 1.9597, "step": 34101 }, { "epoch": 1.100276057599045, "grad_norm": 0.34375, "learning_rate": 1.3008441562406908e-05, "loss": 1.9699, "step": 34102 }, { "epoch": 1.1003083214528413, "grad_norm": 0.349609375, "learning_rate": 1.3007675102541955e-05, "loss": 1.9718, "step": 34103 }, { "epoch": 1.1003405853066377, "grad_norm": 0.3515625, "learning_rate": 1.3006908647972223e-05, "loss": 1.977, "step": 34104 }, { "epoch": 1.100372849160434, "grad_norm": 0.349609375, "learning_rate": 1.3006142198699735e-05, "loss": 1.9679, "step": 34105 }, { "epoch": 1.1004051130142303, "grad_norm": 0.359375, "learning_rate": 1.3005375754726537e-05, "loss": 2.0041, "step": 34106 }, { "epoch": 1.1004373768680267, "grad_norm": 0.333984375, "learning_rate": 1.3004609316054661e-05, "loss": 1.9805, "step": 34107 }, { "epoch": 1.100469640721823, "grad_norm": 0.34375, "learning_rate": 1.3003842882686147e-05, "loss": 1.9547, "step": 34108 }, { "epoch": 1.1005019045756195, "grad_norm": 0.345703125, "learning_rate": 1.3003076454623036e-05, "loss": 1.9686, "step": 34109 }, { "epoch": 1.1005341684294159, "grad_norm": 0.357421875, "learning_rate": 1.3002310031867366e-05, "loss": 1.9545, "step": 34110 }, { "epoch": 1.100566432283212, "grad_norm": 0.337890625, "learning_rate": 1.3001543614421166e-05, "loss": 1.9738, "step": 34111 }, { "epoch": 1.1005986961370084, "grad_norm": 0.3515625, "learning_rate": 1.3000777202286468e-05, "loss": 1.94, "step": 34112 }, { "epoch": 1.1006309599908048, "grad_norm": 0.330078125, "learning_rate": 1.3000010795465324e-05, "loss": 1.9414, "step": 34113 }, { "epoch": 1.1006632238446012, "grad_norm": 0.341796875, "learning_rate": 1.2999244393959763e-05, "loss": 1.9896, "step": 34114 }, { "epoch": 1.1006954876983974, "grad_norm": 0.337890625, "learning_rate": 1.2998477997771823e-05, "loss": 1.9846, "step": 34115 }, { "epoch": 1.1007277515521938, "grad_norm": 0.337890625, "learning_rate": 1.2997711606903543e-05, "loss": 1.9607, "step": 34116 }, { "epoch": 1.1007600154059902, "grad_norm": 0.33203125, "learning_rate": 1.2996945221356959e-05, "loss": 1.9742, "step": 34117 }, { "epoch": 1.1007922792597866, "grad_norm": 0.330078125, "learning_rate": 1.2996178841134106e-05, "loss": 1.9605, "step": 34118 }, { "epoch": 1.1008245431135828, "grad_norm": 0.326171875, "learning_rate": 1.2995412466237018e-05, "loss": 1.9643, "step": 34119 }, { "epoch": 1.1008568069673792, "grad_norm": 0.341796875, "learning_rate": 1.2994646096667739e-05, "loss": 1.9651, "step": 34120 }, { "epoch": 1.1008890708211756, "grad_norm": 0.34765625, "learning_rate": 1.2993879732428302e-05, "loss": 1.9817, "step": 34121 }, { "epoch": 1.100921334674972, "grad_norm": 0.337890625, "learning_rate": 1.2993113373520743e-05, "loss": 1.9889, "step": 34122 }, { "epoch": 1.1009535985287682, "grad_norm": 0.36328125, "learning_rate": 1.2992347019947105e-05, "loss": 1.9692, "step": 34123 }, { "epoch": 1.1009858623825646, "grad_norm": 0.3359375, "learning_rate": 1.2991580671709416e-05, "loss": 1.9383, "step": 34124 }, { "epoch": 1.101018126236361, "grad_norm": 0.353515625, "learning_rate": 1.2990814328809716e-05, "loss": 1.9676, "step": 34125 }, { "epoch": 1.1010503900901574, "grad_norm": 0.337890625, "learning_rate": 1.2990047991250044e-05, "loss": 2.0016, "step": 34126 }, { "epoch": 1.1010826539439535, "grad_norm": 0.357421875, "learning_rate": 1.2989281659032435e-05, "loss": 1.9381, "step": 34127 }, { "epoch": 1.10111491779775, "grad_norm": 0.3515625, "learning_rate": 1.2988515332158927e-05, "loss": 1.9806, "step": 34128 }, { "epoch": 1.1011471816515463, "grad_norm": 0.349609375, "learning_rate": 1.2987749010631558e-05, "loss": 1.9707, "step": 34129 }, { "epoch": 1.1011794455053427, "grad_norm": 0.435546875, "learning_rate": 1.2986982694452361e-05, "loss": 1.9452, "step": 34130 }, { "epoch": 1.1012117093591391, "grad_norm": 0.3515625, "learning_rate": 1.2986216383623373e-05, "loss": 1.9861, "step": 34131 }, { "epoch": 1.1012439732129353, "grad_norm": 0.361328125, "learning_rate": 1.2985450078146632e-05, "loss": 1.9557, "step": 34132 }, { "epoch": 1.1012762370667317, "grad_norm": 0.333984375, "learning_rate": 1.2984683778024177e-05, "loss": 1.9716, "step": 34133 }, { "epoch": 1.1013085009205281, "grad_norm": 0.3671875, "learning_rate": 1.298391748325804e-05, "loss": 1.9602, "step": 34134 }, { "epoch": 1.1013407647743245, "grad_norm": 0.376953125, "learning_rate": 1.2983151193850263e-05, "loss": 1.979, "step": 34135 }, { "epoch": 1.1013730286281207, "grad_norm": 0.33203125, "learning_rate": 1.2982384909802879e-05, "loss": 1.9548, "step": 34136 }, { "epoch": 1.101405292481917, "grad_norm": 0.357421875, "learning_rate": 1.2981618631117924e-05, "loss": 1.9673, "step": 34137 }, { "epoch": 1.1014375563357135, "grad_norm": 0.36328125, "learning_rate": 1.2980852357797435e-05, "loss": 1.9648, "step": 34138 }, { "epoch": 1.10146982018951, "grad_norm": 0.359375, "learning_rate": 1.2980086089843451e-05, "loss": 2.0109, "step": 34139 }, { "epoch": 1.101502084043306, "grad_norm": 0.33984375, "learning_rate": 1.2979319827258004e-05, "loss": 1.9806, "step": 34140 }, { "epoch": 1.1015343478971025, "grad_norm": 0.359375, "learning_rate": 1.2978553570043142e-05, "loss": 1.9551, "step": 34141 }, { "epoch": 1.1015666117508989, "grad_norm": 0.3515625, "learning_rate": 1.2977787318200889e-05, "loss": 1.9727, "step": 34142 }, { "epoch": 1.1015988756046953, "grad_norm": 0.37109375, "learning_rate": 1.2977021071733284e-05, "loss": 1.9786, "step": 34143 }, { "epoch": 1.1016311394584914, "grad_norm": 0.345703125, "learning_rate": 1.2976254830642364e-05, "loss": 1.9948, "step": 34144 }, { "epoch": 1.1016634033122878, "grad_norm": 0.341796875, "learning_rate": 1.297548859493017e-05, "loss": 1.9695, "step": 34145 }, { "epoch": 1.1016956671660842, "grad_norm": 0.353515625, "learning_rate": 1.2974722364598735e-05, "loss": 1.9903, "step": 34146 }, { "epoch": 1.1017279310198806, "grad_norm": 0.357421875, "learning_rate": 1.2973956139650103e-05, "loss": 1.9735, "step": 34147 }, { "epoch": 1.1017601948736768, "grad_norm": 0.3359375, "learning_rate": 1.2973189920086296e-05, "loss": 1.9557, "step": 34148 }, { "epoch": 1.1017924587274732, "grad_norm": 0.34765625, "learning_rate": 1.2972423705909355e-05, "loss": 1.9607, "step": 34149 }, { "epoch": 1.1018247225812696, "grad_norm": 0.337890625, "learning_rate": 1.2971657497121322e-05, "loss": 1.955, "step": 34150 }, { "epoch": 1.101856986435066, "grad_norm": 0.34765625, "learning_rate": 1.2970891293724233e-05, "loss": 1.979, "step": 34151 }, { "epoch": 1.1018892502888624, "grad_norm": 0.3515625, "learning_rate": 1.297012509572012e-05, "loss": 1.9669, "step": 34152 }, { "epoch": 1.1019215141426586, "grad_norm": 0.35546875, "learning_rate": 1.296935890311103e-05, "loss": 1.9288, "step": 34153 }, { "epoch": 1.101953777996455, "grad_norm": 0.34765625, "learning_rate": 1.2968592715898979e-05, "loss": 1.9518, "step": 34154 }, { "epoch": 1.1019860418502514, "grad_norm": 0.34375, "learning_rate": 1.296782653408602e-05, "loss": 1.9593, "step": 34155 }, { "epoch": 1.1020183057040478, "grad_norm": 0.39453125, "learning_rate": 1.2967060357674184e-05, "loss": 1.9755, "step": 34156 }, { "epoch": 1.102050569557844, "grad_norm": 0.3359375, "learning_rate": 1.296629418666551e-05, "loss": 1.9727, "step": 34157 }, { "epoch": 1.1020828334116404, "grad_norm": 0.34375, "learning_rate": 1.296552802106203e-05, "loss": 1.9895, "step": 34158 }, { "epoch": 1.1021150972654368, "grad_norm": 0.341796875, "learning_rate": 1.2964761860865788e-05, "loss": 1.9942, "step": 34159 }, { "epoch": 1.1021473611192332, "grad_norm": 0.337890625, "learning_rate": 1.296399570607881e-05, "loss": 1.9516, "step": 34160 }, { "epoch": 1.1021796249730293, "grad_norm": 0.365234375, "learning_rate": 1.2963229556703139e-05, "loss": 1.9804, "step": 34161 }, { "epoch": 1.1022118888268257, "grad_norm": 0.34375, "learning_rate": 1.2962463412740809e-05, "loss": 1.9744, "step": 34162 }, { "epoch": 1.1022441526806221, "grad_norm": 0.349609375, "learning_rate": 1.2961697274193857e-05, "loss": 1.9853, "step": 34163 }, { "epoch": 1.1022764165344185, "grad_norm": 0.35546875, "learning_rate": 1.2960931141064318e-05, "loss": 1.9687, "step": 34164 }, { "epoch": 1.1023086803882147, "grad_norm": 0.337890625, "learning_rate": 1.2960165013354234e-05, "loss": 1.997, "step": 34165 }, { "epoch": 1.1023409442420111, "grad_norm": 0.34765625, "learning_rate": 1.2959398891065634e-05, "loss": 1.9952, "step": 34166 }, { "epoch": 1.1023732080958075, "grad_norm": 0.341796875, "learning_rate": 1.2958632774200556e-05, "loss": 1.9479, "step": 34167 }, { "epoch": 1.102405471949604, "grad_norm": 0.345703125, "learning_rate": 1.2957866662761035e-05, "loss": 1.9796, "step": 34168 }, { "epoch": 1.1024377358034, "grad_norm": 0.3515625, "learning_rate": 1.2957100556749113e-05, "loss": 1.9633, "step": 34169 }, { "epoch": 1.1024699996571965, "grad_norm": 0.3359375, "learning_rate": 1.2956334456166818e-05, "loss": 1.9366, "step": 34170 }, { "epoch": 1.102502263510993, "grad_norm": 0.34375, "learning_rate": 1.2955568361016197e-05, "loss": 1.9663, "step": 34171 }, { "epoch": 1.1025345273647893, "grad_norm": 0.34765625, "learning_rate": 1.2954802271299277e-05, "loss": 1.9323, "step": 34172 }, { "epoch": 1.1025667912185857, "grad_norm": 0.33203125, "learning_rate": 1.2954036187018095e-05, "loss": 1.969, "step": 34173 }, { "epoch": 1.1025990550723819, "grad_norm": 0.353515625, "learning_rate": 1.2953270108174688e-05, "loss": 1.992, "step": 34174 }, { "epoch": 1.1026313189261783, "grad_norm": 0.337890625, "learning_rate": 1.2952504034771093e-05, "loss": 1.985, "step": 34175 }, { "epoch": 1.1026635827799747, "grad_norm": 0.35546875, "learning_rate": 1.2951737966809343e-05, "loss": 1.9875, "step": 34176 }, { "epoch": 1.102695846633771, "grad_norm": 0.349609375, "learning_rate": 1.295097190429149e-05, "loss": 1.9579, "step": 34177 }, { "epoch": 1.1027281104875672, "grad_norm": 0.3671875, "learning_rate": 1.2950205847219546e-05, "loss": 2.009, "step": 34178 }, { "epoch": 1.1027603743413636, "grad_norm": 0.357421875, "learning_rate": 1.294943979559556e-05, "loss": 1.9921, "step": 34179 }, { "epoch": 1.10279263819516, "grad_norm": 0.34375, "learning_rate": 1.2948673749421566e-05, "loss": 1.9715, "step": 34180 }, { "epoch": 1.1028249020489564, "grad_norm": 0.35546875, "learning_rate": 1.2947907708699598e-05, "loss": 1.9715, "step": 34181 }, { "epoch": 1.1028571659027526, "grad_norm": 0.337890625, "learning_rate": 1.2947141673431697e-05, "loss": 1.9732, "step": 34182 }, { "epoch": 1.102889429756549, "grad_norm": 0.34375, "learning_rate": 1.2946375643619903e-05, "loss": 2.0163, "step": 34183 }, { "epoch": 1.1029216936103454, "grad_norm": 0.357421875, "learning_rate": 1.2945609619266238e-05, "loss": 1.9695, "step": 34184 }, { "epoch": 1.1029539574641418, "grad_norm": 0.34375, "learning_rate": 1.2944843600372744e-05, "loss": 1.9987, "step": 34185 }, { "epoch": 1.102986221317938, "grad_norm": 0.34375, "learning_rate": 1.2944077586941454e-05, "loss": 1.9794, "step": 34186 }, { "epoch": 1.1030184851717344, "grad_norm": 0.345703125, "learning_rate": 1.2943311578974415e-05, "loss": 1.9512, "step": 34187 }, { "epoch": 1.1030507490255308, "grad_norm": 0.3515625, "learning_rate": 1.2942545576473653e-05, "loss": 1.9675, "step": 34188 }, { "epoch": 1.1030830128793272, "grad_norm": 0.353515625, "learning_rate": 1.2941779579441214e-05, "loss": 1.9691, "step": 34189 }, { "epoch": 1.1031152767331234, "grad_norm": 0.326171875, "learning_rate": 1.294101358787912e-05, "loss": 1.9642, "step": 34190 }, { "epoch": 1.1031475405869198, "grad_norm": 0.341796875, "learning_rate": 1.294024760178941e-05, "loss": 2.0006, "step": 34191 }, { "epoch": 1.1031798044407162, "grad_norm": 0.330078125, "learning_rate": 1.2939481621174126e-05, "loss": 1.9165, "step": 34192 }, { "epoch": 1.1032120682945126, "grad_norm": 0.341796875, "learning_rate": 1.29387156460353e-05, "loss": 1.9732, "step": 34193 }, { "epoch": 1.103244332148309, "grad_norm": 0.333984375, "learning_rate": 1.2937949676374971e-05, "loss": 1.9674, "step": 34194 }, { "epoch": 1.1032765960021051, "grad_norm": 0.345703125, "learning_rate": 1.2937183712195174e-05, "loss": 1.967, "step": 34195 }, { "epoch": 1.1033088598559015, "grad_norm": 0.33203125, "learning_rate": 1.2936417753497942e-05, "loss": 1.9688, "step": 34196 }, { "epoch": 1.103341123709698, "grad_norm": 0.34375, "learning_rate": 1.2935651800285312e-05, "loss": 1.9808, "step": 34197 }, { "epoch": 1.1033733875634943, "grad_norm": 0.34765625, "learning_rate": 1.2934885852559318e-05, "loss": 1.9769, "step": 34198 }, { "epoch": 1.1034056514172905, "grad_norm": 0.33203125, "learning_rate": 1.2934119910322e-05, "loss": 1.9556, "step": 34199 }, { "epoch": 1.103437915271087, "grad_norm": 0.3515625, "learning_rate": 1.293335397357539e-05, "loss": 1.9797, "step": 34200 }, { "epoch": 1.1034701791248833, "grad_norm": 0.33984375, "learning_rate": 1.293258804232153e-05, "loss": 1.9995, "step": 34201 }, { "epoch": 1.1035024429786797, "grad_norm": 0.337890625, "learning_rate": 1.2931822116562447e-05, "loss": 1.9882, "step": 34202 }, { "epoch": 1.103534706832476, "grad_norm": 0.3359375, "learning_rate": 1.293105619630018e-05, "loss": 1.9985, "step": 34203 }, { "epoch": 1.1035669706862723, "grad_norm": 0.35546875, "learning_rate": 1.2930290281536765e-05, "loss": 1.9783, "step": 34204 }, { "epoch": 1.1035992345400687, "grad_norm": 0.33984375, "learning_rate": 1.2929524372274237e-05, "loss": 1.9596, "step": 34205 }, { "epoch": 1.103631498393865, "grad_norm": 0.337890625, "learning_rate": 1.2928758468514636e-05, "loss": 1.9572, "step": 34206 }, { "epoch": 1.1036637622476613, "grad_norm": 0.3359375, "learning_rate": 1.2927992570259994e-05, "loss": 1.9557, "step": 34207 }, { "epoch": 1.1036960261014577, "grad_norm": 0.33203125, "learning_rate": 1.2927226677512344e-05, "loss": 1.9912, "step": 34208 }, { "epoch": 1.103728289955254, "grad_norm": 0.345703125, "learning_rate": 1.2926460790273726e-05, "loss": 1.9489, "step": 34209 }, { "epoch": 1.1037605538090505, "grad_norm": 0.330078125, "learning_rate": 1.2925694908546171e-05, "loss": 1.9561, "step": 34210 }, { "epoch": 1.1037928176628466, "grad_norm": 0.345703125, "learning_rate": 1.292492903233172e-05, "loss": 1.9657, "step": 34211 }, { "epoch": 1.103825081516643, "grad_norm": 0.349609375, "learning_rate": 1.2924163161632405e-05, "loss": 1.9774, "step": 34212 }, { "epoch": 1.1038573453704394, "grad_norm": 0.34375, "learning_rate": 1.2923397296450268e-05, "loss": 1.9564, "step": 34213 }, { "epoch": 1.1038896092242358, "grad_norm": 0.349609375, "learning_rate": 1.2922631436787334e-05, "loss": 1.9695, "step": 34214 }, { "epoch": 1.1039218730780322, "grad_norm": 0.337890625, "learning_rate": 1.2921865582645643e-05, "loss": 1.9733, "step": 34215 }, { "epoch": 1.1039541369318284, "grad_norm": 0.341796875, "learning_rate": 1.2921099734027232e-05, "loss": 1.9863, "step": 34216 }, { "epoch": 1.1039864007856248, "grad_norm": 0.333984375, "learning_rate": 1.2920333890934134e-05, "loss": 1.9671, "step": 34217 }, { "epoch": 1.1040186646394212, "grad_norm": 0.36328125, "learning_rate": 1.2919568053368383e-05, "loss": 1.983, "step": 34218 }, { "epoch": 1.1040509284932176, "grad_norm": 0.34375, "learning_rate": 1.291880222133203e-05, "loss": 1.9683, "step": 34219 }, { "epoch": 1.1040831923470138, "grad_norm": 0.32421875, "learning_rate": 1.2918036394827089e-05, "loss": 1.9858, "step": 34220 }, { "epoch": 1.1041154562008102, "grad_norm": 0.330078125, "learning_rate": 1.2917270573855605e-05, "loss": 1.9675, "step": 34221 }, { "epoch": 1.1041477200546066, "grad_norm": 0.3359375, "learning_rate": 1.2916504758419611e-05, "loss": 1.9938, "step": 34222 }, { "epoch": 1.104179983908403, "grad_norm": 0.349609375, "learning_rate": 1.2915738948521143e-05, "loss": 1.9754, "step": 34223 }, { "epoch": 1.1042122477621992, "grad_norm": 0.330078125, "learning_rate": 1.291497314416224e-05, "loss": 1.9714, "step": 34224 }, { "epoch": 1.1042445116159956, "grad_norm": 0.33984375, "learning_rate": 1.2914207345344943e-05, "loss": 1.9449, "step": 34225 }, { "epoch": 1.104276775469792, "grad_norm": 0.341796875, "learning_rate": 1.291344155207127e-05, "loss": 1.9764, "step": 34226 }, { "epoch": 1.1043090393235884, "grad_norm": 0.322265625, "learning_rate": 1.2912675764343264e-05, "loss": 1.9686, "step": 34227 }, { "epoch": 1.1043413031773845, "grad_norm": 0.33984375, "learning_rate": 1.2911909982162965e-05, "loss": 1.9555, "step": 34228 }, { "epoch": 1.104373567031181, "grad_norm": 0.34765625, "learning_rate": 1.2911144205532403e-05, "loss": 1.9534, "step": 34229 }, { "epoch": 1.1044058308849773, "grad_norm": 0.333984375, "learning_rate": 1.2910378434453618e-05, "loss": 1.9687, "step": 34230 }, { "epoch": 1.1044380947387737, "grad_norm": 0.34765625, "learning_rate": 1.2909612668928648e-05, "loss": 1.9586, "step": 34231 }, { "epoch": 1.10447035859257, "grad_norm": 0.333984375, "learning_rate": 1.2908846908959512e-05, "loss": 1.971, "step": 34232 }, { "epoch": 1.1045026224463663, "grad_norm": 0.34765625, "learning_rate": 1.2908081154548262e-05, "loss": 1.9592, "step": 34233 }, { "epoch": 1.1045348863001627, "grad_norm": 0.333984375, "learning_rate": 1.2907315405696928e-05, "loss": 1.9408, "step": 34234 }, { "epoch": 1.1045671501539591, "grad_norm": 0.51953125, "learning_rate": 1.2906549662407542e-05, "loss": 2.0256, "step": 34235 }, { "epoch": 1.1045994140077555, "grad_norm": 0.53125, "learning_rate": 1.2905783924682143e-05, "loss": 2.0648, "step": 34236 }, { "epoch": 1.1046316778615517, "grad_norm": 0.47265625, "learning_rate": 1.2905018192522768e-05, "loss": 2.0522, "step": 34237 }, { "epoch": 1.104663941715348, "grad_norm": 0.48046875, "learning_rate": 1.2904252465931445e-05, "loss": 2.0309, "step": 34238 }, { "epoch": 1.1046962055691445, "grad_norm": 0.482421875, "learning_rate": 1.2903486744910216e-05, "loss": 2.0413, "step": 34239 }, { "epoch": 1.104728469422941, "grad_norm": 0.427734375, "learning_rate": 1.290272102946111e-05, "loss": 2.0295, "step": 34240 }, { "epoch": 1.104760733276737, "grad_norm": 0.46875, "learning_rate": 1.2901955319586168e-05, "loss": 2.0581, "step": 34241 }, { "epoch": 1.1047929971305335, "grad_norm": 0.412109375, "learning_rate": 1.290118961528742e-05, "loss": 2.0608, "step": 34242 }, { "epoch": 1.1048252609843299, "grad_norm": 0.45703125, "learning_rate": 1.2900423916566907e-05, "loss": 2.0512, "step": 34243 }, { "epoch": 1.1048575248381263, "grad_norm": 0.439453125, "learning_rate": 1.2899658223426664e-05, "loss": 2.0494, "step": 34244 }, { "epoch": 1.1048897886919224, "grad_norm": 0.41796875, "learning_rate": 1.2898892535868719e-05, "loss": 2.0499, "step": 34245 }, { "epoch": 1.1049220525457188, "grad_norm": 0.39453125, "learning_rate": 1.289812685389511e-05, "loss": 2.0678, "step": 34246 }, { "epoch": 1.1049543163995152, "grad_norm": 0.39453125, "learning_rate": 1.2897361177507873e-05, "loss": 2.0696, "step": 34247 }, { "epoch": 1.1049865802533116, "grad_norm": 0.404296875, "learning_rate": 1.2896595506709044e-05, "loss": 2.0747, "step": 34248 }, { "epoch": 1.105018844107108, "grad_norm": 0.388671875, "learning_rate": 1.2895829841500659e-05, "loss": 2.0649, "step": 34249 }, { "epoch": 1.1050511079609042, "grad_norm": 0.396484375, "learning_rate": 1.2895064181884751e-05, "loss": 2.0834, "step": 34250 }, { "epoch": 1.1050833718147006, "grad_norm": 0.390625, "learning_rate": 1.2894298527863353e-05, "loss": 2.0503, "step": 34251 }, { "epoch": 1.105115635668497, "grad_norm": 0.39453125, "learning_rate": 1.28935328794385e-05, "loss": 2.0518, "step": 34252 }, { "epoch": 1.1051478995222932, "grad_norm": 0.357421875, "learning_rate": 1.2892767236612231e-05, "loss": 2.0678, "step": 34253 }, { "epoch": 1.1051801633760896, "grad_norm": 0.421875, "learning_rate": 1.2892001599386579e-05, "loss": 2.0525, "step": 34254 }, { "epoch": 1.105212427229886, "grad_norm": 0.384765625, "learning_rate": 1.2891235967763576e-05, "loss": 2.0464, "step": 34255 }, { "epoch": 1.1052446910836824, "grad_norm": 0.353515625, "learning_rate": 1.2890470341745267e-05, "loss": 2.0313, "step": 34256 }, { "epoch": 1.1052769549374788, "grad_norm": 0.365234375, "learning_rate": 1.2889704721333675e-05, "loss": 2.0338, "step": 34257 }, { "epoch": 1.105309218791275, "grad_norm": 0.40625, "learning_rate": 1.2888939106530839e-05, "loss": 2.0584, "step": 34258 }, { "epoch": 1.1053414826450714, "grad_norm": 0.3515625, "learning_rate": 1.2888173497338794e-05, "loss": 2.0339, "step": 34259 }, { "epoch": 1.1053737464988678, "grad_norm": 0.3984375, "learning_rate": 1.2887407893759571e-05, "loss": 2.0374, "step": 34260 }, { "epoch": 1.1054060103526642, "grad_norm": 0.37109375, "learning_rate": 1.2886642295795216e-05, "loss": 2.0513, "step": 34261 }, { "epoch": 1.1054382742064603, "grad_norm": 0.36328125, "learning_rate": 1.2885876703447758e-05, "loss": 2.042, "step": 34262 }, { "epoch": 1.1054705380602567, "grad_norm": 0.380859375, "learning_rate": 1.2885111116719227e-05, "loss": 2.0218, "step": 34263 }, { "epoch": 1.1055028019140531, "grad_norm": 0.390625, "learning_rate": 1.2884345535611658e-05, "loss": 2.0287, "step": 34264 }, { "epoch": 1.1055350657678495, "grad_norm": 0.33984375, "learning_rate": 1.2883579960127093e-05, "loss": 2.066, "step": 34265 }, { "epoch": 1.1055673296216457, "grad_norm": 0.37890625, "learning_rate": 1.2882814390267562e-05, "loss": 2.0625, "step": 34266 }, { "epoch": 1.1055995934754421, "grad_norm": 0.390625, "learning_rate": 1.2882048826035098e-05, "loss": 2.0548, "step": 34267 }, { "epoch": 1.1056318573292385, "grad_norm": 0.349609375, "learning_rate": 1.2881283267431749e-05, "loss": 2.0622, "step": 34268 }, { "epoch": 1.105664121183035, "grad_norm": 0.376953125, "learning_rate": 1.2880517714459526e-05, "loss": 2.063, "step": 34269 }, { "epoch": 1.1056963850368313, "grad_norm": 0.369140625, "learning_rate": 1.2879752167120482e-05, "loss": 2.0576, "step": 34270 }, { "epoch": 1.1057286488906275, "grad_norm": 0.38671875, "learning_rate": 1.2878986625416646e-05, "loss": 2.0745, "step": 34271 }, { "epoch": 1.105760912744424, "grad_norm": 0.373046875, "learning_rate": 1.2878221089350054e-05, "loss": 2.0585, "step": 34272 }, { "epoch": 1.1057931765982203, "grad_norm": 0.349609375, "learning_rate": 1.2877455558922737e-05, "loss": 2.0424, "step": 34273 }, { "epoch": 1.1058254404520167, "grad_norm": 0.34375, "learning_rate": 1.2876690034136738e-05, "loss": 2.0497, "step": 34274 }, { "epoch": 1.1058577043058129, "grad_norm": 0.34765625, "learning_rate": 1.2875924514994083e-05, "loss": 2.0434, "step": 34275 }, { "epoch": 1.1058899681596093, "grad_norm": 0.33984375, "learning_rate": 1.2875159001496807e-05, "loss": 2.0297, "step": 34276 }, { "epoch": 1.1059222320134057, "grad_norm": 0.33984375, "learning_rate": 1.2874393493646948e-05, "loss": 2.0569, "step": 34277 }, { "epoch": 1.105954495867202, "grad_norm": 0.3515625, "learning_rate": 1.287362799144654e-05, "loss": 2.0412, "step": 34278 }, { "epoch": 1.1059867597209982, "grad_norm": 0.400390625, "learning_rate": 1.2872862494897618e-05, "loss": 2.0599, "step": 34279 }, { "epoch": 1.1060190235747946, "grad_norm": 0.3984375, "learning_rate": 1.2872097004002219e-05, "loss": 2.0317, "step": 34280 }, { "epoch": 1.106051287428591, "grad_norm": 0.3984375, "learning_rate": 1.287133151876237e-05, "loss": 2.0282, "step": 34281 }, { "epoch": 1.1060835512823874, "grad_norm": 0.37109375, "learning_rate": 1.287056603918011e-05, "loss": 2.0429, "step": 34282 }, { "epoch": 1.1061158151361836, "grad_norm": 0.447265625, "learning_rate": 1.2869800565257475e-05, "loss": 2.0554, "step": 34283 }, { "epoch": 1.10614807898998, "grad_norm": 0.37890625, "learning_rate": 1.2869035096996498e-05, "loss": 2.0509, "step": 34284 }, { "epoch": 1.1061803428437764, "grad_norm": 0.412109375, "learning_rate": 1.286826963439921e-05, "loss": 2.0493, "step": 34285 }, { "epoch": 1.1062126066975728, "grad_norm": 0.4453125, "learning_rate": 1.2867504177467654e-05, "loss": 2.1116, "step": 34286 }, { "epoch": 1.106244870551369, "grad_norm": 0.375, "learning_rate": 1.2866738726203857e-05, "loss": 2.1374, "step": 34287 }, { "epoch": 1.1062771344051654, "grad_norm": 0.41015625, "learning_rate": 1.2865973280609853e-05, "loss": 2.0993, "step": 34288 }, { "epoch": 1.1063093982589618, "grad_norm": 0.376953125, "learning_rate": 1.286520784068768e-05, "loss": 2.1382, "step": 34289 }, { "epoch": 1.1063416621127582, "grad_norm": 0.40234375, "learning_rate": 1.2864442406439371e-05, "loss": 2.1136, "step": 34290 }, { "epoch": 1.1063739259665546, "grad_norm": 1.1171875, "learning_rate": 1.2863676977866964e-05, "loss": 2.128, "step": 34291 }, { "epoch": 1.1064061898203508, "grad_norm": 0.388671875, "learning_rate": 1.286291155497249e-05, "loss": 2.1387, "step": 34292 }, { "epoch": 1.1064384536741472, "grad_norm": 0.423828125, "learning_rate": 1.286214613775798e-05, "loss": 2.077, "step": 34293 }, { "epoch": 1.1064707175279436, "grad_norm": 0.375, "learning_rate": 1.2861380726225473e-05, "loss": 2.0958, "step": 34294 }, { "epoch": 1.10650298138174, "grad_norm": 0.388671875, "learning_rate": 1.2860615320377003e-05, "loss": 2.1065, "step": 34295 }, { "epoch": 1.1065352452355361, "grad_norm": 0.349609375, "learning_rate": 1.2859849920214599e-05, "loss": 2.1102, "step": 34296 }, { "epoch": 1.1065675090893325, "grad_norm": 0.37890625, "learning_rate": 1.2859084525740304e-05, "loss": 2.1203, "step": 34297 }, { "epoch": 1.106599772943129, "grad_norm": 0.376953125, "learning_rate": 1.2858319136956153e-05, "loss": 2.0983, "step": 34298 }, { "epoch": 1.1066320367969253, "grad_norm": 0.369140625, "learning_rate": 1.285755375386417e-05, "loss": 2.1215, "step": 34299 }, { "epoch": 1.1066643006507215, "grad_norm": 0.388671875, "learning_rate": 1.2856788376466393e-05, "loss": 2.1071, "step": 34300 }, { "epoch": 1.106696564504518, "grad_norm": 0.380859375, "learning_rate": 1.2856023004764857e-05, "loss": 2.1134, "step": 34301 }, { "epoch": 1.1067288283583143, "grad_norm": 0.39453125, "learning_rate": 1.2855257638761597e-05, "loss": 2.1595, "step": 34302 }, { "epoch": 1.1067610922121107, "grad_norm": 0.3671875, "learning_rate": 1.285449227845865e-05, "loss": 2.1121, "step": 34303 }, { "epoch": 1.106793356065907, "grad_norm": 0.408203125, "learning_rate": 1.2853726923858051e-05, "loss": 2.1569, "step": 34304 }, { "epoch": 1.1068256199197033, "grad_norm": 0.38671875, "learning_rate": 1.2852961574961826e-05, "loss": 2.1509, "step": 34305 }, { "epoch": 1.1068578837734997, "grad_norm": 0.427734375, "learning_rate": 1.2852196231772009e-05, "loss": 2.1203, "step": 34306 }, { "epoch": 1.106890147627296, "grad_norm": 0.427734375, "learning_rate": 1.2851430894290642e-05, "loss": 2.1389, "step": 34307 }, { "epoch": 1.1069224114810923, "grad_norm": 0.36328125, "learning_rate": 1.2850665562519755e-05, "loss": 2.1463, "step": 34308 }, { "epoch": 1.1069546753348887, "grad_norm": 0.421875, "learning_rate": 1.2849900236461386e-05, "loss": 2.1351, "step": 34309 }, { "epoch": 1.106986939188685, "grad_norm": 0.359375, "learning_rate": 1.2849134916117571e-05, "loss": 2.1311, "step": 34310 }, { "epoch": 1.1070192030424815, "grad_norm": 0.38671875, "learning_rate": 1.284836960149033e-05, "loss": 2.132, "step": 34311 }, { "epoch": 1.1070514668962779, "grad_norm": 0.373046875, "learning_rate": 1.2847604292581709e-05, "loss": 2.1103, "step": 34312 }, { "epoch": 1.107083730750074, "grad_norm": 0.361328125, "learning_rate": 1.2846838989393737e-05, "loss": 2.126, "step": 34313 }, { "epoch": 1.1071159946038704, "grad_norm": 0.3515625, "learning_rate": 1.2846073691928452e-05, "loss": 2.1397, "step": 34314 }, { "epoch": 1.1071482584576668, "grad_norm": 0.373046875, "learning_rate": 1.2845308400187886e-05, "loss": 2.1079, "step": 34315 }, { "epoch": 1.1071805223114632, "grad_norm": 0.361328125, "learning_rate": 1.2844543114174078e-05, "loss": 2.097, "step": 34316 }, { "epoch": 1.1072127861652594, "grad_norm": 0.369140625, "learning_rate": 1.2843777833889054e-05, "loss": 2.1799, "step": 34317 }, { "epoch": 1.1072450500190558, "grad_norm": 0.3671875, "learning_rate": 1.2843012559334849e-05, "loss": 2.1233, "step": 34318 }, { "epoch": 1.1072773138728522, "grad_norm": 0.361328125, "learning_rate": 1.28422472905135e-05, "loss": 2.1294, "step": 34319 }, { "epoch": 1.1073095777266486, "grad_norm": 0.359375, "learning_rate": 1.284148202742704e-05, "loss": 2.1412, "step": 34320 }, { "epoch": 1.1073418415804448, "grad_norm": 0.349609375, "learning_rate": 1.2840716770077503e-05, "loss": 2.1196, "step": 34321 }, { "epoch": 1.1073741054342412, "grad_norm": 0.34375, "learning_rate": 1.2839951518466927e-05, "loss": 2.1445, "step": 34322 }, { "epoch": 1.1074063692880376, "grad_norm": 0.3515625, "learning_rate": 1.2839186272597338e-05, "loss": 2.1213, "step": 34323 }, { "epoch": 1.107438633141834, "grad_norm": 0.349609375, "learning_rate": 1.2838421032470772e-05, "loss": 2.084, "step": 34324 }, { "epoch": 1.1074708969956302, "grad_norm": 0.357421875, "learning_rate": 1.2837655798089269e-05, "loss": 2.1238, "step": 34325 }, { "epoch": 1.1075031608494266, "grad_norm": 0.349609375, "learning_rate": 1.2836890569454853e-05, "loss": 2.1195, "step": 34326 }, { "epoch": 1.107535424703223, "grad_norm": 0.345703125, "learning_rate": 1.2836125346569565e-05, "loss": 2.1239, "step": 34327 }, { "epoch": 1.1075676885570194, "grad_norm": 0.34765625, "learning_rate": 1.2835360129435443e-05, "loss": 2.1323, "step": 34328 }, { "epoch": 1.1075999524108155, "grad_norm": 0.34375, "learning_rate": 1.2834594918054508e-05, "loss": 2.1241, "step": 34329 }, { "epoch": 1.107632216264612, "grad_norm": 0.35546875, "learning_rate": 1.2833829712428804e-05, "loss": 2.1206, "step": 34330 }, { "epoch": 1.1076644801184083, "grad_norm": 0.345703125, "learning_rate": 1.2833064512560359e-05, "loss": 2.123, "step": 34331 }, { "epoch": 1.1076967439722047, "grad_norm": 0.373046875, "learning_rate": 1.2832299318451211e-05, "loss": 2.1321, "step": 34332 }, { "epoch": 1.1077290078260011, "grad_norm": 0.349609375, "learning_rate": 1.2831534130103387e-05, "loss": 2.0716, "step": 34333 }, { "epoch": 1.1077612716797973, "grad_norm": 0.341796875, "learning_rate": 1.2830768947518936e-05, "loss": 2.1234, "step": 34334 }, { "epoch": 1.1077935355335937, "grad_norm": 0.375, "learning_rate": 1.2830003770699874e-05, "loss": 2.1167, "step": 34335 }, { "epoch": 1.1078257993873901, "grad_norm": 0.345703125, "learning_rate": 1.2829238599648242e-05, "loss": 2.1367, "step": 34336 }, { "epoch": 1.1078580632411865, "grad_norm": 0.3671875, "learning_rate": 1.2828473434366075e-05, "loss": 2.115, "step": 34337 }, { "epoch": 1.1078903270949827, "grad_norm": 0.357421875, "learning_rate": 1.2827708274855403e-05, "loss": 2.0912, "step": 34338 }, { "epoch": 1.107922590948779, "grad_norm": 0.361328125, "learning_rate": 1.2826943121118263e-05, "loss": 2.147, "step": 34339 }, { "epoch": 1.1079548548025755, "grad_norm": 0.3515625, "learning_rate": 1.2826177973156693e-05, "loss": 2.1217, "step": 34340 }, { "epoch": 1.107987118656372, "grad_norm": 0.35546875, "learning_rate": 1.2825412830972717e-05, "loss": 2.137, "step": 34341 }, { "epoch": 1.108019382510168, "grad_norm": 0.3515625, "learning_rate": 1.2824647694568371e-05, "loss": 2.1382, "step": 34342 }, { "epoch": 1.1080516463639645, "grad_norm": 0.357421875, "learning_rate": 1.282388256394569e-05, "loss": 2.1234, "step": 34343 }, { "epoch": 1.1080839102177609, "grad_norm": 0.34375, "learning_rate": 1.2823117439106709e-05, "loss": 2.1012, "step": 34344 }, { "epoch": 1.1081161740715573, "grad_norm": 0.3515625, "learning_rate": 1.282235232005346e-05, "loss": 2.124, "step": 34345 }, { "epoch": 1.1081484379253534, "grad_norm": 0.34765625, "learning_rate": 1.2821587206787985e-05, "loss": 2.132, "step": 34346 }, { "epoch": 1.1081807017791498, "grad_norm": 0.34765625, "learning_rate": 1.28208220993123e-05, "loss": 2.1103, "step": 34347 }, { "epoch": 1.1082129656329462, "grad_norm": 0.353515625, "learning_rate": 1.282005699762845e-05, "loss": 2.1477, "step": 34348 }, { "epoch": 1.1082452294867426, "grad_norm": 0.357421875, "learning_rate": 1.2819291901738468e-05, "loss": 2.1184, "step": 34349 }, { "epoch": 1.1082774933405388, "grad_norm": 0.35546875, "learning_rate": 1.2818526811644386e-05, "loss": 2.1185, "step": 34350 }, { "epoch": 1.1083097571943352, "grad_norm": 0.34765625, "learning_rate": 1.2817761727348237e-05, "loss": 2.129, "step": 34351 }, { "epoch": 1.1083420210481316, "grad_norm": 0.341796875, "learning_rate": 1.281699664885206e-05, "loss": 2.1419, "step": 34352 }, { "epoch": 1.108374284901928, "grad_norm": 0.35546875, "learning_rate": 1.2816231576157876e-05, "loss": 2.1418, "step": 34353 }, { "epoch": 1.1084065487557244, "grad_norm": 0.353515625, "learning_rate": 1.281546650926773e-05, "loss": 2.1222, "step": 34354 }, { "epoch": 1.1084388126095206, "grad_norm": 0.33984375, "learning_rate": 1.2814701448183649e-05, "loss": 2.1386, "step": 34355 }, { "epoch": 1.108471076463317, "grad_norm": 0.3515625, "learning_rate": 1.2813936392907669e-05, "loss": 2.1391, "step": 34356 }, { "epoch": 1.1085033403171134, "grad_norm": 0.369140625, "learning_rate": 1.2813171343441821e-05, "loss": 2.0614, "step": 34357 }, { "epoch": 1.1085356041709098, "grad_norm": 0.353515625, "learning_rate": 1.2812406299788148e-05, "loss": 2.0433, "step": 34358 }, { "epoch": 1.108567868024706, "grad_norm": 0.359375, "learning_rate": 1.2811641261948668e-05, "loss": 2.0575, "step": 34359 }, { "epoch": 1.1086001318785024, "grad_norm": 0.337890625, "learning_rate": 1.2810876229925424e-05, "loss": 2.0056, "step": 34360 }, { "epoch": 1.1086323957322988, "grad_norm": 0.3515625, "learning_rate": 1.2810111203720448e-05, "loss": 2.0374, "step": 34361 }, { "epoch": 1.1086646595860952, "grad_norm": 0.33984375, "learning_rate": 1.280934618333577e-05, "loss": 2.0528, "step": 34362 }, { "epoch": 1.1086969234398913, "grad_norm": 0.333984375, "learning_rate": 1.280858116877343e-05, "loss": 2.0586, "step": 34363 }, { "epoch": 1.1087291872936877, "grad_norm": 0.33984375, "learning_rate": 1.2807816160035457e-05, "loss": 2.0305, "step": 34364 }, { "epoch": 1.1087614511474841, "grad_norm": 0.33984375, "learning_rate": 1.2807051157123882e-05, "loss": 2.0137, "step": 34365 }, { "epoch": 1.1087937150012805, "grad_norm": 0.33203125, "learning_rate": 1.2806286160040743e-05, "loss": 1.9979, "step": 34366 }, { "epoch": 1.1088259788550767, "grad_norm": 0.34765625, "learning_rate": 1.2805521168788068e-05, "loss": 1.978, "step": 34367 }, { "epoch": 1.1088582427088731, "grad_norm": 0.3515625, "learning_rate": 1.2804756183367894e-05, "loss": 1.9746, "step": 34368 }, { "epoch": 1.1088905065626695, "grad_norm": 0.333984375, "learning_rate": 1.2803991203782251e-05, "loss": 1.9601, "step": 34369 }, { "epoch": 1.108922770416466, "grad_norm": 0.34375, "learning_rate": 1.2803226230033178e-05, "loss": 1.9637, "step": 34370 }, { "epoch": 1.108955034270262, "grad_norm": 0.34375, "learning_rate": 1.2802461262122706e-05, "loss": 1.9706, "step": 34371 }, { "epoch": 1.1089872981240585, "grad_norm": 0.326171875, "learning_rate": 1.2801696300052862e-05, "loss": 1.9503, "step": 34372 }, { "epoch": 1.109019561977855, "grad_norm": 0.34765625, "learning_rate": 1.2800931343825686e-05, "loss": 1.9603, "step": 34373 }, { "epoch": 1.1090518258316513, "grad_norm": 0.3359375, "learning_rate": 1.2800166393443206e-05, "loss": 1.9801, "step": 34374 }, { "epoch": 1.1090840896854477, "grad_norm": 0.349609375, "learning_rate": 1.2799401448907458e-05, "loss": 1.9541, "step": 34375 }, { "epoch": 1.1091163535392439, "grad_norm": 0.349609375, "learning_rate": 1.2798636510220477e-05, "loss": 1.9921, "step": 34376 }, { "epoch": 1.1091486173930403, "grad_norm": 0.35546875, "learning_rate": 1.2797871577384302e-05, "loss": 2.0094, "step": 34377 }, { "epoch": 1.1091808812468367, "grad_norm": 0.373046875, "learning_rate": 1.2797106650400948e-05, "loss": 1.9729, "step": 34378 }, { "epoch": 1.109213145100633, "grad_norm": 0.33203125, "learning_rate": 1.279634172927246e-05, "loss": 1.969, "step": 34379 }, { "epoch": 1.1092454089544292, "grad_norm": 0.345703125, "learning_rate": 1.2795576814000867e-05, "loss": 1.9806, "step": 34380 }, { "epoch": 1.1092776728082256, "grad_norm": 0.345703125, "learning_rate": 1.2794811904588207e-05, "loss": 1.9501, "step": 34381 }, { "epoch": 1.109309936662022, "grad_norm": 0.341796875, "learning_rate": 1.279404700103651e-05, "loss": 1.9833, "step": 34382 }, { "epoch": 1.1093422005158184, "grad_norm": 0.34765625, "learning_rate": 1.2793282103347815e-05, "loss": 1.9736, "step": 34383 }, { "epoch": 1.1093744643696146, "grad_norm": 0.337890625, "learning_rate": 1.279251721152414e-05, "loss": 1.9682, "step": 34384 }, { "epoch": 1.109406728223411, "grad_norm": 0.361328125, "learning_rate": 1.279175232556753e-05, "loss": 1.9806, "step": 34385 }, { "epoch": 1.1094389920772074, "grad_norm": 0.34765625, "learning_rate": 1.2790987445480017e-05, "loss": 1.9531, "step": 34386 }, { "epoch": 1.1094712559310038, "grad_norm": 0.34765625, "learning_rate": 1.2790222571263628e-05, "loss": 1.9687, "step": 34387 }, { "epoch": 1.1095035197848, "grad_norm": 0.337890625, "learning_rate": 1.2789457702920402e-05, "loss": 1.9603, "step": 34388 }, { "epoch": 1.1095357836385964, "grad_norm": 0.353515625, "learning_rate": 1.2788692840452372e-05, "loss": 1.9579, "step": 34389 }, { "epoch": 1.1095680474923928, "grad_norm": 0.337890625, "learning_rate": 1.2787927983861566e-05, "loss": 1.9979, "step": 34390 }, { "epoch": 1.1096003113461892, "grad_norm": 0.349609375, "learning_rate": 1.2787163133150019e-05, "loss": 1.9902, "step": 34391 }, { "epoch": 1.1096325751999854, "grad_norm": 0.349609375, "learning_rate": 1.2786398288319764e-05, "loss": 1.9585, "step": 34392 }, { "epoch": 1.1096648390537818, "grad_norm": 0.349609375, "learning_rate": 1.2785633449372836e-05, "loss": 1.9961, "step": 34393 }, { "epoch": 1.1096971029075782, "grad_norm": 0.349609375, "learning_rate": 1.2784868616311263e-05, "loss": 1.9702, "step": 34394 }, { "epoch": 1.1097293667613746, "grad_norm": 0.345703125, "learning_rate": 1.2784103789137085e-05, "loss": 1.967, "step": 34395 }, { "epoch": 1.109761630615171, "grad_norm": 0.333984375, "learning_rate": 1.2783338967852325e-05, "loss": 1.9886, "step": 34396 }, { "epoch": 1.1097938944689671, "grad_norm": 0.35546875, "learning_rate": 1.2782574152459023e-05, "loss": 1.998, "step": 34397 }, { "epoch": 1.1098261583227635, "grad_norm": 0.35546875, "learning_rate": 1.278180934295921e-05, "loss": 1.9609, "step": 34398 }, { "epoch": 1.10985842217656, "grad_norm": 0.328125, "learning_rate": 1.2781044539354916e-05, "loss": 1.9741, "step": 34399 }, { "epoch": 1.1098906860303563, "grad_norm": 0.34375, "learning_rate": 1.2780279741648177e-05, "loss": 1.9562, "step": 34400 }, { "epoch": 1.1099229498841525, "grad_norm": 0.34375, "learning_rate": 1.2779514949841031e-05, "loss": 1.962, "step": 34401 }, { "epoch": 1.109955213737949, "grad_norm": 0.34765625, "learning_rate": 1.2778750163935499e-05, "loss": 1.9906, "step": 34402 }, { "epoch": 1.1099874775917453, "grad_norm": 0.341796875, "learning_rate": 1.2777985383933619e-05, "loss": 1.9917, "step": 34403 }, { "epoch": 1.1100197414455417, "grad_norm": 0.3515625, "learning_rate": 1.2777220609837423e-05, "loss": 1.9939, "step": 34404 }, { "epoch": 1.110052005299338, "grad_norm": 0.333984375, "learning_rate": 1.2776455841648946e-05, "loss": 1.9791, "step": 34405 }, { "epoch": 1.1100842691531343, "grad_norm": 0.33984375, "learning_rate": 1.2775691079370218e-05, "loss": 1.9587, "step": 34406 }, { "epoch": 1.1101165330069307, "grad_norm": 0.328125, "learning_rate": 1.2774926323003278e-05, "loss": 1.9339, "step": 34407 }, { "epoch": 1.110148796860727, "grad_norm": 0.34375, "learning_rate": 1.2774161572550147e-05, "loss": 1.9891, "step": 34408 }, { "epoch": 1.1101810607145233, "grad_norm": 0.34765625, "learning_rate": 1.2773396828012866e-05, "loss": 1.9341, "step": 34409 }, { "epoch": 1.1102133245683197, "grad_norm": 0.341796875, "learning_rate": 1.2772632089393466e-05, "loss": 1.962, "step": 34410 }, { "epoch": 1.110245588422116, "grad_norm": 0.33984375, "learning_rate": 1.2771867356693975e-05, "loss": 1.9625, "step": 34411 }, { "epoch": 1.1102778522759125, "grad_norm": 0.330078125, "learning_rate": 1.277110262991643e-05, "loss": 1.9793, "step": 34412 }, { "epoch": 1.1103101161297086, "grad_norm": 0.32421875, "learning_rate": 1.277033790906287e-05, "loss": 2.0052, "step": 34413 }, { "epoch": 1.110342379983505, "grad_norm": 0.34765625, "learning_rate": 1.2769573194135315e-05, "loss": 1.959, "step": 34414 }, { "epoch": 1.1103746438373014, "grad_norm": 0.333984375, "learning_rate": 1.2768808485135802e-05, "loss": 1.9482, "step": 34415 }, { "epoch": 1.1104069076910978, "grad_norm": 0.33984375, "learning_rate": 1.2768043782066362e-05, "loss": 1.9682, "step": 34416 }, { "epoch": 1.1104391715448942, "grad_norm": 0.330078125, "learning_rate": 1.2767279084929032e-05, "loss": 2.0057, "step": 34417 }, { "epoch": 1.1104714353986904, "grad_norm": 0.333984375, "learning_rate": 1.2766514393725843e-05, "loss": 1.9899, "step": 34418 }, { "epoch": 1.1105036992524868, "grad_norm": 0.345703125, "learning_rate": 1.2765749708458833e-05, "loss": 1.9796, "step": 34419 }, { "epoch": 1.1105359631062832, "grad_norm": 0.33984375, "learning_rate": 1.276498502913002e-05, "loss": 1.9648, "step": 34420 }, { "epoch": 1.1105682269600796, "grad_norm": 0.357421875, "learning_rate": 1.2764220355741441e-05, "loss": 1.9886, "step": 34421 }, { "epoch": 1.1106004908138758, "grad_norm": 0.361328125, "learning_rate": 1.2763455688295135e-05, "loss": 1.9766, "step": 34422 }, { "epoch": 1.1106327546676722, "grad_norm": 0.349609375, "learning_rate": 1.2762691026793132e-05, "loss": 1.9681, "step": 34423 }, { "epoch": 1.1106650185214686, "grad_norm": 0.341796875, "learning_rate": 1.2761926371237463e-05, "loss": 1.998, "step": 34424 }, { "epoch": 1.110697282375265, "grad_norm": 0.345703125, "learning_rate": 1.2761161721630166e-05, "loss": 1.9829, "step": 34425 }, { "epoch": 1.1107295462290612, "grad_norm": 0.357421875, "learning_rate": 1.276039707797326e-05, "loss": 1.9372, "step": 34426 }, { "epoch": 1.1107618100828576, "grad_norm": 0.357421875, "learning_rate": 1.2759632440268786e-05, "loss": 1.9799, "step": 34427 }, { "epoch": 1.110794073936654, "grad_norm": 0.34765625, "learning_rate": 1.2758867808518778e-05, "loss": 1.9503, "step": 34428 }, { "epoch": 1.1108263377904504, "grad_norm": 0.36328125, "learning_rate": 1.2758103182725263e-05, "loss": 1.9679, "step": 34429 }, { "epoch": 1.1108586016442465, "grad_norm": 0.330078125, "learning_rate": 1.2757338562890278e-05, "loss": 1.9753, "step": 34430 }, { "epoch": 1.110890865498043, "grad_norm": 0.365234375, "learning_rate": 1.2756573949015855e-05, "loss": 1.9819, "step": 34431 }, { "epoch": 1.1109231293518393, "grad_norm": 0.328125, "learning_rate": 1.275580934110402e-05, "loss": 1.9784, "step": 34432 }, { "epoch": 1.1109553932056357, "grad_norm": 0.3515625, "learning_rate": 1.2755044739156813e-05, "loss": 1.962, "step": 34433 }, { "epoch": 1.110987657059432, "grad_norm": 0.3828125, "learning_rate": 1.2754280143176259e-05, "loss": 1.991, "step": 34434 }, { "epoch": 1.1110199209132283, "grad_norm": 0.333984375, "learning_rate": 1.2753515553164394e-05, "loss": 1.9768, "step": 34435 }, { "epoch": 1.1110521847670247, "grad_norm": 0.345703125, "learning_rate": 1.275275096912325e-05, "loss": 1.981, "step": 34436 }, { "epoch": 1.1110844486208211, "grad_norm": 0.353515625, "learning_rate": 1.2751986391054864e-05, "loss": 1.9544, "step": 34437 }, { "epoch": 1.1111167124746175, "grad_norm": 0.330078125, "learning_rate": 1.275122181896126e-05, "loss": 1.9598, "step": 34438 }, { "epoch": 1.1111489763284137, "grad_norm": 0.361328125, "learning_rate": 1.2750457252844473e-05, "loss": 1.953, "step": 34439 }, { "epoch": 1.11118124018221, "grad_norm": 0.3671875, "learning_rate": 1.2749692692706533e-05, "loss": 1.9747, "step": 34440 }, { "epoch": 1.1112135040360065, "grad_norm": 0.34375, "learning_rate": 1.2748928138549476e-05, "loss": 1.9776, "step": 34441 }, { "epoch": 1.111245767889803, "grad_norm": 0.353515625, "learning_rate": 1.2748163590375332e-05, "loss": 1.9817, "step": 34442 }, { "epoch": 1.111278031743599, "grad_norm": 0.361328125, "learning_rate": 1.2747399048186137e-05, "loss": 1.9599, "step": 34443 }, { "epoch": 1.1113102955973955, "grad_norm": 0.3359375, "learning_rate": 1.2746634511983916e-05, "loss": 1.9907, "step": 34444 }, { "epoch": 1.1113425594511919, "grad_norm": 0.3515625, "learning_rate": 1.2745869981770703e-05, "loss": 1.9913, "step": 34445 }, { "epoch": 1.1113748233049883, "grad_norm": 0.34375, "learning_rate": 1.2745105457548533e-05, "loss": 1.9685, "step": 34446 }, { "epoch": 1.1114070871587844, "grad_norm": 0.33984375, "learning_rate": 1.2744340939319435e-05, "loss": 1.9866, "step": 34447 }, { "epoch": 1.1114393510125808, "grad_norm": 0.357421875, "learning_rate": 1.2743576427085442e-05, "loss": 1.982, "step": 34448 }, { "epoch": 1.1114716148663772, "grad_norm": 0.337890625, "learning_rate": 1.2742811920848593e-05, "loss": 1.9606, "step": 34449 }, { "epoch": 1.1115038787201736, "grad_norm": 0.337890625, "learning_rate": 1.2742047420610906e-05, "loss": 1.9572, "step": 34450 }, { "epoch": 1.11153614257397, "grad_norm": 0.3515625, "learning_rate": 1.2741282926374422e-05, "loss": 1.9831, "step": 34451 }, { "epoch": 1.1115684064277662, "grad_norm": 0.345703125, "learning_rate": 1.2740518438141169e-05, "loss": 1.9905, "step": 34452 }, { "epoch": 1.1116006702815626, "grad_norm": 0.337890625, "learning_rate": 1.2739753955913179e-05, "loss": 1.9527, "step": 34453 }, { "epoch": 1.111632934135359, "grad_norm": 0.33984375, "learning_rate": 1.2738989479692488e-05, "loss": 1.9433, "step": 34454 }, { "epoch": 1.1116651979891552, "grad_norm": 0.34375, "learning_rate": 1.273822500948113e-05, "loss": 1.9733, "step": 34455 }, { "epoch": 1.1116974618429516, "grad_norm": 0.34375, "learning_rate": 1.2737460545281128e-05, "loss": 1.9833, "step": 34456 }, { "epoch": 1.111729725696748, "grad_norm": 0.337890625, "learning_rate": 1.2736696087094518e-05, "loss": 1.9527, "step": 34457 }, { "epoch": 1.1117619895505444, "grad_norm": 0.349609375, "learning_rate": 1.2735931634923328e-05, "loss": 1.9445, "step": 34458 }, { "epoch": 1.1117942534043408, "grad_norm": 0.337890625, "learning_rate": 1.2735167188769598e-05, "loss": 1.9705, "step": 34459 }, { "epoch": 1.111826517258137, "grad_norm": 0.33984375, "learning_rate": 1.2734402748635354e-05, "loss": 1.9473, "step": 34460 }, { "epoch": 1.1118587811119334, "grad_norm": 0.33984375, "learning_rate": 1.2733638314522636e-05, "loss": 1.9788, "step": 34461 }, { "epoch": 1.1118910449657298, "grad_norm": 0.333984375, "learning_rate": 1.273287388643346e-05, "loss": 1.9821, "step": 34462 }, { "epoch": 1.1119233088195262, "grad_norm": 0.33203125, "learning_rate": 1.2732109464369865e-05, "loss": 1.9678, "step": 34463 }, { "epoch": 1.1119555726733223, "grad_norm": 0.353515625, "learning_rate": 1.273134504833389e-05, "loss": 1.9753, "step": 34464 }, { "epoch": 1.1119878365271187, "grad_norm": 0.330078125, "learning_rate": 1.2730580638327556e-05, "loss": 1.9832, "step": 34465 }, { "epoch": 1.1120201003809151, "grad_norm": 0.353515625, "learning_rate": 1.2729816234352902e-05, "loss": 1.9635, "step": 34466 }, { "epoch": 1.1120523642347115, "grad_norm": 0.357421875, "learning_rate": 1.2729051836411963e-05, "loss": 1.9582, "step": 34467 }, { "epoch": 1.1120846280885077, "grad_norm": 0.33203125, "learning_rate": 1.2728287444506756e-05, "loss": 1.9774, "step": 34468 }, { "epoch": 1.1121168919423041, "grad_norm": 0.34375, "learning_rate": 1.2727523058639323e-05, "loss": 1.9654, "step": 34469 }, { "epoch": 1.1121491557961005, "grad_norm": 0.359375, "learning_rate": 1.2726758678811696e-05, "loss": 1.9476, "step": 34470 }, { "epoch": 1.112181419649897, "grad_norm": 0.33203125, "learning_rate": 1.2725994305025902e-05, "loss": 2.0017, "step": 34471 }, { "epoch": 1.1122136835036933, "grad_norm": 0.35546875, "learning_rate": 1.2725229937283976e-05, "loss": 1.9381, "step": 34472 }, { "epoch": 1.1122459473574895, "grad_norm": 0.37109375, "learning_rate": 1.2724465575587952e-05, "loss": 1.9872, "step": 34473 }, { "epoch": 1.112278211211286, "grad_norm": 0.33203125, "learning_rate": 1.2723701219939854e-05, "loss": 1.9789, "step": 34474 }, { "epoch": 1.1123104750650823, "grad_norm": 0.41015625, "learning_rate": 1.2722936870341718e-05, "loss": 2.0075, "step": 34475 }, { "epoch": 1.1123427389188787, "grad_norm": 0.3515625, "learning_rate": 1.2722172526795577e-05, "loss": 1.9454, "step": 34476 }, { "epoch": 1.1123750027726749, "grad_norm": 0.34375, "learning_rate": 1.2721408189303458e-05, "loss": 1.9654, "step": 34477 }, { "epoch": 1.1124072666264713, "grad_norm": 0.34375, "learning_rate": 1.2720643857867397e-05, "loss": 1.9979, "step": 34478 }, { "epoch": 1.1124395304802677, "grad_norm": 0.349609375, "learning_rate": 1.2719879532489425e-05, "loss": 1.9714, "step": 34479 }, { "epoch": 1.112471794334064, "grad_norm": 0.341796875, "learning_rate": 1.2719115213171568e-05, "loss": 1.9904, "step": 34480 }, { "epoch": 1.1125040581878602, "grad_norm": 0.39453125, "learning_rate": 1.2718350899915864e-05, "loss": 1.9704, "step": 34481 }, { "epoch": 1.1125363220416566, "grad_norm": 0.34375, "learning_rate": 1.2717586592724339e-05, "loss": 1.9737, "step": 34482 }, { "epoch": 1.112568585895453, "grad_norm": 0.349609375, "learning_rate": 1.271682229159903e-05, "loss": 1.9811, "step": 34483 }, { "epoch": 1.1126008497492494, "grad_norm": 0.361328125, "learning_rate": 1.2716057996541964e-05, "loss": 1.9931, "step": 34484 }, { "epoch": 1.1126331136030456, "grad_norm": 0.345703125, "learning_rate": 1.2715293707555177e-05, "loss": 1.9644, "step": 34485 }, { "epoch": 1.112665377456842, "grad_norm": 0.33984375, "learning_rate": 1.2714529424640692e-05, "loss": 1.9717, "step": 34486 }, { "epoch": 1.1126976413106384, "grad_norm": 0.3515625, "learning_rate": 1.271376514780055e-05, "loss": 1.9807, "step": 34487 }, { "epoch": 1.1127299051644348, "grad_norm": 0.56640625, "learning_rate": 1.2713000877036774e-05, "loss": 1.9665, "step": 34488 }, { "epoch": 1.112762169018231, "grad_norm": 0.3359375, "learning_rate": 1.27122366123514e-05, "loss": 1.9696, "step": 34489 }, { "epoch": 1.1127944328720274, "grad_norm": 0.3515625, "learning_rate": 1.2711472353746456e-05, "loss": 1.9993, "step": 34490 }, { "epoch": 1.1128266967258238, "grad_norm": 0.328125, "learning_rate": 1.2710708101223983e-05, "loss": 1.9694, "step": 34491 }, { "epoch": 1.1128589605796202, "grad_norm": 0.353515625, "learning_rate": 1.2709943854786002e-05, "loss": 1.95, "step": 34492 }, { "epoch": 1.1128912244334166, "grad_norm": 0.341796875, "learning_rate": 1.2709179614434543e-05, "loss": 2.0009, "step": 34493 }, { "epoch": 1.1129234882872128, "grad_norm": 0.34375, "learning_rate": 1.2708415380171644e-05, "loss": 1.986, "step": 34494 }, { "epoch": 1.1129557521410092, "grad_norm": 0.34765625, "learning_rate": 1.270765115199933e-05, "loss": 1.9514, "step": 34495 }, { "epoch": 1.1129880159948056, "grad_norm": 0.345703125, "learning_rate": 1.2706886929919638e-05, "loss": 1.9604, "step": 34496 }, { "epoch": 1.113020279848602, "grad_norm": 0.33203125, "learning_rate": 1.2706122713934602e-05, "loss": 1.9834, "step": 34497 }, { "epoch": 1.1130525437023981, "grad_norm": 0.361328125, "learning_rate": 1.2705358504046243e-05, "loss": 1.9647, "step": 34498 }, { "epoch": 1.1130848075561945, "grad_norm": 0.341796875, "learning_rate": 1.2704594300256597e-05, "loss": 1.9861, "step": 34499 }, { "epoch": 1.113117071409991, "grad_norm": 0.337890625, "learning_rate": 1.2703830102567691e-05, "loss": 1.9971, "step": 34500 }, { "epoch": 1.1131493352637873, "grad_norm": 0.353515625, "learning_rate": 1.2703065910981563e-05, "loss": 1.982, "step": 34501 }, { "epoch": 1.1131815991175835, "grad_norm": 0.33984375, "learning_rate": 1.2702301725500243e-05, "loss": 1.9837, "step": 34502 }, { "epoch": 1.11321386297138, "grad_norm": 0.34765625, "learning_rate": 1.2701537546125758e-05, "loss": 1.975, "step": 34503 }, { "epoch": 1.1132461268251763, "grad_norm": 0.353515625, "learning_rate": 1.2700773372860148e-05, "loss": 2.0044, "step": 34504 }, { "epoch": 1.1132783906789727, "grad_norm": 0.35546875, "learning_rate": 1.2700009205705433e-05, "loss": 1.9463, "step": 34505 }, { "epoch": 1.113310654532769, "grad_norm": 0.345703125, "learning_rate": 1.2699245044663646e-05, "loss": 1.9731, "step": 34506 }, { "epoch": 1.1133429183865653, "grad_norm": 0.34765625, "learning_rate": 1.2698480889736822e-05, "loss": 1.978, "step": 34507 }, { "epoch": 1.1133751822403617, "grad_norm": 0.357421875, "learning_rate": 1.2697716740926991e-05, "loss": 1.9572, "step": 34508 }, { "epoch": 1.113407446094158, "grad_norm": 0.361328125, "learning_rate": 1.2696952598236182e-05, "loss": 2.0066, "step": 34509 }, { "epoch": 1.1134397099479543, "grad_norm": 0.341796875, "learning_rate": 1.269618846166643e-05, "loss": 1.988, "step": 34510 }, { "epoch": 1.1134719738017507, "grad_norm": 0.359375, "learning_rate": 1.2695424331219763e-05, "loss": 1.9255, "step": 34511 }, { "epoch": 1.113504237655547, "grad_norm": 0.330078125, "learning_rate": 1.2694660206898209e-05, "loss": 1.9819, "step": 34512 }, { "epoch": 1.1135365015093435, "grad_norm": 0.36328125, "learning_rate": 1.2693896088703804e-05, "loss": 1.9853, "step": 34513 }, { "epoch": 1.1135687653631399, "grad_norm": 0.345703125, "learning_rate": 1.2693131976638575e-05, "loss": 1.9837, "step": 34514 }, { "epoch": 1.113601029216936, "grad_norm": 0.341796875, "learning_rate": 1.2692367870704557e-05, "loss": 1.9351, "step": 34515 }, { "epoch": 1.1136332930707324, "grad_norm": 0.3515625, "learning_rate": 1.269160377090378e-05, "loss": 1.994, "step": 34516 }, { "epoch": 1.1136655569245288, "grad_norm": 0.345703125, "learning_rate": 1.269083967723827e-05, "loss": 1.9754, "step": 34517 }, { "epoch": 1.1136978207783252, "grad_norm": 0.349609375, "learning_rate": 1.2690075589710063e-05, "loss": 1.9894, "step": 34518 }, { "epoch": 1.1137300846321214, "grad_norm": 0.337890625, "learning_rate": 1.2689311508321186e-05, "loss": 1.9721, "step": 34519 }, { "epoch": 1.1137623484859178, "grad_norm": 0.33984375, "learning_rate": 1.2688547433073674e-05, "loss": 1.938, "step": 34520 }, { "epoch": 1.1137946123397142, "grad_norm": 0.341796875, "learning_rate": 1.2687783363969554e-05, "loss": 1.9883, "step": 34521 }, { "epoch": 1.1138268761935106, "grad_norm": 0.33984375, "learning_rate": 1.2687019301010862e-05, "loss": 1.9745, "step": 34522 }, { "epoch": 1.1138591400473068, "grad_norm": 0.34765625, "learning_rate": 1.2686255244199622e-05, "loss": 1.97, "step": 34523 }, { "epoch": 1.1138914039011032, "grad_norm": 0.341796875, "learning_rate": 1.2685491193537866e-05, "loss": 1.9595, "step": 34524 }, { "epoch": 1.1139236677548996, "grad_norm": 0.328125, "learning_rate": 1.2684727149027629e-05, "loss": 1.9558, "step": 34525 }, { "epoch": 1.113955931608696, "grad_norm": 0.353515625, "learning_rate": 1.2683963110670938e-05, "loss": 1.983, "step": 34526 }, { "epoch": 1.1139881954624922, "grad_norm": 0.3359375, "learning_rate": 1.2683199078469821e-05, "loss": 1.9517, "step": 34527 }, { "epoch": 1.1140204593162886, "grad_norm": 0.3359375, "learning_rate": 1.2682435052426323e-05, "loss": 1.978, "step": 34528 }, { "epoch": 1.114052723170085, "grad_norm": 0.33984375, "learning_rate": 1.2681671032542459e-05, "loss": 1.9776, "step": 34529 }, { "epoch": 1.1140849870238814, "grad_norm": 0.33203125, "learning_rate": 1.268090701882026e-05, "loss": 1.9452, "step": 34530 }, { "epoch": 1.1141172508776775, "grad_norm": 0.359375, "learning_rate": 1.2680143011261766e-05, "loss": 1.9683, "step": 34531 }, { "epoch": 1.114149514731474, "grad_norm": 0.333984375, "learning_rate": 1.2679379009868998e-05, "loss": 1.9997, "step": 34532 }, { "epoch": 1.1141817785852703, "grad_norm": 0.34765625, "learning_rate": 1.2678615014643994e-05, "loss": 1.97, "step": 34533 }, { "epoch": 1.1142140424390667, "grad_norm": 0.345703125, "learning_rate": 1.267785102558879e-05, "loss": 1.9852, "step": 34534 }, { "epoch": 1.1142463062928631, "grad_norm": 0.34375, "learning_rate": 1.26770870427054e-05, "loss": 1.9592, "step": 34535 }, { "epoch": 1.1142785701466593, "grad_norm": 0.341796875, "learning_rate": 1.2676323065995861e-05, "loss": 1.9941, "step": 34536 }, { "epoch": 1.1143108340004557, "grad_norm": 0.349609375, "learning_rate": 1.2675559095462208e-05, "loss": 1.9675, "step": 34537 }, { "epoch": 1.1143430978542521, "grad_norm": 0.341796875, "learning_rate": 1.267479513110647e-05, "loss": 1.9631, "step": 34538 }, { "epoch": 1.1143753617080485, "grad_norm": 0.333984375, "learning_rate": 1.2674031172930678e-05, "loss": 1.9765, "step": 34539 }, { "epoch": 1.1144076255618447, "grad_norm": 0.3671875, "learning_rate": 1.2673267220936865e-05, "loss": 1.9932, "step": 34540 }, { "epoch": 1.114439889415641, "grad_norm": 0.34765625, "learning_rate": 1.267250327512705e-05, "loss": 1.9505, "step": 34541 }, { "epoch": 1.1144721532694375, "grad_norm": 0.349609375, "learning_rate": 1.2671739335503272e-05, "loss": 1.95, "step": 34542 }, { "epoch": 1.114504417123234, "grad_norm": 0.3359375, "learning_rate": 1.267097540206756e-05, "loss": 1.9705, "step": 34543 }, { "epoch": 1.11453668097703, "grad_norm": 0.36328125, "learning_rate": 1.2670211474821947e-05, "loss": 1.9562, "step": 34544 }, { "epoch": 1.1145689448308265, "grad_norm": 0.345703125, "learning_rate": 1.2669447553768458e-05, "loss": 1.9791, "step": 34545 }, { "epoch": 1.1146012086846229, "grad_norm": 0.357421875, "learning_rate": 1.2668683638909132e-05, "loss": 2.0003, "step": 34546 }, { "epoch": 1.1146334725384193, "grad_norm": 0.345703125, "learning_rate": 1.2667919730245989e-05, "loss": 1.9661, "step": 34547 }, { "epoch": 1.1146657363922154, "grad_norm": 0.341796875, "learning_rate": 1.2667155827781064e-05, "loss": 1.9757, "step": 34548 }, { "epoch": 1.1146980002460118, "grad_norm": 0.33984375, "learning_rate": 1.2666391931516388e-05, "loss": 1.9651, "step": 34549 }, { "epoch": 1.1147302640998082, "grad_norm": 0.34765625, "learning_rate": 1.266562804145399e-05, "loss": 1.9427, "step": 34550 }, { "epoch": 1.1147625279536046, "grad_norm": 0.341796875, "learning_rate": 1.2664864157595902e-05, "loss": 1.9443, "step": 34551 }, { "epoch": 1.1147947918074008, "grad_norm": 0.337890625, "learning_rate": 1.2664100279944155e-05, "loss": 1.963, "step": 34552 }, { "epoch": 1.1148270556611972, "grad_norm": 0.349609375, "learning_rate": 1.2663336408500775e-05, "loss": 2.0067, "step": 34553 }, { "epoch": 1.1148593195149936, "grad_norm": 0.341796875, "learning_rate": 1.2662572543267794e-05, "loss": 1.9867, "step": 34554 }, { "epoch": 1.11489158336879, "grad_norm": 0.361328125, "learning_rate": 1.2661808684247243e-05, "loss": 1.9786, "step": 34555 }, { "epoch": 1.1149238472225864, "grad_norm": 0.345703125, "learning_rate": 1.2661044831441153e-05, "loss": 1.9827, "step": 34556 }, { "epoch": 1.1149561110763826, "grad_norm": 0.349609375, "learning_rate": 1.266028098485155e-05, "loss": 1.9877, "step": 34557 }, { "epoch": 1.114988374930179, "grad_norm": 0.333984375, "learning_rate": 1.2659517144480474e-05, "loss": 1.9984, "step": 34558 }, { "epoch": 1.1150206387839754, "grad_norm": 0.33984375, "learning_rate": 1.2658753310329944e-05, "loss": 1.9603, "step": 34559 }, { "epoch": 1.1150529026377718, "grad_norm": 0.341796875, "learning_rate": 1.2657989482401994e-05, "loss": 1.9743, "step": 34560 }, { "epoch": 1.115085166491568, "grad_norm": 0.326171875, "learning_rate": 1.2657225660698657e-05, "loss": 1.9529, "step": 34561 }, { "epoch": 1.1151174303453644, "grad_norm": 0.34375, "learning_rate": 1.2656461845221958e-05, "loss": 1.9642, "step": 34562 }, { "epoch": 1.1151496941991608, "grad_norm": 0.345703125, "learning_rate": 1.2655698035973932e-05, "loss": 1.9812, "step": 34563 }, { "epoch": 1.1151819580529572, "grad_norm": 0.328125, "learning_rate": 1.2654934232956608e-05, "loss": 1.9758, "step": 34564 }, { "epoch": 1.1152142219067533, "grad_norm": 0.34765625, "learning_rate": 1.2654170436172013e-05, "loss": 1.9576, "step": 34565 }, { "epoch": 1.1152464857605497, "grad_norm": 0.34375, "learning_rate": 1.2653406645622178e-05, "loss": 1.9539, "step": 34566 }, { "epoch": 1.1152787496143461, "grad_norm": 0.345703125, "learning_rate": 1.2652642861309135e-05, "loss": 2.0039, "step": 34567 }, { "epoch": 1.1153110134681425, "grad_norm": 0.34375, "learning_rate": 1.2651879083234911e-05, "loss": 1.9886, "step": 34568 }, { "epoch": 1.1153432773219387, "grad_norm": 0.357421875, "learning_rate": 1.2651115311401539e-05, "loss": 1.9591, "step": 34569 }, { "epoch": 1.1153755411757351, "grad_norm": 0.333984375, "learning_rate": 1.2650351545811054e-05, "loss": 1.9737, "step": 34570 }, { "epoch": 1.1154078050295315, "grad_norm": 0.345703125, "learning_rate": 1.2649587786465473e-05, "loss": 1.996, "step": 34571 }, { "epoch": 1.115440068883328, "grad_norm": 0.34765625, "learning_rate": 1.2648824033366834e-05, "loss": 1.9643, "step": 34572 }, { "epoch": 1.115472332737124, "grad_norm": 0.34375, "learning_rate": 1.2648060286517162e-05, "loss": 1.9863, "step": 34573 }, { "epoch": 1.1155045965909205, "grad_norm": 0.3515625, "learning_rate": 1.2647296545918495e-05, "loss": 1.9707, "step": 34574 }, { "epoch": 1.115536860444717, "grad_norm": 0.337890625, "learning_rate": 1.2646532811572859e-05, "loss": 1.9683, "step": 34575 }, { "epoch": 1.1155691242985133, "grad_norm": 0.330078125, "learning_rate": 1.2645769083482287e-05, "loss": 1.9865, "step": 34576 }, { "epoch": 1.1156013881523097, "grad_norm": 0.353515625, "learning_rate": 1.2645005361648801e-05, "loss": 1.995, "step": 34577 }, { "epoch": 1.1156336520061059, "grad_norm": 0.35546875, "learning_rate": 1.264424164607443e-05, "loss": 1.9717, "step": 34578 }, { "epoch": 1.1156659158599023, "grad_norm": 0.353515625, "learning_rate": 1.2643477936761215e-05, "loss": 1.9989, "step": 34579 }, { "epoch": 1.1156981797136987, "grad_norm": 0.33984375, "learning_rate": 1.2642714233711176e-05, "loss": 1.9586, "step": 34580 }, { "epoch": 1.115730443567495, "grad_norm": 0.333984375, "learning_rate": 1.2641950536926347e-05, "loss": 1.9931, "step": 34581 }, { "epoch": 1.1157627074212912, "grad_norm": 0.34375, "learning_rate": 1.2641186846408766e-05, "loss": 1.9661, "step": 34582 }, { "epoch": 1.1157949712750876, "grad_norm": 0.34765625, "learning_rate": 1.264042316216044e-05, "loss": 1.981, "step": 34583 }, { "epoch": 1.115827235128884, "grad_norm": 0.34765625, "learning_rate": 1.263965948418342e-05, "loss": 1.9577, "step": 34584 }, { "epoch": 1.1158594989826804, "grad_norm": 0.34375, "learning_rate": 1.2638895812479725e-05, "loss": 1.9849, "step": 34585 }, { "epoch": 1.1158917628364766, "grad_norm": 0.3359375, "learning_rate": 1.263813214705139e-05, "loss": 1.9616, "step": 34586 }, { "epoch": 1.115924026690273, "grad_norm": 0.34765625, "learning_rate": 1.2637368487900442e-05, "loss": 1.9972, "step": 34587 }, { "epoch": 1.1159562905440694, "grad_norm": 0.34375, "learning_rate": 1.2636604835028916e-05, "loss": 1.9728, "step": 34588 }, { "epoch": 1.1159885543978658, "grad_norm": 0.34375, "learning_rate": 1.2635841188438831e-05, "loss": 1.9821, "step": 34589 }, { "epoch": 1.116020818251662, "grad_norm": 0.365234375, "learning_rate": 1.2635077548132224e-05, "loss": 1.9803, "step": 34590 }, { "epoch": 1.1160530821054584, "grad_norm": 0.3515625, "learning_rate": 1.2634313914111124e-05, "loss": 1.9767, "step": 34591 }, { "epoch": 1.1160853459592548, "grad_norm": 0.337890625, "learning_rate": 1.2633550286377559e-05, "loss": 1.9634, "step": 34592 }, { "epoch": 1.1161176098130512, "grad_norm": 0.35546875, "learning_rate": 1.263278666493356e-05, "loss": 1.9544, "step": 34593 }, { "epoch": 1.1161498736668474, "grad_norm": 0.45703125, "learning_rate": 1.2632023049781155e-05, "loss": 2.0575, "step": 34594 }, { "epoch": 1.1161821375206438, "grad_norm": 0.3984375, "learning_rate": 1.2631259440922376e-05, "loss": 2.0668, "step": 34595 }, { "epoch": 1.1162144013744402, "grad_norm": 0.423828125, "learning_rate": 1.2630495838359249e-05, "loss": 2.0635, "step": 34596 }, { "epoch": 1.1162466652282366, "grad_norm": 0.369140625, "learning_rate": 1.2629732242093807e-05, "loss": 2.0376, "step": 34597 }, { "epoch": 1.116278929082033, "grad_norm": 0.416015625, "learning_rate": 1.2628968652128076e-05, "loss": 2.0621, "step": 34598 }, { "epoch": 1.1163111929358291, "grad_norm": 0.376953125, "learning_rate": 1.2628205068464088e-05, "loss": 2.0665, "step": 34599 }, { "epoch": 1.1163434567896255, "grad_norm": 0.4296875, "learning_rate": 1.2627441491103876e-05, "loss": 2.0588, "step": 34600 }, { "epoch": 1.116375720643422, "grad_norm": 0.38671875, "learning_rate": 1.262667792004946e-05, "loss": 2.0163, "step": 34601 }, { "epoch": 1.1164079844972183, "grad_norm": 0.404296875, "learning_rate": 1.2625914355302876e-05, "loss": 2.0627, "step": 34602 }, { "epoch": 1.1164402483510145, "grad_norm": 0.396484375, "learning_rate": 1.2625150796866153e-05, "loss": 2.054, "step": 34603 }, { "epoch": 1.116472512204811, "grad_norm": 0.369140625, "learning_rate": 1.2624387244741319e-05, "loss": 2.0437, "step": 34604 }, { "epoch": 1.1165047760586073, "grad_norm": 0.4140625, "learning_rate": 1.26236236989304e-05, "loss": 2.0519, "step": 34605 }, { "epoch": 1.1165370399124037, "grad_norm": 0.37890625, "learning_rate": 1.2622860159435441e-05, "loss": 2.1058, "step": 34606 }, { "epoch": 1.1165693037662, "grad_norm": 0.419921875, "learning_rate": 1.2622096626258451e-05, "loss": 2.1229, "step": 34607 }, { "epoch": 1.1166015676199963, "grad_norm": 0.384765625, "learning_rate": 1.2621333099401467e-05, "loss": 2.135, "step": 34608 }, { "epoch": 1.1166338314737927, "grad_norm": 0.400390625, "learning_rate": 1.2620569578866522e-05, "loss": 2.1379, "step": 34609 }, { "epoch": 1.116666095327589, "grad_norm": 0.380859375, "learning_rate": 1.261980606465564e-05, "loss": 2.139, "step": 34610 }, { "epoch": 1.1166983591813853, "grad_norm": 0.3671875, "learning_rate": 1.2619042556770854e-05, "loss": 2.1279, "step": 34611 }, { "epoch": 1.1167306230351817, "grad_norm": 0.361328125, "learning_rate": 1.2618279055214197e-05, "loss": 2.1055, "step": 34612 }, { "epoch": 1.116762886888978, "grad_norm": 0.3828125, "learning_rate": 1.261751555998769e-05, "loss": 2.131, "step": 34613 }, { "epoch": 1.1167951507427745, "grad_norm": 0.357421875, "learning_rate": 1.2616752071093364e-05, "loss": 2.1212, "step": 34614 }, { "epoch": 1.1168274145965706, "grad_norm": 0.36328125, "learning_rate": 1.2615988588533245e-05, "loss": 2.1348, "step": 34615 }, { "epoch": 1.116859678450367, "grad_norm": 0.35546875, "learning_rate": 1.2615225112309373e-05, "loss": 2.1532, "step": 34616 }, { "epoch": 1.1168919423041634, "grad_norm": 0.359375, "learning_rate": 1.261446164242377e-05, "loss": 2.1464, "step": 34617 }, { "epoch": 1.1169242061579598, "grad_norm": 0.37890625, "learning_rate": 1.2613698178878473e-05, "loss": 2.1238, "step": 34618 }, { "epoch": 1.1169564700117562, "grad_norm": 0.3515625, "learning_rate": 1.2612934721675497e-05, "loss": 2.1374, "step": 34619 }, { "epoch": 1.1169887338655524, "grad_norm": 0.37890625, "learning_rate": 1.2612171270816875e-05, "loss": 2.1086, "step": 34620 }, { "epoch": 1.1170209977193488, "grad_norm": 0.35546875, "learning_rate": 1.2611407826304643e-05, "loss": 2.1137, "step": 34621 }, { "epoch": 1.1170532615731452, "grad_norm": 0.3515625, "learning_rate": 1.2610644388140828e-05, "loss": 2.1343, "step": 34622 }, { "epoch": 1.1170855254269416, "grad_norm": 0.369140625, "learning_rate": 1.2609880956327458e-05, "loss": 2.1024, "step": 34623 }, { "epoch": 1.1171177892807378, "grad_norm": 0.34765625, "learning_rate": 1.2609117530866564e-05, "loss": 2.1195, "step": 34624 }, { "epoch": 1.1171500531345342, "grad_norm": 0.34375, "learning_rate": 1.260835411176017e-05, "loss": 2.0918, "step": 34625 }, { "epoch": 1.1171823169883306, "grad_norm": 0.357421875, "learning_rate": 1.2607590699010308e-05, "loss": 2.1167, "step": 34626 }, { "epoch": 1.117214580842127, "grad_norm": 0.349609375, "learning_rate": 1.2606827292619005e-05, "loss": 2.0864, "step": 34627 }, { "epoch": 1.1172468446959232, "grad_norm": 0.345703125, "learning_rate": 1.2606063892588295e-05, "loss": 2.1175, "step": 34628 }, { "epoch": 1.1172791085497196, "grad_norm": 0.3515625, "learning_rate": 1.2605300498920201e-05, "loss": 2.1186, "step": 34629 }, { "epoch": 1.117311372403516, "grad_norm": 0.353515625, "learning_rate": 1.2604537111616761e-05, "loss": 2.1222, "step": 34630 }, { "epoch": 1.1173436362573124, "grad_norm": 0.34765625, "learning_rate": 1.2603773730679993e-05, "loss": 2.1118, "step": 34631 }, { "epoch": 1.1173759001111085, "grad_norm": 0.333984375, "learning_rate": 1.260301035611193e-05, "loss": 2.1218, "step": 34632 }, { "epoch": 1.117408163964905, "grad_norm": 0.361328125, "learning_rate": 1.2602246987914601e-05, "loss": 2.1206, "step": 34633 }, { "epoch": 1.1174404278187013, "grad_norm": 0.357421875, "learning_rate": 1.2601483626090037e-05, "loss": 2.111, "step": 34634 }, { "epoch": 1.1174726916724977, "grad_norm": 0.341796875, "learning_rate": 1.2600720270640265e-05, "loss": 2.1158, "step": 34635 }, { "epoch": 1.117504955526294, "grad_norm": 0.3671875, "learning_rate": 1.2599956921567314e-05, "loss": 2.1572, "step": 34636 }, { "epoch": 1.1175372193800903, "grad_norm": 0.33984375, "learning_rate": 1.2599193578873216e-05, "loss": 2.1094, "step": 34637 }, { "epoch": 1.1175694832338867, "grad_norm": 0.3671875, "learning_rate": 1.2598430242559994e-05, "loss": 2.136, "step": 34638 }, { "epoch": 1.1176017470876831, "grad_norm": 0.341796875, "learning_rate": 1.2597666912629679e-05, "loss": 2.1235, "step": 34639 }, { "epoch": 1.1176340109414795, "grad_norm": 0.36328125, "learning_rate": 1.25969035890843e-05, "loss": 2.1096, "step": 34640 }, { "epoch": 1.1176662747952757, "grad_norm": 0.3515625, "learning_rate": 1.2596140271925888e-05, "loss": 2.1157, "step": 34641 }, { "epoch": 1.117698538649072, "grad_norm": 0.392578125, "learning_rate": 1.2595376961156467e-05, "loss": 2.1514, "step": 34642 }, { "epoch": 1.1177308025028685, "grad_norm": 0.384765625, "learning_rate": 1.2594613656778077e-05, "loss": 2.1508, "step": 34643 }, { "epoch": 1.117763066356665, "grad_norm": 0.34765625, "learning_rate": 1.2593850358792731e-05, "loss": 2.1155, "step": 34644 }, { "epoch": 1.117795330210461, "grad_norm": 0.37109375, "learning_rate": 1.2593087067202467e-05, "loss": 2.1322, "step": 34645 }, { "epoch": 1.1178275940642575, "grad_norm": 0.43359375, "learning_rate": 1.259232378200931e-05, "loss": 2.1127, "step": 34646 }, { "epoch": 1.1178598579180539, "grad_norm": 0.427734375, "learning_rate": 1.2591560503215286e-05, "loss": 2.123, "step": 34647 }, { "epoch": 1.1178921217718503, "grad_norm": 0.375, "learning_rate": 1.2590797230822436e-05, "loss": 2.122, "step": 34648 }, { "epoch": 1.1179243856256464, "grad_norm": 0.359375, "learning_rate": 1.2590033964832784e-05, "loss": 2.1274, "step": 34649 }, { "epoch": 1.1179566494794428, "grad_norm": 0.359375, "learning_rate": 1.2589270705248349e-05, "loss": 2.1006, "step": 34650 }, { "epoch": 1.1179889133332392, "grad_norm": 0.341796875, "learning_rate": 1.2588507452071166e-05, "loss": 2.1142, "step": 34651 }, { "epoch": 1.1180211771870356, "grad_norm": 0.369140625, "learning_rate": 1.2587744205303258e-05, "loss": 2.1231, "step": 34652 }, { "epoch": 1.118053441040832, "grad_norm": 0.353515625, "learning_rate": 1.2586980964946666e-05, "loss": 2.1175, "step": 34653 }, { "epoch": 1.1180857048946282, "grad_norm": 0.369140625, "learning_rate": 1.258621773100341e-05, "loss": 2.1281, "step": 34654 }, { "epoch": 1.1181179687484246, "grad_norm": 0.33984375, "learning_rate": 1.2585454503475528e-05, "loss": 2.1324, "step": 34655 }, { "epoch": 1.118150232602221, "grad_norm": 0.361328125, "learning_rate": 1.2584691282365028e-05, "loss": 2.1407, "step": 34656 }, { "epoch": 1.1181824964560172, "grad_norm": 0.353515625, "learning_rate": 1.2583928067673956e-05, "loss": 2.1146, "step": 34657 }, { "epoch": 1.1182147603098136, "grad_norm": 0.470703125, "learning_rate": 1.2583164859404336e-05, "loss": 2.1229, "step": 34658 }, { "epoch": 1.11824702416361, "grad_norm": 0.36328125, "learning_rate": 1.2582401657558195e-05, "loss": 2.1422, "step": 34659 }, { "epoch": 1.1182792880174064, "grad_norm": 0.349609375, "learning_rate": 1.2581638462137563e-05, "loss": 2.1527, "step": 34660 }, { "epoch": 1.1183115518712028, "grad_norm": 0.341796875, "learning_rate": 1.2580875273144471e-05, "loss": 2.0942, "step": 34661 }, { "epoch": 1.118343815724999, "grad_norm": 0.37109375, "learning_rate": 1.258011209058094e-05, "loss": 2.1088, "step": 34662 }, { "epoch": 1.1183760795787954, "grad_norm": 0.34765625, "learning_rate": 1.2579348914449006e-05, "loss": 2.1208, "step": 34663 }, { "epoch": 1.1184083434325918, "grad_norm": 0.36328125, "learning_rate": 1.2578585744750692e-05, "loss": 2.1348, "step": 34664 }, { "epoch": 1.1184406072863882, "grad_norm": 0.36328125, "learning_rate": 1.2577822581488029e-05, "loss": 2.1395, "step": 34665 }, { "epoch": 1.1184728711401843, "grad_norm": 0.375, "learning_rate": 1.2577059424663044e-05, "loss": 2.1334, "step": 34666 }, { "epoch": 1.1185051349939807, "grad_norm": 0.361328125, "learning_rate": 1.2576296274277768e-05, "loss": 2.1201, "step": 34667 }, { "epoch": 1.1185373988477771, "grad_norm": 0.34765625, "learning_rate": 1.2575533130334227e-05, "loss": 2.1028, "step": 34668 }, { "epoch": 1.1185696627015735, "grad_norm": 0.37109375, "learning_rate": 1.2574769992834448e-05, "loss": 2.1184, "step": 34669 }, { "epoch": 1.1186019265553697, "grad_norm": 0.33984375, "learning_rate": 1.2574006861780462e-05, "loss": 2.1384, "step": 34670 }, { "epoch": 1.1186341904091661, "grad_norm": 0.3671875, "learning_rate": 1.2573243737174295e-05, "loss": 2.1543, "step": 34671 }, { "epoch": 1.1186664542629625, "grad_norm": 0.361328125, "learning_rate": 1.2572480619017979e-05, "loss": 2.1188, "step": 34672 }, { "epoch": 1.118698718116759, "grad_norm": 0.3515625, "learning_rate": 1.257171750731354e-05, "loss": 2.0953, "step": 34673 }, { "epoch": 1.1187309819705553, "grad_norm": 0.361328125, "learning_rate": 1.2570954402063004e-05, "loss": 2.038, "step": 34674 }, { "epoch": 1.1187632458243515, "grad_norm": 0.361328125, "learning_rate": 1.2570191303268401e-05, "loss": 2.0361, "step": 34675 }, { "epoch": 1.118795509678148, "grad_norm": 0.40234375, "learning_rate": 1.2569428210931758e-05, "loss": 2.0479, "step": 34676 }, { "epoch": 1.1188277735319443, "grad_norm": 0.34375, "learning_rate": 1.2568665125055105e-05, "loss": 2.0353, "step": 34677 }, { "epoch": 1.1188600373857405, "grad_norm": 0.37109375, "learning_rate": 1.2567902045640472e-05, "loss": 2.0348, "step": 34678 }, { "epoch": 1.1188923012395369, "grad_norm": 0.34375, "learning_rate": 1.2567138972689884e-05, "loss": 2.0288, "step": 34679 }, { "epoch": 1.1189245650933333, "grad_norm": 0.375, "learning_rate": 1.2566375906205368e-05, "loss": 2.0472, "step": 34680 }, { "epoch": 1.1189568289471297, "grad_norm": 0.361328125, "learning_rate": 1.2565612846188955e-05, "loss": 2.0358, "step": 34681 }, { "epoch": 1.118989092800926, "grad_norm": 0.361328125, "learning_rate": 1.256484979264267e-05, "loss": 2.0201, "step": 34682 }, { "epoch": 1.1190213566547222, "grad_norm": 0.36328125, "learning_rate": 1.2564086745568545e-05, "loss": 2.0523, "step": 34683 }, { "epoch": 1.1190536205085186, "grad_norm": 0.34765625, "learning_rate": 1.2563323704968602e-05, "loss": 2.0868, "step": 34684 }, { "epoch": 1.119085884362315, "grad_norm": 0.353515625, "learning_rate": 1.2562560670844885e-05, "loss": 2.0356, "step": 34685 }, { "epoch": 1.1191181482161114, "grad_norm": 0.353515625, "learning_rate": 1.2561797643199399e-05, "loss": 1.9798, "step": 34686 }, { "epoch": 1.1191504120699076, "grad_norm": 0.34765625, "learning_rate": 1.2561034622034185e-05, "loss": 1.9836, "step": 34687 }, { "epoch": 1.119182675923704, "grad_norm": 0.349609375, "learning_rate": 1.256027160735127e-05, "loss": 1.9514, "step": 34688 }, { "epoch": 1.1192149397775004, "grad_norm": 0.337890625, "learning_rate": 1.2559508599152676e-05, "loss": 1.9815, "step": 34689 }, { "epoch": 1.1192472036312968, "grad_norm": 0.345703125, "learning_rate": 1.255874559744044e-05, "loss": 1.9679, "step": 34690 }, { "epoch": 1.119279467485093, "grad_norm": 0.33984375, "learning_rate": 1.2557982602216591e-05, "loss": 1.9817, "step": 34691 }, { "epoch": 1.1193117313388894, "grad_norm": 0.333984375, "learning_rate": 1.2557219613483146e-05, "loss": 1.9583, "step": 34692 }, { "epoch": 1.1193439951926858, "grad_norm": 0.3359375, "learning_rate": 1.2556456631242134e-05, "loss": 1.9885, "step": 34693 }, { "epoch": 1.1193762590464822, "grad_norm": 0.337890625, "learning_rate": 1.2555693655495592e-05, "loss": 1.9728, "step": 34694 }, { "epoch": 1.1194085229002786, "grad_norm": 0.3359375, "learning_rate": 1.2554930686245544e-05, "loss": 1.9684, "step": 34695 }, { "epoch": 1.1194407867540748, "grad_norm": 0.333984375, "learning_rate": 1.2554167723494017e-05, "loss": 1.9882, "step": 34696 }, { "epoch": 1.1194730506078712, "grad_norm": 0.32421875, "learning_rate": 1.2553404767243043e-05, "loss": 1.9749, "step": 34697 }, { "epoch": 1.1195053144616676, "grad_norm": 0.328125, "learning_rate": 1.2552641817494638e-05, "loss": 1.9671, "step": 34698 }, { "epoch": 1.119537578315464, "grad_norm": 0.330078125, "learning_rate": 1.2551878874250839e-05, "loss": 1.9826, "step": 34699 }, { "epoch": 1.1195698421692601, "grad_norm": 0.33203125, "learning_rate": 1.2551115937513673e-05, "loss": 1.9873, "step": 34700 }, { "epoch": 1.1196021060230565, "grad_norm": 0.333984375, "learning_rate": 1.2550353007285167e-05, "loss": 1.9524, "step": 34701 }, { "epoch": 1.119634369876853, "grad_norm": 0.33203125, "learning_rate": 1.254959008356735e-05, "loss": 1.9815, "step": 34702 }, { "epoch": 1.1196666337306493, "grad_norm": 0.33984375, "learning_rate": 1.254882716636225e-05, "loss": 1.9607, "step": 34703 }, { "epoch": 1.1196988975844455, "grad_norm": 0.326171875, "learning_rate": 1.2548064255671887e-05, "loss": 1.9585, "step": 34704 }, { "epoch": 1.119731161438242, "grad_norm": 0.349609375, "learning_rate": 1.2547301351498299e-05, "loss": 1.9821, "step": 34705 }, { "epoch": 1.1197634252920383, "grad_norm": 0.341796875, "learning_rate": 1.2546538453843507e-05, "loss": 1.9651, "step": 34706 }, { "epoch": 1.1197956891458347, "grad_norm": 0.345703125, "learning_rate": 1.2545775562709541e-05, "loss": 1.9743, "step": 34707 }, { "epoch": 1.119827952999631, "grad_norm": 0.345703125, "learning_rate": 1.254501267809843e-05, "loss": 1.9698, "step": 34708 }, { "epoch": 1.1198602168534273, "grad_norm": 0.330078125, "learning_rate": 1.2544249800012204e-05, "loss": 1.9893, "step": 34709 }, { "epoch": 1.1198924807072237, "grad_norm": 0.353515625, "learning_rate": 1.254348692845288e-05, "loss": 1.9701, "step": 34710 }, { "epoch": 1.11992474456102, "grad_norm": 0.34375, "learning_rate": 1.2542724063422495e-05, "loss": 1.9648, "step": 34711 }, { "epoch": 1.1199570084148163, "grad_norm": 0.349609375, "learning_rate": 1.2541961204923073e-05, "loss": 1.9832, "step": 34712 }, { "epoch": 1.1199892722686127, "grad_norm": 0.337890625, "learning_rate": 1.2541198352956643e-05, "loss": 1.9676, "step": 34713 }, { "epoch": 1.120021536122409, "grad_norm": 0.3359375, "learning_rate": 1.254043550752523e-05, "loss": 1.9828, "step": 34714 }, { "epoch": 1.1200537999762055, "grad_norm": 0.3359375, "learning_rate": 1.253967266863087e-05, "loss": 1.9967, "step": 34715 }, { "epoch": 1.1200860638300019, "grad_norm": 0.341796875, "learning_rate": 1.2538909836275579e-05, "loss": 1.9969, "step": 34716 }, { "epoch": 1.120118327683798, "grad_norm": 0.341796875, "learning_rate": 1.2538147010461389e-05, "loss": 1.9138, "step": 34717 }, { "epoch": 1.1201505915375944, "grad_norm": 0.34375, "learning_rate": 1.2537384191190326e-05, "loss": 1.9851, "step": 34718 }, { "epoch": 1.1201828553913908, "grad_norm": 0.322265625, "learning_rate": 1.2536621378464423e-05, "loss": 1.8942, "step": 34719 }, { "epoch": 1.1202151192451872, "grad_norm": 0.337890625, "learning_rate": 1.25358585722857e-05, "loss": 1.9447, "step": 34720 }, { "epoch": 1.1202473830989834, "grad_norm": 0.349609375, "learning_rate": 1.2535095772656195e-05, "loss": 1.9531, "step": 34721 }, { "epoch": 1.1202796469527798, "grad_norm": 0.345703125, "learning_rate": 1.2534332979577922e-05, "loss": 1.9722, "step": 34722 }, { "epoch": 1.1203119108065762, "grad_norm": 0.33984375, "learning_rate": 1.2533570193052916e-05, "loss": 1.953, "step": 34723 }, { "epoch": 1.1203441746603726, "grad_norm": 0.34375, "learning_rate": 1.2532807413083203e-05, "loss": 1.9532, "step": 34724 }, { "epoch": 1.1203764385141688, "grad_norm": 0.3359375, "learning_rate": 1.2532044639670806e-05, "loss": 1.9711, "step": 34725 }, { "epoch": 1.1204087023679652, "grad_norm": 0.3671875, "learning_rate": 1.2531281872817763e-05, "loss": 1.9676, "step": 34726 }, { "epoch": 1.1204409662217616, "grad_norm": 0.337890625, "learning_rate": 1.2530519112526098e-05, "loss": 1.9537, "step": 34727 }, { "epoch": 1.120473230075558, "grad_norm": 0.34375, "learning_rate": 1.252975635879783e-05, "loss": 1.9489, "step": 34728 }, { "epoch": 1.1205054939293542, "grad_norm": 0.361328125, "learning_rate": 1.2528993611634992e-05, "loss": 1.9778, "step": 34729 }, { "epoch": 1.1205377577831506, "grad_norm": 0.333984375, "learning_rate": 1.2528230871039608e-05, "loss": 2.0059, "step": 34730 }, { "epoch": 1.120570021636947, "grad_norm": 0.3515625, "learning_rate": 1.252746813701371e-05, "loss": 1.9897, "step": 34731 }, { "epoch": 1.1206022854907434, "grad_norm": 0.357421875, "learning_rate": 1.2526705409559322e-05, "loss": 1.9711, "step": 34732 }, { "epoch": 1.1206345493445395, "grad_norm": 0.37890625, "learning_rate": 1.252594268867848e-05, "loss": 1.9615, "step": 34733 }, { "epoch": 1.120666813198336, "grad_norm": 0.34375, "learning_rate": 1.2525179974373198e-05, "loss": 1.974, "step": 34734 }, { "epoch": 1.1206990770521323, "grad_norm": 0.349609375, "learning_rate": 1.2524417266645506e-05, "loss": 1.9597, "step": 34735 }, { "epoch": 1.1207313409059287, "grad_norm": 0.365234375, "learning_rate": 1.2523654565497435e-05, "loss": 1.9703, "step": 34736 }, { "epoch": 1.1207636047597251, "grad_norm": 0.33203125, "learning_rate": 1.2522891870931012e-05, "loss": 1.9553, "step": 34737 }, { "epoch": 1.1207958686135213, "grad_norm": 0.3359375, "learning_rate": 1.2522129182948262e-05, "loss": 1.9721, "step": 34738 }, { "epoch": 1.1208281324673177, "grad_norm": 0.353515625, "learning_rate": 1.252136650155122e-05, "loss": 1.948, "step": 34739 }, { "epoch": 1.1208603963211141, "grad_norm": 0.341796875, "learning_rate": 1.2520603826741897e-05, "loss": 1.9605, "step": 34740 }, { "epoch": 1.1208926601749105, "grad_norm": 0.337890625, "learning_rate": 1.2519841158522333e-05, "loss": 1.979, "step": 34741 }, { "epoch": 1.1209249240287067, "grad_norm": 0.326171875, "learning_rate": 1.251907849689455e-05, "loss": 1.9633, "step": 34742 }, { "epoch": 1.120957187882503, "grad_norm": 0.3359375, "learning_rate": 1.2518315841860576e-05, "loss": 1.9608, "step": 34743 }, { "epoch": 1.1209894517362995, "grad_norm": 0.345703125, "learning_rate": 1.2517553193422437e-05, "loss": 1.9605, "step": 34744 }, { "epoch": 1.121021715590096, "grad_norm": 0.33984375, "learning_rate": 1.2516790551582168e-05, "loss": 1.9206, "step": 34745 }, { "epoch": 1.121053979443892, "grad_norm": 0.361328125, "learning_rate": 1.2516027916341783e-05, "loss": 1.966, "step": 34746 }, { "epoch": 1.1210862432976885, "grad_norm": 0.34375, "learning_rate": 1.2515265287703315e-05, "loss": 1.9779, "step": 34747 }, { "epoch": 1.1211185071514849, "grad_norm": 0.353515625, "learning_rate": 1.2514502665668793e-05, "loss": 1.9792, "step": 34748 }, { "epoch": 1.1211507710052813, "grad_norm": 0.35546875, "learning_rate": 1.251374005024024e-05, "loss": 1.9589, "step": 34749 }, { "epoch": 1.1211830348590774, "grad_norm": 0.35546875, "learning_rate": 1.2512977441419686e-05, "loss": 1.9756, "step": 34750 }, { "epoch": 1.1212152987128738, "grad_norm": 0.37890625, "learning_rate": 1.2512214839209158e-05, "loss": 1.992, "step": 34751 }, { "epoch": 1.1212475625666702, "grad_norm": 0.349609375, "learning_rate": 1.2511452243610681e-05, "loss": 1.9701, "step": 34752 }, { "epoch": 1.1212798264204666, "grad_norm": 0.34375, "learning_rate": 1.251068965462628e-05, "loss": 1.9932, "step": 34753 }, { "epoch": 1.1213120902742628, "grad_norm": 0.34375, "learning_rate": 1.2509927072257986e-05, "loss": 1.9883, "step": 34754 }, { "epoch": 1.1213443541280592, "grad_norm": 0.330078125, "learning_rate": 1.2509164496507822e-05, "loss": 1.9716, "step": 34755 }, { "epoch": 1.1213766179818556, "grad_norm": 0.34765625, "learning_rate": 1.2508401927377818e-05, "loss": 1.9644, "step": 34756 }, { "epoch": 1.121408881835652, "grad_norm": 0.359375, "learning_rate": 1.2507639364870003e-05, "loss": 1.9808, "step": 34757 }, { "epoch": 1.1214411456894484, "grad_norm": 0.337890625, "learning_rate": 1.2506876808986395e-05, "loss": 1.9731, "step": 34758 }, { "epoch": 1.1214734095432446, "grad_norm": 0.38671875, "learning_rate": 1.2506114259729027e-05, "loss": 1.9559, "step": 34759 }, { "epoch": 1.121505673397041, "grad_norm": 0.333984375, "learning_rate": 1.2505351717099924e-05, "loss": 1.9606, "step": 34760 }, { "epoch": 1.1215379372508374, "grad_norm": 0.3515625, "learning_rate": 1.2504589181101116e-05, "loss": 1.9603, "step": 34761 }, { "epoch": 1.1215702011046338, "grad_norm": 0.3359375, "learning_rate": 1.250382665173462e-05, "loss": 1.978, "step": 34762 }, { "epoch": 1.12160246495843, "grad_norm": 0.341796875, "learning_rate": 1.2503064129002476e-05, "loss": 2.0076, "step": 34763 }, { "epoch": 1.1216347288122264, "grad_norm": 0.34765625, "learning_rate": 1.250230161290671e-05, "loss": 1.9465, "step": 34764 }, { "epoch": 1.1216669926660228, "grad_norm": 0.337890625, "learning_rate": 1.2501539103449334e-05, "loss": 1.9504, "step": 34765 }, { "epoch": 1.1216992565198192, "grad_norm": 0.35546875, "learning_rate": 1.2500776600632386e-05, "loss": 1.9822, "step": 34766 }, { "epoch": 1.1217315203736153, "grad_norm": 0.341796875, "learning_rate": 1.2500014104457886e-05, "loss": 1.9488, "step": 34767 }, { "epoch": 1.1217637842274117, "grad_norm": 0.34765625, "learning_rate": 1.249925161492787e-05, "loss": 1.9885, "step": 34768 }, { "epoch": 1.1217960480812081, "grad_norm": 0.349609375, "learning_rate": 1.2498489132044356e-05, "loss": 1.9922, "step": 34769 }, { "epoch": 1.1218283119350045, "grad_norm": 0.349609375, "learning_rate": 1.2497726655809381e-05, "loss": 1.972, "step": 34770 }, { "epoch": 1.1218605757888007, "grad_norm": 0.349609375, "learning_rate": 1.249696418622496e-05, "loss": 1.9624, "step": 34771 }, { "epoch": 1.1218928396425971, "grad_norm": 0.33984375, "learning_rate": 1.2496201723293118e-05, "loss": 1.9761, "step": 34772 }, { "epoch": 1.1219251034963935, "grad_norm": 0.35546875, "learning_rate": 1.249543926701589e-05, "loss": 1.97, "step": 34773 }, { "epoch": 1.12195736735019, "grad_norm": 0.330078125, "learning_rate": 1.2494676817395302e-05, "loss": 1.9674, "step": 34774 }, { "epoch": 1.121989631203986, "grad_norm": 0.330078125, "learning_rate": 1.2493914374433377e-05, "loss": 1.9469, "step": 34775 }, { "epoch": 1.1220218950577825, "grad_norm": 0.33984375, "learning_rate": 1.2493151938132149e-05, "loss": 1.9973, "step": 34776 }, { "epoch": 1.122054158911579, "grad_norm": 0.33203125, "learning_rate": 1.2492389508493628e-05, "loss": 2.0024, "step": 34777 }, { "epoch": 1.1220864227653753, "grad_norm": 0.333984375, "learning_rate": 1.2491627085519854e-05, "loss": 1.9716, "step": 34778 }, { "epoch": 1.1221186866191717, "grad_norm": 0.32421875, "learning_rate": 1.249086466921285e-05, "loss": 1.9459, "step": 34779 }, { "epoch": 1.1221509504729679, "grad_norm": 0.3515625, "learning_rate": 1.2490102259574642e-05, "loss": 1.9593, "step": 34780 }, { "epoch": 1.1221832143267643, "grad_norm": 0.345703125, "learning_rate": 1.2489339856607256e-05, "loss": 1.9741, "step": 34781 }, { "epoch": 1.1222154781805607, "grad_norm": 0.37109375, "learning_rate": 1.2488577460312723e-05, "loss": 1.9428, "step": 34782 }, { "epoch": 1.122247742034357, "grad_norm": 0.3359375, "learning_rate": 1.248781507069306e-05, "loss": 1.9467, "step": 34783 }, { "epoch": 1.1222800058881532, "grad_norm": 0.34375, "learning_rate": 1.24870526877503e-05, "loss": 1.958, "step": 34784 }, { "epoch": 1.1223122697419496, "grad_norm": 0.345703125, "learning_rate": 1.2486290311486467e-05, "loss": 1.9684, "step": 34785 }, { "epoch": 1.122344533595746, "grad_norm": 0.36328125, "learning_rate": 1.2485527941903588e-05, "loss": 1.9817, "step": 34786 }, { "epoch": 1.1223767974495424, "grad_norm": 0.3515625, "learning_rate": 1.2484765579003688e-05, "loss": 1.9796, "step": 34787 }, { "epoch": 1.1224090613033386, "grad_norm": 0.3515625, "learning_rate": 1.2484003222788799e-05, "loss": 1.9698, "step": 34788 }, { "epoch": 1.122441325157135, "grad_norm": 0.349609375, "learning_rate": 1.2483240873260938e-05, "loss": 1.9387, "step": 34789 }, { "epoch": 1.1224735890109314, "grad_norm": 0.359375, "learning_rate": 1.2482478530422138e-05, "loss": 1.9525, "step": 34790 }, { "epoch": 1.1225058528647278, "grad_norm": 0.359375, "learning_rate": 1.2481716194274421e-05, "loss": 1.9828, "step": 34791 }, { "epoch": 1.122538116718524, "grad_norm": 0.3671875, "learning_rate": 1.2480953864819813e-05, "loss": 2.015, "step": 34792 }, { "epoch": 1.1225703805723204, "grad_norm": 0.349609375, "learning_rate": 1.2480191542060344e-05, "loss": 1.973, "step": 34793 }, { "epoch": 1.1226026444261168, "grad_norm": 0.34765625, "learning_rate": 1.2479429225998042e-05, "loss": 1.9373, "step": 34794 }, { "epoch": 1.1226349082799132, "grad_norm": 0.36328125, "learning_rate": 1.2478666916634925e-05, "loss": 1.9857, "step": 34795 }, { "epoch": 1.1226671721337094, "grad_norm": 0.34765625, "learning_rate": 1.2477904613973025e-05, "loss": 1.9543, "step": 34796 }, { "epoch": 1.1226994359875058, "grad_norm": 0.353515625, "learning_rate": 1.2477142318014366e-05, "loss": 1.9795, "step": 34797 }, { "epoch": 1.1227316998413022, "grad_norm": 0.357421875, "learning_rate": 1.2476380028760973e-05, "loss": 1.9867, "step": 34798 }, { "epoch": 1.1227639636950986, "grad_norm": 0.35546875, "learning_rate": 1.2475617746214872e-05, "loss": 1.9876, "step": 34799 }, { "epoch": 1.122796227548895, "grad_norm": 0.333984375, "learning_rate": 1.2474855470378099e-05, "loss": 1.9351, "step": 34800 }, { "epoch": 1.1228284914026911, "grad_norm": 0.353515625, "learning_rate": 1.2474093201252664e-05, "loss": 1.9856, "step": 34801 }, { "epoch": 1.1228607552564875, "grad_norm": 0.34765625, "learning_rate": 1.2473330938840604e-05, "loss": 1.9598, "step": 34802 }, { "epoch": 1.122893019110284, "grad_norm": 0.345703125, "learning_rate": 1.2472568683143938e-05, "loss": 2.0144, "step": 34803 }, { "epoch": 1.1229252829640803, "grad_norm": 0.337890625, "learning_rate": 1.2471806434164692e-05, "loss": 2.001, "step": 34804 }, { "epoch": 1.1229575468178765, "grad_norm": 0.341796875, "learning_rate": 1.24710441919049e-05, "loss": 1.9721, "step": 34805 }, { "epoch": 1.122989810671673, "grad_norm": 0.359375, "learning_rate": 1.2470281956366589e-05, "loss": 1.9752, "step": 34806 }, { "epoch": 1.1230220745254693, "grad_norm": 0.337890625, "learning_rate": 1.2469519727551772e-05, "loss": 1.9938, "step": 34807 }, { "epoch": 1.1230543383792657, "grad_norm": 0.333984375, "learning_rate": 1.2468757505462482e-05, "loss": 1.9643, "step": 34808 }, { "epoch": 1.123086602233062, "grad_norm": 0.34765625, "learning_rate": 1.2467995290100742e-05, "loss": 1.9929, "step": 34809 }, { "epoch": 1.1231188660868583, "grad_norm": 0.34375, "learning_rate": 1.2467233081468584e-05, "loss": 1.9969, "step": 34810 }, { "epoch": 1.1231511299406547, "grad_norm": 0.326171875, "learning_rate": 1.246647087956803e-05, "loss": 1.9674, "step": 34811 }, { "epoch": 1.123183393794451, "grad_norm": 0.34375, "learning_rate": 1.2465708684401112e-05, "loss": 1.9448, "step": 34812 }, { "epoch": 1.1232156576482473, "grad_norm": 0.369140625, "learning_rate": 1.246494649596984e-05, "loss": 1.9291, "step": 34813 }, { "epoch": 1.1232479215020437, "grad_norm": 0.3515625, "learning_rate": 1.2464184314276254e-05, "loss": 1.9706, "step": 34814 }, { "epoch": 1.12328018535584, "grad_norm": 0.34765625, "learning_rate": 1.2463422139322373e-05, "loss": 1.97, "step": 34815 }, { "epoch": 1.1233124492096365, "grad_norm": 0.333984375, "learning_rate": 1.2462659971110228e-05, "loss": 1.9475, "step": 34816 }, { "epoch": 1.1233447130634326, "grad_norm": 0.341796875, "learning_rate": 1.2461897809641838e-05, "loss": 1.9698, "step": 34817 }, { "epoch": 1.123376976917229, "grad_norm": 0.353515625, "learning_rate": 1.2461135654919238e-05, "loss": 1.9338, "step": 34818 }, { "epoch": 1.1234092407710254, "grad_norm": 0.33984375, "learning_rate": 1.2460373506944445e-05, "loss": 1.9759, "step": 34819 }, { "epoch": 1.1234415046248218, "grad_norm": 0.34375, "learning_rate": 1.2459611365719487e-05, "loss": 1.9854, "step": 34820 }, { "epoch": 1.1234737684786182, "grad_norm": 0.34765625, "learning_rate": 1.2458849231246392e-05, "loss": 1.9874, "step": 34821 }, { "epoch": 1.1235060323324144, "grad_norm": 0.333984375, "learning_rate": 1.2458087103527182e-05, "loss": 1.9518, "step": 34822 }, { "epoch": 1.1235382961862108, "grad_norm": 0.3515625, "learning_rate": 1.2457324982563885e-05, "loss": 1.9433, "step": 34823 }, { "epoch": 1.1235705600400072, "grad_norm": 0.34765625, "learning_rate": 1.245656286835853e-05, "loss": 1.9553, "step": 34824 }, { "epoch": 1.1236028238938036, "grad_norm": 0.353515625, "learning_rate": 1.2455800760913134e-05, "loss": 1.9533, "step": 34825 }, { "epoch": 1.1236350877475998, "grad_norm": 0.34765625, "learning_rate": 1.2455038660229728e-05, "loss": 2.0159, "step": 34826 }, { "epoch": 1.1236673516013962, "grad_norm": 0.357421875, "learning_rate": 1.2454276566310336e-05, "loss": 2.0001, "step": 34827 }, { "epoch": 1.1236996154551926, "grad_norm": 0.34765625, "learning_rate": 1.2453514479156984e-05, "loss": 1.9532, "step": 34828 }, { "epoch": 1.123731879308989, "grad_norm": 0.341796875, "learning_rate": 1.24527523987717e-05, "loss": 1.9593, "step": 34829 }, { "epoch": 1.1237641431627852, "grad_norm": 0.345703125, "learning_rate": 1.2451990325156509e-05, "loss": 2.0007, "step": 34830 }, { "epoch": 1.1237964070165816, "grad_norm": 0.337890625, "learning_rate": 1.245122825831343e-05, "loss": 1.9899, "step": 34831 }, { "epoch": 1.123828670870378, "grad_norm": 0.330078125, "learning_rate": 1.2450466198244494e-05, "loss": 1.98, "step": 34832 }, { "epoch": 1.1238609347241744, "grad_norm": 0.345703125, "learning_rate": 1.2449704144951726e-05, "loss": 1.9958, "step": 34833 }, { "epoch": 1.1238931985779705, "grad_norm": 0.33203125, "learning_rate": 1.244894209843715e-05, "loss": 1.949, "step": 34834 }, { "epoch": 1.123925462431767, "grad_norm": 0.34765625, "learning_rate": 1.2448180058702793e-05, "loss": 1.9607, "step": 34835 }, { "epoch": 1.1239577262855633, "grad_norm": 0.466796875, "learning_rate": 1.2447418025750684e-05, "loss": 2.0561, "step": 34836 }, { "epoch": 1.1239899901393597, "grad_norm": 0.4375, "learning_rate": 1.2446655999582837e-05, "loss": 2.0093, "step": 34837 }, { "epoch": 1.124022253993156, "grad_norm": 0.37109375, "learning_rate": 1.2445893980201287e-05, "loss": 2.0321, "step": 34838 }, { "epoch": 1.1240545178469523, "grad_norm": 0.4375, "learning_rate": 1.2445131967608055e-05, "loss": 2.0433, "step": 34839 }, { "epoch": 1.1240867817007487, "grad_norm": 0.357421875, "learning_rate": 1.2444369961805169e-05, "loss": 2.0236, "step": 34840 }, { "epoch": 1.1241190455545451, "grad_norm": 0.4140625, "learning_rate": 1.2443607962794648e-05, "loss": 2.0373, "step": 34841 }, { "epoch": 1.1241513094083415, "grad_norm": 0.36328125, "learning_rate": 1.2442845970578532e-05, "loss": 2.0427, "step": 34842 }, { "epoch": 1.1241835732621377, "grad_norm": 0.380859375, "learning_rate": 1.2442083985158833e-05, "loss": 2.0426, "step": 34843 }, { "epoch": 1.124215837115934, "grad_norm": 0.353515625, "learning_rate": 1.2441322006537577e-05, "loss": 2.0547, "step": 34844 }, { "epoch": 1.1242481009697305, "grad_norm": 0.39453125, "learning_rate": 1.2440560034716789e-05, "loss": 2.0609, "step": 34845 }, { "epoch": 1.124280364823527, "grad_norm": 0.353515625, "learning_rate": 1.2439798069698501e-05, "loss": 2.0367, "step": 34846 }, { "epoch": 1.124312628677323, "grad_norm": 0.353515625, "learning_rate": 1.2439036111484734e-05, "loss": 2.0648, "step": 34847 }, { "epoch": 1.1243448925311195, "grad_norm": 0.37109375, "learning_rate": 1.2438274160077519e-05, "loss": 2.0516, "step": 34848 }, { "epoch": 1.1243771563849159, "grad_norm": 0.34375, "learning_rate": 1.2437512215478872e-05, "loss": 2.0552, "step": 34849 }, { "epoch": 1.1244094202387123, "grad_norm": 0.3671875, "learning_rate": 1.2436750277690815e-05, "loss": 2.0489, "step": 34850 }, { "epoch": 1.1244416840925084, "grad_norm": 0.349609375, "learning_rate": 1.2435988346715384e-05, "loss": 2.0867, "step": 34851 }, { "epoch": 1.1244739479463048, "grad_norm": 0.388671875, "learning_rate": 1.2435226422554599e-05, "loss": 2.1163, "step": 34852 }, { "epoch": 1.1245062118001012, "grad_norm": 0.349609375, "learning_rate": 1.2434464505210486e-05, "loss": 2.1038, "step": 34853 }, { "epoch": 1.1245384756538976, "grad_norm": 0.3828125, "learning_rate": 1.2433702594685078e-05, "loss": 2.0763, "step": 34854 }, { "epoch": 1.1245707395076938, "grad_norm": 0.34765625, "learning_rate": 1.2432940690980381e-05, "loss": 2.1284, "step": 34855 }, { "epoch": 1.1246030033614902, "grad_norm": 0.369140625, "learning_rate": 1.2432178794098434e-05, "loss": 2.1076, "step": 34856 }, { "epoch": 1.1246352672152866, "grad_norm": 0.349609375, "learning_rate": 1.2431416904041258e-05, "loss": 2.1212, "step": 34857 }, { "epoch": 1.124667531069083, "grad_norm": 0.380859375, "learning_rate": 1.243065502081088e-05, "loss": 2.1494, "step": 34858 }, { "epoch": 1.1246997949228792, "grad_norm": 0.357421875, "learning_rate": 1.2429893144409323e-05, "loss": 2.1282, "step": 34859 }, { "epoch": 1.1247320587766756, "grad_norm": 0.369140625, "learning_rate": 1.2429131274838616e-05, "loss": 2.1386, "step": 34860 }, { "epoch": 1.124764322630472, "grad_norm": 0.361328125, "learning_rate": 1.2428369412100778e-05, "loss": 2.1164, "step": 34861 }, { "epoch": 1.1247965864842684, "grad_norm": 0.36328125, "learning_rate": 1.2427607556197836e-05, "loss": 2.1474, "step": 34862 }, { "epoch": 1.1248288503380648, "grad_norm": 0.35546875, "learning_rate": 1.2426845707131814e-05, "loss": 2.1216, "step": 34863 }, { "epoch": 1.124861114191861, "grad_norm": 0.365234375, "learning_rate": 1.2426083864904737e-05, "loss": 2.1103, "step": 34864 }, { "epoch": 1.1248933780456574, "grad_norm": 0.357421875, "learning_rate": 1.2425322029518635e-05, "loss": 2.1367, "step": 34865 }, { "epoch": 1.1249256418994538, "grad_norm": 0.349609375, "learning_rate": 1.2424560200975531e-05, "loss": 2.081, "step": 34866 }, { "epoch": 1.1249579057532502, "grad_norm": 0.357421875, "learning_rate": 1.2423798379277441e-05, "loss": 2.1365, "step": 34867 }, { "epoch": 1.1249901696070463, "grad_norm": 0.345703125, "learning_rate": 1.24230365644264e-05, "loss": 2.0732, "step": 34868 }, { "epoch": 1.1250224334608427, "grad_norm": 0.353515625, "learning_rate": 1.2422274756424427e-05, "loss": 2.1261, "step": 34869 }, { "epoch": 1.1250546973146391, "grad_norm": 0.35546875, "learning_rate": 1.2421512955273549e-05, "loss": 2.1266, "step": 34870 }, { "epoch": 1.1250869611684355, "grad_norm": 0.341796875, "learning_rate": 1.2420751160975792e-05, "loss": 2.1309, "step": 34871 }, { "epoch": 1.1251192250222317, "grad_norm": 0.35546875, "learning_rate": 1.2419989373533181e-05, "loss": 2.126, "step": 34872 }, { "epoch": 1.1251514888760281, "grad_norm": 0.357421875, "learning_rate": 1.2419227592947736e-05, "loss": 2.1179, "step": 34873 }, { "epoch": 1.1251837527298245, "grad_norm": 0.35546875, "learning_rate": 1.2418465819221483e-05, "loss": 2.1204, "step": 34874 }, { "epoch": 1.125216016583621, "grad_norm": 0.369140625, "learning_rate": 1.2417704052356452e-05, "loss": 2.1227, "step": 34875 }, { "epoch": 1.1252482804374173, "grad_norm": 0.359375, "learning_rate": 1.2416942292354661e-05, "loss": 2.1382, "step": 34876 }, { "epoch": 1.1252805442912135, "grad_norm": 0.357421875, "learning_rate": 1.2416180539218137e-05, "loss": 2.144, "step": 34877 }, { "epoch": 1.12531280814501, "grad_norm": 0.333984375, "learning_rate": 1.241541879294891e-05, "loss": 2.1149, "step": 34878 }, { "epoch": 1.1253450719988063, "grad_norm": 0.34375, "learning_rate": 1.2414657053548997e-05, "loss": 2.1202, "step": 34879 }, { "epoch": 1.1253773358526025, "grad_norm": 0.357421875, "learning_rate": 1.2413895321020423e-05, "loss": 2.1081, "step": 34880 }, { "epoch": 1.1254095997063989, "grad_norm": 0.357421875, "learning_rate": 1.2413133595365217e-05, "loss": 2.1183, "step": 34881 }, { "epoch": 1.1254418635601953, "grad_norm": 0.341796875, "learning_rate": 1.2412371876585394e-05, "loss": 2.1259, "step": 34882 }, { "epoch": 1.1254741274139917, "grad_norm": 0.34765625, "learning_rate": 1.2411610164682994e-05, "loss": 2.1208, "step": 34883 }, { "epoch": 1.125506391267788, "grad_norm": 0.33984375, "learning_rate": 1.2410848459660036e-05, "loss": 2.1385, "step": 34884 }, { "epoch": 1.1255386551215842, "grad_norm": 0.337890625, "learning_rate": 1.2410086761518537e-05, "loss": 2.1297, "step": 34885 }, { "epoch": 1.1255709189753806, "grad_norm": 0.345703125, "learning_rate": 1.2409325070260526e-05, "loss": 2.1144, "step": 34886 }, { "epoch": 1.125603182829177, "grad_norm": 0.3515625, "learning_rate": 1.2408563385888025e-05, "loss": 2.1265, "step": 34887 }, { "epoch": 1.1256354466829734, "grad_norm": 0.33984375, "learning_rate": 1.2407801708403062e-05, "loss": 2.1032, "step": 34888 }, { "epoch": 1.1256677105367696, "grad_norm": 0.34375, "learning_rate": 1.2407040037807662e-05, "loss": 2.1316, "step": 34889 }, { "epoch": 1.125699974390566, "grad_norm": 0.34765625, "learning_rate": 1.2406278374103853e-05, "loss": 2.1137, "step": 34890 }, { "epoch": 1.1257322382443624, "grad_norm": 0.345703125, "learning_rate": 1.240551671729365e-05, "loss": 2.0969, "step": 34891 }, { "epoch": 1.1257645020981588, "grad_norm": 0.357421875, "learning_rate": 1.2404755067379074e-05, "loss": 2.0851, "step": 34892 }, { "epoch": 1.125796765951955, "grad_norm": 0.357421875, "learning_rate": 1.2403993424362164e-05, "loss": 2.1112, "step": 34893 }, { "epoch": 1.1258290298057514, "grad_norm": 0.380859375, "learning_rate": 1.2403231788244935e-05, "loss": 2.1162, "step": 34894 }, { "epoch": 1.1258612936595478, "grad_norm": 0.341796875, "learning_rate": 1.2402470159029414e-05, "loss": 2.1179, "step": 34895 }, { "epoch": 1.1258935575133442, "grad_norm": 0.369140625, "learning_rate": 1.2401708536717625e-05, "loss": 2.0858, "step": 34896 }, { "epoch": 1.1259258213671406, "grad_norm": 0.361328125, "learning_rate": 1.2400946921311593e-05, "loss": 2.1162, "step": 34897 }, { "epoch": 1.1259580852209368, "grad_norm": 0.34765625, "learning_rate": 1.2400185312813341e-05, "loss": 2.128, "step": 34898 }, { "epoch": 1.1259903490747332, "grad_norm": 0.357421875, "learning_rate": 1.2399423711224892e-05, "loss": 2.134, "step": 34899 }, { "epoch": 1.1260226129285296, "grad_norm": 0.37109375, "learning_rate": 1.239866211654827e-05, "loss": 2.1119, "step": 34900 } ], "logging_steps": 1.0, "max_steps": 61988, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.7781083387608433e+21, "train_batch_size": 8, "trial_name": null, "trial_params": null }