{ "best_metric": 0.9159200551343901, "best_model_checkpoint": "videomae-base-finetuned-kinetics-fight_22-01-2024/checkpoint-5544", "epoch": 19.048798988621996, "eval_steps": 500, "global_step": 15820, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 3.8928611278533936, "learning_rate": 3.1605562579013902e-09, "loss": 0.6651, "step": 10 }, { "epoch": 0.0, "grad_norm": 4.591193199157715, "learning_rate": 6.3211125158027805e-09, "loss": 0.6776, "step": 20 }, { "epoch": 0.0, "grad_norm": 3.7871010303497314, "learning_rate": 9.481668773704172e-09, "loss": 0.6662, "step": 30 }, { "epoch": 0.0, "grad_norm": 4.225106716156006, "learning_rate": 1.2642225031605561e-08, "loss": 0.6809, "step": 40 }, { "epoch": 0.0, "grad_norm": 3.702281951904297, "learning_rate": 1.580278128950695e-08, "loss": 0.6831, "step": 50 }, { "epoch": 0.0, "grad_norm": 4.061862945556641, "learning_rate": 1.8963337547408344e-08, "loss": 0.6734, "step": 60 }, { "epoch": 0.0, "grad_norm": 3.502265691757202, "learning_rate": 2.2123893805309735e-08, "loss": 0.6698, "step": 70 }, { "epoch": 0.01, "grad_norm": 4.162473678588867, "learning_rate": 2.5284450063211122e-08, "loss": 0.6663, "step": 80 }, { "epoch": 0.01, "grad_norm": 4.365663528442383, "learning_rate": 2.8445006321112516e-08, "loss": 0.651, "step": 90 }, { "epoch": 0.01, "grad_norm": 3.814967393875122, "learning_rate": 3.16055625790139e-08, "loss": 0.6768, "step": 100 }, { "epoch": 0.01, "grad_norm": 3.5109472274780273, "learning_rate": 3.47661188369153e-08, "loss": 0.6606, "step": 110 }, { "epoch": 0.01, "grad_norm": 3.6083860397338867, "learning_rate": 3.792667509481669e-08, "loss": 0.6666, "step": 120 }, { "epoch": 0.01, "grad_norm": 3.944204807281494, "learning_rate": 4.108723135271808e-08, "loss": 0.6877, "step": 130 }, { "epoch": 0.01, "grad_norm": 3.932798147201538, "learning_rate": 4.424778761061947e-08, "loss": 0.6805, "step": 140 }, { "epoch": 0.01, "grad_norm": 4.184200286865234, "learning_rate": 4.740834386852085e-08, "loss": 0.6797, "step": 150 }, { "epoch": 0.01, "grad_norm": 3.581977367401123, "learning_rate": 5.0568900126422244e-08, "loss": 0.6769, "step": 160 }, { "epoch": 0.01, "grad_norm": 3.6752045154571533, "learning_rate": 5.372945638432364e-08, "loss": 0.6693, "step": 170 }, { "epoch": 0.01, "grad_norm": 3.25941801071167, "learning_rate": 5.689001264222503e-08, "loss": 0.6578, "step": 180 }, { "epoch": 0.01, "grad_norm": 3.624237537384033, "learning_rate": 6.005056890012642e-08, "loss": 0.6749, "step": 190 }, { "epoch": 0.01, "grad_norm": 3.2375965118408203, "learning_rate": 6.32111251580278e-08, "loss": 0.669, "step": 200 }, { "epoch": 0.01, "grad_norm": 3.6228995323181152, "learning_rate": 6.63716814159292e-08, "loss": 0.6759, "step": 210 }, { "epoch": 0.01, "grad_norm": 3.5825822353363037, "learning_rate": 6.95322376738306e-08, "loss": 0.6649, "step": 220 }, { "epoch": 0.01, "grad_norm": 3.498684883117676, "learning_rate": 7.269279393173199e-08, "loss": 0.6779, "step": 230 }, { "epoch": 0.02, "grad_norm": 3.4103477001190186, "learning_rate": 7.585335018963338e-08, "loss": 0.6515, "step": 240 }, { "epoch": 0.02, "grad_norm": 3.7684435844421387, "learning_rate": 7.901390644753477e-08, "loss": 0.6667, "step": 250 }, { "epoch": 0.02, "grad_norm": 3.769439935684204, "learning_rate": 8.217446270543616e-08, "loss": 0.669, "step": 260 }, { "epoch": 0.02, "grad_norm": 3.855694532394409, "learning_rate": 8.533501896333755e-08, "loss": 0.6679, "step": 270 }, { "epoch": 0.02, "grad_norm": 3.201188325881958, "learning_rate": 8.849557522123894e-08, "loss": 0.6557, "step": 280 }, { "epoch": 0.02, "grad_norm": 3.4259488582611084, "learning_rate": 9.165613147914032e-08, "loss": 0.6516, "step": 290 }, { "epoch": 0.02, "grad_norm": 3.6246564388275146, "learning_rate": 9.48166877370417e-08, "loss": 0.6627, "step": 300 }, { "epoch": 0.02, "grad_norm": 3.2031607627868652, "learning_rate": 9.79772439949431e-08, "loss": 0.642, "step": 310 }, { "epoch": 0.02, "grad_norm": 2.994783878326416, "learning_rate": 1.0113780025284449e-07, "loss": 0.6575, "step": 320 }, { "epoch": 0.02, "grad_norm": 3.599496841430664, "learning_rate": 1.0429835651074588e-07, "loss": 0.659, "step": 330 }, { "epoch": 0.02, "grad_norm": 3.7801012992858887, "learning_rate": 1.0745891276864728e-07, "loss": 0.6569, "step": 340 }, { "epoch": 0.02, "grad_norm": 4.021647930145264, "learning_rate": 1.1061946902654867e-07, "loss": 0.6521, "step": 350 }, { "epoch": 0.02, "grad_norm": 4.366917133331299, "learning_rate": 1.1378002528445006e-07, "loss": 0.6567, "step": 360 }, { "epoch": 0.02, "grad_norm": 3.160220146179199, "learning_rate": 1.1694058154235145e-07, "loss": 0.6395, "step": 370 }, { "epoch": 0.02, "grad_norm": 3.4906527996063232, "learning_rate": 1.2010113780025284e-07, "loss": 0.661, "step": 380 }, { "epoch": 0.02, "grad_norm": 3.2563610076904297, "learning_rate": 1.2326169405815422e-07, "loss": 0.6596, "step": 390 }, { "epoch": 0.03, "grad_norm": 4.387155532836914, "learning_rate": 1.264222503160556e-07, "loss": 0.647, "step": 400 }, { "epoch": 0.03, "grad_norm": 3.4603726863861084, "learning_rate": 1.29582806573957e-07, "loss": 0.6556, "step": 410 }, { "epoch": 0.03, "grad_norm": 3.5702872276306152, "learning_rate": 1.327433628318584e-07, "loss": 0.6675, "step": 420 }, { "epoch": 0.03, "grad_norm": 3.708195924758911, "learning_rate": 1.3590391908975978e-07, "loss": 0.6534, "step": 430 }, { "epoch": 0.03, "grad_norm": 3.8094639778137207, "learning_rate": 1.390644753476612e-07, "loss": 0.665, "step": 440 }, { "epoch": 0.03, "grad_norm": 3.1821417808532715, "learning_rate": 1.4222503160556257e-07, "loss": 0.6342, "step": 450 }, { "epoch": 0.03, "grad_norm": 3.5575406551361084, "learning_rate": 1.4538558786346397e-07, "loss": 0.6612, "step": 460 }, { "epoch": 0.03, "grad_norm": 3.4878993034362793, "learning_rate": 1.4854614412136535e-07, "loss": 0.6424, "step": 470 }, { "epoch": 0.03, "grad_norm": 3.4453928470611572, "learning_rate": 1.5170670037926675e-07, "loss": 0.6248, "step": 480 }, { "epoch": 0.03, "grad_norm": 3.1630289554595947, "learning_rate": 1.5486725663716813e-07, "loss": 0.6413, "step": 490 }, { "epoch": 0.03, "grad_norm": 3.8245644569396973, "learning_rate": 1.5802781289506953e-07, "loss": 0.6328, "step": 500 }, { "epoch": 0.03, "grad_norm": 3.973332643508911, "learning_rate": 1.611883691529709e-07, "loss": 0.6359, "step": 510 }, { "epoch": 0.03, "grad_norm": 3.4244556427001953, "learning_rate": 1.6434892541087231e-07, "loss": 0.6361, "step": 520 }, { "epoch": 0.03, "grad_norm": 3.6331629753112793, "learning_rate": 1.675094816687737e-07, "loss": 0.6453, "step": 530 }, { "epoch": 0.03, "grad_norm": 3.510688066482544, "learning_rate": 1.706700379266751e-07, "loss": 0.6496, "step": 540 }, { "epoch": 0.03, "grad_norm": 3.3321988582611084, "learning_rate": 1.738305941845765e-07, "loss": 0.6474, "step": 550 }, { "epoch": 0.04, "grad_norm": 3.2703301906585693, "learning_rate": 1.7699115044247788e-07, "loss": 0.64, "step": 560 }, { "epoch": 0.04, "grad_norm": 2.9783852100372314, "learning_rate": 1.8015170670037928e-07, "loss": 0.6006, "step": 570 }, { "epoch": 0.04, "grad_norm": 3.2926268577575684, "learning_rate": 1.8331226295828063e-07, "loss": 0.6407, "step": 580 }, { "epoch": 0.04, "grad_norm": 3.674591541290283, "learning_rate": 1.8647281921618204e-07, "loss": 0.6287, "step": 590 }, { "epoch": 0.04, "grad_norm": 4.331298351287842, "learning_rate": 1.896333754740834e-07, "loss": 0.6284, "step": 600 }, { "epoch": 0.04, "grad_norm": 3.805997848510742, "learning_rate": 1.9279393173198482e-07, "loss": 0.6499, "step": 610 }, { "epoch": 0.04, "grad_norm": 3.538834571838379, "learning_rate": 1.959544879898862e-07, "loss": 0.615, "step": 620 }, { "epoch": 0.04, "grad_norm": 3.613908290863037, "learning_rate": 1.991150442477876e-07, "loss": 0.6094, "step": 630 }, { "epoch": 0.04, "grad_norm": 3.2760069370269775, "learning_rate": 2.0227560050568898e-07, "loss": 0.6305, "step": 640 }, { "epoch": 0.04, "grad_norm": 3.5114917755126953, "learning_rate": 2.0543615676359038e-07, "loss": 0.6072, "step": 650 }, { "epoch": 0.04, "grad_norm": 3.9394311904907227, "learning_rate": 2.0859671302149176e-07, "loss": 0.6405, "step": 660 }, { "epoch": 0.04, "grad_norm": 3.8021371364593506, "learning_rate": 2.1175726927939316e-07, "loss": 0.6207, "step": 670 }, { "epoch": 0.04, "grad_norm": 3.225693464279175, "learning_rate": 2.1491782553729456e-07, "loss": 0.5956, "step": 680 }, { "epoch": 0.04, "grad_norm": 3.971768856048584, "learning_rate": 2.1807838179519594e-07, "loss": 0.6178, "step": 690 }, { "epoch": 0.04, "grad_norm": 3.5233986377716064, "learning_rate": 2.2123893805309735e-07, "loss": 0.5929, "step": 700 }, { "epoch": 0.04, "grad_norm": 4.2727766036987305, "learning_rate": 2.2439949431099872e-07, "loss": 0.5976, "step": 710 }, { "epoch": 0.05, "grad_norm": 2.9992125034332275, "learning_rate": 2.2756005056890013e-07, "loss": 0.6301, "step": 720 }, { "epoch": 0.05, "grad_norm": 4.434526443481445, "learning_rate": 2.307206068268015e-07, "loss": 0.6303, "step": 730 }, { "epoch": 0.05, "grad_norm": 3.7031564712524414, "learning_rate": 2.338811630847029e-07, "loss": 0.6173, "step": 740 }, { "epoch": 0.05, "grad_norm": 3.8267970085144043, "learning_rate": 2.3704171934260429e-07, "loss": 0.6051, "step": 750 }, { "epoch": 0.05, "grad_norm": 3.855449914932251, "learning_rate": 2.402022756005057e-07, "loss": 0.5935, "step": 760 }, { "epoch": 0.05, "grad_norm": 3.734497547149658, "learning_rate": 2.4336283185840704e-07, "loss": 0.5994, "step": 770 }, { "epoch": 0.05, "grad_norm": 3.817833423614502, "learning_rate": 2.4652338811630844e-07, "loss": 0.5957, "step": 780 }, { "epoch": 0.05, "grad_norm": 3.536569356918335, "learning_rate": 2.4968394437420985e-07, "loss": 0.6066, "step": 790 }, { "epoch": 0.05, "eval_accuracy": 0.7508614748449345, "eval_loss": 0.5968945026397705, "eval_precision": 0.781733746130031, "eval_recall": 0.6960716747070985, "eval_runtime": 689.8526, "eval_samples_per_second": 4.207, "eval_steps_per_second": 0.302, "step": 792 }, { "epoch": 1.0, "grad_norm": 3.690152883529663, "learning_rate": 2.528445006321112e-07, "loss": 0.6079, "step": 800 }, { "epoch": 1.0, "grad_norm": 3.60972261428833, "learning_rate": 2.560050568900126e-07, "loss": 0.5929, "step": 810 }, { "epoch": 1.0, "grad_norm": 4.441210746765137, "learning_rate": 2.59165613147914e-07, "loss": 0.5968, "step": 820 }, { "epoch": 1.0, "grad_norm": 3.974269151687622, "learning_rate": 2.623261694058154e-07, "loss": 0.5669, "step": 830 }, { "epoch": 1.0, "grad_norm": 4.74702262878418, "learning_rate": 2.654867256637168e-07, "loss": 0.5931, "step": 840 }, { "epoch": 1.0, "grad_norm": 3.494053363800049, "learning_rate": 2.6864728192161817e-07, "loss": 0.5555, "step": 850 }, { "epoch": 1.0, "grad_norm": 3.8187291622161865, "learning_rate": 2.7180783817951957e-07, "loss": 0.5748, "step": 860 }, { "epoch": 1.0, "grad_norm": 4.1680073738098145, "learning_rate": 2.7496839443742097e-07, "loss": 0.5547, "step": 870 }, { "epoch": 1.01, "grad_norm": 5.171167373657227, "learning_rate": 2.781289506953224e-07, "loss": 0.566, "step": 880 }, { "epoch": 1.01, "grad_norm": 3.8294074535369873, "learning_rate": 2.8128950695322373e-07, "loss": 0.5522, "step": 890 }, { "epoch": 1.01, "grad_norm": 3.33899188041687, "learning_rate": 2.8445006321112513e-07, "loss": 0.5416, "step": 900 }, { "epoch": 1.01, "grad_norm": 3.288118839263916, "learning_rate": 2.8761061946902654e-07, "loss": 0.5614, "step": 910 }, { "epoch": 1.01, "grad_norm": 3.70455265045166, "learning_rate": 2.9077117572692794e-07, "loss": 0.5498, "step": 920 }, { "epoch": 1.01, "grad_norm": 4.186919689178467, "learning_rate": 2.939317319848293e-07, "loss": 0.5234, "step": 930 }, { "epoch": 1.01, "grad_norm": 3.5319950580596924, "learning_rate": 2.970922882427307e-07, "loss": 0.5669, "step": 940 }, { "epoch": 1.01, "grad_norm": 3.688931465148926, "learning_rate": 3.002528445006321e-07, "loss": 0.5436, "step": 950 }, { "epoch": 1.01, "grad_norm": 3.5737836360931396, "learning_rate": 3.034134007585335e-07, "loss": 0.5304, "step": 960 }, { "epoch": 1.01, "grad_norm": 4.780731201171875, "learning_rate": 3.065739570164349e-07, "loss": 0.5385, "step": 970 }, { "epoch": 1.01, "grad_norm": 4.19320011138916, "learning_rate": 3.0973451327433626e-07, "loss": 0.5676, "step": 980 }, { "epoch": 1.01, "grad_norm": 4.2956156730651855, "learning_rate": 3.1289506953223766e-07, "loss": 0.5536, "step": 990 }, { "epoch": 1.01, "grad_norm": 3.5980467796325684, "learning_rate": 3.1605562579013907e-07, "loss": 0.5559, "step": 1000 }, { "epoch": 1.01, "grad_norm": 4.966118812561035, "learning_rate": 3.1921618204804047e-07, "loss": 0.5295, "step": 1010 }, { "epoch": 1.01, "grad_norm": 4.608511447906494, "learning_rate": 3.223767383059418e-07, "loss": 0.5364, "step": 1020 }, { "epoch": 1.02, "grad_norm": 4.232746124267578, "learning_rate": 3.255372945638432e-07, "loss": 0.5418, "step": 1030 }, { "epoch": 1.02, "grad_norm": 4.318231582641602, "learning_rate": 3.2869785082174463e-07, "loss": 0.5354, "step": 1040 }, { "epoch": 1.02, "grad_norm": 3.639399528503418, "learning_rate": 3.3185840707964603e-07, "loss": 0.5352, "step": 1050 }, { "epoch": 1.02, "grad_norm": 4.57344388961792, "learning_rate": 3.350189633375474e-07, "loss": 0.5558, "step": 1060 }, { "epoch": 1.02, "grad_norm": 4.283326625823975, "learning_rate": 3.381795195954488e-07, "loss": 0.4985, "step": 1070 }, { "epoch": 1.02, "grad_norm": 4.391725063323975, "learning_rate": 3.413400758533502e-07, "loss": 0.5353, "step": 1080 }, { "epoch": 1.02, "grad_norm": 3.476855754852295, "learning_rate": 3.445006321112516e-07, "loss": 0.5046, "step": 1090 }, { "epoch": 1.02, "grad_norm": 4.116981506347656, "learning_rate": 3.47661188369153e-07, "loss": 0.5461, "step": 1100 }, { "epoch": 1.02, "grad_norm": 4.8679351806640625, "learning_rate": 3.5082174462705435e-07, "loss": 0.5246, "step": 1110 }, { "epoch": 1.02, "grad_norm": 3.3669674396514893, "learning_rate": 3.5398230088495575e-07, "loss": 0.5058, "step": 1120 }, { "epoch": 1.02, "grad_norm": 4.285962104797363, "learning_rate": 3.5714285714285716e-07, "loss": 0.5084, "step": 1130 }, { "epoch": 1.02, "grad_norm": 4.696875095367432, "learning_rate": 3.6030341340075856e-07, "loss": 0.515, "step": 1140 }, { "epoch": 1.02, "grad_norm": 4.208799839019775, "learning_rate": 3.634639696586599e-07, "loss": 0.4993, "step": 1150 }, { "epoch": 1.02, "grad_norm": 4.399754047393799, "learning_rate": 3.6662452591656126e-07, "loss": 0.5235, "step": 1160 }, { "epoch": 1.02, "grad_norm": 3.940359354019165, "learning_rate": 3.6978508217446267e-07, "loss": 0.504, "step": 1170 }, { "epoch": 1.02, "grad_norm": 4.979330062866211, "learning_rate": 3.7294563843236407e-07, "loss": 0.495, "step": 1180 }, { "epoch": 1.03, "grad_norm": 2.9542107582092285, "learning_rate": 3.761061946902654e-07, "loss": 0.4807, "step": 1190 }, { "epoch": 1.03, "grad_norm": 4.393192291259766, "learning_rate": 3.792667509481668e-07, "loss": 0.5213, "step": 1200 }, { "epoch": 1.03, "grad_norm": 3.378692388534546, "learning_rate": 3.8242730720606823e-07, "loss": 0.4902, "step": 1210 }, { "epoch": 1.03, "grad_norm": 3.300856113433838, "learning_rate": 3.8558786346396963e-07, "loss": 0.4372, "step": 1220 }, { "epoch": 1.03, "grad_norm": 4.249393939971924, "learning_rate": 3.8874841972187104e-07, "loss": 0.4631, "step": 1230 }, { "epoch": 1.03, "grad_norm": 4.513766765594482, "learning_rate": 3.919089759797724e-07, "loss": 0.4688, "step": 1240 }, { "epoch": 1.03, "grad_norm": 4.223058700561523, "learning_rate": 3.950695322376738e-07, "loss": 0.4555, "step": 1250 }, { "epoch": 1.03, "grad_norm": 3.828815221786499, "learning_rate": 3.982300884955752e-07, "loss": 0.457, "step": 1260 }, { "epoch": 1.03, "grad_norm": 5.211395740509033, "learning_rate": 4.013906447534766e-07, "loss": 0.445, "step": 1270 }, { "epoch": 1.03, "grad_norm": 3.5266273021698, "learning_rate": 4.0455120101137795e-07, "loss": 0.4617, "step": 1280 }, { "epoch": 1.03, "grad_norm": 7.244021415710449, "learning_rate": 4.0771175726927935e-07, "loss": 0.4428, "step": 1290 }, { "epoch": 1.03, "grad_norm": 4.632552623748779, "learning_rate": 4.1087231352718076e-07, "loss": 0.3966, "step": 1300 }, { "epoch": 1.03, "grad_norm": 3.747267723083496, "learning_rate": 4.1403286978508216e-07, "loss": 0.4541, "step": 1310 }, { "epoch": 1.03, "grad_norm": 4.943673610687256, "learning_rate": 4.171934260429835e-07, "loss": 0.468, "step": 1320 }, { "epoch": 1.03, "grad_norm": 4.491810321807861, "learning_rate": 4.203539823008849e-07, "loss": 0.448, "step": 1330 }, { "epoch": 1.03, "grad_norm": 5.789212703704834, "learning_rate": 4.235145385587863e-07, "loss": 0.4362, "step": 1340 }, { "epoch": 1.04, "grad_norm": 6.113379955291748, "learning_rate": 4.266750948166877e-07, "loss": 0.4616, "step": 1350 }, { "epoch": 1.04, "grad_norm": 4.956961154937744, "learning_rate": 4.2983565107458913e-07, "loss": 0.4393, "step": 1360 }, { "epoch": 1.04, "grad_norm": 7.000941276550293, "learning_rate": 4.329962073324905e-07, "loss": 0.435, "step": 1370 }, { "epoch": 1.04, "grad_norm": 4.396864891052246, "learning_rate": 4.361567635903919e-07, "loss": 0.4303, "step": 1380 }, { "epoch": 1.04, "grad_norm": 4.85935640335083, "learning_rate": 4.393173198482933e-07, "loss": 0.4136, "step": 1390 }, { "epoch": 1.04, "grad_norm": 4.8949103355407715, "learning_rate": 4.424778761061947e-07, "loss": 0.4246, "step": 1400 }, { "epoch": 1.04, "grad_norm": 5.2272047996521, "learning_rate": 4.4563843236409604e-07, "loss": 0.427, "step": 1410 }, { "epoch": 1.04, "grad_norm": 5.332298755645752, "learning_rate": 4.4879898862199745e-07, "loss": 0.4183, "step": 1420 }, { "epoch": 1.04, "grad_norm": 4.333071708679199, "learning_rate": 4.5195954487989885e-07, "loss": 0.3873, "step": 1430 }, { "epoch": 1.04, "grad_norm": 6.237975597381592, "learning_rate": 4.5512010113780025e-07, "loss": 0.4298, "step": 1440 }, { "epoch": 1.04, "grad_norm": 4.731710910797119, "learning_rate": 4.582806573957016e-07, "loss": 0.3811, "step": 1450 }, { "epoch": 1.04, "grad_norm": 4.504519462585449, "learning_rate": 4.61441213653603e-07, "loss": 0.396, "step": 1460 }, { "epoch": 1.04, "grad_norm": 6.601734638214111, "learning_rate": 4.646017699115044e-07, "loss": 0.3893, "step": 1470 }, { "epoch": 1.04, "grad_norm": 13.606582641601562, "learning_rate": 4.677623261694058e-07, "loss": 0.4197, "step": 1480 }, { "epoch": 1.04, "grad_norm": 5.92011833190918, "learning_rate": 4.709228824273072e-07, "loss": 0.4237, "step": 1490 }, { "epoch": 1.04, "grad_norm": 5.502463340759277, "learning_rate": 4.7408343868520857e-07, "loss": 0.3962, "step": 1500 }, { "epoch": 1.05, "grad_norm": 7.764233589172363, "learning_rate": 4.7724399494311e-07, "loss": 0.4113, "step": 1510 }, { "epoch": 1.05, "grad_norm": 6.509207248687744, "learning_rate": 4.804045512010114e-07, "loss": 0.3753, "step": 1520 }, { "epoch": 1.05, "grad_norm": 13.51192569732666, "learning_rate": 4.835651074589128e-07, "loss": 0.4649, "step": 1530 }, { "epoch": 1.05, "grad_norm": 8.70686149597168, "learning_rate": 4.867256637168141e-07, "loss": 0.3656, "step": 1540 }, { "epoch": 1.05, "grad_norm": 4.622733116149902, "learning_rate": 4.898862199747155e-07, "loss": 0.3303, "step": 1550 }, { "epoch": 1.05, "grad_norm": 6.864151954650879, "learning_rate": 4.930467762326169e-07, "loss": 0.4072, "step": 1560 }, { "epoch": 1.05, "grad_norm": 5.3761820793151855, "learning_rate": 4.962073324905183e-07, "loss": 0.3846, "step": 1570 }, { "epoch": 1.05, "grad_norm": 5.665189743041992, "learning_rate": 4.993678887484197e-07, "loss": 0.429, "step": 1580 }, { "epoch": 1.05, "eval_accuracy": 0.8421778084079945, "eval_loss": 0.38979998230934143, "eval_precision": 0.893735130848533, "eval_recall": 0.7767057201929704, "eval_runtime": 806.4715, "eval_samples_per_second": 3.598, "eval_steps_per_second": 0.258, "step": 1584 }, { "epoch": 2.0, "grad_norm": 7.974698543548584, "learning_rate": 4.997190616659643e-07, "loss": 0.3735, "step": 1590 }, { "epoch": 2.0, "grad_norm": 6.501045227050781, "learning_rate": 4.993678887484197e-07, "loss": 0.3898, "step": 1600 }, { "epoch": 2.0, "grad_norm": 17.88860321044922, "learning_rate": 4.990167158308751e-07, "loss": 0.3378, "step": 1610 }, { "epoch": 2.0, "grad_norm": 4.293785095214844, "learning_rate": 4.986655429133306e-07, "loss": 0.3342, "step": 1620 }, { "epoch": 2.0, "grad_norm": 8.924853324890137, "learning_rate": 4.983143699957859e-07, "loss": 0.347, "step": 1630 }, { "epoch": 2.0, "grad_norm": 4.915943622589111, "learning_rate": 4.979631970782413e-07, "loss": 0.3486, "step": 1640 }, { "epoch": 2.0, "grad_norm": 8.103630065917969, "learning_rate": 4.976120241606967e-07, "loss": 0.3499, "step": 1650 }, { "epoch": 2.0, "grad_norm": 3.0036239624023438, "learning_rate": 4.972608512431521e-07, "loss": 0.3124, "step": 1660 }, { "epoch": 2.01, "grad_norm": 10.349337577819824, "learning_rate": 4.969096783256075e-07, "loss": 0.379, "step": 1670 }, { "epoch": 2.01, "grad_norm": 14.3101806640625, "learning_rate": 4.96558505408063e-07, "loss": 0.3473, "step": 1680 }, { "epoch": 2.01, "grad_norm": 6.858313083648682, "learning_rate": 4.962073324905183e-07, "loss": 0.3413, "step": 1690 }, { "epoch": 2.01, "grad_norm": 6.436093807220459, "learning_rate": 4.958561595729737e-07, "loss": 0.3664, "step": 1700 }, { "epoch": 2.01, "grad_norm": 7.0142903327941895, "learning_rate": 4.955049866554292e-07, "loss": 0.3549, "step": 1710 }, { "epoch": 2.01, "grad_norm": 5.248970985412598, "learning_rate": 4.951538137378845e-07, "loss": 0.3191, "step": 1720 }, { "epoch": 2.01, "grad_norm": 6.141357421875, "learning_rate": 4.948026408203399e-07, "loss": 0.3362, "step": 1730 }, { "epoch": 2.01, "grad_norm": 13.748830795288086, "learning_rate": 4.944514679027953e-07, "loss": 0.3249, "step": 1740 }, { "epoch": 2.01, "grad_norm": 5.0010857582092285, "learning_rate": 4.941002949852507e-07, "loss": 0.3016, "step": 1750 }, { "epoch": 2.01, "grad_norm": 5.12816047668457, "learning_rate": 4.937491220677061e-07, "loss": 0.3276, "step": 1760 }, { "epoch": 2.01, "grad_norm": 9.38332462310791, "learning_rate": 4.933979491501616e-07, "loss": 0.3824, "step": 1770 }, { "epoch": 2.01, "grad_norm": 7.798919200897217, "learning_rate": 4.930467762326169e-07, "loss": 0.3042, "step": 1780 }, { "epoch": 2.01, "grad_norm": 17.136524200439453, "learning_rate": 4.926956033150723e-07, "loss": 0.3253, "step": 1790 }, { "epoch": 2.01, "grad_norm": 8.636536598205566, "learning_rate": 4.923444303975278e-07, "loss": 0.3043, "step": 1800 }, { "epoch": 2.01, "grad_norm": 5.9158782958984375, "learning_rate": 4.919932574799831e-07, "loss": 0.2923, "step": 1810 }, { "epoch": 2.01, "grad_norm": 15.146574020385742, "learning_rate": 4.916420845624385e-07, "loss": 0.3465, "step": 1820 }, { "epoch": 2.02, "grad_norm": 4.89403772354126, "learning_rate": 4.91290911644894e-07, "loss": 0.3126, "step": 1830 }, { "epoch": 2.02, "grad_norm": 3.9748411178588867, "learning_rate": 4.909397387273494e-07, "loss": 0.3143, "step": 1840 }, { "epoch": 2.02, "grad_norm": 10.306771278381348, "learning_rate": 4.905885658098047e-07, "loss": 0.3134, "step": 1850 }, { "epoch": 2.02, "grad_norm": 6.087951183319092, "learning_rate": 4.902373928922602e-07, "loss": 0.3154, "step": 1860 }, { "epoch": 2.02, "grad_norm": 7.461074352264404, "learning_rate": 4.898862199747155e-07, "loss": 0.3352, "step": 1870 }, { "epoch": 2.02, "grad_norm": 8.902750968933105, "learning_rate": 4.895350470571709e-07, "loss": 0.3234, "step": 1880 }, { "epoch": 2.02, "grad_norm": 7.465813636779785, "learning_rate": 4.891838741396264e-07, "loss": 0.2948, "step": 1890 }, { "epoch": 2.02, "grad_norm": 4.285550117492676, "learning_rate": 4.888327012220818e-07, "loss": 0.2933, "step": 1900 }, { "epoch": 2.02, "grad_norm": 6.952497482299805, "learning_rate": 4.884815283045371e-07, "loss": 0.2956, "step": 1910 }, { "epoch": 2.02, "grad_norm": 4.856369972229004, "learning_rate": 4.881303553869926e-07, "loss": 0.3378, "step": 1920 }, { "epoch": 2.02, "grad_norm": 5.463541030883789, "learning_rate": 4.87779182469448e-07, "loss": 0.2768, "step": 1930 }, { "epoch": 2.02, "grad_norm": 9.049531936645508, "learning_rate": 4.874280095519033e-07, "loss": 0.306, "step": 1940 }, { "epoch": 2.02, "grad_norm": 4.3441481590271, "learning_rate": 4.870768366343588e-07, "loss": 0.2695, "step": 1950 }, { "epoch": 2.02, "grad_norm": 6.528475761413574, "learning_rate": 4.867256637168141e-07, "loss": 0.2755, "step": 1960 }, { "epoch": 2.02, "grad_norm": 11.025114059448242, "learning_rate": 4.863744907992695e-07, "loss": 0.2759, "step": 1970 }, { "epoch": 2.03, "grad_norm": 14.669293403625488, "learning_rate": 4.86023317881725e-07, "loss": 0.3099, "step": 1980 }, { "epoch": 2.03, "grad_norm": 8.935242652893066, "learning_rate": 4.856721449641804e-07, "loss": 0.313, "step": 1990 }, { "epoch": 2.03, "grad_norm": 10.113512992858887, "learning_rate": 4.853209720466357e-07, "loss": 0.2722, "step": 2000 }, { "epoch": 2.03, "grad_norm": 13.450063705444336, "learning_rate": 4.849697991290911e-07, "loss": 0.3074, "step": 2010 }, { "epoch": 2.03, "grad_norm": 10.026453018188477, "learning_rate": 4.846186262115466e-07, "loss": 0.2878, "step": 2020 }, { "epoch": 2.03, "grad_norm": 20.683448791503906, "learning_rate": 4.842674532940019e-07, "loss": 0.3048, "step": 2030 }, { "epoch": 2.03, "grad_norm": 5.725069999694824, "learning_rate": 4.839162803764573e-07, "loss": 0.2659, "step": 2040 }, { "epoch": 2.03, "grad_norm": 3.3149778842926025, "learning_rate": 4.835651074589128e-07, "loss": 0.308, "step": 2050 }, { "epoch": 2.03, "grad_norm": 7.162174701690674, "learning_rate": 4.832139345413681e-07, "loss": 0.261, "step": 2060 }, { "epoch": 2.03, "grad_norm": 32.44361877441406, "learning_rate": 4.828627616238235e-07, "loss": 0.2647, "step": 2070 }, { "epoch": 2.03, "grad_norm": 19.0134334564209, "learning_rate": 4.82511588706279e-07, "loss": 0.2835, "step": 2080 }, { "epoch": 2.03, "grad_norm": 7.808498382568359, "learning_rate": 4.821604157887343e-07, "loss": 0.2675, "step": 2090 }, { "epoch": 2.03, "grad_norm": 15.527098655700684, "learning_rate": 4.818092428711897e-07, "loss": 0.2894, "step": 2100 }, { "epoch": 2.03, "grad_norm": 10.184934616088867, "learning_rate": 4.814580699536452e-07, "loss": 0.2683, "step": 2110 }, { "epoch": 2.03, "grad_norm": 24.420616149902344, "learning_rate": 4.811068970361006e-07, "loss": 0.2518, "step": 2120 }, { "epoch": 2.03, "grad_norm": 4.963091850280762, "learning_rate": 4.807557241185559e-07, "loss": 0.2669, "step": 2130 }, { "epoch": 2.04, "grad_norm": 11.449257850646973, "learning_rate": 4.804045512010114e-07, "loss": 0.2426, "step": 2140 }, { "epoch": 2.04, "grad_norm": 11.900740623474121, "learning_rate": 4.800533782834667e-07, "loss": 0.2275, "step": 2150 }, { "epoch": 2.04, "grad_norm": 8.242390632629395, "learning_rate": 4.797022053659221e-07, "loss": 0.2803, "step": 2160 }, { "epoch": 2.04, "grad_norm": 11.955890655517578, "learning_rate": 4.793510324483776e-07, "loss": 0.2908, "step": 2170 }, { "epoch": 2.04, "grad_norm": 3.8110010623931885, "learning_rate": 4.789998595308329e-07, "loss": 0.2856, "step": 2180 }, { "epoch": 2.04, "grad_norm": 15.988479614257812, "learning_rate": 4.786486866132883e-07, "loss": 0.3222, "step": 2190 }, { "epoch": 2.04, "grad_norm": 3.741739273071289, "learning_rate": 4.782975136957438e-07, "loss": 0.2218, "step": 2200 }, { "epoch": 2.04, "grad_norm": 3.7071776390075684, "learning_rate": 4.779463407781992e-07, "loss": 0.2091, "step": 2210 }, { "epoch": 2.04, "grad_norm": 7.771834850311279, "learning_rate": 4.775951678606545e-07, "loss": 0.2396, "step": 2220 }, { "epoch": 2.04, "grad_norm": 5.974954128265381, "learning_rate": 4.7724399494311e-07, "loss": 0.2975, "step": 2230 }, { "epoch": 2.04, "grad_norm": 5.0555195808410645, "learning_rate": 4.768928220255653e-07, "loss": 0.2637, "step": 2240 }, { "epoch": 2.04, "grad_norm": 4.88677453994751, "learning_rate": 4.7654164910802074e-07, "loss": 0.2345, "step": 2250 }, { "epoch": 2.04, "grad_norm": 4.714931964874268, "learning_rate": 4.761904761904761e-07, "loss": 0.2318, "step": 2260 }, { "epoch": 2.04, "grad_norm": 14.919533729553223, "learning_rate": 4.758393032729316e-07, "loss": 0.2639, "step": 2270 }, { "epoch": 2.04, "grad_norm": 10.241095542907715, "learning_rate": 4.75488130355387e-07, "loss": 0.3168, "step": 2280 }, { "epoch": 2.04, "grad_norm": 10.279821395874023, "learning_rate": 4.7513695743784237e-07, "loss": 0.231, "step": 2290 }, { "epoch": 2.05, "grad_norm": 4.913956165313721, "learning_rate": 4.7478578452029775e-07, "loss": 0.2382, "step": 2300 }, { "epoch": 2.05, "grad_norm": 6.260556697845459, "learning_rate": 4.744346116027532e-07, "loss": 0.2632, "step": 2310 }, { "epoch": 2.05, "grad_norm": 8.971487045288086, "learning_rate": 4.7408343868520857e-07, "loss": 0.2763, "step": 2320 }, { "epoch": 2.05, "grad_norm": 12.89548110961914, "learning_rate": 4.7373226576766395e-07, "loss": 0.2315, "step": 2330 }, { "epoch": 2.05, "grad_norm": 9.425333976745605, "learning_rate": 4.733810928501194e-07, "loss": 0.2388, "step": 2340 }, { "epoch": 2.05, "grad_norm": 6.510443687438965, "learning_rate": 4.7302991993257477e-07, "loss": 0.2227, "step": 2350 }, { "epoch": 2.05, "grad_norm": 3.321969509124756, "learning_rate": 4.726787470150302e-07, "loss": 0.2204, "step": 2360 }, { "epoch": 2.05, "grad_norm": 10.656514167785645, "learning_rate": 4.723275740974856e-07, "loss": 0.184, "step": 2370 }, { "epoch": 2.05, "eval_accuracy": 0.8859407305306685, "eval_loss": 0.26999175548553467, "eval_precision": 0.9281345565749235, "eval_recall": 0.8366643694004136, "eval_runtime": 840.0427, "eval_samples_per_second": 3.455, "eval_steps_per_second": 0.248, "step": 2376 }, { "epoch": 3.0, "grad_norm": 6.158160209655762, "learning_rate": 4.7197640117994097e-07, "loss": 0.2115, "step": 2380 }, { "epoch": 3.0, "grad_norm": 10.620827674865723, "learning_rate": 4.7162522826239635e-07, "loss": 0.257, "step": 2390 }, { "epoch": 3.0, "grad_norm": 7.739505290985107, "learning_rate": 4.712740553448518e-07, "loss": 0.2225, "step": 2400 }, { "epoch": 3.0, "grad_norm": 6.495254993438721, "learning_rate": 4.709228824273072e-07, "loss": 0.2748, "step": 2410 }, { "epoch": 3.0, "grad_norm": 8.174240112304688, "learning_rate": 4.705717095097626e-07, "loss": 0.2065, "step": 2420 }, { "epoch": 3.0, "grad_norm": 8.847709655761719, "learning_rate": 4.70220536592218e-07, "loss": 0.2368, "step": 2430 }, { "epoch": 3.0, "grad_norm": 6.205838203430176, "learning_rate": 4.6986936367467337e-07, "loss": 0.2739, "step": 2440 }, { "epoch": 3.0, "grad_norm": 7.0135931968688965, "learning_rate": 4.695181907571288e-07, "loss": 0.1952, "step": 2450 }, { "epoch": 3.01, "grad_norm": 11.758519172668457, "learning_rate": 4.691670178395842e-07, "loss": 0.2383, "step": 2460 }, { "epoch": 3.01, "grad_norm": 6.2636590003967285, "learning_rate": 4.6881584492203956e-07, "loss": 0.2009, "step": 2470 }, { "epoch": 3.01, "grad_norm": 5.349643230438232, "learning_rate": 4.6846467200449495e-07, "loss": 0.2201, "step": 2480 }, { "epoch": 3.01, "grad_norm": 10.192341804504395, "learning_rate": 4.6811349908695043e-07, "loss": 0.2524, "step": 2490 }, { "epoch": 3.01, "grad_norm": 5.930012226104736, "learning_rate": 4.677623261694058e-07, "loss": 0.218, "step": 2500 }, { "epoch": 3.01, "grad_norm": 8.619109153747559, "learning_rate": 4.674111532518612e-07, "loss": 0.2184, "step": 2510 }, { "epoch": 3.01, "grad_norm": 4.657299518585205, "learning_rate": 4.670599803343166e-07, "loss": 0.2563, "step": 2520 }, { "epoch": 3.01, "grad_norm": 13.766278266906738, "learning_rate": 4.6670880741677196e-07, "loss": 0.2258, "step": 2530 }, { "epoch": 3.01, "grad_norm": 11.512306213378906, "learning_rate": 4.663576344992274e-07, "loss": 0.2265, "step": 2540 }, { "epoch": 3.01, "grad_norm": 5.0313544273376465, "learning_rate": 4.660064615816828e-07, "loss": 0.1552, "step": 2550 }, { "epoch": 3.01, "grad_norm": 5.563370704650879, "learning_rate": 4.656552886641382e-07, "loss": 0.2545, "step": 2560 }, { "epoch": 3.01, "grad_norm": 4.6292243003845215, "learning_rate": 4.653041157465936e-07, "loss": 0.2303, "step": 2570 }, { "epoch": 3.01, "grad_norm": 4.656125545501709, "learning_rate": 4.6495294282904903e-07, "loss": 0.2139, "step": 2580 }, { "epoch": 3.01, "grad_norm": 2.2568087577819824, "learning_rate": 4.646017699115044e-07, "loss": 0.31, "step": 2590 }, { "epoch": 3.01, "grad_norm": 8.638582229614258, "learning_rate": 4.642505969939598e-07, "loss": 0.1687, "step": 2600 }, { "epoch": 3.01, "grad_norm": 7.999022006988525, "learning_rate": 4.638994240764152e-07, "loss": 0.2159, "step": 2610 }, { "epoch": 3.02, "grad_norm": 30.751041412353516, "learning_rate": 4.6354825115887056e-07, "loss": 0.2528, "step": 2620 }, { "epoch": 3.02, "grad_norm": 8.652307510375977, "learning_rate": 4.6319707824132605e-07, "loss": 0.1779, "step": 2630 }, { "epoch": 3.02, "grad_norm": 7.926197052001953, "learning_rate": 4.6284590532378143e-07, "loss": 0.222, "step": 2640 }, { "epoch": 3.02, "grad_norm": 1.7880574464797974, "learning_rate": 4.624947324062368e-07, "loss": 0.2037, "step": 2650 }, { "epoch": 3.02, "grad_norm": 8.148602485656738, "learning_rate": 4.621435594886922e-07, "loss": 0.2751, "step": 2660 }, { "epoch": 3.02, "grad_norm": 4.340585708618164, "learning_rate": 4.6179238657114763e-07, "loss": 0.197, "step": 2670 }, { "epoch": 3.02, "grad_norm": 7.5678887367248535, "learning_rate": 4.61441213653603e-07, "loss": 0.1956, "step": 2680 }, { "epoch": 3.02, "grad_norm": 3.514565944671631, "learning_rate": 4.610900407360584e-07, "loss": 0.1981, "step": 2690 }, { "epoch": 3.02, "grad_norm": 5.947770595550537, "learning_rate": 4.6073886781851377e-07, "loss": 0.2204, "step": 2700 }, { "epoch": 3.02, "grad_norm": 9.586282730102539, "learning_rate": 4.6038769490096926e-07, "loss": 0.1563, "step": 2710 }, { "epoch": 3.02, "grad_norm": 4.624589443206787, "learning_rate": 4.6003652198342464e-07, "loss": 0.2018, "step": 2720 }, { "epoch": 3.02, "grad_norm": 4.1549553871154785, "learning_rate": 4.5968534906588e-07, "loss": 0.1925, "step": 2730 }, { "epoch": 3.02, "grad_norm": 5.1840338706970215, "learning_rate": 4.593341761483354e-07, "loss": 0.187, "step": 2740 }, { "epoch": 3.02, "grad_norm": 15.869466781616211, "learning_rate": 4.589830032307908e-07, "loss": 0.2551, "step": 2750 }, { "epoch": 3.02, "grad_norm": 4.108570575714111, "learning_rate": 4.586318303132462e-07, "loss": 0.2066, "step": 2760 }, { "epoch": 3.02, "grad_norm": 7.4606099128723145, "learning_rate": 4.582806573957016e-07, "loss": 0.2015, "step": 2770 }, { "epoch": 3.03, "grad_norm": 6.720588207244873, "learning_rate": 4.5792948447815704e-07, "loss": 0.1646, "step": 2780 }, { "epoch": 3.03, "grad_norm": 3.616619348526001, "learning_rate": 4.575783115606124e-07, "loss": 0.1784, "step": 2790 }, { "epoch": 3.03, "grad_norm": 3.042903184890747, "learning_rate": 4.5722713864306786e-07, "loss": 0.1645, "step": 2800 }, { "epoch": 3.03, "grad_norm": 8.903607368469238, "learning_rate": 4.5687596572552324e-07, "loss": 0.2087, "step": 2810 }, { "epoch": 3.03, "grad_norm": 8.300236701965332, "learning_rate": 4.565247928079786e-07, "loss": 0.2248, "step": 2820 }, { "epoch": 3.03, "grad_norm": 14.56275749206543, "learning_rate": 4.56173619890434e-07, "loss": 0.1879, "step": 2830 }, { "epoch": 3.03, "grad_norm": 9.6857328414917, "learning_rate": 4.558224469728894e-07, "loss": 0.1686, "step": 2840 }, { "epoch": 3.03, "grad_norm": 9.006673812866211, "learning_rate": 4.5547127405534487e-07, "loss": 0.1765, "step": 2850 }, { "epoch": 3.03, "grad_norm": 3.3305959701538086, "learning_rate": 4.5512010113780025e-07, "loss": 0.1502, "step": 2860 }, { "epoch": 3.03, "grad_norm": 7.797764778137207, "learning_rate": 4.5476892822025564e-07, "loss": 0.2003, "step": 2870 }, { "epoch": 3.03, "grad_norm": 6.234697341918945, "learning_rate": 4.54417755302711e-07, "loss": 0.147, "step": 2880 }, { "epoch": 3.03, "grad_norm": 12.207573890686035, "learning_rate": 4.5406658238516645e-07, "loss": 0.1926, "step": 2890 }, { "epoch": 3.03, "grad_norm": 8.931790351867676, "learning_rate": 4.5371540946762183e-07, "loss": 0.1984, "step": 2900 }, { "epoch": 3.03, "grad_norm": 3.7842655181884766, "learning_rate": 4.533642365500772e-07, "loss": 0.1992, "step": 2910 }, { "epoch": 3.03, "grad_norm": 7.3010101318359375, "learning_rate": 4.5301306363253265e-07, "loss": 0.2479, "step": 2920 }, { "epoch": 3.04, "grad_norm": 1.6983842849731445, "learning_rate": 4.5266189071498803e-07, "loss": 0.21, "step": 2930 }, { "epoch": 3.04, "grad_norm": 10.600749969482422, "learning_rate": 4.5231071779744347e-07, "loss": 0.1408, "step": 2940 }, { "epoch": 3.04, "grad_norm": 10.977212905883789, "learning_rate": 4.5195954487989885e-07, "loss": 0.1574, "step": 2950 }, { "epoch": 3.04, "grad_norm": 8.09029769897461, "learning_rate": 4.5160837196235423e-07, "loss": 0.1269, "step": 2960 }, { "epoch": 3.04, "grad_norm": 4.768685817718506, "learning_rate": 4.512571990448096e-07, "loss": 0.1732, "step": 2970 }, { "epoch": 3.04, "grad_norm": 42.22954177856445, "learning_rate": 4.5090602612726505e-07, "loss": 0.206, "step": 2980 }, { "epoch": 3.04, "grad_norm": 6.593056678771973, "learning_rate": 4.5055485320972043e-07, "loss": 0.1868, "step": 2990 }, { "epoch": 3.04, "grad_norm": 15.996166229248047, "learning_rate": 4.5020368029217587e-07, "loss": 0.22, "step": 3000 }, { "epoch": 3.04, "grad_norm": 10.244049072265625, "learning_rate": 4.4985250737463125e-07, "loss": 0.1679, "step": 3010 }, { "epoch": 3.04, "grad_norm": 6.346691608428955, "learning_rate": 4.495013344570867e-07, "loss": 0.1479, "step": 3020 }, { "epoch": 3.04, "grad_norm": 10.735522270202637, "learning_rate": 4.4915016153954206e-07, "loss": 0.2011, "step": 3030 }, { "epoch": 3.04, "grad_norm": 3.4601876735687256, "learning_rate": 4.4879898862199745e-07, "loss": 0.1444, "step": 3040 }, { "epoch": 3.04, "grad_norm": 5.515286922454834, "learning_rate": 4.4844781570445283e-07, "loss": 0.181, "step": 3050 }, { "epoch": 3.04, "grad_norm": 5.944587230682373, "learning_rate": 4.480966427869082e-07, "loss": 0.1735, "step": 3060 }, { "epoch": 3.04, "grad_norm": 9.784820556640625, "learning_rate": 4.477454698693637e-07, "loss": 0.1749, "step": 3070 }, { "epoch": 3.04, "grad_norm": 9.082942962646484, "learning_rate": 4.473942969518191e-07, "loss": 0.1716, "step": 3080 }, { "epoch": 3.05, "grad_norm": 12.850849151611328, "learning_rate": 4.4704312403427446e-07, "loss": 0.2014, "step": 3090 }, { "epoch": 3.05, "grad_norm": 2.5994491577148438, "learning_rate": 4.4669195111672984e-07, "loss": 0.1738, "step": 3100 }, { "epoch": 3.05, "grad_norm": 6.353146076202393, "learning_rate": 4.463407781991853e-07, "loss": 0.2033, "step": 3110 }, { "epoch": 3.05, "grad_norm": 5.359935283660889, "learning_rate": 4.4598960528164066e-07, "loss": 0.1594, "step": 3120 }, { "epoch": 3.05, "grad_norm": 8.088458061218262, "learning_rate": 4.4563843236409604e-07, "loss": 0.1445, "step": 3130 }, { "epoch": 3.05, "grad_norm": 5.7806243896484375, "learning_rate": 4.452872594465515e-07, "loss": 0.1709, "step": 3140 }, { "epoch": 3.05, "grad_norm": 5.524809837341309, "learning_rate": 4.4493608652900686e-07, "loss": 0.1831, "step": 3150 }, { "epoch": 3.05, "grad_norm": 7.7029337882995605, "learning_rate": 4.445849136114623e-07, "loss": 0.1911, "step": 3160 }, { "epoch": 3.05, "eval_accuracy": 0.9028256374913852, "eval_loss": 0.2280120700597763, "eval_precision": 0.9404672192916352, "eval_recall": 0.8600964851826327, "eval_runtime": 751.3763, "eval_samples_per_second": 3.862, "eval_steps_per_second": 0.277, "step": 3168 }, { "epoch": 4.0, "grad_norm": 4.812689781188965, "learning_rate": 4.442337406939177e-07, "loss": 0.1318, "step": 3170 }, { "epoch": 4.0, "grad_norm": 9.28890323638916, "learning_rate": 4.4388256777637306e-07, "loss": 0.1796, "step": 3180 }, { "epoch": 4.0, "grad_norm": 2.81488037109375, "learning_rate": 4.4353139485882844e-07, "loss": 0.2037, "step": 3190 }, { "epoch": 4.0, "grad_norm": 3.7426559925079346, "learning_rate": 4.431802219412839e-07, "loss": 0.1375, "step": 3200 }, { "epoch": 4.0, "grad_norm": 12.64274787902832, "learning_rate": 4.4282904902373926e-07, "loss": 0.1966, "step": 3210 }, { "epoch": 4.0, "grad_norm": 4.138557434082031, "learning_rate": 4.424778761061947e-07, "loss": 0.1358, "step": 3220 }, { "epoch": 4.0, "grad_norm": 14.764426231384277, "learning_rate": 4.4212670318865007e-07, "loss": 0.1964, "step": 3230 }, { "epoch": 4.0, "grad_norm": 5.4925856590271, "learning_rate": 4.4177553027110546e-07, "loss": 0.149, "step": 3240 }, { "epoch": 4.01, "grad_norm": 9.48027515411377, "learning_rate": 4.414243573535609e-07, "loss": 0.1633, "step": 3250 }, { "epoch": 4.01, "grad_norm": 8.98723030090332, "learning_rate": 4.4107318443601627e-07, "loss": 0.1554, "step": 3260 }, { "epoch": 4.01, "grad_norm": 7.85762882232666, "learning_rate": 4.4072201151847165e-07, "loss": 0.1966, "step": 3270 }, { "epoch": 4.01, "grad_norm": 7.1897077560424805, "learning_rate": 4.4037083860092704e-07, "loss": 0.2587, "step": 3280 }, { "epoch": 4.01, "grad_norm": 4.636531352996826, "learning_rate": 4.400196656833825e-07, "loss": 0.1838, "step": 3290 }, { "epoch": 4.01, "grad_norm": 15.14909839630127, "learning_rate": 4.396684927658379e-07, "loss": 0.1535, "step": 3300 }, { "epoch": 4.01, "grad_norm": 16.03323745727539, "learning_rate": 4.393173198482933e-07, "loss": 0.1772, "step": 3310 }, { "epoch": 4.01, "grad_norm": 12.649353981018066, "learning_rate": 4.3896614693074867e-07, "loss": 0.1392, "step": 3320 }, { "epoch": 4.01, "grad_norm": 4.349032402038574, "learning_rate": 4.3861497401320405e-07, "loss": 0.1478, "step": 3330 }, { "epoch": 4.01, "grad_norm": 4.391836643218994, "learning_rate": 4.382638010956595e-07, "loss": 0.1764, "step": 3340 }, { "epoch": 4.01, "grad_norm": 4.695539474487305, "learning_rate": 4.3791262817811487e-07, "loss": 0.1347, "step": 3350 }, { "epoch": 4.01, "grad_norm": 10.453676223754883, "learning_rate": 4.375614552605703e-07, "loss": 0.1769, "step": 3360 }, { "epoch": 4.01, "grad_norm": 10.562990188598633, "learning_rate": 4.372102823430257e-07, "loss": 0.1355, "step": 3370 }, { "epoch": 4.01, "grad_norm": 8.647297859191895, "learning_rate": 4.368591094254811e-07, "loss": 0.1251, "step": 3380 }, { "epoch": 4.01, "grad_norm": 5.990262508392334, "learning_rate": 4.365079365079365e-07, "loss": 0.1059, "step": 3390 }, { "epoch": 4.01, "grad_norm": 11.611662864685059, "learning_rate": 4.361567635903919e-07, "loss": 0.1409, "step": 3400 }, { "epoch": 4.02, "grad_norm": 28.644424438476562, "learning_rate": 4.3580559067284727e-07, "loss": 0.1582, "step": 3410 }, { "epoch": 4.02, "grad_norm": 6.621020793914795, "learning_rate": 4.3545441775530265e-07, "loss": 0.1551, "step": 3420 }, { "epoch": 4.02, "grad_norm": 6.776650905609131, "learning_rate": 4.351032448377581e-07, "loss": 0.1751, "step": 3430 }, { "epoch": 4.02, "grad_norm": 5.756790637969971, "learning_rate": 4.347520719202135e-07, "loss": 0.1321, "step": 3440 }, { "epoch": 4.02, "grad_norm": 10.614466667175293, "learning_rate": 4.344008990026689e-07, "loss": 0.1578, "step": 3450 }, { "epoch": 4.02, "grad_norm": 9.78405475616455, "learning_rate": 4.340497260851243e-07, "loss": 0.205, "step": 3460 }, { "epoch": 4.02, "grad_norm": 10.493732452392578, "learning_rate": 4.336985531675797e-07, "loss": 0.1717, "step": 3470 }, { "epoch": 4.02, "grad_norm": 8.754321098327637, "learning_rate": 4.333473802500351e-07, "loss": 0.1294, "step": 3480 }, { "epoch": 4.02, "grad_norm": 13.038125991821289, "learning_rate": 4.329962073324905e-07, "loss": 0.1858, "step": 3490 }, { "epoch": 4.02, "grad_norm": 13.937658309936523, "learning_rate": 4.3264503441494586e-07, "loss": 0.1051, "step": 3500 }, { "epoch": 4.02, "grad_norm": 6.210293292999268, "learning_rate": 4.3229386149740135e-07, "loss": 0.1733, "step": 3510 }, { "epoch": 4.02, "grad_norm": 5.685500144958496, "learning_rate": 4.3194268857985673e-07, "loss": 0.1711, "step": 3520 }, { "epoch": 4.02, "grad_norm": 4.830104827880859, "learning_rate": 4.315915156623121e-07, "loss": 0.1124, "step": 3530 }, { "epoch": 4.02, "grad_norm": 5.001163482666016, "learning_rate": 4.312403427447675e-07, "loss": 0.1282, "step": 3540 }, { "epoch": 4.02, "grad_norm": 8.28426456451416, "learning_rate": 4.308891698272229e-07, "loss": 0.1342, "step": 3550 }, { "epoch": 4.02, "grad_norm": 23.763334274291992, "learning_rate": 4.305379969096783e-07, "loss": 0.1539, "step": 3560 }, { "epoch": 4.03, "grad_norm": 4.7566375732421875, "learning_rate": 4.301868239921337e-07, "loss": 0.1379, "step": 3570 }, { "epoch": 4.03, "grad_norm": 8.54224967956543, "learning_rate": 4.2983565107458913e-07, "loss": 0.1389, "step": 3580 }, { "epoch": 4.03, "grad_norm": 2.0961079597473145, "learning_rate": 4.294844781570445e-07, "loss": 0.1745, "step": 3590 }, { "epoch": 4.03, "grad_norm": 14.91147232055664, "learning_rate": 4.2913330523949995e-07, "loss": 0.1169, "step": 3600 }, { "epoch": 4.03, "grad_norm": 6.1531596183776855, "learning_rate": 4.2878213232195533e-07, "loss": 0.1038, "step": 3610 }, { "epoch": 4.03, "grad_norm": 23.347980499267578, "learning_rate": 4.284309594044107e-07, "loss": 0.1576, "step": 3620 }, { "epoch": 4.03, "grad_norm": 9.57962703704834, "learning_rate": 4.280797864868661e-07, "loss": 0.1603, "step": 3630 }, { "epoch": 4.03, "grad_norm": 7.628696441650391, "learning_rate": 4.2772861356932147e-07, "loss": 0.1043, "step": 3640 }, { "epoch": 4.03, "grad_norm": 3.756329298019409, "learning_rate": 4.273774406517769e-07, "loss": 0.156, "step": 3650 }, { "epoch": 4.03, "grad_norm": 23.433168411254883, "learning_rate": 4.2702626773423234e-07, "loss": 0.166, "step": 3660 }, { "epoch": 4.03, "grad_norm": 15.06528091430664, "learning_rate": 4.266750948166877e-07, "loss": 0.2338, "step": 3670 }, { "epoch": 4.03, "grad_norm": 12.766666412353516, "learning_rate": 4.263239218991431e-07, "loss": 0.1043, "step": 3680 }, { "epoch": 4.03, "grad_norm": 2.78690242767334, "learning_rate": 4.2597274898159854e-07, "loss": 0.1234, "step": 3690 }, { "epoch": 4.03, "grad_norm": 9.292880058288574, "learning_rate": 4.256215760640539e-07, "loss": 0.1453, "step": 3700 }, { "epoch": 4.03, "grad_norm": 9.849581718444824, "learning_rate": 4.252704031465093e-07, "loss": 0.1754, "step": 3710 }, { "epoch": 4.03, "grad_norm": 4.6074113845825195, "learning_rate": 4.249192302289647e-07, "loss": 0.1631, "step": 3720 }, { "epoch": 4.04, "grad_norm": 9.207588195800781, "learning_rate": 4.245680573114201e-07, "loss": 0.1434, "step": 3730 }, { "epoch": 4.04, "grad_norm": 4.202247619628906, "learning_rate": 4.2421688439387556e-07, "loss": 0.1627, "step": 3740 }, { "epoch": 4.04, "grad_norm": 6.747264385223389, "learning_rate": 4.2386571147633094e-07, "loss": 0.1385, "step": 3750 }, { "epoch": 4.04, "grad_norm": 2.6409049034118652, "learning_rate": 4.235145385587863e-07, "loss": 0.1696, "step": 3760 }, { "epoch": 4.04, "grad_norm": 6.908291816711426, "learning_rate": 4.231633656412417e-07, "loss": 0.1642, "step": 3770 }, { "epoch": 4.04, "grad_norm": 9.435556411743164, "learning_rate": 4.2281219272369714e-07, "loss": 0.1072, "step": 3780 }, { "epoch": 4.04, "grad_norm": 11.453124046325684, "learning_rate": 4.224610198061525e-07, "loss": 0.0989, "step": 3790 }, { "epoch": 4.04, "grad_norm": 1.993767499923706, "learning_rate": 4.2210984688860795e-07, "loss": 0.1931, "step": 3800 }, { "epoch": 4.04, "grad_norm": 13.429659843444824, "learning_rate": 4.2175867397106334e-07, "loss": 0.1741, "step": 3810 }, { "epoch": 4.04, "grad_norm": 14.696208000183105, "learning_rate": 4.214075010535187e-07, "loss": 0.1525, "step": 3820 }, { "epoch": 4.04, "grad_norm": 7.1931586265563965, "learning_rate": 4.2105632813597415e-07, "loss": 0.1422, "step": 3830 }, { "epoch": 4.04, "grad_norm": 6.836225986480713, "learning_rate": 4.2070515521842954e-07, "loss": 0.13, "step": 3840 }, { "epoch": 4.04, "grad_norm": 16.221458435058594, "learning_rate": 4.203539823008849e-07, "loss": 0.1701, "step": 3850 }, { "epoch": 4.04, "grad_norm": 2.7478249073028564, "learning_rate": 4.200028093833403e-07, "loss": 0.0942, "step": 3860 }, { "epoch": 4.04, "grad_norm": 14.903944969177246, "learning_rate": 4.196516364657958e-07, "loss": 0.1428, "step": 3870 }, { "epoch": 4.05, "grad_norm": 18.389455795288086, "learning_rate": 4.1930046354825117e-07, "loss": 0.1993, "step": 3880 }, { "epoch": 4.05, "grad_norm": 8.134357452392578, "learning_rate": 4.1894929063070655e-07, "loss": 0.1829, "step": 3890 }, { "epoch": 4.05, "grad_norm": 7.82716178894043, "learning_rate": 4.1859811771316193e-07, "loss": 0.1391, "step": 3900 }, { "epoch": 4.05, "grad_norm": 8.363373756408691, "learning_rate": 4.182469447956173e-07, "loss": 0.1858, "step": 3910 }, { "epoch": 4.05, "grad_norm": 11.584836959838867, "learning_rate": 4.1789577187807275e-07, "loss": 0.1488, "step": 3920 }, { "epoch": 4.05, "grad_norm": 12.29085636138916, "learning_rate": 4.1754459896052813e-07, "loss": 0.1618, "step": 3930 }, { "epoch": 4.05, "grad_norm": 13.828036308288574, "learning_rate": 4.171934260429835e-07, "loss": 0.1291, "step": 3940 }, { "epoch": 4.05, "grad_norm": 6.466941833496094, "learning_rate": 4.1684225312543895e-07, "loss": 0.1504, "step": 3950 }, { "epoch": 4.05, "grad_norm": 16.25638198852539, "learning_rate": 4.164910802078944e-07, "loss": 0.1115, "step": 3960 }, { "epoch": 4.05, "eval_accuracy": 0.9062715368711234, "eval_loss": 0.22177456319332123, "eval_precision": 0.9435665914221218, "eval_recall": 0.8642315644383184, "eval_runtime": 781.532, "eval_samples_per_second": 3.713, "eval_steps_per_second": 0.266, "step": 3960 }, { "epoch": 5.0, "grad_norm": 9.544825553894043, "learning_rate": 4.1613990729034976e-07, "loss": 0.1039, "step": 3970 }, { "epoch": 5.0, "grad_norm": 11.964020729064941, "learning_rate": 4.1578873437280515e-07, "loss": 0.193, "step": 3980 }, { "epoch": 5.0, "grad_norm": 6.018560886383057, "learning_rate": 4.1543756145526053e-07, "loss": 0.095, "step": 3990 }, { "epoch": 5.0, "grad_norm": 1.2165708541870117, "learning_rate": 4.150863885377159e-07, "loss": 0.1063, "step": 4000 }, { "epoch": 5.0, "grad_norm": 6.656147480010986, "learning_rate": 4.1473521562017135e-07, "loss": 0.1351, "step": 4010 }, { "epoch": 5.0, "grad_norm": 3.6890463829040527, "learning_rate": 4.143840427026268e-07, "loss": 0.1144, "step": 4020 }, { "epoch": 5.0, "grad_norm": 10.286821365356445, "learning_rate": 4.1403286978508216e-07, "loss": 0.1378, "step": 4030 }, { "epoch": 5.01, "grad_norm": 11.775749206542969, "learning_rate": 4.1368169686753754e-07, "loss": 0.1364, "step": 4040 }, { "epoch": 5.01, "grad_norm": 17.810165405273438, "learning_rate": 4.13330523949993e-07, "loss": 0.1784, "step": 4050 }, { "epoch": 5.01, "grad_norm": 6.75129508972168, "learning_rate": 4.1297935103244836e-07, "loss": 0.1239, "step": 4060 }, { "epoch": 5.01, "grad_norm": 1.7124630212783813, "learning_rate": 4.1262817811490374e-07, "loss": 0.0873, "step": 4070 }, { "epoch": 5.01, "grad_norm": 9.44510555267334, "learning_rate": 4.122770051973591e-07, "loss": 0.1683, "step": 4080 }, { "epoch": 5.01, "grad_norm": 14.88578987121582, "learning_rate": 4.119258322798146e-07, "loss": 0.172, "step": 4090 }, { "epoch": 5.01, "grad_norm": 8.781535148620605, "learning_rate": 4.1157465936227e-07, "loss": 0.1436, "step": 4100 }, { "epoch": 5.01, "grad_norm": 4.762348175048828, "learning_rate": 4.112234864447254e-07, "loss": 0.1354, "step": 4110 }, { "epoch": 5.01, "grad_norm": 4.259210109710693, "learning_rate": 4.1087231352718076e-07, "loss": 0.1324, "step": 4120 }, { "epoch": 5.01, "grad_norm": 6.946531295776367, "learning_rate": 4.1052114060963614e-07, "loss": 0.0851, "step": 4130 }, { "epoch": 5.01, "grad_norm": 14.80830192565918, "learning_rate": 4.101699676920916e-07, "loss": 0.1105, "step": 4140 }, { "epoch": 5.01, "grad_norm": 6.9425177574157715, "learning_rate": 4.0981879477454696e-07, "loss": 0.1243, "step": 4150 }, { "epoch": 5.01, "grad_norm": 15.916327476501465, "learning_rate": 4.0946762185700234e-07, "loss": 0.1486, "step": 4160 }, { "epoch": 5.01, "grad_norm": 6.485676288604736, "learning_rate": 4.091164489394578e-07, "loss": 0.127, "step": 4170 }, { "epoch": 5.01, "grad_norm": 11.588597297668457, "learning_rate": 4.087652760219132e-07, "loss": 0.1614, "step": 4180 }, { "epoch": 5.01, "grad_norm": 9.615463256835938, "learning_rate": 4.084141031043686e-07, "loss": 0.1287, "step": 4190 }, { "epoch": 5.02, "grad_norm": 14.508774757385254, "learning_rate": 4.0806293018682397e-07, "loss": 0.1136, "step": 4200 }, { "epoch": 5.02, "grad_norm": 8.787707328796387, "learning_rate": 4.0771175726927935e-07, "loss": 0.2096, "step": 4210 }, { "epoch": 5.02, "grad_norm": 11.172078132629395, "learning_rate": 4.0736058435173474e-07, "loss": 0.2249, "step": 4220 }, { "epoch": 5.02, "grad_norm": 10.53729248046875, "learning_rate": 4.0700941143419017e-07, "loss": 0.1333, "step": 4230 }, { "epoch": 5.02, "grad_norm": 11.202916145324707, "learning_rate": 4.066582385166456e-07, "loss": 0.0804, "step": 4240 }, { "epoch": 5.02, "grad_norm": 6.41886568069458, "learning_rate": 4.06307065599101e-07, "loss": 0.1012, "step": 4250 }, { "epoch": 5.02, "grad_norm": 6.839794158935547, "learning_rate": 4.0595589268155637e-07, "loss": 0.1628, "step": 4260 }, { "epoch": 5.02, "grad_norm": 8.547186851501465, "learning_rate": 4.056047197640118e-07, "loss": 0.1432, "step": 4270 }, { "epoch": 5.02, "grad_norm": 9.808359146118164, "learning_rate": 4.052535468464672e-07, "loss": 0.125, "step": 4280 }, { "epoch": 5.02, "grad_norm": 6.607131004333496, "learning_rate": 4.0490237392892257e-07, "loss": 0.1078, "step": 4290 }, { "epoch": 5.02, "grad_norm": 13.340785026550293, "learning_rate": 4.0455120101137795e-07, "loss": 0.1344, "step": 4300 }, { "epoch": 5.02, "grad_norm": 6.319891452789307, "learning_rate": 4.042000280938334e-07, "loss": 0.1473, "step": 4310 }, { "epoch": 5.02, "grad_norm": 10.910396575927734, "learning_rate": 4.038488551762888e-07, "loss": 0.1071, "step": 4320 }, { "epoch": 5.02, "grad_norm": 6.297656536102295, "learning_rate": 4.034976822587442e-07, "loss": 0.0859, "step": 4330 }, { "epoch": 5.02, "grad_norm": 4.910276889801025, "learning_rate": 4.031465093411996e-07, "loss": 0.1287, "step": 4340 }, { "epoch": 5.02, "grad_norm": 18.060909271240234, "learning_rate": 4.0279533642365497e-07, "loss": 0.1391, "step": 4350 }, { "epoch": 5.03, "grad_norm": 13.502204895019531, "learning_rate": 4.024441635061104e-07, "loss": 0.1739, "step": 4360 }, { "epoch": 5.03, "grad_norm": 1.600792646408081, "learning_rate": 4.020929905885658e-07, "loss": 0.1037, "step": 4370 }, { "epoch": 5.03, "grad_norm": 6.544968128204346, "learning_rate": 4.0174181767102116e-07, "loss": 0.1424, "step": 4380 }, { "epoch": 5.03, "grad_norm": 6.39059591293335, "learning_rate": 4.013906447534766e-07, "loss": 0.1251, "step": 4390 }, { "epoch": 5.03, "grad_norm": 10.98317813873291, "learning_rate": 4.0103947183593203e-07, "loss": 0.1112, "step": 4400 }, { "epoch": 5.03, "grad_norm": 9.025348663330078, "learning_rate": 4.006882989183874e-07, "loss": 0.1506, "step": 4410 }, { "epoch": 5.03, "grad_norm": 9.448262214660645, "learning_rate": 4.003371260008428e-07, "loss": 0.147, "step": 4420 }, { "epoch": 5.03, "grad_norm": 15.520981788635254, "learning_rate": 3.999859530832982e-07, "loss": 0.1592, "step": 4430 }, { "epoch": 5.03, "grad_norm": 1.8781596422195435, "learning_rate": 3.9963478016575356e-07, "loss": 0.1171, "step": 4440 }, { "epoch": 5.03, "grad_norm": 43.35357666015625, "learning_rate": 3.99283607248209e-07, "loss": 0.1333, "step": 4450 }, { "epoch": 5.03, "grad_norm": 4.397987365722656, "learning_rate": 3.9893243433066443e-07, "loss": 0.1302, "step": 4460 }, { "epoch": 5.03, "grad_norm": 7.551544189453125, "learning_rate": 3.985812614131198e-07, "loss": 0.16, "step": 4470 }, { "epoch": 5.03, "grad_norm": 8.251708984375, "learning_rate": 3.982300884955752e-07, "loss": 0.1335, "step": 4480 }, { "epoch": 5.03, "grad_norm": 2.564143419265747, "learning_rate": 3.9787891557803063e-07, "loss": 0.1479, "step": 4490 }, { "epoch": 5.03, "grad_norm": 1.5294811725616455, "learning_rate": 3.97527742660486e-07, "loss": 0.0853, "step": 4500 }, { "epoch": 5.03, "grad_norm": 2.746248722076416, "learning_rate": 3.971765697429414e-07, "loss": 0.1106, "step": 4510 }, { "epoch": 5.04, "grad_norm": 4.9603962898254395, "learning_rate": 3.968253968253968e-07, "loss": 0.0831, "step": 4520 }, { "epoch": 5.04, "grad_norm": 5.679323673248291, "learning_rate": 3.964742239078522e-07, "loss": 0.0742, "step": 4530 }, { "epoch": 5.04, "grad_norm": 8.932284355163574, "learning_rate": 3.9612305099030765e-07, "loss": 0.1774, "step": 4540 }, { "epoch": 5.04, "grad_norm": 2.700977087020874, "learning_rate": 3.9577187807276303e-07, "loss": 0.0819, "step": 4550 }, { "epoch": 5.04, "grad_norm": 0.46211129426956177, "learning_rate": 3.954207051552184e-07, "loss": 0.0777, "step": 4560 }, { "epoch": 5.04, "grad_norm": 3.030123710632324, "learning_rate": 3.950695322376738e-07, "loss": 0.1237, "step": 4570 }, { "epoch": 5.04, "grad_norm": 3.8898704051971436, "learning_rate": 3.9471835932012923e-07, "loss": 0.0901, "step": 4580 }, { "epoch": 5.04, "grad_norm": 23.526710510253906, "learning_rate": 3.943671864025846e-07, "loss": 0.1349, "step": 4590 }, { "epoch": 5.04, "grad_norm": 0.7609380483627319, "learning_rate": 3.9401601348504e-07, "loss": 0.1437, "step": 4600 }, { "epoch": 5.04, "grad_norm": 15.892127990722656, "learning_rate": 3.936648405674954e-07, "loss": 0.1552, "step": 4610 }, { "epoch": 5.04, "grad_norm": 0.8223940134048462, "learning_rate": 3.933136676499508e-07, "loss": 0.1932, "step": 4620 }, { "epoch": 5.04, "grad_norm": 1.4060343503952026, "learning_rate": 3.9296249473240624e-07, "loss": 0.1077, "step": 4630 }, { "epoch": 5.04, "grad_norm": 8.013404846191406, "learning_rate": 3.926113218148616e-07, "loss": 0.096, "step": 4640 }, { "epoch": 5.04, "grad_norm": 7.16346549987793, "learning_rate": 3.92260148897317e-07, "loss": 0.1382, "step": 4650 }, { "epoch": 5.04, "grad_norm": 8.092345237731934, "learning_rate": 3.919089759797724e-07, "loss": 0.0963, "step": 4660 }, { "epoch": 5.04, "grad_norm": 1.4798976182937622, "learning_rate": 3.915578030622278e-07, "loss": 0.081, "step": 4670 }, { "epoch": 5.05, "grad_norm": 3.624849319458008, "learning_rate": 3.9120663014468326e-07, "loss": 0.0906, "step": 4680 }, { "epoch": 5.05, "grad_norm": 10.8258056640625, "learning_rate": 3.9085545722713864e-07, "loss": 0.1212, "step": 4690 }, { "epoch": 5.05, "grad_norm": 19.1744441986084, "learning_rate": 3.90504284309594e-07, "loss": 0.1494, "step": 4700 }, { "epoch": 5.05, "grad_norm": 2.5890440940856934, "learning_rate": 3.901531113920494e-07, "loss": 0.1221, "step": 4710 }, { "epoch": 5.05, "grad_norm": 26.965164184570312, "learning_rate": 3.8980193847450484e-07, "loss": 0.1312, "step": 4720 }, { "epoch": 5.05, "grad_norm": 5.628452301025391, "learning_rate": 3.894507655569602e-07, "loss": 0.1115, "step": 4730 }, { "epoch": 5.05, "grad_norm": 6.552463531494141, "learning_rate": 3.890995926394156e-07, "loss": 0.1151, "step": 4740 }, { "epoch": 5.05, "grad_norm": 10.636469841003418, "learning_rate": 3.8874841972187104e-07, "loss": 0.1799, "step": 4750 }, { "epoch": 5.05, "eval_accuracy": 0.9090282563749138, "eval_loss": 0.2293003350496292, "eval_precision": 0.9604344453064391, "eval_recall": 0.8532046864231564, "eval_runtime": 767.1422, "eval_samples_per_second": 3.783, "eval_steps_per_second": 0.271, "step": 4752 }, { "epoch": 6.0, "grad_norm": 0.6936726570129395, "learning_rate": 3.8839724680432647e-07, "loss": 0.1065, "step": 4760 }, { "epoch": 6.0, "grad_norm": 2.3142240047454834, "learning_rate": 3.8804607388678185e-07, "loss": 0.0631, "step": 4770 }, { "epoch": 6.0, "grad_norm": 0.4146326184272766, "learning_rate": 3.8769490096923724e-07, "loss": 0.1011, "step": 4780 }, { "epoch": 6.0, "grad_norm": 6.362814903259277, "learning_rate": 3.873437280516926e-07, "loss": 0.1331, "step": 4790 }, { "epoch": 6.0, "grad_norm": 8.569573402404785, "learning_rate": 3.86992555134148e-07, "loss": 0.0979, "step": 4800 }, { "epoch": 6.0, "grad_norm": 26.337459564208984, "learning_rate": 3.8664138221660343e-07, "loss": 0.1078, "step": 4810 }, { "epoch": 6.0, "grad_norm": 19.35256576538086, "learning_rate": 3.8629020929905887e-07, "loss": 0.1238, "step": 4820 }, { "epoch": 6.0, "grad_norm": 41.63506317138672, "learning_rate": 3.8593903638151425e-07, "loss": 0.1532, "step": 4830 }, { "epoch": 6.01, "grad_norm": 4.546126842498779, "learning_rate": 3.8558786346396963e-07, "loss": 0.1469, "step": 4840 }, { "epoch": 6.01, "grad_norm": 8.08976936340332, "learning_rate": 3.8523669054642507e-07, "loss": 0.1502, "step": 4850 }, { "epoch": 6.01, "grad_norm": 22.192916870117188, "learning_rate": 3.8488551762888045e-07, "loss": 0.1489, "step": 4860 }, { "epoch": 6.01, "grad_norm": 7.479325294494629, "learning_rate": 3.8453434471133583e-07, "loss": 0.1012, "step": 4870 }, { "epoch": 6.01, "grad_norm": 1.6453194618225098, "learning_rate": 3.841831717937912e-07, "loss": 0.0492, "step": 4880 }, { "epoch": 6.01, "grad_norm": 7.798226356506348, "learning_rate": 3.838319988762466e-07, "loss": 0.0514, "step": 4890 }, { "epoch": 6.01, "grad_norm": 9.014942169189453, "learning_rate": 3.834808259587021e-07, "loss": 0.0957, "step": 4900 }, { "epoch": 6.01, "grad_norm": 16.908748626708984, "learning_rate": 3.8312965304115747e-07, "loss": 0.1332, "step": 4910 }, { "epoch": 6.01, "grad_norm": 0.412589430809021, "learning_rate": 3.8277848012361285e-07, "loss": 0.105, "step": 4920 }, { "epoch": 6.01, "grad_norm": 8.188920974731445, "learning_rate": 3.8242730720606823e-07, "loss": 0.1226, "step": 4930 }, { "epoch": 6.01, "grad_norm": 3.5582354068756104, "learning_rate": 3.8207613428852366e-07, "loss": 0.1012, "step": 4940 }, { "epoch": 6.01, "grad_norm": 16.018510818481445, "learning_rate": 3.8172496137097905e-07, "loss": 0.1236, "step": 4950 }, { "epoch": 6.01, "grad_norm": 7.294294834136963, "learning_rate": 3.8137378845343443e-07, "loss": 0.0881, "step": 4960 }, { "epoch": 6.01, "grad_norm": 16.659114837646484, "learning_rate": 3.8102261553588986e-07, "loss": 0.1204, "step": 4970 }, { "epoch": 6.01, "grad_norm": 1.9289318323135376, "learning_rate": 3.806714426183453e-07, "loss": 0.0647, "step": 4980 }, { "epoch": 6.02, "grad_norm": 19.181127548217773, "learning_rate": 3.803202697008007e-07, "loss": 0.1426, "step": 4990 }, { "epoch": 6.02, "grad_norm": 9.255746841430664, "learning_rate": 3.7996909678325606e-07, "loss": 0.1581, "step": 5000 }, { "epoch": 6.02, "grad_norm": 2.0085437297821045, "learning_rate": 3.7961792386571144e-07, "loss": 0.089, "step": 5010 }, { "epoch": 6.02, "grad_norm": 4.7241530418396, "learning_rate": 3.792667509481668e-07, "loss": 0.0925, "step": 5020 }, { "epoch": 6.02, "grad_norm": 28.334856033325195, "learning_rate": 3.7891557803062226e-07, "loss": 0.1182, "step": 5030 }, { "epoch": 6.02, "grad_norm": 17.216020584106445, "learning_rate": 3.785644051130777e-07, "loss": 0.0757, "step": 5040 }, { "epoch": 6.02, "grad_norm": 17.278583526611328, "learning_rate": 3.782132321955331e-07, "loss": 0.1473, "step": 5050 }, { "epoch": 6.02, "grad_norm": 24.403547286987305, "learning_rate": 3.7786205927798846e-07, "loss": 0.2077, "step": 5060 }, { "epoch": 6.02, "grad_norm": 4.184651851654053, "learning_rate": 3.775108863604439e-07, "loss": 0.0999, "step": 5070 }, { "epoch": 6.02, "grad_norm": 16.007944107055664, "learning_rate": 3.771597134428993e-07, "loss": 0.1297, "step": 5080 }, { "epoch": 6.02, "grad_norm": 11.847286224365234, "learning_rate": 3.7680854052535466e-07, "loss": 0.1628, "step": 5090 }, { "epoch": 6.02, "grad_norm": 18.388057708740234, "learning_rate": 3.7645736760781004e-07, "loss": 0.1283, "step": 5100 }, { "epoch": 6.02, "grad_norm": 20.93194580078125, "learning_rate": 3.761061946902654e-07, "loss": 0.0978, "step": 5110 }, { "epoch": 6.02, "grad_norm": 2.187920331954956, "learning_rate": 3.757550217727209e-07, "loss": 0.0429, "step": 5120 }, { "epoch": 6.02, "grad_norm": 2.697463035583496, "learning_rate": 3.754038488551763e-07, "loss": 0.1106, "step": 5130 }, { "epoch": 6.02, "grad_norm": 2.4168097972869873, "learning_rate": 3.7505267593763167e-07, "loss": 0.0758, "step": 5140 }, { "epoch": 6.03, "grad_norm": 1.1960080862045288, "learning_rate": 3.7470150302008706e-07, "loss": 0.0764, "step": 5150 }, { "epoch": 6.03, "grad_norm": 6.627502918243408, "learning_rate": 3.743503301025425e-07, "loss": 0.0805, "step": 5160 }, { "epoch": 6.03, "grad_norm": 6.091911315917969, "learning_rate": 3.7399915718499787e-07, "loss": 0.2078, "step": 5170 }, { "epoch": 6.03, "grad_norm": 18.784122467041016, "learning_rate": 3.7364798426745325e-07, "loss": 0.0924, "step": 5180 }, { "epoch": 6.03, "grad_norm": 4.827120304107666, "learning_rate": 3.732968113499087e-07, "loss": 0.1104, "step": 5190 }, { "epoch": 6.03, "grad_norm": 12.252345085144043, "learning_rate": 3.7294563843236407e-07, "loss": 0.095, "step": 5200 }, { "epoch": 6.03, "grad_norm": 10.740091323852539, "learning_rate": 3.725944655148195e-07, "loss": 0.1178, "step": 5210 }, { "epoch": 6.03, "grad_norm": 32.347660064697266, "learning_rate": 3.722432925972749e-07, "loss": 0.174, "step": 5220 }, { "epoch": 6.03, "grad_norm": 11.732865333557129, "learning_rate": 3.7189211967973027e-07, "loss": 0.083, "step": 5230 }, { "epoch": 6.03, "grad_norm": 11.415803909301758, "learning_rate": 3.7154094676218565e-07, "loss": 0.1522, "step": 5240 }, { "epoch": 6.03, "grad_norm": 3.089712619781494, "learning_rate": 3.711897738446411e-07, "loss": 0.0775, "step": 5250 }, { "epoch": 6.03, "grad_norm": 6.789796829223633, "learning_rate": 3.708386009270965e-07, "loss": 0.1146, "step": 5260 }, { "epoch": 6.03, "grad_norm": 12.915666580200195, "learning_rate": 3.704874280095519e-07, "loss": 0.1326, "step": 5270 }, { "epoch": 6.03, "grad_norm": 4.979092121124268, "learning_rate": 3.701362550920073e-07, "loss": 0.1573, "step": 5280 }, { "epoch": 6.03, "grad_norm": 14.677412033081055, "learning_rate": 3.6978508217446267e-07, "loss": 0.1456, "step": 5290 }, { "epoch": 6.03, "grad_norm": 12.493571281433105, "learning_rate": 3.694339092569181e-07, "loss": 0.0819, "step": 5300 }, { "epoch": 6.04, "grad_norm": 1.4160298109054565, "learning_rate": 3.690827363393735e-07, "loss": 0.1109, "step": 5310 }, { "epoch": 6.04, "grad_norm": 0.47015225887298584, "learning_rate": 3.6873156342182887e-07, "loss": 0.0901, "step": 5320 }, { "epoch": 6.04, "grad_norm": 19.707412719726562, "learning_rate": 3.6838039050428425e-07, "loss": 0.2122, "step": 5330 }, { "epoch": 6.04, "grad_norm": 15.541810989379883, "learning_rate": 3.6802921758673974e-07, "loss": 0.1176, "step": 5340 }, { "epoch": 6.04, "grad_norm": 23.960447311401367, "learning_rate": 3.676780446691951e-07, "loss": 0.1126, "step": 5350 }, { "epoch": 6.04, "grad_norm": 11.301091194152832, "learning_rate": 3.673268717516505e-07, "loss": 0.1273, "step": 5360 }, { "epoch": 6.04, "grad_norm": 6.495253086090088, "learning_rate": 3.669756988341059e-07, "loss": 0.0736, "step": 5370 }, { "epoch": 6.04, "grad_norm": 18.85450553894043, "learning_rate": 3.6662452591656126e-07, "loss": 0.0726, "step": 5380 }, { "epoch": 6.04, "grad_norm": 2.298658847808838, "learning_rate": 3.662733529990167e-07, "loss": 0.0662, "step": 5390 }, { "epoch": 6.04, "grad_norm": 3.8143820762634277, "learning_rate": 3.659221800814721e-07, "loss": 0.0697, "step": 5400 }, { "epoch": 6.04, "grad_norm": 5.604777812957764, "learning_rate": 3.655710071639275e-07, "loss": 0.0836, "step": 5410 }, { "epoch": 6.04, "grad_norm": 6.372956275939941, "learning_rate": 3.652198342463829e-07, "loss": 0.0765, "step": 5420 }, { "epoch": 6.04, "grad_norm": 13.849519729614258, "learning_rate": 3.6486866132883833e-07, "loss": 0.131, "step": 5430 }, { "epoch": 6.04, "grad_norm": 1.1766833066940308, "learning_rate": 3.645174884112937e-07, "loss": 0.0578, "step": 5440 }, { "epoch": 6.04, "grad_norm": 2.669991970062256, "learning_rate": 3.641663154937491e-07, "loss": 0.0936, "step": 5450 }, { "epoch": 6.04, "grad_norm": 15.525193214416504, "learning_rate": 3.638151425762045e-07, "loss": 0.1407, "step": 5460 }, { "epoch": 6.05, "grad_norm": 1.4182878732681274, "learning_rate": 3.634639696586599e-07, "loss": 0.089, "step": 5470 }, { "epoch": 6.05, "grad_norm": 1.98279869556427, "learning_rate": 3.6311279674111535e-07, "loss": 0.0809, "step": 5480 }, { "epoch": 6.05, "grad_norm": 10.84345531463623, "learning_rate": 3.6276162382357073e-07, "loss": 0.2307, "step": 5490 }, { "epoch": 6.05, "grad_norm": 0.6052747964859009, "learning_rate": 3.624104509060261e-07, "loss": 0.156, "step": 5500 }, { "epoch": 6.05, "grad_norm": 17.478069305419922, "learning_rate": 3.620592779884815e-07, "loss": 0.1429, "step": 5510 }, { "epoch": 6.05, "grad_norm": 26.3503475189209, "learning_rate": 3.6170810507093693e-07, "loss": 0.1465, "step": 5520 }, { "epoch": 6.05, "grad_norm": 11.710220336914062, "learning_rate": 3.613569321533923e-07, "loss": 0.1006, "step": 5530 }, { "epoch": 6.05, "grad_norm": 1.0736058950424194, "learning_rate": 3.610057592358477e-07, "loss": 0.1282, "step": 5540 }, { "epoch": 6.05, "eval_accuracy": 0.9159200551343901, "eval_loss": 0.22648051381111145, "eval_precision": 0.9507094846900672, "eval_recall": 0.8773259820813232, "eval_runtime": 756.0223, "eval_samples_per_second": 3.839, "eval_steps_per_second": 0.275, "step": 5544 }, { "epoch": 7.0, "grad_norm": 9.742409706115723, "learning_rate": 3.6065458631830307e-07, "loss": 0.1211, "step": 5550 }, { "epoch": 7.0, "grad_norm": 15.846866607666016, "learning_rate": 3.6030341340075856e-07, "loss": 0.1691, "step": 5560 }, { "epoch": 7.0, "grad_norm": 13.851663589477539, "learning_rate": 3.5995224048321394e-07, "loss": 0.0699, "step": 5570 }, { "epoch": 7.0, "grad_norm": 4.5673828125, "learning_rate": 3.596010675656693e-07, "loss": 0.1744, "step": 5580 }, { "epoch": 7.0, "grad_norm": 2.131362199783325, "learning_rate": 3.592498946481247e-07, "loss": 0.142, "step": 5590 }, { "epoch": 7.0, "grad_norm": 1.5259567499160767, "learning_rate": 3.588987217305801e-07, "loss": 0.118, "step": 5600 }, { "epoch": 7.0, "grad_norm": 18.31442642211914, "learning_rate": 3.585475488130355e-07, "loss": 0.0587, "step": 5610 }, { "epoch": 7.0, "grad_norm": 13.407602310180664, "learning_rate": 3.581963758954909e-07, "loss": 0.0457, "step": 5620 }, { "epoch": 7.01, "grad_norm": 4.394553184509277, "learning_rate": 3.5784520297794634e-07, "loss": 0.1332, "step": 5630 }, { "epoch": 7.01, "grad_norm": 6.658379554748535, "learning_rate": 3.574940300604017e-07, "loss": 0.1387, "step": 5640 }, { "epoch": 7.01, "grad_norm": 5.3749098777771, "learning_rate": 3.5714285714285716e-07, "loss": 0.0985, "step": 5650 }, { "epoch": 7.01, "grad_norm": 0.47284817695617676, "learning_rate": 3.5679168422531254e-07, "loss": 0.0569, "step": 5660 }, { "epoch": 7.01, "grad_norm": 18.133363723754883, "learning_rate": 3.564405113077679e-07, "loss": 0.1501, "step": 5670 }, { "epoch": 7.01, "grad_norm": 21.444299697875977, "learning_rate": 3.560893383902233e-07, "loss": 0.1056, "step": 5680 }, { "epoch": 7.01, "grad_norm": 0.5516760945320129, "learning_rate": 3.557381654726787e-07, "loss": 0.1523, "step": 5690 }, { "epoch": 7.01, "grad_norm": 3.2379817962646484, "learning_rate": 3.5538699255513417e-07, "loss": 0.0393, "step": 5700 }, { "epoch": 7.01, "grad_norm": 0.22667652368545532, "learning_rate": 3.5503581963758955e-07, "loss": 0.0911, "step": 5710 }, { "epoch": 7.01, "grad_norm": 15.277908325195312, "learning_rate": 3.5468464672004494e-07, "loss": 0.0793, "step": 5720 }, { "epoch": 7.01, "grad_norm": 9.234783172607422, "learning_rate": 3.543334738025003e-07, "loss": 0.0859, "step": 5730 }, { "epoch": 7.01, "grad_norm": 17.580434799194336, "learning_rate": 3.5398230088495575e-07, "loss": 0.0891, "step": 5740 }, { "epoch": 7.01, "grad_norm": 1.048767328262329, "learning_rate": 3.5363112796741113e-07, "loss": 0.0807, "step": 5750 }, { "epoch": 7.01, "grad_norm": 21.72126579284668, "learning_rate": 3.532799550498665e-07, "loss": 0.221, "step": 5760 }, { "epoch": 7.01, "grad_norm": 10.846237182617188, "learning_rate": 3.529287821323219e-07, "loss": 0.1001, "step": 5770 }, { "epoch": 7.01, "grad_norm": 1.2319902181625366, "learning_rate": 3.525776092147774e-07, "loss": 0.1441, "step": 5780 }, { "epoch": 7.02, "grad_norm": 0.39791321754455566, "learning_rate": 3.5222643629723277e-07, "loss": 0.0565, "step": 5790 }, { "epoch": 7.02, "grad_norm": 3.3046720027923584, "learning_rate": 3.5187526337968815e-07, "loss": 0.0625, "step": 5800 }, { "epoch": 7.02, "grad_norm": 1.0815168619155884, "learning_rate": 3.5152409046214353e-07, "loss": 0.0853, "step": 5810 }, { "epoch": 7.02, "grad_norm": 1.5392457246780396, "learning_rate": 3.511729175445989e-07, "loss": 0.0944, "step": 5820 }, { "epoch": 7.02, "grad_norm": 1.9713202714920044, "learning_rate": 3.5082174462705435e-07, "loss": 0.1026, "step": 5830 }, { "epoch": 7.02, "grad_norm": 6.2235822677612305, "learning_rate": 3.5047057170950973e-07, "loss": 0.0609, "step": 5840 }, { "epoch": 7.02, "grad_norm": 0.4617595672607422, "learning_rate": 3.5011939879196517e-07, "loss": 0.0391, "step": 5850 }, { "epoch": 7.02, "grad_norm": 9.897273063659668, "learning_rate": 3.4976822587442055e-07, "loss": 0.0993, "step": 5860 }, { "epoch": 7.02, "grad_norm": 0.3265884518623352, "learning_rate": 3.49417052956876e-07, "loss": 0.0614, "step": 5870 }, { "epoch": 7.02, "grad_norm": 6.382578372955322, "learning_rate": 3.4906588003933136e-07, "loss": 0.1019, "step": 5880 }, { "epoch": 7.02, "grad_norm": 16.531150817871094, "learning_rate": 3.4871470712178675e-07, "loss": 0.0495, "step": 5890 }, { "epoch": 7.02, "grad_norm": 24.417316436767578, "learning_rate": 3.4836353420424213e-07, "loss": 0.1218, "step": 5900 }, { "epoch": 7.02, "grad_norm": 1.672906756401062, "learning_rate": 3.480123612866975e-07, "loss": 0.0404, "step": 5910 }, { "epoch": 7.02, "grad_norm": 14.079318046569824, "learning_rate": 3.47661188369153e-07, "loss": 0.1474, "step": 5920 }, { "epoch": 7.02, "grad_norm": 7.481205463409424, "learning_rate": 3.473100154516084e-07, "loss": 0.1105, "step": 5930 }, { "epoch": 7.03, "grad_norm": 11.99561595916748, "learning_rate": 3.4695884253406376e-07, "loss": 0.1116, "step": 5940 }, { "epoch": 7.03, "grad_norm": 20.05514144897461, "learning_rate": 3.4660766961651914e-07, "loss": 0.1012, "step": 5950 }, { "epoch": 7.03, "grad_norm": 18.203298568725586, "learning_rate": 3.462564966989746e-07, "loss": 0.115, "step": 5960 }, { "epoch": 7.03, "grad_norm": 1.3069722652435303, "learning_rate": 3.4590532378142996e-07, "loss": 0.0857, "step": 5970 }, { "epoch": 7.03, "grad_norm": 20.007654190063477, "learning_rate": 3.4555415086388534e-07, "loss": 0.0795, "step": 5980 }, { "epoch": 7.03, "grad_norm": 5.2448625564575195, "learning_rate": 3.452029779463408e-07, "loss": 0.0581, "step": 5990 }, { "epoch": 7.03, "grad_norm": 9.269058227539062, "learning_rate": 3.4485180502879616e-07, "loss": 0.1227, "step": 6000 }, { "epoch": 7.03, "grad_norm": 0.682830810546875, "learning_rate": 3.445006321112516e-07, "loss": 0.0992, "step": 6010 }, { "epoch": 7.03, "grad_norm": 1.8294386863708496, "learning_rate": 3.44149459193707e-07, "loss": 0.1366, "step": 6020 }, { "epoch": 7.03, "grad_norm": 19.818981170654297, "learning_rate": 3.4379828627616236e-07, "loss": 0.1784, "step": 6030 }, { "epoch": 7.03, "grad_norm": 5.310610294342041, "learning_rate": 3.4344711335861774e-07, "loss": 0.0789, "step": 6040 }, { "epoch": 7.03, "grad_norm": 1.281659483909607, "learning_rate": 3.430959404410732e-07, "loss": 0.0562, "step": 6050 }, { "epoch": 7.03, "grad_norm": 20.616718292236328, "learning_rate": 3.4274476752352856e-07, "loss": 0.0919, "step": 6060 }, { "epoch": 7.03, "grad_norm": 6.146024703979492, "learning_rate": 3.42393594605984e-07, "loss": 0.1585, "step": 6070 }, { "epoch": 7.03, "grad_norm": 1.6043559312820435, "learning_rate": 3.420424216884394e-07, "loss": 0.1108, "step": 6080 }, { "epoch": 7.03, "grad_norm": 5.046266555786133, "learning_rate": 3.4169124877089476e-07, "loss": 0.0845, "step": 6090 }, { "epoch": 7.04, "grad_norm": 1.7299400568008423, "learning_rate": 3.413400758533502e-07, "loss": 0.07, "step": 6100 }, { "epoch": 7.04, "grad_norm": 1.9915882349014282, "learning_rate": 3.4098890293580557e-07, "loss": 0.0513, "step": 6110 }, { "epoch": 7.04, "grad_norm": 6.278566360473633, "learning_rate": 3.4063773001826095e-07, "loss": 0.1365, "step": 6120 }, { "epoch": 7.04, "grad_norm": 1.8473691940307617, "learning_rate": 3.4028655710071634e-07, "loss": 0.0507, "step": 6130 }, { "epoch": 7.04, "grad_norm": 1.002000331878662, "learning_rate": 3.399353841831718e-07, "loss": 0.0927, "step": 6140 }, { "epoch": 7.04, "grad_norm": 12.666775703430176, "learning_rate": 3.395842112656272e-07, "loss": 0.0833, "step": 6150 }, { "epoch": 7.04, "grad_norm": 17.823122024536133, "learning_rate": 3.392330383480826e-07, "loss": 0.1392, "step": 6160 }, { "epoch": 7.04, "grad_norm": 25.339889526367188, "learning_rate": 3.3888186543053797e-07, "loss": 0.0635, "step": 6170 }, { "epoch": 7.04, "grad_norm": 5.442197322845459, "learning_rate": 3.3853069251299335e-07, "loss": 0.1579, "step": 6180 }, { "epoch": 7.04, "grad_norm": 19.960437774658203, "learning_rate": 3.381795195954488e-07, "loss": 0.1534, "step": 6190 }, { "epoch": 7.04, "grad_norm": 18.592321395874023, "learning_rate": 3.3782834667790417e-07, "loss": 0.1056, "step": 6200 }, { "epoch": 7.04, "grad_norm": 1.3825984001159668, "learning_rate": 3.374771737603596e-07, "loss": 0.0526, "step": 6210 }, { "epoch": 7.04, "grad_norm": 2.896301507949829, "learning_rate": 3.37126000842815e-07, "loss": 0.1203, "step": 6220 }, { "epoch": 7.04, "grad_norm": 14.241394996643066, "learning_rate": 3.367748279252704e-07, "loss": 0.2088, "step": 6230 }, { "epoch": 7.04, "grad_norm": 21.98098373413086, "learning_rate": 3.364236550077258e-07, "loss": 0.0971, "step": 6240 }, { "epoch": 7.04, "grad_norm": 12.069419860839844, "learning_rate": 3.360724820901812e-07, "loss": 0.1374, "step": 6250 }, { "epoch": 7.05, "grad_norm": 5.324819087982178, "learning_rate": 3.3572130917263657e-07, "loss": 0.1505, "step": 6260 }, { "epoch": 7.05, "grad_norm": 20.613985061645508, "learning_rate": 3.3537013625509195e-07, "loss": 0.15, "step": 6270 }, { "epoch": 7.05, "grad_norm": 6.698380470275879, "learning_rate": 3.350189633375474e-07, "loss": 0.1052, "step": 6280 }, { "epoch": 7.05, "grad_norm": 3.5656676292419434, "learning_rate": 3.346677904200028e-07, "loss": 0.0647, "step": 6290 }, { "epoch": 7.05, "grad_norm": 1.21132230758667, "learning_rate": 3.343166175024582e-07, "loss": 0.1096, "step": 6300 }, { "epoch": 7.05, "grad_norm": 5.2367706298828125, "learning_rate": 3.339654445849136e-07, "loss": 0.086, "step": 6310 }, { "epoch": 7.05, "grad_norm": 15.429601669311523, "learning_rate": 3.33614271667369e-07, "loss": 0.0644, "step": 6320 }, { "epoch": 7.05, "grad_norm": 2.4735891819000244, "learning_rate": 3.332630987498244e-07, "loss": 0.1211, "step": 6330 }, { "epoch": 7.05, "eval_accuracy": 0.9086836664369401, "eval_loss": 0.255357027053833, "eval_precision": 0.9561538461538461, "eval_recall": 0.8566505858028945, "eval_runtime": 732.6844, "eval_samples_per_second": 3.961, "eval_steps_per_second": 0.284, "step": 6336 }, { "epoch": 8.0, "grad_norm": 4.287319183349609, "learning_rate": 3.329119258322798e-07, "loss": 0.1059, "step": 6340 }, { "epoch": 8.0, "grad_norm": 4.341545581817627, "learning_rate": 3.3256075291473516e-07, "loss": 0.0767, "step": 6350 }, { "epoch": 8.0, "grad_norm": 0.42396020889282227, "learning_rate": 3.3220957999719065e-07, "loss": 0.0815, "step": 6360 }, { "epoch": 8.0, "grad_norm": 1.964640736579895, "learning_rate": 3.3185840707964603e-07, "loss": 0.1434, "step": 6370 }, { "epoch": 8.0, "grad_norm": 21.740081787109375, "learning_rate": 3.315072341621014e-07, "loss": 0.0831, "step": 6380 }, { "epoch": 8.0, "grad_norm": 2.9163291454315186, "learning_rate": 3.311560612445568e-07, "loss": 0.072, "step": 6390 }, { "epoch": 8.0, "grad_norm": 22.2987117767334, "learning_rate": 3.308048883270122e-07, "loss": 0.0901, "step": 6400 }, { "epoch": 8.0, "grad_norm": 2.1585073471069336, "learning_rate": 3.304537154094676e-07, "loss": 0.1003, "step": 6410 }, { "epoch": 8.01, "grad_norm": 2.7472150325775146, "learning_rate": 3.30102542491923e-07, "loss": 0.1212, "step": 6420 }, { "epoch": 8.01, "grad_norm": 9.646137237548828, "learning_rate": 3.2975136957437843e-07, "loss": 0.0777, "step": 6430 }, { "epoch": 8.01, "grad_norm": 21.681188583374023, "learning_rate": 3.294001966568338e-07, "loss": 0.1072, "step": 6440 }, { "epoch": 8.01, "grad_norm": 27.498842239379883, "learning_rate": 3.2904902373928925e-07, "loss": 0.073, "step": 6450 }, { "epoch": 8.01, "grad_norm": 13.911131858825684, "learning_rate": 3.2869785082174463e-07, "loss": 0.0868, "step": 6460 }, { "epoch": 8.01, "grad_norm": 1.294236421585083, "learning_rate": 3.283466779042e-07, "loss": 0.0836, "step": 6470 }, { "epoch": 8.01, "grad_norm": 17.68450927734375, "learning_rate": 3.279955049866554e-07, "loss": 0.1247, "step": 6480 }, { "epoch": 8.01, "grad_norm": 1.88713800907135, "learning_rate": 3.276443320691108e-07, "loss": 0.0233, "step": 6490 }, { "epoch": 8.01, "grad_norm": 0.5428490042686462, "learning_rate": 3.272931591515662e-07, "loss": 0.0414, "step": 6500 }, { "epoch": 8.01, "grad_norm": 2.0133981704711914, "learning_rate": 3.2694198623402164e-07, "loss": 0.0836, "step": 6510 }, { "epoch": 8.01, "grad_norm": 9.137702941894531, "learning_rate": 3.26590813316477e-07, "loss": 0.0756, "step": 6520 }, { "epoch": 8.01, "grad_norm": 18.93169403076172, "learning_rate": 3.262396403989324e-07, "loss": 0.1789, "step": 6530 }, { "epoch": 8.01, "grad_norm": 6.524487495422363, "learning_rate": 3.2588846748138784e-07, "loss": 0.0706, "step": 6540 }, { "epoch": 8.01, "grad_norm": 0.6078960299491882, "learning_rate": 3.255372945638432e-07, "loss": 0.0839, "step": 6550 }, { "epoch": 8.01, "grad_norm": 1.22637939453125, "learning_rate": 3.251861216462986e-07, "loss": 0.1302, "step": 6560 }, { "epoch": 8.01, "grad_norm": 4.361688137054443, "learning_rate": 3.24834948728754e-07, "loss": 0.1114, "step": 6570 }, { "epoch": 8.02, "grad_norm": 25.066158294677734, "learning_rate": 3.244837758112094e-07, "loss": 0.1415, "step": 6580 }, { "epoch": 8.02, "grad_norm": 0.827136754989624, "learning_rate": 3.2413260289366486e-07, "loss": 0.0514, "step": 6590 }, { "epoch": 8.02, "grad_norm": 0.4990995228290558, "learning_rate": 3.2378142997612024e-07, "loss": 0.089, "step": 6600 }, { "epoch": 8.02, "grad_norm": 35.56826400756836, "learning_rate": 3.234302570585756e-07, "loss": 0.1249, "step": 6610 }, { "epoch": 8.02, "grad_norm": 12.660245895385742, "learning_rate": 3.23079084141031e-07, "loss": 0.1474, "step": 6620 }, { "epoch": 8.02, "grad_norm": 14.677192687988281, "learning_rate": 3.2272791122348644e-07, "loss": 0.1264, "step": 6630 }, { "epoch": 8.02, "grad_norm": 1.2272533178329468, "learning_rate": 3.223767383059418e-07, "loss": 0.0882, "step": 6640 }, { "epoch": 8.02, "grad_norm": 3.515258550643921, "learning_rate": 3.2202556538839725e-07, "loss": 0.1178, "step": 6650 }, { "epoch": 8.02, "grad_norm": 36.15704345703125, "learning_rate": 3.2167439247085264e-07, "loss": 0.0388, "step": 6660 }, { "epoch": 8.02, "grad_norm": 4.137545108795166, "learning_rate": 3.21323219553308e-07, "loss": 0.0976, "step": 6670 }, { "epoch": 8.02, "grad_norm": 8.47613525390625, "learning_rate": 3.2097204663576345e-07, "loss": 0.1086, "step": 6680 }, { "epoch": 8.02, "grad_norm": 4.984015464782715, "learning_rate": 3.2062087371821884e-07, "loss": 0.0947, "step": 6690 }, { "epoch": 8.02, "grad_norm": 61.828582763671875, "learning_rate": 3.202697008006742e-07, "loss": 0.1438, "step": 6700 }, { "epoch": 8.02, "grad_norm": 1.4810198545455933, "learning_rate": 3.199185278831296e-07, "loss": 0.0785, "step": 6710 }, { "epoch": 8.02, "grad_norm": 20.765823364257812, "learning_rate": 3.1956735496558503e-07, "loss": 0.1147, "step": 6720 }, { "epoch": 8.02, "grad_norm": 17.071489334106445, "learning_rate": 3.1921618204804047e-07, "loss": 0.1129, "step": 6730 }, { "epoch": 8.03, "grad_norm": 0.20859764516353607, "learning_rate": 3.1886500913049585e-07, "loss": 0.0829, "step": 6740 }, { "epoch": 8.03, "grad_norm": 1.041981816291809, "learning_rate": 3.1851383621295123e-07, "loss": 0.1526, "step": 6750 }, { "epoch": 8.03, "grad_norm": 3.4200479984283447, "learning_rate": 3.1816266329540667e-07, "loss": 0.1141, "step": 6760 }, { "epoch": 8.03, "grad_norm": 0.3675917685031891, "learning_rate": 3.1781149037786205e-07, "loss": 0.1049, "step": 6770 }, { "epoch": 8.03, "grad_norm": 2.9936625957489014, "learning_rate": 3.1746031746031743e-07, "loss": 0.0662, "step": 6780 }, { "epoch": 8.03, "grad_norm": 28.420068740844727, "learning_rate": 3.171091445427728e-07, "loss": 0.1405, "step": 6790 }, { "epoch": 8.03, "grad_norm": 4.97244930267334, "learning_rate": 3.1675797162522825e-07, "loss": 0.1058, "step": 6800 }, { "epoch": 8.03, "grad_norm": 14.396577835083008, "learning_rate": 3.164067987076837e-07, "loss": 0.0677, "step": 6810 }, { "epoch": 8.03, "grad_norm": 16.645532608032227, "learning_rate": 3.1605562579013907e-07, "loss": 0.0805, "step": 6820 }, { "epoch": 8.03, "grad_norm": 1.6053117513656616, "learning_rate": 3.1570445287259445e-07, "loss": 0.0487, "step": 6830 }, { "epoch": 8.03, "grad_norm": 14.959306716918945, "learning_rate": 3.1535327995504983e-07, "loss": 0.067, "step": 6840 }, { "epoch": 8.03, "grad_norm": 28.669815063476562, "learning_rate": 3.1500210703750526e-07, "loss": 0.1044, "step": 6850 }, { "epoch": 8.03, "grad_norm": 0.3287888467311859, "learning_rate": 3.1465093411996065e-07, "loss": 0.1131, "step": 6860 }, { "epoch": 8.03, "grad_norm": 13.309894561767578, "learning_rate": 3.142997612024161e-07, "loss": 0.1029, "step": 6870 }, { "epoch": 8.03, "grad_norm": 4.255095958709717, "learning_rate": 3.1394858828487146e-07, "loss": 0.1391, "step": 6880 }, { "epoch": 8.04, "grad_norm": 9.072338104248047, "learning_rate": 3.1359741536732684e-07, "loss": 0.0983, "step": 6890 }, { "epoch": 8.04, "grad_norm": 0.8332878351211548, "learning_rate": 3.132462424497823e-07, "loss": 0.0736, "step": 6900 }, { "epoch": 8.04, "grad_norm": 0.2669464647769928, "learning_rate": 3.1289506953223766e-07, "loss": 0.0355, "step": 6910 }, { "epoch": 8.04, "grad_norm": 1.5229275226593018, "learning_rate": 3.1254389661469304e-07, "loss": 0.0583, "step": 6920 }, { "epoch": 8.04, "grad_norm": 13.594375610351562, "learning_rate": 3.121927236971484e-07, "loss": 0.1292, "step": 6930 }, { "epoch": 8.04, "grad_norm": 11.555625915527344, "learning_rate": 3.118415507796039e-07, "loss": 0.1764, "step": 6940 }, { "epoch": 8.04, "grad_norm": 5.248417854309082, "learning_rate": 3.114903778620593e-07, "loss": 0.0616, "step": 6950 }, { "epoch": 8.04, "grad_norm": 18.84252166748047, "learning_rate": 3.111392049445147e-07, "loss": 0.0481, "step": 6960 }, { "epoch": 8.04, "grad_norm": 0.2787688970565796, "learning_rate": 3.1078803202697006e-07, "loss": 0.1009, "step": 6970 }, { "epoch": 8.04, "grad_norm": 15.687891960144043, "learning_rate": 3.1043685910942544e-07, "loss": 0.0652, "step": 6980 }, { "epoch": 8.04, "grad_norm": 3.702401876449585, "learning_rate": 3.100856861918809e-07, "loss": 0.0598, "step": 6990 }, { "epoch": 8.04, "grad_norm": 4.519927501678467, "learning_rate": 3.0973451327433626e-07, "loss": 0.1266, "step": 7000 }, { "epoch": 8.04, "grad_norm": 15.591084480285645, "learning_rate": 3.0938334035679164e-07, "loss": 0.1314, "step": 7010 }, { "epoch": 8.04, "grad_norm": 21.9891357421875, "learning_rate": 3.090321674392471e-07, "loss": 0.1293, "step": 7020 }, { "epoch": 8.04, "grad_norm": 0.6028394103050232, "learning_rate": 3.086809945217025e-07, "loss": 0.0403, "step": 7030 }, { "epoch": 8.04, "grad_norm": 5.40667200088501, "learning_rate": 3.083298216041579e-07, "loss": 0.1004, "step": 7040 }, { "epoch": 8.05, "grad_norm": 1.0436077117919922, "learning_rate": 3.0797864868661327e-07, "loss": 0.1037, "step": 7050 }, { "epoch": 8.05, "grad_norm": 9.54244327545166, "learning_rate": 3.0762747576906865e-07, "loss": 0.0796, "step": 7060 }, { "epoch": 8.05, "grad_norm": 67.03922271728516, "learning_rate": 3.0727630285152404e-07, "loss": 0.1056, "step": 7070 }, { "epoch": 8.05, "grad_norm": 14.440448760986328, "learning_rate": 3.0692512993397947e-07, "loss": 0.0279, "step": 7080 }, { "epoch": 8.05, "grad_norm": 0.8730477094650269, "learning_rate": 3.065739570164349e-07, "loss": 0.0981, "step": 7090 }, { "epoch": 8.05, "grad_norm": 1.702113151550293, "learning_rate": 3.062227840988903e-07, "loss": 0.1088, "step": 7100 }, { "epoch": 8.05, "grad_norm": 2.820322036743164, "learning_rate": 3.0587161118134567e-07, "loss": 0.1205, "step": 7110 }, { "epoch": 8.05, "grad_norm": 4.130664348602295, "learning_rate": 3.055204382638011e-07, "loss": 0.076, "step": 7120 }, { "epoch": 8.05, "eval_accuracy": 0.9062715368711234, "eval_loss": 0.2738107144832611, "eval_precision": 0.9587548638132296, "eval_recall": 0.8490696071674707, "eval_runtime": 656.4723, "eval_samples_per_second": 4.421, "eval_steps_per_second": 0.317, "step": 7128 }, { "epoch": 9.0, "grad_norm": 3.000011682510376, "learning_rate": 3.051692653462565e-07, "loss": 0.1622, "step": 7130 }, { "epoch": 9.0, "grad_norm": 0.39611920714378357, "learning_rate": 3.0481809242871187e-07, "loss": 0.0904, "step": 7140 }, { "epoch": 9.0, "grad_norm": 21.985692977905273, "learning_rate": 3.0446691951116725e-07, "loss": 0.0969, "step": 7150 }, { "epoch": 9.0, "grad_norm": 6.75694465637207, "learning_rate": 3.0411574659362274e-07, "loss": 0.0334, "step": 7160 }, { "epoch": 9.0, "grad_norm": 0.8904764652252197, "learning_rate": 3.037645736760781e-07, "loss": 0.1156, "step": 7170 }, { "epoch": 9.0, "grad_norm": 1.2181483507156372, "learning_rate": 3.034134007585335e-07, "loss": 0.0557, "step": 7180 }, { "epoch": 9.0, "grad_norm": 8.484641075134277, "learning_rate": 3.030622278409889e-07, "loss": 0.088, "step": 7190 }, { "epoch": 9.0, "grad_norm": 6.985033988952637, "learning_rate": 3.0271105492344427e-07, "loss": 0.0935, "step": 7200 }, { "epoch": 9.01, "grad_norm": 4.86383581161499, "learning_rate": 3.023598820058997e-07, "loss": 0.0624, "step": 7210 }, { "epoch": 9.01, "grad_norm": 29.57334327697754, "learning_rate": 3.020087090883551e-07, "loss": 0.1027, "step": 7220 }, { "epoch": 9.01, "grad_norm": 1.6081123352050781, "learning_rate": 3.0165753617081047e-07, "loss": 0.017, "step": 7230 }, { "epoch": 9.01, "grad_norm": 17.083829879760742, "learning_rate": 3.013063632532659e-07, "loss": 0.096, "step": 7240 }, { "epoch": 9.01, "grad_norm": 1.8939573764801025, "learning_rate": 3.0095519033572133e-07, "loss": 0.0629, "step": 7250 }, { "epoch": 9.01, "grad_norm": 3.828808307647705, "learning_rate": 3.006040174181767e-07, "loss": 0.0323, "step": 7260 }, { "epoch": 9.01, "grad_norm": 0.9476205110549927, "learning_rate": 3.002528445006321e-07, "loss": 0.1058, "step": 7270 }, { "epoch": 9.01, "grad_norm": 0.6929569840431213, "learning_rate": 2.999016715830875e-07, "loss": 0.0234, "step": 7280 }, { "epoch": 9.01, "grad_norm": 15.474339485168457, "learning_rate": 2.9955049866554286e-07, "loss": 0.1038, "step": 7290 }, { "epoch": 9.01, "grad_norm": 10.778635025024414, "learning_rate": 2.991993257479983e-07, "loss": 0.0533, "step": 7300 }, { "epoch": 9.01, "grad_norm": 32.16393280029297, "learning_rate": 2.9884815283045373e-07, "loss": 0.1378, "step": 7310 }, { "epoch": 9.01, "grad_norm": 35.20622634887695, "learning_rate": 2.984969799129091e-07, "loss": 0.1584, "step": 7320 }, { "epoch": 9.01, "grad_norm": 0.6140238046646118, "learning_rate": 2.981458069953645e-07, "loss": 0.0666, "step": 7330 }, { "epoch": 9.01, "grad_norm": 0.3121790885925293, "learning_rate": 2.9779463407781993e-07, "loss": 0.0661, "step": 7340 }, { "epoch": 9.01, "grad_norm": 24.16884994506836, "learning_rate": 2.974434611602753e-07, "loss": 0.1045, "step": 7350 }, { "epoch": 9.01, "grad_norm": 6.619697093963623, "learning_rate": 2.970922882427307e-07, "loss": 0.0708, "step": 7360 }, { "epoch": 9.02, "grad_norm": 8.28366470336914, "learning_rate": 2.967411153251861e-07, "loss": 0.0544, "step": 7370 }, { "epoch": 9.02, "grad_norm": 1.1260850429534912, "learning_rate": 2.963899424076415e-07, "loss": 0.0356, "step": 7380 }, { "epoch": 9.02, "grad_norm": 1.715057373046875, "learning_rate": 2.9603876949009695e-07, "loss": 0.0378, "step": 7390 }, { "epoch": 9.02, "grad_norm": 19.292835235595703, "learning_rate": 2.9568759657255233e-07, "loss": 0.1163, "step": 7400 }, { "epoch": 9.02, "grad_norm": 18.853715896606445, "learning_rate": 2.953364236550077e-07, "loss": 0.0258, "step": 7410 }, { "epoch": 9.02, "grad_norm": 22.08260154724121, "learning_rate": 2.949852507374631e-07, "loss": 0.1199, "step": 7420 }, { "epoch": 9.02, "grad_norm": 25.916072845458984, "learning_rate": 2.9463407781991853e-07, "loss": 0.1398, "step": 7430 }, { "epoch": 9.02, "grad_norm": 21.899497985839844, "learning_rate": 2.942829049023739e-07, "loss": 0.0447, "step": 7440 }, { "epoch": 9.02, "grad_norm": 5.647512435913086, "learning_rate": 2.939317319848293e-07, "loss": 0.1623, "step": 7450 }, { "epoch": 9.02, "grad_norm": 10.745491027832031, "learning_rate": 2.935805590672847e-07, "loss": 0.0976, "step": 7460 }, { "epoch": 9.02, "grad_norm": 15.636648178100586, "learning_rate": 2.932293861497401e-07, "loss": 0.0189, "step": 7470 }, { "epoch": 9.02, "grad_norm": 18.907142639160156, "learning_rate": 2.9287821323219554e-07, "loss": 0.0422, "step": 7480 }, { "epoch": 9.02, "grad_norm": 3.067370891571045, "learning_rate": 2.925270403146509e-07, "loss": 0.0982, "step": 7490 }, { "epoch": 9.02, "grad_norm": 1.030925989151001, "learning_rate": 2.921758673971063e-07, "loss": 0.0815, "step": 7500 }, { "epoch": 9.02, "grad_norm": 5.1749162673950195, "learning_rate": 2.918246944795617e-07, "loss": 0.093, "step": 7510 }, { "epoch": 9.02, "grad_norm": 21.443838119506836, "learning_rate": 2.914735215620171e-07, "loss": 0.0834, "step": 7520 }, { "epoch": 9.03, "grad_norm": 32.34575653076172, "learning_rate": 2.9112234864447256e-07, "loss": 0.1081, "step": 7530 }, { "epoch": 9.03, "grad_norm": 28.232345581054688, "learning_rate": 2.9077117572692794e-07, "loss": 0.1281, "step": 7540 }, { "epoch": 9.03, "grad_norm": 15.796542167663574, "learning_rate": 2.904200028093833e-07, "loss": 0.0865, "step": 7550 }, { "epoch": 9.03, "grad_norm": 21.654006958007812, "learning_rate": 2.900688298918387e-07, "loss": 0.1527, "step": 7560 }, { "epoch": 9.03, "grad_norm": 0.21499530971050262, "learning_rate": 2.8971765697429414e-07, "loss": 0.0654, "step": 7570 }, { "epoch": 9.03, "grad_norm": 13.48742961883545, "learning_rate": 2.893664840567495e-07, "loss": 0.0844, "step": 7580 }, { "epoch": 9.03, "grad_norm": 2.3621346950531006, "learning_rate": 2.890153111392049e-07, "loss": 0.0813, "step": 7590 }, { "epoch": 9.03, "grad_norm": 0.2501245439052582, "learning_rate": 2.8866413822166034e-07, "loss": 0.0917, "step": 7600 }, { "epoch": 9.03, "grad_norm": 0.7612245082855225, "learning_rate": 2.8831296530411577e-07, "loss": 0.0683, "step": 7610 }, { "epoch": 9.03, "grad_norm": 36.79018783569336, "learning_rate": 2.8796179238657115e-07, "loss": 0.0773, "step": 7620 }, { "epoch": 9.03, "grad_norm": 0.5289931893348694, "learning_rate": 2.8761061946902654e-07, "loss": 0.1101, "step": 7630 }, { "epoch": 9.03, "grad_norm": 17.04367446899414, "learning_rate": 2.872594465514819e-07, "loss": 0.1634, "step": 7640 }, { "epoch": 9.03, "grad_norm": 14.436210632324219, "learning_rate": 2.869082736339373e-07, "loss": 0.0889, "step": 7650 }, { "epoch": 9.03, "grad_norm": 30.680509567260742, "learning_rate": 2.8655710071639273e-07, "loss": 0.0631, "step": 7660 }, { "epoch": 9.03, "grad_norm": 1.2134703397750854, "learning_rate": 2.862059277988481e-07, "loss": 0.1903, "step": 7670 }, { "epoch": 9.03, "grad_norm": 0.43618157505989075, "learning_rate": 2.8585475488130355e-07, "loss": 0.1265, "step": 7680 }, { "epoch": 9.04, "grad_norm": 19.14188003540039, "learning_rate": 2.8550358196375893e-07, "loss": 0.0773, "step": 7690 }, { "epoch": 9.04, "grad_norm": 6.0784687995910645, "learning_rate": 2.8515240904621437e-07, "loss": 0.0915, "step": 7700 }, { "epoch": 9.04, "grad_norm": 13.971697807312012, "learning_rate": 2.8480123612866975e-07, "loss": 0.1321, "step": 7710 }, { "epoch": 9.04, "grad_norm": 56.78779983520508, "learning_rate": 2.8445006321112513e-07, "loss": 0.1274, "step": 7720 }, { "epoch": 9.04, "grad_norm": 0.6677742004394531, "learning_rate": 2.840988902935805e-07, "loss": 0.0707, "step": 7730 }, { "epoch": 9.04, "grad_norm": 1.6670889854431152, "learning_rate": 2.837477173760359e-07, "loss": 0.0934, "step": 7740 }, { "epoch": 9.04, "grad_norm": 3.2662906646728516, "learning_rate": 2.833965444584914e-07, "loss": 0.235, "step": 7750 }, { "epoch": 9.04, "grad_norm": 5.112877368927002, "learning_rate": 2.8304537154094677e-07, "loss": 0.0414, "step": 7760 }, { "epoch": 9.04, "grad_norm": 47.451412200927734, "learning_rate": 2.8269419862340215e-07, "loss": 0.1443, "step": 7770 }, { "epoch": 9.04, "grad_norm": 6.293603420257568, "learning_rate": 2.8234302570585753e-07, "loss": 0.1102, "step": 7780 }, { "epoch": 9.04, "grad_norm": 22.858219146728516, "learning_rate": 2.8199185278831296e-07, "loss": 0.0803, "step": 7790 }, { "epoch": 9.04, "grad_norm": 1.3502225875854492, "learning_rate": 2.8164067987076835e-07, "loss": 0.0491, "step": 7800 }, { "epoch": 9.04, "grad_norm": 23.52175521850586, "learning_rate": 2.8128950695322373e-07, "loss": 0.0862, "step": 7810 }, { "epoch": 9.04, "grad_norm": 20.446243286132812, "learning_rate": 2.8093833403567916e-07, "loss": 0.0578, "step": 7820 }, { "epoch": 9.04, "grad_norm": 1.311772108078003, "learning_rate": 2.805871611181346e-07, "loss": 0.1186, "step": 7830 }, { "epoch": 9.05, "grad_norm": 5.1483049392700195, "learning_rate": 2.8023598820059e-07, "loss": 0.0512, "step": 7840 }, { "epoch": 9.05, "grad_norm": 7.511555194854736, "learning_rate": 2.7988481528304536e-07, "loss": 0.1003, "step": 7850 }, { "epoch": 9.05, "grad_norm": 22.577939987182617, "learning_rate": 2.7953364236550074e-07, "loss": 0.0583, "step": 7860 }, { "epoch": 9.05, "grad_norm": 11.192217826843262, "learning_rate": 2.791824694479561e-07, "loss": 0.0703, "step": 7870 }, { "epoch": 9.05, "grad_norm": 28.025941848754883, "learning_rate": 2.7883129653041156e-07, "loss": 0.1285, "step": 7880 }, { "epoch": 9.05, "grad_norm": 0.19005124270915985, "learning_rate": 2.78480123612867e-07, "loss": 0.0282, "step": 7890 }, { "epoch": 9.05, "grad_norm": 6.094644546508789, "learning_rate": 2.781289506953224e-07, "loss": 0.1231, "step": 7900 }, { "epoch": 9.05, "grad_norm": 12.106431007385254, "learning_rate": 2.7777777777777776e-07, "loss": 0.0535, "step": 7910 }, { "epoch": 9.05, "grad_norm": 4.554694652557373, "learning_rate": 2.774266048602332e-07, "loss": 0.1152, "step": 7920 }, { "epoch": 9.05, "eval_accuracy": 0.9090282563749138, "eval_loss": 0.2785201668739319, "eval_precision": 0.9540933435348126, "eval_recall": 0.8594073053066851, "eval_runtime": 804.744, "eval_samples_per_second": 3.606, "eval_steps_per_second": 0.258, "step": 7920 }, { "epoch": 10.0, "grad_norm": 18.617822647094727, "learning_rate": 2.770754319426886e-07, "loss": 0.1494, "step": 7930 }, { "epoch": 10.0, "grad_norm": 0.37271180748939514, "learning_rate": 2.7672425902514396e-07, "loss": 0.0926, "step": 7940 }, { "epoch": 10.0, "grad_norm": 1.2353838682174683, "learning_rate": 2.7637308610759934e-07, "loss": 0.0621, "step": 7950 }, { "epoch": 10.0, "grad_norm": 4.800443172454834, "learning_rate": 2.760219131900547e-07, "loss": 0.0359, "step": 7960 }, { "epoch": 10.0, "grad_norm": 3.866636276245117, "learning_rate": 2.756707402725102e-07, "loss": 0.0532, "step": 7970 }, { "epoch": 10.0, "grad_norm": 0.48045679926872253, "learning_rate": 2.753195673549656e-07, "loss": 0.0937, "step": 7980 }, { "epoch": 10.0, "grad_norm": 22.70956039428711, "learning_rate": 2.7496839443742097e-07, "loss": 0.0797, "step": 7990 }, { "epoch": 10.01, "grad_norm": 19.074684143066406, "learning_rate": 2.7461722151987636e-07, "loss": 0.125, "step": 8000 }, { "epoch": 10.01, "grad_norm": 11.377254486083984, "learning_rate": 2.742660486023318e-07, "loss": 0.0171, "step": 8010 }, { "epoch": 10.01, "grad_norm": 30.800861358642578, "learning_rate": 2.7391487568478717e-07, "loss": 0.1278, "step": 8020 }, { "epoch": 10.01, "grad_norm": 25.61414337158203, "learning_rate": 2.7356370276724255e-07, "loss": 0.1644, "step": 8030 }, { "epoch": 10.01, "grad_norm": 0.699822187423706, "learning_rate": 2.73212529849698e-07, "loss": 0.1147, "step": 8040 }, { "epoch": 10.01, "grad_norm": 1.4131821393966675, "learning_rate": 2.7286135693215337e-07, "loss": 0.0751, "step": 8050 }, { "epoch": 10.01, "grad_norm": 10.666711807250977, "learning_rate": 2.725101840146088e-07, "loss": 0.181, "step": 8060 }, { "epoch": 10.01, "grad_norm": 2.035438299179077, "learning_rate": 2.721590110970642e-07, "loss": 0.046, "step": 8070 }, { "epoch": 10.01, "grad_norm": 36.81755065917969, "learning_rate": 2.7180783817951957e-07, "loss": 0.0436, "step": 8080 }, { "epoch": 10.01, "grad_norm": 9.720380783081055, "learning_rate": 2.7145666526197495e-07, "loss": 0.0856, "step": 8090 }, { "epoch": 10.01, "grad_norm": 1.0668559074401855, "learning_rate": 2.711054923444304e-07, "loss": 0.0162, "step": 8100 }, { "epoch": 10.01, "grad_norm": 27.140485763549805, "learning_rate": 2.707543194268858e-07, "loss": 0.0645, "step": 8110 }, { "epoch": 10.01, "grad_norm": 16.283849716186523, "learning_rate": 2.704031465093412e-07, "loss": 0.1413, "step": 8120 }, { "epoch": 10.01, "grad_norm": 2.894906759262085, "learning_rate": 2.700519735917966e-07, "loss": 0.064, "step": 8130 }, { "epoch": 10.01, "grad_norm": 1.825907588005066, "learning_rate": 2.69700800674252e-07, "loss": 0.1597, "step": 8140 }, { "epoch": 10.01, "grad_norm": 5.189210414886475, "learning_rate": 2.693496277567074e-07, "loss": 0.1502, "step": 8150 }, { "epoch": 10.02, "grad_norm": 0.36072829365730286, "learning_rate": 2.689984548391628e-07, "loss": 0.0481, "step": 8160 }, { "epoch": 10.02, "grad_norm": 16.99314308166504, "learning_rate": 2.6864728192161817e-07, "loss": 0.093, "step": 8170 }, { "epoch": 10.02, "grad_norm": 0.8725453615188599, "learning_rate": 2.6829610900407355e-07, "loss": 0.0755, "step": 8180 }, { "epoch": 10.02, "grad_norm": 18.313819885253906, "learning_rate": 2.6794493608652904e-07, "loss": 0.0633, "step": 8190 }, { "epoch": 10.02, "grad_norm": 27.121206283569336, "learning_rate": 2.675937631689844e-07, "loss": 0.058, "step": 8200 }, { "epoch": 10.02, "grad_norm": 0.23845867812633514, "learning_rate": 2.672425902514398e-07, "loss": 0.0893, "step": 8210 }, { "epoch": 10.02, "grad_norm": 0.2858040928840637, "learning_rate": 2.668914173338952e-07, "loss": 0.0703, "step": 8220 }, { "epoch": 10.02, "grad_norm": 2.559169292449951, "learning_rate": 2.665402444163506e-07, "loss": 0.0389, "step": 8230 }, { "epoch": 10.02, "grad_norm": 27.230844497680664, "learning_rate": 2.66189071498806e-07, "loss": 0.094, "step": 8240 }, { "epoch": 10.02, "grad_norm": 0.22530515491962433, "learning_rate": 2.658378985812614e-07, "loss": 0.0569, "step": 8250 }, { "epoch": 10.02, "grad_norm": 10.439746856689453, "learning_rate": 2.654867256637168e-07, "loss": 0.0688, "step": 8260 }, { "epoch": 10.02, "grad_norm": 17.911014556884766, "learning_rate": 2.651355527461722e-07, "loss": 0.1362, "step": 8270 }, { "epoch": 10.02, "grad_norm": 40.26321029663086, "learning_rate": 2.6478437982862763e-07, "loss": 0.0371, "step": 8280 }, { "epoch": 10.02, "grad_norm": 1.1309014558792114, "learning_rate": 2.64433206911083e-07, "loss": 0.0495, "step": 8290 }, { "epoch": 10.02, "grad_norm": 1.6739460229873657, "learning_rate": 2.640820339935384e-07, "loss": 0.0619, "step": 8300 }, { "epoch": 10.02, "grad_norm": 9.568103790283203, "learning_rate": 2.637308610759938e-07, "loss": 0.0637, "step": 8310 }, { "epoch": 10.03, "grad_norm": 3.935555934906006, "learning_rate": 2.633796881584492e-07, "loss": 0.1149, "step": 8320 }, { "epoch": 10.03, "grad_norm": 15.553072929382324, "learning_rate": 2.6302851524090465e-07, "loss": 0.0506, "step": 8330 }, { "epoch": 10.03, "grad_norm": 0.7506347298622131, "learning_rate": 2.6267734232336003e-07, "loss": 0.0998, "step": 8340 }, { "epoch": 10.03, "grad_norm": 5.43869686126709, "learning_rate": 2.623261694058154e-07, "loss": 0.0365, "step": 8350 }, { "epoch": 10.03, "grad_norm": 1.0938035249710083, "learning_rate": 2.619749964882708e-07, "loss": 0.0657, "step": 8360 }, { "epoch": 10.03, "grad_norm": 0.17898356914520264, "learning_rate": 2.6162382357072623e-07, "loss": 0.0171, "step": 8370 }, { "epoch": 10.03, "grad_norm": 15.54056453704834, "learning_rate": 2.612726506531816e-07, "loss": 0.0459, "step": 8380 }, { "epoch": 10.03, "grad_norm": 30.857336044311523, "learning_rate": 2.60921477735637e-07, "loss": 0.0549, "step": 8390 }, { "epoch": 10.03, "grad_norm": 24.690610885620117, "learning_rate": 2.6057030481809237e-07, "loss": 0.0671, "step": 8400 }, { "epoch": 10.03, "grad_norm": 21.80284881591797, "learning_rate": 2.6021913190054786e-07, "loss": 0.0757, "step": 8410 }, { "epoch": 10.03, "grad_norm": 30.155031204223633, "learning_rate": 2.5986795898300324e-07, "loss": 0.0943, "step": 8420 }, { "epoch": 10.03, "grad_norm": 0.35464924573898315, "learning_rate": 2.595167860654586e-07, "loss": 0.1139, "step": 8430 }, { "epoch": 10.03, "grad_norm": 18.864530563354492, "learning_rate": 2.59165613147914e-07, "loss": 0.1183, "step": 8440 }, { "epoch": 10.03, "grad_norm": 2.6870336532592773, "learning_rate": 2.588144402303694e-07, "loss": 0.1551, "step": 8450 }, { "epoch": 10.03, "grad_norm": 2.52363657951355, "learning_rate": 2.584632673128248e-07, "loss": 0.0627, "step": 8460 }, { "epoch": 10.03, "grad_norm": 14.938979148864746, "learning_rate": 2.581120943952802e-07, "loss": 0.1016, "step": 8470 }, { "epoch": 10.04, "grad_norm": 0.8647470474243164, "learning_rate": 2.5776092147773564e-07, "loss": 0.1908, "step": 8480 }, { "epoch": 10.04, "grad_norm": 5.785952091217041, "learning_rate": 2.57409748560191e-07, "loss": 0.0758, "step": 8490 }, { "epoch": 10.04, "grad_norm": 4.872706890106201, "learning_rate": 2.5705857564264646e-07, "loss": 0.0299, "step": 8500 }, { "epoch": 10.04, "grad_norm": 0.8269332647323608, "learning_rate": 2.5670740272510184e-07, "loss": 0.0786, "step": 8510 }, { "epoch": 10.04, "grad_norm": 4.181182384490967, "learning_rate": 2.563562298075572e-07, "loss": 0.0539, "step": 8520 }, { "epoch": 10.04, "grad_norm": 19.55901527404785, "learning_rate": 2.560050568900126e-07, "loss": 0.1648, "step": 8530 }, { "epoch": 10.04, "grad_norm": 24.398893356323242, "learning_rate": 2.55653883972468e-07, "loss": 0.097, "step": 8540 }, { "epoch": 10.04, "grad_norm": 6.522715091705322, "learning_rate": 2.5530271105492347e-07, "loss": 0.072, "step": 8550 }, { "epoch": 10.04, "grad_norm": 0.2392318993806839, "learning_rate": 2.5495153813737885e-07, "loss": 0.0841, "step": 8560 }, { "epoch": 10.04, "grad_norm": 32.177833557128906, "learning_rate": 2.5460036521983424e-07, "loss": 0.1297, "step": 8570 }, { "epoch": 10.04, "grad_norm": 1.4605242013931274, "learning_rate": 2.542491923022896e-07, "loss": 0.0863, "step": 8580 }, { "epoch": 10.04, "grad_norm": 20.64754295349121, "learning_rate": 2.5389801938474505e-07, "loss": 0.1111, "step": 8590 }, { "epoch": 10.04, "grad_norm": 18.21504020690918, "learning_rate": 2.5354684646720044e-07, "loss": 0.0614, "step": 8600 }, { "epoch": 10.04, "grad_norm": 0.46438068151474, "learning_rate": 2.531956735496558e-07, "loss": 0.006, "step": 8610 }, { "epoch": 10.04, "grad_norm": 13.728133201599121, "learning_rate": 2.528445006321112e-07, "loss": 0.1083, "step": 8620 }, { "epoch": 10.04, "grad_norm": 2.8626413345336914, "learning_rate": 2.524933277145667e-07, "loss": 0.0562, "step": 8630 }, { "epoch": 10.05, "grad_norm": 11.264602661132812, "learning_rate": 2.5214215479702207e-07, "loss": 0.0785, "step": 8640 }, { "epoch": 10.05, "grad_norm": 0.8101587295532227, "learning_rate": 2.5179098187947745e-07, "loss": 0.0897, "step": 8650 }, { "epoch": 10.05, "grad_norm": 2.839077949523926, "learning_rate": 2.5143980896193283e-07, "loss": 0.0539, "step": 8660 }, { "epoch": 10.05, "grad_norm": 64.92765045166016, "learning_rate": 2.510886360443882e-07, "loss": 0.1078, "step": 8670 }, { "epoch": 10.05, "grad_norm": 8.513782501220703, "learning_rate": 2.5073746312684365e-07, "loss": 0.032, "step": 8680 }, { "epoch": 10.05, "grad_norm": 4.084259986877441, "learning_rate": 2.5038629020929903e-07, "loss": 0.0618, "step": 8690 }, { "epoch": 10.05, "grad_norm": 7.893348693847656, "learning_rate": 2.5003511729175447e-07, "loss": 0.0682, "step": 8700 }, { "epoch": 10.05, "grad_norm": 54.61248779296875, "learning_rate": 2.4968394437420985e-07, "loss": 0.0281, "step": 8710 }, { "epoch": 10.05, "eval_accuracy": 0.9117849758787043, "eval_loss": 0.28518858551979065, "eval_precision": 0.9536826119969628, "eval_recall": 0.8656099241902137, "eval_runtime": 727.1952, "eval_samples_per_second": 3.991, "eval_steps_per_second": 0.286, "step": 8712 }, { "epoch": 11.0, "grad_norm": 2.8041207790374756, "learning_rate": 2.493327714566653e-07, "loss": 0.0338, "step": 8720 }, { "epoch": 11.0, "grad_norm": 4.142540454864502, "learning_rate": 2.4898159853912067e-07, "loss": 0.0959, "step": 8730 }, { "epoch": 11.0, "grad_norm": 6.947538375854492, "learning_rate": 2.4863042562157605e-07, "loss": 0.0114, "step": 8740 }, { "epoch": 11.0, "grad_norm": 0.18752264976501465, "learning_rate": 2.482792527040315e-07, "loss": 0.0116, "step": 8750 }, { "epoch": 11.0, "grad_norm": 1.0682921409606934, "learning_rate": 2.4792807978648686e-07, "loss": 0.049, "step": 8760 }, { "epoch": 11.0, "grad_norm": 0.21910032629966736, "learning_rate": 2.4757690686894225e-07, "loss": 0.0842, "step": 8770 }, { "epoch": 11.0, "grad_norm": 0.393189400434494, "learning_rate": 2.4722573395139763e-07, "loss": 0.0417, "step": 8780 }, { "epoch": 11.0, "grad_norm": 5.005003929138184, "learning_rate": 2.4687456103385306e-07, "loss": 0.1165, "step": 8790 }, { "epoch": 11.01, "grad_norm": 0.33924514055252075, "learning_rate": 2.4652338811630844e-07, "loss": 0.1244, "step": 8800 }, { "epoch": 11.01, "grad_norm": 0.29950523376464844, "learning_rate": 2.461722151987639e-07, "loss": 0.0568, "step": 8810 }, { "epoch": 11.01, "grad_norm": 1.7086515426635742, "learning_rate": 2.4582104228121926e-07, "loss": 0.1406, "step": 8820 }, { "epoch": 11.01, "grad_norm": 23.033437728881836, "learning_rate": 2.454698693636747e-07, "loss": 0.0998, "step": 8830 }, { "epoch": 11.01, "grad_norm": 1.1737319231033325, "learning_rate": 2.451186964461301e-07, "loss": 0.1574, "step": 8840 }, { "epoch": 11.01, "grad_norm": 1.4726502895355225, "learning_rate": 2.4476752352858546e-07, "loss": 0.0696, "step": 8850 }, { "epoch": 11.01, "grad_norm": 1.4459102153778076, "learning_rate": 2.444163506110409e-07, "loss": 0.0783, "step": 8860 }, { "epoch": 11.01, "grad_norm": 1.1462914943695068, "learning_rate": 2.440651776934963e-07, "loss": 0.0759, "step": 8870 }, { "epoch": 11.01, "grad_norm": 0.45204418897628784, "learning_rate": 2.4371400477595166e-07, "loss": 0.0913, "step": 8880 }, { "epoch": 11.01, "grad_norm": 22.073589324951172, "learning_rate": 2.4336283185840704e-07, "loss": 0.1566, "step": 8890 }, { "epoch": 11.01, "grad_norm": 0.8241315484046936, "learning_rate": 2.430116589408625e-07, "loss": 0.0992, "step": 8900 }, { "epoch": 11.01, "grad_norm": 14.800032615661621, "learning_rate": 2.4266048602331786e-07, "loss": 0.0745, "step": 8910 }, { "epoch": 11.01, "grad_norm": 25.997074127197266, "learning_rate": 2.423093131057733e-07, "loss": 0.0468, "step": 8920 }, { "epoch": 11.01, "grad_norm": 25.528350830078125, "learning_rate": 2.419581401882287e-07, "loss": 0.1206, "step": 8930 }, { "epoch": 11.01, "grad_norm": 22.306915283203125, "learning_rate": 2.4160696727068406e-07, "loss": 0.0747, "step": 8940 }, { "epoch": 11.02, "grad_norm": 1.3441123962402344, "learning_rate": 2.412557943531395e-07, "loss": 0.0747, "step": 8950 }, { "epoch": 11.02, "grad_norm": 1.5736992359161377, "learning_rate": 2.4090462143559487e-07, "loss": 0.0779, "step": 8960 }, { "epoch": 11.02, "grad_norm": 1.521962285041809, "learning_rate": 2.405534485180503e-07, "loss": 0.0951, "step": 8970 }, { "epoch": 11.02, "grad_norm": 2.4471147060394287, "learning_rate": 2.402022756005057e-07, "loss": 0.0646, "step": 8980 }, { "epoch": 11.02, "grad_norm": 0.1730198711156845, "learning_rate": 2.3985110268296107e-07, "loss": 0.0939, "step": 8990 }, { "epoch": 11.02, "grad_norm": 38.81439971923828, "learning_rate": 2.3949992976541645e-07, "loss": 0.0592, "step": 9000 }, { "epoch": 11.02, "grad_norm": 57.413150787353516, "learning_rate": 2.391487568478719e-07, "loss": 0.0438, "step": 9010 }, { "epoch": 11.02, "grad_norm": 0.8407949805259705, "learning_rate": 2.3879758393032727e-07, "loss": 0.0206, "step": 9020 }, { "epoch": 11.02, "grad_norm": 0.8218994140625, "learning_rate": 2.3844641101278265e-07, "loss": 0.0427, "step": 9030 }, { "epoch": 11.02, "grad_norm": 0.6286032199859619, "learning_rate": 2.3809523809523806e-07, "loss": 0.1261, "step": 9040 }, { "epoch": 11.02, "grad_norm": 0.3803505301475525, "learning_rate": 2.377440651776935e-07, "loss": 0.0809, "step": 9050 }, { "epoch": 11.02, "grad_norm": 4.310997009277344, "learning_rate": 2.3739289226014888e-07, "loss": 0.0746, "step": 9060 }, { "epoch": 11.02, "grad_norm": 19.762901306152344, "learning_rate": 2.3704171934260429e-07, "loss": 0.0507, "step": 9070 }, { "epoch": 11.02, "grad_norm": 0.6946755051612854, "learning_rate": 2.366905464250597e-07, "loss": 0.0198, "step": 9080 }, { "epoch": 11.02, "grad_norm": 1.0548350811004639, "learning_rate": 2.363393735075151e-07, "loss": 0.009, "step": 9090 }, { "epoch": 11.02, "grad_norm": 27.209962844848633, "learning_rate": 2.3598820058997048e-07, "loss": 0.0805, "step": 9100 }, { "epoch": 11.03, "grad_norm": 0.15244987607002258, "learning_rate": 2.356370276724259e-07, "loss": 0.0721, "step": 9110 }, { "epoch": 11.03, "grad_norm": 3.15347957611084, "learning_rate": 2.352858547548813e-07, "loss": 0.0272, "step": 9120 }, { "epoch": 11.03, "grad_norm": 23.29449462890625, "learning_rate": 2.3493468183733668e-07, "loss": 0.1562, "step": 9130 }, { "epoch": 11.03, "grad_norm": 9.874842643737793, "learning_rate": 2.345835089197921e-07, "loss": 0.1013, "step": 9140 }, { "epoch": 11.03, "grad_norm": 14.927287101745605, "learning_rate": 2.3423233600224747e-07, "loss": 0.0927, "step": 9150 }, { "epoch": 11.03, "grad_norm": 1.461829423904419, "learning_rate": 2.338811630847029e-07, "loss": 0.0427, "step": 9160 }, { "epoch": 11.03, "grad_norm": 11.56317138671875, "learning_rate": 2.335299901671583e-07, "loss": 0.0566, "step": 9170 }, { "epoch": 11.03, "grad_norm": 0.6733220815658569, "learning_rate": 2.331788172496137e-07, "loss": 0.0182, "step": 9180 }, { "epoch": 11.03, "grad_norm": 18.02909278869629, "learning_rate": 2.328276443320691e-07, "loss": 0.1088, "step": 9190 }, { "epoch": 11.03, "grad_norm": 4.843137264251709, "learning_rate": 2.3247647141452452e-07, "loss": 0.1563, "step": 9200 }, { "epoch": 11.03, "grad_norm": 15.730489730834961, "learning_rate": 2.321252984969799e-07, "loss": 0.0789, "step": 9210 }, { "epoch": 11.03, "grad_norm": 2.05502986907959, "learning_rate": 2.3177412557943528e-07, "loss": 0.063, "step": 9220 }, { "epoch": 11.03, "grad_norm": 0.10426737368106842, "learning_rate": 2.3142295266189071e-07, "loss": 0.0727, "step": 9230 }, { "epoch": 11.03, "grad_norm": 0.3009401261806488, "learning_rate": 2.310717797443461e-07, "loss": 0.0423, "step": 9240 }, { "epoch": 11.03, "grad_norm": 51.094398498535156, "learning_rate": 2.307206068268015e-07, "loss": 0.0758, "step": 9250 }, { "epoch": 11.03, "grad_norm": 0.2700728476047516, "learning_rate": 2.3036943390925689e-07, "loss": 0.0723, "step": 9260 }, { "epoch": 11.04, "grad_norm": 3.4969286918640137, "learning_rate": 2.3001826099171232e-07, "loss": 0.0293, "step": 9270 }, { "epoch": 11.04, "grad_norm": 2.7565338611602783, "learning_rate": 2.296670880741677e-07, "loss": 0.0384, "step": 9280 }, { "epoch": 11.04, "grad_norm": 3.552182197570801, "learning_rate": 2.293159151566231e-07, "loss": 0.0636, "step": 9290 }, { "epoch": 11.04, "grad_norm": 21.1771240234375, "learning_rate": 2.2896474223907852e-07, "loss": 0.0689, "step": 9300 }, { "epoch": 11.04, "grad_norm": 29.375221252441406, "learning_rate": 2.2861356932153393e-07, "loss": 0.125, "step": 9310 }, { "epoch": 11.04, "grad_norm": 21.816547393798828, "learning_rate": 2.282623964039893e-07, "loss": 0.0985, "step": 9320 }, { "epoch": 11.04, "grad_norm": 1.5881174802780151, "learning_rate": 2.279112234864447e-07, "loss": 0.0531, "step": 9330 }, { "epoch": 11.04, "grad_norm": 4.415070533752441, "learning_rate": 2.2756005056890013e-07, "loss": 0.1003, "step": 9340 }, { "epoch": 11.04, "grad_norm": 10.756519317626953, "learning_rate": 2.272088776513555e-07, "loss": 0.0606, "step": 9350 }, { "epoch": 11.04, "grad_norm": 2.814474105834961, "learning_rate": 2.2685770473381092e-07, "loss": 0.0709, "step": 9360 }, { "epoch": 11.04, "grad_norm": 4.400051593780518, "learning_rate": 2.2650653181626633e-07, "loss": 0.015, "step": 9370 }, { "epoch": 11.04, "grad_norm": 0.23021164536476135, "learning_rate": 2.2615535889872173e-07, "loss": 0.0699, "step": 9380 }, { "epoch": 11.04, "grad_norm": 2.5226502418518066, "learning_rate": 2.2580418598117712e-07, "loss": 0.0181, "step": 9390 }, { "epoch": 11.04, "grad_norm": 1.7135413885116577, "learning_rate": 2.2545301306363252e-07, "loss": 0.0513, "step": 9400 }, { "epoch": 11.04, "grad_norm": 0.7580770254135132, "learning_rate": 2.2510184014608793e-07, "loss": 0.0776, "step": 9410 }, { "epoch": 11.04, "grad_norm": 48.73519515991211, "learning_rate": 2.2475066722854334e-07, "loss": 0.1002, "step": 9420 }, { "epoch": 11.05, "grad_norm": 17.40268325805664, "learning_rate": 2.2439949431099872e-07, "loss": 0.099, "step": 9430 }, { "epoch": 11.05, "grad_norm": 10.487020492553711, "learning_rate": 2.240483213934541e-07, "loss": 0.0788, "step": 9440 }, { "epoch": 11.05, "grad_norm": 0.7279491424560547, "learning_rate": 2.2369714847590954e-07, "loss": 0.1519, "step": 9450 }, { "epoch": 11.05, "grad_norm": 4.9954514503479, "learning_rate": 2.2334597555836492e-07, "loss": 0.0503, "step": 9460 }, { "epoch": 11.05, "grad_norm": 35.54444885253906, "learning_rate": 2.2299480264082033e-07, "loss": 0.09, "step": 9470 }, { "epoch": 11.05, "grad_norm": 4.42392635345459, "learning_rate": 2.2264362972327574e-07, "loss": 0.115, "step": 9480 }, { "epoch": 11.05, "grad_norm": 11.356151580810547, "learning_rate": 2.2229245680573115e-07, "loss": 0.0964, "step": 9490 }, { "epoch": 11.05, "grad_norm": 0.7066633105278015, "learning_rate": 2.2194128388818653e-07, "loss": 0.0806, "step": 9500 }, { "epoch": 11.05, "eval_accuracy": 0.9093728463128876, "eval_loss": 0.2993638515472412, "eval_precision": 0.9548238897396631, "eval_recall": 0.8594073053066851, "eval_runtime": 799.7886, "eval_samples_per_second": 3.628, "eval_steps_per_second": 0.26, "step": 9504 }, { "epoch": 12.0, "grad_norm": 4.612809658050537, "learning_rate": 2.2159011097064194e-07, "loss": 0.0839, "step": 9510 }, { "epoch": 12.0, "grad_norm": 0.5118339657783508, "learning_rate": 2.2123893805309735e-07, "loss": 0.0406, "step": 9520 }, { "epoch": 12.0, "grad_norm": 1.7584993839263916, "learning_rate": 2.2088776513555273e-07, "loss": 0.0703, "step": 9530 }, { "epoch": 12.0, "grad_norm": 8.799148559570312, "learning_rate": 2.2053659221800814e-07, "loss": 0.0888, "step": 9540 }, { "epoch": 12.0, "grad_norm": 15.844137191772461, "learning_rate": 2.2018541930046352e-07, "loss": 0.0499, "step": 9550 }, { "epoch": 12.0, "grad_norm": 7.411105155944824, "learning_rate": 2.1983424638291895e-07, "loss": 0.1194, "step": 9560 }, { "epoch": 12.0, "grad_norm": 1.5899499654769897, "learning_rate": 2.1948307346537433e-07, "loss": 0.1039, "step": 9570 }, { "epoch": 12.0, "grad_norm": 3.862456798553467, "learning_rate": 2.1913190054782974e-07, "loss": 0.0703, "step": 9580 }, { "epoch": 12.01, "grad_norm": 20.99657440185547, "learning_rate": 2.1878072763028515e-07, "loss": 0.0559, "step": 9590 }, { "epoch": 12.01, "grad_norm": 0.5292648077011108, "learning_rate": 2.1842955471274056e-07, "loss": 0.0676, "step": 9600 }, { "epoch": 12.01, "grad_norm": 48.832950592041016, "learning_rate": 2.1807838179519594e-07, "loss": 0.0248, "step": 9610 }, { "epoch": 12.01, "grad_norm": 2.5217955112457275, "learning_rate": 2.1772720887765132e-07, "loss": 0.0501, "step": 9620 }, { "epoch": 12.01, "grad_norm": 3.3120830059051514, "learning_rate": 2.1737603596010676e-07, "loss": 0.0279, "step": 9630 }, { "epoch": 12.01, "grad_norm": 11.754875183105469, "learning_rate": 2.1702486304256214e-07, "loss": 0.1322, "step": 9640 }, { "epoch": 12.01, "grad_norm": 29.171892166137695, "learning_rate": 2.1667369012501755e-07, "loss": 0.0674, "step": 9650 }, { "epoch": 12.01, "grad_norm": 8.62297534942627, "learning_rate": 2.1632251720747293e-07, "loss": 0.1048, "step": 9660 }, { "epoch": 12.01, "grad_norm": 0.2766675651073456, "learning_rate": 2.1597134428992837e-07, "loss": 0.052, "step": 9670 }, { "epoch": 12.01, "grad_norm": 13.070745468139648, "learning_rate": 2.1562017137238375e-07, "loss": 0.0315, "step": 9680 }, { "epoch": 12.01, "grad_norm": 24.398014068603516, "learning_rate": 2.1526899845483916e-07, "loss": 0.0478, "step": 9690 }, { "epoch": 12.01, "grad_norm": 4.022948265075684, "learning_rate": 2.1491782553729456e-07, "loss": 0.0565, "step": 9700 }, { "epoch": 12.01, "grad_norm": 6.304811954498291, "learning_rate": 2.1456665261974997e-07, "loss": 0.0417, "step": 9710 }, { "epoch": 12.01, "grad_norm": 0.35496971011161804, "learning_rate": 2.1421547970220535e-07, "loss": 0.0896, "step": 9720 }, { "epoch": 12.01, "grad_norm": 3.6217868328094482, "learning_rate": 2.1386430678466074e-07, "loss": 0.0734, "step": 9730 }, { "epoch": 12.01, "grad_norm": 33.2974739074707, "learning_rate": 2.1351313386711617e-07, "loss": 0.099, "step": 9740 }, { "epoch": 12.02, "grad_norm": 2.021024227142334, "learning_rate": 2.1316196094957155e-07, "loss": 0.0612, "step": 9750 }, { "epoch": 12.02, "grad_norm": 17.24266242980957, "learning_rate": 2.1281078803202696e-07, "loss": 0.1198, "step": 9760 }, { "epoch": 12.02, "grad_norm": 12.18859577178955, "learning_rate": 2.1245961511448234e-07, "loss": 0.0671, "step": 9770 }, { "epoch": 12.02, "grad_norm": 2.57064151763916, "learning_rate": 2.1210844219693778e-07, "loss": 0.0419, "step": 9780 }, { "epoch": 12.02, "grad_norm": 9.723092079162598, "learning_rate": 2.1175726927939316e-07, "loss": 0.0953, "step": 9790 }, { "epoch": 12.02, "grad_norm": 1.7855851650238037, "learning_rate": 2.1140609636184857e-07, "loss": 0.0643, "step": 9800 }, { "epoch": 12.02, "grad_norm": 0.8623290657997131, "learning_rate": 2.1105492344430398e-07, "loss": 0.0466, "step": 9810 }, { "epoch": 12.02, "grad_norm": 32.95787048339844, "learning_rate": 2.1070375052675936e-07, "loss": 0.0947, "step": 9820 }, { "epoch": 12.02, "grad_norm": 3.396002769470215, "learning_rate": 2.1035257760921477e-07, "loss": 0.0177, "step": 9830 }, { "epoch": 12.02, "grad_norm": 0.3016001284122467, "learning_rate": 2.1000140469167015e-07, "loss": 0.0982, "step": 9840 }, { "epoch": 12.02, "grad_norm": 20.19658660888672, "learning_rate": 2.0965023177412558e-07, "loss": 0.0265, "step": 9850 }, { "epoch": 12.02, "grad_norm": 26.934406280517578, "learning_rate": 2.0929905885658097e-07, "loss": 0.1133, "step": 9860 }, { "epoch": 12.02, "grad_norm": 25.39380645751953, "learning_rate": 2.0894788593903637e-07, "loss": 0.0553, "step": 9870 }, { "epoch": 12.02, "grad_norm": 61.38995361328125, "learning_rate": 2.0859671302149176e-07, "loss": 0.0413, "step": 9880 }, { "epoch": 12.02, "grad_norm": 3.131395101547241, "learning_rate": 2.082455401039472e-07, "loss": 0.2209, "step": 9890 }, { "epoch": 12.03, "grad_norm": 0.34632551670074463, "learning_rate": 2.0789436718640257e-07, "loss": 0.0883, "step": 9900 }, { "epoch": 12.03, "grad_norm": 1.4990023374557495, "learning_rate": 2.0754319426885796e-07, "loss": 0.0705, "step": 9910 }, { "epoch": 12.03, "grad_norm": 1.9048992395401, "learning_rate": 2.071920213513134e-07, "loss": 0.0073, "step": 9920 }, { "epoch": 12.03, "grad_norm": 27.92946434020996, "learning_rate": 2.0684084843376877e-07, "loss": 0.0872, "step": 9930 }, { "epoch": 12.03, "grad_norm": 0.5757583379745483, "learning_rate": 2.0648967551622418e-07, "loss": 0.0477, "step": 9940 }, { "epoch": 12.03, "grad_norm": 0.664885401725769, "learning_rate": 2.0613850259867956e-07, "loss": 0.0819, "step": 9950 }, { "epoch": 12.03, "grad_norm": 0.3888974189758301, "learning_rate": 2.05787329681135e-07, "loss": 0.059, "step": 9960 }, { "epoch": 12.03, "grad_norm": 14.178390502929688, "learning_rate": 2.0543615676359038e-07, "loss": 0.0892, "step": 9970 }, { "epoch": 12.03, "grad_norm": 1.0579181909561157, "learning_rate": 2.050849838460458e-07, "loss": 0.0142, "step": 9980 }, { "epoch": 12.03, "grad_norm": 3.477994203567505, "learning_rate": 2.0473381092850117e-07, "loss": 0.0283, "step": 9990 }, { "epoch": 12.03, "grad_norm": 0.28625795245170593, "learning_rate": 2.043826380109566e-07, "loss": 0.0669, "step": 10000 }, { "epoch": 12.03, "grad_norm": 0.13978077471256256, "learning_rate": 2.0403146509341199e-07, "loss": 0.0733, "step": 10010 }, { "epoch": 12.03, "grad_norm": 14.287908554077148, "learning_rate": 2.0368029217586737e-07, "loss": 0.1798, "step": 10020 }, { "epoch": 12.03, "grad_norm": 2.1499664783477783, "learning_rate": 2.033291192583228e-07, "loss": 0.0447, "step": 10030 }, { "epoch": 12.03, "grad_norm": 24.80770492553711, "learning_rate": 2.0297794634077818e-07, "loss": 0.0624, "step": 10040 }, { "epoch": 12.03, "grad_norm": 0.06428371369838715, "learning_rate": 2.026267734232336e-07, "loss": 0.0338, "step": 10050 }, { "epoch": 12.04, "grad_norm": 19.257822036743164, "learning_rate": 2.0227560050568898e-07, "loss": 0.1026, "step": 10060 }, { "epoch": 12.04, "grad_norm": 1.845421552658081, "learning_rate": 2.019244275881444e-07, "loss": 0.0457, "step": 10070 }, { "epoch": 12.04, "grad_norm": 0.73539137840271, "learning_rate": 2.015732546705998e-07, "loss": 0.0856, "step": 10080 }, { "epoch": 12.04, "grad_norm": 2.106534242630005, "learning_rate": 2.012220817530552e-07, "loss": 0.0578, "step": 10090 }, { "epoch": 12.04, "grad_norm": 1.3260374069213867, "learning_rate": 2.0087090883551058e-07, "loss": 0.0211, "step": 10100 }, { "epoch": 12.04, "grad_norm": 10.011427879333496, "learning_rate": 2.0051973591796602e-07, "loss": 0.0895, "step": 10110 }, { "epoch": 12.04, "grad_norm": 0.14594906568527222, "learning_rate": 2.001685630004214e-07, "loss": 0.0397, "step": 10120 }, { "epoch": 12.04, "grad_norm": 42.37451171875, "learning_rate": 1.9981739008287678e-07, "loss": 0.1362, "step": 10130 }, { "epoch": 12.04, "grad_norm": 0.3060012459754944, "learning_rate": 1.9946621716533222e-07, "loss": 0.0233, "step": 10140 }, { "epoch": 12.04, "grad_norm": 0.2238699197769165, "learning_rate": 1.991150442477876e-07, "loss": 0.0564, "step": 10150 }, { "epoch": 12.04, "grad_norm": 1.1285340785980225, "learning_rate": 1.98763871330243e-07, "loss": 0.0519, "step": 10160 }, { "epoch": 12.04, "grad_norm": 0.758639931678772, "learning_rate": 1.984126984126984e-07, "loss": 0.1106, "step": 10170 }, { "epoch": 12.04, "grad_norm": 8.095895767211914, "learning_rate": 1.9806152549515382e-07, "loss": 0.0564, "step": 10180 }, { "epoch": 12.04, "grad_norm": 0.6542243957519531, "learning_rate": 1.977103525776092e-07, "loss": 0.1147, "step": 10190 }, { "epoch": 12.04, "grad_norm": 1.960679531097412, "learning_rate": 1.9735917966006461e-07, "loss": 0.168, "step": 10200 }, { "epoch": 12.04, "grad_norm": 0.15955808758735657, "learning_rate": 1.9700800674252e-07, "loss": 0.1192, "step": 10210 }, { "epoch": 12.05, "grad_norm": 8.413512229919434, "learning_rate": 1.966568338249754e-07, "loss": 0.0481, "step": 10220 }, { "epoch": 12.05, "grad_norm": 0.5637698173522949, "learning_rate": 1.963056609074308e-07, "loss": 0.0165, "step": 10230 }, { "epoch": 12.05, "grad_norm": 0.7581736445426941, "learning_rate": 1.959544879898862e-07, "loss": 0.1387, "step": 10240 }, { "epoch": 12.05, "grad_norm": 0.16801892220973969, "learning_rate": 1.9560331507234163e-07, "loss": 0.0909, "step": 10250 }, { "epoch": 12.05, "grad_norm": 2.125321626663208, "learning_rate": 1.95252142154797e-07, "loss": 0.1042, "step": 10260 }, { "epoch": 12.05, "grad_norm": 0.36353224515914917, "learning_rate": 1.9490096923725242e-07, "loss": 0.0793, "step": 10270 }, { "epoch": 12.05, "grad_norm": 36.88089370727539, "learning_rate": 1.945497963197078e-07, "loss": 0.0698, "step": 10280 }, { "epoch": 12.05, "grad_norm": 8.90969181060791, "learning_rate": 1.9419862340216324e-07, "loss": 0.0755, "step": 10290 }, { "epoch": 12.05, "eval_accuracy": 0.9104066161268091, "eval_loss": 0.3123721182346344, "eval_precision": 0.9556235654169855, "eval_recall": 0.8607856650585803, "eval_runtime": 619.0836, "eval_samples_per_second": 4.688, "eval_steps_per_second": 0.336, "step": 10296 }, { "epoch": 13.0, "grad_norm": 1.3507524728775024, "learning_rate": 1.9384745048461862e-07, "loss": 0.1334, "step": 10300 }, { "epoch": 13.0, "grad_norm": 0.32605308294296265, "learning_rate": 1.93496277567074e-07, "loss": 0.0409, "step": 10310 }, { "epoch": 13.0, "grad_norm": 1.4926115274429321, "learning_rate": 1.9314510464952943e-07, "loss": 0.0427, "step": 10320 }, { "epoch": 13.0, "grad_norm": 5.537851810455322, "learning_rate": 1.9279393173198482e-07, "loss": 0.0781, "step": 10330 }, { "epoch": 13.0, "grad_norm": 0.3795417547225952, "learning_rate": 1.9244275881444022e-07, "loss": 0.0321, "step": 10340 }, { "epoch": 13.0, "grad_norm": 0.4356180727481842, "learning_rate": 1.920915858968956e-07, "loss": 0.0644, "step": 10350 }, { "epoch": 13.0, "grad_norm": 0.5931177139282227, "learning_rate": 1.9174041297935104e-07, "loss": 0.1524, "step": 10360 }, { "epoch": 13.0, "grad_norm": 5.495865345001221, "learning_rate": 1.9138924006180642e-07, "loss": 0.077, "step": 10370 }, { "epoch": 13.01, "grad_norm": 0.173753559589386, "learning_rate": 1.9103806714426183e-07, "loss": 0.1078, "step": 10380 }, { "epoch": 13.01, "grad_norm": 0.16850866377353668, "learning_rate": 1.9068689422671721e-07, "loss": 0.1027, "step": 10390 }, { "epoch": 13.01, "grad_norm": 10.446556091308594, "learning_rate": 1.9033572130917265e-07, "loss": 0.1498, "step": 10400 }, { "epoch": 13.01, "grad_norm": 4.751400470733643, "learning_rate": 1.8998454839162803e-07, "loss": 0.0496, "step": 10410 }, { "epoch": 13.01, "grad_norm": 28.42918586730957, "learning_rate": 1.896333754740834e-07, "loss": 0.0526, "step": 10420 }, { "epoch": 13.01, "grad_norm": 1.5165878534317017, "learning_rate": 1.8928220255653885e-07, "loss": 0.0673, "step": 10430 }, { "epoch": 13.01, "grad_norm": 45.8775749206543, "learning_rate": 1.8893102963899423e-07, "loss": 0.0268, "step": 10440 }, { "epoch": 13.01, "grad_norm": 3.914055109024048, "learning_rate": 1.8857985672144964e-07, "loss": 0.0831, "step": 10450 }, { "epoch": 13.01, "grad_norm": 1.2279554605484009, "learning_rate": 1.8822868380390502e-07, "loss": 0.0106, "step": 10460 }, { "epoch": 13.01, "grad_norm": 59.97119903564453, "learning_rate": 1.8787751088636045e-07, "loss": 0.0433, "step": 10470 }, { "epoch": 13.01, "grad_norm": 3.334914207458496, "learning_rate": 1.8752633796881584e-07, "loss": 0.0133, "step": 10480 }, { "epoch": 13.01, "grad_norm": 4.297630310058594, "learning_rate": 1.8717516505127124e-07, "loss": 0.1588, "step": 10490 }, { "epoch": 13.01, "grad_norm": 0.2498587965965271, "learning_rate": 1.8682399213372663e-07, "loss": 0.0149, "step": 10500 }, { "epoch": 13.01, "grad_norm": 17.232927322387695, "learning_rate": 1.8647281921618204e-07, "loss": 0.086, "step": 10510 }, { "epoch": 13.01, "grad_norm": 0.3848666250705719, "learning_rate": 1.8612164629863744e-07, "loss": 0.0287, "step": 10520 }, { "epoch": 13.01, "grad_norm": 0.5234899520874023, "learning_rate": 1.8577047338109283e-07, "loss": 0.0913, "step": 10530 }, { "epoch": 13.02, "grad_norm": 3.474271774291992, "learning_rate": 1.8541930046354826e-07, "loss": 0.0277, "step": 10540 }, { "epoch": 13.02, "grad_norm": 0.26419368386268616, "learning_rate": 1.8506812754600364e-07, "loss": 0.06, "step": 10550 }, { "epoch": 13.02, "grad_norm": 4.833984375, "learning_rate": 1.8471695462845905e-07, "loss": 0.0326, "step": 10560 }, { "epoch": 13.02, "grad_norm": 36.49141311645508, "learning_rate": 1.8436578171091443e-07, "loss": 0.1139, "step": 10570 }, { "epoch": 13.02, "grad_norm": 0.05287026986479759, "learning_rate": 1.8401460879336987e-07, "loss": 0.03, "step": 10580 }, { "epoch": 13.02, "grad_norm": 2.713606595993042, "learning_rate": 1.8366343587582525e-07, "loss": 0.0359, "step": 10590 }, { "epoch": 13.02, "grad_norm": 17.44700813293457, "learning_rate": 1.8331226295828063e-07, "loss": 0.0623, "step": 10600 }, { "epoch": 13.02, "grad_norm": 0.35079002380371094, "learning_rate": 1.8296109004073604e-07, "loss": 0.1396, "step": 10610 }, { "epoch": 13.02, "grad_norm": 0.07017870247364044, "learning_rate": 1.8260991712319145e-07, "loss": 0.0686, "step": 10620 }, { "epoch": 13.02, "grad_norm": 2.4053738117218018, "learning_rate": 1.8225874420564686e-07, "loss": 0.089, "step": 10630 }, { "epoch": 13.02, "grad_norm": 14.379215240478516, "learning_rate": 1.8190757128810224e-07, "loss": 0.1088, "step": 10640 }, { "epoch": 13.02, "grad_norm": 29.79482078552246, "learning_rate": 1.8155639837055767e-07, "loss": 0.1164, "step": 10650 }, { "epoch": 13.02, "grad_norm": 36.445255279541016, "learning_rate": 1.8120522545301306e-07, "loss": 0.0508, "step": 10660 }, { "epoch": 13.02, "grad_norm": 0.18258926272392273, "learning_rate": 1.8085405253546846e-07, "loss": 0.0998, "step": 10670 }, { "epoch": 13.02, "grad_norm": 0.21488305926322937, "learning_rate": 1.8050287961792385e-07, "loss": 0.1163, "step": 10680 }, { "epoch": 13.02, "grad_norm": 0.10966210812330246, "learning_rate": 1.8015170670037928e-07, "loss": 0.0414, "step": 10690 }, { "epoch": 13.03, "grad_norm": 0.049498215317726135, "learning_rate": 1.7980053378283466e-07, "loss": 0.0413, "step": 10700 }, { "epoch": 13.03, "grad_norm": 0.6685736179351807, "learning_rate": 1.7944936086529004e-07, "loss": 0.0446, "step": 10710 }, { "epoch": 13.03, "grad_norm": 1.5232113599777222, "learning_rate": 1.7909818794774545e-07, "loss": 0.0647, "step": 10720 }, { "epoch": 13.03, "grad_norm": 26.782896041870117, "learning_rate": 1.7874701503020086e-07, "loss": 0.0519, "step": 10730 }, { "epoch": 13.03, "grad_norm": 4.27664852142334, "learning_rate": 1.7839584211265627e-07, "loss": 0.0372, "step": 10740 }, { "epoch": 13.03, "grad_norm": 9.920866966247559, "learning_rate": 1.7804466919511165e-07, "loss": 0.0288, "step": 10750 }, { "epoch": 13.03, "grad_norm": 3.653459310531616, "learning_rate": 1.7769349627756709e-07, "loss": 0.0266, "step": 10760 }, { "epoch": 13.03, "grad_norm": 29.298677444458008, "learning_rate": 1.7734232336002247e-07, "loss": 0.0809, "step": 10770 }, { "epoch": 13.03, "grad_norm": 2.9988245964050293, "learning_rate": 1.7699115044247788e-07, "loss": 0.0683, "step": 10780 }, { "epoch": 13.03, "grad_norm": 1.4848108291625977, "learning_rate": 1.7663997752493326e-07, "loss": 0.1299, "step": 10790 }, { "epoch": 13.03, "grad_norm": 0.500281572341919, "learning_rate": 1.762888046073887e-07, "loss": 0.0479, "step": 10800 }, { "epoch": 13.03, "grad_norm": 0.08176117390394211, "learning_rate": 1.7593763168984408e-07, "loss": 0.1446, "step": 10810 }, { "epoch": 13.03, "grad_norm": 0.11161573976278305, "learning_rate": 1.7558645877229946e-07, "loss": 0.0953, "step": 10820 }, { "epoch": 13.03, "grad_norm": 2.8631839752197266, "learning_rate": 1.7523528585475487e-07, "loss": 0.054, "step": 10830 }, { "epoch": 13.03, "grad_norm": 9.307927131652832, "learning_rate": 1.7488411293721027e-07, "loss": 0.0093, "step": 10840 }, { "epoch": 13.04, "grad_norm": 21.216283798217773, "learning_rate": 1.7453294001966568e-07, "loss": 0.1063, "step": 10850 }, { "epoch": 13.04, "grad_norm": 33.69869613647461, "learning_rate": 1.7418176710212106e-07, "loss": 0.0315, "step": 10860 }, { "epoch": 13.04, "grad_norm": 0.5531070232391357, "learning_rate": 1.738305941845765e-07, "loss": 0.0118, "step": 10870 }, { "epoch": 13.04, "grad_norm": 0.22744214534759521, "learning_rate": 1.7347942126703188e-07, "loss": 0.0764, "step": 10880 }, { "epoch": 13.04, "grad_norm": 6.6699748039245605, "learning_rate": 1.731282483494873e-07, "loss": 0.0494, "step": 10890 }, { "epoch": 13.04, "grad_norm": 7.089933395385742, "learning_rate": 1.7277707543194267e-07, "loss": 0.0412, "step": 10900 }, { "epoch": 13.04, "grad_norm": 0.8456340432167053, "learning_rate": 1.7242590251439808e-07, "loss": 0.0763, "step": 10910 }, { "epoch": 13.04, "grad_norm": 0.3509749174118042, "learning_rate": 1.720747295968535e-07, "loss": 0.048, "step": 10920 }, { "epoch": 13.04, "grad_norm": 40.190940856933594, "learning_rate": 1.7172355667930887e-07, "loss": 0.1094, "step": 10930 }, { "epoch": 13.04, "grad_norm": 0.9430677890777588, "learning_rate": 1.7137238376176428e-07, "loss": 0.0881, "step": 10940 }, { "epoch": 13.04, "grad_norm": 7.912288188934326, "learning_rate": 1.710212108442197e-07, "loss": 0.2467, "step": 10950 }, { "epoch": 13.04, "grad_norm": 22.781770706176758, "learning_rate": 1.706700379266751e-07, "loss": 0.0843, "step": 10960 }, { "epoch": 13.04, "grad_norm": 22.491809844970703, "learning_rate": 1.7031886500913048e-07, "loss": 0.0128, "step": 10970 }, { "epoch": 13.04, "grad_norm": 2.367631673812866, "learning_rate": 1.699676920915859e-07, "loss": 0.0501, "step": 10980 }, { "epoch": 13.04, "grad_norm": 29.9377498626709, "learning_rate": 1.696165191740413e-07, "loss": 0.1301, "step": 10990 }, { "epoch": 13.04, "grad_norm": 0.1351088285446167, "learning_rate": 1.6926534625649668e-07, "loss": 0.0695, "step": 11000 }, { "epoch": 13.05, "grad_norm": 26.878820419311523, "learning_rate": 1.6891417333895208e-07, "loss": 0.1933, "step": 11010 }, { "epoch": 13.05, "grad_norm": 0.14201368391513824, "learning_rate": 1.685630004214075e-07, "loss": 0.011, "step": 11020 }, { "epoch": 13.05, "grad_norm": 0.08544864505529404, "learning_rate": 1.682118275038629e-07, "loss": 0.037, "step": 11030 }, { "epoch": 13.05, "grad_norm": 0.7054736614227295, "learning_rate": 1.6786065458631828e-07, "loss": 0.0484, "step": 11040 }, { "epoch": 13.05, "grad_norm": 2.4981675148010254, "learning_rate": 1.675094816687737e-07, "loss": 0.0437, "step": 11050 }, { "epoch": 13.05, "grad_norm": 0.16141514480113983, "learning_rate": 1.671583087512291e-07, "loss": 0.0227, "step": 11060 }, { "epoch": 13.05, "grad_norm": 43.41939163208008, "learning_rate": 1.668071358336845e-07, "loss": 0.1198, "step": 11070 }, { "epoch": 13.05, "grad_norm": 0.14974088966846466, "learning_rate": 1.664559629161399e-07, "loss": 0.0986, "step": 11080 }, { "epoch": 13.05, "eval_accuracy": 0.9114403859407305, "eval_loss": 0.31335166096687317, "eval_precision": 0.9461883408071748, "eval_recall": 0.8725017229496899, "eval_runtime": 702.0819, "eval_samples_per_second": 4.133, "eval_steps_per_second": 0.296, "step": 11088 }, { "epoch": 14.0, "grad_norm": 20.785886764526367, "learning_rate": 1.6610478999859532e-07, "loss": 0.0606, "step": 11090 }, { "epoch": 14.0, "grad_norm": 29.276687622070312, "learning_rate": 1.657536170810507e-07, "loss": 0.0622, "step": 11100 }, { "epoch": 14.0, "grad_norm": 0.12766475975513458, "learning_rate": 1.654024441635061e-07, "loss": 0.0709, "step": 11110 }, { "epoch": 14.0, "grad_norm": 0.4519425928592682, "learning_rate": 1.650512712459615e-07, "loss": 0.0092, "step": 11120 }, { "epoch": 14.0, "grad_norm": 27.842815399169922, "learning_rate": 1.647000983284169e-07, "loss": 0.0363, "step": 11130 }, { "epoch": 14.0, "grad_norm": 0.7554247379302979, "learning_rate": 1.6434892541087231e-07, "loss": 0.0151, "step": 11140 }, { "epoch": 14.0, "grad_norm": 0.6925431489944458, "learning_rate": 1.639977524933277e-07, "loss": 0.0521, "step": 11150 }, { "epoch": 14.0, "grad_norm": 0.1560594141483307, "learning_rate": 1.636465795757831e-07, "loss": 0.0875, "step": 11160 }, { "epoch": 14.01, "grad_norm": 13.047561645507812, "learning_rate": 1.632954066582385e-07, "loss": 0.1298, "step": 11170 }, { "epoch": 14.01, "grad_norm": 0.4681456387042999, "learning_rate": 1.6294423374069392e-07, "loss": 0.0418, "step": 11180 }, { "epoch": 14.01, "grad_norm": 49.84629821777344, "learning_rate": 1.625930608231493e-07, "loss": 0.0671, "step": 11190 }, { "epoch": 14.01, "grad_norm": 40.607948303222656, "learning_rate": 1.622418879056047e-07, "loss": 0.1959, "step": 11200 }, { "epoch": 14.01, "grad_norm": 0.5283187627792358, "learning_rate": 1.6189071498806012e-07, "loss": 0.0549, "step": 11210 }, { "epoch": 14.01, "grad_norm": 6.877123832702637, "learning_rate": 1.615395420705155e-07, "loss": 0.075, "step": 11220 }, { "epoch": 14.01, "grad_norm": 16.945167541503906, "learning_rate": 1.611883691529709e-07, "loss": 0.0242, "step": 11230 }, { "epoch": 14.01, "grad_norm": 17.68268585205078, "learning_rate": 1.6083719623542632e-07, "loss": 0.0613, "step": 11240 }, { "epoch": 14.01, "grad_norm": 3.456467866897583, "learning_rate": 1.6048602331788173e-07, "loss": 0.0593, "step": 11250 }, { "epoch": 14.01, "grad_norm": 8.147682189941406, "learning_rate": 1.601348504003371e-07, "loss": 0.0367, "step": 11260 }, { "epoch": 14.01, "grad_norm": 0.049571115523576736, "learning_rate": 1.5978367748279252e-07, "loss": 0.0912, "step": 11270 }, { "epoch": 14.01, "grad_norm": 34.133628845214844, "learning_rate": 1.5943250456524793e-07, "loss": 0.0293, "step": 11280 }, { "epoch": 14.01, "grad_norm": 0.2278907299041748, "learning_rate": 1.5908133164770333e-07, "loss": 0.0583, "step": 11290 }, { "epoch": 14.01, "grad_norm": 40.64368438720703, "learning_rate": 1.5873015873015872e-07, "loss": 0.0808, "step": 11300 }, { "epoch": 14.01, "grad_norm": 14.79566478729248, "learning_rate": 1.5837898581261412e-07, "loss": 0.0565, "step": 11310 }, { "epoch": 14.01, "grad_norm": 3.0359811782836914, "learning_rate": 1.5802781289506953e-07, "loss": 0.0518, "step": 11320 }, { "epoch": 14.02, "grad_norm": 0.8447651863098145, "learning_rate": 1.5767663997752491e-07, "loss": 0.0645, "step": 11330 }, { "epoch": 14.02, "grad_norm": 40.90627670288086, "learning_rate": 1.5732546705998032e-07, "loss": 0.0513, "step": 11340 }, { "epoch": 14.02, "grad_norm": 27.954736709594727, "learning_rate": 1.5697429414243573e-07, "loss": 0.0404, "step": 11350 }, { "epoch": 14.02, "grad_norm": 0.7304593920707703, "learning_rate": 1.5662312122489114e-07, "loss": 0.0623, "step": 11360 }, { "epoch": 14.02, "grad_norm": 28.985172271728516, "learning_rate": 1.5627194830734652e-07, "loss": 0.0535, "step": 11370 }, { "epoch": 14.02, "grad_norm": 0.46886610984802246, "learning_rate": 1.5592077538980196e-07, "loss": 0.022, "step": 11380 }, { "epoch": 14.02, "grad_norm": 0.2699759900569916, "learning_rate": 1.5556960247225734e-07, "loss": 0.0932, "step": 11390 }, { "epoch": 14.02, "grad_norm": 3.5106499195098877, "learning_rate": 1.5521842955471272e-07, "loss": 0.1032, "step": 11400 }, { "epoch": 14.02, "grad_norm": 38.5422477722168, "learning_rate": 1.5486725663716813e-07, "loss": 0.0917, "step": 11410 }, { "epoch": 14.02, "grad_norm": 1.6601707935333252, "learning_rate": 1.5451608371962354e-07, "loss": 0.1049, "step": 11420 }, { "epoch": 14.02, "grad_norm": 2.015580892562866, "learning_rate": 1.5416491080207895e-07, "loss": 0.0524, "step": 11430 }, { "epoch": 14.02, "grad_norm": 31.791162490844727, "learning_rate": 1.5381373788453433e-07, "loss": 0.0654, "step": 11440 }, { "epoch": 14.02, "grad_norm": 0.7041537165641785, "learning_rate": 1.5346256496698974e-07, "loss": 0.0965, "step": 11450 }, { "epoch": 14.02, "grad_norm": 0.18909145891666412, "learning_rate": 1.5311139204944514e-07, "loss": 0.0911, "step": 11460 }, { "epoch": 14.02, "grad_norm": 4.825838565826416, "learning_rate": 1.5276021913190055e-07, "loss": 0.1447, "step": 11470 }, { "epoch": 14.02, "grad_norm": 4.8759684562683105, "learning_rate": 1.5240904621435593e-07, "loss": 0.0391, "step": 11480 }, { "epoch": 14.03, "grad_norm": 0.07253478467464447, "learning_rate": 1.5205787329681137e-07, "loss": 0.0229, "step": 11490 }, { "epoch": 14.03, "grad_norm": 28.297250747680664, "learning_rate": 1.5170670037926675e-07, "loss": 0.0305, "step": 11500 }, { "epoch": 14.03, "grad_norm": 22.519737243652344, "learning_rate": 1.5135552746172213e-07, "loss": 0.1069, "step": 11510 }, { "epoch": 14.03, "grad_norm": 15.577325820922852, "learning_rate": 1.5100435454417754e-07, "loss": 0.0241, "step": 11520 }, { "epoch": 14.03, "grad_norm": 21.020410537719727, "learning_rate": 1.5065318162663295e-07, "loss": 0.1132, "step": 11530 }, { "epoch": 14.03, "grad_norm": 7.091934680938721, "learning_rate": 1.5030200870908836e-07, "loss": 0.1007, "step": 11540 }, { "epoch": 14.03, "grad_norm": 13.88670539855957, "learning_rate": 1.4995083579154374e-07, "loss": 0.0851, "step": 11550 }, { "epoch": 14.03, "grad_norm": 26.916616439819336, "learning_rate": 1.4959966287399915e-07, "loss": 0.0687, "step": 11560 }, { "epoch": 14.03, "grad_norm": 15.008544921875, "learning_rate": 1.4924848995645456e-07, "loss": 0.0589, "step": 11570 }, { "epoch": 14.03, "grad_norm": 0.47452348470687866, "learning_rate": 1.4889731703890997e-07, "loss": 0.0281, "step": 11580 }, { "epoch": 14.03, "grad_norm": 0.355864554643631, "learning_rate": 1.4854614412136535e-07, "loss": 0.0532, "step": 11590 }, { "epoch": 14.03, "grad_norm": 3.847524404525757, "learning_rate": 1.4819497120382076e-07, "loss": 0.0687, "step": 11600 }, { "epoch": 14.03, "grad_norm": 0.9754255414009094, "learning_rate": 1.4784379828627616e-07, "loss": 0.1192, "step": 11610 }, { "epoch": 14.03, "grad_norm": 0.5954816937446594, "learning_rate": 1.4749262536873155e-07, "loss": 0.0542, "step": 11620 }, { "epoch": 14.03, "grad_norm": 10.695175170898438, "learning_rate": 1.4714145245118695e-07, "loss": 0.058, "step": 11630 }, { "epoch": 14.03, "grad_norm": 0.24657100439071655, "learning_rate": 1.4679027953364236e-07, "loss": 0.0616, "step": 11640 }, { "epoch": 14.04, "grad_norm": 32.700927734375, "learning_rate": 1.4643910661609777e-07, "loss": 0.0645, "step": 11650 }, { "epoch": 14.04, "grad_norm": 0.8842484951019287, "learning_rate": 1.4608793369855315e-07, "loss": 0.0543, "step": 11660 }, { "epoch": 14.04, "grad_norm": 8.40760326385498, "learning_rate": 1.4573676078100856e-07, "loss": 0.0718, "step": 11670 }, { "epoch": 14.04, "grad_norm": 0.7897697687149048, "learning_rate": 1.4538558786346397e-07, "loss": 0.0596, "step": 11680 }, { "epoch": 14.04, "grad_norm": 6.106139659881592, "learning_rate": 1.4503441494591935e-07, "loss": 0.042, "step": 11690 }, { "epoch": 14.04, "grad_norm": 2.239445686340332, "learning_rate": 1.4468324202837476e-07, "loss": 0.095, "step": 11700 }, { "epoch": 14.04, "grad_norm": 0.9157733917236328, "learning_rate": 1.4433206911083017e-07, "loss": 0.0759, "step": 11710 }, { "epoch": 14.04, "grad_norm": 14.096631050109863, "learning_rate": 1.4398089619328558e-07, "loss": 0.1015, "step": 11720 }, { "epoch": 14.04, "grad_norm": 16.670286178588867, "learning_rate": 1.4362972327574096e-07, "loss": 0.1016, "step": 11730 }, { "epoch": 14.04, "grad_norm": 0.11395718157291412, "learning_rate": 1.4327855035819637e-07, "loss": 0.145, "step": 11740 }, { "epoch": 14.04, "grad_norm": 6.837492942810059, "learning_rate": 1.4292737744065178e-07, "loss": 0.0067, "step": 11750 }, { "epoch": 14.04, "grad_norm": 28.17953109741211, "learning_rate": 1.4257620452310718e-07, "loss": 0.095, "step": 11760 }, { "epoch": 14.04, "grad_norm": 0.12252172827720642, "learning_rate": 1.4222503160556257e-07, "loss": 0.0393, "step": 11770 }, { "epoch": 14.04, "grad_norm": 0.473533034324646, "learning_rate": 1.4187385868801795e-07, "loss": 0.0588, "step": 11780 }, { "epoch": 14.04, "grad_norm": 0.3052389919757843, "learning_rate": 1.4152268577047338e-07, "loss": 0.0187, "step": 11790 }, { "epoch": 14.05, "grad_norm": 1.0078842639923096, "learning_rate": 1.4117151285292876e-07, "loss": 0.057, "step": 11800 }, { "epoch": 14.05, "grad_norm": 11.919440269470215, "learning_rate": 1.4082033993538417e-07, "loss": 0.0998, "step": 11810 }, { "epoch": 14.05, "grad_norm": 0.2942746877670288, "learning_rate": 1.4046916701783958e-07, "loss": 0.0658, "step": 11820 }, { "epoch": 14.05, "grad_norm": 0.335204541683197, "learning_rate": 1.40117994100295e-07, "loss": 0.0506, "step": 11830 }, { "epoch": 14.05, "grad_norm": 0.4161699116230011, "learning_rate": 1.3976682118275037e-07, "loss": 0.0461, "step": 11840 }, { "epoch": 14.05, "grad_norm": 0.09358175843954086, "learning_rate": 1.3941564826520578e-07, "loss": 0.1069, "step": 11850 }, { "epoch": 14.05, "grad_norm": 1.5287635326385498, "learning_rate": 1.390644753476612e-07, "loss": 0.047, "step": 11860 }, { "epoch": 14.05, "grad_norm": 28.451845169067383, "learning_rate": 1.387133024301166e-07, "loss": 0.0917, "step": 11870 }, { "epoch": 14.05, "grad_norm": 0.08681017905473709, "learning_rate": 1.3836212951257198e-07, "loss": 0.0222, "step": 11880 }, { "epoch": 14.05, "eval_accuracy": 0.9110957960027567, "eval_loss": 0.3240586817264557, "eval_precision": 0.9549961861174676, "eval_recall": 0.8628532046864231, "eval_runtime": 708.5288, "eval_samples_per_second": 4.096, "eval_steps_per_second": 0.294, "step": 11880 }, { "epoch": 15.0, "grad_norm": 21.65879249572754, "learning_rate": 1.3801095659502736e-07, "loss": 0.0777, "step": 11890 }, { "epoch": 15.0, "grad_norm": 1.1158398389816284, "learning_rate": 1.376597836774828e-07, "loss": 0.0508, "step": 11900 }, { "epoch": 15.0, "grad_norm": 99.83663177490234, "learning_rate": 1.3730861075993818e-07, "loss": 0.0553, "step": 11910 }, { "epoch": 15.0, "grad_norm": 20.445518493652344, "learning_rate": 1.3695743784239359e-07, "loss": 0.0941, "step": 11920 }, { "epoch": 15.0, "grad_norm": 35.167179107666016, "learning_rate": 1.36606264924849e-07, "loss": 0.1474, "step": 11930 }, { "epoch": 15.0, "grad_norm": 0.18860562145709991, "learning_rate": 1.362550920073044e-07, "loss": 0.0857, "step": 11940 }, { "epoch": 15.0, "grad_norm": 0.11728419363498688, "learning_rate": 1.3590391908975978e-07, "loss": 0.0943, "step": 11950 }, { "epoch": 15.01, "grad_norm": 4.3208489418029785, "learning_rate": 1.355527461722152e-07, "loss": 0.0294, "step": 11960 }, { "epoch": 15.01, "grad_norm": 25.430233001708984, "learning_rate": 1.352015732546706e-07, "loss": 0.0816, "step": 11970 }, { "epoch": 15.01, "grad_norm": 1.615471601486206, "learning_rate": 1.34850400337126e-07, "loss": 0.0549, "step": 11980 }, { "epoch": 15.01, "grad_norm": 0.22264164686203003, "learning_rate": 1.344992274195814e-07, "loss": 0.1493, "step": 11990 }, { "epoch": 15.01, "grad_norm": 5.44607400894165, "learning_rate": 1.3414805450203677e-07, "loss": 0.0887, "step": 12000 }, { "epoch": 15.01, "grad_norm": 1.5872764587402344, "learning_rate": 1.337968815844922e-07, "loss": 0.0523, "step": 12010 }, { "epoch": 15.01, "grad_norm": 8.871977806091309, "learning_rate": 1.334457086669476e-07, "loss": 0.0432, "step": 12020 }, { "epoch": 15.01, "grad_norm": 24.194425582885742, "learning_rate": 1.33094535749403e-07, "loss": 0.069, "step": 12030 }, { "epoch": 15.01, "grad_norm": 0.1807916760444641, "learning_rate": 1.327433628318584e-07, "loss": 0.1111, "step": 12040 }, { "epoch": 15.01, "grad_norm": 3.424373149871826, "learning_rate": 1.3239218991431382e-07, "loss": 0.0196, "step": 12050 }, { "epoch": 15.01, "grad_norm": 25.846370697021484, "learning_rate": 1.320410169967692e-07, "loss": 0.0731, "step": 12060 }, { "epoch": 15.01, "grad_norm": 0.18328817188739777, "learning_rate": 1.316898440792246e-07, "loss": 0.0083, "step": 12070 }, { "epoch": 15.01, "grad_norm": 19.324533462524414, "learning_rate": 1.3133867116168001e-07, "loss": 0.0368, "step": 12080 }, { "epoch": 15.01, "grad_norm": 37.728981018066406, "learning_rate": 1.309874982441354e-07, "loss": 0.0689, "step": 12090 }, { "epoch": 15.01, "grad_norm": 47.76528549194336, "learning_rate": 1.306363253265908e-07, "loss": 0.1011, "step": 12100 }, { "epoch": 15.01, "grad_norm": 45.22971725463867, "learning_rate": 1.3028515240904619e-07, "loss": 0.0585, "step": 12110 }, { "epoch": 15.02, "grad_norm": 1.014990210533142, "learning_rate": 1.2993397949150162e-07, "loss": 0.0096, "step": 12120 }, { "epoch": 15.02, "grad_norm": 0.3104914128780365, "learning_rate": 1.29582806573957e-07, "loss": 0.0571, "step": 12130 }, { "epoch": 15.02, "grad_norm": 1.2338793277740479, "learning_rate": 1.292316336564124e-07, "loss": 0.01, "step": 12140 }, { "epoch": 15.02, "grad_norm": 6.471231460571289, "learning_rate": 1.2888046073886782e-07, "loss": 0.0439, "step": 12150 }, { "epoch": 15.02, "grad_norm": 13.96291446685791, "learning_rate": 1.2852928782132323e-07, "loss": 0.0467, "step": 12160 }, { "epoch": 15.02, "grad_norm": 1.9484097957611084, "learning_rate": 1.281781149037786e-07, "loss": 0.0139, "step": 12170 }, { "epoch": 15.02, "grad_norm": 31.51078987121582, "learning_rate": 1.27826941986234e-07, "loss": 0.1691, "step": 12180 }, { "epoch": 15.02, "grad_norm": 6.898707866668701, "learning_rate": 1.2747576906868943e-07, "loss": 0.0779, "step": 12190 }, { "epoch": 15.02, "grad_norm": 16.291780471801758, "learning_rate": 1.271245961511448e-07, "loss": 0.1016, "step": 12200 }, { "epoch": 15.02, "grad_norm": 0.20827807486057281, "learning_rate": 1.2677342323360022e-07, "loss": 0.0657, "step": 12210 }, { "epoch": 15.02, "grad_norm": 13.180303573608398, "learning_rate": 1.264222503160556e-07, "loss": 0.0318, "step": 12220 }, { "epoch": 15.02, "grad_norm": 0.12029338628053665, "learning_rate": 1.2607107739851103e-07, "loss": 0.0124, "step": 12230 }, { "epoch": 15.02, "grad_norm": 29.312000274658203, "learning_rate": 1.2571990448096642e-07, "loss": 0.1273, "step": 12240 }, { "epoch": 15.02, "grad_norm": 2.5276153087615967, "learning_rate": 1.2536873156342182e-07, "loss": 0.0153, "step": 12250 }, { "epoch": 15.02, "grad_norm": 0.22180302441120148, "learning_rate": 1.2501755864587723e-07, "loss": 0.0353, "step": 12260 }, { "epoch": 15.02, "grad_norm": 0.13120393455028534, "learning_rate": 1.2466638572833264e-07, "loss": 0.0395, "step": 12270 }, { "epoch": 15.03, "grad_norm": 0.3034791648387909, "learning_rate": 1.2431521281078802e-07, "loss": 0.0098, "step": 12280 }, { "epoch": 15.03, "grad_norm": 2.633538246154785, "learning_rate": 1.2396403989324343e-07, "loss": 0.0766, "step": 12290 }, { "epoch": 15.03, "grad_norm": 29.58428382873535, "learning_rate": 1.2361286697569881e-07, "loss": 0.115, "step": 12300 }, { "epoch": 15.03, "grad_norm": 44.047672271728516, "learning_rate": 1.2326169405815422e-07, "loss": 0.0962, "step": 12310 }, { "epoch": 15.03, "grad_norm": 4.22855806350708, "learning_rate": 1.2291052114060963e-07, "loss": 0.0533, "step": 12320 }, { "epoch": 15.03, "grad_norm": 1.6050031185150146, "learning_rate": 1.2255934822306504e-07, "loss": 0.0685, "step": 12330 }, { "epoch": 15.03, "grad_norm": 1.2848570346832275, "learning_rate": 1.2220817530552045e-07, "loss": 0.0224, "step": 12340 }, { "epoch": 15.03, "grad_norm": 1.7510156631469727, "learning_rate": 1.2185700238797583e-07, "loss": 0.0126, "step": 12350 }, { "epoch": 15.03, "grad_norm": 0.07937000691890717, "learning_rate": 1.2150582947043124e-07, "loss": 0.0748, "step": 12360 }, { "epoch": 15.03, "grad_norm": 0.04092041403055191, "learning_rate": 1.2115465655288665e-07, "loss": 0.0594, "step": 12370 }, { "epoch": 15.03, "grad_norm": 0.4021724462509155, "learning_rate": 1.2080348363534203e-07, "loss": 0.0734, "step": 12380 }, { "epoch": 15.03, "grad_norm": 0.37181994318962097, "learning_rate": 1.2045231071779744e-07, "loss": 0.0366, "step": 12390 }, { "epoch": 15.03, "grad_norm": 37.123680114746094, "learning_rate": 1.2010113780025284e-07, "loss": 0.0802, "step": 12400 }, { "epoch": 15.03, "grad_norm": 26.093215942382812, "learning_rate": 1.1974996488270823e-07, "loss": 0.0474, "step": 12410 }, { "epoch": 15.03, "grad_norm": 0.13403576612472534, "learning_rate": 1.1939879196516364e-07, "loss": 0.1685, "step": 12420 }, { "epoch": 15.03, "grad_norm": 0.4053228199481964, "learning_rate": 1.1904761904761903e-07, "loss": 0.0118, "step": 12430 }, { "epoch": 15.04, "grad_norm": 0.11965583264827728, "learning_rate": 1.1869644613007444e-07, "loss": 0.1383, "step": 12440 }, { "epoch": 15.04, "grad_norm": 0.31700626015663147, "learning_rate": 1.1834527321252985e-07, "loss": 0.0056, "step": 12450 }, { "epoch": 15.04, "grad_norm": 29.105073928833008, "learning_rate": 1.1799410029498524e-07, "loss": 0.0814, "step": 12460 }, { "epoch": 15.04, "grad_norm": 1.1203097105026245, "learning_rate": 1.1764292737744065e-07, "loss": 0.0498, "step": 12470 }, { "epoch": 15.04, "grad_norm": 1.625084638595581, "learning_rate": 1.1729175445989605e-07, "loss": 0.0717, "step": 12480 }, { "epoch": 15.04, "grad_norm": 14.878692626953125, "learning_rate": 1.1694058154235145e-07, "loss": 0.0599, "step": 12490 }, { "epoch": 15.04, "grad_norm": 0.6626260280609131, "learning_rate": 1.1658940862480685e-07, "loss": 0.0047, "step": 12500 }, { "epoch": 15.04, "grad_norm": 2.2045600414276123, "learning_rate": 1.1623823570726226e-07, "loss": 0.0878, "step": 12510 }, { "epoch": 15.04, "grad_norm": 0.1339772790670395, "learning_rate": 1.1588706278971764e-07, "loss": 0.1023, "step": 12520 }, { "epoch": 15.04, "grad_norm": 0.13629591464996338, "learning_rate": 1.1553588987217305e-07, "loss": 0.0526, "step": 12530 }, { "epoch": 15.04, "grad_norm": 0.2166033536195755, "learning_rate": 1.1518471695462844e-07, "loss": 0.0376, "step": 12540 }, { "epoch": 15.04, "grad_norm": 5.4872846603393555, "learning_rate": 1.1483354403708385e-07, "loss": 0.0659, "step": 12550 }, { "epoch": 15.04, "grad_norm": 26.86058807373047, "learning_rate": 1.1448237111953926e-07, "loss": 0.112, "step": 12560 }, { "epoch": 15.04, "grad_norm": 4.422199249267578, "learning_rate": 1.1413119820199466e-07, "loss": 0.0247, "step": 12570 }, { "epoch": 15.04, "grad_norm": 1.0232410430908203, "learning_rate": 1.1378002528445006e-07, "loss": 0.0587, "step": 12580 }, { "epoch": 15.04, "grad_norm": 0.240591898560524, "learning_rate": 1.1342885236690546e-07, "loss": 0.0453, "step": 12590 }, { "epoch": 15.05, "grad_norm": 0.8222384452819824, "learning_rate": 1.1307767944936087e-07, "loss": 0.0793, "step": 12600 }, { "epoch": 15.05, "grad_norm": 4.788244247436523, "learning_rate": 1.1272650653181626e-07, "loss": 0.0093, "step": 12610 }, { "epoch": 15.05, "grad_norm": 1.8619331121444702, "learning_rate": 1.1237533361427167e-07, "loss": 0.052, "step": 12620 }, { "epoch": 15.05, "grad_norm": 0.370745450258255, "learning_rate": 1.1202416069672705e-07, "loss": 0.0182, "step": 12630 }, { "epoch": 15.05, "grad_norm": 45.18473434448242, "learning_rate": 1.1167298777918246e-07, "loss": 0.0788, "step": 12640 }, { "epoch": 15.05, "grad_norm": 30.501544952392578, "learning_rate": 1.1132181486163787e-07, "loss": 0.1853, "step": 12650 }, { "epoch": 15.05, "grad_norm": 17.85689926147461, "learning_rate": 1.1097064194409326e-07, "loss": 0.0662, "step": 12660 }, { "epoch": 15.05, "grad_norm": 0.22137707471847534, "learning_rate": 1.1061946902654867e-07, "loss": 0.0272, "step": 12670 }, { "epoch": 15.05, "eval_accuracy": 0.912474155754652, "eval_loss": 0.3268735408782959, "eval_precision": 0.9503386004514672, "eval_recall": 0.870434183321847, "eval_runtime": 756.8727, "eval_samples_per_second": 3.834, "eval_steps_per_second": 0.275, "step": 12672 }, { "epoch": 16.0, "grad_norm": 32.03059387207031, "learning_rate": 1.1026829610900407e-07, "loss": 0.0302, "step": 12680 }, { "epoch": 16.0, "grad_norm": 1.973822832107544, "learning_rate": 1.0991712319145948e-07, "loss": 0.0382, "step": 12690 }, { "epoch": 16.0, "grad_norm": 13.538702011108398, "learning_rate": 1.0956595027391487e-07, "loss": 0.1078, "step": 12700 }, { "epoch": 16.0, "grad_norm": 1.6987743377685547, "learning_rate": 1.0921477735637028e-07, "loss": 0.0438, "step": 12710 }, { "epoch": 16.0, "grad_norm": 0.39363548159599304, "learning_rate": 1.0886360443882566e-07, "loss": 0.0427, "step": 12720 }, { "epoch": 16.0, "grad_norm": 2.549145460128784, "learning_rate": 1.0851243152128107e-07, "loss": 0.0395, "step": 12730 }, { "epoch": 16.0, "grad_norm": 0.663968026638031, "learning_rate": 1.0816125860373647e-07, "loss": 0.0034, "step": 12740 }, { "epoch": 16.0, "grad_norm": 0.6324851512908936, "learning_rate": 1.0781008568619187e-07, "loss": 0.0428, "step": 12750 }, { "epoch": 16.01, "grad_norm": 31.81590461730957, "learning_rate": 1.0745891276864728e-07, "loss": 0.0362, "step": 12760 }, { "epoch": 16.01, "grad_norm": 0.41047272086143494, "learning_rate": 1.0710773985110268e-07, "loss": 0.0053, "step": 12770 }, { "epoch": 16.01, "grad_norm": 56.90262222290039, "learning_rate": 1.0675656693355809e-07, "loss": 0.0954, "step": 12780 }, { "epoch": 16.01, "grad_norm": 0.3523768186569214, "learning_rate": 1.0640539401601348e-07, "loss": 0.0438, "step": 12790 }, { "epoch": 16.01, "grad_norm": 9.697644233703613, "learning_rate": 1.0605422109846889e-07, "loss": 0.0154, "step": 12800 }, { "epoch": 16.01, "grad_norm": 26.983633041381836, "learning_rate": 1.0570304818092428e-07, "loss": 0.16, "step": 12810 }, { "epoch": 16.01, "grad_norm": 22.593935012817383, "learning_rate": 1.0535187526337968e-07, "loss": 0.0253, "step": 12820 }, { "epoch": 16.01, "grad_norm": 1.4272109270095825, "learning_rate": 1.0500070234583507e-07, "loss": 0.0185, "step": 12830 }, { "epoch": 16.01, "grad_norm": 1.3675788640975952, "learning_rate": 1.0464952942829048e-07, "loss": 0.0484, "step": 12840 }, { "epoch": 16.01, "grad_norm": 44.93605041503906, "learning_rate": 1.0429835651074588e-07, "loss": 0.0788, "step": 12850 }, { "epoch": 16.01, "grad_norm": 5.919560432434082, "learning_rate": 1.0394718359320129e-07, "loss": 0.0449, "step": 12860 }, { "epoch": 16.01, "grad_norm": 6.663865566253662, "learning_rate": 1.035960106756567e-07, "loss": 0.074, "step": 12870 }, { "epoch": 16.01, "grad_norm": 0.10598092526197433, "learning_rate": 1.0324483775811209e-07, "loss": 0.036, "step": 12880 }, { "epoch": 16.01, "grad_norm": 4.663928508758545, "learning_rate": 1.028936648405675e-07, "loss": 0.0771, "step": 12890 }, { "epoch": 16.01, "grad_norm": 0.3389574885368347, "learning_rate": 1.025424919230229e-07, "loss": 0.0468, "step": 12900 }, { "epoch": 16.02, "grad_norm": 0.1751919984817505, "learning_rate": 1.021913190054783e-07, "loss": 0.0095, "step": 12910 }, { "epoch": 16.02, "grad_norm": 10.143257141113281, "learning_rate": 1.0184014608793368e-07, "loss": 0.1068, "step": 12920 }, { "epoch": 16.02, "grad_norm": 1.4437679052352905, "learning_rate": 1.0148897317038909e-07, "loss": 0.1038, "step": 12930 }, { "epoch": 16.02, "grad_norm": 11.701947212219238, "learning_rate": 1.0113780025284449e-07, "loss": 0.0229, "step": 12940 }, { "epoch": 16.02, "grad_norm": 49.36358642578125, "learning_rate": 1.007866273352999e-07, "loss": 0.1182, "step": 12950 }, { "epoch": 16.02, "grad_norm": 19.001245498657227, "learning_rate": 1.0043545441775529e-07, "loss": 0.0788, "step": 12960 }, { "epoch": 16.02, "grad_norm": 1.2796884775161743, "learning_rate": 1.000842815002107e-07, "loss": 0.0517, "step": 12970 }, { "epoch": 16.02, "grad_norm": 1.2621207237243652, "learning_rate": 9.973310858266611e-08, "loss": 0.0272, "step": 12980 }, { "epoch": 16.02, "grad_norm": 0.5226961374282837, "learning_rate": 9.93819356651215e-08, "loss": 0.0481, "step": 12990 }, { "epoch": 16.02, "grad_norm": 4.151657581329346, "learning_rate": 9.903076274757691e-08, "loss": 0.0089, "step": 13000 }, { "epoch": 16.02, "grad_norm": 49.46442794799805, "learning_rate": 9.867958983003231e-08, "loss": 0.1257, "step": 13010 }, { "epoch": 16.02, "grad_norm": 24.786367416381836, "learning_rate": 9.83284169124877e-08, "loss": 0.1604, "step": 13020 }, { "epoch": 16.02, "grad_norm": 0.2529968321323395, "learning_rate": 9.79772439949431e-08, "loss": 0.0112, "step": 13030 }, { "epoch": 16.02, "grad_norm": 40.06565475463867, "learning_rate": 9.76260710773985e-08, "loss": 0.0306, "step": 13040 }, { "epoch": 16.02, "grad_norm": 1.1190885305404663, "learning_rate": 9.72748981598539e-08, "loss": 0.0466, "step": 13050 }, { "epoch": 16.02, "grad_norm": 0.7878612875938416, "learning_rate": 9.692372524230931e-08, "loss": 0.0523, "step": 13060 }, { "epoch": 16.03, "grad_norm": 0.10649153590202332, "learning_rate": 9.657255232476472e-08, "loss": 0.06, "step": 13070 }, { "epoch": 16.03, "grad_norm": 0.17869366705417633, "learning_rate": 9.622137940722011e-08, "loss": 0.0737, "step": 13080 }, { "epoch": 16.03, "grad_norm": 3.671959638595581, "learning_rate": 9.587020648967552e-08, "loss": 0.0643, "step": 13090 }, { "epoch": 16.03, "grad_norm": 0.6201514601707458, "learning_rate": 9.551903357213092e-08, "loss": 0.0269, "step": 13100 }, { "epoch": 16.03, "grad_norm": 4.06514835357666, "learning_rate": 9.516786065458632e-08, "loss": 0.0358, "step": 13110 }, { "epoch": 16.03, "grad_norm": 0.8762524127960205, "learning_rate": 9.48166877370417e-08, "loss": 0.0192, "step": 13120 }, { "epoch": 16.03, "grad_norm": 87.55220031738281, "learning_rate": 9.446551481949711e-08, "loss": 0.0792, "step": 13130 }, { "epoch": 16.03, "grad_norm": 0.41326168179512024, "learning_rate": 9.411434190195251e-08, "loss": 0.0572, "step": 13140 }, { "epoch": 16.03, "grad_norm": 3.5746569633483887, "learning_rate": 9.376316898440792e-08, "loss": 0.0962, "step": 13150 }, { "epoch": 16.03, "grad_norm": 1.394159197807312, "learning_rate": 9.341199606686331e-08, "loss": 0.0778, "step": 13160 }, { "epoch": 16.03, "grad_norm": 5.649251461029053, "learning_rate": 9.306082314931872e-08, "loss": 0.0693, "step": 13170 }, { "epoch": 16.03, "grad_norm": 6.51912260055542, "learning_rate": 9.270965023177413e-08, "loss": 0.0652, "step": 13180 }, { "epoch": 16.03, "grad_norm": 0.5930720567703247, "learning_rate": 9.235847731422953e-08, "loss": 0.0475, "step": 13190 }, { "epoch": 16.03, "grad_norm": 1.0964086055755615, "learning_rate": 9.200730439668493e-08, "loss": 0.0922, "step": 13200 }, { "epoch": 16.03, "grad_norm": 1.1141244173049927, "learning_rate": 9.165613147914032e-08, "loss": 0.0669, "step": 13210 }, { "epoch": 16.03, "grad_norm": 26.397735595703125, "learning_rate": 9.130495856159572e-08, "loss": 0.0617, "step": 13220 }, { "epoch": 16.04, "grad_norm": 8.856929779052734, "learning_rate": 9.095378564405112e-08, "loss": 0.0563, "step": 13230 }, { "epoch": 16.04, "grad_norm": 0.09228429198265076, "learning_rate": 9.060261272650653e-08, "loss": 0.0284, "step": 13240 }, { "epoch": 16.04, "grad_norm": 1.722373604774475, "learning_rate": 9.025143980896192e-08, "loss": 0.1646, "step": 13250 }, { "epoch": 16.04, "grad_norm": 16.24824333190918, "learning_rate": 8.990026689141733e-08, "loss": 0.0386, "step": 13260 }, { "epoch": 16.04, "grad_norm": 5.771405220031738, "learning_rate": 8.954909397387273e-08, "loss": 0.0763, "step": 13270 }, { "epoch": 16.04, "grad_norm": 0.25008299946784973, "learning_rate": 8.919792105632813e-08, "loss": 0.0848, "step": 13280 }, { "epoch": 16.04, "grad_norm": 4.051112174987793, "learning_rate": 8.884674813878354e-08, "loss": 0.0885, "step": 13290 }, { "epoch": 16.04, "grad_norm": 0.32601436972618103, "learning_rate": 8.849557522123894e-08, "loss": 0.0628, "step": 13300 }, { "epoch": 16.04, "grad_norm": 0.586367666721344, "learning_rate": 8.814440230369435e-08, "loss": 0.0737, "step": 13310 }, { "epoch": 16.04, "grad_norm": 1.1470545530319214, "learning_rate": 8.779322938614973e-08, "loss": 0.0764, "step": 13320 }, { "epoch": 16.04, "grad_norm": 0.5341897010803223, "learning_rate": 8.744205646860514e-08, "loss": 0.0427, "step": 13330 }, { "epoch": 16.04, "grad_norm": 2.56229829788208, "learning_rate": 8.709088355106053e-08, "loss": 0.113, "step": 13340 }, { "epoch": 16.04, "grad_norm": 72.86203002929688, "learning_rate": 8.673971063351594e-08, "loss": 0.1379, "step": 13350 }, { "epoch": 16.04, "grad_norm": 0.2823909819126129, "learning_rate": 8.638853771597134e-08, "loss": 0.0184, "step": 13360 }, { "epoch": 16.04, "grad_norm": 0.43383774161338806, "learning_rate": 8.603736479842674e-08, "loss": 0.0792, "step": 13370 }, { "epoch": 16.04, "grad_norm": 24.955232620239258, "learning_rate": 8.568619188088214e-08, "loss": 0.0619, "step": 13380 }, { "epoch": 16.05, "grad_norm": 4.978287696838379, "learning_rate": 8.533501896333755e-08, "loss": 0.0071, "step": 13390 }, { "epoch": 16.05, "grad_norm": 0.20706014335155487, "learning_rate": 8.498384604579296e-08, "loss": 0.0099, "step": 13400 }, { "epoch": 16.05, "grad_norm": 0.25609487295150757, "learning_rate": 8.463267312824834e-08, "loss": 0.0095, "step": 13410 }, { "epoch": 16.05, "grad_norm": 0.2861858904361725, "learning_rate": 8.428150021070375e-08, "loss": 0.0446, "step": 13420 }, { "epoch": 16.05, "grad_norm": 0.07336357235908508, "learning_rate": 8.393032729315914e-08, "loss": 0.1142, "step": 13430 }, { "epoch": 16.05, "grad_norm": 10.012815475463867, "learning_rate": 8.357915437561455e-08, "loss": 0.1162, "step": 13440 }, { "epoch": 16.05, "grad_norm": 0.5580765604972839, "learning_rate": 8.322798145806995e-08, "loss": 0.0293, "step": 13450 }, { "epoch": 16.05, "grad_norm": 0.20322297513484955, "learning_rate": 8.287680854052535e-08, "loss": 0.0657, "step": 13460 }, { "epoch": 16.05, "eval_accuracy": 0.9097174362508614, "eval_loss": 0.3401437997817993, "eval_precision": 0.9555555555555556, "eval_recall": 0.8594073053066851, "eval_runtime": 736.3069, "eval_samples_per_second": 3.941, "eval_steps_per_second": 0.282, "step": 13464 }, { "epoch": 17.0, "grad_norm": 3.454097270965576, "learning_rate": 8.252563562298075e-08, "loss": 0.053, "step": 13470 }, { "epoch": 17.0, "grad_norm": 1.769278883934021, "learning_rate": 8.217446270543616e-08, "loss": 0.0534, "step": 13480 }, { "epoch": 17.0, "grad_norm": 27.92290687561035, "learning_rate": 8.182328978789155e-08, "loss": 0.059, "step": 13490 }, { "epoch": 17.0, "grad_norm": 1.8984858989715576, "learning_rate": 8.147211687034696e-08, "loss": 0.0968, "step": 13500 }, { "epoch": 17.0, "grad_norm": 4.035393238067627, "learning_rate": 8.112094395280236e-08, "loss": 0.0764, "step": 13510 }, { "epoch": 17.0, "grad_norm": 2.443753242492676, "learning_rate": 8.076977103525775e-08, "loss": 0.0666, "step": 13520 }, { "epoch": 17.0, "grad_norm": 1.1805529594421387, "learning_rate": 8.041859811771316e-08, "loss": 0.076, "step": 13530 }, { "epoch": 17.0, "grad_norm": 0.20299974083900452, "learning_rate": 8.006742520016855e-08, "loss": 0.0846, "step": 13540 }, { "epoch": 17.01, "grad_norm": 1.3644189834594727, "learning_rate": 7.971625228262396e-08, "loss": 0.0083, "step": 13550 }, { "epoch": 17.01, "grad_norm": 0.20236322283744812, "learning_rate": 7.936507936507936e-08, "loss": 0.0819, "step": 13560 }, { "epoch": 17.01, "grad_norm": 0.7914273738861084, "learning_rate": 7.901390644753477e-08, "loss": 0.1405, "step": 13570 }, { "epoch": 17.01, "grad_norm": 5.954097747802734, "learning_rate": 7.866273352999016e-08, "loss": 0.0315, "step": 13580 }, { "epoch": 17.01, "grad_norm": 0.20768192410469055, "learning_rate": 7.831156061244557e-08, "loss": 0.0409, "step": 13590 }, { "epoch": 17.01, "grad_norm": 0.022792167961597443, "learning_rate": 7.796038769490098e-08, "loss": 0.0175, "step": 13600 }, { "epoch": 17.01, "grad_norm": 102.04741668701172, "learning_rate": 7.760921477735636e-08, "loss": 0.0569, "step": 13610 }, { "epoch": 17.01, "grad_norm": 12.832448959350586, "learning_rate": 7.725804185981177e-08, "loss": 0.0179, "step": 13620 }, { "epoch": 17.01, "grad_norm": 14.275715827941895, "learning_rate": 7.690686894226716e-08, "loss": 0.0497, "step": 13630 }, { "epoch": 17.01, "grad_norm": 0.06563232094049454, "learning_rate": 7.655569602472257e-08, "loss": 0.0162, "step": 13640 }, { "epoch": 17.01, "grad_norm": 0.23883360624313354, "learning_rate": 7.620452310717797e-08, "loss": 0.004, "step": 13650 }, { "epoch": 17.01, "grad_norm": 0.10138227045536041, "learning_rate": 7.585335018963338e-08, "loss": 0.154, "step": 13660 }, { "epoch": 17.01, "grad_norm": 0.5160626173019409, "learning_rate": 7.550217727208877e-08, "loss": 0.0508, "step": 13670 }, { "epoch": 17.01, "grad_norm": 1.058582067489624, "learning_rate": 7.515100435454418e-08, "loss": 0.0662, "step": 13680 }, { "epoch": 17.01, "grad_norm": 38.418033599853516, "learning_rate": 7.479983143699957e-08, "loss": 0.0803, "step": 13690 }, { "epoch": 17.01, "grad_norm": 0.485832154750824, "learning_rate": 7.444865851945498e-08, "loss": 0.0078, "step": 13700 }, { "epoch": 17.02, "grad_norm": 1.9883220195770264, "learning_rate": 7.409748560191038e-08, "loss": 0.0024, "step": 13710 }, { "epoch": 17.02, "grad_norm": 11.962109565734863, "learning_rate": 7.374631268436577e-08, "loss": 0.1276, "step": 13720 }, { "epoch": 17.02, "grad_norm": 0.059181489050388336, "learning_rate": 7.339513976682118e-08, "loss": 0.0479, "step": 13730 }, { "epoch": 17.02, "grad_norm": 1.3044936656951904, "learning_rate": 7.304396684927658e-08, "loss": 0.0183, "step": 13740 }, { "epoch": 17.02, "grad_norm": 8.185949325561523, "learning_rate": 7.269279393173199e-08, "loss": 0.0233, "step": 13750 }, { "epoch": 17.02, "grad_norm": 4.081211090087891, "learning_rate": 7.234162101418738e-08, "loss": 0.1523, "step": 13760 }, { "epoch": 17.02, "grad_norm": 0.17457932233810425, "learning_rate": 7.199044809664279e-08, "loss": 0.0541, "step": 13770 }, { "epoch": 17.02, "grad_norm": 0.1796157956123352, "learning_rate": 7.163927517909818e-08, "loss": 0.0152, "step": 13780 }, { "epoch": 17.02, "grad_norm": 2.867469072341919, "learning_rate": 7.128810226155359e-08, "loss": 0.0596, "step": 13790 }, { "epoch": 17.02, "grad_norm": 1.897428274154663, "learning_rate": 7.093692934400897e-08, "loss": 0.1093, "step": 13800 }, { "epoch": 17.02, "grad_norm": 70.76785278320312, "learning_rate": 7.058575642646438e-08, "loss": 0.0841, "step": 13810 }, { "epoch": 17.02, "grad_norm": 0.5365475416183472, "learning_rate": 7.023458350891979e-08, "loss": 0.0272, "step": 13820 }, { "epoch": 17.02, "grad_norm": 0.17314934730529785, "learning_rate": 6.988341059137519e-08, "loss": 0.0042, "step": 13830 }, { "epoch": 17.02, "grad_norm": 0.11074217408895493, "learning_rate": 6.95322376738306e-08, "loss": 0.0639, "step": 13840 }, { "epoch": 17.02, "grad_norm": 0.8851208090782166, "learning_rate": 6.918106475628599e-08, "loss": 0.1055, "step": 13850 }, { "epoch": 17.03, "grad_norm": 16.637081146240234, "learning_rate": 6.88298918387414e-08, "loss": 0.0279, "step": 13860 }, { "epoch": 17.03, "grad_norm": 1.2571285963058472, "learning_rate": 6.847871892119679e-08, "loss": 0.1265, "step": 13870 }, { "epoch": 17.03, "grad_norm": 0.15630467236042023, "learning_rate": 6.81275460036522e-08, "loss": 0.0321, "step": 13880 }, { "epoch": 17.03, "grad_norm": 28.117023468017578, "learning_rate": 6.77763730861076e-08, "loss": 0.0934, "step": 13890 }, { "epoch": 17.03, "grad_norm": 0.8499059677124023, "learning_rate": 6.7425200168563e-08, "loss": 0.0039, "step": 13900 }, { "epoch": 17.03, "grad_norm": 7.584344387054443, "learning_rate": 6.707402725101839e-08, "loss": 0.0717, "step": 13910 }, { "epoch": 17.03, "grad_norm": 1.1301287412643433, "learning_rate": 6.67228543334738e-08, "loss": 0.0232, "step": 13920 }, { "epoch": 17.03, "grad_norm": 40.773563385009766, "learning_rate": 6.63716814159292e-08, "loss": 0.1155, "step": 13930 }, { "epoch": 17.03, "grad_norm": 0.7173879146575928, "learning_rate": 6.60205084983846e-08, "loss": 0.0451, "step": 13940 }, { "epoch": 17.03, "grad_norm": 0.8172169327735901, "learning_rate": 6.566933558084001e-08, "loss": 0.0049, "step": 13950 }, { "epoch": 17.03, "grad_norm": 0.5219703912734985, "learning_rate": 6.53181626632954e-08, "loss": 0.0521, "step": 13960 }, { "epoch": 17.03, "grad_norm": 1.2188835144042969, "learning_rate": 6.496698974575081e-08, "loss": 0.0211, "step": 13970 }, { "epoch": 17.03, "grad_norm": 0.7205813527107239, "learning_rate": 6.46158168282062e-08, "loss": 0.069, "step": 13980 }, { "epoch": 17.03, "grad_norm": 67.60127258300781, "learning_rate": 6.426464391066161e-08, "loss": 0.122, "step": 13990 }, { "epoch": 17.03, "grad_norm": 1.109858512878418, "learning_rate": 6.3913470993117e-08, "loss": 0.0135, "step": 14000 }, { "epoch": 17.03, "grad_norm": 41.44175720214844, "learning_rate": 6.35622980755724e-08, "loss": 0.0288, "step": 14010 }, { "epoch": 17.04, "grad_norm": 41.50529861450195, "learning_rate": 6.32111251580278e-08, "loss": 0.0844, "step": 14020 }, { "epoch": 17.04, "grad_norm": 0.36855119466781616, "learning_rate": 6.285995224048321e-08, "loss": 0.0045, "step": 14030 }, { "epoch": 17.04, "grad_norm": 47.08305358886719, "learning_rate": 6.250877932293862e-08, "loss": 0.0163, "step": 14040 }, { "epoch": 17.04, "grad_norm": 0.2649679183959961, "learning_rate": 6.215760640539401e-08, "loss": 0.0594, "step": 14050 }, { "epoch": 17.04, "grad_norm": 3.378206253051758, "learning_rate": 6.180643348784941e-08, "loss": 0.0203, "step": 14060 }, { "epoch": 17.04, "grad_norm": 1.8004230260849, "learning_rate": 6.145526057030482e-08, "loss": 0.0729, "step": 14070 }, { "epoch": 17.04, "grad_norm": 3.259279251098633, "learning_rate": 6.110408765276022e-08, "loss": 0.154, "step": 14080 }, { "epoch": 17.04, "grad_norm": 0.6432997584342957, "learning_rate": 6.075291473521562e-08, "loss": 0.0511, "step": 14090 }, { "epoch": 17.04, "grad_norm": 0.4449607729911804, "learning_rate": 6.040174181767101e-08, "loss": 0.0545, "step": 14100 }, { "epoch": 17.04, "grad_norm": 23.72673225402832, "learning_rate": 6.005056890012642e-08, "loss": 0.092, "step": 14110 }, { "epoch": 17.04, "grad_norm": 0.6149857044219971, "learning_rate": 5.969939598258182e-08, "loss": 0.1331, "step": 14120 }, { "epoch": 17.04, "grad_norm": 0.08165881782770157, "learning_rate": 5.934822306503722e-08, "loss": 0.1179, "step": 14130 }, { "epoch": 17.04, "grad_norm": 20.979671478271484, "learning_rate": 5.899705014749262e-08, "loss": 0.1185, "step": 14140 }, { "epoch": 17.04, "grad_norm": 16.830991744995117, "learning_rate": 5.864587722994802e-08, "loss": 0.0855, "step": 14150 }, { "epoch": 17.04, "grad_norm": 0.17441485822200775, "learning_rate": 5.8294704312403425e-08, "loss": 0.0591, "step": 14160 }, { "epoch": 17.04, "grad_norm": 27.158857345581055, "learning_rate": 5.794353139485882e-08, "loss": 0.0813, "step": 14170 }, { "epoch": 17.05, "grad_norm": 35.461181640625, "learning_rate": 5.759235847731422e-08, "loss": 0.0517, "step": 14180 }, { "epoch": 17.05, "grad_norm": 0.36255577206611633, "learning_rate": 5.724118555976963e-08, "loss": 0.0252, "step": 14190 }, { "epoch": 17.05, "grad_norm": 4.514914512634277, "learning_rate": 5.689001264222503e-08, "loss": 0.0202, "step": 14200 }, { "epoch": 17.05, "grad_norm": 2.4596855640411377, "learning_rate": 5.6538839724680434e-08, "loss": 0.0949, "step": 14210 }, { "epoch": 17.05, "grad_norm": 0.5682011842727661, "learning_rate": 5.6187666807135835e-08, "loss": 0.0458, "step": 14220 }, { "epoch": 17.05, "grad_norm": 40.876808166503906, "learning_rate": 5.583649388959123e-08, "loss": 0.0315, "step": 14230 }, { "epoch": 17.05, "grad_norm": 0.13087686896324158, "learning_rate": 5.548532097204663e-08, "loss": 0.091, "step": 14240 }, { "epoch": 17.05, "grad_norm": 3.1052403450012207, "learning_rate": 5.5134148054502034e-08, "loss": 0.1083, "step": 14250 }, { "epoch": 17.05, "eval_accuracy": 0.9097174362508614, "eval_loss": 0.3423954248428345, "eval_precision": 0.954858454475899, "eval_recall": 0.8600964851826327, "eval_runtime": 790.9047, "eval_samples_per_second": 3.669, "eval_steps_per_second": 0.263, "step": 14256 }, { "epoch": 18.0, "grad_norm": 9.596434593200684, "learning_rate": 5.4782975136957436e-08, "loss": 0.0861, "step": 14260 }, { "epoch": 18.0, "grad_norm": 5.937694549560547, "learning_rate": 5.443180221941283e-08, "loss": 0.0603, "step": 14270 }, { "epoch": 18.0, "grad_norm": 29.645692825317383, "learning_rate": 5.408062930186823e-08, "loss": 0.1232, "step": 14280 }, { "epoch": 18.0, "grad_norm": 2.9003117084503174, "learning_rate": 5.372945638432364e-08, "loss": 0.0108, "step": 14290 }, { "epoch": 18.0, "grad_norm": 0.2588464617729187, "learning_rate": 5.337828346677904e-08, "loss": 0.0373, "step": 14300 }, { "epoch": 18.0, "grad_norm": 9.48279857635498, "learning_rate": 5.3027110549234445e-08, "loss": 0.0307, "step": 14310 }, { "epoch": 18.0, "grad_norm": 1.0034399032592773, "learning_rate": 5.267593763168984e-08, "loss": 0.0545, "step": 14320 }, { "epoch": 18.0, "grad_norm": 5.403934955596924, "learning_rate": 5.232476471414524e-08, "loss": 0.065, "step": 14330 }, { "epoch": 18.01, "grad_norm": 23.750272750854492, "learning_rate": 5.1973591796600643e-08, "loss": 0.1104, "step": 14340 }, { "epoch": 18.01, "grad_norm": 20.655855178833008, "learning_rate": 5.1622418879056045e-08, "loss": 0.0329, "step": 14350 }, { "epoch": 18.01, "grad_norm": 1.4666916131973267, "learning_rate": 5.127124596151145e-08, "loss": 0.1336, "step": 14360 }, { "epoch": 18.01, "grad_norm": 1.027911901473999, "learning_rate": 5.092007304396684e-08, "loss": 0.0739, "step": 14370 }, { "epoch": 18.01, "grad_norm": 36.20118713378906, "learning_rate": 5.0568900126422244e-08, "loss": 0.0552, "step": 14380 }, { "epoch": 18.01, "grad_norm": 10.404723167419434, "learning_rate": 5.0217727208877646e-08, "loss": 0.0261, "step": 14390 }, { "epoch": 18.01, "grad_norm": 35.86191940307617, "learning_rate": 4.9866554291333054e-08, "loss": 0.0646, "step": 14400 }, { "epoch": 18.01, "grad_norm": 0.1536494493484497, "learning_rate": 4.9515381373788456e-08, "loss": 0.0202, "step": 14410 }, { "epoch": 18.01, "grad_norm": 1.620671033859253, "learning_rate": 4.916420845624385e-08, "loss": 0.0991, "step": 14420 }, { "epoch": 18.01, "grad_norm": 0.2140575349330902, "learning_rate": 4.881303553869925e-08, "loss": 0.048, "step": 14430 }, { "epoch": 18.01, "grad_norm": 0.08463574945926666, "learning_rate": 4.8461862621154654e-08, "loss": 0.0852, "step": 14440 }, { "epoch": 18.01, "grad_norm": 0.9875814318656921, "learning_rate": 4.8110689703610056e-08, "loss": 0.0633, "step": 14450 }, { "epoch": 18.01, "grad_norm": 0.9503943920135498, "learning_rate": 4.775951678606546e-08, "loss": 0.0308, "step": 14460 }, { "epoch": 18.01, "grad_norm": 8.947669982910156, "learning_rate": 4.740834386852085e-08, "loss": 0.0153, "step": 14470 }, { "epoch": 18.01, "grad_norm": 0.3054357171058655, "learning_rate": 4.7057170950976255e-08, "loss": 0.0561, "step": 14480 }, { "epoch": 18.01, "grad_norm": 0.0791822001338005, "learning_rate": 4.670599803343166e-08, "loss": 0.0702, "step": 14490 }, { "epoch": 18.02, "grad_norm": 1.1325267553329468, "learning_rate": 4.6354825115887065e-08, "loss": 0.1046, "step": 14500 }, { "epoch": 18.02, "grad_norm": 4.453803062438965, "learning_rate": 4.600365219834247e-08, "loss": 0.0073, "step": 14510 }, { "epoch": 18.02, "grad_norm": 40.15250778198242, "learning_rate": 4.565247928079786e-08, "loss": 0.0307, "step": 14520 }, { "epoch": 18.02, "grad_norm": 46.167171478271484, "learning_rate": 4.5301306363253264e-08, "loss": 0.0799, "step": 14530 }, { "epoch": 18.02, "grad_norm": 0.0337214432656765, "learning_rate": 4.4950133445708666e-08, "loss": 0.0066, "step": 14540 }, { "epoch": 18.02, "grad_norm": 2.820535182952881, "learning_rate": 4.459896052816407e-08, "loss": 0.0652, "step": 14550 }, { "epoch": 18.02, "grad_norm": 25.72003936767578, "learning_rate": 4.424778761061947e-08, "loss": 0.0706, "step": 14560 }, { "epoch": 18.02, "grad_norm": 5.185825824737549, "learning_rate": 4.3896614693074864e-08, "loss": 0.0618, "step": 14570 }, { "epoch": 18.02, "grad_norm": 53.930747985839844, "learning_rate": 4.3545441775530266e-08, "loss": 0.0412, "step": 14580 }, { "epoch": 18.02, "grad_norm": 37.754249572753906, "learning_rate": 4.319426885798567e-08, "loss": 0.0255, "step": 14590 }, { "epoch": 18.02, "grad_norm": 14.39865779876709, "learning_rate": 4.284309594044107e-08, "loss": 0.0851, "step": 14600 }, { "epoch": 18.02, "grad_norm": 16.49296760559082, "learning_rate": 4.249192302289648e-08, "loss": 0.1012, "step": 14610 }, { "epoch": 18.02, "grad_norm": 70.44441986083984, "learning_rate": 4.214075010535187e-08, "loss": 0.1135, "step": 14620 }, { "epoch": 18.02, "grad_norm": 38.89972686767578, "learning_rate": 4.1789577187807275e-08, "loss": 0.1311, "step": 14630 }, { "epoch": 18.02, "grad_norm": 2.2962796688079834, "learning_rate": 4.143840427026268e-08, "loss": 0.0888, "step": 14640 }, { "epoch": 18.02, "grad_norm": 29.199787139892578, "learning_rate": 4.108723135271808e-08, "loss": 0.027, "step": 14650 }, { "epoch": 18.03, "grad_norm": 0.24337296187877655, "learning_rate": 4.073605843517348e-08, "loss": 0.0962, "step": 14660 }, { "epoch": 18.03, "grad_norm": 52.252159118652344, "learning_rate": 4.0384885517628875e-08, "loss": 0.1413, "step": 14670 }, { "epoch": 18.03, "grad_norm": 0.08331502228975296, "learning_rate": 4.003371260008428e-08, "loss": 0.0911, "step": 14680 }, { "epoch": 18.03, "grad_norm": 23.067533493041992, "learning_rate": 3.968253968253968e-08, "loss": 0.0574, "step": 14690 }, { "epoch": 18.03, "grad_norm": 0.6708474159240723, "learning_rate": 3.933136676499508e-08, "loss": 0.0629, "step": 14700 }, { "epoch": 18.03, "grad_norm": 1.3226832151412964, "learning_rate": 3.898019384745049e-08, "loss": 0.0354, "step": 14710 }, { "epoch": 18.03, "grad_norm": 0.6364083290100098, "learning_rate": 3.8629020929905884e-08, "loss": 0.0697, "step": 14720 }, { "epoch": 18.03, "grad_norm": 0.16067157685756683, "learning_rate": 3.8277848012361286e-08, "loss": 0.0045, "step": 14730 }, { "epoch": 18.03, "grad_norm": 0.06121142953634262, "learning_rate": 3.792667509481669e-08, "loss": 0.072, "step": 14740 }, { "epoch": 18.03, "grad_norm": 58.5839729309082, "learning_rate": 3.757550217727209e-08, "loss": 0.0524, "step": 14750 }, { "epoch": 18.03, "grad_norm": 0.11591964215040207, "learning_rate": 3.722432925972749e-08, "loss": 0.0087, "step": 14760 }, { "epoch": 18.03, "grad_norm": 1.915065050125122, "learning_rate": 3.6873156342182887e-08, "loss": 0.0562, "step": 14770 }, { "epoch": 18.03, "grad_norm": 0.10940331220626831, "learning_rate": 3.652198342463829e-08, "loss": 0.0288, "step": 14780 }, { "epoch": 18.03, "grad_norm": 0.7604038715362549, "learning_rate": 3.617081050709369e-08, "loss": 0.0731, "step": 14790 }, { "epoch": 18.03, "grad_norm": 15.378995895385742, "learning_rate": 3.581963758954909e-08, "loss": 0.0107, "step": 14800 }, { "epoch": 18.04, "grad_norm": 54.52976989746094, "learning_rate": 3.546846467200449e-08, "loss": 0.0747, "step": 14810 }, { "epoch": 18.04, "grad_norm": 0.07162602990865707, "learning_rate": 3.5117291754459895e-08, "loss": 0.0062, "step": 14820 }, { "epoch": 18.04, "grad_norm": 41.741146087646484, "learning_rate": 3.47661188369153e-08, "loss": 0.0797, "step": 14830 }, { "epoch": 18.04, "grad_norm": 1.7149418592453003, "learning_rate": 3.44149459193707e-08, "loss": 0.1116, "step": 14840 }, { "epoch": 18.04, "grad_norm": 2.146855354309082, "learning_rate": 3.40637730018261e-08, "loss": 0.0941, "step": 14850 }, { "epoch": 18.04, "grad_norm": 18.62420654296875, "learning_rate": 3.37126000842815e-08, "loss": 0.1085, "step": 14860 }, { "epoch": 18.04, "grad_norm": 56.026275634765625, "learning_rate": 3.33614271667369e-08, "loss": 0.0619, "step": 14870 }, { "epoch": 18.04, "grad_norm": 0.5987139344215393, "learning_rate": 3.30102542491923e-08, "loss": 0.006, "step": 14880 }, { "epoch": 18.04, "grad_norm": 66.54093170166016, "learning_rate": 3.26590813316477e-08, "loss": 0.0799, "step": 14890 }, { "epoch": 18.04, "grad_norm": 1.3142921924591064, "learning_rate": 3.23079084141031e-08, "loss": 0.0298, "step": 14900 }, { "epoch": 18.04, "grad_norm": 2.5834734439849854, "learning_rate": 3.19567354965585e-08, "loss": 0.0833, "step": 14910 }, { "epoch": 18.04, "grad_norm": 0.1973741501569748, "learning_rate": 3.16055625790139e-08, "loss": 0.0072, "step": 14920 }, { "epoch": 18.04, "grad_norm": 0.14324507117271423, "learning_rate": 3.125438966146931e-08, "loss": 0.0416, "step": 14930 }, { "epoch": 18.04, "grad_norm": 0.04576882719993591, "learning_rate": 3.0903216743924703e-08, "loss": 0.1716, "step": 14940 }, { "epoch": 18.04, "grad_norm": 0.7260708212852478, "learning_rate": 3.055204382638011e-08, "loss": 0.0293, "step": 14950 }, { "epoch": 18.04, "grad_norm": 0.2393227070569992, "learning_rate": 3.020087090883551e-08, "loss": 0.0442, "step": 14960 }, { "epoch": 18.05, "grad_norm": 1.042154312133789, "learning_rate": 2.984969799129091e-08, "loss": 0.0624, "step": 14970 }, { "epoch": 18.05, "grad_norm": 0.19061008095741272, "learning_rate": 2.949852507374631e-08, "loss": 0.0255, "step": 14980 }, { "epoch": 18.05, "grad_norm": 4.478461265563965, "learning_rate": 2.9147352156201712e-08, "loss": 0.076, "step": 14990 }, { "epoch": 18.05, "grad_norm": 0.05673850327730179, "learning_rate": 2.879617923865711e-08, "loss": 0.0035, "step": 15000 }, { "epoch": 18.05, "grad_norm": 0.47382786870002747, "learning_rate": 2.8445006321112516e-08, "loss": 0.0225, "step": 15010 }, { "epoch": 18.05, "grad_norm": 2.153986930847168, "learning_rate": 2.8093833403567918e-08, "loss": 0.1156, "step": 15020 }, { "epoch": 18.05, "grad_norm": 1.0099316835403442, "learning_rate": 2.7742660486023316e-08, "loss": 0.0097, "step": 15030 }, { "epoch": 18.05, "grad_norm": 0.04206348955631256, "learning_rate": 2.7391487568478718e-08, "loss": 0.0059, "step": 15040 }, { "epoch": 18.05, "eval_accuracy": 0.9093728463128876, "eval_loss": 0.346143901348114, "eval_precision": 0.9555214723926381, "eval_recall": 0.8587181254307374, "eval_runtime": 757.3933, "eval_samples_per_second": 3.832, "eval_steps_per_second": 0.275, "step": 15048 }, { "epoch": 19.0, "grad_norm": 1.616142749786377, "learning_rate": 2.7040314650934116e-08, "loss": 0.0712, "step": 15050 }, { "epoch": 19.0, "grad_norm": 0.27258121967315674, "learning_rate": 2.668914173338952e-08, "loss": 0.0219, "step": 15060 }, { "epoch": 19.0, "grad_norm": 3.223154306411743, "learning_rate": 2.633796881584492e-08, "loss": 0.0815, "step": 15070 }, { "epoch": 19.0, "grad_norm": 0.8037530779838562, "learning_rate": 2.5986795898300322e-08, "loss": 0.0618, "step": 15080 }, { "epoch": 19.0, "grad_norm": 0.44467395544052124, "learning_rate": 2.5635622980755723e-08, "loss": 0.0033, "step": 15090 }, { "epoch": 19.0, "grad_norm": 4.022994518280029, "learning_rate": 2.5284450063211122e-08, "loss": 0.1035, "step": 15100 }, { "epoch": 19.0, "grad_norm": 7.198676109313965, "learning_rate": 2.4933277145666527e-08, "loss": 0.0629, "step": 15110 }, { "epoch": 19.0, "grad_norm": 2.59739351272583, "learning_rate": 2.4582104228121925e-08, "loss": 0.0104, "step": 15120 }, { "epoch": 19.01, "grad_norm": 0.4191197156906128, "learning_rate": 2.4230931310577327e-08, "loss": 0.0542, "step": 15130 }, { "epoch": 19.01, "grad_norm": 22.535751342773438, "learning_rate": 2.387975839303273e-08, "loss": 0.0097, "step": 15140 }, { "epoch": 19.01, "grad_norm": 0.6531215310096741, "learning_rate": 2.3528585475488127e-08, "loss": 0.0074, "step": 15150 }, { "epoch": 19.01, "grad_norm": 1.1146769523620605, "learning_rate": 2.3177412557943533e-08, "loss": 0.0211, "step": 15160 }, { "epoch": 19.01, "grad_norm": 40.62851333618164, "learning_rate": 2.282623964039893e-08, "loss": 0.0275, "step": 15170 }, { "epoch": 19.01, "grad_norm": 9.290081977844238, "learning_rate": 2.2475066722854333e-08, "loss": 0.1289, "step": 15180 }, { "epoch": 19.01, "grad_norm": 1.1785346269607544, "learning_rate": 2.2123893805309735e-08, "loss": 0.0587, "step": 15190 }, { "epoch": 19.01, "grad_norm": 2.7454476356506348, "learning_rate": 2.1772720887765133e-08, "loss": 0.0619, "step": 15200 }, { "epoch": 19.01, "grad_norm": 0.9285054206848145, "learning_rate": 2.1421547970220535e-08, "loss": 0.1011, "step": 15210 }, { "epoch": 19.01, "grad_norm": 1.1619677543640137, "learning_rate": 2.1070375052675937e-08, "loss": 0.0087, "step": 15220 }, { "epoch": 19.01, "grad_norm": 27.885623931884766, "learning_rate": 2.071920213513134e-08, "loss": 0.147, "step": 15230 }, { "epoch": 19.01, "grad_norm": 0.025918371975421906, "learning_rate": 2.036802921758674e-08, "loss": 0.0529, "step": 15240 }, { "epoch": 19.01, "grad_norm": 5.489053249359131, "learning_rate": 2.001685630004214e-08, "loss": 0.0915, "step": 15250 }, { "epoch": 19.01, "grad_norm": 0.9791603088378906, "learning_rate": 1.966568338249754e-08, "loss": 0.0049, "step": 15260 }, { "epoch": 19.01, "grad_norm": 26.683958053588867, "learning_rate": 1.9314510464952942e-08, "loss": 0.0437, "step": 15270 }, { "epoch": 19.01, "grad_norm": 1.208604097366333, "learning_rate": 1.8963337547408344e-08, "loss": 0.0647, "step": 15280 }, { "epoch": 19.02, "grad_norm": 3.8144545555114746, "learning_rate": 1.8612164629863746e-08, "loss": 0.0355, "step": 15290 }, { "epoch": 19.02, "grad_norm": 0.35762450098991394, "learning_rate": 1.8260991712319144e-08, "loss": 0.0615, "step": 15300 }, { "epoch": 19.02, "grad_norm": 0.08759688585996628, "learning_rate": 1.7909818794774546e-08, "loss": 0.0485, "step": 15310 }, { "epoch": 19.02, "grad_norm": 3.606381416320801, "learning_rate": 1.7558645877229948e-08, "loss": 0.0845, "step": 15320 }, { "epoch": 19.02, "grad_norm": 39.8365592956543, "learning_rate": 1.720747295968535e-08, "loss": 0.0323, "step": 15330 }, { "epoch": 19.02, "grad_norm": 0.8489830493927002, "learning_rate": 1.685630004214075e-08, "loss": 0.0224, "step": 15340 }, { "epoch": 19.02, "grad_norm": 40.61576843261719, "learning_rate": 1.650512712459615e-08, "loss": 0.0385, "step": 15350 }, { "epoch": 19.02, "grad_norm": 50.769920349121094, "learning_rate": 1.615395420705155e-08, "loss": 0.073, "step": 15360 }, { "epoch": 19.02, "grad_norm": 0.7444109916687012, "learning_rate": 1.580278128950695e-08, "loss": 0.0358, "step": 15370 }, { "epoch": 19.02, "grad_norm": 28.924657821655273, "learning_rate": 1.5451608371962352e-08, "loss": 0.0285, "step": 15380 }, { "epoch": 19.02, "grad_norm": 0.45112141966819763, "learning_rate": 1.5100435454417754e-08, "loss": 0.066, "step": 15390 }, { "epoch": 19.02, "grad_norm": 18.0377197265625, "learning_rate": 1.4749262536873155e-08, "loss": 0.0662, "step": 15400 }, { "epoch": 19.02, "grad_norm": 0.3616965711116791, "learning_rate": 1.4398089619328555e-08, "loss": 0.1021, "step": 15410 }, { "epoch": 19.02, "grad_norm": 0.9568153023719788, "learning_rate": 1.4046916701783959e-08, "loss": 0.0509, "step": 15420 }, { "epoch": 19.02, "grad_norm": 6.927822589874268, "learning_rate": 1.3695743784239359e-08, "loss": 0.0425, "step": 15430 }, { "epoch": 19.02, "grad_norm": 0.41615933179855347, "learning_rate": 1.334457086669476e-08, "loss": 0.0555, "step": 15440 }, { "epoch": 19.03, "grad_norm": 0.4636135995388031, "learning_rate": 1.2993397949150161e-08, "loss": 0.0068, "step": 15450 }, { "epoch": 19.03, "grad_norm": 0.06087684631347656, "learning_rate": 1.2642225031605561e-08, "loss": 0.0279, "step": 15460 }, { "epoch": 19.03, "grad_norm": 1.5282738208770752, "learning_rate": 1.2291052114060963e-08, "loss": 0.0403, "step": 15470 }, { "epoch": 19.03, "grad_norm": 16.40428924560547, "learning_rate": 1.1939879196516365e-08, "loss": 0.0234, "step": 15480 }, { "epoch": 19.03, "grad_norm": 6.117326736450195, "learning_rate": 1.1588706278971766e-08, "loss": 0.029, "step": 15490 }, { "epoch": 19.03, "grad_norm": 0.23214979469776154, "learning_rate": 1.1237533361427166e-08, "loss": 0.0105, "step": 15500 }, { "epoch": 19.03, "grad_norm": 0.08815844357013702, "learning_rate": 1.0886360443882567e-08, "loss": 0.0582, "step": 15510 }, { "epoch": 19.03, "grad_norm": 17.190937042236328, "learning_rate": 1.0535187526337968e-08, "loss": 0.0863, "step": 15520 }, { "epoch": 19.03, "grad_norm": 54.02675247192383, "learning_rate": 1.018401460879337e-08, "loss": 0.0932, "step": 15530 }, { "epoch": 19.03, "grad_norm": 0.9480177164077759, "learning_rate": 9.83284169124877e-09, "loss": 0.0145, "step": 15540 }, { "epoch": 19.03, "grad_norm": 0.0315365269780159, "learning_rate": 9.481668773704172e-09, "loss": 0.0898, "step": 15550 }, { "epoch": 19.03, "grad_norm": 0.1607980579137802, "learning_rate": 9.130495856159572e-09, "loss": 0.0356, "step": 15560 }, { "epoch": 19.03, "grad_norm": 46.33287048339844, "learning_rate": 8.779322938614974e-09, "loss": 0.1015, "step": 15570 }, { "epoch": 19.03, "grad_norm": 1.4551403522491455, "learning_rate": 8.428150021070376e-09, "loss": 0.0385, "step": 15580 }, { "epoch": 19.03, "grad_norm": 0.6073951721191406, "learning_rate": 8.076977103525776e-09, "loss": 0.0568, "step": 15590 }, { "epoch": 19.03, "grad_norm": 1.7435530424118042, "learning_rate": 7.725804185981176e-09, "loss": 0.0829, "step": 15600 }, { "epoch": 19.04, "grad_norm": 0.3223247826099396, "learning_rate": 7.374631268436578e-09, "loss": 0.0707, "step": 15610 }, { "epoch": 19.04, "grad_norm": 0.6448749899864197, "learning_rate": 7.023458350891979e-09, "loss": 0.0383, "step": 15620 }, { "epoch": 19.04, "grad_norm": 29.08024787902832, "learning_rate": 6.67228543334738e-09, "loss": 0.0811, "step": 15630 }, { "epoch": 19.04, "grad_norm": 14.624069213867188, "learning_rate": 6.3211125158027805e-09, "loss": 0.0083, "step": 15640 }, { "epoch": 19.04, "grad_norm": 5.21597146987915, "learning_rate": 5.969939598258182e-09, "loss": 0.0905, "step": 15650 }, { "epoch": 19.04, "grad_norm": 15.867218971252441, "learning_rate": 5.618766680713583e-09, "loss": 0.1495, "step": 15660 }, { "epoch": 19.04, "grad_norm": 0.09749890863895416, "learning_rate": 5.267593763168984e-09, "loss": 0.0277, "step": 15670 }, { "epoch": 19.04, "grad_norm": 0.4762328267097473, "learning_rate": 4.916420845624385e-09, "loss": 0.0666, "step": 15680 }, { "epoch": 19.04, "grad_norm": 0.9537943601608276, "learning_rate": 4.565247928079786e-09, "loss": 0.1046, "step": 15690 }, { "epoch": 19.04, "grad_norm": 0.13022573292255402, "learning_rate": 4.214075010535188e-09, "loss": 0.0642, "step": 15700 }, { "epoch": 19.04, "grad_norm": 1.101835012435913, "learning_rate": 3.862902092990588e-09, "loss": 0.047, "step": 15710 }, { "epoch": 19.04, "grad_norm": 0.7385942935943604, "learning_rate": 3.5117291754459897e-09, "loss": 0.0388, "step": 15720 }, { "epoch": 19.04, "grad_norm": 7.683347702026367, "learning_rate": 3.1605562579013902e-09, "loss": 0.0459, "step": 15730 }, { "epoch": 19.04, "grad_norm": 2.495002508163452, "learning_rate": 2.8093833403567916e-09, "loss": 0.0267, "step": 15740 }, { "epoch": 19.04, "grad_norm": 2.3338894844055176, "learning_rate": 2.4582104228121925e-09, "loss": 0.0601, "step": 15750 }, { "epoch": 19.05, "grad_norm": 0.10135813802480698, "learning_rate": 2.107037505267594e-09, "loss": 0.0277, "step": 15760 }, { "epoch": 19.05, "grad_norm": 2.0385422706604004, "learning_rate": 1.7558645877229949e-09, "loss": 0.1346, "step": 15770 }, { "epoch": 19.05, "grad_norm": 3.0810725688934326, "learning_rate": 1.4046916701783958e-09, "loss": 0.1058, "step": 15780 }, { "epoch": 19.05, "grad_norm": 0.9042685031890869, "learning_rate": 1.053518752633797e-09, "loss": 0.1046, "step": 15790 }, { "epoch": 19.05, "grad_norm": 3.7825100421905518, "learning_rate": 7.023458350891979e-10, "loss": 0.0477, "step": 15800 }, { "epoch": 19.05, "grad_norm": 4.027615070343018, "learning_rate": 3.5117291754459895e-10, "loss": 0.0412, "step": 15810 }, { "epoch": 19.05, "grad_norm": 2.243905544281006, "learning_rate": 0.0, "loss": 0.0143, "step": 15820 }, { "epoch": 19.05, "eval_accuracy": 0.9093728463128876, "eval_loss": 0.3462044894695282, "eval_precision": 0.9555214723926381, "eval_recall": 0.8587181254307374, "eval_runtime": 693.1753, "eval_samples_per_second": 4.187, "eval_steps_per_second": 0.3, "step": 15820 }, { "epoch": 19.05, "step": 15820, "total_flos": 2.7578898345860216e+20, "train_loss": 0.1489644473780055, "train_runtime": 79490.5014, "train_samples_per_second": 2.786, "train_steps_per_second": 0.199 }, { "epoch": 19.05, "eval_accuracy": 0.9159200551343901, "eval_loss": 0.22648051381111145, "eval_precision": 0.9507094846900672, "eval_recall": 0.8773259820813232, "eval_runtime": 795.689, "eval_samples_per_second": 3.647, "eval_steps_per_second": 0.261, "step": 15820 } ], "logging_steps": 10, "max_steps": 15820, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 2.7578898345860216e+20, "train_batch_size": 14, "trial_name": null, "trial_params": null }