diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,38791 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "global_step": 64453, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999997030225484e-05, + "loss": 3.0012, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998812090263e-05, + "loss": 2.9619, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997327203357e-05, + "loss": 2.8142, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995248362182e-05, + "loss": 2.7896, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992575567232e-05, + "loss": 2.8117, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989308819143e-05, + "loss": 2.7935, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998544811869e-05, + "loss": 2.7057, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999809934667904e-05, + "loss": 2.5933, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999759448645024e-05, + "loss": 2.6133, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999703023130264e-05, + "loss": 2.6661, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964065813702e-05, + "loss": 2.7384, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999572353680116e-05, + "loss": 2.6686, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 4.999949810977577e-05, + "loss": 2.5632, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999417926441636e-05, + "loss": 2.592, + "step": 140 + }, + { + "epoch": 0.0, + "learning_rate": 4.999933180369675e-05, + "loss": 2.8662, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999239741561584e-05, + "loss": 2.6443, + "step": 160 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999141740058e-05, + "loss": 2.3841, + "step": 170 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999037799209286e-05, + "loss": 2.3181, + "step": 180 + }, + { + "epoch": 0.0, + "learning_rate": 4.999892791904014e-05, + "loss": 2.8112, + "step": 190 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998812099576664e-05, + "loss": 2.5552, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998690340846375e-05, + "loss": 2.6409, + "step": 210 + }, + { + "epoch": 0.0, + "learning_rate": 4.99985626428782e-05, + "loss": 2.8176, + "step": 220 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998429005702475e-05, + "loss": 2.5206, + "step": 230 + }, + { + "epoch": 0.0, + "learning_rate": 4.999828942935097e-05, + "loss": 2.3381, + "step": 240 + }, + { + "epoch": 0.0, + "learning_rate": 4.999814391385681e-05, + "loss": 2.3726, + "step": 250 + }, + { + "epoch": 0.0, + "learning_rate": 4.999799245925459e-05, + "loss": 2.5409, + "step": 260 + }, + { + "epoch": 0.0, + "learning_rate": 4.999783506558029e-05, + "loss": 2.5615, + "step": 270 + }, + { + "epoch": 0.0, + "learning_rate": 4.99976717328713e-05, + "loss": 2.4968, + "step": 280 + }, + { + "epoch": 0.0, + "learning_rate": 4.999750246116643e-05, + "loss": 2.4802, + "step": 290 + }, + { + "epoch": 0.0, + "learning_rate": 4.99973272505059e-05, + "loss": 2.459, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 4.999714610093132e-05, + "loss": 2.4231, + "step": 310 + }, + { + "epoch": 0.0, + "learning_rate": 4.999695901248575e-05, + "loss": 2.3061, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 4.999676598521362e-05, + "loss": 2.5796, + "step": 330 + }, + { + "epoch": 0.01, + "learning_rate": 4.99965670191608e-05, + "loss": 2.52, + "step": 340 + }, + { + "epoch": 0.01, + "learning_rate": 4.999636211437455e-05, + "loss": 2.4434, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996151270903566e-05, + "loss": 2.8005, + "step": 360 + }, + { + "epoch": 0.01, + "learning_rate": 4.999593448879793e-05, + "loss": 2.59, + "step": 370 + }, + { + "epoch": 0.01, + "learning_rate": 4.999571176810916e-05, + "loss": 2.6115, + "step": 380 + }, + { + "epoch": 0.01, + "learning_rate": 4.999548310889015e-05, + "loss": 2.5592, + "step": 390 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995248511195236e-05, + "loss": 2.5598, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995007975080155e-05, + "loss": 2.459, + "step": 410 + }, + { + "epoch": 0.01, + "learning_rate": 4.999476150060205e-05, + "loss": 2.436, + "step": 420 + }, + { + "epoch": 0.01, + "learning_rate": 4.999450908781949e-05, + "loss": 2.4784, + "step": 430 + }, + { + "epoch": 0.01, + "learning_rate": 4.999425073679243e-05, + "loss": 2.4681, + "step": 440 + }, + { + "epoch": 0.01, + "learning_rate": 4.999398644758225e-05, + "loss": 2.6846, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993716220251754e-05, + "loss": 2.5075, + "step": 460 + }, + { + "epoch": 0.01, + "learning_rate": 4.999344005486513e-05, + "loss": 2.4964, + "step": 470 + }, + { + "epoch": 0.01, + "learning_rate": 4.999315795148799e-05, + "loss": 2.4126, + "step": 480 + }, + { + "epoch": 0.01, + "learning_rate": 4.999286991018736e-05, + "loss": 2.4928, + "step": 490 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992575931031685e-05, + "loss": 2.416, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992276014090786e-05, + "loss": 2.345, + "step": 510 + }, + { + "epoch": 0.01, + "learning_rate": 4.999197015943594e-05, + "loss": 2.2783, + "step": 520 + }, + { + "epoch": 0.01, + "learning_rate": 4.999165836713979e-05, + "loss": 2.2773, + "step": 530 + }, + { + "epoch": 0.01, + "learning_rate": 4.999134063727643e-05, + "loss": 2.4833, + "step": 540 + }, + { + "epoch": 0.01, + "learning_rate": 4.999101696992135e-05, + "loss": 2.3003, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 4.999068736515143e-05, + "loss": 2.5178, + "step": 560 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990351823044995e-05, + "loss": 2.5985, + "step": 570 + }, + { + "epoch": 0.01, + "learning_rate": 4.999001034368175e-05, + "loss": 2.6049, + "step": 580 + }, + { + "epoch": 0.01, + "learning_rate": 4.998966292714283e-05, + "loss": 2.5506, + "step": 590 + }, + { + "epoch": 0.01, + "learning_rate": 4.998930957351078e-05, + "loss": 2.3595, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 4.998895028286955e-05, + "loss": 2.232, + "step": 610 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988585055304485e-05, + "loss": 2.2808, + "step": 620 + }, + { + "epoch": 0.01, + "learning_rate": 4.998821389090238e-05, + "loss": 2.2583, + "step": 630 + }, + { + "epoch": 0.01, + "learning_rate": 4.99878367897514e-05, + "loss": 2.4466, + "step": 640 + }, + { + "epoch": 0.01, + "learning_rate": 4.998745375194114e-05, + "loss": 2.6608, + "step": 650 + }, + { + "epoch": 0.01, + "learning_rate": 4.998706477756261e-05, + "loss": 2.3876, + "step": 660 + }, + { + "epoch": 0.01, + "learning_rate": 4.998666986670821e-05, + "loss": 2.3225, + "step": 670 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986269019471784e-05, + "loss": 2.4232, + "step": 680 + }, + { + "epoch": 0.01, + "learning_rate": 4.998586223594855e-05, + "loss": 2.2644, + "step": 690 + }, + { + "epoch": 0.01, + "learning_rate": 4.998544951623516e-05, + "loss": 2.4051, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 4.998503086042966e-05, + "loss": 2.3918, + "step": 710 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984606268631524e-05, + "loss": 2.3766, + "step": 720 + }, + { + "epoch": 0.01, + "learning_rate": 4.998417574094162e-05, + "loss": 2.3092, + "step": 730 + }, + { + "epoch": 0.01, + "learning_rate": 4.998373927746225e-05, + "loss": 2.4057, + "step": 740 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983296878297085e-05, + "loss": 2.3432, + "step": 750 + }, + { + "epoch": 0.01, + "learning_rate": 4.998284854355125e-05, + "loss": 2.4986, + "step": 760 + }, + { + "epoch": 0.01, + "learning_rate": 4.998239427333126e-05, + "loss": 2.5343, + "step": 770 + }, + { + "epoch": 0.01, + "learning_rate": 4.998193406774502e-05, + "loss": 2.5031, + "step": 780 + }, + { + "epoch": 0.01, + "learning_rate": 4.99814679269019e-05, + "loss": 2.494, + "step": 790 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980995850912623e-05, + "loss": 2.2148, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 4.998051783988935e-05, + "loss": 2.3924, + "step": 810 + }, + { + "epoch": 0.01, + "learning_rate": 4.998003389394565e-05, + "loss": 2.417, + "step": 820 + }, + { + "epoch": 0.01, + "learning_rate": 4.997954401319651e-05, + "loss": 2.4294, + "step": 830 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979048197758295e-05, + "loss": 2.5631, + "step": 840 + }, + { + "epoch": 0.01, + "learning_rate": 4.997854644774882e-05, + "loss": 2.3411, + "step": 850 + }, + { + "epoch": 0.01, + "learning_rate": 4.997803876328728e-05, + "loss": 2.3587, + "step": 860 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977525144494296e-05, + "loss": 2.3098, + "step": 870 + }, + { + "epoch": 0.01, + "learning_rate": 4.99770055914919e-05, + "loss": 2.2527, + "step": 880 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976480104403536e-05, + "loss": 2.5115, + "step": 890 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975948683354023e-05, + "loss": 2.3659, + "step": 900 + }, + { + "epoch": 0.01, + "learning_rate": 4.997541132846963e-05, + "loss": 2.2096, + "step": 910 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974868039878034e-05, + "loss": 2.3797, + "step": 920 + }, + { + "epoch": 0.01, + "learning_rate": 4.99743188177083e-05, + "loss": 2.3605, + "step": 930 + }, + { + "epoch": 0.01, + "learning_rate": 4.997376366209091e-05, + "loss": 2.2011, + "step": 940 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973202573157776e-05, + "loss": 2.2481, + "step": 950 + }, + { + "epoch": 0.01, + "learning_rate": 4.997263555104218e-05, + "loss": 2.2433, + "step": 960 + }, + { + "epoch": 0.02, + "learning_rate": 4.997206259587884e-05, + "loss": 2.3716, + "step": 970 + }, + { + "epoch": 0.02, + "learning_rate": 4.99714837078039e-05, + "loss": 2.4106, + "step": 980 + }, + { + "epoch": 0.02, + "learning_rate": 4.997089888695488e-05, + "loss": 2.2783, + "step": 990 + }, + { + "epoch": 0.02, + "learning_rate": 4.997030813347072e-05, + "loss": 2.2129, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969711447491776e-05, + "loss": 2.2076, + "step": 1010 + }, + { + "epoch": 0.02, + "learning_rate": 4.9969108829159804e-05, + "loss": 2.134, + "step": 1020 + }, + { + "epoch": 0.02, + "learning_rate": 4.9968500278617984e-05, + "loss": 2.1275, + "step": 1030 + }, + { + "epoch": 0.02, + "learning_rate": 4.996788579601089e-05, + "loss": 2.1536, + "step": 1040 + }, + { + "epoch": 0.02, + "learning_rate": 4.996726538148452e-05, + "loss": 2.1009, + "step": 1050 + }, + { + "epoch": 0.02, + "learning_rate": 4.996663903518627e-05, + "loss": 2.3487, + "step": 1060 + }, + { + "epoch": 0.02, + "learning_rate": 4.9966006757264936e-05, + "loss": 2.28, + "step": 1070 + }, + { + "epoch": 0.02, + "learning_rate": 4.996536854787076e-05, + "loss": 2.299, + "step": 1080 + }, + { + "epoch": 0.02, + "learning_rate": 4.996472440715535e-05, + "loss": 2.379, + "step": 1090 + }, + { + "epoch": 0.02, + "learning_rate": 4.996407433527174e-05, + "loss": 2.3946, + "step": 1100 + }, + { + "epoch": 0.02, + "learning_rate": 4.996341833237439e-05, + "loss": 2.3273, + "step": 1110 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962756398619155e-05, + "loss": 2.2376, + "step": 1120 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962088534163284e-05, + "loss": 2.3319, + "step": 1130 + }, + { + "epoch": 0.02, + "learning_rate": 4.996141473916546e-05, + "loss": 2.3277, + "step": 1140 + }, + { + "epoch": 0.02, + "learning_rate": 4.996073501378575e-05, + "loss": 2.2619, + "step": 1150 + }, + { + "epoch": 0.02, + "learning_rate": 4.9960049358185666e-05, + "loss": 2.4775, + "step": 1160 + }, + { + "epoch": 0.02, + "learning_rate": 4.9959357772528096e-05, + "loss": 2.4691, + "step": 1170 + }, + { + "epoch": 0.02, + "learning_rate": 4.995866025697735e-05, + "loss": 2.4217, + "step": 1180 + }, + { + "epoch": 0.02, + "learning_rate": 4.995795681169915e-05, + "loss": 2.2668, + "step": 1190 + }, + { + "epoch": 0.02, + "learning_rate": 4.99572474368606e-05, + "loss": 2.6242, + "step": 1200 + }, + { + "epoch": 0.02, + "learning_rate": 4.995653213263027e-05, + "loss": 2.3795, + "step": 1210 + }, + { + "epoch": 0.02, + "learning_rate": 4.995581089917807e-05, + "loss": 2.3136, + "step": 1220 + }, + { + "epoch": 0.02, + "learning_rate": 4.995508373667538e-05, + "loss": 2.1655, + "step": 1230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9954350645294935e-05, + "loss": 2.4984, + "step": 1240 + }, + { + "epoch": 0.02, + "learning_rate": 4.995361162521092e-05, + "loss": 2.0179, + "step": 1250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9952866676598915e-05, + "loss": 2.3977, + "step": 1260 + }, + { + "epoch": 0.02, + "learning_rate": 4.99521157996359e-05, + "loss": 2.3967, + "step": 1270 + }, + { + "epoch": 0.02, + "learning_rate": 4.995135899450026e-05, + "loss": 2.2054, + "step": 1280 + }, + { + "epoch": 0.02, + "learning_rate": 4.995059626137182e-05, + "loss": 2.0958, + "step": 1290 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949827600431774e-05, + "loss": 2.3841, + "step": 1300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9949053011862755e-05, + "loss": 2.3579, + "step": 1310 + }, + { + "epoch": 0.02, + "learning_rate": 4.994827249584878e-05, + "loss": 2.1933, + "step": 1320 + }, + { + "epoch": 0.02, + "learning_rate": 4.994748605257529e-05, + "loss": 2.1585, + "step": 1330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9946693682229136e-05, + "loss": 2.3349, + "step": 1340 + }, + { + "epoch": 0.02, + "learning_rate": 4.994589538499856e-05, + "loss": 2.3072, + "step": 1350 + }, + { + "epoch": 0.02, + "learning_rate": 4.994509116107323e-05, + "loss": 2.4915, + "step": 1360 + }, + { + "epoch": 0.02, + "learning_rate": 4.994428101064421e-05, + "loss": 2.3594, + "step": 1370 + }, + { + "epoch": 0.02, + "learning_rate": 4.9943464933903984e-05, + "loss": 2.2165, + "step": 1380 + }, + { + "epoch": 0.02, + "learning_rate": 4.9942642931046425e-05, + "loss": 2.1827, + "step": 1390 + }, + { + "epoch": 0.02, + "learning_rate": 4.994181500226685e-05, + "loss": 2.6124, + "step": 1400 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940981147761926e-05, + "loss": 2.5578, + "step": 1410 + }, + { + "epoch": 0.02, + "learning_rate": 4.9940141367729785e-05, + "loss": 2.5208, + "step": 1420 + }, + { + "epoch": 0.02, + "learning_rate": 4.9939295662369944e-05, + "loss": 2.528, + "step": 1430 + }, + { + "epoch": 0.02, + "learning_rate": 4.9938444031883326e-05, + "loss": 2.4929, + "step": 1440 + }, + { + "epoch": 0.02, + "learning_rate": 4.993758647647225e-05, + "loss": 2.4645, + "step": 1450 + }, + { + "epoch": 0.02, + "learning_rate": 4.993672299634047e-05, + "loss": 2.4204, + "step": 1460 + }, + { + "epoch": 0.02, + "learning_rate": 4.9935853591693126e-05, + "loss": 2.4796, + "step": 1470 + }, + { + "epoch": 0.02, + "learning_rate": 4.9934978262736774e-05, + "loss": 2.2953, + "step": 1480 + }, + { + "epoch": 0.02, + "learning_rate": 4.993409700967938e-05, + "loss": 2.2583, + "step": 1490 + }, + { + "epoch": 0.02, + "learning_rate": 4.993320983273031e-05, + "loss": 2.3071, + "step": 1500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9932316732100334e-05, + "loss": 2.2646, + "step": 1510 + }, + { + "epoch": 0.02, + "learning_rate": 4.993141770800166e-05, + "loss": 2.39, + "step": 1520 + }, + { + "epoch": 0.02, + "learning_rate": 4.993051276064785e-05, + "loss": 2.3467, + "step": 1530 + }, + { + "epoch": 0.02, + "learning_rate": 4.992960189025393e-05, + "loss": 2.0758, + "step": 1540 + }, + { + "epoch": 0.02, + "learning_rate": 4.9928685097036285e-05, + "loss": 2.3439, + "step": 1550 + }, + { + "epoch": 0.02, + "learning_rate": 4.992776238121275e-05, + "loss": 2.2491, + "step": 1560 + }, + { + "epoch": 0.02, + "learning_rate": 4.9926833743002524e-05, + "loss": 2.5075, + "step": 1570 + }, + { + "epoch": 0.02, + "learning_rate": 4.9925899182626246e-05, + "loss": 2.2464, + "step": 1580 + }, + { + "epoch": 0.02, + "learning_rate": 4.992495870030595e-05, + "loss": 2.2552, + "step": 1590 + }, + { + "epoch": 0.02, + "learning_rate": 4.992401229626508e-05, + "loss": 2.2311, + "step": 1600 + }, + { + "epoch": 0.02, + "learning_rate": 4.992305997072847e-05, + "loss": 2.0316, + "step": 1610 + }, + { + "epoch": 0.03, + "learning_rate": 4.99221017239224e-05, + "loss": 2.3166, + "step": 1620 + }, + { + "epoch": 0.03, + "learning_rate": 4.992113755607451e-05, + "loss": 2.2292, + "step": 1630 + }, + { + "epoch": 0.03, + "learning_rate": 4.992016746741388e-05, + "loss": 2.2514, + "step": 1640 + }, + { + "epoch": 0.03, + "learning_rate": 4.991919145817098e-05, + "loss": 2.19, + "step": 1650 + }, + { + "epoch": 0.03, + "learning_rate": 4.9918209528577694e-05, + "loss": 2.3848, + "step": 1660 + }, + { + "epoch": 0.03, + "learning_rate": 4.991722167886732e-05, + "loss": 2.1526, + "step": 1670 + }, + { + "epoch": 0.03, + "learning_rate": 4.991622790927454e-05, + "loss": 2.3185, + "step": 1680 + }, + { + "epoch": 0.03, + "learning_rate": 4.9915228220035466e-05, + "loss": 2.2688, + "step": 1690 + }, + { + "epoch": 0.03, + "learning_rate": 4.991422261138759e-05, + "loss": 2.5844, + "step": 1700 + }, + { + "epoch": 0.03, + "learning_rate": 4.9913211083569855e-05, + "loss": 2.2667, + "step": 1710 + }, + { + "epoch": 0.03, + "learning_rate": 4.991219363682255e-05, + "loss": 2.3673, + "step": 1720 + }, + { + "epoch": 0.03, + "learning_rate": 4.991117027138742e-05, + "loss": 2.1373, + "step": 1730 + }, + { + "epoch": 0.03, + "learning_rate": 4.99101409875076e-05, + "loss": 2.1753, + "step": 1740 + }, + { + "epoch": 0.03, + "learning_rate": 4.990910578542762e-05, + "loss": 2.0088, + "step": 1750 + }, + { + "epoch": 0.03, + "learning_rate": 4.9908064665393426e-05, + "loss": 2.2182, + "step": 1760 + }, + { + "epoch": 0.03, + "learning_rate": 4.990701762765238e-05, + "loss": 2.0533, + "step": 1770 + }, + { + "epoch": 0.03, + "learning_rate": 4.990596467245322e-05, + "loss": 2.3013, + "step": 1780 + }, + { + "epoch": 0.03, + "learning_rate": 4.990490580004613e-05, + "loss": 2.3737, + "step": 1790 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903841010682665e-05, + "loss": 2.3559, + "step": 1800 + }, + { + "epoch": 0.03, + "learning_rate": 4.9902770304615807e-05, + "loss": 2.1684, + "step": 1810 + }, + { + "epoch": 0.03, + "learning_rate": 4.9901693682099936e-05, + "loss": 2.287, + "step": 1820 + }, + { + "epoch": 0.03, + "learning_rate": 4.990061114339084e-05, + "loss": 2.2476, + "step": 1830 + }, + { + "epoch": 0.03, + "learning_rate": 4.989952268874569e-05, + "loss": 2.3043, + "step": 1840 + }, + { + "epoch": 0.03, + "learning_rate": 4.989842831842312e-05, + "loss": 2.3861, + "step": 1850 + }, + { + "epoch": 0.03, + "learning_rate": 4.98973280326831e-05, + "loss": 2.4816, + "step": 1860 + }, + { + "epoch": 0.03, + "learning_rate": 4.989622183178706e-05, + "loss": 2.2807, + "step": 1870 + }, + { + "epoch": 0.03, + "learning_rate": 4.98951097159978e-05, + "loss": 2.2469, + "step": 1880 + }, + { + "epoch": 0.03, + "learning_rate": 4.989399168557954e-05, + "loss": 2.1102, + "step": 1890 + }, + { + "epoch": 0.03, + "learning_rate": 4.9892867740797913e-05, + "loss": 2.0544, + "step": 1900 + }, + { + "epoch": 0.03, + "learning_rate": 4.9891737881919934e-05, + "loss": 2.0653, + "step": 1910 + }, + { + "epoch": 0.03, + "learning_rate": 4.989060210921405e-05, + "loss": 2.0007, + "step": 1920 + }, + { + "epoch": 0.03, + "learning_rate": 4.988946042295009e-05, + "loss": 2.2082, + "step": 1930 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888312823399306e-05, + "loss": 2.1967, + "step": 1940 + }, + { + "epoch": 0.03, + "learning_rate": 4.988715931083434e-05, + "loss": 2.3142, + "step": 1950 + }, + { + "epoch": 0.03, + "learning_rate": 4.988599988552925e-05, + "loss": 2.286, + "step": 1960 + }, + { + "epoch": 0.03, + "learning_rate": 4.9884834547759504e-05, + "loss": 2.2712, + "step": 1970 + }, + { + "epoch": 0.03, + "learning_rate": 4.9883663297801955e-05, + "loss": 2.1449, + "step": 1980 + }, + { + "epoch": 0.03, + "learning_rate": 4.988248613593486e-05, + "loss": 2.054, + "step": 1990 + }, + { + "epoch": 0.03, + "learning_rate": 4.988130306243791e-05, + "loss": 1.896, + "step": 2000 + }, + { + "epoch": 0.03, + "learning_rate": 4.988011407759217e-05, + "loss": 2.2043, + "step": 2010 + }, + { + "epoch": 0.03, + "learning_rate": 4.987891918168013e-05, + "loss": 2.0708, + "step": 2020 + }, + { + "epoch": 0.03, + "learning_rate": 4.9877718374985675e-05, + "loss": 2.1179, + "step": 2030 + }, + { + "epoch": 0.03, + "learning_rate": 4.987651165779408e-05, + "loss": 1.9875, + "step": 2040 + }, + { + "epoch": 0.03, + "learning_rate": 4.987529903039206e-05, + "loss": 2.2379, + "step": 2050 + }, + { + "epoch": 0.03, + "learning_rate": 4.987408049306771e-05, + "loss": 2.1113, + "step": 2060 + }, + { + "epoch": 0.03, + "learning_rate": 4.987285604611051e-05, + "loss": 2.0418, + "step": 2070 + }, + { + "epoch": 0.03, + "learning_rate": 4.9871625689811395e-05, + "loss": 2.1074, + "step": 2080 + }, + { + "epoch": 0.03, + "learning_rate": 4.987038942446266e-05, + "loss": 2.0016, + "step": 2090 + }, + { + "epoch": 0.03, + "learning_rate": 4.9869147250358025e-05, + "loss": 2.2086, + "step": 2100 + }, + { + "epoch": 0.03, + "learning_rate": 4.986789916779261e-05, + "loss": 2.3601, + "step": 2110 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866645177062925e-05, + "loss": 2.2846, + "step": 2120 + }, + { + "epoch": 0.03, + "learning_rate": 4.986538527846691e-05, + "loss": 2.2031, + "step": 2130 + }, + { + "epoch": 0.03, + "learning_rate": 4.9864119472303885e-05, + "loss": 2.2138, + "step": 2140 + }, + { + "epoch": 0.03, + "learning_rate": 4.9862847758874586e-05, + "loss": 2.3275, + "step": 2150 + }, + { + "epoch": 0.03, + "learning_rate": 4.9861570138481154e-05, + "loss": 2.2675, + "step": 2160 + }, + { + "epoch": 0.03, + "learning_rate": 4.986028661142712e-05, + "loss": 1.941, + "step": 2170 + }, + { + "epoch": 0.03, + "learning_rate": 4.985899717801743e-05, + "loss": 2.0147, + "step": 2180 + }, + { + "epoch": 0.03, + "learning_rate": 4.985770183855842e-05, + "loss": 2.0947, + "step": 2190 + }, + { + "epoch": 0.03, + "learning_rate": 4.985640059335787e-05, + "loss": 2.2822, + "step": 2200 + }, + { + "epoch": 0.03, + "learning_rate": 4.9855093442724895e-05, + "loss": 2.064, + "step": 2210 + }, + { + "epoch": 0.03, + "learning_rate": 4.985378038697007e-05, + "loss": 2.1018, + "step": 2220 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852461426405355e-05, + "loss": 2.0853, + "step": 2230 + }, + { + "epoch": 0.03, + "learning_rate": 4.985113656134411e-05, + "loss": 2.3735, + "step": 2240 + }, + { + "epoch": 0.03, + "learning_rate": 4.984980579210109e-05, + "loss": 2.195, + "step": 2250 + }, + { + "epoch": 0.04, + "learning_rate": 4.9848469118992467e-05, + "loss": 2.1305, + "step": 2260 + }, + { + "epoch": 0.04, + "learning_rate": 4.984712654233582e-05, + "loss": 2.3085, + "step": 2270 + }, + { + "epoch": 0.04, + "learning_rate": 4.984577806245011e-05, + "loss": 2.2605, + "step": 2280 + }, + { + "epoch": 0.04, + "learning_rate": 4.984442367965571e-05, + "loss": 2.2594, + "step": 2290 + }, + { + "epoch": 0.04, + "learning_rate": 4.9843063394274405e-05, + "loss": 2.226, + "step": 2300 + }, + { + "epoch": 0.04, + "learning_rate": 4.984169720662937e-05, + "loss": 2.1836, + "step": 2310 + }, + { + "epoch": 0.04, + "learning_rate": 4.984032511704518e-05, + "loss": 2.4017, + "step": 2320 + }, + { + "epoch": 0.04, + "learning_rate": 4.983894712584782e-05, + "loss": 2.3924, + "step": 2330 + }, + { + "epoch": 0.04, + "learning_rate": 4.983756323336469e-05, + "loss": 2.2412, + "step": 2340 + }, + { + "epoch": 0.04, + "learning_rate": 4.9836173439924574e-05, + "loss": 2.024, + "step": 2350 + }, + { + "epoch": 0.04, + "learning_rate": 4.983477774585765e-05, + "loss": 2.2287, + "step": 2360 + }, + { + "epoch": 0.04, + "learning_rate": 4.983337615149552e-05, + "loss": 2.1847, + "step": 2370 + }, + { + "epoch": 0.04, + "learning_rate": 4.9831968657171175e-05, + "loss": 2.4511, + "step": 2380 + }, + { + "epoch": 0.04, + "learning_rate": 4.9830555263219006e-05, + "loss": 2.1062, + "step": 2390 + }, + { + "epoch": 0.04, + "learning_rate": 4.9829135969974815e-05, + "loss": 1.9827, + "step": 2400 + }, + { + "epoch": 0.04, + "learning_rate": 4.982771077777579e-05, + "loss": 1.8538, + "step": 2410 + }, + { + "epoch": 0.04, + "learning_rate": 4.9826279686960556e-05, + "loss": 1.9839, + "step": 2420 + }, + { + "epoch": 0.04, + "learning_rate": 4.982484269786909e-05, + "loss": 2.116, + "step": 2430 + }, + { + "epoch": 0.04, + "learning_rate": 4.98233998108428e-05, + "loss": 2.2749, + "step": 2440 + }, + { + "epoch": 0.04, + "learning_rate": 4.9821951026224504e-05, + "loss": 2.3435, + "step": 2450 + }, + { + "epoch": 0.04, + "learning_rate": 4.982049634435838e-05, + "loss": 2.2564, + "step": 2460 + }, + { + "epoch": 0.04, + "learning_rate": 4.981903576559006e-05, + "loss": 2.0415, + "step": 2470 + }, + { + "epoch": 0.04, + "learning_rate": 4.981756929026653e-05, + "loss": 2.1574, + "step": 2480 + }, + { + "epoch": 0.04, + "learning_rate": 4.981609691873622e-05, + "loss": 2.3005, + "step": 2490 + }, + { + "epoch": 0.04, + "learning_rate": 4.9814618651348934e-05, + "loss": 2.2791, + "step": 2500 + }, + { + "epoch": 0.04, + "learning_rate": 4.981313448845586e-05, + "loss": 2.2492, + "step": 2510 + }, + { + "epoch": 0.04, + "learning_rate": 4.981164443040963e-05, + "loss": 2.2691, + "step": 2520 + }, + { + "epoch": 0.04, + "learning_rate": 4.9810148477564254e-05, + "loss": 2.2665, + "step": 2530 + }, + { + "epoch": 0.04, + "learning_rate": 4.980864663027514e-05, + "loss": 2.3859, + "step": 2540 + }, + { + "epoch": 0.04, + "learning_rate": 4.980713888889909e-05, + "loss": 2.2164, + "step": 2550 + }, + { + "epoch": 0.04, + "learning_rate": 4.980562525379433e-05, + "loss": 2.1845, + "step": 2560 + }, + { + "epoch": 0.04, + "learning_rate": 4.980410572532046e-05, + "loss": 2.0928, + "step": 2570 + }, + { + "epoch": 0.04, + "learning_rate": 4.9802580303838506e-05, + "loss": 2.1488, + "step": 2580 + }, + { + "epoch": 0.04, + "learning_rate": 4.9801048989710874e-05, + "loss": 2.1667, + "step": 2590 + }, + { + "epoch": 0.04, + "learning_rate": 4.9799511783301375e-05, + "loss": 2.199, + "step": 2600 + }, + { + "epoch": 0.04, + "learning_rate": 4.979796868497523e-05, + "loss": 2.181, + "step": 2610 + }, + { + "epoch": 0.04, + "learning_rate": 4.979641969509903e-05, + "loss": 2.2468, + "step": 2620 + }, + { + "epoch": 0.04, + "learning_rate": 4.979486481404081e-05, + "loss": 2.1072, + "step": 2630 + }, + { + "epoch": 0.04, + "learning_rate": 4.9793304042169976e-05, + "loss": 2.0543, + "step": 2640 + }, + { + "epoch": 0.04, + "learning_rate": 4.979173737985733e-05, + "loss": 2.2469, + "step": 2650 + }, + { + "epoch": 0.04, + "learning_rate": 4.97901648274751e-05, + "loss": 2.1893, + "step": 2660 + }, + { + "epoch": 0.04, + "learning_rate": 4.978858638539687e-05, + "loss": 2.1738, + "step": 2670 + }, + { + "epoch": 0.04, + "learning_rate": 4.9787002053997674e-05, + "loss": 2.4275, + "step": 2680 + }, + { + "epoch": 0.04, + "learning_rate": 4.97854118336539e-05, + "loss": 2.3653, + "step": 2690 + }, + { + "epoch": 0.04, + "learning_rate": 4.978381572474338e-05, + "loss": 2.254, + "step": 2700 + }, + { + "epoch": 0.04, + "learning_rate": 4.978221372764531e-05, + "loss": 2.2414, + "step": 2710 + }, + { + "epoch": 0.04, + "learning_rate": 4.978060584274028e-05, + "loss": 2.1424, + "step": 2720 + }, + { + "epoch": 0.04, + "learning_rate": 4.9778992070410315e-05, + "loss": 2.0868, + "step": 2730 + }, + { + "epoch": 0.04, + "learning_rate": 4.9777372411038817e-05, + "loss": 2.1442, + "step": 2740 + }, + { + "epoch": 0.04, + "learning_rate": 4.977574686501057e-05, + "loss": 2.0401, + "step": 2750 + }, + { + "epoch": 0.04, + "learning_rate": 4.9774115432711785e-05, + "loss": 2.1412, + "step": 2760 + }, + { + "epoch": 0.04, + "learning_rate": 4.9772478114530074e-05, + "loss": 2.1729, + "step": 2770 + }, + { + "epoch": 0.04, + "learning_rate": 4.9770834910854414e-05, + "loss": 2.1737, + "step": 2780 + }, + { + "epoch": 0.04, + "learning_rate": 4.976918582207521e-05, + "loss": 1.953, + "step": 2790 + }, + { + "epoch": 0.04, + "learning_rate": 4.9767530848584254e-05, + "loss": 2.2031, + "step": 2800 + }, + { + "epoch": 0.04, + "learning_rate": 4.976586999077474e-05, + "loss": 2.2072, + "step": 2810 + }, + { + "epoch": 0.04, + "learning_rate": 4.976420324904126e-05, + "loss": 2.1298, + "step": 2820 + }, + { + "epoch": 0.04, + "learning_rate": 4.976253062377979e-05, + "loss": 2.1546, + "step": 2830 + }, + { + "epoch": 0.04, + "learning_rate": 4.976085211538773e-05, + "loss": 2.256, + "step": 2840 + }, + { + "epoch": 0.04, + "learning_rate": 4.9759167724263856e-05, + "loss": 2.2208, + "step": 2850 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757477450808346e-05, + "loss": 2.1131, + "step": 2860 + }, + { + "epoch": 0.04, + "learning_rate": 4.975578129542279e-05, + "loss": 2.1572, + "step": 2870 + }, + { + "epoch": 0.04, + "learning_rate": 4.975407925851015e-05, + "loss": 2.0763, + "step": 2880 + }, + { + "epoch": 0.04, + "learning_rate": 4.97523713404748e-05, + "loss": 2.339, + "step": 2890 + }, + { + "epoch": 0.04, + "learning_rate": 4.9750657541722524e-05, + "loss": 2.155, + "step": 2900 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748937862660483e-05, + "loss": 2.0969, + "step": 2910 + }, + { + "epoch": 0.05, + "learning_rate": 4.9747212303697236e-05, + "loss": 2.1013, + "step": 2920 + }, + { + "epoch": 0.05, + "learning_rate": 4.974548086524275e-05, + "loss": 2.0357, + "step": 2930 + }, + { + "epoch": 0.05, + "learning_rate": 4.9743743547708384e-05, + "loss": 2.0447, + "step": 2940 + }, + { + "epoch": 0.05, + "learning_rate": 4.974200035150689e-05, + "loss": 2.1169, + "step": 2950 + }, + { + "epoch": 0.05, + "learning_rate": 4.9740251277052424e-05, + "loss": 2.0658, + "step": 2960 + }, + { + "epoch": 0.05, + "learning_rate": 4.973849632476053e-05, + "loss": 1.9411, + "step": 2970 + }, + { + "epoch": 0.05, + "learning_rate": 4.973673549504816e-05, + "loss": 2.2571, + "step": 2980 + }, + { + "epoch": 0.05, + "learning_rate": 4.973496878833365e-05, + "loss": 2.2832, + "step": 2990 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733196205036735e-05, + "loss": 1.9626, + "step": 3000 + }, + { + "epoch": 0.05, + "learning_rate": 4.973141774557855e-05, + "loss": 2.3252, + "step": 3010 + }, + { + "epoch": 0.05, + "learning_rate": 4.972963341038163e-05, + "loss": 2.7221, + "step": 3020 + }, + { + "epoch": 0.05, + "learning_rate": 4.9727843199869905e-05, + "loss": 2.6555, + "step": 3030 + }, + { + "epoch": 0.05, + "learning_rate": 4.9726047114468676e-05, + "loss": 2.287, + "step": 3040 + }, + { + "epoch": 0.05, + "learning_rate": 4.9724245154604686e-05, + "loss": 2.1365, + "step": 3050 + }, + { + "epoch": 0.05, + "learning_rate": 4.972243732070603e-05, + "loss": 2.0094, + "step": 3060 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720623613202234e-05, + "loss": 1.867, + "step": 3070 + }, + { + "epoch": 0.05, + "learning_rate": 4.971880403252418e-05, + "loss": 2.3603, + "step": 3080 + }, + { + "epoch": 0.05, + "learning_rate": 4.971697857910418e-05, + "loss": 2.1237, + "step": 3090 + }, + { + "epoch": 0.05, + "learning_rate": 4.971514725337593e-05, + "loss": 2.0849, + "step": 3100 + }, + { + "epoch": 0.05, + "learning_rate": 4.971331005577452e-05, + "loss": 1.7417, + "step": 3110 + }, + { + "epoch": 0.05, + "learning_rate": 4.971146698673643e-05, + "loss": 2.2803, + "step": 3120 + }, + { + "epoch": 0.05, + "learning_rate": 4.9709618046699534e-05, + "loss": 2.15, + "step": 3130 + }, + { + "epoch": 0.05, + "learning_rate": 4.9707763236103133e-05, + "loss": 2.2166, + "step": 3140 + }, + { + "epoch": 0.05, + "learning_rate": 4.970590255538787e-05, + "loss": 2.0576, + "step": 3150 + }, + { + "epoch": 0.05, + "learning_rate": 4.9704036004995816e-05, + "loss": 2.1542, + "step": 3160 + }, + { + "epoch": 0.05, + "learning_rate": 4.970216358537043e-05, + "loss": 2.1302, + "step": 3170 + }, + { + "epoch": 0.05, + "learning_rate": 4.970028529695657e-05, + "loss": 1.9554, + "step": 3180 + }, + { + "epoch": 0.05, + "learning_rate": 4.969840114020048e-05, + "loss": 2.1815, + "step": 3190 + }, + { + "epoch": 0.05, + "learning_rate": 4.9696511115549806e-05, + "loss": 2.3238, + "step": 3200 + }, + { + "epoch": 0.05, + "learning_rate": 4.969461522345358e-05, + "loss": 2.1987, + "step": 3210 + }, + { + "epoch": 0.05, + "learning_rate": 4.969271346436223e-05, + "loss": 2.2443, + "step": 3220 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690805838727575e-05, + "loss": 2.642, + "step": 3230 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688892347002845e-05, + "loss": 2.6266, + "step": 3240 + }, + { + "epoch": 0.05, + "learning_rate": 4.9686972989642644e-05, + "loss": 2.5613, + "step": 3250 + }, + { + "epoch": 0.05, + "learning_rate": 4.968504776710298e-05, + "loss": 2.5365, + "step": 3260 + }, + { + "epoch": 0.05, + "learning_rate": 4.968311667984124e-05, + "loss": 2.3755, + "step": 3270 + }, + { + "epoch": 0.05, + "learning_rate": 4.9681179728316235e-05, + "loss": 2.1146, + "step": 3280 + }, + { + "epoch": 0.05, + "learning_rate": 4.967923691298814e-05, + "loss": 2.1764, + "step": 3290 + }, + { + "epoch": 0.05, + "learning_rate": 4.967728823431852e-05, + "loss": 2.2956, + "step": 3300 + }, + { + "epoch": 0.05, + "learning_rate": 4.9675333692770364e-05, + "loss": 2.4662, + "step": 3310 + }, + { + "epoch": 0.05, + "learning_rate": 4.967337328880803e-05, + "loss": 2.2265, + "step": 3320 + }, + { + "epoch": 0.05, + "learning_rate": 4.967140702289727e-05, + "loss": 2.1081, + "step": 3330 + }, + { + "epoch": 0.05, + "learning_rate": 4.966943489550524e-05, + "loss": 2.0846, + "step": 3340 + }, + { + "epoch": 0.05, + "learning_rate": 4.966745690710048e-05, + "loss": 1.9382, + "step": 3350 + }, + { + "epoch": 0.05, + "learning_rate": 4.9665473058152925e-05, + "loss": 2.0287, + "step": 3360 + }, + { + "epoch": 0.05, + "learning_rate": 4.96634833491339e-05, + "loss": 1.9763, + "step": 3370 + }, + { + "epoch": 0.05, + "learning_rate": 4.9661487780516124e-05, + "loss": 2.1072, + "step": 3380 + }, + { + "epoch": 0.05, + "learning_rate": 4.96594863527737e-05, + "loss": 2.0486, + "step": 3390 + }, + { + "epoch": 0.05, + "learning_rate": 4.965747906638215e-05, + "loss": 2.0953, + "step": 3400 + }, + { + "epoch": 0.05, + "learning_rate": 4.965546592181836e-05, + "loss": 2.1719, + "step": 3410 + }, + { + "epoch": 0.05, + "learning_rate": 4.965344691956061e-05, + "loss": 2.0439, + "step": 3420 + }, + { + "epoch": 0.05, + "learning_rate": 4.965142206008858e-05, + "loss": 2.0581, + "step": 3430 + }, + { + "epoch": 0.05, + "learning_rate": 4.964939134388336e-05, + "loss": 2.1861, + "step": 3440 + }, + { + "epoch": 0.05, + "learning_rate": 4.964735477142738e-05, + "loss": 2.099, + "step": 3450 + }, + { + "epoch": 0.05, + "learning_rate": 4.9645312343204506e-05, + "loss": 2.018, + "step": 3460 + }, + { + "epoch": 0.05, + "learning_rate": 4.96432640597e-05, + "loss": 2.3352, + "step": 3470 + }, + { + "epoch": 0.05, + "learning_rate": 4.9641209921400475e-05, + "loss": 2.1058, + "step": 3480 + }, + { + "epoch": 0.05, + "learning_rate": 4.963914992879396e-05, + "loss": 1.9687, + "step": 3490 + }, + { + "epoch": 0.05, + "learning_rate": 4.963708408236988e-05, + "loss": 2.0786, + "step": 3500 + }, + { + "epoch": 0.05, + "learning_rate": 4.963501238261904e-05, + "loss": 2.1054, + "step": 3510 + }, + { + "epoch": 0.05, + "learning_rate": 4.963293483003363e-05, + "loss": 2.1543, + "step": 3520 + }, + { + "epoch": 0.05, + "learning_rate": 4.963085142510725e-05, + "loss": 1.8358, + "step": 3530 + }, + { + "epoch": 0.05, + "learning_rate": 4.962876216833488e-05, + "loss": 1.865, + "step": 3540 + }, + { + "epoch": 0.06, + "learning_rate": 4.962666706021288e-05, + "loss": 2.0973, + "step": 3550 + }, + { + "epoch": 0.06, + "learning_rate": 4.9624566101239024e-05, + "loss": 2.1343, + "step": 3560 + }, + { + "epoch": 0.06, + "learning_rate": 4.9622459291912445e-05, + "loss": 2.1087, + "step": 3570 + }, + { + "epoch": 0.06, + "learning_rate": 4.9620346632733695e-05, + "loss": 2.1601, + "step": 3580 + }, + { + "epoch": 0.06, + "learning_rate": 4.9618228124204705e-05, + "loss": 2.0268, + "step": 3590 + }, + { + "epoch": 0.06, + "learning_rate": 4.961610376682878e-05, + "loss": 1.8539, + "step": 3600 + }, + { + "epoch": 0.06, + "learning_rate": 4.961397356111064e-05, + "loss": 1.8472, + "step": 3610 + }, + { + "epoch": 0.06, + "learning_rate": 4.961183750755639e-05, + "loss": 1.8152, + "step": 3620 + }, + { + "epoch": 0.06, + "learning_rate": 4.96096956066735e-05, + "loss": 1.831, + "step": 3630 + }, + { + "epoch": 0.06, + "learning_rate": 4.960754785897086e-05, + "loss": 2.162, + "step": 3640 + }, + { + "epoch": 0.06, + "learning_rate": 4.960539426495874e-05, + "loss": 2.1966, + "step": 3650 + }, + { + "epoch": 0.06, + "learning_rate": 4.960323482514878e-05, + "loss": 1.8904, + "step": 3660 + }, + { + "epoch": 0.06, + "learning_rate": 4.9601069540054035e-05, + "loss": 1.9847, + "step": 3670 + }, + { + "epoch": 0.06, + "learning_rate": 4.959889841018893e-05, + "loss": 1.8438, + "step": 3680 + }, + { + "epoch": 0.06, + "learning_rate": 4.9596721436069295e-05, + "loss": 2.0892, + "step": 3690 + }, + { + "epoch": 0.06, + "learning_rate": 4.9594538618212334e-05, + "loss": 2.0328, + "step": 3700 + }, + { + "epoch": 0.06, + "learning_rate": 4.959234995713665e-05, + "loss": 2.0356, + "step": 3710 + }, + { + "epoch": 0.06, + "learning_rate": 4.959015545336222e-05, + "loss": 2.1961, + "step": 3720 + }, + { + "epoch": 0.06, + "learning_rate": 4.958795510741043e-05, + "loss": 2.314, + "step": 3730 + }, + { + "epoch": 0.06, + "learning_rate": 4.9585748919804035e-05, + "loss": 2.2465, + "step": 3740 + }, + { + "epoch": 0.06, + "learning_rate": 4.9583536891067184e-05, + "loss": 2.2194, + "step": 3750 + }, + { + "epoch": 0.06, + "learning_rate": 4.9581319021725424e-05, + "loss": 2.1749, + "step": 3760 + }, + { + "epoch": 0.06, + "learning_rate": 4.9579095312305677e-05, + "loss": 2.1942, + "step": 3770 + }, + { + "epoch": 0.06, + "learning_rate": 4.957686576333625e-05, + "loss": 2.0216, + "step": 3780 + }, + { + "epoch": 0.06, + "learning_rate": 4.9574630375346855e-05, + "loss": 1.7777, + "step": 3790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9572389148868564e-05, + "loss": 1.9338, + "step": 3800 + }, + { + "epoch": 0.06, + "learning_rate": 4.957014208443387e-05, + "loss": 2.1368, + "step": 3810 + }, + { + "epoch": 0.06, + "learning_rate": 4.956788918257662e-05, + "loss": 1.9417, + "step": 3820 + }, + { + "epoch": 0.06, + "learning_rate": 4.9565630443832065e-05, + "loss": 1.8576, + "step": 3830 + }, + { + "epoch": 0.06, + "learning_rate": 4.956336586873686e-05, + "loss": 1.8562, + "step": 3840 + }, + { + "epoch": 0.06, + "learning_rate": 4.9561095457828994e-05, + "loss": 2.0311, + "step": 3850 + }, + { + "epoch": 0.06, + "learning_rate": 4.95588192116479e-05, + "loss": 2.2187, + "step": 3860 + }, + { + "epoch": 0.06, + "learning_rate": 4.955653713073437e-05, + "loss": 2.555, + "step": 3870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9554249215630575e-05, + "loss": 2.5267, + "step": 3880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9551955466880094e-05, + "loss": 2.4391, + "step": 3890 + }, + { + "epoch": 0.06, + "learning_rate": 4.954965588502788e-05, + "loss": 2.4525, + "step": 3900 + }, + { + "epoch": 0.06, + "learning_rate": 4.9547350470620256e-05, + "loss": 2.3782, + "step": 3910 + }, + { + "epoch": 0.06, + "learning_rate": 4.954503922420497e-05, + "loss": 2.2834, + "step": 3920 + }, + { + "epoch": 0.06, + "learning_rate": 4.954272214633111e-05, + "loss": 2.0641, + "step": 3930 + }, + { + "epoch": 0.06, + "learning_rate": 4.954039923754919e-05, + "loss": 1.9868, + "step": 3940 + }, + { + "epoch": 0.06, + "learning_rate": 4.953807049841108e-05, + "loss": 2.0768, + "step": 3950 + }, + { + "epoch": 0.06, + "learning_rate": 4.953573592947006e-05, + "loss": 2.2646, + "step": 3960 + }, + { + "epoch": 0.06, + "learning_rate": 4.9533395531280764e-05, + "loss": 2.0901, + "step": 3970 + }, + { + "epoch": 0.06, + "learning_rate": 4.953104930439924e-05, + "loss": 2.161, + "step": 3980 + }, + { + "epoch": 0.06, + "learning_rate": 4.9528697249382896e-05, + "loss": 2.6214, + "step": 3990 + }, + { + "epoch": 0.06, + "learning_rate": 4.9526339366790556e-05, + "loss": 2.5649, + "step": 4000 + }, + { + "epoch": 0.06, + "learning_rate": 4.95239756571824e-05, + "loss": 2.4146, + "step": 4010 + }, + { + "epoch": 0.06, + "learning_rate": 4.952160612112e-05, + "loss": 2.002, + "step": 4020 + }, + { + "epoch": 0.06, + "learning_rate": 4.9519230759166324e-05, + "loss": 2.2773, + "step": 4030 + }, + { + "epoch": 0.06, + "learning_rate": 4.951684957188571e-05, + "loss": 2.2472, + "step": 4040 + }, + { + "epoch": 0.06, + "learning_rate": 4.951446255984389e-05, + "loss": 1.9906, + "step": 4050 + }, + { + "epoch": 0.06, + "learning_rate": 4.9512069723607967e-05, + "loss": 1.8697, + "step": 4060 + }, + { + "epoch": 0.06, + "learning_rate": 4.950967106374644e-05, + "loss": 2.014, + "step": 4070 + }, + { + "epoch": 0.06, + "learning_rate": 4.950726658082918e-05, + "loss": 2.2205, + "step": 4080 + }, + { + "epoch": 0.06, + "learning_rate": 4.950485627542747e-05, + "loss": 2.291, + "step": 4090 + }, + { + "epoch": 0.06, + "learning_rate": 4.950244014811393e-05, + "loss": 2.061, + "step": 4100 + }, + { + "epoch": 0.06, + "learning_rate": 4.95000181994626e-05, + "loss": 2.0847, + "step": 4110 + }, + { + "epoch": 0.06, + "learning_rate": 4.949759043004889e-05, + "loss": 1.981, + "step": 4120 + }, + { + "epoch": 0.06, + "learning_rate": 4.94951568404496e-05, + "loss": 1.9461, + "step": 4130 + }, + { + "epoch": 0.06, + "learning_rate": 4.94927174312429e-05, + "loss": 1.9566, + "step": 4140 + }, + { + "epoch": 0.06, + "learning_rate": 4.9490272203008345e-05, + "loss": 2.0425, + "step": 4150 + }, + { + "epoch": 0.06, + "learning_rate": 4.9487821156326884e-05, + "loss": 2.048, + "step": 4160 + }, + { + "epoch": 0.06, + "learning_rate": 4.948536429178084e-05, + "loss": 1.8071, + "step": 4170 + }, + { + "epoch": 0.06, + "learning_rate": 4.9482901609953924e-05, + "loss": 1.9987, + "step": 4180 + }, + { + "epoch": 0.07, + "learning_rate": 4.948043311143122e-05, + "loss": 1.9567, + "step": 4190 + }, + { + "epoch": 0.07, + "learning_rate": 4.9477958796799206e-05, + "loss": 1.875, + "step": 4200 + }, + { + "epoch": 0.07, + "learning_rate": 4.9475478666645725e-05, + "loss": 1.9359, + "step": 4210 + }, + { + "epoch": 0.07, + "learning_rate": 4.947299272156001e-05, + "loss": 2.1412, + "step": 4220 + }, + { + "epoch": 0.07, + "learning_rate": 4.9470500962132683e-05, + "loss": 2.1799, + "step": 4230 + }, + { + "epoch": 0.07, + "learning_rate": 4.9468003388955743e-05, + "loss": 1.9198, + "step": 4240 + }, + { + "epoch": 0.07, + "learning_rate": 4.946550000262256e-05, + "loss": 1.967, + "step": 4250 + }, + { + "epoch": 0.07, + "learning_rate": 4.94629908037279e-05, + "loss": 1.9976, + "step": 4260 + }, + { + "epoch": 0.07, + "learning_rate": 4.9460475792867904e-05, + "loss": 2.13, + "step": 4270 + }, + { + "epoch": 0.07, + "learning_rate": 4.9457954970640086e-05, + "loss": 1.8709, + "step": 4280 + }, + { + "epoch": 0.07, + "learning_rate": 4.945542833764336e-05, + "loss": 2.0669, + "step": 4290 + }, + { + "epoch": 0.07, + "learning_rate": 4.9452895894477995e-05, + "loss": 2.0228, + "step": 4300 + }, + { + "epoch": 0.07, + "learning_rate": 4.945035764174566e-05, + "loss": 2.277, + "step": 4310 + }, + { + "epoch": 0.07, + "learning_rate": 4.94478135800494e-05, + "loss": 1.9109, + "step": 4320 + }, + { + "epoch": 0.07, + "learning_rate": 4.944526370999364e-05, + "loss": 1.9506, + "step": 4330 + }, + { + "epoch": 0.07, + "learning_rate": 4.944270803218417e-05, + "loss": 1.9117, + "step": 4340 + }, + { + "epoch": 0.07, + "learning_rate": 4.944014654722819e-05, + "loss": 1.9772, + "step": 4350 + }, + { + "epoch": 0.07, + "learning_rate": 4.943757925573425e-05, + "loss": 1.9477, + "step": 4360 + }, + { + "epoch": 0.07, + "learning_rate": 4.94350061583123e-05, + "loss": 1.913, + "step": 4370 + }, + { + "epoch": 0.07, + "learning_rate": 4.943242725557366e-05, + "loss": 2.0901, + "step": 4380 + }, + { + "epoch": 0.07, + "learning_rate": 4.942984254813102e-05, + "loss": 2.021, + "step": 4390 + }, + { + "epoch": 0.07, + "learning_rate": 4.942725203659848e-05, + "loss": 1.7747, + "step": 4400 + }, + { + "epoch": 0.07, + "learning_rate": 4.9424655721591486e-05, + "loss": 2.07, + "step": 4410 + }, + { + "epoch": 0.07, + "learning_rate": 4.942205360372687e-05, + "loss": 2.0515, + "step": 4420 + }, + { + "epoch": 0.07, + "learning_rate": 4.941944568362287e-05, + "loss": 1.9969, + "step": 4430 + }, + { + "epoch": 0.07, + "learning_rate": 4.9416831961899054e-05, + "loss": 1.9436, + "step": 4440 + }, + { + "epoch": 0.07, + "learning_rate": 4.941421243917641e-05, + "loss": 1.8913, + "step": 4450 + }, + { + "epoch": 0.07, + "learning_rate": 4.9411587116077284e-05, + "loss": 1.9566, + "step": 4460 + }, + { + "epoch": 0.07, + "learning_rate": 4.940895599322542e-05, + "loss": 2.058, + "step": 4470 + }, + { + "epoch": 0.07, + "learning_rate": 4.9406319071245907e-05, + "loss": 2.1318, + "step": 4480 + }, + { + "epoch": 0.07, + "learning_rate": 4.940367635076524e-05, + "loss": 1.9649, + "step": 4490 + }, + { + "epoch": 0.07, + "learning_rate": 4.940102783241127e-05, + "loss": 2.0137, + "step": 4500 + }, + { + "epoch": 0.07, + "learning_rate": 4.9398373516813254e-05, + "loss": 1.9616, + "step": 4510 + }, + { + "epoch": 0.07, + "learning_rate": 4.9395713404601795e-05, + "loss": 2.5022, + "step": 4520 + }, + { + "epoch": 0.07, + "learning_rate": 4.93930474964089e-05, + "loss": 2.3886, + "step": 4530 + }, + { + "epoch": 0.07, + "learning_rate": 4.9390375792867934e-05, + "loss": 2.354, + "step": 4540 + }, + { + "epoch": 0.07, + "learning_rate": 4.938769829461364e-05, + "loss": 2.1582, + "step": 4550 + }, + { + "epoch": 0.07, + "learning_rate": 4.9385015002282154e-05, + "loss": 1.9769, + "step": 4560 + }, + { + "epoch": 0.07, + "learning_rate": 4.938232591651097e-05, + "loss": 2.4004, + "step": 4570 + }, + { + "epoch": 0.07, + "learning_rate": 4.937963103793898e-05, + "loss": 2.1883, + "step": 4580 + }, + { + "epoch": 0.07, + "learning_rate": 4.937693036720642e-05, + "loss": 2.2007, + "step": 4590 + }, + { + "epoch": 0.07, + "learning_rate": 4.937422390495493e-05, + "loss": 1.9345, + "step": 4600 + }, + { + "epoch": 0.07, + "learning_rate": 4.937151165182752e-05, + "loss": 2.0859, + "step": 4610 + }, + { + "epoch": 0.07, + "learning_rate": 4.936879360846856e-05, + "loss": 2.2311, + "step": 4620 + }, + { + "epoch": 0.07, + "learning_rate": 4.936606977552382e-05, + "loss": 2.0618, + "step": 4630 + }, + { + "epoch": 0.07, + "learning_rate": 4.936334015364043e-05, + "loss": 2.1401, + "step": 4640 + }, + { + "epoch": 0.07, + "learning_rate": 4.93606047434669e-05, + "loss": 2.1817, + "step": 4650 + }, + { + "epoch": 0.07, + "learning_rate": 4.935786354565311e-05, + "loss": 2.2033, + "step": 4660 + }, + { + "epoch": 0.07, + "learning_rate": 4.9355116560850325e-05, + "loss": 2.0494, + "step": 4670 + }, + { + "epoch": 0.07, + "learning_rate": 4.935236378971117e-05, + "loss": 1.98, + "step": 4680 + }, + { + "epoch": 0.07, + "learning_rate": 4.934960523288966e-05, + "loss": 1.9362, + "step": 4690 + }, + { + "epoch": 0.07, + "learning_rate": 4.934684089104118e-05, + "loss": 2.1071, + "step": 4700 + }, + { + "epoch": 0.07, + "learning_rate": 4.934407076482249e-05, + "loss": 2.1135, + "step": 4710 + }, + { + "epoch": 0.07, + "learning_rate": 4.934129485489171e-05, + "loss": 1.8801, + "step": 4720 + }, + { + "epoch": 0.07, + "learning_rate": 4.933851316190836e-05, + "loss": 1.7928, + "step": 4730 + }, + { + "epoch": 0.07, + "learning_rate": 4.9335725686533313e-05, + "loss": 1.7444, + "step": 4740 + }, + { + "epoch": 0.07, + "learning_rate": 4.933293242942882e-05, + "loss": 1.7374, + "step": 4750 + }, + { + "epoch": 0.07, + "learning_rate": 4.933013339125851e-05, + "loss": 1.7355, + "step": 4760 + }, + { + "epoch": 0.07, + "learning_rate": 4.932732857268739e-05, + "loss": 1.7329, + "step": 4770 + }, + { + "epoch": 0.07, + "learning_rate": 4.9324517974381835e-05, + "loss": 1.9436, + "step": 4780 + }, + { + "epoch": 0.07, + "learning_rate": 4.9321701597009586e-05, + "loss": 1.8888, + "step": 4790 + }, + { + "epoch": 0.07, + "learning_rate": 4.931887944123976e-05, + "loss": 1.8875, + "step": 4800 + }, + { + "epoch": 0.07, + "learning_rate": 4.931605150774287e-05, + "loss": 1.833, + "step": 4810 + }, + { + "epoch": 0.07, + "learning_rate": 4.9313217797190744e-05, + "loss": 2.4164, + "step": 4820 + }, + { + "epoch": 0.07, + "learning_rate": 4.9310378310256664e-05, + "loss": 1.9184, + "step": 4830 + }, + { + "epoch": 0.08, + "learning_rate": 4.930753304761522e-05, + "loss": 1.809, + "step": 4840 + }, + { + "epoch": 0.08, + "learning_rate": 4.9304682009942384e-05, + "loss": 1.7764, + "step": 4850 + }, + { + "epoch": 0.08, + "learning_rate": 4.930182519791553e-05, + "loss": 1.7491, + "step": 4860 + }, + { + "epoch": 0.08, + "learning_rate": 4.929896261221338e-05, + "loss": 1.7499, + "step": 4870 + }, + { + "epoch": 0.08, + "learning_rate": 4.929609425351602e-05, + "loss": 1.8303, + "step": 4880 + }, + { + "epoch": 0.08, + "learning_rate": 4.9293220122504944e-05, + "loss": 1.7255, + "step": 4890 + }, + { + "epoch": 0.08, + "learning_rate": 4.929034021986299e-05, + "loss": 1.693, + "step": 4900 + }, + { + "epoch": 0.08, + "learning_rate": 4.9287454546274336e-05, + "loss": 1.884, + "step": 4910 + }, + { + "epoch": 0.08, + "learning_rate": 4.928456310242461e-05, + "loss": 2.0695, + "step": 4920 + }, + { + "epoch": 0.08, + "learning_rate": 4.9281665889000743e-05, + "loss": 2.0197, + "step": 4930 + }, + { + "epoch": 0.08, + "learning_rate": 4.9278762906691066e-05, + "loss": 1.9041, + "step": 4940 + }, + { + "epoch": 0.08, + "learning_rate": 4.927585415618528e-05, + "loss": 2.0866, + "step": 4950 + }, + { + "epoch": 0.08, + "learning_rate": 4.9272939638174444e-05, + "loss": 2.1303, + "step": 4960 + }, + { + "epoch": 0.08, + "learning_rate": 4.9270019353351e-05, + "loss": 1.9051, + "step": 4970 + }, + { + "epoch": 0.08, + "learning_rate": 4.926709330240875e-05, + "loss": 1.9725, + "step": 4980 + }, + { + "epoch": 0.08, + "learning_rate": 4.926416148604288e-05, + "loss": 2.0612, + "step": 4990 + }, + { + "epoch": 0.08, + "learning_rate": 4.926122390494993e-05, + "loss": 2.0576, + "step": 5000 + }, + { + "epoch": 0.08, + "eval_loss": 1.898725152015686, + "eval_runtime": 82.1036, + "eval_samples_per_second": 36.539, + "eval_steps_per_second": 4.567, + "step": 5000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9258280559827806e-05, + "loss": 2.02, + "step": 5010 + }, + { + "epoch": 0.08, + "learning_rate": 4.925533145137582e-05, + "loss": 1.8561, + "step": 5020 + }, + { + "epoch": 0.08, + "learning_rate": 4.92523765802946e-05, + "loss": 1.8566, + "step": 5030 + }, + { + "epoch": 0.08, + "learning_rate": 4.9249415947286185e-05, + "loss": 1.7604, + "step": 5040 + }, + { + "epoch": 0.08, + "learning_rate": 4.924644955305397e-05, + "loss": 2.1573, + "step": 5050 + }, + { + "epoch": 0.08, + "learning_rate": 4.924347739830271e-05, + "loss": 2.0955, + "step": 5060 + }, + { + "epoch": 0.08, + "learning_rate": 4.924049948373853e-05, + "loss": 2.2568, + "step": 5070 + }, + { + "epoch": 0.08, + "learning_rate": 4.923751581006894e-05, + "loss": 2.1872, + "step": 5080 + }, + { + "epoch": 0.08, + "learning_rate": 4.923452637800281e-05, + "loss": 2.0216, + "step": 5090 + }, + { + "epoch": 0.08, + "learning_rate": 4.923153118825036e-05, + "loss": 1.9783, + "step": 5100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9228530241523205e-05, + "loss": 1.9228, + "step": 5110 + }, + { + "epoch": 0.08, + "learning_rate": 4.9225523538534306e-05, + "loss": 1.9523, + "step": 5120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9222511079998015e-05, + "loss": 2.2417, + "step": 5130 + }, + { + "epoch": 0.08, + "learning_rate": 4.921949286663003e-05, + "loss": 2.2712, + "step": 5140 + }, + { + "epoch": 0.08, + "learning_rate": 4.921646889914742e-05, + "loss": 2.5226, + "step": 5150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9213439178268626e-05, + "loss": 2.4187, + "step": 5160 + }, + { + "epoch": 0.08, + "learning_rate": 4.921040370471346e-05, + "loss": 2.2626, + "step": 5170 + }, + { + "epoch": 0.08, + "learning_rate": 4.92073624792031e-05, + "loss": 2.057, + "step": 5180 + }, + { + "epoch": 0.08, + "learning_rate": 4.920431550246007e-05, + "loss": 2.1018, + "step": 5190 + }, + { + "epoch": 0.08, + "learning_rate": 4.920126277520829e-05, + "loss": 2.1591, + "step": 5200 + }, + { + "epoch": 0.08, + "learning_rate": 4.9198204298173036e-05, + "loss": 2.2859, + "step": 5210 + }, + { + "epoch": 0.08, + "learning_rate": 4.9195140072080934e-05, + "loss": 2.1389, + "step": 5220 + }, + { + "epoch": 0.08, + "learning_rate": 4.919207009766e-05, + "loss": 1.9886, + "step": 5230 + }, + { + "epoch": 0.08, + "learning_rate": 4.91889943756396e-05, + "loss": 2.0741, + "step": 5240 + }, + { + "epoch": 0.08, + "learning_rate": 4.9185912906750477e-05, + "loss": 1.7405, + "step": 5250 + }, + { + "epoch": 0.08, + "learning_rate": 4.9182825691724714e-05, + "loss": 1.65, + "step": 5260 + }, + { + "epoch": 0.08, + "learning_rate": 4.917973273129579e-05, + "loss": 1.5947, + "step": 5270 + }, + { + "epoch": 0.08, + "learning_rate": 4.9176634026198545e-05, + "loss": 1.6044, + "step": 5280 + }, + { + "epoch": 0.08, + "learning_rate": 4.9173529577169164e-05, + "loss": 1.8158, + "step": 5290 + }, + { + "epoch": 0.08, + "learning_rate": 4.917041938494521e-05, + "loss": 2.0296, + "step": 5300 + }, + { + "epoch": 0.08, + "learning_rate": 4.91673034502656e-05, + "loss": 1.9597, + "step": 5310 + }, + { + "epoch": 0.08, + "learning_rate": 4.9164181773870647e-05, + "loss": 2.1001, + "step": 5320 + }, + { + "epoch": 0.08, + "learning_rate": 4.916105435650198e-05, + "loss": 1.8555, + "step": 5330 + }, + { + "epoch": 0.08, + "learning_rate": 4.915792119890263e-05, + "loss": 2.0666, + "step": 5340 + }, + { + "epoch": 0.08, + "learning_rate": 4.915478230181698e-05, + "loss": 2.0482, + "step": 5350 + }, + { + "epoch": 0.08, + "learning_rate": 4.9151637665990775e-05, + "loss": 1.9815, + "step": 5360 + }, + { + "epoch": 0.08, + "learning_rate": 4.9148487292171116e-05, + "loss": 1.9244, + "step": 5370 + }, + { + "epoch": 0.08, + "learning_rate": 4.914533118110648e-05, + "loss": 1.9903, + "step": 5380 + }, + { + "epoch": 0.08, + "learning_rate": 4.914216933354671e-05, + "loss": 1.841, + "step": 5390 + }, + { + "epoch": 0.08, + "learning_rate": 4.913900175024298e-05, + "loss": 2.0886, + "step": 5400 + }, + { + "epoch": 0.08, + "learning_rate": 4.9135828431947875e-05, + "loss": 2.0558, + "step": 5410 + }, + { + "epoch": 0.08, + "learning_rate": 4.913264937941531e-05, + "loss": 2.1695, + "step": 5420 + }, + { + "epoch": 0.08, + "learning_rate": 4.9129464593400565e-05, + "loss": 2.3233, + "step": 5430 + }, + { + "epoch": 0.08, + "learning_rate": 4.91262740746603e-05, + "loss": 2.1296, + "step": 5440 + }, + { + "epoch": 0.08, + "learning_rate": 4.9123077823952513e-05, + "loss": 2.1855, + "step": 5450 + }, + { + "epoch": 0.08, + "learning_rate": 4.9119875842036587e-05, + "loss": 2.0441, + "step": 5460 + }, + { + "epoch": 0.08, + "learning_rate": 4.911666812967324e-05, + "loss": 2.0958, + "step": 5470 + }, + { + "epoch": 0.09, + "learning_rate": 4.911345468762457e-05, + "loss": 2.2783, + "step": 5480 + }, + { + "epoch": 0.09, + "learning_rate": 4.911023551665405e-05, + "loss": 2.2121, + "step": 5490 + }, + { + "epoch": 0.09, + "learning_rate": 4.910701061752648e-05, + "loss": 2.148, + "step": 5500 + }, + { + "epoch": 0.09, + "learning_rate": 4.910377999100804e-05, + "loss": 2.0967, + "step": 5510 + }, + { + "epoch": 0.09, + "learning_rate": 4.910054363786628e-05, + "loss": 2.1708, + "step": 5520 + }, + { + "epoch": 0.09, + "learning_rate": 4.909730155887008e-05, + "loss": 2.0055, + "step": 5530 + }, + { + "epoch": 0.09, + "learning_rate": 4.909405375478971e-05, + "loss": 2.1761, + "step": 5540 + }, + { + "epoch": 0.09, + "learning_rate": 4.90908002263968e-05, + "loss": 2.0528, + "step": 5550 + }, + { + "epoch": 0.09, + "learning_rate": 4.908754097446431e-05, + "loss": 1.8527, + "step": 5560 + }, + { + "epoch": 0.09, + "learning_rate": 4.908427599976658e-05, + "loss": 1.783, + "step": 5570 + }, + { + "epoch": 0.09, + "learning_rate": 4.908100530307933e-05, + "loss": 1.7497, + "step": 5580 + }, + { + "epoch": 0.09, + "learning_rate": 4.90777288851796e-05, + "loss": 1.9056, + "step": 5590 + }, + { + "epoch": 0.09, + "learning_rate": 4.907444674684582e-05, + "loss": 2.0527, + "step": 5600 + }, + { + "epoch": 0.09, + "learning_rate": 4.907115888885776e-05, + "loss": 2.064, + "step": 5610 + }, + { + "epoch": 0.09, + "learning_rate": 4.906786531199655e-05, + "loss": 1.9632, + "step": 5620 + }, + { + "epoch": 0.09, + "learning_rate": 4.90645660170447e-05, + "loss": 1.8798, + "step": 5630 + }, + { + "epoch": 0.09, + "learning_rate": 4.9061261004786045e-05, + "loss": 2.0581, + "step": 5640 + }, + { + "epoch": 0.09, + "learning_rate": 4.905795027600581e-05, + "loss": 2.0661, + "step": 5650 + }, + { + "epoch": 0.09, + "learning_rate": 4.9054633831490557e-05, + "loss": 2.2821, + "step": 5660 + }, + { + "epoch": 0.09, + "learning_rate": 4.905131167202822e-05, + "loss": 2.1977, + "step": 5670 + }, + { + "epoch": 0.09, + "learning_rate": 4.904798379840808e-05, + "loss": 2.3421, + "step": 5680 + }, + { + "epoch": 0.09, + "learning_rate": 4.904465021142077e-05, + "loss": 2.18, + "step": 5690 + }, + { + "epoch": 0.09, + "learning_rate": 4.904131091185831e-05, + "loss": 2.1462, + "step": 5700 + }, + { + "epoch": 0.09, + "learning_rate": 4.903796590051405e-05, + "loss": 2.1342, + "step": 5710 + }, + { + "epoch": 0.09, + "learning_rate": 4.90346151781827e-05, + "loss": 2.2187, + "step": 5720 + }, + { + "epoch": 0.09, + "learning_rate": 4.9031258745660326e-05, + "loss": 2.2763, + "step": 5730 + }, + { + "epoch": 0.09, + "learning_rate": 4.9027896603744364e-05, + "loss": 1.851, + "step": 5740 + }, + { + "epoch": 0.09, + "learning_rate": 4.90245287532336e-05, + "loss": 1.7995, + "step": 5750 + }, + { + "epoch": 0.09, + "learning_rate": 4.902115519492817e-05, + "loss": 1.7303, + "step": 5760 + }, + { + "epoch": 0.09, + "learning_rate": 4.901777592962957e-05, + "loss": 2.0054, + "step": 5770 + }, + { + "epoch": 0.09, + "learning_rate": 4.901439095814067e-05, + "loss": 1.9593, + "step": 5780 + }, + { + "epoch": 0.09, + "learning_rate": 4.901100028126565e-05, + "loss": 2.0311, + "step": 5790 + }, + { + "epoch": 0.09, + "learning_rate": 4.900760389981008e-05, + "loss": 2.089, + "step": 5800 + }, + { + "epoch": 0.09, + "learning_rate": 4.9004201814580896e-05, + "loss": 1.9953, + "step": 5810 + }, + { + "epoch": 0.09, + "learning_rate": 4.9000794026386354e-05, + "loss": 2.1757, + "step": 5820 + }, + { + "epoch": 0.09, + "learning_rate": 4.89973805360361e-05, + "loss": 2.2484, + "step": 5830 + }, + { + "epoch": 0.09, + "learning_rate": 4.89939613443411e-05, + "loss": 2.277, + "step": 5840 + }, + { + "epoch": 0.09, + "learning_rate": 4.89905364521137e-05, + "loss": 2.2497, + "step": 5850 + }, + { + "epoch": 0.09, + "learning_rate": 4.898710586016761e-05, + "loss": 2.2037, + "step": 5860 + }, + { + "epoch": 0.09, + "learning_rate": 4.898366956931784e-05, + "loss": 2.2424, + "step": 5870 + }, + { + "epoch": 0.09, + "learning_rate": 4.898022758038081e-05, + "loss": 2.0302, + "step": 5880 + }, + { + "epoch": 0.09, + "learning_rate": 4.8976779894174286e-05, + "loss": 2.0582, + "step": 5890 + }, + { + "epoch": 0.09, + "learning_rate": 4.897332651151736e-05, + "loss": 1.838, + "step": 5900 + }, + { + "epoch": 0.09, + "learning_rate": 4.89698674332305e-05, + "loss": 1.7248, + "step": 5910 + }, + { + "epoch": 0.09, + "learning_rate": 4.896640266013552e-05, + "loss": 1.7454, + "step": 5920 + }, + { + "epoch": 0.09, + "learning_rate": 4.8962932193055586e-05, + "loss": 1.855, + "step": 5930 + }, + { + "epoch": 0.09, + "learning_rate": 4.895945603281522e-05, + "loss": 1.9816, + "step": 5940 + }, + { + "epoch": 0.09, + "learning_rate": 4.8955974180240296e-05, + "loss": 2.0163, + "step": 5950 + }, + { + "epoch": 0.09, + "learning_rate": 4.895248663615803e-05, + "loss": 2.1321, + "step": 5960 + }, + { + "epoch": 0.09, + "learning_rate": 4.894899340139702e-05, + "loss": 1.9038, + "step": 5970 + }, + { + "epoch": 0.09, + "learning_rate": 4.8945494476787166e-05, + "loss": 1.8843, + "step": 5980 + }, + { + "epoch": 0.09, + "learning_rate": 4.8941989863159773e-05, + "loss": 1.7639, + "step": 5990 + }, + { + "epoch": 0.09, + "learning_rate": 4.893847956134747e-05, + "loss": 2.1, + "step": 6000 + }, + { + "epoch": 0.09, + "learning_rate": 4.893496357218423e-05, + "loss": 2.3216, + "step": 6010 + }, + { + "epoch": 0.09, + "learning_rate": 4.893144189650541e-05, + "loss": 2.1858, + "step": 6020 + }, + { + "epoch": 0.09, + "learning_rate": 4.892791453514768e-05, + "loss": 2.1404, + "step": 6030 + }, + { + "epoch": 0.09, + "learning_rate": 4.8924381488949074e-05, + "loss": 2.0588, + "step": 6040 + }, + { + "epoch": 0.09, + "learning_rate": 4.892084275874899e-05, + "loss": 2.0701, + "step": 6050 + }, + { + "epoch": 0.09, + "learning_rate": 4.891729834538816e-05, + "loss": 1.9458, + "step": 6060 + }, + { + "epoch": 0.09, + "learning_rate": 4.8913748249708684e-05, + "loss": 2.0036, + "step": 6070 + }, + { + "epoch": 0.09, + "learning_rate": 4.891019247255399e-05, + "loss": 1.8968, + "step": 6080 + }, + { + "epoch": 0.09, + "learning_rate": 4.890663101476887e-05, + "loss": 1.7224, + "step": 6090 + }, + { + "epoch": 0.09, + "learning_rate": 4.8903063877199465e-05, + "loss": 1.658, + "step": 6100 + }, + { + "epoch": 0.09, + "learning_rate": 4.889949106069325e-05, + "loss": 1.6651, + "step": 6110 + }, + { + "epoch": 0.09, + "learning_rate": 4.8895912566099075e-05, + "loss": 2.1595, + "step": 6120 + }, + { + "epoch": 0.1, + "learning_rate": 4.8892328394267114e-05, + "loss": 2.3534, + "step": 6130 + }, + { + "epoch": 0.1, + "learning_rate": 4.8888738546048926e-05, + "loss": 2.2488, + "step": 6140 + }, + { + "epoch": 0.1, + "learning_rate": 4.8885143022297365e-05, + "loss": 2.2491, + "step": 6150 + }, + { + "epoch": 0.1, + "learning_rate": 4.888154182386668e-05, + "loss": 1.7979, + "step": 6160 + }, + { + "epoch": 0.1, + "learning_rate": 4.887793495161244e-05, + "loss": 2.0072, + "step": 6170 + }, + { + "epoch": 0.1, + "learning_rate": 4.887432240639158e-05, + "loss": 2.0917, + "step": 6180 + }, + { + "epoch": 0.1, + "learning_rate": 4.887070418906238e-05, + "loss": 1.7792, + "step": 6190 + }, + { + "epoch": 0.1, + "learning_rate": 4.8867080300484454e-05, + "loss": 2.0356, + "step": 6200 + }, + { + "epoch": 0.1, + "learning_rate": 4.886345074151877e-05, + "loss": 2.025, + "step": 6210 + }, + { + "epoch": 0.1, + "learning_rate": 4.885981551302766e-05, + "loss": 1.9747, + "step": 6220 + }, + { + "epoch": 0.1, + "learning_rate": 4.885617461587478e-05, + "loss": 1.9287, + "step": 6230 + }, + { + "epoch": 0.1, + "learning_rate": 4.885252805092514e-05, + "loss": 1.8151, + "step": 6240 + }, + { + "epoch": 0.1, + "learning_rate": 4.884887581904509e-05, + "loss": 1.8781, + "step": 6250 + }, + { + "epoch": 0.1, + "learning_rate": 4.884521792110236e-05, + "loss": 2.2645, + "step": 6260 + }, + { + "epoch": 0.1, + "learning_rate": 4.884155435796598e-05, + "loss": 1.9585, + "step": 6270 + }, + { + "epoch": 0.1, + "learning_rate": 4.8837885130506345e-05, + "loss": 1.9634, + "step": 6280 + }, + { + "epoch": 0.1, + "learning_rate": 4.883421023959521e-05, + "loss": 1.9319, + "step": 6290 + }, + { + "epoch": 0.1, + "learning_rate": 4.883052968610565e-05, + "loss": 1.8935, + "step": 6300 + }, + { + "epoch": 0.1, + "learning_rate": 4.882684347091211e-05, + "loss": 1.9621, + "step": 6310 + }, + { + "epoch": 0.1, + "learning_rate": 4.882315159489036e-05, + "loss": 1.8953, + "step": 6320 + }, + { + "epoch": 0.1, + "learning_rate": 4.881945405891752e-05, + "loss": 1.9326, + "step": 6330 + }, + { + "epoch": 0.1, + "learning_rate": 4.8815750863872075e-05, + "loss": 2.0082, + "step": 6340 + }, + { + "epoch": 0.1, + "learning_rate": 4.8812042010633815e-05, + "loss": 1.824, + "step": 6350 + }, + { + "epoch": 0.1, + "learning_rate": 4.8808327500083914e-05, + "loss": 2.051, + "step": 6360 + }, + { + "epoch": 0.1, + "learning_rate": 4.880460733310487e-05, + "loss": 1.4587, + "step": 6370 + }, + { + "epoch": 0.1, + "learning_rate": 4.880088151058052e-05, + "loss": 1.9454, + "step": 6380 + }, + { + "epoch": 0.1, + "learning_rate": 4.8797150033396054e-05, + "loss": 2.0016, + "step": 6390 + }, + { + "epoch": 0.1, + "learning_rate": 4.8793412902438015e-05, + "loss": 1.9901, + "step": 6400 + }, + { + "epoch": 0.1, + "learning_rate": 4.878967011859426e-05, + "loss": 1.9554, + "step": 6410 + }, + { + "epoch": 0.1, + "learning_rate": 4.878592168275402e-05, + "loss": 2.0197, + "step": 6420 + }, + { + "epoch": 0.1, + "learning_rate": 4.8782167595807846e-05, + "loss": 1.7733, + "step": 6430 + }, + { + "epoch": 0.1, + "learning_rate": 4.8778407858647646e-05, + "loss": 1.7212, + "step": 6440 + }, + { + "epoch": 0.1, + "learning_rate": 4.877464247216668e-05, + "loss": 2.0501, + "step": 6450 + }, + { + "epoch": 0.1, + "learning_rate": 4.87708714372595e-05, + "loss": 1.9117, + "step": 6460 + }, + { + "epoch": 0.1, + "learning_rate": 4.8767094754822074e-05, + "loss": 1.944, + "step": 6470 + }, + { + "epoch": 0.1, + "learning_rate": 4.876331242575166e-05, + "loss": 1.8063, + "step": 6480 + }, + { + "epoch": 0.1, + "learning_rate": 4.875952445094686e-05, + "loss": 1.932, + "step": 6490 + }, + { + "epoch": 0.1, + "learning_rate": 4.8755730831307645e-05, + "loss": 2.0349, + "step": 6500 + }, + { + "epoch": 0.1, + "learning_rate": 4.875193156773529e-05, + "loss": 1.8732, + "step": 6510 + }, + { + "epoch": 0.1, + "learning_rate": 4.874812666113245e-05, + "loss": 2.0624, + "step": 6520 + }, + { + "epoch": 0.1, + "learning_rate": 4.874431611240311e-05, + "loss": 1.8679, + "step": 6530 + }, + { + "epoch": 0.1, + "learning_rate": 4.874049992245257e-05, + "loss": 2.325, + "step": 6540 + }, + { + "epoch": 0.1, + "learning_rate": 4.8736678092187485e-05, + "loss": 2.1108, + "step": 6550 + }, + { + "epoch": 0.1, + "learning_rate": 4.873285062251586e-05, + "loss": 2.0823, + "step": 6560 + }, + { + "epoch": 0.1, + "learning_rate": 4.872901751434703e-05, + "loss": 1.9649, + "step": 6570 + }, + { + "epoch": 0.1, + "learning_rate": 4.8725178768591685e-05, + "loss": 1.9148, + "step": 6580 + }, + { + "epoch": 0.1, + "learning_rate": 4.872133438616182e-05, + "loss": 1.8602, + "step": 6590 + }, + { + "epoch": 0.1, + "learning_rate": 4.871748436797081e-05, + "loss": 1.9709, + "step": 6600 + }, + { + "epoch": 0.1, + "learning_rate": 4.8713628714933345e-05, + "loss": 1.7993, + "step": 6610 + }, + { + "epoch": 0.1, + "learning_rate": 4.870976742796546e-05, + "loss": 1.749, + "step": 6620 + }, + { + "epoch": 0.1, + "learning_rate": 4.8705900507984514e-05, + "loss": 1.6584, + "step": 6630 + }, + { + "epoch": 0.1, + "learning_rate": 4.870202795590924e-05, + "loss": 1.9275, + "step": 6640 + }, + { + "epoch": 0.1, + "learning_rate": 4.8698149772659654e-05, + "loss": 2.0005, + "step": 6650 + }, + { + "epoch": 0.1, + "learning_rate": 4.8694265959157175e-05, + "loss": 2.0807, + "step": 6660 + }, + { + "epoch": 0.1, + "learning_rate": 4.869037651632451e-05, + "loss": 1.9795, + "step": 6670 + }, + { + "epoch": 0.1, + "learning_rate": 4.868648144508573e-05, + "loss": 1.9871, + "step": 6680 + }, + { + "epoch": 0.1, + "learning_rate": 4.8682580746366225e-05, + "loss": 2.03, + "step": 6690 + }, + { + "epoch": 0.1, + "learning_rate": 4.867867442109273e-05, + "loss": 2.1176, + "step": 6700 + }, + { + "epoch": 0.1, + "learning_rate": 4.867476247019332e-05, + "loss": 2.1169, + "step": 6710 + }, + { + "epoch": 0.1, + "learning_rate": 4.8670844894597414e-05, + "loss": 2.1753, + "step": 6720 + }, + { + "epoch": 0.1, + "learning_rate": 4.866692169523574e-05, + "loss": 2.2104, + "step": 6730 + }, + { + "epoch": 0.1, + "learning_rate": 4.86629928730404e-05, + "loss": 2.1714, + "step": 6740 + }, + { + "epoch": 0.1, + "learning_rate": 4.8659058428944784e-05, + "loss": 2.2279, + "step": 6750 + }, + { + "epoch": 0.1, + "learning_rate": 4.865511836388367e-05, + "loss": 2.2077, + "step": 6760 + }, + { + "epoch": 0.11, + "learning_rate": 4.865117267879313e-05, + "loss": 2.1795, + "step": 6770 + }, + { + "epoch": 0.11, + "learning_rate": 4.8647221374610595e-05, + "loss": 2.1064, + "step": 6780 + }, + { + "epoch": 0.11, + "learning_rate": 4.8643264452274825e-05, + "loss": 2.0315, + "step": 6790 + }, + { + "epoch": 0.11, + "learning_rate": 4.863930191272591e-05, + "loss": 1.9821, + "step": 6800 + }, + { + "epoch": 0.11, + "learning_rate": 4.863533375690529e-05, + "loss": 1.8336, + "step": 6810 + }, + { + "epoch": 0.11, + "learning_rate": 4.86313599857557e-05, + "loss": 1.8944, + "step": 6820 + }, + { + "epoch": 0.11, + "learning_rate": 4.862738060022126e-05, + "loss": 1.7907, + "step": 6830 + }, + { + "epoch": 0.11, + "learning_rate": 4.862339560124739e-05, + "loss": 1.6787, + "step": 6840 + }, + { + "epoch": 0.11, + "learning_rate": 4.861940498978086e-05, + "loss": 2.1433, + "step": 6850 + }, + { + "epoch": 0.11, + "learning_rate": 4.861540876676976e-05, + "loss": 1.9924, + "step": 6860 + }, + { + "epoch": 0.11, + "learning_rate": 4.861140693316353e-05, + "loss": 2.0064, + "step": 6870 + }, + { + "epoch": 0.11, + "learning_rate": 4.860739948991292e-05, + "loss": 2.0399, + "step": 6880 + }, + { + "epoch": 0.11, + "learning_rate": 4.860338643797004e-05, + "loss": 2.0269, + "step": 6890 + }, + { + "epoch": 0.11, + "learning_rate": 4.8599367778288306e-05, + "loss": 1.985, + "step": 6900 + }, + { + "epoch": 0.11, + "learning_rate": 4.8595343511822494e-05, + "loss": 2.0193, + "step": 6910 + }, + { + "epoch": 0.11, + "learning_rate": 4.8591313639528685e-05, + "loss": 1.9701, + "step": 6920 + }, + { + "epoch": 0.11, + "learning_rate": 4.858727816236431e-05, + "loss": 1.9802, + "step": 6930 + }, + { + "epoch": 0.11, + "learning_rate": 4.8583237081288126e-05, + "loss": 1.9126, + "step": 6940 + }, + { + "epoch": 0.11, + "learning_rate": 4.857919039726021e-05, + "loss": 2.1847, + "step": 6950 + }, + { + "epoch": 0.11, + "learning_rate": 4.8575138111241995e-05, + "loss": 1.8313, + "step": 6960 + }, + { + "epoch": 0.11, + "learning_rate": 4.857108022419622e-05, + "loss": 1.7952, + "step": 6970 + }, + { + "epoch": 0.11, + "learning_rate": 4.856701673708698e-05, + "loss": 1.7582, + "step": 6980 + }, + { + "epoch": 0.11, + "learning_rate": 4.856294765087968e-05, + "loss": 1.8362, + "step": 6990 + }, + { + "epoch": 0.11, + "learning_rate": 4.855887296654105e-05, + "loss": 1.8562, + "step": 7000 + }, + { + "epoch": 0.11, + "learning_rate": 4.855479268503917e-05, + "loss": 1.9698, + "step": 7010 + }, + { + "epoch": 0.11, + "learning_rate": 4.855070680734345e-05, + "loss": 1.8776, + "step": 7020 + }, + { + "epoch": 0.11, + "learning_rate": 4.854661533442461e-05, + "loss": 2.0073, + "step": 7030 + }, + { + "epoch": 0.11, + "learning_rate": 4.854251826725471e-05, + "loss": 2.0788, + "step": 7040 + }, + { + "epoch": 0.11, + "learning_rate": 4.853841560680715e-05, + "loss": 1.9275, + "step": 7050 + }, + { + "epoch": 0.11, + "learning_rate": 4.853430735405663e-05, + "loss": 1.7879, + "step": 7060 + }, + { + "epoch": 0.11, + "learning_rate": 4.853019350997922e-05, + "loss": 1.6989, + "step": 7070 + }, + { + "epoch": 0.11, + "learning_rate": 4.852607407555227e-05, + "loss": 1.9847, + "step": 7080 + }, + { + "epoch": 0.11, + "learning_rate": 4.85219490517545e-05, + "loss": 2.168, + "step": 7090 + }, + { + "epoch": 0.11, + "learning_rate": 4.851781843956594e-05, + "loss": 2.0781, + "step": 7100 + }, + { + "epoch": 0.11, + "learning_rate": 4.851368223996794e-05, + "loss": 2.0415, + "step": 7110 + }, + { + "epoch": 0.11, + "learning_rate": 4.8509540453943194e-05, + "loss": 1.9486, + "step": 7120 + }, + { + "epoch": 0.11, + "learning_rate": 4.850539308247571e-05, + "loss": 1.9106, + "step": 7130 + }, + { + "epoch": 0.11, + "learning_rate": 4.850124012655084e-05, + "loss": 2.2245, + "step": 7140 + }, + { + "epoch": 0.11, + "learning_rate": 4.8497081587155245e-05, + "loss": 1.9552, + "step": 7150 + }, + { + "epoch": 0.11, + "learning_rate": 4.849291746527691e-05, + "loss": 1.871, + "step": 7160 + }, + { + "epoch": 0.11, + "learning_rate": 4.848874776190517e-05, + "loss": 2.0217, + "step": 7170 + }, + { + "epoch": 0.11, + "learning_rate": 4.848457247803066e-05, + "loss": 2.1396, + "step": 7180 + }, + { + "epoch": 0.11, + "learning_rate": 4.848039161464536e-05, + "loss": 2.0519, + "step": 7190 + }, + { + "epoch": 0.11, + "learning_rate": 4.847620517274256e-05, + "loss": 1.9063, + "step": 7200 + }, + { + "epoch": 0.11, + "learning_rate": 4.847201315331689e-05, + "loss": 1.9367, + "step": 7210 + }, + { + "epoch": 0.11, + "learning_rate": 4.846781555736429e-05, + "loss": 2.0058, + "step": 7220 + }, + { + "epoch": 0.11, + "learning_rate": 4.846361238588205e-05, + "loss": 1.9975, + "step": 7230 + }, + { + "epoch": 0.11, + "learning_rate": 4.845940363986875e-05, + "loss": 1.9838, + "step": 7240 + }, + { + "epoch": 0.11, + "learning_rate": 4.845518932032431e-05, + "loss": 1.9991, + "step": 7250 + }, + { + "epoch": 0.11, + "learning_rate": 4.845096942825e-05, + "loss": 1.8271, + "step": 7260 + }, + { + "epoch": 0.11, + "learning_rate": 4.844674396464837e-05, + "loss": 1.9611, + "step": 7270 + }, + { + "epoch": 0.11, + "learning_rate": 4.8442512930523306e-05, + "loss": 2.1237, + "step": 7280 + }, + { + "epoch": 0.11, + "learning_rate": 4.843827632688006e-05, + "loss": 2.4828, + "step": 7290 + }, + { + "epoch": 0.11, + "learning_rate": 4.843403415472514e-05, + "loss": 2.3333, + "step": 7300 + }, + { + "epoch": 0.11, + "learning_rate": 4.8429786415066415e-05, + "loss": 2.3092, + "step": 7310 + }, + { + "epoch": 0.11, + "learning_rate": 4.8425533108913085e-05, + "loss": 1.853, + "step": 7320 + }, + { + "epoch": 0.11, + "learning_rate": 4.842127423727565e-05, + "loss": 1.6972, + "step": 7330 + }, + { + "epoch": 0.11, + "learning_rate": 4.8417009801165936e-05, + "loss": 2.0058, + "step": 7340 + }, + { + "epoch": 0.11, + "learning_rate": 4.841273980159711e-05, + "loss": 2.0079, + "step": 7350 + }, + { + "epoch": 0.11, + "learning_rate": 4.8408464239583626e-05, + "loss": 1.9739, + "step": 7360 + }, + { + "epoch": 0.11, + "learning_rate": 4.84041831161413e-05, + "loss": 2.0129, + "step": 7370 + }, + { + "epoch": 0.11, + "learning_rate": 4.839989643228725e-05, + "loss": 2.0464, + "step": 7380 + }, + { + "epoch": 0.11, + "learning_rate": 4.83956041890399e-05, + "loss": 2.0549, + "step": 7390 + }, + { + "epoch": 0.11, + "learning_rate": 4.839130638741902e-05, + "loss": 1.9158, + "step": 7400 + }, + { + "epoch": 0.11, + "learning_rate": 4.838700302844569e-05, + "loss": 1.726, + "step": 7410 + }, + { + "epoch": 0.12, + "learning_rate": 4.83826941131423e-05, + "loss": 1.8976, + "step": 7420 + }, + { + "epoch": 0.12, + "learning_rate": 4.837837964253258e-05, + "loss": 1.8251, + "step": 7430 + }, + { + "epoch": 0.12, + "learning_rate": 4.8374059617641574e-05, + "loss": 1.9301, + "step": 7440 + }, + { + "epoch": 0.12, + "learning_rate": 4.836973403949563e-05, + "loss": 1.8708, + "step": 7450 + }, + { + "epoch": 0.12, + "learning_rate": 4.8365402909122436e-05, + "loss": 1.9437, + "step": 7460 + }, + { + "epoch": 0.12, + "learning_rate": 4.8361066227551e-05, + "loss": 1.8773, + "step": 7470 + }, + { + "epoch": 0.12, + "learning_rate": 4.8356723995811615e-05, + "loss": 1.7485, + "step": 7480 + }, + { + "epoch": 0.12, + "learning_rate": 4.835237621493592e-05, + "loss": 1.9634, + "step": 7490 + }, + { + "epoch": 0.12, + "learning_rate": 4.83480228859569e-05, + "loss": 1.8195, + "step": 7500 + }, + { + "epoch": 0.12, + "learning_rate": 4.83436640099088e-05, + "loss": 1.9039, + "step": 7510 + }, + { + "epoch": 0.12, + "learning_rate": 4.8339299587827215e-05, + "loss": 1.9509, + "step": 7520 + }, + { + "epoch": 0.12, + "learning_rate": 4.833492962074905e-05, + "loss": 1.9431, + "step": 7530 + }, + { + "epoch": 0.12, + "learning_rate": 4.833055410971254e-05, + "loss": 2.0907, + "step": 7540 + }, + { + "epoch": 0.12, + "learning_rate": 4.8326173055757215e-05, + "loss": 2.0375, + "step": 7550 + }, + { + "epoch": 0.12, + "learning_rate": 4.832178645992396e-05, + "loss": 2.1631, + "step": 7560 + }, + { + "epoch": 0.12, + "learning_rate": 4.8317394323254914e-05, + "loss": 1.798, + "step": 7570 + }, + { + "epoch": 0.12, + "learning_rate": 4.83129966467936e-05, + "loss": 2.0854, + "step": 7580 + }, + { + "epoch": 0.12, + "learning_rate": 4.8308593431584816e-05, + "loss": 2.0271, + "step": 7590 + }, + { + "epoch": 0.12, + "learning_rate": 4.830418467867467e-05, + "loss": 2.0026, + "step": 7600 + }, + { + "epoch": 0.12, + "learning_rate": 4.829977038911063e-05, + "loss": 2.3122, + "step": 7610 + }, + { + "epoch": 0.12, + "learning_rate": 4.8295350563941446e-05, + "loss": 1.9115, + "step": 7620 + }, + { + "epoch": 0.12, + "learning_rate": 4.829092520421717e-05, + "loss": 1.7976, + "step": 7630 + }, + { + "epoch": 0.12, + "learning_rate": 4.8286494310989207e-05, + "loss": 1.7556, + "step": 7640 + }, + { + "epoch": 0.12, + "learning_rate": 4.8282057885310236e-05, + "loss": 1.873, + "step": 7650 + }, + { + "epoch": 0.12, + "learning_rate": 4.82776159282343e-05, + "loss": 1.85, + "step": 7660 + }, + { + "epoch": 0.12, + "learning_rate": 4.827316844081671e-05, + "loss": 1.8485, + "step": 7670 + }, + { + "epoch": 0.12, + "learning_rate": 4.8268715424114115e-05, + "loss": 1.8614, + "step": 7680 + }, + { + "epoch": 0.12, + "learning_rate": 4.8264256879184466e-05, + "loss": 1.7338, + "step": 7690 + }, + { + "epoch": 0.12, + "learning_rate": 4.8259792807087036e-05, + "loss": 1.6865, + "step": 7700 + }, + { + "epoch": 0.12, + "learning_rate": 4.8255323208882406e-05, + "loss": 1.8391, + "step": 7710 + }, + { + "epoch": 0.12, + "learning_rate": 4.825084808563247e-05, + "loss": 1.8365, + "step": 7720 + }, + { + "epoch": 0.12, + "learning_rate": 4.8246367438400444e-05, + "loss": 1.7852, + "step": 7730 + }, + { + "epoch": 0.12, + "learning_rate": 4.8241881268250846e-05, + "loss": 2.1699, + "step": 7740 + }, + { + "epoch": 0.12, + "learning_rate": 4.8237389576249514e-05, + "loss": 1.9428, + "step": 7750 + }, + { + "epoch": 0.12, + "learning_rate": 4.823289236346358e-05, + "loss": 1.912, + "step": 7760 + }, + { + "epoch": 0.12, + "learning_rate": 4.822838963096151e-05, + "loss": 2.0396, + "step": 7770 + }, + { + "epoch": 0.12, + "learning_rate": 4.822388137981306e-05, + "loss": 1.759, + "step": 7780 + }, + { + "epoch": 0.12, + "learning_rate": 4.8219367611089336e-05, + "loss": 1.9741, + "step": 7790 + }, + { + "epoch": 0.12, + "learning_rate": 4.8214848325862705e-05, + "loss": 2.0953, + "step": 7800 + }, + { + "epoch": 0.12, + "learning_rate": 4.821032352520687e-05, + "loss": 2.017, + "step": 7810 + }, + { + "epoch": 0.12, + "learning_rate": 4.820579321019685e-05, + "loss": 1.9514, + "step": 7820 + }, + { + "epoch": 0.12, + "learning_rate": 4.820125738190895e-05, + "loss": 1.9039, + "step": 7830 + }, + { + "epoch": 0.12, + "learning_rate": 4.819671604142082e-05, + "loss": 1.9749, + "step": 7840 + }, + { + "epoch": 0.12, + "learning_rate": 4.81921691898114e-05, + "loss": 1.9322, + "step": 7850 + }, + { + "epoch": 0.12, + "learning_rate": 4.818761682816093e-05, + "loss": 2.1528, + "step": 7860 + }, + { + "epoch": 0.12, + "learning_rate": 4.818305895755096e-05, + "loss": 2.0001, + "step": 7870 + }, + { + "epoch": 0.12, + "learning_rate": 4.8178495579064384e-05, + "loss": 1.8025, + "step": 7880 + }, + { + "epoch": 0.12, + "learning_rate": 4.817392669378536e-05, + "loss": 2.1475, + "step": 7890 + }, + { + "epoch": 0.12, + "learning_rate": 4.8169352302799386e-05, + "loss": 2.0314, + "step": 7900 + }, + { + "epoch": 0.12, + "learning_rate": 4.8164772407193234e-05, + "loss": 2.0755, + "step": 7910 + }, + { + "epoch": 0.12, + "learning_rate": 4.8160187008055024e-05, + "loss": 1.9103, + "step": 7920 + }, + { + "epoch": 0.12, + "learning_rate": 4.8155596106474155e-05, + "loss": 1.9736, + "step": 7930 + }, + { + "epoch": 0.12, + "learning_rate": 4.815099970354134e-05, + "loss": 1.8619, + "step": 7940 + }, + { + "epoch": 0.12, + "learning_rate": 4.8146397800348607e-05, + "loss": 2.0516, + "step": 7950 + }, + { + "epoch": 0.12, + "learning_rate": 4.8141790397989286e-05, + "loss": 1.6771, + "step": 7960 + }, + { + "epoch": 0.12, + "learning_rate": 4.813717749755801e-05, + "loss": 1.6609, + "step": 7970 + }, + { + "epoch": 0.12, + "learning_rate": 4.813255910015072e-05, + "loss": 1.9101, + "step": 7980 + }, + { + "epoch": 0.12, + "learning_rate": 4.812793520686467e-05, + "loss": 2.0758, + "step": 7990 + }, + { + "epoch": 0.12, + "learning_rate": 4.8123305818798406e-05, + "loss": 1.9979, + "step": 8000 + }, + { + "epoch": 0.12, + "learning_rate": 4.811867093705178e-05, + "loss": 2.1423, + "step": 8010 + }, + { + "epoch": 0.12, + "learning_rate": 4.811403056272598e-05, + "loss": 2.1008, + "step": 8020 + }, + { + "epoch": 0.12, + "learning_rate": 4.810938469692346e-05, + "loss": 1.7519, + "step": 8030 + }, + { + "epoch": 0.12, + "learning_rate": 4.8104733340747996e-05, + "loss": 1.7586, + "step": 8040 + }, + { + "epoch": 0.12, + "learning_rate": 4.8100076495304655e-05, + "loss": 1.9796, + "step": 8050 + }, + { + "epoch": 0.13, + "learning_rate": 4.8095414161699835e-05, + "loss": 1.8869, + "step": 8060 + }, + { + "epoch": 0.13, + "learning_rate": 4.8090746341041224e-05, + "loss": 1.7599, + "step": 8070 + }, + { + "epoch": 0.13, + "learning_rate": 4.8086073034437805e-05, + "loss": 1.803, + "step": 8080 + }, + { + "epoch": 0.13, + "learning_rate": 4.808139424299987e-05, + "loss": 1.7705, + "step": 8090 + }, + { + "epoch": 0.13, + "learning_rate": 4.807670996783902e-05, + "loss": 2.0492, + "step": 8100 + }, + { + "epoch": 0.13, + "learning_rate": 4.807202021006814e-05, + "loss": 2.061, + "step": 8110 + }, + { + "epoch": 0.13, + "learning_rate": 4.8067324970801454e-05, + "loss": 2.0152, + "step": 8120 + }, + { + "epoch": 0.13, + "learning_rate": 4.806262425115445e-05, + "loss": 1.9784, + "step": 8130 + }, + { + "epoch": 0.13, + "learning_rate": 4.8057918052243944e-05, + "loss": 2.0255, + "step": 8140 + }, + { + "epoch": 0.13, + "learning_rate": 4.8053206375188046e-05, + "loss": 1.9468, + "step": 8150 + }, + { + "epoch": 0.13, + "learning_rate": 4.804848922110615e-05, + "loss": 1.9584, + "step": 8160 + }, + { + "epoch": 0.13, + "learning_rate": 4.804376659111898e-05, + "loss": 1.9546, + "step": 8170 + }, + { + "epoch": 0.13, + "learning_rate": 4.803903848634854e-05, + "loss": 1.9174, + "step": 8180 + }, + { + "epoch": 0.13, + "learning_rate": 4.803430490791815e-05, + "loss": 2.0649, + "step": 8190 + }, + { + "epoch": 0.13, + "learning_rate": 4.802956585695241e-05, + "loss": 2.1623, + "step": 8200 + }, + { + "epoch": 0.13, + "learning_rate": 4.8024821334577254e-05, + "loss": 2.1094, + "step": 8210 + }, + { + "epoch": 0.13, + "learning_rate": 4.8020071341919884e-05, + "loss": 2.0405, + "step": 8220 + }, + { + "epoch": 0.13, + "learning_rate": 4.80153158801088e-05, + "loss": 2.1514, + "step": 8230 + }, + { + "epoch": 0.13, + "learning_rate": 4.8010554950273834e-05, + "loss": 2.1036, + "step": 8240 + }, + { + "epoch": 0.13, + "learning_rate": 4.800578855354608e-05, + "loss": 2.2558, + "step": 8250 + }, + { + "epoch": 0.13, + "learning_rate": 4.800101669105796e-05, + "loss": 2.4583, + "step": 8260 + }, + { + "epoch": 0.13, + "learning_rate": 4.799623936394318e-05, + "loss": 2.1735, + "step": 8270 + }, + { + "epoch": 0.13, + "learning_rate": 4.799145657333675e-05, + "loss": 1.833, + "step": 8280 + }, + { + "epoch": 0.13, + "learning_rate": 4.798666832037497e-05, + "loss": 1.9369, + "step": 8290 + }, + { + "epoch": 0.13, + "learning_rate": 4.7981874606195444e-05, + "loss": 1.9243, + "step": 8300 + }, + { + "epoch": 0.13, + "learning_rate": 4.797707543193706e-05, + "loss": 1.9941, + "step": 8310 + }, + { + "epoch": 0.13, + "learning_rate": 4.797227079874004e-05, + "loss": 2.0482, + "step": 8320 + }, + { + "epoch": 0.13, + "learning_rate": 4.7967460707745855e-05, + "loss": 2.0051, + "step": 8330 + }, + { + "epoch": 0.13, + "learning_rate": 4.796264516009731e-05, + "loss": 1.9693, + "step": 8340 + }, + { + "epoch": 0.13, + "learning_rate": 4.795782415693848e-05, + "loss": 1.9817, + "step": 8350 + }, + { + "epoch": 0.13, + "learning_rate": 4.795299769941476e-05, + "loss": 2.0272, + "step": 8360 + }, + { + "epoch": 0.13, + "learning_rate": 4.7948165788672825e-05, + "loss": 2.2728, + "step": 8370 + }, + { + "epoch": 0.13, + "learning_rate": 4.7943328425860656e-05, + "loss": 2.3472, + "step": 8380 + }, + { + "epoch": 0.13, + "learning_rate": 4.793848561212751e-05, + "loss": 2.1766, + "step": 8390 + }, + { + "epoch": 0.13, + "learning_rate": 4.793363734862396e-05, + "loss": 1.9, + "step": 8400 + }, + { + "epoch": 0.13, + "learning_rate": 4.7928783636501866e-05, + "loss": 1.742, + "step": 8410 + }, + { + "epoch": 0.13, + "learning_rate": 4.792392447691438e-05, + "loss": 2.033, + "step": 8420 + }, + { + "epoch": 0.13, + "learning_rate": 4.791905987101596e-05, + "loss": 1.9269, + "step": 8430 + }, + { + "epoch": 0.13, + "learning_rate": 4.791418981996233e-05, + "loss": 1.7271, + "step": 8440 + }, + { + "epoch": 0.13, + "learning_rate": 4.7909314324910546e-05, + "loss": 1.6478, + "step": 8450 + }, + { + "epoch": 0.13, + "learning_rate": 4.790443338701893e-05, + "loss": 1.8186, + "step": 8460 + }, + { + "epoch": 0.13, + "learning_rate": 4.789954700744709e-05, + "loss": 1.9366, + "step": 8470 + }, + { + "epoch": 0.13, + "learning_rate": 4.789465518735597e-05, + "loss": 1.9426, + "step": 8480 + }, + { + "epoch": 0.13, + "learning_rate": 4.7889757927907754e-05, + "loss": 1.8036, + "step": 8490 + }, + { + "epoch": 0.13, + "learning_rate": 4.788485523026596e-05, + "loss": 1.7854, + "step": 8500 + }, + { + "epoch": 0.13, + "learning_rate": 4.787994709559537e-05, + "loss": 1.8721, + "step": 8510 + }, + { + "epoch": 0.13, + "learning_rate": 4.7875033525062065e-05, + "loss": 2.0534, + "step": 8520 + }, + { + "epoch": 0.13, + "learning_rate": 4.787011451983343e-05, + "loss": 2.1017, + "step": 8530 + }, + { + "epoch": 0.13, + "learning_rate": 4.786519008107813e-05, + "loss": 2.1193, + "step": 8540 + }, + { + "epoch": 0.13, + "learning_rate": 4.786026020996612e-05, + "loss": 1.6915, + "step": 8550 + }, + { + "epoch": 0.13, + "learning_rate": 4.7855324907668656e-05, + "loss": 2.2558, + "step": 8560 + }, + { + "epoch": 0.13, + "learning_rate": 4.7850384175358264e-05, + "loss": 2.15, + "step": 8570 + }, + { + "epoch": 0.13, + "learning_rate": 4.784543801420878e-05, + "loss": 2.1195, + "step": 8580 + }, + { + "epoch": 0.13, + "learning_rate": 4.784048642539533e-05, + "loss": 1.9804, + "step": 8590 + }, + { + "epoch": 0.13, + "learning_rate": 4.78355294100943e-05, + "loss": 2.1879, + "step": 8600 + }, + { + "epoch": 0.13, + "learning_rate": 4.78305669694834e-05, + "loss": 2.1071, + "step": 8610 + }, + { + "epoch": 0.13, + "learning_rate": 4.782559910474163e-05, + "loss": 2.0897, + "step": 8620 + }, + { + "epoch": 0.13, + "learning_rate": 4.782062581704925e-05, + "loss": 1.9715, + "step": 8630 + }, + { + "epoch": 0.13, + "learning_rate": 4.7815647107587826e-05, + "loss": 2.0952, + "step": 8640 + }, + { + "epoch": 0.13, + "learning_rate": 4.781066297754021e-05, + "loss": 2.0414, + "step": 8650 + }, + { + "epoch": 0.13, + "learning_rate": 4.780567342809055e-05, + "loss": 1.9536, + "step": 8660 + }, + { + "epoch": 0.13, + "learning_rate": 4.780067846042425e-05, + "loss": 1.882, + "step": 8670 + }, + { + "epoch": 0.13, + "learning_rate": 4.7795678075728046e-05, + "loss": 2.4317, + "step": 8680 + }, + { + "epoch": 0.13, + "learning_rate": 4.779067227518993e-05, + "loss": 2.2772, + "step": 8690 + }, + { + "epoch": 0.13, + "learning_rate": 4.7785661059999185e-05, + "loss": 2.1378, + "step": 8700 + }, + { + "epoch": 0.14, + "learning_rate": 4.778064443134641e-05, + "loss": 2.3588, + "step": 8710 + }, + { + "epoch": 0.14, + "learning_rate": 4.7775622390423436e-05, + "loss": 2.0895, + "step": 8720 + }, + { + "epoch": 0.14, + "learning_rate": 4.777059493842342e-05, + "loss": 2.2796, + "step": 8730 + }, + { + "epoch": 0.14, + "learning_rate": 4.7765562076540796e-05, + "loss": 2.4353, + "step": 8740 + }, + { + "epoch": 0.14, + "learning_rate": 4.776052380597128e-05, + "loss": 2.3649, + "step": 8750 + }, + { + "epoch": 0.14, + "learning_rate": 4.775548012791188e-05, + "loss": 2.2979, + "step": 8760 + }, + { + "epoch": 0.14, + "learning_rate": 4.7750431043560866e-05, + "loss": 2.2781, + "step": 8770 + }, + { + "epoch": 0.14, + "learning_rate": 4.774537655411783e-05, + "loss": 2.2893, + "step": 8780 + }, + { + "epoch": 0.14, + "learning_rate": 4.7740316660783614e-05, + "loss": 2.1151, + "step": 8790 + }, + { + "epoch": 0.14, + "learning_rate": 4.7735251364760356e-05, + "loss": 2.0411, + "step": 8800 + }, + { + "epoch": 0.14, + "learning_rate": 4.7730180667251496e-05, + "loss": 2.2134, + "step": 8810 + }, + { + "epoch": 0.14, + "learning_rate": 4.772510456946172e-05, + "loss": 2.2249, + "step": 8820 + }, + { + "epoch": 0.14, + "learning_rate": 4.772002307259703e-05, + "loss": 2.0907, + "step": 8830 + }, + { + "epoch": 0.14, + "learning_rate": 4.771493617786469e-05, + "loss": 2.058, + "step": 8840 + }, + { + "epoch": 0.14, + "learning_rate": 4.770984388647327e-05, + "loss": 2.1777, + "step": 8850 + }, + { + "epoch": 0.14, + "learning_rate": 4.770474619963258e-05, + "loss": 2.1117, + "step": 8860 + }, + { + "epoch": 0.14, + "learning_rate": 4.769964311855376e-05, + "loss": 2.1006, + "step": 8870 + }, + { + "epoch": 0.14, + "learning_rate": 4.7694534644449204e-05, + "loss": 2.0163, + "step": 8880 + }, + { + "epoch": 0.14, + "learning_rate": 4.768942077853259e-05, + "loss": 1.9393, + "step": 8890 + }, + { + "epoch": 0.14, + "learning_rate": 4.768430152201888e-05, + "loss": 1.6398, + "step": 8900 + }, + { + "epoch": 0.14, + "learning_rate": 4.767917687612433e-05, + "loss": 2.2575, + "step": 8910 + }, + { + "epoch": 0.14, + "learning_rate": 4.7674046842066445e-05, + "loss": 2.3521, + "step": 8920 + }, + { + "epoch": 0.14, + "learning_rate": 4.7668911421064046e-05, + "loss": 2.3196, + "step": 8930 + }, + { + "epoch": 0.14, + "learning_rate": 4.76637706143372e-05, + "loss": 2.3459, + "step": 8940 + }, + { + "epoch": 0.14, + "learning_rate": 4.765862442310728e-05, + "loss": 1.855, + "step": 8950 + }, + { + "epoch": 0.14, + "learning_rate": 4.765347284859692e-05, + "loss": 1.7455, + "step": 8960 + }, + { + "epoch": 0.14, + "learning_rate": 4.764831589203005e-05, + "loss": 1.9444, + "step": 8970 + }, + { + "epoch": 0.14, + "learning_rate": 4.764315355463187e-05, + "loss": 1.9757, + "step": 8980 + }, + { + "epoch": 0.14, + "learning_rate": 4.763798583762885e-05, + "loss": 1.8883, + "step": 8990 + }, + { + "epoch": 0.14, + "learning_rate": 4.763281274224875e-05, + "loss": 1.8203, + "step": 9000 + }, + { + "epoch": 0.14, + "learning_rate": 4.762763426972061e-05, + "loss": 1.7857, + "step": 9010 + }, + { + "epoch": 0.14, + "learning_rate": 4.762245042127473e-05, + "loss": 1.8144, + "step": 9020 + }, + { + "epoch": 0.14, + "learning_rate": 4.761726119814271e-05, + "loss": 1.762, + "step": 9030 + }, + { + "epoch": 0.14, + "learning_rate": 4.761206660155742e-05, + "loss": 1.7896, + "step": 9040 + }, + { + "epoch": 0.14, + "learning_rate": 4.7606866632752985e-05, + "loss": 1.7461, + "step": 9050 + }, + { + "epoch": 0.14, + "learning_rate": 4.7601661292964826e-05, + "loss": 1.8114, + "step": 9060 + }, + { + "epoch": 0.14, + "learning_rate": 4.759645058342966e-05, + "loss": 2.1743, + "step": 9070 + }, + { + "epoch": 0.14, + "learning_rate": 4.759123450538543e-05, + "loss": 2.0353, + "step": 9080 + }, + { + "epoch": 0.14, + "learning_rate": 4.75860130600714e-05, + "loss": 2.1254, + "step": 9090 + }, + { + "epoch": 0.14, + "learning_rate": 4.7580786248728086e-05, + "loss": 2.0014, + "step": 9100 + }, + { + "epoch": 0.14, + "learning_rate": 4.757555407259728e-05, + "loss": 1.885, + "step": 9110 + }, + { + "epoch": 0.14, + "learning_rate": 4.7570316532922066e-05, + "loss": 1.8056, + "step": 9120 + }, + { + "epoch": 0.14, + "learning_rate": 4.756507363094677e-05, + "loss": 1.7643, + "step": 9130 + }, + { + "epoch": 0.14, + "learning_rate": 4.755982536791702e-05, + "loss": 1.9196, + "step": 9140 + }, + { + "epoch": 0.14, + "learning_rate": 4.755457174507971e-05, + "loss": 2.0162, + "step": 9150 + }, + { + "epoch": 0.14, + "learning_rate": 4.754931276368301e-05, + "loss": 1.953, + "step": 9160 + }, + { + "epoch": 0.14, + "learning_rate": 4.754404842497635e-05, + "loss": 2.2563, + "step": 9170 + }, + { + "epoch": 0.14, + "learning_rate": 4.753877873021044e-05, + "loss": 1.899, + "step": 9180 + }, + { + "epoch": 0.14, + "learning_rate": 4.753350368063728e-05, + "loss": 1.8397, + "step": 9190 + }, + { + "epoch": 0.14, + "learning_rate": 4.752822327751011e-05, + "loss": 2.0235, + "step": 9200 + }, + { + "epoch": 0.14, + "learning_rate": 4.752293752208347e-05, + "loss": 1.8929, + "step": 9210 + }, + { + "epoch": 0.14, + "learning_rate": 4.751764641561315e-05, + "loss": 1.9323, + "step": 9220 + }, + { + "epoch": 0.14, + "learning_rate": 4.751234995935623e-05, + "loss": 1.8821, + "step": 9230 + }, + { + "epoch": 0.14, + "learning_rate": 4.7507048154571054e-05, + "loss": 1.9615, + "step": 9240 + }, + { + "epoch": 0.14, + "learning_rate": 4.7501741002517224e-05, + "loss": 1.9782, + "step": 9250 + }, + { + "epoch": 0.14, + "learning_rate": 4.749642850445564e-05, + "loss": 2.0528, + "step": 9260 + }, + { + "epoch": 0.14, + "learning_rate": 4.749111066164843e-05, + "loss": 1.9991, + "step": 9270 + }, + { + "epoch": 0.14, + "learning_rate": 4.748578747535905e-05, + "loss": 2.023, + "step": 9280 + }, + { + "epoch": 0.14, + "learning_rate": 4.748045894685217e-05, + "loss": 1.9597, + "step": 9290 + }, + { + "epoch": 0.14, + "learning_rate": 4.747512507739376e-05, + "loss": 1.6098, + "step": 9300 + }, + { + "epoch": 0.14, + "learning_rate": 4.746978586825104e-05, + "loss": 1.7034, + "step": 9310 + }, + { + "epoch": 0.14, + "learning_rate": 4.746444132069253e-05, + "loss": 2.0285, + "step": 9320 + }, + { + "epoch": 0.14, + "learning_rate": 4.7459091435987987e-05, + "loss": 1.9661, + "step": 9330 + }, + { + "epoch": 0.14, + "learning_rate": 4.745373621540844e-05, + "loss": 1.6788, + "step": 9340 + }, + { + "epoch": 0.15, + "learning_rate": 4.744837566022621e-05, + "loss": 1.6256, + "step": 9350 + }, + { + "epoch": 0.15, + "learning_rate": 4.744300977171485e-05, + "loss": 1.5972, + "step": 9360 + }, + { + "epoch": 0.15, + "learning_rate": 4.743763855114922e-05, + "loss": 2.0121, + "step": 9370 + }, + { + "epoch": 0.15, + "learning_rate": 4.7432261999805405e-05, + "loss": 2.1146, + "step": 9380 + }, + { + "epoch": 0.15, + "learning_rate": 4.742688011896078e-05, + "loss": 1.9229, + "step": 9390 + }, + { + "epoch": 0.15, + "learning_rate": 4.742149290989399e-05, + "loss": 1.9889, + "step": 9400 + }, + { + "epoch": 0.15, + "learning_rate": 4.741610037388494e-05, + "loss": 1.8988, + "step": 9410 + }, + { + "epoch": 0.15, + "learning_rate": 4.741070251221479e-05, + "loss": 1.9786, + "step": 9420 + }, + { + "epoch": 0.15, + "learning_rate": 4.740529932616598e-05, + "loss": 2.0236, + "step": 9430 + }, + { + "epoch": 0.15, + "learning_rate": 4.739989081702222e-05, + "loss": 1.9261, + "step": 9440 + }, + { + "epoch": 0.15, + "learning_rate": 4.739447698606845e-05, + "loss": 1.9716, + "step": 9450 + }, + { + "epoch": 0.15, + "learning_rate": 4.738905783459091e-05, + "loss": 1.9437, + "step": 9460 + }, + { + "epoch": 0.15, + "learning_rate": 4.73836333638771e-05, + "loss": 2.1043, + "step": 9470 + }, + { + "epoch": 0.15, + "learning_rate": 4.737820357521577e-05, + "loss": 1.7465, + "step": 9480 + }, + { + "epoch": 0.15, + "learning_rate": 4.737276846989694e-05, + "loss": 2.0127, + "step": 9490 + }, + { + "epoch": 0.15, + "learning_rate": 4.7367328049211896e-05, + "loss": 1.6638, + "step": 9500 + }, + { + "epoch": 0.15, + "learning_rate": 4.736188231445318e-05, + "loss": 1.9884, + "step": 9510 + }, + { + "epoch": 0.15, + "learning_rate": 4.73564312669146e-05, + "loss": 1.9697, + "step": 9520 + }, + { + "epoch": 0.15, + "learning_rate": 4.735097490789123e-05, + "loss": 2.0905, + "step": 9530 + }, + { + "epoch": 0.15, + "learning_rate": 4.7345513238679395e-05, + "loss": 2.0008, + "step": 9540 + }, + { + "epoch": 0.15, + "learning_rate": 4.7340046260576706e-05, + "loss": 1.8835, + "step": 9550 + }, + { + "epoch": 0.15, + "learning_rate": 4.7334573974882005e-05, + "loss": 1.8408, + "step": 9560 + }, + { + "epoch": 0.15, + "learning_rate": 4.73290963828954e-05, + "loss": 1.8761, + "step": 9570 + }, + { + "epoch": 0.15, + "learning_rate": 4.732361348591828e-05, + "loss": 2.0504, + "step": 9580 + }, + { + "epoch": 0.15, + "learning_rate": 4.731812528525329e-05, + "loss": 1.8004, + "step": 9590 + }, + { + "epoch": 0.15, + "learning_rate": 4.731263178220432e-05, + "loss": 1.8499, + "step": 9600 + }, + { + "epoch": 0.15, + "learning_rate": 4.730713297807652e-05, + "loss": 1.9923, + "step": 9610 + }, + { + "epoch": 0.15, + "learning_rate": 4.7301628874176306e-05, + "loss": 2.0158, + "step": 9620 + }, + { + "epoch": 0.15, + "learning_rate": 4.729611947181137e-05, + "loss": 1.9316, + "step": 9630 + }, + { + "epoch": 0.15, + "learning_rate": 4.729060477229064e-05, + "loss": 1.9267, + "step": 9640 + }, + { + "epoch": 0.15, + "learning_rate": 4.7285084776924296e-05, + "loss": 1.8112, + "step": 9650 + }, + { + "epoch": 0.15, + "learning_rate": 4.727955948702381e-05, + "loss": 1.7576, + "step": 9660 + }, + { + "epoch": 0.15, + "learning_rate": 4.727402890390187e-05, + "loss": 1.9649, + "step": 9670 + }, + { + "epoch": 0.15, + "learning_rate": 4.726849302887245e-05, + "loss": 2.0302, + "step": 9680 + }, + { + "epoch": 0.15, + "learning_rate": 4.7262951863250786e-05, + "loss": 2.0225, + "step": 9690 + }, + { + "epoch": 0.15, + "learning_rate": 4.725740540835335e-05, + "loss": 2.0515, + "step": 9700 + }, + { + "epoch": 0.15, + "learning_rate": 4.725185366549788e-05, + "loss": 1.8163, + "step": 9710 + }, + { + "epoch": 0.15, + "learning_rate": 4.7246296636003366e-05, + "loss": 2.0532, + "step": 9720 + }, + { + "epoch": 0.15, + "learning_rate": 4.7240734321190065e-05, + "loss": 2.0369, + "step": 9730 + }, + { + "epoch": 0.15, + "learning_rate": 4.723516672237948e-05, + "loss": 2.0018, + "step": 9740 + }, + { + "epoch": 0.15, + "learning_rate": 4.722959384089437e-05, + "loss": 1.9775, + "step": 9750 + }, + { + "epoch": 0.15, + "learning_rate": 4.722401567805875e-05, + "loss": 1.8971, + "step": 9760 + }, + { + "epoch": 0.15, + "learning_rate": 4.7218432235197904e-05, + "loss": 1.8902, + "step": 9770 + }, + { + "epoch": 0.15, + "learning_rate": 4.721284351363834e-05, + "loss": 1.868, + "step": 9780 + }, + { + "epoch": 0.15, + "learning_rate": 4.720724951470784e-05, + "loss": 1.8857, + "step": 9790 + }, + { + "epoch": 0.15, + "learning_rate": 4.720165023973545e-05, + "loss": 1.8486, + "step": 9800 + }, + { + "epoch": 0.15, + "learning_rate": 4.719604569005144e-05, + "loss": 1.7548, + "step": 9810 + }, + { + "epoch": 0.15, + "learning_rate": 4.719043586698736e-05, + "loss": 1.9658, + "step": 9820 + }, + { + "epoch": 0.15, + "learning_rate": 4.7184820771876e-05, + "loss": 1.9502, + "step": 9830 + }, + { + "epoch": 0.15, + "learning_rate": 4.717920040605142e-05, + "loss": 1.9131, + "step": 9840 + }, + { + "epoch": 0.15, + "learning_rate": 4.717357477084889e-05, + "loss": 1.9672, + "step": 9850 + }, + { + "epoch": 0.15, + "learning_rate": 4.716794386760498e-05, + "loss": 1.7682, + "step": 9860 + }, + { + "epoch": 0.15, + "learning_rate": 4.7162307697657474e-05, + "loss": 1.7251, + "step": 9870 + }, + { + "epoch": 0.15, + "learning_rate": 4.7156666262345446e-05, + "loss": 1.9932, + "step": 9880 + }, + { + "epoch": 0.15, + "learning_rate": 4.715101956300918e-05, + "loss": 1.9698, + "step": 9890 + }, + { + "epoch": 0.15, + "learning_rate": 4.714536760099024e-05, + "loss": 1.935, + "step": 9900 + }, + { + "epoch": 0.15, + "learning_rate": 4.713971037763143e-05, + "loss": 1.8971, + "step": 9910 + }, + { + "epoch": 0.15, + "learning_rate": 4.71340478942768e-05, + "loss": 1.8972, + "step": 9920 + }, + { + "epoch": 0.15, + "learning_rate": 4.712838015227166e-05, + "loss": 2.0097, + "step": 9930 + }, + { + "epoch": 0.15, + "learning_rate": 4.712270715296256e-05, + "loss": 1.7283, + "step": 9940 + }, + { + "epoch": 0.15, + "learning_rate": 4.7117028897697294e-05, + "loss": 2.2552, + "step": 9950 + }, + { + "epoch": 0.15, + "learning_rate": 4.711134538782493e-05, + "loss": 2.3688, + "step": 9960 + }, + { + "epoch": 0.15, + "learning_rate": 4.710565662469575e-05, + "loss": 2.5769, + "step": 9970 + }, + { + "epoch": 0.15, + "learning_rate": 4.709996260966131e-05, + "loss": 2.6396, + "step": 9980 + }, + { + "epoch": 0.15, + "learning_rate": 4.709426334407441e-05, + "loss": 2.5805, + "step": 9990 + }, + { + "epoch": 0.16, + "learning_rate": 4.7088558829289084e-05, + "loss": 2.621, + "step": 10000 + }, + { + "epoch": 0.16, + "eval_loss": 1.8739286661148071, + "eval_runtime": 82.2522, + "eval_samples_per_second": 36.473, + "eval_steps_per_second": 4.559, + "step": 10000 + }, + { + "epoch": 0.16, + "learning_rate": 4.708284906666063e-05, + "loss": 2.1815, + "step": 10010 + }, + { + "epoch": 0.16, + "learning_rate": 4.707713405754558e-05, + "loss": 1.765, + "step": 10020 + }, + { + "epoch": 0.16, + "learning_rate": 4.7071413803301714e-05, + "loss": 1.9038, + "step": 10030 + }, + { + "epoch": 0.16, + "learning_rate": 4.706568830528806e-05, + "loss": 1.8567, + "step": 10040 + }, + { + "epoch": 0.16, + "learning_rate": 4.70599575648649e-05, + "loss": 1.857, + "step": 10050 + }, + { + "epoch": 0.16, + "learning_rate": 4.705422158339376e-05, + "loss": 1.8576, + "step": 10060 + }, + { + "epoch": 0.16, + "learning_rate": 4.704848036223739e-05, + "loss": 1.7714, + "step": 10070 + }, + { + "epoch": 0.16, + "learning_rate": 4.704273390275981e-05, + "loss": 1.7014, + "step": 10080 + }, + { + "epoch": 0.16, + "learning_rate": 4.703698220632628e-05, + "loss": 1.8278, + "step": 10090 + }, + { + "epoch": 0.16, + "learning_rate": 4.703122527430328e-05, + "loss": 2.0879, + "step": 10100 + }, + { + "epoch": 0.16, + "learning_rate": 4.702546310805857e-05, + "loss": 1.6694, + "step": 10110 + }, + { + "epoch": 0.16, + "learning_rate": 4.701969570896113e-05, + "loss": 1.739, + "step": 10120 + }, + { + "epoch": 0.16, + "learning_rate": 4.70139230783812e-05, + "loss": 1.7439, + "step": 10130 + }, + { + "epoch": 0.16, + "learning_rate": 4.700814521769025e-05, + "loss": 1.6742, + "step": 10140 + }, + { + "epoch": 0.16, + "learning_rate": 4.700236212826098e-05, + "loss": 1.699, + "step": 10150 + }, + { + "epoch": 0.16, + "learning_rate": 4.699657381146736e-05, + "loss": 1.9494, + "step": 10160 + }, + { + "epoch": 0.16, + "learning_rate": 4.6990780268684586e-05, + "loss": 1.9078, + "step": 10170 + }, + { + "epoch": 0.16, + "learning_rate": 4.698498150128911e-05, + "loss": 1.9222, + "step": 10180 + }, + { + "epoch": 0.16, + "learning_rate": 4.69791775106586e-05, + "loss": 1.9113, + "step": 10190 + }, + { + "epoch": 0.16, + "learning_rate": 4.697336829817199e-05, + "loss": 1.9318, + "step": 10200 + }, + { + "epoch": 0.16, + "learning_rate": 4.696755386520943e-05, + "loss": 1.9121, + "step": 10210 + }, + { + "epoch": 0.16, + "learning_rate": 4.6961734213152346e-05, + "loss": 1.9519, + "step": 10220 + }, + { + "epoch": 0.16, + "learning_rate": 4.695590934338336e-05, + "loss": 1.8991, + "step": 10230 + }, + { + "epoch": 0.16, + "learning_rate": 4.6950079257286385e-05, + "loss": 1.9348, + "step": 10240 + }, + { + "epoch": 0.16, + "learning_rate": 4.694424395624651e-05, + "loss": 1.9006, + "step": 10250 + }, + { + "epoch": 0.16, + "learning_rate": 4.693840344165011e-05, + "loss": 1.9004, + "step": 10260 + }, + { + "epoch": 0.16, + "learning_rate": 4.6932557714884804e-05, + "loss": 1.9497, + "step": 10270 + }, + { + "epoch": 0.16, + "learning_rate": 4.6926706777339404e-05, + "loss": 1.9068, + "step": 10280 + }, + { + "epoch": 0.16, + "learning_rate": 4.6920850630404e-05, + "loss": 1.8316, + "step": 10290 + }, + { + "epoch": 0.16, + "learning_rate": 4.691498927546991e-05, + "loss": 1.966, + "step": 10300 + }, + { + "epoch": 0.16, + "learning_rate": 4.690912271392969e-05, + "loss": 1.9009, + "step": 10310 + }, + { + "epoch": 0.16, + "learning_rate": 4.690325094717711e-05, + "loss": 2.0095, + "step": 10320 + }, + { + "epoch": 0.16, + "learning_rate": 4.6897373976607205e-05, + "loss": 1.9975, + "step": 10330 + }, + { + "epoch": 0.16, + "learning_rate": 4.689149180361625e-05, + "loss": 1.9553, + "step": 10340 + }, + { + "epoch": 0.16, + "learning_rate": 4.688560442960173e-05, + "loss": 1.9482, + "step": 10350 + }, + { + "epoch": 0.16, + "learning_rate": 4.6879711855962377e-05, + "loss": 1.8805, + "step": 10360 + }, + { + "epoch": 0.16, + "learning_rate": 4.6873814084098164e-05, + "loss": 1.8808, + "step": 10370 + }, + { + "epoch": 0.16, + "learning_rate": 4.68679111154103e-05, + "loss": 1.8206, + "step": 10380 + }, + { + "epoch": 0.16, + "learning_rate": 4.6862002951301224e-05, + "loss": 1.9451, + "step": 10390 + }, + { + "epoch": 0.16, + "learning_rate": 4.68560895931746e-05, + "loss": 1.9995, + "step": 10400 + }, + { + "epoch": 0.16, + "learning_rate": 4.685017104243534e-05, + "loss": 1.7784, + "step": 10410 + }, + { + "epoch": 0.16, + "learning_rate": 4.684424730048959e-05, + "loss": 1.8627, + "step": 10420 + }, + { + "epoch": 0.16, + "learning_rate": 4.683831836874472e-05, + "loss": 1.9029, + "step": 10430 + }, + { + "epoch": 0.16, + "learning_rate": 4.683238424860933e-05, + "loss": 1.6917, + "step": 10440 + }, + { + "epoch": 0.16, + "learning_rate": 4.682644494149327e-05, + "loss": 1.6733, + "step": 10450 + }, + { + "epoch": 0.16, + "learning_rate": 4.6820500448807614e-05, + "loss": 1.7877, + "step": 10460 + }, + { + "epoch": 0.16, + "learning_rate": 4.681455077196466e-05, + "loss": 1.7785, + "step": 10470 + }, + { + "epoch": 0.16, + "learning_rate": 4.680859591237794e-05, + "loss": 1.6942, + "step": 10480 + }, + { + "epoch": 0.16, + "learning_rate": 4.680263587146223e-05, + "loss": 1.6603, + "step": 10490 + }, + { + "epoch": 0.16, + "learning_rate": 4.679667065063352e-05, + "loss": 1.8127, + "step": 10500 + }, + { + "epoch": 0.16, + "learning_rate": 4.679070025130905e-05, + "loss": 1.711, + "step": 10510 + }, + { + "epoch": 0.16, + "learning_rate": 4.6784724674907265e-05, + "loss": 1.7378, + "step": 10520 + }, + { + "epoch": 0.16, + "learning_rate": 4.677874392284787e-05, + "loss": 1.8696, + "step": 10530 + }, + { + "epoch": 0.16, + "learning_rate": 4.677275799655177e-05, + "loss": 1.7392, + "step": 10540 + }, + { + "epoch": 0.16, + "learning_rate": 4.676676689744112e-05, + "loss": 1.6893, + "step": 10550 + }, + { + "epoch": 0.16, + "learning_rate": 4.6760770626939296e-05, + "loss": 1.8008, + "step": 10560 + }, + { + "epoch": 0.16, + "learning_rate": 4.6754769186470905e-05, + "loss": 1.8705, + "step": 10570 + }, + { + "epoch": 0.16, + "learning_rate": 4.674876257746178e-05, + "loss": 1.8482, + "step": 10580 + }, + { + "epoch": 0.16, + "learning_rate": 4.6742750801338975e-05, + "loss": 1.8464, + "step": 10590 + }, + { + "epoch": 0.16, + "learning_rate": 4.6736733859530796e-05, + "loss": 1.7576, + "step": 10600 + }, + { + "epoch": 0.16, + "learning_rate": 4.673071175346675e-05, + "loss": 1.6226, + "step": 10610 + }, + { + "epoch": 0.16, + "learning_rate": 4.6724684484577574e-05, + "loss": 1.5831, + "step": 10620 + }, + { + "epoch": 0.16, + "learning_rate": 4.671865205429525e-05, + "loss": 1.8734, + "step": 10630 + }, + { + "epoch": 0.17, + "learning_rate": 4.671261446405297e-05, + "loss": 1.8517, + "step": 10640 + }, + { + "epoch": 0.17, + "learning_rate": 4.670657171528516e-05, + "loss": 2.0043, + "step": 10650 + }, + { + "epoch": 0.17, + "learning_rate": 4.6700523809427463e-05, + "loss": 1.9505, + "step": 10660 + }, + { + "epoch": 0.17, + "learning_rate": 4.669447074791676e-05, + "loss": 1.8915, + "step": 10670 + }, + { + "epoch": 0.17, + "learning_rate": 4.668841253219114e-05, + "loss": 1.8408, + "step": 10680 + }, + { + "epoch": 0.17, + "learning_rate": 4.668234916368992e-05, + "loss": 1.9041, + "step": 10690 + }, + { + "epoch": 0.17, + "learning_rate": 4.667628064385367e-05, + "loss": 1.8687, + "step": 10700 + }, + { + "epoch": 0.17, + "learning_rate": 4.6670206974124145e-05, + "loss": 1.9502, + "step": 10710 + }, + { + "epoch": 0.17, + "learning_rate": 4.666412815594434e-05, + "loss": 1.7661, + "step": 10720 + }, + { + "epoch": 0.17, + "learning_rate": 4.6658044190758474e-05, + "loss": 1.6825, + "step": 10730 + }, + { + "epoch": 0.17, + "learning_rate": 4.665195508001198e-05, + "loss": 1.9132, + "step": 10740 + }, + { + "epoch": 0.17, + "learning_rate": 4.6645860825151536e-05, + "loss": 1.9045, + "step": 10750 + }, + { + "epoch": 0.17, + "learning_rate": 4.6639761427625024e-05, + "loss": 1.8062, + "step": 10760 + }, + { + "epoch": 0.17, + "learning_rate": 4.663365688888154e-05, + "loss": 1.9197, + "step": 10770 + }, + { + "epoch": 0.17, + "learning_rate": 4.662754721037142e-05, + "loss": 1.9311, + "step": 10780 + }, + { + "epoch": 0.17, + "learning_rate": 4.662143239354621e-05, + "loss": 2.028, + "step": 10790 + }, + { + "epoch": 0.17, + "learning_rate": 4.661531243985869e-05, + "loss": 1.8718, + "step": 10800 + }, + { + "epoch": 0.17, + "learning_rate": 4.660918735076284e-05, + "loss": 1.8887, + "step": 10810 + }, + { + "epoch": 0.17, + "learning_rate": 4.660305712771387e-05, + "loss": 1.866, + "step": 10820 + }, + { + "epoch": 0.17, + "learning_rate": 4.659692177216821e-05, + "loss": 1.9326, + "step": 10830 + }, + { + "epoch": 0.17, + "learning_rate": 4.659078128558352e-05, + "loss": 1.8323, + "step": 10840 + }, + { + "epoch": 0.17, + "learning_rate": 4.6584635669418666e-05, + "loss": 1.7588, + "step": 10850 + }, + { + "epoch": 0.17, + "learning_rate": 4.6578484925133726e-05, + "loss": 1.7223, + "step": 10860 + }, + { + "epoch": 0.17, + "learning_rate": 4.657232905419001e-05, + "loss": 1.8745, + "step": 10870 + }, + { + "epoch": 0.17, + "learning_rate": 4.6566168058050055e-05, + "loss": 1.8508, + "step": 10880 + }, + { + "epoch": 0.17, + "learning_rate": 4.656000193817758e-05, + "loss": 1.9472, + "step": 10890 + }, + { + "epoch": 0.17, + "learning_rate": 4.655383069603756e-05, + "loss": 1.6377, + "step": 10900 + }, + { + "epoch": 0.17, + "learning_rate": 4.654765433309616e-05, + "loss": 1.5515, + "step": 10910 + }, + { + "epoch": 0.17, + "learning_rate": 4.654147285082079e-05, + "loss": 1.5038, + "step": 10920 + }, + { + "epoch": 0.17, + "learning_rate": 4.653528625068004e-05, + "loss": 1.5037, + "step": 10930 + }, + { + "epoch": 0.17, + "learning_rate": 4.6529094534143745e-05, + "loss": 1.8102, + "step": 10940 + }, + { + "epoch": 0.17, + "learning_rate": 4.652289770268294e-05, + "loss": 1.8538, + "step": 10950 + }, + { + "epoch": 0.17, + "learning_rate": 4.651669575776988e-05, + "loss": 1.6151, + "step": 10960 + }, + { + "epoch": 0.17, + "learning_rate": 4.6510488700878044e-05, + "loss": 1.5582, + "step": 10970 + }, + { + "epoch": 0.17, + "learning_rate": 4.650427653348211e-05, + "loss": 1.6568, + "step": 10980 + }, + { + "epoch": 0.17, + "learning_rate": 4.649805925705797e-05, + "loss": 1.8329, + "step": 10990 + }, + { + "epoch": 0.17, + "learning_rate": 4.649183687308275e-05, + "loss": 1.7992, + "step": 11000 + }, + { + "epoch": 0.17, + "learning_rate": 4.648560938303477e-05, + "loss": 1.8048, + "step": 11010 + }, + { + "epoch": 0.17, + "learning_rate": 4.647937678839356e-05, + "loss": 1.8764, + "step": 11020 + }, + { + "epoch": 0.17, + "learning_rate": 4.647313909063989e-05, + "loss": 1.9041, + "step": 11030 + }, + { + "epoch": 0.17, + "learning_rate": 4.6466896291255724e-05, + "loss": 1.8926, + "step": 11040 + }, + { + "epoch": 0.17, + "learning_rate": 4.6460648391724215e-05, + "loss": 1.7009, + "step": 11050 + }, + { + "epoch": 0.17, + "learning_rate": 4.645439539352978e-05, + "loss": 1.8641, + "step": 11060 + }, + { + "epoch": 0.17, + "learning_rate": 4.6448137298158e-05, + "loss": 1.8535, + "step": 11070 + }, + { + "epoch": 0.17, + "learning_rate": 4.6441874107095684e-05, + "loss": 1.7434, + "step": 11080 + }, + { + "epoch": 0.17, + "learning_rate": 4.643560582183087e-05, + "loss": 1.9244, + "step": 11090 + }, + { + "epoch": 0.17, + "learning_rate": 4.642933244385277e-05, + "loss": 1.9989, + "step": 11100 + }, + { + "epoch": 0.17, + "learning_rate": 4.642305397465185e-05, + "loss": 1.9856, + "step": 11110 + }, + { + "epoch": 0.17, + "learning_rate": 4.6416770415719736e-05, + "loss": 1.8266, + "step": 11120 + }, + { + "epoch": 0.17, + "learning_rate": 4.6410481768549294e-05, + "loss": 2.0761, + "step": 11130 + }, + { + "epoch": 0.17, + "learning_rate": 4.640418803463461e-05, + "loss": 2.0146, + "step": 11140 + }, + { + "epoch": 0.17, + "learning_rate": 4.639788921547093e-05, + "loss": 1.9131, + "step": 11150 + }, + { + "epoch": 0.17, + "learning_rate": 4.639158531255478e-05, + "loss": 1.9705, + "step": 11160 + }, + { + "epoch": 0.17, + "learning_rate": 4.638527632738382e-05, + "loss": 1.9331, + "step": 11170 + }, + { + "epoch": 0.17, + "learning_rate": 4.6378962261456974e-05, + "loss": 1.8699, + "step": 11180 + }, + { + "epoch": 0.17, + "learning_rate": 4.6372643116274335e-05, + "loss": 1.8694, + "step": 11190 + }, + { + "epoch": 0.17, + "learning_rate": 4.6366318893337216e-05, + "loss": 1.8108, + "step": 11200 + }, + { + "epoch": 0.17, + "learning_rate": 4.635998959414816e-05, + "loss": 1.6571, + "step": 11210 + }, + { + "epoch": 0.17, + "learning_rate": 4.635365522021086e-05, + "loss": 1.5681, + "step": 11220 + }, + { + "epoch": 0.17, + "learning_rate": 4.634731577303028e-05, + "loss": 1.7951, + "step": 11230 + }, + { + "epoch": 0.17, + "learning_rate": 4.6340971254112544e-05, + "loss": 1.8145, + "step": 11240 + }, + { + "epoch": 0.17, + "learning_rate": 4.6334621664965e-05, + "loss": 1.6982, + "step": 11250 + }, + { + "epoch": 0.17, + "learning_rate": 4.6328267007096196e-05, + "loss": 1.7583, + "step": 11260 + }, + { + "epoch": 0.17, + "learning_rate": 4.632190728201587e-05, + "loss": 1.9065, + "step": 11270 + }, + { + "epoch": 0.18, + "learning_rate": 4.6315542491235005e-05, + "loss": 1.9994, + "step": 11280 + }, + { + "epoch": 0.18, + "learning_rate": 4.630917263626574e-05, + "loss": 1.8391, + "step": 11290 + }, + { + "epoch": 0.18, + "learning_rate": 4.6302797718621436e-05, + "loss": 2.2777, + "step": 11300 + }, + { + "epoch": 0.18, + "learning_rate": 4.6296417739816675e-05, + "loss": 2.056, + "step": 11310 + }, + { + "epoch": 0.18, + "learning_rate": 4.629003270136721e-05, + "loss": 1.6578, + "step": 11320 + }, + { + "epoch": 0.18, + "learning_rate": 4.6283642604790015e-05, + "loss": 1.9703, + "step": 11330 + }, + { + "epoch": 0.18, + "learning_rate": 4.6277247451603266e-05, + "loss": 1.9719, + "step": 11340 + }, + { + "epoch": 0.18, + "learning_rate": 4.6270847243326334e-05, + "loss": 1.9471, + "step": 11350 + }, + { + "epoch": 0.18, + "learning_rate": 4.6264441981479786e-05, + "loss": 2.0095, + "step": 11360 + }, + { + "epoch": 0.18, + "learning_rate": 4.625803166758541e-05, + "loss": 1.9687, + "step": 11370 + }, + { + "epoch": 0.18, + "learning_rate": 4.625161630316617e-05, + "loss": 1.9939, + "step": 11380 + }, + { + "epoch": 0.18, + "learning_rate": 4.624519588974624e-05, + "loss": 1.9446, + "step": 11390 + }, + { + "epoch": 0.18, + "learning_rate": 4.6238770428851e-05, + "loss": 1.9293, + "step": 11400 + }, + { + "epoch": 0.18, + "learning_rate": 4.6232339922007026e-05, + "loss": 1.8977, + "step": 11410 + }, + { + "epoch": 0.18, + "learning_rate": 4.622590437074209e-05, + "loss": 1.8549, + "step": 11420 + }, + { + "epoch": 0.18, + "learning_rate": 4.6219463776585155e-05, + "loss": 1.8216, + "step": 11430 + }, + { + "epoch": 0.18, + "learning_rate": 4.6213018141066394e-05, + "loss": 1.8008, + "step": 11440 + }, + { + "epoch": 0.18, + "learning_rate": 4.6206567465717175e-05, + "loss": 1.9247, + "step": 11450 + }, + { + "epoch": 0.18, + "learning_rate": 4.6200111752070066e-05, + "loss": 2.4714, + "step": 11460 + }, + { + "epoch": 0.18, + "learning_rate": 4.619365100165882e-05, + "loss": 1.939, + "step": 11470 + }, + { + "epoch": 0.18, + "learning_rate": 4.618718521601839e-05, + "loss": 1.9736, + "step": 11480 + }, + { + "epoch": 0.18, + "learning_rate": 4.618071439668495e-05, + "loss": 1.9322, + "step": 11490 + }, + { + "epoch": 0.18, + "learning_rate": 4.617423854519583e-05, + "loss": 1.7523, + "step": 11500 + }, + { + "epoch": 0.18, + "learning_rate": 4.6167757663089596e-05, + "loss": 1.7063, + "step": 11510 + }, + { + "epoch": 0.18, + "learning_rate": 4.616127175190598e-05, + "loss": 1.847, + "step": 11520 + }, + { + "epoch": 0.18, + "learning_rate": 4.6154780813185896e-05, + "loss": 1.7941, + "step": 11530 + }, + { + "epoch": 0.18, + "learning_rate": 4.6148284848471503e-05, + "loss": 1.7386, + "step": 11540 + }, + { + "epoch": 0.18, + "learning_rate": 4.614178385930611e-05, + "loss": 1.8555, + "step": 11550 + }, + { + "epoch": 0.18, + "learning_rate": 4.613527784723424e-05, + "loss": 1.9049, + "step": 11560 + }, + { + "epoch": 0.18, + "learning_rate": 4.612876681380161e-05, + "loss": 1.9095, + "step": 11570 + }, + { + "epoch": 0.18, + "learning_rate": 4.6122250760555105e-05, + "loss": 1.9012, + "step": 11580 + }, + { + "epoch": 0.18, + "learning_rate": 4.611572968904284e-05, + "loss": 1.9786, + "step": 11590 + }, + { + "epoch": 0.18, + "learning_rate": 4.6109203600814094e-05, + "loss": 1.8501, + "step": 11600 + }, + { + "epoch": 0.18, + "learning_rate": 4.610267249741935e-05, + "loss": 2.006, + "step": 11610 + }, + { + "epoch": 0.18, + "learning_rate": 4.6096136380410295e-05, + "loss": 1.8907, + "step": 11620 + }, + { + "epoch": 0.18, + "learning_rate": 4.608959525133977e-05, + "loss": 1.7982, + "step": 11630 + }, + { + "epoch": 0.18, + "learning_rate": 4.6083049111761837e-05, + "loss": 1.7995, + "step": 11640 + }, + { + "epoch": 0.18, + "learning_rate": 4.607649796323175e-05, + "loss": 1.8037, + "step": 11650 + }, + { + "epoch": 0.18, + "learning_rate": 4.6069941807305926e-05, + "loss": 2.2565, + "step": 11660 + }, + { + "epoch": 0.18, + "learning_rate": 4.6063380645542005e-05, + "loss": 1.8256, + "step": 11670 + }, + { + "epoch": 0.18, + "learning_rate": 4.6056814479498786e-05, + "loss": 1.8434, + "step": 11680 + }, + { + "epoch": 0.18, + "learning_rate": 4.60502433107363e-05, + "loss": 1.8675, + "step": 11690 + }, + { + "epoch": 0.18, + "learning_rate": 4.604366714081571e-05, + "loss": 1.8115, + "step": 11700 + }, + { + "epoch": 0.18, + "learning_rate": 4.60370859712994e-05, + "loss": 1.7835, + "step": 11710 + }, + { + "epoch": 0.18, + "learning_rate": 4.6030499803750936e-05, + "loss": 1.7173, + "step": 11720 + }, + { + "epoch": 0.18, + "learning_rate": 4.602390863973509e-05, + "loss": 1.8121, + "step": 11730 + }, + { + "epoch": 0.18, + "learning_rate": 4.601731248081779e-05, + "loss": 1.9854, + "step": 11740 + }, + { + "epoch": 0.18, + "learning_rate": 4.601071132856616e-05, + "loss": 1.8921, + "step": 11750 + }, + { + "epoch": 0.18, + "learning_rate": 4.600410518454853e-05, + "loss": 1.8271, + "step": 11760 + }, + { + "epoch": 0.18, + "learning_rate": 4.599749405033438e-05, + "loss": 2.0631, + "step": 11770 + }, + { + "epoch": 0.18, + "learning_rate": 4.5990877927494414e-05, + "loss": 1.9179, + "step": 11780 + }, + { + "epoch": 0.18, + "learning_rate": 4.5984256817600496e-05, + "loss": 1.9427, + "step": 11790 + }, + { + "epoch": 0.18, + "learning_rate": 4.5977630722225685e-05, + "loss": 1.8756, + "step": 11800 + }, + { + "epoch": 0.18, + "learning_rate": 4.597099964294422e-05, + "loss": 1.8167, + "step": 11810 + }, + { + "epoch": 0.18, + "learning_rate": 4.596436358133153e-05, + "loss": 1.7602, + "step": 11820 + }, + { + "epoch": 0.18, + "learning_rate": 4.595772253896421e-05, + "loss": 1.7515, + "step": 11830 + }, + { + "epoch": 0.18, + "learning_rate": 4.5951076517420066e-05, + "loss": 1.6922, + "step": 11840 + }, + { + "epoch": 0.18, + "learning_rate": 4.594442551827807e-05, + "loss": 1.6731, + "step": 11850 + }, + { + "epoch": 0.18, + "learning_rate": 4.5937769543118374e-05, + "loss": 1.7259, + "step": 11860 + }, + { + "epoch": 0.18, + "learning_rate": 4.593110859352232e-05, + "loss": 1.6148, + "step": 11870 + }, + { + "epoch": 0.18, + "learning_rate": 4.5924442671072435e-05, + "loss": 1.6675, + "step": 11880 + }, + { + "epoch": 0.18, + "learning_rate": 4.591777177735242e-05, + "loss": 1.8021, + "step": 11890 + }, + { + "epoch": 0.18, + "learning_rate": 4.5911095913947155e-05, + "loss": 1.8148, + "step": 11900 + }, + { + "epoch": 0.18, + "learning_rate": 4.59044150824427e-05, + "loss": 1.6678, + "step": 11910 + }, + { + "epoch": 0.18, + "learning_rate": 4.589772928442632e-05, + "loss": 1.6347, + "step": 11920 + }, + { + "epoch": 0.19, + "learning_rate": 4.589103852148641e-05, + "loss": 1.9682, + "step": 11930 + }, + { + "epoch": 0.19, + "learning_rate": 4.5884342795212595e-05, + "loss": 1.764, + "step": 11940 + }, + { + "epoch": 0.19, + "learning_rate": 4.587764210719566e-05, + "loss": 1.7597, + "step": 11950 + }, + { + "epoch": 0.19, + "learning_rate": 4.5870936459027564e-05, + "loss": 1.7164, + "step": 11960 + }, + { + "epoch": 0.19, + "learning_rate": 4.586422585230145e-05, + "loss": 2.3256, + "step": 11970 + }, + { + "epoch": 0.19, + "learning_rate": 4.585751028861162e-05, + "loss": 2.0235, + "step": 11980 + }, + { + "epoch": 0.19, + "learning_rate": 4.585078976955359e-05, + "loss": 1.9836, + "step": 11990 + }, + { + "epoch": 0.19, + "learning_rate": 4.584406429672403e-05, + "loss": 1.941, + "step": 12000 + }, + { + "epoch": 0.19, + "learning_rate": 4.5837333871720786e-05, + "loss": 1.9123, + "step": 12010 + }, + { + "epoch": 0.19, + "learning_rate": 4.58305984961429e-05, + "loss": 1.8873, + "step": 12020 + }, + { + "epoch": 0.19, + "learning_rate": 4.582385817159055e-05, + "loss": 1.8653, + "step": 12030 + }, + { + "epoch": 0.19, + "learning_rate": 4.581711289966514e-05, + "loss": 1.7987, + "step": 12040 + }, + { + "epoch": 0.19, + "learning_rate": 4.581036268196921e-05, + "loss": 1.7142, + "step": 12050 + }, + { + "epoch": 0.19, + "learning_rate": 4.580360752010651e-05, + "loss": 1.6927, + "step": 12060 + }, + { + "epoch": 0.19, + "learning_rate": 4.579684741568191e-05, + "loss": 1.683, + "step": 12070 + }, + { + "epoch": 0.19, + "learning_rate": 4.579008237030152e-05, + "loss": 1.6997, + "step": 12080 + }, + { + "epoch": 0.19, + "learning_rate": 4.5783312385572574e-05, + "loss": 1.84, + "step": 12090 + }, + { + "epoch": 0.19, + "learning_rate": 4.5776537463103506e-05, + "loss": 1.9425, + "step": 12100 + }, + { + "epoch": 0.19, + "learning_rate": 4.576975760450393e-05, + "loss": 1.8408, + "step": 12110 + }, + { + "epoch": 0.19, + "learning_rate": 4.576297281138459e-05, + "loss": 1.7698, + "step": 12120 + }, + { + "epoch": 0.19, + "learning_rate": 4.575618308535744e-05, + "loss": 1.9723, + "step": 12130 + }, + { + "epoch": 0.19, + "learning_rate": 4.5749388428035604e-05, + "loss": 1.8772, + "step": 12140 + }, + { + "epoch": 0.19, + "learning_rate": 4.574258884103337e-05, + "loss": 1.8779, + "step": 12150 + }, + { + "epoch": 0.19, + "learning_rate": 4.573578432596619e-05, + "loss": 2.0864, + "step": 12160 + }, + { + "epoch": 0.19, + "learning_rate": 4.57289748844507e-05, + "loss": 2.0642, + "step": 12170 + }, + { + "epoch": 0.19, + "learning_rate": 4.5722160518104695e-05, + "loss": 1.7313, + "step": 12180 + }, + { + "epoch": 0.19, + "learning_rate": 4.5715341228547146e-05, + "loss": 1.6198, + "step": 12190 + }, + { + "epoch": 0.19, + "learning_rate": 4.5708517017398203e-05, + "loss": 1.5874, + "step": 12200 + }, + { + "epoch": 0.19, + "learning_rate": 4.570168788627916e-05, + "loss": 1.9228, + "step": 12210 + }, + { + "epoch": 0.19, + "learning_rate": 4.569485383681251e-05, + "loss": 1.9219, + "step": 12220 + }, + { + "epoch": 0.19, + "learning_rate": 4.568801487062189e-05, + "loss": 1.8953, + "step": 12230 + }, + { + "epoch": 0.19, + "learning_rate": 4.568117098933212e-05, + "loss": 1.8927, + "step": 12240 + }, + { + "epoch": 0.19, + "learning_rate": 4.567432219456918e-05, + "loss": 1.8993, + "step": 12250 + }, + { + "epoch": 0.19, + "learning_rate": 4.566746848796022e-05, + "loss": 1.8572, + "step": 12260 + }, + { + "epoch": 0.19, + "learning_rate": 4.566060987113356e-05, + "loss": 1.8601, + "step": 12270 + }, + { + "epoch": 0.19, + "learning_rate": 4.565374634571868e-05, + "loss": 1.9086, + "step": 12280 + }, + { + "epoch": 0.19, + "learning_rate": 4.564687791334623e-05, + "loss": 1.9078, + "step": 12290 + }, + { + "epoch": 0.19, + "learning_rate": 4.564000457564803e-05, + "loss": 1.9682, + "step": 12300 + }, + { + "epoch": 0.19, + "learning_rate": 4.5633126334257056e-05, + "loss": 1.9442, + "step": 12310 + }, + { + "epoch": 0.19, + "learning_rate": 4.562624319080746e-05, + "loss": 1.7455, + "step": 12320 + }, + { + "epoch": 0.19, + "learning_rate": 4.561935514693455e-05, + "loss": 1.5605, + "step": 12330 + }, + { + "epoch": 0.19, + "learning_rate": 4.5612462204274796e-05, + "loss": 1.9678, + "step": 12340 + }, + { + "epoch": 0.19, + "learning_rate": 4.560556436446584e-05, + "loss": 1.4714, + "step": 12350 + }, + { + "epoch": 0.19, + "learning_rate": 4.559866162914649e-05, + "loss": 1.4454, + "step": 12360 + }, + { + "epoch": 0.19, + "learning_rate": 4.55917539999567e-05, + "loss": 1.9687, + "step": 12370 + }, + { + "epoch": 0.19, + "learning_rate": 4.558484147853761e-05, + "loss": 1.7806, + "step": 12380 + }, + { + "epoch": 0.19, + "learning_rate": 4.5577924066531506e-05, + "loss": 1.9462, + "step": 12390 + }, + { + "epoch": 0.19, + "learning_rate": 4.5571001765581836e-05, + "loss": 2.0398, + "step": 12400 + }, + { + "epoch": 0.19, + "learning_rate": 4.556407457733322e-05, + "loss": 1.9248, + "step": 12410 + }, + { + "epoch": 0.19, + "learning_rate": 4.555714250343143e-05, + "loss": 1.9582, + "step": 12420 + }, + { + "epoch": 0.19, + "learning_rate": 4.55502055455234e-05, + "loss": 1.9771, + "step": 12430 + }, + { + "epoch": 0.19, + "learning_rate": 4.554326370525723e-05, + "loss": 1.9089, + "step": 12440 + }, + { + "epoch": 0.19, + "learning_rate": 4.5536316984282165e-05, + "loss": 1.7116, + "step": 12450 + }, + { + "epoch": 0.19, + "learning_rate": 4.5529365384248645e-05, + "loss": 1.7041, + "step": 12460 + }, + { + "epoch": 0.19, + "learning_rate": 4.552240890680822e-05, + "loss": 1.804, + "step": 12470 + }, + { + "epoch": 0.19, + "learning_rate": 4.551544755361363e-05, + "loss": 2.0004, + "step": 12480 + }, + { + "epoch": 0.19, + "learning_rate": 4.550848132631877e-05, + "loss": 1.9521, + "step": 12490 + }, + { + "epoch": 0.19, + "learning_rate": 4.55015102265787e-05, + "loss": 1.8869, + "step": 12500 + }, + { + "epoch": 0.19, + "learning_rate": 4.549453425604961e-05, + "loss": 1.9398, + "step": 12510 + }, + { + "epoch": 0.19, + "learning_rate": 4.548755341638888e-05, + "loss": 1.9281, + "step": 12520 + }, + { + "epoch": 0.19, + "learning_rate": 4.548056770925502e-05, + "loss": 1.932, + "step": 12530 + }, + { + "epoch": 0.19, + "learning_rate": 4.54735771363077e-05, + "loss": 1.8425, + "step": 12540 + }, + { + "epoch": 0.19, + "learning_rate": 4.546658169920778e-05, + "loss": 1.7888, + "step": 12550 + }, + { + "epoch": 0.19, + "learning_rate": 4.545958139961724e-05, + "loss": 1.6667, + "step": 12560 + }, + { + "epoch": 0.2, + "learning_rate": 4.5452576239199204e-05, + "loss": 1.6906, + "step": 12570 + }, + { + "epoch": 0.2, + "learning_rate": 4.544556621961799e-05, + "loss": 1.8205, + "step": 12580 + }, + { + "epoch": 0.2, + "learning_rate": 4.543855134253906e-05, + "loss": 1.8937, + "step": 12590 + }, + { + "epoch": 0.2, + "learning_rate": 4.543153160962901e-05, + "loss": 1.8233, + "step": 12600 + }, + { + "epoch": 0.2, + "learning_rate": 4.54245070225556e-05, + "loss": 1.913, + "step": 12610 + }, + { + "epoch": 0.2, + "learning_rate": 4.5417477582987747e-05, + "loss": 1.7161, + "step": 12620 + }, + { + "epoch": 0.2, + "learning_rate": 4.541044329259553e-05, + "loss": 1.6269, + "step": 12630 + }, + { + "epoch": 0.2, + "learning_rate": 4.540340415305016e-05, + "loss": 1.8336, + "step": 12640 + }, + { + "epoch": 0.2, + "learning_rate": 4.5396360166024e-05, + "loss": 1.9987, + "step": 12650 + }, + { + "epoch": 0.2, + "learning_rate": 4.53893113331906e-05, + "loss": 1.9546, + "step": 12660 + }, + { + "epoch": 0.2, + "learning_rate": 4.538225765622462e-05, + "loss": 1.9335, + "step": 12670 + }, + { + "epoch": 0.2, + "learning_rate": 4.537519913680187e-05, + "loss": 1.8625, + "step": 12680 + }, + { + "epoch": 0.2, + "learning_rate": 4.5368135776599365e-05, + "loss": 1.8887, + "step": 12690 + }, + { + "epoch": 0.2, + "learning_rate": 4.5361067577295195e-05, + "loss": 1.9487, + "step": 12700 + }, + { + "epoch": 0.2, + "learning_rate": 4.535399454056866e-05, + "loss": 1.9788, + "step": 12710 + }, + { + "epoch": 0.2, + "learning_rate": 4.534691666810017e-05, + "loss": 2.1135, + "step": 12720 + }, + { + "epoch": 0.2, + "learning_rate": 4.5339833961571324e-05, + "loss": 2.1068, + "step": 12730 + }, + { + "epoch": 0.2, + "learning_rate": 4.533274642266482e-05, + "loss": 2.1422, + "step": 12740 + }, + { + "epoch": 0.2, + "learning_rate": 4.532565405306454e-05, + "loss": 1.9906, + "step": 12750 + }, + { + "epoch": 0.2, + "learning_rate": 4.531855685445549e-05, + "loss": 2.0665, + "step": 12760 + }, + { + "epoch": 0.2, + "learning_rate": 4.531145482852387e-05, + "loss": 1.5896, + "step": 12770 + }, + { + "epoch": 0.2, + "learning_rate": 4.5304347976956954e-05, + "loss": 1.4532, + "step": 12780 + }, + { + "epoch": 0.2, + "learning_rate": 4.529723630144323e-05, + "loss": 1.7132, + "step": 12790 + }, + { + "epoch": 0.2, + "learning_rate": 4.5290119803672295e-05, + "loss": 1.5862, + "step": 12800 + }, + { + "epoch": 0.2, + "learning_rate": 4.528299848533489e-05, + "loss": 1.8516, + "step": 12810 + }, + { + "epoch": 0.2, + "learning_rate": 4.527587234812292e-05, + "loss": 2.0298, + "step": 12820 + }, + { + "epoch": 0.2, + "learning_rate": 4.526874139372943e-05, + "loss": 1.9277, + "step": 12830 + }, + { + "epoch": 0.2, + "learning_rate": 4.526160562384861e-05, + "loss": 1.9302, + "step": 12840 + }, + { + "epoch": 0.2, + "learning_rate": 4.525446504017578e-05, + "loss": 2.0459, + "step": 12850 + }, + { + "epoch": 0.2, + "learning_rate": 4.5247319644407415e-05, + "loss": 1.8374, + "step": 12860 + }, + { + "epoch": 0.2, + "learning_rate": 4.524016943824113e-05, + "loss": 1.8198, + "step": 12870 + }, + { + "epoch": 0.2, + "learning_rate": 4.5233014423375694e-05, + "loss": 1.9469, + "step": 12880 + }, + { + "epoch": 0.2, + "learning_rate": 4.5225854601511006e-05, + "loss": 1.874, + "step": 12890 + }, + { + "epoch": 0.2, + "learning_rate": 4.521868997434811e-05, + "loss": 1.8618, + "step": 12900 + }, + { + "epoch": 0.2, + "learning_rate": 4.5211520543589195e-05, + "loss": 1.9154, + "step": 12910 + }, + { + "epoch": 0.2, + "learning_rate": 4.520434631093757e-05, + "loss": 1.9372, + "step": 12920 + }, + { + "epoch": 0.2, + "learning_rate": 4.519716727809773e-05, + "loss": 1.7493, + "step": 12930 + }, + { + "epoch": 0.2, + "learning_rate": 4.518998344677527e-05, + "loss": 1.8592, + "step": 12940 + }, + { + "epoch": 0.2, + "learning_rate": 4.518279481867695e-05, + "loss": 1.9, + "step": 12950 + }, + { + "epoch": 0.2, + "learning_rate": 4.5175601395510635e-05, + "loss": 1.7847, + "step": 12960 + }, + { + "epoch": 0.2, + "learning_rate": 4.5168403178985374e-05, + "loss": 1.6724, + "step": 12970 + }, + { + "epoch": 0.2, + "learning_rate": 4.5161200170811326e-05, + "loss": 1.6806, + "step": 12980 + }, + { + "epoch": 0.2, + "learning_rate": 4.5153992372699796e-05, + "loss": 1.9181, + "step": 12990 + }, + { + "epoch": 0.2, + "learning_rate": 4.5146779786363225e-05, + "loss": 2.0027, + "step": 13000 + }, + { + "epoch": 0.2, + "learning_rate": 4.513956241351519e-05, + "loss": 1.9227, + "step": 13010 + }, + { + "epoch": 0.2, + "learning_rate": 4.513234025587042e-05, + "loss": 1.8939, + "step": 13020 + }, + { + "epoch": 0.2, + "learning_rate": 4.512511331514477e-05, + "loss": 1.8229, + "step": 13030 + }, + { + "epoch": 0.2, + "learning_rate": 4.511788159305521e-05, + "loss": 1.7994, + "step": 13040 + }, + { + "epoch": 0.2, + "learning_rate": 4.5110645091319894e-05, + "loss": 1.7008, + "step": 13050 + }, + { + "epoch": 0.2, + "learning_rate": 4.510340381165806e-05, + "loss": 1.9179, + "step": 13060 + }, + { + "epoch": 0.2, + "learning_rate": 4.509615775579012e-05, + "loss": 1.6109, + "step": 13070 + }, + { + "epoch": 0.2, + "learning_rate": 4.50889069254376e-05, + "loss": 1.6107, + "step": 13080 + }, + { + "epoch": 0.2, + "learning_rate": 4.5081651322323174e-05, + "loss": 1.9068, + "step": 13090 + }, + { + "epoch": 0.2, + "learning_rate": 4.5074390948170635e-05, + "loss": 1.8588, + "step": 13100 + }, + { + "epoch": 0.2, + "learning_rate": 4.506712580470492e-05, + "loss": 1.7813, + "step": 13110 + }, + { + "epoch": 0.2, + "learning_rate": 4.505985589365209e-05, + "loss": 1.7971, + "step": 13120 + }, + { + "epoch": 0.2, + "learning_rate": 4.505258121673936e-05, + "loss": 1.8641, + "step": 13130 + }, + { + "epoch": 0.2, + "learning_rate": 4.504530177569505e-05, + "loss": 1.9082, + "step": 13140 + }, + { + "epoch": 0.2, + "learning_rate": 4.503801757224861e-05, + "loss": 1.9987, + "step": 13150 + }, + { + "epoch": 0.2, + "learning_rate": 4.503072860813067e-05, + "loss": 1.9675, + "step": 13160 + }, + { + "epoch": 0.2, + "learning_rate": 4.502343488507293e-05, + "loss": 1.9649, + "step": 13170 + }, + { + "epoch": 0.2, + "learning_rate": 4.501613640480826e-05, + "loss": 1.9364, + "step": 13180 + }, + { + "epoch": 0.2, + "learning_rate": 4.500883316907064e-05, + "loss": 1.9237, + "step": 13190 + }, + { + "epoch": 0.2, + "learning_rate": 4.500152517959519e-05, + "loss": 1.8864, + "step": 13200 + }, + { + "epoch": 0.2, + "learning_rate": 4.499421243811815e-05, + "loss": 1.9767, + "step": 13210 + }, + { + "epoch": 0.21, + "learning_rate": 4.49868949463769e-05, + "loss": 2.03, + "step": 13220 + }, + { + "epoch": 0.21, + "learning_rate": 4.497957270610995e-05, + "loss": 2.008, + "step": 13230 + }, + { + "epoch": 0.21, + "learning_rate": 4.497224571905693e-05, + "loss": 1.8984, + "step": 13240 + }, + { + "epoch": 0.21, + "learning_rate": 4.49649139869586e-05, + "loss": 1.6864, + "step": 13250 + }, + { + "epoch": 0.21, + "learning_rate": 4.4957577511556836e-05, + "loss": 1.826, + "step": 13260 + }, + { + "epoch": 0.21, + "learning_rate": 4.495023629459466e-05, + "loss": 1.9153, + "step": 13270 + }, + { + "epoch": 0.21, + "learning_rate": 4.494289033781622e-05, + "loss": 1.802, + "step": 13280 + }, + { + "epoch": 0.21, + "learning_rate": 4.4935539642966776e-05, + "loss": 1.8751, + "step": 13290 + }, + { + "epoch": 0.21, + "learning_rate": 4.492818421179271e-05, + "loss": 1.8342, + "step": 13300 + }, + { + "epoch": 0.21, + "learning_rate": 4.492082404604156e-05, + "loss": 1.9485, + "step": 13310 + }, + { + "epoch": 0.21, + "learning_rate": 4.4913459147461955e-05, + "loss": 1.5999, + "step": 13320 + }, + { + "epoch": 0.21, + "learning_rate": 4.490608951780367e-05, + "loss": 1.8489, + "step": 13330 + }, + { + "epoch": 0.21, + "learning_rate": 4.489871515881759e-05, + "loss": 1.8404, + "step": 13340 + }, + { + "epoch": 0.21, + "learning_rate": 4.489133607225573e-05, + "loss": 1.5456, + "step": 13350 + }, + { + "epoch": 0.21, + "learning_rate": 4.4883952259871234e-05, + "loss": 1.8751, + "step": 13360 + }, + { + "epoch": 0.21, + "learning_rate": 4.487656372341836e-05, + "loss": 1.9009, + "step": 13370 + }, + { + "epoch": 0.21, + "learning_rate": 4.4869170464652485e-05, + "loss": 1.8085, + "step": 13380 + }, + { + "epoch": 0.21, + "learning_rate": 4.4861772485330114e-05, + "loss": 1.919, + "step": 13390 + }, + { + "epoch": 0.21, + "learning_rate": 4.485436978720889e-05, + "loss": 1.6947, + "step": 13400 + }, + { + "epoch": 0.21, + "learning_rate": 4.484696237204754e-05, + "loss": 1.7379, + "step": 13410 + }, + { + "epoch": 0.21, + "learning_rate": 4.483955024160594e-05, + "loss": 1.8707, + "step": 13420 + }, + { + "epoch": 0.21, + "learning_rate": 4.483213339764508e-05, + "loss": 1.9474, + "step": 13430 + }, + { + "epoch": 0.21, + "learning_rate": 4.4824711841927075e-05, + "loss": 1.8756, + "step": 13440 + }, + { + "epoch": 0.21, + "learning_rate": 4.481728557621514e-05, + "loss": 1.8177, + "step": 13450 + }, + { + "epoch": 0.21, + "learning_rate": 4.480985460227362e-05, + "loss": 1.8495, + "step": 13460 + }, + { + "epoch": 0.21, + "learning_rate": 4.4802418921867996e-05, + "loss": 1.8704, + "step": 13470 + }, + { + "epoch": 0.21, + "learning_rate": 4.479497853676484e-05, + "loss": 1.8, + "step": 13480 + }, + { + "epoch": 0.21, + "learning_rate": 4.478753344873185e-05, + "loss": 1.7696, + "step": 13490 + }, + { + "epoch": 0.21, + "learning_rate": 4.478008365953786e-05, + "loss": 1.9174, + "step": 13500 + }, + { + "epoch": 0.21, + "learning_rate": 4.4772629170952795e-05, + "loss": 1.9657, + "step": 13510 + }, + { + "epoch": 0.21, + "learning_rate": 4.476516998474771e-05, + "loss": 1.7958, + "step": 13520 + }, + { + "epoch": 0.21, + "learning_rate": 4.4757706102694766e-05, + "loss": 2.0145, + "step": 13530 + }, + { + "epoch": 0.21, + "learning_rate": 4.475023752656725e-05, + "loss": 1.7189, + "step": 13540 + }, + { + "epoch": 0.21, + "learning_rate": 4.474276425813957e-05, + "loss": 1.6287, + "step": 13550 + }, + { + "epoch": 0.21, + "learning_rate": 4.473528629918723e-05, + "loss": 1.8224, + "step": 13560 + }, + { + "epoch": 0.21, + "learning_rate": 4.472780365148686e-05, + "loss": 1.6843, + "step": 13570 + }, + { + "epoch": 0.21, + "learning_rate": 4.4720316316816204e-05, + "loss": 1.782, + "step": 13580 + }, + { + "epoch": 0.21, + "learning_rate": 4.4712824296954114e-05, + "loss": 1.801, + "step": 13590 + }, + { + "epoch": 0.21, + "learning_rate": 4.470532759368056e-05, + "loss": 1.8339, + "step": 13600 + }, + { + "epoch": 0.21, + "learning_rate": 4.469782620877663e-05, + "loss": 1.7849, + "step": 13610 + }, + { + "epoch": 0.21, + "learning_rate": 4.469032014402451e-05, + "loss": 1.9943, + "step": 13620 + }, + { + "epoch": 0.21, + "learning_rate": 4.468280940120751e-05, + "loss": 2.0836, + "step": 13630 + }, + { + "epoch": 0.21, + "learning_rate": 4.467529398211005e-05, + "loss": 2.1812, + "step": 13640 + }, + { + "epoch": 0.21, + "learning_rate": 4.4667773888517645e-05, + "loss": 1.9783, + "step": 13650 + }, + { + "epoch": 0.21, + "learning_rate": 4.4660249122216944e-05, + "loss": 1.9577, + "step": 13660 + }, + { + "epoch": 0.21, + "learning_rate": 4.46527196849957e-05, + "loss": 1.7278, + "step": 13670 + }, + { + "epoch": 0.21, + "learning_rate": 4.4645185578642755e-05, + "loss": 1.6278, + "step": 13680 + }, + { + "epoch": 0.21, + "learning_rate": 4.4637646804948095e-05, + "loss": 1.866, + "step": 13690 + }, + { + "epoch": 0.21, + "learning_rate": 4.463010336570278e-05, + "loss": 1.8716, + "step": 13700 + }, + { + "epoch": 0.21, + "learning_rate": 4.4622555262699014e-05, + "loss": 1.8191, + "step": 13710 + }, + { + "epoch": 0.21, + "learning_rate": 4.4615002497730076e-05, + "loss": 1.9734, + "step": 13720 + }, + { + "epoch": 0.21, + "learning_rate": 4.460744507259037e-05, + "loss": 1.7782, + "step": 13730 + }, + { + "epoch": 0.21, + "learning_rate": 4.45998829890754e-05, + "loss": 1.8092, + "step": 13740 + }, + { + "epoch": 0.21, + "learning_rate": 4.459231624898179e-05, + "loss": 1.6374, + "step": 13750 + }, + { + "epoch": 0.21, + "learning_rate": 4.458474485410725e-05, + "loss": 1.4119, + "step": 13760 + }, + { + "epoch": 0.21, + "learning_rate": 4.4577168806250625e-05, + "loss": 1.8767, + "step": 13770 + }, + { + "epoch": 0.21, + "learning_rate": 4.456958810721182e-05, + "loss": 1.8548, + "step": 13780 + }, + { + "epoch": 0.21, + "learning_rate": 4.45620027587919e-05, + "loss": 1.7237, + "step": 13790 + }, + { + "epoch": 0.21, + "learning_rate": 4.455441276279299e-05, + "loss": 1.7079, + "step": 13800 + }, + { + "epoch": 0.21, + "learning_rate": 4.454681812101834e-05, + "loss": 1.7156, + "step": 13810 + }, + { + "epoch": 0.21, + "learning_rate": 4.45392188352723e-05, + "loss": 1.6823, + "step": 13820 + }, + { + "epoch": 0.21, + "learning_rate": 4.4531614907360325e-05, + "loss": 1.9576, + "step": 13830 + }, + { + "epoch": 0.21, + "learning_rate": 4.452400633908897e-05, + "loss": 2.1093, + "step": 13840 + }, + { + "epoch": 0.21, + "learning_rate": 4.451639313226589e-05, + "loss": 2.0721, + "step": 13850 + }, + { + "epoch": 0.22, + "learning_rate": 4.450877528869985e-05, + "loss": 2.033, + "step": 13860 + }, + { + "epoch": 0.22, + "learning_rate": 4.450115281020071e-05, + "loss": 1.8167, + "step": 13870 + }, + { + "epoch": 0.22, + "learning_rate": 4.4493525698579444e-05, + "loss": 2.1634, + "step": 13880 + }, + { + "epoch": 0.22, + "learning_rate": 4.448589395564809e-05, + "loss": 2.4067, + "step": 13890 + }, + { + "epoch": 0.22, + "learning_rate": 4.4478257583219846e-05, + "loss": 2.0891, + "step": 13900 + }, + { + "epoch": 0.22, + "learning_rate": 4.447061658310895e-05, + "loss": 1.6136, + "step": 13910 + }, + { + "epoch": 0.22, + "learning_rate": 4.446297095713078e-05, + "loss": 1.5512, + "step": 13920 + }, + { + "epoch": 0.22, + "learning_rate": 4.4455320707101785e-05, + "loss": 2.0977, + "step": 13930 + }, + { + "epoch": 0.22, + "learning_rate": 4.4447665834839544e-05, + "loss": 1.8516, + "step": 13940 + }, + { + "epoch": 0.22, + "learning_rate": 4.444000634216271e-05, + "loss": 1.776, + "step": 13950 + }, + { + "epoch": 0.22, + "learning_rate": 4.443234223089103e-05, + "loss": 1.6728, + "step": 13960 + }, + { + "epoch": 0.22, + "learning_rate": 4.442467350284537e-05, + "loss": 1.68, + "step": 13970 + }, + { + "epoch": 0.22, + "learning_rate": 4.441700015984768e-05, + "loss": 1.9841, + "step": 13980 + }, + { + "epoch": 0.22, + "learning_rate": 4.440932220372101e-05, + "loss": 2.0927, + "step": 13990 + }, + { + "epoch": 0.22, + "learning_rate": 4.4401639636289495e-05, + "loss": 1.8751, + "step": 14000 + }, + { + "epoch": 0.22, + "learning_rate": 4.439395245937838e-05, + "loss": 1.8214, + "step": 14010 + }, + { + "epoch": 0.22, + "learning_rate": 4.4386260674813994e-05, + "loss": 1.7763, + "step": 14020 + }, + { + "epoch": 0.22, + "learning_rate": 4.437856428442378e-05, + "loss": 1.792, + "step": 14030 + }, + { + "epoch": 0.22, + "learning_rate": 4.437086329003625e-05, + "loss": 1.7321, + "step": 14040 + }, + { + "epoch": 0.22, + "learning_rate": 4.436315769348103e-05, + "loss": 1.8559, + "step": 14050 + }, + { + "epoch": 0.22, + "learning_rate": 4.4355447496588814e-05, + "loss": 2.0059, + "step": 14060 + }, + { + "epoch": 0.22, + "learning_rate": 4.434773270119143e-05, + "loss": 2.0193, + "step": 14070 + }, + { + "epoch": 0.22, + "learning_rate": 4.434001330912174e-05, + "loss": 2.0029, + "step": 14080 + }, + { + "epoch": 0.22, + "learning_rate": 4.433228932221377e-05, + "loss": 1.9328, + "step": 14090 + }, + { + "epoch": 0.22, + "learning_rate": 4.432456074230258e-05, + "loss": 1.6896, + "step": 14100 + }, + { + "epoch": 0.22, + "learning_rate": 4.431682757122434e-05, + "loss": 1.7283, + "step": 14110 + }, + { + "epoch": 0.22, + "learning_rate": 4.430908981081633e-05, + "loss": 1.9206, + "step": 14120 + }, + { + "epoch": 0.22, + "learning_rate": 4.430134746291687e-05, + "loss": 1.8314, + "step": 14130 + }, + { + "epoch": 0.22, + "learning_rate": 4.429360052936543e-05, + "loss": 1.8634, + "step": 14140 + }, + { + "epoch": 0.22, + "learning_rate": 4.4285849012002534e-05, + "loss": 1.794, + "step": 14150 + }, + { + "epoch": 0.22, + "learning_rate": 4.42780929126698e-05, + "loss": 1.8407, + "step": 14160 + }, + { + "epoch": 0.22, + "learning_rate": 4.427033223320994e-05, + "loss": 1.8993, + "step": 14170 + }, + { + "epoch": 0.22, + "learning_rate": 4.426256697546674e-05, + "loss": 1.8699, + "step": 14180 + }, + { + "epoch": 0.22, + "learning_rate": 4.425479714128511e-05, + "loss": 1.792, + "step": 14190 + }, + { + "epoch": 0.22, + "learning_rate": 4.4247022732511e-05, + "loss": 1.747, + "step": 14200 + }, + { + "epoch": 0.22, + "learning_rate": 4.423924375099148e-05, + "loss": 1.7641, + "step": 14210 + }, + { + "epoch": 0.22, + "learning_rate": 4.423146019857469e-05, + "loss": 1.535, + "step": 14220 + }, + { + "epoch": 0.22, + "learning_rate": 4.4223672077109866e-05, + "loss": 1.6136, + "step": 14230 + }, + { + "epoch": 0.22, + "learning_rate": 4.4215879388447325e-05, + "loss": 1.5902, + "step": 14240 + }, + { + "epoch": 0.22, + "learning_rate": 4.420808213443845e-05, + "loss": 1.8859, + "step": 14250 + }, + { + "epoch": 0.22, + "learning_rate": 4.4200280316935764e-05, + "loss": 1.7579, + "step": 14260 + }, + { + "epoch": 0.22, + "learning_rate": 4.419247393779282e-05, + "loss": 1.704, + "step": 14270 + }, + { + "epoch": 0.22, + "learning_rate": 4.418466299886427e-05, + "loss": 1.766, + "step": 14280 + }, + { + "epoch": 0.22, + "learning_rate": 4.417684750200585e-05, + "loss": 1.8919, + "step": 14290 + }, + { + "epoch": 0.22, + "learning_rate": 4.416902744907438e-05, + "loss": 1.8188, + "step": 14300 + }, + { + "epoch": 0.22, + "learning_rate": 4.4161202841927784e-05, + "loss": 1.7794, + "step": 14310 + }, + { + "epoch": 0.22, + "learning_rate": 4.4153373682425026e-05, + "loss": 1.7279, + "step": 14320 + }, + { + "epoch": 0.22, + "learning_rate": 4.4145539972426186e-05, + "loss": 1.6114, + "step": 14330 + }, + { + "epoch": 0.22, + "learning_rate": 4.413770171379239e-05, + "loss": 1.8225, + "step": 14340 + }, + { + "epoch": 0.22, + "learning_rate": 4.41298589083859e-05, + "loss": 1.6789, + "step": 14350 + }, + { + "epoch": 0.22, + "learning_rate": 4.412201155807e-05, + "loss": 1.5272, + "step": 14360 + }, + { + "epoch": 0.22, + "learning_rate": 4.4114159664709085e-05, + "loss": 1.8824, + "step": 14370 + }, + { + "epoch": 0.22, + "learning_rate": 4.410630323016863e-05, + "loss": 1.9019, + "step": 14380 + }, + { + "epoch": 0.22, + "learning_rate": 4.409844225631518e-05, + "loss": 2.1993, + "step": 14390 + }, + { + "epoch": 0.22, + "learning_rate": 4.4090576745016355e-05, + "loss": 1.8192, + "step": 14400 + }, + { + "epoch": 0.22, + "learning_rate": 4.408270669814086e-05, + "loss": 1.6379, + "step": 14410 + }, + { + "epoch": 0.22, + "learning_rate": 4.407483211755847e-05, + "loss": 1.6886, + "step": 14420 + }, + { + "epoch": 0.22, + "learning_rate": 4.406695300514007e-05, + "loss": 1.7652, + "step": 14430 + }, + { + "epoch": 0.22, + "learning_rate": 4.4059069362757555e-05, + "loss": 1.8505, + "step": 14440 + }, + { + "epoch": 0.22, + "learning_rate": 4.405118119228397e-05, + "loss": 1.7787, + "step": 14450 + }, + { + "epoch": 0.22, + "learning_rate": 4.404328849559338e-05, + "loss": 1.8214, + "step": 14460 + }, + { + "epoch": 0.22, + "learning_rate": 4.403539127456096e-05, + "loss": 1.9068, + "step": 14470 + }, + { + "epoch": 0.22, + "learning_rate": 4.4027489531062954e-05, + "loss": 1.6333, + "step": 14480 + }, + { + "epoch": 0.22, + "learning_rate": 4.4019583266976655e-05, + "loss": 1.6862, + "step": 14490 + }, + { + "epoch": 0.22, + "learning_rate": 4.401167248418045e-05, + "loss": 1.8079, + "step": 14500 + }, + { + "epoch": 0.23, + "learning_rate": 4.400375718455382e-05, + "loss": 1.9234, + "step": 14510 + }, + { + "epoch": 0.23, + "learning_rate": 4.399583736997728e-05, + "loss": 1.972, + "step": 14520 + }, + { + "epoch": 0.23, + "learning_rate": 4.398791304233243e-05, + "loss": 1.9224, + "step": 14530 + }, + { + "epoch": 0.23, + "learning_rate": 4.3979984203501955e-05, + "loss": 1.7961, + "step": 14540 + }, + { + "epoch": 0.23, + "learning_rate": 4.397205085536961e-05, + "loss": 1.7981, + "step": 14550 + }, + { + "epoch": 0.23, + "learning_rate": 4.396411299982021e-05, + "loss": 1.728, + "step": 14560 + }, + { + "epoch": 0.23, + "learning_rate": 4.395617063873965e-05, + "loss": 2.0786, + "step": 14570 + }, + { + "epoch": 0.23, + "learning_rate": 4.3948223774014875e-05, + "loss": 2.108, + "step": 14580 + }, + { + "epoch": 0.23, + "learning_rate": 4.394027240753393e-05, + "loss": 2.1114, + "step": 14590 + }, + { + "epoch": 0.23, + "learning_rate": 4.393231654118593e-05, + "loss": 1.9395, + "step": 14600 + }, + { + "epoch": 0.23, + "learning_rate": 4.3924356176861024e-05, + "loss": 1.758, + "step": 14610 + }, + { + "epoch": 0.23, + "learning_rate": 4.391639131645045e-05, + "loss": 1.8279, + "step": 14620 + }, + { + "epoch": 0.23, + "learning_rate": 4.3908421961846534e-05, + "loss": 1.8606, + "step": 14630 + }, + { + "epoch": 0.23, + "learning_rate": 4.390044811494264e-05, + "loss": 1.8434, + "step": 14640 + }, + { + "epoch": 0.23, + "learning_rate": 4.38924697776332e-05, + "loss": 1.7888, + "step": 14650 + }, + { + "epoch": 0.23, + "learning_rate": 4.3884486951813747e-05, + "loss": 1.7293, + "step": 14660 + }, + { + "epoch": 0.23, + "learning_rate": 4.387649963938084e-05, + "loss": 1.7429, + "step": 14670 + }, + { + "epoch": 0.23, + "learning_rate": 4.3868507842232115e-05, + "loss": 1.8577, + "step": 14680 + }, + { + "epoch": 0.23, + "learning_rate": 4.386051156226629e-05, + "loss": 1.9004, + "step": 14690 + }, + { + "epoch": 0.23, + "learning_rate": 4.385251080138314e-05, + "loss": 1.9588, + "step": 14700 + }, + { + "epoch": 0.23, + "learning_rate": 4.384450556148349e-05, + "loss": 1.8877, + "step": 14710 + }, + { + "epoch": 0.23, + "learning_rate": 4.383649584446924e-05, + "loss": 2.0125, + "step": 14720 + }, + { + "epoch": 0.23, + "learning_rate": 4.382848165224337e-05, + "loss": 1.7729, + "step": 14730 + }, + { + "epoch": 0.23, + "learning_rate": 4.3820462986709894e-05, + "loss": 1.7276, + "step": 14740 + }, + { + "epoch": 0.23, + "learning_rate": 4.38124398497739e-05, + "loss": 1.7206, + "step": 14750 + }, + { + "epoch": 0.23, + "learning_rate": 4.380441224334156e-05, + "loss": 1.7593, + "step": 14760 + }, + { + "epoch": 0.23, + "learning_rate": 4.379638016932006e-05, + "loss": 1.8081, + "step": 14770 + }, + { + "epoch": 0.23, + "learning_rate": 4.37883436296177e-05, + "loss": 1.7556, + "step": 14780 + }, + { + "epoch": 0.23, + "learning_rate": 4.378030262614381e-05, + "loss": 1.9479, + "step": 14790 + }, + { + "epoch": 0.23, + "learning_rate": 4.377225716080878e-05, + "loss": 1.7268, + "step": 14800 + }, + { + "epoch": 0.23, + "learning_rate": 4.376420723552407e-05, + "loss": 1.792, + "step": 14810 + }, + { + "epoch": 0.23, + "learning_rate": 4.37561528522022e-05, + "loss": 1.777, + "step": 14820 + }, + { + "epoch": 0.23, + "learning_rate": 4.3748094012756756e-05, + "loss": 1.9572, + "step": 14830 + }, + { + "epoch": 0.23, + "learning_rate": 4.3740030719102356e-05, + "loss": 1.9225, + "step": 14840 + }, + { + "epoch": 0.23, + "learning_rate": 4.3731962973154706e-05, + "loss": 1.8801, + "step": 14850 + }, + { + "epoch": 0.23, + "learning_rate": 4.372389077683055e-05, + "loss": 1.8461, + "step": 14860 + }, + { + "epoch": 0.23, + "learning_rate": 4.3715814132047684e-05, + "loss": 1.7351, + "step": 14870 + }, + { + "epoch": 0.23, + "learning_rate": 4.3707733040725e-05, + "loss": 1.6492, + "step": 14880 + }, + { + "epoch": 0.23, + "learning_rate": 4.369964750478239e-05, + "loss": 1.6887, + "step": 14890 + }, + { + "epoch": 0.23, + "learning_rate": 4.369155752614086e-05, + "loss": 1.8136, + "step": 14900 + }, + { + "epoch": 0.23, + "learning_rate": 4.368346310672242e-05, + "loss": 1.7833, + "step": 14910 + }, + { + "epoch": 0.23, + "learning_rate": 4.367536424845018e-05, + "loss": 1.7457, + "step": 14920 + }, + { + "epoch": 0.23, + "learning_rate": 4.366726095324827e-05, + "loss": 1.8059, + "step": 14930 + }, + { + "epoch": 0.23, + "learning_rate": 4.365915322304188e-05, + "loss": 1.7849, + "step": 14940 + }, + { + "epoch": 0.23, + "learning_rate": 4.365104105975727e-05, + "loss": 1.827, + "step": 14950 + }, + { + "epoch": 0.23, + "learning_rate": 4.3642924465321745e-05, + "loss": 1.8097, + "step": 14960 + }, + { + "epoch": 0.23, + "learning_rate": 4.363480344166365e-05, + "loss": 1.7577, + "step": 14970 + }, + { + "epoch": 0.23, + "learning_rate": 4.362667799071241e-05, + "loss": 1.7518, + "step": 14980 + }, + { + "epoch": 0.23, + "learning_rate": 4.3618548114398475e-05, + "loss": 1.8154, + "step": 14990 + }, + { + "epoch": 0.23, + "learning_rate": 4.361041381465336e-05, + "loss": 1.7627, + "step": 15000 + }, + { + "epoch": 0.23, + "eval_loss": 1.7180181741714478, + "eval_runtime": 82.2878, + "eval_samples_per_second": 36.457, + "eval_steps_per_second": 4.557, + "step": 15000 + }, + { + "epoch": 0.23, + "learning_rate": 4.360227509340963e-05, + "loss": 1.7868, + "step": 15010 + }, + { + "epoch": 0.23, + "learning_rate": 4.3594131952600894e-05, + "loss": 1.7538, + "step": 15020 + }, + { + "epoch": 0.23, + "learning_rate": 4.358598439416182e-05, + "loss": 1.8729, + "step": 15030 + }, + { + "epoch": 0.23, + "learning_rate": 4.357783242002811e-05, + "loss": 1.7501, + "step": 15040 + }, + { + "epoch": 0.23, + "learning_rate": 4.356967603213654e-05, + "loss": 1.6776, + "step": 15050 + }, + { + "epoch": 0.23, + "learning_rate": 4.356151523242491e-05, + "loss": 2.0497, + "step": 15060 + }, + { + "epoch": 0.23, + "learning_rate": 4.3553350022832083e-05, + "loss": 1.9301, + "step": 15070 + }, + { + "epoch": 0.23, + "learning_rate": 4.354518040529797e-05, + "loss": 1.8151, + "step": 15080 + }, + { + "epoch": 0.23, + "learning_rate": 4.353700638176351e-05, + "loss": 2.0247, + "step": 15090 + }, + { + "epoch": 0.23, + "learning_rate": 4.3528827954170726e-05, + "loss": 1.8791, + "step": 15100 + }, + { + "epoch": 0.23, + "learning_rate": 4.3520645124462656e-05, + "loss": 1.7957, + "step": 15110 + }, + { + "epoch": 0.23, + "learning_rate": 4.351245789458338e-05, + "loss": 1.812, + "step": 15120 + }, + { + "epoch": 0.23, + "learning_rate": 4.350426626647805e-05, + "loss": 1.9326, + "step": 15130 + }, + { + "epoch": 0.23, + "learning_rate": 4.349607024209285e-05, + "loss": 2.0055, + "step": 15140 + }, + { + "epoch": 0.24, + "learning_rate": 4.3487869823374995e-05, + "loss": 2.093, + "step": 15150 + }, + { + "epoch": 0.24, + "learning_rate": 4.347966501227276e-05, + "loss": 2.1203, + "step": 15160 + }, + { + "epoch": 0.24, + "learning_rate": 4.3471455810735474e-05, + "loss": 1.9363, + "step": 15170 + }, + { + "epoch": 0.24, + "learning_rate": 4.346324222071348e-05, + "loss": 1.9597, + "step": 15180 + }, + { + "epoch": 0.24, + "learning_rate": 4.3455024244158184e-05, + "loss": 2.0135, + "step": 15190 + }, + { + "epoch": 0.24, + "learning_rate": 4.344680188302203e-05, + "loss": 2.1012, + "step": 15200 + }, + { + "epoch": 0.24, + "learning_rate": 4.343857513925851e-05, + "loss": 1.9789, + "step": 15210 + }, + { + "epoch": 0.24, + "learning_rate": 4.3430344014822125e-05, + "loss": 1.7305, + "step": 15220 + }, + { + "epoch": 0.24, + "learning_rate": 4.342210851166847e-05, + "loss": 1.7928, + "step": 15230 + }, + { + "epoch": 0.24, + "learning_rate": 4.341386863175414e-05, + "loss": 1.7754, + "step": 15240 + }, + { + "epoch": 0.24, + "learning_rate": 4.340562437703678e-05, + "loss": 1.9918, + "step": 15250 + }, + { + "epoch": 0.24, + "learning_rate": 4.339737574947508e-05, + "loss": 1.8024, + "step": 15260 + }, + { + "epoch": 0.24, + "learning_rate": 4.3389122751028764e-05, + "loss": 1.9443, + "step": 15270 + }, + { + "epoch": 0.24, + "learning_rate": 4.3380865383658594e-05, + "loss": 1.8665, + "step": 15280 + }, + { + "epoch": 0.24, + "learning_rate": 4.337260364932637e-05, + "loss": 1.9303, + "step": 15290 + }, + { + "epoch": 0.24, + "learning_rate": 4.336433754999494e-05, + "loss": 1.8062, + "step": 15300 + }, + { + "epoch": 0.24, + "learning_rate": 4.3356067087628174e-05, + "loss": 1.7333, + "step": 15310 + }, + { + "epoch": 0.24, + "learning_rate": 4.3347792264190976e-05, + "loss": 1.5874, + "step": 15320 + }, + { + "epoch": 0.24, + "learning_rate": 4.333951308164931e-05, + "loss": 1.5032, + "step": 15330 + }, + { + "epoch": 0.24, + "learning_rate": 4.333122954197015e-05, + "loss": 1.7122, + "step": 15340 + }, + { + "epoch": 0.24, + "learning_rate": 4.332294164712152e-05, + "loss": 1.6085, + "step": 15350 + }, + { + "epoch": 0.24, + "learning_rate": 4.331464939907248e-05, + "loss": 1.7119, + "step": 15360 + }, + { + "epoch": 0.24, + "learning_rate": 4.3306352799793105e-05, + "loss": 1.7717, + "step": 15370 + }, + { + "epoch": 0.24, + "learning_rate": 4.329805185125453e-05, + "loss": 1.8143, + "step": 15380 + }, + { + "epoch": 0.24, + "learning_rate": 4.32897465554289e-05, + "loss": 1.7925, + "step": 15390 + }, + { + "epoch": 0.24, + "learning_rate": 4.328143691428941e-05, + "loss": 1.7802, + "step": 15400 + }, + { + "epoch": 0.24, + "learning_rate": 4.327312292981028e-05, + "loss": 1.6896, + "step": 15410 + }, + { + "epoch": 0.24, + "learning_rate": 4.326480460396677e-05, + "loss": 1.6639, + "step": 15420 + }, + { + "epoch": 0.24, + "learning_rate": 4.325648193873514e-05, + "loss": 1.6593, + "step": 15430 + }, + { + "epoch": 0.24, + "learning_rate": 4.3248154936092734e-05, + "loss": 1.7572, + "step": 15440 + }, + { + "epoch": 0.24, + "learning_rate": 4.323982359801788e-05, + "loss": 1.911, + "step": 15450 + }, + { + "epoch": 0.24, + "learning_rate": 4.323148792648996e-05, + "loss": 2.201, + "step": 15460 + }, + { + "epoch": 0.24, + "learning_rate": 4.3223147923489385e-05, + "loss": 1.9133, + "step": 15470 + }, + { + "epoch": 0.24, + "learning_rate": 4.3214803590997574e-05, + "loss": 1.4673, + "step": 15480 + }, + { + "epoch": 0.24, + "learning_rate": 4.3206454930997e-05, + "loss": 1.5952, + "step": 15490 + }, + { + "epoch": 0.24, + "learning_rate": 4.319810194547115e-05, + "loss": 1.8063, + "step": 15500 + }, + { + "epoch": 0.24, + "learning_rate": 4.318974463640455e-05, + "loss": 1.7932, + "step": 15510 + }, + { + "epoch": 0.24, + "learning_rate": 4.318138300578274e-05, + "loss": 1.7659, + "step": 15520 + }, + { + "epoch": 0.24, + "learning_rate": 4.317301705559229e-05, + "loss": 1.6929, + "step": 15530 + }, + { + "epoch": 0.24, + "learning_rate": 4.31646467878208e-05, + "loss": 1.6017, + "step": 15540 + }, + { + "epoch": 0.24, + "learning_rate": 4.31562722044569e-05, + "loss": 1.7831, + "step": 15550 + }, + { + "epoch": 0.24, + "learning_rate": 4.314789330749024e-05, + "loss": 1.6174, + "step": 15560 + }, + { + "epoch": 0.24, + "learning_rate": 4.313951009891148e-05, + "loss": 1.4159, + "step": 15570 + }, + { + "epoch": 0.24, + "learning_rate": 4.313112258071235e-05, + "loss": 1.4068, + "step": 15580 + }, + { + "epoch": 0.24, + "learning_rate": 4.312273075488554e-05, + "loss": 1.6638, + "step": 15590 + }, + { + "epoch": 0.24, + "learning_rate": 4.3114334623424816e-05, + "loss": 1.8426, + "step": 15600 + }, + { + "epoch": 0.24, + "learning_rate": 4.310593418832494e-05, + "loss": 1.7097, + "step": 15610 + }, + { + "epoch": 0.24, + "learning_rate": 4.3097529451581695e-05, + "loss": 1.8638, + "step": 15620 + }, + { + "epoch": 0.24, + "learning_rate": 4.308912041519192e-05, + "loss": 2.0008, + "step": 15630 + }, + { + "epoch": 0.24, + "learning_rate": 4.308070708115343e-05, + "loss": 1.9124, + "step": 15640 + }, + { + "epoch": 0.24, + "learning_rate": 4.3072289451465086e-05, + "loss": 1.8525, + "step": 15650 + }, + { + "epoch": 0.24, + "learning_rate": 4.3063867528126766e-05, + "loss": 1.9728, + "step": 15660 + }, + { + "epoch": 0.24, + "learning_rate": 4.3055441313139366e-05, + "loss": 1.8224, + "step": 15670 + }, + { + "epoch": 0.24, + "learning_rate": 4.304701080850481e-05, + "loss": 1.8074, + "step": 15680 + }, + { + "epoch": 0.24, + "learning_rate": 4.303857601622602e-05, + "loss": 1.8022, + "step": 15690 + }, + { + "epoch": 0.24, + "learning_rate": 4.3030136938306966e-05, + "loss": 1.7989, + "step": 15700 + }, + { + "epoch": 0.24, + "learning_rate": 4.302169357675261e-05, + "loss": 1.7999, + "step": 15710 + }, + { + "epoch": 0.24, + "learning_rate": 4.3013245933568937e-05, + "loss": 1.8495, + "step": 15720 + }, + { + "epoch": 0.24, + "learning_rate": 4.300479401076297e-05, + "loss": 1.676, + "step": 15730 + }, + { + "epoch": 0.24, + "learning_rate": 4.299633781034272e-05, + "loss": 1.8275, + "step": 15740 + }, + { + "epoch": 0.24, + "learning_rate": 4.298787733431724e-05, + "loss": 1.852, + "step": 15750 + }, + { + "epoch": 0.24, + "learning_rate": 4.297941258469659e-05, + "loss": 1.7432, + "step": 15760 + }, + { + "epoch": 0.24, + "learning_rate": 4.2970943563491816e-05, + "loss": 1.6872, + "step": 15770 + }, + { + "epoch": 0.24, + "learning_rate": 4.296247027271503e-05, + "loss": 1.6908, + "step": 15780 + }, + { + "epoch": 0.24, + "learning_rate": 4.2953992714379314e-05, + "loss": 1.9134, + "step": 15790 + }, + { + "epoch": 0.25, + "learning_rate": 4.29455108904988e-05, + "loss": 1.784, + "step": 15800 + }, + { + "epoch": 0.25, + "learning_rate": 4.293702480308861e-05, + "loss": 1.8022, + "step": 15810 + }, + { + "epoch": 0.25, + "learning_rate": 4.2928534454164884e-05, + "loss": 1.8676, + "step": 15820 + }, + { + "epoch": 0.25, + "learning_rate": 4.2920039845744775e-05, + "loss": 1.7982, + "step": 15830 + }, + { + "epoch": 0.25, + "learning_rate": 4.291154097984644e-05, + "loss": 1.7909, + "step": 15840 + }, + { + "epoch": 0.25, + "learning_rate": 4.290303785848908e-05, + "loss": 1.6917, + "step": 15850 + }, + { + "epoch": 0.25, + "learning_rate": 4.289453048369287e-05, + "loss": 1.817, + "step": 15860 + }, + { + "epoch": 0.25, + "learning_rate": 4.2886018857479e-05, + "loss": 1.845, + "step": 15870 + }, + { + "epoch": 0.25, + "learning_rate": 4.2877502981869686e-05, + "loss": 1.7028, + "step": 15880 + }, + { + "epoch": 0.25, + "learning_rate": 4.2868982858888166e-05, + "loss": 1.8574, + "step": 15890 + }, + { + "epoch": 0.25, + "learning_rate": 4.286045849055863e-05, + "loss": 1.605, + "step": 15900 + }, + { + "epoch": 0.25, + "learning_rate": 4.285192987890634e-05, + "loss": 1.6146, + "step": 15910 + }, + { + "epoch": 0.25, + "learning_rate": 4.2843397025957523e-05, + "loss": 1.5675, + "step": 15920 + }, + { + "epoch": 0.25, + "learning_rate": 4.2834859933739455e-05, + "loss": 1.5633, + "step": 15930 + }, + { + "epoch": 0.25, + "learning_rate": 4.2826318604280375e-05, + "loss": 1.4985, + "step": 15940 + }, + { + "epoch": 0.25, + "learning_rate": 4.281777303960955e-05, + "loss": 1.4398, + "step": 15950 + }, + { + "epoch": 0.25, + "learning_rate": 4.280922324175727e-05, + "loss": 1.453, + "step": 15960 + }, + { + "epoch": 0.25, + "learning_rate": 4.2800669212754795e-05, + "loss": 1.6388, + "step": 15970 + }, + { + "epoch": 0.25, + "learning_rate": 4.279211095463441e-05, + "loss": 1.8499, + "step": 15980 + }, + { + "epoch": 0.25, + "learning_rate": 4.278354846942941e-05, + "loss": 1.8666, + "step": 15990 + }, + { + "epoch": 0.25, + "learning_rate": 4.277498175917408e-05, + "loss": 1.7608, + "step": 16000 + }, + { + "epoch": 0.25, + "learning_rate": 4.2766410825903716e-05, + "loss": 1.6489, + "step": 16010 + }, + { + "epoch": 0.25, + "learning_rate": 4.275783567165462e-05, + "loss": 1.9551, + "step": 16020 + }, + { + "epoch": 0.25, + "learning_rate": 4.27492562984641e-05, + "loss": 1.8426, + "step": 16030 + }, + { + "epoch": 0.25, + "learning_rate": 4.2740672708370454e-05, + "loss": 2.0718, + "step": 16040 + }, + { + "epoch": 0.25, + "learning_rate": 4.2732084903412985e-05, + "loss": 1.847, + "step": 16050 + }, + { + "epoch": 0.25, + "learning_rate": 4.272349288563201e-05, + "loss": 1.8333, + "step": 16060 + }, + { + "epoch": 0.25, + "learning_rate": 4.271489665706882e-05, + "loss": 1.7727, + "step": 16070 + }, + { + "epoch": 0.25, + "learning_rate": 4.270629621976574e-05, + "loss": 1.7242, + "step": 16080 + }, + { + "epoch": 0.25, + "learning_rate": 4.269769157576608e-05, + "loss": 1.5498, + "step": 16090 + }, + { + "epoch": 0.25, + "learning_rate": 4.268908272711413e-05, + "loss": 2.0275, + "step": 16100 + }, + { + "epoch": 0.25, + "learning_rate": 4.268046967585522e-05, + "loss": 1.8603, + "step": 16110 + }, + { + "epoch": 0.25, + "learning_rate": 4.267185242403564e-05, + "loss": 1.721, + "step": 16120 + }, + { + "epoch": 0.25, + "learning_rate": 4.26632309737027e-05, + "loss": 1.7883, + "step": 16130 + }, + { + "epoch": 0.25, + "learning_rate": 4.265460532690469e-05, + "loss": 1.7635, + "step": 16140 + }, + { + "epoch": 0.25, + "learning_rate": 4.264597548569093e-05, + "loss": 1.8881, + "step": 16150 + }, + { + "epoch": 0.25, + "learning_rate": 4.26373414521117e-05, + "loss": 1.9935, + "step": 16160 + }, + { + "epoch": 0.25, + "learning_rate": 4.262870322821829e-05, + "loss": 1.7706, + "step": 16170 + }, + { + "epoch": 0.25, + "learning_rate": 4.262006081606299e-05, + "loss": 1.7359, + "step": 16180 + }, + { + "epoch": 0.25, + "learning_rate": 4.261141421769908e-05, + "loss": 1.7273, + "step": 16190 + }, + { + "epoch": 0.25, + "learning_rate": 4.260276343518084e-05, + "loss": 1.5001, + "step": 16200 + }, + { + "epoch": 0.25, + "learning_rate": 4.2594108470563524e-05, + "loss": 1.8077, + "step": 16210 + }, + { + "epoch": 0.25, + "learning_rate": 4.2585449325903416e-05, + "loss": 1.7671, + "step": 16220 + }, + { + "epoch": 0.25, + "learning_rate": 4.257678600325777e-05, + "loss": 1.566, + "step": 16230 + }, + { + "epoch": 0.25, + "learning_rate": 4.2568118504684814e-05, + "loss": 1.9281, + "step": 16240 + }, + { + "epoch": 0.25, + "learning_rate": 4.255944683224382e-05, + "loss": 1.9293, + "step": 16250 + }, + { + "epoch": 0.25, + "learning_rate": 4.2550770987994995e-05, + "loss": 1.6836, + "step": 16260 + }, + { + "epoch": 0.25, + "learning_rate": 4.2542090973999574e-05, + "loss": 1.6259, + "step": 16270 + }, + { + "epoch": 0.25, + "learning_rate": 4.2533406792319774e-05, + "loss": 1.5957, + "step": 16280 + }, + { + "epoch": 0.25, + "learning_rate": 4.25247184450188e-05, + "loss": 1.5949, + "step": 16290 + }, + { + "epoch": 0.25, + "learning_rate": 4.251602593416084e-05, + "loss": 1.683, + "step": 16300 + }, + { + "epoch": 0.25, + "learning_rate": 4.250732926181108e-05, + "loss": 1.9353, + "step": 16310 + }, + { + "epoch": 0.25, + "learning_rate": 4.249862843003569e-05, + "loss": 1.9002, + "step": 16320 + }, + { + "epoch": 0.25, + "learning_rate": 4.248992344090185e-05, + "loss": 1.8219, + "step": 16330 + }, + { + "epoch": 0.25, + "learning_rate": 4.248121429647768e-05, + "loss": 1.7036, + "step": 16340 + }, + { + "epoch": 0.25, + "learning_rate": 4.2472500998832334e-05, + "loss": 1.6086, + "step": 16350 + }, + { + "epoch": 0.25, + "learning_rate": 4.246378355003593e-05, + "loss": 1.693, + "step": 16360 + }, + { + "epoch": 0.25, + "learning_rate": 4.245506195215957e-05, + "loss": 1.7098, + "step": 16370 + }, + { + "epoch": 0.25, + "learning_rate": 4.244633620727535e-05, + "loss": 1.8342, + "step": 16380 + }, + { + "epoch": 0.25, + "learning_rate": 4.2437606317456366e-05, + "loss": 1.8023, + "step": 16390 + }, + { + "epoch": 0.25, + "learning_rate": 4.242887228477666e-05, + "loss": 1.8904, + "step": 16400 + }, + { + "epoch": 0.25, + "learning_rate": 4.2420134111311293e-05, + "loss": 1.7619, + "step": 16410 + }, + { + "epoch": 0.25, + "learning_rate": 4.2411391799136304e-05, + "loss": 2.0016, + "step": 16420 + }, + { + "epoch": 0.25, + "learning_rate": 4.240264535032868e-05, + "loss": 1.5288, + "step": 16430 + }, + { + "epoch": 0.26, + "learning_rate": 4.239389476696646e-05, + "loss": 1.4601, + "step": 16440 + }, + { + "epoch": 0.26, + "learning_rate": 4.2385140051128594e-05, + "loss": 1.5915, + "step": 16450 + }, + { + "epoch": 0.26, + "learning_rate": 4.237638120489505e-05, + "loss": 1.675, + "step": 16460 + }, + { + "epoch": 0.26, + "learning_rate": 4.236761823034678e-05, + "loss": 1.7, + "step": 16470 + }, + { + "epoch": 0.26, + "learning_rate": 4.2358851129565715e-05, + "loss": 1.8357, + "step": 16480 + }, + { + "epoch": 0.26, + "learning_rate": 4.235007990463474e-05, + "loss": 1.8126, + "step": 16490 + }, + { + "epoch": 0.26, + "learning_rate": 4.234130455763775e-05, + "loss": 1.8795, + "step": 16500 + }, + { + "epoch": 0.26, + "learning_rate": 4.233252509065961e-05, + "loss": 1.7031, + "step": 16510 + }, + { + "epoch": 0.26, + "learning_rate": 4.2323741505786156e-05, + "loss": 1.8439, + "step": 16520 + }, + { + "epoch": 0.26, + "learning_rate": 4.231495380510422e-05, + "loss": 1.7563, + "step": 16530 + }, + { + "epoch": 0.26, + "learning_rate": 4.23061619907016e-05, + "loss": 1.7746, + "step": 16540 + }, + { + "epoch": 0.26, + "learning_rate": 4.229736606466707e-05, + "loss": 1.7549, + "step": 16550 + }, + { + "epoch": 0.26, + "learning_rate": 4.228856602909037e-05, + "loss": 1.793, + "step": 16560 + }, + { + "epoch": 0.26, + "learning_rate": 4.2279761886062255e-05, + "loss": 1.7028, + "step": 16570 + }, + { + "epoch": 0.26, + "learning_rate": 4.227095363767441e-05, + "loss": 2.0031, + "step": 16580 + }, + { + "epoch": 0.26, + "learning_rate": 4.226214128601953e-05, + "loss": 1.8168, + "step": 16590 + }, + { + "epoch": 0.26, + "learning_rate": 4.225332483319125e-05, + "loss": 1.7478, + "step": 16600 + }, + { + "epoch": 0.26, + "learning_rate": 4.224450428128423e-05, + "loss": 1.8152, + "step": 16610 + }, + { + "epoch": 0.26, + "learning_rate": 4.2235679632394055e-05, + "loss": 1.7568, + "step": 16620 + }, + { + "epoch": 0.26, + "learning_rate": 4.22268508886173e-05, + "loss": 1.7691, + "step": 16630 + }, + { + "epoch": 0.26, + "learning_rate": 4.221801805205152e-05, + "loss": 1.7557, + "step": 16640 + }, + { + "epoch": 0.26, + "learning_rate": 4.220918112479525e-05, + "loss": 1.786, + "step": 16650 + }, + { + "epoch": 0.26, + "learning_rate": 4.2200340108947965e-05, + "loss": 1.7086, + "step": 16660 + }, + { + "epoch": 0.26, + "learning_rate": 4.219149500661014e-05, + "loss": 1.7002, + "step": 16670 + }, + { + "epoch": 0.26, + "learning_rate": 4.218264581988321e-05, + "loss": 1.6567, + "step": 16680 + }, + { + "epoch": 0.26, + "learning_rate": 4.217379255086958e-05, + "loss": 1.6914, + "step": 16690 + }, + { + "epoch": 0.26, + "learning_rate": 4.216493520167264e-05, + "loss": 1.6897, + "step": 16700 + }, + { + "epoch": 0.26, + "learning_rate": 4.215607377439672e-05, + "loss": 2.014, + "step": 16710 + }, + { + "epoch": 0.26, + "learning_rate": 4.214720827114714e-05, + "loss": 1.9544, + "step": 16720 + }, + { + "epoch": 0.26, + "learning_rate": 4.213833869403019e-05, + "loss": 1.928, + "step": 16730 + }, + { + "epoch": 0.26, + "learning_rate": 4.212946504515311e-05, + "loss": 1.938, + "step": 16740 + }, + { + "epoch": 0.26, + "learning_rate": 4.2120587326624126e-05, + "loss": 2.0092, + "step": 16750 + }, + { + "epoch": 0.26, + "learning_rate": 4.211170554055244e-05, + "loss": 1.9373, + "step": 16760 + }, + { + "epoch": 0.26, + "learning_rate": 4.210281968904817e-05, + "loss": 1.7966, + "step": 16770 + }, + { + "epoch": 0.26, + "learning_rate": 4.209392977422245e-05, + "loss": 2.1905, + "step": 16780 + }, + { + "epoch": 0.26, + "learning_rate": 4.208503579818738e-05, + "loss": 1.8587, + "step": 16790 + }, + { + "epoch": 0.26, + "learning_rate": 4.207613776305598e-05, + "loss": 2.0832, + "step": 16800 + }, + { + "epoch": 0.26, + "learning_rate": 4.2067235670942284e-05, + "loss": 1.9328, + "step": 16810 + }, + { + "epoch": 0.26, + "learning_rate": 4.205832952396126e-05, + "loss": 1.8872, + "step": 16820 + }, + { + "epoch": 0.26, + "learning_rate": 4.204941932422885e-05, + "loss": 1.7503, + "step": 16830 + }, + { + "epoch": 0.26, + "learning_rate": 4.204050507386195e-05, + "loss": 1.7135, + "step": 16840 + }, + { + "epoch": 0.26, + "learning_rate": 4.2031586774978424e-05, + "loss": 1.6992, + "step": 16850 + }, + { + "epoch": 0.26, + "learning_rate": 4.202266442969712e-05, + "loss": 1.7776, + "step": 16860 + }, + { + "epoch": 0.26, + "learning_rate": 4.20137380401378e-05, + "loss": 1.6132, + "step": 16870 + }, + { + "epoch": 0.26, + "learning_rate": 4.2004807608421225e-05, + "loss": 1.8119, + "step": 16880 + }, + { + "epoch": 0.26, + "learning_rate": 4.199587313666911e-05, + "loss": 1.7936, + "step": 16890 + }, + { + "epoch": 0.26, + "learning_rate": 4.1986934627004114e-05, + "loss": 1.8504, + "step": 16900 + }, + { + "epoch": 0.26, + "learning_rate": 4.1977992081549874e-05, + "loss": 1.7269, + "step": 16910 + }, + { + "epoch": 0.26, + "learning_rate": 4.196904550243097e-05, + "loss": 1.7408, + "step": 16920 + }, + { + "epoch": 0.26, + "learning_rate": 4.196009489177296e-05, + "loss": 1.9128, + "step": 16930 + }, + { + "epoch": 0.26, + "learning_rate": 4.195114025170233e-05, + "loss": 1.9862, + "step": 16940 + }, + { + "epoch": 0.26, + "learning_rate": 4.194218158434655e-05, + "loss": 1.9998, + "step": 16950 + }, + { + "epoch": 0.26, + "learning_rate": 4.1933218891834034e-05, + "loss": 1.9382, + "step": 16960 + }, + { + "epoch": 0.26, + "learning_rate": 4.192425217629417e-05, + "loss": 1.7846, + "step": 16970 + }, + { + "epoch": 0.26, + "learning_rate": 4.191528143985727e-05, + "loss": 1.5135, + "step": 16980 + }, + { + "epoch": 0.26, + "learning_rate": 4.1906306684654636e-05, + "loss": 1.5438, + "step": 16990 + }, + { + "epoch": 0.26, + "learning_rate": 4.189732791281849e-05, + "loss": 2.0039, + "step": 17000 + }, + { + "epoch": 0.26, + "learning_rate": 4.1888345126482036e-05, + "loss": 1.9236, + "step": 17010 + }, + { + "epoch": 0.26, + "learning_rate": 4.1879358327779426e-05, + "loss": 1.8032, + "step": 17020 + }, + { + "epoch": 0.26, + "learning_rate": 4.187036751884576e-05, + "loss": 1.6551, + "step": 17030 + }, + { + "epoch": 0.26, + "learning_rate": 4.186137270181707e-05, + "loss": 1.5531, + "step": 17040 + }, + { + "epoch": 0.26, + "learning_rate": 4.18523738788304e-05, + "loss": 1.4822, + "step": 17050 + }, + { + "epoch": 0.26, + "learning_rate": 4.184337105202367e-05, + "loss": 1.421, + "step": 17060 + }, + { + "epoch": 0.26, + "learning_rate": 4.183436422353582e-05, + "loss": 1.4202, + "step": 17070 + }, + { + "epoch": 0.26, + "learning_rate": 4.182535339550669e-05, + "loss": 1.4155, + "step": 17080 + }, + { + "epoch": 0.27, + "learning_rate": 4.181633857007711e-05, + "loss": 1.5707, + "step": 17090 + }, + { + "epoch": 0.27, + "learning_rate": 4.1807319749388826e-05, + "loss": 1.8303, + "step": 17100 + }, + { + "epoch": 0.27, + "learning_rate": 4.179829693558454e-05, + "loss": 1.7824, + "step": 17110 + }, + { + "epoch": 0.27, + "learning_rate": 4.178927013080792e-05, + "loss": 1.7659, + "step": 17120 + }, + { + "epoch": 0.27, + "learning_rate": 4.1780239337203576e-05, + "loss": 1.7013, + "step": 17130 + }, + { + "epoch": 0.27, + "learning_rate": 4.1771204556917056e-05, + "loss": 1.7361, + "step": 17140 + }, + { + "epoch": 0.27, + "learning_rate": 4.176216579209487e-05, + "loss": 1.6975, + "step": 17150 + }, + { + "epoch": 0.27, + "learning_rate": 4.175312304488443e-05, + "loss": 1.7941, + "step": 17160 + }, + { + "epoch": 0.27, + "learning_rate": 4.1744076317434176e-05, + "loss": 1.7507, + "step": 17170 + }, + { + "epoch": 0.27, + "learning_rate": 4.173502561189343e-05, + "loss": 1.6078, + "step": 17180 + }, + { + "epoch": 0.27, + "learning_rate": 4.172597093041247e-05, + "loss": 1.5817, + "step": 17190 + }, + { + "epoch": 0.27, + "learning_rate": 4.1716912275142525e-05, + "loss": 1.4377, + "step": 17200 + }, + { + "epoch": 0.27, + "learning_rate": 4.170784964823577e-05, + "loss": 1.9275, + "step": 17210 + }, + { + "epoch": 0.27, + "learning_rate": 4.169878305184533e-05, + "loss": 1.7549, + "step": 17220 + }, + { + "epoch": 0.27, + "learning_rate": 4.1689712488125255e-05, + "loss": 1.6684, + "step": 17230 + }, + { + "epoch": 0.27, + "learning_rate": 4.168063795923055e-05, + "loss": 1.7631, + "step": 17240 + }, + { + "epoch": 0.27, + "learning_rate": 4.167155946731717e-05, + "loss": 1.7593, + "step": 17250 + }, + { + "epoch": 0.27, + "learning_rate": 4.166247701454198e-05, + "loss": 1.875, + "step": 17260 + }, + { + "epoch": 0.27, + "learning_rate": 4.165339060306282e-05, + "loss": 1.8556, + "step": 17270 + }, + { + "epoch": 0.27, + "learning_rate": 4.164430023503846e-05, + "loss": 1.8407, + "step": 17280 + }, + { + "epoch": 0.27, + "learning_rate": 4.16352059126286e-05, + "loss": 1.8785, + "step": 17290 + }, + { + "epoch": 0.27, + "learning_rate": 4.1626107637993886e-05, + "loss": 1.8172, + "step": 17300 + }, + { + "epoch": 0.27, + "learning_rate": 4.1617005413295915e-05, + "loss": 1.7869, + "step": 17310 + }, + { + "epoch": 0.27, + "learning_rate": 4.16078992406972e-05, + "loss": 1.7307, + "step": 17320 + }, + { + "epoch": 0.27, + "learning_rate": 4.159878912236121e-05, + "loss": 1.8682, + "step": 17330 + }, + { + "epoch": 0.27, + "learning_rate": 4.158967506045234e-05, + "loss": 1.9418, + "step": 17340 + }, + { + "epoch": 0.27, + "learning_rate": 4.158055705713593e-05, + "loss": 1.9209, + "step": 17350 + }, + { + "epoch": 0.27, + "learning_rate": 4.157143511457825e-05, + "loss": 1.8189, + "step": 17360 + }, + { + "epoch": 0.27, + "learning_rate": 4.1562309234946514e-05, + "loss": 1.8056, + "step": 17370 + }, + { + "epoch": 0.27, + "learning_rate": 4.155317942040887e-05, + "loss": 1.7732, + "step": 17380 + }, + { + "epoch": 0.27, + "learning_rate": 4.1544045673134377e-05, + "loss": 1.7047, + "step": 17390 + }, + { + "epoch": 0.27, + "learning_rate": 4.153490799529307e-05, + "loss": 1.956, + "step": 17400 + }, + { + "epoch": 0.27, + "learning_rate": 4.1525766389055895e-05, + "loss": 1.5792, + "step": 17410 + }, + { + "epoch": 0.27, + "learning_rate": 4.151662085659471e-05, + "loss": 1.5705, + "step": 17420 + }, + { + "epoch": 0.27, + "learning_rate": 4.150747140008236e-05, + "loss": 1.8104, + "step": 17430 + }, + { + "epoch": 0.27, + "learning_rate": 4.149831802169257e-05, + "loss": 1.8144, + "step": 17440 + }, + { + "epoch": 0.27, + "learning_rate": 4.1489160723600026e-05, + "loss": 1.8224, + "step": 17450 + }, + { + "epoch": 0.27, + "learning_rate": 4.147999950798033e-05, + "loss": 1.9291, + "step": 17460 + }, + { + "epoch": 0.27, + "learning_rate": 4.1470834377010024e-05, + "loss": 1.9241, + "step": 17470 + }, + { + "epoch": 0.27, + "learning_rate": 4.1461665332866595e-05, + "loss": 1.9035, + "step": 17480 + }, + { + "epoch": 0.27, + "learning_rate": 4.1452492377728415e-05, + "loss": 1.7754, + "step": 17490 + }, + { + "epoch": 0.27, + "learning_rate": 4.1443315513774825e-05, + "loss": 1.926, + "step": 17500 + }, + { + "epoch": 0.27, + "learning_rate": 4.143413474318608e-05, + "loss": 1.8511, + "step": 17510 + }, + { + "epoch": 0.27, + "learning_rate": 4.1424950068143374e-05, + "loss": 1.9259, + "step": 17520 + }, + { + "epoch": 0.27, + "learning_rate": 4.141576149082881e-05, + "loss": 1.9261, + "step": 17530 + }, + { + "epoch": 0.27, + "learning_rate": 4.140656901342543e-05, + "loss": 1.7308, + "step": 17540 + }, + { + "epoch": 0.27, + "learning_rate": 4.139737263811722e-05, + "loss": 1.9385, + "step": 17550 + }, + { + "epoch": 0.27, + "learning_rate": 4.138817236708904e-05, + "loss": 1.8203, + "step": 17560 + }, + { + "epoch": 0.27, + "learning_rate": 4.137896820252672e-05, + "loss": 1.7017, + "step": 17570 + }, + { + "epoch": 0.27, + "learning_rate": 4.1369760146617014e-05, + "loss": 1.7135, + "step": 17580 + }, + { + "epoch": 0.27, + "learning_rate": 4.136054820154759e-05, + "loss": 1.5351, + "step": 17590 + }, + { + "epoch": 0.27, + "learning_rate": 4.135133236950702e-05, + "loss": 1.9286, + "step": 17600 + }, + { + "epoch": 0.27, + "learning_rate": 4.134211265268484e-05, + "loss": 1.8042, + "step": 17610 + }, + { + "epoch": 0.27, + "learning_rate": 4.133288905327148e-05, + "loss": 1.5988, + "step": 17620 + }, + { + "epoch": 0.27, + "learning_rate": 4.1323661573458306e-05, + "loss": 1.7819, + "step": 17630 + }, + { + "epoch": 0.27, + "learning_rate": 4.13144302154376e-05, + "loss": 1.968, + "step": 17640 + }, + { + "epoch": 0.27, + "learning_rate": 4.130519498140256e-05, + "loss": 1.6891, + "step": 17650 + }, + { + "epoch": 0.27, + "learning_rate": 4.129595587354731e-05, + "loss": 1.8239, + "step": 17660 + }, + { + "epoch": 0.27, + "learning_rate": 4.12867128940669e-05, + "loss": 1.7686, + "step": 17670 + }, + { + "epoch": 0.27, + "learning_rate": 4.12774660451573e-05, + "loss": 1.8351, + "step": 17680 + }, + { + "epoch": 0.27, + "learning_rate": 4.126821532901538e-05, + "loss": 1.8614, + "step": 17690 + }, + { + "epoch": 0.27, + "learning_rate": 4.125896074783896e-05, + "loss": 1.818, + "step": 17700 + }, + { + "epoch": 0.27, + "learning_rate": 4.124970230382676e-05, + "loss": 1.8144, + "step": 17710 + }, + { + "epoch": 0.27, + "learning_rate": 4.124043999917841e-05, + "loss": 1.7685, + "step": 17720 + }, + { + "epoch": 0.28, + "learning_rate": 4.1231173836094464e-05, + "loss": 1.843, + "step": 17730 + }, + { + "epoch": 0.28, + "learning_rate": 4.122190381677641e-05, + "loss": 1.8512, + "step": 17740 + }, + { + "epoch": 0.28, + "learning_rate": 4.1212629943426616e-05, + "loss": 1.7045, + "step": 17750 + }, + { + "epoch": 0.28, + "learning_rate": 4.12033522182484e-05, + "loss": 1.6487, + "step": 17760 + }, + { + "epoch": 0.28, + "learning_rate": 4.1194070643445994e-05, + "loss": 1.7144, + "step": 17770 + }, + { + "epoch": 0.28, + "learning_rate": 4.118478522122451e-05, + "loss": 1.8606, + "step": 17780 + }, + { + "epoch": 0.28, + "learning_rate": 4.117549595379001e-05, + "loss": 1.9938, + "step": 17790 + }, + { + "epoch": 0.28, + "learning_rate": 4.1166202843349456e-05, + "loss": 1.7362, + "step": 17800 + }, + { + "epoch": 0.28, + "learning_rate": 4.1156905892110725e-05, + "loss": 1.799, + "step": 17810 + }, + { + "epoch": 0.28, + "learning_rate": 4.1147605102282595e-05, + "loss": 1.8124, + "step": 17820 + }, + { + "epoch": 0.28, + "learning_rate": 4.113830047607478e-05, + "loss": 1.6871, + "step": 17830 + }, + { + "epoch": 0.28, + "learning_rate": 4.112899201569787e-05, + "loss": 1.7202, + "step": 17840 + }, + { + "epoch": 0.28, + "learning_rate": 4.11196797233634e-05, + "loss": 1.6538, + "step": 17850 + }, + { + "epoch": 0.28, + "learning_rate": 4.111036360128381e-05, + "loss": 1.6437, + "step": 17860 + }, + { + "epoch": 0.28, + "learning_rate": 4.1101043651672444e-05, + "loss": 1.6758, + "step": 17870 + }, + { + "epoch": 0.28, + "learning_rate": 4.109171987674353e-05, + "loss": 1.6477, + "step": 17880 + }, + { + "epoch": 0.28, + "learning_rate": 4.108239227871226e-05, + "loss": 1.7014, + "step": 17890 + }, + { + "epoch": 0.28, + "learning_rate": 4.107306085979467e-05, + "loss": 1.8359, + "step": 17900 + }, + { + "epoch": 0.28, + "learning_rate": 4.106372562220777e-05, + "loss": 1.7704, + "step": 17910 + }, + { + "epoch": 0.28, + "learning_rate": 4.105438656816942e-05, + "loss": 1.7824, + "step": 17920 + }, + { + "epoch": 0.28, + "learning_rate": 4.1045043699898413e-05, + "loss": 1.8951, + "step": 17930 + }, + { + "epoch": 0.28, + "learning_rate": 4.103569701961447e-05, + "loss": 1.7652, + "step": 17940 + }, + { + "epoch": 0.28, + "learning_rate": 4.102634652953816e-05, + "loss": 1.7841, + "step": 17950 + }, + { + "epoch": 0.28, + "learning_rate": 4.101699223189101e-05, + "loss": 1.7326, + "step": 17960 + }, + { + "epoch": 0.28, + "learning_rate": 4.100763412889543e-05, + "loss": 1.8165, + "step": 17970 + }, + { + "epoch": 0.28, + "learning_rate": 4.0998272222774734e-05, + "loss": 1.8275, + "step": 17980 + }, + { + "epoch": 0.28, + "learning_rate": 4.0988906515753143e-05, + "loss": 1.9799, + "step": 17990 + }, + { + "epoch": 0.28, + "learning_rate": 4.097953701005578e-05, + "loss": 1.605, + "step": 18000 + }, + { + "epoch": 0.28, + "learning_rate": 4.097016370790868e-05, + "loss": 1.7013, + "step": 18010 + }, + { + "epoch": 0.28, + "learning_rate": 4.096078661153875e-05, + "loss": 1.6276, + "step": 18020 + }, + { + "epoch": 0.28, + "learning_rate": 4.0951405723173836e-05, + "loss": 1.5157, + "step": 18030 + }, + { + "epoch": 0.28, + "learning_rate": 4.0942021045042664e-05, + "loss": 1.7399, + "step": 18040 + }, + { + "epoch": 0.28, + "learning_rate": 4.0932632579374855e-05, + "loss": 1.7422, + "step": 18050 + }, + { + "epoch": 0.28, + "learning_rate": 4.0923240328400946e-05, + "loss": 1.7179, + "step": 18060 + }, + { + "epoch": 0.28, + "learning_rate": 4.0913844294352374e-05, + "loss": 1.8205, + "step": 18070 + }, + { + "epoch": 0.28, + "learning_rate": 4.0904444479461456e-05, + "loss": 1.6875, + "step": 18080 + }, + { + "epoch": 0.28, + "learning_rate": 4.0895040885961426e-05, + "loss": 1.7905, + "step": 18090 + }, + { + "epoch": 0.28, + "learning_rate": 4.088563351608641e-05, + "loss": 1.6654, + "step": 18100 + }, + { + "epoch": 0.28, + "learning_rate": 4.087622237207141e-05, + "loss": 1.9952, + "step": 18110 + }, + { + "epoch": 0.28, + "learning_rate": 4.086680745615237e-05, + "loss": 1.8413, + "step": 18120 + }, + { + "epoch": 0.28, + "learning_rate": 4.0857388770566086e-05, + "loss": 1.6973, + "step": 18130 + }, + { + "epoch": 0.28, + "learning_rate": 4.084796631755028e-05, + "loss": 1.8969, + "step": 18140 + }, + { + "epoch": 0.28, + "learning_rate": 4.083854009934354e-05, + "loss": 1.9469, + "step": 18150 + }, + { + "epoch": 0.28, + "learning_rate": 4.082911011818539e-05, + "loss": 1.7181, + "step": 18160 + }, + { + "epoch": 0.28, + "learning_rate": 4.08196763763162e-05, + "loss": 1.837, + "step": 18170 + }, + { + "epoch": 0.28, + "learning_rate": 4.081023887597727e-05, + "loss": 1.7704, + "step": 18180 + }, + { + "epoch": 0.28, + "learning_rate": 4.0800797619410775e-05, + "loss": 1.8593, + "step": 18190 + }, + { + "epoch": 0.28, + "learning_rate": 4.079135260885979e-05, + "loss": 1.8582, + "step": 18200 + }, + { + "epoch": 0.28, + "learning_rate": 4.0781903846568283e-05, + "loss": 1.694, + "step": 18210 + }, + { + "epoch": 0.28, + "learning_rate": 4.0772451334781094e-05, + "loss": 1.8066, + "step": 18220 + }, + { + "epoch": 0.28, + "learning_rate": 4.0762995075743986e-05, + "loss": 1.7904, + "step": 18230 + }, + { + "epoch": 0.28, + "learning_rate": 4.075353507170359e-05, + "loss": 1.5556, + "step": 18240 + }, + { + "epoch": 0.28, + "learning_rate": 4.074407132490743e-05, + "loss": 2.0854, + "step": 18250 + }, + { + "epoch": 0.28, + "learning_rate": 4.073460383760392e-05, + "loss": 2.2883, + "step": 18260 + }, + { + "epoch": 0.28, + "learning_rate": 4.0725132612042374e-05, + "loss": 1.9893, + "step": 18270 + }, + { + "epoch": 0.28, + "learning_rate": 4.071565765047298e-05, + "loss": 1.6285, + "step": 18280 + }, + { + "epoch": 0.28, + "learning_rate": 4.070617895514681e-05, + "loss": 1.6268, + "step": 18290 + }, + { + "epoch": 0.28, + "learning_rate": 4.069669652831584e-05, + "loss": 1.7356, + "step": 18300 + }, + { + "epoch": 0.28, + "learning_rate": 4.068721037223292e-05, + "loss": 1.8117, + "step": 18310 + }, + { + "epoch": 0.28, + "learning_rate": 4.067772048915178e-05, + "loss": 1.9067, + "step": 18320 + }, + { + "epoch": 0.28, + "learning_rate": 4.0668226881327064e-05, + "loss": 2.0273, + "step": 18330 + }, + { + "epoch": 0.28, + "learning_rate": 4.0658729551014275e-05, + "loss": 1.9985, + "step": 18340 + }, + { + "epoch": 0.28, + "learning_rate": 4.06492285004698e-05, + "loss": 1.8458, + "step": 18350 + }, + { + "epoch": 0.28, + "learning_rate": 4.063972373195092e-05, + "loss": 2.0248, + "step": 18360 + }, + { + "epoch": 0.29, + "learning_rate": 4.0630215247715806e-05, + "loss": 1.8124, + "step": 18370 + }, + { + "epoch": 0.29, + "learning_rate": 4.062070305002349e-05, + "loss": 1.6876, + "step": 18380 + }, + { + "epoch": 0.29, + "learning_rate": 4.061118714113391e-05, + "loss": 1.8265, + "step": 18390 + }, + { + "epoch": 0.29, + "learning_rate": 4.060166752330786e-05, + "loss": 1.7734, + "step": 18400 + }, + { + "epoch": 0.29, + "learning_rate": 4.059214419880705e-05, + "loss": 1.8269, + "step": 18410 + }, + { + "epoch": 0.29, + "learning_rate": 4.058261716989402e-05, + "loss": 1.8366, + "step": 18420 + }, + { + "epoch": 0.29, + "learning_rate": 4.0573086438832254e-05, + "loss": 1.8681, + "step": 18430 + }, + { + "epoch": 0.29, + "learning_rate": 4.056355200788606e-05, + "loss": 1.7621, + "step": 18440 + }, + { + "epoch": 0.29, + "learning_rate": 4.055401387932065e-05, + "loss": 1.6917, + "step": 18450 + }, + { + "epoch": 0.29, + "learning_rate": 4.0544472055402116e-05, + "loss": 1.7434, + "step": 18460 + }, + { + "epoch": 0.29, + "learning_rate": 4.0534926538397426e-05, + "loss": 1.9047, + "step": 18470 + }, + { + "epoch": 0.29, + "learning_rate": 4.052537733057441e-05, + "loss": 1.7002, + "step": 18480 + }, + { + "epoch": 0.29, + "learning_rate": 4.0515824434201796e-05, + "loss": 1.782, + "step": 18490 + }, + { + "epoch": 0.29, + "learning_rate": 4.050626785154918e-05, + "loss": 1.9203, + "step": 18500 + }, + { + "epoch": 0.29, + "learning_rate": 4.049670758488704e-05, + "loss": 1.7146, + "step": 18510 + }, + { + "epoch": 0.29, + "learning_rate": 4.048714363648671e-05, + "loss": 1.4322, + "step": 18520 + }, + { + "epoch": 0.29, + "learning_rate": 4.047757600862042e-05, + "loss": 1.7889, + "step": 18530 + }, + { + "epoch": 0.29, + "learning_rate": 4.0468004703561264e-05, + "loss": 1.9318, + "step": 18540 + }, + { + "epoch": 0.29, + "learning_rate": 4.04584297235832e-05, + "loss": 1.7388, + "step": 18550 + }, + { + "epoch": 0.29, + "learning_rate": 4.044885107096109e-05, + "loss": 1.8296, + "step": 18560 + }, + { + "epoch": 0.29, + "learning_rate": 4.0439268747970645e-05, + "loss": 2.0499, + "step": 18570 + }, + { + "epoch": 0.29, + "learning_rate": 4.0429682756888436e-05, + "loss": 1.9024, + "step": 18580 + }, + { + "epoch": 0.29, + "learning_rate": 4.042009309999194e-05, + "loss": 1.8188, + "step": 18590 + }, + { + "epoch": 0.29, + "learning_rate": 4.041049977955948e-05, + "loss": 1.7791, + "step": 18600 + }, + { + "epoch": 0.29, + "learning_rate": 4.040090279787025e-05, + "loss": 1.7076, + "step": 18610 + }, + { + "epoch": 0.29, + "learning_rate": 4.039130215720433e-05, + "loss": 1.6423, + "step": 18620 + }, + { + "epoch": 0.29, + "learning_rate": 4.038169785984265e-05, + "loss": 1.6914, + "step": 18630 + }, + { + "epoch": 0.29, + "learning_rate": 4.037208990806702e-05, + "loss": 1.7897, + "step": 18640 + }, + { + "epoch": 0.29, + "learning_rate": 4.0362478304160124e-05, + "loss": 1.7449, + "step": 18650 + }, + { + "epoch": 0.29, + "learning_rate": 4.03528630504055e-05, + "loss": 1.7032, + "step": 18660 + }, + { + "epoch": 0.29, + "learning_rate": 4.0343244149087554e-05, + "loss": 1.6831, + "step": 18670 + }, + { + "epoch": 0.29, + "learning_rate": 4.033362160249157e-05, + "loss": 1.6692, + "step": 18680 + }, + { + "epoch": 0.29, + "learning_rate": 4.032399541290368e-05, + "loss": 1.7421, + "step": 18690 + }, + { + "epoch": 0.29, + "learning_rate": 4.031436558261091e-05, + "loss": 2.0957, + "step": 18700 + }, + { + "epoch": 0.29, + "learning_rate": 4.030473211390113e-05, + "loss": 1.8474, + "step": 18710 + }, + { + "epoch": 0.29, + "learning_rate": 4.0295095009063076e-05, + "loss": 1.7627, + "step": 18720 + }, + { + "epoch": 0.29, + "learning_rate": 4.028545427038635e-05, + "loss": 1.6315, + "step": 18730 + }, + { + "epoch": 0.29, + "learning_rate": 4.0275809900161404e-05, + "loss": 1.9766, + "step": 18740 + }, + { + "epoch": 0.29, + "learning_rate": 4.026616190067958e-05, + "loss": 2.0223, + "step": 18750 + }, + { + "epoch": 0.29, + "learning_rate": 4.025651027423308e-05, + "loss": 1.735, + "step": 18760 + }, + { + "epoch": 0.29, + "learning_rate": 4.024685502311494e-05, + "loss": 1.875, + "step": 18770 + }, + { + "epoch": 0.29, + "learning_rate": 4.023719614961907e-05, + "loss": 1.73, + "step": 18780 + }, + { + "epoch": 0.29, + "learning_rate": 4.022753365604026e-05, + "loss": 1.8447, + "step": 18790 + }, + { + "epoch": 0.29, + "learning_rate": 4.021786754467413e-05, + "loss": 1.7906, + "step": 18800 + }, + { + "epoch": 0.29, + "learning_rate": 4.0208197817817186e-05, + "loss": 1.6478, + "step": 18810 + }, + { + "epoch": 0.29, + "learning_rate": 4.019852447776676e-05, + "loss": 1.7192, + "step": 18820 + }, + { + "epoch": 0.29, + "learning_rate": 4.018884752682109e-05, + "loss": 1.6251, + "step": 18830 + }, + { + "epoch": 0.29, + "learning_rate": 4.017916696727924e-05, + "loss": 1.7682, + "step": 18840 + }, + { + "epoch": 0.29, + "learning_rate": 4.0169482801441114e-05, + "loss": 1.7846, + "step": 18850 + }, + { + "epoch": 0.29, + "learning_rate": 4.0159795031607516e-05, + "loss": 1.7793, + "step": 18860 + }, + { + "epoch": 0.29, + "learning_rate": 4.015010366008007e-05, + "loss": 1.7393, + "step": 18870 + }, + { + "epoch": 0.29, + "learning_rate": 4.014040868916129e-05, + "loss": 1.8395, + "step": 18880 + }, + { + "epoch": 0.29, + "learning_rate": 4.0130710121154517e-05, + "loss": 1.7264, + "step": 18890 + }, + { + "epoch": 0.29, + "learning_rate": 4.0121007958363946e-05, + "loss": 1.8341, + "step": 18900 + }, + { + "epoch": 0.29, + "learning_rate": 4.011130220309465e-05, + "loss": 1.78, + "step": 18910 + }, + { + "epoch": 0.29, + "learning_rate": 4.010159285765253e-05, + "loss": 2.0015, + "step": 18920 + }, + { + "epoch": 0.29, + "learning_rate": 4.0091879924344366e-05, + "loss": 1.785, + "step": 18930 + }, + { + "epoch": 0.29, + "learning_rate": 4.008216340547777e-05, + "loss": 1.7484, + "step": 18940 + }, + { + "epoch": 0.29, + "learning_rate": 4.00724433033612e-05, + "loss": 1.7765, + "step": 18950 + }, + { + "epoch": 0.29, + "learning_rate": 4.006271962030399e-05, + "loss": 1.7668, + "step": 18960 + }, + { + "epoch": 0.29, + "learning_rate": 4.005299235861631e-05, + "loss": 1.8168, + "step": 18970 + }, + { + "epoch": 0.29, + "learning_rate": 4.0043261520609174e-05, + "loss": 1.7325, + "step": 18980 + }, + { + "epoch": 0.29, + "learning_rate": 4.0033527108594465e-05, + "loss": 1.7765, + "step": 18990 + }, + { + "epoch": 0.29, + "learning_rate": 4.002378912488489e-05, + "loss": 1.8701, + "step": 19000 + }, + { + "epoch": 0.29, + "learning_rate": 4.001404757179403e-05, + "loss": 1.7496, + "step": 19010 + }, + { + "epoch": 0.3, + "learning_rate": 4.00043024516363e-05, + "loss": 1.793, + "step": 19020 + }, + { + "epoch": 0.3, + "learning_rate": 3.999455376672697e-05, + "loss": 1.7605, + "step": 19030 + }, + { + "epoch": 0.3, + "learning_rate": 3.998480151938214e-05, + "loss": 1.8325, + "step": 19040 + }, + { + "epoch": 0.3, + "learning_rate": 3.997504571191877e-05, + "loss": 1.8125, + "step": 19050 + }, + { + "epoch": 0.3, + "learning_rate": 3.996528634665466e-05, + "loss": 1.7646, + "step": 19060 + }, + { + "epoch": 0.3, + "learning_rate": 3.995552342590848e-05, + "loss": 1.6872, + "step": 19070 + }, + { + "epoch": 0.3, + "learning_rate": 3.9945756951999695e-05, + "loss": 1.8314, + "step": 19080 + }, + { + "epoch": 0.3, + "learning_rate": 3.993598692724867e-05, + "loss": 1.9722, + "step": 19090 + }, + { + "epoch": 0.3, + "learning_rate": 3.992621335397657e-05, + "loss": 1.7721, + "step": 19100 + }, + { + "epoch": 0.3, + "learning_rate": 3.991643623450542e-05, + "loss": 1.7816, + "step": 19110 + }, + { + "epoch": 0.3, + "learning_rate": 3.99066555711581e-05, + "loss": 1.8161, + "step": 19120 + }, + { + "epoch": 0.3, + "learning_rate": 3.98968713662583e-05, + "loss": 1.7745, + "step": 19130 + }, + { + "epoch": 0.3, + "learning_rate": 3.988708362213059e-05, + "loss": 1.7645, + "step": 19140 + }, + { + "epoch": 0.3, + "learning_rate": 3.987729234110035e-05, + "loss": 1.731, + "step": 19150 + }, + { + "epoch": 0.3, + "learning_rate": 3.9867497525493815e-05, + "loss": 1.7704, + "step": 19160 + }, + { + "epoch": 0.3, + "learning_rate": 3.9857699177638056e-05, + "loss": 1.7141, + "step": 19170 + }, + { + "epoch": 0.3, + "learning_rate": 3.984789729986098e-05, + "loss": 1.7778, + "step": 19180 + }, + { + "epoch": 0.3, + "learning_rate": 3.9838091894491345e-05, + "loss": 1.7421, + "step": 19190 + }, + { + "epoch": 0.3, + "learning_rate": 3.982828296385872e-05, + "loss": 1.7052, + "step": 19200 + }, + { + "epoch": 0.3, + "learning_rate": 3.981847051029356e-05, + "loss": 1.7307, + "step": 19210 + }, + { + "epoch": 0.3, + "learning_rate": 3.980865453612711e-05, + "loss": 1.8335, + "step": 19220 + }, + { + "epoch": 0.3, + "learning_rate": 3.979883504369146e-05, + "loss": 1.8264, + "step": 19230 + }, + { + "epoch": 0.3, + "learning_rate": 3.9789012035319554e-05, + "loss": 1.7544, + "step": 19240 + }, + { + "epoch": 0.3, + "learning_rate": 3.9779185513345164e-05, + "loss": 1.7298, + "step": 19250 + }, + { + "epoch": 0.3, + "learning_rate": 3.9769355480102885e-05, + "loss": 1.7222, + "step": 19260 + }, + { + "epoch": 0.3, + "learning_rate": 3.975952193792817e-05, + "loss": 1.6603, + "step": 19270 + }, + { + "epoch": 0.3, + "learning_rate": 3.974968488915728e-05, + "loss": 1.7057, + "step": 19280 + }, + { + "epoch": 0.3, + "learning_rate": 3.973984433612732e-05, + "loss": 1.7883, + "step": 19290 + }, + { + "epoch": 0.3, + "learning_rate": 3.9730000281176226e-05, + "loss": 1.7152, + "step": 19300 + }, + { + "epoch": 0.3, + "learning_rate": 3.972015272664278e-05, + "loss": 1.7797, + "step": 19310 + }, + { + "epoch": 0.3, + "learning_rate": 3.9710301674866576e-05, + "loss": 1.8524, + "step": 19320 + }, + { + "epoch": 0.3, + "learning_rate": 3.970044712818805e-05, + "loss": 1.7798, + "step": 19330 + }, + { + "epoch": 0.3, + "learning_rate": 3.9690589088948453e-05, + "loss": 1.8383, + "step": 19340 + }, + { + "epoch": 0.3, + "learning_rate": 3.9680727559489895e-05, + "loss": 1.735, + "step": 19350 + }, + { + "epoch": 0.3, + "learning_rate": 3.967086254215527e-05, + "loss": 1.7415, + "step": 19360 + }, + { + "epoch": 0.3, + "learning_rate": 3.9660994039288365e-05, + "loss": 1.8081, + "step": 19370 + }, + { + "epoch": 0.3, + "learning_rate": 3.965112205323373e-05, + "loss": 1.8115, + "step": 19380 + }, + { + "epoch": 0.3, + "learning_rate": 3.964124658633678e-05, + "loss": 1.7512, + "step": 19390 + }, + { + "epoch": 0.3, + "learning_rate": 3.963136764094375e-05, + "loss": 1.775, + "step": 19400 + }, + { + "epoch": 0.3, + "learning_rate": 3.962148521940169e-05, + "loss": 1.9015, + "step": 19410 + }, + { + "epoch": 0.3, + "learning_rate": 3.9611599324058505e-05, + "loss": 1.8619, + "step": 19420 + }, + { + "epoch": 0.3, + "learning_rate": 3.960170995726289e-05, + "loss": 1.8775, + "step": 19430 + }, + { + "epoch": 0.3, + "learning_rate": 3.9591817121364374e-05, + "loss": 1.9008, + "step": 19440 + }, + { + "epoch": 0.3, + "learning_rate": 3.9581920818713325e-05, + "loss": 1.9972, + "step": 19450 + }, + { + "epoch": 0.3, + "learning_rate": 3.9572021051660937e-05, + "loss": 1.8781, + "step": 19460 + }, + { + "epoch": 0.3, + "learning_rate": 3.956211782255919e-05, + "loss": 1.9165, + "step": 19470 + }, + { + "epoch": 0.3, + "learning_rate": 3.955221113376094e-05, + "loss": 1.6503, + "step": 19480 + }, + { + "epoch": 0.3, + "learning_rate": 3.954230098761982e-05, + "loss": 1.7148, + "step": 19490 + }, + { + "epoch": 0.3, + "learning_rate": 3.953238738649031e-05, + "loss": 1.9949, + "step": 19500 + }, + { + "epoch": 0.3, + "learning_rate": 3.95224703327277e-05, + "loss": 1.7523, + "step": 19510 + }, + { + "epoch": 0.3, + "learning_rate": 3.9512549828688096e-05, + "loss": 1.4507, + "step": 19520 + }, + { + "epoch": 0.3, + "learning_rate": 3.9502625876728444e-05, + "loss": 1.7698, + "step": 19530 + }, + { + "epoch": 0.3, + "learning_rate": 3.949269847920649e-05, + "loss": 1.7696, + "step": 19540 + }, + { + "epoch": 0.3, + "learning_rate": 3.94827676384808e-05, + "loss": 1.7032, + "step": 19550 + }, + { + "epoch": 0.3, + "learning_rate": 3.947283335691078e-05, + "loss": 1.7362, + "step": 19560 + }, + { + "epoch": 0.3, + "learning_rate": 3.9462895636856606e-05, + "loss": 1.6577, + "step": 19570 + }, + { + "epoch": 0.3, + "learning_rate": 3.945295448067933e-05, + "loss": 1.5756, + "step": 19580 + }, + { + "epoch": 0.3, + "learning_rate": 3.944300989074077e-05, + "loss": 2.3291, + "step": 19590 + }, + { + "epoch": 0.3, + "learning_rate": 3.9433061869403596e-05, + "loss": 1.8899, + "step": 19600 + }, + { + "epoch": 0.3, + "learning_rate": 3.942311041903127e-05, + "loss": 1.9932, + "step": 19610 + }, + { + "epoch": 0.3, + "learning_rate": 3.9413155541988086e-05, + "loss": 1.924, + "step": 19620 + }, + { + "epoch": 0.3, + "learning_rate": 3.9403197240639134e-05, + "loss": 1.8888, + "step": 19630 + }, + { + "epoch": 0.3, + "learning_rate": 3.939323551735033e-05, + "loss": 1.7904, + "step": 19640 + }, + { + "epoch": 0.3, + "learning_rate": 3.93832703744884e-05, + "loss": 1.8913, + "step": 19650 + }, + { + "epoch": 0.31, + "learning_rate": 3.9373301814420885e-05, + "loss": 1.8015, + "step": 19660 + }, + { + "epoch": 0.31, + "learning_rate": 3.936332983951613e-05, + "loss": 1.8726, + "step": 19670 + }, + { + "epoch": 0.31, + "learning_rate": 3.9353354452143296e-05, + "loss": 2.3297, + "step": 19680 + }, + { + "epoch": 0.31, + "learning_rate": 3.934337565467236e-05, + "loss": 2.3726, + "step": 19690 + }, + { + "epoch": 0.31, + "learning_rate": 3.9333393449474096e-05, + "loss": 2.2656, + "step": 19700 + }, + { + "epoch": 0.31, + "learning_rate": 3.932340783892011e-05, + "loss": 1.8524, + "step": 19710 + }, + { + "epoch": 0.31, + "learning_rate": 3.9313418825382797e-05, + "loss": 1.8907, + "step": 19720 + }, + { + "epoch": 0.31, + "learning_rate": 3.930342641123536e-05, + "loss": 1.6221, + "step": 19730 + }, + { + "epoch": 0.31, + "learning_rate": 3.929343059885182e-05, + "loss": 1.6913, + "step": 19740 + }, + { + "epoch": 0.31, + "learning_rate": 3.9283431390607e-05, + "loss": 1.7511, + "step": 19750 + }, + { + "epoch": 0.31, + "learning_rate": 3.9273428788876545e-05, + "loss": 1.775, + "step": 19760 + }, + { + "epoch": 0.31, + "learning_rate": 3.926342279603686e-05, + "loss": 1.7368, + "step": 19770 + }, + { + "epoch": 0.31, + "learning_rate": 3.925341341446524e-05, + "loss": 1.8073, + "step": 19780 + }, + { + "epoch": 0.31, + "learning_rate": 3.924340064653968e-05, + "loss": 1.683, + "step": 19790 + }, + { + "epoch": 0.31, + "learning_rate": 3.923338449463906e-05, + "loss": 2.0044, + "step": 19800 + }, + { + "epoch": 0.31, + "learning_rate": 3.922336496114304e-05, + "loss": 1.8457, + "step": 19810 + }, + { + "epoch": 0.31, + "learning_rate": 3.9213342048432064e-05, + "loss": 1.9592, + "step": 19820 + }, + { + "epoch": 0.31, + "learning_rate": 3.920331575888742e-05, + "loss": 2.2334, + "step": 19830 + }, + { + "epoch": 0.31, + "learning_rate": 3.919328609489115e-05, + "loss": 2.165, + "step": 19840 + }, + { + "epoch": 0.31, + "learning_rate": 3.9183253058826126e-05, + "loss": 1.883, + "step": 19850 + }, + { + "epoch": 0.31, + "learning_rate": 3.917321665307602e-05, + "loss": 1.8251, + "step": 19860 + }, + { + "epoch": 0.31, + "learning_rate": 3.9163176880025306e-05, + "loss": 1.6437, + "step": 19870 + }, + { + "epoch": 0.31, + "learning_rate": 3.915313374205925e-05, + "loss": 1.6483, + "step": 19880 + }, + { + "epoch": 0.31, + "learning_rate": 3.914308724156392e-05, + "loss": 1.6747, + "step": 19890 + }, + { + "epoch": 0.31, + "learning_rate": 3.9133037380926174e-05, + "loss": 1.7958, + "step": 19900 + }, + { + "epoch": 0.31, + "learning_rate": 3.912298416253369e-05, + "loss": 1.8711, + "step": 19910 + }, + { + "epoch": 0.31, + "learning_rate": 3.911292758877493e-05, + "loss": 1.7244, + "step": 19920 + }, + { + "epoch": 0.31, + "learning_rate": 3.910286766203915e-05, + "loss": 1.3913, + "step": 19930 + }, + { + "epoch": 0.31, + "learning_rate": 3.9092804384716405e-05, + "loss": 1.2789, + "step": 19940 + }, + { + "epoch": 0.31, + "learning_rate": 3.908273775919756e-05, + "loss": 1.267, + "step": 19950 + }, + { + "epoch": 0.31, + "learning_rate": 3.907266778787425e-05, + "loss": 1.305, + "step": 19960 + }, + { + "epoch": 0.31, + "learning_rate": 3.9062594473138914e-05, + "loss": 1.6949, + "step": 19970 + }, + { + "epoch": 0.31, + "learning_rate": 3.9052517817384814e-05, + "loss": 1.726, + "step": 19980 + }, + { + "epoch": 0.31, + "learning_rate": 3.904243782300596e-05, + "loss": 1.6375, + "step": 19990 + }, + { + "epoch": 0.31, + "learning_rate": 3.903235449239719e-05, + "loss": 1.7818, + "step": 20000 + }, + { + "epoch": 0.31, + "eval_loss": 1.6719199419021606, + "eval_runtime": 82.1614, + "eval_samples_per_second": 36.513, + "eval_steps_per_second": 4.564, + "step": 20000 + }, + { + "epoch": 0.31, + "learning_rate": 3.902226782795411e-05, + "loss": 1.7389, + "step": 20010 + }, + { + "epoch": 0.31, + "learning_rate": 3.9012177832073135e-05, + "loss": 1.7549, + "step": 20020 + }, + { + "epoch": 0.31, + "learning_rate": 3.900208450715147e-05, + "loss": 2.4886, + "step": 20030 + }, + { + "epoch": 0.31, + "learning_rate": 3.8991987855587106e-05, + "loss": 1.7799, + "step": 20040 + }, + { + "epoch": 0.31, + "learning_rate": 3.8981887879778826e-05, + "loss": 1.8575, + "step": 20050 + }, + { + "epoch": 0.31, + "learning_rate": 3.897178458212618e-05, + "loss": 1.8324, + "step": 20060 + }, + { + "epoch": 0.31, + "learning_rate": 3.8961677965029564e-05, + "loss": 1.8776, + "step": 20070 + }, + { + "epoch": 0.31, + "learning_rate": 3.8951568030890106e-05, + "loss": 1.8885, + "step": 20080 + }, + { + "epoch": 0.31, + "learning_rate": 3.894145478210975e-05, + "loss": 1.6727, + "step": 20090 + }, + { + "epoch": 0.31, + "learning_rate": 3.893133822109122e-05, + "loss": 1.5491, + "step": 20100 + }, + { + "epoch": 0.31, + "learning_rate": 3.892121835023803e-05, + "loss": 1.6234, + "step": 20110 + }, + { + "epoch": 0.31, + "learning_rate": 3.891109517195448e-05, + "loss": 2.0649, + "step": 20120 + }, + { + "epoch": 0.31, + "learning_rate": 3.890096868864566e-05, + "loss": 1.7587, + "step": 20130 + }, + { + "epoch": 0.31, + "learning_rate": 3.8890838902717416e-05, + "loss": 1.7782, + "step": 20140 + }, + { + "epoch": 0.31, + "learning_rate": 3.888070581657644e-05, + "loss": 1.7231, + "step": 20150 + }, + { + "epoch": 0.31, + "learning_rate": 3.887056943263013e-05, + "loss": 1.661, + "step": 20160 + }, + { + "epoch": 0.31, + "learning_rate": 3.886042975328674e-05, + "loss": 1.6638, + "step": 20170 + }, + { + "epoch": 0.31, + "learning_rate": 3.885028678095526e-05, + "loss": 1.578, + "step": 20180 + }, + { + "epoch": 0.31, + "learning_rate": 3.8840140518045476e-05, + "loss": 1.7251, + "step": 20190 + }, + { + "epoch": 0.31, + "learning_rate": 3.882999096696795e-05, + "loss": 2.0096, + "step": 20200 + }, + { + "epoch": 0.31, + "learning_rate": 3.881983813013407e-05, + "loss": 1.6559, + "step": 20210 + }, + { + "epoch": 0.31, + "learning_rate": 3.880968200995592e-05, + "loss": 1.4021, + "step": 20220 + }, + { + "epoch": 0.31, + "learning_rate": 3.879952260884644e-05, + "loss": 1.3743, + "step": 20230 + }, + { + "epoch": 0.31, + "learning_rate": 3.878935992921931e-05, + "loss": 1.9068, + "step": 20240 + }, + { + "epoch": 0.31, + "learning_rate": 3.8779193973488994e-05, + "loss": 1.9849, + "step": 20250 + }, + { + "epoch": 0.31, + "learning_rate": 3.8769024744070746e-05, + "loss": 1.7681, + "step": 20260 + }, + { + "epoch": 0.31, + "learning_rate": 3.875885224338061e-05, + "loss": 2.0144, + "step": 20270 + }, + { + "epoch": 0.31, + "learning_rate": 3.874867647383535e-05, + "loss": 1.7185, + "step": 20280 + }, + { + "epoch": 0.31, + "learning_rate": 3.8738497437852574e-05, + "loss": 1.5899, + "step": 20290 + }, + { + "epoch": 0.31, + "learning_rate": 3.872831513785062e-05, + "loss": 1.5627, + "step": 20300 + }, + { + "epoch": 0.32, + "learning_rate": 3.871812957624864e-05, + "loss": 1.5517, + "step": 20310 + }, + { + "epoch": 0.32, + "learning_rate": 3.870794075546652e-05, + "loss": 1.543, + "step": 20320 + }, + { + "epoch": 0.32, + "learning_rate": 3.869774867792495e-05, + "loss": 1.5452, + "step": 20330 + }, + { + "epoch": 0.32, + "learning_rate": 3.8687553346045394e-05, + "loss": 1.5281, + "step": 20340 + }, + { + "epoch": 0.32, + "learning_rate": 3.867735476225005e-05, + "loss": 1.5573, + "step": 20350 + }, + { + "epoch": 0.32, + "learning_rate": 3.8667152928961935e-05, + "loss": 1.5516, + "step": 20360 + }, + { + "epoch": 0.32, + "learning_rate": 3.865694784860483e-05, + "loss": 1.7044, + "step": 20370 + }, + { + "epoch": 0.32, + "learning_rate": 3.864673952360326e-05, + "loss": 1.899, + "step": 20380 + }, + { + "epoch": 0.32, + "learning_rate": 3.8636527956382545e-05, + "loss": 1.9532, + "step": 20390 + }, + { + "epoch": 0.32, + "learning_rate": 3.8626313149368774e-05, + "loss": 1.7651, + "step": 20400 + }, + { + "epoch": 0.32, + "learning_rate": 3.8616095104988796e-05, + "loss": 1.7377, + "step": 20410 + }, + { + "epoch": 0.32, + "learning_rate": 3.8605873825670236e-05, + "loss": 1.849, + "step": 20420 + }, + { + "epoch": 0.32, + "learning_rate": 3.859564931384149e-05, + "loss": 2.0254, + "step": 20430 + }, + { + "epoch": 0.32, + "learning_rate": 3.8585421571931705e-05, + "loss": 1.7248, + "step": 20440 + }, + { + "epoch": 0.32, + "learning_rate": 3.857519060237082e-05, + "loss": 1.7746, + "step": 20450 + }, + { + "epoch": 0.32, + "learning_rate": 3.856495640758952e-05, + "loss": 1.7915, + "step": 20460 + }, + { + "epoch": 0.32, + "learning_rate": 3.855471899001927e-05, + "loss": 1.8315, + "step": 20470 + }, + { + "epoch": 0.32, + "learning_rate": 3.85444783520923e-05, + "loss": 1.6788, + "step": 20480 + }, + { + "epoch": 0.32, + "learning_rate": 3.853423449624159e-05, + "loss": 1.8222, + "step": 20490 + }, + { + "epoch": 0.32, + "learning_rate": 3.852398742490091e-05, + "loss": 1.9023, + "step": 20500 + }, + { + "epoch": 0.32, + "learning_rate": 3.8513737140504766e-05, + "loss": 1.7445, + "step": 20510 + }, + { + "epoch": 0.32, + "learning_rate": 3.850348364548844e-05, + "loss": 1.5802, + "step": 20520 + }, + { + "epoch": 0.32, + "learning_rate": 3.8493226942287986e-05, + "loss": 1.9335, + "step": 20530 + }, + { + "epoch": 0.32, + "learning_rate": 3.848296703334022e-05, + "loss": 1.8343, + "step": 20540 + }, + { + "epoch": 0.32, + "learning_rate": 3.8472703921082685e-05, + "loss": 1.8327, + "step": 20550 + }, + { + "epoch": 0.32, + "learning_rate": 3.846243760795373e-05, + "loss": 1.8993, + "step": 20560 + }, + { + "epoch": 0.32, + "learning_rate": 3.845216809639245e-05, + "loss": 2.1533, + "step": 20570 + }, + { + "epoch": 0.32, + "learning_rate": 3.8441895388838675e-05, + "loss": 2.0013, + "step": 20580 + }, + { + "epoch": 0.32, + "learning_rate": 3.843161948773304e-05, + "loss": 2.0934, + "step": 20590 + }, + { + "epoch": 0.32, + "learning_rate": 3.84213403955169e-05, + "loss": 2.1578, + "step": 20600 + }, + { + "epoch": 0.32, + "learning_rate": 3.841105811463238e-05, + "loss": 2.1593, + "step": 20610 + }, + { + "epoch": 0.32, + "learning_rate": 3.840077264752236e-05, + "loss": 1.7487, + "step": 20620 + }, + { + "epoch": 0.32, + "learning_rate": 3.83904839966305e-05, + "loss": 1.8957, + "step": 20630 + }, + { + "epoch": 0.32, + "learning_rate": 3.838019216440119e-05, + "loss": 2.0142, + "step": 20640 + }, + { + "epoch": 0.32, + "learning_rate": 3.836989715327957e-05, + "loss": 1.4616, + "step": 20650 + }, + { + "epoch": 0.32, + "learning_rate": 3.8359598965711566e-05, + "loss": 1.6836, + "step": 20660 + }, + { + "epoch": 0.32, + "learning_rate": 3.8349297604143824e-05, + "loss": 1.7139, + "step": 20670 + }, + { + "epoch": 0.32, + "learning_rate": 3.833899307102378e-05, + "loss": 1.6656, + "step": 20680 + }, + { + "epoch": 0.32, + "learning_rate": 3.83286853687996e-05, + "loss": 1.6214, + "step": 20690 + }, + { + "epoch": 0.32, + "learning_rate": 3.831837449992021e-05, + "loss": 1.6208, + "step": 20700 + }, + { + "epoch": 0.32, + "learning_rate": 3.830806046683528e-05, + "loss": 1.6895, + "step": 20710 + }, + { + "epoch": 0.32, + "learning_rate": 3.829774327199524e-05, + "loss": 1.6621, + "step": 20720 + }, + { + "epoch": 0.32, + "learning_rate": 3.828742291785127e-05, + "loss": 1.7969, + "step": 20730 + }, + { + "epoch": 0.32, + "learning_rate": 3.8277099406855305e-05, + "loss": 2.0026, + "step": 20740 + }, + { + "epoch": 0.32, + "learning_rate": 3.8266772741460015e-05, + "loss": 1.8381, + "step": 20750 + }, + { + "epoch": 0.32, + "learning_rate": 3.825644292411884e-05, + "loss": 1.7816, + "step": 20760 + }, + { + "epoch": 0.32, + "learning_rate": 3.8246109957285945e-05, + "loss": 1.7813, + "step": 20770 + }, + { + "epoch": 0.32, + "learning_rate": 3.8235773843416276e-05, + "loss": 1.8329, + "step": 20780 + }, + { + "epoch": 0.32, + "learning_rate": 3.822543458496548e-05, + "loss": 1.7267, + "step": 20790 + }, + { + "epoch": 0.32, + "learning_rate": 3.821509218439001e-05, + "loss": 1.8711, + "step": 20800 + }, + { + "epoch": 0.32, + "learning_rate": 3.820474664414701e-05, + "loss": 1.7582, + "step": 20810 + }, + { + "epoch": 0.32, + "learning_rate": 3.81943979666944e-05, + "loss": 1.6506, + "step": 20820 + }, + { + "epoch": 0.32, + "learning_rate": 3.8184046154490846e-05, + "loss": 1.8114, + "step": 20830 + }, + { + "epoch": 0.32, + "learning_rate": 3.817369120999574e-05, + "loss": 1.7804, + "step": 20840 + }, + { + "epoch": 0.32, + "learning_rate": 3.8163333135669245e-05, + "loss": 1.7462, + "step": 20850 + }, + { + "epoch": 0.32, + "learning_rate": 3.815297193397224e-05, + "loss": 1.7284, + "step": 20860 + }, + { + "epoch": 0.32, + "learning_rate": 3.814260760736636e-05, + "loss": 1.7066, + "step": 20870 + }, + { + "epoch": 0.32, + "learning_rate": 3.813224015831399e-05, + "loss": 1.7458, + "step": 20880 + }, + { + "epoch": 0.32, + "learning_rate": 3.812186958927824e-05, + "loss": 1.7124, + "step": 20890 + }, + { + "epoch": 0.32, + "learning_rate": 3.811149590272298e-05, + "loss": 1.8492, + "step": 20900 + }, + { + "epoch": 0.32, + "learning_rate": 3.810111910111281e-05, + "loss": 1.7383, + "step": 20910 + }, + { + "epoch": 0.32, + "learning_rate": 3.809073918691306e-05, + "loss": 1.7421, + "step": 20920 + }, + { + "epoch": 0.32, + "learning_rate": 3.808035616258982e-05, + "loss": 1.7694, + "step": 20930 + }, + { + "epoch": 0.32, + "learning_rate": 3.8069970030609905e-05, + "loss": 1.775, + "step": 20940 + }, + { + "epoch": 0.33, + "learning_rate": 3.805958079344088e-05, + "loss": 1.6878, + "step": 20950 + }, + { + "epoch": 0.33, + "learning_rate": 3.804918845355102e-05, + "loss": 1.5963, + "step": 20960 + }, + { + "epoch": 0.33, + "learning_rate": 3.803879301340938e-05, + "loss": 1.903, + "step": 20970 + }, + { + "epoch": 0.33, + "learning_rate": 3.802839447548572e-05, + "loss": 1.7145, + "step": 20980 + }, + { + "epoch": 0.33, + "learning_rate": 3.8017992842250544e-05, + "loss": 1.8377, + "step": 20990 + }, + { + "epoch": 0.33, + "learning_rate": 3.8007588116175094e-05, + "loss": 1.8695, + "step": 21000 + }, + { + "epoch": 0.33, + "learning_rate": 3.799718029973135e-05, + "loss": 1.7653, + "step": 21010 + }, + { + "epoch": 0.33, + "learning_rate": 3.798676939539201e-05, + "loss": 1.801, + "step": 21020 + }, + { + "epoch": 0.33, + "learning_rate": 3.7976355405630534e-05, + "loss": 1.6953, + "step": 21030 + }, + { + "epoch": 0.33, + "learning_rate": 3.796593833292108e-05, + "loss": 1.8006, + "step": 21040 + }, + { + "epoch": 0.33, + "learning_rate": 3.7955518179738564e-05, + "loss": 1.7951, + "step": 21050 + }, + { + "epoch": 0.33, + "learning_rate": 3.7945094948558635e-05, + "loss": 1.7566, + "step": 21060 + }, + { + "epoch": 0.33, + "learning_rate": 3.7934668641857654e-05, + "loss": 1.7827, + "step": 21070 + }, + { + "epoch": 0.33, + "learning_rate": 3.792423926211272e-05, + "loss": 1.8567, + "step": 21080 + }, + { + "epoch": 0.33, + "learning_rate": 3.7913806811801675e-05, + "loss": 1.7523, + "step": 21090 + }, + { + "epoch": 0.33, + "learning_rate": 3.790337129340308e-05, + "loss": 1.8241, + "step": 21100 + }, + { + "epoch": 0.33, + "learning_rate": 3.7892932709396224e-05, + "loss": 1.8981, + "step": 21110 + }, + { + "epoch": 0.33, + "learning_rate": 3.788249106226111e-05, + "loss": 1.8243, + "step": 21120 + }, + { + "epoch": 0.33, + "learning_rate": 3.787204635447853e-05, + "loss": 1.7446, + "step": 21130 + }, + { + "epoch": 0.33, + "learning_rate": 3.7861598588529915e-05, + "loss": 1.9259, + "step": 21140 + }, + { + "epoch": 0.33, + "learning_rate": 3.7851147766897474e-05, + "loss": 1.8173, + "step": 21150 + }, + { + "epoch": 0.33, + "learning_rate": 3.784069389206414e-05, + "loss": 1.7805, + "step": 21160 + }, + { + "epoch": 0.33, + "learning_rate": 3.7830236966513566e-05, + "loss": 1.7785, + "step": 21170 + }, + { + "epoch": 0.33, + "learning_rate": 3.781977699273013e-05, + "loss": 1.7508, + "step": 21180 + }, + { + "epoch": 0.33, + "learning_rate": 3.780931397319893e-05, + "loss": 1.7411, + "step": 21190 + }, + { + "epoch": 0.33, + "learning_rate": 3.77988479104058e-05, + "loss": 1.7592, + "step": 21200 + }, + { + "epoch": 0.33, + "learning_rate": 3.778837880683727e-05, + "loss": 1.5076, + "step": 21210 + }, + { + "epoch": 0.33, + "learning_rate": 3.777790666498061e-05, + "loss": 1.5261, + "step": 21220 + }, + { + "epoch": 0.33, + "learning_rate": 3.776743148732384e-05, + "loss": 1.8198, + "step": 21230 + }, + { + "epoch": 0.33, + "learning_rate": 3.775695327635564e-05, + "loss": 1.6624, + "step": 21240 + }, + { + "epoch": 0.33, + "learning_rate": 3.774647203456547e-05, + "loss": 1.8671, + "step": 21250 + }, + { + "epoch": 0.33, + "learning_rate": 3.773598776444347e-05, + "loss": 1.8776, + "step": 21260 + }, + { + "epoch": 0.33, + "learning_rate": 3.772550046848051e-05, + "loss": 1.9879, + "step": 21270 + }, + { + "epoch": 0.33, + "learning_rate": 3.771501014916818e-05, + "loss": 1.7667, + "step": 21280 + }, + { + "epoch": 0.33, + "learning_rate": 3.770451680899881e-05, + "loss": 1.786, + "step": 21290 + }, + { + "epoch": 0.33, + "learning_rate": 3.769402045046542e-05, + "loss": 1.6326, + "step": 21300 + }, + { + "epoch": 0.33, + "learning_rate": 3.7683521076061746e-05, + "loss": 2.1336, + "step": 21310 + }, + { + "epoch": 0.33, + "learning_rate": 3.7673018688282255e-05, + "loss": 1.8962, + "step": 21320 + }, + { + "epoch": 0.33, + "learning_rate": 3.7662513289622136e-05, + "loss": 1.9161, + "step": 21330 + }, + { + "epoch": 0.33, + "learning_rate": 3.7652004882577264e-05, + "loss": 1.837, + "step": 21340 + }, + { + "epoch": 0.33, + "learning_rate": 3.764149346964426e-05, + "loss": 2.0038, + "step": 21350 + }, + { + "epoch": 0.33, + "learning_rate": 3.7630979053320435e-05, + "loss": 1.6784, + "step": 21360 + }, + { + "epoch": 0.33, + "learning_rate": 3.762046163610384e-05, + "loss": 1.6955, + "step": 21370 + }, + { + "epoch": 0.33, + "learning_rate": 3.7609941220493206e-05, + "loss": 1.9777, + "step": 21380 + }, + { + "epoch": 0.33, + "learning_rate": 3.759941780898801e-05, + "loss": 2.3311, + "step": 21390 + }, + { + "epoch": 0.33, + "learning_rate": 3.7588891404088415e-05, + "loss": 1.7348, + "step": 21400 + }, + { + "epoch": 0.33, + "learning_rate": 3.757836200829531e-05, + "loss": 1.7724, + "step": 21410 + }, + { + "epoch": 0.33, + "learning_rate": 3.756782962411028e-05, + "loss": 1.8227, + "step": 21420 + }, + { + "epoch": 0.33, + "learning_rate": 3.755729425403565e-05, + "loss": 1.7858, + "step": 21430 + }, + { + "epoch": 0.33, + "learning_rate": 3.754675590057441e-05, + "loss": 1.7256, + "step": 21440 + }, + { + "epoch": 0.33, + "learning_rate": 3.753621456623029e-05, + "loss": 1.8612, + "step": 21450 + }, + { + "epoch": 0.33, + "learning_rate": 3.7525670253507735e-05, + "loss": 2.4958, + "step": 21460 + }, + { + "epoch": 0.33, + "learning_rate": 3.751512296491186e-05, + "loss": 1.9053, + "step": 21470 + }, + { + "epoch": 0.33, + "learning_rate": 3.750457270294854e-05, + "loss": 1.7845, + "step": 21480 + }, + { + "epoch": 0.33, + "learning_rate": 3.7494019470124295e-05, + "loss": 1.7628, + "step": 21490 + }, + { + "epoch": 0.33, + "learning_rate": 3.74834632689464e-05, + "loss": 1.725, + "step": 21500 + }, + { + "epoch": 0.33, + "learning_rate": 3.747290410192282e-05, + "loss": 1.7116, + "step": 21510 + }, + { + "epoch": 0.33, + "learning_rate": 3.7462341971562214e-05, + "loss": 1.7632, + "step": 21520 + }, + { + "epoch": 0.33, + "learning_rate": 3.745177688037396e-05, + "loss": 1.993, + "step": 21530 + }, + { + "epoch": 0.33, + "learning_rate": 3.744120883086813e-05, + "loss": 1.6328, + "step": 21540 + }, + { + "epoch": 0.33, + "learning_rate": 3.74306378255555e-05, + "loss": 1.5776, + "step": 21550 + }, + { + "epoch": 0.33, + "learning_rate": 3.742006386694756e-05, + "loss": 1.7845, + "step": 21560 + }, + { + "epoch": 0.33, + "learning_rate": 3.7409486957556486e-05, + "loss": 1.889, + "step": 21570 + }, + { + "epoch": 0.33, + "learning_rate": 3.7398907099895154e-05, + "loss": 1.7302, + "step": 21580 + }, + { + "epoch": 0.33, + "learning_rate": 3.738832429647716e-05, + "loss": 1.8648, + "step": 21590 + }, + { + "epoch": 0.34, + "learning_rate": 3.7377738549816774e-05, + "loss": 1.7418, + "step": 21600 + }, + { + "epoch": 0.34, + "learning_rate": 3.736714986242899e-05, + "loss": 1.6086, + "step": 21610 + }, + { + "epoch": 0.34, + "learning_rate": 3.7356558236829476e-05, + "loss": 1.7707, + "step": 21620 + }, + { + "epoch": 0.34, + "learning_rate": 3.7345963675534623e-05, + "loss": 1.8387, + "step": 21630 + }, + { + "epoch": 0.34, + "learning_rate": 3.733536618106151e-05, + "loss": 1.7437, + "step": 21640 + }, + { + "epoch": 0.34, + "learning_rate": 3.7324765755927896e-05, + "loss": 1.7369, + "step": 21650 + }, + { + "epoch": 0.34, + "learning_rate": 3.731416240265226e-05, + "loss": 1.7419, + "step": 21660 + }, + { + "epoch": 0.34, + "learning_rate": 3.730355612375377e-05, + "loss": 1.717, + "step": 21670 + }, + { + "epoch": 0.34, + "learning_rate": 3.729294692175228e-05, + "loss": 1.808, + "step": 21680 + }, + { + "epoch": 0.34, + "learning_rate": 3.728233479916835e-05, + "loss": 1.8349, + "step": 21690 + }, + { + "epoch": 0.34, + "learning_rate": 3.7271719758523225e-05, + "loss": 1.6731, + "step": 21700 + }, + { + "epoch": 0.34, + "learning_rate": 3.726110180233885e-05, + "loss": 1.7641, + "step": 21710 + }, + { + "epoch": 0.34, + "learning_rate": 3.725048093313787e-05, + "loss": 1.8213, + "step": 21720 + }, + { + "epoch": 0.34, + "learning_rate": 3.723985715344359e-05, + "loss": 1.8582, + "step": 21730 + }, + { + "epoch": 0.34, + "learning_rate": 3.722923046578004e-05, + "loss": 1.8905, + "step": 21740 + }, + { + "epoch": 0.34, + "learning_rate": 3.7218600872671926e-05, + "loss": 1.7755, + "step": 21750 + }, + { + "epoch": 0.34, + "learning_rate": 3.720796837664466e-05, + "loss": 1.749, + "step": 21760 + }, + { + "epoch": 0.34, + "learning_rate": 3.719733298022431e-05, + "loss": 1.7706, + "step": 21770 + }, + { + "epoch": 0.34, + "learning_rate": 3.718669468593767e-05, + "loss": 1.8133, + "step": 21780 + }, + { + "epoch": 0.34, + "learning_rate": 3.71760534963122e-05, + "loss": 1.7395, + "step": 21790 + }, + { + "epoch": 0.34, + "learning_rate": 3.716540941387606e-05, + "loss": 1.8009, + "step": 21800 + }, + { + "epoch": 0.34, + "learning_rate": 3.71547624411581e-05, + "loss": 1.9699, + "step": 21810 + }, + { + "epoch": 0.34, + "learning_rate": 3.714411258068783e-05, + "loss": 1.9111, + "step": 21820 + }, + { + "epoch": 0.34, + "learning_rate": 3.7133459834995465e-05, + "loss": 1.763, + "step": 21830 + }, + { + "epoch": 0.34, + "learning_rate": 3.712280420661192e-05, + "loss": 1.6418, + "step": 21840 + }, + { + "epoch": 0.34, + "learning_rate": 3.7112145698068776e-05, + "loss": 1.6618, + "step": 21850 + }, + { + "epoch": 0.34, + "learning_rate": 3.71014843118983e-05, + "loss": 1.5545, + "step": 21860 + }, + { + "epoch": 0.34, + "learning_rate": 3.7090820050633436e-05, + "loss": 1.5446, + "step": 21870 + }, + { + "epoch": 0.34, + "learning_rate": 3.708015291680783e-05, + "loss": 1.6402, + "step": 21880 + }, + { + "epoch": 0.34, + "learning_rate": 3.706948291295581e-05, + "loss": 1.5775, + "step": 21890 + }, + { + "epoch": 0.34, + "learning_rate": 3.7058810041612356e-05, + "loss": 1.8741, + "step": 21900 + }, + { + "epoch": 0.34, + "learning_rate": 3.704813430531316e-05, + "loss": 1.8697, + "step": 21910 + }, + { + "epoch": 0.34, + "learning_rate": 3.7037455706594596e-05, + "loss": 1.8899, + "step": 21920 + }, + { + "epoch": 0.34, + "learning_rate": 3.702677424799368e-05, + "loss": 1.6944, + "step": 21930 + }, + { + "epoch": 0.34, + "learning_rate": 3.701608993204815e-05, + "loss": 1.7497, + "step": 21940 + }, + { + "epoch": 0.34, + "learning_rate": 3.70054027612964e-05, + "loss": 1.7852, + "step": 21950 + }, + { + "epoch": 0.34, + "learning_rate": 3.699471273827753e-05, + "loss": 1.734, + "step": 21960 + }, + { + "epoch": 0.34, + "learning_rate": 3.698401986553127e-05, + "loss": 1.6974, + "step": 21970 + }, + { + "epoch": 0.34, + "learning_rate": 3.6973324145598065e-05, + "loss": 1.7039, + "step": 21980 + }, + { + "epoch": 0.34, + "learning_rate": 3.6962625581019024e-05, + "loss": 1.7583, + "step": 21990 + }, + { + "epoch": 0.34, + "learning_rate": 3.695192417433593e-05, + "loss": 1.8593, + "step": 22000 + }, + { + "epoch": 0.34, + "learning_rate": 3.694121992809125e-05, + "loss": 1.9412, + "step": 22010 + }, + { + "epoch": 0.34, + "learning_rate": 3.693051284482812e-05, + "loss": 1.9218, + "step": 22020 + }, + { + "epoch": 0.34, + "learning_rate": 3.6919802927090345e-05, + "loss": 1.8648, + "step": 22030 + }, + { + "epoch": 0.34, + "learning_rate": 3.690909017742241e-05, + "loss": 1.5874, + "step": 22040 + }, + { + "epoch": 0.34, + "learning_rate": 3.689837459836948e-05, + "loss": 1.7104, + "step": 22050 + }, + { + "epoch": 0.34, + "learning_rate": 3.688765619247736e-05, + "loss": 1.8052, + "step": 22060 + }, + { + "epoch": 0.34, + "learning_rate": 3.6876934962292575e-05, + "loss": 1.8381, + "step": 22070 + }, + { + "epoch": 0.34, + "learning_rate": 3.6866210910362284e-05, + "loss": 1.8112, + "step": 22080 + }, + { + "epoch": 0.34, + "learning_rate": 3.6855484039234325e-05, + "loss": 1.7958, + "step": 22090 + }, + { + "epoch": 0.34, + "learning_rate": 3.684475435145722e-05, + "loss": 1.9857, + "step": 22100 + }, + { + "epoch": 0.34, + "learning_rate": 3.6834021849580135e-05, + "loss": 1.7658, + "step": 22110 + }, + { + "epoch": 0.34, + "learning_rate": 3.682328653615293e-05, + "loss": 1.7657, + "step": 22120 + }, + { + "epoch": 0.34, + "learning_rate": 3.681254841372612e-05, + "loss": 1.5535, + "step": 22130 + }, + { + "epoch": 0.34, + "learning_rate": 3.6801807484850885e-05, + "loss": 1.7207, + "step": 22140 + }, + { + "epoch": 0.34, + "learning_rate": 3.679106375207908e-05, + "loss": 1.7034, + "step": 22150 + }, + { + "epoch": 0.34, + "learning_rate": 3.678031721796323e-05, + "loss": 2.0085, + "step": 22160 + }, + { + "epoch": 0.34, + "learning_rate": 3.67695678850565e-05, + "loss": 1.7749, + "step": 22170 + }, + { + "epoch": 0.34, + "learning_rate": 3.675881575591275e-05, + "loss": 1.8751, + "step": 22180 + }, + { + "epoch": 0.34, + "learning_rate": 3.674806083308648e-05, + "loss": 1.8139, + "step": 22190 + }, + { + "epoch": 0.34, + "learning_rate": 3.673730311913288e-05, + "loss": 1.5534, + "step": 22200 + }, + { + "epoch": 0.34, + "learning_rate": 3.672654261660779e-05, + "loss": 1.6991, + "step": 22210 + }, + { + "epoch": 0.34, + "learning_rate": 3.671577932806769e-05, + "loss": 2.0929, + "step": 22220 + }, + { + "epoch": 0.34, + "learning_rate": 3.6705013256069773e-05, + "loss": 1.6748, + "step": 22230 + }, + { + "epoch": 0.35, + "learning_rate": 3.669424440317183e-05, + "loss": 1.7667, + "step": 22240 + }, + { + "epoch": 0.35, + "learning_rate": 3.668347277193238e-05, + "loss": 1.8185, + "step": 22250 + }, + { + "epoch": 0.35, + "learning_rate": 3.667269836491054e-05, + "loss": 1.7565, + "step": 22260 + }, + { + "epoch": 0.35, + "learning_rate": 3.666192118466614e-05, + "loss": 1.7857, + "step": 22270 + }, + { + "epoch": 0.35, + "learning_rate": 3.665114123375961e-05, + "loss": 1.6989, + "step": 22280 + }, + { + "epoch": 0.35, + "learning_rate": 3.664035851475211e-05, + "loss": 1.7107, + "step": 22290 + }, + { + "epoch": 0.35, + "learning_rate": 3.66295730302054e-05, + "loss": 2.0024, + "step": 22300 + }, + { + "epoch": 0.35, + "learning_rate": 3.6618784782681915e-05, + "loss": 2.0356, + "step": 22310 + }, + { + "epoch": 0.35, + "learning_rate": 3.660799377474475e-05, + "loss": 1.9373, + "step": 22320 + }, + { + "epoch": 0.35, + "learning_rate": 3.6597200008957654e-05, + "loss": 1.7353, + "step": 22330 + }, + { + "epoch": 0.35, + "learning_rate": 3.6586403487885036e-05, + "loss": 1.7009, + "step": 22340 + }, + { + "epoch": 0.35, + "learning_rate": 3.6575604214091955e-05, + "loss": 1.7529, + "step": 22350 + }, + { + "epoch": 0.35, + "learning_rate": 3.656480219014411e-05, + "loss": 1.6625, + "step": 22360 + }, + { + "epoch": 0.35, + "learning_rate": 3.655399741860788e-05, + "loss": 1.6758, + "step": 22370 + }, + { + "epoch": 0.35, + "learning_rate": 3.654318990205029e-05, + "loss": 1.8066, + "step": 22380 + }, + { + "epoch": 0.35, + "learning_rate": 3.6532379643038986e-05, + "loss": 1.6777, + "step": 22390 + }, + { + "epoch": 0.35, + "learning_rate": 3.652156664414231e-05, + "loss": 1.7312, + "step": 22400 + }, + { + "epoch": 0.35, + "learning_rate": 3.651075090792924e-05, + "loss": 2.0819, + "step": 22410 + }, + { + "epoch": 0.35, + "learning_rate": 3.649993243696938e-05, + "loss": 2.0563, + "step": 22420 + }, + { + "epoch": 0.35, + "learning_rate": 3.6489111233833026e-05, + "loss": 1.9119, + "step": 22430 + }, + { + "epoch": 0.35, + "learning_rate": 3.6478287301091085e-05, + "loss": 1.9691, + "step": 22440 + }, + { + "epoch": 0.35, + "learning_rate": 3.646746064131513e-05, + "loss": 1.925, + "step": 22450 + }, + { + "epoch": 0.35, + "learning_rate": 3.6456631257077383e-05, + "loss": 1.7243, + "step": 22460 + }, + { + "epoch": 0.35, + "learning_rate": 3.644579915095071e-05, + "loss": 1.736, + "step": 22470 + }, + { + "epoch": 0.35, + "learning_rate": 3.643496432550864e-05, + "loss": 1.8087, + "step": 22480 + }, + { + "epoch": 0.35, + "learning_rate": 3.6424126783325296e-05, + "loss": 1.9584, + "step": 22490 + }, + { + "epoch": 0.35, + "learning_rate": 3.641328652697551e-05, + "loss": 1.9199, + "step": 22500 + }, + { + "epoch": 0.35, + "learning_rate": 3.6402443559034725e-05, + "loss": 1.9966, + "step": 22510 + }, + { + "epoch": 0.35, + "learning_rate": 3.6391597882079035e-05, + "loss": 1.8249, + "step": 22520 + }, + { + "epoch": 0.35, + "learning_rate": 3.6380749498685175e-05, + "loss": 1.8854, + "step": 22530 + }, + { + "epoch": 0.35, + "learning_rate": 3.6369898411430526e-05, + "loss": 1.9154, + "step": 22540 + }, + { + "epoch": 0.35, + "learning_rate": 3.6359044622893104e-05, + "loss": 1.7194, + "step": 22550 + }, + { + "epoch": 0.35, + "learning_rate": 3.634818813565159e-05, + "loss": 1.6552, + "step": 22560 + }, + { + "epoch": 0.35, + "learning_rate": 3.633732895228527e-05, + "loss": 1.7559, + "step": 22570 + }, + { + "epoch": 0.35, + "learning_rate": 3.632646707537411e-05, + "loss": 2.0456, + "step": 22580 + }, + { + "epoch": 0.35, + "learning_rate": 3.631560250749868e-05, + "loss": 2.4504, + "step": 22590 + }, + { + "epoch": 0.35, + "learning_rate": 3.630473525124021e-05, + "loss": 2.4843, + "step": 22600 + }, + { + "epoch": 0.35, + "learning_rate": 3.629386530918057e-05, + "loss": 2.4479, + "step": 22610 + }, + { + "epoch": 0.35, + "learning_rate": 3.628299268390225e-05, + "loss": 1.8605, + "step": 22620 + }, + { + "epoch": 0.35, + "learning_rate": 3.62721173779884e-05, + "loss": 1.7632, + "step": 22630 + }, + { + "epoch": 0.35, + "learning_rate": 3.62612393940228e-05, + "loss": 1.7765, + "step": 22640 + }, + { + "epoch": 0.35, + "learning_rate": 3.625035873458985e-05, + "loss": 1.6999, + "step": 22650 + }, + { + "epoch": 0.35, + "learning_rate": 3.6239475402274604e-05, + "loss": 1.7993, + "step": 22660 + }, + { + "epoch": 0.35, + "learning_rate": 3.622858939966275e-05, + "loss": 1.8779, + "step": 22670 + }, + { + "epoch": 0.35, + "learning_rate": 3.62177007293406e-05, + "loss": 1.8765, + "step": 22680 + }, + { + "epoch": 0.35, + "learning_rate": 3.6206809393895106e-05, + "loss": 1.7534, + "step": 22690 + }, + { + "epoch": 0.35, + "learning_rate": 3.6195915395913856e-05, + "loss": 1.8675, + "step": 22700 + }, + { + "epoch": 0.35, + "learning_rate": 3.6185018737985074e-05, + "loss": 1.7767, + "step": 22710 + }, + { + "epoch": 0.35, + "learning_rate": 3.617411942269759e-05, + "loss": 1.6999, + "step": 22720 + }, + { + "epoch": 0.35, + "learning_rate": 3.6163217452640895e-05, + "loss": 1.661, + "step": 22730 + }, + { + "epoch": 0.35, + "learning_rate": 3.61523128304051e-05, + "loss": 1.67, + "step": 22740 + }, + { + "epoch": 0.35, + "learning_rate": 3.614140555858096e-05, + "loss": 1.7231, + "step": 22750 + }, + { + "epoch": 0.35, + "learning_rate": 3.6130495639759827e-05, + "loss": 1.9564, + "step": 22760 + }, + { + "epoch": 0.35, + "learning_rate": 3.611958307653371e-05, + "loss": 1.7446, + "step": 22770 + }, + { + "epoch": 0.35, + "learning_rate": 3.610866787149523e-05, + "loss": 1.8313, + "step": 22780 + }, + { + "epoch": 0.35, + "learning_rate": 3.6097750027237654e-05, + "loss": 1.6877, + "step": 22790 + }, + { + "epoch": 0.35, + "learning_rate": 3.608682954635485e-05, + "loss": 1.6592, + "step": 22800 + }, + { + "epoch": 0.35, + "learning_rate": 3.607590643144135e-05, + "loss": 1.5947, + "step": 22810 + }, + { + "epoch": 0.35, + "learning_rate": 3.606498068509226e-05, + "loss": 1.7021, + "step": 22820 + }, + { + "epoch": 0.35, + "learning_rate": 3.605405230990336e-05, + "loss": 1.4892, + "step": 22830 + }, + { + "epoch": 0.35, + "learning_rate": 3.604312130847104e-05, + "loss": 1.8037, + "step": 22840 + }, + { + "epoch": 0.35, + "learning_rate": 3.603218768339229e-05, + "loss": 1.6658, + "step": 22850 + }, + { + "epoch": 0.35, + "learning_rate": 3.6021251437264746e-05, + "loss": 1.8919, + "step": 22860 + }, + { + "epoch": 0.35, + "learning_rate": 3.6010312572686676e-05, + "loss": 1.5664, + "step": 22870 + }, + { + "epoch": 0.35, + "learning_rate": 3.599937109225695e-05, + "loss": 1.6721, + "step": 22880 + }, + { + "epoch": 0.36, + "learning_rate": 3.5988426998575057e-05, + "loss": 1.7573, + "step": 22890 + }, + { + "epoch": 0.36, + "learning_rate": 3.597748029424113e-05, + "loss": 1.7993, + "step": 22900 + }, + { + "epoch": 0.36, + "learning_rate": 3.59665309818559e-05, + "loss": 1.7229, + "step": 22910 + }, + { + "epoch": 0.36, + "learning_rate": 3.595557906402073e-05, + "loss": 1.7968, + "step": 22920 + }, + { + "epoch": 0.36, + "learning_rate": 3.594462454333759e-05, + "loss": 1.917, + "step": 22930 + }, + { + "epoch": 0.36, + "learning_rate": 3.59336674224091e-05, + "loss": 1.7526, + "step": 22940 + }, + { + "epoch": 0.36, + "learning_rate": 3.592270770383844e-05, + "loss": 1.6933, + "step": 22950 + }, + { + "epoch": 0.36, + "learning_rate": 3.591174539022947e-05, + "loss": 1.7998, + "step": 22960 + }, + { + "epoch": 0.36, + "learning_rate": 3.590078048418663e-05, + "loss": 1.7173, + "step": 22970 + }, + { + "epoch": 0.36, + "learning_rate": 3.5889812988314976e-05, + "loss": 1.7017, + "step": 22980 + }, + { + "epoch": 0.36, + "learning_rate": 3.587884290522019e-05, + "loss": 1.784, + "step": 22990 + }, + { + "epoch": 0.36, + "learning_rate": 3.5867870237508574e-05, + "loss": 1.8082, + "step": 23000 + }, + { + "epoch": 0.36, + "learning_rate": 3.5856894987787023e-05, + "loss": 1.5939, + "step": 23010 + }, + { + "epoch": 0.36, + "learning_rate": 3.584591715866307e-05, + "loss": 1.7126, + "step": 23020 + }, + { + "epoch": 0.36, + "learning_rate": 3.583493675274484e-05, + "loss": 1.7157, + "step": 23030 + }, + { + "epoch": 0.36, + "learning_rate": 3.582395377264109e-05, + "loss": 1.7234, + "step": 23040 + }, + { + "epoch": 0.36, + "learning_rate": 3.5812968220961165e-05, + "loss": 1.6123, + "step": 23050 + }, + { + "epoch": 0.36, + "learning_rate": 3.5801980100315046e-05, + "loss": 1.7661, + "step": 23060 + }, + { + "epoch": 0.36, + "learning_rate": 3.5790989413313304e-05, + "loss": 1.9309, + "step": 23070 + }, + { + "epoch": 0.36, + "learning_rate": 3.577999616256713e-05, + "loss": 1.8323, + "step": 23080 + }, + { + "epoch": 0.36, + "learning_rate": 3.576900035068833e-05, + "loss": 1.8078, + "step": 23090 + }, + { + "epoch": 0.36, + "learning_rate": 3.5758001980289304e-05, + "loss": 1.6526, + "step": 23100 + }, + { + "epoch": 0.36, + "learning_rate": 3.574700105398306e-05, + "loss": 1.7426, + "step": 23110 + }, + { + "epoch": 0.36, + "learning_rate": 3.573599757438322e-05, + "loss": 1.7144, + "step": 23120 + }, + { + "epoch": 0.36, + "learning_rate": 3.572499154410403e-05, + "loss": 1.5783, + "step": 23130 + }, + { + "epoch": 0.36, + "learning_rate": 3.571398296576031e-05, + "loss": 1.7074, + "step": 23140 + }, + { + "epoch": 0.36, + "learning_rate": 3.5702971841967495e-05, + "loss": 1.6907, + "step": 23150 + }, + { + "epoch": 0.36, + "learning_rate": 3.569195817534164e-05, + "loss": 1.6853, + "step": 23160 + }, + { + "epoch": 0.36, + "learning_rate": 3.56809419684994e-05, + "loss": 1.7841, + "step": 23170 + }, + { + "epoch": 0.36, + "learning_rate": 3.566992322405801e-05, + "loss": 1.6522, + "step": 23180 + }, + { + "epoch": 0.36, + "learning_rate": 3.565890194463533e-05, + "loss": 1.6243, + "step": 23190 + }, + { + "epoch": 0.36, + "learning_rate": 3.5647878132849814e-05, + "loss": 1.5946, + "step": 23200 + }, + { + "epoch": 0.36, + "learning_rate": 3.563685179132053e-05, + "loss": 1.5563, + "step": 23210 + }, + { + "epoch": 0.36, + "learning_rate": 3.562582292266713e-05, + "loss": 1.5263, + "step": 23220 + }, + { + "epoch": 0.36, + "learning_rate": 3.5614791529509886e-05, + "loss": 1.5209, + "step": 23230 + }, + { + "epoch": 0.36, + "learning_rate": 3.560375761446964e-05, + "loss": 1.8463, + "step": 23240 + }, + { + "epoch": 0.36, + "learning_rate": 3.5592721180167865e-05, + "loss": 1.7597, + "step": 23250 + }, + { + "epoch": 0.36, + "learning_rate": 3.5581682229226613e-05, + "loss": 1.709, + "step": 23260 + }, + { + "epoch": 0.36, + "learning_rate": 3.557064076426855e-05, + "loss": 1.82, + "step": 23270 + }, + { + "epoch": 0.36, + "learning_rate": 3.555959678791692e-05, + "loss": 1.9364, + "step": 23280 + }, + { + "epoch": 0.36, + "learning_rate": 3.554855030279556e-05, + "loss": 1.9167, + "step": 23290 + }, + { + "epoch": 0.36, + "learning_rate": 3.553750131152894e-05, + "loss": 1.912, + "step": 23300 + }, + { + "epoch": 0.36, + "learning_rate": 3.552644981674209e-05, + "loss": 1.9452, + "step": 23310 + }, + { + "epoch": 0.36, + "learning_rate": 3.5515395821060635e-05, + "loss": 1.9852, + "step": 23320 + }, + { + "epoch": 0.36, + "learning_rate": 3.550433932711083e-05, + "loss": 1.8639, + "step": 23330 + }, + { + "epoch": 0.36, + "learning_rate": 3.549328033751947e-05, + "loss": 2.0317, + "step": 23340 + }, + { + "epoch": 0.36, + "learning_rate": 3.548221885491399e-05, + "loss": 1.7329, + "step": 23350 + }, + { + "epoch": 0.36, + "learning_rate": 3.547115488192239e-05, + "loss": 1.7058, + "step": 23360 + }, + { + "epoch": 0.36, + "learning_rate": 3.546008842117328e-05, + "loss": 1.8143, + "step": 23370 + }, + { + "epoch": 0.36, + "learning_rate": 3.5449019475295846e-05, + "loss": 1.7216, + "step": 23380 + }, + { + "epoch": 0.36, + "learning_rate": 3.5437948046919865e-05, + "loss": 1.6946, + "step": 23390 + }, + { + "epoch": 0.36, + "learning_rate": 3.542687413867572e-05, + "loss": 1.6823, + "step": 23400 + }, + { + "epoch": 0.36, + "learning_rate": 3.541579775319436e-05, + "loss": 1.6412, + "step": 23410 + }, + { + "epoch": 0.36, + "learning_rate": 3.540471889310734e-05, + "loss": 1.7579, + "step": 23420 + }, + { + "epoch": 0.36, + "learning_rate": 3.53936375610468e-05, + "loss": 1.7311, + "step": 23430 + }, + { + "epoch": 0.36, + "learning_rate": 3.538255375964546e-05, + "loss": 1.719, + "step": 23440 + }, + { + "epoch": 0.36, + "learning_rate": 3.537146749153663e-05, + "loss": 1.6885, + "step": 23450 + }, + { + "epoch": 0.36, + "learning_rate": 3.5360378759354216e-05, + "loss": 1.6851, + "step": 23460 + }, + { + "epoch": 0.36, + "learning_rate": 3.534928756573269e-05, + "loss": 1.6414, + "step": 23470 + }, + { + "epoch": 0.36, + "learning_rate": 3.533819391330713e-05, + "loss": 1.7166, + "step": 23480 + }, + { + "epoch": 0.36, + "learning_rate": 3.532709780471317e-05, + "loss": 1.6955, + "step": 23490 + }, + { + "epoch": 0.36, + "learning_rate": 3.5315999242587074e-05, + "loss": 1.7456, + "step": 23500 + }, + { + "epoch": 0.36, + "learning_rate": 3.530489822956563e-05, + "loss": 1.8366, + "step": 23510 + }, + { + "epoch": 0.36, + "learning_rate": 3.5293794768286254e-05, + "loss": 1.7837, + "step": 23520 + }, + { + "epoch": 0.37, + "learning_rate": 3.528268886138693e-05, + "loss": 1.9097, + "step": 23530 + }, + { + "epoch": 0.37, + "learning_rate": 3.527158051150622e-05, + "loss": 1.6959, + "step": 23540 + }, + { + "epoch": 0.37, + "learning_rate": 3.526046972128326e-05, + "loss": 1.759, + "step": 23550 + }, + { + "epoch": 0.37, + "learning_rate": 3.524935649335777e-05, + "loss": 1.7727, + "step": 23560 + }, + { + "epoch": 0.37, + "learning_rate": 3.5238240830370075e-05, + "loss": 1.8515, + "step": 23570 + }, + { + "epoch": 0.37, + "learning_rate": 3.522712273496103e-05, + "loss": 1.8419, + "step": 23580 + }, + { + "epoch": 0.37, + "learning_rate": 3.521600220977211e-05, + "loss": 1.7575, + "step": 23590 + }, + { + "epoch": 0.37, + "learning_rate": 3.520487925744535e-05, + "loss": 1.8707, + "step": 23600 + }, + { + "epoch": 0.37, + "learning_rate": 3.519375388062336e-05, + "loss": 1.7468, + "step": 23610 + }, + { + "epoch": 0.37, + "learning_rate": 3.518262608194932e-05, + "loss": 1.6861, + "step": 23620 + }, + { + "epoch": 0.37, + "learning_rate": 3.517149586406702e-05, + "loss": 1.5096, + "step": 23630 + }, + { + "epoch": 0.37, + "learning_rate": 3.516036322962076e-05, + "loss": 1.7559, + "step": 23640 + }, + { + "epoch": 0.37, + "learning_rate": 3.5149228181255485e-05, + "loss": 1.9187, + "step": 23650 + }, + { + "epoch": 0.37, + "learning_rate": 3.5138090721616676e-05, + "loss": 1.8076, + "step": 23660 + }, + { + "epoch": 0.37, + "learning_rate": 3.5126950853350384e-05, + "loss": 1.8971, + "step": 23670 + }, + { + "epoch": 0.37, + "learning_rate": 3.5115808579103235e-05, + "loss": 1.5526, + "step": 23680 + }, + { + "epoch": 0.37, + "learning_rate": 3.5104663901522456e-05, + "loss": 1.3989, + "step": 23690 + }, + { + "epoch": 0.37, + "learning_rate": 3.50935168232558e-05, + "loss": 1.397, + "step": 23700 + }, + { + "epoch": 0.37, + "learning_rate": 3.5082367346951625e-05, + "loss": 1.8248, + "step": 23710 + }, + { + "epoch": 0.37, + "learning_rate": 3.507121547525883e-05, + "loss": 1.82, + "step": 23720 + }, + { + "epoch": 0.37, + "learning_rate": 3.5060061210826914e-05, + "loss": 1.6933, + "step": 23730 + }, + { + "epoch": 0.37, + "learning_rate": 3.504890455630592e-05, + "loss": 1.5885, + "step": 23740 + }, + { + "epoch": 0.37, + "learning_rate": 3.503774551434647e-05, + "loss": 1.5817, + "step": 23750 + }, + { + "epoch": 0.37, + "learning_rate": 3.5026584087599765e-05, + "loss": 1.7256, + "step": 23760 + }, + { + "epoch": 0.37, + "learning_rate": 3.501542027871753e-05, + "loss": 1.9109, + "step": 23770 + }, + { + "epoch": 0.37, + "learning_rate": 3.5004254090352104e-05, + "loss": 1.768, + "step": 23780 + }, + { + "epoch": 0.37, + "learning_rate": 3.4993085525156376e-05, + "loss": 1.7429, + "step": 23790 + }, + { + "epoch": 0.37, + "learning_rate": 3.4981914585783784e-05, + "loss": 1.7856, + "step": 23800 + }, + { + "epoch": 0.37, + "learning_rate": 3.497074127488834e-05, + "loss": 1.7591, + "step": 23810 + }, + { + "epoch": 0.37, + "learning_rate": 3.4959565595124624e-05, + "loss": 1.8351, + "step": 23820 + }, + { + "epoch": 0.37, + "learning_rate": 3.4948387549147794e-05, + "loss": 1.8248, + "step": 23830 + }, + { + "epoch": 0.37, + "learning_rate": 3.493720713961353e-05, + "loss": 1.767, + "step": 23840 + }, + { + "epoch": 0.37, + "learning_rate": 3.492602436917809e-05, + "loss": 1.7004, + "step": 23850 + }, + { + "epoch": 0.37, + "learning_rate": 3.491483924049833e-05, + "loss": 1.7291, + "step": 23860 + }, + { + "epoch": 0.37, + "learning_rate": 3.49036517562316e-05, + "loss": 1.802, + "step": 23870 + }, + { + "epoch": 0.37, + "learning_rate": 3.489246191903587e-05, + "loss": 1.8571, + "step": 23880 + }, + { + "epoch": 0.37, + "learning_rate": 3.4881269731569634e-05, + "loss": 1.9238, + "step": 23890 + }, + { + "epoch": 0.37, + "learning_rate": 3.487007519649195e-05, + "loss": 1.8023, + "step": 23900 + }, + { + "epoch": 0.37, + "learning_rate": 3.485887831646243e-05, + "loss": 1.7083, + "step": 23910 + }, + { + "epoch": 0.37, + "learning_rate": 3.484767909414128e-05, + "loss": 1.7932, + "step": 23920 + }, + { + "epoch": 0.37, + "learning_rate": 3.48364775321892e-05, + "loss": 1.774, + "step": 23930 + }, + { + "epoch": 0.37, + "learning_rate": 3.4825273633267506e-05, + "loss": 1.6841, + "step": 23940 + }, + { + "epoch": 0.37, + "learning_rate": 3.481406740003803e-05, + "loss": 1.6688, + "step": 23950 + }, + { + "epoch": 0.37, + "learning_rate": 3.4802858835163166e-05, + "loss": 1.6681, + "step": 23960 + }, + { + "epoch": 0.37, + "learning_rate": 3.4791647941305874e-05, + "loss": 1.7979, + "step": 23970 + }, + { + "epoch": 0.37, + "learning_rate": 3.478043472112965e-05, + "loss": 1.7689, + "step": 23980 + }, + { + "epoch": 0.37, + "learning_rate": 3.4769219177298576e-05, + "loss": 1.8383, + "step": 23990 + }, + { + "epoch": 0.37, + "learning_rate": 3.4758001312477235e-05, + "loss": 1.867, + "step": 24000 + }, + { + "epoch": 0.37, + "learning_rate": 3.474678112933081e-05, + "loss": 1.8459, + "step": 24010 + }, + { + "epoch": 0.37, + "learning_rate": 3.4735558630525e-05, + "loss": 1.719, + "step": 24020 + }, + { + "epoch": 0.37, + "learning_rate": 3.4724333818726074e-05, + "loss": 2.2192, + "step": 24030 + }, + { + "epoch": 0.37, + "learning_rate": 3.471310669660085e-05, + "loss": 1.9411, + "step": 24040 + }, + { + "epoch": 0.37, + "learning_rate": 3.4701877266816684e-05, + "loss": 1.7642, + "step": 24050 + }, + { + "epoch": 0.37, + "learning_rate": 3.4690645532041485e-05, + "loss": 1.791, + "step": 24060 + }, + { + "epoch": 0.37, + "learning_rate": 3.467941149494372e-05, + "loss": 1.7735, + "step": 24070 + }, + { + "epoch": 0.37, + "learning_rate": 3.466817515819238e-05, + "loss": 1.7356, + "step": 24080 + }, + { + "epoch": 0.37, + "learning_rate": 3.4656936524457026e-05, + "loss": 1.7402, + "step": 24090 + }, + { + "epoch": 0.37, + "learning_rate": 3.464569559640775e-05, + "loss": 1.819, + "step": 24100 + }, + { + "epoch": 0.37, + "learning_rate": 3.463445237671519e-05, + "loss": 1.7783, + "step": 24110 + }, + { + "epoch": 0.37, + "learning_rate": 3.462320686805054e-05, + "loss": 1.8609, + "step": 24120 + }, + { + "epoch": 0.37, + "learning_rate": 3.461195907308553e-05, + "loss": 1.973, + "step": 24130 + }, + { + "epoch": 0.37, + "learning_rate": 3.460070899449242e-05, + "loss": 1.7322, + "step": 24140 + }, + { + "epoch": 0.37, + "learning_rate": 3.458945663494405e-05, + "loss": 1.7004, + "step": 24150 + }, + { + "epoch": 0.37, + "learning_rate": 3.457820199711375e-05, + "loss": 1.8108, + "step": 24160 + }, + { + "epoch": 0.38, + "learning_rate": 3.4566945083675436e-05, + "loss": 1.7443, + "step": 24170 + }, + { + "epoch": 0.38, + "learning_rate": 3.4555685897303544e-05, + "loss": 1.573, + "step": 24180 + }, + { + "epoch": 0.38, + "learning_rate": 3.454442444067305e-05, + "loss": 1.4901, + "step": 24190 + }, + { + "epoch": 0.38, + "learning_rate": 3.4533160716459485e-05, + "loss": 1.5956, + "step": 24200 + }, + { + "epoch": 0.38, + "learning_rate": 3.452189472733889e-05, + "loss": 2.021, + "step": 24210 + }, + { + "epoch": 0.38, + "learning_rate": 3.451062647598788e-05, + "loss": 1.9465, + "step": 24220 + }, + { + "epoch": 0.38, + "learning_rate": 3.4499355965083564e-05, + "loss": 1.8425, + "step": 24230 + }, + { + "epoch": 0.38, + "learning_rate": 3.448808319730363e-05, + "loss": 1.6531, + "step": 24240 + }, + { + "epoch": 0.38, + "learning_rate": 3.447680817532629e-05, + "loss": 1.952, + "step": 24250 + }, + { + "epoch": 0.38, + "learning_rate": 3.446553090183026e-05, + "loss": 1.7268, + "step": 24260 + }, + { + "epoch": 0.38, + "learning_rate": 3.4454251379494835e-05, + "loss": 1.7894, + "step": 24270 + }, + { + "epoch": 0.38, + "learning_rate": 3.444296961099983e-05, + "loss": 1.7273, + "step": 24280 + }, + { + "epoch": 0.38, + "learning_rate": 3.443168559902557e-05, + "loss": 1.7656, + "step": 24290 + }, + { + "epoch": 0.38, + "learning_rate": 3.4420399346252965e-05, + "loss": 1.7506, + "step": 24300 + }, + { + "epoch": 0.38, + "learning_rate": 3.4409110855363384e-05, + "loss": 1.8547, + "step": 24310 + }, + { + "epoch": 0.38, + "learning_rate": 3.439782012903881e-05, + "loss": 1.9149, + "step": 24320 + }, + { + "epoch": 0.38, + "learning_rate": 3.438652716996169e-05, + "loss": 1.8917, + "step": 24330 + }, + { + "epoch": 0.38, + "learning_rate": 3.4375231980815026e-05, + "loss": 1.702, + "step": 24340 + }, + { + "epoch": 0.38, + "learning_rate": 3.436393456428236e-05, + "loss": 1.7413, + "step": 24350 + }, + { + "epoch": 0.38, + "learning_rate": 3.435263492304776e-05, + "loss": 1.6912, + "step": 24360 + }, + { + "epoch": 0.38, + "learning_rate": 3.43413330597958e-05, + "loss": 1.798, + "step": 24370 + }, + { + "epoch": 0.38, + "learning_rate": 3.433002897721162e-05, + "loss": 1.8972, + "step": 24380 + }, + { + "epoch": 0.38, + "learning_rate": 3.431872267798085e-05, + "loss": 1.9135, + "step": 24390 + }, + { + "epoch": 0.38, + "learning_rate": 3.4307414164789665e-05, + "loss": 1.7388, + "step": 24400 + }, + { + "epoch": 0.38, + "learning_rate": 3.429610344032477e-05, + "loss": 1.7196, + "step": 24410 + }, + { + "epoch": 0.38, + "learning_rate": 3.428479050727339e-05, + "loss": 1.6962, + "step": 24420 + }, + { + "epoch": 0.38, + "learning_rate": 3.4273475368323266e-05, + "loss": 1.7335, + "step": 24430 + }, + { + "epoch": 0.38, + "learning_rate": 3.426215802616267e-05, + "loss": 1.7125, + "step": 24440 + }, + { + "epoch": 0.38, + "learning_rate": 3.4250838483480414e-05, + "loss": 1.6688, + "step": 24450 + }, + { + "epoch": 0.38, + "learning_rate": 3.4239516742965795e-05, + "loss": 1.6918, + "step": 24460 + }, + { + "epoch": 0.38, + "learning_rate": 3.422819280730867e-05, + "loss": 1.751, + "step": 24470 + }, + { + "epoch": 0.38, + "learning_rate": 3.42168666791994e-05, + "loss": 1.7352, + "step": 24480 + }, + { + "epoch": 0.38, + "learning_rate": 3.420553836132886e-05, + "loss": 1.6766, + "step": 24490 + }, + { + "epoch": 0.38, + "learning_rate": 3.419420785638846e-05, + "loss": 1.7238, + "step": 24500 + }, + { + "epoch": 0.38, + "learning_rate": 3.418287516707012e-05, + "loss": 1.705, + "step": 24510 + }, + { + "epoch": 0.38, + "learning_rate": 3.417154029606629e-05, + "loss": 1.7676, + "step": 24520 + }, + { + "epoch": 0.38, + "learning_rate": 3.416020324606993e-05, + "loss": 1.5816, + "step": 24530 + }, + { + "epoch": 0.38, + "learning_rate": 3.414886401977451e-05, + "loss": 1.7767, + "step": 24540 + }, + { + "epoch": 0.38, + "learning_rate": 3.4137522619874035e-05, + "loss": 1.6206, + "step": 24550 + }, + { + "epoch": 0.38, + "learning_rate": 3.412617904906301e-05, + "loss": 1.6197, + "step": 24560 + }, + { + "epoch": 0.38, + "learning_rate": 3.4114833310036465e-05, + "loss": 1.9195, + "step": 24570 + }, + { + "epoch": 0.38, + "learning_rate": 3.4103485405489954e-05, + "loss": 1.8067, + "step": 24580 + }, + { + "epoch": 0.38, + "learning_rate": 3.4092135338119514e-05, + "loss": 1.8727, + "step": 24590 + }, + { + "epoch": 0.38, + "learning_rate": 3.4080783110621725e-05, + "loss": 1.7131, + "step": 24600 + }, + { + "epoch": 0.38, + "learning_rate": 3.406942872569368e-05, + "loss": 1.5751, + "step": 24610 + }, + { + "epoch": 0.38, + "learning_rate": 3.405807218603297e-05, + "loss": 1.6031, + "step": 24620 + }, + { + "epoch": 0.38, + "learning_rate": 3.4046713494337694e-05, + "loss": 1.8843, + "step": 24630 + }, + { + "epoch": 0.38, + "learning_rate": 3.403535265330648e-05, + "loss": 1.8602, + "step": 24640 + }, + { + "epoch": 0.38, + "learning_rate": 3.402398966563848e-05, + "loss": 1.8609, + "step": 24650 + }, + { + "epoch": 0.38, + "learning_rate": 3.40126245340333e-05, + "loss": 1.7298, + "step": 24660 + }, + { + "epoch": 0.38, + "learning_rate": 3.4001257261191105e-05, + "loss": 1.6763, + "step": 24670 + }, + { + "epoch": 0.38, + "learning_rate": 3.398988784981255e-05, + "loss": 1.6175, + "step": 24680 + }, + { + "epoch": 0.38, + "learning_rate": 3.397851630259882e-05, + "loss": 1.5832, + "step": 24690 + }, + { + "epoch": 0.38, + "learning_rate": 3.3967142622251555e-05, + "loss": 1.6236, + "step": 24700 + }, + { + "epoch": 0.38, + "learning_rate": 3.395576681147298e-05, + "loss": 1.7545, + "step": 24710 + }, + { + "epoch": 0.38, + "learning_rate": 3.394438887296575e-05, + "loss": 1.751, + "step": 24720 + }, + { + "epoch": 0.38, + "learning_rate": 3.3933008809433064e-05, + "loss": 1.7365, + "step": 24730 + }, + { + "epoch": 0.38, + "learning_rate": 3.3921626623578624e-05, + "loss": 1.8056, + "step": 24740 + }, + { + "epoch": 0.38, + "learning_rate": 3.391024231810664e-05, + "loss": 1.7138, + "step": 24750 + }, + { + "epoch": 0.38, + "learning_rate": 3.3898855895721795e-05, + "loss": 1.6259, + "step": 24760 + }, + { + "epoch": 0.38, + "learning_rate": 3.388746735912932e-05, + "loss": 1.7193, + "step": 24770 + }, + { + "epoch": 0.38, + "learning_rate": 3.387607671103492e-05, + "loss": 1.8801, + "step": 24780 + }, + { + "epoch": 0.38, + "learning_rate": 3.3864683954144805e-05, + "loss": 1.8366, + "step": 24790 + }, + { + "epoch": 0.38, + "learning_rate": 3.385328909116568e-05, + "loss": 2.0822, + "step": 24800 + }, + { + "epoch": 0.38, + "learning_rate": 3.384189212480477e-05, + "loss": 2.0006, + "step": 24810 + }, + { + "epoch": 0.39, + "learning_rate": 3.38304930577698e-05, + "loss": 1.7844, + "step": 24820 + }, + { + "epoch": 0.39, + "learning_rate": 3.381909189276895e-05, + "loss": 1.6923, + "step": 24830 + }, + { + "epoch": 0.39, + "learning_rate": 3.3807688632510956e-05, + "loss": 1.6599, + "step": 24840 + }, + { + "epoch": 0.39, + "learning_rate": 3.379628327970502e-05, + "loss": 1.8322, + "step": 24850 + }, + { + "epoch": 0.39, + "learning_rate": 3.378487583706085e-05, + "loss": 1.9181, + "step": 24860 + }, + { + "epoch": 0.39, + "learning_rate": 3.3773466307288644e-05, + "loss": 1.7382, + "step": 24870 + }, + { + "epoch": 0.39, + "learning_rate": 3.376205469309911e-05, + "loss": 1.5476, + "step": 24880 + }, + { + "epoch": 0.39, + "learning_rate": 3.375064099720343e-05, + "loss": 1.7766, + "step": 24890 + }, + { + "epoch": 0.39, + "learning_rate": 3.3739225222313296e-05, + "loss": 1.7224, + "step": 24900 + }, + { + "epoch": 0.39, + "learning_rate": 3.372780737114091e-05, + "loss": 1.664, + "step": 24910 + }, + { + "epoch": 0.39, + "learning_rate": 3.371638744639891e-05, + "loss": 1.7273, + "step": 24920 + }, + { + "epoch": 0.39, + "learning_rate": 3.370496545080048e-05, + "loss": 1.697, + "step": 24930 + }, + { + "epoch": 0.39, + "learning_rate": 3.36935413870593e-05, + "loss": 1.6109, + "step": 24940 + }, + { + "epoch": 0.39, + "learning_rate": 3.36821152578895e-05, + "loss": 1.4285, + "step": 24950 + }, + { + "epoch": 0.39, + "learning_rate": 3.367068706600572e-05, + "loss": 1.7077, + "step": 24960 + }, + { + "epoch": 0.39, + "learning_rate": 3.3659256814123106e-05, + "loss": 1.6574, + "step": 24970 + }, + { + "epoch": 0.39, + "learning_rate": 3.3647824504957274e-05, + "loss": 1.6948, + "step": 24980 + }, + { + "epoch": 0.39, + "learning_rate": 3.3636390141224334e-05, + "loss": 1.6229, + "step": 24990 + }, + { + "epoch": 0.39, + "learning_rate": 3.3624953725640883e-05, + "loss": 1.7728, + "step": 25000 + }, + { + "epoch": 0.39, + "eval_loss": 1.6268333196640015, + "eval_runtime": 81.9489, + "eval_samples_per_second": 36.608, + "eval_steps_per_second": 4.576, + "step": 25000 + }, + { + "epoch": 0.39, + "learning_rate": 3.3613515260924014e-05, + "loss": 2.0887, + "step": 25010 + }, + { + "epoch": 0.39, + "learning_rate": 3.3602074749791285e-05, + "loss": 1.7694, + "step": 25020 + }, + { + "epoch": 0.39, + "learning_rate": 3.3590632194960764e-05, + "loss": 1.8469, + "step": 25030 + }, + { + "epoch": 0.39, + "learning_rate": 3.357918759915101e-05, + "loss": 1.5551, + "step": 25040 + }, + { + "epoch": 0.39, + "learning_rate": 3.356774096508102e-05, + "loss": 1.6253, + "step": 25050 + }, + { + "epoch": 0.39, + "learning_rate": 3.3556292295470334e-05, + "loss": 1.7699, + "step": 25060 + }, + { + "epoch": 0.39, + "learning_rate": 3.354484159303894e-05, + "loss": 1.6644, + "step": 25070 + }, + { + "epoch": 0.39, + "learning_rate": 3.3533388860507324e-05, + "loss": 1.584, + "step": 25080 + }, + { + "epoch": 0.39, + "learning_rate": 3.352193410059644e-05, + "loss": 1.4726, + "step": 25090 + }, + { + "epoch": 0.39, + "learning_rate": 3.3510477316027736e-05, + "loss": 1.5807, + "step": 25100 + }, + { + "epoch": 0.39, + "learning_rate": 3.349901850952315e-05, + "loss": 1.5795, + "step": 25110 + }, + { + "epoch": 0.39, + "learning_rate": 3.348755768380507e-05, + "loss": 1.5597, + "step": 25120 + }, + { + "epoch": 0.39, + "learning_rate": 3.347609484159638e-05, + "loss": 1.5436, + "step": 25130 + }, + { + "epoch": 0.39, + "learning_rate": 3.346462998562046e-05, + "loss": 1.5582, + "step": 25140 + }, + { + "epoch": 0.39, + "learning_rate": 3.345316311860113e-05, + "loss": 1.5671, + "step": 25150 + }, + { + "epoch": 0.39, + "learning_rate": 3.3441694243262745e-05, + "loss": 1.5275, + "step": 25160 + }, + { + "epoch": 0.39, + "learning_rate": 3.343022336233008e-05, + "loss": 1.5793, + "step": 25170 + }, + { + "epoch": 0.39, + "learning_rate": 3.3418750478528407e-05, + "loss": 1.6574, + "step": 25180 + }, + { + "epoch": 0.39, + "learning_rate": 3.3407275594583484e-05, + "loss": 1.6223, + "step": 25190 + }, + { + "epoch": 0.39, + "learning_rate": 3.3395798713221526e-05, + "loss": 1.5103, + "step": 25200 + }, + { + "epoch": 0.39, + "learning_rate": 3.338431983716926e-05, + "loss": 1.4893, + "step": 25210 + }, + { + "epoch": 0.39, + "learning_rate": 3.337283896915383e-05, + "loss": 1.5049, + "step": 25220 + }, + { + "epoch": 0.39, + "learning_rate": 3.33613561119029e-05, + "loss": 1.5761, + "step": 25230 + }, + { + "epoch": 0.39, + "learning_rate": 3.334987126814458e-05, + "loss": 1.5808, + "step": 25240 + }, + { + "epoch": 0.39, + "learning_rate": 3.3338384440607466e-05, + "loss": 1.567, + "step": 25250 + }, + { + "epoch": 0.39, + "learning_rate": 3.332689563202062e-05, + "loss": 1.4998, + "step": 25260 + }, + { + "epoch": 0.39, + "learning_rate": 3.3315404845113585e-05, + "loss": 1.7106, + "step": 25270 + }, + { + "epoch": 0.39, + "learning_rate": 3.330391208261635e-05, + "loss": 1.7445, + "step": 25280 + }, + { + "epoch": 0.39, + "learning_rate": 3.329241734725939e-05, + "loss": 1.7429, + "step": 25290 + }, + { + "epoch": 0.39, + "learning_rate": 3.328092064177366e-05, + "loss": 1.6719, + "step": 25300 + }, + { + "epoch": 0.39, + "learning_rate": 3.326942196889056e-05, + "loss": 1.654, + "step": 25310 + }, + { + "epoch": 0.39, + "learning_rate": 3.325792133134196e-05, + "loss": 1.6559, + "step": 25320 + }, + { + "epoch": 0.39, + "learning_rate": 3.324641873186022e-05, + "loss": 1.7359, + "step": 25330 + }, + { + "epoch": 0.39, + "learning_rate": 3.3234914173178143e-05, + "loss": 1.7824, + "step": 25340 + }, + { + "epoch": 0.39, + "learning_rate": 3.322340765802901e-05, + "loss": 1.6409, + "step": 25350 + }, + { + "epoch": 0.39, + "learning_rate": 3.321189918914654e-05, + "loss": 1.5096, + "step": 25360 + }, + { + "epoch": 0.39, + "learning_rate": 3.320038876926497e-05, + "loss": 2.0661, + "step": 25370 + }, + { + "epoch": 0.39, + "learning_rate": 3.3188876401118935e-05, + "loss": 2.0201, + "step": 25380 + }, + { + "epoch": 0.39, + "learning_rate": 3.317736208744359e-05, + "loss": 1.8802, + "step": 25390 + }, + { + "epoch": 0.39, + "learning_rate": 3.316584583097453e-05, + "loss": 1.7726, + "step": 25400 + }, + { + "epoch": 0.39, + "learning_rate": 3.3154327634447785e-05, + "loss": 1.6791, + "step": 25410 + }, + { + "epoch": 0.39, + "learning_rate": 3.314280750059989e-05, + "loss": 1.6414, + "step": 25420 + }, + { + "epoch": 0.39, + "learning_rate": 3.313128543216782e-05, + "loss": 1.8079, + "step": 25430 + }, + { + "epoch": 0.39, + "learning_rate": 3.3119761431889016e-05, + "loss": 1.9467, + "step": 25440 + }, + { + "epoch": 0.39, + "learning_rate": 3.310823550250135e-05, + "loss": 1.7744, + "step": 25450 + }, + { + "epoch": 0.4, + "learning_rate": 3.3096707646743195e-05, + "loss": 1.5397, + "step": 25460 + }, + { + "epoch": 0.4, + "learning_rate": 3.308517786735336e-05, + "loss": 1.5905, + "step": 25470 + }, + { + "epoch": 0.4, + "learning_rate": 3.30736461670711e-05, + "loss": 1.5812, + "step": 25480 + }, + { + "epoch": 0.4, + "learning_rate": 3.306211254863615e-05, + "loss": 1.6715, + "step": 25490 + }, + { + "epoch": 0.4, + "learning_rate": 3.3050577014788696e-05, + "loss": 1.6572, + "step": 25500 + }, + { + "epoch": 0.4, + "learning_rate": 3.3039039568269356e-05, + "loss": 1.7116, + "step": 25510 + }, + { + "epoch": 0.4, + "learning_rate": 3.302750021181923e-05, + "loss": 1.7914, + "step": 25520 + }, + { + "epoch": 0.4, + "learning_rate": 3.301595894817985e-05, + "loss": 1.8371, + "step": 25530 + }, + { + "epoch": 0.4, + "learning_rate": 3.3004415780093234e-05, + "loss": 1.5683, + "step": 25540 + }, + { + "epoch": 0.4, + "learning_rate": 3.2992870710301804e-05, + "loss": 1.6656, + "step": 25550 + }, + { + "epoch": 0.4, + "learning_rate": 3.2981323741548484e-05, + "loss": 1.895, + "step": 25560 + }, + { + "epoch": 0.4, + "learning_rate": 3.296977487657661e-05, + "loss": 1.6954, + "step": 25570 + }, + { + "epoch": 0.4, + "learning_rate": 3.295822411812999e-05, + "loss": 1.6994, + "step": 25580 + }, + { + "epoch": 0.4, + "learning_rate": 3.294667146895287e-05, + "loss": 1.8332, + "step": 25590 + }, + { + "epoch": 0.4, + "learning_rate": 3.293511693178996e-05, + "loss": 1.7575, + "step": 25600 + }, + { + "epoch": 0.4, + "learning_rate": 3.29235605093864e-05, + "loss": 1.7178, + "step": 25610 + }, + { + "epoch": 0.4, + "learning_rate": 3.291200220448779e-05, + "loss": 1.7724, + "step": 25620 + }, + { + "epoch": 0.4, + "learning_rate": 3.290044201984018e-05, + "loss": 1.7646, + "step": 25630 + }, + { + "epoch": 0.4, + "learning_rate": 3.2888879958190055e-05, + "loss": 1.714, + "step": 25640 + }, + { + "epoch": 0.4, + "learning_rate": 3.287731602228436e-05, + "loss": 1.7595, + "step": 25650 + }, + { + "epoch": 0.4, + "learning_rate": 3.286575021487047e-05, + "loss": 1.6687, + "step": 25660 + }, + { + "epoch": 0.4, + "learning_rate": 3.285418253869623e-05, + "loss": 1.6742, + "step": 25670 + }, + { + "epoch": 0.4, + "learning_rate": 3.2842612996509884e-05, + "loss": 1.6377, + "step": 25680 + }, + { + "epoch": 0.4, + "learning_rate": 3.283104159106015e-05, + "loss": 1.629, + "step": 25690 + }, + { + "epoch": 0.4, + "learning_rate": 3.281946832509621e-05, + "loss": 1.7158, + "step": 25700 + }, + { + "epoch": 0.4, + "learning_rate": 3.2807893201367645e-05, + "loss": 1.6402, + "step": 25710 + }, + { + "epoch": 0.4, + "learning_rate": 3.27963162226245e-05, + "loss": 1.6443, + "step": 25720 + }, + { + "epoch": 0.4, + "learning_rate": 3.278473739161725e-05, + "loss": 1.95, + "step": 25730 + }, + { + "epoch": 0.4, + "learning_rate": 3.2773156711096824e-05, + "loss": 1.8658, + "step": 25740 + }, + { + "epoch": 0.4, + "learning_rate": 3.2761574183814576e-05, + "loss": 1.685, + "step": 25750 + }, + { + "epoch": 0.4, + "learning_rate": 3.274998981252231e-05, + "loss": 1.8152, + "step": 25760 + }, + { + "epoch": 0.4, + "learning_rate": 3.273840359997227e-05, + "loss": 1.6196, + "step": 25770 + }, + { + "epoch": 0.4, + "learning_rate": 3.2726815548917114e-05, + "loss": 1.5927, + "step": 25780 + }, + { + "epoch": 0.4, + "learning_rate": 3.271522566210997e-05, + "loss": 1.7696, + "step": 25790 + }, + { + "epoch": 0.4, + "learning_rate": 3.270363394230438e-05, + "loss": 1.6634, + "step": 25800 + }, + { + "epoch": 0.4, + "learning_rate": 3.269204039225432e-05, + "loss": 1.8589, + "step": 25810 + }, + { + "epoch": 0.4, + "learning_rate": 3.2680445014714226e-05, + "loss": 1.784, + "step": 25820 + }, + { + "epoch": 0.4, + "learning_rate": 3.266884781243894e-05, + "loss": 1.6899, + "step": 25830 + }, + { + "epoch": 0.4, + "learning_rate": 3.265724878818374e-05, + "loss": 1.808, + "step": 25840 + }, + { + "epoch": 0.4, + "learning_rate": 3.2645647944704364e-05, + "loss": 1.734, + "step": 25850 + }, + { + "epoch": 0.4, + "learning_rate": 3.263404528475695e-05, + "loss": 1.5994, + "step": 25860 + }, + { + "epoch": 0.4, + "learning_rate": 3.262244081109807e-05, + "loss": 1.6928, + "step": 25870 + }, + { + "epoch": 0.4, + "learning_rate": 3.261083452648477e-05, + "loss": 1.5715, + "step": 25880 + }, + { + "epoch": 0.4, + "learning_rate": 3.259922643367446e-05, + "loss": 1.5805, + "step": 25890 + }, + { + "epoch": 0.4, + "learning_rate": 3.258761653542504e-05, + "loss": 1.6229, + "step": 25900 + }, + { + "epoch": 0.4, + "learning_rate": 3.257600483449479e-05, + "loss": 1.7378, + "step": 25910 + }, + { + "epoch": 0.4, + "learning_rate": 3.256439133364246e-05, + "loss": 1.714, + "step": 25920 + }, + { + "epoch": 0.4, + "learning_rate": 3.2552776035627194e-05, + "loss": 1.7747, + "step": 25930 + }, + { + "epoch": 0.4, + "learning_rate": 3.254115894320859e-05, + "loss": 1.9148, + "step": 25940 + }, + { + "epoch": 0.4, + "learning_rate": 3.252954005914665e-05, + "loss": 1.671, + "step": 25950 + }, + { + "epoch": 0.4, + "learning_rate": 3.251791938620181e-05, + "loss": 1.8409, + "step": 25960 + }, + { + "epoch": 0.4, + "learning_rate": 3.2506296927134935e-05, + "loss": 1.7661, + "step": 25970 + }, + { + "epoch": 0.4, + "learning_rate": 3.249467268470731e-05, + "loss": 1.707, + "step": 25980 + }, + { + "epoch": 0.4, + "learning_rate": 3.248304666168066e-05, + "loss": 1.7039, + "step": 25990 + }, + { + "epoch": 0.4, + "learning_rate": 3.24714188608171e-05, + "loss": 1.9488, + "step": 26000 + }, + { + "epoch": 0.4, + "learning_rate": 3.245978928487919e-05, + "loss": 1.8123, + "step": 26010 + }, + { + "epoch": 0.4, + "learning_rate": 3.2448157936629905e-05, + "loss": 1.406, + "step": 26020 + }, + { + "epoch": 0.4, + "learning_rate": 3.2436524818832665e-05, + "loss": 1.6617, + "step": 26030 + }, + { + "epoch": 0.4, + "learning_rate": 3.242488993425126e-05, + "loss": 1.9082, + "step": 26040 + }, + { + "epoch": 0.4, + "learning_rate": 3.2413253285649944e-05, + "loss": 1.8644, + "step": 26050 + }, + { + "epoch": 0.4, + "learning_rate": 3.240161487579337e-05, + "loss": 1.8957, + "step": 26060 + }, + { + "epoch": 0.4, + "learning_rate": 3.2389974707446616e-05, + "loss": 1.8585, + "step": 26070 + }, + { + "epoch": 0.4, + "learning_rate": 3.237833278337518e-05, + "loss": 1.9341, + "step": 26080 + }, + { + "epoch": 0.4, + "learning_rate": 3.236668910634496e-05, + "loss": 1.8685, + "step": 26090 + }, + { + "epoch": 0.4, + "learning_rate": 3.2355043679122295e-05, + "loss": 1.8628, + "step": 26100 + }, + { + "epoch": 0.41, + "learning_rate": 3.2343396504473926e-05, + "loss": 1.8625, + "step": 26110 + }, + { + "epoch": 0.41, + "learning_rate": 3.2331747585167016e-05, + "loss": 1.8669, + "step": 26120 + }, + { + "epoch": 0.41, + "learning_rate": 3.232009692396913e-05, + "loss": 1.9103, + "step": 26130 + }, + { + "epoch": 0.41, + "learning_rate": 3.230844452364826e-05, + "loss": 1.7012, + "step": 26140 + }, + { + "epoch": 0.41, + "learning_rate": 3.229679038697281e-05, + "loss": 1.6406, + "step": 26150 + }, + { + "epoch": 0.41, + "learning_rate": 3.2285134516711585e-05, + "loss": 1.587, + "step": 26160 + }, + { + "epoch": 0.41, + "learning_rate": 3.22734769156338e-05, + "loss": 1.5711, + "step": 26170 + }, + { + "epoch": 0.41, + "learning_rate": 3.2261817586509116e-05, + "loss": 1.5205, + "step": 26180 + }, + { + "epoch": 0.41, + "learning_rate": 3.2250156532107566e-05, + "loss": 1.7986, + "step": 26190 + }, + { + "epoch": 0.41, + "learning_rate": 3.223849375519961e-05, + "loss": 1.6426, + "step": 26200 + }, + { + "epoch": 0.41, + "learning_rate": 3.2226829258556096e-05, + "loss": 1.5944, + "step": 26210 + }, + { + "epoch": 0.41, + "learning_rate": 3.221516304494831e-05, + "loss": 1.5593, + "step": 26220 + }, + { + "epoch": 0.41, + "learning_rate": 3.2203495117147954e-05, + "loss": 1.5733, + "step": 26230 + }, + { + "epoch": 0.41, + "learning_rate": 3.2191825477927086e-05, + "loss": 1.5301, + "step": 26240 + }, + { + "epoch": 0.41, + "learning_rate": 3.218015413005822e-05, + "loss": 1.538, + "step": 26250 + }, + { + "epoch": 0.41, + "learning_rate": 3.216848107631424e-05, + "loss": 1.5263, + "step": 26260 + }, + { + "epoch": 0.41, + "learning_rate": 3.215680631946848e-05, + "loss": 1.515, + "step": 26270 + }, + { + "epoch": 0.41, + "learning_rate": 3.214512986229463e-05, + "loss": 1.5197, + "step": 26280 + }, + { + "epoch": 0.41, + "learning_rate": 3.213345170756682e-05, + "loss": 1.6252, + "step": 26290 + }, + { + "epoch": 0.41, + "learning_rate": 3.212177185805954e-05, + "loss": 1.8574, + "step": 26300 + }, + { + "epoch": 0.41, + "learning_rate": 3.2110090316547754e-05, + "loss": 1.5719, + "step": 26310 + }, + { + "epoch": 0.41, + "learning_rate": 3.209840708580676e-05, + "loss": 1.4922, + "step": 26320 + }, + { + "epoch": 0.41, + "learning_rate": 3.208672216861228e-05, + "loss": 1.708, + "step": 26330 + }, + { + "epoch": 0.41, + "learning_rate": 3.207503556774045e-05, + "loss": 1.5512, + "step": 26340 + }, + { + "epoch": 0.41, + "learning_rate": 3.20633472859678e-05, + "loss": 1.7736, + "step": 26350 + }, + { + "epoch": 0.41, + "learning_rate": 3.205165732607124e-05, + "loss": 2.1982, + "step": 26360 + }, + { + "epoch": 0.41, + "learning_rate": 3.203996569082811e-05, + "loss": 1.6502, + "step": 26370 + }, + { + "epoch": 0.41, + "learning_rate": 3.2028272383016105e-05, + "loss": 1.6477, + "step": 26380 + }, + { + "epoch": 0.41, + "learning_rate": 3.201657740541338e-05, + "loss": 1.6217, + "step": 26390 + }, + { + "epoch": 0.41, + "learning_rate": 3.2004880760798415e-05, + "loss": 1.6244, + "step": 26400 + }, + { + "epoch": 0.41, + "learning_rate": 3.199318245195015e-05, + "loss": 1.7033, + "step": 26410 + }, + { + "epoch": 0.41, + "learning_rate": 3.198148248164788e-05, + "loss": 1.7792, + "step": 26420 + }, + { + "epoch": 0.41, + "learning_rate": 3.1969780852671305e-05, + "loss": 1.5474, + "step": 26430 + }, + { + "epoch": 0.41, + "learning_rate": 3.1958077567800525e-05, + "loss": 1.5157, + "step": 26440 + }, + { + "epoch": 0.41, + "learning_rate": 3.194637262981603e-05, + "loss": 1.6204, + "step": 26450 + }, + { + "epoch": 0.41, + "learning_rate": 3.1934666041498704e-05, + "loss": 1.5332, + "step": 26460 + }, + { + "epoch": 0.41, + "learning_rate": 3.192295780562981e-05, + "loss": 1.66, + "step": 26470 + }, + { + "epoch": 0.41, + "learning_rate": 3.191124792499102e-05, + "loss": 1.5516, + "step": 26480 + }, + { + "epoch": 0.41, + "learning_rate": 3.189953640236441e-05, + "loss": 1.7165, + "step": 26490 + }, + { + "epoch": 0.41, + "learning_rate": 3.1887823240532386e-05, + "loss": 1.5814, + "step": 26500 + }, + { + "epoch": 0.41, + "learning_rate": 3.187610844227782e-05, + "loss": 1.6904, + "step": 26510 + }, + { + "epoch": 0.41, + "learning_rate": 3.186439201038392e-05, + "loss": 1.6722, + "step": 26520 + }, + { + "epoch": 0.41, + "learning_rate": 3.18526739476343e-05, + "loss": 1.6511, + "step": 26530 + }, + { + "epoch": 0.41, + "learning_rate": 3.184095425681296e-05, + "loss": 1.6741, + "step": 26540 + }, + { + "epoch": 0.41, + "learning_rate": 3.182923294070429e-05, + "loss": 1.6868, + "step": 26550 + }, + { + "epoch": 0.41, + "learning_rate": 3.181751000209307e-05, + "loss": 1.8265, + "step": 26560 + }, + { + "epoch": 0.41, + "learning_rate": 3.180578544376445e-05, + "loss": 1.713, + "step": 26570 + }, + { + "epoch": 0.41, + "learning_rate": 3.1794059268503965e-05, + "loss": 1.5994, + "step": 26580 + }, + { + "epoch": 0.41, + "learning_rate": 3.178233147909756e-05, + "loss": 1.7666, + "step": 26590 + }, + { + "epoch": 0.41, + "learning_rate": 3.1770602078331536e-05, + "loss": 1.6172, + "step": 26600 + }, + { + "epoch": 0.41, + "learning_rate": 3.175887106899259e-05, + "loss": 1.7012, + "step": 26610 + }, + { + "epoch": 0.41, + "learning_rate": 3.17471384538678e-05, + "loss": 1.7924, + "step": 26620 + }, + { + "epoch": 0.41, + "learning_rate": 3.173540423574462e-05, + "loss": 1.6811, + "step": 26630 + }, + { + "epoch": 0.41, + "learning_rate": 3.172366841741089e-05, + "loss": 1.6598, + "step": 26640 + }, + { + "epoch": 0.41, + "learning_rate": 3.171193100165484e-05, + "loss": 1.6031, + "step": 26650 + }, + { + "epoch": 0.41, + "learning_rate": 3.170019199126505e-05, + "loss": 1.5951, + "step": 26660 + }, + { + "epoch": 0.41, + "learning_rate": 3.16884513890305e-05, + "loss": 1.6063, + "step": 26670 + }, + { + "epoch": 0.41, + "learning_rate": 3.167670919774055e-05, + "loss": 1.6082, + "step": 26680 + }, + { + "epoch": 0.41, + "learning_rate": 3.166496542018494e-05, + "loss": 1.4703, + "step": 26690 + }, + { + "epoch": 0.41, + "learning_rate": 3.1653220059153764e-05, + "loss": 1.3815, + "step": 26700 + }, + { + "epoch": 0.41, + "learning_rate": 3.164147311743752e-05, + "loss": 1.3212, + "step": 26710 + }, + { + "epoch": 0.41, + "learning_rate": 3.162972459782707e-05, + "loss": 1.3003, + "step": 26720 + }, + { + "epoch": 0.41, + "learning_rate": 3.161797450311365e-05, + "loss": 1.262, + "step": 26730 + }, + { + "epoch": 0.41, + "learning_rate": 3.160622283608885e-05, + "loss": 1.2724, + "step": 26740 + }, + { + "epoch": 0.42, + "learning_rate": 3.159446959954468e-05, + "loss": 1.744, + "step": 26750 + }, + { + "epoch": 0.42, + "learning_rate": 3.1582714796273485e-05, + "loss": 1.9589, + "step": 26760 + }, + { + "epoch": 0.42, + "learning_rate": 3.1570958429068e-05, + "loss": 1.5921, + "step": 26770 + }, + { + "epoch": 0.42, + "learning_rate": 3.155920050072132e-05, + "loss": 1.7693, + "step": 26780 + }, + { + "epoch": 0.42, + "learning_rate": 3.154744101402692e-05, + "loss": 1.6997, + "step": 26790 + }, + { + "epoch": 0.42, + "learning_rate": 3.153567997177864e-05, + "loss": 1.5777, + "step": 26800 + }, + { + "epoch": 0.42, + "learning_rate": 3.15239173767707e-05, + "loss": 1.685, + "step": 26810 + }, + { + "epoch": 0.42, + "learning_rate": 3.151215323179767e-05, + "loss": 1.7791, + "step": 26820 + }, + { + "epoch": 0.42, + "learning_rate": 3.15003875396545e-05, + "loss": 1.5098, + "step": 26830 + }, + { + "epoch": 0.42, + "learning_rate": 3.148862030313651e-05, + "loss": 1.6355, + "step": 26840 + }, + { + "epoch": 0.42, + "learning_rate": 3.147685152503938e-05, + "loss": 1.8056, + "step": 26850 + }, + { + "epoch": 0.42, + "learning_rate": 3.1465081208159166e-05, + "loss": 1.7474, + "step": 26860 + }, + { + "epoch": 0.42, + "learning_rate": 3.145330935529226e-05, + "loss": 1.6095, + "step": 26870 + }, + { + "epoch": 0.42, + "learning_rate": 3.144153596923548e-05, + "loss": 1.5368, + "step": 26880 + }, + { + "epoch": 0.42, + "learning_rate": 3.1429761052785936e-05, + "loss": 1.631, + "step": 26890 + }, + { + "epoch": 0.42, + "learning_rate": 3.141798460874116e-05, + "loss": 1.6739, + "step": 26900 + }, + { + "epoch": 0.42, + "learning_rate": 3.1406206639899e-05, + "loss": 1.7434, + "step": 26910 + }, + { + "epoch": 0.42, + "learning_rate": 3.1394427149057706e-05, + "loss": 1.7871, + "step": 26920 + }, + { + "epoch": 0.42, + "learning_rate": 3.138264613901587e-05, + "loss": 1.7276, + "step": 26930 + }, + { + "epoch": 0.42, + "learning_rate": 3.1370863612572446e-05, + "loss": 1.8821, + "step": 26940 + }, + { + "epoch": 0.42, + "learning_rate": 3.1359079572526745e-05, + "loss": 1.8255, + "step": 26950 + }, + { + "epoch": 0.42, + "learning_rate": 3.134729402167845e-05, + "loss": 1.7216, + "step": 26960 + }, + { + "epoch": 0.42, + "learning_rate": 3.1335506962827584e-05, + "loss": 1.4447, + "step": 26970 + }, + { + "epoch": 0.42, + "learning_rate": 3.1323718398774555e-05, + "loss": 1.3712, + "step": 26980 + }, + { + "epoch": 0.42, + "learning_rate": 3.13119283323201e-05, + "loss": 1.3425, + "step": 26990 + }, + { + "epoch": 0.42, + "learning_rate": 3.1300136766265334e-05, + "loss": 1.4896, + "step": 27000 + }, + { + "epoch": 0.42, + "learning_rate": 3.1288343703411715e-05, + "loss": 1.4382, + "step": 27010 + }, + { + "epoch": 0.42, + "learning_rate": 3.1276549146561066e-05, + "loss": 1.4238, + "step": 27020 + }, + { + "epoch": 0.42, + "learning_rate": 3.126475309851556e-05, + "loss": 1.5625, + "step": 27030 + }, + { + "epoch": 0.42, + "learning_rate": 3.1252955562077725e-05, + "loss": 1.5133, + "step": 27040 + }, + { + "epoch": 0.42, + "learning_rate": 3.124115654005044e-05, + "loss": 1.4896, + "step": 27050 + }, + { + "epoch": 0.42, + "learning_rate": 3.1229356035236946e-05, + "loss": 1.7696, + "step": 27060 + }, + { + "epoch": 0.42, + "learning_rate": 3.1217554050440824e-05, + "loss": 1.456, + "step": 27070 + }, + { + "epoch": 0.42, + "learning_rate": 3.120575058846602e-05, + "loss": 1.8534, + "step": 27080 + }, + { + "epoch": 0.42, + "learning_rate": 3.1193945652116805e-05, + "loss": 1.7251, + "step": 27090 + }, + { + "epoch": 0.42, + "learning_rate": 3.1182139244197845e-05, + "loss": 1.7076, + "step": 27100 + }, + { + "epoch": 0.42, + "learning_rate": 3.1170331367514125e-05, + "loss": 1.6781, + "step": 27110 + }, + { + "epoch": 0.42, + "learning_rate": 3.1158522024870964e-05, + "loss": 1.5175, + "step": 27120 + }, + { + "epoch": 0.42, + "learning_rate": 3.1146711219074055e-05, + "loss": 1.6627, + "step": 27130 + }, + { + "epoch": 0.42, + "learning_rate": 3.113489895292945e-05, + "loss": 1.6183, + "step": 27140 + }, + { + "epoch": 0.42, + "learning_rate": 3.112308522924352e-05, + "loss": 1.7912, + "step": 27150 + }, + { + "epoch": 0.42, + "learning_rate": 3.111127005082298e-05, + "loss": 1.7367, + "step": 27160 + }, + { + "epoch": 0.42, + "learning_rate": 3.109945342047491e-05, + "loss": 1.7409, + "step": 27170 + }, + { + "epoch": 0.42, + "learning_rate": 3.108763534100675e-05, + "loss": 1.8691, + "step": 27180 + }, + { + "epoch": 0.42, + "learning_rate": 3.1075815815226236e-05, + "loss": 1.8603, + "step": 27190 + }, + { + "epoch": 0.42, + "learning_rate": 3.106399484594148e-05, + "loss": 1.6118, + "step": 27200 + }, + { + "epoch": 0.42, + "learning_rate": 3.105217243596094e-05, + "loss": 1.5487, + "step": 27210 + }, + { + "epoch": 0.42, + "learning_rate": 3.1040348588093396e-05, + "loss": 1.6202, + "step": 27220 + }, + { + "epoch": 0.42, + "learning_rate": 3.102852330514799e-05, + "loss": 1.8411, + "step": 27230 + }, + { + "epoch": 0.42, + "learning_rate": 3.101669658993419e-05, + "loss": 1.8134, + "step": 27240 + }, + { + "epoch": 0.42, + "learning_rate": 3.100486844526182e-05, + "loss": 1.6982, + "step": 27250 + }, + { + "epoch": 0.42, + "learning_rate": 3.099303887394102e-05, + "loss": 1.7489, + "step": 27260 + }, + { + "epoch": 0.42, + "learning_rate": 3.098120787878229e-05, + "loss": 1.5734, + "step": 27270 + }, + { + "epoch": 0.42, + "learning_rate": 3.096937546259646e-05, + "loss": 1.5495, + "step": 27280 + }, + { + "epoch": 0.42, + "learning_rate": 3.09575416281947e-05, + "loss": 1.5152, + "step": 27290 + }, + { + "epoch": 0.42, + "learning_rate": 3.094570637838851e-05, + "loss": 1.9858, + "step": 27300 + }, + { + "epoch": 0.42, + "learning_rate": 3.093386971598975e-05, + "loss": 1.749, + "step": 27310 + }, + { + "epoch": 0.42, + "learning_rate": 3.092203164381057e-05, + "loss": 1.8611, + "step": 27320 + }, + { + "epoch": 0.42, + "learning_rate": 3.0910192164663495e-05, + "loss": 1.838, + "step": 27330 + }, + { + "epoch": 0.42, + "learning_rate": 3.089835128136139e-05, + "loss": 1.9634, + "step": 27340 + }, + { + "epoch": 0.42, + "learning_rate": 3.08865089967174e-05, + "loss": 1.7006, + "step": 27350 + }, + { + "epoch": 0.42, + "learning_rate": 3.087466531354506e-05, + "loss": 1.8916, + "step": 27360 + }, + { + "epoch": 0.42, + "learning_rate": 3.086282023465822e-05, + "loss": 1.8243, + "step": 27370 + }, + { + "epoch": 0.42, + "learning_rate": 3.085097376287104e-05, + "loss": 1.5791, + "step": 27380 + }, + { + "epoch": 0.42, + "learning_rate": 3.083912590099804e-05, + "loss": 1.8089, + "step": 27390 + }, + { + "epoch": 0.43, + "learning_rate": 3.0827276651854055e-05, + "loss": 1.7644, + "step": 27400 + }, + { + "epoch": 0.43, + "learning_rate": 3.081542601825426e-05, + "loss": 1.7128, + "step": 27410 + }, + { + "epoch": 0.43, + "learning_rate": 3.0803574003014136e-05, + "loss": 1.3885, + "step": 27420 + }, + { + "epoch": 0.43, + "learning_rate": 3.0791720608949516e-05, + "loss": 1.2528, + "step": 27430 + }, + { + "epoch": 0.43, + "learning_rate": 3.0779865838876566e-05, + "loss": 1.3976, + "step": 27440 + }, + { + "epoch": 0.43, + "learning_rate": 3.076800969561174e-05, + "loss": 1.7092, + "step": 27450 + }, + { + "epoch": 0.43, + "learning_rate": 3.075615218197186e-05, + "loss": 1.6607, + "step": 27460 + }, + { + "epoch": 0.43, + "learning_rate": 3.074429330077405e-05, + "loss": 1.6261, + "step": 27470 + }, + { + "epoch": 0.43, + "learning_rate": 3.073243305483578e-05, + "loss": 1.6382, + "step": 27480 + }, + { + "epoch": 0.43, + "learning_rate": 3.072057144697481e-05, + "loss": 1.5335, + "step": 27490 + }, + { + "epoch": 0.43, + "learning_rate": 3.070870848000927e-05, + "loss": 1.5949, + "step": 27500 + }, + { + "epoch": 0.43, + "learning_rate": 3.069684415675756e-05, + "loss": 1.6318, + "step": 27510 + }, + { + "epoch": 0.43, + "learning_rate": 3.068497848003845e-05, + "loss": 1.5606, + "step": 27520 + }, + { + "epoch": 0.43, + "learning_rate": 3.067311145267099e-05, + "loss": 1.6279, + "step": 27530 + }, + { + "epoch": 0.43, + "learning_rate": 3.066124307747459e-05, + "loss": 1.8066, + "step": 27540 + }, + { + "epoch": 0.43, + "learning_rate": 3.0649373357268954e-05, + "loss": 1.7916, + "step": 27550 + }, + { + "epoch": 0.43, + "learning_rate": 3.0637502294874115e-05, + "loss": 1.7638, + "step": 27560 + }, + { + "epoch": 0.43, + "learning_rate": 3.062562989311043e-05, + "loss": 1.9256, + "step": 27570 + }, + { + "epoch": 0.43, + "learning_rate": 3.061375615479855e-05, + "loss": 1.7197, + "step": 27580 + }, + { + "epoch": 0.43, + "learning_rate": 3.060188108275947e-05, + "loss": 1.6927, + "step": 27590 + }, + { + "epoch": 0.43, + "learning_rate": 3.0590004679814496e-05, + "loss": 1.634, + "step": 27600 + }, + { + "epoch": 0.43, + "learning_rate": 3.0578126948785256e-05, + "loss": 1.5652, + "step": 27610 + }, + { + "epoch": 0.43, + "learning_rate": 3.0566247892493674e-05, + "loss": 1.763, + "step": 27620 + }, + { + "epoch": 0.43, + "learning_rate": 3.055436751376199e-05, + "loss": 1.6584, + "step": 27630 + }, + { + "epoch": 0.43, + "learning_rate": 3.054248581541278e-05, + "loss": 1.8066, + "step": 27640 + }, + { + "epoch": 0.43, + "learning_rate": 3.053060280026892e-05, + "loss": 1.7909, + "step": 27650 + }, + { + "epoch": 0.43, + "learning_rate": 3.05187184711536e-05, + "loss": 1.8564, + "step": 27660 + }, + { + "epoch": 0.43, + "learning_rate": 3.050683283089032e-05, + "loss": 1.8838, + "step": 27670 + }, + { + "epoch": 0.43, + "learning_rate": 3.0494945882302896e-05, + "loss": 1.8863, + "step": 27680 + }, + { + "epoch": 0.43, + "learning_rate": 3.0483057628215445e-05, + "loss": 1.8339, + "step": 27690 + }, + { + "epoch": 0.43, + "learning_rate": 3.047116807145241e-05, + "loss": 1.7681, + "step": 27700 + }, + { + "epoch": 0.43, + "learning_rate": 3.0459277214838532e-05, + "loss": 1.7846, + "step": 27710 + }, + { + "epoch": 0.43, + "learning_rate": 3.0447385061198867e-05, + "loss": 1.7298, + "step": 27720 + }, + { + "epoch": 0.43, + "learning_rate": 3.0435491613358775e-05, + "loss": 1.6944, + "step": 27730 + }, + { + "epoch": 0.43, + "learning_rate": 3.0423596874143928e-05, + "loss": 1.7725, + "step": 27740 + }, + { + "epoch": 0.43, + "learning_rate": 3.0411700846380294e-05, + "loss": 1.5585, + "step": 27750 + }, + { + "epoch": 0.43, + "learning_rate": 3.0399803532894155e-05, + "loss": 1.8569, + "step": 27760 + }, + { + "epoch": 0.43, + "learning_rate": 3.0387904936512107e-05, + "loss": 1.7329, + "step": 27770 + }, + { + "epoch": 0.43, + "learning_rate": 3.037600506006103e-05, + "loss": 1.6108, + "step": 27780 + }, + { + "epoch": 0.43, + "learning_rate": 3.036410390636813e-05, + "loss": 1.6965, + "step": 27790 + }, + { + "epoch": 0.43, + "learning_rate": 3.0352201478260904e-05, + "loss": 1.8549, + "step": 27800 + }, + { + "epoch": 0.43, + "learning_rate": 3.0340297778567152e-05, + "loss": 1.6424, + "step": 27810 + }, + { + "epoch": 0.43, + "learning_rate": 3.032839281011497e-05, + "loss": 1.5738, + "step": 27820 + }, + { + "epoch": 0.43, + "learning_rate": 3.0316486575732782e-05, + "loss": 1.5422, + "step": 27830 + }, + { + "epoch": 0.43, + "learning_rate": 3.0304579078249284e-05, + "loss": 1.5878, + "step": 27840 + }, + { + "epoch": 0.43, + "learning_rate": 3.0292670320493483e-05, + "loss": 1.5613, + "step": 27850 + }, + { + "epoch": 0.43, + "learning_rate": 3.0280760305294686e-05, + "loss": 1.5252, + "step": 27860 + }, + { + "epoch": 0.43, + "learning_rate": 3.0268849035482493e-05, + "loss": 1.6483, + "step": 27870 + }, + { + "epoch": 0.43, + "learning_rate": 3.0256936513886812e-05, + "loss": 1.6335, + "step": 27880 + }, + { + "epoch": 0.43, + "learning_rate": 3.024502274333785e-05, + "loss": 1.7424, + "step": 27890 + }, + { + "epoch": 0.43, + "learning_rate": 3.0233107726666092e-05, + "loss": 1.8983, + "step": 27900 + }, + { + "epoch": 0.43, + "learning_rate": 3.0221191466702337e-05, + "loss": 1.688, + "step": 27910 + }, + { + "epoch": 0.43, + "learning_rate": 3.0209273966277675e-05, + "loss": 1.7073, + "step": 27920 + }, + { + "epoch": 0.43, + "learning_rate": 3.019735522822348e-05, + "loss": 1.7371, + "step": 27930 + }, + { + "epoch": 0.43, + "learning_rate": 3.018543525537144e-05, + "loss": 1.7472, + "step": 27940 + }, + { + "epoch": 0.43, + "learning_rate": 3.0173514050553518e-05, + "loss": 1.698, + "step": 27950 + }, + { + "epoch": 0.43, + "learning_rate": 3.0161591616601982e-05, + "loss": 1.6788, + "step": 27960 + }, + { + "epoch": 0.43, + "learning_rate": 3.0149667956349393e-05, + "loss": 1.5568, + "step": 27970 + }, + { + "epoch": 0.43, + "learning_rate": 3.0137743072628578e-05, + "loss": 1.4975, + "step": 27980 + }, + { + "epoch": 0.43, + "learning_rate": 3.0125816968272692e-05, + "loss": 1.4826, + "step": 27990 + }, + { + "epoch": 0.43, + "learning_rate": 3.0113889646115156e-05, + "loss": 1.6935, + "step": 28000 + }, + { + "epoch": 0.43, + "learning_rate": 3.0101961108989685e-05, + "loss": 1.8728, + "step": 28010 + }, + { + "epoch": 0.43, + "learning_rate": 3.0090031359730288e-05, + "loss": 2.0207, + "step": 28020 + }, + { + "epoch": 0.43, + "learning_rate": 3.0078100401171256e-05, + "loss": 1.8829, + "step": 28030 + }, + { + "epoch": 0.44, + "learning_rate": 3.0066168236147168e-05, + "loss": 1.6192, + "step": 28040 + }, + { + "epoch": 0.44, + "learning_rate": 3.0054234867492893e-05, + "loss": 1.4405, + "step": 28050 + }, + { + "epoch": 0.44, + "learning_rate": 3.0042300298043584e-05, + "loss": 1.4945, + "step": 28060 + }, + { + "epoch": 0.44, + "learning_rate": 3.003036453063469e-05, + "loss": 1.7648, + "step": 28070 + }, + { + "epoch": 0.44, + "learning_rate": 3.0018427568101916e-05, + "loss": 1.7953, + "step": 28080 + }, + { + "epoch": 0.44, + "learning_rate": 3.0006489413281274e-05, + "loss": 1.6217, + "step": 28090 + }, + { + "epoch": 0.44, + "learning_rate": 2.999455006900907e-05, + "loss": 1.7079, + "step": 28100 + }, + { + "epoch": 0.44, + "learning_rate": 2.9982609538121852e-05, + "loss": 1.7493, + "step": 28110 + }, + { + "epoch": 0.44, + "learning_rate": 2.9970667823456487e-05, + "loss": 1.738, + "step": 28120 + }, + { + "epoch": 0.44, + "learning_rate": 2.9958724927850116e-05, + "loss": 1.7836, + "step": 28130 + }, + { + "epoch": 0.44, + "learning_rate": 2.9946780854140148e-05, + "loss": 1.7041, + "step": 28140 + }, + { + "epoch": 0.44, + "learning_rate": 2.993483560516428e-05, + "loss": 1.7077, + "step": 28150 + }, + { + "epoch": 0.44, + "learning_rate": 2.9922889183760495e-05, + "loss": 1.6833, + "step": 28160 + }, + { + "epoch": 0.44, + "learning_rate": 2.991094159276704e-05, + "loss": 1.7331, + "step": 28170 + }, + { + "epoch": 0.44, + "learning_rate": 2.9898992835022445e-05, + "loss": 1.6459, + "step": 28180 + }, + { + "epoch": 0.44, + "learning_rate": 2.988704291336552e-05, + "loss": 1.5785, + "step": 28190 + }, + { + "epoch": 0.44, + "learning_rate": 2.987509183063536e-05, + "loss": 1.6738, + "step": 28200 + }, + { + "epoch": 0.44, + "learning_rate": 2.986313958967132e-05, + "loss": 1.4948, + "step": 28210 + }, + { + "epoch": 0.44, + "learning_rate": 2.985118619331303e-05, + "loss": 1.6859, + "step": 28220 + }, + { + "epoch": 0.44, + "learning_rate": 2.9839231644400422e-05, + "loss": 1.5797, + "step": 28230 + }, + { + "epoch": 0.44, + "learning_rate": 2.9827275945773658e-05, + "loss": 1.428, + "step": 28240 + }, + { + "epoch": 0.44, + "learning_rate": 2.9815319100273208e-05, + "loss": 1.3978, + "step": 28250 + }, + { + "epoch": 0.44, + "learning_rate": 2.9803361110739807e-05, + "loss": 1.405, + "step": 28260 + }, + { + "epoch": 0.44, + "learning_rate": 2.979140198001445e-05, + "loss": 1.4238, + "step": 28270 + }, + { + "epoch": 0.44, + "learning_rate": 2.9779441710938406e-05, + "loss": 1.4123, + "step": 28280 + }, + { + "epoch": 0.44, + "learning_rate": 2.976748030635323e-05, + "loss": 1.4245, + "step": 28290 + }, + { + "epoch": 0.44, + "learning_rate": 2.9755517769100732e-05, + "loss": 1.4344, + "step": 28300 + }, + { + "epoch": 0.44, + "learning_rate": 2.9743554102022993e-05, + "loss": 1.4337, + "step": 28310 + }, + { + "epoch": 0.44, + "learning_rate": 2.9731589307962364e-05, + "loss": 1.4068, + "step": 28320 + }, + { + "epoch": 0.44, + "learning_rate": 2.971962338976147e-05, + "loss": 1.5998, + "step": 28330 + }, + { + "epoch": 0.44, + "learning_rate": 2.970765635026319e-05, + "loss": 1.7379, + "step": 28340 + }, + { + "epoch": 0.44, + "learning_rate": 2.9695688192310684e-05, + "loss": 1.7453, + "step": 28350 + }, + { + "epoch": 0.44, + "learning_rate": 2.9683718918747365e-05, + "loss": 1.6703, + "step": 28360 + }, + { + "epoch": 0.44, + "learning_rate": 2.9671748532416922e-05, + "loss": 1.761, + "step": 28370 + }, + { + "epoch": 0.44, + "learning_rate": 2.9659777036163293e-05, + "loss": 1.7083, + "step": 28380 + }, + { + "epoch": 0.44, + "learning_rate": 2.96478044328307e-05, + "loss": 1.7116, + "step": 28390 + }, + { + "epoch": 0.44, + "learning_rate": 2.9635830725263613e-05, + "loss": 1.6249, + "step": 28400 + }, + { + "epoch": 0.44, + "learning_rate": 2.962385591630677e-05, + "loss": 1.5438, + "step": 28410 + }, + { + "epoch": 0.44, + "learning_rate": 2.9611880008805164e-05, + "loss": 1.5816, + "step": 28420 + }, + { + "epoch": 0.44, + "learning_rate": 2.9599903005604067e-05, + "loss": 1.7611, + "step": 28430 + }, + { + "epoch": 0.44, + "learning_rate": 2.9587924909548987e-05, + "loss": 1.719, + "step": 28440 + }, + { + "epoch": 0.44, + "learning_rate": 2.957594572348571e-05, + "loss": 1.687, + "step": 28450 + }, + { + "epoch": 0.44, + "learning_rate": 2.9563965450260273e-05, + "loss": 1.643, + "step": 28460 + }, + { + "epoch": 0.44, + "learning_rate": 2.955198409271897e-05, + "loss": 1.6359, + "step": 28470 + }, + { + "epoch": 0.44, + "learning_rate": 2.9540001653708354e-05, + "loss": 1.6959, + "step": 28480 + }, + { + "epoch": 0.44, + "learning_rate": 2.952801813607524e-05, + "loss": 1.8389, + "step": 28490 + }, + { + "epoch": 0.44, + "learning_rate": 2.9516033542666705e-05, + "loss": 1.6751, + "step": 28500 + }, + { + "epoch": 0.44, + "learning_rate": 2.9504047876330054e-05, + "loss": 1.5968, + "step": 28510 + }, + { + "epoch": 0.44, + "learning_rate": 2.9492061139912873e-05, + "loss": 1.5431, + "step": 28520 + }, + { + "epoch": 0.44, + "learning_rate": 2.948007333626301e-05, + "loss": 1.9211, + "step": 28530 + }, + { + "epoch": 0.44, + "learning_rate": 2.9468084468228524e-05, + "loss": 1.8268, + "step": 28540 + }, + { + "epoch": 0.44, + "learning_rate": 2.9456094538657763e-05, + "loss": 1.6954, + "step": 28550 + }, + { + "epoch": 0.44, + "learning_rate": 2.9444103550399332e-05, + "loss": 1.7823, + "step": 28560 + }, + { + "epoch": 0.44, + "learning_rate": 2.9432111506302058e-05, + "loss": 1.9149, + "step": 28570 + }, + { + "epoch": 0.44, + "learning_rate": 2.9420118409215036e-05, + "loss": 1.8603, + "step": 28580 + }, + { + "epoch": 0.44, + "learning_rate": 2.940812426198762e-05, + "loss": 1.5809, + "step": 28590 + }, + { + "epoch": 0.44, + "learning_rate": 2.9396129067469387e-05, + "loss": 1.5678, + "step": 28600 + }, + { + "epoch": 0.44, + "learning_rate": 2.9384132828510186e-05, + "loss": 1.7516, + "step": 28610 + }, + { + "epoch": 0.44, + "learning_rate": 2.937213554796011e-05, + "loss": 1.7715, + "step": 28620 + }, + { + "epoch": 0.44, + "learning_rate": 2.9360137228669505e-05, + "loss": 1.7714, + "step": 28630 + }, + { + "epoch": 0.44, + "learning_rate": 2.9348137873488936e-05, + "loss": 1.6796, + "step": 28640 + }, + { + "epoch": 0.44, + "learning_rate": 2.9336137485269238e-05, + "loss": 1.6385, + "step": 28650 + }, + { + "epoch": 0.44, + "learning_rate": 2.9324136066861496e-05, + "loss": 1.648, + "step": 28660 + }, + { + "epoch": 0.44, + "learning_rate": 2.9312133621117023e-05, + "loss": 1.6922, + "step": 28670 + }, + { + "epoch": 0.44, + "learning_rate": 2.930013015088739e-05, + "loss": 1.7492, + "step": 28680 + }, + { + "epoch": 0.45, + "learning_rate": 2.92881256590244e-05, + "loss": 1.8912, + "step": 28690 + }, + { + "epoch": 0.45, + "learning_rate": 2.9276120148380105e-05, + "loss": 1.6817, + "step": 28700 + }, + { + "epoch": 0.45, + "learning_rate": 2.92641136218068e-05, + "loss": 1.6823, + "step": 28710 + }, + { + "epoch": 0.45, + "learning_rate": 2.9252106082157016e-05, + "loss": 1.5804, + "step": 28720 + }, + { + "epoch": 0.45, + "learning_rate": 2.9240097532283527e-05, + "loss": 1.602, + "step": 28730 + }, + { + "epoch": 0.45, + "learning_rate": 2.9228087975039348e-05, + "loss": 1.5708, + "step": 28740 + }, + { + "epoch": 0.45, + "learning_rate": 2.9216077413277736e-05, + "loss": 1.52, + "step": 28750 + }, + { + "epoch": 0.45, + "learning_rate": 2.9204065849852186e-05, + "loss": 1.4574, + "step": 28760 + }, + { + "epoch": 0.45, + "learning_rate": 2.9192053287616423e-05, + "loss": 1.494, + "step": 28770 + }, + { + "epoch": 0.45, + "learning_rate": 2.9180039729424413e-05, + "loss": 1.3531, + "step": 28780 + }, + { + "epoch": 0.45, + "learning_rate": 2.9168025178130376e-05, + "loss": 1.316, + "step": 28790 + }, + { + "epoch": 0.45, + "learning_rate": 2.915600963658873e-05, + "loss": 1.295, + "step": 28800 + }, + { + "epoch": 0.45, + "learning_rate": 2.9143993107654166e-05, + "loss": 1.3081, + "step": 28810 + }, + { + "epoch": 0.45, + "learning_rate": 2.9131975594181595e-05, + "loss": 1.8561, + "step": 28820 + }, + { + "epoch": 0.45, + "learning_rate": 2.9119957099026152e-05, + "loss": 1.6407, + "step": 28830 + }, + { + "epoch": 0.45, + "learning_rate": 2.9107937625043217e-05, + "loss": 1.6726, + "step": 28840 + }, + { + "epoch": 0.45, + "learning_rate": 2.9095917175088412e-05, + "loss": 1.9138, + "step": 28850 + }, + { + "epoch": 0.45, + "learning_rate": 2.908389575201757e-05, + "loss": 1.5129, + "step": 28860 + }, + { + "epoch": 0.45, + "learning_rate": 2.9071873358686762e-05, + "loss": 1.4201, + "step": 28870 + }, + { + "epoch": 0.45, + "learning_rate": 2.9059849997952294e-05, + "loss": 1.3606, + "step": 28880 + }, + { + "epoch": 0.45, + "learning_rate": 2.904782567267071e-05, + "loss": 1.3055, + "step": 28890 + }, + { + "epoch": 0.45, + "learning_rate": 2.9035800385698754e-05, + "loss": 1.2525, + "step": 28900 + }, + { + "epoch": 0.45, + "learning_rate": 2.9023774139893424e-05, + "loss": 1.263, + "step": 28910 + }, + { + "epoch": 0.45, + "learning_rate": 2.9011746938111945e-05, + "loss": 1.2766, + "step": 28920 + }, + { + "epoch": 0.45, + "learning_rate": 2.899971878321176e-05, + "loss": 1.7647, + "step": 28930 + }, + { + "epoch": 0.45, + "learning_rate": 2.898768967805054e-05, + "loss": 1.7972, + "step": 28940 + }, + { + "epoch": 0.45, + "learning_rate": 2.897565962548618e-05, + "loss": 1.8002, + "step": 28950 + }, + { + "epoch": 0.45, + "learning_rate": 2.8963628628376815e-05, + "loss": 1.7473, + "step": 28960 + }, + { + "epoch": 0.45, + "learning_rate": 2.8951596689580786e-05, + "loss": 1.7605, + "step": 28970 + }, + { + "epoch": 0.45, + "learning_rate": 2.893956381195666e-05, + "loss": 1.742, + "step": 28980 + }, + { + "epoch": 0.45, + "learning_rate": 2.892752999836324e-05, + "loss": 1.7703, + "step": 28990 + }, + { + "epoch": 0.45, + "learning_rate": 2.8915495251659537e-05, + "loss": 1.7393, + "step": 29000 + }, + { + "epoch": 0.45, + "learning_rate": 2.890345957470479e-05, + "loss": 1.6704, + "step": 29010 + }, + { + "epoch": 0.45, + "learning_rate": 2.8891422970358467e-05, + "loss": 1.6492, + "step": 29020 + }, + { + "epoch": 0.45, + "learning_rate": 2.8879385441480232e-05, + "loss": 1.578, + "step": 29030 + }, + { + "epoch": 0.45, + "learning_rate": 2.8867346990930004e-05, + "loss": 1.593, + "step": 29040 + }, + { + "epoch": 0.45, + "learning_rate": 2.8855307621567895e-05, + "loss": 1.6041, + "step": 29050 + }, + { + "epoch": 0.45, + "learning_rate": 2.8843267336254233e-05, + "loss": 1.6087, + "step": 29060 + }, + { + "epoch": 0.45, + "learning_rate": 2.8831226137849578e-05, + "loss": 1.7016, + "step": 29070 + }, + { + "epoch": 0.45, + "learning_rate": 2.88191840292147e-05, + "loss": 1.6694, + "step": 29080 + }, + { + "epoch": 0.45, + "learning_rate": 2.8807141013210596e-05, + "loss": 1.8301, + "step": 29090 + }, + { + "epoch": 0.45, + "learning_rate": 2.879509709269846e-05, + "loss": 2.0191, + "step": 29100 + }, + { + "epoch": 0.45, + "learning_rate": 2.8783052270539712e-05, + "loss": 1.6776, + "step": 29110 + }, + { + "epoch": 0.45, + "learning_rate": 2.8771006549595985e-05, + "loss": 1.7866, + "step": 29120 + }, + { + "epoch": 0.45, + "learning_rate": 2.8758959932729125e-05, + "loss": 1.9466, + "step": 29130 + }, + { + "epoch": 0.45, + "learning_rate": 2.8746912422801192e-05, + "loss": 1.7838, + "step": 29140 + }, + { + "epoch": 0.45, + "learning_rate": 2.8734864022674452e-05, + "loss": 1.7466, + "step": 29150 + }, + { + "epoch": 0.45, + "learning_rate": 2.8722814735211395e-05, + "loss": 1.6114, + "step": 29160 + }, + { + "epoch": 0.45, + "learning_rate": 2.8710764563274712e-05, + "loss": 1.4811, + "step": 29170 + }, + { + "epoch": 0.45, + "learning_rate": 2.8698713509727303e-05, + "loss": 1.5204, + "step": 29180 + }, + { + "epoch": 0.45, + "learning_rate": 2.868666157743229e-05, + "loss": 1.7502, + "step": 29190 + }, + { + "epoch": 0.45, + "learning_rate": 2.867460876925298e-05, + "loss": 1.3763, + "step": 29200 + }, + { + "epoch": 0.45, + "learning_rate": 2.8662555088052913e-05, + "loss": 1.3838, + "step": 29210 + }, + { + "epoch": 0.45, + "learning_rate": 2.8650500536695833e-05, + "loss": 1.3707, + "step": 29220 + }, + { + "epoch": 0.45, + "learning_rate": 2.8638445118045666e-05, + "loss": 1.6074, + "step": 29230 + }, + { + "epoch": 0.45, + "learning_rate": 2.862638883496657e-05, + "loss": 1.8818, + "step": 29240 + }, + { + "epoch": 0.45, + "learning_rate": 2.861433169032291e-05, + "loss": 1.5339, + "step": 29250 + }, + { + "epoch": 0.45, + "learning_rate": 2.860227368697923e-05, + "loss": 1.4017, + "step": 29260 + }, + { + "epoch": 0.45, + "learning_rate": 2.8590214827800303e-05, + "loss": 1.3372, + "step": 29270 + }, + { + "epoch": 0.45, + "learning_rate": 2.85781551156511e-05, + "loss": 1.4301, + "step": 29280 + }, + { + "epoch": 0.45, + "learning_rate": 2.8566094553396783e-05, + "loss": 1.9031, + "step": 29290 + }, + { + "epoch": 0.45, + "learning_rate": 2.8554033143902724e-05, + "loss": 1.6892, + "step": 29300 + }, + { + "epoch": 0.45, + "learning_rate": 2.8541970890034503e-05, + "loss": 1.4832, + "step": 29310 + }, + { + "epoch": 0.45, + "learning_rate": 2.8529907794657883e-05, + "loss": 1.3524, + "step": 29320 + }, + { + "epoch": 0.46, + "learning_rate": 2.8517843860638855e-05, + "loss": 1.3083, + "step": 29330 + }, + { + "epoch": 0.46, + "learning_rate": 2.8505779090843575e-05, + "loss": 1.2614, + "step": 29340 + }, + { + "epoch": 0.46, + "learning_rate": 2.8493713488138433e-05, + "loss": 1.2801, + "step": 29350 + }, + { + "epoch": 0.46, + "learning_rate": 2.8481647055389982e-05, + "loss": 1.6889, + "step": 29360 + }, + { + "epoch": 0.46, + "learning_rate": 2.846957979546499e-05, + "loss": 1.7342, + "step": 29370 + }, + { + "epoch": 0.46, + "learning_rate": 2.8457511711230435e-05, + "loss": 1.7363, + "step": 29380 + }, + { + "epoch": 0.46, + "learning_rate": 2.8445442805553458e-05, + "loss": 1.8042, + "step": 29390 + }, + { + "epoch": 0.46, + "learning_rate": 2.8433373081301418e-05, + "loss": 1.6996, + "step": 29400 + }, + { + "epoch": 0.46, + "learning_rate": 2.842130254134187e-05, + "loss": 1.7642, + "step": 29410 + }, + { + "epoch": 0.46, + "learning_rate": 2.8409231188542562e-05, + "loss": 1.7229, + "step": 29420 + }, + { + "epoch": 0.46, + "learning_rate": 2.8397159025771412e-05, + "loss": 1.8722, + "step": 29430 + }, + { + "epoch": 0.46, + "learning_rate": 2.838508605589656e-05, + "loss": 1.8483, + "step": 29440 + }, + { + "epoch": 0.46, + "learning_rate": 2.837301228178632e-05, + "loss": 1.8254, + "step": 29450 + }, + { + "epoch": 0.46, + "learning_rate": 2.8360937706309203e-05, + "loss": 1.8034, + "step": 29460 + }, + { + "epoch": 0.46, + "learning_rate": 2.834886233233392e-05, + "loss": 1.887, + "step": 29470 + }, + { + "epoch": 0.46, + "learning_rate": 2.8336786162729355e-05, + "loss": 1.9504, + "step": 29480 + }, + { + "epoch": 0.46, + "learning_rate": 2.832470920036459e-05, + "loss": 1.8102, + "step": 29490 + }, + { + "epoch": 0.46, + "learning_rate": 2.8312631448108888e-05, + "loss": 1.6279, + "step": 29500 + }, + { + "epoch": 0.46, + "learning_rate": 2.830055290883172e-05, + "loss": 1.8089, + "step": 29510 + }, + { + "epoch": 0.46, + "learning_rate": 2.8288473585402703e-05, + "loss": 1.6192, + "step": 29520 + }, + { + "epoch": 0.46, + "learning_rate": 2.827639348069169e-05, + "loss": 1.5336, + "step": 29530 + }, + { + "epoch": 0.46, + "learning_rate": 2.8264312597568675e-05, + "loss": 1.8823, + "step": 29540 + }, + { + "epoch": 0.46, + "learning_rate": 2.825223093890389e-05, + "loss": 1.748, + "step": 29550 + }, + { + "epoch": 0.46, + "learning_rate": 2.8240148507567687e-05, + "loss": 1.7695, + "step": 29560 + }, + { + "epoch": 0.46, + "learning_rate": 2.8228065306430644e-05, + "loss": 1.8945, + "step": 29570 + }, + { + "epoch": 0.46, + "learning_rate": 2.8215981338363523e-05, + "loss": 1.889, + "step": 29580 + }, + { + "epoch": 0.46, + "learning_rate": 2.820389660623724e-05, + "loss": 1.8328, + "step": 29590 + }, + { + "epoch": 0.46, + "learning_rate": 2.8191811112922916e-05, + "loss": 1.7398, + "step": 29600 + }, + { + "epoch": 0.46, + "learning_rate": 2.8179724861291852e-05, + "loss": 1.7906, + "step": 29610 + }, + { + "epoch": 0.46, + "learning_rate": 2.8167637854215518e-05, + "loss": 1.5268, + "step": 29620 + }, + { + "epoch": 0.46, + "learning_rate": 2.8155550094565562e-05, + "loss": 1.7252, + "step": 29630 + }, + { + "epoch": 0.46, + "learning_rate": 2.8143461585213832e-05, + "loss": 1.7338, + "step": 29640 + }, + { + "epoch": 0.46, + "learning_rate": 2.8131372329032333e-05, + "loss": 1.707, + "step": 29650 + }, + { + "epoch": 0.46, + "learning_rate": 2.8119282328893255e-05, + "loss": 1.6499, + "step": 29660 + }, + { + "epoch": 0.46, + "learning_rate": 2.810719158766896e-05, + "loss": 1.7669, + "step": 29670 + }, + { + "epoch": 0.46, + "learning_rate": 2.8095100108231998e-05, + "loss": 1.5605, + "step": 29680 + }, + { + "epoch": 0.46, + "learning_rate": 2.8083007893455077e-05, + "loss": 1.5086, + "step": 29690 + }, + { + "epoch": 0.46, + "learning_rate": 2.807091494621109e-05, + "loss": 1.4872, + "step": 29700 + }, + { + "epoch": 0.46, + "learning_rate": 2.805882126937312e-05, + "loss": 1.46, + "step": 29710 + }, + { + "epoch": 0.46, + "learning_rate": 2.8046726865814383e-05, + "loss": 1.4358, + "step": 29720 + }, + { + "epoch": 0.46, + "learning_rate": 2.8034631738408296e-05, + "loss": 1.5663, + "step": 29730 + }, + { + "epoch": 0.46, + "learning_rate": 2.8022535890028462e-05, + "loss": 1.8098, + "step": 29740 + }, + { + "epoch": 0.46, + "learning_rate": 2.801043932354861e-05, + "loss": 1.8617, + "step": 29750 + }, + { + "epoch": 0.46, + "learning_rate": 2.7998342041842683e-05, + "loss": 1.836, + "step": 29760 + }, + { + "epoch": 0.46, + "learning_rate": 2.798624404778477e-05, + "loss": 1.7425, + "step": 29770 + }, + { + "epoch": 0.46, + "learning_rate": 2.7974145344249142e-05, + "loss": 1.7579, + "step": 29780 + }, + { + "epoch": 0.46, + "learning_rate": 2.7962045934110225e-05, + "loss": 1.7358, + "step": 29790 + }, + { + "epoch": 0.46, + "learning_rate": 2.7949945820242628e-05, + "loss": 1.89, + "step": 29800 + }, + { + "epoch": 0.46, + "learning_rate": 2.7937845005521125e-05, + "loss": 1.8189, + "step": 29810 + }, + { + "epoch": 0.46, + "learning_rate": 2.7925743492820632e-05, + "loss": 1.6827, + "step": 29820 + }, + { + "epoch": 0.46, + "learning_rate": 2.791364128501627e-05, + "loss": 1.807, + "step": 29830 + }, + { + "epoch": 0.46, + "learning_rate": 2.7901538384983296e-05, + "loss": 1.6965, + "step": 29840 + }, + { + "epoch": 0.46, + "learning_rate": 2.788943479559713e-05, + "loss": 2.0146, + "step": 29850 + }, + { + "epoch": 0.46, + "learning_rate": 2.7877330519733386e-05, + "loss": 1.9342, + "step": 29860 + }, + { + "epoch": 0.46, + "learning_rate": 2.786522556026781e-05, + "loss": 1.8879, + "step": 29870 + }, + { + "epoch": 0.46, + "learning_rate": 2.785311992007633e-05, + "loss": 1.926, + "step": 29880 + }, + { + "epoch": 0.46, + "learning_rate": 2.7841013602035022e-05, + "loss": 1.7062, + "step": 29890 + }, + { + "epoch": 0.46, + "learning_rate": 2.7828906609020128e-05, + "loss": 1.4731, + "step": 29900 + }, + { + "epoch": 0.46, + "learning_rate": 2.7816798943908057e-05, + "loss": 1.3973, + "step": 29910 + }, + { + "epoch": 0.46, + "learning_rate": 2.7804690609575362e-05, + "loss": 1.6483, + "step": 29920 + }, + { + "epoch": 0.46, + "learning_rate": 2.7792581608898772e-05, + "loss": 1.5568, + "step": 29930 + }, + { + "epoch": 0.46, + "learning_rate": 2.7780471944755166e-05, + "loss": 1.7068, + "step": 29940 + }, + { + "epoch": 0.46, + "learning_rate": 2.7768361620021578e-05, + "loss": 1.7387, + "step": 29950 + }, + { + "epoch": 0.46, + "learning_rate": 2.775625063757521e-05, + "loss": 1.6133, + "step": 29960 + }, + { + "epoch": 0.46, + "learning_rate": 2.7744139000293413e-05, + "loss": 1.5842, + "step": 29970 + }, + { + "epoch": 0.47, + "learning_rate": 2.773202671105369e-05, + "loss": 1.5144, + "step": 29980 + }, + { + "epoch": 0.47, + "learning_rate": 2.7719913772733698e-05, + "loss": 1.7253, + "step": 29990 + }, + { + "epoch": 0.47, + "learning_rate": 2.7707800188211252e-05, + "loss": 2.3502, + "step": 30000 + }, + { + "epoch": 0.47, + "eval_loss": 1.6172226667404175, + "eval_runtime": 82.1504, + "eval_samples_per_second": 36.518, + "eval_steps_per_second": 4.565, + "step": 30000 + }, + { + "epoch": 0.47, + "learning_rate": 2.7695685960364338e-05, + "loss": 1.8308, + "step": 30010 + }, + { + "epoch": 0.47, + "learning_rate": 2.768357109207106e-05, + "loss": 1.8016, + "step": 30020 + }, + { + "epoch": 0.47, + "learning_rate": 2.7671455586209695e-05, + "loss": 1.7465, + "step": 30030 + }, + { + "epoch": 0.47, + "learning_rate": 2.765933944565868e-05, + "loss": 1.6972, + "step": 30040 + }, + { + "epoch": 0.47, + "learning_rate": 2.7647222673296575e-05, + "loss": 1.5448, + "step": 30050 + }, + { + "epoch": 0.47, + "learning_rate": 2.7635105272002116e-05, + "loss": 1.8143, + "step": 30060 + }, + { + "epoch": 0.47, + "learning_rate": 2.7622987244654185e-05, + "loss": 1.8477, + "step": 30070 + }, + { + "epoch": 0.47, + "learning_rate": 2.761086859413179e-05, + "loss": 1.8495, + "step": 30080 + }, + { + "epoch": 0.47, + "learning_rate": 2.7598749323314117e-05, + "loss": 1.9997, + "step": 30090 + }, + { + "epoch": 0.47, + "learning_rate": 2.758662943508048e-05, + "loss": 1.6225, + "step": 30100 + }, + { + "epoch": 0.47, + "learning_rate": 2.757450893231035e-05, + "loss": 1.7279, + "step": 30110 + }, + { + "epoch": 0.47, + "learning_rate": 2.756238781788334e-05, + "loss": 1.6533, + "step": 30120 + }, + { + "epoch": 0.47, + "learning_rate": 2.7550266094679205e-05, + "loss": 1.5769, + "step": 30130 + }, + { + "epoch": 0.47, + "learning_rate": 2.7538143765577852e-05, + "loss": 1.6187, + "step": 30140 + }, + { + "epoch": 0.47, + "learning_rate": 2.752602083345932e-05, + "loss": 1.6829, + "step": 30150 + }, + { + "epoch": 0.47, + "learning_rate": 2.7513897301203808e-05, + "loss": 1.8075, + "step": 30160 + }, + { + "epoch": 0.47, + "learning_rate": 2.7501773171691648e-05, + "loss": 1.8943, + "step": 30170 + }, + { + "epoch": 0.47, + "learning_rate": 2.74896484478033e-05, + "loss": 1.5768, + "step": 30180 + }, + { + "epoch": 0.47, + "learning_rate": 2.747752313241939e-05, + "loss": 1.4907, + "step": 30190 + }, + { + "epoch": 0.47, + "learning_rate": 2.7465397228420688e-05, + "loss": 1.5801, + "step": 30200 + }, + { + "epoch": 0.47, + "learning_rate": 2.745327073868807e-05, + "loss": 1.7734, + "step": 30210 + }, + { + "epoch": 0.47, + "learning_rate": 2.744114366610257e-05, + "loss": 1.6839, + "step": 30220 + }, + { + "epoch": 0.47, + "learning_rate": 2.7429016013545376e-05, + "loss": 1.6181, + "step": 30230 + }, + { + "epoch": 0.47, + "learning_rate": 2.7416887783897798e-05, + "loss": 1.6177, + "step": 30240 + }, + { + "epoch": 0.47, + "learning_rate": 2.740475898004127e-05, + "loss": 1.6521, + "step": 30250 + }, + { + "epoch": 0.47, + "learning_rate": 2.7392629604857393e-05, + "loss": 1.7239, + "step": 30260 + }, + { + "epoch": 0.47, + "learning_rate": 2.7380499661227882e-05, + "loss": 1.6157, + "step": 30270 + }, + { + "epoch": 0.47, + "learning_rate": 2.7368369152034583e-05, + "loss": 1.6048, + "step": 30280 + }, + { + "epoch": 0.47, + "learning_rate": 2.7356238080159503e-05, + "loss": 1.7573, + "step": 30290 + }, + { + "epoch": 0.47, + "learning_rate": 2.7344106448484763e-05, + "loss": 1.6857, + "step": 30300 + }, + { + "epoch": 0.47, + "learning_rate": 2.7331974259892606e-05, + "loss": 1.5751, + "step": 30310 + }, + { + "epoch": 0.47, + "learning_rate": 2.7319841517265438e-05, + "loss": 1.7957, + "step": 30320 + }, + { + "epoch": 0.47, + "learning_rate": 2.7307708223485762e-05, + "loss": 1.7801, + "step": 30330 + }, + { + "epoch": 0.47, + "learning_rate": 2.7295574381436252e-05, + "loss": 1.7531, + "step": 30340 + }, + { + "epoch": 0.47, + "learning_rate": 2.7283439993999673e-05, + "loss": 1.6598, + "step": 30350 + }, + { + "epoch": 0.47, + "learning_rate": 2.7271305064058938e-05, + "loss": 1.6426, + "step": 30360 + }, + { + "epoch": 0.47, + "learning_rate": 2.7259169594497098e-05, + "loss": 1.5347, + "step": 30370 + }, + { + "epoch": 0.47, + "learning_rate": 2.7247033588197306e-05, + "loss": 1.7294, + "step": 30380 + }, + { + "epoch": 0.47, + "learning_rate": 2.723489704804287e-05, + "loss": 1.7253, + "step": 30390 + }, + { + "epoch": 0.47, + "learning_rate": 2.722275997691721e-05, + "loss": 1.6307, + "step": 30400 + }, + { + "epoch": 0.47, + "learning_rate": 2.7210622377703877e-05, + "loss": 1.6531, + "step": 30410 + }, + { + "epoch": 0.47, + "learning_rate": 2.7198484253286538e-05, + "loss": 1.5858, + "step": 30420 + }, + { + "epoch": 0.47, + "learning_rate": 2.7186345606548995e-05, + "loss": 1.5903, + "step": 30430 + }, + { + "epoch": 0.47, + "learning_rate": 2.717420644037518e-05, + "loss": 1.5994, + "step": 30440 + }, + { + "epoch": 0.47, + "learning_rate": 2.7162066757649134e-05, + "loss": 1.5711, + "step": 30450 + }, + { + "epoch": 0.47, + "learning_rate": 2.714992656125503e-05, + "loss": 1.557, + "step": 30460 + }, + { + "epoch": 0.47, + "learning_rate": 2.7137785854077153e-05, + "loss": 1.5776, + "step": 30470 + }, + { + "epoch": 0.47, + "learning_rate": 2.712564463899992e-05, + "loss": 1.5899, + "step": 30480 + }, + { + "epoch": 0.47, + "learning_rate": 2.7113502918907863e-05, + "loss": 1.7311, + "step": 30490 + }, + { + "epoch": 0.47, + "learning_rate": 2.7101360696685644e-05, + "loss": 1.6193, + "step": 30500 + }, + { + "epoch": 0.47, + "learning_rate": 2.7089217975218022e-05, + "loss": 1.5175, + "step": 30510 + }, + { + "epoch": 0.47, + "learning_rate": 2.7077074757389896e-05, + "loss": 1.496, + "step": 30520 + }, + { + "epoch": 0.47, + "learning_rate": 2.7064931046086277e-05, + "loss": 1.5115, + "step": 30530 + }, + { + "epoch": 0.47, + "learning_rate": 2.705278684419229e-05, + "loss": 1.4888, + "step": 30540 + }, + { + "epoch": 0.47, + "learning_rate": 2.7040642154593177e-05, + "loss": 1.7007, + "step": 30550 + }, + { + "epoch": 0.47, + "learning_rate": 2.7028496980174295e-05, + "loss": 1.7672, + "step": 30560 + }, + { + "epoch": 0.47, + "learning_rate": 2.7016351323821126e-05, + "loss": 1.6953, + "step": 30570 + }, + { + "epoch": 0.47, + "learning_rate": 2.7004205188419252e-05, + "loss": 1.4908, + "step": 30580 + }, + { + "epoch": 0.47, + "learning_rate": 2.6992058576854378e-05, + "loss": 1.4833, + "step": 30590 + }, + { + "epoch": 0.47, + "learning_rate": 2.697991149201232e-05, + "loss": 1.4356, + "step": 30600 + }, + { + "epoch": 0.47, + "learning_rate": 2.696776393677901e-05, + "loss": 1.4056, + "step": 30610 + }, + { + "epoch": 0.48, + "learning_rate": 2.6955615914040485e-05, + "loss": 1.3645, + "step": 30620 + }, + { + "epoch": 0.48, + "learning_rate": 2.6943467426682893e-05, + "loss": 1.3449, + "step": 30630 + }, + { + "epoch": 0.48, + "learning_rate": 2.6931318477592494e-05, + "loss": 1.5329, + "step": 30640 + }, + { + "epoch": 0.48, + "learning_rate": 2.6919169069655663e-05, + "loss": 2.0459, + "step": 30650 + }, + { + "epoch": 0.48, + "learning_rate": 2.690701920575889e-05, + "loss": 1.6377, + "step": 30660 + }, + { + "epoch": 0.48, + "learning_rate": 2.6894868888788742e-05, + "loss": 1.729, + "step": 30670 + }, + { + "epoch": 0.48, + "learning_rate": 2.6882718121631934e-05, + "loss": 1.7079, + "step": 30680 + }, + { + "epoch": 0.48, + "learning_rate": 2.6870566907175255e-05, + "loss": 1.7092, + "step": 30690 + }, + { + "epoch": 0.48, + "learning_rate": 2.685841524830563e-05, + "loss": 1.7776, + "step": 30700 + }, + { + "epoch": 0.48, + "learning_rate": 2.6846263147910055e-05, + "loss": 1.8686, + "step": 30710 + }, + { + "epoch": 0.48, + "learning_rate": 2.6834110608875667e-05, + "loss": 1.7686, + "step": 30720 + }, + { + "epoch": 0.48, + "learning_rate": 2.6821957634089678e-05, + "loss": 1.6347, + "step": 30730 + }, + { + "epoch": 0.48, + "learning_rate": 2.680980422643942e-05, + "loss": 1.5857, + "step": 30740 + }, + { + "epoch": 0.48, + "learning_rate": 2.679765038881233e-05, + "loss": 1.6671, + "step": 30750 + }, + { + "epoch": 0.48, + "learning_rate": 2.678549612409594e-05, + "loss": 1.6739, + "step": 30760 + }, + { + "epoch": 0.48, + "learning_rate": 2.6773341435177866e-05, + "loss": 1.5915, + "step": 30770 + }, + { + "epoch": 0.48, + "learning_rate": 2.676118632494586e-05, + "loss": 1.5745, + "step": 30780 + }, + { + "epoch": 0.48, + "learning_rate": 2.674903079628775e-05, + "loss": 1.7472, + "step": 30790 + }, + { + "epoch": 0.48, + "learning_rate": 2.6736874852091483e-05, + "loss": 1.8191, + "step": 30800 + }, + { + "epoch": 0.48, + "learning_rate": 2.672471849524507e-05, + "loss": 1.7328, + "step": 30810 + }, + { + "epoch": 0.48, + "learning_rate": 2.671256172863666e-05, + "loss": 1.7156, + "step": 30820 + }, + { + "epoch": 0.48, + "learning_rate": 2.6700404555154474e-05, + "loss": 1.8492, + "step": 30830 + }, + { + "epoch": 0.48, + "learning_rate": 2.6688246977686836e-05, + "loss": 1.8925, + "step": 30840 + }, + { + "epoch": 0.48, + "learning_rate": 2.6676088999122173e-05, + "loss": 1.5997, + "step": 30850 + }, + { + "epoch": 0.48, + "learning_rate": 2.6663930622348997e-05, + "loss": 1.58, + "step": 30860 + }, + { + "epoch": 0.48, + "learning_rate": 2.665177185025592e-05, + "loss": 1.7138, + "step": 30870 + }, + { + "epoch": 0.48, + "learning_rate": 2.6639612685731648e-05, + "loss": 1.7152, + "step": 30880 + }, + { + "epoch": 0.48, + "learning_rate": 2.6627453131664977e-05, + "loss": 1.7096, + "step": 30890 + }, + { + "epoch": 0.48, + "learning_rate": 2.6615293190944794e-05, + "loss": 1.6762, + "step": 30900 + }, + { + "epoch": 0.48, + "learning_rate": 2.660313286646009e-05, + "loss": 1.9552, + "step": 30910 + }, + { + "epoch": 0.48, + "learning_rate": 2.6590972161099937e-05, + "loss": 2.2446, + "step": 30920 + }, + { + "epoch": 0.48, + "learning_rate": 2.6578811077753495e-05, + "loss": 1.4388, + "step": 30930 + }, + { + "epoch": 0.48, + "learning_rate": 2.6566649619310025e-05, + "loss": 1.5694, + "step": 30940 + }, + { + "epoch": 0.48, + "learning_rate": 2.655448778865886e-05, + "loss": 1.7624, + "step": 30950 + }, + { + "epoch": 0.48, + "learning_rate": 2.6542325588689443e-05, + "loss": 1.5322, + "step": 30960 + }, + { + "epoch": 0.48, + "learning_rate": 2.6530163022291282e-05, + "loss": 1.5382, + "step": 30970 + }, + { + "epoch": 0.48, + "learning_rate": 2.651800009235399e-05, + "loss": 1.5375, + "step": 30980 + }, + { + "epoch": 0.48, + "learning_rate": 2.6505836801767264e-05, + "loss": 1.5104, + "step": 30990 + }, + { + "epoch": 0.48, + "learning_rate": 2.6493673153420868e-05, + "loss": 1.5047, + "step": 31000 + }, + { + "epoch": 0.48, + "learning_rate": 2.6481509150204676e-05, + "loss": 1.5781, + "step": 31010 + }, + { + "epoch": 0.48, + "learning_rate": 2.646934479500864e-05, + "loss": 1.7457, + "step": 31020 + }, + { + "epoch": 0.48, + "learning_rate": 2.6457180090722782e-05, + "loss": 1.6599, + "step": 31030 + }, + { + "epoch": 0.48, + "learning_rate": 2.644501504023722e-05, + "loss": 1.5761, + "step": 31040 + }, + { + "epoch": 0.48, + "learning_rate": 2.6432849646442148e-05, + "loss": 2.4015, + "step": 31050 + }, + { + "epoch": 0.48, + "learning_rate": 2.642068391222785e-05, + "loss": 2.4452, + "step": 31060 + }, + { + "epoch": 0.48, + "learning_rate": 2.6408517840484674e-05, + "loss": 1.6371, + "step": 31070 + }, + { + "epoch": 0.48, + "learning_rate": 2.6396351434103077e-05, + "loss": 1.6392, + "step": 31080 + }, + { + "epoch": 0.48, + "learning_rate": 2.638418469597357e-05, + "loss": 1.6121, + "step": 31090 + }, + { + "epoch": 0.48, + "learning_rate": 2.6372017628986735e-05, + "loss": 1.5775, + "step": 31100 + }, + { + "epoch": 0.48, + "learning_rate": 2.6359850236033267e-05, + "loss": 1.6328, + "step": 31110 + }, + { + "epoch": 0.48, + "learning_rate": 2.6347682520003914e-05, + "loss": 1.6165, + "step": 31120 + }, + { + "epoch": 0.48, + "learning_rate": 2.6335514483789502e-05, + "loss": 1.5576, + "step": 31130 + }, + { + "epoch": 0.48, + "learning_rate": 2.632334613028094e-05, + "loss": 1.9131, + "step": 31140 + }, + { + "epoch": 0.48, + "learning_rate": 2.6311177462369208e-05, + "loss": 1.7581, + "step": 31150 + }, + { + "epoch": 0.48, + "learning_rate": 2.629900848294537e-05, + "loss": 1.6805, + "step": 31160 + }, + { + "epoch": 0.48, + "learning_rate": 2.6286839194900544e-05, + "loss": 1.6619, + "step": 31170 + }, + { + "epoch": 0.48, + "learning_rate": 2.6274669601125933e-05, + "loss": 1.7099, + "step": 31180 + }, + { + "epoch": 0.48, + "learning_rate": 2.6262499704512822e-05, + "loss": 1.6765, + "step": 31190 + }, + { + "epoch": 0.48, + "learning_rate": 2.6250329507952554e-05, + "loss": 1.7046, + "step": 31200 + }, + { + "epoch": 0.48, + "learning_rate": 2.623815901433655e-05, + "loss": 1.6142, + "step": 31210 + }, + { + "epoch": 0.48, + "learning_rate": 2.62259882265563e-05, + "loss": 1.6231, + "step": 31220 + }, + { + "epoch": 0.48, + "learning_rate": 2.6213817147503357e-05, + "loss": 1.5973, + "step": 31230 + }, + { + "epoch": 0.48, + "learning_rate": 2.6201645780069355e-05, + "loss": 1.51, + "step": 31240 + }, + { + "epoch": 0.48, + "learning_rate": 2.6189474127145998e-05, + "loss": 1.4731, + "step": 31250 + }, + { + "epoch": 0.49, + "learning_rate": 2.6177302191625043e-05, + "loss": 1.5109, + "step": 31260 + }, + { + "epoch": 0.49, + "learning_rate": 2.616512997639832e-05, + "loss": 1.5098, + "step": 31270 + }, + { + "epoch": 0.49, + "learning_rate": 2.6152957484357733e-05, + "loss": 1.6014, + "step": 31280 + }, + { + "epoch": 0.49, + "learning_rate": 2.614078471839525e-05, + "loss": 1.6762, + "step": 31290 + }, + { + "epoch": 0.49, + "learning_rate": 2.6128611681402894e-05, + "loss": 1.5151, + "step": 31300 + }, + { + "epoch": 0.49, + "learning_rate": 2.611643837627276e-05, + "loss": 1.792, + "step": 31310 + }, + { + "epoch": 0.49, + "learning_rate": 2.610426480589701e-05, + "loss": 1.8181, + "step": 31320 + }, + { + "epoch": 0.49, + "learning_rate": 2.609209097316786e-05, + "loss": 1.8565, + "step": 31330 + }, + { + "epoch": 0.49, + "learning_rate": 2.607991688097759e-05, + "loss": 1.7751, + "step": 31340 + }, + { + "epoch": 0.49, + "learning_rate": 2.6067742532218552e-05, + "loss": 1.597, + "step": 31350 + }, + { + "epoch": 0.49, + "learning_rate": 2.6055567929783155e-05, + "loss": 1.7352, + "step": 31360 + }, + { + "epoch": 0.49, + "learning_rate": 2.604339307656385e-05, + "loss": 1.6425, + "step": 31370 + }, + { + "epoch": 0.49, + "learning_rate": 2.603121797545317e-05, + "loss": 1.5603, + "step": 31380 + }, + { + "epoch": 0.49, + "learning_rate": 2.6019042629343705e-05, + "loss": 1.9553, + "step": 31390 + }, + { + "epoch": 0.49, + "learning_rate": 2.600686704112809e-05, + "loss": 1.7989, + "step": 31400 + }, + { + "epoch": 0.49, + "learning_rate": 2.599469121369903e-05, + "loss": 1.7747, + "step": 31410 + }, + { + "epoch": 0.49, + "learning_rate": 2.598251514994929e-05, + "loss": 1.6601, + "step": 31420 + }, + { + "epoch": 0.49, + "learning_rate": 2.5970338852771665e-05, + "loss": 1.6134, + "step": 31430 + }, + { + "epoch": 0.49, + "learning_rate": 2.5958162325059025e-05, + "loss": 1.621, + "step": 31440 + }, + { + "epoch": 0.49, + "learning_rate": 2.594598556970432e-05, + "loss": 1.705, + "step": 31450 + }, + { + "epoch": 0.49, + "learning_rate": 2.593380858960049e-05, + "loss": 1.7126, + "step": 31460 + }, + { + "epoch": 0.49, + "learning_rate": 2.5921631387640587e-05, + "loss": 1.6731, + "step": 31470 + }, + { + "epoch": 0.49, + "learning_rate": 2.5909453966717694e-05, + "loss": 1.6747, + "step": 31480 + }, + { + "epoch": 0.49, + "learning_rate": 2.5897276329724947e-05, + "loss": 1.6363, + "step": 31490 + }, + { + "epoch": 0.49, + "learning_rate": 2.5885098479555525e-05, + "loss": 1.6252, + "step": 31500 + }, + { + "epoch": 0.49, + "learning_rate": 2.587292041910267e-05, + "loss": 1.6745, + "step": 31510 + }, + { + "epoch": 0.49, + "learning_rate": 2.5860742151259677e-05, + "loss": 1.6305, + "step": 31520 + }, + { + "epoch": 0.49, + "learning_rate": 2.5848563678919862e-05, + "loss": 1.5926, + "step": 31530 + }, + { + "epoch": 0.49, + "learning_rate": 2.583638500497663e-05, + "loss": 1.6396, + "step": 31540 + }, + { + "epoch": 0.49, + "learning_rate": 2.5824206132323408e-05, + "loss": 1.6165, + "step": 31550 + }, + { + "epoch": 0.49, + "learning_rate": 2.5812027063853677e-05, + "loss": 1.6309, + "step": 31560 + }, + { + "epoch": 0.49, + "learning_rate": 2.5799847802460964e-05, + "loss": 1.6165, + "step": 31570 + }, + { + "epoch": 0.49, + "learning_rate": 2.5787668351038835e-05, + "loss": 1.5766, + "step": 31580 + }, + { + "epoch": 0.49, + "learning_rate": 2.5775488712480928e-05, + "loss": 1.608, + "step": 31590 + }, + { + "epoch": 0.49, + "learning_rate": 2.5763308889680883e-05, + "loss": 1.6832, + "step": 31600 + }, + { + "epoch": 0.49, + "learning_rate": 2.5751128885532412e-05, + "loss": 1.6309, + "step": 31610 + }, + { + "epoch": 0.49, + "learning_rate": 2.573894870292927e-05, + "loss": 1.6565, + "step": 31620 + }, + { + "epoch": 0.49, + "learning_rate": 2.572676834476525e-05, + "loss": 1.8253, + "step": 31630 + }, + { + "epoch": 0.49, + "learning_rate": 2.5714587813934177e-05, + "loss": 1.822, + "step": 31640 + }, + { + "epoch": 0.49, + "learning_rate": 2.5702407113329934e-05, + "loss": 1.738, + "step": 31650 + }, + { + "epoch": 0.49, + "learning_rate": 2.5690226245846422e-05, + "loss": 1.806, + "step": 31660 + }, + { + "epoch": 0.49, + "learning_rate": 2.5678045214377612e-05, + "loss": 1.7034, + "step": 31670 + }, + { + "epoch": 0.49, + "learning_rate": 2.5665864021817488e-05, + "loss": 1.615, + "step": 31680 + }, + { + "epoch": 0.49, + "learning_rate": 2.565368267106008e-05, + "loss": 1.751, + "step": 31690 + }, + { + "epoch": 0.49, + "learning_rate": 2.564150116499946e-05, + "loss": 1.5873, + "step": 31700 + }, + { + "epoch": 0.49, + "learning_rate": 2.5629319506529742e-05, + "loss": 1.6959, + "step": 31710 + }, + { + "epoch": 0.49, + "learning_rate": 2.5617137698545063e-05, + "loss": 1.8869, + "step": 31720 + }, + { + "epoch": 0.49, + "learning_rate": 2.560495574393959e-05, + "loss": 1.7533, + "step": 31730 + }, + { + "epoch": 0.49, + "learning_rate": 2.559277364560755e-05, + "loss": 1.8836, + "step": 31740 + }, + { + "epoch": 0.49, + "learning_rate": 2.558059140644319e-05, + "loss": 1.6901, + "step": 31750 + }, + { + "epoch": 0.49, + "learning_rate": 2.5568409029340773e-05, + "loss": 1.4849, + "step": 31760 + }, + { + "epoch": 0.49, + "learning_rate": 2.555622651719463e-05, + "loss": 1.662, + "step": 31770 + }, + { + "epoch": 0.49, + "learning_rate": 2.5544043872899103e-05, + "loss": 1.9107, + "step": 31780 + }, + { + "epoch": 0.49, + "learning_rate": 2.5531861099348557e-05, + "loss": 1.6115, + "step": 31790 + }, + { + "epoch": 0.49, + "learning_rate": 2.5519678199437413e-05, + "loss": 1.6896, + "step": 31800 + }, + { + "epoch": 0.49, + "learning_rate": 2.5507495176060103e-05, + "loss": 1.7752, + "step": 31810 + }, + { + "epoch": 0.49, + "learning_rate": 2.5495312032111096e-05, + "loss": 1.7192, + "step": 31820 + }, + { + "epoch": 0.49, + "learning_rate": 2.5483128770484882e-05, + "loss": 1.918, + "step": 31830 + }, + { + "epoch": 0.49, + "learning_rate": 2.547094539407599e-05, + "loss": 1.8316, + "step": 31840 + }, + { + "epoch": 0.49, + "learning_rate": 2.5458761905778962e-05, + "loss": 1.6121, + "step": 31850 + }, + { + "epoch": 0.49, + "learning_rate": 2.5446578308488384e-05, + "loss": 1.7559, + "step": 31860 + }, + { + "epoch": 0.49, + "learning_rate": 2.543439460509885e-05, + "loss": 1.7316, + "step": 31870 + }, + { + "epoch": 0.49, + "learning_rate": 2.5422210798505003e-05, + "loss": 1.5897, + "step": 31880 + }, + { + "epoch": 0.49, + "learning_rate": 2.5410026891601487e-05, + "loss": 1.5622, + "step": 31890 + }, + { + "epoch": 0.49, + "learning_rate": 2.5397842887282968e-05, + "loss": 1.7648, + "step": 31900 + }, + { + "epoch": 0.5, + "learning_rate": 2.5385658788444168e-05, + "loss": 1.7416, + "step": 31910 + }, + { + "epoch": 0.5, + "learning_rate": 2.5373474597979785e-05, + "loss": 1.6322, + "step": 31920 + }, + { + "epoch": 0.5, + "learning_rate": 2.5361290318784577e-05, + "loss": 2.0612, + "step": 31930 + }, + { + "epoch": 0.5, + "learning_rate": 2.5349105953753306e-05, + "loss": 2.408, + "step": 31940 + }, + { + "epoch": 0.5, + "learning_rate": 2.5336921505780763e-05, + "loss": 2.0981, + "step": 31950 + }, + { + "epoch": 0.5, + "learning_rate": 2.5324736977761742e-05, + "loss": 1.9849, + "step": 31960 + }, + { + "epoch": 0.5, + "learning_rate": 2.5312552372591075e-05, + "loss": 1.7358, + "step": 31970 + }, + { + "epoch": 0.5, + "learning_rate": 2.5300367693163607e-05, + "loss": 1.7846, + "step": 31980 + }, + { + "epoch": 0.5, + "learning_rate": 2.528818294237419e-05, + "loss": 1.5975, + "step": 31990 + }, + { + "epoch": 0.5, + "learning_rate": 2.52759981231177e-05, + "loss": 1.6959, + "step": 32000 + }, + { + "epoch": 0.5, + "learning_rate": 2.5263813238289036e-05, + "loss": 1.6827, + "step": 32010 + }, + { + "epoch": 0.5, + "learning_rate": 2.5251628290783108e-05, + "loss": 1.633, + "step": 32020 + }, + { + "epoch": 0.5, + "learning_rate": 2.5239443283494835e-05, + "loss": 1.7342, + "step": 32030 + }, + { + "epoch": 0.5, + "learning_rate": 2.5227258219319154e-05, + "loss": 1.7013, + "step": 32040 + }, + { + "epoch": 0.5, + "learning_rate": 2.5215073101151033e-05, + "loss": 1.6876, + "step": 32050 + }, + { + "epoch": 0.5, + "learning_rate": 2.520288793188541e-05, + "loss": 1.7396, + "step": 32060 + }, + { + "epoch": 0.5, + "learning_rate": 2.5190702714417276e-05, + "loss": 1.8015, + "step": 32070 + }, + { + "epoch": 0.5, + "learning_rate": 2.517851745164162e-05, + "loss": 1.7347, + "step": 32080 + }, + { + "epoch": 0.5, + "learning_rate": 2.5166332146453437e-05, + "loss": 1.6833, + "step": 32090 + }, + { + "epoch": 0.5, + "learning_rate": 2.5154146801747736e-05, + "loss": 1.84, + "step": 32100 + }, + { + "epoch": 0.5, + "learning_rate": 2.5141961420419542e-05, + "loss": 1.5983, + "step": 32110 + }, + { + "epoch": 0.5, + "learning_rate": 2.5129776005363865e-05, + "loss": 1.7959, + "step": 32120 + }, + { + "epoch": 0.5, + "learning_rate": 2.5117590559475752e-05, + "loss": 2.1036, + "step": 32130 + }, + { + "epoch": 0.5, + "learning_rate": 2.5105405085650246e-05, + "loss": 1.8333, + "step": 32140 + }, + { + "epoch": 0.5, + "learning_rate": 2.509321958678239e-05, + "loss": 1.7488, + "step": 32150 + }, + { + "epoch": 0.5, + "learning_rate": 2.5081034065767245e-05, + "loss": 1.5693, + "step": 32160 + }, + { + "epoch": 0.5, + "learning_rate": 2.5068848525499865e-05, + "loss": 1.7141, + "step": 32170 + }, + { + "epoch": 0.5, + "learning_rate": 2.5056662968875316e-05, + "loss": 1.8203, + "step": 32180 + }, + { + "epoch": 0.5, + "learning_rate": 2.5044477398788672e-05, + "loss": 1.8285, + "step": 32190 + }, + { + "epoch": 0.5, + "learning_rate": 2.5032291818134995e-05, + "loss": 1.6109, + "step": 32200 + }, + { + "epoch": 0.5, + "learning_rate": 2.5020106229809377e-05, + "loss": 1.7752, + "step": 32210 + }, + { + "epoch": 0.5, + "learning_rate": 2.5007920636706867e-05, + "loss": 1.5972, + "step": 32220 + }, + { + "epoch": 0.5, + "learning_rate": 2.4995735041722567e-05, + "loss": 1.6089, + "step": 32230 + }, + { + "epoch": 0.5, + "learning_rate": 2.4983549447751533e-05, + "loss": 1.7108, + "step": 32240 + }, + { + "epoch": 0.5, + "learning_rate": 2.4971363857688864e-05, + "loss": 1.7389, + "step": 32250 + }, + { + "epoch": 0.5, + "learning_rate": 2.4959178274429625e-05, + "loss": 1.6557, + "step": 32260 + }, + { + "epoch": 0.5, + "learning_rate": 2.4946992700868886e-05, + "loss": 1.6699, + "step": 32270 + }, + { + "epoch": 0.5, + "learning_rate": 2.4934807139901728e-05, + "loss": 1.536, + "step": 32280 + }, + { + "epoch": 0.5, + "learning_rate": 2.4922621594423224e-05, + "loss": 1.7225, + "step": 32290 + }, + { + "epoch": 0.5, + "learning_rate": 2.491043606732842e-05, + "loss": 1.4861, + "step": 32300 + }, + { + "epoch": 0.5, + "learning_rate": 2.48982505615124e-05, + "loss": 1.4572, + "step": 32310 + }, + { + "epoch": 0.5, + "learning_rate": 2.4886065079870202e-05, + "loss": 1.4441, + "step": 32320 + }, + { + "epoch": 0.5, + "learning_rate": 2.4873879625296894e-05, + "loss": 1.4619, + "step": 32330 + }, + { + "epoch": 0.5, + "learning_rate": 2.486169420068751e-05, + "loss": 1.6902, + "step": 32340 + }, + { + "epoch": 0.5, + "learning_rate": 2.484950880893709e-05, + "loss": 1.9447, + "step": 32350 + }, + { + "epoch": 0.5, + "learning_rate": 2.483732345294066e-05, + "loss": 1.6986, + "step": 32360 + }, + { + "epoch": 0.5, + "learning_rate": 2.482513813559325e-05, + "loss": 1.7045, + "step": 32370 + }, + { + "epoch": 0.5, + "learning_rate": 2.481295285978985e-05, + "loss": 1.6438, + "step": 32380 + }, + { + "epoch": 0.5, + "learning_rate": 2.480076762842549e-05, + "loss": 1.5444, + "step": 32390 + }, + { + "epoch": 0.5, + "learning_rate": 2.478858244439515e-05, + "loss": 1.5298, + "step": 32400 + }, + { + "epoch": 0.5, + "learning_rate": 2.4776397310593792e-05, + "loss": 1.5227, + "step": 32410 + }, + { + "epoch": 0.5, + "learning_rate": 2.4764212229916413e-05, + "loss": 1.5614, + "step": 32420 + }, + { + "epoch": 0.5, + "learning_rate": 2.4752027205257957e-05, + "loss": 1.6088, + "step": 32430 + }, + { + "epoch": 0.5, + "learning_rate": 2.473984223951335e-05, + "loss": 1.6516, + "step": 32440 + }, + { + "epoch": 0.5, + "learning_rate": 2.4727657335577546e-05, + "loss": 1.7938, + "step": 32450 + }, + { + "epoch": 0.5, + "learning_rate": 2.4715472496345445e-05, + "loss": 1.7171, + "step": 32460 + }, + { + "epoch": 0.5, + "learning_rate": 2.470328772471194e-05, + "loss": 1.6899, + "step": 32470 + }, + { + "epoch": 0.5, + "learning_rate": 2.4691103023571927e-05, + "loss": 1.6393, + "step": 32480 + }, + { + "epoch": 0.5, + "learning_rate": 2.4678918395820256e-05, + "loss": 1.5695, + "step": 32490 + }, + { + "epoch": 0.5, + "learning_rate": 2.4666733844351786e-05, + "loss": 1.6906, + "step": 32500 + }, + { + "epoch": 0.5, + "learning_rate": 2.465454937206134e-05, + "loss": 1.767, + "step": 32510 + }, + { + "epoch": 0.5, + "learning_rate": 2.4642364981843738e-05, + "loss": 1.608, + "step": 32520 + }, + { + "epoch": 0.5, + "learning_rate": 2.4630180676593756e-05, + "loss": 1.605, + "step": 32530 + }, + { + "epoch": 0.5, + "learning_rate": 2.4617996459206183e-05, + "loss": 1.7836, + "step": 32540 + }, + { + "epoch": 0.51, + "learning_rate": 2.460581233257575e-05, + "loss": 1.6599, + "step": 32550 + }, + { + "epoch": 0.51, + "learning_rate": 2.4593628299597208e-05, + "loss": 2.0553, + "step": 32560 + }, + { + "epoch": 0.51, + "learning_rate": 2.4581444363165253e-05, + "loss": 1.7034, + "step": 32570 + }, + { + "epoch": 0.51, + "learning_rate": 2.4569260526174552e-05, + "loss": 1.7329, + "step": 32580 + }, + { + "epoch": 0.51, + "learning_rate": 2.4557076791519794e-05, + "loss": 1.5378, + "step": 32590 + }, + { + "epoch": 0.51, + "learning_rate": 2.45448931620956e-05, + "loss": 1.6747, + "step": 32600 + }, + { + "epoch": 0.51, + "learning_rate": 2.4532709640796574e-05, + "loss": 1.7547, + "step": 32610 + }, + { + "epoch": 0.51, + "learning_rate": 2.4520526230517314e-05, + "loss": 1.5435, + "step": 32620 + }, + { + "epoch": 0.51, + "learning_rate": 2.4508342934152368e-05, + "loss": 1.9551, + "step": 32630 + }, + { + "epoch": 0.51, + "learning_rate": 2.4496159754596274e-05, + "loss": 1.7664, + "step": 32640 + }, + { + "epoch": 0.51, + "learning_rate": 2.4483976694743537e-05, + "loss": 1.5342, + "step": 32650 + }, + { + "epoch": 0.51, + "learning_rate": 2.4471793757488625e-05, + "loss": 1.7877, + "step": 32660 + }, + { + "epoch": 0.51, + "learning_rate": 2.4459610945725987e-05, + "loss": 1.835, + "step": 32670 + }, + { + "epoch": 0.51, + "learning_rate": 2.4447428262350043e-05, + "loss": 1.587, + "step": 32680 + }, + { + "epoch": 0.51, + "learning_rate": 2.4435245710255177e-05, + "loss": 1.9906, + "step": 32690 + }, + { + "epoch": 0.51, + "learning_rate": 2.4423063292335734e-05, + "loss": 1.7502, + "step": 32700 + }, + { + "epoch": 0.51, + "learning_rate": 2.441088101148606e-05, + "loss": 1.6386, + "step": 32710 + }, + { + "epoch": 0.51, + "learning_rate": 2.4398698870600428e-05, + "loss": 1.5704, + "step": 32720 + }, + { + "epoch": 0.51, + "learning_rate": 2.4386516872573086e-05, + "loss": 1.5277, + "step": 32730 + }, + { + "epoch": 0.51, + "learning_rate": 2.437433502029828e-05, + "loss": 1.5251, + "step": 32740 + }, + { + "epoch": 0.51, + "learning_rate": 2.436215331667019e-05, + "loss": 1.5114, + "step": 32750 + }, + { + "epoch": 0.51, + "learning_rate": 2.4349971764582953e-05, + "loss": 1.4764, + "step": 32760 + }, + { + "epoch": 0.51, + "learning_rate": 2.433779036693071e-05, + "loss": 1.6542, + "step": 32770 + }, + { + "epoch": 0.51, + "learning_rate": 2.4325609126607517e-05, + "loss": 1.719, + "step": 32780 + }, + { + "epoch": 0.51, + "learning_rate": 2.431342804650745e-05, + "loss": 1.8886, + "step": 32790 + }, + { + "epoch": 0.51, + "learning_rate": 2.430124712952448e-05, + "loss": 1.7042, + "step": 32800 + }, + { + "epoch": 0.51, + "learning_rate": 2.428906637855259e-05, + "loss": 1.6884, + "step": 32810 + }, + { + "epoch": 0.51, + "learning_rate": 2.4276885796485704e-05, + "loss": 1.7696, + "step": 32820 + }, + { + "epoch": 0.51, + "learning_rate": 2.426470538621772e-05, + "loss": 1.7448, + "step": 32830 + }, + { + "epoch": 0.51, + "learning_rate": 2.4252525150642452e-05, + "loss": 1.7439, + "step": 32840 + }, + { + "epoch": 0.51, + "learning_rate": 2.4240345092653745e-05, + "loss": 1.6832, + "step": 32850 + }, + { + "epoch": 0.51, + "learning_rate": 2.4228165215145336e-05, + "loss": 1.5707, + "step": 32860 + }, + { + "epoch": 0.51, + "learning_rate": 2.4215985521010937e-05, + "loss": 1.5439, + "step": 32870 + }, + { + "epoch": 0.51, + "learning_rate": 2.4203806013144247e-05, + "loss": 1.5626, + "step": 32880 + }, + { + "epoch": 0.51, + "learning_rate": 2.419162669443889e-05, + "loss": 1.96, + "step": 32890 + }, + { + "epoch": 0.51, + "learning_rate": 2.4179447567788434e-05, + "loss": 1.6574, + "step": 32900 + }, + { + "epoch": 0.51, + "learning_rate": 2.4167268636086446e-05, + "loss": 1.739, + "step": 32910 + }, + { + "epoch": 0.51, + "learning_rate": 2.4155089902226412e-05, + "loss": 1.5862, + "step": 32920 + }, + { + "epoch": 0.51, + "learning_rate": 2.414291136910176e-05, + "loss": 1.418, + "step": 32930 + }, + { + "epoch": 0.51, + "learning_rate": 2.4130733039605922e-05, + "loss": 1.7893, + "step": 32940 + }, + { + "epoch": 0.51, + "learning_rate": 2.4118554916632226e-05, + "loss": 1.7871, + "step": 32950 + }, + { + "epoch": 0.51, + "learning_rate": 2.4106377003073978e-05, + "loss": 1.8599, + "step": 32960 + }, + { + "epoch": 0.51, + "learning_rate": 2.4094199301824437e-05, + "loss": 1.5863, + "step": 32970 + }, + { + "epoch": 0.51, + "learning_rate": 2.4082021815776813e-05, + "loss": 1.4855, + "step": 32980 + }, + { + "epoch": 0.51, + "learning_rate": 2.406984454782423e-05, + "loss": 1.5832, + "step": 32990 + }, + { + "epoch": 0.51, + "learning_rate": 2.4057667500859816e-05, + "loss": 1.8194, + "step": 33000 + }, + { + "epoch": 0.51, + "learning_rate": 2.4045490677776596e-05, + "loss": 1.7327, + "step": 33010 + }, + { + "epoch": 0.51, + "learning_rate": 2.4033314081467582e-05, + "loss": 2.0048, + "step": 33020 + }, + { + "epoch": 0.51, + "learning_rate": 2.4021137714825708e-05, + "loss": 1.7481, + "step": 33030 + }, + { + "epoch": 0.51, + "learning_rate": 2.4008961580743837e-05, + "loss": 1.7322, + "step": 33040 + }, + { + "epoch": 0.51, + "learning_rate": 2.3996785682114836e-05, + "loss": 1.7494, + "step": 33050 + }, + { + "epoch": 0.51, + "learning_rate": 2.398461002183146e-05, + "loss": 1.7358, + "step": 33060 + }, + { + "epoch": 0.51, + "learning_rate": 2.397243460278641e-05, + "loss": 1.6446, + "step": 33070 + }, + { + "epoch": 0.51, + "learning_rate": 2.396025942787238e-05, + "loss": 1.3793, + "step": 33080 + }, + { + "epoch": 0.51, + "learning_rate": 2.394808449998195e-05, + "loss": 1.5024, + "step": 33090 + }, + { + "epoch": 0.51, + "learning_rate": 2.393590982200766e-05, + "loss": 1.7198, + "step": 33100 + }, + { + "epoch": 0.51, + "learning_rate": 2.3923735396842012e-05, + "loss": 1.6659, + "step": 33110 + }, + { + "epoch": 0.51, + "learning_rate": 2.3911561227377412e-05, + "loss": 1.5172, + "step": 33120 + }, + { + "epoch": 0.51, + "learning_rate": 2.3899387316506232e-05, + "loss": 1.469, + "step": 33130 + }, + { + "epoch": 0.51, + "learning_rate": 2.3887213667120774e-05, + "loss": 1.7945, + "step": 33140 + }, + { + "epoch": 0.51, + "learning_rate": 2.3875040282113278e-05, + "loss": 1.8832, + "step": 33150 + }, + { + "epoch": 0.51, + "learning_rate": 2.386286716437591e-05, + "loss": 1.7948, + "step": 33160 + }, + { + "epoch": 0.51, + "learning_rate": 2.38506943168008e-05, + "loss": 1.5751, + "step": 33170 + }, + { + "epoch": 0.51, + "learning_rate": 2.383852174227999e-05, + "loss": 1.5009, + "step": 33180 + }, + { + "epoch": 0.51, + "learning_rate": 2.382634944370545e-05, + "loss": 1.4573, + "step": 33190 + }, + { + "epoch": 0.52, + "learning_rate": 2.3814177423969125e-05, + "loss": 1.5138, + "step": 33200 + }, + { + "epoch": 0.52, + "learning_rate": 2.3802005685962853e-05, + "loss": 1.5747, + "step": 33210 + }, + { + "epoch": 0.52, + "learning_rate": 2.3789834232578405e-05, + "loss": 1.5056, + "step": 33220 + }, + { + "epoch": 0.52, + "learning_rate": 2.3777663066707527e-05, + "loss": 1.7035, + "step": 33230 + }, + { + "epoch": 0.52, + "learning_rate": 2.3765492191241848e-05, + "loss": 1.6348, + "step": 33240 + }, + { + "epoch": 0.52, + "learning_rate": 2.3753321609072962e-05, + "loss": 1.8221, + "step": 33250 + }, + { + "epoch": 0.52, + "learning_rate": 2.3741151323092374e-05, + "loss": 1.6239, + "step": 33260 + }, + { + "epoch": 0.52, + "learning_rate": 2.3728981336191518e-05, + "loss": 1.6051, + "step": 33270 + }, + { + "epoch": 0.52, + "learning_rate": 2.3716811651261776e-05, + "loss": 1.6145, + "step": 33280 + }, + { + "epoch": 0.52, + "learning_rate": 2.3704642271194432e-05, + "loss": 1.6199, + "step": 33290 + }, + { + "epoch": 0.52, + "learning_rate": 2.3692473198880717e-05, + "loss": 1.5711, + "step": 33300 + }, + { + "epoch": 0.52, + "learning_rate": 2.368030443721179e-05, + "loss": 1.603, + "step": 33310 + }, + { + "epoch": 0.52, + "learning_rate": 2.3668135989078723e-05, + "loss": 1.4979, + "step": 33320 + }, + { + "epoch": 0.52, + "learning_rate": 2.3655967857372514e-05, + "loss": 1.6174, + "step": 33330 + }, + { + "epoch": 0.52, + "learning_rate": 2.3643800044984106e-05, + "loss": 1.8854, + "step": 33340 + }, + { + "epoch": 0.52, + "learning_rate": 2.363163255480434e-05, + "loss": 1.6554, + "step": 33350 + }, + { + "epoch": 0.52, + "learning_rate": 2.361946538972399e-05, + "loss": 1.8907, + "step": 33360 + }, + { + "epoch": 0.52, + "learning_rate": 2.3607298552633773e-05, + "loss": 1.9422, + "step": 33370 + }, + { + "epoch": 0.52, + "learning_rate": 2.359513204642429e-05, + "loss": 1.8492, + "step": 33380 + }, + { + "epoch": 0.52, + "learning_rate": 2.3582965873986086e-05, + "loss": 1.7607, + "step": 33390 + }, + { + "epoch": 0.52, + "learning_rate": 2.357080003820964e-05, + "loss": 1.7617, + "step": 33400 + }, + { + "epoch": 0.52, + "learning_rate": 2.3558634541985322e-05, + "loss": 1.6094, + "step": 33410 + }, + { + "epoch": 0.52, + "learning_rate": 2.3546469388203427e-05, + "loss": 1.5686, + "step": 33420 + }, + { + "epoch": 0.52, + "learning_rate": 2.3534304579754195e-05, + "loss": 1.6858, + "step": 33430 + }, + { + "epoch": 0.52, + "learning_rate": 2.3522140119527752e-05, + "loss": 1.6803, + "step": 33440 + }, + { + "epoch": 0.52, + "learning_rate": 2.350997601041416e-05, + "loss": 1.7798, + "step": 33450 + }, + { + "epoch": 0.52, + "learning_rate": 2.3497812255303388e-05, + "loss": 1.7511, + "step": 33460 + }, + { + "epoch": 0.52, + "learning_rate": 2.3485648857085323e-05, + "loss": 1.9284, + "step": 33470 + }, + { + "epoch": 0.52, + "learning_rate": 2.3473485818649778e-05, + "loss": 1.689, + "step": 33480 + }, + { + "epoch": 0.52, + "learning_rate": 2.346132314288647e-05, + "loss": 1.6359, + "step": 33490 + }, + { + "epoch": 0.52, + "learning_rate": 2.344916083268502e-05, + "loss": 1.6527, + "step": 33500 + }, + { + "epoch": 0.52, + "learning_rate": 2.3436998890934987e-05, + "loss": 1.542, + "step": 33510 + }, + { + "epoch": 0.52, + "learning_rate": 2.3424837320525827e-05, + "loss": 1.3756, + "step": 33520 + }, + { + "epoch": 0.52, + "learning_rate": 2.3412676124346894e-05, + "loss": 1.3429, + "step": 33530 + }, + { + "epoch": 0.52, + "learning_rate": 2.3400515305287495e-05, + "loss": 1.3121, + "step": 33540 + }, + { + "epoch": 0.52, + "learning_rate": 2.33883548662368e-05, + "loss": 1.3005, + "step": 33550 + }, + { + "epoch": 0.52, + "learning_rate": 2.337619481008391e-05, + "loss": 1.6357, + "step": 33560 + }, + { + "epoch": 0.52, + "learning_rate": 2.3364035139717855e-05, + "loss": 1.6519, + "step": 33570 + }, + { + "epoch": 0.52, + "learning_rate": 2.3351875858027537e-05, + "loss": 1.7905, + "step": 33580 + }, + { + "epoch": 0.52, + "learning_rate": 2.3339716967901777e-05, + "loss": 1.6664, + "step": 33590 + }, + { + "epoch": 0.52, + "learning_rate": 2.3327558472229323e-05, + "loss": 1.6835, + "step": 33600 + }, + { + "epoch": 0.52, + "learning_rate": 2.331540037389881e-05, + "loss": 1.5074, + "step": 33610 + }, + { + "epoch": 0.52, + "learning_rate": 2.330324267579877e-05, + "loss": 1.5436, + "step": 33620 + }, + { + "epoch": 0.52, + "learning_rate": 2.3291085380817672e-05, + "loss": 1.5462, + "step": 33630 + }, + { + "epoch": 0.52, + "learning_rate": 2.3278928491843862e-05, + "loss": 1.5847, + "step": 33640 + }, + { + "epoch": 0.52, + "learning_rate": 2.3266772011765588e-05, + "loss": 1.7635, + "step": 33650 + }, + { + "epoch": 0.52, + "learning_rate": 2.325461594347103e-05, + "loss": 1.7619, + "step": 33660 + }, + { + "epoch": 0.52, + "learning_rate": 2.3242460289848238e-05, + "loss": 1.6929, + "step": 33670 + }, + { + "epoch": 0.52, + "learning_rate": 2.323030505378517e-05, + "loss": 1.6625, + "step": 33680 + }, + { + "epoch": 0.52, + "learning_rate": 2.3218150238169707e-05, + "loss": 1.6484, + "step": 33690 + }, + { + "epoch": 0.52, + "learning_rate": 2.3205995845889596e-05, + "loss": 1.835, + "step": 33700 + }, + { + "epoch": 0.52, + "learning_rate": 2.319384187983252e-05, + "loss": 1.6508, + "step": 33710 + }, + { + "epoch": 0.52, + "learning_rate": 2.318168834288603e-05, + "loss": 1.8109, + "step": 33720 + }, + { + "epoch": 0.52, + "learning_rate": 2.3169535237937584e-05, + "loss": 1.6658, + "step": 33730 + }, + { + "epoch": 0.52, + "learning_rate": 2.3157382567874558e-05, + "loss": 1.8626, + "step": 33740 + }, + { + "epoch": 0.52, + "learning_rate": 2.3145230335584186e-05, + "loss": 1.8833, + "step": 33750 + }, + { + "epoch": 0.52, + "learning_rate": 2.3133078543953632e-05, + "loss": 1.6479, + "step": 33760 + }, + { + "epoch": 0.52, + "learning_rate": 2.3120927195869947e-05, + "loss": 2.0983, + "step": 33770 + }, + { + "epoch": 0.52, + "learning_rate": 2.3108776294220054e-05, + "loss": 1.7872, + "step": 33780 + }, + { + "epoch": 0.52, + "learning_rate": 2.30966258418908e-05, + "loss": 1.6232, + "step": 33790 + }, + { + "epoch": 0.52, + "learning_rate": 2.3084475841768913e-05, + "loss": 1.6766, + "step": 33800 + }, + { + "epoch": 0.52, + "learning_rate": 2.307232629674102e-05, + "loss": 1.7139, + "step": 33810 + }, + { + "epoch": 0.52, + "learning_rate": 2.306017720969361e-05, + "loss": 1.6988, + "step": 33820 + }, + { + "epoch": 0.52, + "learning_rate": 2.304802858351312e-05, + "loss": 1.4856, + "step": 33830 + }, + { + "epoch": 0.53, + "learning_rate": 2.3035880421085827e-05, + "loss": 1.4142, + "step": 33840 + }, + { + "epoch": 0.53, + "learning_rate": 2.3023732725297903e-05, + "loss": 1.6597, + "step": 33850 + }, + { + "epoch": 0.53, + "learning_rate": 2.3011585499035445e-05, + "loss": 1.7434, + "step": 33860 + }, + { + "epoch": 0.53, + "learning_rate": 2.2999438745184407e-05, + "loss": 1.6781, + "step": 33870 + }, + { + "epoch": 0.53, + "learning_rate": 2.298729246663062e-05, + "loss": 1.7279, + "step": 33880 + }, + { + "epoch": 0.53, + "learning_rate": 2.2975146666259855e-05, + "loss": 1.7297, + "step": 33890 + }, + { + "epoch": 0.53, + "learning_rate": 2.296300134695771e-05, + "loss": 1.6467, + "step": 33900 + }, + { + "epoch": 0.53, + "learning_rate": 2.2950856511609696e-05, + "loss": 1.7753, + "step": 33910 + }, + { + "epoch": 0.53, + "learning_rate": 2.2938712163101212e-05, + "loss": 1.7859, + "step": 33920 + }, + { + "epoch": 0.53, + "learning_rate": 2.2926568304317536e-05, + "loss": 1.7883, + "step": 33930 + }, + { + "epoch": 0.53, + "learning_rate": 2.291442493814383e-05, + "loss": 1.902, + "step": 33940 + }, + { + "epoch": 0.53, + "learning_rate": 2.2902282067465144e-05, + "loss": 1.8431, + "step": 33950 + }, + { + "epoch": 0.53, + "learning_rate": 2.2890139695166387e-05, + "loss": 1.739, + "step": 33960 + }, + { + "epoch": 0.53, + "learning_rate": 2.287799782413239e-05, + "loss": 1.836, + "step": 33970 + }, + { + "epoch": 0.53, + "learning_rate": 2.2865856457247833e-05, + "loss": 1.7779, + "step": 33980 + }, + { + "epoch": 0.53, + "learning_rate": 2.2853715597397276e-05, + "loss": 1.713, + "step": 33990 + }, + { + "epoch": 0.53, + "learning_rate": 2.2841575247465185e-05, + "loss": 1.7279, + "step": 34000 + }, + { + "epoch": 0.53, + "learning_rate": 2.2829435410335883e-05, + "loss": 1.8186, + "step": 34010 + }, + { + "epoch": 0.53, + "learning_rate": 2.281729608889356e-05, + "loss": 1.9258, + "step": 34020 + }, + { + "epoch": 0.53, + "learning_rate": 2.2805157286022327e-05, + "loss": 1.9581, + "step": 34030 + }, + { + "epoch": 0.53, + "learning_rate": 2.279301900460613e-05, + "loss": 1.6842, + "step": 34040 + }, + { + "epoch": 0.53, + "learning_rate": 2.2780881247528796e-05, + "loss": 1.6601, + "step": 34050 + }, + { + "epoch": 0.53, + "learning_rate": 2.276874401767406e-05, + "loss": 1.5923, + "step": 34060 + }, + { + "epoch": 0.53, + "learning_rate": 2.275660731792549e-05, + "loss": 1.6098, + "step": 34070 + }, + { + "epoch": 0.53, + "learning_rate": 2.274447115116655e-05, + "loss": 1.773, + "step": 34080 + }, + { + "epoch": 0.53, + "learning_rate": 2.2732335520280584e-05, + "loss": 1.9136, + "step": 34090 + }, + { + "epoch": 0.53, + "learning_rate": 2.2720200428150797e-05, + "loss": 1.7324, + "step": 34100 + }, + { + "epoch": 0.53, + "learning_rate": 2.2708065877660254e-05, + "loss": 1.7334, + "step": 34110 + }, + { + "epoch": 0.53, + "learning_rate": 2.269593187169192e-05, + "loss": 1.6025, + "step": 34120 + }, + { + "epoch": 0.53, + "learning_rate": 2.268379841312862e-05, + "loss": 1.7218, + "step": 34130 + }, + { + "epoch": 0.53, + "learning_rate": 2.2671665504853022e-05, + "loss": 1.894, + "step": 34140 + }, + { + "epoch": 0.53, + "learning_rate": 2.265953314974771e-05, + "loss": 1.7248, + "step": 34150 + }, + { + "epoch": 0.53, + "learning_rate": 2.264740135069509e-05, + "loss": 1.646, + "step": 34160 + }, + { + "epoch": 0.53, + "learning_rate": 2.2635270110577486e-05, + "loss": 1.8224, + "step": 34170 + }, + { + "epoch": 0.53, + "learning_rate": 2.2623139432277048e-05, + "loss": 1.6072, + "step": 34180 + }, + { + "epoch": 0.53, + "learning_rate": 2.2611009318675793e-05, + "loss": 1.883, + "step": 34190 + }, + { + "epoch": 0.53, + "learning_rate": 2.259887977265564e-05, + "loss": 1.6763, + "step": 34200 + }, + { + "epoch": 0.53, + "learning_rate": 2.2586750797098344e-05, + "loss": 1.6696, + "step": 34210 + }, + { + "epoch": 0.53, + "learning_rate": 2.2574622394885512e-05, + "loss": 1.6135, + "step": 34220 + }, + { + "epoch": 0.53, + "learning_rate": 2.256249456889866e-05, + "loss": 1.4837, + "step": 34230 + }, + { + "epoch": 0.53, + "learning_rate": 2.2550367322019128e-05, + "loss": 1.4442, + "step": 34240 + }, + { + "epoch": 0.53, + "learning_rate": 2.253824065712813e-05, + "loss": 1.4963, + "step": 34250 + }, + { + "epoch": 0.53, + "learning_rate": 2.2526114577106745e-05, + "loss": 1.4023, + "step": 34260 + }, + { + "epoch": 0.53, + "learning_rate": 2.2513989084835918e-05, + "loss": 1.7389, + "step": 34270 + }, + { + "epoch": 0.53, + "learning_rate": 2.250186418319643e-05, + "loss": 1.7651, + "step": 34280 + }, + { + "epoch": 0.53, + "learning_rate": 2.2489739875068954e-05, + "loss": 1.8217, + "step": 34290 + }, + { + "epoch": 0.53, + "learning_rate": 2.2477616163334007e-05, + "loss": 1.7917, + "step": 34300 + }, + { + "epoch": 0.53, + "learning_rate": 2.246549305087195e-05, + "loss": 1.9634, + "step": 34310 + }, + { + "epoch": 0.53, + "learning_rate": 2.2453370540563036e-05, + "loss": 1.6658, + "step": 34320 + }, + { + "epoch": 0.53, + "learning_rate": 2.2441248635287344e-05, + "loss": 1.6517, + "step": 34330 + }, + { + "epoch": 0.53, + "learning_rate": 2.2429127337924806e-05, + "loss": 1.8958, + "step": 34340 + }, + { + "epoch": 0.53, + "learning_rate": 2.241700665135525e-05, + "loss": 1.733, + "step": 34350 + }, + { + "epoch": 0.53, + "learning_rate": 2.2404886578458323e-05, + "loss": 1.5721, + "step": 34360 + }, + { + "epoch": 0.53, + "learning_rate": 2.239276712211352e-05, + "loss": 1.6829, + "step": 34370 + }, + { + "epoch": 0.53, + "learning_rate": 2.2380648285200227e-05, + "loss": 1.7942, + "step": 34380 + }, + { + "epoch": 0.53, + "learning_rate": 2.2368530070597645e-05, + "loss": 1.6126, + "step": 34390 + }, + { + "epoch": 0.53, + "learning_rate": 2.235641248118486e-05, + "loss": 1.638, + "step": 34400 + }, + { + "epoch": 0.53, + "learning_rate": 2.2344295519840778e-05, + "loss": 1.6918, + "step": 34410 + }, + { + "epoch": 0.53, + "learning_rate": 2.2332179189444176e-05, + "loss": 1.7117, + "step": 34420 + }, + { + "epoch": 0.53, + "learning_rate": 2.232006349287367e-05, + "loss": 1.7946, + "step": 34430 + }, + { + "epoch": 0.53, + "learning_rate": 2.2307948433007747e-05, + "loss": 1.8358, + "step": 34440 + }, + { + "epoch": 0.53, + "learning_rate": 2.2295834012724702e-05, + "loss": 1.835, + "step": 34450 + }, + { + "epoch": 0.53, + "learning_rate": 2.2283720234902727e-05, + "loss": 2.0003, + "step": 34460 + }, + { + "epoch": 0.53, + "learning_rate": 2.2271607102419827e-05, + "loss": 1.7104, + "step": 34470 + }, + { + "epoch": 0.53, + "learning_rate": 2.2259494618153853e-05, + "loss": 1.6024, + "step": 34480 + }, + { + "epoch": 0.54, + "learning_rate": 2.2247382784982533e-05, + "loss": 1.7935, + "step": 34490 + }, + { + "epoch": 0.54, + "learning_rate": 2.2235271605783413e-05, + "loss": 1.6697, + "step": 34500 + }, + { + "epoch": 0.54, + "learning_rate": 2.2223161083433874e-05, + "loss": 1.5126, + "step": 34510 + }, + { + "epoch": 0.54, + "learning_rate": 2.2211051220811187e-05, + "loss": 1.4102, + "step": 34520 + }, + { + "epoch": 0.54, + "learning_rate": 2.2198942020792416e-05, + "loss": 1.6864, + "step": 34530 + }, + { + "epoch": 0.54, + "learning_rate": 2.2186833486254482e-05, + "loss": 1.809, + "step": 34540 + }, + { + "epoch": 0.54, + "learning_rate": 2.2174725620074184e-05, + "loss": 2.4907, + "step": 34550 + }, + { + "epoch": 0.54, + "learning_rate": 2.2162618425128102e-05, + "loss": 1.6648, + "step": 34560 + }, + { + "epoch": 0.54, + "learning_rate": 2.2150511904292704e-05, + "loss": 1.4899, + "step": 34570 + }, + { + "epoch": 0.54, + "learning_rate": 2.2138406060444276e-05, + "loss": 1.5963, + "step": 34580 + }, + { + "epoch": 0.54, + "learning_rate": 2.2126300896458955e-05, + "loss": 1.7502, + "step": 34590 + }, + { + "epoch": 0.54, + "learning_rate": 2.2114196415212693e-05, + "loss": 1.7971, + "step": 34600 + }, + { + "epoch": 0.54, + "learning_rate": 2.2102092619581315e-05, + "loss": 1.7811, + "step": 34610 + }, + { + "epoch": 0.54, + "learning_rate": 2.208998951244045e-05, + "loss": 1.8287, + "step": 34620 + }, + { + "epoch": 0.54, + "learning_rate": 2.207788709666559e-05, + "loss": 1.807, + "step": 34630 + }, + { + "epoch": 0.54, + "learning_rate": 2.206578537513205e-05, + "loss": 1.7596, + "step": 34640 + }, + { + "epoch": 0.54, + "learning_rate": 2.2053684350714966e-05, + "loss": 1.773, + "step": 34650 + }, + { + "epoch": 0.54, + "learning_rate": 2.204158402628934e-05, + "loss": 1.7895, + "step": 34660 + }, + { + "epoch": 0.54, + "learning_rate": 2.2029484404729987e-05, + "loss": 1.7783, + "step": 34670 + }, + { + "epoch": 0.54, + "learning_rate": 2.2017385488911543e-05, + "loss": 1.7443, + "step": 34680 + }, + { + "epoch": 0.54, + "learning_rate": 2.2005287281708513e-05, + "loss": 1.7495, + "step": 34690 + }, + { + "epoch": 0.54, + "learning_rate": 2.1993189785995212e-05, + "loss": 1.7505, + "step": 34700 + }, + { + "epoch": 0.54, + "learning_rate": 2.1981093004645762e-05, + "loss": 1.751, + "step": 34710 + }, + { + "epoch": 0.54, + "learning_rate": 2.196899694053417e-05, + "loss": 1.7445, + "step": 34720 + }, + { + "epoch": 0.54, + "learning_rate": 2.1956901596534228e-05, + "loss": 1.7619, + "step": 34730 + }, + { + "epoch": 0.54, + "learning_rate": 2.1944806975519568e-05, + "loss": 1.7533, + "step": 34740 + }, + { + "epoch": 0.54, + "learning_rate": 2.1932713080363664e-05, + "loss": 1.7821, + "step": 34750 + }, + { + "epoch": 0.54, + "learning_rate": 2.1920619913939805e-05, + "loss": 1.7179, + "step": 34760 + }, + { + "epoch": 0.54, + "learning_rate": 2.1908527479121096e-05, + "loss": 1.7433, + "step": 34770 + }, + { + "epoch": 0.54, + "learning_rate": 2.1896435778780505e-05, + "loss": 1.7481, + "step": 34780 + }, + { + "epoch": 0.54, + "learning_rate": 2.188434481579079e-05, + "loss": 1.7492, + "step": 34790 + }, + { + "epoch": 0.54, + "learning_rate": 2.1872254593024532e-05, + "loss": 1.722, + "step": 34800 + }, + { + "epoch": 0.54, + "learning_rate": 2.1860165113354176e-05, + "loss": 1.7646, + "step": 34810 + }, + { + "epoch": 0.54, + "learning_rate": 2.184807637965195e-05, + "loss": 1.7601, + "step": 34820 + }, + { + "epoch": 0.54, + "learning_rate": 2.1835988394789906e-05, + "loss": 1.718, + "step": 34830 + }, + { + "epoch": 0.54, + "learning_rate": 2.182390116163996e-05, + "loss": 1.703, + "step": 34840 + }, + { + "epoch": 0.54, + "learning_rate": 2.1811814683073794e-05, + "loss": 1.6917, + "step": 34850 + }, + { + "epoch": 0.54, + "learning_rate": 2.1799728961962962e-05, + "loss": 1.7516, + "step": 34860 + }, + { + "epoch": 0.54, + "learning_rate": 2.178764400117879e-05, + "loss": 1.7078, + "step": 34870 + }, + { + "epoch": 0.54, + "learning_rate": 2.177555980359246e-05, + "loss": 1.7169, + "step": 34880 + }, + { + "epoch": 0.54, + "learning_rate": 2.176347637207496e-05, + "loss": 1.7344, + "step": 34890 + }, + { + "epoch": 0.54, + "learning_rate": 2.175139370949709e-05, + "loss": 1.7129, + "step": 34900 + }, + { + "epoch": 0.54, + "learning_rate": 2.1739311818729474e-05, + "loss": 1.717, + "step": 34910 + }, + { + "epoch": 0.54, + "learning_rate": 2.172723070264255e-05, + "loss": 1.7029, + "step": 34920 + }, + { + "epoch": 0.54, + "learning_rate": 2.171515036410658e-05, + "loss": 1.6984, + "step": 34930 + }, + { + "epoch": 0.54, + "learning_rate": 2.170307080599162e-05, + "loss": 1.711, + "step": 34940 + }, + { + "epoch": 0.54, + "learning_rate": 2.1690992031167578e-05, + "loss": 1.7022, + "step": 34950 + }, + { + "epoch": 0.54, + "learning_rate": 2.1678914042504135e-05, + "loss": 1.6856, + "step": 34960 + }, + { + "epoch": 0.54, + "learning_rate": 2.16668368428708e-05, + "loss": 1.7337, + "step": 34970 + }, + { + "epoch": 0.54, + "learning_rate": 2.1654760435136917e-05, + "loss": 1.6995, + "step": 34980 + }, + { + "epoch": 0.54, + "learning_rate": 2.164268482217161e-05, + "loss": 1.7, + "step": 34990 + }, + { + "epoch": 0.54, + "learning_rate": 2.163061000684382e-05, + "loss": 1.6957, + "step": 35000 + }, + { + "epoch": 0.54, + "eval_loss": 1.5867847204208374, + "eval_runtime": 82.4161, + "eval_samples_per_second": 36.401, + "eval_steps_per_second": 4.55, + "step": 35000 + }, + { + "epoch": 0.54, + "learning_rate": 2.161853599202232e-05, + "loss": 1.7199, + "step": 35010 + }, + { + "epoch": 0.54, + "learning_rate": 2.1606462780575667e-05, + "loss": 1.6812, + "step": 35020 + }, + { + "epoch": 0.54, + "learning_rate": 2.1594390375372246e-05, + "loss": 1.7047, + "step": 35030 + }, + { + "epoch": 0.54, + "learning_rate": 2.1582318779280236e-05, + "loss": 1.7232, + "step": 35040 + }, + { + "epoch": 0.54, + "learning_rate": 2.1570247995167643e-05, + "loss": 1.7013, + "step": 35050 + }, + { + "epoch": 0.54, + "learning_rate": 2.1558178025902247e-05, + "loss": 1.7285, + "step": 35060 + }, + { + "epoch": 0.54, + "learning_rate": 2.1546108874351673e-05, + "loss": 1.7174, + "step": 35070 + }, + { + "epoch": 0.54, + "learning_rate": 2.153404054338332e-05, + "loss": 1.6271, + "step": 35080 + }, + { + "epoch": 0.54, + "learning_rate": 2.1521973035864426e-05, + "loss": 1.7001, + "step": 35090 + }, + { + "epoch": 0.54, + "learning_rate": 2.1509906354661997e-05, + "loss": 1.6902, + "step": 35100 + }, + { + "epoch": 0.54, + "learning_rate": 2.149784050264285e-05, + "loss": 1.7759, + "step": 35110 + }, + { + "epoch": 0.54, + "learning_rate": 2.1485775482673643e-05, + "loss": 1.5853, + "step": 35120 + }, + { + "epoch": 0.55, + "learning_rate": 2.147371129762078e-05, + "loss": 1.6965, + "step": 35130 + }, + { + "epoch": 0.55, + "learning_rate": 2.1461647950350502e-05, + "loss": 1.6932, + "step": 35140 + }, + { + "epoch": 0.55, + "learning_rate": 2.144958544372885e-05, + "loss": 1.6664, + "step": 35150 + }, + { + "epoch": 0.55, + "learning_rate": 2.1437523780621653e-05, + "loss": 1.7326, + "step": 35160 + }, + { + "epoch": 0.55, + "learning_rate": 2.1425462963894535e-05, + "loss": 1.7017, + "step": 35170 + }, + { + "epoch": 0.55, + "learning_rate": 2.141340299641295e-05, + "loss": 1.5896, + "step": 35180 + }, + { + "epoch": 0.55, + "learning_rate": 2.140134388104211e-05, + "loss": 1.6619, + "step": 35190 + }, + { + "epoch": 0.55, + "learning_rate": 2.1389285620647042e-05, + "loss": 1.6838, + "step": 35200 + }, + { + "epoch": 0.55, + "learning_rate": 2.1377228218092586e-05, + "loss": 1.6775, + "step": 35210 + }, + { + "epoch": 0.55, + "learning_rate": 2.1365171676243357e-05, + "loss": 1.5946, + "step": 35220 + }, + { + "epoch": 0.55, + "learning_rate": 2.1353115997963764e-05, + "loss": 1.6967, + "step": 35230 + }, + { + "epoch": 0.55, + "learning_rate": 2.1341061186118032e-05, + "loss": 1.6656, + "step": 35240 + }, + { + "epoch": 0.55, + "learning_rate": 2.1329007243570163e-05, + "loss": 1.6311, + "step": 35250 + }, + { + "epoch": 0.55, + "learning_rate": 2.1316954173183943e-05, + "loss": 1.7566, + "step": 35260 + }, + { + "epoch": 0.55, + "learning_rate": 2.1304901977822982e-05, + "loss": 1.6327, + "step": 35270 + }, + { + "epoch": 0.55, + "learning_rate": 2.129285066035066e-05, + "loss": 1.7071, + "step": 35280 + }, + { + "epoch": 0.55, + "learning_rate": 2.128080022363014e-05, + "loss": 1.674, + "step": 35290 + }, + { + "epoch": 0.55, + "learning_rate": 2.1268750670524405e-05, + "loss": 1.6822, + "step": 35300 + }, + { + "epoch": 0.55, + "learning_rate": 2.1256702003896197e-05, + "loss": 1.6378, + "step": 35310 + }, + { + "epoch": 0.55, + "learning_rate": 2.124465422660808e-05, + "loss": 1.6938, + "step": 35320 + }, + { + "epoch": 0.55, + "learning_rate": 2.1232607341522376e-05, + "loss": 1.6866, + "step": 35330 + }, + { + "epoch": 0.55, + "learning_rate": 2.1220561351501202e-05, + "loss": 1.69, + "step": 35340 + }, + { + "epoch": 0.55, + "learning_rate": 2.120851625940648e-05, + "loss": 1.692, + "step": 35350 + }, + { + "epoch": 0.55, + "learning_rate": 2.11964720680999e-05, + "loss": 1.6697, + "step": 35360 + }, + { + "epoch": 0.55, + "learning_rate": 2.1184428780442947e-05, + "loss": 1.6843, + "step": 35370 + }, + { + "epoch": 0.55, + "learning_rate": 2.1172386399296887e-05, + "loss": 1.7021, + "step": 35380 + }, + { + "epoch": 0.55, + "learning_rate": 2.116034492752278e-05, + "loss": 1.6651, + "step": 35390 + }, + { + "epoch": 0.55, + "learning_rate": 2.1148304367981446e-05, + "loss": 1.6863, + "step": 35400 + }, + { + "epoch": 0.55, + "learning_rate": 2.113626472353353e-05, + "loss": 1.6608, + "step": 35410 + }, + { + "epoch": 0.55, + "learning_rate": 2.112422599703941e-05, + "loss": 1.6148, + "step": 35420 + }, + { + "epoch": 0.55, + "learning_rate": 2.111218819135928e-05, + "loss": 1.6601, + "step": 35430 + }, + { + "epoch": 0.55, + "learning_rate": 2.1100151309353106e-05, + "loss": 1.6247, + "step": 35440 + }, + { + "epoch": 0.55, + "learning_rate": 2.1088115353880638e-05, + "loss": 1.6718, + "step": 35450 + }, + { + "epoch": 0.55, + "learning_rate": 2.1076080327801386e-05, + "loss": 1.6574, + "step": 35460 + }, + { + "epoch": 0.55, + "learning_rate": 2.106404623397468e-05, + "loss": 1.6764, + "step": 35470 + }, + { + "epoch": 0.55, + "learning_rate": 2.1052013075259585e-05, + "loss": 1.6576, + "step": 35480 + }, + { + "epoch": 0.55, + "learning_rate": 2.103998085451496e-05, + "loss": 1.6744, + "step": 35490 + }, + { + "epoch": 0.55, + "learning_rate": 2.1027949574599463e-05, + "loss": 1.6743, + "step": 35500 + }, + { + "epoch": 0.55, + "learning_rate": 2.101591923837149e-05, + "loss": 1.6804, + "step": 35510 + }, + { + "epoch": 0.55, + "learning_rate": 2.1003889848689236e-05, + "loss": 1.646, + "step": 35520 + }, + { + "epoch": 0.55, + "learning_rate": 2.0991861408410672e-05, + "loss": 1.6693, + "step": 35530 + }, + { + "epoch": 0.55, + "learning_rate": 2.0979833920393538e-05, + "loss": 1.6902, + "step": 35540 + }, + { + "epoch": 0.55, + "learning_rate": 2.0967807387495346e-05, + "loss": 1.6448, + "step": 35550 + }, + { + "epoch": 0.55, + "learning_rate": 2.095578181257339e-05, + "loss": 1.6497, + "step": 35560 + }, + { + "epoch": 0.55, + "learning_rate": 2.094375719848471e-05, + "loss": 1.6736, + "step": 35570 + }, + { + "epoch": 0.55, + "learning_rate": 2.0931733548086162e-05, + "loss": 1.6736, + "step": 35580 + }, + { + "epoch": 0.55, + "learning_rate": 2.091971086423434e-05, + "loss": 1.5904, + "step": 35590 + }, + { + "epoch": 0.55, + "learning_rate": 2.0907689149785603e-05, + "loss": 1.6969, + "step": 35600 + }, + { + "epoch": 0.55, + "learning_rate": 2.0895668407596113e-05, + "loss": 1.6721, + "step": 35610 + }, + { + "epoch": 0.55, + "learning_rate": 2.0883648640521777e-05, + "loss": 1.7031, + "step": 35620 + }, + { + "epoch": 0.55, + "learning_rate": 2.0871629851418255e-05, + "loss": 1.6977, + "step": 35630 + }, + { + "epoch": 0.55, + "learning_rate": 2.0859612043141027e-05, + "loss": 1.6856, + "step": 35640 + }, + { + "epoch": 0.55, + "learning_rate": 2.084759521854529e-05, + "loss": 1.6906, + "step": 35650 + }, + { + "epoch": 0.55, + "learning_rate": 2.083557938048601e-05, + "loss": 1.675, + "step": 35660 + }, + { + "epoch": 0.55, + "learning_rate": 2.0823564531817967e-05, + "loss": 1.6969, + "step": 35670 + }, + { + "epoch": 0.55, + "learning_rate": 2.0811550675395645e-05, + "loss": 1.6719, + "step": 35680 + }, + { + "epoch": 0.55, + "learning_rate": 2.0799537814073327e-05, + "loss": 1.7009, + "step": 35690 + }, + { + "epoch": 0.55, + "learning_rate": 2.078752595070506e-05, + "loss": 1.7159, + "step": 35700 + }, + { + "epoch": 0.55, + "learning_rate": 2.0775515088144643e-05, + "loss": 1.6867, + "step": 35710 + }, + { + "epoch": 0.55, + "learning_rate": 2.0763505229245624e-05, + "loss": 1.6888, + "step": 35720 + }, + { + "epoch": 0.55, + "learning_rate": 2.0751496376861355e-05, + "loss": 1.6791, + "step": 35730 + }, + { + "epoch": 0.55, + "learning_rate": 2.073948853384491e-05, + "loss": 1.6872, + "step": 35740 + }, + { + "epoch": 0.55, + "learning_rate": 2.072748170304913e-05, + "loss": 1.6545, + "step": 35750 + }, + { + "epoch": 0.55, + "learning_rate": 2.071547588732663e-05, + "loss": 1.7282, + "step": 35760 + }, + { + "epoch": 0.55, + "learning_rate": 2.0703471089529766e-05, + "loss": 1.7023, + "step": 35770 + }, + { + "epoch": 0.56, + "learning_rate": 2.0691467312510678e-05, + "loss": 1.6865, + "step": 35780 + }, + { + "epoch": 0.56, + "learning_rate": 2.067946455912124e-05, + "loss": 1.7061, + "step": 35790 + }, + { + "epoch": 0.56, + "learning_rate": 2.0667462832213073e-05, + "loss": 1.7203, + "step": 35800 + }, + { + "epoch": 0.56, + "learning_rate": 2.0655462134637593e-05, + "loss": 1.7236, + "step": 35810 + }, + { + "epoch": 0.56, + "learning_rate": 2.0643462469245945e-05, + "loss": 1.7272, + "step": 35820 + }, + { + "epoch": 0.56, + "learning_rate": 2.0631463838889016e-05, + "loss": 1.7049, + "step": 35830 + }, + { + "epoch": 0.56, + "learning_rate": 2.061946624641749e-05, + "loss": 1.6904, + "step": 35840 + }, + { + "epoch": 0.56, + "learning_rate": 2.0607469694681757e-05, + "loss": 1.6967, + "step": 35850 + }, + { + "epoch": 0.56, + "learning_rate": 2.0595474186531988e-05, + "loss": 1.6654, + "step": 35860 + }, + { + "epoch": 0.56, + "learning_rate": 2.0583479724818107e-05, + "loss": 1.6826, + "step": 35870 + }, + { + "epoch": 0.56, + "learning_rate": 2.0571486312389776e-05, + "loss": 1.698, + "step": 35880 + }, + { + "epoch": 0.56, + "learning_rate": 2.0559493952096403e-05, + "loss": 1.6943, + "step": 35890 + }, + { + "epoch": 0.56, + "learning_rate": 2.054750264678718e-05, + "loss": 1.6718, + "step": 35900 + }, + { + "epoch": 0.56, + "learning_rate": 2.053551239931101e-05, + "loss": 1.6603, + "step": 35910 + }, + { + "epoch": 0.56, + "learning_rate": 2.0523523212516552e-05, + "loss": 1.6593, + "step": 35920 + }, + { + "epoch": 0.56, + "learning_rate": 2.051153508925224e-05, + "loss": 1.669, + "step": 35930 + }, + { + "epoch": 0.56, + "learning_rate": 2.0499548032366224e-05, + "loss": 1.6858, + "step": 35940 + }, + { + "epoch": 0.56, + "learning_rate": 2.0487562044706402e-05, + "loss": 1.679, + "step": 35950 + }, + { + "epoch": 0.56, + "learning_rate": 2.0475577129120454e-05, + "loss": 1.6793, + "step": 35960 + }, + { + "epoch": 0.56, + "learning_rate": 2.0463593288455758e-05, + "loss": 1.6693, + "step": 35970 + }, + { + "epoch": 0.56, + "learning_rate": 2.045161052555947e-05, + "loss": 1.6794, + "step": 35980 + }, + { + "epoch": 0.56, + "learning_rate": 2.0439628843278474e-05, + "loss": 1.6656, + "step": 35990 + }, + { + "epoch": 0.56, + "learning_rate": 2.0427648244459403e-05, + "loss": 1.6536, + "step": 36000 + }, + { + "epoch": 0.56, + "learning_rate": 2.0415668731948625e-05, + "loss": 1.6534, + "step": 36010 + }, + { + "epoch": 0.56, + "learning_rate": 2.040369030859227e-05, + "loss": 1.6886, + "step": 36020 + }, + { + "epoch": 0.56, + "learning_rate": 2.0391712977236185e-05, + "loss": 1.6546, + "step": 36030 + }, + { + "epoch": 0.56, + "learning_rate": 2.0379736740725968e-05, + "loss": 1.6652, + "step": 36040 + }, + { + "epoch": 0.56, + "learning_rate": 2.0367761601906964e-05, + "loss": 1.6507, + "step": 36050 + }, + { + "epoch": 0.56, + "learning_rate": 2.0355787563624234e-05, + "loss": 1.6413, + "step": 36060 + }, + { + "epoch": 0.56, + "learning_rate": 2.0343814628722614e-05, + "loss": 1.6671, + "step": 36070 + }, + { + "epoch": 0.56, + "learning_rate": 2.0331842800046646e-05, + "loss": 1.6714, + "step": 36080 + }, + { + "epoch": 0.56, + "learning_rate": 2.0319872080440617e-05, + "loss": 1.6567, + "step": 36090 + }, + { + "epoch": 0.56, + "learning_rate": 2.0307902472748564e-05, + "loss": 1.6193, + "step": 36100 + }, + { + "epoch": 0.56, + "learning_rate": 2.0295933979814246e-05, + "loss": 1.6734, + "step": 36110 + }, + { + "epoch": 0.56, + "learning_rate": 2.028396660448115e-05, + "loss": 1.6715, + "step": 36120 + }, + { + "epoch": 0.56, + "learning_rate": 2.027200034959253e-05, + "loss": 1.6476, + "step": 36130 + }, + { + "epoch": 0.56, + "learning_rate": 2.0260035217991337e-05, + "loss": 1.6556, + "step": 36140 + }, + { + "epoch": 0.56, + "learning_rate": 2.024807121252027e-05, + "loss": 1.6485, + "step": 36150 + }, + { + "epoch": 0.56, + "learning_rate": 2.0236108336021764e-05, + "loss": 1.6567, + "step": 36160 + }, + { + "epoch": 0.56, + "learning_rate": 2.0224146591337994e-05, + "loss": 1.657, + "step": 36170 + }, + { + "epoch": 0.56, + "learning_rate": 2.021218598131083e-05, + "loss": 1.6314, + "step": 36180 + }, + { + "epoch": 0.56, + "learning_rate": 2.0200226508781925e-05, + "loss": 1.6215, + "step": 36190 + }, + { + "epoch": 0.56, + "learning_rate": 2.018826817659262e-05, + "loss": 1.6383, + "step": 36200 + }, + { + "epoch": 0.56, + "learning_rate": 2.0176310987583986e-05, + "loss": 1.6625, + "step": 36210 + }, + { + "epoch": 0.56, + "learning_rate": 2.0164354944596862e-05, + "loss": 1.6495, + "step": 36220 + }, + { + "epoch": 0.56, + "learning_rate": 2.0152400050471766e-05, + "loss": 1.6553, + "step": 36230 + }, + { + "epoch": 0.56, + "learning_rate": 2.0140446308048987e-05, + "loss": 1.6519, + "step": 36240 + }, + { + "epoch": 0.56, + "learning_rate": 2.0128493720168503e-05, + "loss": 1.6591, + "step": 36250 + }, + { + "epoch": 0.56, + "learning_rate": 2.0116542289670023e-05, + "loss": 1.6122, + "step": 36260 + }, + { + "epoch": 0.56, + "learning_rate": 2.0104592019393018e-05, + "loss": 1.6488, + "step": 36270 + }, + { + "epoch": 0.56, + "learning_rate": 2.009264291217664e-05, + "loss": 1.6568, + "step": 36280 + }, + { + "epoch": 0.56, + "learning_rate": 2.008069497085978e-05, + "loss": 1.6478, + "step": 36290 + }, + { + "epoch": 0.56, + "learning_rate": 2.0068748198281066e-05, + "loss": 1.6827, + "step": 36300 + }, + { + "epoch": 0.56, + "learning_rate": 2.005680259727882e-05, + "loss": 1.6815, + "step": 36310 + }, + { + "epoch": 0.56, + "learning_rate": 2.0044858170691112e-05, + "loss": 1.676, + "step": 36320 + }, + { + "epoch": 0.56, + "learning_rate": 2.0032914921355715e-05, + "loss": 1.7173, + "step": 36330 + }, + { + "epoch": 0.56, + "learning_rate": 2.002097285211014e-05, + "loss": 1.6834, + "step": 36340 + }, + { + "epoch": 0.56, + "learning_rate": 2.0009031965791594e-05, + "loss": 1.691, + "step": 36350 + }, + { + "epoch": 0.56, + "learning_rate": 1.9997092265237034e-05, + "loss": 1.672, + "step": 36360 + }, + { + "epoch": 0.56, + "learning_rate": 1.9985153753283105e-05, + "loss": 1.6638, + "step": 36370 + }, + { + "epoch": 0.56, + "learning_rate": 1.997321643276617e-05, + "loss": 1.6726, + "step": 36380 + }, + { + "epoch": 0.56, + "learning_rate": 1.996128030652235e-05, + "loss": 1.6455, + "step": 36390 + }, + { + "epoch": 0.56, + "learning_rate": 1.9949345377387434e-05, + "loss": 1.662, + "step": 36400 + }, + { + "epoch": 0.56, + "learning_rate": 1.993741164819694e-05, + "loss": 1.6576, + "step": 36410 + }, + { + "epoch": 0.57, + "learning_rate": 1.992547912178613e-05, + "loss": 1.6539, + "step": 36420 + }, + { + "epoch": 0.57, + "learning_rate": 1.991354780098993e-05, + "loss": 1.6428, + "step": 36430 + }, + { + "epoch": 0.57, + "learning_rate": 1.9901617688643033e-05, + "loss": 1.7203, + "step": 36440 + }, + { + "epoch": 0.57, + "learning_rate": 1.9889688787579807e-05, + "loss": 1.7004, + "step": 36450 + }, + { + "epoch": 0.57, + "learning_rate": 1.987776110063433e-05, + "loss": 1.6572, + "step": 36460 + }, + { + "epoch": 0.57, + "learning_rate": 1.986583463064043e-05, + "loss": 1.6404, + "step": 36470 + }, + { + "epoch": 0.57, + "learning_rate": 1.9853909380431602e-05, + "loss": 1.6314, + "step": 36480 + }, + { + "epoch": 0.57, + "learning_rate": 1.9841985352841078e-05, + "loss": 1.6172, + "step": 36490 + }, + { + "epoch": 0.57, + "learning_rate": 1.983006255070179e-05, + "loss": 1.6235, + "step": 36500 + }, + { + "epoch": 0.57, + "learning_rate": 1.981814097684639e-05, + "loss": 1.6229, + "step": 36510 + }, + { + "epoch": 0.57, + "learning_rate": 1.9806220634107206e-05, + "loss": 1.6283, + "step": 36520 + }, + { + "epoch": 0.57, + "learning_rate": 1.9794301525316323e-05, + "loss": 1.6487, + "step": 36530 + }, + { + "epoch": 0.57, + "learning_rate": 1.9782383653305496e-05, + "loss": 1.6501, + "step": 36540 + }, + { + "epoch": 0.57, + "learning_rate": 1.9770467020906182e-05, + "loss": 1.6227, + "step": 36550 + }, + { + "epoch": 0.57, + "learning_rate": 1.9758551630949584e-05, + "loss": 1.6341, + "step": 36560 + }, + { + "epoch": 0.57, + "learning_rate": 1.974663748626657e-05, + "loss": 1.661, + "step": 36570 + }, + { + "epoch": 0.57, + "learning_rate": 1.9734724589687714e-05, + "loss": 1.6329, + "step": 36580 + }, + { + "epoch": 0.57, + "learning_rate": 1.972281294404333e-05, + "loss": 1.608, + "step": 36590 + }, + { + "epoch": 0.57, + "learning_rate": 1.9710902552163397e-05, + "loss": 1.6393, + "step": 36600 + }, + { + "epoch": 0.57, + "learning_rate": 1.96989934168776e-05, + "loss": 1.635, + "step": 36610 + }, + { + "epoch": 0.57, + "learning_rate": 1.9687085541015355e-05, + "loss": 1.633, + "step": 36620 + }, + { + "epoch": 0.57, + "learning_rate": 1.967517892740574e-05, + "loss": 1.6157, + "step": 36630 + }, + { + "epoch": 0.57, + "learning_rate": 1.9663273578877564e-05, + "loss": 1.6495, + "step": 36640 + }, + { + "epoch": 0.57, + "learning_rate": 1.9651369498259316e-05, + "loss": 1.6712, + "step": 36650 + }, + { + "epoch": 0.57, + "learning_rate": 1.9639466688379195e-05, + "loss": 1.6215, + "step": 36660 + }, + { + "epoch": 0.57, + "learning_rate": 1.9627565152065093e-05, + "loss": 1.644, + "step": 36670 + }, + { + "epoch": 0.57, + "learning_rate": 1.9615664892144608e-05, + "loss": 1.6315, + "step": 36680 + }, + { + "epoch": 0.57, + "learning_rate": 1.9603765911445003e-05, + "loss": 1.6257, + "step": 36690 + }, + { + "epoch": 0.57, + "learning_rate": 1.959186821279329e-05, + "loss": 1.6185, + "step": 36700 + }, + { + "epoch": 0.57, + "learning_rate": 1.957997179901614e-05, + "loss": 1.6329, + "step": 36710 + }, + { + "epoch": 0.57, + "learning_rate": 1.95680766729399e-05, + "loss": 1.6276, + "step": 36720 + }, + { + "epoch": 0.57, + "learning_rate": 1.9556182837390678e-05, + "loss": 1.6251, + "step": 36730 + }, + { + "epoch": 0.57, + "learning_rate": 1.9544290295194216e-05, + "loss": 1.6479, + "step": 36740 + }, + { + "epoch": 0.57, + "learning_rate": 1.953239904917595e-05, + "loss": 1.6345, + "step": 36750 + }, + { + "epoch": 0.57, + "learning_rate": 1.9520509102161056e-05, + "loss": 1.6258, + "step": 36760 + }, + { + "epoch": 0.57, + "learning_rate": 1.950862045697436e-05, + "loss": 1.6358, + "step": 36770 + }, + { + "epoch": 0.57, + "learning_rate": 1.9496733116440372e-05, + "loss": 1.649, + "step": 36780 + }, + { + "epoch": 0.57, + "learning_rate": 1.948484708338334e-05, + "loss": 1.6902, + "step": 36790 + }, + { + "epoch": 0.57, + "learning_rate": 1.947296236062714e-05, + "loss": 1.6752, + "step": 36800 + }, + { + "epoch": 0.57, + "learning_rate": 1.946107895099539e-05, + "loss": 1.662, + "step": 36810 + }, + { + "epoch": 0.57, + "learning_rate": 1.9449196857311373e-05, + "loss": 1.6664, + "step": 36820 + }, + { + "epoch": 0.57, + "learning_rate": 1.9437316082398052e-05, + "loss": 1.6244, + "step": 36830 + }, + { + "epoch": 0.57, + "learning_rate": 1.9425436629078082e-05, + "loss": 1.6154, + "step": 36840 + }, + { + "epoch": 0.57, + "learning_rate": 1.9413558500173818e-05, + "loss": 1.6371, + "step": 36850 + }, + { + "epoch": 0.57, + "learning_rate": 1.9401681698507284e-05, + "loss": 1.643, + "step": 36860 + }, + { + "epoch": 0.57, + "learning_rate": 1.9389806226900182e-05, + "loss": 1.6153, + "step": 36870 + }, + { + "epoch": 0.57, + "learning_rate": 1.9377932088173935e-05, + "loss": 1.6275, + "step": 36880 + }, + { + "epoch": 0.57, + "learning_rate": 1.9366059285149595e-05, + "loss": 1.6104, + "step": 36890 + }, + { + "epoch": 0.57, + "learning_rate": 1.9354187820647955e-05, + "loss": 1.6675, + "step": 36900 + }, + { + "epoch": 0.57, + "learning_rate": 1.9342317697489446e-05, + "loss": 1.6076, + "step": 36910 + }, + { + "epoch": 0.57, + "learning_rate": 1.933044891849418e-05, + "loss": 1.6242, + "step": 36920 + }, + { + "epoch": 0.57, + "learning_rate": 1.9318581486482e-05, + "loss": 1.619, + "step": 36930 + }, + { + "epoch": 0.57, + "learning_rate": 1.930671540427237e-05, + "loss": 1.6285, + "step": 36940 + }, + { + "epoch": 0.57, + "learning_rate": 1.9294850674684447e-05, + "loss": 1.6157, + "step": 36950 + }, + { + "epoch": 0.57, + "learning_rate": 1.9282987300537104e-05, + "loss": 1.6029, + "step": 36960 + }, + { + "epoch": 0.57, + "learning_rate": 1.9271125284648848e-05, + "loss": 1.6473, + "step": 36970 + }, + { + "epoch": 0.57, + "learning_rate": 1.9259264629837886e-05, + "loss": 1.6199, + "step": 36980 + }, + { + "epoch": 0.57, + "learning_rate": 1.9247405338922092e-05, + "loss": 1.6086, + "step": 36990 + }, + { + "epoch": 0.57, + "learning_rate": 1.9235547414719024e-05, + "loss": 1.6335, + "step": 37000 + }, + { + "epoch": 0.57, + "learning_rate": 1.92236908600459e-05, + "loss": 1.6311, + "step": 37010 + }, + { + "epoch": 0.57, + "learning_rate": 1.921183567771964e-05, + "loss": 1.618, + "step": 37020 + }, + { + "epoch": 0.57, + "learning_rate": 1.9199981870556815e-05, + "loss": 1.6124, + "step": 37030 + }, + { + "epoch": 0.57, + "learning_rate": 1.9188129441373662e-05, + "loss": 1.6852, + "step": 37040 + }, + { + "epoch": 0.57, + "learning_rate": 1.9176278392986124e-05, + "loss": 1.6135, + "step": 37050 + }, + { + "epoch": 0.57, + "learning_rate": 1.9164428728209792e-05, + "loss": 1.6176, + "step": 37060 + }, + { + "epoch": 0.58, + "learning_rate": 1.915258044985992e-05, + "loss": 1.6052, + "step": 37070 + }, + { + "epoch": 0.58, + "learning_rate": 1.9140733560751462e-05, + "loss": 1.6169, + "step": 37080 + }, + { + "epoch": 0.58, + "learning_rate": 1.9128888063699012e-05, + "loss": 1.5959, + "step": 37090 + }, + { + "epoch": 0.58, + "learning_rate": 1.911704396151685e-05, + "loss": 1.6304, + "step": 37100 + }, + { + "epoch": 0.58, + "learning_rate": 1.9105201257018928e-05, + "loss": 1.6042, + "step": 37110 + }, + { + "epoch": 0.58, + "learning_rate": 1.909335995301885e-05, + "loss": 1.6218, + "step": 37120 + }, + { + "epoch": 0.58, + "learning_rate": 1.90815200523299e-05, + "loss": 1.6279, + "step": 37130 + }, + { + "epoch": 0.58, + "learning_rate": 1.9069681557765032e-05, + "loss": 1.623, + "step": 37140 + }, + { + "epoch": 0.58, + "learning_rate": 1.9057844472136845e-05, + "loss": 1.5995, + "step": 37150 + }, + { + "epoch": 0.58, + "learning_rate": 1.9046008798257624e-05, + "loss": 1.6197, + "step": 37160 + }, + { + "epoch": 0.58, + "learning_rate": 1.9034174538939316e-05, + "loss": 1.6644, + "step": 37170 + }, + { + "epoch": 0.58, + "learning_rate": 1.9022341696993513e-05, + "loss": 1.6634, + "step": 37180 + }, + { + "epoch": 0.58, + "learning_rate": 1.901051027523151e-05, + "loss": 1.6782, + "step": 37190 + }, + { + "epoch": 0.58, + "learning_rate": 1.8998680276464215e-05, + "loss": 1.6734, + "step": 37200 + }, + { + "epoch": 0.58, + "learning_rate": 1.8986851703502227e-05, + "loss": 1.6527, + "step": 37210 + }, + { + "epoch": 0.58, + "learning_rate": 1.8975024559155813e-05, + "loss": 1.6456, + "step": 37220 + }, + { + "epoch": 0.58, + "learning_rate": 1.8963198846234882e-05, + "loss": 1.6312, + "step": 37230 + }, + { + "epoch": 0.58, + "learning_rate": 1.8951374567549e-05, + "loss": 1.6193, + "step": 37240 + }, + { + "epoch": 0.58, + "learning_rate": 1.8939551725907426e-05, + "loss": 1.625, + "step": 37250 + }, + { + "epoch": 0.58, + "learning_rate": 1.892773032411903e-05, + "loss": 1.6118, + "step": 37260 + }, + { + "epoch": 0.58, + "learning_rate": 1.891591036499238e-05, + "loss": 1.6162, + "step": 37270 + }, + { + "epoch": 0.58, + "learning_rate": 1.8904091851335674e-05, + "loss": 1.632, + "step": 37280 + }, + { + "epoch": 0.58, + "learning_rate": 1.889227478595679e-05, + "loss": 1.6279, + "step": 37290 + }, + { + "epoch": 0.58, + "learning_rate": 1.8880459171663235e-05, + "loss": 1.6107, + "step": 37300 + }, + { + "epoch": 0.58, + "learning_rate": 1.8868645011262198e-05, + "loss": 1.6299, + "step": 37310 + }, + { + "epoch": 0.58, + "learning_rate": 1.885683230756051e-05, + "loss": 1.6124, + "step": 37320 + }, + { + "epoch": 0.58, + "learning_rate": 1.8845021063364638e-05, + "loss": 1.612, + "step": 37330 + }, + { + "epoch": 0.58, + "learning_rate": 1.883321128148075e-05, + "loss": 1.6124, + "step": 37340 + }, + { + "epoch": 0.58, + "learning_rate": 1.8821402964714607e-05, + "loss": 1.6117, + "step": 37350 + }, + { + "epoch": 0.58, + "learning_rate": 1.8809596115871677e-05, + "loss": 1.5816, + "step": 37360 + }, + { + "epoch": 0.58, + "learning_rate": 1.8797790737757045e-05, + "loss": 1.6274, + "step": 37370 + }, + { + "epoch": 0.58, + "learning_rate": 1.8785986833175445e-05, + "loss": 1.6172, + "step": 37380 + }, + { + "epoch": 0.58, + "learning_rate": 1.8774184404931294e-05, + "loss": 1.621, + "step": 37390 + }, + { + "epoch": 0.58, + "learning_rate": 1.876238345582862e-05, + "loss": 1.6078, + "step": 37400 + }, + { + "epoch": 0.58, + "learning_rate": 1.875058398867111e-05, + "loss": 1.6112, + "step": 37410 + }, + { + "epoch": 0.58, + "learning_rate": 1.8738786006262127e-05, + "loss": 1.6331, + "step": 37420 + }, + { + "epoch": 0.58, + "learning_rate": 1.872698951140464e-05, + "loss": 1.6103, + "step": 37430 + }, + { + "epoch": 0.58, + "learning_rate": 1.8715194506901287e-05, + "loss": 1.6117, + "step": 37440 + }, + { + "epoch": 0.58, + "learning_rate": 1.870340099555435e-05, + "loss": 1.614, + "step": 37450 + }, + { + "epoch": 0.58, + "learning_rate": 1.8691608980165765e-05, + "loss": 1.592, + "step": 37460 + }, + { + "epoch": 0.58, + "learning_rate": 1.8679818463537076e-05, + "loss": 1.602, + "step": 37470 + }, + { + "epoch": 0.58, + "learning_rate": 1.8668029448469526e-05, + "loss": 1.6005, + "step": 37480 + }, + { + "epoch": 0.58, + "learning_rate": 1.8656241937763964e-05, + "loss": 1.5889, + "step": 37490 + }, + { + "epoch": 0.58, + "learning_rate": 1.864445593422087e-05, + "loss": 1.6192, + "step": 37500 + }, + { + "epoch": 0.58, + "learning_rate": 1.863267144064041e-05, + "loss": 1.6034, + "step": 37510 + }, + { + "epoch": 0.58, + "learning_rate": 1.862088845982236e-05, + "loss": 1.6003, + "step": 37520 + }, + { + "epoch": 0.58, + "learning_rate": 1.8609106994566135e-05, + "loss": 1.6116, + "step": 37530 + }, + { + "epoch": 0.58, + "learning_rate": 1.8597327047670814e-05, + "loss": 1.6345, + "step": 37540 + }, + { + "epoch": 0.58, + "learning_rate": 1.8585548621935088e-05, + "loss": 1.6368, + "step": 37550 + }, + { + "epoch": 0.58, + "learning_rate": 1.8573771720157297e-05, + "loss": 1.6586, + "step": 37560 + }, + { + "epoch": 0.58, + "learning_rate": 1.8561996345135435e-05, + "loss": 1.6127, + "step": 37570 + }, + { + "epoch": 0.58, + "learning_rate": 1.8550222499667098e-05, + "loss": 1.6258, + "step": 37580 + }, + { + "epoch": 0.58, + "learning_rate": 1.853845018654956e-05, + "loss": 1.6294, + "step": 37590 + }, + { + "epoch": 0.58, + "learning_rate": 1.8526679408579697e-05, + "loss": 1.6193, + "step": 37600 + }, + { + "epoch": 0.58, + "learning_rate": 1.851491016855404e-05, + "loss": 1.6417, + "step": 37610 + }, + { + "epoch": 0.58, + "learning_rate": 1.850314246926874e-05, + "loss": 1.5921, + "step": 37620 + }, + { + "epoch": 0.58, + "learning_rate": 1.8491376313519604e-05, + "loss": 1.6261, + "step": 37630 + }, + { + "epoch": 0.58, + "learning_rate": 1.8479611704102038e-05, + "loss": 1.635, + "step": 37640 + }, + { + "epoch": 0.58, + "learning_rate": 1.8467848643811123e-05, + "loss": 1.6165, + "step": 37650 + }, + { + "epoch": 0.58, + "learning_rate": 1.845608713544154e-05, + "loss": 1.6248, + "step": 37660 + }, + { + "epoch": 0.58, + "learning_rate": 1.8444327181787602e-05, + "loss": 1.6395, + "step": 37670 + }, + { + "epoch": 0.58, + "learning_rate": 1.843256878564328e-05, + "loss": 1.6479, + "step": 37680 + }, + { + "epoch": 0.58, + "learning_rate": 1.842081194980215e-05, + "loss": 1.687, + "step": 37690 + }, + { + "epoch": 0.58, + "learning_rate": 1.840905667705741e-05, + "loss": 1.6954, + "step": 37700 + }, + { + "epoch": 0.59, + "learning_rate": 1.839730297020192e-05, + "loss": 1.7003, + "step": 37710 + }, + { + "epoch": 0.59, + "learning_rate": 1.8385550832028144e-05, + "loss": 1.6894, + "step": 37720 + }, + { + "epoch": 0.59, + "learning_rate": 1.837380026532816e-05, + "loss": 1.7122, + "step": 37730 + }, + { + "epoch": 0.59, + "learning_rate": 1.8362051272893725e-05, + "loss": 1.6924, + "step": 37740 + }, + { + "epoch": 0.59, + "learning_rate": 1.835030385751616e-05, + "loss": 1.7223, + "step": 37750 + }, + { + "epoch": 0.59, + "learning_rate": 1.8338558021986447e-05, + "loss": 1.7023, + "step": 37760 + }, + { + "epoch": 0.59, + "learning_rate": 1.8326813769095187e-05, + "loss": 1.7259, + "step": 37770 + }, + { + "epoch": 0.59, + "learning_rate": 1.8315071101632604e-05, + "loss": 1.698, + "step": 37780 + }, + { + "epoch": 0.59, + "learning_rate": 1.8303330022388528e-05, + "loss": 1.7212, + "step": 37790 + }, + { + "epoch": 0.59, + "learning_rate": 1.8291590534152457e-05, + "loss": 1.6951, + "step": 37800 + }, + { + "epoch": 0.59, + "learning_rate": 1.827985263971345e-05, + "loss": 1.6868, + "step": 37810 + }, + { + "epoch": 0.59, + "learning_rate": 1.8268116341860247e-05, + "loss": 1.6604, + "step": 37820 + }, + { + "epoch": 0.59, + "learning_rate": 1.8256381643381166e-05, + "loss": 1.6979, + "step": 37830 + }, + { + "epoch": 0.59, + "learning_rate": 1.8244648547064153e-05, + "loss": 1.6589, + "step": 37840 + }, + { + "epoch": 0.59, + "learning_rate": 1.82329170556968e-05, + "loss": 1.6791, + "step": 37850 + }, + { + "epoch": 0.59, + "learning_rate": 1.8221187172066285e-05, + "loss": 1.6811, + "step": 37860 + }, + { + "epoch": 0.59, + "learning_rate": 1.820945889895941e-05, + "loss": 1.6398, + "step": 37870 + }, + { + "epoch": 0.59, + "learning_rate": 1.8197732239162613e-05, + "loss": 1.5977, + "step": 37880 + }, + { + "epoch": 0.59, + "learning_rate": 1.8186007195461937e-05, + "loss": 1.6212, + "step": 37890 + }, + { + "epoch": 0.59, + "learning_rate": 1.8174283770643027e-05, + "loss": 1.5862, + "step": 37900 + }, + { + "epoch": 0.59, + "learning_rate": 1.8162561967491178e-05, + "loss": 1.5805, + "step": 37910 + }, + { + "epoch": 0.59, + "learning_rate": 1.815084178879126e-05, + "loss": 1.6151, + "step": 37920 + }, + { + "epoch": 0.59, + "learning_rate": 1.8139123237327787e-05, + "loss": 1.7078, + "step": 37930 + }, + { + "epoch": 0.59, + "learning_rate": 1.8127406315884873e-05, + "loss": 1.6221, + "step": 37940 + }, + { + "epoch": 0.59, + "learning_rate": 1.811569102724625e-05, + "loss": 1.6135, + "step": 37950 + }, + { + "epoch": 0.59, + "learning_rate": 1.8103977374195248e-05, + "loss": 1.6406, + "step": 37960 + }, + { + "epoch": 0.59, + "learning_rate": 1.8092265359514837e-05, + "loss": 1.6536, + "step": 37970 + }, + { + "epoch": 0.59, + "learning_rate": 1.808055498598757e-05, + "loss": 1.614, + "step": 37980 + }, + { + "epoch": 0.59, + "learning_rate": 1.806884625639561e-05, + "loss": 1.6013, + "step": 37990 + }, + { + "epoch": 0.59, + "learning_rate": 1.8057139173520764e-05, + "loss": 1.6079, + "step": 38000 + }, + { + "epoch": 0.59, + "learning_rate": 1.804543374014441e-05, + "loss": 1.6078, + "step": 38010 + }, + { + "epoch": 0.59, + "learning_rate": 1.8033729959047545e-05, + "loss": 1.5776, + "step": 38020 + }, + { + "epoch": 0.59, + "learning_rate": 1.8022027833010786e-05, + "loss": 1.6088, + "step": 38030 + }, + { + "epoch": 0.59, + "learning_rate": 1.801032736481433e-05, + "loss": 1.6384, + "step": 38040 + }, + { + "epoch": 0.59, + "learning_rate": 1.7998628557238023e-05, + "loss": 1.6569, + "step": 38050 + }, + { + "epoch": 0.59, + "learning_rate": 1.798693141306127e-05, + "loss": 1.624, + "step": 38060 + }, + { + "epoch": 0.59, + "learning_rate": 1.7975235935063102e-05, + "loss": 1.6564, + "step": 38070 + }, + { + "epoch": 0.59, + "learning_rate": 1.796354212602217e-05, + "loss": 1.585, + "step": 38080 + }, + { + "epoch": 0.59, + "learning_rate": 1.7951849988716694e-05, + "loss": 1.5716, + "step": 38090 + }, + { + "epoch": 0.59, + "learning_rate": 1.7940159525924525e-05, + "loss": 1.6015, + "step": 38100 + }, + { + "epoch": 0.59, + "learning_rate": 1.79284707404231e-05, + "loss": 1.5994, + "step": 38110 + }, + { + "epoch": 0.59, + "learning_rate": 1.7916783634989475e-05, + "loss": 1.5848, + "step": 38120 + }, + { + "epoch": 0.59, + "learning_rate": 1.7905098212400274e-05, + "loss": 1.5871, + "step": 38130 + }, + { + "epoch": 0.59, + "learning_rate": 1.789341447543177e-05, + "loss": 1.5862, + "step": 38140 + }, + { + "epoch": 0.59, + "learning_rate": 1.7881732426859793e-05, + "loss": 1.588, + "step": 38150 + }, + { + "epoch": 0.59, + "learning_rate": 1.7870052069459777e-05, + "loss": 1.5747, + "step": 38160 + }, + { + "epoch": 0.59, + "learning_rate": 1.7858373406006786e-05, + "loss": 1.6165, + "step": 38170 + }, + { + "epoch": 0.59, + "learning_rate": 1.7846696439275453e-05, + "loss": 1.5906, + "step": 38180 + }, + { + "epoch": 0.59, + "learning_rate": 1.7835021172039997e-05, + "loss": 1.5845, + "step": 38190 + }, + { + "epoch": 0.59, + "learning_rate": 1.7823347607074274e-05, + "loss": 1.6178, + "step": 38200 + }, + { + "epoch": 0.59, + "learning_rate": 1.7811675747151702e-05, + "loss": 1.5876, + "step": 38210 + }, + { + "epoch": 0.59, + "learning_rate": 1.78000055950453e-05, + "loss": 1.6362, + "step": 38220 + }, + { + "epoch": 0.59, + "learning_rate": 1.7788337153527695e-05, + "loss": 1.6062, + "step": 38230 + }, + { + "epoch": 0.59, + "learning_rate": 1.77766704253711e-05, + "loss": 1.636, + "step": 38240 + }, + { + "epoch": 0.59, + "learning_rate": 1.77650054133473e-05, + "loss": 1.6348, + "step": 38250 + }, + { + "epoch": 0.59, + "learning_rate": 1.775334212022772e-05, + "loss": 1.6208, + "step": 38260 + }, + { + "epoch": 0.59, + "learning_rate": 1.7741680548783328e-05, + "loss": 1.623, + "step": 38270 + }, + { + "epoch": 0.59, + "learning_rate": 1.7730020701784706e-05, + "loss": 1.5966, + "step": 38280 + }, + { + "epoch": 0.59, + "learning_rate": 1.771836258200203e-05, + "loss": 1.5977, + "step": 38290 + }, + { + "epoch": 0.59, + "learning_rate": 1.7706706192205044e-05, + "loss": 1.5994, + "step": 38300 + }, + { + "epoch": 0.59, + "learning_rate": 1.7695051535163116e-05, + "loss": 1.5909, + "step": 38310 + }, + { + "epoch": 0.59, + "learning_rate": 1.7683398613645174e-05, + "loss": 1.5842, + "step": 38320 + }, + { + "epoch": 0.59, + "learning_rate": 1.7671747430419732e-05, + "loss": 1.5904, + "step": 38330 + }, + { + "epoch": 0.59, + "learning_rate": 1.766009798825492e-05, + "loss": 1.5989, + "step": 38340 + }, + { + "epoch": 0.6, + "learning_rate": 1.764845028991842e-05, + "loss": 1.6016, + "step": 38350 + }, + { + "epoch": 0.6, + "learning_rate": 1.7636804338177516e-05, + "loss": 1.577, + "step": 38360 + }, + { + "epoch": 0.6, + "learning_rate": 1.7625160135799086e-05, + "loss": 1.5831, + "step": 38370 + }, + { + "epoch": 0.6, + "learning_rate": 1.761351768554957e-05, + "loss": 1.5808, + "step": 38380 + }, + { + "epoch": 0.6, + "learning_rate": 1.7601876990195016e-05, + "loss": 1.6154, + "step": 38390 + }, + { + "epoch": 0.6, + "learning_rate": 1.7590238052501034e-05, + "loss": 1.5626, + "step": 38400 + }, + { + "epoch": 0.6, + "learning_rate": 1.7578600875232837e-05, + "loss": 1.5827, + "step": 38410 + }, + { + "epoch": 0.6, + "learning_rate": 1.756696546115519e-05, + "loss": 1.5814, + "step": 38420 + }, + { + "epoch": 0.6, + "learning_rate": 1.7555331813032478e-05, + "loss": 1.5946, + "step": 38430 + }, + { + "epoch": 0.6, + "learning_rate": 1.7543699933628632e-05, + "loss": 1.5841, + "step": 38440 + }, + { + "epoch": 0.6, + "learning_rate": 1.7532069825707175e-05, + "loss": 1.6331, + "step": 38450 + }, + { + "epoch": 0.6, + "learning_rate": 1.752044149203123e-05, + "loss": 1.6097, + "step": 38460 + }, + { + "epoch": 0.6, + "learning_rate": 1.750881493536346e-05, + "loss": 1.6138, + "step": 38470 + }, + { + "epoch": 0.6, + "learning_rate": 1.7497190158466127e-05, + "loss": 1.588, + "step": 38480 + }, + { + "epoch": 0.6, + "learning_rate": 1.748556716410108e-05, + "loss": 1.5934, + "step": 38490 + }, + { + "epoch": 0.6, + "learning_rate": 1.7473945955029714e-05, + "loss": 1.5615, + "step": 38500 + }, + { + "epoch": 0.6, + "learning_rate": 1.746232653401304e-05, + "loss": 1.5744, + "step": 38510 + }, + { + "epoch": 0.6, + "learning_rate": 1.7450708903811616e-05, + "loss": 1.6236, + "step": 38520 + }, + { + "epoch": 0.6, + "learning_rate": 1.743909306718557e-05, + "loss": 1.5934, + "step": 38530 + }, + { + "epoch": 0.6, + "learning_rate": 1.742747902689463e-05, + "loss": 1.5735, + "step": 38540 + }, + { + "epoch": 0.6, + "learning_rate": 1.741586678569807e-05, + "loss": 1.5778, + "step": 38550 + }, + { + "epoch": 0.6, + "learning_rate": 1.7404256346354755e-05, + "loss": 1.5866, + "step": 38560 + }, + { + "epoch": 0.6, + "learning_rate": 1.7392647711623118e-05, + "loss": 1.598, + "step": 38570 + }, + { + "epoch": 0.6, + "learning_rate": 1.738104088426116e-05, + "loss": 1.6246, + "step": 38580 + }, + { + "epoch": 0.6, + "learning_rate": 1.736943586702645e-05, + "loss": 1.6181, + "step": 38590 + }, + { + "epoch": 0.6, + "learning_rate": 1.735783266267614e-05, + "loss": 1.6335, + "step": 38600 + }, + { + "epoch": 0.6, + "learning_rate": 1.7346231273966937e-05, + "loss": 1.6548, + "step": 38610 + }, + { + "epoch": 0.6, + "learning_rate": 1.733463170365511e-05, + "loss": 1.6111, + "step": 38620 + }, + { + "epoch": 0.6, + "learning_rate": 1.732303395449653e-05, + "loss": 1.6206, + "step": 38630 + }, + { + "epoch": 0.6, + "learning_rate": 1.7311438029246596e-05, + "loss": 1.5805, + "step": 38640 + }, + { + "epoch": 0.6, + "learning_rate": 1.729984393066029e-05, + "loss": 1.5742, + "step": 38650 + }, + { + "epoch": 0.6, + "learning_rate": 1.7288251661492177e-05, + "loss": 1.568, + "step": 38660 + }, + { + "epoch": 0.6, + "learning_rate": 1.727666122449636e-05, + "loss": 1.569, + "step": 38670 + }, + { + "epoch": 0.6, + "learning_rate": 1.7265072622426507e-05, + "loss": 1.5819, + "step": 38680 + }, + { + "epoch": 0.6, + "learning_rate": 1.7253485858035884e-05, + "loss": 1.6272, + "step": 38690 + }, + { + "epoch": 0.6, + "learning_rate": 1.7241900934077277e-05, + "loss": 1.5797, + "step": 38700 + }, + { + "epoch": 0.6, + "learning_rate": 1.7230317853303064e-05, + "loss": 1.5887, + "step": 38710 + }, + { + "epoch": 0.6, + "learning_rate": 1.721873661846518e-05, + "loss": 1.5757, + "step": 38720 + }, + { + "epoch": 0.6, + "learning_rate": 1.720715723231511e-05, + "loss": 1.5795, + "step": 38730 + }, + { + "epoch": 0.6, + "learning_rate": 1.719557969760391e-05, + "loss": 1.5713, + "step": 38740 + }, + { + "epoch": 0.6, + "learning_rate": 1.7184004017082196e-05, + "loss": 1.6052, + "step": 38750 + }, + { + "epoch": 0.6, + "learning_rate": 1.717243019350013e-05, + "loss": 1.557, + "step": 38760 + }, + { + "epoch": 0.6, + "learning_rate": 1.7160858229607463e-05, + "loss": 1.5745, + "step": 38770 + }, + { + "epoch": 0.6, + "learning_rate": 1.7149288128153472e-05, + "loss": 1.5799, + "step": 38780 + }, + { + "epoch": 0.6, + "learning_rate": 1.7137719891887e-05, + "loss": 1.5643, + "step": 38790 + }, + { + "epoch": 0.6, + "learning_rate": 1.7126153523556466e-05, + "loss": 1.6055, + "step": 38800 + }, + { + "epoch": 0.6, + "learning_rate": 1.7114589025909823e-05, + "loss": 1.558, + "step": 38810 + }, + { + "epoch": 0.6, + "learning_rate": 1.7103026401694574e-05, + "loss": 1.5983, + "step": 38820 + }, + { + "epoch": 0.6, + "learning_rate": 1.7091465653657814e-05, + "loss": 1.5699, + "step": 38830 + }, + { + "epoch": 0.6, + "learning_rate": 1.7079906784546156e-05, + "loss": 1.5673, + "step": 38840 + }, + { + "epoch": 0.6, + "learning_rate": 1.706834979710577e-05, + "loss": 1.572, + "step": 38850 + }, + { + "epoch": 0.6, + "learning_rate": 1.7056794694082405e-05, + "loss": 1.6072, + "step": 38860 + }, + { + "epoch": 0.6, + "learning_rate": 1.704524147822133e-05, + "loss": 1.5643, + "step": 38870 + }, + { + "epoch": 0.6, + "learning_rate": 1.703369015226739e-05, + "loss": 1.5748, + "step": 38880 + }, + { + "epoch": 0.6, + "learning_rate": 1.702214071896497e-05, + "loss": 1.5863, + "step": 38890 + }, + { + "epoch": 0.6, + "learning_rate": 1.7010593181058012e-05, + "loss": 1.5839, + "step": 38900 + }, + { + "epoch": 0.6, + "learning_rate": 1.6999047541289987e-05, + "loss": 1.5741, + "step": 38910 + }, + { + "epoch": 0.6, + "learning_rate": 1.6987503802403948e-05, + "loss": 1.5978, + "step": 38920 + }, + { + "epoch": 0.6, + "learning_rate": 1.697596196714247e-05, + "loss": 1.6085, + "step": 38930 + }, + { + "epoch": 0.6, + "learning_rate": 1.6964422038247685e-05, + "loss": 1.632, + "step": 38940 + }, + { + "epoch": 0.6, + "learning_rate": 1.6952884018461277e-05, + "loss": 1.6179, + "step": 38950 + }, + { + "epoch": 0.6, + "learning_rate": 1.694134791052446e-05, + "loss": 1.6036, + "step": 38960 + }, + { + "epoch": 0.6, + "learning_rate": 1.6929813717178023e-05, + "loss": 1.5891, + "step": 38970 + }, + { + "epoch": 0.6, + "learning_rate": 1.691828144116227e-05, + "loss": 1.588, + "step": 38980 + }, + { + "epoch": 0.6, + "learning_rate": 1.690675108521706e-05, + "loss": 1.5756, + "step": 38990 + }, + { + "epoch": 0.61, + "learning_rate": 1.6895222652081803e-05, + "loss": 1.6013, + "step": 39000 + }, + { + "epoch": 0.61, + "learning_rate": 1.6883696144495454e-05, + "loss": 1.6076, + "step": 39010 + }, + { + "epoch": 0.61, + "learning_rate": 1.687217156519648e-05, + "loss": 1.5992, + "step": 39020 + }, + { + "epoch": 0.61, + "learning_rate": 1.686064891692294e-05, + "loss": 1.5968, + "step": 39030 + }, + { + "epoch": 0.61, + "learning_rate": 1.684912820241239e-05, + "loss": 1.6165, + "step": 39040 + }, + { + "epoch": 0.61, + "learning_rate": 1.6837609424401953e-05, + "loss": 1.6549, + "step": 39050 + }, + { + "epoch": 0.61, + "learning_rate": 1.6826092585628273e-05, + "loss": 1.6706, + "step": 39060 + }, + { + "epoch": 0.61, + "learning_rate": 1.6814577688827556e-05, + "loss": 1.6727, + "step": 39070 + }, + { + "epoch": 0.61, + "learning_rate": 1.680306473673552e-05, + "loss": 1.6914, + "step": 39080 + }, + { + "epoch": 0.61, + "learning_rate": 1.6791553732087447e-05, + "loss": 1.6731, + "step": 39090 + }, + { + "epoch": 0.61, + "learning_rate": 1.6780044677618144e-05, + "loss": 1.7149, + "step": 39100 + }, + { + "epoch": 0.61, + "learning_rate": 1.676853757606194e-05, + "loss": 1.664, + "step": 39110 + }, + { + "epoch": 0.61, + "learning_rate": 1.675703243015273e-05, + "loss": 1.6827, + "step": 39120 + }, + { + "epoch": 0.61, + "learning_rate": 1.6745529242623924e-05, + "loss": 1.6748, + "step": 39130 + }, + { + "epoch": 0.61, + "learning_rate": 1.6734028016208463e-05, + "loss": 1.6684, + "step": 39140 + }, + { + "epoch": 0.61, + "learning_rate": 1.672252875363885e-05, + "loss": 1.6592, + "step": 39150 + }, + { + "epoch": 0.61, + "learning_rate": 1.6711031457647088e-05, + "loss": 1.6551, + "step": 39160 + }, + { + "epoch": 0.61, + "learning_rate": 1.6699536130964724e-05, + "loss": 1.6744, + "step": 39170 + }, + { + "epoch": 0.61, + "learning_rate": 1.6688042776322855e-05, + "loss": 1.6602, + "step": 39180 + }, + { + "epoch": 0.61, + "learning_rate": 1.667655139645208e-05, + "loss": 1.6684, + "step": 39190 + }, + { + "epoch": 0.61, + "learning_rate": 1.666506199408256e-05, + "loss": 1.6067, + "step": 39200 + }, + { + "epoch": 0.61, + "learning_rate": 1.6653574571943952e-05, + "loss": 1.5655, + "step": 39210 + }, + { + "epoch": 0.61, + "learning_rate": 1.664208913276547e-05, + "loss": 1.571, + "step": 39220 + }, + { + "epoch": 0.61, + "learning_rate": 1.6630605679275846e-05, + "loss": 1.5416, + "step": 39230 + }, + { + "epoch": 0.61, + "learning_rate": 1.6619124214203347e-05, + "loss": 1.573, + "step": 39240 + }, + { + "epoch": 0.61, + "learning_rate": 1.6607644740275744e-05, + "loss": 1.5577, + "step": 39250 + }, + { + "epoch": 0.61, + "learning_rate": 1.6596167260220375e-05, + "loss": 1.5479, + "step": 39260 + }, + { + "epoch": 0.61, + "learning_rate": 1.6584691776764072e-05, + "loss": 1.6363, + "step": 39270 + }, + { + "epoch": 0.61, + "learning_rate": 1.65732182926332e-05, + "loss": 1.5497, + "step": 39280 + }, + { + "epoch": 0.61, + "learning_rate": 1.656174681055365e-05, + "loss": 1.5819, + "step": 39290 + }, + { + "epoch": 0.61, + "learning_rate": 1.6550277333250856e-05, + "loss": 1.5544, + "step": 39300 + }, + { + "epoch": 0.61, + "learning_rate": 1.6538809863449733e-05, + "loss": 1.6114, + "step": 39310 + }, + { + "epoch": 0.61, + "learning_rate": 1.6527344403874773e-05, + "loss": 1.5593, + "step": 39320 + }, + { + "epoch": 0.61, + "learning_rate": 1.651588095724994e-05, + "loss": 1.5932, + "step": 39330 + }, + { + "epoch": 0.61, + "learning_rate": 1.6504419526298747e-05, + "loss": 1.5799, + "step": 39340 + }, + { + "epoch": 0.61, + "learning_rate": 1.6492960113744233e-05, + "loss": 1.6312, + "step": 39350 + }, + { + "epoch": 0.61, + "learning_rate": 1.6481502722308945e-05, + "loss": 1.5758, + "step": 39360 + }, + { + "epoch": 0.61, + "learning_rate": 1.6470047354714933e-05, + "loss": 1.5695, + "step": 39370 + }, + { + "epoch": 0.61, + "learning_rate": 1.645859401368382e-05, + "loss": 1.5225, + "step": 39380 + }, + { + "epoch": 0.61, + "learning_rate": 1.644714270193669e-05, + "loss": 1.5564, + "step": 39390 + }, + { + "epoch": 0.61, + "learning_rate": 1.6435693422194164e-05, + "loss": 1.6017, + "step": 39400 + }, + { + "epoch": 0.61, + "learning_rate": 1.6424246177176406e-05, + "loss": 1.6071, + "step": 39410 + }, + { + "epoch": 0.61, + "learning_rate": 1.641280096960305e-05, + "loss": 1.6136, + "step": 39420 + }, + { + "epoch": 0.61, + "learning_rate": 1.640135780219329e-05, + "loss": 1.614, + "step": 39430 + }, + { + "epoch": 0.61, + "learning_rate": 1.6389916677665813e-05, + "loss": 1.5882, + "step": 39440 + }, + { + "epoch": 0.61, + "learning_rate": 1.6378477598738808e-05, + "loss": 1.6441, + "step": 39450 + }, + { + "epoch": 0.61, + "learning_rate": 1.636704056813002e-05, + "loss": 1.5937, + "step": 39460 + }, + { + "epoch": 0.61, + "learning_rate": 1.6355605588556663e-05, + "loss": 1.6104, + "step": 39470 + }, + { + "epoch": 0.61, + "learning_rate": 1.6344172662735476e-05, + "loss": 1.6768, + "step": 39480 + }, + { + "epoch": 0.61, + "learning_rate": 1.633274179338274e-05, + "loss": 1.5862, + "step": 39490 + }, + { + "epoch": 0.61, + "learning_rate": 1.63213129832142e-05, + "loss": 1.5866, + "step": 39500 + }, + { + "epoch": 0.61, + "learning_rate": 1.6309886234945147e-05, + "loss": 1.534, + "step": 39510 + }, + { + "epoch": 0.61, + "learning_rate": 1.6298461551290364e-05, + "loss": 1.5384, + "step": 39520 + }, + { + "epoch": 0.61, + "learning_rate": 1.628703893496416e-05, + "loss": 1.5564, + "step": 39530 + }, + { + "epoch": 0.61, + "learning_rate": 1.627561838868033e-05, + "loss": 1.5615, + "step": 39540 + }, + { + "epoch": 0.61, + "learning_rate": 1.62641999151522e-05, + "loss": 1.5446, + "step": 39550 + }, + { + "epoch": 0.61, + "learning_rate": 1.6252783517092592e-05, + "loss": 1.5877, + "step": 39560 + }, + { + "epoch": 0.61, + "learning_rate": 1.6241369197213822e-05, + "loss": 1.5591, + "step": 39570 + }, + { + "epoch": 0.61, + "learning_rate": 1.6229956958227748e-05, + "loss": 1.5765, + "step": 39580 + }, + { + "epoch": 0.61, + "learning_rate": 1.6218546802845708e-05, + "loss": 1.5556, + "step": 39590 + }, + { + "epoch": 0.61, + "learning_rate": 1.6207138733778527e-05, + "loss": 1.5382, + "step": 39600 + }, + { + "epoch": 0.61, + "learning_rate": 1.619573275373658e-05, + "loss": 1.599, + "step": 39610 + }, + { + "epoch": 0.61, + "learning_rate": 1.618432886542972e-05, + "loss": 1.567, + "step": 39620 + }, + { + "epoch": 0.61, + "learning_rate": 1.617292707156729e-05, + "loss": 1.5705, + "step": 39630 + }, + { + "epoch": 0.62, + "learning_rate": 1.6161527374858166e-05, + "loss": 1.5521, + "step": 39640 + }, + { + "epoch": 0.62, + "learning_rate": 1.6150129778010698e-05, + "loss": 1.5673, + "step": 39650 + }, + { + "epoch": 0.62, + "learning_rate": 1.6138734283732766e-05, + "loss": 1.5601, + "step": 39660 + }, + { + "epoch": 0.62, + "learning_rate": 1.6127340894731717e-05, + "loss": 1.5915, + "step": 39670 + }, + { + "epoch": 0.62, + "learning_rate": 1.6115949613714422e-05, + "loss": 1.6511, + "step": 39680 + }, + { + "epoch": 0.62, + "learning_rate": 1.6104560443387245e-05, + "loss": 1.5667, + "step": 39690 + }, + { + "epoch": 0.62, + "learning_rate": 1.609317338645605e-05, + "loss": 1.5551, + "step": 39700 + }, + { + "epoch": 0.62, + "learning_rate": 1.608178844562618e-05, + "loss": 1.5782, + "step": 39710 + }, + { + "epoch": 0.62, + "learning_rate": 1.607040562360252e-05, + "loss": 1.6231, + "step": 39720 + }, + { + "epoch": 0.62, + "learning_rate": 1.60590249230894e-05, + "loss": 1.6085, + "step": 39730 + }, + { + "epoch": 0.62, + "learning_rate": 1.604764634679067e-05, + "loss": 1.5623, + "step": 39740 + }, + { + "epoch": 0.62, + "learning_rate": 1.603626989740969e-05, + "loss": 1.5714, + "step": 39750 + }, + { + "epoch": 0.62, + "learning_rate": 1.602489557764929e-05, + "loss": 1.5566, + "step": 39760 + }, + { + "epoch": 0.62, + "learning_rate": 1.6013523390211793e-05, + "loss": 1.5595, + "step": 39770 + }, + { + "epoch": 0.62, + "learning_rate": 1.6002153337799045e-05, + "loss": 1.6104, + "step": 39780 + }, + { + "epoch": 0.62, + "learning_rate": 1.5990785423112356e-05, + "loss": 1.5597, + "step": 39790 + }, + { + "epoch": 0.62, + "learning_rate": 1.5979419648852527e-05, + "loss": 1.5618, + "step": 39800 + }, + { + "epoch": 0.62, + "learning_rate": 1.5968056017719885e-05, + "loss": 1.526, + "step": 39810 + }, + { + "epoch": 0.62, + "learning_rate": 1.59566945324142e-05, + "loss": 1.557, + "step": 39820 + }, + { + "epoch": 0.62, + "learning_rate": 1.594533519563477e-05, + "loss": 1.571, + "step": 39830 + }, + { + "epoch": 0.62, + "learning_rate": 1.593397801008036e-05, + "loss": 1.5794, + "step": 39840 + }, + { + "epoch": 0.62, + "learning_rate": 1.5922622978449248e-05, + "loss": 1.5606, + "step": 39850 + }, + { + "epoch": 0.62, + "learning_rate": 1.5911270103439158e-05, + "loss": 1.5906, + "step": 39860 + }, + { + "epoch": 0.62, + "learning_rate": 1.589991938774736e-05, + "loss": 1.5902, + "step": 39870 + }, + { + "epoch": 0.62, + "learning_rate": 1.5888570834070548e-05, + "loss": 1.6501, + "step": 39880 + }, + { + "epoch": 0.62, + "learning_rate": 1.587722444510496e-05, + "loss": 1.6391, + "step": 39890 + }, + { + "epoch": 0.62, + "learning_rate": 1.5865880223546288e-05, + "loss": 1.5878, + "step": 39900 + }, + { + "epoch": 0.62, + "learning_rate": 1.585453817208969e-05, + "loss": 1.5549, + "step": 39910 + }, + { + "epoch": 0.62, + "learning_rate": 1.5843198293429867e-05, + "loss": 1.5613, + "step": 39920 + }, + { + "epoch": 0.62, + "learning_rate": 1.5831860590260954e-05, + "loss": 1.559, + "step": 39930 + }, + { + "epoch": 0.62, + "learning_rate": 1.5820525065276574e-05, + "loss": 1.5626, + "step": 39940 + }, + { + "epoch": 0.62, + "learning_rate": 1.5809191721169865e-05, + "loss": 1.5549, + "step": 39950 + }, + { + "epoch": 0.62, + "learning_rate": 1.5797860560633412e-05, + "loss": 1.5813, + "step": 39960 + }, + { + "epoch": 0.62, + "learning_rate": 1.5786531586359286e-05, + "loss": 1.5855, + "step": 39970 + }, + { + "epoch": 0.62, + "learning_rate": 1.5775204801039074e-05, + "loss": 1.5706, + "step": 39980 + }, + { + "epoch": 0.62, + "learning_rate": 1.5763880207363788e-05, + "loss": 1.554, + "step": 39990 + }, + { + "epoch": 0.62, + "learning_rate": 1.575255780802396e-05, + "loss": 1.5782, + "step": 40000 + }, + { + "epoch": 0.62, + "eval_loss": 1.5893032550811768, + "eval_runtime": 82.1174, + "eval_samples_per_second": 36.533, + "eval_steps_per_second": 4.567, + "step": 40000 + }, + { + "epoch": 0.62, + "learning_rate": 1.5741237605709587e-05, + "loss": 1.5483, + "step": 40010 + }, + { + "epoch": 0.62, + "learning_rate": 1.572991960311015e-05, + "loss": 1.6209, + "step": 40020 + }, + { + "epoch": 0.62, + "learning_rate": 1.571860380291459e-05, + "loss": 1.5402, + "step": 40030 + }, + { + "epoch": 0.62, + "learning_rate": 1.570729020781135e-05, + "loss": 1.5681, + "step": 40040 + }, + { + "epoch": 0.62, + "learning_rate": 1.5695978820488327e-05, + "loss": 1.5641, + "step": 40050 + }, + { + "epoch": 0.62, + "learning_rate": 1.5684669643632897e-05, + "loss": 1.5574, + "step": 40060 + }, + { + "epoch": 0.62, + "learning_rate": 1.5673362679931935e-05, + "loss": 1.584, + "step": 40070 + }, + { + "epoch": 0.62, + "learning_rate": 1.5662057932071768e-05, + "loss": 1.5969, + "step": 40080 + }, + { + "epoch": 0.62, + "learning_rate": 1.5650755402738172e-05, + "loss": 1.5696, + "step": 40090 + }, + { + "epoch": 0.62, + "learning_rate": 1.5639455094616462e-05, + "loss": 1.5702, + "step": 40100 + }, + { + "epoch": 0.62, + "learning_rate": 1.5628157010391363e-05, + "loss": 1.5483, + "step": 40110 + }, + { + "epoch": 0.62, + "learning_rate": 1.5616861152747108e-05, + "loss": 1.5312, + "step": 40120 + }, + { + "epoch": 0.62, + "learning_rate": 1.5605567524367393e-05, + "loss": 1.5911, + "step": 40130 + }, + { + "epoch": 0.62, + "learning_rate": 1.5594276127935357e-05, + "loss": 1.5652, + "step": 40140 + }, + { + "epoch": 0.62, + "learning_rate": 1.5582986966133655e-05, + "loss": 1.5772, + "step": 40150 + }, + { + "epoch": 0.62, + "learning_rate": 1.5571700041644384e-05, + "loss": 1.5528, + "step": 40160 + }, + { + "epoch": 0.62, + "learning_rate": 1.5560415357149103e-05, + "loss": 1.5573, + "step": 40170 + }, + { + "epoch": 0.62, + "learning_rate": 1.554913291532886e-05, + "loss": 1.5711, + "step": 40180 + }, + { + "epoch": 0.62, + "learning_rate": 1.553785271886416e-05, + "loss": 1.5924, + "step": 40190 + }, + { + "epoch": 0.62, + "learning_rate": 1.552657477043496e-05, + "loss": 1.6108, + "step": 40200 + }, + { + "epoch": 0.62, + "learning_rate": 1.551529907272072e-05, + "loss": 1.6392, + "step": 40210 + }, + { + "epoch": 0.62, + "learning_rate": 1.550402562840033e-05, + "loss": 1.6086, + "step": 40220 + }, + { + "epoch": 0.62, + "learning_rate": 1.5492754440152146e-05, + "loss": 1.6047, + "step": 40230 + }, + { + "epoch": 0.62, + "learning_rate": 1.548148551065402e-05, + "loss": 1.555, + "step": 40240 + }, + { + "epoch": 0.62, + "learning_rate": 1.5470218842583238e-05, + "loss": 1.5682, + "step": 40250 + }, + { + "epoch": 0.62, + "learning_rate": 1.5458954438616547e-05, + "loss": 1.5691, + "step": 40260 + }, + { + "epoch": 0.62, + "learning_rate": 1.5447692301430182e-05, + "loss": 1.5594, + "step": 40270 + }, + { + "epoch": 0.62, + "learning_rate": 1.543643243369982e-05, + "loss": 1.5894, + "step": 40280 + }, + { + "epoch": 0.63, + "learning_rate": 1.5425174838100587e-05, + "loss": 1.5827, + "step": 40290 + }, + { + "epoch": 0.63, + "learning_rate": 1.5413919517307107e-05, + "loss": 1.5524, + "step": 40300 + }, + { + "epoch": 0.63, + "learning_rate": 1.5402666473993425e-05, + "loss": 1.5529, + "step": 40310 + }, + { + "epoch": 0.63, + "learning_rate": 1.5391415710833066e-05, + "loss": 1.5768, + "step": 40320 + }, + { + "epoch": 0.63, + "learning_rate": 1.538016723049901e-05, + "loss": 1.5348, + "step": 40330 + }, + { + "epoch": 0.63, + "learning_rate": 1.536892103566369e-05, + "loss": 1.5794, + "step": 40340 + }, + { + "epoch": 0.63, + "learning_rate": 1.5357677128999005e-05, + "loss": 1.5712, + "step": 40350 + }, + { + "epoch": 0.63, + "learning_rate": 1.534643551317631e-05, + "loss": 1.5625, + "step": 40360 + }, + { + "epoch": 0.63, + "learning_rate": 1.5335196190866384e-05, + "loss": 1.574, + "step": 40370 + }, + { + "epoch": 0.63, + "learning_rate": 1.5323959164739515e-05, + "loss": 1.5474, + "step": 40380 + }, + { + "epoch": 0.63, + "learning_rate": 1.5312724437465408e-05, + "loss": 1.5503, + "step": 40390 + }, + { + "epoch": 0.63, + "learning_rate": 1.530149201171322e-05, + "loss": 1.5434, + "step": 40400 + }, + { + "epoch": 0.63, + "learning_rate": 1.5290261890151595e-05, + "loss": 1.5673, + "step": 40410 + }, + { + "epoch": 0.63, + "learning_rate": 1.5279034075448593e-05, + "loss": 1.5651, + "step": 40420 + }, + { + "epoch": 0.63, + "learning_rate": 1.5267808570271733e-05, + "loss": 1.5404, + "step": 40430 + }, + { + "epoch": 0.63, + "learning_rate": 1.5256585377288014e-05, + "loss": 1.5651, + "step": 40440 + }, + { + "epoch": 0.63, + "learning_rate": 1.5245364499163844e-05, + "loss": 1.5476, + "step": 40450 + }, + { + "epoch": 0.63, + "learning_rate": 1.5234145938565115e-05, + "loss": 1.5626, + "step": 40460 + }, + { + "epoch": 0.63, + "learning_rate": 1.5222929698157146e-05, + "loss": 1.5785, + "step": 40470 + }, + { + "epoch": 0.63, + "learning_rate": 1.5211715780604718e-05, + "loss": 1.6041, + "step": 40480 + }, + { + "epoch": 0.63, + "learning_rate": 1.520050418857205e-05, + "loss": 1.6076, + "step": 40490 + }, + { + "epoch": 0.63, + "learning_rate": 1.5189294924722825e-05, + "loss": 1.5756, + "step": 40500 + }, + { + "epoch": 0.63, + "learning_rate": 1.5178087991720158e-05, + "loss": 1.5833, + "step": 40510 + }, + { + "epoch": 0.63, + "learning_rate": 1.51668833922266e-05, + "loss": 1.5662, + "step": 40520 + }, + { + "epoch": 0.63, + "learning_rate": 1.515568112890418e-05, + "loss": 1.5742, + "step": 40530 + }, + { + "epoch": 0.63, + "learning_rate": 1.5144481204414351e-05, + "loss": 1.5812, + "step": 40540 + }, + { + "epoch": 0.63, + "learning_rate": 1.5133283621417998e-05, + "loss": 1.5908, + "step": 40550 + }, + { + "epoch": 0.63, + "learning_rate": 1.5122088382575483e-05, + "loss": 1.5723, + "step": 40560 + }, + { + "epoch": 0.63, + "learning_rate": 1.5110895490546578e-05, + "loss": 1.5963, + "step": 40570 + }, + { + "epoch": 0.63, + "learning_rate": 1.5099704947990528e-05, + "loss": 1.6236, + "step": 40580 + }, + { + "epoch": 0.63, + "learning_rate": 1.5088516757565995e-05, + "loss": 1.6409, + "step": 40590 + }, + { + "epoch": 0.63, + "learning_rate": 1.5077330921931083e-05, + "loss": 1.6617, + "step": 40600 + }, + { + "epoch": 0.63, + "learning_rate": 1.5066147443743361e-05, + "loss": 1.6618, + "step": 40610 + }, + { + "epoch": 0.63, + "learning_rate": 1.5054966325659814e-05, + "loss": 1.6597, + "step": 40620 + }, + { + "epoch": 0.63, + "learning_rate": 1.504378757033687e-05, + "loss": 1.6585, + "step": 40630 + }, + { + "epoch": 0.63, + "learning_rate": 1.5032611180430406e-05, + "loss": 1.645, + "step": 40640 + }, + { + "epoch": 0.63, + "learning_rate": 1.5021437158595733e-05, + "loss": 1.6876, + "step": 40650 + }, + { + "epoch": 0.63, + "learning_rate": 1.5010265507487581e-05, + "loss": 1.672, + "step": 40660 + }, + { + "epoch": 0.63, + "learning_rate": 1.4999096229760156e-05, + "loss": 1.6322, + "step": 40670 + }, + { + "epoch": 0.63, + "learning_rate": 1.4987929328067064e-05, + "loss": 1.6327, + "step": 40680 + }, + { + "epoch": 0.63, + "learning_rate": 1.497676480506135e-05, + "loss": 1.6568, + "step": 40690 + }, + { + "epoch": 0.63, + "learning_rate": 1.4965602663395528e-05, + "loss": 1.64, + "step": 40700 + }, + { + "epoch": 0.63, + "learning_rate": 1.4954442905721502e-05, + "loss": 1.6379, + "step": 40710 + }, + { + "epoch": 0.63, + "learning_rate": 1.4943285534690626e-05, + "loss": 1.6612, + "step": 40720 + }, + { + "epoch": 0.63, + "learning_rate": 1.493213055295371e-05, + "loss": 1.5794, + "step": 40730 + }, + { + "epoch": 0.63, + "learning_rate": 1.4920977963160965e-05, + "loss": 1.6017, + "step": 40740 + }, + { + "epoch": 0.63, + "learning_rate": 1.4909827767962033e-05, + "loss": 1.5533, + "step": 40750 + }, + { + "epoch": 0.63, + "learning_rate": 1.4898679970006023e-05, + "loss": 1.5505, + "step": 40760 + }, + { + "epoch": 0.63, + "learning_rate": 1.4887534571941431e-05, + "loss": 1.585, + "step": 40770 + }, + { + "epoch": 0.63, + "learning_rate": 1.4876391576416215e-05, + "loss": 1.5731, + "step": 40780 + }, + { + "epoch": 0.63, + "learning_rate": 1.4865250986077744e-05, + "loss": 1.5595, + "step": 40790 + }, + { + "epoch": 0.63, + "learning_rate": 1.4854112803572824e-05, + "loss": 1.5926, + "step": 40800 + }, + { + "epoch": 0.63, + "learning_rate": 1.4842977031547683e-05, + "loss": 1.5848, + "step": 40810 + }, + { + "epoch": 0.63, + "learning_rate": 1.4831843672647988e-05, + "loss": 1.5626, + "step": 40820 + }, + { + "epoch": 0.63, + "learning_rate": 1.4820712729518806e-05, + "loss": 1.5733, + "step": 40830 + }, + { + "epoch": 0.63, + "learning_rate": 1.4809584204804671e-05, + "loss": 1.5231, + "step": 40840 + }, + { + "epoch": 0.63, + "learning_rate": 1.479845810114951e-05, + "loss": 1.5251, + "step": 40850 + }, + { + "epoch": 0.63, + "learning_rate": 1.4787334421196668e-05, + "loss": 1.5359, + "step": 40860 + }, + { + "epoch": 0.63, + "learning_rate": 1.4776213167588965e-05, + "loss": 1.5273, + "step": 40870 + }, + { + "epoch": 0.63, + "learning_rate": 1.4765094342968582e-05, + "loss": 1.5995, + "step": 40880 + }, + { + "epoch": 0.63, + "learning_rate": 1.4753977949977154e-05, + "loss": 1.5298, + "step": 40890 + }, + { + "epoch": 0.63, + "learning_rate": 1.4742863991255756e-05, + "loss": 1.5485, + "step": 40900 + }, + { + "epoch": 0.63, + "learning_rate": 1.4731752469444845e-05, + "loss": 1.5373, + "step": 40910 + }, + { + "epoch": 0.63, + "learning_rate": 1.4720643387184314e-05, + "loss": 1.6521, + "step": 40920 + }, + { + "epoch": 0.64, + "learning_rate": 1.47095367471135e-05, + "loss": 1.5349, + "step": 40930 + }, + { + "epoch": 0.64, + "learning_rate": 1.4698432551871124e-05, + "loss": 1.5442, + "step": 40940 + }, + { + "epoch": 0.64, + "learning_rate": 1.468733080409535e-05, + "loss": 1.5967, + "step": 40950 + }, + { + "epoch": 0.64, + "learning_rate": 1.4676231506423752e-05, + "loss": 1.6176, + "step": 40960 + }, + { + "epoch": 0.64, + "learning_rate": 1.4665134661493327e-05, + "loss": 1.5479, + "step": 40970 + }, + { + "epoch": 0.64, + "learning_rate": 1.465404027194047e-05, + "loss": 1.555, + "step": 40980 + }, + { + "epoch": 0.64, + "learning_rate": 1.4642948340401028e-05, + "loss": 1.6044, + "step": 40990 + }, + { + "epoch": 0.64, + "learning_rate": 1.4631858869510223e-05, + "loss": 1.6422, + "step": 41000 + }, + { + "epoch": 0.64, + "learning_rate": 1.4620771861902737e-05, + "loss": 1.6331, + "step": 41010 + }, + { + "epoch": 0.64, + "learning_rate": 1.4609687320212631e-05, + "loss": 1.5515, + "step": 41020 + }, + { + "epoch": 0.64, + "learning_rate": 1.459860524707338e-05, + "loss": 1.549, + "step": 41030 + }, + { + "epoch": 0.64, + "learning_rate": 1.45875256451179e-05, + "loss": 1.5286, + "step": 41040 + }, + { + "epoch": 0.64, + "learning_rate": 1.4576448516978514e-05, + "loss": 1.5512, + "step": 41050 + }, + { + "epoch": 0.64, + "learning_rate": 1.4565373865286913e-05, + "loss": 1.5312, + "step": 41060 + }, + { + "epoch": 0.64, + "learning_rate": 1.4554301692674278e-05, + "loss": 1.5537, + "step": 41070 + }, + { + "epoch": 0.64, + "learning_rate": 1.4543232001771123e-05, + "loss": 1.5345, + "step": 41080 + }, + { + "epoch": 0.64, + "learning_rate": 1.453216479520742e-05, + "loss": 1.5743, + "step": 41090 + }, + { + "epoch": 0.64, + "learning_rate": 1.4521100075612543e-05, + "loss": 1.5569, + "step": 41100 + }, + { + "epoch": 0.64, + "learning_rate": 1.4510037845615266e-05, + "loss": 1.5499, + "step": 41110 + }, + { + "epoch": 0.64, + "learning_rate": 1.4498978107843758e-05, + "loss": 1.5511, + "step": 41120 + }, + { + "epoch": 0.64, + "learning_rate": 1.4487920864925648e-05, + "loss": 1.5941, + "step": 41130 + }, + { + "epoch": 0.64, + "learning_rate": 1.4476866119487906e-05, + "loss": 1.5664, + "step": 41140 + }, + { + "epoch": 0.64, + "learning_rate": 1.4465813874156953e-05, + "loss": 1.6154, + "step": 41150 + }, + { + "epoch": 0.64, + "learning_rate": 1.4454764131558602e-05, + "loss": 1.6256, + "step": 41160 + }, + { + "epoch": 0.64, + "learning_rate": 1.4443716894318074e-05, + "loss": 1.575, + "step": 41170 + }, + { + "epoch": 0.64, + "learning_rate": 1.443267216505999e-05, + "loss": 1.5828, + "step": 41180 + }, + { + "epoch": 0.64, + "learning_rate": 1.4421629946408383e-05, + "loss": 1.5404, + "step": 41190 + }, + { + "epoch": 0.64, + "learning_rate": 1.441059024098669e-05, + "loss": 1.5541, + "step": 41200 + }, + { + "epoch": 0.64, + "learning_rate": 1.4399553051417719e-05, + "loss": 1.558, + "step": 41210 + }, + { + "epoch": 0.64, + "learning_rate": 1.4388518380323748e-05, + "loss": 1.561, + "step": 41220 + }, + { + "epoch": 0.64, + "learning_rate": 1.4377486230326372e-05, + "loss": 1.5945, + "step": 41230 + }, + { + "epoch": 0.64, + "learning_rate": 1.4366456604046674e-05, + "loss": 1.5328, + "step": 41240 + }, + { + "epoch": 0.64, + "learning_rate": 1.4355429504105066e-05, + "loss": 1.5309, + "step": 41250 + }, + { + "epoch": 0.64, + "learning_rate": 1.4344404933121392e-05, + "loss": 1.5405, + "step": 41260 + }, + { + "epoch": 0.64, + "learning_rate": 1.4333382893714894e-05, + "loss": 1.5362, + "step": 41270 + }, + { + "epoch": 0.64, + "learning_rate": 1.4322363388504224e-05, + "loss": 1.5636, + "step": 41280 + }, + { + "epoch": 0.64, + "learning_rate": 1.431134642010738e-05, + "loss": 1.5355, + "step": 41290 + }, + { + "epoch": 0.64, + "learning_rate": 1.4300331991141841e-05, + "loss": 1.5594, + "step": 41300 + }, + { + "epoch": 0.64, + "learning_rate": 1.4289320104224404e-05, + "loss": 1.5534, + "step": 41310 + }, + { + "epoch": 0.64, + "learning_rate": 1.4278310761971306e-05, + "loss": 1.562, + "step": 41320 + }, + { + "epoch": 0.64, + "learning_rate": 1.4267303966998166e-05, + "loss": 1.5794, + "step": 41330 + }, + { + "epoch": 0.64, + "learning_rate": 1.4256299721920003e-05, + "loss": 1.5461, + "step": 41340 + }, + { + "epoch": 0.64, + "learning_rate": 1.4245298029351223e-05, + "loss": 1.5464, + "step": 41350 + }, + { + "epoch": 0.64, + "learning_rate": 1.4234298891905634e-05, + "loss": 1.5218, + "step": 41360 + }, + { + "epoch": 0.64, + "learning_rate": 1.4223302312196441e-05, + "loss": 1.5672, + "step": 41370 + }, + { + "epoch": 0.64, + "learning_rate": 1.4212308292836201e-05, + "loss": 1.5578, + "step": 41380 + }, + { + "epoch": 0.64, + "learning_rate": 1.4201316836436939e-05, + "loss": 1.5666, + "step": 41390 + }, + { + "epoch": 0.64, + "learning_rate": 1.4190327945609994e-05, + "loss": 1.575, + "step": 41400 + }, + { + "epoch": 0.64, + "learning_rate": 1.4179341622966136e-05, + "loss": 1.5484, + "step": 41410 + }, + { + "epoch": 0.64, + "learning_rate": 1.4168357871115522e-05, + "loss": 1.5433, + "step": 41420 + }, + { + "epoch": 0.64, + "learning_rate": 1.4157376692667702e-05, + "loss": 1.5534, + "step": 41430 + }, + { + "epoch": 0.64, + "learning_rate": 1.4146398090231572e-05, + "loss": 1.563, + "step": 41440 + }, + { + "epoch": 0.64, + "learning_rate": 1.4135422066415493e-05, + "loss": 1.5708, + "step": 41450 + }, + { + "epoch": 0.64, + "learning_rate": 1.412444862382713e-05, + "loss": 1.6022, + "step": 41460 + }, + { + "epoch": 0.64, + "learning_rate": 1.4113477765073612e-05, + "loss": 1.6043, + "step": 41470 + }, + { + "epoch": 0.64, + "learning_rate": 1.4102509492761396e-05, + "loss": 1.5763, + "step": 41480 + }, + { + "epoch": 0.64, + "learning_rate": 1.4091543809496347e-05, + "loss": 1.5666, + "step": 41490 + }, + { + "epoch": 0.64, + "learning_rate": 1.4080580717883719e-05, + "loss": 1.5547, + "step": 41500 + }, + { + "epoch": 0.64, + "learning_rate": 1.4069620220528138e-05, + "loss": 1.5456, + "step": 41510 + }, + { + "epoch": 0.64, + "learning_rate": 1.4058662320033624e-05, + "loss": 1.511, + "step": 41520 + }, + { + "epoch": 0.64, + "learning_rate": 1.404770701900358e-05, + "loss": 1.5496, + "step": 41530 + }, + { + "epoch": 0.64, + "learning_rate": 1.4036754320040796e-05, + "loss": 1.4655, + "step": 41540 + }, + { + "epoch": 0.64, + "learning_rate": 1.4025804225747402e-05, + "loss": 1.5148, + "step": 41550 + }, + { + "epoch": 0.64, + "learning_rate": 1.4014856738724985e-05, + "loss": 1.5204, + "step": 41560 + }, + { + "epoch": 0.64, + "learning_rate": 1.4003911861574443e-05, + "loss": 1.5484, + "step": 41570 + }, + { + "epoch": 0.65, + "learning_rate": 1.3992969596896088e-05, + "loss": 1.498, + "step": 41580 + }, + { + "epoch": 0.65, + "learning_rate": 1.3982029947289607e-05, + "loss": 1.522, + "step": 41590 + }, + { + "epoch": 0.65, + "learning_rate": 1.3971092915354073e-05, + "loss": 1.5372, + "step": 41600 + }, + { + "epoch": 0.65, + "learning_rate": 1.3960158503687895e-05, + "loss": 1.5364, + "step": 41610 + }, + { + "epoch": 0.65, + "learning_rate": 1.3949226714888936e-05, + "loss": 1.5487, + "step": 41620 + }, + { + "epoch": 0.65, + "learning_rate": 1.3938297551554359e-05, + "loss": 1.4983, + "step": 41630 + }, + { + "epoch": 0.65, + "learning_rate": 1.3927371016280743e-05, + "loss": 1.532, + "step": 41640 + }, + { + "epoch": 0.65, + "learning_rate": 1.3916447111664043e-05, + "loss": 1.5035, + "step": 41650 + }, + { + "epoch": 0.65, + "learning_rate": 1.3905525840299582e-05, + "loss": 1.5163, + "step": 41660 + }, + { + "epoch": 0.65, + "learning_rate": 1.3894607204782051e-05, + "loss": 1.5296, + "step": 41670 + }, + { + "epoch": 0.65, + "learning_rate": 1.3883691207705519e-05, + "loss": 1.4876, + "step": 41680 + }, + { + "epoch": 0.65, + "learning_rate": 1.3872777851663438e-05, + "loss": 1.4934, + "step": 41690 + }, + { + "epoch": 0.65, + "learning_rate": 1.3861867139248618e-05, + "loss": 1.4925, + "step": 41700 + }, + { + "epoch": 0.65, + "learning_rate": 1.3850959073053262e-05, + "loss": 1.5343, + "step": 41710 + }, + { + "epoch": 0.65, + "learning_rate": 1.38400536556689e-05, + "loss": 1.5292, + "step": 41720 + }, + { + "epoch": 0.65, + "learning_rate": 1.3829150889686495e-05, + "loss": 1.5309, + "step": 41730 + }, + { + "epoch": 0.65, + "learning_rate": 1.3818250777696329e-05, + "loss": 1.5062, + "step": 41740 + }, + { + "epoch": 0.65, + "learning_rate": 1.3807353322288068e-05, + "loss": 1.5219, + "step": 41750 + }, + { + "epoch": 0.65, + "learning_rate": 1.3796458526050765e-05, + "loss": 1.5074, + "step": 41760 + }, + { + "epoch": 0.65, + "learning_rate": 1.378556639157283e-05, + "loss": 1.5528, + "step": 41770 + }, + { + "epoch": 0.65, + "learning_rate": 1.377467692144201e-05, + "loss": 1.4984, + "step": 41780 + }, + { + "epoch": 0.65, + "learning_rate": 1.3763790118245479e-05, + "loss": 1.5192, + "step": 41790 + }, + { + "epoch": 0.65, + "learning_rate": 1.3752905984569724e-05, + "loss": 1.5097, + "step": 41800 + }, + { + "epoch": 0.65, + "learning_rate": 1.374202452300063e-05, + "loss": 1.5143, + "step": 41810 + }, + { + "epoch": 0.65, + "learning_rate": 1.3731145736123427e-05, + "loss": 1.5071, + "step": 41820 + }, + { + "epoch": 0.65, + "learning_rate": 1.3720269626522726e-05, + "loss": 1.5275, + "step": 41830 + }, + { + "epoch": 0.65, + "learning_rate": 1.370939619678249e-05, + "loss": 1.519, + "step": 41840 + }, + { + "epoch": 0.65, + "learning_rate": 1.3698525449486053e-05, + "loss": 1.5167, + "step": 41850 + }, + { + "epoch": 0.65, + "learning_rate": 1.3687657387216116e-05, + "loss": 1.5147, + "step": 41860 + }, + { + "epoch": 0.65, + "learning_rate": 1.3676792012554706e-05, + "loss": 1.5113, + "step": 41870 + }, + { + "epoch": 0.65, + "learning_rate": 1.3665929328083279e-05, + "loss": 1.5001, + "step": 41880 + }, + { + "epoch": 0.65, + "learning_rate": 1.3655069336382587e-05, + "loss": 1.5319, + "step": 41890 + }, + { + "epoch": 0.65, + "learning_rate": 1.3644212040032773e-05, + "loss": 1.5315, + "step": 41900 + }, + { + "epoch": 0.65, + "learning_rate": 1.3633357441613334e-05, + "loss": 1.531, + "step": 41910 + }, + { + "epoch": 0.65, + "learning_rate": 1.3622505543703131e-05, + "loss": 1.5634, + "step": 41920 + }, + { + "epoch": 0.65, + "learning_rate": 1.3611656348880377e-05, + "loss": 1.5178, + "step": 41930 + }, + { + "epoch": 0.65, + "learning_rate": 1.3600809859722658e-05, + "loss": 1.5812, + "step": 41940 + }, + { + "epoch": 0.65, + "learning_rate": 1.358996607880687e-05, + "loss": 1.5584, + "step": 41950 + }, + { + "epoch": 0.65, + "learning_rate": 1.3579125008709342e-05, + "loss": 1.5125, + "step": 41960 + }, + { + "epoch": 0.65, + "learning_rate": 1.3568286652005685e-05, + "loss": 1.5227, + "step": 41970 + }, + { + "epoch": 0.65, + "learning_rate": 1.3557451011270908e-05, + "loss": 1.5133, + "step": 41980 + }, + { + "epoch": 0.65, + "learning_rate": 1.3546618089079363e-05, + "loss": 1.5033, + "step": 41990 + }, + { + "epoch": 0.65, + "learning_rate": 1.3535787888004759e-05, + "loss": 1.518, + "step": 42000 + }, + { + "epoch": 0.65, + "learning_rate": 1.3524960410620157e-05, + "loss": 1.5011, + "step": 42010 + }, + { + "epoch": 0.65, + "learning_rate": 1.3514135659497968e-05, + "loss": 1.5244, + "step": 42020 + }, + { + "epoch": 0.65, + "learning_rate": 1.3503313637209963e-05, + "loss": 1.5127, + "step": 42030 + }, + { + "epoch": 0.65, + "learning_rate": 1.3492494346327239e-05, + "loss": 1.5169, + "step": 42040 + }, + { + "epoch": 0.65, + "learning_rate": 1.34816777894203e-05, + "loss": 1.5429, + "step": 42050 + }, + { + "epoch": 0.65, + "learning_rate": 1.3470863969058933e-05, + "loss": 1.5287, + "step": 42060 + }, + { + "epoch": 0.65, + "learning_rate": 1.3460052887812319e-05, + "loss": 1.5074, + "step": 42070 + }, + { + "epoch": 0.65, + "learning_rate": 1.3449244548248977e-05, + "loss": 1.505, + "step": 42080 + }, + { + "epoch": 0.65, + "learning_rate": 1.3438438952936778e-05, + "loss": 1.501, + "step": 42090 + }, + { + "epoch": 0.65, + "learning_rate": 1.3427636104442915e-05, + "loss": 1.4436, + "step": 42100 + }, + { + "epoch": 0.65, + "learning_rate": 1.341683600533398e-05, + "loss": 1.4839, + "step": 42110 + }, + { + "epoch": 0.65, + "learning_rate": 1.3406038658175856e-05, + "loss": 1.5189, + "step": 42120 + }, + { + "epoch": 0.65, + "learning_rate": 1.3395244065533813e-05, + "loss": 1.5291, + "step": 42130 + }, + { + "epoch": 0.65, + "learning_rate": 1.3384452229972446e-05, + "loss": 1.4994, + "step": 42140 + }, + { + "epoch": 0.65, + "learning_rate": 1.3373663154055702e-05, + "loss": 1.5158, + "step": 42150 + }, + { + "epoch": 0.65, + "learning_rate": 1.3362876840346872e-05, + "loss": 1.5063, + "step": 42160 + }, + { + "epoch": 0.65, + "learning_rate": 1.3352093291408585e-05, + "loss": 1.5338, + "step": 42170 + }, + { + "epoch": 0.65, + "learning_rate": 1.3341312509802822e-05, + "loss": 1.5137, + "step": 42180 + }, + { + "epoch": 0.65, + "learning_rate": 1.33305344980909e-05, + "loss": 1.493, + "step": 42190 + }, + { + "epoch": 0.65, + "learning_rate": 1.3319759258833492e-05, + "loss": 1.5025, + "step": 42200 + }, + { + "epoch": 0.65, + "learning_rate": 1.330898679459057e-05, + "loss": 1.4278, + "step": 42210 + }, + { + "epoch": 0.66, + "learning_rate": 1.3298217107921515e-05, + "loss": 1.5214, + "step": 42220 + }, + { + "epoch": 0.66, + "learning_rate": 1.3287450201384983e-05, + "loss": 1.4947, + "step": 42230 + }, + { + "epoch": 0.66, + "learning_rate": 1.3276686077539008e-05, + "loss": 1.5312, + "step": 42240 + }, + { + "epoch": 0.66, + "learning_rate": 1.3265924738940949e-05, + "loss": 1.5203, + "step": 42250 + }, + { + "epoch": 0.66, + "learning_rate": 1.3255166188147514e-05, + "loss": 1.5252, + "step": 42260 + }, + { + "epoch": 0.66, + "learning_rate": 1.3244410427714715e-05, + "loss": 1.533, + "step": 42270 + }, + { + "epoch": 0.66, + "learning_rate": 1.3233657460197963e-05, + "loss": 1.5381, + "step": 42280 + }, + { + "epoch": 0.66, + "learning_rate": 1.3222907288151943e-05, + "loss": 1.5358, + "step": 42290 + }, + { + "epoch": 0.66, + "learning_rate": 1.321215991413071e-05, + "loss": 1.5302, + "step": 42300 + }, + { + "epoch": 0.66, + "learning_rate": 1.3201415340687646e-05, + "loss": 1.5134, + "step": 42310 + }, + { + "epoch": 0.66, + "learning_rate": 1.3190673570375466e-05, + "loss": 1.3953, + "step": 42320 + }, + { + "epoch": 0.66, + "learning_rate": 1.3179934605746224e-05, + "loss": 1.5472, + "step": 42330 + }, + { + "epoch": 0.66, + "learning_rate": 1.3169198449351303e-05, + "loss": 1.5218, + "step": 42340 + }, + { + "epoch": 0.66, + "learning_rate": 1.315846510374143e-05, + "loss": 1.5374, + "step": 42350 + }, + { + "epoch": 0.66, + "learning_rate": 1.3147734571466625e-05, + "loss": 1.5279, + "step": 42360 + }, + { + "epoch": 0.66, + "learning_rate": 1.3137006855076306e-05, + "loss": 1.5498, + "step": 42370 + }, + { + "epoch": 0.66, + "learning_rate": 1.312628195711915e-05, + "loss": 1.5365, + "step": 42380 + }, + { + "epoch": 0.66, + "learning_rate": 1.3115559880143236e-05, + "loss": 1.5384, + "step": 42390 + }, + { + "epoch": 0.66, + "learning_rate": 1.3104840626695903e-05, + "loss": 1.4761, + "step": 42400 + }, + { + "epoch": 0.66, + "learning_rate": 1.309412419932387e-05, + "loss": 1.5717, + "step": 42410 + }, + { + "epoch": 0.66, + "learning_rate": 1.308341060057316e-05, + "loss": 1.5447, + "step": 42420 + }, + { + "epoch": 0.66, + "learning_rate": 1.3072699832989144e-05, + "loss": 1.5621, + "step": 42430 + }, + { + "epoch": 0.66, + "learning_rate": 1.306199189911648e-05, + "loss": 1.5802, + "step": 42440 + }, + { + "epoch": 0.66, + "learning_rate": 1.3051286801499212e-05, + "loss": 1.5573, + "step": 42450 + }, + { + "epoch": 0.66, + "learning_rate": 1.3040584542680656e-05, + "loss": 1.5567, + "step": 42460 + }, + { + "epoch": 0.66, + "learning_rate": 1.3029885125203482e-05, + "loss": 1.5613, + "step": 42470 + }, + { + "epoch": 0.66, + "learning_rate": 1.3019188551609676e-05, + "loss": 1.5234, + "step": 42480 + }, + { + "epoch": 0.66, + "learning_rate": 1.3008494824440557e-05, + "loss": 1.5484, + "step": 42490 + }, + { + "epoch": 0.66, + "learning_rate": 1.299780394623676e-05, + "loss": 1.5234, + "step": 42500 + }, + { + "epoch": 0.66, + "learning_rate": 1.298711591953824e-05, + "loss": 1.5403, + "step": 42510 + }, + { + "epoch": 0.66, + "learning_rate": 1.2976430746884292e-05, + "loss": 1.5185, + "step": 42520 + }, + { + "epoch": 0.66, + "learning_rate": 1.296574843081349e-05, + "loss": 1.5198, + "step": 42530 + }, + { + "epoch": 0.66, + "learning_rate": 1.2955068973863805e-05, + "loss": 1.5377, + "step": 42540 + }, + { + "epoch": 0.66, + "learning_rate": 1.2944392378572445e-05, + "loss": 1.5315, + "step": 42550 + }, + { + "epoch": 0.66, + "learning_rate": 1.2933718647475992e-05, + "loss": 1.5469, + "step": 42560 + }, + { + "epoch": 0.66, + "learning_rate": 1.2923047783110332e-05, + "loss": 1.5341, + "step": 42570 + }, + { + "epoch": 0.66, + "learning_rate": 1.2912379788010676e-05, + "loss": 1.5386, + "step": 42580 + }, + { + "epoch": 0.66, + "learning_rate": 1.290171466471152e-05, + "loss": 1.53, + "step": 42590 + }, + { + "epoch": 0.66, + "learning_rate": 1.2891052415746747e-05, + "loss": 1.5182, + "step": 42600 + }, + { + "epoch": 0.66, + "learning_rate": 1.2880393043649468e-05, + "loss": 1.5076, + "step": 42610 + }, + { + "epoch": 0.66, + "learning_rate": 1.2869736550952203e-05, + "loss": 1.5436, + "step": 42620 + }, + { + "epoch": 0.66, + "learning_rate": 1.2859082940186711e-05, + "loss": 1.5497, + "step": 42630 + }, + { + "epoch": 0.66, + "learning_rate": 1.284843221388411e-05, + "loss": 1.515, + "step": 42640 + }, + { + "epoch": 0.66, + "learning_rate": 1.2837784374574819e-05, + "loss": 1.5285, + "step": 42650 + }, + { + "epoch": 0.66, + "learning_rate": 1.282713942478857e-05, + "loss": 1.5388, + "step": 42660 + }, + { + "epoch": 0.66, + "learning_rate": 1.2816497367054411e-05, + "loss": 1.5157, + "step": 42670 + }, + { + "epoch": 0.66, + "learning_rate": 1.280585820390071e-05, + "loss": 1.52, + "step": 42680 + }, + { + "epoch": 0.66, + "learning_rate": 1.279522193785514e-05, + "loss": 1.6367, + "step": 42690 + }, + { + "epoch": 0.66, + "learning_rate": 1.2784588571444667e-05, + "loss": 1.5226, + "step": 42700 + }, + { + "epoch": 0.66, + "learning_rate": 1.2773958107195616e-05, + "loss": 1.5139, + "step": 42710 + }, + { + "epoch": 0.66, + "learning_rate": 1.2763330547633572e-05, + "loss": 1.5306, + "step": 42720 + }, + { + "epoch": 0.66, + "learning_rate": 1.2752705895283457e-05, + "loss": 1.5232, + "step": 42730 + }, + { + "epoch": 0.66, + "learning_rate": 1.2742084152669497e-05, + "loss": 1.5334, + "step": 42740 + }, + { + "epoch": 0.66, + "learning_rate": 1.273146532231524e-05, + "loss": 1.4799, + "step": 42750 + }, + { + "epoch": 0.66, + "learning_rate": 1.2720849406743496e-05, + "loss": 1.6232, + "step": 42760 + }, + { + "epoch": 0.66, + "learning_rate": 1.2710236408476456e-05, + "loss": 1.5305, + "step": 42770 + }, + { + "epoch": 0.66, + "learning_rate": 1.2699626330035541e-05, + "loss": 1.5135, + "step": 42780 + }, + { + "epoch": 0.66, + "learning_rate": 1.2689019173941535e-05, + "loss": 1.5218, + "step": 42790 + }, + { + "epoch": 0.66, + "learning_rate": 1.2678414942714498e-05, + "loss": 1.5411, + "step": 42800 + }, + { + "epoch": 0.66, + "learning_rate": 1.2667813638873807e-05, + "loss": 1.5246, + "step": 42810 + }, + { + "epoch": 0.66, + "learning_rate": 1.2657215264938144e-05, + "loss": 1.5164, + "step": 42820 + }, + { + "epoch": 0.66, + "learning_rate": 1.2646619823425484e-05, + "loss": 1.6018, + "step": 42830 + }, + { + "epoch": 0.66, + "learning_rate": 1.2636027316853119e-05, + "loss": 1.5423, + "step": 42840 + }, + { + "epoch": 0.66, + "learning_rate": 1.2625437747737634e-05, + "loss": 1.5627, + "step": 42850 + }, + { + "epoch": 0.66, + "learning_rate": 1.2614851118594934e-05, + "loss": 1.5739, + "step": 42860 + }, + { + "epoch": 0.67, + "learning_rate": 1.2604267431940176e-05, + "loss": 1.5762, + "step": 42870 + }, + { + "epoch": 0.67, + "learning_rate": 1.2593686690287899e-05, + "loss": 1.5671, + "step": 42880 + }, + { + "epoch": 0.67, + "learning_rate": 1.2583108896151858e-05, + "loss": 1.6074, + "step": 42890 + }, + { + "epoch": 0.67, + "learning_rate": 1.2572534052045165e-05, + "loss": 1.5652, + "step": 42900 + }, + { + "epoch": 0.67, + "learning_rate": 1.2561962160480204e-05, + "loss": 1.5241, + "step": 42910 + }, + { + "epoch": 0.67, + "learning_rate": 1.2551393223968683e-05, + "loss": 1.6026, + "step": 42920 + }, + { + "epoch": 0.67, + "learning_rate": 1.2540827245021552e-05, + "loss": 1.5431, + "step": 42930 + }, + { + "epoch": 0.67, + "learning_rate": 1.2530264226149146e-05, + "loss": 1.5524, + "step": 42940 + }, + { + "epoch": 0.67, + "learning_rate": 1.2519704169861015e-05, + "loss": 1.5595, + "step": 42950 + }, + { + "epoch": 0.67, + "learning_rate": 1.2509147078666048e-05, + "loss": 1.591, + "step": 42960 + }, + { + "epoch": 0.67, + "learning_rate": 1.2498592955072415e-05, + "loss": 1.5608, + "step": 42970 + }, + { + "epoch": 0.67, + "learning_rate": 1.2488041801587592e-05, + "loss": 1.5184, + "step": 42980 + }, + { + "epoch": 0.67, + "learning_rate": 1.2477493620718337e-05, + "loss": 1.5482, + "step": 42990 + }, + { + "epoch": 0.67, + "learning_rate": 1.2466948414970711e-05, + "loss": 1.509, + "step": 43000 + }, + { + "epoch": 0.67, + "learning_rate": 1.2456406186850072e-05, + "loss": 1.5322, + "step": 43010 + }, + { + "epoch": 0.67, + "learning_rate": 1.244586693886104e-05, + "loss": 1.536, + "step": 43020 + }, + { + "epoch": 0.67, + "learning_rate": 1.243533067350758e-05, + "loss": 1.5403, + "step": 43030 + }, + { + "epoch": 0.67, + "learning_rate": 1.2424797393292895e-05, + "loss": 1.5284, + "step": 43040 + }, + { + "epoch": 0.67, + "learning_rate": 1.2414267100719513e-05, + "loss": 1.5206, + "step": 43050 + }, + { + "epoch": 0.67, + "learning_rate": 1.2403739798289238e-05, + "loss": 1.3462, + "step": 43060 + }, + { + "epoch": 0.67, + "learning_rate": 1.2393215488503169e-05, + "loss": 1.3785, + "step": 43070 + }, + { + "epoch": 0.67, + "learning_rate": 1.2382694173861693e-05, + "loss": 1.534, + "step": 43080 + }, + { + "epoch": 0.67, + "learning_rate": 1.237217585686449e-05, + "loss": 1.5222, + "step": 43090 + }, + { + "epoch": 0.67, + "learning_rate": 1.23616605400105e-05, + "loss": 1.5202, + "step": 43100 + }, + { + "epoch": 0.67, + "learning_rate": 1.2351148225798007e-05, + "loss": 1.5179, + "step": 43110 + }, + { + "epoch": 0.67, + "learning_rate": 1.2340638916724517e-05, + "loss": 1.4851, + "step": 43120 + }, + { + "epoch": 0.67, + "learning_rate": 1.2330132615286869e-05, + "loss": 1.5359, + "step": 43130 + }, + { + "epoch": 0.67, + "learning_rate": 1.231962932398116e-05, + "loss": 1.5241, + "step": 43140 + }, + { + "epoch": 0.67, + "learning_rate": 1.2309129045302791e-05, + "loss": 1.497, + "step": 43150 + }, + { + "epoch": 0.67, + "learning_rate": 1.2298631781746437e-05, + "loss": 1.5224, + "step": 43160 + }, + { + "epoch": 0.67, + "learning_rate": 1.2288137535806057e-05, + "loss": 1.5101, + "step": 43170 + }, + { + "epoch": 0.67, + "learning_rate": 1.2277646309974905e-05, + "loss": 1.4912, + "step": 43180 + }, + { + "epoch": 0.67, + "learning_rate": 1.2267158106745477e-05, + "loss": 1.5201, + "step": 43190 + }, + { + "epoch": 0.67, + "learning_rate": 1.2256672928609622e-05, + "loss": 1.5572, + "step": 43200 + }, + { + "epoch": 0.67, + "learning_rate": 1.2246190778058398e-05, + "loss": 1.5244, + "step": 43210 + }, + { + "epoch": 0.67, + "learning_rate": 1.2235711657582183e-05, + "loss": 1.5068, + "step": 43220 + }, + { + "epoch": 0.67, + "learning_rate": 1.2225235569670628e-05, + "loss": 1.5245, + "step": 43230 + }, + { + "epoch": 0.67, + "learning_rate": 1.2214762516812667e-05, + "loss": 1.5043, + "step": 43240 + }, + { + "epoch": 0.67, + "learning_rate": 1.2204292501496506e-05, + "loss": 1.5127, + "step": 43250 + }, + { + "epoch": 0.67, + "learning_rate": 1.2193825526209628e-05, + "loss": 1.4951, + "step": 43260 + }, + { + "epoch": 0.67, + "learning_rate": 1.218336159343881e-05, + "loss": 1.5176, + "step": 43270 + }, + { + "epoch": 0.67, + "learning_rate": 1.2172900705670069e-05, + "loss": 1.4977, + "step": 43280 + }, + { + "epoch": 0.67, + "learning_rate": 1.2162442865388751e-05, + "loss": 1.5078, + "step": 43290 + }, + { + "epoch": 0.67, + "learning_rate": 1.215198807507942e-05, + "loss": 1.5037, + "step": 43300 + }, + { + "epoch": 0.67, + "learning_rate": 1.2141536337225987e-05, + "loss": 1.5089, + "step": 43310 + }, + { + "epoch": 0.67, + "learning_rate": 1.2131087654311557e-05, + "loss": 1.5426, + "step": 43320 + }, + { + "epoch": 0.67, + "learning_rate": 1.2120642028818568e-05, + "loss": 1.5138, + "step": 43330 + }, + { + "epoch": 0.67, + "learning_rate": 1.2110199463228706e-05, + "loss": 1.5503, + "step": 43340 + }, + { + "epoch": 0.67, + "learning_rate": 1.209975996002295e-05, + "loss": 1.5818, + "step": 43350 + }, + { + "epoch": 0.67, + "learning_rate": 1.2089323521681503e-05, + "loss": 1.5444, + "step": 43360 + }, + { + "epoch": 0.67, + "learning_rate": 1.2078890150683919e-05, + "loss": 1.5192, + "step": 43370 + }, + { + "epoch": 0.67, + "learning_rate": 1.2068459849508947e-05, + "loss": 1.5143, + "step": 43380 + }, + { + "epoch": 0.67, + "learning_rate": 1.2058032620634652e-05, + "loss": 1.5189, + "step": 43390 + }, + { + "epoch": 0.67, + "learning_rate": 1.204760846653835e-05, + "loss": 1.5124, + "step": 43400 + }, + { + "epoch": 0.67, + "learning_rate": 1.2037187389696633e-05, + "loss": 1.5127, + "step": 43410 + }, + { + "epoch": 0.67, + "learning_rate": 1.2026769392585363e-05, + "loss": 1.5022, + "step": 43420 + }, + { + "epoch": 0.67, + "learning_rate": 1.2016354477679667e-05, + "loss": 1.521, + "step": 43430 + }, + { + "epoch": 0.67, + "learning_rate": 1.2005942647453953e-05, + "loss": 1.4332, + "step": 43440 + }, + { + "epoch": 0.67, + "learning_rate": 1.199553390438185e-05, + "loss": 1.5107, + "step": 43450 + }, + { + "epoch": 0.67, + "learning_rate": 1.1985128250936331e-05, + "loss": 1.527, + "step": 43460 + }, + { + "epoch": 0.67, + "learning_rate": 1.1974725689589559e-05, + "loss": 1.5226, + "step": 43470 + }, + { + "epoch": 0.67, + "learning_rate": 1.1964326222813007e-05, + "loss": 1.5223, + "step": 43480 + }, + { + "epoch": 0.67, + "learning_rate": 1.19539298530774e-05, + "loss": 1.5141, + "step": 43490 + }, + { + "epoch": 0.67, + "learning_rate": 1.1943536582852738e-05, + "loss": 1.5161, + "step": 43500 + }, + { + "epoch": 0.68, + "learning_rate": 1.1933146414608245e-05, + "loss": 1.5118, + "step": 43510 + }, + { + "epoch": 0.68, + "learning_rate": 1.1922759350812476e-05, + "loss": 1.4786, + "step": 43520 + }, + { + "epoch": 0.68, + "learning_rate": 1.191237539393317e-05, + "loss": 1.5104, + "step": 43530 + }, + { + "epoch": 0.68, + "learning_rate": 1.1901994546437411e-05, + "loss": 1.5261, + "step": 43540 + }, + { + "epoch": 0.68, + "learning_rate": 1.1891616810791467e-05, + "loss": 1.5086, + "step": 43550 + }, + { + "epoch": 0.68, + "learning_rate": 1.1881242189460915e-05, + "loss": 1.5117, + "step": 43560 + }, + { + "epoch": 0.68, + "learning_rate": 1.1870870684910573e-05, + "loss": 1.5155, + "step": 43570 + }, + { + "epoch": 0.68, + "learning_rate": 1.1860502299604528e-05, + "loss": 1.5052, + "step": 43580 + }, + { + "epoch": 0.68, + "learning_rate": 1.185013703600612e-05, + "loss": 1.5181, + "step": 43590 + }, + { + "epoch": 0.68, + "learning_rate": 1.1839774896577946e-05, + "loss": 1.5074, + "step": 43600 + }, + { + "epoch": 0.68, + "learning_rate": 1.1829415883781878e-05, + "loss": 1.4969, + "step": 43610 + }, + { + "epoch": 0.68, + "learning_rate": 1.1819060000078993e-05, + "loss": 1.5028, + "step": 43620 + }, + { + "epoch": 0.68, + "learning_rate": 1.1808707247929712e-05, + "loss": 1.5016, + "step": 43630 + }, + { + "epoch": 0.68, + "learning_rate": 1.1798357629793624e-05, + "loss": 1.5091, + "step": 43640 + }, + { + "epoch": 0.68, + "learning_rate": 1.1788011148129626e-05, + "loss": 1.5267, + "step": 43650 + }, + { + "epoch": 0.68, + "learning_rate": 1.1777667805395856e-05, + "loss": 1.4964, + "step": 43660 + }, + { + "epoch": 0.68, + "learning_rate": 1.1767327604049714e-05, + "loss": 1.5434, + "step": 43670 + }, + { + "epoch": 0.68, + "learning_rate": 1.1756990546547813e-05, + "loss": 1.5185, + "step": 43680 + }, + { + "epoch": 0.68, + "learning_rate": 1.1746656635346093e-05, + "loss": 1.5269, + "step": 43690 + }, + { + "epoch": 0.68, + "learning_rate": 1.1736325872899678e-05, + "loss": 1.5073, + "step": 43700 + }, + { + "epoch": 0.68, + "learning_rate": 1.1725998261662977e-05, + "loss": 1.5338, + "step": 43710 + }, + { + "epoch": 0.68, + "learning_rate": 1.1715673804089647e-05, + "loss": 1.5157, + "step": 43720 + }, + { + "epoch": 0.68, + "learning_rate": 1.1705352502632588e-05, + "loss": 1.5234, + "step": 43730 + }, + { + "epoch": 0.68, + "learning_rate": 1.1695034359743961e-05, + "loss": 1.5081, + "step": 43740 + }, + { + "epoch": 0.68, + "learning_rate": 1.1684719377875168e-05, + "loss": 1.5071, + "step": 43750 + }, + { + "epoch": 0.68, + "learning_rate": 1.1674407559476863e-05, + "loss": 1.5125, + "step": 43760 + }, + { + "epoch": 0.68, + "learning_rate": 1.1664098906998947e-05, + "loss": 1.5049, + "step": 43770 + }, + { + "epoch": 0.68, + "learning_rate": 1.165379342289058e-05, + "loss": 1.5121, + "step": 43780 + }, + { + "epoch": 0.68, + "learning_rate": 1.1643491109600129e-05, + "loss": 1.5084, + "step": 43790 + }, + { + "epoch": 0.68, + "learning_rate": 1.163319196957528e-05, + "loss": 1.5171, + "step": 43800 + }, + { + "epoch": 0.68, + "learning_rate": 1.1622896005262893e-05, + "loss": 1.4973, + "step": 43810 + }, + { + "epoch": 0.68, + "learning_rate": 1.1612603219109108e-05, + "loss": 1.5074, + "step": 43820 + }, + { + "epoch": 0.68, + "learning_rate": 1.1602313613559313e-05, + "loss": 1.5245, + "step": 43830 + }, + { + "epoch": 0.68, + "learning_rate": 1.1592027191058136e-05, + "loss": 1.5254, + "step": 43840 + }, + { + "epoch": 0.68, + "learning_rate": 1.1581743954049418e-05, + "loss": 1.5041, + "step": 43850 + }, + { + "epoch": 0.68, + "learning_rate": 1.1571463904976313e-05, + "loss": 1.4813, + "step": 43860 + }, + { + "epoch": 0.68, + "learning_rate": 1.1561187046281141e-05, + "loss": 1.502, + "step": 43870 + }, + { + "epoch": 0.68, + "learning_rate": 1.1550913380405509e-05, + "loss": 1.511, + "step": 43880 + }, + { + "epoch": 0.68, + "learning_rate": 1.1540642909790258e-05, + "loss": 1.5054, + "step": 43890 + }, + { + "epoch": 0.68, + "learning_rate": 1.1530375636875464e-05, + "loss": 1.5367, + "step": 43900 + }, + { + "epoch": 0.68, + "learning_rate": 1.152011156410045e-05, + "loss": 1.4836, + "step": 43910 + }, + { + "epoch": 0.68, + "learning_rate": 1.1509850693903768e-05, + "loss": 1.5129, + "step": 43920 + }, + { + "epoch": 0.68, + "learning_rate": 1.1499593028723226e-05, + "loss": 1.538, + "step": 43930 + }, + { + "epoch": 0.68, + "learning_rate": 1.1489338570995836e-05, + "loss": 1.5445, + "step": 43940 + }, + { + "epoch": 0.68, + "learning_rate": 1.1479087323157909e-05, + "loss": 1.5766, + "step": 43950 + }, + { + "epoch": 0.68, + "learning_rate": 1.1468839287644926e-05, + "loss": 1.5463, + "step": 43960 + }, + { + "epoch": 0.68, + "learning_rate": 1.1458594466891644e-05, + "loss": 1.5008, + "step": 43970 + }, + { + "epoch": 0.68, + "learning_rate": 1.1448352863332054e-05, + "loss": 1.4844, + "step": 43980 + }, + { + "epoch": 0.68, + "learning_rate": 1.143811447939937e-05, + "loss": 1.4891, + "step": 43990 + }, + { + "epoch": 0.68, + "learning_rate": 1.1427879317526047e-05, + "loss": 1.4914, + "step": 44000 + }, + { + "epoch": 0.68, + "learning_rate": 1.1417647380143786e-05, + "loss": 1.4975, + "step": 44010 + }, + { + "epoch": 0.68, + "learning_rate": 1.1407418669683487e-05, + "loss": 1.5058, + "step": 44020 + }, + { + "epoch": 0.68, + "learning_rate": 1.1397193188575339e-05, + "loss": 1.4873, + "step": 44030 + }, + { + "epoch": 0.68, + "learning_rate": 1.1386970939248703e-05, + "loss": 1.4879, + "step": 44040 + }, + { + "epoch": 0.68, + "learning_rate": 1.1376751924132214e-05, + "loss": 1.4851, + "step": 44050 + }, + { + "epoch": 0.68, + "learning_rate": 1.1366536145653724e-05, + "loss": 1.4301, + "step": 44060 + }, + { + "epoch": 0.68, + "learning_rate": 1.1356323606240321e-05, + "loss": 1.5252, + "step": 44070 + }, + { + "epoch": 0.68, + "learning_rate": 1.1346114308318315e-05, + "loss": 1.506, + "step": 44080 + }, + { + "epoch": 0.68, + "learning_rate": 1.1335908254313251e-05, + "loss": 1.4936, + "step": 44090 + }, + { + "epoch": 0.68, + "learning_rate": 1.1325705446649917e-05, + "loss": 1.4777, + "step": 44100 + }, + { + "epoch": 0.68, + "learning_rate": 1.1315505887752287e-05, + "loss": 1.5018, + "step": 44110 + }, + { + "epoch": 0.68, + "learning_rate": 1.1305309580043627e-05, + "loss": 1.4953, + "step": 44120 + }, + { + "epoch": 0.68, + "learning_rate": 1.129511652594637e-05, + "loss": 1.4971, + "step": 44130 + }, + { + "epoch": 0.68, + "learning_rate": 1.128492672788221e-05, + "loss": 1.4569, + "step": 44140 + }, + { + "epoch": 0.68, + "learning_rate": 1.1274740188272061e-05, + "loss": 1.4674, + "step": 44150 + }, + { + "epoch": 0.69, + "learning_rate": 1.1264556909536068e-05, + "loss": 1.4537, + "step": 44160 + }, + { + "epoch": 0.69, + "learning_rate": 1.1254376894093569e-05, + "loss": 1.4497, + "step": 44170 + }, + { + "epoch": 0.69, + "learning_rate": 1.1244200144363184e-05, + "loss": 1.4994, + "step": 44180 + }, + { + "epoch": 0.69, + "learning_rate": 1.1234026662762704e-05, + "loss": 1.4864, + "step": 44190 + }, + { + "epoch": 0.69, + "learning_rate": 1.122385645170917e-05, + "loss": 1.4853, + "step": 44200 + }, + { + "epoch": 0.69, + "learning_rate": 1.1213689513618842e-05, + "loss": 1.484, + "step": 44210 + }, + { + "epoch": 0.69, + "learning_rate": 1.1203525850907198e-05, + "loss": 1.5006, + "step": 44220 + }, + { + "epoch": 0.69, + "learning_rate": 1.1193365465988948e-05, + "loss": 1.4791, + "step": 44230 + }, + { + "epoch": 0.69, + "learning_rate": 1.1183208361278007e-05, + "loss": 1.4832, + "step": 44240 + }, + { + "epoch": 0.69, + "learning_rate": 1.1173054539187527e-05, + "loss": 1.463, + "step": 44250 + }, + { + "epoch": 0.69, + "learning_rate": 1.1162904002129873e-05, + "loss": 1.475, + "step": 44260 + }, + { + "epoch": 0.69, + "learning_rate": 1.1152756752516633e-05, + "loss": 1.4567, + "step": 44270 + }, + { + "epoch": 0.69, + "learning_rate": 1.114261279275859e-05, + "loss": 1.5045, + "step": 44280 + }, + { + "epoch": 0.69, + "learning_rate": 1.1132472125265794e-05, + "loss": 1.5168, + "step": 44290 + }, + { + "epoch": 0.69, + "learning_rate": 1.1122334752447463e-05, + "loss": 1.5157, + "step": 44300 + }, + { + "epoch": 0.69, + "learning_rate": 1.1112200676712064e-05, + "loss": 1.5085, + "step": 44310 + }, + { + "epoch": 0.69, + "learning_rate": 1.1102069900467269e-05, + "loss": 1.4969, + "step": 44320 + }, + { + "epoch": 0.69, + "learning_rate": 1.1091942426119975e-05, + "loss": 1.5195, + "step": 44330 + }, + { + "epoch": 0.69, + "learning_rate": 1.1081818256076263e-05, + "loss": 1.5465, + "step": 44340 + }, + { + "epoch": 0.69, + "learning_rate": 1.1071697392741484e-05, + "loss": 1.5344, + "step": 44350 + }, + { + "epoch": 0.69, + "learning_rate": 1.1061579838520153e-05, + "loss": 1.4999, + "step": 44360 + }, + { + "epoch": 0.69, + "learning_rate": 1.1051465595816025e-05, + "loss": 1.4783, + "step": 44370 + }, + { + "epoch": 0.69, + "learning_rate": 1.1041354667032056e-05, + "loss": 1.4925, + "step": 44380 + }, + { + "epoch": 0.69, + "learning_rate": 1.1031247054570427e-05, + "loss": 1.4775, + "step": 44390 + }, + { + "epoch": 0.69, + "learning_rate": 1.102114276083252e-05, + "loss": 1.4643, + "step": 44400 + }, + { + "epoch": 0.69, + "learning_rate": 1.1011041788218937e-05, + "loss": 1.4822, + "step": 44410 + }, + { + "epoch": 0.69, + "learning_rate": 1.1000944139129494e-05, + "loss": 1.4745, + "step": 44420 + }, + { + "epoch": 0.69, + "learning_rate": 1.0990849815963181e-05, + "loss": 1.4768, + "step": 44430 + }, + { + "epoch": 0.69, + "learning_rate": 1.0980758821118267e-05, + "loss": 1.4961, + "step": 44440 + }, + { + "epoch": 0.69, + "learning_rate": 1.0970671156992152e-05, + "loss": 1.506, + "step": 44450 + }, + { + "epoch": 0.69, + "learning_rate": 1.096058682598152e-05, + "loss": 1.4897, + "step": 44460 + }, + { + "epoch": 0.69, + "learning_rate": 1.0950505830482196e-05, + "loss": 1.4805, + "step": 44470 + }, + { + "epoch": 0.69, + "learning_rate": 1.0940428172889258e-05, + "loss": 1.4837, + "step": 44480 + }, + { + "epoch": 0.69, + "learning_rate": 1.0930353855596973e-05, + "loss": 1.4574, + "step": 44490 + }, + { + "epoch": 0.69, + "learning_rate": 1.0920282880998823e-05, + "loss": 1.5058, + "step": 44500 + }, + { + "epoch": 0.69, + "learning_rate": 1.0910215251487462e-05, + "loss": 1.4799, + "step": 44510 + }, + { + "epoch": 0.69, + "learning_rate": 1.090015096945482e-05, + "loss": 1.478, + "step": 44520 + }, + { + "epoch": 0.69, + "learning_rate": 1.0890090037291956e-05, + "loss": 1.4905, + "step": 44530 + }, + { + "epoch": 0.69, + "learning_rate": 1.0880032457389177e-05, + "loss": 1.4843, + "step": 44540 + }, + { + "epoch": 0.69, + "learning_rate": 1.0869978232135985e-05, + "loss": 1.5049, + "step": 44550 + }, + { + "epoch": 0.69, + "learning_rate": 1.085992736392108e-05, + "loss": 1.508, + "step": 44560 + }, + { + "epoch": 0.69, + "learning_rate": 1.0849879855132365e-05, + "loss": 1.4822, + "step": 44570 + }, + { + "epoch": 0.69, + "learning_rate": 1.083983570815695e-05, + "loss": 1.4636, + "step": 44580 + }, + { + "epoch": 0.69, + "learning_rate": 1.0829794925381151e-05, + "loss": 1.4644, + "step": 44590 + }, + { + "epoch": 0.69, + "learning_rate": 1.0819757509190448e-05, + "loss": 1.5176, + "step": 44600 + }, + { + "epoch": 0.69, + "learning_rate": 1.080972346196959e-05, + "loss": 1.5004, + "step": 44610 + }, + { + "epoch": 0.69, + "learning_rate": 1.0799692786102458e-05, + "loss": 1.4904, + "step": 44620 + }, + { + "epoch": 0.69, + "learning_rate": 1.0789665483972166e-05, + "loss": 1.4863, + "step": 44630 + }, + { + "epoch": 0.69, + "learning_rate": 1.077964155796102e-05, + "loss": 1.4809, + "step": 44640 + }, + { + "epoch": 0.69, + "learning_rate": 1.0769621010450537e-05, + "loss": 1.478, + "step": 44650 + }, + { + "epoch": 0.69, + "learning_rate": 1.0759603843821387e-05, + "loss": 1.498, + "step": 44660 + }, + { + "epoch": 0.69, + "learning_rate": 1.0749590060453504e-05, + "loss": 1.4965, + "step": 44670 + }, + { + "epoch": 0.69, + "learning_rate": 1.0739579662725951e-05, + "loss": 1.4947, + "step": 44680 + }, + { + "epoch": 0.69, + "learning_rate": 1.072957265301705e-05, + "loss": 1.4781, + "step": 44690 + }, + { + "epoch": 0.69, + "learning_rate": 1.0719569033704261e-05, + "loss": 1.4702, + "step": 44700 + }, + { + "epoch": 0.69, + "learning_rate": 1.0709568807164273e-05, + "loss": 1.4836, + "step": 44710 + }, + { + "epoch": 0.69, + "learning_rate": 1.0699571975772954e-05, + "loss": 1.4885, + "step": 44720 + }, + { + "epoch": 0.69, + "learning_rate": 1.0689578541905376e-05, + "loss": 1.5019, + "step": 44730 + }, + { + "epoch": 0.69, + "learning_rate": 1.0679588507935798e-05, + "loss": 1.4694, + "step": 44740 + }, + { + "epoch": 0.69, + "learning_rate": 1.0669601876237672e-05, + "loss": 1.5008, + "step": 44750 + }, + { + "epoch": 0.69, + "learning_rate": 1.0659618649183647e-05, + "loss": 1.4867, + "step": 44760 + }, + { + "epoch": 0.69, + "learning_rate": 1.0649638829145537e-05, + "loss": 1.4744, + "step": 44770 + }, + { + "epoch": 0.69, + "learning_rate": 1.0639662418494399e-05, + "loss": 1.4877, + "step": 44780 + }, + { + "epoch": 0.69, + "learning_rate": 1.062968941960042e-05, + "loss": 1.4988, + "step": 44790 + }, + { + "epoch": 0.7, + "learning_rate": 1.0619719834833016e-05, + "loss": 1.4964, + "step": 44800 + }, + { + "epoch": 0.7, + "learning_rate": 1.0609753666560781e-05, + "loss": 1.4822, + "step": 44810 + }, + { + "epoch": 0.7, + "learning_rate": 1.0599790917151506e-05, + "loss": 1.4774, + "step": 44820 + }, + { + "epoch": 0.7, + "learning_rate": 1.058983158897213e-05, + "loss": 1.4701, + "step": 44830 + }, + { + "epoch": 0.7, + "learning_rate": 1.0579875684388849e-05, + "loss": 1.4151, + "step": 44840 + }, + { + "epoch": 0.7, + "learning_rate": 1.056992320576698e-05, + "loss": 1.4966, + "step": 44850 + }, + { + "epoch": 0.7, + "learning_rate": 1.0559974155471059e-05, + "loss": 1.5007, + "step": 44860 + }, + { + "epoch": 0.7, + "learning_rate": 1.05500285358648e-05, + "loss": 1.494, + "step": 44870 + }, + { + "epoch": 0.7, + "learning_rate": 1.0540086349311105e-05, + "loss": 1.512, + "step": 44880 + }, + { + "epoch": 0.7, + "learning_rate": 1.0530147598172058e-05, + "loss": 1.5136, + "step": 44890 + }, + { + "epoch": 0.7, + "learning_rate": 1.0520212284808927e-05, + "loss": 1.5578, + "step": 44900 + }, + { + "epoch": 0.7, + "learning_rate": 1.0510280411582163e-05, + "loss": 1.5213, + "step": 44910 + }, + { + "epoch": 0.7, + "learning_rate": 1.05003519808514e-05, + "loss": 1.5169, + "step": 44920 + }, + { + "epoch": 0.7, + "learning_rate": 1.0490426994975464e-05, + "loss": 1.4906, + "step": 44930 + }, + { + "epoch": 0.7, + "learning_rate": 1.0480505456312323e-05, + "loss": 1.4947, + "step": 44940 + }, + { + "epoch": 0.7, + "learning_rate": 1.0470587367219195e-05, + "loss": 1.5001, + "step": 44950 + }, + { + "epoch": 0.7, + "learning_rate": 1.0460672730052406e-05, + "loss": 1.4882, + "step": 44960 + }, + { + "epoch": 0.7, + "learning_rate": 1.0450761547167512e-05, + "loss": 1.5152, + "step": 44970 + }, + { + "epoch": 0.7, + "learning_rate": 1.0440853820919228e-05, + "loss": 1.4759, + "step": 44980 + }, + { + "epoch": 0.7, + "learning_rate": 1.0430949553661457e-05, + "loss": 1.4706, + "step": 44990 + }, + { + "epoch": 0.7, + "learning_rate": 1.042104874774725e-05, + "loss": 1.4779, + "step": 45000 + }, + { + "epoch": 0.7, + "eval_loss": 1.5883851051330566, + "eval_runtime": 81.9958, + "eval_samples_per_second": 36.587, + "eval_steps_per_second": 4.573, + "step": 45000 + }, + { + "epoch": 0.7, + "learning_rate": 1.0411151405528896e-05, + "loss": 1.4955, + "step": 45010 + }, + { + "epoch": 0.7, + "learning_rate": 1.04012575293578e-05, + "loss": 1.4941, + "step": 45020 + }, + { + "epoch": 0.7, + "learning_rate": 1.0391367121584573e-05, + "loss": 1.5202, + "step": 45030 + }, + { + "epoch": 0.7, + "learning_rate": 1.0381480184558998e-05, + "loss": 1.4977, + "step": 45040 + }, + { + "epoch": 0.7, + "learning_rate": 1.0371596720630036e-05, + "loss": 1.5057, + "step": 45050 + }, + { + "epoch": 0.7, + "learning_rate": 1.0361716732145818e-05, + "loss": 1.5014, + "step": 45060 + }, + { + "epoch": 0.7, + "learning_rate": 1.0351840221453651e-05, + "loss": 1.4775, + "step": 45070 + }, + { + "epoch": 0.7, + "learning_rate": 1.0341967190900027e-05, + "loss": 1.4694, + "step": 45080 + }, + { + "epoch": 0.7, + "learning_rate": 1.0332097642830569e-05, + "loss": 1.4855, + "step": 45090 + }, + { + "epoch": 0.7, + "learning_rate": 1.0322231579590142e-05, + "loss": 1.4839, + "step": 45100 + }, + { + "epoch": 0.7, + "learning_rate": 1.031236900352271e-05, + "loss": 1.5023, + "step": 45110 + }, + { + "epoch": 0.7, + "learning_rate": 1.0302509916971473e-05, + "loss": 1.4768, + "step": 45120 + }, + { + "epoch": 0.7, + "learning_rate": 1.0292654322278753e-05, + "loss": 1.5087, + "step": 45130 + }, + { + "epoch": 0.7, + "learning_rate": 1.0282802221786066e-05, + "loss": 1.5031, + "step": 45140 + }, + { + "epoch": 0.7, + "learning_rate": 1.0272953617834095e-05, + "loss": 1.4925, + "step": 45150 + }, + { + "epoch": 0.7, + "learning_rate": 1.0263108512762698e-05, + "loss": 1.4866, + "step": 45160 + }, + { + "epoch": 0.7, + "learning_rate": 1.0253266908910868e-05, + "loss": 1.4695, + "step": 45170 + }, + { + "epoch": 0.7, + "learning_rate": 1.0243428808616831e-05, + "loss": 1.4682, + "step": 45180 + }, + { + "epoch": 0.7, + "learning_rate": 1.0233594214217912e-05, + "loss": 1.4931, + "step": 45190 + }, + { + "epoch": 0.7, + "learning_rate": 1.0223763128050645e-05, + "loss": 1.5851, + "step": 45200 + }, + { + "epoch": 0.7, + "learning_rate": 1.0213935552450715e-05, + "loss": 1.4837, + "step": 45210 + }, + { + "epoch": 0.7, + "learning_rate": 1.020411148975298e-05, + "loss": 1.4793, + "step": 45220 + }, + { + "epoch": 0.7, + "learning_rate": 1.019429094229146e-05, + "loss": 1.4906, + "step": 45230 + }, + { + "epoch": 0.7, + "learning_rate": 1.0184473912399337e-05, + "loss": 1.5011, + "step": 45240 + }, + { + "epoch": 0.7, + "learning_rate": 1.017466040240897e-05, + "loss": 1.4972, + "step": 45250 + }, + { + "epoch": 0.7, + "learning_rate": 1.0164850414651847e-05, + "loss": 1.5116, + "step": 45260 + }, + { + "epoch": 0.7, + "learning_rate": 1.0155043951458679e-05, + "loss": 1.5027, + "step": 45270 + }, + { + "epoch": 0.7, + "learning_rate": 1.0145241015159279e-05, + "loss": 1.4907, + "step": 45280 + }, + { + "epoch": 0.7, + "learning_rate": 1.0135441608082654e-05, + "loss": 1.4682, + "step": 45290 + }, + { + "epoch": 0.7, + "learning_rate": 1.0125645732556968e-05, + "loss": 1.4876, + "step": 45300 + }, + { + "epoch": 0.7, + "learning_rate": 1.0115853390909555e-05, + "loss": 1.4773, + "step": 45310 + }, + { + "epoch": 0.7, + "learning_rate": 1.010606458546687e-05, + "loss": 1.5428, + "step": 45320 + }, + { + "epoch": 0.7, + "learning_rate": 1.0096279318554591e-05, + "loss": 1.4599, + "step": 45330 + }, + { + "epoch": 0.7, + "learning_rate": 1.0086497592497488e-05, + "loss": 1.4766, + "step": 45340 + }, + { + "epoch": 0.7, + "learning_rate": 1.0076719409619558e-05, + "loss": 1.4779, + "step": 45350 + }, + { + "epoch": 0.7, + "learning_rate": 1.006694477224389e-05, + "loss": 1.4641, + "step": 45360 + }, + { + "epoch": 0.7, + "learning_rate": 1.0057173682692777e-05, + "loss": 1.4916, + "step": 45370 + }, + { + "epoch": 0.7, + "learning_rate": 1.0047406143287649e-05, + "loss": 1.5116, + "step": 45380 + }, + { + "epoch": 0.7, + "learning_rate": 1.00376421563491e-05, + "loss": 1.4928, + "step": 45390 + }, + { + "epoch": 0.7, + "learning_rate": 1.0027881724196875e-05, + "loss": 1.4831, + "step": 45400 + }, + { + "epoch": 0.7, + "learning_rate": 1.0018124849149873e-05, + "loss": 1.4732, + "step": 45410 + }, + { + "epoch": 0.7, + "learning_rate": 1.000837153352617e-05, + "loss": 1.4853, + "step": 45420 + }, + { + "epoch": 0.7, + "learning_rate": 9.998621779642944e-06, + "loss": 1.5264, + "step": 45430 + }, + { + "epoch": 0.71, + "learning_rate": 9.988875589816599e-06, + "loss": 1.5323, + "step": 45440 + }, + { + "epoch": 0.71, + "learning_rate": 9.979132966362625e-06, + "loss": 1.4899, + "step": 45450 + }, + { + "epoch": 0.71, + "learning_rate": 9.969393911595703e-06, + "loss": 1.5472, + "step": 45460 + }, + { + "epoch": 0.71, + "learning_rate": 9.959658427829658e-06, + "loss": 1.5268, + "step": 45470 + }, + { + "epoch": 0.71, + "learning_rate": 9.949926517377467e-06, + "loss": 1.5686, + "step": 45480 + }, + { + "epoch": 0.71, + "learning_rate": 9.940198182551253e-06, + "loss": 1.5553, + "step": 45490 + }, + { + "epoch": 0.71, + "learning_rate": 9.930473425662293e-06, + "loss": 1.5395, + "step": 45500 + }, + { + "epoch": 0.71, + "learning_rate": 9.920752249021022e-06, + "loss": 1.5152, + "step": 45510 + }, + { + "epoch": 0.71, + "learning_rate": 9.911034654936997e-06, + "loss": 1.576, + "step": 45520 + }, + { + "epoch": 0.71, + "learning_rate": 9.901320645718966e-06, + "loss": 1.5625, + "step": 45530 + }, + { + "epoch": 0.71, + "learning_rate": 9.891610223674789e-06, + "loss": 1.5382, + "step": 45540 + }, + { + "epoch": 0.71, + "learning_rate": 9.881903391111486e-06, + "loss": 1.5183, + "step": 45550 + }, + { + "epoch": 0.71, + "learning_rate": 9.872200150335228e-06, + "loss": 1.4914, + "step": 45560 + }, + { + "epoch": 0.71, + "learning_rate": 9.862500503651334e-06, + "loss": 1.5182, + "step": 45570 + }, + { + "epoch": 0.71, + "learning_rate": 9.852804453364259e-06, + "loss": 1.4673, + "step": 45580 + }, + { + "epoch": 0.71, + "learning_rate": 9.843112001777624e-06, + "loss": 1.4805, + "step": 45590 + }, + { + "epoch": 0.71, + "learning_rate": 9.833423151194152e-06, + "loss": 1.5006, + "step": 45600 + }, + { + "epoch": 0.71, + "learning_rate": 9.823737903915772e-06, + "loss": 1.5299, + "step": 45610 + }, + { + "epoch": 0.71, + "learning_rate": 9.8140562622435e-06, + "loss": 1.5052, + "step": 45620 + }, + { + "epoch": 0.71, + "learning_rate": 9.804378228477529e-06, + "loss": 1.4895, + "step": 45630 + }, + { + "epoch": 0.71, + "learning_rate": 9.794703804917183e-06, + "loss": 1.4627, + "step": 45640 + }, + { + "epoch": 0.71, + "learning_rate": 9.785032993860933e-06, + "loss": 1.4412, + "step": 45650 + }, + { + "epoch": 0.71, + "learning_rate": 9.775365797606387e-06, + "loss": 1.3176, + "step": 45660 + }, + { + "epoch": 0.71, + "learning_rate": 9.765702218450298e-06, + "loss": 1.4935, + "step": 45670 + }, + { + "epoch": 0.71, + "learning_rate": 9.756042258688566e-06, + "loss": 1.491, + "step": 45680 + }, + { + "epoch": 0.71, + "learning_rate": 9.746385920616196e-06, + "loss": 1.5005, + "step": 45690 + }, + { + "epoch": 0.71, + "learning_rate": 9.736733206527396e-06, + "loss": 1.5159, + "step": 45700 + }, + { + "epoch": 0.71, + "learning_rate": 9.727084118715452e-06, + "loss": 1.4647, + "step": 45710 + }, + { + "epoch": 0.71, + "learning_rate": 9.717438659472819e-06, + "loss": 1.459, + "step": 45720 + }, + { + "epoch": 0.71, + "learning_rate": 9.707796831091087e-06, + "loss": 1.468, + "step": 45730 + }, + { + "epoch": 0.71, + "learning_rate": 9.698158635860988e-06, + "loss": 1.4579, + "step": 45740 + }, + { + "epoch": 0.71, + "learning_rate": 9.688524076072355e-06, + "loss": 1.4844, + "step": 45750 + }, + { + "epoch": 0.71, + "learning_rate": 9.678893154014224e-06, + "loss": 1.4929, + "step": 45760 + }, + { + "epoch": 0.71, + "learning_rate": 9.669265871974698e-06, + "loss": 1.4861, + "step": 45770 + }, + { + "epoch": 0.71, + "learning_rate": 9.659642232241058e-06, + "loss": 1.4981, + "step": 45780 + }, + { + "epoch": 0.71, + "learning_rate": 9.650022237099707e-06, + "loss": 1.4822, + "step": 45790 + }, + { + "epoch": 0.71, + "learning_rate": 9.640405888836182e-06, + "loss": 1.4686, + "step": 45800 + }, + { + "epoch": 0.71, + "learning_rate": 9.630793189735152e-06, + "loss": 1.4509, + "step": 45810 + }, + { + "epoch": 0.71, + "learning_rate": 9.621184142080422e-06, + "loss": 1.4935, + "step": 45820 + }, + { + "epoch": 0.71, + "learning_rate": 9.611578748154928e-06, + "loss": 1.4957, + "step": 45830 + }, + { + "epoch": 0.71, + "learning_rate": 9.60197701024074e-06, + "loss": 1.5428, + "step": 45840 + }, + { + "epoch": 0.71, + "learning_rate": 9.592378930619064e-06, + "loss": 1.508, + "step": 45850 + }, + { + "epoch": 0.71, + "learning_rate": 9.582784511570206e-06, + "loss": 1.4563, + "step": 45860 + }, + { + "epoch": 0.71, + "learning_rate": 9.573193755373663e-06, + "loss": 1.4817, + "step": 45870 + }, + { + "epoch": 0.71, + "learning_rate": 9.563606664307997e-06, + "loss": 1.4697, + "step": 45880 + }, + { + "epoch": 0.71, + "learning_rate": 9.554023240650939e-06, + "loss": 1.4858, + "step": 45890 + }, + { + "epoch": 0.71, + "learning_rate": 9.544443486679333e-06, + "loss": 1.4932, + "step": 45900 + }, + { + "epoch": 0.71, + "learning_rate": 9.534867404669171e-06, + "loss": 1.4996, + "step": 45910 + }, + { + "epoch": 0.71, + "learning_rate": 9.525294996895526e-06, + "loss": 1.4784, + "step": 45920 + }, + { + "epoch": 0.71, + "learning_rate": 9.515726265632665e-06, + "loss": 1.4554, + "step": 45930 + }, + { + "epoch": 0.71, + "learning_rate": 9.506161213153924e-06, + "loss": 1.4681, + "step": 45940 + }, + { + "epoch": 0.71, + "learning_rate": 9.49659984173179e-06, + "loss": 1.3982, + "step": 45950 + }, + { + "epoch": 0.71, + "learning_rate": 9.48704215363788e-06, + "loss": 1.4924, + "step": 45960 + }, + { + "epoch": 0.71, + "learning_rate": 9.477488151142917e-06, + "loss": 1.5032, + "step": 45970 + }, + { + "epoch": 0.71, + "learning_rate": 9.467937836516768e-06, + "loss": 1.512, + "step": 45980 + }, + { + "epoch": 0.71, + "learning_rate": 9.458391212028414e-06, + "loss": 1.5217, + "step": 45990 + }, + { + "epoch": 0.71, + "learning_rate": 9.448848279945968e-06, + "loss": 1.4993, + "step": 46000 + }, + { + "epoch": 0.71, + "learning_rate": 9.439309042536633e-06, + "loss": 1.4774, + "step": 46010 + }, + { + "epoch": 0.71, + "learning_rate": 9.429773502066796e-06, + "loss": 1.4808, + "step": 46020 + }, + { + "epoch": 0.71, + "learning_rate": 9.420241660801895e-06, + "loss": 1.4738, + "step": 46030 + }, + { + "epoch": 0.71, + "learning_rate": 9.410713521006554e-06, + "loss": 1.4941, + "step": 46040 + }, + { + "epoch": 0.71, + "learning_rate": 9.401189084944464e-06, + "loss": 1.4982, + "step": 46050 + }, + { + "epoch": 0.71, + "learning_rate": 9.39166835487847e-06, + "loss": 1.4676, + "step": 46060 + }, + { + "epoch": 0.71, + "learning_rate": 9.382151333070522e-06, + "loss": 1.4917, + "step": 46070 + }, + { + "epoch": 0.71, + "learning_rate": 9.372638021781701e-06, + "loss": 1.492, + "step": 46080 + }, + { + "epoch": 0.72, + "learning_rate": 9.363128423272178e-06, + "loss": 1.4877, + "step": 46090 + }, + { + "epoch": 0.72, + "learning_rate": 9.353622539801288e-06, + "loss": 1.4719, + "step": 46100 + }, + { + "epoch": 0.72, + "learning_rate": 9.344120373627438e-06, + "loss": 1.5018, + "step": 46110 + }, + { + "epoch": 0.72, + "learning_rate": 9.33462192700818e-06, + "loss": 1.5436, + "step": 46120 + }, + { + "epoch": 0.72, + "learning_rate": 9.32512720220017e-06, + "loss": 1.5147, + "step": 46130 + }, + { + "epoch": 0.72, + "learning_rate": 9.315636201459185e-06, + "loss": 1.5054, + "step": 46140 + }, + { + "epoch": 0.72, + "learning_rate": 9.306148927040115e-06, + "loss": 1.4983, + "step": 46150 + }, + { + "epoch": 0.72, + "learning_rate": 9.296665381196967e-06, + "loss": 1.5413, + "step": 46160 + }, + { + "epoch": 0.72, + "learning_rate": 9.287185566182869e-06, + "loss": 1.5103, + "step": 46170 + }, + { + "epoch": 0.72, + "learning_rate": 9.277709484250024e-06, + "loss": 1.5451, + "step": 46180 + }, + { + "epoch": 0.72, + "learning_rate": 9.268237137649818e-06, + "loss": 1.5703, + "step": 46190 + }, + { + "epoch": 0.72, + "learning_rate": 9.25876852863268e-06, + "loss": 1.5122, + "step": 46200 + }, + { + "epoch": 0.72, + "learning_rate": 9.249303659448192e-06, + "loss": 1.5228, + "step": 46210 + }, + { + "epoch": 0.72, + "learning_rate": 9.239842532345036e-06, + "loss": 1.502, + "step": 46220 + }, + { + "epoch": 0.72, + "learning_rate": 9.23038514957101e-06, + "loss": 1.5609, + "step": 46230 + }, + { + "epoch": 0.72, + "learning_rate": 9.220931513372996e-06, + "loss": 1.4846, + "step": 46240 + }, + { + "epoch": 0.72, + "learning_rate": 9.21148162599704e-06, + "loss": 1.4585, + "step": 46250 + }, + { + "epoch": 0.72, + "learning_rate": 9.202035489688233e-06, + "loss": 1.4693, + "step": 46260 + }, + { + "epoch": 0.72, + "learning_rate": 9.192593106690837e-06, + "loss": 1.4675, + "step": 46270 + }, + { + "epoch": 0.72, + "learning_rate": 9.183154479248165e-06, + "loss": 1.4985, + "step": 46280 + }, + { + "epoch": 0.72, + "learning_rate": 9.17371960960268e-06, + "loss": 1.511, + "step": 46290 + }, + { + "epoch": 0.72, + "learning_rate": 9.164288499995929e-06, + "loss": 1.5003, + "step": 46300 + }, + { + "epoch": 0.72, + "learning_rate": 9.15486115266858e-06, + "loss": 1.4743, + "step": 46310 + }, + { + "epoch": 0.72, + "learning_rate": 9.145437569860399e-06, + "loss": 1.473, + "step": 46320 + }, + { + "epoch": 0.72, + "learning_rate": 9.136017753810255e-06, + "loss": 1.5164, + "step": 46330 + }, + { + "epoch": 0.72, + "learning_rate": 9.12660170675614e-06, + "loss": 1.2634, + "step": 46340 + }, + { + "epoch": 0.72, + "learning_rate": 9.117189430935111e-06, + "loss": 1.2658, + "step": 46350 + }, + { + "epoch": 0.72, + "learning_rate": 9.107780928583387e-06, + "loss": 1.4907, + "step": 46360 + }, + { + "epoch": 0.72, + "learning_rate": 9.098376201936231e-06, + "loss": 1.4938, + "step": 46370 + }, + { + "epoch": 0.72, + "learning_rate": 9.088975253228049e-06, + "loss": 1.4837, + "step": 46380 + }, + { + "epoch": 0.72, + "learning_rate": 9.079578084692336e-06, + "loss": 1.5032, + "step": 46390 + }, + { + "epoch": 0.72, + "learning_rate": 9.070184698561698e-06, + "loss": 1.5036, + "step": 46400 + }, + { + "epoch": 0.72, + "learning_rate": 9.060795097067807e-06, + "loss": 1.4836, + "step": 46410 + }, + { + "epoch": 0.72, + "learning_rate": 9.051409282441497e-06, + "loss": 1.4657, + "step": 46420 + }, + { + "epoch": 0.72, + "learning_rate": 9.042027256912646e-06, + "loss": 1.4568, + "step": 46430 + }, + { + "epoch": 0.72, + "learning_rate": 9.03264902271026e-06, + "loss": 1.5213, + "step": 46440 + }, + { + "epoch": 0.72, + "learning_rate": 9.023274582062438e-06, + "loss": 1.4912, + "step": 46450 + }, + { + "epoch": 0.72, + "learning_rate": 9.013903937196377e-06, + "loss": 1.4793, + "step": 46460 + }, + { + "epoch": 0.72, + "learning_rate": 9.004537090338377e-06, + "loss": 1.4794, + "step": 46470 + }, + { + "epoch": 0.72, + "learning_rate": 8.995174043713828e-06, + "loss": 1.4688, + "step": 46480 + }, + { + "epoch": 0.72, + "learning_rate": 8.985814799547224e-06, + "loss": 1.4745, + "step": 46490 + }, + { + "epoch": 0.72, + "learning_rate": 8.97645936006215e-06, + "loss": 1.4903, + "step": 46500 + }, + { + "epoch": 0.72, + "learning_rate": 8.9671077274813e-06, + "loss": 1.463, + "step": 46510 + }, + { + "epoch": 0.72, + "learning_rate": 8.957759904026427e-06, + "loss": 1.4662, + "step": 46520 + }, + { + "epoch": 0.72, + "learning_rate": 8.948415891918438e-06, + "loss": 1.4628, + "step": 46530 + }, + { + "epoch": 0.72, + "learning_rate": 8.93907569337728e-06, + "loss": 1.511, + "step": 46540 + }, + { + "epoch": 0.72, + "learning_rate": 8.92973931062202e-06, + "loss": 1.4924, + "step": 46550 + }, + { + "epoch": 0.72, + "learning_rate": 8.920406745870816e-06, + "loss": 1.4794, + "step": 46560 + }, + { + "epoch": 0.72, + "learning_rate": 8.911078001340924e-06, + "loss": 1.4765, + "step": 46570 + }, + { + "epoch": 0.72, + "learning_rate": 8.901753079248665e-06, + "loss": 1.4915, + "step": 46580 + }, + { + "epoch": 0.72, + "learning_rate": 8.892431981809499e-06, + "loss": 1.4789, + "step": 46590 + }, + { + "epoch": 0.72, + "learning_rate": 8.883114711237931e-06, + "loss": 1.4648, + "step": 46600 + }, + { + "epoch": 0.72, + "learning_rate": 8.87380126974758e-06, + "loss": 1.4777, + "step": 46610 + }, + { + "epoch": 0.72, + "learning_rate": 8.864491659551157e-06, + "loss": 1.4521, + "step": 46620 + }, + { + "epoch": 0.72, + "learning_rate": 8.855185882860454e-06, + "loss": 1.4541, + "step": 46630 + }, + { + "epoch": 0.72, + "learning_rate": 8.845883941886358e-06, + "loss": 1.4562, + "step": 46640 + }, + { + "epoch": 0.72, + "learning_rate": 8.836585838838837e-06, + "loss": 1.4695, + "step": 46650 + }, + { + "epoch": 0.72, + "learning_rate": 8.827291575926966e-06, + "loss": 1.478, + "step": 46660 + }, + { + "epoch": 0.72, + "learning_rate": 8.818001155358869e-06, + "loss": 1.4659, + "step": 46670 + }, + { + "epoch": 0.72, + "learning_rate": 8.808714579341814e-06, + "loss": 1.4782, + "step": 46680 + }, + { + "epoch": 0.72, + "learning_rate": 8.799431850082099e-06, + "loss": 1.4849, + "step": 46690 + }, + { + "epoch": 0.72, + "learning_rate": 8.790152969785143e-06, + "loss": 1.4688, + "step": 46700 + }, + { + "epoch": 0.72, + "learning_rate": 8.780877940655441e-06, + "loss": 1.4743, + "step": 46710 + }, + { + "epoch": 0.72, + "learning_rate": 8.771606764896568e-06, + "loss": 1.4887, + "step": 46720 + }, + { + "epoch": 0.73, + "learning_rate": 8.762339444711192e-06, + "loss": 1.4866, + "step": 46730 + }, + { + "epoch": 0.73, + "learning_rate": 8.75307598230107e-06, + "loss": 1.4962, + "step": 46740 + }, + { + "epoch": 0.73, + "learning_rate": 8.743816379867007e-06, + "loss": 1.5155, + "step": 46750 + }, + { + "epoch": 0.73, + "learning_rate": 8.73456063960895e-06, + "loss": 1.5367, + "step": 46760 + }, + { + "epoch": 0.73, + "learning_rate": 8.72530876372587e-06, + "loss": 1.507, + "step": 46770 + }, + { + "epoch": 0.73, + "learning_rate": 8.716060754415861e-06, + "loss": 1.496, + "step": 46780 + }, + { + "epoch": 0.73, + "learning_rate": 8.706816613876076e-06, + "loss": 1.4854, + "step": 46790 + }, + { + "epoch": 0.73, + "learning_rate": 8.697576344302758e-06, + "loss": 1.5139, + "step": 46800 + }, + { + "epoch": 0.73, + "learning_rate": 8.688339947891228e-06, + "loss": 1.4783, + "step": 46810 + }, + { + "epoch": 0.73, + "learning_rate": 8.67910742683589e-06, + "loss": 1.488, + "step": 46820 + }, + { + "epoch": 0.73, + "learning_rate": 8.66987878333023e-06, + "loss": 1.4473, + "step": 46830 + }, + { + "epoch": 0.73, + "learning_rate": 8.66065401956678e-06, + "loss": 1.4943, + "step": 46840 + }, + { + "epoch": 0.73, + "learning_rate": 8.651433137737217e-06, + "loss": 1.4954, + "step": 46850 + }, + { + "epoch": 0.73, + "learning_rate": 8.642216140032228e-06, + "loss": 1.4675, + "step": 46860 + }, + { + "epoch": 0.73, + "learning_rate": 8.633003028641611e-06, + "loss": 1.4741, + "step": 46870 + }, + { + "epoch": 0.73, + "learning_rate": 8.623793805754241e-06, + "loss": 1.4591, + "step": 46880 + }, + { + "epoch": 0.73, + "learning_rate": 8.614588473558066e-06, + "loss": 1.4226, + "step": 46890 + }, + { + "epoch": 0.73, + "learning_rate": 8.605387034240087e-06, + "loss": 1.485, + "step": 46900 + }, + { + "epoch": 0.73, + "learning_rate": 8.596189489986428e-06, + "loss": 1.4972, + "step": 46910 + }, + { + "epoch": 0.73, + "learning_rate": 8.586995842982236e-06, + "loss": 1.487, + "step": 46920 + }, + { + "epoch": 0.73, + "learning_rate": 8.577806095411767e-06, + "loss": 1.4927, + "step": 46930 + }, + { + "epoch": 0.73, + "learning_rate": 8.568620249458336e-06, + "loss": 1.508, + "step": 46940 + }, + { + "epoch": 0.73, + "learning_rate": 8.559438307304338e-06, + "loss": 1.4723, + "step": 46950 + }, + { + "epoch": 0.73, + "learning_rate": 8.550260271131236e-06, + "loss": 1.4673, + "step": 46960 + }, + { + "epoch": 0.73, + "learning_rate": 8.541086143119561e-06, + "loss": 1.4407, + "step": 46970 + }, + { + "epoch": 0.73, + "learning_rate": 8.531915925448925e-06, + "loss": 1.4962, + "step": 46980 + }, + { + "epoch": 0.73, + "learning_rate": 8.522749620298004e-06, + "loss": 1.4612, + "step": 46990 + }, + { + "epoch": 0.73, + "learning_rate": 8.513587229844556e-06, + "loss": 1.4714, + "step": 47000 + }, + { + "epoch": 0.73, + "learning_rate": 8.504428756265375e-06, + "loss": 1.4823, + "step": 47010 + }, + { + "epoch": 0.73, + "learning_rate": 8.495274201736384e-06, + "loss": 1.4978, + "step": 47020 + }, + { + "epoch": 0.73, + "learning_rate": 8.486123568432511e-06, + "loss": 1.4868, + "step": 47030 + }, + { + "epoch": 0.73, + "learning_rate": 8.476976858527794e-06, + "loss": 1.4599, + "step": 47040 + }, + { + "epoch": 0.73, + "learning_rate": 8.467834074195321e-06, + "loss": 1.4624, + "step": 47050 + }, + { + "epoch": 0.73, + "learning_rate": 8.458695217607267e-06, + "loss": 1.4739, + "step": 47060 + }, + { + "epoch": 0.73, + "learning_rate": 8.449560290934833e-06, + "loss": 1.4979, + "step": 47070 + }, + { + "epoch": 0.73, + "learning_rate": 8.440429296348343e-06, + "loss": 1.4847, + "step": 47080 + }, + { + "epoch": 0.73, + "learning_rate": 8.431302236017132e-06, + "loss": 1.4802, + "step": 47090 + }, + { + "epoch": 0.73, + "learning_rate": 8.422179112109635e-06, + "loss": 1.4948, + "step": 47100 + }, + { + "epoch": 0.73, + "learning_rate": 8.413059926793343e-06, + "loss": 1.5017, + "step": 47110 + }, + { + "epoch": 0.73, + "learning_rate": 8.403944682234808e-06, + "loss": 1.4524, + "step": 47120 + }, + { + "epoch": 0.73, + "learning_rate": 8.394833380599649e-06, + "loss": 1.441, + "step": 47130 + }, + { + "epoch": 0.73, + "learning_rate": 8.385726024052542e-06, + "loss": 1.4815, + "step": 47140 + }, + { + "epoch": 0.73, + "learning_rate": 8.376622614757245e-06, + "loss": 1.4987, + "step": 47150 + }, + { + "epoch": 0.73, + "learning_rate": 8.36752315487654e-06, + "loss": 1.4666, + "step": 47160 + }, + { + "epoch": 0.73, + "learning_rate": 8.35842764657232e-06, + "loss": 1.4762, + "step": 47170 + }, + { + "epoch": 0.73, + "learning_rate": 8.349336092005488e-06, + "loss": 1.4914, + "step": 47180 + }, + { + "epoch": 0.73, + "learning_rate": 8.340248493336061e-06, + "loss": 1.4832, + "step": 47190 + }, + { + "epoch": 0.73, + "learning_rate": 8.331164852723066e-06, + "loss": 1.4713, + "step": 47200 + }, + { + "epoch": 0.73, + "learning_rate": 8.322085172324623e-06, + "loss": 1.455, + "step": 47210 + }, + { + "epoch": 0.73, + "learning_rate": 8.3130094542979e-06, + "loss": 1.4844, + "step": 47220 + }, + { + "epoch": 0.73, + "learning_rate": 8.303937700799124e-06, + "loss": 1.4817, + "step": 47230 + }, + { + "epoch": 0.73, + "learning_rate": 8.294869913983564e-06, + "loss": 1.4883, + "step": 47240 + }, + { + "epoch": 0.73, + "learning_rate": 8.285806096005593e-06, + "loss": 1.4827, + "step": 47250 + }, + { + "epoch": 0.73, + "learning_rate": 8.276746249018585e-06, + "loss": 1.5028, + "step": 47260 + }, + { + "epoch": 0.73, + "learning_rate": 8.267690375175007e-06, + "loss": 1.5385, + "step": 47270 + }, + { + "epoch": 0.73, + "learning_rate": 8.258638476626368e-06, + "loss": 1.4914, + "step": 47280 + }, + { + "epoch": 0.73, + "learning_rate": 8.249590555523237e-06, + "loss": 1.4922, + "step": 47290 + }, + { + "epoch": 0.73, + "learning_rate": 8.24054661401524e-06, + "loss": 1.5184, + "step": 47300 + }, + { + "epoch": 0.73, + "learning_rate": 8.231506654251048e-06, + "loss": 1.477, + "step": 47310 + }, + { + "epoch": 0.73, + "learning_rate": 8.222470678378407e-06, + "loss": 1.4718, + "step": 47320 + }, + { + "epoch": 0.73, + "learning_rate": 8.213438688544073e-06, + "loss": 1.4732, + "step": 47330 + }, + { + "epoch": 0.73, + "learning_rate": 8.204410686893918e-06, + "loss": 1.4546, + "step": 47340 + }, + { + "epoch": 0.73, + "learning_rate": 8.195386675572809e-06, + "loss": 1.4826, + "step": 47350 + }, + { + "epoch": 0.73, + "learning_rate": 8.186366656724692e-06, + "loss": 1.472, + "step": 47360 + }, + { + "epoch": 0.73, + "learning_rate": 8.177350632492567e-06, + "loss": 1.4703, + "step": 47370 + }, + { + "epoch": 0.74, + "learning_rate": 8.168338605018483e-06, + "loss": 1.4656, + "step": 47380 + }, + { + "epoch": 0.74, + "learning_rate": 8.15933057644351e-06, + "loss": 1.4873, + "step": 47390 + }, + { + "epoch": 0.74, + "learning_rate": 8.15032654890783e-06, + "loss": 1.4679, + "step": 47400 + }, + { + "epoch": 0.74, + "learning_rate": 8.1413265245506e-06, + "loss": 1.5016, + "step": 47410 + }, + { + "epoch": 0.74, + "learning_rate": 8.1323305055101e-06, + "loss": 1.4804, + "step": 47420 + }, + { + "epoch": 0.74, + "learning_rate": 8.123338493923593e-06, + "loss": 1.4869, + "step": 47430 + }, + { + "epoch": 0.74, + "learning_rate": 8.114350491927433e-06, + "loss": 1.4836, + "step": 47440 + }, + { + "epoch": 0.74, + "learning_rate": 8.105366501657002e-06, + "loss": 1.4748, + "step": 47450 + }, + { + "epoch": 0.74, + "learning_rate": 8.096386525246738e-06, + "loss": 1.4752, + "step": 47460 + }, + { + "epoch": 0.74, + "learning_rate": 8.087410564830116e-06, + "loss": 1.4715, + "step": 47470 + }, + { + "epoch": 0.74, + "learning_rate": 8.078438622539666e-06, + "loss": 1.5017, + "step": 47480 + }, + { + "epoch": 0.74, + "learning_rate": 8.06947070050697e-06, + "loss": 1.4867, + "step": 47490 + }, + { + "epoch": 0.74, + "learning_rate": 8.060506800862613e-06, + "loss": 1.4781, + "step": 47500 + }, + { + "epoch": 0.74, + "learning_rate": 8.051546925736298e-06, + "loss": 1.4666, + "step": 47510 + }, + { + "epoch": 0.74, + "learning_rate": 8.042591077256695e-06, + "loss": 1.4919, + "step": 47520 + }, + { + "epoch": 0.74, + "learning_rate": 8.033639257551568e-06, + "loss": 1.475, + "step": 47530 + }, + { + "epoch": 0.74, + "learning_rate": 8.024691468747706e-06, + "loss": 1.4831, + "step": 47540 + }, + { + "epoch": 0.74, + "learning_rate": 8.015747712970948e-06, + "loss": 1.4788, + "step": 47550 + }, + { + "epoch": 0.74, + "learning_rate": 8.006807992346144e-06, + "loss": 1.4679, + "step": 47560 + }, + { + "epoch": 0.74, + "learning_rate": 7.997872308997248e-06, + "loss": 1.4902, + "step": 47570 + }, + { + "epoch": 0.74, + "learning_rate": 7.988940665047187e-06, + "loss": 1.4785, + "step": 47580 + }, + { + "epoch": 0.74, + "learning_rate": 7.980013062617972e-06, + "loss": 1.4556, + "step": 47590 + }, + { + "epoch": 0.74, + "learning_rate": 7.971089503830637e-06, + "loss": 1.4901, + "step": 47600 + }, + { + "epoch": 0.74, + "learning_rate": 7.96216999080526e-06, + "loss": 1.4582, + "step": 47610 + }, + { + "epoch": 0.74, + "learning_rate": 7.953254525660952e-06, + "loss": 1.4817, + "step": 47620 + }, + { + "epoch": 0.74, + "learning_rate": 7.944343110515875e-06, + "loss": 1.4782, + "step": 47630 + }, + { + "epoch": 0.74, + "learning_rate": 7.935435747487214e-06, + "loss": 1.4891, + "step": 47640 + }, + { + "epoch": 0.74, + "learning_rate": 7.9265324386912e-06, + "loss": 1.4592, + "step": 47650 + }, + { + "epoch": 0.74, + "learning_rate": 7.917633186243106e-06, + "loss": 1.4818, + "step": 47660 + }, + { + "epoch": 0.74, + "learning_rate": 7.908737992257209e-06, + "loss": 1.5071, + "step": 47670 + }, + { + "epoch": 0.74, + "learning_rate": 7.89984685884688e-06, + "loss": 1.4964, + "step": 47680 + }, + { + "epoch": 0.74, + "learning_rate": 7.89095978812447e-06, + "loss": 1.4745, + "step": 47690 + }, + { + "epoch": 0.74, + "learning_rate": 7.882076782201389e-06, + "loss": 1.4741, + "step": 47700 + }, + { + "epoch": 0.74, + "learning_rate": 7.873197843188085e-06, + "loss": 1.4745, + "step": 47710 + }, + { + "epoch": 0.74, + "learning_rate": 7.86432297319403e-06, + "loss": 1.454, + "step": 47720 + }, + { + "epoch": 0.74, + "learning_rate": 7.855452174327732e-06, + "loss": 1.4666, + "step": 47730 + }, + { + "epoch": 0.74, + "learning_rate": 7.846585448696738e-06, + "loss": 1.4895, + "step": 47740 + }, + { + "epoch": 0.74, + "learning_rate": 7.837722798407626e-06, + "loss": 1.456, + "step": 47750 + }, + { + "epoch": 0.74, + "learning_rate": 7.828864225565977e-06, + "loss": 1.4764, + "step": 47760 + }, + { + "epoch": 0.74, + "learning_rate": 7.820009732276465e-06, + "loss": 1.4636, + "step": 47770 + }, + { + "epoch": 0.74, + "learning_rate": 7.811159320642728e-06, + "loss": 1.4285, + "step": 47780 + }, + { + "epoch": 0.74, + "learning_rate": 7.802312992767477e-06, + "loss": 1.5169, + "step": 47790 + }, + { + "epoch": 0.74, + "learning_rate": 7.793470750752435e-06, + "loss": 1.4799, + "step": 47800 + }, + { + "epoch": 0.74, + "learning_rate": 7.784632596698374e-06, + "loss": 1.5157, + "step": 47810 + }, + { + "epoch": 0.74, + "learning_rate": 7.775798532705047e-06, + "loss": 1.4954, + "step": 47820 + }, + { + "epoch": 0.74, + "learning_rate": 7.76696856087131e-06, + "loss": 1.5439, + "step": 47830 + }, + { + "epoch": 0.74, + "learning_rate": 7.758142683294974e-06, + "loss": 1.5205, + "step": 47840 + }, + { + "epoch": 0.74, + "learning_rate": 7.749320902072918e-06, + "loss": 1.514, + "step": 47850 + }, + { + "epoch": 0.74, + "learning_rate": 7.74050321930104e-06, + "loss": 1.4671, + "step": 47860 + }, + { + "epoch": 0.74, + "learning_rate": 7.73168963707426e-06, + "loss": 1.4976, + "step": 47870 + }, + { + "epoch": 0.74, + "learning_rate": 7.722880157486529e-06, + "loss": 1.4714, + "step": 47880 + }, + { + "epoch": 0.74, + "learning_rate": 7.714074782630818e-06, + "loss": 1.4916, + "step": 47890 + }, + { + "epoch": 0.74, + "learning_rate": 7.705273514599126e-06, + "loss": 1.489, + "step": 47900 + }, + { + "epoch": 0.74, + "learning_rate": 7.696476355482473e-06, + "loss": 1.4979, + "step": 47910 + }, + { + "epoch": 0.74, + "learning_rate": 7.687683307370919e-06, + "loss": 1.4943, + "step": 47920 + }, + { + "epoch": 0.74, + "learning_rate": 7.678894372353504e-06, + "loss": 1.4786, + "step": 47930 + }, + { + "epoch": 0.74, + "learning_rate": 7.670109552518356e-06, + "loss": 1.46, + "step": 47940 + }, + { + "epoch": 0.74, + "learning_rate": 7.661328849952565e-06, + "loss": 1.4619, + "step": 47950 + }, + { + "epoch": 0.74, + "learning_rate": 7.652552266742272e-06, + "loss": 1.5009, + "step": 47960 + }, + { + "epoch": 0.74, + "learning_rate": 7.643779804972642e-06, + "loss": 1.4769, + "step": 47970 + }, + { + "epoch": 0.74, + "learning_rate": 7.635011466727853e-06, + "loss": 1.4845, + "step": 47980 + }, + { + "epoch": 0.74, + "learning_rate": 7.626247254091087e-06, + "loss": 1.4777, + "step": 47990 + }, + { + "epoch": 0.74, + "learning_rate": 7.6174871691445935e-06, + "loss": 1.4806, + "step": 48000 + }, + { + "epoch": 0.74, + "learning_rate": 7.608731213969586e-06, + "loss": 1.4884, + "step": 48010 + }, + { + "epoch": 0.75, + "learning_rate": 7.599979390646325e-06, + "loss": 1.4958, + "step": 48020 + }, + { + "epoch": 0.75, + "learning_rate": 7.591231701254092e-06, + "loss": 1.4857, + "step": 48030 + }, + { + "epoch": 0.75, + "learning_rate": 7.58248814787118e-06, + "loss": 1.4582, + "step": 48040 + }, + { + "epoch": 0.75, + "learning_rate": 7.573748732574895e-06, + "loss": 1.4682, + "step": 48050 + }, + { + "epoch": 0.75, + "learning_rate": 7.56501345744157e-06, + "loss": 1.4969, + "step": 48060 + }, + { + "epoch": 0.75, + "learning_rate": 7.556282324546551e-06, + "loss": 1.4768, + "step": 48070 + }, + { + "epoch": 0.75, + "learning_rate": 7.547555335964179e-06, + "loss": 1.482, + "step": 48080 + }, + { + "epoch": 0.75, + "learning_rate": 7.5388324937678554e-06, + "loss": 1.4827, + "step": 48090 + }, + { + "epoch": 0.75, + "learning_rate": 7.5301138000299445e-06, + "loss": 1.4932, + "step": 48100 + }, + { + "epoch": 0.75, + "learning_rate": 7.521399256821879e-06, + "loss": 1.4832, + "step": 48110 + }, + { + "epoch": 0.75, + "learning_rate": 7.512688866214051e-06, + "loss": 1.4903, + "step": 48120 + }, + { + "epoch": 0.75, + "learning_rate": 7.503982630275904e-06, + "loss": 1.496, + "step": 48130 + }, + { + "epoch": 0.75, + "learning_rate": 7.4952805510758835e-06, + "loss": 1.4717, + "step": 48140 + }, + { + "epoch": 0.75, + "learning_rate": 7.486582630681452e-06, + "loss": 1.4708, + "step": 48150 + }, + { + "epoch": 0.75, + "learning_rate": 7.477888871159056e-06, + "loss": 1.614, + "step": 48160 + }, + { + "epoch": 0.75, + "learning_rate": 7.469199274574204e-06, + "loss": 1.4598, + "step": 48170 + }, + { + "epoch": 0.75, + "learning_rate": 7.460513842991368e-06, + "loss": 1.4795, + "step": 48180 + }, + { + "epoch": 0.75, + "learning_rate": 7.451832578474058e-06, + "loss": 1.5045, + "step": 48190 + }, + { + "epoch": 0.75, + "learning_rate": 7.443155483084782e-06, + "loss": 1.4814, + "step": 48200 + }, + { + "epoch": 0.75, + "learning_rate": 7.434482558885064e-06, + "loss": 1.4969, + "step": 48210 + }, + { + "epoch": 0.75, + "learning_rate": 7.4258138079354335e-06, + "loss": 1.477, + "step": 48220 + }, + { + "epoch": 0.75, + "learning_rate": 7.4171492322954285e-06, + "loss": 1.4691, + "step": 48230 + }, + { + "epoch": 0.75, + "learning_rate": 7.408488834023605e-06, + "loss": 1.4571, + "step": 48240 + }, + { + "epoch": 0.75, + "learning_rate": 7.399832615177496e-06, + "loss": 1.5241, + "step": 48250 + }, + { + "epoch": 0.75, + "learning_rate": 7.391180577813692e-06, + "loss": 1.4803, + "step": 48260 + }, + { + "epoch": 0.75, + "learning_rate": 7.382532723987736e-06, + "loss": 1.4696, + "step": 48270 + }, + { + "epoch": 0.75, + "learning_rate": 7.373889055754213e-06, + "loss": 1.4719, + "step": 48280 + }, + { + "epoch": 0.75, + "learning_rate": 7.3652495751667e-06, + "loss": 1.4654, + "step": 48290 + }, + { + "epoch": 0.75, + "learning_rate": 7.356614284277791e-06, + "loss": 1.4712, + "step": 48300 + }, + { + "epoch": 0.75, + "learning_rate": 7.347983185139048e-06, + "loss": 1.4928, + "step": 48310 + }, + { + "epoch": 0.75, + "learning_rate": 7.339356279801104e-06, + "loss": 1.4756, + "step": 48320 + }, + { + "epoch": 0.75, + "learning_rate": 7.3307335703135195e-06, + "loss": 1.4933, + "step": 48330 + }, + { + "epoch": 0.75, + "learning_rate": 7.322115058724926e-06, + "loss": 1.5256, + "step": 48340 + }, + { + "epoch": 0.75, + "learning_rate": 7.313500747082902e-06, + "loss": 1.5002, + "step": 48350 + }, + { + "epoch": 0.75, + "learning_rate": 7.3048906374340645e-06, + "loss": 1.4994, + "step": 48360 + }, + { + "epoch": 0.75, + "learning_rate": 7.296284731824015e-06, + "loss": 1.5227, + "step": 48370 + }, + { + "epoch": 0.75, + "learning_rate": 7.287683032297369e-06, + "loss": 1.5546, + "step": 48380 + }, + { + "epoch": 0.75, + "learning_rate": 7.279085540897726e-06, + "loss": 1.537, + "step": 48390 + }, + { + "epoch": 0.75, + "learning_rate": 7.270492259667699e-06, + "loss": 1.5325, + "step": 48400 + }, + { + "epoch": 0.75, + "learning_rate": 7.261903190648906e-06, + "loss": 1.5425, + "step": 48410 + }, + { + "epoch": 0.75, + "learning_rate": 7.253318335881931e-06, + "loss": 1.5262, + "step": 48420 + }, + { + "epoch": 0.75, + "learning_rate": 7.244737697406409e-06, + "loss": 1.5214, + "step": 48430 + }, + { + "epoch": 0.75, + "learning_rate": 7.2361612772609246e-06, + "loss": 1.5081, + "step": 48440 + }, + { + "epoch": 0.75, + "learning_rate": 7.227589077483085e-06, + "loss": 1.5809, + "step": 48450 + }, + { + "epoch": 0.75, + "learning_rate": 7.219021100109494e-06, + "loss": 1.4774, + "step": 48460 + }, + { + "epoch": 0.75, + "learning_rate": 7.210457347175756e-06, + "loss": 1.4893, + "step": 48470 + }, + { + "epoch": 0.75, + "learning_rate": 7.2018978207164355e-06, + "loss": 1.5304, + "step": 48480 + }, + { + "epoch": 0.75, + "learning_rate": 7.193342522765159e-06, + "loss": 1.5455, + "step": 48490 + }, + { + "epoch": 0.75, + "learning_rate": 7.184791455354486e-06, + "loss": 1.5436, + "step": 48500 + }, + { + "epoch": 0.75, + "learning_rate": 7.176244620516001e-06, + "loss": 1.5235, + "step": 48510 + }, + { + "epoch": 0.75, + "learning_rate": 7.167702020280282e-06, + "loss": 1.4938, + "step": 48520 + }, + { + "epoch": 0.75, + "learning_rate": 7.159163656676893e-06, + "loss": 1.4499, + "step": 48530 + }, + { + "epoch": 0.75, + "learning_rate": 7.150629531734398e-06, + "loss": 1.4967, + "step": 48540 + }, + { + "epoch": 0.75, + "learning_rate": 7.142099647480349e-06, + "loss": 1.4689, + "step": 48550 + }, + { + "epoch": 0.75, + "learning_rate": 7.133574005941293e-06, + "loss": 1.4718, + "step": 48560 + }, + { + "epoch": 0.75, + "learning_rate": 7.125052609142768e-06, + "loss": 1.4661, + "step": 48570 + }, + { + "epoch": 0.75, + "learning_rate": 7.116535459109313e-06, + "loss": 1.492, + "step": 48580 + }, + { + "epoch": 0.75, + "learning_rate": 7.1080225578644276e-06, + "loss": 1.499, + "step": 48590 + }, + { + "epoch": 0.75, + "learning_rate": 7.099513907430652e-06, + "loss": 1.4844, + "step": 48600 + }, + { + "epoch": 0.75, + "learning_rate": 7.091009509829466e-06, + "loss": 1.4861, + "step": 48610 + }, + { + "epoch": 0.75, + "learning_rate": 7.0825093670813695e-06, + "loss": 1.4741, + "step": 48620 + }, + { + "epoch": 0.75, + "learning_rate": 7.0740134812058405e-06, + "loss": 1.2331, + "step": 48630 + }, + { + "epoch": 0.75, + "learning_rate": 7.065521854221358e-06, + "loss": 1.3972, + "step": 48640 + }, + { + "epoch": 0.75, + "learning_rate": 7.057034488145356e-06, + "loss": 1.4636, + "step": 48650 + }, + { + "epoch": 0.75, + "learning_rate": 7.0485513849943095e-06, + "loss": 1.4798, + "step": 48660 + }, + { + "epoch": 0.76, + "learning_rate": 7.04007254678363e-06, + "loss": 1.4766, + "step": 48670 + }, + { + "epoch": 0.76, + "learning_rate": 7.03159797552774e-06, + "loss": 1.484, + "step": 48680 + }, + { + "epoch": 0.76, + "learning_rate": 7.023127673240049e-06, + "loss": 1.5094, + "step": 48690 + }, + { + "epoch": 0.76, + "learning_rate": 7.014661641932946e-06, + "loss": 1.4542, + "step": 48700 + }, + { + "epoch": 0.76, + "learning_rate": 7.0061998836178085e-06, + "loss": 1.4556, + "step": 48710 + }, + { + "epoch": 0.76, + "learning_rate": 6.997742400304997e-06, + "loss": 1.4898, + "step": 48720 + }, + { + "epoch": 0.76, + "learning_rate": 6.989289194003865e-06, + "loss": 1.4871, + "step": 48730 + }, + { + "epoch": 0.76, + "learning_rate": 6.980840266722716e-06, + "loss": 1.4616, + "step": 48740 + }, + { + "epoch": 0.76, + "learning_rate": 6.972395620468896e-06, + "loss": 1.4628, + "step": 48750 + }, + { + "epoch": 0.76, + "learning_rate": 6.963955257248677e-06, + "loss": 1.4765, + "step": 48760 + }, + { + "epoch": 0.76, + "learning_rate": 6.955519179067349e-06, + "loss": 1.476, + "step": 48770 + }, + { + "epoch": 0.76, + "learning_rate": 6.947087387929163e-06, + "loss": 1.4587, + "step": 48780 + }, + { + "epoch": 0.76, + "learning_rate": 6.93865988583737e-06, + "loss": 1.4651, + "step": 48790 + }, + { + "epoch": 0.76, + "learning_rate": 6.930236674794186e-06, + "loss": 1.4909, + "step": 48800 + }, + { + "epoch": 0.76, + "learning_rate": 6.921817756800822e-06, + "loss": 1.4651, + "step": 48810 + }, + { + "epoch": 0.76, + "learning_rate": 6.9134031338574404e-06, + "loss": 1.4697, + "step": 48820 + }, + { + "epoch": 0.76, + "learning_rate": 6.904992807963237e-06, + "loss": 1.449, + "step": 48830 + }, + { + "epoch": 0.76, + "learning_rate": 6.8965867811163234e-06, + "loss": 1.4761, + "step": 48840 + }, + { + "epoch": 0.76, + "learning_rate": 6.888185055313834e-06, + "loss": 1.4746, + "step": 48850 + }, + { + "epoch": 0.76, + "learning_rate": 6.879787632551862e-06, + "loss": 1.4634, + "step": 48860 + }, + { + "epoch": 0.76, + "learning_rate": 6.871394514825488e-06, + "loss": 1.4616, + "step": 48870 + }, + { + "epoch": 0.76, + "learning_rate": 6.8630057041287665e-06, + "loss": 1.4629, + "step": 48880 + }, + { + "epoch": 0.76, + "learning_rate": 6.854621202454722e-06, + "loss": 1.476, + "step": 48890 + }, + { + "epoch": 0.76, + "learning_rate": 6.84624101179537e-06, + "loss": 1.463, + "step": 48900 + }, + { + "epoch": 0.76, + "learning_rate": 6.837865134141674e-06, + "loss": 1.4489, + "step": 48910 + }, + { + "epoch": 0.76, + "learning_rate": 6.8294935714836195e-06, + "loss": 1.4666, + "step": 48920 + }, + { + "epoch": 0.76, + "learning_rate": 6.821126325810112e-06, + "loss": 1.4869, + "step": 48930 + }, + { + "epoch": 0.76, + "learning_rate": 6.812763399109073e-06, + "loss": 1.4637, + "step": 48940 + }, + { + "epoch": 0.76, + "learning_rate": 6.804404793367378e-06, + "loss": 1.476, + "step": 48950 + }, + { + "epoch": 0.76, + "learning_rate": 6.796050510570887e-06, + "loss": 1.4861, + "step": 48960 + }, + { + "epoch": 0.76, + "learning_rate": 6.787700552704409e-06, + "loss": 1.4865, + "step": 48970 + }, + { + "epoch": 0.76, + "learning_rate": 6.779354921751774e-06, + "loss": 1.4967, + "step": 48980 + }, + { + "epoch": 0.76, + "learning_rate": 6.771013619695726e-06, + "loss": 1.511, + "step": 48990 + }, + { + "epoch": 0.76, + "learning_rate": 6.762676648518018e-06, + "loss": 1.5213, + "step": 49000 + }, + { + "epoch": 0.76, + "learning_rate": 6.754344010199362e-06, + "loss": 1.4955, + "step": 49010 + }, + { + "epoch": 0.76, + "learning_rate": 6.746015706719447e-06, + "loss": 1.4646, + "step": 49020 + }, + { + "epoch": 0.76, + "learning_rate": 6.737691740056923e-06, + "loss": 1.4994, + "step": 49030 + }, + { + "epoch": 0.76, + "learning_rate": 6.729372112189417e-06, + "loss": 1.4643, + "step": 49040 + }, + { + "epoch": 0.76, + "learning_rate": 6.721056825093522e-06, + "loss": 1.4806, + "step": 49050 + }, + { + "epoch": 0.76, + "learning_rate": 6.7127458807448e-06, + "loss": 1.4764, + "step": 49060 + }, + { + "epoch": 0.76, + "learning_rate": 6.704439281117789e-06, + "loss": 1.4937, + "step": 49070 + }, + { + "epoch": 0.76, + "learning_rate": 6.696137028185965e-06, + "loss": 1.4644, + "step": 49080 + }, + { + "epoch": 0.76, + "learning_rate": 6.687839123921821e-06, + "loss": 1.4555, + "step": 49090 + }, + { + "epoch": 0.76, + "learning_rate": 6.679545570296772e-06, + "loss": 1.4319, + "step": 49100 + }, + { + "epoch": 0.76, + "learning_rate": 6.6712563692812204e-06, + "loss": 1.4405, + "step": 49110 + }, + { + "epoch": 0.76, + "learning_rate": 6.66297152284453e-06, + "loss": 1.4739, + "step": 49120 + }, + { + "epoch": 0.76, + "learning_rate": 6.65469103295504e-06, + "loss": 1.4516, + "step": 49130 + }, + { + "epoch": 0.76, + "learning_rate": 6.6464149015800255e-06, + "loss": 1.4757, + "step": 49140 + }, + { + "epoch": 0.76, + "learning_rate": 6.638143130685773e-06, + "loss": 1.4993, + "step": 49150 + }, + { + "epoch": 0.76, + "learning_rate": 6.629875722237483e-06, + "loss": 1.4639, + "step": 49160 + }, + { + "epoch": 0.76, + "learning_rate": 6.621612678199354e-06, + "loss": 1.4662, + "step": 49170 + }, + { + "epoch": 0.76, + "learning_rate": 6.61335400053453e-06, + "loss": 1.4645, + "step": 49180 + }, + { + "epoch": 0.76, + "learning_rate": 6.60509969120513e-06, + "loss": 1.4652, + "step": 49190 + }, + { + "epoch": 0.76, + "learning_rate": 6.596849752172224e-06, + "loss": 1.4663, + "step": 49200 + }, + { + "epoch": 0.76, + "learning_rate": 6.5886041853958525e-06, + "loss": 1.4706, + "step": 49210 + }, + { + "epoch": 0.76, + "learning_rate": 6.580362992835013e-06, + "loss": 1.4915, + "step": 49220 + }, + { + "epoch": 0.76, + "learning_rate": 6.57212617644766e-06, + "loss": 1.4755, + "step": 49230 + }, + { + "epoch": 0.76, + "learning_rate": 6.563893738190726e-06, + "loss": 1.4755, + "step": 49240 + }, + { + "epoch": 0.76, + "learning_rate": 6.555665680020063e-06, + "loss": 1.4735, + "step": 49250 + }, + { + "epoch": 0.76, + "learning_rate": 6.547442003890542e-06, + "loss": 1.4604, + "step": 49260 + }, + { + "epoch": 0.76, + "learning_rate": 6.539222711755935e-06, + "loss": 1.4827, + "step": 49270 + }, + { + "epoch": 0.76, + "learning_rate": 6.531007805569006e-06, + "loss": 1.479, + "step": 49280 + }, + { + "epoch": 0.76, + "learning_rate": 6.522797287281468e-06, + "loss": 1.4815, + "step": 49290 + }, + { + "epoch": 0.76, + "learning_rate": 6.514591158844002e-06, + "loss": 1.4686, + "step": 49300 + }, + { + "epoch": 0.77, + "learning_rate": 6.506389422206214e-06, + "loss": 1.4664, + "step": 49310 + }, + { + "epoch": 0.77, + "learning_rate": 6.498192079316712e-06, + "loss": 1.4808, + "step": 49320 + }, + { + "epoch": 0.77, + "learning_rate": 6.48999913212302e-06, + "loss": 1.4857, + "step": 49330 + }, + { + "epoch": 0.77, + "learning_rate": 6.481810582571643e-06, + "loss": 1.4781, + "step": 49340 + }, + { + "epoch": 0.77, + "learning_rate": 6.473626432608029e-06, + "loss": 1.4531, + "step": 49350 + }, + { + "epoch": 0.77, + "learning_rate": 6.465446684176587e-06, + "loss": 1.4776, + "step": 49360 + }, + { + "epoch": 0.77, + "learning_rate": 6.457271339220678e-06, + "loss": 1.4729, + "step": 49370 + }, + { + "epoch": 0.77, + "learning_rate": 6.449100399682611e-06, + "loss": 1.4553, + "step": 49380 + }, + { + "epoch": 0.77, + "learning_rate": 6.4409338675036676e-06, + "loss": 1.5116, + "step": 49390 + }, + { + "epoch": 0.77, + "learning_rate": 6.432771744624047e-06, + "loss": 1.4892, + "step": 49400 + }, + { + "epoch": 0.77, + "learning_rate": 6.424614032982945e-06, + "loss": 1.4913, + "step": 49410 + }, + { + "epoch": 0.77, + "learning_rate": 6.41646073451847e-06, + "loss": 1.4847, + "step": 49420 + }, + { + "epoch": 0.77, + "learning_rate": 6.408311851167706e-06, + "loss": 1.5246, + "step": 49430 + }, + { + "epoch": 0.77, + "learning_rate": 6.400167384866676e-06, + "loss": 1.507, + "step": 49440 + }, + { + "epoch": 0.77, + "learning_rate": 6.392027337550363e-06, + "loss": 1.4707, + "step": 49450 + }, + { + "epoch": 0.77, + "learning_rate": 6.3838917111526956e-06, + "loss": 1.5054, + "step": 49460 + }, + { + "epoch": 0.77, + "learning_rate": 6.375760507606557e-06, + "loss": 1.4718, + "step": 49470 + }, + { + "epoch": 0.77, + "learning_rate": 6.367633728843753e-06, + "loss": 1.4743, + "step": 49480 + }, + { + "epoch": 0.77, + "learning_rate": 6.359511376795088e-06, + "loss": 1.4673, + "step": 49490 + }, + { + "epoch": 0.77, + "learning_rate": 6.351393453390267e-06, + "loss": 1.4863, + "step": 49500 + }, + { + "epoch": 0.77, + "learning_rate": 6.343279960557966e-06, + "loss": 1.4704, + "step": 49510 + }, + { + "epoch": 0.77, + "learning_rate": 6.3351709002258095e-06, + "loss": 1.452, + "step": 49520 + }, + { + "epoch": 0.77, + "learning_rate": 6.327066274320359e-06, + "loss": 1.4491, + "step": 49530 + }, + { + "epoch": 0.77, + "learning_rate": 6.3189660847671274e-06, + "loss": 1.4798, + "step": 49540 + }, + { + "epoch": 0.77, + "learning_rate": 6.3108703334905785e-06, + "loss": 1.4789, + "step": 49550 + }, + { + "epoch": 0.77, + "learning_rate": 6.302779022414121e-06, + "loss": 1.4441, + "step": 49560 + }, + { + "epoch": 0.77, + "learning_rate": 6.29469215346008e-06, + "loss": 1.4639, + "step": 49570 + }, + { + "epoch": 0.77, + "learning_rate": 6.2866097285497835e-06, + "loss": 1.4839, + "step": 49580 + }, + { + "epoch": 0.77, + "learning_rate": 6.2785317496034474e-06, + "loss": 1.4668, + "step": 49590 + }, + { + "epoch": 0.77, + "learning_rate": 6.270458218540259e-06, + "loss": 1.4655, + "step": 49600 + }, + { + "epoch": 0.77, + "learning_rate": 6.262389137278343e-06, + "loss": 1.5048, + "step": 49610 + }, + { + "epoch": 0.77, + "learning_rate": 6.254324507734777e-06, + "loss": 1.4745, + "step": 49620 + }, + { + "epoch": 0.77, + "learning_rate": 6.246264331825549e-06, + "loss": 1.4797, + "step": 49630 + }, + { + "epoch": 0.77, + "learning_rate": 6.23820861146564e-06, + "loss": 1.4662, + "step": 49640 + }, + { + "epoch": 0.77, + "learning_rate": 6.2301573485689215e-06, + "loss": 1.4983, + "step": 49650 + }, + { + "epoch": 0.77, + "learning_rate": 6.222110545048237e-06, + "loss": 1.469, + "step": 49660 + }, + { + "epoch": 0.77, + "learning_rate": 6.214068202815362e-06, + "loss": 1.4489, + "step": 49670 + }, + { + "epoch": 0.77, + "learning_rate": 6.206030323781012e-06, + "loss": 1.4613, + "step": 49680 + }, + { + "epoch": 0.77, + "learning_rate": 6.197996909854842e-06, + "loss": 1.4845, + "step": 49690 + }, + { + "epoch": 0.77, + "learning_rate": 6.189967962945442e-06, + "loss": 1.5037, + "step": 49700 + }, + { + "epoch": 0.77, + "learning_rate": 6.1819434849603505e-06, + "loss": 1.4888, + "step": 49710 + }, + { + "epoch": 0.77, + "learning_rate": 6.173923477806037e-06, + "loss": 1.4575, + "step": 49720 + }, + { + "epoch": 0.77, + "learning_rate": 6.165907943387919e-06, + "loss": 1.4659, + "step": 49730 + }, + { + "epoch": 0.77, + "learning_rate": 6.1578968836103175e-06, + "loss": 1.4899, + "step": 49740 + }, + { + "epoch": 0.77, + "learning_rate": 6.149890300376549e-06, + "loss": 1.472, + "step": 49750 + }, + { + "epoch": 0.77, + "learning_rate": 6.141888195588807e-06, + "loss": 1.4453, + "step": 49760 + }, + { + "epoch": 0.77, + "learning_rate": 6.13389057114826e-06, + "loss": 1.5032, + "step": 49770 + }, + { + "epoch": 0.77, + "learning_rate": 6.125897428954994e-06, + "loss": 1.4644, + "step": 49780 + }, + { + "epoch": 0.77, + "learning_rate": 6.11790877090804e-06, + "loss": 1.4639, + "step": 49790 + }, + { + "epoch": 0.77, + "learning_rate": 6.109924598905353e-06, + "loss": 1.4739, + "step": 49800 + }, + { + "epoch": 0.77, + "learning_rate": 6.101944914843843e-06, + "loss": 1.4716, + "step": 49810 + }, + { + "epoch": 0.77, + "learning_rate": 6.093969720619319e-06, + "loss": 1.4968, + "step": 49820 + }, + { + "epoch": 0.77, + "learning_rate": 6.085999018126554e-06, + "loss": 1.5054, + "step": 49830 + }, + { + "epoch": 0.77, + "learning_rate": 6.078032809259238e-06, + "loss": 1.4639, + "step": 49840 + }, + { + "epoch": 0.77, + "learning_rate": 6.070071095910004e-06, + "loss": 1.4769, + "step": 49850 + }, + { + "epoch": 0.77, + "learning_rate": 6.06211387997041e-06, + "loss": 1.4926, + "step": 49860 + }, + { + "epoch": 0.77, + "learning_rate": 6.054161163330946e-06, + "loss": 1.4912, + "step": 49870 + }, + { + "epoch": 0.77, + "learning_rate": 6.046212947881038e-06, + "loss": 1.5049, + "step": 49880 + }, + { + "epoch": 0.77, + "learning_rate": 6.038269235509025e-06, + "loss": 1.4705, + "step": 49890 + }, + { + "epoch": 0.77, + "learning_rate": 6.030330028102213e-06, + "loss": 1.5136, + "step": 49900 + }, + { + "epoch": 0.77, + "learning_rate": 6.022395327546787e-06, + "loss": 1.5132, + "step": 49910 + }, + { + "epoch": 0.77, + "learning_rate": 6.014465135727917e-06, + "loss": 1.4923, + "step": 49920 + }, + { + "epoch": 0.77, + "learning_rate": 6.0065394545296524e-06, + "loss": 1.4701, + "step": 49930 + }, + { + "epoch": 0.77, + "learning_rate": 5.998618285835001e-06, + "loss": 1.5028, + "step": 49940 + }, + { + "epoch": 0.77, + "learning_rate": 5.990701631525886e-06, + "loss": 1.4903, + "step": 49950 + }, + { + "epoch": 0.78, + "learning_rate": 5.9827894934831624e-06, + "loss": 1.4864, + "step": 49960 + }, + { + "epoch": 0.78, + "learning_rate": 5.974881873586616e-06, + "loss": 1.4934, + "step": 49970 + }, + { + "epoch": 0.78, + "learning_rate": 5.966978773714949e-06, + "loss": 1.4822, + "step": 49980 + }, + { + "epoch": 0.78, + "learning_rate": 5.959080195745803e-06, + "loss": 1.4776, + "step": 49990 + }, + { + "epoch": 0.78, + "learning_rate": 5.95118614155572e-06, + "loss": 1.4725, + "step": 50000 + }, + { + "epoch": 0.78, + "eval_loss": 1.5840998888015747, + "eval_runtime": 82.3816, + "eval_samples_per_second": 36.416, + "eval_steps_per_second": 4.552, + "step": 50000 + }, + { + "epoch": 0.78, + "learning_rate": 5.943296613020211e-06, + "loss": 1.4755, + "step": 50010 + }, + { + "epoch": 0.78, + "learning_rate": 5.935411612013664e-06, + "loss": 1.4956, + "step": 50020 + }, + { + "epoch": 0.78, + "learning_rate": 5.9275311404094205e-06, + "loss": 1.4811, + "step": 50030 + }, + { + "epoch": 0.78, + "learning_rate": 5.919655200079741e-06, + "loss": 1.496, + "step": 50040 + }, + { + "epoch": 0.78, + "learning_rate": 5.911783792895809e-06, + "loss": 1.4275, + "step": 50050 + }, + { + "epoch": 0.78, + "learning_rate": 5.903916920727712e-06, + "loss": 1.4923, + "step": 50060 + }, + { + "epoch": 0.78, + "learning_rate": 5.896054585444502e-06, + "loss": 1.4898, + "step": 50070 + }, + { + "epoch": 0.78, + "learning_rate": 5.888196788914107e-06, + "loss": 1.4462, + "step": 50080 + }, + { + "epoch": 0.78, + "learning_rate": 5.880343533003407e-06, + "loss": 1.4748, + "step": 50090 + }, + { + "epoch": 0.78, + "learning_rate": 5.872494819578195e-06, + "loss": 1.4669, + "step": 50100 + }, + { + "epoch": 0.78, + "learning_rate": 5.864650650503181e-06, + "loss": 1.4858, + "step": 50110 + }, + { + "epoch": 0.78, + "learning_rate": 5.856811027641998e-06, + "loss": 1.4693, + "step": 50120 + }, + { + "epoch": 0.78, + "learning_rate": 5.848975952857199e-06, + "loss": 1.491, + "step": 50130 + }, + { + "epoch": 0.78, + "learning_rate": 5.841145428010258e-06, + "loss": 1.4601, + "step": 50140 + }, + { + "epoch": 0.78, + "learning_rate": 5.833319454961566e-06, + "loss": 1.4842, + "step": 50150 + }, + { + "epoch": 0.78, + "learning_rate": 5.8254980355704396e-06, + "loss": 1.4795, + "step": 50160 + }, + { + "epoch": 0.78, + "learning_rate": 5.817681171695086e-06, + "loss": 1.4354, + "step": 50170 + }, + { + "epoch": 0.78, + "learning_rate": 5.809868865192677e-06, + "loss": 1.4989, + "step": 50180 + }, + { + "epoch": 0.78, + "learning_rate": 5.802061117919258e-06, + "loss": 1.5115, + "step": 50190 + }, + { + "epoch": 0.78, + "learning_rate": 5.794257931729816e-06, + "loss": 1.4828, + "step": 50200 + }, + { + "epoch": 0.78, + "learning_rate": 5.786459308478246e-06, + "loss": 1.4782, + "step": 50210 + }, + { + "epoch": 0.78, + "learning_rate": 5.778665250017365e-06, + "loss": 1.467, + "step": 50220 + }, + { + "epoch": 0.78, + "learning_rate": 5.770875758198885e-06, + "loss": 1.4982, + "step": 50230 + }, + { + "epoch": 0.78, + "learning_rate": 5.763090834873472e-06, + "loss": 1.482, + "step": 50240 + }, + { + "epoch": 0.78, + "learning_rate": 5.755310481890664e-06, + "loss": 1.4769, + "step": 50250 + }, + { + "epoch": 0.78, + "learning_rate": 5.7475347010989385e-06, + "loss": 1.5394, + "step": 50260 + }, + { + "epoch": 0.78, + "learning_rate": 5.739763494345682e-06, + "loss": 1.4819, + "step": 50270 + }, + { + "epoch": 0.78, + "learning_rate": 5.7319968634771935e-06, + "loss": 1.4658, + "step": 50280 + }, + { + "epoch": 0.78, + "learning_rate": 5.724234810338683e-06, + "loss": 1.4804, + "step": 50290 + }, + { + "epoch": 0.78, + "learning_rate": 5.7164773367742755e-06, + "loss": 1.473, + "step": 50300 + }, + { + "epoch": 0.78, + "learning_rate": 5.708724444627015e-06, + "loss": 1.48, + "step": 50310 + }, + { + "epoch": 0.78, + "learning_rate": 5.7009761357388224e-06, + "loss": 1.4694, + "step": 50320 + }, + { + "epoch": 0.78, + "learning_rate": 5.6932324119505886e-06, + "loss": 1.4761, + "step": 50330 + }, + { + "epoch": 0.78, + "learning_rate": 5.6854932751020635e-06, + "loss": 1.5036, + "step": 50340 + }, + { + "epoch": 0.78, + "learning_rate": 5.677758727031929e-06, + "loss": 1.5378, + "step": 50350 + }, + { + "epoch": 0.78, + "learning_rate": 5.670028769577776e-06, + "loss": 1.5155, + "step": 50360 + }, + { + "epoch": 0.78, + "learning_rate": 5.662303404576103e-06, + "loss": 1.5137, + "step": 50370 + }, + { + "epoch": 0.78, + "learning_rate": 5.6545826338623185e-06, + "loss": 1.5696, + "step": 50380 + }, + { + "epoch": 0.78, + "learning_rate": 5.646866459270744e-06, + "loss": 1.5355, + "step": 50390 + }, + { + "epoch": 0.78, + "learning_rate": 5.639154882634582e-06, + "loss": 1.5573, + "step": 50400 + }, + { + "epoch": 0.78, + "learning_rate": 5.631447905785991e-06, + "loss": 1.5321, + "step": 50410 + }, + { + "epoch": 0.78, + "learning_rate": 5.623745530555993e-06, + "loss": 1.523, + "step": 50420 + }, + { + "epoch": 0.78, + "learning_rate": 5.616047758774537e-06, + "loss": 1.4869, + "step": 50430 + }, + { + "epoch": 0.78, + "learning_rate": 5.608354592270474e-06, + "loss": 1.5374, + "step": 50440 + }, + { + "epoch": 0.78, + "learning_rate": 5.600666032871563e-06, + "loss": 1.5196, + "step": 50450 + }, + { + "epoch": 0.78, + "learning_rate": 5.592982082404469e-06, + "loss": 1.5224, + "step": 50460 + }, + { + "epoch": 0.78, + "learning_rate": 5.5853027426947564e-06, + "loss": 1.5262, + "step": 50470 + }, + { + "epoch": 0.78, + "learning_rate": 5.5776280155669076e-06, + "loss": 1.5345, + "step": 50480 + }, + { + "epoch": 0.78, + "learning_rate": 5.569957902844275e-06, + "loss": 1.4899, + "step": 50490 + }, + { + "epoch": 0.78, + "learning_rate": 5.562292406349173e-06, + "loss": 1.5037, + "step": 50500 + }, + { + "epoch": 0.78, + "learning_rate": 5.5546315279027565e-06, + "loss": 1.5225, + "step": 50510 + }, + { + "epoch": 0.78, + "learning_rate": 5.546975269325125e-06, + "loss": 1.5435, + "step": 50520 + }, + { + "epoch": 0.78, + "learning_rate": 5.539323632435265e-06, + "loss": 1.5001, + "step": 50530 + }, + { + "epoch": 0.78, + "learning_rate": 5.531676619051077e-06, + "loss": 1.4946, + "step": 50540 + }, + { + "epoch": 0.78, + "learning_rate": 5.5240342309893275e-06, + "loss": 1.4655, + "step": 50550 + }, + { + "epoch": 0.78, + "learning_rate": 5.51639647006574e-06, + "loss": 1.4741, + "step": 50560 + }, + { + "epoch": 0.78, + "learning_rate": 5.508763338094891e-06, + "loss": 1.4562, + "step": 50570 + }, + { + "epoch": 0.78, + "learning_rate": 5.5011348368902785e-06, + "loss": 1.4809, + "step": 50580 + }, + { + "epoch": 0.78, + "learning_rate": 5.493510968264298e-06, + "loss": 1.482, + "step": 50590 + }, + { + "epoch": 0.79, + "learning_rate": 5.485891734028242e-06, + "loss": 1.499, + "step": 50600 + }, + { + "epoch": 0.79, + "learning_rate": 5.4782771359923045e-06, + "loss": 1.4791, + "step": 50610 + }, + { + "epoch": 0.79, + "learning_rate": 5.470667175965577e-06, + "loss": 1.4516, + "step": 50620 + }, + { + "epoch": 0.79, + "learning_rate": 5.463061855756049e-06, + "loss": 1.4586, + "step": 50630 + }, + { + "epoch": 0.79, + "learning_rate": 5.455461177170604e-06, + "loss": 1.2635, + "step": 50640 + }, + { + "epoch": 0.79, + "learning_rate": 5.447865142015035e-06, + "loss": 1.4854, + "step": 50650 + }, + { + "epoch": 0.79, + "learning_rate": 5.440273752094005e-06, + "loss": 1.4919, + "step": 50660 + }, + { + "epoch": 0.79, + "learning_rate": 5.432687009211116e-06, + "loss": 1.4165, + "step": 50670 + }, + { + "epoch": 0.79, + "learning_rate": 5.425104915168819e-06, + "loss": 1.4546, + "step": 50680 + }, + { + "epoch": 0.79, + "learning_rate": 5.417527471768494e-06, + "loss": 1.4885, + "step": 50690 + }, + { + "epoch": 0.79, + "learning_rate": 5.4099546808104e-06, + "loss": 1.4557, + "step": 50700 + }, + { + "epoch": 0.79, + "learning_rate": 5.402386544093707e-06, + "loss": 1.4634, + "step": 50710 + }, + { + "epoch": 0.79, + "learning_rate": 5.394823063416446e-06, + "loss": 1.4505, + "step": 50720 + }, + { + "epoch": 0.79, + "learning_rate": 5.387264240575588e-06, + "loss": 1.488, + "step": 50730 + }, + { + "epoch": 0.79, + "learning_rate": 5.379710077366956e-06, + "loss": 1.4901, + "step": 50740 + }, + { + "epoch": 0.79, + "learning_rate": 5.372160575585286e-06, + "loss": 1.4724, + "step": 50750 + }, + { + "epoch": 0.79, + "learning_rate": 5.364615737024209e-06, + "loss": 1.4867, + "step": 50760 + }, + { + "epoch": 0.79, + "learning_rate": 5.357075563476238e-06, + "loss": 1.4728, + "step": 50770 + }, + { + "epoch": 0.79, + "learning_rate": 5.349540056732782e-06, + "loss": 1.4688, + "step": 50780 + }, + { + "epoch": 0.79, + "learning_rate": 5.342009218584146e-06, + "loss": 1.4778, + "step": 50790 + }, + { + "epoch": 0.79, + "learning_rate": 5.334483050819522e-06, + "loss": 1.4729, + "step": 50800 + }, + { + "epoch": 0.79, + "learning_rate": 5.326961555226976e-06, + "loss": 1.4942, + "step": 50810 + }, + { + "epoch": 0.79, + "learning_rate": 5.319444733593504e-06, + "loss": 1.4733, + "step": 50820 + }, + { + "epoch": 0.79, + "learning_rate": 5.311932587704943e-06, + "loss": 1.481, + "step": 50830 + }, + { + "epoch": 0.79, + "learning_rate": 5.304425119346068e-06, + "loss": 1.4531, + "step": 50840 + }, + { + "epoch": 0.79, + "learning_rate": 5.2969223303005e-06, + "loss": 1.4811, + "step": 50850 + }, + { + "epoch": 0.79, + "learning_rate": 5.28942422235077e-06, + "loss": 1.4929, + "step": 50860 + }, + { + "epoch": 0.79, + "learning_rate": 5.281930797278295e-06, + "loss": 1.4633, + "step": 50870 + }, + { + "epoch": 0.79, + "learning_rate": 5.274442056863385e-06, + "loss": 1.478, + "step": 50880 + }, + { + "epoch": 0.79, + "learning_rate": 5.266958002885208e-06, + "loss": 1.493, + "step": 50890 + }, + { + "epoch": 0.79, + "learning_rate": 5.259478637121867e-06, + "loss": 1.4426, + "step": 50900 + }, + { + "epoch": 0.79, + "learning_rate": 5.2520039613503076e-06, + "loss": 1.5749, + "step": 50910 + }, + { + "epoch": 0.79, + "learning_rate": 5.24453397734638e-06, + "loss": 1.4814, + "step": 50920 + }, + { + "epoch": 0.79, + "learning_rate": 5.237068686884819e-06, + "loss": 1.486, + "step": 50930 + }, + { + "epoch": 0.79, + "learning_rate": 5.229608091739247e-06, + "loss": 1.4897, + "step": 50940 + }, + { + "epoch": 0.79, + "learning_rate": 5.222152193682159e-06, + "loss": 1.4756, + "step": 50950 + }, + { + "epoch": 0.79, + "learning_rate": 5.21470099448495e-06, + "loss": 1.4609, + "step": 50960 + }, + { + "epoch": 0.79, + "learning_rate": 5.2072544959178905e-06, + "loss": 1.5108, + "step": 50970 + }, + { + "epoch": 0.79, + "learning_rate": 5.19981269975012e-06, + "loss": 1.4782, + "step": 50980 + }, + { + "epoch": 0.79, + "learning_rate": 5.192375607749697e-06, + "loss": 1.4608, + "step": 50990 + }, + { + "epoch": 0.79, + "learning_rate": 5.184943221683524e-06, + "loss": 1.4532, + "step": 51000 + }, + { + "epoch": 0.79, + "learning_rate": 5.177515543317407e-06, + "loss": 1.4633, + "step": 51010 + }, + { + "epoch": 0.79, + "learning_rate": 5.170092574416027e-06, + "loss": 1.4678, + "step": 51020 + }, + { + "epoch": 0.79, + "learning_rate": 5.162674316742955e-06, + "loss": 1.4905, + "step": 51030 + }, + { + "epoch": 0.79, + "learning_rate": 5.155260772060619e-06, + "loss": 1.4889, + "step": 51040 + }, + { + "epoch": 0.79, + "learning_rate": 5.147851942130363e-06, + "loss": 1.5124, + "step": 51050 + }, + { + "epoch": 0.79, + "learning_rate": 5.140447828712372e-06, + "loss": 1.5236, + "step": 51060 + }, + { + "epoch": 0.79, + "learning_rate": 5.133048433565748e-06, + "loss": 1.5395, + "step": 51070 + }, + { + "epoch": 0.79, + "learning_rate": 5.125653758448443e-06, + "loss": 1.5118, + "step": 51080 + }, + { + "epoch": 0.79, + "learning_rate": 5.118263805117302e-06, + "loss": 1.5657, + "step": 51090 + }, + { + "epoch": 0.79, + "learning_rate": 5.11087857532804e-06, + "loss": 1.513, + "step": 51100 + }, + { + "epoch": 0.79, + "learning_rate": 5.103498070835258e-06, + "loss": 1.51, + "step": 51110 + }, + { + "epoch": 0.79, + "learning_rate": 5.0961222933924326e-06, + "loss": 1.5447, + "step": 51120 + }, + { + "epoch": 0.79, + "learning_rate": 5.088751244751913e-06, + "loss": 1.4895, + "step": 51130 + }, + { + "epoch": 0.79, + "learning_rate": 5.0813849266649335e-06, + "loss": 1.5375, + "step": 51140 + }, + { + "epoch": 0.79, + "learning_rate": 5.074023340881581e-06, + "loss": 1.4948, + "step": 51150 + }, + { + "epoch": 0.79, + "learning_rate": 5.066666489150859e-06, + "loss": 1.4658, + "step": 51160 + }, + { + "epoch": 0.79, + "learning_rate": 5.059314373220606e-06, + "loss": 1.4589, + "step": 51170 + }, + { + "epoch": 0.79, + "learning_rate": 5.051966994837557e-06, + "loss": 1.4557, + "step": 51180 + }, + { + "epoch": 0.79, + "learning_rate": 5.0446243557473155e-06, + "loss": 1.4644, + "step": 51190 + }, + { + "epoch": 0.79, + "learning_rate": 5.037286457694368e-06, + "loss": 1.4702, + "step": 51200 + }, + { + "epoch": 0.79, + "learning_rate": 5.029953302422047e-06, + "loss": 1.4857, + "step": 51210 + }, + { + "epoch": 0.79, + "learning_rate": 5.022624891672606e-06, + "loss": 1.4653, + "step": 51220 + }, + { + "epoch": 0.79, + "learning_rate": 5.015301227187119e-06, + "loss": 1.4971, + "step": 51230 + }, + { + "epoch": 0.79, + "learning_rate": 5.007982310705567e-06, + "loss": 1.4748, + "step": 51240 + }, + { + "epoch": 0.8, + "learning_rate": 5.000668143966791e-06, + "loss": 1.4976, + "step": 51250 + }, + { + "epoch": 0.8, + "learning_rate": 4.9933587287085055e-06, + "loss": 1.4955, + "step": 51260 + }, + { + "epoch": 0.8, + "learning_rate": 4.986054066667295e-06, + "loss": 1.5041, + "step": 51270 + }, + { + "epoch": 0.8, + "learning_rate": 4.978754159578617e-06, + "loss": 1.4695, + "step": 51280 + }, + { + "epoch": 0.8, + "learning_rate": 4.971459009176796e-06, + "loss": 1.496, + "step": 51290 + }, + { + "epoch": 0.8, + "learning_rate": 4.964168617195028e-06, + "loss": 1.5291, + "step": 51300 + }, + { + "epoch": 0.8, + "learning_rate": 4.956882985365388e-06, + "loss": 1.5725, + "step": 51310 + }, + { + "epoch": 0.8, + "learning_rate": 4.9496021154187885e-06, + "loss": 1.5869, + "step": 51320 + }, + { + "epoch": 0.8, + "learning_rate": 4.942326009085057e-06, + "loss": 1.5913, + "step": 51330 + }, + { + "epoch": 0.8, + "learning_rate": 4.935054668092853e-06, + "loss": 1.5562, + "step": 51340 + }, + { + "epoch": 0.8, + "learning_rate": 4.927788094169714e-06, + "loss": 1.5068, + "step": 51350 + }, + { + "epoch": 0.8, + "learning_rate": 4.9205262890420515e-06, + "loss": 1.476, + "step": 51360 + }, + { + "epoch": 0.8, + "learning_rate": 4.913269254435146e-06, + "loss": 1.4974, + "step": 51370 + }, + { + "epoch": 0.8, + "learning_rate": 4.906016992073115e-06, + "loss": 1.5013, + "step": 51380 + }, + { + "epoch": 0.8, + "learning_rate": 4.898769503678996e-06, + "loss": 1.4621, + "step": 51390 + }, + { + "epoch": 0.8, + "learning_rate": 4.891526790974637e-06, + "loss": 1.4486, + "step": 51400 + }, + { + "epoch": 0.8, + "learning_rate": 4.884288855680788e-06, + "loss": 1.4742, + "step": 51410 + }, + { + "epoch": 0.8, + "learning_rate": 4.8770556995170465e-06, + "loss": 1.4658, + "step": 51420 + }, + { + "epoch": 0.8, + "learning_rate": 4.8698273242018835e-06, + "loss": 1.4826, + "step": 51430 + }, + { + "epoch": 0.8, + "learning_rate": 4.862603731452631e-06, + "loss": 1.4749, + "step": 51440 + }, + { + "epoch": 0.8, + "learning_rate": 4.855384922985482e-06, + "loss": 1.4647, + "step": 51450 + }, + { + "epoch": 0.8, + "learning_rate": 4.8481709005155e-06, + "loss": 1.5001, + "step": 51460 + }, + { + "epoch": 0.8, + "learning_rate": 4.840961665756591e-06, + "loss": 1.4815, + "step": 51470 + }, + { + "epoch": 0.8, + "learning_rate": 4.8337572204215645e-06, + "loss": 1.4699, + "step": 51480 + }, + { + "epoch": 0.8, + "learning_rate": 4.826557566222048e-06, + "loss": 1.4558, + "step": 51490 + }, + { + "epoch": 0.8, + "learning_rate": 4.819362704868552e-06, + "loss": 1.4667, + "step": 51500 + }, + { + "epoch": 0.8, + "learning_rate": 4.812172638070447e-06, + "loss": 1.4347, + "step": 51510 + }, + { + "epoch": 0.8, + "learning_rate": 4.804987367535965e-06, + "loss": 1.4585, + "step": 51520 + }, + { + "epoch": 0.8, + "learning_rate": 4.797806894972198e-06, + "loss": 1.4926, + "step": 51530 + }, + { + "epoch": 0.8, + "learning_rate": 4.790631222085101e-06, + "loss": 1.4799, + "step": 51540 + }, + { + "epoch": 0.8, + "learning_rate": 4.783460350579463e-06, + "loss": 1.4829, + "step": 51550 + }, + { + "epoch": 0.8, + "learning_rate": 4.776294282158983e-06, + "loss": 1.4903, + "step": 51560 + }, + { + "epoch": 0.8, + "learning_rate": 4.76913301852617e-06, + "loss": 1.4846, + "step": 51570 + }, + { + "epoch": 0.8, + "learning_rate": 4.761976561382417e-06, + "loss": 1.4837, + "step": 51580 + }, + { + "epoch": 0.8, + "learning_rate": 4.754824912427966e-06, + "loss": 1.4699, + "step": 51590 + }, + { + "epoch": 0.8, + "learning_rate": 4.747678073361924e-06, + "loss": 1.4608, + "step": 51600 + }, + { + "epoch": 0.8, + "learning_rate": 4.740536045882249e-06, + "loss": 1.4909, + "step": 51610 + }, + { + "epoch": 0.8, + "learning_rate": 4.7333988316857595e-06, + "loss": 1.4851, + "step": 51620 + }, + { + "epoch": 0.8, + "learning_rate": 4.726266432468135e-06, + "loss": 1.4976, + "step": 51630 + }, + { + "epoch": 0.8, + "learning_rate": 4.719138849923882e-06, + "loss": 1.4899, + "step": 51640 + }, + { + "epoch": 0.8, + "learning_rate": 4.712016085746418e-06, + "loss": 1.5098, + "step": 51650 + }, + { + "epoch": 0.8, + "learning_rate": 4.704898141627956e-06, + "loss": 1.4991, + "step": 51660 + }, + { + "epoch": 0.8, + "learning_rate": 4.697785019259604e-06, + "loss": 1.4902, + "step": 51670 + }, + { + "epoch": 0.8, + "learning_rate": 4.690676720331308e-06, + "loss": 1.5154, + "step": 51680 + }, + { + "epoch": 0.8, + "learning_rate": 4.683573246531881e-06, + "loss": 1.5273, + "step": 51690 + }, + { + "epoch": 0.8, + "learning_rate": 4.676474599548958e-06, + "loss": 1.5253, + "step": 51700 + }, + { + "epoch": 0.8, + "learning_rate": 4.6693807810690774e-06, + "loss": 1.5098, + "step": 51710 + }, + { + "epoch": 0.8, + "learning_rate": 4.662291792777579e-06, + "loss": 1.4836, + "step": 51720 + }, + { + "epoch": 0.8, + "learning_rate": 4.6552076363586914e-06, + "loss": 1.4605, + "step": 51730 + }, + { + "epoch": 0.8, + "learning_rate": 4.648128313495478e-06, + "loss": 1.4865, + "step": 51740 + }, + { + "epoch": 0.8, + "learning_rate": 4.64105382586986e-06, + "loss": 1.4925, + "step": 51750 + }, + { + "epoch": 0.8, + "learning_rate": 4.6339841751626065e-06, + "loss": 1.5029, + "step": 51760 + }, + { + "epoch": 0.8, + "learning_rate": 4.626919363053339e-06, + "loss": 1.4843, + "step": 51770 + }, + { + "epoch": 0.8, + "learning_rate": 4.619859391220532e-06, + "loss": 1.4805, + "step": 51780 + }, + { + "epoch": 0.8, + "learning_rate": 4.612804261341505e-06, + "loss": 1.4954, + "step": 51790 + }, + { + "epoch": 0.8, + "learning_rate": 4.605753975092436e-06, + "loss": 1.4977, + "step": 51800 + }, + { + "epoch": 0.8, + "learning_rate": 4.59870853414833e-06, + "loss": 1.462, + "step": 51810 + }, + { + "epoch": 0.8, + "learning_rate": 4.591667940183078e-06, + "loss": 1.4538, + "step": 51820 + }, + { + "epoch": 0.8, + "learning_rate": 4.584632194869382e-06, + "loss": 1.4682, + "step": 51830 + }, + { + "epoch": 0.8, + "learning_rate": 4.577601299878812e-06, + "loss": 1.5604, + "step": 51840 + }, + { + "epoch": 0.8, + "learning_rate": 4.5705752568817816e-06, + "loss": 1.4856, + "step": 51850 + }, + { + "epoch": 0.8, + "learning_rate": 4.5635540675475595e-06, + "loss": 1.4553, + "step": 51860 + }, + { + "epoch": 0.8, + "learning_rate": 4.556537733544236e-06, + "loss": 1.4897, + "step": 51870 + }, + { + "epoch": 0.8, + "learning_rate": 4.5495262565387915e-06, + "loss": 1.4756, + "step": 51880 + }, + { + "epoch": 0.81, + "learning_rate": 4.542519638197002e-06, + "loss": 1.4618, + "step": 51890 + }, + { + "epoch": 0.81, + "learning_rate": 4.535517880183526e-06, + "loss": 1.4847, + "step": 51900 + }, + { + "epoch": 0.81, + "learning_rate": 4.528520984161852e-06, + "loss": 1.4869, + "step": 51910 + }, + { + "epoch": 0.81, + "learning_rate": 4.521528951794316e-06, + "loss": 1.4897, + "step": 51920 + }, + { + "epoch": 0.81, + "learning_rate": 4.514541784742099e-06, + "loss": 1.4824, + "step": 51930 + }, + { + "epoch": 0.81, + "learning_rate": 4.507559484665227e-06, + "loss": 1.4629, + "step": 51940 + }, + { + "epoch": 0.81, + "learning_rate": 4.500582053222574e-06, + "loss": 1.4682, + "step": 51950 + }, + { + "epoch": 0.81, + "learning_rate": 4.4936094920718315e-06, + "loss": 1.5411, + "step": 51960 + }, + { + "epoch": 0.81, + "learning_rate": 4.4866418028695845e-06, + "loss": 1.4954, + "step": 51970 + }, + { + "epoch": 0.81, + "learning_rate": 4.479678987271199e-06, + "loss": 1.4566, + "step": 51980 + }, + { + "epoch": 0.81, + "learning_rate": 4.472721046930942e-06, + "loss": 1.4761, + "step": 51990 + }, + { + "epoch": 0.81, + "learning_rate": 4.465767983501878e-06, + "loss": 1.4734, + "step": 52000 + }, + { + "epoch": 0.81, + "learning_rate": 4.4588197986359324e-06, + "loss": 1.4791, + "step": 52010 + }, + { + "epoch": 0.81, + "learning_rate": 4.45187649398387e-06, + "loss": 1.4852, + "step": 52020 + }, + { + "epoch": 0.81, + "learning_rate": 4.444938071195293e-06, + "loss": 1.4687, + "step": 52030 + }, + { + "epoch": 0.81, + "learning_rate": 4.438004531918649e-06, + "loss": 1.4596, + "step": 52040 + }, + { + "epoch": 0.81, + "learning_rate": 4.431075877801225e-06, + "loss": 1.4706, + "step": 52050 + }, + { + "epoch": 0.81, + "learning_rate": 4.424152110489135e-06, + "loss": 1.4752, + "step": 52060 + }, + { + "epoch": 0.81, + "learning_rate": 4.417233231627344e-06, + "loss": 1.4563, + "step": 52070 + }, + { + "epoch": 0.81, + "learning_rate": 4.410319242859654e-06, + "loss": 1.4526, + "step": 52080 + }, + { + "epoch": 0.81, + "learning_rate": 4.403410145828704e-06, + "loss": 1.4754, + "step": 52090 + }, + { + "epoch": 0.81, + "learning_rate": 4.396505942175971e-06, + "loss": 1.4715, + "step": 52100 + }, + { + "epoch": 0.81, + "learning_rate": 4.389606633541771e-06, + "loss": 1.4799, + "step": 52110 + }, + { + "epoch": 0.81, + "learning_rate": 4.382712221565258e-06, + "loss": 1.4921, + "step": 52120 + }, + { + "epoch": 0.81, + "learning_rate": 4.375822707884402e-06, + "loss": 1.5106, + "step": 52130 + }, + { + "epoch": 0.81, + "learning_rate": 4.368938094136055e-06, + "loss": 1.4828, + "step": 52140 + }, + { + "epoch": 0.81, + "learning_rate": 4.362058381955855e-06, + "loss": 1.4662, + "step": 52150 + }, + { + "epoch": 0.81, + "learning_rate": 4.355183572978308e-06, + "loss": 1.479, + "step": 52160 + }, + { + "epoch": 0.81, + "learning_rate": 4.34831366883674e-06, + "loss": 1.471, + "step": 52170 + }, + { + "epoch": 0.81, + "learning_rate": 4.341448671163317e-06, + "loss": 1.5148, + "step": 52180 + }, + { + "epoch": 0.81, + "learning_rate": 4.334588581589041e-06, + "loss": 1.4908, + "step": 52190 + }, + { + "epoch": 0.81, + "learning_rate": 4.3277334017437445e-06, + "loss": 1.4553, + "step": 52200 + }, + { + "epoch": 0.81, + "learning_rate": 4.320883133256093e-06, + "loss": 1.4699, + "step": 52210 + }, + { + "epoch": 0.81, + "learning_rate": 4.314037777753591e-06, + "loss": 1.4779, + "step": 52220 + }, + { + "epoch": 0.81, + "learning_rate": 4.30719733686257e-06, + "loss": 1.4804, + "step": 52230 + }, + { + "epoch": 0.81, + "learning_rate": 4.300361812208187e-06, + "loss": 1.4856, + "step": 52240 + }, + { + "epoch": 0.81, + "learning_rate": 4.293531205414456e-06, + "loss": 1.4632, + "step": 52250 + }, + { + "epoch": 0.81, + "learning_rate": 4.286705518104189e-06, + "loss": 1.4798, + "step": 52260 + }, + { + "epoch": 0.81, + "learning_rate": 4.279884751899055e-06, + "loss": 1.4386, + "step": 52270 + }, + { + "epoch": 0.81, + "learning_rate": 4.273068908419542e-06, + "loss": 1.463, + "step": 52280 + }, + { + "epoch": 0.81, + "learning_rate": 4.26625798928498e-06, + "loss": 1.4722, + "step": 52290 + }, + { + "epoch": 0.81, + "learning_rate": 4.2594519961135e-06, + "loss": 1.5278, + "step": 52300 + }, + { + "epoch": 0.81, + "learning_rate": 4.252650930522109e-06, + "loss": 1.4915, + "step": 52310 + }, + { + "epoch": 0.81, + "learning_rate": 4.2458547941266e-06, + "loss": 1.5032, + "step": 52320 + }, + { + "epoch": 0.81, + "learning_rate": 4.239063588541617e-06, + "loss": 1.5165, + "step": 52330 + }, + { + "epoch": 0.81, + "learning_rate": 4.232277315380631e-06, + "loss": 1.4951, + "step": 52340 + }, + { + "epoch": 0.81, + "learning_rate": 4.225495976255933e-06, + "loss": 1.5162, + "step": 52350 + }, + { + "epoch": 0.81, + "learning_rate": 4.218719572778648e-06, + "loss": 1.5337, + "step": 52360 + }, + { + "epoch": 0.81, + "learning_rate": 4.21194810655873e-06, + "loss": 1.5214, + "step": 52370 + }, + { + "epoch": 0.81, + "learning_rate": 4.205181579204964e-06, + "loss": 1.4992, + "step": 52380 + }, + { + "epoch": 0.81, + "learning_rate": 4.198419992324931e-06, + "loss": 1.4882, + "step": 52390 + }, + { + "epoch": 0.81, + "learning_rate": 4.191663347525094e-06, + "loss": 1.4752, + "step": 52400 + }, + { + "epoch": 0.81, + "learning_rate": 4.184911646410686e-06, + "loss": 1.4666, + "step": 52410 + }, + { + "epoch": 0.81, + "learning_rate": 4.178164890585795e-06, + "loss": 1.4433, + "step": 52420 + }, + { + "epoch": 0.81, + "learning_rate": 4.171423081653333e-06, + "loss": 1.4737, + "step": 52430 + }, + { + "epoch": 0.81, + "learning_rate": 4.164686221215028e-06, + "loss": 1.4779, + "step": 52440 + }, + { + "epoch": 0.81, + "learning_rate": 4.15795431087144e-06, + "loss": 1.4896, + "step": 52450 + }, + { + "epoch": 0.81, + "learning_rate": 4.151227352221953e-06, + "loss": 1.4879, + "step": 52460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1445053468647525e-06, + "loss": 1.4986, + "step": 52470 + }, + { + "epoch": 0.81, + "learning_rate": 4.137788296396894e-06, + "loss": 1.4891, + "step": 52480 + }, + { + "epoch": 0.81, + "learning_rate": 4.1310762024142025e-06, + "loss": 1.4652, + "step": 52490 + }, + { + "epoch": 0.81, + "learning_rate": 4.124369066511363e-06, + "loss": 1.4622, + "step": 52500 + }, + { + "epoch": 0.81, + "learning_rate": 4.117666890281868e-06, + "loss": 1.471, + "step": 52510 + }, + { + "epoch": 0.81, + "learning_rate": 4.110969675318032e-06, + "loss": 1.4636, + "step": 52520 + }, + { + "epoch": 0.82, + "learning_rate": 4.104277423210995e-06, + "loss": 1.4621, + "step": 52530 + }, + { + "epoch": 0.82, + "learning_rate": 4.0975901355507136e-06, + "loss": 1.4678, + "step": 52540 + }, + { + "epoch": 0.82, + "learning_rate": 4.090907813925971e-06, + "loss": 1.4633, + "step": 52550 + }, + { + "epoch": 0.82, + "learning_rate": 4.084230459924351e-06, + "loss": 1.467, + "step": 52560 + }, + { + "epoch": 0.82, + "learning_rate": 4.077558075132298e-06, + "loss": 1.4819, + "step": 52570 + }, + { + "epoch": 0.82, + "learning_rate": 4.070890661135029e-06, + "loss": 1.4894, + "step": 52580 + }, + { + "epoch": 0.82, + "learning_rate": 4.064228219516608e-06, + "loss": 1.4698, + "step": 52590 + }, + { + "epoch": 0.82, + "learning_rate": 4.057570751859912e-06, + "loss": 1.473, + "step": 52600 + }, + { + "epoch": 0.82, + "learning_rate": 4.050918259746642e-06, + "loss": 1.4624, + "step": 52610 + }, + { + "epoch": 0.82, + "learning_rate": 4.044270744757289e-06, + "loss": 1.4665, + "step": 52620 + }, + { + "epoch": 0.82, + "learning_rate": 4.037628208471209e-06, + "loss": 1.4703, + "step": 52630 + }, + { + "epoch": 0.82, + "learning_rate": 4.030990652466529e-06, + "loss": 1.4539, + "step": 52640 + }, + { + "epoch": 0.82, + "learning_rate": 4.02435807832022e-06, + "loss": 1.4844, + "step": 52650 + }, + { + "epoch": 0.82, + "learning_rate": 4.017730487608063e-06, + "loss": 1.4782, + "step": 52660 + }, + { + "epoch": 0.82, + "learning_rate": 4.0111078819046495e-06, + "loss": 1.4803, + "step": 52670 + }, + { + "epoch": 0.82, + "learning_rate": 4.004490262783397e-06, + "loss": 1.4925, + "step": 52680 + }, + { + "epoch": 0.82, + "learning_rate": 3.99787763181653e-06, + "loss": 1.4868, + "step": 52690 + }, + { + "epoch": 0.82, + "learning_rate": 3.991269990575089e-06, + "loss": 1.4717, + "step": 52700 + }, + { + "epoch": 0.82, + "learning_rate": 3.984667340628934e-06, + "loss": 1.4561, + "step": 52710 + }, + { + "epoch": 0.82, + "learning_rate": 3.978069683546737e-06, + "loss": 1.4471, + "step": 52720 + }, + { + "epoch": 0.82, + "learning_rate": 3.971477020895967e-06, + "loss": 1.4712, + "step": 52730 + }, + { + "epoch": 0.82, + "learning_rate": 3.964889354242943e-06, + "loss": 1.4862, + "step": 52740 + }, + { + "epoch": 0.82, + "learning_rate": 3.958306685152763e-06, + "loss": 1.4806, + "step": 52750 + }, + { + "epoch": 0.82, + "learning_rate": 3.951729015189351e-06, + "loss": 1.4773, + "step": 52760 + }, + { + "epoch": 0.82, + "learning_rate": 3.945156345915446e-06, + "loss": 1.4777, + "step": 52770 + }, + { + "epoch": 0.82, + "learning_rate": 3.938588678892602e-06, + "loss": 1.4824, + "step": 52780 + }, + { + "epoch": 0.82, + "learning_rate": 3.932026015681156e-06, + "loss": 1.4854, + "step": 52790 + }, + { + "epoch": 0.82, + "learning_rate": 3.925468357840306e-06, + "loss": 1.4886, + "step": 52800 + }, + { + "epoch": 0.82, + "learning_rate": 3.918915706928017e-06, + "loss": 1.474, + "step": 52810 + }, + { + "epoch": 0.82, + "learning_rate": 3.912368064501082e-06, + "loss": 1.4633, + "step": 52820 + }, + { + "epoch": 0.82, + "learning_rate": 3.905825432115104e-06, + "loss": 1.4565, + "step": 52830 + }, + { + "epoch": 0.82, + "learning_rate": 3.899287811324498e-06, + "loss": 1.4744, + "step": 52840 + }, + { + "epoch": 0.82, + "learning_rate": 3.89275520368248e-06, + "loss": 1.4453, + "step": 52850 + }, + { + "epoch": 0.82, + "learning_rate": 3.88622761074108e-06, + "loss": 1.4691, + "step": 52860 + }, + { + "epoch": 0.82, + "learning_rate": 3.879705034051148e-06, + "loss": 1.4915, + "step": 52870 + }, + { + "epoch": 0.82, + "learning_rate": 3.873187475162307e-06, + "loss": 1.4806, + "step": 52880 + }, + { + "epoch": 0.82, + "learning_rate": 3.8666749356230385e-06, + "loss": 1.4768, + "step": 52890 + }, + { + "epoch": 0.82, + "learning_rate": 3.860167416980578e-06, + "loss": 1.4903, + "step": 52900 + }, + { + "epoch": 0.82, + "learning_rate": 3.853664920781019e-06, + "loss": 1.49, + "step": 52910 + }, + { + "epoch": 0.82, + "learning_rate": 3.8471674485692195e-06, + "loss": 1.4669, + "step": 52920 + }, + { + "epoch": 0.82, + "learning_rate": 3.8406750018888685e-06, + "loss": 1.4733, + "step": 52930 + }, + { + "epoch": 0.82, + "learning_rate": 3.8341875822824555e-06, + "loss": 1.4584, + "step": 52940 + }, + { + "epoch": 0.82, + "learning_rate": 3.827705191291278e-06, + "loss": 1.4969, + "step": 52950 + }, + { + "epoch": 0.82, + "learning_rate": 3.821227830455418e-06, + "loss": 1.4829, + "step": 52960 + }, + { + "epoch": 0.82, + "learning_rate": 3.814755501313802e-06, + "loss": 1.5063, + "step": 52970 + }, + { + "epoch": 0.82, + "learning_rate": 3.808288205404123e-06, + "loss": 1.5382, + "step": 52980 + }, + { + "epoch": 0.82, + "learning_rate": 3.801825944262899e-06, + "loss": 1.5021, + "step": 52990 + }, + { + "epoch": 0.82, + "learning_rate": 3.7953687194254463e-06, + "loss": 1.51, + "step": 53000 + }, + { + "epoch": 0.82, + "learning_rate": 3.7889165324258844e-06, + "loss": 1.5057, + "step": 53010 + }, + { + "epoch": 0.82, + "learning_rate": 3.782469384797141e-06, + "loss": 1.4953, + "step": 53020 + }, + { + "epoch": 0.82, + "learning_rate": 3.7760272780709343e-06, + "loss": 1.5023, + "step": 53030 + }, + { + "epoch": 0.82, + "learning_rate": 3.7695902137778073e-06, + "loss": 1.4957, + "step": 53040 + }, + { + "epoch": 0.82, + "learning_rate": 3.763158193447064e-06, + "loss": 1.4929, + "step": 53050 + }, + { + "epoch": 0.82, + "learning_rate": 3.7567312186068656e-06, + "loss": 1.4784, + "step": 53060 + }, + { + "epoch": 0.82, + "learning_rate": 3.7503092907841263e-06, + "loss": 1.4954, + "step": 53070 + }, + { + "epoch": 0.82, + "learning_rate": 3.7438924115045876e-06, + "loss": 1.4652, + "step": 53080 + }, + { + "epoch": 0.82, + "learning_rate": 3.7374805822927823e-06, + "loss": 1.4784, + "step": 53090 + }, + { + "epoch": 0.82, + "learning_rate": 3.73107380467205e-06, + "loss": 1.4951, + "step": 53100 + }, + { + "epoch": 0.82, + "learning_rate": 3.7246720801645125e-06, + "loss": 1.481, + "step": 53110 + }, + { + "epoch": 0.82, + "learning_rate": 3.7182754102911223e-06, + "loss": 1.4982, + "step": 53120 + }, + { + "epoch": 0.82, + "learning_rate": 3.711883796571594e-06, + "loss": 1.5036, + "step": 53130 + }, + { + "epoch": 0.82, + "learning_rate": 3.7054972405244803e-06, + "loss": 1.4755, + "step": 53140 + }, + { + "epoch": 0.82, + "learning_rate": 3.699115743667092e-06, + "loss": 1.4792, + "step": 53150 + }, + { + "epoch": 0.82, + "learning_rate": 3.6927393075155693e-06, + "loss": 1.4755, + "step": 53160 + }, + { + "epoch": 0.82, + "learning_rate": 3.6863679335848335e-06, + "loss": 1.443, + "step": 53170 + }, + { + "epoch": 0.83, + "learning_rate": 3.6800016233886123e-06, + "loss": 1.4836, + "step": 53180 + }, + { + "epoch": 0.83, + "learning_rate": 3.673640378439422e-06, + "loss": 1.5109, + "step": 53190 + }, + { + "epoch": 0.83, + "learning_rate": 3.6672842002485803e-06, + "loss": 1.492, + "step": 53200 + }, + { + "epoch": 0.83, + "learning_rate": 3.66093309032621e-06, + "loss": 1.5158, + "step": 53210 + }, + { + "epoch": 0.83, + "learning_rate": 3.654587050181199e-06, + "loss": 1.5022, + "step": 53220 + }, + { + "epoch": 0.83, + "learning_rate": 3.648246081321277e-06, + "loss": 1.4729, + "step": 53230 + }, + { + "epoch": 0.83, + "learning_rate": 3.641910185252925e-06, + "loss": 1.4934, + "step": 53240 + }, + { + "epoch": 0.83, + "learning_rate": 3.6355793634814478e-06, + "loss": 1.4687, + "step": 53250 + }, + { + "epoch": 0.83, + "learning_rate": 3.6292536175109276e-06, + "loss": 1.5378, + "step": 53260 + }, + { + "epoch": 0.83, + "learning_rate": 3.622932948844257e-06, + "loss": 1.537, + "step": 53270 + }, + { + "epoch": 0.83, + "learning_rate": 3.616617358983096e-06, + "loss": 1.5674, + "step": 53280 + }, + { + "epoch": 0.83, + "learning_rate": 3.6103068494279356e-06, + "loss": 1.5454, + "step": 53290 + }, + { + "epoch": 0.83, + "learning_rate": 3.604001421678024e-06, + "loss": 1.5621, + "step": 53300 + }, + { + "epoch": 0.83, + "learning_rate": 3.5977010772314245e-06, + "loss": 1.5634, + "step": 53310 + }, + { + "epoch": 0.83, + "learning_rate": 3.5914058175849807e-06, + "loss": 1.5594, + "step": 53320 + }, + { + "epoch": 0.83, + "learning_rate": 3.5851156442343353e-06, + "loss": 1.5741, + "step": 53330 + }, + { + "epoch": 0.83, + "learning_rate": 3.578830558673918e-06, + "loss": 1.5657, + "step": 53340 + }, + { + "epoch": 0.83, + "learning_rate": 3.5725505623969564e-06, + "loss": 1.5853, + "step": 53350 + }, + { + "epoch": 0.83, + "learning_rate": 3.5662756568954598e-06, + "loss": 1.5727, + "step": 53360 + }, + { + "epoch": 0.83, + "learning_rate": 3.560005843660233e-06, + "loss": 1.5969, + "step": 53370 + }, + { + "epoch": 0.83, + "learning_rate": 3.553741124180879e-06, + "loss": 1.5711, + "step": 53380 + }, + { + "epoch": 0.83, + "learning_rate": 3.547481499945762e-06, + "loss": 1.5814, + "step": 53390 + }, + { + "epoch": 0.83, + "learning_rate": 3.5412269724420827e-06, + "loss": 1.5811, + "step": 53400 + }, + { + "epoch": 0.83, + "learning_rate": 3.534977543155782e-06, + "loss": 1.5698, + "step": 53410 + }, + { + "epoch": 0.83, + "learning_rate": 3.5287332135716196e-06, + "loss": 1.5744, + "step": 53420 + }, + { + "epoch": 0.83, + "learning_rate": 3.5224939851731363e-06, + "loss": 1.5695, + "step": 53430 + }, + { + "epoch": 0.83, + "learning_rate": 3.516259859442664e-06, + "loss": 1.5621, + "step": 53440 + }, + { + "epoch": 0.83, + "learning_rate": 3.5100308378613027e-06, + "loss": 1.5563, + "step": 53450 + }, + { + "epoch": 0.83, + "learning_rate": 3.503806921908981e-06, + "loss": 1.5139, + "step": 53460 + }, + { + "epoch": 0.83, + "learning_rate": 3.497588113064365e-06, + "loss": 1.5141, + "step": 53470 + }, + { + "epoch": 0.83, + "learning_rate": 3.491374412804943e-06, + "loss": 1.4896, + "step": 53480 + }, + { + "epoch": 0.83, + "learning_rate": 3.4851658226069753e-06, + "loss": 1.4853, + "step": 53490 + }, + { + "epoch": 0.83, + "learning_rate": 3.478962343945513e-06, + "loss": 1.5165, + "step": 53500 + }, + { + "epoch": 0.83, + "learning_rate": 3.472763978294391e-06, + "loss": 1.5623, + "step": 53510 + }, + { + "epoch": 0.83, + "learning_rate": 3.466570727126228e-06, + "loss": 1.5295, + "step": 53520 + }, + { + "epoch": 0.83, + "learning_rate": 3.460382591912434e-06, + "loss": 1.4999, + "step": 53530 + }, + { + "epoch": 0.83, + "learning_rate": 3.454199574123179e-06, + "loss": 1.5121, + "step": 53540 + }, + { + "epoch": 0.83, + "learning_rate": 3.448021675227464e-06, + "loss": 1.5069, + "step": 53550 + }, + { + "epoch": 0.83, + "learning_rate": 3.44184889669302e-06, + "loss": 1.5101, + "step": 53560 + }, + { + "epoch": 0.83, + "learning_rate": 3.435681239986416e-06, + "loss": 1.5458, + "step": 53570 + }, + { + "epoch": 0.83, + "learning_rate": 3.429518706572951e-06, + "loss": 1.5296, + "step": 53580 + }, + { + "epoch": 0.83, + "learning_rate": 3.423361297916744e-06, + "loss": 1.5202, + "step": 53590 + }, + { + "epoch": 0.83, + "learning_rate": 3.4172090154806796e-06, + "loss": 1.4876, + "step": 53600 + }, + { + "epoch": 0.83, + "learning_rate": 3.411061860726436e-06, + "loss": 1.4769, + "step": 53610 + }, + { + "epoch": 0.83, + "learning_rate": 3.4049198351144535e-06, + "loss": 1.4701, + "step": 53620 + }, + { + "epoch": 0.83, + "learning_rate": 3.398782940103984e-06, + "loss": 1.4828, + "step": 53630 + }, + { + "epoch": 0.83, + "learning_rate": 3.392651177153028e-06, + "loss": 1.4836, + "step": 53640 + }, + { + "epoch": 0.83, + "learning_rate": 3.3865245477183867e-06, + "loss": 1.4957, + "step": 53650 + }, + { + "epoch": 0.83, + "learning_rate": 3.3804030532556373e-06, + "loss": 1.509, + "step": 53660 + }, + { + "epoch": 0.83, + "learning_rate": 3.3742866952191394e-06, + "loss": 1.4779, + "step": 53670 + }, + { + "epoch": 0.83, + "learning_rate": 3.3681754750620243e-06, + "loss": 1.4791, + "step": 53680 + }, + { + "epoch": 0.83, + "learning_rate": 3.362069394236209e-06, + "loss": 1.4602, + "step": 53690 + }, + { + "epoch": 0.83, + "learning_rate": 3.3559684541923958e-06, + "loss": 1.4724, + "step": 53700 + }, + { + "epoch": 0.83, + "learning_rate": 3.34987265638004e-06, + "loss": 1.4471, + "step": 53710 + }, + { + "epoch": 0.83, + "learning_rate": 3.3437820022474143e-06, + "loss": 1.4668, + "step": 53720 + }, + { + "epoch": 0.83, + "learning_rate": 3.337696493241535e-06, + "loss": 1.4773, + "step": 53730 + }, + { + "epoch": 0.83, + "learning_rate": 3.331616130808213e-06, + "loss": 1.4918, + "step": 53740 + }, + { + "epoch": 0.83, + "learning_rate": 3.325540916392031e-06, + "loss": 1.4921, + "step": 53750 + }, + { + "epoch": 0.83, + "learning_rate": 3.319470851436357e-06, + "loss": 1.4536, + "step": 53760 + }, + { + "epoch": 0.83, + "learning_rate": 3.3134059373833126e-06, + "loss": 1.4726, + "step": 53770 + }, + { + "epoch": 0.83, + "learning_rate": 3.3073461756738367e-06, + "loss": 1.4617, + "step": 53780 + }, + { + "epoch": 0.83, + "learning_rate": 3.3012915677475913e-06, + "loss": 1.4737, + "step": 53790 + }, + { + "epoch": 0.83, + "learning_rate": 3.295242115043068e-06, + "loss": 1.4729, + "step": 53800 + }, + { + "epoch": 0.83, + "learning_rate": 3.2891978189974915e-06, + "loss": 1.4921, + "step": 53810 + }, + { + "epoch": 0.84, + "learning_rate": 3.283158681046883e-06, + "loss": 1.4789, + "step": 53820 + }, + { + "epoch": 0.84, + "learning_rate": 3.2771247026260306e-06, + "loss": 1.5095, + "step": 53830 + }, + { + "epoch": 0.84, + "learning_rate": 3.2710958851685012e-06, + "loss": 1.4844, + "step": 53840 + }, + { + "epoch": 0.84, + "learning_rate": 3.2650722301066306e-06, + "loss": 1.4664, + "step": 53850 + }, + { + "epoch": 0.84, + "learning_rate": 3.259053738871534e-06, + "loss": 1.4621, + "step": 53860 + }, + { + "epoch": 0.84, + "learning_rate": 3.2530404128930964e-06, + "loss": 1.487, + "step": 53870 + }, + { + "epoch": 0.84, + "learning_rate": 3.2470322535999657e-06, + "loss": 1.4638, + "step": 53880 + }, + { + "epoch": 0.84, + "learning_rate": 3.241029262419587e-06, + "loss": 1.4855, + "step": 53890 + }, + { + "epoch": 0.84, + "learning_rate": 3.2350314407781523e-06, + "loss": 1.4661, + "step": 53900 + }, + { + "epoch": 0.84, + "learning_rate": 3.2290387901006413e-06, + "loss": 1.4909, + "step": 53910 + }, + { + "epoch": 0.84, + "learning_rate": 3.223051311810796e-06, + "loss": 1.4953, + "step": 53920 + }, + { + "epoch": 0.84, + "learning_rate": 3.2170690073311386e-06, + "loss": 1.4794, + "step": 53930 + }, + { + "epoch": 0.84, + "learning_rate": 3.211091878082942e-06, + "loss": 1.4757, + "step": 53940 + }, + { + "epoch": 0.84, + "learning_rate": 3.2051199254862886e-06, + "loss": 1.4564, + "step": 53950 + }, + { + "epoch": 0.84, + "learning_rate": 3.1991531509599855e-06, + "loss": 1.4654, + "step": 53960 + }, + { + "epoch": 0.84, + "learning_rate": 3.1931915559216372e-06, + "loss": 1.4879, + "step": 53970 + }, + { + "epoch": 0.84, + "learning_rate": 3.1872351417876145e-06, + "loss": 1.4954, + "step": 53980 + }, + { + "epoch": 0.84, + "learning_rate": 3.1812839099730514e-06, + "loss": 1.4728, + "step": 53990 + }, + { + "epoch": 0.84, + "learning_rate": 3.1753378618918557e-06, + "loss": 1.5115, + "step": 54000 + }, + { + "epoch": 0.84, + "learning_rate": 3.1693969989566945e-06, + "loss": 1.4837, + "step": 54010 + }, + { + "epoch": 0.84, + "learning_rate": 3.1634613225790173e-06, + "loss": 1.4774, + "step": 54020 + }, + { + "epoch": 0.84, + "learning_rate": 3.1575308341690314e-06, + "loss": 1.4913, + "step": 54030 + }, + { + "epoch": 0.84, + "learning_rate": 3.1516055351357183e-06, + "loss": 1.505, + "step": 54040 + }, + { + "epoch": 0.84, + "learning_rate": 3.1456854268868046e-06, + "loss": 1.5045, + "step": 54050 + }, + { + "epoch": 0.84, + "learning_rate": 3.139770510828824e-06, + "loss": 1.5235, + "step": 54060 + }, + { + "epoch": 0.84, + "learning_rate": 3.133860788367041e-06, + "loss": 1.538, + "step": 54070 + }, + { + "epoch": 0.84, + "learning_rate": 3.1279562609054987e-06, + "loss": 1.5155, + "step": 54080 + }, + { + "epoch": 0.84, + "learning_rate": 3.122056929847009e-06, + "loss": 1.5252, + "step": 54090 + }, + { + "epoch": 0.84, + "learning_rate": 3.1161627965931527e-06, + "loss": 1.5092, + "step": 54100 + }, + { + "epoch": 0.84, + "learning_rate": 3.110273862544255e-06, + "loss": 1.5066, + "step": 54110 + }, + { + "epoch": 0.84, + "learning_rate": 3.1043901290994382e-06, + "loss": 1.4899, + "step": 54120 + }, + { + "epoch": 0.84, + "learning_rate": 3.098511597656556e-06, + "loss": 1.4912, + "step": 54130 + }, + { + "epoch": 0.84, + "learning_rate": 3.092638269612247e-06, + "loss": 1.4896, + "step": 54140 + }, + { + "epoch": 0.84, + "learning_rate": 3.0867701463619085e-06, + "loss": 1.4893, + "step": 54150 + }, + { + "epoch": 0.84, + "learning_rate": 3.080907229299704e-06, + "loss": 1.4853, + "step": 54160 + }, + { + "epoch": 0.84, + "learning_rate": 3.0750495198185484e-06, + "loss": 1.5088, + "step": 54170 + }, + { + "epoch": 0.84, + "learning_rate": 3.0691970193101357e-06, + "loss": 1.4546, + "step": 54180 + }, + { + "epoch": 0.84, + "learning_rate": 3.063349729164919e-06, + "loss": 1.475, + "step": 54190 + }, + { + "epoch": 0.84, + "learning_rate": 3.0575076507720874e-06, + "loss": 1.4698, + "step": 54200 + }, + { + "epoch": 0.84, + "learning_rate": 3.051670785519639e-06, + "loss": 1.4663, + "step": 54210 + }, + { + "epoch": 0.84, + "learning_rate": 3.045839134794293e-06, + "loss": 1.4958, + "step": 54220 + }, + { + "epoch": 0.84, + "learning_rate": 3.0400126999815453e-06, + "loss": 1.5144, + "step": 54230 + }, + { + "epoch": 0.84, + "learning_rate": 3.0341914824656537e-06, + "loss": 1.5114, + "step": 54240 + }, + { + "epoch": 0.84, + "learning_rate": 3.0283754836296362e-06, + "loss": 1.4832, + "step": 54250 + }, + { + "epoch": 0.84, + "learning_rate": 3.0225647048552686e-06, + "loss": 1.4687, + "step": 54260 + }, + { + "epoch": 0.84, + "learning_rate": 3.016759147523085e-06, + "loss": 1.5204, + "step": 54270 + }, + { + "epoch": 0.84, + "learning_rate": 3.0109588130123824e-06, + "loss": 1.5385, + "step": 54280 + }, + { + "epoch": 0.84, + "learning_rate": 3.0051637027012165e-06, + "loss": 1.5527, + "step": 54290 + }, + { + "epoch": 0.84, + "learning_rate": 2.9993738179664055e-06, + "loss": 1.5715, + "step": 54300 + }, + { + "epoch": 0.84, + "learning_rate": 2.99358916018351e-06, + "loss": 1.5795, + "step": 54310 + }, + { + "epoch": 0.84, + "learning_rate": 2.9878097307268677e-06, + "loss": 1.5761, + "step": 54320 + }, + { + "epoch": 0.84, + "learning_rate": 2.982035530969565e-06, + "loss": 1.5532, + "step": 54330 + }, + { + "epoch": 0.84, + "learning_rate": 2.97626656228345e-06, + "loss": 1.5826, + "step": 54340 + }, + { + "epoch": 0.84, + "learning_rate": 2.9705028260391203e-06, + "loss": 1.5647, + "step": 54350 + }, + { + "epoch": 0.84, + "learning_rate": 2.9647443236059476e-06, + "loss": 1.5685, + "step": 54360 + }, + { + "epoch": 0.84, + "learning_rate": 2.9589910563520283e-06, + "loss": 1.5711, + "step": 54370 + }, + { + "epoch": 0.84, + "learning_rate": 2.9532430256442563e-06, + "loss": 1.5612, + "step": 54380 + }, + { + "epoch": 0.84, + "learning_rate": 2.9475002328482465e-06, + "loss": 1.5673, + "step": 54390 + }, + { + "epoch": 0.84, + "learning_rate": 2.941762679328386e-06, + "loss": 1.5545, + "step": 54400 + }, + { + "epoch": 0.84, + "learning_rate": 2.9360303664478146e-06, + "loss": 1.5784, + "step": 54410 + }, + { + "epoch": 0.84, + "learning_rate": 2.9303032955684264e-06, + "loss": 1.5757, + "step": 54420 + }, + { + "epoch": 0.84, + "learning_rate": 2.9245814680508716e-06, + "loss": 1.5635, + "step": 54430 + }, + { + "epoch": 0.84, + "learning_rate": 2.918864885254549e-06, + "loss": 1.5525, + "step": 54440 + }, + { + "epoch": 0.84, + "learning_rate": 2.9131535485376253e-06, + "loss": 1.5299, + "step": 54450 + }, + { + "epoch": 0.84, + "learning_rate": 2.907447459256993e-06, + "loss": 1.5315, + "step": 54460 + }, + { + "epoch": 0.85, + "learning_rate": 2.9017466187683364e-06, + "loss": 1.4961, + "step": 54470 + }, + { + "epoch": 0.85, + "learning_rate": 2.8960510284260554e-06, + "loss": 1.4871, + "step": 54480 + }, + { + "epoch": 0.85, + "learning_rate": 2.890360689583335e-06, + "loss": 1.5083, + "step": 54490 + }, + { + "epoch": 0.85, + "learning_rate": 2.884675603592085e-06, + "loss": 1.4622, + "step": 54500 + }, + { + "epoch": 0.85, + "learning_rate": 2.8789957718029837e-06, + "loss": 1.4939, + "step": 54510 + }, + { + "epoch": 0.85, + "learning_rate": 2.8733211955654567e-06, + "loss": 1.4899, + "step": 54520 + }, + { + "epoch": 0.85, + "learning_rate": 2.8676518762276867e-06, + "loss": 1.4765, + "step": 54530 + }, + { + "epoch": 0.85, + "learning_rate": 2.8619878151365826e-06, + "loss": 1.4784, + "step": 54540 + }, + { + "epoch": 0.85, + "learning_rate": 2.8563290136378486e-06, + "loss": 1.4604, + "step": 54550 + }, + { + "epoch": 0.85, + "learning_rate": 2.850675473075898e-06, + "loss": 1.4745, + "step": 54560 + }, + { + "epoch": 0.85, + "learning_rate": 2.8450271947939117e-06, + "loss": 1.4924, + "step": 54570 + }, + { + "epoch": 0.85, + "learning_rate": 2.8393841801338194e-06, + "loss": 1.4574, + "step": 54580 + }, + { + "epoch": 0.85, + "learning_rate": 2.8337464304363043e-06, + "loss": 1.479, + "step": 54590 + }, + { + "epoch": 0.85, + "learning_rate": 2.8281139470407863e-06, + "loss": 1.4636, + "step": 54600 + }, + { + "epoch": 0.85, + "learning_rate": 2.822486731285448e-06, + "loss": 1.463, + "step": 54610 + }, + { + "epoch": 0.85, + "learning_rate": 2.81686478450722e-06, + "loss": 1.4934, + "step": 54620 + }, + { + "epoch": 0.85, + "learning_rate": 2.8112481080417557e-06, + "loss": 1.4953, + "step": 54630 + }, + { + "epoch": 0.85, + "learning_rate": 2.8056367032234966e-06, + "loss": 1.5016, + "step": 54640 + }, + { + "epoch": 0.85, + "learning_rate": 2.8000305713856007e-06, + "loss": 1.504, + "step": 54650 + }, + { + "epoch": 0.85, + "learning_rate": 2.794429713859986e-06, + "loss": 1.4807, + "step": 54660 + }, + { + "epoch": 0.85, + "learning_rate": 2.7888341319773135e-06, + "loss": 1.4723, + "step": 54670 + }, + { + "epoch": 0.85, + "learning_rate": 2.7832438270670005e-06, + "loss": 1.5035, + "step": 54680 + }, + { + "epoch": 0.85, + "learning_rate": 2.777658800457186e-06, + "loss": 1.475, + "step": 54690 + }, + { + "epoch": 0.85, + "learning_rate": 2.7720790534747936e-06, + "loss": 1.4958, + "step": 54700 + }, + { + "epoch": 0.85, + "learning_rate": 2.766504587445448e-06, + "loss": 1.4911, + "step": 54710 + }, + { + "epoch": 0.85, + "learning_rate": 2.7609354036935623e-06, + "loss": 1.4868, + "step": 54720 + }, + { + "epoch": 0.85, + "learning_rate": 2.755371503542259e-06, + "loss": 1.5188, + "step": 54730 + }, + { + "epoch": 0.85, + "learning_rate": 2.7498128883134273e-06, + "loss": 1.5067, + "step": 54740 + }, + { + "epoch": 0.85, + "learning_rate": 2.7442595593276917e-06, + "loss": 1.5195, + "step": 54750 + }, + { + "epoch": 0.85, + "learning_rate": 2.7387115179044244e-06, + "loss": 1.5164, + "step": 54760 + }, + { + "epoch": 0.85, + "learning_rate": 2.7331687653617377e-06, + "loss": 1.4779, + "step": 54770 + }, + { + "epoch": 0.85, + "learning_rate": 2.7276313030164922e-06, + "loss": 1.5089, + "step": 54780 + }, + { + "epoch": 0.85, + "learning_rate": 2.7220991321842944e-06, + "loss": 1.4609, + "step": 54790 + }, + { + "epoch": 0.85, + "learning_rate": 2.7165722541794697e-06, + "loss": 1.4831, + "step": 54800 + }, + { + "epoch": 0.85, + "learning_rate": 2.7110506703151295e-06, + "loss": 1.4776, + "step": 54810 + }, + { + "epoch": 0.85, + "learning_rate": 2.705534381903085e-06, + "loss": 1.4824, + "step": 54820 + }, + { + "epoch": 0.85, + "learning_rate": 2.700023390253911e-06, + "loss": 1.4935, + "step": 54830 + }, + { + "epoch": 0.85, + "learning_rate": 2.694517696676921e-06, + "loss": 1.4624, + "step": 54840 + }, + { + "epoch": 0.85, + "learning_rate": 2.6890173024801706e-06, + "loss": 1.4705, + "step": 54850 + }, + { + "epoch": 0.85, + "learning_rate": 2.683522208970443e-06, + "loss": 1.4675, + "step": 54860 + }, + { + "epoch": 0.85, + "learning_rate": 2.678032417453294e-06, + "loss": 1.5274, + "step": 54870 + }, + { + "epoch": 0.85, + "learning_rate": 2.6725479292329797e-06, + "loss": 1.4628, + "step": 54880 + }, + { + "epoch": 0.85, + "learning_rate": 2.667068745612525e-06, + "loss": 1.4946, + "step": 54890 + }, + { + "epoch": 0.85, + "learning_rate": 2.6615948678936826e-06, + "loss": 1.4843, + "step": 54900 + }, + { + "epoch": 0.85, + "learning_rate": 2.6561262973769456e-06, + "loss": 1.4765, + "step": 54910 + }, + { + "epoch": 0.85, + "learning_rate": 2.6506630353615502e-06, + "loss": 1.4877, + "step": 54920 + }, + { + "epoch": 0.85, + "learning_rate": 2.645205083145469e-06, + "loss": 1.4913, + "step": 54930 + }, + { + "epoch": 0.85, + "learning_rate": 2.6397524420254116e-06, + "loss": 1.4506, + "step": 54940 + }, + { + "epoch": 0.85, + "learning_rate": 2.6343051132968284e-06, + "loss": 1.4755, + "step": 54950 + }, + { + "epoch": 0.85, + "learning_rate": 2.6288630982539114e-06, + "loss": 1.4976, + "step": 54960 + }, + { + "epoch": 0.85, + "learning_rate": 2.6234263981895685e-06, + "loss": 1.4658, + "step": 54970 + }, + { + "epoch": 0.85, + "learning_rate": 2.6179950143954827e-06, + "loss": 1.4677, + "step": 54980 + }, + { + "epoch": 0.85, + "learning_rate": 2.6125689481620393e-06, + "loss": 1.4633, + "step": 54990 + }, + { + "epoch": 0.85, + "learning_rate": 2.6071482007783753e-06, + "loss": 1.4836, + "step": 55000 + }, + { + "epoch": 0.85, + "eval_loss": 1.5818219184875488, + "eval_runtime": 82.083, + "eval_samples_per_second": 36.548, + "eval_steps_per_second": 4.569, + "step": 55000 + }, + { + "epoch": 0.85, + "learning_rate": 2.601732773532367e-06, + "loss": 1.4945, + "step": 55010 + }, + { + "epoch": 0.85, + "learning_rate": 2.596322667710624e-06, + "loss": 1.4528, + "step": 55020 + }, + { + "epoch": 0.85, + "learning_rate": 2.5909178845984734e-06, + "loss": 1.4597, + "step": 55030 + }, + { + "epoch": 0.85, + "learning_rate": 2.5855184254800185e-06, + "loss": 1.4065, + "step": 55040 + }, + { + "epoch": 0.85, + "learning_rate": 2.5801242916380557e-06, + "loss": 1.4753, + "step": 55050 + }, + { + "epoch": 0.85, + "learning_rate": 2.5747354843541403e-06, + "loss": 1.4535, + "step": 55060 + }, + { + "epoch": 0.85, + "learning_rate": 2.569352004908554e-06, + "loss": 1.4683, + "step": 55070 + }, + { + "epoch": 0.85, + "learning_rate": 2.5639738545803147e-06, + "loss": 1.4666, + "step": 55080 + }, + { + "epoch": 0.85, + "learning_rate": 2.558601034647176e-06, + "loss": 1.4752, + "step": 55090 + }, + { + "epoch": 0.85, + "learning_rate": 2.5532335463856222e-06, + "loss": 1.4936, + "step": 55100 + }, + { + "epoch": 0.86, + "learning_rate": 2.5478713910708745e-06, + "loss": 1.4668, + "step": 55110 + }, + { + "epoch": 0.86, + "learning_rate": 2.54251456997687e-06, + "loss": 1.4776, + "step": 55120 + }, + { + "epoch": 0.86, + "learning_rate": 2.537163084376315e-06, + "loss": 1.4783, + "step": 55130 + }, + { + "epoch": 0.86, + "learning_rate": 2.5318169355406096e-06, + "loss": 1.5109, + "step": 55140 + }, + { + "epoch": 0.86, + "learning_rate": 2.5264761247399045e-06, + "loss": 1.4534, + "step": 55150 + }, + { + "epoch": 0.86, + "learning_rate": 2.521140653243087e-06, + "loss": 1.4642, + "step": 55160 + }, + { + "epoch": 0.86, + "learning_rate": 2.51581052231776e-06, + "loss": 1.4675, + "step": 55170 + }, + { + "epoch": 0.86, + "learning_rate": 2.510485733230275e-06, + "loss": 1.483, + "step": 55180 + }, + { + "epoch": 0.86, + "learning_rate": 2.5051662872457056e-06, + "loss": 1.4908, + "step": 55190 + }, + { + "epoch": 0.86, + "learning_rate": 2.499852185627841e-06, + "loss": 1.4799, + "step": 55200 + }, + { + "epoch": 0.86, + "learning_rate": 2.4945434296392374e-06, + "loss": 1.4626, + "step": 55210 + }, + { + "epoch": 0.86, + "learning_rate": 2.4892400205411475e-06, + "loss": 1.4683, + "step": 55220 + }, + { + "epoch": 0.86, + "learning_rate": 2.4839419595935637e-06, + "loss": 1.4457, + "step": 55230 + }, + { + "epoch": 0.86, + "learning_rate": 2.4786492480552174e-06, + "loss": 1.4914, + "step": 55240 + }, + { + "epoch": 0.86, + "learning_rate": 2.4733618871835536e-06, + "loss": 1.5014, + "step": 55250 + }, + { + "epoch": 0.86, + "learning_rate": 2.468079878234761e-06, + "loss": 1.5026, + "step": 55260 + }, + { + "epoch": 0.86, + "learning_rate": 2.462803222463744e-06, + "loss": 1.4685, + "step": 55270 + }, + { + "epoch": 0.86, + "learning_rate": 2.457531921124151e-06, + "loss": 1.4845, + "step": 55280 + }, + { + "epoch": 0.86, + "learning_rate": 2.4522659754683293e-06, + "loss": 1.4814, + "step": 55290 + }, + { + "epoch": 0.86, + "learning_rate": 2.4470053867473946e-06, + "loss": 1.4769, + "step": 55300 + }, + { + "epoch": 0.86, + "learning_rate": 2.4417501562111506e-06, + "loss": 1.4792, + "step": 55310 + }, + { + "epoch": 0.86, + "learning_rate": 2.436500285108151e-06, + "loss": 1.4822, + "step": 55320 + }, + { + "epoch": 0.86, + "learning_rate": 2.431255774685673e-06, + "loss": 1.486, + "step": 55330 + }, + { + "epoch": 0.86, + "learning_rate": 2.42601662618972e-06, + "loss": 1.463, + "step": 55340 + }, + { + "epoch": 0.86, + "learning_rate": 2.4207828408650046e-06, + "loss": 1.4841, + "step": 55350 + }, + { + "epoch": 0.86, + "learning_rate": 2.415554419955002e-06, + "loss": 1.4941, + "step": 55360 + }, + { + "epoch": 0.86, + "learning_rate": 2.4103313647018704e-06, + "loss": 1.4911, + "step": 55370 + }, + { + "epoch": 0.86, + "learning_rate": 2.4051136763465264e-06, + "loss": 1.4931, + "step": 55380 + }, + { + "epoch": 0.86, + "learning_rate": 2.3999013561285922e-06, + "loss": 1.5412, + "step": 55390 + }, + { + "epoch": 0.86, + "learning_rate": 2.3946944052864213e-06, + "loss": 1.5073, + "step": 55400 + }, + { + "epoch": 0.86, + "learning_rate": 2.3894928250570962e-06, + "loss": 1.4784, + "step": 55410 + }, + { + "epoch": 0.86, + "learning_rate": 2.3842966166764123e-06, + "loss": 1.4648, + "step": 55420 + }, + { + "epoch": 0.86, + "learning_rate": 2.3791057813789015e-06, + "loss": 1.4795, + "step": 55430 + }, + { + "epoch": 0.86, + "learning_rate": 2.373920320397807e-06, + "loss": 1.4737, + "step": 55440 + }, + { + "epoch": 0.86, + "learning_rate": 2.3687402349651084e-06, + "loss": 1.5094, + "step": 55450 + }, + { + "epoch": 0.86, + "learning_rate": 2.3635655263114843e-06, + "loss": 1.4934, + "step": 55460 + }, + { + "epoch": 0.86, + "learning_rate": 2.3583961956663735e-06, + "loss": 1.4758, + "step": 55470 + }, + { + "epoch": 0.86, + "learning_rate": 2.3532322442579023e-06, + "loss": 1.4553, + "step": 55480 + }, + { + "epoch": 0.86, + "learning_rate": 2.348073673312934e-06, + "loss": 1.5223, + "step": 55490 + }, + { + "epoch": 0.86, + "learning_rate": 2.342920484057054e-06, + "loss": 1.4714, + "step": 55500 + }, + { + "epoch": 0.86, + "learning_rate": 2.33777267771457e-06, + "loss": 1.4807, + "step": 55510 + }, + { + "epoch": 0.86, + "learning_rate": 2.3326302555084943e-06, + "loss": 1.4728, + "step": 55520 + }, + { + "epoch": 0.86, + "learning_rate": 2.3274932186605954e-06, + "loss": 1.4819, + "step": 55530 + }, + { + "epoch": 0.86, + "learning_rate": 2.3223615683913207e-06, + "loss": 1.4787, + "step": 55540 + }, + { + "epoch": 0.86, + "learning_rate": 2.317235305919868e-06, + "loss": 1.458, + "step": 55550 + }, + { + "epoch": 0.86, + "learning_rate": 2.3121144324641437e-06, + "loss": 1.4782, + "step": 55560 + }, + { + "epoch": 0.86, + "learning_rate": 2.3069989492407738e-06, + "loss": 1.5676, + "step": 55570 + }, + { + "epoch": 0.86, + "learning_rate": 2.3018888574651037e-06, + "loss": 1.4751, + "step": 55580 + }, + { + "epoch": 0.86, + "learning_rate": 2.2967841583512033e-06, + "loss": 1.4754, + "step": 55590 + }, + { + "epoch": 0.86, + "learning_rate": 2.291684853111856e-06, + "loss": 1.481, + "step": 55600 + }, + { + "epoch": 0.86, + "learning_rate": 2.2865909429585552e-06, + "loss": 1.4759, + "step": 55610 + }, + { + "epoch": 0.86, + "learning_rate": 2.2815024291015367e-06, + "loss": 1.4698, + "step": 55620 + }, + { + "epoch": 0.86, + "learning_rate": 2.276419312749725e-06, + "loss": 1.4414, + "step": 55630 + }, + { + "epoch": 0.86, + "learning_rate": 2.271341595110793e-06, + "loss": 1.4347, + "step": 55640 + }, + { + "epoch": 0.86, + "learning_rate": 2.266269277391103e-06, + "loss": 1.4929, + "step": 55650 + }, + { + "epoch": 0.86, + "learning_rate": 2.2612023607957474e-06, + "loss": 1.4615, + "step": 55660 + }, + { + "epoch": 0.86, + "learning_rate": 2.256140846528537e-06, + "loss": 1.476, + "step": 55670 + }, + { + "epoch": 0.86, + "learning_rate": 2.251084735792003e-06, + "loss": 1.4852, + "step": 55680 + }, + { + "epoch": 0.86, + "learning_rate": 2.2460340297873665e-06, + "loss": 1.5124, + "step": 55690 + }, + { + "epoch": 0.86, + "learning_rate": 2.2409887297146047e-06, + "loss": 1.4725, + "step": 55700 + }, + { + "epoch": 0.86, + "learning_rate": 2.235948836772378e-06, + "loss": 1.4448, + "step": 55710 + }, + { + "epoch": 0.86, + "learning_rate": 2.2309143521580784e-06, + "loss": 1.52, + "step": 55720 + }, + { + "epoch": 0.86, + "learning_rate": 2.2258852770678075e-06, + "loss": 1.4537, + "step": 55730 + }, + { + "epoch": 0.86, + "learning_rate": 2.2208616126963816e-06, + "loss": 1.4882, + "step": 55740 + }, + { + "epoch": 0.86, + "learning_rate": 2.215843360237338e-06, + "loss": 1.473, + "step": 55750 + }, + { + "epoch": 0.87, + "learning_rate": 2.2108305208829144e-06, + "loss": 1.4748, + "step": 55760 + }, + { + "epoch": 0.87, + "learning_rate": 2.205823095824083e-06, + "loss": 1.492, + "step": 55770 + }, + { + "epoch": 0.87, + "learning_rate": 2.2008210862505013e-06, + "loss": 1.4789, + "step": 55780 + }, + { + "epoch": 0.87, + "learning_rate": 2.1958244933505734e-06, + "loss": 1.457, + "step": 55790 + }, + { + "epoch": 0.87, + "learning_rate": 2.190833318311389e-06, + "loss": 1.4145, + "step": 55800 + }, + { + "epoch": 0.87, + "learning_rate": 2.185847562318763e-06, + "loss": 1.4016, + "step": 55810 + }, + { + "epoch": 0.87, + "learning_rate": 2.1808672265572232e-06, + "loss": 1.5047, + "step": 55820 + }, + { + "epoch": 0.87, + "learning_rate": 2.1758923122100123e-06, + "loss": 1.4851, + "step": 55830 + }, + { + "epoch": 0.87, + "learning_rate": 2.1709228204590622e-06, + "loss": 1.5013, + "step": 55840 + }, + { + "epoch": 0.87, + "learning_rate": 2.165958752485059e-06, + "loss": 1.5312, + "step": 55850 + }, + { + "epoch": 0.87, + "learning_rate": 2.161000109467354e-06, + "loss": 1.5337, + "step": 55860 + }, + { + "epoch": 0.87, + "learning_rate": 2.1560468925840487e-06, + "loss": 1.499, + "step": 55870 + }, + { + "epoch": 0.87, + "learning_rate": 2.151099103011925e-06, + "loss": 1.5017, + "step": 55880 + }, + { + "epoch": 0.87, + "learning_rate": 2.1461567419264944e-06, + "loss": 1.4837, + "step": 55890 + }, + { + "epoch": 0.87, + "learning_rate": 2.1412198105019738e-06, + "loss": 1.4676, + "step": 55900 + }, + { + "epoch": 0.87, + "learning_rate": 2.1362883099112855e-06, + "loss": 1.4883, + "step": 55910 + }, + { + "epoch": 0.87, + "learning_rate": 2.131362241326068e-06, + "loss": 1.4909, + "step": 55920 + }, + { + "epoch": 0.87, + "learning_rate": 2.126441605916665e-06, + "loss": 1.4762, + "step": 55930 + }, + { + "epoch": 0.87, + "learning_rate": 2.1215264048521338e-06, + "loss": 1.4679, + "step": 55940 + }, + { + "epoch": 0.87, + "learning_rate": 2.1166166393002284e-06, + "loss": 1.4802, + "step": 55950 + }, + { + "epoch": 0.87, + "learning_rate": 2.1117123104274327e-06, + "loss": 1.4682, + "step": 55960 + }, + { + "epoch": 0.87, + "learning_rate": 2.1068134193989203e-06, + "loss": 1.4743, + "step": 55970 + }, + { + "epoch": 0.87, + "learning_rate": 2.1019199673785766e-06, + "loss": 1.4565, + "step": 55980 + }, + { + "epoch": 0.87, + "learning_rate": 2.0970319555290025e-06, + "loss": 1.4782, + "step": 55990 + }, + { + "epoch": 0.87, + "learning_rate": 2.092149385011502e-06, + "loss": 1.4957, + "step": 56000 + }, + { + "epoch": 0.87, + "learning_rate": 2.087272256986078e-06, + "loss": 1.4825, + "step": 56010 + }, + { + "epoch": 0.87, + "learning_rate": 2.0824005726114592e-06, + "loss": 1.4591, + "step": 56020 + }, + { + "epoch": 0.87, + "learning_rate": 2.0775343330450612e-06, + "loss": 1.4867, + "step": 56030 + }, + { + "epoch": 0.87, + "learning_rate": 2.072673539443018e-06, + "loss": 1.4664, + "step": 56040 + }, + { + "epoch": 0.87, + "learning_rate": 2.0678181929601663e-06, + "loss": 1.4842, + "step": 56050 + }, + { + "epoch": 0.87, + "learning_rate": 2.0629682947500506e-06, + "loss": 1.4746, + "step": 56060 + }, + { + "epoch": 0.87, + "learning_rate": 2.0581238459649156e-06, + "loss": 1.4875, + "step": 56070 + }, + { + "epoch": 0.87, + "learning_rate": 2.0532848477557182e-06, + "loss": 1.5043, + "step": 56080 + }, + { + "epoch": 0.87, + "learning_rate": 2.0484513012721168e-06, + "loss": 1.473, + "step": 56090 + }, + { + "epoch": 0.87, + "learning_rate": 2.0436232076624706e-06, + "loss": 1.4668, + "step": 56100 + }, + { + "epoch": 0.87, + "learning_rate": 2.0388005680738594e-06, + "loss": 1.4794, + "step": 56110 + }, + { + "epoch": 0.87, + "learning_rate": 2.0339833836520357e-06, + "loss": 1.4773, + "step": 56120 + }, + { + "epoch": 0.87, + "learning_rate": 2.0291716555414952e-06, + "loss": 1.4871, + "step": 56130 + }, + { + "epoch": 0.87, + "learning_rate": 2.0243653848854042e-06, + "loss": 1.4795, + "step": 56140 + }, + { + "epoch": 0.87, + "learning_rate": 2.0195645728256525e-06, + "loss": 1.4831, + "step": 56150 + }, + { + "epoch": 0.87, + "learning_rate": 2.014769220502824e-06, + "loss": 1.4704, + "step": 56160 + }, + { + "epoch": 0.87, + "learning_rate": 2.0099793290562135e-06, + "loss": 1.4807, + "step": 56170 + }, + { + "epoch": 0.87, + "learning_rate": 2.0051948996237996e-06, + "loss": 1.4514, + "step": 56180 + }, + { + "epoch": 0.87, + "learning_rate": 2.0004159333422924e-06, + "loss": 1.4764, + "step": 56190 + }, + { + "epoch": 0.87, + "learning_rate": 1.995642431347078e-06, + "loss": 1.4826, + "step": 56200 + }, + { + "epoch": 0.87, + "learning_rate": 1.9908743947722577e-06, + "loss": 1.4854, + "step": 56210 + }, + { + "epoch": 0.87, + "learning_rate": 1.986111824750628e-06, + "loss": 1.4933, + "step": 56220 + }, + { + "epoch": 0.87, + "learning_rate": 1.981354722413692e-06, + "loss": 1.4825, + "step": 56230 + }, + { + "epoch": 0.87, + "learning_rate": 1.9766030888916513e-06, + "loss": 1.451, + "step": 56240 + }, + { + "epoch": 0.87, + "learning_rate": 1.971856925313409e-06, + "loss": 1.4993, + "step": 56250 + }, + { + "epoch": 0.87, + "learning_rate": 1.9671162328065734e-06, + "loss": 1.4853, + "step": 56260 + }, + { + "epoch": 0.87, + "learning_rate": 1.96238101249743e-06, + "loss": 1.5184, + "step": 56270 + }, + { + "epoch": 0.87, + "learning_rate": 1.957651265511004e-06, + "loss": 1.5228, + "step": 56280 + }, + { + "epoch": 0.87, + "learning_rate": 1.9529269929709798e-06, + "loss": 1.5102, + "step": 56290 + }, + { + "epoch": 0.87, + "learning_rate": 1.9482081959997704e-06, + "loss": 1.4951, + "step": 56300 + }, + { + "epoch": 0.87, + "learning_rate": 1.9434948757184714e-06, + "loss": 1.5029, + "step": 56310 + }, + { + "epoch": 0.87, + "learning_rate": 1.9387870332468833e-06, + "loss": 1.501, + "step": 56320 + }, + { + "epoch": 0.87, + "learning_rate": 1.9340846697035066e-06, + "loss": 1.4929, + "step": 56330 + }, + { + "epoch": 0.87, + "learning_rate": 1.9293877862055415e-06, + "loss": 1.4982, + "step": 56340 + }, + { + "epoch": 0.87, + "learning_rate": 1.9246963838688683e-06, + "loss": 1.4866, + "step": 56350 + }, + { + "epoch": 0.87, + "learning_rate": 1.9200104638081e-06, + "loss": 1.4976, + "step": 56360 + }, + { + "epoch": 0.87, + "learning_rate": 1.915330027136511e-06, + "loss": 1.5074, + "step": 56370 + }, + { + "epoch": 0.87, + "learning_rate": 1.9106550749660945e-06, + "loss": 1.4742, + "step": 56380 + }, + { + "epoch": 0.87, + "learning_rate": 1.9059856084075316e-06, + "loss": 1.4736, + "step": 56390 + }, + { + "epoch": 0.88, + "learning_rate": 1.90132162857021e-06, + "loss": 1.4597, + "step": 56400 + }, + { + "epoch": 0.88, + "learning_rate": 1.8966631365622017e-06, + "loss": 1.48, + "step": 56410 + }, + { + "epoch": 0.88, + "learning_rate": 1.8920101334902795e-06, + "loss": 1.4928, + "step": 56420 + }, + { + "epoch": 0.88, + "learning_rate": 1.8873626204599233e-06, + "loss": 1.5158, + "step": 56430 + }, + { + "epoch": 0.88, + "learning_rate": 1.88272059857528e-06, + "loss": 1.4891, + "step": 56440 + }, + { + "epoch": 0.88, + "learning_rate": 1.8780840689392288e-06, + "loss": 1.4735, + "step": 56450 + }, + { + "epoch": 0.88, + "learning_rate": 1.8734530326533134e-06, + "loss": 1.5343, + "step": 56460 + }, + { + "epoch": 0.88, + "learning_rate": 1.8688274908177872e-06, + "loss": 1.5748, + "step": 56470 + }, + { + "epoch": 0.88, + "learning_rate": 1.8642074445315987e-06, + "loss": 1.5613, + "step": 56480 + }, + { + "epoch": 0.88, + "learning_rate": 1.8595928948923842e-06, + "loss": 1.5513, + "step": 56490 + }, + { + "epoch": 0.88, + "learning_rate": 1.85498384299648e-06, + "loss": 1.5737, + "step": 56500 + }, + { + "epoch": 0.88, + "learning_rate": 1.8503802899389105e-06, + "loss": 1.569, + "step": 56510 + }, + { + "epoch": 0.88, + "learning_rate": 1.8457822368134037e-06, + "loss": 1.5671, + "step": 56520 + }, + { + "epoch": 0.88, + "learning_rate": 1.8411896847123633e-06, + "loss": 1.5859, + "step": 56530 + }, + { + "epoch": 0.88, + "learning_rate": 1.8366026347269084e-06, + "loss": 1.5768, + "step": 56540 + }, + { + "epoch": 0.88, + "learning_rate": 1.8320210879468307e-06, + "loss": 1.5597, + "step": 56550 + }, + { + "epoch": 0.88, + "learning_rate": 1.8274450454606268e-06, + "loss": 1.5799, + "step": 56560 + }, + { + "epoch": 0.88, + "learning_rate": 1.8228745083554794e-06, + "loss": 1.5591, + "step": 56570 + }, + { + "epoch": 0.88, + "learning_rate": 1.818309477717267e-06, + "loss": 1.5755, + "step": 56580 + }, + { + "epoch": 0.88, + "learning_rate": 1.8137499546305614e-06, + "loss": 1.5762, + "step": 56590 + }, + { + "epoch": 0.88, + "learning_rate": 1.8091959401786234e-06, + "loss": 1.5526, + "step": 56600 + }, + { + "epoch": 0.88, + "learning_rate": 1.8046474354433934e-06, + "loss": 1.5328, + "step": 56610 + }, + { + "epoch": 0.88, + "learning_rate": 1.8001044415055318e-06, + "loss": 1.5285, + "step": 56620 + }, + { + "epoch": 0.88, + "learning_rate": 1.7955669594443587e-06, + "loss": 1.4867, + "step": 56630 + }, + { + "epoch": 0.88, + "learning_rate": 1.7910349903379002e-06, + "loss": 1.4941, + "step": 56640 + }, + { + "epoch": 0.88, + "learning_rate": 1.7865085352628736e-06, + "loss": 1.4921, + "step": 56650 + }, + { + "epoch": 0.88, + "learning_rate": 1.7819875952946818e-06, + "loss": 1.4873, + "step": 56660 + }, + { + "epoch": 0.88, + "learning_rate": 1.7774721715074188e-06, + "loss": 1.4881, + "step": 56670 + }, + { + "epoch": 0.88, + "learning_rate": 1.7729622649738652e-06, + "loss": 1.4851, + "step": 56680 + }, + { + "epoch": 0.88, + "learning_rate": 1.768457876765503e-06, + "loss": 1.4649, + "step": 56690 + }, + { + "epoch": 0.88, + "learning_rate": 1.7639590079524759e-06, + "loss": 1.4728, + "step": 56700 + }, + { + "epoch": 0.88, + "learning_rate": 1.759465659603654e-06, + "loss": 1.4739, + "step": 56710 + }, + { + "epoch": 0.88, + "learning_rate": 1.7549778327865613e-06, + "loss": 1.5082, + "step": 56720 + }, + { + "epoch": 0.88, + "learning_rate": 1.7504955285674279e-06, + "loss": 1.5347, + "step": 56730 + }, + { + "epoch": 0.88, + "learning_rate": 1.7460187480111716e-06, + "loss": 1.5097, + "step": 56740 + }, + { + "epoch": 0.88, + "learning_rate": 1.7415474921813973e-06, + "loss": 1.5348, + "step": 56750 + }, + { + "epoch": 0.88, + "learning_rate": 1.73708176214038e-06, + "loss": 1.4791, + "step": 56760 + }, + { + "epoch": 0.88, + "learning_rate": 1.7326215589491184e-06, + "loss": 1.516, + "step": 56770 + }, + { + "epoch": 0.88, + "learning_rate": 1.7281668836672537e-06, + "loss": 1.5425, + "step": 56780 + }, + { + "epoch": 0.88, + "learning_rate": 1.7237177373531587e-06, + "loss": 1.5109, + "step": 56790 + }, + { + "epoch": 0.88, + "learning_rate": 1.719274121063852e-06, + "loss": 1.4886, + "step": 56800 + }, + { + "epoch": 0.88, + "learning_rate": 1.7148360358550665e-06, + "loss": 1.4978, + "step": 56810 + }, + { + "epoch": 0.88, + "learning_rate": 1.7104034827812088e-06, + "loss": 1.4812, + "step": 56820 + }, + { + "epoch": 0.88, + "learning_rate": 1.705976462895373e-06, + "loss": 1.4446, + "step": 56830 + }, + { + "epoch": 0.88, + "learning_rate": 1.7015549772493394e-06, + "loss": 1.4751, + "step": 56840 + }, + { + "epoch": 0.88, + "learning_rate": 1.697139026893571e-06, + "loss": 1.4899, + "step": 56850 + }, + { + "epoch": 0.88, + "learning_rate": 1.6927286128772258e-06, + "loss": 1.4905, + "step": 56860 + }, + { + "epoch": 0.88, + "learning_rate": 1.6883237362481264e-06, + "loss": 1.4655, + "step": 56870 + }, + { + "epoch": 0.88, + "learning_rate": 1.6839243980528052e-06, + "loss": 1.4738, + "step": 56880 + }, + { + "epoch": 0.88, + "learning_rate": 1.6795305993364568e-06, + "loss": 1.484, + "step": 56890 + }, + { + "epoch": 0.88, + "learning_rate": 1.6751423411429711e-06, + "loss": 1.4821, + "step": 56900 + }, + { + "epoch": 0.88, + "learning_rate": 1.670759624514917e-06, + "loss": 1.4774, + "step": 56910 + }, + { + "epoch": 0.88, + "learning_rate": 1.666382450493556e-06, + "loss": 1.4952, + "step": 56920 + }, + { + "epoch": 0.88, + "learning_rate": 1.6620108201188116e-06, + "loss": 1.4808, + "step": 56930 + }, + { + "epoch": 0.88, + "learning_rate": 1.6576447344293228e-06, + "loss": 1.4524, + "step": 56940 + }, + { + "epoch": 0.88, + "learning_rate": 1.6532841944623762e-06, + "loss": 1.4955, + "step": 56950 + }, + { + "epoch": 0.88, + "learning_rate": 1.6489292012539682e-06, + "loss": 1.496, + "step": 56960 + }, + { + "epoch": 0.88, + "learning_rate": 1.6445797558387598e-06, + "loss": 1.481, + "step": 56970 + }, + { + "epoch": 0.88, + "learning_rate": 1.6402358592501055e-06, + "loss": 1.5019, + "step": 56980 + }, + { + "epoch": 0.88, + "learning_rate": 1.6358975125200348e-06, + "loss": 1.4621, + "step": 56990 + }, + { + "epoch": 0.88, + "learning_rate": 1.6315647166792597e-06, + "loss": 1.4764, + "step": 57000 + }, + { + "epoch": 0.88, + "learning_rate": 1.6272374727571787e-06, + "loss": 1.4566, + "step": 57010 + }, + { + "epoch": 0.88, + "learning_rate": 1.6229157817818613e-06, + "loss": 1.4808, + "step": 57020 + }, + { + "epoch": 0.88, + "learning_rate": 1.6185996447800695e-06, + "loss": 1.4942, + "step": 57030 + }, + { + "epoch": 0.88, + "learning_rate": 1.61428906277723e-06, + "loss": 1.4714, + "step": 57040 + }, + { + "epoch": 0.89, + "learning_rate": 1.609984036797471e-06, + "loss": 1.4754, + "step": 57050 + }, + { + "epoch": 0.89, + "learning_rate": 1.6056845678635801e-06, + "loss": 1.4689, + "step": 57060 + }, + { + "epoch": 0.89, + "learning_rate": 1.6013906569970372e-06, + "loss": 1.4641, + "step": 57070 + }, + { + "epoch": 0.89, + "learning_rate": 1.5971023052179957e-06, + "loss": 1.4712, + "step": 57080 + }, + { + "epoch": 0.89, + "learning_rate": 1.592819513545296e-06, + "loss": 1.4912, + "step": 57090 + }, + { + "epoch": 0.89, + "learning_rate": 1.5885422829964442e-06, + "loss": 1.4833, + "step": 57100 + }, + { + "epoch": 0.89, + "learning_rate": 1.5842706145876434e-06, + "loss": 1.4845, + "step": 57110 + }, + { + "epoch": 0.89, + "learning_rate": 1.5800045093337546e-06, + "loss": 1.4533, + "step": 57120 + }, + { + "epoch": 0.89, + "learning_rate": 1.5757439682483332e-06, + "loss": 1.4735, + "step": 57130 + }, + { + "epoch": 0.89, + "learning_rate": 1.5714889923436061e-06, + "loss": 1.5135, + "step": 57140 + }, + { + "epoch": 0.89, + "learning_rate": 1.5672395826304782e-06, + "loss": 1.5223, + "step": 57150 + }, + { + "epoch": 0.89, + "learning_rate": 1.5629957401185307e-06, + "loss": 1.5294, + "step": 57160 + }, + { + "epoch": 0.89, + "learning_rate": 1.558757465816027e-06, + "loss": 1.5174, + "step": 57170 + }, + { + "epoch": 0.89, + "learning_rate": 1.5545247607299085e-06, + "loss": 1.5103, + "step": 57180 + }, + { + "epoch": 0.89, + "learning_rate": 1.5502976258657765e-06, + "loss": 1.5058, + "step": 57190 + }, + { + "epoch": 0.89, + "learning_rate": 1.5460760622279386e-06, + "loss": 1.4975, + "step": 57200 + }, + { + "epoch": 0.89, + "learning_rate": 1.5418600708193509e-06, + "loss": 1.5155, + "step": 57210 + }, + { + "epoch": 0.89, + "learning_rate": 1.5376496526416567e-06, + "loss": 1.4885, + "step": 57220 + }, + { + "epoch": 0.89, + "learning_rate": 1.5334448086951808e-06, + "loss": 1.4862, + "step": 57230 + }, + { + "epoch": 0.89, + "learning_rate": 1.5292455399789158e-06, + "loss": 1.5048, + "step": 57240 + }, + { + "epoch": 0.89, + "learning_rate": 1.5250518474905328e-06, + "loss": 1.4688, + "step": 57250 + }, + { + "epoch": 0.89, + "learning_rate": 1.5208637322263796e-06, + "loss": 1.4772, + "step": 57260 + }, + { + "epoch": 0.89, + "learning_rate": 1.5166811951814686e-06, + "loss": 1.4754, + "step": 57270 + }, + { + "epoch": 0.89, + "learning_rate": 1.5125042373495073e-06, + "loss": 1.483, + "step": 57280 + }, + { + "epoch": 0.89, + "learning_rate": 1.5083328597228553e-06, + "loss": 1.5114, + "step": 57290 + }, + { + "epoch": 0.89, + "learning_rate": 1.504167063292561e-06, + "loss": 1.4899, + "step": 57300 + }, + { + "epoch": 0.89, + "learning_rate": 1.5000068490483438e-06, + "loss": 1.4794, + "step": 57310 + }, + { + "epoch": 0.89, + "learning_rate": 1.495852217978591e-06, + "loss": 1.5322, + "step": 57320 + }, + { + "epoch": 0.89, + "learning_rate": 1.4917031710703715e-06, + "loss": 1.5527, + "step": 57330 + }, + { + "epoch": 0.89, + "learning_rate": 1.4875597093094245e-06, + "loss": 1.5719, + "step": 57340 + }, + { + "epoch": 0.89, + "learning_rate": 1.4834218336801625e-06, + "loss": 1.5578, + "step": 57350 + }, + { + "epoch": 0.89, + "learning_rate": 1.479289545165663e-06, + "loss": 1.5642, + "step": 57360 + }, + { + "epoch": 0.89, + "learning_rate": 1.4751628447476933e-06, + "loss": 1.5596, + "step": 57370 + }, + { + "epoch": 0.89, + "learning_rate": 1.4710417334066777e-06, + "loss": 1.5927, + "step": 57380 + }, + { + "epoch": 0.89, + "learning_rate": 1.4669262121217164e-06, + "loss": 1.5612, + "step": 57390 + }, + { + "epoch": 0.89, + "learning_rate": 1.4628162818705827e-06, + "loss": 1.5725, + "step": 57400 + }, + { + "epoch": 0.89, + "learning_rate": 1.4587119436297314e-06, + "loss": 1.56, + "step": 57410 + }, + { + "epoch": 0.89, + "learning_rate": 1.4546131983742634e-06, + "loss": 1.5703, + "step": 57420 + }, + { + "epoch": 0.89, + "learning_rate": 1.45052004707798e-06, + "loss": 1.5746, + "step": 57430 + }, + { + "epoch": 0.89, + "learning_rate": 1.4464324907133342e-06, + "loss": 1.5527, + "step": 57440 + }, + { + "epoch": 0.89, + "learning_rate": 1.4423505302514573e-06, + "loss": 1.5185, + "step": 57450 + }, + { + "epoch": 0.89, + "learning_rate": 1.4382741666621485e-06, + "loss": 1.5198, + "step": 57460 + }, + { + "epoch": 0.89, + "learning_rate": 1.4342034009138776e-06, + "loss": 1.481, + "step": 57470 + }, + { + "epoch": 0.89, + "learning_rate": 1.430138233973785e-06, + "loss": 1.4912, + "step": 57480 + }, + { + "epoch": 0.89, + "learning_rate": 1.4260786668076841e-06, + "loss": 1.4835, + "step": 57490 + }, + { + "epoch": 0.89, + "learning_rate": 1.4220247003800534e-06, + "loss": 1.4882, + "step": 57500 + }, + { + "epoch": 0.89, + "learning_rate": 1.417976335654042e-06, + "loss": 1.4571, + "step": 57510 + }, + { + "epoch": 0.89, + "learning_rate": 1.4139335735914694e-06, + "loss": 1.4826, + "step": 57520 + }, + { + "epoch": 0.89, + "learning_rate": 1.409896415152817e-06, + "loss": 1.4773, + "step": 57530 + }, + { + "epoch": 0.89, + "learning_rate": 1.4058648612972515e-06, + "loss": 1.4894, + "step": 57540 + }, + { + "epoch": 0.89, + "learning_rate": 1.4018389129825866e-06, + "loss": 1.4846, + "step": 57550 + }, + { + "epoch": 0.89, + "learning_rate": 1.3978185711653213e-06, + "loss": 1.4791, + "step": 57560 + }, + { + "epoch": 0.89, + "learning_rate": 1.3938038368006139e-06, + "loss": 1.4728, + "step": 57570 + }, + { + "epoch": 0.89, + "learning_rate": 1.3897947108422988e-06, + "loss": 1.4598, + "step": 57580 + }, + { + "epoch": 0.89, + "learning_rate": 1.385791194242858e-06, + "loss": 1.4758, + "step": 57590 + }, + { + "epoch": 0.89, + "learning_rate": 1.38179328795347e-06, + "loss": 1.4943, + "step": 57600 + }, + { + "epoch": 0.89, + "learning_rate": 1.3778009929239583e-06, + "loss": 1.484, + "step": 57610 + }, + { + "epoch": 0.89, + "learning_rate": 1.3738143101028168e-06, + "loss": 1.4688, + "step": 57620 + }, + { + "epoch": 0.89, + "learning_rate": 1.3698332404372154e-06, + "loss": 1.5061, + "step": 57630 + }, + { + "epoch": 0.89, + "learning_rate": 1.3658577848729842e-06, + "loss": 1.4936, + "step": 57640 + }, + { + "epoch": 0.89, + "learning_rate": 1.3618879443546173e-06, + "loss": 1.4905, + "step": 57650 + }, + { + "epoch": 0.89, + "learning_rate": 1.357923719825277e-06, + "loss": 1.4966, + "step": 57660 + }, + { + "epoch": 0.89, + "learning_rate": 1.3539651122267932e-06, + "loss": 1.5101, + "step": 57670 + }, + { + "epoch": 0.89, + "learning_rate": 1.350012122499658e-06, + "loss": 1.5311, + "step": 57680 + }, + { + "epoch": 0.9, + "learning_rate": 1.3460647515830344e-06, + "loss": 1.5072, + "step": 57690 + }, + { + "epoch": 0.9, + "learning_rate": 1.3421230004147383e-06, + "loss": 1.4929, + "step": 57700 + }, + { + "epoch": 0.9, + "learning_rate": 1.3381868699312683e-06, + "loss": 1.4991, + "step": 57710 + }, + { + "epoch": 0.9, + "learning_rate": 1.3342563610677733e-06, + "loss": 1.4514, + "step": 57720 + }, + { + "epoch": 0.9, + "learning_rate": 1.3303314747580676e-06, + "loss": 1.4825, + "step": 57730 + }, + { + "epoch": 0.9, + "learning_rate": 1.3264122119346412e-06, + "loss": 1.525, + "step": 57740 + }, + { + "epoch": 0.9, + "learning_rate": 1.3224985735286382e-06, + "loss": 1.4915, + "step": 57750 + }, + { + "epoch": 0.9, + "learning_rate": 1.3185905604698596e-06, + "loss": 1.4697, + "step": 57760 + }, + { + "epoch": 0.9, + "learning_rate": 1.3146881736867928e-06, + "loss": 1.4962, + "step": 57770 + }, + { + "epoch": 0.9, + "learning_rate": 1.310791414106563e-06, + "loss": 1.5408, + "step": 57780 + }, + { + "epoch": 0.9, + "learning_rate": 1.306900282654977e-06, + "loss": 1.4683, + "step": 57790 + }, + { + "epoch": 0.9, + "learning_rate": 1.3030147802564925e-06, + "loss": 1.46, + "step": 57800 + }, + { + "epoch": 0.9, + "learning_rate": 1.2991349078342403e-06, + "loss": 1.4924, + "step": 57810 + }, + { + "epoch": 0.9, + "learning_rate": 1.2952606663100026e-06, + "loss": 1.4726, + "step": 57820 + }, + { + "epoch": 0.9, + "learning_rate": 1.291392056604232e-06, + "loss": 1.4741, + "step": 57830 + }, + { + "epoch": 0.9, + "learning_rate": 1.2875290796360462e-06, + "loss": 1.526, + "step": 57840 + }, + { + "epoch": 0.9, + "learning_rate": 1.2836717363232026e-06, + "loss": 1.4906, + "step": 57850 + }, + { + "epoch": 0.9, + "learning_rate": 1.2798200275821542e-06, + "loss": 1.4854, + "step": 57860 + }, + { + "epoch": 0.9, + "learning_rate": 1.2759739543279886e-06, + "loss": 1.4757, + "step": 57870 + }, + { + "epoch": 0.9, + "learning_rate": 1.2721335174744636e-06, + "loss": 1.4842, + "step": 57880 + }, + { + "epoch": 0.9, + "learning_rate": 1.2682987179339994e-06, + "loss": 1.4606, + "step": 57890 + }, + { + "epoch": 0.9, + "learning_rate": 1.2644695566176728e-06, + "loss": 1.4539, + "step": 57900 + }, + { + "epoch": 0.9, + "learning_rate": 1.2606460344352256e-06, + "loss": 1.5026, + "step": 57910 + }, + { + "epoch": 0.9, + "learning_rate": 1.2568281522950642e-06, + "loss": 1.4798, + "step": 57920 + }, + { + "epoch": 0.9, + "learning_rate": 1.25301591110423e-06, + "loss": 1.466, + "step": 57930 + }, + { + "epoch": 0.9, + "learning_rate": 1.249209311768465e-06, + "loss": 1.4546, + "step": 57940 + }, + { + "epoch": 0.9, + "learning_rate": 1.2454083551921347e-06, + "loss": 1.4925, + "step": 57950 + }, + { + "epoch": 0.9, + "learning_rate": 1.2416130422782778e-06, + "loss": 1.4667, + "step": 57960 + }, + { + "epoch": 0.9, + "learning_rate": 1.2378233739285982e-06, + "loss": 1.4917, + "step": 57970 + }, + { + "epoch": 0.9, + "learning_rate": 1.2340393510434505e-06, + "loss": 1.5175, + "step": 57980 + }, + { + "epoch": 0.9, + "learning_rate": 1.2302609745218492e-06, + "loss": 1.4727, + "step": 57990 + }, + { + "epoch": 0.9, + "learning_rate": 1.2264882452614673e-06, + "loss": 1.4647, + "step": 58000 + }, + { + "epoch": 0.9, + "learning_rate": 1.2227211641586438e-06, + "loss": 1.4716, + "step": 58010 + }, + { + "epoch": 0.9, + "learning_rate": 1.2189597321083573e-06, + "loss": 1.4826, + "step": 58020 + }, + { + "epoch": 0.9, + "learning_rate": 1.2152039500042705e-06, + "loss": 1.4546, + "step": 58030 + }, + { + "epoch": 0.9, + "learning_rate": 1.2114538187386809e-06, + "loss": 1.4504, + "step": 58040 + }, + { + "epoch": 0.9, + "learning_rate": 1.2077093392025508e-06, + "loss": 1.4671, + "step": 58050 + }, + { + "epoch": 0.9, + "learning_rate": 1.2039705122855043e-06, + "loss": 1.5355, + "step": 58060 + }, + { + "epoch": 0.9, + "learning_rate": 1.2002373388758203e-06, + "loss": 1.4845, + "step": 58070 + }, + { + "epoch": 0.9, + "learning_rate": 1.196509819860428e-06, + "loss": 1.4896, + "step": 58080 + }, + { + "epoch": 0.9, + "learning_rate": 1.1927879561249272e-06, + "loss": 1.4623, + "step": 58090 + }, + { + "epoch": 0.9, + "learning_rate": 1.189071748553558e-06, + "loss": 1.5255, + "step": 58100 + }, + { + "epoch": 0.9, + "learning_rate": 1.185361198029228e-06, + "loss": 1.4769, + "step": 58110 + }, + { + "epoch": 0.9, + "learning_rate": 1.1816563054334956e-06, + "loss": 1.4769, + "step": 58120 + }, + { + "epoch": 0.9, + "learning_rate": 1.1779570716465765e-06, + "loss": 1.4789, + "step": 58130 + }, + { + "epoch": 0.9, + "learning_rate": 1.17426349754734e-06, + "loss": 1.4852, + "step": 58140 + }, + { + "epoch": 0.9, + "learning_rate": 1.1705755840133198e-06, + "loss": 1.4699, + "step": 58150 + }, + { + "epoch": 0.9, + "learning_rate": 1.1668933319206904e-06, + "loss": 1.5018, + "step": 58160 + }, + { + "epoch": 0.9, + "learning_rate": 1.1632167421442907e-06, + "loss": 1.4898, + "step": 58170 + }, + { + "epoch": 0.9, + "learning_rate": 1.1595458155576165e-06, + "loss": 1.493, + "step": 58180 + }, + { + "epoch": 0.9, + "learning_rate": 1.155880553032801e-06, + "loss": 1.4799, + "step": 58190 + }, + { + "epoch": 0.9, + "learning_rate": 1.1522209554406639e-06, + "loss": 1.468, + "step": 58200 + }, + { + "epoch": 0.9, + "learning_rate": 1.1485670236506429e-06, + "loss": 1.4563, + "step": 58210 + }, + { + "epoch": 0.9, + "learning_rate": 1.1449187585308524e-06, + "loss": 1.5323, + "step": 58220 + }, + { + "epoch": 0.9, + "learning_rate": 1.1412761609480515e-06, + "loss": 1.4679, + "step": 58230 + }, + { + "epoch": 0.9, + "learning_rate": 1.137639231767665e-06, + "loss": 1.4691, + "step": 58240 + }, + { + "epoch": 0.9, + "learning_rate": 1.1340079718537483e-06, + "loss": 1.4826, + "step": 58250 + }, + { + "epoch": 0.9, + "learning_rate": 1.130382382069034e-06, + "loss": 1.4616, + "step": 58260 + }, + { + "epoch": 0.9, + "learning_rate": 1.1267624632748913e-06, + "loss": 1.4722, + "step": 58270 + }, + { + "epoch": 0.9, + "learning_rate": 1.1231482163313518e-06, + "loss": 1.4939, + "step": 58280 + }, + { + "epoch": 0.9, + "learning_rate": 1.1195396420970893e-06, + "loss": 1.4896, + "step": 58290 + }, + { + "epoch": 0.9, + "learning_rate": 1.115936741429438e-06, + "loss": 1.4689, + "step": 58300 + }, + { + "epoch": 0.9, + "learning_rate": 1.112339515184385e-06, + "loss": 1.4842, + "step": 58310 + }, + { + "epoch": 0.9, + "learning_rate": 1.1087479642165666e-06, + "loss": 1.4849, + "step": 58320 + }, + { + "epoch": 0.91, + "learning_rate": 1.1051620893792692e-06, + "loss": 1.4478, + "step": 58330 + }, + { + "epoch": 0.91, + "learning_rate": 1.101581891524428e-06, + "loss": 1.4759, + "step": 58340 + }, + { + "epoch": 0.91, + "learning_rate": 1.0980073715026406e-06, + "loss": 1.458, + "step": 58350 + }, + { + "epoch": 0.91, + "learning_rate": 1.0944385301631388e-06, + "loss": 1.49, + "step": 58360 + }, + { + "epoch": 0.91, + "learning_rate": 1.0908753683538302e-06, + "loss": 1.5237, + "step": 58370 + }, + { + "epoch": 0.91, + "learning_rate": 1.087317886921241e-06, + "loss": 1.532, + "step": 58380 + }, + { + "epoch": 0.91, + "learning_rate": 1.0837660867105748e-06, + "loss": 1.5088, + "step": 58390 + }, + { + "epoch": 0.91, + "learning_rate": 1.0802199685656711e-06, + "loss": 1.5121, + "step": 58400 + }, + { + "epoch": 0.91, + "learning_rate": 1.0766795333290308e-06, + "loss": 1.4911, + "step": 58410 + }, + { + "epoch": 0.91, + "learning_rate": 1.073144781841784e-06, + "loss": 1.4799, + "step": 58420 + }, + { + "epoch": 0.91, + "learning_rate": 1.0696157149437363e-06, + "loss": 1.4472, + "step": 58430 + }, + { + "epoch": 0.91, + "learning_rate": 1.066092333473323e-06, + "loss": 1.4861, + "step": 58440 + }, + { + "epoch": 0.91, + "learning_rate": 1.0625746382676411e-06, + "loss": 1.4737, + "step": 58450 + }, + { + "epoch": 0.91, + "learning_rate": 1.0590626301624274e-06, + "loss": 1.4676, + "step": 58460 + }, + { + "epoch": 0.91, + "learning_rate": 1.0555563099920728e-06, + "loss": 1.4854, + "step": 58470 + }, + { + "epoch": 0.91, + "learning_rate": 1.0520556785896169e-06, + "loss": 1.4786, + "step": 58480 + }, + { + "epoch": 0.91, + "learning_rate": 1.048560736786744e-06, + "loss": 1.4862, + "step": 58490 + }, + { + "epoch": 0.91, + "learning_rate": 1.0450714854137955e-06, + "loss": 1.5, + "step": 58500 + }, + { + "epoch": 0.91, + "learning_rate": 1.0415879252997447e-06, + "loss": 1.4651, + "step": 58510 + }, + { + "epoch": 0.91, + "learning_rate": 1.0381100572722347e-06, + "loss": 1.4922, + "step": 58520 + }, + { + "epoch": 0.91, + "learning_rate": 1.0346378821575354e-06, + "loss": 1.4866, + "step": 58530 + }, + { + "epoch": 0.91, + "learning_rate": 1.0311714007805728e-06, + "loss": 1.4729, + "step": 58540 + }, + { + "epoch": 0.91, + "learning_rate": 1.0277106139649245e-06, + "loss": 1.4843, + "step": 58550 + }, + { + "epoch": 0.91, + "learning_rate": 1.0242555225328105e-06, + "loss": 1.4845, + "step": 58560 + }, + { + "epoch": 0.91, + "learning_rate": 1.0208061273050934e-06, + "loss": 1.4822, + "step": 58570 + }, + { + "epoch": 0.91, + "learning_rate": 1.0173624291012956e-06, + "loss": 1.4663, + "step": 58580 + }, + { + "epoch": 0.91, + "learning_rate": 1.0139244287395655e-06, + "loss": 1.4864, + "step": 58590 + }, + { + "epoch": 0.91, + "learning_rate": 1.0104921270367274e-06, + "loss": 1.472, + "step": 58600 + }, + { + "epoch": 0.91, + "learning_rate": 1.0070655248082178e-06, + "loss": 1.495, + "step": 58610 + }, + { + "epoch": 0.91, + "learning_rate": 1.003644622868144e-06, + "loss": 1.4775, + "step": 58620 + }, + { + "epoch": 0.91, + "learning_rate": 1.0002294220292474e-06, + "loss": 1.4781, + "step": 58630 + }, + { + "epoch": 0.91, + "learning_rate": 9.968199231029207e-07, + "loss": 1.4643, + "step": 58640 + }, + { + "epoch": 0.91, + "learning_rate": 9.934161268991966e-07, + "loss": 1.4911, + "step": 58650 + }, + { + "epoch": 0.91, + "learning_rate": 9.900180342267584e-07, + "loss": 1.4793, + "step": 58660 + }, + { + "epoch": 0.91, + "learning_rate": 9.86625645892933e-07, + "loss": 1.4958, + "step": 58670 + }, + { + "epoch": 0.91, + "learning_rate": 9.83238962703681e-07, + "loss": 1.4782, + "step": 58680 + }, + { + "epoch": 0.91, + "learning_rate": 9.798579854636308e-07, + "loss": 1.472, + "step": 58690 + }, + { + "epoch": 0.91, + "learning_rate": 9.76482714976032e-07, + "loss": 1.4873, + "step": 58700 + }, + { + "epoch": 0.91, + "learning_rate": 9.731131520427926e-07, + "loss": 1.4984, + "step": 58710 + }, + { + "epoch": 0.91, + "learning_rate": 9.697492974644561e-07, + "loss": 1.5191, + "step": 58720 + }, + { + "epoch": 0.91, + "learning_rate": 9.66391152040219e-07, + "loss": 1.5424, + "step": 58730 + }, + { + "epoch": 0.91, + "learning_rate": 9.6303871656791e-07, + "loss": 1.5048, + "step": 58740 + }, + { + "epoch": 0.91, + "learning_rate": 9.596919918440111e-07, + "loss": 1.4939, + "step": 58750 + }, + { + "epoch": 0.91, + "learning_rate": 9.563509786636477e-07, + "loss": 1.4922, + "step": 58760 + }, + { + "epoch": 0.91, + "learning_rate": 9.530156778205735e-07, + "loss": 1.5091, + "step": 58770 + }, + { + "epoch": 0.91, + "learning_rate": 9.496860901072047e-07, + "loss": 1.485, + "step": 58780 + }, + { + "epoch": 0.91, + "learning_rate": 9.463622163145891e-07, + "loss": 1.4946, + "step": 58790 + }, + { + "epoch": 0.91, + "learning_rate": 9.430440572324173e-07, + "loss": 1.4904, + "step": 58800 + }, + { + "epoch": 0.91, + "learning_rate": 9.397316136490225e-07, + "loss": 1.4602, + "step": 58810 + }, + { + "epoch": 0.91, + "learning_rate": 9.364248863513863e-07, + "loss": 1.4937, + "step": 58820 + }, + { + "epoch": 0.91, + "learning_rate": 9.331238761251276e-07, + "loss": 1.4808, + "step": 58830 + }, + { + "epoch": 0.91, + "learning_rate": 9.298285837545051e-07, + "loss": 1.4918, + "step": 58840 + }, + { + "epoch": 0.91, + "learning_rate": 9.265390100224147e-07, + "loss": 1.4981, + "step": 58850 + }, + { + "epoch": 0.91, + "learning_rate": 9.232551557104119e-07, + "loss": 1.4816, + "step": 58860 + }, + { + "epoch": 0.91, + "learning_rate": 9.199770215986725e-07, + "loss": 1.5277, + "step": 58870 + }, + { + "epoch": 0.91, + "learning_rate": 9.167046084660236e-07, + "loss": 1.5451, + "step": 58880 + }, + { + "epoch": 0.91, + "learning_rate": 9.134379170899349e-07, + "loss": 1.5308, + "step": 58890 + }, + { + "epoch": 0.91, + "learning_rate": 9.101769482465078e-07, + "loss": 1.5452, + "step": 58900 + }, + { + "epoch": 0.91, + "learning_rate": 9.069217027104921e-07, + "loss": 1.5654, + "step": 58910 + }, + { + "epoch": 0.91, + "learning_rate": 9.036721812552773e-07, + "loss": 1.5596, + "step": 58920 + }, + { + "epoch": 0.91, + "learning_rate": 9.004283846528932e-07, + "loss": 1.5862, + "step": 58930 + }, + { + "epoch": 0.91, + "learning_rate": 8.971903136739984e-07, + "loss": 1.5782, + "step": 58940 + }, + { + "epoch": 0.91, + "learning_rate": 8.939579690879107e-07, + "loss": 1.5804, + "step": 58950 + }, + { + "epoch": 0.91, + "learning_rate": 8.907313516625687e-07, + "loss": 1.5497, + "step": 58960 + }, + { + "epoch": 0.91, + "learning_rate": 8.875104621645619e-07, + "loss": 1.567, + "step": 58970 + }, + { + "epoch": 0.92, + "learning_rate": 8.842953013591171e-07, + "loss": 1.5717, + "step": 58980 + }, + { + "epoch": 0.92, + "learning_rate": 8.810858700100955e-07, + "loss": 1.5554, + "step": 58990 + }, + { + "epoch": 0.92, + "learning_rate": 8.778821688799982e-07, + "loss": 1.527, + "step": 59000 + }, + { + "epoch": 0.92, + "learning_rate": 8.746841987299747e-07, + "loss": 1.5014, + "step": 59010 + }, + { + "epoch": 0.92, + "learning_rate": 8.714919603197979e-07, + "loss": 1.4956, + "step": 59020 + }, + { + "epoch": 0.92, + "learning_rate": 8.683054544078889e-07, + "loss": 1.4669, + "step": 59030 + }, + { + "epoch": 0.92, + "learning_rate": 8.651246817513031e-07, + "loss": 1.475, + "step": 59040 + }, + { + "epoch": 0.92, + "learning_rate": 8.619496431057361e-07, + "loss": 1.4877, + "step": 59050 + }, + { + "epoch": 0.92, + "learning_rate": 8.587803392255178e-07, + "loss": 1.4631, + "step": 59060 + }, + { + "epoch": 0.92, + "learning_rate": 8.556167708636181e-07, + "loss": 1.4749, + "step": 59070 + }, + { + "epoch": 0.92, + "learning_rate": 8.524589387716442e-07, + "loss": 1.4815, + "step": 59080 + }, + { + "epoch": 0.92, + "learning_rate": 8.493068436998431e-07, + "loss": 1.4599, + "step": 59090 + }, + { + "epoch": 0.92, + "learning_rate": 8.461604863970907e-07, + "loss": 1.4643, + "step": 59100 + }, + { + "epoch": 0.92, + "learning_rate": 8.430198676109058e-07, + "loss": 1.4999, + "step": 59110 + }, + { + "epoch": 0.92, + "learning_rate": 8.398849880874471e-07, + "loss": 1.4747, + "step": 59120 + }, + { + "epoch": 0.92, + "learning_rate": 8.367558485714994e-07, + "loss": 1.4855, + "step": 59130 + }, + { + "epoch": 0.92, + "learning_rate": 8.336324498064929e-07, + "loss": 1.4629, + "step": 59140 + }, + { + "epoch": 0.92, + "learning_rate": 8.305147925344898e-07, + "loss": 1.4995, + "step": 59150 + }, + { + "epoch": 0.92, + "learning_rate": 8.274028774961945e-07, + "loss": 1.4684, + "step": 59160 + }, + { + "epoch": 0.92, + "learning_rate": 8.242967054309297e-07, + "loss": 1.4817, + "step": 59170 + }, + { + "epoch": 0.92, + "learning_rate": 8.211962770766801e-07, + "loss": 1.4839, + "step": 59180 + }, + { + "epoch": 0.92, + "learning_rate": 8.181015931700425e-07, + "loss": 1.5003, + "step": 59190 + }, + { + "epoch": 0.92, + "learning_rate": 8.150126544462594e-07, + "loss": 1.5047, + "step": 59200 + }, + { + "epoch": 0.92, + "learning_rate": 8.119294616392075e-07, + "loss": 1.5243, + "step": 59210 + }, + { + "epoch": 0.92, + "learning_rate": 8.088520154813984e-07, + "loss": 1.4819, + "step": 59220 + }, + { + "epoch": 0.92, + "learning_rate": 8.057803167039774e-07, + "loss": 1.4853, + "step": 59230 + }, + { + "epoch": 0.92, + "learning_rate": 8.027143660367248e-07, + "loss": 1.4807, + "step": 59240 + }, + { + "epoch": 0.92, + "learning_rate": 7.99654164208058e-07, + "loss": 1.4947, + "step": 59250 + }, + { + "epoch": 0.92, + "learning_rate": 7.965997119450174e-07, + "loss": 1.5098, + "step": 59260 + }, + { + "epoch": 0.92, + "learning_rate": 7.935510099732979e-07, + "loss": 1.4555, + "step": 59270 + }, + { + "epoch": 0.92, + "learning_rate": 7.905080590172031e-07, + "loss": 1.4103, + "step": 59280 + }, + { + "epoch": 0.92, + "learning_rate": 7.874708597996966e-07, + "loss": 1.3874, + "step": 59290 + }, + { + "epoch": 0.92, + "learning_rate": 7.84439413042351e-07, + "loss": 1.3984, + "step": 59300 + }, + { + "epoch": 0.92, + "learning_rate": 7.814137194653876e-07, + "loss": 1.4038, + "step": 59310 + }, + { + "epoch": 0.92, + "learning_rate": 7.783937797876589e-07, + "loss": 1.4096, + "step": 59320 + }, + { + "epoch": 0.92, + "learning_rate": 7.753795947266468e-07, + "loss": 1.3863, + "step": 59330 + }, + { + "epoch": 0.92, + "learning_rate": 7.723711649984588e-07, + "loss": 1.3856, + "step": 59340 + }, + { + "epoch": 0.92, + "learning_rate": 7.693684913178595e-07, + "loss": 1.3745, + "step": 59350 + }, + { + "epoch": 0.92, + "learning_rate": 7.66371574398217e-07, + "loss": 1.3847, + "step": 59360 + }, + { + "epoch": 0.92, + "learning_rate": 7.633804149515505e-07, + "loss": 1.403, + "step": 59370 + }, + { + "epoch": 0.92, + "learning_rate": 7.603950136885057e-07, + "loss": 1.4057, + "step": 59380 + }, + { + "epoch": 0.92, + "learning_rate": 7.574153713183568e-07, + "loss": 1.4055, + "step": 59390 + }, + { + "epoch": 0.92, + "learning_rate": 7.544414885490125e-07, + "loss": 1.4041, + "step": 59400 + }, + { + "epoch": 0.92, + "learning_rate": 7.514733660870188e-07, + "loss": 1.3832, + "step": 59410 + }, + { + "epoch": 0.92, + "learning_rate": 7.485110046375476e-07, + "loss": 1.3858, + "step": 59420 + }, + { + "epoch": 0.92, + "learning_rate": 7.455544049043972e-07, + "loss": 1.3809, + "step": 59430 + }, + { + "epoch": 0.92, + "learning_rate": 7.426035675900083e-07, + "loss": 1.3727, + "step": 59440 + }, + { + "epoch": 0.92, + "learning_rate": 7.396584933954426e-07, + "loss": 1.3982, + "step": 59450 + }, + { + "epoch": 0.92, + "learning_rate": 7.367191830203957e-07, + "loss": 1.3747, + "step": 59460 + }, + { + "epoch": 0.92, + "learning_rate": 7.337856371631979e-07, + "loss": 1.367, + "step": 59470 + }, + { + "epoch": 0.92, + "learning_rate": 7.308578565208113e-07, + "loss": 1.3827, + "step": 59480 + }, + { + "epoch": 0.92, + "learning_rate": 7.2793584178881e-07, + "loss": 1.3906, + "step": 59490 + }, + { + "epoch": 0.92, + "learning_rate": 7.250195936614246e-07, + "loss": 1.3266, + "step": 59500 + }, + { + "epoch": 0.92, + "learning_rate": 7.221091128314956e-07, + "loss": 1.3763, + "step": 59510 + }, + { + "epoch": 0.92, + "learning_rate": 7.192043999905085e-07, + "loss": 1.388, + "step": 59520 + }, + { + "epoch": 0.92, + "learning_rate": 7.163054558285615e-07, + "loss": 1.3726, + "step": 59530 + }, + { + "epoch": 0.92, + "learning_rate": 7.134122810343979e-07, + "loss": 1.409, + "step": 59540 + }, + { + "epoch": 0.92, + "learning_rate": 7.10524876295382e-07, + "loss": 1.4022, + "step": 59550 + }, + { + "epoch": 0.92, + "learning_rate": 7.076432422975065e-07, + "loss": 1.4211, + "step": 59560 + }, + { + "epoch": 0.92, + "learning_rate": 7.04767379725399e-07, + "loss": 1.3742, + "step": 59570 + }, + { + "epoch": 0.92, + "learning_rate": 7.018972892623127e-07, + "loss": 1.4245, + "step": 59580 + }, + { + "epoch": 0.92, + "learning_rate": 6.990329715901328e-07, + "loss": 1.4452, + "step": 59590 + }, + { + "epoch": 0.92, + "learning_rate": 6.961744273893567e-07, + "loss": 1.3943, + "step": 59600 + }, + { + "epoch": 0.92, + "learning_rate": 6.933216573391382e-07, + "loss": 1.4327, + "step": 59610 + }, + { + "epoch": 0.93, + "learning_rate": 6.904746621172325e-07, + "loss": 1.3746, + "step": 59620 + }, + { + "epoch": 0.93, + "learning_rate": 6.876334424000403e-07, + "loss": 1.4087, + "step": 59630 + }, + { + "epoch": 0.93, + "learning_rate": 6.847979988625825e-07, + "loss": 1.3783, + "step": 59640 + }, + { + "epoch": 0.93, + "learning_rate": 6.81968332178512e-07, + "loss": 1.4068, + "step": 59650 + }, + { + "epoch": 0.93, + "learning_rate": 6.791444430200994e-07, + "loss": 1.3688, + "step": 59660 + }, + { + "epoch": 0.93, + "learning_rate": 6.76326332058258e-07, + "loss": 1.3779, + "step": 59670 + }, + { + "epoch": 0.93, + "learning_rate": 6.735139999625162e-07, + "loss": 1.3718, + "step": 59680 + }, + { + "epoch": 0.93, + "learning_rate": 6.70707447401031e-07, + "loss": 1.4244, + "step": 59690 + }, + { + "epoch": 0.93, + "learning_rate": 6.679066750405943e-07, + "loss": 1.3695, + "step": 59700 + }, + { + "epoch": 0.93, + "learning_rate": 6.651116835466154e-07, + "loss": 1.4015, + "step": 59710 + }, + { + "epoch": 0.93, + "learning_rate": 6.623224735831352e-07, + "loss": 1.4227, + "step": 59720 + }, + { + "epoch": 0.93, + "learning_rate": 6.595390458128181e-07, + "loss": 1.3377, + "step": 59730 + }, + { + "epoch": 0.93, + "learning_rate": 6.567614008969574e-07, + "loss": 1.3937, + "step": 59740 + }, + { + "epoch": 0.93, + "learning_rate": 6.539895394954726e-07, + "loss": 1.394, + "step": 59750 + }, + { + "epoch": 0.93, + "learning_rate": 6.512234622669089e-07, + "loss": 1.3969, + "step": 59760 + }, + { + "epoch": 0.93, + "learning_rate": 6.484631698684296e-07, + "loss": 1.3727, + "step": 59770 + }, + { + "epoch": 0.93, + "learning_rate": 6.457086629558407e-07, + "loss": 1.3957, + "step": 59780 + }, + { + "epoch": 0.93, + "learning_rate": 6.429599421835519e-07, + "loss": 1.4139, + "step": 59790 + }, + { + "epoch": 0.93, + "learning_rate": 6.402170082046188e-07, + "loss": 1.41, + "step": 59800 + }, + { + "epoch": 0.93, + "learning_rate": 6.37479861670709e-07, + "loss": 1.395, + "step": 59810 + }, + { + "epoch": 0.93, + "learning_rate": 6.347485032321216e-07, + "loss": 1.398, + "step": 59820 + }, + { + "epoch": 0.93, + "learning_rate": 6.320229335377736e-07, + "loss": 1.4142, + "step": 59830 + }, + { + "epoch": 0.93, + "learning_rate": 6.293031532352167e-07, + "loss": 1.3971, + "step": 59840 + }, + { + "epoch": 0.93, + "learning_rate": 6.265891629706172e-07, + "loss": 1.3751, + "step": 59850 + }, + { + "epoch": 0.93, + "learning_rate": 6.238809633887705e-07, + "loss": 1.3398, + "step": 59860 + }, + { + "epoch": 0.93, + "learning_rate": 6.211785551330979e-07, + "loss": 1.4401, + "step": 59870 + }, + { + "epoch": 0.93, + "learning_rate": 6.184819388456442e-07, + "loss": 1.4284, + "step": 59880 + }, + { + "epoch": 0.93, + "learning_rate": 6.157911151670748e-07, + "loss": 1.4288, + "step": 59890 + }, + { + "epoch": 0.93, + "learning_rate": 6.131060847366782e-07, + "loss": 1.4708, + "step": 59900 + }, + { + "epoch": 0.93, + "learning_rate": 6.104268481923775e-07, + "loss": 1.4601, + "step": 59910 + }, + { + "epoch": 0.93, + "learning_rate": 6.077534061706997e-07, + "loss": 1.4352, + "step": 59920 + }, + { + "epoch": 0.93, + "learning_rate": 6.050857593068171e-07, + "loss": 1.4373, + "step": 59930 + }, + { + "epoch": 0.93, + "learning_rate": 6.024239082345062e-07, + "loss": 1.4235, + "step": 59940 + }, + { + "epoch": 0.93, + "learning_rate": 5.997678535861778e-07, + "loss": 1.411, + "step": 59950 + }, + { + "epoch": 0.93, + "learning_rate": 5.97117595992866e-07, + "loss": 1.4528, + "step": 59960 + }, + { + "epoch": 0.93, + "learning_rate": 5.94473136084217e-07, + "loss": 1.4501, + "step": 59970 + }, + { + "epoch": 0.93, + "learning_rate": 5.918344744885146e-07, + "loss": 1.4422, + "step": 59980 + }, + { + "epoch": 0.93, + "learning_rate": 5.892016118326543e-07, + "loss": 1.4527, + "step": 59990 + }, + { + "epoch": 0.93, + "learning_rate": 5.865745487421498e-07, + "loss": 1.4451, + "step": 60000 + }, + { + "epoch": 0.93, + "eval_loss": 1.5818288326263428, + "eval_runtime": 81.9976, + "eval_samples_per_second": 36.586, + "eval_steps_per_second": 4.573, + "step": 60000 + }, + { + "epoch": 0.93, + "learning_rate": 5.839532858411573e-07, + "loss": 1.4384, + "step": 60010 + }, + { + "epoch": 0.93, + "learning_rate": 5.813378237524314e-07, + "loss": 1.4485, + "step": 60020 + }, + { + "epoch": 0.93, + "learning_rate": 5.787281630973612e-07, + "loss": 1.4151, + "step": 60030 + }, + { + "epoch": 0.93, + "learning_rate": 5.761243044959535e-07, + "loss": 1.4175, + "step": 60040 + }, + { + "epoch": 0.93, + "learning_rate": 5.735262485668441e-07, + "loss": 1.4268, + "step": 60050 + }, + { + "epoch": 0.93, + "learning_rate": 5.709339959272781e-07, + "loss": 1.4373, + "step": 60060 + }, + { + "epoch": 0.93, + "learning_rate": 5.683475471931294e-07, + "loss": 1.4243, + "step": 60070 + }, + { + "epoch": 0.93, + "learning_rate": 5.657669029788954e-07, + "loss": 1.4327, + "step": 60080 + }, + { + "epoch": 0.93, + "learning_rate": 5.631920638976828e-07, + "loss": 1.4457, + "step": 60090 + }, + { + "epoch": 0.93, + "learning_rate": 5.606230305612359e-07, + "loss": 1.4328, + "step": 60100 + }, + { + "epoch": 0.93, + "learning_rate": 5.580598035799051e-07, + "loss": 1.4268, + "step": 60110 + }, + { + "epoch": 0.93, + "learning_rate": 5.555023835626671e-07, + "loss": 1.4811, + "step": 60120 + }, + { + "epoch": 0.93, + "learning_rate": 5.529507711171194e-07, + "loss": 1.4301, + "step": 60130 + }, + { + "epoch": 0.93, + "learning_rate": 5.504049668494826e-07, + "loss": 1.437, + "step": 60140 + }, + { + "epoch": 0.93, + "learning_rate": 5.478649713645867e-07, + "loss": 1.429, + "step": 60150 + }, + { + "epoch": 0.93, + "learning_rate": 5.453307852658962e-07, + "loss": 1.4005, + "step": 60160 + }, + { + "epoch": 0.93, + "learning_rate": 5.42802409155485e-07, + "loss": 1.3891, + "step": 60170 + }, + { + "epoch": 0.93, + "learning_rate": 5.40279843634045e-07, + "loss": 1.3829, + "step": 60180 + }, + { + "epoch": 0.93, + "learning_rate": 5.377630893008995e-07, + "loss": 1.3939, + "step": 60190 + }, + { + "epoch": 0.93, + "learning_rate": 5.352521467539811e-07, + "loss": 1.4158, + "step": 60200 + }, + { + "epoch": 0.93, + "learning_rate": 5.327470165898435e-07, + "loss": 1.4018, + "step": 60210 + }, + { + "epoch": 0.93, + "learning_rate": 5.302476994036631e-07, + "loss": 1.41, + "step": 60220 + }, + { + "epoch": 0.93, + "learning_rate": 5.27754195789229e-07, + "loss": 1.3976, + "step": 60230 + }, + { + "epoch": 0.93, + "learning_rate": 5.252665063389561e-07, + "loss": 1.4135, + "step": 60240 + }, + { + "epoch": 0.93, + "learning_rate": 5.227846316438745e-07, + "loss": 1.4189, + "step": 60250 + }, + { + "epoch": 0.93, + "learning_rate": 5.203085722936263e-07, + "loss": 1.4156, + "step": 60260 + }, + { + "epoch": 0.94, + "learning_rate": 5.178383288764882e-07, + "loss": 1.4066, + "step": 60270 + }, + { + "epoch": 0.94, + "learning_rate": 5.15373901979338e-07, + "loss": 1.4267, + "step": 60280 + }, + { + "epoch": 0.94, + "learning_rate": 5.129152921876823e-07, + "loss": 1.3942, + "step": 60290 + }, + { + "epoch": 0.94, + "learning_rate": 5.104625000856428e-07, + "loss": 1.3961, + "step": 60300 + }, + { + "epoch": 0.94, + "learning_rate": 5.080155262559588e-07, + "loss": 1.3971, + "step": 60310 + }, + { + "epoch": 0.94, + "learning_rate": 5.05574371279982e-07, + "loss": 1.3823, + "step": 60320 + }, + { + "epoch": 0.94, + "learning_rate": 5.031390357376959e-07, + "loss": 1.3918, + "step": 60330 + }, + { + "epoch": 0.94, + "learning_rate": 5.007095202076845e-07, + "loss": 1.3773, + "step": 60340 + }, + { + "epoch": 0.94, + "learning_rate": 4.982858252671613e-07, + "loss": 1.3921, + "step": 60350 + }, + { + "epoch": 0.94, + "learning_rate": 4.95867951491949e-07, + "loss": 1.3728, + "step": 60360 + }, + { + "epoch": 0.94, + "learning_rate": 4.934558994564964e-07, + "loss": 1.3783, + "step": 60370 + }, + { + "epoch": 0.94, + "learning_rate": 4.910496697338562e-07, + "loss": 1.3877, + "step": 60380 + }, + { + "epoch": 0.94, + "learning_rate": 4.886492628957101e-07, + "loss": 1.4156, + "step": 60390 + }, + { + "epoch": 0.94, + "learning_rate": 4.862546795123518e-07, + "loss": 1.4009, + "step": 60400 + }, + { + "epoch": 0.94, + "learning_rate": 4.838659201526874e-07, + "loss": 1.4033, + "step": 60410 + }, + { + "epoch": 0.94, + "learning_rate": 4.814829853842462e-07, + "loss": 1.402, + "step": 60420 + }, + { + "epoch": 0.94, + "learning_rate": 4.79105875773167e-07, + "loss": 1.4631, + "step": 60430 + }, + { + "epoch": 0.94, + "learning_rate": 4.767345918842175e-07, + "loss": 1.3907, + "step": 60440 + }, + { + "epoch": 0.94, + "learning_rate": 4.7436913428075824e-07, + "loss": 1.3474, + "step": 60450 + }, + { + "epoch": 0.94, + "learning_rate": 4.720095035247896e-07, + "loss": 1.392, + "step": 60460 + }, + { + "epoch": 0.94, + "learning_rate": 4.6965570017691305e-07, + "loss": 1.4169, + "step": 60470 + }, + { + "epoch": 0.94, + "learning_rate": 4.6730772479635367e-07, + "loss": 1.3749, + "step": 60480 + }, + { + "epoch": 0.94, + "learning_rate": 4.649655779409373e-07, + "loss": 1.3936, + "step": 60490 + }, + { + "epoch": 0.94, + "learning_rate": 4.6262926016713005e-07, + "loss": 1.3213, + "step": 60500 + }, + { + "epoch": 0.94, + "learning_rate": 4.6029877202999064e-07, + "loss": 1.4039, + "step": 60510 + }, + { + "epoch": 0.94, + "learning_rate": 4.579741140832039e-07, + "loss": 1.4118, + "step": 60520 + }, + { + "epoch": 0.94, + "learning_rate": 4.5565528687906423e-07, + "loss": 1.3979, + "step": 60530 + }, + { + "epoch": 0.94, + "learning_rate": 4.533422909684837e-07, + "loss": 1.3974, + "step": 60540 + }, + { + "epoch": 0.94, + "learning_rate": 4.5103512690098937e-07, + "loss": 1.4182, + "step": 60550 + }, + { + "epoch": 0.94, + "learning_rate": 4.4873379522472056e-07, + "loss": 1.4022, + "step": 60560 + }, + { + "epoch": 0.94, + "learning_rate": 4.464382964864372e-07, + "loss": 1.3883, + "step": 60570 + }, + { + "epoch": 0.94, + "learning_rate": 4.4414863123150017e-07, + "loss": 1.4029, + "step": 60580 + }, + { + "epoch": 0.94, + "learning_rate": 4.418648000038994e-07, + "loss": 1.4081, + "step": 60590 + }, + { + "epoch": 0.94, + "learning_rate": 4.395868033462286e-07, + "loss": 1.3972, + "step": 60600 + }, + { + "epoch": 0.94, + "learning_rate": 4.373146417996965e-07, + "loss": 1.4307, + "step": 60610 + }, + { + "epoch": 0.94, + "learning_rate": 4.350483159041352e-07, + "loss": 1.4386, + "step": 60620 + }, + { + "epoch": 0.94, + "learning_rate": 4.3278782619797786e-07, + "loss": 1.4653, + "step": 60630 + }, + { + "epoch": 0.94, + "learning_rate": 4.305331732182727e-07, + "loss": 1.431, + "step": 60640 + }, + { + "epoch": 0.94, + "learning_rate": 4.282843575006912e-07, + "loss": 1.4682, + "step": 60650 + }, + { + "epoch": 0.94, + "learning_rate": 4.2604137957950595e-07, + "loss": 1.4537, + "step": 60660 + }, + { + "epoch": 0.94, + "learning_rate": 4.23804239987613e-07, + "loss": 1.4513, + "step": 60670 + }, + { + "epoch": 0.94, + "learning_rate": 4.215729392565149e-07, + "loss": 1.4493, + "step": 60680 + }, + { + "epoch": 0.94, + "learning_rate": 4.1934747791632645e-07, + "loss": 1.4327, + "step": 60690 + }, + { + "epoch": 0.94, + "learning_rate": 4.171278564957776e-07, + "loss": 1.4541, + "step": 60700 + }, + { + "epoch": 0.94, + "learning_rate": 4.149140755222103e-07, + "loss": 1.4529, + "step": 60710 + }, + { + "epoch": 0.94, + "learning_rate": 4.1270613552158166e-07, + "loss": 1.4189, + "step": 60720 + }, + { + "epoch": 0.94, + "learning_rate": 4.1050403701845543e-07, + "loss": 1.4135, + "step": 60730 + }, + { + "epoch": 0.94, + "learning_rate": 4.0830778053601307e-07, + "loss": 1.4151, + "step": 60740 + }, + { + "epoch": 0.94, + "learning_rate": 4.0611736659603995e-07, + "loss": 1.4222, + "step": 60750 + }, + { + "epoch": 0.94, + "learning_rate": 4.039327957189476e-07, + "loss": 1.4165, + "step": 60760 + }, + { + "epoch": 0.94, + "learning_rate": 4.017540684237431e-07, + "loss": 1.4077, + "step": 60770 + }, + { + "epoch": 0.94, + "learning_rate": 3.995811852280568e-07, + "loss": 1.4474, + "step": 60780 + }, + { + "epoch": 0.94, + "learning_rate": 3.974141466481257e-07, + "loss": 1.4193, + "step": 60790 + }, + { + "epoch": 0.94, + "learning_rate": 3.952529531987992e-07, + "loss": 1.4244, + "step": 60800 + }, + { + "epoch": 0.94, + "learning_rate": 3.930976053935359e-07, + "loss": 1.423, + "step": 60810 + }, + { + "epoch": 0.94, + "learning_rate": 3.909481037444096e-07, + "loss": 1.433, + "step": 60820 + }, + { + "epoch": 0.94, + "learning_rate": 3.888044487621034e-07, + "loss": 1.4563, + "step": 60830 + }, + { + "epoch": 0.94, + "learning_rate": 3.866666409559072e-07, + "loss": 1.4197, + "step": 60840 + }, + { + "epoch": 0.94, + "learning_rate": 3.8453468083373135e-07, + "loss": 1.4426, + "step": 60850 + }, + { + "epoch": 0.94, + "learning_rate": 3.824085689020873e-07, + "loss": 1.438, + "step": 60860 + }, + { + "epoch": 0.94, + "learning_rate": 3.802883056661044e-07, + "loss": 1.4409, + "step": 60870 + }, + { + "epoch": 0.94, + "learning_rate": 3.78173891629513e-07, + "loss": 1.3888, + "step": 60880 + }, + { + "epoch": 0.94, + "learning_rate": 3.76065327294664e-07, + "loss": 1.4263, + "step": 60890 + }, + { + "epoch": 0.94, + "learning_rate": 3.7396261316251513e-07, + "loss": 1.4283, + "step": 60900 + }, + { + "epoch": 0.95, + "learning_rate": 3.718657497326333e-07, + "loss": 1.452, + "step": 60910 + }, + { + "epoch": 0.95, + "learning_rate": 3.697747375031896e-07, + "loss": 1.4101, + "step": 60920 + }, + { + "epoch": 0.95, + "learning_rate": 3.6768957697097814e-07, + "loss": 1.4018, + "step": 60930 + }, + { + "epoch": 0.95, + "learning_rate": 3.6561026863139167e-07, + "loss": 1.4052, + "step": 60940 + }, + { + "epoch": 0.95, + "learning_rate": 3.63536812978435e-07, + "loss": 1.3939, + "step": 60950 + }, + { + "epoch": 0.95, + "learning_rate": 3.6146921050472794e-07, + "loss": 1.3988, + "step": 60960 + }, + { + "epoch": 0.95, + "learning_rate": 3.594074617014914e-07, + "loss": 1.3868, + "step": 60970 + }, + { + "epoch": 0.95, + "learning_rate": 3.573515670585642e-07, + "loss": 1.4025, + "step": 60980 + }, + { + "epoch": 0.95, + "learning_rate": 3.5530152706438614e-07, + "loss": 1.4671, + "step": 60990 + }, + { + "epoch": 0.95, + "learning_rate": 3.532573422060148e-07, + "loss": 1.3759, + "step": 61000 + }, + { + "epoch": 0.95, + "learning_rate": 3.5121901296910067e-07, + "loss": 1.3779, + "step": 61010 + }, + { + "epoch": 0.95, + "learning_rate": 3.491865398379257e-07, + "loss": 1.4063, + "step": 61020 + }, + { + "epoch": 0.95, + "learning_rate": 3.4715992329536487e-07, + "loss": 1.3951, + "step": 61030 + }, + { + "epoch": 0.95, + "learning_rate": 3.4513916382290237e-07, + "loss": 1.3976, + "step": 61040 + }, + { + "epoch": 0.95, + "learning_rate": 3.431242619006375e-07, + "loss": 1.3962, + "step": 61050 + }, + { + "epoch": 0.95, + "learning_rate": 3.411152180072791e-07, + "loss": 1.4307, + "step": 61060 + }, + { + "epoch": 0.95, + "learning_rate": 3.391120326201286e-07, + "loss": 1.4205, + "step": 61070 + }, + { + "epoch": 0.95, + "learning_rate": 3.371147062151164e-07, + "loss": 1.4065, + "step": 61080 + }, + { + "epoch": 0.95, + "learning_rate": 3.351232392667658e-07, + "loss": 1.4099, + "step": 61090 + }, + { + "epoch": 0.95, + "learning_rate": 3.331376322482149e-07, + "loss": 1.4129, + "step": 61100 + }, + { + "epoch": 0.95, + "learning_rate": 3.3115788563120865e-07, + "loss": 1.3809, + "step": 61110 + }, + { + "epoch": 0.95, + "learning_rate": 3.291839998860985e-07, + "loss": 1.3808, + "step": 61120 + }, + { + "epoch": 0.95, + "learning_rate": 3.2721597548184544e-07, + "loss": 1.3901, + "step": 61130 + }, + { + "epoch": 0.95, + "learning_rate": 3.252538128860144e-07, + "loss": 1.3959, + "step": 61140 + }, + { + "epoch": 0.95, + "learning_rate": 3.232975125647825e-07, + "loss": 1.3829, + "step": 61150 + }, + { + "epoch": 0.95, + "learning_rate": 3.2134707498293057e-07, + "loss": 1.3828, + "step": 61160 + }, + { + "epoch": 0.95, + "learning_rate": 3.1940250060384924e-07, + "loss": 1.3865, + "step": 61170 + }, + { + "epoch": 0.95, + "learning_rate": 3.174637898895272e-07, + "loss": 1.4056, + "step": 61180 + }, + { + "epoch": 0.95, + "learning_rate": 3.1553094330057377e-07, + "loss": 1.3985, + "step": 61190 + }, + { + "epoch": 0.95, + "learning_rate": 3.1360396129619665e-07, + "loss": 1.4118, + "step": 61200 + }, + { + "epoch": 0.95, + "learning_rate": 3.1168284433421293e-07, + "loss": 1.3853, + "step": 61210 + }, + { + "epoch": 0.95, + "learning_rate": 3.097675928710408e-07, + "loss": 1.4015, + "step": 61220 + }, + { + "epoch": 0.95, + "learning_rate": 3.0785820736171924e-07, + "loss": 1.3923, + "step": 61230 + }, + { + "epoch": 0.95, + "learning_rate": 3.0595468825987127e-07, + "loss": 1.3785, + "step": 61240 + }, + { + "epoch": 0.95, + "learning_rate": 3.0405703601774913e-07, + "loss": 1.4135, + "step": 61250 + }, + { + "epoch": 0.95, + "learning_rate": 3.021652510861922e-07, + "loss": 1.4261, + "step": 61260 + }, + { + "epoch": 0.95, + "learning_rate": 3.0027933391466313e-07, + "loss": 1.4145, + "step": 61270 + }, + { + "epoch": 0.95, + "learning_rate": 2.9839928495121463e-07, + "loss": 1.4218, + "step": 61280 + }, + { + "epoch": 0.95, + "learning_rate": 2.9652510464251716e-07, + "loss": 1.4515, + "step": 61290 + }, + { + "epoch": 0.95, + "learning_rate": 2.9465679343383956e-07, + "loss": 1.4562, + "step": 61300 + }, + { + "epoch": 0.95, + "learning_rate": 2.927943517690601e-07, + "loss": 1.4442, + "step": 61310 + }, + { + "epoch": 0.95, + "learning_rate": 2.90937780090661e-07, + "loss": 1.4528, + "step": 61320 + }, + { + "epoch": 0.95, + "learning_rate": 2.890870788397282e-07, + "loss": 1.3946, + "step": 61330 + }, + { + "epoch": 0.95, + "learning_rate": 2.8724224845596003e-07, + "loss": 1.3639, + "step": 61340 + }, + { + "epoch": 0.95, + "learning_rate": 2.854032893776476e-07, + "loss": 1.4428, + "step": 61350 + }, + { + "epoch": 0.95, + "learning_rate": 2.835702020417025e-07, + "loss": 1.4382, + "step": 61360 + }, + { + "epoch": 0.95, + "learning_rate": 2.817429868836263e-07, + "loss": 1.4403, + "step": 61370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7992164433753576e-07, + "loss": 1.4348, + "step": 61380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7810617483614856e-07, + "loss": 1.4391, + "step": 61390 + }, + { + "epoch": 0.95, + "learning_rate": 2.762965788107891e-07, + "loss": 1.4449, + "step": 61400 + }, + { + "epoch": 0.95, + "learning_rate": 2.7449285669138016e-07, + "loss": 1.4161, + "step": 61410 + }, + { + "epoch": 0.95, + "learning_rate": 2.726950089064595e-07, + "loss": 1.4413, + "step": 61420 + }, + { + "epoch": 0.95, + "learning_rate": 2.7090303588316046e-07, + "loss": 1.4198, + "step": 61430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6911693804722026e-07, + "loss": 1.4386, + "step": 61440 + }, + { + "epoch": 0.95, + "learning_rate": 2.6733671582298833e-07, + "loss": 1.4534, + "step": 61450 + }, + { + "epoch": 0.95, + "learning_rate": 2.6556236963341253e-07, + "loss": 1.4405, + "step": 61460 + }, + { + "epoch": 0.95, + "learning_rate": 2.637938999000444e-07, + "loss": 1.435, + "step": 61470 + }, + { + "epoch": 0.95, + "learning_rate": 2.620313070430397e-07, + "loss": 1.4605, + "step": 61480 + }, + { + "epoch": 0.95, + "learning_rate": 2.602745914811605e-07, + "loss": 1.44, + "step": 61490 + }, + { + "epoch": 0.95, + "learning_rate": 2.585237536317703e-07, + "loss": 1.4059, + "step": 61500 + }, + { + "epoch": 0.95, + "learning_rate": 2.5677879391083625e-07, + "loss": 1.398, + "step": 61510 + }, + { + "epoch": 0.95, + "learning_rate": 2.550397127329296e-07, + "loss": 1.3822, + "step": 61520 + }, + { + "epoch": 0.95, + "learning_rate": 2.533065105112226e-07, + "loss": 1.3863, + "step": 61530 + }, + { + "epoch": 0.95, + "learning_rate": 2.5157918765749413e-07, + "loss": 1.3917, + "step": 61540 + }, + { + "epoch": 0.95, + "learning_rate": 2.4985774458212986e-07, + "loss": 1.3953, + "step": 61550 + }, + { + "epoch": 0.96, + "learning_rate": 2.4814218169410265e-07, + "loss": 1.3809, + "step": 61560 + }, + { + "epoch": 0.96, + "learning_rate": 2.464324994010114e-07, + "loss": 1.3828, + "step": 61570 + }, + { + "epoch": 0.96, + "learning_rate": 2.447286981090369e-07, + "loss": 1.4038, + "step": 61580 + }, + { + "epoch": 0.96, + "learning_rate": 2.430307782229774e-07, + "loss": 1.4187, + "step": 61590 + }, + { + "epoch": 0.96, + "learning_rate": 2.4133874014622414e-07, + "loss": 1.4067, + "step": 61600 + }, + { + "epoch": 0.96, + "learning_rate": 2.3965258428077507e-07, + "loss": 1.3923, + "step": 61610 + }, + { + "epoch": 0.96, + "learning_rate": 2.3797231102723195e-07, + "loss": 1.4125, + "step": 61620 + }, + { + "epoch": 0.96, + "learning_rate": 2.3629792078479762e-07, + "loss": 1.4093, + "step": 61630 + }, + { + "epoch": 0.96, + "learning_rate": 2.346294139512789e-07, + "loss": 1.4122, + "step": 61640 + }, + { + "epoch": 0.96, + "learning_rate": 2.3296679092307539e-07, + "loss": 1.3996, + "step": 61650 + }, + { + "epoch": 0.96, + "learning_rate": 2.3131005209520717e-07, + "loss": 1.3988, + "step": 61660 + }, + { + "epoch": 0.96, + "learning_rate": 2.2965919786127332e-07, + "loss": 1.4143, + "step": 61670 + }, + { + "epoch": 0.96, + "learning_rate": 2.2801422861349898e-07, + "loss": 1.4023, + "step": 61680 + }, + { + "epoch": 0.96, + "learning_rate": 2.2637514474269094e-07, + "loss": 1.4071, + "step": 61690 + }, + { + "epoch": 0.96, + "learning_rate": 2.2474194663826831e-07, + "loss": 1.4228, + "step": 61700 + }, + { + "epoch": 0.96, + "learning_rate": 2.2311463468825121e-07, + "loss": 1.4071, + "step": 61710 + }, + { + "epoch": 0.96, + "learning_rate": 2.2149320927925821e-07, + "loss": 1.4311, + "step": 61720 + }, + { + "epoch": 0.96, + "learning_rate": 2.198776707965089e-07, + "loss": 1.3902, + "step": 61730 + }, + { + "epoch": 0.96, + "learning_rate": 2.182680196238296e-07, + "loss": 1.412, + "step": 61740 + }, + { + "epoch": 0.96, + "learning_rate": 2.1666425614364217e-07, + "loss": 1.4194, + "step": 61750 + }, + { + "epoch": 0.96, + "learning_rate": 2.1506638073697237e-07, + "loss": 1.3934, + "step": 61760 + }, + { + "epoch": 0.96, + "learning_rate": 2.1347439378344435e-07, + "loss": 1.4113, + "step": 61770 + }, + { + "epoch": 0.96, + "learning_rate": 2.1188829566128887e-07, + "loss": 1.4148, + "step": 61780 + }, + { + "epoch": 0.96, + "learning_rate": 2.1030808674733505e-07, + "loss": 1.4326, + "step": 61790 + }, + { + "epoch": 0.96, + "learning_rate": 2.0873376741700755e-07, + "loss": 1.3805, + "step": 61800 + }, + { + "epoch": 0.96, + "learning_rate": 2.0716533804434058e-07, + "loss": 1.4088, + "step": 61810 + }, + { + "epoch": 0.96, + "learning_rate": 2.0560279900196377e-07, + "loss": 1.4018, + "step": 61820 + }, + { + "epoch": 0.96, + "learning_rate": 2.0404615066110799e-07, + "loss": 1.4527, + "step": 61830 + }, + { + "epoch": 0.96, + "learning_rate": 2.0249539339159962e-07, + "loss": 1.377, + "step": 61840 + }, + { + "epoch": 0.96, + "learning_rate": 2.0095052756187726e-07, + "loss": 1.4209, + "step": 61850 + }, + { + "epoch": 0.96, + "learning_rate": 1.9941155353897234e-07, + "loss": 1.4648, + "step": 61860 + }, + { + "epoch": 0.96, + "learning_rate": 1.9787847168851458e-07, + "loss": 1.4578, + "step": 61870 + }, + { + "epoch": 0.96, + "learning_rate": 1.9635128237473765e-07, + "loss": 1.4338, + "step": 61880 + }, + { + "epoch": 0.96, + "learning_rate": 1.948299859604791e-07, + "loss": 1.458, + "step": 61890 + }, + { + "epoch": 0.96, + "learning_rate": 1.9331458280716086e-07, + "loss": 1.4915, + "step": 61900 + }, + { + "epoch": 0.96, + "learning_rate": 1.9180507327482555e-07, + "loss": 1.4371, + "step": 61910 + }, + { + "epoch": 0.96, + "learning_rate": 1.903014577220974e-07, + "loss": 1.4461, + "step": 61920 + }, + { + "epoch": 0.96, + "learning_rate": 1.8880373650621563e-07, + "loss": 1.4315, + "step": 61930 + }, + { + "epoch": 0.96, + "learning_rate": 1.8731190998300674e-07, + "loss": 1.4689, + "step": 61940 + }, + { + "epoch": 0.96, + "learning_rate": 1.8582597850690386e-07, + "loss": 1.4423, + "step": 61950 + }, + { + "epoch": 0.96, + "learning_rate": 1.843459424309385e-07, + "loss": 1.4246, + "step": 61960 + }, + { + "epoch": 0.96, + "learning_rate": 1.828718021067377e-07, + "loss": 1.4385, + "step": 61970 + }, + { + "epoch": 0.96, + "learning_rate": 1.8140355788453245e-07, + "loss": 1.4478, + "step": 61980 + }, + { + "epoch": 0.96, + "learning_rate": 1.7994121011314923e-07, + "loss": 1.4384, + "step": 61990 + }, + { + "epoch": 0.96, + "learning_rate": 1.784847591400185e-07, + "loss": 1.4597, + "step": 62000 + }, + { + "epoch": 0.96, + "learning_rate": 1.7703420531116343e-07, + "loss": 1.4349, + "step": 62010 + }, + { + "epoch": 0.96, + "learning_rate": 1.7558954897121393e-07, + "loss": 1.438, + "step": 62020 + }, + { + "epoch": 0.96, + "learning_rate": 1.741507904633899e-07, + "loss": 1.4242, + "step": 62030 + }, + { + "epoch": 0.96, + "learning_rate": 1.7271793012951786e-07, + "loss": 1.4456, + "step": 62040 + }, + { + "epoch": 0.96, + "learning_rate": 1.7129096831001722e-07, + "loss": 1.4386, + "step": 62050 + }, + { + "epoch": 0.96, + "learning_rate": 1.6986990534390845e-07, + "loss": 1.3971, + "step": 62060 + }, + { + "epoch": 0.96, + "learning_rate": 1.6845474156881036e-07, + "loss": 1.3981, + "step": 62070 + }, + { + "epoch": 0.96, + "learning_rate": 1.6704547732094288e-07, + "loss": 1.3781, + "step": 62080 + }, + { + "epoch": 0.96, + "learning_rate": 1.6564211293511601e-07, + "loss": 1.3942, + "step": 62090 + }, + { + "epoch": 0.96, + "learning_rate": 1.6424464874475189e-07, + "loss": 1.4112, + "step": 62100 + }, + { + "epoch": 0.96, + "learning_rate": 1.6285308508185716e-07, + "loss": 1.3931, + "step": 62110 + }, + { + "epoch": 0.96, + "learning_rate": 1.6146742227704237e-07, + "loss": 1.3769, + "step": 62120 + }, + { + "epoch": 0.96, + "learning_rate": 1.600876606595164e-07, + "loss": 1.3996, + "step": 62130 + }, + { + "epoch": 0.96, + "learning_rate": 1.587138005570893e-07, + "loss": 1.3683, + "step": 62140 + }, + { + "epoch": 0.96, + "learning_rate": 1.573458422961638e-07, + "loss": 1.399, + "step": 62150 + }, + { + "epoch": 0.96, + "learning_rate": 1.5598378620173825e-07, + "loss": 1.4099, + "step": 62160 + }, + { + "epoch": 0.96, + "learning_rate": 1.5462763259741497e-07, + "loss": 1.4101, + "step": 62170 + }, + { + "epoch": 0.96, + "learning_rate": 1.5327738180539454e-07, + "loss": 1.4113, + "step": 62180 + }, + { + "epoch": 0.96, + "learning_rate": 1.5193303414646754e-07, + "loss": 1.4135, + "step": 62190 + }, + { + "epoch": 0.97, + "learning_rate": 1.5059458994003129e-07, + "loss": 1.4212, + "step": 62200 + }, + { + "epoch": 0.97, + "learning_rate": 1.492620495040731e-07, + "loss": 1.4206, + "step": 62210 + }, + { + "epoch": 0.97, + "learning_rate": 1.4793541315517855e-07, + "loss": 1.4057, + "step": 62220 + }, + { + "epoch": 0.97, + "learning_rate": 1.4661468120853726e-07, + "loss": 1.4111, + "step": 62230 + }, + { + "epoch": 0.97, + "learning_rate": 1.4529985397792601e-07, + "loss": 1.3654, + "step": 62240 + }, + { + "epoch": 0.97, + "learning_rate": 1.4399093177573098e-07, + "loss": 1.3989, + "step": 62250 + }, + { + "epoch": 0.97, + "learning_rate": 1.426879149129229e-07, + "loss": 1.3995, + "step": 62260 + }, + { + "epoch": 0.97, + "learning_rate": 1.413908036990763e-07, + "loss": 1.4072, + "step": 62270 + }, + { + "epoch": 0.97, + "learning_rate": 1.400995984423642e-07, + "loss": 1.3334, + "step": 62280 + }, + { + "epoch": 0.97, + "learning_rate": 1.388142994495495e-07, + "loss": 1.4196, + "step": 62290 + }, + { + "epoch": 0.97, + "learning_rate": 1.3753490702599626e-07, + "loss": 1.4255, + "step": 62300 + }, + { + "epoch": 0.97, + "learning_rate": 1.3626142147566967e-07, + "loss": 1.4019, + "step": 62310 + }, + { + "epoch": 0.97, + "learning_rate": 1.3499384310112217e-07, + "loss": 1.3982, + "step": 62320 + }, + { + "epoch": 0.97, + "learning_rate": 1.337321722035073e-07, + "loss": 1.4296, + "step": 62330 + }, + { + "epoch": 0.97, + "learning_rate": 1.324764090825825e-07, + "loss": 1.4362, + "step": 62340 + }, + { + "epoch": 0.97, + "learning_rate": 1.3122655403668416e-07, + "loss": 1.4522, + "step": 62350 + }, + { + "epoch": 0.97, + "learning_rate": 1.2998260736276358e-07, + "loss": 1.4464, + "step": 62360 + }, + { + "epoch": 0.97, + "learning_rate": 1.287445693563538e-07, + "loss": 1.4694, + "step": 62370 + }, + { + "epoch": 0.97, + "learning_rate": 1.2751244031159737e-07, + "loss": 1.4561, + "step": 62380 + }, + { + "epoch": 0.97, + "learning_rate": 1.2628622052121842e-07, + "loss": 1.4392, + "step": 62390 + }, + { + "epoch": 0.97, + "learning_rate": 1.2506591027655057e-07, + "loss": 1.4576, + "step": 62400 + }, + { + "epoch": 0.97, + "learning_rate": 1.2385150986751194e-07, + "loss": 1.4429, + "step": 62410 + }, + { + "epoch": 0.97, + "learning_rate": 1.2264301958262726e-07, + "loss": 1.4554, + "step": 62420 + }, + { + "epoch": 0.97, + "learning_rate": 1.2144043970901132e-07, + "loss": 1.4523, + "step": 62430 + }, + { + "epoch": 0.97, + "learning_rate": 1.2024377053237168e-07, + "loss": 1.4419, + "step": 62440 + }, + { + "epoch": 0.97, + "learning_rate": 1.1905301233701982e-07, + "loss": 1.4516, + "step": 62450 + }, + { + "epoch": 0.97, + "learning_rate": 1.1786816540585443e-07, + "loss": 1.4611, + "step": 62460 + }, + { + "epoch": 0.97, + "learning_rate": 1.1668923002037535e-07, + "loss": 1.4471, + "step": 62470 + }, + { + "epoch": 0.97, + "learning_rate": 1.1551620646067796e-07, + "loss": 1.4405, + "step": 62480 + }, + { + "epoch": 0.97, + "learning_rate": 1.1434909500545321e-07, + "loss": 1.4404, + "step": 62490 + }, + { + "epoch": 0.97, + "learning_rate": 1.1318789593197654e-07, + "loss": 1.4283, + "step": 62500 + }, + { + "epoch": 0.97, + "learning_rate": 1.1203260951613837e-07, + "loss": 1.4621, + "step": 62510 + }, + { + "epoch": 0.97, + "learning_rate": 1.108832360324108e-07, + "loss": 1.4535, + "step": 62520 + }, + { + "epoch": 0.97, + "learning_rate": 1.0973977575386151e-07, + "loss": 1.4431, + "step": 62530 + }, + { + "epoch": 0.97, + "learning_rate": 1.0860222895215655e-07, + "loss": 1.4353, + "step": 62540 + }, + { + "epoch": 0.97, + "learning_rate": 1.0747059589755748e-07, + "loss": 1.3918, + "step": 62550 + }, + { + "epoch": 0.97, + "learning_rate": 1.0634487685892147e-07, + "loss": 1.3883, + "step": 62560 + }, + { + "epoch": 0.97, + "learning_rate": 1.0522507210369847e-07, + "loss": 1.3874, + "step": 62570 + }, + { + "epoch": 0.97, + "learning_rate": 1.0411118189793123e-07, + "loss": 1.4125, + "step": 62580 + }, + { + "epoch": 0.97, + "learning_rate": 1.0300320650626082e-07, + "loss": 1.3829, + "step": 62590 + }, + { + "epoch": 0.97, + "learning_rate": 1.019011461919267e-07, + "loss": 1.3893, + "step": 62600 + }, + { + "epoch": 0.97, + "learning_rate": 1.0080500121675274e-07, + "loss": 1.4034, + "step": 62610 + }, + { + "epoch": 0.97, + "learning_rate": 9.971477184116395e-08, + "loss": 1.3971, + "step": 62620 + }, + { + "epoch": 0.97, + "learning_rate": 9.863045832418371e-08, + "loss": 1.4241, + "step": 62630 + }, + { + "epoch": 0.97, + "learning_rate": 9.755206092341984e-08, + "loss": 1.4013, + "step": 62640 + }, + { + "epoch": 0.97, + "learning_rate": 9.647957989508405e-08, + "loss": 1.4248, + "step": 62650 + }, + { + "epoch": 0.97, + "learning_rate": 9.541301549397807e-08, + "loss": 1.4075, + "step": 62660 + }, + { + "epoch": 0.97, + "learning_rate": 9.435236797349645e-08, + "loss": 1.3861, + "step": 62670 + }, + { + "epoch": 0.97, + "learning_rate": 9.329763758562926e-08, + "loss": 1.3799, + "step": 62680 + }, + { + "epoch": 0.97, + "learning_rate": 9.224882458096496e-08, + "loss": 1.4213, + "step": 62690 + }, + { + "epoch": 0.97, + "learning_rate": 9.12059292086792e-08, + "loss": 1.3838, + "step": 62700 + }, + { + "epoch": 0.97, + "learning_rate": 9.016895171654605e-08, + "loss": 1.4087, + "step": 62710 + }, + { + "epoch": 0.97, + "learning_rate": 8.913789235093506e-08, + "loss": 1.3861, + "step": 62720 + }, + { + "epoch": 0.97, + "learning_rate": 8.811275135680309e-08, + "loss": 1.4005, + "step": 62730 + }, + { + "epoch": 0.97, + "learning_rate": 8.709352897771084e-08, + "loss": 1.4065, + "step": 62740 + }, + { + "epoch": 0.97, + "learning_rate": 8.608022545580075e-08, + "loss": 1.4185, + "step": 62750 + }, + { + "epoch": 0.97, + "learning_rate": 8.507284103181911e-08, + "loss": 1.3899, + "step": 62760 + }, + { + "epoch": 0.97, + "learning_rate": 8.407137594510229e-08, + "loss": 1.3754, + "step": 62770 + }, + { + "epoch": 0.97, + "learning_rate": 8.307583043357936e-08, + "loss": 1.4166, + "step": 62780 + }, + { + "epoch": 0.97, + "learning_rate": 8.208620473377226e-08, + "loss": 1.3867, + "step": 62790 + }, + { + "epoch": 0.97, + "learning_rate": 8.110249908080402e-08, + "loss": 1.3962, + "step": 62800 + }, + { + "epoch": 0.97, + "learning_rate": 8.012471370837937e-08, + "loss": 1.3822, + "step": 62810 + }, + { + "epoch": 0.97, + "learning_rate": 7.915284884880414e-08, + "loss": 1.3901, + "step": 62820 + }, + { + "epoch": 0.97, + "learning_rate": 7.818690473297696e-08, + "loss": 1.3883, + "step": 62830 + }, + { + "epoch": 0.97, + "learning_rate": 7.722688159038927e-08, + "loss": 1.3986, + "step": 62840 + }, + { + "epoch": 0.98, + "learning_rate": 7.62727796491225e-08, + "loss": 1.4002, + "step": 62850 + }, + { + "epoch": 0.98, + "learning_rate": 7.532459913585643e-08, + "loss": 1.4006, + "step": 62860 + }, + { + "epoch": 0.98, + "learning_rate": 7.438234027586366e-08, + "loss": 1.3797, + "step": 62870 + }, + { + "epoch": 0.98, + "learning_rate": 7.344600329300122e-08, + "loss": 1.3898, + "step": 62880 + }, + { + "epoch": 0.98, + "learning_rate": 7.251558840973283e-08, + "loss": 1.4037, + "step": 62890 + }, + { + "epoch": 0.98, + "learning_rate": 7.159109584710388e-08, + "loss": 1.4244, + "step": 62900 + }, + { + "epoch": 0.98, + "learning_rate": 7.067252582475814e-08, + "loss": 1.4302, + "step": 62910 + }, + { + "epoch": 0.98, + "learning_rate": 6.975987856093213e-08, + "loss": 1.3964, + "step": 62920 + }, + { + "epoch": 0.98, + "learning_rate": 6.88531542724552e-08, + "loss": 1.407, + "step": 62930 + }, + { + "epoch": 0.98, + "learning_rate": 6.795235317474391e-08, + "loss": 1.4176, + "step": 62940 + }, + { + "epoch": 0.98, + "learning_rate": 6.705747548181874e-08, + "loss": 1.4169, + "step": 62950 + }, + { + "epoch": 0.98, + "learning_rate": 6.61685214062846e-08, + "loss": 1.3913, + "step": 62960 + }, + { + "epoch": 0.98, + "learning_rate": 6.528549115933647e-08, + "loss": 1.3996, + "step": 62970 + }, + { + "epoch": 0.98, + "learning_rate": 6.440838495077317e-08, + "loss": 1.4181, + "step": 62980 + }, + { + "epoch": 0.98, + "learning_rate": 6.353720298897525e-08, + "loss": 1.4163, + "step": 62990 + }, + { + "epoch": 0.98, + "learning_rate": 6.267194548091882e-08, + "loss": 1.4286, + "step": 63000 + }, + { + "epoch": 0.98, + "learning_rate": 6.18126126321783e-08, + "loss": 1.4204, + "step": 63010 + }, + { + "epoch": 0.98, + "learning_rate": 6.095920464690986e-08, + "loss": 1.4398, + "step": 63020 + }, + { + "epoch": 0.98, + "learning_rate": 6.011172172787072e-08, + "loss": 1.4177, + "step": 63030 + }, + { + "epoch": 0.98, + "learning_rate": 5.927016407641095e-08, + "loss": 1.4352, + "step": 63040 + }, + { + "epoch": 0.98, + "learning_rate": 5.843453189246229e-08, + "loss": 1.4169, + "step": 63050 + }, + { + "epoch": 0.98, + "learning_rate": 5.760482537456036e-08, + "loss": 1.3893, + "step": 63060 + }, + { + "epoch": 0.98, + "learning_rate": 5.678104471982804e-08, + "loss": 1.4048, + "step": 63070 + }, + { + "epoch": 0.98, + "learning_rate": 5.596319012398099e-08, + "loss": 1.415, + "step": 63080 + }, + { + "epoch": 0.98, + "learning_rate": 5.5151261781324905e-08, + "loss": 1.4127, + "step": 63090 + }, + { + "epoch": 0.98, + "learning_rate": 5.4345259884761027e-08, + "loss": 1.398, + "step": 63100 + }, + { + "epoch": 0.98, + "learning_rate": 5.354518462578062e-08, + "loss": 1.4096, + "step": 63110 + }, + { + "epoch": 0.98, + "learning_rate": 5.275103619446497e-08, + "loss": 1.4116, + "step": 63120 + }, + { + "epoch": 0.98, + "learning_rate": 5.196281477949372e-08, + "loss": 1.4121, + "step": 63130 + }, + { + "epoch": 0.98, + "learning_rate": 5.118052056813094e-08, + "loss": 1.4028, + "step": 63140 + }, + { + "epoch": 0.98, + "learning_rate": 5.04041537462363e-08, + "loss": 1.4253, + "step": 63150 + }, + { + "epoch": 0.98, + "learning_rate": 4.963371449825949e-08, + "loss": 1.4008, + "step": 63160 + }, + { + "epoch": 0.98, + "learning_rate": 4.886920300724573e-08, + "loss": 1.4082, + "step": 63170 + }, + { + "epoch": 0.98, + "learning_rate": 4.811061945482476e-08, + "loss": 1.3943, + "step": 63180 + }, + { + "epoch": 0.98, + "learning_rate": 4.735796402122739e-08, + "loss": 1.4271, + "step": 63190 + }, + { + "epoch": 0.98, + "learning_rate": 4.6611236885268917e-08, + "loss": 1.4605, + "step": 63200 + }, + { + "epoch": 0.98, + "learning_rate": 4.5870438224357435e-08, + "loss": 1.4544, + "step": 63210 + }, + { + "epoch": 0.98, + "learning_rate": 4.513556821449383e-08, + "loss": 1.4295, + "step": 63220 + }, + { + "epoch": 0.98, + "learning_rate": 4.4406627030268986e-08, + "loss": 1.4515, + "step": 63230 + }, + { + "epoch": 0.98, + "learning_rate": 4.368361484486938e-08, + "loss": 1.4289, + "step": 63240 + }, + { + "epoch": 0.98, + "learning_rate": 4.296653183006316e-08, + "loss": 1.433, + "step": 63250 + }, + { + "epoch": 0.98, + "learning_rate": 4.2255378156225154e-08, + "loss": 1.4579, + "step": 63260 + }, + { + "epoch": 0.98, + "learning_rate": 4.155015399230633e-08, + "loss": 1.4332, + "step": 63270 + }, + { + "epoch": 0.98, + "learning_rate": 4.085085950585876e-08, + "loss": 1.4635, + "step": 63280 + }, + { + "epoch": 0.98, + "learning_rate": 4.0157494863019006e-08, + "loss": 1.4396, + "step": 63290 + }, + { + "epoch": 0.98, + "learning_rate": 3.947006022851918e-08, + "loss": 1.4541, + "step": 63300 + }, + { + "epoch": 0.98, + "learning_rate": 3.878855576568141e-08, + "loss": 1.4546, + "step": 63310 + }, + { + "epoch": 0.98, + "learning_rate": 3.811298163642063e-08, + "loss": 1.4545, + "step": 63320 + }, + { + "epoch": 0.98, + "learning_rate": 3.7443338001236226e-08, + "loss": 1.4309, + "step": 63330 + }, + { + "epoch": 0.98, + "learning_rate": 3.677962501923149e-08, + "loss": 1.4417, + "step": 63340 + }, + { + "epoch": 0.98, + "learning_rate": 3.612184284808306e-08, + "loss": 1.434, + "step": 63350 + }, + { + "epoch": 0.98, + "learning_rate": 3.546999164407705e-08, + "loss": 1.4322, + "step": 63360 + }, + { + "epoch": 0.98, + "learning_rate": 3.482407156207568e-08, + "loss": 1.4426, + "step": 63370 + }, + { + "epoch": 0.98, + "learning_rate": 3.418408275553953e-08, + "loss": 1.4393, + "step": 63380 + }, + { + "epoch": 0.98, + "learning_rate": 3.355002537651919e-08, + "loss": 1.4401, + "step": 63390 + }, + { + "epoch": 0.98, + "learning_rate": 3.2921899575652504e-08, + "loss": 1.4294, + "step": 63400 + }, + { + "epoch": 0.98, + "learning_rate": 3.2299705502175646e-08, + "loss": 1.4236, + "step": 63410 + }, + { + "epoch": 0.98, + "learning_rate": 3.1683443303906493e-08, + "loss": 1.4327, + "step": 63420 + }, + { + "epoch": 0.98, + "learning_rate": 3.1073113127261244e-08, + "loss": 1.4443, + "step": 63430 + }, + { + "epoch": 0.98, + "learning_rate": 3.046871511724059e-08, + "loss": 1.4341, + "step": 63440 + }, + { + "epoch": 0.98, + "learning_rate": 2.987024941743799e-08, + "loss": 1.4922, + "step": 63450 + }, + { + "epoch": 0.98, + "learning_rate": 2.9277716170039715e-08, + "loss": 1.4402, + "step": 63460 + }, + { + "epoch": 0.98, + "learning_rate": 2.8691115515822043e-08, + "loss": 1.443, + "step": 63470 + }, + { + "epoch": 0.98, + "learning_rate": 2.8110447594148493e-08, + "loss": 1.4687, + "step": 63480 + }, + { + "epoch": 0.99, + "learning_rate": 2.7535712542978153e-08, + "loss": 1.4763, + "step": 63490 + }, + { + "epoch": 0.99, + "learning_rate": 2.6966910498854582e-08, + "loss": 1.4508, + "step": 63500 + }, + { + "epoch": 0.99, + "learning_rate": 2.640404159691412e-08, + "loss": 1.4658, + "step": 63510 + }, + { + "epoch": 0.99, + "learning_rate": 2.5847105970888686e-08, + "loss": 1.4558, + "step": 63520 + }, + { + "epoch": 0.99, + "learning_rate": 2.5296103753094658e-08, + "loss": 1.4341, + "step": 63530 + }, + { + "epoch": 0.99, + "learning_rate": 2.4751035074438432e-08, + "loss": 1.4271, + "step": 63540 + }, + { + "epoch": 0.99, + "learning_rate": 2.4211900064416425e-08, + "loss": 1.3818, + "step": 63550 + }, + { + "epoch": 0.99, + "learning_rate": 2.3678698851123395e-08, + "loss": 1.4161, + "step": 63560 + }, + { + "epoch": 0.99, + "learning_rate": 2.315143156123578e-08, + "loss": 1.3957, + "step": 63570 + }, + { + "epoch": 0.99, + "learning_rate": 2.2630098320020055e-08, + "loss": 1.396, + "step": 63580 + }, + { + "epoch": 0.99, + "learning_rate": 2.2114699251338243e-08, + "loss": 1.4011, + "step": 63590 + }, + { + "epoch": 0.99, + "learning_rate": 2.1605234477639625e-08, + "loss": 1.4132, + "step": 63600 + }, + { + "epoch": 0.99, + "learning_rate": 2.110170411996626e-08, + "loss": 1.3979, + "step": 63610 + }, + { + "epoch": 0.99, + "learning_rate": 2.0604108297941905e-08, + "loss": 1.4142, + "step": 63620 + }, + { + "epoch": 0.99, + "learning_rate": 2.0112447129791433e-08, + "loss": 1.4002, + "step": 63630 + }, + { + "epoch": 0.99, + "learning_rate": 1.9626720732321414e-08, + "loss": 1.3871, + "step": 63640 + }, + { + "epoch": 0.99, + "learning_rate": 1.9146929220933973e-08, + "loss": 1.4057, + "step": 63650 + }, + { + "epoch": 0.99, + "learning_rate": 1.867307270961849e-08, + "loss": 1.406, + "step": 63660 + }, + { + "epoch": 0.99, + "learning_rate": 1.820515131095435e-08, + "loss": 1.3913, + "step": 63670 + }, + { + "epoch": 0.99, + "learning_rate": 1.7743165136110964e-08, + "loss": 1.4131, + "step": 63680 + }, + { + "epoch": 0.99, + "learning_rate": 1.728711429484775e-08, + "loss": 1.4186, + "step": 63690 + }, + { + "epoch": 0.99, + "learning_rate": 1.6836998895514155e-08, + "loss": 1.4121, + "step": 63700 + }, + { + "epoch": 0.99, + "learning_rate": 1.639281904505241e-08, + "loss": 1.4079, + "step": 63710 + }, + { + "epoch": 0.99, + "learning_rate": 1.5954574848986436e-08, + "loss": 1.4214, + "step": 63720 + }, + { + "epoch": 0.99, + "learning_rate": 1.5522266411441278e-08, + "loss": 1.4273, + "step": 63730 + }, + { + "epoch": 0.99, + "learning_rate": 1.5095893835118113e-08, + "loss": 1.433, + "step": 63740 + }, + { + "epoch": 0.99, + "learning_rate": 1.467545722132202e-08, + "loss": 1.3967, + "step": 63750 + }, + { + "epoch": 0.99, + "learning_rate": 1.4260956669939762e-08, + "loss": 1.4042, + "step": 63760 + }, + { + "epoch": 0.99, + "learning_rate": 1.3852392279445347e-08, + "loss": 1.4123, + "step": 63770 + }, + { + "epoch": 0.99, + "learning_rate": 1.3449764146911125e-08, + "loss": 1.4277, + "step": 63780 + }, + { + "epoch": 0.99, + "learning_rate": 1.3053072367991137e-08, + "loss": 1.4108, + "step": 63790 + }, + { + "epoch": 0.99, + "learning_rate": 1.2662317036932214e-08, + "loss": 1.416, + "step": 63800 + }, + { + "epoch": 0.99, + "learning_rate": 1.2277498246571206e-08, + "loss": 1.4204, + "step": 63810 + }, + { + "epoch": 0.99, + "learning_rate": 1.1898616088337755e-08, + "loss": 1.4084, + "step": 63820 + }, + { + "epoch": 0.99, + "learning_rate": 1.1525670652243192e-08, + "loss": 1.4157, + "step": 63830 + }, + { + "epoch": 0.99, + "learning_rate": 1.1158662026891642e-08, + "loss": 1.4437, + "step": 63840 + }, + { + "epoch": 0.99, + "learning_rate": 1.0797590299482796e-08, + "loss": 1.4054, + "step": 63850 + }, + { + "epoch": 0.99, + "learning_rate": 1.0442455555798037e-08, + "loss": 1.4109, + "step": 63860 + }, + { + "epoch": 0.99, + "learning_rate": 1.0093257880208762e-08, + "loss": 1.4116, + "step": 63870 + }, + { + "epoch": 0.99, + "learning_rate": 9.749997355679164e-09, + "loss": 1.4179, + "step": 63880 + }, + { + "epoch": 0.99, + "learning_rate": 9.412674063766225e-09, + "loss": 1.3854, + "step": 63890 + }, + { + "epoch": 0.99, + "learning_rate": 9.081288084605844e-09, + "loss": 1.4173, + "step": 63900 + }, + { + "epoch": 0.99, + "learning_rate": 8.755839496932261e-09, + "loss": 1.4225, + "step": 63910 + }, + { + "epoch": 0.99, + "learning_rate": 8.43632837806696e-09, + "loss": 1.4167, + "step": 63920 + }, + { + "epoch": 0.99, + "learning_rate": 8.122754803918665e-09, + "loss": 1.4473, + "step": 63930 + }, + { + "epoch": 0.99, + "learning_rate": 7.815118848986113e-09, + "loss": 1.4522, + "step": 63940 + }, + { + "epoch": 0.99, + "learning_rate": 7.513420586358067e-09, + "loss": 1.4047, + "step": 63950 + }, + { + "epoch": 0.99, + "learning_rate": 7.217660087713296e-09, + "loss": 1.4055, + "step": 63960 + }, + { + "epoch": 0.99, + "learning_rate": 6.927837423320593e-09, + "loss": 1.4354, + "step": 63970 + }, + { + "epoch": 0.99, + "learning_rate": 6.64395266203599e-09, + "loss": 1.4274, + "step": 63980 + }, + { + "epoch": 0.99, + "learning_rate": 6.36600587130276e-09, + "loss": 1.4059, + "step": 63990 + }, + { + "epoch": 0.99, + "learning_rate": 6.093997117159744e-09, + "loss": 1.3817, + "step": 64000 + }, + { + "epoch": 0.99, + "learning_rate": 5.827926464227473e-09, + "loss": 1.4201, + "step": 64010 + }, + { + "epoch": 0.99, + "learning_rate": 5.567793975724822e-09, + "loss": 1.4139, + "step": 64020 + }, + { + "epoch": 0.99, + "learning_rate": 5.31359971344958e-09, + "loss": 1.4076, + "step": 64030 + }, + { + "epoch": 0.99, + "learning_rate": 5.065343737795103e-09, + "loss": 1.4405, + "step": 64040 + }, + { + "epoch": 0.99, + "learning_rate": 4.82302610774199e-09, + "loss": 1.4346, + "step": 64050 + }, + { + "epoch": 0.99, + "learning_rate": 4.586646880863632e-09, + "loss": 1.4489, + "step": 64060 + }, + { + "epoch": 0.99, + "learning_rate": 4.356206113315109e-09, + "loss": 1.4515, + "step": 64070 + }, + { + "epoch": 0.99, + "learning_rate": 4.131703859849845e-09, + "loss": 1.4392, + "step": 64080 + }, + { + "epoch": 0.99, + "learning_rate": 3.913140173800178e-09, + "loss": 1.4506, + "step": 64090 + }, + { + "epoch": 0.99, + "learning_rate": 3.7005151070967916e-09, + "loss": 1.451, + "step": 64100 + }, + { + "epoch": 0.99, + "learning_rate": 3.4938287102548316e-09, + "loss": 1.4501, + "step": 64110 + }, + { + "epoch": 0.99, + "learning_rate": 3.293081032379464e-09, + "loss": 1.4621, + "step": 64120 + }, + { + "epoch": 0.99, + "learning_rate": 3.0982721211630927e-09, + "loss": 1.4442, + "step": 64130 + }, + { + "epoch": 1.0, + "learning_rate": 2.9094020228881414e-09, + "loss": 1.4632, + "step": 64140 + }, + { + "epoch": 1.0, + "learning_rate": 2.7264707824325998e-09, + "loss": 1.4479, + "step": 64150 + }, + { + "epoch": 1.0, + "learning_rate": 2.5494784432505967e-09, + "loss": 1.4508, + "step": 64160 + }, + { + "epoch": 1.0, + "learning_rate": 2.3784250473946058e-09, + "loss": 1.4454, + "step": 64170 + }, + { + "epoch": 1.0, + "learning_rate": 2.21331063550434e-09, + "loss": 1.4473, + "step": 64180 + }, + { + "epoch": 1.0, + "learning_rate": 2.05413524680953e-09, + "loss": 1.4416, + "step": 64190 + }, + { + "epoch": 1.0, + "learning_rate": 1.900898919124372e-09, + "loss": 1.443, + "step": 64200 + }, + { + "epoch": 1.0, + "learning_rate": 1.7536016888558548e-09, + "loss": 1.4568, + "step": 64210 + }, + { + "epoch": 1.0, + "learning_rate": 1.6122435910037591e-09, + "loss": 1.487, + "step": 64220 + }, + { + "epoch": 1.0, + "learning_rate": 1.476824659144005e-09, + "loss": 1.4426, + "step": 64230 + }, + { + "epoch": 1.0, + "learning_rate": 1.3473449254564064e-09, + "loss": 1.466, + "step": 64240 + }, + { + "epoch": 1.0, + "learning_rate": 1.2238044206996923e-09, + "loss": 1.4406, + "step": 64250 + }, + { + "epoch": 1.0, + "learning_rate": 1.1062031742253842e-09, + "loss": 1.4513, + "step": 64260 + }, + { + "epoch": 1.0, + "learning_rate": 9.9454121397502e-10, + "loss": 1.4563, + "step": 64270 + }, + { + "epoch": 1.0, + "learning_rate": 8.888185664746029e-10, + "loss": 1.4638, + "step": 64280 + }, + { + "epoch": 1.0, + "learning_rate": 7.890352568457049e-10, + "loss": 1.4501, + "step": 64290 + }, + { + "epoch": 1.0, + "learning_rate": 6.951913087915874e-10, + "loss": 1.442, + "step": 64300 + }, + { + "epoch": 1.0, + "learning_rate": 6.072867446083042e-10, + "loss": 1.4548, + "step": 64310 + }, + { + "epoch": 1.0, + "learning_rate": 5.25321585181926e-10, + "loss": 1.4435, + "step": 64320 + }, + { + "epoch": 1.0, + "learning_rate": 4.492958499857647e-10, + "loss": 1.4338, + "step": 64330 + }, + { + "epoch": 1.0, + "learning_rate": 3.792095570803733e-10, + "loss": 1.4099, + "step": 64340 + }, + { + "epoch": 1.0, + "learning_rate": 3.1506272311909725e-10, + "loss": 1.4111, + "step": 64350 + }, + { + "epoch": 1.0, + "learning_rate": 2.56855363342523e-10, + "loss": 1.4059, + "step": 64360 + }, + { + "epoch": 1.0, + "learning_rate": 2.045874915757029e-10, + "loss": 1.4159, + "step": 64370 + }, + { + "epoch": 1.0, + "learning_rate": 1.5825912023925694e-10, + "loss": 1.3885, + "step": 64380 + }, + { + "epoch": 1.0, + "learning_rate": 1.17870260343822e-10, + "loss": 1.4094, + "step": 64390 + }, + { + "epoch": 1.0, + "learning_rate": 8.342092147617386e-11, + "loss": 1.4084, + "step": 64400 + }, + { + "epoch": 1.0, + "learning_rate": 5.49111118297585e-11, + "loss": 1.4197, + "step": 64410 + }, + { + "epoch": 1.0, + "learning_rate": 3.2340838171385225e-11, + "loss": 1.4317, + "step": 64420 + }, + { + "epoch": 1.0, + "learning_rate": 1.5710105866206802e-11, + "loss": 1.4178, + "step": 64430 + }, + { + "epoch": 1.0, + "learning_rate": 5.018918866617206e-12, + "loss": 1.4153, + "step": 64440 + }, + { + "epoch": 1.0, + "learning_rate": 2.672797122516002e-13, + "loss": 1.4375, + "step": 64450 + }, + { + "epoch": 1.0, + "step": 64453, + "total_flos": 8.622620333309952e+18, + "train_loss": 1.6966034523956515, + "train_runtime": 1526476.5703, + "train_samples_per_second": 10.809, + "train_steps_per_second": 0.042 + } + ], + "max_steps": 64453, + "num_train_epochs": 9223372036854775807, + "total_flos": 8.622620333309952e+18, + "trial_name": null, + "trial_params": null +}