diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,5854 +1,208 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.999974259311694, + "epoch": 0.9995835068721366, "eval_steps": 500, - "global_step": 9712, + "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.0, - "learning_rate": 4.5e-07, - "loss": 2.976, + "epoch": 0.03, + "learning_rate": 3.0000000000000004e-07, + "loss": 11.1022, "step": 10 }, { - "epoch": 0.0, - "learning_rate": 9.5e-07, - "loss": 2.6462, + "epoch": 0.07, + "learning_rate": 8.000000000000001e-07, + "loss": 10.5874, "step": 20 }, { - "epoch": 0.0, - "learning_rate": 1.45e-06, - "loss": 2.2432, + "epoch": 0.1, + "learning_rate": 1.3e-06, + "loss": 9.359, "step": 30 }, { - "epoch": 0.0, - "learning_rate": 1.95e-06, - "loss": 2.3321, + "epoch": 0.13, + "learning_rate": 1.8e-06, + "loss": 7.4948, "step": 40 }, { - "epoch": 0.01, - "learning_rate": 2.4500000000000003e-06, - "loss": 2.2145, + "epoch": 0.17, + "learning_rate": 2.3e-06, + "loss": 5.7244, "step": 50 }, { - "epoch": 0.01, - "learning_rate": 2.95e-06, - "loss": 2.408, + "epoch": 0.2, + "learning_rate": 2.8000000000000003e-06, + "loss": 4.1675, "step": 60 }, { - "epoch": 0.01, - "learning_rate": 3.4500000000000004e-06, - "loss": 2.3696, + "epoch": 0.23, + "learning_rate": 3.3e-06, + "loss": 3.2211, "step": 70 }, { - "epoch": 0.01, - "learning_rate": 3.95e-06, - "loss": 2.3913, + "epoch": 0.27, + "learning_rate": 3.8e-06, + "loss": 2.4147, "step": 80 }, { - "epoch": 0.01, - "learning_rate": 4.45e-06, - "loss": 2.1274, + "epoch": 0.3, + "learning_rate": 4.2999999999999995e-06, + "loss": 1.7997, "step": 90 }, { - "epoch": 0.01, - "learning_rate": 4.950000000000001e-06, - "loss": 2.3101, + "epoch": 0.33, + "learning_rate": 4.800000000000001e-06, + "loss": 1.4401, "step": 100 }, { - "epoch": 0.01, - "learning_rate": 5.45e-06, - "loss": 2.3138, + "epoch": 0.37, + "learning_rate": 5.3e-06, + "loss": 1.2247, "step": 110 }, { - "epoch": 0.01, - "learning_rate": 5.95e-06, - "loss": 2.1893, + "epoch": 0.4, + "learning_rate": 5.8e-06, + "loss": 1.0594, "step": 120 }, { - "epoch": 0.01, - "learning_rate": 6.45e-06, - "loss": 2.1304, + "epoch": 0.43, + "learning_rate": 6.300000000000001e-06, + "loss": 0.9899, "step": 130 }, { - "epoch": 0.01, - "learning_rate": 6.950000000000001e-06, - "loss": 2.2672, + "epoch": 0.47, + "learning_rate": 6.800000000000001e-06, + "loss": 0.8842, "step": 140 }, { - "epoch": 0.02, - "learning_rate": 7.45e-06, - "loss": 2.115, + "epoch": 0.5, + "learning_rate": 7.2999999999999996e-06, + "loss": 0.8798, "step": 150 }, { - "epoch": 0.02, - "learning_rate": 7.95e-06, - "loss": 2.2899, + "epoch": 0.53, + "learning_rate": 7.8e-06, + "loss": 0.8872, "step": 160 }, { - "epoch": 0.02, - "learning_rate": 8.45e-06, - "loss": 2.3333, + "epoch": 0.57, + "learning_rate": 8.3e-06, + "loss": 0.8889, "step": 170 }, { - "epoch": 0.02, - "learning_rate": 8.95e-06, - "loss": 2.2231, + "epoch": 0.6, + "learning_rate": 8.8e-06, + "loss": 0.9344, "step": 180 }, { - "epoch": 0.02, - "learning_rate": 9.450000000000001e-06, - "loss": 2.2459, + "epoch": 0.63, + "learning_rate": 9.3e-06, + "loss": 0.9867, "step": 190 }, { - "epoch": 0.02, - "learning_rate": 9.950000000000001e-06, - "loss": 2.1321, + "epoch": 0.67, + "learning_rate": 9.800000000000001e-06, + "loss": 0.8925, "step": 200 }, { - "epoch": 0.02, - "learning_rate": 1.045e-05, - "loss": 2.2164, + "epoch": 0.7, + "learning_rate": 1.03e-05, + "loss": 0.7869, "step": 210 }, { - "epoch": 0.02, - "learning_rate": 1.095e-05, - "loss": 2.0555, + "epoch": 0.73, + "learning_rate": 1.08e-05, + "loss": 0.8847, "step": 220 }, { - "epoch": 0.02, - "learning_rate": 1.145e-05, - "loss": 2.1948, + "epoch": 0.77, + "learning_rate": 1.13e-05, + "loss": 0.8221, "step": 230 }, { - "epoch": 0.02, - "learning_rate": 1.195e-05, - "loss": 2.1423, + "epoch": 0.8, + "learning_rate": 1.18e-05, + "loss": 0.8611, "step": 240 }, { - "epoch": 0.03, - "learning_rate": 1.2450000000000001e-05, - "loss": 2.0471, + "epoch": 0.83, + "learning_rate": 1.23e-05, + "loss": 0.8544, "step": 250 }, { - "epoch": 0.03, - "learning_rate": 1.2950000000000001e-05, - "loss": 2.2886, + "epoch": 0.87, + "learning_rate": 1.2800000000000001e-05, + "loss": 0.8061, "step": 260 }, { - "epoch": 0.03, - "learning_rate": 1.3450000000000002e-05, - "loss": 2.3572, + "epoch": 0.9, + "learning_rate": 1.3300000000000001e-05, + "loss": 0.7984, "step": 270 }, { - "epoch": 0.03, - "learning_rate": 1.3950000000000002e-05, - "loss": 2.1849, + "epoch": 0.93, + "learning_rate": 1.3800000000000002e-05, + "loss": 0.7396, "step": 280 }, { - "epoch": 0.03, - "learning_rate": 1.4449999999999999e-05, - "loss": 2.0637, + "epoch": 0.97, + "learning_rate": 1.43e-05, + "loss": 0.8653, "step": 290 }, { - "epoch": 0.03, - "learning_rate": 1.4950000000000001e-05, - "loss": 1.9557, + "epoch": 1.0, + "learning_rate": 1.48e-05, + "loss": 0.8675, "step": 300 }, - { - "epoch": 0.03, - "learning_rate": 1.545e-05, - "loss": 2.1387, - "step": 310 - }, - { - "epoch": 0.03, - "learning_rate": 1.595e-05, - "loss": 2.2507, - "step": 320 - }, - { - "epoch": 0.03, - "learning_rate": 1.645e-05, - "loss": 2.1553, - "step": 330 - }, - { - "epoch": 0.04, - "learning_rate": 1.6950000000000002e-05, - "loss": 2.1832, - "step": 340 - }, - { - "epoch": 0.04, - "learning_rate": 1.745e-05, - "loss": 2.0478, - "step": 350 - }, - { - "epoch": 0.04, - "learning_rate": 1.795e-05, - "loss": 1.9862, - "step": 360 - }, - { - "epoch": 0.04, - "learning_rate": 1.845e-05, - "loss": 2.3255, - "step": 370 - }, - { - "epoch": 0.04, - "learning_rate": 1.895e-05, - "loss": 2.2428, - "step": 380 - }, - { - "epoch": 0.04, - "learning_rate": 1.9450000000000002e-05, - "loss": 2.1448, - "step": 390 - }, - { - "epoch": 0.04, - "learning_rate": 1.995e-05, - "loss": 2.2396, - "step": 400 - }, - { - "epoch": 0.04, - "learning_rate": 2.045e-05, - "loss": 2.0933, - "step": 410 - }, - { - "epoch": 0.04, - "learning_rate": 2.095e-05, - "loss": 2.0978, - "step": 420 - }, - { - "epoch": 0.04, - "learning_rate": 2.145e-05, - "loss": 2.0961, - "step": 430 - }, - { - "epoch": 0.05, - "learning_rate": 2.195e-05, - "loss": 2.0171, - "step": 440 - }, - { - "epoch": 0.05, - "learning_rate": 2.245e-05, - "loss": 1.8757, - "step": 450 - }, - { - "epoch": 0.05, - "learning_rate": 2.2950000000000002e-05, - "loss": 2.2018, - "step": 460 - }, - { - "epoch": 0.05, - "learning_rate": 2.345e-05, - "loss": 2.0696, - "step": 470 - }, - { - "epoch": 0.05, - "learning_rate": 2.395e-05, - "loss": 2.2329, - "step": 480 - }, - { - "epoch": 0.05, - "learning_rate": 2.445e-05, - "loss": 2.2144, - "step": 490 - }, - { - "epoch": 0.05, - "learning_rate": 2.495e-05, - "loss": 1.9678, - "step": 500 - }, - { - "epoch": 0.05, - "learning_rate": 2.5450000000000002e-05, - "loss": 2.0717, - "step": 510 - }, - { - "epoch": 0.05, - "learning_rate": 2.595e-05, - "loss": 2.0088, - "step": 520 - }, - { - "epoch": 0.05, - "learning_rate": 2.6450000000000003e-05, - "loss": 2.1696, - "step": 530 - }, - { - "epoch": 0.06, - "learning_rate": 2.6950000000000005e-05, - "loss": 1.9404, - "step": 540 - }, - { - "epoch": 0.06, - "learning_rate": 2.7450000000000003e-05, - "loss": 2.0756, - "step": 550 - }, - { - "epoch": 0.06, - "learning_rate": 2.7950000000000005e-05, - "loss": 2.0034, - "step": 560 - }, - { - "epoch": 0.06, - "learning_rate": 2.845e-05, - "loss": 2.0446, - "step": 570 - }, - { - "epoch": 0.06, - "learning_rate": 2.895e-05, - "loss": 2.054, - "step": 580 - }, - { - "epoch": 0.06, - "learning_rate": 2.945e-05, - "loss": 2.086, - "step": 590 - }, - { - "epoch": 0.06, - "learning_rate": 2.995e-05, - "loss": 2.1475, - "step": 600 - }, - { - "epoch": 0.06, - "learning_rate": 3.045e-05, - "loss": 2.2484, - "step": 610 - }, - { - "epoch": 0.06, - "learning_rate": 3.095e-05, - "loss": 2.0034, - "step": 620 - }, - { - "epoch": 0.06, - "learning_rate": 3.145e-05, - "loss": 2.0188, - "step": 630 - }, - { - "epoch": 0.07, - "learning_rate": 3.1950000000000004e-05, - "loss": 1.9873, - "step": 640 - }, - { - "epoch": 0.07, - "learning_rate": 3.245e-05, - "loss": 2.1709, - "step": 650 - }, - { - "epoch": 0.07, - "learning_rate": 3.295e-05, - "loss": 2.1436, - "step": 660 - }, - { - "epoch": 0.07, - "learning_rate": 3.345000000000001e-05, - "loss": 2.0891, - "step": 670 - }, - { - "epoch": 0.07, - "learning_rate": 3.3950000000000005e-05, - "loss": 2.1962, - "step": 680 - }, - { - "epoch": 0.07, - "learning_rate": 3.445e-05, - "loss": 1.9627, - "step": 690 - }, - { - "epoch": 0.07, - "learning_rate": 3.495e-05, - "loss": 1.9947, - "step": 700 - }, - { - "epoch": 0.07, - "learning_rate": 3.545e-05, - "loss": 2.1952, - "step": 710 - }, - { - "epoch": 0.07, - "learning_rate": 3.595e-05, - "loss": 2.0161, - "step": 720 - }, - { - "epoch": 0.08, - "learning_rate": 3.645e-05, - "loss": 2.134, - "step": 730 - }, - { - "epoch": 0.08, - "learning_rate": 3.6950000000000004e-05, - "loss": 2.085, - "step": 740 - }, - { - "epoch": 0.08, - "learning_rate": 3.745e-05, - "loss": 2.2181, - "step": 750 - }, - { - "epoch": 0.08, - "learning_rate": 3.795e-05, - "loss": 2.0471, - "step": 760 - }, - { - "epoch": 0.08, - "learning_rate": 3.845e-05, - "loss": 2.0651, - "step": 770 - }, - { - "epoch": 0.08, - "learning_rate": 3.8950000000000005e-05, - "loss": 2.0187, - "step": 780 - }, - { - "epoch": 0.08, - "learning_rate": 3.9450000000000003e-05, - "loss": 2.0299, - "step": 790 - }, - { - "epoch": 0.08, - "learning_rate": 3.995e-05, - "loss": 2.1057, - "step": 800 - }, - { - "epoch": 0.08, - "learning_rate": 4.045000000000001e-05, - "loss": 1.995, - "step": 810 - }, - { - "epoch": 0.08, - "learning_rate": 4.095e-05, - "loss": 2.1075, - "step": 820 - }, - { - "epoch": 0.09, - "learning_rate": 4.145e-05, - "loss": 1.97, - "step": 830 - }, - { - "epoch": 0.09, - "learning_rate": 4.195e-05, - "loss": 2.0722, - "step": 840 - }, - { - "epoch": 0.09, - "learning_rate": 4.245e-05, - "loss": 2.1999, - "step": 850 - }, - { - "epoch": 0.09, - "learning_rate": 4.295e-05, - "loss": 1.9943, - "step": 860 - }, - { - "epoch": 0.09, - "learning_rate": 4.345e-05, - "loss": 2.0222, - "step": 870 - }, - { - "epoch": 0.09, - "learning_rate": 4.3950000000000004e-05, - "loss": 2.0142, - "step": 880 - }, - { - "epoch": 0.09, - "learning_rate": 4.445e-05, - "loss": 2.0544, - "step": 890 - }, - { - "epoch": 0.09, - "learning_rate": 4.495e-05, - "loss": 2.0566, - "step": 900 - }, - { - "epoch": 0.09, - "learning_rate": 4.545000000000001e-05, - "loss": 2.0461, - "step": 910 - }, - { - "epoch": 0.09, - "learning_rate": 4.5950000000000006e-05, - "loss": 2.0164, - "step": 920 - }, - { - "epoch": 0.1, - "learning_rate": 4.6450000000000004e-05, - "loss": 2.1995, - "step": 930 - }, - { - "epoch": 0.1, - "learning_rate": 4.695e-05, - "loss": 1.9895, - "step": 940 - }, - { - "epoch": 0.1, - "learning_rate": 4.745e-05, - "loss": 2.0993, - "step": 950 - }, - { - "epoch": 0.1, - "learning_rate": 4.795e-05, - "loss": 2.011, - "step": 960 - }, - { - "epoch": 0.1, - "learning_rate": 4.845e-05, - "loss": 2.193, - "step": 970 - }, - { - "epoch": 0.1, - "learning_rate": 4.8950000000000004e-05, - "loss": 2.0688, - "step": 980 - }, - { - "epoch": 0.1, - "learning_rate": 4.945e-05, - "loss": 2.0526, - "step": 990 - }, - { - "epoch": 0.1, - "learning_rate": 4.995e-05, - "loss": 2.023, - "step": 1000 - }, - { - "epoch": 0.1, - "learning_rate": 4.999214275736835e-05, - "loss": 2.0966, - "step": 1010 - }, - { - "epoch": 0.11, - "learning_rate": 4.9983412487777624e-05, - "loss": 2.1602, - "step": 1020 - }, - { - "epoch": 0.11, - "learning_rate": 4.9974682218186904e-05, - "loss": 2.1433, - "step": 1030 - }, - { - "epoch": 0.11, - "learning_rate": 4.996595194859617e-05, - "loss": 2.1597, - "step": 1040 - }, - { - "epoch": 0.11, - "learning_rate": 4.995722167900545e-05, - "loss": 2.1164, - "step": 1050 - }, - { - "epoch": 0.11, - "learning_rate": 4.994849140941473e-05, - "loss": 2.0517, - "step": 1060 - }, - { - "epoch": 0.11, - "learning_rate": 4.9939761139824e-05, - "loss": 1.9674, - "step": 1070 - }, - { - "epoch": 0.11, - "learning_rate": 4.993103087023328e-05, - "loss": 2.0646, - "step": 1080 - }, - { - "epoch": 0.11, - "learning_rate": 4.992230060064255e-05, - "loss": 2.0989, - "step": 1090 - }, - { - "epoch": 0.11, - "learning_rate": 4.991357033105182e-05, - "loss": 2.0109, - "step": 1100 - }, - { - "epoch": 0.11, - "learning_rate": 4.9904840061461103e-05, - "loss": 2.0324, - "step": 1110 - }, - { - "epoch": 0.12, - "learning_rate": 4.989610979187038e-05, - "loss": 2.0882, - "step": 1120 - }, - { - "epoch": 0.12, - "learning_rate": 4.988737952227965e-05, - "loss": 2.0099, - "step": 1130 - }, - { - "epoch": 0.12, - "learning_rate": 4.987864925268892e-05, - "loss": 2.0875, - "step": 1140 - }, - { - "epoch": 0.12, - "learning_rate": 4.98699189830982e-05, - "loss": 1.9643, - "step": 1150 - }, - { - "epoch": 0.12, - "learning_rate": 4.986118871350747e-05, - "loss": 2.2203, - "step": 1160 - }, - { - "epoch": 0.12, - "learning_rate": 4.985245844391675e-05, - "loss": 1.9917, - "step": 1170 - }, - { - "epoch": 0.12, - "learning_rate": 4.984372817432603e-05, - "loss": 1.971, - "step": 1180 - }, - { - "epoch": 0.12, - "learning_rate": 4.98349979047353e-05, - "loss": 2.0906, - "step": 1190 - }, - { - "epoch": 0.12, - "learning_rate": 4.9826267635144576e-05, - "loss": 2.0374, - "step": 1200 - }, - { - "epoch": 0.12, - "learning_rate": 4.981753736555385e-05, - "loss": 1.9202, - "step": 1210 - }, - { - "epoch": 0.13, - "learning_rate": 4.980880709596313e-05, - "loss": 1.9862, - "step": 1220 - }, - { - "epoch": 0.13, - "learning_rate": 4.9800076826372396e-05, - "loss": 2.0537, - "step": 1230 - }, - { - "epoch": 0.13, - "learning_rate": 4.9791346556781676e-05, - "loss": 1.958, - "step": 1240 - }, - { - "epoch": 0.13, - "learning_rate": 4.9782616287190956e-05, - "loss": 2.0852, - "step": 1250 - }, - { - "epoch": 0.13, - "learning_rate": 4.977388601760022e-05, - "loss": 2.0906, - "step": 1260 - }, - { - "epoch": 0.13, - "learning_rate": 4.97651557480095e-05, - "loss": 2.0058, - "step": 1270 - }, - { - "epoch": 0.13, - "learning_rate": 4.9756425478418776e-05, - "loss": 1.9265, - "step": 1280 - }, - { - "epoch": 0.13, - "learning_rate": 4.974769520882805e-05, - "loss": 1.9428, - "step": 1290 - }, - { - "epoch": 0.13, - "learning_rate": 4.973896493923732e-05, - "loss": 1.9286, - "step": 1300 - }, - { - "epoch": 0.13, - "learning_rate": 4.97302346696466e-05, - "loss": 2.0018, - "step": 1310 - }, - { - "epoch": 0.14, - "learning_rate": 4.9721504400055876e-05, - "loss": 1.9844, - "step": 1320 - }, - { - "epoch": 0.14, - "learning_rate": 4.971277413046515e-05, - "loss": 1.888, - "step": 1330 - }, - { - "epoch": 0.14, - "learning_rate": 4.970404386087443e-05, - "loss": 2.1434, - "step": 1340 - }, - { - "epoch": 0.14, - "learning_rate": 4.9695313591283696e-05, - "loss": 1.7986, - "step": 1350 - }, - { - "epoch": 0.14, - "learning_rate": 4.9686583321692976e-05, - "loss": 2.1599, - "step": 1360 - }, - { - "epoch": 0.14, - "learning_rate": 4.967785305210225e-05, - "loss": 2.1983, - "step": 1370 - }, - { - "epoch": 0.14, - "learning_rate": 4.966912278251153e-05, - "loss": 2.0426, - "step": 1380 - }, - { - "epoch": 0.14, - "learning_rate": 4.96603925129208e-05, - "loss": 2.0469, - "step": 1390 - }, - { - "epoch": 0.14, - "learning_rate": 4.9651662243330076e-05, - "loss": 2.1305, - "step": 1400 - }, - { - "epoch": 0.15, - "learning_rate": 4.9642931973739356e-05, - "loss": 1.8912, - "step": 1410 - }, - { - "epoch": 0.15, - "learning_rate": 4.963420170414862e-05, - "loss": 2.0398, - "step": 1420 - }, - { - "epoch": 0.15, - "learning_rate": 4.96254714345579e-05, - "loss": 1.9241, - "step": 1430 - }, - { - "epoch": 0.15, - "learning_rate": 4.9616741164967176e-05, - "loss": 2.0574, - "step": 1440 - }, - { - "epoch": 0.15, - "learning_rate": 4.960801089537645e-05, - "loss": 1.9888, - "step": 1450 - }, - { - "epoch": 0.15, - "learning_rate": 4.959928062578573e-05, - "loss": 2.0308, - "step": 1460 - }, - { - "epoch": 0.15, - "learning_rate": 4.9590550356195e-05, - "loss": 2.0775, - "step": 1470 - }, - { - "epoch": 0.15, - "learning_rate": 4.9581820086604275e-05, - "loss": 2.1822, - "step": 1480 - }, - { - "epoch": 0.15, - "learning_rate": 4.957308981701355e-05, - "loss": 2.1338, - "step": 1490 - }, - { - "epoch": 0.15, - "learning_rate": 4.956435954742283e-05, - "loss": 1.9209, - "step": 1500 - }, - { - "epoch": 0.16, - "learning_rate": 4.95556292778321e-05, - "loss": 2.1183, - "step": 1510 - }, - { - "epoch": 0.16, - "learning_rate": 4.9546899008241375e-05, - "loss": 2.1071, - "step": 1520 - }, - { - "epoch": 0.16, - "learning_rate": 4.9538168738650655e-05, - "loss": 1.9681, - "step": 1530 - }, - { - "epoch": 0.16, - "learning_rate": 4.952943846905992e-05, - "loss": 1.9383, - "step": 1540 - }, - { - "epoch": 0.16, - "learning_rate": 4.95207081994692e-05, - "loss": 2.0337, - "step": 1550 - }, - { - "epoch": 0.16, - "learning_rate": 4.9511977929878475e-05, - "loss": 1.994, - "step": 1560 - }, - { - "epoch": 0.16, - "learning_rate": 4.950324766028775e-05, - "loss": 1.8505, - "step": 1570 - }, - { - "epoch": 0.16, - "learning_rate": 4.949451739069703e-05, - "loss": 2.041, - "step": 1580 - }, - { - "epoch": 0.16, - "learning_rate": 4.94857871211063e-05, - "loss": 1.9524, - "step": 1590 - }, - { - "epoch": 0.16, - "learning_rate": 4.947705685151558e-05, - "loss": 1.9885, - "step": 1600 - }, - { - "epoch": 0.17, - "learning_rate": 4.946832658192485e-05, - "loss": 1.8919, - "step": 1610 - }, - { - "epoch": 0.17, - "learning_rate": 4.945959631233413e-05, - "loss": 1.9439, - "step": 1620 - }, - { - "epoch": 0.17, - "learning_rate": 4.94508660427434e-05, - "loss": 1.9724, - "step": 1630 - }, - { - "epoch": 0.17, - "learning_rate": 4.9442135773152675e-05, - "loss": 2.0511, - "step": 1640 - }, - { - "epoch": 0.17, - "learning_rate": 4.9433405503561955e-05, - "loss": 2.0607, - "step": 1650 - }, - { - "epoch": 0.17, - "learning_rate": 4.942467523397123e-05, - "loss": 1.9982, - "step": 1660 - }, - { - "epoch": 0.17, - "learning_rate": 4.94159449643805e-05, - "loss": 2.1207, - "step": 1670 - }, - { - "epoch": 0.17, - "learning_rate": 4.9407214694789775e-05, - "loss": 1.9295, - "step": 1680 - }, - { - "epoch": 0.17, - "learning_rate": 4.9398484425199055e-05, - "loss": 1.9922, - "step": 1690 - }, - { - "epoch": 0.18, - "learning_rate": 4.938975415560833e-05, - "loss": 2.0557, - "step": 1700 - }, - { - "epoch": 0.18, - "learning_rate": 4.93810238860176e-05, - "loss": 2.1227, - "step": 1710 - }, - { - "epoch": 0.18, - "learning_rate": 4.937229361642688e-05, - "loss": 1.9687, - "step": 1720 - }, - { - "epoch": 0.18, - "learning_rate": 4.936356334683615e-05, - "loss": 2.0497, - "step": 1730 - }, - { - "epoch": 0.18, - "learning_rate": 4.935483307724543e-05, - "loss": 1.9009, - "step": 1740 - }, - { - "epoch": 0.18, - "learning_rate": 4.93461028076547e-05, - "loss": 1.9465, - "step": 1750 - }, - { - "epoch": 0.18, - "learning_rate": 4.9337372538063974e-05, - "loss": 1.9086, - "step": 1760 - }, - { - "epoch": 0.18, - "learning_rate": 4.9328642268473254e-05, - "loss": 2.1419, - "step": 1770 - }, - { - "epoch": 0.18, - "learning_rate": 4.931991199888253e-05, - "loss": 1.9966, - "step": 1780 - }, - { - "epoch": 0.18, - "learning_rate": 4.931118172929181e-05, - "loss": 2.077, - "step": 1790 - }, - { - "epoch": 0.19, - "learning_rate": 4.9302451459701074e-05, - "loss": 1.8679, - "step": 1800 - }, - { - "epoch": 0.19, - "learning_rate": 4.9293721190110354e-05, - "loss": 2.1705, - "step": 1810 - }, - { - "epoch": 0.19, - "learning_rate": 4.928499092051963e-05, - "loss": 1.9652, - "step": 1820 - }, - { - "epoch": 0.19, - "learning_rate": 4.92762606509289e-05, - "loss": 1.8823, - "step": 1830 - }, - { - "epoch": 0.19, - "learning_rate": 4.926753038133818e-05, - "loss": 1.982, - "step": 1840 - }, - { - "epoch": 0.19, - "learning_rate": 4.9258800111747454e-05, - "loss": 2.0703, - "step": 1850 - }, - { - "epoch": 0.19, - "learning_rate": 4.925006984215673e-05, - "loss": 1.8744, - "step": 1860 - }, - { - "epoch": 0.19, - "learning_rate": 4.9241339572566e-05, - "loss": 1.9123, - "step": 1870 - }, - { - "epoch": 0.19, - "learning_rate": 4.923260930297528e-05, - "loss": 1.9953, - "step": 1880 - }, - { - "epoch": 0.19, - "learning_rate": 4.9223879033384554e-05, - "loss": 1.9946, - "step": 1890 - }, - { - "epoch": 0.2, - "learning_rate": 4.921514876379383e-05, - "loss": 2.0267, - "step": 1900 - }, - { - "epoch": 0.2, - "learning_rate": 4.920641849420311e-05, - "loss": 2.0987, - "step": 1910 - }, - { - "epoch": 0.2, - "learning_rate": 4.9197688224612374e-05, - "loss": 1.9375, - "step": 1920 - }, - { - "epoch": 0.2, - "learning_rate": 4.9188957955021654e-05, - "loss": 2.0028, - "step": 1930 - }, - { - "epoch": 0.2, - "learning_rate": 4.918022768543093e-05, - "loss": 1.9827, - "step": 1940 - }, - { - "epoch": 0.2, - "learning_rate": 4.91714974158402e-05, - "loss": 1.8602, - "step": 1950 - }, - { - "epoch": 0.2, - "learning_rate": 4.916276714624948e-05, - "loss": 1.8787, - "step": 1960 - }, - { - "epoch": 0.2, - "learning_rate": 4.9154036876658754e-05, - "loss": 1.9983, - "step": 1970 - }, - { - "epoch": 0.2, - "learning_rate": 4.9145306607068034e-05, - "loss": 2.0019, - "step": 1980 - }, - { - "epoch": 0.2, - "learning_rate": 4.91365763374773e-05, - "loss": 1.9988, - "step": 1990 - }, - { - "epoch": 0.21, - "learning_rate": 4.912784606788658e-05, - "loss": 1.9989, - "step": 2000 - }, - { - "epoch": 0.21, - "learning_rate": 4.9119115798295854e-05, - "loss": 1.9736, - "step": 2010 - }, - { - "epoch": 0.21, - "learning_rate": 4.911038552870513e-05, - "loss": 2.0263, - "step": 2020 - }, - { - "epoch": 0.21, - "learning_rate": 4.910165525911441e-05, - "loss": 1.956, - "step": 2030 - }, - { - "epoch": 0.21, - "learning_rate": 4.909292498952368e-05, - "loss": 1.8579, - "step": 2040 - }, - { - "epoch": 0.21, - "learning_rate": 4.9084194719932954e-05, - "loss": 1.9411, - "step": 2050 - }, - { - "epoch": 0.21, - "learning_rate": 4.907546445034223e-05, - "loss": 1.96, - "step": 2060 - }, - { - "epoch": 0.21, - "learning_rate": 4.9067607207710575e-05, - "loss": 2.0006, - "step": 2070 - }, - { - "epoch": 0.21, - "learning_rate": 4.9058876938119855e-05, - "loss": 1.8304, - "step": 2080 - }, - { - "epoch": 0.22, - "learning_rate": 4.905014666852913e-05, - "loss": 1.9305, - "step": 2090 - }, - { - "epoch": 0.22, - "learning_rate": 4.90414163989384e-05, - "loss": 1.9985, - "step": 2100 - }, - { - "epoch": 0.22, - "learning_rate": 4.9032686129347675e-05, - "loss": 2.0149, - "step": 2110 - }, - { - "epoch": 0.22, - "learning_rate": 4.9023955859756955e-05, - "loss": 2.0072, - "step": 2120 - }, - { - "epoch": 0.22, - "learning_rate": 4.901522559016623e-05, - "loss": 2.0938, - "step": 2130 - }, - { - "epoch": 0.22, - "learning_rate": 4.90064953205755e-05, - "loss": 1.9227, - "step": 2140 - }, - { - "epoch": 0.22, - "learning_rate": 4.899776505098478e-05, - "loss": 1.9047, - "step": 2150 - }, - { - "epoch": 0.22, - "learning_rate": 4.898903478139405e-05, - "loss": 2.0212, - "step": 2160 - }, - { - "epoch": 0.22, - "learning_rate": 4.898030451180333e-05, - "loss": 2.0709, - "step": 2170 - }, - { - "epoch": 0.22, - "learning_rate": 4.89715742422126e-05, - "loss": 2.0179, - "step": 2180 - }, - { - "epoch": 0.23, - "learning_rate": 4.8962843972621874e-05, - "loss": 2.0354, - "step": 2190 - }, - { - "epoch": 0.23, - "learning_rate": 4.8954113703031154e-05, - "loss": 1.9762, - "step": 2200 - }, - { - "epoch": 0.23, - "learning_rate": 4.894538343344043e-05, - "loss": 1.9248, - "step": 2210 - }, - { - "epoch": 0.23, - "learning_rate": 4.89366531638497e-05, - "loss": 1.9126, - "step": 2220 - }, - { - "epoch": 0.23, - "learning_rate": 4.8927922894258974e-05, - "loss": 1.9365, - "step": 2230 - }, - { - "epoch": 0.23, - "learning_rate": 4.8919192624668254e-05, - "loss": 2.0461, - "step": 2240 - }, - { - "epoch": 0.23, - "learning_rate": 4.891046235507753e-05, - "loss": 2.0511, - "step": 2250 - }, - { - "epoch": 0.23, - "learning_rate": 4.89017320854868e-05, - "loss": 1.8505, - "step": 2260 - }, - { - "epoch": 0.23, - "learning_rate": 4.889300181589608e-05, - "loss": 1.9558, - "step": 2270 - }, - { - "epoch": 0.23, - "learning_rate": 4.8884271546305354e-05, - "loss": 1.9439, - "step": 2280 - }, - { - "epoch": 0.24, - "learning_rate": 4.887554127671463e-05, - "loss": 2.008, - "step": 2290 - }, - { - "epoch": 0.24, - "learning_rate": 4.88668110071239e-05, - "loss": 1.8518, - "step": 2300 - }, - { - "epoch": 0.24, - "learning_rate": 4.885808073753318e-05, - "loss": 2.0675, - "step": 2310 - }, - { - "epoch": 0.24, - "learning_rate": 4.8849350467942454e-05, - "loss": 1.9116, - "step": 2320 - }, - { - "epoch": 0.24, - "learning_rate": 4.884062019835173e-05, - "loss": 1.8394, - "step": 2330 - }, - { - "epoch": 0.24, - "learning_rate": 4.883188992876101e-05, - "loss": 1.9337, - "step": 2340 - }, - { - "epoch": 0.24, - "learning_rate": 4.8823159659170274e-05, - "loss": 1.9895, - "step": 2350 - }, - { - "epoch": 0.24, - "learning_rate": 4.8814429389579554e-05, - "loss": 2.1444, - "step": 2360 - }, - { - "epoch": 0.24, - "learning_rate": 4.880569911998883e-05, - "loss": 1.9105, - "step": 2370 - }, - { - "epoch": 0.25, - "learning_rate": 4.87969688503981e-05, - "loss": 2.0462, - "step": 2380 - }, - { - "epoch": 0.25, - "learning_rate": 4.878823858080738e-05, - "loss": 2.0439, - "step": 2390 - }, - { - "epoch": 0.25, - "learning_rate": 4.8779508311216654e-05, - "loss": 1.9062, - "step": 2400 - }, - { - "epoch": 0.25, - "learning_rate": 4.877077804162593e-05, - "loss": 1.9375, - "step": 2410 - }, - { - "epoch": 0.25, - "learning_rate": 4.87620477720352e-05, - "loss": 1.9148, - "step": 2420 - }, - { - "epoch": 0.25, - "learning_rate": 4.875331750244448e-05, - "loss": 2.1124, - "step": 2430 - }, - { - "epoch": 0.25, - "learning_rate": 4.8744587232853754e-05, - "loss": 2.1599, - "step": 2440 - }, - { - "epoch": 0.25, - "learning_rate": 4.873585696326303e-05, - "loss": 1.7231, - "step": 2450 - }, - { - "epoch": 0.25, - "learning_rate": 4.872712669367231e-05, - "loss": 1.904, - "step": 2460 - }, - { - "epoch": 0.25, - "learning_rate": 4.871839642408158e-05, - "loss": 2.0292, - "step": 2470 - }, - { - "epoch": 0.26, - "learning_rate": 4.8709666154490853e-05, - "loss": 2.1222, - "step": 2480 - }, - { - "epoch": 0.26, - "learning_rate": 4.870093588490013e-05, - "loss": 2.0809, - "step": 2490 - }, - { - "epoch": 0.26, - "learning_rate": 4.869220561530941e-05, - "loss": 1.8857, - "step": 2500 - }, - { - "epoch": 0.26, - "learning_rate": 4.868347534571867e-05, - "loss": 1.9735, - "step": 2510 - }, - { - "epoch": 0.26, - "learning_rate": 4.867474507612795e-05, - "loss": 2.0159, - "step": 2520 - }, - { - "epoch": 0.26, - "learning_rate": 4.866601480653723e-05, - "loss": 2.0342, - "step": 2530 - }, - { - "epoch": 0.26, - "learning_rate": 4.86572845369465e-05, - "loss": 1.9589, - "step": 2540 - }, - { - "epoch": 0.26, - "learning_rate": 4.864855426735578e-05, - "loss": 1.9393, - "step": 2550 - }, - { - "epoch": 0.26, - "learning_rate": 4.863982399776505e-05, - "loss": 1.8396, - "step": 2560 - }, - { - "epoch": 0.26, - "learning_rate": 4.8631093728174326e-05, - "loss": 1.8402, - "step": 2570 - }, - { - "epoch": 0.27, - "learning_rate": 4.86223634585836e-05, - "loss": 1.9787, - "step": 2580 - }, - { - "epoch": 0.27, - "learning_rate": 4.861363318899288e-05, - "loss": 1.9899, - "step": 2590 - }, - { - "epoch": 0.27, - "learning_rate": 4.860490291940215e-05, - "loss": 1.9194, - "step": 2600 - }, - { - "epoch": 0.27, - "learning_rate": 4.8596172649811426e-05, - "loss": 1.9271, - "step": 2610 - }, - { - "epoch": 0.27, - "learning_rate": 4.8587442380220706e-05, - "loss": 2.0848, - "step": 2620 - }, - { - "epoch": 0.27, - "learning_rate": 4.857871211062997e-05, - "loss": 2.0946, - "step": 2630 - }, - { - "epoch": 0.27, - "learning_rate": 4.856998184103925e-05, - "loss": 2.1027, - "step": 2640 - }, - { - "epoch": 0.27, - "learning_rate": 4.8561251571448526e-05, - "loss": 2.091, - "step": 2650 - }, - { - "epoch": 0.27, - "learning_rate": 4.8552521301857806e-05, - "loss": 2.1318, - "step": 2660 - }, - { - "epoch": 0.27, - "learning_rate": 4.854379103226708e-05, - "loss": 2.0221, - "step": 2670 - }, - { - "epoch": 0.28, - "learning_rate": 4.853506076267635e-05, - "loss": 2.1148, - "step": 2680 - }, - { - "epoch": 0.28, - "learning_rate": 4.852633049308563e-05, - "loss": 1.7976, - "step": 2690 - }, - { - "epoch": 0.28, - "learning_rate": 4.85176002234949e-05, - "loss": 1.9151, - "step": 2700 - }, - { - "epoch": 0.28, - "learning_rate": 4.850886995390418e-05, - "loss": 1.8436, - "step": 2710 - }, - { - "epoch": 0.28, - "learning_rate": 4.850013968431345e-05, - "loss": 2.0596, - "step": 2720 - }, - { - "epoch": 0.28, - "learning_rate": 4.8491409414722726e-05, - "loss": 2.0695, - "step": 2730 - }, - { - "epoch": 0.28, - "learning_rate": 4.8482679145132006e-05, - "loss": 1.9452, - "step": 2740 - }, - { - "epoch": 0.28, - "learning_rate": 4.847394887554128e-05, - "loss": 2.0062, - "step": 2750 - }, - { - "epoch": 0.28, - "learning_rate": 4.846521860595055e-05, - "loss": 1.9714, - "step": 2760 - }, - { - "epoch": 0.29, - "learning_rate": 4.8456488336359826e-05, - "loss": 1.9842, - "step": 2770 - }, - { - "epoch": 0.29, - "learning_rate": 4.8447758066769106e-05, - "loss": 1.8406, - "step": 2780 - }, - { - "epoch": 0.29, - "learning_rate": 4.843902779717838e-05, - "loss": 2.0617, - "step": 2790 - }, - { - "epoch": 0.29, - "learning_rate": 4.843029752758765e-05, - "loss": 1.9513, - "step": 2800 - }, - { - "epoch": 0.29, - "learning_rate": 4.842156725799693e-05, - "loss": 2.1505, - "step": 2810 - }, - { - "epoch": 0.29, - "learning_rate": 4.84128369884062e-05, - "loss": 2.0696, - "step": 2820 - }, - { - "epoch": 0.29, - "learning_rate": 4.840410671881548e-05, - "loss": 2.006, - "step": 2830 - }, - { - "epoch": 0.29, - "learning_rate": 4.839537644922475e-05, - "loss": 1.8353, - "step": 2840 - }, - { - "epoch": 0.29, - "learning_rate": 4.838664617963403e-05, - "loss": 2.0312, - "step": 2850 - }, - { - "epoch": 0.29, - "learning_rate": 4.8377915910043305e-05, - "loss": 1.9733, - "step": 2860 - }, - { - "epoch": 0.3, - "learning_rate": 4.836918564045258e-05, - "loss": 1.931, - "step": 2870 - }, - { - "epoch": 0.3, - "learning_rate": 4.836045537086186e-05, - "loss": 1.9602, - "step": 2880 - }, - { - "epoch": 0.3, - "learning_rate": 4.8351725101271125e-05, - "loss": 2.1504, - "step": 2890 - }, - { - "epoch": 0.3, - "learning_rate": 4.8342994831680405e-05, - "loss": 2.0694, - "step": 2900 - }, - { - "epoch": 0.3, - "learning_rate": 4.833426456208968e-05, - "loss": 1.9285, - "step": 2910 - }, - { - "epoch": 0.3, - "learning_rate": 4.832553429249895e-05, - "loss": 2.1324, - "step": 2920 - }, - { - "epoch": 0.3, - "learning_rate": 4.831680402290823e-05, - "loss": 1.9514, - "step": 2930 - }, - { - "epoch": 0.3, - "learning_rate": 4.8308073753317505e-05, - "loss": 1.8926, - "step": 2940 - }, - { - "epoch": 0.3, - "learning_rate": 4.829934348372678e-05, - "loss": 1.7547, - "step": 2950 - }, - { - "epoch": 0.3, - "learning_rate": 4.829061321413605e-05, - "loss": 2.0432, - "step": 2960 - }, - { - "epoch": 0.31, - "learning_rate": 4.828188294454533e-05, - "loss": 2.1317, - "step": 2970 - }, - { - "epoch": 0.31, - "learning_rate": 4.8273152674954605e-05, - "loss": 1.9686, - "step": 2980 - }, - { - "epoch": 0.31, - "learning_rate": 4.826442240536388e-05, - "loss": 1.894, - "step": 2990 - }, - { - "epoch": 0.31, - "learning_rate": 4.825569213577316e-05, - "loss": 2.0452, - "step": 3000 - }, - { - "epoch": 0.31, - "learning_rate": 4.8246961866182425e-05, - "loss": 1.9332, - "step": 3010 - }, - { - "epoch": 0.31, - "learning_rate": 4.8238231596591705e-05, - "loss": 1.837, - "step": 3020 - }, - { - "epoch": 0.31, - "learning_rate": 4.822950132700098e-05, - "loss": 1.8098, - "step": 3030 - }, - { - "epoch": 0.31, - "learning_rate": 4.822077105741026e-05, - "loss": 1.9209, - "step": 3040 - }, - { - "epoch": 0.31, - "learning_rate": 4.821204078781953e-05, - "loss": 2.0261, - "step": 3050 - }, - { - "epoch": 0.32, - "learning_rate": 4.8203310518228805e-05, - "loss": 1.9942, - "step": 3060 - }, - { - "epoch": 0.32, - "learning_rate": 4.8194580248638085e-05, - "loss": 1.9699, - "step": 3070 - }, - { - "epoch": 0.32, - "learning_rate": 4.818584997904735e-05, - "loss": 1.9254, - "step": 3080 - }, - { - "epoch": 0.32, - "learning_rate": 4.817711970945663e-05, - "loss": 1.9911, - "step": 3090 - }, - { - "epoch": 0.32, - "learning_rate": 4.8168389439865905e-05, - "loss": 2.0407, - "step": 3100 - }, - { - "epoch": 0.32, - "learning_rate": 4.815965917027518e-05, - "loss": 1.9203, - "step": 3110 - }, - { - "epoch": 0.32, - "learning_rate": 4.815092890068446e-05, - "loss": 1.8973, - "step": 3120 - }, - { - "epoch": 0.32, - "learning_rate": 4.814219863109373e-05, - "loss": 1.8513, - "step": 3130 - }, - { - "epoch": 0.32, - "learning_rate": 4.8133468361503005e-05, - "loss": 1.901, - "step": 3140 - }, - { - "epoch": 0.32, - "learning_rate": 4.812473809191228e-05, - "loss": 1.9327, - "step": 3150 - }, - { - "epoch": 0.33, - "learning_rate": 4.811600782232156e-05, - "loss": 1.8996, - "step": 3160 - }, - { - "epoch": 0.33, - "learning_rate": 4.810727755273083e-05, - "loss": 1.9939, - "step": 3170 - }, - { - "epoch": 0.33, - "learning_rate": 4.8098547283140104e-05, - "loss": 1.8483, - "step": 3180 - }, - { - "epoch": 0.33, - "learning_rate": 4.8089817013549384e-05, - "loss": 2.0592, - "step": 3190 - }, - { - "epoch": 0.33, - "learning_rate": 4.808108674395865e-05, - "loss": 2.0302, - "step": 3200 - }, - { - "epoch": 0.33, - "learning_rate": 4.807235647436793e-05, - "loss": 1.9272, - "step": 3210 - }, - { - "epoch": 0.33, - "learning_rate": 4.8063626204777204e-05, - "loss": 2.0519, - "step": 3220 - }, - { - "epoch": 0.33, - "learning_rate": 4.8054895935186484e-05, - "loss": 1.9114, - "step": 3230 - }, - { - "epoch": 0.33, - "learning_rate": 4.804616566559576e-05, - "loss": 1.9389, - "step": 3240 - }, - { - "epoch": 0.33, - "learning_rate": 4.803743539600503e-05, - "loss": 1.9797, - "step": 3250 - }, - { - "epoch": 0.34, - "learning_rate": 4.802870512641431e-05, - "loss": 1.9191, - "step": 3260 - }, - { - "epoch": 0.34, - "learning_rate": 4.801997485682358e-05, - "loss": 2.0026, - "step": 3270 - }, - { - "epoch": 0.34, - "learning_rate": 4.801124458723286e-05, - "loss": 1.9302, - "step": 3280 - }, - { - "epoch": 0.34, - "learning_rate": 4.800251431764213e-05, - "loss": 1.8641, - "step": 3290 - }, - { - "epoch": 0.34, - "learning_rate": 4.7993784048051404e-05, - "loss": 1.9597, - "step": 3300 - }, - { - "epoch": 0.34, - "learning_rate": 4.7985053778460684e-05, - "loss": 1.9298, - "step": 3310 - }, - { - "epoch": 0.34, - "learning_rate": 4.797632350886996e-05, - "loss": 2.1863, - "step": 3320 - }, - { - "epoch": 0.34, - "learning_rate": 4.796759323927923e-05, - "loss": 2.1563, - "step": 3330 - }, - { - "epoch": 0.34, - "learning_rate": 4.7958862969688504e-05, - "loss": 2.0512, - "step": 3340 - }, - { - "epoch": 0.34, - "learning_rate": 4.7950132700097784e-05, - "loss": 1.9424, - "step": 3350 - }, - { - "epoch": 0.35, - "learning_rate": 4.794140243050706e-05, - "loss": 1.901, - "step": 3360 - }, - { - "epoch": 0.35, - "learning_rate": 4.793267216091633e-05, - "loss": 1.9661, - "step": 3370 - }, - { - "epoch": 0.35, - "learning_rate": 4.792394189132561e-05, - "loss": 1.904, - "step": 3380 - }, - { - "epoch": 0.35, - "learning_rate": 4.791521162173488e-05, - "loss": 1.9385, - "step": 3390 - }, - { - "epoch": 0.35, - "learning_rate": 4.790648135214416e-05, - "loss": 2.0139, - "step": 3400 - }, - { - "epoch": 0.35, - "learning_rate": 4.789775108255343e-05, - "loss": 1.9967, - "step": 3410 - }, - { - "epoch": 0.35, - "learning_rate": 4.7889020812962704e-05, - "loss": 2.0197, - "step": 3420 - }, - { - "epoch": 0.35, - "learning_rate": 4.7880290543371984e-05, - "loss": 2.0362, - "step": 3430 - }, - { - "epoch": 0.35, - "learning_rate": 4.787156027378126e-05, - "loss": 1.7917, - "step": 3440 - }, - { - "epoch": 0.36, - "learning_rate": 4.786283000419054e-05, - "loss": 1.8769, - "step": 3450 - }, - { - "epoch": 0.36, - "learning_rate": 4.7854099734599803e-05, - "loss": 1.9675, - "step": 3460 - }, - { - "epoch": 0.36, - "learning_rate": 4.7845369465009083e-05, - "loss": 1.9496, - "step": 3470 - }, - { - "epoch": 0.36, - "learning_rate": 4.783663919541836e-05, - "loss": 1.8427, - "step": 3480 - }, - { - "epoch": 0.36, - "learning_rate": 4.782790892582763e-05, - "loss": 2.0347, - "step": 3490 - }, - { - "epoch": 0.36, - "learning_rate": 4.781917865623691e-05, - "loss": 1.9731, - "step": 3500 - }, - { - "epoch": 0.36, - "learning_rate": 4.781044838664618e-05, - "loss": 2.1021, - "step": 3510 - }, - { - "epoch": 0.36, - "learning_rate": 4.7801718117055457e-05, - "loss": 1.8601, - "step": 3520 - }, - { - "epoch": 0.36, - "learning_rate": 4.779298784746473e-05, - "loss": 1.9404, - "step": 3530 - }, - { - "epoch": 0.36, - "learning_rate": 4.778425757787401e-05, - "loss": 2.1529, - "step": 3540 - }, - { - "epoch": 0.37, - "learning_rate": 4.777552730828328e-05, - "loss": 1.828, - "step": 3550 - }, - { - "epoch": 0.37, - "learning_rate": 4.7766797038692556e-05, - "loss": 2.0343, - "step": 3560 - }, - { - "epoch": 0.37, - "learning_rate": 4.7758066769101836e-05, - "loss": 1.9911, - "step": 3570 - }, - { - "epoch": 0.37, - "learning_rate": 4.77493364995111e-05, - "loss": 1.9486, - "step": 3580 - }, - { - "epoch": 0.37, - "learning_rate": 4.774060622992038e-05, - "loss": 2.0261, - "step": 3590 - }, - { - "epoch": 0.37, - "learning_rate": 4.7731875960329656e-05, - "loss": 1.8422, - "step": 3600 - }, - { - "epoch": 0.37, - "learning_rate": 4.772314569073893e-05, - "loss": 1.9237, - "step": 3610 - }, - { - "epoch": 0.37, - "learning_rate": 4.771441542114821e-05, - "loss": 1.833, - "step": 3620 - }, - { - "epoch": 0.37, - "learning_rate": 4.770568515155748e-05, - "loss": 1.9817, - "step": 3630 - }, - { - "epoch": 0.37, - "learning_rate": 4.769695488196676e-05, - "loss": 1.8873, - "step": 3640 - }, - { - "epoch": 0.38, - "learning_rate": 4.768822461237603e-05, - "loss": 1.9343, - "step": 3650 - }, - { - "epoch": 0.38, - "learning_rate": 4.767949434278531e-05, - "loss": 1.9811, - "step": 3660 - }, - { - "epoch": 0.38, - "learning_rate": 4.767076407319458e-05, - "loss": 1.9471, - "step": 3670 - }, - { - "epoch": 0.38, - "learning_rate": 4.7662033803603856e-05, - "loss": 2.0074, - "step": 3680 - }, - { - "epoch": 0.38, - "learning_rate": 4.7653303534013136e-05, - "loss": 1.9113, - "step": 3690 - }, - { - "epoch": 0.38, - "learning_rate": 4.764457326442241e-05, - "loss": 1.9931, - "step": 3700 - }, - { - "epoch": 0.38, - "learning_rate": 4.763584299483168e-05, - "loss": 1.8319, - "step": 3710 - }, - { - "epoch": 0.38, - "learning_rate": 4.7627112725240956e-05, - "loss": 1.9883, - "step": 3720 - }, - { - "epoch": 0.38, - "learning_rate": 4.7618382455650236e-05, - "loss": 2.0284, - "step": 3730 - }, - { - "epoch": 0.39, - "learning_rate": 4.76096521860595e-05, - "loss": 1.8626, - "step": 3740 - }, - { - "epoch": 0.39, - "learning_rate": 4.760092191646878e-05, - "loss": 1.926, - "step": 3750 - }, - { - "epoch": 0.39, - "learning_rate": 4.759219164687806e-05, - "loss": 1.9263, - "step": 3760 - }, - { - "epoch": 0.39, - "learning_rate": 4.758346137728733e-05, - "loss": 1.7766, - "step": 3770 - }, - { - "epoch": 0.39, - "learning_rate": 4.757473110769661e-05, - "loss": 1.8262, - "step": 3780 - }, - { - "epoch": 0.39, - "learning_rate": 4.756600083810588e-05, - "loss": 1.8861, - "step": 3790 - }, - { - "epoch": 0.39, - "learning_rate": 4.7557270568515156e-05, - "loss": 1.7907, - "step": 3800 - }, - { - "epoch": 0.39, - "learning_rate": 4.754854029892443e-05, - "loss": 1.9, - "step": 3810 - }, - { - "epoch": 0.39, - "learning_rate": 4.753981002933371e-05, - "loss": 1.9612, - "step": 3820 - }, - { - "epoch": 0.39, - "learning_rate": 4.753107975974299e-05, - "loss": 1.9975, - "step": 3830 - }, - { - "epoch": 0.4, - "learning_rate": 4.7522349490152255e-05, - "loss": 1.8367, - "step": 3840 - }, - { - "epoch": 0.4, - "learning_rate": 4.7513619220561535e-05, - "loss": 1.9552, - "step": 3850 - }, - { - "epoch": 0.4, - "learning_rate": 4.750488895097081e-05, - "loss": 1.9816, - "step": 3860 - }, - { - "epoch": 0.4, - "learning_rate": 4.749615868138008e-05, - "loss": 1.8382, - "step": 3870 - }, - { - "epoch": 0.4, - "learning_rate": 4.7487428411789355e-05, - "loss": 2.0723, - "step": 3880 - }, - { - "epoch": 0.4, - "learning_rate": 4.7478698142198635e-05, - "loss": 1.8815, - "step": 3890 - }, - { - "epoch": 0.4, - "learning_rate": 4.746996787260791e-05, - "loss": 1.9258, - "step": 3900 - }, - { - "epoch": 0.4, - "learning_rate": 4.746123760301718e-05, - "loss": 1.8927, - "step": 3910 - }, - { - "epoch": 0.4, - "learning_rate": 4.745250733342646e-05, - "loss": 1.8611, - "step": 3920 - }, - { - "epoch": 0.4, - "learning_rate": 4.744377706383573e-05, - "loss": 2.068, - "step": 3930 - }, - { - "epoch": 0.41, - "learning_rate": 4.743504679424501e-05, - "loss": 2.0035, - "step": 3940 - }, - { - "epoch": 0.41, - "learning_rate": 4.742631652465428e-05, - "loss": 1.8908, - "step": 3950 - }, - { - "epoch": 0.41, - "learning_rate": 4.7417586255063555e-05, - "loss": 2.0301, - "step": 3960 - }, - { - "epoch": 0.41, - "learning_rate": 4.7408855985472835e-05, - "loss": 1.9541, - "step": 3970 - }, - { - "epoch": 0.41, - "learning_rate": 4.740012571588211e-05, - "loss": 1.8775, - "step": 3980 - }, - { - "epoch": 0.41, - "learning_rate": 4.739139544629138e-05, - "loss": 1.9272, - "step": 3990 - }, - { - "epoch": 0.41, - "learning_rate": 4.7382665176700655e-05, - "loss": 1.9072, - "step": 4000 - }, - { - "epoch": 0.41, - "learning_rate": 4.7373934907109935e-05, - "loss": 2.0545, - "step": 4010 - }, - { - "epoch": 0.41, - "learning_rate": 4.736520463751921e-05, - "loss": 2.0361, - "step": 4020 - }, - { - "epoch": 0.41, - "learning_rate": 4.735647436792848e-05, - "loss": 2.0615, - "step": 4030 - }, - { - "epoch": 0.42, - "learning_rate": 4.734774409833776e-05, - "loss": 1.9838, - "step": 4040 - }, - { - "epoch": 0.42, - "learning_rate": 4.7339013828747035e-05, - "loss": 1.961, - "step": 4050 - }, - { - "epoch": 0.42, - "learning_rate": 4.733028355915631e-05, - "loss": 2.0249, - "step": 4060 - }, - { - "epoch": 0.42, - "learning_rate": 4.7322426316524656e-05, - "loss": 1.9382, - "step": 4070 - }, - { - "epoch": 0.42, - "learning_rate": 4.731369604693393e-05, - "loss": 1.7809, - "step": 4080 - }, - { - "epoch": 0.42, - "learning_rate": 4.730496577734321e-05, - "loss": 1.8983, - "step": 4090 - }, - { - "epoch": 0.42, - "learning_rate": 4.729623550775248e-05, - "loss": 1.9597, - "step": 4100 - }, - { - "epoch": 0.42, - "learning_rate": 4.7287505238161756e-05, - "loss": 1.9825, - "step": 4110 - }, - { - "epoch": 0.42, - "learning_rate": 4.727877496857103e-05, - "loss": 1.7884, - "step": 4120 - }, - { - "epoch": 0.43, - "learning_rate": 4.727004469898031e-05, - "loss": 1.8879, - "step": 4130 - }, - { - "epoch": 0.43, - "learning_rate": 4.726131442938958e-05, - "loss": 2.029, - "step": 4140 - }, - { - "epoch": 0.43, - "learning_rate": 4.7252584159798856e-05, - "loss": 1.8555, - "step": 4150 - }, - { - "epoch": 0.43, - "learning_rate": 4.7243853890208136e-05, - "loss": 2.1565, - "step": 4160 - }, - { - "epoch": 0.43, - "learning_rate": 4.72351236206174e-05, - "loss": 1.9317, - "step": 4170 - }, - { - "epoch": 0.43, - "learning_rate": 4.722639335102668e-05, - "loss": 1.9698, - "step": 4180 - }, - { - "epoch": 0.43, - "learning_rate": 4.7217663081435956e-05, - "loss": 1.9343, - "step": 4190 - }, - { - "epoch": 0.43, - "learning_rate": 4.720893281184523e-05, - "loss": 1.9083, - "step": 4200 - }, - { - "epoch": 0.43, - "learning_rate": 4.720020254225451e-05, - "loss": 1.9588, - "step": 4210 - }, - { - "epoch": 0.43, - "learning_rate": 4.719147227266378e-05, - "loss": 1.9084, - "step": 4220 - }, - { - "epoch": 0.44, - "learning_rate": 4.7182742003073055e-05, - "loss": 2.0141, - "step": 4230 - }, - { - "epoch": 0.44, - "learning_rate": 4.717401173348233e-05, - "loss": 1.9221, - "step": 4240 - }, - { - "epoch": 0.44, - "learning_rate": 4.716528146389161e-05, - "loss": 1.8738, - "step": 4250 - }, - { - "epoch": 0.44, - "learning_rate": 4.715655119430088e-05, - "loss": 1.9212, - "step": 4260 - }, - { - "epoch": 0.44, - "learning_rate": 4.7147820924710155e-05, - "loss": 1.8343, - "step": 4270 - }, - { - "epoch": 0.44, - "learning_rate": 4.7139090655119435e-05, - "loss": 1.9677, - "step": 4280 - }, - { - "epoch": 0.44, - "learning_rate": 4.713036038552871e-05, - "loss": 1.9636, - "step": 4290 - }, - { - "epoch": 0.44, - "learning_rate": 4.712163011593798e-05, - "loss": 2.0357, - "step": 4300 - }, - { - "epoch": 0.44, - "learning_rate": 4.7112899846347255e-05, - "loss": 1.9199, - "step": 4310 - }, - { - "epoch": 0.44, - "learning_rate": 4.7104169576756535e-05, - "loss": 1.9294, - "step": 4320 - }, - { - "epoch": 0.45, - "learning_rate": 4.709543930716581e-05, - "loss": 1.9129, - "step": 4330 - }, - { - "epoch": 0.45, - "learning_rate": 4.708670903757508e-05, - "loss": 2.0412, - "step": 4340 - }, - { - "epoch": 0.45, - "learning_rate": 4.707797876798436e-05, - "loss": 1.8468, - "step": 4350 - }, - { - "epoch": 0.45, - "learning_rate": 4.706924849839363e-05, - "loss": 2.0292, - "step": 4360 - }, - { - "epoch": 0.45, - "learning_rate": 4.706051822880291e-05, - "loss": 1.9925, - "step": 4370 - }, - { - "epoch": 0.45, - "learning_rate": 4.705178795921218e-05, - "loss": 2.1383, - "step": 4380 - }, - { - "epoch": 0.45, - "learning_rate": 4.7043057689621455e-05, - "loss": 1.8481, - "step": 4390 - }, - { - "epoch": 0.45, - "learning_rate": 4.7034327420030735e-05, - "loss": 1.8528, - "step": 4400 - }, - { - "epoch": 0.45, - "learning_rate": 4.702559715044001e-05, - "loss": 2.0173, - "step": 4410 - }, - { - "epoch": 0.46, - "learning_rate": 4.701686688084928e-05, - "loss": 1.7626, - "step": 4420 - }, - { - "epoch": 0.46, - "learning_rate": 4.7008136611258555e-05, - "loss": 1.8769, - "step": 4430 - }, - { - "epoch": 0.46, - "learning_rate": 4.6999406341667835e-05, - "loss": 1.8164, - "step": 4440 - }, - { - "epoch": 0.46, - "learning_rate": 4.699067607207711e-05, - "loss": 1.9763, - "step": 4450 - }, - { - "epoch": 0.46, - "learning_rate": 4.698194580248638e-05, - "loss": 1.7779, - "step": 4460 - }, - { - "epoch": 0.46, - "learning_rate": 4.697321553289566e-05, - "loss": 1.8596, - "step": 4470 - }, - { - "epoch": 0.46, - "learning_rate": 4.696448526330493e-05, - "loss": 1.8938, - "step": 4480 - }, - { - "epoch": 0.46, - "learning_rate": 4.695575499371421e-05, - "loss": 1.912, - "step": 4490 - }, - { - "epoch": 0.46, - "learning_rate": 4.694702472412348e-05, - "loss": 1.9153, - "step": 4500 - }, - { - "epoch": 0.46, - "learning_rate": 4.693829445453276e-05, - "loss": 1.8165, - "step": 4510 - }, - { - "epoch": 0.47, - "learning_rate": 4.6929564184942035e-05, - "loss": 1.8726, - "step": 4520 - }, - { - "epoch": 0.47, - "learning_rate": 4.692083391535131e-05, - "loss": 2.0008, - "step": 4530 - }, - { - "epoch": 0.47, - "learning_rate": 4.691210364576059e-05, - "loss": 1.8834, - "step": 4540 - }, - { - "epoch": 0.47, - "learning_rate": 4.6903373376169854e-05, - "loss": 2.0086, - "step": 4550 - }, - { - "epoch": 0.47, - "learning_rate": 4.6894643106579134e-05, - "loss": 1.9196, - "step": 4560 - }, - { - "epoch": 0.47, - "learning_rate": 4.688591283698841e-05, - "loss": 1.7789, - "step": 4570 - }, - { - "epoch": 0.47, - "learning_rate": 4.687718256739768e-05, - "loss": 1.8876, - "step": 4580 - }, - { - "epoch": 0.47, - "learning_rate": 4.686845229780696e-05, - "loss": 1.8964, - "step": 4590 - }, - { - "epoch": 0.47, - "learning_rate": 4.6859722028216234e-05, - "loss": 1.8953, - "step": 4600 - }, - { - "epoch": 0.47, - "learning_rate": 4.685099175862551e-05, - "loss": 2.1051, - "step": 4610 - }, - { - "epoch": 0.48, - "learning_rate": 4.684226148903478e-05, - "loss": 1.9136, - "step": 4620 - }, - { - "epoch": 0.48, - "learning_rate": 4.683353121944406e-05, - "loss": 1.8372, - "step": 4630 - }, - { - "epoch": 0.48, - "learning_rate": 4.6824800949853334e-05, - "loss": 1.8293, - "step": 4640 - }, - { - "epoch": 0.48, - "learning_rate": 4.681607068026261e-05, - "loss": 1.9932, - "step": 4650 - }, - { - "epoch": 0.48, - "learning_rate": 4.680734041067189e-05, - "loss": 1.9893, - "step": 4660 - }, - { - "epoch": 0.48, - "learning_rate": 4.6798610141081154e-05, - "loss": 1.8914, - "step": 4670 - }, - { - "epoch": 0.48, - "learning_rate": 4.6789879871490434e-05, - "loss": 2.1179, - "step": 4680 - }, - { - "epoch": 0.48, - "learning_rate": 4.678114960189971e-05, - "loss": 1.9395, - "step": 4690 - }, - { - "epoch": 0.48, - "learning_rate": 4.677241933230899e-05, - "loss": 1.8291, - "step": 4700 - }, - { - "epoch": 0.48, - "learning_rate": 4.676368906271826e-05, - "loss": 1.8711, - "step": 4710 - }, - { - "epoch": 0.49, - "learning_rate": 4.6754958793127534e-05, - "loss": 1.9817, - "step": 4720 - }, - { - "epoch": 0.49, - "learning_rate": 4.6746228523536814e-05, - "loss": 1.8458, - "step": 4730 - }, - { - "epoch": 0.49, - "learning_rate": 4.673749825394608e-05, - "loss": 1.8851, - "step": 4740 - }, - { - "epoch": 0.49, - "learning_rate": 4.672876798435536e-05, - "loss": 1.8997, - "step": 4750 - }, - { - "epoch": 0.49, - "learning_rate": 4.6720037714764634e-05, - "loss": 2.0368, - "step": 4760 - }, - { - "epoch": 0.49, - "learning_rate": 4.671130744517391e-05, - "loss": 1.8385, - "step": 4770 - }, - { - "epoch": 0.49, - "learning_rate": 4.670257717558319e-05, - "loss": 1.8494, - "step": 4780 - }, - { - "epoch": 0.49, - "learning_rate": 4.669384690599246e-05, - "loss": 1.9522, - "step": 4790 - }, - { - "epoch": 0.49, - "learning_rate": 4.6685116636401734e-05, - "loss": 1.9617, - "step": 4800 - }, - { - "epoch": 0.5, - "learning_rate": 4.667638636681101e-05, - "loss": 1.8706, - "step": 4810 - }, - { - "epoch": 0.5, - "learning_rate": 4.666765609722029e-05, - "loss": 1.7858, - "step": 4820 - }, - { - "epoch": 0.5, - "learning_rate": 4.665892582762956e-05, - "loss": 1.8736, - "step": 4830 - }, - { - "epoch": 0.5, - "learning_rate": 4.6650195558038833e-05, - "loss": 1.8902, - "step": 4840 - }, - { - "epoch": 0.5, - "learning_rate": 4.6641465288448113e-05, - "loss": 1.8929, - "step": 4850 - }, - { - "epoch": 0.5, - "learning_rate": 4.663273501885738e-05, - "loss": 1.9886, - "step": 4860 - }, - { - "epoch": 0.5, - "learning_rate": 4.662400474926666e-05, - "loss": 1.9129, - "step": 4870 - }, - { - "epoch": 0.5, - "learning_rate": 4.661527447967593e-05, - "loss": 1.8532, - "step": 4880 - }, - { - "epoch": 0.5, - "learning_rate": 4.660654421008521e-05, - "loss": 2.0415, - "step": 4890 - }, - { - "epoch": 0.5, - "learning_rate": 4.659781394049449e-05, - "loss": 1.9084, - "step": 4900 - }, - { - "epoch": 0.51, - "learning_rate": 4.658908367090376e-05, - "loss": 2.0404, - "step": 4910 - }, - { - "epoch": 0.51, - "learning_rate": 4.658035340131304e-05, - "loss": 1.8304, - "step": 4920 - }, - { - "epoch": 0.51, - "learning_rate": 4.6571623131722306e-05, - "loss": 1.9758, - "step": 4930 - }, - { - "epoch": 0.51, - "learning_rate": 4.6562892862131586e-05, - "loss": 1.944, - "step": 4940 - }, - { - "epoch": 0.51, - "learning_rate": 4.655416259254086e-05, - "loss": 2.0301, - "step": 4950 - }, - { - "epoch": 0.51, - "learning_rate": 4.654543232295013e-05, - "loss": 1.9225, - "step": 4960 - }, - { - "epoch": 0.51, - "learning_rate": 4.653670205335941e-05, - "loss": 2.0178, - "step": 4970 - }, - { - "epoch": 0.51, - "learning_rate": 4.6527971783768686e-05, - "loss": 1.84, - "step": 4980 - }, - { - "epoch": 0.51, - "learning_rate": 4.651924151417796e-05, - "loss": 1.8213, - "step": 4990 - }, - { - "epoch": 0.51, - "learning_rate": 4.651051124458723e-05, - "loss": 1.9907, - "step": 5000 - }, - { - "epoch": 0.52, - "learning_rate": 4.650178097499651e-05, - "loss": 2.1089, - "step": 5010 - }, - { - "epoch": 0.52, - "learning_rate": 4.6493050705405786e-05, - "loss": 1.8555, - "step": 5020 - }, - { - "epoch": 0.52, - "learning_rate": 4.648432043581506e-05, - "loss": 1.9852, - "step": 5030 - }, - { - "epoch": 0.52, - "learning_rate": 4.647559016622434e-05, - "loss": 1.7622, - "step": 5040 - }, - { - "epoch": 0.52, - "learning_rate": 4.6466859896633606e-05, - "loss": 1.9574, - "step": 5050 - }, - { - "epoch": 0.52, - "learning_rate": 4.6458129627042886e-05, - "loss": 1.8197, - "step": 5060 - }, - { - "epoch": 0.52, - "learning_rate": 4.644939935745216e-05, - "loss": 1.9539, - "step": 5070 - }, - { - "epoch": 0.52, - "learning_rate": 4.644066908786144e-05, - "loss": 1.8171, - "step": 5080 - }, - { - "epoch": 0.52, - "learning_rate": 4.643193881827071e-05, - "loss": 1.9054, - "step": 5090 - }, - { - "epoch": 0.53, - "learning_rate": 4.6423208548679986e-05, - "loss": 1.964, - "step": 5100 - }, - { - "epoch": 0.53, - "learning_rate": 4.6414478279089266e-05, - "loss": 1.7665, - "step": 5110 - }, - { - "epoch": 0.53, - "learning_rate": 4.640574800949853e-05, - "loss": 1.7917, - "step": 5120 - }, - { - "epoch": 0.53, - "learning_rate": 4.639701773990781e-05, - "loss": 1.9539, - "step": 5130 - }, - { - "epoch": 0.53, - "learning_rate": 4.6388287470317086e-05, - "loss": 2.0556, - "step": 5140 - }, - { - "epoch": 0.53, - "learning_rate": 4.637955720072636e-05, - "loss": 1.8934, - "step": 5150 - }, - { - "epoch": 0.53, - "learning_rate": 4.637082693113564e-05, - "loss": 1.8274, - "step": 5160 - }, - { - "epoch": 0.53, - "learning_rate": 4.636209666154491e-05, - "loss": 1.9965, - "step": 5170 - }, - { - "epoch": 0.53, - "learning_rate": 4.6353366391954186e-05, - "loss": 1.9861, - "step": 5180 - }, - { - "epoch": 0.53, - "learning_rate": 4.634463612236346e-05, - "loss": 1.9275, - "step": 5190 - }, - { - "epoch": 0.54, - "learning_rate": 4.633590585277274e-05, - "loss": 1.8608, - "step": 5200 - }, - { - "epoch": 0.54, - "learning_rate": 4.6327175583182005e-05, - "loss": 2.0487, - "step": 5210 - }, - { - "epoch": 0.54, - "learning_rate": 4.6318445313591286e-05, - "loss": 1.8333, - "step": 5220 - }, - { - "epoch": 0.54, - "learning_rate": 4.6309715044000566e-05, - "loss": 1.9095, - "step": 5230 - }, - { - "epoch": 0.54, - "learning_rate": 4.630098477440983e-05, - "loss": 1.7228, - "step": 5240 - }, - { - "epoch": 0.54, - "learning_rate": 4.629225450481911e-05, - "loss": 1.7879, - "step": 5250 - }, - { - "epoch": 0.54, - "learning_rate": 4.6283524235228385e-05, - "loss": 1.9049, - "step": 5260 - }, - { - "epoch": 0.54, - "learning_rate": 4.627479396563766e-05, - "loss": 2.1479, - "step": 5270 - }, - { - "epoch": 0.54, - "learning_rate": 4.626606369604693e-05, - "loss": 1.8826, - "step": 5280 - }, - { - "epoch": 0.54, - "learning_rate": 4.625733342645621e-05, - "loss": 1.86, - "step": 5290 - }, - { - "epoch": 0.55, - "learning_rate": 4.624860315686549e-05, - "loss": 1.8297, - "step": 5300 - }, - { - "epoch": 0.55, - "learning_rate": 4.623987288727476e-05, - "loss": 1.9474, - "step": 5310 - }, - { - "epoch": 0.55, - "learning_rate": 4.623114261768404e-05, - "loss": 1.9161, - "step": 5320 - }, - { - "epoch": 0.55, - "learning_rate": 4.622241234809331e-05, - "loss": 1.8701, - "step": 5330 - }, - { - "epoch": 0.55, - "learning_rate": 4.6213682078502585e-05, - "loss": 1.9483, - "step": 5340 - }, - { - "epoch": 0.55, - "learning_rate": 4.620495180891186e-05, - "loss": 1.8677, - "step": 5350 - }, - { - "epoch": 0.55, - "learning_rate": 4.619622153932114e-05, - "loss": 1.8863, - "step": 5360 - }, - { - "epoch": 0.55, - "learning_rate": 4.618749126973041e-05, - "loss": 2.0326, - "step": 5370 - }, - { - "epoch": 0.55, - "learning_rate": 4.6178761000139685e-05, - "loss": 1.9934, - "step": 5380 - }, - { - "epoch": 0.55, - "learning_rate": 4.6170030730548965e-05, - "loss": 1.9114, - "step": 5390 - }, - { - "epoch": 0.56, - "learning_rate": 4.616130046095823e-05, - "loss": 1.8575, - "step": 5400 - }, - { - "epoch": 0.56, - "learning_rate": 4.615257019136751e-05, - "loss": 1.881, - "step": 5410 - }, - { - "epoch": 0.56, - "learning_rate": 4.6143839921776785e-05, - "loss": 1.8793, - "step": 5420 - }, - { - "epoch": 0.56, - "learning_rate": 4.613510965218606e-05, - "loss": 1.8581, - "step": 5430 - }, - { - "epoch": 0.56, - "learning_rate": 4.612637938259534e-05, - "loss": 1.921, - "step": 5440 - }, - { - "epoch": 0.56, - "learning_rate": 4.611764911300461e-05, - "loss": 1.924, - "step": 5450 - }, - { - "epoch": 0.56, - "learning_rate": 4.6108918843413885e-05, - "loss": 1.8078, - "step": 5460 - }, - { - "epoch": 0.56, - "learning_rate": 4.610018857382316e-05, - "loss": 1.9087, - "step": 5470 - }, - { - "epoch": 0.56, - "learning_rate": 4.609145830423244e-05, - "loss": 1.9449, - "step": 5480 - }, - { - "epoch": 0.57, - "learning_rate": 4.608272803464171e-05, - "loss": 1.9525, - "step": 5490 - }, - { - "epoch": 0.57, - "learning_rate": 4.6073997765050985e-05, - "loss": 2.0435, - "step": 5500 - }, - { - "epoch": 0.57, - "learning_rate": 4.6065267495460265e-05, - "loss": 1.9521, - "step": 5510 - }, - { - "epoch": 0.57, - "learning_rate": 4.605653722586954e-05, - "loss": 1.8731, - "step": 5520 - }, - { - "epoch": 0.57, - "learning_rate": 4.604780695627881e-05, - "loss": 1.8189, - "step": 5530 - }, - { - "epoch": 0.57, - "learning_rate": 4.6039076686688084e-05, - "loss": 1.9498, - "step": 5540 - }, - { - "epoch": 0.57, - "learning_rate": 4.6030346417097364e-05, - "loss": 1.9245, - "step": 5550 - }, - { - "epoch": 0.57, - "learning_rate": 4.602161614750664e-05, - "loss": 1.8627, - "step": 5560 - }, - { - "epoch": 0.57, - "learning_rate": 4.601288587791591e-05, - "loss": 1.9171, - "step": 5570 - }, - { - "epoch": 0.57, - "learning_rate": 4.600415560832519e-05, - "loss": 1.8403, - "step": 5580 - }, - { - "epoch": 0.58, - "learning_rate": 4.599542533873446e-05, - "loss": 1.8587, - "step": 5590 - }, - { - "epoch": 0.58, - "learning_rate": 4.598669506914374e-05, - "loss": 1.8318, - "step": 5600 - }, - { - "epoch": 0.58, - "learning_rate": 4.597796479955301e-05, - "loss": 1.954, - "step": 5610 - }, - { - "epoch": 0.58, - "learning_rate": 4.5969234529962284e-05, - "loss": 1.9895, - "step": 5620 - }, - { - "epoch": 0.58, - "learning_rate": 4.5960504260371564e-05, - "loss": 2.0052, - "step": 5630 - }, - { - "epoch": 0.58, - "learning_rate": 4.595177399078084e-05, - "loss": 1.8374, - "step": 5640 - }, - { - "epoch": 0.58, - "learning_rate": 4.594304372119011e-05, - "loss": 1.9758, - "step": 5650 - }, - { - "epoch": 0.58, - "learning_rate": 4.5934313451599384e-05, - "loss": 2.0072, - "step": 5660 - }, - { - "epoch": 0.58, - "learning_rate": 4.5925583182008664e-05, - "loss": 1.8884, - "step": 5670 - }, - { - "epoch": 0.58, - "learning_rate": 4.591685291241794e-05, - "loss": 1.84, - "step": 5680 - }, - { - "epoch": 0.59, - "learning_rate": 4.590812264282721e-05, - "loss": 1.9712, - "step": 5690 - }, - { - "epoch": 0.59, - "learning_rate": 4.589939237323649e-05, - "loss": 1.9127, - "step": 5700 - }, - { - "epoch": 0.59, - "learning_rate": 4.5890662103645764e-05, - "loss": 2.0855, - "step": 5710 - }, - { - "epoch": 0.59, - "learning_rate": 4.588193183405504e-05, - "loss": 2.1225, - "step": 5720 - }, - { - "epoch": 0.59, - "learning_rate": 4.587320156446431e-05, - "loss": 1.8943, - "step": 5730 - }, - { - "epoch": 0.59, - "learning_rate": 4.586447129487359e-05, - "loss": 1.6987, - "step": 5740 - }, - { - "epoch": 0.59, - "learning_rate": 4.5855741025282864e-05, - "loss": 1.8871, - "step": 5750 - }, - { - "epoch": 0.59, - "learning_rate": 4.584701075569214e-05, - "loss": 1.8374, - "step": 5760 - }, - { - "epoch": 0.59, - "learning_rate": 4.583828048610142e-05, - "loss": 1.9954, - "step": 5770 - }, - { - "epoch": 0.6, - "learning_rate": 4.5829550216510684e-05, - "loss": 1.8718, - "step": 5780 - }, - { - "epoch": 0.6, - "learning_rate": 4.5820819946919964e-05, - "loss": 1.7798, - "step": 5790 - }, - { - "epoch": 0.6, - "learning_rate": 4.581208967732924e-05, - "loss": 1.8892, - "step": 5800 - }, - { - "epoch": 0.6, - "learning_rate": 4.580335940773851e-05, - "loss": 1.9298, - "step": 5810 - }, - { - "epoch": 0.6, - "learning_rate": 4.579462913814779e-05, - "loss": 1.8787, - "step": 5820 - }, - { - "epoch": 0.6, - "learning_rate": 4.5785898868557063e-05, - "loss": 1.7445, - "step": 5830 - }, - { - "epoch": 0.6, - "learning_rate": 4.577716859896634e-05, - "loss": 1.9633, - "step": 5840 - }, - { - "epoch": 0.6, - "learning_rate": 4.576843832937561e-05, - "loss": 1.8791, - "step": 5850 - }, - { - "epoch": 0.6, - "learning_rate": 4.575970805978489e-05, - "loss": 1.8588, - "step": 5860 - }, - { - "epoch": 0.6, - "learning_rate": 4.575097779019416e-05, - "loss": 2.0058, - "step": 5870 - }, - { - "epoch": 0.61, - "learning_rate": 4.5742247520603437e-05, - "loss": 2.0155, - "step": 5880 - }, - { - "epoch": 0.61, - "learning_rate": 4.573351725101272e-05, - "loss": 1.9881, - "step": 5890 - }, - { - "epoch": 0.61, - "learning_rate": 4.572478698142199e-05, - "loss": 1.975, - "step": 5900 - }, - { - "epoch": 0.61, - "learning_rate": 4.571605671183126e-05, - "loss": 1.9278, - "step": 5910 - }, - { - "epoch": 0.61, - "learning_rate": 4.5707326442240536e-05, - "loss": 1.9845, - "step": 5920 - }, - { - "epoch": 0.61, - "learning_rate": 4.5698596172649817e-05, - "loss": 1.8113, - "step": 5930 - }, - { - "epoch": 0.61, - "learning_rate": 4.568986590305909e-05, - "loss": 1.8022, - "step": 5940 - }, - { - "epoch": 0.61, - "learning_rate": 4.568113563346836e-05, - "loss": 1.9143, - "step": 5950 - }, - { - "epoch": 0.61, - "learning_rate": 4.567240536387764e-05, - "loss": 1.8843, - "step": 5960 - }, - { - "epoch": 0.61, - "learning_rate": 4.566367509428691e-05, - "loss": 1.9027, - "step": 5970 - }, - { - "epoch": 0.62, - "learning_rate": 4.565494482469619e-05, - "loss": 1.8579, - "step": 5980 - }, - { - "epoch": 0.62, - "learning_rate": 4.564621455510546e-05, - "loss": 1.9922, - "step": 5990 - }, - { - "epoch": 0.62, - "learning_rate": 4.5637484285514736e-05, - "loss": 1.8765, - "step": 6000 - }, - { - "epoch": 0.62, - "learning_rate": 4.5628754015924016e-05, - "loss": 1.9406, - "step": 6010 - }, - { - "epoch": 0.62, - "learning_rate": 4.562002374633329e-05, - "loss": 1.7855, - "step": 6020 - }, - { - "epoch": 0.62, - "learning_rate": 4.561129347674256e-05, - "loss": 1.9938, - "step": 6030 - }, - { - "epoch": 0.62, - "learning_rate": 4.5602563207151836e-05, - "loss": 2.0107, - "step": 6040 - }, - { - "epoch": 0.62, - "learning_rate": 4.5593832937561116e-05, - "loss": 1.9262, - "step": 6050 - }, - { - "epoch": 0.62, - "learning_rate": 4.558510266797039e-05, - "loss": 1.8582, - "step": 6060 - }, - { - "epoch": 0.62, - "learning_rate": 4.557637239837966e-05, - "loss": 1.8521, - "step": 6070 - }, - { - "epoch": 0.63, - "learning_rate": 4.556851515574801e-05, - "loss": 2.0212, - "step": 6080 - }, - { - "epoch": 0.63, - "learning_rate": 4.5559784886157284e-05, - "loss": 1.6967, - "step": 6090 - }, - { - "epoch": 0.63, - "learning_rate": 4.5551054616566564e-05, - "loss": 1.8419, - "step": 6100 - }, - { - "epoch": 0.63, - "learning_rate": 4.554232434697584e-05, - "loss": 1.8578, - "step": 6110 - }, - { - "epoch": 0.63, - "learning_rate": 4.553359407738511e-05, - "loss": 1.942, - "step": 6120 - }, - { - "epoch": 0.63, - "learning_rate": 4.552486380779439e-05, - "loss": 1.8642, - "step": 6130 - }, - { - "epoch": 0.63, - "learning_rate": 4.5516133538203664e-05, - "loss": 1.9283, - "step": 6140 - }, - { - "epoch": 0.63, - "learning_rate": 4.550740326861294e-05, - "loss": 1.9561, - "step": 6150 - }, - { - "epoch": 0.63, - "learning_rate": 4.549867299902221e-05, - "loss": 1.9378, - "step": 6160 - }, - { - "epoch": 0.64, - "learning_rate": 4.548994272943149e-05, - "loss": 2.0321, - "step": 6170 - }, - { - "epoch": 0.64, - "learning_rate": 4.5481212459840764e-05, - "loss": 1.9804, - "step": 6180 - }, - { - "epoch": 0.64, - "learning_rate": 4.547248219025004e-05, - "loss": 1.8869, - "step": 6190 - }, - { - "epoch": 0.64, - "learning_rate": 4.546375192065932e-05, - "loss": 1.9017, - "step": 6200 - }, - { - "epoch": 0.64, - "learning_rate": 4.5455021651068583e-05, - "loss": 1.9475, - "step": 6210 - }, - { - "epoch": 0.64, - "learning_rate": 4.5446291381477864e-05, - "loss": 1.8769, - "step": 6220 - }, - { - "epoch": 0.64, - "learning_rate": 4.543756111188714e-05, - "loss": 1.9344, - "step": 6230 - }, - { - "epoch": 0.64, - "learning_rate": 4.542883084229641e-05, - "loss": 1.841, - "step": 6240 - }, - { - "epoch": 0.64, - "learning_rate": 4.542010057270569e-05, - "loss": 2.077, - "step": 6250 - }, - { - "epoch": 0.64, - "learning_rate": 4.541137030311496e-05, - "loss": 2.0144, - "step": 6260 - }, - { - "epoch": 0.65, - "learning_rate": 4.540264003352424e-05, - "loss": 1.8897, - "step": 6270 - }, - { - "epoch": 0.65, - "learning_rate": 4.539390976393351e-05, - "loss": 1.8136, - "step": 6280 - }, - { - "epoch": 0.65, - "learning_rate": 4.538517949434279e-05, - "loss": 1.9073, - "step": 6290 - }, - { - "epoch": 0.65, - "learning_rate": 4.537644922475206e-05, - "loss": 1.9607, - "step": 6300 - }, - { - "epoch": 0.65, - "learning_rate": 4.5367718955161336e-05, - "loss": 1.8875, - "step": 6310 - }, - { - "epoch": 0.65, - "learning_rate": 4.5358988685570617e-05, - "loss": 1.9473, - "step": 6320 - }, - { - "epoch": 0.65, - "learning_rate": 4.535025841597988e-05, - "loss": 1.7896, - "step": 6330 - }, - { - "epoch": 0.65, - "learning_rate": 4.534152814638916e-05, - "loss": 1.8456, - "step": 6340 - }, - { - "epoch": 0.65, - "learning_rate": 4.5332797876798436e-05, - "loss": 1.8739, - "step": 6350 - }, - { - "epoch": 0.65, - "learning_rate": 4.5324067607207716e-05, - "loss": 2.0112, - "step": 6360 - }, - { - "epoch": 0.66, - "learning_rate": 4.531533733761699e-05, - "loss": 1.8687, - "step": 6370 - }, - { - "epoch": 0.66, - "learning_rate": 4.530660706802626e-05, - "loss": 1.7995, - "step": 6380 - }, - { - "epoch": 0.66, - "learning_rate": 4.529787679843554e-05, - "loss": 1.9376, - "step": 6390 - }, - { - "epoch": 0.66, - "learning_rate": 4.528914652884481e-05, - "loss": 1.9731, - "step": 6400 - }, - { - "epoch": 0.66, - "learning_rate": 4.528041625925409e-05, - "loss": 1.9968, - "step": 6410 - }, - { - "epoch": 0.66, - "learning_rate": 4.527168598966336e-05, - "loss": 1.8424, - "step": 6420 - }, - { - "epoch": 0.66, - "learning_rate": 4.5262955720072636e-05, - "loss": 1.8483, - "step": 6430 - }, - { - "epoch": 0.66, - "learning_rate": 4.5254225450481916e-05, - "loss": 1.9378, - "step": 6440 - }, - { - "epoch": 0.66, - "learning_rate": 4.524549518089119e-05, - "loss": 1.9041, - "step": 6450 - }, - { - "epoch": 0.67, - "learning_rate": 4.523676491130046e-05, - "loss": 1.943, - "step": 6460 - }, - { - "epoch": 0.67, - "learning_rate": 4.5228034641709736e-05, - "loss": 1.9822, - "step": 6470 - }, - { - "epoch": 0.67, - "learning_rate": 4.5219304372119016e-05, - "loss": 1.7001, - "step": 6480 - }, - { - "epoch": 0.67, - "learning_rate": 4.521057410252828e-05, - "loss": 1.9421, - "step": 6490 - }, - { - "epoch": 0.67, - "learning_rate": 4.520184383293756e-05, - "loss": 1.9232, - "step": 6500 - }, - { - "epoch": 0.67, - "learning_rate": 4.519311356334684e-05, - "loss": 1.9127, - "step": 6510 - }, - { - "epoch": 0.67, - "learning_rate": 4.518438329375611e-05, - "loss": 1.7644, - "step": 6520 - }, - { - "epoch": 0.67, - "learning_rate": 4.517565302416539e-05, - "loss": 1.8973, - "step": 6530 - }, - { - "epoch": 0.67, - "learning_rate": 4.516692275457466e-05, - "loss": 2.0772, - "step": 6540 - }, - { - "epoch": 0.67, - "learning_rate": 4.515819248498394e-05, - "loss": 1.9666, - "step": 6550 - }, - { - "epoch": 0.68, - "learning_rate": 4.514946221539321e-05, - "loss": 1.8494, - "step": 6560 - }, - { - "epoch": 0.68, - "learning_rate": 4.514073194580249e-05, - "loss": 1.8023, - "step": 6570 - }, - { - "epoch": 0.68, - "learning_rate": 4.513200167621177e-05, - "loss": 1.8861, - "step": 6580 - }, - { - "epoch": 0.68, - "learning_rate": 4.5123271406621036e-05, - "loss": 1.8539, - "step": 6590 - }, - { - "epoch": 0.68, - "learning_rate": 4.5114541137030316e-05, - "loss": 1.86, - "step": 6600 - }, - { - "epoch": 0.68, - "learning_rate": 4.510581086743959e-05, - "loss": 1.9367, - "step": 6610 - }, - { - "epoch": 0.68, - "learning_rate": 4.509708059784886e-05, - "loss": 1.847, - "step": 6620 - }, - { - "epoch": 0.68, - "learning_rate": 4.5088350328258135e-05, - "loss": 1.9183, - "step": 6630 - }, - { - "epoch": 0.68, - "learning_rate": 4.5079620058667415e-05, - "loss": 1.9808, - "step": 6640 - }, - { - "epoch": 0.68, - "learning_rate": 4.507088978907669e-05, - "loss": 1.8325, - "step": 6650 - }, - { - "epoch": 0.69, - "learning_rate": 4.506215951948596e-05, - "loss": 1.9137, - "step": 6660 - }, - { - "epoch": 0.69, - "learning_rate": 4.505342924989524e-05, - "loss": 1.81, - "step": 6670 - }, - { - "epoch": 0.69, - "learning_rate": 4.504469898030451e-05, - "loss": 1.7753, - "step": 6680 - }, - { - "epoch": 0.69, - "learning_rate": 4.503596871071379e-05, - "loss": 1.9381, - "step": 6690 - }, - { - "epoch": 0.69, - "learning_rate": 4.502723844112307e-05, - "loss": 1.9853, - "step": 6700 - }, - { - "epoch": 0.69, - "learning_rate": 4.5018508171532335e-05, - "loss": 1.9685, - "step": 6710 - }, - { - "epoch": 0.69, - "learning_rate": 4.5009777901941615e-05, - "loss": 2.0147, - "step": 6720 - }, - { - "epoch": 0.69, - "learning_rate": 4.500104763235089e-05, - "loss": 1.9566, - "step": 6730 - }, - { - "epoch": 0.69, - "learning_rate": 4.499231736276017e-05, - "loss": 1.9469, - "step": 6740 - }, - { - "epoch": 0.69, - "learning_rate": 4.4983587093169435e-05, - "loss": 1.9955, - "step": 6750 - }, - { - "epoch": 0.7, - "learning_rate": 4.4974856823578715e-05, - "loss": 1.88, - "step": 6760 - }, - { - "epoch": 0.7, - "learning_rate": 4.4966126553987995e-05, - "loss": 1.9952, - "step": 6770 - }, - { - "epoch": 0.7, - "learning_rate": 4.495739628439726e-05, - "loss": 1.8726, - "step": 6780 - }, - { - "epoch": 0.7, - "learning_rate": 4.494866601480654e-05, - "loss": 1.8484, - "step": 6790 - }, - { - "epoch": 0.7, - "learning_rate": 4.4939935745215815e-05, - "loss": 1.985, - "step": 6800 - }, - { - "epoch": 0.7, - "learning_rate": 4.493120547562509e-05, - "loss": 1.8963, - "step": 6810 - }, - { - "epoch": 0.7, - "learning_rate": 4.492247520603436e-05, - "loss": 1.9718, - "step": 6820 - }, - { - "epoch": 0.7, - "learning_rate": 4.491374493644364e-05, - "loss": 1.9229, - "step": 6830 - }, - { - "epoch": 0.7, - "learning_rate": 4.4905014666852915e-05, - "loss": 1.9079, - "step": 6840 - }, - { - "epoch": 0.71, - "learning_rate": 4.489628439726219e-05, - "loss": 1.9877, - "step": 6850 - }, - { - "epoch": 0.71, - "learning_rate": 4.488755412767147e-05, - "loss": 1.9899, - "step": 6860 - }, - { - "epoch": 0.71, - "learning_rate": 4.4878823858080735e-05, - "loss": 1.9247, - "step": 6870 - }, - { - "epoch": 0.71, - "learning_rate": 4.4870093588490015e-05, - "loss": 1.9661, - "step": 6880 - }, - { - "epoch": 0.71, - "learning_rate": 4.486136331889929e-05, - "loss": 1.941, - "step": 6890 - }, - { - "epoch": 0.71, - "learning_rate": 4.485263304930856e-05, - "loss": 1.9007, - "step": 6900 - }, - { - "epoch": 0.71, - "learning_rate": 4.484390277971784e-05, - "loss": 1.9399, - "step": 6910 - }, - { - "epoch": 0.71, - "learning_rate": 4.4835172510127114e-05, - "loss": 1.8359, - "step": 6920 - }, - { - "epoch": 0.71, - "learning_rate": 4.4826442240536394e-05, - "loss": 1.8088, - "step": 6930 - }, - { - "epoch": 0.71, - "learning_rate": 4.481771197094566e-05, - "loss": 2.1038, - "step": 6940 - }, - { - "epoch": 0.72, - "learning_rate": 4.480898170135494e-05, - "loss": 1.8658, - "step": 6950 - }, - { - "epoch": 0.72, - "learning_rate": 4.4800251431764214e-05, - "loss": 1.8927, - "step": 6960 - }, - { - "epoch": 0.72, - "learning_rate": 4.479152116217349e-05, - "loss": 1.8161, - "step": 6970 - }, - { - "epoch": 0.72, - "learning_rate": 4.478279089258277e-05, - "loss": 2.0503, - "step": 6980 - }, - { - "epoch": 0.72, - "learning_rate": 4.477406062299204e-05, - "loss": 1.8365, - "step": 6990 - }, - { - "epoch": 0.72, - "learning_rate": 4.4765330353401314e-05, - "loss": 1.8593, - "step": 7000 - }, - { - "epoch": 0.72, - "learning_rate": 4.475660008381059e-05, - "loss": 1.8986, - "step": 7010 - }, - { - "epoch": 0.72, - "learning_rate": 4.474786981421987e-05, - "loss": 1.8684, - "step": 7020 - }, - { - "epoch": 0.72, - "learning_rate": 4.473913954462914e-05, - "loss": 1.9206, - "step": 7030 - }, - { - "epoch": 0.72, - "learning_rate": 4.4730409275038414e-05, - "loss": 1.9081, - "step": 7040 - }, - { - "epoch": 0.73, - "learning_rate": 4.4721679005447694e-05, - "loss": 1.9756, - "step": 7050 - }, - { - "epoch": 0.73, - "learning_rate": 4.471294873585696e-05, - "loss": 1.8253, - "step": 7060 - }, - { - "epoch": 0.73, - "learning_rate": 4.470421846626624e-05, - "loss": 2.0947, - "step": 7070 - }, - { - "epoch": 0.73, - "learning_rate": 4.4695488196675514e-05, - "loss": 1.7639, - "step": 7080 - }, - { - "epoch": 0.73, - "learning_rate": 4.468675792708479e-05, - "loss": 1.8864, - "step": 7090 - }, - { - "epoch": 0.73, - "learning_rate": 4.467802765749407e-05, - "loss": 2.0049, - "step": 7100 - }, - { - "epoch": 0.73, - "learning_rate": 4.466929738790334e-05, - "loss": 1.8289, - "step": 7110 - }, - { - "epoch": 0.73, - "learning_rate": 4.4660567118312614e-05, - "loss": 1.9287, - "step": 7120 - }, - { - "epoch": 0.73, - "learning_rate": 4.465183684872189e-05, - "loss": 1.8943, - "step": 7130 - }, - { - "epoch": 0.74, - "learning_rate": 4.464310657913117e-05, - "loss": 1.8995, - "step": 7140 - }, - { - "epoch": 0.74, - "learning_rate": 4.463437630954044e-05, - "loss": 1.9107, - "step": 7150 - }, - { - "epoch": 0.74, - "learning_rate": 4.4625646039949714e-05, - "loss": 1.8897, - "step": 7160 - }, - { - "epoch": 0.74, - "learning_rate": 4.4616915770358994e-05, - "loss": 1.9495, - "step": 7170 - }, - { - "epoch": 0.74, - "learning_rate": 4.460818550076827e-05, - "loss": 1.8295, - "step": 7180 - }, - { - "epoch": 0.74, - "learning_rate": 4.459945523117754e-05, - "loss": 2.0628, - "step": 7190 - }, - { - "epoch": 0.74, - "learning_rate": 4.4590724961586813e-05, - "loss": 1.9066, - "step": 7200 - }, - { - "epoch": 0.74, - "learning_rate": 4.4581994691996094e-05, - "loss": 1.7927, - "step": 7210 - }, - { - "epoch": 0.74, - "learning_rate": 4.457326442240537e-05, - "loss": 1.8584, - "step": 7220 - }, - { - "epoch": 0.74, - "learning_rate": 4.456453415281464e-05, - "loss": 1.9142, - "step": 7230 - }, - { - "epoch": 0.75, - "learning_rate": 4.455580388322392e-05, - "loss": 1.9398, - "step": 7240 - }, - { - "epoch": 0.75, - "learning_rate": 4.4547073613633187e-05, - "loss": 1.8544, - "step": 7250 - }, - { - "epoch": 0.75, - "learning_rate": 4.453834334404247e-05, - "loss": 2.0312, - "step": 7260 - }, - { - "epoch": 0.75, - "learning_rate": 4.452961307445174e-05, - "loss": 1.9066, - "step": 7270 - }, - { - "epoch": 0.75, - "learning_rate": 4.452088280486101e-05, - "loss": 1.8878, - "step": 7280 - }, - { - "epoch": 0.75, - "learning_rate": 4.451215253527029e-05, - "loss": 1.9033, - "step": 7290 - }, - { - "epoch": 0.75, - "learning_rate": 4.4503422265679567e-05, - "loss": 1.8675, - "step": 7300 - }, - { - "epoch": 0.75, - "learning_rate": 4.449469199608884e-05, - "loss": 1.9868, - "step": 7310 - }, - { - "epoch": 0.75, - "learning_rate": 4.448596172649811e-05, - "loss": 1.8584, - "step": 7320 - }, - { - "epoch": 0.75, - "learning_rate": 4.447723145690739e-05, - "loss": 1.8565, - "step": 7330 - }, - { - "epoch": 0.76, - "learning_rate": 4.4468501187316666e-05, - "loss": 1.8517, - "step": 7340 - }, - { - "epoch": 0.76, - "learning_rate": 4.445977091772594e-05, - "loss": 2.0381, - "step": 7350 - }, - { - "epoch": 0.76, - "learning_rate": 4.445104064813522e-05, - "loss": 1.8618, - "step": 7360 - }, - { - "epoch": 0.76, - "learning_rate": 4.444231037854449e-05, - "loss": 1.9044, - "step": 7370 - }, - { - "epoch": 0.76, - "learning_rate": 4.4433580108953766e-05, - "loss": 1.9624, - "step": 7380 - }, - { - "epoch": 0.76, - "learning_rate": 4.442484983936304e-05, - "loss": 1.93, - "step": 7390 - }, - { - "epoch": 0.76, - "learning_rate": 4.441611956977232e-05, - "loss": 1.7296, - "step": 7400 - }, - { - "epoch": 0.76, - "learning_rate": 4.440738930018159e-05, - "loss": 1.8708, - "step": 7410 - }, - { - "epoch": 0.76, - "learning_rate": 4.4398659030590866e-05, - "loss": 1.9082, - "step": 7420 - }, - { - "epoch": 0.77, - "learning_rate": 4.4389928761000146e-05, - "loss": 1.9923, - "step": 7430 - }, - { - "epoch": 0.77, - "learning_rate": 4.438119849140941e-05, - "loss": 1.8932, - "step": 7440 - }, - { - "epoch": 0.77, - "learning_rate": 4.437246822181869e-05, - "loss": 1.955, - "step": 7450 - }, - { - "epoch": 0.77, - "learning_rate": 4.4363737952227966e-05, - "loss": 1.8237, - "step": 7460 - }, - { - "epoch": 0.77, - "learning_rate": 4.435500768263724e-05, - "loss": 1.9451, - "step": 7470 - }, - { - "epoch": 0.77, - "learning_rate": 4.434627741304652e-05, - "loss": 1.9676, - "step": 7480 - }, - { - "epoch": 0.77, - "learning_rate": 4.433754714345579e-05, - "loss": 1.9159, - "step": 7490 - }, - { - "epoch": 0.77, - "learning_rate": 4.4328816873865066e-05, - "loss": 1.9371, - "step": 7500 - }, - { - "epoch": 0.77, - "learning_rate": 4.432008660427434e-05, - "loss": 2.0315, - "step": 7510 - }, - { - "epoch": 0.77, - "learning_rate": 4.431135633468362e-05, - "loss": 1.9277, - "step": 7520 - }, - { - "epoch": 0.78, - "learning_rate": 4.430262606509289e-05, - "loss": 1.9546, - "step": 7530 - }, - { - "epoch": 0.78, - "learning_rate": 4.4293895795502166e-05, - "loss": 1.8681, - "step": 7540 - }, - { - "epoch": 0.78, - "learning_rate": 4.4285165525911446e-05, - "loss": 1.9917, - "step": 7550 - }, - { - "epoch": 0.78, - "learning_rate": 4.427643525632072e-05, - "loss": 1.9484, - "step": 7560 - }, - { - "epoch": 0.78, - "learning_rate": 4.426770498672999e-05, - "loss": 1.7766, - "step": 7570 - }, - { - "epoch": 0.78, - "learning_rate": 4.4258974717139266e-05, - "loss": 1.7735, - "step": 7580 - }, - { - "epoch": 0.78, - "learning_rate": 4.4250244447548546e-05, - "loss": 2.0253, - "step": 7590 - }, - { - "epoch": 0.78, - "learning_rate": 4.424151417795782e-05, - "loss": 1.9334, - "step": 7600 - }, - { - "epoch": 0.78, - "learning_rate": 4.423278390836709e-05, - "loss": 1.9026, - "step": 7610 - }, - { - "epoch": 0.78, - "learning_rate": 4.422405363877637e-05, - "loss": 1.8632, - "step": 7620 - }, - { - "epoch": 0.79, - "learning_rate": 4.421532336918564e-05, - "loss": 1.9665, - "step": 7630 - }, - { - "epoch": 0.79, - "learning_rate": 4.420659309959492e-05, - "loss": 1.9057, - "step": 7640 - }, - { - "epoch": 0.79, - "learning_rate": 4.419786283000419e-05, - "loss": 1.8698, - "step": 7650 - }, - { - "epoch": 0.79, - "learning_rate": 4.4189132560413465e-05, - "loss": 1.836, - "step": 7660 - }, - { - "epoch": 0.79, - "learning_rate": 4.4180402290822745e-05, - "loss": 1.9126, - "step": 7670 - }, - { - "epoch": 0.79, - "learning_rate": 4.417167202123202e-05, - "loss": 1.8486, - "step": 7680 - }, - { - "epoch": 0.79, - "learning_rate": 4.416294175164129e-05, - "loss": 1.9936, - "step": 7690 - }, - { - "epoch": 0.79, - "learning_rate": 4.4154211482050565e-05, - "loss": 1.8677, - "step": 7700 - }, - { - "epoch": 0.79, - "learning_rate": 4.4145481212459845e-05, - "loss": 1.9565, - "step": 7710 - }, - { - "epoch": 0.79, - "learning_rate": 4.413675094286912e-05, - "loss": 1.8159, - "step": 7720 - }, - { - "epoch": 0.8, - "learning_rate": 4.412802067327839e-05, - "loss": 1.8449, - "step": 7730 - }, - { - "epoch": 0.8, - "learning_rate": 4.411929040368767e-05, - "loss": 1.8823, - "step": 7740 - }, - { - "epoch": 0.8, - "learning_rate": 4.4110560134096945e-05, - "loss": 1.8765, - "step": 7750 - }, - { - "epoch": 0.8, - "learning_rate": 4.410182986450622e-05, - "loss": 1.9246, - "step": 7760 - }, - { - "epoch": 0.8, - "learning_rate": 4.409309959491549e-05, - "loss": 1.8965, - "step": 7770 - }, - { - "epoch": 0.8, - "learning_rate": 4.408436932532477e-05, - "loss": 2.0334, - "step": 7780 - }, - { - "epoch": 0.8, - "learning_rate": 4.4075639055734045e-05, - "loss": 1.95, - "step": 7790 - }, - { - "epoch": 0.8, - "learning_rate": 4.406690878614332e-05, - "loss": 1.8416, - "step": 7800 - }, - { - "epoch": 0.8, - "learning_rate": 4.40581785165526e-05, - "loss": 2.128, - "step": 7810 - }, - { - "epoch": 0.81, - "learning_rate": 4.4049448246961865e-05, - "loss": 2.0332, - "step": 7820 - }, - { - "epoch": 0.81, - "learning_rate": 4.4040717977371145e-05, - "loss": 1.9482, - "step": 7830 - }, - { - "epoch": 0.81, - "learning_rate": 4.403198770778042e-05, - "loss": 1.907, - "step": 7840 - }, - { - "epoch": 0.81, - "learning_rate": 4.402325743818969e-05, - "loss": 1.9139, - "step": 7850 - }, - { - "epoch": 0.81, - "learning_rate": 4.401452716859897e-05, - "loss": 1.9173, - "step": 7860 - }, - { - "epoch": 0.81, - "learning_rate": 4.4005796899008245e-05, - "loss": 1.8155, - "step": 7870 - }, - { - "epoch": 0.81, - "learning_rate": 4.399793965637659e-05, - "loss": 2.0311, - "step": 7880 - }, - { - "epoch": 0.81, - "learning_rate": 4.3989209386785866e-05, - "loss": 1.8039, - "step": 7890 - }, - { - "epoch": 0.81, - "learning_rate": 4.398047911719514e-05, - "loss": 1.9292, - "step": 7900 - }, - { - "epoch": 0.81, - "learning_rate": 4.397174884760442e-05, - "loss": 1.826, - "step": 7910 - }, - { - "epoch": 0.82, - "learning_rate": 4.396301857801369e-05, - "loss": 1.8845, - "step": 7920 - }, - { - "epoch": 0.82, - "learning_rate": 4.3954288308422966e-05, - "loss": 1.8567, - "step": 7930 - }, - { - "epoch": 0.82, - "learning_rate": 4.394555803883224e-05, - "loss": 1.8737, - "step": 7940 - }, - { - "epoch": 0.82, - "learning_rate": 4.393682776924152e-05, - "loss": 1.9106, - "step": 7950 - }, - { - "epoch": 0.82, - "learning_rate": 4.3928097499650786e-05, - "loss": 2.0107, - "step": 7960 - }, - { - "epoch": 0.82, - "learning_rate": 4.3919367230060066e-05, - "loss": 2.0222, - "step": 7970 - }, - { - "epoch": 0.82, - "learning_rate": 4.3910636960469346e-05, - "loss": 1.8418, - "step": 7980 - }, - { - "epoch": 0.82, - "learning_rate": 4.390190669087862e-05, - "loss": 1.8249, - "step": 7990 - }, - { - "epoch": 0.82, - "learning_rate": 4.389317642128789e-05, - "loss": 1.9456, - "step": 8000 - }, - { - "epoch": 0.82, - "learning_rate": 4.3884446151697165e-05, - "loss": 1.8933, - "step": 8010 - }, - { - "epoch": 0.83, - "learning_rate": 4.3875715882106445e-05, - "loss": 1.9091, - "step": 8020 - }, - { - "epoch": 0.83, - "learning_rate": 4.386698561251571e-05, - "loss": 1.837, - "step": 8030 - }, - { - "epoch": 0.83, - "learning_rate": 4.385825534292499e-05, - "loss": 1.8042, - "step": 8040 - }, - { - "epoch": 0.83, - "learning_rate": 4.384952507333427e-05, - "loss": 1.9415, - "step": 8050 - }, - { - "epoch": 0.83, - "learning_rate": 4.384079480374354e-05, - "loss": 1.8731, - "step": 8060 - }, - { - "epoch": 0.83, - "learning_rate": 4.383206453415282e-05, - "loss": 1.8279, - "step": 8070 - }, - { - "epoch": 0.83, - "learning_rate": 4.382333426456209e-05, - "loss": 1.9256, - "step": 8080 - }, - { - "epoch": 0.83, - "learning_rate": 4.3814603994971365e-05, - "loss": 1.967, - "step": 8090 - }, - { - "epoch": 0.83, - "learning_rate": 4.380587372538064e-05, - "loss": 1.6856, - "step": 8100 - }, - { - "epoch": 0.84, - "learning_rate": 4.379714345578992e-05, - "loss": 1.9908, - "step": 8110 - }, - { - "epoch": 0.84, - "learning_rate": 4.378841318619919e-05, - "loss": 1.9756, - "step": 8120 - }, - { - "epoch": 0.84, - "learning_rate": 4.3779682916608465e-05, - "loss": 1.8669, - "step": 8130 - }, - { - "epoch": 0.84, - "learning_rate": 4.3770952647017745e-05, - "loss": 1.8277, - "step": 8140 - }, - { - "epoch": 0.84, - "learning_rate": 4.376222237742701e-05, - "loss": 1.8518, - "step": 8150 - }, - { - "epoch": 0.84, - "learning_rate": 4.375349210783629e-05, - "loss": 1.9726, - "step": 8160 - }, - { - "epoch": 0.84, - "learning_rate": 4.3744761838245565e-05, - "loss": 1.766, - "step": 8170 - }, - { - "epoch": 0.84, - "learning_rate": 4.373603156865484e-05, - "loss": 1.8517, - "step": 8180 - }, - { - "epoch": 0.84, - "learning_rate": 4.372730129906412e-05, - "loss": 1.8879, - "step": 8190 - }, - { - "epoch": 0.84, - "learning_rate": 4.371857102947339e-05, - "loss": 2.0617, - "step": 8200 - }, - { - "epoch": 0.85, - "learning_rate": 4.370984075988267e-05, - "loss": 1.9032, - "step": 8210 - }, - { - "epoch": 0.85, - "learning_rate": 4.370111049029194e-05, - "loss": 2.014, - "step": 8220 - }, - { - "epoch": 0.85, - "learning_rate": 4.369238022070122e-05, - "loss": 1.9015, - "step": 8230 - }, - { - "epoch": 0.85, - "learning_rate": 4.368364995111049e-05, - "loss": 1.9622, - "step": 8240 - }, - { - "epoch": 0.85, - "learning_rate": 4.3674919681519765e-05, - "loss": 1.8902, - "step": 8250 - }, - { - "epoch": 0.85, - "learning_rate": 4.3666189411929045e-05, - "loss": 1.9431, - "step": 8260 - }, - { - "epoch": 0.85, - "learning_rate": 4.365745914233832e-05, - "loss": 1.9136, - "step": 8270 - }, - { - "epoch": 0.85, - "learning_rate": 4.364872887274759e-05, - "loss": 1.8425, - "step": 8280 - }, - { - "epoch": 0.85, - "learning_rate": 4.3639998603156864e-05, - "loss": 2.0659, - "step": 8290 - }, - { - "epoch": 0.85, - "learning_rate": 4.3631268333566145e-05, - "loss": 1.9682, - "step": 8300 - }, - { - "epoch": 0.86, - "learning_rate": 4.362253806397542e-05, - "loss": 1.7982, - "step": 8310 - }, - { - "epoch": 0.86, - "learning_rate": 4.361380779438469e-05, - "loss": 1.8961, - "step": 8320 - }, - { - "epoch": 0.86, - "learning_rate": 4.360507752479397e-05, - "loss": 2.1465, - "step": 8330 - }, - { - "epoch": 0.86, - "learning_rate": 4.359634725520324e-05, - "loss": 1.8898, - "step": 8340 - }, - { - "epoch": 0.86, - "learning_rate": 4.358761698561252e-05, - "loss": 1.8611, - "step": 8350 - }, - { - "epoch": 0.86, - "learning_rate": 4.357888671602179e-05, - "loss": 1.7984, - "step": 8360 - }, - { - "epoch": 0.86, - "learning_rate": 4.3570156446431064e-05, - "loss": 1.8857, - "step": 8370 - }, - { - "epoch": 0.86, - "learning_rate": 4.3561426176840344e-05, - "loss": 1.9614, - "step": 8380 - }, - { - "epoch": 0.86, - "learning_rate": 4.355269590724962e-05, - "loss": 1.8525, - "step": 8390 - }, - { - "epoch": 0.86, - "learning_rate": 4.35439656376589e-05, - "loss": 1.8673, - "step": 8400 - }, - { - "epoch": 0.87, - "learning_rate": 4.3535235368068164e-05, - "loss": 1.7879, - "step": 8410 - }, - { - "epoch": 0.87, - "learning_rate": 4.3526505098477444e-05, - "loss": 1.8082, - "step": 8420 - }, - { - "epoch": 0.87, - "learning_rate": 4.351777482888672e-05, - "loss": 1.7898, - "step": 8430 - }, - { - "epoch": 0.87, - "learning_rate": 4.350904455929599e-05, - "loss": 1.923, - "step": 8440 - }, - { - "epoch": 0.87, - "learning_rate": 4.350031428970527e-05, - "loss": 1.7423, - "step": 8450 - }, - { - "epoch": 0.87, - "learning_rate": 4.3491584020114544e-05, - "loss": 1.8416, - "step": 8460 - }, - { - "epoch": 0.87, - "learning_rate": 4.348285375052382e-05, - "loss": 1.9893, - "step": 8470 - }, - { - "epoch": 0.87, - "learning_rate": 4.347412348093309e-05, - "loss": 1.8634, - "step": 8480 - }, - { - "epoch": 0.87, - "learning_rate": 4.346539321134237e-05, - "loss": 1.9571, - "step": 8490 - }, - { - "epoch": 0.88, - "learning_rate": 4.3456662941751644e-05, - "loss": 1.901, - "step": 8500 - }, - { - "epoch": 0.88, - "learning_rate": 4.344793267216092e-05, - "loss": 1.8699, - "step": 8510 - }, - { - "epoch": 0.88, - "learning_rate": 4.34392024025702e-05, - "loss": 1.8812, - "step": 8520 - }, - { - "epoch": 0.88, - "learning_rate": 4.3430472132979464e-05, - "loss": 1.9219, - "step": 8530 - }, - { - "epoch": 0.88, - "learning_rate": 4.3421741863388744e-05, - "loss": 1.8457, - "step": 8540 - }, - { - "epoch": 0.88, - "learning_rate": 4.341301159379802e-05, - "loss": 1.9833, - "step": 8550 - }, - { - "epoch": 0.88, - "learning_rate": 4.340428132420729e-05, - "loss": 1.9811, - "step": 8560 - }, - { - "epoch": 0.88, - "learning_rate": 4.339555105461657e-05, - "loss": 1.8513, - "step": 8570 - }, - { - "epoch": 0.88, - "learning_rate": 4.3386820785025844e-05, - "loss": 1.9744, - "step": 8580 - }, - { - "epoch": 0.88, - "learning_rate": 4.3378090515435124e-05, - "loss": 1.84, - "step": 8590 - }, - { - "epoch": 0.89, - "learning_rate": 4.336936024584439e-05, - "loss": 1.5563, - "step": 8600 - }, - { - "epoch": 0.89, - "learning_rate": 4.336062997625367e-05, - "loss": 1.8902, - "step": 8610 - }, - { - "epoch": 0.89, - "learning_rate": 4.335189970666294e-05, - "loss": 1.8737, - "step": 8620 - }, - { - "epoch": 0.89, - "learning_rate": 4.334316943707222e-05, - "loss": 1.9445, - "step": 8630 - }, - { - "epoch": 0.89, - "learning_rate": 4.33344391674815e-05, - "loss": 1.9151, - "step": 8640 - }, - { - "epoch": 0.89, - "learning_rate": 4.332570889789077e-05, - "loss": 1.8131, - "step": 8650 - }, - { - "epoch": 0.89, - "learning_rate": 4.331697862830004e-05, - "loss": 1.8619, - "step": 8660 - }, - { - "epoch": 0.89, - "learning_rate": 4.3308248358709317e-05, - "loss": 1.9442, - "step": 8670 - }, - { - "epoch": 0.89, - "learning_rate": 4.3299518089118597e-05, - "loss": 1.8936, - "step": 8680 - }, - { - "epoch": 0.89, - "learning_rate": 4.329078781952787e-05, - "loss": 1.8226, - "step": 8690 - }, - { - "epoch": 0.9, - "learning_rate": 4.328205754993714e-05, - "loss": 2.0373, - "step": 8700 - }, - { - "epoch": 0.9, - "learning_rate": 4.327332728034642e-05, - "loss": 1.8952, - "step": 8710 - }, - { - "epoch": 0.9, - "learning_rate": 4.326459701075569e-05, - "loss": 1.9068, - "step": 8720 - }, - { - "epoch": 0.9, - "learning_rate": 4.325586674116497e-05, - "loss": 1.8326, - "step": 8730 - }, - { - "epoch": 0.9, - "learning_rate": 4.324713647157424e-05, - "loss": 1.9627, - "step": 8740 - }, - { - "epoch": 0.9, - "learning_rate": 4.3238406201983516e-05, - "loss": 1.919, - "step": 8750 - }, - { - "epoch": 0.9, - "learning_rate": 4.3229675932392796e-05, - "loss": 2.0294, - "step": 8760 - }, - { - "epoch": 0.9, - "learning_rate": 4.322094566280207e-05, - "loss": 1.8436, - "step": 8770 - }, - { - "epoch": 0.9, - "learning_rate": 4.321221539321135e-05, - "loss": 1.8034, - "step": 8780 - }, - { - "epoch": 0.91, - "learning_rate": 4.3203485123620616e-05, - "loss": 1.8162, - "step": 8790 - }, - { - "epoch": 0.91, - "learning_rate": 4.3194754854029896e-05, - "loss": 1.8468, - "step": 8800 - }, - { - "epoch": 0.91, - "learning_rate": 4.318602458443917e-05, - "loss": 1.7474, - "step": 8810 - }, - { - "epoch": 0.91, - "learning_rate": 4.317729431484844e-05, - "loss": 1.741, - "step": 8820 - }, - { - "epoch": 0.91, - "learning_rate": 4.316856404525772e-05, - "loss": 1.9141, - "step": 8830 - }, - { - "epoch": 0.91, - "learning_rate": 4.3159833775666996e-05, - "loss": 1.8607, - "step": 8840 - }, - { - "epoch": 0.91, - "learning_rate": 4.315110350607627e-05, - "loss": 1.8223, - "step": 8850 - }, - { - "epoch": 0.91, - "learning_rate": 4.314237323648554e-05, - "loss": 2.0047, - "step": 8860 - }, - { - "epoch": 0.91, - "learning_rate": 4.313364296689482e-05, - "loss": 1.8995, - "step": 8870 - }, - { - "epoch": 0.91, - "learning_rate": 4.3124912697304096e-05, - "loss": 1.8478, - "step": 8880 - }, - { - "epoch": 0.92, - "learning_rate": 4.311618242771337e-05, - "loss": 2.0053, - "step": 8890 - }, - { - "epoch": 0.92, - "learning_rate": 4.310745215812265e-05, - "loss": 1.7061, - "step": 8900 - }, - { - "epoch": 0.92, - "learning_rate": 4.3098721888531916e-05, - "loss": 1.8929, - "step": 8910 - }, - { - "epoch": 0.92, - "learning_rate": 4.3089991618941196e-05, - "loss": 1.7674, - "step": 8920 - }, - { - "epoch": 0.92, - "learning_rate": 4.308126134935047e-05, - "loss": 1.8123, - "step": 8930 - }, - { - "epoch": 0.92, - "learning_rate": 4.307253107975974e-05, - "loss": 1.8774, - "step": 8940 - }, - { - "epoch": 0.92, - "learning_rate": 4.306380081016902e-05, - "loss": 1.8441, - "step": 8950 - }, - { - "epoch": 0.92, - "learning_rate": 4.3055070540578296e-05, - "loss": 1.9632, - "step": 8960 - }, - { - "epoch": 0.92, - "learning_rate": 4.304634027098757e-05, - "loss": 1.8782, - "step": 8970 - }, - { - "epoch": 0.92, - "learning_rate": 4.303761000139684e-05, - "loss": 1.8817, - "step": 8980 - }, - { - "epoch": 0.93, - "learning_rate": 4.302887973180612e-05, - "loss": 2.05, - "step": 8990 - }, - { - "epoch": 0.93, - "learning_rate": 4.3020149462215395e-05, - "loss": 1.8287, - "step": 9000 - }, - { - "epoch": 0.93, - "learning_rate": 4.301141919262467e-05, - "loss": 2.0142, - "step": 9010 - }, - { - "epoch": 0.93, - "learning_rate": 4.300268892303395e-05, - "loss": 2.0055, - "step": 9020 - }, - { - "epoch": 0.93, - "learning_rate": 4.299395865344322e-05, - "loss": 1.6785, - "step": 9030 - }, - { - "epoch": 0.93, - "learning_rate": 4.2985228383852495e-05, - "loss": 1.8379, - "step": 9040 - }, - { - "epoch": 0.93, - "learning_rate": 4.297649811426177e-05, - "loss": 1.8608, - "step": 9050 - }, - { - "epoch": 0.93, - "learning_rate": 4.296776784467105e-05, - "loss": 1.887, - "step": 9060 - }, - { - "epoch": 0.93, - "learning_rate": 4.295903757508032e-05, - "loss": 1.867, - "step": 9070 - }, - { - "epoch": 0.93, - "learning_rate": 4.2950307305489595e-05, - "loss": 1.8419, - "step": 9080 - }, - { - "epoch": 0.94, - "learning_rate": 4.2941577035898875e-05, - "loss": 1.9248, - "step": 9090 - }, - { - "epoch": 0.94, - "learning_rate": 4.293284676630814e-05, - "loss": 1.8193, - "step": 9100 - }, - { - "epoch": 0.94, - "learning_rate": 4.292411649671742e-05, - "loss": 1.8485, - "step": 9110 - }, - { - "epoch": 0.94, - "learning_rate": 4.2915386227126695e-05, - "loss": 1.9335, - "step": 9120 - }, - { - "epoch": 0.94, - "learning_rate": 4.290665595753597e-05, - "loss": 1.8016, - "step": 9130 - }, - { - "epoch": 0.94, - "learning_rate": 4.289792568794525e-05, - "loss": 1.8523, - "step": 9140 - }, - { - "epoch": 0.94, - "learning_rate": 4.288919541835452e-05, - "loss": 1.9271, - "step": 9150 - }, - { - "epoch": 0.94, - "learning_rate": 4.2880465148763795e-05, - "loss": 1.8492, - "step": 9160 - }, - { - "epoch": 0.94, - "learning_rate": 4.287173487917307e-05, - "loss": 1.9555, - "step": 9170 - }, - { - "epoch": 0.95, - "learning_rate": 4.286300460958235e-05, - "loss": 1.8619, - "step": 9180 - }, - { - "epoch": 0.95, - "learning_rate": 4.285427433999162e-05, - "loss": 1.8998, - "step": 9190 - }, - { - "epoch": 0.95, - "learning_rate": 4.2845544070400895e-05, - "loss": 1.8354, - "step": 9200 - }, - { - "epoch": 0.95, - "learning_rate": 4.2836813800810175e-05, - "loss": 1.9187, - "step": 9210 - }, - { - "epoch": 0.95, - "learning_rate": 4.282808353121945e-05, - "loss": 1.8662, - "step": 9220 - }, - { - "epoch": 0.95, - "learning_rate": 4.281935326162872e-05, - "loss": 1.9838, - "step": 9230 - }, - { - "epoch": 0.95, - "learning_rate": 4.2810622992037995e-05, - "loss": 1.8133, - "step": 9240 - }, - { - "epoch": 0.95, - "learning_rate": 4.2801892722447275e-05, - "loss": 1.8465, - "step": 9250 - }, - { - "epoch": 0.95, - "learning_rate": 4.279316245285654e-05, - "loss": 1.7488, - "step": 9260 - }, - { - "epoch": 0.95, - "learning_rate": 4.278443218326582e-05, - "loss": 1.7855, - "step": 9270 - }, - { - "epoch": 0.96, - "learning_rate": 4.27757019136751e-05, - "loss": 1.8665, - "step": 9280 - }, - { - "epoch": 0.96, - "learning_rate": 4.276697164408437e-05, - "loss": 1.7769, - "step": 9290 - }, - { - "epoch": 0.96, - "learning_rate": 4.275824137449365e-05, - "loss": 1.996, - "step": 9300 - }, - { - "epoch": 0.96, - "learning_rate": 4.274951110490292e-05, - "loss": 1.8218, - "step": 9310 - }, - { - "epoch": 0.96, - "learning_rate": 4.2740780835312194e-05, - "loss": 2.0463, - "step": 9320 - }, - { - "epoch": 0.96, - "learning_rate": 4.273205056572147e-05, - "loss": 1.874, - "step": 9330 - }, - { - "epoch": 0.96, - "learning_rate": 4.272332029613075e-05, - "loss": 1.9101, - "step": 9340 - }, - { - "epoch": 0.96, - "learning_rate": 4.271459002654002e-05, - "loss": 1.8118, - "step": 9350 - }, - { - "epoch": 0.96, - "learning_rate": 4.2705859756949294e-05, - "loss": 1.7913, - "step": 9360 - }, - { - "epoch": 0.96, - "learning_rate": 4.2697129487358574e-05, - "loss": 2.0015, - "step": 9370 - }, - { - "epoch": 0.97, - "learning_rate": 4.268839921776785e-05, - "loss": 1.8567, - "step": 9380 - }, - { - "epoch": 0.97, - "learning_rate": 4.267966894817712e-05, - "loss": 1.9197, - "step": 9390 - }, - { - "epoch": 0.97, - "learning_rate": 4.2670938678586394e-05, - "loss": 2.0934, - "step": 9400 - }, - { - "epoch": 0.97, - "learning_rate": 4.2662208408995674e-05, - "loss": 1.8394, - "step": 9410 - }, - { - "epoch": 0.97, - "learning_rate": 4.265347813940495e-05, - "loss": 1.8115, - "step": 9420 - }, - { - "epoch": 0.97, - "learning_rate": 4.264474786981422e-05, - "loss": 1.8204, - "step": 9430 - }, - { - "epoch": 0.97, - "learning_rate": 4.26360176002235e-05, - "loss": 2.0474, - "step": 9440 - }, - { - "epoch": 0.97, - "learning_rate": 4.262728733063277e-05, - "loss": 1.9147, - "step": 9450 - }, - { - "epoch": 0.97, - "learning_rate": 4.261855706104205e-05, - "loss": 1.9168, - "step": 9460 - }, - { - "epoch": 0.98, - "learning_rate": 4.260982679145132e-05, - "loss": 1.7557, - "step": 9470 - }, - { - "epoch": 0.98, - "learning_rate": 4.2601096521860594e-05, - "loss": 2.0177, - "step": 9480 - }, - { - "epoch": 0.98, - "learning_rate": 4.2592366252269874e-05, - "loss": 1.8579, - "step": 9490 - }, - { - "epoch": 0.98, - "learning_rate": 4.258363598267915e-05, - "loss": 1.8935, - "step": 9500 - }, - { - "epoch": 0.98, - "learning_rate": 4.257490571308842e-05, - "loss": 1.8755, - "step": 9510 - }, - { - "epoch": 0.98, - "learning_rate": 4.2566175443497694e-05, - "loss": 2.0003, - "step": 9520 - }, - { - "epoch": 0.98, - "learning_rate": 4.2557445173906974e-05, - "loss": 1.8433, - "step": 9530 - }, - { - "epoch": 0.98, - "learning_rate": 4.254871490431625e-05, - "loss": 1.9238, - "step": 9540 - }, - { - "epoch": 0.98, - "learning_rate": 4.253998463472552e-05, - "loss": 1.8563, - "step": 9550 - }, - { - "epoch": 0.98, - "learning_rate": 4.25312543651348e-05, - "loss": 1.8431, - "step": 9560 - }, - { - "epoch": 0.99, - "learning_rate": 4.2522524095544074e-05, - "loss": 2.0862, - "step": 9570 - }, - { - "epoch": 0.99, - "learning_rate": 4.251379382595335e-05, - "loss": 1.8584, - "step": 9580 - }, - { - "epoch": 0.99, - "learning_rate": 4.250506355636262e-05, - "loss": 1.8315, - "step": 9590 - }, - { - "epoch": 0.99, - "learning_rate": 4.24963332867719e-05, - "loss": 1.8362, - "step": 9600 - }, - { - "epoch": 0.99, - "learning_rate": 4.2487603017181173e-05, - "loss": 1.8479, - "step": 9610 - }, - { - "epoch": 0.99, - "learning_rate": 4.247887274759045e-05, - "loss": 1.6951, - "step": 9620 - }, - { - "epoch": 0.99, - "learning_rate": 4.247014247799973e-05, - "loss": 1.8114, - "step": 9630 - }, - { - "epoch": 0.99, - "learning_rate": 4.246141220840899e-05, - "loss": 1.9353, - "step": 9640 - }, - { - "epoch": 0.99, - "learning_rate": 4.245268193881827e-05, - "loss": 1.8924, - "step": 9650 - }, - { - "epoch": 0.99, - "learning_rate": 4.2443951669227547e-05, - "loss": 1.8605, - "step": 9660 - }, { "epoch": 1.0, - "learning_rate": 4.243522139963682e-05, - "loss": 1.954, - "step": 9670 - }, - { - "epoch": 1.0, - "learning_rate": 4.24264911300461e-05, - "loss": 2.0013, - "step": 9680 - }, - { - "epoch": 1.0, - "learning_rate": 4.241776086045537e-05, - "loss": 1.7673, - "step": 9690 - }, - { - "epoch": 1.0, - "learning_rate": 4.2409030590864646e-05, - "loss": 1.8135, - "step": 9700 - }, - { - "epoch": 1.0, - "learning_rate": 4.240030032127392e-05, - "loss": 1.922, - "step": 9710 - }, - { - "epoch": 1.0, - "eval_loss": 1.8737725019454956, - "eval_runtime": 988.0923, - "eval_samples_per_second": 52.423, - "eval_steps_per_second": 8.738, - "step": 9712 + "eval_loss": 0.8561133146286011, + "eval_runtime": 33.8169, + "eval_samples_per_second": 47.343, + "eval_steps_per_second": 7.895, + "step": 300 } ], "logging_steps": 10, - "max_steps": 58272, + "max_steps": 1800, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, - "total_flos": 1.21809903353856e+17, + "total_flos": 3763911720960000.0, "train_batch_size": 6, "trial_name": null, "trial_params": null