{ "best_metric": 1.4220008850097656, "best_model_checkpoint": "/scratch/s3545881/dumped/translation/mt5/2811562/checkpoint-4600", "epoch": 14.9755832881172, "global_step": 4600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2.5e-06, "loss": 7.1735, "step": 10 }, { "epoch": 0.07, "learning_rate": 5e-06, "loss": 7.1895, "step": 20 }, { "epoch": 0.1, "learning_rate": 7.5e-06, "loss": 7.1285, "step": 30 }, { "epoch": 0.13, "learning_rate": 1e-05, "loss": 7.0295, "step": 40 }, { "epoch": 0.16, "learning_rate": 1.25e-05, "loss": 6.871, "step": 50 }, { "epoch": 0.2, "learning_rate": 1.5e-05, "loss": 6.7699, "step": 60 }, { "epoch": 0.23, "learning_rate": 1.7500000000000002e-05, "loss": 6.6541, "step": 70 }, { "epoch": 0.26, "learning_rate": 2e-05, "loss": 6.4698, "step": 80 }, { "epoch": 0.29, "learning_rate": 2.2499999999999998e-05, "loss": 6.4136, "step": 90 }, { "epoch": 0.33, "learning_rate": 2.5e-05, "loss": 6.2236, "step": 100 }, { "epoch": 0.36, "learning_rate": 2.75e-05, "loss": 6.1051, "step": 110 }, { "epoch": 0.39, "learning_rate": 3e-05, "loss": 5.9834, "step": 120 }, { "epoch": 0.42, "learning_rate": 3.2500000000000004e-05, "loss": 5.8334, "step": 130 }, { "epoch": 0.46, "learning_rate": 3.5000000000000004e-05, "loss": 5.7289, "step": 140 }, { "epoch": 0.49, "learning_rate": 3.75e-05, "loss": 5.6266, "step": 150 }, { "epoch": 0.52, "learning_rate": 4e-05, "loss": 5.488, "step": 160 }, { "epoch": 0.55, "learning_rate": 4.25e-05, "loss": 5.4496, "step": 170 }, { "epoch": 0.59, "learning_rate": 4.4999999999999996e-05, "loss": 5.3379, "step": 180 }, { "epoch": 0.62, "learning_rate": 4.75e-05, "loss": 5.2659, "step": 190 }, { "epoch": 0.65, "learning_rate": 5e-05, "loss": 5.1961, "step": 200 }, { "epoch": 0.65, "eval_bp": 1.0, "eval_counts": [ 12902, 6326, 3612, 2252 ], "eval_loss": 2.750916004180908, "eval_precisions": [ 4.406812080307677, 2.1854789674423745, 1.2622265088534077, 0.7959228393098232 ], "eval_ref_len": 41379, "eval_runtime": 262.2904, "eval_samples_per_second": 13.203, "eval_score": 1.7636802703289813, "eval_steps_per_second": 13.203, "eval_sys_len": 292774, "eval_totals": [ 292774, 289456, 286161, 282942 ], "step": 200 }, { "epoch": 0.68, "learning_rate": 5.25e-05, "loss": 5.17, "step": 210 }, { "epoch": 0.72, "learning_rate": 5.5e-05, "loss": 5.0736, "step": 220 }, { "epoch": 0.75, "learning_rate": 5.75e-05, "loss": 4.9931, "step": 230 }, { "epoch": 0.78, "learning_rate": 6e-05, "loss": 4.9457, "step": 240 }, { "epoch": 0.81, "learning_rate": 6.25e-05, "loss": 4.9108, "step": 250 }, { "epoch": 0.85, "learning_rate": 6.500000000000001e-05, "loss": 4.8353, "step": 260 }, { "epoch": 0.88, "learning_rate": 6.75e-05, "loss": 4.7755, "step": 270 }, { "epoch": 0.91, "learning_rate": 7.000000000000001e-05, "loss": 4.7669, "step": 280 }, { "epoch": 0.94, "learning_rate": 7.25e-05, "loss": 4.6519, "step": 290 }, { "epoch": 0.98, "learning_rate": 7.5e-05, "loss": 4.5968, "step": 300 }, { "epoch": 1.01, "learning_rate": 7.75e-05, "loss": 4.5827, "step": 310 }, { "epoch": 1.04, "learning_rate": 8e-05, "loss": 4.5738, "step": 320 }, { "epoch": 1.07, "learning_rate": 8.25e-05, "loss": 4.5119, "step": 330 }, { "epoch": 1.11, "learning_rate": 8.5e-05, "loss": 4.4241, "step": 340 }, { "epoch": 1.14, "learning_rate": 8.75e-05, "loss": 4.4606, "step": 350 }, { "epoch": 1.17, "learning_rate": 8.999999999999999e-05, "loss": 4.3099, "step": 360 }, { "epoch": 1.2, "learning_rate": 9.25e-05, "loss": 4.4264, "step": 370 }, { "epoch": 1.24, "learning_rate": 9.5e-05, "loss": 4.2987, "step": 380 }, { "epoch": 1.27, "learning_rate": 9.750000000000001e-05, "loss": 4.3585, "step": 390 }, { "epoch": 1.3, "learning_rate": 0.0001, "loss": 4.2855, "step": 400 }, { "epoch": 1.3, "eval_bp": 1.0, "eval_counts": [ 21482, 12393, 7972, 5427 ], "eval_loss": 2.258103609085083, "eval_precisions": [ 30.616840545008838, 18.57993133536229, 12.606144942203388, 9.071761697006169 ], "eval_ref_len": 41379, "eval_runtime": 225.927, "eval_samples_per_second": 15.328, "eval_score": 15.970540029112566, "eval_steps_per_second": 15.328, "eval_sys_len": 70164, "eval_totals": [ 70164, 66701, 63239, 59823 ], "step": 400 }, { "epoch": 1.33, "learning_rate": 0.0001025, "loss": 4.2679, "step": 410 }, { "epoch": 1.37, "learning_rate": 0.000105, "loss": 4.1738, "step": 420 }, { "epoch": 1.4, "learning_rate": 0.0001075, "loss": 4.1978, "step": 430 }, { "epoch": 1.43, "learning_rate": 0.00011, "loss": 4.2151, "step": 440 }, { "epoch": 1.47, "learning_rate": 0.00011250000000000001, "loss": 4.139, "step": 450 }, { "epoch": 1.5, "learning_rate": 0.000115, "loss": 4.1825, "step": 460 }, { "epoch": 1.53, "learning_rate": 0.0001175, "loss": 4.1172, "step": 470 }, { "epoch": 1.56, "learning_rate": 0.00012, "loss": 4.2029, "step": 480 }, { "epoch": 1.6, "learning_rate": 0.0001225, "loss": 4.1657, "step": 490 }, { "epoch": 1.63, "learning_rate": 0.000125, "loss": 4.0543, "step": 500 }, { "epoch": 1.66, "learning_rate": 0.0001275, "loss": 4.0173, "step": 510 }, { "epoch": 1.69, "learning_rate": 0.00013000000000000002, "loss": 4.0702, "step": 520 }, { "epoch": 1.73, "learning_rate": 0.00013250000000000002, "loss": 4.0172, "step": 530 }, { "epoch": 1.76, "learning_rate": 0.000135, "loss": 3.998, "step": 540 }, { "epoch": 1.79, "learning_rate": 0.0001375, "loss": 4.0506, "step": 550 }, { "epoch": 1.82, "learning_rate": 0.00014000000000000001, "loss": 3.9215, "step": 560 }, { "epoch": 1.86, "learning_rate": 0.0001425, "loss": 3.986, "step": 570 }, { "epoch": 1.89, "learning_rate": 0.000145, "loss": 3.9276, "step": 580 }, { "epoch": 1.92, "learning_rate": 0.0001475, "loss": 3.8933, "step": 590 }, { "epoch": 1.95, "learning_rate": 0.00015, "loss": 3.9534, "step": 600 }, { "epoch": 1.95, "eval_bp": 1.0, "eval_counts": [ 22456, 13442, 8970, 6310 ], "eval_loss": 2.0556671619415283, "eval_precisions": [ 34.72666821309828, 21.963334531551258, 15.534619514391604, 11.61485081083071 ], "eval_ref_len": 41379, "eval_runtime": 214.8511, "eval_samples_per_second": 16.118, "eval_score": 19.2605601155173, "eval_steps_per_second": 16.118, "eval_sys_len": 64665, "eval_totals": [ 64665, 61202, 57742, 54327 ], "step": 600 }, { "epoch": 1.99, "learning_rate": 0.0001525, "loss": 3.8561, "step": 610 }, { "epoch": 2.02, "learning_rate": 0.000155, "loss": 3.8084, "step": 620 }, { "epoch": 2.05, "learning_rate": 0.0001575, "loss": 3.8783, "step": 630 }, { "epoch": 2.08, "learning_rate": 0.00016, "loss": 3.8229, "step": 640 }, { "epoch": 2.12, "learning_rate": 0.00016250000000000002, "loss": 3.7835, "step": 650 }, { "epoch": 2.15, "learning_rate": 0.000165, "loss": 3.7847, "step": 660 }, { "epoch": 2.18, "learning_rate": 0.0001675, "loss": 3.8852, "step": 670 }, { "epoch": 2.21, "learning_rate": 0.00017, "loss": 3.7897, "step": 680 }, { "epoch": 2.25, "learning_rate": 0.0001725, "loss": 3.7977, "step": 690 }, { "epoch": 2.28, "learning_rate": 0.000175, "loss": 3.7706, "step": 700 }, { "epoch": 2.31, "learning_rate": 0.0001775, "loss": 3.7135, "step": 710 }, { "epoch": 2.34, "learning_rate": 0.00017999999999999998, "loss": 3.7296, "step": 720 }, { "epoch": 2.38, "learning_rate": 0.0001825, "loss": 3.7966, "step": 730 }, { "epoch": 2.41, "learning_rate": 0.000185, "loss": 3.6942, "step": 740 }, { "epoch": 2.44, "learning_rate": 0.0001875, "loss": 3.7341, "step": 750 }, { "epoch": 2.47, "learning_rate": 0.00019, "loss": 3.7491, "step": 760 }, { "epoch": 2.51, "learning_rate": 0.00019250000000000002, "loss": 3.6826, "step": 770 }, { "epoch": 2.54, "learning_rate": 0.00019500000000000002, "loss": 3.6763, "step": 780 }, { "epoch": 2.57, "learning_rate": 0.0001975, "loss": 3.5826, "step": 790 }, { "epoch": 2.6, "learning_rate": 0.0002, "loss": 3.6549, "step": 800 }, { "epoch": 2.6, "eval_bp": 1.0, "eval_counts": [ 24601, 15366, 10549, 7581 ], "eval_loss": 1.962100863456726, "eval_precisions": [ 53.96731380936712, 36.479749299653385, 27.26614800072372, 21.474095685919043 ], "eval_ref_len": 41379, "eval_runtime": 160.9582, "eval_samples_per_second": 21.515, "eval_score": 32.766520790805245, "eval_steps_per_second": 21.515, "eval_sys_len": 45585, "eval_totals": [ 45585, 42122, 38689, 35303 ], "step": 800 }, { "epoch": 2.64, "learning_rate": 0.00020250000000000002, "loss": 3.75, "step": 810 }, { "epoch": 2.67, "learning_rate": 0.000205, "loss": 3.5627, "step": 820 }, { "epoch": 2.7, "learning_rate": 0.0002075, "loss": 3.6651, "step": 830 }, { "epoch": 2.73, "learning_rate": 0.00021, "loss": 3.6312, "step": 840 }, { "epoch": 2.77, "learning_rate": 0.0002125, "loss": 3.6042, "step": 850 }, { "epoch": 2.8, "learning_rate": 0.000215, "loss": 3.5747, "step": 860 }, { "epoch": 2.83, "learning_rate": 0.0002175, "loss": 3.5851, "step": 870 }, { "epoch": 2.86, "learning_rate": 0.00022, "loss": 3.5618, "step": 880 }, { "epoch": 2.9, "learning_rate": 0.00022250000000000001, "loss": 3.5375, "step": 890 }, { "epoch": 2.93, "learning_rate": 0.00022500000000000002, "loss": 3.5951, "step": 900 }, { "epoch": 2.96, "learning_rate": 0.0002275, "loss": 3.582, "step": 910 }, { "epoch": 3.0, "learning_rate": 0.00023, "loss": 3.5011, "step": 920 }, { "epoch": 3.03, "learning_rate": 0.0002325, "loss": 3.4628, "step": 930 }, { "epoch": 3.06, "learning_rate": 0.000235, "loss": 3.5, "step": 940 }, { "epoch": 3.09, "learning_rate": 0.0002375, "loss": 3.5281, "step": 950 }, { "epoch": 3.13, "learning_rate": 0.00024, "loss": 3.4292, "step": 960 }, { "epoch": 3.16, "learning_rate": 0.00024249999999999999, "loss": 3.5526, "step": 970 }, { "epoch": 3.19, "learning_rate": 0.000245, "loss": 3.4211, "step": 980 }, { "epoch": 3.22, "learning_rate": 0.0002475, "loss": 3.4732, "step": 990 }, { "epoch": 3.26, "learning_rate": 0.00025, "loss": 3.5164, "step": 1000 }, { "epoch": 3.26, "eval_bp": 1.0, "eval_counts": [ 24977, 15865, 10957, 7898 ], "eval_loss": 1.8600678443908691, "eval_precisions": [ 52.542230262742706, 35.99627898534283, 26.975060932076122, 21.22547702230583 ], "eval_ref_len": 41379, "eval_runtime": 165.696, "eval_samples_per_second": 20.9, "eval_score": 32.258673250001976, "eval_steps_per_second": 20.9, "eval_sys_len": 47537, "eval_totals": [ 47537, 44074, 40619, 37210 ], "step": 1000 }, { "epoch": 3.29, "learning_rate": 0.0002525, "loss": 3.4451, "step": 1010 }, { "epoch": 3.32, "learning_rate": 0.000255, "loss": 3.4723, "step": 1020 }, { "epoch": 3.35, "learning_rate": 0.0002575, "loss": 3.4181, "step": 1030 }, { "epoch": 3.39, "learning_rate": 0.00026000000000000003, "loss": 3.432, "step": 1040 }, { "epoch": 3.42, "learning_rate": 0.00026250000000000004, "loss": 3.364, "step": 1050 }, { "epoch": 3.45, "learning_rate": 0.00026500000000000004, "loss": 3.3962, "step": 1060 }, { "epoch": 3.48, "learning_rate": 0.0002675, "loss": 3.4944, "step": 1070 }, { "epoch": 3.52, "learning_rate": 0.00027, "loss": 3.497, "step": 1080 }, { "epoch": 3.55, "learning_rate": 0.0002725, "loss": 3.4347, "step": 1090 }, { "epoch": 3.58, "learning_rate": 0.000275, "loss": 3.4106, "step": 1100 }, { "epoch": 3.61, "learning_rate": 0.0002775, "loss": 3.4384, "step": 1110 }, { "epoch": 3.65, "learning_rate": 0.00028000000000000003, "loss": 3.3933, "step": 1120 }, { "epoch": 3.68, "learning_rate": 0.0002825, "loss": 3.3448, "step": 1130 }, { "epoch": 3.71, "learning_rate": 0.000285, "loss": 3.3148, "step": 1140 }, { "epoch": 3.74, "learning_rate": 0.0002875, "loss": 3.2697, "step": 1150 }, { "epoch": 3.78, "learning_rate": 0.00029, "loss": 3.4034, "step": 1160 }, { "epoch": 3.81, "learning_rate": 0.0002925, "loss": 3.4009, "step": 1170 }, { "epoch": 3.84, "learning_rate": 0.000295, "loss": 3.3068, "step": 1180 }, { "epoch": 3.87, "learning_rate": 0.00029749999999999997, "loss": 3.4363, "step": 1190 }, { "epoch": 3.91, "learning_rate": 0.0003, "loss": 3.4333, "step": 1200 }, { "epoch": 3.91, "eval_bp": 1.0, "eval_counts": [ 24869, 15901, 11072, 8028 ], "eval_loss": 1.7562536001205444, "eval_precisions": [ 47.29745150247242, 32.37371989331596, 24.24773334501336, 18.999834331290085 ], "eval_ref_len": 41379, "eval_runtime": 214.2902, "eval_samples_per_second": 16.16, "eval_score": 28.980963954064624, "eval_steps_per_second": 16.16, "eval_sys_len": 52580, "eval_totals": [ 52580, 49117, 45662, 42253 ], "step": 1200 }, { "epoch": 3.94, "learning_rate": 0.0003025, "loss": 3.3664, "step": 1210 }, { "epoch": 3.97, "learning_rate": 0.000305, "loss": 3.3555, "step": 1220 }, { "epoch": 4.0, "learning_rate": 0.0003075, "loss": 3.3637, "step": 1230 }, { "epoch": 4.04, "learning_rate": 0.00031, "loss": 3.2577, "step": 1240 }, { "epoch": 4.07, "learning_rate": 0.0003125, "loss": 3.2929, "step": 1250 }, { "epoch": 4.1, "learning_rate": 0.000315, "loss": 3.2327, "step": 1260 }, { "epoch": 4.13, "learning_rate": 0.0003175, "loss": 3.3175, "step": 1270 }, { "epoch": 4.17, "learning_rate": 0.00032, "loss": 3.2598, "step": 1280 }, { "epoch": 4.2, "learning_rate": 0.00032250000000000003, "loss": 3.2131, "step": 1290 }, { "epoch": 4.23, "learning_rate": 0.00032500000000000004, "loss": 3.2873, "step": 1300 }, { "epoch": 4.26, "learning_rate": 0.00032750000000000005, "loss": 3.2196, "step": 1310 }, { "epoch": 4.3, "learning_rate": 0.00033, "loss": 3.3356, "step": 1320 }, { "epoch": 4.33, "learning_rate": 0.0003325, "loss": 3.2612, "step": 1330 }, { "epoch": 4.36, "learning_rate": 0.000335, "loss": 3.1025, "step": 1340 }, { "epoch": 4.4, "learning_rate": 0.0003375, "loss": 3.2298, "step": 1350 }, { "epoch": 4.43, "learning_rate": 0.00034, "loss": 3.1861, "step": 1360 }, { "epoch": 4.46, "learning_rate": 0.00034250000000000003, "loss": 3.2453, "step": 1370 }, { "epoch": 4.49, "learning_rate": 0.000345, "loss": 3.2116, "step": 1380 }, { "epoch": 4.53, "learning_rate": 0.0003475, "loss": 3.2456, "step": 1390 }, { "epoch": 4.56, "learning_rate": 0.00035, "loss": 3.2148, "step": 1400 }, { "epoch": 4.56, "eval_bp": 1.0, "eval_counts": [ 26323, 17238, 12161, 8884 ], "eval_loss": 1.7239768505096436, "eval_precisions": [ 59.99954412837345, 42.65881363062684, 32.90848081398495, 26.484617219174815 ], "eval_ref_len": 41379, "eval_runtime": 129.8828, "eval_samples_per_second": 26.662, "eval_score": 38.646891207773166, "eval_steps_per_second": 26.662, "eval_sys_len": 43872, "eval_totals": [ 43872, 40409, 36954, 33544 ], "step": 1400 }, { "epoch": 4.59, "learning_rate": 0.0003525, "loss": 3.2301, "step": 1410 }, { "epoch": 4.62, "learning_rate": 0.000355, "loss": 3.1593, "step": 1420 }, { "epoch": 4.66, "learning_rate": 0.0003575, "loss": 3.2608, "step": 1430 }, { "epoch": 4.69, "learning_rate": 0.00035999999999999997, "loss": 3.1374, "step": 1440 }, { "epoch": 4.72, "learning_rate": 0.0003625, "loss": 3.2217, "step": 1450 }, { "epoch": 4.75, "learning_rate": 0.000365, "loss": 3.2159, "step": 1460 }, { "epoch": 4.79, "learning_rate": 0.0003675, "loss": 3.2236, "step": 1470 }, { "epoch": 4.82, "learning_rate": 0.00037, "loss": 3.2447, "step": 1480 }, { "epoch": 4.85, "learning_rate": 0.0003725, "loss": 3.2312, "step": 1490 }, { "epoch": 4.88, "learning_rate": 0.000375, "loss": 3.2011, "step": 1500 }, { "epoch": 4.92, "learning_rate": 0.0003775, "loss": 3.2031, "step": 1510 }, { "epoch": 4.95, "learning_rate": 0.00038, "loss": 3.1596, "step": 1520 }, { "epoch": 4.98, "learning_rate": 0.00038250000000000003, "loss": 3.1573, "step": 1530 }, { "epoch": 5.01, "learning_rate": 0.00038500000000000003, "loss": 3.1637, "step": 1540 }, { "epoch": 5.05, "learning_rate": 0.00038750000000000004, "loss": 3.1586, "step": 1550 }, { "epoch": 5.08, "learning_rate": 0.00039000000000000005, "loss": 3.0617, "step": 1560 }, { "epoch": 5.11, "learning_rate": 0.0003925, "loss": 3.107, "step": 1570 }, { "epoch": 5.14, "learning_rate": 0.000395, "loss": 3.0754, "step": 1580 }, { "epoch": 5.18, "learning_rate": 0.0003975, "loss": 3.1476, "step": 1590 }, { "epoch": 5.21, "learning_rate": 0.0004, "loss": 3.1108, "step": 1600 }, { "epoch": 5.21, "eval_bp": 1.0, "eval_counts": [ 26002, 17132, 12142, 8953 ], "eval_loss": 1.6534518003463745, "eval_precisions": [ 60.742401943607355, 43.54412362749085, 33.831150738367235, 27.56295794593929 ], "eval_ref_len": 41379, "eval_runtime": 115.9758, "eval_samples_per_second": 29.86, "eval_score": 39.62926586495389, "eval_steps_per_second": 29.86, "eval_sys_len": 42807, "eval_totals": [ 42807, 39344, 35890, 32482 ], "step": 1600 }, { "epoch": 5.24, "learning_rate": 0.0004025, "loss": 3.0389, "step": 1610 }, { "epoch": 5.27, "learning_rate": 0.00040500000000000003, "loss": 3.1482, "step": 1620 }, { "epoch": 5.31, "learning_rate": 0.0004075, "loss": 3.1614, "step": 1630 }, { "epoch": 5.34, "learning_rate": 0.00041, "loss": 3.103, "step": 1640 }, { "epoch": 5.37, "learning_rate": 0.0004125, "loss": 3.144, "step": 1650 }, { "epoch": 5.4, "learning_rate": 0.000415, "loss": 3.0297, "step": 1660 }, { "epoch": 5.44, "learning_rate": 0.0004175, "loss": 3.0478, "step": 1670 }, { "epoch": 5.47, "learning_rate": 0.00042, "loss": 3.1079, "step": 1680 }, { "epoch": 5.5, "learning_rate": 0.00042249999999999997, "loss": 3.0638, "step": 1690 }, { "epoch": 5.53, "learning_rate": 0.000425, "loss": 3.0575, "step": 1700 }, { "epoch": 5.57, "learning_rate": 0.0004275, "loss": 3.1212, "step": 1710 }, { "epoch": 5.6, "learning_rate": 0.00043, "loss": 3.1422, "step": 1720 }, { "epoch": 5.63, "learning_rate": 0.0004325, "loss": 3.015, "step": 1730 }, { "epoch": 5.66, "learning_rate": 0.000435, "loss": 3.0151, "step": 1740 }, { "epoch": 5.7, "learning_rate": 0.0004375, "loss": 2.9851, "step": 1750 }, { "epoch": 5.73, "learning_rate": 0.00044, "loss": 3.0168, "step": 1760 }, { "epoch": 5.76, "learning_rate": 0.0004425, "loss": 2.9517, "step": 1770 }, { "epoch": 5.79, "learning_rate": 0.00044500000000000003, "loss": 3.0005, "step": 1780 }, { "epoch": 5.83, "learning_rate": 0.00044750000000000004, "loss": 3.0775, "step": 1790 }, { "epoch": 5.86, "learning_rate": 0.00045000000000000004, "loss": 3.0713, "step": 1800 }, { "epoch": 5.86, "eval_bp": 1.0, "eval_counts": [ 25470, 16777, 11904, 8755 ], "eval_loss": 1.617377758026123, "eval_precisions": [ 61.505397116708124, 44.2104985770001, 34.51135012901168, 28.164709667041983 ], "eval_ref_len": 41379, "eval_runtime": 121.9732, "eval_samples_per_second": 28.391, "eval_score": 40.32055180650292, "eval_steps_per_second": 28.391, "eval_sys_len": 41411, "eval_totals": [ 41411, 37948, 34493, 31085 ], "step": 1800 }, { "epoch": 5.89, "learning_rate": 0.00045250000000000005, "loss": 3.1174, "step": 1810 }, { "epoch": 5.93, "learning_rate": 0.000455, "loss": 2.9954, "step": 1820 }, { "epoch": 5.96, "learning_rate": 0.0004575, "loss": 3.0373, "step": 1830 }, { "epoch": 5.99, "learning_rate": 0.00046, "loss": 3.0608, "step": 1840 }, { "epoch": 6.02, "learning_rate": 0.0004625, "loss": 3.0306, "step": 1850 }, { "epoch": 6.06, "learning_rate": 0.000465, "loss": 2.9395, "step": 1860 }, { "epoch": 6.09, "learning_rate": 0.00046750000000000003, "loss": 3.0109, "step": 1870 }, { "epoch": 6.12, "learning_rate": 0.00047, "loss": 2.9562, "step": 1880 }, { "epoch": 6.15, "learning_rate": 0.0004725, "loss": 2.9876, "step": 1890 }, { "epoch": 6.19, "learning_rate": 0.000475, "loss": 2.9542, "step": 1900 }, { "epoch": 6.22, "learning_rate": 0.0004775, "loss": 2.8958, "step": 1910 }, { "epoch": 6.25, "learning_rate": 0.00048, "loss": 3.0231, "step": 1920 }, { "epoch": 6.28, "learning_rate": 0.0004825, "loss": 2.9398, "step": 1930 }, { "epoch": 6.32, "learning_rate": 0.00048499999999999997, "loss": 2.9273, "step": 1940 }, { "epoch": 6.35, "learning_rate": 0.0004875, "loss": 2.9431, "step": 1950 }, { "epoch": 6.38, "learning_rate": 0.00049, "loss": 2.976, "step": 1960 }, { "epoch": 6.41, "learning_rate": 0.0004925, "loss": 2.8617, "step": 1970 }, { "epoch": 6.45, "learning_rate": 0.000495, "loss": 2.9501, "step": 1980 }, { "epoch": 6.48, "learning_rate": 0.0004975, "loss": 2.9997, "step": 1990 }, { "epoch": 6.51, "learning_rate": 0.0005, "loss": 2.8715, "step": 2000 }, { "epoch": 6.51, "eval_bp": 1.0, "eval_counts": [ 26989, 18197, 13091, 9738 ], "eval_loss": 1.6082161664962769, "eval_precisions": [ 62.49623711937015, 45.81088565530437, 36.09717090387691, 29.638422205989773 ], "eval_ref_len": 41379, "eval_runtime": 116.2487, "eval_samples_per_second": 29.79, "eval_score": 41.834800362190585, "eval_steps_per_second": 29.79, "eval_sys_len": 43185, "eval_totals": [ 43185, 39722, 36266, 32856 ], "step": 2000 }, { "epoch": 6.54, "learning_rate": 0.0005024999999999999, "loss": 2.9772, "step": 2010 }, { "epoch": 6.58, "learning_rate": 0.000505, "loss": 2.9945, "step": 2020 }, { "epoch": 6.61, "learning_rate": 0.0005074999999999999, "loss": 2.8798, "step": 2030 }, { "epoch": 6.64, "learning_rate": 0.00051, "loss": 2.9645, "step": 2040 }, { "epoch": 6.67, "learning_rate": 0.0005124999999999999, "loss": 2.8499, "step": 2050 }, { "epoch": 6.71, "learning_rate": 0.000515, "loss": 2.9474, "step": 2060 }, { "epoch": 6.74, "learning_rate": 0.0005175, "loss": 2.9326, "step": 2070 }, { "epoch": 6.77, "learning_rate": 0.0005200000000000001, "loss": 2.8708, "step": 2080 }, { "epoch": 6.8, "learning_rate": 0.0005225, "loss": 2.9616, "step": 2090 }, { "epoch": 6.84, "learning_rate": 0.0005250000000000001, "loss": 2.9599, "step": 2100 }, { "epoch": 6.87, "learning_rate": 0.0005275, "loss": 2.8761, "step": 2110 }, { "epoch": 6.9, "learning_rate": 0.0005300000000000001, "loss": 3.0114, "step": 2120 }, { "epoch": 6.93, "learning_rate": 0.0005325, "loss": 2.9201, "step": 2130 }, { "epoch": 6.97, "learning_rate": 0.000535, "loss": 2.9802, "step": 2140 }, { "epoch": 7.0, "learning_rate": 0.0005375, "loss": 2.9193, "step": 2150 }, { "epoch": 7.03, "learning_rate": 0.00054, "loss": 2.8004, "step": 2160 }, { "epoch": 7.06, "learning_rate": 0.0005425, "loss": 2.8722, "step": 2170 }, { "epoch": 7.1, "learning_rate": 0.000545, "loss": 2.8149, "step": 2180 }, { "epoch": 7.13, "learning_rate": 0.0005475, "loss": 2.9112, "step": 2190 }, { "epoch": 7.16, "learning_rate": 0.00055, "loss": 2.8271, "step": 2200 }, { "epoch": 7.16, "eval_bp": 1.0, "eval_counts": [ 26323, 17758, 12855, 9588 ], "eval_loss": 1.547306776046753, "eval_precisions": [ 57.49011728219801, 41.9572819204234, 33.07177772060715, 27.03739213806328 ], "eval_ref_len": 41379, "eval_runtime": 141.8578, "eval_samples_per_second": 24.412, "eval_score": 38.32266653815659, "eval_steps_per_second": 24.412, "eval_sys_len": 45787, "eval_totals": [ 45787, 42324, 38870, 35462 ], "step": 2200 }, { "epoch": 7.19, "learning_rate": 0.0005525, "loss": 2.797, "step": 2210 }, { "epoch": 7.23, "learning_rate": 0.000555, "loss": 2.8883, "step": 2220 }, { "epoch": 7.26, "learning_rate": 0.0005575, "loss": 2.81, "step": 2230 }, { "epoch": 7.29, "learning_rate": 0.0005600000000000001, "loss": 2.8967, "step": 2240 }, { "epoch": 7.33, "learning_rate": 0.0005625000000000001, "loss": 2.8093, "step": 2250 }, { "epoch": 7.36, "learning_rate": 0.000565, "loss": 2.8463, "step": 2260 }, { "epoch": 7.39, "learning_rate": 0.0005675, "loss": 2.84, "step": 2270 }, { "epoch": 7.42, "learning_rate": 0.00057, "loss": 2.9007, "step": 2280 }, { "epoch": 7.46, "learning_rate": 0.0005725, "loss": 2.7673, "step": 2290 }, { "epoch": 7.49, "learning_rate": 0.000575, "loss": 2.8213, "step": 2300 }, { "epoch": 7.52, "learning_rate": 0.0005775, "loss": 2.8536, "step": 2310 }, { "epoch": 7.55, "learning_rate": 0.00058, "loss": 2.7698, "step": 2320 }, { "epoch": 7.59, "learning_rate": 0.0005825, "loss": 2.8104, "step": 2330 }, { "epoch": 7.62, "learning_rate": 0.000585, "loss": 2.8719, "step": 2340 }, { "epoch": 7.65, "learning_rate": 0.0005875, "loss": 2.8486, "step": 2350 }, { "epoch": 7.68, "learning_rate": 0.00059, "loss": 2.8941, "step": 2360 }, { "epoch": 7.72, "learning_rate": 0.0005925, "loss": 2.7646, "step": 2370 }, { "epoch": 7.75, "learning_rate": 0.0005949999999999999, "loss": 2.8403, "step": 2380 }, { "epoch": 7.78, "learning_rate": 0.0005975, "loss": 2.8481, "step": 2390 }, { "epoch": 7.81, "learning_rate": 0.0006, "loss": 2.8294, "step": 2400 }, { "epoch": 7.81, "eval_bp": 0.9479912131785159, "eval_counts": [ 27654, 18964, 13797, 10304 ], "eval_loss": 1.5549582242965698, "eval_precisions": [ 70.40044805376645, 52.945446423585906, 42.60042609689073, 35.52858423556996 ], "eval_ref_len": 41379, "eval_runtime": 69.016, "eval_samples_per_second": 50.177, "eval_score": 46.201202847952445, "eval_steps_per_second": 50.177, "eval_sys_len": 39281, "eval_totals": [ 39281, 35818, 32387, 29002 ], "step": 2400 }, { "epoch": 7.85, "learning_rate": 0.0006025000000000001, "loss": 2.8759, "step": 2410 }, { "epoch": 7.88, "learning_rate": 0.000605, "loss": 2.7903, "step": 2420 }, { "epoch": 7.91, "learning_rate": 0.0006075000000000001, "loss": 2.8623, "step": 2430 }, { "epoch": 7.94, "learning_rate": 0.00061, "loss": 2.7207, "step": 2440 }, { "epoch": 7.98, "learning_rate": 0.0006125000000000001, "loss": 2.889, "step": 2450 }, { "epoch": 8.01, "learning_rate": 0.000615, "loss": 2.8509, "step": 2460 }, { "epoch": 8.04, "learning_rate": 0.0006175000000000001, "loss": 2.7259, "step": 2470 }, { "epoch": 8.07, "learning_rate": 0.00062, "loss": 2.7449, "step": 2480 }, { "epoch": 8.11, "learning_rate": 0.0006225000000000001, "loss": 2.8018, "step": 2490 }, { "epoch": 8.14, "learning_rate": 0.000625, "loss": 2.7617, "step": 2500 }, { "epoch": 8.17, "learning_rate": 0.0006274999999999999, "loss": 2.7699, "step": 2510 }, { "epoch": 8.2, "learning_rate": 0.00063, "loss": 2.6918, "step": 2520 }, { "epoch": 8.24, "learning_rate": 0.0006324999999999999, "loss": 2.75, "step": 2530 }, { "epoch": 8.27, "learning_rate": 0.000635, "loss": 2.7171, "step": 2540 }, { "epoch": 8.3, "learning_rate": 0.0006374999999999999, "loss": 2.8189, "step": 2550 }, { "epoch": 8.33, "learning_rate": 0.00064, "loss": 2.8228, "step": 2560 }, { "epoch": 8.37, "learning_rate": 0.0006425, "loss": 2.6978, "step": 2570 }, { "epoch": 8.4, "learning_rate": 0.0006450000000000001, "loss": 2.714, "step": 2580 }, { "epoch": 8.43, "learning_rate": 0.0006475, "loss": 2.6573, "step": 2590 }, { "epoch": 8.46, "learning_rate": 0.0006500000000000001, "loss": 2.7587, "step": 2600 }, { "epoch": 8.46, "eval_bp": 1.0, "eval_counts": [ 26118, 17728, 12867, 9601 ], "eval_loss": 1.5125658512115479, "eval_precisions": [ 53.19239934013564, 38.84482229720847, 30.503532312360722, 24.765270326042096 ], "eval_ref_len": 41379, "eval_runtime": 170.3639, "eval_samples_per_second": 20.327, "eval_score": 35.34630181468264, "eval_steps_per_second": 20.327, "eval_sys_len": 49101, "eval_totals": [ 49101, 45638, 42182, 38768 ], "step": 2600 }, { "epoch": 8.5, "learning_rate": 0.0006525, "loss": 2.75, "step": 2610 }, { "epoch": 8.53, "learning_rate": 0.0006550000000000001, "loss": 2.7836, "step": 2620 }, { "epoch": 8.56, "learning_rate": 0.0006575, "loss": 2.7503, "step": 2630 }, { "epoch": 8.59, "learning_rate": 0.00066, "loss": 2.698, "step": 2640 }, { "epoch": 8.63, "learning_rate": 0.0006625, "loss": 2.7258, "step": 2650 }, { "epoch": 8.66, "learning_rate": 0.000665, "loss": 2.5896, "step": 2660 }, { "epoch": 8.69, "learning_rate": 0.0006675, "loss": 2.6823, "step": 2670 }, { "epoch": 8.72, "learning_rate": 0.00067, "loss": 2.7439, "step": 2680 }, { "epoch": 8.76, "learning_rate": 0.0006725, "loss": 2.7195, "step": 2690 }, { "epoch": 8.79, "learning_rate": 0.000675, "loss": 2.7273, "step": 2700 }, { "epoch": 8.82, "learning_rate": 0.0006775, "loss": 2.7541, "step": 2710 }, { "epoch": 8.86, "learning_rate": 0.00068, "loss": 2.7274, "step": 2720 }, { "epoch": 8.89, "learning_rate": 0.0006825000000000001, "loss": 2.7677, "step": 2730 }, { "epoch": 8.92, "learning_rate": 0.0006850000000000001, "loss": 2.6633, "step": 2740 }, { "epoch": 8.95, "learning_rate": 0.0006875, "loss": 2.7939, "step": 2750 }, { "epoch": 8.99, "learning_rate": 0.00069, "loss": 2.79, "step": 2760 }, { "epoch": 9.02, "learning_rate": 0.0006925, "loss": 2.7182, "step": 2770 }, { "epoch": 9.05, "learning_rate": 0.000695, "loss": 2.6684, "step": 2780 }, { "epoch": 9.08, "learning_rate": 0.0006975, "loss": 2.5166, "step": 2790 }, { "epoch": 9.12, "learning_rate": 0.0007, "loss": 2.6355, "step": 2800 }, { "epoch": 9.12, "eval_bp": 1.0, "eval_counts": [ 27093, 18616, 13629, 10263 ], "eval_loss": 1.4999642372131348, "eval_precisions": [ 64.89341317365269, 48.62224776033641, 39.12893686658436, 32.66598765039149 ], "eval_ref_len": 41379, "eval_runtime": 102.7522, "eval_samples_per_second": 33.702, "eval_score": 44.8133460943785, "eval_steps_per_second": 33.702, "eval_sys_len": 41750, "eval_totals": [ 41750, 38287, 34831, 31418 ], "step": 2800 }, { "epoch": 9.15, "learning_rate": 0.0007025, "loss": 2.5984, "step": 2810 }, { "epoch": 9.18, "learning_rate": 0.000705, "loss": 2.6082, "step": 2820 }, { "epoch": 9.21, "learning_rate": 0.0007075, "loss": 2.5937, "step": 2830 }, { "epoch": 9.25, "learning_rate": 0.00071, "loss": 2.6601, "step": 2840 }, { "epoch": 9.28, "learning_rate": 0.0007125, "loss": 2.7424, "step": 2850 }, { "epoch": 9.31, "learning_rate": 0.000715, "loss": 2.6533, "step": 2860 }, { "epoch": 9.34, "learning_rate": 0.0007175, "loss": 2.6721, "step": 2870 }, { "epoch": 9.38, "learning_rate": 0.0007199999999999999, "loss": 2.6039, "step": 2880 }, { "epoch": 9.41, "learning_rate": 0.0007225, "loss": 2.714, "step": 2890 }, { "epoch": 9.44, "learning_rate": 0.000725, "loss": 2.6207, "step": 2900 }, { "epoch": 9.47, "learning_rate": 0.0007275000000000001, "loss": 2.6425, "step": 2910 }, { "epoch": 9.51, "learning_rate": 0.00073, "loss": 2.6648, "step": 2920 }, { "epoch": 9.54, "learning_rate": 0.0007325000000000001, "loss": 2.66, "step": 2930 }, { "epoch": 9.57, "learning_rate": 0.000735, "loss": 2.6729, "step": 2940 }, { "epoch": 9.6, "learning_rate": 0.0007375000000000001, "loss": 2.6945, "step": 2950 }, { "epoch": 9.64, "learning_rate": 0.00074, "loss": 2.7379, "step": 2960 }, { "epoch": 9.67, "learning_rate": 0.0007425000000000001, "loss": 2.6595, "step": 2970 }, { "epoch": 9.7, "learning_rate": 0.000745, "loss": 2.5923, "step": 2980 }, { "epoch": 9.73, "learning_rate": 0.0007475000000000001, "loss": 2.6219, "step": 2990 }, { "epoch": 9.77, "learning_rate": 0.00075, "loss": 2.6973, "step": 3000 }, { "epoch": 9.77, "eval_bp": 1.0, "eval_counts": [ 26621, 18360, 13471, 10153 ], "eval_loss": 1.4800695180892944, "eval_precisions": [ 62.9993373722075, 47.328126208336556, 38.12151569176783, 31.803658689387294 ], "eval_ref_len": 41379, "eval_runtime": 122.6573, "eval_samples_per_second": 28.233, "eval_score": 43.60393001478689, "eval_steps_per_second": 28.233, "eval_sys_len": 42256, "eval_totals": [ 42256, 38793, 35337, 31924 ], "step": 3000 }, { "epoch": 9.8, "learning_rate": 0.0007524999999999999, "loss": 2.6431, "step": 3010 }, { "epoch": 9.83, "learning_rate": 0.000755, "loss": 2.7345, "step": 3020 }, { "epoch": 9.86, "learning_rate": 0.0007574999999999999, "loss": 2.659, "step": 3030 }, { "epoch": 9.9, "learning_rate": 0.00076, "loss": 2.6094, "step": 3040 }, { "epoch": 9.93, "learning_rate": 0.0007624999999999999, "loss": 2.6922, "step": 3050 }, { "epoch": 9.96, "learning_rate": 0.0007650000000000001, "loss": 2.5544, "step": 3060 }, { "epoch": 9.99, "learning_rate": 0.0007675, "loss": 2.6085, "step": 3070 }, { "epoch": 10.03, "learning_rate": 0.0007700000000000001, "loss": 2.6645, "step": 3080 }, { "epoch": 10.06, "learning_rate": 0.0007725, "loss": 2.5658, "step": 3090 }, { "epoch": 10.09, "learning_rate": 0.0007750000000000001, "loss": 2.471, "step": 3100 }, { "epoch": 10.12, "learning_rate": 0.0007775, "loss": 2.6254, "step": 3110 }, { "epoch": 10.16, "learning_rate": 0.0007800000000000001, "loss": 2.6021, "step": 3120 }, { "epoch": 10.19, "learning_rate": 0.0007825, "loss": 2.5589, "step": 3130 }, { "epoch": 10.22, "learning_rate": 0.000785, "loss": 2.6455, "step": 3140 }, { "epoch": 10.26, "learning_rate": 0.0007875, "loss": 2.5829, "step": 3150 }, { "epoch": 10.29, "learning_rate": 0.00079, "loss": 2.5544, "step": 3160 }, { "epoch": 10.32, "learning_rate": 0.0007925, "loss": 2.6645, "step": 3170 }, { "epoch": 10.35, "learning_rate": 0.000795, "loss": 2.5514, "step": 3180 }, { "epoch": 10.39, "learning_rate": 0.0007975, "loss": 2.6226, "step": 3190 }, { "epoch": 10.42, "learning_rate": 0.0008, "loss": 2.6525, "step": 3200 }, { "epoch": 10.42, "eval_bp": 0.9967563932434949, "eval_counts": [ 27078, 18724, 13712, 10292 ], "eval_loss": 1.4687154293060303, "eval_precisions": [ 65.65159413262214, 49.55799057752369, 39.94639631766008, 33.29235944879343 ], "eval_ref_len": 41379, "eval_runtime": 100.2708, "eval_samples_per_second": 34.536, "eval_score": 45.46051134991977, "eval_steps_per_second": 34.536, "eval_sys_len": 41245, "eval_totals": [ 41245, 37782, 34326, 30914 ], "step": 3200 }, { "epoch": 10.45, "learning_rate": 0.0008025, "loss": 2.4619, "step": 3210 }, { "epoch": 10.48, "learning_rate": 0.000805, "loss": 2.591, "step": 3220 }, { "epoch": 10.52, "learning_rate": 0.0008075000000000001, "loss": 2.5628, "step": 3230 }, { "epoch": 10.55, "learning_rate": 0.0008100000000000001, "loss": 2.5603, "step": 3240 }, { "epoch": 10.58, "learning_rate": 0.0008125000000000001, "loss": 2.5817, "step": 3250 }, { "epoch": 10.61, "learning_rate": 0.000815, "loss": 2.5175, "step": 3260 }, { "epoch": 10.65, "learning_rate": 0.0008175, "loss": 2.703, "step": 3270 }, { "epoch": 10.68, "learning_rate": 0.00082, "loss": 2.5288, "step": 3280 }, { "epoch": 10.71, "learning_rate": 0.0008225, "loss": 2.5492, "step": 3290 }, { "epoch": 10.74, "learning_rate": 0.000825, "loss": 2.5673, "step": 3300 }, { "epoch": 10.78, "learning_rate": 0.0008275, "loss": 2.554, "step": 3310 }, { "epoch": 10.81, "learning_rate": 0.00083, "loss": 2.5119, "step": 3320 }, { "epoch": 10.84, "learning_rate": 0.0008325, "loss": 2.5016, "step": 3330 }, { "epoch": 10.87, "learning_rate": 0.000835, "loss": 2.4994, "step": 3340 }, { "epoch": 10.91, "learning_rate": 0.0008375, "loss": 2.5911, "step": 3350 }, { "epoch": 10.94, "learning_rate": 0.00084, "loss": 2.5726, "step": 3360 }, { "epoch": 10.97, "learning_rate": 0.0008425, "loss": 2.51, "step": 3370 }, { "epoch": 11.0, "learning_rate": 0.0008449999999999999, "loss": 2.5334, "step": 3380 }, { "epoch": 11.04, "learning_rate": 0.0008475000000000001, "loss": 2.4746, "step": 3390 }, { "epoch": 11.07, "learning_rate": 0.00085, "loss": 2.4877, "step": 3400 }, { "epoch": 11.07, "eval_bp": 1.0, "eval_counts": [ 28177, 19707, 14557, 11007 ], "eval_loss": 1.4748998880386353, "eval_precisions": [ 66.65956943458718, 50.782075398768264, 41.14936680235188, 34.40870299165338 ], "eval_ref_len": 41379, "eval_runtime": 101.4694, "eval_samples_per_second": 34.129, "eval_score": 46.7897906835126, "eval_steps_per_second": 34.129, "eval_sys_len": 42270, "eval_totals": [ 42270, 38807, 35376, 31989 ], "step": 3400 }, { "epoch": 11.1, "learning_rate": 0.0008525000000000001, "loss": 2.4992, "step": 3410 }, { "epoch": 11.13, "learning_rate": 0.000855, "loss": 2.4663, "step": 3420 }, { "epoch": 11.17, "learning_rate": 0.0008575000000000001, "loss": 2.4946, "step": 3430 }, { "epoch": 11.2, "learning_rate": 0.00086, "loss": 2.3708, "step": 3440 }, { "epoch": 11.23, "learning_rate": 0.0008625000000000001, "loss": 2.4729, "step": 3450 }, { "epoch": 11.26, "learning_rate": 0.000865, "loss": 2.4967, "step": 3460 }, { "epoch": 11.3, "learning_rate": 0.0008675000000000001, "loss": 2.509, "step": 3470 }, { "epoch": 11.33, "learning_rate": 0.00087, "loss": 2.5677, "step": 3480 }, { "epoch": 11.36, "learning_rate": 0.0008725000000000001, "loss": 2.5343, "step": 3490 }, { "epoch": 11.39, "learning_rate": 0.000875, "loss": 2.5012, "step": 3500 }, { "epoch": 11.43, "learning_rate": 0.0008774999999999999, "loss": 2.4763, "step": 3510 }, { "epoch": 11.46, "learning_rate": 0.00088, "loss": 2.4803, "step": 3520 }, { "epoch": 11.49, "learning_rate": 0.0008824999999999999, "loss": 2.5076, "step": 3530 }, { "epoch": 11.52, "learning_rate": 0.000885, "loss": 2.5069, "step": 3540 }, { "epoch": 11.56, "learning_rate": 0.0008874999999999999, "loss": 2.583, "step": 3550 }, { "epoch": 11.59, "learning_rate": 0.0008900000000000001, "loss": 2.4486, "step": 3560 }, { "epoch": 11.62, "learning_rate": 0.0008925, "loss": 2.4851, "step": 3570 }, { "epoch": 11.65, "learning_rate": 0.0008950000000000001, "loss": 2.4667, "step": 3580 }, { "epoch": 11.69, "learning_rate": 0.0008975, "loss": 2.5137, "step": 3590 }, { "epoch": 11.72, "learning_rate": 0.0009000000000000001, "loss": 2.4139, "step": 3600 }, { "epoch": 11.72, "eval_bp": 1.0, "eval_counts": [ 26811, 18539, 13636, 10289 ], "eval_loss": 1.4529476165771484, "eval_precisions": [ 64.42938504794175, 48.595019659239846, 39.303625987202395, 32.89322250639386 ], "eval_ref_len": 41379, "eval_runtime": 110.5018, "eval_samples_per_second": 31.339, "eval_score": 44.8542601307095, "eval_steps_per_second": 31.339, "eval_sys_len": 41613, "eval_totals": [ 41613, 38150, 34694, 31280 ], "step": 3600 }, { "epoch": 11.75, "learning_rate": 0.0009025, "loss": 2.5072, "step": 3610 }, { "epoch": 11.79, "learning_rate": 0.0009050000000000001, "loss": 2.5271, "step": 3620 }, { "epoch": 11.82, "learning_rate": 0.0009075, "loss": 2.5521, "step": 3630 }, { "epoch": 11.85, "learning_rate": 0.00091, "loss": 2.3746, "step": 3640 }, { "epoch": 11.88, "learning_rate": 0.0009125, "loss": 2.5233, "step": 3650 }, { "epoch": 11.92, "learning_rate": 0.000915, "loss": 2.4914, "step": 3660 }, { "epoch": 11.95, "learning_rate": 0.0009175, "loss": 2.5744, "step": 3670 }, { "epoch": 11.98, "learning_rate": 0.00092, "loss": 2.5242, "step": 3680 }, { "epoch": 12.01, "learning_rate": 0.0009225, "loss": 2.4918, "step": 3690 }, { "epoch": 12.05, "learning_rate": 0.000925, "loss": 2.4723, "step": 3700 }, { "epoch": 12.08, "learning_rate": 0.0009275, "loss": 2.3919, "step": 3710 }, { "epoch": 12.11, "learning_rate": 0.00093, "loss": 2.4419, "step": 3720 }, { "epoch": 12.14, "learning_rate": 0.0009325000000000001, "loss": 2.3503, "step": 3730 }, { "epoch": 12.18, "learning_rate": 0.0009350000000000001, "loss": 2.388, "step": 3740 }, { "epoch": 12.21, "learning_rate": 0.0009375, "loss": 2.4935, "step": 3750 }, { "epoch": 12.24, "learning_rate": 0.00094, "loss": 2.3981, "step": 3760 }, { "epoch": 12.27, "learning_rate": 0.0009425, "loss": 2.3666, "step": 3770 }, { "epoch": 12.31, "learning_rate": 0.000945, "loss": 2.4323, "step": 3780 }, { "epoch": 12.34, "learning_rate": 0.0009475, "loss": 2.4228, "step": 3790 }, { "epoch": 12.37, "learning_rate": 0.00095, "loss": 2.4972, "step": 3800 }, { "epoch": 12.37, "eval_bp": 1.0, "eval_counts": [ 26521, 18238, 13350, 10018 ], "eval_loss": 1.4506553411483765, "eval_precisions": [ 58.50005514503143, 43.556553305311425, 34.72312534137904, 28.567354853427627 ], "eval_ref_len": 41379, "eval_runtime": 144.0478, "eval_samples_per_second": 24.041, "eval_score": 39.87260907407475, "eval_steps_per_second": 24.041, "eval_sys_len": 45335, "eval_totals": [ 45335, 41872, 38447, 35068 ], "step": 3800 }, { "epoch": 12.4, "learning_rate": 0.0009525, "loss": 2.4917, "step": 3810 }, { "epoch": 12.44, "learning_rate": 0.000955, "loss": 2.432, "step": 3820 }, { "epoch": 12.47, "learning_rate": 0.0009575, "loss": 2.4027, "step": 3830 }, { "epoch": 12.5, "learning_rate": 0.00096, "loss": 2.4329, "step": 3840 }, { "epoch": 12.53, "learning_rate": 0.0009625, "loss": 2.4021, "step": 3850 }, { "epoch": 12.57, "learning_rate": 0.000965, "loss": 2.5415, "step": 3860 }, { "epoch": 12.6, "learning_rate": 0.0009675, "loss": 2.3346, "step": 3870 }, { "epoch": 12.63, "learning_rate": 0.0009699999999999999, "loss": 2.4081, "step": 3880 }, { "epoch": 12.66, "learning_rate": 0.0009725000000000001, "loss": 2.419, "step": 3890 }, { "epoch": 12.7, "learning_rate": 0.000975, "loss": 2.4059, "step": 3900 }, { "epoch": 12.73, "learning_rate": 0.0009775, "loss": 2.3385, "step": 3910 }, { "epoch": 12.76, "learning_rate": 0.00098, "loss": 2.4485, "step": 3920 }, { "epoch": 12.79, "learning_rate": 0.0009825, "loss": 2.3762, "step": 3930 }, { "epoch": 12.83, "learning_rate": 0.000985, "loss": 2.4831, "step": 3940 }, { "epoch": 12.86, "learning_rate": 0.0009875, "loss": 2.4976, "step": 3950 }, { "epoch": 12.89, "learning_rate": 0.00099, "loss": 2.4762, "step": 3960 }, { "epoch": 12.92, "learning_rate": 0.0009925000000000001, "loss": 2.4236, "step": 3970 }, { "epoch": 12.96, "learning_rate": 0.000995, "loss": 2.4216, "step": 3980 }, { "epoch": 12.99, "learning_rate": 0.0009975000000000001, "loss": 2.3395, "step": 3990 }, { "epoch": 13.02, "learning_rate": 0.001, "loss": 2.3016, "step": 4000 }, { "epoch": 13.02, "eval_bp": 0.9913107345232208, "eval_counts": [ 28315, 19934, 14780, 11227 ], "eval_loss": 1.4572792053222656, "eval_precisions": [ 69.02562102337826, 53.07524362319612, 43.30881706566648, 36.52244632400781 ], "eval_ref_len": 41379, "eval_runtime": 79.9221, "eval_samples_per_second": 43.33, "eval_score": 48.63729731945582, "eval_steps_per_second": 43.33, "eval_sys_len": 41021, "eval_totals": [ 41021, 37558, 34127, 30740 ], "step": 4000 }, { "epoch": 13.05, "learning_rate": 0.001, "loss": 2.4317, "step": 4010 }, { "epoch": 13.09, "learning_rate": 0.001, "loss": 2.3301, "step": 4020 }, { "epoch": 13.12, "learning_rate": 0.001, "loss": 2.3254, "step": 4030 }, { "epoch": 13.15, "learning_rate": 0.001, "loss": 2.3389, "step": 4040 }, { "epoch": 13.19, "learning_rate": 0.001, "loss": 2.4051, "step": 4050 }, { "epoch": 13.22, "learning_rate": 0.001, "loss": 2.3279, "step": 4060 }, { "epoch": 13.25, "learning_rate": 0.001, "loss": 2.3066, "step": 4070 }, { "epoch": 13.28, "learning_rate": 0.001, "loss": 2.3249, "step": 4080 }, { "epoch": 13.32, "learning_rate": 0.001, "loss": 2.3425, "step": 4090 }, { "epoch": 13.35, "learning_rate": 0.001, "loss": 2.2547, "step": 4100 }, { "epoch": 13.38, "learning_rate": 0.001, "loss": 2.3656, "step": 4110 }, { "epoch": 13.41, "learning_rate": 0.001, "loss": 2.4137, "step": 4120 }, { "epoch": 13.45, "learning_rate": 0.001, "loss": 2.3343, "step": 4130 }, { "epoch": 13.48, "learning_rate": 0.001, "loss": 2.3188, "step": 4140 }, { "epoch": 13.51, "learning_rate": 0.001, "loss": 2.4557, "step": 4150 }, { "epoch": 13.54, "learning_rate": 0.001, "loss": 2.2675, "step": 4160 }, { "epoch": 13.58, "learning_rate": 0.001, "loss": 2.3623, "step": 4170 }, { "epoch": 13.61, "learning_rate": 0.001, "loss": 2.396, "step": 4180 }, { "epoch": 13.64, "learning_rate": 0.001, "loss": 2.3394, "step": 4190 }, { "epoch": 13.67, "learning_rate": 0.001, "loss": 2.385, "step": 4200 }, { "epoch": 13.67, "eval_bp": 0.9989119002923912, "eval_counts": [ 26679, 18540, 13665, 10316 ], "eval_loss": 1.4272891283035278, "eval_precisions": [ 64.54492669473073, 48.955665284782555, 39.701908829425605, 33.265615426784045 ], "eval_ref_len": 41379, "eval_runtime": 99.3005, "eval_samples_per_second": 34.874, "eval_score": 45.14869395785754, "eval_steps_per_second": 34.874, "eval_sys_len": 41334, "eval_totals": [ 41334, 37871, 34419, 31011 ], "step": 4200 }, { "epoch": 13.71, "learning_rate": 0.001, "loss": 2.2847, "step": 4210 }, { "epoch": 13.74, "learning_rate": 0.001, "loss": 2.3304, "step": 4220 }, { "epoch": 13.77, "learning_rate": 0.001, "loss": 2.3681, "step": 4230 }, { "epoch": 13.8, "learning_rate": 0.001, "loss": 2.3145, "step": 4240 }, { "epoch": 13.84, "learning_rate": 0.001, "loss": 2.3572, "step": 4250 }, { "epoch": 13.87, "learning_rate": 0.001, "loss": 2.3935, "step": 4260 }, { "epoch": 13.9, "learning_rate": 0.001, "loss": 2.2827, "step": 4270 }, { "epoch": 13.93, "learning_rate": 0.001, "loss": 2.3749, "step": 4280 }, { "epoch": 13.97, "learning_rate": 0.001, "loss": 2.3992, "step": 4290 }, { "epoch": 14.0, "learning_rate": 0.001, "loss": 2.398, "step": 4300 }, { "epoch": 14.03, "learning_rate": 0.001, "loss": 2.3011, "step": 4310 }, { "epoch": 14.06, "learning_rate": 0.001, "loss": 2.2635, "step": 4320 }, { "epoch": 14.1, "learning_rate": 0.001, "loss": 2.2014, "step": 4330 }, { "epoch": 14.13, "learning_rate": 0.001, "loss": 2.2523, "step": 4340 }, { "epoch": 14.16, "learning_rate": 0.001, "loss": 2.2941, "step": 4350 }, { "epoch": 14.19, "learning_rate": 0.001, "loss": 2.2856, "step": 4360 }, { "epoch": 14.23, "learning_rate": 0.001, "loss": 2.2582, "step": 4370 }, { "epoch": 14.26, "learning_rate": 0.001, "loss": 2.2663, "step": 4380 }, { "epoch": 14.29, "learning_rate": 0.001, "loss": 2.2206, "step": 4390 }, { "epoch": 14.32, "learning_rate": 0.001, "loss": 2.2694, "step": 4400 }, { "epoch": 14.32, "eval_bp": 0.9634105401802233, "eval_counts": [ 27903, 19593, 14525, 11034 ], "eval_loss": 1.4508273601531982, "eval_precisions": [ 69.94635515892911, 53.78407312855143, 44.05119340066115, 37.32873236577692 ], "eval_ref_len": 41379, "eval_runtime": 78.5863, "eval_samples_per_second": 44.066, "eval_score": 48.04700463705004, "eval_steps_per_second": 44.066, "eval_sys_len": 39892, "eval_totals": [ 39892, 36429, 32973, 29559 ], "step": 4400 }, { "epoch": 14.36, "learning_rate": 0.001, "loss": 2.3494, "step": 4410 }, { "epoch": 14.39, "learning_rate": 0.001, "loss": 2.3291, "step": 4420 }, { "epoch": 14.42, "learning_rate": 0.001, "loss": 2.2417, "step": 4430 }, { "epoch": 14.45, "learning_rate": 0.001, "loss": 2.2454, "step": 4440 }, { "epoch": 14.49, "learning_rate": 0.001, "loss": 2.2897, "step": 4450 }, { "epoch": 14.52, "learning_rate": 0.001, "loss": 2.2596, "step": 4460 }, { "epoch": 14.55, "learning_rate": 0.001, "loss": 2.2098, "step": 4470 }, { "epoch": 14.58, "learning_rate": 0.001, "loss": 2.2473, "step": 4480 }, { "epoch": 14.62, "learning_rate": 0.001, "loss": 2.3361, "step": 4490 }, { "epoch": 14.65, "learning_rate": 0.001, "loss": 2.2572, "step": 4500 }, { "epoch": 14.68, "learning_rate": 0.001, "loss": 2.2152, "step": 4510 }, { "epoch": 14.72, "learning_rate": 0.001, "loss": 2.3067, "step": 4520 }, { "epoch": 14.75, "learning_rate": 0.001, "loss": 2.2733, "step": 4530 }, { "epoch": 14.78, "learning_rate": 0.001, "loss": 2.2986, "step": 4540 }, { "epoch": 14.81, "learning_rate": 0.001, "loss": 2.2666, "step": 4550 }, { "epoch": 14.85, "learning_rate": 0.001, "loss": 2.3507, "step": 4560 }, { "epoch": 14.88, "learning_rate": 0.001, "loss": 2.3074, "step": 4570 }, { "epoch": 14.91, "learning_rate": 0.001, "loss": 2.3392, "step": 4580 }, { "epoch": 14.94, "learning_rate": 0.001, "loss": 2.2536, "step": 4590 }, { "epoch": 14.98, "learning_rate": 0.001, "loss": 2.2633, "step": 4600 }, { "epoch": 14.98, "eval_bp": 0.9646873396517708, "eval_counts": [ 27585, 19365, 14361, 10897 ], "eval_loss": 1.4220008850097656, "eval_precisions": [ 69.06091179931403, 53.08388157894737, 43.48523845571537, 36.79802789315503 ], "eval_ref_len": 41379, "eval_runtime": 85.4344, "eval_samples_per_second": 40.534, "eval_score": 47.47630639176475, "eval_steps_per_second": 40.534, "eval_sys_len": 39943, "eval_totals": [ 39943, 36480, 33025, 29613 ], "step": 4600 } ], "max_steps": 76750, "num_train_epochs": 250, "total_flos": 4.434135803203584e+16, "trial_name": null, "trial_params": null }