{ "best_metric": 1.2129933834075928, "best_model_checkpoint": "/scratch/s3545881/dumped/translation/mt5/3086467/checkpoint-2600", "epoch": 2.5157232704402515, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.5e-06, "loss": 38.1911, "step": 10 }, { "epoch": 0.02, "learning_rate": 5e-06, "loss": 37.83, "step": 20 }, { "epoch": 0.03, "learning_rate": 7.5e-06, "loss": 37.6392, "step": 30 }, { "epoch": 0.04, "learning_rate": 1e-05, "loss": 37.1085, "step": 40 }, { "epoch": 0.05, "learning_rate": 1.25e-05, "loss": 34.981, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.5e-05, "loss": 35.0533, "step": 60 }, { "epoch": 0.07, "learning_rate": 1.7500000000000002e-05, "loss": 35.04, "step": 70 }, { "epoch": 0.08, "learning_rate": 2e-05, "loss": 32.9116, "step": 80 }, { "epoch": 0.09, "learning_rate": 2.2499999999999998e-05, "loss": 30.5531, "step": 90 }, { "epoch": 0.1, "learning_rate": 2.5e-05, "loss": 29.1274, "step": 100 }, { "epoch": 0.11, "learning_rate": 2.75e-05, "loss": 26.7246, "step": 110 }, { "epoch": 0.12, "learning_rate": 3e-05, "loss": 25.1373, "step": 120 }, { "epoch": 0.13, "learning_rate": 3.2500000000000004e-05, "loss": 23.2634, "step": 130 }, { "epoch": 0.14, "learning_rate": 3.5000000000000004e-05, "loss": 22.8425, "step": 140 }, { "epoch": 0.15, "learning_rate": 3.75e-05, "loss": 21.5063, "step": 150 }, { "epoch": 0.15, "learning_rate": 4e-05, "loss": 22.2997, "step": 160 }, { "epoch": 0.16, "learning_rate": 4.25e-05, "loss": 21.2809, "step": 170 }, { "epoch": 0.17, "learning_rate": 4.4999999999999996e-05, "loss": 19.5596, "step": 180 }, { "epoch": 0.18, "learning_rate": 4.75e-05, "loss": 18.0032, "step": 190 }, { "epoch": 0.19, "learning_rate": 5e-05, "loss": 17.8914, "step": 200 }, { "epoch": 0.19, "eval_bp": 1.0, "eval_counts": [ 54, 0, 0, 0 ], "eval_loss": 13.871957778930664, "eval_precisions": [ 0.0008811521553960779, 8.163429241109658e-06, 4.084023723930451e-06, 2.0431677206960303e-06 ], "eval_ref_len": 41379, "eval_runtime": 1372.0851, "eval_samples_per_second": 2.524, "eval_score": 1.5652328496305504e-05, "eval_steps_per_second": 2.524, "eval_sys_len": 6128340, "eval_totals": [ 6128340, 6124877, 6121414, 6117951 ], "step": 200 }, { "epoch": 0.2, "learning_rate": 5.25e-05, "loss": 17.8526, "step": 210 }, { "epoch": 0.21, "learning_rate": 5.5e-05, "loss": 16.2109, "step": 220 }, { "epoch": 0.22, "learning_rate": 5.75e-05, "loss": 12.2353, "step": 230 }, { "epoch": 0.23, "learning_rate": 6e-05, "loss": 10.9366, "step": 240 }, { "epoch": 0.24, "learning_rate": 6.25e-05, "loss": 9.2885, "step": 250 }, { "epoch": 0.25, "learning_rate": 6.500000000000001e-05, "loss": 8.6068, "step": 260 }, { "epoch": 0.26, "learning_rate": 6.75e-05, "loss": 7.8635, "step": 270 }, { "epoch": 0.27, "learning_rate": 7.000000000000001e-05, "loss": 7.3931, "step": 280 }, { "epoch": 0.28, "learning_rate": 7.25e-05, "loss": 7.2515, "step": 290 }, { "epoch": 0.29, "learning_rate": 7.5e-05, "loss": 6.5442, "step": 300 }, { "epoch": 0.3, "learning_rate": 7.75e-05, "loss": 5.5183, "step": 310 }, { "epoch": 0.31, "learning_rate": 8e-05, "loss": 4.8676, "step": 320 }, { "epoch": 0.32, "learning_rate": 8.25e-05, "loss": 4.5166, "step": 330 }, { "epoch": 0.33, "learning_rate": 8.5e-05, "loss": 4.2264, "step": 340 }, { "epoch": 0.34, "learning_rate": 8.75e-05, "loss": 3.9749, "step": 350 }, { "epoch": 0.35, "learning_rate": 8.999999999999999e-05, "loss": 3.9146, "step": 360 }, { "epoch": 0.36, "learning_rate": 9.25e-05, "loss": 3.9167, "step": 370 }, { "epoch": 0.37, "learning_rate": 9.5e-05, "loss": 3.5985, "step": 380 }, { "epoch": 0.38, "learning_rate": 9.750000000000001e-05, "loss": 3.6357, "step": 390 }, { "epoch": 0.39, "learning_rate": 0.0001, "loss": 3.5958, "step": 400 }, { "epoch": 0.39, "eval_bp": 1.0, "eval_counts": [ 23327, 14080, 9412, 6561 ], "eval_loss": 1.8158609867095947, "eval_precisions": [ 34.692663483989946, 22.07657813038979, 15.603706958006597, 11.51982301506479 ], "eval_ref_len": 41379, "eval_runtime": 813.3206, "eval_samples_per_second": 4.258, "eval_score": 19.262415591304183, "eval_steps_per_second": 4.258, "eval_sys_len": 67239, "eval_totals": [ 67239, 63778, 60319, 56954 ], "step": 400 }, { "epoch": 0.4, "learning_rate": 0.0001025, "loss": 3.434, "step": 410 }, { "epoch": 0.41, "learning_rate": 0.000105, "loss": 3.4426, "step": 420 }, { "epoch": 0.42, "learning_rate": 0.0001075, "loss": 3.403, "step": 430 }, { "epoch": 0.43, "learning_rate": 0.00011, "loss": 3.2811, "step": 440 }, { "epoch": 0.44, "learning_rate": 0.00011250000000000001, "loss": 3.1628, "step": 450 }, { "epoch": 0.45, "learning_rate": 0.000115, "loss": 3.2809, "step": 460 }, { "epoch": 0.45, "learning_rate": 0.0001175, "loss": 3.2381, "step": 470 }, { "epoch": 0.46, "learning_rate": 0.00012, "loss": 3.124, "step": 480 }, { "epoch": 0.47, "learning_rate": 0.0001225, "loss": 3.0901, "step": 490 }, { "epoch": 0.48, "learning_rate": 0.000125, "loss": 3.081, "step": 500 }, { "epoch": 0.49, "learning_rate": 0.0001275, "loss": 3.007, "step": 510 }, { "epoch": 0.5, "learning_rate": 0.00013000000000000002, "loss": 3.0786, "step": 520 }, { "epoch": 0.51, "learning_rate": 0.00013250000000000002, "loss": 2.846, "step": 530 }, { "epoch": 0.52, "learning_rate": 0.000135, "loss": 3.0639, "step": 540 }, { "epoch": 0.53, "learning_rate": 0.0001375, "loss": 2.9821, "step": 550 }, { "epoch": 0.54, "learning_rate": 0.00014000000000000001, "loss": 2.9451, "step": 560 }, { "epoch": 0.55, "learning_rate": 0.0001425, "loss": 3.0656, "step": 570 }, { "epoch": 0.56, "learning_rate": 0.000145, "loss": 2.9421, "step": 580 }, { "epoch": 0.57, "learning_rate": 0.0001475, "loss": 2.9525, "step": 590 }, { "epoch": 0.58, "learning_rate": 0.00015, "loss": 2.829, "step": 600 }, { "epoch": 0.58, "eval_bp": 1.0, "eval_counts": [ 25430, 16521, 11674, 8627 ], "eval_loss": 1.4924871921539307, "eval_precisions": [ 54.06267273268421, 37.91394148020654, 29.083933331672437, 23.48249768631934 ], "eval_ref_len": 41379, "eval_runtime": 567.7325, "eval_samples_per_second": 6.1, "eval_score": 34.39722990216642, "eval_steps_per_second": 6.1, "eval_sys_len": 47038, "eval_totals": [ 47038, 43575, 40139, 36738 ], "step": 600 }, { "epoch": 0.59, "learning_rate": 0.0001525, "loss": 2.9094, "step": 610 }, { "epoch": 0.6, "learning_rate": 0.000155, "loss": 2.8831, "step": 620 }, { "epoch": 0.61, "learning_rate": 0.0001575, "loss": 2.9047, "step": 630 }, { "epoch": 0.62, "learning_rate": 0.00016, "loss": 2.7907, "step": 640 }, { "epoch": 0.63, "learning_rate": 0.00016250000000000002, "loss": 2.9057, "step": 650 }, { "epoch": 0.64, "learning_rate": 0.000165, "loss": 2.9247, "step": 660 }, { "epoch": 0.65, "learning_rate": 0.0001675, "loss": 2.7817, "step": 670 }, { "epoch": 0.66, "learning_rate": 0.00017, "loss": 2.918, "step": 680 }, { "epoch": 0.67, "learning_rate": 0.0001725, "loss": 2.8107, "step": 690 }, { "epoch": 0.68, "learning_rate": 0.000175, "loss": 2.8891, "step": 700 }, { "epoch": 0.69, "learning_rate": 0.0001775, "loss": 2.8172, "step": 710 }, { "epoch": 0.7, "learning_rate": 0.00017999999999999998, "loss": 2.7916, "step": 720 }, { "epoch": 0.71, "learning_rate": 0.0001825, "loss": 2.8317, "step": 730 }, { "epoch": 0.72, "learning_rate": 0.000185, "loss": 2.8397, "step": 740 }, { "epoch": 0.73, "learning_rate": 0.0001875, "loss": 2.7955, "step": 750 }, { "epoch": 0.74, "learning_rate": 0.00019, "loss": 2.8544, "step": 760 }, { "epoch": 0.75, "learning_rate": 0.00019250000000000002, "loss": 2.6689, "step": 770 }, { "epoch": 0.75, "learning_rate": 0.00019500000000000002, "loss": 2.8188, "step": 780 }, { "epoch": 0.76, "learning_rate": 0.0001975, "loss": 2.8814, "step": 790 }, { "epoch": 0.77, "learning_rate": 0.0002, "loss": 2.8454, "step": 800 }, { "epoch": 0.77, "eval_bp": 1.0, "eval_counts": [ 26308, 17719, 12828, 9567 ], "eval_loss": 1.3925918340682983, "eval_precisions": [ 62.5651026183738, 45.920800290260715, 36.4950213371266, 30.1227959697733 ], "eval_ref_len": 41379, "eval_runtime": 443.6977, "eval_samples_per_second": 7.805, "eval_score": 42.15679907316048, "eval_steps_per_second": 7.805, "eval_sys_len": 42049, "eval_totals": [ 42049, 38586, 35150, 31760 ], "step": 800 }, { "epoch": 0.78, "learning_rate": 0.00020250000000000002, "loss": 2.6834, "step": 810 }, { "epoch": 0.79, "learning_rate": 0.000205, "loss": 2.7196, "step": 820 }, { "epoch": 0.8, "learning_rate": 0.0002075, "loss": 2.7657, "step": 830 }, { "epoch": 0.81, "learning_rate": 0.00021, "loss": 2.7398, "step": 840 }, { "epoch": 0.82, "learning_rate": 0.0002125, "loss": 2.7553, "step": 850 }, { "epoch": 0.83, "learning_rate": 0.000215, "loss": 2.7465, "step": 860 }, { "epoch": 0.84, "learning_rate": 0.0002175, "loss": 2.7388, "step": 870 }, { "epoch": 0.85, "learning_rate": 0.00022, "loss": 2.7649, "step": 880 }, { "epoch": 0.86, "learning_rate": 0.00022250000000000001, "loss": 2.6727, "step": 890 }, { "epoch": 0.87, "learning_rate": 0.00022500000000000002, "loss": 2.7676, "step": 900 }, { "epoch": 0.88, "learning_rate": 0.0002275, "loss": 2.6523, "step": 910 }, { "epoch": 0.89, "learning_rate": 0.00023, "loss": 2.6841, "step": 920 }, { "epoch": 0.9, "learning_rate": 0.0002325, "loss": 2.6218, "step": 930 }, { "epoch": 0.91, "learning_rate": 0.000235, "loss": 2.7321, "step": 940 }, { "epoch": 0.92, "learning_rate": 0.0002375, "loss": 2.6987, "step": 950 }, { "epoch": 0.93, "learning_rate": 0.00024, "loss": 2.6447, "step": 960 }, { "epoch": 0.94, "learning_rate": 0.00024249999999999999, "loss": 2.6362, "step": 970 }, { "epoch": 0.95, "learning_rate": 0.000245, "loss": 2.537, "step": 980 }, { "epoch": 0.96, "learning_rate": 0.0002475, "loss": 2.6371, "step": 990 }, { "epoch": 0.97, "learning_rate": 0.00025, "loss": 2.7243, "step": 1000 }, { "epoch": 0.97, "eval_bp": 0.9837746608874699, "eval_counts": [ 27014, 18474, 13532, 10167 ], "eval_loss": 1.3362044095993042, "eval_precisions": [ 66.35227077346302, 49.59463087248322, 40.0201106083459, 33.41659819227609 ], "eval_ref_len": 41379, "eval_runtime": 372.1502, "eval_samples_per_second": 9.305, "eval_score": 45.058670747228426, "eval_steps_per_second": 9.305, "eval_sys_len": 40713, "eval_totals": [ 40713, 37250, 33813, 30425 ], "step": 1000 }, { "epoch": 0.98, "learning_rate": 0.0002525, "loss": 2.697, "step": 1010 }, { "epoch": 0.99, "learning_rate": 0.000255, "loss": 2.7133, "step": 1020 }, { "epoch": 1.0, "learning_rate": 0.0002575, "loss": 2.7248, "step": 1030 }, { "epoch": 1.01, "learning_rate": 0.00026000000000000003, "loss": 2.6421, "step": 1040 }, { "epoch": 1.02, "learning_rate": 0.00026250000000000004, "loss": 2.5892, "step": 1050 }, { "epoch": 1.03, "learning_rate": 0.00026500000000000004, "loss": 2.4721, "step": 1060 }, { "epoch": 1.04, "learning_rate": 0.0002675, "loss": 2.5665, "step": 1070 }, { "epoch": 1.04, "learning_rate": 0.00027, "loss": 2.5726, "step": 1080 }, { "epoch": 1.05, "learning_rate": 0.0002725, "loss": 2.4981, "step": 1090 }, { "epoch": 1.06, "learning_rate": 0.000275, "loss": 2.521, "step": 1100 }, { "epoch": 1.07, "learning_rate": 0.0002775, "loss": 2.4478, "step": 1110 }, { "epoch": 1.08, "learning_rate": 0.00028000000000000003, "loss": 2.6395, "step": 1120 }, { "epoch": 1.09, "learning_rate": 0.0002825, "loss": 2.5281, "step": 1130 }, { "epoch": 1.1, "learning_rate": 0.000285, "loss": 2.5509, "step": 1140 }, { "epoch": 1.11, "learning_rate": 0.0002875, "loss": 2.6368, "step": 1150 }, { "epoch": 1.12, "learning_rate": 0.00029, "loss": 2.5603, "step": 1160 }, { "epoch": 1.13, "learning_rate": 0.0002925, "loss": 2.5537, "step": 1170 }, { "epoch": 1.14, "learning_rate": 0.000295, "loss": 2.5146, "step": 1180 }, { "epoch": 1.15, "learning_rate": 0.00029749999999999997, "loss": 2.5939, "step": 1190 }, { "epoch": 1.16, "learning_rate": 0.0003, "loss": 2.5142, "step": 1200 }, { "epoch": 1.16, "eval_bp": 0.9513923408635812, "eval_counts": [ 25700, 17411, 12691, 9535 ], "eval_loss": 1.3157392740249634, "eval_precisions": [ 65.20360268933148, 48.42846016911437, 39.05163394670441, 32.77984048404841 ], "eval_ref_len": 41379, "eval_runtime": 391.265, "eval_samples_per_second": 8.851, "eval_score": 42.65934917770265, "eval_steps_per_second": 8.851, "eval_sys_len": 39415, "eval_totals": [ 39415, 35952, 32498, 29088 ], "step": 1200 }, { "epoch": 1.17, "learning_rate": 0.0003025, "loss": 2.4939, "step": 1210 }, { "epoch": 1.18, "learning_rate": 0.000305, "loss": 2.484, "step": 1220 }, { "epoch": 1.19, "learning_rate": 0.0003075, "loss": 2.5238, "step": 1230 }, { "epoch": 1.2, "learning_rate": 0.00031, "loss": 2.5446, "step": 1240 }, { "epoch": 1.21, "learning_rate": 0.0003125, "loss": 2.4818, "step": 1250 }, { "epoch": 1.22, "learning_rate": 0.000315, "loss": 2.6008, "step": 1260 }, { "epoch": 1.23, "learning_rate": 0.0003175, "loss": 2.4716, "step": 1270 }, { "epoch": 1.24, "learning_rate": 0.00032, "loss": 2.5132, "step": 1280 }, { "epoch": 1.25, "learning_rate": 0.00032250000000000003, "loss": 2.3965, "step": 1290 }, { "epoch": 1.26, "learning_rate": 0.00032500000000000004, "loss": 2.4486, "step": 1300 }, { "epoch": 1.27, "learning_rate": 0.00032750000000000005, "loss": 2.5132, "step": 1310 }, { "epoch": 1.28, "learning_rate": 0.00033, "loss": 2.4423, "step": 1320 }, { "epoch": 1.29, "learning_rate": 0.0003325, "loss": 2.4479, "step": 1330 }, { "epoch": 1.3, "learning_rate": 0.000335, "loss": 2.4405, "step": 1340 }, { "epoch": 1.31, "learning_rate": 0.0003375, "loss": 2.3889, "step": 1350 }, { "epoch": 1.32, "learning_rate": 0.00034, "loss": 2.517, "step": 1360 }, { "epoch": 1.33, "learning_rate": 0.00034250000000000003, "loss": 2.4968, "step": 1370 }, { "epoch": 1.34, "learning_rate": 0.000345, "loss": 2.4416, "step": 1380 }, { "epoch": 1.34, "learning_rate": 0.0003475, "loss": 2.5025, "step": 1390 }, { "epoch": 1.35, "learning_rate": 0.00035, "loss": 2.4588, "step": 1400 }, { "epoch": 1.35, "eval_bp": 1.0, "eval_counts": [ 27295, 19037, 14140, 10781 ], "eval_loss": 1.291477918624878, "eval_precisions": [ 64.2794903798601, 48.812820512820515, 39.77944072469476, 33.54700189812366 ], "eval_ref_len": 41379, "eval_runtime": 432.656, "eval_samples_per_second": 8.004, "eval_score": 45.23552952312078, "eval_steps_per_second": 8.004, "eval_sys_len": 42463, "eval_totals": [ 42463, 39000, 35546, 32137 ], "step": 1400 }, { "epoch": 1.36, "learning_rate": 0.0003525, "loss": 2.3642, "step": 1410 }, { "epoch": 1.37, "learning_rate": 0.000355, "loss": 2.4791, "step": 1420 }, { "epoch": 1.38, "learning_rate": 0.0003575, "loss": 2.5208, "step": 1430 }, { "epoch": 1.39, "learning_rate": 0.00035999999999999997, "loss": 2.4694, "step": 1440 }, { "epoch": 1.4, "learning_rate": 0.0003625, "loss": 2.5213, "step": 1450 }, { "epoch": 1.41, "learning_rate": 0.000365, "loss": 2.593, "step": 1460 }, { "epoch": 1.42, "learning_rate": 0.0003675, "loss": 2.4419, "step": 1470 }, { "epoch": 1.43, "learning_rate": 0.00037, "loss": 2.5569, "step": 1480 }, { "epoch": 1.44, "learning_rate": 0.0003725, "loss": 2.4762, "step": 1490 }, { "epoch": 1.45, "learning_rate": 0.000375, "loss": 2.4133, "step": 1500 }, { "epoch": 1.46, "learning_rate": 0.0003775, "loss": 2.4356, "step": 1510 }, { "epoch": 1.47, "learning_rate": 0.00038, "loss": 2.3339, "step": 1520 }, { "epoch": 1.48, "learning_rate": 0.00038250000000000003, "loss": 2.5701, "step": 1530 }, { "epoch": 1.49, "learning_rate": 0.00038500000000000003, "loss": 2.4678, "step": 1540 }, { "epoch": 1.5, "learning_rate": 0.00038750000000000004, "loss": 2.4838, "step": 1550 }, { "epoch": 1.51, "learning_rate": 0.00039000000000000005, "loss": 2.4115, "step": 1560 }, { "epoch": 1.52, "learning_rate": 0.0003925, "loss": 2.4298, "step": 1570 }, { "epoch": 1.53, "learning_rate": 0.000395, "loss": 2.4766, "step": 1580 }, { "epoch": 1.54, "learning_rate": 0.0003975, "loss": 2.4569, "step": 1590 }, { "epoch": 1.55, "learning_rate": 0.0004, "loss": 2.4723, "step": 1600 }, { "epoch": 1.55, "eval_bp": 1.0, "eval_counts": [ 27979, 19648, 14655, 11210 ], "eval_loss": 1.2627893686294556, "eval_precisions": [ 66.49792038027333, 50.88573500466176, 41.684444065193276, 35.31042303209752 ], "eval_ref_len": 41379, "eval_runtime": 423.0403, "eval_samples_per_second": 8.186, "eval_score": 47.24112838739144, "eval_steps_per_second": 8.186, "eval_sys_len": 42075, "eval_totals": [ 42075, 38612, 35157, 31747 ], "step": 1600 }, { "epoch": 1.56, "learning_rate": 0.0004025, "loss": 2.4175, "step": 1610 }, { "epoch": 1.57, "learning_rate": 0.00040500000000000003, "loss": 2.4488, "step": 1620 }, { "epoch": 1.58, "learning_rate": 0.0004075, "loss": 2.3282, "step": 1630 }, { "epoch": 1.59, "learning_rate": 0.00041, "loss": 2.438, "step": 1640 }, { "epoch": 1.6, "learning_rate": 0.0004125, "loss": 2.4686, "step": 1650 }, { "epoch": 1.61, "learning_rate": 0.000415, "loss": 2.5199, "step": 1660 }, { "epoch": 1.62, "learning_rate": 0.0004175, "loss": 2.403, "step": 1670 }, { "epoch": 1.63, "learning_rate": 0.00042, "loss": 2.4685, "step": 1680 }, { "epoch": 1.64, "learning_rate": 0.00042249999999999997, "loss": 2.481, "step": 1690 }, { "epoch": 1.64, "learning_rate": 0.000425, "loss": 2.508, "step": 1700 }, { "epoch": 1.65, "learning_rate": 0.0004275, "loss": 2.3564, "step": 1710 }, { "epoch": 1.66, "learning_rate": 0.00043, "loss": 2.4453, "step": 1720 }, { "epoch": 1.67, "learning_rate": 0.0004325, "loss": 2.3718, "step": 1730 }, { "epoch": 1.68, "learning_rate": 0.000435, "loss": 2.4693, "step": 1740 }, { "epoch": 1.69, "learning_rate": 0.0004375, "loss": 2.3191, "step": 1750 }, { "epoch": 1.7, "learning_rate": 0.00044, "loss": 2.462, "step": 1760 }, { "epoch": 1.71, "learning_rate": 0.0004425, "loss": 2.4048, "step": 1770 }, { "epoch": 1.72, "learning_rate": 0.00044500000000000003, "loss": 2.4254, "step": 1780 }, { "epoch": 1.73, "learning_rate": 0.00044750000000000004, "loss": 2.4188, "step": 1790 }, { "epoch": 1.74, "learning_rate": 0.00045000000000000004, "loss": 2.3556, "step": 1800 }, { "epoch": 1.74, "eval_bp": 1.0, "eval_counts": [ 26533, 18244, 13379, 10091 ], "eval_loss": 1.2693731784820557, "eval_precisions": [ 59.99412110523222, 44.756274072075165, 35.86094135306101, 29.769596129450985 ], "eval_ref_len": 41379, "eval_runtime": 498.5618, "eval_samples_per_second": 6.946, "eval_score": 41.147107587517766, "eval_steps_per_second": 6.946, "eval_sys_len": 44226, "eval_totals": [ 44226, 40763, 37308, 33897 ], "step": 1800 }, { "epoch": 1.75, "learning_rate": 0.00045250000000000005, "loss": 2.4454, "step": 1810 }, { "epoch": 1.76, "learning_rate": 0.000455, "loss": 2.5073, "step": 1820 }, { "epoch": 1.77, "learning_rate": 0.0004575, "loss": 2.3686, "step": 1830 }, { "epoch": 1.78, "learning_rate": 0.00046, "loss": 2.328, "step": 1840 }, { "epoch": 1.79, "learning_rate": 0.0004625, "loss": 2.3687, "step": 1850 }, { "epoch": 1.8, "learning_rate": 0.000465, "loss": 2.4619, "step": 1860 }, { "epoch": 1.81, "learning_rate": 0.00046750000000000003, "loss": 2.4447, "step": 1870 }, { "epoch": 1.82, "learning_rate": 0.00047, "loss": 2.2559, "step": 1880 }, { "epoch": 1.83, "learning_rate": 0.0004725, "loss": 2.525, "step": 1890 }, { "epoch": 1.84, "learning_rate": 0.000475, "loss": 2.2474, "step": 1900 }, { "epoch": 1.85, "learning_rate": 0.0004775, "loss": 2.3437, "step": 1910 }, { "epoch": 1.86, "learning_rate": 0.00048, "loss": 2.3925, "step": 1920 }, { "epoch": 1.87, "learning_rate": 0.0004825, "loss": 2.376, "step": 1930 }, { "epoch": 1.88, "learning_rate": 0.00048499999999999997, "loss": 2.4907, "step": 1940 }, { "epoch": 1.89, "learning_rate": 0.0004875, "loss": 2.3756, "step": 1950 }, { "epoch": 1.9, "learning_rate": 0.00049, "loss": 2.3658, "step": 1960 }, { "epoch": 1.91, "learning_rate": 0.0004925, "loss": 2.3665, "step": 1970 }, { "epoch": 1.92, "learning_rate": 0.000495, "loss": 2.4109, "step": 1980 }, { "epoch": 1.93, "learning_rate": 0.0004975, "loss": 2.4669, "step": 1990 }, { "epoch": 1.94, "learning_rate": 0.0005, "loss": 2.2996, "step": 2000 }, { "epoch": 1.94, "eval_bp": 0.9865215426026213, "eval_counts": [ 28224, 20004, 15005, 11483 ], "eval_loss": 1.2304675579071045, "eval_precisions": [ 69.1341090018371, 53.54103099405813, 44.25600943813597, 37.66029320127251 ], "eval_ref_len": 41379, "eval_runtime": 351.3926, "eval_samples_per_second": 9.855, "eval_score": 49.166051902310336, "eval_steps_per_second": 9.855, "eval_sys_len": 40825, "eval_totals": [ 40825, 37362, 33905, 30491 ], "step": 2000 }, { "epoch": 1.94, "learning_rate": 0.0005024999999999999, "loss": 2.4546, "step": 2010 }, { "epoch": 1.95, "learning_rate": 0.000505, "loss": 2.2653, "step": 2020 }, { "epoch": 1.96, "learning_rate": 0.0005074999999999999, "loss": 2.5073, "step": 2030 }, { "epoch": 1.97, "learning_rate": 0.00051, "loss": 2.363, "step": 2040 }, { "epoch": 1.98, "learning_rate": 0.0005124999999999999, "loss": 2.4462, "step": 2050 }, { "epoch": 1.99, "learning_rate": 0.000515, "loss": 2.2254, "step": 2060 }, { "epoch": 2.0, "learning_rate": 0.0005175, "loss": 2.3066, "step": 2070 }, { "epoch": 2.01, "learning_rate": 0.0005200000000000001, "loss": 2.1796, "step": 2080 }, { "epoch": 2.02, "learning_rate": 0.0005225, "loss": 2.2277, "step": 2090 }, { "epoch": 2.03, "learning_rate": 0.0005250000000000001, "loss": 2.1904, "step": 2100 }, { "epoch": 2.04, "learning_rate": 0.0005275, "loss": 2.1982, "step": 2110 }, { "epoch": 2.05, "learning_rate": 0.0005300000000000001, "loss": 2.2417, "step": 2120 }, { "epoch": 2.06, "learning_rate": 0.0005325, "loss": 2.1695, "step": 2130 }, { "epoch": 2.07, "learning_rate": 0.000535, "loss": 2.0767, "step": 2140 }, { "epoch": 2.08, "learning_rate": 0.0005375, "loss": 2.2385, "step": 2150 }, { "epoch": 2.09, "learning_rate": 0.00054, "loss": 2.1445, "step": 2160 }, { "epoch": 2.1, "learning_rate": 0.0005425, "loss": 2.1167, "step": 2170 }, { "epoch": 2.11, "learning_rate": 0.000545, "loss": 2.1323, "step": 2180 }, { "epoch": 2.12, "learning_rate": 0.0005475, "loss": 2.2441, "step": 2190 }, { "epoch": 2.13, "learning_rate": 0.00055, "loss": 2.1594, "step": 2200 }, { "epoch": 2.13, "eval_bp": 0.9408488346847412, "eval_counts": [ 28655, 20486, 15454, 11874 ], "eval_loss": 1.2293421030044556, "eval_precisions": [ 73.47247506474193, 57.64533738533401, 48.16280736746969, 41.40023011749939 ], "eval_ref_len": 41379, "eval_runtime": 294.8562, "eval_samples_per_second": 11.745, "eval_score": 50.719002358526716, "eval_steps_per_second": 11.745, "eval_sys_len": 39001, "eval_totals": [ 39001, 35538, 32087, 28681 ], "step": 2200 }, { "epoch": 2.14, "learning_rate": 0.0005525, "loss": 2.112, "step": 2210 }, { "epoch": 2.15, "learning_rate": 0.000555, "loss": 2.0766, "step": 2220 }, { "epoch": 2.16, "learning_rate": 0.0005575, "loss": 2.1893, "step": 2230 }, { "epoch": 2.17, "learning_rate": 0.0005600000000000001, "loss": 2.3039, "step": 2240 }, { "epoch": 2.18, "learning_rate": 0.0005625000000000001, "loss": 2.2214, "step": 2250 }, { "epoch": 2.19, "learning_rate": 0.000565, "loss": 2.1937, "step": 2260 }, { "epoch": 2.2, "learning_rate": 0.0005675, "loss": 2.158, "step": 2270 }, { "epoch": 2.21, "learning_rate": 0.00057, "loss": 2.1039, "step": 2280 }, { "epoch": 2.22, "learning_rate": 0.0005725, "loss": 2.1506, "step": 2290 }, { "epoch": 2.23, "learning_rate": 0.000575, "loss": 2.2367, "step": 2300 }, { "epoch": 2.24, "learning_rate": 0.0005775, "loss": 2.1951, "step": 2310 }, { "epoch": 2.24, "learning_rate": 0.00058, "loss": 2.1446, "step": 2320 }, { "epoch": 2.25, "learning_rate": 0.0005825, "loss": 2.1444, "step": 2330 }, { "epoch": 2.26, "learning_rate": 0.000585, "loss": 2.201, "step": 2340 }, { "epoch": 2.27, "learning_rate": 0.0005875, "loss": 2.139, "step": 2350 }, { "epoch": 2.28, "learning_rate": 0.00059, "loss": 2.221, "step": 2360 }, { "epoch": 2.29, "learning_rate": 0.0005925, "loss": 2.1895, "step": 2370 }, { "epoch": 2.3, "learning_rate": 0.0005949999999999999, "loss": 2.1868, "step": 2380 }, { "epoch": 2.31, "learning_rate": 0.0005975, "loss": 2.3043, "step": 2390 }, { "epoch": 2.32, "learning_rate": 0.0006, "loss": 2.1554, "step": 2400 }, { "epoch": 2.32, "eval_bp": 0.9914082371498613, "eval_counts": [ 26699, 18555, 13744, 10428 ], "eval_loss": 1.218345284461975, "eval_precisions": [ 65.07982937233395, 49.398328097545395, 40.28372120288411, 33.946417526612194 ], "eval_ref_len": 41379, "eval_runtime": 418.5547, "eval_samples_per_second": 8.274, "eval_score": 45.396573040490786, "eval_steps_per_second": 8.274, "eval_sys_len": 41025, "eval_totals": [ 41025, 37562, 34118, 30719 ], "step": 2400 }, { "epoch": 2.33, "learning_rate": 0.0006025000000000001, "loss": 2.2707, "step": 2410 }, { "epoch": 2.34, "learning_rate": 0.000605, "loss": 2.1905, "step": 2420 }, { "epoch": 2.35, "learning_rate": 0.0006075000000000001, "loss": 2.1385, "step": 2430 }, { "epoch": 2.36, "learning_rate": 0.00061, "loss": 2.1184, "step": 2440 }, { "epoch": 2.37, "learning_rate": 0.0006125000000000001, "loss": 2.1212, "step": 2450 }, { "epoch": 2.38, "learning_rate": 0.000615, "loss": 2.0652, "step": 2460 }, { "epoch": 2.39, "learning_rate": 0.0006175000000000001, "loss": 2.1779, "step": 2470 }, { "epoch": 2.4, "learning_rate": 0.00062, "loss": 2.2506, "step": 2480 }, { "epoch": 2.41, "learning_rate": 0.0006225000000000001, "loss": 2.2224, "step": 2490 }, { "epoch": 2.42, "learning_rate": 0.000625, "loss": 2.1451, "step": 2500 }, { "epoch": 2.43, "learning_rate": 0.0006274999999999999, "loss": 2.2586, "step": 2510 }, { "epoch": 2.44, "learning_rate": 0.00063, "loss": 2.2808, "step": 2520 }, { "epoch": 2.45, "learning_rate": 0.0006324999999999999, "loss": 2.1829, "step": 2530 }, { "epoch": 2.46, "learning_rate": 0.000635, "loss": 2.0542, "step": 2540 }, { "epoch": 2.47, "learning_rate": 0.0006374999999999999, "loss": 2.1116, "step": 2550 }, { "epoch": 2.48, "learning_rate": 0.00064, "loss": 2.1203, "step": 2560 }, { "epoch": 2.49, "learning_rate": 0.0006425, "loss": 2.1802, "step": 2570 }, { "epoch": 2.5, "learning_rate": 0.0006450000000000001, "loss": 2.2629, "step": 2580 }, { "epoch": 2.51, "learning_rate": 0.0006475, "loss": 2.2309, "step": 2590 }, { "epoch": 2.52, "learning_rate": 0.0006500000000000001, "loss": 2.1065, "step": 2600 }, { "epoch": 2.52, "eval_bp": 0.9958589183497218, "eval_counts": [ 28856, 20658, 15562, 11975 ], "eval_loss": 1.2129933834075928, "eval_precisions": [ 70.02523781789944, 54.730427871241226, 45.38746463674279, 38.78416893379971 ], "eval_ref_len": 41379, "eval_runtime": 360.2161, "eval_samples_per_second": 9.614, "eval_score": 50.753533004552594, "eval_steps_per_second": 9.614, "eval_sys_len": 41208, "eval_totals": [ 41208, 37745, 34287, 30876 ], "step": 2600 } ], "max_steps": 258250, "num_train_epochs": 250, "total_flos": 3.96768438500352e+16, "trial_name": null, "trial_params": null }