ben81828's picture
Training in progress, step 1600
896b922 verified
raw
history blame
86.4 kB
{"current_steps": 5, "total_steps": 3400, "loss": 2.8889, "lr": 2.9411764705882355e-06, "epoch": 0.0012876641771825909, "percentage": 0.15, "elapsed_time": "0:02:46", "remaining_time": "1 day, 7:20:36", "throughput": 317.97, "total_tokens": 52840}
{"current_steps": 10, "total_steps": 3400, "loss": 2.8165, "lr": 5.882352941176471e-06, "epoch": 0.0025753283543651817, "percentage": 0.29, "elapsed_time": "0:04:14", "remaining_time": "23:57:41", "throughput": 414.71, "total_tokens": 105528}
{"current_steps": 15, "total_steps": 3400, "loss": 2.8363, "lr": 8.823529411764707e-06, "epoch": 0.0038629925315477724, "percentage": 0.44, "elapsed_time": "0:05:42", "remaining_time": "21:27:36", "throughput": 463.76, "total_tokens": 158768}
{"current_steps": 20, "total_steps": 3400, "loss": 2.6853, "lr": 1.1764705882352942e-05, "epoch": 0.0051506567087303634, "percentage": 0.59, "elapsed_time": "0:07:11", "remaining_time": "20:14:07", "throughput": 489.08, "total_tokens": 210816}
{"current_steps": 25, "total_steps": 3400, "loss": 2.2992, "lr": 1.4705882352941177e-05, "epoch": 0.006438320885912954, "percentage": 0.74, "elapsed_time": "0:08:38", "remaining_time": "19:26:57", "throughput": 506.96, "total_tokens": 262936}
{"current_steps": 30, "total_steps": 3400, "loss": 1.8923, "lr": 1.7647058823529414e-05, "epoch": 0.007725985063095545, "percentage": 0.88, "elapsed_time": "0:10:08", "remaining_time": "18:58:31", "throughput": 518.43, "total_tokens": 315264}
{"current_steps": 35, "total_steps": 3400, "loss": 1.6984, "lr": 2.058823529411765e-05, "epoch": 0.009013649240278136, "percentage": 1.03, "elapsed_time": "0:11:36", "remaining_time": "18:36:02", "throughput": 528.14, "total_tokens": 367840}
{"current_steps": 40, "total_steps": 3400, "loss": 1.6434, "lr": 2.3529411764705884e-05, "epoch": 0.010301313417460727, "percentage": 1.18, "elapsed_time": "0:13:06", "remaining_time": "18:20:53", "throughput": 534.26, "total_tokens": 420112}
{"current_steps": 45, "total_steps": 3400, "loss": 1.4659, "lr": 2.647058823529412e-05, "epoch": 0.011588977594643318, "percentage": 1.32, "elapsed_time": "0:14:34", "remaining_time": "18:06:24", "throughput": 540.69, "total_tokens": 472728}
{"current_steps": 50, "total_steps": 3400, "loss": 1.3506, "lr": 2.9411764705882354e-05, "epoch": 0.012876641771825908, "percentage": 1.47, "elapsed_time": "0:16:03", "remaining_time": "17:56:24", "throughput": 544.27, "total_tokens": 524648}
{"current_steps": 50, "total_steps": 3400, "eval_loss": 1.1727452278137207, "epoch": 0.012876641771825908, "percentage": 1.47, "elapsed_time": "0:17:10", "remaining_time": "19:10:27", "throughput": 509.24, "total_tokens": 524648}
{"current_steps": 55, "total_steps": 3400, "loss": 1.1455, "lr": 3.235294117647059e-05, "epoch": 0.014164305949008499, "percentage": 1.62, "elapsed_time": "0:18:46", "remaining_time": "19:01:52", "throughput": 511.73, "total_tokens": 576472}
{"current_steps": 60, "total_steps": 3400, "loss": 0.9971, "lr": 3.529411764705883e-05, "epoch": 0.01545197012619109, "percentage": 1.76, "elapsed_time": "0:20:15", "remaining_time": "18:48:02", "throughput": 516.56, "total_tokens": 628056}
{"current_steps": 65, "total_steps": 3400, "loss": 0.9073, "lr": 3.8235294117647055e-05, "epoch": 0.01673963430337368, "percentage": 1.91, "elapsed_time": "0:21:44", "remaining_time": "18:35:51", "throughput": 521.45, "total_tokens": 680448}
{"current_steps": 70, "total_steps": 3400, "loss": 0.8386, "lr": 4.11764705882353e-05, "epoch": 0.018027298480556272, "percentage": 2.06, "elapsed_time": "0:23:19", "remaining_time": "18:29:19", "throughput": 524.37, "total_tokens": 733664}
{"current_steps": 75, "total_steps": 3400, "loss": 0.7827, "lr": 4.411764705882353e-05, "epoch": 0.01931496265773886, "percentage": 2.21, "elapsed_time": "0:24:51", "remaining_time": "18:22:18", "throughput": 526.93, "total_tokens": 786096}
{"current_steps": 80, "total_steps": 3400, "loss": 0.7814, "lr": 4.705882352941177e-05, "epoch": 0.020602626834921454, "percentage": 2.35, "elapsed_time": "0:26:25", "remaining_time": "18:16:38", "throughput": 528.66, "total_tokens": 838192}
{"current_steps": 85, "total_steps": 3400, "loss": 0.7297, "lr": 5e-05, "epoch": 0.021890291012104043, "percentage": 2.5, "elapsed_time": "0:27:54", "remaining_time": "18:08:20", "throughput": 531.61, "total_tokens": 890112}
{"current_steps": 90, "total_steps": 3400, "loss": 0.7894, "lr": 5.294117647058824e-05, "epoch": 0.023177955189286635, "percentage": 2.65, "elapsed_time": "0:29:24", "remaining_time": "18:01:48", "throughput": 534.58, "total_tokens": 943472}
{"current_steps": 95, "total_steps": 3400, "loss": 0.7758, "lr": 5.588235294117647e-05, "epoch": 0.024465619366469224, "percentage": 2.79, "elapsed_time": "0:30:52", "remaining_time": "17:54:11", "throughput": 538.09, "total_tokens": 996872}
{"current_steps": 100, "total_steps": 3400, "loss": 0.7577, "lr": 5.882352941176471e-05, "epoch": 0.025753283543651816, "percentage": 2.94, "elapsed_time": "0:32:21", "remaining_time": "17:47:53", "throughput": 540.69, "total_tokens": 1049816}
{"current_steps": 100, "total_steps": 3400, "eval_loss": 0.7517351508140564, "epoch": 0.025753283543651816, "percentage": 2.94, "elapsed_time": "0:33:00", "remaining_time": "18:09:13", "throughput": 530.1, "total_tokens": 1049816}
{"current_steps": 105, "total_steps": 3400, "loss": 0.7579, "lr": 6.176470588235295e-05, "epoch": 0.027040947720834405, "percentage": 3.09, "elapsed_time": "0:34:36", "remaining_time": "18:05:49", "throughput": 531.09, "total_tokens": 1102584}
{"current_steps": 110, "total_steps": 3400, "loss": 0.7659, "lr": 6.470588235294118e-05, "epoch": 0.028328611898016998, "percentage": 3.24, "elapsed_time": "0:36:03", "remaining_time": "17:58:31", "throughput": 534.07, "total_tokens": 1155512}
{"current_steps": 115, "total_steps": 3400, "loss": 0.7469, "lr": 6.764705882352942e-05, "epoch": 0.029616276075199587, "percentage": 3.38, "elapsed_time": "0:37:32", "remaining_time": "17:52:22", "throughput": 536.29, "total_tokens": 1207976}
{"current_steps": 120, "total_steps": 3400, "loss": 0.7353, "lr": 7.058823529411765e-05, "epoch": 0.03090394025238218, "percentage": 3.53, "elapsed_time": "0:39:00", "remaining_time": "17:46:11", "throughput": 538.27, "total_tokens": 1259776}
{"current_steps": 125, "total_steps": 3400, "loss": 0.7537, "lr": 7.352941176470589e-05, "epoch": 0.03219160442956477, "percentage": 3.68, "elapsed_time": "0:40:29", "remaining_time": "17:40:51", "throughput": 540.35, "total_tokens": 1312760}
{"current_steps": 130, "total_steps": 3400, "loss": 0.7669, "lr": 7.647058823529411e-05, "epoch": 0.03347926860674736, "percentage": 3.82, "elapsed_time": "0:41:57", "remaining_time": "17:35:14", "throughput": 542.54, "total_tokens": 1365616}
{"current_steps": 135, "total_steps": 3400, "loss": 0.722, "lr": 7.941176470588235e-05, "epoch": 0.03476693278392995, "percentage": 3.97, "elapsed_time": "0:43:26", "remaining_time": "17:30:31", "throughput": 543.91, "total_tokens": 1417544}
{"current_steps": 140, "total_steps": 3400, "loss": 0.7502, "lr": 8.23529411764706e-05, "epoch": 0.036054596961112545, "percentage": 4.12, "elapsed_time": "0:44:52", "remaining_time": "17:24:54", "throughput": 545.93, "total_tokens": 1469856}
{"current_steps": 145, "total_steps": 3400, "loss": 0.7174, "lr": 8.529411764705883e-05, "epoch": 0.037342261138295134, "percentage": 4.26, "elapsed_time": "0:46:20", "remaining_time": "17:20:11", "throughput": 547.25, "total_tokens": 1521496}
{"current_steps": 150, "total_steps": 3400, "loss": 0.7018, "lr": 8.823529411764706e-05, "epoch": 0.03862992531547772, "percentage": 4.41, "elapsed_time": "0:47:46", "remaining_time": "17:15:14", "throughput": 548.82, "total_tokens": 1573376}
{"current_steps": 150, "total_steps": 3400, "eval_loss": 0.7309949994087219, "epoch": 0.03862992531547772, "percentage": 4.41, "elapsed_time": "0:48:25", "remaining_time": "17:29:02", "throughput": 541.61, "total_tokens": 1573376}
{"current_steps": 155, "total_steps": 3400, "loss": 0.738, "lr": 9.11764705882353e-05, "epoch": 0.03991758949266031, "percentage": 4.56, "elapsed_time": "0:49:58", "remaining_time": "17:26:05", "throughput": 542.4, "total_tokens": 1626136}
{"current_steps": 160, "total_steps": 3400, "loss": 0.7579, "lr": 9.411764705882353e-05, "epoch": 0.04120525366984291, "percentage": 4.71, "elapsed_time": "0:51:24", "remaining_time": "17:20:51", "throughput": 544.34, "total_tokens": 1678760}
{"current_steps": 165, "total_steps": 3400, "loss": 0.7502, "lr": 9.705882352941177e-05, "epoch": 0.042492917847025496, "percentage": 4.85, "elapsed_time": "0:52:51", "remaining_time": "17:16:21", "throughput": 545.87, "total_tokens": 1731240}
{"current_steps": 170, "total_steps": 3400, "loss": 0.7448, "lr": 0.0001, "epoch": 0.043780582024208085, "percentage": 5.0, "elapsed_time": "0:54:17", "remaining_time": "17:11:35", "throughput": 547.57, "total_tokens": 1783816}
{"current_steps": 175, "total_steps": 3400, "loss": 0.6648, "lr": 9.999940874631277e-05, "epoch": 0.045068246201390674, "percentage": 5.15, "elapsed_time": "0:55:45", "remaining_time": "17:07:24", "throughput": 548.45, "total_tokens": 1834592}
{"current_steps": 180, "total_steps": 3400, "loss": 0.7759, "lr": 9.999763499923432e-05, "epoch": 0.04635591037857327, "percentage": 5.29, "elapsed_time": "0:57:11", "remaining_time": "17:02:59", "throughput": 550.3, "total_tokens": 1888176}
{"current_steps": 185, "total_steps": 3400, "loss": 0.7167, "lr": 9.999467880071402e-05, "epoch": 0.04764357455575586, "percentage": 5.44, "elapsed_time": "0:58:39", "remaining_time": "16:59:16", "throughput": 551.35, "total_tokens": 1940280}
{"current_steps": 190, "total_steps": 3400, "loss": 0.7483, "lr": 9.999054022066641e-05, "epoch": 0.04893123873293845, "percentage": 5.59, "elapsed_time": "1:00:04", "remaining_time": "16:55:04", "throughput": 552.88, "total_tokens": 1993096}
{"current_steps": 195, "total_steps": 3400, "loss": 0.7464, "lr": 9.998521935696953e-05, "epoch": 0.050218902910121044, "percentage": 5.74, "elapsed_time": "1:01:31", "remaining_time": "16:51:14", "throughput": 554.14, "total_tokens": 2045648}
{"current_steps": 200, "total_steps": 3400, "loss": 0.7594, "lr": 9.997871633546257e-05, "epoch": 0.05150656708730363, "percentage": 5.88, "elapsed_time": "1:02:57", "remaining_time": "16:47:25", "throughput": 555.61, "total_tokens": 2099008}
{"current_steps": 200, "total_steps": 3400, "eval_loss": 0.7274295687675476, "epoch": 0.05150656708730363, "percentage": 5.88, "elapsed_time": "1:03:35", "remaining_time": "16:57:35", "throughput": 550.06, "total_tokens": 2099008}
{"current_steps": 205, "total_steps": 3400, "loss": 0.706, "lr": 9.997103130994296e-05, "epoch": 0.05279423126448622, "percentage": 6.03, "elapsed_time": "1:05:08", "remaining_time": "16:55:12", "throughput": 550.54, "total_tokens": 2151680}
{"current_steps": 210, "total_steps": 3400, "loss": 0.7186, "lr": 9.996216446216267e-05, "epoch": 0.05408189544166881, "percentage": 6.18, "elapsed_time": "1:06:34", "remaining_time": "16:51:24", "throughput": 551.65, "total_tokens": 2203784}
{"current_steps": 215, "total_steps": 3400, "loss": 0.7009, "lr": 9.995211600182397e-05, "epoch": 0.055369559618851406, "percentage": 6.32, "elapsed_time": "1:08:00", "remaining_time": "16:47:34", "throughput": 552.73, "total_tokens": 2255632}
{"current_steps": 220, "total_steps": 3400, "loss": 0.6801, "lr": 9.994088616657444e-05, "epoch": 0.056657223796033995, "percentage": 6.47, "elapsed_time": "1:09:28", "remaining_time": "16:44:15", "throughput": 553.68, "total_tokens": 2308096}
{"current_steps": 225, "total_steps": 3400, "loss": 0.7569, "lr": 9.992847522200133e-05, "epoch": 0.057944887973216584, "percentage": 6.62, "elapsed_time": "1:10:55", "remaining_time": "16:40:46", "throughput": 554.88, "total_tokens": 2361168}
{"current_steps": 230, "total_steps": 3400, "loss": 0.7402, "lr": 9.99148834616253e-05, "epoch": 0.05923255215039917, "percentage": 6.76, "elapsed_time": "1:12:23", "remaining_time": "16:37:48", "throughput": 555.71, "total_tokens": 2413896}
{"current_steps": 235, "total_steps": 3400, "loss": 0.7191, "lr": 9.990011120689351e-05, "epoch": 0.06052021632758177, "percentage": 6.91, "elapsed_time": "1:13:51", "remaining_time": "16:34:47", "throughput": 556.47, "total_tokens": 2466136}
{"current_steps": 240, "total_steps": 3400, "loss": 0.7274, "lr": 9.988415880717194e-05, "epoch": 0.06180788050476436, "percentage": 7.06, "elapsed_time": "1:15:20", "remaining_time": "16:31:59", "throughput": 557.21, "total_tokens": 2518848}
{"current_steps": 245, "total_steps": 3400, "loss": 0.7704, "lr": 9.986702663973722e-05, "epoch": 0.06309554468194695, "percentage": 7.21, "elapsed_time": "1:16:48", "remaining_time": "16:29:01", "throughput": 558.22, "total_tokens": 2572384}
{"current_steps": 250, "total_steps": 3400, "loss": 0.7346, "lr": 9.98487151097676e-05, "epoch": 0.06438320885912954, "percentage": 7.35, "elapsed_time": "1:18:17", "remaining_time": "16:26:29", "throughput": 558.88, "total_tokens": 2625352}
{"current_steps": 250, "total_steps": 3400, "eval_loss": 0.7181503176689148, "epoch": 0.06438320885912954, "percentage": 7.35, "elapsed_time": "1:18:55", "remaining_time": "16:34:29", "throughput": 554.38, "total_tokens": 2625352}
{"current_steps": 255, "total_steps": 3400, "loss": 0.7408, "lr": 9.98292246503335e-05, "epoch": 0.06567087303631212, "percentage": 7.5, "elapsed_time": "1:20:28", "remaining_time": "16:32:30", "throughput": 554.67, "total_tokens": 2678216}
{"current_steps": 260, "total_steps": 3400, "loss": 0.7044, "lr": 9.980855572238714e-05, "epoch": 0.06695853721349472, "percentage": 7.65, "elapsed_time": "1:21:57", "remaining_time": "16:29:49", "throughput": 555.29, "total_tokens": 2730664}
{"current_steps": 265, "total_steps": 3400, "loss": 0.7334, "lr": 9.978670881475172e-05, "epoch": 0.06824620139067732, "percentage": 7.79, "elapsed_time": "1:23:25", "remaining_time": "16:26:51", "throughput": 556.14, "total_tokens": 2783584}
{"current_steps": 270, "total_steps": 3400, "loss": 0.7075, "lr": 9.976368444410985e-05, "epoch": 0.0695338655678599, "percentage": 7.94, "elapsed_time": "1:24:53", "remaining_time": "16:24:09", "throughput": 556.79, "total_tokens": 2836152}
{"current_steps": 275, "total_steps": 3400, "loss": 0.7039, "lr": 9.973948315499126e-05, "epoch": 0.0708215297450425, "percentage": 8.09, "elapsed_time": "1:26:21", "remaining_time": "16:21:18", "throughput": 557.35, "total_tokens": 2887808}
{"current_steps": 280, "total_steps": 3400, "loss": 0.6953, "lr": 9.971410551976002e-05, "epoch": 0.07210919392222509, "percentage": 8.24, "elapsed_time": "1:27:50", "remaining_time": "16:18:43", "throughput": 557.81, "total_tokens": 2939656}
{"current_steps": 285, "total_steps": 3400, "loss": 0.7022, "lr": 9.968755213860094e-05, "epoch": 0.07339685809940767, "percentage": 8.38, "elapsed_time": "1:29:17", "remaining_time": "16:15:56", "throughput": 558.4, "total_tokens": 2991632}
{"current_steps": 290, "total_steps": 3400, "loss": 0.6796, "lr": 9.96598236395054e-05, "epoch": 0.07468452227659027, "percentage": 8.53, "elapsed_time": "1:30:45", "remaining_time": "16:13:18", "throughput": 558.92, "total_tokens": 3043616}
{"current_steps": 295, "total_steps": 3400, "loss": 0.7346, "lr": 9.96309206782565e-05, "epoch": 0.07597218645377285, "percentage": 8.68, "elapsed_time": "1:32:13", "remaining_time": "16:10:42", "throughput": 559.66, "total_tokens": 3096920}
{"current_steps": 300, "total_steps": 3400, "loss": 0.6815, "lr": 9.960084393841355e-05, "epoch": 0.07725985063095545, "percentage": 8.82, "elapsed_time": "1:33:40", "remaining_time": "16:07:58", "throughput": 560.28, "total_tokens": 3149032}
{"current_steps": 300, "total_steps": 3400, "eval_loss": 0.7073924541473389, "epoch": 0.07725985063095545, "percentage": 8.82, "elapsed_time": "1:34:18", "remaining_time": "16:14:32", "throughput": 556.5, "total_tokens": 3149032}
{"current_steps": 305, "total_steps": 3400, "loss": 0.7208, "lr": 9.956959413129585e-05, "epoch": 0.07854751480813804, "percentage": 8.97, "elapsed_time": "1:35:53", "remaining_time": "16:13:04", "throughput": 556.45, "total_tokens": 3201560}
{"current_steps": 310, "total_steps": 3400, "loss": 0.7144, "lr": 9.953717199596598e-05, "epoch": 0.07983517898532062, "percentage": 9.12, "elapsed_time": "1:37:21", "remaining_time": "16:10:27", "throughput": 557.14, "total_tokens": 3254632}
{"current_steps": 315, "total_steps": 3400, "loss": 0.6861, "lr": 9.95035782992122e-05, "epoch": 0.08112284316250322, "percentage": 9.26, "elapsed_time": "1:38:52", "remaining_time": "16:08:22", "throughput": 557.33, "total_tokens": 3306432}
{"current_steps": 320, "total_steps": 3400, "loss": 0.6836, "lr": 9.94688138355304e-05, "epoch": 0.08241050733968582, "percentage": 9.41, "elapsed_time": "1:40:21", "remaining_time": "16:05:55", "throughput": 557.75, "total_tokens": 3358392}
{"current_steps": 325, "total_steps": 3400, "loss": 0.7353, "lr": 9.943287942710527e-05, "epoch": 0.0836981715168684, "percentage": 9.56, "elapsed_time": "1:41:50", "remaining_time": "16:03:39", "throughput": 558.24, "total_tokens": 3411424}
{"current_steps": 330, "total_steps": 3400, "loss": 0.6774, "lr": 9.939577592379088e-05, "epoch": 0.08498583569405099, "percentage": 9.71, "elapsed_time": "1:43:18", "remaining_time": "16:01:07", "throughput": 558.66, "total_tokens": 3462992}
{"current_steps": 335, "total_steps": 3400, "loss": 0.7331, "lr": 9.935750420309055e-05, "epoch": 0.08627349987123359, "percentage": 9.85, "elapsed_time": "1:44:49", "remaining_time": "15:59:00", "throughput": 559.08, "total_tokens": 3516136}
{"current_steps": 340, "total_steps": 3400, "loss": 0.6939, "lr": 9.931806517013612e-05, "epoch": 0.08756116404841617, "percentage": 10.0, "elapsed_time": "1:46:17", "remaining_time": "15:56:39", "throughput": 559.51, "total_tokens": 3568360}
{"current_steps": 345, "total_steps": 3400, "loss": 0.7158, "lr": 9.927745975766654e-05, "epoch": 0.08884882822559877, "percentage": 10.15, "elapsed_time": "1:47:46", "remaining_time": "15:54:25", "throughput": 559.88, "total_tokens": 3620696}
{"current_steps": 350, "total_steps": 3400, "loss": 0.6932, "lr": 9.923568892600578e-05, "epoch": 0.09013649240278135, "percentage": 10.29, "elapsed_time": "1:49:14", "remaining_time": "15:51:57", "throughput": 560.41, "total_tokens": 3673152}
{"current_steps": 350, "total_steps": 3400, "eval_loss": 0.7044599056243896, "epoch": 0.09013649240278135, "percentage": 10.29, "elapsed_time": "1:49:52", "remaining_time": "15:57:30", "throughput": 557.15, "total_tokens": 3673152}
{"current_steps": 355, "total_steps": 3400, "loss": 0.6778, "lr": 9.91927536630402e-05, "epoch": 0.09142415657996394, "percentage": 10.44, "elapsed_time": "1:51:26", "remaining_time": "15:55:53", "throughput": 557.13, "total_tokens": 3725296}
{"current_steps": 360, "total_steps": 3400, "loss": 0.6857, "lr": 9.91486549841951e-05, "epoch": 0.09271182075714654, "percentage": 10.59, "elapsed_time": "1:52:52", "remaining_time": "15:53:13", "throughput": 557.75, "total_tokens": 3777552}
{"current_steps": 365, "total_steps": 3400, "loss": 0.7184, "lr": 9.91033939324107e-05, "epoch": 0.09399948493432912, "percentage": 10.74, "elapsed_time": "1:54:21", "remaining_time": "15:50:50", "throughput": 558.25, "total_tokens": 3830200}
{"current_steps": 370, "total_steps": 3400, "loss": 0.7196, "lr": 9.905697157811761e-05, "epoch": 0.09528714911151172, "percentage": 10.88, "elapsed_time": "1:55:46", "remaining_time": "15:48:09", "throughput": 558.99, "total_tokens": 3883200}
{"current_steps": 375, "total_steps": 3400, "loss": 0.6914, "lr": 9.900938901921131e-05, "epoch": 0.09657481328869431, "percentage": 11.03, "elapsed_time": "1:57:14", "remaining_time": "15:45:41", "throughput": 559.5, "total_tokens": 3935576}
{"current_steps": 380, "total_steps": 3400, "loss": 0.6681, "lr": 9.896064738102635e-05, "epoch": 0.0978624774658769, "percentage": 11.18, "elapsed_time": "1:58:40", "remaining_time": "15:43:07", "throughput": 560.04, "total_tokens": 3987624}
{"current_steps": 385, "total_steps": 3400, "loss": 0.6723, "lr": 9.891074781630966e-05, "epoch": 0.09915014164305949, "percentage": 11.32, "elapsed_time": "2:00:06", "remaining_time": "15:40:32", "throughput": 560.58, "total_tokens": 4039680}
{"current_steps": 390, "total_steps": 3400, "loss": 0.6498, "lr": 9.885969150519331e-05, "epoch": 0.10043780582024209, "percentage": 11.47, "elapsed_time": "2:01:33", "remaining_time": "15:38:13", "throughput": 560.91, "total_tokens": 4091216}
{"current_steps": 395, "total_steps": 3400, "loss": 0.7311, "lr": 9.88074796551666e-05, "epoch": 0.10172546999742467, "percentage": 11.62, "elapsed_time": "2:03:01", "remaining_time": "15:35:58", "throughput": 561.41, "total_tokens": 4144264}
{"current_steps": 400, "total_steps": 3400, "loss": 0.7089, "lr": 9.875411350104744e-05, "epoch": 0.10301313417460727, "percentage": 11.76, "elapsed_time": "2:04:29", "remaining_time": "15:33:37", "throughput": 561.93, "total_tokens": 4197072}
{"current_steps": 400, "total_steps": 3400, "eval_loss": 0.6847750544548035, "epoch": 0.10301313417460727, "percentage": 11.76, "elapsed_time": "2:05:06", "remaining_time": "15:38:22", "throughput": 559.09, "total_tokens": 4197072}
{"current_steps": 405, "total_steps": 3400, "loss": 0.7021, "lr": 9.86995943049533e-05, "epoch": 0.10430079835178985, "percentage": 11.91, "elapsed_time": "2:06:38", "remaining_time": "15:36:29", "throughput": 559.3, "total_tokens": 4249656}
{"current_steps": 410, "total_steps": 3400, "loss": 0.6943, "lr": 9.864392335627117e-05, "epoch": 0.10558846252897244, "percentage": 12.06, "elapsed_time": "2:08:05", "remaining_time": "15:34:08", "throughput": 559.87, "total_tokens": 4302944}
{"current_steps": 415, "total_steps": 3400, "loss": 0.7146, "lr": 9.858710197162721e-05, "epoch": 0.10687612670615504, "percentage": 12.21, "elapsed_time": "2:09:31", "remaining_time": "15:31:37", "throughput": 560.45, "total_tokens": 4355480}
{"current_steps": 420, "total_steps": 3400, "loss": 0.6312, "lr": 9.852913149485556e-05, "epoch": 0.10816379088333762, "percentage": 12.35, "elapsed_time": "2:10:58", "remaining_time": "15:29:18", "throughput": 560.88, "total_tokens": 4407688}
{"current_steps": 425, "total_steps": 3400, "loss": 0.6877, "lr": 9.847001329696653e-05, "epoch": 0.10945145506052022, "percentage": 12.5, "elapsed_time": "2:12:24", "remaining_time": "15:26:51", "throughput": 561.37, "total_tokens": 4459736}
{"current_steps": 430, "total_steps": 3400, "loss": 0.6975, "lr": 9.840974877611422e-05, "epoch": 0.11073911923770281, "percentage": 12.65, "elapsed_time": "2:13:51", "remaining_time": "15:24:35", "throughput": 561.88, "total_tokens": 4512928}
{"current_steps": 435, "total_steps": 3400, "loss": 0.651, "lr": 9.834833935756344e-05, "epoch": 0.1120267834148854, "percentage": 12.79, "elapsed_time": "2:15:18", "remaining_time": "15:22:19", "throughput": 562.37, "total_tokens": 4565840}
{"current_steps": 440, "total_steps": 3400, "loss": 0.685, "lr": 9.828578649365601e-05, "epoch": 0.11331444759206799, "percentage": 12.94, "elapsed_time": "2:16:48", "remaining_time": "15:20:20", "throughput": 562.61, "total_tokens": 4618168}
{"current_steps": 445, "total_steps": 3400, "loss": 0.6258, "lr": 9.822209166377635e-05, "epoch": 0.11460211176925057, "percentage": 13.09, "elapsed_time": "2:18:17", "remaining_time": "15:18:17", "throughput": 562.81, "total_tokens": 4669784}
{"current_steps": 450, "total_steps": 3400, "loss": 0.6732, "lr": 9.815725637431662e-05, "epoch": 0.11588977594643317, "percentage": 13.24, "elapsed_time": "2:19:48", "remaining_time": "15:16:28", "throughput": 563.01, "total_tokens": 4722528}
{"current_steps": 450, "total_steps": 3400, "eval_loss": 0.6526497006416321, "epoch": 0.11588977594643317, "percentage": 13.24, "elapsed_time": "2:20:27", "remaining_time": "15:20:44", "throughput": 560.4, "total_tokens": 4722528}
{"current_steps": 455, "total_steps": 3400, "loss": 0.6544, "lr": 9.809128215864097e-05, "epoch": 0.11717744012361576, "percentage": 13.38, "elapsed_time": "2:22:00", "remaining_time": "15:19:06", "throughput": 560.37, "total_tokens": 4774400}
{"current_steps": 460, "total_steps": 3400, "loss": 0.652, "lr": 9.802417057704931e-05, "epoch": 0.11846510430079835, "percentage": 13.53, "elapsed_time": "2:23:28", "remaining_time": "15:17:02", "throughput": 560.67, "total_tokens": 4826704}
{"current_steps": 465, "total_steps": 3400, "loss": 0.6582, "lr": 9.795592321674045e-05, "epoch": 0.11975276847798094, "percentage": 13.68, "elapsed_time": "2:24:56", "remaining_time": "15:14:49", "throughput": 561.17, "total_tokens": 4880072}
{"current_steps": 470, "total_steps": 3400, "loss": 0.6506, "lr": 9.788654169177453e-05, "epoch": 0.12104043265516354, "percentage": 13.82, "elapsed_time": "2:26:24", "remaining_time": "15:12:43", "throughput": 561.43, "total_tokens": 4931968}
{"current_steps": 475, "total_steps": 3400, "loss": 0.6551, "lr": 9.781602764303487e-05, "epoch": 0.12232809683234612, "percentage": 13.97, "elapsed_time": "2:27:52", "remaining_time": "15:10:36", "throughput": 561.69, "total_tokens": 4983656}
{"current_steps": 480, "total_steps": 3400, "loss": 0.6978, "lr": 9.774438273818911e-05, "epoch": 0.12361576100952872, "percentage": 14.12, "elapsed_time": "2:29:20", "remaining_time": "15:08:32", "throughput": 562.05, "total_tokens": 5036528}
{"current_steps": 485, "total_steps": 3400, "loss": 0.6407, "lr": 9.767160867164979e-05, "epoch": 0.12490342518671131, "percentage": 14.26, "elapsed_time": "2:30:49", "remaining_time": "15:06:32", "throughput": 562.3, "total_tokens": 5088768}
{"current_steps": 490, "total_steps": 3400, "loss": 0.6641, "lr": 9.759770716453436e-05, "epoch": 0.1261910893638939, "percentage": 14.41, "elapsed_time": "2:32:17", "remaining_time": "15:04:26", "throughput": 562.73, "total_tokens": 5142080}
{"current_steps": 495, "total_steps": 3400, "loss": 0.6588, "lr": 9.752267996462434e-05, "epoch": 0.1274787535410765, "percentage": 14.56, "elapsed_time": "2:33:47", "remaining_time": "15:02:30", "throughput": 562.95, "total_tokens": 5194432}
{"current_steps": 500, "total_steps": 3400, "loss": 0.6304, "lr": 9.744652884632406e-05, "epoch": 0.12876641771825909, "percentage": 14.71, "elapsed_time": "2:35:15", "remaining_time": "15:00:27", "throughput": 563.24, "total_tokens": 5246640}
{"current_steps": 500, "total_steps": 3400, "eval_loss": 0.6272165775299072, "epoch": 0.12876641771825909, "percentage": 14.71, "elapsed_time": "2:35:54", "remaining_time": "15:04:16", "throughput": 560.87, "total_tokens": 5246640}
{"current_steps": 505, "total_steps": 3400, "loss": 0.5741, "lr": 9.736925561061871e-05, "epoch": 0.13005408189544168, "percentage": 14.85, "elapsed_time": "2:37:30", "remaining_time": "15:02:55", "throughput": 560.73, "total_tokens": 5299024}
{"current_steps": 510, "total_steps": 3400, "loss": 0.6535, "lr": 9.729086208503174e-05, "epoch": 0.13134174607262425, "percentage": 15.0, "elapsed_time": "2:38:58", "remaining_time": "15:00:50", "throughput": 561.18, "total_tokens": 5352664}
{"current_steps": 515, "total_steps": 3400, "loss": 0.6081, "lr": 9.721135012358156e-05, "epoch": 0.13262941024980685, "percentage": 15.15, "elapsed_time": "2:40:27", "remaining_time": "14:58:55", "throughput": 561.49, "total_tokens": 5406008}
{"current_steps": 520, "total_steps": 3400, "loss": 0.6792, "lr": 9.713072160673777e-05, "epoch": 0.13391707442698944, "percentage": 15.29, "elapsed_time": "2:41:55", "remaining_time": "14:56:51", "throughput": 561.9, "total_tokens": 5459368}
{"current_steps": 525, "total_steps": 3400, "loss": 0.6821, "lr": 9.704897844137673e-05, "epoch": 0.13520473860417204, "percentage": 15.44, "elapsed_time": "2:43:25", "remaining_time": "14:54:56", "throughput": 562.24, "total_tokens": 5512960}
{"current_steps": 530, "total_steps": 3400, "loss": 0.5835, "lr": 9.696612256073633e-05, "epoch": 0.13649240278135463, "percentage": 15.59, "elapsed_time": "2:44:52", "remaining_time": "14:52:51", "throughput": 562.56, "total_tokens": 5565368}
{"current_steps": 535, "total_steps": 3400, "loss": 0.6129, "lr": 9.688215592437039e-05, "epoch": 0.1377800669585372, "percentage": 15.74, "elapsed_time": "2:46:21", "remaining_time": "14:50:50", "throughput": 562.86, "total_tokens": 5618008}
{"current_steps": 540, "total_steps": 3400, "loss": 0.5765, "lr": 9.679708051810221e-05, "epoch": 0.1390677311357198, "percentage": 15.88, "elapsed_time": "2:47:47", "remaining_time": "14:48:38", "throughput": 563.22, "total_tokens": 5670072}
{"current_steps": 545, "total_steps": 3400, "loss": 0.6325, "lr": 9.67108983539777e-05, "epoch": 0.1403553953129024, "percentage": 16.03, "elapsed_time": "2:49:14", "remaining_time": "14:46:35", "throughput": 563.58, "total_tokens": 5722936}
{"current_steps": 550, "total_steps": 3400, "loss": 0.5596, "lr": 9.662361147021779e-05, "epoch": 0.141643059490085, "percentage": 16.18, "elapsed_time": "2:50:41", "remaining_time": "14:44:27", "throughput": 563.9, "total_tokens": 5774880}
{"current_steps": 550, "total_steps": 3400, "eval_loss": 0.5832681059837341, "epoch": 0.141643059490085, "percentage": 16.18, "elapsed_time": "2:51:19", "remaining_time": "14:47:45", "throughput": 561.8, "total_tokens": 5774880}
{"current_steps": 555, "total_steps": 3400, "loss": 0.5073, "lr": 9.653522193117013e-05, "epoch": 0.14293072366726758, "percentage": 16.32, "elapsed_time": "2:52:51", "remaining_time": "14:46:05", "throughput": 561.79, "total_tokens": 5826608}
{"current_steps": 560, "total_steps": 3400, "loss": 0.5652, "lr": 9.644573182726035e-05, "epoch": 0.14421838784445018, "percentage": 16.47, "elapsed_time": "2:54:18", "remaining_time": "14:43:58", "throughput": 562.21, "total_tokens": 5879776}
{"current_steps": 565, "total_steps": 3400, "loss": 0.5727, "lr": 9.63551432749426e-05, "epoch": 0.14550605202163275, "percentage": 16.62, "elapsed_time": "2:55:44", "remaining_time": "14:41:50", "throughput": 562.64, "total_tokens": 5932888}
{"current_steps": 570, "total_steps": 3400, "loss": 0.6251, "lr": 9.626345841664953e-05, "epoch": 0.14679371619881534, "percentage": 16.76, "elapsed_time": "2:57:12", "remaining_time": "14:39:49", "throughput": 562.87, "total_tokens": 5984648}
{"current_steps": 575, "total_steps": 3400, "loss": 0.6508, "lr": 9.617067942074153e-05, "epoch": 0.14808138037599794, "percentage": 16.91, "elapsed_time": "2:58:38", "remaining_time": "14:37:39", "throughput": 563.24, "total_tokens": 6037000}
{"current_steps": 580, "total_steps": 3400, "loss": 0.6686, "lr": 9.607680848145558e-05, "epoch": 0.14936904455318054, "percentage": 17.06, "elapsed_time": "3:00:06", "remaining_time": "14:35:39", "throughput": 563.62, "total_tokens": 6090512}
{"current_steps": 585, "total_steps": 3400, "loss": 0.5793, "lr": 9.598184781885318e-05, "epoch": 0.15065670873036313, "percentage": 17.21, "elapsed_time": "3:01:32", "remaining_time": "14:33:32", "throughput": 564.01, "total_tokens": 6143320}
{"current_steps": 590, "total_steps": 3400, "loss": 0.5954, "lr": 9.588579967876806e-05, "epoch": 0.1519443729075457, "percentage": 17.35, "elapsed_time": "3:02:59", "remaining_time": "14:31:33", "throughput": 564.28, "total_tokens": 6195720}
{"current_steps": 595, "total_steps": 3400, "loss": 0.5644, "lr": 9.578866633275288e-05, "epoch": 0.1532320370847283, "percentage": 17.5, "elapsed_time": "3:04:25", "remaining_time": "14:29:26", "throughput": 564.6, "total_tokens": 6247592}
{"current_steps": 600, "total_steps": 3400, "loss": 0.5794, "lr": 9.569045007802559e-05, "epoch": 0.1545197012619109, "percentage": 17.65, "elapsed_time": "3:05:53", "remaining_time": "14:27:29", "throughput": 564.81, "total_tokens": 6299656}
{"current_steps": 600, "total_steps": 3400, "eval_loss": 0.6039358973503113, "epoch": 0.1545197012619109, "percentage": 17.65, "elapsed_time": "3:06:31", "remaining_time": "14:30:28", "throughput": 562.88, "total_tokens": 6299656}
{"current_steps": 605, "total_steps": 3400, "loss": 0.6032, "lr": 9.55911532374151e-05, "epoch": 0.1558073654390935, "percentage": 17.79, "elapsed_time": "3:08:04", "remaining_time": "14:28:50", "throughput": 562.89, "total_tokens": 6351664}
{"current_steps": 610, "total_steps": 3400, "loss": 0.5942, "lr": 9.549077815930636e-05, "epoch": 0.15709502961627608, "percentage": 17.94, "elapsed_time": "3:09:32", "remaining_time": "14:26:53", "throughput": 563.1, "total_tokens": 6403688}
{"current_steps": 615, "total_steps": 3400, "loss": 0.5643, "lr": 9.538932721758474e-05, "epoch": 0.15838269379345868, "percentage": 18.09, "elapsed_time": "3:10:58", "remaining_time": "14:24:50", "throughput": 563.44, "total_tokens": 6456344}
{"current_steps": 620, "total_steps": 3400, "loss": 0.5914, "lr": 9.528680281157999e-05, "epoch": 0.15967035797064125, "percentage": 18.24, "elapsed_time": "3:12:26", "remaining_time": "14:22:51", "throughput": 563.74, "total_tokens": 6509000}
{"current_steps": 605, "total_steps": 3400, "loss": 0.6106, "lr": 9.55911532374151e-05, "epoch": 0.1558073654390935, "percentage": 17.79, "elapsed_time": "0:02:45", "remaining_time": "0:12:42", "throughput": 38471.59, "total_tokens": 6351680}
{"current_steps": 610, "total_steps": 3400, "loss": 0.5812, "lr": 9.549077815930636e-05, "epoch": 0.15709502961627608, "percentage": 17.94, "elapsed_time": "0:04:14", "remaining_time": "0:19:21", "throughput": 25207.5, "total_tokens": 6403648}
{"current_steps": 615, "total_steps": 3400, "loss": 0.5992, "lr": 9.538932721758474e-05, "epoch": 0.15838269379345868, "percentage": 18.09, "elapsed_time": "0:05:41", "remaining_time": "0:25:47", "throughput": 18896.22, "total_tokens": 6456328}
{"current_steps": 620, "total_steps": 3400, "loss": 0.587, "lr": 9.528680281157999e-05, "epoch": 0.15967035797064125, "percentage": 18.24, "elapsed_time": "0:07:10", "remaining_time": "0:32:11", "throughput": 15113.22, "total_tokens": 6509024}
{"current_steps": 625, "total_steps": 3400, "loss": 0.5836, "lr": 9.518320736600943e-05, "epoch": 0.16095802214782384, "percentage": 18.38, "elapsed_time": "0:08:38", "remaining_time": "0:38:21", "throughput": 12658.21, "total_tokens": 6561336}
{"current_steps": 630, "total_steps": 3400, "loss": 0.5913, "lr": 9.507854333092063e-05, "epoch": 0.16224568632500644, "percentage": 18.53, "elapsed_time": "0:10:07", "remaining_time": "0:44:29", "throughput": 10895.64, "total_tokens": 6614024}
{"current_steps": 635, "total_steps": 3400, "loss": 0.5693, "lr": 9.497281318163346e-05, "epoch": 0.16353335050218903, "percentage": 18.68, "elapsed_time": "0:11:34", "remaining_time": "0:50:24", "throughput": 9595.98, "total_tokens": 6666416}
{"current_steps": 640, "total_steps": 3400, "loss": 0.572, "lr": 9.486601941868154e-05, "epoch": 0.16482101467937163, "percentage": 18.82, "elapsed_time": "0:13:04", "remaining_time": "0:56:24", "throughput": 8560.2, "total_tokens": 6718200}
{"current_steps": 645, "total_steps": 3400, "loss": 0.6111, "lr": 9.475816456775313e-05, "epoch": 0.1661086788565542, "percentage": 18.97, "elapsed_time": "0:14:35", "remaining_time": "1:02:19", "throughput": 7734.51, "total_tokens": 6771256}
{"current_steps": 650, "total_steps": 3400, "loss": 0.5959, "lr": 9.464925117963133e-05, "epoch": 0.1673963430337368, "percentage": 19.12, "elapsed_time": "0:16:07", "remaining_time": "1:08:11", "throughput": 7056.83, "total_tokens": 6824008}
{"current_steps": 650, "total_steps": 3400, "eval_loss": 0.5542036890983582, "epoch": 0.1673963430337368, "percentage": 19.12, "elapsed_time": "0:17:15", "remaining_time": "1:13:02", "throughput": 6587.43, "total_tokens": 6824008}
{"current_steps": 655, "total_steps": 3400, "loss": 0.5344, "lr": 9.453928183013385e-05, "epoch": 0.1686840072109194, "percentage": 19.26, "elapsed_time": "0:18:55", "remaining_time": "1:19:19", "throughput": 6053.33, "total_tokens": 6875432}
{"current_steps": 660, "total_steps": 3400, "loss": 0.56, "lr": 9.442825912005202e-05, "epoch": 0.16997167138810199, "percentage": 19.41, "elapsed_time": "0:20:27", "remaining_time": "1:24:57", "throughput": 5642.47, "total_tokens": 6927768}
{"current_steps": 665, "total_steps": 3400, "loss": 0.5701, "lr": 9.431618567508933e-05, "epoch": 0.17125933556528458, "percentage": 19.56, "elapsed_time": "0:21:58", "remaining_time": "1:30:24", "throughput": 5292.74, "total_tokens": 6980544}
{"current_steps": 670, "total_steps": 3400, "loss": 0.5604, "lr": 9.420306414579925e-05, "epoch": 0.17254699974246718, "percentage": 19.71, "elapsed_time": "0:23:32", "remaining_time": "1:35:53", "throughput": 4980.28, "total_tokens": 7032584}
{"current_steps": 675, "total_steps": 3400, "loss": 0.5763, "lr": 9.408889720752266e-05, "epoch": 0.17383466391964975, "percentage": 19.85, "elapsed_time": "0:25:03", "remaining_time": "1:41:09", "throughput": 4712.79, "total_tokens": 7085048}
{"current_steps": 680, "total_steps": 3400, "loss": 0.5962, "lr": 9.397368756032445e-05, "epoch": 0.17512232809683234, "percentage": 20.0, "elapsed_time": "0:26:35", "remaining_time": "1:46:21", "throughput": 4474.29, "total_tokens": 7137952}
{"current_steps": 685, "total_steps": 3400, "loss": 0.5935, "lr": 9.385743792892982e-05, "epoch": 0.17640999227401494, "percentage": 20.15, "elapsed_time": "0:28:03", "remaining_time": "1:51:13", "throughput": 4270.71, "total_tokens": 7190584}
{"current_steps": 690, "total_steps": 3400, "loss": 0.5267, "lr": 9.374015106265968e-05, "epoch": 0.17769765645119753, "percentage": 20.29, "elapsed_time": "0:29:33", "remaining_time": "1:56:04", "throughput": 4084.88, "total_tokens": 7243440}
{"current_steps": 695, "total_steps": 3400, "loss": 0.5351, "lr": 9.362182973536569e-05, "epoch": 0.17898532062838013, "percentage": 20.44, "elapsed_time": "0:31:00", "remaining_time": "2:00:42", "throughput": 3920.82, "total_tokens": 7295568}
{"current_steps": 700, "total_steps": 3400, "loss": 0.5014, "lr": 9.35024767453647e-05, "epoch": 0.1802729848055627, "percentage": 20.59, "elapsed_time": "0:32:28", "remaining_time": "2:05:16", "throughput": 3770.1, "total_tokens": 7347040}
{"current_steps": 700, "total_steps": 3400, "eval_loss": 0.5440100431442261, "epoch": 0.1802729848055627, "percentage": 20.59, "elapsed_time": "0:33:07", "remaining_time": "2:07:47", "throughput": 3695.91, "total_tokens": 7347040}
{"current_steps": 705, "total_steps": 3400, "loss": 0.543, "lr": 9.338209491537257e-05, "epoch": 0.1815606489827453, "percentage": 20.74, "elapsed_time": "0:34:43", "remaining_time": "2:12:44", "throughput": 3551.6, "total_tokens": 7399584}
{"current_steps": 710, "total_steps": 3400, "loss": 0.4995, "lr": 9.326068709243727e-05, "epoch": 0.1828483131599279, "percentage": 20.88, "elapsed_time": "0:36:11", "remaining_time": "2:17:07", "throughput": 3432.24, "total_tokens": 7452928}
{"current_steps": 715, "total_steps": 3400, "loss": 0.5109, "lr": 9.313825614787177e-05, "epoch": 0.18413597733711048, "percentage": 21.03, "elapsed_time": "0:37:39", "remaining_time": "2:21:26", "throughput": 3321.05, "total_tokens": 7505112}
{"current_steps": 720, "total_steps": 3400, "loss": 0.4932, "lr": 9.301480497718593e-05, "epoch": 0.18542364151429308, "percentage": 21.18, "elapsed_time": "0:39:07", "remaining_time": "2:25:37", "throughput": 3219.44, "total_tokens": 7557608}
{"current_steps": 725, "total_steps": 3400, "loss": 0.5573, "lr": 9.289033650001817e-05, "epoch": 0.18671130569147568, "percentage": 21.32, "elapsed_time": "0:40:36", "remaining_time": "2:29:51", "throughput": 3122.82, "total_tokens": 7610048}
{"current_steps": 730, "total_steps": 3400, "loss": 0.5305, "lr": 9.276485366006634e-05, "epoch": 0.18799896986865824, "percentage": 21.47, "elapsed_time": "0:42:04", "remaining_time": "2:33:54", "throughput": 3034.8, "total_tokens": 7662056}
{"current_steps": 735, "total_steps": 3400, "loss": 0.5369, "lr": 9.263835942501807e-05, "epoch": 0.18928663404584084, "percentage": 21.62, "elapsed_time": "0:43:34", "remaining_time": "2:37:58", "throughput": 2950.71, "total_tokens": 7713656}
{"current_steps": 740, "total_steps": 3400, "loss": 0.5397, "lr": 9.251085678648072e-05, "epoch": 0.19057429822302344, "percentage": 21.76, "elapsed_time": "0:45:02", "remaining_time": "2:41:54", "throughput": 2873.57, "total_tokens": 7765992}
{"current_steps": 745, "total_steps": 3400, "loss": 0.5116, "lr": 9.238234875991046e-05, "epoch": 0.19186196240020603, "percentage": 21.91, "elapsed_time": "0:46:31", "remaining_time": "2:45:49", "throughput": 2800.48, "total_tokens": 7818448}
{"current_steps": 750, "total_steps": 3400, "loss": 0.541, "lr": 9.225283838454111e-05, "epoch": 0.19314962657738863, "percentage": 22.06, "elapsed_time": "0:47:59", "remaining_time": "2:49:34", "throughput": 2733.14, "total_tokens": 7870520}
{"current_steps": 750, "total_steps": 3400, "eval_loss": 0.5273815989494324, "epoch": 0.19314962657738863, "percentage": 22.06, "elapsed_time": "0:48:38", "remaining_time": "2:51:53", "throughput": 2696.45, "total_tokens": 7870520}
{"current_steps": 755, "total_steps": 3400, "loss": 0.4961, "lr": 9.21223287233121e-05, "epoch": 0.1944372907545712, "percentage": 22.21, "elapsed_time": "0:50:14", "remaining_time": "2:55:59", "throughput": 2628.5, "total_tokens": 7922736}
{"current_steps": 760, "total_steps": 3400, "loss": 0.4956, "lr": 9.199082286279622e-05, "epoch": 0.1957249549317538, "percentage": 22.35, "elapsed_time": "0:51:41", "remaining_time": "2:59:32", "throughput": 2571.82, "total_tokens": 7975304}
{"current_steps": 765, "total_steps": 3400, "loss": 0.4997, "lr": 9.185832391312644e-05, "epoch": 0.1970126191089364, "percentage": 22.5, "elapsed_time": "0:53:10", "remaining_time": "3:03:09", "throughput": 2515.96, "total_tokens": 8027448}
{"current_steps": 770, "total_steps": 3400, "loss": 0.5214, "lr": 9.172483500792244e-05, "epoch": 0.19830028328611898, "percentage": 22.65, "elapsed_time": "0:54:38", "remaining_time": "3:06:38", "throughput": 2464.71, "total_tokens": 8080944}
{"current_steps": 775, "total_steps": 3400, "loss": 0.6098, "lr": 9.159035930421658e-05, "epoch": 0.19958794746330158, "percentage": 22.79, "elapsed_time": "0:56:07", "remaining_time": "3:10:07", "throughput": 2414.98, "total_tokens": 8133392}
{"current_steps": 780, "total_steps": 3400, "loss": 0.5046, "lr": 9.145489998237902e-05, "epoch": 0.20087561164048418, "percentage": 22.94, "elapsed_time": "0:57:34", "remaining_time": "3:13:25", "throughput": 2369.16, "total_tokens": 8185360}
{"current_steps": 785, "total_steps": 3400, "loss": 0.5803, "lr": 9.131846024604274e-05, "epoch": 0.20216327581766674, "percentage": 23.09, "elapsed_time": "0:59:06", "remaining_time": "3:16:53", "throughput": 2322.96, "total_tokens": 8237672}
{"current_steps": 790, "total_steps": 3400, "loss": 0.5365, "lr": 9.11810433220276e-05, "epoch": 0.20345093999484934, "percentage": 23.24, "elapsed_time": "1:00:36", "remaining_time": "3:20:14", "throughput": 2279.6, "total_tokens": 8289688}
{"current_steps": 795, "total_steps": 3400, "loss": 0.5259, "lr": 9.104265246026415e-05, "epoch": 0.20473860417203193, "percentage": 23.38, "elapsed_time": "1:02:06", "remaining_time": "3:23:30", "throughput": 2238.56, "total_tokens": 8341624}
{"current_steps": 800, "total_steps": 3400, "loss": 0.5291, "lr": 9.090329093371666e-05, "epoch": 0.20602626834921453, "percentage": 23.53, "elapsed_time": "1:03:35", "remaining_time": "3:26:41", "throughput": 2199.61, "total_tokens": 8393696}
{"current_steps": 800, "total_steps": 3400, "eval_loss": 0.5219093561172485, "epoch": 0.20602626834921453, "percentage": 23.53, "elapsed_time": "1:04:15", "remaining_time": "3:28:51", "throughput": 2176.94, "total_tokens": 8393696}
{"current_steps": 805, "total_steps": 3400, "loss": 0.5449, "lr": 9.076296203830579e-05, "epoch": 0.20731393252639713, "percentage": 23.68, "elapsed_time": "1:05:50", "remaining_time": "3:32:16", "throughput": 2137.83, "total_tokens": 8446496}
{"current_steps": 810, "total_steps": 3400, "loss": 0.5625, "lr": 9.062166909283062e-05, "epoch": 0.2086015967035797, "percentage": 23.82, "elapsed_time": "1:07:19", "remaining_time": "3:35:17", "throughput": 2103.98, "total_tokens": 8499544}
{"current_steps": 815, "total_steps": 3400, "loss": 0.5564, "lr": 9.047941543889014e-05, "epoch": 0.2098892608807623, "percentage": 23.97, "elapsed_time": "1:08:48", "remaining_time": "3:38:13", "throughput": 2071.83, "total_tokens": 8552568}
{"current_steps": 820, "total_steps": 3400, "loss": 0.5487, "lr": 9.033620444080428e-05, "epoch": 0.2111769250579449, "percentage": 24.12, "elapsed_time": "1:10:19", "remaining_time": "3:41:15", "throughput": 2039.59, "total_tokens": 8605560}
{"current_steps": 825, "total_steps": 3400, "loss": 0.5719, "lr": 9.019203948553422e-05, "epoch": 0.21246458923512748, "percentage": 24.26, "elapsed_time": "1:11:48", "remaining_time": "3:44:07", "throughput": 2009.42, "total_tokens": 8657704}
{"current_steps": 830, "total_steps": 3400, "loss": 0.5235, "lr": 9.004692398260244e-05, "epoch": 0.21375225341231008, "percentage": 24.41, "elapsed_time": "1:13:19", "remaining_time": "3:47:03", "throughput": 1979.82, "total_tokens": 8711088}
{"current_steps": 835, "total_steps": 3400, "loss": 0.5566, "lr": 8.9900861364012e-05, "epoch": 0.21503991758949267, "percentage": 24.56, "elapsed_time": "1:14:51", "remaining_time": "3:49:58", "throughput": 1950.99, "total_tokens": 8763712}
{"current_steps": 840, "total_steps": 3400, "loss": 0.482, "lr": 8.975385508416532e-05, "epoch": 0.21632758176667524, "percentage": 24.71, "elapsed_time": "1:16:22", "remaining_time": "3:52:45", "throughput": 1923.79, "total_tokens": 8815760}
{"current_steps": 845, "total_steps": 3400, "loss": 0.5046, "lr": 8.960590861978265e-05, "epoch": 0.21761524594385784, "percentage": 24.85, "elapsed_time": "1:17:51", "remaining_time": "3:55:24", "throughput": 1898.38, "total_tokens": 8867720}
{"current_steps": 850, "total_steps": 3400, "loss": 0.5063, "lr": 8.945702546981969e-05, "epoch": 0.21890291012104043, "percentage": 25.0, "elapsed_time": "1:19:21", "remaining_time": "3:58:04", "throughput": 1873.33, "total_tokens": 8919608}
{"current_steps": 850, "total_steps": 3400, "eval_loss": 0.5525640249252319, "epoch": 0.21890291012104043, "percentage": 25.0, "elapsed_time": "1:20:00", "remaining_time": "4:00:01", "throughput": 1858.09, "total_tokens": 8919608}
{"current_steps": 855, "total_steps": 3400, "loss": 0.5853, "lr": 8.930720915538487e-05, "epoch": 0.22019057429822303, "percentage": 25.15, "elapsed_time": "1:21:34", "remaining_time": "4:02:50", "throughput": 1832.75, "total_tokens": 8971048}
{"current_steps": 860, "total_steps": 3400, "loss": 0.5534, "lr": 8.915646321965614e-05, "epoch": 0.22147823847540563, "percentage": 25.29, "elapsed_time": "1:23:04", "remaining_time": "4:05:22", "throughput": 1810.11, "total_tokens": 9022936}
{"current_steps": 865, "total_steps": 3400, "loss": 0.5623, "lr": 8.900479122779712e-05, "epoch": 0.2227659026525882, "percentage": 25.44, "elapsed_time": "1:24:32", "remaining_time": "4:07:47", "throughput": 1788.95, "total_tokens": 9075336}
{"current_steps": 870, "total_steps": 3400, "loss": 0.5561, "lr": 8.885219676687277e-05, "epoch": 0.2240535668297708, "percentage": 25.59, "elapsed_time": "1:26:03", "remaining_time": "4:10:14", "throughput": 1767.89, "total_tokens": 9127688}
{"current_steps": 875, "total_steps": 3400, "loss": 0.5449, "lr": 8.869868344576459e-05, "epoch": 0.22534123100695339, "percentage": 25.74, "elapsed_time": "1:27:31", "remaining_time": "4:12:34", "throughput": 1748.15, "total_tokens": 9180624}
{"current_steps": 880, "total_steps": 3400, "loss": 0.5062, "lr": 8.854425489508532e-05, "epoch": 0.22662889518413598, "percentage": 25.88, "elapsed_time": "1:29:02", "remaining_time": "4:14:57", "throughput": 1728.37, "total_tokens": 9233176}
{"current_steps": 885, "total_steps": 3400, "loss": 0.5033, "lr": 8.838891476709288e-05, "epoch": 0.22791655936131858, "percentage": 26.03, "elapsed_time": "1:30:30", "remaining_time": "4:17:12", "throughput": 1710.09, "total_tokens": 9286688}
{"current_steps": 890, "total_steps": 3400, "loss": 0.4845, "lr": 8.823266673560426e-05, "epoch": 0.22920422353850115, "percentage": 26.18, "elapsed_time": "1:31:59", "remaining_time": "4:19:26", "throughput": 1692.06, "total_tokens": 9339600}
{"current_steps": 895, "total_steps": 3400, "loss": 0.5595, "lr": 8.807551449590846e-05, "epoch": 0.23049188771568374, "percentage": 26.32, "elapsed_time": "1:33:27", "remaining_time": "4:21:34", "throughput": 1674.82, "total_tokens": 9391536}
{"current_steps": 900, "total_steps": 3400, "loss": 0.5251, "lr": 8.791746176467907e-05, "epoch": 0.23177955189286634, "percentage": 26.47, "elapsed_time": "1:34:57", "remaining_time": "4:23:46", "throughput": 1657.48, "total_tokens": 9443616}
{"current_steps": 900, "total_steps": 3400, "eval_loss": 0.49604204297065735, "epoch": 0.23177955189286634, "percentage": 26.47, "elapsed_time": "1:35:37", "remaining_time": "4:25:36", "throughput": 1646.06, "total_tokens": 9443616}
{"current_steps": 905, "total_steps": 3400, "loss": 0.5774, "lr": 8.775851227988656e-05, "epoch": 0.23306721607004893, "percentage": 26.62, "elapsed_time": "1:37:14", "remaining_time": "4:28:05", "throughput": 1627.77, "total_tokens": 9497304}
{"current_steps": 910, "total_steps": 3400, "loss": 0.5441, "lr": 8.759866980070963e-05, "epoch": 0.23435488024723153, "percentage": 26.76, "elapsed_time": "1:38:44", "remaining_time": "4:30:10", "throughput": 1611.88, "total_tokens": 9549416}
{"current_steps": 915, "total_steps": 3400, "loss": 0.4898, "lr": 8.743793810744654e-05, "epoch": 0.23564254442441412, "percentage": 26.91, "elapsed_time": "1:40:15", "remaining_time": "4:32:16", "throughput": 1596.21, "total_tokens": 9601800}
{"current_steps": 920, "total_steps": 3400, "loss": 0.4681, "lr": 8.727632100142551e-05, "epoch": 0.2369302086015967, "percentage": 27.06, "elapsed_time": "1:41:44", "remaining_time": "4:34:16", "throughput": 1581.28, "total_tokens": 9653600}
{"current_steps": 925, "total_steps": 3400, "loss": 0.4946, "lr": 8.711382230491493e-05, "epoch": 0.2382178727787793, "percentage": 27.21, "elapsed_time": "1:43:15", "remaining_time": "4:36:18", "throughput": 1566.7, "total_tokens": 9707224}
{"current_steps": 930, "total_steps": 3400, "loss": 0.5517, "lr": 8.695044586103296e-05, "epoch": 0.23950553695596188, "percentage": 27.35, "elapsed_time": "1:44:45", "remaining_time": "4:38:13", "throughput": 1552.86, "total_tokens": 9760096}
{"current_steps": 935, "total_steps": 3400, "loss": 0.6064, "lr": 8.678619553365659e-05, "epoch": 0.24079320113314448, "percentage": 27.5, "elapsed_time": "1:46:16", "remaining_time": "4:40:10", "throughput": 1538.94, "total_tokens": 9812672}
{"current_steps": 940, "total_steps": 3400, "loss": 0.5398, "lr": 8.662107520733027e-05, "epoch": 0.24208086531032708, "percentage": 27.65, "elapsed_time": "1:47:45", "remaining_time": "4:41:59", "throughput": 1526.05, "total_tokens": 9866200}
{"current_steps": 945, "total_steps": 3400, "loss": 0.5068, "lr": 8.64550887871741e-05, "epoch": 0.24336852948750964, "percentage": 27.79, "elapsed_time": "1:49:15", "remaining_time": "4:43:50", "throughput": 1512.93, "total_tokens": 9918160}
{"current_steps": 950, "total_steps": 3400, "loss": 0.5862, "lr": 8.628824019879137e-05, "epoch": 0.24465619366469224, "percentage": 27.94, "elapsed_time": "1:50:44", "remaining_time": "4:45:35", "throughput": 1500.64, "total_tokens": 9970600}
{"current_steps": 950, "total_steps": 3400, "eval_loss": 0.5085262656211853, "epoch": 0.24465619366469224, "percentage": 27.94, "elapsed_time": "1:51:23", "remaining_time": "4:47:15", "throughput": 1491.87, "total_tokens": 9970600}
{"current_steps": 955, "total_steps": 3400, "loss": 0.4549, "lr": 8.612053338817581e-05, "epoch": 0.24594385784187484, "percentage": 28.09, "elapsed_time": "1:53:00", "remaining_time": "4:49:20", "throughput": 1478.02, "total_tokens": 10022248}
{"current_steps": 960, "total_steps": 3400, "loss": 0.4791, "lr": 8.595197232161824e-05, "epoch": 0.24723152201905743, "percentage": 28.24, "elapsed_time": "1:54:30", "remaining_time": "4:51:02", "throughput": 1466.45, "total_tokens": 10075280}
{"current_steps": 965, "total_steps": 3400, "loss": 0.4833, "lr": 8.578256098561275e-05, "epoch": 0.24851918619624003, "percentage": 28.38, "elapsed_time": "1:56:02", "remaining_time": "4:52:47", "throughput": 1454.77, "total_tokens": 10128392}
{"current_steps": 970, "total_steps": 3400, "loss": 0.4672, "lr": 8.561230338676239e-05, "epoch": 0.24980685037342262, "percentage": 28.53, "elapsed_time": "1:57:32", "remaining_time": "4:54:27", "throughput": 1443.57, "total_tokens": 10180720}
{"current_steps": 975, "total_steps": 3400, "loss": 0.5205, "lr": 8.544120355168451e-05, "epoch": 0.2510945145506052, "percentage": 28.68, "elapsed_time": "1:59:03", "remaining_time": "4:56:07", "throughput": 1432.48, "total_tokens": 10233256}
{"current_steps": 980, "total_steps": 3400, "loss": 0.5124, "lr": 8.526926552691544e-05, "epoch": 0.2523821787277878, "percentage": 28.82, "elapsed_time": "2:00:33", "remaining_time": "4:57:42", "throughput": 1421.81, "total_tokens": 10284928}
{"current_steps": 985, "total_steps": 3400, "loss": 0.5034, "lr": 8.509649337881483e-05, "epoch": 0.2536698429049704, "percentage": 28.97, "elapsed_time": "2:02:03", "remaining_time": "4:59:15", "throughput": 1411.63, "total_tokens": 10338208}
{"current_steps": 990, "total_steps": 3400, "loss": 0.5226, "lr": 8.492289119346943e-05, "epoch": 0.254957507082153, "percentage": 29.12, "elapsed_time": "2:03:33", "remaining_time": "5:00:46", "throughput": 1401.61, "total_tokens": 10390224}
{"current_steps": 995, "total_steps": 3400, "loss": 0.5399, "lr": 8.474846307659658e-05, "epoch": 0.25624517125933555, "percentage": 29.26, "elapsed_time": "2:05:01", "remaining_time": "5:02:11", "throughput": 1392.18, "total_tokens": 10443080}
{"current_steps": 1000, "total_steps": 3400, "loss": 0.483, "lr": 8.457321315344694e-05, "epoch": 0.25753283543651817, "percentage": 29.41, "elapsed_time": "2:06:30", "remaining_time": "5:03:37", "throughput": 1382.7, "total_tokens": 10495592}
{"current_steps": 1000, "total_steps": 3400, "eval_loss": 0.5305114388465881, "epoch": 0.25753283543651817, "percentage": 29.41, "elapsed_time": "2:07:09", "remaining_time": "5:05:10", "throughput": 1375.65, "total_tokens": 10495592}
{"current_steps": 1005, "total_steps": 3400, "loss": 0.568, "lr": 8.439714556870704e-05, "epoch": 0.25882049961370074, "percentage": 29.56, "elapsed_time": "2:08:43", "remaining_time": "5:06:44", "throughput": 1365.8, "total_tokens": 10548136}
{"current_steps": 1010, "total_steps": 3400, "loss": 0.4335, "lr": 8.422026448640124e-05, "epoch": 0.26010816379088336, "percentage": 29.71, "elapsed_time": "2:10:12", "remaining_time": "5:08:07", "throughput": 1356.78, "total_tokens": 10600048}
{"current_steps": 1015, "total_steps": 3400, "loss": 0.5385, "lr": 8.40425740897932e-05, "epoch": 0.26139582796806593, "percentage": 29.85, "elapsed_time": "2:11:40", "remaining_time": "5:09:25", "throughput": 1348.21, "total_tokens": 10652160}
{"current_steps": 1020, "total_steps": 3400, "loss": 0.5171, "lr": 8.386407858128706e-05, "epoch": 0.2626834921452485, "percentage": 30.0, "elapsed_time": "2:13:11", "remaining_time": "5:10:46", "throughput": 1339.57, "total_tokens": 10705208}
{"current_steps": 1025, "total_steps": 3400, "loss": 0.5201, "lr": 8.368478218232787e-05, "epoch": 0.2639711563224311, "percentage": 30.15, "elapsed_time": "2:14:39", "remaining_time": "5:12:00", "throughput": 1331.61, "total_tokens": 10758688}
{"current_steps": 1030, "total_steps": 3400, "loss": 0.5521, "lr": 8.350468913330192e-05, "epoch": 0.2652588204996137, "percentage": 30.29, "elapsed_time": "2:16:08", "remaining_time": "5:13:16", "throughput": 1323.48, "total_tokens": 10811408}
{"current_steps": 1035, "total_steps": 3400, "loss": 0.4938, "lr": 8.33238036934364e-05, "epoch": 0.2665464846767963, "percentage": 30.44, "elapsed_time": "2:17:37", "remaining_time": "5:14:28", "throughput": 1315.65, "total_tokens": 10864144}
{"current_steps": 1040, "total_steps": 3400, "loss": 0.4828, "lr": 8.31421301406986e-05, "epoch": 0.2678341488539789, "percentage": 30.59, "elapsed_time": "2:19:08", "remaining_time": "5:15:44", "throughput": 1307.65, "total_tokens": 10916952}
{"current_steps": 1045, "total_steps": 3400, "loss": 0.5491, "lr": 8.29596727716949e-05, "epoch": 0.26912181303116145, "percentage": 30.74, "elapsed_time": "2:20:37", "remaining_time": "5:16:53", "throughput": 1300.06, "total_tokens": 10968824}
{"current_steps": 1050, "total_steps": 3400, "loss": 0.4628, "lr": 8.277643590156894e-05, "epoch": 0.2704094772083441, "percentage": 30.88, "elapsed_time": "2:22:08", "remaining_time": "5:18:07", "throughput": 1292.35, "total_tokens": 11021656}
{"current_steps": 1050, "total_steps": 3400, "eval_loss": 0.5039986371994019, "epoch": 0.2704094772083441, "percentage": 30.88, "elapsed_time": "2:22:48", "remaining_time": "5:19:37", "throughput": 1286.28, "total_tokens": 11021656}
{"current_steps": 1055, "total_steps": 3400, "loss": 0.4586, "lr": 8.259242386389973e-05, "epoch": 0.27169714138552664, "percentage": 31.03, "elapsed_time": "2:24:23", "remaining_time": "5:20:57", "throughput": 1278.23, "total_tokens": 11074336}
{"current_steps": 1060, "total_steps": 3400, "loss": 0.4939, "lr": 8.240764101059912e-05, "epoch": 0.27298480556270927, "percentage": 31.18, "elapsed_time": "2:25:55", "remaining_time": "5:22:07", "throughput": 1270.9, "total_tokens": 11126776}
{"current_steps": 1065, "total_steps": 3400, "loss": 0.4978, "lr": 8.222209171180883e-05, "epoch": 0.27427246973989183, "percentage": 31.32, "elapsed_time": "2:27:23", "remaining_time": "5:23:09", "throughput": 1264.18, "total_tokens": 11179680}
{"current_steps": 1070, "total_steps": 3400, "loss": 0.5695, "lr": 8.203578035579715e-05, "epoch": 0.2755601339170744, "percentage": 31.47, "elapsed_time": "2:28:53", "remaining_time": "5:24:12", "throughput": 1257.29, "total_tokens": 11231616}
{"current_steps": 1075, "total_steps": 3400, "loss": 0.4635, "lr": 8.184871134885513e-05, "epoch": 0.276847798094257, "percentage": 31.62, "elapsed_time": "2:30:21", "remaining_time": "5:25:11", "throughput": 1250.77, "total_tokens": 11283720}
{"current_steps": 1080, "total_steps": 3400, "loss": 0.4974, "lr": 8.166088911519235e-05, "epoch": 0.2781354622714396, "percentage": 31.76, "elapsed_time": "2:31:50", "remaining_time": "5:26:10", "throughput": 1244.32, "total_tokens": 11336144}
{"current_steps": 1085, "total_steps": 3400, "loss": 0.4439, "lr": 8.147231809683236e-05, "epoch": 0.2794231264486222, "percentage": 31.91, "elapsed_time": "2:33:19", "remaining_time": "5:27:07", "throughput": 1238.08, "total_tokens": 11389128}
{"current_steps": 1090, "total_steps": 3400, "loss": 0.4368, "lr": 8.128300275350756e-05, "epoch": 0.2807107906258048, "percentage": 32.06, "elapsed_time": "2:34:47", "remaining_time": "5:28:03", "throughput": 1231.92, "total_tokens": 11441864}
{"current_steps": 1095, "total_steps": 3400, "loss": 0.4895, "lr": 8.109294756255375e-05, "epoch": 0.2819984548029874, "percentage": 32.21, "elapsed_time": "2:36:17", "remaining_time": "5:28:59", "throughput": 1225.81, "total_tokens": 11494880}
{"current_steps": 1100, "total_steps": 3400, "loss": 0.4825, "lr": 8.090215701880419e-05, "epoch": 0.28328611898017, "percentage": 32.35, "elapsed_time": "2:37:45", "remaining_time": "5:29:51", "throughput": 1219.93, "total_tokens": 11547008}
{"current_steps": 1100, "total_steps": 3400, "eval_loss": 0.4798590838909149, "epoch": 0.28328611898017, "percentage": 32.35, "elapsed_time": "2:38:25", "remaining_time": "5:31:16", "throughput": 1214.71, "total_tokens": 11547008}
{"current_steps": 1105, "total_steps": 3400, "loss": 0.4927, "lr": 8.07106356344834e-05, "epoch": 0.28457378315735254, "percentage": 32.5, "elapsed_time": "2:40:02", "remaining_time": "5:32:23", "throughput": 1208.03, "total_tokens": 11600032}
{"current_steps": 1110, "total_steps": 3400, "loss": 0.4353, "lr": 8.051838793910038e-05, "epoch": 0.28586144733453517, "percentage": 32.65, "elapsed_time": "2:41:30", "remaining_time": "5:33:11", "throughput": 1202.47, "total_tokens": 11652120}
{"current_steps": 1115, "total_steps": 3400, "loss": 0.4891, "lr": 8.032541847934146e-05, "epoch": 0.28714911151171774, "percentage": 32.79, "elapsed_time": "2:42:59", "remaining_time": "5:34:01", "throughput": 1196.88, "total_tokens": 11705184}
{"current_steps": 1120, "total_steps": 3400, "loss": 0.4497, "lr": 8.013173181896283e-05, "epoch": 0.28843677568890036, "percentage": 32.94, "elapsed_time": "2:44:27", "remaining_time": "5:34:48", "throughput": 1191.54, "total_tokens": 11758032}
{"current_steps": 1125, "total_steps": 3400, "loss": 0.4927, "lr": 7.993733253868256e-05, "epoch": 0.28972443986608293, "percentage": 33.09, "elapsed_time": "2:45:57", "remaining_time": "5:35:36", "throughput": 1186.11, "total_tokens": 11810736}
{"current_steps": 1130, "total_steps": 3400, "loss": 0.4853, "lr": 7.974222523607236e-05, "epoch": 0.2910121040432655, "percentage": 33.24, "elapsed_time": "2:47:24", "remaining_time": "5:36:17", "throughput": 1181.07, "total_tokens": 11863152}
{"current_steps": 1135, "total_steps": 3400, "loss": 0.4458, "lr": 7.954641452544865e-05, "epoch": 0.2922997682204481, "percentage": 33.38, "elapsed_time": "2:48:52", "remaining_time": "5:37:00", "throughput": 1175.84, "total_tokens": 11914536}
{"current_steps": 1140, "total_steps": 3400, "loss": 0.3976, "lr": 7.934990503776363e-05, "epoch": 0.2935874323976307, "percentage": 33.53, "elapsed_time": "2:50:19", "remaining_time": "5:37:40", "throughput": 1170.89, "total_tokens": 11966064}
{"current_steps": 1145, "total_steps": 3400, "loss": 0.508, "lr": 7.915270142049566e-05, "epoch": 0.2948750965748133, "percentage": 33.68, "elapsed_time": "2:51:47", "remaining_time": "5:38:20", "throughput": 1166.0, "total_tokens": 12018928}
{"current_steps": 1150, "total_steps": 3400, "loss": 0.4553, "lr": 7.89548083375394e-05, "epoch": 0.2961627607519959, "percentage": 33.82, "elapsed_time": "2:53:14", "remaining_time": "5:38:57", "throughput": 1161.26, "total_tokens": 12071088}
{"current_steps": 1150, "total_steps": 3400, "eval_loss": 0.45381438732147217, "epoch": 0.2961627607519959, "percentage": 33.82, "elapsed_time": "2:53:53", "remaining_time": "5:40:12", "throughput": 1156.99, "total_tokens": 12071088}
{"current_steps": 1155, "total_steps": 3400, "loss": 0.4192, "lr": 7.875623046909544e-05, "epoch": 0.29745042492917845, "percentage": 33.97, "elapsed_time": "2:55:26", "remaining_time": "5:41:00", "throughput": 1151.57, "total_tokens": 12122128}
{"current_steps": 1160, "total_steps": 3400, "loss": 0.433, "lr": 7.855697251155967e-05, "epoch": 0.29873808910636107, "percentage": 34.12, "elapsed_time": "2:56:53", "remaining_time": "5:41:34", "throughput": 1147.09, "total_tokens": 12174288}
{"current_steps": 1165, "total_steps": 3400, "loss": 0.4817, "lr": 7.835703917741212e-05, "epoch": 0.30002575328354364, "percentage": 34.26, "elapsed_time": "2:58:21", "remaining_time": "5:42:09", "throughput": 1142.59, "total_tokens": 12227008}
{"current_steps": 1170, "total_steps": 3400, "loss": 0.485, "lr": 7.81564351951057e-05, "epoch": 0.30131341746072626, "percentage": 34.41, "elapsed_time": "2:59:48", "remaining_time": "5:42:41", "throughput": 1138.31, "total_tokens": 12280168}
{"current_steps": 1175, "total_steps": 3400, "loss": 0.4532, "lr": 7.795516530895414e-05, "epoch": 0.30260108163790883, "percentage": 34.56, "elapsed_time": "3:01:15", "remaining_time": "5:43:13", "throughput": 1134.03, "total_tokens": 12333072}
{"current_steps": 1180, "total_steps": 3400, "loss": 0.4643, "lr": 7.775323427901993e-05, "epoch": 0.3038887458150914, "percentage": 34.71, "elapsed_time": "3:02:43", "remaining_time": "5:43:46", "throughput": 1129.73, "total_tokens": 12386208}
{"current_steps": 1185, "total_steps": 3400, "loss": 0.4577, "lr": 7.755064688100171e-05, "epoch": 0.305176409992274, "percentage": 34.85, "elapsed_time": "3:04:11", "remaining_time": "5:44:16", "throughput": 1125.62, "total_tokens": 12439304}
{"current_steps": 1190, "total_steps": 3400, "loss": 0.4666, "lr": 7.734740790612136e-05, "epoch": 0.3064640741694566, "percentage": 35.0, "elapsed_time": "3:05:39", "remaining_time": "5:44:48", "throughput": 1121.31, "total_tokens": 12491360}
{"current_steps": 1195, "total_steps": 3400, "loss": 0.407, "lr": 7.714352216101055e-05, "epoch": 0.3077517383466392, "percentage": 35.15, "elapsed_time": "3:07:07", "remaining_time": "5:45:17", "throughput": 1117.27, "total_tokens": 12544264}
{"current_steps": 1200, "total_steps": 3400, "loss": 0.454, "lr": 7.693899446759727e-05, "epoch": 0.3090394025238218, "percentage": 35.29, "elapsed_time": "3:08:36", "remaining_time": "5:45:47", "throughput": 1113.05, "total_tokens": 12596208}
{"current_steps": 1200, "total_steps": 3400, "eval_loss": 0.49250805377960205, "epoch": 0.3090394025238218, "percentage": 35.29, "elapsed_time": "3:09:15", "remaining_time": "5:46:58", "throughput": 1109.25, "total_tokens": 12596208}
{"current_steps": 1205, "total_steps": 3400, "loss": 0.5226, "lr": 7.673382966299163e-05, "epoch": 0.31032706670100435, "percentage": 35.44, "elapsed_time": "3:10:48", "remaining_time": "5:47:33", "throughput": 1104.87, "total_tokens": 12648936}
{"current_steps": 1210, "total_steps": 3400, "loss": 0.4757, "lr": 7.65280325993715e-05, "epoch": 0.311614730878187, "percentage": 35.59, "elapsed_time": "3:12:16", "remaining_time": "5:48:00", "throughput": 1101.05, "total_tokens": 12702432}
{"current_steps": 1215, "total_steps": 3400, "loss": 0.451, "lr": 7.63216081438678e-05, "epoch": 0.31290239505536954, "percentage": 35.74, "elapsed_time": "3:13:43", "remaining_time": "5:48:23", "throughput": 1097.34, "total_tokens": 12755128}
{"current_steps": 1220, "total_steps": 3400, "loss": 0.4155, "lr": 7.611456117844934e-05, "epoch": 0.31419005923255217, "percentage": 35.88, "elapsed_time": "3:15:12", "remaining_time": "5:48:49", "throughput": 1093.54, "total_tokens": 12808152}
{"current_steps": 1225, "total_steps": 3400, "loss": 0.4094, "lr": 7.59068965998074e-05, "epoch": 0.31547772340973473, "percentage": 36.03, "elapsed_time": "3:16:39", "remaining_time": "5:49:10", "throughput": 1090.0, "total_tokens": 12861592}
{"current_steps": 1230, "total_steps": 3400, "loss": 0.4663, "lr": 7.569861931923989e-05, "epoch": 0.31676538758691736, "percentage": 36.18, "elapsed_time": "3:18:08", "remaining_time": "5:49:33", "throughput": 1086.31, "total_tokens": 12914240}
{"current_steps": 1235, "total_steps": 3400, "loss": 0.468, "lr": 7.548973426253521e-05, "epoch": 0.3180530517640999, "percentage": 36.32, "elapsed_time": "3:19:35", "remaining_time": "5:49:52", "throughput": 1082.86, "total_tokens": 12967472}
{"current_steps": 1240, "total_steps": 3400, "loss": 0.4744, "lr": 7.528024636985575e-05, "epoch": 0.3193407159412825, "percentage": 36.47, "elapsed_time": "3:21:04", "remaining_time": "5:50:15", "throughput": 1079.25, "total_tokens": 13020232}
{"current_steps": 1245, "total_steps": 3400, "loss": 0.4269, "lr": 7.507016059562107e-05, "epoch": 0.3206283801184651, "percentage": 36.62, "elapsed_time": "3:22:31", "remaining_time": "5:50:33", "throughput": 1075.84, "total_tokens": 13073032}
{"current_steps": 1250, "total_steps": 3400, "loss": 0.4725, "lr": 7.485948190839077e-05, "epoch": 0.3219160442956477, "percentage": 36.76, "elapsed_time": "3:24:00", "remaining_time": "5:50:52", "throughput": 1072.35, "total_tokens": 13125624}
{"current_steps": 1250, "total_steps": 3400, "eval_loss": 0.4339977502822876, "epoch": 0.3219160442956477, "percentage": 36.76, "elapsed_time": "3:24:39", "remaining_time": "5:52:00", "throughput": 1068.93, "total_tokens": 13125624}
{"current_steps": 1255, "total_steps": 3400, "loss": 0.4196, "lr": 7.464821529074679e-05, "epoch": 0.3232037084728303, "percentage": 36.91, "elapsed_time": "3:26:12", "remaining_time": "5:52:25", "throughput": 1065.2, "total_tokens": 13178656}
{"current_steps": 1260, "total_steps": 3400, "loss": 0.4349, "lr": 7.443636573917585e-05, "epoch": 0.3244913726500129, "percentage": 37.06, "elapsed_time": "3:27:40", "remaining_time": "5:52:42", "throughput": 1061.89, "total_tokens": 13231224}
{"current_steps": 1265, "total_steps": 3400, "loss": 0.4726, "lr": 7.422393826395108e-05, "epoch": 0.32577903682719545, "percentage": 37.21, "elapsed_time": "3:29:07", "remaining_time": "5:52:57", "throughput": 1058.62, "total_tokens": 13283208}
{"current_steps": 1270, "total_steps": 3400, "loss": 0.4604, "lr": 7.40109378890136e-05, "epoch": 0.32706670100437807, "percentage": 37.35, "elapsed_time": "3:30:35", "remaining_time": "5:53:11", "throughput": 1055.43, "total_tokens": 13335808}
{"current_steps": 1275, "total_steps": 3400, "loss": 0.4606, "lr": 7.379736965185368e-05, "epoch": 0.32835436518156064, "percentage": 37.5, "elapsed_time": "3:32:03", "remaining_time": "5:53:26", "throughput": 1052.3, "total_tokens": 13389112}
{"current_steps": 1280, "total_steps": 3400, "loss": 0.4487, "lr": 7.358323860339165e-05, "epoch": 0.32964202935874326, "percentage": 37.65, "elapsed_time": "3:33:30", "remaining_time": "5:53:36", "throughput": 1049.31, "total_tokens": 13441816}
{"current_steps": 1285, "total_steps": 3400, "loss": 0.422, "lr": 7.336854980785839e-05, "epoch": 0.33092969353592583, "percentage": 37.79, "elapsed_time": "3:34:58", "remaining_time": "5:53:49", "throughput": 1046.13, "total_tokens": 13493592}
{"current_steps": 1290, "total_steps": 3400, "loss": 0.5397, "lr": 7.315330834267553e-05, "epoch": 0.3322173577131084, "percentage": 37.94, "elapsed_time": "3:36:25", "remaining_time": "5:53:59", "throughput": 1043.14, "total_tokens": 13545696}
{"current_steps": 1295, "total_steps": 3400, "loss": 0.5022, "lr": 7.293751929833553e-05, "epoch": 0.333505021890291, "percentage": 38.09, "elapsed_time": "3:37:53", "remaining_time": "5:54:11", "throughput": 1040.05, "total_tokens": 13597560}
{"current_steps": 1300, "total_steps": 3400, "loss": 0.4794, "lr": 7.272118777828108e-05, "epoch": 0.3347926860674736, "percentage": 38.24, "elapsed_time": "3:39:20", "remaining_time": "5:54:19", "throughput": 1037.23, "total_tokens": 13650264}
{"current_steps": 1300, "total_steps": 3400, "eval_loss": 0.4991846978664398, "epoch": 0.3347926860674736, "percentage": 38.24, "elapsed_time": "3:39:58", "remaining_time": "5:55:20", "throughput": 1034.22, "total_tokens": 13650264}
{"current_steps": 1305, "total_steps": 3400, "loss": 0.4971, "lr": 7.250431889878455e-05, "epoch": 0.3360803502446562, "percentage": 38.38, "elapsed_time": "3:41:32", "remaining_time": "5:55:39", "throughput": 1030.84, "total_tokens": 13702584}
{"current_steps": 1310, "total_steps": 3400, "loss": 0.4574, "lr": 7.228691778882693e-05, "epoch": 0.3373680144218388, "percentage": 38.53, "elapsed_time": "3:42:59", "remaining_time": "5:55:45", "throughput": 1028.08, "total_tokens": 13755024}
{"current_steps": 1315, "total_steps": 3400, "loss": 0.4463, "lr": 7.20689895899765e-05, "epoch": 0.33865567859902135, "percentage": 38.68, "elapsed_time": "3:44:28", "remaining_time": "5:55:54", "throughput": 1025.21, "total_tokens": 13807528}
{"current_steps": 1320, "total_steps": 3400, "loss": 0.4549, "lr": 7.185053945626733e-05, "epoch": 0.33994334277620397, "percentage": 38.82, "elapsed_time": "3:45:54", "remaining_time": "5:55:59", "throughput": 1022.48, "total_tokens": 13859760}
{"current_steps": 1325, "total_steps": 3400, "loss": 0.4073, "lr": 7.163157255407732e-05, "epoch": 0.34123100695338654, "percentage": 38.97, "elapsed_time": "3:47:23", "remaining_time": "5:56:06", "throughput": 1019.66, "total_tokens": 13911656}
{"current_steps": 1330, "total_steps": 3400, "loss": 0.433, "lr": 7.141209406200599e-05, "epoch": 0.34251867113056916, "percentage": 39.12, "elapsed_time": "3:48:50", "remaining_time": "5:56:10", "throughput": 1016.99, "total_tokens": 13963816}
{"current_steps": 1335, "total_steps": 3400, "loss": 0.4244, "lr": 7.1192109170752e-05, "epoch": 0.34380633530775173, "percentage": 39.26, "elapsed_time": "3:50:19", "remaining_time": "5:56:15", "throughput": 1014.25, "total_tokens": 14016256}
{"current_steps": 1340, "total_steps": 3400, "loss": 0.4448, "lr": 7.097162308299054e-05, "epoch": 0.34509399948493436, "percentage": 39.41, "elapsed_time": "3:51:46", "remaining_time": "5:56:18", "throughput": 1011.67, "total_tokens": 14068768}
{"current_steps": 1345, "total_steps": 3400, "loss": 0.4608, "lr": 7.07506410132501e-05, "epoch": 0.3463816636621169, "percentage": 39.56, "elapsed_time": "3:53:14", "remaining_time": "5:56:22", "throughput": 1009.05, "total_tokens": 14121272}
{"current_steps": 1350, "total_steps": 3400, "loss": 0.3994, "lr": 7.052916818778918e-05, "epoch": 0.3476693278392995, "percentage": 39.71, "elapsed_time": "3:54:42", "remaining_time": "5:56:24", "throughput": 1006.46, "total_tokens": 14173240}
{"current_steps": 1350, "total_steps": 3400, "eval_loss": 0.460835725069046, "epoch": 0.3476693278392995, "percentage": 39.71, "elapsed_time": "3:55:20", "remaining_time": "5:57:22", "throughput": 1003.72, "total_tokens": 14173240}
{"current_steps": 1355, "total_steps": 3400, "loss": 0.41, "lr": 7.030720984447279e-05, "epoch": 0.3489569920164821, "percentage": 39.85, "elapsed_time": "3:56:54", "remaining_time": "5:57:33", "throughput": 1000.79, "total_tokens": 14226032}
{"current_steps": 1360, "total_steps": 3400, "loss": 0.3751, "lr": 7.008477123264848e-05, "epoch": 0.3502446561936647, "percentage": 40.0, "elapsed_time": "3:58:22", "remaining_time": "5:57:33", "throughput": 998.33, "total_tokens": 14278128}
{"current_steps": 1365, "total_steps": 3400, "loss": 0.4814, "lr": 6.986185761302224e-05, "epoch": 0.3515323203708473, "percentage": 40.15, "elapsed_time": "3:59:49", "remaining_time": "5:57:32", "throughput": 995.92, "total_tokens": 14330624}
{"current_steps": 1370, "total_steps": 3400, "loss": 0.5007, "lr": 6.963847425753403e-05, "epoch": 0.3528199845480299, "percentage": 40.29, "elapsed_time": "4:01:16", "remaining_time": "5:57:31", "throughput": 993.47, "total_tokens": 14382416}
{"current_steps": 1375, "total_steps": 3400, "loss": 0.4335, "lr": 6.941462644923318e-05, "epoch": 0.35410764872521244, "percentage": 40.44, "elapsed_time": "4:02:44", "remaining_time": "5:57:29", "throughput": 991.11, "total_tokens": 14434896}
{"current_steps": 1380, "total_steps": 3400, "loss": 0.4427, "lr": 6.919031948215335e-05, "epoch": 0.35539531290239507, "percentage": 40.59, "elapsed_time": "4:04:12", "remaining_time": "5:57:28", "throughput": 988.7, "total_tokens": 14487152}
{"current_steps": 1385, "total_steps": 3400, "loss": 0.42, "lr": 6.896555866118741e-05, "epoch": 0.35668297707957763, "percentage": 40.74, "elapsed_time": "4:05:39", "remaining_time": "5:57:24", "throughput": 986.43, "total_tokens": 14539608}
{"current_steps": 1390, "total_steps": 3400, "loss": 0.4573, "lr": 6.87403493019619e-05, "epoch": 0.35797064125676026, "percentage": 40.88, "elapsed_time": "4:07:08", "remaining_time": "5:57:22", "throughput": 984.08, "total_tokens": 14592168}
{"current_steps": 1395, "total_steps": 3400, "loss": 0.4341, "lr": 6.851469673071143e-05, "epoch": 0.3592583054339428, "percentage": 41.03, "elapsed_time": "4:08:35", "remaining_time": "5:57:17", "throughput": 981.81, "total_tokens": 14643920}
{"current_steps": 1400, "total_steps": 3400, "loss": 0.437, "lr": 6.828860628415253e-05, "epoch": 0.3605459696111254, "percentage": 41.18, "elapsed_time": "4:10:03", "remaining_time": "5:57:13", "throughput": 979.58, "total_tokens": 14697136}
{"current_steps": 1400, "total_steps": 3400, "eval_loss": 0.46620962023735046, "epoch": 0.3605459696111254, "percentage": 41.18, "elapsed_time": "4:10:41", "remaining_time": "5:58:08", "throughput": 977.08, "total_tokens": 14697136}
{"current_steps": 1405, "total_steps": 3400, "loss": 0.4377, "lr": 6.806208330935766e-05, "epoch": 0.361833633788308, "percentage": 41.32, "elapsed_time": "4:12:14", "remaining_time": "5:58:09", "throughput": 974.56, "total_tokens": 14749168}
{"current_steps": 1410, "total_steps": 3400, "loss": 0.412, "lr": 6.783513316362855e-05, "epoch": 0.3631212979654906, "percentage": 41.47, "elapsed_time": "4:13:42", "remaining_time": "5:58:04", "throughput": 972.35, "total_tokens": 14801568}
{"current_steps": 1415, "total_steps": 3400, "loss": 0.4441, "lr": 6.760776121436962e-05, "epoch": 0.3644089621426732, "percentage": 41.62, "elapsed_time": "4:15:09", "remaining_time": "5:57:56", "throughput": 970.22, "total_tokens": 14853384}
{"current_steps": 1420, "total_steps": 3400, "loss": 0.4576, "lr": 6.737997283896103e-05, "epoch": 0.3656966263198558, "percentage": 41.76, "elapsed_time": "4:16:37", "remaining_time": "5:57:50", "throughput": 968.1, "total_tokens": 14906632}
{"current_steps": 1425, "total_steps": 3400, "loss": 0.3853, "lr": 6.715177342463145e-05, "epoch": 0.36698429049703835, "percentage": 41.91, "elapsed_time": "4:18:04", "remaining_time": "5:57:41", "throughput": 966.07, "total_tokens": 14959240}
{"current_steps": 1430, "total_steps": 3400, "loss": 0.3755, "lr": 6.692316836833065e-05, "epoch": 0.36827195467422097, "percentage": 42.06, "elapsed_time": "4:19:32", "remaining_time": "5:57:33", "throughput": 964.01, "total_tokens": 15012256}
{"current_steps": 1435, "total_steps": 3400, "loss": 0.5384, "lr": 6.6694163076602e-05, "epoch": 0.36955961885140354, "percentage": 42.21, "elapsed_time": "4:21:00", "remaining_time": "5:57:24", "throughput": 961.97, "total_tokens": 15064664}
{"current_steps": 1440, "total_steps": 3400, "loss": 0.4377, "lr": 6.646476296545434e-05, "epoch": 0.37084728302858616, "percentage": 42.35, "elapsed_time": "4:22:28", "remaining_time": "5:57:15", "throughput": 959.94, "total_tokens": 15117384}
{"current_steps": 1445, "total_steps": 3400, "loss": 0.3876, "lr": 6.623497346023418e-05, "epoch": 0.37213494720576873, "percentage": 42.5, "elapsed_time": "4:23:55", "remaining_time": "5:57:05", "throughput": 957.94, "total_tokens": 15169880}
{"current_steps": 1450, "total_steps": 3400, "loss": 0.4065, "lr": 6.60047999954972e-05, "epoch": 0.37342261138295135, "percentage": 42.65, "elapsed_time": "4:25:23", "remaining_time": "5:56:54", "throughput": 955.96, "total_tokens": 15222568}
{"current_steps": 1450, "total_steps": 3400, "eval_loss": 0.4395444095134735, "epoch": 0.37342261138295135, "percentage": 42.65, "elapsed_time": "4:26:02", "remaining_time": "5:57:46", "throughput": 953.66, "total_tokens": 15222568}
{"current_steps": 1455, "total_steps": 3400, "loss": 0.4231, "lr": 6.57742480148798e-05, "epoch": 0.3747102755601339, "percentage": 42.79, "elapsed_time": "4:27:34", "remaining_time": "5:57:41", "throughput": 951.44, "total_tokens": 15275288}
{"current_steps": 1460, "total_steps": 3400, "loss": 0.4301, "lr": 6.554332297097031e-05, "epoch": 0.3759979397373165, "percentage": 42.94, "elapsed_time": "4:29:02", "remaining_time": "5:57:29", "throughput": 949.54, "total_tokens": 15328072}
{"current_steps": 1465, "total_steps": 3400, "loss": 0.446, "lr": 6.53120303251801e-05, "epoch": 0.3772856039144991, "percentage": 43.09, "elapsed_time": "4:30:30", "remaining_time": "5:57:17", "throughput": 947.56, "total_tokens": 15379120}
{"current_steps": 1470, "total_steps": 3400, "loss": 0.3764, "lr": 6.508037554761432e-05, "epoch": 0.3785732680916817, "percentage": 43.24, "elapsed_time": "4:31:57", "remaining_time": "5:57:04", "throughput": 945.66, "total_tokens": 15431104}
{"current_steps": 1475, "total_steps": 3400, "loss": 0.4423, "lr": 6.484836411694267e-05, "epoch": 0.3798609322688643, "percentage": 43.38, "elapsed_time": "4:33:25", "remaining_time": "5:56:50", "throughput": 943.75, "total_tokens": 15482816}
{"current_steps": 1480, "total_steps": 3400, "loss": 0.4439, "lr": 6.461600152026965e-05, "epoch": 0.3811485964460469, "percentage": 43.53, "elapsed_time": "4:34:52", "remaining_time": "5:56:35", "throughput": 941.94, "total_tokens": 15534896}
{"current_steps": 1485, "total_steps": 3400, "loss": 0.4408, "lr": 6.438329325300499e-05, "epoch": 0.38243626062322944, "percentage": 43.68, "elapsed_time": "4:36:20", "remaining_time": "5:56:22", "throughput": 940.09, "total_tokens": 15587496}
{"current_steps": 1490, "total_steps": 3400, "loss": 0.4086, "lr": 6.415024481873352e-05, "epoch": 0.38372392480041206, "percentage": 43.82, "elapsed_time": "4:37:47", "remaining_time": "5:56:05", "throughput": 938.33, "total_tokens": 15639672}
{"current_steps": 1495, "total_steps": 3400, "loss": 0.4489, "lr": 6.391686172908506e-05, "epoch": 0.38501158897759463, "percentage": 43.97, "elapsed_time": "4:39:16", "remaining_time": "5:55:51", "throughput": 936.55, "total_tokens": 15693120}
{"current_steps": 1500, "total_steps": 3400, "loss": 0.4338, "lr": 6.368314950360415e-05, "epoch": 0.38629925315477726, "percentage": 44.12, "elapsed_time": "4:40:42", "remaining_time": "5:55:34", "throughput": 934.81, "total_tokens": 15744848}
{"current_steps": 1500, "total_steps": 3400, "eval_loss": 0.45475366711616516, "epoch": 0.38629925315477726, "percentage": 44.12, "elapsed_time": "4:41:21", "remaining_time": "5:56:22", "throughput": 932.68, "total_tokens": 15744848}
{"current_steps": 1505, "total_steps": 3400, "loss": 0.4558, "lr": 6.344911366961934e-05, "epoch": 0.3875869173319598, "percentage": 44.26, "elapsed_time": "4:42:55", "remaining_time": "5:56:14", "throughput": 930.61, "total_tokens": 15797632}
{"current_steps": 1510, "total_steps": 3400, "loss": 0.4518, "lr": 6.321475976211266e-05, "epoch": 0.3888745815091424, "percentage": 44.41, "elapsed_time": "4:44:22", "remaining_time": "5:55:56", "throughput": 928.95, "total_tokens": 15850040}
{"current_steps": 1515, "total_steps": 3400, "loss": 0.4092, "lr": 6.298009332358856e-05, "epoch": 0.390162245686325, "percentage": 44.56, "elapsed_time": "4:45:50", "remaining_time": "5:55:39", "throughput": 927.21, "total_tokens": 15902496}
{"current_steps": 1520, "total_steps": 3400, "loss": 0.478, "lr": 6.274511990394294e-05, "epoch": 0.3914499098635076, "percentage": 44.71, "elapsed_time": "4:47:17", "remaining_time": "5:55:20", "throughput": 925.59, "total_tokens": 15954936}
{"current_steps": 1525, "total_steps": 3400, "loss": 0.4294, "lr": 6.250984506033183e-05, "epoch": 0.3927375740406902, "percentage": 44.85, "elapsed_time": "4:48:45", "remaining_time": "5:55:01", "throughput": 923.94, "total_tokens": 16007624}
{"current_steps": 1530, "total_steps": 3400, "loss": 0.3846, "lr": 6.227427435703997e-05, "epoch": 0.3940252382178728, "percentage": 45.0, "elapsed_time": "4:50:14", "remaining_time": "5:54:44", "throughput": 922.17, "total_tokens": 16059440}
{"current_steps": 1535, "total_steps": 3400, "loss": 0.4372, "lr": 6.203841336534924e-05, "epoch": 0.39531290239505534, "percentage": 45.15, "elapsed_time": "4:51:44", "remaining_time": "5:54:27", "throughput": 920.42, "total_tokens": 16111136}
{"current_steps": 1540, "total_steps": 3400, "loss": 0.484, "lr": 6.180226766340688e-05, "epoch": 0.39660056657223797, "percentage": 45.29, "elapsed_time": "4:53:13", "remaining_time": "5:54:09", "throughput": 918.74, "total_tokens": 16163976}
{"current_steps": 1545, "total_steps": 3400, "loss": 0.3965, "lr": 6.156584283609359e-05, "epoch": 0.39788823074942054, "percentage": 45.44, "elapsed_time": "4:54:43", "remaining_time": "5:53:51", "throughput": 917.09, "total_tokens": 16217192}
{"current_steps": 1550, "total_steps": 3400, "loss": 0.3872, "lr": 6.132914447489137e-05, "epoch": 0.39917589492660316, "percentage": 45.59, "elapsed_time": "4:56:11", "remaining_time": "5:53:30", "throughput": 915.52, "total_tokens": 16269896}
{"current_steps": 1550, "total_steps": 3400, "eval_loss": 0.4416767656803131, "epoch": 0.39917589492660316, "percentage": 45.59, "elapsed_time": "4:56:49", "remaining_time": "5:54:16", "throughput": 913.54, "total_tokens": 16269896}
{"current_steps": 1555, "total_steps": 3400, "loss": 0.4593, "lr": 6.109217817775139e-05, "epoch": 0.4004635591037857, "percentage": 45.74, "elapsed_time": "4:58:22", "remaining_time": "5:54:01", "throughput": 911.73, "total_tokens": 16322496}
{"current_steps": 1560, "total_steps": 3400, "loss": 0.4865, "lr": 6.085494954896156e-05, "epoch": 0.40175122328096835, "percentage": 45.88, "elapsed_time": "4:59:50", "remaining_time": "5:53:39", "throughput": 910.22, "total_tokens": 16375320}
{"current_steps": 1565, "total_steps": 3400, "loss": 0.4422, "lr": 6.061746419901388e-05, "epoch": 0.4030388874581509, "percentage": 46.03, "elapsed_time": "5:01:17", "remaining_time": "5:53:16", "throughput": 908.76, "total_tokens": 16428096}
{"current_steps": 1570, "total_steps": 3400, "loss": 0.3538, "lr": 6.0379727744471936e-05, "epoch": 0.4043265516353335, "percentage": 46.18, "elapsed_time": "5:02:45", "remaining_time": "5:52:53", "throughput": 907.26, "total_tokens": 16480832}
{"current_steps": 1575, "total_steps": 3400, "loss": 0.3923, "lr": 6.014174580783794e-05, "epoch": 0.4056142158125161, "percentage": 46.32, "elapsed_time": "5:04:12", "remaining_time": "5:52:29", "throughput": 905.86, "total_tokens": 16534016}
{"current_steps": 1580, "total_steps": 3400, "loss": 0.3967, "lr": 5.990352401741981e-05, "epoch": 0.4069018799896987, "percentage": 46.47, "elapsed_time": "5:05:40", "remaining_time": "5:52:06", "throughput": 904.34, "total_tokens": 16586216}
{"current_steps": 1585, "total_steps": 3400, "loss": 0.4212, "lr": 5.9665068007197976e-05, "epoch": 0.4081895441668813, "percentage": 46.62, "elapsed_time": "5:07:07", "remaining_time": "5:51:41", "throughput": 902.97, "total_tokens": 16639312}
{"current_steps": 1590, "total_steps": 3400, "loss": 0.3489, "lr": 5.94263834166923e-05, "epoch": 0.40947720834406387, "percentage": 46.76, "elapsed_time": "5:08:35", "remaining_time": "5:51:17", "throughput": 901.53, "total_tokens": 16692328}
{"current_steps": 1595, "total_steps": 3400, "loss": 0.4105, "lr": 5.918747589082853e-05, "epoch": 0.41076487252124644, "percentage": 46.91, "elapsed_time": "5:10:02", "remaining_time": "5:50:51", "throughput": 900.15, "total_tokens": 16745088}
{"current_steps": 1600, "total_steps": 3400, "loss": 0.3914, "lr": 5.8948351079804875e-05, "epoch": 0.41205253669842906, "percentage": 47.06, "elapsed_time": "5:11:30", "remaining_time": "5:50:26", "throughput": 898.78, "total_tokens": 16798768}
{"current_steps": 1600, "total_steps": 3400, "eval_loss": 0.4657597243785858, "epoch": 0.41205253669842906, "percentage": 47.06, "elapsed_time": "5:12:08", "remaining_time": "5:51:10", "throughput": 896.94, "total_tokens": 16798768}