|
{"current_steps": 5, "total_steps": 3400, "loss": 2.8889, "lr": 2.9411764705882355e-06, "epoch": 0.0012876641771825909, "percentage": 0.15, "elapsed_time": "0:02:46", "remaining_time": "1 day, 7:20:36", "throughput": 317.97, "total_tokens": 52840} |
|
{"current_steps": 10, "total_steps": 3400, "loss": 2.8165, "lr": 5.882352941176471e-06, "epoch": 0.0025753283543651817, "percentage": 0.29, "elapsed_time": "0:04:14", "remaining_time": "23:57:41", "throughput": 414.71, "total_tokens": 105528} |
|
{"current_steps": 15, "total_steps": 3400, "loss": 2.8363, "lr": 8.823529411764707e-06, "epoch": 0.0038629925315477724, "percentage": 0.44, "elapsed_time": "0:05:42", "remaining_time": "21:27:36", "throughput": 463.76, "total_tokens": 158768} |
|
{"current_steps": 20, "total_steps": 3400, "loss": 2.6853, "lr": 1.1764705882352942e-05, "epoch": 0.0051506567087303634, "percentage": 0.59, "elapsed_time": "0:07:11", "remaining_time": "20:14:07", "throughput": 489.08, "total_tokens": 210816} |
|
{"current_steps": 25, "total_steps": 3400, "loss": 2.2992, "lr": 1.4705882352941177e-05, "epoch": 0.006438320885912954, "percentage": 0.74, "elapsed_time": "0:08:38", "remaining_time": "19:26:57", "throughput": 506.96, "total_tokens": 262936} |
|
{"current_steps": 30, "total_steps": 3400, "loss": 1.8923, "lr": 1.7647058823529414e-05, "epoch": 0.007725985063095545, "percentage": 0.88, "elapsed_time": "0:10:08", "remaining_time": "18:58:31", "throughput": 518.43, "total_tokens": 315264} |
|
{"current_steps": 35, "total_steps": 3400, "loss": 1.6984, "lr": 2.058823529411765e-05, "epoch": 0.009013649240278136, "percentage": 1.03, "elapsed_time": "0:11:36", "remaining_time": "18:36:02", "throughput": 528.14, "total_tokens": 367840} |
|
{"current_steps": 40, "total_steps": 3400, "loss": 1.6434, "lr": 2.3529411764705884e-05, "epoch": 0.010301313417460727, "percentage": 1.18, "elapsed_time": "0:13:06", "remaining_time": "18:20:53", "throughput": 534.26, "total_tokens": 420112} |
|
{"current_steps": 45, "total_steps": 3400, "loss": 1.4659, "lr": 2.647058823529412e-05, "epoch": 0.011588977594643318, "percentage": 1.32, "elapsed_time": "0:14:34", "remaining_time": "18:06:24", "throughput": 540.69, "total_tokens": 472728} |
|
{"current_steps": 50, "total_steps": 3400, "loss": 1.3506, "lr": 2.9411764705882354e-05, "epoch": 0.012876641771825908, "percentage": 1.47, "elapsed_time": "0:16:03", "remaining_time": "17:56:24", "throughput": 544.27, "total_tokens": 524648} |
|
{"current_steps": 50, "total_steps": 3400, "eval_loss": 1.1727452278137207, "epoch": 0.012876641771825908, "percentage": 1.47, "elapsed_time": "0:17:10", "remaining_time": "19:10:27", "throughput": 509.24, "total_tokens": 524648} |
|
{"current_steps": 55, "total_steps": 3400, "loss": 1.1455, "lr": 3.235294117647059e-05, "epoch": 0.014164305949008499, "percentage": 1.62, "elapsed_time": "0:18:46", "remaining_time": "19:01:52", "throughput": 511.73, "total_tokens": 576472} |
|
{"current_steps": 60, "total_steps": 3400, "loss": 0.9971, "lr": 3.529411764705883e-05, "epoch": 0.01545197012619109, "percentage": 1.76, "elapsed_time": "0:20:15", "remaining_time": "18:48:02", "throughput": 516.56, "total_tokens": 628056} |
|
{"current_steps": 65, "total_steps": 3400, "loss": 0.9073, "lr": 3.8235294117647055e-05, "epoch": 0.01673963430337368, "percentage": 1.91, "elapsed_time": "0:21:44", "remaining_time": "18:35:51", "throughput": 521.45, "total_tokens": 680448} |
|
{"current_steps": 70, "total_steps": 3400, "loss": 0.8386, "lr": 4.11764705882353e-05, "epoch": 0.018027298480556272, "percentage": 2.06, "elapsed_time": "0:23:19", "remaining_time": "18:29:19", "throughput": 524.37, "total_tokens": 733664} |
|
{"current_steps": 75, "total_steps": 3400, "loss": 0.7827, "lr": 4.411764705882353e-05, "epoch": 0.01931496265773886, "percentage": 2.21, "elapsed_time": "0:24:51", "remaining_time": "18:22:18", "throughput": 526.93, "total_tokens": 786096} |
|
{"current_steps": 80, "total_steps": 3400, "loss": 0.7814, "lr": 4.705882352941177e-05, "epoch": 0.020602626834921454, "percentage": 2.35, "elapsed_time": "0:26:25", "remaining_time": "18:16:38", "throughput": 528.66, "total_tokens": 838192} |
|
{"current_steps": 85, "total_steps": 3400, "loss": 0.7297, "lr": 5e-05, "epoch": 0.021890291012104043, "percentage": 2.5, "elapsed_time": "0:27:54", "remaining_time": "18:08:20", "throughput": 531.61, "total_tokens": 890112} |
|
{"current_steps": 90, "total_steps": 3400, "loss": 0.7894, "lr": 5.294117647058824e-05, "epoch": 0.023177955189286635, "percentage": 2.65, "elapsed_time": "0:29:24", "remaining_time": "18:01:48", "throughput": 534.58, "total_tokens": 943472} |
|
{"current_steps": 95, "total_steps": 3400, "loss": 0.7758, "lr": 5.588235294117647e-05, "epoch": 0.024465619366469224, "percentage": 2.79, "elapsed_time": "0:30:52", "remaining_time": "17:54:11", "throughput": 538.09, "total_tokens": 996872} |
|
{"current_steps": 100, "total_steps": 3400, "loss": 0.7577, "lr": 5.882352941176471e-05, "epoch": 0.025753283543651816, "percentage": 2.94, "elapsed_time": "0:32:21", "remaining_time": "17:47:53", "throughput": 540.69, "total_tokens": 1049816} |
|
{"current_steps": 100, "total_steps": 3400, "eval_loss": 0.7517351508140564, "epoch": 0.025753283543651816, "percentage": 2.94, "elapsed_time": "0:33:00", "remaining_time": "18:09:13", "throughput": 530.1, "total_tokens": 1049816} |
|
{"current_steps": 105, "total_steps": 3400, "loss": 0.7579, "lr": 6.176470588235295e-05, "epoch": 0.027040947720834405, "percentage": 3.09, "elapsed_time": "0:34:36", "remaining_time": "18:05:49", "throughput": 531.09, "total_tokens": 1102584} |
|
{"current_steps": 110, "total_steps": 3400, "loss": 0.7659, "lr": 6.470588235294118e-05, "epoch": 0.028328611898016998, "percentage": 3.24, "elapsed_time": "0:36:03", "remaining_time": "17:58:31", "throughput": 534.07, "total_tokens": 1155512} |
|
{"current_steps": 115, "total_steps": 3400, "loss": 0.7469, "lr": 6.764705882352942e-05, "epoch": 0.029616276075199587, "percentage": 3.38, "elapsed_time": "0:37:32", "remaining_time": "17:52:22", "throughput": 536.29, "total_tokens": 1207976} |
|
{"current_steps": 120, "total_steps": 3400, "loss": 0.7353, "lr": 7.058823529411765e-05, "epoch": 0.03090394025238218, "percentage": 3.53, "elapsed_time": "0:39:00", "remaining_time": "17:46:11", "throughput": 538.27, "total_tokens": 1259776} |
|
{"current_steps": 125, "total_steps": 3400, "loss": 0.7537, "lr": 7.352941176470589e-05, "epoch": 0.03219160442956477, "percentage": 3.68, "elapsed_time": "0:40:29", "remaining_time": "17:40:51", "throughput": 540.35, "total_tokens": 1312760} |
|
{"current_steps": 130, "total_steps": 3400, "loss": 0.7669, "lr": 7.647058823529411e-05, "epoch": 0.03347926860674736, "percentage": 3.82, "elapsed_time": "0:41:57", "remaining_time": "17:35:14", "throughput": 542.54, "total_tokens": 1365616} |
|
{"current_steps": 135, "total_steps": 3400, "loss": 0.722, "lr": 7.941176470588235e-05, "epoch": 0.03476693278392995, "percentage": 3.97, "elapsed_time": "0:43:26", "remaining_time": "17:30:31", "throughput": 543.91, "total_tokens": 1417544} |
|
{"current_steps": 140, "total_steps": 3400, "loss": 0.7502, "lr": 8.23529411764706e-05, "epoch": 0.036054596961112545, "percentage": 4.12, "elapsed_time": "0:44:52", "remaining_time": "17:24:54", "throughput": 545.93, "total_tokens": 1469856} |
|
{"current_steps": 145, "total_steps": 3400, "loss": 0.7174, "lr": 8.529411764705883e-05, "epoch": 0.037342261138295134, "percentage": 4.26, "elapsed_time": "0:46:20", "remaining_time": "17:20:11", "throughput": 547.25, "total_tokens": 1521496} |
|
{"current_steps": 150, "total_steps": 3400, "loss": 0.7018, "lr": 8.823529411764706e-05, "epoch": 0.03862992531547772, "percentage": 4.41, "elapsed_time": "0:47:46", "remaining_time": "17:15:14", "throughput": 548.82, "total_tokens": 1573376} |
|
{"current_steps": 150, "total_steps": 3400, "eval_loss": 0.7309949994087219, "epoch": 0.03862992531547772, "percentage": 4.41, "elapsed_time": "0:48:25", "remaining_time": "17:29:02", "throughput": 541.61, "total_tokens": 1573376} |
|
{"current_steps": 155, "total_steps": 3400, "loss": 0.738, "lr": 9.11764705882353e-05, "epoch": 0.03991758949266031, "percentage": 4.56, "elapsed_time": "0:49:58", "remaining_time": "17:26:05", "throughput": 542.4, "total_tokens": 1626136} |
|
{"current_steps": 160, "total_steps": 3400, "loss": 0.7579, "lr": 9.411764705882353e-05, "epoch": 0.04120525366984291, "percentage": 4.71, "elapsed_time": "0:51:24", "remaining_time": "17:20:51", "throughput": 544.34, "total_tokens": 1678760} |
|
{"current_steps": 165, "total_steps": 3400, "loss": 0.7502, "lr": 9.705882352941177e-05, "epoch": 0.042492917847025496, "percentage": 4.85, "elapsed_time": "0:52:51", "remaining_time": "17:16:21", "throughput": 545.87, "total_tokens": 1731240} |
|
{"current_steps": 170, "total_steps": 3400, "loss": 0.7448, "lr": 0.0001, "epoch": 0.043780582024208085, "percentage": 5.0, "elapsed_time": "0:54:17", "remaining_time": "17:11:35", "throughput": 547.57, "total_tokens": 1783816} |
|
{"current_steps": 175, "total_steps": 3400, "loss": 0.6648, "lr": 9.999940874631277e-05, "epoch": 0.045068246201390674, "percentage": 5.15, "elapsed_time": "0:55:45", "remaining_time": "17:07:24", "throughput": 548.45, "total_tokens": 1834592} |
|
{"current_steps": 180, "total_steps": 3400, "loss": 0.7759, "lr": 9.999763499923432e-05, "epoch": 0.04635591037857327, "percentage": 5.29, "elapsed_time": "0:57:11", "remaining_time": "17:02:59", "throughput": 550.3, "total_tokens": 1888176} |
|
{"current_steps": 185, "total_steps": 3400, "loss": 0.7167, "lr": 9.999467880071402e-05, "epoch": 0.04764357455575586, "percentage": 5.44, "elapsed_time": "0:58:39", "remaining_time": "16:59:16", "throughput": 551.35, "total_tokens": 1940280} |
|
{"current_steps": 190, "total_steps": 3400, "loss": 0.7483, "lr": 9.999054022066641e-05, "epoch": 0.04893123873293845, "percentage": 5.59, "elapsed_time": "1:00:04", "remaining_time": "16:55:04", "throughput": 552.88, "total_tokens": 1993096} |
|
{"current_steps": 195, "total_steps": 3400, "loss": 0.7464, "lr": 9.998521935696953e-05, "epoch": 0.050218902910121044, "percentage": 5.74, "elapsed_time": "1:01:31", "remaining_time": "16:51:14", "throughput": 554.14, "total_tokens": 2045648} |
|
{"current_steps": 200, "total_steps": 3400, "loss": 0.7594, "lr": 9.997871633546257e-05, "epoch": 0.05150656708730363, "percentage": 5.88, "elapsed_time": "1:02:57", "remaining_time": "16:47:25", "throughput": 555.61, "total_tokens": 2099008} |
|
{"current_steps": 200, "total_steps": 3400, "eval_loss": 0.7274295687675476, "epoch": 0.05150656708730363, "percentage": 5.88, "elapsed_time": "1:03:35", "remaining_time": "16:57:35", "throughput": 550.06, "total_tokens": 2099008} |
|
{"current_steps": 205, "total_steps": 3400, "loss": 0.706, "lr": 9.997103130994296e-05, "epoch": 0.05279423126448622, "percentage": 6.03, "elapsed_time": "1:05:08", "remaining_time": "16:55:12", "throughput": 550.54, "total_tokens": 2151680} |
|
{"current_steps": 210, "total_steps": 3400, "loss": 0.7186, "lr": 9.996216446216267e-05, "epoch": 0.05408189544166881, "percentage": 6.18, "elapsed_time": "1:06:34", "remaining_time": "16:51:24", "throughput": 551.65, "total_tokens": 2203784} |
|
{"current_steps": 215, "total_steps": 3400, "loss": 0.7009, "lr": 9.995211600182397e-05, "epoch": 0.055369559618851406, "percentage": 6.32, "elapsed_time": "1:08:00", "remaining_time": "16:47:34", "throughput": 552.73, "total_tokens": 2255632} |
|
{"current_steps": 220, "total_steps": 3400, "loss": 0.6801, "lr": 9.994088616657444e-05, "epoch": 0.056657223796033995, "percentage": 6.47, "elapsed_time": "1:09:28", "remaining_time": "16:44:15", "throughput": 553.68, "total_tokens": 2308096} |
|
{"current_steps": 225, "total_steps": 3400, "loss": 0.7569, "lr": 9.992847522200133e-05, "epoch": 0.057944887973216584, "percentage": 6.62, "elapsed_time": "1:10:55", "remaining_time": "16:40:46", "throughput": 554.88, "total_tokens": 2361168} |
|
{"current_steps": 230, "total_steps": 3400, "loss": 0.7402, "lr": 9.99148834616253e-05, "epoch": 0.05923255215039917, "percentage": 6.76, "elapsed_time": "1:12:23", "remaining_time": "16:37:48", "throughput": 555.71, "total_tokens": 2413896} |
|
{"current_steps": 235, "total_steps": 3400, "loss": 0.7191, "lr": 9.990011120689351e-05, "epoch": 0.06052021632758177, "percentage": 6.91, "elapsed_time": "1:13:51", "remaining_time": "16:34:47", "throughput": 556.47, "total_tokens": 2466136} |
|
{"current_steps": 240, "total_steps": 3400, "loss": 0.7274, "lr": 9.988415880717194e-05, "epoch": 0.06180788050476436, "percentage": 7.06, "elapsed_time": "1:15:20", "remaining_time": "16:31:59", "throughput": 557.21, "total_tokens": 2518848} |
|
{"current_steps": 245, "total_steps": 3400, "loss": 0.7704, "lr": 9.986702663973722e-05, "epoch": 0.06309554468194695, "percentage": 7.21, "elapsed_time": "1:16:48", "remaining_time": "16:29:01", "throughput": 558.22, "total_tokens": 2572384} |
|
{"current_steps": 250, "total_steps": 3400, "loss": 0.7346, "lr": 9.98487151097676e-05, "epoch": 0.06438320885912954, "percentage": 7.35, "elapsed_time": "1:18:17", "remaining_time": "16:26:29", "throughput": 558.88, "total_tokens": 2625352} |
|
{"current_steps": 250, "total_steps": 3400, "eval_loss": 0.7181503176689148, "epoch": 0.06438320885912954, "percentage": 7.35, "elapsed_time": "1:18:55", "remaining_time": "16:34:29", "throughput": 554.38, "total_tokens": 2625352} |
|
{"current_steps": 255, "total_steps": 3400, "loss": 0.7408, "lr": 9.98292246503335e-05, "epoch": 0.06567087303631212, "percentage": 7.5, "elapsed_time": "1:20:28", "remaining_time": "16:32:30", "throughput": 554.67, "total_tokens": 2678216} |
|
{"current_steps": 260, "total_steps": 3400, "loss": 0.7044, "lr": 9.980855572238714e-05, "epoch": 0.06695853721349472, "percentage": 7.65, "elapsed_time": "1:21:57", "remaining_time": "16:29:49", "throughput": 555.29, "total_tokens": 2730664} |
|
{"current_steps": 265, "total_steps": 3400, "loss": 0.7334, "lr": 9.978670881475172e-05, "epoch": 0.06824620139067732, "percentage": 7.79, "elapsed_time": "1:23:25", "remaining_time": "16:26:51", "throughput": 556.14, "total_tokens": 2783584} |
|
{"current_steps": 270, "total_steps": 3400, "loss": 0.7075, "lr": 9.976368444410985e-05, "epoch": 0.0695338655678599, "percentage": 7.94, "elapsed_time": "1:24:53", "remaining_time": "16:24:09", "throughput": 556.79, "total_tokens": 2836152} |
|
{"current_steps": 275, "total_steps": 3400, "loss": 0.7039, "lr": 9.973948315499126e-05, "epoch": 0.0708215297450425, "percentage": 8.09, "elapsed_time": "1:26:21", "remaining_time": "16:21:18", "throughput": 557.35, "total_tokens": 2887808} |
|
{"current_steps": 280, "total_steps": 3400, "loss": 0.6953, "lr": 9.971410551976002e-05, "epoch": 0.07210919392222509, "percentage": 8.24, "elapsed_time": "1:27:50", "remaining_time": "16:18:43", "throughput": 557.81, "total_tokens": 2939656} |
|
{"current_steps": 285, "total_steps": 3400, "loss": 0.7022, "lr": 9.968755213860094e-05, "epoch": 0.07339685809940767, "percentage": 8.38, "elapsed_time": "1:29:17", "remaining_time": "16:15:56", "throughput": 558.4, "total_tokens": 2991632} |
|
{"current_steps": 290, "total_steps": 3400, "loss": 0.6796, "lr": 9.96598236395054e-05, "epoch": 0.07468452227659027, "percentage": 8.53, "elapsed_time": "1:30:45", "remaining_time": "16:13:18", "throughput": 558.92, "total_tokens": 3043616} |
|
{"current_steps": 295, "total_steps": 3400, "loss": 0.7346, "lr": 9.96309206782565e-05, "epoch": 0.07597218645377285, "percentage": 8.68, "elapsed_time": "1:32:13", "remaining_time": "16:10:42", "throughput": 559.66, "total_tokens": 3096920} |
|
{"current_steps": 300, "total_steps": 3400, "loss": 0.6815, "lr": 9.960084393841355e-05, "epoch": 0.07725985063095545, "percentage": 8.82, "elapsed_time": "1:33:40", "remaining_time": "16:07:58", "throughput": 560.28, "total_tokens": 3149032} |
|
{"current_steps": 300, "total_steps": 3400, "eval_loss": 0.7073924541473389, "epoch": 0.07725985063095545, "percentage": 8.82, "elapsed_time": "1:34:18", "remaining_time": "16:14:32", "throughput": 556.5, "total_tokens": 3149032} |
|
{"current_steps": 305, "total_steps": 3400, "loss": 0.7208, "lr": 9.956959413129585e-05, "epoch": 0.07854751480813804, "percentage": 8.97, "elapsed_time": "1:35:53", "remaining_time": "16:13:04", "throughput": 556.45, "total_tokens": 3201560} |
|
{"current_steps": 310, "total_steps": 3400, "loss": 0.7144, "lr": 9.953717199596598e-05, "epoch": 0.07983517898532062, "percentage": 9.12, "elapsed_time": "1:37:21", "remaining_time": "16:10:27", "throughput": 557.14, "total_tokens": 3254632} |
|
{"current_steps": 315, "total_steps": 3400, "loss": 0.6861, "lr": 9.95035782992122e-05, "epoch": 0.08112284316250322, "percentage": 9.26, "elapsed_time": "1:38:52", "remaining_time": "16:08:22", "throughput": 557.33, "total_tokens": 3306432} |
|
{"current_steps": 320, "total_steps": 3400, "loss": 0.6836, "lr": 9.94688138355304e-05, "epoch": 0.08241050733968582, "percentage": 9.41, "elapsed_time": "1:40:21", "remaining_time": "16:05:55", "throughput": 557.75, "total_tokens": 3358392} |
|
{"current_steps": 325, "total_steps": 3400, "loss": 0.7353, "lr": 9.943287942710527e-05, "epoch": 0.0836981715168684, "percentage": 9.56, "elapsed_time": "1:41:50", "remaining_time": "16:03:39", "throughput": 558.24, "total_tokens": 3411424} |
|
{"current_steps": 330, "total_steps": 3400, "loss": 0.6774, "lr": 9.939577592379088e-05, "epoch": 0.08498583569405099, "percentage": 9.71, "elapsed_time": "1:43:18", "remaining_time": "16:01:07", "throughput": 558.66, "total_tokens": 3462992} |
|
{"current_steps": 335, "total_steps": 3400, "loss": 0.7331, "lr": 9.935750420309055e-05, "epoch": 0.08627349987123359, "percentage": 9.85, "elapsed_time": "1:44:49", "remaining_time": "15:59:00", "throughput": 559.08, "total_tokens": 3516136} |
|
{"current_steps": 340, "total_steps": 3400, "loss": 0.6939, "lr": 9.931806517013612e-05, "epoch": 0.08756116404841617, "percentage": 10.0, "elapsed_time": "1:46:17", "remaining_time": "15:56:39", "throughput": 559.51, "total_tokens": 3568360} |
|
{"current_steps": 345, "total_steps": 3400, "loss": 0.7158, "lr": 9.927745975766654e-05, "epoch": 0.08884882822559877, "percentage": 10.15, "elapsed_time": "1:47:46", "remaining_time": "15:54:25", "throughput": 559.88, "total_tokens": 3620696} |
|
{"current_steps": 350, "total_steps": 3400, "loss": 0.6932, "lr": 9.923568892600578e-05, "epoch": 0.09013649240278135, "percentage": 10.29, "elapsed_time": "1:49:14", "remaining_time": "15:51:57", "throughput": 560.41, "total_tokens": 3673152} |
|
{"current_steps": 350, "total_steps": 3400, "eval_loss": 0.7044599056243896, "epoch": 0.09013649240278135, "percentage": 10.29, "elapsed_time": "1:49:52", "remaining_time": "15:57:30", "throughput": 557.15, "total_tokens": 3673152} |
|
{"current_steps": 355, "total_steps": 3400, "loss": 0.6778, "lr": 9.91927536630402e-05, "epoch": 0.09142415657996394, "percentage": 10.44, "elapsed_time": "1:51:26", "remaining_time": "15:55:53", "throughput": 557.13, "total_tokens": 3725296} |
|
{"current_steps": 360, "total_steps": 3400, "loss": 0.6857, "lr": 9.91486549841951e-05, "epoch": 0.09271182075714654, "percentage": 10.59, "elapsed_time": "1:52:52", "remaining_time": "15:53:13", "throughput": 557.75, "total_tokens": 3777552} |
|
{"current_steps": 365, "total_steps": 3400, "loss": 0.7184, "lr": 9.91033939324107e-05, "epoch": 0.09399948493432912, "percentage": 10.74, "elapsed_time": "1:54:21", "remaining_time": "15:50:50", "throughput": 558.25, "total_tokens": 3830200} |
|
{"current_steps": 370, "total_steps": 3400, "loss": 0.7196, "lr": 9.905697157811761e-05, "epoch": 0.09528714911151172, "percentage": 10.88, "elapsed_time": "1:55:46", "remaining_time": "15:48:09", "throughput": 558.99, "total_tokens": 3883200} |
|
{"current_steps": 375, "total_steps": 3400, "loss": 0.6914, "lr": 9.900938901921131e-05, "epoch": 0.09657481328869431, "percentage": 11.03, "elapsed_time": "1:57:14", "remaining_time": "15:45:41", "throughput": 559.5, "total_tokens": 3935576} |
|
{"current_steps": 380, "total_steps": 3400, "loss": 0.6681, "lr": 9.896064738102635e-05, "epoch": 0.0978624774658769, "percentage": 11.18, "elapsed_time": "1:58:40", "remaining_time": "15:43:07", "throughput": 560.04, "total_tokens": 3987624} |
|
{"current_steps": 385, "total_steps": 3400, "loss": 0.6723, "lr": 9.891074781630966e-05, "epoch": 0.09915014164305949, "percentage": 11.32, "elapsed_time": "2:00:06", "remaining_time": "15:40:32", "throughput": 560.58, "total_tokens": 4039680} |
|
{"current_steps": 390, "total_steps": 3400, "loss": 0.6498, "lr": 9.885969150519331e-05, "epoch": 0.10043780582024209, "percentage": 11.47, "elapsed_time": "2:01:33", "remaining_time": "15:38:13", "throughput": 560.91, "total_tokens": 4091216} |
|
{"current_steps": 395, "total_steps": 3400, "loss": 0.7311, "lr": 9.88074796551666e-05, "epoch": 0.10172546999742467, "percentage": 11.62, "elapsed_time": "2:03:01", "remaining_time": "15:35:58", "throughput": 561.41, "total_tokens": 4144264} |
|
{"current_steps": 400, "total_steps": 3400, "loss": 0.7089, "lr": 9.875411350104744e-05, "epoch": 0.10301313417460727, "percentage": 11.76, "elapsed_time": "2:04:29", "remaining_time": "15:33:37", "throughput": 561.93, "total_tokens": 4197072} |
|
{"current_steps": 400, "total_steps": 3400, "eval_loss": 0.6847750544548035, "epoch": 0.10301313417460727, "percentage": 11.76, "elapsed_time": "2:05:06", "remaining_time": "15:38:22", "throughput": 559.09, "total_tokens": 4197072} |
|
{"current_steps": 405, "total_steps": 3400, "loss": 0.7021, "lr": 9.86995943049533e-05, "epoch": 0.10430079835178985, "percentage": 11.91, "elapsed_time": "2:06:38", "remaining_time": "15:36:29", "throughput": 559.3, "total_tokens": 4249656} |
|
{"current_steps": 410, "total_steps": 3400, "loss": 0.6943, "lr": 9.864392335627117e-05, "epoch": 0.10558846252897244, "percentage": 12.06, "elapsed_time": "2:08:05", "remaining_time": "15:34:08", "throughput": 559.87, "total_tokens": 4302944} |
|
{"current_steps": 415, "total_steps": 3400, "loss": 0.7146, "lr": 9.858710197162721e-05, "epoch": 0.10687612670615504, "percentage": 12.21, "elapsed_time": "2:09:31", "remaining_time": "15:31:37", "throughput": 560.45, "total_tokens": 4355480} |
|
{"current_steps": 420, "total_steps": 3400, "loss": 0.6312, "lr": 9.852913149485556e-05, "epoch": 0.10816379088333762, "percentage": 12.35, "elapsed_time": "2:10:58", "remaining_time": "15:29:18", "throughput": 560.88, "total_tokens": 4407688} |
|
{"current_steps": 425, "total_steps": 3400, "loss": 0.6877, "lr": 9.847001329696653e-05, "epoch": 0.10945145506052022, "percentage": 12.5, "elapsed_time": "2:12:24", "remaining_time": "15:26:51", "throughput": 561.37, "total_tokens": 4459736} |
|
{"current_steps": 430, "total_steps": 3400, "loss": 0.6975, "lr": 9.840974877611422e-05, "epoch": 0.11073911923770281, "percentage": 12.65, "elapsed_time": "2:13:51", "remaining_time": "15:24:35", "throughput": 561.88, "total_tokens": 4512928} |
|
{"current_steps": 435, "total_steps": 3400, "loss": 0.651, "lr": 9.834833935756344e-05, "epoch": 0.1120267834148854, "percentage": 12.79, "elapsed_time": "2:15:18", "remaining_time": "15:22:19", "throughput": 562.37, "total_tokens": 4565840} |
|
{"current_steps": 440, "total_steps": 3400, "loss": 0.685, "lr": 9.828578649365601e-05, "epoch": 0.11331444759206799, "percentage": 12.94, "elapsed_time": "2:16:48", "remaining_time": "15:20:20", "throughput": 562.61, "total_tokens": 4618168} |
|
{"current_steps": 445, "total_steps": 3400, "loss": 0.6258, "lr": 9.822209166377635e-05, "epoch": 0.11460211176925057, "percentage": 13.09, "elapsed_time": "2:18:17", "remaining_time": "15:18:17", "throughput": 562.81, "total_tokens": 4669784} |
|
{"current_steps": 450, "total_steps": 3400, "loss": 0.6732, "lr": 9.815725637431662e-05, "epoch": 0.11588977594643317, "percentage": 13.24, "elapsed_time": "2:19:48", "remaining_time": "15:16:28", "throughput": 563.01, "total_tokens": 4722528} |
|
{"current_steps": 450, "total_steps": 3400, "eval_loss": 0.6526497006416321, "epoch": 0.11588977594643317, "percentage": 13.24, "elapsed_time": "2:20:27", "remaining_time": "15:20:44", "throughput": 560.4, "total_tokens": 4722528} |
|
{"current_steps": 455, "total_steps": 3400, "loss": 0.6544, "lr": 9.809128215864097e-05, "epoch": 0.11717744012361576, "percentage": 13.38, "elapsed_time": "2:22:00", "remaining_time": "15:19:06", "throughput": 560.37, "total_tokens": 4774400} |
|
{"current_steps": 460, "total_steps": 3400, "loss": 0.652, "lr": 9.802417057704931e-05, "epoch": 0.11846510430079835, "percentage": 13.53, "elapsed_time": "2:23:28", "remaining_time": "15:17:02", "throughput": 560.67, "total_tokens": 4826704} |
|
{"current_steps": 465, "total_steps": 3400, "loss": 0.6582, "lr": 9.795592321674045e-05, "epoch": 0.11975276847798094, "percentage": 13.68, "elapsed_time": "2:24:56", "remaining_time": "15:14:49", "throughput": 561.17, "total_tokens": 4880072} |
|
{"current_steps": 470, "total_steps": 3400, "loss": 0.6506, "lr": 9.788654169177453e-05, "epoch": 0.12104043265516354, "percentage": 13.82, "elapsed_time": "2:26:24", "remaining_time": "15:12:43", "throughput": 561.43, "total_tokens": 4931968} |
|
{"current_steps": 475, "total_steps": 3400, "loss": 0.6551, "lr": 9.781602764303487e-05, "epoch": 0.12232809683234612, "percentage": 13.97, "elapsed_time": "2:27:52", "remaining_time": "15:10:36", "throughput": 561.69, "total_tokens": 4983656} |
|
{"current_steps": 480, "total_steps": 3400, "loss": 0.6978, "lr": 9.774438273818911e-05, "epoch": 0.12361576100952872, "percentage": 14.12, "elapsed_time": "2:29:20", "remaining_time": "15:08:32", "throughput": 562.05, "total_tokens": 5036528} |
|
{"current_steps": 485, "total_steps": 3400, "loss": 0.6407, "lr": 9.767160867164979e-05, "epoch": 0.12490342518671131, "percentage": 14.26, "elapsed_time": "2:30:49", "remaining_time": "15:06:32", "throughput": 562.3, "total_tokens": 5088768} |
|
{"current_steps": 490, "total_steps": 3400, "loss": 0.6641, "lr": 9.759770716453436e-05, "epoch": 0.1261910893638939, "percentage": 14.41, "elapsed_time": "2:32:17", "remaining_time": "15:04:26", "throughput": 562.73, "total_tokens": 5142080} |
|
{"current_steps": 495, "total_steps": 3400, "loss": 0.6588, "lr": 9.752267996462434e-05, "epoch": 0.1274787535410765, "percentage": 14.56, "elapsed_time": "2:33:47", "remaining_time": "15:02:30", "throughput": 562.95, "total_tokens": 5194432} |
|
{"current_steps": 500, "total_steps": 3400, "loss": 0.6304, "lr": 9.744652884632406e-05, "epoch": 0.12876641771825909, "percentage": 14.71, "elapsed_time": "2:35:15", "remaining_time": "15:00:27", "throughput": 563.24, "total_tokens": 5246640} |
|
{"current_steps": 500, "total_steps": 3400, "eval_loss": 0.6272165775299072, "epoch": 0.12876641771825909, "percentage": 14.71, "elapsed_time": "2:35:54", "remaining_time": "15:04:16", "throughput": 560.87, "total_tokens": 5246640} |
|
{"current_steps": 505, "total_steps": 3400, "loss": 0.5741, "lr": 9.736925561061871e-05, "epoch": 0.13005408189544168, "percentage": 14.85, "elapsed_time": "2:37:30", "remaining_time": "15:02:55", "throughput": 560.73, "total_tokens": 5299024} |
|
{"current_steps": 510, "total_steps": 3400, "loss": 0.6535, "lr": 9.729086208503174e-05, "epoch": 0.13134174607262425, "percentage": 15.0, "elapsed_time": "2:38:58", "remaining_time": "15:00:50", "throughput": 561.18, "total_tokens": 5352664} |
|
{"current_steps": 515, "total_steps": 3400, "loss": 0.6081, "lr": 9.721135012358156e-05, "epoch": 0.13262941024980685, "percentage": 15.15, "elapsed_time": "2:40:27", "remaining_time": "14:58:55", "throughput": 561.49, "total_tokens": 5406008} |
|
{"current_steps": 520, "total_steps": 3400, "loss": 0.6792, "lr": 9.713072160673777e-05, "epoch": 0.13391707442698944, "percentage": 15.29, "elapsed_time": "2:41:55", "remaining_time": "14:56:51", "throughput": 561.9, "total_tokens": 5459368} |
|
{"current_steps": 525, "total_steps": 3400, "loss": 0.6821, "lr": 9.704897844137673e-05, "epoch": 0.13520473860417204, "percentage": 15.44, "elapsed_time": "2:43:25", "remaining_time": "14:54:56", "throughput": 562.24, "total_tokens": 5512960} |
|
{"current_steps": 530, "total_steps": 3400, "loss": 0.5835, "lr": 9.696612256073633e-05, "epoch": 0.13649240278135463, "percentage": 15.59, "elapsed_time": "2:44:52", "remaining_time": "14:52:51", "throughput": 562.56, "total_tokens": 5565368} |
|
{"current_steps": 535, "total_steps": 3400, "loss": 0.6129, "lr": 9.688215592437039e-05, "epoch": 0.1377800669585372, "percentage": 15.74, "elapsed_time": "2:46:21", "remaining_time": "14:50:50", "throughput": 562.86, "total_tokens": 5618008} |
|
{"current_steps": 540, "total_steps": 3400, "loss": 0.5765, "lr": 9.679708051810221e-05, "epoch": 0.1390677311357198, "percentage": 15.88, "elapsed_time": "2:47:47", "remaining_time": "14:48:38", "throughput": 563.22, "total_tokens": 5670072} |
|
{"current_steps": 545, "total_steps": 3400, "loss": 0.6325, "lr": 9.67108983539777e-05, "epoch": 0.1403553953129024, "percentage": 16.03, "elapsed_time": "2:49:14", "remaining_time": "14:46:35", "throughput": 563.58, "total_tokens": 5722936} |
|
{"current_steps": 550, "total_steps": 3400, "loss": 0.5596, "lr": 9.662361147021779e-05, "epoch": 0.141643059490085, "percentage": 16.18, "elapsed_time": "2:50:41", "remaining_time": "14:44:27", "throughput": 563.9, "total_tokens": 5774880} |
|
{"current_steps": 550, "total_steps": 3400, "eval_loss": 0.5832681059837341, "epoch": 0.141643059490085, "percentage": 16.18, "elapsed_time": "2:51:19", "remaining_time": "14:47:45", "throughput": 561.8, "total_tokens": 5774880} |
|
{"current_steps": 555, "total_steps": 3400, "loss": 0.5073, "lr": 9.653522193117013e-05, "epoch": 0.14293072366726758, "percentage": 16.32, "elapsed_time": "2:52:51", "remaining_time": "14:46:05", "throughput": 561.79, "total_tokens": 5826608} |
|
{"current_steps": 560, "total_steps": 3400, "loss": 0.5652, "lr": 9.644573182726035e-05, "epoch": 0.14421838784445018, "percentage": 16.47, "elapsed_time": "2:54:18", "remaining_time": "14:43:58", "throughput": 562.21, "total_tokens": 5879776} |
|
{"current_steps": 565, "total_steps": 3400, "loss": 0.5727, "lr": 9.63551432749426e-05, "epoch": 0.14550605202163275, "percentage": 16.62, "elapsed_time": "2:55:44", "remaining_time": "14:41:50", "throughput": 562.64, "total_tokens": 5932888} |
|
{"current_steps": 570, "total_steps": 3400, "loss": 0.6251, "lr": 9.626345841664953e-05, "epoch": 0.14679371619881534, "percentage": 16.76, "elapsed_time": "2:57:12", "remaining_time": "14:39:49", "throughput": 562.87, "total_tokens": 5984648} |
|
{"current_steps": 575, "total_steps": 3400, "loss": 0.6508, "lr": 9.617067942074153e-05, "epoch": 0.14808138037599794, "percentage": 16.91, "elapsed_time": "2:58:38", "remaining_time": "14:37:39", "throughput": 563.24, "total_tokens": 6037000} |
|
{"current_steps": 580, "total_steps": 3400, "loss": 0.6686, "lr": 9.607680848145558e-05, "epoch": 0.14936904455318054, "percentage": 17.06, "elapsed_time": "3:00:06", "remaining_time": "14:35:39", "throughput": 563.62, "total_tokens": 6090512} |
|
{"current_steps": 585, "total_steps": 3400, "loss": 0.5793, "lr": 9.598184781885318e-05, "epoch": 0.15065670873036313, "percentage": 17.21, "elapsed_time": "3:01:32", "remaining_time": "14:33:32", "throughput": 564.01, "total_tokens": 6143320} |
|
{"current_steps": 590, "total_steps": 3400, "loss": 0.5954, "lr": 9.588579967876806e-05, "epoch": 0.1519443729075457, "percentage": 17.35, "elapsed_time": "3:02:59", "remaining_time": "14:31:33", "throughput": 564.28, "total_tokens": 6195720} |
|
{"current_steps": 595, "total_steps": 3400, "loss": 0.5644, "lr": 9.578866633275288e-05, "epoch": 0.1532320370847283, "percentage": 17.5, "elapsed_time": "3:04:25", "remaining_time": "14:29:26", "throughput": 564.6, "total_tokens": 6247592} |
|
{"current_steps": 600, "total_steps": 3400, "loss": 0.5794, "lr": 9.569045007802559e-05, "epoch": 0.1545197012619109, "percentage": 17.65, "elapsed_time": "3:05:53", "remaining_time": "14:27:29", "throughput": 564.81, "total_tokens": 6299656} |
|
{"current_steps": 600, "total_steps": 3400, "eval_loss": 0.6039358973503113, "epoch": 0.1545197012619109, "percentage": 17.65, "elapsed_time": "3:06:31", "remaining_time": "14:30:28", "throughput": 562.88, "total_tokens": 6299656} |
|
|