|
{"current_steps": 10, "total_steps": 8660, "loss": 2.4977, "learning_rate": 5.773672055427253e-07, "epoch": 0.004618937644341801, "percentage": 0.12, "elapsed_time": "0:00:24", "remaining_time": "5:58:33", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 20, "total_steps": 8660, "loss": 2.7655, "learning_rate": 1.1547344110854505e-06, "epoch": 0.009237875288683603, "percentage": 0.23, "elapsed_time": "0:00:47", "remaining_time": "5:41:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 30, "total_steps": 8660, "loss": 2.7241, "learning_rate": 1.7321016166281756e-06, "epoch": 0.013856812933025405, "percentage": 0.35, "elapsed_time": "0:01:11", "remaining_time": "5:41:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 40, "total_steps": 8660, "loss": 2.8357, "learning_rate": 2.309468822170901e-06, "epoch": 0.018475750577367205, "percentage": 0.46, "elapsed_time": "0:01:32", "remaining_time": "5:32:33", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 50, "total_steps": 8660, "loss": 2.1159, "learning_rate": 2.886836027713626e-06, "epoch": 0.023094688221709007, "percentage": 0.58, "elapsed_time": "0:01:54", "remaining_time": "5:29:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 60, "total_steps": 8660, "loss": 1.498, "learning_rate": 3.464203233256351e-06, "epoch": 0.02771362586605081, "percentage": 0.69, "elapsed_time": "0:02:18", "remaining_time": "5:31:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 70, "total_steps": 8660, "loss": 0.9835, "learning_rate": 4.041570438799077e-06, "epoch": 0.03233256351039261, "percentage": 0.81, "elapsed_time": "0:02:43", "remaining_time": "5:33:33", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 80, "total_steps": 8660, "loss": 1.0009, "learning_rate": 4.618937644341802e-06, "epoch": 0.03695150115473441, "percentage": 0.92, "elapsed_time": "0:03:07", "remaining_time": "5:35:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 90, "total_steps": 8660, "loss": 1.0264, "learning_rate": 5.196304849884527e-06, "epoch": 0.04157043879907621, "percentage": 1.04, "elapsed_time": "0:03:29", "remaining_time": "5:32:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 100, "total_steps": 8660, "loss": 0.8218, "learning_rate": 5.773672055427252e-06, "epoch": 0.046189376443418015, "percentage": 1.15, "elapsed_time": "0:03:52", "remaining_time": "5:32:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 110, "total_steps": 8660, "loss": 0.8719, "learning_rate": 6.351039260969978e-06, "epoch": 0.050808314087759814, "percentage": 1.27, "elapsed_time": "0:04:16", "remaining_time": "5:31:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 120, "total_steps": 8660, "loss": 0.8592, "learning_rate": 6.928406466512702e-06, "epoch": 0.05542725173210162, "percentage": 1.39, "elapsed_time": "0:04:40", "remaining_time": "5:32:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 130, "total_steps": 8660, "loss": 0.7982, "learning_rate": 7.505773672055427e-06, "epoch": 0.06004618937644342, "percentage": 1.5, "elapsed_time": "0:05:02", "remaining_time": "5:31:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 140, "total_steps": 8660, "loss": 0.7203, "learning_rate": 8.083140877598153e-06, "epoch": 0.06466512702078522, "percentage": 1.62, "elapsed_time": "0:05:26", "remaining_time": "5:30:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 150, "total_steps": 8660, "loss": 0.7647, "learning_rate": 8.660508083140878e-06, "epoch": 0.06928406466512702, "percentage": 1.73, "elapsed_time": "0:05:47", "remaining_time": "5:28:52", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 160, "total_steps": 8660, "loss": 0.7378, "learning_rate": 9.237875288683604e-06, "epoch": 0.07390300230946882, "percentage": 1.85, "elapsed_time": "0:06:11", "remaining_time": "5:29:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 170, "total_steps": 8660, "loss": 0.7355, "learning_rate": 9.815242494226329e-06, "epoch": 0.07852193995381063, "percentage": 1.96, "elapsed_time": "0:06:36", "remaining_time": "5:29:41", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 180, "total_steps": 8660, "loss": 0.6911, "learning_rate": 1.0392609699769053e-05, "epoch": 0.08314087759815242, "percentage": 2.08, "elapsed_time": "0:07:00", "remaining_time": "5:30:03", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 190, "total_steps": 8660, "loss": 0.7972, "learning_rate": 1.0969976905311778e-05, "epoch": 0.08775981524249422, "percentage": 2.19, "elapsed_time": "0:07:22", "remaining_time": "5:29:03", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 200, "total_steps": 8660, "loss": 0.6792, "learning_rate": 1.1547344110854504e-05, "epoch": 0.09237875288683603, "percentage": 2.31, "elapsed_time": "0:07:46", "remaining_time": "5:29:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 210, "total_steps": 8660, "loss": 0.7378, "learning_rate": 1.2124711316397229e-05, "epoch": 0.09699769053117784, "percentage": 2.42, "elapsed_time": "0:08:09", "remaining_time": "5:28:03", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 220, "total_steps": 8660, "loss": 0.7131, "learning_rate": 1.2702078521939955e-05, "epoch": 0.10161662817551963, "percentage": 2.54, "elapsed_time": "0:08:31", "remaining_time": "5:26:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 230, "total_steps": 8660, "loss": 0.6806, "learning_rate": 1.3279445727482678e-05, "epoch": 0.10623556581986143, "percentage": 2.66, "elapsed_time": "0:08:52", "remaining_time": "5:25:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 240, "total_steps": 8660, "loss": 0.7117, "learning_rate": 1.3856812933025404e-05, "epoch": 0.11085450346420324, "percentage": 2.77, "elapsed_time": "0:09:13", "remaining_time": "5:23:26", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 250, "total_steps": 8660, "loss": 0.6354, "learning_rate": 1.4434180138568129e-05, "epoch": 0.11547344110854503, "percentage": 2.89, "elapsed_time": "0:09:36", "remaining_time": "5:23:04", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 260, "total_steps": 8660, "loss": 0.6266, "learning_rate": 1.5011547344110854e-05, "epoch": 0.12009237875288684, "percentage": 3.0, "elapsed_time": "0:09:59", "remaining_time": "5:22:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 270, "total_steps": 8660, "loss": 0.6894, "learning_rate": 1.558891454965358e-05, "epoch": 0.12471131639722864, "percentage": 3.12, "elapsed_time": "0:10:23", "remaining_time": "5:22:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 280, "total_steps": 8660, "loss": 0.66, "learning_rate": 1.6166281755196306e-05, "epoch": 0.12933025404157045, "percentage": 3.23, "elapsed_time": "0:10:46", "remaining_time": "5:22:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 290, "total_steps": 8660, "loss": 0.665, "learning_rate": 1.674364896073903e-05, "epoch": 0.13394919168591224, "percentage": 3.35, "elapsed_time": "0:11:08", "remaining_time": "5:21:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 300, "total_steps": 8660, "loss": 0.6676, "learning_rate": 1.7321016166281756e-05, "epoch": 0.13856812933025403, "percentage": 3.46, "elapsed_time": "0:11:31", "remaining_time": "5:21:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 310, "total_steps": 8660, "loss": 0.6236, "learning_rate": 1.789838337182448e-05, "epoch": 0.14318706697459585, "percentage": 3.58, "elapsed_time": "0:11:54", "remaining_time": "5:20:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 320, "total_steps": 8660, "loss": 0.6283, "learning_rate": 1.8475750577367208e-05, "epoch": 0.14780600461893764, "percentage": 3.7, "elapsed_time": "0:12:17", "remaining_time": "5:20:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 330, "total_steps": 8660, "loss": 0.6472, "learning_rate": 1.9053117782909933e-05, "epoch": 0.15242494226327943, "percentage": 3.81, "elapsed_time": "0:12:42", "remaining_time": "5:20:53", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 340, "total_steps": 8660, "loss": 0.6139, "learning_rate": 1.9630484988452657e-05, "epoch": 0.15704387990762125, "percentage": 3.93, "elapsed_time": "0:13:07", "remaining_time": "5:21:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 350, "total_steps": 8660, "loss": 0.6019, "learning_rate": 2.0207852193995382e-05, "epoch": 0.16166281755196305, "percentage": 4.04, "elapsed_time": "0:13:32", "remaining_time": "5:21:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 360, "total_steps": 8660, "loss": 0.6484, "learning_rate": 2.0785219399538107e-05, "epoch": 0.16628175519630484, "percentage": 4.16, "elapsed_time": "0:13:57", "remaining_time": "5:21:39", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 370, "total_steps": 8660, "loss": 0.6423, "learning_rate": 2.1362586605080835e-05, "epoch": 0.17090069284064666, "percentage": 4.27, "elapsed_time": "0:14:21", "remaining_time": "5:21:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 380, "total_steps": 8660, "loss": 0.6167, "learning_rate": 2.1939953810623556e-05, "epoch": 0.17551963048498845, "percentage": 4.39, "elapsed_time": "0:14:45", "remaining_time": "5:21:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 390, "total_steps": 8660, "loss": 0.6029, "learning_rate": 2.251732101616628e-05, "epoch": 0.18013856812933027, "percentage": 4.5, "elapsed_time": "0:15:10", "remaining_time": "5:21:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 400, "total_steps": 8660, "loss": 0.5511, "learning_rate": 2.309468822170901e-05, "epoch": 0.18475750577367206, "percentage": 4.62, "elapsed_time": "0:15:33", "remaining_time": "5:21:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 410, "total_steps": 8660, "loss": 0.5919, "learning_rate": 2.3672055427251733e-05, "epoch": 0.18937644341801385, "percentage": 4.73, "elapsed_time": "0:15:57", "remaining_time": "5:21:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 420, "total_steps": 8660, "loss": 0.5407, "learning_rate": 2.4249422632794458e-05, "epoch": 0.19399538106235567, "percentage": 4.85, "elapsed_time": "0:16:22", "remaining_time": "5:21:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 430, "total_steps": 8660, "loss": 0.5829, "learning_rate": 2.4826789838337182e-05, "epoch": 0.19861431870669746, "percentage": 4.97, "elapsed_time": "0:16:46", "remaining_time": "5:21:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 440, "total_steps": 8660, "loss": 0.593, "learning_rate": 2.540415704387991e-05, "epoch": 0.20323325635103925, "percentage": 5.08, "elapsed_time": "0:17:10", "remaining_time": "5:20:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 450, "total_steps": 8660, "loss": 0.56, "learning_rate": 2.5981524249422635e-05, "epoch": 0.20785219399538107, "percentage": 5.2, "elapsed_time": "0:17:35", "remaining_time": "5:20:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 460, "total_steps": 8660, "loss": 0.5448, "learning_rate": 2.6558891454965356e-05, "epoch": 0.21247113163972287, "percentage": 5.31, "elapsed_time": "0:17:57", "remaining_time": "5:20:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 470, "total_steps": 8660, "loss": 0.5707, "learning_rate": 2.7136258660508084e-05, "epoch": 0.21709006928406466, "percentage": 5.43, "elapsed_time": "0:18:20", "remaining_time": "5:19:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 480, "total_steps": 8660, "loss": 0.5891, "learning_rate": 2.771362586605081e-05, "epoch": 0.22170900692840648, "percentage": 5.54, "elapsed_time": "0:18:42", "remaining_time": "5:18:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 490, "total_steps": 8660, "loss": 0.6231, "learning_rate": 2.8290993071593537e-05, "epoch": 0.22632794457274827, "percentage": 5.66, "elapsed_time": "0:19:05", "remaining_time": "5:18:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 500, "total_steps": 8660, "loss": 0.6294, "learning_rate": 2.8868360277136258e-05, "epoch": 0.23094688221709006, "percentage": 5.77, "elapsed_time": "0:19:29", "remaining_time": "5:18:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 510, "total_steps": 8660, "loss": 0.5795, "learning_rate": 2.9445727482678986e-05, "epoch": 0.23556581986143188, "percentage": 5.89, "elapsed_time": "0:19:54", "remaining_time": "5:18:06", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 520, "total_steps": 8660, "loss": 0.5634, "learning_rate": 3.0023094688221707e-05, "epoch": 0.24018475750577367, "percentage": 6.0, "elapsed_time": "0:20:17", "remaining_time": "5:17:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 530, "total_steps": 8660, "loss": 0.5372, "learning_rate": 3.060046189376444e-05, "epoch": 0.24480369515011546, "percentage": 6.12, "elapsed_time": "0:20:39", "remaining_time": "5:16:59", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 540, "total_steps": 8660, "loss": 0.5914, "learning_rate": 3.117782909930716e-05, "epoch": 0.24942263279445728, "percentage": 6.24, "elapsed_time": "0:21:01", "remaining_time": "5:16:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 550, "total_steps": 8660, "loss": 0.5767, "learning_rate": 3.175519630484989e-05, "epoch": 0.2540415704387991, "percentage": 6.35, "elapsed_time": "0:21:21", "remaining_time": "5:14:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 560, "total_steps": 8660, "loss": 0.5569, "learning_rate": 3.233256351039261e-05, "epoch": 0.2586605080831409, "percentage": 6.47, "elapsed_time": "0:21:45", "remaining_time": "5:14:45", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 570, "total_steps": 8660, "loss": 0.6248, "learning_rate": 3.290993071593534e-05, "epoch": 0.2632794457274827, "percentage": 6.58, "elapsed_time": "0:22:07", "remaining_time": "5:14:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 580, "total_steps": 8660, "loss": 0.5629, "learning_rate": 3.348729792147806e-05, "epoch": 0.2678983833718245, "percentage": 6.7, "elapsed_time": "0:22:31", "remaining_time": "5:13:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 590, "total_steps": 8660, "loss": 0.5942, "learning_rate": 3.4064665127020787e-05, "epoch": 0.27251732101616627, "percentage": 6.81, "elapsed_time": "0:22:54", "remaining_time": "5:13:16", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 600, "total_steps": 8660, "loss": 0.5694, "learning_rate": 3.464203233256351e-05, "epoch": 0.27713625866050806, "percentage": 6.93, "elapsed_time": "0:23:18", "remaining_time": "5:13:07", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 610, "total_steps": 8660, "loss": 0.5459, "learning_rate": 3.5219399538106236e-05, "epoch": 0.2817551963048499, "percentage": 7.04, "elapsed_time": "0:23:41", "remaining_time": "5:12:41", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 620, "total_steps": 8660, "loss": 0.5686, "learning_rate": 3.579676674364896e-05, "epoch": 0.2863741339491917, "percentage": 7.16, "elapsed_time": "0:24:04", "remaining_time": "5:12:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 630, "total_steps": 8660, "loss": 0.5185, "learning_rate": 3.6374133949191685e-05, "epoch": 0.2909930715935335, "percentage": 7.27, "elapsed_time": "0:24:27", "remaining_time": "5:11:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 640, "total_steps": 8660, "loss": 0.5548, "learning_rate": 3.6951501154734416e-05, "epoch": 0.2956120092378753, "percentage": 7.39, "elapsed_time": "0:24:50", "remaining_time": "5:11:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 650, "total_steps": 8660, "loss": 0.5658, "learning_rate": 3.7528868360277134e-05, "epoch": 0.3002309468822171, "percentage": 7.51, "elapsed_time": "0:25:12", "remaining_time": "5:10:39", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 660, "total_steps": 8660, "loss": 0.5459, "learning_rate": 3.8106235565819866e-05, "epoch": 0.30484988452655887, "percentage": 7.62, "elapsed_time": "0:25:35", "remaining_time": "5:10:16", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 670, "total_steps": 8660, "loss": 0.5823, "learning_rate": 3.868360277136259e-05, "epoch": 0.3094688221709007, "percentage": 7.74, "elapsed_time": "0:25:59", "remaining_time": "5:09:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 680, "total_steps": 8660, "loss": 0.5481, "learning_rate": 3.9260969976905315e-05, "epoch": 0.3140877598152425, "percentage": 7.85, "elapsed_time": "0:26:21", "remaining_time": "5:09:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 690, "total_steps": 8660, "loss": 0.4907, "learning_rate": 3.983833718244804e-05, "epoch": 0.3187066974595843, "percentage": 7.97, "elapsed_time": "0:26:45", "remaining_time": "5:09:04", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 700, "total_steps": 8660, "loss": 0.5703, "learning_rate": 4.0415704387990764e-05, "epoch": 0.3233256351039261, "percentage": 8.08, "elapsed_time": "0:27:07", "remaining_time": "5:08:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 710, "total_steps": 8660, "loss": 0.6229, "learning_rate": 4.099307159353349e-05, "epoch": 0.3279445727482679, "percentage": 8.2, "elapsed_time": "0:27:32", "remaining_time": "5:08:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 720, "total_steps": 8660, "loss": 0.5022, "learning_rate": 4.1570438799076213e-05, "epoch": 0.3325635103926097, "percentage": 8.31, "elapsed_time": "0:27:54", "remaining_time": "5:07:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 730, "total_steps": 8660, "loss": 0.5642, "learning_rate": 4.214780600461894e-05, "epoch": 0.3371824480369515, "percentage": 8.43, "elapsed_time": "0:28:18", "remaining_time": "5:07:33", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 740, "total_steps": 8660, "loss": 0.543, "learning_rate": 4.272517321016167e-05, "epoch": 0.3418013856812933, "percentage": 8.55, "elapsed_time": "0:28:42", "remaining_time": "5:07:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 750, "total_steps": 8660, "loss": 0.5056, "learning_rate": 4.330254041570439e-05, "epoch": 0.3464203233256351, "percentage": 8.66, "elapsed_time": "0:29:05", "remaining_time": "5:06:53", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 760, "total_steps": 8660, "loss": 0.5215, "learning_rate": 4.387990762124711e-05, "epoch": 0.3510392609699769, "percentage": 8.78, "elapsed_time": "0:29:29", "remaining_time": "5:06:33", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 770, "total_steps": 8660, "loss": 0.5418, "learning_rate": 4.445727482678984e-05, "epoch": 0.3556581986143187, "percentage": 8.89, "elapsed_time": "0:29:53", "remaining_time": "5:06:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 780, "total_steps": 8660, "loss": 0.5197, "learning_rate": 4.503464203233256e-05, "epoch": 0.36027713625866054, "percentage": 9.01, "elapsed_time": "0:30:17", "remaining_time": "5:06:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 790, "total_steps": 8660, "loss": 0.5151, "learning_rate": 4.561200923787529e-05, "epoch": 0.3648960739030023, "percentage": 9.12, "elapsed_time": "0:30:40", "remaining_time": "5:05:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 800, "total_steps": 8660, "loss": 0.5037, "learning_rate": 4.618937644341802e-05, "epoch": 0.3695150115473441, "percentage": 9.24, "elapsed_time": "0:31:04", "remaining_time": "5:05:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 810, "total_steps": 8660, "loss": 0.5411, "learning_rate": 4.676674364896074e-05, "epoch": 0.3741339491916859, "percentage": 9.35, "elapsed_time": "0:31:26", "remaining_time": "5:04:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 820, "total_steps": 8660, "loss": 0.5034, "learning_rate": 4.7344110854503466e-05, "epoch": 0.3787528868360277, "percentage": 9.47, "elapsed_time": "0:31:47", "remaining_time": "5:03:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 830, "total_steps": 8660, "loss": 0.5077, "learning_rate": 4.792147806004619e-05, "epoch": 0.3833718244803695, "percentage": 9.58, "elapsed_time": "0:32:09", "remaining_time": "5:03:22", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 840, "total_steps": 8660, "loss": 0.4883, "learning_rate": 4.8498845265588916e-05, "epoch": 0.38799076212471134, "percentage": 9.7, "elapsed_time": "0:32:32", "remaining_time": "5:02:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 850, "total_steps": 8660, "loss": 0.4918, "learning_rate": 4.907621247113165e-05, "epoch": 0.39260969976905313, "percentage": 9.82, "elapsed_time": "0:32:56", "remaining_time": "5:02:41", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 860, "total_steps": 8660, "loss": 0.4736, "learning_rate": 4.9653579676674365e-05, "epoch": 0.3972286374133949, "percentage": 9.93, "elapsed_time": "0:33:20", "remaining_time": "5:02:22", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 870, "total_steps": 8660, "loss": 0.5243, "learning_rate": 4.999996750557592e-05, "epoch": 0.4018475750577367, "percentage": 10.05, "elapsed_time": "0:33:42", "remaining_time": "5:01:53", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 880, "total_steps": 8660, "loss": 0.5206, "learning_rate": 4.9999601944275146e-05, "epoch": 0.4064665127020785, "percentage": 10.16, "elapsed_time": "0:34:04", "remaining_time": "5:01:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 890, "total_steps": 8660, "loss": 0.5998, "learning_rate": 4.999883020960263e-05, "epoch": 0.4110854503464203, "percentage": 10.28, "elapsed_time": "0:34:24", "remaining_time": "5:00:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 900, "total_steps": 8660, "loss": 0.5097, "learning_rate": 4.99976523140969e-05, "epoch": 0.41570438799076215, "percentage": 10.39, "elapsed_time": "0:34:45", "remaining_time": "4:59:39", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 910, "total_steps": 8660, "loss": 0.4699, "learning_rate": 4.999606827689545e-05, "epoch": 0.42032332563510394, "percentage": 10.51, "elapsed_time": "0:35:06", "remaining_time": "4:58:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 920, "total_steps": 8660, "loss": 0.5223, "learning_rate": 4.999407812373445e-05, "epoch": 0.42494226327944573, "percentage": 10.62, "elapsed_time": "0:35:26", "remaining_time": "4:58:09", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 930, "total_steps": 8660, "loss": 0.5396, "learning_rate": 4.9991681886948297e-05, "epoch": 0.4295612009237875, "percentage": 10.74, "elapsed_time": "0:35:49", "remaining_time": "4:57:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 940, "total_steps": 8660, "loss": 0.4965, "learning_rate": 4.9988879605469106e-05, "epoch": 0.4341801385681293, "percentage": 10.85, "elapsed_time": "0:36:09", "remaining_time": "4:57:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 950, "total_steps": 8660, "loss": 0.472, "learning_rate": 4.99856713248261e-05, "epoch": 0.4387990762124711, "percentage": 10.97, "elapsed_time": "0:36:30", "remaining_time": "4:56:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 960, "total_steps": 8660, "loss": 0.4878, "learning_rate": 4.9982057097144826e-05, "epoch": 0.44341801385681295, "percentage": 11.09, "elapsed_time": "0:36:52", "remaining_time": "4:55:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 970, "total_steps": 8660, "loss": 0.4883, "learning_rate": 4.9978036981146344e-05, "epoch": 0.44803695150115475, "percentage": 11.2, "elapsed_time": "0:37:14", "remaining_time": "4:55:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 980, "total_steps": 8660, "loss": 0.4929, "learning_rate": 4.9973611042146254e-05, "epoch": 0.45265588914549654, "percentage": 11.32, "elapsed_time": "0:37:36", "remaining_time": "4:54:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 990, "total_steps": 8660, "loss": 0.5046, "learning_rate": 4.996877935205365e-05, "epoch": 0.45727482678983833, "percentage": 11.43, "elapsed_time": "0:37:58", "remaining_time": "4:54:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1000, "total_steps": 8660, "loss": 0.4539, "learning_rate": 4.996354198936993e-05, "epoch": 0.4618937644341801, "percentage": 11.55, "elapsed_time": "0:38:21", "remaining_time": "4:53:51", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1010, "total_steps": 8660, "loss": 0.5296, "learning_rate": 4.995789903918755e-05, "epoch": 0.4665127020785219, "percentage": 11.66, "elapsed_time": "0:38:42", "remaining_time": "4:53:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1020, "total_steps": 8660, "loss": 0.5004, "learning_rate": 4.99518505931886e-05, "epoch": 0.47113163972286376, "percentage": 11.78, "elapsed_time": "0:39:03", "remaining_time": "4:52:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1030, "total_steps": 8660, "loss": 0.5319, "learning_rate": 4.9945396749643356e-05, "epoch": 0.47575057736720555, "percentage": 11.89, "elapsed_time": "0:39:25", "remaining_time": "4:52:04", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1040, "total_steps": 8660, "loss": 0.4952, "learning_rate": 4.993853761340867e-05, "epoch": 0.48036951501154734, "percentage": 12.01, "elapsed_time": "0:39:48", "remaining_time": "4:51:38", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1050, "total_steps": 8660, "loss": 0.4807, "learning_rate": 4.993127329592625e-05, "epoch": 0.48498845265588914, "percentage": 12.12, "elapsed_time": "0:40:10", "remaining_time": "4:51:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1060, "total_steps": 8660, "loss": 0.5269, "learning_rate": 4.9923603915220874e-05, "epoch": 0.4896073903002309, "percentage": 12.24, "elapsed_time": "0:40:33", "remaining_time": "4:50:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1070, "total_steps": 8660, "loss": 0.5183, "learning_rate": 4.9915529595898445e-05, "epoch": 0.4942263279445728, "percentage": 12.36, "elapsed_time": "0:40:55", "remaining_time": "4:50:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1080, "total_steps": 8660, "loss": 0.4928, "learning_rate": 4.9907050469144e-05, "epoch": 0.49884526558891457, "percentage": 12.47, "elapsed_time": "0:41:17", "remaining_time": "4:49:47", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1090, "total_steps": 8660, "loss": 0.5144, "learning_rate": 4.9898166672719547e-05, "epoch": 0.5034642032332564, "percentage": 12.59, "elapsed_time": "0:41:39", "remaining_time": "4:49:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1100, "total_steps": 8660, "loss": 0.4733, "learning_rate": 4.9888878350961857e-05, "epoch": 0.5080831408775982, "percentage": 12.7, "elapsed_time": "0:42:05", "remaining_time": "4:49:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1110, "total_steps": 8660, "loss": 0.5054, "learning_rate": 4.987918565478008e-05, "epoch": 0.5127020785219399, "percentage": 12.82, "elapsed_time": "0:42:29", "remaining_time": "4:49:04", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1120, "total_steps": 8660, "loss": 0.4454, "learning_rate": 4.9869088741653335e-05, "epoch": 0.5173210161662818, "percentage": 12.93, "elapsed_time": "0:42:54", "remaining_time": "4:48:49", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1130, "total_steps": 8660, "loss": 0.4871, "learning_rate": 4.985858777562812e-05, "epoch": 0.5219399538106235, "percentage": 13.05, "elapsed_time": "0:43:17", "remaining_time": "4:48:28", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1140, "total_steps": 8660, "loss": 0.4605, "learning_rate": 4.9847682927315656e-05, "epoch": 0.5265588914549654, "percentage": 13.16, "elapsed_time": "0:43:41", "remaining_time": "4:48:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1150, "total_steps": 8660, "loss": 0.5281, "learning_rate": 4.983637437388913e-05, "epoch": 0.5311778290993071, "percentage": 13.28, "elapsed_time": "0:44:04", "remaining_time": "4:47:47", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1160, "total_steps": 8660, "loss": 0.4568, "learning_rate": 4.98246622990808e-05, "epoch": 0.535796766743649, "percentage": 13.39, "elapsed_time": "0:44:28", "remaining_time": "4:47:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1170, "total_steps": 8660, "loss": 0.5351, "learning_rate": 4.981254689317902e-05, "epoch": 0.5404157043879908, "percentage": 13.51, "elapsed_time": "0:44:52", "remaining_time": "4:47:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1180, "total_steps": 8660, "loss": 0.4378, "learning_rate": 4.9800028353025125e-05, "epoch": 0.5450346420323325, "percentage": 13.63, "elapsed_time": "0:45:14", "remaining_time": "4:46:47", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1190, "total_steps": 8660, "loss": 0.4539, "learning_rate": 4.978710688201026e-05, "epoch": 0.5496535796766744, "percentage": 13.74, "elapsed_time": "0:45:37", "remaining_time": "4:46:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1200, "total_steps": 8660, "loss": 0.5349, "learning_rate": 4.9773782690072055e-05, "epoch": 0.5542725173210161, "percentage": 13.86, "elapsed_time": "0:46:02", "remaining_time": "4:46:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1210, "total_steps": 8660, "loss": 0.4938, "learning_rate": 4.976005599369126e-05, "epoch": 0.558891454965358, "percentage": 13.97, "elapsed_time": "0:46:26", "remaining_time": "4:45:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1220, "total_steps": 8660, "loss": 0.4473, "learning_rate": 4.974592701588816e-05, "epoch": 0.5635103926096998, "percentage": 14.09, "elapsed_time": "0:46:50", "remaining_time": "4:45:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1230, "total_steps": 8660, "loss": 0.4854, "learning_rate": 4.9731395986218976e-05, "epoch": 0.5681293302540416, "percentage": 14.2, "elapsed_time": "0:47:12", "remaining_time": "4:45:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1240, "total_steps": 8660, "loss": 0.5108, "learning_rate": 4.9716463140772176e-05, "epoch": 0.5727482678983834, "percentage": 14.32, "elapsed_time": "0:47:36", "remaining_time": "4:44:53", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1250, "total_steps": 8660, "loss": 0.4664, "learning_rate": 4.970112872216459e-05, "epoch": 0.5773672055427251, "percentage": 14.43, "elapsed_time": "0:48:00", "remaining_time": "4:44:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1260, "total_steps": 8660, "loss": 0.5728, "learning_rate": 4.968539297953748e-05, "epoch": 0.581986143187067, "percentage": 14.55, "elapsed_time": "0:48:22", "remaining_time": "4:44:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1270, "total_steps": 8660, "loss": 0.4551, "learning_rate": 4.96692561685525e-05, "epoch": 0.5866050808314087, "percentage": 14.67, "elapsed_time": "0:48:45", "remaining_time": "4:43:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1280, "total_steps": 8660, "loss": 0.4358, "learning_rate": 4.9652718551387545e-05, "epoch": 0.5912240184757506, "percentage": 14.78, "elapsed_time": "0:49:07", "remaining_time": "4:43:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1290, "total_steps": 8660, "loss": 0.3883, "learning_rate": 4.963578039673248e-05, "epoch": 0.5958429561200924, "percentage": 14.9, "elapsed_time": "0:49:29", "remaining_time": "4:42:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1300, "total_steps": 8660, "loss": 0.4403, "learning_rate": 4.961844197978479e-05, "epoch": 0.6004618937644342, "percentage": 15.01, "elapsed_time": "0:49:52", "remaining_time": "4:42:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1310, "total_steps": 8660, "loss": 0.4753, "learning_rate": 4.960070358224508e-05, "epoch": 0.605080831408776, "percentage": 15.13, "elapsed_time": "0:50:15", "remaining_time": "4:41:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1320, "total_steps": 8660, "loss": 0.4728, "learning_rate": 4.9582565492312537e-05, "epoch": 0.6096997690531177, "percentage": 15.24, "elapsed_time": "0:50:39", "remaining_time": "4:41:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1330, "total_steps": 8660, "loss": 0.4544, "learning_rate": 4.9564028004680215e-05, "epoch": 0.6143187066974596, "percentage": 15.36, "elapsed_time": "0:51:02", "remaining_time": "4:41:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1340, "total_steps": 8660, "loss": 0.4649, "learning_rate": 4.9545091420530265e-05, "epoch": 0.6189376443418014, "percentage": 15.47, "elapsed_time": "0:51:26", "remaining_time": "4:41:02", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1350, "total_steps": 8660, "loss": 0.4609, "learning_rate": 4.9525756047529023e-05, "epoch": 0.6235565819861432, "percentage": 15.59, "elapsed_time": "0:51:48", "remaining_time": "4:40:33", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1360, "total_steps": 8660, "loss": 0.4832, "learning_rate": 4.950602219982207e-05, "epoch": 0.628175519630485, "percentage": 15.7, "elapsed_time": "0:52:12", "remaining_time": "4:40:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1370, "total_steps": 8660, "loss": 0.4751, "learning_rate": 4.948589019802901e-05, "epoch": 0.6327944572748267, "percentage": 15.82, "elapsed_time": "0:52:35", "remaining_time": "4:39:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1380, "total_steps": 8660, "loss": 0.4691, "learning_rate": 4.94653603692384e-05, "epoch": 0.6374133949191686, "percentage": 15.94, "elapsed_time": "0:53:01", "remaining_time": "4:39:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1390, "total_steps": 8660, "loss": 0.4397, "learning_rate": 4.9444433047002327e-05, "epoch": 0.6420323325635104, "percentage": 16.05, "elapsed_time": "0:53:27", "remaining_time": "4:39:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1400, "total_steps": 8660, "loss": 0.4737, "learning_rate": 4.9423108571331055e-05, "epoch": 0.6466512702078522, "percentage": 16.17, "elapsed_time": "0:53:52", "remaining_time": "4:39:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1410, "total_steps": 8660, "loss": 0.4329, "learning_rate": 4.940138728868746e-05, "epoch": 0.651270207852194, "percentage": 16.28, "elapsed_time": "0:54:16", "remaining_time": "4:39:03", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1420, "total_steps": 8660, "loss": 0.4488, "learning_rate": 4.937926955198144e-05, "epoch": 0.6558891454965358, "percentage": 16.4, "elapsed_time": "0:54:41", "remaining_time": "4:38:51", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1430, "total_steps": 8660, "loss": 0.4645, "learning_rate": 4.935675572056413e-05, "epoch": 0.6605080831408776, "percentage": 16.51, "elapsed_time": "0:55:05", "remaining_time": "4:38:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1440, "total_steps": 8660, "loss": 0.4492, "learning_rate": 4.933384616022211e-05, "epoch": 0.6651270207852193, "percentage": 16.63, "elapsed_time": "0:55:31", "remaining_time": "4:38:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1450, "total_steps": 8660, "loss": 0.4978, "learning_rate": 4.931054124317144e-05, "epoch": 0.6697459584295612, "percentage": 16.74, "elapsed_time": "0:55:57", "remaining_time": "4:38:17", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1460, "total_steps": 8660, "loss": 0.4442, "learning_rate": 4.928684134805162e-05, "epoch": 0.674364896073903, "percentage": 16.86, "elapsed_time": "0:56:22", "remaining_time": "4:38:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1470, "total_steps": 8660, "loss": 0.4412, "learning_rate": 4.926274685991944e-05, "epoch": 0.6789838337182448, "percentage": 16.97, "elapsed_time": "0:56:48", "remaining_time": "4:37:51", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1480, "total_steps": 8660, "loss": 0.4655, "learning_rate": 4.9238258170242703e-05, "epoch": 0.6836027713625866, "percentage": 17.09, "elapsed_time": "0:57:12", "remaining_time": "4:37:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1490, "total_steps": 8660, "loss": 0.3944, "learning_rate": 4.921337567689389e-05, "epoch": 0.6882217090069284, "percentage": 17.21, "elapsed_time": "0:57:35", "remaining_time": "4:37:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1500, "total_steps": 8660, "loss": 0.4545, "learning_rate": 4.91880997841437e-05, "epoch": 0.6928406466512702, "percentage": 17.32, "elapsed_time": "0:57:59", "remaining_time": "4:36:49", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1510, "total_steps": 8660, "loss": 0.4202, "learning_rate": 4.916243090265444e-05, "epoch": 0.6974595842956121, "percentage": 17.44, "elapsed_time": "0:58:24", "remaining_time": "4:36:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1520, "total_steps": 8660, "loss": 0.4488, "learning_rate": 4.91363694494734e-05, "epoch": 0.7020785219399538, "percentage": 17.55, "elapsed_time": "0:58:47", "remaining_time": "4:36:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1530, "total_steps": 8660, "loss": 0.4599, "learning_rate": 4.910991584802606e-05, "epoch": 0.7066974595842956, "percentage": 17.67, "elapsed_time": "0:59:10", "remaining_time": "4:35:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1540, "total_steps": 8660, "loss": 0.4469, "learning_rate": 4.9083070528109195e-05, "epoch": 0.7113163972286374, "percentage": 17.78, "elapsed_time": "0:59:35", "remaining_time": "4:35:29", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1550, "total_steps": 8660, "loss": 0.4756, "learning_rate": 4.905583392588392e-05, "epoch": 0.7159353348729792, "percentage": 17.9, "elapsed_time": "0:59:59", "remaining_time": "4:35:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1560, "total_steps": 8660, "loss": 0.4793, "learning_rate": 4.902820648386858e-05, "epoch": 0.7205542725173211, "percentage": 18.01, "elapsed_time": "1:00:24", "remaining_time": "4:34:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1570, "total_steps": 8660, "loss": 0.4445, "learning_rate": 4.900018865093158e-05, "epoch": 0.7251732101616628, "percentage": 18.13, "elapsed_time": "1:00:48", "remaining_time": "4:34:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1580, "total_steps": 8660, "loss": 0.4422, "learning_rate": 4.897178088228406e-05, "epoch": 0.7297921478060047, "percentage": 18.24, "elapsed_time": "1:01:13", "remaining_time": "4:34:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1590, "total_steps": 8660, "loss": 0.4386, "learning_rate": 4.8942983639472554e-05, "epoch": 0.7344110854503464, "percentage": 18.36, "elapsed_time": "1:01:37", "remaining_time": "4:34:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1600, "total_steps": 8660, "loss": 0.4321, "learning_rate": 4.891379739037142e-05, "epoch": 0.7390300230946882, "percentage": 18.48, "elapsed_time": "1:02:01", "remaining_time": "4:33:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1610, "total_steps": 8660, "loss": 0.4422, "learning_rate": 4.888422260917532e-05, "epoch": 0.74364896073903, "percentage": 18.59, "elapsed_time": "1:02:25", "remaining_time": "4:33:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1620, "total_steps": 8660, "loss": 0.4118, "learning_rate": 4.885425977639143e-05, "epoch": 0.7482678983833718, "percentage": 18.71, "elapsed_time": "1:02:48", "remaining_time": "4:32:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1630, "total_steps": 8660, "loss": 0.4352, "learning_rate": 4.8823909378831694e-05, "epoch": 0.7528868360277137, "percentage": 18.82, "elapsed_time": "1:03:13", "remaining_time": "4:32:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1640, "total_steps": 8660, "loss": 0.4222, "learning_rate": 4.8793171909604904e-05, "epoch": 0.7575057736720554, "percentage": 18.94, "elapsed_time": "1:03:36", "remaining_time": "4:32:16", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1650, "total_steps": 8660, "loss": 0.4458, "learning_rate": 4.876204786810867e-05, "epoch": 0.7621247113163973, "percentage": 19.05, "elapsed_time": "1:04:00", "remaining_time": "4:31:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1660, "total_steps": 8660, "loss": 0.4387, "learning_rate": 4.873053776002133e-05, "epoch": 0.766743648960739, "percentage": 19.17, "elapsed_time": "1:04:23", "remaining_time": "4:31:29", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1670, "total_steps": 8660, "loss": 0.4203, "learning_rate": 4.8698642097293684e-05, "epoch": 0.7713625866050808, "percentage": 19.28, "elapsed_time": "1:04:46", "remaining_time": "4:31:09", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1680, "total_steps": 8660, "loss": 0.4445, "learning_rate": 4.866636139814076e-05, "epoch": 0.7759815242494227, "percentage": 19.4, "elapsed_time": "1:05:09", "remaining_time": "4:30:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1690, "total_steps": 8660, "loss": 0.4392, "learning_rate": 4.863369618703331e-05, "epoch": 0.7806004618937644, "percentage": 19.52, "elapsed_time": "1:05:32", "remaining_time": "4:30:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1700, "total_steps": 8660, "loss": 0.4128, "learning_rate": 4.860064699468935e-05, "epoch": 0.7852193995381063, "percentage": 19.63, "elapsed_time": "1:05:55", "remaining_time": "4:29:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1710, "total_steps": 8660, "loss": 0.4211, "learning_rate": 4.85672143580655e-05, "epoch": 0.789838337182448, "percentage": 19.75, "elapsed_time": "1:06:17", "remaining_time": "4:29:25", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1720, "total_steps": 8660, "loss": 0.4641, "learning_rate": 4.85333988203483e-05, "epoch": 0.7944572748267898, "percentage": 19.86, "elapsed_time": "1:06:40", "remaining_time": "4:29:02", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1730, "total_steps": 8660, "loss": 0.4466, "learning_rate": 4.84992009309453e-05, "epoch": 0.7990762124711316, "percentage": 19.98, "elapsed_time": "1:07:04", "remaining_time": "4:28:41", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1740, "total_steps": 8660, "loss": 0.3786, "learning_rate": 4.8464621245476254e-05, "epoch": 0.8036951501154734, "percentage": 20.09, "elapsed_time": "1:07:27", "remaining_time": "4:28:17", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1750, "total_steps": 8660, "loss": 0.3948, "learning_rate": 4.8429660325764e-05, "epoch": 0.8083140877598153, "percentage": 20.21, "elapsed_time": "1:07:51", "remaining_time": "4:27:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1760, "total_steps": 8660, "loss": 0.3733, "learning_rate": 4.8394318739825364e-05, "epoch": 0.812933025404157, "percentage": 20.32, "elapsed_time": "1:08:14", "remaining_time": "4:27:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1770, "total_steps": 8660, "loss": 0.4702, "learning_rate": 4.835859706186193e-05, "epoch": 0.8175519630484989, "percentage": 20.44, "elapsed_time": "1:08:37", "remaining_time": "4:27:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1780, "total_steps": 8660, "loss": 0.4268, "learning_rate": 4.832249587225073e-05, "epoch": 0.8221709006928406, "percentage": 20.55, "elapsed_time": "1:09:02", "remaining_time": "4:26:51", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1790, "total_steps": 8660, "loss": 0.4198, "learning_rate": 4.828601575753476e-05, "epoch": 0.8267898383371824, "percentage": 20.67, "elapsed_time": "1:09:23", "remaining_time": "4:26:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1800, "total_steps": 8660, "loss": 0.4305, "learning_rate": 4.824915731041352e-05, "epoch": 0.8314087759815243, "percentage": 20.79, "elapsed_time": "1:09:46", "remaining_time": "4:25:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1810, "total_steps": 8660, "loss": 0.4373, "learning_rate": 4.821192112973333e-05, "epoch": 0.836027713625866, "percentage": 20.9, "elapsed_time": "1:10:11", "remaining_time": "4:25:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1820, "total_steps": 8660, "loss": 0.4477, "learning_rate": 4.817430782047762e-05, "epoch": 0.8406466512702079, "percentage": 21.02, "elapsed_time": "1:10:35", "remaining_time": "4:25:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1830, "total_steps": 8660, "loss": 0.4611, "learning_rate": 4.8136317993757114e-05, "epoch": 0.8452655889145496, "percentage": 21.13, "elapsed_time": "1:11:00", "remaining_time": "4:25:03", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1840, "total_steps": 8660, "loss": 0.4297, "learning_rate": 4.8097952266799876e-05, "epoch": 0.8498845265588915, "percentage": 21.25, "elapsed_time": "1:11:23", "remaining_time": "4:24:38", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1850, "total_steps": 8660, "loss": 0.4028, "learning_rate": 4.805921126294129e-05, "epoch": 0.8545034642032333, "percentage": 21.36, "elapsed_time": "1:11:46", "remaining_time": "4:24:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1860, "total_steps": 8660, "loss": 0.3982, "learning_rate": 4.802009561161396e-05, "epoch": 0.859122401847575, "percentage": 21.48, "elapsed_time": "1:12:07", "remaining_time": "4:23:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1870, "total_steps": 8660, "loss": 0.4213, "learning_rate": 4.798060594833743e-05, "epoch": 0.8637413394919169, "percentage": 21.59, "elapsed_time": "1:12:30", "remaining_time": "4:23:16", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1880, "total_steps": 8660, "loss": 0.4085, "learning_rate": 4.794074291470791e-05, "epoch": 0.8683602771362586, "percentage": 21.71, "elapsed_time": "1:12:54", "remaining_time": "4:22:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1890, "total_steps": 8660, "loss": 0.437, "learning_rate": 4.790050715838783e-05, "epoch": 0.8729792147806005, "percentage": 21.82, "elapsed_time": "1:13:17", "remaining_time": "4:22:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1900, "total_steps": 8660, "loss": 0.4407, "learning_rate": 4.7859899333095296e-05, "epoch": 0.8775981524249422, "percentage": 21.94, "elapsed_time": "1:13:40", "remaining_time": "4:22:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1910, "total_steps": 8660, "loss": 0.4415, "learning_rate": 4.7818920098593516e-05, "epoch": 0.8822170900692841, "percentage": 22.06, "elapsed_time": "1:14:04", "remaining_time": "4:21:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1920, "total_steps": 8660, "loss": 0.433, "learning_rate": 4.777757012068004e-05, "epoch": 0.8868360277136259, "percentage": 22.17, "elapsed_time": "1:14:30", "remaining_time": "4:21:33", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1930, "total_steps": 8660, "loss": 0.3957, "learning_rate": 4.773585007117597e-05, "epoch": 0.8914549653579676, "percentage": 22.29, "elapsed_time": "1:14:55", "remaining_time": "4:21:16", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1940, "total_steps": 8660, "loss": 0.4442, "learning_rate": 4.7693760627915015e-05, "epoch": 0.8960739030023095, "percentage": 22.4, "elapsed_time": "1:15:20", "remaining_time": "4:20:59", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1950, "total_steps": 8660, "loss": 0.502, "learning_rate": 4.765130247473252e-05, "epoch": 0.9006928406466512, "percentage": 22.52, "elapsed_time": "1:15:45", "remaining_time": "4:20:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1960, "total_steps": 8660, "loss": 0.4532, "learning_rate": 4.760847630145431e-05, "epoch": 0.9053117782909931, "percentage": 22.63, "elapsed_time": "1:16:11", "remaining_time": "4:20:27", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1970, "total_steps": 8660, "loss": 0.3951, "learning_rate": 4.756528280388551e-05, "epoch": 0.9099307159353349, "percentage": 22.75, "elapsed_time": "1:16:36", "remaining_time": "4:20:09", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1980, "total_steps": 8660, "loss": 0.4442, "learning_rate": 4.752172268379923e-05, "epoch": 0.9145496535796767, "percentage": 22.86, "elapsed_time": "1:17:02", "remaining_time": "4:19:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 1990, "total_steps": 8660, "loss": 0.4512, "learning_rate": 4.7477796648925186e-05, "epoch": 0.9191685912240185, "percentage": 22.98, "elapsed_time": "1:17:25", "remaining_time": "4:19:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2000, "total_steps": 8660, "loss": 0.4076, "learning_rate": 4.743350541293817e-05, "epoch": 0.9237875288683602, "percentage": 23.09, "elapsed_time": "1:17:48", "remaining_time": "4:19:07", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2010, "total_steps": 8660, "loss": 0.3686, "learning_rate": 4.738884969544646e-05, "epoch": 0.9284064665127021, "percentage": 23.21, "elapsed_time": "1:18:12", "remaining_time": "4:18:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2020, "total_steps": 8660, "loss": 0.3822, "learning_rate": 4.734383022198013e-05, "epoch": 0.9330254041570438, "percentage": 23.33, "elapsed_time": "1:18:34", "remaining_time": "4:18:17", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2030, "total_steps": 8660, "loss": 0.3934, "learning_rate": 4.72984477239793e-05, "epoch": 0.9376443418013857, "percentage": 23.44, "elapsed_time": "1:18:57", "remaining_time": "4:17:53", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2040, "total_steps": 8660, "loss": 0.4234, "learning_rate": 4.725270293878218e-05, "epoch": 0.9422632794457275, "percentage": 23.56, "elapsed_time": "1:19:21", "remaining_time": "4:17:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2050, "total_steps": 8660, "loss": 0.3855, "learning_rate": 4.720659660961316e-05, "epoch": 0.9468822170900693, "percentage": 23.67, "elapsed_time": "1:19:46", "remaining_time": "4:17:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2060, "total_steps": 8660, "loss": 0.4159, "learning_rate": 4.7160129485570706e-05, "epoch": 0.9515011547344111, "percentage": 23.79, "elapsed_time": "1:20:11", "remaining_time": "4:16:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2070, "total_steps": 8660, "loss": 0.3882, "learning_rate": 4.711330232161515e-05, "epoch": 0.9561200923787528, "percentage": 23.9, "elapsed_time": "1:20:37", "remaining_time": "4:16:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2080, "total_steps": 8660, "loss": 0.4304, "learning_rate": 4.706611587855651e-05, "epoch": 0.9607390300230947, "percentage": 24.02, "elapsed_time": "1:21:03", "remaining_time": "4:16:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2090, "total_steps": 8660, "loss": 0.4648, "learning_rate": 4.7018570923042054e-05, "epoch": 0.9653579676674365, "percentage": 24.13, "elapsed_time": "1:21:26", "remaining_time": "4:16:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2100, "total_steps": 8660, "loss": 0.407, "learning_rate": 4.697066822754388e-05, "epoch": 0.9699769053117783, "percentage": 24.25, "elapsed_time": "1:21:50", "remaining_time": "4:15:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2110, "total_steps": 8660, "loss": 0.4084, "learning_rate": 4.692240857034634e-05, "epoch": 0.9745958429561201, "percentage": 24.36, "elapsed_time": "1:22:15", "remaining_time": "4:15:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2120, "total_steps": 8660, "loss": 0.436, "learning_rate": 4.6873792735533436e-05, "epoch": 0.9792147806004619, "percentage": 24.48, "elapsed_time": "1:22:40", "remaining_time": "4:15:02", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2130, "total_steps": 8660, "loss": 0.4102, "learning_rate": 4.682482151297605e-05, "epoch": 0.9838337182448037, "percentage": 24.6, "elapsed_time": "1:23:04", "remaining_time": "4:14:41", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2140, "total_steps": 8660, "loss": 0.4126, "learning_rate": 4.6775495698319114e-05, "epoch": 0.9884526558891455, "percentage": 24.71, "elapsed_time": "1:23:29", "remaining_time": "4:14:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2150, "total_steps": 8660, "loss": 0.4056, "learning_rate": 4.672581609296868e-05, "epoch": 0.9930715935334873, "percentage": 24.83, "elapsed_time": "1:23:53", "remaining_time": "4:14:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2160, "total_steps": 8660, "loss": 0.4159, "learning_rate": 4.667578350407891e-05, "epoch": 0.9976905311778291, "percentage": 24.94, "elapsed_time": "1:24:18", "remaining_time": "4:13:41", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2170, "total_steps": 8660, "loss": 0.3425, "learning_rate": 4.662539874453896e-05, "epoch": 1.002309468822171, "percentage": 25.06, "elapsed_time": "1:24:43", "remaining_time": "4:13:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2180, "total_steps": 8660, "loss": 0.2923, "learning_rate": 4.657466263295978e-05, "epoch": 1.0069284064665127, "percentage": 25.17, "elapsed_time": "1:25:06", "remaining_time": "4:12:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2190, "total_steps": 8660, "loss": 0.3432, "learning_rate": 4.652357599366078e-05, "epoch": 1.0115473441108545, "percentage": 25.29, "elapsed_time": "1:25:29", "remaining_time": "4:12:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2200, "total_steps": 8660, "loss": 0.3093, "learning_rate": 4.6472139656656504e-05, "epoch": 1.0161662817551964, "percentage": 25.4, "elapsed_time": "1:25:54", "remaining_time": "4:12:16", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2210, "total_steps": 8660, "loss": 0.2854, "learning_rate": 4.642035445764305e-05, "epoch": 1.0207852193995381, "percentage": 25.52, "elapsed_time": "1:26:20", "remaining_time": "4:11:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2220, "total_steps": 8660, "loss": 0.3232, "learning_rate": 4.636822123798458e-05, "epoch": 1.0254041570438799, "percentage": 25.64, "elapsed_time": "1:26:44", "remaining_time": "4:11:38", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2230, "total_steps": 8660, "loss": 0.3597, "learning_rate": 4.6315740844699606e-05, "epoch": 1.0300230946882216, "percentage": 25.75, "elapsed_time": "1:27:09", "remaining_time": "4:11:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2240, "total_steps": 8660, "loss": 0.3165, "learning_rate": 4.626291413044723e-05, "epoch": 1.0346420323325636, "percentage": 25.87, "elapsed_time": "1:27:34", "remaining_time": "4:10:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2250, "total_steps": 8660, "loss": 0.3332, "learning_rate": 4.620974195351331e-05, "epoch": 1.0392609699769053, "percentage": 25.98, "elapsed_time": "1:27:58", "remaining_time": "4:10:38", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2260, "total_steps": 8660, "loss": 0.3207, "learning_rate": 4.615622517779648e-05, "epoch": 1.043879907621247, "percentage": 26.1, "elapsed_time": "1:28:22", "remaining_time": "4:10:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2270, "total_steps": 8660, "loss": 0.3206, "learning_rate": 4.6102364672794186e-05, "epoch": 1.048498845265589, "percentage": 26.21, "elapsed_time": "1:28:47", "remaining_time": "4:09:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2280, "total_steps": 8660, "loss": 0.3283, "learning_rate": 4.6048161313588454e-05, "epoch": 1.0531177829099307, "percentage": 26.33, "elapsed_time": "1:29:12", "remaining_time": "4:09:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2290, "total_steps": 8660, "loss": 0.3108, "learning_rate": 4.599361598083175e-05, "epoch": 1.0577367205542725, "percentage": 26.44, "elapsed_time": "1:29:37", "remaining_time": "4:09:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2300, "total_steps": 8660, "loss": 0.3134, "learning_rate": 4.593872956073267e-05, "epoch": 1.0623556581986142, "percentage": 26.56, "elapsed_time": "1:30:01", "remaining_time": "4:08:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2310, "total_steps": 8660, "loss": 0.3059, "learning_rate": 4.588350294504149e-05, "epoch": 1.0669745958429562, "percentage": 26.67, "elapsed_time": "1:30:26", "remaining_time": "4:08:38", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2320, "total_steps": 8660, "loss": 0.3063, "learning_rate": 4.582793703103574e-05, "epoch": 1.071593533487298, "percentage": 26.79, "elapsed_time": "1:30:51", "remaining_time": "4:08:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2330, "total_steps": 8660, "loss": 0.3358, "learning_rate": 4.577203272150557e-05, "epoch": 1.0762124711316396, "percentage": 26.91, "elapsed_time": "1:31:16", "remaining_time": "4:07:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2340, "total_steps": 8660, "loss": 0.327, "learning_rate": 4.571579092473911e-05, "epoch": 1.0808314087759816, "percentage": 27.02, "elapsed_time": "1:31:41", "remaining_time": "4:07:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2350, "total_steps": 8660, "loss": 0.3128, "learning_rate": 4.565921255450773e-05, "epoch": 1.0854503464203233, "percentage": 27.14, "elapsed_time": "1:32:05", "remaining_time": "4:07:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2360, "total_steps": 8660, "loss": 0.3295, "learning_rate": 4.5602298530051165e-05, "epoch": 1.090069284064665, "percentage": 27.25, "elapsed_time": "1:32:28", "remaining_time": "4:06:51", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2370, "total_steps": 8660, "loss": 0.3218, "learning_rate": 4.5545049776062577e-05, "epoch": 1.0946882217090068, "percentage": 27.37, "elapsed_time": "1:32:52", "remaining_time": "4:06:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2380, "total_steps": 8660, "loss": 0.3154, "learning_rate": 4.548746722267356e-05, "epoch": 1.0993071593533488, "percentage": 27.48, "elapsed_time": "1:33:17", "remaining_time": "4:06:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2390, "total_steps": 8660, "loss": 0.2857, "learning_rate": 4.542955180543901e-05, "epoch": 1.1039260969976905, "percentage": 27.6, "elapsed_time": "1:33:41", "remaining_time": "4:05:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2400, "total_steps": 8660, "loss": 0.2997, "learning_rate": 4.53713044653219e-05, "epoch": 1.1085450346420322, "percentage": 27.71, "elapsed_time": "1:34:05", "remaining_time": "4:05:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2410, "total_steps": 8660, "loss": 0.3157, "learning_rate": 4.531272614867806e-05, "epoch": 1.1131639722863742, "percentage": 27.83, "elapsed_time": "1:34:28", "remaining_time": "4:05:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2420, "total_steps": 8660, "loss": 0.3208, "learning_rate": 4.525381780724073e-05, "epoch": 1.117782909930716, "percentage": 27.94, "elapsed_time": "1:34:52", "remaining_time": "4:04:38", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2430, "total_steps": 8660, "loss": 0.2979, "learning_rate": 4.519458039810515e-05, "epoch": 1.1224018475750577, "percentage": 28.06, "elapsed_time": "1:35:18", "remaining_time": "4:04:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2440, "total_steps": 8660, "loss": 0.2979, "learning_rate": 4.513501488371294e-05, "epoch": 1.1270207852193996, "percentage": 28.18, "elapsed_time": "1:35:41", "remaining_time": "4:03:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2450, "total_steps": 8660, "loss": 0.3316, "learning_rate": 4.5075122231836575e-05, "epoch": 1.1316397228637414, "percentage": 28.29, "elapsed_time": "1:36:03", "remaining_time": "4:03:28", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2460, "total_steps": 8660, "loss": 0.3246, "learning_rate": 4.501490341556354e-05, "epoch": 1.136258660508083, "percentage": 28.41, "elapsed_time": "1:36:24", "remaining_time": "4:02:59", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2470, "total_steps": 8660, "loss": 0.3256, "learning_rate": 4.495435941328062e-05, "epoch": 1.140877598152425, "percentage": 28.52, "elapsed_time": "1:36:46", "remaining_time": "4:02:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2480, "total_steps": 8660, "loss": 0.3325, "learning_rate": 4.489349120865792e-05, "epoch": 1.1454965357967668, "percentage": 28.64, "elapsed_time": "1:37:07", "remaining_time": "4:02:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2490, "total_steps": 8660, "loss": 0.328, "learning_rate": 4.483229979063295e-05, "epoch": 1.1501154734411085, "percentage": 28.75, "elapsed_time": "1:37:29", "remaining_time": "4:01:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2500, "total_steps": 8660, "loss": 0.2955, "learning_rate": 4.4770786153394526e-05, "epoch": 1.1547344110854503, "percentage": 28.87, "elapsed_time": "1:37:53", "remaining_time": "4:01:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2510, "total_steps": 8660, "loss": 0.3145, "learning_rate": 4.470895129636662e-05, "epoch": 1.1593533487297922, "percentage": 28.98, "elapsed_time": "1:38:16", "remaining_time": "4:00:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2520, "total_steps": 8660, "loss": 0.324, "learning_rate": 4.464679622419211e-05, "epoch": 1.163972286374134, "percentage": 29.1, "elapsed_time": "1:38:41", "remaining_time": "4:00:26", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2530, "total_steps": 8660, "loss": 0.3178, "learning_rate": 4.45843219467165e-05, "epoch": 1.1685912240184757, "percentage": 29.21, "elapsed_time": "1:39:05", "remaining_time": "4:00:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2540, "total_steps": 8660, "loss": 0.2641, "learning_rate": 4.452152947897146e-05, "epoch": 1.1732101616628174, "percentage": 29.33, "elapsed_time": "1:39:28", "remaining_time": "3:59:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2550, "total_steps": 8660, "loss": 0.3144, "learning_rate": 4.445841984115836e-05, "epoch": 1.1778290993071594, "percentage": 29.45, "elapsed_time": "1:39:51", "remaining_time": "3:59:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2560, "total_steps": 8660, "loss": 0.2987, "learning_rate": 4.4394994058631724e-05, "epoch": 1.1824480369515011, "percentage": 29.56, "elapsed_time": "1:40:14", "remaining_time": "3:58:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2570, "total_steps": 8660, "loss": 0.314, "learning_rate": 4.433125316188249e-05, "epoch": 1.1870669745958429, "percentage": 29.68, "elapsed_time": "1:40:35", "remaining_time": "3:58:22", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2580, "total_steps": 8660, "loss": 0.3346, "learning_rate": 4.426719818652137e-05, "epoch": 1.1916859122401848, "percentage": 29.79, "elapsed_time": "1:40:58", "remaining_time": "3:57:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2590, "total_steps": 8660, "loss": 0.3118, "learning_rate": 4.4202830173261936e-05, "epoch": 1.1963048498845266, "percentage": 29.91, "elapsed_time": "1:41:22", "remaining_time": "3:57:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2600, "total_steps": 8660, "loss": 0.3118, "learning_rate": 4.413815016790376e-05, "epoch": 1.2009237875288683, "percentage": 30.02, "elapsed_time": "1:41:43", "remaining_time": "3:57:06", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2610, "total_steps": 8660, "loss": 0.3239, "learning_rate": 4.4073159221315396e-05, "epoch": 1.2055427251732103, "percentage": 30.14, "elapsed_time": "1:42:05", "remaining_time": "3:56:38", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2620, "total_steps": 8660, "loss": 0.3295, "learning_rate": 4.4007858389417354e-05, "epoch": 1.210161662817552, "percentage": 30.25, "elapsed_time": "1:42:29", "remaining_time": "3:56:17", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2630, "total_steps": 8660, "loss": 0.285, "learning_rate": 4.3942248733164884e-05, "epoch": 1.2147806004618937, "percentage": 30.37, "elapsed_time": "1:42:52", "remaining_time": "3:55:53", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2640, "total_steps": 8660, "loss": 0.315, "learning_rate": 4.3876331318530755e-05, "epoch": 1.2193995381062355, "percentage": 30.48, "elapsed_time": "1:43:15", "remaining_time": "3:55:27", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2650, "total_steps": 8660, "loss": 0.3065, "learning_rate": 4.381010721648798e-05, "epoch": 1.2240184757505774, "percentage": 30.6, "elapsed_time": "1:43:37", "remaining_time": "3:55:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2660, "total_steps": 8660, "loss": 0.3419, "learning_rate": 4.374357750299236e-05, "epoch": 1.2286374133949192, "percentage": 30.72, "elapsed_time": "1:44:01", "remaining_time": "3:54:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2670, "total_steps": 8660, "loss": 0.3044, "learning_rate": 4.3676743258965015e-05, "epoch": 1.233256351039261, "percentage": 30.83, "elapsed_time": "1:44:24", "remaining_time": "3:54:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2680, "total_steps": 8660, "loss": 0.3248, "learning_rate": 4.3609605570274856e-05, "epoch": 1.2378752886836029, "percentage": 30.95, "elapsed_time": "1:44:47", "remaining_time": "3:53:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2690, "total_steps": 8660, "loss": 0.3262, "learning_rate": 4.3542165527720915e-05, "epoch": 1.2424942263279446, "percentage": 31.06, "elapsed_time": "1:45:11", "remaining_time": "3:53:27", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2700, "total_steps": 8660, "loss": 0.2911, "learning_rate": 4.347442422701459e-05, "epoch": 1.2471131639722863, "percentage": 31.18, "elapsed_time": "1:45:34", "remaining_time": "3:53:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2710, "total_steps": 8660, "loss": 0.305, "learning_rate": 4.3406382768761935e-05, "epoch": 1.251732101616628, "percentage": 31.29, "elapsed_time": "1:45:58", "remaining_time": "3:52:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2720, "total_steps": 8660, "loss": 0.3197, "learning_rate": 4.333804225844567e-05, "epoch": 1.25635103926097, "percentage": 31.41, "elapsed_time": "1:46:22", "remaining_time": "3:52:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2730, "total_steps": 8660, "loss": 0.3265, "learning_rate": 4.326940380640731e-05, "epoch": 1.2609699769053118, "percentage": 31.52, "elapsed_time": "1:46:46", "remaining_time": "3:51:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2740, "total_steps": 8660, "loss": 0.294, "learning_rate": 4.3200468527829055e-05, "epoch": 1.2655889145496535, "percentage": 31.64, "elapsed_time": "1:47:08", "remaining_time": "3:51:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2750, "total_steps": 8660, "loss": 0.3141, "learning_rate": 4.313123754271574e-05, "epoch": 1.2702078521939955, "percentage": 31.76, "elapsed_time": "1:47:32", "remaining_time": "3:51:06", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2760, "total_steps": 8660, "loss": 0.3119, "learning_rate": 4.3061711975876564e-05, "epoch": 1.2748267898383372, "percentage": 31.87, "elapsed_time": "1:47:56", "remaining_time": "3:50:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2770, "total_steps": 8660, "loss": 0.3199, "learning_rate": 4.2991892956906865e-05, "epoch": 1.279445727482679, "percentage": 31.99, "elapsed_time": "1:48:21", "remaining_time": "3:50:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2780, "total_steps": 8660, "loss": 0.2817, "learning_rate": 4.2921781620169776e-05, "epoch": 1.2840646651270209, "percentage": 32.1, "elapsed_time": "1:48:46", "remaining_time": "3:50:03", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2790, "total_steps": 8660, "loss": 0.3012, "learning_rate": 4.285137910477774e-05, "epoch": 1.2886836027713626, "percentage": 32.22, "elapsed_time": "1:49:08", "remaining_time": "3:49:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2800, "total_steps": 8660, "loss": 0.2705, "learning_rate": 4.278068655457408e-05, "epoch": 1.2933025404157044, "percentage": 32.33, "elapsed_time": "1:49:30", "remaining_time": "3:49:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2810, "total_steps": 8660, "loss": 0.2814, "learning_rate": 4.27097051181143e-05, "epoch": 1.2979214780600463, "percentage": 32.45, "elapsed_time": "1:49:54", "remaining_time": "3:48:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2820, "total_steps": 8660, "loss": 0.3154, "learning_rate": 4.2638435948647564e-05, "epoch": 1.302540415704388, "percentage": 32.56, "elapsed_time": "1:50:20", "remaining_time": "3:48:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2830, "total_steps": 8660, "loss": 0.2716, "learning_rate": 4.256688020409786e-05, "epoch": 1.3071593533487298, "percentage": 32.68, "elapsed_time": "1:50:47", "remaining_time": "3:48:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2840, "total_steps": 8660, "loss": 0.2901, "learning_rate": 4.249503904704522e-05, "epoch": 1.3117782909930715, "percentage": 32.79, "elapsed_time": "1:51:12", "remaining_time": "3:47:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2850, "total_steps": 8660, "loss": 0.2968, "learning_rate": 4.242291364470682e-05, "epoch": 1.3163972286374133, "percentage": 32.91, "elapsed_time": "1:51:37", "remaining_time": "3:47:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2860, "total_steps": 8660, "loss": 0.337, "learning_rate": 4.235050516891804e-05, "epoch": 1.3210161662817552, "percentage": 33.03, "elapsed_time": "1:52:01", "remaining_time": "3:47:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2870, "total_steps": 8660, "loss": 0.3052, "learning_rate": 4.227781479611341e-05, "epoch": 1.325635103926097, "percentage": 33.14, "elapsed_time": "1:52:25", "remaining_time": "3:46:47", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2880, "total_steps": 8660, "loss": 0.2999, "learning_rate": 4.220484370730746e-05, "epoch": 1.3302540415704387, "percentage": 33.26, "elapsed_time": "1:52:49", "remaining_time": "3:46:26", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2890, "total_steps": 8660, "loss": 0.3077, "learning_rate": 4.213159308807563e-05, "epoch": 1.3348729792147807, "percentage": 33.37, "elapsed_time": "1:53:14", "remaining_time": "3:46:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2900, "total_steps": 8660, "loss": 0.3034, "learning_rate": 4.205806412853488e-05, "epoch": 1.3394919168591224, "percentage": 33.49, "elapsed_time": "1:53:38", "remaining_time": "3:45:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2910, "total_steps": 8660, "loss": 0.3049, "learning_rate": 4.198425802332446e-05, "epoch": 1.3441108545034641, "percentage": 33.6, "elapsed_time": "1:54:01", "remaining_time": "3:45:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2920, "total_steps": 8660, "loss": 0.2842, "learning_rate": 4.191017597158645e-05, "epoch": 1.348729792147806, "percentage": 33.72, "elapsed_time": "1:54:26", "remaining_time": "3:44:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2930, "total_steps": 8660, "loss": 0.2924, "learning_rate": 4.183581917694627e-05, "epoch": 1.3533487297921478, "percentage": 33.83, "elapsed_time": "1:54:50", "remaining_time": "3:44:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2940, "total_steps": 8660, "loss": 0.3303, "learning_rate": 4.1761188847493174e-05, "epoch": 1.3579676674364896, "percentage": 33.95, "elapsed_time": "1:55:15", "remaining_time": "3:44:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2950, "total_steps": 8660, "loss": 0.3022, "learning_rate": 4.168628619576056e-05, "epoch": 1.3625866050808315, "percentage": 34.06, "elapsed_time": "1:55:40", "remaining_time": "3:43:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2960, "total_steps": 8660, "loss": 0.2885, "learning_rate": 4.161111243870631e-05, "epoch": 1.3672055427251733, "percentage": 34.18, "elapsed_time": "1:56:04", "remaining_time": "3:43:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2970, "total_steps": 8660, "loss": 0.3114, "learning_rate": 4.1535668797693e-05, "epoch": 1.371824480369515, "percentage": 34.3, "elapsed_time": "1:56:28", "remaining_time": "3:43:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2980, "total_steps": 8660, "loss": 0.2594, "learning_rate": 4.145995649846808e-05, "epoch": 1.376443418013857, "percentage": 34.41, "elapsed_time": "1:56:52", "remaining_time": "3:42:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 2990, "total_steps": 8660, "loss": 0.3137, "learning_rate": 4.138397677114393e-05, "epoch": 1.3810623556581987, "percentage": 34.53, "elapsed_time": "1:57:16", "remaining_time": "3:42:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3000, "total_steps": 8660, "loss": 0.2967, "learning_rate": 4.1307730850177865e-05, "epoch": 1.3856812933025404, "percentage": 34.64, "elapsed_time": "1:57:40", "remaining_time": "3:42:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3010, "total_steps": 8660, "loss": 0.3043, "learning_rate": 4.123121997435215e-05, "epoch": 1.3903002309468822, "percentage": 34.76, "elapsed_time": "1:58:05", "remaining_time": "3:41:39", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3020, "total_steps": 8660, "loss": 0.2914, "learning_rate": 4.115444538675378e-05, "epoch": 1.394919168591224, "percentage": 34.87, "elapsed_time": "1:58:29", "remaining_time": "3:41:16", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3030, "total_steps": 8660, "loss": 0.2637, "learning_rate": 4.1077408334754335e-05, "epoch": 1.3995381062355658, "percentage": 34.99, "elapsed_time": "1:58:53", "remaining_time": "3:40:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3040, "total_steps": 8660, "loss": 0.2907, "learning_rate": 4.1000110069989724e-05, "epoch": 1.4041570438799076, "percentage": 35.1, "elapsed_time": "1:59:17", "remaining_time": "3:40:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3050, "total_steps": 8660, "loss": 0.3158, "learning_rate": 4.092255184833981e-05, "epoch": 1.4087759815242493, "percentage": 35.22, "elapsed_time": "1:59:41", "remaining_time": "3:40:09", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3060, "total_steps": 8660, "loss": 0.3259, "learning_rate": 4.0844734929908025e-05, "epoch": 1.4133949191685913, "percentage": 35.33, "elapsed_time": "2:00:06", "remaining_time": "3:39:47", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3070, "total_steps": 8660, "loss": 0.2839, "learning_rate": 4.076666057900091e-05, "epoch": 1.418013856812933, "percentage": 35.45, "elapsed_time": "2:00:31", "remaining_time": "3:39:26", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3080, "total_steps": 8660, "loss": 0.2865, "learning_rate": 4.068833006410757e-05, "epoch": 1.4226327944572748, "percentage": 35.57, "elapsed_time": "2:00:54", "remaining_time": "3:39:02", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3090, "total_steps": 8660, "loss": 0.2718, "learning_rate": 4.0609744657879024e-05, "epoch": 1.4272517321016167, "percentage": 35.68, "elapsed_time": "2:01:18", "remaining_time": "3:38:39", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3100, "total_steps": 8660, "loss": 0.3163, "learning_rate": 4.0530905637107584e-05, "epoch": 1.4318706697459584, "percentage": 35.8, "elapsed_time": "2:01:43", "remaining_time": "3:38:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3110, "total_steps": 8660, "loss": 0.3377, "learning_rate": 4.0451814282706085e-05, "epoch": 1.4364896073903002, "percentage": 35.91, "elapsed_time": "2:02:05", "remaining_time": "3:37:53", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3120, "total_steps": 8660, "loss": 0.2846, "learning_rate": 4.037247187968706e-05, "epoch": 1.4411085450346421, "percentage": 36.03, "elapsed_time": "2:02:29", "remaining_time": "3:37:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3130, "total_steps": 8660, "loss": 0.3111, "learning_rate": 4.02928797171419e-05, "epoch": 1.4457274826789839, "percentage": 36.14, "elapsed_time": "2:02:53", "remaining_time": "3:37:07", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3140, "total_steps": 8660, "loss": 0.2987, "learning_rate": 4.021303908821987e-05, "epoch": 1.4503464203233256, "percentage": 36.26, "elapsed_time": "2:03:17", "remaining_time": "3:36:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3150, "total_steps": 8660, "loss": 0.3045, "learning_rate": 4.013295129010712e-05, "epoch": 1.4549653579676676, "percentage": 36.37, "elapsed_time": "2:03:41", "remaining_time": "3:36:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3160, "total_steps": 8660, "loss": 0.3208, "learning_rate": 4.005261762400562e-05, "epoch": 1.4595842956120093, "percentage": 36.49, "elapsed_time": "2:04:04", "remaining_time": "3:35:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3170, "total_steps": 8660, "loss": 0.2706, "learning_rate": 3.997203939511198e-05, "epoch": 1.464203233256351, "percentage": 36.61, "elapsed_time": "2:04:27", "remaining_time": "3:35:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3180, "total_steps": 8660, "loss": 0.2948, "learning_rate": 3.989121791259629e-05, "epoch": 1.4688221709006928, "percentage": 36.72, "elapsed_time": "2:04:50", "remaining_time": "3:35:07", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3190, "total_steps": 8660, "loss": 0.2977, "learning_rate": 3.98101544895808e-05, "epoch": 1.4734411085450345, "percentage": 36.84, "elapsed_time": "2:05:12", "remaining_time": "3:34:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3200, "total_steps": 8660, "loss": 0.2933, "learning_rate": 3.972885044311867e-05, "epoch": 1.4780600461893765, "percentage": 36.95, "elapsed_time": "2:05:36", "remaining_time": "3:34:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3210, "total_steps": 8660, "loss": 0.2989, "learning_rate": 3.964730709417245e-05, "epoch": 1.4826789838337182, "percentage": 37.07, "elapsed_time": "2:06:00", "remaining_time": "3:33:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3220, "total_steps": 8660, "loss": 0.3293, "learning_rate": 3.956552576759272e-05, "epoch": 1.48729792147806, "percentage": 37.18, "elapsed_time": "2:06:23", "remaining_time": "3:33:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3230, "total_steps": 8660, "loss": 0.3134, "learning_rate": 3.948350779209653e-05, "epoch": 1.491916859122402, "percentage": 37.3, "elapsed_time": "2:06:46", "remaining_time": "3:33:07", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3240, "total_steps": 8660, "loss": 0.3405, "learning_rate": 3.940125450024581e-05, "epoch": 1.4965357967667436, "percentage": 37.41, "elapsed_time": "2:07:09", "remaining_time": "3:32:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3250, "total_steps": 8660, "loss": 0.3073, "learning_rate": 3.931876722842569e-05, "epoch": 1.5011547344110854, "percentage": 37.53, "elapsed_time": "2:07:31", "remaining_time": "3:32:17", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3260, "total_steps": 8660, "loss": 0.2643, "learning_rate": 3.9236047316822866e-05, "epoch": 1.5057736720554273, "percentage": 37.64, "elapsed_time": "2:07:55", "remaining_time": "3:31:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3270, "total_steps": 8660, "loss": 0.2983, "learning_rate": 3.9153096109403736e-05, "epoch": 1.510392609699769, "percentage": 37.76, "elapsed_time": "2:08:18", "remaining_time": "3:31:29", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3280, "total_steps": 8660, "loss": 0.2785, "learning_rate": 3.906991495389263e-05, "epoch": 1.5150115473441108, "percentage": 37.88, "elapsed_time": "2:08:41", "remaining_time": "3:31:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3290, "total_steps": 8660, "loss": 0.2953, "learning_rate": 3.898650520174989e-05, "epoch": 1.5196304849884528, "percentage": 37.99, "elapsed_time": "2:09:06", "remaining_time": "3:30:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3300, "total_steps": 8660, "loss": 0.278, "learning_rate": 3.890286820814989e-05, "epoch": 1.5242494226327945, "percentage": 38.11, "elapsed_time": "2:09:29", "remaining_time": "3:30:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3310, "total_steps": 8660, "loss": 0.2853, "learning_rate": 3.881900533195906e-05, "epoch": 1.5288683602771362, "percentage": 38.22, "elapsed_time": "2:09:54", "remaining_time": "3:29:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3320, "total_steps": 8660, "loss": 0.2794, "learning_rate": 3.873491793571378e-05, "epoch": 1.5334872979214782, "percentage": 38.34, "elapsed_time": "2:10:19", "remaining_time": "3:29:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3330, "total_steps": 8660, "loss": 0.3133, "learning_rate": 3.8650607385598256e-05, "epoch": 1.5381062355658197, "percentage": 38.45, "elapsed_time": "2:10:44", "remaining_time": "3:29:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3340, "total_steps": 8660, "loss": 0.2763, "learning_rate": 3.856607505142232e-05, "epoch": 1.5427251732101617, "percentage": 38.57, "elapsed_time": "2:11:08", "remaining_time": "3:28:53", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3350, "total_steps": 8660, "loss": 0.2951, "learning_rate": 3.8481322306599154e-05, "epoch": 1.5473441108545036, "percentage": 38.68, "elapsed_time": "2:11:32", "remaining_time": "3:28:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3360, "total_steps": 8660, "loss": 0.2754, "learning_rate": 3.839635052812302e-05, "epoch": 1.5519630484988451, "percentage": 38.8, "elapsed_time": "2:11:58", "remaining_time": "3:28:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3370, "total_steps": 8660, "loss": 0.2972, "learning_rate": 3.831116109654684e-05, "epoch": 1.556581986143187, "percentage": 38.91, "elapsed_time": "2:12:21", "remaining_time": "3:27:45", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3380, "total_steps": 8660, "loss": 0.2637, "learning_rate": 3.822575539595981e-05, "epoch": 1.5612009237875288, "percentage": 39.03, "elapsed_time": "2:12:45", "remaining_time": "3:27:22", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3390, "total_steps": 8660, "loss": 0.2946, "learning_rate": 3.8140134813964866e-05, "epoch": 1.5658198614318706, "percentage": 39.15, "elapsed_time": "2:13:08", "remaining_time": "3:26:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3400, "total_steps": 8660, "loss": 0.33, "learning_rate": 3.8054300741656186e-05, "epoch": 1.5704387990762125, "percentage": 39.26, "elapsed_time": "2:13:32", "remaining_time": "3:26:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3410, "total_steps": 8660, "loss": 0.2866, "learning_rate": 3.796825457359655e-05, "epoch": 1.5750577367205543, "percentage": 39.38, "elapsed_time": "2:13:56", "remaining_time": "3:26:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3420, "total_steps": 8660, "loss": 0.3171, "learning_rate": 3.7881997707794716e-05, "epoch": 1.579676674364896, "percentage": 39.49, "elapsed_time": "2:14:19", "remaining_time": "3:25:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3430, "total_steps": 8660, "loss": 0.2952, "learning_rate": 3.779553154568266e-05, "epoch": 1.584295612009238, "percentage": 39.61, "elapsed_time": "2:14:41", "remaining_time": "3:25:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3440, "total_steps": 8660, "loss": 0.2759, "learning_rate": 3.7708857492092874e-05, "epoch": 1.5889145496535797, "percentage": 39.72, "elapsed_time": "2:15:04", "remaining_time": "3:24:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3450, "total_steps": 8660, "loss": 0.2781, "learning_rate": 3.762197695523547e-05, "epoch": 1.5935334872979214, "percentage": 39.84, "elapsed_time": "2:15:25", "remaining_time": "3:24:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3460, "total_steps": 8660, "loss": 0.2587, "learning_rate": 3.7534891346675367e-05, "epoch": 1.5981524249422634, "percentage": 39.95, "elapsed_time": "2:15:48", "remaining_time": "3:24:06", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3470, "total_steps": 8660, "loss": 0.3124, "learning_rate": 3.744760208130928e-05, "epoch": 1.6027713625866051, "percentage": 40.07, "elapsed_time": "2:16:12", "remaining_time": "3:23:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3480, "total_steps": 8660, "loss": 0.298, "learning_rate": 3.73601105773428e-05, "epoch": 1.6073903002309469, "percentage": 40.18, "elapsed_time": "2:16:37", "remaining_time": "3:23:22", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3490, "total_steps": 8660, "loss": 0.2934, "learning_rate": 3.7272418256267347e-05, "epoch": 1.6120092378752888, "percentage": 40.3, "elapsed_time": "2:17:04", "remaining_time": "3:23:02", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3500, "total_steps": 8660, "loss": 0.2804, "learning_rate": 3.7184526542837016e-05, "epoch": 1.6166281755196303, "percentage": 40.42, "elapsed_time": "2:17:30", "remaining_time": "3:22:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3510, "total_steps": 8660, "loss": 0.2784, "learning_rate": 3.70964368650455e-05, "epoch": 1.6212471131639723, "percentage": 40.53, "elapsed_time": "2:17:57", "remaining_time": "3:22:25", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3520, "total_steps": 8660, "loss": 0.2792, "learning_rate": 3.700815065410284e-05, "epoch": 1.625866050808314, "percentage": 40.65, "elapsed_time": "2:18:24", "remaining_time": "3:22:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3530, "total_steps": 8660, "loss": 0.2988, "learning_rate": 3.691966934441219e-05, "epoch": 1.6304849884526558, "percentage": 40.76, "elapsed_time": "2:18:48", "remaining_time": "3:21:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3540, "total_steps": 8660, "loss": 0.3078, "learning_rate": 3.683099437354652e-05, "epoch": 1.6351039260969977, "percentage": 40.88, "elapsed_time": "2:19:13", "remaining_time": "3:21:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3550, "total_steps": 8660, "loss": 0.3172, "learning_rate": 3.674212718222524e-05, "epoch": 1.6397228637413395, "percentage": 40.99, "elapsed_time": "2:19:38", "remaining_time": "3:21:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3560, "total_steps": 8660, "loss": 0.2628, "learning_rate": 3.665306921429082e-05, "epoch": 1.6443418013856812, "percentage": 41.11, "elapsed_time": "2:20:04", "remaining_time": "3:20:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3570, "total_steps": 8660, "loss": 0.2734, "learning_rate": 3.65638219166853e-05, "epoch": 1.6489607390300232, "percentage": 41.22, "elapsed_time": "2:20:29", "remaining_time": "3:20:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3580, "total_steps": 8660, "loss": 0.2884, "learning_rate": 3.647438673942679e-05, "epoch": 1.653579676674365, "percentage": 41.34, "elapsed_time": "2:20:54", "remaining_time": "3:19:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3590, "total_steps": 8660, "loss": 0.3117, "learning_rate": 3.638476513558593e-05, "epoch": 1.6581986143187066, "percentage": 41.45, "elapsed_time": "2:21:18", "remaining_time": "3:19:33", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3600, "total_steps": 8660, "loss": 0.296, "learning_rate": 3.629495856126227e-05, "epoch": 1.6628175519630486, "percentage": 41.57, "elapsed_time": "2:21:44", "remaining_time": "3:19:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3610, "total_steps": 8660, "loss": 0.3182, "learning_rate": 3.62049684755606e-05, "epoch": 1.6674364896073903, "percentage": 41.69, "elapsed_time": "2:22:08", "remaining_time": "3:18:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3620, "total_steps": 8660, "loss": 0.2857, "learning_rate": 3.611479634056726e-05, "epoch": 1.672055427251732, "percentage": 41.8, "elapsed_time": "2:22:32", "remaining_time": "3:18:27", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3630, "total_steps": 8660, "loss": 0.2808, "learning_rate": 3.6024443621326386e-05, "epoch": 1.676674364896074, "percentage": 41.92, "elapsed_time": "2:22:56", "remaining_time": "3:18:04", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3640, "total_steps": 8660, "loss": 0.2801, "learning_rate": 3.5933911785816096e-05, "epoch": 1.6812933025404158, "percentage": 42.03, "elapsed_time": "2:23:19", "remaining_time": "3:17:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3650, "total_steps": 8660, "loss": 0.2881, "learning_rate": 3.5843202304924646e-05, "epoch": 1.6859122401847575, "percentage": 42.15, "elapsed_time": "2:23:41", "remaining_time": "3:17:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3660, "total_steps": 8660, "loss": 0.287, "learning_rate": 3.575231665242652e-05, "epoch": 1.6905311778290995, "percentage": 42.26, "elapsed_time": "2:24:05", "remaining_time": "3:16:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3670, "total_steps": 8660, "loss": 0.2941, "learning_rate": 3.5661256304958515e-05, "epoch": 1.695150115473441, "percentage": 42.38, "elapsed_time": "2:24:28", "remaining_time": "3:16:26", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3680, "total_steps": 8660, "loss": 0.2754, "learning_rate": 3.5570022741995726e-05, "epoch": 1.699769053117783, "percentage": 42.49, "elapsed_time": "2:24:51", "remaining_time": "3:16:02", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3690, "total_steps": 8660, "loss": 0.2977, "learning_rate": 3.547861744582751e-05, "epoch": 1.7043879907621247, "percentage": 42.61, "elapsed_time": "2:25:17", "remaining_time": "3:15:41", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3700, "total_steps": 8660, "loss": 0.3165, "learning_rate": 3.538704190153338e-05, "epoch": 1.7090069284064664, "percentage": 42.73, "elapsed_time": "2:25:41", "remaining_time": "3:15:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3710, "total_steps": 8660, "loss": 0.2717, "learning_rate": 3.529529759695897e-05, "epoch": 1.7136258660508084, "percentage": 42.84, "elapsed_time": "2:26:06", "remaining_time": "3:14:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3720, "total_steps": 8660, "loss": 0.2878, "learning_rate": 3.5203386022691725e-05, "epoch": 1.71824480369515, "percentage": 42.96, "elapsed_time": "2:26:31", "remaining_time": "3:14:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3730, "total_steps": 8660, "loss": 0.3015, "learning_rate": 3.511130867203679e-05, "epoch": 1.7228637413394918, "percentage": 43.07, "elapsed_time": "2:26:58", "remaining_time": "3:14:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3740, "total_steps": 8660, "loss": 0.273, "learning_rate": 3.5019067040992716e-05, "epoch": 1.7274826789838338, "percentage": 43.19, "elapsed_time": "2:27:24", "remaining_time": "3:13:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3750, "total_steps": 8660, "loss": 0.2934, "learning_rate": 3.492666262822712e-05, "epoch": 1.7321016166281755, "percentage": 43.3, "elapsed_time": "2:27:50", "remaining_time": "3:13:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3760, "total_steps": 8660, "loss": 0.2953, "learning_rate": 3.4834096935052384e-05, "epoch": 1.7367205542725173, "percentage": 43.42, "elapsed_time": "2:28:16", "remaining_time": "3:13:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3770, "total_steps": 8660, "loss": 0.2823, "learning_rate": 3.4741371465401257e-05, "epoch": 1.7413394919168592, "percentage": 43.53, "elapsed_time": "2:28:41", "remaining_time": "3:12:52", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3780, "total_steps": 8660, "loss": 0.2891, "learning_rate": 3.464848772580238e-05, "epoch": 1.745958429561201, "percentage": 43.65, "elapsed_time": "2:29:08", "remaining_time": "3:12:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3790, "total_steps": 8660, "loss": 0.3128, "learning_rate": 3.4555447225355846e-05, "epoch": 1.7505773672055427, "percentage": 43.76, "elapsed_time": "2:29:33", "remaining_time": "3:12:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3800, "total_steps": 8660, "loss": 0.2657, "learning_rate": 3.446225147570868e-05, "epoch": 1.7551963048498846, "percentage": 43.88, "elapsed_time": "2:29:58", "remaining_time": "3:11:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3810, "total_steps": 8660, "loss": 0.3069, "learning_rate": 3.4368901991030274e-05, "epoch": 1.7598152424942262, "percentage": 44.0, "elapsed_time": "2:30:24", "remaining_time": "3:11:27", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3820, "total_steps": 8660, "loss": 0.2777, "learning_rate": 3.427540028798777e-05, "epoch": 1.7644341801385681, "percentage": 44.11, "elapsed_time": "2:30:50", "remaining_time": "3:11:07", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3830, "total_steps": 8660, "loss": 0.2766, "learning_rate": 3.418174788572142e-05, "epoch": 1.76905311778291, "percentage": 44.23, "elapsed_time": "2:31:16", "remaining_time": "3:10:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3840, "total_steps": 8660, "loss": 0.2768, "learning_rate": 3.408794630581994e-05, "epoch": 1.7736720554272516, "percentage": 44.34, "elapsed_time": "2:31:42", "remaining_time": "3:10:25", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3850, "total_steps": 8660, "loss": 0.2866, "learning_rate": 3.399399707229577e-05, "epoch": 1.7782909930715936, "percentage": 44.46, "elapsed_time": "2:32:07", "remaining_time": "3:10:03", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3860, "total_steps": 8660, "loss": 0.2824, "learning_rate": 3.389990171156028e-05, "epoch": 1.7829099307159353, "percentage": 44.57, "elapsed_time": "2:32:30", "remaining_time": "3:09:39", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3870, "total_steps": 8660, "loss": 0.264, "learning_rate": 3.3805661752399e-05, "epoch": 1.787528868360277, "percentage": 44.69, "elapsed_time": "2:32:52", "remaining_time": "3:09:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3880, "total_steps": 8660, "loss": 0.2864, "learning_rate": 3.37112787259468e-05, "epoch": 1.792147806004619, "percentage": 44.8, "elapsed_time": "2:33:13", "remaining_time": "3:08:45", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3890, "total_steps": 8660, "loss": 0.2957, "learning_rate": 3.3616754165662975e-05, "epoch": 1.7967667436489607, "percentage": 44.92, "elapsed_time": "2:33:34", "remaining_time": "3:08:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3900, "total_steps": 8660, "loss": 0.2679, "learning_rate": 3.352208960730636e-05, "epoch": 1.8013856812933025, "percentage": 45.03, "elapsed_time": "2:33:55", "remaining_time": "3:07:51", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3910, "total_steps": 8660, "loss": 0.3021, "learning_rate": 3.342728658891034e-05, "epoch": 1.8060046189376444, "percentage": 45.15, "elapsed_time": "2:34:15", "remaining_time": "3:07:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3920, "total_steps": 8660, "loss": 0.2647, "learning_rate": 3.333234665075792e-05, "epoch": 1.8106235565819861, "percentage": 45.27, "elapsed_time": "2:34:36", "remaining_time": "3:06:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3930, "total_steps": 8660, "loss": 0.3233, "learning_rate": 3.323727133535665e-05, "epoch": 1.8152424942263279, "percentage": 45.38, "elapsed_time": "2:34:58", "remaining_time": "3:06:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3940, "total_steps": 8660, "loss": 0.2596, "learning_rate": 3.314206218741359e-05, "epoch": 1.8198614318706698, "percentage": 45.5, "elapsed_time": "2:35:18", "remaining_time": "3:06:03", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3950, "total_steps": 8660, "loss": 0.2887, "learning_rate": 3.304672075381017e-05, "epoch": 1.8244803695150116, "percentage": 45.61, "elapsed_time": "2:35:39", "remaining_time": "3:05:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3960, "total_steps": 8660, "loss": 0.2725, "learning_rate": 3.295124858357714e-05, "epoch": 1.8290993071593533, "percentage": 45.73, "elapsed_time": "2:36:00", "remaining_time": "3:05:09", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3970, "total_steps": 8660, "loss": 0.3009, "learning_rate": 3.285564722786932e-05, "epoch": 1.8337182448036953, "percentage": 45.84, "elapsed_time": "2:36:21", "remaining_time": "3:04:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3980, "total_steps": 8660, "loss": 0.2574, "learning_rate": 3.2759918239940446e-05, "epoch": 1.8383371824480368, "percentage": 45.96, "elapsed_time": "2:36:42", "remaining_time": "3:04:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 3990, "total_steps": 8660, "loss": 0.2773, "learning_rate": 3.2664063175117906e-05, "epoch": 1.8429561200923787, "percentage": 46.07, "elapsed_time": "2:37:02", "remaining_time": "3:03:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4000, "total_steps": 8660, "loss": 0.2487, "learning_rate": 3.25680835907775e-05, "epoch": 1.8475750577367207, "percentage": 46.19, "elapsed_time": "2:37:24", "remaining_time": "3:03:22", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4010, "total_steps": 8660, "loss": 0.2721, "learning_rate": 3.24719810463181e-05, "epoch": 1.8521939953810622, "percentage": 46.3, "elapsed_time": "2:37:49", "remaining_time": "3:03:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4020, "total_steps": 8660, "loss": 0.2623, "learning_rate": 3.237575710313636e-05, "epoch": 1.8568129330254042, "percentage": 46.42, "elapsed_time": "2:38:10", "remaining_time": "3:02:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4030, "total_steps": 8660, "loss": 0.2444, "learning_rate": 3.227941332460132e-05, "epoch": 1.861431870669746, "percentage": 46.54, "elapsed_time": "2:38:31", "remaining_time": "3:02:07", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4040, "total_steps": 8660, "loss": 0.3071, "learning_rate": 3.2182951276028975e-05, "epoch": 1.8660508083140877, "percentage": 46.65, "elapsed_time": "2:38:55", "remaining_time": "3:01:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4050, "total_steps": 8660, "loss": 0.2471, "learning_rate": 3.2086372524656905e-05, "epoch": 1.8706697459584296, "percentage": 46.77, "elapsed_time": "2:39:19", "remaining_time": "3:01:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4060, "total_steps": 8660, "loss": 0.2951, "learning_rate": 3.1989678639618786e-05, "epoch": 1.8752886836027713, "percentage": 46.88, "elapsed_time": "2:39:42", "remaining_time": "3:00:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4070, "total_steps": 8660, "loss": 0.2643, "learning_rate": 3.1892871191918884e-05, "epoch": 1.879907621247113, "percentage": 47.0, "elapsed_time": "2:40:07", "remaining_time": "3:00:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4080, "total_steps": 8660, "loss": 0.2774, "learning_rate": 3.1795951754406524e-05, "epoch": 1.884526558891455, "percentage": 47.11, "elapsed_time": "2:40:30", "remaining_time": "3:00:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4090, "total_steps": 8660, "loss": 0.3056, "learning_rate": 3.169892190175059e-05, "epoch": 1.8891454965357968, "percentage": 47.23, "elapsed_time": "2:40:54", "remaining_time": "2:59:47", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4100, "total_steps": 8660, "loss": 0.2803, "learning_rate": 3.160178321041387e-05, "epoch": 1.8937644341801385, "percentage": 47.34, "elapsed_time": "2:41:18", "remaining_time": "2:59:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4110, "total_steps": 8660, "loss": 0.2704, "learning_rate": 3.150453725862749e-05, "epoch": 1.8983833718244805, "percentage": 47.46, "elapsed_time": "2:41:39", "remaining_time": "2:58:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4120, "total_steps": 8660, "loss": 0.2924, "learning_rate": 3.1407185626365224e-05, "epoch": 1.9030023094688222, "percentage": 47.58, "elapsed_time": "2:42:01", "remaining_time": "2:58:33", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4130, "total_steps": 8660, "loss": 0.2686, "learning_rate": 3.130972989531792e-05, "epoch": 1.907621247113164, "percentage": 47.69, "elapsed_time": "2:42:24", "remaining_time": "2:58:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4140, "total_steps": 8660, "loss": 0.3036, "learning_rate": 3.1212171648867685e-05, "epoch": 1.912240184757506, "percentage": 47.81, "elapsed_time": "2:42:48", "remaining_time": "2:57:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4150, "total_steps": 8660, "loss": 0.2907, "learning_rate": 3.111451247206223e-05, "epoch": 1.9168591224018474, "percentage": 47.92, "elapsed_time": "2:43:12", "remaining_time": "2:57:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4160, "total_steps": 8660, "loss": 0.2824, "learning_rate": 3.1016753951589125e-05, "epoch": 1.9214780600461894, "percentage": 48.04, "elapsed_time": "2:43:36", "remaining_time": "2:56:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4170, "total_steps": 8660, "loss": 0.2877, "learning_rate": 3.0918897675749954e-05, "epoch": 1.9260969976905313, "percentage": 48.15, "elapsed_time": "2:44:00", "remaining_time": "2:56:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4180, "total_steps": 8660, "loss": 0.2974, "learning_rate": 3.0820945234434574e-05, "epoch": 1.9307159353348728, "percentage": 48.27, "elapsed_time": "2:44:23", "remaining_time": "2:56:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4190, "total_steps": 8660, "loss": 0.2679, "learning_rate": 3.072289821909526e-05, "epoch": 1.9353348729792148, "percentage": 48.38, "elapsed_time": "2:44:44", "remaining_time": "2:55:45", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4200, "total_steps": 8660, "loss": 0.2855, "learning_rate": 3.062475822272086e-05, "epoch": 1.9399538106235565, "percentage": 48.5, "elapsed_time": "2:45:07", "remaining_time": "2:55:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4210, "total_steps": 8660, "loss": 0.2897, "learning_rate": 3.052652683981088e-05, "epoch": 1.9445727482678983, "percentage": 48.61, "elapsed_time": "2:45:31", "remaining_time": "2:54:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4220, "total_steps": 8660, "loss": 0.2901, "learning_rate": 3.0428205666349613e-05, "epoch": 1.9491916859122402, "percentage": 48.73, "elapsed_time": "2:45:52", "remaining_time": "2:54:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4230, "total_steps": 8660, "loss": 0.2676, "learning_rate": 3.0329796299780194e-05, "epoch": 1.953810623556582, "percentage": 48.85, "elapsed_time": "2:46:13", "remaining_time": "2:54:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4240, "total_steps": 8660, "loss": 0.3301, "learning_rate": 3.023130033897865e-05, "epoch": 1.9584295612009237, "percentage": 48.96, "elapsed_time": "2:46:35", "remaining_time": "2:53:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4250, "total_steps": 8660, "loss": 0.2685, "learning_rate": 3.013271938422793e-05, "epoch": 1.9630484988452657, "percentage": 49.08, "elapsed_time": "2:46:59", "remaining_time": "2:53:16", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4260, "total_steps": 8660, "loss": 0.2626, "learning_rate": 3.0034055037191873e-05, "epoch": 1.9676674364896074, "percentage": 49.19, "elapsed_time": "2:47:22", "remaining_time": "2:52:52", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4270, "total_steps": 8660, "loss": 0.2684, "learning_rate": 2.9935308900889236e-05, "epoch": 1.9722863741339491, "percentage": 49.31, "elapsed_time": "2:47:46", "remaining_time": "2:52:29", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4280, "total_steps": 8660, "loss": 0.263, "learning_rate": 2.9836482579667596e-05, "epoch": 1.976905311778291, "percentage": 49.42, "elapsed_time": "2:48:11", "remaining_time": "2:52:06", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4290, "total_steps": 8660, "loss": 0.3022, "learning_rate": 2.973757767917734e-05, "epoch": 1.9815242494226328, "percentage": 49.54, "elapsed_time": "2:48:34", "remaining_time": "2:51:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4300, "total_steps": 8660, "loss": 0.2695, "learning_rate": 2.9638595806345514e-05, "epoch": 1.9861431870669746, "percentage": 49.65, "elapsed_time": "2:48:57", "remaining_time": "2:51:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4310, "total_steps": 8660, "loss": 0.2483, "learning_rate": 2.953953856934978e-05, "epoch": 1.9907621247113165, "percentage": 49.77, "elapsed_time": "2:49:18", "remaining_time": "2:50:53", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4320, "total_steps": 8660, "loss": 0.269, "learning_rate": 2.944040757759222e-05, "epoch": 1.995381062355658, "percentage": 49.88, "elapsed_time": "2:49:43", "remaining_time": "2:50:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4330, "total_steps": 8660, "loss": 0.3165, "learning_rate": 2.9341204441673266e-05, "epoch": 2.0, "percentage": 50.0, "elapsed_time": "2:50:06", "remaining_time": "2:50:06", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4340, "total_steps": 8660, "loss": 0.1673, "learning_rate": 2.9241930773365456e-05, "epoch": 2.004618937644342, "percentage": 50.12, "elapsed_time": "2:50:29", "remaining_time": "2:49:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4350, "total_steps": 8660, "loss": 0.1835, "learning_rate": 2.9142588185587295e-05, "epoch": 2.0092378752886835, "percentage": 50.23, "elapsed_time": "2:50:54", "remaining_time": "2:49:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4360, "total_steps": 8660, "loss": 0.1731, "learning_rate": 2.9043178292377044e-05, "epoch": 2.0138568129330254, "percentage": 50.35, "elapsed_time": "2:51:19", "remaining_time": "2:48:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4370, "total_steps": 8660, "loss": 0.1887, "learning_rate": 2.8943702708866475e-05, "epoch": 2.0184757505773674, "percentage": 50.46, "elapsed_time": "2:51:44", "remaining_time": "2:48:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4380, "total_steps": 8660, "loss": 0.1627, "learning_rate": 2.884416305125465e-05, "epoch": 2.023094688221709, "percentage": 50.58, "elapsed_time": "2:52:09", "remaining_time": "2:48:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4390, "total_steps": 8660, "loss": 0.1778, "learning_rate": 2.874456093678165e-05, "epoch": 2.027713625866051, "percentage": 50.69, "elapsed_time": "2:52:33", "remaining_time": "2:47:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4400, "total_steps": 8660, "loss": 0.1916, "learning_rate": 2.8644897983702312e-05, "epoch": 2.032332563510393, "percentage": 50.81, "elapsed_time": "2:52:57", "remaining_time": "2:47:27", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4410, "total_steps": 8660, "loss": 0.167, "learning_rate": 2.8545175811259917e-05, "epoch": 2.0369515011547343, "percentage": 50.92, "elapsed_time": "2:53:21", "remaining_time": "2:47:04", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4420, "total_steps": 8660, "loss": 0.1738, "learning_rate": 2.8445396039659915e-05, "epoch": 2.0415704387990763, "percentage": 51.04, "elapsed_time": "2:53:46", "remaining_time": "2:46:41", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4430, "total_steps": 8660, "loss": 0.1846, "learning_rate": 2.834556029004356e-05, "epoch": 2.046189376443418, "percentage": 51.15, "elapsed_time": "2:54:11", "remaining_time": "2:46:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4440, "total_steps": 8660, "loss": 0.1837, "learning_rate": 2.8245670184461605e-05, "epoch": 2.0508083140877598, "percentage": 51.27, "elapsed_time": "2:54:35", "remaining_time": "2:45:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4450, "total_steps": 8660, "loss": 0.1815, "learning_rate": 2.814572734584792e-05, "epoch": 2.0554272517321017, "percentage": 51.39, "elapsed_time": "2:55:01", "remaining_time": "2:45:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4460, "total_steps": 8660, "loss": 0.1735, "learning_rate": 2.8045733397993167e-05, "epoch": 2.0600461893764432, "percentage": 51.5, "elapsed_time": "2:55:27", "remaining_time": "2:45:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4470, "total_steps": 8660, "loss": 0.1766, "learning_rate": 2.7945689965518358e-05, "epoch": 2.064665127020785, "percentage": 51.62, "elapsed_time": "2:55:53", "remaining_time": "2:44:52", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4480, "total_steps": 8660, "loss": 0.185, "learning_rate": 2.784559867384851e-05, "epoch": 2.069284064665127, "percentage": 51.73, "elapsed_time": "2:56:17", "remaining_time": "2:44:29", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4490, "total_steps": 8660, "loss": 0.1924, "learning_rate": 2.774546114918621e-05, "epoch": 2.0739030023094687, "percentage": 51.85, "elapsed_time": "2:56:42", "remaining_time": "2:44:06", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4500, "total_steps": 8660, "loss": 0.1851, "learning_rate": 2.7645279018485203e-05, "epoch": 2.0785219399538106, "percentage": 51.96, "elapsed_time": "2:57:06", "remaining_time": "2:43:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4510, "total_steps": 8660, "loss": 0.1945, "learning_rate": 2.7545053909423964e-05, "epoch": 2.0831408775981526, "percentage": 52.08, "elapsed_time": "2:57:31", "remaining_time": "2:43:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4520, "total_steps": 8660, "loss": 0.195, "learning_rate": 2.744478745037923e-05, "epoch": 2.087759815242494, "percentage": 52.19, "elapsed_time": "2:57:56", "remaining_time": "2:42:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4530, "total_steps": 8660, "loss": 0.1734, "learning_rate": 2.7344481270399575e-05, "epoch": 2.092378752886836, "percentage": 52.31, "elapsed_time": "2:58:18", "remaining_time": "2:42:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4540, "total_steps": 8660, "loss": 0.1938, "learning_rate": 2.724413699917892e-05, "epoch": 2.096997690531178, "percentage": 52.42, "elapsed_time": "2:58:42", "remaining_time": "2:42:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4550, "total_steps": 8660, "loss": 0.1863, "learning_rate": 2.7143756267030073e-05, "epoch": 2.1016166281755195, "percentage": 52.54, "elapsed_time": "2:59:07", "remaining_time": "2:41:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4560, "total_steps": 8660, "loss": 0.1801, "learning_rate": 2.70433407048582e-05, "epoch": 2.1062355658198615, "percentage": 52.66, "elapsed_time": "2:59:31", "remaining_time": "2:41:25", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4570, "total_steps": 8660, "loss": 0.1951, "learning_rate": 2.6942891944134373e-05, "epoch": 2.1108545034642034, "percentage": 52.77, "elapsed_time": "2:59:55", "remaining_time": "2:41:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4580, "total_steps": 8660, "loss": 0.1766, "learning_rate": 2.6842411616869073e-05, "epoch": 2.115473441108545, "percentage": 52.89, "elapsed_time": "3:00:19", "remaining_time": "2:40:38", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4590, "total_steps": 8660, "loss": 0.1762, "learning_rate": 2.674190135558562e-05, "epoch": 2.120092378752887, "percentage": 53.0, "elapsed_time": "3:00:43", "remaining_time": "2:40:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4600, "total_steps": 8660, "loss": 0.165, "learning_rate": 2.6641362793293672e-05, "epoch": 2.1247113163972284, "percentage": 53.12, "elapsed_time": "3:01:06", "remaining_time": "2:39:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4610, "total_steps": 8660, "loss": 0.1601, "learning_rate": 2.6540797563462722e-05, "epoch": 2.1293302540415704, "percentage": 53.23, "elapsed_time": "3:01:31", "remaining_time": "2:39:28", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4620, "total_steps": 8660, "loss": 0.18, "learning_rate": 2.6440207299995513e-05, "epoch": 2.1339491916859123, "percentage": 53.35, "elapsed_time": "3:01:56", "remaining_time": "2:39:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4630, "total_steps": 8660, "loss": 0.1625, "learning_rate": 2.6339593637201533e-05, "epoch": 2.138568129330254, "percentage": 53.46, "elapsed_time": "3:02:20", "remaining_time": "2:38:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4640, "total_steps": 8660, "loss": 0.1774, "learning_rate": 2.6238958209770443e-05, "epoch": 2.143187066974596, "percentage": 53.58, "elapsed_time": "3:02:44", "remaining_time": "2:38:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4650, "total_steps": 8660, "loss": 0.1946, "learning_rate": 2.61383026527455e-05, "epoch": 2.147806004618938, "percentage": 53.7, "elapsed_time": "3:03:10", "remaining_time": "2:37:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4660, "total_steps": 8660, "loss": 0.1643, "learning_rate": 2.603762860149703e-05, "epoch": 2.1524249422632793, "percentage": 53.81, "elapsed_time": "3:03:32", "remaining_time": "2:37:33", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4670, "total_steps": 8660, "loss": 0.169, "learning_rate": 2.593693769169583e-05, "epoch": 2.1570438799076213, "percentage": 53.93, "elapsed_time": "3:03:56", "remaining_time": "2:37:09", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4680, "total_steps": 8660, "loss": 0.1902, "learning_rate": 2.583623155928659e-05, "epoch": 2.161662817551963, "percentage": 54.04, "elapsed_time": "3:04:20", "remaining_time": "2:36:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4690, "total_steps": 8660, "loss": 0.1776, "learning_rate": 2.5735511840461345e-05, "epoch": 2.1662817551963047, "percentage": 54.16, "elapsed_time": "3:04:45", "remaining_time": "2:36:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4700, "total_steps": 8660, "loss": 0.1957, "learning_rate": 2.5634780171632866e-05, "epoch": 2.1709006928406467, "percentage": 54.27, "elapsed_time": "3:05:09", "remaining_time": "2:36:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4710, "total_steps": 8660, "loss": 0.1728, "learning_rate": 2.553403818940807e-05, "epoch": 2.1755196304849886, "percentage": 54.39, "elapsed_time": "3:05:31", "remaining_time": "2:35:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4720, "total_steps": 8660, "loss": 0.1682, "learning_rate": 2.5433287530561452e-05, "epoch": 2.18013856812933, "percentage": 54.5, "elapsed_time": "3:05:54", "remaining_time": "2:35:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4730, "total_steps": 8660, "loss": 0.1646, "learning_rate": 2.5332529832008444e-05, "epoch": 2.184757505773672, "percentage": 54.62, "elapsed_time": "3:06:17", "remaining_time": "2:34:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4740, "total_steps": 8660, "loss": 0.1929, "learning_rate": 2.523176673077889e-05, "epoch": 2.1893764434180136, "percentage": 54.73, "elapsed_time": "3:06:40", "remaining_time": "2:34:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4750, "total_steps": 8660, "loss": 0.1734, "learning_rate": 2.5130999863990402e-05, "epoch": 2.1939953810623556, "percentage": 54.85, "elapsed_time": "3:07:05", "remaining_time": "2:34:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4760, "total_steps": 8660, "loss": 0.182, "learning_rate": 2.5030230868821764e-05, "epoch": 2.1986143187066975, "percentage": 54.97, "elapsed_time": "3:07:29", "remaining_time": "2:33:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4770, "total_steps": 8660, "loss": 0.175, "learning_rate": 2.4929461382486356e-05, "epoch": 2.203233256351039, "percentage": 55.08, "elapsed_time": "3:07:54", "remaining_time": "2:33:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4780, "total_steps": 8660, "loss": 0.1759, "learning_rate": 2.4828693042205513e-05, "epoch": 2.207852193995381, "percentage": 55.2, "elapsed_time": "3:08:16", "remaining_time": "2:32:49", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4790, "total_steps": 8660, "loss": 0.1702, "learning_rate": 2.472792748518198e-05, "epoch": 2.212471131639723, "percentage": 55.31, "elapsed_time": "3:08:39", "remaining_time": "2:32:25", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4800, "total_steps": 8660, "loss": 0.1677, "learning_rate": 2.4627166348573256e-05, "epoch": 2.2170900692840645, "percentage": 55.43, "elapsed_time": "3:09:02", "remaining_time": "2:32:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4810, "total_steps": 8660, "loss": 0.1499, "learning_rate": 2.452641126946503e-05, "epoch": 2.2217090069284064, "percentage": 55.54, "elapsed_time": "3:09:25", "remaining_time": "2:31:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4820, "total_steps": 8660, "loss": 0.1731, "learning_rate": 2.4425663884844577e-05, "epoch": 2.2263279445727484, "percentage": 55.66, "elapsed_time": "3:09:48", "remaining_time": "2:31:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4830, "total_steps": 8660, "loss": 0.182, "learning_rate": 2.4324925831574162e-05, "epoch": 2.23094688221709, "percentage": 55.77, "elapsed_time": "3:10:10", "remaining_time": "2:30:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4840, "total_steps": 8660, "loss": 0.1831, "learning_rate": 2.4224198746364422e-05, "epoch": 2.235565819861432, "percentage": 55.89, "elapsed_time": "3:10:32", "remaining_time": "2:30:22", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4850, "total_steps": 8660, "loss": 0.1704, "learning_rate": 2.412348426574782e-05, "epoch": 2.240184757505774, "percentage": 56.0, "elapsed_time": "3:10:55", "remaining_time": "2:29:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4860, "total_steps": 8660, "loss": 0.174, "learning_rate": 2.4022784026052002e-05, "epoch": 2.2448036951501154, "percentage": 56.12, "elapsed_time": "3:11:20", "remaining_time": "2:29:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4870, "total_steps": 8660, "loss": 0.1499, "learning_rate": 2.392209966337327e-05, "epoch": 2.2494226327944573, "percentage": 56.24, "elapsed_time": "3:11:43", "remaining_time": "2:29:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4880, "total_steps": 8660, "loss": 0.1975, "learning_rate": 2.382143281354995e-05, "epoch": 2.2540415704387993, "percentage": 56.35, "elapsed_time": "3:12:07", "remaining_time": "2:28:49", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4890, "total_steps": 8660, "loss": 0.1694, "learning_rate": 2.3720785112135826e-05, "epoch": 2.258660508083141, "percentage": 56.47, "elapsed_time": "3:12:30", "remaining_time": "2:28:25", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4900, "total_steps": 8660, "loss": 0.1795, "learning_rate": 2.36201581943736e-05, "epoch": 2.2632794457274827, "percentage": 56.58, "elapsed_time": "3:12:52", "remaining_time": "2:28:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4910, "total_steps": 8660, "loss": 0.194, "learning_rate": 2.351955369516826e-05, "epoch": 2.2678983833718247, "percentage": 56.7, "elapsed_time": "3:13:16", "remaining_time": "2:27:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4920, "total_steps": 8660, "loss": 0.1862, "learning_rate": 2.341897324906061e-05, "epoch": 2.272517321016166, "percentage": 56.81, "elapsed_time": "3:13:38", "remaining_time": "2:27:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4930, "total_steps": 8660, "loss": 0.1924, "learning_rate": 2.3318418490200595e-05, "epoch": 2.277136258660508, "percentage": 56.93, "elapsed_time": "3:14:01", "remaining_time": "2:26:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4940, "total_steps": 8660, "loss": 0.168, "learning_rate": 2.3217891052320855e-05, "epoch": 2.28175519630485, "percentage": 57.04, "elapsed_time": "3:14:25", "remaining_time": "2:26:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4950, "total_steps": 8660, "loss": 0.1816, "learning_rate": 2.3117392568710143e-05, "epoch": 2.2863741339491916, "percentage": 57.16, "elapsed_time": "3:14:48", "remaining_time": "2:26:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4960, "total_steps": 8660, "loss": 0.1765, "learning_rate": 2.301692467218677e-05, "epoch": 2.2909930715935336, "percentage": 57.27, "elapsed_time": "3:15:10", "remaining_time": "2:25:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4970, "total_steps": 8660, "loss": 0.1968, "learning_rate": 2.291648899507208e-05, "epoch": 2.295612009237875, "percentage": 57.39, "elapsed_time": "3:15:33", "remaining_time": "2:25:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4980, "total_steps": 8660, "loss": 0.1802, "learning_rate": 2.2816087169163987e-05, "epoch": 2.300230946882217, "percentage": 57.51, "elapsed_time": "3:15:56", "remaining_time": "2:24:47", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 4990, "total_steps": 8660, "loss": 0.1645, "learning_rate": 2.271572082571036e-05, "epoch": 2.304849884526559, "percentage": 57.62, "elapsed_time": "3:16:21", "remaining_time": "2:24:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5000, "total_steps": 8660, "loss": 0.1905, "learning_rate": 2.261539159538264e-05, "epoch": 2.3094688221709005, "percentage": 57.74, "elapsed_time": "3:16:43", "remaining_time": "2:24:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5010, "total_steps": 8660, "loss": 0.174, "learning_rate": 2.2515101108249235e-05, "epoch": 2.3140877598152425, "percentage": 57.85, "elapsed_time": "3:17:06", "remaining_time": "2:23:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5020, "total_steps": 8660, "loss": 0.1715, "learning_rate": 2.2414850993749097e-05, "epoch": 2.3187066974595845, "percentage": 57.97, "elapsed_time": "3:17:28", "remaining_time": "2:23:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5030, "total_steps": 8660, "loss": 0.1673, "learning_rate": 2.2314642880665257e-05, "epoch": 2.323325635103926, "percentage": 58.08, "elapsed_time": "3:17:51", "remaining_time": "2:22:47", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5040, "total_steps": 8660, "loss": 0.1683, "learning_rate": 2.2214478397098304e-05, "epoch": 2.327944572748268, "percentage": 58.2, "elapsed_time": "3:18:12", "remaining_time": "2:22:22", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5050, "total_steps": 8660, "loss": 0.1887, "learning_rate": 2.2114359170440004e-05, "epoch": 2.3325635103926095, "percentage": 58.31, "elapsed_time": "3:18:34", "remaining_time": "2:21:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5060, "total_steps": 8660, "loss": 0.1808, "learning_rate": 2.201428682734682e-05, "epoch": 2.3371824480369514, "percentage": 58.43, "elapsed_time": "3:18:56", "remaining_time": "2:21:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5070, "total_steps": 8660, "loss": 0.1889, "learning_rate": 2.1914262993713454e-05, "epoch": 2.3418013856812934, "percentage": 58.55, "elapsed_time": "3:19:18", "remaining_time": "2:21:07", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5080, "total_steps": 8660, "loss": 0.1664, "learning_rate": 2.181428929464652e-05, "epoch": 2.346420323325635, "percentage": 58.66, "elapsed_time": "3:19:40", "remaining_time": "2:20:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5090, "total_steps": 8660, "loss": 0.1722, "learning_rate": 2.1714367354438046e-05, "epoch": 2.351039260969977, "percentage": 58.78, "elapsed_time": "3:20:04", "remaining_time": "2:20:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5100, "total_steps": 8660, "loss": 0.1825, "learning_rate": 2.1614498796539124e-05, "epoch": 2.355658198614319, "percentage": 58.89, "elapsed_time": "3:20:26", "remaining_time": "2:19:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5110, "total_steps": 8660, "loss": 0.1738, "learning_rate": 2.151468524353356e-05, "epoch": 2.3602771362586603, "percentage": 59.01, "elapsed_time": "3:20:48", "remaining_time": "2:19:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5120, "total_steps": 8660, "loss": 0.1902, "learning_rate": 2.141492831711146e-05, "epoch": 2.3648960739030023, "percentage": 59.12, "elapsed_time": "3:21:10", "remaining_time": "2:19:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5130, "total_steps": 8660, "loss": 0.1746, "learning_rate": 2.1315229638042934e-05, "epoch": 2.3695150115473442, "percentage": 59.24, "elapsed_time": "3:21:34", "remaining_time": "2:18:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5140, "total_steps": 8660, "loss": 0.1542, "learning_rate": 2.1215590826151705e-05, "epoch": 2.3741339491916857, "percentage": 59.35, "elapsed_time": "3:21:58", "remaining_time": "2:18:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5150, "total_steps": 8660, "loss": 0.165, "learning_rate": 2.1116013500288824e-05, "epoch": 2.3787528868360277, "percentage": 59.47, "elapsed_time": "3:22:21", "remaining_time": "2:17:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5160, "total_steps": 8660, "loss": 0.2035, "learning_rate": 2.1016499278306405e-05, "epoch": 2.3833718244803697, "percentage": 59.58, "elapsed_time": "3:22:42", "remaining_time": "2:17:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5170, "total_steps": 8660, "loss": 0.1668, "learning_rate": 2.091704977703125e-05, "epoch": 2.387990762124711, "percentage": 59.7, "elapsed_time": "3:23:03", "remaining_time": "2:17:04", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5180, "total_steps": 8660, "loss": 0.1701, "learning_rate": 2.0817666612238673e-05, "epoch": 2.392609699769053, "percentage": 59.82, "elapsed_time": "3:23:25", "remaining_time": "2:16:39", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5190, "total_steps": 8660, "loss": 0.1747, "learning_rate": 2.071835139862619e-05, "epoch": 2.397228637413395, "percentage": 59.93, "elapsed_time": "3:23:48", "remaining_time": "2:16:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5200, "total_steps": 8660, "loss": 0.1711, "learning_rate": 2.061910574978728e-05, "epoch": 2.4018475750577366, "percentage": 60.05, "elapsed_time": "3:24:10", "remaining_time": "2:15:51", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5210, "total_steps": 8660, "loss": 0.1883, "learning_rate": 2.051993127818524e-05, "epoch": 2.4064665127020786, "percentage": 60.16, "elapsed_time": "3:24:32", "remaining_time": "2:15:26", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5220, "total_steps": 8660, "loss": 0.1912, "learning_rate": 2.0420829595126872e-05, "epoch": 2.4110854503464205, "percentage": 60.28, "elapsed_time": "3:24:53", "remaining_time": "2:15:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5230, "total_steps": 8660, "loss": 0.194, "learning_rate": 2.0321802310736433e-05, "epoch": 2.415704387990762, "percentage": 60.39, "elapsed_time": "3:25:14", "remaining_time": "2:14:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5240, "total_steps": 8660, "loss": 0.1777, "learning_rate": 2.0222851033929367e-05, "epoch": 2.420323325635104, "percentage": 60.51, "elapsed_time": "3:25:35", "remaining_time": "2:14:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5250, "total_steps": 8660, "loss": 0.1687, "learning_rate": 2.012397737238621e-05, "epoch": 2.424942263279446, "percentage": 60.62, "elapsed_time": "3:25:56", "remaining_time": "2:13:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5260, "total_steps": 8660, "loss": 0.1751, "learning_rate": 2.0025182932526507e-05, "epoch": 2.4295612009237875, "percentage": 60.74, "elapsed_time": "3:26:18", "remaining_time": "2:13:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5270, "total_steps": 8660, "loss": 0.159, "learning_rate": 1.9926469319482628e-05, "epoch": 2.4341801385681294, "percentage": 60.85, "elapsed_time": "3:26:38", "remaining_time": "2:12:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5280, "total_steps": 8660, "loss": 0.1603, "learning_rate": 1.9827838137073743e-05, "epoch": 2.438799076212471, "percentage": 60.97, "elapsed_time": "3:27:00", "remaining_time": "2:12:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5290, "total_steps": 8660, "loss": 0.1729, "learning_rate": 1.9729290987779777e-05, "epoch": 2.443418013856813, "percentage": 61.09, "elapsed_time": "3:27:22", "remaining_time": "2:12:06", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5300, "total_steps": 8660, "loss": 0.1744, "learning_rate": 1.9630829472715337e-05, "epoch": 2.448036951501155, "percentage": 61.2, "elapsed_time": "3:27:44", "remaining_time": "2:11:41", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5310, "total_steps": 8660, "loss": 0.1713, "learning_rate": 1.9532455191603715e-05, "epoch": 2.4526558891454964, "percentage": 61.32, "elapsed_time": "3:28:07", "remaining_time": "2:11:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5320, "total_steps": 8660, "loss": 0.175, "learning_rate": 1.9434169742750906e-05, "epoch": 2.4572748267898383, "percentage": 61.43, "elapsed_time": "3:28:32", "remaining_time": "2:10:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5330, "total_steps": 8660, "loss": 0.1809, "learning_rate": 1.93359747230196e-05, "epoch": 2.4618937644341803, "percentage": 61.55, "elapsed_time": "3:28:54", "remaining_time": "2:10:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5340, "total_steps": 8660, "loss": 0.1842, "learning_rate": 1.9237871727803314e-05, "epoch": 2.466512702078522, "percentage": 61.66, "elapsed_time": "3:29:18", "remaining_time": "2:10:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5350, "total_steps": 8660, "loss": 0.1682, "learning_rate": 1.9139862351000383e-05, "epoch": 2.4711316397228638, "percentage": 61.78, "elapsed_time": "3:29:41", "remaining_time": "2:09:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5360, "total_steps": 8660, "loss": 0.1761, "learning_rate": 1.904194818498813e-05, "epoch": 2.4757505773672057, "percentage": 61.89, "elapsed_time": "3:30:05", "remaining_time": "2:09:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5370, "total_steps": 8660, "loss": 0.1675, "learning_rate": 1.8944130820596967e-05, "epoch": 2.4803695150115472, "percentage": 62.01, "elapsed_time": "3:30:27", "remaining_time": "2:08:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5380, "total_steps": 8660, "loss": 0.1686, "learning_rate": 1.884641184708453e-05, "epoch": 2.484988452655889, "percentage": 62.12, "elapsed_time": "3:30:50", "remaining_time": "2:08:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5390, "total_steps": 8660, "loss": 0.1695, "learning_rate": 1.874879285210992e-05, "epoch": 2.4896073903002307, "percentage": 62.24, "elapsed_time": "3:31:14", "remaining_time": "2:08:09", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5400, "total_steps": 8660, "loss": 0.1601, "learning_rate": 1.8651275421707835e-05, "epoch": 2.4942263279445727, "percentage": 62.36, "elapsed_time": "3:31:34", "remaining_time": "2:07:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5410, "total_steps": 8660, "loss": 0.1701, "learning_rate": 1.8553861140262824e-05, "epoch": 2.4988452655889146, "percentage": 62.47, "elapsed_time": "3:31:57", "remaining_time": "2:07:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5420, "total_steps": 8660, "loss": 0.1798, "learning_rate": 1.8456551590483596e-05, "epoch": 2.503464203233256, "percentage": 62.59, "elapsed_time": "3:32:20", "remaining_time": "2:06:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5430, "total_steps": 8660, "loss": 0.1747, "learning_rate": 1.8359348353377226e-05, "epoch": 2.508083140877598, "percentage": 62.7, "elapsed_time": "3:32:44", "remaining_time": "2:06:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5440, "total_steps": 8660, "loss": 0.1777, "learning_rate": 1.826225300822354e-05, "epoch": 2.51270207852194, "percentage": 62.82, "elapsed_time": "3:33:07", "remaining_time": "2:06:09", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5450, "total_steps": 8660, "loss": 0.1701, "learning_rate": 1.8165267132549398e-05, "epoch": 2.5173210161662816, "percentage": 62.93, "elapsed_time": "3:33:32", "remaining_time": "2:05:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5460, "total_steps": 8660, "loss": 0.1715, "learning_rate": 1.8068392302103075e-05, "epoch": 2.5219399538106235, "percentage": 63.05, "elapsed_time": "3:33:54", "remaining_time": "2:05:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5470, "total_steps": 8660, "loss": 0.1883, "learning_rate": 1.7971630090828713e-05, "epoch": 2.5265588914549655, "percentage": 63.16, "elapsed_time": "3:34:19", "remaining_time": "2:04:59", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5480, "total_steps": 8660, "loss": 0.178, "learning_rate": 1.7874982070840684e-05, "epoch": 2.531177829099307, "percentage": 63.28, "elapsed_time": "3:34:41", "remaining_time": "2:04:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5490, "total_steps": 8660, "loss": 0.1535, "learning_rate": 1.7778449812398078e-05, "epoch": 2.535796766743649, "percentage": 63.39, "elapsed_time": "3:35:01", "remaining_time": "2:04:09", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5500, "total_steps": 8660, "loss": 0.176, "learning_rate": 1.7682034883879183e-05, "epoch": 2.540415704387991, "percentage": 63.51, "elapsed_time": "3:35:23", "remaining_time": "2:03:45", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5510, "total_steps": 8660, "loss": 0.1764, "learning_rate": 1.7585738851755993e-05, "epoch": 2.5450346420323324, "percentage": 63.63, "elapsed_time": "3:35:47", "remaining_time": "2:03:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5520, "total_steps": 8660, "loss": 0.1663, "learning_rate": 1.7489563280568803e-05, "epoch": 2.5496535796766744, "percentage": 63.74, "elapsed_time": "3:36:09", "remaining_time": "2:02:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5530, "total_steps": 8660, "loss": 0.1641, "learning_rate": 1.7393509732900713e-05, "epoch": 2.5542725173210163, "percentage": 63.86, "elapsed_time": "3:36:30", "remaining_time": "2:02:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5540, "total_steps": 8660, "loss": 0.172, "learning_rate": 1.7297579769352306e-05, "epoch": 2.558891454965358, "percentage": 63.97, "elapsed_time": "3:36:50", "remaining_time": "2:02:07", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5550, "total_steps": 8660, "loss": 0.1528, "learning_rate": 1.7201774948516287e-05, "epoch": 2.5635103926097, "percentage": 64.09, "elapsed_time": "3:37:11", "remaining_time": "2:01:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5560, "total_steps": 8660, "loss": 0.1839, "learning_rate": 1.7106096826952094e-05, "epoch": 2.5681293302540418, "percentage": 64.2, "elapsed_time": "3:37:35", "remaining_time": "2:01:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5570, "total_steps": 8660, "loss": 0.1697, "learning_rate": 1.701054695916072e-05, "epoch": 2.5727482678983833, "percentage": 64.32, "elapsed_time": "3:37:58", "remaining_time": "2:00:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5580, "total_steps": 8660, "loss": 0.1815, "learning_rate": 1.6915126897559348e-05, "epoch": 2.5773672055427252, "percentage": 64.43, "elapsed_time": "3:38:22", "remaining_time": "2:00:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5590, "total_steps": 8660, "loss": 0.1739, "learning_rate": 1.6819838192456177e-05, "epoch": 2.581986143187067, "percentage": 64.55, "elapsed_time": "3:38:46", "remaining_time": "2:00:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5600, "total_steps": 8660, "loss": 0.1687, "learning_rate": 1.6724682392025256e-05, "epoch": 2.5866050808314087, "percentage": 64.67, "elapsed_time": "3:39:09", "remaining_time": "1:59:45", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5610, "total_steps": 8660, "loss": 0.1815, "learning_rate": 1.6629661042281287e-05, "epoch": 2.5912240184757507, "percentage": 64.78, "elapsed_time": "3:39:33", "remaining_time": "1:59:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5620, "total_steps": 8660, "loss": 0.1681, "learning_rate": 1.6534775687054544e-05, "epoch": 2.5958429561200926, "percentage": 64.9, "elapsed_time": "3:39:56", "remaining_time": "1:58:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5630, "total_steps": 8660, "loss": 0.1889, "learning_rate": 1.644002786796575e-05, "epoch": 2.600461893764434, "percentage": 65.01, "elapsed_time": "3:40:21", "remaining_time": "1:58:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5640, "total_steps": 8660, "loss": 0.1615, "learning_rate": 1.6345419124401052e-05, "epoch": 2.605080831408776, "percentage": 65.13, "elapsed_time": "3:40:46", "remaining_time": "1:58:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5650, "total_steps": 8660, "loss": 0.1781, "learning_rate": 1.6250950993487035e-05, "epoch": 2.6096997690531176, "percentage": 65.24, "elapsed_time": "3:41:10", "remaining_time": "1:57:49", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5660, "total_steps": 8660, "loss": 0.1604, "learning_rate": 1.61566250100657e-05, "epoch": 2.6143187066974596, "percentage": 65.36, "elapsed_time": "3:41:36", "remaining_time": "1:57:27", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5670, "total_steps": 8660, "loss": 0.1517, "learning_rate": 1.606244270666955e-05, "epoch": 2.6189376443418015, "percentage": 65.47, "elapsed_time": "3:42:00", "remaining_time": "1:57:04", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5680, "total_steps": 8660, "loss": 0.189, "learning_rate": 1.5968405613496722e-05, "epoch": 2.623556581986143, "percentage": 65.59, "elapsed_time": "3:42:22", "remaining_time": "1:56:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5690, "total_steps": 8660, "loss": 0.1601, "learning_rate": 1.587451525838604e-05, "epoch": 2.628175519630485, "percentage": 65.7, "elapsed_time": "3:42:45", "remaining_time": "1:56:16", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5700, "total_steps": 8660, "loss": 0.1784, "learning_rate": 1.5780773166792306e-05, "epoch": 2.6327944572748265, "percentage": 65.82, "elapsed_time": "3:43:10", "remaining_time": "1:55:53", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5710, "total_steps": 8660, "loss": 0.1636, "learning_rate": 1.5687180861761407e-05, "epoch": 2.6374133949191685, "percentage": 65.94, "elapsed_time": "3:43:33", "remaining_time": "1:55:29", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5720, "total_steps": 8660, "loss": 0.178, "learning_rate": 1.559373986390565e-05, "epoch": 2.6420323325635104, "percentage": 66.05, "elapsed_time": "3:43:57", "remaining_time": "1:55:06", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5730, "total_steps": 8660, "loss": 0.1838, "learning_rate": 1.5500451691379003e-05, "epoch": 2.646651270207852, "percentage": 66.17, "elapsed_time": "3:44:20", "remaining_time": "1:54:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5740, "total_steps": 8660, "loss": 0.2045, "learning_rate": 1.5407317859852466e-05, "epoch": 2.651270207852194, "percentage": 66.28, "elapsed_time": "3:44:45", "remaining_time": "1:54:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5750, "total_steps": 8660, "loss": 0.1996, "learning_rate": 1.5314339882489438e-05, "epoch": 2.655889145496536, "percentage": 66.4, "elapsed_time": "3:45:10", "remaining_time": "1:53:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5760, "total_steps": 8660, "loss": 0.1815, "learning_rate": 1.5221519269921103e-05, "epoch": 2.6605080831408774, "percentage": 66.51, "elapsed_time": "3:45:35", "remaining_time": "1:53:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5770, "total_steps": 8660, "loss": 0.1591, "learning_rate": 1.5128857530221909e-05, "epoch": 2.6651270207852193, "percentage": 66.63, "elapsed_time": "3:45:59", "remaining_time": "1:53:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5780, "total_steps": 8660, "loss": 0.1857, "learning_rate": 1.5036356168885099e-05, "epoch": 2.6697459584295613, "percentage": 66.74, "elapsed_time": "3:46:21", "remaining_time": "1:52:47", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5790, "total_steps": 8660, "loss": 0.1658, "learning_rate": 1.494401668879819e-05, "epoch": 2.674364896073903, "percentage": 66.86, "elapsed_time": "3:46:44", "remaining_time": "1:52:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5800, "total_steps": 8660, "loss": 0.1821, "learning_rate": 1.4851840590218572e-05, "epoch": 2.678983833718245, "percentage": 66.97, "elapsed_time": "3:47:07", "remaining_time": "1:51:59", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5810, "total_steps": 8660, "loss": 0.1547, "learning_rate": 1.4759829370749184e-05, "epoch": 2.6836027713625867, "percentage": 67.09, "elapsed_time": "3:47:29", "remaining_time": "1:51:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5820, "total_steps": 8660, "loss": 0.1863, "learning_rate": 1.4667984525314093e-05, "epoch": 2.6882217090069283, "percentage": 67.21, "elapsed_time": "3:47:52", "remaining_time": "1:51:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5830, "total_steps": 8660, "loss": 0.1863, "learning_rate": 1.4576307546134302e-05, "epoch": 2.69284064665127, "percentage": 67.32, "elapsed_time": "3:48:15", "remaining_time": "1:50:47", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5840, "total_steps": 8660, "loss": 0.1716, "learning_rate": 1.4484799922703435e-05, "epoch": 2.697459584295612, "percentage": 67.44, "elapsed_time": "3:48:37", "remaining_time": "1:50:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5850, "total_steps": 8660, "loss": 0.1732, "learning_rate": 1.4393463141763541e-05, "epoch": 2.7020785219399537, "percentage": 67.55, "elapsed_time": "3:49:01", "remaining_time": "1:50:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5860, "total_steps": 8660, "loss": 0.1914, "learning_rate": 1.4302298687281007e-05, "epoch": 2.7066974595842956, "percentage": 67.67, "elapsed_time": "3:49:25", "remaining_time": "1:49:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5870, "total_steps": 8660, "loss": 0.1775, "learning_rate": 1.421130804042235e-05, "epoch": 2.7113163972286376, "percentage": 67.78, "elapsed_time": "3:49:48", "remaining_time": "1:49:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5880, "total_steps": 8660, "loss": 0.1465, "learning_rate": 1.4120492679530237e-05, "epoch": 2.715935334872979, "percentage": 67.9, "elapsed_time": "3:50:10", "remaining_time": "1:48:49", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5890, "total_steps": 8660, "loss": 0.1697, "learning_rate": 1.4029854080099408e-05, "epoch": 2.720554272517321, "percentage": 68.01, "elapsed_time": "3:50:31", "remaining_time": "1:48:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5900, "total_steps": 8660, "loss": 0.1584, "learning_rate": 1.3939393714752722e-05, "epoch": 2.725173210161663, "percentage": 68.13, "elapsed_time": "3:50:52", "remaining_time": "1:48:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5910, "total_steps": 8660, "loss": 0.1614, "learning_rate": 1.384911305321726e-05, "epoch": 2.7297921478060045, "percentage": 68.24, "elapsed_time": "3:51:15", "remaining_time": "1:47:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5920, "total_steps": 8660, "loss": 0.1677, "learning_rate": 1.3759013562300393e-05, "epoch": 2.7344110854503465, "percentage": 68.36, "elapsed_time": "3:51:38", "remaining_time": "1:47:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5930, "total_steps": 8660, "loss": 0.169, "learning_rate": 1.3669096705866008e-05, "epoch": 2.7390300230946885, "percentage": 68.48, "elapsed_time": "3:52:00", "remaining_time": "1:46:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5940, "total_steps": 8660, "loss": 0.1636, "learning_rate": 1.3579363944810647e-05, "epoch": 2.74364896073903, "percentage": 68.59, "elapsed_time": "3:52:22", "remaining_time": "1:46:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5950, "total_steps": 8660, "loss": 0.1597, "learning_rate": 1.3489816737039867e-05, "epoch": 2.748267898383372, "percentage": 68.71, "elapsed_time": "3:52:45", "remaining_time": "1:46:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5960, "total_steps": 8660, "loss": 0.1715, "learning_rate": 1.3400456537444492e-05, "epoch": 2.752886836027714, "percentage": 68.82, "elapsed_time": "3:53:05", "remaining_time": "1:45:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5970, "total_steps": 8660, "loss": 0.1674, "learning_rate": 1.3311284797876971e-05, "epoch": 2.7575057736720554, "percentage": 68.94, "elapsed_time": "3:53:28", "remaining_time": "1:45:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5980, "total_steps": 8660, "loss": 0.1933, "learning_rate": 1.3222302967127809e-05, "epoch": 2.7621247113163974, "percentage": 69.05, "elapsed_time": "3:53:52", "remaining_time": "1:44:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 5990, "total_steps": 8660, "loss": 0.1483, "learning_rate": 1.313351249090205e-05, "epoch": 2.766743648960739, "percentage": 69.17, "elapsed_time": "3:54:14", "remaining_time": "1:44:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6000, "total_steps": 8660, "loss": 0.1674, "learning_rate": 1.3044914811795734e-05, "epoch": 2.771362586605081, "percentage": 69.28, "elapsed_time": "3:54:37", "remaining_time": "1:44:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6010, "total_steps": 8660, "loss": 0.1483, "learning_rate": 1.2956511369272515e-05, "epoch": 2.775981524249423, "percentage": 69.4, "elapsed_time": "3:55:00", "remaining_time": "1:43:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6020, "total_steps": 8660, "loss": 0.1847, "learning_rate": 1.2868303599640227e-05, "epoch": 2.7806004618937643, "percentage": 69.52, "elapsed_time": "3:55:22", "remaining_time": "1:43:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6030, "total_steps": 8660, "loss": 0.1801, "learning_rate": 1.2780292936027572e-05, "epoch": 2.7852193995381063, "percentage": 69.63, "elapsed_time": "3:55:45", "remaining_time": "1:42:49", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6040, "total_steps": 8660, "loss": 0.1617, "learning_rate": 1.2692480808360851e-05, "epoch": 2.789838337182448, "percentage": 69.75, "elapsed_time": "3:56:09", "remaining_time": "1:42:26", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6050, "total_steps": 8660, "loss": 0.165, "learning_rate": 1.2604868643340684e-05, "epoch": 2.7944572748267897, "percentage": 69.86, "elapsed_time": "3:56:31", "remaining_time": "1:42:02", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6060, "total_steps": 8660, "loss": 0.1602, "learning_rate": 1.251745786441888e-05, "epoch": 2.7990762124711317, "percentage": 69.98, "elapsed_time": "3:56:58", "remaining_time": "1:41:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6070, "total_steps": 8660, "loss": 0.172, "learning_rate": 1.2430249891775295e-05, "epoch": 2.803695150115473, "percentage": 70.09, "elapsed_time": "3:57:23", "remaining_time": "1:41:17", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6080, "total_steps": 8660, "loss": 0.18, "learning_rate": 1.2343246142294723e-05, "epoch": 2.808314087759815, "percentage": 70.21, "elapsed_time": "3:57:47", "remaining_time": "1:40:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6090, "total_steps": 8660, "loss": 0.1696, "learning_rate": 1.225644802954394e-05, "epoch": 2.812933025404157, "percentage": 70.32, "elapsed_time": "3:58:11", "remaining_time": "1:40:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6100, "total_steps": 8660, "loss": 0.1582, "learning_rate": 1.2169856963748675e-05, "epoch": 2.8175519630484986, "percentage": 70.44, "elapsed_time": "3:58:35", "remaining_time": "1:40:07", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6110, "total_steps": 8660, "loss": 0.1747, "learning_rate": 1.2083474351770724e-05, "epoch": 2.8221709006928406, "percentage": 70.55, "elapsed_time": "3:58:59", "remaining_time": "1:39:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6120, "total_steps": 8660, "loss": 0.1675, "learning_rate": 1.1997301597085123e-05, "epoch": 2.8267898383371826, "percentage": 70.67, "elapsed_time": "3:59:23", "remaining_time": "1:39:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6130, "total_steps": 8660, "loss": 0.167, "learning_rate": 1.1911340099757281e-05, "epoch": 2.831408775981524, "percentage": 70.79, "elapsed_time": "3:59:48", "remaining_time": "1:38:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6140, "total_steps": 8660, "loss": 0.1598, "learning_rate": 1.1825591256420302e-05, "epoch": 2.836027713625866, "percentage": 70.9, "elapsed_time": "4:00:12", "remaining_time": "1:38:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6150, "total_steps": 8660, "loss": 0.1741, "learning_rate": 1.174005646025223e-05, "epoch": 2.840646651270208, "percentage": 71.02, "elapsed_time": "4:00:35", "remaining_time": "1:38:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6160, "total_steps": 8660, "loss": 0.1637, "learning_rate": 1.1654737100953447e-05, "epoch": 2.8452655889145495, "percentage": 71.13, "elapsed_time": "4:00:57", "remaining_time": "1:37:47", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6170, "total_steps": 8660, "loss": 0.1607, "learning_rate": 1.1569634564724105e-05, "epoch": 2.8498845265588915, "percentage": 71.25, "elapsed_time": "4:01:19", "remaining_time": "1:37:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6180, "total_steps": 8660, "loss": 0.1615, "learning_rate": 1.1484750234241595e-05, "epoch": 2.8545034642032334, "percentage": 71.36, "elapsed_time": "4:01:42", "remaining_time": "1:36:59", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6190, "total_steps": 8660, "loss": 0.1499, "learning_rate": 1.1400085488638044e-05, "epoch": 2.859122401847575, "percentage": 71.48, "elapsed_time": "4:02:03", "remaining_time": "1:36:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6200, "total_steps": 8660, "loss": 0.1713, "learning_rate": 1.131564170347798e-05, "epoch": 2.863741339491917, "percentage": 71.59, "elapsed_time": "4:02:26", "remaining_time": "1:36:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6210, "total_steps": 8660, "loss": 0.1454, "learning_rate": 1.1231420250735908e-05, "epoch": 2.868360277136259, "percentage": 71.71, "elapsed_time": "4:02:50", "remaining_time": "1:35:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6220, "total_steps": 8660, "loss": 0.161, "learning_rate": 1.114742249877409e-05, "epoch": 2.8729792147806004, "percentage": 71.82, "elapsed_time": "4:03:14", "remaining_time": "1:35:25", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6230, "total_steps": 8660, "loss": 0.1474, "learning_rate": 1.1063649812320254e-05, "epoch": 2.8775981524249423, "percentage": 71.94, "elapsed_time": "4:03:37", "remaining_time": "1:35:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6240, "total_steps": 8660, "loss": 0.1706, "learning_rate": 1.0980103552445442e-05, "epoch": 2.8822170900692843, "percentage": 72.06, "elapsed_time": "4:04:03", "remaining_time": "1:34:39", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6250, "total_steps": 8660, "loss": 0.1699, "learning_rate": 1.0896785076541923e-05, "epoch": 2.886836027713626, "percentage": 72.17, "elapsed_time": "4:04:26", "remaining_time": "1:34:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6260, "total_steps": 8660, "loss": 0.1791, "learning_rate": 1.0813695738301094e-05, "epoch": 2.8914549653579678, "percentage": 72.29, "elapsed_time": "4:04:49", "remaining_time": "1:33:51", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6270, "total_steps": 8660, "loss": 0.185, "learning_rate": 1.0730836887691522e-05, "epoch": 2.8960739030023097, "percentage": 72.4, "elapsed_time": "4:05:14", "remaining_time": "1:33:28", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6280, "total_steps": 8660, "loss": 0.1712, "learning_rate": 1.0648209870936993e-05, "epoch": 2.9006928406466512, "percentage": 72.52, "elapsed_time": "4:05:38", "remaining_time": "1:33:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6290, "total_steps": 8660, "loss": 0.1835, "learning_rate": 1.0565816030494626e-05, "epoch": 2.905311778290993, "percentage": 72.63, "elapsed_time": "4:06:01", "remaining_time": "1:32:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6300, "total_steps": 8660, "loss": 0.1642, "learning_rate": 1.0483656705033107e-05, "epoch": 2.909930715935335, "percentage": 72.75, "elapsed_time": "4:06:25", "remaining_time": "1:32:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6310, "total_steps": 8660, "loss": 0.1512, "learning_rate": 1.0401733229410906e-05, "epoch": 2.9145496535796767, "percentage": 72.86, "elapsed_time": "4:06:48", "remaining_time": "1:31:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6320, "total_steps": 8660, "loss": 0.1704, "learning_rate": 1.0320046934654576e-05, "epoch": 2.9191685912240186, "percentage": 72.98, "elapsed_time": "4:07:11", "remaining_time": "1:31:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6330, "total_steps": 8660, "loss": 0.1709, "learning_rate": 1.023859914793717e-05, "epoch": 2.92378752886836, "percentage": 73.09, "elapsed_time": "4:07:35", "remaining_time": "1:31:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6340, "total_steps": 8660, "loss": 0.1717, "learning_rate": 1.0157391192556629e-05, "epoch": 2.928406466512702, "percentage": 73.21, "elapsed_time": "4:07:58", "remaining_time": "1:30:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6350, "total_steps": 8660, "loss": 0.2006, "learning_rate": 1.007642438791434e-05, "epoch": 2.9330254041570436, "percentage": 73.33, "elapsed_time": "4:08:21", "remaining_time": "1:30:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6360, "total_steps": 8660, "loss": 0.1489, "learning_rate": 9.995700049493644e-06, "epoch": 2.9376443418013856, "percentage": 73.44, "elapsed_time": "4:08:45", "remaining_time": "1:29:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6370, "total_steps": 8660, "loss": 0.1696, "learning_rate": 9.915219488838482e-06, "epoch": 2.9422632794457275, "percentage": 73.56, "elapsed_time": "4:09:11", "remaining_time": "1:29:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6380, "total_steps": 8660, "loss": 0.1531, "learning_rate": 9.834984013532116e-06, "epoch": 2.946882217090069, "percentage": 73.67, "elapsed_time": "4:09:34", "remaining_time": "1:29:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6390, "total_steps": 8660, "loss": 0.1514, "learning_rate": 9.754994927175828e-06, "epoch": 2.951501154734411, "percentage": 73.79, "elapsed_time": "4:09:59", "remaining_time": "1:28:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6400, "total_steps": 8660, "loss": 0.1765, "learning_rate": 9.67525352936781e-06, "epoch": 2.956120092378753, "percentage": 73.9, "elapsed_time": "4:10:23", "remaining_time": "1:28:25", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6410, "total_steps": 8660, "loss": 0.1773, "learning_rate": 9.595761115681957e-06, "epoch": 2.9607390300230945, "percentage": 74.02, "elapsed_time": "4:10:45", "remaining_time": "1:28:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6420, "total_steps": 8660, "loss": 0.1697, "learning_rate": 9.516518977646929e-06, "epoch": 2.9653579676674364, "percentage": 74.13, "elapsed_time": "4:11:09", "remaining_time": "1:27:38", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6430, "total_steps": 8660, "loss": 0.1603, "learning_rate": 9.43752840272506e-06, "epoch": 2.9699769053117784, "percentage": 74.25, "elapsed_time": "4:11:35", "remaining_time": "1:27:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6440, "total_steps": 8660, "loss": 0.1547, "learning_rate": 9.358790674291527e-06, "epoch": 2.97459584295612, "percentage": 74.36, "elapsed_time": "4:12:00", "remaining_time": "1:26:52", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6450, "total_steps": 8660, "loss": 0.1822, "learning_rate": 9.280307071613426e-06, "epoch": 2.979214780600462, "percentage": 74.48, "elapsed_time": "4:12:25", "remaining_time": "1:26:29", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6460, "total_steps": 8660, "loss": 0.1635, "learning_rate": 9.20207886982906e-06, "epoch": 2.983833718244804, "percentage": 74.6, "elapsed_time": "4:12:49", "remaining_time": "1:26:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6470, "total_steps": 8660, "loss": 0.1881, "learning_rate": 9.124107339927148e-06, "epoch": 2.9884526558891453, "percentage": 74.71, "elapsed_time": "4:13:14", "remaining_time": "1:25:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6480, "total_steps": 8660, "loss": 0.1551, "learning_rate": 9.046393748726245e-06, "epoch": 2.9930715935334873, "percentage": 74.83, "elapsed_time": "4:13:40", "remaining_time": "1:25:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6490, "total_steps": 8660, "loss": 0.1787, "learning_rate": 8.968939358854104e-06, "epoch": 2.9976905311778292, "percentage": 74.94, "elapsed_time": "4:14:05", "remaining_time": "1:24:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6500, "total_steps": 8660, "loss": 0.1394, "learning_rate": 8.891745428727178e-06, "epoch": 3.0023094688221708, "percentage": 75.06, "elapsed_time": "4:14:30", "remaining_time": "1:24:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6510, "total_steps": 8660, "loss": 0.1309, "learning_rate": 8.814813212530207e-06, "epoch": 3.0069284064665127, "percentage": 75.17, "elapsed_time": "4:14:56", "remaining_time": "1:24:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6520, "total_steps": 8660, "loss": 0.125, "learning_rate": 8.738143960195786e-06, "epoch": 3.0115473441108547, "percentage": 75.29, "elapsed_time": "4:15:22", "remaining_time": "1:23:49", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6530, "total_steps": 8660, "loss": 0.1168, "learning_rate": 8.66173891738411e-06, "epoch": 3.016166281755196, "percentage": 75.4, "elapsed_time": "4:15:47", "remaining_time": "1:23:26", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6540, "total_steps": 8660, "loss": 0.1073, "learning_rate": 8.585599325462685e-06, "epoch": 3.020785219399538, "percentage": 75.52, "elapsed_time": "4:16:11", "remaining_time": "1:23:02", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6550, "total_steps": 8660, "loss": 0.1334, "learning_rate": 8.509726421486205e-06, "epoch": 3.02540415704388, "percentage": 75.64, "elapsed_time": "4:16:37", "remaining_time": "1:22:39", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6560, "total_steps": 8660, "loss": 0.1359, "learning_rate": 8.43412143817644e-06, "epoch": 3.0300230946882216, "percentage": 75.75, "elapsed_time": "4:17:01", "remaining_time": "1:22:16", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6570, "total_steps": 8660, "loss": 0.1218, "learning_rate": 8.358785603902175e-06, "epoch": 3.0346420323325636, "percentage": 75.87, "elapsed_time": "4:17:26", "remaining_time": "1:21:53", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6580, "total_steps": 8660, "loss": 0.1311, "learning_rate": 8.283720142659283e-06, "epoch": 3.0392609699769055, "percentage": 75.98, "elapsed_time": "4:17:51", "remaining_time": "1:21:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6590, "total_steps": 8660, "loss": 0.1206, "learning_rate": 8.20892627405086e-06, "epoch": 3.043879907621247, "percentage": 76.1, "elapsed_time": "4:18:17", "remaining_time": "1:21:07", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6600, "total_steps": 8660, "loss": 0.1099, "learning_rate": 8.134405213267351e-06, "epoch": 3.048498845265589, "percentage": 76.21, "elapsed_time": "4:18:41", "remaining_time": "1:20:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6610, "total_steps": 8660, "loss": 0.1107, "learning_rate": 8.06015817106687e-06, "epoch": 3.0531177829099305, "percentage": 76.33, "elapsed_time": "4:19:04", "remaining_time": "1:20:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6620, "total_steps": 8660, "loss": 0.1151, "learning_rate": 7.986186353755479e-06, "epoch": 3.0577367205542725, "percentage": 76.44, "elapsed_time": "4:19:27", "remaining_time": "1:19:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6630, "total_steps": 8660, "loss": 0.1084, "learning_rate": 7.912490963167595e-06, "epoch": 3.0623556581986144, "percentage": 76.56, "elapsed_time": "4:19:50", "remaining_time": "1:19:33", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6640, "total_steps": 8660, "loss": 0.1234, "learning_rate": 7.83907319664652e-06, "epoch": 3.066974595842956, "percentage": 76.67, "elapsed_time": "4:20:14", "remaining_time": "1:19:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6650, "total_steps": 8660, "loss": 0.1133, "learning_rate": 7.765934247024895e-06, "epoch": 3.071593533487298, "percentage": 76.79, "elapsed_time": "4:20:38", "remaining_time": "1:18:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6660, "total_steps": 8660, "loss": 0.1122, "learning_rate": 7.6930753026054e-06, "epoch": 3.07621247113164, "percentage": 76.91, "elapsed_time": "4:21:04", "remaining_time": "1:18:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6670, "total_steps": 8660, "loss": 0.1196, "learning_rate": 7.6204975471414145e-06, "epoch": 3.0808314087759814, "percentage": 77.02, "elapsed_time": "4:21:29", "remaining_time": "1:18:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6680, "total_steps": 8660, "loss": 0.1095, "learning_rate": 7.548202159817766e-06, "epoch": 3.0854503464203233, "percentage": 77.14, "elapsed_time": "4:21:52", "remaining_time": "1:17:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6690, "total_steps": 8660, "loss": 0.1122, "learning_rate": 7.476190315231624e-06, "epoch": 3.0900692840646653, "percentage": 77.25, "elapsed_time": "4:22:16", "remaining_time": "1:17:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6700, "total_steps": 8660, "loss": 0.1179, "learning_rate": 7.4044631833733475e-06, "epoch": 3.094688221709007, "percentage": 77.37, "elapsed_time": "4:22:39", "remaining_time": "1:16:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6710, "total_steps": 8660, "loss": 0.1254, "learning_rate": 7.333021929607545e-06, "epoch": 3.0993071593533488, "percentage": 77.48, "elapsed_time": "4:23:02", "remaining_time": "1:16:26", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6720, "total_steps": 8660, "loss": 0.1329, "learning_rate": 7.261867714654092e-06, "epoch": 3.1039260969976907, "percentage": 77.6, "elapsed_time": "4:23:25", "remaining_time": "1:16:02", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6730, "total_steps": 8660, "loss": 0.1096, "learning_rate": 7.191001694569277e-06, "epoch": 3.1085450346420322, "percentage": 77.71, "elapsed_time": "4:23:49", "remaining_time": "1:15:39", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6740, "total_steps": 8660, "loss": 0.1194, "learning_rate": 7.120425020727067e-06, "epoch": 3.113163972286374, "percentage": 77.83, "elapsed_time": "4:24:13", "remaining_time": "1:15:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6750, "total_steps": 8660, "loss": 0.1181, "learning_rate": 7.050138839800332e-06, "epoch": 3.1177829099307157, "percentage": 77.94, "elapsed_time": "4:24:38", "remaining_time": "1:14:52", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6760, "total_steps": 8660, "loss": 0.1092, "learning_rate": 6.980144293742252e-06, "epoch": 3.1224018475750577, "percentage": 78.06, "elapsed_time": "4:25:00", "remaining_time": "1:14:29", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6770, "total_steps": 8660, "loss": 0.1139, "learning_rate": 6.910442519767782e-06, "epoch": 3.1270207852193996, "percentage": 78.18, "elapsed_time": "4:25:25", "remaining_time": "1:14:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6780, "total_steps": 8660, "loss": 0.1232, "learning_rate": 6.841034650335115e-06, "epoch": 3.131639722863741, "percentage": 78.29, "elapsed_time": "4:25:50", "remaining_time": "1:13:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6790, "total_steps": 8660, "loss": 0.1212, "learning_rate": 6.7719218131273494e-06, "epoch": 3.136258660508083, "percentage": 78.41, "elapsed_time": "4:26:13", "remaining_time": "1:13:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6800, "total_steps": 8660, "loss": 0.1321, "learning_rate": 6.70310513103414e-06, "epoch": 3.140877598152425, "percentage": 78.52, "elapsed_time": "4:26:38", "remaining_time": "1:12:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6810, "total_steps": 8660, "loss": 0.1097, "learning_rate": 6.634585722133421e-06, "epoch": 3.1454965357967666, "percentage": 78.64, "elapsed_time": "4:27:01", "remaining_time": "1:12:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6820, "total_steps": 8660, "loss": 0.1308, "learning_rate": 6.566364699673311e-06, "epoch": 3.1501154734411085, "percentage": 78.75, "elapsed_time": "4:27:25", "remaining_time": "1:12:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6830, "total_steps": 8660, "loss": 0.1113, "learning_rate": 6.49844317205395e-06, "epoch": 3.1547344110854505, "percentage": 78.87, "elapsed_time": "4:27:48", "remaining_time": "1:11:45", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6840, "total_steps": 8660, "loss": 0.1219, "learning_rate": 6.430822242809559e-06, "epoch": 3.159353348729792, "percentage": 78.98, "elapsed_time": "4:28:11", "remaining_time": "1:11:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6850, "total_steps": 8660, "loss": 0.114, "learning_rate": 6.363503010590455e-06, "epoch": 3.163972286374134, "percentage": 79.1, "elapsed_time": "4:28:34", "remaining_time": "1:10:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6860, "total_steps": 8660, "loss": 0.1018, "learning_rate": 6.296486569145229e-06, "epoch": 3.168591224018476, "percentage": 79.21, "elapsed_time": "4:28:57", "remaining_time": "1:10:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6870, "total_steps": 8660, "loss": 0.1324, "learning_rate": 6.229774007302991e-06, "epoch": 3.1732101616628174, "percentage": 79.33, "elapsed_time": "4:29:20", "remaining_time": "1:10:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6880, "total_steps": 8660, "loss": 0.0978, "learning_rate": 6.16336640895564e-06, "epoch": 3.1778290993071594, "percentage": 79.45, "elapsed_time": "4:29:42", "remaining_time": "1:09:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6890, "total_steps": 8660, "loss": 0.1185, "learning_rate": 6.097264853040272e-06, "epoch": 3.1824480369515014, "percentage": 79.56, "elapsed_time": "4:30:06", "remaining_time": "1:09:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6900, "total_steps": 8660, "loss": 0.1048, "learning_rate": 6.03147041352167e-06, "epoch": 3.187066974595843, "percentage": 79.68, "elapsed_time": "4:30:28", "remaining_time": "1:08:59", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6910, "total_steps": 8660, "loss": 0.1302, "learning_rate": 5.96598415937483e-06, "epoch": 3.191685912240185, "percentage": 79.79, "elapsed_time": "4:30:52", "remaining_time": "1:08:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6920, "total_steps": 8660, "loss": 0.1171, "learning_rate": 5.900807154567589e-06, "epoch": 3.196304849884527, "percentage": 79.91, "elapsed_time": "4:31:14", "remaining_time": "1:08:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6930, "total_steps": 8660, "loss": 0.1136, "learning_rate": 5.8359404580433735e-06, "epoch": 3.2009237875288683, "percentage": 80.02, "elapsed_time": "4:31:36", "remaining_time": "1:07:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6940, "total_steps": 8660, "loss": 0.1107, "learning_rate": 5.77138512370394e-06, "epoch": 3.2055427251732103, "percentage": 80.14, "elapsed_time": "4:31:58", "remaining_time": "1:07:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6950, "total_steps": 8660, "loss": 0.0991, "learning_rate": 5.707142200392321e-06, "epoch": 3.2101616628175518, "percentage": 80.25, "elapsed_time": "4:32:20", "remaining_time": "1:07:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6960, "total_steps": 8660, "loss": 0.1272, "learning_rate": 5.643212731875705e-06, "epoch": 3.2147806004618937, "percentage": 80.37, "elapsed_time": "4:32:42", "remaining_time": "1:06:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6970, "total_steps": 8660, "loss": 0.1155, "learning_rate": 5.579597756828564e-06, "epoch": 3.2193995381062357, "percentage": 80.48, "elapsed_time": "4:33:03", "remaining_time": "1:06:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6980, "total_steps": 8660, "loss": 0.1097, "learning_rate": 5.5162983088156955e-06, "epoch": 3.224018475750577, "percentage": 80.6, "elapsed_time": "4:33:23", "remaining_time": "1:05:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 6990, "total_steps": 8660, "loss": 0.1133, "learning_rate": 5.453315416275481e-06, "epoch": 3.228637413394919, "percentage": 80.72, "elapsed_time": "4:33:46", "remaining_time": "1:05:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7000, "total_steps": 8660, "loss": 0.1239, "learning_rate": 5.390650102503184e-06, "epoch": 3.233256351039261, "percentage": 80.83, "elapsed_time": "4:34:10", "remaining_time": "1:05:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7010, "total_steps": 8660, "loss": 0.1136, "learning_rate": 5.328303385634264e-06, "epoch": 3.2378752886836026, "percentage": 80.95, "elapsed_time": "4:34:32", "remaining_time": "1:04:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7020, "total_steps": 8660, "loss": 0.1139, "learning_rate": 5.266276278627919e-06, "epoch": 3.2424942263279446, "percentage": 81.06, "elapsed_time": "4:34:54", "remaining_time": "1:04:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7030, "total_steps": 8660, "loss": 0.1028, "learning_rate": 5.204569789250546e-06, "epoch": 3.2471131639722866, "percentage": 81.18, "elapsed_time": "4:35:15", "remaining_time": "1:03:49", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7040, "total_steps": 8660, "loss": 0.1097, "learning_rate": 5.143184920059429e-06, "epoch": 3.251732101616628, "percentage": 81.29, "elapsed_time": "4:35:36", "remaining_time": "1:03:25", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7050, "total_steps": 8660, "loss": 0.1146, "learning_rate": 5.082122668386427e-06, "epoch": 3.25635103926097, "percentage": 81.41, "elapsed_time": "4:35:58", "remaining_time": "1:03:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7060, "total_steps": 8660, "loss": 0.1203, "learning_rate": 5.021384026321757e-06, "epoch": 3.2609699769053115, "percentage": 81.52, "elapsed_time": "4:36:21", "remaining_time": "1:02:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7070, "total_steps": 8660, "loss": 0.122, "learning_rate": 4.960969980697883e-06, "epoch": 3.2655889145496535, "percentage": 81.64, "elapsed_time": "4:36:43", "remaining_time": "1:02:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7080, "total_steps": 8660, "loss": 0.1053, "learning_rate": 4.90088151307351e-06, "epoch": 3.2702078521939955, "percentage": 81.76, "elapsed_time": "4:37:05", "remaining_time": "1:01:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7090, "total_steps": 8660, "loss": 0.1197, "learning_rate": 4.841119599717594e-06, "epoch": 3.274826789838337, "percentage": 81.87, "elapsed_time": "4:37:27", "remaining_time": "1:01:26", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7100, "total_steps": 8660, "loss": 0.1017, "learning_rate": 4.781685211593523e-06, "epoch": 3.279445727482679, "percentage": 81.99, "elapsed_time": "4:37:49", "remaining_time": "1:01:02", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7110, "total_steps": 8660, "loss": 0.1108, "learning_rate": 4.722579314343292e-06, "epoch": 3.284064665127021, "percentage": 82.1, "elapsed_time": "4:38:13", "remaining_time": "1:00:39", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7120, "total_steps": 8660, "loss": 0.1274, "learning_rate": 4.663802868271852e-06, "epoch": 3.2886836027713624, "percentage": 82.22, "elapsed_time": "4:38:37", "remaining_time": "1:00:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7130, "total_steps": 8660, "loss": 0.1037, "learning_rate": 4.605356828331511e-06, "epoch": 3.2933025404157044, "percentage": 82.33, "elapsed_time": "4:39:00", "remaining_time": "0:59:52", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7140, "total_steps": 8660, "loss": 0.1125, "learning_rate": 4.547242144106381e-06, "epoch": 3.2979214780600463, "percentage": 82.45, "elapsed_time": "4:39:22", "remaining_time": "0:59:28", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7150, "total_steps": 8660, "loss": 0.1108, "learning_rate": 4.489459759796988e-06, "epoch": 3.302540415704388, "percentage": 82.56, "elapsed_time": "4:39:46", "remaining_time": "0:59:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7160, "total_steps": 8660, "loss": 0.1074, "learning_rate": 4.432010614204926e-06, "epoch": 3.30715935334873, "percentage": 82.68, "elapsed_time": "4:40:10", "remaining_time": "0:58:41", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7170, "total_steps": 8660, "loss": 0.1169, "learning_rate": 4.374895640717569e-06, "epoch": 3.3117782909930717, "percentage": 82.79, "elapsed_time": "4:40:34", "remaining_time": "0:58:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7180, "total_steps": 8660, "loss": 0.1181, "learning_rate": 4.318115767292958e-06, "epoch": 3.3163972286374133, "percentage": 82.91, "elapsed_time": "4:40:57", "remaining_time": "0:57:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7190, "total_steps": 8660, "loss": 0.1279, "learning_rate": 4.261671916444679e-06, "epoch": 3.321016166281755, "percentage": 83.03, "elapsed_time": "4:41:22", "remaining_time": "0:57:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7200, "total_steps": 8660, "loss": 0.1212, "learning_rate": 4.2055650052269e-06, "epoch": 3.325635103926097, "percentage": 83.14, "elapsed_time": "4:41:46", "remaining_time": "0:57:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7210, "total_steps": 8660, "loss": 0.1183, "learning_rate": 4.149795945219476e-06, "epoch": 3.3302540415704387, "percentage": 83.26, "elapsed_time": "4:42:11", "remaining_time": "0:56:45", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7220, "total_steps": 8660, "loss": 0.1154, "learning_rate": 4.0943656425131095e-06, "epoch": 3.3348729792147807, "percentage": 83.37, "elapsed_time": "4:42:36", "remaining_time": "0:56:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7230, "total_steps": 8660, "loss": 0.1354, "learning_rate": 4.03927499769467e-06, "epoch": 3.3394919168591226, "percentage": 83.49, "elapsed_time": "4:43:02", "remaining_time": "0:55:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7240, "total_steps": 8660, "loss": 0.099, "learning_rate": 3.9845249058325205e-06, "epoch": 3.344110854503464, "percentage": 83.6, "elapsed_time": "4:43:27", "remaining_time": "0:55:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7250, "total_steps": 8660, "loss": 0.1104, "learning_rate": 3.930116256461997e-06, "epoch": 3.348729792147806, "percentage": 83.72, "elapsed_time": "4:43:51", "remaining_time": "0:55:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7260, "total_steps": 8660, "loss": 0.1251, "learning_rate": 3.876049933570966e-06, "epoch": 3.353348729792148, "percentage": 83.83, "elapsed_time": "4:44:15", "remaining_time": "0:54:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7270, "total_steps": 8660, "loss": 0.1338, "learning_rate": 3.822326815585428e-06, "epoch": 3.3579676674364896, "percentage": 83.95, "elapsed_time": "4:44:39", "remaining_time": "0:54:25", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7280, "total_steps": 8660, "loss": 0.0994, "learning_rate": 3.7689477753552916e-06, "epoch": 3.3625866050808315, "percentage": 84.06, "elapsed_time": "4:45:03", "remaining_time": "0:54:02", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7290, "total_steps": 8660, "loss": 0.1108, "learning_rate": 3.715913680140154e-06, "epoch": 3.367205542725173, "percentage": 84.18, "elapsed_time": "4:45:29", "remaining_time": "0:53:39", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7300, "total_steps": 8660, "loss": 0.118, "learning_rate": 3.663225391595218e-06, "epoch": 3.371824480369515, "percentage": 84.3, "elapsed_time": "4:45:53", "remaining_time": "0:53:15", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7310, "total_steps": 8660, "loss": 0.1069, "learning_rate": 3.6108837657573198e-06, "epoch": 3.376443418013857, "percentage": 84.41, "elapsed_time": "4:46:18", "remaining_time": "0:52:52", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7320, "total_steps": 8660, "loss": 0.1191, "learning_rate": 3.558889653030975e-06, "epoch": 3.3810623556581985, "percentage": 84.53, "elapsed_time": "4:46:41", "remaining_time": "0:52:28", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7330, "total_steps": 8660, "loss": 0.0935, "learning_rate": 3.507243898174592e-06, "epoch": 3.3856812933025404, "percentage": 84.64, "elapsed_time": "4:47:05", "remaining_time": "0:52:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7340, "total_steps": 8660, "loss": 0.1035, "learning_rate": 3.455947340286761e-06, "epoch": 3.3903002309468824, "percentage": 84.76, "elapsed_time": "4:47:29", "remaining_time": "0:51:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7350, "total_steps": 8660, "loss": 0.1104, "learning_rate": 3.405000812792572e-06, "epoch": 3.394919168591224, "percentage": 84.87, "elapsed_time": "4:47:53", "remaining_time": "0:51:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7360, "total_steps": 8660, "loss": 0.1117, "learning_rate": 3.3544051434301336e-06, "epoch": 3.399538106235566, "percentage": 84.99, "elapsed_time": "4:48:17", "remaining_time": "0:50:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7370, "total_steps": 8660, "loss": 0.1204, "learning_rate": 3.3041611542370705e-06, "epoch": 3.404157043879908, "percentage": 85.1, "elapsed_time": "4:48:41", "remaining_time": "0:50:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7380, "total_steps": 8660, "loss": 0.1054, "learning_rate": 3.2542696615371987e-06, "epoch": 3.4087759815242493, "percentage": 85.22, "elapsed_time": "4:49:05", "remaining_time": "0:50:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7390, "total_steps": 8660, "loss": 0.1135, "learning_rate": 3.2047314759272583e-06, "epoch": 3.4133949191685913, "percentage": 85.33, "elapsed_time": "4:49:29", "remaining_time": "0:49:45", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7400, "total_steps": 8660, "loss": 0.123, "learning_rate": 3.1555474022637448e-06, "epoch": 3.418013856812933, "percentage": 85.45, "elapsed_time": "4:49:52", "remaining_time": "0:49:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7410, "total_steps": 8660, "loss": 0.1158, "learning_rate": 3.1067182396498166e-06, "epoch": 3.4226327944572748, "percentage": 85.57, "elapsed_time": "4:50:16", "remaining_time": "0:48:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7420, "total_steps": 8660, "loss": 0.1064, "learning_rate": 3.05824478142234e-06, "epoch": 3.4272517321016167, "percentage": 85.68, "elapsed_time": "4:50:39", "remaining_time": "0:48:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7430, "total_steps": 8660, "loss": 0.1032, "learning_rate": 3.010127815138958e-06, "epoch": 3.4318706697459582, "percentage": 85.8, "elapsed_time": "4:51:02", "remaining_time": "0:48:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7440, "total_steps": 8660, "loss": 0.1121, "learning_rate": 2.962368122565351e-06, "epoch": 3.4364896073903, "percentage": 85.91, "elapsed_time": "4:51:26", "remaining_time": "0:47:47", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7450, "total_steps": 8660, "loss": 0.1208, "learning_rate": 2.9149664796624815e-06, "epoch": 3.441108545034642, "percentage": 86.03, "elapsed_time": "4:51:51", "remaining_time": "0:47:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7460, "total_steps": 8660, "loss": 0.131, "learning_rate": 2.867923656574012e-06, "epoch": 3.4457274826789837, "percentage": 86.14, "elapsed_time": "4:52:14", "remaining_time": "0:47:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7470, "total_steps": 8660, "loss": 0.1318, "learning_rate": 2.8212404176138017e-06, "epoch": 3.4503464203233256, "percentage": 86.26, "elapsed_time": "4:52:41", "remaining_time": "0:46:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7480, "total_steps": 8660, "loss": 0.1202, "learning_rate": 2.7749175212534672e-06, "epoch": 3.4549653579676676, "percentage": 86.37, "elapsed_time": "4:53:05", "remaining_time": "0:46:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7490, "total_steps": 8660, "loss": 0.1133, "learning_rate": 2.728955720110077e-06, "epoch": 3.459584295612009, "percentage": 86.49, "elapsed_time": "4:53:29", "remaining_time": "0:45:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7500, "total_steps": 8660, "loss": 0.1129, "learning_rate": 2.6833557609339005e-06, "epoch": 3.464203233256351, "percentage": 86.61, "elapsed_time": "4:53:52", "remaining_time": "0:45:27", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7510, "total_steps": 8660, "loss": 0.1166, "learning_rate": 2.6381183845963137e-06, "epoch": 3.468822170900693, "percentage": 86.72, "elapsed_time": "4:54:16", "remaining_time": "0:45:03", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7520, "total_steps": 8660, "loss": 0.12, "learning_rate": 2.5932443260777124e-06, "epoch": 3.4734411085450345, "percentage": 86.84, "elapsed_time": "4:54:40", "remaining_time": "0:44:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7530, "total_steps": 8660, "loss": 0.1178, "learning_rate": 2.5487343144556254e-06, "epoch": 3.4780600461893765, "percentage": 86.95, "elapsed_time": "4:55:02", "remaining_time": "0:44:16", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7540, "total_steps": 8660, "loss": 0.119, "learning_rate": 2.504589072892813e-06, "epoch": 3.4826789838337184, "percentage": 87.07, "elapsed_time": "4:55:27", "remaining_time": "0:43:53", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7550, "total_steps": 8660, "loss": 0.1056, "learning_rate": 2.4608093186255706e-06, "epoch": 3.48729792147806, "percentage": 87.18, "elapsed_time": "4:55:51", "remaining_time": "0:43:29", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7560, "total_steps": 8660, "loss": 0.1184, "learning_rate": 2.417395762952032e-06, "epoch": 3.491916859122402, "percentage": 87.3, "elapsed_time": "4:56:15", "remaining_time": "0:43:06", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7570, "total_steps": 8660, "loss": 0.1171, "learning_rate": 2.3743491112206472e-06, "epoch": 3.496535796766744, "percentage": 87.41, "elapsed_time": "4:56:39", "remaining_time": "0:42:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7580, "total_steps": 8660, "loss": 0.1068, "learning_rate": 2.3316700628186943e-06, "epoch": 3.5011547344110854, "percentage": 87.53, "elapsed_time": "4:57:02", "remaining_time": "0:42:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7590, "total_steps": 8660, "loss": 0.1066, "learning_rate": 2.2893593111609324e-06, "epoch": 3.5057736720554273, "percentage": 87.64, "elapsed_time": "4:57:24", "remaining_time": "0:41:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7600, "total_steps": 8660, "loss": 0.1129, "learning_rate": 2.2474175436783375e-06, "epoch": 3.5103926096997693, "percentage": 87.76, "elapsed_time": "4:57:45", "remaining_time": "0:41:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7610, "total_steps": 8660, "loss": 0.1303, "learning_rate": 2.2058454418069167e-06, "epoch": 3.515011547344111, "percentage": 87.88, "elapsed_time": "4:58:08", "remaining_time": "0:41:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7620, "total_steps": 8660, "loss": 0.101, "learning_rate": 2.1646436809766663e-06, "epoch": 3.5196304849884528, "percentage": 87.99, "elapsed_time": "4:58:31", "remaining_time": "0:40:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7630, "total_steps": 8660, "loss": 0.1146, "learning_rate": 2.1238129306005603e-06, "epoch": 3.5242494226327947, "percentage": 88.11, "elapsed_time": "4:58:55", "remaining_time": "0:40:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7640, "total_steps": 8660, "loss": 0.1286, "learning_rate": 2.083353854063708e-06, "epoch": 3.5288683602771362, "percentage": 88.22, "elapsed_time": "4:59:19", "remaining_time": "0:39:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7650, "total_steps": 8660, "loss": 0.1053, "learning_rate": 2.04326710871256e-06, "epoch": 3.533487297921478, "percentage": 88.34, "elapsed_time": "4:59:43", "remaining_time": "0:39:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7660, "total_steps": 8660, "loss": 0.1226, "learning_rate": 2.0035533458442253e-06, "epoch": 3.5381062355658197, "percentage": 88.45, "elapsed_time": "5:00:05", "remaining_time": "0:39:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7670, "total_steps": 8660, "loss": 0.1168, "learning_rate": 1.9642132106958867e-06, "epoch": 3.5427251732101617, "percentage": 88.57, "elapsed_time": "5:00:28", "remaining_time": "0:38:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7680, "total_steps": 8660, "loss": 0.1142, "learning_rate": 1.92524734243435e-06, "epoch": 3.5473441108545036, "percentage": 88.68, "elapsed_time": "5:00:52", "remaining_time": "0:38:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7690, "total_steps": 8660, "loss": 0.107, "learning_rate": 1.8866563741456067e-06, "epoch": 3.551963048498845, "percentage": 88.8, "elapsed_time": "5:01:16", "remaining_time": "0:38:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7700, "total_steps": 8660, "loss": 0.1253, "learning_rate": 1.8484409328245967e-06, "epoch": 3.556581986143187, "percentage": 88.91, "elapsed_time": "5:01:40", "remaining_time": "0:37:36", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7710, "total_steps": 8660, "loss": 0.1237, "learning_rate": 1.8106016393649895e-06, "epoch": 3.5612009237875286, "percentage": 89.03, "elapsed_time": "5:02:05", "remaining_time": "0:37:13", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7720, "total_steps": 8660, "loss": 0.1013, "learning_rate": 1.7731391085491046e-06, "epoch": 3.5658198614318706, "percentage": 89.15, "elapsed_time": "5:02:31", "remaining_time": "0:36:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7730, "total_steps": 8660, "loss": 0.1015, "learning_rate": 1.7360539490379413e-06, "epoch": 3.5704387990762125, "percentage": 89.26, "elapsed_time": "5:02:54", "remaining_time": "0:36:26", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7740, "total_steps": 8660, "loss": 0.1089, "learning_rate": 1.6993467633612565e-06, "epoch": 3.575057736720554, "percentage": 89.38, "elapsed_time": "5:03:18", "remaining_time": "0:36:03", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7750, "total_steps": 8660, "loss": 0.1147, "learning_rate": 1.6630181479078049e-06, "epoch": 3.579676674364896, "percentage": 89.49, "elapsed_time": "5:03:40", "remaining_time": "0:35:39", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7760, "total_steps": 8660, "loss": 0.1195, "learning_rate": 1.6270686929156314e-06, "epoch": 3.584295612009238, "percentage": 89.61, "elapsed_time": "5:04:05", "remaining_time": "0:35:16", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7770, "total_steps": 8660, "loss": 0.1171, "learning_rate": 1.5914989824624887e-06, "epoch": 3.5889145496535795, "percentage": 89.72, "elapsed_time": "5:04:29", "remaining_time": "0:34:52", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7780, "total_steps": 8660, "loss": 0.1124, "learning_rate": 1.5563095944563567e-06, "epoch": 3.5935334872979214, "percentage": 89.84, "elapsed_time": "5:04:50", "remaining_time": "0:34:28", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7790, "total_steps": 8660, "loss": 0.1159, "learning_rate": 1.5215011006260244e-06, "epoch": 3.5981524249422634, "percentage": 89.95, "elapsed_time": "5:05:12", "remaining_time": "0:34:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7800, "total_steps": 8660, "loss": 0.1239, "learning_rate": 1.487074066511826e-06, "epoch": 3.602771362586605, "percentage": 90.07, "elapsed_time": "5:05:37", "remaining_time": "0:33:41", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7810, "total_steps": 8660, "loss": 0.1207, "learning_rate": 1.4530290514564588e-06, "epoch": 3.607390300230947, "percentage": 90.18, "elapsed_time": "5:06:00", "remaining_time": "0:33:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7820, "total_steps": 8660, "loss": 0.114, "learning_rate": 1.4193666085958624e-06, "epoch": 3.612009237875289, "percentage": 90.3, "elapsed_time": "5:06:22", "remaining_time": "0:32:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7830, "total_steps": 8660, "loss": 0.1053, "learning_rate": 1.3860872848502682e-06, "epoch": 3.6166281755196303, "percentage": 90.42, "elapsed_time": "5:06:47", "remaining_time": "0:32:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7840, "total_steps": 8660, "loss": 0.1045, "learning_rate": 1.3531916209152895e-06, "epoch": 3.6212471131639723, "percentage": 90.53, "elapsed_time": "5:07:10", "remaining_time": "0:32:07", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7850, "total_steps": 8660, "loss": 0.1097, "learning_rate": 1.3206801512531396e-06, "epoch": 3.6258660508083143, "percentage": 90.65, "elapsed_time": "5:07:34", "remaining_time": "0:31:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7860, "total_steps": 8660, "loss": 0.1089, "learning_rate": 1.2885534040839698e-06, "epoch": 3.6304849884526558, "percentage": 90.76, "elapsed_time": "5:07:57", "remaining_time": "0:31:20", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7870, "total_steps": 8660, "loss": 0.1057, "learning_rate": 1.2568119013772471e-06, "epoch": 3.6351039260969977, "percentage": 90.88, "elapsed_time": "5:08:19", "remaining_time": "0:30:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7880, "total_steps": 8660, "loss": 0.1144, "learning_rate": 1.2254561588433189e-06, "epoch": 3.6397228637413397, "percentage": 90.99, "elapsed_time": "5:08:43", "remaining_time": "0:30:33", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7890, "total_steps": 8660, "loss": 0.1169, "learning_rate": 1.194486685925006e-06, "epoch": 3.644341801385681, "percentage": 91.11, "elapsed_time": "5:09:05", "remaining_time": "0:30:09", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7900, "total_steps": 8660, "loss": 0.1302, "learning_rate": 1.1639039857893192e-06, "epoch": 3.648960739030023, "percentage": 91.22, "elapsed_time": "5:09:28", "remaining_time": "0:29:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7910, "total_steps": 8660, "loss": 0.1138, "learning_rate": 1.1337085553193156e-06, "epoch": 3.653579676674365, "percentage": 91.34, "elapsed_time": "5:09:49", "remaining_time": "0:29:22", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7920, "total_steps": 8660, "loss": 0.1255, "learning_rate": 1.1039008851059963e-06, "epoch": 3.6581986143187066, "percentage": 91.45, "elapsed_time": "5:10:12", "remaining_time": "0:28:59", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7930, "total_steps": 8660, "loss": 0.1274, "learning_rate": 1.0744814594403462e-06, "epoch": 3.6628175519630486, "percentage": 91.57, "elapsed_time": "5:10:37", "remaining_time": "0:28:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7940, "total_steps": 8660, "loss": 0.1353, "learning_rate": 1.0454507563054745e-06, "epoch": 3.6674364896073905, "percentage": 91.69, "elapsed_time": "5:11:01", "remaining_time": "0:28:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7950, "total_steps": 8660, "loss": 0.1143, "learning_rate": 1.0168092473688245e-06, "epoch": 3.672055427251732, "percentage": 91.8, "elapsed_time": "5:11:25", "remaining_time": "0:27:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7960, "total_steps": 8660, "loss": 0.1233, "learning_rate": 9.88557397974546e-07, "epoch": 3.676674364896074, "percentage": 91.92, "elapsed_time": "5:11:48", "remaining_time": "0:27:25", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7970, "total_steps": 8660, "loss": 0.1111, "learning_rate": 9.606956671358997e-07, "epoch": 3.681293302540416, "percentage": 92.03, "elapsed_time": "5:12:12", "remaining_time": "0:27:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7980, "total_steps": 8660, "loss": 0.1134, "learning_rate": 9.332245075278162e-07, "epoch": 3.6859122401847575, "percentage": 92.15, "elapsed_time": "5:12:35", "remaining_time": "0:26:38", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 7990, "total_steps": 8660, "loss": 0.1182, "learning_rate": 9.061443654795498e-07, "epoch": 3.6905311778290995, "percentage": 92.26, "elapsed_time": "5:12:58", "remaining_time": "0:26:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8000, "total_steps": 8660, "loss": 0.1244, "learning_rate": 8.794556809674087e-07, "epoch": 3.695150115473441, "percentage": 92.38, "elapsed_time": "5:13:22", "remaining_time": "0:25:51", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8010, "total_steps": 8660, "loss": 0.1286, "learning_rate": 8.531588876076108e-07, "epoch": 3.699769053117783, "percentage": 92.49, "elapsed_time": "5:13:47", "remaining_time": "0:25:27", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8020, "total_steps": 8660, "loss": 0.1112, "learning_rate": 8.272544126492532e-07, "epoch": 3.7043879907621244, "percentage": 92.61, "elapsed_time": "5:14:12", "remaining_time": "0:25:04", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8030, "total_steps": 8660, "loss": 0.1053, "learning_rate": 8.017426769673514e-07, "epoch": 3.7090069284064664, "percentage": 92.73, "elapsed_time": "5:14:36", "remaining_time": "0:24:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8040, "total_steps": 8660, "loss": 0.1185, "learning_rate": 7.766240950560167e-07, "epoch": 3.7136258660508084, "percentage": 92.84, "elapsed_time": "5:15:00", "remaining_time": "0:24:17", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8050, "total_steps": 8660, "loss": 0.126, "learning_rate": 7.518990750217058e-07, "epoch": 3.71824480369515, "percentage": 92.96, "elapsed_time": "5:15:24", "remaining_time": "0:23:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8060, "total_steps": 8660, "loss": 0.1179, "learning_rate": 7.275680185766132e-07, "epoch": 3.722863741339492, "percentage": 93.07, "elapsed_time": "5:15:47", "remaining_time": "0:23:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8070, "total_steps": 8660, "loss": 0.1194, "learning_rate": 7.036313210321166e-07, "epoch": 3.727482678983834, "percentage": 93.19, "elapsed_time": "5:16:11", "remaining_time": "0:23:07", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8080, "total_steps": 8660, "loss": 0.1255, "learning_rate": 6.800893712923723e-07, "epoch": 3.7321016166281753, "percentage": 93.3, "elapsed_time": "5:16:34", "remaining_time": "0:22:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8090, "total_steps": 8660, "loss": 0.1284, "learning_rate": 6.569425518479999e-07, "epoch": 3.7367205542725173, "percentage": 93.42, "elapsed_time": "5:16:57", "remaining_time": "0:22:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8100, "total_steps": 8660, "loss": 0.1257, "learning_rate": 6.341912387698546e-07, "epoch": 3.741339491916859, "percentage": 93.53, "elapsed_time": "5:17:20", "remaining_time": "0:21:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8110, "total_steps": 8660, "loss": 0.1179, "learning_rate": 6.118358017029175e-07, "epoch": 3.7459584295612007, "percentage": 93.65, "elapsed_time": "5:17:44", "remaining_time": "0:21:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8120, "total_steps": 8660, "loss": 0.12, "learning_rate": 5.898766038603093e-07, "epoch": 3.7505773672055427, "percentage": 93.76, "elapsed_time": "5:18:09", "remaining_time": "0:21:09", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8130, "total_steps": 8660, "loss": 0.1082, "learning_rate": 5.683140020173616e-07, "epoch": 3.7551963048498846, "percentage": 93.88, "elapsed_time": "5:18:34", "remaining_time": "0:20:46", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8140, "total_steps": 8660, "loss": 0.0956, "learning_rate": 5.471483465058463e-07, "epoch": 3.759815242494226, "percentage": 94.0, "elapsed_time": "5:18:58", "remaining_time": "0:20:22", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8150, "total_steps": 8660, "loss": 0.1136, "learning_rate": 5.263799812082637e-07, "epoch": 3.764434180138568, "percentage": 94.11, "elapsed_time": "5:19:24", "remaining_time": "0:19:59", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8160, "total_steps": 8660, "loss": 0.1005, "learning_rate": 5.060092435522607e-07, "epoch": 3.76905311778291, "percentage": 94.23, "elapsed_time": "5:19:48", "remaining_time": "0:19:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8170, "total_steps": 8660, "loss": 0.1059, "learning_rate": 4.860364645051602e-07, "epoch": 3.7736720554272516, "percentage": 94.34, "elapsed_time": "5:20:12", "remaining_time": "0:19:12", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8180, "total_steps": 8660, "loss": 0.1245, "learning_rate": 4.664619685685656e-07, "epoch": 3.7782909930715936, "percentage": 94.46, "elapsed_time": "5:20:37", "remaining_time": "0:18:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8190, "total_steps": 8660, "loss": 0.1069, "learning_rate": 4.4728607377310383e-07, "epoch": 3.7829099307159355, "percentage": 94.57, "elapsed_time": "5:21:01", "remaining_time": "0:18:25", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8200, "total_steps": 8660, "loss": 0.109, "learning_rate": 4.2850909167324873e-07, "epoch": 3.787528868360277, "percentage": 94.69, "elapsed_time": "5:21:26", "remaining_time": "0:18:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8210, "total_steps": 8660, "loss": 0.0977, "learning_rate": 4.1013132734225857e-07, "epoch": 3.792147806004619, "percentage": 94.8, "elapsed_time": "5:21:50", "remaining_time": "0:17:38", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8220, "total_steps": 8660, "loss": 0.1209, "learning_rate": 3.921530793672329e-07, "epoch": 3.796766743648961, "percentage": 94.92, "elapsed_time": "5:22:15", "remaining_time": "0:17:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8230, "total_steps": 8660, "loss": 0.1171, "learning_rate": 3.745746398442468e-07, "epoch": 3.8013856812933025, "percentage": 95.03, "elapsed_time": "5:22:39", "remaining_time": "0:16:51", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8240, "total_steps": 8660, "loss": 0.1179, "learning_rate": 3.5739629437360475e-07, "epoch": 3.8060046189376444, "percentage": 95.15, "elapsed_time": "5:23:04", "remaining_time": "0:16:28", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8250, "total_steps": 8660, "loss": 0.1231, "learning_rate": 3.406183220552167e-07, "epoch": 3.8106235565819864, "percentage": 95.27, "elapsed_time": "5:23:28", "remaining_time": "0:16:04", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8260, "total_steps": 8660, "loss": 0.1369, "learning_rate": 3.242409954840403e-07, "epoch": 3.815242494226328, "percentage": 95.38, "elapsed_time": "5:23:54", "remaining_time": "0:15:41", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8270, "total_steps": 8660, "loss": 0.1167, "learning_rate": 3.082645807456763e-07, "epoch": 3.81986143187067, "percentage": 95.5, "elapsed_time": "5:24:17", "remaining_time": "0:15:17", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8280, "total_steps": 8660, "loss": 0.1199, "learning_rate": 2.9268933741202487e-07, "epoch": 3.824480369515012, "percentage": 95.61, "elapsed_time": "5:24:40", "remaining_time": "0:14:54", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8290, "total_steps": 8660, "loss": 0.1225, "learning_rate": 2.775155185370776e-07, "epoch": 3.8290993071593533, "percentage": 95.73, "elapsed_time": "5:25:03", "remaining_time": "0:14:30", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8300, "total_steps": 8660, "loss": 0.1097, "learning_rate": 2.6274337065280986e-07, "epoch": 3.8337182448036953, "percentage": 95.84, "elapsed_time": "5:25:27", "remaining_time": "0:14:06", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8310, "total_steps": 8660, "loss": 0.1087, "learning_rate": 2.483731337651618e-07, "epoch": 3.838337182448037, "percentage": 95.96, "elapsed_time": "5:25:50", "remaining_time": "0:13:43", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8320, "total_steps": 8660, "loss": 0.1197, "learning_rate": 2.3440504135016083e-07, "epoch": 3.8429561200923787, "percentage": 96.07, "elapsed_time": "5:26:14", "remaining_time": "0:13:19", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8330, "total_steps": 8660, "loss": 0.1203, "learning_rate": 2.2083932035010802e-07, "epoch": 3.8475750577367207, "percentage": 96.19, "elapsed_time": "5:26:36", "remaining_time": "0:12:56", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8340, "total_steps": 8660, "loss": 0.115, "learning_rate": 2.0767619116988934e-07, "epoch": 3.852193995381062, "percentage": 96.3, "elapsed_time": "5:27:01", "remaining_time": "0:12:32", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8350, "total_steps": 8660, "loss": 0.1042, "learning_rate": 1.9491586767342308e-07, "epoch": 3.856812933025404, "percentage": 96.42, "elapsed_time": "5:27:25", "remaining_time": "0:12:09", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8360, "total_steps": 8660, "loss": 0.0973, "learning_rate": 1.8255855718014858e-07, "epoch": 3.8614318706697457, "percentage": 96.54, "elapsed_time": "5:27:48", "remaining_time": "0:11:45", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8370, "total_steps": 8660, "loss": 0.1177, "learning_rate": 1.7060446046168188e-07, "epoch": 3.8660508083140877, "percentage": 96.65, "elapsed_time": "5:28:09", "remaining_time": "0:11:22", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8380, "total_steps": 8660, "loss": 0.1223, "learning_rate": 1.5905377173854873e-07, "epoch": 3.8706697459584296, "percentage": 96.77, "elapsed_time": "5:28:33", "remaining_time": "0:10:58", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8390, "total_steps": 8660, "loss": 0.1139, "learning_rate": 1.4790667867702335e-07, "epoch": 3.875288683602771, "percentage": 96.88, "elapsed_time": "5:28:55", "remaining_time": "0:10:35", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8400, "total_steps": 8660, "loss": 0.1099, "learning_rate": 1.371633623860835e-07, "epoch": 3.879907621247113, "percentage": 97.0, "elapsed_time": "5:29:20", "remaining_time": "0:10:11", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8410, "total_steps": 8660, "loss": 0.1092, "learning_rate": 1.268239974144686e-07, "epoch": 3.884526558891455, "percentage": 97.11, "elapsed_time": "5:29:45", "remaining_time": "0:09:48", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8420, "total_steps": 8660, "loss": 0.1172, "learning_rate": 1.1688875174784563e-07, "epoch": 3.8891454965357966, "percentage": 97.23, "elapsed_time": "5:30:08", "remaining_time": "0:09:24", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8430, "total_steps": 8660, "loss": 0.1071, "learning_rate": 1.073577868060699e-07, "epoch": 3.8937644341801385, "percentage": 97.34, "elapsed_time": "5:30:32", "remaining_time": "0:09:01", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8440, "total_steps": 8660, "loss": 0.125, "learning_rate": 9.823125744057582e-08, "epoch": 3.8983833718244805, "percentage": 97.46, "elapsed_time": "5:30:56", "remaining_time": "0:08:37", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8450, "total_steps": 8660, "loss": 0.1088, "learning_rate": 8.950931193185131e-08, "epoch": 3.903002309468822, "percentage": 97.58, "elapsed_time": "5:31:20", "remaining_time": "0:08:14", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8460, "total_steps": 8660, "loss": 0.1196, "learning_rate": 8.119209198703682e-08, "epoch": 3.907621247113164, "percentage": 97.69, "elapsed_time": "5:31:46", "remaining_time": "0:07:50", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8470, "total_steps": 8660, "loss": 0.1306, "learning_rate": 7.32797327376078e-08, "epoch": 3.912240184757506, "percentage": 97.81, "elapsed_time": "5:32:13", "remaining_time": "0:07:27", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8480, "total_steps": 8660, "loss": 0.1138, "learning_rate": 6.577236273720145e-08, "epoch": 3.9168591224018474, "percentage": 97.92, "elapsed_time": "5:32:37", "remaining_time": "0:07:03", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8490, "total_steps": 8660, "loss": 0.11, "learning_rate": 5.867010395951278e-08, "epoch": 3.9214780600461894, "percentage": 98.04, "elapsed_time": "5:32:59", "remaining_time": "0:06:40", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8500, "total_steps": 8660, "loss": 0.1127, "learning_rate": 5.197307179631017e-08, "epoch": 3.9260969976905313, "percentage": 98.15, "elapsed_time": "5:33:23", "remaining_time": "0:06:16", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8510, "total_steps": 8660, "loss": 0.1144, "learning_rate": 4.568137505557568e-08, "epoch": 3.930715935334873, "percentage": 98.27, "elapsed_time": "5:33:47", "remaining_time": "0:05:53", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8520, "total_steps": 8660, "loss": 0.1041, "learning_rate": 3.9795115959723184e-08, "epoch": 3.935334872979215, "percentage": 98.38, "elapsed_time": "5:34:10", "remaining_time": "0:05:29", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8530, "total_steps": 8660, "loss": 0.1045, "learning_rate": 3.4314390143938556e-08, "epoch": 3.9399538106235568, "percentage": 98.5, "elapsed_time": "5:34:33", "remaining_time": "0:05:05", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8540, "total_steps": 8660, "loss": 0.099, "learning_rate": 2.923928665464204e-08, "epoch": 3.9445727482678983, "percentage": 98.61, "elapsed_time": "5:34:57", "remaining_time": "0:04:42", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8550, "total_steps": 8660, "loss": 0.1132, "learning_rate": 2.4569887948019955e-08, "epoch": 3.9491916859122402, "percentage": 98.73, "elapsed_time": "5:35:21", "remaining_time": "0:04:18", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8560, "total_steps": 8660, "loss": 0.1188, "learning_rate": 2.030626988869522e-08, "epoch": 3.953810623556582, "percentage": 98.85, "elapsed_time": "5:35:44", "remaining_time": "0:03:55", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8570, "total_steps": 8660, "loss": 0.1245, "learning_rate": 1.6448501748494994e-08, "epoch": 3.9584295612009237, "percentage": 98.96, "elapsed_time": "5:36:09", "remaining_time": "0:03:31", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8580, "total_steps": 8660, "loss": 0.1117, "learning_rate": 1.2996646205329366e-08, "epoch": 3.9630484988452657, "percentage": 99.08, "elapsed_time": "5:36:33", "remaining_time": "0:03:08", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8590, "total_steps": 8660, "loss": 0.1085, "learning_rate": 9.950759342161608e-09, "epoch": 3.9676674364896076, "percentage": 99.19, "elapsed_time": "5:36:58", "remaining_time": "0:02:44", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8600, "total_steps": 8660, "loss": 0.1205, "learning_rate": 7.310890646100576e-09, "epoch": 3.972286374133949, "percentage": 99.31, "elapsed_time": "5:37:23", "remaining_time": "0:02:21", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8610, "total_steps": 8660, "loss": 0.1048, "learning_rate": 5.077083007609673e-09, "epoch": 3.976905311778291, "percentage": 99.42, "elapsed_time": "5:37:46", "remaining_time": "0:01:57", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8620, "total_steps": 8660, "loss": 0.1033, "learning_rate": 3.249372719793531e-09, "epoch": 3.981524249422633, "percentage": 99.54, "elapsed_time": "5:38:10", "remaining_time": "0:01:34", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8630, "total_steps": 8660, "loss": 0.1311, "learning_rate": 1.8277894778123694e-09, "epoch": 3.9861431870669746, "percentage": 99.65, "elapsed_time": "5:38:35", "remaining_time": "0:01:10", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8640, "total_steps": 8660, "loss": 0.0942, "learning_rate": 8.123563784073751e-10, "epoch": 3.9907621247113165, "percentage": 99.77, "elapsed_time": "5:38:59", "remaining_time": "0:00:47", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8650, "total_steps": 8660, "loss": 0.1166, "learning_rate": 2.0308991951212275e-10, "epoch": 3.995381062355658, "percentage": 99.88, "elapsed_time": "5:39:22", "remaining_time": "0:00:23", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8660, "total_steps": 8660, "loss": 0.1333, "learning_rate": 0.0, "epoch": 4.0, "percentage": 100.0, "elapsed_time": "5:39:46", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0} |
|
{"current_steps": 8660, "total_steps": 8660, "epoch": 4.0, "percentage": 100.0, "elapsed_time": "5:39:46", "remaining_time": "0:00:00", "throughput": "0.00", "total_tokens": 0} |
|
|