oh-dcft-v3.1-SN-405B-hacky / trainer_log.jsonl
sedrickkeh's picture
Training in progress, epoch 2
a123f9c verified
raw
history blame
17.5 kB
{"current_steps": 10, "total_steps": 960, "loss": 0.8071, "lr": 5e-06, "epoch": 0.0312256049960968, "percentage": 1.04, "elapsed_time": "0:09:40", "remaining_time": "15:19:06"}
{"current_steps": 20, "total_steps": 960, "loss": 0.7197, "lr": 5e-06, "epoch": 0.0624512099921936, "percentage": 2.08, "elapsed_time": "0:19:17", "remaining_time": "15:06:47"}
{"current_steps": 30, "total_steps": 960, "loss": 0.6985, "lr": 5e-06, "epoch": 0.0936768149882904, "percentage": 3.12, "elapsed_time": "0:28:53", "remaining_time": "14:55:51"}
{"current_steps": 40, "total_steps": 960, "loss": 0.6855, "lr": 5e-06, "epoch": 0.1249024199843872, "percentage": 4.17, "elapsed_time": "0:38:30", "remaining_time": "14:45:47"}
{"current_steps": 50, "total_steps": 960, "loss": 0.6727, "lr": 5e-06, "epoch": 0.156128024980484, "percentage": 5.21, "elapsed_time": "0:48:07", "remaining_time": "14:35:51"}
{"current_steps": 60, "total_steps": 960, "loss": 0.6576, "lr": 5e-06, "epoch": 0.1873536299765808, "percentage": 6.25, "elapsed_time": "0:57:43", "remaining_time": "14:25:51"}
{"current_steps": 70, "total_steps": 960, "loss": 0.6514, "lr": 5e-06, "epoch": 0.2185792349726776, "percentage": 7.29, "elapsed_time": "1:07:20", "remaining_time": "14:16:14"}
{"current_steps": 80, "total_steps": 960, "loss": 0.6457, "lr": 5e-06, "epoch": 0.2498048399687744, "percentage": 8.33, "elapsed_time": "1:16:56", "remaining_time": "14:06:25"}
{"current_steps": 90, "total_steps": 960, "loss": 0.6412, "lr": 5e-06, "epoch": 0.2810304449648712, "percentage": 9.38, "elapsed_time": "1:26:33", "remaining_time": "13:56:40"}
{"current_steps": 100, "total_steps": 960, "loss": 0.6384, "lr": 5e-06, "epoch": 0.312256049960968, "percentage": 10.42, "elapsed_time": "1:36:08", "remaining_time": "13:46:52"}
{"current_steps": 110, "total_steps": 960, "loss": 0.6335, "lr": 5e-06, "epoch": 0.3434816549570648, "percentage": 11.46, "elapsed_time": "1:45:45", "remaining_time": "13:37:14"}
{"current_steps": 120, "total_steps": 960, "loss": 0.6302, "lr": 5e-06, "epoch": 0.3747072599531616, "percentage": 12.5, "elapsed_time": "1:55:22", "remaining_time": "13:27:38"}
{"current_steps": 130, "total_steps": 960, "loss": 0.634, "lr": 5e-06, "epoch": 0.4059328649492584, "percentage": 13.54, "elapsed_time": "2:04:58", "remaining_time": "13:17:53"}
{"current_steps": 140, "total_steps": 960, "loss": 0.6211, "lr": 5e-06, "epoch": 0.4371584699453552, "percentage": 14.58, "elapsed_time": "2:14:35", "remaining_time": "13:08:19"}
{"current_steps": 150, "total_steps": 960, "loss": 0.6271, "lr": 5e-06, "epoch": 0.468384074941452, "percentage": 15.62, "elapsed_time": "2:24:14", "remaining_time": "12:58:51"}
{"current_steps": 160, "total_steps": 960, "loss": 0.6238, "lr": 5e-06, "epoch": 0.4996096799375488, "percentage": 16.67, "elapsed_time": "2:33:51", "remaining_time": "12:49:19"}
{"current_steps": 170, "total_steps": 960, "loss": 0.621, "lr": 5e-06, "epoch": 0.5308352849336456, "percentage": 17.71, "elapsed_time": "2:43:27", "remaining_time": "12:39:37"}
{"current_steps": 180, "total_steps": 960, "loss": 0.6215, "lr": 5e-06, "epoch": 0.5620608899297423, "percentage": 18.75, "elapsed_time": "2:53:04", "remaining_time": "12:30:00"}
{"current_steps": 190, "total_steps": 960, "loss": 0.6193, "lr": 5e-06, "epoch": 0.5932864949258392, "percentage": 19.79, "elapsed_time": "3:02:40", "remaining_time": "12:20:18"}
{"current_steps": 200, "total_steps": 960, "loss": 0.6175, "lr": 5e-06, "epoch": 0.624512099921936, "percentage": 20.83, "elapsed_time": "3:12:16", "remaining_time": "12:10:38"}
{"current_steps": 210, "total_steps": 960, "loss": 0.622, "lr": 5e-06, "epoch": 0.6557377049180327, "percentage": 21.88, "elapsed_time": "3:21:51", "remaining_time": "12:00:56"}
{"current_steps": 220, "total_steps": 960, "loss": 0.6158, "lr": 5e-06, "epoch": 0.6869633099141296, "percentage": 22.92, "elapsed_time": "3:31:29", "remaining_time": "11:51:21"}
{"current_steps": 230, "total_steps": 960, "loss": 0.6177, "lr": 5e-06, "epoch": 0.7181889149102264, "percentage": 23.96, "elapsed_time": "3:41:06", "remaining_time": "11:41:47"}
{"current_steps": 240, "total_steps": 960, "loss": 0.6148, "lr": 5e-06, "epoch": 0.7494145199063232, "percentage": 25.0, "elapsed_time": "3:50:43", "remaining_time": "11:32:11"}
{"current_steps": 250, "total_steps": 960, "loss": 0.6164, "lr": 5e-06, "epoch": 0.78064012490242, "percentage": 26.04, "elapsed_time": "4:00:21", "remaining_time": "11:22:37"}
{"current_steps": 260, "total_steps": 960, "loss": 0.6094, "lr": 5e-06, "epoch": 0.8118657298985168, "percentage": 27.08, "elapsed_time": "4:09:59", "remaining_time": "11:13:02"}
{"current_steps": 270, "total_steps": 960, "loss": 0.6103, "lr": 5e-06, "epoch": 0.8430913348946136, "percentage": 28.12, "elapsed_time": "4:19:35", "remaining_time": "11:03:25"}
{"current_steps": 280, "total_steps": 960, "loss": 0.6095, "lr": 5e-06, "epoch": 0.8743169398907104, "percentage": 29.17, "elapsed_time": "4:29:13", "remaining_time": "10:53:48"}
{"current_steps": 290, "total_steps": 960, "loss": 0.6065, "lr": 5e-06, "epoch": 0.9055425448868072, "percentage": 30.21, "elapsed_time": "4:38:50", "remaining_time": "10:44:12"}
{"current_steps": 300, "total_steps": 960, "loss": 0.6132, "lr": 5e-06, "epoch": 0.936768149882904, "percentage": 31.25, "elapsed_time": "4:48:26", "remaining_time": "10:34:34"}
{"current_steps": 310, "total_steps": 960, "loss": 0.6111, "lr": 5e-06, "epoch": 0.9679937548790007, "percentage": 32.29, "elapsed_time": "4:58:04", "remaining_time": "10:25:00"}
{"current_steps": 320, "total_steps": 960, "loss": 0.5986, "lr": 5e-06, "epoch": 0.9992193598750976, "percentage": 33.33, "elapsed_time": "5:07:42", "remaining_time": "10:15:24"}
{"current_steps": 320, "total_steps": 960, "eval_loss": 0.6128131151199341, "epoch": 0.9992193598750976, "percentage": 33.33, "elapsed_time": "5:13:30", "remaining_time": "10:27:00"}
{"current_steps": 330, "total_steps": 960, "loss": 0.6164, "lr": 5e-06, "epoch": 1.0308352849336455, "percentage": 34.38, "elapsed_time": "5:24:09", "remaining_time": "10:18:50"}
{"current_steps": 340, "total_steps": 960, "loss": 0.5662, "lr": 5e-06, "epoch": 1.0620608899297423, "percentage": 35.42, "elapsed_time": "5:33:47", "remaining_time": "10:08:40"}
{"current_steps": 350, "total_steps": 960, "loss": 0.5606, "lr": 5e-06, "epoch": 1.0932864949258392, "percentage": 36.46, "elapsed_time": "5:43:22", "remaining_time": "9:58:27"}
{"current_steps": 360, "total_steps": 960, "loss": 0.5648, "lr": 5e-06, "epoch": 1.124512099921936, "percentage": 37.5, "elapsed_time": "5:53:00", "remaining_time": "9:48:20"}
{"current_steps": 370, "total_steps": 960, "loss": 0.5582, "lr": 5e-06, "epoch": 1.1557377049180328, "percentage": 38.54, "elapsed_time": "6:02:38", "remaining_time": "9:38:16"}
{"current_steps": 380, "total_steps": 960, "loss": 0.5584, "lr": 5e-06, "epoch": 1.1869633099141297, "percentage": 39.58, "elapsed_time": "6:12:15", "remaining_time": "9:28:11"}
{"current_steps": 390, "total_steps": 960, "loss": 0.5515, "lr": 5e-06, "epoch": 1.2181889149102263, "percentage": 40.62, "elapsed_time": "6:21:52", "remaining_time": "9:18:07"}
{"current_steps": 400, "total_steps": 960, "loss": 0.5637, "lr": 5e-06, "epoch": 1.2494145199063231, "percentage": 41.67, "elapsed_time": "6:31:29", "remaining_time": "9:08:05"}
{"current_steps": 410, "total_steps": 960, "loss": 0.5618, "lr": 5e-06, "epoch": 1.28064012490242, "percentage": 42.71, "elapsed_time": "6:41:06", "remaining_time": "8:58:04"}
{"current_steps": 420, "total_steps": 960, "loss": 0.5554, "lr": 5e-06, "epoch": 1.3118657298985168, "percentage": 43.75, "elapsed_time": "6:50:43", "remaining_time": "8:48:04"}
{"current_steps": 430, "total_steps": 960, "loss": 0.5547, "lr": 5e-06, "epoch": 1.3430913348946136, "percentage": 44.79, "elapsed_time": "7:00:19", "remaining_time": "8:38:04"}
{"current_steps": 440, "total_steps": 960, "loss": 0.5523, "lr": 5e-06, "epoch": 1.3743169398907105, "percentage": 45.83, "elapsed_time": "7:09:56", "remaining_time": "8:28:06"}
{"current_steps": 450, "total_steps": 960, "loss": 0.5666, "lr": 5e-06, "epoch": 1.4055425448868073, "percentage": 46.88, "elapsed_time": "7:19:34", "remaining_time": "8:18:10"}
{"current_steps": 460, "total_steps": 960, "loss": 0.5558, "lr": 5e-06, "epoch": 1.436768149882904, "percentage": 47.92, "elapsed_time": "7:29:11", "remaining_time": "8:08:15"}
{"current_steps": 470, "total_steps": 960, "loss": 0.5584, "lr": 5e-06, "epoch": 1.4679937548790007, "percentage": 48.96, "elapsed_time": "7:38:49", "remaining_time": "7:58:20"}
{"current_steps": 480, "total_steps": 960, "loss": 0.5643, "lr": 5e-06, "epoch": 1.4992193598750976, "percentage": 50.0, "elapsed_time": "7:48:26", "remaining_time": "7:48:26"}
{"current_steps": 490, "total_steps": 960, "loss": 0.5598, "lr": 5e-06, "epoch": 1.5304449648711944, "percentage": 51.04, "elapsed_time": "7:58:03", "remaining_time": "7:38:32"}
{"current_steps": 500, "total_steps": 960, "loss": 0.5591, "lr": 5e-06, "epoch": 1.561670569867291, "percentage": 52.08, "elapsed_time": "8:07:40", "remaining_time": "7:28:40"}
{"current_steps": 510, "total_steps": 960, "loss": 0.5626, "lr": 5e-06, "epoch": 1.5928961748633879, "percentage": 53.12, "elapsed_time": "8:17:18", "remaining_time": "7:18:48"}
{"current_steps": 520, "total_steps": 960, "loss": 0.5556, "lr": 5e-06, "epoch": 1.6241217798594847, "percentage": 54.17, "elapsed_time": "8:26:56", "remaining_time": "7:08:56"}
{"current_steps": 530, "total_steps": 960, "loss": 0.5572, "lr": 5e-06, "epoch": 1.6553473848555815, "percentage": 55.21, "elapsed_time": "8:36:33", "remaining_time": "6:59:05"}
{"current_steps": 540, "total_steps": 960, "loss": 0.557, "lr": 5e-06, "epoch": 1.6865729898516784, "percentage": 56.25, "elapsed_time": "8:46:10", "remaining_time": "6:49:14"}
{"current_steps": 550, "total_steps": 960, "loss": 0.5617, "lr": 5e-06, "epoch": 1.7177985948477752, "percentage": 57.29, "elapsed_time": "8:55:47", "remaining_time": "6:39:24"}
{"current_steps": 560, "total_steps": 960, "loss": 0.5581, "lr": 5e-06, "epoch": 1.749024199843872, "percentage": 58.33, "elapsed_time": "9:05:23", "remaining_time": "6:29:34"}
{"current_steps": 570, "total_steps": 960, "loss": 0.5564, "lr": 5e-06, "epoch": 1.7802498048399689, "percentage": 59.38, "elapsed_time": "9:14:59", "remaining_time": "6:19:43"}
{"current_steps": 580, "total_steps": 960, "loss": 0.559, "lr": 5e-06, "epoch": 1.8114754098360657, "percentage": 60.42, "elapsed_time": "9:24:36", "remaining_time": "6:09:55"}
{"current_steps": 590, "total_steps": 960, "loss": 0.5616, "lr": 5e-06, "epoch": 1.8427010148321625, "percentage": 61.46, "elapsed_time": "9:34:14", "remaining_time": "6:00:07"}
{"current_steps": 600, "total_steps": 960, "loss": 0.553, "lr": 5e-06, "epoch": 1.8739266198282591, "percentage": 62.5, "elapsed_time": "9:43:52", "remaining_time": "5:50:19"}
{"current_steps": 610, "total_steps": 960, "loss": 0.5589, "lr": 5e-06, "epoch": 1.905152224824356, "percentage": 63.54, "elapsed_time": "9:53:30", "remaining_time": "5:40:32"}
{"current_steps": 620, "total_steps": 960, "loss": 0.5628, "lr": 5e-06, "epoch": 1.9363778298204528, "percentage": 64.58, "elapsed_time": "10:03:06", "remaining_time": "5:30:44"}
{"current_steps": 630, "total_steps": 960, "loss": 0.5522, "lr": 5e-06, "epoch": 1.9676034348165494, "percentage": 65.62, "elapsed_time": "10:12:44", "remaining_time": "5:20:57"}
{"current_steps": 640, "total_steps": 960, "loss": 0.5596, "lr": 5e-06, "epoch": 1.9988290398126463, "percentage": 66.67, "elapsed_time": "10:22:21", "remaining_time": "5:11:10"}
{"current_steps": 640, "total_steps": 960, "eval_loss": 0.6045193076133728, "epoch": 1.9988290398126463, "percentage": 66.67, "elapsed_time": "10:28:15", "remaining_time": "5:14:07"}
{"current_steps": 650, "total_steps": 960, "loss": 0.5662, "lr": 5e-06, "epoch": 2.030444964871194, "percentage": 67.71, "elapsed_time": "10:38:54", "remaining_time": "5:04:42"}
{"current_steps": 660, "total_steps": 960, "loss": 0.5018, "lr": 5e-06, "epoch": 2.061670569867291, "percentage": 68.75, "elapsed_time": "10:48:31", "remaining_time": "4:54:47"}
{"current_steps": 670, "total_steps": 960, "loss": 0.5057, "lr": 5e-06, "epoch": 2.092896174863388, "percentage": 69.79, "elapsed_time": "10:58:09", "remaining_time": "4:44:52"}
{"current_steps": 680, "total_steps": 960, "loss": 0.5036, "lr": 5e-06, "epoch": 2.1241217798594847, "percentage": 70.83, "elapsed_time": "11:07:46", "remaining_time": "4:34:58"}
{"current_steps": 690, "total_steps": 960, "loss": 0.5066, "lr": 5e-06, "epoch": 2.1553473848555815, "percentage": 71.88, "elapsed_time": "11:17:23", "remaining_time": "4:25:04"}
{"current_steps": 700, "total_steps": 960, "loss": 0.5086, "lr": 5e-06, "epoch": 2.1865729898516784, "percentage": 72.92, "elapsed_time": "11:27:01", "remaining_time": "4:15:10"}
{"current_steps": 710, "total_steps": 960, "loss": 0.508, "lr": 5e-06, "epoch": 2.217798594847775, "percentage": 73.96, "elapsed_time": "11:36:39", "remaining_time": "4:05:18"}
{"current_steps": 720, "total_steps": 960, "loss": 0.5073, "lr": 5e-06, "epoch": 2.249024199843872, "percentage": 75.0, "elapsed_time": "11:46:17", "remaining_time": "3:55:25"}
{"current_steps": 730, "total_steps": 960, "loss": 0.5126, "lr": 5e-06, "epoch": 2.280249804839969, "percentage": 76.04, "elapsed_time": "11:55:53", "remaining_time": "3:45:33"}
{"current_steps": 740, "total_steps": 960, "loss": 0.5098, "lr": 5e-06, "epoch": 2.3114754098360657, "percentage": 77.08, "elapsed_time": "12:05:31", "remaining_time": "3:35:41"}
{"current_steps": 750, "total_steps": 960, "loss": 0.5084, "lr": 5e-06, "epoch": 2.3427010148321625, "percentage": 78.12, "elapsed_time": "12:15:06", "remaining_time": "3:25:49"}
{"current_steps": 760, "total_steps": 960, "loss": 0.5097, "lr": 5e-06, "epoch": 2.3739266198282594, "percentage": 79.17, "elapsed_time": "12:24:44", "remaining_time": "3:15:59"}
{"current_steps": 770, "total_steps": 960, "loss": 0.5028, "lr": 5e-06, "epoch": 2.4051522248243558, "percentage": 80.21, "elapsed_time": "12:34:22", "remaining_time": "3:06:08"}
{"current_steps": 780, "total_steps": 960, "loss": 0.5102, "lr": 5e-06, "epoch": 2.4363778298204526, "percentage": 81.25, "elapsed_time": "12:43:59", "remaining_time": "2:56:18"}
{"current_steps": 790, "total_steps": 960, "loss": 0.5068, "lr": 5e-06, "epoch": 2.4676034348165494, "percentage": 82.29, "elapsed_time": "12:53:37", "remaining_time": "2:46:28"}
{"current_steps": 800, "total_steps": 960, "loss": 0.5141, "lr": 5e-06, "epoch": 2.4988290398126463, "percentage": 83.33, "elapsed_time": "13:03:14", "remaining_time": "2:36:38"}
{"current_steps": 810, "total_steps": 960, "loss": 0.5081, "lr": 5e-06, "epoch": 2.530054644808743, "percentage": 84.38, "elapsed_time": "13:12:52", "remaining_time": "2:26:49"}
{"current_steps": 820, "total_steps": 960, "loss": 0.5123, "lr": 5e-06, "epoch": 2.56128024980484, "percentage": 85.42, "elapsed_time": "13:22:30", "remaining_time": "2:17:00"}
{"current_steps": 830, "total_steps": 960, "loss": 0.5067, "lr": 5e-06, "epoch": 2.5925058548009368, "percentage": 86.46, "elapsed_time": "13:32:06", "remaining_time": "2:07:11"}
{"current_steps": 840, "total_steps": 960, "loss": 0.519, "lr": 5e-06, "epoch": 2.6237314597970336, "percentage": 87.5, "elapsed_time": "13:41:43", "remaining_time": "1:57:23"}
{"current_steps": 850, "total_steps": 960, "loss": 0.5048, "lr": 5e-06, "epoch": 2.6549570647931304, "percentage": 88.54, "elapsed_time": "13:51:21", "remaining_time": "1:47:35"}
{"current_steps": 860, "total_steps": 960, "loss": 0.5165, "lr": 5e-06, "epoch": 2.6861826697892273, "percentage": 89.58, "elapsed_time": "14:00:59", "remaining_time": "1:37:47"}
{"current_steps": 870, "total_steps": 960, "loss": 0.5154, "lr": 5e-06, "epoch": 2.717408274785324, "percentage": 90.62, "elapsed_time": "14:10:37", "remaining_time": "1:27:59"}
{"current_steps": 880, "total_steps": 960, "loss": 0.5091, "lr": 5e-06, "epoch": 2.748633879781421, "percentage": 91.67, "elapsed_time": "14:20:14", "remaining_time": "1:18:12"}
{"current_steps": 890, "total_steps": 960, "loss": 0.5112, "lr": 5e-06, "epoch": 2.7798594847775178, "percentage": 92.71, "elapsed_time": "14:29:52", "remaining_time": "1:08:25"}
{"current_steps": 900, "total_steps": 960, "loss": 0.5109, "lr": 5e-06, "epoch": 2.8110850897736146, "percentage": 93.75, "elapsed_time": "14:39:31", "remaining_time": "0:58:38"}
{"current_steps": 910, "total_steps": 960, "loss": 0.5166, "lr": 5e-06, "epoch": 2.8423106947697114, "percentage": 94.79, "elapsed_time": "14:49:07", "remaining_time": "0:48:51"}
{"current_steps": 920, "total_steps": 960, "loss": 0.519, "lr": 5e-06, "epoch": 2.873536299765808, "percentage": 95.83, "elapsed_time": "14:58:45", "remaining_time": "0:39:04"}
{"current_steps": 930, "total_steps": 960, "loss": 0.5181, "lr": 5e-06, "epoch": 2.9047619047619047, "percentage": 96.88, "elapsed_time": "15:08:22", "remaining_time": "0:29:18"}
{"current_steps": 940, "total_steps": 960, "loss": 0.5134, "lr": 5e-06, "epoch": 2.9359875097580015, "percentage": 97.92, "elapsed_time": "15:18:00", "remaining_time": "0:19:31"}
{"current_steps": 950, "total_steps": 960, "loss": 0.512, "lr": 5e-06, "epoch": 2.9672131147540983, "percentage": 98.96, "elapsed_time": "15:27:37", "remaining_time": "0:09:45"}
{"current_steps": 960, "total_steps": 960, "loss": 0.5083, "lr": 5e-06, "epoch": 2.998438719750195, "percentage": 100.0, "elapsed_time": "15:37:14", "remaining_time": "0:00:00"}