File size: 18,581 Bytes
7500161
 
 
 
 
 
 
 
 
 
 
e38e489
 
 
 
 
 
 
 
 
 
 
a0e3713
 
 
 
 
 
 
 
 
 
 
0e8eee8
 
 
 
 
 
 
 
 
 
 
70d60fc
 
 
 
 
 
 
 
 
 
 
588876c
 
 
 
 
 
 
 
 
 
 
0719fb6
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
{"current_steps": 5, "total_steps": 3400, "loss": 2.8889, "lr": 2.9411764705882355e-06, "epoch": 0.0012876641771825909, "percentage": 0.15, "elapsed_time": "0:02:46", "remaining_time": "1 day, 7:20:36", "throughput": 317.97, "total_tokens": 52840}
{"current_steps": 10, "total_steps": 3400, "loss": 2.8165, "lr": 5.882352941176471e-06, "epoch": 0.0025753283543651817, "percentage": 0.29, "elapsed_time": "0:04:14", "remaining_time": "23:57:41", "throughput": 414.71, "total_tokens": 105528}
{"current_steps": 15, "total_steps": 3400, "loss": 2.8363, "lr": 8.823529411764707e-06, "epoch": 0.0038629925315477724, "percentage": 0.44, "elapsed_time": "0:05:42", "remaining_time": "21:27:36", "throughput": 463.76, "total_tokens": 158768}
{"current_steps": 20, "total_steps": 3400, "loss": 2.6853, "lr": 1.1764705882352942e-05, "epoch": 0.0051506567087303634, "percentage": 0.59, "elapsed_time": "0:07:11", "remaining_time": "20:14:07", "throughput": 489.08, "total_tokens": 210816}
{"current_steps": 25, "total_steps": 3400, "loss": 2.2992, "lr": 1.4705882352941177e-05, "epoch": 0.006438320885912954, "percentage": 0.74, "elapsed_time": "0:08:38", "remaining_time": "19:26:57", "throughput": 506.96, "total_tokens": 262936}
{"current_steps": 30, "total_steps": 3400, "loss": 1.8923, "lr": 1.7647058823529414e-05, "epoch": 0.007725985063095545, "percentage": 0.88, "elapsed_time": "0:10:08", "remaining_time": "18:58:31", "throughput": 518.43, "total_tokens": 315264}
{"current_steps": 35, "total_steps": 3400, "loss": 1.6984, "lr": 2.058823529411765e-05, "epoch": 0.009013649240278136, "percentage": 1.03, "elapsed_time": "0:11:36", "remaining_time": "18:36:02", "throughput": 528.14, "total_tokens": 367840}
{"current_steps": 40, "total_steps": 3400, "loss": 1.6434, "lr": 2.3529411764705884e-05, "epoch": 0.010301313417460727, "percentage": 1.18, "elapsed_time": "0:13:06", "remaining_time": "18:20:53", "throughput": 534.26, "total_tokens": 420112}
{"current_steps": 45, "total_steps": 3400, "loss": 1.4659, "lr": 2.647058823529412e-05, "epoch": 0.011588977594643318, "percentage": 1.32, "elapsed_time": "0:14:34", "remaining_time": "18:06:24", "throughput": 540.69, "total_tokens": 472728}
{"current_steps": 50, "total_steps": 3400, "loss": 1.3506, "lr": 2.9411764705882354e-05, "epoch": 0.012876641771825908, "percentage": 1.47, "elapsed_time": "0:16:03", "remaining_time": "17:56:24", "throughput": 544.27, "total_tokens": 524648}
{"current_steps": 50, "total_steps": 3400, "eval_loss": 1.1727452278137207, "epoch": 0.012876641771825908, "percentage": 1.47, "elapsed_time": "0:17:10", "remaining_time": "19:10:27", "throughput": 509.24, "total_tokens": 524648}
{"current_steps": 55, "total_steps": 3400, "loss": 1.1455, "lr": 3.235294117647059e-05, "epoch": 0.014164305949008499, "percentage": 1.62, "elapsed_time": "0:18:46", "remaining_time": "19:01:52", "throughput": 511.73, "total_tokens": 576472}
{"current_steps": 60, "total_steps": 3400, "loss": 0.9971, "lr": 3.529411764705883e-05, "epoch": 0.01545197012619109, "percentage": 1.76, "elapsed_time": "0:20:15", "remaining_time": "18:48:02", "throughput": 516.56, "total_tokens": 628056}
{"current_steps": 65, "total_steps": 3400, "loss": 0.9073, "lr": 3.8235294117647055e-05, "epoch": 0.01673963430337368, "percentage": 1.91, "elapsed_time": "0:21:44", "remaining_time": "18:35:51", "throughput": 521.45, "total_tokens": 680448}
{"current_steps": 70, "total_steps": 3400, "loss": 0.8386, "lr": 4.11764705882353e-05, "epoch": 0.018027298480556272, "percentage": 2.06, "elapsed_time": "0:23:19", "remaining_time": "18:29:19", "throughput": 524.37, "total_tokens": 733664}
{"current_steps": 75, "total_steps": 3400, "loss": 0.7827, "lr": 4.411764705882353e-05, "epoch": 0.01931496265773886, "percentage": 2.21, "elapsed_time": "0:24:51", "remaining_time": "18:22:18", "throughput": 526.93, "total_tokens": 786096}
{"current_steps": 80, "total_steps": 3400, "loss": 0.7814, "lr": 4.705882352941177e-05, "epoch": 0.020602626834921454, "percentage": 2.35, "elapsed_time": "0:26:25", "remaining_time": "18:16:38", "throughput": 528.66, "total_tokens": 838192}
{"current_steps": 85, "total_steps": 3400, "loss": 0.7297, "lr": 5e-05, "epoch": 0.021890291012104043, "percentage": 2.5, "elapsed_time": "0:27:54", "remaining_time": "18:08:20", "throughput": 531.61, "total_tokens": 890112}
{"current_steps": 90, "total_steps": 3400, "loss": 0.7894, "lr": 5.294117647058824e-05, "epoch": 0.023177955189286635, "percentage": 2.65, "elapsed_time": "0:29:24", "remaining_time": "18:01:48", "throughput": 534.58, "total_tokens": 943472}
{"current_steps": 95, "total_steps": 3400, "loss": 0.7758, "lr": 5.588235294117647e-05, "epoch": 0.024465619366469224, "percentage": 2.79, "elapsed_time": "0:30:52", "remaining_time": "17:54:11", "throughput": 538.09, "total_tokens": 996872}
{"current_steps": 100, "total_steps": 3400, "loss": 0.7577, "lr": 5.882352941176471e-05, "epoch": 0.025753283543651816, "percentage": 2.94, "elapsed_time": "0:32:21", "remaining_time": "17:47:53", "throughput": 540.69, "total_tokens": 1049816}
{"current_steps": 100, "total_steps": 3400, "eval_loss": 0.7517351508140564, "epoch": 0.025753283543651816, "percentage": 2.94, "elapsed_time": "0:33:00", "remaining_time": "18:09:13", "throughput": 530.1, "total_tokens": 1049816}
{"current_steps": 105, "total_steps": 3400, "loss": 0.7579, "lr": 6.176470588235295e-05, "epoch": 0.027040947720834405, "percentage": 3.09, "elapsed_time": "0:34:36", "remaining_time": "18:05:49", "throughput": 531.09, "total_tokens": 1102584}
{"current_steps": 110, "total_steps": 3400, "loss": 0.7659, "lr": 6.470588235294118e-05, "epoch": 0.028328611898016998, "percentage": 3.24, "elapsed_time": "0:36:03", "remaining_time": "17:58:31", "throughput": 534.07, "total_tokens": 1155512}
{"current_steps": 115, "total_steps": 3400, "loss": 0.7469, "lr": 6.764705882352942e-05, "epoch": 0.029616276075199587, "percentage": 3.38, "elapsed_time": "0:37:32", "remaining_time": "17:52:22", "throughput": 536.29, "total_tokens": 1207976}
{"current_steps": 120, "total_steps": 3400, "loss": 0.7353, "lr": 7.058823529411765e-05, "epoch": 0.03090394025238218, "percentage": 3.53, "elapsed_time": "0:39:00", "remaining_time": "17:46:11", "throughput": 538.27, "total_tokens": 1259776}
{"current_steps": 125, "total_steps": 3400, "loss": 0.7537, "lr": 7.352941176470589e-05, "epoch": 0.03219160442956477, "percentage": 3.68, "elapsed_time": "0:40:29", "remaining_time": "17:40:51", "throughput": 540.35, "total_tokens": 1312760}
{"current_steps": 130, "total_steps": 3400, "loss": 0.7669, "lr": 7.647058823529411e-05, "epoch": 0.03347926860674736, "percentage": 3.82, "elapsed_time": "0:41:57", "remaining_time": "17:35:14", "throughput": 542.54, "total_tokens": 1365616}
{"current_steps": 135, "total_steps": 3400, "loss": 0.722, "lr": 7.941176470588235e-05, "epoch": 0.03476693278392995, "percentage": 3.97, "elapsed_time": "0:43:26", "remaining_time": "17:30:31", "throughput": 543.91, "total_tokens": 1417544}
{"current_steps": 140, "total_steps": 3400, "loss": 0.7502, "lr": 8.23529411764706e-05, "epoch": 0.036054596961112545, "percentage": 4.12, "elapsed_time": "0:44:52", "remaining_time": "17:24:54", "throughput": 545.93, "total_tokens": 1469856}
{"current_steps": 145, "total_steps": 3400, "loss": 0.7174, "lr": 8.529411764705883e-05, "epoch": 0.037342261138295134, "percentage": 4.26, "elapsed_time": "0:46:20", "remaining_time": "17:20:11", "throughput": 547.25, "total_tokens": 1521496}
{"current_steps": 150, "total_steps": 3400, "loss": 0.7018, "lr": 8.823529411764706e-05, "epoch": 0.03862992531547772, "percentage": 4.41, "elapsed_time": "0:47:46", "remaining_time": "17:15:14", "throughput": 548.82, "total_tokens": 1573376}
{"current_steps": 150, "total_steps": 3400, "eval_loss": 0.7309949994087219, "epoch": 0.03862992531547772, "percentage": 4.41, "elapsed_time": "0:48:25", "remaining_time": "17:29:02", "throughput": 541.61, "total_tokens": 1573376}
{"current_steps": 155, "total_steps": 3400, "loss": 0.738, "lr": 9.11764705882353e-05, "epoch": 0.03991758949266031, "percentage": 4.56, "elapsed_time": "0:49:58", "remaining_time": "17:26:05", "throughput": 542.4, "total_tokens": 1626136}
{"current_steps": 160, "total_steps": 3400, "loss": 0.7579, "lr": 9.411764705882353e-05, "epoch": 0.04120525366984291, "percentage": 4.71, "elapsed_time": "0:51:24", "remaining_time": "17:20:51", "throughput": 544.34, "total_tokens": 1678760}
{"current_steps": 165, "total_steps": 3400, "loss": 0.7502, "lr": 9.705882352941177e-05, "epoch": 0.042492917847025496, "percentage": 4.85, "elapsed_time": "0:52:51", "remaining_time": "17:16:21", "throughput": 545.87, "total_tokens": 1731240}
{"current_steps": 170, "total_steps": 3400, "loss": 0.7448, "lr": 0.0001, "epoch": 0.043780582024208085, "percentage": 5.0, "elapsed_time": "0:54:17", "remaining_time": "17:11:35", "throughput": 547.57, "total_tokens": 1783816}
{"current_steps": 175, "total_steps": 3400, "loss": 0.6648, "lr": 9.999940874631277e-05, "epoch": 0.045068246201390674, "percentage": 5.15, "elapsed_time": "0:55:45", "remaining_time": "17:07:24", "throughput": 548.45, "total_tokens": 1834592}
{"current_steps": 180, "total_steps": 3400, "loss": 0.7759, "lr": 9.999763499923432e-05, "epoch": 0.04635591037857327, "percentage": 5.29, "elapsed_time": "0:57:11", "remaining_time": "17:02:59", "throughput": 550.3, "total_tokens": 1888176}
{"current_steps": 185, "total_steps": 3400, "loss": 0.7167, "lr": 9.999467880071402e-05, "epoch": 0.04764357455575586, "percentage": 5.44, "elapsed_time": "0:58:39", "remaining_time": "16:59:16", "throughput": 551.35, "total_tokens": 1940280}
{"current_steps": 190, "total_steps": 3400, "loss": 0.7483, "lr": 9.999054022066641e-05, "epoch": 0.04893123873293845, "percentage": 5.59, "elapsed_time": "1:00:04", "remaining_time": "16:55:04", "throughput": 552.88, "total_tokens": 1993096}
{"current_steps": 195, "total_steps": 3400, "loss": 0.7464, "lr": 9.998521935696953e-05, "epoch": 0.050218902910121044, "percentage": 5.74, "elapsed_time": "1:01:31", "remaining_time": "16:51:14", "throughput": 554.14, "total_tokens": 2045648}
{"current_steps": 200, "total_steps": 3400, "loss": 0.7594, "lr": 9.997871633546257e-05, "epoch": 0.05150656708730363, "percentage": 5.88, "elapsed_time": "1:02:57", "remaining_time": "16:47:25", "throughput": 555.61, "total_tokens": 2099008}
{"current_steps": 200, "total_steps": 3400, "eval_loss": 0.7274295687675476, "epoch": 0.05150656708730363, "percentage": 5.88, "elapsed_time": "1:03:35", "remaining_time": "16:57:35", "throughput": 550.06, "total_tokens": 2099008}
{"current_steps": 205, "total_steps": 3400, "loss": 0.706, "lr": 9.997103130994296e-05, "epoch": 0.05279423126448622, "percentage": 6.03, "elapsed_time": "1:05:08", "remaining_time": "16:55:12", "throughput": 550.54, "total_tokens": 2151680}
{"current_steps": 210, "total_steps": 3400, "loss": 0.7186, "lr": 9.996216446216267e-05, "epoch": 0.05408189544166881, "percentage": 6.18, "elapsed_time": "1:06:34", "remaining_time": "16:51:24", "throughput": 551.65, "total_tokens": 2203784}
{"current_steps": 215, "total_steps": 3400, "loss": 0.7009, "lr": 9.995211600182397e-05, "epoch": 0.055369559618851406, "percentage": 6.32, "elapsed_time": "1:08:00", "remaining_time": "16:47:34", "throughput": 552.73, "total_tokens": 2255632}
{"current_steps": 220, "total_steps": 3400, "loss": 0.6801, "lr": 9.994088616657444e-05, "epoch": 0.056657223796033995, "percentage": 6.47, "elapsed_time": "1:09:28", "remaining_time": "16:44:15", "throughput": 553.68, "total_tokens": 2308096}
{"current_steps": 225, "total_steps": 3400, "loss": 0.7569, "lr": 9.992847522200133e-05, "epoch": 0.057944887973216584, "percentage": 6.62, "elapsed_time": "1:10:55", "remaining_time": "16:40:46", "throughput": 554.88, "total_tokens": 2361168}
{"current_steps": 230, "total_steps": 3400, "loss": 0.7402, "lr": 9.99148834616253e-05, "epoch": 0.05923255215039917, "percentage": 6.76, "elapsed_time": "1:12:23", "remaining_time": "16:37:48", "throughput": 555.71, "total_tokens": 2413896}
{"current_steps": 235, "total_steps": 3400, "loss": 0.7191, "lr": 9.990011120689351e-05, "epoch": 0.06052021632758177, "percentage": 6.91, "elapsed_time": "1:13:51", "remaining_time": "16:34:47", "throughput": 556.47, "total_tokens": 2466136}
{"current_steps": 240, "total_steps": 3400, "loss": 0.7274, "lr": 9.988415880717194e-05, "epoch": 0.06180788050476436, "percentage": 7.06, "elapsed_time": "1:15:20", "remaining_time": "16:31:59", "throughput": 557.21, "total_tokens": 2518848}
{"current_steps": 245, "total_steps": 3400, "loss": 0.7704, "lr": 9.986702663973722e-05, "epoch": 0.06309554468194695, "percentage": 7.21, "elapsed_time": "1:16:48", "remaining_time": "16:29:01", "throughput": 558.22, "total_tokens": 2572384}
{"current_steps": 250, "total_steps": 3400, "loss": 0.7346, "lr": 9.98487151097676e-05, "epoch": 0.06438320885912954, "percentage": 7.35, "elapsed_time": "1:18:17", "remaining_time": "16:26:29", "throughput": 558.88, "total_tokens": 2625352}
{"current_steps": 250, "total_steps": 3400, "eval_loss": 0.7181503176689148, "epoch": 0.06438320885912954, "percentage": 7.35, "elapsed_time": "1:18:55", "remaining_time": "16:34:29", "throughput": 554.38, "total_tokens": 2625352}
{"current_steps": 255, "total_steps": 3400, "loss": 0.7408, "lr": 9.98292246503335e-05, "epoch": 0.06567087303631212, "percentage": 7.5, "elapsed_time": "1:20:28", "remaining_time": "16:32:30", "throughput": 554.67, "total_tokens": 2678216}
{"current_steps": 260, "total_steps": 3400, "loss": 0.7044, "lr": 9.980855572238714e-05, "epoch": 0.06695853721349472, "percentage": 7.65, "elapsed_time": "1:21:57", "remaining_time": "16:29:49", "throughput": 555.29, "total_tokens": 2730664}
{"current_steps": 265, "total_steps": 3400, "loss": 0.7334, "lr": 9.978670881475172e-05, "epoch": 0.06824620139067732, "percentage": 7.79, "elapsed_time": "1:23:25", "remaining_time": "16:26:51", "throughput": 556.14, "total_tokens": 2783584}
{"current_steps": 270, "total_steps": 3400, "loss": 0.7075, "lr": 9.976368444410985e-05, "epoch": 0.0695338655678599, "percentage": 7.94, "elapsed_time": "1:24:53", "remaining_time": "16:24:09", "throughput": 556.79, "total_tokens": 2836152}
{"current_steps": 275, "total_steps": 3400, "loss": 0.7039, "lr": 9.973948315499126e-05, "epoch": 0.0708215297450425, "percentage": 8.09, "elapsed_time": "1:26:21", "remaining_time": "16:21:18", "throughput": 557.35, "total_tokens": 2887808}
{"current_steps": 280, "total_steps": 3400, "loss": 0.6953, "lr": 9.971410551976002e-05, "epoch": 0.07210919392222509, "percentage": 8.24, "elapsed_time": "1:27:50", "remaining_time": "16:18:43", "throughput": 557.81, "total_tokens": 2939656}
{"current_steps": 285, "total_steps": 3400, "loss": 0.7022, "lr": 9.968755213860094e-05, "epoch": 0.07339685809940767, "percentage": 8.38, "elapsed_time": "1:29:17", "remaining_time": "16:15:56", "throughput": 558.4, "total_tokens": 2991632}
{"current_steps": 290, "total_steps": 3400, "loss": 0.6796, "lr": 9.96598236395054e-05, "epoch": 0.07468452227659027, "percentage": 8.53, "elapsed_time": "1:30:45", "remaining_time": "16:13:18", "throughput": 558.92, "total_tokens": 3043616}
{"current_steps": 295, "total_steps": 3400, "loss": 0.7346, "lr": 9.96309206782565e-05, "epoch": 0.07597218645377285, "percentage": 8.68, "elapsed_time": "1:32:13", "remaining_time": "16:10:42", "throughput": 559.66, "total_tokens": 3096920}
{"current_steps": 300, "total_steps": 3400, "loss": 0.6815, "lr": 9.960084393841355e-05, "epoch": 0.07725985063095545, "percentage": 8.82, "elapsed_time": "1:33:40", "remaining_time": "16:07:58", "throughput": 560.28, "total_tokens": 3149032}
{"current_steps": 300, "total_steps": 3400, "eval_loss": 0.7073924541473389, "epoch": 0.07725985063095545, "percentage": 8.82, "elapsed_time": "1:34:18", "remaining_time": "16:14:32", "throughput": 556.5, "total_tokens": 3149032}
{"current_steps": 305, "total_steps": 3400, "loss": 0.7208, "lr": 9.956959413129585e-05, "epoch": 0.07854751480813804, "percentage": 8.97, "elapsed_time": "1:35:53", "remaining_time": "16:13:04", "throughput": 556.45, "total_tokens": 3201560}
{"current_steps": 310, "total_steps": 3400, "loss": 0.7144, "lr": 9.953717199596598e-05, "epoch": 0.07983517898532062, "percentage": 9.12, "elapsed_time": "1:37:21", "remaining_time": "16:10:27", "throughput": 557.14, "total_tokens": 3254632}
{"current_steps": 315, "total_steps": 3400, "loss": 0.6861, "lr": 9.95035782992122e-05, "epoch": 0.08112284316250322, "percentage": 9.26, "elapsed_time": "1:38:52", "remaining_time": "16:08:22", "throughput": 557.33, "total_tokens": 3306432}
{"current_steps": 320, "total_steps": 3400, "loss": 0.6836, "lr": 9.94688138355304e-05, "epoch": 0.08241050733968582, "percentage": 9.41, "elapsed_time": "1:40:21", "remaining_time": "16:05:55", "throughput": 557.75, "total_tokens": 3358392}
{"current_steps": 325, "total_steps": 3400, "loss": 0.7353, "lr": 9.943287942710527e-05, "epoch": 0.0836981715168684, "percentage": 9.56, "elapsed_time": "1:41:50", "remaining_time": "16:03:39", "throughput": 558.24, "total_tokens": 3411424}
{"current_steps": 330, "total_steps": 3400, "loss": 0.6774, "lr": 9.939577592379088e-05, "epoch": 0.08498583569405099, "percentage": 9.71, "elapsed_time": "1:43:18", "remaining_time": "16:01:07", "throughput": 558.66, "total_tokens": 3462992}
{"current_steps": 335, "total_steps": 3400, "loss": 0.7331, "lr": 9.935750420309055e-05, "epoch": 0.08627349987123359, "percentage": 9.85, "elapsed_time": "1:44:49", "remaining_time": "15:59:00", "throughput": 559.08, "total_tokens": 3516136}
{"current_steps": 340, "total_steps": 3400, "loss": 0.6939, "lr": 9.931806517013612e-05, "epoch": 0.08756116404841617, "percentage": 10.0, "elapsed_time": "1:46:17", "remaining_time": "15:56:39", "throughput": 559.51, "total_tokens": 3568360}
{"current_steps": 345, "total_steps": 3400, "loss": 0.7158, "lr": 9.927745975766654e-05, "epoch": 0.08884882822559877, "percentage": 10.15, "elapsed_time": "1:47:46", "remaining_time": "15:54:25", "throughput": 559.88, "total_tokens": 3620696}
{"current_steps": 350, "total_steps": 3400, "loss": 0.6932, "lr": 9.923568892600578e-05, "epoch": 0.09013649240278135, "percentage": 10.29, "elapsed_time": "1:49:14", "remaining_time": "15:51:57", "throughput": 560.41, "total_tokens": 3673152}
{"current_steps": 350, "total_steps": 3400, "eval_loss": 0.7044599056243896, "epoch": 0.09013649240278135, "percentage": 10.29, "elapsed_time": "1:49:52", "remaining_time": "15:57:30", "throughput": 557.15, "total_tokens": 3673152}