bigscience-bot
commited on
Commit
•
93bfcf3
1
Parent(s):
dc319f1
new data
Browse files- logs/main_log.txt +37 -0
logs/main_log.txt
CHANGED
@@ -66595,3 +66595,40 @@ time (ms)
|
|
66595 |
[2021-09-25 16:10:22] PULSE: tr8-104B is scheduled to start in 19:16:12 (at 2021-09-26T11:26:35) (1188168 on 'gpu_p13' partition)
|
66596 |
[2021-09-25 16:10:22] PULSE: tr8-104B is waiting for the previous Job Array job to finish before scheduling a new one (1185639_[2-10%1] on 'gpu_p13' partition)
|
66597 |
[2021-09-25 16:10:22] PULSE: tr8-104B is running for 11:43:21 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66595 |
[2021-09-25 16:10:22] PULSE: tr8-104B is scheduled to start in 19:16:12 (at 2021-09-26T11:26:35) (1188168 on 'gpu_p13' partition)
|
66596 |
[2021-09-25 16:10:22] PULSE: tr8-104B is waiting for the previous Job Array job to finish before scheduling a new one (1185639_[2-10%1] on 'gpu_p13' partition)
|
66597 |
[2021-09-25 16:10:22] PULSE: tr8-104B is running for 11:43:21 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
66598 |
+
iteration 8380/ 159576 | consumed samples: 429360 | elapsed time per iteration (ms): 22974.1 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.025337E+00 | loss scale: 2048.0 | grad norm: 89725.468 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66599 |
+
time (ms)
|
66600 |
+
iteration 8390/ 159576 | consumed samples: 430960 | elapsed time per iteration (ms): 22266.9 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.010270E+00 | loss scale: 1024.0 | grad norm: 33629.138 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66601 |
+
time (ms)
|
66602 |
+
iteration 8400/ 159576 | consumed samples: 432560 | elapsed time per iteration (ms): 22964.2 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.020833E+00 | loss scale: 1024.0 | grad norm: 46812.316 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66603 |
+
time (ms)
|
66604 |
+
iteration 8410/ 159576 | consumed samples: 434160 | elapsed time per iteration (ms): 22923.5 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.044554E+00 | loss scale: 1024.0 | grad norm: 55335.802 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66605 |
+
time (ms)
|
66606 |
+
iteration 8420/ 159576 | consumed samples: 435760 | elapsed time per iteration (ms): 22690.3 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.074860E+00 | loss scale: 1024.0 | grad norm: 27018.225 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66607 |
+
time (ms)
|
66608 |
+
iteration 8430/ 159576 | consumed samples: 437360 | elapsed time per iteration (ms): 22997.6 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.108445E+00 | loss scale: 1024.0 | grad norm: 95058.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66609 |
+
time (ms)
|
66610 |
+
iteration 8440/ 159576 | consumed samples: 438960 | elapsed time per iteration (ms): 22696.4 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.128921E+00 | loss scale: 1024.0 | grad norm: 44470.175 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66611 |
+
time (ms)
|
66612 |
+
iteration 8450/ 159576 | consumed samples: 440560 | elapsed time per iteration (ms): 22728.4 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.037349E+00 | loss scale: 1024.0 | grad norm: 32995.810 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66613 |
+
time (ms)
|
66614 |
+
iteration 8460/ 159576 | consumed samples: 442160 | elapsed time per iteration (ms): 22856.0 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.064864E+00 | loss scale: 1024.0 | grad norm: 23093.772 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66615 |
+
time (ms)
|
66616 |
+
iteration 8470/ 159576 | consumed samples: 443760 | elapsed time per iteration (ms): 22824.5 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.057752E+00 | loss scale: 1024.0 | grad norm: 34580.324 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66617 |
+
time (ms)
|
66618 |
+
iteration 8480/ 159576 | consumed samples: 445360 | elapsed time per iteration (ms): 22939.9 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.111783E+00 | loss scale: 1024.0 | grad norm: 30415.135 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66619 |
+
time (ms)
|
66620 |
+
iteration 8490/ 159576 | consumed samples: 446960 | elapsed time per iteration (ms): 22647.3 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.077787E+00 | loss scale: 1024.0 | grad norm: 44228.518 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66621 |
+
time (ms)
|
66622 |
+
iteration 8500/ 159576 | consumed samples: 448560 | elapsed time per iteration (ms): 22870.1 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.017307E+00 | loss scale: 1024.0 | grad norm: 31106.331 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66623 |
+
time (ms)
|
66624 |
+
[2021-09-25 17:00:02] PULSE: tr8-104B is scheduled to start in 18:26:32 (at 2021-09-26T11:26:35) (1188168 on 'gpu_p13' partition)
|
66625 |
+
[2021-09-25 17:00:02] PULSE: tr8-104B is waiting for the previous Job Array job to finish before scheduling a new one (1185639_[2-10%1] on 'gpu_p13' partition)
|
66626 |
+
[2021-09-25 17:00:02] PULSE: tr8-104B is running for 12:33:01 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
66627 |
+
iteration 8510/ 159576 | consumed samples: 450160 | elapsed time per iteration (ms): 22836.1 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.033496E+00 | loss scale: 1024.0 | grad norm: 84589.712 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66628 |
+
time (ms)
|
66629 |
+
iteration 8520/ 159576 | consumed samples: 451760 | elapsed time per iteration (ms): 22678.6 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.034415E+00 | loss scale: 1024.0 | grad norm: 45889.295 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66630 |
+
time (ms)
|
66631 |
+
iteration 8530/ 159576 | consumed samples: 453360 | elapsed time per iteration (ms): 22820.3 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.022775E+00 | loss scale: 1024.0 | grad norm: 46421.613 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66632 |
+
time (ms)
|
66633 |
+
[2021-09-25 17:10:31] PULSE: tr8-104B is scheduled to start in 18:16:03 (at 2021-09-26T11:26:35) (1188168 on 'gpu_p13' partition)
|
66634 |
+
[2021-09-25 17:10:31] PULSE: tr8-104B is running for 12:43:30 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|