bigscience-bot
commited on
Commit
•
3e1df60
1
Parent(s):
0aa8dcf
new data
Browse files- logs/main_log.txt +46 -0
logs/main_log.txt
CHANGED
@@ -66180,3 +66180,49 @@ time (ms)
|
|
66180 |
time (ms)
|
66181 |
[2021-09-25 06:08:34] PULSE: tr8-104B is waiting for the previous Job Array job to finish before scheduling a new one (1185639_[2-10%1] on 'gpu_p13' partition)
|
66182 |
[2021-09-25 06:08:34] PULSE: tr8-104B is running for 1:41:33 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66180 |
time (ms)
|
66181 |
[2021-09-25 06:08:34] PULSE: tr8-104B is waiting for the previous Job Array job to finish before scheduling a new one (1185639_[2-10%1] on 'gpu_p13' partition)
|
66182 |
[2021-09-25 06:08:34] PULSE: tr8-104B is running for 1:41:33 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
66183 |
+
iteration 6560/ 159576 | consumed samples: 221600 | elapsed time per iteration (ms): 17470.3 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.312242E+00 | loss scale: 2048.0 | grad norm: 58830.808 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66184 |
+
time (ms)
|
66185 |
+
iteration 6570/ 159576 | consumed samples: 222400 | elapsed time per iteration (ms): 17497.8 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.305868E+00 | loss scale: 2048.0 | grad norm: 95845.470 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66186 |
+
time (ms)
|
66187 |
+
iteration 6580/ 159576 | consumed samples: 223200 | elapsed time per iteration (ms): 17465.4 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.323441E+00 | loss scale: 2048.0 | grad norm: 67257.778 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66188 |
+
time (ms)
|
66189 |
+
iteration 6590/ 159576 | consumed samples: 224000 | elapsed time per iteration (ms): 17539.4 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.324122E+00 | loss scale: 2048.0 | grad norm: 68019.685 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66190 |
+
time (ms)
|
66191 |
+
iteration 6600/ 159576 | consumed samples: 224800 | elapsed time per iteration (ms): 17523.7 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.367977E+00 | loss scale: 2048.0 | grad norm: 72056.426 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66192 |
+
time (ms)
|
66193 |
+
iteration 6610/ 159576 | consumed samples: 225600 | elapsed time per iteration (ms): 17492.9 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.308113E+00 | loss scale: 2048.0 | grad norm: 149731.321 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66194 |
+
time (ms)
|
66195 |
+
iteration 6620/ 159576 | consumed samples: 226400 | elapsed time per iteration (ms): 17537.3 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.354418E+00 | loss scale: 2048.0 | grad norm: 62412.313 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66196 |
+
time (ms)
|
66197 |
+
iteration 6630/ 159576 | consumed samples: 227200 | elapsed time per iteration (ms): 17517.5 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.357222E+00 | loss scale: 2048.0 | grad norm: 85289.584 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66198 |
+
time (ms)
|
66199 |
+
iteration 6640/ 159576 | consumed samples: 228000 | elapsed time per iteration (ms): 17515.1 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.340989E+00 | loss scale: 2048.0 | grad norm: 56974.928 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66200 |
+
time (ms)
|
66201 |
+
iteration 6650/ 159576 | consumed samples: 228800 | elapsed time per iteration (ms): 17504.4 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.343948E+00 | loss scale: 2048.0 | grad norm: 94205.551 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66202 |
+
time (ms)
|
66203 |
+
iteration 6660/ 159576 | consumed samples: 229600 | elapsed time per iteration (ms): 17528.5 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.349052E+00 | loss scale: 2048.0 | grad norm: 59116.810 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66204 |
+
time (ms)
|
66205 |
+
iteration 6670/ 159576 | consumed samples: 230400 | elapsed time per iteration (ms): 17539.0 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.319823E+00 | loss scale: 2048.0 | grad norm: 89145.444 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66206 |
+
time (ms)
|
66207 |
+
iteration 6680/ 159576 | consumed samples: 231200 | elapsed time per iteration (ms): 17492.6 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.322467E+00 | loss scale: 2048.0 | grad norm: 79513.773 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66208 |
+
time (ms)
|
66209 |
+
iteration 6690/ 159576 | consumed samples: 232000 | elapsed time per iteration (ms): 17427.8 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.351400E+00 | loss scale: 2048.0 | grad norm: 80270.152 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66210 |
+
time (ms)
|
66211 |
+
iteration 6700/ 159576 | consumed samples: 232800 | elapsed time per iteration (ms): 17427.9 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.321815E+00 | loss scale: 2048.0 | grad norm: 89875.557 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66212 |
+
time (ms)
|
66213 |
+
iteration 6710/ 159576 | consumed samples: 233600 | elapsed time per iteration (ms): 17478.2 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.318744E+00 | loss scale: 2048.0 | grad norm: 75317.404 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66214 |
+
time (ms)
|
66215 |
+
[2021-09-25 06:55:50] PULSE: tr8-104B is scheduled to start in 1 day, 10:16:13 (at 2021-09-26T17:12:04) (1188168 on 'gpu_p13' partition)
|
66216 |
+
[2021-09-25 06:55:50] PULSE: tr8-104B is waiting for the previous Job Array job to finish before scheduling a new one (1185639_[2-10%1] on 'gpu_p13' partition)
|
66217 |
+
[2021-09-25 06:55:50] PULSE: tr8-104B is running for 2:28:49 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
66218 |
+
iteration 6720/ 159576 | consumed samples: 234400 | elapsed time per iteration (ms): 17509.5 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.297193E+00 | loss scale: 2048.0 | grad norm: 136372.702 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66219 |
+
time (ms)
|
66220 |
+
iteration 6730/ 159576 | consumed samples: 235200 | elapsed time per iteration (ms): 17514.2 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.303332E+00 | loss scale: 2048.0 | grad norm: 84302.661 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66221 |
+
time (ms)
|
66222 |
+
iteration 6740/ 159576 | consumed samples: 236000 | elapsed time per iteration (ms): 17530.2 | learning rate: 6.000E-05 | global batch size: 80 | lm loss: 6.327809E+00 | loss scale: 2048.0 | grad norm: 84736.807 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66223 |
+
time (ms)
|
66224 |
+
iteration 6750/ 159576 | consumed samples: 236912 | elapsed time per iteration (ms): 18323.3 | learning rate: 6.000E-05 | global batch size: 96 | lm loss: 6.320579E+00 | loss scale: 2048.0 | grad norm: 68855.991 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66225 |
+
time (ms)
|
66226 |
+
[2021-09-25 07:08:59] PULSE: tr8-104B is scheduled to start in 19:13:17 (at 2021-09-26T02:22:17) (1188168 on 'gpu_p13' partition)
|
66227 |
+
[2021-09-25 07:08:59] PULSE: tr8-104B is waiting for the previous Job Array job to finish before scheduling a new one (1185639_[2-10%1] on 'gpu_p13' partition)
|
66228 |
+
[2021-09-25 07:08:59] PULSE: tr8-104B is running for 2:41:58 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|