bigscience-bot
commited on
Commit
•
7a5a8b2
1
Parent(s):
28b1582
new data
Browse files- logs/main_log.txt +22 -0
logs/main_log.txt
CHANGED
@@ -66668,3 +66668,25 @@ time (ms)
|
|
66668 |
time (ms)
|
66669 |
[2021-09-25 18:10:35] PULSE: tr8-104B is scheduled to start in 17:15:59 (at 2021-09-26T11:26:35) (1188168 on 'gpu_p13' partition)
|
66670 |
[2021-09-25 18:10:35] PULSE: tr8-104B is running for 13:43:34 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66668 |
time (ms)
|
66669 |
[2021-09-25 18:10:35] PULSE: tr8-104B is scheduled to start in 17:15:59 (at 2021-09-26T11:26:35) (1188168 on 'gpu_p13' partition)
|
66670 |
[2021-09-25 18:10:35] PULSE: tr8-104B is running for 13:43:34 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
66671 |
+
iteration 8700/ 159576 | consumed samples: 481360 | elapsed time per iteration (ms): 23943.4 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.071510E+00 | loss scale: 1024.0 | grad norm: 24381.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66672 |
+
time (ms)
|
66673 |
+
iteration 8710/ 159576 | consumed samples: 483120 | elapsed time per iteration (ms): 23910.3 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.190697E+00 | loss scale: 1024.0 | grad norm: 41525.807 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66674 |
+
time (ms)
|
66675 |
+
iteration 8720/ 159576 | consumed samples: 484880 | elapsed time per iteration (ms): 23923.5 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.332158E+00 | loss scale: 1024.0 | grad norm: 23580.074 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66676 |
+
time (ms)
|
66677 |
+
iteration 8730/ 159576 | consumed samples: 486640 | elapsed time per iteration (ms): 23664.9 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.250137E+00 | loss scale: 1024.0 | grad norm: 33934.114 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66678 |
+
time (ms)
|
66679 |
+
iteration 8740/ 159576 | consumed samples: 488400 | elapsed time per iteration (ms): 24002.8 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.134158E+00 | loss scale: 1024.0 | grad norm: 18917.778 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66680 |
+
time (ms)
|
66681 |
+
iteration 8750/ 159576 | consumed samples: 490160 | elapsed time per iteration (ms): 23812.9 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.133132E+00 | loss scale: 1024.0 | grad norm: 24524.875 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66682 |
+
time (ms)
|
66683 |
+
iteration 8760/ 159576 | consumed samples: 491920 | elapsed time per iteration (ms): 24164.0 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.089709E+00 | loss scale: 1024.0 | grad norm: 18466.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66684 |
+
time (ms)
|
66685 |
+
iteration 8770/ 159576 | consumed samples: 493680 | elapsed time per iteration (ms): 23763.0 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.075866E+00 | loss scale: 1024.0 | grad norm: 21160.208 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66686 |
+
time (ms)
|
66687 |
+
iteration 8780/ 159576 | consumed samples: 495440 | elapsed time per iteration (ms): 23757.0 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.105405E+00 | loss scale: 1024.0 | grad norm: 21012.399 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66688 |
+
time (ms)
|
66689 |
+
iteration 8790/ 159576 | consumed samples: 497200 | elapsed time per iteration (ms): 23726.0 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.119524E+00 | loss scale: 1024.0 | grad norm: 19184.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66690 |
+
time (ms)
|
66691 |
+
[2021-09-25 18:51:17] PULSE: tr8-104B is scheduled to start in 19:55:07 (at 2021-09-26T14:46:25) (1188168 on 'gpu_p13' partition)
|
66692 |
+
[2021-09-25 18:51:17] PULSE: tr8-104B is running for 14:24:16 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|