bigscience-bot
commited on
Commit
•
9e58078
1
Parent(s):
e8acdf0
new data
Browse files- logs/main_log.txt +26 -0
logs/main_log.txt
CHANGED
@@ -66642,3 +66642,29 @@ time (ms)
|
|
66642 |
time (ms)
|
66643 |
[2021-09-25 17:26:58] PULSE: tr8-104B is scheduled to start in 17:59:36 (at 2021-09-26T11:26:35) (1188168 on 'gpu_p13' partition)
|
66644 |
[2021-09-25 17:26:58] PULSE: tr8-104B is running for 12:59:57 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66642 |
time (ms)
|
66643 |
[2021-09-25 17:26:58] PULSE: tr8-104B is scheduled to start in 17:59:36 (at 2021-09-26T11:26:35) (1188168 on 'gpu_p13' partition)
|
66644 |
[2021-09-25 17:26:58] PULSE: tr8-104B is running for 12:59:57 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
66645 |
+
iteration 8580/ 159576 | consumed samples: 461360 | elapsed time per iteration (ms): 22688.8 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.087028E+00 | loss scale: 1024.0 | grad norm: 46974.842 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66646 |
+
time (ms)
|
66647 |
+
iteration 8590/ 159576 | consumed samples: 462960 | elapsed time per iteration (ms): 22699.4 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.089204E+00 | loss scale: 1024.0 | grad norm: 44702.862 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66648 |
+
time (ms)
|
66649 |
+
iteration 8600/ 159576 | consumed samples: 464560 | elapsed time per iteration (ms): 22777.7 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.149306E+00 | loss scale: 1024.0 | grad norm: 261339.801 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66650 |
+
time (ms)
|
66651 |
+
iteration 8610/ 159576 | consumed samples: 466160 | elapsed time per iteration (ms): 22975.5 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.167276E+00 | loss scale: 1024.0 | grad norm: 105455.551 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66652 |
+
time (ms)
|
66653 |
+
iteration 8620/ 159576 | consumed samples: 467760 | elapsed time per iteration (ms): 23048.5 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.078442E+00 | loss scale: 1024.0 | grad norm: 84212.423 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66654 |
+
time (ms)
|
66655 |
+
iteration 8630/ 159576 | consumed samples: 469360 | elapsed time per iteration (ms): 22799.5 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.081234E+00 | loss scale: 1024.0 | grad norm: 52121.419 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66656 |
+
time (ms)
|
66657 |
+
iteration 8640/ 159576 | consumed samples: 470960 | elapsed time per iteration (ms): 22720.5 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.109283E+00 | loss scale: 1024.0 | grad norm: 48651.489 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66658 |
+
time (ms)
|
66659 |
+
iteration 8650/ 159576 | consumed samples: 472560 | elapsed time per iteration (ms): 22695.2 | learning rate: 6.000E-05 | global batch size: 160 | lm loss: 7.118199E+00 | loss scale: 1024.0 | grad norm: 26046.891 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66660 |
+
time (ms)
|
66661 |
+
iteration 8660/ 159576 | consumed samples: 474320 | elapsed time per iteration (ms): 23933.5 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.064212E+00 | loss scale: 1024.0 | grad norm: 40523.058 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66662 |
+
time (ms)
|
66663 |
+
iteration 8670/ 159576 | consumed samples: 476080 | elapsed time per iteration (ms): 23798.1 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.051229E+00 | loss scale: 1024.0 | grad norm: 28160.238 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66664 |
+
time (ms)
|
66665 |
+
iteration 8680/ 159576 | consumed samples: 477840 | elapsed time per iteration (ms): 23923.9 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.036906E+00 | loss scale: 1024.0 | grad norm: 51047.866 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66666 |
+
time (ms)
|
66667 |
+
iteration 8690/ 159576 | consumed samples: 479600 | elapsed time per iteration (ms): 23651.1 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.073657E+00 | loss scale: 1024.0 | grad norm: 141610.865 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66668 |
+
time (ms)
|
66669 |
+
[2021-09-25 18:10:35] PULSE: tr8-104B is scheduled to start in 17:15:59 (at 2021-09-26T11:26:35) (1188168 on 'gpu_p13' partition)
|
66670 |
+
[2021-09-25 18:10:35] PULSE: tr8-104B is running for 13:43:34 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|