bigscience-bot
commited on
Commit
·
cca1868
1
Parent(s):
95b21d3
new data
Browse files- logs/main_log.txt +33 -0
logs/main_log.txt
CHANGED
@@ -87319,3 +87319,36 @@ time (ms)
|
|
87319 |
iteration 11720/ 159576 | consumed samples: 1380160 | elapsed time per iteration (ms): 22533.1 | learning rate: 6.000E-05 | global batch size: 480 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87320 |
time (ms)
|
87321 |
[2021-09-27 13:27:56] PULSE: tr8-104B is running for 9:33:40 since 2021-09-27T03:54:16 (1188168 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r6i7n[7-8],r7i0n[0-5],r7i1n[7-8],r7i2n[0-1,5,8],r7i3n2,r7i5n7,r7i6n[1-4,8],r7i7n[0-4,6-8],r8i0n[0-8],r8i1n[0-4],r8i2n8,r8i3n[0-3,8],r8i4n[0-1],r8i6n[2-3,5-6],r8i7n[3-8],r9i0n[0-6,8],r9i1n[0-8],r9i2n[0,3-8],r9i3n[0-2,6-8],r9i4n[0-6,8],r9i5n[0-8],r9i6n[0-8],r9i7n[1-8])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87319 |
iteration 11720/ 159576 | consumed samples: 1380160 | elapsed time per iteration (ms): 22533.1 | learning rate: 6.000E-05 | global batch size: 480 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87320 |
time (ms)
|
87321 |
[2021-09-27 13:27:56] PULSE: tr8-104B is running for 9:33:40 since 2021-09-27T03:54:16 (1188168 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r6i7n[7-8],r7i0n[0-5],r7i1n[7-8],r7i2n[0-1,5,8],r7i3n2,r7i5n7,r7i6n[1-4,8],r7i7n[0-4,6-8],r8i0n[0-8],r8i1n[0-4],r8i2n8,r8i3n[0-3,8],r8i4n[0-1],r8i6n[2-3,5-6],r8i7n[3-8],r9i0n[0-6,8],r9i1n[0-8],r9i2n[0,3-8],r9i3n[0-2,6-8],r9i4n[0-6,8],r9i5n[0-8],r9i6n[0-8],r9i7n[1-8])
|
87322 |
+
iteration 11730/ 159576 | consumed samples: 1384960 | elapsed time per iteration (ms): 22192.1 | learning rate: 6.000E-05 | global batch size: 480 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87323 |
+
time (ms)
|
87324 |
+
iteration 11740/ 159576 | consumed samples: 1389760 | elapsed time per iteration (ms): 22268.7 | learning rate: 6.000E-05 | global batch size: 480 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87325 |
+
time (ms)
|
87326 |
+
iteration 11750/ 159576 | consumed samples: 1394560 | elapsed time per iteration (ms): 22268.4 | learning rate: 6.000E-05 | global batch size: 480 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87327 |
+
time (ms)
|
87328 |
+
iteration 11760/ 159576 | consumed samples: 1399360 | elapsed time per iteration (ms): 22141.9 | learning rate: 6.000E-05 | global batch size: 480 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87329 |
+
time (ms)
|
87330 |
+
iteration 11770/ 159576 | consumed samples: 1404160 | elapsed time per iteration (ms): 21979.0 | learning rate: 6.000E-05 | global batch size: 480 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87331 |
+
time (ms)
|
87332 |
+
iteration 11780/ 159576 | consumed samples: 1408960 | elapsed time per iteration (ms): 22172.2 | learning rate: 6.000E-05 | global batch size: 480 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87333 |
+
time (ms)
|
87334 |
+
iteration 11790/ 159576 | consumed samples: 1413760 | elapsed time per iteration (ms): 22335.9 | learning rate: 6.000E-05 | global batch size: 480 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87335 |
+
time (ms)
|
87336 |
+
iteration 11800/ 159576 | consumed samples: 1418592 | elapsed time per iteration (ms): 22588.3 | learning rate: 6.000E-05 | global batch size: 496 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87337 |
+
time (ms)
|
87338 |
+
iteration 11810/ 159576 | consumed samples: 1423552 | elapsed time per iteration (ms): 22823.4 | learning rate: 6.000E-05 | global batch size: 496 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87339 |
+
time (ms)
|
87340 |
+
iteration 11820/ 159576 | consumed samples: 1428512 | elapsed time per iteration (ms): 22959.2 | learning rate: 6.000E-05 | global batch size: 496 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87341 |
+
time (ms)
|
87342 |
+
iteration 11830/ 159576 | consumed samples: 1433472 | elapsed time per iteration (ms): 23080.3 | learning rate: 6.000E-05 | global batch size: 496 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87343 |
+
time (ms)
|
87344 |
+
iteration 11840/ 159576 | consumed samples: 1438432 | elapsed time per iteration (ms): 23034.0 | learning rate: 6.000E-05 | global batch size: 496 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87345 |
+
time (ms)
|
87346 |
+
iteration 11850/ 159576 | consumed samples: 1443392 | elapsed time per iteration (ms): 23099.6 | learning rate: 6.000E-05 | global batch size: 496 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87347 |
+
time (ms)
|
87348 |
+
iteration 11860/ 159576 | consumed samples: 1448352 | elapsed time per iteration (ms): 23031.2 | learning rate: 6.000E-05 | global batch size: 496 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87349 |
+
time (ms)
|
87350 |
+
iteration 11870/ 159576 | consumed samples: 1453312 | elapsed time per iteration (ms): 22866.8 | learning rate: 6.000E-05 | global batch size: 496 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87351 |
+
time (ms)
|
87352 |
+
iteration 11880/ 159576 | consumed samples: 1458272 | elapsed time per iteration (ms): 23007.5 | learning rate: 6.000E-05 | global batch size: 496 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87353 |
+
time (ms)
|
87354 |
+
[2021-09-27 14:27:59] PULSE: tr8-104B is running for 10:33:43 since 2021-09-27T03:54:16 (1188168 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r6i7n[7-8],r7i0n[0-5],r7i1n[7-8],r7i2n[0-1,5,8],r7i3n2,r7i5n7,r7i6n[1-4,8],r7i7n[0-4,6-8],r8i0n[0-8],r8i1n[0-4],r8i2n8,r8i3n[0-3,8],r8i4n[0-1],r8i6n[2-3,5-6],r8i7n[3-8],r9i0n[0-6,8],r9i1n[0-8],r9i2n[0,3-8],r9i3n[0-2,6-8],r9i4n[0-6,8],r9i5n[0-8],r9i6n[0-8],r9i7n[1-8])
|