bigscience-bot
commited on
Commit
•
c2fb4a4
1
Parent(s):
03a2338
new data
Browse files- logs/main_log.txt +28 -0
logs/main_log.txt
CHANGED
@@ -87421,3 +87421,31 @@ time (ms)
|
|
87421 |
iteration 12170/ 159576 | consumed samples: 1611152 | elapsed time per iteration (ms): 24994.1 | learning rate: 6.000E-05 | global batch size: 560 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87422 |
time (ms)
|
87423 |
[2021-09-27 16:28:40] PULSE: tr8-104B is running for 12:34:24 since 2021-09-27T03:54:16 (1188168 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r6i7n[7-8],r7i0n[0-5],r7i1n[7-8],r7i2n[0-1,5,8],r7i3n2,r7i5n7,r7i6n[1-4,8],r7i7n[0-4,6-8],r8i0n[0-8],r8i1n[0-4],r8i2n8,r8i3n[0-3,8],r8i4n[0-1],r8i6n[2-3,5-6],r8i7n[3-8],r9i0n[0-6,8],r9i1n[0-8],r9i2n[0,3-8],r9i3n[0-2,6-8],r9i4n[0-6,8],r9i5n[0-8],r9i6n[0-8],r9i7n[1-8])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87421 |
iteration 12170/ 159576 | consumed samples: 1611152 | elapsed time per iteration (ms): 24994.1 | learning rate: 6.000E-05 | global batch size: 560 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87422 |
time (ms)
|
87423 |
[2021-09-27 16:28:40] PULSE: tr8-104B is running for 12:34:24 since 2021-09-27T03:54:16 (1188168 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r6i7n[7-8],r7i0n[0-5],r7i1n[7-8],r7i2n[0-1,5,8],r7i3n2,r7i5n7,r7i6n[1-4,8],r7i7n[0-4,6-8],r8i0n[0-8],r8i1n[0-4],r8i2n8,r8i3n[0-3,8],r8i4n[0-1],r8i6n[2-3,5-6],r8i7n[3-8],r9i0n[0-6,8],r9i1n[0-8],r9i2n[0,3-8],r9i3n[0-2,6-8],r9i4n[0-6,8],r9i5n[0-8],r9i6n[0-8],r9i7n[1-8])
|
87424 |
+
iteration 12180/ 159576 | consumed samples: 1616752 | elapsed time per iteration (ms): 25275.1 | learning rate: 6.000E-05 | global batch size: 560 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87425 |
+
time (ms)
|
87426 |
+
iteration 12190/ 159576 | consumed samples: 1622352 | elapsed time per iteration (ms): 25176.8 | learning rate: 6.000E-05 | global batch size: 560 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87427 |
+
time (ms)
|
87428 |
+
iteration 12200/ 159576 | consumed samples: 1627952 | elapsed time per iteration (ms): 25167.8 | learning rate: 6.000E-05 | global batch size: 560 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87429 |
+
time (ms)
|
87430 |
+
iteration 12210/ 159576 | consumed samples: 1633552 | elapsed time per iteration (ms): 25057.7 | learning rate: 6.000E-05 | global batch size: 560 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87431 |
+
time (ms)
|
87432 |
+
iteration 12220/ 159576 | consumed samples: 1639152 | elapsed time per iteration (ms): 25147.4 | learning rate: 6.000E-05 | global batch size: 560 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87433 |
+
time (ms)
|
87434 |
+
iteration 12230/ 159576 | consumed samples: 1644752 | elapsed time per iteration (ms): 25198.7 | learning rate: 6.000E-05 | global batch size: 560 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87435 |
+
time (ms)
|
87436 |
+
iteration 12240/ 159576 | consumed samples: 1650352 | elapsed time per iteration (ms): 24894.2 | learning rate: 6.000E-05 | global batch size: 560 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87437 |
+
time (ms)
|
87438 |
+
iteration 12250/ 159576 | consumed samples: 1656016 | elapsed time per iteration (ms): 25306.4 | learning rate: 6.000E-05 | global batch size: 576 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87439 |
+
time (ms)
|
87440 |
+
iteration 12260/ 159576 | consumed samples: 1661776 | elapsed time per iteration (ms): 25946.7 | learning rate: 6.000E-05 | global batch size: 576 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87441 |
+
time (ms)
|
87442 |
+
iteration 12270/ 159576 | consumed samples: 1667536 | elapsed time per iteration (ms): 25714.3 | learning rate: 6.000E-05 | global batch size: 576 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87443 |
+
time (ms)
|
87444 |
+
iteration 12280/ 159576 | consumed samples: 1673296 | elapsed time per iteration (ms): 25863.6 | learning rate: 6.000E-05 | global batch size: 576 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87445 |
+
time (ms)
|
87446 |
+
iteration 12290/ 159576 | consumed samples: 1679056 | elapsed time per iteration (ms): 26038.1 | learning rate: 6.000E-05 | global batch size: 576 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87447 |
+
time (ms)
|
87448 |
+
iteration 12300/ 159576 | consumed samples: 1684816 | elapsed time per iteration (ms): 25611.4 | learning rate: 6.000E-05 | global batch size: 576 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87449 |
+
time (ms)
|
87450 |
+
iteration 12310/ 159576 | consumed samples: 1690576 | elapsed time per iteration (ms): 25819.3 | learning rate: 6.000E-05 | global batch size: 576 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87451 |
+
time (ms)
|