bigscience-bot
commited on
Commit
•
dd20197
1
Parent(s):
c67c5d3
new data
Browse files- logs/main_log.txt +49 -0
logs/main_log.txt
CHANGED
@@ -86978,3 +86978,52 @@ time (ms)
|
|
86978 |
iteration 10090/ 159576 | consumed samples: 784912 | elapsed time per iteration (ms): 15060.9 | learning rate: 6.000E-05 | global batch size: 272 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
86979 |
time (ms)
|
86980 |
[2021-09-27 05:14:32] PULSE: tr8-104B is running for 1:20:16 since 2021-09-27T03:54:16 (1188168 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r6i7n[7-8],r7i0n[0-5],r7i1n[7-8],r7i2n[0-1,5,8],r7i3n2,r7i5n7,r7i6n[1-4,8],r7i7n[0-4,6-8],r8i0n[0-8],r8i1n[0-4],r8i2n8,r8i3n[0-3,8],r8i4n[0-1],r8i6n[2-3,5-6],r8i7n[3-8],r9i0n[0-6,8],r9i1n[0-8],r9i2n[0,3-8],r9i3n[0-2,6-8],r9i4n[0-6,8],r9i5n[0-8],r9i6n[0-8],r9i7n[1-8])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86978 |
iteration 10090/ 159576 | consumed samples: 784912 | elapsed time per iteration (ms): 15060.9 | learning rate: 6.000E-05 | global batch size: 272 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
86979 |
time (ms)
|
86980 |
[2021-09-27 05:14:32] PULSE: tr8-104B is running for 1:20:16 since 2021-09-27T03:54:16 (1188168 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r6i7n[7-8],r7i0n[0-5],r7i1n[7-8],r7i2n[0-1,5,8],r7i3n2,r7i5n7,r7i6n[1-4,8],r7i7n[0-4,6-8],r8i0n[0-8],r8i1n[0-4],r8i2n8,r8i3n[0-3,8],r8i4n[0-1],r8i6n[2-3,5-6],r8i7n[3-8],r9i0n[0-6,8],r9i1n[0-8],r9i2n[0,3-8],r9i3n[0-2,6-8],r9i4n[0-6,8],r9i5n[0-8],r9i6n[0-8],r9i7n[1-8])
|
86981 |
+
iteration 10100/ 159576 | consumed samples: 787632 | elapsed time per iteration (ms): 14624.0 | learning rate: 6.000E-05 | global batch size: 272 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
86982 |
+
time (ms)
|
86983 |
+
iteration 10110/ 159576 | consumed samples: 790352 | elapsed time per iteration (ms): 14621.7 | learning rate: 6.000E-05 | global batch size: 272 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
86984 |
+
time (ms)
|
86985 |
+
iteration 10120/ 159576 | consumed samples: 793072 | elapsed time per iteration (ms): 14685.1 | learning rate: 6.000E-05 | global batch size: 272 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
86986 |
+
time (ms)
|
86987 |
+
iteration 10130/ 159576 | consumed samples: 795792 | elapsed time per iteration (ms): 14531.8 | learning rate: 6.000E-05 | global batch size: 272 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
86988 |
+
time (ms)
|
86989 |
+
iteration 10140/ 159576 | consumed samples: 798512 | elapsed time per iteration (ms): 14629.6 | learning rate: 6.000E-05 | global batch size: 272 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
86990 |
+
time (ms)
|
86991 |
+
iteration 10150/ 159576 | consumed samples: 801232 | elapsed time per iteration (ms): 14771.8 | learning rate: 6.000E-05 | global batch size: 272 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
86992 |
+
time (ms)
|
86993 |
+
iteration 10160/ 159576 | consumed samples: 803984 | elapsed time per iteration (ms): 14889.9 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
86994 |
+
time (ms)
|
86995 |
+
iteration 10170/ 159576 | consumed samples: 806864 | elapsed time per iteration (ms): 15471.9 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
86996 |
+
time (ms)
|
86997 |
+
iteration 10180/ 159576 | consumed samples: 809744 | elapsed time per iteration (ms): 15228.6 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
86998 |
+
time (ms)
|
86999 |
+
iteration 10190/ 159576 | consumed samples: 812624 | elapsed time per iteration (ms): 15425.1 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87000 |
+
time (ms)
|
87001 |
+
iteration 10200/ 159576 | consumed samples: 815504 | elapsed time per iteration (ms): 15390.8 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87002 |
+
time (ms)
|
87003 |
+
iteration 10210/ 159576 | consumed samples: 818384 | elapsed time per iteration (ms): 15293.9 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87004 |
+
time (ms)
|
87005 |
+
iteration 10220/ 159576 | consumed samples: 821264 | elapsed time per iteration (ms): 15259.9 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87006 |
+
time (ms)
|
87007 |
+
iteration 10230/ 159576 | consumed samples: 824144 | elapsed time per iteration (ms): 15547.4 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87008 |
+
time (ms)
|
87009 |
+
iteration 10240/ 159576 | consumed samples: 827024 | elapsed time per iteration (ms): 15375.5 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87010 |
+
time (ms)
|
87011 |
+
iteration 10250/ 159576 | consumed samples: 829904 | elapsed time per iteration (ms): 15322.8 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87012 |
+
time (ms)
|
87013 |
+
iteration 10260/ 159576 | consumed samples: 832784 | elapsed time per iteration (ms): 15280.3 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87014 |
+
time (ms)
|
87015 |
+
iteration 10270/ 159576 | consumed samples: 835664 | elapsed time per iteration (ms): 15390.4 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87016 |
+
time (ms)
|
87017 |
+
iteration 10280/ 159576 | consumed samples: 838544 | elapsed time per iteration (ms): 15339.6 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87018 |
+
time (ms)
|
87019 |
+
iteration 10290/ 159576 | consumed samples: 841424 | elapsed time per iteration (ms): 15252.5 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87020 |
+
time (ms)
|
87021 |
+
iteration 10300/ 159576 | consumed samples: 844304 | elapsed time per iteration (ms): 15146.5 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87022 |
+
time (ms)
|
87023 |
+
iteration 10310/ 159576 | consumed samples: 847184 | elapsed time per iteration (ms): 15389.7 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87024 |
+
time (ms)
|
87025 |
+
iteration 10320/ 159576 | consumed samples: 850064 | elapsed time per iteration (ms): 15348.5 | learning rate: 6.000E-05 | global batch size: 288 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87026 |
+
time (ms)
|
87027 |
+
iteration 10330/ 159576 | consumed samples: 853072 | elapsed time per iteration (ms): 15779.0 | learning rate: 6.000E-05 | global batch size: 304 | loss scale: 1.0 | grad norm: 0.000 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
87028 |
+
time (ms)
|
87029 |
+
[2021-09-27 06:14:35] PULSE: tr8-104B is running for 2:20:19 since 2021-09-27T03:54:16 (1188168 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r6i7n[7-8],r7i0n[0-5],r7i1n[7-8],r7i2n[0-1,5,8],r7i3n2,r7i5n7,r7i6n[1-4,8],r7i7n[0-4,6-8],r8i0n[0-8],r8i1n[0-4],r8i2n8,r8i3n[0-3,8],r8i4n[0-1],r8i6n[2-3,5-6],r8i7n[3-8],r9i0n[0-6,8],r9i1n[0-8],r9i2n[0,3-8],r9i3n[0-2,6-8],r9i4n[0-6,8],r9i5n[0-8],r9i6n[0-8],r9i7n[1-8])
|