bigscience-bot
commited on
Commit
•
f949899
1
Parent(s):
d27013f
new data
Browse files- logs/main_log.txt +38 -0
logs/main_log.txt
CHANGED
@@ -66863,3 +66863,41 @@ time (ms)
|
|
66863 |
time (ms)
|
66864 |
[2021-09-25 23:11:05] PULSE: tr8-104B is scheduled to start in 18:13:44 (at 2021-09-26T17:24:50) (1188168 on 'gpu_p13' partition)
|
66865 |
[2021-09-25 23:11:05] PULSE: tr8-104B is running for 18:44:04 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66863 |
time (ms)
|
66864 |
[2021-09-25 23:11:05] PULSE: tr8-104B is scheduled to start in 18:13:44 (at 2021-09-26T17:24:50) (1188168 on 'gpu_p13' partition)
|
66865 |
[2021-09-25 23:11:05] PULSE: tr8-104B is running for 18:44:04 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
66866 |
+
iteration 9580/ 159576 | consumed samples: 656512 | elapsed time per iteration (ms): 19889.3 | learning rate: 6.000E-05 | global batch size: 224 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66867 |
+
time (ms)
|
66868 |
+
iteration 9590/ 159576 | consumed samples: 658752 | elapsed time per iteration (ms): 19672.3 | learning rate: 6.000E-05 | global batch size: 224 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66869 |
+
time (ms)
|
66870 |
+
iteration 9600/ 159576 | consumed samples: 660992 | elapsed time per iteration (ms): 19668.0 | learning rate: 6.000E-05 | global batch size: 224 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66871 |
+
time (ms)
|
66872 |
+
iteration 9610/ 159576 | consumed samples: 663360 | elapsed time per iteration (ms): 20660.1 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66873 |
+
time (ms)
|
66874 |
+
iteration 9620/ 159576 | consumed samples: 665760 | elapsed time per iteration (ms): 20759.5 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66875 |
+
time (ms)
|
66876 |
+
iteration 9630/ 159576 | consumed samples: 668160 | elapsed time per iteration (ms): 20573.3 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66877 |
+
time (ms)
|
66878 |
+
iteration 9640/ 159576 | consumed samples: 670560 | elapsed time per iteration (ms): 21117.4 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66879 |
+
time (ms)
|
66880 |
+
iteration 9650/ 159576 | consumed samples: 672960 | elapsed time per iteration (ms): 21312.3 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66881 |
+
time (ms)
|
66882 |
+
iteration 9660/ 159576 | consumed samples: 675360 | elapsed time per iteration (ms): 20596.0 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66883 |
+
time (ms)
|
66884 |
+
iteration 9670/ 159576 | consumed samples: 677760 | elapsed time per iteration (ms): 20413.4 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66885 |
+
time (ms)
|
66886 |
+
iteration 9680/ 159576 | consumed samples: 680160 | elapsed time per iteration (ms): 20820.1 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66887 |
+
time (ms)
|
66888 |
+
iteration 9690/ 159576 | consumed samples: 682560 | elapsed time per iteration (ms): 20882.2 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66889 |
+
time (ms)
|
66890 |
+
iteration 9700/ 159576 | consumed samples: 684960 | elapsed time per iteration (ms): 21320.0 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66891 |
+
time (ms)
|
66892 |
+
iteration 9710/ 159576 | consumed samples: 687360 | elapsed time per iteration (ms): 20632.6 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66893 |
+
time (ms)
|
66894 |
+
iteration 9720/ 159576 | consumed samples: 689760 | elapsed time per iteration (ms): 20593.0 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66895 |
+
time (ms)
|
66896 |
+
iteration 9730/ 159576 | consumed samples: 692160 | elapsed time per iteration (ms): 21160.0 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66897 |
+
time (ms)
|
66898 |
+
iteration 9740/ 159576 | consumed samples: 694560 | elapsed time per iteration (ms): 20918.8 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66899 |
+
time (ms)
|
66900 |
+
[2021-09-26 00:11:13] PULSE: tr8-104B is scheduled to start in 17:13:36 (at 2021-09-26T17:24:50) (1188168 on 'gpu_p13' partition)
|
66901 |
+
[2021-09-26 00:11:13] PULSE: tr8-104B is running for 19:44:12 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
66902 |
+
iteration 9750/ 159576 | consumed samples: 696960 | elapsed time per iteration (ms): 20828.1 | learning rate: 6.000E-05 | global batch size: 240 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66903 |
+
time (ms)
|