bigscience-bot
commited on
Commit
•
d2b160b
1
Parent(s):
936222e
new data
Browse files- logs/main_log.txt +32 -0
logs/main_log.txt
CHANGED
@@ -66702,3 +66702,35 @@ time (ms)
|
|
66702 |
time (ms)
|
66703 |
[2021-09-25 19:10:38] PULSE: tr8-104B is scheduled to start in 19:35:46 (at 2021-09-26T14:46:25) (1188168 on 'gpu_p13' partition)
|
66704 |
[2021-09-25 19:10:38] PULSE: tr8-104B is running for 14:43:37 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66702 |
time (ms)
|
66703 |
[2021-09-25 19:10:38] PULSE: tr8-104B is scheduled to start in 19:35:46 (at 2021-09-26T14:46:25) (1188168 on 'gpu_p13' partition)
|
66704 |
[2021-09-25 19:10:38] PULSE: tr8-104B is running for 14:43:37 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|
66705 |
+
iteration 8850/ 159576 | consumed samples: 507760 | elapsed time per iteration (ms): 23681.3 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.106478E+00 | loss scale: 1024.0 | grad norm: 15650.558 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66706 |
+
time (ms)
|
66707 |
+
iteration 8860/ 159576 | consumed samples: 509520 | elapsed time per iteration (ms): 23830.0 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.077826E+00 | loss scale: 1024.0 | grad norm: 13271.961 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66708 |
+
time (ms)
|
66709 |
+
iteration 8870/ 159576 | consumed samples: 511280 | elapsed time per iteration (ms): 23830.3 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.083195E+00 | loss scale: 1024.0 | grad norm: 13942.816 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66710 |
+
time (ms)
|
66711 |
+
iteration 8880/ 159576 | consumed samples: 513040 | elapsed time per iteration (ms): 23893.7 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.101151E+00 | loss scale: 1024.0 | grad norm: 17666.067 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66712 |
+
time (ms)
|
66713 |
+
iteration 8890/ 159576 | consumed samples: 514800 | elapsed time per iteration (ms): 23733.4 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.130984E+00 | loss scale: 2048.0 | grad norm: 41179.422 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66714 |
+
time (ms)
|
66715 |
+
iteration 8900/ 159576 | consumed samples: 516560 | elapsed time per iteration (ms): 23693.0 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.084023E+00 | loss scale: 2048.0 | grad norm: 32703.102 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66716 |
+
time (ms)
|
66717 |
+
iteration 8910/ 159576 | consumed samples: 518320 | elapsed time per iteration (ms): 23793.1 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.094463E+00 | loss scale: 2048.0 | grad norm: 46954.552 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66718 |
+
time (ms)
|
66719 |
+
iteration 8920/ 159576 | consumed samples: 520112 | elapsed time per iteration (ms): 23988.6 | learning rate: 6.000E-05 | global batch size: 192 | lm loss: 7.094890E+00 | loss scale: 2048.0 | grad norm: 20910.711 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66720 |
+
time (ms)
|
66721 |
+
iteration 8930/ 159576 | consumed samples: 522032 | elapsed time per iteration (ms): 24780.5 | learning rate: 6.000E-05 | global batch size: 192 | lm loss: 7.112840E+00 | loss scale: 2048.0 | grad norm: 23723.304 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66722 |
+
time (ms)
|
66723 |
+
iteration 8940/ 159576 | consumed samples: 523952 | elapsed time per iteration (ms): 24880.9 | learning rate: 6.000E-05 | global batch size: 192 | lm loss: 7.157214E+00 | loss scale: 2048.0 | grad norm: 35769.072 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66724 |
+
time (ms)
|
66725 |
+
iteration 8950/ 159576 | consumed samples: 525872 | elapsed time per iteration (ms): 24820.3 | learning rate: 6.000E-05 | global batch size: 192 | lm loss: 7.212303E+00 | loss scale: 2048.0 | grad norm: 20241.796 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66726 |
+
time (ms)
|
66727 |
+
iteration 8960/ 159576 | consumed samples: 527792 | elapsed time per iteration (ms): 24706.7 | learning rate: 6.000E-05 | global batch size: 192 | lm loss: 7.215181E+00 | loss scale: 2048.0 | grad norm: 48969.302 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66728 |
+
time (ms)
|
66729 |
+
iteration 8970/ 159576 | consumed samples: 529712 | elapsed time per iteration (ms): 23528.3 | learning rate: 6.000E-05 | global batch size: 192 | loss scale: 1024.0 | grad norm: 156762.139 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66730 |
+
time (ms)
|
66731 |
+
iteration 8980/ 159576 | consumed samples: 531632 | elapsed time per iteration (ms): 18302.5 | learning rate: 6.000E-05 | global batch size: 192 | loss scale: 2.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66732 |
+
time (ms)
|
66733 |
+
iteration 8990/ 159576 | consumed samples: 533552 | elapsed time per iteration (ms): 17645.0 | learning rate: 6.000E-05 | global batch size: 192 | loss scale: 1.0 | grad norm: 5927.930 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
66734 |
+
time (ms)
|
66735 |
+
[2021-09-25 20:10:52] PULSE: tr8-104B is scheduled to start in 18:35:32 (at 2021-09-26T14:46:25) (1188168 on 'gpu_p13' partition)
|
66736 |
+
[2021-09-25 20:10:52] PULSE: tr8-104B is running for 15:43:51 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
|