bigscience-bot
commited on
Commit
•
0c71f2e
1
Parent(s):
a1727b9
new data
Browse files- logs/main_log.txt +20 -0
logs/main_log.txt
CHANGED
@@ -99407,3 +99407,23 @@ time (ms)
|
|
99407 |
time (ms)
|
99408 |
iteration 6240/ 159576 | consumed samples: 196000 | elapsed time per iteration (ms): 17585.3 | learning rate: 5.423E-05 | global batch size: 80 | lm loss: 6.313773E+00 | loss scale: 4096.0 | grad norm: 104488.785 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
99409 |
time (ms)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99407 |
time (ms)
|
99408 |
iteration 6240/ 159576 | consumed samples: 196000 | elapsed time per iteration (ms): 17585.3 | learning rate: 5.423E-05 | global batch size: 80 | lm loss: 6.313773E+00 | loss scale: 4096.0 | grad norm: 104488.785 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
99409 |
time (ms)
|
99410 |
+
iteration 6250/ 159576 | consumed samples: 196800 | elapsed time per iteration (ms): 17683.9 | learning rate: 5.445E-05 | global batch size: 80 | lm loss: 6.302388E+00 | loss scale: 4096.0 | grad norm: 99404.120 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
99411 |
+
time (ms)
|
99412 |
+
iteration 6260/ 159576 | consumed samples: 197600 | elapsed time per iteration (ms): 17834.3 | learning rate: 5.467E-05 | global batch size: 80 | lm loss: 6.322264E+00 | loss scale: 4096.0 | grad norm: 134601.608 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
99413 |
+
time (ms)
|
99414 |
+
iteration 6270/ 159576 | consumed samples: 198400 | elapsed time per iteration (ms): 17647.5 | learning rate: 5.489E-05 | global batch size: 80 | lm loss: 6.319476E+00 | loss scale: 4096.0 | grad norm: 142879.794 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
99415 |
+
time (ms)
|
99416 |
+
iteration 6280/ 159576 | consumed samples: 199200 | elapsed time per iteration (ms): 17607.4 | learning rate: 5.511E-05 | global batch size: 80 | lm loss: 6.321982E+00 | loss scale: 4096.0 | grad norm: 114136.314 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
99417 |
+
time (ms)
|
99418 |
+
iteration 6290/ 159576 | consumed samples: 200000 | elapsed time per iteration (ms): 17636.6 | learning rate: 5.534E-05 | global batch size: 80 | lm loss: 6.272703E+00 | loss scale: 4096.0 | grad norm: 101011.949 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
99419 |
+
time (ms)
|
99420 |
+
iteration 6300/ 159576 | consumed samples: 200800 | elapsed time per iteration (ms): 17537.9 | learning rate: 5.556E-05 | global batch size: 80 | lm loss: 6.295881E+00 | loss scale: 4096.0 | grad norm: 116874.031 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
99421 |
+
time (ms)
|
99422 |
+
iteration 6310/ 159576 | consumed samples: 201600 | elapsed time per iteration (ms): 17634.4 | learning rate: 5.578E-05 | global batch size: 80 | lm loss: 6.324175E+00 | loss scale: 4096.0 | grad norm: 115938.037 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
99423 |
+
time (ms)
|
99424 |
+
iteration 6320/ 159576 | consumed samples: 202400 | elapsed time per iteration (ms): 17796.6 | learning rate: 5.600E-05 | global batch size: 80 | lm loss: 6.301260E+00 | loss scale: 4096.0 | grad norm: 128639.863 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
99425 |
+
time (ms)
|
99426 |
+
iteration 6330/ 159576 | consumed samples: 203200 | elapsed time per iteration (ms): 17684.4 | learning rate: 5.622E-05 | global batch size: 80 | lm loss: 6.325212E+00 | loss scale: 4096.0 | grad norm: 122331.136 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
99427 |
+
time (ms)
|
99428 |
+
iteration 6340/ 159576 | consumed samples: 204000 | elapsed time per iteration (ms): 17751.1 | learning rate: 5.645E-05 | global batch size: 80 | lm loss: 6.315152E+00 | loss scale: 4096.0 | grad norm: 107257.166 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
|
99429 |
+
time (ms)
|