bigscience-bot commited on
Commit
7a5a8b2
1 Parent(s): 28b1582
Files changed (1) hide show
  1. logs/main_log.txt +22 -0
logs/main_log.txt CHANGED
@@ -66668,3 +66668,25 @@ time (ms)
66668
  time (ms)
66669
  [2021-09-25 18:10:35] PULSE: tr8-104B is scheduled to start in 17:15:59 (at 2021-09-26T11:26:35) (1188168 on 'gpu_p13' partition)
66670
  [2021-09-25 18:10:35] PULSE: tr8-104B is running for 13:43:34 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66668
  time (ms)
66669
  [2021-09-25 18:10:35] PULSE: tr8-104B is scheduled to start in 17:15:59 (at 2021-09-26T11:26:35) (1188168 on 'gpu_p13' partition)
66670
  [2021-09-25 18:10:35] PULSE: tr8-104B is running for 13:43:34 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)
66671
+ iteration 8700/ 159576 | consumed samples: 481360 | elapsed time per iteration (ms): 23943.4 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.071510E+00 | loss scale: 1024.0 | grad norm: 24381.440 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
66672
+ time (ms)
66673
+ iteration 8710/ 159576 | consumed samples: 483120 | elapsed time per iteration (ms): 23910.3 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.190697E+00 | loss scale: 1024.0 | grad norm: 41525.807 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
66674
+ time (ms)
66675
+ iteration 8720/ 159576 | consumed samples: 484880 | elapsed time per iteration (ms): 23923.5 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.332158E+00 | loss scale: 1024.0 | grad norm: 23580.074 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
66676
+ time (ms)
66677
+ iteration 8730/ 159576 | consumed samples: 486640 | elapsed time per iteration (ms): 23664.9 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.250137E+00 | loss scale: 1024.0 | grad norm: 33934.114 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
66678
+ time (ms)
66679
+ iteration 8740/ 159576 | consumed samples: 488400 | elapsed time per iteration (ms): 24002.8 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.134158E+00 | loss scale: 1024.0 | grad norm: 18917.778 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
66680
+ time (ms)
66681
+ iteration 8750/ 159576 | consumed samples: 490160 | elapsed time per iteration (ms): 23812.9 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.133132E+00 | loss scale: 1024.0 | grad norm: 24524.875 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
66682
+ time (ms)
66683
+ iteration 8760/ 159576 | consumed samples: 491920 | elapsed time per iteration (ms): 24164.0 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.089709E+00 | loss scale: 1024.0 | grad norm: 18466.411 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
66684
+ time (ms)
66685
+ iteration 8770/ 159576 | consumed samples: 493680 | elapsed time per iteration (ms): 23763.0 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.075866E+00 | loss scale: 1024.0 | grad norm: 21160.208 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
66686
+ time (ms)
66687
+ iteration 8780/ 159576 | consumed samples: 495440 | elapsed time per iteration (ms): 23757.0 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.105405E+00 | loss scale: 1024.0 | grad norm: 21012.399 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
66688
+ time (ms)
66689
+ iteration 8790/ 159576 | consumed samples: 497200 | elapsed time per iteration (ms): 23726.0 | learning rate: 6.000E-05 | global batch size: 176 | lm loss: 7.119524E+00 | loss scale: 1024.0 | grad norm: 19184.310 | num zeros: 0.0 | number of skipped iterations: 0 | number of nan iterations: 0 |
66690
+ time (ms)
66691
+ [2021-09-25 18:51:17] PULSE: tr8-104B is scheduled to start in 19:55:07 (at 2021-09-26T14:46:25) (1188168 on 'gpu_p13' partition)
66692
+ [2021-09-25 18:51:17] PULSE: tr8-104B is running for 14:24:16 since 2021-09-25T04:27:01 (1185639_1 on 'gpu_p13' partition (r6i5n[7-8],r6i6n0,r7i2n[4-5],r7i3n2,r7i6n[2-4],r7i7n[7-8],r8i0n[0,2-3,5-8],r8i1n[0,2-4],r8i2n8,r8i3n[0-2],r8i5n[3-4],r8i7n[3-8],r9i0n[0-5],r9i1n[0-3],r9i2n[3-6,8],r9i3n[0-1,7-8],r9i4n[0-3],r9i5n[3-8],r9i6n0)