plip commited on
Commit
77978d0
1 Parent(s): 02e363a

Training in progress, step 330000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:229a42ebe682c3ef3fa77824f414f8052ce22269902d2cf833bbceae01b4ee94
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a22ef790a0c2f4f62e66619584ea27ceb6ef6b7c3985ca2d9f4be0901d33fb7e
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2234ce12404bc4801fab809254f08127bd71fca09a8dab3e0c720a225f0006ed
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91c8cb7bfe62bed4ad754b48993f10b7da9c98b6075ef529d78d9d2d0a013fff
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b54a8c5749446bd4a65592cc408c92cd1c1a63789b632ec709bae613de880e8
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d377ee122e46b8fa76b1d2b74be365b9135d1e8e6cede7fa0fe3ff751bb7334
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e12e956d2c1594d69772425e394d5c7340f5558535a744e143a62985c9f6b3a
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a10a546cee3b4518e4cb32764590234884d4cbc2be79dd9856924e21cdd3fd8
3
+ size 14567
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5034bb9708a34c35b3368c1e0fff63513e2cb5f1c0dd56fffa0328312b7e4831
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a33a6a869db9bf49441e17af5679e56376e6e829633250eded5937f47c2020aa
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e06a3dca10a2bccff3cb0c6a7b393b12b0f08503dc63d7b7533eeb15ed495c6
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03c277499f6406d581d5a424d585bc71aa0cb5fc2c33d575b72b1f53a009a2c5
3
+ size 14567
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a94f09290bc683f43d0869ce2fa5f9751184b5e70371828d250a3714d35fe40
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b59b474b08b06647b657ae7d97ae9d06a436e1b6da58f15374f2b25278144cf8
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4915e195da15bfd64d34239234d248cd0ab1ad7df671f2845974753597da8bc3
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d0b7236bb0eefa8365b0f6eccaff3b57014a0d4fca20545eaa932daadd82f47
3
+ size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:057d8e4139ad7708d7871dd8361365fbd9951b2ae3daf5aded867e56c2fe457c
3
  size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ec7b72e871e355904aa3d42031ed74fefb8d0ce9e7a1a82df41ead92a13bbee
3
  size 14439
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7115cc7cbcc32a343bb9b4e7b15f1fa12bd3bb61d63d5248eaa0a65935d6e80c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a490aaf46199ae217198d1b46bb5a0022da5901da5b5a0d9b96059f1a94a188
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5673377a057c7734bd1a0ee14d972f6f3bfc67bb8208ac49ae618347d18d616b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:636cb28fce30ad56f68aface20193360fd815697da4c2ec39f5ca647b5e6b45b
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.804221943074236,
5
- "global_step": 320000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -6406,11 +6406,211 @@
6406
  "eval_samples_per_second": 2004.192,
6407
  "eval_steps_per_second": 32.067,
6408
  "step": 320000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6409
  }
6410
  ],
6411
  "max_steps": 500000,
6412
  "num_train_epochs": 16,
6413
- "total_flos": 1.0223516549216217e+22,
6414
  "trial_name": null,
6415
  "trial_params": null
6416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.110603878795306,
5
+ "global_step": 330000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
6406
  "eval_samples_per_second": 2004.192,
6407
  "eval_steps_per_second": 32.067,
6408
  "step": 320000
6409
+ },
6410
+ {
6411
+ "epoch": 9.82,
6412
+ "learning_rate": 0.00010073144655927253,
6413
+ "loss": 0.3273,
6414
+ "step": 320500
6415
+ },
6416
+ {
6417
+ "epoch": 9.83,
6418
+ "learning_rate": 0.0001002870876838929,
6419
+ "loss": 0.3276,
6420
+ "step": 321000
6421
+ },
6422
+ {
6423
+ "epoch": 9.83,
6424
+ "eval_loss": 0.7801169157028198,
6425
+ "eval_runtime": 0.505,
6426
+ "eval_samples_per_second": 1980.071,
6427
+ "eval_steps_per_second": 31.681,
6428
+ "step": 321000
6429
+ },
6430
+ {
6431
+ "epoch": 9.85,
6432
+ "learning_rate": 9.984332714015662e-05,
6433
+ "loss": 0.3272,
6434
+ "step": 321500
6435
+ },
6436
+ {
6437
+ "epoch": 9.87,
6438
+ "learning_rate": 9.94001697809578e-05,
6439
+ "loss": 0.3273,
6440
+ "step": 322000
6441
+ },
6442
+ {
6443
+ "epoch": 9.87,
6444
+ "eval_loss": 0.7831940650939941,
6445
+ "eval_runtime": 0.5116,
6446
+ "eval_samples_per_second": 1954.711,
6447
+ "eval_steps_per_second": 31.275,
6448
+ "step": 322000
6449
+ },
6450
+ {
6451
+ "epoch": 9.88,
6452
+ "learning_rate": 9.895762045259445e-05,
6453
+ "loss": 0.3274,
6454
+ "step": 322500
6455
+ },
6456
+ {
6457
+ "epoch": 9.9,
6458
+ "learning_rate": 9.851568399471498e-05,
6459
+ "loss": 0.3277,
6460
+ "step": 323000
6461
+ },
6462
+ {
6463
+ "epoch": 9.9,
6464
+ "eval_loss": 0.7861186265945435,
6465
+ "eval_runtime": 0.4926,
6466
+ "eval_samples_per_second": 2030.062,
6467
+ "eval_steps_per_second": 32.481,
6468
+ "step": 323000
6469
+ },
6470
+ {
6471
+ "epoch": 9.91,
6472
+ "learning_rate": 9.807436524026574e-05,
6473
+ "loss": 0.3275,
6474
+ "step": 323500
6475
+ },
6476
+ {
6477
+ "epoch": 9.93,
6478
+ "learning_rate": 9.763366901543801e-05,
6479
+ "loss": 0.3269,
6480
+ "step": 324000
6481
+ },
6482
+ {
6483
+ "epoch": 9.93,
6484
+ "eval_loss": 0.7829710841178894,
6485
+ "eval_runtime": 0.5448,
6486
+ "eval_samples_per_second": 1835.417,
6487
+ "eval_steps_per_second": 29.367,
6488
+ "step": 324000
6489
+ },
6490
+ {
6491
+ "epoch": 9.94,
6492
+ "learning_rate": 9.719360013961495e-05,
6493
+ "loss": 0.3269,
6494
+ "step": 324500
6495
+ },
6496
+ {
6497
+ "epoch": 9.96,
6498
+ "learning_rate": 9.675416342531944e-05,
6499
+ "loss": 0.3269,
6500
+ "step": 325000
6501
+ },
6502
+ {
6503
+ "epoch": 9.96,
6504
+ "eval_loss": 0.7878097891807556,
6505
+ "eval_runtime": 0.4984,
6506
+ "eval_samples_per_second": 2006.57,
6507
+ "eval_steps_per_second": 32.105,
6508
+ "step": 325000
6509
+ },
6510
+ {
6511
+ "epoch": 9.97,
6512
+ "learning_rate": 9.631536367816086e-05,
6513
+ "loss": 0.3282,
6514
+ "step": 325500
6515
+ },
6516
+ {
6517
+ "epoch": 9.99,
6518
+ "learning_rate": 9.587720569678299e-05,
6519
+ "loss": 0.3267,
6520
+ "step": 326000
6521
+ },
6522
+ {
6523
+ "epoch": 9.99,
6524
+ "eval_loss": 0.7815366387367249,
6525
+ "eval_runtime": 0.5069,
6526
+ "eval_samples_per_second": 1972.941,
6527
+ "eval_steps_per_second": 31.567,
6528
+ "step": 326000
6529
+ },
6530
+ {
6531
+ "epoch": 10.0,
6532
+ "learning_rate": 9.543969427281131e-05,
6533
+ "loss": 0.3268,
6534
+ "step": 326500
6535
+ },
6536
+ {
6537
+ "epoch": 10.02,
6538
+ "learning_rate": 9.500283419080062e-05,
6539
+ "loss": 0.3269,
6540
+ "step": 327000
6541
+ },
6542
+ {
6543
+ "epoch": 10.02,
6544
+ "eval_loss": 0.7789347767829895,
6545
+ "eval_runtime": 0.5192,
6546
+ "eval_samples_per_second": 1925.943,
6547
+ "eval_steps_per_second": 30.815,
6548
+ "step": 327000
6549
+ },
6550
+ {
6551
+ "epoch": 10.03,
6552
+ "learning_rate": 9.45666302281829e-05,
6553
+ "loss": 0.3268,
6554
+ "step": 327500
6555
+ },
6556
+ {
6557
+ "epoch": 10.05,
6558
+ "learning_rate": 9.413108715521467e-05,
6559
+ "loss": 0.3266,
6560
+ "step": 328000
6561
+ },
6562
+ {
6563
+ "epoch": 10.05,
6564
+ "eval_loss": 0.7733815908432007,
6565
+ "eval_runtime": 0.5128,
6566
+ "eval_samples_per_second": 1950.206,
6567
+ "eval_steps_per_second": 31.203,
6568
+ "step": 328000
6569
+ },
6570
+ {
6571
+ "epoch": 10.06,
6572
+ "learning_rate": 9.369620973492525e-05,
6573
+ "loss": 0.3265,
6574
+ "step": 328500
6575
+ },
6576
+ {
6577
+ "epoch": 10.08,
6578
+ "learning_rate": 9.326200272306445e-05,
6579
+ "loss": 0.3262,
6580
+ "step": 329000
6581
+ },
6582
+ {
6583
+ "epoch": 10.08,
6584
+ "eval_loss": 0.7761348485946655,
6585
+ "eval_runtime": 0.5067,
6586
+ "eval_samples_per_second": 1973.572,
6587
+ "eval_steps_per_second": 31.577,
6588
+ "step": 329000
6589
+ },
6590
+ {
6591
+ "epoch": 10.1,
6592
+ "learning_rate": 9.282847086805059e-05,
6593
+ "loss": 0.3267,
6594
+ "step": 329500
6595
+ },
6596
+ {
6597
+ "epoch": 10.11,
6598
+ "learning_rate": 9.239561891091853e-05,
6599
+ "loss": 0.3264,
6600
+ "step": 330000
6601
+ },
6602
+ {
6603
+ "epoch": 10.11,
6604
+ "eval_loss": 0.7795162200927734,
6605
+ "eval_runtime": 0.5194,
6606
+ "eval_samples_per_second": 1925.383,
6607
+ "eval_steps_per_second": 30.806,
6608
+ "step": 330000
6609
  }
6610
  ],
6611
  "max_steps": 500000,
6612
  "num_train_epochs": 16,
6613
+ "total_flos": 1.0542995700038742e+22,
6614
  "trial_name": null,
6615
  "trial_params": null
6616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2234ce12404bc4801fab809254f08127bd71fca09a8dab3e0c720a225f0006ed
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91c8cb7bfe62bed4ad754b48993f10b7da9c98b6075ef529d78d9d2d0a013fff
3
  size 102501541