plip commited on
Commit
8cc5608
1 Parent(s): 7409293

Training in progress, step 490000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:915c0a3320162732d7d0277198910bd1c5fe36e91a2494488dcd5fe9ecfa67b3
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2e2116038f17a755ee0cd7f714c11f53fb07e5f62178c545ddce403c568ffbe
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8136bbdd727fc3ba0cd916f87b90e8d7992cbeeded2bac704ee561d225bd54ff
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6673bee6889f96f4d0585736b0bbd0104ce06075881649e694ae573ad1d2887a
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58dd7aa49620cc3a97afcabbb7850c385a241b9dcc8d07d333d6f65040a60603
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:644e2fc163f368345d150d3fc83a57447c36fc56a8c5b1ac9505e0d54bf78bd0
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02e70e32ef25d8d010a4b8053a0291d1477416df257354e41c4a63cf4dc2e643
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a9725305d551fdabba33dd56f81db3b2c581f84aafc700effd4e6475b7ec812
3
+ size 14439
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e68cc11f4318aee75c400e1894d33192a70b9883c303c9fa9cedc5946464e08
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbe151312b54baa9a4e64116f572138b7b71dec85430cd296cb3640e4c2da8a4
3
+ size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64a46f3a13aa9f98adfc5de8699e24132915127b3a6b2cea5827a2046f9700ac
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:368b6ac98a14548e894c6dec52e2926741be3331b9c05acbb8210a7533733def
3
+ size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb51fb524aa906bf1e2b1400af5152fa96888bd9aa6527a094a0343f901ed25d
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb22d9d447646758c04766cf11ca8c2e8fc19a36cd697a9779fc029a142baac3
3
+ size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35c12f52a705d4e5218bd23d5bd9019926a903ddc54e4326eae90cf72f0b4f8f
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:978268c9f7cbaa0bf6afcfb4a36b2945ad3b7df5c70456c1598647d77f700d81
3
+ size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4e0d82ec0129c8f13f015b8a1236f72049a8e4b658d7d70897f776f9bde416e
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea73efb8fdbffbda3c38be20fe382b5f7cbbd8a80c55ee21bf5ba148273fa6a6
3
+ size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f58c7e90cbf160b31d955598aee9f14bbda5806f105e3c62619fe3545749dd33
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:363c6a12aae6b2a1c1924495d50178e5b2b9c07c6657605723562b286ea4de81
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebbfa680b1187d22cc7371654116ef29dab3c85749ad34b845956736ad3b3612
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d798ff13d72fe751bc0ea721c37eb1e98064dde5819b90f3504db53fdceee97
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 14.706332914611355,
5
- "global_step": 480000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -9606,11 +9606,211 @@
9606
  "eval_samples_per_second": 1946.296,
9607
  "eval_steps_per_second": 31.141,
9608
  "step": 480000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9609
  }
9610
  ],
9611
  "max_steps": 500000,
9612
  "num_train_epochs": 16,
9613
- "total_flos": 1.5335270829848338e+22,
9614
  "trial_name": null,
9615
  "trial_params": null
9616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.012714850332424,
5
+ "global_step": 490000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
9606
  "eval_samples_per_second": 1946.296,
9607
  "eval_steps_per_second": 31.141,
9608
  "step": 480000
9609
+ },
9610
+ {
9611
+ "epoch": 14.72,
9612
+ "learning_rate": 1.1204252864868377e-05,
9613
+ "loss": 0.3127,
9614
+ "step": 480500
9615
+ },
9616
+ {
9617
+ "epoch": 14.74,
9618
+ "learning_rate": 1.1143368309400725e-05,
9619
+ "loss": 0.3125,
9620
+ "step": 481000
9621
+ },
9622
+ {
9623
+ "epoch": 14.74,
9624
+ "eval_loss": 0.7771226763725281,
9625
+ "eval_runtime": 0.5065,
9626
+ "eval_samples_per_second": 1974.24,
9627
+ "eval_steps_per_second": 31.588,
9628
+ "step": 481000
9629
+ },
9630
+ {
9631
+ "epoch": 14.75,
9632
+ "learning_rate": 1.1084056947009348e-05,
9633
+ "loss": 0.3125,
9634
+ "step": 481500
9635
+ },
9636
+ {
9637
+ "epoch": 14.77,
9638
+ "learning_rate": 1.1026319426313837e-05,
9639
+ "loss": 0.3124,
9640
+ "step": 482000
9641
+ },
9642
+ {
9643
+ "epoch": 14.77,
9644
+ "eval_loss": 0.7745999693870544,
9645
+ "eval_runtime": 0.4972,
9646
+ "eval_samples_per_second": 2011.082,
9647
+ "eval_steps_per_second": 32.177,
9648
+ "step": 482000
9649
+ },
9650
+ {
9651
+ "epoch": 14.78,
9652
+ "learning_rate": 1.097015637872247e-05,
9653
+ "loss": 0.3126,
9654
+ "step": 482500
9655
+ },
9656
+ {
9657
+ "epoch": 14.8,
9658
+ "learning_rate": 1.0915568418425301e-05,
9659
+ "loss": 0.3125,
9660
+ "step": 483000
9661
+ },
9662
+ {
9663
+ "epoch": 14.8,
9664
+ "eval_loss": 0.7761328220367432,
9665
+ "eval_runtime": 0.5155,
9666
+ "eval_samples_per_second": 1939.967,
9667
+ "eval_steps_per_second": 31.039,
9668
+ "step": 483000
9669
+ },
9670
+ {
9671
+ "epoch": 14.81,
9672
+ "learning_rate": 1.0862556142387571e-05,
9673
+ "loss": 0.3126,
9674
+ "step": 483500
9675
+ },
9676
+ {
9677
+ "epoch": 14.83,
9678
+ "learning_rate": 1.081112013034298e-05,
9679
+ "loss": 0.3127,
9680
+ "step": 484000
9681
+ },
9682
+ {
9683
+ "epoch": 14.83,
9684
+ "eval_loss": 0.7775337100028992,
9685
+ "eval_runtime": 0.5093,
9686
+ "eval_samples_per_second": 1963.507,
9687
+ "eval_steps_per_second": 31.416,
9688
+ "step": 484000
9689
+ },
9690
+ {
9691
+ "epoch": 14.84,
9692
+ "learning_rate": 1.0761260944787561e-05,
9693
+ "loss": 0.313,
9694
+ "step": 484500
9695
+ },
9696
+ {
9697
+ "epoch": 14.86,
9698
+ "learning_rate": 1.0712979130973347e-05,
9699
+ "loss": 0.3126,
9700
+ "step": 485000
9701
+ },
9702
+ {
9703
+ "epoch": 14.86,
9704
+ "eval_loss": 0.7740907073020935,
9705
+ "eval_runtime": 0.5188,
9706
+ "eval_samples_per_second": 1927.581,
9707
+ "eval_steps_per_second": 30.841,
9708
+ "step": 485000
9709
+ },
9710
+ {
9711
+ "epoch": 14.87,
9712
+ "learning_rate": 1.0666275216902535e-05,
9713
+ "loss": 0.3125,
9714
+ "step": 485500
9715
+ },
9716
+ {
9717
+ "epoch": 14.89,
9718
+ "learning_rate": 1.0621149713321656e-05,
9719
+ "loss": 0.3128,
9720
+ "step": 486000
9721
+ },
9722
+ {
9723
+ "epoch": 14.89,
9724
+ "eval_loss": 0.7765258550643921,
9725
+ "eval_runtime": 0.52,
9726
+ "eval_samples_per_second": 1923.021,
9727
+ "eval_steps_per_second": 30.768,
9728
+ "step": 486000
9729
+ },
9730
+ {
9731
+ "epoch": 14.91,
9732
+ "learning_rate": 1.0577603113715964e-05,
9733
+ "loss": 0.3125,
9734
+ "step": 486500
9735
+ },
9736
+ {
9737
+ "epoch": 14.92,
9738
+ "learning_rate": 1.0535635894304106e-05,
9739
+ "loss": 0.3126,
9740
+ "step": 487000
9741
+ },
9742
+ {
9743
+ "epoch": 14.92,
9744
+ "eval_loss": 0.7741805911064148,
9745
+ "eval_runtime": 0.5052,
9746
+ "eval_samples_per_second": 1979.424,
9747
+ "eval_steps_per_second": 31.671,
9748
+ "step": 487000
9749
+ },
9750
+ {
9751
+ "epoch": 14.94,
9752
+ "learning_rate": 1.0495248514032875e-05,
9753
+ "loss": 0.3123,
9754
+ "step": 487500
9755
+ },
9756
+ {
9757
+ "epoch": 14.95,
9758
+ "learning_rate": 1.045644141457218e-05,
9759
+ "loss": 0.3126,
9760
+ "step": 488000
9761
+ },
9762
+ {
9763
+ "epoch": 14.95,
9764
+ "eval_loss": 0.7744404077529907,
9765
+ "eval_runtime": 0.5273,
9766
+ "eval_samples_per_second": 1896.451,
9767
+ "eval_steps_per_second": 30.343,
9768
+ "step": 488000
9769
+ },
9770
+ {
9771
+ "epoch": 14.97,
9772
+ "learning_rate": 1.0419215020310254e-05,
9773
+ "loss": 0.3123,
9774
+ "step": 488500
9775
+ },
9776
+ {
9777
+ "epoch": 14.98,
9778
+ "learning_rate": 1.0383569738348988e-05,
9779
+ "loss": 0.3125,
9780
+ "step": 489000
9781
+ },
9782
+ {
9783
+ "epoch": 14.98,
9784
+ "eval_loss": 0.7724484205245972,
9785
+ "eval_runtime": 0.5078,
9786
+ "eval_samples_per_second": 1969.112,
9787
+ "eval_steps_per_second": 31.506,
9788
+ "step": 489000
9789
+ },
9790
+ {
9791
+ "epoch": 15.0,
9792
+ "learning_rate": 1.0349505958499436e-05,
9793
+ "loss": 0.3126,
9794
+ "step": 489500
9795
+ },
9796
+ {
9797
+ "epoch": 15.01,
9798
+ "learning_rate": 1.0317024053277693e-05,
9799
+ "loss": 0.3124,
9800
+ "step": 490000
9801
+ },
9802
+ {
9803
+ "epoch": 15.01,
9804
+ "eval_loss": 0.7753793001174927,
9805
+ "eval_runtime": 0.498,
9806
+ "eval_samples_per_second": 2008.198,
9807
+ "eval_steps_per_second": 32.131,
9808
+ "step": 490000
9809
  }
9810
  ],
9811
  "max_steps": 500000,
9812
  "num_train_epochs": 16,
9813
+ "total_flos": 1.5654749980670862e+22,
9814
  "trial_name": null,
9815
  "trial_params": null
9816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8136bbdd727fc3ba0cd916f87b90e8d7992cbeeded2bac704ee561d225bd54ff
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6673bee6889f96f4d0585736b0bbd0104ce06075881649e694ae573ad1d2887a
3
  size 102501541