plip committed on
Commit
26a4cde
1 Parent(s): 73ac59d

Training in progress, step 200000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae8b37bb3ef93266027d70d0e00d87efb44bc4019ad917063c09c81cee6c0fef
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66a034e9e1a9ae7c11a38b09da94316f7af857583cc75efaab5288ad761a4759
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eef92818d243a4d1828d62172dbe43bff5dcd02b87cc9982faba5e24c9f7d637
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfe293a94506a0e024ec476b57e64723da55912f72fe82601c25ed8e88dd3d77
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c27ac61f2483d09b304f5bf47988fc50deadbc74a73d6e7fefa73c39e537290c
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27b65aef852c56a6070eb0b0c0dcb2609bdee33892af326659c5e916144ee935
3
+ size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e0fb22612ddda1e707530c7b94de033aded05bbe199ff38f1fecf009e4774c0
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50df4816cbaa34ba1b53e6066397f6b5dfa9e0b5a42191fa041b389e35141af5
3
+ size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2215278ab48eb111ec43fff1ae065a4f7afae50fbcb1a654d0f802e588d405a9
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99c375b0eb05f3b84a9ffd242709f5cafdfac9ec9f6eb782f404bfa519ad8d88
3
+ size 14439
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dd18359c537f071111eb3e8deb7a71a5fcf4dbb9405ac62ad7e7fda32e4ae8a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1225a9177adba9ce38ceb1af0c8098a1a68d43fed76f6471ce68525b7fe2bef
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:382b413dfab48b07c22c47e33b4e458bbb779fe715bce1ce251c374236864125
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2159e22012f5706c7baac54c9475075e6a80f8026c23532336808d0140a2feea
3
+ size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bb8e841c948ba30f2f4604f711e7d297aa99f75c94f47e268088ac349f37f65
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad5204915df8edf520dd956c8e88b1a505319e42237567e2e0d2cc11f8993db5
3
+ size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c349e058fdec7be9df0364808b4348ab83fd3dc0926ee604186a2bf094abe0b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0083ff5459b01e74d8269a09b7fb0407b955c9098992dc865792b4c832e05385
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e7ced809382dda0f6a1445f89e0ce2bdd33b73bdfa295160cd94e6048944943
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef9a8f48c4b1e1e02dc723041280e57b4a38fe76785f0eae322407bb67501005
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0439027dd6f55adc764cb1317d963df8ccf36442066dafecce10b3f538efa8e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e461416c63c82734faef19b4949af829ad430bff342d30400b1c1da0cafb58f
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.821256778700328,
5
- "global_step": 190000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3806,11 +3806,211 @@
3806
  "eval_samples_per_second": 1917.115,
3807
  "eval_steps_per_second": 30.674,
3808
  "step": 190000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3809
  }
3810
  ],
3811
  "max_steps": 500000,
3812
  "num_train_epochs": 16,
3813
- "total_flos": 6.070215696955621e+21,
3814
  "trial_name": null,
3815
  "trial_params": null
3816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.127638714421398,
5
+ "global_step": 200000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3806
  "eval_samples_per_second": 1917.115,
3807
  "eval_steps_per_second": 30.674,
3808
  "step": 190000
3809
+ },
3810
+ {
3811
+ "epoch": 5.84,
3812
+ "learning_rate": 0.00022146867474920118,
3813
+ "loss": 0.347,
3814
+ "step": 190500
3815
+ },
3816
+ {
3817
+ "epoch": 5.85,
3818
+ "learning_rate": 0.00022104215397571484,
3819
+ "loss": 0.3473,
3820
+ "step": 191000
3821
+ },
3822
+ {
3823
+ "epoch": 5.85,
3824
+ "eval_loss": 0.7774147391319275,
3825
+ "eval_runtime": 0.5178,
3826
+ "eval_samples_per_second": 1931.312,
3827
+ "eval_steps_per_second": 30.901,
3828
+ "step": 191000
3829
+ },
3830
+ {
3831
+ "epoch": 5.87,
3832
+ "learning_rate": 0.0002206149109758135,
3833
+ "loss": 0.3472,
3834
+ "step": 191500
3835
+ },
3836
+ {
3837
+ "epoch": 5.88,
3838
+ "learning_rate": 0.00022018695042175818,
3839
+ "loss": 0.3482,
3840
+ "step": 192000
3841
+ },
3842
+ {
3843
+ "epoch": 5.88,
3844
+ "eval_loss": 0.7872973680496216,
3845
+ "eval_runtime": 0.5262,
3846
+ "eval_samples_per_second": 1900.242,
3847
+ "eval_steps_per_second": 30.404,
3848
+ "step": 192000
3849
+ },
3850
+ {
3851
+ "epoch": 5.9,
3852
+ "learning_rate": 0.00021975827699365693,
3853
+ "loss": 0.3467,
3854
+ "step": 192500
3855
+ },
3856
+ {
3857
+ "epoch": 5.91,
3858
+ "learning_rate": 0.00021932889537941365,
3859
+ "loss": 0.3482,
3860
+ "step": 193000
3861
+ },
3862
+ {
3863
+ "epoch": 5.91,
3864
+ "eval_loss": 0.7847963571548462,
3865
+ "eval_runtime": 0.5508,
3866
+ "eval_samples_per_second": 1815.574,
3867
+ "eval_steps_per_second": 29.049,
3868
+ "step": 193000
3869
+ },
3870
+ {
3871
+ "epoch": 5.93,
3872
+ "learning_rate": 0.0002188988102746769,
3873
+ "loss": 0.3467,
3874
+ "step": 193500
3875
+ },
3876
+ {
3877
+ "epoch": 5.94,
3878
+ "learning_rate": 0.0002184680263827885,
3879
+ "loss": 0.3484,
3880
+ "step": 194000
3881
+ },
3882
+ {
3883
+ "epoch": 5.94,
3884
+ "eval_loss": 0.7891322374343872,
3885
+ "eval_runtime": 0.5219,
3886
+ "eval_samples_per_second": 1915.989,
3887
+ "eval_steps_per_second": 30.656,
3888
+ "step": 194000
3889
+ },
3890
+ {
3891
+ "epoch": 5.96,
3892
+ "learning_rate": 0.00021803654841473204,
3893
+ "loss": 0.346,
3894
+ "step": 194500
3895
+ },
3896
+ {
3897
+ "epoch": 5.97,
3898
+ "learning_rate": 0.00021760438108908142,
3899
+ "loss": 0.3457,
3900
+ "step": 195000
3901
+ },
3902
+ {
3903
+ "epoch": 5.97,
3904
+ "eval_loss": 0.784561276435852,
3905
+ "eval_runtime": 0.5317,
3906
+ "eval_samples_per_second": 1880.904,
3907
+ "eval_steps_per_second": 30.094,
3908
+ "step": 195000
3909
+ },
3910
+ {
3911
+ "epoch": 5.99,
3912
+ "learning_rate": 0.0002171715291319494,
3913
+ "loss": 0.3479,
3914
+ "step": 195500
3915
+ },
3916
+ {
3917
+ "epoch": 6.01,
3918
+ "learning_rate": 0.0002167379972769355,
3919
+ "loss": 0.3465,
3920
+ "step": 196000
3921
+ },
3922
+ {
3923
+ "epoch": 6.01,
3924
+ "eval_loss": 0.7826551795005798,
3925
+ "eval_runtime": 0.519,
3926
+ "eval_samples_per_second": 1926.794,
3927
+ "eval_steps_per_second": 30.829,
3928
+ "step": 196000
3929
+ },
3930
+ {
3931
+ "epoch": 6.02,
3932
+ "learning_rate": 0.0002163037902650747,
3933
+ "loss": 0.3459,
3934
+ "step": 196500
3935
+ },
3936
+ {
3937
+ "epoch": 6.04,
3938
+ "learning_rate": 0.0002158689128447853,
3939
+ "loss": 0.3456,
3940
+ "step": 197000
3941
+ },
3942
+ {
3943
+ "epoch": 6.04,
3944
+ "eval_loss": 0.7869973182678223,
3945
+ "eval_runtime": 0.5199,
3946
+ "eval_samples_per_second": 1923.629,
3947
+ "eval_steps_per_second": 30.778,
3948
+ "step": 197000
3949
+ },
3950
+ {
3951
+ "epoch": 6.05,
3952
+ "learning_rate": 0.00021543336977181704,
3953
+ "loss": 0.3456,
3954
+ "step": 197500
3955
+ },
3956
+ {
3957
+ "epoch": 6.07,
3958
+ "learning_rate": 0.00021499716580919933,
3959
+ "loss": 0.3458,
3960
+ "step": 198000
3961
+ },
3962
+ {
3963
+ "epoch": 6.07,
3964
+ "eval_loss": 0.7797828912734985,
3965
+ "eval_runtime": 0.5178,
3966
+ "eval_samples_per_second": 1931.291,
3967
+ "eval_steps_per_second": 30.901,
3968
+ "step": 198000
3969
+ },
3970
+ {
3971
+ "epoch": 6.08,
3972
+ "learning_rate": 0.00021456030572718866,
3973
+ "loss": 0.3456,
3974
+ "step": 198500
3975
+ },
3976
+ {
3977
+ "epoch": 6.1,
3978
+ "learning_rate": 0.000214122794303217,
3979
+ "loss": 0.3454,
3980
+ "step": 199000
3981
+ },
3982
+ {
3983
+ "epoch": 6.1,
3984
+ "eval_loss": 0.7822918891906738,
3985
+ "eval_runtime": 0.5124,
3986
+ "eval_samples_per_second": 1951.652,
3987
+ "eval_steps_per_second": 31.226,
3988
+ "step": 199000
3989
+ },
3990
+ {
3991
+ "epoch": 6.11,
3992
+ "learning_rate": 0.00021368463632183912,
3993
+ "loss": 0.3454,
3994
+ "step": 199500
3995
+ },
3996
+ {
3997
+ "epoch": 6.13,
3998
+ "learning_rate": 0.00021324583657468055,
3999
+ "loss": 0.3454,
4000
+ "step": 200000
4001
+ },
4002
+ {
4003
+ "epoch": 6.13,
4004
+ "eval_loss": 0.779866635799408,
4005
+ "eval_runtime": 0.5273,
4006
+ "eval_samples_per_second": 1896.43,
4007
+ "eval_steps_per_second": 30.343,
4008
+ "step": 200000
4009
  }
4010
  ],
4011
  "max_steps": 500000,
4012
  "num_train_epochs": 16,
4013
+ "total_flos": 6.389694847778145e+21,
4014
  "trial_name": null,
4015
  "trial_params": null
4016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eef92818d243a4d1828d62172dbe43bff5dcd02b87cc9982faba5e24c9f7d637
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfe293a94506a0e024ec476b57e64723da55912f72fe82601c25ed8e88dd3d77
3
  size 102501541