plip commited on
Commit
83be9ba
1 Parent(s): 61b37e6

Training in progress, step 200000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ce00af67b32f5031634448ef59d59c554d1e00a55c8b464f822021954d2f458
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1864e8b6d73df803d6169b4a02883f655f8f1f03dba67d279ba50bf4aec8c451
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26f449ea92715ca64c5aa73950f43c9ea9816a4a5ad265403dda147f77444d50
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7193b0396efbae3c97d694737aabe6c7c60bae54e10275e5280d057220eeba1
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45496c46423f623843268f065bb3004f921eb295069e4d4e3f3d7236dcb94f89
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649f99ced15f6aa95053cf61e777a2d14c6173f37ab5b51c676618360f19400a
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45496c46423f623843268f065bb3004f921eb295069e4d4e3f3d7236dcb94f89
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649f99ced15f6aa95053cf61e777a2d14c6173f37ab5b51c676618360f19400a
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45496c46423f623843268f065bb3004f921eb295069e4d4e3f3d7236dcb94f89
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649f99ced15f6aa95053cf61e777a2d14c6173f37ab5b51c676618360f19400a
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45496c46423f623843268f065bb3004f921eb295069e4d4e3f3d7236dcb94f89
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649f99ced15f6aa95053cf61e777a2d14c6173f37ab5b51c676618360f19400a
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45496c46423f623843268f065bb3004f921eb295069e4d4e3f3d7236dcb94f89
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649f99ced15f6aa95053cf61e777a2d14c6173f37ab5b51c676618360f19400a
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45496c46423f623843268f065bb3004f921eb295069e4d4e3f3d7236dcb94f89
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649f99ced15f6aa95053cf61e777a2d14c6173f37ab5b51c676618360f19400a
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45496c46423f623843268f065bb3004f921eb295069e4d4e3f3d7236dcb94f89
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649f99ced15f6aa95053cf61e777a2d14c6173f37ab5b51c676618360f19400a
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45496c46423f623843268f065bb3004f921eb295069e4d4e3f3d7236dcb94f89
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649f99ced15f6aa95053cf61e777a2d14c6173f37ab5b51c676618360f19400a
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0439027dd6f55adc764cb1317d963df8ccf36442066dafecce10b3f538efa8e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e461416c63c82734faef19b4949af829ad430bff342d30400b1c1da0cafb58f
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.842244762730006,
5
- "global_step": 190000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3806,11 +3806,211 @@
3806
  "eval_samples_per_second": 780.471,
3807
  "eval_steps_per_second": 12.488,
3808
  "step": 190000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3809
  }
3810
  ],
3811
  "max_steps": 500000,
3812
  "num_train_epochs": 13,
3813
- "total_flos": 6.07020172106332e+21,
3814
  "trial_name": null,
3815
  "trial_params": null
3816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.097099750242112,
5
+ "global_step": 200000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3806
  "eval_samples_per_second": 780.471,
3807
  "eval_steps_per_second": 12.488,
3808
  "step": 190000
3809
+ },
3810
+ {
3811
+ "epoch": 4.85,
3812
+ "learning_rate": 0.00022146867474920118,
3813
+ "loss": 0.2961,
3814
+ "step": 190500
3815
+ },
3816
+ {
3817
+ "epoch": 4.87,
3818
+ "learning_rate": 0.00022104215397571484,
3819
+ "loss": 0.2959,
3820
+ "step": 191000
3821
+ },
3822
+ {
3823
+ "epoch": 4.87,
3824
+ "eval_loss": 0.8258107900619507,
3825
+ "eval_runtime": 1.248,
3826
+ "eval_samples_per_second": 801.284,
3827
+ "eval_steps_per_second": 12.821,
3828
+ "step": 191000
3829
+ },
3830
+ {
3831
+ "epoch": 4.88,
3832
+ "learning_rate": 0.0002206149109758135,
3833
+ "loss": 0.2954,
3834
+ "step": 191500
3835
+ },
3836
+ {
3837
+ "epoch": 4.89,
3838
+ "learning_rate": 0.00022018695042175818,
3839
+ "loss": 0.295,
3840
+ "step": 192000
3841
+ },
3842
+ {
3843
+ "epoch": 4.89,
3844
+ "eval_loss": 0.8216790556907654,
3845
+ "eval_runtime": 1.2627,
3846
+ "eval_samples_per_second": 791.937,
3847
+ "eval_steps_per_second": 12.671,
3848
+ "step": 192000
3849
+ },
3850
+ {
3851
+ "epoch": 4.91,
3852
+ "learning_rate": 0.00021975827699365693,
3853
+ "loss": 0.2949,
3854
+ "step": 192500
3855
+ },
3856
+ {
3857
+ "epoch": 4.92,
3858
+ "learning_rate": 0.00021932889537941365,
3859
+ "loss": 0.295,
3860
+ "step": 193000
3861
+ },
3862
+ {
3863
+ "epoch": 4.92,
3864
+ "eval_loss": 0.8129534721374512,
3865
+ "eval_runtime": 1.2745,
3866
+ "eval_samples_per_second": 784.628,
3867
+ "eval_steps_per_second": 12.554,
3868
+ "step": 193000
3869
+ },
3870
+ {
3871
+ "epoch": 4.93,
3872
+ "learning_rate": 0.0002188988102746769,
3873
+ "loss": 0.2949,
3874
+ "step": 193500
3875
+ },
3876
+ {
3877
+ "epoch": 4.94,
3878
+ "learning_rate": 0.0002184680263827885,
3879
+ "loss": 0.2968,
3880
+ "step": 194000
3881
+ },
3882
+ {
3883
+ "epoch": 4.94,
3884
+ "eval_loss": 0.8097214698791504,
3885
+ "eval_runtime": 1.2465,
3886
+ "eval_samples_per_second": 802.226,
3887
+ "eval_steps_per_second": 12.836,
3888
+ "step": 194000
3889
+ },
3890
+ {
3891
+ "epoch": 4.96,
3892
+ "learning_rate": 0.00021803654841473204,
3893
+ "loss": 0.2953,
3894
+ "step": 194500
3895
+ },
3896
+ {
3897
+ "epoch": 4.97,
3898
+ "learning_rate": 0.00021760438108908142,
3899
+ "loss": 0.2947,
3900
+ "step": 195000
3901
+ },
3902
+ {
3903
+ "epoch": 4.97,
3904
+ "eval_loss": 0.806962788105011,
3905
+ "eval_runtime": 1.2893,
3906
+ "eval_samples_per_second": 775.619,
3907
+ "eval_steps_per_second": 12.41,
3908
+ "step": 195000
3909
+ },
3910
+ {
3911
+ "epoch": 4.98,
3912
+ "learning_rate": 0.0002171715291319494,
3913
+ "loss": 0.2944,
3914
+ "step": 195500
3915
+ },
3916
+ {
3917
+ "epoch": 5.0,
3918
+ "learning_rate": 0.0002167379972769355,
3919
+ "loss": 0.2941,
3920
+ "step": 196000
3921
+ },
3922
+ {
3923
+ "epoch": 5.0,
3924
+ "eval_loss": 0.8226540684700012,
3925
+ "eval_runtime": 1.2611,
3926
+ "eval_samples_per_second": 792.99,
3927
+ "eval_steps_per_second": 12.688,
3928
+ "step": 196000
3929
+ },
3930
+ {
3931
+ "epoch": 5.01,
3932
+ "learning_rate": 0.0002163037902650747,
3933
+ "loss": 0.2948,
3934
+ "step": 196500
3935
+ },
3936
+ {
3937
+ "epoch": 5.02,
3938
+ "learning_rate": 0.0002158689128447853,
3939
+ "loss": 0.294,
3940
+ "step": 197000
3941
+ },
3942
+ {
3943
+ "epoch": 5.02,
3944
+ "eval_loss": 0.813295841217041,
3945
+ "eval_runtime": 1.2515,
3946
+ "eval_samples_per_second": 799.026,
3947
+ "eval_steps_per_second": 12.784,
3948
+ "step": 197000
3949
+ },
3950
+ {
3951
+ "epoch": 5.03,
3952
+ "learning_rate": 0.00021543336977181704,
3953
+ "loss": 0.2948,
3954
+ "step": 197500
3955
+ },
3956
+ {
3957
+ "epoch": 5.05,
3958
+ "learning_rate": 0.00021499716580919933,
3959
+ "loss": 0.2947,
3960
+ "step": 198000
3961
+ },
3962
+ {
3963
+ "epoch": 5.05,
3964
+ "eval_loss": 0.8141849637031555,
3965
+ "eval_runtime": 1.3465,
3966
+ "eval_samples_per_second": 742.661,
3967
+ "eval_steps_per_second": 11.883,
3968
+ "step": 198000
3969
+ },
3970
+ {
3971
+ "epoch": 5.06,
3972
+ "learning_rate": 0.00021456030572718866,
3973
+ "loss": 0.2944,
3974
+ "step": 198500
3975
+ },
3976
+ {
3977
+ "epoch": 5.07,
3978
+ "learning_rate": 0.000214122794303217,
3979
+ "loss": 0.2941,
3980
+ "step": 199000
3981
+ },
3982
+ {
3983
+ "epoch": 5.07,
3984
+ "eval_loss": 0.8159200549125671,
3985
+ "eval_runtime": 1.3775,
3986
+ "eval_samples_per_second": 725.952,
3987
+ "eval_steps_per_second": 11.615,
3988
+ "step": 199000
3989
+ },
3990
+ {
3991
+ "epoch": 5.08,
3992
+ "learning_rate": 0.00021368463632183912,
3993
+ "loss": 0.2948,
3994
+ "step": 199500
3995
+ },
3996
+ {
3997
+ "epoch": 5.1,
3998
+ "learning_rate": 0.00021324583657468055,
3999
+ "loss": 0.294,
4000
+ "step": 200000
4001
+ },
4002
+ {
4003
+ "epoch": 5.1,
4004
+ "eval_loss": 0.827436625957489,
4005
+ "eval_runtime": 1.3211,
4006
+ "eval_samples_per_second": 756.949,
4007
+ "eval_steps_per_second": 12.111,
4008
+ "step": 200000
4009
  }
4010
  ],
4011
  "max_steps": 500000,
4012
  "num_train_epochs": 13,
4013
+ "total_flos": 6.389675380924775e+21,
4014
  "trial_name": null,
4015
  "trial_params": null
4016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26f449ea92715ca64c5aa73950f43c9ea9816a4a5ad265403dda147f77444d50
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7193b0396efbae3c97d694737aabe6c7c60bae54e10275e5280d057220eeba1
3
  size 102501541