plip commited on
Commit
d97278d
1 Parent(s): d137203

Training in progress, step 50000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60cb01bc9e67975e1bb1ed22126a2e438f937a6615e57c3f2220be60495920b2
3
  size 202193937
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23ff89d2a81bd757315b81207676f44c6832e84aae635f046c3e647c3040b483
3
  size 202193937
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fe754e765d5235d6d33bcfe88cdb3a933ccbfc6b7ea5ab60542395d9d85e684
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2aabbbc7ed5c34ff08e86aca48f8310b460422b65244f1250c9b83aff072675
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24c2467e9acf06a94a3ca165e312978b81f51dda2e7f32b8607c9d3f773c46e2
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d79bfab61db4aceee2eb018ca16c6d4240f46f1f2f65a7d0758ecdb28fb7c5ff
3
+ size 14439
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f10426f56c7c01ca78fb77f1b5bd9d6f429ab8feed08a9636c7f90c77533d382
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d41251322a5c7e3d079b6606a53e326a7bb151cabc9051e0857ad86ba9d2a108
3
+ size 14439
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50c64be9bba64fb9b54421cbdb5d49bbf62e8dede6378e3090056f6fda12cc3b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba57e5623e2946c142c99b408508d7eb57876cf791e80cad946e566180db53c3
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15fb56fed836ca09e88803cfd50127dd08d2e98851e7cca02b6a5cfa6f8d7f94
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:864755d58f82608f1190822ec9564d234220e4c5990982be3ccbb807c5a0be73
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c328439be9bb4cea23920597b019c57083a44b1906a345c7adf7efb277d292f
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e60e4e496eff781a98bdb365b0f44c5af8c43d94935dec1bfea72243fe40e318
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdb4fabc1b2d87119cb5ed67a0ca4df3e9e1c5d3ea269627d2e42cc2977023fd
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f14620f77e9f29a7b2fc64f011dd315353254d0795197f0559b275ff978e16d
3
+ size 14439
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15fb56fed836ca09e88803cfd50127dd08d2e98851e7cca02b6a5cfa6f8d7f94
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76ce42d7854aed6ba8f7821d926516d66fdbe5198bf7a96423e32af1d108a944
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ba5ebd9e984982fc400bda128508420216070834c053705f8708435245bde3f
3
- size 14439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2462e5ce80c7440f25a48dc31b2625ee60dfa64106981368292a46775f568768
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a74054da3729955ad5eacae83f875e2df84ec7d4ca3ed3437d6f2cf84557171f
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ec5c60f6b831a1ad5ababd554115ca1132f641d196d63d42183ef95c8827963
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.2255277428842795,
5
- "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -806,11 +806,211 @@
806
  "eval_samples_per_second": 1858.547,
807
  "eval_steps_per_second": 29.737,
808
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
809
  }
810
  ],
811
  "max_steps": 500000,
812
  "num_train_epochs": 16,
813
- "total_flos": 1.2779405671460241e+21,
814
  "trial_name": null,
815
  "trial_params": null
816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.5319096786053494,
5
+ "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
806
  "eval_samples_per_second": 1858.547,
807
  "eval_steps_per_second": 29.737,
808
  "step": 40000
809
+ },
810
+ {
811
+ "epoch": 1.24,
812
+ "learning_rate": 0.00029923873905521244,
813
+ "loss": 0.4471,
814
+ "step": 40500
815
+ },
816
+ {
817
+ "epoch": 1.26,
818
+ "learning_rate": 0.000299188879869657,
819
+ "loss": 0.446,
820
+ "step": 41000
821
+ },
822
+ {
823
+ "epoch": 1.26,
824
+ "eval_loss": 0.7949715852737427,
825
+ "eval_runtime": 0.5157,
826
+ "eval_samples_per_second": 1939.022,
827
+ "eval_steps_per_second": 31.024,
828
+ "step": 41000
829
+ },
830
+ {
831
+ "epoch": 1.27,
832
+ "learning_rate": 0.00029913744385761244,
833
+ "loss": 0.4446,
834
+ "step": 41500
835
+ },
836
+ {
837
+ "epoch": 1.29,
838
+ "learning_rate": 0.00029908443158157465,
839
+ "loss": 0.4437,
840
+ "step": 42000
841
+ },
842
+ {
843
+ "epoch": 1.29,
844
+ "eval_loss": 0.7941656112670898,
845
+ "eval_runtime": 0.5261,
846
+ "eval_samples_per_second": 1900.807,
847
+ "eval_steps_per_second": 30.413,
848
+ "step": 42000
849
+ },
850
+ {
851
+ "epoch": 1.3,
852
+ "learning_rate": 0.0002990298436212775,
853
+ "loss": 0.4422,
854
+ "step": 42500
855
+ },
856
+ {
857
+ "epoch": 1.32,
858
+ "learning_rate": 0.0002989736805736861,
859
+ "loss": 0.4413,
860
+ "step": 43000
861
+ },
862
+ {
863
+ "epoch": 1.32,
864
+ "eval_loss": 0.7861095666885376,
865
+ "eval_runtime": 0.5221,
866
+ "eval_samples_per_second": 1915.408,
867
+ "eval_steps_per_second": 30.647,
868
+ "step": 43000
869
+ },
870
+ {
871
+ "epoch": 1.33,
872
+ "learning_rate": 0.00029891594305299065,
873
+ "loss": 0.44,
874
+ "step": 43500
875
+ },
876
+ {
877
+ "epoch": 1.35,
878
+ "learning_rate": 0.00029885663169059926,
879
+ "loss": 0.4391,
880
+ "step": 44000
881
+ },
882
+ {
883
+ "epoch": 1.35,
884
+ "eval_loss": 0.7935870289802551,
885
+ "eval_runtime": 0.5329,
886
+ "eval_samples_per_second": 1876.586,
887
+ "eval_steps_per_second": 30.025,
888
+ "step": 44000
889
+ },
890
+ {
891
+ "epoch": 1.36,
892
+ "learning_rate": 0.0002987957471351316,
893
+ "loss": 0.4378,
894
+ "step": 44500
895
+ },
896
+ {
897
+ "epoch": 1.38,
898
+ "learning_rate": 0.00029873329005241137,
899
+ "loss": 0.4366,
900
+ "step": 45000
901
+ },
902
+ {
903
+ "epoch": 1.38,
904
+ "eval_loss": 0.7878534197807312,
905
+ "eval_runtime": 0.5149,
906
+ "eval_samples_per_second": 1941.994,
907
+ "eval_steps_per_second": 31.072,
908
+ "step": 45000
909
+ },
910
+ {
911
+ "epoch": 1.39,
912
+ "learning_rate": 0.00029866926112545925,
913
+ "loss": 0.4355,
914
+ "step": 45500
915
+ },
916
+ {
917
+ "epoch": 1.41,
918
+ "learning_rate": 0.00029860366105448534,
919
+ "loss": 0.4342,
920
+ "step": 46000
921
+ },
922
+ {
923
+ "epoch": 1.41,
924
+ "eval_loss": 0.7861126661300659,
925
+ "eval_runtime": 0.5143,
926
+ "eval_samples_per_second": 1944.264,
927
+ "eval_steps_per_second": 31.108,
928
+ "step": 46000
929
+ },
930
+ {
931
+ "epoch": 1.42,
932
+ "learning_rate": 0.00029853649055688143,
933
+ "loss": 0.4333,
934
+ "step": 46500
935
+ },
936
+ {
937
+ "epoch": 1.44,
938
+ "learning_rate": 0.00029846775036721337,
939
+ "loss": 0.432,
940
+ "step": 47000
941
+ },
942
+ {
943
+ "epoch": 1.44,
944
+ "eval_loss": 0.7865832448005676,
945
+ "eval_runtime": 0.5183,
946
+ "eval_samples_per_second": 1929.412,
947
+ "eval_steps_per_second": 30.871,
948
+ "step": 47000
949
+ },
950
+ {
951
+ "epoch": 1.46,
952
+ "learning_rate": 0.0002983974412372129,
953
+ "loss": 0.4308,
954
+ "step": 47500
955
+ },
956
+ {
957
+ "epoch": 1.47,
958
+ "learning_rate": 0.00029832556393576934,
959
+ "loss": 0.4297,
960
+ "step": 48000
961
+ },
962
+ {
963
+ "epoch": 1.47,
964
+ "eval_loss": 0.777747392654419,
965
+ "eval_runtime": 0.527,
966
+ "eval_samples_per_second": 1897.653,
967
+ "eval_steps_per_second": 30.362,
968
+ "step": 48000
969
+ },
970
+ {
971
+ "epoch": 1.49,
972
+ "learning_rate": 0.0002982521192489214,
973
+ "loss": 0.4288,
974
+ "step": 48500
975
+ },
976
+ {
977
+ "epoch": 1.5,
978
+ "learning_rate": 0.0002981771079798483,
979
+ "loss": 0.4278,
980
+ "step": 49000
981
+ },
982
+ {
983
+ "epoch": 1.5,
984
+ "eval_loss": 0.7763716578483582,
985
+ "eval_runtime": 0.5211,
986
+ "eval_samples_per_second": 1919.193,
987
+ "eval_steps_per_second": 30.707,
988
+ "step": 49000
989
+ },
990
+ {
991
+ "epoch": 1.52,
992
+ "learning_rate": 0.00029810053094886136,
993
+ "loss": 0.4269,
994
+ "step": 49500
995
+ },
996
+ {
997
+ "epoch": 1.53,
998
+ "learning_rate": 0.00029802238899339473,
999
+ "loss": 0.4258,
1000
+ "step": 50000
1001
+ },
1002
+ {
1003
+ "epoch": 1.53,
1004
+ "eval_loss": 0.7902368307113647,
1005
+ "eval_runtime": 0.5206,
1006
+ "eval_samples_per_second": 1920.725,
1007
+ "eval_steps_per_second": 30.732,
1008
+ "step": 50000
1009
  }
1010
  ],
1011
  "max_steps": 500000,
1012
  "num_train_epochs": 16,
1013
+ "total_flos": 1.597427705920524e+21,
1014
  "trial_name": null,
1015
  "trial_params": null
1016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fe754e765d5235d6d33bcfe88cdb3a933ccbfc6b7ea5ab60542395d9d85e684
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2aabbbc7ed5c34ff08e86aca48f8310b460422b65244f1250c9b83aff072675
3
  size 102501541