SaladSlayer00 commited on
Commit
3d99841
1 Parent(s): 078c537

Training in progress, step 3500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ce4b8b5a646a747bf1c54d4e55c15da75e5472d5f29c289eff679a65ba3d219
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b5c8e90157800800810742119da6df94b2ac588dea70a3cc81b74764d5ce2d0
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:415af2ff1829c3edaf1a17ba6db5d45417fa3e84cc95554423a2869871551398
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f840942c031022bad1c4d3d5f86d7af9fe204a1446a4108ca9d4b212ed8d4aeb
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:979cd4e52f730e90cd87d98931f6627f47d79df2a678fcfc674113ffa4af0794
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c7f9b299fba2544fcedfb299949c018ac6fda10a5718c64ae86eeb9017fefab
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ff56efc76c16a3b9a712527179ae61c8d6dfccc7e3a53f8c421d6329adacfbb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75c0ae6a6836b9f1cff7f37594b49b4f593a2c9ec51fb43d28d45c3186fbd4ab
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 96.06429012981661,
3
  "best_model_checkpoint": "./another_local/checkpoint-1500",
4
- "epoch": 9.584664536741213,
5
  "eval_steps": 500,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -781,6 +781,135 @@
781
  "eval_steps_per_second": 0.102,
782
  "eval_wer": 133.89655882958996,
783
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
784
  }
785
  ],
786
  "logging_steps": 25,
@@ -788,7 +917,7 @@
788
  "num_input_tokens_seen": 0,
789
  "num_train_epochs": 13,
790
  "save_steps": 500,
791
- "total_flos": 1.383132106653696e+19,
792
  "trial_name": null,
793
  "trial_params": null
794
  }
 
1
  {
2
  "best_metric": 96.06429012981661,
3
  "best_model_checkpoint": "./another_local/checkpoint-1500",
4
+ "epoch": 11.182108626198083,
5
  "eval_steps": 500,
6
+ "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
781
  "eval_steps_per_second": 0.102,
782
  "eval_wer": 133.89655882958996,
783
  "step": 3000
784
+ },
785
+ {
786
+ "epoch": 9.66,
787
+ "learning_rate": 2.797142857142857e-06,
788
+ "loss": 0.0027,
789
+ "step": 3025
790
+ },
791
+ {
792
+ "epoch": 9.74,
793
+ "learning_rate": 2.725714285714286e-06,
794
+ "loss": 0.0014,
795
+ "step": 3050
796
+ },
797
+ {
798
+ "epoch": 9.82,
799
+ "learning_rate": 2.654285714285714e-06,
800
+ "loss": 0.0026,
801
+ "step": 3075
802
+ },
803
+ {
804
+ "epoch": 9.9,
805
+ "learning_rate": 2.582857142857143e-06,
806
+ "loss": 0.0017,
807
+ "step": 3100
808
+ },
809
+ {
810
+ "epoch": 9.98,
811
+ "learning_rate": 2.5114285714285718e-06,
812
+ "loss": 0.0016,
813
+ "step": 3125
814
+ },
815
+ {
816
+ "epoch": 10.06,
817
+ "learning_rate": 2.4400000000000004e-06,
818
+ "loss": 0.0022,
819
+ "step": 3150
820
+ },
821
+ {
822
+ "epoch": 10.14,
823
+ "learning_rate": 2.3685714285714285e-06,
824
+ "loss": 0.0023,
825
+ "step": 3175
826
+ },
827
+ {
828
+ "epoch": 10.22,
829
+ "learning_rate": 2.297142857142857e-06,
830
+ "loss": 0.0021,
831
+ "step": 3200
832
+ },
833
+ {
834
+ "epoch": 10.3,
835
+ "learning_rate": 2.2257142857142857e-06,
836
+ "loss": 0.0016,
837
+ "step": 3225
838
+ },
839
+ {
840
+ "epoch": 10.38,
841
+ "learning_rate": 2.1542857142857147e-06,
842
+ "loss": 0.0024,
843
+ "step": 3250
844
+ },
845
+ {
846
+ "epoch": 10.46,
847
+ "learning_rate": 2.0828571428571433e-06,
848
+ "loss": 0.0035,
849
+ "step": 3275
850
+ },
851
+ {
852
+ "epoch": 10.54,
853
+ "learning_rate": 2.0114285714285715e-06,
854
+ "loss": 0.0029,
855
+ "step": 3300
856
+ },
857
+ {
858
+ "epoch": 10.62,
859
+ "learning_rate": 1.94e-06,
860
+ "loss": 0.0013,
861
+ "step": 3325
862
+ },
863
+ {
864
+ "epoch": 10.7,
865
+ "learning_rate": 1.8685714285714289e-06,
866
+ "loss": 0.0034,
867
+ "step": 3350
868
+ },
869
+ {
870
+ "epoch": 10.78,
871
+ "learning_rate": 1.7971428571428572e-06,
872
+ "loss": 0.0021,
873
+ "step": 3375
874
+ },
875
+ {
876
+ "epoch": 10.86,
877
+ "learning_rate": 1.7257142857142858e-06,
878
+ "loss": 0.0017,
879
+ "step": 3400
880
+ },
881
+ {
882
+ "epoch": 10.94,
883
+ "learning_rate": 1.6542857142857144e-06,
884
+ "loss": 0.0023,
885
+ "step": 3425
886
+ },
887
+ {
888
+ "epoch": 11.02,
889
+ "learning_rate": 1.582857142857143e-06,
890
+ "loss": 0.0016,
891
+ "step": 3450
892
+ },
893
+ {
894
+ "epoch": 11.1,
895
+ "learning_rate": 1.5114285714285714e-06,
896
+ "loss": 0.0018,
897
+ "step": 3475
898
+ },
899
+ {
900
+ "epoch": 11.18,
901
+ "learning_rate": 1.44e-06,
902
+ "loss": 0.0033,
903
+ "step": 3500
904
+ },
905
+ {
906
+ "epoch": 11.18,
907
+ "eval_loss": 0.5348898768424988,
908
+ "eval_runtime": 2303.5501,
909
+ "eval_samples_per_second": 0.868,
910
+ "eval_steps_per_second": 0.109,
911
+ "eval_wer": 137.85802596332167,
912
+ "step": 3500
913
  }
914
  ],
915
  "logging_steps": 25,
 
917
  "num_input_tokens_seen": 0,
918
  "num_train_epochs": 13,
919
  "save_steps": 500,
920
+ "total_flos": 1.613538690269184e+19,
921
  "trial_name": null,
922
  "trial_params": null
923
  }