SaladSlayer00 commited on
Commit
3a660ef
1 Parent(s): 8a0dfae

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b5c8e90157800800810742119da6df94b2ac588dea70a3cc81b74764d5ce2d0
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fc6dd70651add99ed7978f4a1a498c9cf9f5df4a513779e562baacfc10bfeba
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f840942c031022bad1c4d3d5f86d7af9fe204a1446a4108ca9d4b212ed8d4aeb
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c12130ff78953a4ce0a1257bf758342a5e77a9de46862aa3e4e418c97f59cd24
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c7f9b299fba2544fcedfb299949c018ac6fda10a5718c64ae86eeb9017fefab
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07d039bc3fdc4b6313d1e6e6a91532f954d6b9332fea72a7271f539a91baf227
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75c0ae6a6836b9f1cff7f37594b49b4f593a2c9ec51fb43d28d45c3186fbd4ab
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7efbf80617c96c78286826ce59d9a12c86da62d7631874b3d6364a8e993ada60
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 96.06429012981661,
3
  "best_model_checkpoint": "./another_local/checkpoint-1500",
4
- "epoch": 11.182108626198083,
5
  "eval_steps": 500,
6
- "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -910,6 +910,135 @@
910
  "eval_steps_per_second": 0.109,
911
  "eval_wer": 137.85802596332167,
912
  "step": 3500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
913
  }
914
  ],
915
  "logging_steps": 25,
@@ -917,7 +1046,7 @@
917
  "num_input_tokens_seen": 0,
918
  "num_train_epochs": 13,
919
  "save_steps": 500,
920
- "total_flos": 1.613538690269184e+19,
921
  "trial_name": null,
922
  "trial_params": null
923
  }
 
1
  {
2
  "best_metric": 96.06429012981661,
3
  "best_model_checkpoint": "./another_local/checkpoint-1500",
4
+ "epoch": 12.779552715654953,
5
  "eval_steps": 500,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
910
  "eval_steps_per_second": 0.109,
911
  "eval_wer": 137.85802596332167,
912
  "step": 3500
913
+ },
914
+ {
915
+ "epoch": 11.26,
916
+ "learning_rate": 1.3685714285714286e-06,
917
+ "loss": 0.0018,
918
+ "step": 3525
919
+ },
920
+ {
921
+ "epoch": 11.34,
922
+ "learning_rate": 1.2971428571428574e-06,
923
+ "loss": 0.0012,
924
+ "step": 3550
925
+ },
926
+ {
927
+ "epoch": 11.42,
928
+ "learning_rate": 1.2257142857142857e-06,
929
+ "loss": 0.0018,
930
+ "step": 3575
931
+ },
932
+ {
933
+ "epoch": 11.5,
934
+ "learning_rate": 1.1542857142857143e-06,
935
+ "loss": 0.0025,
936
+ "step": 3600
937
+ },
938
+ {
939
+ "epoch": 11.58,
940
+ "learning_rate": 1.082857142857143e-06,
941
+ "loss": 0.0024,
942
+ "step": 3625
943
+ },
944
+ {
945
+ "epoch": 11.66,
946
+ "learning_rate": 1.0114285714285715e-06,
947
+ "loss": 0.003,
948
+ "step": 3650
949
+ },
950
+ {
951
+ "epoch": 11.74,
952
+ "learning_rate": 9.400000000000001e-07,
953
+ "loss": 0.001,
954
+ "step": 3675
955
+ },
956
+ {
957
+ "epoch": 11.82,
958
+ "learning_rate": 8.685714285714286e-07,
959
+ "loss": 0.0023,
960
+ "step": 3700
961
+ },
962
+ {
963
+ "epoch": 11.9,
964
+ "learning_rate": 7.971428571428572e-07,
965
+ "loss": 0.0024,
966
+ "step": 3725
967
+ },
968
+ {
969
+ "epoch": 11.98,
970
+ "learning_rate": 7.257142857142857e-07,
971
+ "loss": 0.0034,
972
+ "step": 3750
973
+ },
974
+ {
975
+ "epoch": 12.06,
976
+ "learning_rate": 6.542857142857144e-07,
977
+ "loss": 0.003,
978
+ "step": 3775
979
+ },
980
+ {
981
+ "epoch": 12.14,
982
+ "learning_rate": 5.82857142857143e-07,
983
+ "loss": 0.0012,
984
+ "step": 3800
985
+ },
986
+ {
987
+ "epoch": 12.22,
988
+ "learning_rate": 5.114285714285714e-07,
989
+ "loss": 0.0023,
990
+ "step": 3825
991
+ },
992
+ {
993
+ "epoch": 12.3,
994
+ "learning_rate": 4.4e-07,
995
+ "loss": 0.0017,
996
+ "step": 3850
997
+ },
998
+ {
999
+ "epoch": 12.38,
1000
+ "learning_rate": 3.685714285714286e-07,
1001
+ "loss": 0.0022,
1002
+ "step": 3875
1003
+ },
1004
+ {
1005
+ "epoch": 12.46,
1006
+ "learning_rate": 2.9714285714285715e-07,
1007
+ "loss": 0.0033,
1008
+ "step": 3900
1009
+ },
1010
+ {
1011
+ "epoch": 12.54,
1012
+ "learning_rate": 2.2571428571428574e-07,
1013
+ "loss": 0.0019,
1014
+ "step": 3925
1015
+ },
1016
+ {
1017
+ "epoch": 12.62,
1018
+ "learning_rate": 1.542857142857143e-07,
1019
+ "loss": 0.001,
1020
+ "step": 3950
1021
+ },
1022
+ {
1023
+ "epoch": 12.7,
1024
+ "learning_rate": 8.285714285714285e-08,
1025
+ "loss": 0.0019,
1026
+ "step": 3975
1027
+ },
1028
+ {
1029
+ "epoch": 12.78,
1030
+ "learning_rate": 1.142857142857143e-08,
1031
+ "loss": 0.0026,
1032
+ "step": 4000
1033
+ },
1034
+ {
1035
+ "epoch": 12.78,
1036
+ "eval_loss": 0.5372153520584106,
1037
+ "eval_runtime": 2322.1625,
1038
+ "eval_samples_per_second": 0.861,
1039
+ "eval_steps_per_second": 0.108,
1040
+ "eval_wer": 130.3266021017927,
1041
+ "step": 4000
1042
  }
1043
  ],
1044
  "logging_steps": 25,
 
1046
  "num_input_tokens_seen": 0,
1047
  "num_train_epochs": 13,
1048
  "save_steps": 500,
1049
+ "total_flos": 1.844176142204928e+19,
1050
  "trial_name": null,
1051
  "trial_params": null
1052
  }