SaladSlayer00
committed on
Commit
•
3d99841
1
Parent(s):
078c537
Training in progress, step 3500, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 966995080
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b5c8e90157800800810742119da6df94b2ac588dea70a3cc81b74764d5ce2d0
|
3 |
size 966995080
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1925064044
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f840942c031022bad1c4d3d5f86d7af9fe204a1446a4108ca9d4b212ed8d4aeb
|
3 |
size 1925064044
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c7f9b299fba2544fcedfb299949c018ac6fda10a5718c64ae86eeb9017fefab
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75c0ae6a6836b9f1cff7f37594b49b4f593a2c9ec51fb43d28d45c3186fbd4ab
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": 96.06429012981661,
|
3 |
"best_model_checkpoint": "./another_local/checkpoint-1500",
|
4 |
-
"epoch":
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -781,6 +781,135 @@
|
|
781 |
"eval_steps_per_second": 0.102,
|
782 |
"eval_wer": 133.89655882958996,
|
783 |
"step": 3000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
784 |
}
|
785 |
],
|
786 |
"logging_steps": 25,
|
@@ -788,7 +917,7 @@
|
|
788 |
"num_input_tokens_seen": 0,
|
789 |
"num_train_epochs": 13,
|
790 |
"save_steps": 500,
|
791 |
-
"total_flos": 1.
|
792 |
"trial_name": null,
|
793 |
"trial_params": null
|
794 |
}
|
|
|
1 |
{
|
2 |
"best_metric": 96.06429012981661,
|
3 |
"best_model_checkpoint": "./another_local/checkpoint-1500",
|
4 |
+
"epoch": 11.182108626198083,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 3500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
781 |
"eval_steps_per_second": 0.102,
|
782 |
"eval_wer": 133.89655882958996,
|
783 |
"step": 3000
|
784 |
+
},
|
785 |
+
{
|
786 |
+
"epoch": 9.66,
|
787 |
+
"learning_rate": 2.797142857142857e-06,
|
788 |
+
"loss": 0.0027,
|
789 |
+
"step": 3025
|
790 |
+
},
|
791 |
+
{
|
792 |
+
"epoch": 9.74,
|
793 |
+
"learning_rate": 2.725714285714286e-06,
|
794 |
+
"loss": 0.0014,
|
795 |
+
"step": 3050
|
796 |
+
},
|
797 |
+
{
|
798 |
+
"epoch": 9.82,
|
799 |
+
"learning_rate": 2.654285714285714e-06,
|
800 |
+
"loss": 0.0026,
|
801 |
+
"step": 3075
|
802 |
+
},
|
803 |
+
{
|
804 |
+
"epoch": 9.9,
|
805 |
+
"learning_rate": 2.582857142857143e-06,
|
806 |
+
"loss": 0.0017,
|
807 |
+
"step": 3100
|
808 |
+
},
|
809 |
+
{
|
810 |
+
"epoch": 9.98,
|
811 |
+
"learning_rate": 2.5114285714285718e-06,
|
812 |
+
"loss": 0.0016,
|
813 |
+
"step": 3125
|
814 |
+
},
|
815 |
+
{
|
816 |
+
"epoch": 10.06,
|
817 |
+
"learning_rate": 2.4400000000000004e-06,
|
818 |
+
"loss": 0.0022,
|
819 |
+
"step": 3150
|
820 |
+
},
|
821 |
+
{
|
822 |
+
"epoch": 10.14,
|
823 |
+
"learning_rate": 2.3685714285714285e-06,
|
824 |
+
"loss": 0.0023,
|
825 |
+
"step": 3175
|
826 |
+
},
|
827 |
+
{
|
828 |
+
"epoch": 10.22,
|
829 |
+
"learning_rate": 2.297142857142857e-06,
|
830 |
+
"loss": 0.0021,
|
831 |
+
"step": 3200
|
832 |
+
},
|
833 |
+
{
|
834 |
+
"epoch": 10.3,
|
835 |
+
"learning_rate": 2.2257142857142857e-06,
|
836 |
+
"loss": 0.0016,
|
837 |
+
"step": 3225
|
838 |
+
},
|
839 |
+
{
|
840 |
+
"epoch": 10.38,
|
841 |
+
"learning_rate": 2.1542857142857147e-06,
|
842 |
+
"loss": 0.0024,
|
843 |
+
"step": 3250
|
844 |
+
},
|
845 |
+
{
|
846 |
+
"epoch": 10.46,
|
847 |
+
"learning_rate": 2.0828571428571433e-06,
|
848 |
+
"loss": 0.0035,
|
849 |
+
"step": 3275
|
850 |
+
},
|
851 |
+
{
|
852 |
+
"epoch": 10.54,
|
853 |
+
"learning_rate": 2.0114285714285715e-06,
|
854 |
+
"loss": 0.0029,
|
855 |
+
"step": 3300
|
856 |
+
},
|
857 |
+
{
|
858 |
+
"epoch": 10.62,
|
859 |
+
"learning_rate": 1.94e-06,
|
860 |
+
"loss": 0.0013,
|
861 |
+
"step": 3325
|
862 |
+
},
|
863 |
+
{
|
864 |
+
"epoch": 10.7,
|
865 |
+
"learning_rate": 1.8685714285714289e-06,
|
866 |
+
"loss": 0.0034,
|
867 |
+
"step": 3350
|
868 |
+
},
|
869 |
+
{
|
870 |
+
"epoch": 10.78,
|
871 |
+
"learning_rate": 1.7971428571428572e-06,
|
872 |
+
"loss": 0.0021,
|
873 |
+
"step": 3375
|
874 |
+
},
|
875 |
+
{
|
876 |
+
"epoch": 10.86,
|
877 |
+
"learning_rate": 1.7257142857142858e-06,
|
878 |
+
"loss": 0.0017,
|
879 |
+
"step": 3400
|
880 |
+
},
|
881 |
+
{
|
882 |
+
"epoch": 10.94,
|
883 |
+
"learning_rate": 1.6542857142857144e-06,
|
884 |
+
"loss": 0.0023,
|
885 |
+
"step": 3425
|
886 |
+
},
|
887 |
+
{
|
888 |
+
"epoch": 11.02,
|
889 |
+
"learning_rate": 1.582857142857143e-06,
|
890 |
+
"loss": 0.0016,
|
891 |
+
"step": 3450
|
892 |
+
},
|
893 |
+
{
|
894 |
+
"epoch": 11.1,
|
895 |
+
"learning_rate": 1.5114285714285714e-06,
|
896 |
+
"loss": 0.0018,
|
897 |
+
"step": 3475
|
898 |
+
},
|
899 |
+
{
|
900 |
+
"epoch": 11.18,
|
901 |
+
"learning_rate": 1.44e-06,
|
902 |
+
"loss": 0.0033,
|
903 |
+
"step": 3500
|
904 |
+
},
|
905 |
+
{
|
906 |
+
"epoch": 11.18,
|
907 |
+
"eval_loss": 0.5348898768424988,
|
908 |
+
"eval_runtime": 2303.5501,
|
909 |
+
"eval_samples_per_second": 0.868,
|
910 |
+
"eval_steps_per_second": 0.109,
|
911 |
+
"eval_wer": 137.85802596332167,
|
912 |
+
"step": 3500
|
913 |
}
|
914 |
],
|
915 |
"logging_steps": 25,
|
|
|
917 |
"num_input_tokens_seen": 0,
|
918 |
"num_train_epochs": 13,
|
919 |
"save_steps": 500,
|
920 |
+
"total_flos": 1.613538690269184e+19,
|
921 |
"trial_name": null,
|
922 |
"trial_params": null
|
923 |
}
|