Gizachew committed
Commit ec7e849
1 Parent(s): 9ce3233

End of training

README.md ADDED
@@ -0,0 +1,64 @@
+ ---
+ license: apache-2.0
+ base_model: facebook/hubert-base-ls960
+ tags:
+ - generated_from_trainer
+ metrics:
+ - accuracy
+ model-index:
+ - name: ckpts
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # ckpts
+
+ This model is a fine-tuned version of [facebook/hubert-base-ls960](https://huggingface.co/facebook/hubert-base-ls960) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.1910
+ - Accuracy: 0.9697
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 1e-05
+ - train_batch_size: 4
+ - eval_batch_size: 4
+ - seed: 42
+ - gradient_accumulation_steps: 2
+ - total_train_batch_size: 8
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 5.0
+ - mixed_precision_training: Native AMP
+
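For reference (not part of the committed README), the hyperparameters above map roughly onto a `transformers.TrainingArguments` configuration like the sketch below. The output directory is an assumption taken from the model name, and the step intervals come from trainer_state.json later in this commit.

```python
# Rough reconstruction of the training configuration from the list above.
# This is a sketch, not the original training script; "ckpts" as output_dir
# is an assumption.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="ckpts",
    learning_rate=1e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,   # 4 * 2 = total train batch size of 8
    num_train_epochs=5.0,
    lr_scheduler_type="linear",
    seed=42,
    fp16=True,                       # "Native AMP" mixed precision
    evaluation_strategy="steps",
    eval_steps=500,                  # eval/save cadence from trainer_state.json
    save_steps=500,
    logging_steps=100,
)
```

The Adam settings listed above (betas=(0.9, 0.999), epsilon=1e-08) are the optimizer defaults, so they need no explicit arguments here.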
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
+ | 0.0228 | 2.02 | 500 | 0.1773 | 0.9737 |
+ | 0.0385 | 4.04 | 1000 | 0.1910 | 0.9697 |
+
+
+ ### Framework versions
+
+ - Transformers 4.40.0.dev0
+ - Pytorch 2.1.2
+ - Datasets 2.18.1.dev0
+ - Tokenizers 0.15.2
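The auto-generated card stops at the framework versions and gives no usage example. Below is a minimal inference sketch, assuming the checkpoint can be loaded with the stock `transformers` audio-classification API; the config in this commit names a custom `HubertForSpeechClassification` class, so reproducing the exact head may require the original fine-tuning code. The model path and audio file are placeholders.

```python
# Hedged sketch: 5-way speech emotion classification with this checkpoint.
# "ckpts" stands in for the actual checkpoint directory or Hub repo id, and
# "example.wav" is a placeholder. AutoModelForAudioClassification is an
# approximation of the custom HubertForSpeechClassification head and may warn
# about unmapped weights.
import torch
import librosa
from transformers import AutoFeatureExtractor, AutoModelForAudioClassification

model_path = "ckpts"  # placeholder
feature_extractor = AutoFeatureExtractor.from_pretrained(model_path)
model = AutoModelForAudioClassification.from_pretrained(model_path)
model.eval()

# The preprocessor config expects 16 kHz mono audio.
speech, _ = librosa.load("example.wav", sr=16000, mono=True)
inputs = feature_extractor(speech, sampling_rate=16000, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits
pred_id = int(logits.argmax(dim=-1))
print(model.config.id2label[pred_id])  # one of the five emotion labels, e.g. "05Angry"
```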
all_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 4.99,
+ "total_flos": 3.160649695892544e+17,
+ "train_loss": 0.03992950008948322,
+ "train_runtime": 666.1547,
+ "train_samples": 1979,
+ "train_samples_per_second": 14.854,
+ "train_steps_per_second": 1.854
+ }
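The throughput fields above follow from the other values; a quick consistency check (the 1235 optimizer steps come from trainer_state.json in this same commit):

```python
# Consistency check for the reported throughput figures (a sketch; values
# copied from all_results.json and trainer_state.json).
train_runtime = 666.1547   # seconds
train_samples = 1979
num_train_epochs = 5
max_steps = 1235

print(round(train_samples * num_train_epochs / train_runtime, 3))  # 14.854 samples/s
print(round(max_steps / train_runtime, 3))                         # 1.854 steps/s
```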
config.json ADDED
@@ -0,0 +1,93 @@
+ {
+ "_name_or_path": "facebook/hubert-base-ls960",
+ "activation_dropout": 0.1,
+ "apply_spec_augment": true,
+ "architectures": [
+ "HubertForSpeechClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "conv_bias": false,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "do_stable_layer_norm": false,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_dropout": 0.0,
+ "feat_extract_norm": "group",
+ "feat_proj_dropout": 0.1,
+ "feat_proj_layer_norm": true,
+ "final_dropout": 0.1,
+ "finetuning_task": "wav2vec2_clf",
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "01Neutral",
+ "1": "02Fearful",
+ "2": "03Happy",
+ "3": "04Sad",
+ "4": "05Angry"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "01Neutral": 0,
+ "02Fearful": 1,
+ "03Happy": 2,
+ "04Sad": 3,
+ "05Angry": 4
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.1,
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.05,
+ "model_type": "hubert",
+ "num_attention_heads": 12,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "pooling_mode": "mean",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.40.0.dev0",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 32
+ }
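`HubertForSpeechClassification` is not a stock `transformers` class, so the head architecture has to be inferred from the config. Given `"pooling_mode": "mean"`, `classifier_proj_size: 256`, `hidden_size: 768`, and five labels, a plausible reading is a HuBERT encoder followed by a projection and a mean-pooled linear classifier, as in the sketch below; the layer names and ordering are assumptions, not the repository's actual implementation.

```python
# Plausible reconstruction of the classification head implied by config.json:
# HuBERT encoder -> linear projection (768 -> 256) -> mean pooling over time
# -> linear classifier (256 -> 5). This is an assumption about what the custom
# HubertForSpeechClassification class named above does.
import torch.nn as nn
from transformers import HubertConfig, HubertModel


class HubertMeanPoolClassifier(nn.Module):
    def __init__(self, config: HubertConfig):
        super().__init__()
        self.hubert = HubertModel(config)
        self.projector = nn.Linear(config.hidden_size, config.classifier_proj_size)
        self.classifier = nn.Linear(config.classifier_proj_size, config.num_labels)

    def forward(self, input_values):
        hidden_states = self.hubert(input_values).last_hidden_state  # (batch, time, 768)
        hidden_states = self.projector(hidden_states)                # (batch, time, 256)
        pooled = hidden_states.mean(dim=1)                           # "pooling_mode": "mean"
        return self.classifier(pooled)                               # (batch, 5) logits
```

If the goal is only to run the checkpoint, loading it with the original class (or with `HubertForSequenceClassification`, which uses a projector/classifier head of these same shapes) is preferable to re-implementing it.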
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9d0debed0760df769d15ac8682084547cc2c5e3cc04ab2f3880eafb91150c315
+ size 379890236
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0,
+ "return_attention_mask": false,
+ "sampling_rate": 16000
+ }
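These match the standard `Wav2Vec2FeatureExtractor` settings for 16 kHz mono waveforms with no attention mask. A short sketch of the extractor in use (the one-second dummy signal is purely illustrative):

```python
# Sketch: instantiate the feature extractor described by this preprocessor
# config and run it over a dummy one-second 16 kHz signal.
import numpy as np
from transformers import Wav2Vec2FeatureExtractor

feature_extractor = Wav2Vec2FeatureExtractor(
    feature_size=1,
    sampling_rate=16000,
    padding_value=0.0,
    do_normalize=True,
    return_attention_mask=False,
)

t = np.linspace(0, 1, 16000, endpoint=False)
waveform = np.sin(2 * np.pi * 440 * t).astype(np.float32)   # placeholder audio

inputs = feature_extractor(waveform, sampling_rate=16000, return_tensors="pt")
print(inputs["input_values"].shape)  # torch.Size([1, 16000]); normalized raw waveform
```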
runs/Apr12_16-09-39_6c0ec431eb03/events.out.tfevents.1712938183.6c0ec431eb03.34.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cdcff2c21cd7f636d16377c28120e72a5ac1fa8cc3bb0e2eeabae20537f34203
+ size 5852
runs/Apr12_16-09-39_6c0ec431eb03/events.out.tfevents.1712939900.6c0ec431eb03.34.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b5a26b3cf658a40031ab93cd221b1b85f63d448d90769cd6ebb026ae8000a109
+ size 5852
runs/Apr12_16-09-39_6c0ec431eb03/events.out.tfevents.1712940214.6c0ec431eb03.34.2 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:42c6e5e2b531bef51b9a1303b98309a19bf20aa3e65a7ef0c43525bf8d31ea96
+ size 5852
runs/Apr12_16-09-39_6c0ec431eb03/events.out.tfevents.1712940242.6c0ec431eb03.34.3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7aa7443b5c435809a0a78667e31a4b04019df27788ed5d224e4c4baca446f28f
+ size 5901
runs/Apr12_16-09-39_6c0ec431eb03/events.out.tfevents.1712940289.6c0ec431eb03.34.4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7dcc8bceed9ca96ab476010eb4b5cf67083ae7f47d9aa983484707f7d9866ba0
+ size 5901
runs/Apr12_16-09-39_6c0ec431eb03/events.out.tfevents.1712940482.6c0ec431eb03.34.5 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dc1355b1381c53cf6d4686bb1e90599724bb30f23b5ee8c50b7dc0b8248286d2
+ size 5901
runs/Apr12_16-09-39_6c0ec431eb03/events.out.tfevents.1712940519.6c0ec431eb03.34.6 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:55481ceeb53a61c1b9145543cf11fd6c32b8b375f90a52f7c3279e2172c49b70
+ size 5901
runs/Apr12_16-09-39_6c0ec431eb03/events.out.tfevents.1712940671.6c0ec431eb03.34.7 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:763299b2ea96af62e59c0c7cd6e45fc7c0fec7810d2143e0f4df56fd0b62235c
+ size 5901
runs/Apr12_16-09-39_6c0ec431eb03/events.out.tfevents.1712941086.6c0ec431eb03.34.8 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:789d74ec93d39b683919e14627f5569d54d5624eb84ec44cec62a169ba53c70d
+ size 5901
runs/Apr12_16-58-56_6c0ec431eb03/events.out.tfevents.1712941164.6c0ec431eb03.34.9 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d32147e7aba5a7843309f6903031ae02315424ed1e2ea6707f32f1536dad38c8
+ size 8204
runs/Apr12_17-02-10_6c0ec431eb03/events.out.tfevents.1712941348.6c0ec431eb03.34.10 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7ac6b5b4834e9ba4c703845d7ae5c501441e9dafdb437ea7b401aa5889f912e
+ size 25289
runs/Apr12_17-17-03_6c0ec431eb03/events.out.tfevents.1712942228.6c0ec431eb03.34.11 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:97b4720817badaf1e0159123c5a6324b528b7c3e563172f2b8fe1d151b38260b
+ size 9476
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 4.99,
+ "total_flos": 3.160649695892544e+17,
+ "train_loss": 0.03992950008948322,
+ "train_runtime": 666.1547,
+ "train_samples": 1979,
+ "train_samples_per_second": 14.854,
+ "train_steps_per_second": 1.854
+ }
trainer_state.json ADDED
@@ -0,0 +1,132 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 4.98989898989899,
+ "eval_steps": 500,
+ "global_step": 1235,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.4,
+ "grad_norm": 0.006182590499520302,
+ "learning_rate": 9.206477732793523e-06,
+ "loss": 0.0729,
+ "step": 100
+ },
+ {
+ "epoch": 0.81,
+ "grad_norm": 21.23886489868164,
+ "learning_rate": 8.39676113360324e-06,
+ "loss": 0.0698,
+ "step": 200
+ },
+ {
+ "epoch": 1.21,
+ "grad_norm": 0.006576939485967159,
+ "learning_rate": 7.587044534412956e-06,
+ "loss": 0.0464,
+ "step": 300
+ },
+ {
+ "epoch": 1.62,
+ "grad_norm": 0.009822617284953594,
+ "learning_rate": 6.785425101214575e-06,
+ "loss": 0.0689,
+ "step": 400
+ },
+ {
+ "epoch": 2.02,
+ "grad_norm": 0.00560363894328475,
+ "learning_rate": 5.975708502024292e-06,
+ "loss": 0.0228,
+ "step": 500
+ },
+ {
+ "epoch": 2.02,
+ "eval_accuracy": 0.973737359046936,
+ "eval_loss": 0.17728736996650696,
+ "eval_runtime": 23.1344,
+ "eval_samples_per_second": 21.397,
+ "eval_steps_per_second": 5.36,
+ "step": 500
+ },
+ {
+ "epoch": 2.42,
+ "grad_norm": 0.010259171947836876,
+ "learning_rate": 5.165991902834009e-06,
+ "loss": 0.0253,
+ "step": 600
+ },
+ {
+ "epoch": 2.83,
+ "grad_norm": 0.5489621758460999,
+ "learning_rate": 4.356275303643725e-06,
+ "loss": 0.0227,
+ "step": 700
+ },
+ {
+ "epoch": 3.23,
+ "grad_norm": 0.003071287414059043,
+ "learning_rate": 3.5465587044534415e-06,
+ "loss": 0.0398,
+ "step": 800
+ },
+ {
+ "epoch": 3.64,
+ "grad_norm": 0.0034055381547659636,
+ "learning_rate": 2.7368421052631583e-06,
+ "loss": 0.0299,
+ "step": 900
+ },
+ {
+ "epoch": 4.04,
+ "grad_norm": 0.005517472513020039,
+ "learning_rate": 1.9271255060728746e-06,
+ "loss": 0.0385,
+ "step": 1000
+ },
+ {
+ "epoch": 4.04,
+ "eval_accuracy": 0.9696969985961914,
+ "eval_loss": 0.190963014960289,
+ "eval_runtime": 23.4329,
+ "eval_samples_per_second": 21.124,
+ "eval_steps_per_second": 5.292,
+ "step": 1000
+ },
+ {
+ "epoch": 4.44,
+ "grad_norm": 19.359519958496094,
+ "learning_rate": 1.1174089068825912e-06,
+ "loss": 0.0365,
+ "step": 1100
+ },
+ {
+ "epoch": 4.85,
+ "grad_norm": 1.323317527770996,
+ "learning_rate": 3.0769230769230774e-07,
+ "loss": 0.0064,
+ "step": 1200
+ },
+ {
+ "epoch": 4.99,
+ "step": 1235,
+ "total_flos": 3.160649695892544e+17,
+ "train_loss": 0.03992950008948322,
+ "train_runtime": 666.1547,
+ "train_samples_per_second": 14.854,
+ "train_steps_per_second": 1.854
+ }
+ ],
+ "logging_steps": 100,
+ "max_steps": 1235,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 5,
+ "save_steps": 500,
+ "total_flos": 3.160649695892544e+17,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": null
+ }
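`log_history` above interleaves training entries (logged every 100 steps) with evaluation entries (every 500 steps) and a final summary. A small sketch for splitting them, for example to plot the curves; the file path is an assumption:

```python
# Sketch: separate trainer_state.json's log_history into train and eval records.
# Assumes trainer_state.json sits in the current directory.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

train_log = [e for e in state["log_history"] if "loss" in e]       # steps 100, 200, ...
eval_log = [e for e in state["log_history"] if "eval_loss" in e]   # steps 500 and 1000

for e in eval_log:
    print(e["step"], e["eval_loss"], e["eval_accuracy"])
# 500 0.17728736996650696 0.973737359046936
# 1000 0.190963014960289 0.9696969985961914
```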
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f507758a89624d13b7bed17a703fed0e8a853809c160f6b3fee91839d462fc37
+ size 4984