ALBADDAWI committed
Commit aa6dd69
1 Parent(s): 683114e

ft-wav2vec2-with-minds

Files changed (4):
  1. README.md +56 -56
  2. config.json +20 -23
  3. model.safetensors +2 -2
  4. training_args.bin +1 -1
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-license: cc-by-nc-sa-4.0
-base_model: audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim
+license: apache-2.0
+base_model: facebook/wav2vec2-large
 tags:
 - generated_from_trainer
 metrics:
@@ -15,10 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # ft-wav2vec2-with-minds
 
-This model is a fine-tuned version of [audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim](https://huggingface.co/audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim) on the None dataset.
+This model is a fine-tuned version of [facebook/wav2vec2-large](https://huggingface.co/facebook/wav2vec2-large) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.2564
-- Accuracy: 0.9400
+- Loss: 0.0732
+- Accuracy: 0.9822
 
 ## Model description
 
@@ -37,7 +37,7 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 4e-05
+- learning_rate: 3e-05
 - train_batch_size: 120
 - eval_batch_size: 120
 - seed: 42
@@ -53,56 +53,56 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
-| 2.0796 | 1.0 | 9 | 2.0809 | 0.1209 |
-| 2.0789 | 2.0 | 18 | 2.0779 | 0.1406 |
-| 2.0762 | 3.0 | 27 | 2.0724 | 0.1987 |
-| 2.0712 | 4.0 | 36 | 2.0627 | 0.2315 |
-| 2.0601 | 5.0 | 45 | 2.0423 | 0.3327 |
-| 2.0489 | 6.0 | 54 | 1.9888 | 0.5145 |
-| 2.0094 | 7.0 | 63 | 1.8840 | 0.6214 |
-| 1.9088 | 8.0 | 72 | 1.7428 | 0.6429 |
-| 1.7904 | 9.0 | 81 | 1.5916 | 0.6448 |
-| 1.6668 | 10.0 | 90 | 1.4391 | 0.7029 |
-| 1.5889 | 11.0 | 99 | 1.3026 | 0.7591 |
-| 1.4522 | 12.0 | 108 | 1.1715 | 0.7901 |
-| 1.3301 | 13.0 | 117 | 1.0506 | 0.8257 |
-| 1.2325 | 14.0 | 126 | 0.9515 | 0.8472 |
-| 1.1669 | 15.0 | 135 | 0.8527 | 0.8557 |
-| 1.0915 | 16.0 | 144 | 0.7745 | 0.8697 |
-| 1.0157 | 17.0 | 153 | 0.7060 | 0.8772 |
-| 0.9657 | 18.0 | 162 | 0.6602 | 0.8744 |
-| 0.8975 | 19.0 | 171 | 0.6002 | 0.8903 |
-| 0.8403 | 20.0 | 180 | 0.5651 | 0.8932 |
-| 0.8059 | 21.0 | 189 | 0.5243 | 0.8960 |
-| 0.731 | 22.0 | 198 | 0.4860 | 0.9044 |
-| 0.7139 | 23.0 | 207 | 0.4634 | 0.9044 |
-| 0.6903 | 24.0 | 216 | 0.4450 | 0.9082 |
-| 0.6597 | 25.0 | 225 | 0.4221 | 0.9072 |
-| 0.6146 | 26.0 | 234 | 0.4013 | 0.9166 |
-| 0.6162 | 27.0 | 243 | 0.3853 | 0.9119 |
-| 0.6252 | 28.0 | 252 | 0.3886 | 0.9100 |
-| 0.5666 | 29.0 | 261 | 0.3478 | 0.9269 |
-| 0.5698 | 30.0 | 270 | 0.3489 | 0.9250 |
-| 0.5575 | 31.0 | 279 | 0.3354 | 0.9260 |
-| 0.5298 | 32.0 | 288 | 0.3299 | 0.9203 |
-| 0.5267 | 33.0 | 297 | 0.3128 | 0.9297 |
-| 0.5558 | 34.0 | 306 | 0.3070 | 0.9316 |
-| 0.5541 | 35.0 | 315 | 0.3005 | 0.9335 |
-| 0.5328 | 36.0 | 324 | 0.2908 | 0.9363 |
-| 0.5566 | 37.0 | 333 | 0.2923 | 0.9325 |
-| 0.5184 | 38.0 | 342 | 0.2825 | 0.9363 |
-| 0.4649 | 39.0 | 351 | 0.2739 | 0.9391 |
-| 0.431 | 40.0 | 360 | 0.2698 | 0.9335 |
-| 0.4681 | 41.0 | 369 | 0.2643 | 0.9372 |
-| 0.4918 | 42.0 | 378 | 0.2611 | 0.9372 |
-| 0.4688 | 43.0 | 387 | 0.2608 | 0.9381 |
-| 0.4738 | 44.0 | 396 | 0.2621 | 0.9372 |
-| 0.4669 | 45.0 | 405 | 0.2604 | 0.9381 |
-| 0.4556 | 46.0 | 414 | 0.2596 | 0.9344 |
-| 0.4498 | 47.0 | 423 | 0.2564 | 0.9400 |
-| 0.4738 | 48.0 | 432 | 0.2564 | 0.9400 |
-| 0.4494 | 49.0 | 441 | 0.2564 | 0.9391 |
-| 0.447 | 50.0 | 450 | 0.2564 | 0.9391 |
+| 2.0814 | 1.0 | 9 | 2.0883 | 0.1143 |
+| 2.064 | 2.0 | 18 | 2.0619 | 0.1678 |
+| 2.0232 | 3.0 | 27 | 1.9712 | 0.2709 |
+| 1.861 | 4.0 | 36 | 1.7455 | 0.3880 |
+| 1.6003 | 5.0 | 45 | 1.5115 | 0.4724 |
+| 1.4972 | 6.0 | 54 | 1.2623 | 0.5998 |
+| 1.2332 | 7.0 | 63 | 1.0138 | 0.6935 |
+| 1.081 | 8.0 | 72 | 0.8169 | 0.7601 |
+| 0.9925 | 9.0 | 81 | 0.7757 | 0.7873 |
+| 0.8516 | 10.0 | 90 | 0.6470 | 0.8163 |
+| 0.7544 | 11.0 | 99 | 0.7208 | 0.7873 |
+| 0.7006 | 12.0 | 108 | 0.5074 | 0.8557 |
+| 0.591 | 13.0 | 117 | 0.4326 | 0.8782 |
+| 0.5155 | 14.0 | 126 | 0.3707 | 0.9053 |
+| 0.4715 | 15.0 | 135 | 0.3116 | 0.9091 |
+| 0.4461 | 16.0 | 144 | 0.3167 | 0.9138 |
+| 0.445 | 17.0 | 153 | 0.2963 | 0.9250 |
+| 0.3899 | 18.0 | 162 | 0.2499 | 0.9353 |
+| 0.3656 | 19.0 | 171 | 0.2756 | 0.9194 |
+| 0.3255 | 20.0 | 180 | 0.2280 | 0.9297 |
+| 0.2756 | 21.0 | 189 | 0.2178 | 0.9438 |
+| 0.3119 | 22.0 | 198 | 0.1858 | 0.9513 |
+| 0.2595 | 23.0 | 207 | 0.1794 | 0.9475 |
+| 0.2713 | 24.0 | 216 | 0.1737 | 0.9466 |
+| 0.2336 | 25.0 | 225 | 0.1758 | 0.9531 |
+| 0.2359 | 26.0 | 234 | 0.1690 | 0.9485 |
+| 0.2229 | 27.0 | 243 | 0.1336 | 0.9606 |
+| 0.2145 | 28.0 | 252 | 0.1338 | 0.9700 |
+| 0.1986 | 29.0 | 261 | 0.1525 | 0.9625 |
+| 0.1811 | 30.0 | 270 | 0.1415 | 0.9653 |
+| 0.165 | 31.0 | 279 | 0.1208 | 0.9672 |
+| 0.1755 | 32.0 | 288 | 0.1266 | 0.9634 |
+| 0.175 | 33.0 | 297 | 0.1269 | 0.9672 |
+| 0.149 | 34.0 | 306 | 0.1072 | 0.9728 |
+| 0.1606 | 35.0 | 315 | 0.1183 | 0.9738 |
+| 0.161 | 36.0 | 324 | 0.1009 | 0.9719 |
+| 0.1533 | 37.0 | 333 | 0.1000 | 0.9728 |
+| 0.1239 | 38.0 | 342 | 0.1109 | 0.9691 |
+| 0.1353 | 39.0 | 351 | 0.0905 | 0.9775 |
+| 0.1287 | 40.0 | 360 | 0.0920 | 0.9738 |
+| 0.223 | 41.0 | 369 | 0.0855 | 0.9775 |
+| 0.1302 | 42.0 | 378 | 0.0748 | 0.9794 |
+| 0.1249 | 43.0 | 387 | 0.0732 | 0.9822 |
+| 0.1552 | 44.0 | 396 | 0.0688 | 0.9822 |
+| 0.098 | 45.0 | 405 | 0.0777 | 0.9766 |
+| 0.1459 | 46.0 | 414 | 0.0634 | 0.9813 |
+| 0.1267 | 47.0 | 423 | 0.0653 | 0.9822 |
+| 0.149 | 48.0 | 432 | 0.0709 | 0.9794 |
+| 0.1135 | 49.0 | 441 | 0.0660 | 0.9813 |
+| 0.118 | 50.0 | 450 | 0.0652 | 0.9813 |
 
 
 ### Framework versions
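The card now reports eval accuracy 0.9822 (up from 0.9400) after rebasing onto `facebook/wav2vec2-large`. As a hedged illustration of how such a checkpoint is typically used, here is a minimal Python sketch with the `transformers` audio-classification pipeline. The repo id `ALBADDAWI/ft-wav2vec2-with-minds` is inferred from the commit header, `command.wav` is a hypothetical input file, and the presence of a preprocessor config in the repo is assumed.

```python
# Minimal inference sketch. Assumptions: the checkpoint lives at
# "ALBADDAWI/ft-wav2vec2-with-minds" (inferred from the commit header,
# not stated in the diff) and "command.wav" is a hypothetical mono
# recording of a spoken command.
from transformers import pipeline

classifier = pipeline(
    "audio-classification",
    model="ALBADDAWI/ft-wav2vec2-with-minds",
)

# Returns the top-k of the eight labels (stop/down/no/left/right/yes/go/up)
# with softmax scores; file inputs are decoded and resampled for the model.
for pred in classifier("command.wav", top_k=3):
    print(f"{pred['label']}: {pred['score']:.3f}")
```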
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim",
+  "_name_or_path": "facebook/wav2vec2-large",
   "activation_dropout": 0.1,
   "adapter_attn_dim": null,
   "adapter_kernel_size": 3,
@@ -14,7 +14,7 @@
   "classifier_proj_size": 256,
   "codevector_dim": 768,
   "contrastive_logits_temperature": 0.1,
-  "conv_bias": true,
+  "conv_bias": false,
   "conv_dim": [
     512,
     512,
@@ -45,15 +45,14 @@
   "ctc_loss_reduction": "sum",
   "ctc_zero_infinity": false,
   "diversity_loss_weight": 0.1,
-  "do_stable_layer_norm": true,
+  "do_stable_layer_norm": false,
   "eos_token_id": 2,
   "feat_extract_activation": "gelu",
   "feat_extract_dropout": 0.0,
-  "feat_extract_norm": "layer",
+  "feat_extract_norm": "group",
   "feat_proj_dropout": 0.1,
   "feat_quantizer_dropout": 0.0,
   "final_dropout": 0.1,
-  "finetuning_task": "wav2vec2_reg",
   "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout": 0.1,
@@ -61,25 +60,25 @@
   "hidden_size": 1024,
   "id2label": {
     "0": "stop",
-    "1": "no",
-    "2": "left",
-    "3": "right",
-    "4": "yes",
-    "5": "up",
-    "6": "down",
-    "7": "go"
+    "1": "down",
+    "2": "no",
+    "3": "left",
+    "4": "right",
+    "5": "yes",
+    "6": "go",
+    "7": "up"
   },
   "initializer_range": 0.02,
   "intermediate_size": 4096,
   "label2id": {
-    "down": "6",
-    "go": "7",
-    "left": "2",
-    "no": "1",
-    "right": "3",
+    "down": "1",
+    "go": "6",
+    "left": "3",
+    "no": "2",
+    "right": "4",
     "stop": "0",
-    "up": "5",
-    "yes": "4"
+    "up": "7",
+    "yes": "5"
   },
   "layer_norm_eps": 1e-05,
   "layerdrop": 0.1,
@@ -97,12 +96,10 @@
   "num_conv_pos_embedding_groups": 16,
   "num_conv_pos_embeddings": 128,
   "num_feat_extract_layers": 7,
-  "num_hidden_layers": 12,
+  "num_hidden_layers": 24,
   "num_negatives": 100,
   "output_hidden_size": 1024,
   "pad_token_id": 0,
-  "pooling_mode": "mean",
-  "problem_type": "regression",
   "proj_codevector_dim": 768,
   "tdnn_dilation": [
     1,
@@ -128,6 +125,6 @@
   "torch_dtype": "float32",
   "transformers_version": "4.35.2",
   "use_weighted_layer_sum": false,
-  "vocab_size": null,
+  "vocab_size": 32,
   "xvector_output_dim": 512
 }
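The config change swaps in the new backbone (24 hidden layers, group-norm feature extractor, `conv_bias: false`), drops the regression-specific keys of the old emotion model (`finetuning_task`, `pooling_mode`, `problem_type`), and renumbers the eight command labels. `id2label` and `label2id` must remain exact inverses, since the argmax over the eight logits is looked up in `id2label` at inference time. A sketch of building an equivalent classification config, assuming only what the new config.json shows (a head initialized this way is untrained; the actual checkpoint ships trained weights):

```python
# Sketch: reproduce the eight-class head from the new config.json and
# map logits to labels. The waveform below is random noise, used only
# to exercise the lookup; the classifier weights here are freshly
# initialized, unlike the fine-tuned checkpoint in this commit.
import torch
from transformers import AutoConfig, AutoModelForAudioClassification

config = AutoConfig.from_pretrained(
    "facebook/wav2vec2-large",
    num_labels=8,
    id2label={0: "stop", 1: "down", 2: "no", 3: "left",
              4: "right", 5: "yes", 6: "go", 7: "up"},
    label2id={"stop": 0, "down": 1, "no": 2, "left": 3,
              "right": 4, "yes": 5, "go": 6, "up": 7},
)
model = AutoModelForAudioClassification.from_pretrained(
    "facebook/wav2vec2-large", config=config
)

waveform = torch.randn(1, 16000)  # 1 s of fake 16 kHz audio
with torch.no_grad():
    logits = model(input_values=waveform).logits  # shape (1, 8)
predicted_id = logits.argmax(dim=-1).item()
print(model.config.id2label[predicted_id])
```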
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:144f3ffc9e8cde213bd94555aca38e1ffb5dfb8bc507afc2a1856997f00d139b
-size 658222576
+oid sha256:4a398bcfd35483061316b87c40c842401a1160ced1b8a5ae390e39a02ca46c6f
+size 1262824104
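model.safetensors is stored as a Git LFS pointer, so the diff touches only the sha256 object id and the byte size; the jump from roughly 658 MB to 1.26 GB is consistent with moving from a 12-layer to a 24-layer encoder. A small sketch of verifying a downloaded file against the pointer's oid (the local path is an assumption):

```python
# Verify a downloaded LFS object against the oid in its pointer file.
# The expected hash is the new oid from this commit; the file is
# assumed to sit in the current working directory.
import hashlib

EXPECTED = "4a398bcfd35483061316b87c40c842401a1160ced1b8a5ae390e39a02ca46c6f"

h = hashlib.sha256()
with open("model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
        h.update(chunk)

assert h.hexdigest() == EXPECTED, "checksum mismatch"
print("model.safetensors matches the LFS pointer oid")
```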
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e70e7084200ada453ee3d0ff5ff6e337686bbeaadaa5472f19d09722832ad5e6
+oid sha256:a2eed9312a29a1bf16959421f1afa06489073a52b944433d18f7e5f37fd0d6e8
 size 4143
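training_args.bin keeps its 4143-byte size but changes content, matching the hyperparameter change in the README (learning rate 4e-05 to 3e-05). It is the pickled `TrainingArguments` object that `transformers.Trainer` saves alongside the weights; a sketch of inspecting it locally, assuming a trusted download and a compatible `transformers` install:

```python
# Inspect the serialized TrainingArguments saved by transformers.Trainer.
# weights_only=False is required on newer torch because this is a pickled
# Python object, not a tensor checkpoint; only unpickle trusted files.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate)     # expected 3e-05 per the README diff
print(args.num_train_epochs)  # expected 50
```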