linl03 committed on
Commit
60366a2
·
verified ·
1 Parent(s): d8645e5

End of training

Browse files
README.md CHANGED
@@ -1,30 +1,15 @@
1
  ---
 
2
  language:
3
  - vi
4
- license: apache-2.0
5
- base_model: openai/whisper-small
6
  tags:
7
  - generated_from_trainer
8
- datasets:
9
- - common_voice_17_0
10
  metrics:
11
  - wer
12
  model-index:
13
  - name: Whisper small vi - Ox
14
- results:
15
- - task:
16
- name: Automatic Speech Recognition
17
- type: automatic-speech-recognition
18
- dataset:
19
- name: common_voice_17_0
20
- type: common_voice_17_0
21
- config: vi
22
- split: test
23
- args: vi
24
- metrics:
25
- - name: Wer
26
- type: wer
27
- value: 31.26665341022072
28
  ---
29
 
30
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,10 +17,10 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  # Whisper small vi - Ox
34
 
35
- This model is a fine-tuned version of [openai/whisper-small](https://huggingface.co/openai/whisper-small) on the common_voice_17_0 dataset.
36
  It achieves the following results on the evaluation set:
37
- - Loss: 1.0138
38
- - Wer: 31.2667
39
 
40
  ## Model description
41
 
@@ -55,10 +40,10 @@ More information needed
55
 
56
  The following hyperparameters were used during training:
57
  - learning_rate: 1e-05
58
- - train_batch_size: 16
59
  - eval_batch_size: 8
60
  - seed: 42
61
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
62
  - lr_scheduler_type: linear
63
  - lr_scheduler_warmup_steps: 500
64
  - num_epochs: 3.0
@@ -66,28 +51,162 @@ The following hyperparameters were used during training:
66
 
67
  ### Training results
68
 
69
- | Training Loss | Epoch | Step | Validation Loss | Wer |
70
- |:-------------:|:-----:|:-----:|:---------------:|:-------:|
71
- | 0.2276 | 0.08 | 1000 | 0.7506 | 29.8509 |
72
- | 0.1768 | 0.16 | 2000 | 0.8114 | 31.2189 |
73
- | 0.1828 | 0.24 | 3000 | 0.8569 | 31.2985 |
74
- | 0.1632 | 0.32 | 4000 | 0.8523 | 31.9268 |
75
- | 0.1566 | 0.4 | 5000 | 0.9062 | 31.9149 |
76
- | 0.1532 | 0.48 | 6000 | 0.8914 | 31.4496 |
77
- | 0.1593 | 0.56 | 7000 | 0.9342 | 31.9825 |
78
- | 0.1411 | 0.64 | 8000 | 0.9412 | 32.0302 |
79
- | 0.1531 | 0.72 | 9000 | 0.9456 | 31.6206 |
80
- | 0.1246 | 0.8 | 10000 | 0.9452 | 31.7240 |
81
- | 0.1336 | 0.88 | 11000 | 0.9622 | 31.1195 |
82
- | 0.1392 | 0.96 | 12000 | 0.9638 | 31.3939 |
83
- | 0.0725 | 1.04 | 13000 | 1.0032 | 31.5649 |
84
- | 0.0838 | 1.12 | 14000 | 1.0346 | 31.7916 |
85
- | 0.0766 | 1.2 | 15000 | 1.0138 | 31.2667 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
 
88
  ### Framework versions
89
 
90
- - Transformers 4.39.3
91
- - Pytorch 2.4.1
92
- - Datasets 3.0.1
93
- - Tokenizers 0.15.2
 
1
  ---
2
+ library_name: transformers
3
  language:
4
  - vi
5
+ base_model: weights/whisper-small-vi
 
6
  tags:
7
  - generated_from_trainer
 
 
8
  metrics:
9
  - wer
10
  model-index:
11
  - name: Whisper small vi - Ox
12
+ results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  ---
14
 
15
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
17
 
18
  # Whisper small vi - Ox
19
 
20
+ This model is a fine-tuned version of [weights/whisper-small-vi](https://huggingface.co/weights/whisper-small-vi) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 0.1208
23
+ - Wer: 22.6497
24
 
25
  ## Model description
26
 
 
40
 
41
  The following hyperparameters were used during training:
42
  - learning_rate: 1e-05
43
+ - train_batch_size: 8
44
  - eval_batch_size: 8
45
  - seed: 42
46
+ - optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
47
  - lr_scheduler_type: linear
48
  - lr_scheduler_warmup_steps: 500
49
  - num_epochs: 3.0
 
51
 
52
  ### Training results
53
 
54
+ | Training Loss | Epoch | Step | Validation Loss | Wer |
55
+ |:-------------:|:------:|:------:|:---------------:|:-------:|
56
+ | 0.3871 | 0.0211 | 1000 | 0.4327 | 23.4515 |
57
+ | 0.3696 | 0.0422 | 2000 | 0.3824 | 23.7588 |
58
+ | 0.3576 | 0.0632 | 3000 | 0.3596 | 22.4671 |
59
+ | 0.3467 | 0.0843 | 4000 | 0.3394 | 20.5368 |
60
+ | 0.36 | 0.1054 | 5000 | 0.3254 | 18.4529 |
61
+ | 0.3182 | 0.1265 | 6000 | 0.3107 | 18.0351 |
62
+ | 0.3293 | 0.1476 | 7000 | 0.2972 | 17.0268 |
63
+ | 0.2692 | 0.1686 | 8000 | 0.2888 | 18.8514 |
64
+ | 0.3061 | 0.1897 | 9000 | 0.2854 | 17.1612 |
65
+ | 0.2654 | 0.2108 | 10000 | 0.2766 | 16.8539 |
66
+ | 0.2954 | 0.2319 | 11000 | 0.2692 | 16.1481 |
67
+ | 0.2703 | 0.2529 | 12000 | 0.2632 | 15.1733 |
68
+ | 0.2523 | 0.2740 | 13000 | 0.2594 | 15.1541 |
69
+ | 0.265 | 0.2951 | 14000 | 0.2556 | 14.9669 |
70
+ | 0.246 | 0.3162 | 15000 | 0.2515 | 15.7784 |
71
+ | 0.2249 | 0.3373 | 16000 | 0.2484 | 14.2514 |
72
+ | 0.2478 | 0.3583 | 17000 | 0.2414 | 14.8996 |
73
+ | 0.2246 | 0.3794 | 18000 | 0.2432 | 14.8468 |
74
+ | 0.2291 | 0.4005 | 19000 | 0.2333 | 15.4038 |
75
+ | 0.2336 | 0.4216 | 20000 | 0.2321 | 13.8577 |
76
+ | 0.2448 | 0.4427 | 21000 | 0.2287 | 12.9694 |
77
+ | 0.2174 | 0.4637 | 22000 | 0.2237 | 13.2479 |
78
+ | 0.2239 | 0.4848 | 23000 | 0.2255 | 12.7197 |
79
+ | 0.2065 | 0.5059 | 24000 | 0.2245 | 12.4316 |
80
+ | 0.2467 | 0.5270 | 25000 | 0.2215 | 13.4015 |
81
+ | 0.2257 | 0.5480 | 26000 | 0.2200 | 14.3955 |
82
+ | 0.229 | 0.5691 | 27000 | 0.2172 | 12.9021 |
83
+ | 0.2431 | 0.5902 | 28000 | 0.2134 | 13.4927 |
84
+ | 0.2258 | 0.6113 | 29000 | 0.2128 | 12.1819 |
85
+ | 0.2025 | 0.6324 | 30000 | 0.2117 | 11.9850 |
86
+ | 0.2014 | 0.6534 | 31000 | 0.2055 | 11.5193 |
87
+ | 0.2341 | 0.6745 | 32000 | 0.2038 | 11.5097 |
88
+ | 0.2136 | 0.6956 | 33000 | 0.2040 | 11.4760 |
89
+ | 0.2348 | 0.7167 | 34000 | 0.2034 | 11.5337 |
90
+ | 0.1944 | 0.7378 | 35000 | 0.2005 | 11.9610 |
91
+ | 0.1646 | 0.7588 | 36000 | 0.2005 | 11.3656 |
92
+ | 0.197 | 0.7799 | 37000 | 0.1938 | 11.0199 |
93
+ | 0.1877 | 0.8010 | 38000 | 0.1945 | 11.7209 |
94
+ | 0.2122 | 0.8221 | 39000 | 0.1910 | 10.8470 |
95
+ | 0.1736 | 0.8432 | 40000 | 0.1905 | 11.1831 |
96
+ | 0.1548 | 0.8642 | 41000 | 0.1907 | 11.2215 |
97
+ | 0.1754 | 0.8853 | 42000 | 0.1875 | 11.1207 |
98
+ | 0.2125 | 0.9064 | 43000 | 0.1864 | 10.5925 |
99
+ | 0.1975 | 0.9275 | 44000 | 0.1834 | 11.1639 |
100
+ | 0.18 | 0.9485 | 45000 | 0.1827 | 10.7414 |
101
+ | 0.2067 | 0.9696 | 46000 | 0.1828 | 10.5589 |
102
+ | 0.1728 | 0.9907 | 47000 | 0.1810 | 10.5493 |
103
+ | 0.1253 | 1.0118 | 48000 | 0.1822 | 10.6453 |
104
+ | 0.1284 | 1.0329 | 49000 | 0.1818 | 10.9238 |
105
+ | 0.1412 | 1.0539 | 50000 | 0.1812 | 10.4677 |
106
+ | 0.1266 | 1.0750 | 51000 | 0.1809 | 10.6838 |
107
+ | 0.1093 | 1.0961 | 52000 | 0.1808 | 10.2708 |
108
+ | 0.1293 | 1.1172 | 53000 | 0.1809 | 10.5973 |
109
+ | 0.1377 | 1.1383 | 54000 | 0.1779 | 9.9443 |
110
+ | 0.1135 | 1.1593 | 55000 | 0.1750 | 10.0595 |
111
+ | 0.1029 | 1.1804 | 56000 | 0.1739 | 9.6658 |
112
+ | 0.0959 | 1.2015 | 57000 | 0.1776 | 10.7030 |
113
+ | 0.1335 | 1.2226 | 58000 | 0.1736 | 10.1268 |
114
+ | 0.1166 | 1.2437 | 59000 | 0.1755 | 11.6873 |
115
+ | 0.1079 | 1.2647 | 60000 | 0.1741 | 9.8579 |
116
+ | 0.124 | 1.2858 | 61000 | 0.1719 | 9.6706 |
117
+ | 0.1279 | 1.3069 | 62000 | 0.1725 | 10.9094 |
118
+ | 0.1546 | 1.3280 | 63000 | 0.1740 | 10.0643 |
119
+ | 0.0961 | 1.3490 | 64000 | 0.1726 | 9.6514 |
120
+ | 0.1167 | 1.3701 | 65000 | 0.1715 | 9.9971 |
121
+ | 0.1072 | 1.3912 | 66000 | 0.1691 | 9.5410 |
122
+ | 0.1052 | 1.4123 | 67000 | 0.1708 | 10.0067 |
123
+ | 0.1234 | 1.4334 | 68000 | 0.1682 | 10.5589 |
124
+ | 0.1131 | 1.4544 | 69000 | 0.1665 | 10.6838 |
125
+ | 0.1188 | 1.4755 | 70000 | 0.1668 | 11.1159 |
126
+ | 0.1106 | 1.4966 | 71000 | 0.1666 | 9.3777 |
127
+ | 0.0984 | 1.5177 | 72000 | 0.1645 | 9.6754 |
128
+ | 0.1206 | 1.5388 | 73000 | 0.1636 | 10.6982 |
129
+ | 0.1369 | 1.5598 | 74000 | 0.1625 | 9.5554 |
130
+ | 0.1164 | 1.5809 | 75000 | 0.1619 | 9.4881 |
131
+ | 0.1326 | 1.6020 | 76000 | 0.1603 | 11.4184 |
132
+ | 0.1168 | 1.6231 | 77000 | 0.1607 | 10.6550 |
133
+ | 0.107 | 1.6441 | 78000 | 0.1613 | 10.0980 |
134
+ | 0.1222 | 1.6652 | 79000 | 0.1611 | 9.6562 |
135
+ | 0.1304 | 1.6863 | 80000 | 0.1615 | 9.7186 |
136
+ | 0.1288 | 1.7074 | 81000 | 0.1615 | 9.6466 |
137
+ | 0.1101 | 1.7285 | 82000 | 0.1587 | 12.8925 |
138
+ | 0.1256 | 1.7495 | 83000 | 0.1577 | 11.4760 |
139
+ | 0.1167 | 1.7706 | 84000 | 0.1564 | 9.0992 |
140
+ | 0.114 | 1.7917 | 85000 | 0.1533 | 11.4424 |
141
+ | 0.0918 | 1.8128 | 86000 | 0.1528 | 15.9896 |
142
+ | 0.1338 | 1.8339 | 87000 | 0.1521 | 15.8168 |
143
+ | 0.1009 | 1.8549 | 88000 | 0.1514 | 15.3078 |
144
+ | 0.1124 | 1.8760 | 89000 | 0.1511 | 14.3330 |
145
+ | 0.1161 | 1.8971 | 90000 | 0.1507 | 10.4389 |
146
+ | 0.102 | 1.9182 | 91000 | 0.1495 | 9.4209 |
147
+ | 0.0921 | 1.9393 | 92000 | 0.1473 | 12.5660 |
148
+ | 0.1142 | 1.9603 | 93000 | 0.1477 | 11.5865 |
149
+ | 0.0971 | 1.9814 | 94000 | 0.1482 | 17.4493 |
150
+ | 0.0562 | 2.0025 | 95000 | 0.1484 | 23.0769 |
151
+ | 0.0697 | 2.0236 | 96000 | 0.1491 | 20.2007 |
152
+ | 0.0691 | 2.0446 | 97000 | 0.1487 | 14.3234 |
153
+ | 0.0707 | 2.0657 | 98000 | 0.1486 | 15.2694 |
154
+ | 0.0529 | 2.0868 | 99000 | 0.1486 | 16.5322 |
155
+ | 0.061 | 2.1079 | 100000 | 0.1466 | 17.9343 |
156
+ | 0.077 | 2.1290 | 101000 | 0.1465 | 17.1852 |
157
+ | 0.0748 | 2.1500 | 102000 | 0.1474 | 15.5767 |
158
+ | 0.0624 | 2.1711 | 103000 | 0.1471 | 15.2118 |
159
+ | 0.0625 | 2.1922 | 104000 | 0.1452 | 27.2352 |
160
+ | 0.0876 | 2.2133 | 105000 | 0.1476 | 27.5137 |
161
+ | 0.0683 | 2.2344 | 106000 | 0.1468 | 20.1911 |
162
+ | 0.0539 | 2.2554 | 107000 | 0.1459 | 19.6101 |
163
+ | 0.0627 | 2.2765 | 108000 | 0.1462 | 19.4997 |
164
+ | 0.0548 | 2.2976 | 109000 | 0.1469 | 18.6546 |
165
+ | 0.0559 | 2.3187 | 110000 | 0.1453 | 15.9224 |
166
+ | 0.0667 | 2.3397 | 111000 | 0.1447 | 20.4312 |
167
+ | 0.0611 | 2.3608 | 112000 | 0.1442 | 19.0963 |
168
+ | 0.0672 | 2.3819 | 113000 | 0.1441 | 19.7349 |
169
+ | 0.0517 | 2.4030 | 114000 | 0.1435 | 17.6894 |
170
+ | 0.0584 | 2.4241 | 115000 | 0.1439 | 21.5884 |
171
+ | 0.0634 | 2.4451 | 116000 | 0.1428 | 22.2942 |
172
+ | 0.0754 | 2.4662 | 117000 | 0.1420 | 25.4346 |
173
+ | 0.0537 | 2.4873 | 118000 | 0.1413 | 29.4440 |
174
+ | 0.0478 | 2.5084 | 119000 | 0.1412 | 21.6796 |
175
+ | 0.0509 | 2.5295 | 120000 | 0.1414 | 22.2414 |
176
+ | 0.0749 | 2.5505 | 121000 | 0.1405 | 18.5153 |
177
+ | 0.069 | 2.5716 | 122000 | 0.1391 | 17.9679 |
178
+ | 0.0614 | 2.5927 | 123000 | 0.1395 | 19.7157 |
179
+ | 0.0628 | 2.6138 | 124000 | 0.1382 | 19.7926 |
180
+ | 0.0518 | 2.6349 | 125000 | 0.1390 | 19.3172 |
181
+ | 0.078 | 2.6559 | 126000 | 0.1379 | 23.3458 |
182
+ | 0.0578 | 2.6770 | 127000 | 0.1388 | 21.2427 |
183
+ | 0.0406 | 2.6981 | 128000 | 0.1384 | 24.9832 |
184
+ | 0.0494 | 2.7192 | 129000 | 0.1373 | 21.6124 |
185
+ | 0.0714 | 2.7402 | 130000 | 0.1375 | 22.4671 |
186
+ | 0.0646 | 2.7613 | 131000 | 0.1369 | 23.7203 |
187
+ | 0.0582 | 2.7824 | 132000 | 0.1372 | 22.2462 |
188
+ | 0.0594 | 2.8035 | 133000 | 0.1368 | 22.1646 |
189
+ | 0.0435 | 2.8246 | 134000 | 0.1364 | 21.4684 |
190
+ | 0.0509 | 2.8456 | 135000 | 0.1361 | 19.0243 |
191
+ | 0.0553 | 2.8667 | 136000 | 0.1365 | 21.0506 |
192
+ | 0.0716 | 2.8878 | 137000 | 0.1360 | 21.2859 |
193
+ | 0.0621 | 2.9089 | 138000 | 0.1359 | 19.7926 |
194
+ | 0.0597 | 2.9300 | 139000 | 0.1358 | 21.4011 |
195
+ | 0.0476 | 2.9510 | 140000 | 0.1357 | 21.3195 |
196
+ | 0.0483 | 2.9721 | 141000 | 0.1355 | 21.3195 |
197
+ | 0.0504 | 2.9932 | 142000 | 0.1355 | 22.1166 |
198
+ | 0.1197 | 2.8636 | 143000 | 0.1315 | 21.4829 |
199
+ | 0.1389 | 2.8836 | 144000 | 0.1266 | 22.1632 |
200
+ | 0.1242 | 2.9036 | 145000 | 0.1244 | 23.1137 |
201
+ | 0.1355 | 2.9236 | 146000 | 0.1228 | 21.0505 |
202
+ | 0.1257 | 2.9437 | 147000 | 0.1218 | 20.4334 |
203
+ | 0.1027 | 2.9637 | 148000 | 0.1212 | 21.6541 |
204
+ | 0.1186 | 2.9837 | 149000 | 0.1208 | 22.6497 |
205
 
206
 
207
  ### Framework versions
208
 
209
+ - Transformers 4.47.0
210
+ - Pytorch 2.5.1+cu121
211
+ - Datasets 3.1.0
212
+ - Tokenizers 0.21.0
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "whisper-small-vi",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
@@ -54,7 +54,7 @@
54
  "pad_token_id": 50257,
55
  "scale_embedding": false,
56
  "torch_dtype": "float32",
57
- "transformers_version": "4.39.3",
58
  "use_cache": true,
59
  "use_weighted_layer_sum": false,
60
  "vocab_size": 51865
 
1
  {
2
+ "_name_or_path": "weights/whisper-small-vi",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
 
54
  "pad_token_id": 50257,
55
  "scale_embedding": false,
56
  "torch_dtype": "float32",
57
+ "transformers_version": "4.47.0",
58
  "use_cache": true,
59
  "use_weighted_layer_sum": false,
60
  "vocab_size": 51865
generation_config.json CHANGED
@@ -156,7 +156,7 @@
156
  "no_timestamps_token_id": 50363,
157
  "pad_token_id": 50257,
158
  "prev_sot_token_id": 50361,
159
- "return_timestamps": false,
160
  "suppress_tokens": [
161
  1,
162
  2,
@@ -241,8 +241,6 @@
241
  49870,
242
  50254,
243
  50258,
244
- 50358,
245
- 50359,
246
  50360,
247
  50361,
248
  50362
@@ -252,5 +250,5 @@
252
  "transcribe": 50359,
253
  "translate": 50358
254
  },
255
- "transformers_version": "4.39.3"
256
  }
 
156
  "no_timestamps_token_id": 50363,
157
  "pad_token_id": 50257,
158
  "prev_sot_token_id": 50361,
159
+ "return_timestamps": true,
160
  "suppress_tokens": [
161
  1,
162
  2,
 
241
  49870,
242
  50254,
243
  50258,
 
 
244
  50360,
245
  50361,
246
  50362
 
250
  "transcribe": 50359,
251
  "translate": 50358
252
  },
253
+ "transformers_version": "4.47.0"
254
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cc9e35caea900150a8af3afb08597eebafd6b199198b2d6423053bfb04d01dd
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ae4e044f3f5d408847a41059530107b10c9ca6e3ec810c47400f51d18fd8023
3
  size 966995080
tokenizer_config.json CHANGED
@@ -12980,6 +12980,7 @@
12980
  "clean_up_tokenization_spaces": true,
12981
  "eos_token": "<|endoftext|>",
12982
  "errors": "replace",
 
12983
  "model_max_length": 1024,
12984
  "pad_token": "<|endoftext|>",
12985
  "processor_class": "WhisperProcessor",
 
12980
  "clean_up_tokenization_spaces": true,
12981
  "eos_token": "<|endoftext|>",
12982
  "errors": "replace",
12983
+ "extra_special_tokens": {},
12984
  "model_max_length": 1024,
12985
  "pad_token": "<|endoftext|>",
12986
  "processor_class": "WhisperProcessor",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7ab195e9151f96b29dd039ae5f4f957b0b4fc215e39b77433158c00cd23f7e1
3
- size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbadb16405c8b9cdf24d26606306824a7db312236685841345a6f060b64119a1
3
+ size 5496