MaghribLAB
committed on
Commit
•
0837a02
1
Parent(s):
8110a42
Upload tokenizer
Browse files
- README.md +1 -1
- tokenizer_config.json +1 -1
- vocab.json +1 -1
README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
---
|
2 |
license: cc-by-nc-4.0
|
3 |
-
base_model: facebook/mms-1b-all
|
4 |
tags:
|
5 |
- generated_from_trainer
|
|
|
6 |
model-index:
|
7 |
- name: wav2vec2-large-mms-1b-tunizi
|
8 |
results: []
|
|
|
1 |
---
|
2 |
license: cc-by-nc-4.0
|
|
|
3 |
tags:
|
4 |
- generated_from_trainer
|
5 |
+
base_model: facebook/mms-1b-all
|
6 |
model-index:
|
7 |
- name: wav2vec2-large-mms-1b-tunizi
|
8 |
results: []
|
tokenizer_config.json
CHANGED
@@ -59,7 +59,7 @@
|
|
59 |
"processor_class": "Wav2Vec2Processor",
|
60 |
"replace_word_delimiter_char": " ",
|
61 |
"return_attention_mask": false,
|
62 |
-
"target_lang": "
|
63 |
"tokenizer_class": "Wav2Vec2CTCTokenizer",
|
64 |
"unk_token": "[UNK]",
|
65 |
"word_delimiter_token": "|"
|
|
|
59 |
"processor_class": "Wav2Vec2Processor",
|
60 |
"replace_word_delimiter_char": " ",
|
61 |
"return_attention_mask": false,
|
62 |
+
"target_lang": "ara-aeb",
|
63 |
"tokenizer_class": "Wav2Vec2CTCTokenizer",
|
64 |
"unk_token": "[UNK]",
|
65 |
"word_delimiter_token": "|"
|
vocab.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"
|
3 |
"'": 1,
|
4 |
"(": 2,
|
5 |
")": 3,
|
|
|
1 |
{
|
2 |
+
"ara-aeb": {
|
3 |
"'": 1,
|
4 |
"(": 2,
|
5 |
")": 3,
|