wav2vec2-conformer-cv-15 / tokenizer_config.json
pinot's picture
Upload tokenizer
5e66999
{
"added_tokens_decoder": {
"0": {
"content": "<pad>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"1": {
"content": "<s>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"2": {
"content": "</s>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"3": {
"content": "<unk>",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"11": {
"content": "kj",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"12": {
"content": "ky",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"13": {
"content": "kw",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"15": {
"content": "sj",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"16": {
"content": "sy",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"18": {
"content": "cj",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"20": {
"content": "cy",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"22": {
"content": "nj",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"23": {
"content": "ny",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"25": {
"content": "hj",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"26": {
"content": "hy",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"29": {
"content": "my",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"32": {
"content": "ry",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"37": {
"content": "gj",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"38": {
"content": "gy",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"39": {
"content": "gw",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"41": {
"content": "zj",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"42": {
"content": "zy",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"45": {
"content": "by",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"47": {
"content": "py",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"48": {
"content": "aH",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"49": {
"content": "iH",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"50": {
"content": "uH",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"51": {
"content": "eH",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
},
"52": {
"content": "oH",
"lstrip": true,
"normalized": false,
"rstrip": true,
"single_word": false,
"special": false
}
},
"bos_token": "<s>",
"clean_up_tokenization_spaces": true,
"do_lower_case": false,
"eos_token": "</s>",
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<pad>",
"replace_word_delimiter_char": " ",
"target_lang": null,
"tokenizer_class": "Wav2Vec2CTCTokenizer",
"unk_token": "<unk>",
"word_delimiter_token": "|"
}