mms-tts-vie / tokenizer.json
Xenova's picture
Xenova HF staff
Upload tokenizer.json
d99db3f
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 95,
"content": "<unk>",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "Sequence",
"normalizers": [
{
"type": "Lowercase"
},
{
"type": "Replace",
"pattern": {
"Regex": "[^\u1ee5xs\u00e8\u00ec\u1eb7\u1ee9\u01a1u\u01b0\u1eb1\u1ef3\u00e2cv\u1ec7\u1ed3_\u1ef5\u1ed9\u1ebbg\u1ea1\u0129\u1ea3\u00f5\u1ee1\u1eb3\u00f4aio\u1eeb\u1ef9\u1ed5\u00e0\u1edd\u00fd\u00f9\u2013\u1ec1\u1ec5\u1eb5\u1ecf'\u00f2\u1ed1q\u1ecd\u1ea9\u1ef1\u00e32\u1ecbe\u0111\u00f3\u1ea7\u00e9\u1edb\u1ebf\u1ee7\u1eedd\u1eef\u1eaf\u1ebd\u1eabm\u1ec3\u0169\u1ec9\u1eb9\u1ed7\u00edy\u00fa\u00e1pkt\u1ea5l\u1edf h\u1ef7\u1eadb\u0103n\u00ear\u1ee3\\-]"
},
"content": ""
},
{
"type": "Strip",
"strip_left": true,
"strip_right": true
},
{
"type": "Replace",
"pattern": {
"Regex": "(?=.)|(?<!^)$"
},
"content": "\u1ee5"
}
]
},
"pre_tokenizer": {
"type": "Split",
"pattern": {
"Regex": ""
},
"behavior": "Isolated",
"invert": false
},
"post_processor": null,
"decoder": null,
"model": {
"vocab": {
"\u1ee5": 0,
"x": 1,
"s": 2,
"\u00e8": 3,
"\u00ec": 4,
"\u1eb7": 5,
"\u1ee9": 6,
"\u01a1": 7,
"u": 8,
"\u01b0": 9,
"\u1eb1": 10,
"\u1ef3": 11,
"\u00e2": 12,
"c": 13,
"v": 14,
"\u1ec7": 15,
"\u1ed3": 16,
"_": 17,
"\u1ef5": 18,
"\u1ed9": 19,
"\u1ebb": 20,
"g": 21,
"\u1ea1": 22,
"\u0129": 23,
"\u1ea3": 24,
"\u00f5": 25,
"\u1ee1": 26,
"\u1eb3": 27,
"\u00f4": 28,
"a": 29,
"i": 30,
"o": 31,
"\u1eeb": 32,
"\u1ef9": 33,
"\u1ed5": 34,
"\u00e0": 35,
"\u1edd": 36,
"\u00fd": 37,
"\u00f9": 38,
"\u2013": 39,
"\u1ec1": 40,
"\u1ec5": 41,
"\u1eb5": 42,
"\u1ecf": 43,
"'": 44,
"\u00f2": 45,
"\u1ed1": 46,
"q": 47,
"\u1ecd": 48,
"\u1ea9": 49,
"\u1ef1": 50,
"\u00e3": 51,
"2": 52,
"\u1ecb": 53,
"e": 54,
"\u0111": 55,
"\u00f3": 56,
"\u1ea7": 57,
"\u00e9": 58,
"\u1edb": 59,
"\u1ebf": 60,
"\u1ee7": 61,
"\u1eed": 62,
"d": 63,
"\u1eef": 64,
"\u1eaf": 65,
"\u1ebd": 66,
"\u1eab": 67,
"m": 68,
"\u1ec3": 69,
"\u0169": 70,
"\u1ec9": 71,
"\u1eb9": 72,
"\u1ed7": 73,
"\u00ed": 74,
"y": 75,
"\u00fa": 76,
"\u00e1": 77,
"p": 78,
"k": 79,
"t": 80,
"\u1ea5": 81,
"l": 82,
"\u1edf": 83,
" ": 84,
"h": 85,
"\u1ef7": 86,
"\u1ead": 87,
"b": 88,
"\u0103": 89,
"n": 90,
"\u00ea": 91,
"r": 92,
"\u1ee3": 93,
"-": 94,
"<unk>": 95
}
}
}