{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 58,
      "content": "<unk>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "Sequence",
    "normalizers": [
      {
        "type": "Lowercase"
      },
      {
        "type": "Replace",
        "pattern": {
          "Regex": "[^3\u0b85\u0b88\u0b89_\u0ba8\u0b95 \u0bc7\u0bbe\u0bc6\u0b8a\u0bc0'7a\u0b8f\u0bb5\u0bcd\u0bb9\u0bc2\u0bb2\u0b9f2\u0bb4\u0bb0\u0bae\u0b92\u0bb1\u0b9e\u0b9a9\u0bbf\u0bb8\u0bc8\u0ba96\u0b8e\u0b87\u0baf\u0bcc\u0ba4\u0bcb\u0bb31\u0b86\u0baa0\u0ba3\u0b93\u0b99\u0bc1\u0b90\u0b9c4\u0bb7\u0bca5]"
        },
        "content": ""
      },
      {
        "type": "Strip",
        "strip_left": true,
        "strip_right": true
      },
      {
        "type": "Replace",
        "pattern": {
          "Regex": "(?=.)|(?<!^)$"
        },
        "content": "3"
      }
    ]
  },
  "pre_tokenizer": {
    "type": "Split",
    "pattern": {
      "Regex": ""
    },
    "behavior": "Isolated",
    "invert": false
  },
  "post_processor": null,
  "decoder": null,
  "model": {
    "vocab": {
      "3": 0,
      "\u0b85": 1,
      "\u0b88": 2,
      "\u0b89": 3,
      "_": 4,
      "\u0ba8": 5,
      "\u0b95": 6,
      " ": 7,
      "\u0bc7": 8,
      "\u0bbe": 9,
      "\u0bc6": 10,
      "\u0b8a": 11,
      "\u0bc0": 12,
      "'": 13,
      "7": 14,
      "a": 15,
      "\u0b8f": 16,
      "\u0bb5": 17,
      "\u0bcd": 18,
      "\u0bb9": 19,
      "\u0bc2": 20,
      "\u0bb2": 21,
      "\u0b9f": 22,
      "2": 23,
      "\u0bb4": 24,
      "\u0bb0": 25,
      "\u0bae": 26,
      "\u0b92": 27,
      "\u0bb1": 28,
      "\u0b9e": 29,
      "\u0b9a": 30,
      "9": 31,
      "\u0bbf": 32,
      "\u0bb8": 33,
      "\u0bc8": 34,
      "\u0ba9": 35,
      "6": 36,
      "\u0b8e": 37,
      "\u0b87": 38,
      "\u0baf": 39,
      "\u0bcc": 40,
      "\u0ba4": 41,
      "\u0bcb": 42,
      "\u0bb3": 43,
      "1": 44,
      "\u0b86": 45,
      "\u0baa": 46,
      "0": 47,
      "\u0ba3": 48,
      "\u0b93": 49,
      "\u0b99": 50,
      "\u0bc1": 51,
      "\u0b90": 52,
      "\u0b9c": 53,
      "4": 54,
      "\u0bb7": 55,
      "\u0bca": 56,
      "5": 57,
      "<unk>": 58
    }
  }
}