hubert-base-100-pr / vocab.json
voidful's picture
init commit - add tokenizer
62455ad
{
"0": 41,
"1": 21,
"2": 95,
"3": 76,
"4": 88,
"5": 91,
"6": 99,
"7": 34,
"8": 61,
"9": 17,
"AA0": 16,
"AA1": 0,
"AA2": 38,
"AE0": 102,
"AE1": 26,
"AE2": 29,
"AH0": 93,
"AH1": 78,
"AH2": 42,
"AO0": 106,
"AO1": 50,
"AO2": 68,
"AW0": 43,
"AW1": 89,
"AW2": 19,
"AY0": 40,
"AY1": 101,
"AY2": 49,
"B": 90,
"CH": 103,
"D": 51,
"DH": 5,
"EH0": 73,
"EH1": 47,
"EH2": 104,
"ER0": 48,
"ER1": 32,
"ER2": 3,
"EY0": 53,
"EY1": 84,
"EY2": 67,
"F": 18,
"G": 60,
"HH": 74,
"IH0": 31,
"IH1": 33,
"IH2": 30,
"IY0": 64,
"IY1": 44,
"IY2": 24,
"JH": 72,
"K": 39,
"L": 75,
"M": 59,
"N": 4,
"NG": 70,
"OW0": 1,
"OW1": 13,
"OW2": 52,
"OY0": 28,
"OY1": 63,
"OY2": 11,
"P": 86,
"R": 87,
"S": 25,
"SH": 69,
"T": 92,
"TH": 79,
"UH0": 62,
"UH1": 10,
"UH2": 57,
"UW0": 105,
"UW1": 23,
"UW2": 81,
"V": 27,
"W": 100,
"Y": 35,
"Z": 45,
"ZH": 46,
"[PAD]": 56,
"[UNK]": 20,
"a": 83,
"b": 12,
"c": 8,
"d": 6,
"e": 98,
"f": 37,
"g": 107,
"h": 7,
"i": 58,
"j": 85,
"k": 14,
"l": 66,
"m": 82,
"n": 9,
"o": 36,
"p": 15,
"q": 65,
"r": 77,
"s": 55,
"t": 22,
"u": 94,
"v": 54,
"w": 80,
"x": 96,
"y": 97,
"z": 71,
"|": 2
}