codebyzeb committed on
Commit
ccda29c
·
verified ·
1 Parent(s): 38d9e94

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +25 -23
  2. vocab.json +1 -1
tokenizer.json CHANGED
@@ -106,31 +106,33 @@
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
  "UTT_BOUNDARY": 3,
109
- "k": 4,
110
- "s": 5,
111
- "o": 6,
112
- "b": 7,
113
- "a": 8,
114
- "h": 9,
115
- "n": 10,
116
- "t̠ʃ": 11,
117
- "i": 12,
118
- "j": 13,
119
- "d": 14,
120
- "e": 15,
121
- "ʃ": 16,
122
- "u": 17,
123
  "ɡ": 18,
124
- "r": 19,
125
- "f": 20,
126
- "t": 21,
127
- "m": 22,
128
- "d̠ʒ": 23,
129
- "l": 24,
130
- "q": 25,
131
  "v": 26,
132
- "z": 27,
133
- "p": 28
 
 
134
  },
135
  "unk_token": "UNK"
136
  }
 
106
  "PAD": 1,
107
  "WORD_BOUNDARY": 2,
108
  "UTT_BOUNDARY": 3,
109
+ "m": 4,
110
+ "": 5,
111
+ "b": 6,
112
+ "s": 7,
113
+ "e": 8,
114
+ "r": 9,
115
+ "j": 10,
116
+ "h": 11,
117
+ "t̠ʃ": 12,
118
+ "": 13,
119
+ "d̪": 14,
120
+ "": 15,
121
+ "z": 16,
122
+ "ʃ": 17,
123
  "ɡ": 18,
124
+ "i": 19,
125
+ "u": 20,
126
+ "o": 21,
127
+ "f": 22,
128
+ "t̪ʰ": 23,
129
+ "ɑ": 24,
130
+ "d̠ʒ": 25,
131
  "v": 26,
132
+ "": 27,
133
+ "l": 28,
134
+ "w": 29,
135
+ "ɢ": 30
136
  },
137
  "unk_token": "UNK"
138
  }
vocab.json CHANGED
@@ -1 +1 @@
1
- {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"k":4,"s":5,"o":6,"b":7,"a":8,"h":9,"n":10,"t̠ʃ":11,"i":12,"j":13,"d":14,"e":15,"ʃ":16,"u":17,"ɡ":18,"r":19,"f":20,"t":21,"m":22,"d̠ʒ":23,"l":24,"q":25,"v":26,"z":27,"p":28}
 
1
+ {"UNK":0,"PAD":1,"WORD_BOUNDARY":2,"UTT_BOUNDARY":3,"m":4,"":5,"b":6,"s":7,"e":8,"r":9,"j":10,"h":11,"t̠ʃ":12,"":13,"d̪":14,"":15,"z":16,"ʃ":17,"ɡ":18,"i":19,"u":20,"o":21,"f":22,"t̪ʰ":23,"ɑ":24,"d̠ʒ":25,"v":26,"":27,"l":28,"w":29,"ɢ":30}