updated
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- data/Abkhaz-Adyghe.json +27 -27
- data/Afro-Asiatic.json +0 -0
- data/Algic.json +179 -179
- data/Amto-Musan.json +9 -9
- data/Andamanese.json +57 -57
- data/Arafundi.json +12 -12
- data/Arai (Left May).json +21 -21
- data/Arauan.json +24 -24
- data/Australian.json +0 -0
- data/Austro-Asiatic.json +0 -0
- data/Austronesian.json +0 -0
- data/Aymaran.json +18 -18
- data/Barbacoan.json +18 -18
- data/Bayono-Awbono.json +9 -9
- data/Border.json +57 -57
- data/Bororoan.json +12 -12
- data/Botocudoan.json +6 -6
- data/Caddoan.json +27 -27
- data/Cahuapanan.json +9 -9
- data/Cariban.json +159 -159
- data/Central Solomons.json +15 -15
- data/Chapacuran.json +21 -21
- data/Chibchan.json +108 -108
- data/Chimakuan.json +9 -9
- data/Chinookan.json +12 -12
- data/Chipaya-Uru.json +9 -9
- data/Chocoan.json +33 -33
- data/Cholonan.json +9 -9
- data/Chon.json +12 -12
- data/Chukotko-Kamchatkan.json +30 -30
- data/Chumashan.json +27 -27
- data/Cochimí-Yuman.json +45 -45
- data/Comecrudan.json +18 -18
- data/Constructed language.json +6 -6
- data/Coosan.json +9 -9
- data/Creole.json +639 -1185
- data/Dravidian.json +562 -891
- data/East Bird’s Head-Sentani.json +48 -48
- data/East Geelvink Bay.json +42 -42
- data/East New Britain.json +30 -30
- data/Eastern Trans-Fly.json +15 -15
- data/Eskimo-Aleut.json +51 -51
- data/Eyak-Athabaskan.json +186 -186
- data/Fas.json +9 -9
- data/Guajiboan.json +21 -21
- data/Guaykuruan.json +24 -24
- data/Gum.json +3 -3
- data/Haida.json +9 -9
- data/Harákmbut.json +9 -9
- data/Hmong-Mien.json +153 -153
data/Abkhaz-Adyghe.json
CHANGED
@@ -2,101 +2,101 @@
|
|
2 |
"name": "Abkhaz-Adyghe",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Abkhaz-Abazin",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Abkhaz",
|
15 |
"iso_1_code": "ab",
|
16 |
"iso_3_code": "abk",
|
17 |
-
"tokenizers": {},
|
18 |
"children": [],
|
|
|
19 |
"node_i": "2",
|
|
|
20 |
"scripts": [
|
21 |
"Cyrl"
|
22 |
-
]
|
23 |
-
"own_tokenizer": false
|
24 |
},
|
25 |
{
|
26 |
"name": "Abaza",
|
27 |
"iso_1_code": null,
|
28 |
"iso_3_code": "abq",
|
29 |
-
"tokenizers": {},
|
30 |
"children": [],
|
|
|
31 |
"node_i": "3",
|
|
|
32 |
"scripts": [
|
33 |
"Cyrl"
|
34 |
-
]
|
35 |
-
"own_tokenizer": false
|
36 |
}
|
37 |
],
|
|
|
38 |
"node_i": "1",
|
39 |
-
"
|
40 |
-
"
|
41 |
},
|
42 |
{
|
43 |
"name": "Circassian",
|
44 |
"iso_1_code": null,
|
45 |
"iso_3_code": null,
|
46 |
-
"tokenizers": {},
|
47 |
"children": [
|
48 |
{
|
49 |
"name": "Adyghe",
|
50 |
"iso_1_code": null,
|
51 |
"iso_3_code": "ady",
|
52 |
-
"tokenizers": {},
|
53 |
"children": [],
|
|
|
54 |
"node_i": "5",
|
|
|
55 |
"scripts": [
|
56 |
"Cyrl"
|
57 |
-
]
|
58 |
-
"own_tokenizer": false
|
59 |
},
|
60 |
{
|
61 |
"name": "Kabardian",
|
62 |
"iso_1_code": null,
|
63 |
"iso_3_code": "kbd",
|
64 |
-
"tokenizers": {},
|
65 |
"children": [],
|
|
|
66 |
"node_i": "6",
|
|
|
67 |
"scripts": [
|
68 |
"Cyrl"
|
69 |
-
]
|
70 |
-
"own_tokenizer": false
|
71 |
}
|
72 |
],
|
|
|
73 |
"node_i": "4",
|
74 |
-
"
|
75 |
-
"
|
76 |
},
|
77 |
{
|
78 |
"name": "Ubyx",
|
79 |
"iso_1_code": null,
|
80 |
"iso_3_code": null,
|
81 |
-
"tokenizers": {},
|
82 |
"children": [
|
83 |
{
|
84 |
"name": "Ubykh",
|
85 |
"iso_1_code": null,
|
86 |
"iso_3_code": "uby",
|
87 |
-
"tokenizers": {},
|
88 |
"children": [],
|
|
|
89 |
"node_i": "8",
|
90 |
-
"
|
91 |
-
"
|
92 |
}
|
93 |
],
|
|
|
94 |
"node_i": "7",
|
95 |
-
"
|
96 |
-
"
|
97 |
}
|
98 |
],
|
|
|
99 |
"node_i": "0",
|
100 |
-
"
|
101 |
-
"
|
102 |
}
|
|
|
2 |
"name": "Abkhaz-Adyghe",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Abkhaz-Abazin",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Abkhaz",
|
13 |
"iso_1_code": "ab",
|
14 |
"iso_3_code": "abk",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "2",
|
18 |
+
"native_tokenizers": [],
|
19 |
"scripts": [
|
20 |
"Cyrl"
|
21 |
+
]
|
|
|
22 |
},
|
23 |
{
|
24 |
"name": "Abaza",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": "abq",
|
|
|
27 |
"children": [],
|
28 |
+
"tokenizers": {},
|
29 |
"node_i": "3",
|
30 |
+
"native_tokenizers": [],
|
31 |
"scripts": [
|
32 |
"Cyrl"
|
33 |
+
]
|
|
|
34 |
}
|
35 |
],
|
36 |
+
"tokenizers": {},
|
37 |
"node_i": "1",
|
38 |
+
"native_tokenizers": [],
|
39 |
+
"scripts": []
|
40 |
},
|
41 |
{
|
42 |
"name": "Circassian",
|
43 |
"iso_1_code": null,
|
44 |
"iso_3_code": null,
|
|
|
45 |
"children": [
|
46 |
{
|
47 |
"name": "Adyghe",
|
48 |
"iso_1_code": null,
|
49 |
"iso_3_code": "ady",
|
|
|
50 |
"children": [],
|
51 |
+
"tokenizers": {},
|
52 |
"node_i": "5",
|
53 |
+
"native_tokenizers": [],
|
54 |
"scripts": [
|
55 |
"Cyrl"
|
56 |
+
]
|
|
|
57 |
},
|
58 |
{
|
59 |
"name": "Kabardian",
|
60 |
"iso_1_code": null,
|
61 |
"iso_3_code": "kbd",
|
|
|
62 |
"children": [],
|
63 |
+
"tokenizers": {},
|
64 |
"node_i": "6",
|
65 |
+
"native_tokenizers": [],
|
66 |
"scripts": [
|
67 |
"Cyrl"
|
68 |
+
]
|
|
|
69 |
}
|
70 |
],
|
71 |
+
"tokenizers": {},
|
72 |
"node_i": "4",
|
73 |
+
"native_tokenizers": [],
|
74 |
+
"scripts": []
|
75 |
},
|
76 |
{
|
77 |
"name": "Ubyx",
|
78 |
"iso_1_code": null,
|
79 |
"iso_3_code": null,
|
|
|
80 |
"children": [
|
81 |
{
|
82 |
"name": "Ubykh",
|
83 |
"iso_1_code": null,
|
84 |
"iso_3_code": "uby",
|
|
|
85 |
"children": [],
|
86 |
+
"tokenizers": {},
|
87 |
"node_i": "8",
|
88 |
+
"native_tokenizers": [],
|
89 |
+
"scripts": []
|
90 |
}
|
91 |
],
|
92 |
+
"tokenizers": {},
|
93 |
"node_i": "7",
|
94 |
+
"native_tokenizers": [],
|
95 |
+
"scripts": []
|
96 |
}
|
97 |
],
|
98 |
+
"tokenizers": {},
|
99 |
"node_i": "0",
|
100 |
+
"native_tokenizers": [],
|
101 |
+
"scripts": []
|
102 |
}
|
data/Afro-Asiatic.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/Algic.json
CHANGED
@@ -2,630 +2,630 @@
|
|
2 |
"name": "Algic",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Algonquian",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Blackfoot",
|
15 |
"iso_1_code": null,
|
16 |
"iso_3_code": "bla",
|
17 |
-
"tokenizers": {},
|
18 |
"children": [],
|
|
|
19 |
"node_i": "558",
|
|
|
20 |
"scripts": [
|
21 |
"Latn"
|
22 |
-
]
|
23 |
-
"own_tokenizer": false
|
24 |
},
|
25 |
{
|
26 |
"name": "Cheyenne",
|
27 |
"iso_1_code": null,
|
28 |
"iso_3_code": "chy",
|
29 |
-
"tokenizers": {},
|
30 |
"children": [],
|
|
|
31 |
"node_i": "559",
|
32 |
-
"
|
33 |
-
"
|
34 |
},
|
35 |
{
|
36 |
"name": "Menominee",
|
37 |
"iso_1_code": null,
|
38 |
"iso_3_code": "mez",
|
39 |
-
"tokenizers": {},
|
40 |
"children": [],
|
|
|
41 |
"node_i": "560",
|
42 |
-
"
|
43 |
-
"
|
44 |
},
|
45 |
{
|
46 |
"name": "Miami",
|
47 |
"iso_1_code": null,
|
48 |
"iso_3_code": "mia",
|
49 |
-
"tokenizers": {},
|
50 |
"children": [],
|
|
|
51 |
"node_i": "561",
|
52 |
-
"
|
53 |
-
"
|
54 |
},
|
55 |
{
|
56 |
"name": "Nawathinehena",
|
57 |
"iso_1_code": null,
|
58 |
"iso_3_code": "nwa",
|
59 |
-
"tokenizers": {},
|
60 |
"children": [],
|
|
|
61 |
"node_i": "562",
|
62 |
-
"
|
63 |
-
"
|
64 |
},
|
65 |
{
|
66 |
"name": "Shawnee",
|
67 |
"iso_1_code": null,
|
68 |
"iso_3_code": "sjw",
|
69 |
-
"tokenizers": {},
|
70 |
"children": [],
|
|
|
71 |
"node_i": "563",
|
72 |
-
"
|
73 |
-
"
|
74 |
},
|
75 |
{
|
76 |
"name": "Arapaho",
|
77 |
"iso_1_code": null,
|
78 |
"iso_3_code": null,
|
79 |
-
"tokenizers": {},
|
80 |
"children": [
|
81 |
{
|
82 |
"name": "Arapaho",
|
83 |
"iso_1_code": null,
|
84 |
"iso_3_code": "arp",
|
85 |
-
"tokenizers": {},
|
86 |
"children": [],
|
|
|
87 |
"node_i": "565",
|
|
|
88 |
"scripts": [
|
89 |
"Latn"
|
90 |
-
]
|
91 |
-
"own_tokenizer": false
|
92 |
},
|
93 |
{
|
94 |
"name": "Gros Ventre",
|
95 |
"iso_1_code": null,
|
96 |
"iso_3_code": "ats",
|
97 |
-
"tokenizers": {},
|
98 |
"children": [],
|
|
|
99 |
"node_i": "566",
|
100 |
-
"
|
101 |
-
"
|
102 |
}
|
103 |
],
|
|
|
104 |
"node_i": "564",
|
105 |
-
"
|
106 |
-
"
|
107 |
},
|
108 |
{
|
109 |
"name": "Cree-Montagnais",
|
110 |
"iso_1_code": null,
|
111 |
"iso_3_code": null,
|
112 |
-
"tokenizers": {},
|
113 |
"children": [
|
114 |
{
|
115 |
"name": "Atikamekw",
|
116 |
"iso_1_code": null,
|
117 |
"iso_3_code": "atj",
|
118 |
-
"tokenizers": {},
|
119 |
"children": [],
|
|
|
120 |
"node_i": "568",
|
|
|
121 |
"scripts": [
|
122 |
"Latn"
|
123 |
-
]
|
124 |
-
"own_tokenizer": false
|
125 |
},
|
126 |
{
|
127 |
"name": "Cree, Southern East",
|
128 |
"iso_1_code": "cr",
|
129 |
"iso_3_code": "crj",
|
130 |
-
"tokenizers": {},
|
131 |
"children": [],
|
|
|
132 |
"node_i": "569",
|
|
|
133 |
"scripts": [
|
134 |
"Cans"
|
135 |
-
]
|
136 |
-
"own_tokenizer": false
|
137 |
},
|
138 |
{
|
139 |
"name": "Cree, Plains",
|
140 |
"iso_1_code": "cr",
|
141 |
"iso_3_code": "crk",
|
142 |
-
"tokenizers": {},
|
143 |
"children": [],
|
|
|
144 |
"node_i": "570",
|
|
|
145 |
"scripts": [
|
146 |
"Latn",
|
147 |
"Cans"
|
148 |
-
]
|
149 |
-
"own_tokenizer": false
|
150 |
},
|
151 |
{
|
152 |
"name": "Cree, Northern East",
|
153 |
"iso_1_code": "cr",
|
154 |
"iso_3_code": "crl",
|
155 |
-
"tokenizers": {},
|
156 |
"children": [],
|
|
|
157 |
"node_i": "571",
|
|
|
158 |
"scripts": [
|
159 |
"Cans"
|
160 |
-
]
|
161 |
-
"own_tokenizer": false
|
162 |
},
|
163 |
{
|
164 |
"name": "Cree, Moose",
|
165 |
"iso_1_code": "cr",
|
166 |
"iso_3_code": "crm",
|
167 |
-
"tokenizers": {},
|
168 |
"children": [],
|
|
|
169 |
"node_i": "572",
|
|
|
170 |
"scripts": [
|
171 |
"Cans"
|
172 |
-
]
|
173 |
-
"own_tokenizer": false
|
174 |
},
|
175 |
{
|
176 |
"name": "Cree, Swampy",
|
177 |
"iso_1_code": "cr",
|
178 |
"iso_3_code": "csw",
|
179 |
-
"tokenizers": {},
|
180 |
"children": [],
|
|
|
181 |
"node_i": "573",
|
|
|
182 |
"scripts": [
|
183 |
"Latn"
|
184 |
-
]
|
185 |
-
"own_tokenizer": false
|
186 |
},
|
187 |
{
|
188 |
"name": "Cree, Woods",
|
189 |
"iso_1_code": "cr",
|
190 |
"iso_3_code": "cwd",
|
191 |
-
"tokenizers": {},
|
192 |
"children": [],
|
|
|
193 |
"node_i": "574",
|
|
|
194 |
"scripts": [
|
195 |
"Cans"
|
196 |
-
]
|
197 |
-
"own_tokenizer": false
|
198 |
},
|
199 |
{
|
200 |
"name": "Innu",
|
201 |
"iso_1_code": null,
|
202 |
"iso_3_code": "moe",
|
203 |
-
"tokenizers": {},
|
204 |
"children": [],
|
|
|
205 |
"node_i": "575",
|
206 |
-
"
|
207 |
-
"
|
208 |
},
|
209 |
{
|
210 |
"name": "Naskapi",
|
211 |
"iso_1_code": null,
|
212 |
"iso_3_code": "nsk",
|
213 |
-
"tokenizers": {},
|
214 |
"children": [],
|
|
|
215 |
"node_i": "576",
|
216 |
-
"
|
217 |
-
"
|
218 |
}
|
219 |
],
|
|
|
220 |
"node_i": "567",
|
221 |
-
"
|
222 |
-
"
|
223 |
},
|
224 |
{
|
225 |
"name": "Eastern Algonquian",
|
226 |
"iso_1_code": null,
|
227 |
"iso_3_code": null,
|
228 |
-
"tokenizers": {},
|
229 |
"children": [
|
230 |
{
|
231 |
"name": "Carolina Algonquian",
|
232 |
"iso_1_code": null,
|
233 |
"iso_3_code": "crr",
|
234 |
-
"tokenizers": {},
|
235 |
"children": [],
|
|
|
236 |
"node_i": "578",
|
237 |
-
"
|
238 |
-
"
|
239 |
},
|
240 |
{
|
241 |
"name": "Etchemin",
|
242 |
"iso_1_code": null,
|
243 |
"iso_3_code": "etc",
|
244 |
-
"tokenizers": {},
|
245 |
"children": [],
|
|
|
246 |
"node_i": "579",
|
247 |
-
"
|
248 |
-
"
|
249 |
},
|
250 |
{
|
251 |
"name": "Mi\u2019kmaq",
|
252 |
"iso_1_code": null,
|
253 |
"iso_3_code": "mic",
|
254 |
-
"tokenizers": {},
|
255 |
"children": [],
|
|
|
256 |
"node_i": "580",
|
|
|
257 |
"scripts": [
|
258 |
"Latn"
|
259 |
-
]
|
260 |
-
"own_tokenizer": false
|
261 |
},
|
262 |
{
|
263 |
"name": "Powhatan",
|
264 |
"iso_1_code": null,
|
265 |
"iso_3_code": "pim",
|
266 |
-
"tokenizers": {},
|
267 |
"children": [],
|
|
|
268 |
"node_i": "581",
|
269 |
-
"
|
270 |
-
"
|
271 |
},
|
272 |
{
|
273 |
"name": "Malecite-Passamaquoddy",
|
274 |
"iso_1_code": null,
|
275 |
"iso_3_code": "pqm",
|
276 |
-
"tokenizers": {},
|
277 |
"children": [],
|
|
|
278 |
"node_i": "582",
|
279 |
-
"
|
280 |
-
"
|
281 |
},
|
282 |
{
|
283 |
"name": "Quiripi",
|
284 |
"iso_1_code": null,
|
285 |
"iso_3_code": "qyp",
|
286 |
-
"tokenizers": {},
|
287 |
"children": [],
|
|
|
288 |
"node_i": "583",
|
289 |
-
"
|
290 |
-
"
|
291 |
},
|
292 |
{
|
293 |
"name": "Wampanoag",
|
294 |
"iso_1_code": null,
|
295 |
"iso_3_code": "wam",
|
296 |
-
"tokenizers": {},
|
297 |
"children": [],
|
|
|
298 |
"node_i": "584",
|
299 |
-
"
|
300 |
-
"
|
301 |
},
|
302 |
{
|
303 |
"name": "Loup B",
|
304 |
"iso_1_code": null,
|
305 |
"iso_3_code": "xlb",
|
306 |
-
"tokenizers": {},
|
307 |
"children": [],
|
|
|
308 |
"node_i": "585",
|
309 |
-
"
|
310 |
-
"
|
311 |
},
|
312 |
{
|
313 |
"name": "Loup A",
|
314 |
"iso_1_code": null,
|
315 |
"iso_3_code": "xlo",
|
316 |
-
"tokenizers": {},
|
317 |
"children": [],
|
|
|
318 |
"node_i": "586",
|
319 |
-
"
|
320 |
-
"
|
321 |
},
|
322 |
{
|
323 |
"name": "Narragansett",
|
324 |
"iso_1_code": null,
|
325 |
"iso_3_code": "xnt",
|
326 |
-
"tokenizers": {},
|
327 |
"children": [],
|
|
|
328 |
"node_i": "587",
|
329 |
-
"
|
330 |
-
"
|
331 |
},
|
332 |
{
|
333 |
"name": "Mohegan-Pequot",
|
334 |
"iso_1_code": null,
|
335 |
"iso_3_code": "xpq",
|
336 |
-
"tokenizers": {},
|
337 |
"children": [],
|
|
|
338 |
"node_i": "588",
|
339 |
-
"
|
340 |
-
"
|
341 |
},
|
342 |
{
|
343 |
"name": "Abenaki",
|
344 |
"iso_1_code": null,
|
345 |
"iso_3_code": null,
|
346 |
-
"tokenizers": {},
|
347 |
"children": [
|
348 |
{
|
349 |
"name": "Abenaki, Eastern",
|
350 |
"iso_1_code": null,
|
351 |
"iso_3_code": "aaq",
|
352 |
-
"tokenizers": {},
|
353 |
"children": [],
|
|
|
354 |
"node_i": "590",
|
355 |
-
"
|
356 |
-
"
|
357 |
},
|
358 |
{
|
359 |
"name": "Abenaki, Western",
|
360 |
"iso_1_code": null,
|
361 |
"iso_3_code": "abe",
|
362 |
-
"tokenizers": {},
|
363 |
"children": [],
|
|
|
364 |
"node_i": "591",
|
365 |
-
"
|
366 |
-
"
|
367 |
}
|
368 |
],
|
|
|
369 |
"node_i": "589",
|
370 |
-
"
|
371 |
-
"
|
372 |
},
|
373 |
{
|
374 |
"name": "Delaware",
|
375 |
"iso_1_code": null,
|
376 |
"iso_3_code": null,
|
377 |
-
"tokenizers": {},
|
378 |
"children": [
|
379 |
{
|
380 |
"name": "Mahican",
|
381 |
"iso_1_code": null,
|
382 |
"iso_3_code": "mjy",
|
383 |
-
"tokenizers": {},
|
384 |
"children": [],
|
|
|
385 |
"node_i": "593",
|
386 |
-
"
|
387 |
-
"
|
388 |
},
|
389 |
{
|
390 |
"name": "Munsee",
|
391 |
"iso_1_code": null,
|
392 |
"iso_3_code": "umu",
|
393 |
-
"tokenizers": {},
|
394 |
"children": [],
|
|
|
395 |
"node_i": "594",
|
396 |
-
"
|
397 |
-
"
|
398 |
},
|
399 |
{
|
400 |
"name": "Unami",
|
401 |
"iso_1_code": null,
|
402 |
"iso_3_code": "unm",
|
403 |
-
"tokenizers": {},
|
404 |
"children": [],
|
|
|
405 |
"node_i": "595",
|
406 |
-
"
|
407 |
-
"
|
408 |
}
|
409 |
],
|
|
|
410 |
"node_i": "592",
|
411 |
-
"
|
412 |
-
"
|
413 |
},
|
414 |
{
|
415 |
"name": "Nanticoke-Conoy",
|
416 |
"iso_1_code": null,
|
417 |
"iso_3_code": null,
|
418 |
-
"tokenizers": {},
|
419 |
"children": [
|
420 |
{
|
421 |
"name": "Nanticoke",
|
422 |
"iso_1_code": null,
|
423 |
"iso_3_code": "nnt",
|
424 |
-
"tokenizers": {},
|
425 |
"children": [],
|
|
|
426 |
"node_i": "597",
|
427 |
-
"
|
428 |
-
"
|
429 |
},
|
430 |
{
|
431 |
"name": "Piscataway",
|
432 |
"iso_1_code": null,
|
433 |
"iso_3_code": "psy",
|
434 |
-
"tokenizers": {},
|
435 |
"children": [],
|
|
|
436 |
"node_i": "598",
|
437 |
-
"
|
438 |
-
"
|
439 |
}
|
440 |
],
|
|
|
441 |
"node_i": "596",
|
442 |
-
"
|
443 |
-
"
|
444 |
}
|
445 |
],
|
|
|
446 |
"node_i": "577",
|
447 |
-
"
|
448 |
-
"
|
449 |
},
|
450 |
{
|
451 |
"name": "Fox",
|
452 |
"iso_1_code": null,
|
453 |
"iso_3_code": null,
|
454 |
-
"tokenizers": {},
|
455 |
"children": [
|
456 |
{
|
457 |
"name": "Kickapoo",
|
458 |
"iso_1_code": null,
|
459 |
"iso_3_code": "kic",
|
460 |
-
"tokenizers": {},
|
461 |
"children": [],
|
|
|
462 |
"node_i": "600",
|
463 |
-
"
|
464 |
-
"
|
465 |
},
|
466 |
{
|
467 |
"name": "Meskwaki",
|
468 |
"iso_1_code": null,
|
469 |
"iso_3_code": "sac",
|
470 |
-
"tokenizers": {},
|
471 |
"children": [],
|
|
|
472 |
"node_i": "601",
|
473 |
-
"
|
474 |
-
"
|
475 |
}
|
476 |
],
|
|
|
477 |
"node_i": "599",
|
478 |
-
"
|
479 |
-
"
|
480 |
},
|
481 |
{
|
482 |
"name": "Ojibwa-Potawatomi",
|
483 |
"iso_1_code": null,
|
484 |
"iso_3_code": null,
|
485 |
-
"tokenizers": {},
|
486 |
"children": [
|
487 |
{
|
488 |
"name": "Algonquin",
|
489 |
"iso_1_code": null,
|
490 |
"iso_3_code": "alq",
|
491 |
-
"tokenizers": {},
|
492 |
"children": [],
|
|
|
493 |
"node_i": "603",
|
|
|
494 |
"scripts": [
|
495 |
"Latn"
|
496 |
-
]
|
497 |
-
"own_tokenizer": false
|
498 |
},
|
499 |
{
|
500 |
"name": "Chippewa",
|
501 |
"iso_1_code": "oj",
|
502 |
"iso_3_code": "ciw",
|
503 |
-
"tokenizers": {},
|
504 |
"children": [],
|
|
|
505 |
"node_i": "604",
|
506 |
-
"
|
507 |
-
"
|
508 |
},
|
509 |
{
|
510 |
"name": "Ojibwa, Northwestern",
|
511 |
"iso_1_code": "oj",
|
512 |
"iso_3_code": "ojb",
|
513 |
-
"tokenizers": {},
|
514 |
"children": [],
|
|
|
515 |
"node_i": "605",
|
|
|
516 |
"scripts": [
|
517 |
-
"
|
518 |
-
"
|
519 |
-
]
|
520 |
-
"own_tokenizer": false
|
521 |
},
|
522 |
{
|
523 |
"name": "Ojibwa, Central",
|
524 |
"iso_1_code": "oj",
|
525 |
"iso_3_code": "ojc",
|
526 |
-
"tokenizers": {},
|
527 |
"children": [],
|
|
|
528 |
"node_i": "606",
|
529 |
-
"
|
530 |
-
"
|
531 |
},
|
532 |
{
|
533 |
"name": "Ojibwa, Eastern",
|
534 |
"iso_1_code": "oj",
|
535 |
"iso_3_code": "ojg",
|
536 |
-
"tokenizers": {},
|
537 |
"children": [],
|
|
|
538 |
"node_i": "607",
|
539 |
-
"
|
540 |
-
"
|
541 |
},
|
542 |
{
|
543 |
"name": "Oji-Cree",
|
544 |
"iso_1_code": "oj",
|
545 |
"iso_3_code": "ojs",
|
546 |
-
"tokenizers": {},
|
547 |
"children": [],
|
|
|
548 |
"node_i": "608",
|
549 |
-
"
|
550 |
-
"
|
551 |
},
|
552 |
{
|
553 |
"name": "Ojibwa, Western",
|
554 |
"iso_1_code": "oj",
|
555 |
"iso_3_code": "ojw",
|
556 |
-
"tokenizers": {},
|
557 |
"children": [],
|
|
|
558 |
"node_i": "609",
|
559 |
-
"
|
560 |
-
"
|
561 |
},
|
562 |
{
|
563 |
"name": "Ottawa",
|
564 |
"iso_1_code": "oj",
|
565 |
"iso_3_code": "otw",
|
566 |
-
"tokenizers": {},
|
567 |
"children": [],
|
|
|
568 |
"node_i": "610",
|
|
|
569 |
"scripts": [
|
570 |
"Latn"
|
571 |
-
]
|
572 |
-
"own_tokenizer": false
|
573 |
},
|
574 |
{
|
575 |
"name": "Potawatomi",
|
576 |
"iso_1_code": null,
|
577 |
"iso_3_code": "pot",
|
578 |
-
"tokenizers": {},
|
579 |
"children": [],
|
|
|
580 |
"node_i": "611",
|
|
|
581 |
"scripts": [
|
582 |
"Latn"
|
583 |
-
]
|
584 |
-
"own_tokenizer": false
|
585 |
}
|
586 |
],
|
|
|
587 |
"node_i": "602",
|
588 |
-
"
|
589 |
-
"
|
590 |
}
|
591 |
],
|
|
|
592 |
"node_i": "557",
|
593 |
-
"
|
594 |
-
"
|
595 |
},
|
596 |
{
|
597 |
"name": "Ritwan",
|
598 |
"iso_1_code": null,
|
599 |
"iso_3_code": null,
|
600 |
-
"tokenizers": {},
|
601 |
"children": [
|
602 |
{
|
603 |
"name": "Wiyot",
|
604 |
"iso_1_code": null,
|
605 |
"iso_3_code": "wiy",
|
606 |
-
"tokenizers": {},
|
607 |
"children": [],
|
|
|
608 |
"node_i": "613",
|
609 |
-
"
|
610 |
-
"
|
611 |
},
|
612 |
{
|
613 |
"name": "Yurok",
|
614 |
"iso_1_code": null,
|
615 |
"iso_3_code": "yur",
|
616 |
-
"tokenizers": {},
|
617 |
"children": [],
|
|
|
618 |
"node_i": "614",
|
619 |
-
"
|
620 |
-
"
|
621 |
}
|
622 |
],
|
|
|
623 |
"node_i": "612",
|
624 |
-
"
|
625 |
-
"
|
626 |
}
|
627 |
],
|
|
|
628 |
"node_i": "556",
|
629 |
-
"
|
630 |
-
"
|
631 |
}
|
|
|
2 |
"name": "Algic",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Algonquian",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Blackfoot",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": "bla",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "558",
|
18 |
+
"native_tokenizers": [],
|
19 |
"scripts": [
|
20 |
"Latn"
|
21 |
+
]
|
|
|
22 |
},
|
23 |
{
|
24 |
"name": "Cheyenne",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": "chy",
|
|
|
27 |
"children": [],
|
28 |
+
"tokenizers": {},
|
29 |
"node_i": "559",
|
30 |
+
"native_tokenizers": [],
|
31 |
+
"scripts": []
|
32 |
},
|
33 |
{
|
34 |
"name": "Menominee",
|
35 |
"iso_1_code": null,
|
36 |
"iso_3_code": "mez",
|
|
|
37 |
"children": [],
|
38 |
+
"tokenizers": {},
|
39 |
"node_i": "560",
|
40 |
+
"native_tokenizers": [],
|
41 |
+
"scripts": []
|
42 |
},
|
43 |
{
|
44 |
"name": "Miami",
|
45 |
"iso_1_code": null,
|
46 |
"iso_3_code": "mia",
|
|
|
47 |
"children": [],
|
48 |
+
"tokenizers": {},
|
49 |
"node_i": "561",
|
50 |
+
"native_tokenizers": [],
|
51 |
+
"scripts": []
|
52 |
},
|
53 |
{
|
54 |
"name": "Nawathinehena",
|
55 |
"iso_1_code": null,
|
56 |
"iso_3_code": "nwa",
|
|
|
57 |
"children": [],
|
58 |
+
"tokenizers": {},
|
59 |
"node_i": "562",
|
60 |
+
"native_tokenizers": [],
|
61 |
+
"scripts": []
|
62 |
},
|
63 |
{
|
64 |
"name": "Shawnee",
|
65 |
"iso_1_code": null,
|
66 |
"iso_3_code": "sjw",
|
|
|
67 |
"children": [],
|
68 |
+
"tokenizers": {},
|
69 |
"node_i": "563",
|
70 |
+
"native_tokenizers": [],
|
71 |
+
"scripts": []
|
72 |
},
|
73 |
{
|
74 |
"name": "Arapaho",
|
75 |
"iso_1_code": null,
|
76 |
"iso_3_code": null,
|
|
|
77 |
"children": [
|
78 |
{
|
79 |
"name": "Arapaho",
|
80 |
"iso_1_code": null,
|
81 |
"iso_3_code": "arp",
|
|
|
82 |
"children": [],
|
83 |
+
"tokenizers": {},
|
84 |
"node_i": "565",
|
85 |
+
"native_tokenizers": [],
|
86 |
"scripts": [
|
87 |
"Latn"
|
88 |
+
]
|
|
|
89 |
},
|
90 |
{
|
91 |
"name": "Gros Ventre",
|
92 |
"iso_1_code": null,
|
93 |
"iso_3_code": "ats",
|
|
|
94 |
"children": [],
|
95 |
+
"tokenizers": {},
|
96 |
"node_i": "566",
|
97 |
+
"native_tokenizers": [],
|
98 |
+
"scripts": []
|
99 |
}
|
100 |
],
|
101 |
+
"tokenizers": {},
|
102 |
"node_i": "564",
|
103 |
+
"native_tokenizers": [],
|
104 |
+
"scripts": []
|
105 |
},
|
106 |
{
|
107 |
"name": "Cree-Montagnais",
|
108 |
"iso_1_code": null,
|
109 |
"iso_3_code": null,
|
|
|
110 |
"children": [
|
111 |
{
|
112 |
"name": "Atikamekw",
|
113 |
"iso_1_code": null,
|
114 |
"iso_3_code": "atj",
|
|
|
115 |
"children": [],
|
116 |
+
"tokenizers": {},
|
117 |
"node_i": "568",
|
118 |
+
"native_tokenizers": [],
|
119 |
"scripts": [
|
120 |
"Latn"
|
121 |
+
]
|
|
|
122 |
},
|
123 |
{
|
124 |
"name": "Cree, Southern East",
|
125 |
"iso_1_code": "cr",
|
126 |
"iso_3_code": "crj",
|
|
|
127 |
"children": [],
|
128 |
+
"tokenizers": {},
|
129 |
"node_i": "569",
|
130 |
+
"native_tokenizers": [],
|
131 |
"scripts": [
|
132 |
"Cans"
|
133 |
+
]
|
|
|
134 |
},
|
135 |
{
|
136 |
"name": "Cree, Plains",
|
137 |
"iso_1_code": "cr",
|
138 |
"iso_3_code": "crk",
|
|
|
139 |
"children": [],
|
140 |
+
"tokenizers": {},
|
141 |
"node_i": "570",
|
142 |
+
"native_tokenizers": [],
|
143 |
"scripts": [
|
144 |
"Latn",
|
145 |
"Cans"
|
146 |
+
]
|
|
|
147 |
},
|
148 |
{
|
149 |
"name": "Cree, Northern East",
|
150 |
"iso_1_code": "cr",
|
151 |
"iso_3_code": "crl",
|
|
|
152 |
"children": [],
|
153 |
+
"tokenizers": {},
|
154 |
"node_i": "571",
|
155 |
+
"native_tokenizers": [],
|
156 |
"scripts": [
|
157 |
"Cans"
|
158 |
+
]
|
|
|
159 |
},
|
160 |
{
|
161 |
"name": "Cree, Moose",
|
162 |
"iso_1_code": "cr",
|
163 |
"iso_3_code": "crm",
|
|
|
164 |
"children": [],
|
165 |
+
"tokenizers": {},
|
166 |
"node_i": "572",
|
167 |
+
"native_tokenizers": [],
|
168 |
"scripts": [
|
169 |
"Cans"
|
170 |
+
]
|
|
|
171 |
},
|
172 |
{
|
173 |
"name": "Cree, Swampy",
|
174 |
"iso_1_code": "cr",
|
175 |
"iso_3_code": "csw",
|
|
|
176 |
"children": [],
|
177 |
+
"tokenizers": {},
|
178 |
"node_i": "573",
|
179 |
+
"native_tokenizers": [],
|
180 |
"scripts": [
|
181 |
"Latn"
|
182 |
+
]
|
|
|
183 |
},
|
184 |
{
|
185 |
"name": "Cree, Woods",
|
186 |
"iso_1_code": "cr",
|
187 |
"iso_3_code": "cwd",
|
|
|
188 |
"children": [],
|
189 |
+
"tokenizers": {},
|
190 |
"node_i": "574",
|
191 |
+
"native_tokenizers": [],
|
192 |
"scripts": [
|
193 |
"Cans"
|
194 |
+
]
|
|
|
195 |
},
|
196 |
{
|
197 |
"name": "Innu",
|
198 |
"iso_1_code": null,
|
199 |
"iso_3_code": "moe",
|
|
|
200 |
"children": [],
|
201 |
+
"tokenizers": {},
|
202 |
"node_i": "575",
|
203 |
+
"native_tokenizers": [],
|
204 |
+
"scripts": []
|
205 |
},
|
206 |
{
|
207 |
"name": "Naskapi",
|
208 |
"iso_1_code": null,
|
209 |
"iso_3_code": "nsk",
|
|
|
210 |
"children": [],
|
211 |
+
"tokenizers": {},
|
212 |
"node_i": "576",
|
213 |
+
"native_tokenizers": [],
|
214 |
+
"scripts": []
|
215 |
}
|
216 |
],
|
217 |
+
"tokenizers": {},
|
218 |
"node_i": "567",
|
219 |
+
"native_tokenizers": [],
|
220 |
+
"scripts": []
|
221 |
},
|
222 |
{
|
223 |
"name": "Eastern Algonquian",
|
224 |
"iso_1_code": null,
|
225 |
"iso_3_code": null,
|
|
|
226 |
"children": [
|
227 |
{
|
228 |
"name": "Carolina Algonquian",
|
229 |
"iso_1_code": null,
|
230 |
"iso_3_code": "crr",
|
|
|
231 |
"children": [],
|
232 |
+
"tokenizers": {},
|
233 |
"node_i": "578",
|
234 |
+
"native_tokenizers": [],
|
235 |
+
"scripts": []
|
236 |
},
|
237 |
{
|
238 |
"name": "Etchemin",
|
239 |
"iso_1_code": null,
|
240 |
"iso_3_code": "etc",
|
|
|
241 |
"children": [],
|
242 |
+
"tokenizers": {},
|
243 |
"node_i": "579",
|
244 |
+
"native_tokenizers": [],
|
245 |
+
"scripts": []
|
246 |
},
|
247 |
{
|
248 |
"name": "Mi\u2019kmaq",
|
249 |
"iso_1_code": null,
|
250 |
"iso_3_code": "mic",
|
|
|
251 |
"children": [],
|
252 |
+
"tokenizers": {},
|
253 |
"node_i": "580",
|
254 |
+
"native_tokenizers": [],
|
255 |
"scripts": [
|
256 |
"Latn"
|
257 |
+
]
|
|
|
258 |
},
|
259 |
{
|
260 |
"name": "Powhatan",
|
261 |
"iso_1_code": null,
|
262 |
"iso_3_code": "pim",
|
|
|
263 |
"children": [],
|
264 |
+
"tokenizers": {},
|
265 |
"node_i": "581",
|
266 |
+
"native_tokenizers": [],
|
267 |
+
"scripts": []
|
268 |
},
|
269 |
{
|
270 |
"name": "Malecite-Passamaquoddy",
|
271 |
"iso_1_code": null,
|
272 |
"iso_3_code": "pqm",
|
|
|
273 |
"children": [],
|
274 |
+
"tokenizers": {},
|
275 |
"node_i": "582",
|
276 |
+
"native_tokenizers": [],
|
277 |
+
"scripts": []
|
278 |
},
|
279 |
{
|
280 |
"name": "Quiripi",
|
281 |
"iso_1_code": null,
|
282 |
"iso_3_code": "qyp",
|
|
|
283 |
"children": [],
|
284 |
+
"tokenizers": {},
|
285 |
"node_i": "583",
|
286 |
+
"native_tokenizers": [],
|
287 |
+
"scripts": []
|
288 |
},
|
289 |
{
|
290 |
"name": "Wampanoag",
|
291 |
"iso_1_code": null,
|
292 |
"iso_3_code": "wam",
|
|
|
293 |
"children": [],
|
294 |
+
"tokenizers": {},
|
295 |
"node_i": "584",
|
296 |
+
"native_tokenizers": [],
|
297 |
+
"scripts": []
|
298 |
},
|
299 |
{
|
300 |
"name": "Loup B",
|
301 |
"iso_1_code": null,
|
302 |
"iso_3_code": "xlb",
|
|
|
303 |
"children": [],
|
304 |
+
"tokenizers": {},
|
305 |
"node_i": "585",
|
306 |
+
"native_tokenizers": [],
|
307 |
+
"scripts": []
|
308 |
},
|
309 |
{
|
310 |
"name": "Loup A",
|
311 |
"iso_1_code": null,
|
312 |
"iso_3_code": "xlo",
|
|
|
313 |
"children": [],
|
314 |
+
"tokenizers": {},
|
315 |
"node_i": "586",
|
316 |
+
"native_tokenizers": [],
|
317 |
+
"scripts": []
|
318 |
},
|
319 |
{
|
320 |
"name": "Narragansett",
|
321 |
"iso_1_code": null,
|
322 |
"iso_3_code": "xnt",
|
|
|
323 |
"children": [],
|
324 |
+
"tokenizers": {},
|
325 |
"node_i": "587",
|
326 |
+
"native_tokenizers": [],
|
327 |
+
"scripts": []
|
328 |
},
|
329 |
{
|
330 |
"name": "Mohegan-Pequot",
|
331 |
"iso_1_code": null,
|
332 |
"iso_3_code": "xpq",
|
|
|
333 |
"children": [],
|
334 |
+
"tokenizers": {},
|
335 |
"node_i": "588",
|
336 |
+
"native_tokenizers": [],
|
337 |
+
"scripts": []
|
338 |
},
|
339 |
{
|
340 |
"name": "Abenaki",
|
341 |
"iso_1_code": null,
|
342 |
"iso_3_code": null,
|
|
|
343 |
"children": [
|
344 |
{
|
345 |
"name": "Abenaki, Eastern",
|
346 |
"iso_1_code": null,
|
347 |
"iso_3_code": "aaq",
|
|
|
348 |
"children": [],
|
349 |
+
"tokenizers": {},
|
350 |
"node_i": "590",
|
351 |
+
"native_tokenizers": [],
|
352 |
+
"scripts": []
|
353 |
},
|
354 |
{
|
355 |
"name": "Abenaki, Western",
|
356 |
"iso_1_code": null,
|
357 |
"iso_3_code": "abe",
|
|
|
358 |
"children": [],
|
359 |
+
"tokenizers": {},
|
360 |
"node_i": "591",
|
361 |
+
"native_tokenizers": [],
|
362 |
+
"scripts": []
|
363 |
}
|
364 |
],
|
365 |
+
"tokenizers": {},
|
366 |
"node_i": "589",
|
367 |
+
"native_tokenizers": [],
|
368 |
+
"scripts": []
|
369 |
},
|
370 |
{
|
371 |
"name": "Delaware",
|
372 |
"iso_1_code": null,
|
373 |
"iso_3_code": null,
|
|
|
374 |
"children": [
|
375 |
{
|
376 |
"name": "Mahican",
|
377 |
"iso_1_code": null,
|
378 |
"iso_3_code": "mjy",
|
|
|
379 |
"children": [],
|
380 |
+
"tokenizers": {},
|
381 |
"node_i": "593",
|
382 |
+
"native_tokenizers": [],
|
383 |
+
"scripts": []
|
384 |
},
|
385 |
{
|
386 |
"name": "Munsee",
|
387 |
"iso_1_code": null,
|
388 |
"iso_3_code": "umu",
|
|
|
389 |
"children": [],
|
390 |
+
"tokenizers": {},
|
391 |
"node_i": "594",
|
392 |
+
"native_tokenizers": [],
|
393 |
+
"scripts": []
|
394 |
},
|
395 |
{
|
396 |
"name": "Unami",
|
397 |
"iso_1_code": null,
|
398 |
"iso_3_code": "unm",
|
|
|
399 |
"children": [],
|
400 |
+
"tokenizers": {},
|
401 |
"node_i": "595",
|
402 |
+
"native_tokenizers": [],
|
403 |
+
"scripts": []
|
404 |
}
|
405 |
],
|
406 |
+
"tokenizers": {},
|
407 |
"node_i": "592",
|
408 |
+
"native_tokenizers": [],
|
409 |
+
"scripts": []
|
410 |
},
|
411 |
{
|
412 |
"name": "Nanticoke-Conoy",
|
413 |
"iso_1_code": null,
|
414 |
"iso_3_code": null,
|
|
|
415 |
"children": [
|
416 |
{
|
417 |
"name": "Nanticoke",
|
418 |
"iso_1_code": null,
|
419 |
"iso_3_code": "nnt",
|
|
|
420 |
"children": [],
|
421 |
+
"tokenizers": {},
|
422 |
"node_i": "597",
|
423 |
+
"native_tokenizers": [],
|
424 |
+
"scripts": []
|
425 |
},
|
426 |
{
|
427 |
"name": "Piscataway",
|
428 |
"iso_1_code": null,
|
429 |
"iso_3_code": "psy",
|
|
|
430 |
"children": [],
|
431 |
+
"tokenizers": {},
|
432 |
"node_i": "598",
|
433 |
+
"native_tokenizers": [],
|
434 |
+
"scripts": []
|
435 |
}
|
436 |
],
|
437 |
+
"tokenizers": {},
|
438 |
"node_i": "596",
|
439 |
+
"native_tokenizers": [],
|
440 |
+
"scripts": []
|
441 |
}
|
442 |
],
|
443 |
+
"tokenizers": {},
|
444 |
"node_i": "577",
|
445 |
+
"native_tokenizers": [],
|
446 |
+
"scripts": []
|
447 |
},
|
448 |
{
|
449 |
"name": "Fox",
|
450 |
"iso_1_code": null,
|
451 |
"iso_3_code": null,
|
|
|
452 |
"children": [
|
453 |
{
|
454 |
"name": "Kickapoo",
|
455 |
"iso_1_code": null,
|
456 |
"iso_3_code": "kic",
|
|
|
457 |
"children": [],
|
458 |
+
"tokenizers": {},
|
459 |
"node_i": "600",
|
460 |
+
"native_tokenizers": [],
|
461 |
+
"scripts": []
|
462 |
},
|
463 |
{
|
464 |
"name": "Meskwaki",
|
465 |
"iso_1_code": null,
|
466 |
"iso_3_code": "sac",
|
|
|
467 |
"children": [],
|
468 |
+
"tokenizers": {},
|
469 |
"node_i": "601",
|
470 |
+
"native_tokenizers": [],
|
471 |
+
"scripts": []
|
472 |
}
|
473 |
],
|
474 |
+
"tokenizers": {},
|
475 |
"node_i": "599",
|
476 |
+
"native_tokenizers": [],
|
477 |
+
"scripts": []
|
478 |
},
|
479 |
{
|
480 |
"name": "Ojibwa-Potawatomi",
|
481 |
"iso_1_code": null,
|
482 |
"iso_3_code": null,
|
|
|
483 |
"children": [
|
484 |
{
|
485 |
"name": "Algonquin",
|
486 |
"iso_1_code": null,
|
487 |
"iso_3_code": "alq",
|
|
|
488 |
"children": [],
|
489 |
+
"tokenizers": {},
|
490 |
"node_i": "603",
|
491 |
+
"native_tokenizers": [],
|
492 |
"scripts": [
|
493 |
"Latn"
|
494 |
+
]
|
|
|
495 |
},
|
496 |
{
|
497 |
"name": "Chippewa",
|
498 |
"iso_1_code": "oj",
|
499 |
"iso_3_code": "ciw",
|
|
|
500 |
"children": [],
|
501 |
+
"tokenizers": {},
|
502 |
"node_i": "604",
|
503 |
+
"native_tokenizers": [],
|
504 |
+
"scripts": []
|
505 |
},
|
506 |
{
|
507 |
"name": "Ojibwa, Northwestern",
|
508 |
"iso_1_code": "oj",
|
509 |
"iso_3_code": "ojb",
|
|
|
510 |
"children": [],
|
511 |
+
"tokenizers": {},
|
512 |
"node_i": "605",
|
513 |
+
"native_tokenizers": [],
|
514 |
"scripts": [
|
515 |
+
"Cans",
|
516 |
+
"Latn"
|
517 |
+
]
|
|
|
518 |
},
|
519 |
{
|
520 |
"name": "Ojibwa, Central",
|
521 |
"iso_1_code": "oj",
|
522 |
"iso_3_code": "ojc",
|
|
|
523 |
"children": [],
|
524 |
+
"tokenizers": {},
|
525 |
"node_i": "606",
|
526 |
+
"native_tokenizers": [],
|
527 |
+
"scripts": []
|
528 |
},
|
529 |
{
|
530 |
"name": "Ojibwa, Eastern",
|
531 |
"iso_1_code": "oj",
|
532 |
"iso_3_code": "ojg",
|
|
|
533 |
"children": [],
|
534 |
+
"tokenizers": {},
|
535 |
"node_i": "607",
|
536 |
+
"native_tokenizers": [],
|
537 |
+
"scripts": []
|
538 |
},
|
539 |
{
|
540 |
"name": "Oji-Cree",
|
541 |
"iso_1_code": "oj",
|
542 |
"iso_3_code": "ojs",
|
|
|
543 |
"children": [],
|
544 |
+
"tokenizers": {},
|
545 |
"node_i": "608",
|
546 |
+
"native_tokenizers": [],
|
547 |
+
"scripts": []
|
548 |
},
|
549 |
{
|
550 |
"name": "Ojibwa, Western",
|
551 |
"iso_1_code": "oj",
|
552 |
"iso_3_code": "ojw",
|
|
|
553 |
"children": [],
|
554 |
+
"tokenizers": {},
|
555 |
"node_i": "609",
|
556 |
+
"native_tokenizers": [],
|
557 |
+
"scripts": []
|
558 |
},
|
559 |
{
|
560 |
"name": "Ottawa",
|
561 |
"iso_1_code": "oj",
|
562 |
"iso_3_code": "otw",
|
|
|
563 |
"children": [],
|
564 |
+
"tokenizers": {},
|
565 |
"node_i": "610",
|
566 |
+
"native_tokenizers": [],
|
567 |
"scripts": [
|
568 |
"Latn"
|
569 |
+
]
|
|
|
570 |
},
|
571 |
{
|
572 |
"name": "Potawatomi",
|
573 |
"iso_1_code": null,
|
574 |
"iso_3_code": "pot",
|
|
|
575 |
"children": [],
|
576 |
+
"tokenizers": {},
|
577 |
"node_i": "611",
|
578 |
+
"native_tokenizers": [],
|
579 |
"scripts": [
|
580 |
"Latn"
|
581 |
+
]
|
|
|
582 |
}
|
583 |
],
|
584 |
+
"tokenizers": {},
|
585 |
"node_i": "602",
|
586 |
+
"native_tokenizers": [],
|
587 |
+
"scripts": []
|
588 |
}
|
589 |
],
|
590 |
+
"tokenizers": {},
|
591 |
"node_i": "557",
|
592 |
+
"native_tokenizers": [],
|
593 |
+
"scripts": []
|
594 |
},
|
595 |
{
|
596 |
"name": "Ritwan",
|
597 |
"iso_1_code": null,
|
598 |
"iso_3_code": null,
|
|
|
599 |
"children": [
|
600 |
{
|
601 |
"name": "Wiyot",
|
602 |
"iso_1_code": null,
|
603 |
"iso_3_code": "wiy",
|
|
|
604 |
"children": [],
|
605 |
+
"tokenizers": {},
|
606 |
"node_i": "613",
|
607 |
+
"native_tokenizers": [],
|
608 |
+
"scripts": []
|
609 |
},
|
610 |
{
|
611 |
"name": "Yurok",
|
612 |
"iso_1_code": null,
|
613 |
"iso_3_code": "yur",
|
|
|
614 |
"children": [],
|
615 |
+
"tokenizers": {},
|
616 |
"node_i": "614",
|
617 |
+
"native_tokenizers": [],
|
618 |
+
"scripts": []
|
619 |
}
|
620 |
],
|
621 |
+
"tokenizers": {},
|
622 |
"node_i": "612",
|
623 |
+
"native_tokenizers": [],
|
624 |
+
"scripts": []
|
625 |
}
|
626 |
],
|
627 |
+
"tokenizers": {},
|
628 |
"node_i": "556",
|
629 |
+
"native_tokenizers": [],
|
630 |
+
"scripts": []
|
631 |
}
|
data/Amto-Musan.json
CHANGED
@@ -2,30 +2,30 @@
|
|
2 |
"name": "Amto-Musan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Amto",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "amt",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "616",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Siawi",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": "mmp",
|
21 |
-
"tokenizers": {},
|
22 |
"children": [],
|
|
|
23 |
"node_i": "617",
|
24 |
-
"
|
25 |
-
"
|
26 |
}
|
27 |
],
|
|
|
28 |
"node_i": "615",
|
29 |
-
"
|
30 |
-
"
|
31 |
}
|
|
|
2 |
"name": "Amto-Musan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Amto",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "amt",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "616",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Siawi",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "mmp",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "617",
|
23 |
+
"native_tokenizers": [],
|
24 |
+
"scripts": []
|
25 |
}
|
26 |
],
|
27 |
+
"tokenizers": {},
|
28 |
"node_i": "615",
|
29 |
+
"native_tokenizers": [],
|
30 |
+
"scripts": []
|
31 |
}
|
data/Andamanese.json
CHANGED
@@ -2,194 +2,194 @@
|
|
2 |
"name": "Andamanese",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Great Andamanese",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Great Andamanese, Mixed",
|
15 |
"iso_1_code": null,
|
16 |
"iso_3_code": "gac",
|
17 |
-
"tokenizers": {},
|
18 |
"children": [],
|
|
|
19 |
"node_i": "620",
|
20 |
-
"
|
21 |
-
"
|
22 |
},
|
23 |
{
|
24 |
"name": "Central",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": null,
|
27 |
-
"tokenizers": {},
|
28 |
"children": [
|
29 |
{
|
30 |
"name": "Aka-Bea",
|
31 |
"iso_1_code": null,
|
32 |
"iso_3_code": "abj",
|
33 |
-
"tokenizers": {},
|
34 |
"children": [],
|
|
|
35 |
"node_i": "622",
|
36 |
-
"
|
37 |
-
"
|
38 |
},
|
39 |
{
|
40 |
"name": "Akar-Bale",
|
41 |
"iso_1_code": null,
|
42 |
"iso_3_code": "acl",
|
43 |
-
"tokenizers": {},
|
44 |
"children": [],
|
|
|
45 |
"node_i": "623",
|
46 |
-
"
|
47 |
-
"
|
48 |
},
|
49 |
{
|
50 |
"name": "Aka-Kede",
|
51 |
"iso_1_code": null,
|
52 |
"iso_3_code": "akx",
|
53 |
-
"tokenizers": {},
|
54 |
"children": [],
|
|
|
55 |
"node_i": "624",
|
56 |
-
"
|
57 |
-
"
|
58 |
},
|
59 |
{
|
60 |
"name": "Aka-Kol",
|
61 |
"iso_1_code": null,
|
62 |
"iso_3_code": "aky",
|
63 |
-
"tokenizers": {},
|
64 |
"children": [],
|
|
|
65 |
"node_i": "625",
|
66 |
-
"
|
67 |
-
"
|
68 |
},
|
69 |
{
|
70 |
"name": "A-Pucikwar",
|
71 |
"iso_1_code": null,
|
72 |
"iso_3_code": "apq",
|
73 |
-
"tokenizers": {},
|
74 |
"children": [],
|
|
|
75 |
"node_i": "626",
|
76 |
-
"
|
77 |
-
"
|
78 |
},
|
79 |
{
|
80 |
"name": "Oko-Juwoi",
|
81 |
"iso_1_code": null,
|
82 |
"iso_3_code": "okj",
|
83 |
-
"tokenizers": {},
|
84 |
"children": [],
|
|
|
85 |
"node_i": "627",
|
86 |
-
"
|
87 |
-
"
|
88 |
}
|
89 |
],
|
|
|
90 |
"node_i": "621",
|
91 |
-
"
|
92 |
-
"
|
93 |
},
|
94 |
{
|
95 |
"name": "Northern",
|
96 |
"iso_1_code": null,
|
97 |
"iso_3_code": null,
|
98 |
-
"tokenizers": {},
|
99 |
"children": [
|
100 |
{
|
101 |
"name": "Aka-Cari",
|
102 |
"iso_1_code": null,
|
103 |
"iso_3_code": "aci",
|
104 |
-
"tokenizers": {},
|
105 |
"children": [],
|
|
|
106 |
"node_i": "629",
|
107 |
-
"
|
108 |
-
"
|
109 |
},
|
110 |
{
|
111 |
"name": "Aka-Kora",
|
112 |
"iso_1_code": null,
|
113 |
"iso_3_code": "ack",
|
114 |
-
"tokenizers": {},
|
115 |
"children": [],
|
|
|
116 |
"node_i": "630",
|
117 |
-
"
|
118 |
-
"
|
119 |
},
|
120 |
{
|
121 |
"name": "Aka-Jeru",
|
122 |
"iso_1_code": null,
|
123 |
"iso_3_code": "akj",
|
124 |
-
"tokenizers": {},
|
125 |
"children": [],
|
|
|
126 |
"node_i": "631",
|
127 |
-
"
|
128 |
-
"
|
129 |
},
|
130 |
{
|
131 |
"name": "Aka-Bo",
|
132 |
"iso_1_code": null,
|
133 |
"iso_3_code": "akm",
|
134 |
-
"tokenizers": {},
|
135 |
"children": [],
|
|
|
136 |
"node_i": "632",
|
137 |
-
"
|
138 |
-
"
|
139 |
}
|
140 |
],
|
|
|
141 |
"node_i": "628",
|
142 |
-
"
|
143 |
-
"
|
144 |
}
|
145 |
],
|
|
|
146 |
"node_i": "619",
|
147 |
-
"
|
148 |
-
"
|
149 |
},
|
150 |
{
|
151 |
"name": "South Andamanese",
|
152 |
"iso_1_code": null,
|
153 |
"iso_3_code": null,
|
154 |
-
"tokenizers": {},
|
155 |
"children": [
|
156 |
{
|
157 |
"name": "Jarawa",
|
158 |
"iso_1_code": null,
|
159 |
"iso_3_code": "anq",
|
160 |
-
"tokenizers": {},
|
161 |
"children": [],
|
|
|
162 |
"node_i": "634",
|
163 |
-
"
|
164 |
-
"
|
165 |
},
|
166 |
{
|
167 |
"name": "\u00d6\u00f1ge",
|
168 |
"iso_1_code": null,
|
169 |
"iso_3_code": "oon",
|
170 |
-
"tokenizers": {},
|
171 |
"children": [],
|
|
|
172 |
"node_i": "635",
|
173 |
-
"
|
174 |
-
"
|
175 |
},
|
176 |
{
|
177 |
"name": "Sentinel",
|
178 |
"iso_1_code": null,
|
179 |
"iso_3_code": "std",
|
180 |
-
"tokenizers": {},
|
181 |
"children": [],
|
|
|
182 |
"node_i": "636",
|
183 |
-
"
|
184 |
-
"
|
185 |
}
|
186 |
],
|
|
|
187 |
"node_i": "633",
|
188 |
-
"
|
189 |
-
"
|
190 |
}
|
191 |
],
|
|
|
192 |
"node_i": "618",
|
193 |
-
"
|
194 |
-
"
|
195 |
}
|
|
|
2 |
"name": "Andamanese",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Great Andamanese",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Great Andamanese, Mixed",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": "gac",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "620",
|
18 |
+
"native_tokenizers": [],
|
19 |
+
"scripts": []
|
20 |
},
|
21 |
{
|
22 |
"name": "Central",
|
23 |
"iso_1_code": null,
|
24 |
"iso_3_code": null,
|
|
|
25 |
"children": [
|
26 |
{
|
27 |
"name": "Aka-Bea",
|
28 |
"iso_1_code": null,
|
29 |
"iso_3_code": "abj",
|
|
|
30 |
"children": [],
|
31 |
+
"tokenizers": {},
|
32 |
"node_i": "622",
|
33 |
+
"native_tokenizers": [],
|
34 |
+
"scripts": []
|
35 |
},
|
36 |
{
|
37 |
"name": "Akar-Bale",
|
38 |
"iso_1_code": null,
|
39 |
"iso_3_code": "acl",
|
|
|
40 |
"children": [],
|
41 |
+
"tokenizers": {},
|
42 |
"node_i": "623",
|
43 |
+
"native_tokenizers": [],
|
44 |
+
"scripts": []
|
45 |
},
|
46 |
{
|
47 |
"name": "Aka-Kede",
|
48 |
"iso_1_code": null,
|
49 |
"iso_3_code": "akx",
|
|
|
50 |
"children": [],
|
51 |
+
"tokenizers": {},
|
52 |
"node_i": "624",
|
53 |
+
"native_tokenizers": [],
|
54 |
+
"scripts": []
|
55 |
},
|
56 |
{
|
57 |
"name": "Aka-Kol",
|
58 |
"iso_1_code": null,
|
59 |
"iso_3_code": "aky",
|
|
|
60 |
"children": [],
|
61 |
+
"tokenizers": {},
|
62 |
"node_i": "625",
|
63 |
+
"native_tokenizers": [],
|
64 |
+
"scripts": []
|
65 |
},
|
66 |
{
|
67 |
"name": "A-Pucikwar",
|
68 |
"iso_1_code": null,
|
69 |
"iso_3_code": "apq",
|
|
|
70 |
"children": [],
|
71 |
+
"tokenizers": {},
|
72 |
"node_i": "626",
|
73 |
+
"native_tokenizers": [],
|
74 |
+
"scripts": []
|
75 |
},
|
76 |
{
|
77 |
"name": "Oko-Juwoi",
|
78 |
"iso_1_code": null,
|
79 |
"iso_3_code": "okj",
|
|
|
80 |
"children": [],
|
81 |
+
"tokenizers": {},
|
82 |
"node_i": "627",
|
83 |
+
"native_tokenizers": [],
|
84 |
+
"scripts": []
|
85 |
}
|
86 |
],
|
87 |
+
"tokenizers": {},
|
88 |
"node_i": "621",
|
89 |
+
"native_tokenizers": [],
|
90 |
+
"scripts": []
|
91 |
},
|
92 |
{
|
93 |
"name": "Northern",
|
94 |
"iso_1_code": null,
|
95 |
"iso_3_code": null,
|
|
|
96 |
"children": [
|
97 |
{
|
98 |
"name": "Aka-Cari",
|
99 |
"iso_1_code": null,
|
100 |
"iso_3_code": "aci",
|
|
|
101 |
"children": [],
|
102 |
+
"tokenizers": {},
|
103 |
"node_i": "629",
|
104 |
+
"native_tokenizers": [],
|
105 |
+
"scripts": []
|
106 |
},
|
107 |
{
|
108 |
"name": "Aka-Kora",
|
109 |
"iso_1_code": null,
|
110 |
"iso_3_code": "ack",
|
|
|
111 |
"children": [],
|
112 |
+
"tokenizers": {},
|
113 |
"node_i": "630",
|
114 |
+
"native_tokenizers": [],
|
115 |
+
"scripts": []
|
116 |
},
|
117 |
{
|
118 |
"name": "Aka-Jeru",
|
119 |
"iso_1_code": null,
|
120 |
"iso_3_code": "akj",
|
|
|
121 |
"children": [],
|
122 |
+
"tokenizers": {},
|
123 |
"node_i": "631",
|
124 |
+
"native_tokenizers": [],
|
125 |
+
"scripts": []
|
126 |
},
|
127 |
{
|
128 |
"name": "Aka-Bo",
|
129 |
"iso_1_code": null,
|
130 |
"iso_3_code": "akm",
|
|
|
131 |
"children": [],
|
132 |
+
"tokenizers": {},
|
133 |
"node_i": "632",
|
134 |
+
"native_tokenizers": [],
|
135 |
+
"scripts": []
|
136 |
}
|
137 |
],
|
138 |
+
"tokenizers": {},
|
139 |
"node_i": "628",
|
140 |
+
"native_tokenizers": [],
|
141 |
+
"scripts": []
|
142 |
}
|
143 |
],
|
144 |
+
"tokenizers": {},
|
145 |
"node_i": "619",
|
146 |
+
"native_tokenizers": [],
|
147 |
+
"scripts": []
|
148 |
},
|
149 |
{
|
150 |
"name": "South Andamanese",
|
151 |
"iso_1_code": null,
|
152 |
"iso_3_code": null,
|
|
|
153 |
"children": [
|
154 |
{
|
155 |
"name": "Jarawa",
|
156 |
"iso_1_code": null,
|
157 |
"iso_3_code": "anq",
|
|
|
158 |
"children": [],
|
159 |
+
"tokenizers": {},
|
160 |
"node_i": "634",
|
161 |
+
"native_tokenizers": [],
|
162 |
+
"scripts": []
|
163 |
},
|
164 |
{
|
165 |
"name": "\u00d6\u00f1ge",
|
166 |
"iso_1_code": null,
|
167 |
"iso_3_code": "oon",
|
|
|
168 |
"children": [],
|
169 |
+
"tokenizers": {},
|
170 |
"node_i": "635",
|
171 |
+
"native_tokenizers": [],
|
172 |
+
"scripts": []
|
173 |
},
|
174 |
{
|
175 |
"name": "Sentinel",
|
176 |
"iso_1_code": null,
|
177 |
"iso_3_code": "std",
|
|
|
178 |
"children": [],
|
179 |
+
"tokenizers": {},
|
180 |
"node_i": "636",
|
181 |
+
"native_tokenizers": [],
|
182 |
+
"scripts": []
|
183 |
}
|
184 |
],
|
185 |
+
"tokenizers": {},
|
186 |
"node_i": "633",
|
187 |
+
"native_tokenizers": [],
|
188 |
+
"scripts": []
|
189 |
}
|
190 |
],
|
191 |
+
"tokenizers": {},
|
192 |
"node_i": "618",
|
193 |
+
"native_tokenizers": [],
|
194 |
+
"scripts": []
|
195 |
}
|
data/Arafundi.json
CHANGED
@@ -2,40 +2,40 @@
|
|
2 |
"name": "Arafundi",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Andai",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "afd",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "638",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Nanubae",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": "afk",
|
21 |
-
"tokenizers": {},
|
22 |
"children": [],
|
|
|
23 |
"node_i": "639",
|
24 |
-
"
|
25 |
-
"
|
26 |
},
|
27 |
{
|
28 |
"name": "Tapei",
|
29 |
"iso_1_code": null,
|
30 |
"iso_3_code": "afp",
|
31 |
-
"tokenizers": {},
|
32 |
"children": [],
|
|
|
33 |
"node_i": "640",
|
34 |
-
"
|
35 |
-
"
|
36 |
}
|
37 |
],
|
|
|
38 |
"node_i": "637",
|
39 |
-
"
|
40 |
-
"
|
41 |
}
|
|
|
2 |
"name": "Arafundi",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Andai",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "afd",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "638",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Nanubae",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "afk",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "639",
|
23 |
+
"native_tokenizers": [],
|
24 |
+
"scripts": []
|
25 |
},
|
26 |
{
|
27 |
"name": "Tapei",
|
28 |
"iso_1_code": null,
|
29 |
"iso_3_code": "afp",
|
|
|
30 |
"children": [],
|
31 |
+
"tokenizers": {},
|
32 |
"node_i": "640",
|
33 |
+
"native_tokenizers": [],
|
34 |
+
"scripts": []
|
35 |
}
|
36 |
],
|
37 |
+
"tokenizers": {},
|
38 |
"node_i": "637",
|
39 |
+
"native_tokenizers": [],
|
40 |
+
"scripts": []
|
41 |
}
|
data/Arai (Left May).json
CHANGED
@@ -2,72 +2,72 @@
|
|
2 |
"name": "Arai (Left May)",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Sawiyanu",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "amm",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "642",
|
|
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
-
]
|
17 |
-
"own_tokenizer": false
|
18 |
},
|
19 |
{
|
20 |
"name": "Bo",
|
21 |
"iso_1_code": null,
|
22 |
"iso_3_code": "bpw",
|
23 |
-
"tokenizers": {},
|
24 |
"children": [],
|
|
|
25 |
"node_i": "643",
|
26 |
-
"
|
27 |
-
"
|
28 |
},
|
29 |
{
|
30 |
"name": "Yawuno Teneyo",
|
31 |
"iso_1_code": null,
|
32 |
"iso_3_code": "itr",
|
33 |
-
"tokenizers": {},
|
34 |
"children": [],
|
|
|
35 |
"node_i": "644",
|
36 |
-
"
|
37 |
-
"
|
38 |
},
|
39 |
{
|
40 |
"name": "Nakwi",
|
41 |
"iso_1_code": null,
|
42 |
"iso_3_code": "nax",
|
43 |
-
"tokenizers": {},
|
44 |
"children": [],
|
|
|
45 |
"node_i": "645",
|
46 |
-
"
|
47 |
-
"
|
48 |
},
|
49 |
{
|
50 |
"name": "Nimo",
|
51 |
"iso_1_code": null,
|
52 |
"iso_3_code": "niw",
|
53 |
-
"tokenizers": {},
|
54 |
"children": [],
|
|
|
55 |
"node_i": "646",
|
56 |
-
"
|
57 |
-
"
|
58 |
},
|
59 |
{
|
60 |
"name": "Owiniga",
|
61 |
"iso_1_code": null,
|
62 |
"iso_3_code": "owi",
|
63 |
-
"tokenizers": {},
|
64 |
"children": [],
|
|
|
65 |
"node_i": "647",
|
66 |
-
"
|
67 |
-
"
|
68 |
}
|
69 |
],
|
|
|
70 |
"node_i": "641",
|
71 |
-
"
|
72 |
-
"
|
73 |
}
|
|
|
2 |
"name": "Arai (Left May)",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Sawiyanu",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "amm",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "642",
|
13 |
+
"native_tokenizers": [],
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
+
]
|
|
|
17 |
},
|
18 |
{
|
19 |
"name": "Bo",
|
20 |
"iso_1_code": null,
|
21 |
"iso_3_code": "bpw",
|
|
|
22 |
"children": [],
|
23 |
+
"tokenizers": {},
|
24 |
"node_i": "643",
|
25 |
+
"native_tokenizers": [],
|
26 |
+
"scripts": []
|
27 |
},
|
28 |
{
|
29 |
"name": "Yawuno Teneyo",
|
30 |
"iso_1_code": null,
|
31 |
"iso_3_code": "itr",
|
|
|
32 |
"children": [],
|
33 |
+
"tokenizers": {},
|
34 |
"node_i": "644",
|
35 |
+
"native_tokenizers": [],
|
36 |
+
"scripts": []
|
37 |
},
|
38 |
{
|
39 |
"name": "Nakwi",
|
40 |
"iso_1_code": null,
|
41 |
"iso_3_code": "nax",
|
|
|
42 |
"children": [],
|
43 |
+
"tokenizers": {},
|
44 |
"node_i": "645",
|
45 |
+
"native_tokenizers": [],
|
46 |
+
"scripts": []
|
47 |
},
|
48 |
{
|
49 |
"name": "Nimo",
|
50 |
"iso_1_code": null,
|
51 |
"iso_3_code": "niw",
|
|
|
52 |
"children": [],
|
53 |
+
"tokenizers": {},
|
54 |
"node_i": "646",
|
55 |
+
"native_tokenizers": [],
|
56 |
+
"scripts": []
|
57 |
},
|
58 |
{
|
59 |
"name": "Owiniga",
|
60 |
"iso_1_code": null,
|
61 |
"iso_3_code": "owi",
|
|
|
62 |
"children": [],
|
63 |
+
"tokenizers": {},
|
64 |
"node_i": "647",
|
65 |
+
"native_tokenizers": [],
|
66 |
+
"scripts": []
|
67 |
}
|
68 |
],
|
69 |
+
"tokenizers": {},
|
70 |
"node_i": "641",
|
71 |
+
"native_tokenizers": [],
|
72 |
+
"scripts": []
|
73 |
}
|
data/Arauan.json
CHANGED
@@ -2,87 +2,87 @@
|
|
2 |
"name": "Arauan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Aru\u00e1",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "aru",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "649",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Kulina",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": "cul",
|
21 |
-
"tokenizers": {},
|
22 |
"children": [],
|
|
|
23 |
"node_i": "650",
|
|
|
24 |
"scripts": [
|
25 |
"Latn"
|
26 |
-
]
|
27 |
-
"own_tokenizer": false
|
28 |
},
|
29 |
{
|
30 |
"name": "Den\u00ed",
|
31 |
"iso_1_code": null,
|
32 |
"iso_3_code": "dny",
|
33 |
-
"tokenizers": {},
|
34 |
"children": [],
|
|
|
35 |
"node_i": "651",
|
36 |
-
"
|
37 |
-
"
|
38 |
},
|
39 |
{
|
40 |
"name": "Paumar\u00ed",
|
41 |
"iso_1_code": null,
|
42 |
"iso_3_code": "pad",
|
43 |
-
"tokenizers": {},
|
44 |
"children": [],
|
|
|
45 |
"node_i": "652",
|
|
|
46 |
"scripts": [
|
47 |
"Latn"
|
48 |
-
]
|
49 |
-
"own_tokenizer": false
|
50 |
},
|
51 |
{
|
52 |
"name": "Suruah\u00e1",
|
53 |
"iso_1_code": null,
|
54 |
"iso_3_code": "swx",
|
55 |
-
"tokenizers": {},
|
56 |
"children": [],
|
|
|
57 |
"node_i": "653",
|
58 |
-
"
|
59 |
-
"
|
60 |
},
|
61 |
{
|
62 |
"name": "Jamamadi",
|
63 |
"iso_1_code": null,
|
64 |
"iso_3_code": null,
|
65 |
-
"tokenizers": {},
|
66 |
"children": [
|
67 |
{
|
68 |
"name": "Jamamad\u00ed",
|
69 |
"iso_1_code": null,
|
70 |
"iso_3_code": "jaa",
|
71 |
-
"tokenizers": {},
|
72 |
"children": [],
|
|
|
73 |
"node_i": "655",
|
|
|
74 |
"scripts": [
|
75 |
"Latn"
|
76 |
-
]
|
77 |
-
"own_tokenizer": false
|
78 |
}
|
79 |
],
|
|
|
80 |
"node_i": "654",
|
81 |
-
"
|
82 |
-
"
|
83 |
}
|
84 |
],
|
|
|
85 |
"node_i": "648",
|
86 |
-
"
|
87 |
-
"
|
88 |
}
|
|
|
2 |
"name": "Arauan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Aru\u00e1",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "aru",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "649",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Kulina",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "cul",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "650",
|
23 |
+
"native_tokenizers": [],
|
24 |
"scripts": [
|
25 |
"Latn"
|
26 |
+
]
|
|
|
27 |
},
|
28 |
{
|
29 |
"name": "Den\u00ed",
|
30 |
"iso_1_code": null,
|
31 |
"iso_3_code": "dny",
|
|
|
32 |
"children": [],
|
33 |
+
"tokenizers": {},
|
34 |
"node_i": "651",
|
35 |
+
"native_tokenizers": [],
|
36 |
+
"scripts": []
|
37 |
},
|
38 |
{
|
39 |
"name": "Paumar\u00ed",
|
40 |
"iso_1_code": null,
|
41 |
"iso_3_code": "pad",
|
|
|
42 |
"children": [],
|
43 |
+
"tokenizers": {},
|
44 |
"node_i": "652",
|
45 |
+
"native_tokenizers": [],
|
46 |
"scripts": [
|
47 |
"Latn"
|
48 |
+
]
|
|
|
49 |
},
|
50 |
{
|
51 |
"name": "Suruah\u00e1",
|
52 |
"iso_1_code": null,
|
53 |
"iso_3_code": "swx",
|
|
|
54 |
"children": [],
|
55 |
+
"tokenizers": {},
|
56 |
"node_i": "653",
|
57 |
+
"native_tokenizers": [],
|
58 |
+
"scripts": []
|
59 |
},
|
60 |
{
|
61 |
"name": "Jamamadi",
|
62 |
"iso_1_code": null,
|
63 |
"iso_3_code": null,
|
|
|
64 |
"children": [
|
65 |
{
|
66 |
"name": "Jamamad\u00ed",
|
67 |
"iso_1_code": null,
|
68 |
"iso_3_code": "jaa",
|
|
|
69 |
"children": [],
|
70 |
+
"tokenizers": {},
|
71 |
"node_i": "655",
|
72 |
+
"native_tokenizers": [],
|
73 |
"scripts": [
|
74 |
"Latn"
|
75 |
+
]
|
|
|
76 |
}
|
77 |
],
|
78 |
+
"tokenizers": {},
|
79 |
"node_i": "654",
|
80 |
+
"native_tokenizers": [],
|
81 |
+
"scripts": []
|
82 |
}
|
83 |
],
|
84 |
+
"tokenizers": {},
|
85 |
"node_i": "648",
|
86 |
+
"native_tokenizers": [],
|
87 |
+
"scripts": []
|
88 |
}
|
data/Australian.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/Austro-Asiatic.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/Austronesian.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/Aymaran.json
CHANGED
@@ -2,64 +2,64 @@
|
|
2 |
"name": "Aymaran",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Aymara",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Aymara, Southern",
|
15 |
"iso_1_code": "ay",
|
16 |
"iso_3_code": "ayc",
|
17 |
-
"tokenizers": {},
|
18 |
"children": [],
|
|
|
19 |
"node_i": "3257",
|
20 |
-
"
|
21 |
-
"
|
22 |
},
|
23 |
{
|
24 |
"name": "Aymara, Central",
|
25 |
"iso_1_code": "ay",
|
26 |
"iso_3_code": "ayr",
|
27 |
-
"tokenizers": {},
|
28 |
"children": [],
|
|
|
29 |
"node_i": "3258",
|
|
|
30 |
"scripts": [
|
31 |
"Latn"
|
32 |
-
]
|
33 |
-
"own_tokenizer": false
|
34 |
}
|
35 |
],
|
|
|
36 |
"node_i": "3256",
|
37 |
-
"
|
38 |
-
"
|
39 |
},
|
40 |
{
|
41 |
"name": "Tupe",
|
42 |
"iso_1_code": null,
|
43 |
"iso_3_code": null,
|
44 |
-
"tokenizers": {},
|
45 |
"children": [
|
46 |
{
|
47 |
"name": "Jaqaru",
|
48 |
"iso_1_code": null,
|
49 |
"iso_3_code": "jqr",
|
50 |
-
"tokenizers": {},
|
51 |
"children": [],
|
|
|
52 |
"node_i": "3260",
|
53 |
-
"
|
54 |
-
"
|
55 |
}
|
56 |
],
|
|
|
57 |
"node_i": "3259",
|
58 |
-
"
|
59 |
-
"
|
60 |
}
|
61 |
],
|
|
|
62 |
"node_i": "3255",
|
63 |
-
"
|
64 |
-
"
|
65 |
}
|
|
|
2 |
"name": "Aymaran",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Aymara",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Aymara, Southern",
|
13 |
"iso_1_code": "ay",
|
14 |
"iso_3_code": "ayc",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "3257",
|
18 |
+
"native_tokenizers": [],
|
19 |
+
"scripts": []
|
20 |
},
|
21 |
{
|
22 |
"name": "Aymara, Central",
|
23 |
"iso_1_code": "ay",
|
24 |
"iso_3_code": "ayr",
|
|
|
25 |
"children": [],
|
26 |
+
"tokenizers": {},
|
27 |
"node_i": "3258",
|
28 |
+
"native_tokenizers": [],
|
29 |
"scripts": [
|
30 |
"Latn"
|
31 |
+
]
|
|
|
32 |
}
|
33 |
],
|
34 |
+
"tokenizers": {},
|
35 |
"node_i": "3256",
|
36 |
+
"native_tokenizers": [],
|
37 |
+
"scripts": []
|
38 |
},
|
39 |
{
|
40 |
"name": "Tupe",
|
41 |
"iso_1_code": null,
|
42 |
"iso_3_code": null,
|
|
|
43 |
"children": [
|
44 |
{
|
45 |
"name": "Jaqaru",
|
46 |
"iso_1_code": null,
|
47 |
"iso_3_code": "jqr",
|
|
|
48 |
"children": [],
|
49 |
+
"tokenizers": {},
|
50 |
"node_i": "3260",
|
51 |
+
"native_tokenizers": [],
|
52 |
+
"scripts": []
|
53 |
}
|
54 |
],
|
55 |
+
"tokenizers": {},
|
56 |
"node_i": "3259",
|
57 |
+
"native_tokenizers": [],
|
58 |
+
"scripts": []
|
59 |
}
|
60 |
],
|
61 |
+
"tokenizers": {},
|
62 |
"node_i": "3255",
|
63 |
+
"native_tokenizers": [],
|
64 |
+
"scripts": []
|
65 |
}
|
data/Barbacoan.json
CHANGED
@@ -2,68 +2,68 @@
|
|
2 |
"name": "Barbacoan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Northern",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Awa-Cuaiquer",
|
15 |
"iso_1_code": null,
|
16 |
"iso_3_code": "kwi",
|
17 |
-
"tokenizers": {},
|
18 |
"children": [],
|
|
|
19 |
"node_i": "3263",
|
|
|
20 |
"scripts": [
|
21 |
"Latn"
|
22 |
-
]
|
23 |
-
"own_tokenizer": false
|
24 |
}
|
25 |
],
|
|
|
26 |
"node_i": "3262",
|
27 |
-
"
|
28 |
-
"
|
29 |
},
|
30 |
{
|
31 |
"name": "Southern",
|
32 |
"iso_1_code": null,
|
33 |
"iso_3_code": null,
|
34 |
-
"tokenizers": {},
|
35 |
"children": [
|
36 |
{
|
37 |
"name": "Chachi",
|
38 |
"iso_1_code": null,
|
39 |
"iso_3_code": "cbi",
|
40 |
-
"tokenizers": {},
|
41 |
"children": [],
|
|
|
42 |
"node_i": "3265",
|
|
|
43 |
"scripts": [
|
44 |
"Latn"
|
45 |
-
]
|
46 |
-
"own_tokenizer": false
|
47 |
},
|
48 |
{
|
49 |
"name": "Tsafiki",
|
50 |
"iso_1_code": null,
|
51 |
"iso_3_code": "cof",
|
52 |
-
"tokenizers": {},
|
53 |
"children": [],
|
|
|
54 |
"node_i": "3266",
|
|
|
55 |
"scripts": [
|
56 |
"Latn"
|
57 |
-
]
|
58 |
-
"own_tokenizer": false
|
59 |
}
|
60 |
],
|
|
|
61 |
"node_i": "3264",
|
62 |
-
"
|
63 |
-
"
|
64 |
}
|
65 |
],
|
|
|
66 |
"node_i": "3261",
|
67 |
-
"
|
68 |
-
"
|
69 |
}
|
|
|
2 |
"name": "Barbacoan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Northern",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Awa-Cuaiquer",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": "kwi",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "3263",
|
18 |
+
"native_tokenizers": [],
|
19 |
"scripts": [
|
20 |
"Latn"
|
21 |
+
]
|
|
|
22 |
}
|
23 |
],
|
24 |
+
"tokenizers": {},
|
25 |
"node_i": "3262",
|
26 |
+
"native_tokenizers": [],
|
27 |
+
"scripts": []
|
28 |
},
|
29 |
{
|
30 |
"name": "Southern",
|
31 |
"iso_1_code": null,
|
32 |
"iso_3_code": null,
|
|
|
33 |
"children": [
|
34 |
{
|
35 |
"name": "Chachi",
|
36 |
"iso_1_code": null,
|
37 |
"iso_3_code": "cbi",
|
|
|
38 |
"children": [],
|
39 |
+
"tokenizers": {},
|
40 |
"node_i": "3265",
|
41 |
+
"native_tokenizers": [],
|
42 |
"scripts": [
|
43 |
"Latn"
|
44 |
+
]
|
|
|
45 |
},
|
46 |
{
|
47 |
"name": "Tsafiki",
|
48 |
"iso_1_code": null,
|
49 |
"iso_3_code": "cof",
|
|
|
50 |
"children": [],
|
51 |
+
"tokenizers": {},
|
52 |
"node_i": "3266",
|
53 |
+
"native_tokenizers": [],
|
54 |
"scripts": [
|
55 |
"Latn"
|
56 |
+
]
|
|
|
57 |
}
|
58 |
],
|
59 |
+
"tokenizers": {},
|
60 |
"node_i": "3264",
|
61 |
+
"native_tokenizers": [],
|
62 |
+
"scripts": []
|
63 |
}
|
64 |
],
|
65 |
+
"tokenizers": {},
|
66 |
"node_i": "3261",
|
67 |
+
"native_tokenizers": [],
|
68 |
+
"scripts": []
|
69 |
}
|
data/Bayono-Awbono.json
CHANGED
@@ -2,30 +2,30 @@
|
|
2 |
"name": "Bayono-Awbono",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Awbono",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "awh",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3268",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Bayono",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": "byl",
|
21 |
-
"tokenizers": {},
|
22 |
"children": [],
|
|
|
23 |
"node_i": "3269",
|
24 |
-
"
|
25 |
-
"
|
26 |
}
|
27 |
],
|
|
|
28 |
"node_i": "3267",
|
29 |
-
"
|
30 |
-
"
|
31 |
}
|
|
|
2 |
"name": "Bayono-Awbono",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Awbono",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "awh",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3268",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Bayono",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "byl",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "3269",
|
23 |
+
"native_tokenizers": [],
|
24 |
+
"scripts": []
|
25 |
}
|
26 |
],
|
27 |
+
"tokenizers": {},
|
28 |
"node_i": "3267",
|
29 |
+
"native_tokenizers": [],
|
30 |
+
"scripts": []
|
31 |
}
|
data/Border.json
CHANGED
@@ -2,197 +2,197 @@
|
|
2 |
"name": "Border",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Bewani",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Ainbai",
|
15 |
"iso_1_code": null,
|
16 |
"iso_3_code": "aic",
|
17 |
-
"tokenizers": {},
|
18 |
"children": [],
|
|
|
19 |
"node_i": "3272",
|
20 |
-
"
|
21 |
-
"
|
22 |
},
|
23 |
{
|
24 |
"name": "Kilmeri",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": "kih",
|
27 |
-
"tokenizers": {},
|
28 |
"children": [],
|
|
|
29 |
"node_i": "3273",
|
30 |
-
"
|
31 |
-
"
|
32 |
},
|
33 |
{
|
34 |
"name": "Ningera",
|
35 |
"iso_1_code": null,
|
36 |
"iso_3_code": "nby",
|
37 |
-
"tokenizers": {},
|
38 |
"children": [],
|
|
|
39 |
"node_i": "3274",
|
40 |
-
"
|
41 |
-
"
|
42 |
},
|
43 |
{
|
44 |
"name": "Pagi",
|
45 |
"iso_1_code": null,
|
46 |
"iso_3_code": "pgi",
|
47 |
-
"tokenizers": {},
|
48 |
"children": [],
|
|
|
49 |
"node_i": "3275",
|
50 |
-
"
|
51 |
-
"
|
52 |
},
|
53 |
{
|
54 |
"name": "Umeda",
|
55 |
"iso_1_code": null,
|
56 |
"iso_3_code": "upi",
|
57 |
-
"tokenizers": {},
|
58 |
"children": [],
|
|
|
59 |
"node_i": "3276",
|
60 |
-
"
|
61 |
-
"
|
62 |
}
|
63 |
],
|
|
|
64 |
"node_i": "3271",
|
65 |
-
"
|
66 |
-
"
|
67 |
},
|
68 |
{
|
69 |
"name": "Taikat",
|
70 |
"iso_1_code": null,
|
71 |
"iso_3_code": null,
|
72 |
-
"tokenizers": {},
|
73 |
"children": [
|
74 |
{
|
75 |
"name": "Taikat",
|
76 |
"iso_1_code": null,
|
77 |
"iso_3_code": "aos",
|
78 |
-
"tokenizers": {},
|
79 |
"children": [],
|
|
|
80 |
"node_i": "3278",
|
81 |
-
"
|
82 |
-
"
|
83 |
},
|
84 |
{
|
85 |
"name": "Awyi",
|
86 |
"iso_1_code": null,
|
87 |
"iso_3_code": "auw",
|
88 |
-
"tokenizers": {},
|
89 |
"children": [],
|
|
|
90 |
"node_i": "3279",
|
91 |
-
"
|
92 |
-
"
|
93 |
}
|
94 |
],
|
|
|
95 |
"node_i": "3277",
|
96 |
-
"
|
97 |
-
"
|
98 |
},
|
99 |
{
|
100 |
"name": "Waris",
|
101 |
"iso_1_code": null,
|
102 |
"iso_3_code": null,
|
103 |
-
"tokenizers": {},
|
104 |
"children": [
|
105 |
{
|
106 |
"name": "Amanab",
|
107 |
"iso_1_code": null,
|
108 |
"iso_3_code": "amn",
|
109 |
-
"tokenizers": {},
|
110 |
"children": [],
|
|
|
111 |
"node_i": "3281",
|
|
|
112 |
"scripts": [
|
113 |
"Latn"
|
114 |
-
]
|
115 |
-
"own_tokenizer": false
|
116 |
},
|
117 |
{
|
118 |
"name": "Daonda",
|
119 |
"iso_1_code": null,
|
120 |
"iso_3_code": "dnd",
|
121 |
-
"tokenizers": {},
|
122 |
"children": [],
|
|
|
123 |
"node_i": "3282",
|
124 |
-
"
|
125 |
-
"
|
126 |
},
|
127 |
{
|
128 |
"name": "Imonda",
|
129 |
"iso_1_code": null,
|
130 |
"iso_3_code": "imn",
|
131 |
-
"tokenizers": {},
|
132 |
"children": [],
|
|
|
133 |
"node_i": "3283",
|
134 |
-
"
|
135 |
-
"
|
136 |
},
|
137 |
{
|
138 |
"name": "Manem",
|
139 |
"iso_1_code": null,
|
140 |
"iso_3_code": "jet",
|
141 |
-
"tokenizers": {},
|
142 |
"children": [],
|
|
|
143 |
"node_i": "3284",
|
144 |
-
"
|
145 |
-
"
|
146 |
},
|
147 |
{
|
148 |
"name": "Auwe",
|
149 |
"iso_1_code": null,
|
150 |
"iso_3_code": "smf",
|
151 |
-
"tokenizers": {},
|
152 |
"children": [],
|
|
|
153 |
"node_i": "3285",
|
154 |
-
"
|
155 |
-
"
|
156 |
},
|
157 |
{
|
158 |
"name": "Viid",
|
159 |
"iso_1_code": null,
|
160 |
"iso_3_code": "snu",
|
161 |
-
"tokenizers": {},
|
162 |
"children": [],
|
|
|
163 |
"node_i": "3286",
|
164 |
-
"
|
165 |
-
"
|
166 |
},
|
167 |
{
|
168 |
"name": "Sowanda",
|
169 |
"iso_1_code": null,
|
170 |
"iso_3_code": "sow",
|
171 |
-
"tokenizers": {},
|
172 |
"children": [],
|
|
|
173 |
"node_i": "3287",
|
174 |
-
"
|
175 |
-
"
|
176 |
},
|
177 |
{
|
178 |
"name": "Waris",
|
179 |
"iso_1_code": null,
|
180 |
"iso_3_code": "wrs",
|
181 |
-
"tokenizers": {},
|
182 |
"children": [],
|
|
|
183 |
"node_i": "3288",
|
|
|
184 |
"scripts": [
|
185 |
"Latn"
|
186 |
-
]
|
187 |
-
"own_tokenizer": false
|
188 |
}
|
189 |
],
|
|
|
190 |
"node_i": "3280",
|
191 |
-
"
|
192 |
-
"
|
193 |
}
|
194 |
],
|
|
|
195 |
"node_i": "3270",
|
196 |
-
"
|
197 |
-
"
|
198 |
}
|
|
|
2 |
"name": "Border",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Bewani",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Ainbai",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": "aic",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "3272",
|
18 |
+
"native_tokenizers": [],
|
19 |
+
"scripts": []
|
20 |
},
|
21 |
{
|
22 |
"name": "Kilmeri",
|
23 |
"iso_1_code": null,
|
24 |
"iso_3_code": "kih",
|
|
|
25 |
"children": [],
|
26 |
+
"tokenizers": {},
|
27 |
"node_i": "3273",
|
28 |
+
"native_tokenizers": [],
|
29 |
+
"scripts": []
|
30 |
},
|
31 |
{
|
32 |
"name": "Ningera",
|
33 |
"iso_1_code": null,
|
34 |
"iso_3_code": "nby",
|
|
|
35 |
"children": [],
|
36 |
+
"tokenizers": {},
|
37 |
"node_i": "3274",
|
38 |
+
"native_tokenizers": [],
|
39 |
+
"scripts": []
|
40 |
},
|
41 |
{
|
42 |
"name": "Pagi",
|
43 |
"iso_1_code": null,
|
44 |
"iso_3_code": "pgi",
|
|
|
45 |
"children": [],
|
46 |
+
"tokenizers": {},
|
47 |
"node_i": "3275",
|
48 |
+
"native_tokenizers": [],
|
49 |
+
"scripts": []
|
50 |
},
|
51 |
{
|
52 |
"name": "Umeda",
|
53 |
"iso_1_code": null,
|
54 |
"iso_3_code": "upi",
|
|
|
55 |
"children": [],
|
56 |
+
"tokenizers": {},
|
57 |
"node_i": "3276",
|
58 |
+
"native_tokenizers": [],
|
59 |
+
"scripts": []
|
60 |
}
|
61 |
],
|
62 |
+
"tokenizers": {},
|
63 |
"node_i": "3271",
|
64 |
+
"native_tokenizers": [],
|
65 |
+
"scripts": []
|
66 |
},
|
67 |
{
|
68 |
"name": "Taikat",
|
69 |
"iso_1_code": null,
|
70 |
"iso_3_code": null,
|
|
|
71 |
"children": [
|
72 |
{
|
73 |
"name": "Taikat",
|
74 |
"iso_1_code": null,
|
75 |
"iso_3_code": "aos",
|
|
|
76 |
"children": [],
|
77 |
+
"tokenizers": {},
|
78 |
"node_i": "3278",
|
79 |
+
"native_tokenizers": [],
|
80 |
+
"scripts": []
|
81 |
},
|
82 |
{
|
83 |
"name": "Awyi",
|
84 |
"iso_1_code": null,
|
85 |
"iso_3_code": "auw",
|
|
|
86 |
"children": [],
|
87 |
+
"tokenizers": {},
|
88 |
"node_i": "3279",
|
89 |
+
"native_tokenizers": [],
|
90 |
+
"scripts": []
|
91 |
}
|
92 |
],
|
93 |
+
"tokenizers": {},
|
94 |
"node_i": "3277",
|
95 |
+
"native_tokenizers": [],
|
96 |
+
"scripts": []
|
97 |
},
|
98 |
{
|
99 |
"name": "Waris",
|
100 |
"iso_1_code": null,
|
101 |
"iso_3_code": null,
|
|
|
102 |
"children": [
|
103 |
{
|
104 |
"name": "Amanab",
|
105 |
"iso_1_code": null,
|
106 |
"iso_3_code": "amn",
|
|
|
107 |
"children": [],
|
108 |
+
"tokenizers": {},
|
109 |
"node_i": "3281",
|
110 |
+
"native_tokenizers": [],
|
111 |
"scripts": [
|
112 |
"Latn"
|
113 |
+
]
|
|
|
114 |
},
|
115 |
{
|
116 |
"name": "Daonda",
|
117 |
"iso_1_code": null,
|
118 |
"iso_3_code": "dnd",
|
|
|
119 |
"children": [],
|
120 |
+
"tokenizers": {},
|
121 |
"node_i": "3282",
|
122 |
+
"native_tokenizers": [],
|
123 |
+
"scripts": []
|
124 |
},
|
125 |
{
|
126 |
"name": "Imonda",
|
127 |
"iso_1_code": null,
|
128 |
"iso_3_code": "imn",
|
|
|
129 |
"children": [],
|
130 |
+
"tokenizers": {},
|
131 |
"node_i": "3283",
|
132 |
+
"native_tokenizers": [],
|
133 |
+
"scripts": []
|
134 |
},
|
135 |
{
|
136 |
"name": "Manem",
|
137 |
"iso_1_code": null,
|
138 |
"iso_3_code": "jet",
|
|
|
139 |
"children": [],
|
140 |
+
"tokenizers": {},
|
141 |
"node_i": "3284",
|
142 |
+
"native_tokenizers": [],
|
143 |
+
"scripts": []
|
144 |
},
|
145 |
{
|
146 |
"name": "Auwe",
|
147 |
"iso_1_code": null,
|
148 |
"iso_3_code": "smf",
|
|
|
149 |
"children": [],
|
150 |
+
"tokenizers": {},
|
151 |
"node_i": "3285",
|
152 |
+
"native_tokenizers": [],
|
153 |
+
"scripts": []
|
154 |
},
|
155 |
{
|
156 |
"name": "Viid",
|
157 |
"iso_1_code": null,
|
158 |
"iso_3_code": "snu",
|
|
|
159 |
"children": [],
|
160 |
+
"tokenizers": {},
|
161 |
"node_i": "3286",
|
162 |
+
"native_tokenizers": [],
|
163 |
+
"scripts": []
|
164 |
},
|
165 |
{
|
166 |
"name": "Sowanda",
|
167 |
"iso_1_code": null,
|
168 |
"iso_3_code": "sow",
|
|
|
169 |
"children": [],
|
170 |
+
"tokenizers": {},
|
171 |
"node_i": "3287",
|
172 |
+
"native_tokenizers": [],
|
173 |
+
"scripts": []
|
174 |
},
|
175 |
{
|
176 |
"name": "Waris",
|
177 |
"iso_1_code": null,
|
178 |
"iso_3_code": "wrs",
|
|
|
179 |
"children": [],
|
180 |
+
"tokenizers": {},
|
181 |
"node_i": "3288",
|
182 |
+
"native_tokenizers": [],
|
183 |
"scripts": [
|
184 |
"Latn"
|
185 |
+
]
|
|
|
186 |
}
|
187 |
],
|
188 |
+
"tokenizers": {},
|
189 |
"node_i": "3280",
|
190 |
+
"native_tokenizers": [],
|
191 |
+
"scripts": []
|
192 |
}
|
193 |
],
|
194 |
+
"tokenizers": {},
|
195 |
"node_i": "3270",
|
196 |
+
"native_tokenizers": [],
|
197 |
+
"scripts": []
|
198 |
}
|
data/Bororoan.json
CHANGED
@@ -2,42 +2,42 @@
|
|
2 |
"name": "Bororoan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Bor\u00f4ro",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "bor",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3290",
|
|
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
-
]
|
17 |
-
"own_tokenizer": false
|
18 |
},
|
19 |
{
|
20 |
"name": "Otuke",
|
21 |
"iso_1_code": null,
|
22 |
"iso_3_code": "otu",
|
23 |
-
"tokenizers": {},
|
24 |
"children": [],
|
|
|
25 |
"node_i": "3291",
|
26 |
-
"
|
27 |
-
"
|
28 |
},
|
29 |
{
|
30 |
"name": "Umot\u00edna",
|
31 |
"iso_1_code": null,
|
32 |
"iso_3_code": "umo",
|
33 |
-
"tokenizers": {},
|
34 |
"children": [],
|
|
|
35 |
"node_i": "3292",
|
36 |
-
"
|
37 |
-
"
|
38 |
}
|
39 |
],
|
|
|
40 |
"node_i": "3289",
|
41 |
-
"
|
42 |
-
"
|
43 |
}
|
|
|
2 |
"name": "Bororoan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Bor\u00f4ro",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "bor",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3290",
|
13 |
+
"native_tokenizers": [],
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
+
]
|
|
|
17 |
},
|
18 |
{
|
19 |
"name": "Otuke",
|
20 |
"iso_1_code": null,
|
21 |
"iso_3_code": "otu",
|
|
|
22 |
"children": [],
|
23 |
+
"tokenizers": {},
|
24 |
"node_i": "3291",
|
25 |
+
"native_tokenizers": [],
|
26 |
+
"scripts": []
|
27 |
},
|
28 |
{
|
29 |
"name": "Umot\u00edna",
|
30 |
"iso_1_code": null,
|
31 |
"iso_3_code": "umo",
|
|
|
32 |
"children": [],
|
33 |
+
"tokenizers": {},
|
34 |
"node_i": "3292",
|
35 |
+
"native_tokenizers": [],
|
36 |
+
"scripts": []
|
37 |
}
|
38 |
],
|
39 |
+
"tokenizers": {},
|
40 |
"node_i": "3289",
|
41 |
+
"native_tokenizers": [],
|
42 |
+
"scripts": []
|
43 |
}
|
data/Botocudoan.json
CHANGED
@@ -2,20 +2,20 @@
|
|
2 |
"name": "Botocudoan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Krenak",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "kqq",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3294",
|
14 |
-
"
|
15 |
-
"
|
16 |
}
|
17 |
],
|
|
|
18 |
"node_i": "3293",
|
19 |
-
"
|
20 |
-
"
|
21 |
}
|
|
|
2 |
"name": "Botocudoan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Krenak",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "kqq",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3294",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
}
|
16 |
],
|
17 |
+
"tokenizers": {},
|
18 |
"node_i": "3293",
|
19 |
+
"native_tokenizers": [],
|
20 |
+
"scripts": []
|
21 |
}
|
data/Caddoan.json
CHANGED
@@ -2,93 +2,93 @@
|
|
2 |
"name": "Caddoan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Caddo",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "cad",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3296",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Northern Caddoan",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": null,
|
21 |
-
"tokenizers": {},
|
22 |
"children": [
|
23 |
{
|
24 |
"name": "Wichita",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": "wic",
|
27 |
-
"tokenizers": {},
|
28 |
"children": [],
|
|
|
29 |
"node_i": "3298",
|
30 |
-
"
|
31 |
-
"
|
32 |
},
|
33 |
{
|
34 |
"name": "Kitsai-Proto-Pawnee",
|
35 |
"iso_1_code": null,
|
36 |
"iso_3_code": null,
|
37 |
-
"tokenizers": {},
|
38 |
"children": [
|
39 |
{
|
40 |
"name": "Kitsai",
|
41 |
"iso_1_code": null,
|
42 |
"iso_3_code": "kii",
|
43 |
-
"tokenizers": {},
|
44 |
"children": [],
|
|
|
45 |
"node_i": "3300",
|
46 |
-
"
|
47 |
-
"
|
48 |
},
|
49 |
{
|
50 |
"name": "Proto-Pawnee",
|
51 |
"iso_1_code": null,
|
52 |
"iso_3_code": null,
|
53 |
-
"tokenizers": {},
|
54 |
"children": [
|
55 |
{
|
56 |
"name": "Arikara",
|
57 |
"iso_1_code": null,
|
58 |
"iso_3_code": "ari",
|
59 |
-
"tokenizers": {},
|
60 |
"children": [],
|
|
|
61 |
"node_i": "3302",
|
62 |
-
"
|
63 |
-
"
|
64 |
},
|
65 |
{
|
66 |
"name": "Pawnee",
|
67 |
"iso_1_code": null,
|
68 |
"iso_3_code": "paw",
|
69 |
-
"tokenizers": {},
|
70 |
"children": [],
|
|
|
71 |
"node_i": "3303",
|
72 |
-
"
|
73 |
-
"
|
74 |
}
|
75 |
],
|
|
|
76 |
"node_i": "3301",
|
77 |
-
"
|
78 |
-
"
|
79 |
}
|
80 |
],
|
|
|
81 |
"node_i": "3299",
|
82 |
-
"
|
83 |
-
"
|
84 |
}
|
85 |
],
|
|
|
86 |
"node_i": "3297",
|
87 |
-
"
|
88 |
-
"
|
89 |
}
|
90 |
],
|
|
|
91 |
"node_i": "3295",
|
92 |
-
"
|
93 |
-
"
|
94 |
}
|
|
|
2 |
"name": "Caddoan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Caddo",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "cad",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3296",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Northern Caddoan",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": null,
|
|
|
20 |
"children": [
|
21 |
{
|
22 |
"name": "Wichita",
|
23 |
"iso_1_code": null,
|
24 |
"iso_3_code": "wic",
|
|
|
25 |
"children": [],
|
26 |
+
"tokenizers": {},
|
27 |
"node_i": "3298",
|
28 |
+
"native_tokenizers": [],
|
29 |
+
"scripts": []
|
30 |
},
|
31 |
{
|
32 |
"name": "Kitsai-Proto-Pawnee",
|
33 |
"iso_1_code": null,
|
34 |
"iso_3_code": null,
|
|
|
35 |
"children": [
|
36 |
{
|
37 |
"name": "Kitsai",
|
38 |
"iso_1_code": null,
|
39 |
"iso_3_code": "kii",
|
|
|
40 |
"children": [],
|
41 |
+
"tokenizers": {},
|
42 |
"node_i": "3300",
|
43 |
+
"native_tokenizers": [],
|
44 |
+
"scripts": []
|
45 |
},
|
46 |
{
|
47 |
"name": "Proto-Pawnee",
|
48 |
"iso_1_code": null,
|
49 |
"iso_3_code": null,
|
|
|
50 |
"children": [
|
51 |
{
|
52 |
"name": "Arikara",
|
53 |
"iso_1_code": null,
|
54 |
"iso_3_code": "ari",
|
|
|
55 |
"children": [],
|
56 |
+
"tokenizers": {},
|
57 |
"node_i": "3302",
|
58 |
+
"native_tokenizers": [],
|
59 |
+
"scripts": []
|
60 |
},
|
61 |
{
|
62 |
"name": "Pawnee",
|
63 |
"iso_1_code": null,
|
64 |
"iso_3_code": "paw",
|
|
|
65 |
"children": [],
|
66 |
+
"tokenizers": {},
|
67 |
"node_i": "3303",
|
68 |
+
"native_tokenizers": [],
|
69 |
+
"scripts": []
|
70 |
}
|
71 |
],
|
72 |
+
"tokenizers": {},
|
73 |
"node_i": "3301",
|
74 |
+
"native_tokenizers": [],
|
75 |
+
"scripts": []
|
76 |
}
|
77 |
],
|
78 |
+
"tokenizers": {},
|
79 |
"node_i": "3299",
|
80 |
+
"native_tokenizers": [],
|
81 |
+
"scripts": []
|
82 |
}
|
83 |
],
|
84 |
+
"tokenizers": {},
|
85 |
"node_i": "3297",
|
86 |
+
"native_tokenizers": [],
|
87 |
+
"scripts": []
|
88 |
}
|
89 |
],
|
90 |
+
"tokenizers": {},
|
91 |
"node_i": "3295",
|
92 |
+
"native_tokenizers": [],
|
93 |
+
"scripts": []
|
94 |
}
|
data/Cahuapanan.json
CHANGED
@@ -2,32 +2,32 @@
|
|
2 |
"name": "Cahuapanan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Shawi",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "cbt",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3305",
|
|
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
-
]
|
17 |
-
"own_tokenizer": false
|
18 |
},
|
19 |
{
|
20 |
"name": "Jebero",
|
21 |
"iso_1_code": null,
|
22 |
"iso_3_code": "jeb",
|
23 |
-
"tokenizers": {},
|
24 |
"children": [],
|
|
|
25 |
"node_i": "3306",
|
26 |
-
"
|
27 |
-
"
|
28 |
}
|
29 |
],
|
|
|
30 |
"node_i": "3304",
|
31 |
-
"
|
32 |
-
"
|
33 |
}
|
|
|
2 |
"name": "Cahuapanan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Shawi",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "cbt",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3305",
|
13 |
+
"native_tokenizers": [],
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
+
]
|
|
|
17 |
},
|
18 |
{
|
19 |
"name": "Jebero",
|
20 |
"iso_1_code": null,
|
21 |
"iso_3_code": "jeb",
|
|
|
22 |
"children": [],
|
23 |
+
"tokenizers": {},
|
24 |
"node_i": "3306",
|
25 |
+
"native_tokenizers": [],
|
26 |
+
"scripts": []
|
27 |
}
|
28 |
],
|
29 |
+
"tokenizers": {},
|
30 |
"node_i": "3304",
|
31 |
+
"native_tokenizers": [],
|
32 |
+
"scripts": []
|
33 |
}
|
data/Cariban.json
CHANGED
@@ -2,569 +2,569 @@
|
|
2 |
"name": "Cariban",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Carib",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "car",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3308",
|
|
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
-
]
|
17 |
-
"own_tokenizer": false
|
18 |
},
|
19 |
{
|
20 |
"name": "Central",
|
21 |
"iso_1_code": null,
|
22 |
"iso_3_code": null,
|
23 |
-
"tokenizers": {},
|
24 |
"children": [
|
25 |
{
|
26 |
"name": "Apala\u00ed",
|
27 |
"iso_1_code": null,
|
28 |
"iso_3_code": "apy",
|
29 |
-
"tokenizers": {},
|
30 |
"children": [],
|
|
|
31 |
"node_i": "3310",
|
|
|
32 |
"scripts": [
|
33 |
"Latn"
|
34 |
-
]
|
35 |
-
"own_tokenizer": false
|
36 |
},
|
37 |
{
|
38 |
"name": "Cuman\u00e1",
|
39 |
"iso_1_code": null,
|
40 |
"iso_3_code": null,
|
41 |
-
"tokenizers": {},
|
42 |
"children": [
|
43 |
{
|
44 |
"name": "Chaima",
|
45 |
"iso_1_code": null,
|
46 |
"iso_3_code": "ciy",
|
47 |
-
"tokenizers": {},
|
48 |
"children": [],
|
|
|
49 |
"node_i": "3312",
|
50 |
-
"
|
51 |
-
"
|
52 |
},
|
53 |
{
|
54 |
"name": "Cumanagoto",
|
55 |
"iso_1_code": null,
|
56 |
"iso_3_code": "cuo",
|
57 |
-
"tokenizers": {},
|
58 |
"children": [],
|
|
|
59 |
"node_i": "3313",
|
60 |
-
"
|
61 |
-
"
|
62 |
}
|
63 |
],
|
|
|
64 |
"node_i": "3311",
|
65 |
-
"
|
66 |
-
"
|
67 |
},
|
68 |
{
|
69 |
"name": "Makiritare",
|
70 |
"iso_1_code": null,
|
71 |
"iso_3_code": null,
|
72 |
-
"tokenizers": {},
|
73 |
"children": [
|
74 |
{
|
75 |
"name": "Maquiritari",
|
76 |
"iso_1_code": null,
|
77 |
"iso_3_code": "mch",
|
78 |
-
"tokenizers": {},
|
79 |
"children": [],
|
|
|
80 |
"node_i": "3315",
|
81 |
-
"
|
82 |
-
"
|
83 |
}
|
84 |
],
|
|
|
85 |
"node_i": "3314",
|
86 |
-
"
|
87 |
-
"
|
88 |
},
|
89 |
{
|
90 |
"name": "Mapoyo-Yavarana",
|
91 |
"iso_1_code": null,
|
92 |
"iso_3_code": null,
|
93 |
-
"tokenizers": {},
|
94 |
"children": [
|
95 |
{
|
96 |
"name": "Mapoyo",
|
97 |
"iso_1_code": null,
|
98 |
"iso_3_code": "mcg",
|
99 |
-
"tokenizers": {},
|
100 |
"children": [],
|
|
|
101 |
"node_i": "3317",
|
102 |
-
"
|
103 |
-
"
|
104 |
},
|
105 |
{
|
106 |
"name": "P\u00e9mono",
|
107 |
"iso_1_code": null,
|
108 |
"iso_3_code": "pev",
|
109 |
-
"tokenizers": {},
|
110 |
"children": [],
|
|
|
111 |
"node_i": "3318",
|
112 |
-
"
|
113 |
-
"
|
114 |
},
|
115 |
{
|
116 |
"name": "Tamanaku",
|
117 |
"iso_1_code": null,
|
118 |
"iso_3_code": "tmz",
|
119 |
-
"tokenizers": {},
|
120 |
"children": [],
|
|
|
121 |
"node_i": "3319",
|
122 |
-
"
|
123 |
-
"
|
124 |
},
|
125 |
{
|
126 |
"name": "Yabarana",
|
127 |
"iso_1_code": null,
|
128 |
"iso_3_code": "yar",
|
129 |
-
"tokenizers": {},
|
130 |
"children": [],
|
|
|
131 |
"node_i": "3320",
|
132 |
-
"
|
133 |
-
"
|
134 |
}
|
135 |
],
|
|
|
136 |
"node_i": "3316",
|
137 |
-
"
|
138 |
-
"
|
139 |
},
|
140 |
{
|
141 |
"name": "Wayana",
|
142 |
"iso_1_code": null,
|
143 |
"iso_3_code": null,
|
144 |
-
"tokenizers": {},
|
145 |
"children": [
|
146 |
{
|
147 |
"name": "Kaxui\u00e2na",
|
148 |
"iso_1_code": null,
|
149 |
"iso_3_code": "kbb",
|
150 |
-
"tokenizers": {},
|
151 |
"children": [],
|
|
|
152 |
"node_i": "3322",
|
153 |
-
"
|
154 |
-
"
|
155 |
},
|
156 |
{
|
157 |
"name": "Wayana",
|
158 |
"iso_1_code": null,
|
159 |
"iso_3_code": "way",
|
160 |
-
"tokenizers": {},
|
161 |
"children": [],
|
|
|
162 |
"node_i": "3323",
|
|
|
163 |
"scripts": [
|
164 |
"Latn"
|
165 |
-
]
|
166 |
-
"own_tokenizer": false
|
167 |
}
|
168 |
],
|
|
|
169 |
"node_i": "3321",
|
170 |
-
"
|
171 |
-
"
|
172 |
}
|
173 |
],
|
|
|
174 |
"node_i": "3309",
|
175 |
-
"
|
176 |
-
"
|
177 |
},
|
178 |
{
|
179 |
"name": "Kashuyana",
|
180 |
"iso_1_code": null,
|
181 |
"iso_3_code": null,
|
182 |
-
"tokenizers": {},
|
183 |
"children": [
|
184 |
{
|
185 |
"name": "Sikiana",
|
186 |
"iso_1_code": null,
|
187 |
"iso_3_code": "sik",
|
188 |
-
"tokenizers": {},
|
189 |
"children": [],
|
|
|
190 |
"node_i": "3325",
|
191 |
-
"
|
192 |
-
"
|
193 |
}
|
194 |
],
|
|
|
195 |
"node_i": "3324",
|
196 |
-
"
|
197 |
-
"
|
198 |
},
|
199 |
{
|
200 |
"name": "North Amazonian",
|
201 |
"iso_1_code": null,
|
202 |
"iso_3_code": null,
|
203 |
-
"tokenizers": {},
|
204 |
"children": [
|
205 |
{
|
206 |
"name": "Pem\u00f3n",
|
207 |
"iso_1_code": null,
|
208 |
"iso_3_code": null,
|
209 |
-
"tokenizers": {},
|
210 |
"children": [
|
211 |
{
|
212 |
"name": "Pem\u00f3n proper",
|
213 |
"iso_1_code": null,
|
214 |
"iso_3_code": null,
|
215 |
-
"tokenizers": {},
|
216 |
"children": [
|
217 |
{
|
218 |
"name": "Pemon",
|
219 |
"iso_1_code": null,
|
220 |
"iso_3_code": "aoc",
|
221 |
-
"tokenizers": {},
|
222 |
"children": [],
|
|
|
223 |
"node_i": "3329",
|
224 |
-
"
|
225 |
-
"
|
226 |
},
|
227 |
{
|
228 |
"name": "Macushi",
|
229 |
"iso_1_code": null,
|
230 |
"iso_3_code": "mbc",
|
231 |
-
"tokenizers": {},
|
232 |
"children": [],
|
|
|
233 |
"node_i": "3330",
|
|
|
234 |
"scripts": [
|
235 |
"Latn"
|
236 |
-
]
|
237 |
-
"own_tokenizer": false
|
238 |
},
|
239 |
{
|
240 |
"name": "Kapong",
|
241 |
"iso_1_code": null,
|
242 |
"iso_3_code": null,
|
243 |
-
"tokenizers": {},
|
244 |
"children": [
|
245 |
{
|
246 |
"name": "Akawaio",
|
247 |
"iso_1_code": null,
|
248 |
"iso_3_code": "ake",
|
249 |
-
"tokenizers": {},
|
250 |
"children": [],
|
|
|
251 |
"node_i": "3332",
|
|
|
252 |
"scripts": [
|
253 |
"Latn"
|
254 |
-
]
|
255 |
-
"own_tokenizer": false
|
256 |
},
|
257 |
{
|
258 |
"name": "Patamona",
|
259 |
"iso_1_code": null,
|
260 |
"iso_3_code": "pbc",
|
261 |
-
"tokenizers": {},
|
262 |
"children": [],
|
|
|
263 |
"node_i": "3333",
|
|
|
264 |
"scripts": [
|
265 |
"Latn"
|
266 |
-
]
|
267 |
-
"own_tokenizer": false
|
268 |
}
|
269 |
],
|
|
|
270 |
"node_i": "3331",
|
271 |
-
"
|
272 |
-
"
|
273 |
}
|
274 |
],
|
|
|
275 |
"node_i": "3328",
|
276 |
-
"
|
277 |
-
"
|
278 |
}
|
279 |
],
|
|
|
280 |
"node_i": "3327",
|
281 |
-
"
|
282 |
-
"
|
283 |
},
|
284 |
{
|
285 |
"name": "Yawaper\u00ed",
|
286 |
"iso_1_code": null,
|
287 |
"iso_3_code": null,
|
288 |
-
"tokenizers": {},
|
289 |
"children": [
|
290 |
{
|
291 |
"name": "Waimiri-Atroar\u00ed",
|
292 |
"iso_1_code": null,
|
293 |
"iso_3_code": "atr",
|
294 |
-
"tokenizers": {},
|
295 |
"children": [],
|
|
|
296 |
"node_i": "3335",
|
297 |
-
"
|
298 |
-
"
|
299 |
}
|
300 |
],
|
|
|
301 |
"node_i": "3334",
|
302 |
-
"
|
303 |
-
"
|
304 |
}
|
305 |
],
|
|
|
306 |
"node_i": "3326",
|
307 |
-
"
|
308 |
-
"
|
309 |
},
|
310 |
{
|
311 |
"name": "South Amazonian",
|
312 |
"iso_1_code": null,
|
313 |
"iso_3_code": null,
|
314 |
-
"tokenizers": {},
|
315 |
"children": [
|
316 |
{
|
317 |
"name": "E\u2019\u00f1apa Woromaipu",
|
318 |
"iso_1_code": null,
|
319 |
"iso_3_code": "pbh",
|
320 |
-
"tokenizers": {},
|
321 |
"children": [],
|
|
|
322 |
"node_i": "3337",
|
323 |
-
"
|
324 |
-
"
|
325 |
},
|
326 |
{
|
327 |
"name": "Arara",
|
328 |
"iso_1_code": null,
|
329 |
"iso_3_code": null,
|
330 |
-
"tokenizers": {},
|
331 |
"children": [
|
332 |
{
|
333 |
"name": "Arara, Par\u00e1",
|
334 |
"iso_1_code": null,
|
335 |
"iso_3_code": "aap",
|
336 |
-
"tokenizers": {},
|
337 |
"children": [],
|
|
|
338 |
"node_i": "3339",
|
339 |
-
"
|
340 |
-
"
|
341 |
},
|
342 |
{
|
343 |
"name": "Ikpeng",
|
344 |
"iso_1_code": null,
|
345 |
"iso_3_code": "txi",
|
346 |
-
"tokenizers": {},
|
347 |
"children": [],
|
|
|
348 |
"node_i": "3340",
|
349 |
-
"
|
350 |
-
"
|
351 |
}
|
352 |
],
|
|
|
353 |
"node_i": "3338",
|
354 |
-
"
|
355 |
-
"
|
356 |
},
|
357 |
{
|
358 |
"name": "Bakair\u00ed",
|
359 |
"iso_1_code": null,
|
360 |
"iso_3_code": null,
|
361 |
-
"tokenizers": {},
|
362 |
"children": [
|
363 |
{
|
364 |
"name": "Bakair\u00ed",
|
365 |
"iso_1_code": null,
|
366 |
"iso_3_code": "bkq",
|
367 |
-
"tokenizers": {},
|
368 |
"children": [],
|
|
|
369 |
"node_i": "3342",
|
|
|
370 |
"scripts": [
|
371 |
"Latn"
|
372 |
-
]
|
373 |
-
"own_tokenizer": false
|
374 |
},
|
375 |
{
|
376 |
"name": "Amonap",
|
377 |
"iso_1_code": null,
|
378 |
"iso_3_code": null,
|
379 |
-
"tokenizers": {},
|
380 |
"children": [
|
381 |
{
|
382 |
"name": "Kuik\u00faro-Kalap\u00e1lo",
|
383 |
"iso_1_code": null,
|
384 |
"iso_3_code": "kui",
|
385 |
-
"tokenizers": {},
|
386 |
"children": [],
|
|
|
387 |
"node_i": "3344",
|
388 |
-
"
|
389 |
-
"
|
390 |
},
|
391 |
{
|
392 |
"name": "Matipuhy",
|
393 |
"iso_1_code": null,
|
394 |
"iso_3_code": "mzo",
|
395 |
-
"tokenizers": {},
|
396 |
"children": [],
|
|
|
397 |
"node_i": "3345",
|
398 |
-
"
|
399 |
-
"
|
400 |
}
|
401 |
],
|
|
|
402 |
"node_i": "3343",
|
403 |
-
"
|
404 |
-
"
|
405 |
}
|
406 |
],
|
|
|
407 |
"node_i": "3341",
|
408 |
-
"
|
409 |
-
"
|
410 |
}
|
411 |
],
|
|
|
412 |
"node_i": "3336",
|
413 |
-
"
|
414 |
-
"
|
415 |
},
|
416 |
{
|
417 |
"name": "Tiriy\u00f3",
|
418 |
"iso_1_code": null,
|
419 |
"iso_3_code": null,
|
420 |
-
"tokenizers": {},
|
421 |
"children": [
|
422 |
{
|
423 |
"name": "Salum\u00e1",
|
424 |
"iso_1_code": null,
|
425 |
"iso_3_code": "slj",
|
426 |
-
"tokenizers": {},
|
427 |
"children": [],
|
|
|
428 |
"node_i": "3347",
|
429 |
-
"
|
430 |
-
"
|
431 |
},
|
432 |
{
|
433 |
"name": "Karihona",
|
434 |
"iso_1_code": null,
|
435 |
"iso_3_code": null,
|
436 |
-
"tokenizers": {},
|
437 |
"children": [
|
438 |
{
|
439 |
"name": "Carijona",
|
440 |
"iso_1_code": null,
|
441 |
"iso_3_code": "cbd",
|
442 |
-
"tokenizers": {},
|
443 |
"children": [],
|
|
|
444 |
"node_i": "3349",
|
445 |
-
"
|
446 |
-
"
|
447 |
}
|
448 |
],
|
|
|
449 |
"node_i": "3348",
|
450 |
-
"
|
451 |
-
"
|
452 |
},
|
453 |
{
|
454 |
"name": "Tiriy\u00f3",
|
455 |
"iso_1_code": null,
|
456 |
"iso_3_code": null,
|
457 |
-
"tokenizers": {},
|
458 |
"children": [
|
459 |
{
|
460 |
"name": "Akurio",
|
461 |
"iso_1_code": null,
|
462 |
"iso_3_code": "ako",
|
463 |
-
"tokenizers": {},
|
464 |
"children": [],
|
|
|
465 |
"node_i": "3351",
|
466 |
-
"
|
467 |
-
"
|
468 |
},
|
469 |
{
|
470 |
"name": "Tri\u00f3",
|
471 |
"iso_1_code": null,
|
472 |
"iso_3_code": "tri",
|
473 |
-
"tokenizers": {},
|
474 |
"children": [],
|
|
|
475 |
"node_i": "3352",
|
476 |
-
"
|
477 |
-
"
|
478 |
}
|
479 |
],
|
|
|
480 |
"node_i": "3350",
|
481 |
-
"
|
482 |
-
"
|
483 |
}
|
484 |
],
|
|
|
485 |
"node_i": "3346",
|
486 |
-
"
|
487 |
-
"
|
488 |
},
|
489 |
{
|
490 |
"name": "Waiwai",
|
491 |
"iso_1_code": null,
|
492 |
"iso_3_code": null,
|
493 |
-
"tokenizers": {},
|
494 |
"children": [
|
495 |
{
|
496 |
"name": "Hixkary\u00e1na",
|
497 |
"iso_1_code": null,
|
498 |
"iso_3_code": "hix",
|
499 |
-
"tokenizers": {},
|
500 |
"children": [],
|
|
|
501 |
"node_i": "3354",
|
|
|
502 |
"scripts": [
|
503 |
"Latn"
|
504 |
-
]
|
505 |
-
"own_tokenizer": false
|
506 |
},
|
507 |
{
|
508 |
"name": "Waiwai",
|
509 |
"iso_1_code": null,
|
510 |
"iso_3_code": "waw",
|
511 |
-
"tokenizers": {},
|
512 |
"children": [],
|
|
|
513 |
"node_i": "3355",
|
514 |
-
"
|
515 |
-
"
|
516 |
}
|
517 |
],
|
|
|
518 |
"node_i": "3353",
|
519 |
-
"
|
520 |
-
"
|
521 |
},
|
522 |
{
|
523 |
"name": "Yukpa",
|
524 |
"iso_1_code": null,
|
525 |
"iso_3_code": null,
|
526 |
-
"tokenizers": {},
|
527 |
"children": [
|
528 |
{
|
529 |
"name": "Yucpa-Yapreria",
|
530 |
"iso_1_code": null,
|
531 |
"iso_3_code": null,
|
532 |
-
"tokenizers": {},
|
533 |
"children": [
|
534 |
{
|
535 |
"name": "Japreria",
|
536 |
"iso_1_code": null,
|
537 |
"iso_3_code": "jru",
|
538 |
-
"tokenizers": {},
|
539 |
"children": [],
|
|
|
540 |
"node_i": "3358",
|
541 |
-
"
|
542 |
-
"
|
543 |
},
|
544 |
{
|
545 |
"name": "Yukpa",
|
546 |
"iso_1_code": null,
|
547 |
"iso_3_code": "yup",
|
548 |
-
"tokenizers": {},
|
549 |
"children": [],
|
|
|
550 |
"node_i": "3359",
|
|
|
551 |
"scripts": [
|
552 |
"Latn"
|
553 |
-
]
|
554 |
-
"own_tokenizer": false
|
555 |
}
|
556 |
],
|
|
|
557 |
"node_i": "3357",
|
558 |
-
"
|
559 |
-
"
|
560 |
}
|
561 |
],
|
|
|
562 |
"node_i": "3356",
|
563 |
-
"
|
564 |
-
"
|
565 |
}
|
566 |
],
|
|
|
567 |
"node_i": "3307",
|
568 |
-
"
|
569 |
-
"
|
570 |
}
|
|
|
2 |
"name": "Cariban",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Carib",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "car",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3308",
|
13 |
+
"native_tokenizers": [],
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
+
]
|
|
|
17 |
},
|
18 |
{
|
19 |
"name": "Central",
|
20 |
"iso_1_code": null,
|
21 |
"iso_3_code": null,
|
|
|
22 |
"children": [
|
23 |
{
|
24 |
"name": "Apala\u00ed",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": "apy",
|
|
|
27 |
"children": [],
|
28 |
+
"tokenizers": {},
|
29 |
"node_i": "3310",
|
30 |
+
"native_tokenizers": [],
|
31 |
"scripts": [
|
32 |
"Latn"
|
33 |
+
]
|
|
|
34 |
},
|
35 |
{
|
36 |
"name": "Cuman\u00e1",
|
37 |
"iso_1_code": null,
|
38 |
"iso_3_code": null,
|
|
|
39 |
"children": [
|
40 |
{
|
41 |
"name": "Chaima",
|
42 |
"iso_1_code": null,
|
43 |
"iso_3_code": "ciy",
|
|
|
44 |
"children": [],
|
45 |
+
"tokenizers": {},
|
46 |
"node_i": "3312",
|
47 |
+
"native_tokenizers": [],
|
48 |
+
"scripts": []
|
49 |
},
|
50 |
{
|
51 |
"name": "Cumanagoto",
|
52 |
"iso_1_code": null,
|
53 |
"iso_3_code": "cuo",
|
|
|
54 |
"children": [],
|
55 |
+
"tokenizers": {},
|
56 |
"node_i": "3313",
|
57 |
+
"native_tokenizers": [],
|
58 |
+
"scripts": []
|
59 |
}
|
60 |
],
|
61 |
+
"tokenizers": {},
|
62 |
"node_i": "3311",
|
63 |
+
"native_tokenizers": [],
|
64 |
+
"scripts": []
|
65 |
},
|
66 |
{
|
67 |
"name": "Makiritare",
|
68 |
"iso_1_code": null,
|
69 |
"iso_3_code": null,
|
|
|
70 |
"children": [
|
71 |
{
|
72 |
"name": "Maquiritari",
|
73 |
"iso_1_code": null,
|
74 |
"iso_3_code": "mch",
|
|
|
75 |
"children": [],
|
76 |
+
"tokenizers": {},
|
77 |
"node_i": "3315",
|
78 |
+
"native_tokenizers": [],
|
79 |
+
"scripts": []
|
80 |
}
|
81 |
],
|
82 |
+
"tokenizers": {},
|
83 |
"node_i": "3314",
|
84 |
+
"native_tokenizers": [],
|
85 |
+
"scripts": []
|
86 |
},
|
87 |
{
|
88 |
"name": "Mapoyo-Yavarana",
|
89 |
"iso_1_code": null,
|
90 |
"iso_3_code": null,
|
|
|
91 |
"children": [
|
92 |
{
|
93 |
"name": "Mapoyo",
|
94 |
"iso_1_code": null,
|
95 |
"iso_3_code": "mcg",
|
|
|
96 |
"children": [],
|
97 |
+
"tokenizers": {},
|
98 |
"node_i": "3317",
|
99 |
+
"native_tokenizers": [],
|
100 |
+
"scripts": []
|
101 |
},
|
102 |
{
|
103 |
"name": "P\u00e9mono",
|
104 |
"iso_1_code": null,
|
105 |
"iso_3_code": "pev",
|
|
|
106 |
"children": [],
|
107 |
+
"tokenizers": {},
|
108 |
"node_i": "3318",
|
109 |
+
"native_tokenizers": [],
|
110 |
+
"scripts": []
|
111 |
},
|
112 |
{
|
113 |
"name": "Tamanaku",
|
114 |
"iso_1_code": null,
|
115 |
"iso_3_code": "tmz",
|
|
|
116 |
"children": [],
|
117 |
+
"tokenizers": {},
|
118 |
"node_i": "3319",
|
119 |
+
"native_tokenizers": [],
|
120 |
+
"scripts": []
|
121 |
},
|
122 |
{
|
123 |
"name": "Yabarana",
|
124 |
"iso_1_code": null,
|
125 |
"iso_3_code": "yar",
|
|
|
126 |
"children": [],
|
127 |
+
"tokenizers": {},
|
128 |
"node_i": "3320",
|
129 |
+
"native_tokenizers": [],
|
130 |
+
"scripts": []
|
131 |
}
|
132 |
],
|
133 |
+
"tokenizers": {},
|
134 |
"node_i": "3316",
|
135 |
+
"native_tokenizers": [],
|
136 |
+
"scripts": []
|
137 |
},
|
138 |
{
|
139 |
"name": "Wayana",
|
140 |
"iso_1_code": null,
|
141 |
"iso_3_code": null,
|
|
|
142 |
"children": [
|
143 |
{
|
144 |
"name": "Kaxui\u00e2na",
|
145 |
"iso_1_code": null,
|
146 |
"iso_3_code": "kbb",
|
|
|
147 |
"children": [],
|
148 |
+
"tokenizers": {},
|
149 |
"node_i": "3322",
|
150 |
+
"native_tokenizers": [],
|
151 |
+
"scripts": []
|
152 |
},
|
153 |
{
|
154 |
"name": "Wayana",
|
155 |
"iso_1_code": null,
|
156 |
"iso_3_code": "way",
|
|
|
157 |
"children": [],
|
158 |
+
"tokenizers": {},
|
159 |
"node_i": "3323",
|
160 |
+
"native_tokenizers": [],
|
161 |
"scripts": [
|
162 |
"Latn"
|
163 |
+
]
|
|
|
164 |
}
|
165 |
],
|
166 |
+
"tokenizers": {},
|
167 |
"node_i": "3321",
|
168 |
+
"native_tokenizers": [],
|
169 |
+
"scripts": []
|
170 |
}
|
171 |
],
|
172 |
+
"tokenizers": {},
|
173 |
"node_i": "3309",
|
174 |
+
"native_tokenizers": [],
|
175 |
+
"scripts": []
|
176 |
},
|
177 |
{
|
178 |
"name": "Kashuyana",
|
179 |
"iso_1_code": null,
|
180 |
"iso_3_code": null,
|
|
|
181 |
"children": [
|
182 |
{
|
183 |
"name": "Sikiana",
|
184 |
"iso_1_code": null,
|
185 |
"iso_3_code": "sik",
|
|
|
186 |
"children": [],
|
187 |
+
"tokenizers": {},
|
188 |
"node_i": "3325",
|
189 |
+
"native_tokenizers": [],
|
190 |
+
"scripts": []
|
191 |
}
|
192 |
],
|
193 |
+
"tokenizers": {},
|
194 |
"node_i": "3324",
|
195 |
+
"native_tokenizers": [],
|
196 |
+
"scripts": []
|
197 |
},
|
198 |
{
|
199 |
"name": "North Amazonian",
|
200 |
"iso_1_code": null,
|
201 |
"iso_3_code": null,
|
|
|
202 |
"children": [
|
203 |
{
|
204 |
"name": "Pem\u00f3n",
|
205 |
"iso_1_code": null,
|
206 |
"iso_3_code": null,
|
|
|
207 |
"children": [
|
208 |
{
|
209 |
"name": "Pem\u00f3n proper",
|
210 |
"iso_1_code": null,
|
211 |
"iso_3_code": null,
|
|
|
212 |
"children": [
|
213 |
{
|
214 |
"name": "Pemon",
|
215 |
"iso_1_code": null,
|
216 |
"iso_3_code": "aoc",
|
|
|
217 |
"children": [],
|
218 |
+
"tokenizers": {},
|
219 |
"node_i": "3329",
|
220 |
+
"native_tokenizers": [],
|
221 |
+
"scripts": []
|
222 |
},
|
223 |
{
|
224 |
"name": "Macushi",
|
225 |
"iso_1_code": null,
|
226 |
"iso_3_code": "mbc",
|
|
|
227 |
"children": [],
|
228 |
+
"tokenizers": {},
|
229 |
"node_i": "3330",
|
230 |
+
"native_tokenizers": [],
|
231 |
"scripts": [
|
232 |
"Latn"
|
233 |
+
]
|
|
|
234 |
},
|
235 |
{
|
236 |
"name": "Kapong",
|
237 |
"iso_1_code": null,
|
238 |
"iso_3_code": null,
|
|
|
239 |
"children": [
|
240 |
{
|
241 |
"name": "Akawaio",
|
242 |
"iso_1_code": null,
|
243 |
"iso_3_code": "ake",
|
|
|
244 |
"children": [],
|
245 |
+
"tokenizers": {},
|
246 |
"node_i": "3332",
|
247 |
+
"native_tokenizers": [],
|
248 |
"scripts": [
|
249 |
"Latn"
|
250 |
+
]
|
|
|
251 |
},
|
252 |
{
|
253 |
"name": "Patamona",
|
254 |
"iso_1_code": null,
|
255 |
"iso_3_code": "pbc",
|
|
|
256 |
"children": [],
|
257 |
+
"tokenizers": {},
|
258 |
"node_i": "3333",
|
259 |
+
"native_tokenizers": [],
|
260 |
"scripts": [
|
261 |
"Latn"
|
262 |
+
]
|
|
|
263 |
}
|
264 |
],
|
265 |
+
"tokenizers": {},
|
266 |
"node_i": "3331",
|
267 |
+
"native_tokenizers": [],
|
268 |
+
"scripts": []
|
269 |
}
|
270 |
],
|
271 |
+
"tokenizers": {},
|
272 |
"node_i": "3328",
|
273 |
+
"native_tokenizers": [],
|
274 |
+
"scripts": []
|
275 |
}
|
276 |
],
|
277 |
+
"tokenizers": {},
|
278 |
"node_i": "3327",
|
279 |
+
"native_tokenizers": [],
|
280 |
+
"scripts": []
|
281 |
},
|
282 |
{
|
283 |
"name": "Yawaper\u00ed",
|
284 |
"iso_1_code": null,
|
285 |
"iso_3_code": null,
|
|
|
286 |
"children": [
|
287 |
{
|
288 |
"name": "Waimiri-Atroar\u00ed",
|
289 |
"iso_1_code": null,
|
290 |
"iso_3_code": "atr",
|
|
|
291 |
"children": [],
|
292 |
+
"tokenizers": {},
|
293 |
"node_i": "3335",
|
294 |
+
"native_tokenizers": [],
|
295 |
+
"scripts": []
|
296 |
}
|
297 |
],
|
298 |
+
"tokenizers": {},
|
299 |
"node_i": "3334",
|
300 |
+
"native_tokenizers": [],
|
301 |
+
"scripts": []
|
302 |
}
|
303 |
],
|
304 |
+
"tokenizers": {},
|
305 |
"node_i": "3326",
|
306 |
+
"native_tokenizers": [],
|
307 |
+
"scripts": []
|
308 |
},
|
309 |
{
|
310 |
"name": "South Amazonian",
|
311 |
"iso_1_code": null,
|
312 |
"iso_3_code": null,
|
|
|
313 |
"children": [
|
314 |
{
|
315 |
"name": "E\u2019\u00f1apa Woromaipu",
|
316 |
"iso_1_code": null,
|
317 |
"iso_3_code": "pbh",
|
|
|
318 |
"children": [],
|
319 |
+
"tokenizers": {},
|
320 |
"node_i": "3337",
|
321 |
+
"native_tokenizers": [],
|
322 |
+
"scripts": []
|
323 |
},
|
324 |
{
|
325 |
"name": "Arara",
|
326 |
"iso_1_code": null,
|
327 |
"iso_3_code": null,
|
|
|
328 |
"children": [
|
329 |
{
|
330 |
"name": "Arara, Par\u00e1",
|
331 |
"iso_1_code": null,
|
332 |
"iso_3_code": "aap",
|
|
|
333 |
"children": [],
|
334 |
+
"tokenizers": {},
|
335 |
"node_i": "3339",
|
336 |
+
"native_tokenizers": [],
|
337 |
+
"scripts": []
|
338 |
},
|
339 |
{
|
340 |
"name": "Ikpeng",
|
341 |
"iso_1_code": null,
|
342 |
"iso_3_code": "txi",
|
|
|
343 |
"children": [],
|
344 |
+
"tokenizers": {},
|
345 |
"node_i": "3340",
|
346 |
+
"native_tokenizers": [],
|
347 |
+
"scripts": []
|
348 |
}
|
349 |
],
|
350 |
+
"tokenizers": {},
|
351 |
"node_i": "3338",
|
352 |
+
"native_tokenizers": [],
|
353 |
+
"scripts": []
|
354 |
},
|
355 |
{
|
356 |
"name": "Bakair\u00ed",
|
357 |
"iso_1_code": null,
|
358 |
"iso_3_code": null,
|
|
|
359 |
"children": [
|
360 |
{
|
361 |
"name": "Bakair\u00ed",
|
362 |
"iso_1_code": null,
|
363 |
"iso_3_code": "bkq",
|
|
|
364 |
"children": [],
|
365 |
+
"tokenizers": {},
|
366 |
"node_i": "3342",
|
367 |
+
"native_tokenizers": [],
|
368 |
"scripts": [
|
369 |
"Latn"
|
370 |
+
]
|
|
|
371 |
},
|
372 |
{
|
373 |
"name": "Amonap",
|
374 |
"iso_1_code": null,
|
375 |
"iso_3_code": null,
|
|
|
376 |
"children": [
|
377 |
{
|
378 |
"name": "Kuik\u00faro-Kalap\u00e1lo",
|
379 |
"iso_1_code": null,
|
380 |
"iso_3_code": "kui",
|
|
|
381 |
"children": [],
|
382 |
+
"tokenizers": {},
|
383 |
"node_i": "3344",
|
384 |
+
"native_tokenizers": [],
|
385 |
+
"scripts": []
|
386 |
},
|
387 |
{
|
388 |
"name": "Matipuhy",
|
389 |
"iso_1_code": null,
|
390 |
"iso_3_code": "mzo",
|
|
|
391 |
"children": [],
|
392 |
+
"tokenizers": {},
|
393 |
"node_i": "3345",
|
394 |
+
"native_tokenizers": [],
|
395 |
+
"scripts": []
|
396 |
}
|
397 |
],
|
398 |
+
"tokenizers": {},
|
399 |
"node_i": "3343",
|
400 |
+
"native_tokenizers": [],
|
401 |
+
"scripts": []
|
402 |
}
|
403 |
],
|
404 |
+
"tokenizers": {},
|
405 |
"node_i": "3341",
|
406 |
+
"native_tokenizers": [],
|
407 |
+
"scripts": []
|
408 |
}
|
409 |
],
|
410 |
+
"tokenizers": {},
|
411 |
"node_i": "3336",
|
412 |
+
"native_tokenizers": [],
|
413 |
+
"scripts": []
|
414 |
},
|
415 |
{
|
416 |
"name": "Tiriy\u00f3",
|
417 |
"iso_1_code": null,
|
418 |
"iso_3_code": null,
|
|
|
419 |
"children": [
|
420 |
{
|
421 |
"name": "Salum\u00e1",
|
422 |
"iso_1_code": null,
|
423 |
"iso_3_code": "slj",
|
|
|
424 |
"children": [],
|
425 |
+
"tokenizers": {},
|
426 |
"node_i": "3347",
|
427 |
+
"native_tokenizers": [],
|
428 |
+
"scripts": []
|
429 |
},
|
430 |
{
|
431 |
"name": "Karihona",
|
432 |
"iso_1_code": null,
|
433 |
"iso_3_code": null,
|
|
|
434 |
"children": [
|
435 |
{
|
436 |
"name": "Carijona",
|
437 |
"iso_1_code": null,
|
438 |
"iso_3_code": "cbd",
|
|
|
439 |
"children": [],
|
440 |
+
"tokenizers": {},
|
441 |
"node_i": "3349",
|
442 |
+
"native_tokenizers": [],
|
443 |
+
"scripts": []
|
444 |
}
|
445 |
],
|
446 |
+
"tokenizers": {},
|
447 |
"node_i": "3348",
|
448 |
+
"native_tokenizers": [],
|
449 |
+
"scripts": []
|
450 |
},
|
451 |
{
|
452 |
"name": "Tiriy\u00f3",
|
453 |
"iso_1_code": null,
|
454 |
"iso_3_code": null,
|
|
|
455 |
"children": [
|
456 |
{
|
457 |
"name": "Akurio",
|
458 |
"iso_1_code": null,
|
459 |
"iso_3_code": "ako",
|
|
|
460 |
"children": [],
|
461 |
+
"tokenizers": {},
|
462 |
"node_i": "3351",
|
463 |
+
"native_tokenizers": [],
|
464 |
+
"scripts": []
|
465 |
},
|
466 |
{
|
467 |
"name": "Tri\u00f3",
|
468 |
"iso_1_code": null,
|
469 |
"iso_3_code": "tri",
|
|
|
470 |
"children": [],
|
471 |
+
"tokenizers": {},
|
472 |
"node_i": "3352",
|
473 |
+
"native_tokenizers": [],
|
474 |
+
"scripts": []
|
475 |
}
|
476 |
],
|
477 |
+
"tokenizers": {},
|
478 |
"node_i": "3350",
|
479 |
+
"native_tokenizers": [],
|
480 |
+
"scripts": []
|
481 |
}
|
482 |
],
|
483 |
+
"tokenizers": {},
|
484 |
"node_i": "3346",
|
485 |
+
"native_tokenizers": [],
|
486 |
+
"scripts": []
|
487 |
},
|
488 |
{
|
489 |
"name": "Waiwai",
|
490 |
"iso_1_code": null,
|
491 |
"iso_3_code": null,
|
|
|
492 |
"children": [
|
493 |
{
|
494 |
"name": "Hixkary\u00e1na",
|
495 |
"iso_1_code": null,
|
496 |
"iso_3_code": "hix",
|
|
|
497 |
"children": [],
|
498 |
+
"tokenizers": {},
|
499 |
"node_i": "3354",
|
500 |
+
"native_tokenizers": [],
|
501 |
"scripts": [
|
502 |
"Latn"
|
503 |
+
]
|
|
|
504 |
},
|
505 |
{
|
506 |
"name": "Waiwai",
|
507 |
"iso_1_code": null,
|
508 |
"iso_3_code": "waw",
|
|
|
509 |
"children": [],
|
510 |
+
"tokenizers": {},
|
511 |
"node_i": "3355",
|
512 |
+
"native_tokenizers": [],
|
513 |
+
"scripts": []
|
514 |
}
|
515 |
],
|
516 |
+
"tokenizers": {},
|
517 |
"node_i": "3353",
|
518 |
+
"native_tokenizers": [],
|
519 |
+
"scripts": []
|
520 |
},
|
521 |
{
|
522 |
"name": "Yukpa",
|
523 |
"iso_1_code": null,
|
524 |
"iso_3_code": null,
|
|
|
525 |
"children": [
|
526 |
{
|
527 |
"name": "Yucpa-Yapreria",
|
528 |
"iso_1_code": null,
|
529 |
"iso_3_code": null,
|
|
|
530 |
"children": [
|
531 |
{
|
532 |
"name": "Japreria",
|
533 |
"iso_1_code": null,
|
534 |
"iso_3_code": "jru",
|
|
|
535 |
"children": [],
|
536 |
+
"tokenizers": {},
|
537 |
"node_i": "3358",
|
538 |
+
"native_tokenizers": [],
|
539 |
+
"scripts": []
|
540 |
},
|
541 |
{
|
542 |
"name": "Yukpa",
|
543 |
"iso_1_code": null,
|
544 |
"iso_3_code": "yup",
|
|
|
545 |
"children": [],
|
546 |
+
"tokenizers": {},
|
547 |
"node_i": "3359",
|
548 |
+
"native_tokenizers": [],
|
549 |
"scripts": [
|
550 |
"Latn"
|
551 |
+
]
|
|
|
552 |
}
|
553 |
],
|
554 |
+
"tokenizers": {},
|
555 |
"node_i": "3357",
|
556 |
+
"native_tokenizers": [],
|
557 |
+
"scripts": []
|
558 |
}
|
559 |
],
|
560 |
+
"tokenizers": {},
|
561 |
"node_i": "3356",
|
562 |
+
"native_tokenizers": [],
|
563 |
+
"scripts": []
|
564 |
}
|
565 |
],
|
566 |
+
"tokenizers": {},
|
567 |
"node_i": "3307",
|
568 |
+
"native_tokenizers": [],
|
569 |
+
"scripts": []
|
570 |
}
|
data/Central Solomons.json
CHANGED
@@ -2,50 +2,50 @@
|
|
2 |
"name": "Central Solomons",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Bilua",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "blb",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3361",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Lavukaleve",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": "lvk",
|
21 |
-
"tokenizers": {},
|
22 |
"children": [],
|
|
|
23 |
"node_i": "3362",
|
24 |
-
"
|
25 |
-
"
|
26 |
},
|
27 |
{
|
28 |
"name": "Savosavo",
|
29 |
"iso_1_code": null,
|
30 |
"iso_3_code": "svs",
|
31 |
-
"tokenizers": {},
|
32 |
"children": [],
|
|
|
33 |
"node_i": "3363",
|
34 |
-
"
|
35 |
-
"
|
36 |
},
|
37 |
{
|
38 |
"name": "Touo",
|
39 |
"iso_1_code": null,
|
40 |
"iso_3_code": "tqu",
|
41 |
-
"tokenizers": {},
|
42 |
"children": [],
|
|
|
43 |
"node_i": "3364",
|
44 |
-
"
|
45 |
-
"
|
46 |
}
|
47 |
],
|
|
|
48 |
"node_i": "3360",
|
49 |
-
"
|
50 |
-
"
|
51 |
}
|
|
|
2 |
"name": "Central Solomons",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Bilua",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "blb",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3361",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Lavukaleve",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "lvk",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "3362",
|
23 |
+
"native_tokenizers": [],
|
24 |
+
"scripts": []
|
25 |
},
|
26 |
{
|
27 |
"name": "Savosavo",
|
28 |
"iso_1_code": null,
|
29 |
"iso_3_code": "svs",
|
|
|
30 |
"children": [],
|
31 |
+
"tokenizers": {},
|
32 |
"node_i": "3363",
|
33 |
+
"native_tokenizers": [],
|
34 |
+
"scripts": []
|
35 |
},
|
36 |
{
|
37 |
"name": "Touo",
|
38 |
"iso_1_code": null,
|
39 |
"iso_3_code": "tqu",
|
|
|
40 |
"children": [],
|
41 |
+
"tokenizers": {},
|
42 |
"node_i": "3364",
|
43 |
+
"native_tokenizers": [],
|
44 |
+
"scripts": []
|
45 |
}
|
46 |
],
|
47 |
+
"tokenizers": {},
|
48 |
"node_i": "3360",
|
49 |
+
"native_tokenizers": [],
|
50 |
+
"scripts": []
|
51 |
}
|
data/Chapacuran.json
CHANGED
@@ -2,72 +2,72 @@
|
|
2 |
"name": "Chapacuran",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Itene",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Itene",
|
15 |
"iso_1_code": null,
|
16 |
"iso_3_code": "ite",
|
17 |
-
"tokenizers": {},
|
18 |
"children": [],
|
|
|
19 |
"node_i": "3367",
|
20 |
-
"
|
21 |
-
"
|
22 |
},
|
23 |
{
|
24 |
"name": "Tor\u00e1",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": "trz",
|
27 |
-
"tokenizers": {},
|
28 |
"children": [],
|
|
|
29 |
"node_i": "3368",
|
30 |
-
"
|
31 |
-
"
|
32 |
}
|
33 |
],
|
|
|
34 |
"node_i": "3366",
|
35 |
-
"
|
36 |
-
"
|
37 |
},
|
38 |
{
|
39 |
"name": "Wari",
|
40 |
"iso_1_code": null,
|
41 |
"iso_3_code": null,
|
42 |
-
"tokenizers": {},
|
43 |
"children": [
|
44 |
{
|
45 |
"name": "Oro Win",
|
46 |
"iso_1_code": null,
|
47 |
"iso_3_code": "orw",
|
48 |
-
"tokenizers": {},
|
49 |
"children": [],
|
|
|
50 |
"node_i": "3370",
|
51 |
-
"
|
52 |
-
"
|
53 |
},
|
54 |
{
|
55 |
"name": "Paka\u00e1snovos",
|
56 |
"iso_1_code": null,
|
57 |
"iso_3_code": "pav",
|
58 |
-
"tokenizers": {},
|
59 |
"children": [],
|
|
|
60 |
"node_i": "3371",
|
61 |
-
"
|
62 |
-
"
|
63 |
}
|
64 |
],
|
|
|
65 |
"node_i": "3369",
|
66 |
-
"
|
67 |
-
"
|
68 |
}
|
69 |
],
|
|
|
70 |
"node_i": "3365",
|
71 |
-
"
|
72 |
-
"
|
73 |
}
|
|
|
2 |
"name": "Chapacuran",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Itene",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Itene",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": "ite",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "3367",
|
18 |
+
"native_tokenizers": [],
|
19 |
+
"scripts": []
|
20 |
},
|
21 |
{
|
22 |
"name": "Tor\u00e1",
|
23 |
"iso_1_code": null,
|
24 |
"iso_3_code": "trz",
|
|
|
25 |
"children": [],
|
26 |
+
"tokenizers": {},
|
27 |
"node_i": "3368",
|
28 |
+
"native_tokenizers": [],
|
29 |
+
"scripts": []
|
30 |
}
|
31 |
],
|
32 |
+
"tokenizers": {},
|
33 |
"node_i": "3366",
|
34 |
+
"native_tokenizers": [],
|
35 |
+
"scripts": []
|
36 |
},
|
37 |
{
|
38 |
"name": "Wari",
|
39 |
"iso_1_code": null,
|
40 |
"iso_3_code": null,
|
|
|
41 |
"children": [
|
42 |
{
|
43 |
"name": "Oro Win",
|
44 |
"iso_1_code": null,
|
45 |
"iso_3_code": "orw",
|
|
|
46 |
"children": [],
|
47 |
+
"tokenizers": {},
|
48 |
"node_i": "3370",
|
49 |
+
"native_tokenizers": [],
|
50 |
+
"scripts": []
|
51 |
},
|
52 |
{
|
53 |
"name": "Paka\u00e1snovos",
|
54 |
"iso_1_code": null,
|
55 |
"iso_3_code": "pav",
|
|
|
56 |
"children": [],
|
57 |
+
"tokenizers": {},
|
58 |
"node_i": "3371",
|
59 |
+
"native_tokenizers": [],
|
60 |
+
"scripts": []
|
61 |
}
|
62 |
],
|
63 |
+
"tokenizers": {},
|
64 |
"node_i": "3369",
|
65 |
+
"native_tokenizers": [],
|
66 |
+
"scripts": []
|
67 |
}
|
68 |
],
|
69 |
+
"tokenizers": {},
|
70 |
"node_i": "3365",
|
71 |
+
"native_tokenizers": [],
|
72 |
+
"scripts": []
|
73 |
}
|
data/Chibchan.json
CHANGED
@@ -2,392 +2,392 @@
|
|
2 |
"name": "Chibchan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Chibchan A",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Boruca",
|
15 |
"iso_1_code": null,
|
16 |
"iso_3_code": "brn",
|
17 |
-
"tokenizers": {},
|
18 |
"children": [],
|
|
|
19 |
"node_i": "3374",
|
20 |
-
"
|
21 |
-
"
|
22 |
},
|
23 |
{
|
24 |
"name": "Teribe",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": "tfr",
|
27 |
-
"tokenizers": {},
|
28 |
"children": [],
|
|
|
29 |
"node_i": "3375",
|
|
|
30 |
"scripts": [
|
31 |
"Latn"
|
32 |
-
]
|
33 |
-
"own_tokenizer": false
|
34 |
},
|
35 |
{
|
36 |
"name": "Guaymi\u00edc",
|
37 |
"iso_1_code": null,
|
38 |
"iso_3_code": null,
|
39 |
-
"tokenizers": {},
|
40 |
"children": [
|
41 |
{
|
42 |
"name": "Ng\u00e4bere",
|
43 |
"iso_1_code": null,
|
44 |
"iso_3_code": "gym",
|
45 |
-
"tokenizers": {},
|
46 |
"children": [],
|
|
|
47 |
"node_i": "3377",
|
|
|
48 |
"scripts": [
|
49 |
"Latn"
|
50 |
-
]
|
51 |
-
"own_tokenizer": false
|
52 |
},
|
53 |
{
|
54 |
"name": "Buglere",
|
55 |
"iso_1_code": null,
|
56 |
"iso_3_code": "sab",
|
57 |
-
"tokenizers": {},
|
58 |
"children": [],
|
|
|
59 |
"node_i": "3378",
|
|
|
60 |
"scripts": [
|
61 |
"Latn"
|
62 |
-
]
|
63 |
-
"own_tokenizer": false
|
64 |
}
|
65 |
],
|
|
|
66 |
"node_i": "3376",
|
67 |
-
"
|
68 |
-
"
|
69 |
},
|
70 |
{
|
71 |
"name": "Viceitic",
|
72 |
"iso_1_code": null,
|
73 |
"iso_3_code": null,
|
74 |
-
"tokenizers": {},
|
75 |
"children": [
|
76 |
{
|
77 |
"name": "Bribri",
|
78 |
"iso_1_code": null,
|
79 |
"iso_3_code": "bzd",
|
80 |
-
"tokenizers": {},
|
81 |
"children": [],
|
|
|
82 |
"node_i": "3380",
|
|
|
83 |
"scripts": [
|
84 |
"Latn"
|
85 |
-
]
|
86 |
-
"own_tokenizer": false
|
87 |
},
|
88 |
{
|
89 |
"name": "Cab\u00e9car",
|
90 |
"iso_1_code": null,
|
91 |
"iso_3_code": "cjp",
|
92 |
-
"tokenizers": {},
|
93 |
"children": [],
|
|
|
94 |
"node_i": "3381",
|
|
|
95 |
"scripts": [
|
96 |
"Latn"
|
97 |
-
]
|
98 |
-
"own_tokenizer": false
|
99 |
}
|
100 |
],
|
|
|
101 |
"node_i": "3379",
|
102 |
-
"
|
103 |
-
"
|
104 |
}
|
105 |
],
|
|
|
106 |
"node_i": "3373",
|
107 |
-
"
|
108 |
-
"
|
109 |
},
|
110 |
{
|
111 |
"name": "Chibchan B",
|
112 |
"iso_1_code": null,
|
113 |
"iso_3_code": null,
|
114 |
-
"tokenizers": {},
|
115 |
"children": [
|
116 |
{
|
117 |
"name": "Pech",
|
118 |
"iso_1_code": null,
|
119 |
"iso_3_code": "pay",
|
120 |
-
"tokenizers": {},
|
121 |
"children": [],
|
|
|
122 |
"node_i": "3383",
|
123 |
-
"
|
124 |
-
"
|
125 |
},
|
126 |
{
|
127 |
"name": "Eastern Chibchan",
|
128 |
"iso_1_code": null,
|
129 |
"iso_3_code": null,
|
130 |
-
"tokenizers": {},
|
131 |
"children": [
|
132 |
{
|
133 |
"name": "Colombian",
|
134 |
"iso_1_code": null,
|
135 |
"iso_3_code": null,
|
136 |
-
"tokenizers": {},
|
137 |
"children": [
|
138 |
{
|
139 |
"name": "Northern Colombian",
|
140 |
"iso_1_code": null,
|
141 |
"iso_3_code": null,
|
142 |
-
"tokenizers": {},
|
143 |
"children": [
|
144 |
{
|
145 |
"name": "Chimila",
|
146 |
"iso_1_code": null,
|
147 |
"iso_3_code": "cbg",
|
148 |
-
"tokenizers": {},
|
149 |
"children": [],
|
|
|
150 |
"node_i": "3387",
|
151 |
-
"
|
152 |
-
"
|
153 |
},
|
154 |
{
|
155 |
"name": "Arhuacan",
|
156 |
"iso_1_code": null,
|
157 |
"iso_3_code": null,
|
158 |
-
"tokenizers": {},
|
159 |
"children": [
|
160 |
{
|
161 |
"name": "Kogi",
|
162 |
"iso_1_code": null,
|
163 |
"iso_3_code": "kog",
|
164 |
-
"tokenizers": {},
|
165 |
"children": [],
|
|
|
166 |
"node_i": "3389",
|
|
|
167 |
"scripts": [
|
168 |
"Latn"
|
169 |
-
]
|
170 |
-
"own_tokenizer": false
|
171 |
},
|
172 |
{
|
173 |
"name": "Southern and Eastern Arhuacan",
|
174 |
"iso_1_code": null,
|
175 |
"iso_3_code": null,
|
176 |
-
"tokenizers": {},
|
177 |
"children": [
|
178 |
{
|
179 |
"name": "Arhuaco",
|
180 |
"iso_1_code": null,
|
181 |
"iso_3_code": "arh",
|
182 |
-
"tokenizers": {},
|
183 |
"children": [],
|
|
|
184 |
"node_i": "3391",
|
185 |
-
"
|
186 |
-
"
|
187 |
},
|
188 |
{
|
189 |
"name": "Guamaca-Atanque",
|
190 |
"iso_1_code": null,
|
191 |
"iso_3_code": null,
|
192 |
-
"tokenizers": {},
|
193 |
"children": [
|
194 |
{
|
195 |
"name": "Sanka",
|
196 |
"iso_1_code": null,
|
197 |
"iso_3_code": "mbp",
|
198 |
-
"tokenizers": {},
|
199 |
"children": [],
|
|
|
200 |
"node_i": "3393",
|
201 |
-
"
|
202 |
-
"
|
203 |
}
|
204 |
],
|
|
|
205 |
"node_i": "3392",
|
206 |
-
"
|
207 |
-
"
|
208 |
}
|
209 |
],
|
|
|
210 |
"node_i": "3390",
|
211 |
-
"
|
212 |
-
"
|
213 |
}
|
214 |
],
|
|
|
215 |
"node_i": "3388",
|
216 |
-
"
|
217 |
-
"
|
218 |
}
|
219 |
],
|
|
|
220 |
"node_i": "3386",
|
221 |
-
"
|
222 |
-
"
|
223 |
},
|
224 |
{
|
225 |
"name": "Southern Colombian",
|
226 |
"iso_1_code": null,
|
227 |
"iso_3_code": null,
|
228 |
-
"tokenizers": {},
|
229 |
"children": [
|
230 |
{
|
231 |
"name": "Bar\u00ed",
|
232 |
"iso_1_code": null,
|
233 |
"iso_3_code": "mot",
|
234 |
-
"tokenizers": {},
|
235 |
"children": [],
|
|
|
236 |
"node_i": "3395",
|
237 |
-
"
|
238 |
-
"
|
239 |
},
|
240 |
{
|
241 |
"name": "Cundicocuyese",
|
242 |
"iso_1_code": null,
|
243 |
"iso_3_code": null,
|
244 |
-
"tokenizers": {},
|
245 |
"children": [
|
246 |
{
|
247 |
"name": "Chibcha",
|
248 |
"iso_1_code": null,
|
249 |
"iso_3_code": "chb",
|
250 |
-
"tokenizers": {},
|
251 |
"children": [],
|
|
|
252 |
"node_i": "3397",
|
253 |
-
"
|
254 |
-
"
|
255 |
},
|
256 |
{
|
257 |
"name": "Tunebo, Barro Negro",
|
258 |
"iso_1_code": null,
|
259 |
"iso_3_code": "tbn",
|
260 |
-
"tokenizers": {},
|
261 |
"children": [],
|
|
|
262 |
"node_i": "3398",
|
263 |
-
"
|
264 |
-
"
|
265 |
},
|
266 |
{
|
267 |
"name": "Tunebo, Western",
|
268 |
"iso_1_code": null,
|
269 |
"iso_3_code": "tnb",
|
270 |
-
"tokenizers": {},
|
271 |
"children": [],
|
|
|
272 |
"node_i": "3399",
|
273 |
-
"
|
274 |
-
"
|
275 |
},
|
276 |
{
|
277 |
"name": "Tunebo, Angosturas",
|
278 |
"iso_1_code": null,
|
279 |
"iso_3_code": "tnd",
|
280 |
-
"tokenizers": {},
|
281 |
"children": [],
|
|
|
282 |
"node_i": "3400",
|
283 |
-
"
|
284 |
-
"
|
285 |
},
|
286 |
{
|
287 |
"name": "Tunebo, Central",
|
288 |
"iso_1_code": null,
|
289 |
"iso_3_code": "tuf",
|
290 |
-
"tokenizers": {},
|
291 |
"children": [],
|
|
|
292 |
"node_i": "3401",
|
|
|
293 |
"scripts": [
|
294 |
"Latn"
|
295 |
-
]
|
296 |
-
"own_tokenizer": false
|
297 |
}
|
298 |
],
|
|
|
299 |
"node_i": "3396",
|
300 |
-
"
|
301 |
-
"
|
302 |
}
|
303 |
],
|
|
|
304 |
"node_i": "3394",
|
305 |
-
"
|
306 |
-
"
|
307 |
}
|
308 |
],
|
|
|
309 |
"node_i": "3385",
|
310 |
-
"
|
311 |
-
"
|
312 |
},
|
313 |
{
|
314 |
"name": "Cuna",
|
315 |
"iso_1_code": null,
|
316 |
"iso_3_code": null,
|
317 |
-
"tokenizers": {},
|
318 |
"children": [
|
319 |
{
|
320 |
"name": "Kuna, San Blas",
|
321 |
"iso_1_code": null,
|
322 |
"iso_3_code": "cuk",
|
323 |
-
"tokenizers": {},
|
324 |
"children": [],
|
|
|
325 |
"node_i": "3403",
|
|
|
326 |
"scripts": [
|
327 |
"Latn"
|
328 |
-
]
|
329 |
-
"own_tokenizer": false
|
330 |
},
|
331 |
{
|
332 |
"name": "Kuna, Border",
|
333 |
"iso_1_code": null,
|
334 |
"iso_3_code": "kvn",
|
335 |
-
"tokenizers": {},
|
336 |
"children": [],
|
|
|
337 |
"node_i": "3404",
|
|
|
338 |
"scripts": [
|
339 |
"Latn"
|
340 |
-
]
|
341 |
-
"own_tokenizer": false
|
342 |
}
|
343 |
],
|
|
|
344 |
"node_i": "3402",
|
345 |
-
"
|
346 |
-
"
|
347 |
}
|
348 |
],
|
|
|
349 |
"node_i": "3384",
|
350 |
-
"
|
351 |
-
"
|
352 |
},
|
353 |
{
|
354 |
"name": "Votic",
|
355 |
"iso_1_code": null,
|
356 |
"iso_3_code": null,
|
357 |
-
"tokenizers": {},
|
358 |
"children": [
|
359 |
{
|
360 |
"name": "Mal\u00e9ku Ja\u00edka",
|
361 |
"iso_1_code": null,
|
362 |
"iso_3_code": "gut",
|
363 |
-
"tokenizers": {},
|
364 |
"children": [],
|
|
|
365 |
"node_i": "3406",
|
366 |
-
"
|
367 |
-
"
|
368 |
},
|
369 |
{
|
370 |
"name": "Rama",
|
371 |
"iso_1_code": null,
|
372 |
"iso_3_code": "rma",
|
373 |
-
"tokenizers": {},
|
374 |
"children": [],
|
|
|
375 |
"node_i": "3407",
|
376 |
-
"
|
377 |
-
"
|
378 |
}
|
379 |
],
|
|
|
380 |
"node_i": "3405",
|
381 |
-
"
|
382 |
-
"
|
383 |
}
|
384 |
],
|
|
|
385 |
"node_i": "3382",
|
386 |
-
"
|
387 |
-
"
|
388 |
}
|
389 |
],
|
|
|
390 |
"node_i": "3372",
|
391 |
-
"
|
392 |
-
"
|
393 |
}
|
|
|
2 |
"name": "Chibchan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Chibchan A",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Boruca",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": "brn",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "3374",
|
18 |
+
"native_tokenizers": [],
|
19 |
+
"scripts": []
|
20 |
},
|
21 |
{
|
22 |
"name": "Teribe",
|
23 |
"iso_1_code": null,
|
24 |
"iso_3_code": "tfr",
|
|
|
25 |
"children": [],
|
26 |
+
"tokenizers": {},
|
27 |
"node_i": "3375",
|
28 |
+
"native_tokenizers": [],
|
29 |
"scripts": [
|
30 |
"Latn"
|
31 |
+
]
|
|
|
32 |
},
|
33 |
{
|
34 |
"name": "Guaymi\u00edc",
|
35 |
"iso_1_code": null,
|
36 |
"iso_3_code": null,
|
|
|
37 |
"children": [
|
38 |
{
|
39 |
"name": "Ng\u00e4bere",
|
40 |
"iso_1_code": null,
|
41 |
"iso_3_code": "gym",
|
|
|
42 |
"children": [],
|
43 |
+
"tokenizers": {},
|
44 |
"node_i": "3377",
|
45 |
+
"native_tokenizers": [],
|
46 |
"scripts": [
|
47 |
"Latn"
|
48 |
+
]
|
|
|
49 |
},
|
50 |
{
|
51 |
"name": "Buglere",
|
52 |
"iso_1_code": null,
|
53 |
"iso_3_code": "sab",
|
|
|
54 |
"children": [],
|
55 |
+
"tokenizers": {},
|
56 |
"node_i": "3378",
|
57 |
+
"native_tokenizers": [],
|
58 |
"scripts": [
|
59 |
"Latn"
|
60 |
+
]
|
|
|
61 |
}
|
62 |
],
|
63 |
+
"tokenizers": {},
|
64 |
"node_i": "3376",
|
65 |
+
"native_tokenizers": [],
|
66 |
+
"scripts": []
|
67 |
},
|
68 |
{
|
69 |
"name": "Viceitic",
|
70 |
"iso_1_code": null,
|
71 |
"iso_3_code": null,
|
|
|
72 |
"children": [
|
73 |
{
|
74 |
"name": "Bribri",
|
75 |
"iso_1_code": null,
|
76 |
"iso_3_code": "bzd",
|
|
|
77 |
"children": [],
|
78 |
+
"tokenizers": {},
|
79 |
"node_i": "3380",
|
80 |
+
"native_tokenizers": [],
|
81 |
"scripts": [
|
82 |
"Latn"
|
83 |
+
]
|
|
|
84 |
},
|
85 |
{
|
86 |
"name": "Cab\u00e9car",
|
87 |
"iso_1_code": null,
|
88 |
"iso_3_code": "cjp",
|
|
|
89 |
"children": [],
|
90 |
+
"tokenizers": {},
|
91 |
"node_i": "3381",
|
92 |
+
"native_tokenizers": [],
|
93 |
"scripts": [
|
94 |
"Latn"
|
95 |
+
]
|
|
|
96 |
}
|
97 |
],
|
98 |
+
"tokenizers": {},
|
99 |
"node_i": "3379",
|
100 |
+
"native_tokenizers": [],
|
101 |
+
"scripts": []
|
102 |
}
|
103 |
],
|
104 |
+
"tokenizers": {},
|
105 |
"node_i": "3373",
|
106 |
+
"native_tokenizers": [],
|
107 |
+
"scripts": []
|
108 |
},
|
109 |
{
|
110 |
"name": "Chibchan B",
|
111 |
"iso_1_code": null,
|
112 |
"iso_3_code": null,
|
|
|
113 |
"children": [
|
114 |
{
|
115 |
"name": "Pech",
|
116 |
"iso_1_code": null,
|
117 |
"iso_3_code": "pay",
|
|
|
118 |
"children": [],
|
119 |
+
"tokenizers": {},
|
120 |
"node_i": "3383",
|
121 |
+
"native_tokenizers": [],
|
122 |
+
"scripts": []
|
123 |
},
|
124 |
{
|
125 |
"name": "Eastern Chibchan",
|
126 |
"iso_1_code": null,
|
127 |
"iso_3_code": null,
|
|
|
128 |
"children": [
|
129 |
{
|
130 |
"name": "Colombian",
|
131 |
"iso_1_code": null,
|
132 |
"iso_3_code": null,
|
|
|
133 |
"children": [
|
134 |
{
|
135 |
"name": "Northern Colombian",
|
136 |
"iso_1_code": null,
|
137 |
"iso_3_code": null,
|
|
|
138 |
"children": [
|
139 |
{
|
140 |
"name": "Chimila",
|
141 |
"iso_1_code": null,
|
142 |
"iso_3_code": "cbg",
|
|
|
143 |
"children": [],
|
144 |
+
"tokenizers": {},
|
145 |
"node_i": "3387",
|
146 |
+
"native_tokenizers": [],
|
147 |
+
"scripts": []
|
148 |
},
|
149 |
{
|
150 |
"name": "Arhuacan",
|
151 |
"iso_1_code": null,
|
152 |
"iso_3_code": null,
|
|
|
153 |
"children": [
|
154 |
{
|
155 |
"name": "Kogi",
|
156 |
"iso_1_code": null,
|
157 |
"iso_3_code": "kog",
|
|
|
158 |
"children": [],
|
159 |
+
"tokenizers": {},
|
160 |
"node_i": "3389",
|
161 |
+
"native_tokenizers": [],
|
162 |
"scripts": [
|
163 |
"Latn"
|
164 |
+
]
|
|
|
165 |
},
|
166 |
{
|
167 |
"name": "Southern and Eastern Arhuacan",
|
168 |
"iso_1_code": null,
|
169 |
"iso_3_code": null,
|
|
|
170 |
"children": [
|
171 |
{
|
172 |
"name": "Arhuaco",
|
173 |
"iso_1_code": null,
|
174 |
"iso_3_code": "arh",
|
|
|
175 |
"children": [],
|
176 |
+
"tokenizers": {},
|
177 |
"node_i": "3391",
|
178 |
+
"native_tokenizers": [],
|
179 |
+
"scripts": []
|
180 |
},
|
181 |
{
|
182 |
"name": "Guamaca-Atanque",
|
183 |
"iso_1_code": null,
|
184 |
"iso_3_code": null,
|
|
|
185 |
"children": [
|
186 |
{
|
187 |
"name": "Sanka",
|
188 |
"iso_1_code": null,
|
189 |
"iso_3_code": "mbp",
|
|
|
190 |
"children": [],
|
191 |
+
"tokenizers": {},
|
192 |
"node_i": "3393",
|
193 |
+
"native_tokenizers": [],
|
194 |
+
"scripts": []
|
195 |
}
|
196 |
],
|
197 |
+
"tokenizers": {},
|
198 |
"node_i": "3392",
|
199 |
+
"native_tokenizers": [],
|
200 |
+
"scripts": []
|
201 |
}
|
202 |
],
|
203 |
+
"tokenizers": {},
|
204 |
"node_i": "3390",
|
205 |
+
"native_tokenizers": [],
|
206 |
+
"scripts": []
|
207 |
}
|
208 |
],
|
209 |
+
"tokenizers": {},
|
210 |
"node_i": "3388",
|
211 |
+
"native_tokenizers": [],
|
212 |
+
"scripts": []
|
213 |
}
|
214 |
],
|
215 |
+
"tokenizers": {},
|
216 |
"node_i": "3386",
|
217 |
+
"native_tokenizers": [],
|
218 |
+
"scripts": []
|
219 |
},
|
220 |
{
|
221 |
"name": "Southern Colombian",
|
222 |
"iso_1_code": null,
|
223 |
"iso_3_code": null,
|
|
|
224 |
"children": [
|
225 |
{
|
226 |
"name": "Bar\u00ed",
|
227 |
"iso_1_code": null,
|
228 |
"iso_3_code": "mot",
|
|
|
229 |
"children": [],
|
230 |
+
"tokenizers": {},
|
231 |
"node_i": "3395",
|
232 |
+
"native_tokenizers": [],
|
233 |
+
"scripts": []
|
234 |
},
|
235 |
{
|
236 |
"name": "Cundicocuyese",
|
237 |
"iso_1_code": null,
|
238 |
"iso_3_code": null,
|
|
|
239 |
"children": [
|
240 |
{
|
241 |
"name": "Chibcha",
|
242 |
"iso_1_code": null,
|
243 |
"iso_3_code": "chb",
|
|
|
244 |
"children": [],
|
245 |
+
"tokenizers": {},
|
246 |
"node_i": "3397",
|
247 |
+
"native_tokenizers": [],
|
248 |
+
"scripts": []
|
249 |
},
|
250 |
{
|
251 |
"name": "Tunebo, Barro Negro",
|
252 |
"iso_1_code": null,
|
253 |
"iso_3_code": "tbn",
|
|
|
254 |
"children": [],
|
255 |
+
"tokenizers": {},
|
256 |
"node_i": "3398",
|
257 |
+
"native_tokenizers": [],
|
258 |
+
"scripts": []
|
259 |
},
|
260 |
{
|
261 |
"name": "Tunebo, Western",
|
262 |
"iso_1_code": null,
|
263 |
"iso_3_code": "tnb",
|
|
|
264 |
"children": [],
|
265 |
+
"tokenizers": {},
|
266 |
"node_i": "3399",
|
267 |
+
"native_tokenizers": [],
|
268 |
+
"scripts": []
|
269 |
},
|
270 |
{
|
271 |
"name": "Tunebo, Angosturas",
|
272 |
"iso_1_code": null,
|
273 |
"iso_3_code": "tnd",
|
|
|
274 |
"children": [],
|
275 |
+
"tokenizers": {},
|
276 |
"node_i": "3400",
|
277 |
+
"native_tokenizers": [],
|
278 |
+
"scripts": []
|
279 |
},
|
280 |
{
|
281 |
"name": "Tunebo, Central",
|
282 |
"iso_1_code": null,
|
283 |
"iso_3_code": "tuf",
|
|
|
284 |
"children": [],
|
285 |
+
"tokenizers": {},
|
286 |
"node_i": "3401",
|
287 |
+
"native_tokenizers": [],
|
288 |
"scripts": [
|
289 |
"Latn"
|
290 |
+
]
|
|
|
291 |
}
|
292 |
],
|
293 |
+
"tokenizers": {},
|
294 |
"node_i": "3396",
|
295 |
+
"native_tokenizers": [],
|
296 |
+
"scripts": []
|
297 |
}
|
298 |
],
|
299 |
+
"tokenizers": {},
|
300 |
"node_i": "3394",
|
301 |
+
"native_tokenizers": [],
|
302 |
+
"scripts": []
|
303 |
}
|
304 |
],
|
305 |
+
"tokenizers": {},
|
306 |
"node_i": "3385",
|
307 |
+
"native_tokenizers": [],
|
308 |
+
"scripts": []
|
309 |
},
|
310 |
{
|
311 |
"name": "Cuna",
|
312 |
"iso_1_code": null,
|
313 |
"iso_3_code": null,
|
|
|
314 |
"children": [
|
315 |
{
|
316 |
"name": "Kuna, San Blas",
|
317 |
"iso_1_code": null,
|
318 |
"iso_3_code": "cuk",
|
|
|
319 |
"children": [],
|
320 |
+
"tokenizers": {},
|
321 |
"node_i": "3403",
|
322 |
+
"native_tokenizers": [],
|
323 |
"scripts": [
|
324 |
"Latn"
|
325 |
+
]
|
|
|
326 |
},
|
327 |
{
|
328 |
"name": "Kuna, Border",
|
329 |
"iso_1_code": null,
|
330 |
"iso_3_code": "kvn",
|
|
|
331 |
"children": [],
|
332 |
+
"tokenizers": {},
|
333 |
"node_i": "3404",
|
334 |
+
"native_tokenizers": [],
|
335 |
"scripts": [
|
336 |
"Latn"
|
337 |
+
]
|
|
|
338 |
}
|
339 |
],
|
340 |
+
"tokenizers": {},
|
341 |
"node_i": "3402",
|
342 |
+
"native_tokenizers": [],
|
343 |
+
"scripts": []
|
344 |
}
|
345 |
],
|
346 |
+
"tokenizers": {},
|
347 |
"node_i": "3384",
|
348 |
+
"native_tokenizers": [],
|
349 |
+
"scripts": []
|
350 |
},
|
351 |
{
|
352 |
"name": "Votic",
|
353 |
"iso_1_code": null,
|
354 |
"iso_3_code": null,
|
|
|
355 |
"children": [
|
356 |
{
|
357 |
"name": "Mal\u00e9ku Ja\u00edka",
|
358 |
"iso_1_code": null,
|
359 |
"iso_3_code": "gut",
|
|
|
360 |
"children": [],
|
361 |
+
"tokenizers": {},
|
362 |
"node_i": "3406",
|
363 |
+
"native_tokenizers": [],
|
364 |
+
"scripts": []
|
365 |
},
|
366 |
{
|
367 |
"name": "Rama",
|
368 |
"iso_1_code": null,
|
369 |
"iso_3_code": "rma",
|
|
|
370 |
"children": [],
|
371 |
+
"tokenizers": {},
|
372 |
"node_i": "3407",
|
373 |
+
"native_tokenizers": [],
|
374 |
+
"scripts": []
|
375 |
}
|
376 |
],
|
377 |
+
"tokenizers": {},
|
378 |
"node_i": "3405",
|
379 |
+
"native_tokenizers": [],
|
380 |
+
"scripts": []
|
381 |
}
|
382 |
],
|
383 |
+
"tokenizers": {},
|
384 |
"node_i": "3382",
|
385 |
+
"native_tokenizers": [],
|
386 |
+
"scripts": []
|
387 |
}
|
388 |
],
|
389 |
+
"tokenizers": {},
|
390 |
"node_i": "3372",
|
391 |
+
"native_tokenizers": [],
|
392 |
+
"scripts": []
|
393 |
}
|
data/Chimakuan.json
CHANGED
@@ -2,30 +2,30 @@
|
|
2 |
"name": "Chimakuan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Quileute",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "qui",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3409",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Chemakum",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": "xch",
|
21 |
-
"tokenizers": {},
|
22 |
"children": [],
|
|
|
23 |
"node_i": "3410",
|
24 |
-
"
|
25 |
-
"
|
26 |
}
|
27 |
],
|
|
|
28 |
"node_i": "3408",
|
29 |
-
"
|
30 |
-
"
|
31 |
}
|
|
|
2 |
"name": "Chimakuan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Quileute",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "qui",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3409",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Chemakum",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "xch",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "3410",
|
23 |
+
"native_tokenizers": [],
|
24 |
+
"scripts": []
|
25 |
}
|
26 |
],
|
27 |
+
"tokenizers": {},
|
28 |
"node_i": "3408",
|
29 |
+
"native_tokenizers": [],
|
30 |
+
"scripts": []
|
31 |
}
|
data/Chinookan.json
CHANGED
@@ -2,41 +2,41 @@
|
|
2 |
"name": "Chinookan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Chinook",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "chh",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3412",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Upper Chinookan",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": null,
|
21 |
-
"tokenizers": {},
|
22 |
"children": [
|
23 |
{
|
24 |
"name": "Wasco-Wishram",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": "wac",
|
27 |
-
"tokenizers": {},
|
28 |
"children": [],
|
|
|
29 |
"node_i": "3414",
|
30 |
-
"
|
31 |
-
"
|
32 |
}
|
33 |
],
|
|
|
34 |
"node_i": "3413",
|
35 |
-
"
|
36 |
-
"
|
37 |
}
|
38 |
],
|
|
|
39 |
"node_i": "3411",
|
40 |
-
"
|
41 |
-
"
|
42 |
}
|
|
|
2 |
"name": "Chinookan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Chinook",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "chh",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3412",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Upper Chinookan",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": null,
|
|
|
20 |
"children": [
|
21 |
{
|
22 |
"name": "Wasco-Wishram",
|
23 |
"iso_1_code": null,
|
24 |
"iso_3_code": "wac",
|
|
|
25 |
"children": [],
|
26 |
+
"tokenizers": {},
|
27 |
"node_i": "3414",
|
28 |
+
"native_tokenizers": [],
|
29 |
+
"scripts": []
|
30 |
}
|
31 |
],
|
32 |
+
"tokenizers": {},
|
33 |
"node_i": "3413",
|
34 |
+
"native_tokenizers": [],
|
35 |
+
"scripts": []
|
36 |
}
|
37 |
],
|
38 |
+
"tokenizers": {},
|
39 |
"node_i": "3411",
|
40 |
+
"native_tokenizers": [],
|
41 |
+
"scripts": []
|
42 |
}
|
data/Chipaya-Uru.json
CHANGED
@@ -2,32 +2,32 @@
|
|
2 |
"name": "Chipaya-Uru",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Chipaya",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "cap",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3416",
|
|
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
-
]
|
17 |
-
"own_tokenizer": false
|
18 |
},
|
19 |
{
|
20 |
"name": "Uru",
|
21 |
"iso_1_code": null,
|
22 |
"iso_3_code": "ure",
|
23 |
-
"tokenizers": {},
|
24 |
"children": [],
|
|
|
25 |
"node_i": "3417",
|
26 |
-
"
|
27 |
-
"
|
28 |
}
|
29 |
],
|
|
|
30 |
"node_i": "3415",
|
31 |
-
"
|
32 |
-
"
|
33 |
}
|
|
|
2 |
"name": "Chipaya-Uru",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Chipaya",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "cap",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3416",
|
13 |
+
"native_tokenizers": [],
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
+
]
|
|
|
17 |
},
|
18 |
{
|
19 |
"name": "Uru",
|
20 |
"iso_1_code": null,
|
21 |
"iso_3_code": "ure",
|
|
|
22 |
"children": [],
|
23 |
+
"tokenizers": {},
|
24 |
"node_i": "3417",
|
25 |
+
"native_tokenizers": [],
|
26 |
+
"scripts": []
|
27 |
}
|
28 |
],
|
29 |
+
"tokenizers": {},
|
30 |
"node_i": "3415",
|
31 |
+
"native_tokenizers": [],
|
32 |
+
"scripts": []
|
33 |
}
|
data/Chocoan.json
CHANGED
@@ -2,121 +2,121 @@
|
|
2 |
"name": "Chocoan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Woun Meu",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "noa",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3419",
|
|
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
-
]
|
17 |
-
"own_tokenizer": false
|
18 |
},
|
19 |
{
|
20 |
"name": "Ember\u00e1",
|
21 |
"iso_1_code": null,
|
22 |
"iso_3_code": null,
|
23 |
-
"tokenizers": {},
|
24 |
"children": [
|
25 |
{
|
26 |
"name": "Northern Ember\u00e1",
|
27 |
"iso_1_code": null,
|
28 |
"iso_3_code": null,
|
29 |
-
"tokenizers": {},
|
30 |
"children": [
|
31 |
{
|
32 |
"name": "Embera Cat\u00edo",
|
33 |
"iso_1_code": null,
|
34 |
"iso_3_code": "cto",
|
35 |
-
"tokenizers": {},
|
36 |
"children": [],
|
|
|
37 |
"node_i": "3422",
|
|
|
38 |
"scripts": [
|
39 |
"Latn"
|
40 |
-
]
|
41 |
-
"own_tokenizer": false
|
42 |
},
|
43 |
{
|
44 |
"name": "Ember\u00e1, Northern",
|
45 |
"iso_1_code": null,
|
46 |
"iso_3_code": "emp",
|
47 |
-
"tokenizers": {},
|
48 |
"children": [],
|
|
|
49 |
"node_i": "3423",
|
|
|
50 |
"scripts": [
|
51 |
"Latn"
|
52 |
-
]
|
53 |
-
"own_tokenizer": false
|
54 |
}
|
55 |
],
|
|
|
56 |
"node_i": "3421",
|
57 |
-
"
|
58 |
-
"
|
59 |
},
|
60 |
{
|
61 |
"name": "Southern Ember\u00e1",
|
62 |
"iso_1_code": null,
|
63 |
"iso_3_code": null,
|
64 |
-
"tokenizers": {},
|
65 |
"children": [
|
66 |
{
|
67 |
"name": "Embera Baud\u00f3",
|
68 |
"iso_1_code": null,
|
69 |
"iso_3_code": "bdc",
|
70 |
-
"tokenizers": {},
|
71 |
"children": [],
|
|
|
72 |
"node_i": "3425",
|
73 |
-
"
|
74 |
-
"
|
75 |
},
|
76 |
{
|
77 |
"name": "Embera Cham\u00ed",
|
78 |
"iso_1_code": null,
|
79 |
"iso_3_code": "cmi",
|
80 |
-
"tokenizers": {},
|
81 |
"children": [],
|
|
|
82 |
"node_i": "3426",
|
83 |
-
"
|
84 |
-
"
|
85 |
},
|
86 |
{
|
87 |
"name": "Epena",
|
88 |
"iso_1_code": null,
|
89 |
"iso_3_code": "sja",
|
90 |
-
"tokenizers": {},
|
91 |
"children": [],
|
|
|
92 |
"node_i": "3427",
|
|
|
93 |
"scripts": [
|
94 |
"Latn"
|
95 |
-
]
|
96 |
-
"own_tokenizer": false
|
97 |
},
|
98 |
{
|
99 |
"name": "Embera Tad\u00f3",
|
100 |
"iso_1_code": null,
|
101 |
"iso_3_code": "tdc",
|
102 |
-
"tokenizers": {},
|
103 |
"children": [],
|
|
|
104 |
"node_i": "3428",
|
105 |
-
"
|
106 |
-
"
|
107 |
}
|
108 |
],
|
|
|
109 |
"node_i": "3424",
|
110 |
-
"
|
111 |
-
"
|
112 |
}
|
113 |
],
|
|
|
114 |
"node_i": "3420",
|
115 |
-
"
|
116 |
-
"
|
117 |
}
|
118 |
],
|
|
|
119 |
"node_i": "3418",
|
120 |
-
"
|
121 |
-
"
|
122 |
}
|
|
|
2 |
"name": "Chocoan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Woun Meu",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "noa",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3419",
|
13 |
+
"native_tokenizers": [],
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
+
]
|
|
|
17 |
},
|
18 |
{
|
19 |
"name": "Ember\u00e1",
|
20 |
"iso_1_code": null,
|
21 |
"iso_3_code": null,
|
|
|
22 |
"children": [
|
23 |
{
|
24 |
"name": "Northern Ember\u00e1",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": null,
|
|
|
27 |
"children": [
|
28 |
{
|
29 |
"name": "Embera Cat\u00edo",
|
30 |
"iso_1_code": null,
|
31 |
"iso_3_code": "cto",
|
|
|
32 |
"children": [],
|
33 |
+
"tokenizers": {},
|
34 |
"node_i": "3422",
|
35 |
+
"native_tokenizers": [],
|
36 |
"scripts": [
|
37 |
"Latn"
|
38 |
+
]
|
|
|
39 |
},
|
40 |
{
|
41 |
"name": "Ember\u00e1, Northern",
|
42 |
"iso_1_code": null,
|
43 |
"iso_3_code": "emp",
|
|
|
44 |
"children": [],
|
45 |
+
"tokenizers": {},
|
46 |
"node_i": "3423",
|
47 |
+
"native_tokenizers": [],
|
48 |
"scripts": [
|
49 |
"Latn"
|
50 |
+
]
|
|
|
51 |
}
|
52 |
],
|
53 |
+
"tokenizers": {},
|
54 |
"node_i": "3421",
|
55 |
+
"native_tokenizers": [],
|
56 |
+
"scripts": []
|
57 |
},
|
58 |
{
|
59 |
"name": "Southern Ember\u00e1",
|
60 |
"iso_1_code": null,
|
61 |
"iso_3_code": null,
|
|
|
62 |
"children": [
|
63 |
{
|
64 |
"name": "Embera Baud\u00f3",
|
65 |
"iso_1_code": null,
|
66 |
"iso_3_code": "bdc",
|
|
|
67 |
"children": [],
|
68 |
+
"tokenizers": {},
|
69 |
"node_i": "3425",
|
70 |
+
"native_tokenizers": [],
|
71 |
+
"scripts": []
|
72 |
},
|
73 |
{
|
74 |
"name": "Embera Cham\u00ed",
|
75 |
"iso_1_code": null,
|
76 |
"iso_3_code": "cmi",
|
|
|
77 |
"children": [],
|
78 |
+
"tokenizers": {},
|
79 |
"node_i": "3426",
|
80 |
+
"native_tokenizers": [],
|
81 |
+
"scripts": []
|
82 |
},
|
83 |
{
|
84 |
"name": "Epena",
|
85 |
"iso_1_code": null,
|
86 |
"iso_3_code": "sja",
|
|
|
87 |
"children": [],
|
88 |
+
"tokenizers": {},
|
89 |
"node_i": "3427",
|
90 |
+
"native_tokenizers": [],
|
91 |
"scripts": [
|
92 |
"Latn"
|
93 |
+
]
|
|
|
94 |
},
|
95 |
{
|
96 |
"name": "Embera Tad\u00f3",
|
97 |
"iso_1_code": null,
|
98 |
"iso_3_code": "tdc",
|
|
|
99 |
"children": [],
|
100 |
+
"tokenizers": {},
|
101 |
"node_i": "3428",
|
102 |
+
"native_tokenizers": [],
|
103 |
+
"scripts": []
|
104 |
}
|
105 |
],
|
106 |
+
"tokenizers": {},
|
107 |
"node_i": "3424",
|
108 |
+
"native_tokenizers": [],
|
109 |
+
"scripts": []
|
110 |
}
|
111 |
],
|
112 |
+
"tokenizers": {},
|
113 |
"node_i": "3420",
|
114 |
+
"native_tokenizers": [],
|
115 |
+
"scripts": []
|
116 |
}
|
117 |
],
|
118 |
+
"tokenizers": {},
|
119 |
"node_i": "3418",
|
120 |
+
"native_tokenizers": [],
|
121 |
+
"scripts": []
|
122 |
}
|
data/Cholonan.json
CHANGED
@@ -2,30 +2,30 @@
|
|
2 |
"name": "Cholonan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Chol\u00f3n",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "cht",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3430",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Hibito",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": "hib",
|
21 |
-
"tokenizers": {},
|
22 |
"children": [],
|
|
|
23 |
"node_i": "3431",
|
24 |
-
"
|
25 |
-
"
|
26 |
}
|
27 |
],
|
|
|
28 |
"node_i": "3429",
|
29 |
-
"
|
30 |
-
"
|
31 |
}
|
|
|
2 |
"name": "Cholonan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Chol\u00f3n",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "cht",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3430",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Hibito",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "hib",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "3431",
|
23 |
+
"native_tokenizers": [],
|
24 |
+
"scripts": []
|
25 |
}
|
26 |
],
|
27 |
+
"tokenizers": {},
|
28 |
"node_i": "3429",
|
29 |
+
"native_tokenizers": [],
|
30 |
+
"scripts": []
|
31 |
}
|
data/Chon.json
CHANGED
@@ -2,41 +2,41 @@
|
|
2 |
"name": "Chon",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Tehuelche",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "teh",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3433",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Island Chon",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": null,
|
21 |
-
"tokenizers": {},
|
22 |
"children": [
|
23 |
{
|
24 |
"name": "Ona",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": "ona",
|
27 |
-
"tokenizers": {},
|
28 |
"children": [],
|
|
|
29 |
"node_i": "3435",
|
30 |
-
"
|
31 |
-
"
|
32 |
}
|
33 |
],
|
|
|
34 |
"node_i": "3434",
|
35 |
-
"
|
36 |
-
"
|
37 |
}
|
38 |
],
|
|
|
39 |
"node_i": "3432",
|
40 |
-
"
|
41 |
-
"
|
42 |
}
|
|
|
2 |
"name": "Chon",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Tehuelche",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "teh",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3433",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Island Chon",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": null,
|
|
|
20 |
"children": [
|
21 |
{
|
22 |
"name": "Ona",
|
23 |
"iso_1_code": null,
|
24 |
"iso_3_code": "ona",
|
|
|
25 |
"children": [],
|
26 |
+
"tokenizers": {},
|
27 |
"node_i": "3435",
|
28 |
+
"native_tokenizers": [],
|
29 |
+
"scripts": []
|
30 |
}
|
31 |
],
|
32 |
+
"tokenizers": {},
|
33 |
"node_i": "3434",
|
34 |
+
"native_tokenizers": [],
|
35 |
+
"scripts": []
|
36 |
}
|
37 |
],
|
38 |
+
"tokenizers": {},
|
39 |
"node_i": "3432",
|
40 |
+
"native_tokenizers": [],
|
41 |
+
"scripts": []
|
42 |
}
|
data/Chukotko-Kamchatkan.json
CHANGED
@@ -2,108 +2,108 @@
|
|
2 |
"name": "Chukotko-Kamchatkan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Northern",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Chukot",
|
15 |
"iso_1_code": null,
|
16 |
"iso_3_code": null,
|
17 |
-
"tokenizers": {},
|
18 |
"children": [
|
19 |
{
|
20 |
"name": "Chukchi",
|
21 |
"iso_1_code": null,
|
22 |
"iso_3_code": "ckt",
|
23 |
-
"tokenizers": {},
|
24 |
"children": [],
|
|
|
25 |
"node_i": "3439",
|
|
|
26 |
"scripts": [
|
27 |
"Cyrl"
|
28 |
-
]
|
29 |
-
"own_tokenizer": false
|
30 |
}
|
31 |
],
|
|
|
32 |
"node_i": "3438",
|
33 |
-
"
|
34 |
-
"
|
35 |
},
|
36 |
{
|
37 |
"name": "Koryak-Alyutor",
|
38 |
"iso_1_code": null,
|
39 |
"iso_3_code": null,
|
40 |
-
"tokenizers": {},
|
41 |
"children": [
|
42 |
{
|
43 |
"name": "Alutor",
|
44 |
"iso_1_code": null,
|
45 |
"iso_3_code": "alr",
|
46 |
-
"tokenizers": {},
|
47 |
"children": [],
|
|
|
48 |
"node_i": "3441",
|
49 |
-
"
|
50 |
-
"
|
51 |
},
|
52 |
{
|
53 |
"name": "Koryak",
|
54 |
"iso_1_code": null,
|
55 |
"iso_3_code": "kpy",
|
56 |
-
"tokenizers": {},
|
57 |
"children": [],
|
|
|
58 |
"node_i": "3442",
|
59 |
-
"
|
60 |
-
"
|
61 |
},
|
62 |
{
|
63 |
"name": "Kerek",
|
64 |
"iso_1_code": null,
|
65 |
"iso_3_code": "krk",
|
66 |
-
"tokenizers": {},
|
67 |
"children": [],
|
|
|
68 |
"node_i": "3443",
|
69 |
-
"
|
70 |
-
"
|
71 |
}
|
72 |
],
|
|
|
73 |
"node_i": "3440",
|
74 |
-
"
|
75 |
-
"
|
76 |
}
|
77 |
],
|
|
|
78 |
"node_i": "3437",
|
79 |
-
"
|
80 |
-
"
|
81 |
},
|
82 |
{
|
83 |
"name": "Southern",
|
84 |
"iso_1_code": null,
|
85 |
"iso_3_code": null,
|
86 |
-
"tokenizers": {},
|
87 |
"children": [
|
88 |
{
|
89 |
"name": "Itelmen",
|
90 |
"iso_1_code": null,
|
91 |
"iso_3_code": "itl",
|
92 |
-
"tokenizers": {},
|
93 |
"children": [],
|
|
|
94 |
"node_i": "3445",
|
|
|
95 |
"scripts": [
|
96 |
"Cyrl"
|
97 |
-
]
|
98 |
-
"own_tokenizer": false
|
99 |
}
|
100 |
],
|
|
|
101 |
"node_i": "3444",
|
102 |
-
"
|
103 |
-
"
|
104 |
}
|
105 |
],
|
|
|
106 |
"node_i": "3436",
|
107 |
-
"
|
108 |
-
"
|
109 |
}
|
|
|
2 |
"name": "Chukotko-Kamchatkan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Northern",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Chukot",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": null,
|
|
|
15 |
"children": [
|
16 |
{
|
17 |
"name": "Chukchi",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "ckt",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "3439",
|
23 |
+
"native_tokenizers": [],
|
24 |
"scripts": [
|
25 |
"Cyrl"
|
26 |
+
]
|
|
|
27 |
}
|
28 |
],
|
29 |
+
"tokenizers": {},
|
30 |
"node_i": "3438",
|
31 |
+
"native_tokenizers": [],
|
32 |
+
"scripts": []
|
33 |
},
|
34 |
{
|
35 |
"name": "Koryak-Alyutor",
|
36 |
"iso_1_code": null,
|
37 |
"iso_3_code": null,
|
|
|
38 |
"children": [
|
39 |
{
|
40 |
"name": "Alutor",
|
41 |
"iso_1_code": null,
|
42 |
"iso_3_code": "alr",
|
|
|
43 |
"children": [],
|
44 |
+
"tokenizers": {},
|
45 |
"node_i": "3441",
|
46 |
+
"native_tokenizers": [],
|
47 |
+
"scripts": []
|
48 |
},
|
49 |
{
|
50 |
"name": "Koryak",
|
51 |
"iso_1_code": null,
|
52 |
"iso_3_code": "kpy",
|
|
|
53 |
"children": [],
|
54 |
+
"tokenizers": {},
|
55 |
"node_i": "3442",
|
56 |
+
"native_tokenizers": [],
|
57 |
+
"scripts": []
|
58 |
},
|
59 |
{
|
60 |
"name": "Kerek",
|
61 |
"iso_1_code": null,
|
62 |
"iso_3_code": "krk",
|
|
|
63 |
"children": [],
|
64 |
+
"tokenizers": {},
|
65 |
"node_i": "3443",
|
66 |
+
"native_tokenizers": [],
|
67 |
+
"scripts": []
|
68 |
}
|
69 |
],
|
70 |
+
"tokenizers": {},
|
71 |
"node_i": "3440",
|
72 |
+
"native_tokenizers": [],
|
73 |
+
"scripts": []
|
74 |
}
|
75 |
],
|
76 |
+
"tokenizers": {},
|
77 |
"node_i": "3437",
|
78 |
+
"native_tokenizers": [],
|
79 |
+
"scripts": []
|
80 |
},
|
81 |
{
|
82 |
"name": "Southern",
|
83 |
"iso_1_code": null,
|
84 |
"iso_3_code": null,
|
|
|
85 |
"children": [
|
86 |
{
|
87 |
"name": "Itelmen",
|
88 |
"iso_1_code": null,
|
89 |
"iso_3_code": "itl",
|
|
|
90 |
"children": [],
|
91 |
+
"tokenizers": {},
|
92 |
"node_i": "3445",
|
93 |
+
"native_tokenizers": [],
|
94 |
"scripts": [
|
95 |
"Cyrl"
|
96 |
+
]
|
|
|
97 |
}
|
98 |
],
|
99 |
+
"tokenizers": {},
|
100 |
"node_i": "3444",
|
101 |
+
"native_tokenizers": [],
|
102 |
+
"scripts": []
|
103 |
}
|
104 |
],
|
105 |
+
"tokenizers": {},
|
106 |
"node_i": "3436",
|
107 |
+
"native_tokenizers": [],
|
108 |
+
"scripts": []
|
109 |
}
|
data/Chumashan.json
CHANGED
@@ -2,92 +2,92 @@
|
|
2 |
"name": "Chumashan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Obispe\u00f1o",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "obi",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3447",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Central Chumash",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": null,
|
21 |
-
"tokenizers": {},
|
22 |
"children": [
|
23 |
{
|
24 |
"name": "Barbare\u00f1o",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": "boi",
|
27 |
-
"tokenizers": {},
|
28 |
"children": [],
|
|
|
29 |
"node_i": "3449",
|
30 |
-
"
|
31 |
-
"
|
32 |
},
|
33 |
{
|
34 |
"name": "Inese\u00f1o",
|
35 |
"iso_1_code": null,
|
36 |
"iso_3_code": "inz",
|
37 |
-
"tokenizers": {},
|
38 |
"children": [],
|
|
|
39 |
"node_i": "3450",
|
40 |
-
"
|
41 |
-
"
|
42 |
},
|
43 |
{
|
44 |
"name": "Purisime\u00f1o",
|
45 |
"iso_1_code": null,
|
46 |
"iso_3_code": "puy",
|
47 |
-
"tokenizers": {},
|
48 |
"children": [],
|
|
|
49 |
"node_i": "3451",
|
50 |
-
"
|
51 |
-
"
|
52 |
},
|
53 |
{
|
54 |
"name": "Venture\u00f1o",
|
55 |
"iso_1_code": null,
|
56 |
"iso_3_code": "veo",
|
57 |
-
"tokenizers": {},
|
58 |
"children": [],
|
|
|
59 |
"node_i": "3452",
|
60 |
-
"
|
61 |
-
"
|
62 |
}
|
63 |
],
|
|
|
64 |
"node_i": "3448",
|
65 |
-
"
|
66 |
-
"
|
67 |
},
|
68 |
{
|
69 |
"name": "Island Chumash",
|
70 |
"iso_1_code": null,
|
71 |
"iso_3_code": null,
|
72 |
-
"tokenizers": {},
|
73 |
"children": [
|
74 |
{
|
75 |
"name": "Cruze\u00f1o",
|
76 |
"iso_1_code": null,
|
77 |
"iso_3_code": "crz",
|
78 |
-
"tokenizers": {},
|
79 |
"children": [],
|
|
|
80 |
"node_i": "3454",
|
81 |
-
"
|
82 |
-
"
|
83 |
}
|
84 |
],
|
|
|
85 |
"node_i": "3453",
|
86 |
-
"
|
87 |
-
"
|
88 |
}
|
89 |
],
|
|
|
90 |
"node_i": "3446",
|
91 |
-
"
|
92 |
-
"
|
93 |
}
|
|
|
2 |
"name": "Chumashan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Obispe\u00f1o",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "obi",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3447",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Central Chumash",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": null,
|
|
|
20 |
"children": [
|
21 |
{
|
22 |
"name": "Barbare\u00f1o",
|
23 |
"iso_1_code": null,
|
24 |
"iso_3_code": "boi",
|
|
|
25 |
"children": [],
|
26 |
+
"tokenizers": {},
|
27 |
"node_i": "3449",
|
28 |
+
"native_tokenizers": [],
|
29 |
+
"scripts": []
|
30 |
},
|
31 |
{
|
32 |
"name": "Inese\u00f1o",
|
33 |
"iso_1_code": null,
|
34 |
"iso_3_code": "inz",
|
|
|
35 |
"children": [],
|
36 |
+
"tokenizers": {},
|
37 |
"node_i": "3450",
|
38 |
+
"native_tokenizers": [],
|
39 |
+
"scripts": []
|
40 |
},
|
41 |
{
|
42 |
"name": "Purisime\u00f1o",
|
43 |
"iso_1_code": null,
|
44 |
"iso_3_code": "puy",
|
|
|
45 |
"children": [],
|
46 |
+
"tokenizers": {},
|
47 |
"node_i": "3451",
|
48 |
+
"native_tokenizers": [],
|
49 |
+
"scripts": []
|
50 |
},
|
51 |
{
|
52 |
"name": "Venture\u00f1o",
|
53 |
"iso_1_code": null,
|
54 |
"iso_3_code": "veo",
|
|
|
55 |
"children": [],
|
56 |
+
"tokenizers": {},
|
57 |
"node_i": "3452",
|
58 |
+
"native_tokenizers": [],
|
59 |
+
"scripts": []
|
60 |
}
|
61 |
],
|
62 |
+
"tokenizers": {},
|
63 |
"node_i": "3448",
|
64 |
+
"native_tokenizers": [],
|
65 |
+
"scripts": []
|
66 |
},
|
67 |
{
|
68 |
"name": "Island Chumash",
|
69 |
"iso_1_code": null,
|
70 |
"iso_3_code": null,
|
|
|
71 |
"children": [
|
72 |
{
|
73 |
"name": "Cruze\u00f1o",
|
74 |
"iso_1_code": null,
|
75 |
"iso_3_code": "crz",
|
|
|
76 |
"children": [],
|
77 |
+
"tokenizers": {},
|
78 |
"node_i": "3454",
|
79 |
+
"native_tokenizers": [],
|
80 |
+
"scripts": []
|
81 |
}
|
82 |
],
|
83 |
+
"tokenizers": {},
|
84 |
"node_i": "3453",
|
85 |
+
"native_tokenizers": [],
|
86 |
+
"scripts": []
|
87 |
}
|
88 |
],
|
89 |
+
"tokenizers": {},
|
90 |
"node_i": "3446",
|
91 |
+
"native_tokenizers": [],
|
92 |
+
"scripts": []
|
93 |
}
|
data/Cochimí-Yuman.json
CHANGED
@@ -2,155 +2,155 @@
|
|
2 |
"name": "Cochim\u00ed-Yuman",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Yuman",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Cochimi",
|
15 |
"iso_1_code": null,
|
16 |
"iso_3_code": "coj",
|
17 |
-
"tokenizers": {},
|
18 |
"children": [],
|
|
|
19 |
"node_i": "3457",
|
20 |
-
"
|
21 |
-
"
|
22 |
},
|
23 |
{
|
24 |
"name": "Kiliwa",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": "klb",
|
27 |
-
"tokenizers": {},
|
28 |
"children": [],
|
|
|
29 |
"node_i": "3458",
|
30 |
-
"
|
31 |
-
"
|
32 |
},
|
33 |
{
|
34 |
"name": "Delta-California",
|
35 |
"iso_1_code": null,
|
36 |
"iso_3_code": null,
|
37 |
-
"tokenizers": {},
|
38 |
"children": [
|
39 |
{
|
40 |
"name": "Cocopa",
|
41 |
"iso_1_code": null,
|
42 |
"iso_3_code": "coc",
|
43 |
-
"tokenizers": {},
|
44 |
"children": [],
|
|
|
45 |
"node_i": "3460",
|
46 |
-
"
|
47 |
-
"
|
48 |
},
|
49 |
{
|
50 |
"name": "Kumiai",
|
51 |
"iso_1_code": null,
|
52 |
"iso_3_code": "dih",
|
53 |
-
"tokenizers": {},
|
54 |
"children": [],
|
|
|
55 |
"node_i": "3461",
|
56 |
-
"
|
57 |
-
"
|
58 |
}
|
59 |
],
|
|
|
60 |
"node_i": "3459",
|
61 |
-
"
|
62 |
-
"
|
63 |
},
|
64 |
{
|
65 |
"name": "Pai",
|
66 |
"iso_1_code": null,
|
67 |
"iso_3_code": null,
|
68 |
-
"tokenizers": {},
|
69 |
"children": [
|
70 |
{
|
71 |
"name": "Paipai",
|
72 |
"iso_1_code": null,
|
73 |
"iso_3_code": "ppi",
|
74 |
-
"tokenizers": {},
|
75 |
"children": [],
|
|
|
76 |
"node_i": "3463",
|
77 |
-
"
|
78 |
-
"
|
79 |
},
|
80 |
{
|
81 |
"name": "Havasupai-Walapai-Yavapai",
|
82 |
"iso_1_code": null,
|
83 |
"iso_3_code": "yuf",
|
84 |
-
"tokenizers": {},
|
85 |
"children": [],
|
|
|
86 |
"node_i": "3464",
|
87 |
-
"
|
88 |
-
"
|
89 |
}
|
90 |
],
|
|
|
91 |
"node_i": "3462",
|
92 |
-
"
|
93 |
-
"
|
94 |
},
|
95 |
{
|
96 |
"name": "River",
|
97 |
"iso_1_code": null,
|
98 |
"iso_3_code": null,
|
99 |
-
"tokenizers": {},
|
100 |
"children": [
|
101 |
{
|
102 |
"name": "Mojave",
|
103 |
"iso_1_code": null,
|
104 |
"iso_3_code": null,
|
105 |
-
"tokenizers": {},
|
106 |
"children": [
|
107 |
{
|
108 |
"name": "Mohave",
|
109 |
"iso_1_code": null,
|
110 |
"iso_3_code": "mov",
|
111 |
-
"tokenizers": {},
|
112 |
"children": [],
|
|
|
113 |
"node_i": "3467",
|
114 |
-
"
|
115 |
-
"
|
116 |
},
|
117 |
{
|
118 |
"name": "Maricopa",
|
119 |
"iso_1_code": null,
|
120 |
"iso_3_code": "mrc",
|
121 |
-
"tokenizers": {},
|
122 |
"children": [],
|
|
|
123 |
"node_i": "3468",
|
124 |
-
"
|
125 |
-
"
|
126 |
},
|
127 |
{
|
128 |
"name": "Quechan",
|
129 |
"iso_1_code": null,
|
130 |
"iso_3_code": "yum",
|
131 |
-
"tokenizers": {},
|
132 |
"children": [],
|
|
|
133 |
"node_i": "3469",
|
134 |
-
"
|
135 |
-
"
|
136 |
}
|
137 |
],
|
|
|
138 |
"node_i": "3466",
|
139 |
-
"
|
140 |
-
"
|
141 |
}
|
142 |
],
|
|
|
143 |
"node_i": "3465",
|
144 |
-
"
|
145 |
-
"
|
146 |
}
|
147 |
],
|
|
|
148 |
"node_i": "3456",
|
149 |
-
"
|
150 |
-
"
|
151 |
}
|
152 |
],
|
|
|
153 |
"node_i": "3455",
|
154 |
-
"
|
155 |
-
"
|
156 |
}
|
|
|
2 |
"name": "Cochim\u00ed-Yuman",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Yuman",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Cochimi",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": "coj",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "3457",
|
18 |
+
"native_tokenizers": [],
|
19 |
+
"scripts": []
|
20 |
},
|
21 |
{
|
22 |
"name": "Kiliwa",
|
23 |
"iso_1_code": null,
|
24 |
"iso_3_code": "klb",
|
|
|
25 |
"children": [],
|
26 |
+
"tokenizers": {},
|
27 |
"node_i": "3458",
|
28 |
+
"native_tokenizers": [],
|
29 |
+
"scripts": []
|
30 |
},
|
31 |
{
|
32 |
"name": "Delta-California",
|
33 |
"iso_1_code": null,
|
34 |
"iso_3_code": null,
|
|
|
35 |
"children": [
|
36 |
{
|
37 |
"name": "Cocopa",
|
38 |
"iso_1_code": null,
|
39 |
"iso_3_code": "coc",
|
|
|
40 |
"children": [],
|
41 |
+
"tokenizers": {},
|
42 |
"node_i": "3460",
|
43 |
+
"native_tokenizers": [],
|
44 |
+
"scripts": []
|
45 |
},
|
46 |
{
|
47 |
"name": "Kumiai",
|
48 |
"iso_1_code": null,
|
49 |
"iso_3_code": "dih",
|
|
|
50 |
"children": [],
|
51 |
+
"tokenizers": {},
|
52 |
"node_i": "3461",
|
53 |
+
"native_tokenizers": [],
|
54 |
+
"scripts": []
|
55 |
}
|
56 |
],
|
57 |
+
"tokenizers": {},
|
58 |
"node_i": "3459",
|
59 |
+
"native_tokenizers": [],
|
60 |
+
"scripts": []
|
61 |
},
|
62 |
{
|
63 |
"name": "Pai",
|
64 |
"iso_1_code": null,
|
65 |
"iso_3_code": null,
|
|
|
66 |
"children": [
|
67 |
{
|
68 |
"name": "Paipai",
|
69 |
"iso_1_code": null,
|
70 |
"iso_3_code": "ppi",
|
|
|
71 |
"children": [],
|
72 |
+
"tokenizers": {},
|
73 |
"node_i": "3463",
|
74 |
+
"native_tokenizers": [],
|
75 |
+
"scripts": []
|
76 |
},
|
77 |
{
|
78 |
"name": "Havasupai-Walapai-Yavapai",
|
79 |
"iso_1_code": null,
|
80 |
"iso_3_code": "yuf",
|
|
|
81 |
"children": [],
|
82 |
+
"tokenizers": {},
|
83 |
"node_i": "3464",
|
84 |
+
"native_tokenizers": [],
|
85 |
+
"scripts": []
|
86 |
}
|
87 |
],
|
88 |
+
"tokenizers": {},
|
89 |
"node_i": "3462",
|
90 |
+
"native_tokenizers": [],
|
91 |
+
"scripts": []
|
92 |
},
|
93 |
{
|
94 |
"name": "River",
|
95 |
"iso_1_code": null,
|
96 |
"iso_3_code": null,
|
|
|
97 |
"children": [
|
98 |
{
|
99 |
"name": "Mojave",
|
100 |
"iso_1_code": null,
|
101 |
"iso_3_code": null,
|
|
|
102 |
"children": [
|
103 |
{
|
104 |
"name": "Mohave",
|
105 |
"iso_1_code": null,
|
106 |
"iso_3_code": "mov",
|
|
|
107 |
"children": [],
|
108 |
+
"tokenizers": {},
|
109 |
"node_i": "3467",
|
110 |
+
"native_tokenizers": [],
|
111 |
+
"scripts": []
|
112 |
},
|
113 |
{
|
114 |
"name": "Maricopa",
|
115 |
"iso_1_code": null,
|
116 |
"iso_3_code": "mrc",
|
|
|
117 |
"children": [],
|
118 |
+
"tokenizers": {},
|
119 |
"node_i": "3468",
|
120 |
+
"native_tokenizers": [],
|
121 |
+
"scripts": []
|
122 |
},
|
123 |
{
|
124 |
"name": "Quechan",
|
125 |
"iso_1_code": null,
|
126 |
"iso_3_code": "yum",
|
|
|
127 |
"children": [],
|
128 |
+
"tokenizers": {},
|
129 |
"node_i": "3469",
|
130 |
+
"native_tokenizers": [],
|
131 |
+
"scripts": []
|
132 |
}
|
133 |
],
|
134 |
+
"tokenizers": {},
|
135 |
"node_i": "3466",
|
136 |
+
"native_tokenizers": [],
|
137 |
+
"scripts": []
|
138 |
}
|
139 |
],
|
140 |
+
"tokenizers": {},
|
141 |
"node_i": "3465",
|
142 |
+
"native_tokenizers": [],
|
143 |
+
"scripts": []
|
144 |
}
|
145 |
],
|
146 |
+
"tokenizers": {},
|
147 |
"node_i": "3456",
|
148 |
+
"native_tokenizers": [],
|
149 |
+
"scripts": []
|
150 |
}
|
151 |
],
|
152 |
+
"tokenizers": {},
|
153 |
"node_i": "3455",
|
154 |
+
"native_tokenizers": [],
|
155 |
+
"scripts": []
|
156 |
}
|
data/Comecrudan.json
CHANGED
@@ -2,60 +2,60 @@
|
|
2 |
"name": "Comecrudan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Mamulique",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "emm",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3471",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Comecrudo",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": "xcm",
|
21 |
-
"tokenizers": {},
|
22 |
"children": [],
|
|
|
23 |
"node_i": "3472",
|
24 |
-
"
|
25 |
-
"
|
26 |
},
|
27 |
{
|
28 |
"name": "Cotoname",
|
29 |
"iso_1_code": null,
|
30 |
"iso_3_code": "xcn",
|
31 |
-
"tokenizers": {},
|
32 |
"children": [],
|
|
|
33 |
"node_i": "3473",
|
34 |
-
"
|
35 |
-
"
|
36 |
},
|
37 |
{
|
38 |
"name": "Coahuilteco",
|
39 |
"iso_1_code": null,
|
40 |
"iso_3_code": "xcw",
|
41 |
-
"tokenizers": {},
|
42 |
"children": [],
|
|
|
43 |
"node_i": "3474",
|
44 |
-
"
|
45 |
-
"
|
46 |
},
|
47 |
{
|
48 |
"name": "Garza",
|
49 |
"iso_1_code": null,
|
50 |
"iso_3_code": "xgr",
|
51 |
-
"tokenizers": {},
|
52 |
"children": [],
|
|
|
53 |
"node_i": "3475",
|
54 |
-
"
|
55 |
-
"
|
56 |
}
|
57 |
],
|
|
|
58 |
"node_i": "3470",
|
59 |
-
"
|
60 |
-
"
|
61 |
}
|
|
|
2 |
"name": "Comecrudan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Mamulique",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "emm",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3471",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Comecrudo",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "xcm",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "3472",
|
23 |
+
"native_tokenizers": [],
|
24 |
+
"scripts": []
|
25 |
},
|
26 |
{
|
27 |
"name": "Cotoname",
|
28 |
"iso_1_code": null,
|
29 |
"iso_3_code": "xcn",
|
|
|
30 |
"children": [],
|
31 |
+
"tokenizers": {},
|
32 |
"node_i": "3473",
|
33 |
+
"native_tokenizers": [],
|
34 |
+
"scripts": []
|
35 |
},
|
36 |
{
|
37 |
"name": "Coahuilteco",
|
38 |
"iso_1_code": null,
|
39 |
"iso_3_code": "xcw",
|
|
|
40 |
"children": [],
|
41 |
+
"tokenizers": {},
|
42 |
"node_i": "3474",
|
43 |
+
"native_tokenizers": [],
|
44 |
+
"scripts": []
|
45 |
},
|
46 |
{
|
47 |
"name": "Garza",
|
48 |
"iso_1_code": null,
|
49 |
"iso_3_code": "xgr",
|
|
|
50 |
"children": [],
|
51 |
+
"tokenizers": {},
|
52 |
"node_i": "3475",
|
53 |
+
"native_tokenizers": [],
|
54 |
+
"scripts": []
|
55 |
}
|
56 |
],
|
57 |
+
"tokenizers": {},
|
58 |
"node_i": "3470",
|
59 |
+
"native_tokenizers": [],
|
60 |
+
"scripts": []
|
61 |
}
|
data/Constructed language.json
CHANGED
@@ -2,22 +2,22 @@
|
|
2 |
"name": "Constructed language",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Esperanto",
|
9 |
"iso_1_code": "eo",
|
10 |
"iso_3_code": "epo",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3477",
|
|
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
-
]
|
17 |
-
"own_tokenizer": false
|
18 |
}
|
19 |
],
|
|
|
20 |
"node_i": "3476",
|
21 |
-
"
|
22 |
-
"
|
23 |
}
|
|
|
2 |
"name": "Constructed language",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Esperanto",
|
8 |
"iso_1_code": "eo",
|
9 |
"iso_3_code": "epo",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3477",
|
13 |
+
"native_tokenizers": [],
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
+
]
|
|
|
17 |
}
|
18 |
],
|
19 |
+
"tokenizers": {},
|
20 |
"node_i": "3476",
|
21 |
+
"native_tokenizers": [],
|
22 |
+
"scripts": []
|
23 |
}
|
data/Coosan.json
CHANGED
@@ -2,30 +2,30 @@
|
|
2 |
"name": "Coosan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Coos",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "csz",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3479",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Miluk",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": "iml",
|
21 |
-
"tokenizers": {},
|
22 |
"children": [],
|
|
|
23 |
"node_i": "3480",
|
24 |
-
"
|
25 |
-
"
|
26 |
}
|
27 |
],
|
|
|
28 |
"node_i": "3478",
|
29 |
-
"
|
30 |
-
"
|
31 |
}
|
|
|
2 |
"name": "Coosan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Coos",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "csz",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3479",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Miluk",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "iml",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "3480",
|
23 |
+
"native_tokenizers": [],
|
24 |
+
"scripts": []
|
25 |
}
|
26 |
],
|
27 |
+
"tokenizers": {},
|
28 |
"node_i": "3478",
|
29 |
+
"native_tokenizers": [],
|
30 |
+
"scripts": []
|
31 |
}
|
data/Creole.json
CHANGED
@@ -2,2288 +2,1742 @@
|
|
2 |
"name": "Creole",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {
|
6 |
-
"Arab": {
|
7 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
8 |
-
"original_lang_name": "malay",
|
9 |
-
"original_lang_code": "msa",
|
10 |
-
"scripts": [
|
11 |
-
"Latn",
|
12 |
-
"Arab",
|
13 |
-
"Thai"
|
14 |
-
],
|
15 |
-
"class_name": "SpaCyTokenizer",
|
16 |
-
"macrolanguage": true
|
17 |
-
},
|
18 |
-
"Latn": {
|
19 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
20 |
-
"original_lang_name": "malay",
|
21 |
-
"original_lang_code": "msa",
|
22 |
-
"scripts": [
|
23 |
-
"Latn",
|
24 |
-
"Arab",
|
25 |
-
"Thai"
|
26 |
-
],
|
27 |
-
"class_name": "SpaCyTokenizer",
|
28 |
-
"macrolanguage": true
|
29 |
-
},
|
30 |
-
"Thai": {
|
31 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
32 |
-
"original_lang_name": "malay",
|
33 |
-
"original_lang_code": "msa",
|
34 |
-
"scripts": [
|
35 |
-
"Latn",
|
36 |
-
"Arab",
|
37 |
-
"Thai"
|
38 |
-
],
|
39 |
-
"class_name": "SpaCyTokenizer",
|
40 |
-
"macrolanguage": true
|
41 |
-
}
|
42 |
-
},
|
43 |
"children": [
|
44 |
{
|
45 |
"name": "Afrikaans based",
|
46 |
"iso_1_code": null,
|
47 |
"iso_3_code": null,
|
48 |
-
"tokenizers": {},
|
49 |
"children": [
|
50 |
{
|
51 |
"name": "Flaaitaal",
|
52 |
"iso_1_code": null,
|
53 |
"iso_3_code": "fly",
|
54 |
-
"tokenizers": {},
|
55 |
"children": [],
|
|
|
56 |
"node_i": "3483",
|
57 |
-
"
|
58 |
-
"
|
59 |
},
|
60 |
{
|
61 |
"name": "Oorlams",
|
62 |
"iso_1_code": null,
|
63 |
"iso_3_code": "oor",
|
64 |
-
"tokenizers": {},
|
65 |
"children": [],
|
|
|
66 |
"node_i": "3484",
|
67 |
-
"
|
68 |
-
"
|
69 |
}
|
70 |
],
|
|
|
71 |
"node_i": "3482",
|
72 |
-
"
|
73 |
-
"
|
74 |
},
|
75 |
{
|
76 |
"name": "Arabic based",
|
77 |
"iso_1_code": null,
|
78 |
"iso_3_code": null,
|
79 |
-
"tokenizers": {
|
80 |
-
"Arab": {
|
81 |
-
"full_object": "SpaCyTokenizer(\"ar\")",
|
82 |
-
"original_lang_name": "arabic",
|
83 |
-
"original_lang_code": "ara",
|
84 |
-
"scripts": [
|
85 |
-
"Arab"
|
86 |
-
],
|
87 |
-
"class_name": "SpaCyTokenizer",
|
88 |
-
"macrolanguage": true
|
89 |
-
}
|
90 |
-
},
|
91 |
"children": [
|
92 |
{
|
93 |
"name": "Nubi",
|
94 |
"iso_1_code": null,
|
95 |
"iso_3_code": "kcn",
|
96 |
-
"tokenizers": {},
|
97 |
"children": [],
|
|
|
98 |
"node_i": "3486",
|
99 |
-
"
|
100 |
-
"
|
101 |
},
|
102 |
{
|
103 |
"name": "Arabic, Juba",
|
104 |
"iso_1_code": "ar",
|
105 |
"iso_3_code": "pga",
|
106 |
-
"tokenizers": {
|
107 |
-
"Arab": {
|
108 |
-
"full_object": "SpaCyTokenizer(\"ar\")",
|
109 |
-
"original_lang_name": "arabic",
|
110 |
-
"original_lang_code": "ara",
|
111 |
-
"scripts": [
|
112 |
-
"Arab"
|
113 |
-
],
|
114 |
-
"class_name": "SpaCyTokenizer",
|
115 |
-
"macrolanguage": true
|
116 |
-
}
|
117 |
-
},
|
118 |
"children": [],
|
|
|
119 |
"node_i": "3487",
|
120 |
-
"
|
121 |
-
"
|
122 |
}
|
123 |
],
|
|
|
124 |
"node_i": "3485",
|
125 |
-
"
|
126 |
-
"
|
127 |
},
|
128 |
{
|
129 |
"name": "Assamese based",
|
130 |
"iso_1_code": null,
|
131 |
"iso_3_code": null,
|
132 |
-
"tokenizers": {},
|
133 |
"children": [
|
134 |
{
|
135 |
"name": "Nagamese",
|
136 |
"iso_1_code": null,
|
137 |
"iso_3_code": "nag",
|
138 |
-
"tokenizers": {},
|
139 |
"children": [],
|
|
|
140 |
"node_i": "3489",
|
141 |
-
"
|
142 |
-
"
|
143 |
}
|
144 |
],
|
|
|
145 |
"node_i": "3488",
|
146 |
-
"
|
147 |
-
"
|
148 |
},
|
149 |
{
|
150 |
"name": "Dutch based",
|
151 |
"iso_1_code": null,
|
152 |
"iso_3_code": null,
|
153 |
-
"tokenizers": {},
|
154 |
"children": [
|
155 |
{
|
156 |
"name": "Berbice Dutch Creole",
|
157 |
"iso_1_code": null,
|
158 |
"iso_3_code": "brc",
|
159 |
-
"tokenizers": {},
|
160 |
"children": [],
|
|
|
161 |
"node_i": "3491",
|
162 |
-
"
|
163 |
-
"
|
164 |
},
|
165 |
{
|
166 |
"name": "Negerhollands",
|
167 |
"iso_1_code": null,
|
168 |
"iso_3_code": "dcr",
|
169 |
-
"tokenizers": {},
|
170 |
"children": [],
|
|
|
171 |
"node_i": "3492",
|
172 |
-
"
|
173 |
-
"
|
174 |
},
|
175 |
{
|
176 |
"name": "Javindo",
|
177 |
"iso_1_code": null,
|
178 |
"iso_3_code": "jvd",
|
179 |
-
"tokenizers": {},
|
180 |
"children": [],
|
|
|
181 |
"node_i": "3493",
|
182 |
-
"
|
183 |
-
"
|
184 |
},
|
185 |
{
|
186 |
"name": "Petjo",
|
187 |
"iso_1_code": null,
|
188 |
"iso_3_code": "pey",
|
189 |
-
"tokenizers": {},
|
190 |
"children": [],
|
|
|
191 |
"node_i": "3494",
|
192 |
-
"
|
193 |
-
"
|
194 |
},
|
195 |
{
|
196 |
"name": "Skepi Dutch Creole",
|
197 |
"iso_1_code": null,
|
198 |
"iso_3_code": "skw",
|
199 |
-
"tokenizers": {},
|
200 |
"children": [],
|
|
|
201 |
"node_i": "3495",
|
202 |
-
"
|
203 |
-
"
|
204 |
}
|
205 |
],
|
|
|
206 |
"node_i": "3490",
|
207 |
-
"
|
208 |
-
"
|
209 |
},
|
210 |
{
|
211 |
"name": "English based",
|
212 |
"iso_1_code": null,
|
213 |
"iso_3_code": null,
|
214 |
-
"tokenizers": {
|
215 |
-
"Latn": {
|
216 |
-
"full_object": "StanzaTokenizer(\"pcm\")",
|
217 |
-
"original_lang_name": "nigerian_pidgin",
|
218 |
-
"original_lang_code": "pcm",
|
219 |
-
"scripts": [
|
220 |
-
"Latn"
|
221 |
-
],
|
222 |
-
"class_name": "StanzaTokenizer",
|
223 |
-
"macrolanguage": false
|
224 |
-
}
|
225 |
-
},
|
226 |
"children": [
|
227 |
{
|
228 |
"name": "Saramaccan",
|
229 |
"iso_1_code": null,
|
230 |
"iso_3_code": "srm",
|
|
|
231 |
"tokenizers": {
|
232 |
"Latn": {
|
233 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
234 |
"original_lang_name": "nigerian_pidgin",
|
235 |
"original_lang_code": "pcm",
|
236 |
-
"
|
237 |
-
|
238 |
-
],
|
239 |
-
"class_name": "StanzaTokenizer",
|
240 |
-
"macrolanguage": false
|
241 |
}
|
242 |
},
|
243 |
-
"children": [],
|
244 |
"node_i": "3497",
|
|
|
245 |
"scripts": [
|
246 |
"Latn"
|
247 |
-
]
|
248 |
-
"own_tokenizer": false
|
249 |
},
|
250 |
{
|
251 |
"name": "Atlantic",
|
252 |
"iso_1_code": null,
|
253 |
"iso_3_code": null,
|
254 |
-
"tokenizers": {
|
255 |
-
"Latn": {
|
256 |
-
"full_object": "StanzaTokenizer(\"pcm\")",
|
257 |
-
"original_lang_name": "nigerian_pidgin",
|
258 |
-
"original_lang_code": "pcm",
|
259 |
-
"scripts": [
|
260 |
-
"Latn"
|
261 |
-
],
|
262 |
-
"class_name": "StanzaTokenizer",
|
263 |
-
"macrolanguage": false
|
264 |
-
}
|
265 |
-
},
|
266 |
"children": [
|
267 |
{
|
268 |
"name": "Eastern",
|
269 |
"iso_1_code": null,
|
270 |
"iso_3_code": null,
|
271 |
-
"tokenizers": {
|
272 |
-
"Latn": {
|
273 |
-
"full_object": "StanzaTokenizer(\"pcm\")",
|
274 |
-
"original_lang_name": "nigerian_pidgin",
|
275 |
-
"original_lang_code": "pcm",
|
276 |
-
"scripts": [
|
277 |
-
"Latn"
|
278 |
-
],
|
279 |
-
"class_name": "StanzaTokenizer",
|
280 |
-
"macrolanguage": false
|
281 |
-
}
|
282 |
-
},
|
283 |
"children": [
|
284 |
{
|
285 |
"name": "Turks and Caicos English Creole",
|
286 |
"iso_1_code": null,
|
287 |
"iso_3_code": "tch",
|
288 |
-
"tokenizers": {},
|
289 |
"children": [],
|
|
|
290 |
"node_i": "3500",
|
291 |
-
"
|
292 |
-
"
|
293 |
},
|
294 |
{
|
295 |
"name": "Northern",
|
296 |
"iso_1_code": null,
|
297 |
"iso_3_code": null,
|
298 |
-
"tokenizers": {
|
299 |
-
"Latn": {
|
300 |
-
"full_object": "StanzaTokenizer(\"pcm\")",
|
301 |
-
"original_lang_name": "nigerian_pidgin",
|
302 |
-
"original_lang_code": "pcm",
|
303 |
-
"scripts": [
|
304 |
-
"Latn"
|
305 |
-
],
|
306 |
-
"class_name": "StanzaTokenizer",
|
307 |
-
"macrolanguage": false
|
308 |
-
}
|
309 |
-
},
|
310 |
"children": [
|
311 |
{
|
312 |
"name": "Afro-Seminole Creole",
|
313 |
"iso_1_code": null,
|
314 |
"iso_3_code": "afs",
|
315 |
-
"tokenizers": {},
|
316 |
"children": [],
|
|
|
317 |
"node_i": "3502",
|
318 |
-
"
|
319 |
-
"
|
320 |
},
|
321 |
{
|
322 |
"name": "Bahamas English Creole",
|
323 |
"iso_1_code": null,
|
324 |
"iso_3_code": "bah",
|
325 |
-
"tokenizers": {},
|
326 |
"children": [],
|
|
|
327 |
"node_i": "3503",
|
328 |
-
"
|
329 |
-
"
|
330 |
},
|
331 |
{
|
332 |
"name": "Sea Island English Creole",
|
333 |
"iso_1_code": null,
|
334 |
"iso_3_code": "gul",
|
|
|
335 |
"tokenizers": {
|
336 |
"Latn": {
|
337 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
338 |
"original_lang_name": "nigerian_pidgin",
|
339 |
"original_lang_code": "pcm",
|
340 |
-
"
|
341 |
-
|
342 |
-
],
|
343 |
-
"class_name": "StanzaTokenizer",
|
344 |
-
"macrolanguage": false
|
345 |
}
|
346 |
},
|
347 |
-
"children": [],
|
348 |
"node_i": "3504",
|
|
|
349 |
"scripts": [
|
350 |
"Latn"
|
351 |
-
]
|
352 |
-
"own_tokenizer": false
|
353 |
}
|
354 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
355 |
"node_i": "3501",
|
356 |
-
"
|
357 |
-
"
|
358 |
},
|
359 |
{
|
360 |
"name": "Southern",
|
361 |
"iso_1_code": null,
|
362 |
"iso_3_code": null,
|
363 |
-
"tokenizers": {},
|
364 |
"children": [
|
365 |
{
|
366 |
"name": "Leeward Caribbean English Creole",
|
367 |
"iso_1_code": null,
|
368 |
"iso_3_code": "aig",
|
369 |
-
"tokenizers": {},
|
370 |
"children": [],
|
|
|
371 |
"node_i": "3506",
|
372 |
-
"
|
373 |
-
"
|
374 |
},
|
375 |
{
|
376 |
"name": "Bajan",
|
377 |
"iso_1_code": null,
|
378 |
"iso_3_code": "bjs",
|
379 |
-
"tokenizers": {},
|
380 |
"children": [],
|
|
|
381 |
"node_i": "3507",
|
382 |
-
"
|
383 |
-
"
|
384 |
},
|
385 |
{
|
386 |
"name": "Grenadian English Creole",
|
387 |
"iso_1_code": null,
|
388 |
"iso_3_code": "gcl",
|
389 |
-
"tokenizers": {},
|
390 |
"children": [],
|
|
|
391 |
"node_i": "3508",
|
392 |
-
"
|
393 |
-
"
|
394 |
},
|
395 |
{
|
396 |
"name": "Guyanese English Creole",
|
397 |
"iso_1_code": null,
|
398 |
"iso_3_code": "gyn",
|
399 |
-
"tokenizers": {},
|
400 |
"children": [],
|
|
|
401 |
"node_i": "3509",
|
402 |
-
"
|
403 |
-
"
|
404 |
},
|
405 |
{
|
406 |
"name": "Vincentian English Creole",
|
407 |
"iso_1_code": null,
|
408 |
"iso_3_code": "svc",
|
409 |
-
"tokenizers": {},
|
410 |
"children": [],
|
|
|
411 |
"node_i": "3510",
|
412 |
-
"
|
413 |
-
"
|
414 |
},
|
415 |
{
|
416 |
"name": "Tobagonian English Creole",
|
417 |
"iso_1_code": null,
|
418 |
"iso_3_code": "tgh",
|
419 |
-
"tokenizers": {},
|
420 |
"children": [],
|
|
|
421 |
"node_i": "3511",
|
422 |
-
"
|
423 |
-
"
|
424 |
},
|
425 |
{
|
426 |
"name": "Trinidadian English Creole",
|
427 |
"iso_1_code": null,
|
428 |
"iso_3_code": "trf",
|
429 |
-
"tokenizers": {},
|
430 |
"children": [],
|
|
|
431 |
"node_i": "3512",
|
432 |
-
"
|
433 |
-
"
|
434 |
},
|
435 |
{
|
436 |
"name": "Virgin Islands English Creole",
|
437 |
"iso_1_code": null,
|
438 |
"iso_3_code": "vic",
|
439 |
-
"tokenizers": {},
|
440 |
"children": [],
|
|
|
441 |
"node_i": "3513",
|
442 |
-
"
|
443 |
-
"
|
444 |
}
|
445 |
],
|
|
|
446 |
"node_i": "3505",
|
447 |
-
"
|
448 |
-
"
|
449 |
}
|
450 |
],
|
451 |
-
"node_i": "3499",
|
452 |
-
"scripts": [],
|
453 |
-
"own_tokenizer": false
|
454 |
-
},
|
455 |
-
{
|
456 |
-
"name": "Krio",
|
457 |
-
"iso_1_code": null,
|
458 |
-
"iso_3_code": null,
|
459 |
"tokenizers": {
|
460 |
"Latn": {
|
461 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
462 |
"original_lang_name": "nigerian_pidgin",
|
463 |
"original_lang_code": "pcm",
|
464 |
-
"
|
465 |
-
|
466 |
-
],
|
467 |
-
"class_name": "StanzaTokenizer",
|
468 |
-
"macrolanguage": false
|
469 |
}
|
470 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
471 |
"children": [
|
472 |
{
|
473 |
"name": "Equatorial Guinean Pidgin",
|
474 |
"iso_1_code": null,
|
475 |
"iso_3_code": "fpe",
|
476 |
-
"tokenizers": {},
|
477 |
"children": [],
|
|
|
478 |
"node_i": "3515",
|
479 |
-
"
|
480 |
-
"
|
481 |
},
|
482 |
{
|
483 |
"name": "Ghanaian Pidgin English",
|
484 |
"iso_1_code": null,
|
485 |
"iso_3_code": "gpe",
|
486 |
-
"tokenizers": {},
|
487 |
"children": [],
|
|
|
488 |
"node_i": "3516",
|
489 |
-
"
|
490 |
-
"
|
491 |
},
|
492 |
{
|
493 |
"name": "Krio",
|
494 |
"iso_1_code": null,
|
495 |
"iso_3_code": "kri",
|
|
|
496 |
"tokenizers": {
|
497 |
"Latn": {
|
498 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
499 |
"original_lang_name": "nigerian_pidgin",
|
500 |
"original_lang_code": "pcm",
|
501 |
-
"
|
502 |
-
|
503 |
-
],
|
504 |
-
"class_name": "StanzaTokenizer",
|
505 |
-
"macrolanguage": false
|
506 |
}
|
507 |
},
|
508 |
-
"children": [],
|
509 |
"node_i": "3517",
|
|
|
510 |
"scripts": [
|
511 |
"Latn"
|
512 |
-
]
|
513 |
-
"own_tokenizer": false
|
514 |
},
|
515 |
{
|
516 |
"name": "Pidgin, Nigerian",
|
517 |
"iso_1_code": null,
|
518 |
"iso_3_code": "pcm",
|
|
|
519 |
"tokenizers": {
|
520 |
"Latn": {
|
521 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
522 |
"original_lang_name": "nigerian_pidgin",
|
523 |
"original_lang_code": "pcm",
|
524 |
-
"
|
525 |
-
|
526 |
-
],
|
527 |
-
"class_name": "StanzaTokenizer",
|
528 |
-
"macrolanguage": false
|
529 |
}
|
530 |
},
|
531 |
-
"children": [],
|
532 |
"node_i": "3518",
|
533 |
-
"
|
534 |
"Latn"
|
535 |
],
|
536 |
-
"
|
|
|
|
|
537 |
},
|
538 |
{
|
539 |
"name": "Pidgin, Cameroon",
|
540 |
"iso_1_code": null,
|
541 |
"iso_3_code": "wes",
|
|
|
542 |
"tokenizers": {
|
543 |
"Latn": {
|
544 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
545 |
"original_lang_name": "nigerian_pidgin",
|
546 |
"original_lang_code": "pcm",
|
547 |
-
"
|
548 |
-
|
549 |
-
],
|
550 |
-
"class_name": "StanzaTokenizer",
|
551 |
-
"macrolanguage": false
|
552 |
}
|
553 |
},
|
554 |
-
"children": [],
|
555 |
"node_i": "3519",
|
|
|
556 |
"scripts": [
|
557 |
"Latn"
|
558 |
-
]
|
559 |
-
"own_tokenizer": false
|
560 |
}
|
561 |
],
|
562 |
-
"node_i": "3514",
|
563 |
-
"scripts": [],
|
564 |
-
"own_tokenizer": false
|
565 |
-
},
|
566 |
-
{
|
567 |
-
"name": "Suriname",
|
568 |
-
"iso_1_code": null,
|
569 |
-
"iso_3_code": null,
|
570 |
"tokenizers": {
|
571 |
"Latn": {
|
572 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
573 |
"original_lang_name": "nigerian_pidgin",
|
574 |
"original_lang_code": "pcm",
|
575 |
-
"
|
576 |
-
|
577 |
-
],
|
578 |
-
"class_name": "StanzaTokenizer",
|
579 |
-
"macrolanguage": false
|
580 |
}
|
581 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
582 |
"children": [
|
583 |
{
|
584 |
"name": "Sranan Tongo",
|
585 |
"iso_1_code": null,
|
586 |
"iso_3_code": "srn",
|
|
|
587 |
"tokenizers": {
|
588 |
"Latn": {
|
589 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
590 |
"original_lang_name": "nigerian_pidgin",
|
591 |
"original_lang_code": "pcm",
|
592 |
-
"
|
593 |
-
|
594 |
-
],
|
595 |
-
"class_name": "StanzaTokenizer",
|
596 |
-
"macrolanguage": false
|
597 |
}
|
598 |
},
|
599 |
-
"children": [],
|
600 |
"node_i": "3521",
|
|
|
601 |
"scripts": [
|
602 |
"Latn"
|
603 |
-
]
|
604 |
-
"own_tokenizer": false
|
605 |
},
|
606 |
{
|
607 |
"name": "Ndyuka",
|
608 |
"iso_1_code": null,
|
609 |
"iso_3_code": null,
|
610 |
-
"tokenizers": {
|
611 |
-
"Latn": {
|
612 |
-
"full_object": "StanzaTokenizer(\"pcm\")",
|
613 |
-
"original_lang_name": "nigerian_pidgin",
|
614 |
-
"original_lang_code": "pcm",
|
615 |
-
"scripts": [
|
616 |
-
"Latn"
|
617 |
-
],
|
618 |
-
"class_name": "StanzaTokenizer",
|
619 |
-
"macrolanguage": false
|
620 |
-
}
|
621 |
-
},
|
622 |
"children": [
|
623 |
{
|
624 |
"name": "Aukan",
|
625 |
"iso_1_code": null,
|
626 |
"iso_3_code": "djk",
|
|
|
627 |
"tokenizers": {
|
628 |
"Latn": {
|
629 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
630 |
"original_lang_name": "nigerian_pidgin",
|
631 |
"original_lang_code": "pcm",
|
632 |
-
"
|
633 |
-
|
634 |
-
],
|
635 |
-
"class_name": "StanzaTokenizer",
|
636 |
-
"macrolanguage": false
|
637 |
}
|
638 |
},
|
639 |
-
"children": [],
|
640 |
"node_i": "3523",
|
|
|
641 |
"scripts": [
|
642 |
"Latn"
|
643 |
-
]
|
644 |
-
"own_tokenizer": false
|
645 |
},
|
646 |
{
|
647 |
"name": "Kwinti",
|
648 |
"iso_1_code": null,
|
649 |
"iso_3_code": "kww",
|
650 |
-
"tokenizers": {},
|
651 |
"children": [],
|
|
|
652 |
"node_i": "3524",
|
653 |
-
"
|
654 |
-
"
|
655 |
}
|
656 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
657 |
"node_i": "3522",
|
658 |
-
"
|
659 |
-
"
|
660 |
}
|
661 |
],
|
662 |
-
"node_i": "3520",
|
663 |
-
"scripts": [],
|
664 |
-
"own_tokenizer": false
|
665 |
-
},
|
666 |
-
{
|
667 |
-
"name": "Western",
|
668 |
-
"iso_1_code": null,
|
669 |
-
"iso_3_code": null,
|
670 |
"tokenizers": {
|
671 |
"Latn": {
|
672 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
673 |
"original_lang_name": "nigerian_pidgin",
|
674 |
"original_lang_code": "pcm",
|
675 |
-
"
|
676 |
-
|
677 |
-
],
|
678 |
-
"class_name": "StanzaTokenizer",
|
679 |
-
"macrolanguage": false
|
680 |
}
|
681 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
682 |
"children": [
|
683 |
{
|
684 |
"name": "Belize English Creole",
|
685 |
"iso_1_code": null,
|
686 |
"iso_3_code": "bzj",
|
|
|
687 |
"tokenizers": {
|
688 |
"Latn": {
|
689 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
690 |
"original_lang_name": "nigerian_pidgin",
|
691 |
"original_lang_code": "pcm",
|
692 |
-
"
|
693 |
-
|
694 |
-
],
|
695 |
-
"class_name": "StanzaTokenizer",
|
696 |
-
"macrolanguage": false
|
697 |
}
|
698 |
},
|
699 |
-
"children": [],
|
700 |
"node_i": "3526",
|
|
|
701 |
"scripts": [
|
702 |
"Latn"
|
703 |
-
]
|
704 |
-
"own_tokenizer": false
|
705 |
},
|
706 |
{
|
707 |
"name": "Nicaragua English Creole",
|
708 |
"iso_1_code": null,
|
709 |
"iso_3_code": "bzk",
|
710 |
-
"tokenizers": {},
|
711 |
"children": [],
|
|
|
712 |
"node_i": "3527",
|
713 |
-
"
|
714 |
-
"
|
715 |
},
|
716 |
{
|
717 |
"name": "Islander English Creole",
|
718 |
"iso_1_code": null,
|
719 |
"iso_3_code": "icr",
|
|
|
720 |
"tokenizers": {
|
721 |
"Latn": {
|
722 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
723 |
"original_lang_name": "nigerian_pidgin",
|
724 |
"original_lang_code": "pcm",
|
725 |
-
"
|
726 |
-
|
727 |
-
],
|
728 |
-
"class_name": "StanzaTokenizer",
|
729 |
-
"macrolanguage": false
|
730 |
}
|
731 |
},
|
732 |
-
"children": [],
|
733 |
"node_i": "3528",
|
|
|
734 |
"scripts": [
|
735 |
"Latn"
|
736 |
-
]
|
737 |
-
"own_tokenizer": false
|
738 |
},
|
739 |
{
|
740 |
"name": "Jamaican English Creole",
|
741 |
"iso_1_code": null,
|
742 |
"iso_3_code": "jam",
|
|
|
743 |
"tokenizers": {
|
744 |
"Latn": {
|
745 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
746 |
"original_lang_name": "nigerian_pidgin",
|
747 |
"original_lang_code": "pcm",
|
748 |
-
"
|
749 |
-
|
750 |
-
],
|
751 |
-
"class_name": "StanzaTokenizer",
|
752 |
-
"macrolanguage": false
|
753 |
}
|
754 |
},
|
755 |
-
"children": [],
|
756 |
"node_i": "3529",
|
|
|
757 |
"scripts": [
|
758 |
"Latn"
|
759 |
-
]
|
760 |
-
"own_tokenizer": false
|
761 |
}
|
762 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
763 |
"node_i": "3525",
|
764 |
-
"
|
765 |
-
"
|
766 |
}
|
767 |
],
|
768 |
-
"node_i": "3498",
|
769 |
-
"scripts": [],
|
770 |
-
"own_tokenizer": false
|
771 |
-
},
|
772 |
-
{
|
773 |
-
"name": "Pacific",
|
774 |
-
"iso_1_code": null,
|
775 |
-
"iso_3_code": null,
|
776 |
"tokenizers": {
|
777 |
"Latn": {
|
778 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
779 |
"original_lang_name": "nigerian_pidgin",
|
780 |
"original_lang_code": "pcm",
|
781 |
-
"
|
782 |
-
|
783 |
-
],
|
784 |
-
"class_name": "StanzaTokenizer",
|
785 |
-
"macrolanguage": false
|
786 |
}
|
787 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
788 |
"children": [
|
789 |
{
|
790 |
"name": "Bislama",
|
791 |
"iso_1_code": "bi",
|
792 |
"iso_3_code": "bis",
|
|
|
793 |
"tokenizers": {
|
794 |
"Latn": {
|
795 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
796 |
"original_lang_name": "nigerian_pidgin",
|
797 |
"original_lang_code": "pcm",
|
798 |
-
"
|
799 |
-
|
800 |
-
],
|
801 |
-
"class_name": "StanzaTokenizer",
|
802 |
-
"macrolanguage": false
|
803 |
}
|
804 |
},
|
805 |
-
"children": [],
|
806 |
"node_i": "3531",
|
|
|
807 |
"scripts": [
|
808 |
"Latn"
|
809 |
-
]
|
810 |
-
"own_tokenizer": false
|
811 |
},
|
812 |
{
|
813 |
"name": "Hawaii Pidgin",
|
814 |
"iso_1_code": null,
|
815 |
"iso_3_code": "hwc",
|
|
|
816 |
"tokenizers": {
|
817 |
"Latn": {
|
818 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
819 |
"original_lang_name": "nigerian_pidgin",
|
820 |
"original_lang_code": "pcm",
|
821 |
-
"
|
822 |
-
|
823 |
-
],
|
824 |
-
"class_name": "StanzaTokenizer",
|
825 |
-
"macrolanguage": false
|
826 |
}
|
827 |
},
|
828 |
-
"children": [],
|
829 |
"node_i": "3532",
|
|
|
830 |
"scripts": [
|
831 |
"Latn"
|
832 |
-
]
|
833 |
-
"own_tokenizer": false
|
834 |
},
|
835 |
{
|
836 |
"name": "Ngatik Men\u2019s Creole",
|
837 |
"iso_1_code": null,
|
838 |
"iso_3_code": "ngm",
|
839 |
-
"tokenizers": {},
|
840 |
"children": [],
|
|
|
841 |
"node_i": "3533",
|
842 |
-
"
|
843 |
-
"
|
844 |
},
|
845 |
{
|
846 |
"name": "Pitcairn-Norfolk",
|
847 |
"iso_1_code": null,
|
848 |
"iso_3_code": "pih",
|
849 |
-
"tokenizers": {},
|
850 |
"children": [],
|
|
|
851 |
"node_i": "3534",
|
852 |
-
"
|
853 |
-
"
|
854 |
},
|
855 |
{
|
856 |
"name": "Pijin",
|
857 |
"iso_1_code": null,
|
858 |
"iso_3_code": "pis",
|
|
|
859 |
"tokenizers": {
|
860 |
"Latn": {
|
861 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
862 |
"original_lang_name": "nigerian_pidgin",
|
863 |
"original_lang_code": "pcm",
|
864 |
-
"
|
865 |
-
|
866 |
-
],
|
867 |
-
"class_name": "StanzaTokenizer",
|
868 |
-
"macrolanguage": false
|
869 |
}
|
870 |
},
|
871 |
-
"children": [],
|
872 |
"node_i": "3535",
|
|
|
873 |
"scripts": [
|
874 |
"Latn"
|
875 |
-
]
|
876 |
-
"own_tokenizer": false
|
877 |
},
|
878 |
{
|
879 |
"name": "Kriol",
|
880 |
"iso_1_code": null,
|
881 |
"iso_3_code": "rop",
|
|
|
882 |
"tokenizers": {
|
883 |
"Latn": {
|
884 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
885 |
"original_lang_name": "nigerian_pidgin",
|
886 |
"original_lang_code": "pcm",
|
887 |
-
"
|
888 |
-
|
889 |
-
],
|
890 |
-
"class_name": "StanzaTokenizer",
|
891 |
-
"macrolanguage": false
|
892 |
}
|
893 |
},
|
894 |
-
"children": [],
|
895 |
"node_i": "3536",
|
|
|
896 |
"scripts": [
|
897 |
"Latn"
|
898 |
-
]
|
899 |
-
"own_tokenizer": false
|
900 |
},
|
901 |
{
|
902 |
"name": "Torres Strait Creole",
|
903 |
"iso_1_code": null,
|
904 |
"iso_3_code": "tcs",
|
|
|
905 |
"tokenizers": {
|
906 |
"Latn": {
|
907 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
908 |
"original_lang_name": "nigerian_pidgin",
|
909 |
"original_lang_code": "pcm",
|
910 |
-
"
|
911 |
-
|
912 |
-
],
|
913 |
-
"class_name": "StanzaTokenizer",
|
914 |
-
"macrolanguage": false
|
915 |
}
|
916 |
},
|
917 |
-
"children": [],
|
918 |
"node_i": "3537",
|
|
|
919 |
"scripts": [
|
920 |
"Latn"
|
921 |
-
]
|
922 |
-
"own_tokenizer": false
|
923 |
},
|
924 |
{
|
925 |
"name": "Tok Pisin",
|
926 |
"iso_1_code": null,
|
927 |
"iso_3_code": "tpi",
|
|
|
928 |
"tokenizers": {
|
929 |
"Latn": {
|
930 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
931 |
"original_lang_name": "nigerian_pidgin",
|
932 |
"original_lang_code": "pcm",
|
933 |
-
"
|
934 |
-
|
935 |
-
],
|
936 |
-
"class_name": "StanzaTokenizer",
|
937 |
-
"macrolanguage": false
|
938 |
}
|
939 |
},
|
940 |
-
"children": [],
|
941 |
"node_i": "3538",
|
|
|
942 |
"scripts": [
|
943 |
"Latn"
|
944 |
-
]
|
945 |
-
"own_tokenizer": false
|
946 |
}
|
947 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
948 |
"node_i": "3530",
|
949 |
-
"
|
950 |
-
"
|
951 |
}
|
952 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
953 |
"node_i": "3496",
|
954 |
-
"
|
955 |
-
"
|
956 |
},
|
957 |
{
|
958 |
"name": "French based",
|
959 |
"iso_1_code": null,
|
960 |
"iso_3_code": null,
|
961 |
-
"tokenizers": {
|
962 |
-
"Arab": {
|
963 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
964 |
-
"original_lang_name": "malay",
|
965 |
-
"original_lang_code": "msa",
|
966 |
-
"scripts": [
|
967 |
-
"Latn",
|
968 |
-
"Arab",
|
969 |
-
"Thai"
|
970 |
-
],
|
971 |
-
"class_name": "SpaCyTokenizer",
|
972 |
-
"macrolanguage": true
|
973 |
-
},
|
974 |
-
"Latn": {
|
975 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
976 |
-
"original_lang_name": "malay",
|
977 |
-
"original_lang_code": "msa",
|
978 |
-
"scripts": [
|
979 |
-
"Latn",
|
980 |
-
"Arab",
|
981 |
-
"Thai"
|
982 |
-
],
|
983 |
-
"class_name": "SpaCyTokenizer",
|
984 |
-
"macrolanguage": true
|
985 |
-
},
|
986 |
-
"Thai": {
|
987 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
988 |
-
"original_lang_name": "malay",
|
989 |
-
"original_lang_code": "msa",
|
990 |
-
"scripts": [
|
991 |
-
"Latn",
|
992 |
-
"Arab",
|
993 |
-
"Thai"
|
994 |
-
],
|
995 |
-
"class_name": "SpaCyTokenizer",
|
996 |
-
"macrolanguage": true
|
997 |
-
}
|
998 |
-
},
|
999 |
"children": [
|
1000 |
{
|
1001 |
"name": "Lesser Antillean French Creole",
|
1002 |
"iso_1_code": null,
|
1003 |
"iso_3_code": "acf",
|
|
|
1004 |
"tokenizers": {
|
1005 |
"Latn": {
|
1006 |
-
"full_object": "
|
1007 |
-
"original_lang_name": "
|
1008 |
-
"original_lang_code": "
|
1009 |
-
"
|
1010 |
-
|
1011 |
-
"Arab",
|
1012 |
-
"Thai"
|
1013 |
-
],
|
1014 |
-
"class_name": "SpaCyTokenizer",
|
1015 |
-
"macrolanguage": true
|
1016 |
}
|
1017 |
},
|
1018 |
-
"children": [],
|
1019 |
"node_i": "3540",
|
|
|
1020 |
"scripts": [
|
1021 |
"Latn"
|
1022 |
-
]
|
1023 |
-
"own_tokenizer": false
|
1024 |
},
|
1025 |
{
|
1026 |
"name": "Tayo",
|
1027 |
"iso_1_code": null,
|
1028 |
"iso_3_code": "cks",
|
1029 |
-
"tokenizers": {},
|
1030 |
"children": [],
|
|
|
1031 |
"node_i": "3541",
|
1032 |
-
"
|
1033 |
-
"
|
1034 |
},
|
1035 |
{
|
1036 |
"name": "Seychelles French Creole",
|
1037 |
"iso_1_code": null,
|
1038 |
"iso_3_code": "crs",
|
|
|
1039 |
"tokenizers": {
|
1040 |
"Latn": {
|
1041 |
-
"full_object": "
|
1042 |
-
"original_lang_name": "
|
1043 |
-
"original_lang_code": "
|
1044 |
-
"
|
1045 |
-
|
1046 |
-
"Arab",
|
1047 |
-
"Thai"
|
1048 |
-
],
|
1049 |
-
"class_name": "SpaCyTokenizer",
|
1050 |
-
"macrolanguage": true
|
1051 |
}
|
1052 |
},
|
1053 |
-
"children": [],
|
1054 |
"node_i": "3542",
|
|
|
1055 |
"scripts": [
|
1056 |
"Latn"
|
1057 |
-
]
|
1058 |
-
"own_tokenizer": false
|
1059 |
},
|
1060 |
{
|
1061 |
"name": "Guadeloupean French Creole",
|
1062 |
"iso_1_code": null,
|
1063 |
"iso_3_code": "gcf",
|
|
|
1064 |
"tokenizers": {
|
1065 |
"Latn": {
|
1066 |
-
"full_object": "
|
1067 |
-
"original_lang_name": "
|
1068 |
-
"original_lang_code": "
|
1069 |
-
"
|
1070 |
-
|
1071 |
-
"Arab",
|
1072 |
-
"Thai"
|
1073 |
-
],
|
1074 |
-
"class_name": "SpaCyTokenizer",
|
1075 |
-
"macrolanguage": true
|
1076 |
}
|
1077 |
},
|
1078 |
-
"children": [],
|
1079 |
"node_i": "3543",
|
|
|
1080 |
"scripts": [
|
1081 |
"Latn"
|
1082 |
-
]
|
1083 |
-
"own_tokenizer": false
|
1084 |
},
|
1085 |
{
|
1086 |
"name": "Guianese French Creole",
|
1087 |
"iso_1_code": null,
|
1088 |
"iso_3_code": "gcr",
|
|
|
1089 |
"tokenizers": {
|
1090 |
"Latn": {
|
1091 |
-
"full_object": "
|
1092 |
-
"original_lang_name": "
|
1093 |
-
"original_lang_code": "
|
1094 |
-
"
|
1095 |
-
|
1096 |
-
"Arab",
|
1097 |
-
"Thai"
|
1098 |
-
],
|
1099 |
-
"class_name": "SpaCyTokenizer",
|
1100 |
-
"macrolanguage": true
|
1101 |
}
|
1102 |
},
|
1103 |
-
"children": [],
|
1104 |
"node_i": "3544",
|
|
|
1105 |
"scripts": [
|
1106 |
"Latn"
|
1107 |
-
]
|
1108 |
-
"own_tokenizer": false
|
1109 |
},
|
1110 |
{
|
1111 |
"name": "Haitian Creole",
|
1112 |
"iso_1_code": "ht",
|
1113 |
"iso_3_code": "hat",
|
|
|
1114 |
"tokenizers": {
|
1115 |
"Latn": {
|
1116 |
-
"full_object": "
|
1117 |
-
"original_lang_name": "
|
1118 |
-
"original_lang_code": "
|
1119 |
-
"
|
1120 |
-
|
1121 |
-
"Arab",
|
1122 |
-
"Thai"
|
1123 |
-
],
|
1124 |
-
"class_name": "SpaCyTokenizer",
|
1125 |
-
"macrolanguage": true
|
1126 |
}
|
1127 |
},
|
1128 |
-
"children": [],
|
1129 |
"node_i": "3545",
|
|
|
1130 |
"scripts": [
|
1131 |
"Latn"
|
1132 |
-
]
|
1133 |
-
"own_tokenizer": false
|
1134 |
},
|
1135 |
{
|
1136 |
"name": "Karipuna French Creole",
|
1137 |
"iso_1_code": null,
|
1138 |
"iso_3_code": "kmv",
|
1139 |
-
"tokenizers": {},
|
1140 |
"children": [],
|
|
|
1141 |
"node_i": "3546",
|
1142 |
-
"
|
1143 |
-
"
|
1144 |
},
|
1145 |
{
|
1146 |
"name": "Louisiana Creole",
|
1147 |
"iso_1_code": null,
|
1148 |
"iso_3_code": "lou",
|
1149 |
-
"tokenizers": {},
|
1150 |
"children": [],
|
|
|
1151 |
"node_i": "3547",
|
1152 |
-
"
|
1153 |
-
"
|
1154 |
},
|
1155 |
{
|
1156 |
"name": "Morisyen",
|
1157 |
"iso_1_code": null,
|
1158 |
"iso_3_code": "mfe",
|
|
|
1159 |
"tokenizers": {
|
1160 |
"Latn": {
|
1161 |
-
"full_object": "
|
1162 |
-
"original_lang_name": "
|
1163 |
-
"original_lang_code": "
|
1164 |
-
"
|
1165 |
-
|
1166 |
-
"Arab",
|
1167 |
-
"Thai"
|
1168 |
-
],
|
1169 |
-
"class_name": "SpaCyTokenizer",
|
1170 |
-
"macrolanguage": true
|
1171 |
}
|
1172 |
},
|
1173 |
-
"children": [],
|
1174 |
"node_i": "3548",
|
|
|
1175 |
"scripts": [
|
1176 |
"Latn"
|
1177 |
-
]
|
1178 |
-
"own_tokenizer": false
|
1179 |
},
|
1180 |
{
|
1181 |
"name": "R\u00e9union French Creole",
|
1182 |
"iso_1_code": null,
|
1183 |
"iso_3_code": "rcf",
|
|
|
1184 |
"tokenizers": {
|
1185 |
"Latn": {
|
1186 |
-
"full_object": "
|
1187 |
-
"original_lang_name": "
|
1188 |
-
"original_lang_code": "
|
1189 |
-
"
|
1190 |
-
|
1191 |
-
"Arab",
|
1192 |
-
"Thai"
|
1193 |
-
],
|
1194 |
-
"class_name": "SpaCyTokenizer",
|
1195 |
-
"macrolanguage": true
|
1196 |
}
|
1197 |
},
|
1198 |
-
"children": [],
|
1199 |
"node_i": "3549",
|
|
|
1200 |
"scripts": [
|
1201 |
"Latn"
|
1202 |
-
]
|
1203 |
-
"own_tokenizer": false
|
1204 |
},
|
1205 |
{
|
1206 |
"name": "San Miguel French Creole",
|
1207 |
"iso_1_code": null,
|
1208 |
"iso_3_code": "scf",
|
1209 |
-
"tokenizers": {},
|
1210 |
"children": [],
|
|
|
1211 |
"node_i": "3550",
|
1212 |
-
"
|
1213 |
-
"
|
1214 |
}
|
1215 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1216 |
"node_i": "3539",
|
1217 |
-
"
|
1218 |
-
"
|
1219 |
},
|
1220 |
{
|
1221 |
"name": "German based",
|
1222 |
"iso_1_code": null,
|
1223 |
"iso_3_code": null,
|
1224 |
-
"tokenizers": {},
|
1225 |
"children": [
|
1226 |
{
|
1227 |
"name": "Unserdeutsch",
|
1228 |
"iso_1_code": null,
|
1229 |
"iso_3_code": "uln",
|
1230 |
-
"tokenizers": {},
|
1231 |
"children": [],
|
|
|
1232 |
"node_i": "3552",
|
1233 |
-
"
|
1234 |
-
"
|
1235 |
}
|
1236 |
],
|
|
|
1237 |
"node_i": "3551",
|
1238 |
-
"
|
1239 |
-
"
|
1240 |
},
|
1241 |
{
|
1242 |
"name": "Hindi based",
|
1243 |
"iso_1_code": null,
|
1244 |
"iso_3_code": null,
|
1245 |
-
"tokenizers": {},
|
1246 |
"children": [
|
1247 |
{
|
1248 |
"name": "Andaman Hindi Creole",
|
1249 |
"iso_1_code": null,
|
1250 |
"iso_3_code": "hca",
|
1251 |
-
"tokenizers": {},
|
1252 |
"children": [],
|
|
|
1253 |
"node_i": "3554",
|
1254 |
-
"
|
1255 |
-
"
|
1256 |
}
|
1257 |
],
|
|
|
1258 |
"node_i": "3553",
|
1259 |
-
"
|
1260 |
-
"
|
1261 |
},
|
1262 |
{
|
1263 |
"name": "Iberian based",
|
1264 |
"iso_1_code": null,
|
1265 |
"iso_3_code": null,
|
1266 |
-
"tokenizers": {
|
1267 |
-
"Arab": {
|
1268 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1269 |
-
"original_lang_name": "malay",
|
1270 |
-
"original_lang_code": "msa",
|
1271 |
-
"scripts": [
|
1272 |
-
"Latn",
|
1273 |
-
"Arab",
|
1274 |
-
"Thai"
|
1275 |
-
],
|
1276 |
-
"class_name": "SpaCyTokenizer",
|
1277 |
-
"macrolanguage": true
|
1278 |
-
},
|
1279 |
-
"Latn": {
|
1280 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1281 |
-
"original_lang_name": "malay",
|
1282 |
-
"original_lang_code": "msa",
|
1283 |
-
"scripts": [
|
1284 |
-
"Latn",
|
1285 |
-
"Arab",
|
1286 |
-
"Thai"
|
1287 |
-
],
|
1288 |
-
"class_name": "SpaCyTokenizer",
|
1289 |
-
"macrolanguage": true
|
1290 |
-
},
|
1291 |
-
"Thai": {
|
1292 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1293 |
-
"original_lang_name": "malay",
|
1294 |
-
"original_lang_code": "msa",
|
1295 |
-
"scripts": [
|
1296 |
-
"Latn",
|
1297 |
-
"Arab",
|
1298 |
-
"Thai"
|
1299 |
-
],
|
1300 |
-
"class_name": "SpaCyTokenizer",
|
1301 |
-
"macrolanguage": true
|
1302 |
-
}
|
1303 |
-
},
|
1304 |
"children": [
|
1305 |
{
|
1306 |
"name": "Papiamentu",
|
1307 |
"iso_1_code": null,
|
1308 |
"iso_3_code": "pap",
|
|
|
1309 |
"tokenizers": {
|
1310 |
"Latn": {
|
1311 |
-
"full_object": "
|
1312 |
-
"original_lang_name": "
|
1313 |
-
"original_lang_code": "
|
1314 |
-
"
|
1315 |
-
|
1316 |
-
"Arab",
|
1317 |
-
"Thai"
|
1318 |
-
],
|
1319 |
-
"class_name": "SpaCyTokenizer",
|
1320 |
-
"macrolanguage": true
|
1321 |
}
|
1322 |
},
|
1323 |
-
"children": [],
|
1324 |
"node_i": "3556",
|
|
|
1325 |
"scripts": [
|
1326 |
"Latn"
|
1327 |
-
]
|
1328 |
-
"own_tokenizer": false
|
1329 |
}
|
1330 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1331 |
"node_i": "3555",
|
1332 |
-
"
|
1333 |
-
"
|
1334 |
},
|
1335 |
{
|
1336 |
"name": "Japanese-based",
|
1337 |
"iso_1_code": null,
|
1338 |
"iso_3_code": null,
|
1339 |
-
"tokenizers": {},
|
1340 |
"children": [
|
1341 |
{
|
1342 |
"name": "Yilan Creole",
|
1343 |
"iso_1_code": null,
|
1344 |
"iso_3_code": "ycr",
|
1345 |
-
"tokenizers": {},
|
1346 |
"children": [],
|
|
|
1347 |
"node_i": "3558",
|
1348 |
-
"
|
1349 |
-
"
|
1350 |
}
|
1351 |
],
|
|
|
1352 |
"node_i": "3557",
|
1353 |
-
"
|
1354 |
-
"
|
1355 |
},
|
1356 |
{
|
1357 |
"name": "Kongo based",
|
1358 |
"iso_1_code": null,
|
1359 |
"iso_3_code": null,
|
1360 |
-
"tokenizers": {
|
1361 |
-
"Arab": {
|
1362 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1363 |
-
"original_lang_name": "malay",
|
1364 |
-
"original_lang_code": "msa",
|
1365 |
-
"scripts": [
|
1366 |
-
"Latn",
|
1367 |
-
"Arab",
|
1368 |
-
"Thai"
|
1369 |
-
],
|
1370 |
-
"class_name": "SpaCyTokenizer",
|
1371 |
-
"macrolanguage": true
|
1372 |
-
},
|
1373 |
-
"Latn": {
|
1374 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1375 |
-
"original_lang_name": "malay",
|
1376 |
-
"original_lang_code": "msa",
|
1377 |
-
"scripts": [
|
1378 |
-
"Latn",
|
1379 |
-
"Arab",
|
1380 |
-
"Thai"
|
1381 |
-
],
|
1382 |
-
"class_name": "SpaCyTokenizer",
|
1383 |
-
"macrolanguage": true
|
1384 |
-
},
|
1385 |
-
"Thai": {
|
1386 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1387 |
-
"original_lang_name": "malay",
|
1388 |
-
"original_lang_code": "msa",
|
1389 |
-
"scripts": [
|
1390 |
-
"Latn",
|
1391 |
-
"Arab",
|
1392 |
-
"Thai"
|
1393 |
-
],
|
1394 |
-
"class_name": "SpaCyTokenizer",
|
1395 |
-
"macrolanguage": true
|
1396 |
-
}
|
1397 |
-
},
|
1398 |
"children": [
|
1399 |
{
|
1400 |
"name": "Kituba",
|
1401 |
"iso_1_code": null,
|
1402 |
"iso_3_code": "ktu",
|
|
|
1403 |
"tokenizers": {
|
1404 |
"Latn": {
|
1405 |
-
"full_object": "
|
1406 |
-
"original_lang_name": "
|
1407 |
-
"original_lang_code": "
|
1408 |
-
"
|
1409 |
-
|
1410 |
-
"Arab",
|
1411 |
-
"Thai"
|
1412 |
-
],
|
1413 |
-
"class_name": "SpaCyTokenizer",
|
1414 |
-
"macrolanguage": true
|
1415 |
}
|
1416 |
},
|
1417 |
-
"children": [],
|
1418 |
"node_i": "3560",
|
|
|
1419 |
"scripts": [
|
1420 |
"Latn"
|
1421 |
-
]
|
1422 |
-
"own_tokenizer": false
|
1423 |
},
|
1424 |
{
|
1425 |
"name": "Kituba",
|
1426 |
"iso_1_code": null,
|
1427 |
"iso_3_code": "mkw",
|
1428 |
-
"tokenizers": {},
|
1429 |
"children": [],
|
|
|
1430 |
"node_i": "3561",
|
1431 |
-
"
|
1432 |
-
"
|
1433 |
}
|
1434 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1435 |
"node_i": "3559",
|
1436 |
-
"
|
1437 |
-
"
|
1438 |
},
|
1439 |
{
|
1440 |
"name": "Malay based",
|
1441 |
"iso_1_code": null,
|
1442 |
"iso_3_code": null,
|
1443 |
-
"tokenizers": {
|
1444 |
-
"Latn": {
|
1445 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1446 |
-
"original_lang_name": "malay",
|
1447 |
-
"original_lang_code": "msa",
|
1448 |
-
"scripts": [
|
1449 |
-
"Latn",
|
1450 |
-
"Arab",
|
1451 |
-
"Thai"
|
1452 |
-
],
|
1453 |
-
"class_name": "SpaCyTokenizer",
|
1454 |
-
"macrolanguage": true
|
1455 |
-
},
|
1456 |
-
"Arab": {
|
1457 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1458 |
-
"original_lang_name": "malay",
|
1459 |
-
"original_lang_code": "msa",
|
1460 |
-
"scripts": [
|
1461 |
-
"Latn",
|
1462 |
-
"Arab",
|
1463 |
-
"Thai"
|
1464 |
-
],
|
1465 |
-
"class_name": "SpaCyTokenizer",
|
1466 |
-
"macrolanguage": true
|
1467 |
-
},
|
1468 |
-
"Thai": {
|
1469 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1470 |
-
"original_lang_name": "malay",
|
1471 |
-
"original_lang_code": "msa",
|
1472 |
-
"scripts": [
|
1473 |
-
"Latn",
|
1474 |
-
"Arab",
|
1475 |
-
"Thai"
|
1476 |
-
],
|
1477 |
-
"class_name": "SpaCyTokenizer",
|
1478 |
-
"macrolanguage": true
|
1479 |
-
}
|
1480 |
-
},
|
1481 |
"children": [
|
1482 |
{
|
1483 |
"name": "Malay, Ambonese",
|
1484 |
"iso_1_code": null,
|
1485 |
"iso_3_code": "abs",
|
|
|
1486 |
"tokenizers": {
|
1487 |
"Latn": {
|
1488 |
-
"full_object": "
|
1489 |
-
"original_lang_name": "
|
1490 |
-
"original_lang_code": "
|
1491 |
-
"
|
1492 |
-
|
1493 |
-
"Arab",
|
1494 |
-
"Thai"
|
1495 |
-
],
|
1496 |
-
"class_name": "SpaCyTokenizer",
|
1497 |
-
"macrolanguage": true
|
1498 |
}
|
1499 |
},
|
1500 |
-
"children": [],
|
1501 |
"node_i": "3563",
|
|
|
1502 |
"scripts": [
|
1503 |
"Latn"
|
1504 |
-
]
|
1505 |
-
"own_tokenizer": false
|
1506 |
},
|
1507 |
{
|
1508 |
"name": "Betawi",
|
1509 |
"iso_1_code": null,
|
1510 |
"iso_3_code": "bew",
|
|
|
1511 |
"tokenizers": {
|
1512 |
"Latn": {
|
1513 |
-
"full_object": "
|
1514 |
-
"original_lang_name": "
|
1515 |
-
"original_lang_code": "
|
1516 |
-
"
|
1517 |
-
|
1518 |
-
"Arab",
|
1519 |
-
"Thai"
|
1520 |
-
],
|
1521 |
-
"class_name": "SpaCyTokenizer",
|
1522 |
-
"macrolanguage": true
|
1523 |
}
|
1524 |
},
|
1525 |
-
"children": [],
|
1526 |
"node_i": "3564",
|
|
|
1527 |
"scripts": [
|
1528 |
"Latn"
|
1529 |
-
]
|
1530 |
-
"own_tokenizer": false
|
1531 |
},
|
1532 |
{
|
1533 |
"name": "Malay, Banda",
|
1534 |
"iso_1_code": null,
|
1535 |
"iso_3_code": "bpq",
|
1536 |
-
"tokenizers": {},
|
1537 |
"children": [],
|
|
|
1538 |
"node_i": "3565",
|
1539 |
-
"
|
1540 |
-
"
|
1541 |
},
|
1542 |
{
|
1543 |
"name": "Malaccan Malay Creole",
|
1544 |
"iso_1_code": null,
|
1545 |
"iso_3_code": "ccm",
|
1546 |
-
"tokenizers": {},
|
1547 |
"children": [],
|
|
|
1548 |
"node_i": "3566",
|
1549 |
-
"
|
1550 |
-
"
|
1551 |
},
|
1552 |
{
|
1553 |
"name": "Malay, Cocos Islands",
|
1554 |
"iso_1_code": "ms",
|
1555 |
"iso_3_code": "coa",
|
1556 |
-
"tokenizers": {
|
1557 |
-
"Latn": {
|
1558 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1559 |
-
"original_lang_name": "malay",
|
1560 |
-
"original_lang_code": "msa",
|
1561 |
-
"scripts": [
|
1562 |
-
"Latn",
|
1563 |
-
"Arab",
|
1564 |
-
"Thai"
|
1565 |
-
],
|
1566 |
-
"class_name": "SpaCyTokenizer",
|
1567 |
-
"macrolanguage": true
|
1568 |
-
},
|
1569 |
-
"Arab": {
|
1570 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1571 |
-
"original_lang_name": "malay",
|
1572 |
-
"original_lang_code": "msa",
|
1573 |
-
"scripts": [
|
1574 |
-
"Latn",
|
1575 |
-
"Arab",
|
1576 |
-
"Thai"
|
1577 |
-
],
|
1578 |
-
"class_name": "SpaCyTokenizer",
|
1579 |
-
"macrolanguage": true
|
1580 |
-
},
|
1581 |
-
"Thai": {
|
1582 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1583 |
-
"original_lang_name": "malay",
|
1584 |
-
"original_lang_code": "msa",
|
1585 |
-
"scripts": [
|
1586 |
-
"Latn",
|
1587 |
-
"Arab",
|
1588 |
-
"Thai"
|
1589 |
-
],
|
1590 |
-
"class_name": "SpaCyTokenizer",
|
1591 |
-
"macrolanguage": true
|
1592 |
-
}
|
1593 |
-
},
|
1594 |
"children": [],
|
|
|
1595 |
"node_i": "3567",
|
1596 |
-
"
|
1597 |
-
"
|
1598 |
},
|
1599 |
{
|
1600 |
"name": "Malay, Larantuka",
|
1601 |
"iso_1_code": null,
|
1602 |
"iso_3_code": "lrt",
|
1603 |
-
"tokenizers": {},
|
1604 |
"children": [],
|
|
|
1605 |
"node_i": "3568",
|
1606 |
-
"
|
1607 |
-
"
|
1608 |
},
|
1609 |
{
|
1610 |
"name": "Malay, North Moluccan",
|
1611 |
"iso_1_code": "ms",
|
1612 |
"iso_3_code": "max",
|
|
|
1613 |
"tokenizers": {
|
1614 |
"Latn": {
|
1615 |
-
"full_object": "
|
1616 |
-
"original_lang_name": "
|
1617 |
-
"original_lang_code": "
|
1618 |
-
"
|
1619 |
-
|
1620 |
-
"Arab",
|
1621 |
-
"Thai"
|
1622 |
-
],
|
1623 |
-
"class_name": "SpaCyTokenizer",
|
1624 |
-
"macrolanguage": true
|
1625 |
-
},
|
1626 |
-
"Arab": {
|
1627 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1628 |
-
"original_lang_name": "malay",
|
1629 |
-
"original_lang_code": "msa",
|
1630 |
-
"scripts": [
|
1631 |
-
"Latn",
|
1632 |
-
"Arab",
|
1633 |
-
"Thai"
|
1634 |
-
],
|
1635 |
-
"class_name": "SpaCyTokenizer",
|
1636 |
-
"macrolanguage": true
|
1637 |
-
},
|
1638 |
-
"Thai": {
|
1639 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1640 |
-
"original_lang_name": "malay",
|
1641 |
-
"original_lang_code": "msa",
|
1642 |
-
"scripts": [
|
1643 |
-
"Latn",
|
1644 |
-
"Arab",
|
1645 |
-
"Thai"
|
1646 |
-
],
|
1647 |
-
"class_name": "SpaCyTokenizer",
|
1648 |
-
"macrolanguage": true
|
1649 |
}
|
1650 |
},
|
1651 |
-
"children": [],
|
1652 |
"node_i": "3569",
|
|
|
1653 |
"scripts": [
|
1654 |
"Latn"
|
1655 |
-
]
|
1656 |
-
"own_tokenizer": true
|
1657 |
},
|
1658 |
{
|
1659 |
"name": "Malay, Baba",
|
1660 |
"iso_1_code": null,
|
1661 |
"iso_3_code": "mbf",
|
|
|
1662 |
"tokenizers": {
|
1663 |
"Latn": {
|
1664 |
-
"full_object": "
|
1665 |
-
"original_lang_name": "
|
1666 |
-
"original_lang_code": "
|
1667 |
-
"
|
1668 |
-
|
1669 |
-
"Arab",
|
1670 |
-
"Thai"
|
1671 |
-
],
|
1672 |
-
"class_name": "SpaCyTokenizer",
|
1673 |
-
"macrolanguage": true
|
1674 |
}
|
1675 |
},
|
1676 |
-
"children": [],
|
1677 |
"node_i": "3570",
|
|
|
1678 |
"scripts": [
|
1679 |
"Latn"
|
1680 |
-
]
|
1681 |
-
"own_tokenizer": false
|
1682 |
},
|
1683 |
{
|
1684 |
"name": "Malay, Balinese",
|
1685 |
"iso_1_code": null,
|
1686 |
"iso_3_code": "mhp",
|
1687 |
-
"tokenizers": {},
|
1688 |
"children": [],
|
|
|
1689 |
"node_i": "3571",
|
1690 |
-
"
|
1691 |
-
"
|
1692 |
},
|
1693 |
{
|
1694 |
"name": "Malay, Kupang",
|
1695 |
"iso_1_code": null,
|
1696 |
"iso_3_code": "mkn",
|
|
|
1697 |
"tokenizers": {
|
1698 |
"Latn": {
|
1699 |
-
"full_object": "
|
1700 |
-
"original_lang_name": "
|
1701 |
-
"original_lang_code": "
|
1702 |
-
"
|
1703 |
-
|
1704 |
-
"Arab",
|
1705 |
-
"Thai"
|
1706 |
-
],
|
1707 |
-
"class_name": "SpaCyTokenizer",
|
1708 |
-
"macrolanguage": true
|
1709 |
}
|
1710 |
},
|
1711 |
-
"children": [],
|
1712 |
"node_i": "3572",
|
|
|
1713 |
"scripts": [
|
1714 |
"Latn"
|
1715 |
-
]
|
1716 |
-
"own_tokenizer": false
|
1717 |
},
|
1718 |
{
|
1719 |
"name": "Indonesian, Peranakan",
|
1720 |
"iso_1_code": null,
|
1721 |
"iso_3_code": "pea",
|
1722 |
-
"tokenizers": {},
|
1723 |
"children": [],
|
|
|
1724 |
"node_i": "3573",
|
1725 |
-
"
|
1726 |
-
"
|
1727 |
},
|
1728 |
{
|
1729 |
"name": "Malay, Papuan",
|
1730 |
"iso_1_code": null,
|
1731 |
"iso_3_code": "pmy",
|
1732 |
-
"tokenizers": {},
|
1733 |
"children": [],
|
|
|
1734 |
"node_i": "3574",
|
1735 |
-
"
|
1736 |
-
"
|
1737 |
},
|
1738 |
{
|
1739 |
"name": "Sri Lankan Malay Creole",
|
1740 |
"iso_1_code": null,
|
1741 |
"iso_3_code": "sci",
|
1742 |
-
"tokenizers": {},
|
1743 |
"children": [],
|
|
|
1744 |
"node_i": "3575",
|
1745 |
-
"
|
1746 |
-
"
|
1747 |
},
|
1748 |
{
|
1749 |
"name": "Malay, Manado",
|
1750 |
"iso_1_code": "ms",
|
1751 |
"iso_3_code": "xmm",
|
|
|
1752 |
"tokenizers": {
|
1753 |
"Latn": {
|
1754 |
-
"full_object": "
|
1755 |
-
"original_lang_name": "
|
1756 |
-
"original_lang_code": "
|
1757 |
-
"
|
1758 |
-
|
1759 |
-
"Arab",
|
1760 |
-
"Thai"
|
1761 |
-
],
|
1762 |
-
"class_name": "SpaCyTokenizer",
|
1763 |
-
"macrolanguage": true
|
1764 |
-
},
|
1765 |
-
"Arab": {
|
1766 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1767 |
-
"original_lang_name": "malay",
|
1768 |
-
"original_lang_code": "msa",
|
1769 |
-
"scripts": [
|
1770 |
-
"Latn",
|
1771 |
-
"Arab",
|
1772 |
-
"Thai"
|
1773 |
-
],
|
1774 |
-
"class_name": "SpaCyTokenizer",
|
1775 |
-
"macrolanguage": true
|
1776 |
-
},
|
1777 |
-
"Thai": {
|
1778 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1779 |
-
"original_lang_name": "malay",
|
1780 |
-
"original_lang_code": "msa",
|
1781 |
-
"scripts": [
|
1782 |
-
"Latn",
|
1783 |
-
"Arab",
|
1784 |
-
"Thai"
|
1785 |
-
],
|
1786 |
-
"class_name": "SpaCyTokenizer",
|
1787 |
-
"macrolanguage": true
|
1788 |
}
|
1789 |
},
|
1790 |
-
"children": [],
|
1791 |
"node_i": "3576",
|
|
|
1792 |
"scripts": [
|
1793 |
"Latn"
|
1794 |
-
]
|
1795 |
-
"own_tokenizer": true
|
1796 |
}
|
1797 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1798 |
"node_i": "3562",
|
1799 |
-
"
|
1800 |
-
"
|
1801 |
},
|
1802 |
{
|
1803 |
"name": "Ngbandi based",
|
1804 |
"iso_1_code": null,
|
1805 |
"iso_3_code": null,
|
1806 |
-
"tokenizers": {
|
1807 |
-
"Arab": {
|
1808 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1809 |
-
"original_lang_name": "malay",
|
1810 |
-
"original_lang_code": "msa",
|
1811 |
-
"scripts": [
|
1812 |
-
"Latn",
|
1813 |
-
"Arab",
|
1814 |
-
"Thai"
|
1815 |
-
],
|
1816 |
-
"class_name": "SpaCyTokenizer",
|
1817 |
-
"macrolanguage": true
|
1818 |
-
},
|
1819 |
-
"Latn": {
|
1820 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1821 |
-
"original_lang_name": "malay",
|
1822 |
-
"original_lang_code": "msa",
|
1823 |
-
"scripts": [
|
1824 |
-
"Latn",
|
1825 |
-
"Arab",
|
1826 |
-
"Thai"
|
1827 |
-
],
|
1828 |
-
"class_name": "SpaCyTokenizer",
|
1829 |
-
"macrolanguage": true
|
1830 |
-
},
|
1831 |
-
"Thai": {
|
1832 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1833 |
-
"original_lang_name": "malay",
|
1834 |
-
"original_lang_code": "msa",
|
1835 |
-
"scripts": [
|
1836 |
-
"Latn",
|
1837 |
-
"Arab",
|
1838 |
-
"Thai"
|
1839 |
-
],
|
1840 |
-
"class_name": "SpaCyTokenizer",
|
1841 |
-
"macrolanguage": true
|
1842 |
-
}
|
1843 |
-
},
|
1844 |
"children": [
|
1845 |
{
|
1846 |
"name": "Sango",
|
1847 |
"iso_1_code": "sg",
|
1848 |
"iso_3_code": "sag",
|
|
|
1849 |
"tokenizers": {
|
1850 |
"Latn": {
|
1851 |
-
"full_object": "
|
1852 |
-
"original_lang_name": "
|
1853 |
-
"original_lang_code": "
|
1854 |
-
"
|
1855 |
-
|
1856 |
-
"Arab",
|
1857 |
-
"Thai"
|
1858 |
-
],
|
1859 |
-
"class_name": "SpaCyTokenizer",
|
1860 |
-
"macrolanguage": true
|
1861 |
}
|
1862 |
},
|
1863 |
-
"children": [],
|
1864 |
"node_i": "3578",
|
|
|
1865 |
"scripts": [
|
1866 |
"Latn"
|
1867 |
-
]
|
1868 |
-
"own_tokenizer": false
|
1869 |
},
|
1870 |
{
|
1871 |
"name": "Sango, Riverain",
|
1872 |
"iso_1_code": null,
|
1873 |
"iso_3_code": "snj",
|
1874 |
-
"tokenizers": {},
|
1875 |
"children": [],
|
|
|
1876 |
"node_i": "3579",
|
1877 |
-
"
|
1878 |
-
"
|
1879 |
}
|
1880 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1881 |
"node_i": "3577",
|
1882 |
-
"
|
1883 |
-
"
|
1884 |
},
|
1885 |
{
|
1886 |
"name": "Portuguese based",
|
1887 |
"iso_1_code": null,
|
1888 |
"iso_3_code": null,
|
1889 |
-
"tokenizers": {
|
1890 |
-
"Arab": {
|
1891 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1892 |
-
"original_lang_name": "malay",
|
1893 |
-
"original_lang_code": "msa",
|
1894 |
-
"scripts": [
|
1895 |
-
"Latn",
|
1896 |
-
"Arab",
|
1897 |
-
"Thai"
|
1898 |
-
],
|
1899 |
-
"class_name": "SpaCyTokenizer",
|
1900 |
-
"macrolanguage": true
|
1901 |
-
},
|
1902 |
-
"Latn": {
|
1903 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1904 |
-
"original_lang_name": "malay",
|
1905 |
-
"original_lang_code": "msa",
|
1906 |
-
"scripts": [
|
1907 |
-
"Latn",
|
1908 |
-
"Arab",
|
1909 |
-
"Thai"
|
1910 |
-
],
|
1911 |
-
"class_name": "SpaCyTokenizer",
|
1912 |
-
"macrolanguage": true
|
1913 |
-
},
|
1914 |
-
"Thai": {
|
1915 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
1916 |
-
"original_lang_name": "malay",
|
1917 |
-
"original_lang_code": "msa",
|
1918 |
-
"scripts": [
|
1919 |
-
"Latn",
|
1920 |
-
"Arab",
|
1921 |
-
"Thai"
|
1922 |
-
],
|
1923 |
-
"class_name": "SpaCyTokenizer",
|
1924 |
-
"macrolanguage": true
|
1925 |
-
}
|
1926 |
-
},
|
1927 |
"children": [
|
1928 |
{
|
1929 |
"name": "Angolar",
|
1930 |
"iso_1_code": null,
|
1931 |
"iso_3_code": "aoa",
|
1932 |
-
"tokenizers": {},
|
1933 |
"children": [],
|
|
|
1934 |
"node_i": "3581",
|
1935 |
-
"
|
1936 |
-
"
|
1937 |
},
|
1938 |
{
|
1939 |
"name": "Cafundo Creole",
|
1940 |
"iso_1_code": null,
|
1941 |
"iso_3_code": "ccd",
|
1942 |
-
"tokenizers": {},
|
1943 |
"children": [],
|
|
|
1944 |
"node_i": "3582",
|
1945 |
-
"
|
1946 |
-
"
|
1947 |
},
|
1948 |
{
|
1949 |
"name": "S\u00e3otomense",
|
1950 |
"iso_1_code": null,
|
1951 |
"iso_3_code": "cri",
|
|
|
1952 |
"tokenizers": {
|
1953 |
"Latn": {
|
1954 |
-
"full_object": "
|
1955 |
-
"original_lang_name": "
|
1956 |
-
"original_lang_code": "
|
1957 |
-
"
|
1958 |
-
|
1959 |
-
"Arab",
|
1960 |
-
"Thai"
|
1961 |
-
],
|
1962 |
-
"class_name": "SpaCyTokenizer",
|
1963 |
-
"macrolanguage": true
|
1964 |
}
|
1965 |
},
|
1966 |
-
"children": [],
|
1967 |
"node_i": "3583",
|
|
|
1968 |
"scripts": [
|
1969 |
"Latn"
|
1970 |
-
]
|
1971 |
-
"own_tokenizer": false
|
1972 |
},
|
1973 |
{
|
1974 |
"name": "Fa d\u2019Ambu",
|
1975 |
"iso_1_code": null,
|
1976 |
"iso_3_code": "fab",
|
1977 |
-
"tokenizers": {},
|
1978 |
"children": [],
|
|
|
1979 |
"node_i": "3584",
|
1980 |
-
"
|
1981 |
-
"
|
1982 |
},
|
1983 |
{
|
1984 |
"name": "Indo-Portuguese",
|
1985 |
"iso_1_code": null,
|
1986 |
"iso_3_code": "idb",
|
1987 |
-
"tokenizers": {},
|
1988 |
"children": [],
|
|
|
1989 |
"node_i": "3585",
|
1990 |
-
"
|
1991 |
-
"
|
1992 |
},
|
1993 |
{
|
1994 |
"name": "Kabuverdianu",
|
1995 |
"iso_1_code": null,
|
1996 |
"iso_3_code": "kea",
|
|
|
1997 |
"tokenizers": {
|
1998 |
"Latn": {
|
1999 |
-
"full_object": "
|
2000 |
-
"original_lang_name": "
|
2001 |
-
"original_lang_code": "
|
2002 |
-
"
|
2003 |
-
|
2004 |
-
"Arab",
|
2005 |
-
"Thai"
|
2006 |
-
],
|
2007 |
-
"class_name": "SpaCyTokenizer",
|
2008 |
-
"macrolanguage": true
|
2009 |
}
|
2010 |
},
|
2011 |
-
"children": [],
|
2012 |
"node_i": "3586",
|
|
|
2013 |
"scripts": [
|
2014 |
"Latn"
|
2015 |
-
]
|
2016 |
-
"own_tokenizer": false
|
2017 |
},
|
2018 |
{
|
2019 |
"name": "Malaccan Portuguese Creole",
|
2020 |
"iso_1_code": null,
|
2021 |
"iso_3_code": "mcm",
|
2022 |
-
"tokenizers": {},
|
2023 |
"children": [],
|
|
|
2024 |
"node_i": "3587",
|
2025 |
-
"
|
2026 |
-
"
|
2027 |
},
|
2028 |
{
|
2029 |
"name": "Macanese",
|
2030 |
"iso_1_code": null,
|
2031 |
"iso_3_code": "mzs",
|
2032 |
-
"tokenizers": {},
|
2033 |
"children": [],
|
|
|
2034 |
"node_i": "3588",
|
2035 |
-
"
|
2036 |
-
"
|
2037 |
},
|
2038 |
{
|
2039 |
"name": "Guinea-Bissau Creole",
|
2040 |
"iso_1_code": null,
|
2041 |
"iso_3_code": "pov",
|
|
|
2042 |
"tokenizers": {
|
2043 |
"Latn": {
|
2044 |
-
"full_object": "
|
2045 |
-
"original_lang_name": "
|
2046 |
-
"original_lang_code": "
|
2047 |
-
"
|
2048 |
-
|
2049 |
-
"Arab",
|
2050 |
-
"Thai"
|
2051 |
-
],
|
2052 |
-
"class_name": "SpaCyTokenizer",
|
2053 |
-
"macrolanguage": true
|
2054 |
}
|
2055 |
},
|
2056 |
-
"children": [],
|
2057 |
"node_i": "3589",
|
|
|
2058 |
"scripts": [
|
2059 |
"Latn"
|
2060 |
-
]
|
2061 |
-
"own_tokenizer": false
|
2062 |
},
|
2063 |
{
|
2064 |
"name": "Principense",
|
2065 |
"iso_1_code": null,
|
2066 |
"iso_3_code": "pre",
|
2067 |
-
"tokenizers": {},
|
2068 |
"children": [],
|
|
|
2069 |
"node_i": "3590",
|
2070 |
-
"
|
2071 |
-
"
|
2072 |
},
|
2073 |
{
|
2074 |
"name": "Ternate\u00f1o",
|
2075 |
"iso_1_code": null,
|
2076 |
"iso_3_code": "tmg",
|
2077 |
-
"tokenizers": {},
|
2078 |
"children": [],
|
|
|
2079 |
"node_i": "3591",
|
2080 |
-
"
|
2081 |
-
"
|
2082 |
},
|
2083 |
{
|
2084 |
"name": "Pidgin, Timor",
|
2085 |
"iso_1_code": null,
|
2086 |
"iso_3_code": "tvy",
|
2087 |
-
"tokenizers": {},
|
2088 |
"children": [],
|
|
|
2089 |
"node_i": "3592",
|
2090 |
-
"
|
2091 |
-
"
|
2092 |
},
|
2093 |
{
|
2094 |
"name": "Korlai Portuguese Creole",
|
2095 |
"iso_1_code": null,
|
2096 |
"iso_3_code": "vkp",
|
2097 |
-
"tokenizers": {},
|
2098 |
"children": [],
|
|
|
2099 |
"node_i": "3593",
|
2100 |
-
"
|
2101 |
-
"
|
2102 |
}
|
2103 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2104 |
"node_i": "3580",
|
2105 |
-
"
|
2106 |
-
"
|
2107 |
},
|
2108 |
{
|
2109 |
"name": "Spanish based",
|
2110 |
"iso_1_code": null,
|
2111 |
"iso_3_code": null,
|
2112 |
-
"tokenizers": {
|
2113 |
-
"Arab": {
|
2114 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
2115 |
-
"original_lang_name": "malay",
|
2116 |
-
"original_lang_code": "msa",
|
2117 |
-
"scripts": [
|
2118 |
-
"Latn",
|
2119 |
-
"Arab",
|
2120 |
-
"Thai"
|
2121 |
-
],
|
2122 |
-
"class_name": "SpaCyTokenizer",
|
2123 |
-
"macrolanguage": true
|
2124 |
-
},
|
2125 |
-
"Latn": {
|
2126 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
2127 |
-
"original_lang_name": "malay",
|
2128 |
-
"original_lang_code": "msa",
|
2129 |
-
"scripts": [
|
2130 |
-
"Latn",
|
2131 |
-
"Arab",
|
2132 |
-
"Thai"
|
2133 |
-
],
|
2134 |
-
"class_name": "SpaCyTokenizer",
|
2135 |
-
"macrolanguage": true
|
2136 |
-
},
|
2137 |
-
"Thai": {
|
2138 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
2139 |
-
"original_lang_name": "malay",
|
2140 |
-
"original_lang_code": "msa",
|
2141 |
-
"scripts": [
|
2142 |
-
"Latn",
|
2143 |
-
"Arab",
|
2144 |
-
"Thai"
|
2145 |
-
],
|
2146 |
-
"class_name": "SpaCyTokenizer",
|
2147 |
-
"macrolanguage": true
|
2148 |
-
}
|
2149 |
-
},
|
2150 |
"children": [
|
2151 |
{
|
2152 |
"name": "Chavacano",
|
2153 |
"iso_1_code": null,
|
2154 |
"iso_3_code": "cbk",
|
|
|
2155 |
"tokenizers": {
|
2156 |
"Latn": {
|
2157 |
-
"full_object": "
|
2158 |
-
"original_lang_name": "
|
2159 |
-
"original_lang_code": "
|
2160 |
-
"
|
2161 |
-
|
2162 |
-
"Arab",
|
2163 |
-
"Thai"
|
2164 |
-
],
|
2165 |
-
"class_name": "SpaCyTokenizer",
|
2166 |
-
"macrolanguage": true
|
2167 |
}
|
2168 |
},
|
2169 |
-
"children": [],
|
2170 |
"node_i": "3595",
|
|
|
2171 |
"scripts": [
|
2172 |
"Latn"
|
2173 |
-
]
|
2174 |
-
"own_tokenizer": false
|
2175 |
},
|
2176 |
{
|
2177 |
"name": "Palenquero",
|
2178 |
"iso_1_code": null,
|
2179 |
"iso_3_code": "pln",
|
2180 |
-
"tokenizers": {},
|
2181 |
"children": [],
|
|
|
2182 |
"node_i": "3596",
|
2183 |
-
"
|
2184 |
-
"
|
2185 |
}
|
2186 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2187 |
"node_i": "3594",
|
2188 |
-
"
|
2189 |
-
"
|
2190 |
},
|
2191 |
{
|
2192 |
"name": "Swahili based",
|
2193 |
"iso_1_code": null,
|
2194 |
"iso_3_code": null,
|
2195 |
-
"tokenizers": {},
|
2196 |
"children": [
|
2197 |
{
|
2198 |
"name": "Cutchi-Swahili",
|
2199 |
"iso_1_code": null,
|
2200 |
"iso_3_code": "ccl",
|
2201 |
-
"tokenizers": {},
|
2202 |
"children": [],
|
|
|
2203 |
"node_i": "3598",
|
2204 |
-
"
|
2205 |
-
"
|
2206 |
}
|
2207 |
],
|
|
|
2208 |
"node_i": "3597",
|
2209 |
-
"
|
2210 |
-
"
|
2211 |
},
|
2212 |
{
|
2213 |
"name": "Tetun based",
|
2214 |
"iso_1_code": null,
|
2215 |
"iso_3_code": null,
|
2216 |
-
"tokenizers": {
|
2217 |
-
"Arab": {
|
2218 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
2219 |
-
"original_lang_name": "malay",
|
2220 |
-
"original_lang_code": "msa",
|
2221 |
-
"scripts": [
|
2222 |
-
"Latn",
|
2223 |
-
"Arab",
|
2224 |
-
"Thai"
|
2225 |
-
],
|
2226 |
-
"class_name": "SpaCyTokenizer",
|
2227 |
-
"macrolanguage": true
|
2228 |
-
},
|
2229 |
-
"Latn": {
|
2230 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
2231 |
-
"original_lang_name": "malay",
|
2232 |
-
"original_lang_code": "msa",
|
2233 |
-
"scripts": [
|
2234 |
-
"Latn",
|
2235 |
-
"Arab",
|
2236 |
-
"Thai"
|
2237 |
-
],
|
2238 |
-
"class_name": "SpaCyTokenizer",
|
2239 |
-
"macrolanguage": true
|
2240 |
-
},
|
2241 |
-
"Thai": {
|
2242 |
-
"full_object": "SpaCyTokenizer(\"ms\")",
|
2243 |
-
"original_lang_name": "malay",
|
2244 |
-
"original_lang_code": "msa",
|
2245 |
-
"scripts": [
|
2246 |
-
"Latn",
|
2247 |
-
"Arab",
|
2248 |
-
"Thai"
|
2249 |
-
],
|
2250 |
-
"class_name": "SpaCyTokenizer",
|
2251 |
-
"macrolanguage": true
|
2252 |
-
}
|
2253 |
-
},
|
2254 |
"children": [
|
2255 |
{
|
2256 |
"name": "Tetun Dili",
|
2257 |
"iso_1_code": null,
|
2258 |
"iso_3_code": "tdt",
|
|
|
2259 |
"tokenizers": {
|
2260 |
"Latn": {
|
2261 |
-
"full_object": "
|
2262 |
-
"original_lang_name": "
|
2263 |
-
"original_lang_code": "
|
2264 |
-
"
|
2265 |
-
|
2266 |
-
"Arab",
|
2267 |
-
"Thai"
|
2268 |
-
],
|
2269 |
-
"class_name": "SpaCyTokenizer",
|
2270 |
-
"macrolanguage": true
|
2271 |
}
|
2272 |
},
|
2273 |
-
"children": [],
|
2274 |
"node_i": "3600",
|
|
|
2275 |
"scripts": [
|
2276 |
"Latn"
|
2277 |
-
]
|
2278 |
-
"own_tokenizer": false
|
2279 |
}
|
2280 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2281 |
"node_i": "3599",
|
2282 |
-
"
|
2283 |
-
"
|
2284 |
}
|
2285 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2286 |
"node_i": "3481",
|
2287 |
-
"
|
2288 |
-
"
|
2289 |
}
|
|
|
2 |
"name": "Creole",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Afrikaans based",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Flaaitaal",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": "fly",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "3483",
|
18 |
+
"native_tokenizers": [],
|
19 |
+
"scripts": []
|
20 |
},
|
21 |
{
|
22 |
"name": "Oorlams",
|
23 |
"iso_1_code": null,
|
24 |
"iso_3_code": "oor",
|
|
|
25 |
"children": [],
|
26 |
+
"tokenizers": {},
|
27 |
"node_i": "3484",
|
28 |
+
"native_tokenizers": [],
|
29 |
+
"scripts": []
|
30 |
}
|
31 |
],
|
32 |
+
"tokenizers": {},
|
33 |
"node_i": "3482",
|
34 |
+
"native_tokenizers": [],
|
35 |
+
"scripts": []
|
36 |
},
|
37 |
{
|
38 |
"name": "Arabic based",
|
39 |
"iso_1_code": null,
|
40 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
"children": [
|
42 |
{
|
43 |
"name": "Nubi",
|
44 |
"iso_1_code": null,
|
45 |
"iso_3_code": "kcn",
|
|
|
46 |
"children": [],
|
47 |
+
"tokenizers": {},
|
48 |
"node_i": "3486",
|
49 |
+
"native_tokenizers": [],
|
50 |
+
"scripts": []
|
51 |
},
|
52 |
{
|
53 |
"name": "Arabic, Juba",
|
54 |
"iso_1_code": "ar",
|
55 |
"iso_3_code": "pga",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
"children": [],
|
57 |
+
"tokenizers": {},
|
58 |
"node_i": "3487",
|
59 |
+
"native_tokenizers": [],
|
60 |
+
"scripts": []
|
61 |
}
|
62 |
],
|
63 |
+
"tokenizers": {},
|
64 |
"node_i": "3485",
|
65 |
+
"native_tokenizers": [],
|
66 |
+
"scripts": []
|
67 |
},
|
68 |
{
|
69 |
"name": "Assamese based",
|
70 |
"iso_1_code": null,
|
71 |
"iso_3_code": null,
|
|
|
72 |
"children": [
|
73 |
{
|
74 |
"name": "Nagamese",
|
75 |
"iso_1_code": null,
|
76 |
"iso_3_code": "nag",
|
|
|
77 |
"children": [],
|
78 |
+
"tokenizers": {},
|
79 |
"node_i": "3489",
|
80 |
+
"native_tokenizers": [],
|
81 |
+
"scripts": []
|
82 |
}
|
83 |
],
|
84 |
+
"tokenizers": {},
|
85 |
"node_i": "3488",
|
86 |
+
"native_tokenizers": [],
|
87 |
+
"scripts": []
|
88 |
},
|
89 |
{
|
90 |
"name": "Dutch based",
|
91 |
"iso_1_code": null,
|
92 |
"iso_3_code": null,
|
|
|
93 |
"children": [
|
94 |
{
|
95 |
"name": "Berbice Dutch Creole",
|
96 |
"iso_1_code": null,
|
97 |
"iso_3_code": "brc",
|
|
|
98 |
"children": [],
|
99 |
+
"tokenizers": {},
|
100 |
"node_i": "3491",
|
101 |
+
"native_tokenizers": [],
|
102 |
+
"scripts": []
|
103 |
},
|
104 |
{
|
105 |
"name": "Negerhollands",
|
106 |
"iso_1_code": null,
|
107 |
"iso_3_code": "dcr",
|
|
|
108 |
"children": [],
|
109 |
+
"tokenizers": {},
|
110 |
"node_i": "3492",
|
111 |
+
"native_tokenizers": [],
|
112 |
+
"scripts": []
|
113 |
},
|
114 |
{
|
115 |
"name": "Javindo",
|
116 |
"iso_1_code": null,
|
117 |
"iso_3_code": "jvd",
|
|
|
118 |
"children": [],
|
119 |
+
"tokenizers": {},
|
120 |
"node_i": "3493",
|
121 |
+
"native_tokenizers": [],
|
122 |
+
"scripts": []
|
123 |
},
|
124 |
{
|
125 |
"name": "Petjo",
|
126 |
"iso_1_code": null,
|
127 |
"iso_3_code": "pey",
|
|
|
128 |
"children": [],
|
129 |
+
"tokenizers": {},
|
130 |
"node_i": "3494",
|
131 |
+
"native_tokenizers": [],
|
132 |
+
"scripts": []
|
133 |
},
|
134 |
{
|
135 |
"name": "Skepi Dutch Creole",
|
136 |
"iso_1_code": null,
|
137 |
"iso_3_code": "skw",
|
|
|
138 |
"children": [],
|
139 |
+
"tokenizers": {},
|
140 |
"node_i": "3495",
|
141 |
+
"native_tokenizers": [],
|
142 |
+
"scripts": []
|
143 |
}
|
144 |
],
|
145 |
+
"tokenizers": {},
|
146 |
"node_i": "3490",
|
147 |
+
"native_tokenizers": [],
|
148 |
+
"scripts": []
|
149 |
},
|
150 |
{
|
151 |
"name": "English based",
|
152 |
"iso_1_code": null,
|
153 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
"children": [
|
155 |
{
|
156 |
"name": "Saramaccan",
|
157 |
"iso_1_code": null,
|
158 |
"iso_3_code": "srm",
|
159 |
+
"children": [],
|
160 |
"tokenizers": {
|
161 |
"Latn": {
|
162 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
163 |
"original_lang_name": "nigerian_pidgin",
|
164 |
"original_lang_code": "pcm",
|
165 |
+
"script": "Latn",
|
166 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
167 |
}
|
168 |
},
|
|
|
169 |
"node_i": "3497",
|
170 |
+
"native_tokenizers": [],
|
171 |
"scripts": [
|
172 |
"Latn"
|
173 |
+
]
|
|
|
174 |
},
|
175 |
{
|
176 |
"name": "Atlantic",
|
177 |
"iso_1_code": null,
|
178 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
"children": [
|
180 |
{
|
181 |
"name": "Eastern",
|
182 |
"iso_1_code": null,
|
183 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
"children": [
|
185 |
{
|
186 |
"name": "Turks and Caicos English Creole",
|
187 |
"iso_1_code": null,
|
188 |
"iso_3_code": "tch",
|
|
|
189 |
"children": [],
|
190 |
+
"tokenizers": {},
|
191 |
"node_i": "3500",
|
192 |
+
"native_tokenizers": [],
|
193 |
+
"scripts": []
|
194 |
},
|
195 |
{
|
196 |
"name": "Northern",
|
197 |
"iso_1_code": null,
|
198 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
"children": [
|
200 |
{
|
201 |
"name": "Afro-Seminole Creole",
|
202 |
"iso_1_code": null,
|
203 |
"iso_3_code": "afs",
|
|
|
204 |
"children": [],
|
205 |
+
"tokenizers": {},
|
206 |
"node_i": "3502",
|
207 |
+
"native_tokenizers": [],
|
208 |
+
"scripts": []
|
209 |
},
|
210 |
{
|
211 |
"name": "Bahamas English Creole",
|
212 |
"iso_1_code": null,
|
213 |
"iso_3_code": "bah",
|
|
|
214 |
"children": [],
|
215 |
+
"tokenizers": {},
|
216 |
"node_i": "3503",
|
217 |
+
"native_tokenizers": [],
|
218 |
+
"scripts": []
|
219 |
},
|
220 |
{
|
221 |
"name": "Sea Island English Creole",
|
222 |
"iso_1_code": null,
|
223 |
"iso_3_code": "gul",
|
224 |
+
"children": [],
|
225 |
"tokenizers": {
|
226 |
"Latn": {
|
227 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
228 |
"original_lang_name": "nigerian_pidgin",
|
229 |
"original_lang_code": "pcm",
|
230 |
+
"script": "Latn",
|
231 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
232 |
}
|
233 |
},
|
|
|
234 |
"node_i": "3504",
|
235 |
+
"native_tokenizers": [],
|
236 |
"scripts": [
|
237 |
"Latn"
|
238 |
+
]
|
|
|
239 |
}
|
240 |
],
|
241 |
+
"tokenizers": {
|
242 |
+
"Latn": {
|
243 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
244 |
+
"original_lang_name": "nigerian_pidgin",
|
245 |
+
"original_lang_code": "pcm",
|
246 |
+
"script": "Latn",
|
247 |
+
"class_name": "StanzaTokenizer"
|
248 |
+
}
|
249 |
+
},
|
250 |
"node_i": "3501",
|
251 |
+
"native_tokenizers": [],
|
252 |
+
"scripts": []
|
253 |
},
|
254 |
{
|
255 |
"name": "Southern",
|
256 |
"iso_1_code": null,
|
257 |
"iso_3_code": null,
|
|
|
258 |
"children": [
|
259 |
{
|
260 |
"name": "Leeward Caribbean English Creole",
|
261 |
"iso_1_code": null,
|
262 |
"iso_3_code": "aig",
|
|
|
263 |
"children": [],
|
264 |
+
"tokenizers": {},
|
265 |
"node_i": "3506",
|
266 |
+
"native_tokenizers": [],
|
267 |
+
"scripts": []
|
268 |
},
|
269 |
{
|
270 |
"name": "Bajan",
|
271 |
"iso_1_code": null,
|
272 |
"iso_3_code": "bjs",
|
|
|
273 |
"children": [],
|
274 |
+
"tokenizers": {},
|
275 |
"node_i": "3507",
|
276 |
+
"native_tokenizers": [],
|
277 |
+
"scripts": []
|
278 |
},
|
279 |
{
|
280 |
"name": "Grenadian English Creole",
|
281 |
"iso_1_code": null,
|
282 |
"iso_3_code": "gcl",
|
|
|
283 |
"children": [],
|
284 |
+
"tokenizers": {},
|
285 |
"node_i": "3508",
|
286 |
+
"native_tokenizers": [],
|
287 |
+
"scripts": []
|
288 |
},
|
289 |
{
|
290 |
"name": "Guyanese English Creole",
|
291 |
"iso_1_code": null,
|
292 |
"iso_3_code": "gyn",
|
|
|
293 |
"children": [],
|
294 |
+
"tokenizers": {},
|
295 |
"node_i": "3509",
|
296 |
+
"native_tokenizers": [],
|
297 |
+
"scripts": []
|
298 |
},
|
299 |
{
|
300 |
"name": "Vincentian English Creole",
|
301 |
"iso_1_code": null,
|
302 |
"iso_3_code": "svc",
|
|
|
303 |
"children": [],
|
304 |
+
"tokenizers": {},
|
305 |
"node_i": "3510",
|
306 |
+
"native_tokenizers": [],
|
307 |
+
"scripts": []
|
308 |
},
|
309 |
{
|
310 |
"name": "Tobagonian English Creole",
|
311 |
"iso_1_code": null,
|
312 |
"iso_3_code": "tgh",
|
|
|
313 |
"children": [],
|
314 |
+
"tokenizers": {},
|
315 |
"node_i": "3511",
|
316 |
+
"native_tokenizers": [],
|
317 |
+
"scripts": []
|
318 |
},
|
319 |
{
|
320 |
"name": "Trinidadian English Creole",
|
321 |
"iso_1_code": null,
|
322 |
"iso_3_code": "trf",
|
|
|
323 |
"children": [],
|
324 |
+
"tokenizers": {},
|
325 |
"node_i": "3512",
|
326 |
+
"native_tokenizers": [],
|
327 |
+
"scripts": []
|
328 |
},
|
329 |
{
|
330 |
"name": "Virgin Islands English Creole",
|
331 |
"iso_1_code": null,
|
332 |
"iso_3_code": "vic",
|
|
|
333 |
"children": [],
|
334 |
+
"tokenizers": {},
|
335 |
"node_i": "3513",
|
336 |
+
"native_tokenizers": [],
|
337 |
+
"scripts": []
|
338 |
}
|
339 |
],
|
340 |
+
"tokenizers": {},
|
341 |
"node_i": "3505",
|
342 |
+
"native_tokenizers": [],
|
343 |
+
"scripts": []
|
344 |
}
|
345 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
346 |
"tokenizers": {
|
347 |
"Latn": {
|
348 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
349 |
"original_lang_name": "nigerian_pidgin",
|
350 |
"original_lang_code": "pcm",
|
351 |
+
"script": "Latn",
|
352 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
353 |
}
|
354 |
},
|
355 |
+
"node_i": "3499",
|
356 |
+
"native_tokenizers": [],
|
357 |
+
"scripts": []
|
358 |
+
},
|
359 |
+
{
|
360 |
+
"name": "Krio",
|
361 |
+
"iso_1_code": null,
|
362 |
+
"iso_3_code": null,
|
363 |
"children": [
|
364 |
{
|
365 |
"name": "Equatorial Guinean Pidgin",
|
366 |
"iso_1_code": null,
|
367 |
"iso_3_code": "fpe",
|
|
|
368 |
"children": [],
|
369 |
+
"tokenizers": {},
|
370 |
"node_i": "3515",
|
371 |
+
"native_tokenizers": [],
|
372 |
+
"scripts": []
|
373 |
},
|
374 |
{
|
375 |
"name": "Ghanaian Pidgin English",
|
376 |
"iso_1_code": null,
|
377 |
"iso_3_code": "gpe",
|
|
|
378 |
"children": [],
|
379 |
+
"tokenizers": {},
|
380 |
"node_i": "3516",
|
381 |
+
"native_tokenizers": [],
|
382 |
+
"scripts": []
|
383 |
},
|
384 |
{
|
385 |
"name": "Krio",
|
386 |
"iso_1_code": null,
|
387 |
"iso_3_code": "kri",
|
388 |
+
"children": [],
|
389 |
"tokenizers": {
|
390 |
"Latn": {
|
391 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
392 |
"original_lang_name": "nigerian_pidgin",
|
393 |
"original_lang_code": "pcm",
|
394 |
+
"script": "Latn",
|
395 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
396 |
}
|
397 |
},
|
|
|
398 |
"node_i": "3517",
|
399 |
+
"native_tokenizers": [],
|
400 |
"scripts": [
|
401 |
"Latn"
|
402 |
+
]
|
|
|
403 |
},
|
404 |
{
|
405 |
"name": "Pidgin, Nigerian",
|
406 |
"iso_1_code": null,
|
407 |
"iso_3_code": "pcm",
|
408 |
+
"children": [],
|
409 |
"tokenizers": {
|
410 |
"Latn": {
|
411 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
412 |
"original_lang_name": "nigerian_pidgin",
|
413 |
"original_lang_code": "pcm",
|
414 |
+
"script": "Latn",
|
415 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
416 |
}
|
417 |
},
|
|
|
418 |
"node_i": "3518",
|
419 |
+
"native_tokenizers": [
|
420 |
"Latn"
|
421 |
],
|
422 |
+
"scripts": [
|
423 |
+
"Latn"
|
424 |
+
]
|
425 |
},
|
426 |
{
|
427 |
"name": "Pidgin, Cameroon",
|
428 |
"iso_1_code": null,
|
429 |
"iso_3_code": "wes",
|
430 |
+
"children": [],
|
431 |
"tokenizers": {
|
432 |
"Latn": {
|
433 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
434 |
"original_lang_name": "nigerian_pidgin",
|
435 |
"original_lang_code": "pcm",
|
436 |
+
"script": "Latn",
|
437 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
438 |
}
|
439 |
},
|
|
|
440 |
"node_i": "3519",
|
441 |
+
"native_tokenizers": [],
|
442 |
"scripts": [
|
443 |
"Latn"
|
444 |
+
]
|
|
|
445 |
}
|
446 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
447 |
"tokenizers": {
|
448 |
"Latn": {
|
449 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
450 |
"original_lang_name": "nigerian_pidgin",
|
451 |
"original_lang_code": "pcm",
|
452 |
+
"script": "Latn",
|
453 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
454 |
}
|
455 |
},
|
456 |
+
"node_i": "3514",
|
457 |
+
"native_tokenizers": [],
|
458 |
+
"scripts": []
|
459 |
+
},
|
460 |
+
{
|
461 |
+
"name": "Suriname",
|
462 |
+
"iso_1_code": null,
|
463 |
+
"iso_3_code": null,
|
464 |
"children": [
|
465 |
{
|
466 |
"name": "Sranan Tongo",
|
467 |
"iso_1_code": null,
|
468 |
"iso_3_code": "srn",
|
469 |
+
"children": [],
|
470 |
"tokenizers": {
|
471 |
"Latn": {
|
472 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
473 |
"original_lang_name": "nigerian_pidgin",
|
474 |
"original_lang_code": "pcm",
|
475 |
+
"script": "Latn",
|
476 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
477 |
}
|
478 |
},
|
|
|
479 |
"node_i": "3521",
|
480 |
+
"native_tokenizers": [],
|
481 |
"scripts": [
|
482 |
"Latn"
|
483 |
+
]
|
|
|
484 |
},
|
485 |
{
|
486 |
"name": "Ndyuka",
|
487 |
"iso_1_code": null,
|
488 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
489 |
"children": [
|
490 |
{
|
491 |
"name": "Aukan",
|
492 |
"iso_1_code": null,
|
493 |
"iso_3_code": "djk",
|
494 |
+
"children": [],
|
495 |
"tokenizers": {
|
496 |
"Latn": {
|
497 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
498 |
"original_lang_name": "nigerian_pidgin",
|
499 |
"original_lang_code": "pcm",
|
500 |
+
"script": "Latn",
|
501 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
502 |
}
|
503 |
},
|
|
|
504 |
"node_i": "3523",
|
505 |
+
"native_tokenizers": [],
|
506 |
"scripts": [
|
507 |
"Latn"
|
508 |
+
]
|
|
|
509 |
},
|
510 |
{
|
511 |
"name": "Kwinti",
|
512 |
"iso_1_code": null,
|
513 |
"iso_3_code": "kww",
|
|
|
514 |
"children": [],
|
515 |
+
"tokenizers": {},
|
516 |
"node_i": "3524",
|
517 |
+
"native_tokenizers": [],
|
518 |
+
"scripts": []
|
519 |
}
|
520 |
],
|
521 |
+
"tokenizers": {
|
522 |
+
"Latn": {
|
523 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
524 |
+
"original_lang_name": "nigerian_pidgin",
|
525 |
+
"original_lang_code": "pcm",
|
526 |
+
"script": "Latn",
|
527 |
+
"class_name": "StanzaTokenizer"
|
528 |
+
}
|
529 |
+
},
|
530 |
"node_i": "3522",
|
531 |
+
"native_tokenizers": [],
|
532 |
+
"scripts": []
|
533 |
}
|
534 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
535 |
"tokenizers": {
|
536 |
"Latn": {
|
537 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
538 |
"original_lang_name": "nigerian_pidgin",
|
539 |
"original_lang_code": "pcm",
|
540 |
+
"script": "Latn",
|
541 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
542 |
}
|
543 |
},
|
544 |
+
"node_i": "3520",
|
545 |
+
"native_tokenizers": [],
|
546 |
+
"scripts": []
|
547 |
+
},
|
548 |
+
{
|
549 |
+
"name": "Western",
|
550 |
+
"iso_1_code": null,
|
551 |
+
"iso_3_code": null,
|
552 |
"children": [
|
553 |
{
|
554 |
"name": "Belize English Creole",
|
555 |
"iso_1_code": null,
|
556 |
"iso_3_code": "bzj",
|
557 |
+
"children": [],
|
558 |
"tokenizers": {
|
559 |
"Latn": {
|
560 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
561 |
"original_lang_name": "nigerian_pidgin",
|
562 |
"original_lang_code": "pcm",
|
563 |
+
"script": "Latn",
|
564 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
565 |
}
|
566 |
},
|
|
|
567 |
"node_i": "3526",
|
568 |
+
"native_tokenizers": [],
|
569 |
"scripts": [
|
570 |
"Latn"
|
571 |
+
]
|
|
|
572 |
},
|
573 |
{
|
574 |
"name": "Nicaragua English Creole",
|
575 |
"iso_1_code": null,
|
576 |
"iso_3_code": "bzk",
|
|
|
577 |
"children": [],
|
578 |
+
"tokenizers": {},
|
579 |
"node_i": "3527",
|
580 |
+
"native_tokenizers": [],
|
581 |
+
"scripts": []
|
582 |
},
|
583 |
{
|
584 |
"name": "Islander English Creole",
|
585 |
"iso_1_code": null,
|
586 |
"iso_3_code": "icr",
|
587 |
+
"children": [],
|
588 |
"tokenizers": {
|
589 |
"Latn": {
|
590 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
591 |
"original_lang_name": "nigerian_pidgin",
|
592 |
"original_lang_code": "pcm",
|
593 |
+
"script": "Latn",
|
594 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
595 |
}
|
596 |
},
|
|
|
597 |
"node_i": "3528",
|
598 |
+
"native_tokenizers": [],
|
599 |
"scripts": [
|
600 |
"Latn"
|
601 |
+
]
|
|
|
602 |
},
|
603 |
{
|
604 |
"name": "Jamaican English Creole",
|
605 |
"iso_1_code": null,
|
606 |
"iso_3_code": "jam",
|
607 |
+
"children": [],
|
608 |
"tokenizers": {
|
609 |
"Latn": {
|
610 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
611 |
"original_lang_name": "nigerian_pidgin",
|
612 |
"original_lang_code": "pcm",
|
613 |
+
"script": "Latn",
|
614 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
615 |
}
|
616 |
},
|
|
|
617 |
"node_i": "3529",
|
618 |
+
"native_tokenizers": [],
|
619 |
"scripts": [
|
620 |
"Latn"
|
621 |
+
]
|
|
|
622 |
}
|
623 |
],
|
624 |
+
"tokenizers": {
|
625 |
+
"Latn": {
|
626 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
627 |
+
"original_lang_name": "nigerian_pidgin",
|
628 |
+
"original_lang_code": "pcm",
|
629 |
+
"script": "Latn",
|
630 |
+
"class_name": "StanzaTokenizer"
|
631 |
+
}
|
632 |
+
},
|
633 |
"node_i": "3525",
|
634 |
+
"native_tokenizers": [],
|
635 |
+
"scripts": []
|
636 |
}
|
637 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
638 |
"tokenizers": {
|
639 |
"Latn": {
|
640 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
641 |
"original_lang_name": "nigerian_pidgin",
|
642 |
"original_lang_code": "pcm",
|
643 |
+
"script": "Latn",
|
644 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
645 |
}
|
646 |
},
|
647 |
+
"node_i": "3498",
|
648 |
+
"native_tokenizers": [],
|
649 |
+
"scripts": []
|
650 |
+
},
|
651 |
+
{
|
652 |
+
"name": "Pacific",
|
653 |
+
"iso_1_code": null,
|
654 |
+
"iso_3_code": null,
|
655 |
"children": [
|
656 |
{
|
657 |
"name": "Bislama",
|
658 |
"iso_1_code": "bi",
|
659 |
"iso_3_code": "bis",
|
660 |
+
"children": [],
|
661 |
"tokenizers": {
|
662 |
"Latn": {
|
663 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
664 |
"original_lang_name": "nigerian_pidgin",
|
665 |
"original_lang_code": "pcm",
|
666 |
+
"script": "Latn",
|
667 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
668 |
}
|
669 |
},
|
|
|
670 |
"node_i": "3531",
|
671 |
+
"native_tokenizers": [],
|
672 |
"scripts": [
|
673 |
"Latn"
|
674 |
+
]
|
|
|
675 |
},
|
676 |
{
|
677 |
"name": "Hawaii Pidgin",
|
678 |
"iso_1_code": null,
|
679 |
"iso_3_code": "hwc",
|
680 |
+
"children": [],
|
681 |
"tokenizers": {
|
682 |
"Latn": {
|
683 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
684 |
"original_lang_name": "nigerian_pidgin",
|
685 |
"original_lang_code": "pcm",
|
686 |
+
"script": "Latn",
|
687 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
688 |
}
|
689 |
},
|
|
|
690 |
"node_i": "3532",
|
691 |
+
"native_tokenizers": [],
|
692 |
"scripts": [
|
693 |
"Latn"
|
694 |
+
]
|
|
|
695 |
},
|
696 |
{
|
697 |
"name": "Ngatik Men\u2019s Creole",
|
698 |
"iso_1_code": null,
|
699 |
"iso_3_code": "ngm",
|
|
|
700 |
"children": [],
|
701 |
+
"tokenizers": {},
|
702 |
"node_i": "3533",
|
703 |
+
"native_tokenizers": [],
|
704 |
+
"scripts": []
|
705 |
},
|
706 |
{
|
707 |
"name": "Pitcairn-Norfolk",
|
708 |
"iso_1_code": null,
|
709 |
"iso_3_code": "pih",
|
|
|
710 |
"children": [],
|
711 |
+
"tokenizers": {},
|
712 |
"node_i": "3534",
|
713 |
+
"native_tokenizers": [],
|
714 |
+
"scripts": []
|
715 |
},
|
716 |
{
|
717 |
"name": "Pijin",
|
718 |
"iso_1_code": null,
|
719 |
"iso_3_code": "pis",
|
720 |
+
"children": [],
|
721 |
"tokenizers": {
|
722 |
"Latn": {
|
723 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
724 |
"original_lang_name": "nigerian_pidgin",
|
725 |
"original_lang_code": "pcm",
|
726 |
+
"script": "Latn",
|
727 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
728 |
}
|
729 |
},
|
|
|
730 |
"node_i": "3535",
|
731 |
+
"native_tokenizers": [],
|
732 |
"scripts": [
|
733 |
"Latn"
|
734 |
+
]
|
|
|
735 |
},
|
736 |
{
|
737 |
"name": "Kriol",
|
738 |
"iso_1_code": null,
|
739 |
"iso_3_code": "rop",
|
740 |
+
"children": [],
|
741 |
"tokenizers": {
|
742 |
"Latn": {
|
743 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
744 |
"original_lang_name": "nigerian_pidgin",
|
745 |
"original_lang_code": "pcm",
|
746 |
+
"script": "Latn",
|
747 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
748 |
}
|
749 |
},
|
|
|
750 |
"node_i": "3536",
|
751 |
+
"native_tokenizers": [],
|
752 |
"scripts": [
|
753 |
"Latn"
|
754 |
+
]
|
|
|
755 |
},
|
756 |
{
|
757 |
"name": "Torres Strait Creole",
|
758 |
"iso_1_code": null,
|
759 |
"iso_3_code": "tcs",
|
760 |
+
"children": [],
|
761 |
"tokenizers": {
|
762 |
"Latn": {
|
763 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
764 |
"original_lang_name": "nigerian_pidgin",
|
765 |
"original_lang_code": "pcm",
|
766 |
+
"script": "Latn",
|
767 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
768 |
}
|
769 |
},
|
|
|
770 |
"node_i": "3537",
|
771 |
+
"native_tokenizers": [],
|
772 |
"scripts": [
|
773 |
"Latn"
|
774 |
+
]
|
|
|
775 |
},
|
776 |
{
|
777 |
"name": "Tok Pisin",
|
778 |
"iso_1_code": null,
|
779 |
"iso_3_code": "tpi",
|
780 |
+
"children": [],
|
781 |
"tokenizers": {
|
782 |
"Latn": {
|
783 |
"full_object": "StanzaTokenizer(\"pcm\")",
|
784 |
"original_lang_name": "nigerian_pidgin",
|
785 |
"original_lang_code": "pcm",
|
786 |
+
"script": "Latn",
|
787 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
788 |
}
|
789 |
},
|
|
|
790 |
"node_i": "3538",
|
791 |
+
"native_tokenizers": [],
|
792 |
"scripts": [
|
793 |
"Latn"
|
794 |
+
]
|
|
|
795 |
}
|
796 |
],
|
797 |
+
"tokenizers": {
|
798 |
+
"Latn": {
|
799 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
800 |
+
"original_lang_name": "nigerian_pidgin",
|
801 |
+
"original_lang_code": "pcm",
|
802 |
+
"script": "Latn",
|
803 |
+
"class_name": "StanzaTokenizer"
|
804 |
+
}
|
805 |
+
},
|
806 |
"node_i": "3530",
|
807 |
+
"native_tokenizers": [],
|
808 |
+
"scripts": []
|
809 |
}
|
810 |
],
|
811 |
+
"tokenizers": {
|
812 |
+
"Latn": {
|
813 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
814 |
+
"original_lang_name": "nigerian_pidgin",
|
815 |
+
"original_lang_code": "pcm",
|
816 |
+
"script": "Latn",
|
817 |
+
"class_name": "StanzaTokenizer"
|
818 |
+
}
|
819 |
+
},
|
820 |
"node_i": "3496",
|
821 |
+
"native_tokenizers": [],
|
822 |
+
"scripts": []
|
823 |
},
|
824 |
{
|
825 |
"name": "French based",
|
826 |
"iso_1_code": null,
|
827 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
828 |
"children": [
|
829 |
{
|
830 |
"name": "Lesser Antillean French Creole",
|
831 |
"iso_1_code": null,
|
832 |
"iso_3_code": "acf",
|
833 |
+
"children": [],
|
834 |
"tokenizers": {
|
835 |
"Latn": {
|
836 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
837 |
+
"original_lang_name": "nigerian_pidgin",
|
838 |
+
"original_lang_code": "pcm",
|
839 |
+
"script": "Latn",
|
840 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
841 |
}
|
842 |
},
|
|
|
843 |
"node_i": "3540",
|
844 |
+
"native_tokenizers": [],
|
845 |
"scripts": [
|
846 |
"Latn"
|
847 |
+
]
|
|
|
848 |
},
|
849 |
{
|
850 |
"name": "Tayo",
|
851 |
"iso_1_code": null,
|
852 |
"iso_3_code": "cks",
|
|
|
853 |
"children": [],
|
854 |
+
"tokenizers": {},
|
855 |
"node_i": "3541",
|
856 |
+
"native_tokenizers": [],
|
857 |
+
"scripts": []
|
858 |
},
|
859 |
{
|
860 |
"name": "Seychelles French Creole",
|
861 |
"iso_1_code": null,
|
862 |
"iso_3_code": "crs",
|
863 |
+
"children": [],
|
864 |
"tokenizers": {
|
865 |
"Latn": {
|
866 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
867 |
+
"original_lang_name": "nigerian_pidgin",
|
868 |
+
"original_lang_code": "pcm",
|
869 |
+
"script": "Latn",
|
870 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
871 |
}
|
872 |
},
|
|
|
873 |
"node_i": "3542",
|
874 |
+
"native_tokenizers": [],
|
875 |
"scripts": [
|
876 |
"Latn"
|
877 |
+
]
|
|
|
878 |
},
|
879 |
{
|
880 |
"name": "Guadeloupean French Creole",
|
881 |
"iso_1_code": null,
|
882 |
"iso_3_code": "gcf",
|
883 |
+
"children": [],
|
884 |
"tokenizers": {
|
885 |
"Latn": {
|
886 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
887 |
+
"original_lang_name": "nigerian_pidgin",
|
888 |
+
"original_lang_code": "pcm",
|
889 |
+
"script": "Latn",
|
890 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
891 |
}
|
892 |
},
|
|
|
893 |
"node_i": "3543",
|
894 |
+
"native_tokenizers": [],
|
895 |
"scripts": [
|
896 |
"Latn"
|
897 |
+
]
|
|
|
898 |
},
|
899 |
{
|
900 |
"name": "Guianese French Creole",
|
901 |
"iso_1_code": null,
|
902 |
"iso_3_code": "gcr",
|
903 |
+
"children": [],
|
904 |
"tokenizers": {
|
905 |
"Latn": {
|
906 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
907 |
+
"original_lang_name": "nigerian_pidgin",
|
908 |
+
"original_lang_code": "pcm",
|
909 |
+
"script": "Latn",
|
910 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
911 |
}
|
912 |
},
|
|
|
913 |
"node_i": "3544",
|
914 |
+
"native_tokenizers": [],
|
915 |
"scripts": [
|
916 |
"Latn"
|
917 |
+
]
|
|
|
918 |
},
|
919 |
{
|
920 |
"name": "Haitian Creole",
|
921 |
"iso_1_code": "ht",
|
922 |
"iso_3_code": "hat",
|
923 |
+
"children": [],
|
924 |
"tokenizers": {
|
925 |
"Latn": {
|
926 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
927 |
+
"original_lang_name": "nigerian_pidgin",
|
928 |
+
"original_lang_code": "pcm",
|
929 |
+
"script": "Latn",
|
930 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
931 |
}
|
932 |
},
|
|
|
933 |
"node_i": "3545",
|
934 |
+
"native_tokenizers": [],
|
935 |
"scripts": [
|
936 |
"Latn"
|
937 |
+
]
|
|
|
938 |
},
|
939 |
{
|
940 |
"name": "Karipuna French Creole",
|
941 |
"iso_1_code": null,
|
942 |
"iso_3_code": "kmv",
|
|
|
943 |
"children": [],
|
944 |
+
"tokenizers": {},
|
945 |
"node_i": "3546",
|
946 |
+
"native_tokenizers": [],
|
947 |
+
"scripts": []
|
948 |
},
|
949 |
{
|
950 |
"name": "Louisiana Creole",
|
951 |
"iso_1_code": null,
|
952 |
"iso_3_code": "lou",
|
|
|
953 |
"children": [],
|
954 |
+
"tokenizers": {},
|
955 |
"node_i": "3547",
|
956 |
+
"native_tokenizers": [],
|
957 |
+
"scripts": []
|
958 |
},
|
959 |
{
|
960 |
"name": "Morisyen",
|
961 |
"iso_1_code": null,
|
962 |
"iso_3_code": "mfe",
|
963 |
+
"children": [],
|
964 |
"tokenizers": {
|
965 |
"Latn": {
|
966 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
967 |
+
"original_lang_name": "nigerian_pidgin",
|
968 |
+
"original_lang_code": "pcm",
|
969 |
+
"script": "Latn",
|
970 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
971 |
}
|
972 |
},
|
|
|
973 |
"node_i": "3548",
|
974 |
+
"native_tokenizers": [],
|
975 |
"scripts": [
|
976 |
"Latn"
|
977 |
+
]
|
|
|
978 |
},
|
979 |
{
|
980 |
"name": "R\u00e9union French Creole",
|
981 |
"iso_1_code": null,
|
982 |
"iso_3_code": "rcf",
|
983 |
+
"children": [],
|
984 |
"tokenizers": {
|
985 |
"Latn": {
|
986 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
987 |
+
"original_lang_name": "nigerian_pidgin",
|
988 |
+
"original_lang_code": "pcm",
|
989 |
+
"script": "Latn",
|
990 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
991 |
}
|
992 |
},
|
|
|
993 |
"node_i": "3549",
|
994 |
+
"native_tokenizers": [],
|
995 |
"scripts": [
|
996 |
"Latn"
|
997 |
+
]
|
|
|
998 |
},
|
999 |
{
|
1000 |
"name": "San Miguel French Creole",
|
1001 |
"iso_1_code": null,
|
1002 |
"iso_3_code": "scf",
|
|
|
1003 |
"children": [],
|
1004 |
+
"tokenizers": {},
|
1005 |
"node_i": "3550",
|
1006 |
+
"native_tokenizers": [],
|
1007 |
+
"scripts": []
|
1008 |
}
|
1009 |
],
|
1010 |
+
"tokenizers": {
|
1011 |
+
"Latn": {
|
1012 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1013 |
+
"original_lang_name": "nigerian_pidgin",
|
1014 |
+
"original_lang_code": "pcm",
|
1015 |
+
"script": "Latn",
|
1016 |
+
"class_name": "StanzaTokenizer"
|
1017 |
+
}
|
1018 |
+
},
|
1019 |
"node_i": "3539",
|
1020 |
+
"native_tokenizers": [],
|
1021 |
+
"scripts": []
|
1022 |
},
|
1023 |
{
|
1024 |
"name": "German based",
|
1025 |
"iso_1_code": null,
|
1026 |
"iso_3_code": null,
|
|
|
1027 |
"children": [
|
1028 |
{
|
1029 |
"name": "Unserdeutsch",
|
1030 |
"iso_1_code": null,
|
1031 |
"iso_3_code": "uln",
|
|
|
1032 |
"children": [],
|
1033 |
+
"tokenizers": {},
|
1034 |
"node_i": "3552",
|
1035 |
+
"native_tokenizers": [],
|
1036 |
+
"scripts": []
|
1037 |
}
|
1038 |
],
|
1039 |
+
"tokenizers": {},
|
1040 |
"node_i": "3551",
|
1041 |
+
"native_tokenizers": [],
|
1042 |
+
"scripts": []
|
1043 |
},
|
1044 |
{
|
1045 |
"name": "Hindi based",
|
1046 |
"iso_1_code": null,
|
1047 |
"iso_3_code": null,
|
|
|
1048 |
"children": [
|
1049 |
{
|
1050 |
"name": "Andaman Hindi Creole",
|
1051 |
"iso_1_code": null,
|
1052 |
"iso_3_code": "hca",
|
|
|
1053 |
"children": [],
|
1054 |
+
"tokenizers": {},
|
1055 |
"node_i": "3554",
|
1056 |
+
"native_tokenizers": [],
|
1057 |
+
"scripts": []
|
1058 |
}
|
1059 |
],
|
1060 |
+
"tokenizers": {},
|
1061 |
"node_i": "3553",
|
1062 |
+
"native_tokenizers": [],
|
1063 |
+
"scripts": []
|
1064 |
},
|
1065 |
{
|
1066 |
"name": "Iberian based",
|
1067 |
"iso_1_code": null,
|
1068 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1069 |
"children": [
|
1070 |
{
|
1071 |
"name": "Papiamentu",
|
1072 |
"iso_1_code": null,
|
1073 |
"iso_3_code": "pap",
|
1074 |
+
"children": [],
|
1075 |
"tokenizers": {
|
1076 |
"Latn": {
|
1077 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1078 |
+
"original_lang_name": "nigerian_pidgin",
|
1079 |
+
"original_lang_code": "pcm",
|
1080 |
+
"script": "Latn",
|
1081 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
1082 |
}
|
1083 |
},
|
|
|
1084 |
"node_i": "3556",
|
1085 |
+
"native_tokenizers": [],
|
1086 |
"scripts": [
|
1087 |
"Latn"
|
1088 |
+
]
|
|
|
1089 |
}
|
1090 |
],
|
1091 |
+
"tokenizers": {
|
1092 |
+
"Latn": {
|
1093 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1094 |
+
"original_lang_name": "nigerian_pidgin",
|
1095 |
+
"original_lang_code": "pcm",
|
1096 |
+
"script": "Latn",
|
1097 |
+
"class_name": "StanzaTokenizer"
|
1098 |
+
}
|
1099 |
+
},
|
1100 |
"node_i": "3555",
|
1101 |
+
"native_tokenizers": [],
|
1102 |
+
"scripts": []
|
1103 |
},
|
1104 |
{
|
1105 |
"name": "Japanese-based",
|
1106 |
"iso_1_code": null,
|
1107 |
"iso_3_code": null,
|
|
|
1108 |
"children": [
|
1109 |
{
|
1110 |
"name": "Yilan Creole",
|
1111 |
"iso_1_code": null,
|
1112 |
"iso_3_code": "ycr",
|
|
|
1113 |
"children": [],
|
1114 |
+
"tokenizers": {},
|
1115 |
"node_i": "3558",
|
1116 |
+
"native_tokenizers": [],
|
1117 |
+
"scripts": []
|
1118 |
}
|
1119 |
],
|
1120 |
+
"tokenizers": {},
|
1121 |
"node_i": "3557",
|
1122 |
+
"native_tokenizers": [],
|
1123 |
+
"scripts": []
|
1124 |
},
|
1125 |
{
|
1126 |
"name": "Kongo based",
|
1127 |
"iso_1_code": null,
|
1128 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1129 |
"children": [
|
1130 |
{
|
1131 |
"name": "Kituba",
|
1132 |
"iso_1_code": null,
|
1133 |
"iso_3_code": "ktu",
|
1134 |
+
"children": [],
|
1135 |
"tokenizers": {
|
1136 |
"Latn": {
|
1137 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1138 |
+
"original_lang_name": "nigerian_pidgin",
|
1139 |
+
"original_lang_code": "pcm",
|
1140 |
+
"script": "Latn",
|
1141 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
1142 |
}
|
1143 |
},
|
|
|
1144 |
"node_i": "3560",
|
1145 |
+
"native_tokenizers": [],
|
1146 |
"scripts": [
|
1147 |
"Latn"
|
1148 |
+
]
|
|
|
1149 |
},
|
1150 |
{
|
1151 |
"name": "Kituba",
|
1152 |
"iso_1_code": null,
|
1153 |
"iso_3_code": "mkw",
|
|
|
1154 |
"children": [],
|
1155 |
+
"tokenizers": {},
|
1156 |
"node_i": "3561",
|
1157 |
+
"native_tokenizers": [],
|
1158 |
+
"scripts": []
|
1159 |
}
|
1160 |
],
|
1161 |
+
"tokenizers": {
|
1162 |
+
"Latn": {
|
1163 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1164 |
+
"original_lang_name": "nigerian_pidgin",
|
1165 |
+
"original_lang_code": "pcm",
|
1166 |
+
"script": "Latn",
|
1167 |
+
"class_name": "StanzaTokenizer"
|
1168 |
+
}
|
1169 |
+
},
|
1170 |
"node_i": "3559",
|
1171 |
+
"native_tokenizers": [],
|
1172 |
+
"scripts": []
|
1173 |
},
|
1174 |
{
|
1175 |
"name": "Malay based",
|
1176 |
"iso_1_code": null,
|
1177 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1178 |
"children": [
|
1179 |
{
|
1180 |
"name": "Malay, Ambonese",
|
1181 |
"iso_1_code": null,
|
1182 |
"iso_3_code": "abs",
|
1183 |
+
"children": [],
|
1184 |
"tokenizers": {
|
1185 |
"Latn": {
|
1186 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1187 |
+
"original_lang_name": "nigerian_pidgin",
|
1188 |
+
"original_lang_code": "pcm",
|
1189 |
+
"script": "Latn",
|
1190 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
1191 |
}
|
1192 |
},
|
|
|
1193 |
"node_i": "3563",
|
1194 |
+
"native_tokenizers": [],
|
1195 |
"scripts": [
|
1196 |
"Latn"
|
1197 |
+
]
|
|
|
1198 |
},
|
1199 |
{
|
1200 |
"name": "Betawi",
|
1201 |
"iso_1_code": null,
|
1202 |
"iso_3_code": "bew",
|
1203 |
+
"children": [],
|
1204 |
"tokenizers": {
|
1205 |
"Latn": {
|
1206 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1207 |
+
"original_lang_name": "nigerian_pidgin",
|
1208 |
+
"original_lang_code": "pcm",
|
1209 |
+
"script": "Latn",
|
1210 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
1211 |
}
|
1212 |
},
|
|
|
1213 |
"node_i": "3564",
|
1214 |
+
"native_tokenizers": [],
|
1215 |
"scripts": [
|
1216 |
"Latn"
|
1217 |
+
]
|
|
|
1218 |
},
|
1219 |
{
|
1220 |
"name": "Malay, Banda",
|
1221 |
"iso_1_code": null,
|
1222 |
"iso_3_code": "bpq",
|
|
|
1223 |
"children": [],
|
1224 |
+
"tokenizers": {},
|
1225 |
"node_i": "3565",
|
1226 |
+
"native_tokenizers": [],
|
1227 |
+
"scripts": []
|
1228 |
},
|
1229 |
{
|
1230 |
"name": "Malaccan Malay Creole",
|
1231 |
"iso_1_code": null,
|
1232 |
"iso_3_code": "ccm",
|
|
|
1233 |
"children": [],
|
1234 |
+
"tokenizers": {},
|
1235 |
"node_i": "3566",
|
1236 |
+
"native_tokenizers": [],
|
1237 |
+
"scripts": []
|
1238 |
},
|
1239 |
{
|
1240 |
"name": "Malay, Cocos Islands",
|
1241 |
"iso_1_code": "ms",
|
1242 |
"iso_3_code": "coa",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1243 |
"children": [],
|
1244 |
+
"tokenizers": {},
|
1245 |
"node_i": "3567",
|
1246 |
+
"native_tokenizers": [],
|
1247 |
+
"scripts": []
|
1248 |
},
|
1249 |
{
|
1250 |
"name": "Malay, Larantuka",
|
1251 |
"iso_1_code": null,
|
1252 |
"iso_3_code": "lrt",
|
|
|
1253 |
"children": [],
|
1254 |
+
"tokenizers": {},
|
1255 |
"node_i": "3568",
|
1256 |
+
"native_tokenizers": [],
|
1257 |
+
"scripts": []
|
1258 |
},
|
1259 |
{
|
1260 |
"name": "Malay, North Moluccan",
|
1261 |
"iso_1_code": "ms",
|
1262 |
"iso_3_code": "max",
|
1263 |
+
"children": [],
|
1264 |
"tokenizers": {
|
1265 |
"Latn": {
|
1266 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1267 |
+
"original_lang_name": "nigerian_pidgin",
|
1268 |
+
"original_lang_code": "pcm",
|
1269 |
+
"script": "Latn",
|
1270 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1271 |
}
|
1272 |
},
|
|
|
1273 |
"node_i": "3569",
|
1274 |
+
"native_tokenizers": [],
|
1275 |
"scripts": [
|
1276 |
"Latn"
|
1277 |
+
]
|
|
|
1278 |
},
|
1279 |
{
|
1280 |
"name": "Malay, Baba",
|
1281 |
"iso_1_code": null,
|
1282 |
"iso_3_code": "mbf",
|
1283 |
+
"children": [],
|
1284 |
"tokenizers": {
|
1285 |
"Latn": {
|
1286 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1287 |
+
"original_lang_name": "nigerian_pidgin",
|
1288 |
+
"original_lang_code": "pcm",
|
1289 |
+
"script": "Latn",
|
1290 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
1291 |
}
|
1292 |
},
|
|
|
1293 |
"node_i": "3570",
|
1294 |
+
"native_tokenizers": [],
|
1295 |
"scripts": [
|
1296 |
"Latn"
|
1297 |
+
]
|
|
|
1298 |
},
|
1299 |
{
|
1300 |
"name": "Malay, Balinese",
|
1301 |
"iso_1_code": null,
|
1302 |
"iso_3_code": "mhp",
|
|
|
1303 |
"children": [],
|
1304 |
+
"tokenizers": {},
|
1305 |
"node_i": "3571",
|
1306 |
+
"native_tokenizers": [],
|
1307 |
+
"scripts": []
|
1308 |
},
|
1309 |
{
|
1310 |
"name": "Malay, Kupang",
|
1311 |
"iso_1_code": null,
|
1312 |
"iso_3_code": "mkn",
|
1313 |
+
"children": [],
|
1314 |
"tokenizers": {
|
1315 |
"Latn": {
|
1316 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1317 |
+
"original_lang_name": "nigerian_pidgin",
|
1318 |
+
"original_lang_code": "pcm",
|
1319 |
+
"script": "Latn",
|
1320 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
1321 |
}
|
1322 |
},
|
|
|
1323 |
"node_i": "3572",
|
1324 |
+
"native_tokenizers": [],
|
1325 |
"scripts": [
|
1326 |
"Latn"
|
1327 |
+
]
|
|
|
1328 |
},
|
1329 |
{
|
1330 |
"name": "Indonesian, Peranakan",
|
1331 |
"iso_1_code": null,
|
1332 |
"iso_3_code": "pea",
|
|
|
1333 |
"children": [],
|
1334 |
+
"tokenizers": {},
|
1335 |
"node_i": "3573",
|
1336 |
+
"native_tokenizers": [],
|
1337 |
+
"scripts": []
|
1338 |
},
|
1339 |
{
|
1340 |
"name": "Malay, Papuan",
|
1341 |
"iso_1_code": null,
|
1342 |
"iso_3_code": "pmy",
|
|
|
1343 |
"children": [],
|
1344 |
+
"tokenizers": {},
|
1345 |
"node_i": "3574",
|
1346 |
+
"native_tokenizers": [],
|
1347 |
+
"scripts": []
|
1348 |
},
|
1349 |
{
|
1350 |
"name": "Sri Lankan Malay Creole",
|
1351 |
"iso_1_code": null,
|
1352 |
"iso_3_code": "sci",
|
|
|
1353 |
"children": [],
|
1354 |
+
"tokenizers": {},
|
1355 |
"node_i": "3575",
|
1356 |
+
"native_tokenizers": [],
|
1357 |
+
"scripts": []
|
1358 |
},
|
1359 |
{
|
1360 |
"name": "Malay, Manado",
|
1361 |
"iso_1_code": "ms",
|
1362 |
"iso_3_code": "xmm",
|
1363 |
+
"children": [],
|
1364 |
"tokenizers": {
|
1365 |
"Latn": {
|
1366 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1367 |
+
"original_lang_name": "nigerian_pidgin",
|
1368 |
+
"original_lang_code": "pcm",
|
1369 |
+
"script": "Latn",
|
1370 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1371 |
}
|
1372 |
},
|
|
|
1373 |
"node_i": "3576",
|
1374 |
+
"native_tokenizers": [],
|
1375 |
"scripts": [
|
1376 |
"Latn"
|
1377 |
+
]
|
|
|
1378 |
}
|
1379 |
],
|
1380 |
+
"tokenizers": {
|
1381 |
+
"Latn": {
|
1382 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1383 |
+
"original_lang_name": "nigerian_pidgin",
|
1384 |
+
"original_lang_code": "pcm",
|
1385 |
+
"script": "Latn",
|
1386 |
+
"class_name": "StanzaTokenizer"
|
1387 |
+
}
|
1388 |
+
},
|
1389 |
"node_i": "3562",
|
1390 |
+
"native_tokenizers": [],
|
1391 |
+
"scripts": []
|
1392 |
},
|
1393 |
{
|
1394 |
"name": "Ngbandi based",
|
1395 |
"iso_1_code": null,
|
1396 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1397 |
"children": [
|
1398 |
{
|
1399 |
"name": "Sango",
|
1400 |
"iso_1_code": "sg",
|
1401 |
"iso_3_code": "sag",
|
1402 |
+
"children": [],
|
1403 |
"tokenizers": {
|
1404 |
"Latn": {
|
1405 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1406 |
+
"original_lang_name": "nigerian_pidgin",
|
1407 |
+
"original_lang_code": "pcm",
|
1408 |
+
"script": "Latn",
|
1409 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
1410 |
}
|
1411 |
},
|
|
|
1412 |
"node_i": "3578",
|
1413 |
+
"native_tokenizers": [],
|
1414 |
"scripts": [
|
1415 |
"Latn"
|
1416 |
+
]
|
|
|
1417 |
},
|
1418 |
{
|
1419 |
"name": "Sango, Riverain",
|
1420 |
"iso_1_code": null,
|
1421 |
"iso_3_code": "snj",
|
|
|
1422 |
"children": [],
|
1423 |
+
"tokenizers": {},
|
1424 |
"node_i": "3579",
|
1425 |
+
"native_tokenizers": [],
|
1426 |
+
"scripts": []
|
1427 |
}
|
1428 |
],
|
1429 |
+
"tokenizers": {
|
1430 |
+
"Latn": {
|
1431 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1432 |
+
"original_lang_name": "nigerian_pidgin",
|
1433 |
+
"original_lang_code": "pcm",
|
1434 |
+
"script": "Latn",
|
1435 |
+
"class_name": "StanzaTokenizer"
|
1436 |
+
}
|
1437 |
+
},
|
1438 |
"node_i": "3577",
|
1439 |
+
"native_tokenizers": [],
|
1440 |
+
"scripts": []
|
1441 |
},
|
1442 |
{
|
1443 |
"name": "Portuguese based",
|
1444 |
"iso_1_code": null,
|
1445 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1446 |
"children": [
|
1447 |
{
|
1448 |
"name": "Angolar",
|
1449 |
"iso_1_code": null,
|
1450 |
"iso_3_code": "aoa",
|
|
|
1451 |
"children": [],
|
1452 |
+
"tokenizers": {},
|
1453 |
"node_i": "3581",
|
1454 |
+
"native_tokenizers": [],
|
1455 |
+
"scripts": []
|
1456 |
},
|
1457 |
{
|
1458 |
"name": "Cafundo Creole",
|
1459 |
"iso_1_code": null,
|
1460 |
"iso_3_code": "ccd",
|
|
|
1461 |
"children": [],
|
1462 |
+
"tokenizers": {},
|
1463 |
"node_i": "3582",
|
1464 |
+
"native_tokenizers": [],
|
1465 |
+
"scripts": []
|
1466 |
},
|
1467 |
{
|
1468 |
"name": "S\u00e3otomense",
|
1469 |
"iso_1_code": null,
|
1470 |
"iso_3_code": "cri",
|
1471 |
+
"children": [],
|
1472 |
"tokenizers": {
|
1473 |
"Latn": {
|
1474 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1475 |
+
"original_lang_name": "nigerian_pidgin",
|
1476 |
+
"original_lang_code": "pcm",
|
1477 |
+
"script": "Latn",
|
1478 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
1479 |
}
|
1480 |
},
|
|
|
1481 |
"node_i": "3583",
|
1482 |
+
"native_tokenizers": [],
|
1483 |
"scripts": [
|
1484 |
"Latn"
|
1485 |
+
]
|
|
|
1486 |
},
|
1487 |
{
|
1488 |
"name": "Fa d\u2019Ambu",
|
1489 |
"iso_1_code": null,
|
1490 |
"iso_3_code": "fab",
|
|
|
1491 |
"children": [],
|
1492 |
+
"tokenizers": {},
|
1493 |
"node_i": "3584",
|
1494 |
+
"native_tokenizers": [],
|
1495 |
+
"scripts": []
|
1496 |
},
|
1497 |
{
|
1498 |
"name": "Indo-Portuguese",
|
1499 |
"iso_1_code": null,
|
1500 |
"iso_3_code": "idb",
|
|
|
1501 |
"children": [],
|
1502 |
+
"tokenizers": {},
|
1503 |
"node_i": "3585",
|
1504 |
+
"native_tokenizers": [],
|
1505 |
+
"scripts": []
|
1506 |
},
|
1507 |
{
|
1508 |
"name": "Kabuverdianu",
|
1509 |
"iso_1_code": null,
|
1510 |
"iso_3_code": "kea",
|
1511 |
+
"children": [],
|
1512 |
"tokenizers": {
|
1513 |
"Latn": {
|
1514 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1515 |
+
"original_lang_name": "nigerian_pidgin",
|
1516 |
+
"original_lang_code": "pcm",
|
1517 |
+
"script": "Latn",
|
1518 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
1519 |
}
|
1520 |
},
|
|
|
1521 |
"node_i": "3586",
|
1522 |
+
"native_tokenizers": [],
|
1523 |
"scripts": [
|
1524 |
"Latn"
|
1525 |
+
]
|
|
|
1526 |
},
|
1527 |
{
|
1528 |
"name": "Malaccan Portuguese Creole",
|
1529 |
"iso_1_code": null,
|
1530 |
"iso_3_code": "mcm",
|
|
|
1531 |
"children": [],
|
1532 |
+
"tokenizers": {},
|
1533 |
"node_i": "3587",
|
1534 |
+
"native_tokenizers": [],
|
1535 |
+
"scripts": []
|
1536 |
},
|
1537 |
{
|
1538 |
"name": "Macanese",
|
1539 |
"iso_1_code": null,
|
1540 |
"iso_3_code": "mzs",
|
|
|
1541 |
"children": [],
|
1542 |
+
"tokenizers": {},
|
1543 |
"node_i": "3588",
|
1544 |
+
"native_tokenizers": [],
|
1545 |
+
"scripts": []
|
1546 |
},
|
1547 |
{
|
1548 |
"name": "Guinea-Bissau Creole",
|
1549 |
"iso_1_code": null,
|
1550 |
"iso_3_code": "pov",
|
1551 |
+
"children": [],
|
1552 |
"tokenizers": {
|
1553 |
"Latn": {
|
1554 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1555 |
+
"original_lang_name": "nigerian_pidgin",
|
1556 |
+
"original_lang_code": "pcm",
|
1557 |
+
"script": "Latn",
|
1558 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
1559 |
}
|
1560 |
},
|
|
|
1561 |
"node_i": "3589",
|
1562 |
+
"native_tokenizers": [],
|
1563 |
"scripts": [
|
1564 |
"Latn"
|
1565 |
+
]
|
|
|
1566 |
},
|
1567 |
{
|
1568 |
"name": "Principense",
|
1569 |
"iso_1_code": null,
|
1570 |
"iso_3_code": "pre",
|
|
|
1571 |
"children": [],
|
1572 |
+
"tokenizers": {},
|
1573 |
"node_i": "3590",
|
1574 |
+
"native_tokenizers": [],
|
1575 |
+
"scripts": []
|
1576 |
},
|
1577 |
{
|
1578 |
"name": "Ternate\u00f1o",
|
1579 |
"iso_1_code": null,
|
1580 |
"iso_3_code": "tmg",
|
|
|
1581 |
"children": [],
|
1582 |
+
"tokenizers": {},
|
1583 |
"node_i": "3591",
|
1584 |
+
"native_tokenizers": [],
|
1585 |
+
"scripts": []
|
1586 |
},
|
1587 |
{
|
1588 |
"name": "Pidgin, Timor",
|
1589 |
"iso_1_code": null,
|
1590 |
"iso_3_code": "tvy",
|
|
|
1591 |
"children": [],
|
1592 |
+
"tokenizers": {},
|
1593 |
"node_i": "3592",
|
1594 |
+
"native_tokenizers": [],
|
1595 |
+
"scripts": []
|
1596 |
},
|
1597 |
{
|
1598 |
"name": "Korlai Portuguese Creole",
|
1599 |
"iso_1_code": null,
|
1600 |
"iso_3_code": "vkp",
|
|
|
1601 |
"children": [],
|
1602 |
+
"tokenizers": {},
|
1603 |
"node_i": "3593",
|
1604 |
+
"native_tokenizers": [],
|
1605 |
+
"scripts": []
|
1606 |
}
|
1607 |
],
|
1608 |
+
"tokenizers": {
|
1609 |
+
"Latn": {
|
1610 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1611 |
+
"original_lang_name": "nigerian_pidgin",
|
1612 |
+
"original_lang_code": "pcm",
|
1613 |
+
"script": "Latn",
|
1614 |
+
"class_name": "StanzaTokenizer"
|
1615 |
+
}
|
1616 |
+
},
|
1617 |
"node_i": "3580",
|
1618 |
+
"native_tokenizers": [],
|
1619 |
+
"scripts": []
|
1620 |
},
|
1621 |
{
|
1622 |
"name": "Spanish based",
|
1623 |
"iso_1_code": null,
|
1624 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1625 |
"children": [
|
1626 |
{
|
1627 |
"name": "Chavacano",
|
1628 |
"iso_1_code": null,
|
1629 |
"iso_3_code": "cbk",
|
1630 |
+
"children": [],
|
1631 |
"tokenizers": {
|
1632 |
"Latn": {
|
1633 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1634 |
+
"original_lang_name": "nigerian_pidgin",
|
1635 |
+
"original_lang_code": "pcm",
|
1636 |
+
"script": "Latn",
|
1637 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
1638 |
}
|
1639 |
},
|
|
|
1640 |
"node_i": "3595",
|
1641 |
+
"native_tokenizers": [],
|
1642 |
"scripts": [
|
1643 |
"Latn"
|
1644 |
+
]
|
|
|
1645 |
},
|
1646 |
{
|
1647 |
"name": "Palenquero",
|
1648 |
"iso_1_code": null,
|
1649 |
"iso_3_code": "pln",
|
|
|
1650 |
"children": [],
|
1651 |
+
"tokenizers": {},
|
1652 |
"node_i": "3596",
|
1653 |
+
"native_tokenizers": [],
|
1654 |
+
"scripts": []
|
1655 |
}
|
1656 |
],
|
1657 |
+
"tokenizers": {
|
1658 |
+
"Latn": {
|
1659 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1660 |
+
"original_lang_name": "nigerian_pidgin",
|
1661 |
+
"original_lang_code": "pcm",
|
1662 |
+
"script": "Latn",
|
1663 |
+
"class_name": "StanzaTokenizer"
|
1664 |
+
}
|
1665 |
+
},
|
1666 |
"node_i": "3594",
|
1667 |
+
"native_tokenizers": [],
|
1668 |
+
"scripts": []
|
1669 |
},
|
1670 |
{
|
1671 |
"name": "Swahili based",
|
1672 |
"iso_1_code": null,
|
1673 |
"iso_3_code": null,
|
|
|
1674 |
"children": [
|
1675 |
{
|
1676 |
"name": "Cutchi-Swahili",
|
1677 |
"iso_1_code": null,
|
1678 |
"iso_3_code": "ccl",
|
|
|
1679 |
"children": [],
|
1680 |
+
"tokenizers": {},
|
1681 |
"node_i": "3598",
|
1682 |
+
"native_tokenizers": [],
|
1683 |
+
"scripts": []
|
1684 |
}
|
1685 |
],
|
1686 |
+
"tokenizers": {},
|
1687 |
"node_i": "3597",
|
1688 |
+
"native_tokenizers": [],
|
1689 |
+
"scripts": []
|
1690 |
},
|
1691 |
{
|
1692 |
"name": "Tetun based",
|
1693 |
"iso_1_code": null,
|
1694 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1695 |
"children": [
|
1696 |
{
|
1697 |
"name": "Tetun Dili",
|
1698 |
"iso_1_code": null,
|
1699 |
"iso_3_code": "tdt",
|
1700 |
+
"children": [],
|
1701 |
"tokenizers": {
|
1702 |
"Latn": {
|
1703 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1704 |
+
"original_lang_name": "nigerian_pidgin",
|
1705 |
+
"original_lang_code": "pcm",
|
1706 |
+
"script": "Latn",
|
1707 |
+
"class_name": "StanzaTokenizer"
|
|
|
|
|
|
|
|
|
|
|
1708 |
}
|
1709 |
},
|
|
|
1710 |
"node_i": "3600",
|
1711 |
+
"native_tokenizers": [],
|
1712 |
"scripts": [
|
1713 |
"Latn"
|
1714 |
+
]
|
|
|
1715 |
}
|
1716 |
],
|
1717 |
+
"tokenizers": {
|
1718 |
+
"Latn": {
|
1719 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1720 |
+
"original_lang_name": "nigerian_pidgin",
|
1721 |
+
"original_lang_code": "pcm",
|
1722 |
+
"script": "Latn",
|
1723 |
+
"class_name": "StanzaTokenizer"
|
1724 |
+
}
|
1725 |
+
},
|
1726 |
"node_i": "3599",
|
1727 |
+
"native_tokenizers": [],
|
1728 |
+
"scripts": []
|
1729 |
}
|
1730 |
],
|
1731 |
+
"tokenizers": {
|
1732 |
+
"Latn": {
|
1733 |
+
"full_object": "StanzaTokenizer(\"pcm\")",
|
1734 |
+
"original_lang_name": "nigerian_pidgin",
|
1735 |
+
"original_lang_code": "pcm",
|
1736 |
+
"script": "Latn",
|
1737 |
+
"class_name": "StanzaTokenizer"
|
1738 |
+
}
|
1739 |
+
},
|
1740 |
"node_i": "3481",
|
1741 |
+
"native_tokenizers": [],
|
1742 |
+
"scripts": []
|
1743 |
}
|
data/Dravidian.json
CHANGED
@@ -2,1765 +2,1436 @@
|
|
2 |
"name": "Dravidian",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {
|
6 |
-
"Telu": {
|
7 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
8 |
-
"original_lang_name": "telugu",
|
9 |
-
"original_lang_code": "tel",
|
10 |
-
"scripts": [
|
11 |
-
"Telu",
|
12 |
-
"Latn"
|
13 |
-
],
|
14 |
-
"class_name": "SpaCyTokenizer",
|
15 |
-
"macrolanguage": false
|
16 |
-
},
|
17 |
-
"Latn": {
|
18 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
19 |
-
"original_lang_name": "tamil",
|
20 |
-
"original_lang_code": "tam",
|
21 |
-
"scripts": [
|
22 |
-
"Latn",
|
23 |
-
"Taml"
|
24 |
-
],
|
25 |
-
"class_name": "SpaCyTokenizer",
|
26 |
-
"macrolanguage": false
|
27 |
-
},
|
28 |
-
"Knda": {
|
29 |
-
"full_object": "SpaCyTokenizer(\"kn\")",
|
30 |
-
"original_lang_name": "kannada",
|
31 |
-
"original_lang_code": "kan",
|
32 |
-
"scripts": [
|
33 |
-
"Latn",
|
34 |
-
"Knda"
|
35 |
-
],
|
36 |
-
"class_name": "SpaCyTokenizer",
|
37 |
-
"macrolanguage": false
|
38 |
-
},
|
39 |
-
"Mlym": {
|
40 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
41 |
-
"original_lang_name": "malayalam",
|
42 |
-
"original_lang_code": "mal",
|
43 |
-
"scripts": [
|
44 |
-
"Latn",
|
45 |
-
"Mlym"
|
46 |
-
],
|
47 |
-
"class_name": "SpaCyTokenizer",
|
48 |
-
"macrolanguage": false
|
49 |
-
},
|
50 |
-
"Taml": {
|
51 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
52 |
-
"original_lang_name": "tamil",
|
53 |
-
"original_lang_code": "tam",
|
54 |
-
"scripts": [
|
55 |
-
"Latn",
|
56 |
-
"Taml"
|
57 |
-
],
|
58 |
-
"class_name": "SpaCyTokenizer",
|
59 |
-
"macrolanguage": false
|
60 |
-
}
|
61 |
-
},
|
62 |
"children": [
|
63 |
{
|
64 |
"name": "Central",
|
65 |
"iso_1_code": null,
|
66 |
"iso_3_code": null,
|
67 |
-
"tokenizers": {},
|
68 |
"children": [
|
69 |
{
|
70 |
"name": "Kolami-Naiki",
|
71 |
"iso_1_code": null,
|
72 |
"iso_3_code": null,
|
73 |
-
"tokenizers": {},
|
74 |
"children": [
|
75 |
{
|
76 |
"name": "Kolami, Northwestern",
|
77 |
"iso_1_code": null,
|
78 |
"iso_3_code": "kfb",
|
79 |
-
"tokenizers": {},
|
80 |
"children": [],
|
|
|
81 |
"node_i": "3604",
|
82 |
-
"
|
83 |
-
"
|
84 |
},
|
85 |
{
|
86 |
"name": "Kolami, Southeastern",
|
87 |
"iso_1_code": null,
|
88 |
"iso_3_code": "nit",
|
89 |
-
"tokenizers": {},
|
90 |
"children": [],
|
|
|
91 |
"node_i": "3605",
|
92 |
-
"
|
93 |
-
"
|
94 |
}
|
95 |
],
|
|
|
96 |
"node_i": "3603",
|
97 |
-
"
|
98 |
-
"
|
99 |
},
|
100 |
{
|
101 |
"name": "Parji-Gadaba",
|
102 |
"iso_1_code": null,
|
103 |
"iso_3_code": null,
|
104 |
-
"tokenizers": {},
|
105 |
"children": [
|
106 |
{
|
107 |
"name": "Gadaba, Mudhili",
|
108 |
"iso_1_code": null,
|
109 |
"iso_3_code": "gau",
|
110 |
-
"tokenizers": {},
|
111 |
"children": [],
|
|
|
112 |
"node_i": "3607",
|
113 |
-
"
|
114 |
-
"
|
115 |
},
|
116 |
{
|
117 |
"name": "Gadaba, Pottangi Ollar",
|
118 |
"iso_1_code": null,
|
119 |
"iso_3_code": "gdb",
|
120 |
-
"tokenizers": {},
|
121 |
"children": [],
|
|
|
122 |
"node_i": "3608",
|
123 |
-
"
|
124 |
-
"
|
125 |
},
|
126 |
{
|
127 |
"name": "Duruwa",
|
128 |
"iso_1_code": null,
|
129 |
"iso_3_code": "pci",
|
130 |
-
"tokenizers": {},
|
131 |
"children": [],
|
|
|
132 |
"node_i": "3609",
|
133 |
-
"
|
134 |
-
"
|
135 |
}
|
136 |
],
|
|
|
137 |
"node_i": "3606",
|
138 |
-
"
|
139 |
-
"
|
140 |
}
|
141 |
],
|
|
|
142 |
"node_i": "3602",
|
143 |
-
"
|
144 |
-
"
|
145 |
},
|
146 |
{
|
147 |
"name": "Northern",
|
148 |
"iso_1_code": null,
|
149 |
"iso_3_code": null,
|
150 |
-
"tokenizers": {},
|
151 |
"children": [
|
152 |
{
|
153 |
"name": "Brahui",
|
154 |
"iso_1_code": null,
|
155 |
"iso_3_code": "brh",
|
156 |
-
"tokenizers": {},
|
157 |
"children": [],
|
|
|
158 |
"node_i": "3611",
|
|
|
159 |
"scripts": [
|
160 |
"Arab"
|
161 |
-
]
|
162 |
-
"own_tokenizer": false
|
163 |
},
|
164 |
{
|
165 |
"name": "Kumarbhag Paharia",
|
166 |
"iso_1_code": null,
|
167 |
"iso_3_code": "kmj",
|
168 |
-
"tokenizers": {},
|
169 |
"children": [],
|
|
|
170 |
"node_i": "3612",
|
171 |
-
"
|
172 |
-
"
|
173 |
},
|
174 |
{
|
175 |
"name": "Kurux",
|
176 |
"iso_1_code": null,
|
177 |
"iso_3_code": "kru",
|
178 |
-
"tokenizers": {},
|
179 |
"children": [],
|
|
|
180 |
"node_i": "3613",
|
|
|
181 |
"scripts": [
|
182 |
"Deva"
|
183 |
-
]
|
184 |
-
"own_tokenizer": false
|
185 |
},
|
186 |
{
|
187 |
"name": "Sauria Paharia",
|
188 |
"iso_1_code": null,
|
189 |
"iso_3_code": "mjt",
|
190 |
-
"tokenizers": {},
|
191 |
"children": [],
|
|
|
192 |
"node_i": "3614",
|
193 |
-
"
|
194 |
-
"
|
195 |
},
|
196 |
{
|
197 |
"name": "Kisan",
|
198 |
"iso_1_code": null,
|
199 |
"iso_3_code": "xis",
|
200 |
-
"tokenizers": {},
|
201 |
"children": [],
|
|
|
202 |
"node_i": "3615",
|
203 |
-
"
|
204 |
-
"
|
205 |
}
|
206 |
],
|
|
|
207 |
"node_i": "3610",
|
208 |
-
"
|
209 |
-
"
|
210 |
},
|
211 |
{
|
212 |
"name": "South-Central",
|
213 |
"iso_1_code": null,
|
214 |
"iso_3_code": null,
|
215 |
-
"tokenizers": {
|
216 |
-
"Telu": {
|
217 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
218 |
-
"original_lang_name": "telugu",
|
219 |
-
"original_lang_code": "tel",
|
220 |
-
"scripts": [
|
221 |
-
"Telu",
|
222 |
-
"Latn"
|
223 |
-
],
|
224 |
-
"class_name": "SpaCyTokenizer",
|
225 |
-
"macrolanguage": false
|
226 |
-
},
|
227 |
-
"Latn": {
|
228 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
229 |
-
"original_lang_name": "telugu",
|
230 |
-
"original_lang_code": "tel",
|
231 |
-
"scripts": [
|
232 |
-
"Telu",
|
233 |
-
"Latn"
|
234 |
-
],
|
235 |
-
"class_name": "SpaCyTokenizer",
|
236 |
-
"macrolanguage": false
|
237 |
-
}
|
238 |
-
},
|
239 |
"children": [
|
240 |
{
|
241 |
"name": "Gondi-Kui",
|
242 |
"iso_1_code": null,
|
243 |
"iso_3_code": null,
|
244 |
-
"tokenizers": {
|
245 |
-
"Telu": {
|
246 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
247 |
-
"original_lang_name": "telugu",
|
248 |
-
"original_lang_code": "tel",
|
249 |
-
"scripts": [
|
250 |
-
"Telu",
|
251 |
-
"Latn"
|
252 |
-
],
|
253 |
-
"class_name": "SpaCyTokenizer",
|
254 |
-
"macrolanguage": false
|
255 |
-
},
|
256 |
-
"Latn": {
|
257 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
258 |
-
"original_lang_name": "telugu",
|
259 |
-
"original_lang_code": "tel",
|
260 |
-
"scripts": [
|
261 |
-
"Telu",
|
262 |
-
"Latn"
|
263 |
-
],
|
264 |
-
"class_name": "SpaCyTokenizer",
|
265 |
-
"macrolanguage": false
|
266 |
-
}
|
267 |
-
},
|
268 |
"children": [
|
269 |
{
|
270 |
"name": "Gondi",
|
271 |
"iso_1_code": null,
|
272 |
"iso_3_code": null,
|
273 |
-
"tokenizers": {
|
274 |
-
"Telu": {
|
275 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
276 |
-
"original_lang_name": "telugu",
|
277 |
-
"original_lang_code": "tel",
|
278 |
-
"scripts": [
|
279 |
-
"Telu",
|
280 |
-
"Latn"
|
281 |
-
],
|
282 |
-
"class_name": "SpaCyTokenizer",
|
283 |
-
"macrolanguage": false
|
284 |
-
},
|
285 |
-
"Latn": {
|
286 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
287 |
-
"original_lang_name": "telugu",
|
288 |
-
"original_lang_code": "tel",
|
289 |
-
"scripts": [
|
290 |
-
"Telu",
|
291 |
-
"Latn"
|
292 |
-
],
|
293 |
-
"class_name": "SpaCyTokenizer",
|
294 |
-
"macrolanguage": false
|
295 |
-
}
|
296 |
-
},
|
297 |
"children": [
|
298 |
{
|
299 |
"name": "Maria, Dandami",
|
300 |
"iso_1_code": null,
|
301 |
"iso_3_code": "daq",
|
302 |
-
"tokenizers": {},
|
303 |
"children": [],
|
|
|
304 |
"node_i": "3619",
|
305 |
-
"
|
306 |
-
"
|
307 |
},
|
308 |
{
|
309 |
"name": "Muria, Eastern",
|
310 |
"iso_1_code": null,
|
311 |
"iso_3_code": "emu",
|
312 |
-
"tokenizers": {},
|
313 |
"children": [],
|
|
|
314 |
"node_i": "3620",
|
315 |
-
"
|
316 |
-
"
|
317 |
},
|
318 |
{
|
319 |
"name": "Gondi, Aheri",
|
320 |
"iso_1_code": null,
|
321 |
"iso_3_code": "esg",
|
322 |
-
"tokenizers": {},
|
323 |
"children": [],
|
|
|
324 |
"node_i": "3621",
|
325 |
-
"
|
326 |
-
"
|
327 |
},
|
328 |
{
|
329 |
"name": "Muria, Far Western",
|
330 |
"iso_1_code": null,
|
331 |
"iso_3_code": "fmu",
|
332 |
-
"tokenizers": {},
|
333 |
"children": [],
|
|
|
334 |
"node_i": "3622",
|
|
|
335 |
"scripts": [
|
336 |
"Deva"
|
337 |
-
]
|
338 |
-
"own_tokenizer": false
|
339 |
},
|
340 |
{
|
341 |
"name": "Gondi, Northern",
|
342 |
"iso_1_code": null,
|
343 |
"iso_3_code": "gno",
|
344 |
-
"tokenizers": {},
|
345 |
"children": [],
|
|
|
346 |
"node_i": "3623",
|
347 |
-
"
|
348 |
-
"
|
349 |
},
|
350 |
{
|
351 |
"name": "Khirwar",
|
352 |
"iso_1_code": null,
|
353 |
"iso_3_code": "kwx",
|
354 |
-
"tokenizers": {},
|
355 |
"children": [],
|
|
|
356 |
"node_i": "3624",
|
357 |
-
"
|
358 |
-
"
|
359 |
},
|
360 |
{
|
361 |
"name": "Maria",
|
362 |
"iso_1_code": null,
|
363 |
"iso_3_code": "mrr",
|
364 |
-
"tokenizers": {},
|
365 |
"children": [],
|
|
|
366 |
"node_i": "3625",
|
367 |
-
"
|
368 |
-
"
|
369 |
},
|
370 |
{
|
371 |
"name": "Muria, Western",
|
372 |
"iso_1_code": null,
|
373 |
"iso_3_code": "mut",
|
374 |
-
"tokenizers": {},
|
375 |
"children": [],
|
|
|
376 |
"node_i": "3626",
|
377 |
-
"
|
378 |
-
"
|
379 |
},
|
380 |
{
|
381 |
"name": "Nagarchal",
|
382 |
"iso_1_code": null,
|
383 |
"iso_3_code": "nbg",
|
384 |
-
"tokenizers": {},
|
385 |
"children": [],
|
|
|
386 |
"node_i": "3627",
|
387 |
-
"
|
388 |
-
"
|
389 |
},
|
390 |
{
|
391 |
"name": "Pardhan",
|
392 |
"iso_1_code": null,
|
393 |
"iso_3_code": "pch",
|
394 |
-
"tokenizers": {},
|
395 |
"children": [],
|
|
|
396 |
"node_i": "3628",
|
397 |
-
"
|
398 |
-
"
|
399 |
},
|
400 |
{
|
401 |
"name": "Gondi, Adilabad",
|
402 |
"iso_1_code": null,
|
403 |
"iso_3_code": "wsg",
|
|
|
404 |
"tokenizers": {
|
405 |
"Telu": {
|
406 |
-
"full_object": "
|
407 |
"original_lang_name": "telugu",
|
408 |
"original_lang_code": "tel",
|
409 |
-
"
|
410 |
-
|
411 |
-
"Latn"
|
412 |
-
],
|
413 |
-
"class_name": "SpaCyTokenizer",
|
414 |
-
"macrolanguage": false
|
415 |
}
|
416 |
},
|
417 |
-
"children": [],
|
418 |
"node_i": "3629",
|
|
|
419 |
"scripts": [
|
420 |
"Telu"
|
421 |
-
]
|
422 |
-
"own_tokenizer": false
|
423 |
}
|
424 |
],
|
425 |
-
"node_i": "3618",
|
426 |
-
"scripts": [],
|
427 |
-
"own_tokenizer": false
|
428 |
-
},
|
429 |
-
{
|
430 |
-
"name": "Konda-Kui",
|
431 |
-
"iso_1_code": null,
|
432 |
-
"iso_3_code": null,
|
433 |
"tokenizers": {
|
434 |
"Telu": {
|
435 |
-
"full_object": "
|
436 |
-
"original_lang_name": "telugu",
|
437 |
-
"original_lang_code": "tel",
|
438 |
-
"scripts": [
|
439 |
-
"Telu",
|
440 |
-
"Latn"
|
441 |
-
],
|
442 |
-
"class_name": "SpaCyTokenizer",
|
443 |
-
"macrolanguage": false
|
444 |
-
},
|
445 |
-
"Latn": {
|
446 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
447 |
"original_lang_name": "telugu",
|
448 |
"original_lang_code": "tel",
|
449 |
-
"
|
450 |
-
|
451 |
-
"Latn"
|
452 |
-
],
|
453 |
-
"class_name": "SpaCyTokenizer",
|
454 |
-
"macrolanguage": false
|
455 |
}
|
456 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
457 |
"children": [
|
458 |
{
|
459 |
"name": "Konda",
|
460 |
"iso_1_code": null,
|
461 |
"iso_3_code": null,
|
462 |
-
"tokenizers": {},
|
463 |
"children": [
|
464 |
{
|
465 |
"name": "Konda-Dora",
|
466 |
"iso_1_code": null,
|
467 |
"iso_3_code": "kfc",
|
468 |
-
"tokenizers": {},
|
469 |
"children": [],
|
|
|
470 |
"node_i": "3632",
|
471 |
-
"
|
472 |
-
"
|
473 |
},
|
474 |
{
|
475 |
"name": "Mukha-Dora",
|
476 |
"iso_1_code": null,
|
477 |
"iso_3_code": "mmk",
|
478 |
-
"tokenizers": {},
|
479 |
"children": [],
|
|
|
480 |
"node_i": "3633",
|
481 |
-
"
|
482 |
-
"
|
483 |
}
|
484 |
],
|
|
|
485 |
"node_i": "3631",
|
486 |
-
"
|
487 |
-
"
|
488 |
},
|
489 |
{
|
490 |
"name": "Manda-Kui",
|
491 |
"iso_1_code": null,
|
492 |
"iso_3_code": null,
|
493 |
-
"tokenizers": {
|
494 |
-
"Telu": {
|
495 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
496 |
-
"original_lang_name": "telugu",
|
497 |
-
"original_lang_code": "tel",
|
498 |
-
"scripts": [
|
499 |
-
"Telu",
|
500 |
-
"Latn"
|
501 |
-
],
|
502 |
-
"class_name": "SpaCyTokenizer",
|
503 |
-
"macrolanguage": false
|
504 |
-
},
|
505 |
-
"Latn": {
|
506 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
507 |
-
"original_lang_name": "telugu",
|
508 |
-
"original_lang_code": "tel",
|
509 |
-
"scripts": [
|
510 |
-
"Telu",
|
511 |
-
"Latn"
|
512 |
-
],
|
513 |
-
"class_name": "SpaCyTokenizer",
|
514 |
-
"macrolanguage": false
|
515 |
-
}
|
516 |
-
},
|
517 |
"children": [
|
518 |
{
|
519 |
"name": "Kui-Kuvi",
|
520 |
"iso_1_code": null,
|
521 |
"iso_3_code": null,
|
522 |
-
"tokenizers": {
|
523 |
-
"Telu": {
|
524 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
525 |
-
"original_lang_name": "telugu",
|
526 |
-
"original_lang_code": "tel",
|
527 |
-
"scripts": [
|
528 |
-
"Telu",
|
529 |
-
"Latn"
|
530 |
-
],
|
531 |
-
"class_name": "SpaCyTokenizer",
|
532 |
-
"macrolanguage": false
|
533 |
-
},
|
534 |
-
"Latn": {
|
535 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
536 |
-
"original_lang_name": "telugu",
|
537 |
-
"original_lang_code": "tel",
|
538 |
-
"scripts": [
|
539 |
-
"Telu",
|
540 |
-
"Latn"
|
541 |
-
],
|
542 |
-
"class_name": "SpaCyTokenizer",
|
543 |
-
"macrolanguage": false
|
544 |
-
}
|
545 |
-
},
|
546 |
"children": [
|
547 |
{
|
548 |
"name": "Kui, Dawik",
|
549 |
"iso_1_code": null,
|
550 |
"iso_3_code": "dwk",
|
551 |
-
"tokenizers": {},
|
552 |
"children": [],
|
|
|
553 |
"node_i": "3636",
|
554 |
-
"
|
555 |
-
"
|
556 |
},
|
557 |
{
|
558 |
"name": "Koya",
|
559 |
"iso_1_code": null,
|
560 |
"iso_3_code": "kff",
|
|
|
561 |
"tokenizers": {
|
562 |
"Telu": {
|
563 |
-
"full_object": "
|
564 |
"original_lang_name": "telugu",
|
565 |
"original_lang_code": "tel",
|
566 |
-
"
|
567 |
-
|
568 |
-
"Latn"
|
569 |
-
],
|
570 |
-
"class_name": "SpaCyTokenizer",
|
571 |
-
"macrolanguage": false
|
572 |
}
|
573 |
},
|
574 |
-
"children": [],
|
575 |
"node_i": "3637",
|
|
|
576 |
"scripts": [
|
577 |
"Telu"
|
578 |
-
]
|
579 |
-
"own_tokenizer": false
|
580 |
},
|
581 |
{
|
582 |
"name": "Kuvi",
|
583 |
"iso_1_code": null,
|
584 |
"iso_3_code": "kxv",
|
585 |
-
"tokenizers": {},
|
586 |
"children": [],
|
|
|
587 |
"node_i": "3638",
|
588 |
-
"
|
589 |
-
"
|
590 |
},
|
591 |
{
|
592 |
"name": "Kui",
|
593 |
"iso_1_code": null,
|
594 |
"iso_3_code": "uki",
|
595 |
-
"tokenizers": {},
|
596 |
"children": [],
|
|
|
597 |
"node_i": "3639",
|
598 |
-
"
|
599 |
-
"
|
600 |
}
|
601 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
602 |
"node_i": "3635",
|
603 |
-
"
|
604 |
-
"
|
605 |
},
|
606 |
{
|
607 |
"name": "Manda-Pengo",
|
608 |
"iso_1_code": null,
|
609 |
"iso_3_code": null,
|
610 |
-
"tokenizers": {},
|
611 |
"children": [
|
612 |
{
|
613 |
"name": "Manda",
|
614 |
"iso_1_code": null,
|
615 |
"iso_3_code": "mha",
|
616 |
-
"tokenizers": {},
|
617 |
"children": [],
|
|
|
618 |
"node_i": "3641",
|
619 |
-
"
|
620 |
-
"
|
621 |
},
|
622 |
{
|
623 |
"name": "Pengo",
|
624 |
"iso_1_code": null,
|
625 |
"iso_3_code": "peg",
|
626 |
-
"tokenizers": {},
|
627 |
"children": [],
|
|
|
628 |
"node_i": "3642",
|
629 |
-
"
|
630 |
-
"
|
631 |
}
|
632 |
],
|
|
|
633 |
"node_i": "3640",
|
634 |
-
"
|
635 |
-
"
|
636 |
}
|
637 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
638 |
"node_i": "3634",
|
639 |
-
"
|
640 |
-
"
|
641 |
}
|
642 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
643 |
"node_i": "3630",
|
644 |
-
"
|
645 |
-
"
|
646 |
}
|
647 |
],
|
648 |
-
"node_i": "3617",
|
649 |
-
"scripts": [],
|
650 |
-
"own_tokenizer": false
|
651 |
-
},
|
652 |
-
{
|
653 |
-
"name": "Telugu",
|
654 |
-
"iso_1_code": null,
|
655 |
-
"iso_3_code": null,
|
656 |
"tokenizers": {
|
657 |
"Telu": {
|
658 |
-
"full_object": "
|
659 |
"original_lang_name": "telugu",
|
660 |
"original_lang_code": "tel",
|
661 |
-
"
|
662 |
-
|
663 |
-
"Latn"
|
664 |
-
],
|
665 |
-
"class_name": "SpaCyTokenizer",
|
666 |
-
"macrolanguage": false
|
667 |
-
},
|
668 |
-
"Latn": {
|
669 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
670 |
-
"original_lang_name": "telugu",
|
671 |
-
"original_lang_code": "tel",
|
672 |
-
"scripts": [
|
673 |
-
"Telu",
|
674 |
-
"Latn"
|
675 |
-
],
|
676 |
-
"class_name": "SpaCyTokenizer",
|
677 |
-
"macrolanguage": false
|
678 |
}
|
679 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
680 |
"children": [
|
681 |
{
|
682 |
"name": "Chenchu",
|
683 |
"iso_1_code": null,
|
684 |
"iso_3_code": "cde",
|
685 |
-
"tokenizers": {},
|
686 |
"children": [],
|
|
|
687 |
"node_i": "3644",
|
688 |
-
"
|
689 |
-
"
|
690 |
},
|
691 |
{
|
692 |
"name": "Manna-Dora",
|
693 |
"iso_1_code": null,
|
694 |
"iso_3_code": "mju",
|
695 |
-
"tokenizers": {},
|
696 |
"children": [],
|
|
|
697 |
"node_i": "3645",
|
698 |
-
"
|
699 |
-
"
|
700 |
},
|
701 |
{
|
702 |
"name": "Telugu",
|
703 |
"iso_1_code": "te",
|
704 |
"iso_3_code": "tel",
|
|
|
705 |
"tokenizers": {
|
706 |
"Telu": {
|
707 |
-
"full_object": "
|
708 |
-
"original_lang_name": "telugu",
|
709 |
-
"original_lang_code": "tel",
|
710 |
-
"scripts": [
|
711 |
-
"Telu",
|
712 |
-
"Latn"
|
713 |
-
],
|
714 |
-
"class_name": "SpaCyTokenizer",
|
715 |
-
"macrolanguage": false
|
716 |
-
},
|
717 |
-
"Latn": {
|
718 |
-
"full_object": "SpaCyTokenizer(\"te\")",
|
719 |
"original_lang_name": "telugu",
|
720 |
"original_lang_code": "tel",
|
721 |
-
"
|
722 |
-
|
723 |
-
"Latn"
|
724 |
-
],
|
725 |
-
"class_name": "SpaCyTokenizer",
|
726 |
-
"macrolanguage": false
|
727 |
}
|
728 |
},
|
729 |
-
"children": [],
|
730 |
"node_i": "3646",
|
|
|
|
|
|
|
731 |
"scripts": [
|
732 |
"Telu",
|
733 |
"Latn"
|
734 |
-
]
|
735 |
-
"own_tokenizer": true
|
736 |
},
|
737 |
{
|
738 |
"name": "Waddar",
|
739 |
"iso_1_code": null,
|
740 |
"iso_3_code": "wbq",
|
741 |
-
"tokenizers": {},
|
742 |
"children": [],
|
|
|
743 |
"node_i": "3647",
|
744 |
-
"
|
745 |
-
"
|
746 |
}
|
747 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
748 |
"node_i": "3643",
|
749 |
-
"
|
750 |
-
"
|
751 |
}
|
752 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
753 |
"node_i": "3616",
|
754 |
-
"
|
755 |
-
"
|
756 |
},
|
757 |
{
|
758 |
"name": "Southern",
|
759 |
"iso_1_code": null,
|
760 |
"iso_3_code": null,
|
761 |
-
"tokenizers": {
|
762 |
-
"Latn": {
|
763 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
764 |
-
"original_lang_name": "tamil",
|
765 |
-
"original_lang_code": "tam",
|
766 |
-
"scripts": [
|
767 |
-
"Latn",
|
768 |
-
"Taml"
|
769 |
-
],
|
770 |
-
"class_name": "SpaCyTokenizer",
|
771 |
-
"macrolanguage": false
|
772 |
-
},
|
773 |
-
"Knda": {
|
774 |
-
"full_object": "SpaCyTokenizer(\"kn\")",
|
775 |
-
"original_lang_name": "kannada",
|
776 |
-
"original_lang_code": "kan",
|
777 |
-
"scripts": [
|
778 |
-
"Latn",
|
779 |
-
"Knda"
|
780 |
-
],
|
781 |
-
"class_name": "SpaCyTokenizer",
|
782 |
-
"macrolanguage": false
|
783 |
-
},
|
784 |
-
"Mlym": {
|
785 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
786 |
-
"original_lang_name": "malayalam",
|
787 |
-
"original_lang_code": "mal",
|
788 |
-
"scripts": [
|
789 |
-
"Latn",
|
790 |
-
"Mlym"
|
791 |
-
],
|
792 |
-
"class_name": "SpaCyTokenizer",
|
793 |
-
"macrolanguage": false
|
794 |
-
},
|
795 |
-
"Taml": {
|
796 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
797 |
-
"original_lang_name": "tamil",
|
798 |
-
"original_lang_code": "tam",
|
799 |
-
"scripts": [
|
800 |
-
"Latn",
|
801 |
-
"Taml"
|
802 |
-
],
|
803 |
-
"class_name": "SpaCyTokenizer",
|
804 |
-
"macrolanguage": false
|
805 |
-
}
|
806 |
-
},
|
807 |
"children": [
|
808 |
{
|
809 |
"name": "Kurichiya",
|
810 |
"iso_1_code": null,
|
811 |
"iso_3_code": "kfh",
|
812 |
-
"tokenizers": {},
|
813 |
"children": [],
|
|
|
814 |
"node_i": "3649",
|
815 |
-
"
|
816 |
-
"
|
817 |
},
|
818 |
{
|
819 |
"name": "Kurumba, Attapady",
|
820 |
"iso_1_code": null,
|
821 |
"iso_3_code": "pkr",
|
822 |
-
"tokenizers": {},
|
823 |
"children": [],
|
|
|
824 |
"node_i": "3650",
|
825 |
-
"
|
826 |
-
"
|
827 |
},
|
828 |
{
|
829 |
"name": "Pathiya",
|
830 |
"iso_1_code": null,
|
831 |
"iso_3_code": "pty",
|
832 |
-
"tokenizers": {},
|
833 |
"children": [],
|
|
|
834 |
"node_i": "3651",
|
835 |
-
"
|
836 |
-
"
|
837 |
},
|
838 |
{
|
839 |
"name": "Muduga",
|
840 |
"iso_1_code": null,
|
841 |
"iso_3_code": "udg",
|
842 |
-
"tokenizers": {},
|
843 |
"children": [],
|
|
|
844 |
"node_i": "3652",
|
845 |
-
"
|
846 |
-
"
|
847 |
},
|
848 |
{
|
849 |
"name": "Kumbaran",
|
850 |
"iso_1_code": null,
|
851 |
"iso_3_code": "wkb",
|
852 |
-
"tokenizers": {},
|
853 |
"children": [],
|
|
|
854 |
"node_i": "3653",
|
855 |
-
"
|
856 |
-
"
|
857 |
},
|
858 |
{
|
859 |
"name": "Kalanadi",
|
860 |
"iso_1_code": null,
|
861 |
"iso_3_code": "wkl",
|
862 |
-
"tokenizers": {},
|
863 |
"children": [],
|
|
|
864 |
"node_i": "3654",
|
865 |
-
"
|
866 |
-
"
|
867 |
},
|
868 |
{
|
869 |
"name": "Kunduvadi",
|
870 |
"iso_1_code": null,
|
871 |
"iso_3_code": "wku",
|
872 |
-
"tokenizers": {},
|
873 |
"children": [],
|
|
|
874 |
"node_i": "3655",
|
875 |
-
"
|
876 |
-
"
|
877 |
},
|
878 |
{
|
879 |
"name": "Tamil-Kannada",
|
880 |
"iso_1_code": null,
|
881 |
"iso_3_code": null,
|
882 |
-
"tokenizers": {
|
883 |
-
"Latn": {
|
884 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
885 |
-
"original_lang_name": "tamil",
|
886 |
-
"original_lang_code": "tam",
|
887 |
-
"scripts": [
|
888 |
-
"Latn",
|
889 |
-
"Taml"
|
890 |
-
],
|
891 |
-
"class_name": "SpaCyTokenizer",
|
892 |
-
"macrolanguage": false
|
893 |
-
},
|
894 |
-
"Knda": {
|
895 |
-
"full_object": "SpaCyTokenizer(\"kn\")",
|
896 |
-
"original_lang_name": "kannada",
|
897 |
-
"original_lang_code": "kan",
|
898 |
-
"scripts": [
|
899 |
-
"Latn",
|
900 |
-
"Knda"
|
901 |
-
],
|
902 |
-
"class_name": "SpaCyTokenizer",
|
903 |
-
"macrolanguage": false
|
904 |
-
},
|
905 |
-
"Mlym": {
|
906 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
907 |
-
"original_lang_name": "malayalam",
|
908 |
-
"original_lang_code": "mal",
|
909 |
-
"scripts": [
|
910 |
-
"Latn",
|
911 |
-
"Mlym"
|
912 |
-
],
|
913 |
-
"class_name": "SpaCyTokenizer",
|
914 |
-
"macrolanguage": false
|
915 |
-
},
|
916 |
-
"Taml": {
|
917 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
918 |
-
"original_lang_name": "tamil",
|
919 |
-
"original_lang_code": "tam",
|
920 |
-
"scripts": [
|
921 |
-
"Latn",
|
922 |
-
"Taml"
|
923 |
-
],
|
924 |
-
"class_name": "SpaCyTokenizer",
|
925 |
-
"macrolanguage": false
|
926 |
-
}
|
927 |
-
},
|
928 |
"children": [
|
929 |
{
|
930 |
"name": "Kannada",
|
931 |
"iso_1_code": null,
|
932 |
"iso_3_code": null,
|
933 |
-
"tokenizers": {
|
934 |
-
"Latn": {
|
935 |
-
"full_object": "SpaCyTokenizer(\"kn\")",
|
936 |
-
"original_lang_name": "kannada",
|
937 |
-
"original_lang_code": "kan",
|
938 |
-
"scripts": [
|
939 |
-
"Latn",
|
940 |
-
"Knda"
|
941 |
-
],
|
942 |
-
"class_name": "SpaCyTokenizer",
|
943 |
-
"macrolanguage": false
|
944 |
-
},
|
945 |
-
"Knda": {
|
946 |
-
"full_object": "SpaCyTokenizer(\"kn\")",
|
947 |
-
"original_lang_name": "kannada",
|
948 |
-
"original_lang_code": "kan",
|
949 |
-
"scripts": [
|
950 |
-
"Latn",
|
951 |
-
"Knda"
|
952 |
-
],
|
953 |
-
"class_name": "SpaCyTokenizer",
|
954 |
-
"macrolanguage": false
|
955 |
-
}
|
956 |
-
},
|
957 |
"children": [
|
958 |
{
|
959 |
"name": "Badaga",
|
960 |
"iso_1_code": null,
|
961 |
"iso_3_code": "bfq",
|
962 |
-
"tokenizers": {},
|
963 |
"children": [],
|
|
|
964 |
"node_i": "3658",
|
965 |
-
"
|
966 |
-
"
|
967 |
},
|
968 |
{
|
969 |
"name": "Holiya",
|
970 |
"iso_1_code": null,
|
971 |
"iso_3_code": "hoy",
|
972 |
-
"tokenizers": {},
|
973 |
"children": [],
|
|
|
974 |
"node_i": "3659",
|
975 |
-
"
|
976 |
-
"
|
977 |
},
|
978 |
{
|
979 |
"name": "Kannada",
|
980 |
"iso_1_code": "kn",
|
981 |
"iso_3_code": "kan",
|
|
|
982 |
"tokenizers": {
|
983 |
-
"Latn": {
|
984 |
-
"full_object": "SpaCyTokenizer(\"kn\")",
|
985 |
-
"original_lang_name": "kannada",
|
986 |
-
"original_lang_code": "kan",
|
987 |
-
"scripts": [
|
988 |
-
"Latn",
|
989 |
-
"Knda"
|
990 |
-
],
|
991 |
-
"class_name": "SpaCyTokenizer",
|
992 |
-
"macrolanguage": false
|
993 |
-
},
|
994 |
"Knda": {
|
995 |
-
"full_object": "
|
996 |
"original_lang_name": "kannada",
|
997 |
"original_lang_code": "kan",
|
998 |
-
"
|
999 |
-
|
1000 |
-
"Knda"
|
1001 |
-
],
|
1002 |
-
"class_name": "SpaCyTokenizer",
|
1003 |
-
"macrolanguage": false
|
1004 |
}
|
1005 |
},
|
1006 |
-
"children": [],
|
1007 |
"node_i": "3660",
|
|
|
|
|
|
|
1008 |
"scripts": [
|
1009 |
"Latn",
|
1010 |
"Knda"
|
1011 |
-
]
|
1012 |
-
"own_tokenizer": true
|
1013 |
},
|
1014 |
{
|
1015 |
"name": "Urali",
|
1016 |
"iso_1_code": null,
|
1017 |
"iso_3_code": "url",
|
1018 |
-
"tokenizers": {},
|
1019 |
"children": [],
|
|
|
1020 |
"node_i": "3661",
|
1021 |
-
"
|
1022 |
-
"
|
1023 |
}
|
1024 |
],
|
1025 |
-
"node_i": "3657",
|
1026 |
-
"scripts": [],
|
1027 |
-
"own_tokenizer": false
|
1028 |
-
},
|
1029 |
-
{
|
1030 |
-
"name": "Tamil-Kodagu",
|
1031 |
-
"iso_1_code": null,
|
1032 |
-
"iso_3_code": null,
|
1033 |
"tokenizers": {
|
1034 |
-
"
|
1035 |
-
"full_object": "
|
1036 |
-
"original_lang_name": "
|
1037 |
-
"original_lang_code": "
|
1038 |
-
"
|
1039 |
-
|
1040 |
-
"Taml"
|
1041 |
-
],
|
1042 |
-
"class_name": "SpaCyTokenizer",
|
1043 |
-
"macrolanguage": false
|
1044 |
-
},
|
1045 |
-
"Mlym": {
|
1046 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
1047 |
-
"original_lang_name": "malayalam",
|
1048 |
-
"original_lang_code": "mal",
|
1049 |
-
"scripts": [
|
1050 |
-
"Latn",
|
1051 |
-
"Mlym"
|
1052 |
-
],
|
1053 |
-
"class_name": "SpaCyTokenizer",
|
1054 |
-
"macrolanguage": false
|
1055 |
-
},
|
1056 |
-
"Taml": {
|
1057 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
1058 |
-
"original_lang_name": "tamil",
|
1059 |
-
"original_lang_code": "tam",
|
1060 |
-
"scripts": [
|
1061 |
-
"Latn",
|
1062 |
-
"Taml"
|
1063 |
-
],
|
1064 |
-
"class_name": "SpaCyTokenizer",
|
1065 |
-
"macrolanguage": false
|
1066 |
}
|
1067 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1068 |
"children": [
|
1069 |
{
|
1070 |
"name": "Kodagu",
|
1071 |
"iso_1_code": null,
|
1072 |
"iso_3_code": null,
|
1073 |
-
"tokenizers": {},
|
1074 |
"children": [
|
1075 |
{
|
1076 |
"name": "Kodava",
|
1077 |
"iso_1_code": null,
|
1078 |
"iso_3_code": "kfa",
|
1079 |
-
"tokenizers": {},
|
1080 |
"children": [],
|
|
|
1081 |
"node_i": "3664",
|
1082 |
-
"
|
1083 |
-
"
|
1084 |
},
|
1085 |
{
|
1086 |
"name": "Kurumba, Kannada",
|
1087 |
"iso_1_code": null,
|
1088 |
"iso_3_code": "kfi",
|
1089 |
-
"tokenizers": {},
|
1090 |
"children": [],
|
|
|
1091 |
"node_i": "3665",
|
1092 |
-
"
|
1093 |
-
"
|
1094 |
},
|
1095 |
{
|
1096 |
"name": "Kurumba, Mullu",
|
1097 |
"iso_1_code": null,
|
1098 |
"iso_3_code": "kpb",
|
1099 |
-
"tokenizers": {},
|
1100 |
"children": [],
|
|
|
1101 |
"node_i": "3666",
|
1102 |
-
"
|
1103 |
-
"
|
1104 |
},
|
1105 |
{
|
1106 |
"name": "Kurumba, Alu",
|
1107 |
"iso_1_code": null,
|
1108 |
"iso_3_code": "xua",
|
1109 |
-
"tokenizers": {},
|
1110 |
"children": [],
|
|
|
1111 |
"node_i": "3667",
|
1112 |
-
"
|
1113 |
-
"
|
1114 |
},
|
1115 |
{
|
1116 |
"name": "Kurumba, Jennu",
|
1117 |
"iso_1_code": null,
|
1118 |
"iso_3_code": "xuj",
|
1119 |
-
"tokenizers": {},
|
1120 |
"children": [],
|
|
|
1121 |
"node_i": "3668",
|
1122 |
-
"
|
1123 |
-
"
|
1124 |
}
|
1125 |
],
|
|
|
1126 |
"node_i": "3663",
|
1127 |
-
"
|
1128 |
-
"
|
1129 |
},
|
1130 |
{
|
1131 |
"name": "Tamil-Malayalam",
|
1132 |
"iso_1_code": null,
|
1133 |
"iso_3_code": null,
|
1134 |
-
"tokenizers": {
|
1135 |
-
"Latn": {
|
1136 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
1137 |
-
"original_lang_name": "tamil",
|
1138 |
-
"original_lang_code": "tam",
|
1139 |
-
"scripts": [
|
1140 |
-
"Latn",
|
1141 |
-
"Taml"
|
1142 |
-
],
|
1143 |
-
"class_name": "SpaCyTokenizer",
|
1144 |
-
"macrolanguage": false
|
1145 |
-
},
|
1146 |
-
"Mlym": {
|
1147 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
1148 |
-
"original_lang_name": "malayalam",
|
1149 |
-
"original_lang_code": "mal",
|
1150 |
-
"scripts": [
|
1151 |
-
"Latn",
|
1152 |
-
"Mlym"
|
1153 |
-
],
|
1154 |
-
"class_name": "SpaCyTokenizer",
|
1155 |
-
"macrolanguage": false
|
1156 |
-
},
|
1157 |
-
"Taml": {
|
1158 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
1159 |
-
"original_lang_name": "tamil",
|
1160 |
-
"original_lang_code": "tam",
|
1161 |
-
"scripts": [
|
1162 |
-
"Latn",
|
1163 |
-
"Taml"
|
1164 |
-
],
|
1165 |
-
"class_name": "SpaCyTokenizer",
|
1166 |
-
"macrolanguage": false
|
1167 |
-
}
|
1168 |
-
},
|
1169 |
"children": [
|
1170 |
{
|
1171 |
"name": "Mannan",
|
1172 |
"iso_1_code": null,
|
1173 |
"iso_3_code": "mjv",
|
1174 |
-
"tokenizers": {},
|
1175 |
"children": [],
|
|
|
1176 |
"node_i": "3670",
|
1177 |
-
"
|
1178 |
-
"
|
1179 |
},
|
1180 |
{
|
1181 |
"name": "Malayalam",
|
1182 |
"iso_1_code": null,
|
1183 |
"iso_3_code": null,
|
1184 |
-
"tokenizers": {
|
1185 |
-
"Latn": {
|
1186 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
1187 |
-
"original_lang_name": "malayalam",
|
1188 |
-
"original_lang_code": "mal",
|
1189 |
-
"scripts": [
|
1190 |
-
"Latn",
|
1191 |
-
"Mlym"
|
1192 |
-
],
|
1193 |
-
"class_name": "SpaCyTokenizer",
|
1194 |
-
"macrolanguage": false
|
1195 |
-
},
|
1196 |
-
"Mlym": {
|
1197 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
1198 |
-
"original_lang_name": "malayalam",
|
1199 |
-
"original_lang_code": "mal",
|
1200 |
-
"scripts": [
|
1201 |
-
"Latn",
|
1202 |
-
"Mlym"
|
1203 |
-
],
|
1204 |
-
"class_name": "SpaCyTokenizer",
|
1205 |
-
"macrolanguage": false
|
1206 |
-
}
|
1207 |
-
},
|
1208 |
"children": [
|
1209 |
{
|
1210 |
"name": "Aranadan",
|
1211 |
"iso_1_code": null,
|
1212 |
"iso_3_code": "aaf",
|
1213 |
-
"tokenizers": {},
|
1214 |
"children": [],
|
|
|
1215 |
"node_i": "3672",
|
1216 |
-
"
|
1217 |
-
"
|
1218 |
},
|
1219 |
{
|
1220 |
"name": "Kadar",
|
1221 |
"iso_1_code": null,
|
1222 |
"iso_3_code": "kej",
|
1223 |
-
"tokenizers": {},
|
1224 |
"children": [],
|
|
|
1225 |
"node_i": "3673",
|
1226 |
-
"
|
1227 |
-
"
|
1228 |
},
|
1229 |
{
|
1230 |
"name": "Malayalam",
|
1231 |
"iso_1_code": "ml",
|
1232 |
"iso_3_code": "mal",
|
|
|
1233 |
"tokenizers": {
|
1234 |
-
"Latn": {
|
1235 |
-
"full_object": "SpaCyTokenizer(\"ml\")",
|
1236 |
-
"original_lang_name": "malayalam",
|
1237 |
-
"original_lang_code": "mal",
|
1238 |
-
"scripts": [
|
1239 |
-
"Latn",
|
1240 |
-
"Mlym"
|
1241 |
-
],
|
1242 |
-
"class_name": "SpaCyTokenizer",
|
1243 |
-
"macrolanguage": false
|
1244 |
-
},
|
1245 |
"Mlym": {
|
1246 |
-
"full_object": "
|
1247 |
"original_lang_name": "malayalam",
|
1248 |
"original_lang_code": "mal",
|
1249 |
-
"
|
1250 |
-
|
1251 |
-
"Mlym"
|
1252 |
-
],
|
1253 |
-
"class_name": "SpaCyTokenizer",
|
1254 |
-
"macrolanguage": false
|
1255 |
}
|
1256 |
},
|
1257 |
-
"children": [],
|
1258 |
"node_i": "3674",
|
|
|
|
|
|
|
1259 |
"scripts": [
|
1260 |
"Latn",
|
1261 |
"Mlym"
|
1262 |
-
]
|
1263 |
-
"own_tokenizer": true
|
1264 |
},
|
1265 |
{
|
1266 |
"name": "Malapandaram",
|
1267 |
"iso_1_code": null,
|
1268 |
"iso_3_code": "mjp",
|
1269 |
-
"tokenizers": {},
|
1270 |
"children": [],
|
|
|
1271 |
"node_i": "3675",
|
1272 |
-
"
|
1273 |
-
"
|
1274 |
},
|
1275 |
{
|
1276 |
"name": "Malaryan",
|
1277 |
"iso_1_code": null,
|
1278 |
"iso_3_code": "mjq",
|
1279 |
-
"tokenizers": {},
|
1280 |
"children": [],
|
|
|
1281 |
"node_i": "3676",
|
1282 |
-
"
|
1283 |
-
"
|
1284 |
},
|
1285 |
{
|
1286 |
"name": "Malavedan",
|
1287 |
"iso_1_code": null,
|
1288 |
"iso_3_code": "mjr",
|
1289 |
-
"tokenizers": {},
|
1290 |
"children": [],
|
|
|
1291 |
"node_i": "3677",
|
1292 |
-
"
|
1293 |
-
"
|
1294 |
},
|
1295 |
{
|
1296 |
"name": "Paliyan",
|
1297 |
"iso_1_code": null,
|
1298 |
"iso_3_code": "pcf",
|
1299 |
-
"tokenizers": {},
|
1300 |
"children": [],
|
|
|
1301 |
"node_i": "3678",
|
1302 |
-
"
|
1303 |
-
"
|
1304 |
},
|
1305 |
{
|
1306 |
"name": "Paniya",
|
1307 |
"iso_1_code": null,
|
1308 |
"iso_3_code": "pcg",
|
1309 |
-
"tokenizers": {},
|
1310 |
"children": [],
|
|
|
1311 |
"node_i": "3679",
|
1312 |
-
"
|
1313 |
-
"
|
1314 |
},
|
1315 |
{
|
1316 |
"name": "Ravula",
|
1317 |
"iso_1_code": null,
|
1318 |
"iso_3_code": "yea",
|
1319 |
-
"tokenizers": {},
|
1320 |
"children": [],
|
|
|
1321 |
"node_i": "3680",
|
1322 |
-
"
|
1323 |
-
"
|
1324 |
}
|
1325 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1326 |
"node_i": "3671",
|
1327 |
-
"
|
1328 |
-
"
|
1329 |
},
|
1330 |
{
|
1331 |
"name": "Tamil",
|
1332 |
"iso_1_code": null,
|
1333 |
"iso_3_code": null,
|
1334 |
-
"tokenizers": {
|
1335 |
-
"Latn": {
|
1336 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
1337 |
-
"original_lang_name": "tamil",
|
1338 |
-
"original_lang_code": "tam",
|
1339 |
-
"scripts": [
|
1340 |
-
"Latn",
|
1341 |
-
"Taml"
|
1342 |
-
],
|
1343 |
-
"class_name": "SpaCyTokenizer",
|
1344 |
-
"macrolanguage": false
|
1345 |
-
},
|
1346 |
-
"Taml": {
|
1347 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
1348 |
-
"original_lang_name": "tamil",
|
1349 |
-
"original_lang_code": "tam",
|
1350 |
-
"scripts": [
|
1351 |
-
"Latn",
|
1352 |
-
"Taml"
|
1353 |
-
],
|
1354 |
-
"class_name": "SpaCyTokenizer",
|
1355 |
-
"macrolanguage": false
|
1356 |
-
}
|
1357 |
-
},
|
1358 |
"children": [
|
1359 |
{
|
1360 |
"name": "Eravallan",
|
1361 |
"iso_1_code": null,
|
1362 |
"iso_3_code": "era",
|
1363 |
-
"tokenizers": {},
|
1364 |
"children": [],
|
|
|
1365 |
"node_i": "3682",
|
1366 |
-
"
|
1367 |
-
"
|
1368 |
},
|
1369 |
{
|
1370 |
"name": "Irula",
|
1371 |
"iso_1_code": null,
|
1372 |
"iso_3_code": "iru",
|
1373 |
-
"tokenizers": {},
|
1374 |
"children": [],
|
|
|
1375 |
"node_i": "3683",
|
1376 |
-
"
|
1377 |
-
"
|
1378 |
},
|
1379 |
{
|
1380 |
"name": "Kaikadi",
|
1381 |
"iso_1_code": null,
|
1382 |
"iso_3_code": "kep",
|
1383 |
-
"tokenizers": {},
|
1384 |
"children": [],
|
|
|
1385 |
"node_i": "3684",
|
1386 |
-
"
|
1387 |
-
"
|
1388 |
},
|
1389 |
{
|
1390 |
"name": "Kanikkaran",
|
1391 |
"iso_1_code": null,
|
1392 |
"iso_3_code": "kev",
|
1393 |
-
"tokenizers": {},
|
1394 |
"children": [],
|
|
|
1395 |
"node_i": "3685",
|
1396 |
-
"
|
1397 |
-
"
|
1398 |
},
|
1399 |
{
|
1400 |
"name": "Muthuvan",
|
1401 |
"iso_1_code": null,
|
1402 |
"iso_3_code": "muv",
|
1403 |
-
"tokenizers": {},
|
1404 |
"children": [],
|
|
|
1405 |
"node_i": "3686",
|
1406 |
-
"
|
1407 |
-
"
|
1408 |
},
|
1409 |
{
|
1410 |
"name": "Sholaga",
|
1411 |
"iso_1_code": null,
|
1412 |
"iso_3_code": "sle",
|
1413 |
-
"tokenizers": {},
|
1414 |
"children": [],
|
|
|
1415 |
"node_i": "3687",
|
1416 |
-
"
|
1417 |
-
"
|
1418 |
},
|
1419 |
{
|
1420 |
"name": "Tamil",
|
1421 |
"iso_1_code": "ta",
|
1422 |
"iso_3_code": "tam",
|
|
|
1423 |
"tokenizers": {
|
1424 |
-
"Latn": {
|
1425 |
-
"full_object": "SpaCyTokenizer(\"ta\")",
|
1426 |
-
"original_lang_name": "tamil",
|
1427 |
-
"original_lang_code": "tam",
|
1428 |
-
"scripts": [
|
1429 |
-
"Latn",
|
1430 |
-
"Taml"
|
1431 |
-
],
|
1432 |
-
"class_name": "SpaCyTokenizer",
|
1433 |
-
"macrolanguage": false
|
1434 |
-
},
|
1435 |
"Taml": {
|
1436 |
-
"full_object": "
|
1437 |
"original_lang_name": "tamil",
|
1438 |
"original_lang_code": "tam",
|
1439 |
-
"
|
1440 |
-
|
1441 |
-
"Taml"
|
1442 |
-
],
|
1443 |
-
"class_name": "SpaCyTokenizer",
|
1444 |
-
"macrolanguage": false
|
1445 |
}
|
1446 |
},
|
1447 |
-
"children": [],
|
1448 |
"node_i": "3688",
|
|
|
|
|
|
|
1449 |
"scripts": [
|
1450 |
"Taml",
|
1451 |
"Latn"
|
1452 |
-
]
|
1453 |
-
"own_tokenizer": true
|
1454 |
},
|
1455 |
{
|
1456 |
"name": "Kurumba, Betta",
|
1457 |
"iso_1_code": null,
|
1458 |
"iso_3_code": "xub",
|
1459 |
-
"tokenizers": {},
|
1460 |
"children": [],
|
|
|
1461 |
"node_i": "3689",
|
1462 |
-
"
|
1463 |
-
"
|
1464 |
},
|
1465 |
{
|
1466 |
"name": "Yerukula",
|
1467 |
"iso_1_code": null,
|
1468 |
"iso_3_code": "yeu",
|
1469 |
-
"tokenizers": {},
|
1470 |
"children": [],
|
|
|
1471 |
"node_i": "3690",
|
1472 |
-
"
|
1473 |
-
"
|
1474 |
}
|
1475 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1476 |
"node_i": "3681",
|
1477 |
-
"
|
1478 |
-
"
|
1479 |
}
|
1480 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1481 |
"node_i": "3669",
|
1482 |
-
"
|
1483 |
-
"
|
1484 |
},
|
1485 |
{
|
1486 |
"name": "Toda-Kota",
|
1487 |
"iso_1_code": null,
|
1488 |
"iso_3_code": null,
|
1489 |
-
"tokenizers": {},
|
1490 |
"children": [
|
1491 |
{
|
1492 |
"name": "Kota",
|
1493 |
"iso_1_code": null,
|
1494 |
"iso_3_code": "kfe",
|
1495 |
-
"tokenizers": {},
|
1496 |
"children": [],
|
|
|
1497 |
"node_i": "3692",
|
1498 |
-
"
|
1499 |
-
"
|
1500 |
},
|
1501 |
{
|
1502 |
"name": "Toda",
|
1503 |
"iso_1_code": null,
|
1504 |
"iso_3_code": "tcx",
|
1505 |
-
"tokenizers": {},
|
1506 |
"children": [],
|
|
|
1507 |
"node_i": "3693",
|
1508 |
-
"
|
1509 |
-
"
|
1510 |
}
|
1511 |
],
|
|
|
1512 |
"node_i": "3691",
|
1513 |
-
"
|
1514 |
-
"
|
1515 |
}
|
1516 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1517 |
"node_i": "3662",
|
1518 |
-
"
|
1519 |
-
"
|
1520 |
},
|
1521 |
{
|
1522 |
"name": "Unclassified",
|
1523 |
"iso_1_code": null,
|
1524 |
"iso_3_code": null,
|
1525 |
-
"tokenizers": {},
|
1526 |
"children": [
|
1527 |
{
|
1528 |
"name": "Chetti, Wayanad",
|
1529 |
"iso_1_code": null,
|
1530 |
"iso_3_code": "ctt",
|
1531 |
-
"tokenizers": {},
|
1532 |
"children": [],
|
|
|
1533 |
"node_i": "3695",
|
1534 |
-
"
|
1535 |
-
"
|
1536 |
}
|
1537 |
],
|
|
|
1538 |
"node_i": "3694",
|
1539 |
-
"
|
1540 |
-
"
|
1541 |
}
|
1542 |
],
|
1543 |
-
"node_i": "3656",
|
1544 |
-
"scripts": [],
|
1545 |
-
"own_tokenizer": false
|
1546 |
-
},
|
1547 |
-
{
|
1548 |
-
"name": "Tulu",
|
1549 |
-
"iso_1_code": null,
|
1550 |
-
"iso_3_code": null,
|
1551 |
"tokenizers": {
|
1552 |
"Knda": {
|
1553 |
-
"full_object": "
|
1554 |
"original_lang_name": "kannada",
|
1555 |
"original_lang_code": "kan",
|
1556 |
-
"
|
1557 |
-
|
1558 |
-
|
1559 |
-
|
1560 |
-
"
|
1561 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1562 |
}
|
1563 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1564 |
"children": [
|
1565 |
{
|
1566 |
"name": "Bellari",
|
1567 |
"iso_1_code": null,
|
1568 |
"iso_3_code": "brw",
|
1569 |
-
"tokenizers": {},
|
1570 |
"children": [],
|
|
|
1571 |
"node_i": "3697",
|
1572 |
-
"
|
1573 |
-
"
|
1574 |
},
|
1575 |
{
|
1576 |
"name": "Kudiya",
|
1577 |
"iso_1_code": null,
|
1578 |
"iso_3_code": "kfg",
|
1579 |
-
"tokenizers": {},
|
1580 |
"children": [],
|
|
|
1581 |
"node_i": "3698",
|
1582 |
-
"
|
1583 |
-
"
|
1584 |
},
|
1585 |
{
|
1586 |
"name": "Tulu",
|
1587 |
"iso_1_code": null,
|
1588 |
"iso_3_code": "tcy",
|
|
|
1589 |
"tokenizers": {
|
1590 |
"Knda": {
|
1591 |
-
"full_object": "
|
1592 |
"original_lang_name": "kannada",
|
1593 |
"original_lang_code": "kan",
|
1594 |
-
"
|
1595 |
-
|
1596 |
-
"Knda"
|
1597 |
-
],
|
1598 |
-
"class_name": "SpaCyTokenizer",
|
1599 |
-
"macrolanguage": false
|
1600 |
}
|
1601 |
},
|
1602 |
-
"children": [],
|
1603 |
"node_i": "3699",
|
|
|
1604 |
"scripts": [
|
1605 |
"Knda"
|
1606 |
-
]
|
1607 |
-
"own_tokenizer": false
|
1608 |
},
|
1609 |
{
|
1610 |
"name": "Koraga",
|
1611 |
"iso_1_code": null,
|
1612 |
"iso_3_code": null,
|
1613 |
-
"tokenizers": {},
|
1614 |
"children": [
|
1615 |
{
|
1616 |
"name": "Koraga, Korra",
|
1617 |
"iso_1_code": null,
|
1618 |
"iso_3_code": "kfd",
|
1619 |
-
"tokenizers": {},
|
1620 |
"children": [],
|
|
|
1621 |
"node_i": "3701",
|
1622 |
-
"
|
1623 |
-
"
|
1624 |
},
|
1625 |
{
|
1626 |
"name": "Koraga, Mudu",
|
1627 |
"iso_1_code": null,
|
1628 |
"iso_3_code": "vmd",
|
1629 |
-
"tokenizers": {},
|
1630 |
"children": [],
|
|
|
1631 |
"node_i": "3702",
|
1632 |
-
"
|
1633 |
-
"
|
1634 |
}
|
1635 |
],
|
|
|
1636 |
"node_i": "3700",
|
1637 |
-
"
|
1638 |
-
"
|
1639 |
}
|
1640 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1641 |
"node_i": "3696",
|
1642 |
-
"
|
1643 |
-
"
|
1644 |
},
|
1645 |
{
|
1646 |
"name": "Unclassified",
|
1647 |
"iso_1_code": null,
|
1648 |
"iso_3_code": null,
|
1649 |
-
"tokenizers": {},
|
1650 |
"children": [
|
1651 |
{
|
1652 |
"name": "Mala Malasar",
|
1653 |
"iso_1_code": null,
|
1654 |
"iso_3_code": "ima",
|
1655 |
-
"tokenizers": {},
|
1656 |
"children": [],
|
|
|
1657 |
"node_i": "3704",
|
1658 |
-
"
|
1659 |
-
"
|
1660 |
},
|
1661 |
{
|
1662 |
"name": "Thachanadan",
|
1663 |
"iso_1_code": null,
|
1664 |
"iso_3_code": "thn",
|
1665 |
-
"tokenizers": {},
|
1666 |
"children": [],
|
|
|
1667 |
"node_i": "3705",
|
1668 |
-
"
|
1669 |
-
"
|
1670 |
},
|
1671 |
{
|
1672 |
"name": "Ullatan",
|
1673 |
"iso_1_code": null,
|
1674 |
"iso_3_code": "ull",
|
1675 |
-
"tokenizers": {},
|
1676 |
"children": [],
|
|
|
1677 |
"node_i": "3706",
|
1678 |
-
"
|
1679 |
-
"
|
1680 |
},
|
1681 |
{
|
1682 |
"name": "Malasar",
|
1683 |
"iso_1_code": null,
|
1684 |
"iso_3_code": "ymr",
|
1685 |
-
"tokenizers": {},
|
1686 |
"children": [],
|
|
|
1687 |
"node_i": "3707",
|
1688 |
-
"
|
1689 |
-
"
|
1690 |
}
|
1691 |
],
|
|
|
1692 |
"node_i": "3703",
|
1693 |
-
"
|
1694 |
-
"
|
1695 |
}
|
1696 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1697 |
"node_i": "3648",
|
1698 |
-
"
|
1699 |
-
"
|
1700 |
},
|
1701 |
{
|
1702 |
"name": "Unclassified",
|
1703 |
"iso_1_code": null,
|
1704 |
"iso_3_code": null,
|
1705 |
-
"tokenizers": {},
|
1706 |
"children": [
|
1707 |
{
|
1708 |
"name": "Allar",
|
1709 |
"iso_1_code": null,
|
1710 |
"iso_3_code": "all",
|
1711 |
-
"tokenizers": {},
|
1712 |
"children": [],
|
|
|
1713 |
"node_i": "3709",
|
1714 |
-
"
|
1715 |
-
"
|
1716 |
},
|
1717 |
{
|
1718 |
"name": "Bharia",
|
1719 |
"iso_1_code": null,
|
1720 |
"iso_3_code": "bha",
|
1721 |
-
"tokenizers": {},
|
1722 |
"children": [],
|
|
|
1723 |
"node_i": "3710",
|
1724 |
-
"
|
1725 |
-
"
|
1726 |
},
|
1727 |
{
|
1728 |
"name": "Malankuravan",
|
1729 |
"iso_1_code": null,
|
1730 |
"iso_3_code": "mjo",
|
1731 |
-
"tokenizers": {},
|
1732 |
"children": [],
|
|
|
1733 |
"node_i": "3711",
|
1734 |
-
"
|
1735 |
-
"
|
1736 |
},
|
1737 |
{
|
1738 |
"name": "Pattapu",
|
1739 |
"iso_1_code": null,
|
1740 |
"iso_3_code": "ptq",
|
1741 |
-
"tokenizers": {},
|
1742 |
"children": [],
|
|
|
1743 |
"node_i": "3712",
|
1744 |
-
"
|
1745 |
-
"
|
1746 |
},
|
1747 |
{
|
1748 |
"name": "Vishavan",
|
1749 |
"iso_1_code": null,
|
1750 |
"iso_3_code": "vis",
|
1751 |
-
"tokenizers": {},
|
1752 |
"children": [],
|
|
|
1753 |
"node_i": "3713",
|
1754 |
-
"
|
1755 |
-
"
|
1756 |
}
|
1757 |
],
|
|
|
1758 |
"node_i": "3708",
|
1759 |
-
"
|
1760 |
-
"
|
1761 |
}
|
1762 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1763 |
"node_i": "3601",
|
1764 |
-
"
|
1765 |
-
"
|
1766 |
}
|
|
|
2 |
"name": "Dravidian",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Central",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Kolami-Naiki",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": null,
|
|
|
15 |
"children": [
|
16 |
{
|
17 |
"name": "Kolami, Northwestern",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "kfb",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "3604",
|
23 |
+
"native_tokenizers": [],
|
24 |
+
"scripts": []
|
25 |
},
|
26 |
{
|
27 |
"name": "Kolami, Southeastern",
|
28 |
"iso_1_code": null,
|
29 |
"iso_3_code": "nit",
|
|
|
30 |
"children": [],
|
31 |
+
"tokenizers": {},
|
32 |
"node_i": "3605",
|
33 |
+
"native_tokenizers": [],
|
34 |
+
"scripts": []
|
35 |
}
|
36 |
],
|
37 |
+
"tokenizers": {},
|
38 |
"node_i": "3603",
|
39 |
+
"native_tokenizers": [],
|
40 |
+
"scripts": []
|
41 |
},
|
42 |
{
|
43 |
"name": "Parji-Gadaba",
|
44 |
"iso_1_code": null,
|
45 |
"iso_3_code": null,
|
|
|
46 |
"children": [
|
47 |
{
|
48 |
"name": "Gadaba, Mudhili",
|
49 |
"iso_1_code": null,
|
50 |
"iso_3_code": "gau",
|
|
|
51 |
"children": [],
|
52 |
+
"tokenizers": {},
|
53 |
"node_i": "3607",
|
54 |
+
"native_tokenizers": [],
|
55 |
+
"scripts": []
|
56 |
},
|
57 |
{
|
58 |
"name": "Gadaba, Pottangi Ollar",
|
59 |
"iso_1_code": null,
|
60 |
"iso_3_code": "gdb",
|
|
|
61 |
"children": [],
|
62 |
+
"tokenizers": {},
|
63 |
"node_i": "3608",
|
64 |
+
"native_tokenizers": [],
|
65 |
+
"scripts": []
|
66 |
},
|
67 |
{
|
68 |
"name": "Duruwa",
|
69 |
"iso_1_code": null,
|
70 |
"iso_3_code": "pci",
|
|
|
71 |
"children": [],
|
72 |
+
"tokenizers": {},
|
73 |
"node_i": "3609",
|
74 |
+
"native_tokenizers": [],
|
75 |
+
"scripts": []
|
76 |
}
|
77 |
],
|
78 |
+
"tokenizers": {},
|
79 |
"node_i": "3606",
|
80 |
+
"native_tokenizers": [],
|
81 |
+
"scripts": []
|
82 |
}
|
83 |
],
|
84 |
+
"tokenizers": {},
|
85 |
"node_i": "3602",
|
86 |
+
"native_tokenizers": [],
|
87 |
+
"scripts": []
|
88 |
},
|
89 |
{
|
90 |
"name": "Northern",
|
91 |
"iso_1_code": null,
|
92 |
"iso_3_code": null,
|
|
|
93 |
"children": [
|
94 |
{
|
95 |
"name": "Brahui",
|
96 |
"iso_1_code": null,
|
97 |
"iso_3_code": "brh",
|
|
|
98 |
"children": [],
|
99 |
+
"tokenizers": {},
|
100 |
"node_i": "3611",
|
101 |
+
"native_tokenizers": [],
|
102 |
"scripts": [
|
103 |
"Arab"
|
104 |
+
]
|
|
|
105 |
},
|
106 |
{
|
107 |
"name": "Kumarbhag Paharia",
|
108 |
"iso_1_code": null,
|
109 |
"iso_3_code": "kmj",
|
|
|
110 |
"children": [],
|
111 |
+
"tokenizers": {},
|
112 |
"node_i": "3612",
|
113 |
+
"native_tokenizers": [],
|
114 |
+
"scripts": []
|
115 |
},
|
116 |
{
|
117 |
"name": "Kurux",
|
118 |
"iso_1_code": null,
|
119 |
"iso_3_code": "kru",
|
|
|
120 |
"children": [],
|
121 |
+
"tokenizers": {},
|
122 |
"node_i": "3613",
|
123 |
+
"native_tokenizers": [],
|
124 |
"scripts": [
|
125 |
"Deva"
|
126 |
+
]
|
|
|
127 |
},
|
128 |
{
|
129 |
"name": "Sauria Paharia",
|
130 |
"iso_1_code": null,
|
131 |
"iso_3_code": "mjt",
|
|
|
132 |
"children": [],
|
133 |
+
"tokenizers": {},
|
134 |
"node_i": "3614",
|
135 |
+
"native_tokenizers": [],
|
136 |
+
"scripts": []
|
137 |
},
|
138 |
{
|
139 |
"name": "Kisan",
|
140 |
"iso_1_code": null,
|
141 |
"iso_3_code": "xis",
|
|
|
142 |
"children": [],
|
143 |
+
"tokenizers": {},
|
144 |
"node_i": "3615",
|
145 |
+
"native_tokenizers": [],
|
146 |
+
"scripts": []
|
147 |
}
|
148 |
],
|
149 |
+
"tokenizers": {},
|
150 |
"node_i": "3610",
|
151 |
+
"native_tokenizers": [],
|
152 |
+
"scripts": []
|
153 |
},
|
154 |
{
|
155 |
"name": "South-Central",
|
156 |
"iso_1_code": null,
|
157 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
158 |
"children": [
|
159 |
{
|
160 |
"name": "Gondi-Kui",
|
161 |
"iso_1_code": null,
|
162 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
"children": [
|
164 |
{
|
165 |
"name": "Gondi",
|
166 |
"iso_1_code": null,
|
167 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
"children": [
|
169 |
{
|
170 |
"name": "Maria, Dandami",
|
171 |
"iso_1_code": null,
|
172 |
"iso_3_code": "daq",
|
|
|
173 |
"children": [],
|
174 |
+
"tokenizers": {},
|
175 |
"node_i": "3619",
|
176 |
+
"native_tokenizers": [],
|
177 |
+
"scripts": []
|
178 |
},
|
179 |
{
|
180 |
"name": "Muria, Eastern",
|
181 |
"iso_1_code": null,
|
182 |
"iso_3_code": "emu",
|
|
|
183 |
"children": [],
|
184 |
+
"tokenizers": {},
|
185 |
"node_i": "3620",
|
186 |
+
"native_tokenizers": [],
|
187 |
+
"scripts": []
|
188 |
},
|
189 |
{
|
190 |
"name": "Gondi, Aheri",
|
191 |
"iso_1_code": null,
|
192 |
"iso_3_code": "esg",
|
|
|
193 |
"children": [],
|
194 |
+
"tokenizers": {},
|
195 |
"node_i": "3621",
|
196 |
+
"native_tokenizers": [],
|
197 |
+
"scripts": []
|
198 |
},
|
199 |
{
|
200 |
"name": "Muria, Far Western",
|
201 |
"iso_1_code": null,
|
202 |
"iso_3_code": "fmu",
|
|
|
203 |
"children": [],
|
204 |
+
"tokenizers": {},
|
205 |
"node_i": "3622",
|
206 |
+
"native_tokenizers": [],
|
207 |
"scripts": [
|
208 |
"Deva"
|
209 |
+
]
|
|
|
210 |
},
|
211 |
{
|
212 |
"name": "Gondi, Northern",
|
213 |
"iso_1_code": null,
|
214 |
"iso_3_code": "gno",
|
|
|
215 |
"children": [],
|
216 |
+
"tokenizers": {},
|
217 |
"node_i": "3623",
|
218 |
+
"native_tokenizers": [],
|
219 |
+
"scripts": []
|
220 |
},
|
221 |
{
|
222 |
"name": "Khirwar",
|
223 |
"iso_1_code": null,
|
224 |
"iso_3_code": "kwx",
|
|
|
225 |
"children": [],
|
226 |
+
"tokenizers": {},
|
227 |
"node_i": "3624",
|
228 |
+
"native_tokenizers": [],
|
229 |
+
"scripts": []
|
230 |
},
|
231 |
{
|
232 |
"name": "Maria",
|
233 |
"iso_1_code": null,
|
234 |
"iso_3_code": "mrr",
|
|
|
235 |
"children": [],
|
236 |
+
"tokenizers": {},
|
237 |
"node_i": "3625",
|
238 |
+
"native_tokenizers": [],
|
239 |
+
"scripts": []
|
240 |
},
|
241 |
{
|
242 |
"name": "Muria, Western",
|
243 |
"iso_1_code": null,
|
244 |
"iso_3_code": "mut",
|
|
|
245 |
"children": [],
|
246 |
+
"tokenizers": {},
|
247 |
"node_i": "3626",
|
248 |
+
"native_tokenizers": [],
|
249 |
+
"scripts": []
|
250 |
},
|
251 |
{
|
252 |
"name": "Nagarchal",
|
253 |
"iso_1_code": null,
|
254 |
"iso_3_code": "nbg",
|
|
|
255 |
"children": [],
|
256 |
+
"tokenizers": {},
|
257 |
"node_i": "3627",
|
258 |
+
"native_tokenizers": [],
|
259 |
+
"scripts": []
|
260 |
},
|
261 |
{
|
262 |
"name": "Pardhan",
|
263 |
"iso_1_code": null,
|
264 |
"iso_3_code": "pch",
|
|
|
265 |
"children": [],
|
266 |
+
"tokenizers": {},
|
267 |
"node_i": "3628",
|
268 |
+
"native_tokenizers": [],
|
269 |
+
"scripts": []
|
270 |
},
|
271 |
{
|
272 |
"name": "Gondi, Adilabad",
|
273 |
"iso_1_code": null,
|
274 |
"iso_3_code": "wsg",
|
275 |
+
"children": [],
|
276 |
"tokenizers": {
|
277 |
"Telu": {
|
278 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
279 |
"original_lang_name": "telugu",
|
280 |
"original_lang_code": "tel",
|
281 |
+
"script": "Telu",
|
282 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
283 |
}
|
284 |
},
|
|
|
285 |
"node_i": "3629",
|
286 |
+
"native_tokenizers": [],
|
287 |
"scripts": [
|
288 |
"Telu"
|
289 |
+
]
|
|
|
290 |
}
|
291 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
"tokenizers": {
|
293 |
"Telu": {
|
294 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
"original_lang_name": "telugu",
|
296 |
"original_lang_code": "tel",
|
297 |
+
"script": "Telu",
|
298 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
299 |
}
|
300 |
},
|
301 |
+
"node_i": "3618",
|
302 |
+
"native_tokenizers": [],
|
303 |
+
"scripts": []
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"name": "Konda-Kui",
|
307 |
+
"iso_1_code": null,
|
308 |
+
"iso_3_code": null,
|
309 |
"children": [
|
310 |
{
|
311 |
"name": "Konda",
|
312 |
"iso_1_code": null,
|
313 |
"iso_3_code": null,
|
|
|
314 |
"children": [
|
315 |
{
|
316 |
"name": "Konda-Dora",
|
317 |
"iso_1_code": null,
|
318 |
"iso_3_code": "kfc",
|
|
|
319 |
"children": [],
|
320 |
+
"tokenizers": {},
|
321 |
"node_i": "3632",
|
322 |
+
"native_tokenizers": [],
|
323 |
+
"scripts": []
|
324 |
},
|
325 |
{
|
326 |
"name": "Mukha-Dora",
|
327 |
"iso_1_code": null,
|
328 |
"iso_3_code": "mmk",
|
|
|
329 |
"children": [],
|
330 |
+
"tokenizers": {},
|
331 |
"node_i": "3633",
|
332 |
+
"native_tokenizers": [],
|
333 |
+
"scripts": []
|
334 |
}
|
335 |
],
|
336 |
+
"tokenizers": {},
|
337 |
"node_i": "3631",
|
338 |
+
"native_tokenizers": [],
|
339 |
+
"scripts": []
|
340 |
},
|
341 |
{
|
342 |
"name": "Manda-Kui",
|
343 |
"iso_1_code": null,
|
344 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
345 |
"children": [
|
346 |
{
|
347 |
"name": "Kui-Kuvi",
|
348 |
"iso_1_code": null,
|
349 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
"children": [
|
351 |
{
|
352 |
"name": "Kui, Dawik",
|
353 |
"iso_1_code": null,
|
354 |
"iso_3_code": "dwk",
|
|
|
355 |
"children": [],
|
356 |
+
"tokenizers": {},
|
357 |
"node_i": "3636",
|
358 |
+
"native_tokenizers": [],
|
359 |
+
"scripts": []
|
360 |
},
|
361 |
{
|
362 |
"name": "Koya",
|
363 |
"iso_1_code": null,
|
364 |
"iso_3_code": "kff",
|
365 |
+
"children": [],
|
366 |
"tokenizers": {
|
367 |
"Telu": {
|
368 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
369 |
"original_lang_name": "telugu",
|
370 |
"original_lang_code": "tel",
|
371 |
+
"script": "Telu",
|
372 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
373 |
}
|
374 |
},
|
|
|
375 |
"node_i": "3637",
|
376 |
+
"native_tokenizers": [],
|
377 |
"scripts": [
|
378 |
"Telu"
|
379 |
+
]
|
|
|
380 |
},
|
381 |
{
|
382 |
"name": "Kuvi",
|
383 |
"iso_1_code": null,
|
384 |
"iso_3_code": "kxv",
|
|
|
385 |
"children": [],
|
386 |
+
"tokenizers": {},
|
387 |
"node_i": "3638",
|
388 |
+
"native_tokenizers": [],
|
389 |
+
"scripts": []
|
390 |
},
|
391 |
{
|
392 |
"name": "Kui",
|
393 |
"iso_1_code": null,
|
394 |
"iso_3_code": "uki",
|
|
|
395 |
"children": [],
|
396 |
+
"tokenizers": {},
|
397 |
"node_i": "3639",
|
398 |
+
"native_tokenizers": [],
|
399 |
+
"scripts": []
|
400 |
}
|
401 |
],
|
402 |
+
"tokenizers": {
|
403 |
+
"Telu": {
|
404 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
405 |
+
"original_lang_name": "telugu",
|
406 |
+
"original_lang_code": "tel",
|
407 |
+
"script": "Telu",
|
408 |
+
"class_name": "IndicNLPTokenizer"
|
409 |
+
}
|
410 |
+
},
|
411 |
"node_i": "3635",
|
412 |
+
"native_tokenizers": [],
|
413 |
+
"scripts": []
|
414 |
},
|
415 |
{
|
416 |
"name": "Manda-Pengo",
|
417 |
"iso_1_code": null,
|
418 |
"iso_3_code": null,
|
|
|
419 |
"children": [
|
420 |
{
|
421 |
"name": "Manda",
|
422 |
"iso_1_code": null,
|
423 |
"iso_3_code": "mha",
|
|
|
424 |
"children": [],
|
425 |
+
"tokenizers": {},
|
426 |
"node_i": "3641",
|
427 |
+
"native_tokenizers": [],
|
428 |
+
"scripts": []
|
429 |
},
|
430 |
{
|
431 |
"name": "Pengo",
|
432 |
"iso_1_code": null,
|
433 |
"iso_3_code": "peg",
|
|
|
434 |
"children": [],
|
435 |
+
"tokenizers": {},
|
436 |
"node_i": "3642",
|
437 |
+
"native_tokenizers": [],
|
438 |
+
"scripts": []
|
439 |
}
|
440 |
],
|
441 |
+
"tokenizers": {},
|
442 |
"node_i": "3640",
|
443 |
+
"native_tokenizers": [],
|
444 |
+
"scripts": []
|
445 |
}
|
446 |
],
|
447 |
+
"tokenizers": {
|
448 |
+
"Telu": {
|
449 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
450 |
+
"original_lang_name": "telugu",
|
451 |
+
"original_lang_code": "tel",
|
452 |
+
"script": "Telu",
|
453 |
+
"class_name": "IndicNLPTokenizer"
|
454 |
+
}
|
455 |
+
},
|
456 |
"node_i": "3634",
|
457 |
+
"native_tokenizers": [],
|
458 |
+
"scripts": []
|
459 |
}
|
460 |
],
|
461 |
+
"tokenizers": {
|
462 |
+
"Telu": {
|
463 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
464 |
+
"original_lang_name": "telugu",
|
465 |
+
"original_lang_code": "tel",
|
466 |
+
"script": "Telu",
|
467 |
+
"class_name": "IndicNLPTokenizer"
|
468 |
+
}
|
469 |
+
},
|
470 |
"node_i": "3630",
|
471 |
+
"native_tokenizers": [],
|
472 |
+
"scripts": []
|
473 |
}
|
474 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
475 |
"tokenizers": {
|
476 |
"Telu": {
|
477 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
478 |
"original_lang_name": "telugu",
|
479 |
"original_lang_code": "tel",
|
480 |
+
"script": "Telu",
|
481 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
482 |
}
|
483 |
},
|
484 |
+
"node_i": "3617",
|
485 |
+
"native_tokenizers": [],
|
486 |
+
"scripts": []
|
487 |
+
},
|
488 |
+
{
|
489 |
+
"name": "Telugu",
|
490 |
+
"iso_1_code": null,
|
491 |
+
"iso_3_code": null,
|
492 |
"children": [
|
493 |
{
|
494 |
"name": "Chenchu",
|
495 |
"iso_1_code": null,
|
496 |
"iso_3_code": "cde",
|
|
|
497 |
"children": [],
|
498 |
+
"tokenizers": {},
|
499 |
"node_i": "3644",
|
500 |
+
"native_tokenizers": [],
|
501 |
+
"scripts": []
|
502 |
},
|
503 |
{
|
504 |
"name": "Manna-Dora",
|
505 |
"iso_1_code": null,
|
506 |
"iso_3_code": "mju",
|
|
|
507 |
"children": [],
|
508 |
+
"tokenizers": {},
|
509 |
"node_i": "3645",
|
510 |
+
"native_tokenizers": [],
|
511 |
+
"scripts": []
|
512 |
},
|
513 |
{
|
514 |
"name": "Telugu",
|
515 |
"iso_1_code": "te",
|
516 |
"iso_3_code": "tel",
|
517 |
+
"children": [],
|
518 |
"tokenizers": {
|
519 |
"Telu": {
|
520 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
521 |
"original_lang_name": "telugu",
|
522 |
"original_lang_code": "tel",
|
523 |
+
"script": "Telu",
|
524 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
525 |
}
|
526 |
},
|
|
|
527 |
"node_i": "3646",
|
528 |
+
"native_tokenizers": [
|
529 |
+
"Telu"
|
530 |
+
],
|
531 |
"scripts": [
|
532 |
"Telu",
|
533 |
"Latn"
|
534 |
+
]
|
|
|
535 |
},
|
536 |
{
|
537 |
"name": "Waddar",
|
538 |
"iso_1_code": null,
|
539 |
"iso_3_code": "wbq",
|
|
|
540 |
"children": [],
|
541 |
+
"tokenizers": {},
|
542 |
"node_i": "3647",
|
543 |
+
"native_tokenizers": [],
|
544 |
+
"scripts": []
|
545 |
}
|
546 |
],
|
547 |
+
"tokenizers": {
|
548 |
+
"Telu": {
|
549 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
550 |
+
"original_lang_name": "telugu",
|
551 |
+
"original_lang_code": "tel",
|
552 |
+
"script": "Telu",
|
553 |
+
"class_name": "IndicNLPTokenizer"
|
554 |
+
}
|
555 |
+
},
|
556 |
"node_i": "3643",
|
557 |
+
"native_tokenizers": [],
|
558 |
+
"scripts": []
|
559 |
}
|
560 |
],
|
561 |
+
"tokenizers": {
|
562 |
+
"Telu": {
|
563 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
564 |
+
"original_lang_name": "telugu",
|
565 |
+
"original_lang_code": "tel",
|
566 |
+
"script": "Telu",
|
567 |
+
"class_name": "IndicNLPTokenizer"
|
568 |
+
}
|
569 |
+
},
|
570 |
"node_i": "3616",
|
571 |
+
"native_tokenizers": [],
|
572 |
+
"scripts": []
|
573 |
},
|
574 |
{
|
575 |
"name": "Southern",
|
576 |
"iso_1_code": null,
|
577 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
578 |
"children": [
|
579 |
{
|
580 |
"name": "Kurichiya",
|
581 |
"iso_1_code": null,
|
582 |
"iso_3_code": "kfh",
|
|
|
583 |
"children": [],
|
584 |
+
"tokenizers": {},
|
585 |
"node_i": "3649",
|
586 |
+
"native_tokenizers": [],
|
587 |
+
"scripts": []
|
588 |
},
|
589 |
{
|
590 |
"name": "Kurumba, Attapady",
|
591 |
"iso_1_code": null,
|
592 |
"iso_3_code": "pkr",
|
|
|
593 |
"children": [],
|
594 |
+
"tokenizers": {},
|
595 |
"node_i": "3650",
|
596 |
+
"native_tokenizers": [],
|
597 |
+
"scripts": []
|
598 |
},
|
599 |
{
|
600 |
"name": "Pathiya",
|
601 |
"iso_1_code": null,
|
602 |
"iso_3_code": "pty",
|
|
|
603 |
"children": [],
|
604 |
+
"tokenizers": {},
|
605 |
"node_i": "3651",
|
606 |
+
"native_tokenizers": [],
|
607 |
+
"scripts": []
|
608 |
},
|
609 |
{
|
610 |
"name": "Muduga",
|
611 |
"iso_1_code": null,
|
612 |
"iso_3_code": "udg",
|
|
|
613 |
"children": [],
|
614 |
+
"tokenizers": {},
|
615 |
"node_i": "3652",
|
616 |
+
"native_tokenizers": [],
|
617 |
+
"scripts": []
|
618 |
},
|
619 |
{
|
620 |
"name": "Kumbaran",
|
621 |
"iso_1_code": null,
|
622 |
"iso_3_code": "wkb",
|
|
|
623 |
"children": [],
|
624 |
+
"tokenizers": {},
|
625 |
"node_i": "3653",
|
626 |
+
"native_tokenizers": [],
|
627 |
+
"scripts": []
|
628 |
},
|
629 |
{
|
630 |
"name": "Kalanadi",
|
631 |
"iso_1_code": null,
|
632 |
"iso_3_code": "wkl",
|
|
|
633 |
"children": [],
|
634 |
+
"tokenizers": {},
|
635 |
"node_i": "3654",
|
636 |
+
"native_tokenizers": [],
|
637 |
+
"scripts": []
|
638 |
},
|
639 |
{
|
640 |
"name": "Kunduvadi",
|
641 |
"iso_1_code": null,
|
642 |
"iso_3_code": "wku",
|
|
|
643 |
"children": [],
|
644 |
+
"tokenizers": {},
|
645 |
"node_i": "3655",
|
646 |
+
"native_tokenizers": [],
|
647 |
+
"scripts": []
|
648 |
},
|
649 |
{
|
650 |
"name": "Tamil-Kannada",
|
651 |
"iso_1_code": null,
|
652 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
653 |
"children": [
|
654 |
{
|
655 |
"name": "Kannada",
|
656 |
"iso_1_code": null,
|
657 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
658 |
"children": [
|
659 |
{
|
660 |
"name": "Badaga",
|
661 |
"iso_1_code": null,
|
662 |
"iso_3_code": "bfq",
|
|
|
663 |
"children": [],
|
664 |
+
"tokenizers": {},
|
665 |
"node_i": "3658",
|
666 |
+
"native_tokenizers": [],
|
667 |
+
"scripts": []
|
668 |
},
|
669 |
{
|
670 |
"name": "Holiya",
|
671 |
"iso_1_code": null,
|
672 |
"iso_3_code": "hoy",
|
|
|
673 |
"children": [],
|
674 |
+
"tokenizers": {},
|
675 |
"node_i": "3659",
|
676 |
+
"native_tokenizers": [],
|
677 |
+
"scripts": []
|
678 |
},
|
679 |
{
|
680 |
"name": "Kannada",
|
681 |
"iso_1_code": "kn",
|
682 |
"iso_3_code": "kan",
|
683 |
+
"children": [],
|
684 |
"tokenizers": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
685 |
"Knda": {
|
686 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
687 |
"original_lang_name": "kannada",
|
688 |
"original_lang_code": "kan",
|
689 |
+
"script": "Knda",
|
690 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
691 |
}
|
692 |
},
|
|
|
693 |
"node_i": "3660",
|
694 |
+
"native_tokenizers": [
|
695 |
+
"Knda"
|
696 |
+
],
|
697 |
"scripts": [
|
698 |
"Latn",
|
699 |
"Knda"
|
700 |
+
]
|
|
|
701 |
},
|
702 |
{
|
703 |
"name": "Urali",
|
704 |
"iso_1_code": null,
|
705 |
"iso_3_code": "url",
|
|
|
706 |
"children": [],
|
707 |
+
"tokenizers": {},
|
708 |
"node_i": "3661",
|
709 |
+
"native_tokenizers": [],
|
710 |
+
"scripts": []
|
711 |
}
|
712 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
713 |
"tokenizers": {
|
714 |
+
"Knda": {
|
715 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
716 |
+
"original_lang_name": "kannada",
|
717 |
+
"original_lang_code": "kan",
|
718 |
+
"script": "Knda",
|
719 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
720 |
}
|
721 |
},
|
722 |
+
"node_i": "3657",
|
723 |
+
"native_tokenizers": [],
|
724 |
+
"scripts": []
|
725 |
+
},
|
726 |
+
{
|
727 |
+
"name": "Tamil-Kodagu",
|
728 |
+
"iso_1_code": null,
|
729 |
+
"iso_3_code": null,
|
730 |
"children": [
|
731 |
{
|
732 |
"name": "Kodagu",
|
733 |
"iso_1_code": null,
|
734 |
"iso_3_code": null,
|
|
|
735 |
"children": [
|
736 |
{
|
737 |
"name": "Kodava",
|
738 |
"iso_1_code": null,
|
739 |
"iso_3_code": "kfa",
|
|
|
740 |
"children": [],
|
741 |
+
"tokenizers": {},
|
742 |
"node_i": "3664",
|
743 |
+
"native_tokenizers": [],
|
744 |
+
"scripts": []
|
745 |
},
|
746 |
{
|
747 |
"name": "Kurumba, Kannada",
|
748 |
"iso_1_code": null,
|
749 |
"iso_3_code": "kfi",
|
|
|
750 |
"children": [],
|
751 |
+
"tokenizers": {},
|
752 |
"node_i": "3665",
|
753 |
+
"native_tokenizers": [],
|
754 |
+
"scripts": []
|
755 |
},
|
756 |
{
|
757 |
"name": "Kurumba, Mullu",
|
758 |
"iso_1_code": null,
|
759 |
"iso_3_code": "kpb",
|
|
|
760 |
"children": [],
|
761 |
+
"tokenizers": {},
|
762 |
"node_i": "3666",
|
763 |
+
"native_tokenizers": [],
|
764 |
+
"scripts": []
|
765 |
},
|
766 |
{
|
767 |
"name": "Kurumba, Alu",
|
768 |
"iso_1_code": null,
|
769 |
"iso_3_code": "xua",
|
|
|
770 |
"children": [],
|
771 |
+
"tokenizers": {},
|
772 |
"node_i": "3667",
|
773 |
+
"native_tokenizers": [],
|
774 |
+
"scripts": []
|
775 |
},
|
776 |
{
|
777 |
"name": "Kurumba, Jennu",
|
778 |
"iso_1_code": null,
|
779 |
"iso_3_code": "xuj",
|
|
|
780 |
"children": [],
|
781 |
+
"tokenizers": {},
|
782 |
"node_i": "3668",
|
783 |
+
"native_tokenizers": [],
|
784 |
+
"scripts": []
|
785 |
}
|
786 |
],
|
787 |
+
"tokenizers": {},
|
788 |
"node_i": "3663",
|
789 |
+
"native_tokenizers": [],
|
790 |
+
"scripts": []
|
791 |
},
|
792 |
{
|
793 |
"name": "Tamil-Malayalam",
|
794 |
"iso_1_code": null,
|
795 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
796 |
"children": [
|
797 |
{
|
798 |
"name": "Mannan",
|
799 |
"iso_1_code": null,
|
800 |
"iso_3_code": "mjv",
|
|
|
801 |
"children": [],
|
802 |
+
"tokenizers": {},
|
803 |
"node_i": "3670",
|
804 |
+
"native_tokenizers": [],
|
805 |
+
"scripts": []
|
806 |
},
|
807 |
{
|
808 |
"name": "Malayalam",
|
809 |
"iso_1_code": null,
|
810 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
811 |
"children": [
|
812 |
{
|
813 |
"name": "Aranadan",
|
814 |
"iso_1_code": null,
|
815 |
"iso_3_code": "aaf",
|
|
|
816 |
"children": [],
|
817 |
+
"tokenizers": {},
|
818 |
"node_i": "3672",
|
819 |
+
"native_tokenizers": [],
|
820 |
+
"scripts": []
|
821 |
},
|
822 |
{
|
823 |
"name": "Kadar",
|
824 |
"iso_1_code": null,
|
825 |
"iso_3_code": "kej",
|
|
|
826 |
"children": [],
|
827 |
+
"tokenizers": {},
|
828 |
"node_i": "3673",
|
829 |
+
"native_tokenizers": [],
|
830 |
+
"scripts": []
|
831 |
},
|
832 |
{
|
833 |
"name": "Malayalam",
|
834 |
"iso_1_code": "ml",
|
835 |
"iso_3_code": "mal",
|
836 |
+
"children": [],
|
837 |
"tokenizers": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
838 |
"Mlym": {
|
839 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
840 |
"original_lang_name": "malayalam",
|
841 |
"original_lang_code": "mal",
|
842 |
+
"script": "Mlym",
|
843 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
844 |
}
|
845 |
},
|
|
|
846 |
"node_i": "3674",
|
847 |
+
"native_tokenizers": [
|
848 |
+
"Mlym"
|
849 |
+
],
|
850 |
"scripts": [
|
851 |
"Latn",
|
852 |
"Mlym"
|
853 |
+
]
|
|
|
854 |
},
|
855 |
{
|
856 |
"name": "Malapandaram",
|
857 |
"iso_1_code": null,
|
858 |
"iso_3_code": "mjp",
|
|
|
859 |
"children": [],
|
860 |
+
"tokenizers": {},
|
861 |
"node_i": "3675",
|
862 |
+
"native_tokenizers": [],
|
863 |
+
"scripts": []
|
864 |
},
|
865 |
{
|
866 |
"name": "Malaryan",
|
867 |
"iso_1_code": null,
|
868 |
"iso_3_code": "mjq",
|
|
|
869 |
"children": [],
|
870 |
+
"tokenizers": {},
|
871 |
"node_i": "3676",
|
872 |
+
"native_tokenizers": [],
|
873 |
+
"scripts": []
|
874 |
},
|
875 |
{
|
876 |
"name": "Malavedan",
|
877 |
"iso_1_code": null,
|
878 |
"iso_3_code": "mjr",
|
|
|
879 |
"children": [],
|
880 |
+
"tokenizers": {},
|
881 |
"node_i": "3677",
|
882 |
+
"native_tokenizers": [],
|
883 |
+
"scripts": []
|
884 |
},
|
885 |
{
|
886 |
"name": "Paliyan",
|
887 |
"iso_1_code": null,
|
888 |
"iso_3_code": "pcf",
|
|
|
889 |
"children": [],
|
890 |
+
"tokenizers": {},
|
891 |
"node_i": "3678",
|
892 |
+
"native_tokenizers": [],
|
893 |
+
"scripts": []
|
894 |
},
|
895 |
{
|
896 |
"name": "Paniya",
|
897 |
"iso_1_code": null,
|
898 |
"iso_3_code": "pcg",
|
|
|
899 |
"children": [],
|
900 |
+
"tokenizers": {},
|
901 |
"node_i": "3679",
|
902 |
+
"native_tokenizers": [],
|
903 |
+
"scripts": []
|
904 |
},
|
905 |
{
|
906 |
"name": "Ravula",
|
907 |
"iso_1_code": null,
|
908 |
"iso_3_code": "yea",
|
|
|
909 |
"children": [],
|
910 |
+
"tokenizers": {},
|
911 |
"node_i": "3680",
|
912 |
+
"native_tokenizers": [],
|
913 |
+
"scripts": []
|
914 |
}
|
915 |
],
|
916 |
+
"tokenizers": {
|
917 |
+
"Mlym": {
|
918 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
919 |
+
"original_lang_name": "malayalam",
|
920 |
+
"original_lang_code": "mal",
|
921 |
+
"script": "Mlym",
|
922 |
+
"class_name": "IndicNLPTokenizer"
|
923 |
+
}
|
924 |
+
},
|
925 |
"node_i": "3671",
|
926 |
+
"native_tokenizers": [],
|
927 |
+
"scripts": []
|
928 |
},
|
929 |
{
|
930 |
"name": "Tamil",
|
931 |
"iso_1_code": null,
|
932 |
"iso_3_code": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
933 |
"children": [
|
934 |
{
|
935 |
"name": "Eravallan",
|
936 |
"iso_1_code": null,
|
937 |
"iso_3_code": "era",
|
|
|
938 |
"children": [],
|
939 |
+
"tokenizers": {},
|
940 |
"node_i": "3682",
|
941 |
+
"native_tokenizers": [],
|
942 |
+
"scripts": []
|
943 |
},
|
944 |
{
|
945 |
"name": "Irula",
|
946 |
"iso_1_code": null,
|
947 |
"iso_3_code": "iru",
|
|
|
948 |
"children": [],
|
949 |
+
"tokenizers": {},
|
950 |
"node_i": "3683",
|
951 |
+
"native_tokenizers": [],
|
952 |
+
"scripts": []
|
953 |
},
|
954 |
{
|
955 |
"name": "Kaikadi",
|
956 |
"iso_1_code": null,
|
957 |
"iso_3_code": "kep",
|
|
|
958 |
"children": [],
|
959 |
+
"tokenizers": {},
|
960 |
"node_i": "3684",
|
961 |
+
"native_tokenizers": [],
|
962 |
+
"scripts": []
|
963 |
},
|
964 |
{
|
965 |
"name": "Kanikkaran",
|
966 |
"iso_1_code": null,
|
967 |
"iso_3_code": "kev",
|
|
|
968 |
"children": [],
|
969 |
+
"tokenizers": {},
|
970 |
"node_i": "3685",
|
971 |
+
"native_tokenizers": [],
|
972 |
+
"scripts": []
|
973 |
},
|
974 |
{
|
975 |
"name": "Muthuvan",
|
976 |
"iso_1_code": null,
|
977 |
"iso_3_code": "muv",
|
|
|
978 |
"children": [],
|
979 |
+
"tokenizers": {},
|
980 |
"node_i": "3686",
|
981 |
+
"native_tokenizers": [],
|
982 |
+
"scripts": []
|
983 |
},
|
984 |
{
|
985 |
"name": "Sholaga",
|
986 |
"iso_1_code": null,
|
987 |
"iso_3_code": "sle",
|
|
|
988 |
"children": [],
|
989 |
+
"tokenizers": {},
|
990 |
"node_i": "3687",
|
991 |
+
"native_tokenizers": [],
|
992 |
+
"scripts": []
|
993 |
},
|
994 |
{
|
995 |
"name": "Tamil",
|
996 |
"iso_1_code": "ta",
|
997 |
"iso_3_code": "tam",
|
998 |
+
"children": [],
|
999 |
"tokenizers": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1000 |
"Taml": {
|
1001 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
1002 |
"original_lang_name": "tamil",
|
1003 |
"original_lang_code": "tam",
|
1004 |
+
"script": "Taml",
|
1005 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
1006 |
}
|
1007 |
},
|
|
|
1008 |
"node_i": "3688",
|
1009 |
+
"native_tokenizers": [
|
1010 |
+
"Taml"
|
1011 |
+
],
|
1012 |
"scripts": [
|
1013 |
"Taml",
|
1014 |
"Latn"
|
1015 |
+
]
|
|
|
1016 |
},
|
1017 |
{
|
1018 |
"name": "Kurumba, Betta",
|
1019 |
"iso_1_code": null,
|
1020 |
"iso_3_code": "xub",
|
|
|
1021 |
"children": [],
|
1022 |
+
"tokenizers": {},
|
1023 |
"node_i": "3689",
|
1024 |
+
"native_tokenizers": [],
|
1025 |
+
"scripts": []
|
1026 |
},
|
1027 |
{
|
1028 |
"name": "Yerukula",
|
1029 |
"iso_1_code": null,
|
1030 |
"iso_3_code": "yeu",
|
|
|
1031 |
"children": [],
|
1032 |
+
"tokenizers": {},
|
1033 |
"node_i": "3690",
|
1034 |
+
"native_tokenizers": [],
|
1035 |
+
"scripts": []
|
1036 |
}
|
1037 |
],
|
1038 |
+
"tokenizers": {
|
1039 |
+
"Taml": {
|
1040 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
1041 |
+
"original_lang_name": "tamil",
|
1042 |
+
"original_lang_code": "tam",
|
1043 |
+
"script": "Taml",
|
1044 |
+
"class_name": "IndicNLPTokenizer"
|
1045 |
+
}
|
1046 |
+
},
|
1047 |
"node_i": "3681",
|
1048 |
+
"native_tokenizers": [],
|
1049 |
+
"scripts": []
|
1050 |
}
|
1051 |
],
|
1052 |
+
"tokenizers": {
|
1053 |
+
"Mlym": {
|
1054 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
1055 |
+
"original_lang_name": "malayalam",
|
1056 |
+
"original_lang_code": "mal",
|
1057 |
+
"script": "Mlym",
|
1058 |
+
"class_name": "IndicNLPTokenizer"
|
1059 |
+
},
|
1060 |
+
"Taml": {
|
1061 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
1062 |
+
"original_lang_name": "tamil",
|
1063 |
+
"original_lang_code": "tam",
|
1064 |
+
"script": "Taml",
|
1065 |
+
"class_name": "IndicNLPTokenizer"
|
1066 |
+
}
|
1067 |
+
},
|
1068 |
"node_i": "3669",
|
1069 |
+
"native_tokenizers": [],
|
1070 |
+
"scripts": []
|
1071 |
},
|
1072 |
{
|
1073 |
"name": "Toda-Kota",
|
1074 |
"iso_1_code": null,
|
1075 |
"iso_3_code": null,
|
|
|
1076 |
"children": [
|
1077 |
{
|
1078 |
"name": "Kota",
|
1079 |
"iso_1_code": null,
|
1080 |
"iso_3_code": "kfe",
|
|
|
1081 |
"children": [],
|
1082 |
+
"tokenizers": {},
|
1083 |
"node_i": "3692",
|
1084 |
+
"native_tokenizers": [],
|
1085 |
+
"scripts": []
|
1086 |
},
|
1087 |
{
|
1088 |
"name": "Toda",
|
1089 |
"iso_1_code": null,
|
1090 |
"iso_3_code": "tcx",
|
|
|
1091 |
"children": [],
|
1092 |
+
"tokenizers": {},
|
1093 |
"node_i": "3693",
|
1094 |
+
"native_tokenizers": [],
|
1095 |
+
"scripts": []
|
1096 |
}
|
1097 |
],
|
1098 |
+
"tokenizers": {},
|
1099 |
"node_i": "3691",
|
1100 |
+
"native_tokenizers": [],
|
1101 |
+
"scripts": []
|
1102 |
}
|
1103 |
],
|
1104 |
+
"tokenizers": {
|
1105 |
+
"Mlym": {
|
1106 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
1107 |
+
"original_lang_name": "malayalam",
|
1108 |
+
"original_lang_code": "mal",
|
1109 |
+
"script": "Mlym",
|
1110 |
+
"class_name": "IndicNLPTokenizer"
|
1111 |
+
},
|
1112 |
+
"Taml": {
|
1113 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
1114 |
+
"original_lang_name": "tamil",
|
1115 |
+
"original_lang_code": "tam",
|
1116 |
+
"script": "Taml",
|
1117 |
+
"class_name": "IndicNLPTokenizer"
|
1118 |
+
}
|
1119 |
+
},
|
1120 |
"node_i": "3662",
|
1121 |
+
"native_tokenizers": [],
|
1122 |
+
"scripts": []
|
1123 |
},
|
1124 |
{
|
1125 |
"name": "Unclassified",
|
1126 |
"iso_1_code": null,
|
1127 |
"iso_3_code": null,
|
|
|
1128 |
"children": [
|
1129 |
{
|
1130 |
"name": "Chetti, Wayanad",
|
1131 |
"iso_1_code": null,
|
1132 |
"iso_3_code": "ctt",
|
|
|
1133 |
"children": [],
|
1134 |
+
"tokenizers": {},
|
1135 |
"node_i": "3695",
|
1136 |
+
"native_tokenizers": [],
|
1137 |
+
"scripts": []
|
1138 |
}
|
1139 |
],
|
1140 |
+
"tokenizers": {},
|
1141 |
"node_i": "3694",
|
1142 |
+
"native_tokenizers": [],
|
1143 |
+
"scripts": []
|
1144 |
}
|
1145 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1146 |
"tokenizers": {
|
1147 |
"Knda": {
|
1148 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
1149 |
"original_lang_name": "kannada",
|
1150 |
"original_lang_code": "kan",
|
1151 |
+
"script": "Knda",
|
1152 |
+
"class_name": "IndicNLPTokenizer"
|
1153 |
+
},
|
1154 |
+
"Mlym": {
|
1155 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
1156 |
+
"original_lang_name": "malayalam",
|
1157 |
+
"original_lang_code": "mal",
|
1158 |
+
"script": "Mlym",
|
1159 |
+
"class_name": "IndicNLPTokenizer"
|
1160 |
+
},
|
1161 |
+
"Taml": {
|
1162 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
1163 |
+
"original_lang_name": "tamil",
|
1164 |
+
"original_lang_code": "tam",
|
1165 |
+
"script": "Taml",
|
1166 |
+
"class_name": "IndicNLPTokenizer"
|
1167 |
}
|
1168 |
},
|
1169 |
+
"node_i": "3656",
|
1170 |
+
"native_tokenizers": [],
|
1171 |
+
"scripts": []
|
1172 |
+
},
|
1173 |
+
{
|
1174 |
+
"name": "Tulu",
|
1175 |
+
"iso_1_code": null,
|
1176 |
+
"iso_3_code": null,
|
1177 |
"children": [
|
1178 |
{
|
1179 |
"name": "Bellari",
|
1180 |
"iso_1_code": null,
|
1181 |
"iso_3_code": "brw",
|
|
|
1182 |
"children": [],
|
1183 |
+
"tokenizers": {},
|
1184 |
"node_i": "3697",
|
1185 |
+
"native_tokenizers": [],
|
1186 |
+
"scripts": []
|
1187 |
},
|
1188 |
{
|
1189 |
"name": "Kudiya",
|
1190 |
"iso_1_code": null,
|
1191 |
"iso_3_code": "kfg",
|
|
|
1192 |
"children": [],
|
1193 |
+
"tokenizers": {},
|
1194 |
"node_i": "3698",
|
1195 |
+
"native_tokenizers": [],
|
1196 |
+
"scripts": []
|
1197 |
},
|
1198 |
{
|
1199 |
"name": "Tulu",
|
1200 |
"iso_1_code": null,
|
1201 |
"iso_3_code": "tcy",
|
1202 |
+
"children": [],
|
1203 |
"tokenizers": {
|
1204 |
"Knda": {
|
1205 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
1206 |
"original_lang_name": "kannada",
|
1207 |
"original_lang_code": "kan",
|
1208 |
+
"script": "Knda",
|
1209 |
+
"class_name": "IndicNLPTokenizer"
|
|
|
|
|
|
|
|
|
1210 |
}
|
1211 |
},
|
|
|
1212 |
"node_i": "3699",
|
1213 |
+
"native_tokenizers": [],
|
1214 |
"scripts": [
|
1215 |
"Knda"
|
1216 |
+
]
|
|
|
1217 |
},
|
1218 |
{
|
1219 |
"name": "Koraga",
|
1220 |
"iso_1_code": null,
|
1221 |
"iso_3_code": null,
|
|
|
1222 |
"children": [
|
1223 |
{
|
1224 |
"name": "Koraga, Korra",
|
1225 |
"iso_1_code": null,
|
1226 |
"iso_3_code": "kfd",
|
|
|
1227 |
"children": [],
|
1228 |
+
"tokenizers": {},
|
1229 |
"node_i": "3701",
|
1230 |
+
"native_tokenizers": [],
|
1231 |
+
"scripts": []
|
1232 |
},
|
1233 |
{
|
1234 |
"name": "Koraga, Mudu",
|
1235 |
"iso_1_code": null,
|
1236 |
"iso_3_code": "vmd",
|
|
|
1237 |
"children": [],
|
1238 |
+
"tokenizers": {},
|
1239 |
"node_i": "3702",
|
1240 |
+
"native_tokenizers": [],
|
1241 |
+
"scripts": []
|
1242 |
}
|
1243 |
],
|
1244 |
+
"tokenizers": {},
|
1245 |
"node_i": "3700",
|
1246 |
+
"native_tokenizers": [],
|
1247 |
+
"scripts": []
|
1248 |
}
|
1249 |
],
|
1250 |
+
"tokenizers": {
|
1251 |
+
"Knda": {
|
1252 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
1253 |
+
"original_lang_name": "kannada",
|
1254 |
+
"original_lang_code": "kan",
|
1255 |
+
"script": "Knda",
|
1256 |
+
"class_name": "IndicNLPTokenizer"
|
1257 |
+
}
|
1258 |
+
},
|
1259 |
"node_i": "3696",
|
1260 |
+
"native_tokenizers": [],
|
1261 |
+
"scripts": []
|
1262 |
},
|
1263 |
{
|
1264 |
"name": "Unclassified",
|
1265 |
"iso_1_code": null,
|
1266 |
"iso_3_code": null,
|
|
|
1267 |
"children": [
|
1268 |
{
|
1269 |
"name": "Mala Malasar",
|
1270 |
"iso_1_code": null,
|
1271 |
"iso_3_code": "ima",
|
|
|
1272 |
"children": [],
|
1273 |
+
"tokenizers": {},
|
1274 |
"node_i": "3704",
|
1275 |
+
"native_tokenizers": [],
|
1276 |
+
"scripts": []
|
1277 |
},
|
1278 |
{
|
1279 |
"name": "Thachanadan",
|
1280 |
"iso_1_code": null,
|
1281 |
"iso_3_code": "thn",
|
|
|
1282 |
"children": [],
|
1283 |
+
"tokenizers": {},
|
1284 |
"node_i": "3705",
|
1285 |
+
"native_tokenizers": [],
|
1286 |
+
"scripts": []
|
1287 |
},
|
1288 |
{
|
1289 |
"name": "Ullatan",
|
1290 |
"iso_1_code": null,
|
1291 |
"iso_3_code": "ull",
|
|
|
1292 |
"children": [],
|
1293 |
+
"tokenizers": {},
|
1294 |
"node_i": "3706",
|
1295 |
+
"native_tokenizers": [],
|
1296 |
+
"scripts": []
|
1297 |
},
|
1298 |
{
|
1299 |
"name": "Malasar",
|
1300 |
"iso_1_code": null,
|
1301 |
"iso_3_code": "ymr",
|
|
|
1302 |
"children": [],
|
1303 |
+
"tokenizers": {},
|
1304 |
"node_i": "3707",
|
1305 |
+
"native_tokenizers": [],
|
1306 |
+
"scripts": []
|
1307 |
}
|
1308 |
],
|
1309 |
+
"tokenizers": {},
|
1310 |
"node_i": "3703",
|
1311 |
+
"native_tokenizers": [],
|
1312 |
+
"scripts": []
|
1313 |
}
|
1314 |
],
|
1315 |
+
"tokenizers": {
|
1316 |
+
"Knda": {
|
1317 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
1318 |
+
"original_lang_name": "kannada",
|
1319 |
+
"original_lang_code": "kan",
|
1320 |
+
"script": "Knda",
|
1321 |
+
"class_name": "IndicNLPTokenizer"
|
1322 |
+
},
|
1323 |
+
"Mlym": {
|
1324 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
1325 |
+
"original_lang_name": "malayalam",
|
1326 |
+
"original_lang_code": "mal",
|
1327 |
+
"script": "Mlym",
|
1328 |
+
"class_name": "IndicNLPTokenizer"
|
1329 |
+
},
|
1330 |
+
"Taml": {
|
1331 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
1332 |
+
"original_lang_name": "tamil",
|
1333 |
+
"original_lang_code": "tam",
|
1334 |
+
"script": "Taml",
|
1335 |
+
"class_name": "IndicNLPTokenizer"
|
1336 |
+
}
|
1337 |
+
},
|
1338 |
"node_i": "3648",
|
1339 |
+
"native_tokenizers": [],
|
1340 |
+
"scripts": []
|
1341 |
},
|
1342 |
{
|
1343 |
"name": "Unclassified",
|
1344 |
"iso_1_code": null,
|
1345 |
"iso_3_code": null,
|
|
|
1346 |
"children": [
|
1347 |
{
|
1348 |
"name": "Allar",
|
1349 |
"iso_1_code": null,
|
1350 |
"iso_3_code": "all",
|
|
|
1351 |
"children": [],
|
1352 |
+
"tokenizers": {},
|
1353 |
"node_i": "3709",
|
1354 |
+
"native_tokenizers": [],
|
1355 |
+
"scripts": []
|
1356 |
},
|
1357 |
{
|
1358 |
"name": "Bharia",
|
1359 |
"iso_1_code": null,
|
1360 |
"iso_3_code": "bha",
|
|
|
1361 |
"children": [],
|
1362 |
+
"tokenizers": {},
|
1363 |
"node_i": "3710",
|
1364 |
+
"native_tokenizers": [],
|
1365 |
+
"scripts": []
|
1366 |
},
|
1367 |
{
|
1368 |
"name": "Malankuravan",
|
1369 |
"iso_1_code": null,
|
1370 |
"iso_3_code": "mjo",
|
|
|
1371 |
"children": [],
|
1372 |
+
"tokenizers": {},
|
1373 |
"node_i": "3711",
|
1374 |
+
"native_tokenizers": [],
|
1375 |
+
"scripts": []
|
1376 |
},
|
1377 |
{
|
1378 |
"name": "Pattapu",
|
1379 |
"iso_1_code": null,
|
1380 |
"iso_3_code": "ptq",
|
|
|
1381 |
"children": [],
|
1382 |
+
"tokenizers": {},
|
1383 |
"node_i": "3712",
|
1384 |
+
"native_tokenizers": [],
|
1385 |
+
"scripts": []
|
1386 |
},
|
1387 |
{
|
1388 |
"name": "Vishavan",
|
1389 |
"iso_1_code": null,
|
1390 |
"iso_3_code": "vis",
|
|
|
1391 |
"children": [],
|
1392 |
+
"tokenizers": {},
|
1393 |
"node_i": "3713",
|
1394 |
+
"native_tokenizers": [],
|
1395 |
+
"scripts": []
|
1396 |
}
|
1397 |
],
|
1398 |
+
"tokenizers": {},
|
1399 |
"node_i": "3708",
|
1400 |
+
"native_tokenizers": [],
|
1401 |
+
"scripts": []
|
1402 |
}
|
1403 |
],
|
1404 |
+
"tokenizers": {
|
1405 |
+
"Telu": {
|
1406 |
+
"full_object": "IndicNLPTokenizer(\"te\")",
|
1407 |
+
"original_lang_name": "telugu",
|
1408 |
+
"original_lang_code": "tel",
|
1409 |
+
"script": "Telu",
|
1410 |
+
"class_name": "IndicNLPTokenizer"
|
1411 |
+
},
|
1412 |
+
"Knda": {
|
1413 |
+
"full_object": "IndicNLPTokenizer(\"kn\")",
|
1414 |
+
"original_lang_name": "kannada",
|
1415 |
+
"original_lang_code": "kan",
|
1416 |
+
"script": "Knda",
|
1417 |
+
"class_name": "IndicNLPTokenizer"
|
1418 |
+
},
|
1419 |
+
"Mlym": {
|
1420 |
+
"full_object": "IndicNLPTokenizer(\"ml\")",
|
1421 |
+
"original_lang_name": "malayalam",
|
1422 |
+
"original_lang_code": "mal",
|
1423 |
+
"script": "Mlym",
|
1424 |
+
"class_name": "IndicNLPTokenizer"
|
1425 |
+
},
|
1426 |
+
"Taml": {
|
1427 |
+
"full_object": "IndicNLPTokenizer(\"ta\")",
|
1428 |
+
"original_lang_name": "tamil",
|
1429 |
+
"original_lang_code": "tam",
|
1430 |
+
"script": "Taml",
|
1431 |
+
"class_name": "IndicNLPTokenizer"
|
1432 |
+
}
|
1433 |
+
},
|
1434 |
"node_i": "3601",
|
1435 |
+
"native_tokenizers": [],
|
1436 |
+
"scripts": []
|
1437 |
}
|
data/East Bird’s Head-Sentani.json
CHANGED
@@ -2,173 +2,173 @@
|
|
2 |
"name": "East Bird\u2019s Head-Sentani",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Burmeso",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Burmeso",
|
15 |
"iso_1_code": null,
|
16 |
"iso_3_code": "bzu",
|
17 |
-
"tokenizers": {},
|
18 |
"children": [],
|
|
|
19 |
"node_i": "3716",
|
20 |
-
"
|
21 |
-
"
|
22 |
}
|
23 |
],
|
|
|
24 |
"node_i": "3715",
|
25 |
-
"
|
26 |
-
"
|
27 |
},
|
28 |
{
|
29 |
"name": "East Bird\u2019s Head",
|
30 |
"iso_1_code": null,
|
31 |
"iso_3_code": null,
|
32 |
-
"tokenizers": {},
|
33 |
"children": [
|
34 |
{
|
35 |
"name": "Mantion",
|
36 |
"iso_1_code": null,
|
37 |
"iso_3_code": null,
|
38 |
-
"tokenizers": {},
|
39 |
"children": [
|
40 |
{
|
41 |
"name": "Sougb",
|
42 |
"iso_1_code": null,
|
43 |
"iso_3_code": "mnx",
|
44 |
-
"tokenizers": {},
|
45 |
"children": [],
|
|
|
46 |
"node_i": "3719",
|
|
|
47 |
"scripts": [
|
48 |
"Latn"
|
49 |
-
]
|
50 |
-
"own_tokenizer": false
|
51 |
}
|
52 |
],
|
|
|
53 |
"node_i": "3718",
|
54 |
-
"
|
55 |
-
"
|
56 |
},
|
57 |
{
|
58 |
"name": "Meax",
|
59 |
"iso_1_code": null,
|
60 |
"iso_3_code": null,
|
61 |
-
"tokenizers": {},
|
62 |
"children": [
|
63 |
{
|
64 |
"name": "Meyah",
|
65 |
"iso_1_code": null,
|
66 |
"iso_3_code": "mej",
|
67 |
-
"tokenizers": {},
|
68 |
"children": [],
|
|
|
69 |
"node_i": "3721",
|
|
|
70 |
"scripts": [
|
71 |
"Latn"
|
72 |
-
]
|
73 |
-
"own_tokenizer": false
|
74 |
},
|
75 |
{
|
76 |
"name": "Moskona",
|
77 |
"iso_1_code": null,
|
78 |
"iso_3_code": "mtj",
|
79 |
-
"tokenizers": {},
|
80 |
"children": [],
|
|
|
81 |
"node_i": "3722",
|
|
|
82 |
"scripts": [
|
83 |
"Latn"
|
84 |
-
]
|
85 |
-
"own_tokenizer": false
|
86 |
}
|
87 |
],
|
|
|
88 |
"node_i": "3720",
|
89 |
-
"
|
90 |
-
"
|
91 |
}
|
92 |
],
|
|
|
93 |
"node_i": "3717",
|
94 |
-
"
|
95 |
-
"
|
96 |
},
|
97 |
{
|
98 |
"name": "Sentani",
|
99 |
"iso_1_code": null,
|
100 |
"iso_3_code": null,
|
101 |
-
"tokenizers": {},
|
102 |
"children": [
|
103 |
{
|
104 |
"name": "Demta",
|
105 |
"iso_1_code": null,
|
106 |
"iso_3_code": null,
|
107 |
-
"tokenizers": {},
|
108 |
"children": [
|
109 |
{
|
110 |
"name": "Sowari",
|
111 |
"iso_1_code": null,
|
112 |
"iso_3_code": "dmy",
|
113 |
-
"tokenizers": {},
|
114 |
"children": [],
|
|
|
115 |
"node_i": "3725",
|
116 |
-
"
|
117 |
-
"
|
118 |
}
|
119 |
],
|
|
|
120 |
"node_i": "3724",
|
121 |
-
"
|
122 |
-
"
|
123 |
},
|
124 |
{
|
125 |
"name": "Sentani Proper",
|
126 |
"iso_1_code": null,
|
127 |
"iso_3_code": null,
|
128 |
-
"tokenizers": {},
|
129 |
"children": [
|
130 |
{
|
131 |
"name": "Nafri",
|
132 |
"iso_1_code": null,
|
133 |
"iso_3_code": "nxx",
|
134 |
-
"tokenizers": {},
|
135 |
"children": [],
|
|
|
136 |
"node_i": "3727",
|
137 |
-
"
|
138 |
-
"
|
139 |
},
|
140 |
{
|
141 |
"name": "Sentani",
|
142 |
"iso_1_code": null,
|
143 |
"iso_3_code": "set",
|
144 |
-
"tokenizers": {},
|
145 |
"children": [],
|
|
|
146 |
"node_i": "3728",
|
147 |
-
"
|
148 |
-
"
|
149 |
},
|
150 |
{
|
151 |
"name": "Tabla",
|
152 |
"iso_1_code": null,
|
153 |
"iso_3_code": "tnm",
|
154 |
-
"tokenizers": {},
|
155 |
"children": [],
|
|
|
156 |
"node_i": "3729",
|
157 |
-
"
|
158 |
-
"
|
159 |
}
|
160 |
],
|
|
|
161 |
"node_i": "3726",
|
162 |
-
"
|
163 |
-
"
|
164 |
}
|
165 |
],
|
|
|
166 |
"node_i": "3723",
|
167 |
-
"
|
168 |
-
"
|
169 |
}
|
170 |
],
|
|
|
171 |
"node_i": "3714",
|
172 |
-
"
|
173 |
-
"
|
174 |
}
|
|
|
2 |
"name": "East Bird\u2019s Head-Sentani",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Burmeso",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Burmeso",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": "bzu",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "3716",
|
18 |
+
"native_tokenizers": [],
|
19 |
+
"scripts": []
|
20 |
}
|
21 |
],
|
22 |
+
"tokenizers": {},
|
23 |
"node_i": "3715",
|
24 |
+
"native_tokenizers": [],
|
25 |
+
"scripts": []
|
26 |
},
|
27 |
{
|
28 |
"name": "East Bird\u2019s Head",
|
29 |
"iso_1_code": null,
|
30 |
"iso_3_code": null,
|
|
|
31 |
"children": [
|
32 |
{
|
33 |
"name": "Mantion",
|
34 |
"iso_1_code": null,
|
35 |
"iso_3_code": null,
|
|
|
36 |
"children": [
|
37 |
{
|
38 |
"name": "Sougb",
|
39 |
"iso_1_code": null,
|
40 |
"iso_3_code": "mnx",
|
|
|
41 |
"children": [],
|
42 |
+
"tokenizers": {},
|
43 |
"node_i": "3719",
|
44 |
+
"native_tokenizers": [],
|
45 |
"scripts": [
|
46 |
"Latn"
|
47 |
+
]
|
|
|
48 |
}
|
49 |
],
|
50 |
+
"tokenizers": {},
|
51 |
"node_i": "3718",
|
52 |
+
"native_tokenizers": [],
|
53 |
+
"scripts": []
|
54 |
},
|
55 |
{
|
56 |
"name": "Meax",
|
57 |
"iso_1_code": null,
|
58 |
"iso_3_code": null,
|
|
|
59 |
"children": [
|
60 |
{
|
61 |
"name": "Meyah",
|
62 |
"iso_1_code": null,
|
63 |
"iso_3_code": "mej",
|
|
|
64 |
"children": [],
|
65 |
+
"tokenizers": {},
|
66 |
"node_i": "3721",
|
67 |
+
"native_tokenizers": [],
|
68 |
"scripts": [
|
69 |
"Latn"
|
70 |
+
]
|
|
|
71 |
},
|
72 |
{
|
73 |
"name": "Moskona",
|
74 |
"iso_1_code": null,
|
75 |
"iso_3_code": "mtj",
|
|
|
76 |
"children": [],
|
77 |
+
"tokenizers": {},
|
78 |
"node_i": "3722",
|
79 |
+
"native_tokenizers": [],
|
80 |
"scripts": [
|
81 |
"Latn"
|
82 |
+
]
|
|
|
83 |
}
|
84 |
],
|
85 |
+
"tokenizers": {},
|
86 |
"node_i": "3720",
|
87 |
+
"native_tokenizers": [],
|
88 |
+
"scripts": []
|
89 |
}
|
90 |
],
|
91 |
+
"tokenizers": {},
|
92 |
"node_i": "3717",
|
93 |
+
"native_tokenizers": [],
|
94 |
+
"scripts": []
|
95 |
},
|
96 |
{
|
97 |
"name": "Sentani",
|
98 |
"iso_1_code": null,
|
99 |
"iso_3_code": null,
|
|
|
100 |
"children": [
|
101 |
{
|
102 |
"name": "Demta",
|
103 |
"iso_1_code": null,
|
104 |
"iso_3_code": null,
|
|
|
105 |
"children": [
|
106 |
{
|
107 |
"name": "Sowari",
|
108 |
"iso_1_code": null,
|
109 |
"iso_3_code": "dmy",
|
|
|
110 |
"children": [],
|
111 |
+
"tokenizers": {},
|
112 |
"node_i": "3725",
|
113 |
+
"native_tokenizers": [],
|
114 |
+
"scripts": []
|
115 |
}
|
116 |
],
|
117 |
+
"tokenizers": {},
|
118 |
"node_i": "3724",
|
119 |
+
"native_tokenizers": [],
|
120 |
+
"scripts": []
|
121 |
},
|
122 |
{
|
123 |
"name": "Sentani Proper",
|
124 |
"iso_1_code": null,
|
125 |
"iso_3_code": null,
|
|
|
126 |
"children": [
|
127 |
{
|
128 |
"name": "Nafri",
|
129 |
"iso_1_code": null,
|
130 |
"iso_3_code": "nxx",
|
|
|
131 |
"children": [],
|
132 |
+
"tokenizers": {},
|
133 |
"node_i": "3727",
|
134 |
+
"native_tokenizers": [],
|
135 |
+
"scripts": []
|
136 |
},
|
137 |
{
|
138 |
"name": "Sentani",
|
139 |
"iso_1_code": null,
|
140 |
"iso_3_code": "set",
|
|
|
141 |
"children": [],
|
142 |
+
"tokenizers": {},
|
143 |
"node_i": "3728",
|
144 |
+
"native_tokenizers": [],
|
145 |
+
"scripts": []
|
146 |
},
|
147 |
{
|
148 |
"name": "Tabla",
|
149 |
"iso_1_code": null,
|
150 |
"iso_3_code": "tnm",
|
|
|
151 |
"children": [],
|
152 |
+
"tokenizers": {},
|
153 |
"node_i": "3729",
|
154 |
+
"native_tokenizers": [],
|
155 |
+
"scripts": []
|
156 |
}
|
157 |
],
|
158 |
+
"tokenizers": {},
|
159 |
"node_i": "3726",
|
160 |
+
"native_tokenizers": [],
|
161 |
+
"scripts": []
|
162 |
}
|
163 |
],
|
164 |
+
"tokenizers": {},
|
165 |
"node_i": "3723",
|
166 |
+
"native_tokenizers": [],
|
167 |
+
"scripts": []
|
168 |
}
|
169 |
],
|
170 |
+
"tokenizers": {},
|
171 |
"node_i": "3714",
|
172 |
+
"native_tokenizers": [],
|
173 |
+
"scripts": []
|
174 |
}
|
data/East Geelvink Bay.json
CHANGED
@@ -2,143 +2,143 @@
|
|
2 |
"name": "East Geelvink Bay",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Anasi",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "bpo",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3731",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Barapasi",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": "brp",
|
21 |
-
"tokenizers": {},
|
22 |
"children": [],
|
|
|
23 |
"node_i": "3732",
|
24 |
-
"
|
25 |
-
"
|
26 |
},
|
27 |
{
|
28 |
"name": "Burate",
|
29 |
"iso_1_code": null,
|
30 |
"iso_3_code": "bti",
|
31 |
-
"tokenizers": {},
|
32 |
"children": [],
|
|
|
33 |
"node_i": "3733",
|
34 |
-
"
|
35 |
-
"
|
36 |
},
|
37 |
{
|
38 |
"name": "Kehu",
|
39 |
"iso_1_code": null,
|
40 |
"iso_3_code": "khh",
|
41 |
-
"tokenizers": {},
|
42 |
"children": [],
|
|
|
43 |
"node_i": "3734",
|
44 |
-
"
|
45 |
-
"
|
46 |
},
|
47 |
{
|
48 |
"name": "Kofei",
|
49 |
"iso_1_code": null,
|
50 |
"iso_3_code": "kpi",
|
51 |
-
"tokenizers": {},
|
52 |
"children": [],
|
|
|
53 |
"node_i": "3735",
|
54 |
-
"
|
55 |
-
"
|
56 |
},
|
57 |
{
|
58 |
"name": "Nisa",
|
59 |
"iso_1_code": null,
|
60 |
"iso_3_code": "njs",
|
61 |
-
"tokenizers": {},
|
62 |
"children": [],
|
|
|
63 |
"node_i": "3736",
|
64 |
-
"
|
65 |
-
"
|
66 |
},
|
67 |
{
|
68 |
"name": "Sauri",
|
69 |
"iso_1_code": null,
|
70 |
"iso_3_code": "srt",
|
71 |
-
"tokenizers": {},
|
72 |
"children": [],
|
|
|
73 |
"node_i": "3737",
|
74 |
-
"
|
75 |
-
"
|
76 |
},
|
77 |
{
|
78 |
"name": "Tefaro",
|
79 |
"iso_1_code": null,
|
80 |
"iso_3_code": "tfo",
|
81 |
-
"tokenizers": {},
|
82 |
"children": [],
|
|
|
83 |
"node_i": "3738",
|
84 |
-
"
|
85 |
-
"
|
86 |
},
|
87 |
{
|
88 |
"name": "Tunggare",
|
89 |
"iso_1_code": null,
|
90 |
"iso_3_code": "trt",
|
91 |
-
"tokenizers": {},
|
92 |
"children": [],
|
|
|
93 |
"node_i": "3739",
|
94 |
-
"
|
95 |
-
"
|
96 |
},
|
97 |
{
|
98 |
"name": "Woria",
|
99 |
"iso_1_code": null,
|
100 |
"iso_3_code": "wor",
|
101 |
-
"tokenizers": {},
|
102 |
"children": [],
|
|
|
103 |
"node_i": "3740",
|
104 |
-
"
|
105 |
-
"
|
106 |
},
|
107 |
{
|
108 |
"name": "Bauzi",
|
109 |
"iso_1_code": null,
|
110 |
"iso_3_code": null,
|
111 |
-
"tokenizers": {},
|
112 |
"children": [
|
113 |
{
|
114 |
"name": "Bauzi",
|
115 |
"iso_1_code": null,
|
116 |
"iso_3_code": "bvz",
|
117 |
-
"tokenizers": {},
|
118 |
"children": [],
|
|
|
119 |
"node_i": "3742",
|
|
|
120 |
"scripts": [
|
121 |
"Latn"
|
122 |
-
]
|
123 |
-
"own_tokenizer": false
|
124 |
},
|
125 |
{
|
126 |
"name": "Demisa",
|
127 |
"iso_1_code": null,
|
128 |
"iso_3_code": "dei",
|
129 |
-
"tokenizers": {},
|
130 |
"children": [],
|
|
|
131 |
"node_i": "3743",
|
132 |
-
"
|
133 |
-
"
|
134 |
}
|
135 |
],
|
|
|
136 |
"node_i": "3741",
|
137 |
-
"
|
138 |
-
"
|
139 |
}
|
140 |
],
|
|
|
141 |
"node_i": "3730",
|
142 |
-
"
|
143 |
-
"
|
144 |
}
|
|
|
2 |
"name": "East Geelvink Bay",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Anasi",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "bpo",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3731",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Barapasi",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "brp",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "3732",
|
23 |
+
"native_tokenizers": [],
|
24 |
+
"scripts": []
|
25 |
},
|
26 |
{
|
27 |
"name": "Burate",
|
28 |
"iso_1_code": null,
|
29 |
"iso_3_code": "bti",
|
|
|
30 |
"children": [],
|
31 |
+
"tokenizers": {},
|
32 |
"node_i": "3733",
|
33 |
+
"native_tokenizers": [],
|
34 |
+
"scripts": []
|
35 |
},
|
36 |
{
|
37 |
"name": "Kehu",
|
38 |
"iso_1_code": null,
|
39 |
"iso_3_code": "khh",
|
|
|
40 |
"children": [],
|
41 |
+
"tokenizers": {},
|
42 |
"node_i": "3734",
|
43 |
+
"native_tokenizers": [],
|
44 |
+
"scripts": []
|
45 |
},
|
46 |
{
|
47 |
"name": "Kofei",
|
48 |
"iso_1_code": null,
|
49 |
"iso_3_code": "kpi",
|
|
|
50 |
"children": [],
|
51 |
+
"tokenizers": {},
|
52 |
"node_i": "3735",
|
53 |
+
"native_tokenizers": [],
|
54 |
+
"scripts": []
|
55 |
},
|
56 |
{
|
57 |
"name": "Nisa",
|
58 |
"iso_1_code": null,
|
59 |
"iso_3_code": "njs",
|
|
|
60 |
"children": [],
|
61 |
+
"tokenizers": {},
|
62 |
"node_i": "3736",
|
63 |
+
"native_tokenizers": [],
|
64 |
+
"scripts": []
|
65 |
},
|
66 |
{
|
67 |
"name": "Sauri",
|
68 |
"iso_1_code": null,
|
69 |
"iso_3_code": "srt",
|
|
|
70 |
"children": [],
|
71 |
+
"tokenizers": {},
|
72 |
"node_i": "3737",
|
73 |
+
"native_tokenizers": [],
|
74 |
+
"scripts": []
|
75 |
},
|
76 |
{
|
77 |
"name": "Tefaro",
|
78 |
"iso_1_code": null,
|
79 |
"iso_3_code": "tfo",
|
|
|
80 |
"children": [],
|
81 |
+
"tokenizers": {},
|
82 |
"node_i": "3738",
|
83 |
+
"native_tokenizers": [],
|
84 |
+
"scripts": []
|
85 |
},
|
86 |
{
|
87 |
"name": "Tunggare",
|
88 |
"iso_1_code": null,
|
89 |
"iso_3_code": "trt",
|
|
|
90 |
"children": [],
|
91 |
+
"tokenizers": {},
|
92 |
"node_i": "3739",
|
93 |
+
"native_tokenizers": [],
|
94 |
+
"scripts": []
|
95 |
},
|
96 |
{
|
97 |
"name": "Woria",
|
98 |
"iso_1_code": null,
|
99 |
"iso_3_code": "wor",
|
|
|
100 |
"children": [],
|
101 |
+
"tokenizers": {},
|
102 |
"node_i": "3740",
|
103 |
+
"native_tokenizers": [],
|
104 |
+
"scripts": []
|
105 |
},
|
106 |
{
|
107 |
"name": "Bauzi",
|
108 |
"iso_1_code": null,
|
109 |
"iso_3_code": null,
|
|
|
110 |
"children": [
|
111 |
{
|
112 |
"name": "Bauzi",
|
113 |
"iso_1_code": null,
|
114 |
"iso_3_code": "bvz",
|
|
|
115 |
"children": [],
|
116 |
+
"tokenizers": {},
|
117 |
"node_i": "3742",
|
118 |
+
"native_tokenizers": [],
|
119 |
"scripts": [
|
120 |
"Latn"
|
121 |
+
]
|
|
|
122 |
},
|
123 |
{
|
124 |
"name": "Demisa",
|
125 |
"iso_1_code": null,
|
126 |
"iso_3_code": "dei",
|
|
|
127 |
"children": [],
|
128 |
+
"tokenizers": {},
|
129 |
"node_i": "3743",
|
130 |
+
"native_tokenizers": [],
|
131 |
+
"scripts": []
|
132 |
}
|
133 |
],
|
134 |
+
"tokenizers": {},
|
135 |
"node_i": "3741",
|
136 |
+
"native_tokenizers": [],
|
137 |
+
"scripts": []
|
138 |
}
|
139 |
],
|
140 |
+
"tokenizers": {},
|
141 |
"node_i": "3730",
|
142 |
+
"native_tokenizers": [],
|
143 |
+
"scripts": []
|
144 |
}
|
data/East New Britain.json
CHANGED
@@ -2,104 +2,104 @@
|
|
2 |
"name": "East New Britain",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Baining",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Qaqet",
|
15 |
"iso_1_code": null,
|
16 |
"iso_3_code": "byx",
|
17 |
-
"tokenizers": {},
|
18 |
"children": [],
|
|
|
19 |
"node_i": "3746",
|
|
|
20 |
"scripts": [
|
21 |
"Latn"
|
22 |
-
]
|
23 |
-
"own_tokenizer": false
|
24 |
},
|
25 |
{
|
26 |
"name": "Kairak",
|
27 |
"iso_1_code": null,
|
28 |
"iso_3_code": "ckr",
|
29 |
-
"tokenizers": {},
|
30 |
"children": [],
|
|
|
31 |
"node_i": "3747",
|
32 |
-
"
|
33 |
-
"
|
34 |
},
|
35 |
{
|
36 |
"name": "Mali",
|
37 |
"iso_1_code": null,
|
38 |
"iso_3_code": "gcc",
|
39 |
-
"tokenizers": {},
|
40 |
"children": [],
|
|
|
41 |
"node_i": "3748",
|
42 |
-
"
|
43 |
-
"
|
44 |
},
|
45 |
{
|
46 |
"name": "Simbali",
|
47 |
"iso_1_code": null,
|
48 |
"iso_3_code": "smg",
|
49 |
-
"tokenizers": {},
|
50 |
"children": [],
|
|
|
51 |
"node_i": "3749",
|
52 |
-
"
|
53 |
-
"
|
54 |
},
|
55 |
{
|
56 |
"name": "Ura",
|
57 |
"iso_1_code": null,
|
58 |
"iso_3_code": "uro",
|
59 |
-
"tokenizers": {},
|
60 |
"children": [],
|
|
|
61 |
"node_i": "3750",
|
62 |
-
"
|
63 |
-
"
|
64 |
},
|
65 |
{
|
66 |
"name": "Makolkol",
|
67 |
"iso_1_code": null,
|
68 |
"iso_3_code": "zmh",
|
69 |
-
"tokenizers": {},
|
70 |
"children": [],
|
|
|
71 |
"node_i": "3751",
|
72 |
-
"
|
73 |
-
"
|
74 |
}
|
75 |
],
|
|
|
76 |
"node_i": "3745",
|
77 |
-
"
|
78 |
-
"
|
79 |
},
|
80 |
{
|
81 |
"name": "Taulil",
|
82 |
"iso_1_code": null,
|
83 |
"iso_3_code": null,
|
84 |
-
"tokenizers": {},
|
85 |
"children": [
|
86 |
{
|
87 |
"name": "Tulil",
|
88 |
"iso_1_code": null,
|
89 |
"iso_3_code": "tuh",
|
90 |
-
"tokenizers": {},
|
91 |
"children": [],
|
|
|
92 |
"node_i": "3753",
|
93 |
-
"
|
94 |
-
"
|
95 |
}
|
96 |
],
|
|
|
97 |
"node_i": "3752",
|
98 |
-
"
|
99 |
-
"
|
100 |
}
|
101 |
],
|
|
|
102 |
"node_i": "3744",
|
103 |
-
"
|
104 |
-
"
|
105 |
}
|
|
|
2 |
"name": "East New Britain",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Baining",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Qaqet",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": "byx",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "3746",
|
18 |
+
"native_tokenizers": [],
|
19 |
"scripts": [
|
20 |
"Latn"
|
21 |
+
]
|
|
|
22 |
},
|
23 |
{
|
24 |
"name": "Kairak",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": "ckr",
|
|
|
27 |
"children": [],
|
28 |
+
"tokenizers": {},
|
29 |
"node_i": "3747",
|
30 |
+
"native_tokenizers": [],
|
31 |
+
"scripts": []
|
32 |
},
|
33 |
{
|
34 |
"name": "Mali",
|
35 |
"iso_1_code": null,
|
36 |
"iso_3_code": "gcc",
|
|
|
37 |
"children": [],
|
38 |
+
"tokenizers": {},
|
39 |
"node_i": "3748",
|
40 |
+
"native_tokenizers": [],
|
41 |
+
"scripts": []
|
42 |
},
|
43 |
{
|
44 |
"name": "Simbali",
|
45 |
"iso_1_code": null,
|
46 |
"iso_3_code": "smg",
|
|
|
47 |
"children": [],
|
48 |
+
"tokenizers": {},
|
49 |
"node_i": "3749",
|
50 |
+
"native_tokenizers": [],
|
51 |
+
"scripts": []
|
52 |
},
|
53 |
{
|
54 |
"name": "Ura",
|
55 |
"iso_1_code": null,
|
56 |
"iso_3_code": "uro",
|
|
|
57 |
"children": [],
|
58 |
+
"tokenizers": {},
|
59 |
"node_i": "3750",
|
60 |
+
"native_tokenizers": [],
|
61 |
+
"scripts": []
|
62 |
},
|
63 |
{
|
64 |
"name": "Makolkol",
|
65 |
"iso_1_code": null,
|
66 |
"iso_3_code": "zmh",
|
|
|
67 |
"children": [],
|
68 |
+
"tokenizers": {},
|
69 |
"node_i": "3751",
|
70 |
+
"native_tokenizers": [],
|
71 |
+
"scripts": []
|
72 |
}
|
73 |
],
|
74 |
+
"tokenizers": {},
|
75 |
"node_i": "3745",
|
76 |
+
"native_tokenizers": [],
|
77 |
+
"scripts": []
|
78 |
},
|
79 |
{
|
80 |
"name": "Taulil",
|
81 |
"iso_1_code": null,
|
82 |
"iso_3_code": null,
|
|
|
83 |
"children": [
|
84 |
{
|
85 |
"name": "Tulil",
|
86 |
"iso_1_code": null,
|
87 |
"iso_3_code": "tuh",
|
|
|
88 |
"children": [],
|
89 |
+
"tokenizers": {},
|
90 |
"node_i": "3753",
|
91 |
+
"native_tokenizers": [],
|
92 |
+
"scripts": []
|
93 |
}
|
94 |
],
|
95 |
+
"tokenizers": {},
|
96 |
"node_i": "3752",
|
97 |
+
"native_tokenizers": [],
|
98 |
+
"scripts": []
|
99 |
}
|
100 |
],
|
101 |
+
"tokenizers": {},
|
102 |
"node_i": "3744",
|
103 |
+
"native_tokenizers": [],
|
104 |
+
"scripts": []
|
105 |
}
|
data/Eastern Trans-Fly.json
CHANGED
@@ -2,54 +2,54 @@
|
|
2 |
"name": "Eastern Trans-Fly",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Bine",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "bon",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3755",
|
|
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
-
]
|
17 |
-
"own_tokenizer": false
|
18 |
},
|
19 |
{
|
20 |
"name": "Wipi",
|
21 |
"iso_1_code": null,
|
22 |
"iso_3_code": "gdr",
|
23 |
-
"tokenizers": {},
|
24 |
"children": [],
|
|
|
25 |
"node_i": "3756",
|
|
|
26 |
"scripts": [
|
27 |
"Latn"
|
28 |
-
]
|
29 |
-
"own_tokenizer": false
|
30 |
},
|
31 |
{
|
32 |
"name": "Gizrra",
|
33 |
"iso_1_code": null,
|
34 |
"iso_3_code": "tof",
|
35 |
-
"tokenizers": {},
|
36 |
"children": [],
|
|
|
37 |
"node_i": "3757",
|
38 |
-
"
|
39 |
-
"
|
40 |
},
|
41 |
{
|
42 |
"name": "Meriam Mir",
|
43 |
"iso_1_code": null,
|
44 |
"iso_3_code": "ulk",
|
45 |
-
"tokenizers": {},
|
46 |
"children": [],
|
|
|
47 |
"node_i": "3758",
|
48 |
-
"
|
49 |
-
"
|
50 |
}
|
51 |
],
|
|
|
52 |
"node_i": "3754",
|
53 |
-
"
|
54 |
-
"
|
55 |
}
|
|
|
2 |
"name": "Eastern Trans-Fly",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Bine",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "bon",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3755",
|
13 |
+
"native_tokenizers": [],
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
+
]
|
|
|
17 |
},
|
18 |
{
|
19 |
"name": "Wipi",
|
20 |
"iso_1_code": null,
|
21 |
"iso_3_code": "gdr",
|
|
|
22 |
"children": [],
|
23 |
+
"tokenizers": {},
|
24 |
"node_i": "3756",
|
25 |
+
"native_tokenizers": [],
|
26 |
"scripts": [
|
27 |
"Latn"
|
28 |
+
]
|
|
|
29 |
},
|
30 |
{
|
31 |
"name": "Gizrra",
|
32 |
"iso_1_code": null,
|
33 |
"iso_3_code": "tof",
|
|
|
34 |
"children": [],
|
35 |
+
"tokenizers": {},
|
36 |
"node_i": "3757",
|
37 |
+
"native_tokenizers": [],
|
38 |
+
"scripts": []
|
39 |
},
|
40 |
{
|
41 |
"name": "Meriam Mir",
|
42 |
"iso_1_code": null,
|
43 |
"iso_3_code": "ulk",
|
|
|
44 |
"children": [],
|
45 |
+
"tokenizers": {},
|
46 |
"node_i": "3758",
|
47 |
+
"native_tokenizers": [],
|
48 |
+
"scripts": []
|
49 |
}
|
50 |
],
|
51 |
+
"tokenizers": {},
|
52 |
"node_i": "3754",
|
53 |
+
"native_tokenizers": [],
|
54 |
+
"scripts": []
|
55 |
}
|
data/Eskimo-Aleut.json
CHANGED
@@ -2,189 +2,189 @@
|
|
2 |
"name": "Eskimo-Aleut",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Aleut",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Aleut",
|
15 |
"iso_1_code": null,
|
16 |
"iso_3_code": "ale",
|
17 |
-
"tokenizers": {},
|
18 |
"children": [],
|
|
|
19 |
"node_i": "3761",
|
20 |
-
"
|
21 |
-
"
|
22 |
}
|
23 |
],
|
|
|
24 |
"node_i": "3760",
|
25 |
-
"
|
26 |
-
"
|
27 |
},
|
28 |
{
|
29 |
"name": "Eskimo",
|
30 |
"iso_1_code": null,
|
31 |
"iso_3_code": null,
|
32 |
-
"tokenizers": {},
|
33 |
"children": [
|
34 |
{
|
35 |
"name": "Inuit-Inupiaq",
|
36 |
"iso_1_code": null,
|
37 |
"iso_3_code": null,
|
38 |
-
"tokenizers": {},
|
39 |
"children": [
|
40 |
{
|
41 |
"name": "Inupiatun, North Alaskan",
|
42 |
"iso_1_code": "ik",
|
43 |
"iso_3_code": "esi",
|
44 |
-
"tokenizers": {},
|
45 |
"children": [],
|
|
|
46 |
"node_i": "3764",
|
|
|
47 |
"scripts": [
|
48 |
"Latn"
|
49 |
-
]
|
50 |
-
"own_tokenizer": false
|
51 |
},
|
52 |
{
|
53 |
"name": "Inupiatun, Northwest Alaska",
|
54 |
"iso_1_code": "ik",
|
55 |
"iso_3_code": "esk",
|
56 |
-
"tokenizers": {},
|
57 |
"children": [],
|
|
|
58 |
"node_i": "3765",
|
|
|
59 |
"scripts": [
|
60 |
"Latn"
|
61 |
-
]
|
62 |
-
"own_tokenizer": false
|
63 |
},
|
64 |
{
|
65 |
"name": "Inuktitut, Eastern Canadian",
|
66 |
"iso_1_code": "iu",
|
67 |
"iso_3_code": "ike",
|
68 |
-
"tokenizers": {},
|
69 |
"children": [],
|
|
|
70 |
"node_i": "3766",
|
|
|
71 |
"scripts": [
|
72 |
"Cans"
|
73 |
-
]
|
74 |
-
"own_tokenizer": false
|
75 |
},
|
76 |
{
|
77 |
"name": "Inuinnaqtun",
|
78 |
"iso_1_code": "iu",
|
79 |
"iso_3_code": "ikt",
|
80 |
-
"tokenizers": {},
|
81 |
"children": [],
|
|
|
82 |
"node_i": "3767",
|
|
|
83 |
"scripts": [
|
84 |
"Latn"
|
85 |
-
]
|
86 |
-
"own_tokenizer": false
|
87 |
},
|
88 |
{
|
89 |
"name": "Greenlandic",
|
90 |
"iso_1_code": "kl",
|
91 |
"iso_3_code": "kal",
|
92 |
-
"tokenizers": {},
|
93 |
"children": [],
|
|
|
94 |
"node_i": "3768",
|
|
|
95 |
"scripts": [
|
96 |
"Latn"
|
97 |
-
]
|
98 |
-
"own_tokenizer": false
|
99 |
}
|
100 |
],
|
|
|
101 |
"node_i": "3763",
|
102 |
-
"
|
103 |
-
"
|
104 |
},
|
105 |
{
|
106 |
"name": "Yupik",
|
107 |
"iso_1_code": null,
|
108 |
"iso_3_code": null,
|
109 |
-
"tokenizers": {},
|
110 |
"children": [
|
111 |
{
|
112 |
"name": "Yupik, Saint Lawrence Island",
|
113 |
"iso_1_code": null,
|
114 |
"iso_3_code": "ess",
|
115 |
-
"tokenizers": {},
|
116 |
"children": [],
|
|
|
117 |
"node_i": "3770",
|
|
|
118 |
"scripts": [
|
119 |
"Latn"
|
120 |
-
]
|
121 |
-
"own_tokenizer": false
|
122 |
},
|
123 |
{
|
124 |
"name": "Yupik, Naukan",
|
125 |
"iso_1_code": null,
|
126 |
"iso_3_code": "ynk",
|
127 |
-
"tokenizers": {},
|
128 |
"children": [],
|
|
|
129 |
"node_i": "3771",
|
130 |
-
"
|
131 |
-
"
|
132 |
},
|
133 |
{
|
134 |
"name": "Yupik, Sirenik",
|
135 |
"iso_1_code": null,
|
136 |
"iso_3_code": "ysr",
|
137 |
-
"tokenizers": {},
|
138 |
"children": [],
|
|
|
139 |
"node_i": "3772",
|
140 |
-
"
|
141 |
-
"
|
142 |
},
|
143 |
{
|
144 |
"name": "Alaskan Yupik",
|
145 |
"iso_1_code": null,
|
146 |
"iso_3_code": null,
|
147 |
-
"tokenizers": {},
|
148 |
"children": [
|
149 |
{
|
150 |
"name": "Yupik, Pacific Gulf",
|
151 |
"iso_1_code": null,
|
152 |
"iso_3_code": "ems",
|
153 |
-
"tokenizers": {},
|
154 |
"children": [],
|
|
|
155 |
"node_i": "3774",
|
156 |
-
"
|
157 |
-
"
|
158 |
},
|
159 |
{
|
160 |
"name": "Yupik, Central",
|
161 |
"iso_1_code": null,
|
162 |
"iso_3_code": "esu",
|
163 |
-
"tokenizers": {},
|
164 |
"children": [],
|
|
|
165 |
"node_i": "3775",
|
|
|
166 |
"scripts": [
|
167 |
"Latn"
|
168 |
-
]
|
169 |
-
"own_tokenizer": false
|
170 |
}
|
171 |
],
|
|
|
172 |
"node_i": "3773",
|
173 |
-
"
|
174 |
-
"
|
175 |
}
|
176 |
],
|
|
|
177 |
"node_i": "3769",
|
178 |
-
"
|
179 |
-
"
|
180 |
}
|
181 |
],
|
|
|
182 |
"node_i": "3762",
|
183 |
-
"
|
184 |
-
"
|
185 |
}
|
186 |
],
|
|
|
187 |
"node_i": "3759",
|
188 |
-
"
|
189 |
-
"
|
190 |
}
|
|
|
2 |
"name": "Eskimo-Aleut",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Aleut",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Aleut",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": "ale",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "3761",
|
18 |
+
"native_tokenizers": [],
|
19 |
+
"scripts": []
|
20 |
}
|
21 |
],
|
22 |
+
"tokenizers": {},
|
23 |
"node_i": "3760",
|
24 |
+
"native_tokenizers": [],
|
25 |
+
"scripts": []
|
26 |
},
|
27 |
{
|
28 |
"name": "Eskimo",
|
29 |
"iso_1_code": null,
|
30 |
"iso_3_code": null,
|
|
|
31 |
"children": [
|
32 |
{
|
33 |
"name": "Inuit-Inupiaq",
|
34 |
"iso_1_code": null,
|
35 |
"iso_3_code": null,
|
|
|
36 |
"children": [
|
37 |
{
|
38 |
"name": "Inupiatun, North Alaskan",
|
39 |
"iso_1_code": "ik",
|
40 |
"iso_3_code": "esi",
|
|
|
41 |
"children": [],
|
42 |
+
"tokenizers": {},
|
43 |
"node_i": "3764",
|
44 |
+
"native_tokenizers": [],
|
45 |
"scripts": [
|
46 |
"Latn"
|
47 |
+
]
|
|
|
48 |
},
|
49 |
{
|
50 |
"name": "Inupiatun, Northwest Alaska",
|
51 |
"iso_1_code": "ik",
|
52 |
"iso_3_code": "esk",
|
|
|
53 |
"children": [],
|
54 |
+
"tokenizers": {},
|
55 |
"node_i": "3765",
|
56 |
+
"native_tokenizers": [],
|
57 |
"scripts": [
|
58 |
"Latn"
|
59 |
+
]
|
|
|
60 |
},
|
61 |
{
|
62 |
"name": "Inuktitut, Eastern Canadian",
|
63 |
"iso_1_code": "iu",
|
64 |
"iso_3_code": "ike",
|
|
|
65 |
"children": [],
|
66 |
+
"tokenizers": {},
|
67 |
"node_i": "3766",
|
68 |
+
"native_tokenizers": [],
|
69 |
"scripts": [
|
70 |
"Cans"
|
71 |
+
]
|
|
|
72 |
},
|
73 |
{
|
74 |
"name": "Inuinnaqtun",
|
75 |
"iso_1_code": "iu",
|
76 |
"iso_3_code": "ikt",
|
|
|
77 |
"children": [],
|
78 |
+
"tokenizers": {},
|
79 |
"node_i": "3767",
|
80 |
+
"native_tokenizers": [],
|
81 |
"scripts": [
|
82 |
"Latn"
|
83 |
+
]
|
|
|
84 |
},
|
85 |
{
|
86 |
"name": "Greenlandic",
|
87 |
"iso_1_code": "kl",
|
88 |
"iso_3_code": "kal",
|
|
|
89 |
"children": [],
|
90 |
+
"tokenizers": {},
|
91 |
"node_i": "3768",
|
92 |
+
"native_tokenizers": [],
|
93 |
"scripts": [
|
94 |
"Latn"
|
95 |
+
]
|
|
|
96 |
}
|
97 |
],
|
98 |
+
"tokenizers": {},
|
99 |
"node_i": "3763",
|
100 |
+
"native_tokenizers": [],
|
101 |
+
"scripts": []
|
102 |
},
|
103 |
{
|
104 |
"name": "Yupik",
|
105 |
"iso_1_code": null,
|
106 |
"iso_3_code": null,
|
|
|
107 |
"children": [
|
108 |
{
|
109 |
"name": "Yupik, Saint Lawrence Island",
|
110 |
"iso_1_code": null,
|
111 |
"iso_3_code": "ess",
|
|
|
112 |
"children": [],
|
113 |
+
"tokenizers": {},
|
114 |
"node_i": "3770",
|
115 |
+
"native_tokenizers": [],
|
116 |
"scripts": [
|
117 |
"Latn"
|
118 |
+
]
|
|
|
119 |
},
|
120 |
{
|
121 |
"name": "Yupik, Naukan",
|
122 |
"iso_1_code": null,
|
123 |
"iso_3_code": "ynk",
|
|
|
124 |
"children": [],
|
125 |
+
"tokenizers": {},
|
126 |
"node_i": "3771",
|
127 |
+
"native_tokenizers": [],
|
128 |
+
"scripts": []
|
129 |
},
|
130 |
{
|
131 |
"name": "Yupik, Sirenik",
|
132 |
"iso_1_code": null,
|
133 |
"iso_3_code": "ysr",
|
|
|
134 |
"children": [],
|
135 |
+
"tokenizers": {},
|
136 |
"node_i": "3772",
|
137 |
+
"native_tokenizers": [],
|
138 |
+
"scripts": []
|
139 |
},
|
140 |
{
|
141 |
"name": "Alaskan Yupik",
|
142 |
"iso_1_code": null,
|
143 |
"iso_3_code": null,
|
|
|
144 |
"children": [
|
145 |
{
|
146 |
"name": "Yupik, Pacific Gulf",
|
147 |
"iso_1_code": null,
|
148 |
"iso_3_code": "ems",
|
|
|
149 |
"children": [],
|
150 |
+
"tokenizers": {},
|
151 |
"node_i": "3774",
|
152 |
+
"native_tokenizers": [],
|
153 |
+
"scripts": []
|
154 |
},
|
155 |
{
|
156 |
"name": "Yupik, Central",
|
157 |
"iso_1_code": null,
|
158 |
"iso_3_code": "esu",
|
|
|
159 |
"children": [],
|
160 |
+
"tokenizers": {},
|
161 |
"node_i": "3775",
|
162 |
+
"native_tokenizers": [],
|
163 |
"scripts": [
|
164 |
"Latn"
|
165 |
+
]
|
|
|
166 |
}
|
167 |
],
|
168 |
+
"tokenizers": {},
|
169 |
"node_i": "3773",
|
170 |
+
"native_tokenizers": [],
|
171 |
+
"scripts": []
|
172 |
}
|
173 |
],
|
174 |
+
"tokenizers": {},
|
175 |
"node_i": "3769",
|
176 |
+
"native_tokenizers": [],
|
177 |
+
"scripts": []
|
178 |
}
|
179 |
],
|
180 |
+
"tokenizers": {},
|
181 |
"node_i": "3762",
|
182 |
+
"native_tokenizers": [],
|
183 |
+
"scripts": []
|
184 |
}
|
185 |
],
|
186 |
+
"tokenizers": {},
|
187 |
"node_i": "3759",
|
188 |
+
"native_tokenizers": [],
|
189 |
+
"scripts": []
|
190 |
}
|
data/Eyak-Athabaskan.json
CHANGED
@@ -2,648 +2,648 @@
|
|
2 |
"name": "Eyak-Athabaskan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Eyak",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "eya",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3777",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Athabaskan",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": null,
|
21 |
-
"tokenizers": {},
|
22 |
"children": [
|
23 |
{
|
24 |
"name": "Apachean",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": null,
|
27 |
-
"tokenizers": {},
|
28 |
"children": [
|
29 |
{
|
30 |
"name": "Navajo",
|
31 |
"iso_1_code": "nv",
|
32 |
"iso_3_code": "nav",
|
33 |
-
"tokenizers": {},
|
34 |
"children": [],
|
|
|
35 |
"node_i": "3780",
|
|
|
36 |
"scripts": [
|
37 |
"Latn"
|
38 |
-
]
|
39 |
-
"own_tokenizer": false
|
40 |
},
|
41 |
{
|
42 |
"name": "Apache",
|
43 |
"iso_1_code": null,
|
44 |
"iso_3_code": null,
|
45 |
-
"tokenizers": {},
|
46 |
"children": [
|
47 |
{
|
48 |
"name": "Apache, Jicarilla",
|
49 |
"iso_1_code": null,
|
50 |
"iso_3_code": "apj",
|
51 |
-
"tokenizers": {},
|
52 |
"children": [],
|
|
|
53 |
"node_i": "3782",
|
54 |
-
"
|
55 |
-
"
|
56 |
},
|
57 |
{
|
58 |
"name": "Apache, Kiowa",
|
59 |
"iso_1_code": null,
|
60 |
"iso_3_code": "apk",
|
61 |
-
"tokenizers": {},
|
62 |
"children": [],
|
|
|
63 |
"node_i": "3783",
|
64 |
-
"
|
65 |
-
"
|
66 |
},
|
67 |
{
|
68 |
"name": "Apache, Lipan",
|
69 |
"iso_1_code": null,
|
70 |
"iso_3_code": "apl",
|
71 |
-
"tokenizers": {},
|
72 |
"children": [],
|
|
|
73 |
"node_i": "3784",
|
74 |
-
"
|
75 |
-
"
|
76 |
},
|
77 |
{
|
78 |
"name": "Apache, Mescalero-Chiricahua",
|
79 |
"iso_1_code": null,
|
80 |
"iso_3_code": "apm",
|
81 |
-
"tokenizers": {},
|
82 |
"children": [],
|
|
|
83 |
"node_i": "3785",
|
84 |
-
"
|
85 |
-
"
|
86 |
},
|
87 |
{
|
88 |
"name": "Apache, Western",
|
89 |
"iso_1_code": null,
|
90 |
"iso_3_code": "apw",
|
91 |
-
"tokenizers": {},
|
92 |
"children": [],
|
|
|
93 |
"node_i": "3786",
|
|
|
94 |
"scripts": [
|
95 |
"Latn"
|
96 |
-
]
|
97 |
-
"own_tokenizer": false
|
98 |
}
|
99 |
],
|
|
|
100 |
"node_i": "3781",
|
101 |
-
"
|
102 |
-
"
|
103 |
}
|
104 |
],
|
|
|
105 |
"node_i": "3779",
|
106 |
-
"
|
107 |
-
"
|
108 |
},
|
109 |
{
|
110 |
"name": "Northern Athabaskan",
|
111 |
"iso_1_code": null,
|
112 |
"iso_3_code": null,
|
113 |
-
"tokenizers": {},
|
114 |
"children": [
|
115 |
{
|
116 |
"name": "Ahtena",
|
117 |
"iso_1_code": null,
|
118 |
"iso_3_code": "aht",
|
119 |
-
"tokenizers": {},
|
120 |
"children": [],
|
|
|
121 |
"node_i": "3788",
|
122 |
-
"
|
123 |
-
"
|
124 |
},
|
125 |
{
|
126 |
"name": "Babine",
|
127 |
"iso_1_code": null,
|
128 |
"iso_3_code": "bcr",
|
129 |
-
"tokenizers": {},
|
130 |
"children": [],
|
|
|
131 |
"node_i": "3789",
|
132 |
-
"
|
133 |
-
"
|
134 |
},
|
135 |
{
|
136 |
"name": "Beaver",
|
137 |
"iso_1_code": null,
|
138 |
"iso_3_code": "bea",
|
139 |
-
"tokenizers": {},
|
140 |
"children": [],
|
|
|
141 |
"node_i": "3790",
|
|
|
142 |
"scripts": [
|
143 |
"Latn"
|
144 |
-
]
|
145 |
-
"own_tokenizer": false
|
146 |
},
|
147 |
{
|
148 |
"name": "Dene",
|
149 |
"iso_1_code": null,
|
150 |
"iso_3_code": "chp",
|
151 |
-
"tokenizers": {},
|
152 |
"children": [],
|
|
|
153 |
"node_i": "3791",
|
154 |
-
"
|
155 |
-
"
|
156 |
},
|
157 |
{
|
158 |
"name": "Chilcotin",
|
159 |
"iso_1_code": null,
|
160 |
"iso_3_code": "clc",
|
161 |
-
"tokenizers": {},
|
162 |
"children": [],
|
|
|
163 |
"node_i": "3792",
|
164 |
-
"
|
165 |
-
"
|
166 |
},
|
167 |
{
|
168 |
"name": "Tlicho",
|
169 |
"iso_1_code": null,
|
170 |
"iso_3_code": "dgr",
|
171 |
-
"tokenizers": {},
|
172 |
"children": [],
|
|
|
173 |
"node_i": "3793",
|
|
|
174 |
"scripts": [
|
175 |
"Latn"
|
176 |
-
]
|
177 |
-
"own_tokenizer": false
|
178 |
},
|
179 |
{
|
180 |
"name": "Gwich\u2019in",
|
181 |
"iso_1_code": null,
|
182 |
"iso_3_code": "gwi",
|
183 |
-
"tokenizers": {},
|
184 |
"children": [],
|
|
|
185 |
"node_i": "3794",
|
|
|
186 |
"scripts": [
|
187 |
"Latn"
|
188 |
-
]
|
189 |
-
"own_tokenizer": false
|
190 |
},
|
191 |
{
|
192 |
"name": "Han",
|
193 |
"iso_1_code": null,
|
194 |
"iso_3_code": "haa",
|
195 |
-
"tokenizers": {},
|
196 |
"children": [],
|
|
|
197 |
"node_i": "3795",
|
198 |
-
"
|
199 |
-
"
|
200 |
},
|
201 |
{
|
202 |
"name": "Holikachuk",
|
203 |
"iso_1_code": null,
|
204 |
"iso_3_code": "hoi",
|
205 |
-
"tokenizers": {},
|
206 |
"children": [],
|
|
|
207 |
"node_i": "3796",
|
208 |
-
"
|
209 |
-
"
|
210 |
},
|
211 |
{
|
212 |
"name": "Deg Xinag",
|
213 |
"iso_1_code": null,
|
214 |
"iso_3_code": "ing",
|
215 |
-
"tokenizers": {},
|
216 |
"children": [],
|
|
|
217 |
"node_i": "3797",
|
218 |
-
"
|
219 |
-
"
|
220 |
},
|
221 |
{
|
222 |
"name": "Koyukon",
|
223 |
"iso_1_code": null,
|
224 |
"iso_3_code": "koy",
|
225 |
-
"tokenizers": {},
|
226 |
"children": [],
|
|
|
227 |
"node_i": "3798",
|
228 |
-
"
|
229 |
-
"
|
230 |
},
|
231 |
{
|
232 |
"name": "Kuskokwim, Upper",
|
233 |
"iso_1_code": null,
|
234 |
"iso_3_code": "kuu",
|
235 |
-
"tokenizers": {},
|
236 |
"children": [],
|
|
|
237 |
"node_i": "3799",
|
238 |
-
"
|
239 |
-
"
|
240 |
},
|
241 |
{
|
242 |
"name": "Sekani",
|
243 |
"iso_1_code": null,
|
244 |
"iso_3_code": "sek",
|
245 |
-
"tokenizers": {},
|
246 |
"children": [],
|
|
|
247 |
"node_i": "3800",
|
248 |
-
"
|
249 |
-
"
|
250 |
},
|
251 |
{
|
252 |
"name": "Sarsi",
|
253 |
"iso_1_code": null,
|
254 |
"iso_3_code": "srs",
|
255 |
-
"tokenizers": {},
|
256 |
"children": [],
|
|
|
257 |
"node_i": "3801",
|
258 |
-
"
|
259 |
-
"
|
260 |
},
|
261 |
{
|
262 |
"name": "Tanana, Lower",
|
263 |
"iso_1_code": null,
|
264 |
"iso_3_code": "taa",
|
265 |
-
"tokenizers": {},
|
266 |
"children": [],
|
|
|
267 |
"node_i": "3802",
|
268 |
-
"
|
269 |
-
"
|
270 |
},
|
271 |
{
|
272 |
"name": "Tanana, Upper",
|
273 |
"iso_1_code": null,
|
274 |
"iso_3_code": "tau",
|
275 |
-
"tokenizers": {},
|
276 |
"children": [],
|
|
|
277 |
"node_i": "3803",
|
278 |
-
"
|
279 |
-
"
|
280 |
},
|
281 |
{
|
282 |
"name": "Tanacross",
|
283 |
"iso_1_code": null,
|
284 |
"iso_3_code": "tcb",
|
285 |
-
"tokenizers": {},
|
286 |
"children": [],
|
|
|
287 |
"node_i": "3804",
|
288 |
-
"
|
289 |
-
"
|
290 |
},
|
291 |
{
|
292 |
"name": "Tanaina",
|
293 |
"iso_1_code": null,
|
294 |
"iso_3_code": "tfn",
|
295 |
-
"tokenizers": {},
|
296 |
"children": [],
|
|
|
297 |
"node_i": "3805",
|
298 |
-
"
|
299 |
-
"
|
300 |
},
|
301 |
{
|
302 |
"name": "Tsetsaut",
|
303 |
"iso_1_code": null,
|
304 |
"iso_3_code": "txc",
|
305 |
-
"tokenizers": {},
|
306 |
"children": [],
|
|
|
307 |
"node_i": "3806",
|
308 |
-
"
|
309 |
-
"
|
310 |
},
|
311 |
{
|
312 |
"name": "Carrier",
|
313 |
"iso_1_code": null,
|
314 |
"iso_3_code": null,
|
315 |
-
"tokenizers": {},
|
316 |
"children": [
|
317 |
{
|
318 |
"name": "Carrier, Southern",
|
319 |
"iso_1_code": null,
|
320 |
"iso_3_code": "caf",
|
321 |
-
"tokenizers": {},
|
322 |
"children": [],
|
|
|
323 |
"node_i": "3808",
|
|
|
324 |
"scripts": [
|
325 |
"Latn"
|
326 |
-
]
|
327 |
-
"own_tokenizer": false
|
328 |
},
|
329 |
{
|
330 |
"name": "Carrier",
|
331 |
"iso_1_code": null,
|
332 |
"iso_3_code": "crx",
|
333 |
-
"tokenizers": {},
|
334 |
"children": [],
|
|
|
335 |
"node_i": "3809",
|
|
|
336 |
"scripts": [
|
337 |
"Latn"
|
338 |
-
]
|
339 |
-
"own_tokenizer": false
|
340 |
}
|
341 |
],
|
|
|
342 |
"node_i": "3807",
|
343 |
-
"
|
344 |
-
"
|
345 |
},
|
346 |
{
|
347 |
"name": "Slavey-Hare",
|
348 |
"iso_1_code": null,
|
349 |
"iso_3_code": null,
|
350 |
-
"tokenizers": {},
|
351 |
"children": [
|
352 |
{
|
353 |
"name": "Slavey, North",
|
354 |
"iso_1_code": null,
|
355 |
"iso_3_code": "scs",
|
356 |
-
"tokenizers": {},
|
357 |
"children": [],
|
|
|
358 |
"node_i": "3811",
|
359 |
-
"
|
360 |
-
"
|
361 |
},
|
362 |
{
|
363 |
"name": "Slavey, South",
|
364 |
"iso_1_code": null,
|
365 |
"iso_3_code": "xsl",
|
366 |
-
"tokenizers": {},
|
367 |
"children": [],
|
|
|
368 |
"node_i": "3812",
|
369 |
-
"
|
370 |
-
"
|
371 |
}
|
372 |
],
|
|
|
373 |
"node_i": "3810",
|
374 |
-
"
|
375 |
-
"
|
376 |
},
|
377 |
{
|
378 |
"name": "Tahltan",
|
379 |
"iso_1_code": null,
|
380 |
"iso_3_code": null,
|
381 |
-
"tokenizers": {},
|
382 |
"children": [
|
383 |
{
|
384 |
"name": "Kaska",
|
385 |
"iso_1_code": null,
|
386 |
"iso_3_code": "kkz",
|
387 |
-
"tokenizers": {},
|
388 |
"children": [],
|
|
|
389 |
"node_i": "3814",
|
390 |
-
"
|
391 |
-
"
|
392 |
},
|
393 |
{
|
394 |
"name": "Tagish",
|
395 |
"iso_1_code": null,
|
396 |
"iso_3_code": "tgx",
|
397 |
-
"tokenizers": {},
|
398 |
"children": [],
|
|
|
399 |
"node_i": "3815",
|
400 |
-
"
|
401 |
-
"
|
402 |
},
|
403 |
{
|
404 |
"name": "Tahltan",
|
405 |
"iso_1_code": null,
|
406 |
"iso_3_code": "tht",
|
407 |
-
"tokenizers": {},
|
408 |
"children": [],
|
|
|
409 |
"node_i": "3816",
|
410 |
-
"
|
411 |
-
"
|
412 |
}
|
413 |
],
|
|
|
414 |
"node_i": "3813",
|
415 |
-
"
|
416 |
-
"
|
417 |
},
|
418 |
{
|
419 |
"name": "Tuchone",
|
420 |
"iso_1_code": null,
|
421 |
"iso_3_code": null,
|
422 |
-
"tokenizers": {},
|
423 |
"children": [
|
424 |
{
|
425 |
"name": "Tutchone, Southern",
|
426 |
"iso_1_code": null,
|
427 |
"iso_3_code": "tce",
|
428 |
-
"tokenizers": {},
|
429 |
"children": [],
|
|
|
430 |
"node_i": "3818",
|
431 |
-
"
|
432 |
-
"
|
433 |
},
|
434 |
{
|
435 |
"name": "Tutchone, Northern",
|
436 |
"iso_1_code": null,
|
437 |
"iso_3_code": "ttm",
|
438 |
-
"tokenizers": {},
|
439 |
"children": [],
|
|
|
440 |
"node_i": "3819",
|
441 |
-
"
|
442 |
-
"
|
443 |
}
|
444 |
],
|
|
|
445 |
"node_i": "3817",
|
446 |
-
"
|
447 |
-
"
|
448 |
}
|
449 |
],
|
|
|
450 |
"node_i": "3787",
|
451 |
-
"
|
452 |
-
"
|
453 |
},
|
454 |
{
|
455 |
"name": "Pacific Coast Athabaskan",
|
456 |
"iso_1_code": null,
|
457 |
"iso_3_code": null,
|
458 |
-
"tokenizers": {},
|
459 |
"children": [
|
460 |
{
|
461 |
"name": "Kwalhioqua-Tlatskanai",
|
462 |
"iso_1_code": null,
|
463 |
"iso_3_code": "qwt",
|
464 |
-
"tokenizers": {},
|
465 |
"children": [],
|
|
|
466 |
"node_i": "3821",
|
467 |
-
"
|
468 |
-
"
|
469 |
},
|
470 |
{
|
471 |
"name": "California Athabaskan",
|
472 |
"iso_1_code": null,
|
473 |
"iso_3_code": null,
|
474 |
-
"tokenizers": {},
|
475 |
"children": [
|
476 |
{
|
477 |
"name": "Hupa",
|
478 |
"iso_1_code": null,
|
479 |
"iso_3_code": "hup",
|
480 |
-
"tokenizers": {},
|
481 |
"children": [],
|
|
|
482 |
"node_i": "3823",
|
483 |
-
"
|
484 |
-
"
|
485 |
},
|
486 |
{
|
487 |
"name": "Kato",
|
488 |
"iso_1_code": null,
|
489 |
"iso_3_code": "ktw",
|
490 |
-
"tokenizers": {},
|
491 |
"children": [],
|
|
|
492 |
"node_i": "3824",
|
493 |
-
"
|
494 |
-
"
|
495 |
},
|
496 |
{
|
497 |
"name": "Mattole",
|
498 |
"iso_1_code": null,
|
499 |
"iso_3_code": "mvb",
|
500 |
-
"tokenizers": {},
|
501 |
"children": [],
|
|
|
502 |
"node_i": "3825",
|
503 |
-
"
|
504 |
-
"
|
505 |
},
|
506 |
{
|
507 |
"name": "Wailaki",
|
508 |
"iso_1_code": null,
|
509 |
"iso_3_code": "wlk",
|
510 |
-
"tokenizers": {},
|
511 |
"children": [],
|
|
|
512 |
"node_i": "3826",
|
513 |
-
"
|
514 |
-
"
|
515 |
}
|
516 |
],
|
|
|
517 |
"node_i": "3822",
|
518 |
-
"
|
519 |
-
"
|
520 |
},
|
521 |
{
|
522 |
"name": "Oregon Athabaskan",
|
523 |
"iso_1_code": null,
|
524 |
"iso_3_code": null,
|
525 |
-
"tokenizers": {},
|
526 |
"children": [
|
527 |
{
|
528 |
"name": "Galice",
|
529 |
"iso_1_code": null,
|
530 |
"iso_3_code": "gce",
|
531 |
-
"tokenizers": {},
|
532 |
"children": [],
|
|
|
533 |
"node_i": "3828",
|
534 |
-
"
|
535 |
-
"
|
536 |
},
|
537 |
{
|
538 |
"name": "Upper Umpqua",
|
539 |
"iso_1_code": null,
|
540 |
"iso_3_code": "xup",
|
541 |
-
"tokenizers": {},
|
542 |
"children": [],
|
|
|
543 |
"node_i": "3829",
|
544 |
-
"
|
545 |
-
"
|
546 |
},
|
547 |
{
|
548 |
"name": "Tolowa-Chetco",
|
549 |
"iso_1_code": null,
|
550 |
"iso_3_code": null,
|
551 |
-
"tokenizers": {},
|
552 |
"children": [
|
553 |
{
|
554 |
"name": "Chetco",
|
555 |
"iso_1_code": null,
|
556 |
"iso_3_code": "ctc",
|
557 |
-
"tokenizers": {},
|
558 |
"children": [],
|
|
|
559 |
"node_i": "3831",
|
560 |
-
"
|
561 |
-
"
|
562 |
},
|
563 |
{
|
564 |
"name": "Tolowa",
|
565 |
"iso_1_code": null,
|
566 |
"iso_3_code": "tol",
|
567 |
-
"tokenizers": {},
|
568 |
"children": [],
|
|
|
569 |
"node_i": "3832",
|
570 |
-
"
|
571 |
-
"
|
572 |
}
|
573 |
],
|
|
|
574 |
"node_i": "3830",
|
575 |
-
"
|
576 |
-
"
|
577 |
},
|
578 |
{
|
579 |
"name": "Tututni-Chasta Costa-Coquille",
|
580 |
"iso_1_code": null,
|
581 |
"iso_3_code": null,
|
582 |
-
"tokenizers": {},
|
583 |
"children": [
|
584 |
{
|
585 |
"name": "Coquille",
|
586 |
"iso_1_code": null,
|
587 |
"iso_3_code": "coq",
|
588 |
-
"tokenizers": {},
|
589 |
"children": [],
|
|
|
590 |
"node_i": "3834",
|
591 |
-
"
|
592 |
-
"
|
593 |
},
|
594 |
{
|
595 |
"name": "Tututni",
|
596 |
"iso_1_code": null,
|
597 |
"iso_3_code": "tuu",
|
598 |
-
"tokenizers": {},
|
599 |
"children": [],
|
|
|
600 |
"node_i": "3835",
|
601 |
-
"
|
602 |
-
"
|
603 |
}
|
604 |
],
|
|
|
605 |
"node_i": "3833",
|
606 |
-
"
|
607 |
-
"
|
608 |
}
|
609 |
],
|
|
|
610 |
"node_i": "3827",
|
611 |
-
"
|
612 |
-
"
|
613 |
}
|
614 |
],
|
|
|
615 |
"node_i": "3820",
|
616 |
-
"
|
617 |
-
"
|
618 |
}
|
619 |
],
|
|
|
620 |
"node_i": "3778",
|
621 |
-
"
|
622 |
-
"
|
623 |
},
|
624 |
{
|
625 |
"name": "Tlingit",
|
626 |
"iso_1_code": null,
|
627 |
"iso_3_code": null,
|
628 |
-
"tokenizers": {},
|
629 |
"children": [
|
630 |
{
|
631 |
"name": "Tlingit",
|
632 |
"iso_1_code": null,
|
633 |
"iso_3_code": "tli",
|
634 |
-
"tokenizers": {},
|
635 |
"children": [],
|
|
|
636 |
"node_i": "3837",
|
637 |
-
"
|
638 |
-
"
|
639 |
}
|
640 |
],
|
|
|
641 |
"node_i": "3836",
|
642 |
-
"
|
643 |
-
"
|
644 |
}
|
645 |
],
|
|
|
646 |
"node_i": "3776",
|
647 |
-
"
|
648 |
-
"
|
649 |
}
|
|
|
2 |
"name": "Eyak-Athabaskan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Eyak",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "eya",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3777",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Athabaskan",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": null,
|
|
|
20 |
"children": [
|
21 |
{
|
22 |
"name": "Apachean",
|
23 |
"iso_1_code": null,
|
24 |
"iso_3_code": null,
|
|
|
25 |
"children": [
|
26 |
{
|
27 |
"name": "Navajo",
|
28 |
"iso_1_code": "nv",
|
29 |
"iso_3_code": "nav",
|
|
|
30 |
"children": [],
|
31 |
+
"tokenizers": {},
|
32 |
"node_i": "3780",
|
33 |
+
"native_tokenizers": [],
|
34 |
"scripts": [
|
35 |
"Latn"
|
36 |
+
]
|
|
|
37 |
},
|
38 |
{
|
39 |
"name": "Apache",
|
40 |
"iso_1_code": null,
|
41 |
"iso_3_code": null,
|
|
|
42 |
"children": [
|
43 |
{
|
44 |
"name": "Apache, Jicarilla",
|
45 |
"iso_1_code": null,
|
46 |
"iso_3_code": "apj",
|
|
|
47 |
"children": [],
|
48 |
+
"tokenizers": {},
|
49 |
"node_i": "3782",
|
50 |
+
"native_tokenizers": [],
|
51 |
+
"scripts": []
|
52 |
},
|
53 |
{
|
54 |
"name": "Apache, Kiowa",
|
55 |
"iso_1_code": null,
|
56 |
"iso_3_code": "apk",
|
|
|
57 |
"children": [],
|
58 |
+
"tokenizers": {},
|
59 |
"node_i": "3783",
|
60 |
+
"native_tokenizers": [],
|
61 |
+
"scripts": []
|
62 |
},
|
63 |
{
|
64 |
"name": "Apache, Lipan",
|
65 |
"iso_1_code": null,
|
66 |
"iso_3_code": "apl",
|
|
|
67 |
"children": [],
|
68 |
+
"tokenizers": {},
|
69 |
"node_i": "3784",
|
70 |
+
"native_tokenizers": [],
|
71 |
+
"scripts": []
|
72 |
},
|
73 |
{
|
74 |
"name": "Apache, Mescalero-Chiricahua",
|
75 |
"iso_1_code": null,
|
76 |
"iso_3_code": "apm",
|
|
|
77 |
"children": [],
|
78 |
+
"tokenizers": {},
|
79 |
"node_i": "3785",
|
80 |
+
"native_tokenizers": [],
|
81 |
+
"scripts": []
|
82 |
},
|
83 |
{
|
84 |
"name": "Apache, Western",
|
85 |
"iso_1_code": null,
|
86 |
"iso_3_code": "apw",
|
|
|
87 |
"children": [],
|
88 |
+
"tokenizers": {},
|
89 |
"node_i": "3786",
|
90 |
+
"native_tokenizers": [],
|
91 |
"scripts": [
|
92 |
"Latn"
|
93 |
+
]
|
|
|
94 |
}
|
95 |
],
|
96 |
+
"tokenizers": {},
|
97 |
"node_i": "3781",
|
98 |
+
"native_tokenizers": [],
|
99 |
+
"scripts": []
|
100 |
}
|
101 |
],
|
102 |
+
"tokenizers": {},
|
103 |
"node_i": "3779",
|
104 |
+
"native_tokenizers": [],
|
105 |
+
"scripts": []
|
106 |
},
|
107 |
{
|
108 |
"name": "Northern Athabaskan",
|
109 |
"iso_1_code": null,
|
110 |
"iso_3_code": null,
|
|
|
111 |
"children": [
|
112 |
{
|
113 |
"name": "Ahtena",
|
114 |
"iso_1_code": null,
|
115 |
"iso_3_code": "aht",
|
|
|
116 |
"children": [],
|
117 |
+
"tokenizers": {},
|
118 |
"node_i": "3788",
|
119 |
+
"native_tokenizers": [],
|
120 |
+
"scripts": []
|
121 |
},
|
122 |
{
|
123 |
"name": "Babine",
|
124 |
"iso_1_code": null,
|
125 |
"iso_3_code": "bcr",
|
|
|
126 |
"children": [],
|
127 |
+
"tokenizers": {},
|
128 |
"node_i": "3789",
|
129 |
+
"native_tokenizers": [],
|
130 |
+
"scripts": []
|
131 |
},
|
132 |
{
|
133 |
"name": "Beaver",
|
134 |
"iso_1_code": null,
|
135 |
"iso_3_code": "bea",
|
|
|
136 |
"children": [],
|
137 |
+
"tokenizers": {},
|
138 |
"node_i": "3790",
|
139 |
+
"native_tokenizers": [],
|
140 |
"scripts": [
|
141 |
"Latn"
|
142 |
+
]
|
|
|
143 |
},
|
144 |
{
|
145 |
"name": "Dene",
|
146 |
"iso_1_code": null,
|
147 |
"iso_3_code": "chp",
|
|
|
148 |
"children": [],
|
149 |
+
"tokenizers": {},
|
150 |
"node_i": "3791",
|
151 |
+
"native_tokenizers": [],
|
152 |
+
"scripts": []
|
153 |
},
|
154 |
{
|
155 |
"name": "Chilcotin",
|
156 |
"iso_1_code": null,
|
157 |
"iso_3_code": "clc",
|
|
|
158 |
"children": [],
|
159 |
+
"tokenizers": {},
|
160 |
"node_i": "3792",
|
161 |
+
"native_tokenizers": [],
|
162 |
+
"scripts": []
|
163 |
},
|
164 |
{
|
165 |
"name": "Tlicho",
|
166 |
"iso_1_code": null,
|
167 |
"iso_3_code": "dgr",
|
|
|
168 |
"children": [],
|
169 |
+
"tokenizers": {},
|
170 |
"node_i": "3793",
|
171 |
+
"native_tokenizers": [],
|
172 |
"scripts": [
|
173 |
"Latn"
|
174 |
+
]
|
|
|
175 |
},
|
176 |
{
|
177 |
"name": "Gwich\u2019in",
|
178 |
"iso_1_code": null,
|
179 |
"iso_3_code": "gwi",
|
|
|
180 |
"children": [],
|
181 |
+
"tokenizers": {},
|
182 |
"node_i": "3794",
|
183 |
+
"native_tokenizers": [],
|
184 |
"scripts": [
|
185 |
"Latn"
|
186 |
+
]
|
|
|
187 |
},
|
188 |
{
|
189 |
"name": "Han",
|
190 |
"iso_1_code": null,
|
191 |
"iso_3_code": "haa",
|
|
|
192 |
"children": [],
|
193 |
+
"tokenizers": {},
|
194 |
"node_i": "3795",
|
195 |
+
"native_tokenizers": [],
|
196 |
+
"scripts": []
|
197 |
},
|
198 |
{
|
199 |
"name": "Holikachuk",
|
200 |
"iso_1_code": null,
|
201 |
"iso_3_code": "hoi",
|
|
|
202 |
"children": [],
|
203 |
+
"tokenizers": {},
|
204 |
"node_i": "3796",
|
205 |
+
"native_tokenizers": [],
|
206 |
+
"scripts": []
|
207 |
},
|
208 |
{
|
209 |
"name": "Deg Xinag",
|
210 |
"iso_1_code": null,
|
211 |
"iso_3_code": "ing",
|
|
|
212 |
"children": [],
|
213 |
+
"tokenizers": {},
|
214 |
"node_i": "3797",
|
215 |
+
"native_tokenizers": [],
|
216 |
+
"scripts": []
|
217 |
},
|
218 |
{
|
219 |
"name": "Koyukon",
|
220 |
"iso_1_code": null,
|
221 |
"iso_3_code": "koy",
|
|
|
222 |
"children": [],
|
223 |
+
"tokenizers": {},
|
224 |
"node_i": "3798",
|
225 |
+
"native_tokenizers": [],
|
226 |
+
"scripts": []
|
227 |
},
|
228 |
{
|
229 |
"name": "Kuskokwim, Upper",
|
230 |
"iso_1_code": null,
|
231 |
"iso_3_code": "kuu",
|
|
|
232 |
"children": [],
|
233 |
+
"tokenizers": {},
|
234 |
"node_i": "3799",
|
235 |
+
"native_tokenizers": [],
|
236 |
+
"scripts": []
|
237 |
},
|
238 |
{
|
239 |
"name": "Sekani",
|
240 |
"iso_1_code": null,
|
241 |
"iso_3_code": "sek",
|
|
|
242 |
"children": [],
|
243 |
+
"tokenizers": {},
|
244 |
"node_i": "3800",
|
245 |
+
"native_tokenizers": [],
|
246 |
+
"scripts": []
|
247 |
},
|
248 |
{
|
249 |
"name": "Sarsi",
|
250 |
"iso_1_code": null,
|
251 |
"iso_3_code": "srs",
|
|
|
252 |
"children": [],
|
253 |
+
"tokenizers": {},
|
254 |
"node_i": "3801",
|
255 |
+
"native_tokenizers": [],
|
256 |
+
"scripts": []
|
257 |
},
|
258 |
{
|
259 |
"name": "Tanana, Lower",
|
260 |
"iso_1_code": null,
|
261 |
"iso_3_code": "taa",
|
|
|
262 |
"children": [],
|
263 |
+
"tokenizers": {},
|
264 |
"node_i": "3802",
|
265 |
+
"native_tokenizers": [],
|
266 |
+
"scripts": []
|
267 |
},
|
268 |
{
|
269 |
"name": "Tanana, Upper",
|
270 |
"iso_1_code": null,
|
271 |
"iso_3_code": "tau",
|
|
|
272 |
"children": [],
|
273 |
+
"tokenizers": {},
|
274 |
"node_i": "3803",
|
275 |
+
"native_tokenizers": [],
|
276 |
+
"scripts": []
|
277 |
},
|
278 |
{
|
279 |
"name": "Tanacross",
|
280 |
"iso_1_code": null,
|
281 |
"iso_3_code": "tcb",
|
|
|
282 |
"children": [],
|
283 |
+
"tokenizers": {},
|
284 |
"node_i": "3804",
|
285 |
+
"native_tokenizers": [],
|
286 |
+
"scripts": []
|
287 |
},
|
288 |
{
|
289 |
"name": "Tanaina",
|
290 |
"iso_1_code": null,
|
291 |
"iso_3_code": "tfn",
|
|
|
292 |
"children": [],
|
293 |
+
"tokenizers": {},
|
294 |
"node_i": "3805",
|
295 |
+
"native_tokenizers": [],
|
296 |
+
"scripts": []
|
297 |
},
|
298 |
{
|
299 |
"name": "Tsetsaut",
|
300 |
"iso_1_code": null,
|
301 |
"iso_3_code": "txc",
|
|
|
302 |
"children": [],
|
303 |
+
"tokenizers": {},
|
304 |
"node_i": "3806",
|
305 |
+
"native_tokenizers": [],
|
306 |
+
"scripts": []
|
307 |
},
|
308 |
{
|
309 |
"name": "Carrier",
|
310 |
"iso_1_code": null,
|
311 |
"iso_3_code": null,
|
|
|
312 |
"children": [
|
313 |
{
|
314 |
"name": "Carrier, Southern",
|
315 |
"iso_1_code": null,
|
316 |
"iso_3_code": "caf",
|
|
|
317 |
"children": [],
|
318 |
+
"tokenizers": {},
|
319 |
"node_i": "3808",
|
320 |
+
"native_tokenizers": [],
|
321 |
"scripts": [
|
322 |
"Latn"
|
323 |
+
]
|
|
|
324 |
},
|
325 |
{
|
326 |
"name": "Carrier",
|
327 |
"iso_1_code": null,
|
328 |
"iso_3_code": "crx",
|
|
|
329 |
"children": [],
|
330 |
+
"tokenizers": {},
|
331 |
"node_i": "3809",
|
332 |
+
"native_tokenizers": [],
|
333 |
"scripts": [
|
334 |
"Latn"
|
335 |
+
]
|
|
|
336 |
}
|
337 |
],
|
338 |
+
"tokenizers": {},
|
339 |
"node_i": "3807",
|
340 |
+
"native_tokenizers": [],
|
341 |
+
"scripts": []
|
342 |
},
|
343 |
{
|
344 |
"name": "Slavey-Hare",
|
345 |
"iso_1_code": null,
|
346 |
"iso_3_code": null,
|
|
|
347 |
"children": [
|
348 |
{
|
349 |
"name": "Slavey, North",
|
350 |
"iso_1_code": null,
|
351 |
"iso_3_code": "scs",
|
|
|
352 |
"children": [],
|
353 |
+
"tokenizers": {},
|
354 |
"node_i": "3811",
|
355 |
+
"native_tokenizers": [],
|
356 |
+
"scripts": []
|
357 |
},
|
358 |
{
|
359 |
"name": "Slavey, South",
|
360 |
"iso_1_code": null,
|
361 |
"iso_3_code": "xsl",
|
|
|
362 |
"children": [],
|
363 |
+
"tokenizers": {},
|
364 |
"node_i": "3812",
|
365 |
+
"native_tokenizers": [],
|
366 |
+
"scripts": []
|
367 |
}
|
368 |
],
|
369 |
+
"tokenizers": {},
|
370 |
"node_i": "3810",
|
371 |
+
"native_tokenizers": [],
|
372 |
+
"scripts": []
|
373 |
},
|
374 |
{
|
375 |
"name": "Tahltan",
|
376 |
"iso_1_code": null,
|
377 |
"iso_3_code": null,
|
|
|
378 |
"children": [
|
379 |
{
|
380 |
"name": "Kaska",
|
381 |
"iso_1_code": null,
|
382 |
"iso_3_code": "kkz",
|
|
|
383 |
"children": [],
|
384 |
+
"tokenizers": {},
|
385 |
"node_i": "3814",
|
386 |
+
"native_tokenizers": [],
|
387 |
+
"scripts": []
|
388 |
},
|
389 |
{
|
390 |
"name": "Tagish",
|
391 |
"iso_1_code": null,
|
392 |
"iso_3_code": "tgx",
|
|
|
393 |
"children": [],
|
394 |
+
"tokenizers": {},
|
395 |
"node_i": "3815",
|
396 |
+
"native_tokenizers": [],
|
397 |
+
"scripts": []
|
398 |
},
|
399 |
{
|
400 |
"name": "Tahltan",
|
401 |
"iso_1_code": null,
|
402 |
"iso_3_code": "tht",
|
|
|
403 |
"children": [],
|
404 |
+
"tokenizers": {},
|
405 |
"node_i": "3816",
|
406 |
+
"native_tokenizers": [],
|
407 |
+
"scripts": []
|
408 |
}
|
409 |
],
|
410 |
+
"tokenizers": {},
|
411 |
"node_i": "3813",
|
412 |
+
"native_tokenizers": [],
|
413 |
+
"scripts": []
|
414 |
},
|
415 |
{
|
416 |
"name": "Tuchone",
|
417 |
"iso_1_code": null,
|
418 |
"iso_3_code": null,
|
|
|
419 |
"children": [
|
420 |
{
|
421 |
"name": "Tutchone, Southern",
|
422 |
"iso_1_code": null,
|
423 |
"iso_3_code": "tce",
|
|
|
424 |
"children": [],
|
425 |
+
"tokenizers": {},
|
426 |
"node_i": "3818",
|
427 |
+
"native_tokenizers": [],
|
428 |
+
"scripts": []
|
429 |
},
|
430 |
{
|
431 |
"name": "Tutchone, Northern",
|
432 |
"iso_1_code": null,
|
433 |
"iso_3_code": "ttm",
|
|
|
434 |
"children": [],
|
435 |
+
"tokenizers": {},
|
436 |
"node_i": "3819",
|
437 |
+
"native_tokenizers": [],
|
438 |
+
"scripts": []
|
439 |
}
|
440 |
],
|
441 |
+
"tokenizers": {},
|
442 |
"node_i": "3817",
|
443 |
+
"native_tokenizers": [],
|
444 |
+
"scripts": []
|
445 |
}
|
446 |
],
|
447 |
+
"tokenizers": {},
|
448 |
"node_i": "3787",
|
449 |
+
"native_tokenizers": [],
|
450 |
+
"scripts": []
|
451 |
},
|
452 |
{
|
453 |
"name": "Pacific Coast Athabaskan",
|
454 |
"iso_1_code": null,
|
455 |
"iso_3_code": null,
|
|
|
456 |
"children": [
|
457 |
{
|
458 |
"name": "Kwalhioqua-Tlatskanai",
|
459 |
"iso_1_code": null,
|
460 |
"iso_3_code": "qwt",
|
|
|
461 |
"children": [],
|
462 |
+
"tokenizers": {},
|
463 |
"node_i": "3821",
|
464 |
+
"native_tokenizers": [],
|
465 |
+
"scripts": []
|
466 |
},
|
467 |
{
|
468 |
"name": "California Athabaskan",
|
469 |
"iso_1_code": null,
|
470 |
"iso_3_code": null,
|
|
|
471 |
"children": [
|
472 |
{
|
473 |
"name": "Hupa",
|
474 |
"iso_1_code": null,
|
475 |
"iso_3_code": "hup",
|
|
|
476 |
"children": [],
|
477 |
+
"tokenizers": {},
|
478 |
"node_i": "3823",
|
479 |
+
"native_tokenizers": [],
|
480 |
+
"scripts": []
|
481 |
},
|
482 |
{
|
483 |
"name": "Kato",
|
484 |
"iso_1_code": null,
|
485 |
"iso_3_code": "ktw",
|
|
|
486 |
"children": [],
|
487 |
+
"tokenizers": {},
|
488 |
"node_i": "3824",
|
489 |
+
"native_tokenizers": [],
|
490 |
+
"scripts": []
|
491 |
},
|
492 |
{
|
493 |
"name": "Mattole",
|
494 |
"iso_1_code": null,
|
495 |
"iso_3_code": "mvb",
|
|
|
496 |
"children": [],
|
497 |
+
"tokenizers": {},
|
498 |
"node_i": "3825",
|
499 |
+
"native_tokenizers": [],
|
500 |
+
"scripts": []
|
501 |
},
|
502 |
{
|
503 |
"name": "Wailaki",
|
504 |
"iso_1_code": null,
|
505 |
"iso_3_code": "wlk",
|
|
|
506 |
"children": [],
|
507 |
+
"tokenizers": {},
|
508 |
"node_i": "3826",
|
509 |
+
"native_tokenizers": [],
|
510 |
+
"scripts": []
|
511 |
}
|
512 |
],
|
513 |
+
"tokenizers": {},
|
514 |
"node_i": "3822",
|
515 |
+
"native_tokenizers": [],
|
516 |
+
"scripts": []
|
517 |
},
|
518 |
{
|
519 |
"name": "Oregon Athabaskan",
|
520 |
"iso_1_code": null,
|
521 |
"iso_3_code": null,
|
|
|
522 |
"children": [
|
523 |
{
|
524 |
"name": "Galice",
|
525 |
"iso_1_code": null,
|
526 |
"iso_3_code": "gce",
|
|
|
527 |
"children": [],
|
528 |
+
"tokenizers": {},
|
529 |
"node_i": "3828",
|
530 |
+
"native_tokenizers": [],
|
531 |
+
"scripts": []
|
532 |
},
|
533 |
{
|
534 |
"name": "Upper Umpqua",
|
535 |
"iso_1_code": null,
|
536 |
"iso_3_code": "xup",
|
|
|
537 |
"children": [],
|
538 |
+
"tokenizers": {},
|
539 |
"node_i": "3829",
|
540 |
+
"native_tokenizers": [],
|
541 |
+
"scripts": []
|
542 |
},
|
543 |
{
|
544 |
"name": "Tolowa-Chetco",
|
545 |
"iso_1_code": null,
|
546 |
"iso_3_code": null,
|
|
|
547 |
"children": [
|
548 |
{
|
549 |
"name": "Chetco",
|
550 |
"iso_1_code": null,
|
551 |
"iso_3_code": "ctc",
|
|
|
552 |
"children": [],
|
553 |
+
"tokenizers": {},
|
554 |
"node_i": "3831",
|
555 |
+
"native_tokenizers": [],
|
556 |
+
"scripts": []
|
557 |
},
|
558 |
{
|
559 |
"name": "Tolowa",
|
560 |
"iso_1_code": null,
|
561 |
"iso_3_code": "tol",
|
|
|
562 |
"children": [],
|
563 |
+
"tokenizers": {},
|
564 |
"node_i": "3832",
|
565 |
+
"native_tokenizers": [],
|
566 |
+
"scripts": []
|
567 |
}
|
568 |
],
|
569 |
+
"tokenizers": {},
|
570 |
"node_i": "3830",
|
571 |
+
"native_tokenizers": [],
|
572 |
+
"scripts": []
|
573 |
},
|
574 |
{
|
575 |
"name": "Tututni-Chasta Costa-Coquille",
|
576 |
"iso_1_code": null,
|
577 |
"iso_3_code": null,
|
|
|
578 |
"children": [
|
579 |
{
|
580 |
"name": "Coquille",
|
581 |
"iso_1_code": null,
|
582 |
"iso_3_code": "coq",
|
|
|
583 |
"children": [],
|
584 |
+
"tokenizers": {},
|
585 |
"node_i": "3834",
|
586 |
+
"native_tokenizers": [],
|
587 |
+
"scripts": []
|
588 |
},
|
589 |
{
|
590 |
"name": "Tututni",
|
591 |
"iso_1_code": null,
|
592 |
"iso_3_code": "tuu",
|
|
|
593 |
"children": [],
|
594 |
+
"tokenizers": {},
|
595 |
"node_i": "3835",
|
596 |
+
"native_tokenizers": [],
|
597 |
+
"scripts": []
|
598 |
}
|
599 |
],
|
600 |
+
"tokenizers": {},
|
601 |
"node_i": "3833",
|
602 |
+
"native_tokenizers": [],
|
603 |
+
"scripts": []
|
604 |
}
|
605 |
],
|
606 |
+
"tokenizers": {},
|
607 |
"node_i": "3827",
|
608 |
+
"native_tokenizers": [],
|
609 |
+
"scripts": []
|
610 |
}
|
611 |
],
|
612 |
+
"tokenizers": {},
|
613 |
"node_i": "3820",
|
614 |
+
"native_tokenizers": [],
|
615 |
+
"scripts": []
|
616 |
}
|
617 |
],
|
618 |
+
"tokenizers": {},
|
619 |
"node_i": "3778",
|
620 |
+
"native_tokenizers": [],
|
621 |
+
"scripts": []
|
622 |
},
|
623 |
{
|
624 |
"name": "Tlingit",
|
625 |
"iso_1_code": null,
|
626 |
"iso_3_code": null,
|
|
|
627 |
"children": [
|
628 |
{
|
629 |
"name": "Tlingit",
|
630 |
"iso_1_code": null,
|
631 |
"iso_3_code": "tli",
|
|
|
632 |
"children": [],
|
633 |
+
"tokenizers": {},
|
634 |
"node_i": "3837",
|
635 |
+
"native_tokenizers": [],
|
636 |
+
"scripts": []
|
637 |
}
|
638 |
],
|
639 |
+
"tokenizers": {},
|
640 |
"node_i": "3836",
|
641 |
+
"native_tokenizers": [],
|
642 |
+
"scripts": []
|
643 |
}
|
644 |
],
|
645 |
+
"tokenizers": {},
|
646 |
"node_i": "3776",
|
647 |
+
"native_tokenizers": [],
|
648 |
+
"scripts": []
|
649 |
}
|
data/Fas.json
CHANGED
@@ -2,30 +2,30 @@
|
|
2 |
"name": "Fas",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Baibai",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "bbf",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3839",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Momu",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": "fqs",
|
21 |
-
"tokenizers": {},
|
22 |
"children": [],
|
|
|
23 |
"node_i": "3840",
|
24 |
-
"
|
25 |
-
"
|
26 |
}
|
27 |
],
|
|
|
28 |
"node_i": "3838",
|
29 |
-
"
|
30 |
-
"
|
31 |
}
|
|
|
2 |
"name": "Fas",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Baibai",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "bbf",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3839",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Momu",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "fqs",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "3840",
|
23 |
+
"native_tokenizers": [],
|
24 |
+
"scripts": []
|
25 |
}
|
26 |
],
|
27 |
+
"tokenizers": {},
|
28 |
"node_i": "3838",
|
29 |
+
"native_tokenizers": [],
|
30 |
+
"scripts": []
|
31 |
}
|
data/Guajiboan.json
CHANGED
@@ -2,77 +2,77 @@
|
|
2 |
"name": "Guajiboan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Cuiba",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "cui",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3842",
|
|
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
-
]
|
17 |
-
"own_tokenizer": false
|
18 |
},
|
19 |
{
|
20 |
"name": "Guayabero",
|
21 |
"iso_1_code": null,
|
22 |
"iso_3_code": "guo",
|
23 |
-
"tokenizers": {},
|
24 |
"children": [],
|
|
|
25 |
"node_i": "3843",
|
|
|
26 |
"scripts": [
|
27 |
"Latn"
|
28 |
-
]
|
29 |
-
"own_tokenizer": false
|
30 |
},
|
31 |
{
|
32 |
"name": "Guajibo",
|
33 |
"iso_1_code": null,
|
34 |
"iso_3_code": null,
|
35 |
-
"tokenizers": {},
|
36 |
"children": [
|
37 |
{
|
38 |
"name": "Playero",
|
39 |
"iso_1_code": null,
|
40 |
"iso_3_code": "gob",
|
41 |
-
"tokenizers": {},
|
42 |
"children": [],
|
|
|
43 |
"node_i": "3845",
|
44 |
-
"
|
45 |
-
"
|
46 |
},
|
47 |
{
|
48 |
"name": "Guahibo",
|
49 |
"iso_1_code": null,
|
50 |
"iso_3_code": "guh",
|
51 |
-
"tokenizers": {},
|
52 |
"children": [],
|
|
|
53 |
"node_i": "3846",
|
|
|
54 |
"scripts": [
|
55 |
"Latn"
|
56 |
-
]
|
57 |
-
"own_tokenizer": false
|
58 |
},
|
59 |
{
|
60 |
"name": "Macagu\u00e1n",
|
61 |
"iso_1_code": null,
|
62 |
"iso_3_code": "mbn",
|
63 |
-
"tokenizers": {},
|
64 |
"children": [],
|
|
|
65 |
"node_i": "3847",
|
66 |
-
"
|
67 |
-
"
|
68 |
}
|
69 |
],
|
|
|
70 |
"node_i": "3844",
|
71 |
-
"
|
72 |
-
"
|
73 |
}
|
74 |
],
|
|
|
75 |
"node_i": "3841",
|
76 |
-
"
|
77 |
-
"
|
78 |
}
|
|
|
2 |
"name": "Guajiboan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Cuiba",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "cui",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3842",
|
13 |
+
"native_tokenizers": [],
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
+
]
|
|
|
17 |
},
|
18 |
{
|
19 |
"name": "Guayabero",
|
20 |
"iso_1_code": null,
|
21 |
"iso_3_code": "guo",
|
|
|
22 |
"children": [],
|
23 |
+
"tokenizers": {},
|
24 |
"node_i": "3843",
|
25 |
+
"native_tokenizers": [],
|
26 |
"scripts": [
|
27 |
"Latn"
|
28 |
+
]
|
|
|
29 |
},
|
30 |
{
|
31 |
"name": "Guajibo",
|
32 |
"iso_1_code": null,
|
33 |
"iso_3_code": null,
|
|
|
34 |
"children": [
|
35 |
{
|
36 |
"name": "Playero",
|
37 |
"iso_1_code": null,
|
38 |
"iso_3_code": "gob",
|
|
|
39 |
"children": [],
|
40 |
+
"tokenizers": {},
|
41 |
"node_i": "3845",
|
42 |
+
"native_tokenizers": [],
|
43 |
+
"scripts": []
|
44 |
},
|
45 |
{
|
46 |
"name": "Guahibo",
|
47 |
"iso_1_code": null,
|
48 |
"iso_3_code": "guh",
|
|
|
49 |
"children": [],
|
50 |
+
"tokenizers": {},
|
51 |
"node_i": "3846",
|
52 |
+
"native_tokenizers": [],
|
53 |
"scripts": [
|
54 |
"Latn"
|
55 |
+
]
|
|
|
56 |
},
|
57 |
{
|
58 |
"name": "Macagu\u00e1n",
|
59 |
"iso_1_code": null,
|
60 |
"iso_3_code": "mbn",
|
|
|
61 |
"children": [],
|
62 |
+
"tokenizers": {},
|
63 |
"node_i": "3847",
|
64 |
+
"native_tokenizers": [],
|
65 |
+
"scripts": []
|
66 |
}
|
67 |
],
|
68 |
+
"tokenizers": {},
|
69 |
"node_i": "3844",
|
70 |
+
"native_tokenizers": [],
|
71 |
+
"scripts": []
|
72 |
}
|
73 |
],
|
74 |
+
"tokenizers": {},
|
75 |
"node_i": "3841",
|
76 |
+
"native_tokenizers": [],
|
77 |
+
"scripts": []
|
78 |
}
|
data/Guaykuruan.json
CHANGED
@@ -2,90 +2,90 @@
|
|
2 |
"name": "Guaykuruan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Guaykur\u00fa",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Abipon",
|
15 |
"iso_1_code": null,
|
16 |
"iso_3_code": "axb",
|
17 |
-
"tokenizers": {},
|
18 |
"children": [],
|
|
|
19 |
"node_i": "3850",
|
20 |
-
"
|
21 |
-
"
|
22 |
},
|
23 |
{
|
24 |
"name": "Kadiw\u00e9u",
|
25 |
"iso_1_code": null,
|
26 |
"iso_3_code": "kbc",
|
27 |
-
"tokenizers": {},
|
28 |
"children": [],
|
|
|
29 |
"node_i": "3851",
|
|
|
30 |
"scripts": [
|
31 |
"Latn"
|
32 |
-
]
|
33 |
-
"own_tokenizer": false
|
34 |
}
|
35 |
],
|
|
|
36 |
"node_i": "3849",
|
37 |
-
"
|
38 |
-
"
|
39 |
},
|
40 |
{
|
41 |
"name": "Southern",
|
42 |
"iso_1_code": null,
|
43 |
"iso_3_code": null,
|
44 |
-
"tokenizers": {},
|
45 |
"children": [
|
46 |
{
|
47 |
"name": "Mocov\u00ed",
|
48 |
"iso_1_code": null,
|
49 |
"iso_3_code": "moc",
|
50 |
-
"tokenizers": {},
|
51 |
"children": [],
|
|
|
52 |
"node_i": "3853",
|
|
|
53 |
"scripts": [
|
54 |
"Latn"
|
55 |
-
]
|
56 |
-
"own_tokenizer": false
|
57 |
},
|
58 |
{
|
59 |
"name": "Pilag\u00e1",
|
60 |
"iso_1_code": null,
|
61 |
"iso_3_code": "plg",
|
62 |
-
"tokenizers": {},
|
63 |
"children": [],
|
|
|
64 |
"node_i": "3854",
|
|
|
65 |
"scripts": [
|
66 |
"Latn"
|
67 |
-
]
|
68 |
-
"own_tokenizer": false
|
69 |
},
|
70 |
{
|
71 |
"name": "Toba",
|
72 |
"iso_1_code": null,
|
73 |
"iso_3_code": "tob",
|
74 |
-
"tokenizers": {},
|
75 |
"children": [],
|
|
|
76 |
"node_i": "3855",
|
|
|
77 |
"scripts": [
|
78 |
"Latn"
|
79 |
-
]
|
80 |
-
"own_tokenizer": false
|
81 |
}
|
82 |
],
|
|
|
83 |
"node_i": "3852",
|
84 |
-
"
|
85 |
-
"
|
86 |
}
|
87 |
],
|
|
|
88 |
"node_i": "3848",
|
89 |
-
"
|
90 |
-
"
|
91 |
}
|
|
|
2 |
"name": "Guaykuruan",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Guaykur\u00fa",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Abipon",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": "axb",
|
|
|
15 |
"children": [],
|
16 |
+
"tokenizers": {},
|
17 |
"node_i": "3850",
|
18 |
+
"native_tokenizers": [],
|
19 |
+
"scripts": []
|
20 |
},
|
21 |
{
|
22 |
"name": "Kadiw\u00e9u",
|
23 |
"iso_1_code": null,
|
24 |
"iso_3_code": "kbc",
|
|
|
25 |
"children": [],
|
26 |
+
"tokenizers": {},
|
27 |
"node_i": "3851",
|
28 |
+
"native_tokenizers": [],
|
29 |
"scripts": [
|
30 |
"Latn"
|
31 |
+
]
|
|
|
32 |
}
|
33 |
],
|
34 |
+
"tokenizers": {},
|
35 |
"node_i": "3849",
|
36 |
+
"native_tokenizers": [],
|
37 |
+
"scripts": []
|
38 |
},
|
39 |
{
|
40 |
"name": "Southern",
|
41 |
"iso_1_code": null,
|
42 |
"iso_3_code": null,
|
|
|
43 |
"children": [
|
44 |
{
|
45 |
"name": "Mocov\u00ed",
|
46 |
"iso_1_code": null,
|
47 |
"iso_3_code": "moc",
|
|
|
48 |
"children": [],
|
49 |
+
"tokenizers": {},
|
50 |
"node_i": "3853",
|
51 |
+
"native_tokenizers": [],
|
52 |
"scripts": [
|
53 |
"Latn"
|
54 |
+
]
|
|
|
55 |
},
|
56 |
{
|
57 |
"name": "Pilag\u00e1",
|
58 |
"iso_1_code": null,
|
59 |
"iso_3_code": "plg",
|
|
|
60 |
"children": [],
|
61 |
+
"tokenizers": {},
|
62 |
"node_i": "3854",
|
63 |
+
"native_tokenizers": [],
|
64 |
"scripts": [
|
65 |
"Latn"
|
66 |
+
]
|
|
|
67 |
},
|
68 |
{
|
69 |
"name": "Toba",
|
70 |
"iso_1_code": null,
|
71 |
"iso_3_code": "tob",
|
|
|
72 |
"children": [],
|
73 |
+
"tokenizers": {},
|
74 |
"node_i": "3855",
|
75 |
+
"native_tokenizers": [],
|
76 |
"scripts": [
|
77 |
"Latn"
|
78 |
+
]
|
|
|
79 |
}
|
80 |
],
|
81 |
+
"tokenizers": {},
|
82 |
"node_i": "3852",
|
83 |
+
"native_tokenizers": [],
|
84 |
+
"scripts": []
|
85 |
}
|
86 |
],
|
87 |
+
"tokenizers": {},
|
88 |
"node_i": "3848",
|
89 |
+
"native_tokenizers": [],
|
90 |
+
"scripts": []
|
91 |
}
|
data/Gum.json
CHANGED
@@ -2,9 +2,9 @@
|
|
2 |
"name": "Gum",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [],
|
|
|
7 |
"node_i": "3856",
|
8 |
-
"
|
9 |
-
"
|
10 |
}
|
|
|
2 |
"name": "Gum",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [],
|
6 |
+
"tokenizers": {},
|
7 |
"node_i": "3856",
|
8 |
+
"native_tokenizers": [],
|
9 |
+
"scripts": []
|
10 |
}
|
data/Haida.json
CHANGED
@@ -2,30 +2,30 @@
|
|
2 |
"name": "Haida",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Haida, Southern",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "hax",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3858",
|
14 |
-
"
|
15 |
-
"
|
16 |
},
|
17 |
{
|
18 |
"name": "Haida, Northern",
|
19 |
"iso_1_code": null,
|
20 |
"iso_3_code": "hdn",
|
21 |
-
"tokenizers": {},
|
22 |
"children": [],
|
|
|
23 |
"node_i": "3859",
|
24 |
-
"
|
25 |
-
"
|
26 |
}
|
27 |
],
|
|
|
28 |
"node_i": "3857",
|
29 |
-
"
|
30 |
-
"
|
31 |
}
|
|
|
2 |
"name": "Haida",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Haida, Southern",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "hax",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3858",
|
13 |
+
"native_tokenizers": [],
|
14 |
+
"scripts": []
|
15 |
},
|
16 |
{
|
17 |
"name": "Haida, Northern",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "hdn",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "3859",
|
23 |
+
"native_tokenizers": [],
|
24 |
+
"scripts": []
|
25 |
}
|
26 |
],
|
27 |
+
"tokenizers": {},
|
28 |
"node_i": "3857",
|
29 |
+
"native_tokenizers": [],
|
30 |
+
"scripts": []
|
31 |
}
|
data/Harákmbut.json
CHANGED
@@ -2,32 +2,32 @@
|
|
2 |
"name": "Har\u00e1kmbut",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Amarakaeri",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": "amr",
|
11 |
-
"tokenizers": {},
|
12 |
"children": [],
|
|
|
13 |
"node_i": "3861",
|
|
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
-
]
|
17 |
-
"own_tokenizer": false
|
18 |
},
|
19 |
{
|
20 |
"name": "Huachipaeri",
|
21 |
"iso_1_code": null,
|
22 |
"iso_3_code": "hug",
|
23 |
-
"tokenizers": {},
|
24 |
"children": [],
|
|
|
25 |
"node_i": "3862",
|
26 |
-
"
|
27 |
-
"
|
28 |
}
|
29 |
],
|
|
|
30 |
"node_i": "3860",
|
31 |
-
"
|
32 |
-
"
|
33 |
}
|
|
|
2 |
"name": "Har\u00e1kmbut",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Amarakaeri",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": "amr",
|
|
|
10 |
"children": [],
|
11 |
+
"tokenizers": {},
|
12 |
"node_i": "3861",
|
13 |
+
"native_tokenizers": [],
|
14 |
"scripts": [
|
15 |
"Latn"
|
16 |
+
]
|
|
|
17 |
},
|
18 |
{
|
19 |
"name": "Huachipaeri",
|
20 |
"iso_1_code": null,
|
21 |
"iso_3_code": "hug",
|
|
|
22 |
"children": [],
|
23 |
+
"tokenizers": {},
|
24 |
"node_i": "3862",
|
25 |
+
"native_tokenizers": [],
|
26 |
+
"scripts": []
|
27 |
}
|
28 |
],
|
29 |
+
"tokenizers": {},
|
30 |
"node_i": "3860",
|
31 |
+
"native_tokenizers": [],
|
32 |
+
"scripts": []
|
33 |
}
|
data/Hmong-Mien.json
CHANGED
@@ -2,527 +2,527 @@
|
|
2 |
"name": "Hmong-Mien",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
5 |
-
"tokenizers": {},
|
6 |
"children": [
|
7 |
{
|
8 |
"name": "Hmongic",
|
9 |
"iso_1_code": null,
|
10 |
"iso_3_code": null,
|
11 |
-
"tokenizers": {},
|
12 |
"children": [
|
13 |
{
|
14 |
"name": "Bunu",
|
15 |
"iso_1_code": null,
|
16 |
"iso_3_code": null,
|
17 |
-
"tokenizers": {},
|
18 |
"children": [
|
19 |
{
|
20 |
"name": "Bunu, Younuo",
|
21 |
"iso_1_code": null,
|
22 |
"iso_3_code": "buh",
|
23 |
-
"tokenizers": {},
|
24 |
"children": [],
|
|
|
25 |
"node_i": "3866",
|
26 |
-
"
|
27 |
-
"
|
28 |
},
|
29 |
{
|
30 |
"name": "Bunu, Wunai",
|
31 |
"iso_1_code": null,
|
32 |
"iso_3_code": "bwn",
|
33 |
-
"tokenizers": {},
|
34 |
"children": [],
|
|
|
35 |
"node_i": "3867",
|
36 |
-
"
|
37 |
-
"
|
38 |
},
|
39 |
{
|
40 |
"name": "Bunu, Bu-Nao",
|
41 |
"iso_1_code": null,
|
42 |
"iso_3_code": "bwx",
|
43 |
-
"tokenizers": {},
|
44 |
"children": [],
|
|
|
45 |
"node_i": "3868",
|
46 |
-
"
|
47 |
-
"
|
48 |
},
|
49 |
{
|
50 |
"name": "Bunu, Jiongnai",
|
51 |
"iso_1_code": null,
|
52 |
"iso_3_code": "pnu",
|
53 |
-
"tokenizers": {},
|
54 |
"children": [],
|
|
|
55 |
"node_i": "3869",
|
56 |
-
"
|
57 |
-
"
|
58 |
}
|
59 |
],
|
|
|
60 |
"node_i": "3865",
|
61 |
-
"
|
62 |
-
"
|
63 |
},
|
64 |
{
|
65 |
"name": "Chuanqiandian",
|
66 |
"iso_1_code": null,
|
67 |
"iso_3_code": null,
|
68 |
-
"tokenizers": {},
|
69 |
"children": [
|
70 |
{
|
71 |
"name": "Miao, Chuanqiandian Cluster",
|
72 |
"iso_1_code": null,
|
73 |
"iso_3_code": "cqd",
|
74 |
-
"tokenizers": {},
|
75 |
"children": [],
|
|
|
76 |
"node_i": "3871",
|
77 |
-
"
|
78 |
-
"
|
79 |
},
|
80 |
{
|
81 |
"name": "Miao, Southern Mashan",
|
82 |
"iso_1_code": null,
|
83 |
"iso_3_code": "hma",
|
84 |
-
"tokenizers": {},
|
85 |
"children": [],
|
|
|
86 |
"node_i": "3872",
|
87 |
-
"
|
88 |
-
"
|
89 |
},
|
90 |
{
|
91 |
"name": "Miao, Central Huishui",
|
92 |
"iso_1_code": null,
|
93 |
"iso_3_code": "hmc",
|
94 |
-
"tokenizers": {},
|
95 |
"children": [],
|
|
|
96 |
"node_i": "3873",
|
97 |
-
"
|
98 |
-
"
|
99 |
},
|
100 |
{
|
101 |
"name": "Miao, Large Flowery",
|
102 |
"iso_1_code": null,
|
103 |
"iso_3_code": "hmd",
|
104 |
-
"tokenizers": {},
|
105 |
"children": [],
|
|
|
106 |
"node_i": "3874",
|
107 |
-
"
|
108 |
-
"
|
109 |
},
|
110 |
{
|
111 |
"name": "Miao, Eastern Huishui",
|
112 |
"iso_1_code": null,
|
113 |
"iso_3_code": "hme",
|
114 |
-
"tokenizers": {},
|
115 |
"children": [],
|
|
|
116 |
"node_i": "3875",
|
117 |
-
"
|
118 |
-
"
|
119 |
},
|
120 |
{
|
121 |
"name": "Hmong Don",
|
122 |
"iso_1_code": null,
|
123 |
"iso_3_code": "hmf",
|
124 |
-
"tokenizers": {},
|
125 |
"children": [],
|
|
|
126 |
"node_i": "3876",
|
127 |
-
"
|
128 |
-
"
|
129 |
},
|
130 |
{
|
131 |
"name": "Miao, Southwestern Guiyang",
|
132 |
"iso_1_code": null,
|
133 |
"iso_3_code": "hmg",
|
134 |
-
"tokenizers": {},
|
135 |
"children": [],
|
|
|
136 |
"node_i": "3877",
|
137 |
-
"
|
138 |
-
"
|
139 |
},
|
140 |
{
|
141 |
"name": "Miao, Southwestern Huishui",
|
142 |
"iso_1_code": null,
|
143 |
"iso_3_code": "hmh",
|
144 |
-
"tokenizers": {},
|
145 |
"children": [],
|
|
|
146 |
"node_i": "3878",
|
147 |
-
"
|
148 |
-
"
|
149 |
},
|
150 |
{
|
151 |
"name": "Miao, Northern Huishui",
|
152 |
"iso_1_code": null,
|
153 |
"iso_3_code": "hmi",
|
154 |
-
"tokenizers": {},
|
155 |
"children": [],
|
|
|
156 |
"node_i": "3879",
|
157 |
-
"
|
158 |
-
"
|
159 |
},
|
160 |
{
|
161 |
"name": "Ge",
|
162 |
"iso_1_code": null,
|
163 |
"iso_3_code": "hmj",
|
164 |
-
"tokenizers": {},
|
165 |
"children": [],
|
|
|
166 |
"node_i": "3880",
|
167 |
-
"
|
168 |
-
"
|
169 |
},
|
170 |
{
|
171 |
"name": "Miao, Luopohe",
|
172 |
"iso_1_code": null,
|
173 |
"iso_3_code": "hml",
|
174 |
-
"tokenizers": {},
|
175 |
"children": [],
|
|
|
176 |
"node_i": "3881",
|
177 |
-
"
|
178 |
-
"
|
179 |
},
|
180 |
{
|
181 |
"name": "Miao, Central Mashan",
|
182 |
"iso_1_code": null,
|
183 |
"iso_3_code": "hmm",
|
184 |
-
"tokenizers": {},
|
185 |
"children": [],
|
|
|
186 |
"node_i": "3882",
|
187 |
-
"
|
188 |
-
"
|
189 |
},
|
190 |
{
|
191 |
"name": "Miao, Northern Mashan",
|
192 |
"iso_1_code": null,
|
193 |
"iso_3_code": "hmp",
|
194 |
-
"tokenizers": {},
|
195 |
"children": [],
|
|
|
196 |
"node_i": "3883",
|
197 |
-
"
|
198 |
-
"
|
199 |
},
|
200 |
{
|
201 |
"name": "Hmong D\u00f4",
|
202 |
"iso_1_code": null,
|
203 |
"iso_3_code": "hmv",
|
204 |
-
"tokenizers": {},
|
205 |
"children": [],
|
|
|
206 |
"node_i": "3884",
|
207 |
-
"
|
208 |
-
"
|
209 |
},
|
210 |
{
|
211 |
"name": "Miao, Western Mashan",
|
212 |
"iso_1_code": null,
|
213 |
"iso_3_code": "hmw",
|
214 |
-
"tokenizers": {},
|
215 |
"children": [],
|
|
|
216 |
"node_i": "3885",
|
217 |
-
"
|
218 |
-
"
|
219 |
},
|
220 |
{
|
221 |
"name": "Miao, Southern Guiyang",
|
222 |
"iso_1_code": null,
|
223 |
"iso_3_code": "hmy",
|
224 |
-
"tokenizers": {},
|
225 |
"children": [],
|
|
|
226 |
"node_i": "3886",
|
227 |
-
"
|
228 |
-
"
|
229 |
},
|
230 |
{
|
231 |
"name": "Sinicized Miao",
|
232 |
"iso_1_code": null,
|
233 |
"iso_3_code": "hmz",
|
234 |
-
"tokenizers": {},
|
235 |
"children": [],
|
|
|
236 |
"node_i": "3887",
|
237 |
-
"
|
238 |
-
"
|
239 |
},
|
240 |
{
|
241 |
"name": "Hmong Njua",
|
242 |
"iso_1_code": null,
|
243 |
"iso_3_code": "hnj",
|
244 |
-
"tokenizers": {},
|
245 |
"children": [],
|
|
|
246 |
"node_i": "3888",
|
|
|
247 |
"scripts": [
|
248 |
"Latn"
|
249 |
-
]
|
250 |
-
"own_tokenizer": false
|
251 |
},
|
252 |
{
|
253 |
"name": "Miao, Horned",
|
254 |
"iso_1_code": null,
|
255 |
"iso_3_code": "hrm",
|
256 |
-
"tokenizers": {},
|
257 |
"children": [],
|
|
|
258 |
"node_i": "3889",
|
259 |
-
"
|
260 |
-
"
|
261 |
},
|
262 |
{
|
263 |
"name": "Miao, Northern Guiyang",
|
264 |
"iso_1_code": null,
|
265 |
"iso_3_code": "huj",
|
266 |
-
"tokenizers": {},
|
267 |
"children": [],
|
|
|
268 |
"node_i": "3890",
|
269 |
-
"
|
270 |
-
"
|
271 |
},
|
272 |
{
|
273 |
"name": "Hmong Daw",
|
274 |
"iso_1_code": null,
|
275 |
"iso_3_code": "mww",
|
276 |
-
"tokenizers": {},
|
277 |
"children": [],
|
|
|
278 |
"node_i": "3891",
|
|
|
279 |
"scripts": [
|
280 |
"Latn"
|
281 |
-
]
|
282 |
-
"own_tokenizer": false
|
283 |
},
|
284 |
{
|
285 |
"name": "Miao, Small Flowery",
|
286 |
"iso_1_code": null,
|
287 |
"iso_3_code": "sfm",
|
288 |
-
"tokenizers": {},
|
289 |
"children": [],
|
|
|
290 |
"node_i": "3892",
|
291 |
-
"
|
292 |
-
"
|
293 |
}
|
294 |
],
|
|
|
295 |
"node_i": "3870",
|
296 |
-
"
|
297 |
-
"
|
298 |
},
|
299 |
{
|
300 |
"name": "Pa-hng",
|
301 |
"iso_1_code": null,
|
302 |
"iso_3_code": null,
|
303 |
-
"tokenizers": {},
|
304 |
"children": [
|
305 |
{
|
306 |
"name": "Pa-Hng",
|
307 |
"iso_1_code": null,
|
308 |
"iso_3_code": "pha",
|
309 |
-
"tokenizers": {},
|
310 |
"children": [],
|
|
|
311 |
"node_i": "3894",
|
312 |
-
"
|
313 |
-
"
|
314 |
}
|
315 |
],
|
|
|
316 |
"node_i": "3893",
|
317 |
-
"
|
318 |
-
"
|
319 |
},
|
320 |
{
|
321 |
"name": "Qiandong",
|
322 |
"iso_1_code": null,
|
323 |
"iso_3_code": null,
|
324 |
-
"tokenizers": {},
|
325 |
"children": [
|
326 |
{
|
327 |
"name": "Miao, Northern Qiandong",
|
328 |
"iso_1_code": null,
|
329 |
"iso_3_code": "hea",
|
330 |
-
"tokenizers": {},
|
331 |
"children": [],
|
|
|
332 |
"node_i": "3896",
|
333 |
-
"
|
334 |
-
"
|
335 |
},
|
336 |
{
|
337 |
"name": "Miao, Eastern Qiandong",
|
338 |
"iso_1_code": null,
|
339 |
"iso_3_code": "hmq",
|
340 |
-
"tokenizers": {},
|
341 |
"children": [],
|
|
|
342 |
"node_i": "3897",
|
343 |
-
"
|
344 |
-
"
|
345 |
},
|
346 |
{
|
347 |
"name": "Miao, Southern Qiandong",
|
348 |
"iso_1_code": null,
|
349 |
"iso_3_code": "hms",
|
350 |
-
"tokenizers": {},
|
351 |
"children": [],
|
|
|
352 |
"node_i": "3898",
|
353 |
-
"
|
354 |
-
"
|
355 |
},
|
356 |
{
|
357 |
"name": "N\u00e1-Meo",
|
358 |
"iso_1_code": null,
|
359 |
"iso_3_code": "neo",
|
360 |
-
"tokenizers": {},
|
361 |
"children": [],
|
|
|
362 |
"node_i": "3899",
|
363 |
-
"
|
364 |
-
"
|
365 |
}
|
366 |
],
|
|
|
367 |
"node_i": "3895",
|
368 |
-
"
|
369 |
-
"
|
370 |
},
|
371 |
{
|
372 |
"name": "Xiangxi",
|
373 |
"iso_1_code": null,
|
374 |
"iso_3_code": null,
|
375 |
-
"tokenizers": {},
|
376 |
"children": [
|
377 |
{
|
378 |
"name": "Miao, Western Xiangxi",
|
379 |
"iso_1_code": null,
|
380 |
"iso_3_code": "mmr",
|
381 |
-
"tokenizers": {},
|
382 |
"children": [],
|
|
|
383 |
"node_i": "3901",
|
384 |
-
"
|
385 |
-
"
|
386 |
},
|
387 |
{
|
388 |
"name": "Miao, Eastern Xiangxi",
|
389 |
"iso_1_code": null,
|
390 |
"iso_3_code": "muq",
|
391 |
-
"tokenizers": {},
|
392 |
"children": [],
|
|
|
393 |
"node_i": "3902",
|
394 |
-
"
|
395 |
-
"
|
396 |
}
|
397 |
],
|
|
|
398 |
"node_i": "3900",
|
399 |
-
"
|
400 |
-
"
|
401 |
}
|
402 |
],
|
|
|
403 |
"node_i": "3864",
|
404 |
-
"
|
405 |
-
"
|
406 |
},
|
407 |
{
|
408 |
"name": "Ho Nte",
|
409 |
"iso_1_code": null,
|
410 |
"iso_3_code": null,
|
411 |
-
"tokenizers": {},
|
412 |
"children": [
|
413 |
{
|
414 |
"name": "She",
|
415 |
"iso_1_code": null,
|
416 |
"iso_3_code": "shx",
|
417 |
-
"tokenizers": {},
|
418 |
"children": [],
|
|
|
419 |
"node_i": "3904",
|
420 |
-
"
|
421 |
-
"
|
422 |
}
|
423 |
],
|
|
|
424 |
"node_i": "3903",
|
425 |
-
"
|
426 |
-
"
|
427 |
},
|
428 |
{
|
429 |
"name": "Mienic",
|
430 |
"iso_1_code": null,
|
431 |
"iso_3_code": null,
|
432 |
-
"tokenizers": {},
|
433 |
"children": [
|
434 |
{
|
435 |
"name": "Biao-Jiao",
|
436 |
"iso_1_code": null,
|
437 |
"iso_3_code": null,
|
438 |
-
"tokenizers": {},
|
439 |
"children": [
|
440 |
{
|
441 |
"name": "Biao-Jiao Mien",
|
442 |
"iso_1_code": null,
|
443 |
"iso_3_code": "bje",
|
444 |
-
"tokenizers": {},
|
445 |
"children": [],
|
|
|
446 |
"node_i": "3907",
|
447 |
-
"
|
448 |
-
"
|
449 |
}
|
450 |
],
|
|
|
451 |
"node_i": "3906",
|
452 |
-
"
|
453 |
-
"
|
454 |
},
|
455 |
{
|
456 |
"name": "Mian-Jin",
|
457 |
"iso_1_code": null,
|
458 |
"iso_3_code": null,
|
459 |
-
"tokenizers": {},
|
460 |
"children": [
|
461 |
{
|
462 |
"name": "Biao Mon",
|
463 |
"iso_1_code": null,
|
464 |
"iso_3_code": "bmt",
|
465 |
-
"tokenizers": {},
|
466 |
"children": [],
|
|
|
467 |
"node_i": "3909",
|
468 |
-
"
|
469 |
-
"
|
470 |
},
|
471 |
{
|
472 |
"name": "Iu Mien",
|
473 |
"iso_1_code": null,
|
474 |
"iso_3_code": "ium",
|
475 |
-
"tokenizers": {},
|
476 |
"children": [],
|
|
|
477 |
"node_i": "3910",
|
|
|
478 |
"scripts": [
|
479 |
"Latn"
|
480 |
-
]
|
481 |
-
"own_tokenizer": false
|
482 |
},
|
483 |
{
|
484 |
"name": "Kim Mun",
|
485 |
"iso_1_code": null,
|
486 |
"iso_3_code": "mji",
|
487 |
-
"tokenizers": {},
|
488 |
"children": [],
|
|
|
489 |
"node_i": "3911",
|
490 |
-
"
|
491 |
-
"
|
492 |
}
|
493 |
],
|
|
|
494 |
"node_i": "3908",
|
495 |
-
"
|
496 |
-
"
|
497 |
},
|
498 |
{
|
499 |
"name": "Zaomin",
|
500 |
"iso_1_code": null,
|
501 |
"iso_3_code": null,
|
502 |
-
"tokenizers": {},
|
503 |
"children": [
|
504 |
{
|
505 |
"name": "Dzao Min",
|
506 |
"iso_1_code": null,
|
507 |
"iso_3_code": "bpn",
|
508 |
-
"tokenizers": {},
|
509 |
"children": [],
|
|
|
510 |
"node_i": "3913",
|
511 |
-
"
|
512 |
-
"
|
513 |
}
|
514 |
],
|
|
|
515 |
"node_i": "3912",
|
516 |
-
"
|
517 |
-
"
|
518 |
}
|
519 |
],
|
|
|
520 |
"node_i": "3905",
|
521 |
-
"
|
522 |
-
"
|
523 |
}
|
524 |
],
|
|
|
525 |
"node_i": "3863",
|
526 |
-
"
|
527 |
-
"
|
528 |
}
|
|
|
2 |
"name": "Hmong-Mien",
|
3 |
"iso_1_code": null,
|
4 |
"iso_3_code": null,
|
|
|
5 |
"children": [
|
6 |
{
|
7 |
"name": "Hmongic",
|
8 |
"iso_1_code": null,
|
9 |
"iso_3_code": null,
|
|
|
10 |
"children": [
|
11 |
{
|
12 |
"name": "Bunu",
|
13 |
"iso_1_code": null,
|
14 |
"iso_3_code": null,
|
|
|
15 |
"children": [
|
16 |
{
|
17 |
"name": "Bunu, Younuo",
|
18 |
"iso_1_code": null,
|
19 |
"iso_3_code": "buh",
|
|
|
20 |
"children": [],
|
21 |
+
"tokenizers": {},
|
22 |
"node_i": "3866",
|
23 |
+
"native_tokenizers": [],
|
24 |
+
"scripts": []
|
25 |
},
|
26 |
{
|
27 |
"name": "Bunu, Wunai",
|
28 |
"iso_1_code": null,
|
29 |
"iso_3_code": "bwn",
|
|
|
30 |
"children": [],
|
31 |
+
"tokenizers": {},
|
32 |
"node_i": "3867",
|
33 |
+
"native_tokenizers": [],
|
34 |
+
"scripts": []
|
35 |
},
|
36 |
{
|
37 |
"name": "Bunu, Bu-Nao",
|
38 |
"iso_1_code": null,
|
39 |
"iso_3_code": "bwx",
|
|
|
40 |
"children": [],
|
41 |
+
"tokenizers": {},
|
42 |
"node_i": "3868",
|
43 |
+
"native_tokenizers": [],
|
44 |
+
"scripts": []
|
45 |
},
|
46 |
{
|
47 |
"name": "Bunu, Jiongnai",
|
48 |
"iso_1_code": null,
|
49 |
"iso_3_code": "pnu",
|
|
|
50 |
"children": [],
|
51 |
+
"tokenizers": {},
|
52 |
"node_i": "3869",
|
53 |
+
"native_tokenizers": [],
|
54 |
+
"scripts": []
|
55 |
}
|
56 |
],
|
57 |
+
"tokenizers": {},
|
58 |
"node_i": "3865",
|
59 |
+
"native_tokenizers": [],
|
60 |
+
"scripts": []
|
61 |
},
|
62 |
{
|
63 |
"name": "Chuanqiandian",
|
64 |
"iso_1_code": null,
|
65 |
"iso_3_code": null,
|
|
|
66 |
"children": [
|
67 |
{
|
68 |
"name": "Miao, Chuanqiandian Cluster",
|
69 |
"iso_1_code": null,
|
70 |
"iso_3_code": "cqd",
|
|
|
71 |
"children": [],
|
72 |
+
"tokenizers": {},
|
73 |
"node_i": "3871",
|
74 |
+
"native_tokenizers": [],
|
75 |
+
"scripts": []
|
76 |
},
|
77 |
{
|
78 |
"name": "Miao, Southern Mashan",
|
79 |
"iso_1_code": null,
|
80 |
"iso_3_code": "hma",
|
|
|
81 |
"children": [],
|
82 |
+
"tokenizers": {},
|
83 |
"node_i": "3872",
|
84 |
+
"native_tokenizers": [],
|
85 |
+
"scripts": []
|
86 |
},
|
87 |
{
|
88 |
"name": "Miao, Central Huishui",
|
89 |
"iso_1_code": null,
|
90 |
"iso_3_code": "hmc",
|
|
|
91 |
"children": [],
|
92 |
+
"tokenizers": {},
|
93 |
"node_i": "3873",
|
94 |
+
"native_tokenizers": [],
|
95 |
+
"scripts": []
|
96 |
},
|
97 |
{
|
98 |
"name": "Miao, Large Flowery",
|
99 |
"iso_1_code": null,
|
100 |
"iso_3_code": "hmd",
|
|
|
101 |
"children": [],
|
102 |
+
"tokenizers": {},
|
103 |
"node_i": "3874",
|
104 |
+
"native_tokenizers": [],
|
105 |
+
"scripts": []
|
106 |
},
|
107 |
{
|
108 |
"name": "Miao, Eastern Huishui",
|
109 |
"iso_1_code": null,
|
110 |
"iso_3_code": "hme",
|
|
|
111 |
"children": [],
|
112 |
+
"tokenizers": {},
|
113 |
"node_i": "3875",
|
114 |
+
"native_tokenizers": [],
|
115 |
+
"scripts": []
|
116 |
},
|
117 |
{
|
118 |
"name": "Hmong Don",
|
119 |
"iso_1_code": null,
|
120 |
"iso_3_code": "hmf",
|
|
|
121 |
"children": [],
|
122 |
+
"tokenizers": {},
|
123 |
"node_i": "3876",
|
124 |
+
"native_tokenizers": [],
|
125 |
+
"scripts": []
|
126 |
},
|
127 |
{
|
128 |
"name": "Miao, Southwestern Guiyang",
|
129 |
"iso_1_code": null,
|
130 |
"iso_3_code": "hmg",
|
|
|
131 |
"children": [],
|
132 |
+
"tokenizers": {},
|
133 |
"node_i": "3877",
|
134 |
+
"native_tokenizers": [],
|
135 |
+
"scripts": []
|
136 |
},
|
137 |
{
|
138 |
"name": "Miao, Southwestern Huishui",
|
139 |
"iso_1_code": null,
|
140 |
"iso_3_code": "hmh",
|
|
|
141 |
"children": [],
|
142 |
+
"tokenizers": {},
|
143 |
"node_i": "3878",
|
144 |
+
"native_tokenizers": [],
|
145 |
+
"scripts": []
|
146 |
},
|
147 |
{
|
148 |
"name": "Miao, Northern Huishui",
|
149 |
"iso_1_code": null,
|
150 |
"iso_3_code": "hmi",
|
|
|
151 |
"children": [],
|
152 |
+
"tokenizers": {},
|
153 |
"node_i": "3879",
|
154 |
+
"native_tokenizers": [],
|
155 |
+
"scripts": []
|
156 |
},
|
157 |
{
|
158 |
"name": "Ge",
|
159 |
"iso_1_code": null,
|
160 |
"iso_3_code": "hmj",
|
|
|
161 |
"children": [],
|
162 |
+
"tokenizers": {},
|
163 |
"node_i": "3880",
|
164 |
+
"native_tokenizers": [],
|
165 |
+
"scripts": []
|
166 |
},
|
167 |
{
|
168 |
"name": "Miao, Luopohe",
|
169 |
"iso_1_code": null,
|
170 |
"iso_3_code": "hml",
|
|
|
171 |
"children": [],
|
172 |
+
"tokenizers": {},
|
173 |
"node_i": "3881",
|
174 |
+
"native_tokenizers": [],
|
175 |
+
"scripts": []
|
176 |
},
|
177 |
{
|
178 |
"name": "Miao, Central Mashan",
|
179 |
"iso_1_code": null,
|
180 |
"iso_3_code": "hmm",
|
|
|
181 |
"children": [],
|
182 |
+
"tokenizers": {},
|
183 |
"node_i": "3882",
|
184 |
+
"native_tokenizers": [],
|
185 |
+
"scripts": []
|
186 |
},
|
187 |
{
|
188 |
"name": "Miao, Northern Mashan",
|
189 |
"iso_1_code": null,
|
190 |
"iso_3_code": "hmp",
|
|
|
191 |
"children": [],
|
192 |
+
"tokenizers": {},
|
193 |
"node_i": "3883",
|
194 |
+
"native_tokenizers": [],
|
195 |
+
"scripts": []
|
196 |
},
|
197 |
{
|
198 |
"name": "Hmong D\u00f4",
|
199 |
"iso_1_code": null,
|
200 |
"iso_3_code": "hmv",
|
|
|
201 |
"children": [],
|
202 |
+
"tokenizers": {},
|
203 |
"node_i": "3884",
|
204 |
+
"native_tokenizers": [],
|
205 |
+
"scripts": []
|
206 |
},
|
207 |
{
|
208 |
"name": "Miao, Western Mashan",
|
209 |
"iso_1_code": null,
|
210 |
"iso_3_code": "hmw",
|
|
|
211 |
"children": [],
|
212 |
+
"tokenizers": {},
|
213 |
"node_i": "3885",
|
214 |
+
"native_tokenizers": [],
|
215 |
+
"scripts": []
|
216 |
},
|
217 |
{
|
218 |
"name": "Miao, Southern Guiyang",
|
219 |
"iso_1_code": null,
|
220 |
"iso_3_code": "hmy",
|
|
|
221 |
"children": [],
|
222 |
+
"tokenizers": {},
|
223 |
"node_i": "3886",
|
224 |
+
"native_tokenizers": [],
|
225 |
+
"scripts": []
|
226 |
},
|
227 |
{
|
228 |
"name": "Sinicized Miao",
|
229 |
"iso_1_code": null,
|
230 |
"iso_3_code": "hmz",
|
|
|
231 |
"children": [],
|
232 |
+
"tokenizers": {},
|
233 |
"node_i": "3887",
|
234 |
+
"native_tokenizers": [],
|
235 |
+
"scripts": []
|
236 |
},
|
237 |
{
|
238 |
"name": "Hmong Njua",
|
239 |
"iso_1_code": null,
|
240 |
"iso_3_code": "hnj",
|
|
|
241 |
"children": [],
|
242 |
+
"tokenizers": {},
|
243 |
"node_i": "3888",
|
244 |
+
"native_tokenizers": [],
|
245 |
"scripts": [
|
246 |
"Latn"
|
247 |
+
]
|
|
|
248 |
},
|
249 |
{
|
250 |
"name": "Miao, Horned",
|
251 |
"iso_1_code": null,
|
252 |
"iso_3_code": "hrm",
|
|
|
253 |
"children": [],
|
254 |
+
"tokenizers": {},
|
255 |
"node_i": "3889",
|
256 |
+
"native_tokenizers": [],
|
257 |
+
"scripts": []
|
258 |
},
|
259 |
{
|
260 |
"name": "Miao, Northern Guiyang",
|
261 |
"iso_1_code": null,
|
262 |
"iso_3_code": "huj",
|
|
|
263 |
"children": [],
|
264 |
+
"tokenizers": {},
|
265 |
"node_i": "3890",
|
266 |
+
"native_tokenizers": [],
|
267 |
+
"scripts": []
|
268 |
},
|
269 |
{
|
270 |
"name": "Hmong Daw",
|
271 |
"iso_1_code": null,
|
272 |
"iso_3_code": "mww",
|
|
|
273 |
"children": [],
|
274 |
+
"tokenizers": {},
|
275 |
"node_i": "3891",
|
276 |
+
"native_tokenizers": [],
|
277 |
"scripts": [
|
278 |
"Latn"
|
279 |
+
]
|
|
|
280 |
},
|
281 |
{
|
282 |
"name": "Miao, Small Flowery",
|
283 |
"iso_1_code": null,
|
284 |
"iso_3_code": "sfm",
|
|
|
285 |
"children": [],
|
286 |
+
"tokenizers": {},
|
287 |
"node_i": "3892",
|
288 |
+
"native_tokenizers": [],
|
289 |
+
"scripts": []
|
290 |
}
|
291 |
],
|
292 |
+
"tokenizers": {},
|
293 |
"node_i": "3870",
|
294 |
+
"native_tokenizers": [],
|
295 |
+
"scripts": []
|
296 |
},
|
297 |
{
|
298 |
"name": "Pa-hng",
|
299 |
"iso_1_code": null,
|
300 |
"iso_3_code": null,
|
|
|
301 |
"children": [
|
302 |
{
|
303 |
"name": "Pa-Hng",
|
304 |
"iso_1_code": null,
|
305 |
"iso_3_code": "pha",
|
|
|
306 |
"children": [],
|
307 |
+
"tokenizers": {},
|
308 |
"node_i": "3894",
|
309 |
+
"native_tokenizers": [],
|
310 |
+
"scripts": []
|
311 |
}
|
312 |
],
|
313 |
+
"tokenizers": {},
|
314 |
"node_i": "3893",
|
315 |
+
"native_tokenizers": [],
|
316 |
+
"scripts": []
|
317 |
},
|
318 |
{
|
319 |
"name": "Qiandong",
|
320 |
"iso_1_code": null,
|
321 |
"iso_3_code": null,
|
|
|
322 |
"children": [
|
323 |
{
|
324 |
"name": "Miao, Northern Qiandong",
|
325 |
"iso_1_code": null,
|
326 |
"iso_3_code": "hea",
|
|
|
327 |
"children": [],
|
328 |
+
"tokenizers": {},
|
329 |
"node_i": "3896",
|
330 |
+
"native_tokenizers": [],
|
331 |
+
"scripts": []
|
332 |
},
|
333 |
{
|
334 |
"name": "Miao, Eastern Qiandong",
|
335 |
"iso_1_code": null,
|
336 |
"iso_3_code": "hmq",
|
|
|
337 |
"children": [],
|
338 |
+
"tokenizers": {},
|
339 |
"node_i": "3897",
|
340 |
+
"native_tokenizers": [],
|
341 |
+
"scripts": []
|
342 |
},
|
343 |
{
|
344 |
"name": "Miao, Southern Qiandong",
|
345 |
"iso_1_code": null,
|
346 |
"iso_3_code": "hms",
|
|
|
347 |
"children": [],
|
348 |
+
"tokenizers": {},
|
349 |
"node_i": "3898",
|
350 |
+
"native_tokenizers": [],
|
351 |
+
"scripts": []
|
352 |
},
|
353 |
{
|
354 |
"name": "N\u00e1-Meo",
|
355 |
"iso_1_code": null,
|
356 |
"iso_3_code": "neo",
|
|
|
357 |
"children": [],
|
358 |
+
"tokenizers": {},
|
359 |
"node_i": "3899",
|
360 |
+
"native_tokenizers": [],
|
361 |
+
"scripts": []
|
362 |
}
|
363 |
],
|
364 |
+
"tokenizers": {},
|
365 |
"node_i": "3895",
|
366 |
+
"native_tokenizers": [],
|
367 |
+
"scripts": []
|
368 |
},
|
369 |
{
|
370 |
"name": "Xiangxi",
|
371 |
"iso_1_code": null,
|
372 |
"iso_3_code": null,
|
|
|
373 |
"children": [
|
374 |
{
|
375 |
"name": "Miao, Western Xiangxi",
|
376 |
"iso_1_code": null,
|
377 |
"iso_3_code": "mmr",
|
|
|
378 |
"children": [],
|
379 |
+
"tokenizers": {},
|
380 |
"node_i": "3901",
|
381 |
+
"native_tokenizers": [],
|
382 |
+
"scripts": []
|
383 |
},
|
384 |
{
|
385 |
"name": "Miao, Eastern Xiangxi",
|
386 |
"iso_1_code": null,
|
387 |
"iso_3_code": "muq",
|
|
|
388 |
"children": [],
|
389 |
+
"tokenizers": {},
|
390 |
"node_i": "3902",
|
391 |
+
"native_tokenizers": [],
|
392 |
+
"scripts": []
|
393 |
}
|
394 |
],
|
395 |
+
"tokenizers": {},
|
396 |
"node_i": "3900",
|
397 |
+
"native_tokenizers": [],
|
398 |
+
"scripts": []
|
399 |
}
|
400 |
],
|
401 |
+
"tokenizers": {},
|
402 |
"node_i": "3864",
|
403 |
+
"native_tokenizers": [],
|
404 |
+
"scripts": []
|
405 |
},
|
406 |
{
|
407 |
"name": "Ho Nte",
|
408 |
"iso_1_code": null,
|
409 |
"iso_3_code": null,
|
|
|
410 |
"children": [
|
411 |
{
|
412 |
"name": "She",
|
413 |
"iso_1_code": null,
|
414 |
"iso_3_code": "shx",
|
|
|
415 |
"children": [],
|
416 |
+
"tokenizers": {},
|
417 |
"node_i": "3904",
|
418 |
+
"native_tokenizers": [],
|
419 |
+
"scripts": []
|
420 |
}
|
421 |
],
|
422 |
+
"tokenizers": {},
|
423 |
"node_i": "3903",
|
424 |
+
"native_tokenizers": [],
|
425 |
+
"scripts": []
|
426 |
},
|
427 |
{
|
428 |
"name": "Mienic",
|
429 |
"iso_1_code": null,
|
430 |
"iso_3_code": null,
|
|
|
431 |
"children": [
|
432 |
{
|
433 |
"name": "Biao-Jiao",
|
434 |
"iso_1_code": null,
|
435 |
"iso_3_code": null,
|
|
|
436 |
"children": [
|
437 |
{
|
438 |
"name": "Biao-Jiao Mien",
|
439 |
"iso_1_code": null,
|
440 |
"iso_3_code": "bje",
|
|
|
441 |
"children": [],
|
442 |
+
"tokenizers": {},
|
443 |
"node_i": "3907",
|
444 |
+
"native_tokenizers": [],
|
445 |
+
"scripts": []
|
446 |
}
|
447 |
],
|
448 |
+
"tokenizers": {},
|
449 |
"node_i": "3906",
|
450 |
+
"native_tokenizers": [],
|
451 |
+
"scripts": []
|
452 |
},
|
453 |
{
|
454 |
"name": "Mian-Jin",
|
455 |
"iso_1_code": null,
|
456 |
"iso_3_code": null,
|
|
|
457 |
"children": [
|
458 |
{
|
459 |
"name": "Biao Mon",
|
460 |
"iso_1_code": null,
|
461 |
"iso_3_code": "bmt",
|
|
|
462 |
"children": [],
|
463 |
+
"tokenizers": {},
|
464 |
"node_i": "3909",
|
465 |
+
"native_tokenizers": [],
|
466 |
+
"scripts": []
|
467 |
},
|
468 |
{
|
469 |
"name": "Iu Mien",
|
470 |
"iso_1_code": null,
|
471 |
"iso_3_code": "ium",
|
|
|
472 |
"children": [],
|
473 |
+
"tokenizers": {},
|
474 |
"node_i": "3910",
|
475 |
+
"native_tokenizers": [],
|
476 |
"scripts": [
|
477 |
"Latn"
|
478 |
+
]
|
|
|
479 |
},
|
480 |
{
|
481 |
"name": "Kim Mun",
|
482 |
"iso_1_code": null,
|
483 |
"iso_3_code": "mji",
|
|
|
484 |
"children": [],
|
485 |
+
"tokenizers": {},
|
486 |
"node_i": "3911",
|
487 |
+
"native_tokenizers": [],
|
488 |
+
"scripts": []
|
489 |
}
|
490 |
],
|
491 |
+
"tokenizers": {},
|
492 |
"node_i": "3908",
|
493 |
+
"native_tokenizers": [],
|
494 |
+
"scripts": []
|
495 |
},
|
496 |
{
|
497 |
"name": "Zaomin",
|
498 |
"iso_1_code": null,
|
499 |
"iso_3_code": null,
|
|
|
500 |
"children": [
|
501 |
{
|
502 |
"name": "Dzao Min",
|
503 |
"iso_1_code": null,
|
504 |
"iso_3_code": "bpn",
|
|
|
505 |
"children": [],
|
506 |
+
"tokenizers": {},
|
507 |
"node_i": "3913",
|
508 |
+
"native_tokenizers": [],
|
509 |
+
"scripts": []
|
510 |
}
|
511 |
],
|
512 |
+
"tokenizers": {},
|
513 |
"node_i": "3912",
|
514 |
+
"native_tokenizers": [],
|
515 |
+
"scripts": []
|
516 |
}
|
517 |
],
|
518 |
+
"tokenizers": {},
|
519 |
"node_i": "3905",
|
520 |
+
"native_tokenizers": [],
|
521 |
+
"scripts": []
|
522 |
}
|
523 |
],
|
524 |
+
"tokenizers": {},
|
525 |
"node_i": "3863",
|
526 |
+
"native_tokenizers": [],
|
527 |
+
"scripts": []
|
528 |
}
|