guipenedo HF staff commited on
Commit
0741edf
1 Parent(s): f2b528f
This view is limited to 50 files because it contains too many changes.   See raw diff
data/Abkhaz-Adyghe.json CHANGED
@@ -2,101 +2,101 @@
2
  "name": "Abkhaz-Adyghe",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Abkhaz-Abazin",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Abkhaz",
15
  "iso_1_code": "ab",
16
  "iso_3_code": "abk",
17
- "tokenizers": {},
18
  "children": [],
 
19
  "node_i": "2",
 
20
  "scripts": [
21
  "Cyrl"
22
- ],
23
- "own_tokenizer": false
24
  },
25
  {
26
  "name": "Abaza",
27
  "iso_1_code": null,
28
  "iso_3_code": "abq",
29
- "tokenizers": {},
30
  "children": [],
 
31
  "node_i": "3",
 
32
  "scripts": [
33
  "Cyrl"
34
- ],
35
- "own_tokenizer": false
36
  }
37
  ],
 
38
  "node_i": "1",
39
- "scripts": [],
40
- "own_tokenizer": false
41
  },
42
  {
43
  "name": "Circassian",
44
  "iso_1_code": null,
45
  "iso_3_code": null,
46
- "tokenizers": {},
47
  "children": [
48
  {
49
  "name": "Adyghe",
50
  "iso_1_code": null,
51
  "iso_3_code": "ady",
52
- "tokenizers": {},
53
  "children": [],
 
54
  "node_i": "5",
 
55
  "scripts": [
56
  "Cyrl"
57
- ],
58
- "own_tokenizer": false
59
  },
60
  {
61
  "name": "Kabardian",
62
  "iso_1_code": null,
63
  "iso_3_code": "kbd",
64
- "tokenizers": {},
65
  "children": [],
 
66
  "node_i": "6",
 
67
  "scripts": [
68
  "Cyrl"
69
- ],
70
- "own_tokenizer": false
71
  }
72
  ],
 
73
  "node_i": "4",
74
- "scripts": [],
75
- "own_tokenizer": false
76
  },
77
  {
78
  "name": "Ubyx",
79
  "iso_1_code": null,
80
  "iso_3_code": null,
81
- "tokenizers": {},
82
  "children": [
83
  {
84
  "name": "Ubykh",
85
  "iso_1_code": null,
86
  "iso_3_code": "uby",
87
- "tokenizers": {},
88
  "children": [],
 
89
  "node_i": "8",
90
- "scripts": [],
91
- "own_tokenizer": false
92
  }
93
  ],
 
94
  "node_i": "7",
95
- "scripts": [],
96
- "own_tokenizer": false
97
  }
98
  ],
 
99
  "node_i": "0",
100
- "scripts": [],
101
- "own_tokenizer": false
102
  }
 
2
  "name": "Abkhaz-Adyghe",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Abkhaz-Abazin",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Abkhaz",
13
  "iso_1_code": "ab",
14
  "iso_3_code": "abk",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "2",
18
+ "native_tokenizers": [],
19
  "scripts": [
20
  "Cyrl"
21
+ ]
 
22
  },
23
  {
24
  "name": "Abaza",
25
  "iso_1_code": null,
26
  "iso_3_code": "abq",
 
27
  "children": [],
28
+ "tokenizers": {},
29
  "node_i": "3",
30
+ "native_tokenizers": [],
31
  "scripts": [
32
  "Cyrl"
33
+ ]
 
34
  }
35
  ],
36
+ "tokenizers": {},
37
  "node_i": "1",
38
+ "native_tokenizers": [],
39
+ "scripts": []
40
  },
41
  {
42
  "name": "Circassian",
43
  "iso_1_code": null,
44
  "iso_3_code": null,
 
45
  "children": [
46
  {
47
  "name": "Adyghe",
48
  "iso_1_code": null,
49
  "iso_3_code": "ady",
 
50
  "children": [],
51
+ "tokenizers": {},
52
  "node_i": "5",
53
+ "native_tokenizers": [],
54
  "scripts": [
55
  "Cyrl"
56
+ ]
 
57
  },
58
  {
59
  "name": "Kabardian",
60
  "iso_1_code": null,
61
  "iso_3_code": "kbd",
 
62
  "children": [],
63
+ "tokenizers": {},
64
  "node_i": "6",
65
+ "native_tokenizers": [],
66
  "scripts": [
67
  "Cyrl"
68
+ ]
 
69
  }
70
  ],
71
+ "tokenizers": {},
72
  "node_i": "4",
73
+ "native_tokenizers": [],
74
+ "scripts": []
75
  },
76
  {
77
  "name": "Ubyx",
78
  "iso_1_code": null,
79
  "iso_3_code": null,
 
80
  "children": [
81
  {
82
  "name": "Ubykh",
83
  "iso_1_code": null,
84
  "iso_3_code": "uby",
 
85
  "children": [],
86
+ "tokenizers": {},
87
  "node_i": "8",
88
+ "native_tokenizers": [],
89
+ "scripts": []
90
  }
91
  ],
92
+ "tokenizers": {},
93
  "node_i": "7",
94
+ "native_tokenizers": [],
95
+ "scripts": []
96
  }
97
  ],
98
+ "tokenizers": {},
99
  "node_i": "0",
100
+ "native_tokenizers": [],
101
+ "scripts": []
102
  }
data/Afro-Asiatic.json CHANGED
The diff for this file is too large to render. See raw diff
 
data/Algic.json CHANGED
@@ -2,630 +2,630 @@
2
  "name": "Algic",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Algonquian",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Blackfoot",
15
  "iso_1_code": null,
16
  "iso_3_code": "bla",
17
- "tokenizers": {},
18
  "children": [],
 
19
  "node_i": "558",
 
20
  "scripts": [
21
  "Latn"
22
- ],
23
- "own_tokenizer": false
24
  },
25
  {
26
  "name": "Cheyenne",
27
  "iso_1_code": null,
28
  "iso_3_code": "chy",
29
- "tokenizers": {},
30
  "children": [],
 
31
  "node_i": "559",
32
- "scripts": [],
33
- "own_tokenizer": false
34
  },
35
  {
36
  "name": "Menominee",
37
  "iso_1_code": null,
38
  "iso_3_code": "mez",
39
- "tokenizers": {},
40
  "children": [],
 
41
  "node_i": "560",
42
- "scripts": [],
43
- "own_tokenizer": false
44
  },
45
  {
46
  "name": "Miami",
47
  "iso_1_code": null,
48
  "iso_3_code": "mia",
49
- "tokenizers": {},
50
  "children": [],
 
51
  "node_i": "561",
52
- "scripts": [],
53
- "own_tokenizer": false
54
  },
55
  {
56
  "name": "Nawathinehena",
57
  "iso_1_code": null,
58
  "iso_3_code": "nwa",
59
- "tokenizers": {},
60
  "children": [],
 
61
  "node_i": "562",
62
- "scripts": [],
63
- "own_tokenizer": false
64
  },
65
  {
66
  "name": "Shawnee",
67
  "iso_1_code": null,
68
  "iso_3_code": "sjw",
69
- "tokenizers": {},
70
  "children": [],
 
71
  "node_i": "563",
72
- "scripts": [],
73
- "own_tokenizer": false
74
  },
75
  {
76
  "name": "Arapaho",
77
  "iso_1_code": null,
78
  "iso_3_code": null,
79
- "tokenizers": {},
80
  "children": [
81
  {
82
  "name": "Arapaho",
83
  "iso_1_code": null,
84
  "iso_3_code": "arp",
85
- "tokenizers": {},
86
  "children": [],
 
87
  "node_i": "565",
 
88
  "scripts": [
89
  "Latn"
90
- ],
91
- "own_tokenizer": false
92
  },
93
  {
94
  "name": "Gros Ventre",
95
  "iso_1_code": null,
96
  "iso_3_code": "ats",
97
- "tokenizers": {},
98
  "children": [],
 
99
  "node_i": "566",
100
- "scripts": [],
101
- "own_tokenizer": false
102
  }
103
  ],
 
104
  "node_i": "564",
105
- "scripts": [],
106
- "own_tokenizer": false
107
  },
108
  {
109
  "name": "Cree-Montagnais",
110
  "iso_1_code": null,
111
  "iso_3_code": null,
112
- "tokenizers": {},
113
  "children": [
114
  {
115
  "name": "Atikamekw",
116
  "iso_1_code": null,
117
  "iso_3_code": "atj",
118
- "tokenizers": {},
119
  "children": [],
 
120
  "node_i": "568",
 
121
  "scripts": [
122
  "Latn"
123
- ],
124
- "own_tokenizer": false
125
  },
126
  {
127
  "name": "Cree, Southern East",
128
  "iso_1_code": "cr",
129
  "iso_3_code": "crj",
130
- "tokenizers": {},
131
  "children": [],
 
132
  "node_i": "569",
 
133
  "scripts": [
134
  "Cans"
135
- ],
136
- "own_tokenizer": false
137
  },
138
  {
139
  "name": "Cree, Plains",
140
  "iso_1_code": "cr",
141
  "iso_3_code": "crk",
142
- "tokenizers": {},
143
  "children": [],
 
144
  "node_i": "570",
 
145
  "scripts": [
146
  "Latn",
147
  "Cans"
148
- ],
149
- "own_tokenizer": false
150
  },
151
  {
152
  "name": "Cree, Northern East",
153
  "iso_1_code": "cr",
154
  "iso_3_code": "crl",
155
- "tokenizers": {},
156
  "children": [],
 
157
  "node_i": "571",
 
158
  "scripts": [
159
  "Cans"
160
- ],
161
- "own_tokenizer": false
162
  },
163
  {
164
  "name": "Cree, Moose",
165
  "iso_1_code": "cr",
166
  "iso_3_code": "crm",
167
- "tokenizers": {},
168
  "children": [],
 
169
  "node_i": "572",
 
170
  "scripts": [
171
  "Cans"
172
- ],
173
- "own_tokenizer": false
174
  },
175
  {
176
  "name": "Cree, Swampy",
177
  "iso_1_code": "cr",
178
  "iso_3_code": "csw",
179
- "tokenizers": {},
180
  "children": [],
 
181
  "node_i": "573",
 
182
  "scripts": [
183
  "Latn"
184
- ],
185
- "own_tokenizer": false
186
  },
187
  {
188
  "name": "Cree, Woods",
189
  "iso_1_code": "cr",
190
  "iso_3_code": "cwd",
191
- "tokenizers": {},
192
  "children": [],
 
193
  "node_i": "574",
 
194
  "scripts": [
195
  "Cans"
196
- ],
197
- "own_tokenizer": false
198
  },
199
  {
200
  "name": "Innu",
201
  "iso_1_code": null,
202
  "iso_3_code": "moe",
203
- "tokenizers": {},
204
  "children": [],
 
205
  "node_i": "575",
206
- "scripts": [],
207
- "own_tokenizer": false
208
  },
209
  {
210
  "name": "Naskapi",
211
  "iso_1_code": null,
212
  "iso_3_code": "nsk",
213
- "tokenizers": {},
214
  "children": [],
 
215
  "node_i": "576",
216
- "scripts": [],
217
- "own_tokenizer": false
218
  }
219
  ],
 
220
  "node_i": "567",
221
- "scripts": [],
222
- "own_tokenizer": false
223
  },
224
  {
225
  "name": "Eastern Algonquian",
226
  "iso_1_code": null,
227
  "iso_3_code": null,
228
- "tokenizers": {},
229
  "children": [
230
  {
231
  "name": "Carolina Algonquian",
232
  "iso_1_code": null,
233
  "iso_3_code": "crr",
234
- "tokenizers": {},
235
  "children": [],
 
236
  "node_i": "578",
237
- "scripts": [],
238
- "own_tokenizer": false
239
  },
240
  {
241
  "name": "Etchemin",
242
  "iso_1_code": null,
243
  "iso_3_code": "etc",
244
- "tokenizers": {},
245
  "children": [],
 
246
  "node_i": "579",
247
- "scripts": [],
248
- "own_tokenizer": false
249
  },
250
  {
251
  "name": "Mi\u2019kmaq",
252
  "iso_1_code": null,
253
  "iso_3_code": "mic",
254
- "tokenizers": {},
255
  "children": [],
 
256
  "node_i": "580",
 
257
  "scripts": [
258
  "Latn"
259
- ],
260
- "own_tokenizer": false
261
  },
262
  {
263
  "name": "Powhatan",
264
  "iso_1_code": null,
265
  "iso_3_code": "pim",
266
- "tokenizers": {},
267
  "children": [],
 
268
  "node_i": "581",
269
- "scripts": [],
270
- "own_tokenizer": false
271
  },
272
  {
273
  "name": "Malecite-Passamaquoddy",
274
  "iso_1_code": null,
275
  "iso_3_code": "pqm",
276
- "tokenizers": {},
277
  "children": [],
 
278
  "node_i": "582",
279
- "scripts": [],
280
- "own_tokenizer": false
281
  },
282
  {
283
  "name": "Quiripi",
284
  "iso_1_code": null,
285
  "iso_3_code": "qyp",
286
- "tokenizers": {},
287
  "children": [],
 
288
  "node_i": "583",
289
- "scripts": [],
290
- "own_tokenizer": false
291
  },
292
  {
293
  "name": "Wampanoag",
294
  "iso_1_code": null,
295
  "iso_3_code": "wam",
296
- "tokenizers": {},
297
  "children": [],
 
298
  "node_i": "584",
299
- "scripts": [],
300
- "own_tokenizer": false
301
  },
302
  {
303
  "name": "Loup B",
304
  "iso_1_code": null,
305
  "iso_3_code": "xlb",
306
- "tokenizers": {},
307
  "children": [],
 
308
  "node_i": "585",
309
- "scripts": [],
310
- "own_tokenizer": false
311
  },
312
  {
313
  "name": "Loup A",
314
  "iso_1_code": null,
315
  "iso_3_code": "xlo",
316
- "tokenizers": {},
317
  "children": [],
 
318
  "node_i": "586",
319
- "scripts": [],
320
- "own_tokenizer": false
321
  },
322
  {
323
  "name": "Narragansett",
324
  "iso_1_code": null,
325
  "iso_3_code": "xnt",
326
- "tokenizers": {},
327
  "children": [],
 
328
  "node_i": "587",
329
- "scripts": [],
330
- "own_tokenizer": false
331
  },
332
  {
333
  "name": "Mohegan-Pequot",
334
  "iso_1_code": null,
335
  "iso_3_code": "xpq",
336
- "tokenizers": {},
337
  "children": [],
 
338
  "node_i": "588",
339
- "scripts": [],
340
- "own_tokenizer": false
341
  },
342
  {
343
  "name": "Abenaki",
344
  "iso_1_code": null,
345
  "iso_3_code": null,
346
- "tokenizers": {},
347
  "children": [
348
  {
349
  "name": "Abenaki, Eastern",
350
  "iso_1_code": null,
351
  "iso_3_code": "aaq",
352
- "tokenizers": {},
353
  "children": [],
 
354
  "node_i": "590",
355
- "scripts": [],
356
- "own_tokenizer": false
357
  },
358
  {
359
  "name": "Abenaki, Western",
360
  "iso_1_code": null,
361
  "iso_3_code": "abe",
362
- "tokenizers": {},
363
  "children": [],
 
364
  "node_i": "591",
365
- "scripts": [],
366
- "own_tokenizer": false
367
  }
368
  ],
 
369
  "node_i": "589",
370
- "scripts": [],
371
- "own_tokenizer": false
372
  },
373
  {
374
  "name": "Delaware",
375
  "iso_1_code": null,
376
  "iso_3_code": null,
377
- "tokenizers": {},
378
  "children": [
379
  {
380
  "name": "Mahican",
381
  "iso_1_code": null,
382
  "iso_3_code": "mjy",
383
- "tokenizers": {},
384
  "children": [],
 
385
  "node_i": "593",
386
- "scripts": [],
387
- "own_tokenizer": false
388
  },
389
  {
390
  "name": "Munsee",
391
  "iso_1_code": null,
392
  "iso_3_code": "umu",
393
- "tokenizers": {},
394
  "children": [],
 
395
  "node_i": "594",
396
- "scripts": [],
397
- "own_tokenizer": false
398
  },
399
  {
400
  "name": "Unami",
401
  "iso_1_code": null,
402
  "iso_3_code": "unm",
403
- "tokenizers": {},
404
  "children": [],
 
405
  "node_i": "595",
406
- "scripts": [],
407
- "own_tokenizer": false
408
  }
409
  ],
 
410
  "node_i": "592",
411
- "scripts": [],
412
- "own_tokenizer": false
413
  },
414
  {
415
  "name": "Nanticoke-Conoy",
416
  "iso_1_code": null,
417
  "iso_3_code": null,
418
- "tokenizers": {},
419
  "children": [
420
  {
421
  "name": "Nanticoke",
422
  "iso_1_code": null,
423
  "iso_3_code": "nnt",
424
- "tokenizers": {},
425
  "children": [],
 
426
  "node_i": "597",
427
- "scripts": [],
428
- "own_tokenizer": false
429
  },
430
  {
431
  "name": "Piscataway",
432
  "iso_1_code": null,
433
  "iso_3_code": "psy",
434
- "tokenizers": {},
435
  "children": [],
 
436
  "node_i": "598",
437
- "scripts": [],
438
- "own_tokenizer": false
439
  }
440
  ],
 
441
  "node_i": "596",
442
- "scripts": [],
443
- "own_tokenizer": false
444
  }
445
  ],
 
446
  "node_i": "577",
447
- "scripts": [],
448
- "own_tokenizer": false
449
  },
450
  {
451
  "name": "Fox",
452
  "iso_1_code": null,
453
  "iso_3_code": null,
454
- "tokenizers": {},
455
  "children": [
456
  {
457
  "name": "Kickapoo",
458
  "iso_1_code": null,
459
  "iso_3_code": "kic",
460
- "tokenizers": {},
461
  "children": [],
 
462
  "node_i": "600",
463
- "scripts": [],
464
- "own_tokenizer": false
465
  },
466
  {
467
  "name": "Meskwaki",
468
  "iso_1_code": null,
469
  "iso_3_code": "sac",
470
- "tokenizers": {},
471
  "children": [],
 
472
  "node_i": "601",
473
- "scripts": [],
474
- "own_tokenizer": false
475
  }
476
  ],
 
477
  "node_i": "599",
478
- "scripts": [],
479
- "own_tokenizer": false
480
  },
481
  {
482
  "name": "Ojibwa-Potawatomi",
483
  "iso_1_code": null,
484
  "iso_3_code": null,
485
- "tokenizers": {},
486
  "children": [
487
  {
488
  "name": "Algonquin",
489
  "iso_1_code": null,
490
  "iso_3_code": "alq",
491
- "tokenizers": {},
492
  "children": [],
 
493
  "node_i": "603",
 
494
  "scripts": [
495
  "Latn"
496
- ],
497
- "own_tokenizer": false
498
  },
499
  {
500
  "name": "Chippewa",
501
  "iso_1_code": "oj",
502
  "iso_3_code": "ciw",
503
- "tokenizers": {},
504
  "children": [],
 
505
  "node_i": "604",
506
- "scripts": [],
507
- "own_tokenizer": false
508
  },
509
  {
510
  "name": "Ojibwa, Northwestern",
511
  "iso_1_code": "oj",
512
  "iso_3_code": "ojb",
513
- "tokenizers": {},
514
  "children": [],
 
515
  "node_i": "605",
 
516
  "scripts": [
517
- "Latn",
518
- "Cans"
519
- ],
520
- "own_tokenizer": false
521
  },
522
  {
523
  "name": "Ojibwa, Central",
524
  "iso_1_code": "oj",
525
  "iso_3_code": "ojc",
526
- "tokenizers": {},
527
  "children": [],
 
528
  "node_i": "606",
529
- "scripts": [],
530
- "own_tokenizer": false
531
  },
532
  {
533
  "name": "Ojibwa, Eastern",
534
  "iso_1_code": "oj",
535
  "iso_3_code": "ojg",
536
- "tokenizers": {},
537
  "children": [],
 
538
  "node_i": "607",
539
- "scripts": [],
540
- "own_tokenizer": false
541
  },
542
  {
543
  "name": "Oji-Cree",
544
  "iso_1_code": "oj",
545
  "iso_3_code": "ojs",
546
- "tokenizers": {},
547
  "children": [],
 
548
  "node_i": "608",
549
- "scripts": [],
550
- "own_tokenizer": false
551
  },
552
  {
553
  "name": "Ojibwa, Western",
554
  "iso_1_code": "oj",
555
  "iso_3_code": "ojw",
556
- "tokenizers": {},
557
  "children": [],
 
558
  "node_i": "609",
559
- "scripts": [],
560
- "own_tokenizer": false
561
  },
562
  {
563
  "name": "Ottawa",
564
  "iso_1_code": "oj",
565
  "iso_3_code": "otw",
566
- "tokenizers": {},
567
  "children": [],
 
568
  "node_i": "610",
 
569
  "scripts": [
570
  "Latn"
571
- ],
572
- "own_tokenizer": false
573
  },
574
  {
575
  "name": "Potawatomi",
576
  "iso_1_code": null,
577
  "iso_3_code": "pot",
578
- "tokenizers": {},
579
  "children": [],
 
580
  "node_i": "611",
 
581
  "scripts": [
582
  "Latn"
583
- ],
584
- "own_tokenizer": false
585
  }
586
  ],
 
587
  "node_i": "602",
588
- "scripts": [],
589
- "own_tokenizer": false
590
  }
591
  ],
 
592
  "node_i": "557",
593
- "scripts": [],
594
- "own_tokenizer": false
595
  },
596
  {
597
  "name": "Ritwan",
598
  "iso_1_code": null,
599
  "iso_3_code": null,
600
- "tokenizers": {},
601
  "children": [
602
  {
603
  "name": "Wiyot",
604
  "iso_1_code": null,
605
  "iso_3_code": "wiy",
606
- "tokenizers": {},
607
  "children": [],
 
608
  "node_i": "613",
609
- "scripts": [],
610
- "own_tokenizer": false
611
  },
612
  {
613
  "name": "Yurok",
614
  "iso_1_code": null,
615
  "iso_3_code": "yur",
616
- "tokenizers": {},
617
  "children": [],
 
618
  "node_i": "614",
619
- "scripts": [],
620
- "own_tokenizer": false
621
  }
622
  ],
 
623
  "node_i": "612",
624
- "scripts": [],
625
- "own_tokenizer": false
626
  }
627
  ],
 
628
  "node_i": "556",
629
- "scripts": [],
630
- "own_tokenizer": false
631
  }
 
2
  "name": "Algic",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Algonquian",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Blackfoot",
13
  "iso_1_code": null,
14
  "iso_3_code": "bla",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "558",
18
+ "native_tokenizers": [],
19
  "scripts": [
20
  "Latn"
21
+ ]
 
22
  },
23
  {
24
  "name": "Cheyenne",
25
  "iso_1_code": null,
26
  "iso_3_code": "chy",
 
27
  "children": [],
28
+ "tokenizers": {},
29
  "node_i": "559",
30
+ "native_tokenizers": [],
31
+ "scripts": []
32
  },
33
  {
34
  "name": "Menominee",
35
  "iso_1_code": null,
36
  "iso_3_code": "mez",
 
37
  "children": [],
38
+ "tokenizers": {},
39
  "node_i": "560",
40
+ "native_tokenizers": [],
41
+ "scripts": []
42
  },
43
  {
44
  "name": "Miami",
45
  "iso_1_code": null,
46
  "iso_3_code": "mia",
 
47
  "children": [],
48
+ "tokenizers": {},
49
  "node_i": "561",
50
+ "native_tokenizers": [],
51
+ "scripts": []
52
  },
53
  {
54
  "name": "Nawathinehena",
55
  "iso_1_code": null,
56
  "iso_3_code": "nwa",
 
57
  "children": [],
58
+ "tokenizers": {},
59
  "node_i": "562",
60
+ "native_tokenizers": [],
61
+ "scripts": []
62
  },
63
  {
64
  "name": "Shawnee",
65
  "iso_1_code": null,
66
  "iso_3_code": "sjw",
 
67
  "children": [],
68
+ "tokenizers": {},
69
  "node_i": "563",
70
+ "native_tokenizers": [],
71
+ "scripts": []
72
  },
73
  {
74
  "name": "Arapaho",
75
  "iso_1_code": null,
76
  "iso_3_code": null,
 
77
  "children": [
78
  {
79
  "name": "Arapaho",
80
  "iso_1_code": null,
81
  "iso_3_code": "arp",
 
82
  "children": [],
83
+ "tokenizers": {},
84
  "node_i": "565",
85
+ "native_tokenizers": [],
86
  "scripts": [
87
  "Latn"
88
+ ]
 
89
  },
90
  {
91
  "name": "Gros Ventre",
92
  "iso_1_code": null,
93
  "iso_3_code": "ats",
 
94
  "children": [],
95
+ "tokenizers": {},
96
  "node_i": "566",
97
+ "native_tokenizers": [],
98
+ "scripts": []
99
  }
100
  ],
101
+ "tokenizers": {},
102
  "node_i": "564",
103
+ "native_tokenizers": [],
104
+ "scripts": []
105
  },
106
  {
107
  "name": "Cree-Montagnais",
108
  "iso_1_code": null,
109
  "iso_3_code": null,
 
110
  "children": [
111
  {
112
  "name": "Atikamekw",
113
  "iso_1_code": null,
114
  "iso_3_code": "atj",
 
115
  "children": [],
116
+ "tokenizers": {},
117
  "node_i": "568",
118
+ "native_tokenizers": [],
119
  "scripts": [
120
  "Latn"
121
+ ]
 
122
  },
123
  {
124
  "name": "Cree, Southern East",
125
  "iso_1_code": "cr",
126
  "iso_3_code": "crj",
 
127
  "children": [],
128
+ "tokenizers": {},
129
  "node_i": "569",
130
+ "native_tokenizers": [],
131
  "scripts": [
132
  "Cans"
133
+ ]
 
134
  },
135
  {
136
  "name": "Cree, Plains",
137
  "iso_1_code": "cr",
138
  "iso_3_code": "crk",
 
139
  "children": [],
140
+ "tokenizers": {},
141
  "node_i": "570",
142
+ "native_tokenizers": [],
143
  "scripts": [
144
  "Latn",
145
  "Cans"
146
+ ]
 
147
  },
148
  {
149
  "name": "Cree, Northern East",
150
  "iso_1_code": "cr",
151
  "iso_3_code": "crl",
 
152
  "children": [],
153
+ "tokenizers": {},
154
  "node_i": "571",
155
+ "native_tokenizers": [],
156
  "scripts": [
157
  "Cans"
158
+ ]
 
159
  },
160
  {
161
  "name": "Cree, Moose",
162
  "iso_1_code": "cr",
163
  "iso_3_code": "crm",
 
164
  "children": [],
165
+ "tokenizers": {},
166
  "node_i": "572",
167
+ "native_tokenizers": [],
168
  "scripts": [
169
  "Cans"
170
+ ]
 
171
  },
172
  {
173
  "name": "Cree, Swampy",
174
  "iso_1_code": "cr",
175
  "iso_3_code": "csw",
 
176
  "children": [],
177
+ "tokenizers": {},
178
  "node_i": "573",
179
+ "native_tokenizers": [],
180
  "scripts": [
181
  "Latn"
182
+ ]
 
183
  },
184
  {
185
  "name": "Cree, Woods",
186
  "iso_1_code": "cr",
187
  "iso_3_code": "cwd",
 
188
  "children": [],
189
+ "tokenizers": {},
190
  "node_i": "574",
191
+ "native_tokenizers": [],
192
  "scripts": [
193
  "Cans"
194
+ ]
 
195
  },
196
  {
197
  "name": "Innu",
198
  "iso_1_code": null,
199
  "iso_3_code": "moe",
 
200
  "children": [],
201
+ "tokenizers": {},
202
  "node_i": "575",
203
+ "native_tokenizers": [],
204
+ "scripts": []
205
  },
206
  {
207
  "name": "Naskapi",
208
  "iso_1_code": null,
209
  "iso_3_code": "nsk",
 
210
  "children": [],
211
+ "tokenizers": {},
212
  "node_i": "576",
213
+ "native_tokenizers": [],
214
+ "scripts": []
215
  }
216
  ],
217
+ "tokenizers": {},
218
  "node_i": "567",
219
+ "native_tokenizers": [],
220
+ "scripts": []
221
  },
222
  {
223
  "name": "Eastern Algonquian",
224
  "iso_1_code": null,
225
  "iso_3_code": null,
 
226
  "children": [
227
  {
228
  "name": "Carolina Algonquian",
229
  "iso_1_code": null,
230
  "iso_3_code": "crr",
 
231
  "children": [],
232
+ "tokenizers": {},
233
  "node_i": "578",
234
+ "native_tokenizers": [],
235
+ "scripts": []
236
  },
237
  {
238
  "name": "Etchemin",
239
  "iso_1_code": null,
240
  "iso_3_code": "etc",
 
241
  "children": [],
242
+ "tokenizers": {},
243
  "node_i": "579",
244
+ "native_tokenizers": [],
245
+ "scripts": []
246
  },
247
  {
248
  "name": "Mi\u2019kmaq",
249
  "iso_1_code": null,
250
  "iso_3_code": "mic",
 
251
  "children": [],
252
+ "tokenizers": {},
253
  "node_i": "580",
254
+ "native_tokenizers": [],
255
  "scripts": [
256
  "Latn"
257
+ ]
 
258
  },
259
  {
260
  "name": "Powhatan",
261
  "iso_1_code": null,
262
  "iso_3_code": "pim",
 
263
  "children": [],
264
+ "tokenizers": {},
265
  "node_i": "581",
266
+ "native_tokenizers": [],
267
+ "scripts": []
268
  },
269
  {
270
  "name": "Malecite-Passamaquoddy",
271
  "iso_1_code": null,
272
  "iso_3_code": "pqm",
 
273
  "children": [],
274
+ "tokenizers": {},
275
  "node_i": "582",
276
+ "native_tokenizers": [],
277
+ "scripts": []
278
  },
279
  {
280
  "name": "Quiripi",
281
  "iso_1_code": null,
282
  "iso_3_code": "qyp",
 
283
  "children": [],
284
+ "tokenizers": {},
285
  "node_i": "583",
286
+ "native_tokenizers": [],
287
+ "scripts": []
288
  },
289
  {
290
  "name": "Wampanoag",
291
  "iso_1_code": null,
292
  "iso_3_code": "wam",
 
293
  "children": [],
294
+ "tokenizers": {},
295
  "node_i": "584",
296
+ "native_tokenizers": [],
297
+ "scripts": []
298
  },
299
  {
300
  "name": "Loup B",
301
  "iso_1_code": null,
302
  "iso_3_code": "xlb",
 
303
  "children": [],
304
+ "tokenizers": {},
305
  "node_i": "585",
306
+ "native_tokenizers": [],
307
+ "scripts": []
308
  },
309
  {
310
  "name": "Loup A",
311
  "iso_1_code": null,
312
  "iso_3_code": "xlo",
 
313
  "children": [],
314
+ "tokenizers": {},
315
  "node_i": "586",
316
+ "native_tokenizers": [],
317
+ "scripts": []
318
  },
319
  {
320
  "name": "Narragansett",
321
  "iso_1_code": null,
322
  "iso_3_code": "xnt",
 
323
  "children": [],
324
+ "tokenizers": {},
325
  "node_i": "587",
326
+ "native_tokenizers": [],
327
+ "scripts": []
328
  },
329
  {
330
  "name": "Mohegan-Pequot",
331
  "iso_1_code": null,
332
  "iso_3_code": "xpq",
 
333
  "children": [],
334
+ "tokenizers": {},
335
  "node_i": "588",
336
+ "native_tokenizers": [],
337
+ "scripts": []
338
  },
339
  {
340
  "name": "Abenaki",
341
  "iso_1_code": null,
342
  "iso_3_code": null,
 
343
  "children": [
344
  {
345
  "name": "Abenaki, Eastern",
346
  "iso_1_code": null,
347
  "iso_3_code": "aaq",
 
348
  "children": [],
349
+ "tokenizers": {},
350
  "node_i": "590",
351
+ "native_tokenizers": [],
352
+ "scripts": []
353
  },
354
  {
355
  "name": "Abenaki, Western",
356
  "iso_1_code": null,
357
  "iso_3_code": "abe",
 
358
  "children": [],
359
+ "tokenizers": {},
360
  "node_i": "591",
361
+ "native_tokenizers": [],
362
+ "scripts": []
363
  }
364
  ],
365
+ "tokenizers": {},
366
  "node_i": "589",
367
+ "native_tokenizers": [],
368
+ "scripts": []
369
  },
370
  {
371
  "name": "Delaware",
372
  "iso_1_code": null,
373
  "iso_3_code": null,
 
374
  "children": [
375
  {
376
  "name": "Mahican",
377
  "iso_1_code": null,
378
  "iso_3_code": "mjy",
 
379
  "children": [],
380
+ "tokenizers": {},
381
  "node_i": "593",
382
+ "native_tokenizers": [],
383
+ "scripts": []
384
  },
385
  {
386
  "name": "Munsee",
387
  "iso_1_code": null,
388
  "iso_3_code": "umu",
 
389
  "children": [],
390
+ "tokenizers": {},
391
  "node_i": "594",
392
+ "native_tokenizers": [],
393
+ "scripts": []
394
  },
395
  {
396
  "name": "Unami",
397
  "iso_1_code": null,
398
  "iso_3_code": "unm",
 
399
  "children": [],
400
+ "tokenizers": {},
401
  "node_i": "595",
402
+ "native_tokenizers": [],
403
+ "scripts": []
404
  }
405
  ],
406
+ "tokenizers": {},
407
  "node_i": "592",
408
+ "native_tokenizers": [],
409
+ "scripts": []
410
  },
411
  {
412
  "name": "Nanticoke-Conoy",
413
  "iso_1_code": null,
414
  "iso_3_code": null,
 
415
  "children": [
416
  {
417
  "name": "Nanticoke",
418
  "iso_1_code": null,
419
  "iso_3_code": "nnt",
 
420
  "children": [],
421
+ "tokenizers": {},
422
  "node_i": "597",
423
+ "native_tokenizers": [],
424
+ "scripts": []
425
  },
426
  {
427
  "name": "Piscataway",
428
  "iso_1_code": null,
429
  "iso_3_code": "psy",
 
430
  "children": [],
431
+ "tokenizers": {},
432
  "node_i": "598",
433
+ "native_tokenizers": [],
434
+ "scripts": []
435
  }
436
  ],
437
+ "tokenizers": {},
438
  "node_i": "596",
439
+ "native_tokenizers": [],
440
+ "scripts": []
441
  }
442
  ],
443
+ "tokenizers": {},
444
  "node_i": "577",
445
+ "native_tokenizers": [],
446
+ "scripts": []
447
  },
448
  {
449
  "name": "Fox",
450
  "iso_1_code": null,
451
  "iso_3_code": null,
 
452
  "children": [
453
  {
454
  "name": "Kickapoo",
455
  "iso_1_code": null,
456
  "iso_3_code": "kic",
 
457
  "children": [],
458
+ "tokenizers": {},
459
  "node_i": "600",
460
+ "native_tokenizers": [],
461
+ "scripts": []
462
  },
463
  {
464
  "name": "Meskwaki",
465
  "iso_1_code": null,
466
  "iso_3_code": "sac",
 
467
  "children": [],
468
+ "tokenizers": {},
469
  "node_i": "601",
470
+ "native_tokenizers": [],
471
+ "scripts": []
472
  }
473
  ],
474
+ "tokenizers": {},
475
  "node_i": "599",
476
+ "native_tokenizers": [],
477
+ "scripts": []
478
  },
479
  {
480
  "name": "Ojibwa-Potawatomi",
481
  "iso_1_code": null,
482
  "iso_3_code": null,
 
483
  "children": [
484
  {
485
  "name": "Algonquin",
486
  "iso_1_code": null,
487
  "iso_3_code": "alq",
 
488
  "children": [],
489
+ "tokenizers": {},
490
  "node_i": "603",
491
+ "native_tokenizers": [],
492
  "scripts": [
493
  "Latn"
494
+ ]
 
495
  },
496
  {
497
  "name": "Chippewa",
498
  "iso_1_code": "oj",
499
  "iso_3_code": "ciw",
 
500
  "children": [],
501
+ "tokenizers": {},
502
  "node_i": "604",
503
+ "native_tokenizers": [],
504
+ "scripts": []
505
  },
506
  {
507
  "name": "Ojibwa, Northwestern",
508
  "iso_1_code": "oj",
509
  "iso_3_code": "ojb",
 
510
  "children": [],
511
+ "tokenizers": {},
512
  "node_i": "605",
513
+ "native_tokenizers": [],
514
  "scripts": [
515
+ "Cans",
516
+ "Latn"
517
+ ]
 
518
  },
519
  {
520
  "name": "Ojibwa, Central",
521
  "iso_1_code": "oj",
522
  "iso_3_code": "ojc",
 
523
  "children": [],
524
+ "tokenizers": {},
525
  "node_i": "606",
526
+ "native_tokenizers": [],
527
+ "scripts": []
528
  },
529
  {
530
  "name": "Ojibwa, Eastern",
531
  "iso_1_code": "oj",
532
  "iso_3_code": "ojg",
 
533
  "children": [],
534
+ "tokenizers": {},
535
  "node_i": "607",
536
+ "native_tokenizers": [],
537
+ "scripts": []
538
  },
539
  {
540
  "name": "Oji-Cree",
541
  "iso_1_code": "oj",
542
  "iso_3_code": "ojs",
 
543
  "children": [],
544
+ "tokenizers": {},
545
  "node_i": "608",
546
+ "native_tokenizers": [],
547
+ "scripts": []
548
  },
549
  {
550
  "name": "Ojibwa, Western",
551
  "iso_1_code": "oj",
552
  "iso_3_code": "ojw",
 
553
  "children": [],
554
+ "tokenizers": {},
555
  "node_i": "609",
556
+ "native_tokenizers": [],
557
+ "scripts": []
558
  },
559
  {
560
  "name": "Ottawa",
561
  "iso_1_code": "oj",
562
  "iso_3_code": "otw",
 
563
  "children": [],
564
+ "tokenizers": {},
565
  "node_i": "610",
566
+ "native_tokenizers": [],
567
  "scripts": [
568
  "Latn"
569
+ ]
 
570
  },
571
  {
572
  "name": "Potawatomi",
573
  "iso_1_code": null,
574
  "iso_3_code": "pot",
 
575
  "children": [],
576
+ "tokenizers": {},
577
  "node_i": "611",
578
+ "native_tokenizers": [],
579
  "scripts": [
580
  "Latn"
581
+ ]
 
582
  }
583
  ],
584
+ "tokenizers": {},
585
  "node_i": "602",
586
+ "native_tokenizers": [],
587
+ "scripts": []
588
  }
589
  ],
590
+ "tokenizers": {},
591
  "node_i": "557",
592
+ "native_tokenizers": [],
593
+ "scripts": []
594
  },
595
  {
596
  "name": "Ritwan",
597
  "iso_1_code": null,
598
  "iso_3_code": null,
 
599
  "children": [
600
  {
601
  "name": "Wiyot",
602
  "iso_1_code": null,
603
  "iso_3_code": "wiy",
 
604
  "children": [],
605
+ "tokenizers": {},
606
  "node_i": "613",
607
+ "native_tokenizers": [],
608
+ "scripts": []
609
  },
610
  {
611
  "name": "Yurok",
612
  "iso_1_code": null,
613
  "iso_3_code": "yur",
 
614
  "children": [],
615
+ "tokenizers": {},
616
  "node_i": "614",
617
+ "native_tokenizers": [],
618
+ "scripts": []
619
  }
620
  ],
621
+ "tokenizers": {},
622
  "node_i": "612",
623
+ "native_tokenizers": [],
624
+ "scripts": []
625
  }
626
  ],
627
+ "tokenizers": {},
628
  "node_i": "556",
629
+ "native_tokenizers": [],
630
+ "scripts": []
631
  }
data/Amto-Musan.json CHANGED
@@ -2,30 +2,30 @@
2
  "name": "Amto-Musan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Amto",
9
  "iso_1_code": null,
10
  "iso_3_code": "amt",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "616",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Siawi",
19
  "iso_1_code": null,
20
  "iso_3_code": "mmp",
21
- "tokenizers": {},
22
  "children": [],
 
23
  "node_i": "617",
24
- "scripts": [],
25
- "own_tokenizer": false
26
  }
27
  ],
 
28
  "node_i": "615",
29
- "scripts": [],
30
- "own_tokenizer": false
31
  }
 
2
  "name": "Amto-Musan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Amto",
8
  "iso_1_code": null,
9
  "iso_3_code": "amt",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "616",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Siawi",
18
  "iso_1_code": null,
19
  "iso_3_code": "mmp",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "617",
23
+ "native_tokenizers": [],
24
+ "scripts": []
25
  }
26
  ],
27
+ "tokenizers": {},
28
  "node_i": "615",
29
+ "native_tokenizers": [],
30
+ "scripts": []
31
  }
data/Andamanese.json CHANGED
@@ -2,194 +2,194 @@
2
  "name": "Andamanese",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Great Andamanese",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Great Andamanese, Mixed",
15
  "iso_1_code": null,
16
  "iso_3_code": "gac",
17
- "tokenizers": {},
18
  "children": [],
 
19
  "node_i": "620",
20
- "scripts": [],
21
- "own_tokenizer": false
22
  },
23
  {
24
  "name": "Central",
25
  "iso_1_code": null,
26
  "iso_3_code": null,
27
- "tokenizers": {},
28
  "children": [
29
  {
30
  "name": "Aka-Bea",
31
  "iso_1_code": null,
32
  "iso_3_code": "abj",
33
- "tokenizers": {},
34
  "children": [],
 
35
  "node_i": "622",
36
- "scripts": [],
37
- "own_tokenizer": false
38
  },
39
  {
40
  "name": "Akar-Bale",
41
  "iso_1_code": null,
42
  "iso_3_code": "acl",
43
- "tokenizers": {},
44
  "children": [],
 
45
  "node_i": "623",
46
- "scripts": [],
47
- "own_tokenizer": false
48
  },
49
  {
50
  "name": "Aka-Kede",
51
  "iso_1_code": null,
52
  "iso_3_code": "akx",
53
- "tokenizers": {},
54
  "children": [],
 
55
  "node_i": "624",
56
- "scripts": [],
57
- "own_tokenizer": false
58
  },
59
  {
60
  "name": "Aka-Kol",
61
  "iso_1_code": null,
62
  "iso_3_code": "aky",
63
- "tokenizers": {},
64
  "children": [],
 
65
  "node_i": "625",
66
- "scripts": [],
67
- "own_tokenizer": false
68
  },
69
  {
70
  "name": "A-Pucikwar",
71
  "iso_1_code": null,
72
  "iso_3_code": "apq",
73
- "tokenizers": {},
74
  "children": [],
 
75
  "node_i": "626",
76
- "scripts": [],
77
- "own_tokenizer": false
78
  },
79
  {
80
  "name": "Oko-Juwoi",
81
  "iso_1_code": null,
82
  "iso_3_code": "okj",
83
- "tokenizers": {},
84
  "children": [],
 
85
  "node_i": "627",
86
- "scripts": [],
87
- "own_tokenizer": false
88
  }
89
  ],
 
90
  "node_i": "621",
91
- "scripts": [],
92
- "own_tokenizer": false
93
  },
94
  {
95
  "name": "Northern",
96
  "iso_1_code": null,
97
  "iso_3_code": null,
98
- "tokenizers": {},
99
  "children": [
100
  {
101
  "name": "Aka-Cari",
102
  "iso_1_code": null,
103
  "iso_3_code": "aci",
104
- "tokenizers": {},
105
  "children": [],
 
106
  "node_i": "629",
107
- "scripts": [],
108
- "own_tokenizer": false
109
  },
110
  {
111
  "name": "Aka-Kora",
112
  "iso_1_code": null,
113
  "iso_3_code": "ack",
114
- "tokenizers": {},
115
  "children": [],
 
116
  "node_i": "630",
117
- "scripts": [],
118
- "own_tokenizer": false
119
  },
120
  {
121
  "name": "Aka-Jeru",
122
  "iso_1_code": null,
123
  "iso_3_code": "akj",
124
- "tokenizers": {},
125
  "children": [],
 
126
  "node_i": "631",
127
- "scripts": [],
128
- "own_tokenizer": false
129
  },
130
  {
131
  "name": "Aka-Bo",
132
  "iso_1_code": null,
133
  "iso_3_code": "akm",
134
- "tokenizers": {},
135
  "children": [],
 
136
  "node_i": "632",
137
- "scripts": [],
138
- "own_tokenizer": false
139
  }
140
  ],
 
141
  "node_i": "628",
142
- "scripts": [],
143
- "own_tokenizer": false
144
  }
145
  ],
 
146
  "node_i": "619",
147
- "scripts": [],
148
- "own_tokenizer": false
149
  },
150
  {
151
  "name": "South Andamanese",
152
  "iso_1_code": null,
153
  "iso_3_code": null,
154
- "tokenizers": {},
155
  "children": [
156
  {
157
  "name": "Jarawa",
158
  "iso_1_code": null,
159
  "iso_3_code": "anq",
160
- "tokenizers": {},
161
  "children": [],
 
162
  "node_i": "634",
163
- "scripts": [],
164
- "own_tokenizer": false
165
  },
166
  {
167
  "name": "\u00d6\u00f1ge",
168
  "iso_1_code": null,
169
  "iso_3_code": "oon",
170
- "tokenizers": {},
171
  "children": [],
 
172
  "node_i": "635",
173
- "scripts": [],
174
- "own_tokenizer": false
175
  },
176
  {
177
  "name": "Sentinel",
178
  "iso_1_code": null,
179
  "iso_3_code": "std",
180
- "tokenizers": {},
181
  "children": [],
 
182
  "node_i": "636",
183
- "scripts": [],
184
- "own_tokenizer": false
185
  }
186
  ],
 
187
  "node_i": "633",
188
- "scripts": [],
189
- "own_tokenizer": false
190
  }
191
  ],
 
192
  "node_i": "618",
193
- "scripts": [],
194
- "own_tokenizer": false
195
  }
 
2
  "name": "Andamanese",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Great Andamanese",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Great Andamanese, Mixed",
13
  "iso_1_code": null,
14
  "iso_3_code": "gac",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "620",
18
+ "native_tokenizers": [],
19
+ "scripts": []
20
  },
21
  {
22
  "name": "Central",
23
  "iso_1_code": null,
24
  "iso_3_code": null,
 
25
  "children": [
26
  {
27
  "name": "Aka-Bea",
28
  "iso_1_code": null,
29
  "iso_3_code": "abj",
 
30
  "children": [],
31
+ "tokenizers": {},
32
  "node_i": "622",
33
+ "native_tokenizers": [],
34
+ "scripts": []
35
  },
36
  {
37
  "name": "Akar-Bale",
38
  "iso_1_code": null,
39
  "iso_3_code": "acl",
 
40
  "children": [],
41
+ "tokenizers": {},
42
  "node_i": "623",
43
+ "native_tokenizers": [],
44
+ "scripts": []
45
  },
46
  {
47
  "name": "Aka-Kede",
48
  "iso_1_code": null,
49
  "iso_3_code": "akx",
 
50
  "children": [],
51
+ "tokenizers": {},
52
  "node_i": "624",
53
+ "native_tokenizers": [],
54
+ "scripts": []
55
  },
56
  {
57
  "name": "Aka-Kol",
58
  "iso_1_code": null,
59
  "iso_3_code": "aky",
 
60
  "children": [],
61
+ "tokenizers": {},
62
  "node_i": "625",
63
+ "native_tokenizers": [],
64
+ "scripts": []
65
  },
66
  {
67
  "name": "A-Pucikwar",
68
  "iso_1_code": null,
69
  "iso_3_code": "apq",
 
70
  "children": [],
71
+ "tokenizers": {},
72
  "node_i": "626",
73
+ "native_tokenizers": [],
74
+ "scripts": []
75
  },
76
  {
77
  "name": "Oko-Juwoi",
78
  "iso_1_code": null,
79
  "iso_3_code": "okj",
 
80
  "children": [],
81
+ "tokenizers": {},
82
  "node_i": "627",
83
+ "native_tokenizers": [],
84
+ "scripts": []
85
  }
86
  ],
87
+ "tokenizers": {},
88
  "node_i": "621",
89
+ "native_tokenizers": [],
90
+ "scripts": []
91
  },
92
  {
93
  "name": "Northern",
94
  "iso_1_code": null,
95
  "iso_3_code": null,
 
96
  "children": [
97
  {
98
  "name": "Aka-Cari",
99
  "iso_1_code": null,
100
  "iso_3_code": "aci",
 
101
  "children": [],
102
+ "tokenizers": {},
103
  "node_i": "629",
104
+ "native_tokenizers": [],
105
+ "scripts": []
106
  },
107
  {
108
  "name": "Aka-Kora",
109
  "iso_1_code": null,
110
  "iso_3_code": "ack",
 
111
  "children": [],
112
+ "tokenizers": {},
113
  "node_i": "630",
114
+ "native_tokenizers": [],
115
+ "scripts": []
116
  },
117
  {
118
  "name": "Aka-Jeru",
119
  "iso_1_code": null,
120
  "iso_3_code": "akj",
 
121
  "children": [],
122
+ "tokenizers": {},
123
  "node_i": "631",
124
+ "native_tokenizers": [],
125
+ "scripts": []
126
  },
127
  {
128
  "name": "Aka-Bo",
129
  "iso_1_code": null,
130
  "iso_3_code": "akm",
 
131
  "children": [],
132
+ "tokenizers": {},
133
  "node_i": "632",
134
+ "native_tokenizers": [],
135
+ "scripts": []
136
  }
137
  ],
138
+ "tokenizers": {},
139
  "node_i": "628",
140
+ "native_tokenizers": [],
141
+ "scripts": []
142
  }
143
  ],
144
+ "tokenizers": {},
145
  "node_i": "619",
146
+ "native_tokenizers": [],
147
+ "scripts": []
148
  },
149
  {
150
  "name": "South Andamanese",
151
  "iso_1_code": null,
152
  "iso_3_code": null,
 
153
  "children": [
154
  {
155
  "name": "Jarawa",
156
  "iso_1_code": null,
157
  "iso_3_code": "anq",
 
158
  "children": [],
159
+ "tokenizers": {},
160
  "node_i": "634",
161
+ "native_tokenizers": [],
162
+ "scripts": []
163
  },
164
  {
165
  "name": "\u00d6\u00f1ge",
166
  "iso_1_code": null,
167
  "iso_3_code": "oon",
 
168
  "children": [],
169
+ "tokenizers": {},
170
  "node_i": "635",
171
+ "native_tokenizers": [],
172
+ "scripts": []
173
  },
174
  {
175
  "name": "Sentinel",
176
  "iso_1_code": null,
177
  "iso_3_code": "std",
 
178
  "children": [],
179
+ "tokenizers": {},
180
  "node_i": "636",
181
+ "native_tokenizers": [],
182
+ "scripts": []
183
  }
184
  ],
185
+ "tokenizers": {},
186
  "node_i": "633",
187
+ "native_tokenizers": [],
188
+ "scripts": []
189
  }
190
  ],
191
+ "tokenizers": {},
192
  "node_i": "618",
193
+ "native_tokenizers": [],
194
+ "scripts": []
195
  }
data/Arafundi.json CHANGED
@@ -2,40 +2,40 @@
2
  "name": "Arafundi",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Andai",
9
  "iso_1_code": null,
10
  "iso_3_code": "afd",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "638",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Nanubae",
19
  "iso_1_code": null,
20
  "iso_3_code": "afk",
21
- "tokenizers": {},
22
  "children": [],
 
23
  "node_i": "639",
24
- "scripts": [],
25
- "own_tokenizer": false
26
  },
27
  {
28
  "name": "Tapei",
29
  "iso_1_code": null,
30
  "iso_3_code": "afp",
31
- "tokenizers": {},
32
  "children": [],
 
33
  "node_i": "640",
34
- "scripts": [],
35
- "own_tokenizer": false
36
  }
37
  ],
 
38
  "node_i": "637",
39
- "scripts": [],
40
- "own_tokenizer": false
41
  }
 
2
  "name": "Arafundi",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Andai",
8
  "iso_1_code": null,
9
  "iso_3_code": "afd",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "638",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Nanubae",
18
  "iso_1_code": null,
19
  "iso_3_code": "afk",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "639",
23
+ "native_tokenizers": [],
24
+ "scripts": []
25
  },
26
  {
27
  "name": "Tapei",
28
  "iso_1_code": null,
29
  "iso_3_code": "afp",
 
30
  "children": [],
31
+ "tokenizers": {},
32
  "node_i": "640",
33
+ "native_tokenizers": [],
34
+ "scripts": []
35
  }
36
  ],
37
+ "tokenizers": {},
38
  "node_i": "637",
39
+ "native_tokenizers": [],
40
+ "scripts": []
41
  }
data/Arai (Left May).json CHANGED
@@ -2,72 +2,72 @@
2
  "name": "Arai (Left May)",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Sawiyanu",
9
  "iso_1_code": null,
10
  "iso_3_code": "amm",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "642",
 
14
  "scripts": [
15
  "Latn"
16
- ],
17
- "own_tokenizer": false
18
  },
19
  {
20
  "name": "Bo",
21
  "iso_1_code": null,
22
  "iso_3_code": "bpw",
23
- "tokenizers": {},
24
  "children": [],
 
25
  "node_i": "643",
26
- "scripts": [],
27
- "own_tokenizer": false
28
  },
29
  {
30
  "name": "Yawuno Teneyo",
31
  "iso_1_code": null,
32
  "iso_3_code": "itr",
33
- "tokenizers": {},
34
  "children": [],
 
35
  "node_i": "644",
36
- "scripts": [],
37
- "own_tokenizer": false
38
  },
39
  {
40
  "name": "Nakwi",
41
  "iso_1_code": null,
42
  "iso_3_code": "nax",
43
- "tokenizers": {},
44
  "children": [],
 
45
  "node_i": "645",
46
- "scripts": [],
47
- "own_tokenizer": false
48
  },
49
  {
50
  "name": "Nimo",
51
  "iso_1_code": null,
52
  "iso_3_code": "niw",
53
- "tokenizers": {},
54
  "children": [],
 
55
  "node_i": "646",
56
- "scripts": [],
57
- "own_tokenizer": false
58
  },
59
  {
60
  "name": "Owiniga",
61
  "iso_1_code": null,
62
  "iso_3_code": "owi",
63
- "tokenizers": {},
64
  "children": [],
 
65
  "node_i": "647",
66
- "scripts": [],
67
- "own_tokenizer": false
68
  }
69
  ],
 
70
  "node_i": "641",
71
- "scripts": [],
72
- "own_tokenizer": false
73
  }
 
2
  "name": "Arai (Left May)",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Sawiyanu",
8
  "iso_1_code": null,
9
  "iso_3_code": "amm",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "642",
13
+ "native_tokenizers": [],
14
  "scripts": [
15
  "Latn"
16
+ ]
 
17
  },
18
  {
19
  "name": "Bo",
20
  "iso_1_code": null,
21
  "iso_3_code": "bpw",
 
22
  "children": [],
23
+ "tokenizers": {},
24
  "node_i": "643",
25
+ "native_tokenizers": [],
26
+ "scripts": []
27
  },
28
  {
29
  "name": "Yawuno Teneyo",
30
  "iso_1_code": null,
31
  "iso_3_code": "itr",
 
32
  "children": [],
33
+ "tokenizers": {},
34
  "node_i": "644",
35
+ "native_tokenizers": [],
36
+ "scripts": []
37
  },
38
  {
39
  "name": "Nakwi",
40
  "iso_1_code": null,
41
  "iso_3_code": "nax",
 
42
  "children": [],
43
+ "tokenizers": {},
44
  "node_i": "645",
45
+ "native_tokenizers": [],
46
+ "scripts": []
47
  },
48
  {
49
  "name": "Nimo",
50
  "iso_1_code": null,
51
  "iso_3_code": "niw",
 
52
  "children": [],
53
+ "tokenizers": {},
54
  "node_i": "646",
55
+ "native_tokenizers": [],
56
+ "scripts": []
57
  },
58
  {
59
  "name": "Owiniga",
60
  "iso_1_code": null,
61
  "iso_3_code": "owi",
 
62
  "children": [],
63
+ "tokenizers": {},
64
  "node_i": "647",
65
+ "native_tokenizers": [],
66
+ "scripts": []
67
  }
68
  ],
69
+ "tokenizers": {},
70
  "node_i": "641",
71
+ "native_tokenizers": [],
72
+ "scripts": []
73
  }
data/Arauan.json CHANGED
@@ -2,87 +2,87 @@
2
  "name": "Arauan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Aru\u00e1",
9
  "iso_1_code": null,
10
  "iso_3_code": "aru",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "649",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Kulina",
19
  "iso_1_code": null,
20
  "iso_3_code": "cul",
21
- "tokenizers": {},
22
  "children": [],
 
23
  "node_i": "650",
 
24
  "scripts": [
25
  "Latn"
26
- ],
27
- "own_tokenizer": false
28
  },
29
  {
30
  "name": "Den\u00ed",
31
  "iso_1_code": null,
32
  "iso_3_code": "dny",
33
- "tokenizers": {},
34
  "children": [],
 
35
  "node_i": "651",
36
- "scripts": [],
37
- "own_tokenizer": false
38
  },
39
  {
40
  "name": "Paumar\u00ed",
41
  "iso_1_code": null,
42
  "iso_3_code": "pad",
43
- "tokenizers": {},
44
  "children": [],
 
45
  "node_i": "652",
 
46
  "scripts": [
47
  "Latn"
48
- ],
49
- "own_tokenizer": false
50
  },
51
  {
52
  "name": "Suruah\u00e1",
53
  "iso_1_code": null,
54
  "iso_3_code": "swx",
55
- "tokenizers": {},
56
  "children": [],
 
57
  "node_i": "653",
58
- "scripts": [],
59
- "own_tokenizer": false
60
  },
61
  {
62
  "name": "Jamamadi",
63
  "iso_1_code": null,
64
  "iso_3_code": null,
65
- "tokenizers": {},
66
  "children": [
67
  {
68
  "name": "Jamamad\u00ed",
69
  "iso_1_code": null,
70
  "iso_3_code": "jaa",
71
- "tokenizers": {},
72
  "children": [],
 
73
  "node_i": "655",
 
74
  "scripts": [
75
  "Latn"
76
- ],
77
- "own_tokenizer": false
78
  }
79
  ],
 
80
  "node_i": "654",
81
- "scripts": [],
82
- "own_tokenizer": false
83
  }
84
  ],
 
85
  "node_i": "648",
86
- "scripts": [],
87
- "own_tokenizer": false
88
  }
 
2
  "name": "Arauan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Aru\u00e1",
8
  "iso_1_code": null,
9
  "iso_3_code": "aru",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "649",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Kulina",
18
  "iso_1_code": null,
19
  "iso_3_code": "cul",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "650",
23
+ "native_tokenizers": [],
24
  "scripts": [
25
  "Latn"
26
+ ]
 
27
  },
28
  {
29
  "name": "Den\u00ed",
30
  "iso_1_code": null,
31
  "iso_3_code": "dny",
 
32
  "children": [],
33
+ "tokenizers": {},
34
  "node_i": "651",
35
+ "native_tokenizers": [],
36
+ "scripts": []
37
  },
38
  {
39
  "name": "Paumar\u00ed",
40
  "iso_1_code": null,
41
  "iso_3_code": "pad",
 
42
  "children": [],
43
+ "tokenizers": {},
44
  "node_i": "652",
45
+ "native_tokenizers": [],
46
  "scripts": [
47
  "Latn"
48
+ ]
 
49
  },
50
  {
51
  "name": "Suruah\u00e1",
52
  "iso_1_code": null,
53
  "iso_3_code": "swx",
 
54
  "children": [],
55
+ "tokenizers": {},
56
  "node_i": "653",
57
+ "native_tokenizers": [],
58
+ "scripts": []
59
  },
60
  {
61
  "name": "Jamamadi",
62
  "iso_1_code": null,
63
  "iso_3_code": null,
 
64
  "children": [
65
  {
66
  "name": "Jamamad\u00ed",
67
  "iso_1_code": null,
68
  "iso_3_code": "jaa",
 
69
  "children": [],
70
+ "tokenizers": {},
71
  "node_i": "655",
72
+ "native_tokenizers": [],
73
  "scripts": [
74
  "Latn"
75
+ ]
 
76
  }
77
  ],
78
+ "tokenizers": {},
79
  "node_i": "654",
80
+ "native_tokenizers": [],
81
+ "scripts": []
82
  }
83
  ],
84
+ "tokenizers": {},
85
  "node_i": "648",
86
+ "native_tokenizers": [],
87
+ "scripts": []
88
  }
data/Australian.json CHANGED
The diff for this file is too large to render. See raw diff
 
data/Austro-Asiatic.json CHANGED
The diff for this file is too large to render. See raw diff
 
data/Austronesian.json CHANGED
The diff for this file is too large to render. See raw diff
 
data/Aymaran.json CHANGED
@@ -2,64 +2,64 @@
2
  "name": "Aymaran",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Aymara",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Aymara, Southern",
15
  "iso_1_code": "ay",
16
  "iso_3_code": "ayc",
17
- "tokenizers": {},
18
  "children": [],
 
19
  "node_i": "3257",
20
- "scripts": [],
21
- "own_tokenizer": false
22
  },
23
  {
24
  "name": "Aymara, Central",
25
  "iso_1_code": "ay",
26
  "iso_3_code": "ayr",
27
- "tokenizers": {},
28
  "children": [],
 
29
  "node_i": "3258",
 
30
  "scripts": [
31
  "Latn"
32
- ],
33
- "own_tokenizer": false
34
  }
35
  ],
 
36
  "node_i": "3256",
37
- "scripts": [],
38
- "own_tokenizer": false
39
  },
40
  {
41
  "name": "Tupe",
42
  "iso_1_code": null,
43
  "iso_3_code": null,
44
- "tokenizers": {},
45
  "children": [
46
  {
47
  "name": "Jaqaru",
48
  "iso_1_code": null,
49
  "iso_3_code": "jqr",
50
- "tokenizers": {},
51
  "children": [],
 
52
  "node_i": "3260",
53
- "scripts": [],
54
- "own_tokenizer": false
55
  }
56
  ],
 
57
  "node_i": "3259",
58
- "scripts": [],
59
- "own_tokenizer": false
60
  }
61
  ],
 
62
  "node_i": "3255",
63
- "scripts": [],
64
- "own_tokenizer": false
65
  }
 
2
  "name": "Aymaran",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Aymara",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Aymara, Southern",
13
  "iso_1_code": "ay",
14
  "iso_3_code": "ayc",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "3257",
18
+ "native_tokenizers": [],
19
+ "scripts": []
20
  },
21
  {
22
  "name": "Aymara, Central",
23
  "iso_1_code": "ay",
24
  "iso_3_code": "ayr",
 
25
  "children": [],
26
+ "tokenizers": {},
27
  "node_i": "3258",
28
+ "native_tokenizers": [],
29
  "scripts": [
30
  "Latn"
31
+ ]
 
32
  }
33
  ],
34
+ "tokenizers": {},
35
  "node_i": "3256",
36
+ "native_tokenizers": [],
37
+ "scripts": []
38
  },
39
  {
40
  "name": "Tupe",
41
  "iso_1_code": null,
42
  "iso_3_code": null,
 
43
  "children": [
44
  {
45
  "name": "Jaqaru",
46
  "iso_1_code": null,
47
  "iso_3_code": "jqr",
 
48
  "children": [],
49
+ "tokenizers": {},
50
  "node_i": "3260",
51
+ "native_tokenizers": [],
52
+ "scripts": []
53
  }
54
  ],
55
+ "tokenizers": {},
56
  "node_i": "3259",
57
+ "native_tokenizers": [],
58
+ "scripts": []
59
  }
60
  ],
61
+ "tokenizers": {},
62
  "node_i": "3255",
63
+ "native_tokenizers": [],
64
+ "scripts": []
65
  }
data/Barbacoan.json CHANGED
@@ -2,68 +2,68 @@
2
  "name": "Barbacoan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Northern",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Awa-Cuaiquer",
15
  "iso_1_code": null,
16
  "iso_3_code": "kwi",
17
- "tokenizers": {},
18
  "children": [],
 
19
  "node_i": "3263",
 
20
  "scripts": [
21
  "Latn"
22
- ],
23
- "own_tokenizer": false
24
  }
25
  ],
 
26
  "node_i": "3262",
27
- "scripts": [],
28
- "own_tokenizer": false
29
  },
30
  {
31
  "name": "Southern",
32
  "iso_1_code": null,
33
  "iso_3_code": null,
34
- "tokenizers": {},
35
  "children": [
36
  {
37
  "name": "Chachi",
38
  "iso_1_code": null,
39
  "iso_3_code": "cbi",
40
- "tokenizers": {},
41
  "children": [],
 
42
  "node_i": "3265",
 
43
  "scripts": [
44
  "Latn"
45
- ],
46
- "own_tokenizer": false
47
  },
48
  {
49
  "name": "Tsafiki",
50
  "iso_1_code": null,
51
  "iso_3_code": "cof",
52
- "tokenizers": {},
53
  "children": [],
 
54
  "node_i": "3266",
 
55
  "scripts": [
56
  "Latn"
57
- ],
58
- "own_tokenizer": false
59
  }
60
  ],
 
61
  "node_i": "3264",
62
- "scripts": [],
63
- "own_tokenizer": false
64
  }
65
  ],
 
66
  "node_i": "3261",
67
- "scripts": [],
68
- "own_tokenizer": false
69
  }
 
2
  "name": "Barbacoan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Northern",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Awa-Cuaiquer",
13
  "iso_1_code": null,
14
  "iso_3_code": "kwi",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "3263",
18
+ "native_tokenizers": [],
19
  "scripts": [
20
  "Latn"
21
+ ]
 
22
  }
23
  ],
24
+ "tokenizers": {},
25
  "node_i": "3262",
26
+ "native_tokenizers": [],
27
+ "scripts": []
28
  },
29
  {
30
  "name": "Southern",
31
  "iso_1_code": null,
32
  "iso_3_code": null,
 
33
  "children": [
34
  {
35
  "name": "Chachi",
36
  "iso_1_code": null,
37
  "iso_3_code": "cbi",
 
38
  "children": [],
39
+ "tokenizers": {},
40
  "node_i": "3265",
41
+ "native_tokenizers": [],
42
  "scripts": [
43
  "Latn"
44
+ ]
 
45
  },
46
  {
47
  "name": "Tsafiki",
48
  "iso_1_code": null,
49
  "iso_3_code": "cof",
 
50
  "children": [],
51
+ "tokenizers": {},
52
  "node_i": "3266",
53
+ "native_tokenizers": [],
54
  "scripts": [
55
  "Latn"
56
+ ]
 
57
  }
58
  ],
59
+ "tokenizers": {},
60
  "node_i": "3264",
61
+ "native_tokenizers": [],
62
+ "scripts": []
63
  }
64
  ],
65
+ "tokenizers": {},
66
  "node_i": "3261",
67
+ "native_tokenizers": [],
68
+ "scripts": []
69
  }
data/Bayono-Awbono.json CHANGED
@@ -2,30 +2,30 @@
2
  "name": "Bayono-Awbono",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Awbono",
9
  "iso_1_code": null,
10
  "iso_3_code": "awh",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3268",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Bayono",
19
  "iso_1_code": null,
20
  "iso_3_code": "byl",
21
- "tokenizers": {},
22
  "children": [],
 
23
  "node_i": "3269",
24
- "scripts": [],
25
- "own_tokenizer": false
26
  }
27
  ],
 
28
  "node_i": "3267",
29
- "scripts": [],
30
- "own_tokenizer": false
31
  }
 
2
  "name": "Bayono-Awbono",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Awbono",
8
  "iso_1_code": null,
9
  "iso_3_code": "awh",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3268",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Bayono",
18
  "iso_1_code": null,
19
  "iso_3_code": "byl",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "3269",
23
+ "native_tokenizers": [],
24
+ "scripts": []
25
  }
26
  ],
27
+ "tokenizers": {},
28
  "node_i": "3267",
29
+ "native_tokenizers": [],
30
+ "scripts": []
31
  }
data/Border.json CHANGED
@@ -2,197 +2,197 @@
2
  "name": "Border",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Bewani",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Ainbai",
15
  "iso_1_code": null,
16
  "iso_3_code": "aic",
17
- "tokenizers": {},
18
  "children": [],
 
19
  "node_i": "3272",
20
- "scripts": [],
21
- "own_tokenizer": false
22
  },
23
  {
24
  "name": "Kilmeri",
25
  "iso_1_code": null,
26
  "iso_3_code": "kih",
27
- "tokenizers": {},
28
  "children": [],
 
29
  "node_i": "3273",
30
- "scripts": [],
31
- "own_tokenizer": false
32
  },
33
  {
34
  "name": "Ningera",
35
  "iso_1_code": null,
36
  "iso_3_code": "nby",
37
- "tokenizers": {},
38
  "children": [],
 
39
  "node_i": "3274",
40
- "scripts": [],
41
- "own_tokenizer": false
42
  },
43
  {
44
  "name": "Pagi",
45
  "iso_1_code": null,
46
  "iso_3_code": "pgi",
47
- "tokenizers": {},
48
  "children": [],
 
49
  "node_i": "3275",
50
- "scripts": [],
51
- "own_tokenizer": false
52
  },
53
  {
54
  "name": "Umeda",
55
  "iso_1_code": null,
56
  "iso_3_code": "upi",
57
- "tokenizers": {},
58
  "children": [],
 
59
  "node_i": "3276",
60
- "scripts": [],
61
- "own_tokenizer": false
62
  }
63
  ],
 
64
  "node_i": "3271",
65
- "scripts": [],
66
- "own_tokenizer": false
67
  },
68
  {
69
  "name": "Taikat",
70
  "iso_1_code": null,
71
  "iso_3_code": null,
72
- "tokenizers": {},
73
  "children": [
74
  {
75
  "name": "Taikat",
76
  "iso_1_code": null,
77
  "iso_3_code": "aos",
78
- "tokenizers": {},
79
  "children": [],
 
80
  "node_i": "3278",
81
- "scripts": [],
82
- "own_tokenizer": false
83
  },
84
  {
85
  "name": "Awyi",
86
  "iso_1_code": null,
87
  "iso_3_code": "auw",
88
- "tokenizers": {},
89
  "children": [],
 
90
  "node_i": "3279",
91
- "scripts": [],
92
- "own_tokenizer": false
93
  }
94
  ],
 
95
  "node_i": "3277",
96
- "scripts": [],
97
- "own_tokenizer": false
98
  },
99
  {
100
  "name": "Waris",
101
  "iso_1_code": null,
102
  "iso_3_code": null,
103
- "tokenizers": {},
104
  "children": [
105
  {
106
  "name": "Amanab",
107
  "iso_1_code": null,
108
  "iso_3_code": "amn",
109
- "tokenizers": {},
110
  "children": [],
 
111
  "node_i": "3281",
 
112
  "scripts": [
113
  "Latn"
114
- ],
115
- "own_tokenizer": false
116
  },
117
  {
118
  "name": "Daonda",
119
  "iso_1_code": null,
120
  "iso_3_code": "dnd",
121
- "tokenizers": {},
122
  "children": [],
 
123
  "node_i": "3282",
124
- "scripts": [],
125
- "own_tokenizer": false
126
  },
127
  {
128
  "name": "Imonda",
129
  "iso_1_code": null,
130
  "iso_3_code": "imn",
131
- "tokenizers": {},
132
  "children": [],
 
133
  "node_i": "3283",
134
- "scripts": [],
135
- "own_tokenizer": false
136
  },
137
  {
138
  "name": "Manem",
139
  "iso_1_code": null,
140
  "iso_3_code": "jet",
141
- "tokenizers": {},
142
  "children": [],
 
143
  "node_i": "3284",
144
- "scripts": [],
145
- "own_tokenizer": false
146
  },
147
  {
148
  "name": "Auwe",
149
  "iso_1_code": null,
150
  "iso_3_code": "smf",
151
- "tokenizers": {},
152
  "children": [],
 
153
  "node_i": "3285",
154
- "scripts": [],
155
- "own_tokenizer": false
156
  },
157
  {
158
  "name": "Viid",
159
  "iso_1_code": null,
160
  "iso_3_code": "snu",
161
- "tokenizers": {},
162
  "children": [],
 
163
  "node_i": "3286",
164
- "scripts": [],
165
- "own_tokenizer": false
166
  },
167
  {
168
  "name": "Sowanda",
169
  "iso_1_code": null,
170
  "iso_3_code": "sow",
171
- "tokenizers": {},
172
  "children": [],
 
173
  "node_i": "3287",
174
- "scripts": [],
175
- "own_tokenizer": false
176
  },
177
  {
178
  "name": "Waris",
179
  "iso_1_code": null,
180
  "iso_3_code": "wrs",
181
- "tokenizers": {},
182
  "children": [],
 
183
  "node_i": "3288",
 
184
  "scripts": [
185
  "Latn"
186
- ],
187
- "own_tokenizer": false
188
  }
189
  ],
 
190
  "node_i": "3280",
191
- "scripts": [],
192
- "own_tokenizer": false
193
  }
194
  ],
 
195
  "node_i": "3270",
196
- "scripts": [],
197
- "own_tokenizer": false
198
  }
 
2
  "name": "Border",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Bewani",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Ainbai",
13
  "iso_1_code": null,
14
  "iso_3_code": "aic",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "3272",
18
+ "native_tokenizers": [],
19
+ "scripts": []
20
  },
21
  {
22
  "name": "Kilmeri",
23
  "iso_1_code": null,
24
  "iso_3_code": "kih",
 
25
  "children": [],
26
+ "tokenizers": {},
27
  "node_i": "3273",
28
+ "native_tokenizers": [],
29
+ "scripts": []
30
  },
31
  {
32
  "name": "Ningera",
33
  "iso_1_code": null,
34
  "iso_3_code": "nby",
 
35
  "children": [],
36
+ "tokenizers": {},
37
  "node_i": "3274",
38
+ "native_tokenizers": [],
39
+ "scripts": []
40
  },
41
  {
42
  "name": "Pagi",
43
  "iso_1_code": null,
44
  "iso_3_code": "pgi",
 
45
  "children": [],
46
+ "tokenizers": {},
47
  "node_i": "3275",
48
+ "native_tokenizers": [],
49
+ "scripts": []
50
  },
51
  {
52
  "name": "Umeda",
53
  "iso_1_code": null,
54
  "iso_3_code": "upi",
 
55
  "children": [],
56
+ "tokenizers": {},
57
  "node_i": "3276",
58
+ "native_tokenizers": [],
59
+ "scripts": []
60
  }
61
  ],
62
+ "tokenizers": {},
63
  "node_i": "3271",
64
+ "native_tokenizers": [],
65
+ "scripts": []
66
  },
67
  {
68
  "name": "Taikat",
69
  "iso_1_code": null,
70
  "iso_3_code": null,
 
71
  "children": [
72
  {
73
  "name": "Taikat",
74
  "iso_1_code": null,
75
  "iso_3_code": "aos",
 
76
  "children": [],
77
+ "tokenizers": {},
78
  "node_i": "3278",
79
+ "native_tokenizers": [],
80
+ "scripts": []
81
  },
82
  {
83
  "name": "Awyi",
84
  "iso_1_code": null,
85
  "iso_3_code": "auw",
 
86
  "children": [],
87
+ "tokenizers": {},
88
  "node_i": "3279",
89
+ "native_tokenizers": [],
90
+ "scripts": []
91
  }
92
  ],
93
+ "tokenizers": {},
94
  "node_i": "3277",
95
+ "native_tokenizers": [],
96
+ "scripts": []
97
  },
98
  {
99
  "name": "Waris",
100
  "iso_1_code": null,
101
  "iso_3_code": null,
 
102
  "children": [
103
  {
104
  "name": "Amanab",
105
  "iso_1_code": null,
106
  "iso_3_code": "amn",
 
107
  "children": [],
108
+ "tokenizers": {},
109
  "node_i": "3281",
110
+ "native_tokenizers": [],
111
  "scripts": [
112
  "Latn"
113
+ ]
 
114
  },
115
  {
116
  "name": "Daonda",
117
  "iso_1_code": null,
118
  "iso_3_code": "dnd",
 
119
  "children": [],
120
+ "tokenizers": {},
121
  "node_i": "3282",
122
+ "native_tokenizers": [],
123
+ "scripts": []
124
  },
125
  {
126
  "name": "Imonda",
127
  "iso_1_code": null,
128
  "iso_3_code": "imn",
 
129
  "children": [],
130
+ "tokenizers": {},
131
  "node_i": "3283",
132
+ "native_tokenizers": [],
133
+ "scripts": []
134
  },
135
  {
136
  "name": "Manem",
137
  "iso_1_code": null,
138
  "iso_3_code": "jet",
 
139
  "children": [],
140
+ "tokenizers": {},
141
  "node_i": "3284",
142
+ "native_tokenizers": [],
143
+ "scripts": []
144
  },
145
  {
146
  "name": "Auwe",
147
  "iso_1_code": null,
148
  "iso_3_code": "smf",
 
149
  "children": [],
150
+ "tokenizers": {},
151
  "node_i": "3285",
152
+ "native_tokenizers": [],
153
+ "scripts": []
154
  },
155
  {
156
  "name": "Viid",
157
  "iso_1_code": null,
158
  "iso_3_code": "snu",
 
159
  "children": [],
160
+ "tokenizers": {},
161
  "node_i": "3286",
162
+ "native_tokenizers": [],
163
+ "scripts": []
164
  },
165
  {
166
  "name": "Sowanda",
167
  "iso_1_code": null,
168
  "iso_3_code": "sow",
 
169
  "children": [],
170
+ "tokenizers": {},
171
  "node_i": "3287",
172
+ "native_tokenizers": [],
173
+ "scripts": []
174
  },
175
  {
176
  "name": "Waris",
177
  "iso_1_code": null,
178
  "iso_3_code": "wrs",
 
179
  "children": [],
180
+ "tokenizers": {},
181
  "node_i": "3288",
182
+ "native_tokenizers": [],
183
  "scripts": [
184
  "Latn"
185
+ ]
 
186
  }
187
  ],
188
+ "tokenizers": {},
189
  "node_i": "3280",
190
+ "native_tokenizers": [],
191
+ "scripts": []
192
  }
193
  ],
194
+ "tokenizers": {},
195
  "node_i": "3270",
196
+ "native_tokenizers": [],
197
+ "scripts": []
198
  }
data/Bororoan.json CHANGED
@@ -2,42 +2,42 @@
2
  "name": "Bororoan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Bor\u00f4ro",
9
  "iso_1_code": null,
10
  "iso_3_code": "bor",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3290",
 
14
  "scripts": [
15
  "Latn"
16
- ],
17
- "own_tokenizer": false
18
  },
19
  {
20
  "name": "Otuke",
21
  "iso_1_code": null,
22
  "iso_3_code": "otu",
23
- "tokenizers": {},
24
  "children": [],
 
25
  "node_i": "3291",
26
- "scripts": [],
27
- "own_tokenizer": false
28
  },
29
  {
30
  "name": "Umot\u00edna",
31
  "iso_1_code": null,
32
  "iso_3_code": "umo",
33
- "tokenizers": {},
34
  "children": [],
 
35
  "node_i": "3292",
36
- "scripts": [],
37
- "own_tokenizer": false
38
  }
39
  ],
 
40
  "node_i": "3289",
41
- "scripts": [],
42
- "own_tokenizer": false
43
  }
 
2
  "name": "Bororoan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Bor\u00f4ro",
8
  "iso_1_code": null,
9
  "iso_3_code": "bor",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3290",
13
+ "native_tokenizers": [],
14
  "scripts": [
15
  "Latn"
16
+ ]
 
17
  },
18
  {
19
  "name": "Otuke",
20
  "iso_1_code": null,
21
  "iso_3_code": "otu",
 
22
  "children": [],
23
+ "tokenizers": {},
24
  "node_i": "3291",
25
+ "native_tokenizers": [],
26
+ "scripts": []
27
  },
28
  {
29
  "name": "Umot\u00edna",
30
  "iso_1_code": null,
31
  "iso_3_code": "umo",
 
32
  "children": [],
33
+ "tokenizers": {},
34
  "node_i": "3292",
35
+ "native_tokenizers": [],
36
+ "scripts": []
37
  }
38
  ],
39
+ "tokenizers": {},
40
  "node_i": "3289",
41
+ "native_tokenizers": [],
42
+ "scripts": []
43
  }
data/Botocudoan.json CHANGED
@@ -2,20 +2,20 @@
2
  "name": "Botocudoan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Krenak",
9
  "iso_1_code": null,
10
  "iso_3_code": "kqq",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3294",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  }
17
  ],
 
18
  "node_i": "3293",
19
- "scripts": [],
20
- "own_tokenizer": false
21
  }
 
2
  "name": "Botocudoan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Krenak",
8
  "iso_1_code": null,
9
  "iso_3_code": "kqq",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3294",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  }
16
  ],
17
+ "tokenizers": {},
18
  "node_i": "3293",
19
+ "native_tokenizers": [],
20
+ "scripts": []
21
  }
data/Caddoan.json CHANGED
@@ -2,93 +2,93 @@
2
  "name": "Caddoan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Caddo",
9
  "iso_1_code": null,
10
  "iso_3_code": "cad",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3296",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Northern Caddoan",
19
  "iso_1_code": null,
20
  "iso_3_code": null,
21
- "tokenizers": {},
22
  "children": [
23
  {
24
  "name": "Wichita",
25
  "iso_1_code": null,
26
  "iso_3_code": "wic",
27
- "tokenizers": {},
28
  "children": [],
 
29
  "node_i": "3298",
30
- "scripts": [],
31
- "own_tokenizer": false
32
  },
33
  {
34
  "name": "Kitsai-Proto-Pawnee",
35
  "iso_1_code": null,
36
  "iso_3_code": null,
37
- "tokenizers": {},
38
  "children": [
39
  {
40
  "name": "Kitsai",
41
  "iso_1_code": null,
42
  "iso_3_code": "kii",
43
- "tokenizers": {},
44
  "children": [],
 
45
  "node_i": "3300",
46
- "scripts": [],
47
- "own_tokenizer": false
48
  },
49
  {
50
  "name": "Proto-Pawnee",
51
  "iso_1_code": null,
52
  "iso_3_code": null,
53
- "tokenizers": {},
54
  "children": [
55
  {
56
  "name": "Arikara",
57
  "iso_1_code": null,
58
  "iso_3_code": "ari",
59
- "tokenizers": {},
60
  "children": [],
 
61
  "node_i": "3302",
62
- "scripts": [],
63
- "own_tokenizer": false
64
  },
65
  {
66
  "name": "Pawnee",
67
  "iso_1_code": null,
68
  "iso_3_code": "paw",
69
- "tokenizers": {},
70
  "children": [],
 
71
  "node_i": "3303",
72
- "scripts": [],
73
- "own_tokenizer": false
74
  }
75
  ],
 
76
  "node_i": "3301",
77
- "scripts": [],
78
- "own_tokenizer": false
79
  }
80
  ],
 
81
  "node_i": "3299",
82
- "scripts": [],
83
- "own_tokenizer": false
84
  }
85
  ],
 
86
  "node_i": "3297",
87
- "scripts": [],
88
- "own_tokenizer": false
89
  }
90
  ],
 
91
  "node_i": "3295",
92
- "scripts": [],
93
- "own_tokenizer": false
94
  }
 
2
  "name": "Caddoan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Caddo",
8
  "iso_1_code": null,
9
  "iso_3_code": "cad",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3296",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Northern Caddoan",
18
  "iso_1_code": null,
19
  "iso_3_code": null,
 
20
  "children": [
21
  {
22
  "name": "Wichita",
23
  "iso_1_code": null,
24
  "iso_3_code": "wic",
 
25
  "children": [],
26
+ "tokenizers": {},
27
  "node_i": "3298",
28
+ "native_tokenizers": [],
29
+ "scripts": []
30
  },
31
  {
32
  "name": "Kitsai-Proto-Pawnee",
33
  "iso_1_code": null,
34
  "iso_3_code": null,
 
35
  "children": [
36
  {
37
  "name": "Kitsai",
38
  "iso_1_code": null,
39
  "iso_3_code": "kii",
 
40
  "children": [],
41
+ "tokenizers": {},
42
  "node_i": "3300",
43
+ "native_tokenizers": [],
44
+ "scripts": []
45
  },
46
  {
47
  "name": "Proto-Pawnee",
48
  "iso_1_code": null,
49
  "iso_3_code": null,
 
50
  "children": [
51
  {
52
  "name": "Arikara",
53
  "iso_1_code": null,
54
  "iso_3_code": "ari",
 
55
  "children": [],
56
+ "tokenizers": {},
57
  "node_i": "3302",
58
+ "native_tokenizers": [],
59
+ "scripts": []
60
  },
61
  {
62
  "name": "Pawnee",
63
  "iso_1_code": null,
64
  "iso_3_code": "paw",
 
65
  "children": [],
66
+ "tokenizers": {},
67
  "node_i": "3303",
68
+ "native_tokenizers": [],
69
+ "scripts": []
70
  }
71
  ],
72
+ "tokenizers": {},
73
  "node_i": "3301",
74
+ "native_tokenizers": [],
75
+ "scripts": []
76
  }
77
  ],
78
+ "tokenizers": {},
79
  "node_i": "3299",
80
+ "native_tokenizers": [],
81
+ "scripts": []
82
  }
83
  ],
84
+ "tokenizers": {},
85
  "node_i": "3297",
86
+ "native_tokenizers": [],
87
+ "scripts": []
88
  }
89
  ],
90
+ "tokenizers": {},
91
  "node_i": "3295",
92
+ "native_tokenizers": [],
93
+ "scripts": []
94
  }
data/Cahuapanan.json CHANGED
@@ -2,32 +2,32 @@
2
  "name": "Cahuapanan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Shawi",
9
  "iso_1_code": null,
10
  "iso_3_code": "cbt",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3305",
 
14
  "scripts": [
15
  "Latn"
16
- ],
17
- "own_tokenizer": false
18
  },
19
  {
20
  "name": "Jebero",
21
  "iso_1_code": null,
22
  "iso_3_code": "jeb",
23
- "tokenizers": {},
24
  "children": [],
 
25
  "node_i": "3306",
26
- "scripts": [],
27
- "own_tokenizer": false
28
  }
29
  ],
 
30
  "node_i": "3304",
31
- "scripts": [],
32
- "own_tokenizer": false
33
  }
 
2
  "name": "Cahuapanan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Shawi",
8
  "iso_1_code": null,
9
  "iso_3_code": "cbt",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3305",
13
+ "native_tokenizers": [],
14
  "scripts": [
15
  "Latn"
16
+ ]
 
17
  },
18
  {
19
  "name": "Jebero",
20
  "iso_1_code": null,
21
  "iso_3_code": "jeb",
 
22
  "children": [],
23
+ "tokenizers": {},
24
  "node_i": "3306",
25
+ "native_tokenizers": [],
26
+ "scripts": []
27
  }
28
  ],
29
+ "tokenizers": {},
30
  "node_i": "3304",
31
+ "native_tokenizers": [],
32
+ "scripts": []
33
  }
data/Cariban.json CHANGED
@@ -2,569 +2,569 @@
2
  "name": "Cariban",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Carib",
9
  "iso_1_code": null,
10
  "iso_3_code": "car",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3308",
 
14
  "scripts": [
15
  "Latn"
16
- ],
17
- "own_tokenizer": false
18
  },
19
  {
20
  "name": "Central",
21
  "iso_1_code": null,
22
  "iso_3_code": null,
23
- "tokenizers": {},
24
  "children": [
25
  {
26
  "name": "Apala\u00ed",
27
  "iso_1_code": null,
28
  "iso_3_code": "apy",
29
- "tokenizers": {},
30
  "children": [],
 
31
  "node_i": "3310",
 
32
  "scripts": [
33
  "Latn"
34
- ],
35
- "own_tokenizer": false
36
  },
37
  {
38
  "name": "Cuman\u00e1",
39
  "iso_1_code": null,
40
  "iso_3_code": null,
41
- "tokenizers": {},
42
  "children": [
43
  {
44
  "name": "Chaima",
45
  "iso_1_code": null,
46
  "iso_3_code": "ciy",
47
- "tokenizers": {},
48
  "children": [],
 
49
  "node_i": "3312",
50
- "scripts": [],
51
- "own_tokenizer": false
52
  },
53
  {
54
  "name": "Cumanagoto",
55
  "iso_1_code": null,
56
  "iso_3_code": "cuo",
57
- "tokenizers": {},
58
  "children": [],
 
59
  "node_i": "3313",
60
- "scripts": [],
61
- "own_tokenizer": false
62
  }
63
  ],
 
64
  "node_i": "3311",
65
- "scripts": [],
66
- "own_tokenizer": false
67
  },
68
  {
69
  "name": "Makiritare",
70
  "iso_1_code": null,
71
  "iso_3_code": null,
72
- "tokenizers": {},
73
  "children": [
74
  {
75
  "name": "Maquiritari",
76
  "iso_1_code": null,
77
  "iso_3_code": "mch",
78
- "tokenizers": {},
79
  "children": [],
 
80
  "node_i": "3315",
81
- "scripts": [],
82
- "own_tokenizer": false
83
  }
84
  ],
 
85
  "node_i": "3314",
86
- "scripts": [],
87
- "own_tokenizer": false
88
  },
89
  {
90
  "name": "Mapoyo-Yavarana",
91
  "iso_1_code": null,
92
  "iso_3_code": null,
93
- "tokenizers": {},
94
  "children": [
95
  {
96
  "name": "Mapoyo",
97
  "iso_1_code": null,
98
  "iso_3_code": "mcg",
99
- "tokenizers": {},
100
  "children": [],
 
101
  "node_i": "3317",
102
- "scripts": [],
103
- "own_tokenizer": false
104
  },
105
  {
106
  "name": "P\u00e9mono",
107
  "iso_1_code": null,
108
  "iso_3_code": "pev",
109
- "tokenizers": {},
110
  "children": [],
 
111
  "node_i": "3318",
112
- "scripts": [],
113
- "own_tokenizer": false
114
  },
115
  {
116
  "name": "Tamanaku",
117
  "iso_1_code": null,
118
  "iso_3_code": "tmz",
119
- "tokenizers": {},
120
  "children": [],
 
121
  "node_i": "3319",
122
- "scripts": [],
123
- "own_tokenizer": false
124
  },
125
  {
126
  "name": "Yabarana",
127
  "iso_1_code": null,
128
  "iso_3_code": "yar",
129
- "tokenizers": {},
130
  "children": [],
 
131
  "node_i": "3320",
132
- "scripts": [],
133
- "own_tokenizer": false
134
  }
135
  ],
 
136
  "node_i": "3316",
137
- "scripts": [],
138
- "own_tokenizer": false
139
  },
140
  {
141
  "name": "Wayana",
142
  "iso_1_code": null,
143
  "iso_3_code": null,
144
- "tokenizers": {},
145
  "children": [
146
  {
147
  "name": "Kaxui\u00e2na",
148
  "iso_1_code": null,
149
  "iso_3_code": "kbb",
150
- "tokenizers": {},
151
  "children": [],
 
152
  "node_i": "3322",
153
- "scripts": [],
154
- "own_tokenizer": false
155
  },
156
  {
157
  "name": "Wayana",
158
  "iso_1_code": null,
159
  "iso_3_code": "way",
160
- "tokenizers": {},
161
  "children": [],
 
162
  "node_i": "3323",
 
163
  "scripts": [
164
  "Latn"
165
- ],
166
- "own_tokenizer": false
167
  }
168
  ],
 
169
  "node_i": "3321",
170
- "scripts": [],
171
- "own_tokenizer": false
172
  }
173
  ],
 
174
  "node_i": "3309",
175
- "scripts": [],
176
- "own_tokenizer": false
177
  },
178
  {
179
  "name": "Kashuyana",
180
  "iso_1_code": null,
181
  "iso_3_code": null,
182
- "tokenizers": {},
183
  "children": [
184
  {
185
  "name": "Sikiana",
186
  "iso_1_code": null,
187
  "iso_3_code": "sik",
188
- "tokenizers": {},
189
  "children": [],
 
190
  "node_i": "3325",
191
- "scripts": [],
192
- "own_tokenizer": false
193
  }
194
  ],
 
195
  "node_i": "3324",
196
- "scripts": [],
197
- "own_tokenizer": false
198
  },
199
  {
200
  "name": "North Amazonian",
201
  "iso_1_code": null,
202
  "iso_3_code": null,
203
- "tokenizers": {},
204
  "children": [
205
  {
206
  "name": "Pem\u00f3n",
207
  "iso_1_code": null,
208
  "iso_3_code": null,
209
- "tokenizers": {},
210
  "children": [
211
  {
212
  "name": "Pem\u00f3n proper",
213
  "iso_1_code": null,
214
  "iso_3_code": null,
215
- "tokenizers": {},
216
  "children": [
217
  {
218
  "name": "Pemon",
219
  "iso_1_code": null,
220
  "iso_3_code": "aoc",
221
- "tokenizers": {},
222
  "children": [],
 
223
  "node_i": "3329",
224
- "scripts": [],
225
- "own_tokenizer": false
226
  },
227
  {
228
  "name": "Macushi",
229
  "iso_1_code": null,
230
  "iso_3_code": "mbc",
231
- "tokenizers": {},
232
  "children": [],
 
233
  "node_i": "3330",
 
234
  "scripts": [
235
  "Latn"
236
- ],
237
- "own_tokenizer": false
238
  },
239
  {
240
  "name": "Kapong",
241
  "iso_1_code": null,
242
  "iso_3_code": null,
243
- "tokenizers": {},
244
  "children": [
245
  {
246
  "name": "Akawaio",
247
  "iso_1_code": null,
248
  "iso_3_code": "ake",
249
- "tokenizers": {},
250
  "children": [],
 
251
  "node_i": "3332",
 
252
  "scripts": [
253
  "Latn"
254
- ],
255
- "own_tokenizer": false
256
  },
257
  {
258
  "name": "Patamona",
259
  "iso_1_code": null,
260
  "iso_3_code": "pbc",
261
- "tokenizers": {},
262
  "children": [],
 
263
  "node_i": "3333",
 
264
  "scripts": [
265
  "Latn"
266
- ],
267
- "own_tokenizer": false
268
  }
269
  ],
 
270
  "node_i": "3331",
271
- "scripts": [],
272
- "own_tokenizer": false
273
  }
274
  ],
 
275
  "node_i": "3328",
276
- "scripts": [],
277
- "own_tokenizer": false
278
  }
279
  ],
 
280
  "node_i": "3327",
281
- "scripts": [],
282
- "own_tokenizer": false
283
  },
284
  {
285
  "name": "Yawaper\u00ed",
286
  "iso_1_code": null,
287
  "iso_3_code": null,
288
- "tokenizers": {},
289
  "children": [
290
  {
291
  "name": "Waimiri-Atroar\u00ed",
292
  "iso_1_code": null,
293
  "iso_3_code": "atr",
294
- "tokenizers": {},
295
  "children": [],
 
296
  "node_i": "3335",
297
- "scripts": [],
298
- "own_tokenizer": false
299
  }
300
  ],
 
301
  "node_i": "3334",
302
- "scripts": [],
303
- "own_tokenizer": false
304
  }
305
  ],
 
306
  "node_i": "3326",
307
- "scripts": [],
308
- "own_tokenizer": false
309
  },
310
  {
311
  "name": "South Amazonian",
312
  "iso_1_code": null,
313
  "iso_3_code": null,
314
- "tokenizers": {},
315
  "children": [
316
  {
317
  "name": "E\u2019\u00f1apa Woromaipu",
318
  "iso_1_code": null,
319
  "iso_3_code": "pbh",
320
- "tokenizers": {},
321
  "children": [],
 
322
  "node_i": "3337",
323
- "scripts": [],
324
- "own_tokenizer": false
325
  },
326
  {
327
  "name": "Arara",
328
  "iso_1_code": null,
329
  "iso_3_code": null,
330
- "tokenizers": {},
331
  "children": [
332
  {
333
  "name": "Arara, Par\u00e1",
334
  "iso_1_code": null,
335
  "iso_3_code": "aap",
336
- "tokenizers": {},
337
  "children": [],
 
338
  "node_i": "3339",
339
- "scripts": [],
340
- "own_tokenizer": false
341
  },
342
  {
343
  "name": "Ikpeng",
344
  "iso_1_code": null,
345
  "iso_3_code": "txi",
346
- "tokenizers": {},
347
  "children": [],
 
348
  "node_i": "3340",
349
- "scripts": [],
350
- "own_tokenizer": false
351
  }
352
  ],
 
353
  "node_i": "3338",
354
- "scripts": [],
355
- "own_tokenizer": false
356
  },
357
  {
358
  "name": "Bakair\u00ed",
359
  "iso_1_code": null,
360
  "iso_3_code": null,
361
- "tokenizers": {},
362
  "children": [
363
  {
364
  "name": "Bakair\u00ed",
365
  "iso_1_code": null,
366
  "iso_3_code": "bkq",
367
- "tokenizers": {},
368
  "children": [],
 
369
  "node_i": "3342",
 
370
  "scripts": [
371
  "Latn"
372
- ],
373
- "own_tokenizer": false
374
  },
375
  {
376
  "name": "Amonap",
377
  "iso_1_code": null,
378
  "iso_3_code": null,
379
- "tokenizers": {},
380
  "children": [
381
  {
382
  "name": "Kuik\u00faro-Kalap\u00e1lo",
383
  "iso_1_code": null,
384
  "iso_3_code": "kui",
385
- "tokenizers": {},
386
  "children": [],
 
387
  "node_i": "3344",
388
- "scripts": [],
389
- "own_tokenizer": false
390
  },
391
  {
392
  "name": "Matipuhy",
393
  "iso_1_code": null,
394
  "iso_3_code": "mzo",
395
- "tokenizers": {},
396
  "children": [],
 
397
  "node_i": "3345",
398
- "scripts": [],
399
- "own_tokenizer": false
400
  }
401
  ],
 
402
  "node_i": "3343",
403
- "scripts": [],
404
- "own_tokenizer": false
405
  }
406
  ],
 
407
  "node_i": "3341",
408
- "scripts": [],
409
- "own_tokenizer": false
410
  }
411
  ],
 
412
  "node_i": "3336",
413
- "scripts": [],
414
- "own_tokenizer": false
415
  },
416
  {
417
  "name": "Tiriy\u00f3",
418
  "iso_1_code": null,
419
  "iso_3_code": null,
420
- "tokenizers": {},
421
  "children": [
422
  {
423
  "name": "Salum\u00e1",
424
  "iso_1_code": null,
425
  "iso_3_code": "slj",
426
- "tokenizers": {},
427
  "children": [],
 
428
  "node_i": "3347",
429
- "scripts": [],
430
- "own_tokenizer": false
431
  },
432
  {
433
  "name": "Karihona",
434
  "iso_1_code": null,
435
  "iso_3_code": null,
436
- "tokenizers": {},
437
  "children": [
438
  {
439
  "name": "Carijona",
440
  "iso_1_code": null,
441
  "iso_3_code": "cbd",
442
- "tokenizers": {},
443
  "children": [],
 
444
  "node_i": "3349",
445
- "scripts": [],
446
- "own_tokenizer": false
447
  }
448
  ],
 
449
  "node_i": "3348",
450
- "scripts": [],
451
- "own_tokenizer": false
452
  },
453
  {
454
  "name": "Tiriy\u00f3",
455
  "iso_1_code": null,
456
  "iso_3_code": null,
457
- "tokenizers": {},
458
  "children": [
459
  {
460
  "name": "Akurio",
461
  "iso_1_code": null,
462
  "iso_3_code": "ako",
463
- "tokenizers": {},
464
  "children": [],
 
465
  "node_i": "3351",
466
- "scripts": [],
467
- "own_tokenizer": false
468
  },
469
  {
470
  "name": "Tri\u00f3",
471
  "iso_1_code": null,
472
  "iso_3_code": "tri",
473
- "tokenizers": {},
474
  "children": [],
 
475
  "node_i": "3352",
476
- "scripts": [],
477
- "own_tokenizer": false
478
  }
479
  ],
 
480
  "node_i": "3350",
481
- "scripts": [],
482
- "own_tokenizer": false
483
  }
484
  ],
 
485
  "node_i": "3346",
486
- "scripts": [],
487
- "own_tokenizer": false
488
  },
489
  {
490
  "name": "Waiwai",
491
  "iso_1_code": null,
492
  "iso_3_code": null,
493
- "tokenizers": {},
494
  "children": [
495
  {
496
  "name": "Hixkary\u00e1na",
497
  "iso_1_code": null,
498
  "iso_3_code": "hix",
499
- "tokenizers": {},
500
  "children": [],
 
501
  "node_i": "3354",
 
502
  "scripts": [
503
  "Latn"
504
- ],
505
- "own_tokenizer": false
506
  },
507
  {
508
  "name": "Waiwai",
509
  "iso_1_code": null,
510
  "iso_3_code": "waw",
511
- "tokenizers": {},
512
  "children": [],
 
513
  "node_i": "3355",
514
- "scripts": [],
515
- "own_tokenizer": false
516
  }
517
  ],
 
518
  "node_i": "3353",
519
- "scripts": [],
520
- "own_tokenizer": false
521
  },
522
  {
523
  "name": "Yukpa",
524
  "iso_1_code": null,
525
  "iso_3_code": null,
526
- "tokenizers": {},
527
  "children": [
528
  {
529
  "name": "Yucpa-Yapreria",
530
  "iso_1_code": null,
531
  "iso_3_code": null,
532
- "tokenizers": {},
533
  "children": [
534
  {
535
  "name": "Japreria",
536
  "iso_1_code": null,
537
  "iso_3_code": "jru",
538
- "tokenizers": {},
539
  "children": [],
 
540
  "node_i": "3358",
541
- "scripts": [],
542
- "own_tokenizer": false
543
  },
544
  {
545
  "name": "Yukpa",
546
  "iso_1_code": null,
547
  "iso_3_code": "yup",
548
- "tokenizers": {},
549
  "children": [],
 
550
  "node_i": "3359",
 
551
  "scripts": [
552
  "Latn"
553
- ],
554
- "own_tokenizer": false
555
  }
556
  ],
 
557
  "node_i": "3357",
558
- "scripts": [],
559
- "own_tokenizer": false
560
  }
561
  ],
 
562
  "node_i": "3356",
563
- "scripts": [],
564
- "own_tokenizer": false
565
  }
566
  ],
 
567
  "node_i": "3307",
568
- "scripts": [],
569
- "own_tokenizer": false
570
  }
 
2
  "name": "Cariban",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Carib",
8
  "iso_1_code": null,
9
  "iso_3_code": "car",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3308",
13
+ "native_tokenizers": [],
14
  "scripts": [
15
  "Latn"
16
+ ]
 
17
  },
18
  {
19
  "name": "Central",
20
  "iso_1_code": null,
21
  "iso_3_code": null,
 
22
  "children": [
23
  {
24
  "name": "Apala\u00ed",
25
  "iso_1_code": null,
26
  "iso_3_code": "apy",
 
27
  "children": [],
28
+ "tokenizers": {},
29
  "node_i": "3310",
30
+ "native_tokenizers": [],
31
  "scripts": [
32
  "Latn"
33
+ ]
 
34
  },
35
  {
36
  "name": "Cuman\u00e1",
37
  "iso_1_code": null,
38
  "iso_3_code": null,
 
39
  "children": [
40
  {
41
  "name": "Chaima",
42
  "iso_1_code": null,
43
  "iso_3_code": "ciy",
 
44
  "children": [],
45
+ "tokenizers": {},
46
  "node_i": "3312",
47
+ "native_tokenizers": [],
48
+ "scripts": []
49
  },
50
  {
51
  "name": "Cumanagoto",
52
  "iso_1_code": null,
53
  "iso_3_code": "cuo",
 
54
  "children": [],
55
+ "tokenizers": {},
56
  "node_i": "3313",
57
+ "native_tokenizers": [],
58
+ "scripts": []
59
  }
60
  ],
61
+ "tokenizers": {},
62
  "node_i": "3311",
63
+ "native_tokenizers": [],
64
+ "scripts": []
65
  },
66
  {
67
  "name": "Makiritare",
68
  "iso_1_code": null,
69
  "iso_3_code": null,
 
70
  "children": [
71
  {
72
  "name": "Maquiritari",
73
  "iso_1_code": null,
74
  "iso_3_code": "mch",
 
75
  "children": [],
76
+ "tokenizers": {},
77
  "node_i": "3315",
78
+ "native_tokenizers": [],
79
+ "scripts": []
80
  }
81
  ],
82
+ "tokenizers": {},
83
  "node_i": "3314",
84
+ "native_tokenizers": [],
85
+ "scripts": []
86
  },
87
  {
88
  "name": "Mapoyo-Yavarana",
89
  "iso_1_code": null,
90
  "iso_3_code": null,
 
91
  "children": [
92
  {
93
  "name": "Mapoyo",
94
  "iso_1_code": null,
95
  "iso_3_code": "mcg",
 
96
  "children": [],
97
+ "tokenizers": {},
98
  "node_i": "3317",
99
+ "native_tokenizers": [],
100
+ "scripts": []
101
  },
102
  {
103
  "name": "P\u00e9mono",
104
  "iso_1_code": null,
105
  "iso_3_code": "pev",
 
106
  "children": [],
107
+ "tokenizers": {},
108
  "node_i": "3318",
109
+ "native_tokenizers": [],
110
+ "scripts": []
111
  },
112
  {
113
  "name": "Tamanaku",
114
  "iso_1_code": null,
115
  "iso_3_code": "tmz",
 
116
  "children": [],
117
+ "tokenizers": {},
118
  "node_i": "3319",
119
+ "native_tokenizers": [],
120
+ "scripts": []
121
  },
122
  {
123
  "name": "Yabarana",
124
  "iso_1_code": null,
125
  "iso_3_code": "yar",
 
126
  "children": [],
127
+ "tokenizers": {},
128
  "node_i": "3320",
129
+ "native_tokenizers": [],
130
+ "scripts": []
131
  }
132
  ],
133
+ "tokenizers": {},
134
  "node_i": "3316",
135
+ "native_tokenizers": [],
136
+ "scripts": []
137
  },
138
  {
139
  "name": "Wayana",
140
  "iso_1_code": null,
141
  "iso_3_code": null,
 
142
  "children": [
143
  {
144
  "name": "Kaxui\u00e2na",
145
  "iso_1_code": null,
146
  "iso_3_code": "kbb",
 
147
  "children": [],
148
+ "tokenizers": {},
149
  "node_i": "3322",
150
+ "native_tokenizers": [],
151
+ "scripts": []
152
  },
153
  {
154
  "name": "Wayana",
155
  "iso_1_code": null,
156
  "iso_3_code": "way",
 
157
  "children": [],
158
+ "tokenizers": {},
159
  "node_i": "3323",
160
+ "native_tokenizers": [],
161
  "scripts": [
162
  "Latn"
163
+ ]
 
164
  }
165
  ],
166
+ "tokenizers": {},
167
  "node_i": "3321",
168
+ "native_tokenizers": [],
169
+ "scripts": []
170
  }
171
  ],
172
+ "tokenizers": {},
173
  "node_i": "3309",
174
+ "native_tokenizers": [],
175
+ "scripts": []
176
  },
177
  {
178
  "name": "Kashuyana",
179
  "iso_1_code": null,
180
  "iso_3_code": null,
 
181
  "children": [
182
  {
183
  "name": "Sikiana",
184
  "iso_1_code": null,
185
  "iso_3_code": "sik",
 
186
  "children": [],
187
+ "tokenizers": {},
188
  "node_i": "3325",
189
+ "native_tokenizers": [],
190
+ "scripts": []
191
  }
192
  ],
193
+ "tokenizers": {},
194
  "node_i": "3324",
195
+ "native_tokenizers": [],
196
+ "scripts": []
197
  },
198
  {
199
  "name": "North Amazonian",
200
  "iso_1_code": null,
201
  "iso_3_code": null,
 
202
  "children": [
203
  {
204
  "name": "Pem\u00f3n",
205
  "iso_1_code": null,
206
  "iso_3_code": null,
 
207
  "children": [
208
  {
209
  "name": "Pem\u00f3n proper",
210
  "iso_1_code": null,
211
  "iso_3_code": null,
 
212
  "children": [
213
  {
214
  "name": "Pemon",
215
  "iso_1_code": null,
216
  "iso_3_code": "aoc",
 
217
  "children": [],
218
+ "tokenizers": {},
219
  "node_i": "3329",
220
+ "native_tokenizers": [],
221
+ "scripts": []
222
  },
223
  {
224
  "name": "Macushi",
225
  "iso_1_code": null,
226
  "iso_3_code": "mbc",
 
227
  "children": [],
228
+ "tokenizers": {},
229
  "node_i": "3330",
230
+ "native_tokenizers": [],
231
  "scripts": [
232
  "Latn"
233
+ ]
 
234
  },
235
  {
236
  "name": "Kapong",
237
  "iso_1_code": null,
238
  "iso_3_code": null,
 
239
  "children": [
240
  {
241
  "name": "Akawaio",
242
  "iso_1_code": null,
243
  "iso_3_code": "ake",
 
244
  "children": [],
245
+ "tokenizers": {},
246
  "node_i": "3332",
247
+ "native_tokenizers": [],
248
  "scripts": [
249
  "Latn"
250
+ ]
 
251
  },
252
  {
253
  "name": "Patamona",
254
  "iso_1_code": null,
255
  "iso_3_code": "pbc",
 
256
  "children": [],
257
+ "tokenizers": {},
258
  "node_i": "3333",
259
+ "native_tokenizers": [],
260
  "scripts": [
261
  "Latn"
262
+ ]
 
263
  }
264
  ],
265
+ "tokenizers": {},
266
  "node_i": "3331",
267
+ "native_tokenizers": [],
268
+ "scripts": []
269
  }
270
  ],
271
+ "tokenizers": {},
272
  "node_i": "3328",
273
+ "native_tokenizers": [],
274
+ "scripts": []
275
  }
276
  ],
277
+ "tokenizers": {},
278
  "node_i": "3327",
279
+ "native_tokenizers": [],
280
+ "scripts": []
281
  },
282
  {
283
  "name": "Yawaper\u00ed",
284
  "iso_1_code": null,
285
  "iso_3_code": null,
 
286
  "children": [
287
  {
288
  "name": "Waimiri-Atroar\u00ed",
289
  "iso_1_code": null,
290
  "iso_3_code": "atr",
 
291
  "children": [],
292
+ "tokenizers": {},
293
  "node_i": "3335",
294
+ "native_tokenizers": [],
295
+ "scripts": []
296
  }
297
  ],
298
+ "tokenizers": {},
299
  "node_i": "3334",
300
+ "native_tokenizers": [],
301
+ "scripts": []
302
  }
303
  ],
304
+ "tokenizers": {},
305
  "node_i": "3326",
306
+ "native_tokenizers": [],
307
+ "scripts": []
308
  },
309
  {
310
  "name": "South Amazonian",
311
  "iso_1_code": null,
312
  "iso_3_code": null,
 
313
  "children": [
314
  {
315
  "name": "E\u2019\u00f1apa Woromaipu",
316
  "iso_1_code": null,
317
  "iso_3_code": "pbh",
 
318
  "children": [],
319
+ "tokenizers": {},
320
  "node_i": "3337",
321
+ "native_tokenizers": [],
322
+ "scripts": []
323
  },
324
  {
325
  "name": "Arara",
326
  "iso_1_code": null,
327
  "iso_3_code": null,
 
328
  "children": [
329
  {
330
  "name": "Arara, Par\u00e1",
331
  "iso_1_code": null,
332
  "iso_3_code": "aap",
 
333
  "children": [],
334
+ "tokenizers": {},
335
  "node_i": "3339",
336
+ "native_tokenizers": [],
337
+ "scripts": []
338
  },
339
  {
340
  "name": "Ikpeng",
341
  "iso_1_code": null,
342
  "iso_3_code": "txi",
 
343
  "children": [],
344
+ "tokenizers": {},
345
  "node_i": "3340",
346
+ "native_tokenizers": [],
347
+ "scripts": []
348
  }
349
  ],
350
+ "tokenizers": {},
351
  "node_i": "3338",
352
+ "native_tokenizers": [],
353
+ "scripts": []
354
  },
355
  {
356
  "name": "Bakair\u00ed",
357
  "iso_1_code": null,
358
  "iso_3_code": null,
 
359
  "children": [
360
  {
361
  "name": "Bakair\u00ed",
362
  "iso_1_code": null,
363
  "iso_3_code": "bkq",
 
364
  "children": [],
365
+ "tokenizers": {},
366
  "node_i": "3342",
367
+ "native_tokenizers": [],
368
  "scripts": [
369
  "Latn"
370
+ ]
 
371
  },
372
  {
373
  "name": "Amonap",
374
  "iso_1_code": null,
375
  "iso_3_code": null,
 
376
  "children": [
377
  {
378
  "name": "Kuik\u00faro-Kalap\u00e1lo",
379
  "iso_1_code": null,
380
  "iso_3_code": "kui",
 
381
  "children": [],
382
+ "tokenizers": {},
383
  "node_i": "3344",
384
+ "native_tokenizers": [],
385
+ "scripts": []
386
  },
387
  {
388
  "name": "Matipuhy",
389
  "iso_1_code": null,
390
  "iso_3_code": "mzo",
 
391
  "children": [],
392
+ "tokenizers": {},
393
  "node_i": "3345",
394
+ "native_tokenizers": [],
395
+ "scripts": []
396
  }
397
  ],
398
+ "tokenizers": {},
399
  "node_i": "3343",
400
+ "native_tokenizers": [],
401
+ "scripts": []
402
  }
403
  ],
404
+ "tokenizers": {},
405
  "node_i": "3341",
406
+ "native_tokenizers": [],
407
+ "scripts": []
408
  }
409
  ],
410
+ "tokenizers": {},
411
  "node_i": "3336",
412
+ "native_tokenizers": [],
413
+ "scripts": []
414
  },
415
  {
416
  "name": "Tiriy\u00f3",
417
  "iso_1_code": null,
418
  "iso_3_code": null,
 
419
  "children": [
420
  {
421
  "name": "Salum\u00e1",
422
  "iso_1_code": null,
423
  "iso_3_code": "slj",
 
424
  "children": [],
425
+ "tokenizers": {},
426
  "node_i": "3347",
427
+ "native_tokenizers": [],
428
+ "scripts": []
429
  },
430
  {
431
  "name": "Karihona",
432
  "iso_1_code": null,
433
  "iso_3_code": null,
 
434
  "children": [
435
  {
436
  "name": "Carijona",
437
  "iso_1_code": null,
438
  "iso_3_code": "cbd",
 
439
  "children": [],
440
+ "tokenizers": {},
441
  "node_i": "3349",
442
+ "native_tokenizers": [],
443
+ "scripts": []
444
  }
445
  ],
446
+ "tokenizers": {},
447
  "node_i": "3348",
448
+ "native_tokenizers": [],
449
+ "scripts": []
450
  },
451
  {
452
  "name": "Tiriy\u00f3",
453
  "iso_1_code": null,
454
  "iso_3_code": null,
 
455
  "children": [
456
  {
457
  "name": "Akurio",
458
  "iso_1_code": null,
459
  "iso_3_code": "ako",
 
460
  "children": [],
461
+ "tokenizers": {},
462
  "node_i": "3351",
463
+ "native_tokenizers": [],
464
+ "scripts": []
465
  },
466
  {
467
  "name": "Tri\u00f3",
468
  "iso_1_code": null,
469
  "iso_3_code": "tri",
 
470
  "children": [],
471
+ "tokenizers": {},
472
  "node_i": "3352",
473
+ "native_tokenizers": [],
474
+ "scripts": []
475
  }
476
  ],
477
+ "tokenizers": {},
478
  "node_i": "3350",
479
+ "native_tokenizers": [],
480
+ "scripts": []
481
  }
482
  ],
483
+ "tokenizers": {},
484
  "node_i": "3346",
485
+ "native_tokenizers": [],
486
+ "scripts": []
487
  },
488
  {
489
  "name": "Waiwai",
490
  "iso_1_code": null,
491
  "iso_3_code": null,
 
492
  "children": [
493
  {
494
  "name": "Hixkary\u00e1na",
495
  "iso_1_code": null,
496
  "iso_3_code": "hix",
 
497
  "children": [],
498
+ "tokenizers": {},
499
  "node_i": "3354",
500
+ "native_tokenizers": [],
501
  "scripts": [
502
  "Latn"
503
+ ]
 
504
  },
505
  {
506
  "name": "Waiwai",
507
  "iso_1_code": null,
508
  "iso_3_code": "waw",
 
509
  "children": [],
510
+ "tokenizers": {},
511
  "node_i": "3355",
512
+ "native_tokenizers": [],
513
+ "scripts": []
514
  }
515
  ],
516
+ "tokenizers": {},
517
  "node_i": "3353",
518
+ "native_tokenizers": [],
519
+ "scripts": []
520
  },
521
  {
522
  "name": "Yukpa",
523
  "iso_1_code": null,
524
  "iso_3_code": null,
 
525
  "children": [
526
  {
527
  "name": "Yucpa-Yapreria",
528
  "iso_1_code": null,
529
  "iso_3_code": null,
 
530
  "children": [
531
  {
532
  "name": "Japreria",
533
  "iso_1_code": null,
534
  "iso_3_code": "jru",
 
535
  "children": [],
536
+ "tokenizers": {},
537
  "node_i": "3358",
538
+ "native_tokenizers": [],
539
+ "scripts": []
540
  },
541
  {
542
  "name": "Yukpa",
543
  "iso_1_code": null,
544
  "iso_3_code": "yup",
 
545
  "children": [],
546
+ "tokenizers": {},
547
  "node_i": "3359",
548
+ "native_tokenizers": [],
549
  "scripts": [
550
  "Latn"
551
+ ]
 
552
  }
553
  ],
554
+ "tokenizers": {},
555
  "node_i": "3357",
556
+ "native_tokenizers": [],
557
+ "scripts": []
558
  }
559
  ],
560
+ "tokenizers": {},
561
  "node_i": "3356",
562
+ "native_tokenizers": [],
563
+ "scripts": []
564
  }
565
  ],
566
+ "tokenizers": {},
567
  "node_i": "3307",
568
+ "native_tokenizers": [],
569
+ "scripts": []
570
  }
data/Central Solomons.json CHANGED
@@ -2,50 +2,50 @@
2
  "name": "Central Solomons",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Bilua",
9
  "iso_1_code": null,
10
  "iso_3_code": "blb",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3361",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Lavukaleve",
19
  "iso_1_code": null,
20
  "iso_3_code": "lvk",
21
- "tokenizers": {},
22
  "children": [],
 
23
  "node_i": "3362",
24
- "scripts": [],
25
- "own_tokenizer": false
26
  },
27
  {
28
  "name": "Savosavo",
29
  "iso_1_code": null,
30
  "iso_3_code": "svs",
31
- "tokenizers": {},
32
  "children": [],
 
33
  "node_i": "3363",
34
- "scripts": [],
35
- "own_tokenizer": false
36
  },
37
  {
38
  "name": "Touo",
39
  "iso_1_code": null,
40
  "iso_3_code": "tqu",
41
- "tokenizers": {},
42
  "children": [],
 
43
  "node_i": "3364",
44
- "scripts": [],
45
- "own_tokenizer": false
46
  }
47
  ],
 
48
  "node_i": "3360",
49
- "scripts": [],
50
- "own_tokenizer": false
51
  }
 
2
  "name": "Central Solomons",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Bilua",
8
  "iso_1_code": null,
9
  "iso_3_code": "blb",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3361",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Lavukaleve",
18
  "iso_1_code": null,
19
  "iso_3_code": "lvk",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "3362",
23
+ "native_tokenizers": [],
24
+ "scripts": []
25
  },
26
  {
27
  "name": "Savosavo",
28
  "iso_1_code": null,
29
  "iso_3_code": "svs",
 
30
  "children": [],
31
+ "tokenizers": {},
32
  "node_i": "3363",
33
+ "native_tokenizers": [],
34
+ "scripts": []
35
  },
36
  {
37
  "name": "Touo",
38
  "iso_1_code": null,
39
  "iso_3_code": "tqu",
 
40
  "children": [],
41
+ "tokenizers": {},
42
  "node_i": "3364",
43
+ "native_tokenizers": [],
44
+ "scripts": []
45
  }
46
  ],
47
+ "tokenizers": {},
48
  "node_i": "3360",
49
+ "native_tokenizers": [],
50
+ "scripts": []
51
  }
data/Chapacuran.json CHANGED
@@ -2,72 +2,72 @@
2
  "name": "Chapacuran",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Itene",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Itene",
15
  "iso_1_code": null,
16
  "iso_3_code": "ite",
17
- "tokenizers": {},
18
  "children": [],
 
19
  "node_i": "3367",
20
- "scripts": [],
21
- "own_tokenizer": false
22
  },
23
  {
24
  "name": "Tor\u00e1",
25
  "iso_1_code": null,
26
  "iso_3_code": "trz",
27
- "tokenizers": {},
28
  "children": [],
 
29
  "node_i": "3368",
30
- "scripts": [],
31
- "own_tokenizer": false
32
  }
33
  ],
 
34
  "node_i": "3366",
35
- "scripts": [],
36
- "own_tokenizer": false
37
  },
38
  {
39
  "name": "Wari",
40
  "iso_1_code": null,
41
  "iso_3_code": null,
42
- "tokenizers": {},
43
  "children": [
44
  {
45
  "name": "Oro Win",
46
  "iso_1_code": null,
47
  "iso_3_code": "orw",
48
- "tokenizers": {},
49
  "children": [],
 
50
  "node_i": "3370",
51
- "scripts": [],
52
- "own_tokenizer": false
53
  },
54
  {
55
  "name": "Paka\u00e1snovos",
56
  "iso_1_code": null,
57
  "iso_3_code": "pav",
58
- "tokenizers": {},
59
  "children": [],
 
60
  "node_i": "3371",
61
- "scripts": [],
62
- "own_tokenizer": false
63
  }
64
  ],
 
65
  "node_i": "3369",
66
- "scripts": [],
67
- "own_tokenizer": false
68
  }
69
  ],
 
70
  "node_i": "3365",
71
- "scripts": [],
72
- "own_tokenizer": false
73
  }
 
2
  "name": "Chapacuran",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Itene",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Itene",
13
  "iso_1_code": null,
14
  "iso_3_code": "ite",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "3367",
18
+ "native_tokenizers": [],
19
+ "scripts": []
20
  },
21
  {
22
  "name": "Tor\u00e1",
23
  "iso_1_code": null,
24
  "iso_3_code": "trz",
 
25
  "children": [],
26
+ "tokenizers": {},
27
  "node_i": "3368",
28
+ "native_tokenizers": [],
29
+ "scripts": []
30
  }
31
  ],
32
+ "tokenizers": {},
33
  "node_i": "3366",
34
+ "native_tokenizers": [],
35
+ "scripts": []
36
  },
37
  {
38
  "name": "Wari",
39
  "iso_1_code": null,
40
  "iso_3_code": null,
 
41
  "children": [
42
  {
43
  "name": "Oro Win",
44
  "iso_1_code": null,
45
  "iso_3_code": "orw",
 
46
  "children": [],
47
+ "tokenizers": {},
48
  "node_i": "3370",
49
+ "native_tokenizers": [],
50
+ "scripts": []
51
  },
52
  {
53
  "name": "Paka\u00e1snovos",
54
  "iso_1_code": null,
55
  "iso_3_code": "pav",
 
56
  "children": [],
57
+ "tokenizers": {},
58
  "node_i": "3371",
59
+ "native_tokenizers": [],
60
+ "scripts": []
61
  }
62
  ],
63
+ "tokenizers": {},
64
  "node_i": "3369",
65
+ "native_tokenizers": [],
66
+ "scripts": []
67
  }
68
  ],
69
+ "tokenizers": {},
70
  "node_i": "3365",
71
+ "native_tokenizers": [],
72
+ "scripts": []
73
  }
data/Chibchan.json CHANGED
@@ -2,392 +2,392 @@
2
  "name": "Chibchan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Chibchan A",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Boruca",
15
  "iso_1_code": null,
16
  "iso_3_code": "brn",
17
- "tokenizers": {},
18
  "children": [],
 
19
  "node_i": "3374",
20
- "scripts": [],
21
- "own_tokenizer": false
22
  },
23
  {
24
  "name": "Teribe",
25
  "iso_1_code": null,
26
  "iso_3_code": "tfr",
27
- "tokenizers": {},
28
  "children": [],
 
29
  "node_i": "3375",
 
30
  "scripts": [
31
  "Latn"
32
- ],
33
- "own_tokenizer": false
34
  },
35
  {
36
  "name": "Guaymi\u00edc",
37
  "iso_1_code": null,
38
  "iso_3_code": null,
39
- "tokenizers": {},
40
  "children": [
41
  {
42
  "name": "Ng\u00e4bere",
43
  "iso_1_code": null,
44
  "iso_3_code": "gym",
45
- "tokenizers": {},
46
  "children": [],
 
47
  "node_i": "3377",
 
48
  "scripts": [
49
  "Latn"
50
- ],
51
- "own_tokenizer": false
52
  },
53
  {
54
  "name": "Buglere",
55
  "iso_1_code": null,
56
  "iso_3_code": "sab",
57
- "tokenizers": {},
58
  "children": [],
 
59
  "node_i": "3378",
 
60
  "scripts": [
61
  "Latn"
62
- ],
63
- "own_tokenizer": false
64
  }
65
  ],
 
66
  "node_i": "3376",
67
- "scripts": [],
68
- "own_tokenizer": false
69
  },
70
  {
71
  "name": "Viceitic",
72
  "iso_1_code": null,
73
  "iso_3_code": null,
74
- "tokenizers": {},
75
  "children": [
76
  {
77
  "name": "Bribri",
78
  "iso_1_code": null,
79
  "iso_3_code": "bzd",
80
- "tokenizers": {},
81
  "children": [],
 
82
  "node_i": "3380",
 
83
  "scripts": [
84
  "Latn"
85
- ],
86
- "own_tokenizer": false
87
  },
88
  {
89
  "name": "Cab\u00e9car",
90
  "iso_1_code": null,
91
  "iso_3_code": "cjp",
92
- "tokenizers": {},
93
  "children": [],
 
94
  "node_i": "3381",
 
95
  "scripts": [
96
  "Latn"
97
- ],
98
- "own_tokenizer": false
99
  }
100
  ],
 
101
  "node_i": "3379",
102
- "scripts": [],
103
- "own_tokenizer": false
104
  }
105
  ],
 
106
  "node_i": "3373",
107
- "scripts": [],
108
- "own_tokenizer": false
109
  },
110
  {
111
  "name": "Chibchan B",
112
  "iso_1_code": null,
113
  "iso_3_code": null,
114
- "tokenizers": {},
115
  "children": [
116
  {
117
  "name": "Pech",
118
  "iso_1_code": null,
119
  "iso_3_code": "pay",
120
- "tokenizers": {},
121
  "children": [],
 
122
  "node_i": "3383",
123
- "scripts": [],
124
- "own_tokenizer": false
125
  },
126
  {
127
  "name": "Eastern Chibchan",
128
  "iso_1_code": null,
129
  "iso_3_code": null,
130
- "tokenizers": {},
131
  "children": [
132
  {
133
  "name": "Colombian",
134
  "iso_1_code": null,
135
  "iso_3_code": null,
136
- "tokenizers": {},
137
  "children": [
138
  {
139
  "name": "Northern Colombian",
140
  "iso_1_code": null,
141
  "iso_3_code": null,
142
- "tokenizers": {},
143
  "children": [
144
  {
145
  "name": "Chimila",
146
  "iso_1_code": null,
147
  "iso_3_code": "cbg",
148
- "tokenizers": {},
149
  "children": [],
 
150
  "node_i": "3387",
151
- "scripts": [],
152
- "own_tokenizer": false
153
  },
154
  {
155
  "name": "Arhuacan",
156
  "iso_1_code": null,
157
  "iso_3_code": null,
158
- "tokenizers": {},
159
  "children": [
160
  {
161
  "name": "Kogi",
162
  "iso_1_code": null,
163
  "iso_3_code": "kog",
164
- "tokenizers": {},
165
  "children": [],
 
166
  "node_i": "3389",
 
167
  "scripts": [
168
  "Latn"
169
- ],
170
- "own_tokenizer": false
171
  },
172
  {
173
  "name": "Southern and Eastern Arhuacan",
174
  "iso_1_code": null,
175
  "iso_3_code": null,
176
- "tokenizers": {},
177
  "children": [
178
  {
179
  "name": "Arhuaco",
180
  "iso_1_code": null,
181
  "iso_3_code": "arh",
182
- "tokenizers": {},
183
  "children": [],
 
184
  "node_i": "3391",
185
- "scripts": [],
186
- "own_tokenizer": false
187
  },
188
  {
189
  "name": "Guamaca-Atanque",
190
  "iso_1_code": null,
191
  "iso_3_code": null,
192
- "tokenizers": {},
193
  "children": [
194
  {
195
  "name": "Sanka",
196
  "iso_1_code": null,
197
  "iso_3_code": "mbp",
198
- "tokenizers": {},
199
  "children": [],
 
200
  "node_i": "3393",
201
- "scripts": [],
202
- "own_tokenizer": false
203
  }
204
  ],
 
205
  "node_i": "3392",
206
- "scripts": [],
207
- "own_tokenizer": false
208
  }
209
  ],
 
210
  "node_i": "3390",
211
- "scripts": [],
212
- "own_tokenizer": false
213
  }
214
  ],
 
215
  "node_i": "3388",
216
- "scripts": [],
217
- "own_tokenizer": false
218
  }
219
  ],
 
220
  "node_i": "3386",
221
- "scripts": [],
222
- "own_tokenizer": false
223
  },
224
  {
225
  "name": "Southern Colombian",
226
  "iso_1_code": null,
227
  "iso_3_code": null,
228
- "tokenizers": {},
229
  "children": [
230
  {
231
  "name": "Bar\u00ed",
232
  "iso_1_code": null,
233
  "iso_3_code": "mot",
234
- "tokenizers": {},
235
  "children": [],
 
236
  "node_i": "3395",
237
- "scripts": [],
238
- "own_tokenizer": false
239
  },
240
  {
241
  "name": "Cundicocuyese",
242
  "iso_1_code": null,
243
  "iso_3_code": null,
244
- "tokenizers": {},
245
  "children": [
246
  {
247
  "name": "Chibcha",
248
  "iso_1_code": null,
249
  "iso_3_code": "chb",
250
- "tokenizers": {},
251
  "children": [],
 
252
  "node_i": "3397",
253
- "scripts": [],
254
- "own_tokenizer": false
255
  },
256
  {
257
  "name": "Tunebo, Barro Negro",
258
  "iso_1_code": null,
259
  "iso_3_code": "tbn",
260
- "tokenizers": {},
261
  "children": [],
 
262
  "node_i": "3398",
263
- "scripts": [],
264
- "own_tokenizer": false
265
  },
266
  {
267
  "name": "Tunebo, Western",
268
  "iso_1_code": null,
269
  "iso_3_code": "tnb",
270
- "tokenizers": {},
271
  "children": [],
 
272
  "node_i": "3399",
273
- "scripts": [],
274
- "own_tokenizer": false
275
  },
276
  {
277
  "name": "Tunebo, Angosturas",
278
  "iso_1_code": null,
279
  "iso_3_code": "tnd",
280
- "tokenizers": {},
281
  "children": [],
 
282
  "node_i": "3400",
283
- "scripts": [],
284
- "own_tokenizer": false
285
  },
286
  {
287
  "name": "Tunebo, Central",
288
  "iso_1_code": null,
289
  "iso_3_code": "tuf",
290
- "tokenizers": {},
291
  "children": [],
 
292
  "node_i": "3401",
 
293
  "scripts": [
294
  "Latn"
295
- ],
296
- "own_tokenizer": false
297
  }
298
  ],
 
299
  "node_i": "3396",
300
- "scripts": [],
301
- "own_tokenizer": false
302
  }
303
  ],
 
304
  "node_i": "3394",
305
- "scripts": [],
306
- "own_tokenizer": false
307
  }
308
  ],
 
309
  "node_i": "3385",
310
- "scripts": [],
311
- "own_tokenizer": false
312
  },
313
  {
314
  "name": "Cuna",
315
  "iso_1_code": null,
316
  "iso_3_code": null,
317
- "tokenizers": {},
318
  "children": [
319
  {
320
  "name": "Kuna, San Blas",
321
  "iso_1_code": null,
322
  "iso_3_code": "cuk",
323
- "tokenizers": {},
324
  "children": [],
 
325
  "node_i": "3403",
 
326
  "scripts": [
327
  "Latn"
328
- ],
329
- "own_tokenizer": false
330
  },
331
  {
332
  "name": "Kuna, Border",
333
  "iso_1_code": null,
334
  "iso_3_code": "kvn",
335
- "tokenizers": {},
336
  "children": [],
 
337
  "node_i": "3404",
 
338
  "scripts": [
339
  "Latn"
340
- ],
341
- "own_tokenizer": false
342
  }
343
  ],
 
344
  "node_i": "3402",
345
- "scripts": [],
346
- "own_tokenizer": false
347
  }
348
  ],
 
349
  "node_i": "3384",
350
- "scripts": [],
351
- "own_tokenizer": false
352
  },
353
  {
354
  "name": "Votic",
355
  "iso_1_code": null,
356
  "iso_3_code": null,
357
- "tokenizers": {},
358
  "children": [
359
  {
360
  "name": "Mal\u00e9ku Ja\u00edka",
361
  "iso_1_code": null,
362
  "iso_3_code": "gut",
363
- "tokenizers": {},
364
  "children": [],
 
365
  "node_i": "3406",
366
- "scripts": [],
367
- "own_tokenizer": false
368
  },
369
  {
370
  "name": "Rama",
371
  "iso_1_code": null,
372
  "iso_3_code": "rma",
373
- "tokenizers": {},
374
  "children": [],
 
375
  "node_i": "3407",
376
- "scripts": [],
377
- "own_tokenizer": false
378
  }
379
  ],
 
380
  "node_i": "3405",
381
- "scripts": [],
382
- "own_tokenizer": false
383
  }
384
  ],
 
385
  "node_i": "3382",
386
- "scripts": [],
387
- "own_tokenizer": false
388
  }
389
  ],
 
390
  "node_i": "3372",
391
- "scripts": [],
392
- "own_tokenizer": false
393
  }
 
2
  "name": "Chibchan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Chibchan A",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Boruca",
13
  "iso_1_code": null,
14
  "iso_3_code": "brn",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "3374",
18
+ "native_tokenizers": [],
19
+ "scripts": []
20
  },
21
  {
22
  "name": "Teribe",
23
  "iso_1_code": null,
24
  "iso_3_code": "tfr",
 
25
  "children": [],
26
+ "tokenizers": {},
27
  "node_i": "3375",
28
+ "native_tokenizers": [],
29
  "scripts": [
30
  "Latn"
31
+ ]
 
32
  },
33
  {
34
  "name": "Guaymi\u00edc",
35
  "iso_1_code": null,
36
  "iso_3_code": null,
 
37
  "children": [
38
  {
39
  "name": "Ng\u00e4bere",
40
  "iso_1_code": null,
41
  "iso_3_code": "gym",
 
42
  "children": [],
43
+ "tokenizers": {},
44
  "node_i": "3377",
45
+ "native_tokenizers": [],
46
  "scripts": [
47
  "Latn"
48
+ ]
 
49
  },
50
  {
51
  "name": "Buglere",
52
  "iso_1_code": null,
53
  "iso_3_code": "sab",
 
54
  "children": [],
55
+ "tokenizers": {},
56
  "node_i": "3378",
57
+ "native_tokenizers": [],
58
  "scripts": [
59
  "Latn"
60
+ ]
 
61
  }
62
  ],
63
+ "tokenizers": {},
64
  "node_i": "3376",
65
+ "native_tokenizers": [],
66
+ "scripts": []
67
  },
68
  {
69
  "name": "Viceitic",
70
  "iso_1_code": null,
71
  "iso_3_code": null,
 
72
  "children": [
73
  {
74
  "name": "Bribri",
75
  "iso_1_code": null,
76
  "iso_3_code": "bzd",
 
77
  "children": [],
78
+ "tokenizers": {},
79
  "node_i": "3380",
80
+ "native_tokenizers": [],
81
  "scripts": [
82
  "Latn"
83
+ ]
 
84
  },
85
  {
86
  "name": "Cab\u00e9car",
87
  "iso_1_code": null,
88
  "iso_3_code": "cjp",
 
89
  "children": [],
90
+ "tokenizers": {},
91
  "node_i": "3381",
92
+ "native_tokenizers": [],
93
  "scripts": [
94
  "Latn"
95
+ ]
 
96
  }
97
  ],
98
+ "tokenizers": {},
99
  "node_i": "3379",
100
+ "native_tokenizers": [],
101
+ "scripts": []
102
  }
103
  ],
104
+ "tokenizers": {},
105
  "node_i": "3373",
106
+ "native_tokenizers": [],
107
+ "scripts": []
108
  },
109
  {
110
  "name": "Chibchan B",
111
  "iso_1_code": null,
112
  "iso_3_code": null,
 
113
  "children": [
114
  {
115
  "name": "Pech",
116
  "iso_1_code": null,
117
  "iso_3_code": "pay",
 
118
  "children": [],
119
+ "tokenizers": {},
120
  "node_i": "3383",
121
+ "native_tokenizers": [],
122
+ "scripts": []
123
  },
124
  {
125
  "name": "Eastern Chibchan",
126
  "iso_1_code": null,
127
  "iso_3_code": null,
 
128
  "children": [
129
  {
130
  "name": "Colombian",
131
  "iso_1_code": null,
132
  "iso_3_code": null,
 
133
  "children": [
134
  {
135
  "name": "Northern Colombian",
136
  "iso_1_code": null,
137
  "iso_3_code": null,
 
138
  "children": [
139
  {
140
  "name": "Chimila",
141
  "iso_1_code": null,
142
  "iso_3_code": "cbg",
 
143
  "children": [],
144
+ "tokenizers": {},
145
  "node_i": "3387",
146
+ "native_tokenizers": [],
147
+ "scripts": []
148
  },
149
  {
150
  "name": "Arhuacan",
151
  "iso_1_code": null,
152
  "iso_3_code": null,
 
153
  "children": [
154
  {
155
  "name": "Kogi",
156
  "iso_1_code": null,
157
  "iso_3_code": "kog",
 
158
  "children": [],
159
+ "tokenizers": {},
160
  "node_i": "3389",
161
+ "native_tokenizers": [],
162
  "scripts": [
163
  "Latn"
164
+ ]
 
165
  },
166
  {
167
  "name": "Southern and Eastern Arhuacan",
168
  "iso_1_code": null,
169
  "iso_3_code": null,
 
170
  "children": [
171
  {
172
  "name": "Arhuaco",
173
  "iso_1_code": null,
174
  "iso_3_code": "arh",
 
175
  "children": [],
176
+ "tokenizers": {},
177
  "node_i": "3391",
178
+ "native_tokenizers": [],
179
+ "scripts": []
180
  },
181
  {
182
  "name": "Guamaca-Atanque",
183
  "iso_1_code": null,
184
  "iso_3_code": null,
 
185
  "children": [
186
  {
187
  "name": "Sanka",
188
  "iso_1_code": null,
189
  "iso_3_code": "mbp",
 
190
  "children": [],
191
+ "tokenizers": {},
192
  "node_i": "3393",
193
+ "native_tokenizers": [],
194
+ "scripts": []
195
  }
196
  ],
197
+ "tokenizers": {},
198
  "node_i": "3392",
199
+ "native_tokenizers": [],
200
+ "scripts": []
201
  }
202
  ],
203
+ "tokenizers": {},
204
  "node_i": "3390",
205
+ "native_tokenizers": [],
206
+ "scripts": []
207
  }
208
  ],
209
+ "tokenizers": {},
210
  "node_i": "3388",
211
+ "native_tokenizers": [],
212
+ "scripts": []
213
  }
214
  ],
215
+ "tokenizers": {},
216
  "node_i": "3386",
217
+ "native_tokenizers": [],
218
+ "scripts": []
219
  },
220
  {
221
  "name": "Southern Colombian",
222
  "iso_1_code": null,
223
  "iso_3_code": null,
 
224
  "children": [
225
  {
226
  "name": "Bar\u00ed",
227
  "iso_1_code": null,
228
  "iso_3_code": "mot",
 
229
  "children": [],
230
+ "tokenizers": {},
231
  "node_i": "3395",
232
+ "native_tokenizers": [],
233
+ "scripts": []
234
  },
235
  {
236
  "name": "Cundicocuyese",
237
  "iso_1_code": null,
238
  "iso_3_code": null,
 
239
  "children": [
240
  {
241
  "name": "Chibcha",
242
  "iso_1_code": null,
243
  "iso_3_code": "chb",
 
244
  "children": [],
245
+ "tokenizers": {},
246
  "node_i": "3397",
247
+ "native_tokenizers": [],
248
+ "scripts": []
249
  },
250
  {
251
  "name": "Tunebo, Barro Negro",
252
  "iso_1_code": null,
253
  "iso_3_code": "tbn",
 
254
  "children": [],
255
+ "tokenizers": {},
256
  "node_i": "3398",
257
+ "native_tokenizers": [],
258
+ "scripts": []
259
  },
260
  {
261
  "name": "Tunebo, Western",
262
  "iso_1_code": null,
263
  "iso_3_code": "tnb",
 
264
  "children": [],
265
+ "tokenizers": {},
266
  "node_i": "3399",
267
+ "native_tokenizers": [],
268
+ "scripts": []
269
  },
270
  {
271
  "name": "Tunebo, Angosturas",
272
  "iso_1_code": null,
273
  "iso_3_code": "tnd",
 
274
  "children": [],
275
+ "tokenizers": {},
276
  "node_i": "3400",
277
+ "native_tokenizers": [],
278
+ "scripts": []
279
  },
280
  {
281
  "name": "Tunebo, Central",
282
  "iso_1_code": null,
283
  "iso_3_code": "tuf",
 
284
  "children": [],
285
+ "tokenizers": {},
286
  "node_i": "3401",
287
+ "native_tokenizers": [],
288
  "scripts": [
289
  "Latn"
290
+ ]
 
291
  }
292
  ],
293
+ "tokenizers": {},
294
  "node_i": "3396",
295
+ "native_tokenizers": [],
296
+ "scripts": []
297
  }
298
  ],
299
+ "tokenizers": {},
300
  "node_i": "3394",
301
+ "native_tokenizers": [],
302
+ "scripts": []
303
  }
304
  ],
305
+ "tokenizers": {},
306
  "node_i": "3385",
307
+ "native_tokenizers": [],
308
+ "scripts": []
309
  },
310
  {
311
  "name": "Cuna",
312
  "iso_1_code": null,
313
  "iso_3_code": null,
 
314
  "children": [
315
  {
316
  "name": "Kuna, San Blas",
317
  "iso_1_code": null,
318
  "iso_3_code": "cuk",
 
319
  "children": [],
320
+ "tokenizers": {},
321
  "node_i": "3403",
322
+ "native_tokenizers": [],
323
  "scripts": [
324
  "Latn"
325
+ ]
 
326
  },
327
  {
328
  "name": "Kuna, Border",
329
  "iso_1_code": null,
330
  "iso_3_code": "kvn",
 
331
  "children": [],
332
+ "tokenizers": {},
333
  "node_i": "3404",
334
+ "native_tokenizers": [],
335
  "scripts": [
336
  "Latn"
337
+ ]
 
338
  }
339
  ],
340
+ "tokenizers": {},
341
  "node_i": "3402",
342
+ "native_tokenizers": [],
343
+ "scripts": []
344
  }
345
  ],
346
+ "tokenizers": {},
347
  "node_i": "3384",
348
+ "native_tokenizers": [],
349
+ "scripts": []
350
  },
351
  {
352
  "name": "Votic",
353
  "iso_1_code": null,
354
  "iso_3_code": null,
 
355
  "children": [
356
  {
357
  "name": "Mal\u00e9ku Ja\u00edka",
358
  "iso_1_code": null,
359
  "iso_3_code": "gut",
 
360
  "children": [],
361
+ "tokenizers": {},
362
  "node_i": "3406",
363
+ "native_tokenizers": [],
364
+ "scripts": []
365
  },
366
  {
367
  "name": "Rama",
368
  "iso_1_code": null,
369
  "iso_3_code": "rma",
 
370
  "children": [],
371
+ "tokenizers": {},
372
  "node_i": "3407",
373
+ "native_tokenizers": [],
374
+ "scripts": []
375
  }
376
  ],
377
+ "tokenizers": {},
378
  "node_i": "3405",
379
+ "native_tokenizers": [],
380
+ "scripts": []
381
  }
382
  ],
383
+ "tokenizers": {},
384
  "node_i": "3382",
385
+ "native_tokenizers": [],
386
+ "scripts": []
387
  }
388
  ],
389
+ "tokenizers": {},
390
  "node_i": "3372",
391
+ "native_tokenizers": [],
392
+ "scripts": []
393
  }
data/Chimakuan.json CHANGED
@@ -2,30 +2,30 @@
2
  "name": "Chimakuan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Quileute",
9
  "iso_1_code": null,
10
  "iso_3_code": "qui",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3409",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Chemakum",
19
  "iso_1_code": null,
20
  "iso_3_code": "xch",
21
- "tokenizers": {},
22
  "children": [],
 
23
  "node_i": "3410",
24
- "scripts": [],
25
- "own_tokenizer": false
26
  }
27
  ],
 
28
  "node_i": "3408",
29
- "scripts": [],
30
- "own_tokenizer": false
31
  }
 
2
  "name": "Chimakuan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Quileute",
8
  "iso_1_code": null,
9
  "iso_3_code": "qui",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3409",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Chemakum",
18
  "iso_1_code": null,
19
  "iso_3_code": "xch",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "3410",
23
+ "native_tokenizers": [],
24
+ "scripts": []
25
  }
26
  ],
27
+ "tokenizers": {},
28
  "node_i": "3408",
29
+ "native_tokenizers": [],
30
+ "scripts": []
31
  }
data/Chinookan.json CHANGED
@@ -2,41 +2,41 @@
2
  "name": "Chinookan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Chinook",
9
  "iso_1_code": null,
10
  "iso_3_code": "chh",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3412",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Upper Chinookan",
19
  "iso_1_code": null,
20
  "iso_3_code": null,
21
- "tokenizers": {},
22
  "children": [
23
  {
24
  "name": "Wasco-Wishram",
25
  "iso_1_code": null,
26
  "iso_3_code": "wac",
27
- "tokenizers": {},
28
  "children": [],
 
29
  "node_i": "3414",
30
- "scripts": [],
31
- "own_tokenizer": false
32
  }
33
  ],
 
34
  "node_i": "3413",
35
- "scripts": [],
36
- "own_tokenizer": false
37
  }
38
  ],
 
39
  "node_i": "3411",
40
- "scripts": [],
41
- "own_tokenizer": false
42
  }
 
2
  "name": "Chinookan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Chinook",
8
  "iso_1_code": null,
9
  "iso_3_code": "chh",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3412",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Upper Chinookan",
18
  "iso_1_code": null,
19
  "iso_3_code": null,
 
20
  "children": [
21
  {
22
  "name": "Wasco-Wishram",
23
  "iso_1_code": null,
24
  "iso_3_code": "wac",
 
25
  "children": [],
26
+ "tokenizers": {},
27
  "node_i": "3414",
28
+ "native_tokenizers": [],
29
+ "scripts": []
30
  }
31
  ],
32
+ "tokenizers": {},
33
  "node_i": "3413",
34
+ "native_tokenizers": [],
35
+ "scripts": []
36
  }
37
  ],
38
+ "tokenizers": {},
39
  "node_i": "3411",
40
+ "native_tokenizers": [],
41
+ "scripts": []
42
  }
data/Chipaya-Uru.json CHANGED
@@ -2,32 +2,32 @@
2
  "name": "Chipaya-Uru",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Chipaya",
9
  "iso_1_code": null,
10
  "iso_3_code": "cap",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3416",
 
14
  "scripts": [
15
  "Latn"
16
- ],
17
- "own_tokenizer": false
18
  },
19
  {
20
  "name": "Uru",
21
  "iso_1_code": null,
22
  "iso_3_code": "ure",
23
- "tokenizers": {},
24
  "children": [],
 
25
  "node_i": "3417",
26
- "scripts": [],
27
- "own_tokenizer": false
28
  }
29
  ],
 
30
  "node_i": "3415",
31
- "scripts": [],
32
- "own_tokenizer": false
33
  }
 
2
  "name": "Chipaya-Uru",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Chipaya",
8
  "iso_1_code": null,
9
  "iso_3_code": "cap",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3416",
13
+ "native_tokenizers": [],
14
  "scripts": [
15
  "Latn"
16
+ ]
 
17
  },
18
  {
19
  "name": "Uru",
20
  "iso_1_code": null,
21
  "iso_3_code": "ure",
 
22
  "children": [],
23
+ "tokenizers": {},
24
  "node_i": "3417",
25
+ "native_tokenizers": [],
26
+ "scripts": []
27
  }
28
  ],
29
+ "tokenizers": {},
30
  "node_i": "3415",
31
+ "native_tokenizers": [],
32
+ "scripts": []
33
  }
data/Chocoan.json CHANGED
@@ -2,121 +2,121 @@
2
  "name": "Chocoan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Woun Meu",
9
  "iso_1_code": null,
10
  "iso_3_code": "noa",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3419",
 
14
  "scripts": [
15
  "Latn"
16
- ],
17
- "own_tokenizer": false
18
  },
19
  {
20
  "name": "Ember\u00e1",
21
  "iso_1_code": null,
22
  "iso_3_code": null,
23
- "tokenizers": {},
24
  "children": [
25
  {
26
  "name": "Northern Ember\u00e1",
27
  "iso_1_code": null,
28
  "iso_3_code": null,
29
- "tokenizers": {},
30
  "children": [
31
  {
32
  "name": "Embera Cat\u00edo",
33
  "iso_1_code": null,
34
  "iso_3_code": "cto",
35
- "tokenizers": {},
36
  "children": [],
 
37
  "node_i": "3422",
 
38
  "scripts": [
39
  "Latn"
40
- ],
41
- "own_tokenizer": false
42
  },
43
  {
44
  "name": "Ember\u00e1, Northern",
45
  "iso_1_code": null,
46
  "iso_3_code": "emp",
47
- "tokenizers": {},
48
  "children": [],
 
49
  "node_i": "3423",
 
50
  "scripts": [
51
  "Latn"
52
- ],
53
- "own_tokenizer": false
54
  }
55
  ],
 
56
  "node_i": "3421",
57
- "scripts": [],
58
- "own_tokenizer": false
59
  },
60
  {
61
  "name": "Southern Ember\u00e1",
62
  "iso_1_code": null,
63
  "iso_3_code": null,
64
- "tokenizers": {},
65
  "children": [
66
  {
67
  "name": "Embera Baud\u00f3",
68
  "iso_1_code": null,
69
  "iso_3_code": "bdc",
70
- "tokenizers": {},
71
  "children": [],
 
72
  "node_i": "3425",
73
- "scripts": [],
74
- "own_tokenizer": false
75
  },
76
  {
77
  "name": "Embera Cham\u00ed",
78
  "iso_1_code": null,
79
  "iso_3_code": "cmi",
80
- "tokenizers": {},
81
  "children": [],
 
82
  "node_i": "3426",
83
- "scripts": [],
84
- "own_tokenizer": false
85
  },
86
  {
87
  "name": "Epena",
88
  "iso_1_code": null,
89
  "iso_3_code": "sja",
90
- "tokenizers": {},
91
  "children": [],
 
92
  "node_i": "3427",
 
93
  "scripts": [
94
  "Latn"
95
- ],
96
- "own_tokenizer": false
97
  },
98
  {
99
  "name": "Embera Tad\u00f3",
100
  "iso_1_code": null,
101
  "iso_3_code": "tdc",
102
- "tokenizers": {},
103
  "children": [],
 
104
  "node_i": "3428",
105
- "scripts": [],
106
- "own_tokenizer": false
107
  }
108
  ],
 
109
  "node_i": "3424",
110
- "scripts": [],
111
- "own_tokenizer": false
112
  }
113
  ],
 
114
  "node_i": "3420",
115
- "scripts": [],
116
- "own_tokenizer": false
117
  }
118
  ],
 
119
  "node_i": "3418",
120
- "scripts": [],
121
- "own_tokenizer": false
122
  }
 
2
  "name": "Chocoan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Woun Meu",
8
  "iso_1_code": null,
9
  "iso_3_code": "noa",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3419",
13
+ "native_tokenizers": [],
14
  "scripts": [
15
  "Latn"
16
+ ]
 
17
  },
18
  {
19
  "name": "Ember\u00e1",
20
  "iso_1_code": null,
21
  "iso_3_code": null,
 
22
  "children": [
23
  {
24
  "name": "Northern Ember\u00e1",
25
  "iso_1_code": null,
26
  "iso_3_code": null,
 
27
  "children": [
28
  {
29
  "name": "Embera Cat\u00edo",
30
  "iso_1_code": null,
31
  "iso_3_code": "cto",
 
32
  "children": [],
33
+ "tokenizers": {},
34
  "node_i": "3422",
35
+ "native_tokenizers": [],
36
  "scripts": [
37
  "Latn"
38
+ ]
 
39
  },
40
  {
41
  "name": "Ember\u00e1, Northern",
42
  "iso_1_code": null,
43
  "iso_3_code": "emp",
 
44
  "children": [],
45
+ "tokenizers": {},
46
  "node_i": "3423",
47
+ "native_tokenizers": [],
48
  "scripts": [
49
  "Latn"
50
+ ]
 
51
  }
52
  ],
53
+ "tokenizers": {},
54
  "node_i": "3421",
55
+ "native_tokenizers": [],
56
+ "scripts": []
57
  },
58
  {
59
  "name": "Southern Ember\u00e1",
60
  "iso_1_code": null,
61
  "iso_3_code": null,
 
62
  "children": [
63
  {
64
  "name": "Embera Baud\u00f3",
65
  "iso_1_code": null,
66
  "iso_3_code": "bdc",
 
67
  "children": [],
68
+ "tokenizers": {},
69
  "node_i": "3425",
70
+ "native_tokenizers": [],
71
+ "scripts": []
72
  },
73
  {
74
  "name": "Embera Cham\u00ed",
75
  "iso_1_code": null,
76
  "iso_3_code": "cmi",
 
77
  "children": [],
78
+ "tokenizers": {},
79
  "node_i": "3426",
80
+ "native_tokenizers": [],
81
+ "scripts": []
82
  },
83
  {
84
  "name": "Epena",
85
  "iso_1_code": null,
86
  "iso_3_code": "sja",
 
87
  "children": [],
88
+ "tokenizers": {},
89
  "node_i": "3427",
90
+ "native_tokenizers": [],
91
  "scripts": [
92
  "Latn"
93
+ ]
 
94
  },
95
  {
96
  "name": "Embera Tad\u00f3",
97
  "iso_1_code": null,
98
  "iso_3_code": "tdc",
 
99
  "children": [],
100
+ "tokenizers": {},
101
  "node_i": "3428",
102
+ "native_tokenizers": [],
103
+ "scripts": []
104
  }
105
  ],
106
+ "tokenizers": {},
107
  "node_i": "3424",
108
+ "native_tokenizers": [],
109
+ "scripts": []
110
  }
111
  ],
112
+ "tokenizers": {},
113
  "node_i": "3420",
114
+ "native_tokenizers": [],
115
+ "scripts": []
116
  }
117
  ],
118
+ "tokenizers": {},
119
  "node_i": "3418",
120
+ "native_tokenizers": [],
121
+ "scripts": []
122
  }
data/Cholonan.json CHANGED
@@ -2,30 +2,30 @@
2
  "name": "Cholonan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Chol\u00f3n",
9
  "iso_1_code": null,
10
  "iso_3_code": "cht",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3430",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Hibito",
19
  "iso_1_code": null,
20
  "iso_3_code": "hib",
21
- "tokenizers": {},
22
  "children": [],
 
23
  "node_i": "3431",
24
- "scripts": [],
25
- "own_tokenizer": false
26
  }
27
  ],
 
28
  "node_i": "3429",
29
- "scripts": [],
30
- "own_tokenizer": false
31
  }
 
2
  "name": "Cholonan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Chol\u00f3n",
8
  "iso_1_code": null,
9
  "iso_3_code": "cht",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3430",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Hibito",
18
  "iso_1_code": null,
19
  "iso_3_code": "hib",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "3431",
23
+ "native_tokenizers": [],
24
+ "scripts": []
25
  }
26
  ],
27
+ "tokenizers": {},
28
  "node_i": "3429",
29
+ "native_tokenizers": [],
30
+ "scripts": []
31
  }
data/Chon.json CHANGED
@@ -2,41 +2,41 @@
2
  "name": "Chon",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Tehuelche",
9
  "iso_1_code": null,
10
  "iso_3_code": "teh",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3433",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Island Chon",
19
  "iso_1_code": null,
20
  "iso_3_code": null,
21
- "tokenizers": {},
22
  "children": [
23
  {
24
  "name": "Ona",
25
  "iso_1_code": null,
26
  "iso_3_code": "ona",
27
- "tokenizers": {},
28
  "children": [],
 
29
  "node_i": "3435",
30
- "scripts": [],
31
- "own_tokenizer": false
32
  }
33
  ],
 
34
  "node_i": "3434",
35
- "scripts": [],
36
- "own_tokenizer": false
37
  }
38
  ],
 
39
  "node_i": "3432",
40
- "scripts": [],
41
- "own_tokenizer": false
42
  }
 
2
  "name": "Chon",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Tehuelche",
8
  "iso_1_code": null,
9
  "iso_3_code": "teh",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3433",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Island Chon",
18
  "iso_1_code": null,
19
  "iso_3_code": null,
 
20
  "children": [
21
  {
22
  "name": "Ona",
23
  "iso_1_code": null,
24
  "iso_3_code": "ona",
 
25
  "children": [],
26
+ "tokenizers": {},
27
  "node_i": "3435",
28
+ "native_tokenizers": [],
29
+ "scripts": []
30
  }
31
  ],
32
+ "tokenizers": {},
33
  "node_i": "3434",
34
+ "native_tokenizers": [],
35
+ "scripts": []
36
  }
37
  ],
38
+ "tokenizers": {},
39
  "node_i": "3432",
40
+ "native_tokenizers": [],
41
+ "scripts": []
42
  }
data/Chukotko-Kamchatkan.json CHANGED
@@ -2,108 +2,108 @@
2
  "name": "Chukotko-Kamchatkan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Northern",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Chukot",
15
  "iso_1_code": null,
16
  "iso_3_code": null,
17
- "tokenizers": {},
18
  "children": [
19
  {
20
  "name": "Chukchi",
21
  "iso_1_code": null,
22
  "iso_3_code": "ckt",
23
- "tokenizers": {},
24
  "children": [],
 
25
  "node_i": "3439",
 
26
  "scripts": [
27
  "Cyrl"
28
- ],
29
- "own_tokenizer": false
30
  }
31
  ],
 
32
  "node_i": "3438",
33
- "scripts": [],
34
- "own_tokenizer": false
35
  },
36
  {
37
  "name": "Koryak-Alyutor",
38
  "iso_1_code": null,
39
  "iso_3_code": null,
40
- "tokenizers": {},
41
  "children": [
42
  {
43
  "name": "Alutor",
44
  "iso_1_code": null,
45
  "iso_3_code": "alr",
46
- "tokenizers": {},
47
  "children": [],
 
48
  "node_i": "3441",
49
- "scripts": [],
50
- "own_tokenizer": false
51
  },
52
  {
53
  "name": "Koryak",
54
  "iso_1_code": null,
55
  "iso_3_code": "kpy",
56
- "tokenizers": {},
57
  "children": [],
 
58
  "node_i": "3442",
59
- "scripts": [],
60
- "own_tokenizer": false
61
  },
62
  {
63
  "name": "Kerek",
64
  "iso_1_code": null,
65
  "iso_3_code": "krk",
66
- "tokenizers": {},
67
  "children": [],
 
68
  "node_i": "3443",
69
- "scripts": [],
70
- "own_tokenizer": false
71
  }
72
  ],
 
73
  "node_i": "3440",
74
- "scripts": [],
75
- "own_tokenizer": false
76
  }
77
  ],
 
78
  "node_i": "3437",
79
- "scripts": [],
80
- "own_tokenizer": false
81
  },
82
  {
83
  "name": "Southern",
84
  "iso_1_code": null,
85
  "iso_3_code": null,
86
- "tokenizers": {},
87
  "children": [
88
  {
89
  "name": "Itelmen",
90
  "iso_1_code": null,
91
  "iso_3_code": "itl",
92
- "tokenizers": {},
93
  "children": [],
 
94
  "node_i": "3445",
 
95
  "scripts": [
96
  "Cyrl"
97
- ],
98
- "own_tokenizer": false
99
  }
100
  ],
 
101
  "node_i": "3444",
102
- "scripts": [],
103
- "own_tokenizer": false
104
  }
105
  ],
 
106
  "node_i": "3436",
107
- "scripts": [],
108
- "own_tokenizer": false
109
  }
 
2
  "name": "Chukotko-Kamchatkan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Northern",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Chukot",
13
  "iso_1_code": null,
14
  "iso_3_code": null,
 
15
  "children": [
16
  {
17
  "name": "Chukchi",
18
  "iso_1_code": null,
19
  "iso_3_code": "ckt",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "3439",
23
+ "native_tokenizers": [],
24
  "scripts": [
25
  "Cyrl"
26
+ ]
 
27
  }
28
  ],
29
+ "tokenizers": {},
30
  "node_i": "3438",
31
+ "native_tokenizers": [],
32
+ "scripts": []
33
  },
34
  {
35
  "name": "Koryak-Alyutor",
36
  "iso_1_code": null,
37
  "iso_3_code": null,
 
38
  "children": [
39
  {
40
  "name": "Alutor",
41
  "iso_1_code": null,
42
  "iso_3_code": "alr",
 
43
  "children": [],
44
+ "tokenizers": {},
45
  "node_i": "3441",
46
+ "native_tokenizers": [],
47
+ "scripts": []
48
  },
49
  {
50
  "name": "Koryak",
51
  "iso_1_code": null,
52
  "iso_3_code": "kpy",
 
53
  "children": [],
54
+ "tokenizers": {},
55
  "node_i": "3442",
56
+ "native_tokenizers": [],
57
+ "scripts": []
58
  },
59
  {
60
  "name": "Kerek",
61
  "iso_1_code": null,
62
  "iso_3_code": "krk",
 
63
  "children": [],
64
+ "tokenizers": {},
65
  "node_i": "3443",
66
+ "native_tokenizers": [],
67
+ "scripts": []
68
  }
69
  ],
70
+ "tokenizers": {},
71
  "node_i": "3440",
72
+ "native_tokenizers": [],
73
+ "scripts": []
74
  }
75
  ],
76
+ "tokenizers": {},
77
  "node_i": "3437",
78
+ "native_tokenizers": [],
79
+ "scripts": []
80
  },
81
  {
82
  "name": "Southern",
83
  "iso_1_code": null,
84
  "iso_3_code": null,
 
85
  "children": [
86
  {
87
  "name": "Itelmen",
88
  "iso_1_code": null,
89
  "iso_3_code": "itl",
 
90
  "children": [],
91
+ "tokenizers": {},
92
  "node_i": "3445",
93
+ "native_tokenizers": [],
94
  "scripts": [
95
  "Cyrl"
96
+ ]
 
97
  }
98
  ],
99
+ "tokenizers": {},
100
  "node_i": "3444",
101
+ "native_tokenizers": [],
102
+ "scripts": []
103
  }
104
  ],
105
+ "tokenizers": {},
106
  "node_i": "3436",
107
+ "native_tokenizers": [],
108
+ "scripts": []
109
  }
data/Chumashan.json CHANGED
@@ -2,92 +2,92 @@
2
  "name": "Chumashan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Obispe\u00f1o",
9
  "iso_1_code": null,
10
  "iso_3_code": "obi",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3447",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Central Chumash",
19
  "iso_1_code": null,
20
  "iso_3_code": null,
21
- "tokenizers": {},
22
  "children": [
23
  {
24
  "name": "Barbare\u00f1o",
25
  "iso_1_code": null,
26
  "iso_3_code": "boi",
27
- "tokenizers": {},
28
  "children": [],
 
29
  "node_i": "3449",
30
- "scripts": [],
31
- "own_tokenizer": false
32
  },
33
  {
34
  "name": "Inese\u00f1o",
35
  "iso_1_code": null,
36
  "iso_3_code": "inz",
37
- "tokenizers": {},
38
  "children": [],
 
39
  "node_i": "3450",
40
- "scripts": [],
41
- "own_tokenizer": false
42
  },
43
  {
44
  "name": "Purisime\u00f1o",
45
  "iso_1_code": null,
46
  "iso_3_code": "puy",
47
- "tokenizers": {},
48
  "children": [],
 
49
  "node_i": "3451",
50
- "scripts": [],
51
- "own_tokenizer": false
52
  },
53
  {
54
  "name": "Venture\u00f1o",
55
  "iso_1_code": null,
56
  "iso_3_code": "veo",
57
- "tokenizers": {},
58
  "children": [],
 
59
  "node_i": "3452",
60
- "scripts": [],
61
- "own_tokenizer": false
62
  }
63
  ],
 
64
  "node_i": "3448",
65
- "scripts": [],
66
- "own_tokenizer": false
67
  },
68
  {
69
  "name": "Island Chumash",
70
  "iso_1_code": null,
71
  "iso_3_code": null,
72
- "tokenizers": {},
73
  "children": [
74
  {
75
  "name": "Cruze\u00f1o",
76
  "iso_1_code": null,
77
  "iso_3_code": "crz",
78
- "tokenizers": {},
79
  "children": [],
 
80
  "node_i": "3454",
81
- "scripts": [],
82
- "own_tokenizer": false
83
  }
84
  ],
 
85
  "node_i": "3453",
86
- "scripts": [],
87
- "own_tokenizer": false
88
  }
89
  ],
 
90
  "node_i": "3446",
91
- "scripts": [],
92
- "own_tokenizer": false
93
  }
 
2
  "name": "Chumashan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Obispe\u00f1o",
8
  "iso_1_code": null,
9
  "iso_3_code": "obi",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3447",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Central Chumash",
18
  "iso_1_code": null,
19
  "iso_3_code": null,
 
20
  "children": [
21
  {
22
  "name": "Barbare\u00f1o",
23
  "iso_1_code": null,
24
  "iso_3_code": "boi",
 
25
  "children": [],
26
+ "tokenizers": {},
27
  "node_i": "3449",
28
+ "native_tokenizers": [],
29
+ "scripts": []
30
  },
31
  {
32
  "name": "Inese\u00f1o",
33
  "iso_1_code": null,
34
  "iso_3_code": "inz",
 
35
  "children": [],
36
+ "tokenizers": {},
37
  "node_i": "3450",
38
+ "native_tokenizers": [],
39
+ "scripts": []
40
  },
41
  {
42
  "name": "Purisime\u00f1o",
43
  "iso_1_code": null,
44
  "iso_3_code": "puy",
 
45
  "children": [],
46
+ "tokenizers": {},
47
  "node_i": "3451",
48
+ "native_tokenizers": [],
49
+ "scripts": []
50
  },
51
  {
52
  "name": "Venture\u00f1o",
53
  "iso_1_code": null,
54
  "iso_3_code": "veo",
 
55
  "children": [],
56
+ "tokenizers": {},
57
  "node_i": "3452",
58
+ "native_tokenizers": [],
59
+ "scripts": []
60
  }
61
  ],
62
+ "tokenizers": {},
63
  "node_i": "3448",
64
+ "native_tokenizers": [],
65
+ "scripts": []
66
  },
67
  {
68
  "name": "Island Chumash",
69
  "iso_1_code": null,
70
  "iso_3_code": null,
 
71
  "children": [
72
  {
73
  "name": "Cruze\u00f1o",
74
  "iso_1_code": null,
75
  "iso_3_code": "crz",
 
76
  "children": [],
77
+ "tokenizers": {},
78
  "node_i": "3454",
79
+ "native_tokenizers": [],
80
+ "scripts": []
81
  }
82
  ],
83
+ "tokenizers": {},
84
  "node_i": "3453",
85
+ "native_tokenizers": [],
86
+ "scripts": []
87
  }
88
  ],
89
+ "tokenizers": {},
90
  "node_i": "3446",
91
+ "native_tokenizers": [],
92
+ "scripts": []
93
  }
data/Cochimí-Yuman.json CHANGED
@@ -2,155 +2,155 @@
2
  "name": "Cochim\u00ed-Yuman",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Yuman",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Cochimi",
15
  "iso_1_code": null,
16
  "iso_3_code": "coj",
17
- "tokenizers": {},
18
  "children": [],
 
19
  "node_i": "3457",
20
- "scripts": [],
21
- "own_tokenizer": false
22
  },
23
  {
24
  "name": "Kiliwa",
25
  "iso_1_code": null,
26
  "iso_3_code": "klb",
27
- "tokenizers": {},
28
  "children": [],
 
29
  "node_i": "3458",
30
- "scripts": [],
31
- "own_tokenizer": false
32
  },
33
  {
34
  "name": "Delta-California",
35
  "iso_1_code": null,
36
  "iso_3_code": null,
37
- "tokenizers": {},
38
  "children": [
39
  {
40
  "name": "Cocopa",
41
  "iso_1_code": null,
42
  "iso_3_code": "coc",
43
- "tokenizers": {},
44
  "children": [],
 
45
  "node_i": "3460",
46
- "scripts": [],
47
- "own_tokenizer": false
48
  },
49
  {
50
  "name": "Kumiai",
51
  "iso_1_code": null,
52
  "iso_3_code": "dih",
53
- "tokenizers": {},
54
  "children": [],
 
55
  "node_i": "3461",
56
- "scripts": [],
57
- "own_tokenizer": false
58
  }
59
  ],
 
60
  "node_i": "3459",
61
- "scripts": [],
62
- "own_tokenizer": false
63
  },
64
  {
65
  "name": "Pai",
66
  "iso_1_code": null,
67
  "iso_3_code": null,
68
- "tokenizers": {},
69
  "children": [
70
  {
71
  "name": "Paipai",
72
  "iso_1_code": null,
73
  "iso_3_code": "ppi",
74
- "tokenizers": {},
75
  "children": [],
 
76
  "node_i": "3463",
77
- "scripts": [],
78
- "own_tokenizer": false
79
  },
80
  {
81
  "name": "Havasupai-Walapai-Yavapai",
82
  "iso_1_code": null,
83
  "iso_3_code": "yuf",
84
- "tokenizers": {},
85
  "children": [],
 
86
  "node_i": "3464",
87
- "scripts": [],
88
- "own_tokenizer": false
89
  }
90
  ],
 
91
  "node_i": "3462",
92
- "scripts": [],
93
- "own_tokenizer": false
94
  },
95
  {
96
  "name": "River",
97
  "iso_1_code": null,
98
  "iso_3_code": null,
99
- "tokenizers": {},
100
  "children": [
101
  {
102
  "name": "Mojave",
103
  "iso_1_code": null,
104
  "iso_3_code": null,
105
- "tokenizers": {},
106
  "children": [
107
  {
108
  "name": "Mohave",
109
  "iso_1_code": null,
110
  "iso_3_code": "mov",
111
- "tokenizers": {},
112
  "children": [],
 
113
  "node_i": "3467",
114
- "scripts": [],
115
- "own_tokenizer": false
116
  },
117
  {
118
  "name": "Maricopa",
119
  "iso_1_code": null,
120
  "iso_3_code": "mrc",
121
- "tokenizers": {},
122
  "children": [],
 
123
  "node_i": "3468",
124
- "scripts": [],
125
- "own_tokenizer": false
126
  },
127
  {
128
  "name": "Quechan",
129
  "iso_1_code": null,
130
  "iso_3_code": "yum",
131
- "tokenizers": {},
132
  "children": [],
 
133
  "node_i": "3469",
134
- "scripts": [],
135
- "own_tokenizer": false
136
  }
137
  ],
 
138
  "node_i": "3466",
139
- "scripts": [],
140
- "own_tokenizer": false
141
  }
142
  ],
 
143
  "node_i": "3465",
144
- "scripts": [],
145
- "own_tokenizer": false
146
  }
147
  ],
 
148
  "node_i": "3456",
149
- "scripts": [],
150
- "own_tokenizer": false
151
  }
152
  ],
 
153
  "node_i": "3455",
154
- "scripts": [],
155
- "own_tokenizer": false
156
  }
 
2
  "name": "Cochim\u00ed-Yuman",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Yuman",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Cochimi",
13
  "iso_1_code": null,
14
  "iso_3_code": "coj",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "3457",
18
+ "native_tokenizers": [],
19
+ "scripts": []
20
  },
21
  {
22
  "name": "Kiliwa",
23
  "iso_1_code": null,
24
  "iso_3_code": "klb",
 
25
  "children": [],
26
+ "tokenizers": {},
27
  "node_i": "3458",
28
+ "native_tokenizers": [],
29
+ "scripts": []
30
  },
31
  {
32
  "name": "Delta-California",
33
  "iso_1_code": null,
34
  "iso_3_code": null,
 
35
  "children": [
36
  {
37
  "name": "Cocopa",
38
  "iso_1_code": null,
39
  "iso_3_code": "coc",
 
40
  "children": [],
41
+ "tokenizers": {},
42
  "node_i": "3460",
43
+ "native_tokenizers": [],
44
+ "scripts": []
45
  },
46
  {
47
  "name": "Kumiai",
48
  "iso_1_code": null,
49
  "iso_3_code": "dih",
 
50
  "children": [],
51
+ "tokenizers": {},
52
  "node_i": "3461",
53
+ "native_tokenizers": [],
54
+ "scripts": []
55
  }
56
  ],
57
+ "tokenizers": {},
58
  "node_i": "3459",
59
+ "native_tokenizers": [],
60
+ "scripts": []
61
  },
62
  {
63
  "name": "Pai",
64
  "iso_1_code": null,
65
  "iso_3_code": null,
 
66
  "children": [
67
  {
68
  "name": "Paipai",
69
  "iso_1_code": null,
70
  "iso_3_code": "ppi",
 
71
  "children": [],
72
+ "tokenizers": {},
73
  "node_i": "3463",
74
+ "native_tokenizers": [],
75
+ "scripts": []
76
  },
77
  {
78
  "name": "Havasupai-Walapai-Yavapai",
79
  "iso_1_code": null,
80
  "iso_3_code": "yuf",
 
81
  "children": [],
82
+ "tokenizers": {},
83
  "node_i": "3464",
84
+ "native_tokenizers": [],
85
+ "scripts": []
86
  }
87
  ],
88
+ "tokenizers": {},
89
  "node_i": "3462",
90
+ "native_tokenizers": [],
91
+ "scripts": []
92
  },
93
  {
94
  "name": "River",
95
  "iso_1_code": null,
96
  "iso_3_code": null,
 
97
  "children": [
98
  {
99
  "name": "Mojave",
100
  "iso_1_code": null,
101
  "iso_3_code": null,
 
102
  "children": [
103
  {
104
  "name": "Mohave",
105
  "iso_1_code": null,
106
  "iso_3_code": "mov",
 
107
  "children": [],
108
+ "tokenizers": {},
109
  "node_i": "3467",
110
+ "native_tokenizers": [],
111
+ "scripts": []
112
  },
113
  {
114
  "name": "Maricopa",
115
  "iso_1_code": null,
116
  "iso_3_code": "mrc",
 
117
  "children": [],
118
+ "tokenizers": {},
119
  "node_i": "3468",
120
+ "native_tokenizers": [],
121
+ "scripts": []
122
  },
123
  {
124
  "name": "Quechan",
125
  "iso_1_code": null,
126
  "iso_3_code": "yum",
 
127
  "children": [],
128
+ "tokenizers": {},
129
  "node_i": "3469",
130
+ "native_tokenizers": [],
131
+ "scripts": []
132
  }
133
  ],
134
+ "tokenizers": {},
135
  "node_i": "3466",
136
+ "native_tokenizers": [],
137
+ "scripts": []
138
  }
139
  ],
140
+ "tokenizers": {},
141
  "node_i": "3465",
142
+ "native_tokenizers": [],
143
+ "scripts": []
144
  }
145
  ],
146
+ "tokenizers": {},
147
  "node_i": "3456",
148
+ "native_tokenizers": [],
149
+ "scripts": []
150
  }
151
  ],
152
+ "tokenizers": {},
153
  "node_i": "3455",
154
+ "native_tokenizers": [],
155
+ "scripts": []
156
  }
data/Comecrudan.json CHANGED
@@ -2,60 +2,60 @@
2
  "name": "Comecrudan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Mamulique",
9
  "iso_1_code": null,
10
  "iso_3_code": "emm",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3471",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Comecrudo",
19
  "iso_1_code": null,
20
  "iso_3_code": "xcm",
21
- "tokenizers": {},
22
  "children": [],
 
23
  "node_i": "3472",
24
- "scripts": [],
25
- "own_tokenizer": false
26
  },
27
  {
28
  "name": "Cotoname",
29
  "iso_1_code": null,
30
  "iso_3_code": "xcn",
31
- "tokenizers": {},
32
  "children": [],
 
33
  "node_i": "3473",
34
- "scripts": [],
35
- "own_tokenizer": false
36
  },
37
  {
38
  "name": "Coahuilteco",
39
  "iso_1_code": null,
40
  "iso_3_code": "xcw",
41
- "tokenizers": {},
42
  "children": [],
 
43
  "node_i": "3474",
44
- "scripts": [],
45
- "own_tokenizer": false
46
  },
47
  {
48
  "name": "Garza",
49
  "iso_1_code": null,
50
  "iso_3_code": "xgr",
51
- "tokenizers": {},
52
  "children": [],
 
53
  "node_i": "3475",
54
- "scripts": [],
55
- "own_tokenizer": false
56
  }
57
  ],
 
58
  "node_i": "3470",
59
- "scripts": [],
60
- "own_tokenizer": false
61
  }
 
2
  "name": "Comecrudan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Mamulique",
8
  "iso_1_code": null,
9
  "iso_3_code": "emm",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3471",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Comecrudo",
18
  "iso_1_code": null,
19
  "iso_3_code": "xcm",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "3472",
23
+ "native_tokenizers": [],
24
+ "scripts": []
25
  },
26
  {
27
  "name": "Cotoname",
28
  "iso_1_code": null,
29
  "iso_3_code": "xcn",
 
30
  "children": [],
31
+ "tokenizers": {},
32
  "node_i": "3473",
33
+ "native_tokenizers": [],
34
+ "scripts": []
35
  },
36
  {
37
  "name": "Coahuilteco",
38
  "iso_1_code": null,
39
  "iso_3_code": "xcw",
 
40
  "children": [],
41
+ "tokenizers": {},
42
  "node_i": "3474",
43
+ "native_tokenizers": [],
44
+ "scripts": []
45
  },
46
  {
47
  "name": "Garza",
48
  "iso_1_code": null,
49
  "iso_3_code": "xgr",
 
50
  "children": [],
51
+ "tokenizers": {},
52
  "node_i": "3475",
53
+ "native_tokenizers": [],
54
+ "scripts": []
55
  }
56
  ],
57
+ "tokenizers": {},
58
  "node_i": "3470",
59
+ "native_tokenizers": [],
60
+ "scripts": []
61
  }
data/Constructed language.json CHANGED
@@ -2,22 +2,22 @@
2
  "name": "Constructed language",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Esperanto",
9
  "iso_1_code": "eo",
10
  "iso_3_code": "epo",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3477",
 
14
  "scripts": [
15
  "Latn"
16
- ],
17
- "own_tokenizer": false
18
  }
19
  ],
 
20
  "node_i": "3476",
21
- "scripts": [],
22
- "own_tokenizer": false
23
  }
 
2
  "name": "Constructed language",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Esperanto",
8
  "iso_1_code": "eo",
9
  "iso_3_code": "epo",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3477",
13
+ "native_tokenizers": [],
14
  "scripts": [
15
  "Latn"
16
+ ]
 
17
  }
18
  ],
19
+ "tokenizers": {},
20
  "node_i": "3476",
21
+ "native_tokenizers": [],
22
+ "scripts": []
23
  }
data/Coosan.json CHANGED
@@ -2,30 +2,30 @@
2
  "name": "Coosan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Coos",
9
  "iso_1_code": null,
10
  "iso_3_code": "csz",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3479",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Miluk",
19
  "iso_1_code": null,
20
  "iso_3_code": "iml",
21
- "tokenizers": {},
22
  "children": [],
 
23
  "node_i": "3480",
24
- "scripts": [],
25
- "own_tokenizer": false
26
  }
27
  ],
 
28
  "node_i": "3478",
29
- "scripts": [],
30
- "own_tokenizer": false
31
  }
 
2
  "name": "Coosan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Coos",
8
  "iso_1_code": null,
9
  "iso_3_code": "csz",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3479",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Miluk",
18
  "iso_1_code": null,
19
  "iso_3_code": "iml",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "3480",
23
+ "native_tokenizers": [],
24
+ "scripts": []
25
  }
26
  ],
27
+ "tokenizers": {},
28
  "node_i": "3478",
29
+ "native_tokenizers": [],
30
+ "scripts": []
31
  }
data/Creole.json CHANGED
@@ -2,2288 +2,1742 @@
2
  "name": "Creole",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {
6
- "Arab": {
7
- "full_object": "SpaCyTokenizer(\"ms\")",
8
- "original_lang_name": "malay",
9
- "original_lang_code": "msa",
10
- "scripts": [
11
- "Latn",
12
- "Arab",
13
- "Thai"
14
- ],
15
- "class_name": "SpaCyTokenizer",
16
- "macrolanguage": true
17
- },
18
- "Latn": {
19
- "full_object": "SpaCyTokenizer(\"ms\")",
20
- "original_lang_name": "malay",
21
- "original_lang_code": "msa",
22
- "scripts": [
23
- "Latn",
24
- "Arab",
25
- "Thai"
26
- ],
27
- "class_name": "SpaCyTokenizer",
28
- "macrolanguage": true
29
- },
30
- "Thai": {
31
- "full_object": "SpaCyTokenizer(\"ms\")",
32
- "original_lang_name": "malay",
33
- "original_lang_code": "msa",
34
- "scripts": [
35
- "Latn",
36
- "Arab",
37
- "Thai"
38
- ],
39
- "class_name": "SpaCyTokenizer",
40
- "macrolanguage": true
41
- }
42
- },
43
  "children": [
44
  {
45
  "name": "Afrikaans based",
46
  "iso_1_code": null,
47
  "iso_3_code": null,
48
- "tokenizers": {},
49
  "children": [
50
  {
51
  "name": "Flaaitaal",
52
  "iso_1_code": null,
53
  "iso_3_code": "fly",
54
- "tokenizers": {},
55
  "children": [],
 
56
  "node_i": "3483",
57
- "scripts": [],
58
- "own_tokenizer": false
59
  },
60
  {
61
  "name": "Oorlams",
62
  "iso_1_code": null,
63
  "iso_3_code": "oor",
64
- "tokenizers": {},
65
  "children": [],
 
66
  "node_i": "3484",
67
- "scripts": [],
68
- "own_tokenizer": false
69
  }
70
  ],
 
71
  "node_i": "3482",
72
- "scripts": [],
73
- "own_tokenizer": false
74
  },
75
  {
76
  "name": "Arabic based",
77
  "iso_1_code": null,
78
  "iso_3_code": null,
79
- "tokenizers": {
80
- "Arab": {
81
- "full_object": "SpaCyTokenizer(\"ar\")",
82
- "original_lang_name": "arabic",
83
- "original_lang_code": "ara",
84
- "scripts": [
85
- "Arab"
86
- ],
87
- "class_name": "SpaCyTokenizer",
88
- "macrolanguage": true
89
- }
90
- },
91
  "children": [
92
  {
93
  "name": "Nubi",
94
  "iso_1_code": null,
95
  "iso_3_code": "kcn",
96
- "tokenizers": {},
97
  "children": [],
 
98
  "node_i": "3486",
99
- "scripts": [],
100
- "own_tokenizer": false
101
  },
102
  {
103
  "name": "Arabic, Juba",
104
  "iso_1_code": "ar",
105
  "iso_3_code": "pga",
106
- "tokenizers": {
107
- "Arab": {
108
- "full_object": "SpaCyTokenizer(\"ar\")",
109
- "original_lang_name": "arabic",
110
- "original_lang_code": "ara",
111
- "scripts": [
112
- "Arab"
113
- ],
114
- "class_name": "SpaCyTokenizer",
115
- "macrolanguage": true
116
- }
117
- },
118
  "children": [],
 
119
  "node_i": "3487",
120
- "scripts": [],
121
- "own_tokenizer": true
122
  }
123
  ],
 
124
  "node_i": "3485",
125
- "scripts": [],
126
- "own_tokenizer": false
127
  },
128
  {
129
  "name": "Assamese based",
130
  "iso_1_code": null,
131
  "iso_3_code": null,
132
- "tokenizers": {},
133
  "children": [
134
  {
135
  "name": "Nagamese",
136
  "iso_1_code": null,
137
  "iso_3_code": "nag",
138
- "tokenizers": {},
139
  "children": [],
 
140
  "node_i": "3489",
141
- "scripts": [],
142
- "own_tokenizer": false
143
  }
144
  ],
 
145
  "node_i": "3488",
146
- "scripts": [],
147
- "own_tokenizer": false
148
  },
149
  {
150
  "name": "Dutch based",
151
  "iso_1_code": null,
152
  "iso_3_code": null,
153
- "tokenizers": {},
154
  "children": [
155
  {
156
  "name": "Berbice Dutch Creole",
157
  "iso_1_code": null,
158
  "iso_3_code": "brc",
159
- "tokenizers": {},
160
  "children": [],
 
161
  "node_i": "3491",
162
- "scripts": [],
163
- "own_tokenizer": false
164
  },
165
  {
166
  "name": "Negerhollands",
167
  "iso_1_code": null,
168
  "iso_3_code": "dcr",
169
- "tokenizers": {},
170
  "children": [],
 
171
  "node_i": "3492",
172
- "scripts": [],
173
- "own_tokenizer": false
174
  },
175
  {
176
  "name": "Javindo",
177
  "iso_1_code": null,
178
  "iso_3_code": "jvd",
179
- "tokenizers": {},
180
  "children": [],
 
181
  "node_i": "3493",
182
- "scripts": [],
183
- "own_tokenizer": false
184
  },
185
  {
186
  "name": "Petjo",
187
  "iso_1_code": null,
188
  "iso_3_code": "pey",
189
- "tokenizers": {},
190
  "children": [],
 
191
  "node_i": "3494",
192
- "scripts": [],
193
- "own_tokenizer": false
194
  },
195
  {
196
  "name": "Skepi Dutch Creole",
197
  "iso_1_code": null,
198
  "iso_3_code": "skw",
199
- "tokenizers": {},
200
  "children": [],
 
201
  "node_i": "3495",
202
- "scripts": [],
203
- "own_tokenizer": false
204
  }
205
  ],
 
206
  "node_i": "3490",
207
- "scripts": [],
208
- "own_tokenizer": false
209
  },
210
  {
211
  "name": "English based",
212
  "iso_1_code": null,
213
  "iso_3_code": null,
214
- "tokenizers": {
215
- "Latn": {
216
- "full_object": "StanzaTokenizer(\"pcm\")",
217
- "original_lang_name": "nigerian_pidgin",
218
- "original_lang_code": "pcm",
219
- "scripts": [
220
- "Latn"
221
- ],
222
- "class_name": "StanzaTokenizer",
223
- "macrolanguage": false
224
- }
225
- },
226
  "children": [
227
  {
228
  "name": "Saramaccan",
229
  "iso_1_code": null,
230
  "iso_3_code": "srm",
 
231
  "tokenizers": {
232
  "Latn": {
233
  "full_object": "StanzaTokenizer(\"pcm\")",
234
  "original_lang_name": "nigerian_pidgin",
235
  "original_lang_code": "pcm",
236
- "scripts": [
237
- "Latn"
238
- ],
239
- "class_name": "StanzaTokenizer",
240
- "macrolanguage": false
241
  }
242
  },
243
- "children": [],
244
  "node_i": "3497",
 
245
  "scripts": [
246
  "Latn"
247
- ],
248
- "own_tokenizer": false
249
  },
250
  {
251
  "name": "Atlantic",
252
  "iso_1_code": null,
253
  "iso_3_code": null,
254
- "tokenizers": {
255
- "Latn": {
256
- "full_object": "StanzaTokenizer(\"pcm\")",
257
- "original_lang_name": "nigerian_pidgin",
258
- "original_lang_code": "pcm",
259
- "scripts": [
260
- "Latn"
261
- ],
262
- "class_name": "StanzaTokenizer",
263
- "macrolanguage": false
264
- }
265
- },
266
  "children": [
267
  {
268
  "name": "Eastern",
269
  "iso_1_code": null,
270
  "iso_3_code": null,
271
- "tokenizers": {
272
- "Latn": {
273
- "full_object": "StanzaTokenizer(\"pcm\")",
274
- "original_lang_name": "nigerian_pidgin",
275
- "original_lang_code": "pcm",
276
- "scripts": [
277
- "Latn"
278
- ],
279
- "class_name": "StanzaTokenizer",
280
- "macrolanguage": false
281
- }
282
- },
283
  "children": [
284
  {
285
  "name": "Turks and Caicos English Creole",
286
  "iso_1_code": null,
287
  "iso_3_code": "tch",
288
- "tokenizers": {},
289
  "children": [],
 
290
  "node_i": "3500",
291
- "scripts": [],
292
- "own_tokenizer": false
293
  },
294
  {
295
  "name": "Northern",
296
  "iso_1_code": null,
297
  "iso_3_code": null,
298
- "tokenizers": {
299
- "Latn": {
300
- "full_object": "StanzaTokenizer(\"pcm\")",
301
- "original_lang_name": "nigerian_pidgin",
302
- "original_lang_code": "pcm",
303
- "scripts": [
304
- "Latn"
305
- ],
306
- "class_name": "StanzaTokenizer",
307
- "macrolanguage": false
308
- }
309
- },
310
  "children": [
311
  {
312
  "name": "Afro-Seminole Creole",
313
  "iso_1_code": null,
314
  "iso_3_code": "afs",
315
- "tokenizers": {},
316
  "children": [],
 
317
  "node_i": "3502",
318
- "scripts": [],
319
- "own_tokenizer": false
320
  },
321
  {
322
  "name": "Bahamas English Creole",
323
  "iso_1_code": null,
324
  "iso_3_code": "bah",
325
- "tokenizers": {},
326
  "children": [],
 
327
  "node_i": "3503",
328
- "scripts": [],
329
- "own_tokenizer": false
330
  },
331
  {
332
  "name": "Sea Island English Creole",
333
  "iso_1_code": null,
334
  "iso_3_code": "gul",
 
335
  "tokenizers": {
336
  "Latn": {
337
  "full_object": "StanzaTokenizer(\"pcm\")",
338
  "original_lang_name": "nigerian_pidgin",
339
  "original_lang_code": "pcm",
340
- "scripts": [
341
- "Latn"
342
- ],
343
- "class_name": "StanzaTokenizer",
344
- "macrolanguage": false
345
  }
346
  },
347
- "children": [],
348
  "node_i": "3504",
 
349
  "scripts": [
350
  "Latn"
351
- ],
352
- "own_tokenizer": false
353
  }
354
  ],
 
 
 
 
 
 
 
 
 
355
  "node_i": "3501",
356
- "scripts": [],
357
- "own_tokenizer": false
358
  },
359
  {
360
  "name": "Southern",
361
  "iso_1_code": null,
362
  "iso_3_code": null,
363
- "tokenizers": {},
364
  "children": [
365
  {
366
  "name": "Leeward Caribbean English Creole",
367
  "iso_1_code": null,
368
  "iso_3_code": "aig",
369
- "tokenizers": {},
370
  "children": [],
 
371
  "node_i": "3506",
372
- "scripts": [],
373
- "own_tokenizer": false
374
  },
375
  {
376
  "name": "Bajan",
377
  "iso_1_code": null,
378
  "iso_3_code": "bjs",
379
- "tokenizers": {},
380
  "children": [],
 
381
  "node_i": "3507",
382
- "scripts": [],
383
- "own_tokenizer": false
384
  },
385
  {
386
  "name": "Grenadian English Creole",
387
  "iso_1_code": null,
388
  "iso_3_code": "gcl",
389
- "tokenizers": {},
390
  "children": [],
 
391
  "node_i": "3508",
392
- "scripts": [],
393
- "own_tokenizer": false
394
  },
395
  {
396
  "name": "Guyanese English Creole",
397
  "iso_1_code": null,
398
  "iso_3_code": "gyn",
399
- "tokenizers": {},
400
  "children": [],
 
401
  "node_i": "3509",
402
- "scripts": [],
403
- "own_tokenizer": false
404
  },
405
  {
406
  "name": "Vincentian English Creole",
407
  "iso_1_code": null,
408
  "iso_3_code": "svc",
409
- "tokenizers": {},
410
  "children": [],
 
411
  "node_i": "3510",
412
- "scripts": [],
413
- "own_tokenizer": false
414
  },
415
  {
416
  "name": "Tobagonian English Creole",
417
  "iso_1_code": null,
418
  "iso_3_code": "tgh",
419
- "tokenizers": {},
420
  "children": [],
 
421
  "node_i": "3511",
422
- "scripts": [],
423
- "own_tokenizer": false
424
  },
425
  {
426
  "name": "Trinidadian English Creole",
427
  "iso_1_code": null,
428
  "iso_3_code": "trf",
429
- "tokenizers": {},
430
  "children": [],
 
431
  "node_i": "3512",
432
- "scripts": [],
433
- "own_tokenizer": false
434
  },
435
  {
436
  "name": "Virgin Islands English Creole",
437
  "iso_1_code": null,
438
  "iso_3_code": "vic",
439
- "tokenizers": {},
440
  "children": [],
 
441
  "node_i": "3513",
442
- "scripts": [],
443
- "own_tokenizer": false
444
  }
445
  ],
 
446
  "node_i": "3505",
447
- "scripts": [],
448
- "own_tokenizer": false
449
  }
450
  ],
451
- "node_i": "3499",
452
- "scripts": [],
453
- "own_tokenizer": false
454
- },
455
- {
456
- "name": "Krio",
457
- "iso_1_code": null,
458
- "iso_3_code": null,
459
  "tokenizers": {
460
  "Latn": {
461
  "full_object": "StanzaTokenizer(\"pcm\")",
462
  "original_lang_name": "nigerian_pidgin",
463
  "original_lang_code": "pcm",
464
- "scripts": [
465
- "Latn"
466
- ],
467
- "class_name": "StanzaTokenizer",
468
- "macrolanguage": false
469
  }
470
  },
 
 
 
 
 
 
 
 
471
  "children": [
472
  {
473
  "name": "Equatorial Guinean Pidgin",
474
  "iso_1_code": null,
475
  "iso_3_code": "fpe",
476
- "tokenizers": {},
477
  "children": [],
 
478
  "node_i": "3515",
479
- "scripts": [],
480
- "own_tokenizer": false
481
  },
482
  {
483
  "name": "Ghanaian Pidgin English",
484
  "iso_1_code": null,
485
  "iso_3_code": "gpe",
486
- "tokenizers": {},
487
  "children": [],
 
488
  "node_i": "3516",
489
- "scripts": [],
490
- "own_tokenizer": false
491
  },
492
  {
493
  "name": "Krio",
494
  "iso_1_code": null,
495
  "iso_3_code": "kri",
 
496
  "tokenizers": {
497
  "Latn": {
498
  "full_object": "StanzaTokenizer(\"pcm\")",
499
  "original_lang_name": "nigerian_pidgin",
500
  "original_lang_code": "pcm",
501
- "scripts": [
502
- "Latn"
503
- ],
504
- "class_name": "StanzaTokenizer",
505
- "macrolanguage": false
506
  }
507
  },
508
- "children": [],
509
  "node_i": "3517",
 
510
  "scripts": [
511
  "Latn"
512
- ],
513
- "own_tokenizer": false
514
  },
515
  {
516
  "name": "Pidgin, Nigerian",
517
  "iso_1_code": null,
518
  "iso_3_code": "pcm",
 
519
  "tokenizers": {
520
  "Latn": {
521
  "full_object": "StanzaTokenizer(\"pcm\")",
522
  "original_lang_name": "nigerian_pidgin",
523
  "original_lang_code": "pcm",
524
- "scripts": [
525
- "Latn"
526
- ],
527
- "class_name": "StanzaTokenizer",
528
- "macrolanguage": false
529
  }
530
  },
531
- "children": [],
532
  "node_i": "3518",
533
- "scripts": [
534
  "Latn"
535
  ],
536
- "own_tokenizer": true
 
 
537
  },
538
  {
539
  "name": "Pidgin, Cameroon",
540
  "iso_1_code": null,
541
  "iso_3_code": "wes",
 
542
  "tokenizers": {
543
  "Latn": {
544
  "full_object": "StanzaTokenizer(\"pcm\")",
545
  "original_lang_name": "nigerian_pidgin",
546
  "original_lang_code": "pcm",
547
- "scripts": [
548
- "Latn"
549
- ],
550
- "class_name": "StanzaTokenizer",
551
- "macrolanguage": false
552
  }
553
  },
554
- "children": [],
555
  "node_i": "3519",
 
556
  "scripts": [
557
  "Latn"
558
- ],
559
- "own_tokenizer": false
560
  }
561
  ],
562
- "node_i": "3514",
563
- "scripts": [],
564
- "own_tokenizer": false
565
- },
566
- {
567
- "name": "Suriname",
568
- "iso_1_code": null,
569
- "iso_3_code": null,
570
  "tokenizers": {
571
  "Latn": {
572
  "full_object": "StanzaTokenizer(\"pcm\")",
573
  "original_lang_name": "nigerian_pidgin",
574
  "original_lang_code": "pcm",
575
- "scripts": [
576
- "Latn"
577
- ],
578
- "class_name": "StanzaTokenizer",
579
- "macrolanguage": false
580
  }
581
  },
 
 
 
 
 
 
 
 
582
  "children": [
583
  {
584
  "name": "Sranan Tongo",
585
  "iso_1_code": null,
586
  "iso_3_code": "srn",
 
587
  "tokenizers": {
588
  "Latn": {
589
  "full_object": "StanzaTokenizer(\"pcm\")",
590
  "original_lang_name": "nigerian_pidgin",
591
  "original_lang_code": "pcm",
592
- "scripts": [
593
- "Latn"
594
- ],
595
- "class_name": "StanzaTokenizer",
596
- "macrolanguage": false
597
  }
598
  },
599
- "children": [],
600
  "node_i": "3521",
 
601
  "scripts": [
602
  "Latn"
603
- ],
604
- "own_tokenizer": false
605
  },
606
  {
607
  "name": "Ndyuka",
608
  "iso_1_code": null,
609
  "iso_3_code": null,
610
- "tokenizers": {
611
- "Latn": {
612
- "full_object": "StanzaTokenizer(\"pcm\")",
613
- "original_lang_name": "nigerian_pidgin",
614
- "original_lang_code": "pcm",
615
- "scripts": [
616
- "Latn"
617
- ],
618
- "class_name": "StanzaTokenizer",
619
- "macrolanguage": false
620
- }
621
- },
622
  "children": [
623
  {
624
  "name": "Aukan",
625
  "iso_1_code": null,
626
  "iso_3_code": "djk",
 
627
  "tokenizers": {
628
  "Latn": {
629
  "full_object": "StanzaTokenizer(\"pcm\")",
630
  "original_lang_name": "nigerian_pidgin",
631
  "original_lang_code": "pcm",
632
- "scripts": [
633
- "Latn"
634
- ],
635
- "class_name": "StanzaTokenizer",
636
- "macrolanguage": false
637
  }
638
  },
639
- "children": [],
640
  "node_i": "3523",
 
641
  "scripts": [
642
  "Latn"
643
- ],
644
- "own_tokenizer": false
645
  },
646
  {
647
  "name": "Kwinti",
648
  "iso_1_code": null,
649
  "iso_3_code": "kww",
650
- "tokenizers": {},
651
  "children": [],
 
652
  "node_i": "3524",
653
- "scripts": [],
654
- "own_tokenizer": false
655
  }
656
  ],
 
 
 
 
 
 
 
 
 
657
  "node_i": "3522",
658
- "scripts": [],
659
- "own_tokenizer": false
660
  }
661
  ],
662
- "node_i": "3520",
663
- "scripts": [],
664
- "own_tokenizer": false
665
- },
666
- {
667
- "name": "Western",
668
- "iso_1_code": null,
669
- "iso_3_code": null,
670
  "tokenizers": {
671
  "Latn": {
672
  "full_object": "StanzaTokenizer(\"pcm\")",
673
  "original_lang_name": "nigerian_pidgin",
674
  "original_lang_code": "pcm",
675
- "scripts": [
676
- "Latn"
677
- ],
678
- "class_name": "StanzaTokenizer",
679
- "macrolanguage": false
680
  }
681
  },
 
 
 
 
 
 
 
 
682
  "children": [
683
  {
684
  "name": "Belize English Creole",
685
  "iso_1_code": null,
686
  "iso_3_code": "bzj",
 
687
  "tokenizers": {
688
  "Latn": {
689
  "full_object": "StanzaTokenizer(\"pcm\")",
690
  "original_lang_name": "nigerian_pidgin",
691
  "original_lang_code": "pcm",
692
- "scripts": [
693
- "Latn"
694
- ],
695
- "class_name": "StanzaTokenizer",
696
- "macrolanguage": false
697
  }
698
  },
699
- "children": [],
700
  "node_i": "3526",
 
701
  "scripts": [
702
  "Latn"
703
- ],
704
- "own_tokenizer": false
705
  },
706
  {
707
  "name": "Nicaragua English Creole",
708
  "iso_1_code": null,
709
  "iso_3_code": "bzk",
710
- "tokenizers": {},
711
  "children": [],
 
712
  "node_i": "3527",
713
- "scripts": [],
714
- "own_tokenizer": false
715
  },
716
  {
717
  "name": "Islander English Creole",
718
  "iso_1_code": null,
719
  "iso_3_code": "icr",
 
720
  "tokenizers": {
721
  "Latn": {
722
  "full_object": "StanzaTokenizer(\"pcm\")",
723
  "original_lang_name": "nigerian_pidgin",
724
  "original_lang_code": "pcm",
725
- "scripts": [
726
- "Latn"
727
- ],
728
- "class_name": "StanzaTokenizer",
729
- "macrolanguage": false
730
  }
731
  },
732
- "children": [],
733
  "node_i": "3528",
 
734
  "scripts": [
735
  "Latn"
736
- ],
737
- "own_tokenizer": false
738
  },
739
  {
740
  "name": "Jamaican English Creole",
741
  "iso_1_code": null,
742
  "iso_3_code": "jam",
 
743
  "tokenizers": {
744
  "Latn": {
745
  "full_object": "StanzaTokenizer(\"pcm\")",
746
  "original_lang_name": "nigerian_pidgin",
747
  "original_lang_code": "pcm",
748
- "scripts": [
749
- "Latn"
750
- ],
751
- "class_name": "StanzaTokenizer",
752
- "macrolanguage": false
753
  }
754
  },
755
- "children": [],
756
  "node_i": "3529",
 
757
  "scripts": [
758
  "Latn"
759
- ],
760
- "own_tokenizer": false
761
  }
762
  ],
 
 
 
 
 
 
 
 
 
763
  "node_i": "3525",
764
- "scripts": [],
765
- "own_tokenizer": false
766
  }
767
  ],
768
- "node_i": "3498",
769
- "scripts": [],
770
- "own_tokenizer": false
771
- },
772
- {
773
- "name": "Pacific",
774
- "iso_1_code": null,
775
- "iso_3_code": null,
776
  "tokenizers": {
777
  "Latn": {
778
  "full_object": "StanzaTokenizer(\"pcm\")",
779
  "original_lang_name": "nigerian_pidgin",
780
  "original_lang_code": "pcm",
781
- "scripts": [
782
- "Latn"
783
- ],
784
- "class_name": "StanzaTokenizer",
785
- "macrolanguage": false
786
  }
787
  },
 
 
 
 
 
 
 
 
788
  "children": [
789
  {
790
  "name": "Bislama",
791
  "iso_1_code": "bi",
792
  "iso_3_code": "bis",
 
793
  "tokenizers": {
794
  "Latn": {
795
  "full_object": "StanzaTokenizer(\"pcm\")",
796
  "original_lang_name": "nigerian_pidgin",
797
  "original_lang_code": "pcm",
798
- "scripts": [
799
- "Latn"
800
- ],
801
- "class_name": "StanzaTokenizer",
802
- "macrolanguage": false
803
  }
804
  },
805
- "children": [],
806
  "node_i": "3531",
 
807
  "scripts": [
808
  "Latn"
809
- ],
810
- "own_tokenizer": false
811
  },
812
  {
813
  "name": "Hawaii Pidgin",
814
  "iso_1_code": null,
815
  "iso_3_code": "hwc",
 
816
  "tokenizers": {
817
  "Latn": {
818
  "full_object": "StanzaTokenizer(\"pcm\")",
819
  "original_lang_name": "nigerian_pidgin",
820
  "original_lang_code": "pcm",
821
- "scripts": [
822
- "Latn"
823
- ],
824
- "class_name": "StanzaTokenizer",
825
- "macrolanguage": false
826
  }
827
  },
828
- "children": [],
829
  "node_i": "3532",
 
830
  "scripts": [
831
  "Latn"
832
- ],
833
- "own_tokenizer": false
834
  },
835
  {
836
  "name": "Ngatik Men\u2019s Creole",
837
  "iso_1_code": null,
838
  "iso_3_code": "ngm",
839
- "tokenizers": {},
840
  "children": [],
 
841
  "node_i": "3533",
842
- "scripts": [],
843
- "own_tokenizer": false
844
  },
845
  {
846
  "name": "Pitcairn-Norfolk",
847
  "iso_1_code": null,
848
  "iso_3_code": "pih",
849
- "tokenizers": {},
850
  "children": [],
 
851
  "node_i": "3534",
852
- "scripts": [],
853
- "own_tokenizer": false
854
  },
855
  {
856
  "name": "Pijin",
857
  "iso_1_code": null,
858
  "iso_3_code": "pis",
 
859
  "tokenizers": {
860
  "Latn": {
861
  "full_object": "StanzaTokenizer(\"pcm\")",
862
  "original_lang_name": "nigerian_pidgin",
863
  "original_lang_code": "pcm",
864
- "scripts": [
865
- "Latn"
866
- ],
867
- "class_name": "StanzaTokenizer",
868
- "macrolanguage": false
869
  }
870
  },
871
- "children": [],
872
  "node_i": "3535",
 
873
  "scripts": [
874
  "Latn"
875
- ],
876
- "own_tokenizer": false
877
  },
878
  {
879
  "name": "Kriol",
880
  "iso_1_code": null,
881
  "iso_3_code": "rop",
 
882
  "tokenizers": {
883
  "Latn": {
884
  "full_object": "StanzaTokenizer(\"pcm\")",
885
  "original_lang_name": "nigerian_pidgin",
886
  "original_lang_code": "pcm",
887
- "scripts": [
888
- "Latn"
889
- ],
890
- "class_name": "StanzaTokenizer",
891
- "macrolanguage": false
892
  }
893
  },
894
- "children": [],
895
  "node_i": "3536",
 
896
  "scripts": [
897
  "Latn"
898
- ],
899
- "own_tokenizer": false
900
  },
901
  {
902
  "name": "Torres Strait Creole",
903
  "iso_1_code": null,
904
  "iso_3_code": "tcs",
 
905
  "tokenizers": {
906
  "Latn": {
907
  "full_object": "StanzaTokenizer(\"pcm\")",
908
  "original_lang_name": "nigerian_pidgin",
909
  "original_lang_code": "pcm",
910
- "scripts": [
911
- "Latn"
912
- ],
913
- "class_name": "StanzaTokenizer",
914
- "macrolanguage": false
915
  }
916
  },
917
- "children": [],
918
  "node_i": "3537",
 
919
  "scripts": [
920
  "Latn"
921
- ],
922
- "own_tokenizer": false
923
  },
924
  {
925
  "name": "Tok Pisin",
926
  "iso_1_code": null,
927
  "iso_3_code": "tpi",
 
928
  "tokenizers": {
929
  "Latn": {
930
  "full_object": "StanzaTokenizer(\"pcm\")",
931
  "original_lang_name": "nigerian_pidgin",
932
  "original_lang_code": "pcm",
933
- "scripts": [
934
- "Latn"
935
- ],
936
- "class_name": "StanzaTokenizer",
937
- "macrolanguage": false
938
  }
939
  },
940
- "children": [],
941
  "node_i": "3538",
 
942
  "scripts": [
943
  "Latn"
944
- ],
945
- "own_tokenizer": false
946
  }
947
  ],
 
 
 
 
 
 
 
 
 
948
  "node_i": "3530",
949
- "scripts": [],
950
- "own_tokenizer": false
951
  }
952
  ],
 
 
 
 
 
 
 
 
 
953
  "node_i": "3496",
954
- "scripts": [],
955
- "own_tokenizer": false
956
  },
957
  {
958
  "name": "French based",
959
  "iso_1_code": null,
960
  "iso_3_code": null,
961
- "tokenizers": {
962
- "Arab": {
963
- "full_object": "SpaCyTokenizer(\"ms\")",
964
- "original_lang_name": "malay",
965
- "original_lang_code": "msa",
966
- "scripts": [
967
- "Latn",
968
- "Arab",
969
- "Thai"
970
- ],
971
- "class_name": "SpaCyTokenizer",
972
- "macrolanguage": true
973
- },
974
- "Latn": {
975
- "full_object": "SpaCyTokenizer(\"ms\")",
976
- "original_lang_name": "malay",
977
- "original_lang_code": "msa",
978
- "scripts": [
979
- "Latn",
980
- "Arab",
981
- "Thai"
982
- ],
983
- "class_name": "SpaCyTokenizer",
984
- "macrolanguage": true
985
- },
986
- "Thai": {
987
- "full_object": "SpaCyTokenizer(\"ms\")",
988
- "original_lang_name": "malay",
989
- "original_lang_code": "msa",
990
- "scripts": [
991
- "Latn",
992
- "Arab",
993
- "Thai"
994
- ],
995
- "class_name": "SpaCyTokenizer",
996
- "macrolanguage": true
997
- }
998
- },
999
  "children": [
1000
  {
1001
  "name": "Lesser Antillean French Creole",
1002
  "iso_1_code": null,
1003
  "iso_3_code": "acf",
 
1004
  "tokenizers": {
1005
  "Latn": {
1006
- "full_object": "SpaCyTokenizer(\"ms\")",
1007
- "original_lang_name": "malay",
1008
- "original_lang_code": "msa",
1009
- "scripts": [
1010
- "Latn",
1011
- "Arab",
1012
- "Thai"
1013
- ],
1014
- "class_name": "SpaCyTokenizer",
1015
- "macrolanguage": true
1016
  }
1017
  },
1018
- "children": [],
1019
  "node_i": "3540",
 
1020
  "scripts": [
1021
  "Latn"
1022
- ],
1023
- "own_tokenizer": false
1024
  },
1025
  {
1026
  "name": "Tayo",
1027
  "iso_1_code": null,
1028
  "iso_3_code": "cks",
1029
- "tokenizers": {},
1030
  "children": [],
 
1031
  "node_i": "3541",
1032
- "scripts": [],
1033
- "own_tokenizer": false
1034
  },
1035
  {
1036
  "name": "Seychelles French Creole",
1037
  "iso_1_code": null,
1038
  "iso_3_code": "crs",
 
1039
  "tokenizers": {
1040
  "Latn": {
1041
- "full_object": "SpaCyTokenizer(\"ms\")",
1042
- "original_lang_name": "malay",
1043
- "original_lang_code": "msa",
1044
- "scripts": [
1045
- "Latn",
1046
- "Arab",
1047
- "Thai"
1048
- ],
1049
- "class_name": "SpaCyTokenizer",
1050
- "macrolanguage": true
1051
  }
1052
  },
1053
- "children": [],
1054
  "node_i": "3542",
 
1055
  "scripts": [
1056
  "Latn"
1057
- ],
1058
- "own_tokenizer": false
1059
  },
1060
  {
1061
  "name": "Guadeloupean French Creole",
1062
  "iso_1_code": null,
1063
  "iso_3_code": "gcf",
 
1064
  "tokenizers": {
1065
  "Latn": {
1066
- "full_object": "SpaCyTokenizer(\"ms\")",
1067
- "original_lang_name": "malay",
1068
- "original_lang_code": "msa",
1069
- "scripts": [
1070
- "Latn",
1071
- "Arab",
1072
- "Thai"
1073
- ],
1074
- "class_name": "SpaCyTokenizer",
1075
- "macrolanguage": true
1076
  }
1077
  },
1078
- "children": [],
1079
  "node_i": "3543",
 
1080
  "scripts": [
1081
  "Latn"
1082
- ],
1083
- "own_tokenizer": false
1084
  },
1085
  {
1086
  "name": "Guianese French Creole",
1087
  "iso_1_code": null,
1088
  "iso_3_code": "gcr",
 
1089
  "tokenizers": {
1090
  "Latn": {
1091
- "full_object": "SpaCyTokenizer(\"ms\")",
1092
- "original_lang_name": "malay",
1093
- "original_lang_code": "msa",
1094
- "scripts": [
1095
- "Latn",
1096
- "Arab",
1097
- "Thai"
1098
- ],
1099
- "class_name": "SpaCyTokenizer",
1100
- "macrolanguage": true
1101
  }
1102
  },
1103
- "children": [],
1104
  "node_i": "3544",
 
1105
  "scripts": [
1106
  "Latn"
1107
- ],
1108
- "own_tokenizer": false
1109
  },
1110
  {
1111
  "name": "Haitian Creole",
1112
  "iso_1_code": "ht",
1113
  "iso_3_code": "hat",
 
1114
  "tokenizers": {
1115
  "Latn": {
1116
- "full_object": "SpaCyTokenizer(\"ms\")",
1117
- "original_lang_name": "malay",
1118
- "original_lang_code": "msa",
1119
- "scripts": [
1120
- "Latn",
1121
- "Arab",
1122
- "Thai"
1123
- ],
1124
- "class_name": "SpaCyTokenizer",
1125
- "macrolanguage": true
1126
  }
1127
  },
1128
- "children": [],
1129
  "node_i": "3545",
 
1130
  "scripts": [
1131
  "Latn"
1132
- ],
1133
- "own_tokenizer": false
1134
  },
1135
  {
1136
  "name": "Karipuna French Creole",
1137
  "iso_1_code": null,
1138
  "iso_3_code": "kmv",
1139
- "tokenizers": {},
1140
  "children": [],
 
1141
  "node_i": "3546",
1142
- "scripts": [],
1143
- "own_tokenizer": false
1144
  },
1145
  {
1146
  "name": "Louisiana Creole",
1147
  "iso_1_code": null,
1148
  "iso_3_code": "lou",
1149
- "tokenizers": {},
1150
  "children": [],
 
1151
  "node_i": "3547",
1152
- "scripts": [],
1153
- "own_tokenizer": false
1154
  },
1155
  {
1156
  "name": "Morisyen",
1157
  "iso_1_code": null,
1158
  "iso_3_code": "mfe",
 
1159
  "tokenizers": {
1160
  "Latn": {
1161
- "full_object": "SpaCyTokenizer(\"ms\")",
1162
- "original_lang_name": "malay",
1163
- "original_lang_code": "msa",
1164
- "scripts": [
1165
- "Latn",
1166
- "Arab",
1167
- "Thai"
1168
- ],
1169
- "class_name": "SpaCyTokenizer",
1170
- "macrolanguage": true
1171
  }
1172
  },
1173
- "children": [],
1174
  "node_i": "3548",
 
1175
  "scripts": [
1176
  "Latn"
1177
- ],
1178
- "own_tokenizer": false
1179
  },
1180
  {
1181
  "name": "R\u00e9union French Creole",
1182
  "iso_1_code": null,
1183
  "iso_3_code": "rcf",
 
1184
  "tokenizers": {
1185
  "Latn": {
1186
- "full_object": "SpaCyTokenizer(\"ms\")",
1187
- "original_lang_name": "malay",
1188
- "original_lang_code": "msa",
1189
- "scripts": [
1190
- "Latn",
1191
- "Arab",
1192
- "Thai"
1193
- ],
1194
- "class_name": "SpaCyTokenizer",
1195
- "macrolanguage": true
1196
  }
1197
  },
1198
- "children": [],
1199
  "node_i": "3549",
 
1200
  "scripts": [
1201
  "Latn"
1202
- ],
1203
- "own_tokenizer": false
1204
  },
1205
  {
1206
  "name": "San Miguel French Creole",
1207
  "iso_1_code": null,
1208
  "iso_3_code": "scf",
1209
- "tokenizers": {},
1210
  "children": [],
 
1211
  "node_i": "3550",
1212
- "scripts": [],
1213
- "own_tokenizer": false
1214
  }
1215
  ],
 
 
 
 
 
 
 
 
 
1216
  "node_i": "3539",
1217
- "scripts": [],
1218
- "own_tokenizer": false
1219
  },
1220
  {
1221
  "name": "German based",
1222
  "iso_1_code": null,
1223
  "iso_3_code": null,
1224
- "tokenizers": {},
1225
  "children": [
1226
  {
1227
  "name": "Unserdeutsch",
1228
  "iso_1_code": null,
1229
  "iso_3_code": "uln",
1230
- "tokenizers": {},
1231
  "children": [],
 
1232
  "node_i": "3552",
1233
- "scripts": [],
1234
- "own_tokenizer": false
1235
  }
1236
  ],
 
1237
  "node_i": "3551",
1238
- "scripts": [],
1239
- "own_tokenizer": false
1240
  },
1241
  {
1242
  "name": "Hindi based",
1243
  "iso_1_code": null,
1244
  "iso_3_code": null,
1245
- "tokenizers": {},
1246
  "children": [
1247
  {
1248
  "name": "Andaman Hindi Creole",
1249
  "iso_1_code": null,
1250
  "iso_3_code": "hca",
1251
- "tokenizers": {},
1252
  "children": [],
 
1253
  "node_i": "3554",
1254
- "scripts": [],
1255
- "own_tokenizer": false
1256
  }
1257
  ],
 
1258
  "node_i": "3553",
1259
- "scripts": [],
1260
- "own_tokenizer": false
1261
  },
1262
  {
1263
  "name": "Iberian based",
1264
  "iso_1_code": null,
1265
  "iso_3_code": null,
1266
- "tokenizers": {
1267
- "Arab": {
1268
- "full_object": "SpaCyTokenizer(\"ms\")",
1269
- "original_lang_name": "malay",
1270
- "original_lang_code": "msa",
1271
- "scripts": [
1272
- "Latn",
1273
- "Arab",
1274
- "Thai"
1275
- ],
1276
- "class_name": "SpaCyTokenizer",
1277
- "macrolanguage": true
1278
- },
1279
- "Latn": {
1280
- "full_object": "SpaCyTokenizer(\"ms\")",
1281
- "original_lang_name": "malay",
1282
- "original_lang_code": "msa",
1283
- "scripts": [
1284
- "Latn",
1285
- "Arab",
1286
- "Thai"
1287
- ],
1288
- "class_name": "SpaCyTokenizer",
1289
- "macrolanguage": true
1290
- },
1291
- "Thai": {
1292
- "full_object": "SpaCyTokenizer(\"ms\")",
1293
- "original_lang_name": "malay",
1294
- "original_lang_code": "msa",
1295
- "scripts": [
1296
- "Latn",
1297
- "Arab",
1298
- "Thai"
1299
- ],
1300
- "class_name": "SpaCyTokenizer",
1301
- "macrolanguage": true
1302
- }
1303
- },
1304
  "children": [
1305
  {
1306
  "name": "Papiamentu",
1307
  "iso_1_code": null,
1308
  "iso_3_code": "pap",
 
1309
  "tokenizers": {
1310
  "Latn": {
1311
- "full_object": "SpaCyTokenizer(\"ms\")",
1312
- "original_lang_name": "malay",
1313
- "original_lang_code": "msa",
1314
- "scripts": [
1315
- "Latn",
1316
- "Arab",
1317
- "Thai"
1318
- ],
1319
- "class_name": "SpaCyTokenizer",
1320
- "macrolanguage": true
1321
  }
1322
  },
1323
- "children": [],
1324
  "node_i": "3556",
 
1325
  "scripts": [
1326
  "Latn"
1327
- ],
1328
- "own_tokenizer": false
1329
  }
1330
  ],
 
 
 
 
 
 
 
 
 
1331
  "node_i": "3555",
1332
- "scripts": [],
1333
- "own_tokenizer": false
1334
  },
1335
  {
1336
  "name": "Japanese-based",
1337
  "iso_1_code": null,
1338
  "iso_3_code": null,
1339
- "tokenizers": {},
1340
  "children": [
1341
  {
1342
  "name": "Yilan Creole",
1343
  "iso_1_code": null,
1344
  "iso_3_code": "ycr",
1345
- "tokenizers": {},
1346
  "children": [],
 
1347
  "node_i": "3558",
1348
- "scripts": [],
1349
- "own_tokenizer": false
1350
  }
1351
  ],
 
1352
  "node_i": "3557",
1353
- "scripts": [],
1354
- "own_tokenizer": false
1355
  },
1356
  {
1357
  "name": "Kongo based",
1358
  "iso_1_code": null,
1359
  "iso_3_code": null,
1360
- "tokenizers": {
1361
- "Arab": {
1362
- "full_object": "SpaCyTokenizer(\"ms\")",
1363
- "original_lang_name": "malay",
1364
- "original_lang_code": "msa",
1365
- "scripts": [
1366
- "Latn",
1367
- "Arab",
1368
- "Thai"
1369
- ],
1370
- "class_name": "SpaCyTokenizer",
1371
- "macrolanguage": true
1372
- },
1373
- "Latn": {
1374
- "full_object": "SpaCyTokenizer(\"ms\")",
1375
- "original_lang_name": "malay",
1376
- "original_lang_code": "msa",
1377
- "scripts": [
1378
- "Latn",
1379
- "Arab",
1380
- "Thai"
1381
- ],
1382
- "class_name": "SpaCyTokenizer",
1383
- "macrolanguage": true
1384
- },
1385
- "Thai": {
1386
- "full_object": "SpaCyTokenizer(\"ms\")",
1387
- "original_lang_name": "malay",
1388
- "original_lang_code": "msa",
1389
- "scripts": [
1390
- "Latn",
1391
- "Arab",
1392
- "Thai"
1393
- ],
1394
- "class_name": "SpaCyTokenizer",
1395
- "macrolanguage": true
1396
- }
1397
- },
1398
  "children": [
1399
  {
1400
  "name": "Kituba",
1401
  "iso_1_code": null,
1402
  "iso_3_code": "ktu",
 
1403
  "tokenizers": {
1404
  "Latn": {
1405
- "full_object": "SpaCyTokenizer(\"ms\")",
1406
- "original_lang_name": "malay",
1407
- "original_lang_code": "msa",
1408
- "scripts": [
1409
- "Latn",
1410
- "Arab",
1411
- "Thai"
1412
- ],
1413
- "class_name": "SpaCyTokenizer",
1414
- "macrolanguage": true
1415
  }
1416
  },
1417
- "children": [],
1418
  "node_i": "3560",
 
1419
  "scripts": [
1420
  "Latn"
1421
- ],
1422
- "own_tokenizer": false
1423
  },
1424
  {
1425
  "name": "Kituba",
1426
  "iso_1_code": null,
1427
  "iso_3_code": "mkw",
1428
- "tokenizers": {},
1429
  "children": [],
 
1430
  "node_i": "3561",
1431
- "scripts": [],
1432
- "own_tokenizer": false
1433
  }
1434
  ],
 
 
 
 
 
 
 
 
 
1435
  "node_i": "3559",
1436
- "scripts": [],
1437
- "own_tokenizer": false
1438
  },
1439
  {
1440
  "name": "Malay based",
1441
  "iso_1_code": null,
1442
  "iso_3_code": null,
1443
- "tokenizers": {
1444
- "Latn": {
1445
- "full_object": "SpaCyTokenizer(\"ms\")",
1446
- "original_lang_name": "malay",
1447
- "original_lang_code": "msa",
1448
- "scripts": [
1449
- "Latn",
1450
- "Arab",
1451
- "Thai"
1452
- ],
1453
- "class_name": "SpaCyTokenizer",
1454
- "macrolanguage": true
1455
- },
1456
- "Arab": {
1457
- "full_object": "SpaCyTokenizer(\"ms\")",
1458
- "original_lang_name": "malay",
1459
- "original_lang_code": "msa",
1460
- "scripts": [
1461
- "Latn",
1462
- "Arab",
1463
- "Thai"
1464
- ],
1465
- "class_name": "SpaCyTokenizer",
1466
- "macrolanguage": true
1467
- },
1468
- "Thai": {
1469
- "full_object": "SpaCyTokenizer(\"ms\")",
1470
- "original_lang_name": "malay",
1471
- "original_lang_code": "msa",
1472
- "scripts": [
1473
- "Latn",
1474
- "Arab",
1475
- "Thai"
1476
- ],
1477
- "class_name": "SpaCyTokenizer",
1478
- "macrolanguage": true
1479
- }
1480
- },
1481
  "children": [
1482
  {
1483
  "name": "Malay, Ambonese",
1484
  "iso_1_code": null,
1485
  "iso_3_code": "abs",
 
1486
  "tokenizers": {
1487
  "Latn": {
1488
- "full_object": "SpaCyTokenizer(\"ms\")",
1489
- "original_lang_name": "malay",
1490
- "original_lang_code": "msa",
1491
- "scripts": [
1492
- "Latn",
1493
- "Arab",
1494
- "Thai"
1495
- ],
1496
- "class_name": "SpaCyTokenizer",
1497
- "macrolanguage": true
1498
  }
1499
  },
1500
- "children": [],
1501
  "node_i": "3563",
 
1502
  "scripts": [
1503
  "Latn"
1504
- ],
1505
- "own_tokenizer": false
1506
  },
1507
  {
1508
  "name": "Betawi",
1509
  "iso_1_code": null,
1510
  "iso_3_code": "bew",
 
1511
  "tokenizers": {
1512
  "Latn": {
1513
- "full_object": "SpaCyTokenizer(\"ms\")",
1514
- "original_lang_name": "malay",
1515
- "original_lang_code": "msa",
1516
- "scripts": [
1517
- "Latn",
1518
- "Arab",
1519
- "Thai"
1520
- ],
1521
- "class_name": "SpaCyTokenizer",
1522
- "macrolanguage": true
1523
  }
1524
  },
1525
- "children": [],
1526
  "node_i": "3564",
 
1527
  "scripts": [
1528
  "Latn"
1529
- ],
1530
- "own_tokenizer": false
1531
  },
1532
  {
1533
  "name": "Malay, Banda",
1534
  "iso_1_code": null,
1535
  "iso_3_code": "bpq",
1536
- "tokenizers": {},
1537
  "children": [],
 
1538
  "node_i": "3565",
1539
- "scripts": [],
1540
- "own_tokenizer": false
1541
  },
1542
  {
1543
  "name": "Malaccan Malay Creole",
1544
  "iso_1_code": null,
1545
  "iso_3_code": "ccm",
1546
- "tokenizers": {},
1547
  "children": [],
 
1548
  "node_i": "3566",
1549
- "scripts": [],
1550
- "own_tokenizer": false
1551
  },
1552
  {
1553
  "name": "Malay, Cocos Islands",
1554
  "iso_1_code": "ms",
1555
  "iso_3_code": "coa",
1556
- "tokenizers": {
1557
- "Latn": {
1558
- "full_object": "SpaCyTokenizer(\"ms\")",
1559
- "original_lang_name": "malay",
1560
- "original_lang_code": "msa",
1561
- "scripts": [
1562
- "Latn",
1563
- "Arab",
1564
- "Thai"
1565
- ],
1566
- "class_name": "SpaCyTokenizer",
1567
- "macrolanguage": true
1568
- },
1569
- "Arab": {
1570
- "full_object": "SpaCyTokenizer(\"ms\")",
1571
- "original_lang_name": "malay",
1572
- "original_lang_code": "msa",
1573
- "scripts": [
1574
- "Latn",
1575
- "Arab",
1576
- "Thai"
1577
- ],
1578
- "class_name": "SpaCyTokenizer",
1579
- "macrolanguage": true
1580
- },
1581
- "Thai": {
1582
- "full_object": "SpaCyTokenizer(\"ms\")",
1583
- "original_lang_name": "malay",
1584
- "original_lang_code": "msa",
1585
- "scripts": [
1586
- "Latn",
1587
- "Arab",
1588
- "Thai"
1589
- ],
1590
- "class_name": "SpaCyTokenizer",
1591
- "macrolanguage": true
1592
- }
1593
- },
1594
  "children": [],
 
1595
  "node_i": "3567",
1596
- "scripts": [],
1597
- "own_tokenizer": true
1598
  },
1599
  {
1600
  "name": "Malay, Larantuka",
1601
  "iso_1_code": null,
1602
  "iso_3_code": "lrt",
1603
- "tokenizers": {},
1604
  "children": [],
 
1605
  "node_i": "3568",
1606
- "scripts": [],
1607
- "own_tokenizer": false
1608
  },
1609
  {
1610
  "name": "Malay, North Moluccan",
1611
  "iso_1_code": "ms",
1612
  "iso_3_code": "max",
 
1613
  "tokenizers": {
1614
  "Latn": {
1615
- "full_object": "SpaCyTokenizer(\"ms\")",
1616
- "original_lang_name": "malay",
1617
- "original_lang_code": "msa",
1618
- "scripts": [
1619
- "Latn",
1620
- "Arab",
1621
- "Thai"
1622
- ],
1623
- "class_name": "SpaCyTokenizer",
1624
- "macrolanguage": true
1625
- },
1626
- "Arab": {
1627
- "full_object": "SpaCyTokenizer(\"ms\")",
1628
- "original_lang_name": "malay",
1629
- "original_lang_code": "msa",
1630
- "scripts": [
1631
- "Latn",
1632
- "Arab",
1633
- "Thai"
1634
- ],
1635
- "class_name": "SpaCyTokenizer",
1636
- "macrolanguage": true
1637
- },
1638
- "Thai": {
1639
- "full_object": "SpaCyTokenizer(\"ms\")",
1640
- "original_lang_name": "malay",
1641
- "original_lang_code": "msa",
1642
- "scripts": [
1643
- "Latn",
1644
- "Arab",
1645
- "Thai"
1646
- ],
1647
- "class_name": "SpaCyTokenizer",
1648
- "macrolanguage": true
1649
  }
1650
  },
1651
- "children": [],
1652
  "node_i": "3569",
 
1653
  "scripts": [
1654
  "Latn"
1655
- ],
1656
- "own_tokenizer": true
1657
  },
1658
  {
1659
  "name": "Malay, Baba",
1660
  "iso_1_code": null,
1661
  "iso_3_code": "mbf",
 
1662
  "tokenizers": {
1663
  "Latn": {
1664
- "full_object": "SpaCyTokenizer(\"ms\")",
1665
- "original_lang_name": "malay",
1666
- "original_lang_code": "msa",
1667
- "scripts": [
1668
- "Latn",
1669
- "Arab",
1670
- "Thai"
1671
- ],
1672
- "class_name": "SpaCyTokenizer",
1673
- "macrolanguage": true
1674
  }
1675
  },
1676
- "children": [],
1677
  "node_i": "3570",
 
1678
  "scripts": [
1679
  "Latn"
1680
- ],
1681
- "own_tokenizer": false
1682
  },
1683
  {
1684
  "name": "Malay, Balinese",
1685
  "iso_1_code": null,
1686
  "iso_3_code": "mhp",
1687
- "tokenizers": {},
1688
  "children": [],
 
1689
  "node_i": "3571",
1690
- "scripts": [],
1691
- "own_tokenizer": false
1692
  },
1693
  {
1694
  "name": "Malay, Kupang",
1695
  "iso_1_code": null,
1696
  "iso_3_code": "mkn",
 
1697
  "tokenizers": {
1698
  "Latn": {
1699
- "full_object": "SpaCyTokenizer(\"ms\")",
1700
- "original_lang_name": "malay",
1701
- "original_lang_code": "msa",
1702
- "scripts": [
1703
- "Latn",
1704
- "Arab",
1705
- "Thai"
1706
- ],
1707
- "class_name": "SpaCyTokenizer",
1708
- "macrolanguage": true
1709
  }
1710
  },
1711
- "children": [],
1712
  "node_i": "3572",
 
1713
  "scripts": [
1714
  "Latn"
1715
- ],
1716
- "own_tokenizer": false
1717
  },
1718
  {
1719
  "name": "Indonesian, Peranakan",
1720
  "iso_1_code": null,
1721
  "iso_3_code": "pea",
1722
- "tokenizers": {},
1723
  "children": [],
 
1724
  "node_i": "3573",
1725
- "scripts": [],
1726
- "own_tokenizer": false
1727
  },
1728
  {
1729
  "name": "Malay, Papuan",
1730
  "iso_1_code": null,
1731
  "iso_3_code": "pmy",
1732
- "tokenizers": {},
1733
  "children": [],
 
1734
  "node_i": "3574",
1735
- "scripts": [],
1736
- "own_tokenizer": false
1737
  },
1738
  {
1739
  "name": "Sri Lankan Malay Creole",
1740
  "iso_1_code": null,
1741
  "iso_3_code": "sci",
1742
- "tokenizers": {},
1743
  "children": [],
 
1744
  "node_i": "3575",
1745
- "scripts": [],
1746
- "own_tokenizer": false
1747
  },
1748
  {
1749
  "name": "Malay, Manado",
1750
  "iso_1_code": "ms",
1751
  "iso_3_code": "xmm",
 
1752
  "tokenizers": {
1753
  "Latn": {
1754
- "full_object": "SpaCyTokenizer(\"ms\")",
1755
- "original_lang_name": "malay",
1756
- "original_lang_code": "msa",
1757
- "scripts": [
1758
- "Latn",
1759
- "Arab",
1760
- "Thai"
1761
- ],
1762
- "class_name": "SpaCyTokenizer",
1763
- "macrolanguage": true
1764
- },
1765
- "Arab": {
1766
- "full_object": "SpaCyTokenizer(\"ms\")",
1767
- "original_lang_name": "malay",
1768
- "original_lang_code": "msa",
1769
- "scripts": [
1770
- "Latn",
1771
- "Arab",
1772
- "Thai"
1773
- ],
1774
- "class_name": "SpaCyTokenizer",
1775
- "macrolanguage": true
1776
- },
1777
- "Thai": {
1778
- "full_object": "SpaCyTokenizer(\"ms\")",
1779
- "original_lang_name": "malay",
1780
- "original_lang_code": "msa",
1781
- "scripts": [
1782
- "Latn",
1783
- "Arab",
1784
- "Thai"
1785
- ],
1786
- "class_name": "SpaCyTokenizer",
1787
- "macrolanguage": true
1788
  }
1789
  },
1790
- "children": [],
1791
  "node_i": "3576",
 
1792
  "scripts": [
1793
  "Latn"
1794
- ],
1795
- "own_tokenizer": true
1796
  }
1797
  ],
 
 
 
 
 
 
 
 
 
1798
  "node_i": "3562",
1799
- "scripts": [],
1800
- "own_tokenizer": false
1801
  },
1802
  {
1803
  "name": "Ngbandi based",
1804
  "iso_1_code": null,
1805
  "iso_3_code": null,
1806
- "tokenizers": {
1807
- "Arab": {
1808
- "full_object": "SpaCyTokenizer(\"ms\")",
1809
- "original_lang_name": "malay",
1810
- "original_lang_code": "msa",
1811
- "scripts": [
1812
- "Latn",
1813
- "Arab",
1814
- "Thai"
1815
- ],
1816
- "class_name": "SpaCyTokenizer",
1817
- "macrolanguage": true
1818
- },
1819
- "Latn": {
1820
- "full_object": "SpaCyTokenizer(\"ms\")",
1821
- "original_lang_name": "malay",
1822
- "original_lang_code": "msa",
1823
- "scripts": [
1824
- "Latn",
1825
- "Arab",
1826
- "Thai"
1827
- ],
1828
- "class_name": "SpaCyTokenizer",
1829
- "macrolanguage": true
1830
- },
1831
- "Thai": {
1832
- "full_object": "SpaCyTokenizer(\"ms\")",
1833
- "original_lang_name": "malay",
1834
- "original_lang_code": "msa",
1835
- "scripts": [
1836
- "Latn",
1837
- "Arab",
1838
- "Thai"
1839
- ],
1840
- "class_name": "SpaCyTokenizer",
1841
- "macrolanguage": true
1842
- }
1843
- },
1844
  "children": [
1845
  {
1846
  "name": "Sango",
1847
  "iso_1_code": "sg",
1848
  "iso_3_code": "sag",
 
1849
  "tokenizers": {
1850
  "Latn": {
1851
- "full_object": "SpaCyTokenizer(\"ms\")",
1852
- "original_lang_name": "malay",
1853
- "original_lang_code": "msa",
1854
- "scripts": [
1855
- "Latn",
1856
- "Arab",
1857
- "Thai"
1858
- ],
1859
- "class_name": "SpaCyTokenizer",
1860
- "macrolanguage": true
1861
  }
1862
  },
1863
- "children": [],
1864
  "node_i": "3578",
 
1865
  "scripts": [
1866
  "Latn"
1867
- ],
1868
- "own_tokenizer": false
1869
  },
1870
  {
1871
  "name": "Sango, Riverain",
1872
  "iso_1_code": null,
1873
  "iso_3_code": "snj",
1874
- "tokenizers": {},
1875
  "children": [],
 
1876
  "node_i": "3579",
1877
- "scripts": [],
1878
- "own_tokenizer": false
1879
  }
1880
  ],
 
 
 
 
 
 
 
 
 
1881
  "node_i": "3577",
1882
- "scripts": [],
1883
- "own_tokenizer": false
1884
  },
1885
  {
1886
  "name": "Portuguese based",
1887
  "iso_1_code": null,
1888
  "iso_3_code": null,
1889
- "tokenizers": {
1890
- "Arab": {
1891
- "full_object": "SpaCyTokenizer(\"ms\")",
1892
- "original_lang_name": "malay",
1893
- "original_lang_code": "msa",
1894
- "scripts": [
1895
- "Latn",
1896
- "Arab",
1897
- "Thai"
1898
- ],
1899
- "class_name": "SpaCyTokenizer",
1900
- "macrolanguage": true
1901
- },
1902
- "Latn": {
1903
- "full_object": "SpaCyTokenizer(\"ms\")",
1904
- "original_lang_name": "malay",
1905
- "original_lang_code": "msa",
1906
- "scripts": [
1907
- "Latn",
1908
- "Arab",
1909
- "Thai"
1910
- ],
1911
- "class_name": "SpaCyTokenizer",
1912
- "macrolanguage": true
1913
- },
1914
- "Thai": {
1915
- "full_object": "SpaCyTokenizer(\"ms\")",
1916
- "original_lang_name": "malay",
1917
- "original_lang_code": "msa",
1918
- "scripts": [
1919
- "Latn",
1920
- "Arab",
1921
- "Thai"
1922
- ],
1923
- "class_name": "SpaCyTokenizer",
1924
- "macrolanguage": true
1925
- }
1926
- },
1927
  "children": [
1928
  {
1929
  "name": "Angolar",
1930
  "iso_1_code": null,
1931
  "iso_3_code": "aoa",
1932
- "tokenizers": {},
1933
  "children": [],
 
1934
  "node_i": "3581",
1935
- "scripts": [],
1936
- "own_tokenizer": false
1937
  },
1938
  {
1939
  "name": "Cafundo Creole",
1940
  "iso_1_code": null,
1941
  "iso_3_code": "ccd",
1942
- "tokenizers": {},
1943
  "children": [],
 
1944
  "node_i": "3582",
1945
- "scripts": [],
1946
- "own_tokenizer": false
1947
  },
1948
  {
1949
  "name": "S\u00e3otomense",
1950
  "iso_1_code": null,
1951
  "iso_3_code": "cri",
 
1952
  "tokenizers": {
1953
  "Latn": {
1954
- "full_object": "SpaCyTokenizer(\"ms\")",
1955
- "original_lang_name": "malay",
1956
- "original_lang_code": "msa",
1957
- "scripts": [
1958
- "Latn",
1959
- "Arab",
1960
- "Thai"
1961
- ],
1962
- "class_name": "SpaCyTokenizer",
1963
- "macrolanguage": true
1964
  }
1965
  },
1966
- "children": [],
1967
  "node_i": "3583",
 
1968
  "scripts": [
1969
  "Latn"
1970
- ],
1971
- "own_tokenizer": false
1972
  },
1973
  {
1974
  "name": "Fa d\u2019Ambu",
1975
  "iso_1_code": null,
1976
  "iso_3_code": "fab",
1977
- "tokenizers": {},
1978
  "children": [],
 
1979
  "node_i": "3584",
1980
- "scripts": [],
1981
- "own_tokenizer": false
1982
  },
1983
  {
1984
  "name": "Indo-Portuguese",
1985
  "iso_1_code": null,
1986
  "iso_3_code": "idb",
1987
- "tokenizers": {},
1988
  "children": [],
 
1989
  "node_i": "3585",
1990
- "scripts": [],
1991
- "own_tokenizer": false
1992
  },
1993
  {
1994
  "name": "Kabuverdianu",
1995
  "iso_1_code": null,
1996
  "iso_3_code": "kea",
 
1997
  "tokenizers": {
1998
  "Latn": {
1999
- "full_object": "SpaCyTokenizer(\"ms\")",
2000
- "original_lang_name": "malay",
2001
- "original_lang_code": "msa",
2002
- "scripts": [
2003
- "Latn",
2004
- "Arab",
2005
- "Thai"
2006
- ],
2007
- "class_name": "SpaCyTokenizer",
2008
- "macrolanguage": true
2009
  }
2010
  },
2011
- "children": [],
2012
  "node_i": "3586",
 
2013
  "scripts": [
2014
  "Latn"
2015
- ],
2016
- "own_tokenizer": false
2017
  },
2018
  {
2019
  "name": "Malaccan Portuguese Creole",
2020
  "iso_1_code": null,
2021
  "iso_3_code": "mcm",
2022
- "tokenizers": {},
2023
  "children": [],
 
2024
  "node_i": "3587",
2025
- "scripts": [],
2026
- "own_tokenizer": false
2027
  },
2028
  {
2029
  "name": "Macanese",
2030
  "iso_1_code": null,
2031
  "iso_3_code": "mzs",
2032
- "tokenizers": {},
2033
  "children": [],
 
2034
  "node_i": "3588",
2035
- "scripts": [],
2036
- "own_tokenizer": false
2037
  },
2038
  {
2039
  "name": "Guinea-Bissau Creole",
2040
  "iso_1_code": null,
2041
  "iso_3_code": "pov",
 
2042
  "tokenizers": {
2043
  "Latn": {
2044
- "full_object": "SpaCyTokenizer(\"ms\")",
2045
- "original_lang_name": "malay",
2046
- "original_lang_code": "msa",
2047
- "scripts": [
2048
- "Latn",
2049
- "Arab",
2050
- "Thai"
2051
- ],
2052
- "class_name": "SpaCyTokenizer",
2053
- "macrolanguage": true
2054
  }
2055
  },
2056
- "children": [],
2057
  "node_i": "3589",
 
2058
  "scripts": [
2059
  "Latn"
2060
- ],
2061
- "own_tokenizer": false
2062
  },
2063
  {
2064
  "name": "Principense",
2065
  "iso_1_code": null,
2066
  "iso_3_code": "pre",
2067
- "tokenizers": {},
2068
  "children": [],
 
2069
  "node_i": "3590",
2070
- "scripts": [],
2071
- "own_tokenizer": false
2072
  },
2073
  {
2074
  "name": "Ternate\u00f1o",
2075
  "iso_1_code": null,
2076
  "iso_3_code": "tmg",
2077
- "tokenizers": {},
2078
  "children": [],
 
2079
  "node_i": "3591",
2080
- "scripts": [],
2081
- "own_tokenizer": false
2082
  },
2083
  {
2084
  "name": "Pidgin, Timor",
2085
  "iso_1_code": null,
2086
  "iso_3_code": "tvy",
2087
- "tokenizers": {},
2088
  "children": [],
 
2089
  "node_i": "3592",
2090
- "scripts": [],
2091
- "own_tokenizer": false
2092
  },
2093
  {
2094
  "name": "Korlai Portuguese Creole",
2095
  "iso_1_code": null,
2096
  "iso_3_code": "vkp",
2097
- "tokenizers": {},
2098
  "children": [],
 
2099
  "node_i": "3593",
2100
- "scripts": [],
2101
- "own_tokenizer": false
2102
  }
2103
  ],
 
 
 
 
 
 
 
 
 
2104
  "node_i": "3580",
2105
- "scripts": [],
2106
- "own_tokenizer": false
2107
  },
2108
  {
2109
  "name": "Spanish based",
2110
  "iso_1_code": null,
2111
  "iso_3_code": null,
2112
- "tokenizers": {
2113
- "Arab": {
2114
- "full_object": "SpaCyTokenizer(\"ms\")",
2115
- "original_lang_name": "malay",
2116
- "original_lang_code": "msa",
2117
- "scripts": [
2118
- "Latn",
2119
- "Arab",
2120
- "Thai"
2121
- ],
2122
- "class_name": "SpaCyTokenizer",
2123
- "macrolanguage": true
2124
- },
2125
- "Latn": {
2126
- "full_object": "SpaCyTokenizer(\"ms\")",
2127
- "original_lang_name": "malay",
2128
- "original_lang_code": "msa",
2129
- "scripts": [
2130
- "Latn",
2131
- "Arab",
2132
- "Thai"
2133
- ],
2134
- "class_name": "SpaCyTokenizer",
2135
- "macrolanguage": true
2136
- },
2137
- "Thai": {
2138
- "full_object": "SpaCyTokenizer(\"ms\")",
2139
- "original_lang_name": "malay",
2140
- "original_lang_code": "msa",
2141
- "scripts": [
2142
- "Latn",
2143
- "Arab",
2144
- "Thai"
2145
- ],
2146
- "class_name": "SpaCyTokenizer",
2147
- "macrolanguage": true
2148
- }
2149
- },
2150
  "children": [
2151
  {
2152
  "name": "Chavacano",
2153
  "iso_1_code": null,
2154
  "iso_3_code": "cbk",
 
2155
  "tokenizers": {
2156
  "Latn": {
2157
- "full_object": "SpaCyTokenizer(\"ms\")",
2158
- "original_lang_name": "malay",
2159
- "original_lang_code": "msa",
2160
- "scripts": [
2161
- "Latn",
2162
- "Arab",
2163
- "Thai"
2164
- ],
2165
- "class_name": "SpaCyTokenizer",
2166
- "macrolanguage": true
2167
  }
2168
  },
2169
- "children": [],
2170
  "node_i": "3595",
 
2171
  "scripts": [
2172
  "Latn"
2173
- ],
2174
- "own_tokenizer": false
2175
  },
2176
  {
2177
  "name": "Palenquero",
2178
  "iso_1_code": null,
2179
  "iso_3_code": "pln",
2180
- "tokenizers": {},
2181
  "children": [],
 
2182
  "node_i": "3596",
2183
- "scripts": [],
2184
- "own_tokenizer": false
2185
  }
2186
  ],
 
 
 
 
 
 
 
 
 
2187
  "node_i": "3594",
2188
- "scripts": [],
2189
- "own_tokenizer": false
2190
  },
2191
  {
2192
  "name": "Swahili based",
2193
  "iso_1_code": null,
2194
  "iso_3_code": null,
2195
- "tokenizers": {},
2196
  "children": [
2197
  {
2198
  "name": "Cutchi-Swahili",
2199
  "iso_1_code": null,
2200
  "iso_3_code": "ccl",
2201
- "tokenizers": {},
2202
  "children": [],
 
2203
  "node_i": "3598",
2204
- "scripts": [],
2205
- "own_tokenizer": false
2206
  }
2207
  ],
 
2208
  "node_i": "3597",
2209
- "scripts": [],
2210
- "own_tokenizer": false
2211
  },
2212
  {
2213
  "name": "Tetun based",
2214
  "iso_1_code": null,
2215
  "iso_3_code": null,
2216
- "tokenizers": {
2217
- "Arab": {
2218
- "full_object": "SpaCyTokenizer(\"ms\")",
2219
- "original_lang_name": "malay",
2220
- "original_lang_code": "msa",
2221
- "scripts": [
2222
- "Latn",
2223
- "Arab",
2224
- "Thai"
2225
- ],
2226
- "class_name": "SpaCyTokenizer",
2227
- "macrolanguage": true
2228
- },
2229
- "Latn": {
2230
- "full_object": "SpaCyTokenizer(\"ms\")",
2231
- "original_lang_name": "malay",
2232
- "original_lang_code": "msa",
2233
- "scripts": [
2234
- "Latn",
2235
- "Arab",
2236
- "Thai"
2237
- ],
2238
- "class_name": "SpaCyTokenizer",
2239
- "macrolanguage": true
2240
- },
2241
- "Thai": {
2242
- "full_object": "SpaCyTokenizer(\"ms\")",
2243
- "original_lang_name": "malay",
2244
- "original_lang_code": "msa",
2245
- "scripts": [
2246
- "Latn",
2247
- "Arab",
2248
- "Thai"
2249
- ],
2250
- "class_name": "SpaCyTokenizer",
2251
- "macrolanguage": true
2252
- }
2253
- },
2254
  "children": [
2255
  {
2256
  "name": "Tetun Dili",
2257
  "iso_1_code": null,
2258
  "iso_3_code": "tdt",
 
2259
  "tokenizers": {
2260
  "Latn": {
2261
- "full_object": "SpaCyTokenizer(\"ms\")",
2262
- "original_lang_name": "malay",
2263
- "original_lang_code": "msa",
2264
- "scripts": [
2265
- "Latn",
2266
- "Arab",
2267
- "Thai"
2268
- ],
2269
- "class_name": "SpaCyTokenizer",
2270
- "macrolanguage": true
2271
  }
2272
  },
2273
- "children": [],
2274
  "node_i": "3600",
 
2275
  "scripts": [
2276
  "Latn"
2277
- ],
2278
- "own_tokenizer": false
2279
  }
2280
  ],
 
 
 
 
 
 
 
 
 
2281
  "node_i": "3599",
2282
- "scripts": [],
2283
- "own_tokenizer": false
2284
  }
2285
  ],
 
 
 
 
 
 
 
 
 
2286
  "node_i": "3481",
2287
- "scripts": [],
2288
- "own_tokenizer": false
2289
  }
 
2
  "name": "Creole",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "children": [
6
  {
7
  "name": "Afrikaans based",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Flaaitaal",
13
  "iso_1_code": null,
14
  "iso_3_code": "fly",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "3483",
18
+ "native_tokenizers": [],
19
+ "scripts": []
20
  },
21
  {
22
  "name": "Oorlams",
23
  "iso_1_code": null,
24
  "iso_3_code": "oor",
 
25
  "children": [],
26
+ "tokenizers": {},
27
  "node_i": "3484",
28
+ "native_tokenizers": [],
29
+ "scripts": []
30
  }
31
  ],
32
+ "tokenizers": {},
33
  "node_i": "3482",
34
+ "native_tokenizers": [],
35
+ "scripts": []
36
  },
37
  {
38
  "name": "Arabic based",
39
  "iso_1_code": null,
40
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
41
  "children": [
42
  {
43
  "name": "Nubi",
44
  "iso_1_code": null,
45
  "iso_3_code": "kcn",
 
46
  "children": [],
47
+ "tokenizers": {},
48
  "node_i": "3486",
49
+ "native_tokenizers": [],
50
+ "scripts": []
51
  },
52
  {
53
  "name": "Arabic, Juba",
54
  "iso_1_code": "ar",
55
  "iso_3_code": "pga",
 
 
 
 
 
 
 
 
 
 
 
 
56
  "children": [],
57
+ "tokenizers": {},
58
  "node_i": "3487",
59
+ "native_tokenizers": [],
60
+ "scripts": []
61
  }
62
  ],
63
+ "tokenizers": {},
64
  "node_i": "3485",
65
+ "native_tokenizers": [],
66
+ "scripts": []
67
  },
68
  {
69
  "name": "Assamese based",
70
  "iso_1_code": null,
71
  "iso_3_code": null,
 
72
  "children": [
73
  {
74
  "name": "Nagamese",
75
  "iso_1_code": null,
76
  "iso_3_code": "nag",
 
77
  "children": [],
78
+ "tokenizers": {},
79
  "node_i": "3489",
80
+ "native_tokenizers": [],
81
+ "scripts": []
82
  }
83
  ],
84
+ "tokenizers": {},
85
  "node_i": "3488",
86
+ "native_tokenizers": [],
87
+ "scripts": []
88
  },
89
  {
90
  "name": "Dutch based",
91
  "iso_1_code": null,
92
  "iso_3_code": null,
 
93
  "children": [
94
  {
95
  "name": "Berbice Dutch Creole",
96
  "iso_1_code": null,
97
  "iso_3_code": "brc",
 
98
  "children": [],
99
+ "tokenizers": {},
100
  "node_i": "3491",
101
+ "native_tokenizers": [],
102
+ "scripts": []
103
  },
104
  {
105
  "name": "Negerhollands",
106
  "iso_1_code": null,
107
  "iso_3_code": "dcr",
 
108
  "children": [],
109
+ "tokenizers": {},
110
  "node_i": "3492",
111
+ "native_tokenizers": [],
112
+ "scripts": []
113
  },
114
  {
115
  "name": "Javindo",
116
  "iso_1_code": null,
117
  "iso_3_code": "jvd",
 
118
  "children": [],
119
+ "tokenizers": {},
120
  "node_i": "3493",
121
+ "native_tokenizers": [],
122
+ "scripts": []
123
  },
124
  {
125
  "name": "Petjo",
126
  "iso_1_code": null,
127
  "iso_3_code": "pey",
 
128
  "children": [],
129
+ "tokenizers": {},
130
  "node_i": "3494",
131
+ "native_tokenizers": [],
132
+ "scripts": []
133
  },
134
  {
135
  "name": "Skepi Dutch Creole",
136
  "iso_1_code": null,
137
  "iso_3_code": "skw",
 
138
  "children": [],
139
+ "tokenizers": {},
140
  "node_i": "3495",
141
+ "native_tokenizers": [],
142
+ "scripts": []
143
  }
144
  ],
145
+ "tokenizers": {},
146
  "node_i": "3490",
147
+ "native_tokenizers": [],
148
+ "scripts": []
149
  },
150
  {
151
  "name": "English based",
152
  "iso_1_code": null,
153
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
154
  "children": [
155
  {
156
  "name": "Saramaccan",
157
  "iso_1_code": null,
158
  "iso_3_code": "srm",
159
+ "children": [],
160
  "tokenizers": {
161
  "Latn": {
162
  "full_object": "StanzaTokenizer(\"pcm\")",
163
  "original_lang_name": "nigerian_pidgin",
164
  "original_lang_code": "pcm",
165
+ "script": "Latn",
166
+ "class_name": "StanzaTokenizer"
 
 
 
167
  }
168
  },
 
169
  "node_i": "3497",
170
+ "native_tokenizers": [],
171
  "scripts": [
172
  "Latn"
173
+ ]
 
174
  },
175
  {
176
  "name": "Atlantic",
177
  "iso_1_code": null,
178
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
179
  "children": [
180
  {
181
  "name": "Eastern",
182
  "iso_1_code": null,
183
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
184
  "children": [
185
  {
186
  "name": "Turks and Caicos English Creole",
187
  "iso_1_code": null,
188
  "iso_3_code": "tch",
 
189
  "children": [],
190
+ "tokenizers": {},
191
  "node_i": "3500",
192
+ "native_tokenizers": [],
193
+ "scripts": []
194
  },
195
  {
196
  "name": "Northern",
197
  "iso_1_code": null,
198
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
199
  "children": [
200
  {
201
  "name": "Afro-Seminole Creole",
202
  "iso_1_code": null,
203
  "iso_3_code": "afs",
 
204
  "children": [],
205
+ "tokenizers": {},
206
  "node_i": "3502",
207
+ "native_tokenizers": [],
208
+ "scripts": []
209
  },
210
  {
211
  "name": "Bahamas English Creole",
212
  "iso_1_code": null,
213
  "iso_3_code": "bah",
 
214
  "children": [],
215
+ "tokenizers": {},
216
  "node_i": "3503",
217
+ "native_tokenizers": [],
218
+ "scripts": []
219
  },
220
  {
221
  "name": "Sea Island English Creole",
222
  "iso_1_code": null,
223
  "iso_3_code": "gul",
224
+ "children": [],
225
  "tokenizers": {
226
  "Latn": {
227
  "full_object": "StanzaTokenizer(\"pcm\")",
228
  "original_lang_name": "nigerian_pidgin",
229
  "original_lang_code": "pcm",
230
+ "script": "Latn",
231
+ "class_name": "StanzaTokenizer"
 
 
 
232
  }
233
  },
 
234
  "node_i": "3504",
235
+ "native_tokenizers": [],
236
  "scripts": [
237
  "Latn"
238
+ ]
 
239
  }
240
  ],
241
+ "tokenizers": {
242
+ "Latn": {
243
+ "full_object": "StanzaTokenizer(\"pcm\")",
244
+ "original_lang_name": "nigerian_pidgin",
245
+ "original_lang_code": "pcm",
246
+ "script": "Latn",
247
+ "class_name": "StanzaTokenizer"
248
+ }
249
+ },
250
  "node_i": "3501",
251
+ "native_tokenizers": [],
252
+ "scripts": []
253
  },
254
  {
255
  "name": "Southern",
256
  "iso_1_code": null,
257
  "iso_3_code": null,
 
258
  "children": [
259
  {
260
  "name": "Leeward Caribbean English Creole",
261
  "iso_1_code": null,
262
  "iso_3_code": "aig",
 
263
  "children": [],
264
+ "tokenizers": {},
265
  "node_i": "3506",
266
+ "native_tokenizers": [],
267
+ "scripts": []
268
  },
269
  {
270
  "name": "Bajan",
271
  "iso_1_code": null,
272
  "iso_3_code": "bjs",
 
273
  "children": [],
274
+ "tokenizers": {},
275
  "node_i": "3507",
276
+ "native_tokenizers": [],
277
+ "scripts": []
278
  },
279
  {
280
  "name": "Grenadian English Creole",
281
  "iso_1_code": null,
282
  "iso_3_code": "gcl",
 
283
  "children": [],
284
+ "tokenizers": {},
285
  "node_i": "3508",
286
+ "native_tokenizers": [],
287
+ "scripts": []
288
  },
289
  {
290
  "name": "Guyanese English Creole",
291
  "iso_1_code": null,
292
  "iso_3_code": "gyn",
 
293
  "children": [],
294
+ "tokenizers": {},
295
  "node_i": "3509",
296
+ "native_tokenizers": [],
297
+ "scripts": []
298
  },
299
  {
300
  "name": "Vincentian English Creole",
301
  "iso_1_code": null,
302
  "iso_3_code": "svc",
 
303
  "children": [],
304
+ "tokenizers": {},
305
  "node_i": "3510",
306
+ "native_tokenizers": [],
307
+ "scripts": []
308
  },
309
  {
310
  "name": "Tobagonian English Creole",
311
  "iso_1_code": null,
312
  "iso_3_code": "tgh",
 
313
  "children": [],
314
+ "tokenizers": {},
315
  "node_i": "3511",
316
+ "native_tokenizers": [],
317
+ "scripts": []
318
  },
319
  {
320
  "name": "Trinidadian English Creole",
321
  "iso_1_code": null,
322
  "iso_3_code": "trf",
 
323
  "children": [],
324
+ "tokenizers": {},
325
  "node_i": "3512",
326
+ "native_tokenizers": [],
327
+ "scripts": []
328
  },
329
  {
330
  "name": "Virgin Islands English Creole",
331
  "iso_1_code": null,
332
  "iso_3_code": "vic",
 
333
  "children": [],
334
+ "tokenizers": {},
335
  "node_i": "3513",
336
+ "native_tokenizers": [],
337
+ "scripts": []
338
  }
339
  ],
340
+ "tokenizers": {},
341
  "node_i": "3505",
342
+ "native_tokenizers": [],
343
+ "scripts": []
344
  }
345
  ],
 
 
 
 
 
 
 
 
346
  "tokenizers": {
347
  "Latn": {
348
  "full_object": "StanzaTokenizer(\"pcm\")",
349
  "original_lang_name": "nigerian_pidgin",
350
  "original_lang_code": "pcm",
351
+ "script": "Latn",
352
+ "class_name": "StanzaTokenizer"
 
 
 
353
  }
354
  },
355
+ "node_i": "3499",
356
+ "native_tokenizers": [],
357
+ "scripts": []
358
+ },
359
+ {
360
+ "name": "Krio",
361
+ "iso_1_code": null,
362
+ "iso_3_code": null,
363
  "children": [
364
  {
365
  "name": "Equatorial Guinean Pidgin",
366
  "iso_1_code": null,
367
  "iso_3_code": "fpe",
 
368
  "children": [],
369
+ "tokenizers": {},
370
  "node_i": "3515",
371
+ "native_tokenizers": [],
372
+ "scripts": []
373
  },
374
  {
375
  "name": "Ghanaian Pidgin English",
376
  "iso_1_code": null,
377
  "iso_3_code": "gpe",
 
378
  "children": [],
379
+ "tokenizers": {},
380
  "node_i": "3516",
381
+ "native_tokenizers": [],
382
+ "scripts": []
383
  },
384
  {
385
  "name": "Krio",
386
  "iso_1_code": null,
387
  "iso_3_code": "kri",
388
+ "children": [],
389
  "tokenizers": {
390
  "Latn": {
391
  "full_object": "StanzaTokenizer(\"pcm\")",
392
  "original_lang_name": "nigerian_pidgin",
393
  "original_lang_code": "pcm",
394
+ "script": "Latn",
395
+ "class_name": "StanzaTokenizer"
 
 
 
396
  }
397
  },
 
398
  "node_i": "3517",
399
+ "native_tokenizers": [],
400
  "scripts": [
401
  "Latn"
402
+ ]
 
403
  },
404
  {
405
  "name": "Pidgin, Nigerian",
406
  "iso_1_code": null,
407
  "iso_3_code": "pcm",
408
+ "children": [],
409
  "tokenizers": {
410
  "Latn": {
411
  "full_object": "StanzaTokenizer(\"pcm\")",
412
  "original_lang_name": "nigerian_pidgin",
413
  "original_lang_code": "pcm",
414
+ "script": "Latn",
415
+ "class_name": "StanzaTokenizer"
 
 
 
416
  }
417
  },
 
418
  "node_i": "3518",
419
+ "native_tokenizers": [
420
  "Latn"
421
  ],
422
+ "scripts": [
423
+ "Latn"
424
+ ]
425
  },
426
  {
427
  "name": "Pidgin, Cameroon",
428
  "iso_1_code": null,
429
  "iso_3_code": "wes",
430
+ "children": [],
431
  "tokenizers": {
432
  "Latn": {
433
  "full_object": "StanzaTokenizer(\"pcm\")",
434
  "original_lang_name": "nigerian_pidgin",
435
  "original_lang_code": "pcm",
436
+ "script": "Latn",
437
+ "class_name": "StanzaTokenizer"
 
 
 
438
  }
439
  },
 
440
  "node_i": "3519",
441
+ "native_tokenizers": [],
442
  "scripts": [
443
  "Latn"
444
+ ]
 
445
  }
446
  ],
 
 
 
 
 
 
 
 
447
  "tokenizers": {
448
  "Latn": {
449
  "full_object": "StanzaTokenizer(\"pcm\")",
450
  "original_lang_name": "nigerian_pidgin",
451
  "original_lang_code": "pcm",
452
+ "script": "Latn",
453
+ "class_name": "StanzaTokenizer"
 
 
 
454
  }
455
  },
456
+ "node_i": "3514",
457
+ "native_tokenizers": [],
458
+ "scripts": []
459
+ },
460
+ {
461
+ "name": "Suriname",
462
+ "iso_1_code": null,
463
+ "iso_3_code": null,
464
  "children": [
465
  {
466
  "name": "Sranan Tongo",
467
  "iso_1_code": null,
468
  "iso_3_code": "srn",
469
+ "children": [],
470
  "tokenizers": {
471
  "Latn": {
472
  "full_object": "StanzaTokenizer(\"pcm\")",
473
  "original_lang_name": "nigerian_pidgin",
474
  "original_lang_code": "pcm",
475
+ "script": "Latn",
476
+ "class_name": "StanzaTokenizer"
 
 
 
477
  }
478
  },
 
479
  "node_i": "3521",
480
+ "native_tokenizers": [],
481
  "scripts": [
482
  "Latn"
483
+ ]
 
484
  },
485
  {
486
  "name": "Ndyuka",
487
  "iso_1_code": null,
488
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
489
  "children": [
490
  {
491
  "name": "Aukan",
492
  "iso_1_code": null,
493
  "iso_3_code": "djk",
494
+ "children": [],
495
  "tokenizers": {
496
  "Latn": {
497
  "full_object": "StanzaTokenizer(\"pcm\")",
498
  "original_lang_name": "nigerian_pidgin",
499
  "original_lang_code": "pcm",
500
+ "script": "Latn",
501
+ "class_name": "StanzaTokenizer"
 
 
 
502
  }
503
  },
 
504
  "node_i": "3523",
505
+ "native_tokenizers": [],
506
  "scripts": [
507
  "Latn"
508
+ ]
 
509
  },
510
  {
511
  "name": "Kwinti",
512
  "iso_1_code": null,
513
  "iso_3_code": "kww",
 
514
  "children": [],
515
+ "tokenizers": {},
516
  "node_i": "3524",
517
+ "native_tokenizers": [],
518
+ "scripts": []
519
  }
520
  ],
521
+ "tokenizers": {
522
+ "Latn": {
523
+ "full_object": "StanzaTokenizer(\"pcm\")",
524
+ "original_lang_name": "nigerian_pidgin",
525
+ "original_lang_code": "pcm",
526
+ "script": "Latn",
527
+ "class_name": "StanzaTokenizer"
528
+ }
529
+ },
530
  "node_i": "3522",
531
+ "native_tokenizers": [],
532
+ "scripts": []
533
  }
534
  ],
 
 
 
 
 
 
 
 
535
  "tokenizers": {
536
  "Latn": {
537
  "full_object": "StanzaTokenizer(\"pcm\")",
538
  "original_lang_name": "nigerian_pidgin",
539
  "original_lang_code": "pcm",
540
+ "script": "Latn",
541
+ "class_name": "StanzaTokenizer"
 
 
 
542
  }
543
  },
544
+ "node_i": "3520",
545
+ "native_tokenizers": [],
546
+ "scripts": []
547
+ },
548
+ {
549
+ "name": "Western",
550
+ "iso_1_code": null,
551
+ "iso_3_code": null,
552
  "children": [
553
  {
554
  "name": "Belize English Creole",
555
  "iso_1_code": null,
556
  "iso_3_code": "bzj",
557
+ "children": [],
558
  "tokenizers": {
559
  "Latn": {
560
  "full_object": "StanzaTokenizer(\"pcm\")",
561
  "original_lang_name": "nigerian_pidgin",
562
  "original_lang_code": "pcm",
563
+ "script": "Latn",
564
+ "class_name": "StanzaTokenizer"
 
 
 
565
  }
566
  },
 
567
  "node_i": "3526",
568
+ "native_tokenizers": [],
569
  "scripts": [
570
  "Latn"
571
+ ]
 
572
  },
573
  {
574
  "name": "Nicaragua English Creole",
575
  "iso_1_code": null,
576
  "iso_3_code": "bzk",
 
577
  "children": [],
578
+ "tokenizers": {},
579
  "node_i": "3527",
580
+ "native_tokenizers": [],
581
+ "scripts": []
582
  },
583
  {
584
  "name": "Islander English Creole",
585
  "iso_1_code": null,
586
  "iso_3_code": "icr",
587
+ "children": [],
588
  "tokenizers": {
589
  "Latn": {
590
  "full_object": "StanzaTokenizer(\"pcm\")",
591
  "original_lang_name": "nigerian_pidgin",
592
  "original_lang_code": "pcm",
593
+ "script": "Latn",
594
+ "class_name": "StanzaTokenizer"
 
 
 
595
  }
596
  },
 
597
  "node_i": "3528",
598
+ "native_tokenizers": [],
599
  "scripts": [
600
  "Latn"
601
+ ]
 
602
  },
603
  {
604
  "name": "Jamaican English Creole",
605
  "iso_1_code": null,
606
  "iso_3_code": "jam",
607
+ "children": [],
608
  "tokenizers": {
609
  "Latn": {
610
  "full_object": "StanzaTokenizer(\"pcm\")",
611
  "original_lang_name": "nigerian_pidgin",
612
  "original_lang_code": "pcm",
613
+ "script": "Latn",
614
+ "class_name": "StanzaTokenizer"
 
 
 
615
  }
616
  },
 
617
  "node_i": "3529",
618
+ "native_tokenizers": [],
619
  "scripts": [
620
  "Latn"
621
+ ]
 
622
  }
623
  ],
624
+ "tokenizers": {
625
+ "Latn": {
626
+ "full_object": "StanzaTokenizer(\"pcm\")",
627
+ "original_lang_name": "nigerian_pidgin",
628
+ "original_lang_code": "pcm",
629
+ "script": "Latn",
630
+ "class_name": "StanzaTokenizer"
631
+ }
632
+ },
633
  "node_i": "3525",
634
+ "native_tokenizers": [],
635
+ "scripts": []
636
  }
637
  ],
 
 
 
 
 
 
 
 
638
  "tokenizers": {
639
  "Latn": {
640
  "full_object": "StanzaTokenizer(\"pcm\")",
641
  "original_lang_name": "nigerian_pidgin",
642
  "original_lang_code": "pcm",
643
+ "script": "Latn",
644
+ "class_name": "StanzaTokenizer"
 
 
 
645
  }
646
  },
647
+ "node_i": "3498",
648
+ "native_tokenizers": [],
649
+ "scripts": []
650
+ },
651
+ {
652
+ "name": "Pacific",
653
+ "iso_1_code": null,
654
+ "iso_3_code": null,
655
  "children": [
656
  {
657
  "name": "Bislama",
658
  "iso_1_code": "bi",
659
  "iso_3_code": "bis",
660
+ "children": [],
661
  "tokenizers": {
662
  "Latn": {
663
  "full_object": "StanzaTokenizer(\"pcm\")",
664
  "original_lang_name": "nigerian_pidgin",
665
  "original_lang_code": "pcm",
666
+ "script": "Latn",
667
+ "class_name": "StanzaTokenizer"
 
 
 
668
  }
669
  },
 
670
  "node_i": "3531",
671
+ "native_tokenizers": [],
672
  "scripts": [
673
  "Latn"
674
+ ]
 
675
  },
676
  {
677
  "name": "Hawaii Pidgin",
678
  "iso_1_code": null,
679
  "iso_3_code": "hwc",
680
+ "children": [],
681
  "tokenizers": {
682
  "Latn": {
683
  "full_object": "StanzaTokenizer(\"pcm\")",
684
  "original_lang_name": "nigerian_pidgin",
685
  "original_lang_code": "pcm",
686
+ "script": "Latn",
687
+ "class_name": "StanzaTokenizer"
 
 
 
688
  }
689
  },
 
690
  "node_i": "3532",
691
+ "native_tokenizers": [],
692
  "scripts": [
693
  "Latn"
694
+ ]
 
695
  },
696
  {
697
  "name": "Ngatik Men\u2019s Creole",
698
  "iso_1_code": null,
699
  "iso_3_code": "ngm",
 
700
  "children": [],
701
+ "tokenizers": {},
702
  "node_i": "3533",
703
+ "native_tokenizers": [],
704
+ "scripts": []
705
  },
706
  {
707
  "name": "Pitcairn-Norfolk",
708
  "iso_1_code": null,
709
  "iso_3_code": "pih",
 
710
  "children": [],
711
+ "tokenizers": {},
712
  "node_i": "3534",
713
+ "native_tokenizers": [],
714
+ "scripts": []
715
  },
716
  {
717
  "name": "Pijin",
718
  "iso_1_code": null,
719
  "iso_3_code": "pis",
720
+ "children": [],
721
  "tokenizers": {
722
  "Latn": {
723
  "full_object": "StanzaTokenizer(\"pcm\")",
724
  "original_lang_name": "nigerian_pidgin",
725
  "original_lang_code": "pcm",
726
+ "script": "Latn",
727
+ "class_name": "StanzaTokenizer"
 
 
 
728
  }
729
  },
 
730
  "node_i": "3535",
731
+ "native_tokenizers": [],
732
  "scripts": [
733
  "Latn"
734
+ ]
 
735
  },
736
  {
737
  "name": "Kriol",
738
  "iso_1_code": null,
739
  "iso_3_code": "rop",
740
+ "children": [],
741
  "tokenizers": {
742
  "Latn": {
743
  "full_object": "StanzaTokenizer(\"pcm\")",
744
  "original_lang_name": "nigerian_pidgin",
745
  "original_lang_code": "pcm",
746
+ "script": "Latn",
747
+ "class_name": "StanzaTokenizer"
 
 
 
748
  }
749
  },
 
750
  "node_i": "3536",
751
+ "native_tokenizers": [],
752
  "scripts": [
753
  "Latn"
754
+ ]
 
755
  },
756
  {
757
  "name": "Torres Strait Creole",
758
  "iso_1_code": null,
759
  "iso_3_code": "tcs",
760
+ "children": [],
761
  "tokenizers": {
762
  "Latn": {
763
  "full_object": "StanzaTokenizer(\"pcm\")",
764
  "original_lang_name": "nigerian_pidgin",
765
  "original_lang_code": "pcm",
766
+ "script": "Latn",
767
+ "class_name": "StanzaTokenizer"
 
 
 
768
  }
769
  },
 
770
  "node_i": "3537",
771
+ "native_tokenizers": [],
772
  "scripts": [
773
  "Latn"
774
+ ]
 
775
  },
776
  {
777
  "name": "Tok Pisin",
778
  "iso_1_code": null,
779
  "iso_3_code": "tpi",
780
+ "children": [],
781
  "tokenizers": {
782
  "Latn": {
783
  "full_object": "StanzaTokenizer(\"pcm\")",
784
  "original_lang_name": "nigerian_pidgin",
785
  "original_lang_code": "pcm",
786
+ "script": "Latn",
787
+ "class_name": "StanzaTokenizer"
 
 
 
788
  }
789
  },
 
790
  "node_i": "3538",
791
+ "native_tokenizers": [],
792
  "scripts": [
793
  "Latn"
794
+ ]
 
795
  }
796
  ],
797
+ "tokenizers": {
798
+ "Latn": {
799
+ "full_object": "StanzaTokenizer(\"pcm\")",
800
+ "original_lang_name": "nigerian_pidgin",
801
+ "original_lang_code": "pcm",
802
+ "script": "Latn",
803
+ "class_name": "StanzaTokenizer"
804
+ }
805
+ },
806
  "node_i": "3530",
807
+ "native_tokenizers": [],
808
+ "scripts": []
809
  }
810
  ],
811
+ "tokenizers": {
812
+ "Latn": {
813
+ "full_object": "StanzaTokenizer(\"pcm\")",
814
+ "original_lang_name": "nigerian_pidgin",
815
+ "original_lang_code": "pcm",
816
+ "script": "Latn",
817
+ "class_name": "StanzaTokenizer"
818
+ }
819
+ },
820
  "node_i": "3496",
821
+ "native_tokenizers": [],
822
+ "scripts": []
823
  },
824
  {
825
  "name": "French based",
826
  "iso_1_code": null,
827
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
828
  "children": [
829
  {
830
  "name": "Lesser Antillean French Creole",
831
  "iso_1_code": null,
832
  "iso_3_code": "acf",
833
+ "children": [],
834
  "tokenizers": {
835
  "Latn": {
836
+ "full_object": "StanzaTokenizer(\"pcm\")",
837
+ "original_lang_name": "nigerian_pidgin",
838
+ "original_lang_code": "pcm",
839
+ "script": "Latn",
840
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
841
  }
842
  },
 
843
  "node_i": "3540",
844
+ "native_tokenizers": [],
845
  "scripts": [
846
  "Latn"
847
+ ]
 
848
  },
849
  {
850
  "name": "Tayo",
851
  "iso_1_code": null,
852
  "iso_3_code": "cks",
 
853
  "children": [],
854
+ "tokenizers": {},
855
  "node_i": "3541",
856
+ "native_tokenizers": [],
857
+ "scripts": []
858
  },
859
  {
860
  "name": "Seychelles French Creole",
861
  "iso_1_code": null,
862
  "iso_3_code": "crs",
863
+ "children": [],
864
  "tokenizers": {
865
  "Latn": {
866
+ "full_object": "StanzaTokenizer(\"pcm\")",
867
+ "original_lang_name": "nigerian_pidgin",
868
+ "original_lang_code": "pcm",
869
+ "script": "Latn",
870
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
871
  }
872
  },
 
873
  "node_i": "3542",
874
+ "native_tokenizers": [],
875
  "scripts": [
876
  "Latn"
877
+ ]
 
878
  },
879
  {
880
  "name": "Guadeloupean French Creole",
881
  "iso_1_code": null,
882
  "iso_3_code": "gcf",
883
+ "children": [],
884
  "tokenizers": {
885
  "Latn": {
886
+ "full_object": "StanzaTokenizer(\"pcm\")",
887
+ "original_lang_name": "nigerian_pidgin",
888
+ "original_lang_code": "pcm",
889
+ "script": "Latn",
890
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
891
  }
892
  },
 
893
  "node_i": "3543",
894
+ "native_tokenizers": [],
895
  "scripts": [
896
  "Latn"
897
+ ]
 
898
  },
899
  {
900
  "name": "Guianese French Creole",
901
  "iso_1_code": null,
902
  "iso_3_code": "gcr",
903
+ "children": [],
904
  "tokenizers": {
905
  "Latn": {
906
+ "full_object": "StanzaTokenizer(\"pcm\")",
907
+ "original_lang_name": "nigerian_pidgin",
908
+ "original_lang_code": "pcm",
909
+ "script": "Latn",
910
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
911
  }
912
  },
 
913
  "node_i": "3544",
914
+ "native_tokenizers": [],
915
  "scripts": [
916
  "Latn"
917
+ ]
 
918
  },
919
  {
920
  "name": "Haitian Creole",
921
  "iso_1_code": "ht",
922
  "iso_3_code": "hat",
923
+ "children": [],
924
  "tokenizers": {
925
  "Latn": {
926
+ "full_object": "StanzaTokenizer(\"pcm\")",
927
+ "original_lang_name": "nigerian_pidgin",
928
+ "original_lang_code": "pcm",
929
+ "script": "Latn",
930
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
931
  }
932
  },
 
933
  "node_i": "3545",
934
+ "native_tokenizers": [],
935
  "scripts": [
936
  "Latn"
937
+ ]
 
938
  },
939
  {
940
  "name": "Karipuna French Creole",
941
  "iso_1_code": null,
942
  "iso_3_code": "kmv",
 
943
  "children": [],
944
+ "tokenizers": {},
945
  "node_i": "3546",
946
+ "native_tokenizers": [],
947
+ "scripts": []
948
  },
949
  {
950
  "name": "Louisiana Creole",
951
  "iso_1_code": null,
952
  "iso_3_code": "lou",
 
953
  "children": [],
954
+ "tokenizers": {},
955
  "node_i": "3547",
956
+ "native_tokenizers": [],
957
+ "scripts": []
958
  },
959
  {
960
  "name": "Morisyen",
961
  "iso_1_code": null,
962
  "iso_3_code": "mfe",
963
+ "children": [],
964
  "tokenizers": {
965
  "Latn": {
966
+ "full_object": "StanzaTokenizer(\"pcm\")",
967
+ "original_lang_name": "nigerian_pidgin",
968
+ "original_lang_code": "pcm",
969
+ "script": "Latn",
970
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
971
  }
972
  },
 
973
  "node_i": "3548",
974
+ "native_tokenizers": [],
975
  "scripts": [
976
  "Latn"
977
+ ]
 
978
  },
979
  {
980
  "name": "R\u00e9union French Creole",
981
  "iso_1_code": null,
982
  "iso_3_code": "rcf",
983
+ "children": [],
984
  "tokenizers": {
985
  "Latn": {
986
+ "full_object": "StanzaTokenizer(\"pcm\")",
987
+ "original_lang_name": "nigerian_pidgin",
988
+ "original_lang_code": "pcm",
989
+ "script": "Latn",
990
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
991
  }
992
  },
 
993
  "node_i": "3549",
994
+ "native_tokenizers": [],
995
  "scripts": [
996
  "Latn"
997
+ ]
 
998
  },
999
  {
1000
  "name": "San Miguel French Creole",
1001
  "iso_1_code": null,
1002
  "iso_3_code": "scf",
 
1003
  "children": [],
1004
+ "tokenizers": {},
1005
  "node_i": "3550",
1006
+ "native_tokenizers": [],
1007
+ "scripts": []
1008
  }
1009
  ],
1010
+ "tokenizers": {
1011
+ "Latn": {
1012
+ "full_object": "StanzaTokenizer(\"pcm\")",
1013
+ "original_lang_name": "nigerian_pidgin",
1014
+ "original_lang_code": "pcm",
1015
+ "script": "Latn",
1016
+ "class_name": "StanzaTokenizer"
1017
+ }
1018
+ },
1019
  "node_i": "3539",
1020
+ "native_tokenizers": [],
1021
+ "scripts": []
1022
  },
1023
  {
1024
  "name": "German based",
1025
  "iso_1_code": null,
1026
  "iso_3_code": null,
 
1027
  "children": [
1028
  {
1029
  "name": "Unserdeutsch",
1030
  "iso_1_code": null,
1031
  "iso_3_code": "uln",
 
1032
  "children": [],
1033
+ "tokenizers": {},
1034
  "node_i": "3552",
1035
+ "native_tokenizers": [],
1036
+ "scripts": []
1037
  }
1038
  ],
1039
+ "tokenizers": {},
1040
  "node_i": "3551",
1041
+ "native_tokenizers": [],
1042
+ "scripts": []
1043
  },
1044
  {
1045
  "name": "Hindi based",
1046
  "iso_1_code": null,
1047
  "iso_3_code": null,
 
1048
  "children": [
1049
  {
1050
  "name": "Andaman Hindi Creole",
1051
  "iso_1_code": null,
1052
  "iso_3_code": "hca",
 
1053
  "children": [],
1054
+ "tokenizers": {},
1055
  "node_i": "3554",
1056
+ "native_tokenizers": [],
1057
+ "scripts": []
1058
  }
1059
  ],
1060
+ "tokenizers": {},
1061
  "node_i": "3553",
1062
+ "native_tokenizers": [],
1063
+ "scripts": []
1064
  },
1065
  {
1066
  "name": "Iberian based",
1067
  "iso_1_code": null,
1068
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1069
  "children": [
1070
  {
1071
  "name": "Papiamentu",
1072
  "iso_1_code": null,
1073
  "iso_3_code": "pap",
1074
+ "children": [],
1075
  "tokenizers": {
1076
  "Latn": {
1077
+ "full_object": "StanzaTokenizer(\"pcm\")",
1078
+ "original_lang_name": "nigerian_pidgin",
1079
+ "original_lang_code": "pcm",
1080
+ "script": "Latn",
1081
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
1082
  }
1083
  },
 
1084
  "node_i": "3556",
1085
+ "native_tokenizers": [],
1086
  "scripts": [
1087
  "Latn"
1088
+ ]
 
1089
  }
1090
  ],
1091
+ "tokenizers": {
1092
+ "Latn": {
1093
+ "full_object": "StanzaTokenizer(\"pcm\")",
1094
+ "original_lang_name": "nigerian_pidgin",
1095
+ "original_lang_code": "pcm",
1096
+ "script": "Latn",
1097
+ "class_name": "StanzaTokenizer"
1098
+ }
1099
+ },
1100
  "node_i": "3555",
1101
+ "native_tokenizers": [],
1102
+ "scripts": []
1103
  },
1104
  {
1105
  "name": "Japanese-based",
1106
  "iso_1_code": null,
1107
  "iso_3_code": null,
 
1108
  "children": [
1109
  {
1110
  "name": "Yilan Creole",
1111
  "iso_1_code": null,
1112
  "iso_3_code": "ycr",
 
1113
  "children": [],
1114
+ "tokenizers": {},
1115
  "node_i": "3558",
1116
+ "native_tokenizers": [],
1117
+ "scripts": []
1118
  }
1119
  ],
1120
+ "tokenizers": {},
1121
  "node_i": "3557",
1122
+ "native_tokenizers": [],
1123
+ "scripts": []
1124
  },
1125
  {
1126
  "name": "Kongo based",
1127
  "iso_1_code": null,
1128
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1129
  "children": [
1130
  {
1131
  "name": "Kituba",
1132
  "iso_1_code": null,
1133
  "iso_3_code": "ktu",
1134
+ "children": [],
1135
  "tokenizers": {
1136
  "Latn": {
1137
+ "full_object": "StanzaTokenizer(\"pcm\")",
1138
+ "original_lang_name": "nigerian_pidgin",
1139
+ "original_lang_code": "pcm",
1140
+ "script": "Latn",
1141
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
1142
  }
1143
  },
 
1144
  "node_i": "3560",
1145
+ "native_tokenizers": [],
1146
  "scripts": [
1147
  "Latn"
1148
+ ]
 
1149
  },
1150
  {
1151
  "name": "Kituba",
1152
  "iso_1_code": null,
1153
  "iso_3_code": "mkw",
 
1154
  "children": [],
1155
+ "tokenizers": {},
1156
  "node_i": "3561",
1157
+ "native_tokenizers": [],
1158
+ "scripts": []
1159
  }
1160
  ],
1161
+ "tokenizers": {
1162
+ "Latn": {
1163
+ "full_object": "StanzaTokenizer(\"pcm\")",
1164
+ "original_lang_name": "nigerian_pidgin",
1165
+ "original_lang_code": "pcm",
1166
+ "script": "Latn",
1167
+ "class_name": "StanzaTokenizer"
1168
+ }
1169
+ },
1170
  "node_i": "3559",
1171
+ "native_tokenizers": [],
1172
+ "scripts": []
1173
  },
1174
  {
1175
  "name": "Malay based",
1176
  "iso_1_code": null,
1177
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1178
  "children": [
1179
  {
1180
  "name": "Malay, Ambonese",
1181
  "iso_1_code": null,
1182
  "iso_3_code": "abs",
1183
+ "children": [],
1184
  "tokenizers": {
1185
  "Latn": {
1186
+ "full_object": "StanzaTokenizer(\"pcm\")",
1187
+ "original_lang_name": "nigerian_pidgin",
1188
+ "original_lang_code": "pcm",
1189
+ "script": "Latn",
1190
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
1191
  }
1192
  },
 
1193
  "node_i": "3563",
1194
+ "native_tokenizers": [],
1195
  "scripts": [
1196
  "Latn"
1197
+ ]
 
1198
  },
1199
  {
1200
  "name": "Betawi",
1201
  "iso_1_code": null,
1202
  "iso_3_code": "bew",
1203
+ "children": [],
1204
  "tokenizers": {
1205
  "Latn": {
1206
+ "full_object": "StanzaTokenizer(\"pcm\")",
1207
+ "original_lang_name": "nigerian_pidgin",
1208
+ "original_lang_code": "pcm",
1209
+ "script": "Latn",
1210
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
1211
  }
1212
  },
 
1213
  "node_i": "3564",
1214
+ "native_tokenizers": [],
1215
  "scripts": [
1216
  "Latn"
1217
+ ]
 
1218
  },
1219
  {
1220
  "name": "Malay, Banda",
1221
  "iso_1_code": null,
1222
  "iso_3_code": "bpq",
 
1223
  "children": [],
1224
+ "tokenizers": {},
1225
  "node_i": "3565",
1226
+ "native_tokenizers": [],
1227
+ "scripts": []
1228
  },
1229
  {
1230
  "name": "Malaccan Malay Creole",
1231
  "iso_1_code": null,
1232
  "iso_3_code": "ccm",
 
1233
  "children": [],
1234
+ "tokenizers": {},
1235
  "node_i": "3566",
1236
+ "native_tokenizers": [],
1237
+ "scripts": []
1238
  },
1239
  {
1240
  "name": "Malay, Cocos Islands",
1241
  "iso_1_code": "ms",
1242
  "iso_3_code": "coa",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1243
  "children": [],
1244
+ "tokenizers": {},
1245
  "node_i": "3567",
1246
+ "native_tokenizers": [],
1247
+ "scripts": []
1248
  },
1249
  {
1250
  "name": "Malay, Larantuka",
1251
  "iso_1_code": null,
1252
  "iso_3_code": "lrt",
 
1253
  "children": [],
1254
+ "tokenizers": {},
1255
  "node_i": "3568",
1256
+ "native_tokenizers": [],
1257
+ "scripts": []
1258
  },
1259
  {
1260
  "name": "Malay, North Moluccan",
1261
  "iso_1_code": "ms",
1262
  "iso_3_code": "max",
1263
+ "children": [],
1264
  "tokenizers": {
1265
  "Latn": {
1266
+ "full_object": "StanzaTokenizer(\"pcm\")",
1267
+ "original_lang_name": "nigerian_pidgin",
1268
+ "original_lang_code": "pcm",
1269
+ "script": "Latn",
1270
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1271
  }
1272
  },
 
1273
  "node_i": "3569",
1274
+ "native_tokenizers": [],
1275
  "scripts": [
1276
  "Latn"
1277
+ ]
 
1278
  },
1279
  {
1280
  "name": "Malay, Baba",
1281
  "iso_1_code": null,
1282
  "iso_3_code": "mbf",
1283
+ "children": [],
1284
  "tokenizers": {
1285
  "Latn": {
1286
+ "full_object": "StanzaTokenizer(\"pcm\")",
1287
+ "original_lang_name": "nigerian_pidgin",
1288
+ "original_lang_code": "pcm",
1289
+ "script": "Latn",
1290
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
1291
  }
1292
  },
 
1293
  "node_i": "3570",
1294
+ "native_tokenizers": [],
1295
  "scripts": [
1296
  "Latn"
1297
+ ]
 
1298
  },
1299
  {
1300
  "name": "Malay, Balinese",
1301
  "iso_1_code": null,
1302
  "iso_3_code": "mhp",
 
1303
  "children": [],
1304
+ "tokenizers": {},
1305
  "node_i": "3571",
1306
+ "native_tokenizers": [],
1307
+ "scripts": []
1308
  },
1309
  {
1310
  "name": "Malay, Kupang",
1311
  "iso_1_code": null,
1312
  "iso_3_code": "mkn",
1313
+ "children": [],
1314
  "tokenizers": {
1315
  "Latn": {
1316
+ "full_object": "StanzaTokenizer(\"pcm\")",
1317
+ "original_lang_name": "nigerian_pidgin",
1318
+ "original_lang_code": "pcm",
1319
+ "script": "Latn",
1320
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
1321
  }
1322
  },
 
1323
  "node_i": "3572",
1324
+ "native_tokenizers": [],
1325
  "scripts": [
1326
  "Latn"
1327
+ ]
 
1328
  },
1329
  {
1330
  "name": "Indonesian, Peranakan",
1331
  "iso_1_code": null,
1332
  "iso_3_code": "pea",
 
1333
  "children": [],
1334
+ "tokenizers": {},
1335
  "node_i": "3573",
1336
+ "native_tokenizers": [],
1337
+ "scripts": []
1338
  },
1339
  {
1340
  "name": "Malay, Papuan",
1341
  "iso_1_code": null,
1342
  "iso_3_code": "pmy",
 
1343
  "children": [],
1344
+ "tokenizers": {},
1345
  "node_i": "3574",
1346
+ "native_tokenizers": [],
1347
+ "scripts": []
1348
  },
1349
  {
1350
  "name": "Sri Lankan Malay Creole",
1351
  "iso_1_code": null,
1352
  "iso_3_code": "sci",
 
1353
  "children": [],
1354
+ "tokenizers": {},
1355
  "node_i": "3575",
1356
+ "native_tokenizers": [],
1357
+ "scripts": []
1358
  },
1359
  {
1360
  "name": "Malay, Manado",
1361
  "iso_1_code": "ms",
1362
  "iso_3_code": "xmm",
1363
+ "children": [],
1364
  "tokenizers": {
1365
  "Latn": {
1366
+ "full_object": "StanzaTokenizer(\"pcm\")",
1367
+ "original_lang_name": "nigerian_pidgin",
1368
+ "original_lang_code": "pcm",
1369
+ "script": "Latn",
1370
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1371
  }
1372
  },
 
1373
  "node_i": "3576",
1374
+ "native_tokenizers": [],
1375
  "scripts": [
1376
  "Latn"
1377
+ ]
 
1378
  }
1379
  ],
1380
+ "tokenizers": {
1381
+ "Latn": {
1382
+ "full_object": "StanzaTokenizer(\"pcm\")",
1383
+ "original_lang_name": "nigerian_pidgin",
1384
+ "original_lang_code": "pcm",
1385
+ "script": "Latn",
1386
+ "class_name": "StanzaTokenizer"
1387
+ }
1388
+ },
1389
  "node_i": "3562",
1390
+ "native_tokenizers": [],
1391
+ "scripts": []
1392
  },
1393
  {
1394
  "name": "Ngbandi based",
1395
  "iso_1_code": null,
1396
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1397
  "children": [
1398
  {
1399
  "name": "Sango",
1400
  "iso_1_code": "sg",
1401
  "iso_3_code": "sag",
1402
+ "children": [],
1403
  "tokenizers": {
1404
  "Latn": {
1405
+ "full_object": "StanzaTokenizer(\"pcm\")",
1406
+ "original_lang_name": "nigerian_pidgin",
1407
+ "original_lang_code": "pcm",
1408
+ "script": "Latn",
1409
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
1410
  }
1411
  },
 
1412
  "node_i": "3578",
1413
+ "native_tokenizers": [],
1414
  "scripts": [
1415
  "Latn"
1416
+ ]
 
1417
  },
1418
  {
1419
  "name": "Sango, Riverain",
1420
  "iso_1_code": null,
1421
  "iso_3_code": "snj",
 
1422
  "children": [],
1423
+ "tokenizers": {},
1424
  "node_i": "3579",
1425
+ "native_tokenizers": [],
1426
+ "scripts": []
1427
  }
1428
  ],
1429
+ "tokenizers": {
1430
+ "Latn": {
1431
+ "full_object": "StanzaTokenizer(\"pcm\")",
1432
+ "original_lang_name": "nigerian_pidgin",
1433
+ "original_lang_code": "pcm",
1434
+ "script": "Latn",
1435
+ "class_name": "StanzaTokenizer"
1436
+ }
1437
+ },
1438
  "node_i": "3577",
1439
+ "native_tokenizers": [],
1440
+ "scripts": []
1441
  },
1442
  {
1443
  "name": "Portuguese based",
1444
  "iso_1_code": null,
1445
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1446
  "children": [
1447
  {
1448
  "name": "Angolar",
1449
  "iso_1_code": null,
1450
  "iso_3_code": "aoa",
 
1451
  "children": [],
1452
+ "tokenizers": {},
1453
  "node_i": "3581",
1454
+ "native_tokenizers": [],
1455
+ "scripts": []
1456
  },
1457
  {
1458
  "name": "Cafundo Creole",
1459
  "iso_1_code": null,
1460
  "iso_3_code": "ccd",
 
1461
  "children": [],
1462
+ "tokenizers": {},
1463
  "node_i": "3582",
1464
+ "native_tokenizers": [],
1465
+ "scripts": []
1466
  },
1467
  {
1468
  "name": "S\u00e3otomense",
1469
  "iso_1_code": null,
1470
  "iso_3_code": "cri",
1471
+ "children": [],
1472
  "tokenizers": {
1473
  "Latn": {
1474
+ "full_object": "StanzaTokenizer(\"pcm\")",
1475
+ "original_lang_name": "nigerian_pidgin",
1476
+ "original_lang_code": "pcm",
1477
+ "script": "Latn",
1478
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
1479
  }
1480
  },
 
1481
  "node_i": "3583",
1482
+ "native_tokenizers": [],
1483
  "scripts": [
1484
  "Latn"
1485
+ ]
 
1486
  },
1487
  {
1488
  "name": "Fa d\u2019Ambu",
1489
  "iso_1_code": null,
1490
  "iso_3_code": "fab",
 
1491
  "children": [],
1492
+ "tokenizers": {},
1493
  "node_i": "3584",
1494
+ "native_tokenizers": [],
1495
+ "scripts": []
1496
  },
1497
  {
1498
  "name": "Indo-Portuguese",
1499
  "iso_1_code": null,
1500
  "iso_3_code": "idb",
 
1501
  "children": [],
1502
+ "tokenizers": {},
1503
  "node_i": "3585",
1504
+ "native_tokenizers": [],
1505
+ "scripts": []
1506
  },
1507
  {
1508
  "name": "Kabuverdianu",
1509
  "iso_1_code": null,
1510
  "iso_3_code": "kea",
1511
+ "children": [],
1512
  "tokenizers": {
1513
  "Latn": {
1514
+ "full_object": "StanzaTokenizer(\"pcm\")",
1515
+ "original_lang_name": "nigerian_pidgin",
1516
+ "original_lang_code": "pcm",
1517
+ "script": "Latn",
1518
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
1519
  }
1520
  },
 
1521
  "node_i": "3586",
1522
+ "native_tokenizers": [],
1523
  "scripts": [
1524
  "Latn"
1525
+ ]
 
1526
  },
1527
  {
1528
  "name": "Malaccan Portuguese Creole",
1529
  "iso_1_code": null,
1530
  "iso_3_code": "mcm",
 
1531
  "children": [],
1532
+ "tokenizers": {},
1533
  "node_i": "3587",
1534
+ "native_tokenizers": [],
1535
+ "scripts": []
1536
  },
1537
  {
1538
  "name": "Macanese",
1539
  "iso_1_code": null,
1540
  "iso_3_code": "mzs",
 
1541
  "children": [],
1542
+ "tokenizers": {},
1543
  "node_i": "3588",
1544
+ "native_tokenizers": [],
1545
+ "scripts": []
1546
  },
1547
  {
1548
  "name": "Guinea-Bissau Creole",
1549
  "iso_1_code": null,
1550
  "iso_3_code": "pov",
1551
+ "children": [],
1552
  "tokenizers": {
1553
  "Latn": {
1554
+ "full_object": "StanzaTokenizer(\"pcm\")",
1555
+ "original_lang_name": "nigerian_pidgin",
1556
+ "original_lang_code": "pcm",
1557
+ "script": "Latn",
1558
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
1559
  }
1560
  },
 
1561
  "node_i": "3589",
1562
+ "native_tokenizers": [],
1563
  "scripts": [
1564
  "Latn"
1565
+ ]
 
1566
  },
1567
  {
1568
  "name": "Principense",
1569
  "iso_1_code": null,
1570
  "iso_3_code": "pre",
 
1571
  "children": [],
1572
+ "tokenizers": {},
1573
  "node_i": "3590",
1574
+ "native_tokenizers": [],
1575
+ "scripts": []
1576
  },
1577
  {
1578
  "name": "Ternate\u00f1o",
1579
  "iso_1_code": null,
1580
  "iso_3_code": "tmg",
 
1581
  "children": [],
1582
+ "tokenizers": {},
1583
  "node_i": "3591",
1584
+ "native_tokenizers": [],
1585
+ "scripts": []
1586
  },
1587
  {
1588
  "name": "Pidgin, Timor",
1589
  "iso_1_code": null,
1590
  "iso_3_code": "tvy",
 
1591
  "children": [],
1592
+ "tokenizers": {},
1593
  "node_i": "3592",
1594
+ "native_tokenizers": [],
1595
+ "scripts": []
1596
  },
1597
  {
1598
  "name": "Korlai Portuguese Creole",
1599
  "iso_1_code": null,
1600
  "iso_3_code": "vkp",
 
1601
  "children": [],
1602
+ "tokenizers": {},
1603
  "node_i": "3593",
1604
+ "native_tokenizers": [],
1605
+ "scripts": []
1606
  }
1607
  ],
1608
+ "tokenizers": {
1609
+ "Latn": {
1610
+ "full_object": "StanzaTokenizer(\"pcm\")",
1611
+ "original_lang_name": "nigerian_pidgin",
1612
+ "original_lang_code": "pcm",
1613
+ "script": "Latn",
1614
+ "class_name": "StanzaTokenizer"
1615
+ }
1616
+ },
1617
  "node_i": "3580",
1618
+ "native_tokenizers": [],
1619
+ "scripts": []
1620
  },
1621
  {
1622
  "name": "Spanish based",
1623
  "iso_1_code": null,
1624
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1625
  "children": [
1626
  {
1627
  "name": "Chavacano",
1628
  "iso_1_code": null,
1629
  "iso_3_code": "cbk",
1630
+ "children": [],
1631
  "tokenizers": {
1632
  "Latn": {
1633
+ "full_object": "StanzaTokenizer(\"pcm\")",
1634
+ "original_lang_name": "nigerian_pidgin",
1635
+ "original_lang_code": "pcm",
1636
+ "script": "Latn",
1637
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
1638
  }
1639
  },
 
1640
  "node_i": "3595",
1641
+ "native_tokenizers": [],
1642
  "scripts": [
1643
  "Latn"
1644
+ ]
 
1645
  },
1646
  {
1647
  "name": "Palenquero",
1648
  "iso_1_code": null,
1649
  "iso_3_code": "pln",
 
1650
  "children": [],
1651
+ "tokenizers": {},
1652
  "node_i": "3596",
1653
+ "native_tokenizers": [],
1654
+ "scripts": []
1655
  }
1656
  ],
1657
+ "tokenizers": {
1658
+ "Latn": {
1659
+ "full_object": "StanzaTokenizer(\"pcm\")",
1660
+ "original_lang_name": "nigerian_pidgin",
1661
+ "original_lang_code": "pcm",
1662
+ "script": "Latn",
1663
+ "class_name": "StanzaTokenizer"
1664
+ }
1665
+ },
1666
  "node_i": "3594",
1667
+ "native_tokenizers": [],
1668
+ "scripts": []
1669
  },
1670
  {
1671
  "name": "Swahili based",
1672
  "iso_1_code": null,
1673
  "iso_3_code": null,
 
1674
  "children": [
1675
  {
1676
  "name": "Cutchi-Swahili",
1677
  "iso_1_code": null,
1678
  "iso_3_code": "ccl",
 
1679
  "children": [],
1680
+ "tokenizers": {},
1681
  "node_i": "3598",
1682
+ "native_tokenizers": [],
1683
+ "scripts": []
1684
  }
1685
  ],
1686
+ "tokenizers": {},
1687
  "node_i": "3597",
1688
+ "native_tokenizers": [],
1689
+ "scripts": []
1690
  },
1691
  {
1692
  "name": "Tetun based",
1693
  "iso_1_code": null,
1694
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1695
  "children": [
1696
  {
1697
  "name": "Tetun Dili",
1698
  "iso_1_code": null,
1699
  "iso_3_code": "tdt",
1700
+ "children": [],
1701
  "tokenizers": {
1702
  "Latn": {
1703
+ "full_object": "StanzaTokenizer(\"pcm\")",
1704
+ "original_lang_name": "nigerian_pidgin",
1705
+ "original_lang_code": "pcm",
1706
+ "script": "Latn",
1707
+ "class_name": "StanzaTokenizer"
 
 
 
 
 
1708
  }
1709
  },
 
1710
  "node_i": "3600",
1711
+ "native_tokenizers": [],
1712
  "scripts": [
1713
  "Latn"
1714
+ ]
 
1715
  }
1716
  ],
1717
+ "tokenizers": {
1718
+ "Latn": {
1719
+ "full_object": "StanzaTokenizer(\"pcm\")",
1720
+ "original_lang_name": "nigerian_pidgin",
1721
+ "original_lang_code": "pcm",
1722
+ "script": "Latn",
1723
+ "class_name": "StanzaTokenizer"
1724
+ }
1725
+ },
1726
  "node_i": "3599",
1727
+ "native_tokenizers": [],
1728
+ "scripts": []
1729
  }
1730
  ],
1731
+ "tokenizers": {
1732
+ "Latn": {
1733
+ "full_object": "StanzaTokenizer(\"pcm\")",
1734
+ "original_lang_name": "nigerian_pidgin",
1735
+ "original_lang_code": "pcm",
1736
+ "script": "Latn",
1737
+ "class_name": "StanzaTokenizer"
1738
+ }
1739
+ },
1740
  "node_i": "3481",
1741
+ "native_tokenizers": [],
1742
+ "scripts": []
1743
  }
data/Dravidian.json CHANGED
@@ -2,1765 +2,1436 @@
2
  "name": "Dravidian",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {
6
- "Telu": {
7
- "full_object": "SpaCyTokenizer(\"te\")",
8
- "original_lang_name": "telugu",
9
- "original_lang_code": "tel",
10
- "scripts": [
11
- "Telu",
12
- "Latn"
13
- ],
14
- "class_name": "SpaCyTokenizer",
15
- "macrolanguage": false
16
- },
17
- "Latn": {
18
- "full_object": "SpaCyTokenizer(\"ta\")",
19
- "original_lang_name": "tamil",
20
- "original_lang_code": "tam",
21
- "scripts": [
22
- "Latn",
23
- "Taml"
24
- ],
25
- "class_name": "SpaCyTokenizer",
26
- "macrolanguage": false
27
- },
28
- "Knda": {
29
- "full_object": "SpaCyTokenizer(\"kn\")",
30
- "original_lang_name": "kannada",
31
- "original_lang_code": "kan",
32
- "scripts": [
33
- "Latn",
34
- "Knda"
35
- ],
36
- "class_name": "SpaCyTokenizer",
37
- "macrolanguage": false
38
- },
39
- "Mlym": {
40
- "full_object": "SpaCyTokenizer(\"ml\")",
41
- "original_lang_name": "malayalam",
42
- "original_lang_code": "mal",
43
- "scripts": [
44
- "Latn",
45
- "Mlym"
46
- ],
47
- "class_name": "SpaCyTokenizer",
48
- "macrolanguage": false
49
- },
50
- "Taml": {
51
- "full_object": "SpaCyTokenizer(\"ta\")",
52
- "original_lang_name": "tamil",
53
- "original_lang_code": "tam",
54
- "scripts": [
55
- "Latn",
56
- "Taml"
57
- ],
58
- "class_name": "SpaCyTokenizer",
59
- "macrolanguage": false
60
- }
61
- },
62
  "children": [
63
  {
64
  "name": "Central",
65
  "iso_1_code": null,
66
  "iso_3_code": null,
67
- "tokenizers": {},
68
  "children": [
69
  {
70
  "name": "Kolami-Naiki",
71
  "iso_1_code": null,
72
  "iso_3_code": null,
73
- "tokenizers": {},
74
  "children": [
75
  {
76
  "name": "Kolami, Northwestern",
77
  "iso_1_code": null,
78
  "iso_3_code": "kfb",
79
- "tokenizers": {},
80
  "children": [],
 
81
  "node_i": "3604",
82
- "scripts": [],
83
- "own_tokenizer": false
84
  },
85
  {
86
  "name": "Kolami, Southeastern",
87
  "iso_1_code": null,
88
  "iso_3_code": "nit",
89
- "tokenizers": {},
90
  "children": [],
 
91
  "node_i": "3605",
92
- "scripts": [],
93
- "own_tokenizer": false
94
  }
95
  ],
 
96
  "node_i": "3603",
97
- "scripts": [],
98
- "own_tokenizer": false
99
  },
100
  {
101
  "name": "Parji-Gadaba",
102
  "iso_1_code": null,
103
  "iso_3_code": null,
104
- "tokenizers": {},
105
  "children": [
106
  {
107
  "name": "Gadaba, Mudhili",
108
  "iso_1_code": null,
109
  "iso_3_code": "gau",
110
- "tokenizers": {},
111
  "children": [],
 
112
  "node_i": "3607",
113
- "scripts": [],
114
- "own_tokenizer": false
115
  },
116
  {
117
  "name": "Gadaba, Pottangi Ollar",
118
  "iso_1_code": null,
119
  "iso_3_code": "gdb",
120
- "tokenizers": {},
121
  "children": [],
 
122
  "node_i": "3608",
123
- "scripts": [],
124
- "own_tokenizer": false
125
  },
126
  {
127
  "name": "Duruwa",
128
  "iso_1_code": null,
129
  "iso_3_code": "pci",
130
- "tokenizers": {},
131
  "children": [],
 
132
  "node_i": "3609",
133
- "scripts": [],
134
- "own_tokenizer": false
135
  }
136
  ],
 
137
  "node_i": "3606",
138
- "scripts": [],
139
- "own_tokenizer": false
140
  }
141
  ],
 
142
  "node_i": "3602",
143
- "scripts": [],
144
- "own_tokenizer": false
145
  },
146
  {
147
  "name": "Northern",
148
  "iso_1_code": null,
149
  "iso_3_code": null,
150
- "tokenizers": {},
151
  "children": [
152
  {
153
  "name": "Brahui",
154
  "iso_1_code": null,
155
  "iso_3_code": "brh",
156
- "tokenizers": {},
157
  "children": [],
 
158
  "node_i": "3611",
 
159
  "scripts": [
160
  "Arab"
161
- ],
162
- "own_tokenizer": false
163
  },
164
  {
165
  "name": "Kumarbhag Paharia",
166
  "iso_1_code": null,
167
  "iso_3_code": "kmj",
168
- "tokenizers": {},
169
  "children": [],
 
170
  "node_i": "3612",
171
- "scripts": [],
172
- "own_tokenizer": false
173
  },
174
  {
175
  "name": "Kurux",
176
  "iso_1_code": null,
177
  "iso_3_code": "kru",
178
- "tokenizers": {},
179
  "children": [],
 
180
  "node_i": "3613",
 
181
  "scripts": [
182
  "Deva"
183
- ],
184
- "own_tokenizer": false
185
  },
186
  {
187
  "name": "Sauria Paharia",
188
  "iso_1_code": null,
189
  "iso_3_code": "mjt",
190
- "tokenizers": {},
191
  "children": [],
 
192
  "node_i": "3614",
193
- "scripts": [],
194
- "own_tokenizer": false
195
  },
196
  {
197
  "name": "Kisan",
198
  "iso_1_code": null,
199
  "iso_3_code": "xis",
200
- "tokenizers": {},
201
  "children": [],
 
202
  "node_i": "3615",
203
- "scripts": [],
204
- "own_tokenizer": false
205
  }
206
  ],
 
207
  "node_i": "3610",
208
- "scripts": [],
209
- "own_tokenizer": false
210
  },
211
  {
212
  "name": "South-Central",
213
  "iso_1_code": null,
214
  "iso_3_code": null,
215
- "tokenizers": {
216
- "Telu": {
217
- "full_object": "SpaCyTokenizer(\"te\")",
218
- "original_lang_name": "telugu",
219
- "original_lang_code": "tel",
220
- "scripts": [
221
- "Telu",
222
- "Latn"
223
- ],
224
- "class_name": "SpaCyTokenizer",
225
- "macrolanguage": false
226
- },
227
- "Latn": {
228
- "full_object": "SpaCyTokenizer(\"te\")",
229
- "original_lang_name": "telugu",
230
- "original_lang_code": "tel",
231
- "scripts": [
232
- "Telu",
233
- "Latn"
234
- ],
235
- "class_name": "SpaCyTokenizer",
236
- "macrolanguage": false
237
- }
238
- },
239
  "children": [
240
  {
241
  "name": "Gondi-Kui",
242
  "iso_1_code": null,
243
  "iso_3_code": null,
244
- "tokenizers": {
245
- "Telu": {
246
- "full_object": "SpaCyTokenizer(\"te\")",
247
- "original_lang_name": "telugu",
248
- "original_lang_code": "tel",
249
- "scripts": [
250
- "Telu",
251
- "Latn"
252
- ],
253
- "class_name": "SpaCyTokenizer",
254
- "macrolanguage": false
255
- },
256
- "Latn": {
257
- "full_object": "SpaCyTokenizer(\"te\")",
258
- "original_lang_name": "telugu",
259
- "original_lang_code": "tel",
260
- "scripts": [
261
- "Telu",
262
- "Latn"
263
- ],
264
- "class_name": "SpaCyTokenizer",
265
- "macrolanguage": false
266
- }
267
- },
268
  "children": [
269
  {
270
  "name": "Gondi",
271
  "iso_1_code": null,
272
  "iso_3_code": null,
273
- "tokenizers": {
274
- "Telu": {
275
- "full_object": "SpaCyTokenizer(\"te\")",
276
- "original_lang_name": "telugu",
277
- "original_lang_code": "tel",
278
- "scripts": [
279
- "Telu",
280
- "Latn"
281
- ],
282
- "class_name": "SpaCyTokenizer",
283
- "macrolanguage": false
284
- },
285
- "Latn": {
286
- "full_object": "SpaCyTokenizer(\"te\")",
287
- "original_lang_name": "telugu",
288
- "original_lang_code": "tel",
289
- "scripts": [
290
- "Telu",
291
- "Latn"
292
- ],
293
- "class_name": "SpaCyTokenizer",
294
- "macrolanguage": false
295
- }
296
- },
297
  "children": [
298
  {
299
  "name": "Maria, Dandami",
300
  "iso_1_code": null,
301
  "iso_3_code": "daq",
302
- "tokenizers": {},
303
  "children": [],
 
304
  "node_i": "3619",
305
- "scripts": [],
306
- "own_tokenizer": false
307
  },
308
  {
309
  "name": "Muria, Eastern",
310
  "iso_1_code": null,
311
  "iso_3_code": "emu",
312
- "tokenizers": {},
313
  "children": [],
 
314
  "node_i": "3620",
315
- "scripts": [],
316
- "own_tokenizer": false
317
  },
318
  {
319
  "name": "Gondi, Aheri",
320
  "iso_1_code": null,
321
  "iso_3_code": "esg",
322
- "tokenizers": {},
323
  "children": [],
 
324
  "node_i": "3621",
325
- "scripts": [],
326
- "own_tokenizer": false
327
  },
328
  {
329
  "name": "Muria, Far Western",
330
  "iso_1_code": null,
331
  "iso_3_code": "fmu",
332
- "tokenizers": {},
333
  "children": [],
 
334
  "node_i": "3622",
 
335
  "scripts": [
336
  "Deva"
337
- ],
338
- "own_tokenizer": false
339
  },
340
  {
341
  "name": "Gondi, Northern",
342
  "iso_1_code": null,
343
  "iso_3_code": "gno",
344
- "tokenizers": {},
345
  "children": [],
 
346
  "node_i": "3623",
347
- "scripts": [],
348
- "own_tokenizer": false
349
  },
350
  {
351
  "name": "Khirwar",
352
  "iso_1_code": null,
353
  "iso_3_code": "kwx",
354
- "tokenizers": {},
355
  "children": [],
 
356
  "node_i": "3624",
357
- "scripts": [],
358
- "own_tokenizer": false
359
  },
360
  {
361
  "name": "Maria",
362
  "iso_1_code": null,
363
  "iso_3_code": "mrr",
364
- "tokenizers": {},
365
  "children": [],
 
366
  "node_i": "3625",
367
- "scripts": [],
368
- "own_tokenizer": false
369
  },
370
  {
371
  "name": "Muria, Western",
372
  "iso_1_code": null,
373
  "iso_3_code": "mut",
374
- "tokenizers": {},
375
  "children": [],
 
376
  "node_i": "3626",
377
- "scripts": [],
378
- "own_tokenizer": false
379
  },
380
  {
381
  "name": "Nagarchal",
382
  "iso_1_code": null,
383
  "iso_3_code": "nbg",
384
- "tokenizers": {},
385
  "children": [],
 
386
  "node_i": "3627",
387
- "scripts": [],
388
- "own_tokenizer": false
389
  },
390
  {
391
  "name": "Pardhan",
392
  "iso_1_code": null,
393
  "iso_3_code": "pch",
394
- "tokenizers": {},
395
  "children": [],
 
396
  "node_i": "3628",
397
- "scripts": [],
398
- "own_tokenizer": false
399
  },
400
  {
401
  "name": "Gondi, Adilabad",
402
  "iso_1_code": null,
403
  "iso_3_code": "wsg",
 
404
  "tokenizers": {
405
  "Telu": {
406
- "full_object": "SpaCyTokenizer(\"te\")",
407
  "original_lang_name": "telugu",
408
  "original_lang_code": "tel",
409
- "scripts": [
410
- "Telu",
411
- "Latn"
412
- ],
413
- "class_name": "SpaCyTokenizer",
414
- "macrolanguage": false
415
  }
416
  },
417
- "children": [],
418
  "node_i": "3629",
 
419
  "scripts": [
420
  "Telu"
421
- ],
422
- "own_tokenizer": false
423
  }
424
  ],
425
- "node_i": "3618",
426
- "scripts": [],
427
- "own_tokenizer": false
428
- },
429
- {
430
- "name": "Konda-Kui",
431
- "iso_1_code": null,
432
- "iso_3_code": null,
433
  "tokenizers": {
434
  "Telu": {
435
- "full_object": "SpaCyTokenizer(\"te\")",
436
- "original_lang_name": "telugu",
437
- "original_lang_code": "tel",
438
- "scripts": [
439
- "Telu",
440
- "Latn"
441
- ],
442
- "class_name": "SpaCyTokenizer",
443
- "macrolanguage": false
444
- },
445
- "Latn": {
446
- "full_object": "SpaCyTokenizer(\"te\")",
447
  "original_lang_name": "telugu",
448
  "original_lang_code": "tel",
449
- "scripts": [
450
- "Telu",
451
- "Latn"
452
- ],
453
- "class_name": "SpaCyTokenizer",
454
- "macrolanguage": false
455
  }
456
  },
 
 
 
 
 
 
 
 
457
  "children": [
458
  {
459
  "name": "Konda",
460
  "iso_1_code": null,
461
  "iso_3_code": null,
462
- "tokenizers": {},
463
  "children": [
464
  {
465
  "name": "Konda-Dora",
466
  "iso_1_code": null,
467
  "iso_3_code": "kfc",
468
- "tokenizers": {},
469
  "children": [],
 
470
  "node_i": "3632",
471
- "scripts": [],
472
- "own_tokenizer": false
473
  },
474
  {
475
  "name": "Mukha-Dora",
476
  "iso_1_code": null,
477
  "iso_3_code": "mmk",
478
- "tokenizers": {},
479
  "children": [],
 
480
  "node_i": "3633",
481
- "scripts": [],
482
- "own_tokenizer": false
483
  }
484
  ],
 
485
  "node_i": "3631",
486
- "scripts": [],
487
- "own_tokenizer": false
488
  },
489
  {
490
  "name": "Manda-Kui",
491
  "iso_1_code": null,
492
  "iso_3_code": null,
493
- "tokenizers": {
494
- "Telu": {
495
- "full_object": "SpaCyTokenizer(\"te\")",
496
- "original_lang_name": "telugu",
497
- "original_lang_code": "tel",
498
- "scripts": [
499
- "Telu",
500
- "Latn"
501
- ],
502
- "class_name": "SpaCyTokenizer",
503
- "macrolanguage": false
504
- },
505
- "Latn": {
506
- "full_object": "SpaCyTokenizer(\"te\")",
507
- "original_lang_name": "telugu",
508
- "original_lang_code": "tel",
509
- "scripts": [
510
- "Telu",
511
- "Latn"
512
- ],
513
- "class_name": "SpaCyTokenizer",
514
- "macrolanguage": false
515
- }
516
- },
517
  "children": [
518
  {
519
  "name": "Kui-Kuvi",
520
  "iso_1_code": null,
521
  "iso_3_code": null,
522
- "tokenizers": {
523
- "Telu": {
524
- "full_object": "SpaCyTokenizer(\"te\")",
525
- "original_lang_name": "telugu",
526
- "original_lang_code": "tel",
527
- "scripts": [
528
- "Telu",
529
- "Latn"
530
- ],
531
- "class_name": "SpaCyTokenizer",
532
- "macrolanguage": false
533
- },
534
- "Latn": {
535
- "full_object": "SpaCyTokenizer(\"te\")",
536
- "original_lang_name": "telugu",
537
- "original_lang_code": "tel",
538
- "scripts": [
539
- "Telu",
540
- "Latn"
541
- ],
542
- "class_name": "SpaCyTokenizer",
543
- "macrolanguage": false
544
- }
545
- },
546
  "children": [
547
  {
548
  "name": "Kui, Dawik",
549
  "iso_1_code": null,
550
  "iso_3_code": "dwk",
551
- "tokenizers": {},
552
  "children": [],
 
553
  "node_i": "3636",
554
- "scripts": [],
555
- "own_tokenizer": false
556
  },
557
  {
558
  "name": "Koya",
559
  "iso_1_code": null,
560
  "iso_3_code": "kff",
 
561
  "tokenizers": {
562
  "Telu": {
563
- "full_object": "SpaCyTokenizer(\"te\")",
564
  "original_lang_name": "telugu",
565
  "original_lang_code": "tel",
566
- "scripts": [
567
- "Telu",
568
- "Latn"
569
- ],
570
- "class_name": "SpaCyTokenizer",
571
- "macrolanguage": false
572
  }
573
  },
574
- "children": [],
575
  "node_i": "3637",
 
576
  "scripts": [
577
  "Telu"
578
- ],
579
- "own_tokenizer": false
580
  },
581
  {
582
  "name": "Kuvi",
583
  "iso_1_code": null,
584
  "iso_3_code": "kxv",
585
- "tokenizers": {},
586
  "children": [],
 
587
  "node_i": "3638",
588
- "scripts": [],
589
- "own_tokenizer": false
590
  },
591
  {
592
  "name": "Kui",
593
  "iso_1_code": null,
594
  "iso_3_code": "uki",
595
- "tokenizers": {},
596
  "children": [],
 
597
  "node_i": "3639",
598
- "scripts": [],
599
- "own_tokenizer": false
600
  }
601
  ],
 
 
 
 
 
 
 
 
 
602
  "node_i": "3635",
603
- "scripts": [],
604
- "own_tokenizer": false
605
  },
606
  {
607
  "name": "Manda-Pengo",
608
  "iso_1_code": null,
609
  "iso_3_code": null,
610
- "tokenizers": {},
611
  "children": [
612
  {
613
  "name": "Manda",
614
  "iso_1_code": null,
615
  "iso_3_code": "mha",
616
- "tokenizers": {},
617
  "children": [],
 
618
  "node_i": "3641",
619
- "scripts": [],
620
- "own_tokenizer": false
621
  },
622
  {
623
  "name": "Pengo",
624
  "iso_1_code": null,
625
  "iso_3_code": "peg",
626
- "tokenizers": {},
627
  "children": [],
 
628
  "node_i": "3642",
629
- "scripts": [],
630
- "own_tokenizer": false
631
  }
632
  ],
 
633
  "node_i": "3640",
634
- "scripts": [],
635
- "own_tokenizer": false
636
  }
637
  ],
 
 
 
 
 
 
 
 
 
638
  "node_i": "3634",
639
- "scripts": [],
640
- "own_tokenizer": false
641
  }
642
  ],
 
 
 
 
 
 
 
 
 
643
  "node_i": "3630",
644
- "scripts": [],
645
- "own_tokenizer": false
646
  }
647
  ],
648
- "node_i": "3617",
649
- "scripts": [],
650
- "own_tokenizer": false
651
- },
652
- {
653
- "name": "Telugu",
654
- "iso_1_code": null,
655
- "iso_3_code": null,
656
  "tokenizers": {
657
  "Telu": {
658
- "full_object": "SpaCyTokenizer(\"te\")",
659
  "original_lang_name": "telugu",
660
  "original_lang_code": "tel",
661
- "scripts": [
662
- "Telu",
663
- "Latn"
664
- ],
665
- "class_name": "SpaCyTokenizer",
666
- "macrolanguage": false
667
- },
668
- "Latn": {
669
- "full_object": "SpaCyTokenizer(\"te\")",
670
- "original_lang_name": "telugu",
671
- "original_lang_code": "tel",
672
- "scripts": [
673
- "Telu",
674
- "Latn"
675
- ],
676
- "class_name": "SpaCyTokenizer",
677
- "macrolanguage": false
678
  }
679
  },
 
 
 
 
 
 
 
 
680
  "children": [
681
  {
682
  "name": "Chenchu",
683
  "iso_1_code": null,
684
  "iso_3_code": "cde",
685
- "tokenizers": {},
686
  "children": [],
 
687
  "node_i": "3644",
688
- "scripts": [],
689
- "own_tokenizer": false
690
  },
691
  {
692
  "name": "Manna-Dora",
693
  "iso_1_code": null,
694
  "iso_3_code": "mju",
695
- "tokenizers": {},
696
  "children": [],
 
697
  "node_i": "3645",
698
- "scripts": [],
699
- "own_tokenizer": false
700
  },
701
  {
702
  "name": "Telugu",
703
  "iso_1_code": "te",
704
  "iso_3_code": "tel",
 
705
  "tokenizers": {
706
  "Telu": {
707
- "full_object": "SpaCyTokenizer(\"te\")",
708
- "original_lang_name": "telugu",
709
- "original_lang_code": "tel",
710
- "scripts": [
711
- "Telu",
712
- "Latn"
713
- ],
714
- "class_name": "SpaCyTokenizer",
715
- "macrolanguage": false
716
- },
717
- "Latn": {
718
- "full_object": "SpaCyTokenizer(\"te\")",
719
  "original_lang_name": "telugu",
720
  "original_lang_code": "tel",
721
- "scripts": [
722
- "Telu",
723
- "Latn"
724
- ],
725
- "class_name": "SpaCyTokenizer",
726
- "macrolanguage": false
727
  }
728
  },
729
- "children": [],
730
  "node_i": "3646",
 
 
 
731
  "scripts": [
732
  "Telu",
733
  "Latn"
734
- ],
735
- "own_tokenizer": true
736
  },
737
  {
738
  "name": "Waddar",
739
  "iso_1_code": null,
740
  "iso_3_code": "wbq",
741
- "tokenizers": {},
742
  "children": [],
 
743
  "node_i": "3647",
744
- "scripts": [],
745
- "own_tokenizer": false
746
  }
747
  ],
 
 
 
 
 
 
 
 
 
748
  "node_i": "3643",
749
- "scripts": [],
750
- "own_tokenizer": false
751
  }
752
  ],
 
 
 
 
 
 
 
 
 
753
  "node_i": "3616",
754
- "scripts": [],
755
- "own_tokenizer": false
756
  },
757
  {
758
  "name": "Southern",
759
  "iso_1_code": null,
760
  "iso_3_code": null,
761
- "tokenizers": {
762
- "Latn": {
763
- "full_object": "SpaCyTokenizer(\"ta\")",
764
- "original_lang_name": "tamil",
765
- "original_lang_code": "tam",
766
- "scripts": [
767
- "Latn",
768
- "Taml"
769
- ],
770
- "class_name": "SpaCyTokenizer",
771
- "macrolanguage": false
772
- },
773
- "Knda": {
774
- "full_object": "SpaCyTokenizer(\"kn\")",
775
- "original_lang_name": "kannada",
776
- "original_lang_code": "kan",
777
- "scripts": [
778
- "Latn",
779
- "Knda"
780
- ],
781
- "class_name": "SpaCyTokenizer",
782
- "macrolanguage": false
783
- },
784
- "Mlym": {
785
- "full_object": "SpaCyTokenizer(\"ml\")",
786
- "original_lang_name": "malayalam",
787
- "original_lang_code": "mal",
788
- "scripts": [
789
- "Latn",
790
- "Mlym"
791
- ],
792
- "class_name": "SpaCyTokenizer",
793
- "macrolanguage": false
794
- },
795
- "Taml": {
796
- "full_object": "SpaCyTokenizer(\"ta\")",
797
- "original_lang_name": "tamil",
798
- "original_lang_code": "tam",
799
- "scripts": [
800
- "Latn",
801
- "Taml"
802
- ],
803
- "class_name": "SpaCyTokenizer",
804
- "macrolanguage": false
805
- }
806
- },
807
  "children": [
808
  {
809
  "name": "Kurichiya",
810
  "iso_1_code": null,
811
  "iso_3_code": "kfh",
812
- "tokenizers": {},
813
  "children": [],
 
814
  "node_i": "3649",
815
- "scripts": [],
816
- "own_tokenizer": false
817
  },
818
  {
819
  "name": "Kurumba, Attapady",
820
  "iso_1_code": null,
821
  "iso_3_code": "pkr",
822
- "tokenizers": {},
823
  "children": [],
 
824
  "node_i": "3650",
825
- "scripts": [],
826
- "own_tokenizer": false
827
  },
828
  {
829
  "name": "Pathiya",
830
  "iso_1_code": null,
831
  "iso_3_code": "pty",
832
- "tokenizers": {},
833
  "children": [],
 
834
  "node_i": "3651",
835
- "scripts": [],
836
- "own_tokenizer": false
837
  },
838
  {
839
  "name": "Muduga",
840
  "iso_1_code": null,
841
  "iso_3_code": "udg",
842
- "tokenizers": {},
843
  "children": [],
 
844
  "node_i": "3652",
845
- "scripts": [],
846
- "own_tokenizer": false
847
  },
848
  {
849
  "name": "Kumbaran",
850
  "iso_1_code": null,
851
  "iso_3_code": "wkb",
852
- "tokenizers": {},
853
  "children": [],
 
854
  "node_i": "3653",
855
- "scripts": [],
856
- "own_tokenizer": false
857
  },
858
  {
859
  "name": "Kalanadi",
860
  "iso_1_code": null,
861
  "iso_3_code": "wkl",
862
- "tokenizers": {},
863
  "children": [],
 
864
  "node_i": "3654",
865
- "scripts": [],
866
- "own_tokenizer": false
867
  },
868
  {
869
  "name": "Kunduvadi",
870
  "iso_1_code": null,
871
  "iso_3_code": "wku",
872
- "tokenizers": {},
873
  "children": [],
 
874
  "node_i": "3655",
875
- "scripts": [],
876
- "own_tokenizer": false
877
  },
878
  {
879
  "name": "Tamil-Kannada",
880
  "iso_1_code": null,
881
  "iso_3_code": null,
882
- "tokenizers": {
883
- "Latn": {
884
- "full_object": "SpaCyTokenizer(\"ta\")",
885
- "original_lang_name": "tamil",
886
- "original_lang_code": "tam",
887
- "scripts": [
888
- "Latn",
889
- "Taml"
890
- ],
891
- "class_name": "SpaCyTokenizer",
892
- "macrolanguage": false
893
- },
894
- "Knda": {
895
- "full_object": "SpaCyTokenizer(\"kn\")",
896
- "original_lang_name": "kannada",
897
- "original_lang_code": "kan",
898
- "scripts": [
899
- "Latn",
900
- "Knda"
901
- ],
902
- "class_name": "SpaCyTokenizer",
903
- "macrolanguage": false
904
- },
905
- "Mlym": {
906
- "full_object": "SpaCyTokenizer(\"ml\")",
907
- "original_lang_name": "malayalam",
908
- "original_lang_code": "mal",
909
- "scripts": [
910
- "Latn",
911
- "Mlym"
912
- ],
913
- "class_name": "SpaCyTokenizer",
914
- "macrolanguage": false
915
- },
916
- "Taml": {
917
- "full_object": "SpaCyTokenizer(\"ta\")",
918
- "original_lang_name": "tamil",
919
- "original_lang_code": "tam",
920
- "scripts": [
921
- "Latn",
922
- "Taml"
923
- ],
924
- "class_name": "SpaCyTokenizer",
925
- "macrolanguage": false
926
- }
927
- },
928
  "children": [
929
  {
930
  "name": "Kannada",
931
  "iso_1_code": null,
932
  "iso_3_code": null,
933
- "tokenizers": {
934
- "Latn": {
935
- "full_object": "SpaCyTokenizer(\"kn\")",
936
- "original_lang_name": "kannada",
937
- "original_lang_code": "kan",
938
- "scripts": [
939
- "Latn",
940
- "Knda"
941
- ],
942
- "class_name": "SpaCyTokenizer",
943
- "macrolanguage": false
944
- },
945
- "Knda": {
946
- "full_object": "SpaCyTokenizer(\"kn\")",
947
- "original_lang_name": "kannada",
948
- "original_lang_code": "kan",
949
- "scripts": [
950
- "Latn",
951
- "Knda"
952
- ],
953
- "class_name": "SpaCyTokenizer",
954
- "macrolanguage": false
955
- }
956
- },
957
  "children": [
958
  {
959
  "name": "Badaga",
960
  "iso_1_code": null,
961
  "iso_3_code": "bfq",
962
- "tokenizers": {},
963
  "children": [],
 
964
  "node_i": "3658",
965
- "scripts": [],
966
- "own_tokenizer": false
967
  },
968
  {
969
  "name": "Holiya",
970
  "iso_1_code": null,
971
  "iso_3_code": "hoy",
972
- "tokenizers": {},
973
  "children": [],
 
974
  "node_i": "3659",
975
- "scripts": [],
976
- "own_tokenizer": false
977
  },
978
  {
979
  "name": "Kannada",
980
  "iso_1_code": "kn",
981
  "iso_3_code": "kan",
 
982
  "tokenizers": {
983
- "Latn": {
984
- "full_object": "SpaCyTokenizer(\"kn\")",
985
- "original_lang_name": "kannada",
986
- "original_lang_code": "kan",
987
- "scripts": [
988
- "Latn",
989
- "Knda"
990
- ],
991
- "class_name": "SpaCyTokenizer",
992
- "macrolanguage": false
993
- },
994
  "Knda": {
995
- "full_object": "SpaCyTokenizer(\"kn\")",
996
  "original_lang_name": "kannada",
997
  "original_lang_code": "kan",
998
- "scripts": [
999
- "Latn",
1000
- "Knda"
1001
- ],
1002
- "class_name": "SpaCyTokenizer",
1003
- "macrolanguage": false
1004
  }
1005
  },
1006
- "children": [],
1007
  "node_i": "3660",
 
 
 
1008
  "scripts": [
1009
  "Latn",
1010
  "Knda"
1011
- ],
1012
- "own_tokenizer": true
1013
  },
1014
  {
1015
  "name": "Urali",
1016
  "iso_1_code": null,
1017
  "iso_3_code": "url",
1018
- "tokenizers": {},
1019
  "children": [],
 
1020
  "node_i": "3661",
1021
- "scripts": [],
1022
- "own_tokenizer": false
1023
  }
1024
  ],
1025
- "node_i": "3657",
1026
- "scripts": [],
1027
- "own_tokenizer": false
1028
- },
1029
- {
1030
- "name": "Tamil-Kodagu",
1031
- "iso_1_code": null,
1032
- "iso_3_code": null,
1033
  "tokenizers": {
1034
- "Latn": {
1035
- "full_object": "SpaCyTokenizer(\"ta\")",
1036
- "original_lang_name": "tamil",
1037
- "original_lang_code": "tam",
1038
- "scripts": [
1039
- "Latn",
1040
- "Taml"
1041
- ],
1042
- "class_name": "SpaCyTokenizer",
1043
- "macrolanguage": false
1044
- },
1045
- "Mlym": {
1046
- "full_object": "SpaCyTokenizer(\"ml\")",
1047
- "original_lang_name": "malayalam",
1048
- "original_lang_code": "mal",
1049
- "scripts": [
1050
- "Latn",
1051
- "Mlym"
1052
- ],
1053
- "class_name": "SpaCyTokenizer",
1054
- "macrolanguage": false
1055
- },
1056
- "Taml": {
1057
- "full_object": "SpaCyTokenizer(\"ta\")",
1058
- "original_lang_name": "tamil",
1059
- "original_lang_code": "tam",
1060
- "scripts": [
1061
- "Latn",
1062
- "Taml"
1063
- ],
1064
- "class_name": "SpaCyTokenizer",
1065
- "macrolanguage": false
1066
  }
1067
  },
 
 
 
 
 
 
 
 
1068
  "children": [
1069
  {
1070
  "name": "Kodagu",
1071
  "iso_1_code": null,
1072
  "iso_3_code": null,
1073
- "tokenizers": {},
1074
  "children": [
1075
  {
1076
  "name": "Kodava",
1077
  "iso_1_code": null,
1078
  "iso_3_code": "kfa",
1079
- "tokenizers": {},
1080
  "children": [],
 
1081
  "node_i": "3664",
1082
- "scripts": [],
1083
- "own_tokenizer": false
1084
  },
1085
  {
1086
  "name": "Kurumba, Kannada",
1087
  "iso_1_code": null,
1088
  "iso_3_code": "kfi",
1089
- "tokenizers": {},
1090
  "children": [],
 
1091
  "node_i": "3665",
1092
- "scripts": [],
1093
- "own_tokenizer": false
1094
  },
1095
  {
1096
  "name": "Kurumba, Mullu",
1097
  "iso_1_code": null,
1098
  "iso_3_code": "kpb",
1099
- "tokenizers": {},
1100
  "children": [],
 
1101
  "node_i": "3666",
1102
- "scripts": [],
1103
- "own_tokenizer": false
1104
  },
1105
  {
1106
  "name": "Kurumba, Alu",
1107
  "iso_1_code": null,
1108
  "iso_3_code": "xua",
1109
- "tokenizers": {},
1110
  "children": [],
 
1111
  "node_i": "3667",
1112
- "scripts": [],
1113
- "own_tokenizer": false
1114
  },
1115
  {
1116
  "name": "Kurumba, Jennu",
1117
  "iso_1_code": null,
1118
  "iso_3_code": "xuj",
1119
- "tokenizers": {},
1120
  "children": [],
 
1121
  "node_i": "3668",
1122
- "scripts": [],
1123
- "own_tokenizer": false
1124
  }
1125
  ],
 
1126
  "node_i": "3663",
1127
- "scripts": [],
1128
- "own_tokenizer": false
1129
  },
1130
  {
1131
  "name": "Tamil-Malayalam",
1132
  "iso_1_code": null,
1133
  "iso_3_code": null,
1134
- "tokenizers": {
1135
- "Latn": {
1136
- "full_object": "SpaCyTokenizer(\"ta\")",
1137
- "original_lang_name": "tamil",
1138
- "original_lang_code": "tam",
1139
- "scripts": [
1140
- "Latn",
1141
- "Taml"
1142
- ],
1143
- "class_name": "SpaCyTokenizer",
1144
- "macrolanguage": false
1145
- },
1146
- "Mlym": {
1147
- "full_object": "SpaCyTokenizer(\"ml\")",
1148
- "original_lang_name": "malayalam",
1149
- "original_lang_code": "mal",
1150
- "scripts": [
1151
- "Latn",
1152
- "Mlym"
1153
- ],
1154
- "class_name": "SpaCyTokenizer",
1155
- "macrolanguage": false
1156
- },
1157
- "Taml": {
1158
- "full_object": "SpaCyTokenizer(\"ta\")",
1159
- "original_lang_name": "tamil",
1160
- "original_lang_code": "tam",
1161
- "scripts": [
1162
- "Latn",
1163
- "Taml"
1164
- ],
1165
- "class_name": "SpaCyTokenizer",
1166
- "macrolanguage": false
1167
- }
1168
- },
1169
  "children": [
1170
  {
1171
  "name": "Mannan",
1172
  "iso_1_code": null,
1173
  "iso_3_code": "mjv",
1174
- "tokenizers": {},
1175
  "children": [],
 
1176
  "node_i": "3670",
1177
- "scripts": [],
1178
- "own_tokenizer": false
1179
  },
1180
  {
1181
  "name": "Malayalam",
1182
  "iso_1_code": null,
1183
  "iso_3_code": null,
1184
- "tokenizers": {
1185
- "Latn": {
1186
- "full_object": "SpaCyTokenizer(\"ml\")",
1187
- "original_lang_name": "malayalam",
1188
- "original_lang_code": "mal",
1189
- "scripts": [
1190
- "Latn",
1191
- "Mlym"
1192
- ],
1193
- "class_name": "SpaCyTokenizer",
1194
- "macrolanguage": false
1195
- },
1196
- "Mlym": {
1197
- "full_object": "SpaCyTokenizer(\"ml\")",
1198
- "original_lang_name": "malayalam",
1199
- "original_lang_code": "mal",
1200
- "scripts": [
1201
- "Latn",
1202
- "Mlym"
1203
- ],
1204
- "class_name": "SpaCyTokenizer",
1205
- "macrolanguage": false
1206
- }
1207
- },
1208
  "children": [
1209
  {
1210
  "name": "Aranadan",
1211
  "iso_1_code": null,
1212
  "iso_3_code": "aaf",
1213
- "tokenizers": {},
1214
  "children": [],
 
1215
  "node_i": "3672",
1216
- "scripts": [],
1217
- "own_tokenizer": false
1218
  },
1219
  {
1220
  "name": "Kadar",
1221
  "iso_1_code": null,
1222
  "iso_3_code": "kej",
1223
- "tokenizers": {},
1224
  "children": [],
 
1225
  "node_i": "3673",
1226
- "scripts": [],
1227
- "own_tokenizer": false
1228
  },
1229
  {
1230
  "name": "Malayalam",
1231
  "iso_1_code": "ml",
1232
  "iso_3_code": "mal",
 
1233
  "tokenizers": {
1234
- "Latn": {
1235
- "full_object": "SpaCyTokenizer(\"ml\")",
1236
- "original_lang_name": "malayalam",
1237
- "original_lang_code": "mal",
1238
- "scripts": [
1239
- "Latn",
1240
- "Mlym"
1241
- ],
1242
- "class_name": "SpaCyTokenizer",
1243
- "macrolanguage": false
1244
- },
1245
  "Mlym": {
1246
- "full_object": "SpaCyTokenizer(\"ml\")",
1247
  "original_lang_name": "malayalam",
1248
  "original_lang_code": "mal",
1249
- "scripts": [
1250
- "Latn",
1251
- "Mlym"
1252
- ],
1253
- "class_name": "SpaCyTokenizer",
1254
- "macrolanguage": false
1255
  }
1256
  },
1257
- "children": [],
1258
  "node_i": "3674",
 
 
 
1259
  "scripts": [
1260
  "Latn",
1261
  "Mlym"
1262
- ],
1263
- "own_tokenizer": true
1264
  },
1265
  {
1266
  "name": "Malapandaram",
1267
  "iso_1_code": null,
1268
  "iso_3_code": "mjp",
1269
- "tokenizers": {},
1270
  "children": [],
 
1271
  "node_i": "3675",
1272
- "scripts": [],
1273
- "own_tokenizer": false
1274
  },
1275
  {
1276
  "name": "Malaryan",
1277
  "iso_1_code": null,
1278
  "iso_3_code": "mjq",
1279
- "tokenizers": {},
1280
  "children": [],
 
1281
  "node_i": "3676",
1282
- "scripts": [],
1283
- "own_tokenizer": false
1284
  },
1285
  {
1286
  "name": "Malavedan",
1287
  "iso_1_code": null,
1288
  "iso_3_code": "mjr",
1289
- "tokenizers": {},
1290
  "children": [],
 
1291
  "node_i": "3677",
1292
- "scripts": [],
1293
- "own_tokenizer": false
1294
  },
1295
  {
1296
  "name": "Paliyan",
1297
  "iso_1_code": null,
1298
  "iso_3_code": "pcf",
1299
- "tokenizers": {},
1300
  "children": [],
 
1301
  "node_i": "3678",
1302
- "scripts": [],
1303
- "own_tokenizer": false
1304
  },
1305
  {
1306
  "name": "Paniya",
1307
  "iso_1_code": null,
1308
  "iso_3_code": "pcg",
1309
- "tokenizers": {},
1310
  "children": [],
 
1311
  "node_i": "3679",
1312
- "scripts": [],
1313
- "own_tokenizer": false
1314
  },
1315
  {
1316
  "name": "Ravula",
1317
  "iso_1_code": null,
1318
  "iso_3_code": "yea",
1319
- "tokenizers": {},
1320
  "children": [],
 
1321
  "node_i": "3680",
1322
- "scripts": [],
1323
- "own_tokenizer": false
1324
  }
1325
  ],
 
 
 
 
 
 
 
 
 
1326
  "node_i": "3671",
1327
- "scripts": [],
1328
- "own_tokenizer": false
1329
  },
1330
  {
1331
  "name": "Tamil",
1332
  "iso_1_code": null,
1333
  "iso_3_code": null,
1334
- "tokenizers": {
1335
- "Latn": {
1336
- "full_object": "SpaCyTokenizer(\"ta\")",
1337
- "original_lang_name": "tamil",
1338
- "original_lang_code": "tam",
1339
- "scripts": [
1340
- "Latn",
1341
- "Taml"
1342
- ],
1343
- "class_name": "SpaCyTokenizer",
1344
- "macrolanguage": false
1345
- },
1346
- "Taml": {
1347
- "full_object": "SpaCyTokenizer(\"ta\")",
1348
- "original_lang_name": "tamil",
1349
- "original_lang_code": "tam",
1350
- "scripts": [
1351
- "Latn",
1352
- "Taml"
1353
- ],
1354
- "class_name": "SpaCyTokenizer",
1355
- "macrolanguage": false
1356
- }
1357
- },
1358
  "children": [
1359
  {
1360
  "name": "Eravallan",
1361
  "iso_1_code": null,
1362
  "iso_3_code": "era",
1363
- "tokenizers": {},
1364
  "children": [],
 
1365
  "node_i": "3682",
1366
- "scripts": [],
1367
- "own_tokenizer": false
1368
  },
1369
  {
1370
  "name": "Irula",
1371
  "iso_1_code": null,
1372
  "iso_3_code": "iru",
1373
- "tokenizers": {},
1374
  "children": [],
 
1375
  "node_i": "3683",
1376
- "scripts": [],
1377
- "own_tokenizer": false
1378
  },
1379
  {
1380
  "name": "Kaikadi",
1381
  "iso_1_code": null,
1382
  "iso_3_code": "kep",
1383
- "tokenizers": {},
1384
  "children": [],
 
1385
  "node_i": "3684",
1386
- "scripts": [],
1387
- "own_tokenizer": false
1388
  },
1389
  {
1390
  "name": "Kanikkaran",
1391
  "iso_1_code": null,
1392
  "iso_3_code": "kev",
1393
- "tokenizers": {},
1394
  "children": [],
 
1395
  "node_i": "3685",
1396
- "scripts": [],
1397
- "own_tokenizer": false
1398
  },
1399
  {
1400
  "name": "Muthuvan",
1401
  "iso_1_code": null,
1402
  "iso_3_code": "muv",
1403
- "tokenizers": {},
1404
  "children": [],
 
1405
  "node_i": "3686",
1406
- "scripts": [],
1407
- "own_tokenizer": false
1408
  },
1409
  {
1410
  "name": "Sholaga",
1411
  "iso_1_code": null,
1412
  "iso_3_code": "sle",
1413
- "tokenizers": {},
1414
  "children": [],
 
1415
  "node_i": "3687",
1416
- "scripts": [],
1417
- "own_tokenizer": false
1418
  },
1419
  {
1420
  "name": "Tamil",
1421
  "iso_1_code": "ta",
1422
  "iso_3_code": "tam",
 
1423
  "tokenizers": {
1424
- "Latn": {
1425
- "full_object": "SpaCyTokenizer(\"ta\")",
1426
- "original_lang_name": "tamil",
1427
- "original_lang_code": "tam",
1428
- "scripts": [
1429
- "Latn",
1430
- "Taml"
1431
- ],
1432
- "class_name": "SpaCyTokenizer",
1433
- "macrolanguage": false
1434
- },
1435
  "Taml": {
1436
- "full_object": "SpaCyTokenizer(\"ta\")",
1437
  "original_lang_name": "tamil",
1438
  "original_lang_code": "tam",
1439
- "scripts": [
1440
- "Latn",
1441
- "Taml"
1442
- ],
1443
- "class_name": "SpaCyTokenizer",
1444
- "macrolanguage": false
1445
  }
1446
  },
1447
- "children": [],
1448
  "node_i": "3688",
 
 
 
1449
  "scripts": [
1450
  "Taml",
1451
  "Latn"
1452
- ],
1453
- "own_tokenizer": true
1454
  },
1455
  {
1456
  "name": "Kurumba, Betta",
1457
  "iso_1_code": null,
1458
  "iso_3_code": "xub",
1459
- "tokenizers": {},
1460
  "children": [],
 
1461
  "node_i": "3689",
1462
- "scripts": [],
1463
- "own_tokenizer": false
1464
  },
1465
  {
1466
  "name": "Yerukula",
1467
  "iso_1_code": null,
1468
  "iso_3_code": "yeu",
1469
- "tokenizers": {},
1470
  "children": [],
 
1471
  "node_i": "3690",
1472
- "scripts": [],
1473
- "own_tokenizer": false
1474
  }
1475
  ],
 
 
 
 
 
 
 
 
 
1476
  "node_i": "3681",
1477
- "scripts": [],
1478
- "own_tokenizer": false
1479
  }
1480
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1481
  "node_i": "3669",
1482
- "scripts": [],
1483
- "own_tokenizer": false
1484
  },
1485
  {
1486
  "name": "Toda-Kota",
1487
  "iso_1_code": null,
1488
  "iso_3_code": null,
1489
- "tokenizers": {},
1490
  "children": [
1491
  {
1492
  "name": "Kota",
1493
  "iso_1_code": null,
1494
  "iso_3_code": "kfe",
1495
- "tokenizers": {},
1496
  "children": [],
 
1497
  "node_i": "3692",
1498
- "scripts": [],
1499
- "own_tokenizer": false
1500
  },
1501
  {
1502
  "name": "Toda",
1503
  "iso_1_code": null,
1504
  "iso_3_code": "tcx",
1505
- "tokenizers": {},
1506
  "children": [],
 
1507
  "node_i": "3693",
1508
- "scripts": [],
1509
- "own_tokenizer": false
1510
  }
1511
  ],
 
1512
  "node_i": "3691",
1513
- "scripts": [],
1514
- "own_tokenizer": false
1515
  }
1516
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1517
  "node_i": "3662",
1518
- "scripts": [],
1519
- "own_tokenizer": false
1520
  },
1521
  {
1522
  "name": "Unclassified",
1523
  "iso_1_code": null,
1524
  "iso_3_code": null,
1525
- "tokenizers": {},
1526
  "children": [
1527
  {
1528
  "name": "Chetti, Wayanad",
1529
  "iso_1_code": null,
1530
  "iso_3_code": "ctt",
1531
- "tokenizers": {},
1532
  "children": [],
 
1533
  "node_i": "3695",
1534
- "scripts": [],
1535
- "own_tokenizer": false
1536
  }
1537
  ],
 
1538
  "node_i": "3694",
1539
- "scripts": [],
1540
- "own_tokenizer": false
1541
  }
1542
  ],
1543
- "node_i": "3656",
1544
- "scripts": [],
1545
- "own_tokenizer": false
1546
- },
1547
- {
1548
- "name": "Tulu",
1549
- "iso_1_code": null,
1550
- "iso_3_code": null,
1551
  "tokenizers": {
1552
  "Knda": {
1553
- "full_object": "SpaCyTokenizer(\"kn\")",
1554
  "original_lang_name": "kannada",
1555
  "original_lang_code": "kan",
1556
- "scripts": [
1557
- "Latn",
1558
- "Knda"
1559
- ],
1560
- "class_name": "SpaCyTokenizer",
1561
- "macrolanguage": false
 
 
 
 
 
 
 
 
 
 
1562
  }
1563
  },
 
 
 
 
 
 
 
 
1564
  "children": [
1565
  {
1566
  "name": "Bellari",
1567
  "iso_1_code": null,
1568
  "iso_3_code": "brw",
1569
- "tokenizers": {},
1570
  "children": [],
 
1571
  "node_i": "3697",
1572
- "scripts": [],
1573
- "own_tokenizer": false
1574
  },
1575
  {
1576
  "name": "Kudiya",
1577
  "iso_1_code": null,
1578
  "iso_3_code": "kfg",
1579
- "tokenizers": {},
1580
  "children": [],
 
1581
  "node_i": "3698",
1582
- "scripts": [],
1583
- "own_tokenizer": false
1584
  },
1585
  {
1586
  "name": "Tulu",
1587
  "iso_1_code": null,
1588
  "iso_3_code": "tcy",
 
1589
  "tokenizers": {
1590
  "Knda": {
1591
- "full_object": "SpaCyTokenizer(\"kn\")",
1592
  "original_lang_name": "kannada",
1593
  "original_lang_code": "kan",
1594
- "scripts": [
1595
- "Latn",
1596
- "Knda"
1597
- ],
1598
- "class_name": "SpaCyTokenizer",
1599
- "macrolanguage": false
1600
  }
1601
  },
1602
- "children": [],
1603
  "node_i": "3699",
 
1604
  "scripts": [
1605
  "Knda"
1606
- ],
1607
- "own_tokenizer": false
1608
  },
1609
  {
1610
  "name": "Koraga",
1611
  "iso_1_code": null,
1612
  "iso_3_code": null,
1613
- "tokenizers": {},
1614
  "children": [
1615
  {
1616
  "name": "Koraga, Korra",
1617
  "iso_1_code": null,
1618
  "iso_3_code": "kfd",
1619
- "tokenizers": {},
1620
  "children": [],
 
1621
  "node_i": "3701",
1622
- "scripts": [],
1623
- "own_tokenizer": false
1624
  },
1625
  {
1626
  "name": "Koraga, Mudu",
1627
  "iso_1_code": null,
1628
  "iso_3_code": "vmd",
1629
- "tokenizers": {},
1630
  "children": [],
 
1631
  "node_i": "3702",
1632
- "scripts": [],
1633
- "own_tokenizer": false
1634
  }
1635
  ],
 
1636
  "node_i": "3700",
1637
- "scripts": [],
1638
- "own_tokenizer": false
1639
  }
1640
  ],
 
 
 
 
 
 
 
 
 
1641
  "node_i": "3696",
1642
- "scripts": [],
1643
- "own_tokenizer": false
1644
  },
1645
  {
1646
  "name": "Unclassified",
1647
  "iso_1_code": null,
1648
  "iso_3_code": null,
1649
- "tokenizers": {},
1650
  "children": [
1651
  {
1652
  "name": "Mala Malasar",
1653
  "iso_1_code": null,
1654
  "iso_3_code": "ima",
1655
- "tokenizers": {},
1656
  "children": [],
 
1657
  "node_i": "3704",
1658
- "scripts": [],
1659
- "own_tokenizer": false
1660
  },
1661
  {
1662
  "name": "Thachanadan",
1663
  "iso_1_code": null,
1664
  "iso_3_code": "thn",
1665
- "tokenizers": {},
1666
  "children": [],
 
1667
  "node_i": "3705",
1668
- "scripts": [],
1669
- "own_tokenizer": false
1670
  },
1671
  {
1672
  "name": "Ullatan",
1673
  "iso_1_code": null,
1674
  "iso_3_code": "ull",
1675
- "tokenizers": {},
1676
  "children": [],
 
1677
  "node_i": "3706",
1678
- "scripts": [],
1679
- "own_tokenizer": false
1680
  },
1681
  {
1682
  "name": "Malasar",
1683
  "iso_1_code": null,
1684
  "iso_3_code": "ymr",
1685
- "tokenizers": {},
1686
  "children": [],
 
1687
  "node_i": "3707",
1688
- "scripts": [],
1689
- "own_tokenizer": false
1690
  }
1691
  ],
 
1692
  "node_i": "3703",
1693
- "scripts": [],
1694
- "own_tokenizer": false
1695
  }
1696
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1697
  "node_i": "3648",
1698
- "scripts": [],
1699
- "own_tokenizer": false
1700
  },
1701
  {
1702
  "name": "Unclassified",
1703
  "iso_1_code": null,
1704
  "iso_3_code": null,
1705
- "tokenizers": {},
1706
  "children": [
1707
  {
1708
  "name": "Allar",
1709
  "iso_1_code": null,
1710
  "iso_3_code": "all",
1711
- "tokenizers": {},
1712
  "children": [],
 
1713
  "node_i": "3709",
1714
- "scripts": [],
1715
- "own_tokenizer": false
1716
  },
1717
  {
1718
  "name": "Bharia",
1719
  "iso_1_code": null,
1720
  "iso_3_code": "bha",
1721
- "tokenizers": {},
1722
  "children": [],
 
1723
  "node_i": "3710",
1724
- "scripts": [],
1725
- "own_tokenizer": false
1726
  },
1727
  {
1728
  "name": "Malankuravan",
1729
  "iso_1_code": null,
1730
  "iso_3_code": "mjo",
1731
- "tokenizers": {},
1732
  "children": [],
 
1733
  "node_i": "3711",
1734
- "scripts": [],
1735
- "own_tokenizer": false
1736
  },
1737
  {
1738
  "name": "Pattapu",
1739
  "iso_1_code": null,
1740
  "iso_3_code": "ptq",
1741
- "tokenizers": {},
1742
  "children": [],
 
1743
  "node_i": "3712",
1744
- "scripts": [],
1745
- "own_tokenizer": false
1746
  },
1747
  {
1748
  "name": "Vishavan",
1749
  "iso_1_code": null,
1750
  "iso_3_code": "vis",
1751
- "tokenizers": {},
1752
  "children": [],
 
1753
  "node_i": "3713",
1754
- "scripts": [],
1755
- "own_tokenizer": false
1756
  }
1757
  ],
 
1758
  "node_i": "3708",
1759
- "scripts": [],
1760
- "own_tokenizer": false
1761
  }
1762
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1763
  "node_i": "3601",
1764
- "scripts": [],
1765
- "own_tokenizer": false
1766
  }
 
2
  "name": "Dravidian",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "children": [
6
  {
7
  "name": "Central",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Kolami-Naiki",
13
  "iso_1_code": null,
14
  "iso_3_code": null,
 
15
  "children": [
16
  {
17
  "name": "Kolami, Northwestern",
18
  "iso_1_code": null,
19
  "iso_3_code": "kfb",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "3604",
23
+ "native_tokenizers": [],
24
+ "scripts": []
25
  },
26
  {
27
  "name": "Kolami, Southeastern",
28
  "iso_1_code": null,
29
  "iso_3_code": "nit",
 
30
  "children": [],
31
+ "tokenizers": {},
32
  "node_i": "3605",
33
+ "native_tokenizers": [],
34
+ "scripts": []
35
  }
36
  ],
37
+ "tokenizers": {},
38
  "node_i": "3603",
39
+ "native_tokenizers": [],
40
+ "scripts": []
41
  },
42
  {
43
  "name": "Parji-Gadaba",
44
  "iso_1_code": null,
45
  "iso_3_code": null,
 
46
  "children": [
47
  {
48
  "name": "Gadaba, Mudhili",
49
  "iso_1_code": null,
50
  "iso_3_code": "gau",
 
51
  "children": [],
52
+ "tokenizers": {},
53
  "node_i": "3607",
54
+ "native_tokenizers": [],
55
+ "scripts": []
56
  },
57
  {
58
  "name": "Gadaba, Pottangi Ollar",
59
  "iso_1_code": null,
60
  "iso_3_code": "gdb",
 
61
  "children": [],
62
+ "tokenizers": {},
63
  "node_i": "3608",
64
+ "native_tokenizers": [],
65
+ "scripts": []
66
  },
67
  {
68
  "name": "Duruwa",
69
  "iso_1_code": null,
70
  "iso_3_code": "pci",
 
71
  "children": [],
72
+ "tokenizers": {},
73
  "node_i": "3609",
74
+ "native_tokenizers": [],
75
+ "scripts": []
76
  }
77
  ],
78
+ "tokenizers": {},
79
  "node_i": "3606",
80
+ "native_tokenizers": [],
81
+ "scripts": []
82
  }
83
  ],
84
+ "tokenizers": {},
85
  "node_i": "3602",
86
+ "native_tokenizers": [],
87
+ "scripts": []
88
  },
89
  {
90
  "name": "Northern",
91
  "iso_1_code": null,
92
  "iso_3_code": null,
 
93
  "children": [
94
  {
95
  "name": "Brahui",
96
  "iso_1_code": null,
97
  "iso_3_code": "brh",
 
98
  "children": [],
99
+ "tokenizers": {},
100
  "node_i": "3611",
101
+ "native_tokenizers": [],
102
  "scripts": [
103
  "Arab"
104
+ ]
 
105
  },
106
  {
107
  "name": "Kumarbhag Paharia",
108
  "iso_1_code": null,
109
  "iso_3_code": "kmj",
 
110
  "children": [],
111
+ "tokenizers": {},
112
  "node_i": "3612",
113
+ "native_tokenizers": [],
114
+ "scripts": []
115
  },
116
  {
117
  "name": "Kurux",
118
  "iso_1_code": null,
119
  "iso_3_code": "kru",
 
120
  "children": [],
121
+ "tokenizers": {},
122
  "node_i": "3613",
123
+ "native_tokenizers": [],
124
  "scripts": [
125
  "Deva"
126
+ ]
 
127
  },
128
  {
129
  "name": "Sauria Paharia",
130
  "iso_1_code": null,
131
  "iso_3_code": "mjt",
 
132
  "children": [],
133
+ "tokenizers": {},
134
  "node_i": "3614",
135
+ "native_tokenizers": [],
136
+ "scripts": []
137
  },
138
  {
139
  "name": "Kisan",
140
  "iso_1_code": null,
141
  "iso_3_code": "xis",
 
142
  "children": [],
143
+ "tokenizers": {},
144
  "node_i": "3615",
145
+ "native_tokenizers": [],
146
+ "scripts": []
147
  }
148
  ],
149
+ "tokenizers": {},
150
  "node_i": "3610",
151
+ "native_tokenizers": [],
152
+ "scripts": []
153
  },
154
  {
155
  "name": "South-Central",
156
  "iso_1_code": null,
157
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  "children": [
159
  {
160
  "name": "Gondi-Kui",
161
  "iso_1_code": null,
162
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  "children": [
164
  {
165
  "name": "Gondi",
166
  "iso_1_code": null,
167
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  "children": [
169
  {
170
  "name": "Maria, Dandami",
171
  "iso_1_code": null,
172
  "iso_3_code": "daq",
 
173
  "children": [],
174
+ "tokenizers": {},
175
  "node_i": "3619",
176
+ "native_tokenizers": [],
177
+ "scripts": []
178
  },
179
  {
180
  "name": "Muria, Eastern",
181
  "iso_1_code": null,
182
  "iso_3_code": "emu",
 
183
  "children": [],
184
+ "tokenizers": {},
185
  "node_i": "3620",
186
+ "native_tokenizers": [],
187
+ "scripts": []
188
  },
189
  {
190
  "name": "Gondi, Aheri",
191
  "iso_1_code": null,
192
  "iso_3_code": "esg",
 
193
  "children": [],
194
+ "tokenizers": {},
195
  "node_i": "3621",
196
+ "native_tokenizers": [],
197
+ "scripts": []
198
  },
199
  {
200
  "name": "Muria, Far Western",
201
  "iso_1_code": null,
202
  "iso_3_code": "fmu",
 
203
  "children": [],
204
+ "tokenizers": {},
205
  "node_i": "3622",
206
+ "native_tokenizers": [],
207
  "scripts": [
208
  "Deva"
209
+ ]
 
210
  },
211
  {
212
  "name": "Gondi, Northern",
213
  "iso_1_code": null,
214
  "iso_3_code": "gno",
 
215
  "children": [],
216
+ "tokenizers": {},
217
  "node_i": "3623",
218
+ "native_tokenizers": [],
219
+ "scripts": []
220
  },
221
  {
222
  "name": "Khirwar",
223
  "iso_1_code": null,
224
  "iso_3_code": "kwx",
 
225
  "children": [],
226
+ "tokenizers": {},
227
  "node_i": "3624",
228
+ "native_tokenizers": [],
229
+ "scripts": []
230
  },
231
  {
232
  "name": "Maria",
233
  "iso_1_code": null,
234
  "iso_3_code": "mrr",
 
235
  "children": [],
236
+ "tokenizers": {},
237
  "node_i": "3625",
238
+ "native_tokenizers": [],
239
+ "scripts": []
240
  },
241
  {
242
  "name": "Muria, Western",
243
  "iso_1_code": null,
244
  "iso_3_code": "mut",
 
245
  "children": [],
246
+ "tokenizers": {},
247
  "node_i": "3626",
248
+ "native_tokenizers": [],
249
+ "scripts": []
250
  },
251
  {
252
  "name": "Nagarchal",
253
  "iso_1_code": null,
254
  "iso_3_code": "nbg",
 
255
  "children": [],
256
+ "tokenizers": {},
257
  "node_i": "3627",
258
+ "native_tokenizers": [],
259
+ "scripts": []
260
  },
261
  {
262
  "name": "Pardhan",
263
  "iso_1_code": null,
264
  "iso_3_code": "pch",
 
265
  "children": [],
266
+ "tokenizers": {},
267
  "node_i": "3628",
268
+ "native_tokenizers": [],
269
+ "scripts": []
270
  },
271
  {
272
  "name": "Gondi, Adilabad",
273
  "iso_1_code": null,
274
  "iso_3_code": "wsg",
275
+ "children": [],
276
  "tokenizers": {
277
  "Telu": {
278
+ "full_object": "IndicNLPTokenizer(\"te\")",
279
  "original_lang_name": "telugu",
280
  "original_lang_code": "tel",
281
+ "script": "Telu",
282
+ "class_name": "IndicNLPTokenizer"
 
 
 
 
283
  }
284
  },
 
285
  "node_i": "3629",
286
+ "native_tokenizers": [],
287
  "scripts": [
288
  "Telu"
289
+ ]
 
290
  }
291
  ],
 
 
 
 
 
 
 
 
292
  "tokenizers": {
293
  "Telu": {
294
+ "full_object": "IndicNLPTokenizer(\"te\")",
 
 
 
 
 
 
 
 
 
 
 
295
  "original_lang_name": "telugu",
296
  "original_lang_code": "tel",
297
+ "script": "Telu",
298
+ "class_name": "IndicNLPTokenizer"
 
 
 
 
299
  }
300
  },
301
+ "node_i": "3618",
302
+ "native_tokenizers": [],
303
+ "scripts": []
304
+ },
305
+ {
306
+ "name": "Konda-Kui",
307
+ "iso_1_code": null,
308
+ "iso_3_code": null,
309
  "children": [
310
  {
311
  "name": "Konda",
312
  "iso_1_code": null,
313
  "iso_3_code": null,
 
314
  "children": [
315
  {
316
  "name": "Konda-Dora",
317
  "iso_1_code": null,
318
  "iso_3_code": "kfc",
 
319
  "children": [],
320
+ "tokenizers": {},
321
  "node_i": "3632",
322
+ "native_tokenizers": [],
323
+ "scripts": []
324
  },
325
  {
326
  "name": "Mukha-Dora",
327
  "iso_1_code": null,
328
  "iso_3_code": "mmk",
 
329
  "children": [],
330
+ "tokenizers": {},
331
  "node_i": "3633",
332
+ "native_tokenizers": [],
333
+ "scripts": []
334
  }
335
  ],
336
+ "tokenizers": {},
337
  "node_i": "3631",
338
+ "native_tokenizers": [],
339
+ "scripts": []
340
  },
341
  {
342
  "name": "Manda-Kui",
343
  "iso_1_code": null,
344
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
345
  "children": [
346
  {
347
  "name": "Kui-Kuvi",
348
  "iso_1_code": null,
349
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
  "children": [
351
  {
352
  "name": "Kui, Dawik",
353
  "iso_1_code": null,
354
  "iso_3_code": "dwk",
 
355
  "children": [],
356
+ "tokenizers": {},
357
  "node_i": "3636",
358
+ "native_tokenizers": [],
359
+ "scripts": []
360
  },
361
  {
362
  "name": "Koya",
363
  "iso_1_code": null,
364
  "iso_3_code": "kff",
365
+ "children": [],
366
  "tokenizers": {
367
  "Telu": {
368
+ "full_object": "IndicNLPTokenizer(\"te\")",
369
  "original_lang_name": "telugu",
370
  "original_lang_code": "tel",
371
+ "script": "Telu",
372
+ "class_name": "IndicNLPTokenizer"
 
 
 
 
373
  }
374
  },
 
375
  "node_i": "3637",
376
+ "native_tokenizers": [],
377
  "scripts": [
378
  "Telu"
379
+ ]
 
380
  },
381
  {
382
  "name": "Kuvi",
383
  "iso_1_code": null,
384
  "iso_3_code": "kxv",
 
385
  "children": [],
386
+ "tokenizers": {},
387
  "node_i": "3638",
388
+ "native_tokenizers": [],
389
+ "scripts": []
390
  },
391
  {
392
  "name": "Kui",
393
  "iso_1_code": null,
394
  "iso_3_code": "uki",
 
395
  "children": [],
396
+ "tokenizers": {},
397
  "node_i": "3639",
398
+ "native_tokenizers": [],
399
+ "scripts": []
400
  }
401
  ],
402
+ "tokenizers": {
403
+ "Telu": {
404
+ "full_object": "IndicNLPTokenizer(\"te\")",
405
+ "original_lang_name": "telugu",
406
+ "original_lang_code": "tel",
407
+ "script": "Telu",
408
+ "class_name": "IndicNLPTokenizer"
409
+ }
410
+ },
411
  "node_i": "3635",
412
+ "native_tokenizers": [],
413
+ "scripts": []
414
  },
415
  {
416
  "name": "Manda-Pengo",
417
  "iso_1_code": null,
418
  "iso_3_code": null,
 
419
  "children": [
420
  {
421
  "name": "Manda",
422
  "iso_1_code": null,
423
  "iso_3_code": "mha",
 
424
  "children": [],
425
+ "tokenizers": {},
426
  "node_i": "3641",
427
+ "native_tokenizers": [],
428
+ "scripts": []
429
  },
430
  {
431
  "name": "Pengo",
432
  "iso_1_code": null,
433
  "iso_3_code": "peg",
 
434
  "children": [],
435
+ "tokenizers": {},
436
  "node_i": "3642",
437
+ "native_tokenizers": [],
438
+ "scripts": []
439
  }
440
  ],
441
+ "tokenizers": {},
442
  "node_i": "3640",
443
+ "native_tokenizers": [],
444
+ "scripts": []
445
  }
446
  ],
447
+ "tokenizers": {
448
+ "Telu": {
449
+ "full_object": "IndicNLPTokenizer(\"te\")",
450
+ "original_lang_name": "telugu",
451
+ "original_lang_code": "tel",
452
+ "script": "Telu",
453
+ "class_name": "IndicNLPTokenizer"
454
+ }
455
+ },
456
  "node_i": "3634",
457
+ "native_tokenizers": [],
458
+ "scripts": []
459
  }
460
  ],
461
+ "tokenizers": {
462
+ "Telu": {
463
+ "full_object": "IndicNLPTokenizer(\"te\")",
464
+ "original_lang_name": "telugu",
465
+ "original_lang_code": "tel",
466
+ "script": "Telu",
467
+ "class_name": "IndicNLPTokenizer"
468
+ }
469
+ },
470
  "node_i": "3630",
471
+ "native_tokenizers": [],
472
+ "scripts": []
473
  }
474
  ],
 
 
 
 
 
 
 
 
475
  "tokenizers": {
476
  "Telu": {
477
+ "full_object": "IndicNLPTokenizer(\"te\")",
478
  "original_lang_name": "telugu",
479
  "original_lang_code": "tel",
480
+ "script": "Telu",
481
+ "class_name": "IndicNLPTokenizer"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
482
  }
483
  },
484
+ "node_i": "3617",
485
+ "native_tokenizers": [],
486
+ "scripts": []
487
+ },
488
+ {
489
+ "name": "Telugu",
490
+ "iso_1_code": null,
491
+ "iso_3_code": null,
492
  "children": [
493
  {
494
  "name": "Chenchu",
495
  "iso_1_code": null,
496
  "iso_3_code": "cde",
 
497
  "children": [],
498
+ "tokenizers": {},
499
  "node_i": "3644",
500
+ "native_tokenizers": [],
501
+ "scripts": []
502
  },
503
  {
504
  "name": "Manna-Dora",
505
  "iso_1_code": null,
506
  "iso_3_code": "mju",
 
507
  "children": [],
508
+ "tokenizers": {},
509
  "node_i": "3645",
510
+ "native_tokenizers": [],
511
+ "scripts": []
512
  },
513
  {
514
  "name": "Telugu",
515
  "iso_1_code": "te",
516
  "iso_3_code": "tel",
517
+ "children": [],
518
  "tokenizers": {
519
  "Telu": {
520
+ "full_object": "IndicNLPTokenizer(\"te\")",
 
 
 
 
 
 
 
 
 
 
 
521
  "original_lang_name": "telugu",
522
  "original_lang_code": "tel",
523
+ "script": "Telu",
524
+ "class_name": "IndicNLPTokenizer"
 
 
 
 
525
  }
526
  },
 
527
  "node_i": "3646",
528
+ "native_tokenizers": [
529
+ "Telu"
530
+ ],
531
  "scripts": [
532
  "Telu",
533
  "Latn"
534
+ ]
 
535
  },
536
  {
537
  "name": "Waddar",
538
  "iso_1_code": null,
539
  "iso_3_code": "wbq",
 
540
  "children": [],
541
+ "tokenizers": {},
542
  "node_i": "3647",
543
+ "native_tokenizers": [],
544
+ "scripts": []
545
  }
546
  ],
547
+ "tokenizers": {
548
+ "Telu": {
549
+ "full_object": "IndicNLPTokenizer(\"te\")",
550
+ "original_lang_name": "telugu",
551
+ "original_lang_code": "tel",
552
+ "script": "Telu",
553
+ "class_name": "IndicNLPTokenizer"
554
+ }
555
+ },
556
  "node_i": "3643",
557
+ "native_tokenizers": [],
558
+ "scripts": []
559
  }
560
  ],
561
+ "tokenizers": {
562
+ "Telu": {
563
+ "full_object": "IndicNLPTokenizer(\"te\")",
564
+ "original_lang_name": "telugu",
565
+ "original_lang_code": "tel",
566
+ "script": "Telu",
567
+ "class_name": "IndicNLPTokenizer"
568
+ }
569
+ },
570
  "node_i": "3616",
571
+ "native_tokenizers": [],
572
+ "scripts": []
573
  },
574
  {
575
  "name": "Southern",
576
  "iso_1_code": null,
577
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
578
  "children": [
579
  {
580
  "name": "Kurichiya",
581
  "iso_1_code": null,
582
  "iso_3_code": "kfh",
 
583
  "children": [],
584
+ "tokenizers": {},
585
  "node_i": "3649",
586
+ "native_tokenizers": [],
587
+ "scripts": []
588
  },
589
  {
590
  "name": "Kurumba, Attapady",
591
  "iso_1_code": null,
592
  "iso_3_code": "pkr",
 
593
  "children": [],
594
+ "tokenizers": {},
595
  "node_i": "3650",
596
+ "native_tokenizers": [],
597
+ "scripts": []
598
  },
599
  {
600
  "name": "Pathiya",
601
  "iso_1_code": null,
602
  "iso_3_code": "pty",
 
603
  "children": [],
604
+ "tokenizers": {},
605
  "node_i": "3651",
606
+ "native_tokenizers": [],
607
+ "scripts": []
608
  },
609
  {
610
  "name": "Muduga",
611
  "iso_1_code": null,
612
  "iso_3_code": "udg",
 
613
  "children": [],
614
+ "tokenizers": {},
615
  "node_i": "3652",
616
+ "native_tokenizers": [],
617
+ "scripts": []
618
  },
619
  {
620
  "name": "Kumbaran",
621
  "iso_1_code": null,
622
  "iso_3_code": "wkb",
 
623
  "children": [],
624
+ "tokenizers": {},
625
  "node_i": "3653",
626
+ "native_tokenizers": [],
627
+ "scripts": []
628
  },
629
  {
630
  "name": "Kalanadi",
631
  "iso_1_code": null,
632
  "iso_3_code": "wkl",
 
633
  "children": [],
634
+ "tokenizers": {},
635
  "node_i": "3654",
636
+ "native_tokenizers": [],
637
+ "scripts": []
638
  },
639
  {
640
  "name": "Kunduvadi",
641
  "iso_1_code": null,
642
  "iso_3_code": "wku",
 
643
  "children": [],
644
+ "tokenizers": {},
645
  "node_i": "3655",
646
+ "native_tokenizers": [],
647
+ "scripts": []
648
  },
649
  {
650
  "name": "Tamil-Kannada",
651
  "iso_1_code": null,
652
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
653
  "children": [
654
  {
655
  "name": "Kannada",
656
  "iso_1_code": null,
657
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658
  "children": [
659
  {
660
  "name": "Badaga",
661
  "iso_1_code": null,
662
  "iso_3_code": "bfq",
 
663
  "children": [],
664
+ "tokenizers": {},
665
  "node_i": "3658",
666
+ "native_tokenizers": [],
667
+ "scripts": []
668
  },
669
  {
670
  "name": "Holiya",
671
  "iso_1_code": null,
672
  "iso_3_code": "hoy",
 
673
  "children": [],
674
+ "tokenizers": {},
675
  "node_i": "3659",
676
+ "native_tokenizers": [],
677
+ "scripts": []
678
  },
679
  {
680
  "name": "Kannada",
681
  "iso_1_code": "kn",
682
  "iso_3_code": "kan",
683
+ "children": [],
684
  "tokenizers": {
 
 
 
 
 
 
 
 
 
 
 
685
  "Knda": {
686
+ "full_object": "IndicNLPTokenizer(\"kn\")",
687
  "original_lang_name": "kannada",
688
  "original_lang_code": "kan",
689
+ "script": "Knda",
690
+ "class_name": "IndicNLPTokenizer"
 
 
 
 
691
  }
692
  },
 
693
  "node_i": "3660",
694
+ "native_tokenizers": [
695
+ "Knda"
696
+ ],
697
  "scripts": [
698
  "Latn",
699
  "Knda"
700
+ ]
 
701
  },
702
  {
703
  "name": "Urali",
704
  "iso_1_code": null,
705
  "iso_3_code": "url",
 
706
  "children": [],
707
+ "tokenizers": {},
708
  "node_i": "3661",
709
+ "native_tokenizers": [],
710
+ "scripts": []
711
  }
712
  ],
 
 
 
 
 
 
 
 
713
  "tokenizers": {
714
+ "Knda": {
715
+ "full_object": "IndicNLPTokenizer(\"kn\")",
716
+ "original_lang_name": "kannada",
717
+ "original_lang_code": "kan",
718
+ "script": "Knda",
719
+ "class_name": "IndicNLPTokenizer"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
720
  }
721
  },
722
+ "node_i": "3657",
723
+ "native_tokenizers": [],
724
+ "scripts": []
725
+ },
726
+ {
727
+ "name": "Tamil-Kodagu",
728
+ "iso_1_code": null,
729
+ "iso_3_code": null,
730
  "children": [
731
  {
732
  "name": "Kodagu",
733
  "iso_1_code": null,
734
  "iso_3_code": null,
 
735
  "children": [
736
  {
737
  "name": "Kodava",
738
  "iso_1_code": null,
739
  "iso_3_code": "kfa",
 
740
  "children": [],
741
+ "tokenizers": {},
742
  "node_i": "3664",
743
+ "native_tokenizers": [],
744
+ "scripts": []
745
  },
746
  {
747
  "name": "Kurumba, Kannada",
748
  "iso_1_code": null,
749
  "iso_3_code": "kfi",
 
750
  "children": [],
751
+ "tokenizers": {},
752
  "node_i": "3665",
753
+ "native_tokenizers": [],
754
+ "scripts": []
755
  },
756
  {
757
  "name": "Kurumba, Mullu",
758
  "iso_1_code": null,
759
  "iso_3_code": "kpb",
 
760
  "children": [],
761
+ "tokenizers": {},
762
  "node_i": "3666",
763
+ "native_tokenizers": [],
764
+ "scripts": []
765
  },
766
  {
767
  "name": "Kurumba, Alu",
768
  "iso_1_code": null,
769
  "iso_3_code": "xua",
 
770
  "children": [],
771
+ "tokenizers": {},
772
  "node_i": "3667",
773
+ "native_tokenizers": [],
774
+ "scripts": []
775
  },
776
  {
777
  "name": "Kurumba, Jennu",
778
  "iso_1_code": null,
779
  "iso_3_code": "xuj",
 
780
  "children": [],
781
+ "tokenizers": {},
782
  "node_i": "3668",
783
+ "native_tokenizers": [],
784
+ "scripts": []
785
  }
786
  ],
787
+ "tokenizers": {},
788
  "node_i": "3663",
789
+ "native_tokenizers": [],
790
+ "scripts": []
791
  },
792
  {
793
  "name": "Tamil-Malayalam",
794
  "iso_1_code": null,
795
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
796
  "children": [
797
  {
798
  "name": "Mannan",
799
  "iso_1_code": null,
800
  "iso_3_code": "mjv",
 
801
  "children": [],
802
+ "tokenizers": {},
803
  "node_i": "3670",
804
+ "native_tokenizers": [],
805
+ "scripts": []
806
  },
807
  {
808
  "name": "Malayalam",
809
  "iso_1_code": null,
810
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
811
  "children": [
812
  {
813
  "name": "Aranadan",
814
  "iso_1_code": null,
815
  "iso_3_code": "aaf",
 
816
  "children": [],
817
+ "tokenizers": {},
818
  "node_i": "3672",
819
+ "native_tokenizers": [],
820
+ "scripts": []
821
  },
822
  {
823
  "name": "Kadar",
824
  "iso_1_code": null,
825
  "iso_3_code": "kej",
 
826
  "children": [],
827
+ "tokenizers": {},
828
  "node_i": "3673",
829
+ "native_tokenizers": [],
830
+ "scripts": []
831
  },
832
  {
833
  "name": "Malayalam",
834
  "iso_1_code": "ml",
835
  "iso_3_code": "mal",
836
+ "children": [],
837
  "tokenizers": {
 
 
 
 
 
 
 
 
 
 
 
838
  "Mlym": {
839
+ "full_object": "IndicNLPTokenizer(\"ml\")",
840
  "original_lang_name": "malayalam",
841
  "original_lang_code": "mal",
842
+ "script": "Mlym",
843
+ "class_name": "IndicNLPTokenizer"
 
 
 
 
844
  }
845
  },
 
846
  "node_i": "3674",
847
+ "native_tokenizers": [
848
+ "Mlym"
849
+ ],
850
  "scripts": [
851
  "Latn",
852
  "Mlym"
853
+ ]
 
854
  },
855
  {
856
  "name": "Malapandaram",
857
  "iso_1_code": null,
858
  "iso_3_code": "mjp",
 
859
  "children": [],
860
+ "tokenizers": {},
861
  "node_i": "3675",
862
+ "native_tokenizers": [],
863
+ "scripts": []
864
  },
865
  {
866
  "name": "Malaryan",
867
  "iso_1_code": null,
868
  "iso_3_code": "mjq",
 
869
  "children": [],
870
+ "tokenizers": {},
871
  "node_i": "3676",
872
+ "native_tokenizers": [],
873
+ "scripts": []
874
  },
875
  {
876
  "name": "Malavedan",
877
  "iso_1_code": null,
878
  "iso_3_code": "mjr",
 
879
  "children": [],
880
+ "tokenizers": {},
881
  "node_i": "3677",
882
+ "native_tokenizers": [],
883
+ "scripts": []
884
  },
885
  {
886
  "name": "Paliyan",
887
  "iso_1_code": null,
888
  "iso_3_code": "pcf",
 
889
  "children": [],
890
+ "tokenizers": {},
891
  "node_i": "3678",
892
+ "native_tokenizers": [],
893
+ "scripts": []
894
  },
895
  {
896
  "name": "Paniya",
897
  "iso_1_code": null,
898
  "iso_3_code": "pcg",
 
899
  "children": [],
900
+ "tokenizers": {},
901
  "node_i": "3679",
902
+ "native_tokenizers": [],
903
+ "scripts": []
904
  },
905
  {
906
  "name": "Ravula",
907
  "iso_1_code": null,
908
  "iso_3_code": "yea",
 
909
  "children": [],
910
+ "tokenizers": {},
911
  "node_i": "3680",
912
+ "native_tokenizers": [],
913
+ "scripts": []
914
  }
915
  ],
916
+ "tokenizers": {
917
+ "Mlym": {
918
+ "full_object": "IndicNLPTokenizer(\"ml\")",
919
+ "original_lang_name": "malayalam",
920
+ "original_lang_code": "mal",
921
+ "script": "Mlym",
922
+ "class_name": "IndicNLPTokenizer"
923
+ }
924
+ },
925
  "node_i": "3671",
926
+ "native_tokenizers": [],
927
+ "scripts": []
928
  },
929
  {
930
  "name": "Tamil",
931
  "iso_1_code": null,
932
  "iso_3_code": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
933
  "children": [
934
  {
935
  "name": "Eravallan",
936
  "iso_1_code": null,
937
  "iso_3_code": "era",
 
938
  "children": [],
939
+ "tokenizers": {},
940
  "node_i": "3682",
941
+ "native_tokenizers": [],
942
+ "scripts": []
943
  },
944
  {
945
  "name": "Irula",
946
  "iso_1_code": null,
947
  "iso_3_code": "iru",
 
948
  "children": [],
949
+ "tokenizers": {},
950
  "node_i": "3683",
951
+ "native_tokenizers": [],
952
+ "scripts": []
953
  },
954
  {
955
  "name": "Kaikadi",
956
  "iso_1_code": null,
957
  "iso_3_code": "kep",
 
958
  "children": [],
959
+ "tokenizers": {},
960
  "node_i": "3684",
961
+ "native_tokenizers": [],
962
+ "scripts": []
963
  },
964
  {
965
  "name": "Kanikkaran",
966
  "iso_1_code": null,
967
  "iso_3_code": "kev",
 
968
  "children": [],
969
+ "tokenizers": {},
970
  "node_i": "3685",
971
+ "native_tokenizers": [],
972
+ "scripts": []
973
  },
974
  {
975
  "name": "Muthuvan",
976
  "iso_1_code": null,
977
  "iso_3_code": "muv",
 
978
  "children": [],
979
+ "tokenizers": {},
980
  "node_i": "3686",
981
+ "native_tokenizers": [],
982
+ "scripts": []
983
  },
984
  {
985
  "name": "Sholaga",
986
  "iso_1_code": null,
987
  "iso_3_code": "sle",
 
988
  "children": [],
989
+ "tokenizers": {},
990
  "node_i": "3687",
991
+ "native_tokenizers": [],
992
+ "scripts": []
993
  },
994
  {
995
  "name": "Tamil",
996
  "iso_1_code": "ta",
997
  "iso_3_code": "tam",
998
+ "children": [],
999
  "tokenizers": {
 
 
 
 
 
 
 
 
 
 
 
1000
  "Taml": {
1001
+ "full_object": "IndicNLPTokenizer(\"ta\")",
1002
  "original_lang_name": "tamil",
1003
  "original_lang_code": "tam",
1004
+ "script": "Taml",
1005
+ "class_name": "IndicNLPTokenizer"
 
 
 
 
1006
  }
1007
  },
 
1008
  "node_i": "3688",
1009
+ "native_tokenizers": [
1010
+ "Taml"
1011
+ ],
1012
  "scripts": [
1013
  "Taml",
1014
  "Latn"
1015
+ ]
 
1016
  },
1017
  {
1018
  "name": "Kurumba, Betta",
1019
  "iso_1_code": null,
1020
  "iso_3_code": "xub",
 
1021
  "children": [],
1022
+ "tokenizers": {},
1023
  "node_i": "3689",
1024
+ "native_tokenizers": [],
1025
+ "scripts": []
1026
  },
1027
  {
1028
  "name": "Yerukula",
1029
  "iso_1_code": null,
1030
  "iso_3_code": "yeu",
 
1031
  "children": [],
1032
+ "tokenizers": {},
1033
  "node_i": "3690",
1034
+ "native_tokenizers": [],
1035
+ "scripts": []
1036
  }
1037
  ],
1038
+ "tokenizers": {
1039
+ "Taml": {
1040
+ "full_object": "IndicNLPTokenizer(\"ta\")",
1041
+ "original_lang_name": "tamil",
1042
+ "original_lang_code": "tam",
1043
+ "script": "Taml",
1044
+ "class_name": "IndicNLPTokenizer"
1045
+ }
1046
+ },
1047
  "node_i": "3681",
1048
+ "native_tokenizers": [],
1049
+ "scripts": []
1050
  }
1051
  ],
1052
+ "tokenizers": {
1053
+ "Mlym": {
1054
+ "full_object": "IndicNLPTokenizer(\"ml\")",
1055
+ "original_lang_name": "malayalam",
1056
+ "original_lang_code": "mal",
1057
+ "script": "Mlym",
1058
+ "class_name": "IndicNLPTokenizer"
1059
+ },
1060
+ "Taml": {
1061
+ "full_object": "IndicNLPTokenizer(\"ta\")",
1062
+ "original_lang_name": "tamil",
1063
+ "original_lang_code": "tam",
1064
+ "script": "Taml",
1065
+ "class_name": "IndicNLPTokenizer"
1066
+ }
1067
+ },
1068
  "node_i": "3669",
1069
+ "native_tokenizers": [],
1070
+ "scripts": []
1071
  },
1072
  {
1073
  "name": "Toda-Kota",
1074
  "iso_1_code": null,
1075
  "iso_3_code": null,
 
1076
  "children": [
1077
  {
1078
  "name": "Kota",
1079
  "iso_1_code": null,
1080
  "iso_3_code": "kfe",
 
1081
  "children": [],
1082
+ "tokenizers": {},
1083
  "node_i": "3692",
1084
+ "native_tokenizers": [],
1085
+ "scripts": []
1086
  },
1087
  {
1088
  "name": "Toda",
1089
  "iso_1_code": null,
1090
  "iso_3_code": "tcx",
 
1091
  "children": [],
1092
+ "tokenizers": {},
1093
  "node_i": "3693",
1094
+ "native_tokenizers": [],
1095
+ "scripts": []
1096
  }
1097
  ],
1098
+ "tokenizers": {},
1099
  "node_i": "3691",
1100
+ "native_tokenizers": [],
1101
+ "scripts": []
1102
  }
1103
  ],
1104
+ "tokenizers": {
1105
+ "Mlym": {
1106
+ "full_object": "IndicNLPTokenizer(\"ml\")",
1107
+ "original_lang_name": "malayalam",
1108
+ "original_lang_code": "mal",
1109
+ "script": "Mlym",
1110
+ "class_name": "IndicNLPTokenizer"
1111
+ },
1112
+ "Taml": {
1113
+ "full_object": "IndicNLPTokenizer(\"ta\")",
1114
+ "original_lang_name": "tamil",
1115
+ "original_lang_code": "tam",
1116
+ "script": "Taml",
1117
+ "class_name": "IndicNLPTokenizer"
1118
+ }
1119
+ },
1120
  "node_i": "3662",
1121
+ "native_tokenizers": [],
1122
+ "scripts": []
1123
  },
1124
  {
1125
  "name": "Unclassified",
1126
  "iso_1_code": null,
1127
  "iso_3_code": null,
 
1128
  "children": [
1129
  {
1130
  "name": "Chetti, Wayanad",
1131
  "iso_1_code": null,
1132
  "iso_3_code": "ctt",
 
1133
  "children": [],
1134
+ "tokenizers": {},
1135
  "node_i": "3695",
1136
+ "native_tokenizers": [],
1137
+ "scripts": []
1138
  }
1139
  ],
1140
+ "tokenizers": {},
1141
  "node_i": "3694",
1142
+ "native_tokenizers": [],
1143
+ "scripts": []
1144
  }
1145
  ],
 
 
 
 
 
 
 
 
1146
  "tokenizers": {
1147
  "Knda": {
1148
+ "full_object": "IndicNLPTokenizer(\"kn\")",
1149
  "original_lang_name": "kannada",
1150
  "original_lang_code": "kan",
1151
+ "script": "Knda",
1152
+ "class_name": "IndicNLPTokenizer"
1153
+ },
1154
+ "Mlym": {
1155
+ "full_object": "IndicNLPTokenizer(\"ml\")",
1156
+ "original_lang_name": "malayalam",
1157
+ "original_lang_code": "mal",
1158
+ "script": "Mlym",
1159
+ "class_name": "IndicNLPTokenizer"
1160
+ },
1161
+ "Taml": {
1162
+ "full_object": "IndicNLPTokenizer(\"ta\")",
1163
+ "original_lang_name": "tamil",
1164
+ "original_lang_code": "tam",
1165
+ "script": "Taml",
1166
+ "class_name": "IndicNLPTokenizer"
1167
  }
1168
  },
1169
+ "node_i": "3656",
1170
+ "native_tokenizers": [],
1171
+ "scripts": []
1172
+ },
1173
+ {
1174
+ "name": "Tulu",
1175
+ "iso_1_code": null,
1176
+ "iso_3_code": null,
1177
  "children": [
1178
  {
1179
  "name": "Bellari",
1180
  "iso_1_code": null,
1181
  "iso_3_code": "brw",
 
1182
  "children": [],
1183
+ "tokenizers": {},
1184
  "node_i": "3697",
1185
+ "native_tokenizers": [],
1186
+ "scripts": []
1187
  },
1188
  {
1189
  "name": "Kudiya",
1190
  "iso_1_code": null,
1191
  "iso_3_code": "kfg",
 
1192
  "children": [],
1193
+ "tokenizers": {},
1194
  "node_i": "3698",
1195
+ "native_tokenizers": [],
1196
+ "scripts": []
1197
  },
1198
  {
1199
  "name": "Tulu",
1200
  "iso_1_code": null,
1201
  "iso_3_code": "tcy",
1202
+ "children": [],
1203
  "tokenizers": {
1204
  "Knda": {
1205
+ "full_object": "IndicNLPTokenizer(\"kn\")",
1206
  "original_lang_name": "kannada",
1207
  "original_lang_code": "kan",
1208
+ "script": "Knda",
1209
+ "class_name": "IndicNLPTokenizer"
 
 
 
 
1210
  }
1211
  },
 
1212
  "node_i": "3699",
1213
+ "native_tokenizers": [],
1214
  "scripts": [
1215
  "Knda"
1216
+ ]
 
1217
  },
1218
  {
1219
  "name": "Koraga",
1220
  "iso_1_code": null,
1221
  "iso_3_code": null,
 
1222
  "children": [
1223
  {
1224
  "name": "Koraga, Korra",
1225
  "iso_1_code": null,
1226
  "iso_3_code": "kfd",
 
1227
  "children": [],
1228
+ "tokenizers": {},
1229
  "node_i": "3701",
1230
+ "native_tokenizers": [],
1231
+ "scripts": []
1232
  },
1233
  {
1234
  "name": "Koraga, Mudu",
1235
  "iso_1_code": null,
1236
  "iso_3_code": "vmd",
 
1237
  "children": [],
1238
+ "tokenizers": {},
1239
  "node_i": "3702",
1240
+ "native_tokenizers": [],
1241
+ "scripts": []
1242
  }
1243
  ],
1244
+ "tokenizers": {},
1245
  "node_i": "3700",
1246
+ "native_tokenizers": [],
1247
+ "scripts": []
1248
  }
1249
  ],
1250
+ "tokenizers": {
1251
+ "Knda": {
1252
+ "full_object": "IndicNLPTokenizer(\"kn\")",
1253
+ "original_lang_name": "kannada",
1254
+ "original_lang_code": "kan",
1255
+ "script": "Knda",
1256
+ "class_name": "IndicNLPTokenizer"
1257
+ }
1258
+ },
1259
  "node_i": "3696",
1260
+ "native_tokenizers": [],
1261
+ "scripts": []
1262
  },
1263
  {
1264
  "name": "Unclassified",
1265
  "iso_1_code": null,
1266
  "iso_3_code": null,
 
1267
  "children": [
1268
  {
1269
  "name": "Mala Malasar",
1270
  "iso_1_code": null,
1271
  "iso_3_code": "ima",
 
1272
  "children": [],
1273
+ "tokenizers": {},
1274
  "node_i": "3704",
1275
+ "native_tokenizers": [],
1276
+ "scripts": []
1277
  },
1278
  {
1279
  "name": "Thachanadan",
1280
  "iso_1_code": null,
1281
  "iso_3_code": "thn",
 
1282
  "children": [],
1283
+ "tokenizers": {},
1284
  "node_i": "3705",
1285
+ "native_tokenizers": [],
1286
+ "scripts": []
1287
  },
1288
  {
1289
  "name": "Ullatan",
1290
  "iso_1_code": null,
1291
  "iso_3_code": "ull",
 
1292
  "children": [],
1293
+ "tokenizers": {},
1294
  "node_i": "3706",
1295
+ "native_tokenizers": [],
1296
+ "scripts": []
1297
  },
1298
  {
1299
  "name": "Malasar",
1300
  "iso_1_code": null,
1301
  "iso_3_code": "ymr",
 
1302
  "children": [],
1303
+ "tokenizers": {},
1304
  "node_i": "3707",
1305
+ "native_tokenizers": [],
1306
+ "scripts": []
1307
  }
1308
  ],
1309
+ "tokenizers": {},
1310
  "node_i": "3703",
1311
+ "native_tokenizers": [],
1312
+ "scripts": []
1313
  }
1314
  ],
1315
+ "tokenizers": {
1316
+ "Knda": {
1317
+ "full_object": "IndicNLPTokenizer(\"kn\")",
1318
+ "original_lang_name": "kannada",
1319
+ "original_lang_code": "kan",
1320
+ "script": "Knda",
1321
+ "class_name": "IndicNLPTokenizer"
1322
+ },
1323
+ "Mlym": {
1324
+ "full_object": "IndicNLPTokenizer(\"ml\")",
1325
+ "original_lang_name": "malayalam",
1326
+ "original_lang_code": "mal",
1327
+ "script": "Mlym",
1328
+ "class_name": "IndicNLPTokenizer"
1329
+ },
1330
+ "Taml": {
1331
+ "full_object": "IndicNLPTokenizer(\"ta\")",
1332
+ "original_lang_name": "tamil",
1333
+ "original_lang_code": "tam",
1334
+ "script": "Taml",
1335
+ "class_name": "IndicNLPTokenizer"
1336
+ }
1337
+ },
1338
  "node_i": "3648",
1339
+ "native_tokenizers": [],
1340
+ "scripts": []
1341
  },
1342
  {
1343
  "name": "Unclassified",
1344
  "iso_1_code": null,
1345
  "iso_3_code": null,
 
1346
  "children": [
1347
  {
1348
  "name": "Allar",
1349
  "iso_1_code": null,
1350
  "iso_3_code": "all",
 
1351
  "children": [],
1352
+ "tokenizers": {},
1353
  "node_i": "3709",
1354
+ "native_tokenizers": [],
1355
+ "scripts": []
1356
  },
1357
  {
1358
  "name": "Bharia",
1359
  "iso_1_code": null,
1360
  "iso_3_code": "bha",
 
1361
  "children": [],
1362
+ "tokenizers": {},
1363
  "node_i": "3710",
1364
+ "native_tokenizers": [],
1365
+ "scripts": []
1366
  },
1367
  {
1368
  "name": "Malankuravan",
1369
  "iso_1_code": null,
1370
  "iso_3_code": "mjo",
 
1371
  "children": [],
1372
+ "tokenizers": {},
1373
  "node_i": "3711",
1374
+ "native_tokenizers": [],
1375
+ "scripts": []
1376
  },
1377
  {
1378
  "name": "Pattapu",
1379
  "iso_1_code": null,
1380
  "iso_3_code": "ptq",
 
1381
  "children": [],
1382
+ "tokenizers": {},
1383
  "node_i": "3712",
1384
+ "native_tokenizers": [],
1385
+ "scripts": []
1386
  },
1387
  {
1388
  "name": "Vishavan",
1389
  "iso_1_code": null,
1390
  "iso_3_code": "vis",
 
1391
  "children": [],
1392
+ "tokenizers": {},
1393
  "node_i": "3713",
1394
+ "native_tokenizers": [],
1395
+ "scripts": []
1396
  }
1397
  ],
1398
+ "tokenizers": {},
1399
  "node_i": "3708",
1400
+ "native_tokenizers": [],
1401
+ "scripts": []
1402
  }
1403
  ],
1404
+ "tokenizers": {
1405
+ "Telu": {
1406
+ "full_object": "IndicNLPTokenizer(\"te\")",
1407
+ "original_lang_name": "telugu",
1408
+ "original_lang_code": "tel",
1409
+ "script": "Telu",
1410
+ "class_name": "IndicNLPTokenizer"
1411
+ },
1412
+ "Knda": {
1413
+ "full_object": "IndicNLPTokenizer(\"kn\")",
1414
+ "original_lang_name": "kannada",
1415
+ "original_lang_code": "kan",
1416
+ "script": "Knda",
1417
+ "class_name": "IndicNLPTokenizer"
1418
+ },
1419
+ "Mlym": {
1420
+ "full_object": "IndicNLPTokenizer(\"ml\")",
1421
+ "original_lang_name": "malayalam",
1422
+ "original_lang_code": "mal",
1423
+ "script": "Mlym",
1424
+ "class_name": "IndicNLPTokenizer"
1425
+ },
1426
+ "Taml": {
1427
+ "full_object": "IndicNLPTokenizer(\"ta\")",
1428
+ "original_lang_name": "tamil",
1429
+ "original_lang_code": "tam",
1430
+ "script": "Taml",
1431
+ "class_name": "IndicNLPTokenizer"
1432
+ }
1433
+ },
1434
  "node_i": "3601",
1435
+ "native_tokenizers": [],
1436
+ "scripts": []
1437
  }
data/East Bird’s Head-Sentani.json CHANGED
@@ -2,173 +2,173 @@
2
  "name": "East Bird\u2019s Head-Sentani",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Burmeso",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Burmeso",
15
  "iso_1_code": null,
16
  "iso_3_code": "bzu",
17
- "tokenizers": {},
18
  "children": [],
 
19
  "node_i": "3716",
20
- "scripts": [],
21
- "own_tokenizer": false
22
  }
23
  ],
 
24
  "node_i": "3715",
25
- "scripts": [],
26
- "own_tokenizer": false
27
  },
28
  {
29
  "name": "East Bird\u2019s Head",
30
  "iso_1_code": null,
31
  "iso_3_code": null,
32
- "tokenizers": {},
33
  "children": [
34
  {
35
  "name": "Mantion",
36
  "iso_1_code": null,
37
  "iso_3_code": null,
38
- "tokenizers": {},
39
  "children": [
40
  {
41
  "name": "Sougb",
42
  "iso_1_code": null,
43
  "iso_3_code": "mnx",
44
- "tokenizers": {},
45
  "children": [],
 
46
  "node_i": "3719",
 
47
  "scripts": [
48
  "Latn"
49
- ],
50
- "own_tokenizer": false
51
  }
52
  ],
 
53
  "node_i": "3718",
54
- "scripts": [],
55
- "own_tokenizer": false
56
  },
57
  {
58
  "name": "Meax",
59
  "iso_1_code": null,
60
  "iso_3_code": null,
61
- "tokenizers": {},
62
  "children": [
63
  {
64
  "name": "Meyah",
65
  "iso_1_code": null,
66
  "iso_3_code": "mej",
67
- "tokenizers": {},
68
  "children": [],
 
69
  "node_i": "3721",
 
70
  "scripts": [
71
  "Latn"
72
- ],
73
- "own_tokenizer": false
74
  },
75
  {
76
  "name": "Moskona",
77
  "iso_1_code": null,
78
  "iso_3_code": "mtj",
79
- "tokenizers": {},
80
  "children": [],
 
81
  "node_i": "3722",
 
82
  "scripts": [
83
  "Latn"
84
- ],
85
- "own_tokenizer": false
86
  }
87
  ],
 
88
  "node_i": "3720",
89
- "scripts": [],
90
- "own_tokenizer": false
91
  }
92
  ],
 
93
  "node_i": "3717",
94
- "scripts": [],
95
- "own_tokenizer": false
96
  },
97
  {
98
  "name": "Sentani",
99
  "iso_1_code": null,
100
  "iso_3_code": null,
101
- "tokenizers": {},
102
  "children": [
103
  {
104
  "name": "Demta",
105
  "iso_1_code": null,
106
  "iso_3_code": null,
107
- "tokenizers": {},
108
  "children": [
109
  {
110
  "name": "Sowari",
111
  "iso_1_code": null,
112
  "iso_3_code": "dmy",
113
- "tokenizers": {},
114
  "children": [],
 
115
  "node_i": "3725",
116
- "scripts": [],
117
- "own_tokenizer": false
118
  }
119
  ],
 
120
  "node_i": "3724",
121
- "scripts": [],
122
- "own_tokenizer": false
123
  },
124
  {
125
  "name": "Sentani Proper",
126
  "iso_1_code": null,
127
  "iso_3_code": null,
128
- "tokenizers": {},
129
  "children": [
130
  {
131
  "name": "Nafri",
132
  "iso_1_code": null,
133
  "iso_3_code": "nxx",
134
- "tokenizers": {},
135
  "children": [],
 
136
  "node_i": "3727",
137
- "scripts": [],
138
- "own_tokenizer": false
139
  },
140
  {
141
  "name": "Sentani",
142
  "iso_1_code": null,
143
  "iso_3_code": "set",
144
- "tokenizers": {},
145
  "children": [],
 
146
  "node_i": "3728",
147
- "scripts": [],
148
- "own_tokenizer": false
149
  },
150
  {
151
  "name": "Tabla",
152
  "iso_1_code": null,
153
  "iso_3_code": "tnm",
154
- "tokenizers": {},
155
  "children": [],
 
156
  "node_i": "3729",
157
- "scripts": [],
158
- "own_tokenizer": false
159
  }
160
  ],
 
161
  "node_i": "3726",
162
- "scripts": [],
163
- "own_tokenizer": false
164
  }
165
  ],
 
166
  "node_i": "3723",
167
- "scripts": [],
168
- "own_tokenizer": false
169
  }
170
  ],
 
171
  "node_i": "3714",
172
- "scripts": [],
173
- "own_tokenizer": false
174
  }
 
2
  "name": "East Bird\u2019s Head-Sentani",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Burmeso",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Burmeso",
13
  "iso_1_code": null,
14
  "iso_3_code": "bzu",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "3716",
18
+ "native_tokenizers": [],
19
+ "scripts": []
20
  }
21
  ],
22
+ "tokenizers": {},
23
  "node_i": "3715",
24
+ "native_tokenizers": [],
25
+ "scripts": []
26
  },
27
  {
28
  "name": "East Bird\u2019s Head",
29
  "iso_1_code": null,
30
  "iso_3_code": null,
 
31
  "children": [
32
  {
33
  "name": "Mantion",
34
  "iso_1_code": null,
35
  "iso_3_code": null,
 
36
  "children": [
37
  {
38
  "name": "Sougb",
39
  "iso_1_code": null,
40
  "iso_3_code": "mnx",
 
41
  "children": [],
42
+ "tokenizers": {},
43
  "node_i": "3719",
44
+ "native_tokenizers": [],
45
  "scripts": [
46
  "Latn"
47
+ ]
 
48
  }
49
  ],
50
+ "tokenizers": {},
51
  "node_i": "3718",
52
+ "native_tokenizers": [],
53
+ "scripts": []
54
  },
55
  {
56
  "name": "Meax",
57
  "iso_1_code": null,
58
  "iso_3_code": null,
 
59
  "children": [
60
  {
61
  "name": "Meyah",
62
  "iso_1_code": null,
63
  "iso_3_code": "mej",
 
64
  "children": [],
65
+ "tokenizers": {},
66
  "node_i": "3721",
67
+ "native_tokenizers": [],
68
  "scripts": [
69
  "Latn"
70
+ ]
 
71
  },
72
  {
73
  "name": "Moskona",
74
  "iso_1_code": null,
75
  "iso_3_code": "mtj",
 
76
  "children": [],
77
+ "tokenizers": {},
78
  "node_i": "3722",
79
+ "native_tokenizers": [],
80
  "scripts": [
81
  "Latn"
82
+ ]
 
83
  }
84
  ],
85
+ "tokenizers": {},
86
  "node_i": "3720",
87
+ "native_tokenizers": [],
88
+ "scripts": []
89
  }
90
  ],
91
+ "tokenizers": {},
92
  "node_i": "3717",
93
+ "native_tokenizers": [],
94
+ "scripts": []
95
  },
96
  {
97
  "name": "Sentani",
98
  "iso_1_code": null,
99
  "iso_3_code": null,
 
100
  "children": [
101
  {
102
  "name": "Demta",
103
  "iso_1_code": null,
104
  "iso_3_code": null,
 
105
  "children": [
106
  {
107
  "name": "Sowari",
108
  "iso_1_code": null,
109
  "iso_3_code": "dmy",
 
110
  "children": [],
111
+ "tokenizers": {},
112
  "node_i": "3725",
113
+ "native_tokenizers": [],
114
+ "scripts": []
115
  }
116
  ],
117
+ "tokenizers": {},
118
  "node_i": "3724",
119
+ "native_tokenizers": [],
120
+ "scripts": []
121
  },
122
  {
123
  "name": "Sentani Proper",
124
  "iso_1_code": null,
125
  "iso_3_code": null,
 
126
  "children": [
127
  {
128
  "name": "Nafri",
129
  "iso_1_code": null,
130
  "iso_3_code": "nxx",
 
131
  "children": [],
132
+ "tokenizers": {},
133
  "node_i": "3727",
134
+ "native_tokenizers": [],
135
+ "scripts": []
136
  },
137
  {
138
  "name": "Sentani",
139
  "iso_1_code": null,
140
  "iso_3_code": "set",
 
141
  "children": [],
142
+ "tokenizers": {},
143
  "node_i": "3728",
144
+ "native_tokenizers": [],
145
+ "scripts": []
146
  },
147
  {
148
  "name": "Tabla",
149
  "iso_1_code": null,
150
  "iso_3_code": "tnm",
 
151
  "children": [],
152
+ "tokenizers": {},
153
  "node_i": "3729",
154
+ "native_tokenizers": [],
155
+ "scripts": []
156
  }
157
  ],
158
+ "tokenizers": {},
159
  "node_i": "3726",
160
+ "native_tokenizers": [],
161
+ "scripts": []
162
  }
163
  ],
164
+ "tokenizers": {},
165
  "node_i": "3723",
166
+ "native_tokenizers": [],
167
+ "scripts": []
168
  }
169
  ],
170
+ "tokenizers": {},
171
  "node_i": "3714",
172
+ "native_tokenizers": [],
173
+ "scripts": []
174
  }
data/East Geelvink Bay.json CHANGED
@@ -2,143 +2,143 @@
2
  "name": "East Geelvink Bay",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Anasi",
9
  "iso_1_code": null,
10
  "iso_3_code": "bpo",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3731",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Barapasi",
19
  "iso_1_code": null,
20
  "iso_3_code": "brp",
21
- "tokenizers": {},
22
  "children": [],
 
23
  "node_i": "3732",
24
- "scripts": [],
25
- "own_tokenizer": false
26
  },
27
  {
28
  "name": "Burate",
29
  "iso_1_code": null,
30
  "iso_3_code": "bti",
31
- "tokenizers": {},
32
  "children": [],
 
33
  "node_i": "3733",
34
- "scripts": [],
35
- "own_tokenizer": false
36
  },
37
  {
38
  "name": "Kehu",
39
  "iso_1_code": null,
40
  "iso_3_code": "khh",
41
- "tokenizers": {},
42
  "children": [],
 
43
  "node_i": "3734",
44
- "scripts": [],
45
- "own_tokenizer": false
46
  },
47
  {
48
  "name": "Kofei",
49
  "iso_1_code": null,
50
  "iso_3_code": "kpi",
51
- "tokenizers": {},
52
  "children": [],
 
53
  "node_i": "3735",
54
- "scripts": [],
55
- "own_tokenizer": false
56
  },
57
  {
58
  "name": "Nisa",
59
  "iso_1_code": null,
60
  "iso_3_code": "njs",
61
- "tokenizers": {},
62
  "children": [],
 
63
  "node_i": "3736",
64
- "scripts": [],
65
- "own_tokenizer": false
66
  },
67
  {
68
  "name": "Sauri",
69
  "iso_1_code": null,
70
  "iso_3_code": "srt",
71
- "tokenizers": {},
72
  "children": [],
 
73
  "node_i": "3737",
74
- "scripts": [],
75
- "own_tokenizer": false
76
  },
77
  {
78
  "name": "Tefaro",
79
  "iso_1_code": null,
80
  "iso_3_code": "tfo",
81
- "tokenizers": {},
82
  "children": [],
 
83
  "node_i": "3738",
84
- "scripts": [],
85
- "own_tokenizer": false
86
  },
87
  {
88
  "name": "Tunggare",
89
  "iso_1_code": null,
90
  "iso_3_code": "trt",
91
- "tokenizers": {},
92
  "children": [],
 
93
  "node_i": "3739",
94
- "scripts": [],
95
- "own_tokenizer": false
96
  },
97
  {
98
  "name": "Woria",
99
  "iso_1_code": null,
100
  "iso_3_code": "wor",
101
- "tokenizers": {},
102
  "children": [],
 
103
  "node_i": "3740",
104
- "scripts": [],
105
- "own_tokenizer": false
106
  },
107
  {
108
  "name": "Bauzi",
109
  "iso_1_code": null,
110
  "iso_3_code": null,
111
- "tokenizers": {},
112
  "children": [
113
  {
114
  "name": "Bauzi",
115
  "iso_1_code": null,
116
  "iso_3_code": "bvz",
117
- "tokenizers": {},
118
  "children": [],
 
119
  "node_i": "3742",
 
120
  "scripts": [
121
  "Latn"
122
- ],
123
- "own_tokenizer": false
124
  },
125
  {
126
  "name": "Demisa",
127
  "iso_1_code": null,
128
  "iso_3_code": "dei",
129
- "tokenizers": {},
130
  "children": [],
 
131
  "node_i": "3743",
132
- "scripts": [],
133
- "own_tokenizer": false
134
  }
135
  ],
 
136
  "node_i": "3741",
137
- "scripts": [],
138
- "own_tokenizer": false
139
  }
140
  ],
 
141
  "node_i": "3730",
142
- "scripts": [],
143
- "own_tokenizer": false
144
  }
 
2
  "name": "East Geelvink Bay",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Anasi",
8
  "iso_1_code": null,
9
  "iso_3_code": "bpo",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3731",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Barapasi",
18
  "iso_1_code": null,
19
  "iso_3_code": "brp",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "3732",
23
+ "native_tokenizers": [],
24
+ "scripts": []
25
  },
26
  {
27
  "name": "Burate",
28
  "iso_1_code": null,
29
  "iso_3_code": "bti",
 
30
  "children": [],
31
+ "tokenizers": {},
32
  "node_i": "3733",
33
+ "native_tokenizers": [],
34
+ "scripts": []
35
  },
36
  {
37
  "name": "Kehu",
38
  "iso_1_code": null,
39
  "iso_3_code": "khh",
 
40
  "children": [],
41
+ "tokenizers": {},
42
  "node_i": "3734",
43
+ "native_tokenizers": [],
44
+ "scripts": []
45
  },
46
  {
47
  "name": "Kofei",
48
  "iso_1_code": null,
49
  "iso_3_code": "kpi",
 
50
  "children": [],
51
+ "tokenizers": {},
52
  "node_i": "3735",
53
+ "native_tokenizers": [],
54
+ "scripts": []
55
  },
56
  {
57
  "name": "Nisa",
58
  "iso_1_code": null,
59
  "iso_3_code": "njs",
 
60
  "children": [],
61
+ "tokenizers": {},
62
  "node_i": "3736",
63
+ "native_tokenizers": [],
64
+ "scripts": []
65
  },
66
  {
67
  "name": "Sauri",
68
  "iso_1_code": null,
69
  "iso_3_code": "srt",
 
70
  "children": [],
71
+ "tokenizers": {},
72
  "node_i": "3737",
73
+ "native_tokenizers": [],
74
+ "scripts": []
75
  },
76
  {
77
  "name": "Tefaro",
78
  "iso_1_code": null,
79
  "iso_3_code": "tfo",
 
80
  "children": [],
81
+ "tokenizers": {},
82
  "node_i": "3738",
83
+ "native_tokenizers": [],
84
+ "scripts": []
85
  },
86
  {
87
  "name": "Tunggare",
88
  "iso_1_code": null,
89
  "iso_3_code": "trt",
 
90
  "children": [],
91
+ "tokenizers": {},
92
  "node_i": "3739",
93
+ "native_tokenizers": [],
94
+ "scripts": []
95
  },
96
  {
97
  "name": "Woria",
98
  "iso_1_code": null,
99
  "iso_3_code": "wor",
 
100
  "children": [],
101
+ "tokenizers": {},
102
  "node_i": "3740",
103
+ "native_tokenizers": [],
104
+ "scripts": []
105
  },
106
  {
107
  "name": "Bauzi",
108
  "iso_1_code": null,
109
  "iso_3_code": null,
 
110
  "children": [
111
  {
112
  "name": "Bauzi",
113
  "iso_1_code": null,
114
  "iso_3_code": "bvz",
 
115
  "children": [],
116
+ "tokenizers": {},
117
  "node_i": "3742",
118
+ "native_tokenizers": [],
119
  "scripts": [
120
  "Latn"
121
+ ]
 
122
  },
123
  {
124
  "name": "Demisa",
125
  "iso_1_code": null,
126
  "iso_3_code": "dei",
 
127
  "children": [],
128
+ "tokenizers": {},
129
  "node_i": "3743",
130
+ "native_tokenizers": [],
131
+ "scripts": []
132
  }
133
  ],
134
+ "tokenizers": {},
135
  "node_i": "3741",
136
+ "native_tokenizers": [],
137
+ "scripts": []
138
  }
139
  ],
140
+ "tokenizers": {},
141
  "node_i": "3730",
142
+ "native_tokenizers": [],
143
+ "scripts": []
144
  }
data/East New Britain.json CHANGED
@@ -2,104 +2,104 @@
2
  "name": "East New Britain",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Baining",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Qaqet",
15
  "iso_1_code": null,
16
  "iso_3_code": "byx",
17
- "tokenizers": {},
18
  "children": [],
 
19
  "node_i": "3746",
 
20
  "scripts": [
21
  "Latn"
22
- ],
23
- "own_tokenizer": false
24
  },
25
  {
26
  "name": "Kairak",
27
  "iso_1_code": null,
28
  "iso_3_code": "ckr",
29
- "tokenizers": {},
30
  "children": [],
 
31
  "node_i": "3747",
32
- "scripts": [],
33
- "own_tokenizer": false
34
  },
35
  {
36
  "name": "Mali",
37
  "iso_1_code": null,
38
  "iso_3_code": "gcc",
39
- "tokenizers": {},
40
  "children": [],
 
41
  "node_i": "3748",
42
- "scripts": [],
43
- "own_tokenizer": false
44
  },
45
  {
46
  "name": "Simbali",
47
  "iso_1_code": null,
48
  "iso_3_code": "smg",
49
- "tokenizers": {},
50
  "children": [],
 
51
  "node_i": "3749",
52
- "scripts": [],
53
- "own_tokenizer": false
54
  },
55
  {
56
  "name": "Ura",
57
  "iso_1_code": null,
58
  "iso_3_code": "uro",
59
- "tokenizers": {},
60
  "children": [],
 
61
  "node_i": "3750",
62
- "scripts": [],
63
- "own_tokenizer": false
64
  },
65
  {
66
  "name": "Makolkol",
67
  "iso_1_code": null,
68
  "iso_3_code": "zmh",
69
- "tokenizers": {},
70
  "children": [],
 
71
  "node_i": "3751",
72
- "scripts": [],
73
- "own_tokenizer": false
74
  }
75
  ],
 
76
  "node_i": "3745",
77
- "scripts": [],
78
- "own_tokenizer": false
79
  },
80
  {
81
  "name": "Taulil",
82
  "iso_1_code": null,
83
  "iso_3_code": null,
84
- "tokenizers": {},
85
  "children": [
86
  {
87
  "name": "Tulil",
88
  "iso_1_code": null,
89
  "iso_3_code": "tuh",
90
- "tokenizers": {},
91
  "children": [],
 
92
  "node_i": "3753",
93
- "scripts": [],
94
- "own_tokenizer": false
95
  }
96
  ],
 
97
  "node_i": "3752",
98
- "scripts": [],
99
- "own_tokenizer": false
100
  }
101
  ],
 
102
  "node_i": "3744",
103
- "scripts": [],
104
- "own_tokenizer": false
105
  }
 
2
  "name": "East New Britain",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Baining",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Qaqet",
13
  "iso_1_code": null,
14
  "iso_3_code": "byx",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "3746",
18
+ "native_tokenizers": [],
19
  "scripts": [
20
  "Latn"
21
+ ]
 
22
  },
23
  {
24
  "name": "Kairak",
25
  "iso_1_code": null,
26
  "iso_3_code": "ckr",
 
27
  "children": [],
28
+ "tokenizers": {},
29
  "node_i": "3747",
30
+ "native_tokenizers": [],
31
+ "scripts": []
32
  },
33
  {
34
  "name": "Mali",
35
  "iso_1_code": null,
36
  "iso_3_code": "gcc",
 
37
  "children": [],
38
+ "tokenizers": {},
39
  "node_i": "3748",
40
+ "native_tokenizers": [],
41
+ "scripts": []
42
  },
43
  {
44
  "name": "Simbali",
45
  "iso_1_code": null,
46
  "iso_3_code": "smg",
 
47
  "children": [],
48
+ "tokenizers": {},
49
  "node_i": "3749",
50
+ "native_tokenizers": [],
51
+ "scripts": []
52
  },
53
  {
54
  "name": "Ura",
55
  "iso_1_code": null,
56
  "iso_3_code": "uro",
 
57
  "children": [],
58
+ "tokenizers": {},
59
  "node_i": "3750",
60
+ "native_tokenizers": [],
61
+ "scripts": []
62
  },
63
  {
64
  "name": "Makolkol",
65
  "iso_1_code": null,
66
  "iso_3_code": "zmh",
 
67
  "children": [],
68
+ "tokenizers": {},
69
  "node_i": "3751",
70
+ "native_tokenizers": [],
71
+ "scripts": []
72
  }
73
  ],
74
+ "tokenizers": {},
75
  "node_i": "3745",
76
+ "native_tokenizers": [],
77
+ "scripts": []
78
  },
79
  {
80
  "name": "Taulil",
81
  "iso_1_code": null,
82
  "iso_3_code": null,
 
83
  "children": [
84
  {
85
  "name": "Tulil",
86
  "iso_1_code": null,
87
  "iso_3_code": "tuh",
 
88
  "children": [],
89
+ "tokenizers": {},
90
  "node_i": "3753",
91
+ "native_tokenizers": [],
92
+ "scripts": []
93
  }
94
  ],
95
+ "tokenizers": {},
96
  "node_i": "3752",
97
+ "native_tokenizers": [],
98
+ "scripts": []
99
  }
100
  ],
101
+ "tokenizers": {},
102
  "node_i": "3744",
103
+ "native_tokenizers": [],
104
+ "scripts": []
105
  }
data/Eastern Trans-Fly.json CHANGED
@@ -2,54 +2,54 @@
2
  "name": "Eastern Trans-Fly",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Bine",
9
  "iso_1_code": null,
10
  "iso_3_code": "bon",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3755",
 
14
  "scripts": [
15
  "Latn"
16
- ],
17
- "own_tokenizer": false
18
  },
19
  {
20
  "name": "Wipi",
21
  "iso_1_code": null,
22
  "iso_3_code": "gdr",
23
- "tokenizers": {},
24
  "children": [],
 
25
  "node_i": "3756",
 
26
  "scripts": [
27
  "Latn"
28
- ],
29
- "own_tokenizer": false
30
  },
31
  {
32
  "name": "Gizrra",
33
  "iso_1_code": null,
34
  "iso_3_code": "tof",
35
- "tokenizers": {},
36
  "children": [],
 
37
  "node_i": "3757",
38
- "scripts": [],
39
- "own_tokenizer": false
40
  },
41
  {
42
  "name": "Meriam Mir",
43
  "iso_1_code": null,
44
  "iso_3_code": "ulk",
45
- "tokenizers": {},
46
  "children": [],
 
47
  "node_i": "3758",
48
- "scripts": [],
49
- "own_tokenizer": false
50
  }
51
  ],
 
52
  "node_i": "3754",
53
- "scripts": [],
54
- "own_tokenizer": false
55
  }
 
2
  "name": "Eastern Trans-Fly",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Bine",
8
  "iso_1_code": null,
9
  "iso_3_code": "bon",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3755",
13
+ "native_tokenizers": [],
14
  "scripts": [
15
  "Latn"
16
+ ]
 
17
  },
18
  {
19
  "name": "Wipi",
20
  "iso_1_code": null,
21
  "iso_3_code": "gdr",
 
22
  "children": [],
23
+ "tokenizers": {},
24
  "node_i": "3756",
25
+ "native_tokenizers": [],
26
  "scripts": [
27
  "Latn"
28
+ ]
 
29
  },
30
  {
31
  "name": "Gizrra",
32
  "iso_1_code": null,
33
  "iso_3_code": "tof",
 
34
  "children": [],
35
+ "tokenizers": {},
36
  "node_i": "3757",
37
+ "native_tokenizers": [],
38
+ "scripts": []
39
  },
40
  {
41
  "name": "Meriam Mir",
42
  "iso_1_code": null,
43
  "iso_3_code": "ulk",
 
44
  "children": [],
45
+ "tokenizers": {},
46
  "node_i": "3758",
47
+ "native_tokenizers": [],
48
+ "scripts": []
49
  }
50
  ],
51
+ "tokenizers": {},
52
  "node_i": "3754",
53
+ "native_tokenizers": [],
54
+ "scripts": []
55
  }
data/Eskimo-Aleut.json CHANGED
@@ -2,189 +2,189 @@
2
  "name": "Eskimo-Aleut",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Aleut",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Aleut",
15
  "iso_1_code": null,
16
  "iso_3_code": "ale",
17
- "tokenizers": {},
18
  "children": [],
 
19
  "node_i": "3761",
20
- "scripts": [],
21
- "own_tokenizer": false
22
  }
23
  ],
 
24
  "node_i": "3760",
25
- "scripts": [],
26
- "own_tokenizer": false
27
  },
28
  {
29
  "name": "Eskimo",
30
  "iso_1_code": null,
31
  "iso_3_code": null,
32
- "tokenizers": {},
33
  "children": [
34
  {
35
  "name": "Inuit-Inupiaq",
36
  "iso_1_code": null,
37
  "iso_3_code": null,
38
- "tokenizers": {},
39
  "children": [
40
  {
41
  "name": "Inupiatun, North Alaskan",
42
  "iso_1_code": "ik",
43
  "iso_3_code": "esi",
44
- "tokenizers": {},
45
  "children": [],
 
46
  "node_i": "3764",
 
47
  "scripts": [
48
  "Latn"
49
- ],
50
- "own_tokenizer": false
51
  },
52
  {
53
  "name": "Inupiatun, Northwest Alaska",
54
  "iso_1_code": "ik",
55
  "iso_3_code": "esk",
56
- "tokenizers": {},
57
  "children": [],
 
58
  "node_i": "3765",
 
59
  "scripts": [
60
  "Latn"
61
- ],
62
- "own_tokenizer": false
63
  },
64
  {
65
  "name": "Inuktitut, Eastern Canadian",
66
  "iso_1_code": "iu",
67
  "iso_3_code": "ike",
68
- "tokenizers": {},
69
  "children": [],
 
70
  "node_i": "3766",
 
71
  "scripts": [
72
  "Cans"
73
- ],
74
- "own_tokenizer": false
75
  },
76
  {
77
  "name": "Inuinnaqtun",
78
  "iso_1_code": "iu",
79
  "iso_3_code": "ikt",
80
- "tokenizers": {},
81
  "children": [],
 
82
  "node_i": "3767",
 
83
  "scripts": [
84
  "Latn"
85
- ],
86
- "own_tokenizer": false
87
  },
88
  {
89
  "name": "Greenlandic",
90
  "iso_1_code": "kl",
91
  "iso_3_code": "kal",
92
- "tokenizers": {},
93
  "children": [],
 
94
  "node_i": "3768",
 
95
  "scripts": [
96
  "Latn"
97
- ],
98
- "own_tokenizer": false
99
  }
100
  ],
 
101
  "node_i": "3763",
102
- "scripts": [],
103
- "own_tokenizer": false
104
  },
105
  {
106
  "name": "Yupik",
107
  "iso_1_code": null,
108
  "iso_3_code": null,
109
- "tokenizers": {},
110
  "children": [
111
  {
112
  "name": "Yupik, Saint Lawrence Island",
113
  "iso_1_code": null,
114
  "iso_3_code": "ess",
115
- "tokenizers": {},
116
  "children": [],
 
117
  "node_i": "3770",
 
118
  "scripts": [
119
  "Latn"
120
- ],
121
- "own_tokenizer": false
122
  },
123
  {
124
  "name": "Yupik, Naukan",
125
  "iso_1_code": null,
126
  "iso_3_code": "ynk",
127
- "tokenizers": {},
128
  "children": [],
 
129
  "node_i": "3771",
130
- "scripts": [],
131
- "own_tokenizer": false
132
  },
133
  {
134
  "name": "Yupik, Sirenik",
135
  "iso_1_code": null,
136
  "iso_3_code": "ysr",
137
- "tokenizers": {},
138
  "children": [],
 
139
  "node_i": "3772",
140
- "scripts": [],
141
- "own_tokenizer": false
142
  },
143
  {
144
  "name": "Alaskan Yupik",
145
  "iso_1_code": null,
146
  "iso_3_code": null,
147
- "tokenizers": {},
148
  "children": [
149
  {
150
  "name": "Yupik, Pacific Gulf",
151
  "iso_1_code": null,
152
  "iso_3_code": "ems",
153
- "tokenizers": {},
154
  "children": [],
 
155
  "node_i": "3774",
156
- "scripts": [],
157
- "own_tokenizer": false
158
  },
159
  {
160
  "name": "Yupik, Central",
161
  "iso_1_code": null,
162
  "iso_3_code": "esu",
163
- "tokenizers": {},
164
  "children": [],
 
165
  "node_i": "3775",
 
166
  "scripts": [
167
  "Latn"
168
- ],
169
- "own_tokenizer": false
170
  }
171
  ],
 
172
  "node_i": "3773",
173
- "scripts": [],
174
- "own_tokenizer": false
175
  }
176
  ],
 
177
  "node_i": "3769",
178
- "scripts": [],
179
- "own_tokenizer": false
180
  }
181
  ],
 
182
  "node_i": "3762",
183
- "scripts": [],
184
- "own_tokenizer": false
185
  }
186
  ],
 
187
  "node_i": "3759",
188
- "scripts": [],
189
- "own_tokenizer": false
190
  }
 
2
  "name": "Eskimo-Aleut",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Aleut",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Aleut",
13
  "iso_1_code": null,
14
  "iso_3_code": "ale",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "3761",
18
+ "native_tokenizers": [],
19
+ "scripts": []
20
  }
21
  ],
22
+ "tokenizers": {},
23
  "node_i": "3760",
24
+ "native_tokenizers": [],
25
+ "scripts": []
26
  },
27
  {
28
  "name": "Eskimo",
29
  "iso_1_code": null,
30
  "iso_3_code": null,
 
31
  "children": [
32
  {
33
  "name": "Inuit-Inupiaq",
34
  "iso_1_code": null,
35
  "iso_3_code": null,
 
36
  "children": [
37
  {
38
  "name": "Inupiatun, North Alaskan",
39
  "iso_1_code": "ik",
40
  "iso_3_code": "esi",
 
41
  "children": [],
42
+ "tokenizers": {},
43
  "node_i": "3764",
44
+ "native_tokenizers": [],
45
  "scripts": [
46
  "Latn"
47
+ ]
 
48
  },
49
  {
50
  "name": "Inupiatun, Northwest Alaska",
51
  "iso_1_code": "ik",
52
  "iso_3_code": "esk",
 
53
  "children": [],
54
+ "tokenizers": {},
55
  "node_i": "3765",
56
+ "native_tokenizers": [],
57
  "scripts": [
58
  "Latn"
59
+ ]
 
60
  },
61
  {
62
  "name": "Inuktitut, Eastern Canadian",
63
  "iso_1_code": "iu",
64
  "iso_3_code": "ike",
 
65
  "children": [],
66
+ "tokenizers": {},
67
  "node_i": "3766",
68
+ "native_tokenizers": [],
69
  "scripts": [
70
  "Cans"
71
+ ]
 
72
  },
73
  {
74
  "name": "Inuinnaqtun",
75
  "iso_1_code": "iu",
76
  "iso_3_code": "ikt",
 
77
  "children": [],
78
+ "tokenizers": {},
79
  "node_i": "3767",
80
+ "native_tokenizers": [],
81
  "scripts": [
82
  "Latn"
83
+ ]
 
84
  },
85
  {
86
  "name": "Greenlandic",
87
  "iso_1_code": "kl",
88
  "iso_3_code": "kal",
 
89
  "children": [],
90
+ "tokenizers": {},
91
  "node_i": "3768",
92
+ "native_tokenizers": [],
93
  "scripts": [
94
  "Latn"
95
+ ]
 
96
  }
97
  ],
98
+ "tokenizers": {},
99
  "node_i": "3763",
100
+ "native_tokenizers": [],
101
+ "scripts": []
102
  },
103
  {
104
  "name": "Yupik",
105
  "iso_1_code": null,
106
  "iso_3_code": null,
 
107
  "children": [
108
  {
109
  "name": "Yupik, Saint Lawrence Island",
110
  "iso_1_code": null,
111
  "iso_3_code": "ess",
 
112
  "children": [],
113
+ "tokenizers": {},
114
  "node_i": "3770",
115
+ "native_tokenizers": [],
116
  "scripts": [
117
  "Latn"
118
+ ]
 
119
  },
120
  {
121
  "name": "Yupik, Naukan",
122
  "iso_1_code": null,
123
  "iso_3_code": "ynk",
 
124
  "children": [],
125
+ "tokenizers": {},
126
  "node_i": "3771",
127
+ "native_tokenizers": [],
128
+ "scripts": []
129
  },
130
  {
131
  "name": "Yupik, Sirenik",
132
  "iso_1_code": null,
133
  "iso_3_code": "ysr",
 
134
  "children": [],
135
+ "tokenizers": {},
136
  "node_i": "3772",
137
+ "native_tokenizers": [],
138
+ "scripts": []
139
  },
140
  {
141
  "name": "Alaskan Yupik",
142
  "iso_1_code": null,
143
  "iso_3_code": null,
 
144
  "children": [
145
  {
146
  "name": "Yupik, Pacific Gulf",
147
  "iso_1_code": null,
148
  "iso_3_code": "ems",
 
149
  "children": [],
150
+ "tokenizers": {},
151
  "node_i": "3774",
152
+ "native_tokenizers": [],
153
+ "scripts": []
154
  },
155
  {
156
  "name": "Yupik, Central",
157
  "iso_1_code": null,
158
  "iso_3_code": "esu",
 
159
  "children": [],
160
+ "tokenizers": {},
161
  "node_i": "3775",
162
+ "native_tokenizers": [],
163
  "scripts": [
164
  "Latn"
165
+ ]
 
166
  }
167
  ],
168
+ "tokenizers": {},
169
  "node_i": "3773",
170
+ "native_tokenizers": [],
171
+ "scripts": []
172
  }
173
  ],
174
+ "tokenizers": {},
175
  "node_i": "3769",
176
+ "native_tokenizers": [],
177
+ "scripts": []
178
  }
179
  ],
180
+ "tokenizers": {},
181
  "node_i": "3762",
182
+ "native_tokenizers": [],
183
+ "scripts": []
184
  }
185
  ],
186
+ "tokenizers": {},
187
  "node_i": "3759",
188
+ "native_tokenizers": [],
189
+ "scripts": []
190
  }
data/Eyak-Athabaskan.json CHANGED
@@ -2,648 +2,648 @@
2
  "name": "Eyak-Athabaskan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Eyak",
9
  "iso_1_code": null,
10
  "iso_3_code": "eya",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3777",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Athabaskan",
19
  "iso_1_code": null,
20
  "iso_3_code": null,
21
- "tokenizers": {},
22
  "children": [
23
  {
24
  "name": "Apachean",
25
  "iso_1_code": null,
26
  "iso_3_code": null,
27
- "tokenizers": {},
28
  "children": [
29
  {
30
  "name": "Navajo",
31
  "iso_1_code": "nv",
32
  "iso_3_code": "nav",
33
- "tokenizers": {},
34
  "children": [],
 
35
  "node_i": "3780",
 
36
  "scripts": [
37
  "Latn"
38
- ],
39
- "own_tokenizer": false
40
  },
41
  {
42
  "name": "Apache",
43
  "iso_1_code": null,
44
  "iso_3_code": null,
45
- "tokenizers": {},
46
  "children": [
47
  {
48
  "name": "Apache, Jicarilla",
49
  "iso_1_code": null,
50
  "iso_3_code": "apj",
51
- "tokenizers": {},
52
  "children": [],
 
53
  "node_i": "3782",
54
- "scripts": [],
55
- "own_tokenizer": false
56
  },
57
  {
58
  "name": "Apache, Kiowa",
59
  "iso_1_code": null,
60
  "iso_3_code": "apk",
61
- "tokenizers": {},
62
  "children": [],
 
63
  "node_i": "3783",
64
- "scripts": [],
65
- "own_tokenizer": false
66
  },
67
  {
68
  "name": "Apache, Lipan",
69
  "iso_1_code": null,
70
  "iso_3_code": "apl",
71
- "tokenizers": {},
72
  "children": [],
 
73
  "node_i": "3784",
74
- "scripts": [],
75
- "own_tokenizer": false
76
  },
77
  {
78
  "name": "Apache, Mescalero-Chiricahua",
79
  "iso_1_code": null,
80
  "iso_3_code": "apm",
81
- "tokenizers": {},
82
  "children": [],
 
83
  "node_i": "3785",
84
- "scripts": [],
85
- "own_tokenizer": false
86
  },
87
  {
88
  "name": "Apache, Western",
89
  "iso_1_code": null,
90
  "iso_3_code": "apw",
91
- "tokenizers": {},
92
  "children": [],
 
93
  "node_i": "3786",
 
94
  "scripts": [
95
  "Latn"
96
- ],
97
- "own_tokenizer": false
98
  }
99
  ],
 
100
  "node_i": "3781",
101
- "scripts": [],
102
- "own_tokenizer": false
103
  }
104
  ],
 
105
  "node_i": "3779",
106
- "scripts": [],
107
- "own_tokenizer": false
108
  },
109
  {
110
  "name": "Northern Athabaskan",
111
  "iso_1_code": null,
112
  "iso_3_code": null,
113
- "tokenizers": {},
114
  "children": [
115
  {
116
  "name": "Ahtena",
117
  "iso_1_code": null,
118
  "iso_3_code": "aht",
119
- "tokenizers": {},
120
  "children": [],
 
121
  "node_i": "3788",
122
- "scripts": [],
123
- "own_tokenizer": false
124
  },
125
  {
126
  "name": "Babine",
127
  "iso_1_code": null,
128
  "iso_3_code": "bcr",
129
- "tokenizers": {},
130
  "children": [],
 
131
  "node_i": "3789",
132
- "scripts": [],
133
- "own_tokenizer": false
134
  },
135
  {
136
  "name": "Beaver",
137
  "iso_1_code": null,
138
  "iso_3_code": "bea",
139
- "tokenizers": {},
140
  "children": [],
 
141
  "node_i": "3790",
 
142
  "scripts": [
143
  "Latn"
144
- ],
145
- "own_tokenizer": false
146
  },
147
  {
148
  "name": "Dene",
149
  "iso_1_code": null,
150
  "iso_3_code": "chp",
151
- "tokenizers": {},
152
  "children": [],
 
153
  "node_i": "3791",
154
- "scripts": [],
155
- "own_tokenizer": false
156
  },
157
  {
158
  "name": "Chilcotin",
159
  "iso_1_code": null,
160
  "iso_3_code": "clc",
161
- "tokenizers": {},
162
  "children": [],
 
163
  "node_i": "3792",
164
- "scripts": [],
165
- "own_tokenizer": false
166
  },
167
  {
168
  "name": "Tlicho",
169
  "iso_1_code": null,
170
  "iso_3_code": "dgr",
171
- "tokenizers": {},
172
  "children": [],
 
173
  "node_i": "3793",
 
174
  "scripts": [
175
  "Latn"
176
- ],
177
- "own_tokenizer": false
178
  },
179
  {
180
  "name": "Gwich\u2019in",
181
  "iso_1_code": null,
182
  "iso_3_code": "gwi",
183
- "tokenizers": {},
184
  "children": [],
 
185
  "node_i": "3794",
 
186
  "scripts": [
187
  "Latn"
188
- ],
189
- "own_tokenizer": false
190
  },
191
  {
192
  "name": "Han",
193
  "iso_1_code": null,
194
  "iso_3_code": "haa",
195
- "tokenizers": {},
196
  "children": [],
 
197
  "node_i": "3795",
198
- "scripts": [],
199
- "own_tokenizer": false
200
  },
201
  {
202
  "name": "Holikachuk",
203
  "iso_1_code": null,
204
  "iso_3_code": "hoi",
205
- "tokenizers": {},
206
  "children": [],
 
207
  "node_i": "3796",
208
- "scripts": [],
209
- "own_tokenizer": false
210
  },
211
  {
212
  "name": "Deg Xinag",
213
  "iso_1_code": null,
214
  "iso_3_code": "ing",
215
- "tokenizers": {},
216
  "children": [],
 
217
  "node_i": "3797",
218
- "scripts": [],
219
- "own_tokenizer": false
220
  },
221
  {
222
  "name": "Koyukon",
223
  "iso_1_code": null,
224
  "iso_3_code": "koy",
225
- "tokenizers": {},
226
  "children": [],
 
227
  "node_i": "3798",
228
- "scripts": [],
229
- "own_tokenizer": false
230
  },
231
  {
232
  "name": "Kuskokwim, Upper",
233
  "iso_1_code": null,
234
  "iso_3_code": "kuu",
235
- "tokenizers": {},
236
  "children": [],
 
237
  "node_i": "3799",
238
- "scripts": [],
239
- "own_tokenizer": false
240
  },
241
  {
242
  "name": "Sekani",
243
  "iso_1_code": null,
244
  "iso_3_code": "sek",
245
- "tokenizers": {},
246
  "children": [],
 
247
  "node_i": "3800",
248
- "scripts": [],
249
- "own_tokenizer": false
250
  },
251
  {
252
  "name": "Sarsi",
253
  "iso_1_code": null,
254
  "iso_3_code": "srs",
255
- "tokenizers": {},
256
  "children": [],
 
257
  "node_i": "3801",
258
- "scripts": [],
259
- "own_tokenizer": false
260
  },
261
  {
262
  "name": "Tanana, Lower",
263
  "iso_1_code": null,
264
  "iso_3_code": "taa",
265
- "tokenizers": {},
266
  "children": [],
 
267
  "node_i": "3802",
268
- "scripts": [],
269
- "own_tokenizer": false
270
  },
271
  {
272
  "name": "Tanana, Upper",
273
  "iso_1_code": null,
274
  "iso_3_code": "tau",
275
- "tokenizers": {},
276
  "children": [],
 
277
  "node_i": "3803",
278
- "scripts": [],
279
- "own_tokenizer": false
280
  },
281
  {
282
  "name": "Tanacross",
283
  "iso_1_code": null,
284
  "iso_3_code": "tcb",
285
- "tokenizers": {},
286
  "children": [],
 
287
  "node_i": "3804",
288
- "scripts": [],
289
- "own_tokenizer": false
290
  },
291
  {
292
  "name": "Tanaina",
293
  "iso_1_code": null,
294
  "iso_3_code": "tfn",
295
- "tokenizers": {},
296
  "children": [],
 
297
  "node_i": "3805",
298
- "scripts": [],
299
- "own_tokenizer": false
300
  },
301
  {
302
  "name": "Tsetsaut",
303
  "iso_1_code": null,
304
  "iso_3_code": "txc",
305
- "tokenizers": {},
306
  "children": [],
 
307
  "node_i": "3806",
308
- "scripts": [],
309
- "own_tokenizer": false
310
  },
311
  {
312
  "name": "Carrier",
313
  "iso_1_code": null,
314
  "iso_3_code": null,
315
- "tokenizers": {},
316
  "children": [
317
  {
318
  "name": "Carrier, Southern",
319
  "iso_1_code": null,
320
  "iso_3_code": "caf",
321
- "tokenizers": {},
322
  "children": [],
 
323
  "node_i": "3808",
 
324
  "scripts": [
325
  "Latn"
326
- ],
327
- "own_tokenizer": false
328
  },
329
  {
330
  "name": "Carrier",
331
  "iso_1_code": null,
332
  "iso_3_code": "crx",
333
- "tokenizers": {},
334
  "children": [],
 
335
  "node_i": "3809",
 
336
  "scripts": [
337
  "Latn"
338
- ],
339
- "own_tokenizer": false
340
  }
341
  ],
 
342
  "node_i": "3807",
343
- "scripts": [],
344
- "own_tokenizer": false
345
  },
346
  {
347
  "name": "Slavey-Hare",
348
  "iso_1_code": null,
349
  "iso_3_code": null,
350
- "tokenizers": {},
351
  "children": [
352
  {
353
  "name": "Slavey, North",
354
  "iso_1_code": null,
355
  "iso_3_code": "scs",
356
- "tokenizers": {},
357
  "children": [],
 
358
  "node_i": "3811",
359
- "scripts": [],
360
- "own_tokenizer": false
361
  },
362
  {
363
  "name": "Slavey, South",
364
  "iso_1_code": null,
365
  "iso_3_code": "xsl",
366
- "tokenizers": {},
367
  "children": [],
 
368
  "node_i": "3812",
369
- "scripts": [],
370
- "own_tokenizer": false
371
  }
372
  ],
 
373
  "node_i": "3810",
374
- "scripts": [],
375
- "own_tokenizer": false
376
  },
377
  {
378
  "name": "Tahltan",
379
  "iso_1_code": null,
380
  "iso_3_code": null,
381
- "tokenizers": {},
382
  "children": [
383
  {
384
  "name": "Kaska",
385
  "iso_1_code": null,
386
  "iso_3_code": "kkz",
387
- "tokenizers": {},
388
  "children": [],
 
389
  "node_i": "3814",
390
- "scripts": [],
391
- "own_tokenizer": false
392
  },
393
  {
394
  "name": "Tagish",
395
  "iso_1_code": null,
396
  "iso_3_code": "tgx",
397
- "tokenizers": {},
398
  "children": [],
 
399
  "node_i": "3815",
400
- "scripts": [],
401
- "own_tokenizer": false
402
  },
403
  {
404
  "name": "Tahltan",
405
  "iso_1_code": null,
406
  "iso_3_code": "tht",
407
- "tokenizers": {},
408
  "children": [],
 
409
  "node_i": "3816",
410
- "scripts": [],
411
- "own_tokenizer": false
412
  }
413
  ],
 
414
  "node_i": "3813",
415
- "scripts": [],
416
- "own_tokenizer": false
417
  },
418
  {
419
  "name": "Tuchone",
420
  "iso_1_code": null,
421
  "iso_3_code": null,
422
- "tokenizers": {},
423
  "children": [
424
  {
425
  "name": "Tutchone, Southern",
426
  "iso_1_code": null,
427
  "iso_3_code": "tce",
428
- "tokenizers": {},
429
  "children": [],
 
430
  "node_i": "3818",
431
- "scripts": [],
432
- "own_tokenizer": false
433
  },
434
  {
435
  "name": "Tutchone, Northern",
436
  "iso_1_code": null,
437
  "iso_3_code": "ttm",
438
- "tokenizers": {},
439
  "children": [],
 
440
  "node_i": "3819",
441
- "scripts": [],
442
- "own_tokenizer": false
443
  }
444
  ],
 
445
  "node_i": "3817",
446
- "scripts": [],
447
- "own_tokenizer": false
448
  }
449
  ],
 
450
  "node_i": "3787",
451
- "scripts": [],
452
- "own_tokenizer": false
453
  },
454
  {
455
  "name": "Pacific Coast Athabaskan",
456
  "iso_1_code": null,
457
  "iso_3_code": null,
458
- "tokenizers": {},
459
  "children": [
460
  {
461
  "name": "Kwalhioqua-Tlatskanai",
462
  "iso_1_code": null,
463
  "iso_3_code": "qwt",
464
- "tokenizers": {},
465
  "children": [],
 
466
  "node_i": "3821",
467
- "scripts": [],
468
- "own_tokenizer": false
469
  },
470
  {
471
  "name": "California Athabaskan",
472
  "iso_1_code": null,
473
  "iso_3_code": null,
474
- "tokenizers": {},
475
  "children": [
476
  {
477
  "name": "Hupa",
478
  "iso_1_code": null,
479
  "iso_3_code": "hup",
480
- "tokenizers": {},
481
  "children": [],
 
482
  "node_i": "3823",
483
- "scripts": [],
484
- "own_tokenizer": false
485
  },
486
  {
487
  "name": "Kato",
488
  "iso_1_code": null,
489
  "iso_3_code": "ktw",
490
- "tokenizers": {},
491
  "children": [],
 
492
  "node_i": "3824",
493
- "scripts": [],
494
- "own_tokenizer": false
495
  },
496
  {
497
  "name": "Mattole",
498
  "iso_1_code": null,
499
  "iso_3_code": "mvb",
500
- "tokenizers": {},
501
  "children": [],
 
502
  "node_i": "3825",
503
- "scripts": [],
504
- "own_tokenizer": false
505
  },
506
  {
507
  "name": "Wailaki",
508
  "iso_1_code": null,
509
  "iso_3_code": "wlk",
510
- "tokenizers": {},
511
  "children": [],
 
512
  "node_i": "3826",
513
- "scripts": [],
514
- "own_tokenizer": false
515
  }
516
  ],
 
517
  "node_i": "3822",
518
- "scripts": [],
519
- "own_tokenizer": false
520
  },
521
  {
522
  "name": "Oregon Athabaskan",
523
  "iso_1_code": null,
524
  "iso_3_code": null,
525
- "tokenizers": {},
526
  "children": [
527
  {
528
  "name": "Galice",
529
  "iso_1_code": null,
530
  "iso_3_code": "gce",
531
- "tokenizers": {},
532
  "children": [],
 
533
  "node_i": "3828",
534
- "scripts": [],
535
- "own_tokenizer": false
536
  },
537
  {
538
  "name": "Upper Umpqua",
539
  "iso_1_code": null,
540
  "iso_3_code": "xup",
541
- "tokenizers": {},
542
  "children": [],
 
543
  "node_i": "3829",
544
- "scripts": [],
545
- "own_tokenizer": false
546
  },
547
  {
548
  "name": "Tolowa-Chetco",
549
  "iso_1_code": null,
550
  "iso_3_code": null,
551
- "tokenizers": {},
552
  "children": [
553
  {
554
  "name": "Chetco",
555
  "iso_1_code": null,
556
  "iso_3_code": "ctc",
557
- "tokenizers": {},
558
  "children": [],
 
559
  "node_i": "3831",
560
- "scripts": [],
561
- "own_tokenizer": false
562
  },
563
  {
564
  "name": "Tolowa",
565
  "iso_1_code": null,
566
  "iso_3_code": "tol",
567
- "tokenizers": {},
568
  "children": [],
 
569
  "node_i": "3832",
570
- "scripts": [],
571
- "own_tokenizer": false
572
  }
573
  ],
 
574
  "node_i": "3830",
575
- "scripts": [],
576
- "own_tokenizer": false
577
  },
578
  {
579
  "name": "Tututni-Chasta Costa-Coquille",
580
  "iso_1_code": null,
581
  "iso_3_code": null,
582
- "tokenizers": {},
583
  "children": [
584
  {
585
  "name": "Coquille",
586
  "iso_1_code": null,
587
  "iso_3_code": "coq",
588
- "tokenizers": {},
589
  "children": [],
 
590
  "node_i": "3834",
591
- "scripts": [],
592
- "own_tokenizer": false
593
  },
594
  {
595
  "name": "Tututni",
596
  "iso_1_code": null,
597
  "iso_3_code": "tuu",
598
- "tokenizers": {},
599
  "children": [],
 
600
  "node_i": "3835",
601
- "scripts": [],
602
- "own_tokenizer": false
603
  }
604
  ],
 
605
  "node_i": "3833",
606
- "scripts": [],
607
- "own_tokenizer": false
608
  }
609
  ],
 
610
  "node_i": "3827",
611
- "scripts": [],
612
- "own_tokenizer": false
613
  }
614
  ],
 
615
  "node_i": "3820",
616
- "scripts": [],
617
- "own_tokenizer": false
618
  }
619
  ],
 
620
  "node_i": "3778",
621
- "scripts": [],
622
- "own_tokenizer": false
623
  },
624
  {
625
  "name": "Tlingit",
626
  "iso_1_code": null,
627
  "iso_3_code": null,
628
- "tokenizers": {},
629
  "children": [
630
  {
631
  "name": "Tlingit",
632
  "iso_1_code": null,
633
  "iso_3_code": "tli",
634
- "tokenizers": {},
635
  "children": [],
 
636
  "node_i": "3837",
637
- "scripts": [],
638
- "own_tokenizer": false
639
  }
640
  ],
 
641
  "node_i": "3836",
642
- "scripts": [],
643
- "own_tokenizer": false
644
  }
645
  ],
 
646
  "node_i": "3776",
647
- "scripts": [],
648
- "own_tokenizer": false
649
  }
 
2
  "name": "Eyak-Athabaskan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Eyak",
8
  "iso_1_code": null,
9
  "iso_3_code": "eya",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3777",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Athabaskan",
18
  "iso_1_code": null,
19
  "iso_3_code": null,
 
20
  "children": [
21
  {
22
  "name": "Apachean",
23
  "iso_1_code": null,
24
  "iso_3_code": null,
 
25
  "children": [
26
  {
27
  "name": "Navajo",
28
  "iso_1_code": "nv",
29
  "iso_3_code": "nav",
 
30
  "children": [],
31
+ "tokenizers": {},
32
  "node_i": "3780",
33
+ "native_tokenizers": [],
34
  "scripts": [
35
  "Latn"
36
+ ]
 
37
  },
38
  {
39
  "name": "Apache",
40
  "iso_1_code": null,
41
  "iso_3_code": null,
 
42
  "children": [
43
  {
44
  "name": "Apache, Jicarilla",
45
  "iso_1_code": null,
46
  "iso_3_code": "apj",
 
47
  "children": [],
48
+ "tokenizers": {},
49
  "node_i": "3782",
50
+ "native_tokenizers": [],
51
+ "scripts": []
52
  },
53
  {
54
  "name": "Apache, Kiowa",
55
  "iso_1_code": null,
56
  "iso_3_code": "apk",
 
57
  "children": [],
58
+ "tokenizers": {},
59
  "node_i": "3783",
60
+ "native_tokenizers": [],
61
+ "scripts": []
62
  },
63
  {
64
  "name": "Apache, Lipan",
65
  "iso_1_code": null,
66
  "iso_3_code": "apl",
 
67
  "children": [],
68
+ "tokenizers": {},
69
  "node_i": "3784",
70
+ "native_tokenizers": [],
71
+ "scripts": []
72
  },
73
  {
74
  "name": "Apache, Mescalero-Chiricahua",
75
  "iso_1_code": null,
76
  "iso_3_code": "apm",
 
77
  "children": [],
78
+ "tokenizers": {},
79
  "node_i": "3785",
80
+ "native_tokenizers": [],
81
+ "scripts": []
82
  },
83
  {
84
  "name": "Apache, Western",
85
  "iso_1_code": null,
86
  "iso_3_code": "apw",
 
87
  "children": [],
88
+ "tokenizers": {},
89
  "node_i": "3786",
90
+ "native_tokenizers": [],
91
  "scripts": [
92
  "Latn"
93
+ ]
 
94
  }
95
  ],
96
+ "tokenizers": {},
97
  "node_i": "3781",
98
+ "native_tokenizers": [],
99
+ "scripts": []
100
  }
101
  ],
102
+ "tokenizers": {},
103
  "node_i": "3779",
104
+ "native_tokenizers": [],
105
+ "scripts": []
106
  },
107
  {
108
  "name": "Northern Athabaskan",
109
  "iso_1_code": null,
110
  "iso_3_code": null,
 
111
  "children": [
112
  {
113
  "name": "Ahtena",
114
  "iso_1_code": null,
115
  "iso_3_code": "aht",
 
116
  "children": [],
117
+ "tokenizers": {},
118
  "node_i": "3788",
119
+ "native_tokenizers": [],
120
+ "scripts": []
121
  },
122
  {
123
  "name": "Babine",
124
  "iso_1_code": null,
125
  "iso_3_code": "bcr",
 
126
  "children": [],
127
+ "tokenizers": {},
128
  "node_i": "3789",
129
+ "native_tokenizers": [],
130
+ "scripts": []
131
  },
132
  {
133
  "name": "Beaver",
134
  "iso_1_code": null,
135
  "iso_3_code": "bea",
 
136
  "children": [],
137
+ "tokenizers": {},
138
  "node_i": "3790",
139
+ "native_tokenizers": [],
140
  "scripts": [
141
  "Latn"
142
+ ]
 
143
  },
144
  {
145
  "name": "Dene",
146
  "iso_1_code": null,
147
  "iso_3_code": "chp",
 
148
  "children": [],
149
+ "tokenizers": {},
150
  "node_i": "3791",
151
+ "native_tokenizers": [],
152
+ "scripts": []
153
  },
154
  {
155
  "name": "Chilcotin",
156
  "iso_1_code": null,
157
  "iso_3_code": "clc",
 
158
  "children": [],
159
+ "tokenizers": {},
160
  "node_i": "3792",
161
+ "native_tokenizers": [],
162
+ "scripts": []
163
  },
164
  {
165
  "name": "Tlicho",
166
  "iso_1_code": null,
167
  "iso_3_code": "dgr",
 
168
  "children": [],
169
+ "tokenizers": {},
170
  "node_i": "3793",
171
+ "native_tokenizers": [],
172
  "scripts": [
173
  "Latn"
174
+ ]
 
175
  },
176
  {
177
  "name": "Gwich\u2019in",
178
  "iso_1_code": null,
179
  "iso_3_code": "gwi",
 
180
  "children": [],
181
+ "tokenizers": {},
182
  "node_i": "3794",
183
+ "native_tokenizers": [],
184
  "scripts": [
185
  "Latn"
186
+ ]
 
187
  },
188
  {
189
  "name": "Han",
190
  "iso_1_code": null,
191
  "iso_3_code": "haa",
 
192
  "children": [],
193
+ "tokenizers": {},
194
  "node_i": "3795",
195
+ "native_tokenizers": [],
196
+ "scripts": []
197
  },
198
  {
199
  "name": "Holikachuk",
200
  "iso_1_code": null,
201
  "iso_3_code": "hoi",
 
202
  "children": [],
203
+ "tokenizers": {},
204
  "node_i": "3796",
205
+ "native_tokenizers": [],
206
+ "scripts": []
207
  },
208
  {
209
  "name": "Deg Xinag",
210
  "iso_1_code": null,
211
  "iso_3_code": "ing",
 
212
  "children": [],
213
+ "tokenizers": {},
214
  "node_i": "3797",
215
+ "native_tokenizers": [],
216
+ "scripts": []
217
  },
218
  {
219
  "name": "Koyukon",
220
  "iso_1_code": null,
221
  "iso_3_code": "koy",
 
222
  "children": [],
223
+ "tokenizers": {},
224
  "node_i": "3798",
225
+ "native_tokenizers": [],
226
+ "scripts": []
227
  },
228
  {
229
  "name": "Kuskokwim, Upper",
230
  "iso_1_code": null,
231
  "iso_3_code": "kuu",
 
232
  "children": [],
233
+ "tokenizers": {},
234
  "node_i": "3799",
235
+ "native_tokenizers": [],
236
+ "scripts": []
237
  },
238
  {
239
  "name": "Sekani",
240
  "iso_1_code": null,
241
  "iso_3_code": "sek",
 
242
  "children": [],
243
+ "tokenizers": {},
244
  "node_i": "3800",
245
+ "native_tokenizers": [],
246
+ "scripts": []
247
  },
248
  {
249
  "name": "Sarsi",
250
  "iso_1_code": null,
251
  "iso_3_code": "srs",
 
252
  "children": [],
253
+ "tokenizers": {},
254
  "node_i": "3801",
255
+ "native_tokenizers": [],
256
+ "scripts": []
257
  },
258
  {
259
  "name": "Tanana, Lower",
260
  "iso_1_code": null,
261
  "iso_3_code": "taa",
 
262
  "children": [],
263
+ "tokenizers": {},
264
  "node_i": "3802",
265
+ "native_tokenizers": [],
266
+ "scripts": []
267
  },
268
  {
269
  "name": "Tanana, Upper",
270
  "iso_1_code": null,
271
  "iso_3_code": "tau",
 
272
  "children": [],
273
+ "tokenizers": {},
274
  "node_i": "3803",
275
+ "native_tokenizers": [],
276
+ "scripts": []
277
  },
278
  {
279
  "name": "Tanacross",
280
  "iso_1_code": null,
281
  "iso_3_code": "tcb",
 
282
  "children": [],
283
+ "tokenizers": {},
284
  "node_i": "3804",
285
+ "native_tokenizers": [],
286
+ "scripts": []
287
  },
288
  {
289
  "name": "Tanaina",
290
  "iso_1_code": null,
291
  "iso_3_code": "tfn",
 
292
  "children": [],
293
+ "tokenizers": {},
294
  "node_i": "3805",
295
+ "native_tokenizers": [],
296
+ "scripts": []
297
  },
298
  {
299
  "name": "Tsetsaut",
300
  "iso_1_code": null,
301
  "iso_3_code": "txc",
 
302
  "children": [],
303
+ "tokenizers": {},
304
  "node_i": "3806",
305
+ "native_tokenizers": [],
306
+ "scripts": []
307
  },
308
  {
309
  "name": "Carrier",
310
  "iso_1_code": null,
311
  "iso_3_code": null,
 
312
  "children": [
313
  {
314
  "name": "Carrier, Southern",
315
  "iso_1_code": null,
316
  "iso_3_code": "caf",
 
317
  "children": [],
318
+ "tokenizers": {},
319
  "node_i": "3808",
320
+ "native_tokenizers": [],
321
  "scripts": [
322
  "Latn"
323
+ ]
 
324
  },
325
  {
326
  "name": "Carrier",
327
  "iso_1_code": null,
328
  "iso_3_code": "crx",
 
329
  "children": [],
330
+ "tokenizers": {},
331
  "node_i": "3809",
332
+ "native_tokenizers": [],
333
  "scripts": [
334
  "Latn"
335
+ ]
 
336
  }
337
  ],
338
+ "tokenizers": {},
339
  "node_i": "3807",
340
+ "native_tokenizers": [],
341
+ "scripts": []
342
  },
343
  {
344
  "name": "Slavey-Hare",
345
  "iso_1_code": null,
346
  "iso_3_code": null,
 
347
  "children": [
348
  {
349
  "name": "Slavey, North",
350
  "iso_1_code": null,
351
  "iso_3_code": "scs",
 
352
  "children": [],
353
+ "tokenizers": {},
354
  "node_i": "3811",
355
+ "native_tokenizers": [],
356
+ "scripts": []
357
  },
358
  {
359
  "name": "Slavey, South",
360
  "iso_1_code": null,
361
  "iso_3_code": "xsl",
 
362
  "children": [],
363
+ "tokenizers": {},
364
  "node_i": "3812",
365
+ "native_tokenizers": [],
366
+ "scripts": []
367
  }
368
  ],
369
+ "tokenizers": {},
370
  "node_i": "3810",
371
+ "native_tokenizers": [],
372
+ "scripts": []
373
  },
374
  {
375
  "name": "Tahltan",
376
  "iso_1_code": null,
377
  "iso_3_code": null,
 
378
  "children": [
379
  {
380
  "name": "Kaska",
381
  "iso_1_code": null,
382
  "iso_3_code": "kkz",
 
383
  "children": [],
384
+ "tokenizers": {},
385
  "node_i": "3814",
386
+ "native_tokenizers": [],
387
+ "scripts": []
388
  },
389
  {
390
  "name": "Tagish",
391
  "iso_1_code": null,
392
  "iso_3_code": "tgx",
 
393
  "children": [],
394
+ "tokenizers": {},
395
  "node_i": "3815",
396
+ "native_tokenizers": [],
397
+ "scripts": []
398
  },
399
  {
400
  "name": "Tahltan",
401
  "iso_1_code": null,
402
  "iso_3_code": "tht",
 
403
  "children": [],
404
+ "tokenizers": {},
405
  "node_i": "3816",
406
+ "native_tokenizers": [],
407
+ "scripts": []
408
  }
409
  ],
410
+ "tokenizers": {},
411
  "node_i": "3813",
412
+ "native_tokenizers": [],
413
+ "scripts": []
414
  },
415
  {
416
  "name": "Tuchone",
417
  "iso_1_code": null,
418
  "iso_3_code": null,
 
419
  "children": [
420
  {
421
  "name": "Tutchone, Southern",
422
  "iso_1_code": null,
423
  "iso_3_code": "tce",
 
424
  "children": [],
425
+ "tokenizers": {},
426
  "node_i": "3818",
427
+ "native_tokenizers": [],
428
+ "scripts": []
429
  },
430
  {
431
  "name": "Tutchone, Northern",
432
  "iso_1_code": null,
433
  "iso_3_code": "ttm",
 
434
  "children": [],
435
+ "tokenizers": {},
436
  "node_i": "3819",
437
+ "native_tokenizers": [],
438
+ "scripts": []
439
  }
440
  ],
441
+ "tokenizers": {},
442
  "node_i": "3817",
443
+ "native_tokenizers": [],
444
+ "scripts": []
445
  }
446
  ],
447
+ "tokenizers": {},
448
  "node_i": "3787",
449
+ "native_tokenizers": [],
450
+ "scripts": []
451
  },
452
  {
453
  "name": "Pacific Coast Athabaskan",
454
  "iso_1_code": null,
455
  "iso_3_code": null,
 
456
  "children": [
457
  {
458
  "name": "Kwalhioqua-Tlatskanai",
459
  "iso_1_code": null,
460
  "iso_3_code": "qwt",
 
461
  "children": [],
462
+ "tokenizers": {},
463
  "node_i": "3821",
464
+ "native_tokenizers": [],
465
+ "scripts": []
466
  },
467
  {
468
  "name": "California Athabaskan",
469
  "iso_1_code": null,
470
  "iso_3_code": null,
 
471
  "children": [
472
  {
473
  "name": "Hupa",
474
  "iso_1_code": null,
475
  "iso_3_code": "hup",
 
476
  "children": [],
477
+ "tokenizers": {},
478
  "node_i": "3823",
479
+ "native_tokenizers": [],
480
+ "scripts": []
481
  },
482
  {
483
  "name": "Kato",
484
  "iso_1_code": null,
485
  "iso_3_code": "ktw",
 
486
  "children": [],
487
+ "tokenizers": {},
488
  "node_i": "3824",
489
+ "native_tokenizers": [],
490
+ "scripts": []
491
  },
492
  {
493
  "name": "Mattole",
494
  "iso_1_code": null,
495
  "iso_3_code": "mvb",
 
496
  "children": [],
497
+ "tokenizers": {},
498
  "node_i": "3825",
499
+ "native_tokenizers": [],
500
+ "scripts": []
501
  },
502
  {
503
  "name": "Wailaki",
504
  "iso_1_code": null,
505
  "iso_3_code": "wlk",
 
506
  "children": [],
507
+ "tokenizers": {},
508
  "node_i": "3826",
509
+ "native_tokenizers": [],
510
+ "scripts": []
511
  }
512
  ],
513
+ "tokenizers": {},
514
  "node_i": "3822",
515
+ "native_tokenizers": [],
516
+ "scripts": []
517
  },
518
  {
519
  "name": "Oregon Athabaskan",
520
  "iso_1_code": null,
521
  "iso_3_code": null,
 
522
  "children": [
523
  {
524
  "name": "Galice",
525
  "iso_1_code": null,
526
  "iso_3_code": "gce",
 
527
  "children": [],
528
+ "tokenizers": {},
529
  "node_i": "3828",
530
+ "native_tokenizers": [],
531
+ "scripts": []
532
  },
533
  {
534
  "name": "Upper Umpqua",
535
  "iso_1_code": null,
536
  "iso_3_code": "xup",
 
537
  "children": [],
538
+ "tokenizers": {},
539
  "node_i": "3829",
540
+ "native_tokenizers": [],
541
+ "scripts": []
542
  },
543
  {
544
  "name": "Tolowa-Chetco",
545
  "iso_1_code": null,
546
  "iso_3_code": null,
 
547
  "children": [
548
  {
549
  "name": "Chetco",
550
  "iso_1_code": null,
551
  "iso_3_code": "ctc",
 
552
  "children": [],
553
+ "tokenizers": {},
554
  "node_i": "3831",
555
+ "native_tokenizers": [],
556
+ "scripts": []
557
  },
558
  {
559
  "name": "Tolowa",
560
  "iso_1_code": null,
561
  "iso_3_code": "tol",
 
562
  "children": [],
563
+ "tokenizers": {},
564
  "node_i": "3832",
565
+ "native_tokenizers": [],
566
+ "scripts": []
567
  }
568
  ],
569
+ "tokenizers": {},
570
  "node_i": "3830",
571
+ "native_tokenizers": [],
572
+ "scripts": []
573
  },
574
  {
575
  "name": "Tututni-Chasta Costa-Coquille",
576
  "iso_1_code": null,
577
  "iso_3_code": null,
 
578
  "children": [
579
  {
580
  "name": "Coquille",
581
  "iso_1_code": null,
582
  "iso_3_code": "coq",
 
583
  "children": [],
584
+ "tokenizers": {},
585
  "node_i": "3834",
586
+ "native_tokenizers": [],
587
+ "scripts": []
588
  },
589
  {
590
  "name": "Tututni",
591
  "iso_1_code": null,
592
  "iso_3_code": "tuu",
 
593
  "children": [],
594
+ "tokenizers": {},
595
  "node_i": "3835",
596
+ "native_tokenizers": [],
597
+ "scripts": []
598
  }
599
  ],
600
+ "tokenizers": {},
601
  "node_i": "3833",
602
+ "native_tokenizers": [],
603
+ "scripts": []
604
  }
605
  ],
606
+ "tokenizers": {},
607
  "node_i": "3827",
608
+ "native_tokenizers": [],
609
+ "scripts": []
610
  }
611
  ],
612
+ "tokenizers": {},
613
  "node_i": "3820",
614
+ "native_tokenizers": [],
615
+ "scripts": []
616
  }
617
  ],
618
+ "tokenizers": {},
619
  "node_i": "3778",
620
+ "native_tokenizers": [],
621
+ "scripts": []
622
  },
623
  {
624
  "name": "Tlingit",
625
  "iso_1_code": null,
626
  "iso_3_code": null,
 
627
  "children": [
628
  {
629
  "name": "Tlingit",
630
  "iso_1_code": null,
631
  "iso_3_code": "tli",
 
632
  "children": [],
633
+ "tokenizers": {},
634
  "node_i": "3837",
635
+ "native_tokenizers": [],
636
+ "scripts": []
637
  }
638
  ],
639
+ "tokenizers": {},
640
  "node_i": "3836",
641
+ "native_tokenizers": [],
642
+ "scripts": []
643
  }
644
  ],
645
+ "tokenizers": {},
646
  "node_i": "3776",
647
+ "native_tokenizers": [],
648
+ "scripts": []
649
  }
data/Fas.json CHANGED
@@ -2,30 +2,30 @@
2
  "name": "Fas",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Baibai",
9
  "iso_1_code": null,
10
  "iso_3_code": "bbf",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3839",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Momu",
19
  "iso_1_code": null,
20
  "iso_3_code": "fqs",
21
- "tokenizers": {},
22
  "children": [],
 
23
  "node_i": "3840",
24
- "scripts": [],
25
- "own_tokenizer": false
26
  }
27
  ],
 
28
  "node_i": "3838",
29
- "scripts": [],
30
- "own_tokenizer": false
31
  }
 
2
  "name": "Fas",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Baibai",
8
  "iso_1_code": null,
9
  "iso_3_code": "bbf",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3839",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Momu",
18
  "iso_1_code": null,
19
  "iso_3_code": "fqs",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "3840",
23
+ "native_tokenizers": [],
24
+ "scripts": []
25
  }
26
  ],
27
+ "tokenizers": {},
28
  "node_i": "3838",
29
+ "native_tokenizers": [],
30
+ "scripts": []
31
  }
data/Guajiboan.json CHANGED
@@ -2,77 +2,77 @@
2
  "name": "Guajiboan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Cuiba",
9
  "iso_1_code": null,
10
  "iso_3_code": "cui",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3842",
 
14
  "scripts": [
15
  "Latn"
16
- ],
17
- "own_tokenizer": false
18
  },
19
  {
20
  "name": "Guayabero",
21
  "iso_1_code": null,
22
  "iso_3_code": "guo",
23
- "tokenizers": {},
24
  "children": [],
 
25
  "node_i": "3843",
 
26
  "scripts": [
27
  "Latn"
28
- ],
29
- "own_tokenizer": false
30
  },
31
  {
32
  "name": "Guajibo",
33
  "iso_1_code": null,
34
  "iso_3_code": null,
35
- "tokenizers": {},
36
  "children": [
37
  {
38
  "name": "Playero",
39
  "iso_1_code": null,
40
  "iso_3_code": "gob",
41
- "tokenizers": {},
42
  "children": [],
 
43
  "node_i": "3845",
44
- "scripts": [],
45
- "own_tokenizer": false
46
  },
47
  {
48
  "name": "Guahibo",
49
  "iso_1_code": null,
50
  "iso_3_code": "guh",
51
- "tokenizers": {},
52
  "children": [],
 
53
  "node_i": "3846",
 
54
  "scripts": [
55
  "Latn"
56
- ],
57
- "own_tokenizer": false
58
  },
59
  {
60
  "name": "Macagu\u00e1n",
61
  "iso_1_code": null,
62
  "iso_3_code": "mbn",
63
- "tokenizers": {},
64
  "children": [],
 
65
  "node_i": "3847",
66
- "scripts": [],
67
- "own_tokenizer": false
68
  }
69
  ],
 
70
  "node_i": "3844",
71
- "scripts": [],
72
- "own_tokenizer": false
73
  }
74
  ],
 
75
  "node_i": "3841",
76
- "scripts": [],
77
- "own_tokenizer": false
78
  }
 
2
  "name": "Guajiboan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Cuiba",
8
  "iso_1_code": null,
9
  "iso_3_code": "cui",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3842",
13
+ "native_tokenizers": [],
14
  "scripts": [
15
  "Latn"
16
+ ]
 
17
  },
18
  {
19
  "name": "Guayabero",
20
  "iso_1_code": null,
21
  "iso_3_code": "guo",
 
22
  "children": [],
23
+ "tokenizers": {},
24
  "node_i": "3843",
25
+ "native_tokenizers": [],
26
  "scripts": [
27
  "Latn"
28
+ ]
 
29
  },
30
  {
31
  "name": "Guajibo",
32
  "iso_1_code": null,
33
  "iso_3_code": null,
 
34
  "children": [
35
  {
36
  "name": "Playero",
37
  "iso_1_code": null,
38
  "iso_3_code": "gob",
 
39
  "children": [],
40
+ "tokenizers": {},
41
  "node_i": "3845",
42
+ "native_tokenizers": [],
43
+ "scripts": []
44
  },
45
  {
46
  "name": "Guahibo",
47
  "iso_1_code": null,
48
  "iso_3_code": "guh",
 
49
  "children": [],
50
+ "tokenizers": {},
51
  "node_i": "3846",
52
+ "native_tokenizers": [],
53
  "scripts": [
54
  "Latn"
55
+ ]
 
56
  },
57
  {
58
  "name": "Macagu\u00e1n",
59
  "iso_1_code": null,
60
  "iso_3_code": "mbn",
 
61
  "children": [],
62
+ "tokenizers": {},
63
  "node_i": "3847",
64
+ "native_tokenizers": [],
65
+ "scripts": []
66
  }
67
  ],
68
+ "tokenizers": {},
69
  "node_i": "3844",
70
+ "native_tokenizers": [],
71
+ "scripts": []
72
  }
73
  ],
74
+ "tokenizers": {},
75
  "node_i": "3841",
76
+ "native_tokenizers": [],
77
+ "scripts": []
78
  }
data/Guaykuruan.json CHANGED
@@ -2,90 +2,90 @@
2
  "name": "Guaykuruan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Guaykur\u00fa",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Abipon",
15
  "iso_1_code": null,
16
  "iso_3_code": "axb",
17
- "tokenizers": {},
18
  "children": [],
 
19
  "node_i": "3850",
20
- "scripts": [],
21
- "own_tokenizer": false
22
  },
23
  {
24
  "name": "Kadiw\u00e9u",
25
  "iso_1_code": null,
26
  "iso_3_code": "kbc",
27
- "tokenizers": {},
28
  "children": [],
 
29
  "node_i": "3851",
 
30
  "scripts": [
31
  "Latn"
32
- ],
33
- "own_tokenizer": false
34
  }
35
  ],
 
36
  "node_i": "3849",
37
- "scripts": [],
38
- "own_tokenizer": false
39
  },
40
  {
41
  "name": "Southern",
42
  "iso_1_code": null,
43
  "iso_3_code": null,
44
- "tokenizers": {},
45
  "children": [
46
  {
47
  "name": "Mocov\u00ed",
48
  "iso_1_code": null,
49
  "iso_3_code": "moc",
50
- "tokenizers": {},
51
  "children": [],
 
52
  "node_i": "3853",
 
53
  "scripts": [
54
  "Latn"
55
- ],
56
- "own_tokenizer": false
57
  },
58
  {
59
  "name": "Pilag\u00e1",
60
  "iso_1_code": null,
61
  "iso_3_code": "plg",
62
- "tokenizers": {},
63
  "children": [],
 
64
  "node_i": "3854",
 
65
  "scripts": [
66
  "Latn"
67
- ],
68
- "own_tokenizer": false
69
  },
70
  {
71
  "name": "Toba",
72
  "iso_1_code": null,
73
  "iso_3_code": "tob",
74
- "tokenizers": {},
75
  "children": [],
 
76
  "node_i": "3855",
 
77
  "scripts": [
78
  "Latn"
79
- ],
80
- "own_tokenizer": false
81
  }
82
  ],
 
83
  "node_i": "3852",
84
- "scripts": [],
85
- "own_tokenizer": false
86
  }
87
  ],
 
88
  "node_i": "3848",
89
- "scripts": [],
90
- "own_tokenizer": false
91
  }
 
2
  "name": "Guaykuruan",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Guaykur\u00fa",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Abipon",
13
  "iso_1_code": null,
14
  "iso_3_code": "axb",
 
15
  "children": [],
16
+ "tokenizers": {},
17
  "node_i": "3850",
18
+ "native_tokenizers": [],
19
+ "scripts": []
20
  },
21
  {
22
  "name": "Kadiw\u00e9u",
23
  "iso_1_code": null,
24
  "iso_3_code": "kbc",
 
25
  "children": [],
26
+ "tokenizers": {},
27
  "node_i": "3851",
28
+ "native_tokenizers": [],
29
  "scripts": [
30
  "Latn"
31
+ ]
 
32
  }
33
  ],
34
+ "tokenizers": {},
35
  "node_i": "3849",
36
+ "native_tokenizers": [],
37
+ "scripts": []
38
  },
39
  {
40
  "name": "Southern",
41
  "iso_1_code": null,
42
  "iso_3_code": null,
 
43
  "children": [
44
  {
45
  "name": "Mocov\u00ed",
46
  "iso_1_code": null,
47
  "iso_3_code": "moc",
 
48
  "children": [],
49
+ "tokenizers": {},
50
  "node_i": "3853",
51
+ "native_tokenizers": [],
52
  "scripts": [
53
  "Latn"
54
+ ]
 
55
  },
56
  {
57
  "name": "Pilag\u00e1",
58
  "iso_1_code": null,
59
  "iso_3_code": "plg",
 
60
  "children": [],
61
+ "tokenizers": {},
62
  "node_i": "3854",
63
+ "native_tokenizers": [],
64
  "scripts": [
65
  "Latn"
66
+ ]
 
67
  },
68
  {
69
  "name": "Toba",
70
  "iso_1_code": null,
71
  "iso_3_code": "tob",
 
72
  "children": [],
73
+ "tokenizers": {},
74
  "node_i": "3855",
75
+ "native_tokenizers": [],
76
  "scripts": [
77
  "Latn"
78
+ ]
 
79
  }
80
  ],
81
+ "tokenizers": {},
82
  "node_i": "3852",
83
+ "native_tokenizers": [],
84
+ "scripts": []
85
  }
86
  ],
87
+ "tokenizers": {},
88
  "node_i": "3848",
89
+ "native_tokenizers": [],
90
+ "scripts": []
91
  }
data/Gum.json CHANGED
@@ -2,9 +2,9 @@
2
  "name": "Gum",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [],
 
7
  "node_i": "3856",
8
- "scripts": [],
9
- "own_tokenizer": false
10
  }
 
2
  "name": "Gum",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [],
6
+ "tokenizers": {},
7
  "node_i": "3856",
8
+ "native_tokenizers": [],
9
+ "scripts": []
10
  }
data/Haida.json CHANGED
@@ -2,30 +2,30 @@
2
  "name": "Haida",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Haida, Southern",
9
  "iso_1_code": null,
10
  "iso_3_code": "hax",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3858",
14
- "scripts": [],
15
- "own_tokenizer": false
16
  },
17
  {
18
  "name": "Haida, Northern",
19
  "iso_1_code": null,
20
  "iso_3_code": "hdn",
21
- "tokenizers": {},
22
  "children": [],
 
23
  "node_i": "3859",
24
- "scripts": [],
25
- "own_tokenizer": false
26
  }
27
  ],
 
28
  "node_i": "3857",
29
- "scripts": [],
30
- "own_tokenizer": false
31
  }
 
2
  "name": "Haida",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Haida, Southern",
8
  "iso_1_code": null,
9
  "iso_3_code": "hax",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3858",
13
+ "native_tokenizers": [],
14
+ "scripts": []
15
  },
16
  {
17
  "name": "Haida, Northern",
18
  "iso_1_code": null,
19
  "iso_3_code": "hdn",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "3859",
23
+ "native_tokenizers": [],
24
+ "scripts": []
25
  }
26
  ],
27
+ "tokenizers": {},
28
  "node_i": "3857",
29
+ "native_tokenizers": [],
30
+ "scripts": []
31
  }
data/Harákmbut.json CHANGED
@@ -2,32 +2,32 @@
2
  "name": "Har\u00e1kmbut",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Amarakaeri",
9
  "iso_1_code": null,
10
  "iso_3_code": "amr",
11
- "tokenizers": {},
12
  "children": [],
 
13
  "node_i": "3861",
 
14
  "scripts": [
15
  "Latn"
16
- ],
17
- "own_tokenizer": false
18
  },
19
  {
20
  "name": "Huachipaeri",
21
  "iso_1_code": null,
22
  "iso_3_code": "hug",
23
- "tokenizers": {},
24
  "children": [],
 
25
  "node_i": "3862",
26
- "scripts": [],
27
- "own_tokenizer": false
28
  }
29
  ],
 
30
  "node_i": "3860",
31
- "scripts": [],
32
- "own_tokenizer": false
33
  }
 
2
  "name": "Har\u00e1kmbut",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Amarakaeri",
8
  "iso_1_code": null,
9
  "iso_3_code": "amr",
 
10
  "children": [],
11
+ "tokenizers": {},
12
  "node_i": "3861",
13
+ "native_tokenizers": [],
14
  "scripts": [
15
  "Latn"
16
+ ]
 
17
  },
18
  {
19
  "name": "Huachipaeri",
20
  "iso_1_code": null,
21
  "iso_3_code": "hug",
 
22
  "children": [],
23
+ "tokenizers": {},
24
  "node_i": "3862",
25
+ "native_tokenizers": [],
26
+ "scripts": []
27
  }
28
  ],
29
+ "tokenizers": {},
30
  "node_i": "3860",
31
+ "native_tokenizers": [],
32
+ "scripts": []
33
  }
data/Hmong-Mien.json CHANGED
@@ -2,527 +2,527 @@
2
  "name": "Hmong-Mien",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
5
- "tokenizers": {},
6
  "children": [
7
  {
8
  "name": "Hmongic",
9
  "iso_1_code": null,
10
  "iso_3_code": null,
11
- "tokenizers": {},
12
  "children": [
13
  {
14
  "name": "Bunu",
15
  "iso_1_code": null,
16
  "iso_3_code": null,
17
- "tokenizers": {},
18
  "children": [
19
  {
20
  "name": "Bunu, Younuo",
21
  "iso_1_code": null,
22
  "iso_3_code": "buh",
23
- "tokenizers": {},
24
  "children": [],
 
25
  "node_i": "3866",
26
- "scripts": [],
27
- "own_tokenizer": false
28
  },
29
  {
30
  "name": "Bunu, Wunai",
31
  "iso_1_code": null,
32
  "iso_3_code": "bwn",
33
- "tokenizers": {},
34
  "children": [],
 
35
  "node_i": "3867",
36
- "scripts": [],
37
- "own_tokenizer": false
38
  },
39
  {
40
  "name": "Bunu, Bu-Nao",
41
  "iso_1_code": null,
42
  "iso_3_code": "bwx",
43
- "tokenizers": {},
44
  "children": [],
 
45
  "node_i": "3868",
46
- "scripts": [],
47
- "own_tokenizer": false
48
  },
49
  {
50
  "name": "Bunu, Jiongnai",
51
  "iso_1_code": null,
52
  "iso_3_code": "pnu",
53
- "tokenizers": {},
54
  "children": [],
 
55
  "node_i": "3869",
56
- "scripts": [],
57
- "own_tokenizer": false
58
  }
59
  ],
 
60
  "node_i": "3865",
61
- "scripts": [],
62
- "own_tokenizer": false
63
  },
64
  {
65
  "name": "Chuanqiandian",
66
  "iso_1_code": null,
67
  "iso_3_code": null,
68
- "tokenizers": {},
69
  "children": [
70
  {
71
  "name": "Miao, Chuanqiandian Cluster",
72
  "iso_1_code": null,
73
  "iso_3_code": "cqd",
74
- "tokenizers": {},
75
  "children": [],
 
76
  "node_i": "3871",
77
- "scripts": [],
78
- "own_tokenizer": false
79
  },
80
  {
81
  "name": "Miao, Southern Mashan",
82
  "iso_1_code": null,
83
  "iso_3_code": "hma",
84
- "tokenizers": {},
85
  "children": [],
 
86
  "node_i": "3872",
87
- "scripts": [],
88
- "own_tokenizer": false
89
  },
90
  {
91
  "name": "Miao, Central Huishui",
92
  "iso_1_code": null,
93
  "iso_3_code": "hmc",
94
- "tokenizers": {},
95
  "children": [],
 
96
  "node_i": "3873",
97
- "scripts": [],
98
- "own_tokenizer": false
99
  },
100
  {
101
  "name": "Miao, Large Flowery",
102
  "iso_1_code": null,
103
  "iso_3_code": "hmd",
104
- "tokenizers": {},
105
  "children": [],
 
106
  "node_i": "3874",
107
- "scripts": [],
108
- "own_tokenizer": false
109
  },
110
  {
111
  "name": "Miao, Eastern Huishui",
112
  "iso_1_code": null,
113
  "iso_3_code": "hme",
114
- "tokenizers": {},
115
  "children": [],
 
116
  "node_i": "3875",
117
- "scripts": [],
118
- "own_tokenizer": false
119
  },
120
  {
121
  "name": "Hmong Don",
122
  "iso_1_code": null,
123
  "iso_3_code": "hmf",
124
- "tokenizers": {},
125
  "children": [],
 
126
  "node_i": "3876",
127
- "scripts": [],
128
- "own_tokenizer": false
129
  },
130
  {
131
  "name": "Miao, Southwestern Guiyang",
132
  "iso_1_code": null,
133
  "iso_3_code": "hmg",
134
- "tokenizers": {},
135
  "children": [],
 
136
  "node_i": "3877",
137
- "scripts": [],
138
- "own_tokenizer": false
139
  },
140
  {
141
  "name": "Miao, Southwestern Huishui",
142
  "iso_1_code": null,
143
  "iso_3_code": "hmh",
144
- "tokenizers": {},
145
  "children": [],
 
146
  "node_i": "3878",
147
- "scripts": [],
148
- "own_tokenizer": false
149
  },
150
  {
151
  "name": "Miao, Northern Huishui",
152
  "iso_1_code": null,
153
  "iso_3_code": "hmi",
154
- "tokenizers": {},
155
  "children": [],
 
156
  "node_i": "3879",
157
- "scripts": [],
158
- "own_tokenizer": false
159
  },
160
  {
161
  "name": "Ge",
162
  "iso_1_code": null,
163
  "iso_3_code": "hmj",
164
- "tokenizers": {},
165
  "children": [],
 
166
  "node_i": "3880",
167
- "scripts": [],
168
- "own_tokenizer": false
169
  },
170
  {
171
  "name": "Miao, Luopohe",
172
  "iso_1_code": null,
173
  "iso_3_code": "hml",
174
- "tokenizers": {},
175
  "children": [],
 
176
  "node_i": "3881",
177
- "scripts": [],
178
- "own_tokenizer": false
179
  },
180
  {
181
  "name": "Miao, Central Mashan",
182
  "iso_1_code": null,
183
  "iso_3_code": "hmm",
184
- "tokenizers": {},
185
  "children": [],
 
186
  "node_i": "3882",
187
- "scripts": [],
188
- "own_tokenizer": false
189
  },
190
  {
191
  "name": "Miao, Northern Mashan",
192
  "iso_1_code": null,
193
  "iso_3_code": "hmp",
194
- "tokenizers": {},
195
  "children": [],
 
196
  "node_i": "3883",
197
- "scripts": [],
198
- "own_tokenizer": false
199
  },
200
  {
201
  "name": "Hmong D\u00f4",
202
  "iso_1_code": null,
203
  "iso_3_code": "hmv",
204
- "tokenizers": {},
205
  "children": [],
 
206
  "node_i": "3884",
207
- "scripts": [],
208
- "own_tokenizer": false
209
  },
210
  {
211
  "name": "Miao, Western Mashan",
212
  "iso_1_code": null,
213
  "iso_3_code": "hmw",
214
- "tokenizers": {},
215
  "children": [],
 
216
  "node_i": "3885",
217
- "scripts": [],
218
- "own_tokenizer": false
219
  },
220
  {
221
  "name": "Miao, Southern Guiyang",
222
  "iso_1_code": null,
223
  "iso_3_code": "hmy",
224
- "tokenizers": {},
225
  "children": [],
 
226
  "node_i": "3886",
227
- "scripts": [],
228
- "own_tokenizer": false
229
  },
230
  {
231
  "name": "Sinicized Miao",
232
  "iso_1_code": null,
233
  "iso_3_code": "hmz",
234
- "tokenizers": {},
235
  "children": [],
 
236
  "node_i": "3887",
237
- "scripts": [],
238
- "own_tokenizer": false
239
  },
240
  {
241
  "name": "Hmong Njua",
242
  "iso_1_code": null,
243
  "iso_3_code": "hnj",
244
- "tokenizers": {},
245
  "children": [],
 
246
  "node_i": "3888",
 
247
  "scripts": [
248
  "Latn"
249
- ],
250
- "own_tokenizer": false
251
  },
252
  {
253
  "name": "Miao, Horned",
254
  "iso_1_code": null,
255
  "iso_3_code": "hrm",
256
- "tokenizers": {},
257
  "children": [],
 
258
  "node_i": "3889",
259
- "scripts": [],
260
- "own_tokenizer": false
261
  },
262
  {
263
  "name": "Miao, Northern Guiyang",
264
  "iso_1_code": null,
265
  "iso_3_code": "huj",
266
- "tokenizers": {},
267
  "children": [],
 
268
  "node_i": "3890",
269
- "scripts": [],
270
- "own_tokenizer": false
271
  },
272
  {
273
  "name": "Hmong Daw",
274
  "iso_1_code": null,
275
  "iso_3_code": "mww",
276
- "tokenizers": {},
277
  "children": [],
 
278
  "node_i": "3891",
 
279
  "scripts": [
280
  "Latn"
281
- ],
282
- "own_tokenizer": false
283
  },
284
  {
285
  "name": "Miao, Small Flowery",
286
  "iso_1_code": null,
287
  "iso_3_code": "sfm",
288
- "tokenizers": {},
289
  "children": [],
 
290
  "node_i": "3892",
291
- "scripts": [],
292
- "own_tokenizer": false
293
  }
294
  ],
 
295
  "node_i": "3870",
296
- "scripts": [],
297
- "own_tokenizer": false
298
  },
299
  {
300
  "name": "Pa-hng",
301
  "iso_1_code": null,
302
  "iso_3_code": null,
303
- "tokenizers": {},
304
  "children": [
305
  {
306
  "name": "Pa-Hng",
307
  "iso_1_code": null,
308
  "iso_3_code": "pha",
309
- "tokenizers": {},
310
  "children": [],
 
311
  "node_i": "3894",
312
- "scripts": [],
313
- "own_tokenizer": false
314
  }
315
  ],
 
316
  "node_i": "3893",
317
- "scripts": [],
318
- "own_tokenizer": false
319
  },
320
  {
321
  "name": "Qiandong",
322
  "iso_1_code": null,
323
  "iso_3_code": null,
324
- "tokenizers": {},
325
  "children": [
326
  {
327
  "name": "Miao, Northern Qiandong",
328
  "iso_1_code": null,
329
  "iso_3_code": "hea",
330
- "tokenizers": {},
331
  "children": [],
 
332
  "node_i": "3896",
333
- "scripts": [],
334
- "own_tokenizer": false
335
  },
336
  {
337
  "name": "Miao, Eastern Qiandong",
338
  "iso_1_code": null,
339
  "iso_3_code": "hmq",
340
- "tokenizers": {},
341
  "children": [],
 
342
  "node_i": "3897",
343
- "scripts": [],
344
- "own_tokenizer": false
345
  },
346
  {
347
  "name": "Miao, Southern Qiandong",
348
  "iso_1_code": null,
349
  "iso_3_code": "hms",
350
- "tokenizers": {},
351
  "children": [],
 
352
  "node_i": "3898",
353
- "scripts": [],
354
- "own_tokenizer": false
355
  },
356
  {
357
  "name": "N\u00e1-Meo",
358
  "iso_1_code": null,
359
  "iso_3_code": "neo",
360
- "tokenizers": {},
361
  "children": [],
 
362
  "node_i": "3899",
363
- "scripts": [],
364
- "own_tokenizer": false
365
  }
366
  ],
 
367
  "node_i": "3895",
368
- "scripts": [],
369
- "own_tokenizer": false
370
  },
371
  {
372
  "name": "Xiangxi",
373
  "iso_1_code": null,
374
  "iso_3_code": null,
375
- "tokenizers": {},
376
  "children": [
377
  {
378
  "name": "Miao, Western Xiangxi",
379
  "iso_1_code": null,
380
  "iso_3_code": "mmr",
381
- "tokenizers": {},
382
  "children": [],
 
383
  "node_i": "3901",
384
- "scripts": [],
385
- "own_tokenizer": false
386
  },
387
  {
388
  "name": "Miao, Eastern Xiangxi",
389
  "iso_1_code": null,
390
  "iso_3_code": "muq",
391
- "tokenizers": {},
392
  "children": [],
 
393
  "node_i": "3902",
394
- "scripts": [],
395
- "own_tokenizer": false
396
  }
397
  ],
 
398
  "node_i": "3900",
399
- "scripts": [],
400
- "own_tokenizer": false
401
  }
402
  ],
 
403
  "node_i": "3864",
404
- "scripts": [],
405
- "own_tokenizer": false
406
  },
407
  {
408
  "name": "Ho Nte",
409
  "iso_1_code": null,
410
  "iso_3_code": null,
411
- "tokenizers": {},
412
  "children": [
413
  {
414
  "name": "She",
415
  "iso_1_code": null,
416
  "iso_3_code": "shx",
417
- "tokenizers": {},
418
  "children": [],
 
419
  "node_i": "3904",
420
- "scripts": [],
421
- "own_tokenizer": false
422
  }
423
  ],
 
424
  "node_i": "3903",
425
- "scripts": [],
426
- "own_tokenizer": false
427
  },
428
  {
429
  "name": "Mienic",
430
  "iso_1_code": null,
431
  "iso_3_code": null,
432
- "tokenizers": {},
433
  "children": [
434
  {
435
  "name": "Biao-Jiao",
436
  "iso_1_code": null,
437
  "iso_3_code": null,
438
- "tokenizers": {},
439
  "children": [
440
  {
441
  "name": "Biao-Jiao Mien",
442
  "iso_1_code": null,
443
  "iso_3_code": "bje",
444
- "tokenizers": {},
445
  "children": [],
 
446
  "node_i": "3907",
447
- "scripts": [],
448
- "own_tokenizer": false
449
  }
450
  ],
 
451
  "node_i": "3906",
452
- "scripts": [],
453
- "own_tokenizer": false
454
  },
455
  {
456
  "name": "Mian-Jin",
457
  "iso_1_code": null,
458
  "iso_3_code": null,
459
- "tokenizers": {},
460
  "children": [
461
  {
462
  "name": "Biao Mon",
463
  "iso_1_code": null,
464
  "iso_3_code": "bmt",
465
- "tokenizers": {},
466
  "children": [],
 
467
  "node_i": "3909",
468
- "scripts": [],
469
- "own_tokenizer": false
470
  },
471
  {
472
  "name": "Iu Mien",
473
  "iso_1_code": null,
474
  "iso_3_code": "ium",
475
- "tokenizers": {},
476
  "children": [],
 
477
  "node_i": "3910",
 
478
  "scripts": [
479
  "Latn"
480
- ],
481
- "own_tokenizer": false
482
  },
483
  {
484
  "name": "Kim Mun",
485
  "iso_1_code": null,
486
  "iso_3_code": "mji",
487
- "tokenizers": {},
488
  "children": [],
 
489
  "node_i": "3911",
490
- "scripts": [],
491
- "own_tokenizer": false
492
  }
493
  ],
 
494
  "node_i": "3908",
495
- "scripts": [],
496
- "own_tokenizer": false
497
  },
498
  {
499
  "name": "Zaomin",
500
  "iso_1_code": null,
501
  "iso_3_code": null,
502
- "tokenizers": {},
503
  "children": [
504
  {
505
  "name": "Dzao Min",
506
  "iso_1_code": null,
507
  "iso_3_code": "bpn",
508
- "tokenizers": {},
509
  "children": [],
 
510
  "node_i": "3913",
511
- "scripts": [],
512
- "own_tokenizer": false
513
  }
514
  ],
 
515
  "node_i": "3912",
516
- "scripts": [],
517
- "own_tokenizer": false
518
  }
519
  ],
 
520
  "node_i": "3905",
521
- "scripts": [],
522
- "own_tokenizer": false
523
  }
524
  ],
 
525
  "node_i": "3863",
526
- "scripts": [],
527
- "own_tokenizer": false
528
  }
 
2
  "name": "Hmong-Mien",
3
  "iso_1_code": null,
4
  "iso_3_code": null,
 
5
  "children": [
6
  {
7
  "name": "Hmongic",
8
  "iso_1_code": null,
9
  "iso_3_code": null,
 
10
  "children": [
11
  {
12
  "name": "Bunu",
13
  "iso_1_code": null,
14
  "iso_3_code": null,
 
15
  "children": [
16
  {
17
  "name": "Bunu, Younuo",
18
  "iso_1_code": null,
19
  "iso_3_code": "buh",
 
20
  "children": [],
21
+ "tokenizers": {},
22
  "node_i": "3866",
23
+ "native_tokenizers": [],
24
+ "scripts": []
25
  },
26
  {
27
  "name": "Bunu, Wunai",
28
  "iso_1_code": null,
29
  "iso_3_code": "bwn",
 
30
  "children": [],
31
+ "tokenizers": {},
32
  "node_i": "3867",
33
+ "native_tokenizers": [],
34
+ "scripts": []
35
  },
36
  {
37
  "name": "Bunu, Bu-Nao",
38
  "iso_1_code": null,
39
  "iso_3_code": "bwx",
 
40
  "children": [],
41
+ "tokenizers": {},
42
  "node_i": "3868",
43
+ "native_tokenizers": [],
44
+ "scripts": []
45
  },
46
  {
47
  "name": "Bunu, Jiongnai",
48
  "iso_1_code": null,
49
  "iso_3_code": "pnu",
 
50
  "children": [],
51
+ "tokenizers": {},
52
  "node_i": "3869",
53
+ "native_tokenizers": [],
54
+ "scripts": []
55
  }
56
  ],
57
+ "tokenizers": {},
58
  "node_i": "3865",
59
+ "native_tokenizers": [],
60
+ "scripts": []
61
  },
62
  {
63
  "name": "Chuanqiandian",
64
  "iso_1_code": null,
65
  "iso_3_code": null,
 
66
  "children": [
67
  {
68
  "name": "Miao, Chuanqiandian Cluster",
69
  "iso_1_code": null,
70
  "iso_3_code": "cqd",
 
71
  "children": [],
72
+ "tokenizers": {},
73
  "node_i": "3871",
74
+ "native_tokenizers": [],
75
+ "scripts": []
76
  },
77
  {
78
  "name": "Miao, Southern Mashan",
79
  "iso_1_code": null,
80
  "iso_3_code": "hma",
 
81
  "children": [],
82
+ "tokenizers": {},
83
  "node_i": "3872",
84
+ "native_tokenizers": [],
85
+ "scripts": []
86
  },
87
  {
88
  "name": "Miao, Central Huishui",
89
  "iso_1_code": null,
90
  "iso_3_code": "hmc",
 
91
  "children": [],
92
+ "tokenizers": {},
93
  "node_i": "3873",
94
+ "native_tokenizers": [],
95
+ "scripts": []
96
  },
97
  {
98
  "name": "Miao, Large Flowery",
99
  "iso_1_code": null,
100
  "iso_3_code": "hmd",
 
101
  "children": [],
102
+ "tokenizers": {},
103
  "node_i": "3874",
104
+ "native_tokenizers": [],
105
+ "scripts": []
106
  },
107
  {
108
  "name": "Miao, Eastern Huishui",
109
  "iso_1_code": null,
110
  "iso_3_code": "hme",
 
111
  "children": [],
112
+ "tokenizers": {},
113
  "node_i": "3875",
114
+ "native_tokenizers": [],
115
+ "scripts": []
116
  },
117
  {
118
  "name": "Hmong Don",
119
  "iso_1_code": null,
120
  "iso_3_code": "hmf",
 
121
  "children": [],
122
+ "tokenizers": {},
123
  "node_i": "3876",
124
+ "native_tokenizers": [],
125
+ "scripts": []
126
  },
127
  {
128
  "name": "Miao, Southwestern Guiyang",
129
  "iso_1_code": null,
130
  "iso_3_code": "hmg",
 
131
  "children": [],
132
+ "tokenizers": {},
133
  "node_i": "3877",
134
+ "native_tokenizers": [],
135
+ "scripts": []
136
  },
137
  {
138
  "name": "Miao, Southwestern Huishui",
139
  "iso_1_code": null,
140
  "iso_3_code": "hmh",
 
141
  "children": [],
142
+ "tokenizers": {},
143
  "node_i": "3878",
144
+ "native_tokenizers": [],
145
+ "scripts": []
146
  },
147
  {
148
  "name": "Miao, Northern Huishui",
149
  "iso_1_code": null,
150
  "iso_3_code": "hmi",
 
151
  "children": [],
152
+ "tokenizers": {},
153
  "node_i": "3879",
154
+ "native_tokenizers": [],
155
+ "scripts": []
156
  },
157
  {
158
  "name": "Ge",
159
  "iso_1_code": null,
160
  "iso_3_code": "hmj",
 
161
  "children": [],
162
+ "tokenizers": {},
163
  "node_i": "3880",
164
+ "native_tokenizers": [],
165
+ "scripts": []
166
  },
167
  {
168
  "name": "Miao, Luopohe",
169
  "iso_1_code": null,
170
  "iso_3_code": "hml",
 
171
  "children": [],
172
+ "tokenizers": {},
173
  "node_i": "3881",
174
+ "native_tokenizers": [],
175
+ "scripts": []
176
  },
177
  {
178
  "name": "Miao, Central Mashan",
179
  "iso_1_code": null,
180
  "iso_3_code": "hmm",
 
181
  "children": [],
182
+ "tokenizers": {},
183
  "node_i": "3882",
184
+ "native_tokenizers": [],
185
+ "scripts": []
186
  },
187
  {
188
  "name": "Miao, Northern Mashan",
189
  "iso_1_code": null,
190
  "iso_3_code": "hmp",
 
191
  "children": [],
192
+ "tokenizers": {},
193
  "node_i": "3883",
194
+ "native_tokenizers": [],
195
+ "scripts": []
196
  },
197
  {
198
  "name": "Hmong D\u00f4",
199
  "iso_1_code": null,
200
  "iso_3_code": "hmv",
 
201
  "children": [],
202
+ "tokenizers": {},
203
  "node_i": "3884",
204
+ "native_tokenizers": [],
205
+ "scripts": []
206
  },
207
  {
208
  "name": "Miao, Western Mashan",
209
  "iso_1_code": null,
210
  "iso_3_code": "hmw",
 
211
  "children": [],
212
+ "tokenizers": {},
213
  "node_i": "3885",
214
+ "native_tokenizers": [],
215
+ "scripts": []
216
  },
217
  {
218
  "name": "Miao, Southern Guiyang",
219
  "iso_1_code": null,
220
  "iso_3_code": "hmy",
 
221
  "children": [],
222
+ "tokenizers": {},
223
  "node_i": "3886",
224
+ "native_tokenizers": [],
225
+ "scripts": []
226
  },
227
  {
228
  "name": "Sinicized Miao",
229
  "iso_1_code": null,
230
  "iso_3_code": "hmz",
 
231
  "children": [],
232
+ "tokenizers": {},
233
  "node_i": "3887",
234
+ "native_tokenizers": [],
235
+ "scripts": []
236
  },
237
  {
238
  "name": "Hmong Njua",
239
  "iso_1_code": null,
240
  "iso_3_code": "hnj",
 
241
  "children": [],
242
+ "tokenizers": {},
243
  "node_i": "3888",
244
+ "native_tokenizers": [],
245
  "scripts": [
246
  "Latn"
247
+ ]
 
248
  },
249
  {
250
  "name": "Miao, Horned",
251
  "iso_1_code": null,
252
  "iso_3_code": "hrm",
 
253
  "children": [],
254
+ "tokenizers": {},
255
  "node_i": "3889",
256
+ "native_tokenizers": [],
257
+ "scripts": []
258
  },
259
  {
260
  "name": "Miao, Northern Guiyang",
261
  "iso_1_code": null,
262
  "iso_3_code": "huj",
 
263
  "children": [],
264
+ "tokenizers": {},
265
  "node_i": "3890",
266
+ "native_tokenizers": [],
267
+ "scripts": []
268
  },
269
  {
270
  "name": "Hmong Daw",
271
  "iso_1_code": null,
272
  "iso_3_code": "mww",
 
273
  "children": [],
274
+ "tokenizers": {},
275
  "node_i": "3891",
276
+ "native_tokenizers": [],
277
  "scripts": [
278
  "Latn"
279
+ ]
 
280
  },
281
  {
282
  "name": "Miao, Small Flowery",
283
  "iso_1_code": null,
284
  "iso_3_code": "sfm",
 
285
  "children": [],
286
+ "tokenizers": {},
287
  "node_i": "3892",
288
+ "native_tokenizers": [],
289
+ "scripts": []
290
  }
291
  ],
292
+ "tokenizers": {},
293
  "node_i": "3870",
294
+ "native_tokenizers": [],
295
+ "scripts": []
296
  },
297
  {
298
  "name": "Pa-hng",
299
  "iso_1_code": null,
300
  "iso_3_code": null,
 
301
  "children": [
302
  {
303
  "name": "Pa-Hng",
304
  "iso_1_code": null,
305
  "iso_3_code": "pha",
 
306
  "children": [],
307
+ "tokenizers": {},
308
  "node_i": "3894",
309
+ "native_tokenizers": [],
310
+ "scripts": []
311
  }
312
  ],
313
+ "tokenizers": {},
314
  "node_i": "3893",
315
+ "native_tokenizers": [],
316
+ "scripts": []
317
  },
318
  {
319
  "name": "Qiandong",
320
  "iso_1_code": null,
321
  "iso_3_code": null,
 
322
  "children": [
323
  {
324
  "name": "Miao, Northern Qiandong",
325
  "iso_1_code": null,
326
  "iso_3_code": "hea",
 
327
  "children": [],
328
+ "tokenizers": {},
329
  "node_i": "3896",
330
+ "native_tokenizers": [],
331
+ "scripts": []
332
  },
333
  {
334
  "name": "Miao, Eastern Qiandong",
335
  "iso_1_code": null,
336
  "iso_3_code": "hmq",
 
337
  "children": [],
338
+ "tokenizers": {},
339
  "node_i": "3897",
340
+ "native_tokenizers": [],
341
+ "scripts": []
342
  },
343
  {
344
  "name": "Miao, Southern Qiandong",
345
  "iso_1_code": null,
346
  "iso_3_code": "hms",
 
347
  "children": [],
348
+ "tokenizers": {},
349
  "node_i": "3898",
350
+ "native_tokenizers": [],
351
+ "scripts": []
352
  },
353
  {
354
  "name": "N\u00e1-Meo",
355
  "iso_1_code": null,
356
  "iso_3_code": "neo",
 
357
  "children": [],
358
+ "tokenizers": {},
359
  "node_i": "3899",
360
+ "native_tokenizers": [],
361
+ "scripts": []
362
  }
363
  ],
364
+ "tokenizers": {},
365
  "node_i": "3895",
366
+ "native_tokenizers": [],
367
+ "scripts": []
368
  },
369
  {
370
  "name": "Xiangxi",
371
  "iso_1_code": null,
372
  "iso_3_code": null,
 
373
  "children": [
374
  {
375
  "name": "Miao, Western Xiangxi",
376
  "iso_1_code": null,
377
  "iso_3_code": "mmr",
 
378
  "children": [],
379
+ "tokenizers": {},
380
  "node_i": "3901",
381
+ "native_tokenizers": [],
382
+ "scripts": []
383
  },
384
  {
385
  "name": "Miao, Eastern Xiangxi",
386
  "iso_1_code": null,
387
  "iso_3_code": "muq",
 
388
  "children": [],
389
+ "tokenizers": {},
390
  "node_i": "3902",
391
+ "native_tokenizers": [],
392
+ "scripts": []
393
  }
394
  ],
395
+ "tokenizers": {},
396
  "node_i": "3900",
397
+ "native_tokenizers": [],
398
+ "scripts": []
399
  }
400
  ],
401
+ "tokenizers": {},
402
  "node_i": "3864",
403
+ "native_tokenizers": [],
404
+ "scripts": []
405
  },
406
  {
407
  "name": "Ho Nte",
408
  "iso_1_code": null,
409
  "iso_3_code": null,
 
410
  "children": [
411
  {
412
  "name": "She",
413
  "iso_1_code": null,
414
  "iso_3_code": "shx",
 
415
  "children": [],
416
+ "tokenizers": {},
417
  "node_i": "3904",
418
+ "native_tokenizers": [],
419
+ "scripts": []
420
  }
421
  ],
422
+ "tokenizers": {},
423
  "node_i": "3903",
424
+ "native_tokenizers": [],
425
+ "scripts": []
426
  },
427
  {
428
  "name": "Mienic",
429
  "iso_1_code": null,
430
  "iso_3_code": null,
 
431
  "children": [
432
  {
433
  "name": "Biao-Jiao",
434
  "iso_1_code": null,
435
  "iso_3_code": null,
 
436
  "children": [
437
  {
438
  "name": "Biao-Jiao Mien",
439
  "iso_1_code": null,
440
  "iso_3_code": "bje",
 
441
  "children": [],
442
+ "tokenizers": {},
443
  "node_i": "3907",
444
+ "native_tokenizers": [],
445
+ "scripts": []
446
  }
447
  ],
448
+ "tokenizers": {},
449
  "node_i": "3906",
450
+ "native_tokenizers": [],
451
+ "scripts": []
452
  },
453
  {
454
  "name": "Mian-Jin",
455
  "iso_1_code": null,
456
  "iso_3_code": null,
 
457
  "children": [
458
  {
459
  "name": "Biao Mon",
460
  "iso_1_code": null,
461
  "iso_3_code": "bmt",
 
462
  "children": [],
463
+ "tokenizers": {},
464
  "node_i": "3909",
465
+ "native_tokenizers": [],
466
+ "scripts": []
467
  },
468
  {
469
  "name": "Iu Mien",
470
  "iso_1_code": null,
471
  "iso_3_code": "ium",
 
472
  "children": [],
473
+ "tokenizers": {},
474
  "node_i": "3910",
475
+ "native_tokenizers": [],
476
  "scripts": [
477
  "Latn"
478
+ ]
 
479
  },
480
  {
481
  "name": "Kim Mun",
482
  "iso_1_code": null,
483
  "iso_3_code": "mji",
 
484
  "children": [],
485
+ "tokenizers": {},
486
  "node_i": "3911",
487
+ "native_tokenizers": [],
488
+ "scripts": []
489
  }
490
  ],
491
+ "tokenizers": {},
492
  "node_i": "3908",
493
+ "native_tokenizers": [],
494
+ "scripts": []
495
  },
496
  {
497
  "name": "Zaomin",
498
  "iso_1_code": null,
499
  "iso_3_code": null,
 
500
  "children": [
501
  {
502
  "name": "Dzao Min",
503
  "iso_1_code": null,
504
  "iso_3_code": "bpn",
 
505
  "children": [],
506
+ "tokenizers": {},
507
  "node_i": "3913",
508
+ "native_tokenizers": [],
509
+ "scripts": []
510
  }
511
  ],
512
+ "tokenizers": {},
513
  "node_i": "3912",
514
+ "native_tokenizers": [],
515
+ "scripts": []
516
  }
517
  ],
518
+ "tokenizers": {},
519
  "node_i": "3905",
520
+ "native_tokenizers": [],
521
+ "scripts": []
522
  }
523
  ],
524
+ "tokenizers": {},
525
  "node_i": "3863",
526
+ "native_tokenizers": [],
527
+ "scripts": []
528
  }