diff --git "a/data/Austro-Asiatic.json" "b/data/Austro-Asiatic.json" --- "a/data/Austro-Asiatic.json" +++ "b/data/Austro-Asiatic.json" @@ -2,3264 +2,3158 @@ "name": "Austro-Asiatic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mon-Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Aslian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jah Hut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Jah Hut", "iso_1_code": null, "iso_3_code": "jah", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1167", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1166", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Aslian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chewong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cheq Wong", "iso_1_code": null, "iso_3_code": "cwg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1170", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1169", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Batek", "iso_1_code": null, "iso_3_code": "btq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1172", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jehai", "iso_1_code": null, "iso_3_code": "jhi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1173", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Minriq", "iso_1_code": null, "iso_3_code": "mnq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1174", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mintil", "iso_1_code": null, "iso_3_code": "mzt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1175", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1171", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tonga", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ten\u2019edn", "iso_1_code": null, "iso_3_code": "tnz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1177", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1176", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kintaq", "iso_1_code": null, "iso_3_code": "knq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1179", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kensiu", "iso_1_code": null, "iso_3_code": "kns", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1180", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1178", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1168", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Senoic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lanoh", "iso_1_code": null, "iso_3_code": "lnh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1182", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sab\u00fcm", "iso_1_code": null, "iso_3_code": "sbo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1183", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Semai", "iso_1_code": null, "iso_3_code": "sea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1184", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Semnam", "iso_1_code": null, "iso_3_code": "ssm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1185", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Temiar", "iso_1_code": null, "iso_3_code": "tea", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1186", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1181", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Aslian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mah Meri", "iso_1_code": null, "iso_3_code": "mhe", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1188", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Semelai", "iso_1_code": null, "iso_3_code": "sza", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1189", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Semaq Beri", "iso_1_code": null, "iso_3_code": "szc", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1190", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Temoq", "iso_1_code": null, "iso_3_code": "tmo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1191", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1187", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1165", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Eastern Mon-Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Central Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Alak", "iso_1_code": null, "iso_3_code": "alk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1195", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bahnar", "iso_1_code": null, "iso_3_code": "bdq", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1196", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Romam", "iso_1_code": null, "iso_3_code": "rmx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1197", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tampuan", "iso_1_code": null, "iso_3_code": "tpu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1198", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1194", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Cua", "iso_1_code": null, "iso_3_code": "cua", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1200", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1199", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "North Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Katua", "iso_1_code": null, "iso_3_code": "kta", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1202", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kachok", "iso_1_code": null, "iso_3_code": "xkk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1203", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kayong", "iso_1_code": null, "iso_3_code": "kxy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1205", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Takua", "iso_1_code": null, "iso_3_code": "tkz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1206", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1204", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Trieng", "iso_1_code": null, "iso_3_code": "stg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1208", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Talieng", "iso_1_code": null, "iso_3_code": "tdf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1209", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Duan", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Halang Doan", "iso_1_code": null, "iso_3_code": "hld", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1211", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1210", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jeh-Halang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Halang", "iso_1_code": null, "iso_3_code": "hal", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1213", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Jeh", "iso_1_code": null, "iso_3_code": "jeh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1214", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1212", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rengao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Rengao", "iso_1_code": null, "iso_3_code": "ren", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1216", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1215", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sedang-Todrah", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Sedang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hre", "iso_1_code": null, "iso_3_code": "hre", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1219", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sedang", "iso_1_code": null, "iso_3_code": "sed", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1220", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1218", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Todrah-Monom", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Monom", "iso_1_code": null, "iso_3_code": "moo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1222", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Todrah", "iso_1_code": null, "iso_3_code": "tdr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1223", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1221", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1217", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1207", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1201", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Stieng, Budeh", "iso_1_code": null, "iso_3_code": "stt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1225", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sre-Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mnong, Eastern", "iso_1_code": null, "iso_3_code": "mng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1229", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1228", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern-Central Mnong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Mnong, Central", "iso_1_code": null, "iso_3_code": "cmo", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1231", + "native_tokenizers": [], "scripts": [ "Latn", "Khmr" - ], - "own_tokenizer": false + ] }, { "name": "Mnong, Southern", "iso_1_code": null, "iso_3_code": "mnn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1232", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kraol", "iso_1_code": null, "iso_3_code": "rka", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1233", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1230", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1227", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sre", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Maa", "iso_1_code": null, "iso_3_code": "cma", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1235", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koho", "iso_1_code": null, "iso_3_code": "kpm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1236", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1234", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1226", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Stieng-Chrau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chrau", "iso_1_code": null, "iso_3_code": "crw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1238", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mel-Khaonh", "iso_1_code": null, "iso_3_code": "hkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1239", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Stieng, Bulo", "iso_1_code": null, "iso_3_code": "sti", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1240", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1237", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "1224", - "scripts": [], - "own_tokenizer": false - }, - { + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, + "node_i": "1224", + "native_tokenizers": [], + "scripts": [] + }, + { "name": "West Bahnaric", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lavi", "iso_1_code": null, "iso_3_code": "lvi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1242", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Brao-Kravet", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Brao", "iso_1_code": null, "iso_3_code": "brb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1244", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Krung", "iso_1_code": null, "iso_3_code": "krr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1245", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kavet", "iso_1_code": null, "iso_3_code": "krv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1246", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sou", "iso_1_code": null, "iso_3_code": "sqq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1247", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1243", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Laven", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Laven", "iso_1_code": null, "iso_3_code": "lbo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1249", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1248", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyaheun", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyaheun", "iso_1_code": null, "iso_3_code": "nev", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1251", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1250", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Oi-The", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Oy", "iso_1_code": null, "iso_3_code": "oyb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1253", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sapuan", "iso_1_code": null, "iso_3_code": "spu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1254", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1252", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1241", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "1193", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Katuic", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "1193", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Katuic", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Central Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ta\u2019oih", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Ir", "iso_1_code": null, "iso_3_code": "irr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1258", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ong", "iso_1_code": null, "iso_3_code": "oog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1259", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ta\u2019oih, Upper", "iso_1_code": null, "iso_3_code": "tth", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1260", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ta\u2019oih, Lower", "iso_1_code": null, "iso_3_code": "tto", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1261", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1257", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1256", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "East Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Katu-Pacoh", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Katu, Eastern", "iso_1_code": null, "iso_3_code": "ktv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1264", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Katu, Western", "iso_1_code": null, "iso_3_code": "kuf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1265", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pacoh", "iso_1_code": null, "iso_3_code": "pac", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1266", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Phuong", "iso_1_code": null, "iso_3_code": "phg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1267", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tareng", "iso_1_code": null, "iso_3_code": "tgr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1268", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1263", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngeq-Nkriang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kriang", "iso_1_code": null, "iso_3_code": "ngt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1270", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1269", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1262", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "West Katuic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bru", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bru, Eastern", "iso_1_code": null, "iso_3_code": "bru", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1273", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Bru, Western", "iso_1_code": null, "iso_3_code": "brv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1274", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Katang, Northern", "iso_1_code": null, "iso_3_code": "ncq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1275", + "native_tokenizers": [], "scripts": [ "Laoo" - ], - "own_tokenizer": false + ] }, { "name": "Katang, Southern", "iso_1_code": null, "iso_3_code": "sct", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1276", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "So", "iso_1_code": null, "iso_3_code": "sss", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1277", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khua", "iso_1_code": null, "iso_3_code": "xhv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1278", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1272", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuay", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kuay", "iso_1_code": null, "iso_3_code": "kdt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1280", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nyeu", "iso_1_code": null, "iso_3_code": "nyl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1281", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1279", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1271", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1255", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khmer", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Khmer", "iso_1_code": "km", "iso_3_code": "khm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1283", + "native_tokenizers": [], "scripts": [ "Khmr" - ], - "own_tokenizer": false + ] }, { "name": "Khmer, Northern", "iso_1_code": null, "iso_3_code": "kxm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1284", + "native_tokenizers": [], "scripts": [ "Thai" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "1282", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pearic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Eastern", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Pear", "iso_1_code": null, "iso_3_code": "pcb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1287", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1286", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chong", "iso_1_code": null, "iso_3_code": "cog", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1290", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chung", "iso_1_code": null, "iso_3_code": "scq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1291", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1289", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samre", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Somray", "iso_1_code": null, "iso_3_code": "smu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1293", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samre", "iso_1_code": null, "iso_3_code": "sxm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1294", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1292", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Suoy", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Su\u2019ung", "iso_1_code": null, "iso_3_code": "syo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1296", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1295", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1288", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1285", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1192", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Monic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mon", "iso_1_code": null, "iso_3_code": "mnw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1298", + "native_tokenizers": [], "scripts": [ "Mymr" - ], - "own_tokenizer": false + ] } ], + "tokenizers": {}, "node_i": "1297", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nicobar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Car", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Nicobarese, Car", "iso_1_code": null, "iso_3_code": "caq", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1301", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1300", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chowra-Teressa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Chaura", "iso_1_code": null, "iso_3_code": "crv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1303", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Teressa", "iso_1_code": null, "iso_3_code": "tef", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1304", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1302", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Great Nicobar", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nicobarese, Southern", "iso_1_code": null, "iso_3_code": "nik", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1306", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1305", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Nancowry", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nicobarese, Central", "iso_1_code": null, "iso_3_code": "ncb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1308", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1307", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Shom Peng", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Shom Peng", "iso_1_code": null, "iso_3_code": "sii", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1310", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1309", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "1299", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Northern Mon-Khmer", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "1299", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Northern Mon-Khmer", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Khasian", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "War-Jaintia", "iso_1_code": null, "iso_3_code": "aml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1313", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khasi", "iso_1_code": null, "iso_3_code": "kha", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1314", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] }, { "name": "Lyngngam", "iso_1_code": null, "iso_3_code": "lyg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1315", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Pnar", "iso_1_code": null, "iso_3_code": "pbv", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1316", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1312", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khmuic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Khao", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Khao", "iso_1_code": null, "iso_3_code": "xao", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1319", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1318", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mal-Khmu\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Khmu\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Khuen", "iso_1_code": null, "iso_3_code": "khf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1322", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Khmu", "iso_1_code": null, "iso_3_code": "kjg", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1323", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "O\u2019du", "iso_1_code": null, "iso_3_code": "tyh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1324", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1321", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mal-Prai", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mal", "iso_1_code": null, "iso_3_code": "mlf", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1326", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Prai", "iso_1_code": null, "iso_3_code": "prt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1327", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1325", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1320", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mlabri", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mlabri", "iso_1_code": null, "iso_3_code": "mra", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1329", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1328", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Xinh Mul", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Phong-Kniang", "iso_1_code": null, "iso_3_code": "pnx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1331", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Puoc", "iso_1_code": null, "iso_3_code": "puo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1332", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1330", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1317", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Mang", "iso_1_code": null, "iso_3_code": "zng", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1334", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1333", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palaungic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Eastern Palaungic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Angkuic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hu", "iso_1_code": null, "iso_3_code": "huo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1338", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kon Keu", "iso_1_code": null, "iso_3_code": "kkn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1339", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Man Met", "iso_1_code": null, "iso_3_code": "mml", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1340", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mok", "iso_1_code": null, "iso_3_code": "mqt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1341", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Samtao", "iso_1_code": null, "iso_3_code": "stu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1342", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tai Loi", "iso_1_code": null, "iso_3_code": "tlq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1343", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muak Sa-aak", "iso_1_code": null, "iso_3_code": "ukk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1344", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "U", "iso_1_code": null, "iso_3_code": "uuu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1345", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kiorr", "iso_1_code": null, "iso_3_code": "xko", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1346", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1337", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bit-Khang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bit", "iso_1_code": null, "iso_3_code": "bgk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1348", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bumang", "iso_1_code": null, "iso_3_code": "bvp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1349", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kh\u00e1ng", "iso_1_code": null, "iso_3_code": "kjm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1350", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1347", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lametic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Con", "iso_1_code": null, "iso_3_code": "cno", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1352", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Rmeet", "iso_1_code": null, "iso_3_code": "lbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1353", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1351", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Waic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Bulang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Blang", "iso_1_code": null, "iso_3_code": "blr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1356", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1355", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Lawa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Lawa, Western", "iso_1_code": null, "iso_3_code": "lcp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1358", + "native_tokenizers": [], "scripts": [ "Thai" - ], - "own_tokenizer": false + ] }, { "name": "Lawa, Eastern", "iso_1_code": null, "iso_3_code": "lwl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1359", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1357", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wa", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Wa, Parauk", "iso_1_code": null, "iso_3_code": "prk", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1361", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Awa", "iso_1_code": null, "iso_3_code": "vwa", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1362", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Wa, Vo", "iso_1_code": null, "iso_3_code": "wbm", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1363", + "native_tokenizers": [], "scripts": [ "Latn" - ], - "own_tokenizer": false + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1360", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1354", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1336", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Western Palaungic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Danau", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Danau", "iso_1_code": null, "iso_3_code": "dnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1366", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1365", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palaung", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Palaung, Ruching", "iso_1_code": null, "iso_3_code": "pce", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1368", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palaung, Shwe", "iso_1_code": null, "iso_3_code": "pll", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1369", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palaung, Rumai", "iso_1_code": null, "iso_3_code": "rbb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1370", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1367", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Riang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Riang Lang", "iso_1_code": null, "iso_3_code": "ril", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1372", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Riang Lai", "iso_1_code": null, "iso_3_code": "yin", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1373", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1371", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1364", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1335", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1311", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Palyu", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bugan", "iso_1_code": null, "iso_3_code": "bbh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1375", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bolyu", "iso_1_code": null, "iso_3_code": "ply", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1376", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1374", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Southern Monic", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Nyahkur", "iso_1_code": null, "iso_3_code": "cbn", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1378", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1377", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Unclassified", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kemiehua", "iso_1_code": null, "iso_3_code": "kfj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1380", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kuanhua", "iso_1_code": null, "iso_3_code": "xnh", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1381", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1379", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Viet-Muong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Chut", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Arem", "iso_1_code": null, "iso_3_code": "aem", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1384", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Maleng", "iso_1_code": null, "iso_3_code": "pkt", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1385", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Chut", "iso_1_code": null, "iso_3_code": "scb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1386", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1383", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Cuoi", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Hung", "iso_1_code": null, "iso_3_code": "hnu", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1388", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Tho", "iso_1_code": null, "iso_3_code": "tou", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1389", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1387", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muong", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bo", "iso_1_code": null, "iso_3_code": "bgl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1391", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Muong", "iso_1_code": null, "iso_3_code": "mtq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1392", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ngu\u00f4n", "iso_1_code": null, "iso_3_code": "nuo", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1393", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1390", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Thavung", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Aheu", "iso_1_code": null, "iso_3_code": "thm", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1395", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1394", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Vietnamese", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Vietnamese", "iso_1_code": "vi", "iso_3_code": "vie", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1397", - "scripts": [ + "native_tokenizers": [ "Latn" ], - "own_tokenizer": true + "scripts": [ + "Latn" + ] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1396", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1382", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "1164", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Munda", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "1164", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Munda", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "North Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Kherwari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Agariya", "iso_1_code": null, "iso_3_code": "agi", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1401", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Bijori", "iso_1_code": null, "iso_3_code": "bix", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1402", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kodaku", "iso_1_code": null, "iso_3_code": "ksz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1403", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mundari", "iso_1_code": null, "iso_3_code": null, - "tokenizers": { - "Latn": { - "full_object": "SpaCyTokenizer(\"vi\")", - "original_lang_name": "vietnamese", - "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false - } - }, "children": [ { "name": "Asuri", "iso_1_code": null, "iso_3_code": "asr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1405", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Birhor", "iso_1_code": null, "iso_3_code": "biy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1406", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koda", "iso_1_code": null, "iso_3_code": "cdz", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1407", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Kol", "iso_1_code": null, "iso_3_code": "ekl", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1408", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Ho", "iso_1_code": null, "iso_3_code": "hoc", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1409", + "native_tokenizers": [], "scripts": [ "Latn", "Wara" - ], - "own_tokenizer": false + ] }, { "name": "Korwa", "iso_1_code": null, "iso_3_code": "kfp", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1410", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Mundari", "iso_1_code": null, "iso_3_code": "unr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1411", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Munda", "iso_1_code": null, "iso_3_code": "unx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1412", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], - "node_i": "1404", - "scripts": [], - "own_tokenizer": false - }, - { - "name": "Santali", - "iso_1_code": null, - "iso_3_code": null, "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, + "node_i": "1404", + "native_tokenizers": [], + "scripts": [] + }, + { + "name": "Santali", + "iso_1_code": null, + "iso_3_code": null, "children": [ { "name": "Mahali", "iso_1_code": null, "iso_3_code": "mjx", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1414", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Santhali", "iso_1_code": null, "iso_3_code": "sat", + "children": [], "tokenizers": { "Latn": { "full_object": "SpaCyTokenizer(\"vi\")", "original_lang_name": "vietnamese", "original_lang_code": "vie", - "scripts": [ - "Latn" - ], - "class_name": "SpaCyTokenizer", - "macrolanguage": false + "script": "Latn", + "class_name": "SpaCyTokenizer" } }, - "children": [], "node_i": "1415", + "native_tokenizers": [], "scripts": [ "Latn", "Olck" - ], - "own_tokenizer": false + ] }, { "name": "Turi", "iso_1_code": null, "iso_3_code": "trd", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1416", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1413", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1400", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Korku", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Korku", "iso_1_code": null, "iso_3_code": "kfq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1418", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1417", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1399", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "South Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Kharia-Juang", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Juang", "iso_1_code": null, "iso_3_code": "jun", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1421", + "native_tokenizers": [], "scripts": [ "Orya" - ], - "own_tokenizer": false + ] }, { "name": "Kharia", "iso_1_code": null, "iso_3_code": "khr", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1422", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1420", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Koraput Munda", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gutob-Remo-Geta\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Geta\u2019", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gata\u2019", "iso_1_code": null, "iso_3_code": "gaq", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1426", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1425", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gutob-Remo", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Bondo", "iso_1_code": null, "iso_3_code": "bfw", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1428", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Gadaba, Bodo", "iso_1_code": null, "iso_3_code": "gbj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1429", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1427", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1424", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sora-Juray-Gorum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Gorum", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Parenga", "iso_1_code": null, "iso_3_code": "pcj", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1432", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1431", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sora-Juray", "iso_1_code": null, "iso_3_code": null, - "tokenizers": {}, "children": [ { "name": "Juray", "iso_1_code": null, "iso_3_code": "juy", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1434", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] }, { "name": "Sora", "iso_1_code": null, "iso_3_code": "srb", - "tokenizers": {}, "children": [], + "tokenizers": {}, "node_i": "1435", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1433", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1430", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1423", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": {}, "node_i": "1419", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1398", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } ], + "tokenizers": { + "Latn": { + "full_object": "SpaCyTokenizer(\"vi\")", + "original_lang_name": "vietnamese", + "original_lang_code": "vie", + "script": "Latn", + "class_name": "SpaCyTokenizer" + } + }, "node_i": "1163", - "scripts": [], - "own_tokenizer": false + "native_tokenizers": [], + "scripts": [] } \ No newline at end of file