File size: 567 Bytes
b96e750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Copyright (c) 2024 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import os


def generate_poly_lexicon(file_path: str) -> dict:
    """Generate poly char lexicon for Mandarin Chinese.

    Reads ``file_path`` as UTF-8 text, treating every line as one lexicon
    entry, and collects the distinct entries.

    Args:
        file_path: Path of the lexicon file to read.

    Returns:
        A dict whose keys are the distinct lines of the file (newline
        characters removed, any other whitespace kept) in order of first
        appearance, each mapped to the constant ``1``. A blank line
        produces the key ``""``.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    with open(file_path, "r", encoding="utf-8") as readf:
        # Stream the file line by line instead of loading it with
        # readlines(); the ``with`` statement closes the handle, so no
        # explicit close() is needed. dict.fromkeys keeps the first
        # occurrence of each key, which makes a membership check redundant.
        return dict.fromkeys((line.strip("\n") for line in readf), 1)