ing0 committed on
Commit
5749475
·
1 Parent(s): 060348d

mandarin hub

Browse files
diffrhythm/g2p/g2p/mandarin.py CHANGED
@@ -11,6 +11,7 @@ from typing import List
11
  from diffrhythm.g2p.g2p.chinese_model_g2p import BertPolyPredict
12
  from diffrhythm.g2p.utils.front_utils import *
13
  import os
 
14
 
15
  # from g2pw import G2PWConverter
16
 
@@ -181,18 +182,17 @@ _bopomofo_to_ipa = [
181
  ]
182
  must_not_er_words = {"女儿", "老儿", "男儿", "少儿", "小儿"}
183
 
 
 
 
 
 
184
  word_pinyin_dict = {}
185
- with open(
186
- r"./diffrhythm/g2p/sources/chinese_lexicon.txt", "r", encoding="utf-8"
187
- ) as fread:
188
  txt_list = fread.readlines()
189
- for i, txt in enumerate(txt_list):
190
- try:
191
- word, pinyin = txt.strip().split("\t")
192
- word_pinyin_dict[word] = pinyin
193
- except:
194
- print(txt.strip())
195
- print(f"************** {i} ****************")
196
  fread.close()
197
 
198
  pinyin_2_bopomofo_dict = {}
 
11
  from diffrhythm.g2p.g2p.chinese_model_g2p import BertPolyPredict
12
  from diffrhythm.g2p.utils.front_utils import *
13
  import os
14
+ from huggingface_hub import hf_hub_download
15
 
16
  # from g2pw import G2PWConverter
17
 
 
182
  ]
183
  must_not_er_words = {"女儿", "老儿", "男儿", "少儿", "小儿"}
184
 
185
+
186
+ chinese_lexicon_path = hf_hub_download(
187
+ repo_id="ASLP-lab/DiffRhythm",
188
+ filename="chinese_lexicon.txt"
189
+ )
190
  word_pinyin_dict = {}
191
+ with open(chinese_lexicon_path, "r", encoding="utf-8") as fread:
 
 
192
  txt_list = fread.readlines()
193
+ for txt in txt_list:
194
+ word, pinyin = txt.strip().split("\t")
195
+ word_pinyin_dict[word] = pinyin
 
 
 
 
196
  fread.close()
197
 
198
  pinyin_2_bopomofo_dict = {}
diffrhythm/g2p/sources/bpmf_2_pinyin.txt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf24a3306ffbef6b1fbfc1780e25933361bc4d6587b8eb331b13241b8d892ba2
3
- size 256
 
 
 
 
diffrhythm/g2p/sources/g2p_chinese_model/polychar.txt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9df819e371415c23e26a2907236a0228ef93772cc698d93dc0c18844956d1011
3
- size 636
 
 
 
 
diffrhythm/g2p/sources/g2p_chinese_model/vocab.txt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:45bbac6b341c319adc98a532532882e91a9cefc0329aa57bac9ae761c27b291c
3
- size 109540
 
 
 
 
diffrhythm/g2p/sources/pinyin_2_bpmf.txt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc8b048a20cf61d04b5a36bc6939db74095719b8099269a1269023ee3e6535b4
3
- size 5262