Commit 826b5e0
Parent(s): d6566ec
Update app.py
app.py CHANGED
@@ -56,9 +56,9 @@ device = (
         else "cpu"
     )
 )
-
+device = "cpu"
 BandList = {
-
+    "PoppinParty":["香澄","有咲","たえ","りみ","沙綾"],
     "Afterglow":["蘭","モカ","ひまり","巴","つぐみ"],
     "HelloHappyWorld":["こころ","美咲","薫","花音","はぐみ"],
     "PastelPalettes":["彩","日菜","千聖","イヴ","麻弥"],
@@ -86,9 +86,10 @@ def get_net_g(model_path: str, version: str, device: str, hps):
     return net_g
 
 def get_text(text, language_str, hps, device):
+    # implement the current version of get_text here
     norm_text, phone, tone, word2ph = clean_text(text, language_str)
     phone, tone, language = cleaned_text_to_sequence(phone, tone, language_str)
-
+
     if hps.data.add_blank:
         phone = commons.intersperse(phone, 0)
         tone = commons.intersperse(tone, 0)
@@ -157,10 +158,12 @@ def infer(
 ):
 
     language= 'JP' if is_japanese(text) else 'ZH'
+    print(language)
     bert, ja_bert, en_bert, phones, tones, lang_ids = get_text(
         text, language, hps, device
     )
     emo = get_emo_(reference_audio, emotion)
+    print(emo)
     with torch.no_grad():
         x_tst = phones.to(device).unsqueeze(0)
         tones = tones.to(device).unsqueeze(0)
@@ -170,7 +173,6 @@ def infer(
         en_bert = en_bert.to(device).unsqueeze(0)
         x_tst_lengths = torch.LongTensor([phones.size(0)]).to(device)
         emo = emo.to(device).unsqueeze(0)
-        print(emo)
         del phones
         speakers = torch.LongTensor([hps.data.spk2id[sid]]).to(device)
         audio = (
@@ -216,7 +218,7 @@ if __name__ == "__main__":
     emotional_model = EmotionModel.from_pretrained(emotional_model_name).to(device)
     languages = [ "Auto", "ZH", "JP"]
     modelPaths = []
-    for dirpath, dirnames, filenames in os.walk(
+    for dirpath, dirnames, filenames in os.walk('Data/Bushiroad/models/'):
         for filename in filenames:
             modelPaths.append(os.path.join(dirpath, filename))
     hps = utils.get_hparams_from_file('Data/Bushiroad/configs/config.json')
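A note on the first hunk: the dangling else "cpu" and the two closing parentheses in the context are the tail of the usual nested device-selection expression in Bert-VITS2-style apps, and the commit pins inference to CPU by overriding its result. A minimal sketch of that pattern, assuming the standard cuda/mps/cpu chain (the full expression sits above the hunk and is not shown in the diff):

import sys
import torch

# Assumed shape of the device pick whose tail appears as hunk context:
device = (
    "cuda:0"
    if torch.cuda.is_available()
    else (
        "mps"
        if sys.platform == "darwin" and torch.backends.mps.is_available()
        else "cpu"
    )
)
device = "cpu"  # the commit's unconditional override: always infer on CPU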
|
|
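For context on the add_blank branch of get_text: in VITS-family repos, commons.intersperse interleaves a blank token (here 0) between consecutive phoneme and tone IDs, giving the model an explicit boundary symbol between units. A self-contained sketch of the standard upstream helper, assuming this repo matches it (commons.py is the authoritative source):

def intersperse(lst, item):
    # [a, b, c] with item=0 -> [0, a, 0, b, 0, c, 0]:
    # allocate 2*len(lst)+1 slots of item, then write lst into the odd slots.
    result = [item] * (len(lst) * 2 + 1)
    result[1::2] = lst
    return result

print(intersperse([5, 7, 9], 0))  # [0, 5, 0, 7, 0, 9, 0]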
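On the routing line in infer, language= 'JP' if is_japanese(text) else 'ZH': the repo's is_japanese is not part of this diff, so the stand-in below is hypothetical. It keys on kana, which only Japanese uses; text containing kanji alone falls through to 'ZH', since kanji cannot be told apart from Chinese hanzi by codepoint:

import re

def is_japanese(text: str) -> bool:
    # Hypothetical check: any hiragana (U+3040-U+309F) or
    # katakana (U+30A0-U+30FF) marks the text as Japanese.
    return re.search(r"[\u3040-\u30ff]", text) is not None

print(is_japanese("こんにちは"))  # True
print(is_japanese("你好"))        # False: hanzi only, no kana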
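On the last hunk: os.walk recurses through Data/Bushiroad/models/ and the loop appends every file it finds, so any stray non-checkpoint file in that tree also ends up in modelPaths. A hedged variant that keeps only checkpoint files, assuming this repo's checkpoints use the common .pth extension:

import os

modelPaths = []
for dirpath, dirnames, filenames in os.walk('Data/Bushiroad/models/'):
    for filename in filenames:
        if filename.endswith('.pth'):  # assumption: checkpoints are .pth
            modelPaths.append(os.path.join(dirpath, filename))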