Mahiruoshi committed cb9e52f · 1 parent: d0a23bd

Update all-character models (更新全员模型)
Files changed (1): app.py (+28 -13)
app.py CHANGED

@@ -58,8 +58,19 @@ device = (
 )
 
 BandList = {
+    "PoppinParty":["香澄","有咲","たえ","りみ","沙綾"],
+    "Afterglow":["蘭","モカ","ひまり","巴","つぐみ"],
+    "HelloHappyWorld":["こころ","美咲","薫","花音","はぐみ"],
+    "PastelPalettes":["彩","日菜","千聖","イヴ","麻弥"],
+    "Roselia":["友希那","紗夜","リサ","燐子","あこ"],
+    "RaiseASuilen":["レイヤ","ロック","ますき","チュチュ","パレオ"],
+    "Morfonica":["ましろ","瑠唯","つくし","七深","透子"],
     "MyGo":["燈","愛音","そよ","立希","楽奈"],
-    "AveMujica":["祥子","睦","海鈴","にゃむ","初華"]
+    "AveMujica":["祥子","睦","海鈴","にゃむ","初華"],
+    "圣翔音乐学园":["華戀","光","香子","雙葉","真晝","純那","克洛迪娜","真矢","奈奈"],
+    "凛明馆女子学校":["珠緒","壘","文","悠悠子","一愛"],
+    "弗隆提亚艺术学校":["艾露","艾露露","菈樂菲","司","靜羽"],
+    "西克菲尔特音乐学院":["晶","未知留","八千代","栞","美帆"]
 }
 
 def get_net_g(model_path: str, version: str, device: str, hps):
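Note: BandList is consumed further down in this same file; each key becomes a top-level tab and each character name a nested tab. A minimal sketch of that consumption pattern, with gr being the gradio import app.py already uses:

with gr.Blocks() as app:
    for band in BandList:                # e.g. "Roselia", "AveMujica"
        with gr.TabItem(band):
            for name in BandList[band]:  # e.g. "友希那"
                with gr.TabItem(name):
                    pass  # per-character controls are built here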
@@ -77,7 +88,7 @@ def get_net_g(model_path: str, version: str, device: str, hps):
 def get_text(text, language_str, hps, device):
     norm_text, phone, tone, word2ph = clean_text(text, language_str)
     phone, tone, language = cleaned_text_to_sequence(phone, tone, language_str)
-    print(text)
+    #print(text)
     if hps.data.add_blank:
         phone = commons.intersperse(phone, 0)
         tone = commons.intersperse(tone, 0)
@@ -114,11 +125,12 @@ def get_text(text, language_str, hps, device):
     return bert, ja_bert, en_bert, phone, tone, language
 
 def get_emo_(reference_audio, emotion):
-    emo = (
-        torch.from_numpy(get_emo(reference_audio))
-        if reference_audio
-        else torch.Tensor([emotion])
-    )
+
+    if (emotion == 10 and reference_audio):
+        emo = torch.from_numpy(get_emo(reference_audio))
+    else:
+        emo = torch.Tensor([emotion])
+
     return emo
 
 def get_emo(path):
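Note: the rewrite turns 10 into a sentinel value. The wav2vec2-based extraction in get_emo now runs only when the Emotion slider sits at its maximum and a reference clip is supplied, which is exactly what the relabelled slider below advertises. An illustration of the resulting behaviour (the sample path is hypothetical):

emo = get_emo_("classifedSample/燈/sample.wav", 10)  # slider at 10: embedding is extracted from the clip
emo = get_emo_("classifedSample/燈/sample.wav", 0)   # any other value: audio ignored, emo == torch.Tensor([0.])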
@@ -202,15 +214,15 @@ if __name__ == "__main__":
     REPO_ID = "audeering/wav2vec2-large-robust-12-ft-emotion-msp-dim"
     emotional_processor = Wav2Vec2Processor.from_pretrained(emotional_model_name)
     emotional_model = EmotionModel.from_pretrained(emotional_model_name).to(device)
-    hps = utils.get_hparams_from_file('Data/BanGDream/configs/config.json')
+    hps = utils.get_hparams_from_file('Data/Bushiroad/configs/config.json')
     net_g = get_net_g(
-        model_path='Data/BanGDream/models/G_132000.pth', version="2.1", device=device, hps=hps
+        model_path='Data/Bushiroad/models/G_29000.pth', version="2.1", device=device, hps=hps
     )
     speaker_ids = hps.data.spk2id
     speakers = list(speaker_ids.keys())
     languages = [ "Auto", "ZH", "JP"]
     modelPaths = []
-    for dirpath, dirnames, filenames in os.walk("Data/BanGDream/models/"):
+    for dirpath, dirnames, filenames in os.walk("Data/Bushiroad/models/"):
         for filename in filenames:
             modelPaths.append(os.path.join(dirpath, filename))
     with gr.Blocks() as app:
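Note: the config and checkpoint move together from Data/BanGDream to Data/Bushiroad, and the two must come from the same training run, since spk2id from the config is what indexes the model's speaker table. A sanity check one could run (a sketch; the assertion is my addition and assumes spk2id is keyed by the character names in BandList, which is how the tabs look speakers up):

hps = utils.get_hparams_from_file('Data/Bushiroad/configs/config.json')
for band, names in BandList.items():
    for name in names:
        # every character tab must resolve to a speaker id, or synthesis fails at lookup
        assert name in hps.data.spk2id, f"{name} missing from spk2id"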
@@ -219,7 +231,7 @@ if __name__ == "__main__":
             for name in BandList[band]:
                 with gr.TabItem(name):
                     classifiedPaths = []
-                    for dirpath, dirnames, filenames in os.walk("Data/BanGDream/classifedSample/"+name):
+                    for dirpath, dirnames, filenames in os.walk("Data/Bushiroad/classifedSample/"+name):
                         for filename in filenames:
                             classifiedPaths.append(os.path.join(dirpath, filename))
                     with gr.Row():
@@ -234,7 +246,7 @@ if __name__ == "__main__":
                             minimum=0.1, maximum=2, value=1, step=0.01, label="语速调节"
                         )
                         emotion = gr.Slider(
-                            minimum=-10, maximum=10, value=0, step=0.1, label="Emotion"
+                            minimum=-10, maximum=10, value=0, step=0.1, label="Emotion参数(调至10开启音频参考,如不启动则设为0)"
                         )
                         with gr.Accordion(label="参数设定", open=False):
                             sdp_ratio = gr.Slider(
@@ -260,7 +272,10 @@ if __name__ == "__main__":
                            placeholder="输入纯日语或者中文",
                            value="为什么要演奏春日影!",
                        )
-                        reference_audio = gr.Dropdown(label = "情感参考", choices = classifiedPaths, value = classifiedPaths[0], type = "value")
+                        try:
+                            reference_audio = gr.Dropdown(label = "情感参考", choices = classifiedPaths, value = classifiedPaths[0], type = "value")
+                        except:
+                            reference_audio = gr.Audio(label="情感参考音频", type="filepath")
                        btn = gr.Button("点击生成", variant="primary")
                        audio_output = gr.Audio(label="Output Audio")
                        '''
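Note: the try/except covers characters that have no classified samples on disk. With an empty classifiedPaths, classifiedPaths[0] raises IndexError, and the app then falls back to a free-form upload widget. An equivalent, more explicit version of the same logic (a sketch, not what the commit ships):

if classifiedPaths:
    # curated reference clips exist for this character
    reference_audio = gr.Dropdown(label="情感参考", choices=classifiedPaths,
                                  value=classifiedPaths[0], type="value")
else:
    # nothing on disk: let the user upload a reference clip instead
    reference_audio = gr.Audio(label="情感参考音频", type="filepath")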
 