Plachta committed on
Commit
743b0e1
1 Parent(s): 12a0f39

更新了参数调整功能

Browse files
Files changed (1) hide show
  1. app.py +14 -4
app.py CHANGED
@@ -34,7 +34,7 @@ net_g = SynthesizerTrn(
34
  **hps.model)
35
  _ = net_g.eval()
36
 
37
- _ = utils.load_checkpoint("pretrained_models/G_1153000.pth", net_g, None)
38
 
39
  title = "Umamusume voice synthesizer \n 赛马娘语音合成器"
40
  description = """
@@ -47,8 +47,11 @@ If your input language is not Japanese, it will be translated to Japanese by Goo
47
  article = """
48
 
49
  """
 
 
 
50
 
51
- def infer(text, character, language):
52
  if language == '日本語':
53
  pass
54
  elif language == '简体中文':
@@ -61,7 +64,7 @@ def infer(text, character, language):
61
  x_tst = stn_tst.unsqueeze(0)
62
  x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
63
  sid = torch.LongTensor([char_id])
64
- audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=0.667, noise_scale_w=0.8, length_scale=1)[0][0,0].data.cpu().float().numpy()
65
  return (text,(22050, audio))
66
 
67
  # We instantiate the Textbox class
@@ -96,5 +99,12 @@ examples = [['お疲れ様です,トレーナーさん。', '1:无声铃鹿',
96
  ['授業中に出しだら,学校生活終わるですわ。', '12:目白麦昆','日本語'],
97
  ['お帰りなさい,お兄様!', '29:米浴','日本語'],
98
  ['私の処女をもらっでください!', '29:米浴','日本語']]
99
- gr.Interface(fn=infer, inputs=[textbox, char_dropdown, language_dropdown], outputs=["text","audio"],
 
 
 
 
 
 
 
100
  title=title, description=description, article=article, examples = examples).launch()
 
34
  **hps.model)
35
  _ = net_g.eval()
36
 
37
+ _ = utils.load_checkpoint("pretrained_models/uma_1153000.pth", net_g, None)
38
 
39
  title = "Umamusume voice synthesizer \n 赛马娘语音合成器"
40
  description = """
 
47
  article = """
48
 
49
  """
50
+ duration_slider = gr.Slider(minimum=0.1, maximum=5, value=1, step=0.1, label='时长 Duration')
51
+ noise_scale_slider = gr.Slider(minimum=0.1, maximum=5, value=0.667, step=0.001, label='噪声比例 noise_scale')
52
+ noise_scale_w_slider = gr.Slider(minimum=0.1, maximum=5, value=0.8, step=0.1, label='噪声偏差 noise_scale_w')
53
 
54
+ def infer(text, character, language, duration, noise_scale, noise_scale_w):
55
  if language == '日本語':
56
  pass
57
  elif language == '简体中文':
 
64
  x_tst = stn_tst.unsqueeze(0)
65
  x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
66
  sid = torch.LongTensor([char_id])
67
+ audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=duration)[0][0,0].data.cpu().float().numpy()
68
  return (text,(22050, audio))
69
 
70
  # We instantiate the Textbox class
 
99
  ['授業中に出しだら,学校生活終わるですわ。', '12:目白麦昆','日本語'],
100
  ['お帰りなさい,お兄様!', '29:米浴','日本語'],
101
  ['私の処女をもらっでください!', '29:米浴','日本語']]
102
+ gr.Interface(fn=infer, inputs=[
103
+ textbox,
104
+ char_dropdown,
105
+ language_dropdown,
106
+ duration_slider,
107
+ noise_scale_slider,
108
+ noise_scale_w_slider,
109
+ ], outputs=["text","audio"],
110
  title=title, description=description, article=article, examples = examples).launch()