admin committed on
Commit
4ee714e
·
1 Parent(s): 685acde
Files changed (1) hide show
  1. app.py +64 -33
app.py CHANGED
@@ -101,17 +101,13 @@ def tts_fn(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
101
 
102
  def text_splitter(text: str):
103
  punctuation = r"[。,;,!,?,〜,\n,\r,\t,.,!,;,?,~, ]"
104
- # 使用正则表达式根据标点符号分割文本,并忽略重叠的分隔符
105
  sentences = re.split(punctuation, text.strip())
106
- # 过滤掉空字符串
107
  return [sentence.strip() for sentence in sentences if sentence.strip()]
108
 
109
 
110
  def concatenate_audios(audio_samples, sample_rate=44100):
111
  half_second_silence = np.zeros(int(sample_rate / 2))
112
- # 初始化最终的音频数组
113
  final_audio = audio_samples[0]
114
- # 遍历音频样本列表,并将它们连接起来,每个样本之间插入半秒钟的静音
115
  for sample in audio_samples[1:]:
116
  final_audio = np.concatenate((final_audio, half_second_silence, sample))
117
 
@@ -121,19 +117,18 @@ def concatenate_audios(audio_samples, sample_rate=44100):
121
 
122
  def read_text(file_path: str):
123
  try:
124
- # 打开文件并读取内容
125
  with open(file_path, "r", encoding="utf-8") as file:
126
  content = file.read()
127
  return content
128
 
129
  except FileNotFoundError:
130
- print(f"文件未找到: {file_path}")
131
 
132
  except IOError:
133
- print(f"读取文件时发生错误: {file_path}")
134
 
135
  except Exception as e:
136
- print(f"发生未知错误: {e}")
137
 
138
 
139
  def infer_tab1(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
@@ -215,62 +210,98 @@ if __name__ == "__main__":
215
  with gr.Blocks() as app:
216
  gr.Markdown(
217
  """
218
- <center>
219
- 欢迎使用此创空间, 此创空间基于 <a href="https://github.com/fishaudio/Bert-VITS2">Bert-vits2</a> 开源项目制作,完全免费。使用此创空间必须遵守当地相关法律法规,禁止用其从事任何违法犯罪活动。首次推理需耗时下载模型,还请耐心等待。另外,移至最底端有原理浅讲。
220
- </center>
221
- """
222
  )
223
 
224
- with gr.Tab("输入模式"):
225
  gr.Interface(
226
- fn=infer_tab2, # 使用 text_to_speech 函数
227
  inputs=[
228
- gr.TextArea(label="请输入简体中文文案", show_copy_button=True),
229
- gr.Dropdown(choices=speakers, value="莱依拉", label="角色"),
 
 
 
 
230
  gr.Slider(
231
- minimum=0, maximum=1, value=0.2, step=0.1, label="语调调节"
232
- ), # SDP/DP混合比
 
 
 
 
233
  gr.Slider(
234
- minimum=0.1, maximum=2, value=0.6, step=0.1, label="感情调节"
 
 
 
 
235
  ),
236
  gr.Slider(
237
- minimum=0.1, maximum=2, value=0.8, step=0.1, label="音素长度"
 
 
 
 
238
  ),
239
  gr.Slider(
240
- minimum=0.1, maximum=2, value=1, step=0.1, label="生成时长"
 
 
 
 
241
  ),
242
  ],
243
- outputs=gr.Audio(label="输出音频"),
244
  flagging_mode="never",
245
  concurrency_limit=4,
246
  )
247
 
248
- with gr.Tab("上传模式"):
249
  gr.Interface(
250
- fn=infer_tab1, # 使用 text_to_speech 函数
251
  inputs=[
252
  gr.components.File(
253
- label="请上传简体中文TXT文案",
254
  type="filepath",
255
  file_types=[".txt"],
256
  ),
257
- gr.Dropdown(choices=speakers, value="莱依拉", label="角色"),
258
  gr.Slider(
259
- minimum=0, maximum=1, value=0.2, step=0.1, label="语调调节"
260
- ), # SDP/DP混合比
 
 
 
 
261
  gr.Slider(
262
- minimum=0.1, maximum=2, value=0.6, step=0.1, label="感情调节"
 
 
 
 
263
  ),
264
  gr.Slider(
265
- minimum=0.1, maximum=2, value=0.8, step=0.1, label="音素长度"
 
 
 
 
266
  ),
267
  gr.Slider(
268
- minimum=0.1, maximum=2, value=1, step=0.1, label="生成时长"
 
 
 
 
269
  ),
270
  ],
271
  outputs=[
272
- gr.Audio(label="输出音频"),
273
- gr.TextArea(label="文案提取结果", show_copy_button=True),
 
 
 
274
  ],
275
  flagging_mode="never",
276
  concurrency_limit=4,
 
101
 
102
  def text_splitter(text: str):
103
  punctuation = r"[。,;,!,?,〜,\n,\r,\t,.,!,;,?,~, ]"
 
104
  sentences = re.split(punctuation, text.strip())
 
105
  return [sentence.strip() for sentence in sentences if sentence.strip()]
106
 
107
 
108
  def concatenate_audios(audio_samples, sample_rate=44100):
109
  half_second_silence = np.zeros(int(sample_rate / 2))
 
110
  final_audio = audio_samples[0]
 
111
  for sample in audio_samples[1:]:
112
  final_audio = np.concatenate((final_audio, half_second_silence, sample))
113
 
 
117
 
118
  def read_text(file_path: str):
119
  try:
 
120
  with open(file_path, "r", encoding="utf-8") as file:
121
  content = file.read()
122
  return content
123
 
124
  except FileNotFoundError:
125
+ print(f"File Not Found: {file_path}")
126
 
127
  except IOError:
128
+ print(f"An error occurred reading the file: {file_path}")
129
 
130
  except Exception as e:
131
+ print(f"An unknown error has occurred: {e}")
132
 
133
 
134
  def infer_tab1(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
 
210
  with gr.Blocks() as app:
211
  gr.Markdown(
212
  """
213
+ Welcome to the Space, which is based on the open source project <a href="https://github.com/fishaudio/Bert-VITS2">Bert-vits2</a>, and moved to the bottom for an explanation of the principle. This Space must be used in accordance with local laws and regulations, prohibiting the use of it for any criminal activities."""
 
 
 
214
  )
215
 
216
+ with gr.Tab("Input Mode"):
217
  gr.Interface(
218
+ fn=infer_tab2,
219
  inputs=[
220
+ gr.TextArea(
221
+ label="Please input the Simplified Chinese text",
222
+ placeholder="The first inference takes time to download the model, so be patient.",
223
+ show_copy_button=True,
224
+ ),
225
+ gr.Dropdown(choices=speakers, value="莱依拉", label="Role"),
226
  gr.Slider(
227
+ minimum=0,
228
+ maximum=1,
229
+ value=0.2,
230
+ step=0.1,
231
+ label="Modulation of intonation",
232
+ ), # SDP/DP Mix Ratio
233
  gr.Slider(
234
+ minimum=0.1,
235
+ maximum=2,
236
+ value=0.6,
237
+ step=0.1,
238
+ label="Emotional adjustment",
239
  ),
240
  gr.Slider(
241
+ minimum=0.1,
242
+ maximum=2,
243
+ value=0.8,
244
+ step=0.1,
245
+ label="Phoneme length",
246
  ),
247
  gr.Slider(
248
+ minimum=0.1,
249
+ maximum=2,
250
+ value=1,
251
+ step=0.1,
252
+ label="Output duration",
253
  ),
254
  ],
255
+ outputs=gr.Audio(label="Output Audio"),
256
  flagging_mode="never",
257
  concurrency_limit=4,
258
  )
259
 
260
+ with gr.Tab("Upload Mode"):
261
  gr.Interface(
262
+ fn=infer_tab1, # Use text_to_speech func
263
  inputs=[
264
  gr.components.File(
265
+ label="Please upload a simplified Chinese TXT",
266
  type="filepath",
267
  file_types=[".txt"],
268
  ),
269
+ gr.Dropdown(choices=speakers, value="莱依拉", label="Role"),
270
  gr.Slider(
271
+ minimum=0,
272
+ maximum=1,
273
+ value=0.2,
274
+ step=0.1,
275
+ label="Modulation of intonation",
276
+ ),
277
  gr.Slider(
278
+ minimum=0.1,
279
+ maximum=2,
280
+ value=0.6,
281
+ step=0.1,
282
+ label="Emotional adjustment",
283
  ),
284
  gr.Slider(
285
+ minimum=0.1,
286
+ maximum=2,
287
+ value=0.8,
288
+ step=0.1,
289
+ label="Phoneme length",
290
  ),
291
  gr.Slider(
292
+ minimum=0.1,
293
+ maximum=2,
294
+ value=1,
295
+ step=0.1,
296
+ label="Output duration",
297
  ),
298
  ],
299
  outputs=[
300
+ gr.Audio(label="Output Audio"),
301
+ gr.TextArea(
302
+ label="Result of TXT extraction",
303
+ show_copy_button=True,
304
+ ),
305
  ],
306
  flagging_mode="never",
307
  concurrency_limit=4,