Spaces:

Genius-Society
/

hoyoTTS

Running

App Files Community

admin committed 8 days ago

Commit

4ee714e

1 Parent(s): 685acde

2 en

Browse files

Files changed (1) hide show

app.py +64 -33

app.py CHANGED Viewed

@@ -101,17 +101,13 @@ def tts_fn(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
 def text_splitter(text: str):
     punctuation = r"[。,；,！,？,〜,\n,\r,\t,.,!,;,?,~, ]"
-    # 使用正则表达式根据标点符号分割文本，并忽略重叠的分隔符
     sentences = re.split(punctuation, text.strip())
-    # 过滤掉空字符串
     return [sentence.strip() for sentence in sentences if sentence.strip()]
 def concatenate_audios(audio_samples, sample_rate=44100):
     half_second_silence = np.zeros(int(sample_rate / 2))
-    # 初始化最终的音频数组
     final_audio = audio_samples[0]
-    # 遍历音频样本列表，并将它们连接起来，每个样本之间插入半秒钟的静音
     for sample in audio_samples[1:]:
         final_audio = np.concatenate((final_audio, half_second_silence, sample))
@@ -121,19 +117,18 @@ def concatenate_audios(audio_samples, sample_rate=44100):
 def read_text(file_path: str):
     try:
-        # 打开文件并读取内容
         with open(file_path, "r", encoding="utf-8") as file:
             content = file.read()
             return content
     except FileNotFoundError:
-        print(f"文件未找到: {file_path}")
     except IOError:
-        print(f"读取文件时发生错误: {file_path}")
     except Exception as e:
-        print(f"发生未知错误: {e}")
 def infer_tab1(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
@@ -215,62 +210,98 @@ if __name__ == "__main__":
     with gr.Blocks() as app:
         gr.Markdown(
             """
-<center>
-欢迎使用此创空间, 此创空间基于 <a href="https://github.com/fishaudio/Bert-VITS2">Bert-vits2</a> 开源项目制作，完全免费。使用此创空间必须遵守当地相关法律法规，禁止用其从事任何违法犯罪活动。首次推理需耗时下载模型，还请耐心等待。另外，移至最底端有原理浅讲。
-</center>
-"""
         )
-        with gr.Tab("输入模式"):
             gr.Interface(
-                fn=infer_tab2,  # 使用 text_to_speech 函数
                 inputs=[
-                    gr.TextArea(label="请输入简体中文文案", show_copy_button=True),
-                    gr.Dropdown(choices=speakers, value="莱依拉", label="角色"),
                     gr.Slider(
-                        minimum=0, maximum=1, value=0.2, step=0.1, label="语调调节"
-                    ),  # SDP/DP混合比
                     gr.Slider(
-                        minimum=0.1, maximum=2, value=0.6, step=0.1, label="感情调节"
                     ),
                     gr.Slider(
-                        minimum=0.1, maximum=2, value=0.8, step=0.1, label="音素长度"
                     ),
                     gr.Slider(
-                        minimum=0.1, maximum=2, value=1, step=0.1, label="生成时长"
                     ),
                 ],
-                outputs=gr.Audio(label="输出音频"),
                 flagging_mode="never",
                 concurrency_limit=4,
             )
-        with gr.Tab("上传模式"):
             gr.Interface(
-                fn=infer_tab1,  # 使用 text_to_speech 函数
                 inputs=[
                     gr.components.File(
-                        label="请上传简体中文TXT文案",
                         type="filepath",
                         file_types=[".txt"],
                     ),
-                    gr.Dropdown(choices=speakers, value="莱依拉", label="角色"),
                     gr.Slider(
-                        minimum=0, maximum=1, value=0.2, step=0.1, label="语调调节"
-                    ),  # SDP/DP混合比
                     gr.Slider(
-                        minimum=0.1, maximum=2, value=0.6, step=0.1, label="感情调节"
                     ),
                     gr.Slider(
-                        minimum=0.1, maximum=2, value=0.8, step=0.1, label="音素长度"
                     ),
                     gr.Slider(
-                        minimum=0.1, maximum=2, value=1, step=0.1, label="生成时长"
                     ),
                 ],
                 outputs=[
-                    gr.Audio(label="输出音频"),
-                    gr.TextArea(label="文案提取结果", show_copy_button=True),
                 ],
                 flagging_mode="never",
                 concurrency_limit=4,

 def text_splitter(text: str):
     punctuation = r"[。,；,！,？,〜,\n,\r,\t,.,!,;,?,~, ]"
     sentences = re.split(punctuation, text.strip())
     return [sentence.strip() for sentence in sentences if sentence.strip()]
 def concatenate_audios(audio_samples, sample_rate=44100):
     half_second_silence = np.zeros(int(sample_rate / 2))
     final_audio = audio_samples[0]
     for sample in audio_samples[1:]:
         final_audio = np.concatenate((final_audio, half_second_silence, sample))
 def read_text(file_path: str):
     try:
         with open(file_path, "r", encoding="utf-8") as file:
             content = file.read()
             return content
     except FileNotFoundError:
+        print(f"File Not Found: {file_path}")
     except IOError:
+        print(f"An error occurred reading the file: {file_path}")
     except Exception as e:
+        print(f"An unknown error has occurred: {e}")
 def infer_tab1(text, speaker, sdp_ratio, noise_scale, noise_scale_w, length_scale):
     with gr.Blocks() as app:
         gr.Markdown(
             """
+Welcome to the Space, which is based on the open source project <a href="https://github.com/fishaudio/Bert-VITS2">Bert-vits2</a>, and moved to the bottom for an explanation of the principle. This Space must be used in accordance with local laws and regulations, prohibiting the use of it for any criminal activities."""
         )
+        with gr.Tab("Input Mode"):
             gr.Interface(
+                fn=infer_tab2,
                 inputs=[
+                    gr.TextArea(
+                        label="Please input the Simplified Chinese text",
+                        placeholder="The first inference takes time to download the model, so be patient.",
+                        show_copy_button=True,
+                    ),
+                    gr.Dropdown(choices=speakers, value="莱依拉", label="Role"),
                     gr.Slider(
+                        minimum=0,
+                        maximum=1,
+                        value=0.2,
+                        step=0.1,
+                        label="Modulation of intonation",
+                    ),  # SDP/DP Mix Ratio
                     gr.Slider(
+                        minimum=0.1,
+                        maximum=2,
+                        value=0.6,
+                        step=0.1,
+                        label="Emotional adjustment",
                     ),
                     gr.Slider(
+                        minimum=0.1,
+                        maximum=2,
+                        value=0.8,
+                        step=0.1,
+                        label="Phoneme length",
                     ),
                     gr.Slider(
+                        minimum=0.1,
+                        maximum=2,
+                        value=1,
+                        step=0.1,
+                        label="Output duration",
                     ),
                 ],
+                outputs=gr.Audio(label="Output Audio"),
                 flagging_mode="never",
                 concurrency_limit=4,
             )
+        with gr.Tab("Upload Mode"):
             gr.Interface(
+                fn=infer_tab1,  # Use text_to_speech func
                 inputs=[
                     gr.components.File(
+                        label="Please upload a simplified Chinese TXT",
                         type="filepath",
                         file_types=[".txt"],
                     ),
+                    gr.Dropdown(choices=speakers, value="莱依拉", label="Role"),
                     gr.Slider(
+                        minimum=0,
+                        maximum=1,
+                        value=0.2,
+                        step=0.1,
+                        label="Modulation of intonation",
+                    ),
                     gr.Slider(
+                        minimum=0.1,
+                        maximum=2,
+                        value=0.6,
+                        step=0.1,
+                        label="Emotional adjustment",
                     ),
                     gr.Slider(
+                        minimum=0.1,
+                        maximum=2,
+                        value=0.8,
+                        step=0.1,
+                        label="Phoneme length",
                     ),
                     gr.Slider(
+                        minimum=0.1,
+                        maximum=2,
+                        value=1,
+                        step=0.1,
+                        label="Output duration",
                     ),
                 ],
                 outputs=[
+                    gr.Audio(label="Output Audio"),
+                    gr.TextArea(
+                        label="Result of TXT extraction",
+                        show_copy_button=True,
+                    ),
                 ],
                 flagging_mode="never",
                 concurrency_limit=4,