openvoice2

Running

App Files Community

poemsforaphrodite committed on May 29

Commit

02cd175

•

1 Parent(s): 9c0d38a

Upload openvoice_app.py with huggingface_hub

Browse files

Files changed (1) hide show

openvoice_app.py +11 -12

openvoice_app.py CHANGED Viewed

@@ -2,15 +2,18 @@ import os
 import torch
 import argparse
 import gradio as gr
-from zipfile import ZipFile
 import langid
 from openvoice import se_extractor
 from openvoice.api import BaseSpeakerTTS, ToneColorConverter
 parser = argparse.ArgumentParser()
 parser.add_argument("--share", action='store_true', default=False, help="make link public")
 args = parser.parse_args()
 en_ckpt_base = 'base_speakers/EN'
 zh_ckpt_base = 'base_speakers/ZH'
 ckpt_converter = 'converter'
@@ -43,7 +46,6 @@ def predict(prompt, style, audio_file_pth):
     if language_predicted not in supported_languages:
         text_hint += f"[ERROR] The detected language {language_predicted} is not supported. Supported languages: {supported_languages}\n"
-        gr.Warning(f"The detected language {language_predicted} is not supported. Supported languages: {supported_languages}")
         return text_hint, None, None
     if language_predicted == "zh":
@@ -52,7 +54,6 @@ def predict(prompt, style, audio_file_pth):
         language = 'Chinese'
         if style != 'default':
             text_hint += f"[ERROR] The style {style} is not supported for Chinese. Supported style: 'default'\n"
-            gr.Warning(f"The style {style} is not supported for Chinese. Supported style: 'default'")
             return text_hint, None, None
     else:
         tts_model = en_base_speaker_tts
@@ -60,23 +61,19 @@ def predict(prompt, style, audio_file_pth):
         language = 'English'
         if style not in ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']:
             text_hint += f"[ERROR] The style {style} is not supported for English. Supported styles: ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']\n"
-            gr.Warning(f"The style {style} is not supported for English. Supported styles: ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']")
             return text_hint, None, None
     if len(prompt) < 2:
         text_hint += "[ERROR] Please provide a longer prompt text.\n"
-        gr.Warning("Please provide a longer prompt text.")
         return text_hint, None, None
     if len(prompt) > 200:
         text_hint += "[ERROR] Text length limited to 200 characters. Please try shorter text.\n"
-        gr.Warning("Text length limited to 200 characters. Please try shorter text.")
         return text_hint, None, None
     try:
         target_se, audio_name = se_extractor.get_se(audio_file_pth, tone_color_converter, target_dir='processed', vad=True)
     except Exception as e:
         text_hint += f"[ERROR] Error extracting tone color: {str(e)}\n"
-        gr.Warning(f"[ERROR] Error extracting tone color: {str(e)}")
         return text_hint, None, None
     src_path = f'{output_dir}/tmp.wav'
@@ -91,8 +88,7 @@ def predict(prompt, style, audio_file_pth):
 title = "MyShell OpenVoice"
 with gr.Blocks(gr.themes.Glass()) as demo:
     with gr.Row():
         with gr.Column():
@@ -104,7 +100,7 @@ with gr.Blocks(gr.themes.Glass()) as demo:
             style_gr = gr.Dropdown(
                 label="Style",
                 choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
-                info="Please upload a reference audio file, it should be 1 minute long and clear.",
                 max_choices=1,
                 value="default",
             )
@@ -122,5 +118,8 @@ with gr.Blocks(gr.themes.Glass()) as demo:
             tts_button.click(predict, [input_text_gr, style_gr, ref_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
-demo.queue()
-demo.launch(debug=True, show_api=True, share=args.share)

 import torch
 import argparse
 import gradio as gr
 import langid
 from openvoice import se_extractor
 from openvoice.api import BaseSpeakerTTS, ToneColorConverter
+from dotenv import load_dotenv
+# Argument parsing
 parser = argparse.ArgumentParser()
 parser.add_argument("--share", action='store_true', default=False, help="make link public")
 args = parser.parse_args()
+load_dotenv()
+# Paths and device setup
 en_ckpt_base = 'base_speakers/EN'
 zh_ckpt_base = 'base_speakers/ZH'
 ckpt_converter = 'converter'
     if language_predicted not in supported_languages:
         text_hint += f"[ERROR] The detected language {language_predicted} is not supported. Supported languages: {supported_languages}\n"
         return text_hint, None, None
     if language_predicted == "zh":
         language = 'Chinese'
         if style != 'default':
             text_hint += f"[ERROR] The style {style} is not supported for Chinese. Supported style: 'default'\n"
             return text_hint, None, None
     else:
         tts_model = en_base_speaker_tts
         language = 'English'
         if style not in ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']:
             text_hint += f"[ERROR] The style {style} is not supported for English. Supported styles: ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']\n"
             return text_hint, None, None
     if len(prompt) < 2:
         text_hint += "[ERROR] Please provide a longer prompt text.\n"
         return text_hint, None, None
     if len(prompt) > 200:
         text_hint += "[ERROR] Text length limited to 200 characters. Please try shorter text.\n"
         return text_hint, None, None
     try:
         target_se, audio_name = se_extractor.get_se(audio_file_pth, tone_color_converter, target_dir='processed', vad=True)
     except Exception as e:
         text_hint += f"[ERROR] Error extracting tone color: {str(e)}\n"
         return text_hint, None, None
     src_path = f'{output_dir}/tmp.wav'
 title = "MyShell OpenVoice"
+# Gradio interface setup
 with gr.Blocks(gr.themes.Glass()) as demo:
     with gr.Row():
         with gr.Column():
             style_gr = gr.Dropdown(
                 label="Style",
                 choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
+                info="Please upload a reference audio file that is atleast 1 minute long. For best results, ensure the audio is clear. You can use Adobe Podcast Enhance(https://podcast.adobe.com/enhance) to improve the audio quality before uploading.",
                 max_choices=1,
                 value="default",
             )
             tts_button.click(predict, [input_text_gr, style_gr, ref_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
+demo.queue()
+demo.launch(debug=True, show_api=False, share=args.share)
+# Hide Gradio footer
+css = "footer {visibility: hidden}"