poemsforaphrodite committed on
Commit
02cd175
1 Parent(s): 9c0d38a

Upload openvoice_app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. openvoice_app.py +11 -12
openvoice_app.py CHANGED
@@ -2,15 +2,18 @@ import os
2
  import torch
3
  import argparse
4
  import gradio as gr
5
- from zipfile import ZipFile
6
  import langid
7
  from openvoice import se_extractor
8
  from openvoice.api import BaseSpeakerTTS, ToneColorConverter
 
9
 
 
10
  parser = argparse.ArgumentParser()
11
  parser.add_argument("--share", action='store_true', default=False, help="make link public")
12
  args = parser.parse_args()
 
13
 
 
14
  en_ckpt_base = 'base_speakers/EN'
15
  zh_ckpt_base = 'base_speakers/ZH'
16
  ckpt_converter = 'converter'
@@ -43,7 +46,6 @@ def predict(prompt, style, audio_file_pth):
43
 
44
  if language_predicted not in supported_languages:
45
  text_hint += f"[ERROR] The detected language {language_predicted} is not supported. Supported languages: {supported_languages}\n"
46
- gr.Warning(f"The detected language {language_predicted} is not supported. Supported languages: {supported_languages}")
47
  return text_hint, None, None
48
 
49
  if language_predicted == "zh":
@@ -52,7 +54,6 @@ def predict(prompt, style, audio_file_pth):
52
  language = 'Chinese'
53
  if style != 'default':
54
  text_hint += f"[ERROR] The style {style} is not supported for Chinese. Supported style: 'default'\n"
55
- gr.Warning(f"The style {style} is not supported for Chinese. Supported style: 'default'")
56
  return text_hint, None, None
57
  else:
58
  tts_model = en_base_speaker_tts
@@ -60,23 +61,19 @@ def predict(prompt, style, audio_file_pth):
60
  language = 'English'
61
  if style not in ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']:
62
  text_hint += f"[ERROR] The style {style} is not supported for English. Supported styles: ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']\n"
63
- gr.Warning(f"The style {style} is not supported for English. Supported styles: ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']")
64
  return text_hint, None, None
65
 
66
  if len(prompt) < 2:
67
  text_hint += "[ERROR] Please provide a longer prompt text.\n"
68
- gr.Warning("Please provide a longer prompt text.")
69
  return text_hint, None, None
70
  if len(prompt) > 200:
71
  text_hint += "[ERROR] Text length limited to 200 characters. Please try shorter text.\n"
72
- gr.Warning("Text length limited to 200 characters. Please try shorter text.")
73
  return text_hint, None, None
74
 
75
  try:
76
  target_se, audio_name = se_extractor.get_se(audio_file_pth, tone_color_converter, target_dir='processed', vad=True)
77
  except Exception as e:
78
  text_hint += f"[ERROR] Error extracting tone color: {str(e)}\n"
79
- gr.Warning(f"[ERROR] Error extracting tone color: {str(e)}")
80
  return text_hint, None, None
81
 
82
  src_path = f'{output_dir}/tmp.wav'
@@ -91,8 +88,7 @@ def predict(prompt, style, audio_file_pth):
91
 
92
  title = "MyShell OpenVoice"
93
 
94
-
95
-
96
  with gr.Blocks(gr.themes.Glass()) as demo:
97
  with gr.Row():
98
  with gr.Column():
@@ -104,7 +100,7 @@ with gr.Blocks(gr.themes.Glass()) as demo:
104
  style_gr = gr.Dropdown(
105
  label="Style",
106
  choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
107
- info="Please upload a reference audio file, it should be 1 minute long and clear.",
108
  max_choices=1,
109
  value="default",
110
  )
@@ -122,5 +118,8 @@ with gr.Blocks(gr.themes.Glass()) as demo:
122
 
123
  tts_button.click(predict, [input_text_gr, style_gr, ref_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
124
 
125
- demo.queue()
126
- demo.launch(debug=True, show_api=True, share=args.share)
 
 
 
 
2
  import torch
3
  import argparse
4
  import gradio as gr
 
5
  import langid
6
  from openvoice import se_extractor
7
  from openvoice.api import BaseSpeakerTTS, ToneColorConverter
8
+ from dotenv import load_dotenv
9
 
10
+ # Argument parsing
11
  parser = argparse.ArgumentParser()
12
  parser.add_argument("--share", action='store_true', default=False, help="make link public")
13
  args = parser.parse_args()
14
+ load_dotenv()
15
 
16
+ # Paths and device setup
17
  en_ckpt_base = 'base_speakers/EN'
18
  zh_ckpt_base = 'base_speakers/ZH'
19
  ckpt_converter = 'converter'
 
46
 
47
  if language_predicted not in supported_languages:
48
  text_hint += f"[ERROR] The detected language {language_predicted} is not supported. Supported languages: {supported_languages}\n"
 
49
  return text_hint, None, None
50
 
51
  if language_predicted == "zh":
 
54
  language = 'Chinese'
55
  if style != 'default':
56
  text_hint += f"[ERROR] The style {style} is not supported for Chinese. Supported style: 'default'\n"
 
57
  return text_hint, None, None
58
  else:
59
  tts_model = en_base_speaker_tts
 
61
  language = 'English'
62
  if style not in ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']:
63
  text_hint += f"[ERROR] The style {style} is not supported for English. Supported styles: ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']\n"
 
64
  return text_hint, None, None
65
 
66
  if len(prompt) < 2:
67
  text_hint += "[ERROR] Please provide a longer prompt text.\n"
 
68
  return text_hint, None, None
69
  if len(prompt) > 200:
70
  text_hint += "[ERROR] Text length limited to 200 characters. Please try shorter text.\n"
 
71
  return text_hint, None, None
72
 
73
  try:
74
  target_se, audio_name = se_extractor.get_se(audio_file_pth, tone_color_converter, target_dir='processed', vad=True)
75
  except Exception as e:
76
  text_hint += f"[ERROR] Error extracting tone color: {str(e)}\n"
 
77
  return text_hint, None, None
78
 
79
  src_path = f'{output_dir}/tmp.wav'
 
88
 
89
  title = "MyShell OpenVoice"
90
 
91
+ # Gradio interface setup
 
92
  with gr.Blocks(gr.themes.Glass()) as demo:
93
  with gr.Row():
94
  with gr.Column():
 
100
  style_gr = gr.Dropdown(
101
  label="Style",
102
  choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
103
+ info="Please upload a reference audio file that is atleast 1 minute long. For best results, ensure the audio is clear. You can use Adobe Podcast Enhance(https://podcast.adobe.com/enhance) to improve the audio quality before uploading.",
104
  max_choices=1,
105
  value="default",
106
  )
 
118
 
119
  tts_button.click(predict, [input_text_gr, style_gr, ref_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
120
 
121
+ demo.queue()
122
+ demo.launch(debug=True, show_api=False, share=args.share)
123
+
124
+ # Hide Gradio footer
125
+ css = "footer {visibility: hidden}"