Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -20,13 +20,10 @@ def generate_tts(text, temperature, repetition_penalty, speaker_selection, refer
|
|
20 |
interface = outetts.InterfaceHF(model_version="0.3", cfg=model_config)
|
21 |
|
22 |
try:
|
23 |
-
# Validate inputs for custom speaker
|
24 |
if reference_audio:
|
25 |
speaker = interface.create_speaker(reference_audio)
|
26 |
-
# Use selected default speaker
|
27 |
elif speaker_selection and speaker_selection != "None":
|
28 |
speaker = interface.load_default_speaker(speaker_selection)
|
29 |
-
# No speaker - random characteristics
|
30 |
else:
|
31 |
speaker = None
|
32 |
|
@@ -39,175 +36,58 @@ def generate_tts(text, temperature, repetition_penalty, speaker_selection, refer
|
|
39 |
)
|
40 |
output = interface.generate(config=gen_cfg)
|
41 |
|
42 |
-
# Verify output
|
43 |
if output.audio is None:
|
44 |
raise ValueError("Model failed to generate audio. This may be due to input length constraints or early EOS token.")
|
45 |
|
46 |
-
# Save and return output
|
47 |
output_path = "output.wav"
|
48 |
output.save(output_path)
|
49 |
return output_path, None
|
50 |
except Exception as e:
|
51 |
return None, str(e)
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
.container {
|
56 |
-
background: linear-gradient(145deg, #f3f4f6, #ffffff);
|
57 |
-
border-radius: 20px;
|
58 |
-
box-shadow: 10px 10px 20px #d1d1d1, -10px -10px 20px #ffffff;
|
59 |
-
padding: 2rem;
|
60 |
-
margin: 1rem;
|
61 |
-
transition: all 0.3s ease;
|
62 |
-
min-height: 800px;
|
63 |
-
width: 100%;
|
64 |
-
max-width: 1400px;
|
65 |
-
margin: 0 auto;
|
66 |
-
}
|
67 |
-
|
68 |
-
.title {
|
69 |
-
font-size: 2.5rem;
|
70 |
-
font-weight: bold;
|
71 |
-
color: #1a1a1a;
|
72 |
-
text-align: center;
|
73 |
-
margin-bottom: 2rem;
|
74 |
-
text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.1);
|
75 |
-
}
|
76 |
-
|
77 |
-
.input-group {
|
78 |
-
background: #ffffff;
|
79 |
-
border-radius: 15px;
|
80 |
-
padding: 1.5rem;
|
81 |
-
margin: 1rem 0;
|
82 |
-
box-shadow: inset 5px 5px 10px #e0e0e0, inset -5px -5px 10px #ffffff;
|
83 |
-
}
|
84 |
-
|
85 |
-
.button-3d {
|
86 |
-
background: linear-gradient(145deg, #3b82f6, #2563eb);
|
87 |
-
color: white;
|
88 |
-
border: none;
|
89 |
-
padding: 0.8rem 1.5rem;
|
90 |
-
border-radius: 10px;
|
91 |
-
font-weight: bold;
|
92 |
-
cursor: pointer;
|
93 |
-
transition: all 0.3s ease;
|
94 |
-
box-shadow: 5px 5px 10px #d1d1d1, -5px -5px 10px #ffffff;
|
95 |
-
}
|
96 |
-
|
97 |
-
.button-3d:hover {
|
98 |
-
transform: translateY(-2px);
|
99 |
-
box-shadow: 7px 7px 15px #d1d1d1, -7px -7px 15px #ffffff;
|
100 |
-
}
|
101 |
-
|
102 |
-
.slider-3d {
|
103 |
-
height: 12px;
|
104 |
-
border-radius: 6px;
|
105 |
-
background: linear-gradient(145deg, #e6e7eb, #ffffff);
|
106 |
-
box-shadow: inset 3px 3px 6px #d1d1d1, inset -3px -3px 6px #ffffff;
|
107 |
-
}
|
108 |
-
|
109 |
-
.error-box {
|
110 |
-
background: #fee2e2;
|
111 |
-
border-left: 4px solid #ef4444;
|
112 |
-
padding: 1rem;
|
113 |
-
border-radius: 8px;
|
114 |
-
margin: 1rem 0;
|
115 |
-
}
|
116 |
-
|
117 |
-
.right-column {
|
118 |
-
display: flex;
|
119 |
-
flex-direction: column;
|
120 |
-
gap: 1rem;
|
121 |
-
}
|
122 |
-
|
123 |
-
.options-panel {
|
124 |
-
margin-top: 2rem;
|
125 |
-
background: linear-gradient(145deg, #f3f4f6, #ffffff);
|
126 |
-
border-radius: 15px;
|
127 |
-
padding: 2.5rem;
|
128 |
-
box-shadow: 5px 5px 10px #d1d1d1, -5px -5px 10px #ffffff;
|
129 |
-
display: flex;
|
130 |
-
flex-direction: column;
|
131 |
-
gap: 2rem;
|
132 |
-
}
|
133 |
-
|
134 |
-
.input-box {
|
135 |
-
padding: 1.5rem;
|
136 |
-
background: #ffffff;
|
137 |
-
border-radius: 12px;
|
138 |
-
margin-bottom: 1.5rem;
|
139 |
-
box-shadow: inset 3px 3px 7px #e0e0e0, inset -3px -3px 7px #ffffff;
|
140 |
-
}
|
141 |
-
|
142 |
-
.slider-container {
|
143 |
-
margin: 2rem 0;
|
144 |
-
}
|
145 |
-
|
146 |
-
.textbox-container {
|
147 |
-
min-height: 150px;
|
148 |
-
}
|
149 |
-
"""
|
150 |
-
|
151 |
-
# Create the Gradio interface with 3D styling
|
152 |
-
with gr.Blocks(css=custom_css) as demo:
|
153 |
-
gr.Markdown('<div class="title">Voice Clone Multilingual TTS</div>')
|
154 |
|
155 |
-
error_box = gr.Textbox(label="Error Messages", visible=False
|
156 |
|
157 |
-
with gr.Row(
|
158 |
-
# Left column for text input
|
159 |
with gr.Column(scale=1):
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
elem_classes="input-group",
|
165 |
-
lines=8
|
166 |
-
)
|
167 |
-
|
168 |
-
submit_button = gr.Button(
|
169 |
-
"Generate Speech",
|
170 |
-
elem_classes="button-3d"
|
171 |
)
|
|
|
|
|
172 |
|
173 |
-
|
174 |
-
with gr.Column(scale=1, elem_classes="right-column"):
|
175 |
-
# Audio output at the top
|
176 |
audio_output = gr.Audio(
|
177 |
label="Generated Audio",
|
178 |
-
type="filepath"
|
179 |
-
elem_classes="input-group"
|
180 |
)
|
181 |
|
182 |
-
|
183 |
-
with gr.Group(elem_classes="options-panel"):
|
184 |
speaker_dropdown = gr.Dropdown(
|
185 |
choices=get_available_speakers(),
|
186 |
value="en_male_1",
|
187 |
-
label="Speaker Selection"
|
188 |
-
elem_classes="input-group"
|
189 |
)
|
190 |
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
elem_classes="slider-3d"
|
197 |
-
)
|
198 |
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
elem_classes="slider-3d"
|
205 |
-
)
|
206 |
|
207 |
reference_audio = gr.Audio(
|
208 |
label="Reference Audio (for voice cloning)",
|
209 |
-
type="filepath"
|
210 |
-
elem_classes="input-group"
|
211 |
)
|
212 |
|
213 |
gr.Markdown("""
|
@@ -216,7 +96,7 @@ with gr.Blocks(css=custom_css) as demo:
|
|
216 |
- For transcription interface will use Whisper turbo to transcribe the audio file
|
217 |
- Longer audio clips will reduce maximum output length
|
218 |
- Custom speaker overrides speaker selection
|
219 |
-
"""
|
220 |
|
221 |
submit_button.click(
|
222 |
fn=generate_tts,
|
|
|
20 |
interface = outetts.InterfaceHF(model_version="0.3", cfg=model_config)
|
21 |
|
22 |
try:
|
|
|
23 |
if reference_audio:
|
24 |
speaker = interface.create_speaker(reference_audio)
|
|
|
25 |
elif speaker_selection and speaker_selection != "None":
|
26 |
speaker = interface.load_default_speaker(speaker_selection)
|
|
|
27 |
else:
|
28 |
speaker = None
|
29 |
|
|
|
36 |
)
|
37 |
output = interface.generate(config=gen_cfg)
|
38 |
|
|
|
39 |
if output.audio is None:
|
40 |
raise ValueError("Model failed to generate audio. This may be due to input length constraints or early EOS token.")
|
41 |
|
|
|
42 |
output_path = "output.wav"
|
43 |
output.save(output_path)
|
44 |
return output_path, None
|
45 |
except Exception as e:
|
46 |
return None, str(e)
|
47 |
|
48 |
+
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange") as demo:
|
49 |
+
gr.Markdown("# Voice Clone Multilingual TTS")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
+
error_box = gr.Textbox(label="Error Messages", visible=False)
|
52 |
|
53 |
+
with gr.Row():
|
|
|
54 |
with gr.Column(scale=1):
|
55 |
+
text_input = gr.Textbox(
|
56 |
+
label="Text to Synthesize",
|
57 |
+
placeholder="Enter text here...",
|
58 |
+
lines=8
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
)
|
60 |
+
|
61 |
+
submit_button = gr.Button("Generate Speech")
|
62 |
|
63 |
+
with gr.Column(scale=1):
|
|
|
|
|
64 |
audio_output = gr.Audio(
|
65 |
label="Generated Audio",
|
66 |
+
type="filepath"
|
|
|
67 |
)
|
68 |
|
69 |
+
with gr.Group():
|
|
|
70 |
speaker_dropdown = gr.Dropdown(
|
71 |
choices=get_available_speakers(),
|
72 |
value="en_male_1",
|
73 |
+
label="Speaker Selection"
|
|
|
74 |
)
|
75 |
|
76 |
+
temperature = gr.Slider(
|
77 |
+
0.1, 1.0,
|
78 |
+
value=0.1,
|
79 |
+
label="Temperature (lower = more stable tone, higher = more expressive)"
|
80 |
+
)
|
|
|
|
|
81 |
|
82 |
+
repetition_penalty = gr.Slider(
|
83 |
+
0.5, 2.0,
|
84 |
+
value=1.1,
|
85 |
+
label="Repetition Penalty"
|
86 |
+
)
|
|
|
|
|
87 |
|
88 |
reference_audio = gr.Audio(
|
89 |
label="Reference Audio (for voice cloning)",
|
90 |
+
type="filepath"
|
|
|
91 |
)
|
92 |
|
93 |
gr.Markdown("""
|
|
|
96 |
- For transcription interface will use Whisper turbo to transcribe the audio file
|
97 |
- Longer audio clips will reduce maximum output length
|
98 |
- Custom speaker overrides speaker selection
|
99 |
+
""")
|
100 |
|
101 |
submit_button.click(
|
102 |
fn=generate_tts,
|