naonauno committed on
Commit
e366fb9
·
verified ·
1 Parent(s): 1b37547

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -87
app.py CHANGED
@@ -183,59 +183,54 @@ with gr.Blocks(title="Vevo Voice Conversion") as demo:
183
  label="Inference Mode",
184
  interactive=True
185
  )
186
-
187
- # Audio inputs
188
- content_audio = gr.Audio(
189
- label="Source Audio",
190
- type="filepath",
191
- visible=True,
192
- interactive=True
193
- )
194
-
195
- ref_style_audio = gr.Audio(
196
- label="Reference Style Audio",
197
- type="filepath",
198
- visible=True,
199
- interactive=True
200
- )
201
-
202
- ref_timbre_audio = gr.Audio(
203
- label="Reference Timbre Audio",
204
- type="filepath",
205
- visible=True,
206
- interactive=True
207
- )
208
-
209
- # Text inputs
210
- src_text = gr.Textbox(
211
- label="Source Text",
212
- placeholder="Enter text for TTS mode",
213
- visible=False,
214
- interactive=True
215
- )
216
-
217
- ref_text = gr.Textbox(
218
- label="Reference Style Text",
219
- placeholder="Optional: Enter reference text",
220
- visible=False,
221
- interactive=True
222
- )
223
-
224
- src_language = gr.Dropdown(
225
- choices=["en", "zh"],
226
- value="en",
227
- label="Source Language",
228
- visible=False,
229
- interactive=True
230
- )
231
-
232
- ref_language = gr.Dropdown(
233
- choices=["en", "zh"],
234
- value="en",
235
- label="Reference Language",
236
- visible=False,
237
- interactive=True
238
- )
239
 
240
  with gr.Row():
241
  steps = gr.Slider(
@@ -247,43 +242,22 @@ with gr.Blocks(title="Vevo Voice Conversion") as demo:
247
  )
248
 
249
  with gr.Row():
250
- submit_btn = gr.Button("Generate")
 
 
251
  output_audio = gr.Audio(label="Generated Audio")
252
 
253
- # Handle visibility of components based on mode
254
- def update_interface(mode):
255
- is_tts = mode == "tts"
256
- is_timbre = mode == "timbre"
257
-
258
- return [
259
- # Audio components visibility
260
- not is_tts, # content_audio
261
- not is_timbre and not is_tts, # ref_style_audio
262
- True, # ref_timbre_audio (always visible)
263
- # Text components visibility
264
- is_tts, # src_text
265
- is_tts, # ref_text
266
- is_tts, # src_language
267
- is_tts # ref_language
268
- ]
269
-
270
- mode.change(
271
- fn=update_interface,
272
- inputs=[mode],
273
- outputs=[
274
- content_audio,
275
- ref_style_audio,
276
- ref_timbre_audio,
277
- src_text,
278
- ref_text,
279
- src_language,
280
- ref_language
281
- ]
282
- )
283
 
284
- # Handle generation
285
  submit_btn.click(
286
- fn=process_audio,
287
  inputs=[
288
  mode,
289
  content_audio,
@@ -295,8 +269,33 @@ with gr.Blocks(title="Vevo Voice Conversion") as demo:
295
  ref_language,
296
  steps
297
  ],
298
- outputs=output_audio
299
  )
300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  if __name__ == "__main__":
302
  demo.queue().launch()
 
183
  label="Inference Mode",
184
  interactive=True
185
  )
186
+
187
+ with gr.Tabs():
188
+ with gr.TabItem("Audio Inputs"):
189
+ content_audio = gr.Audio(
190
+ label="Source Audio",
191
+ type="filepath",
192
+ interactive=True
193
+ )
194
+
195
+ ref_style_audio = gr.Audio(
196
+ label="Reference Style Audio",
197
+ type="filepath",
198
+ interactive=True
199
+ )
200
+
201
+ ref_timbre_audio = gr.Audio(
202
+ label="Reference Timbre Audio",
203
+ type="filepath",
204
+ interactive=True
205
+ )
206
+
207
+ with gr.TabItem("Text Inputs (TTS Mode)"):
208
+ src_text = gr.Textbox(
209
+ label="Source Text",
210
+ placeholder="Enter text for TTS mode",
211
+ interactive=True
212
+ )
213
+
214
+ ref_text = gr.Textbox(
215
+ label="Reference Style Text (Optional)",
216
+ placeholder="Enter reference text",
217
+ interactive=True
218
+ )
219
+
220
+ with gr.Row():
221
+ src_language = gr.Dropdown(
222
+ choices=["en", "zh"],
223
+ value="en",
224
+ label="Source Language",
225
+ interactive=True
226
+ )
227
+
228
+ ref_language = gr.Dropdown(
229
+ choices=["en", "zh"],
230
+ value="en",
231
+ label="Reference Language",
232
+ interactive=True
233
+ )
 
 
 
 
 
234
 
235
  with gr.Row():
236
  steps = gr.Slider(
 
242
  )
243
 
244
  with gr.Row():
245
+ with gr.Column():
246
+ submit_btn = gr.Button("Generate")
247
+ error_box = gr.Textbox(label="Status", interactive=False)
248
  output_audio = gr.Audio(label="Generated Audio")
249
 
250
+ def process_with_error_handling(*args):
251
+ try:
252
+ result = process_audio(*args)
253
+ error_box.update(value="Success!")
254
+ return [result, "Success!"]
255
+ except Exception as e:
256
+ error_msg = str(e)
257
+ return [None, error_msg]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
 
259
  submit_btn.click(
260
+ fn=process_with_error_handling,
261
  inputs=[
262
  mode,
263
  content_audio,
 
269
  ref_language,
270
  steps
271
  ],
272
+ outputs=[output_audio, error_box]
273
  )
274
 
275
+ # Example usage text
276
+ gr.Markdown("""
277
+ ## Quick Start Guide
278
+
279
+ 1. Select your mode:
280
+ - **Voice**: Full voice conversion (style + timbre)
281
+ - **Timbre**: Only voice timbre conversion
282
+ - **TTS**: Text-to-speech with voice cloning
283
+
284
+ 2. For Voice/Timbre modes:
285
+ - Upload source audio (what you want to convert)
286
+ - Upload reference audio(s)
287
+
288
+ 3. For TTS mode:
289
+ - Enter your text
290
+ - Select language
291
+ - Upload reference audio(s)
292
+
293
+ 4. Adjust steps slider:
294
+ - Higher values = better quality but slower
295
+ - Lower values = faster but lower quality
296
+
297
+ 5. Click Generate and wait for processing
298
+ """)
299
+
300
  if __name__ == "__main__":
301
  demo.queue().launch()