Spaces:
Paused
Paused
File size: 2,623 Bytes
7d05777 4d59a71 034f936 0d6259a b94855f 5ca9cb4 d97359f 034f936 9232118 5be0054 034f936 b94855f 034f936 53f6df5 bb14413 0d6259a 53f6df5 034f936 bb14413 034f936 e77a799 034f936 e77a799 034f936 4d59a71 5be0054 4d59a71 bb14413 4d59a71 5be0054 4d59a71 034f936 4d59a71 d97359f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import gradio as gr
import os
from coqui_tts import run_audio_generation_v1
from metaVoice import run_audio_generation_v2
import shutil
# Set COQUI_TOS_AGREED for this process before any synthesis call is made.
# Presumably this pre-accepts the Coqui model terms of service so no
# interactive prompt blocks the app -- TODO confirm against the Coqui TTS docs.
# NOTE(review): this runs after the `coqui_tts` import above; if that module
# reads the variable at import time, the assignment comes too late -- verify.
os.environ["COQUI_TOS_AGREED"] = "1"
def process_audio(input_text, speaker_audio, speaker_name, option_selected):
    """Synthesize ``input_text`` with the selected engine and an uploaded voice.

    The uploaded sample is copied to ``./tmp/audio/input_src/0.wav`` and the
    chosen generation function is then called with ``input_text``.

    Args:
        input_text: Text passed to the generation function.
        speaker_audio: Filesystem path of the uploaded reference recording.
        speaker_name: Currently unused; kept so the signature matches the
            inputs wired up by the UI.
        option_selected: ``"Xtts_v2"`` or ``"metaVoice"``. Any other value
            (including ``None``) is reported as not implemented.

    Returns:
        ``"./tmp/audio/generated-custom.wav"`` on success (presumably the
        file the generation functions write -- confirm in their modules),
        otherwise a human-readable message. Any ``Exception`` raised along
        the way is caught and returned as its string form.
    """
    # NOTE(review): the click handler feeds this return value into a gr.Audio
    # output, so the message strings below are probably not displayed as text;
    # consider raising gr.Error instead -- confirm against the Gradio docs.
    try:
        # Ensure necessary directories exist.
        os.makedirs("./tmp/audio/input_src/", exist_ok=True)
        os.makedirs("audio", exist_ok=True)

        print(f"Received audio file: {speaker_audio}")
        if not speaker_audio or not os.path.exists(speaker_audio):
            return "Error: The uploaded audio file is missing or invalid. Please upload again."

        # Reject unsupported engines before overwriting the stored sample.
        if option_selected not in ("Xtts_v2", "metaVoice"):
            return "The option is not implemented yet."

        # The guard above already established that the upload exists; if it
        # vanishes in between, shutil.copy raises and the handler below
        # reports it.
        shutil.copy(speaker_audio, "./tmp/audio/input_src/0.wav")

        if option_selected == "Xtts_v2":
            run_audio_generation_v1(input_text)
        else:
            run_audio_generation_v2(input_text)
            print(os.listdir('./tmp/audio/'))

        return "./tmp/audio/generated-custom.wav"
    except Exception as e:
        return str(e)
# Gradio interface: builds the page and routes the Submit button to
# process_audio.
# NOTE(review): the nesting below is reconstructed from flattened text; confirm
# which components belong inside each Row.
with gr.Blocks() as demo:
    gr.Markdown("# Audio Cloning and Text-to-Speech")

    # First row: the text to synthesize.
    with gr.Row():
        text_input = gr.Textbox(
            label="Input Text",
            placeholder="Enter your text here.",
        )

    # Second row: reference recording, speaker label and engine choice.
    with gr.Row():
        speaker_audio = gr.Audio(
            label="Speaker Audio (to be cloned)",
            type="filepath",
            format="wav",
        )
        speaker_name = gr.Textbox(
            label="Speaker Name",
            placeholder="Enter the speaker's name.",
        )
        option_selected = gr.Dropdown(
            choices=["Xtts_v2", "metaVoice", "more"],
            label="Select an Option",
        )

    submit_btn = gr.Button("Submit")
    output_audio = gr.Audio(
        label="Generated Audio Output",
        type="filepath",
    )

    # The four inputs are passed to process_audio in this order; its return
    # value is sent to the output audio component.
    submit_btn.click(
        fn=process_audio,
        inputs=[
            text_input,
            speaker_audio,
            speaker_name,
            option_selected,
        ],
        outputs=output_audio,
    )

# Launch the Gradio app
demo.launch()
|