File size: 2,436 Bytes
7d05777
4d59a71
034f936
b94855f
9232118
034f936
 
 
 
 
 
9232118
 
 
 
 
5be0054
034f936
b94855f
 
 
 
034f936
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d59a71
 
 
 
 
 
 
 
 
5be0054
4d59a71
 
034f936
4d59a71
 
 
5be0054
4d59a71
 
034f936
4d59a71
 
 
 
 
f8a14a5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import gradio as gr
import os
from coqui_tts import run_audio_generation_v1
import shutil
# Opt in to the Coqui non-commercial license through the process environment
# so no interactive confirmation is needed.
# NOTE(review): this assignment runs after `coqui_tts` has already been
# imported above; if that module reads the variable at import time the value
# is set too late — confirm, and move this line above the import if so.
# NOTE(review): verify that "COQUI_LICENSE" is the variable name the TTS
# backend actually checks — TODO confirm against the backend's documentation.
os.environ["COQUI_LICENSE"] = "1"  # Automatically agrees to the non-commercial license

def _safe_speaker_filename(speaker_name):
    """Reduce *speaker_name* to a file stem that cannot escape the output directory."""
    raw_name = (speaker_name or "").strip()
    # Keep letters, digits, spaces, '-' and '_'. Everything else (path
    # separators, dots, control characters, ...) is replaced, so values such as
    # "../../etc/passwd" cannot address files outside "audio/".
    cleaned = "".join(
        ch if ch.isalnum() or ch in " -_" else "_" for ch in raw_name
    ).strip()
    # Fall back to a fixed stem so an empty name never yields "audio/.wav".
    return cleaned or "speaker"


def process_audio(input_text, speaker_audio, speaker_name, option_selected):
    """Clone the uploaded voice and synthesize *input_text* with it.

    All inputs are validated before anything is written to disk, so a
    rejected request leaves no directories or copied files behind.

    Args:
        input_text: Text to synthesize. Must contain non-whitespace characters.
        speaker_audio: Filesystem path of the uploaded reference recording.
        speaker_name: Label used to name the output file. It is sanitized
            before being used as a file name.
        option_selected: Backend chosen in the UI; only "Xtts_v2" is handled.

    Returns:
        The path of the generated file ("audio/<name>.wav") on success.
        On any failure a human-readable message string is returned instead;
        exceptions are caught and reported through their text.
    """
    # NOTE(review): failure messages are returned as plain strings because that
    # is this function's existing contract; a caller that expects a file path
    # must be prepared for a message instead.
    try:
        print(f"Received audio file: {speaker_audio}")

        if not speaker_audio or not os.path.exists(speaker_audio):
            return "Error: The uploaded audio file is missing or invalid. Please upload again."

        # Reject unsupported backends before touching the filesystem.
        if option_selected != "Xtts_v2":
            return f"The option '{option_selected}' is not implemented yet."

        if not input_text or not input_text.strip():
            return "Error: The input text is empty. Please enter some text."

        # Ensure necessary directories exist
        os.makedirs("./tmp/audio/input_src/", exist_ok=True)
        os.makedirs("audio", exist_ok=True)

        # Copy the speaker sample to the fixed location used as the cloning
        # reference (presumably read by run_audio_generation_v1 — confirm).
        shutil.copy(speaker_audio, "./tmp/audio/input_src/0.wav")

        # Generate TTS audio; the result is expected at "audio/output.wav".
        run_audio_generation_v1(input_text)

        # Save the output audio under the (sanitized) speaker's name.
        # os.replace overwrites an earlier file for the same speaker on every
        # platform, whereas os.rename fails on Windows if the target exists.
        speaker_output_path = f"audio/{_safe_speaker_filename(speaker_name)}.wav"
        os.replace("audio/output.wav", speaker_output_path)

        return speaker_output_path

    except Exception as e:
        # UI boundary: report the failure text instead of propagating.
        return str(e)

# Gradio interface
# Components are created inside the Blocks context in the order they should
# appear on the page; the statement order below is therefore significant.
with gr.Blocks() as demo:
    # Page title.
    gr.Markdown("# Audio Cloning and Text-to-Speech")

    # Text to be synthesized (passed to process_audio as `input_text`).
    with gr.Row():
        text_input = gr.Textbox(label="Input Text", placeholder="Enter your text here.")
    
    # Reference recording and the label used to name the output file.
    with gr.Row():
        # type='filepath': process_audio receives this value as a path and
        # checks it with os.path.exists before copying it.
        speaker_audio = gr.Audio(label="Speaker Audio (to be cloned)", type='filepath', format='wav')
        speaker_name = gr.Textbox(label="Speaker Name", placeholder="Enter the speaker's name.")

    # Backend selector. process_audio only handles "Xtts_v2"; any other choice
    # makes it return a "not implemented yet" message.
    option_selected = gr.Dropdown(choices=["Xtts_v2", "metaVoice(not working at the moment)", "more"], label="Select an Option")

    submit_btn = gr.Button("Submit")

    # Receives whatever process_audio returns.
    # NOTE(review): process_audio returns plain message strings on failure,
    # which are routed into this Audio component as if they were file paths —
    # confirm how the installed Gradio version presents that to the user.
    output_audio = gr.Audio(label="Generated Audio Output", type='filepath')

    # Wire the button: the four inputs are passed positionally, in this order,
    # to process_audio(input_text, speaker_audio, speaker_name, option_selected).
    submit_btn.click(
        fn=process_audio,
        inputs=[text_input, speaker_audio, speaker_name, option_selected],
        outputs=output_audio,
    )

# Launch the Gradio app
# NOTE(review): this runs at import time (there is no `__main__` guard), and
# share=True asks Gradio for a public share link — confirm both are intended.
demo.launch(share=True)