File size: 2,623 Bytes
7d05777
4d59a71
034f936
0d6259a
b94855f
5ca9cb4
 
d97359f
034f936
 
 
 
 
 
9232118
 
 
 
 
5be0054
034f936
b94855f
 
 
 
034f936
 
 
 
 
53f6df5
bb14413
0d6259a
 
53f6df5
034f936
bb14413
034f936
 
e77a799
 
034f936
e77a799
034f936
 
 
4d59a71
 
 
 
 
 
 
 
 
5be0054
4d59a71
 
bb14413
4d59a71
 
 
5be0054
4d59a71
 
034f936
4d59a71
 
 
 
 
d97359f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import gradio as gr
import os
from coqui_tts import run_audio_generation_v1
from metaVoice import run_audio_generation_v2
import shutil


# Presumably read by the Coqui TTS stack to treat its model terms of service
# as already accepted instead of prompting interactively -- verify against the
# coqui_tts wrapper module.
# NOTE(review): this assignment runs after `from coqui_tts import ...`; if that
# module consults the variable at import time, it is set too late -- confirm.
os.environ["COQUI_TOS_AGREED"] = "1"

def process_audio(input_text, speaker_audio, speaker_name, option_selected):
    """Synthesize ``input_text`` with the selected backend and return the audio path.

    The uploaded reference recording is copied to
    ``./tmp/audio/input_src/0.wav`` and the chosen backend is then called with
    the text only. The backends presumably read the reference from that fixed
    location and write ``./tmp/audio/generated-custom.wav`` (their code is not
    part of this file -- confirm).

    Failures are raised as ``gr.Error`` rather than returned as strings: the
    handler's output is an audio component, so a returned message would be
    interpreted as a file path and the real reason would never reach the user.

    Args:
        input_text: Text to synthesize; must contain non-whitespace characters.
        speaker_audio: Filesystem path of the uploaded reference recording.
        speaker_name: Currently unused; kept so existing input wiring stays valid.
        option_selected: Backend name, either ``"Xtts_v2"`` or ``"metaVoice"``.

    Returns:
        The path ``"./tmp/audio/generated-custom.wav"``.

    Raises:
        gr.Error: If the text is empty, the reference file is missing, the
            option has no backend, the backend raises, or no output file
            exists after generation.
    """
    speaker_audio_path = "./tmp/audio/input_src/0.wav"
    output_audio_path = "./tmp/audio/generated-custom.wav"
    backends = {
        "Xtts_v2": run_audio_generation_v1,
        "metaVoice": run_audio_generation_v2,
    }

    print(f"Received audio file: {speaker_audio}")

    # Validate everything up front so no files are touched for a bad request.
    if not input_text or not input_text.strip():
        raise gr.Error("Error: The input text is empty. Please enter some text.")
    if not speaker_audio or not os.path.isfile(speaker_audio):
        raise gr.Error(
            "Error: The uploaded audio file is missing or invalid. Please upload again."
        )
    generate = backends.get(option_selected)
    if generate is None:
        raise gr.Error("The option is not implemented yet.")

    try:
        # Ensure necessary directories exist
        os.makedirs("./tmp/audio/input_src/", exist_ok=True)
        os.makedirs("audio", exist_ok=True)

        # Copy speaker audio to the required location
        shutil.copy(speaker_audio, speaker_audio_path)

        generate(input_text)
    except Exception as e:
        # Top-level UI boundary: surface the backend's message to the user
        # while keeping the original traceback chained for the server log.
        raise gr.Error(str(e)) from e

    # NOTE: a file left over from an earlier request also satisfies this check;
    # it only catches the case where nothing has ever been produced.
    if not os.path.isfile(output_audio_path):
        raise gr.Error(
            f"{option_selected} did not produce an output file at {output_audio_path}."
        )

    return output_audio_path

# Gradio interface: a single-page form that feeds process_audio and plays
# back the file path it returns.
with gr.Blocks() as demo:
    gr.Markdown("# Audio Cloning and Text-to-Speech")

    # Text to be synthesized.
    with gr.Row():
        text_input = gr.Textbox(label="Input Text", placeholder="Enter your text here.")

    # Reference recording and speaker name share one row. The recording is
    # handed to process_audio as a file path (type='filepath').
    with gr.Row():
        speaker_audio = gr.Audio(label="Speaker Audio (to be cloned)", type='filepath', format='wav')
        # Passed through to process_audio, which does not currently use it.
        speaker_name = gr.Textbox(label="Speaker Name", placeholder="Enter the speaker's name.")

    # Backend selector. "more" is listed but process_audio has no branch for
    # it and reports it as not implemented.
    option_selected = gr.Dropdown(choices=["Xtts_v2", "metaVoice", "more"], label="Select an Option")

    submit_btn = gr.Button("Submit")

    # Receives the path returned by process_audio.
    output_audio = gr.Audio(label="Generated Audio Output", type='filepath')

    # The order of `inputs` must match process_audio's positional parameters.
    submit_btn.click(
        fn=process_audio,
        inputs=[text_input, speaker_audio, speaker_name, option_selected],
        outputs=output_audio,
    )

# Launch the Gradio app. There is no `__main__` guard, so this also runs
# whenever the module is imported.
demo.launch()