File size: 10,222 Bytes
5f76498
e976963
b3b1505
e976963
 
6718e39
 
e976963
10321f9
 
 
 
 
 
 
 
 
b3b1505
 
 
6718e39
e976963
 
 
6718e39
 
 
 
 
 
 
b3b1505
 
 
 
 
 
 
 
 
 
 
10321f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3b1505
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60ca74a
 
 
 
 
 
 
 
 
 
 
 
 
 
b3b1505
 
6718e39
 
835fd69
6718e39
 
 
 
 
085162d
6718e39
 
 
5f76498
085162d
 
4cf4bfb
 
6718e39
085162d
60ca74a
6718e39
b3b1505
60ca74a
 
 
 
 
 
 
 
 
 
 
 
 
 
d105998
60ca74a
 
 
 
 
 
 
 
 
 
5277ceb
60ca74a
 
 
6718e39
 
 
 
085162d
6718e39
18f29ed
 
 
 
 
 
d105998
 
 
 
67df0e3
6718e39
18f29ed
e976963
b3b1505
60ca74a
b3b1505
 
 
6718e39
 
2644ffe
6718e39
 
 
4b79e01
6718e39
d105998
6718e39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import os, sys
import gradio as gr
from audio_separator.separator import Separator
from main import song_cover_pipeline
from audio_effects import add_audio_effects
from modules.model_management import ignore_files, update_models_list, download_from_url, upload_zip_model, upload_separate_files
from modules.ui_updates import show_hop_slider, update_f0_method, update_button_text_voc, update_button_text_inst, swap_visibility, swap_buttons
from modules.file_processing import process_file_upload
import os
import re
import random
from scipy.io.wavfile import write
from scipy.io.wavfile import read
import numpy as np
import gradio as gr
import yt_dlp
import subprocess

# Global separator instance shared by the UVR vocal-separation handlers below.
separator = Separator()

# Setup directories: project root is one level above this file.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
rvc_models_dir = os.path.join(BASE_DIR, 'rvc_models')

# Optional CLI flag: pass 'True' as the first argument to show the
# "limited resources" warning banner in the UI.  Any other value (or no
# argument) disables it.
warning = len(sys.argv) > 1 and sys.argv[1] == 'True'

# Initialize voice models from the models directory (helper filters out
# non-model files).
voice_models = ignore_files(rvc_models_dir)



# Registry of selectable UVR separation models: display name shown in the
# dropdown -> checkpoint file passed to separator.load_model().
UVR_5_MODELS = [
    {"model_name": "BS-Roformer-Viperx-1297", "checkpoint": "model_bs_roformer_ep_317_sdr_12.9755.ckpt"},
    {"model_name": "MDX23C-InstVoc HQ 2", "checkpoint": "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"},
    {"model_name": "Kim Vocal 2", "checkpoint": "Kim_Vocal_2.onnx"},
    {"model_name": "5_HP-Karaoke", "checkpoint": "5_HP-Karaoke-UVR.pth"},
    {"model_name": "UVR-DeNoise by FoxJoy", "checkpoint": "UVR-DeNoise.pth"},
    {"model_name": "UVR-DeEcho-DeReverb by FoxJoy", "checkpoint": "UVR-DeEcho-DeReverb.pth"},
]



def download_audio(url):
    """Download the best-quality audio track from *url* with yt-dlp.

    The FFmpeg postprocessor converts the download to WAV; the file is then
    read back and returned as ``(sample_rate, int16 numpy array)`` — the
    tuple form a Gradio Audio component accepts.
    """
    options = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }

    with yt_dlp.YoutubeDL(options) as downloader:
        info = downloader.extract_info(url, download=True)
        # The postprocessor replaced the original extension with .wav.
        wav_path = downloader.prepare_filename(info).rsplit('.', 1)[0] + '.wav'
        rate, samples = read(wav_path)
        return rate, np.asarray(samples, dtype=np.int16)




def inf_handler(audio, model_name):
    """Separate *audio* with the UVR checkpoint registered for *model_name*.

    Falls back to the separator's default model when the name is not found
    in UVR_5_MODELS.  Returns the pair of output files produced by
    ``separator.separate`` (vocals first, instrumental second).
    """
    checkpoint = next(
        (entry["checkpoint"] for entry in UVR_5_MODELS if entry["model_name"] == model_name),
        None,
    )
    if checkpoint is None:
        separator.load_model()
    else:
        separator.load_model(checkpoint)
    results = separator.separate(audio)
    return results[0], results[1]


def inference(audio, model_name):
    """Click handler for the UVR "Separate Vocals" button.

    Thin wrapper around :func:`inf_handler`; returns its
    ``(vocals, instrumental)`` pair unchanged.
    """
    # Original body used non-standard 8-space indentation and re-indexed the
    # tuple element by element; delegate directly instead.
    return inf_handler(audio, model_name)


# Sample processing functions for audio and text inputs
def process_audio(audio_path):
    """Return a confirmation message echoing the uploaded audio path."""
    message = "Audio file received: {}".format(audio_path)
    return message

def process_text(text_input):
    """Return a confirmation message echoing the submitted text."""
    return "Text received: " + "{}".format(text_input)

# Function to handle input type dynamically
def dynamic_input(input_type):
    """Build the input widget for *input_type*.

    "Audio Upload" yields a filepath-typed Audio uploader; any other value
    yields a free-text Textbox.
    """
    if input_type != "Audio Upload":
        return gr.Textbox(label="📝 Enter Text", placeholder="Type your text here...", interactive=True)
    return gr.Audio(label='🎵 Upload Audio', interactive=True, type="filepath")



# Define the Gradio interface
with gr.Blocks(title="🎀 RVC Inference", css="footer{display:none !important}") as app:
    gr.Markdown("# RVC INFERENCE ")
    # Show warning message if applicable
    if warning:
        gr.HTML("<center><h2>⚠️ This space is running slowly due to limited resources. Use the faster <a href='https://colab.research.google.com/drive/1HzuPgICRrjqUWQWb5Zn-l07m099-n-Nr'>Google Colab Notebook</a>.</h2></center>")
    
    # Main Tab: Voice Conversion
    with gr.Row():
            rvc_model = gr.Dropdown(voice_models, label='πŸŽ™οΈ Voice Models')
            output_format = gr.Dropdown(['mp3', 'flac', 'wav'], value='mp3', label='🎧 Output Format')
            pitch = gr.Slider(-12, 12, value=0, step=1, label='🎚️ Pitch Adjustment', info='-12 for deeper voice, +12 for higher voice')

    with gr.Row():
        refresh_btn = gr.Button('πŸ”„ Refresh Models')
        generate_btn = gr.Button("🎼 Generate", variant="primary")
       

        
    with gr.Tab("🎢 Voice Conversion"): 
        with gr.Column():
            audio_input = gr.Audio(label='🎡 Upload Audio', interactive=True, type="filepath")


        with gr.Accordion(" πŸ”§ Vocal Separator (UVR)"):
            gr.Markdown("β­• Separate vocals and instruments from an audio file using UVR models.")
            
            with gr.Accordion("πŸ”— Separation by Link", open=False):
                with gr.Row():
                    mdx23c_link = gr.Textbox(label="πŸ”— Link",placeholder="πŸ“‹ Paste the link here",interactive=True)
                with gr.Row():
                    gr.Markdown("πŸ’‘ You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
                with gr.Row():
                    mdx23c_download_button = gr.Button("⬇️ Download!",variant="primary")
            
            uvr5_audio_file = gr.Audio(label=" πŸ“² Audio File",type="filepath")
            
            with gr.Row():
                uvr5_model = gr.Dropdown(label="〽️ Model", choices=[model["model_name"] for model in UVR_5_MODELS])
                uvr5_button = gr.Button("πŸ”§ Separate Vocals", variant="primary")
            uvr5_output_inst = gr.Audio(type="filepath", label="Output 2",)




        mdx23c_download_button.click(download_audio, [mdx23c_link], [uvr5_audio_file])

        uvr5_button.click(inference, [uvr5_audio_file, uvr5_model], [uvr5_output_inst, audio_input])


    
        with gr.Accordion('βš™οΈ Voice Conversion Settings', open=False):
            use_hybrid_methods = gr.Checkbox(label="🧬 Use Hybrid Methods", value=False)
            f0_method = gr.Dropdown(['rmvpe+', 'fcpe', 'rmvpe', 'mangio-crepe', 'crepe'], value='rmvpe+', label='πŸ”§ F0 Method')
            use_hybrid_methods.change(update_f0_method, inputs=use_hybrid_methods, outputs=f0_method)
            crepe_hop_length = gr.Slider(8, 512, value=128, step=8, label='πŸŽ›οΈ Crepe Hop Length')
            f0_method.change(show_hop_slider, inputs=f0_method, outputs=crepe_hop_length)
            with gr.Row():
                index_rate = gr.Slider(0, 1, value=0, label=' πŸ”§ Index Rate', info='Controls the extent to which the index file influences the analysis results. A higher value increases the influence of the index file, but may amplify breathing artifacts in the audio. Choosing a lower value may help reduce artifacts.')
                filter_radius = gr.Slider(0, 7, value=3, step=1, label='πŸ“‘ Filter Radius', info='Manages the radius of filtering the pitch analysis results. If the filtering value is three or higher, median filtering is applied to reduce breathing noise in the audio recording.')
                rms_mix_rate = gr.Slider(0, 1, value=0.25, step=0.01, label='〽️ RMS Mix Rate', info='Controls the extent to which the output signal is mixed with its envelope. A value close to 1 increases the use of the envelope of the output signal, which may improve sound quality.')
                protect = gr.Slider(0, 0.5, value=0.33, step=0.01, label='β›” Consonant Protection', info='Controls the extent to which individual consonants and breathing sounds are protected from electroacoustic breaks and other artifacts. A maximum value of 0.5 provides the most protection, but may increase the indexing effect, which may negatively impact sound quality. Reducing the value may decrease the extent of protection, but reduce the indexing effect.')


        converted_audio = gr.Audio(label='🎢 Converted Voice')


        
        refresh_btn.click(update_models_list, None, outputs=rvc_model)
        generate_btn.click(song_cover_pipeline, inputs=[audio_input, rvc_model, pitch, f0_method, crepe_hop_length, index_rate, filter_radius, rms_mix_rate, protect,  output_format], outputs=[converted_audio])


    


    
    # Merge/Process Tab
    with gr.Tab('πŸ”„ Merge/Process'):
        with gr.Row():
            vocal_input = gr.Audio(label='🎀 Vocals', interactive=True)
            instrumental_input = gr.Audio(label='🎹 Instrumental', interactive=True)
        
        with gr.Row():
            process_btn = gr.Button("🎬 Process")
        ai_cover_output = gr.Audio(label='🎧 AI Cover')

        process_btn.click(add_audio_effects, inputs=[vocal_input, instrumental_input], outputs=[ai_cover_output])

    # Model Options Tab
    with gr.Tab('πŸ“¦ Model Options'):
        with gr.Tab('πŸ”— Download Models'):
            model_zip_link = gr.Text(label='πŸ”— Enter Model Download Link')
            model_name = gr.Text(label='πŸ“ Model Name')
            download_btn = gr.Button('⬇️ Download Model')
            dl_output_message = gr.Text(label='πŸ“’ Output Message')
            download_btn.click(download_from_url, inputs=[model_zip_link, model_name], outputs=dl_output_message)

        with gr.Tab('πŸ“€ Upload ZIP Models'):
            zip_file = gr.File(label='πŸ“ Upload ZIP File', file_types=['.zip'])
            zip_model_name = gr.Text(label='πŸ“ Model Name')
            upload_btn = gr.Button('πŸ“€ Upload Model')
            upload_output_message = gr.Text(label='πŸ“’ Upload Status')
            upload_btn.click(upload_zip_model, inputs=[zip_file, zip_model_name], outputs=upload_output_message)

        with gr.Tab('πŸ“€ Upload Separate Files'):
            pth_file = gr.File(label='πŸ“‚ Upload .pth File', file_types=['.pth'])
            index_file = gr.File(label='πŸ“‚ Upload .index File', file_types=['.index'])
            sep_model_name = gr.Text(label='πŸ“ Model Name')
            separate_upload_btn = gr.Button('πŸ“€ Upload Files')
            separate_upload_output = gr.Text(label='πŸ“’ Upload Status')
            separate_upload_btn.click(upload_separate_files, inputs=[pth_file, index_file, sep_model_name], outputs=separate_upload_output)

app.launch(share=True)