import os
import re
import sys
import random
import subprocess

import numpy as np
import gradio as gr
import yt_dlp
from scipy.io.wavfile import read, write

from audio_separator.separator import Separator
from main import song_cover_pipeline
from audio_effects import add_audio_effects
from modules.model_management import ignore_files, update_models_list, download_from_url, upload_zip_model, upload_separate_files
from modules.ui_updates import show_hop_slider, update_f0_method, update_button_text_voc, update_button_text_inst, swap_visibility, swap_buttons
from modules.file_processing import process_file_upload

separator = Separator()
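# Shared audio_separator instance used by the UVR handlers below. In the
# audio-separator library, load_model() takes a checkpoint filename (downloading
# it on first use) and separate() returns the paths of the separated stem files.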
# Setup directories
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
rvc_models_dir = os.path.join(BASE_DIR, 'rvc_models')
# Show the resource warning banner if the first CLI argument is 'True'
warning = len(sys.argv) > 1 and sys.argv[1] == 'True'
# Initialize voice models
voice_models = ignore_files(rvc_models_dir)
UVR_5_MODELS = [
    {"model_name": "BS-Roformer-Viperx-1297", "checkpoint": "model_bs_roformer_ep_317_sdr_12.9755.ckpt"},
    {"model_name": "MDX23C-InstVoc HQ 2", "checkpoint": "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"},
    {"model_name": "Kim Vocal 2", "checkpoint": "Kim_Vocal_2.onnx"},
    {"model_name": "5_HP-Karaoke", "checkpoint": "5_HP-Karaoke-UVR.pth"},
    {"model_name": "UVR-DeNoise by FoxJoy", "checkpoint": "UVR-DeNoise.pth"},
    {"model_name": "UVR-DeEcho-DeReverb by FoxJoy", "checkpoint": "UVR-DeEcho-DeReverb.pth"},
]
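# The checkpoint filenames above are what gets passed to separator.load_model();
# audio-separator resolves and fetches them by name, so the model files do not
# need to be bundled with the Space.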
def download_audio(url):
    # Download the best available audio stream and convert it to WAV via ffmpeg
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        # The postprocessor replaces the original extension with .wav
        file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
    sample_rate, audio_data = read(file_path)
    audio_array = np.asarray(audio_data, dtype=np.int16)
    return sample_rate, audio_array
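# Rough usage sketch (hypothetical URL): download_audio("https://example.com/some-track")
# returns a (sample_rate, int16 ndarray) pair, a format gr.Audio accepts as an output value.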
def inf_handler(audio, model_name):
    # Load the requested UVR checkpoint, falling back to the library default
    model_found = False
    for model_info in UVR_5_MODELS:
        if model_info["model_name"] == model_name:
            separator.load_model(model_info["checkpoint"])
            model_found = True
            break
    if not model_found:
        separator.load_model()
    output_files = separator.separate(audio)
    vocals = output_files[0]
    inst = output_files[1]
    return vocals, inst
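# Note: the code assumes separate() yields the stems in (vocals, instrumental) order;
# the actual ordering depends on the loaded model's stem naming in audio-separator.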
def inference(audio, model_name):
    output_data = inf_handler(audio, model_name)
    vocals = output_data[0]
    inst = output_data[1]
    return vocals, inst
# Sample processing functions for audio and text inputs
def process_audio(audio_path):
    return f"Audio file received: {audio_path}"

def process_text(text_input):
    return f"Text received: {text_input}"

# Function to handle input type dynamically
def dynamic_input(input_type):
    if input_type == "Audio Upload":
        return gr.Audio(label='Upload Audio', interactive=True, type="filepath")
    else:
        return gr.Textbox(label="Enter Text", placeholder="Type your text here...", interactive=True)
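# process_audio, process_text, and dynamic_input are helper stubs; they are not
# wired into the Blocks interface defined below.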
# Define the Gradio interface
with gr.Blocks(title="RVC Inference", css="footer{display:none !important}") as app:
    gr.Markdown("# RVC INFERENCE")

    # Show warning message if applicable
    if warning:
        gr.HTML("<center><h2>This space is running slowly due to limited resources. Use the faster <a href='https://colab.research.google.com/drive/1HzuPgICRrjqUWQWb5Zn-l07m099-n-Nr'>Google Colab Notebook</a>.</h2></center>")

    # Shared controls: voice model, output format, and pitch adjustment
    with gr.Row():
        rvc_model = gr.Dropdown(voice_models, label='Voice Models')
        output_format = gr.Dropdown(['mp3', 'flac', 'wav'], value='mp3', label='Output Format')
        pitch = gr.Slider(-12, 12, value=0, step=1, label='Pitch Adjustment', info='-12 for a deeper voice, +12 for a higher voice')
    with gr.Row():
        refresh_btn = gr.Button('Refresh Models')
        generate_btn = gr.Button("Generate", variant="primary")
    # Main tab: Voice Conversion
    with gr.Tab("Voice Conversion"):
        with gr.Column():
            audio_input = gr.Audio(label='Upload Audio', interactive=True, type="filepath")
            with gr.Accordion("Vocal Separator (UVR)"):
                gr.Markdown("Separate vocals and instrumentals from an audio file using UVR models.")
                with gr.Accordion("Separation by Link", open=False):
                    with gr.Row():
                        mdx23c_link = gr.Textbox(label="Link", placeholder="Paste the link here", interactive=True)
                    with gr.Row():
                        gr.Markdown("You can paste a link to video/audio from many sites; see the full list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
                    with gr.Row():
                        mdx23c_download_button = gr.Button("Download!", variant="primary")
                uvr5_audio_file = gr.Audio(label="Audio File", type="filepath")
                with gr.Row():
                    uvr5_model = gr.Dropdown(label="Model", choices=[model["model_name"] for model in UVR_5_MODELS])
                    uvr5_button = gr.Button("Separate Vocals", variant="primary")
                uvr5_output_inst = gr.Audio(type="filepath", label="Output 2")
                # inference() returns two stems; the first is shown in the player above
                # and the second replaces the main audio input used for conversion.
                mdx23c_download_button.click(download_audio, [mdx23c_link], [uvr5_audio_file])
                uvr5_button.click(inference, [uvr5_audio_file, uvr5_model], [uvr5_output_inst, audio_input])
            with gr.Accordion('Voice Conversion Settings', open=False):
                use_hybrid_methods = gr.Checkbox(label="Use Hybrid Methods", value=False)
                f0_method = gr.Dropdown(['rmvpe+', 'fcpe', 'rmvpe', 'mangio-crepe', 'crepe'], value='rmvpe+', label='F0 Method')
                use_hybrid_methods.change(update_f0_method, inputs=use_hybrid_methods, outputs=f0_method)
                crepe_hop_length = gr.Slider(8, 512, value=128, step=8, label='Crepe Hop Length')
                f0_method.change(show_hop_slider, inputs=f0_method, outputs=crepe_hop_length)
                with gr.Row():
                    index_rate = gr.Slider(0, 1, value=0, label='Index Rate', info='Controls how strongly the index file influences the result. Higher values increase its influence but may amplify breathing artifacts; lower values help reduce them.')
                    filter_radius = gr.Slider(0, 7, value=3, step=1, label='Filter Radius', info='Radius of filtering applied to the pitch analysis results. If set to 3 or higher, median filtering is applied, which reduces breathing noise in the recording.')
                    rms_mix_rate = gr.Slider(0, 1, value=0.25, step=0.01, label='RMS Mix Rate', info='Controls how much the output signal is mixed with its envelope. Values close to 1 rely more on the output envelope, which can improve sound quality.')
                    protect = gr.Slider(0, 0.5, value=0.33, step=0.01, label='Consonant Protection', info='Protects consonants and breathing sounds from artifacts. The maximum value of 0.5 gives the most protection but strengthens the indexing effect, which may hurt sound quality; lower values reduce both the protection and the indexing effect.')
            converted_audio = gr.Audio(label='Converted Voice')

    refresh_btn.click(update_models_list, None, outputs=rvc_model)
    generate_btn.click(song_cover_pipeline, inputs=[audio_input, rvc_model, pitch, f0_method, crepe_hop_length, index_rate, filter_radius, rms_mix_rate, protect, output_format], outputs=[converted_audio])
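    # Generation wiring: Gradio passes the `inputs` components to song_cover_pipeline
    # (defined in this project's main.py) as positional arguments, in the listed order.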
    # Merge/Process Tab
    with gr.Tab('Merge/Process'):
        with gr.Row():
            vocal_input = gr.Audio(label='Vocals', interactive=True)
            instrumental_input = gr.Audio(label='Instrumental', interactive=True)
        with gr.Row():
            process_btn = gr.Button("Process")
        ai_cover_output = gr.Audio(label='AI Cover')
        process_btn.click(add_audio_effects, inputs=[vocal_input, instrumental_input], outputs=[ai_cover_output])
    # Model Options Tab
    with gr.Tab('Model Options'):
        with gr.Tab('Download Models'):
            model_zip_link = gr.Text(label='Enter Model Download Link')
            model_name = gr.Text(label='Model Name')
            download_btn = gr.Button('Download Model')
            dl_output_message = gr.Text(label='Output Message')
            download_btn.click(download_from_url, inputs=[model_zip_link, model_name], outputs=dl_output_message)
        with gr.Tab('Upload ZIP Models'):
            zip_file = gr.File(label='Upload ZIP File', file_types=['.zip'])
            zip_model_name = gr.Text(label='Model Name')
            upload_btn = gr.Button('Upload Model')
            upload_output_message = gr.Text(label='Upload Status')
            upload_btn.click(upload_zip_model, inputs=[zip_file, zip_model_name], outputs=upload_output_message)
        with gr.Tab('Upload Separate Files'):
            pth_file = gr.File(label='Upload .pth File', file_types=['.pth'])
            index_file = gr.File(label='Upload .index File', file_types=['.index'])
            sep_model_name = gr.Text(label='Model Name')
            separate_upload_btn = gr.Button('Upload Files')
            separate_upload_output = gr.Text(label='Upload Status')
            separate_upload_btn.click(upload_separate_files, inputs=[pth_file, index_file, sep_model_name], outputs=separate_upload_output)
app.launch(share=True)