# NOTE: removed page-scrape residue ("Spaces:" / "Running" / "Running") — HuggingFace
# Spaces page chrome, not part of the program.
# --- Imports (stdlib / third-party / local) ---
import os
import random
import re
import subprocess
import sys

import gradio as gr
import numpy as np
import yt_dlp
from scipy.io.wavfile import read, write

from audio_effects import add_audio_effects
from audio_separator.separator import Separator
from main import song_cover_pipeline
from modules.file_processing import process_file_upload
from modules.model_management import (
    ignore_files,
    update_models_list,
    download_from_url,
    upload_zip_model,
    upload_separate_files,
)
from modules.ui_updates import (
    show_hop_slider,
    update_f0_method,
    update_button_text_voc,
    update_button_text_inst,
    swap_visibility,
    swap_buttons,
)

# Shared audio-source separator used by the UVR helpers below.
separator = Separator()

# Setup directories: BASE_DIR is the parent of the directory containing this file.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
rvc_models_dir = os.path.join(BASE_DIR, 'rvc_models')

# Optional CLI flag: pass 'True' as argv[1] to show the slow-space warning banner.
warning = sys.argv[1] if len(sys.argv) > 1 else 'False'
warning = warning == 'True'

# Initialize voice models discovered on disk (helper filters out non-model files).
voice_models = ignore_files(rvc_models_dir)

# UVR separation models selectable in the UI: display name -> checkpoint file.
UVR_5_MODELS = [
    {"model_name": "BS-Roformer-Viperx-1297", "checkpoint": "model_bs_roformer_ep_317_sdr_12.9755.ckpt"},
    {"model_name": "MDX23C-InstVoc HQ 2", "checkpoint": "MDX23C-8KFFT-InstVoc_HQ_2.ckpt"},
    {"model_name": "Kim Vocal 2", "checkpoint": "Kim_Vocal_2.onnx"},
    {"model_name": "5_HP-Karaoke", "checkpoint": "5_HP-Karaoke-UVR.pth"},
    {"model_name": "UVR-DeNoise by FoxJoy", "checkpoint": "UVR-DeNoise.pth"},
    {"model_name": "UVR-DeEcho-DeReverb by FoxJoy", "checkpoint": "UVR-DeEcho-DeReverb.pth"},
]
def download_audio(url):
    """Download the audio track of *url* with yt-dlp and return it as PCM data.

    The FFmpeg postprocessor converts whatever format yt-dlp fetched into a
    .wav file under ``ytdl/``, which is then read back with scipy.

    Args:
        url: Any yt-dlp-supported media URL.

    Returns:
        tuple: ``(sample_rate, audio_array)`` where ``audio_array`` is an
        int16 numpy array — the tuple form accepted by a gradio Audio output.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        # prepare_filename() still carries the source extension; the FFmpeg
        # postprocessor rewrote the file to .wav, so swap the suffix.
        file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
        sample_rate, audio_data = read(file_path)
        # NOTE(review): forces int16 — assumes FFmpeg wrote 16-bit PCM; a
        # float or 24-bit wav would be silently truncated. TODO confirm.
        audio_array = np.asarray(audio_data, dtype=np.int16)
        return sample_rate, audio_array
def inf_handler(audio, model_name):
    """Separate *audio* into two stems using the named UVR model.

    Looks up *model_name* in UVR_5_MODELS and loads its checkpoint into the
    shared ``separator``; falls back to the separator's default model when
    the name is unknown. Returns the first two separated output files.
    """
    checkpoint = next(
        (entry["checkpoint"] for entry in UVR_5_MODELS if entry["model_name"] == model_name),
        None,
    )
    if checkpoint is None:
        # Unknown name: let audio-separator pick its default model.
        separator.load_model()
    else:
        separator.load_model(checkpoint)
    stems = separator.separate(audio)
    # NOTE(review): assumes stems[0]=vocals, stems[1]=instrumental — output
    # order depends on the loaded model; verify per checkpoint.
    return stems[0], stems[1]
def inference(audio, model_name):
    """Thin UI-facing wrapper: delegate separation to inf_handler."""
    vocals, inst = inf_handler(audio, model_name)
    return vocals, inst
# Sample processing functions for audio and text inputs
def process_audio(audio_path):
    """Return a confirmation message echoing the received audio file path."""
    return "Audio file received: {}".format(audio_path)
def process_text(text_input):
    """Return a confirmation message echoing the received text."""
    return "Text received: {}".format(text_input)
# Function to handle input type dynamically
def dynamic_input(input_type):
    """Build the gradio input component matching the selected input type."""
    if input_type != "Audio Upload":
        # Anything other than audio upload falls back to a free-text box.
        return gr.Textbox(label="π Enter Text", placeholder="Type your text here...", interactive=True)
    return gr.Audio(label='π΅ Upload Audio', interactive=True, type="filepath")
# Define the Gradio interface.
# (Indentation reconstructed from the gradio context-manager nesting; the
# pasted source had all leading whitespace stripped.)
with gr.Blocks(title="π€ RVC Inference", css="footer{display:none !important}") as app:
    gr.Markdown("# RVC INFERENCE ")

    # Show warning message if applicable (set via argv[1] at startup).
    if warning:
        gr.HTML("<center><h2>β οΈ This space is running slowly due to limited resources. Use the faster <a href='https://colab.research.google.com/drive/1HzuPgICRrjqUWQWb5Zn-l07m099-n-Nr'>Google Colab Notebook</a>.</h2></center>")

    # Global controls shared by the conversion pipeline.
    with gr.Row():
        rvc_model = gr.Dropdown(voice_models, label='ποΈ Voice Models')
        output_format = gr.Dropdown(['mp3', 'flac', 'wav'], value='mp3', label='π§ Output Format')
        pitch = gr.Slider(-12, 12, value=0, step=1, label='ποΈ Pitch Adjustment', info='-12 for deeper voice, +12 for higher voice')
    with gr.Row():
        refresh_btn = gr.Button('π Refresh Models')
        generate_btn = gr.Button("πΌ Generate", variant="primary")

    # Main Tab: Voice Conversion
    with gr.Tab("πΆ Voice Conversion"):
        with gr.Column():
            audio_input = gr.Audio(label='π΅ Upload Audio', interactive=True, type="filepath")

            # Vocal separator: split an upload (or a downloaded link) into stems.
            with gr.Accordion(" π§ Vocal Separator (UVR)"):
                gr.Markdown("β Separate vocals and instruments from an audio file using UVR models.")
                with gr.Accordion("π Separation by Link", open=False):
                    with gr.Row():
                        mdx23c_link = gr.Textbox(label="π Link", placeholder="π Paste the link here", interactive=True)
                    with gr.Row():
                        gr.Markdown("π‘ You can paste the link to the video/audio from many sites, check the complete list [here](https://github.com/yt-dlp/yt-dlp/blob/master/supportedsites.md)")
                    with gr.Row():
                        mdx23c_download_button = gr.Button("β¬οΈ Download!", variant="primary")
                uvr5_audio_file = gr.Audio(label=" π² Audio File", type="filepath")
                with gr.Row():
                    uvr5_model = gr.Dropdown(label="γ½οΈ Model", choices=[model["model_name"] for model in UVR_5_MODELS])
                    uvr5_button = gr.Button("π§ Separate Vocals", variant="primary")
                uvr5_output_inst = gr.Audio(type="filepath", label="Output 2")

            # Link downloader feeds the separator's audio input.
            mdx23c_download_button.click(download_audio, [mdx23c_link], [uvr5_audio_file])
            # NOTE(review): inference returns (vocals, inst); this wiring puts
            # output[0] in "Output 2" and output[1] in the main audio input —
            # verify the ordering matches the separator's actual stem order.
            uvr5_button.click(inference, [uvr5_audio_file, uvr5_model], [uvr5_output_inst, audio_input])

            with gr.Accordion('βοΈ Voice Conversion Settings', open=False):
                use_hybrid_methods = gr.Checkbox(label="𧬠Use Hybrid Methods", value=False)
                f0_method = gr.Dropdown(['rmvpe+', 'fcpe', 'rmvpe', 'mangio-crepe', 'crepe'], value='rmvpe+', label='π§ F0 Method')
                use_hybrid_methods.change(update_f0_method, inputs=use_hybrid_methods, outputs=f0_method)
                # Hop-length slider is only relevant for crepe-family methods;
                # show_hop_slider toggles its visibility.
                crepe_hop_length = gr.Slider(8, 512, value=128, step=8, label='ποΈ Crepe Hop Length')
                f0_method.change(show_hop_slider, inputs=f0_method, outputs=crepe_hop_length)
                with gr.Row():
                    index_rate = gr.Slider(0, 1, value=0, label=' π§ Index Rate', info='Controls the extent to which the index file influences the analysis results. A higher value increases the influence of the index file, but may amplify breathing artifacts in the audio. Choosing a lower value may help reduce artifacts.')
                    filter_radius = gr.Slider(0, 7, value=3, step=1, label='π‘ Filter Radius', info='Manages the radius of filtering the pitch analysis results. If the filtering value is three or higher, median filtering is applied to reduce breathing noise in the audio recording.')
                    rms_mix_rate = gr.Slider(0, 1, value=0.25, step=0.01, label='γ½οΈ RMS Mix Rate', info='Controls the extent to which the output signal is mixed with its envelope. A value close to 1 increases the use of the envelope of the output signal, which may improve sound quality.')
                    protect = gr.Slider(0, 0.5, value=0.33, step=0.01, label='β Consonant Protection', info='Controls the extent to which individual consonants and breathing sounds are protected from electroacoustic breaks and other artifacts. A maximum value of 0.5 provides the most protection, but may increase the indexing effect, which may negatively impact sound quality. Reducing the value may decrease the extent of protection, but reduce the indexing effect.')

            converted_audio = gr.Audio(label='πΆ Converted Voice')

        refresh_btn.click(update_models_list, None, outputs=rvc_model)
        generate_btn.click(song_cover_pipeline, inputs=[audio_input, rvc_model, pitch, f0_method, crepe_hop_length, index_rate, filter_radius, rms_mix_rate, protect, output_format], outputs=[converted_audio])

    # Merge/Process Tab: mix separated stems back into a finished cover.
    with gr.Tab('π Merge/Process'):
        with gr.Row():
            vocal_input = gr.Audio(label='π€ Vocals', interactive=True)
            instrumental_input = gr.Audio(label='πΉ Instrumental', interactive=True)
        with gr.Row():
            process_btn = gr.Button("π¬ Process")
        ai_cover_output = gr.Audio(label='π§ AI Cover')
        process_btn.click(add_audio_effects, inputs=[vocal_input, instrumental_input], outputs=[ai_cover_output])

    # Model Options Tab: three ways to install RVC voice models.
    with gr.Tab('π¦ Model Options'):
        with gr.Tab('π Download Models'):
            model_zip_link = gr.Text(label='π Enter Model Download Link')
            model_name = gr.Text(label='π Model Name')
            download_btn = gr.Button('β¬οΈ Download Model')
            dl_output_message = gr.Text(label='π’ Output Message')
            download_btn.click(download_from_url, inputs=[model_zip_link, model_name], outputs=dl_output_message)
        with gr.Tab('π€ Upload ZIP Models'):
            zip_file = gr.File(label='π Upload ZIP File', file_types=['.zip'])
            zip_model_name = gr.Text(label='π Model Name')
            upload_btn = gr.Button('π€ Upload Model')
            upload_output_message = gr.Text(label='π’ Upload Status')
            upload_btn.click(upload_zip_model, inputs=[zip_file, zip_model_name], outputs=upload_output_message)
        with gr.Tab('π€ Upload Separate Files'):
            pth_file = gr.File(label='π Upload .pth File', file_types=['.pth'])
            index_file = gr.File(label='π Upload .index File', file_types=['.index'])
            sep_model_name = gr.Text(label='π Model Name')
            separate_upload_btn = gr.Button('π€ Upload Files')
            separate_upload_output = gr.Text(label='π’ Upload Status')
            separate_upload_btn.click(upload_separate_files, inputs=[pth_file, index_file, sep_model_name], outputs=separate_upload_output)

app.launch(share=True)