import os import re import random from scipy.io.wavfile import write from scipy.io.wavfile import read import numpy as np import gradio as gr import yt_dlp import subprocess mdxnet_models = [ 'UVR-MDX-NET-Inst_full_292.onnx', 'UVR-MDX-NET_Inst_187_beta.onnx', 'UVR-MDX-NET_Inst_82_beta.onnx', 'UVR-MDX-NET_Inst_90_beta.onnx', 'UVR-MDX-NET_Main_340.onnx', 'UVR-MDX-NET_Main_390.onnx', 'UVR-MDX-NET_Main_406.onnx', 'UVR-MDX-NET_Main_427.onnx', 'UVR-MDX-NET_Main_438.onnx', 'UVR-MDX-NET-Inst_HQ_1.onnx', 'UVR-MDX-NET-Inst_HQ_2.onnx', 'UVR-MDX-NET-Inst_HQ_3.onnx', 'UVR-MDX-NET-Inst_HQ_4.onnx', 'UVR_MDXNET_Main.onnx', 'UVR-MDX-NET-Inst_Main.onnx', 'UVR_MDXNET_1_9703.onnx', 'UVR_MDXNET_2_9682.onnx', 'UVR_MDXNET_3_9662.onnx', 'UVR-MDX-NET-Inst_1.onnx', 'UVR-MDX-NET-Inst_2.onnx', 'UVR-MDX-NET-Inst_3.onnx', 'UVR_MDXNET_KARA.onnx', 'UVR_MDXNET_KARA_2.onnx', 'UVR_MDXNET_9482.onnx', 'UVR-MDX-NET-Voc_FT.onnx', 'Kim_Vocal_1.onnx', 'Kim_Vocal_2.onnx', 'Kim_Inst.onnx', 'Reverb_HQ_By_FoxJoy.onnx', 'UVR-MDX-NET_Crowd_HQ_1.onnx', 'kuielab_a_vocals.onnx', 'kuielab_a_other.onnx', 'kuielab_a_bass.onnx', 'kuielab_a_drums.onnx', 'kuielab_b_vocals.onnx', 'kuielab_b_other.onnx', 'kuielab_b_bass.onnx', 'kuielab_b_drums.onnx', ] output_format = [ 'wav', 'flac', 'mp3', ] mdxnet_overlap_values = [ '0.25', '0.5', '0.75', '0.99', ] def download_audio(url): ydl_opts = { 'format': 'bestaudio/best', 'outtmpl': 'ytdl/%(title)s.%(ext)s', 'postprocessors': [{ 'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav', 'preferredquality': '192', }], } with yt_dlp.YoutubeDL(ydl_opts) as ydl: info_dict = ydl.extract_info(url, download=True) file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav' sample_rate, audio_data = read(file_path) audio_array = np.asarray(audio_data, dtype=np.int16) return sample_rate, audio_array def mdxnet_separator(mdxnet_audio, mdxnet_model, mdxnet_output_format, mdxnet_segment_size, mdxnet_overlap, mdxnet_denoise): files_list = [] files_list.clear() directory = "./outputs" random_id = str(random.randint(10000, 99999)) pattern = f"{random_id}" os.makedirs("outputs", exist_ok=True) write(f'{random_id}.wav', mdxnet_audio[0], mdxnet_audio[1]) prompt = f"audio-separator {random_id}.wav --model_filename {mdxnet_model} --output_dir=./outputs --output_format={mdxnet_output_format} --normalization=0.9 --mdx_segment_size={mdxnet_segment_size} --mdx_overlap={mdxnet_overlap}" if mdxnet_denoise: prompt += " --mdx_enable_denoise" os.system(prompt) for file in os.listdir(directory): if re.search(pattern, file): files_list.append(os.path.join(directory, file)) stem1_file = files_list[0] stem2_file = files_list[1] return stem1_file, stem2_file def mdxnet_batch(path_input, path_output, model, output_format, overlap, segment_size, denoise): found_files = [] logs = [] logs.clear() extensions = (".mp3", ".wav", ".flac") for audio_files in os.listdir(path_input): if audio_files.endswith(extensions): found_files.append(audio_files) total_files = len(found_files) if total_files == 0: logs.append("No valid audio files.") yield "\n".join(logs) else: logs.append(f"{total_files} audio files found") found_files.sort() for audio_files in found_files: file_path = os.path.join(path_input, audio_files) prompt = ["audio-separator", file_path, "-m", f"{model}", f"--output_dir={path_output}", f"--output_format={output_format}", "--normalization=0.9", f"--mdx_overlap={overlap}", f"--mdx_segment_size={segment_size}"] if denoise: prompt.append("--mdx_enable_denoise") logs.append(f"Processing file: {audio_files}") yield "\n".join(logs) subprocess.run(prompt) logs.append(f"File: {audio_files} processed!") yield "\n".join(logs) with gr.Blocks(theme="Blane187/fuchsia", title="🎵 UVR5 MDX 🎵") as app: gr.Markdown("