import gradio as gr
import torch
import demucs.separate
import shlex
import os
import spaces
import subprocess

# Informational device probe; Demucs selects its own device internally.
device = "cuda" if torch.cuda.is_available() else "cpu"


@spaces.GPU
def inference(audio_file, model_name, vocals, drums, bass, other, mp3, mp3_bitrate):
    """
    Separate an audio file into stems with Demucs and mix the selected stems.

    Args:
        audio_file: Path to the audio file to separate.
        model_name: Name of the Demucs model to use.
        vocals: Whether to include vocals in the mix.
        drums: Whether to include drums in the mix.
        bass: Whether to include bass in the mix.
        other: Whether to include other instruments in the mix.
        mp3: Whether Demucs should write MP3 stems (otherwise WAV).
        mp3_bitrate: Bitrate (kbps) for MP3 output.

    Returns:
        Path to the mixed audio file.

    Raises:
        gr.Error: If no stem is selected.
        subprocess.CalledProcessError: If the ffmpeg mixing step fails.
    """
    # Build Demucs CLI arguments as an argv list. The input track is a
    # POSITIONAL argument — `--filename` is Demucs' *output* name template,
    # not the input path, so it must not be used here.
    args = ["-n", model_name, "--clip-mode", "clamp", "--shifts=1"]
    if mp3:
        args += ["--mp3", f"--mp3-bitrate={mp3_bitrate}"]
    args.append(audio_file)

    # Run Demucs; it writes stems to separated/<model>/<track>/<stem>.<ext>
    demucs.separate.main(args)

    # Stem extension follows the output format Demucs was asked for.
    ext = "mp3" if mp3 else "wav"
    track = os.path.splitext(os.path.basename(audio_file))[0]
    output_dir = os.path.join("separated", model_name, track)

    # Collect the paths of the stems the user ticked.
    stem_names = ["vocals", "drums", "bass", "other"]
    selected_stems = [
        os.path.join(output_dir, f"{name}.{ext}")
        for name, include in zip(stem_names, [vocals, drums, bass, other])
        if include
    ]
    if not selected_stems:
        raise gr.Error("Please select at least one stem to mix.")

    output_file = os.path.join(output_dir, f"mixed.{ext}")
    if len(selected_stems) == 1:
        # Only one stem selected: just rename it into place.
        os.rename(selected_stems[0], output_file)
    else:
        # subprocess.run with a list passes each argument verbatim — do NOT
        # wrap paths in shell-style quotes, or ffmpeg looks for files whose
        # names literally contain quote characters.
        ffmpeg_cmd = ["ffmpeg", "-y"]
        for stem_path in selected_stems:
            ffmpeg_cmd += ["-i", stem_path]
        ffmpeg_cmd += [
            "-filter_complex",
            f"amix=inputs={len(selected_stems)}:duration=longest",
            output_file,
        ]
        subprocess.run(ffmpeg_cmd, check=True)

    return output_file


# Gradio interface wiring the separation/mixing function to a simple UI.
iface = gr.Interface(
    fn=inference,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Dropdown(
            ["htdemucs", "htdemucs_ft", "htdemucs_6s", "hdemucs_mmi",
             "mdx", "mdx_extra", "mdx_q", "mdx_extra_q"],
            label="Model Name",
        ),
        gr.Checkbox(label="Vocals", value=True),
        gr.Checkbox(label="Drums", value=True),
        gr.Checkbox(label="Bass", value=True),
        gr.Checkbox(label="Other", value=True),
        gr.Checkbox(label="Save as MP3"),
        # NOTE(review): slider is always hidden; presumably it was meant to
        # become visible when "Save as MP3" is checked — confirm intent.
        gr.Slider(128, 320, step=32, label="MP3 Bitrate", visible=False),
    ],
    outputs=gr.Audio(type="filepath"),
    title="Demucs Music Source Separation and Mixing",
    description="Separate vocals, drums, bass, and other instruments from your music using Demucs and mix the selected stems.",
)

# Launch the Gradio interface
iface.launch()