# Hugging Face Space: owiedotch/demucs-stem-separation (Running on Zero)
# File size: 4,379 Bytes

import gradio as gr
import torch
import demucs.separate
import shlex
import os
import spaces
import subprocess

# Pick the compute device: use the GPU when torch reports CUDA, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Check if sox is installed and install it if necessary.
# sox is required later to mix several stems into a single file.
try:
    subprocess.run(["sox", "--version"], check=True, capture_output=True)
except FileNotFoundError:
    print("sox is not installed. Trying to install it now...")
    try:
        subprocess.run(["apt-get", "update"], check=True)
        subprocess.run(["apt-get", "install", "-y", "sox"], check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: apt-get ran but failed.
        # OSError: apt-get itself is missing or not executable (e.g. a
        # non-Debian image), which would otherwise surface as a raw traceback.
        print(f"Error installing sox: {e}")
        print("Please install sox manually or try adding the following repository to your sources list:")
        print("deb http://deb.debian.org/debian stretch main contrib non-free")
        # Raise SystemExit directly; the bare `exit()` helper is injected by
        # the `site` module and is not guaranteed to exist in every runtime.
        raise SystemExit(1) from e
    else:
        print("sox has been installed.")

# Define the inference function
@spaces.GPU
def inference(audio_file, model_name, vocals, drums, bass, other, mp3, mp3_bitrate):
    """
    Separates a track with Demucs and mixes the selected stems into one file.

    Only the vocals/drums/bass/other stems are considered for the mix; any
    additional stems a model may write are left untouched in the output
    directory.

    Args:
        audio_file: Path to the audio file to separate.
        model_name: The name of the Demucs model to use.
        vocals: Whether to include vocals in the mix.
        drums: Whether to include drums in the mix.
        bass: Whether to include bass in the mix.
        other: Whether to include other instruments in the mix.
        mp3: Whether to return the mix as MP3 instead of WAV.
        mp3_bitrate: The bitrate (in kbit/s) of the output MP3 file.

    Returns:
        The absolute path to the mixed audio file (mixed.wav or mixed.mp3).

    Raises:
        gr.Error: If no audio file was given, no stem was selected, Demucs
            exited with an error, or an expected stem file is missing.
        subprocess.CalledProcessError: If sox or ffmpeg exits with an error.
    """
    import shutil  # local import: only needed for the single-stem copy below

    if not audio_file:
        raise gr.Error("Please provide an audio file to separate.")

    # Validate the selection first so we fail fast, before the costly separation.
    stem_flags = {"vocals": vocals, "drums": drums, "bass": bass, "other": other}
    selected_names = [name for name, include in stem_flags.items() if include]
    if not selected_names:
        raise gr.Error("Please select at least one stem to mix.")

    # Work with absolute paths instead of changing the process-wide working
    # directory: chdir is not exception-safe and made the relative output
    # path resolve against the wrong directory afterwards.
    audio_file = os.path.abspath(audio_file)
    separated_root = os.path.join(os.path.dirname(audio_file), "separated")

    # Arguments for demucs.separate.main: no program name, and the input
    # track is positional (--filename is the *output* name template).
    # Stems are always written as WAV so they can be mixed below; the MP3
    # conversion, if requested, is applied once to the final mix.
    demucs_args = [
        "-n", model_name,
        "--clip-mode", "clamp",
        "--shifts=1",
        "-o", separated_root,
        audio_file,
    ]

    # Run Demucs. It reports fatal problems via sys.exit(), so translate a
    # non-zero exit into an error the Gradio UI can display.
    try:
        demucs.separate.main(demucs_args)
    except SystemExit as exc:
        if exc.code not in (0, None):
            raise gr.Error(f"Demucs failed to separate the track (exit code {exc.code}).") from exc

    # Get the output file paths (Demucs default layout: <out>/<model>/<track>/<stem>.wav)
    track_name = os.path.splitext(os.path.basename(audio_file))[0]
    output_dir = os.path.join(separated_root, model_name, track_name)
    selected_stems = [os.path.join(output_dir, f"{name}.wav") for name in selected_names]

    missing = [os.path.basename(path) for path in selected_stems if not os.path.isfile(path)]
    if missing:
        raise gr.Error("Demucs did not produce the expected stem file(s): " + ", ".join(missing))

    # Mix the selected stems
    output_file = os.path.join(output_dir, "mixed.wav")
    if len(selected_stems) == 1:
        # If only one stem is selected, copy it (keeping the original stem file)
        shutil.copyfile(selected_stems[0], output_file)
    else:
        # Otherwise, use sox to mix the stems
        subprocess.run(["sox", "-m", *selected_stems, output_file], check=True)

    # Convert the mix to MP3 if requested
    if mp3:
        mp3_output_file = os.path.splitext(output_file)[0] + ".mp3"
        # int() guards against a float slider value producing e.g. "128.0k"
        bitrate = f"{int(mp3_bitrate)}k"
        ffmpeg_cmd = ["ffmpeg", "-y", "-i", output_file, "-b:a", bitrate, mp3_output_file]
        subprocess.run(ffmpeg_cmd, check=True)
        output_file = mp3_output_file  # Return the MP3 instead of the WAV

    return output_file

# Demucs models offered in the "Model Name" dropdown
_model_choices = [
    "htdemucs",
    "htdemucs_ft",
    "htdemucs_6s",
    "hdemucs_mmi",
    "mdx",
    "mdx_extra",
    "mdx_q",
    "mdx_extra_q",
]

# Assemble the Gradio interface. The input widgets are listed in the same
# order as the parameters of `inference`: audio, model, four stem toggles
# (all ticked by default), the MP3 switch (off by default) and the bitrate.
iface = gr.Interface(
    fn=inference,
    inputs=[
        gr.Audio(type="filepath"),
        gr.Dropdown(_model_choices, label="Model Name", value="htdemucs_ft"),
        *(
            gr.Checkbox(label=stem_label, value=True)
            for stem_label in ("Vocals", "Drums", "Bass", "Other")
        ),
        gr.Checkbox(label="Save as MP3", value=False),
        gr.Slider(128, 320, step=32, label="MP3 Bitrate", visible=True),
    ],
    outputs=gr.Audio(type="filepath"),
    title="Demucs Music Source Separation and Mixing",
    description="Separate vocals, drums, bass, and other instruments from your music using Demucs and mix the selected stems.",
)

# Start serving the interface
iface.launch()