owiedotch committed
Commit: b072ff1
Parent: ff064e2

Update app.py

Files changed (1):
  1. app.py +22 -60
app.py CHANGED
@@ -1,102 +1,65 @@
 import gradio as gr
 import torch
-# import demucs.api
 import os
 import spaces
-import subprocess
 from pydub import AudioSegment
 from typing import Tuple, Dict, List
 from demucs.apply import apply_model
 from demucs.separate import load_track
 from demucs.pretrained import get_model
-from demucs.audio import save_audio  # Import save_audio from demucs.audio
+from demucs.audio import save_audio
 
-# check if cuda is available
 device: str = "cuda" if torch.cuda.is_available() else "cpu"
 
-# check if sox is installed and install it if necessary
-try:
-    subprocess.run(["sox", "--version"], check=True, capture_output=True)
-except FileNotFoundError:
-    print("sox is not installed. trying to install it now...")
-    try:
-        subprocess.run(["apt-get", "update"], check=True)
-        subprocess.run(["apt-get", "install", "-y", "sox"], check=True)
-        print("sox has been installed.")
-    except subprocess.CalledProcessError as e:
-        print(f"error installing sox: {e}")
-        print("please install sox manually or try adding the following repository to your sources list:")
-        print("deb http://deb.debian.org/debian stretch main contrib non-free")
-        exit(1)
-
-# define the inference function
+# Define the inference function
 @spaces.GPU
-def inference(audio_file: str, model_name: str, vocals: bool, drums: bool, bass: bool, other: bool, mp3: bool, mp3_bitrate: int) -> Tuple[str, str]:
-    """
-    performs inference using demucs and mixes the selected stems.
-
-    args:
-        audio_file: the audio file to separate.
-        model_name: the name of the demucs model to use.
-        vocals: whether to include vocals in the mix.
-        drums: whether to include drums in the mix.
-        bass: whether to include bass in the mix.
-        other: whether to include other instruments in the mix.
-        mp3: whether to save the output as mp3.
-        mp3_bitrate: the bitrate of the output mp3 file.
-
-    returns:
-        a tuple containing the path to the mixed audio file and the separation log.
-    """
-
-    # initialize demucs separator
-    # separator: demucs.api.Separator = demucs.api.Separator(model=model_name)
+def inference(audio_file: str, model_name: str, vocals: bool, drums: bool, bass: bool, other: bool, mp3: bool, mp3_bitrate: int) -> Tuple[str, gr.HTML]:
     separator = get_model(name=model_name)
 
-    # separate audio file and capture log
-    import io
-    log_stream = io.StringIO()
-    # origin, separated = separator.separate_audio_file(audio_file, progress=True, log_stream=log_stream)
-    wav = load_track(audio_file, separator.samplerate, channels=separator.audio_channels)
+    def stream_log(message):
+        return f"<pre style='margin-bottom: 0;'>[{model_name}] {message}</pre>"
+
+    yield None, stream_log("Starting separation process...")
+    yield None, stream_log(f"Loading audio file: {audio_file}")
+    wav = load_track(audio_file, separator.samplerate, separator.audio_channels)
     ref = wav.mean(0)
     wav = (wav - ref.view(1, -1)).to(device)
-    sources = apply_model(separator, wav, device=device, progress=True, log_stream=log_stream)
+    yield None, stream_log("Audio loaded successfully. Applying model...")
+    sources = apply_model(separator, wav, device=device, progress=True)
     sources = sources * ref.view(1, -1) + ref.view(1, -1)
-    separation_log = log_stream.getvalue()
+    yield None, stream_log("Model applied. Processing stems...")
 
-    # get the output file paths
     output_dir: str = os.path.join("separated", model_name, os.path.splitext(os.path.basename(audio_file))[0])
-    os.makedirs(output_dir, exist_ok=True)  # create output directory if it doesn't exist
+    os.makedirs(output_dir, exist_ok=True)
     stems: Dict[str, str] = {}
     for stem, source in zip(separator.sources, sources):
         stem_path: str = os.path.join(output_dir, f"{stem}.wav")
-        # demucs.api.save_audio(source, stem_path, samplerate=separator.samplerate)
-        save_audio(source, stem_path, separator.samplerate)  # Use save_audio
+        save_audio(source, stem_path, separator.samplerate)
         stems[stem] = stem_path
+        yield None, stream_log(f"Saved {stem} stem")
 
-    # mix the selected stems
     selected_stems: List[str] = [stems[stem] for stem, include in zip(["vocals", "drums", "bass", "other"], [vocals, drums, bass, other]) if include]
     if not selected_stems:
-        raise gr.Error("please select at least one stem to mix.")
+        raise gr.Error("Please select at least one stem to mix.")
 
     output_file: str = os.path.join(output_dir, "mixed.wav")
+    yield None, stream_log("Mixing selected stems...")
     if len(selected_stems) == 1:
-        # if only one stem is selected, just copy it
         os.rename(selected_stems[0], output_file)
     else:
-        # otherwise, use pydub to mix the stems
         mixed_audio: AudioSegment = AudioSegment.empty()
         for stem_path in selected_stems:
             mixed_audio += AudioSegment.from_wav(stem_path)
         mixed_audio.export(output_file, format="wav")
 
-    # automatically convert to mp3 if requested
     if mp3:
+        yield None, stream_log(f"Converting to MP3 (bitrate: {mp3_bitrate}k)...")
         mp3_output_file: str = os.path.splitext(output_file)[0] + ".mp3"
         mixed_audio.export(mp3_output_file, format="mp3", bitrate=str(mp3_bitrate) + "k")
-        output_file = mp3_output_file  # update output_file to the mp3 file
+        output_file = mp3_output_file
 
-    return output_file, separation_log
+    yield None, stream_log("Process completed successfully!")
+    yield output_file, gr.HTML("<pre style='color: green;'>Separation and mixing completed successfully!</pre>")
 
 # Define the Gradio interface
 with gr.Blocks() as iface:
@@ -123,7 +86,7 @@ with gr.Blocks() as iface:
 
     with gr.Column(scale=1):
         output_audio = gr.Audio(type="filepath", label="Processed Audio")
-        separation_log = gr.Textbox(label="Separation Log", lines=10)
+        separation_log = gr.HTML()
 
     submit_btn.click(
         fn=inference,
@@ -131,7 +94,6 @@ with gr.Blocks() as iface:
         outputs=[output_audio, separation_log]
     )
 
-    # Make MP3 bitrate slider visible only when "Save as MP3" is checked
     mp3_checkbox.change(
        fn=lambda mp3: gr.update(visible=mp3),
        inputs=mp3_checkbox,
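Note on the streaming pattern introduced here: Gradio runs a generator event handler as a stream, pushing each yielded tuple to the bound outputs as it arrives. That is what lets the rewritten inference report progress through stream_log instead of trying to capture demucs output into an io.StringIO (apply_model does not appear to accept a log_stream keyword, which the old call passed). A minimal, self-contained sketch of the same pattern; the names here (slow_task, demo) are illustrative, not part of the app:

import time

import gradio as gr

def slow_task(steps: float):
    # Generator handler: each yield streams an intermediate update to the
    # outputs bound in .click() below.
    total = int(steps)
    for i in range(total):
        time.sleep(0.5)  # stand-in for real work (separation, encoding, ...)
        # First output (the result) stays empty for now; the second output
        # (the HTML log) is replaced with the latest message.
        yield None, f"<pre style='margin-bottom: 0;'>step {i + 1}/{total} done</pre>"
    # The final yield fills in the real result.
    yield "all done", "<pre style='color: green;'>finished</pre>"

with gr.Blocks() as demo:
    steps = gr.Slider(1, 10, value=3, step=1, label="Steps")
    result = gr.Textbox(label="Result")
    log = gr.HTML()
    gr.Button("Run").click(fn=slow_task, inputs=steps, outputs=[result, log])

if __name__ == "__main__":
    demo.launch()

As in the commit, each yield replaces the previous log value rather than appending to it, so the UI shows only the most recent stream_log message.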
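One pydub detail relevant to the mixing branch: the + operator on AudioSegment concatenates clips end to end, while AudioSegment.overlay plays them simultaneously. A short sketch of the difference, with hypothetical stem paths mirroring the app's separated/<model>/<track>/<stem>.wav layout:

from pydub import AudioSegment

# Hypothetical paths for illustration only.
vocals = AudioSegment.from_wav("separated/htdemucs/song/vocals.wav")
drums = AudioSegment.from_wav("separated/htdemucs/song/drums.wav")

sequential = vocals + drums        # concatenation: drums play after vocals
layered = vocals.overlay(drums)    # overlay: both stems play at once
layered.export("mixed.wav", format="wav")

overlay keeps the length of the base segment, so equal-length stems line up exactly.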