Politrees committed on
Commit
955d618
·
verified ·
1 Parent(s): 512878b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -30
app.py CHANGED
@@ -420,15 +420,15 @@ with gr.Blocks(
420
  with gr.Group():
421
  roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
422
  with gr.Row():
423
- roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
424
- roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.")
425
- roformer_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
426
  with gr.Column(variant='panel'):
427
  with gr.Group():
428
  with gr.Row():
429
- roformer_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
430
- roformer_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
431
- roformer_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
432
  with gr.Row():
433
  roformer_audio = gr.Audio(label="Input Audio", type="filepath")
434
  with gr.Row():
@@ -447,15 +447,15 @@ with gr.Blocks(
447
  with gr.Group():
448
  mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
449
  with gr.Row():
450
- mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
451
- mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
452
- mdx23c_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
453
  with gr.Column(variant='panel'):
454
  with gr.Group():
455
  with gr.Row():
456
- mdx23c_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
457
- mdx23c_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
458
- mdx23c_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
459
  with gr.Row():
460
  mdx23c_audio = gr.Audio(label="Input Audio", type="filepath")
461
  with gr.Row():
@@ -474,15 +474,15 @@ with gr.Blocks(
474
  with gr.Group():
475
  mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
476
  with gr.Row():
477
- mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.")
478
- mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
479
- mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
480
  with gr.Column(variant='panel'):
481
  with gr.Group():
482
  with gr.Row():
483
- mdx_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
484
- mdx_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
485
- mdx_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
486
  with gr.Row():
487
  mdx_audio = gr.Audio(label="Input Audio", type="filepath")
488
  with gr.Row():
@@ -504,15 +504,15 @@ with gr.Blocks(
504
  vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
505
  vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
506
  with gr.Row():
507
- vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.", visible=False)
508
- vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.")
509
- vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.")
510
  with gr.Column(variant='panel'):
511
  with gr.Group():
512
  with gr.Row():
513
- vr_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
514
- vr_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
515
- vr_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
516
  with gr.Row():
517
  vr_audio = gr.Audio(label="Input Audio", type="filepath")
518
  with gr.Row():
@@ -531,14 +531,14 @@ with gr.Blocks(
531
  with gr.Group():
532
  demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
533
  with gr.Row():
534
- demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.")
535
- demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.")
536
- demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
537
  with gr.Column(variant='panel'):
538
  with gr.Group():
539
  with gr.Row():
540
- demucs_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
541
- demucs_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
542
  with gr.Row():
543
  demucs_audio = gr.Audio(label="Input Audio", type="filepath")
544
  with gr.Row():
@@ -589,7 +589,7 @@ with gr.Blocks(
589
  with gr.Group():
590
  with gr.Row(equal_height=True):
591
  list_filter = gr.Dropdown(value="vocals", choices=["vocals", "instrumental", "drums", "bass", "guitar", "piano", "other"], label="List filter", info="Filter and sort the model list by 'stem'")
592
- list_limit = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="List limit", info="Limit the number of models shown.")
593
  list_button = gr.Button("Show list", variant="primary")
594
 
595
  output_list = gr.HTML(label="Leaderboard")
 
420
  with gr.Group():
421
  roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
422
  with gr.Row():
423
+ roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", show_reset_button=False, visible=False)
424
+ roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.", show_reset_button=False)
425
+ roformer_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.", show_reset_button=False)
426
  with gr.Column(variant='panel'):
427
  with gr.Group():
428
  with gr.Row():
429
+ roformer_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.", show_reset_button=False)
430
+ roformer_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
431
+ roformer_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
432
  with gr.Row():
433
  roformer_audio = gr.Audio(label="Input Audio", type="filepath")
434
  with gr.Row():
 
447
  with gr.Group():
448
  mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
449
  with gr.Row():
450
+ mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", show_reset_button=False, visible=False)
451
+ mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.", show_reset_button=False)
452
+ mdx23c_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.", show_reset_button=False)
453
  with gr.Column(variant='panel'):
454
  with gr.Group():
455
  with gr.Row():
456
+ mdx23c_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.", show_reset_button=False)
457
+ mdx23c_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
458
+ mdx23c_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
459
  with gr.Row():
460
  mdx23c_audio = gr.Audio(label="Input Audio", type="filepath")
461
  with gr.Row():
 
474
  with gr.Group():
475
  mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
476
  with gr.Row():
477
+ mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.", show_reset_button=False)
478
+ mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", show_reset_button=False)
479
+ mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.", show_reset_button=False)
480
  with gr.Column(variant='panel'):
481
  with gr.Group():
482
  with gr.Row():
483
+ mdx_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.", show_reset_button=False)
484
+ mdx_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
485
+ mdx_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
486
  with gr.Row():
487
  mdx_audio = gr.Audio(label="Input Audio", type="filepath")
488
  with gr.Row():
 
504
  vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
505
  vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
506
  with gr.Row():
507
+ vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.", show_reset_button=False, visible=False)
508
+ vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.", show_reset_button=False)
509
+ vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.", show_reset_button=False)
510
  with gr.Column(variant='panel'):
511
  with gr.Group():
512
  with gr.Row():
513
+ vr_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.", show_reset_button=False)
514
+ vr_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
515
+ vr_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
516
  with gr.Row():
517
  vr_audio = gr.Audio(label="Input Audio", type="filepath")
518
  with gr.Row():
 
531
  with gr.Group():
532
  demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
533
  with gr.Row():
534
+ demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.", show_reset_button=False)
535
+ demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.", show_reset_button=False)
536
+ demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.", show_reset_button=False)
537
  with gr.Column(variant='panel'):
538
  with gr.Group():
539
  with gr.Row():
540
+ demucs_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
541
+ demucs_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
542
  with gr.Row():
543
  demucs_audio = gr.Audio(label="Input Audio", type="filepath")
544
  with gr.Row():
 
589
  with gr.Group():
590
  with gr.Row(equal_height=True):
591
  list_filter = gr.Dropdown(value="vocals", choices=["vocals", "instrumental", "drums", "bass", "guitar", "piano", "other"], label="List filter", info="Filter and sort the model list by 'stem'")
592
+ list_limit = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="List limit", info="Limit the number of models shown.", show_reset_button=False)
593
  list_button = gr.Button("Show list", variant="primary")
594
 
595
  output_list = gr.HTML(label="Leaderboard")