Politrees committed on
Commit
955d618
·
verified ·
1 Parent(s): 512878b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -30
app.py CHANGED
@@ -420,15 +420,15 @@ with gr.Blocks(
420
  with gr.Group():
421
  roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
422
  with gr.Row():
423
- roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
424
- roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.")
425
- roformer_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
426
  with gr.Column(variant='panel'):
427
  with gr.Group():
428
  with gr.Row():
429
- roformer_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
430
- roformer_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
431
- roformer_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
432
  with gr.Row():
433
  roformer_audio = gr.Audio(label="Input Audio", type="filepath")
434
  with gr.Row():
@@ -447,15 +447,15 @@ with gr.Blocks(
447
  with gr.Group():
448
  mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
449
  with gr.Row():
450
- mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", visible=False)
451
- mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
452
- mdx23c_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.")
453
  with gr.Column(variant='panel'):
454
  with gr.Group():
455
  with gr.Row():
456
- mdx23c_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
457
- mdx23c_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
458
- mdx23c_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
459
  with gr.Row():
460
  mdx23c_audio = gr.Audio(label="Input Audio", type="filepath")
461
  with gr.Row():
@@ -474,15 +474,15 @@ with gr.Blocks(
474
  with gr.Group():
475
  mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
476
  with gr.Row():
477
- mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.")
478
- mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.")
479
- mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.")
480
  with gr.Column(variant='panel'):
481
  with gr.Group():
482
  with gr.Row():
483
- mdx_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
484
- mdx_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
485
- mdx_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
486
  with gr.Row():
487
  mdx_audio = gr.Audio(label="Input Audio", type="filepath")
488
  with gr.Row():
@@ -504,15 +504,15 @@ with gr.Blocks(
504
  vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
505
  vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
506
  with gr.Row():
507
- vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.", visible=False)
508
- vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.")
509
- vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.")
510
  with gr.Column(variant='panel'):
511
  with gr.Group():
512
  with gr.Row():
513
- vr_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.")
514
- vr_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
515
- vr_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
516
  with gr.Row():
517
  vr_audio = gr.Audio(label="Input Audio", type="filepath")
518
  with gr.Row():
@@ -531,14 +531,14 @@ with gr.Blocks(
531
  with gr.Group():
532
  demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
533
  with gr.Row():
534
- demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.")
535
- demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.")
536
- demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.")
537
  with gr.Column(variant='panel'):
538
  with gr.Group():
539
  with gr.Row():
540
- demucs_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.")
541
- demucs_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.")
542
  with gr.Row():
543
  demucs_audio = gr.Audio(label="Input Audio", type="filepath")
544
  with gr.Row():
@@ -589,7 +589,7 @@ with gr.Blocks(
589
  with gr.Group():
590
  with gr.Row(equal_height=True):
591
  list_filter = gr.Dropdown(value="vocals", choices=["vocals", "instrumental", "drums", "bass", "guitar", "piano", "other"], label="List filter", info="Filter and sort the model list by 'stem'")
592
- list_limit = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="List limit", info="Limit the number of models shown.")
593
  list_button = gr.Button("Show list", variant="primary")
594
 
595
  output_list = gr.HTML(label="Leaderboard")
 
420
  with gr.Group():
421
  roformer_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
422
  with gr.Row():
423
+ roformer_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", show_reset_button=False, visible=False)
424
+ roformer_overlap = gr.Slider(minimum=2, maximum=10, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Lower is better but slower.", show_reset_button=False)
425
+ roformer_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.", show_reset_button=False)
426
  with gr.Column(variant='panel'):
427
  with gr.Group():
428
  with gr.Row():
429
+ roformer_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.", show_reset_button=False)
430
+ roformer_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
431
+ roformer_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
432
  with gr.Row():
433
  roformer_audio = gr.Audio(label="Input Audio", type="filepath")
434
  with gr.Row():
 
447
  with gr.Group():
448
  mdx23c_override_seg_size = gr.Checkbox(value=False, label="Override segment size", info="Override model default segment size instead of using the model default value.")
449
  with gr.Row():
450
+ mdx23c_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", show_reset_button=False, visible=False)
451
+ mdx23c_overlap = gr.Slider(minimum=2, maximum=50, step=1, value=8, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.", show_reset_button=False)
452
+ mdx23c_pitch_shift = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch shift", info="Shift audio pitch by a number of semitones while processing. may improve output for deep/high vocals.", show_reset_button=False)
453
  with gr.Column(variant='panel'):
454
  with gr.Group():
455
  with gr.Row():
456
+ mdx23c_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.", show_reset_button=False)
457
+ mdx23c_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
458
+ mdx23c_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
459
  with gr.Row():
460
  mdx23c_audio = gr.Audio(label="Input Audio", type="filepath")
461
  with gr.Row():
 
474
  with gr.Group():
475
  mdx_denoise = gr.Checkbox(value=False, label="Denoise", info="Enable denoising after separation.")
476
  with gr.Row():
477
+ mdx_hop_length = gr.Slider(minimum=32, maximum=2048, step=32, value=1024, label="Hop Length", info="Usually called stride in neural networks; only change if you know what you're doing.", show_reset_button=False)
478
+ mdx_seg_size = gr.Slider(minimum=32, maximum=4000, step=32, value=256, label="Segment Size", info="Larger consumes more resources, but may give better results.", show_reset_button=False)
479
+ mdx_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Amount of overlap between prediction windows. Higher is better but slower.", show_reset_button=False)
480
  with gr.Column(variant='panel'):
481
  with gr.Group():
482
  with gr.Row():
483
+ mdx_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.", show_reset_button=False)
484
+ mdx_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
485
+ mdx_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
486
  with gr.Row():
487
  mdx_audio = gr.Audio(label="Input Audio", type="filepath")
488
  with gr.Row():
 
504
  vr_tta = gr.Checkbox(value=False, label="TTA", info="Enable Test-Time-Augmentation; slow but improves quality.")
505
  vr_high_end_process = gr.Checkbox(value=False, label="High End Process", info="Mirror the missing frequency range of the output.")
506
  with gr.Row():
507
+ vr_post_process_threshold = gr.Slider(minimum=0.1, maximum=0.3, step=0.1, value=0.2, label="Post Process Threshold", info="Threshold for post-processing.", show_reset_button=False, visible=False)
508
+ vr_window_size = gr.Slider(minimum=320, maximum=1024, step=32, value=512, label="Window Size", info="Balance quality and speed. 1024 = fast but lower, 320 = slower but better quality.", show_reset_button=False)
509
+ vr_aggression = gr.Slider(minimum=1, maximum=100, step=1, value=5, label="Agression", info="Intensity of primary stem extraction.", show_reset_button=False)
510
  with gr.Column(variant='panel'):
511
  with gr.Group():
512
  with gr.Row():
513
+ vr_batch_size = gr.Slider(minimum=1, maximum=16, step=1, value=1, label="Batch Size", info="Larger consumes more RAM but may process slightly faster.", show_reset_button=False)
514
+ vr_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
515
+ vr_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
516
  with gr.Row():
517
  vr_audio = gr.Audio(label="Input Audio", type="filepath")
518
  with gr.Row():
 
531
  with gr.Group():
532
  demucs_segments_enabled = gr.Checkbox(value=True, label="Segment-wise processing", info="Enable segment-wise processing.")
533
  with gr.Row():
534
+ demucs_seg_size = gr.Slider(minimum=1, maximum=100, step=1, value=40, label="Segment Size", info="Size of segments into which the audio is split. Higher = slower but better quality.", show_reset_button=False)
535
+ demucs_overlap = gr.Slider(minimum=0.001, maximum=0.999, step=0.001, value=0.25, label="Overlap", info="Overlap between prediction windows. Higher = slower but better quality.", show_reset_button=False)
536
+ demucs_shifts = gr.Slider(minimum=0, maximum=20, step=1, value=2, label="Shifts", info="Number of predictions with random shifts, higher = slower but better quality.", show_reset_button=False)
537
  with gr.Column(variant='panel'):
538
  with gr.Group():
539
  with gr.Row():
540
+ demucs_norm_threshold = gr.Slider(minimum=0.1, maximum=1, step=0.1, value=0.9, label="Normalization threshold", info="The threshold for audio normalization.", show_reset_button=False)
541
+ demucs_amp_threshold = gr.Slider(minimum=0.0, maximum=1, step=0.1, value=0.0, label="Amplification threshold", info="The threshold for audio amplification.", show_reset_button=False)
542
  with gr.Row():
543
  demucs_audio = gr.Audio(label="Input Audio", type="filepath")
544
  with gr.Row():
 
589
  with gr.Group():
590
  with gr.Row(equal_height=True):
591
  list_filter = gr.Dropdown(value="vocals", choices=["vocals", "instrumental", "drums", "bass", "guitar", "piano", "other"], label="List filter", info="Filter and sort the model list by 'stem'")
592
+ list_limit = gr.Slider(minimum=1, maximum=10, step=1, value=5, label="List limit", info="Limit the number of models shown.", show_reset_button=False)
593
  list_button = gr.Button("Show list", variant="primary")
594
 
595
  output_list = gr.HTML(label="Leaderboard")