{
  "audio_channels": 2,
  "bottom_channels": 512,
  "cac": true,
  "channels": 48,
  "channels_time": null,
  "context": 1,
  "context_enc": 0,
  "dconv_comp": 8,
  "dconv_depth": 2,
  "dconv_init": 0.001,
  "dconv_mode": 3,
  "depth": 4,
  "emb_scale": 10,
  "emb_smooth": true,
  "end_iters": 0,
  "freq_emb": 0.2,
  "growth": 2,
  "kernel_size": 8,
  "multi_freqs": [],
  "multi_freqs_depth": 3,
  "nfft": 4096,
  "norm_groups": 4,
  "norm_starts": 4,
  "rescale": 0.1,
  "rewrite": true,
  "samplerate": 44100,
  "segment": 7.8,
  "sources": [
    "drums",
    "bass",
    "other",
    "vocals"
  ],
  "stride": 4,
  "t_auto_sparsity": false,
  "t_cape_augment": true,
  "t_cape_glob_loc_scale": [
    5000.0,
    1.0,
    1.4
  ],
  "t_cape_mean_normalize": true,
  "t_cross_first": false,
  "t_dropout": 0.02,
  "t_emb": "sin",
  "t_gelu": true,
  "t_global_window": 100,
  "t_group_norm": false,
  "t_heads": 8,
  "t_hidden_scale": 4.0,
  "t_layer_scale": true,
  "t_layers": 5,
  "t_lr": null,
  "t_mask_random_seed": 42,
  "t_mask_type": "diag",
  "t_max_period": 10000.0,
  "t_max_positions": 10000,
  "t_norm_first": true,
  "t_norm_in": true,
  "t_norm_in_group": false,
  "t_norm_out": true,
  "t_sin_random_shift": 0,
  "t_sparse_attn_window": 400,
  "t_sparse_cross_attn": false,
  "t_sparse_self_attn": false,
  "t_sparsity": 0.95,
  "t_weight_decay": 0.0,
  "t_weight_pos_embed": 1.0,
  "time_stride": 2,
  "use_train_segment": true,
  "wiener_iters": 0,
  "wiener_residual": false
}