cemsubakan committed on
Commit
b067c77
·
verified ·
1 Parent(s): ee98495

hparams cleanup

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +2 -106
hyperparams.yaml CHANGED
@@ -1,99 +1,22 @@
1
- # Generated 2021-12-01 from:
2
- # /home/mila/s/subakany/speechbrain-soundskrit/recipes/WHAMandWHAMR/enhancement/hparams/sepformer-wham.yaml
3
- # yamllint disable
4
  # ################################
5
- # Model: SepFormer for source separation
6
- # https://arxiv.org/abs/2010.13154
7
- #
8
  # Dataset : WHAM!
9
  # ################################
10
- # Basic parameters
11
- # Seed needs to be set at top of yaml, before objects with parameters are made
12
- #
13
- seed: 1234
14
- __set_seed: !apply:torch.manual_seed [1234]
15
 
16
- # Data params
17
-
18
- # the data folder for the wham dataset
19
- # needs to end with wham_original for the wham dataset
20
- # respecting this convention effects the code functionality
21
- data_folder: /network/tmp1/subakany/wham_original
22
- task: enhancement
23
- dereverberate: false
24
-
25
- # the path for wsj0/si_tr_s/ folder -- only needed if dynamic mixing is used
26
- # e.g. /yourpath/wsj0-processed/si_tr_s/
27
- ## you need to convert the original wsj0 to 8k
28
- # you can do this conversion with the script ../meta/preprocess_dynamic_mixing.py
29
- base_folder_dm: /network/tmp1/subakany/wsj0-processed/si_tr_s/
30
-
31
- experiment_name: sepformer-wham-enhancement
32
- output_folder: results/sepformer-wham-enhancement/1234
33
- train_log: results/sepformer-wham-enhancement/1234/train_log.txt
34
- save_folder: results/sepformer-wham-enhancement/1234/save
35
-
36
- # the file names should start with whamr instead of whamorg
37
- train_data: results/sepformer-wham-enhancement/1234/save/whamorg_tr.csv
38
- valid_data: results/sepformer-wham-enhancement/1234/save/whamorg_cv.csv
39
- test_data: results/sepformer-wham-enhancement/1234/save/whamorg_tt.csv
40
- skip_prep: false
41
-
42
-
43
- # Experiment params
44
- auto_mix_prec: true # Set it to True for mixed precision
45
- test_only: true
46
  num_spks: 1 # set to 3 for wsj0-3mix
47
- progressbar: true
48
- save_audio: false # Save estimated sources on disk
49
  sample_rate: 8000
50
 
51
- # Training parameters
52
- N_epochs: 200
53
- batch_size: 1
54
- lr: 0.00015
55
- clip_grad_norm: 5
56
- loss_upper_lim: 999999 # this is the upper limit for an acceptable loss
57
- # if True, the training sequences are cut to a specified length
58
- limit_training_signal_len: false
59
- # this is the length of sequences if we choose to limit
60
- # the signal length of training sequences
61
- training_signal_len: 32000000
62
-
63
- # Set it to True to dynamically create mixtures at training time
64
- dynamic_mixing: true
65
-
66
- # Parameters for data augmentation
67
- use_wavedrop: false
68
- use_speedperturb: true
69
- use_speedperturb_sameforeachsource: false
70
- use_rand_shift: false
71
- min_shift: -8000
72
- max_shift: 8000
73
-
74
-
75
- # loss thresholding -- this thresholds the training loss
76
- threshold_byloss: true
77
- threshold: -30
78
-
79
  # Encoder parameters
80
  N_encoder_out: 256
81
  out_channels: 256
82
  kernel_size: 16
83
  kernel_stride: 8
84
 
85
- # Dataloader options
86
- dataloader_opts:
87
- batch_size: 1
88
- num_workers: 3
89
-
90
-
91
  # Specifying the network
92
  Encoder: &id003 !new:speechbrain.lobes.models.dual_path.Encoder
93
  kernel_size: 16
94
  out_channels: 256
95
 
96
-
97
  SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
98
  num_layers: 8
99
  d_model: 256
@@ -113,7 +36,6 @@ SBtfinter: &id002 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
113
  norm_before: true
114
 
115
  MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
116
-
117
  num_spks: 1
118
  in_channels: 256
119
  out_channels: 256
@@ -132,39 +54,13 @@ Decoder: &id004 !new:speechbrain.lobes.models.dual_path.Decoder
132
  stride: 8
133
  bias: false
134
 
135
- optimizer: !name:torch.optim.Adam
136
- lr: 0.00015
137
- weight_decay: 0
138
-
139
- loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
140
-
141
- lr_scheduler: &id007 !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
142
-
143
- factor: 0.5
144
- patience: 2
145
- dont_halve_until_epoch: 65
146
-
147
- epoch_counter: &id006 !new:speechbrain.utils.epoch_loop.EpochCounter
148
- limit: 200
149
-
150
  modules:
151
  encoder: *id003
152
  decoder: *id004
153
  masknet: *id005
154
- checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
155
- checkpoints_dir: results/sepformer-wham-enhancement/1234/save
156
- recoverables:
157
- encoder: *id003
158
- decoder: *id004
159
- masknet: *id005
160
- counter: *id006
161
- lr_scheduler: *id007
162
- train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
163
- save_file: results/sepformer-wham-enhancement/1234/train_log.txt
164
 
165
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
166
  loadables:
167
  encoder: !ref <Encoder>
168
  masknet: !ref <MaskNet>
169
- decoder: !ref <Decoder>
170
-
 
 
 
 
1
  # ################################
2
+ # Model: Pretrained SepFormer for speech enhancement
 
 
3
  # Dataset : WHAM!
4
  # ################################
 
 
 
 
 
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  num_spks: 1 # set to 3 for wsj0-3mix
 
 
7
  sample_rate: 8000
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Encoder parameters
10
  N_encoder_out: 256
11
  out_channels: 256
12
  kernel_size: 16
13
  kernel_stride: 8
14
 
 
 
 
 
 
 
15
  # Specifying the network
16
  Encoder: &id003 !new:speechbrain.lobes.models.dual_path.Encoder
17
  kernel_size: 16
18
  out_channels: 256
19
 
 
20
  SBtfintra: &id001 !new:speechbrain.lobes.models.dual_path.SBTransformerBlock
21
  num_layers: 8
22
  d_model: 256
 
36
  norm_before: true
37
 
38
  MaskNet: &id005 !new:speechbrain.lobes.models.dual_path.Dual_Path_Model
 
39
  num_spks: 1
40
  in_channels: 256
41
  out_channels: 256
 
54
  stride: 8
55
  bias: false
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  modules:
58
  encoder: *id003
59
  decoder: *id004
60
  masknet: *id005
 
 
 
 
 
 
 
 
 
 
61
 
62
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
63
  loadables:
64
  encoder: !ref <Encoder>
65
  masknet: !ref <MaskNet>
66
+ decoder: !ref <Decoder>