English
Sound Classification
CNN14
cemsubakan committed on
Commit
8e55a46
·
verified ·
1 Parent(s): 590fd20

Update hyperparams.yaml

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +13 -141
hyperparams.yaml CHANGED
@@ -1,91 +1,10 @@
1
- # Generated 2022-11-21 from:
2
- # /home/cem/Dropbox/speechbrain-1/recipes/ESC50/classification/hparams/cnn14.yaml
3
- # yamllint disable
4
- # #################################
5
- # Basic training parameters for sound classification using the ESC50 dataset.
6
- # This recipe uses the CNN14 backbone (with the ECAPA-TDNN classifier head) for classification.
7
- #
8
- # Author:
9
- # * Cem Subakan
10
- # (based on the SpeechBrain UrbanSound8k recipe)
11
- # #################################
12
 
13
- # Seed needs to be set at top of yaml, before objects with parameters are made
14
- seed: 11
15
- __set_seed: !!python/object/apply:torch.manual_seed [11]
16
-
17
- # Set up folders for reading from and writing to
18
- # Dataset must already exist at `audio_data_folder`
19
- data_folder: /data2/ESC-50-master
20
- # e.g., /localscratch/UrbanSound8K
21
- open_rir_folder: <data_folder>/RIRS # Change if needed
22
- audio_data_folder: /data2/ESC-50-master/audio
23
-
24
- # TODO the following folder will contain the resampled audio
25
- # files (mono channel and config SR) to train on
26
- #reasmpled_audio_data_folder: !ref <data_folder>/audio_mono16kHz
27
- #
28
- experiment_name: cnn14
29
- output_folder: ./results/cnn14/11
30
- save_folder: ./results/cnn14/11/save
31
- train_log: ./results/cnn14/11/train_log.txt
32
-
33
- test_only: false
34
-
35
- # Tensorboard logs
36
- use_tensorboard: false
37
- tensorboard_logs_folder: ./results/cnn14/11/tb_logs/
38
-
39
- # Path where data manifest files will be stored
40
- train_annotation: /data2/ESC-50-master/manifest/train.json
41
- valid_annotation: /data2/ESC-50-master/manifest/valid.json
42
- test_annotation: /data2/ESC-50-master/manifest/test.json
43
-
44
- # To standardize results, ESC-50 has pre-separated samples into
45
- # 5 folds for multi-fold validation
46
- train_fold_nums: [1, 2, 3]
47
- valid_fold_nums: [4]
48
- test_fold_nums: [5]
49
- skip_manifest_creation: false
50
-
51
- ckpt_interval_minutes: 15 # save checkpoint every N min
52
-
53
- # Training parameters
54
- number_of_epochs: 200
55
- batch_size: 32
56
- lr: 0.0002
57
- base_lr: 0.00000001
58
- max_lr: 0.0002
59
- step_size: 65000
60
  sample_rate: 44100
61
 
62
  device: cpu
63
 
64
- # Feature parameters
65
- n_mels: 80
66
- left_frames: 0
67
- right_frames: 0
68
- deltas: false
69
- amp_to_db: true
70
- normalize: true
71
- use_melspectra: true
72
-
73
- # Number of classes
74
- out_n_neurons: 50
75
-
76
- # Note that it's actually important to shuffle the data here
77
- # (or at the very least, not sort the data by duration)
78
- # Also note that this does not violate the UrbanSound8k "no-shuffle" policy
79
- # because this does not mix samples from folds in train to valid/test, only
80
- # within train or valid, or test
81
- shuffle: true
82
- dataloader_options:
83
- batch_size: 32
84
- shuffle: true
85
- num_workers: 0
86
-
87
  # Functions
88
- compute_features: &id003 !new:speechbrain.lobes.features.Fbank
89
  n_mels: 80
90
  left_frames: 0
91
  right_frames: 0
@@ -96,33 +15,16 @@ compute_features: &id003 !new:speechbrain.lobes.features.Fbank
96
  hop_length: 10
97
 
98
  use_pretrain: false
99
- embedding_model: &id009 !new:speechbrain.lobes.models.Cnn14.Cnn14
100
  mel_bins: 80
101
  emb_dim: 2048
102
 
103
- classifier: &id010 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
104
  input_size: 2048
105
  out_neurons: 50
106
  lin_blocks: 1
107
 
108
- epoch_counter: &id012 !new:speechbrain.utils.epoch_loop.EpochCounter
109
-
110
-
111
- # If you do not want to use the pretrained separator you can simply delete pretrained_separator field.
112
- limit: 200
113
-
114
-
115
- # Definition of the augmentation pipeline.
116
- # If concat_augment = False, the augmentation techniques are applied
117
- # in sequence. If concat_augment = True, all the augmented signals
118
- # are concatenated in a single big batch.
119
-
120
- augment_pipeline: []
121
- concat_augment: true
122
-
123
-
124
- mean_var_norm: &id011 !new:speechbrain.processing.features.InputNormalization
125
-
126
  norm_type: sentence
127
  std_norm: false
128
 
@@ -131,55 +33,25 @@ n_fft: 1024
131
  spec_mag_power: 0.5
132
  hop_length: 11.6099
133
  win_length: 23.2199
134
- compute_stft: &id001 !new:speechbrain.processing.features.STFT
 
135
  n_fft: 1024
136
  hop_length: 11.6099
137
  win_length: 23.2199
138
  sample_rate: 44100
139
 
140
- compute_fbank: &id002 !new:speechbrain.processing.features.Filterbank
141
  n_mels: 80
142
  n_fft: 1024
143
  sample_rate: 44100
144
 
145
  modules:
146
- compute_stft: *id001
147
- compute_fbank: *id002
148
- compute_features: *id003
149
- embedding_model: *id009
150
- classifier: *id010
151
- mean_var_norm: *id011
152
- compute_cost: !new:speechbrain.nnet.losses.LogSoftmaxWrapper
153
- loss_fn: !new:speechbrain.nnet.losses.AdditiveAngularMargin
154
- margin: 0.2
155
- scale: 30
156
-
157
- # compute_error: !name:speechbrain.nnet.losses.classification_error
158
-
159
- opt_class: !name:torch.optim.Adam
160
- lr: 0.0002
161
- weight_decay: 0.000002
162
-
163
- lr_annealing: !new:speechbrain.nnet.schedulers.CyclicLRScheduler
164
- base_lr: 0.00000001
165
- max_lr: 0.0002
166
- step_size: 65000
167
-
168
- # Logging + checkpoints
169
- train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
170
- save_file: ./results/cnn14/11/train_log.txt
171
-
172
- error_stats: !name:speechbrain.utils.metric_stats.MetricStats
173
- metric: !name:speechbrain.nnet.losses.classification_error
174
- reduction: batch
175
-
176
- checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
177
- checkpoints_dir: ./results/cnn14/11/save
178
- recoverables:
179
- embedding_model: *id009
180
- classifier: *id010
181
- normalizer: *id011
182
- counter: *id012
183
 
184
 
185
  label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
 
 
 
 
 
 
 
 
 
 
 
 
1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  sample_rate: 44100
3
 
4
  device: cpu
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  # Functions
7
+ compute_features: !new:speechbrain.lobes.features.Fbank
8
  n_mels: 80
9
  left_frames: 0
10
  right_frames: 0
 
15
  hop_length: 10
16
 
17
  use_pretrain: false
18
+ embedding_model: !new:speechbrain.lobes.models.Cnn14.Cnn14
19
  mel_bins: 80
20
  emb_dim: 2048
21
 
22
+ classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
23
  input_size: 2048
24
  out_neurons: 50
25
  lin_blocks: 1
26
 
27
+ mean_var_norm: !new:speechbrain.processing.features.InputNormalization
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  norm_type: sentence
29
  std_norm: false
30
 
 
33
  spec_mag_power: 0.5
34
  hop_length: 11.6099
35
  win_length: 23.2199
36
+
37
+ compute_stft: !new:speechbrain.processing.features.STFT
38
  n_fft: 1024
39
  hop_length: 11.6099
40
  win_length: 23.2199
41
  sample_rate: 44100
42
 
43
+ compute_fbank: !new:speechbrain.processing.features.Filterbank
44
  n_mels: 80
45
  n_fft: 1024
46
  sample_rate: 44100
47
 
48
  modules:
49
+ compute_stft: !ref <compute_stft>
50
+ compute_fbank: !ref <compute_fbank>
51
+ compute_features: !ref <compute_features>
52
+ embedding_model: !ref <embedding_model>
53
+ classifier: !ref <classifier>
54
+ mean_var_norm: !ref <mean_var_norm>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
 
57
  label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder