---
# Generated 2023-05-14 from:
# /home/agorin/cryceleb2023/hparams/ecapa_voxceleb_basic.yaml
# yamllint disable
# ################################
# Model: Speaker identification with ECAPA for CryCeleb
# Authors: David Budaghyan
# ################################
ckpt_interval_minutes: 15 # save checkpoint every N min
##### SEED
seed: 3011
__set_seed: !apply:crybrain_config_utils.set_seed [3011]
# DataLoader
bs: 32
train_dataloader_options:
batch_size: 32
shuffle: true
val_dataloader_options:
batch_size: 2
shuffle: false
##### ESTIMATOR COMPONENTS
# Fbank (feature extractor)
n_mels: 80
left_frames: 0
right_frames: 0
deltas: false
compute_features: &id002 !new:speechbrain.lobes.features.Fbank
n_mels: 80
left_frames: 0
right_frames: 0
deltas: false
# ECAPA
emb_dim: 192
embedding_model: &id001 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
input_size: 80
channels: [1024, 1024, 1024, 1024, 3072]
kernel_sizes: [5, 3, 3, 3, 1]
dilations: [1, 2, 3, 4, 1]
groups: [1, 1, 1, 1, 1]
attention_channels: 128
lin_neurons: 192
# If you do not want to use the pretrained encoder you can simply delete pretrained_encoder field.
pretrained_model_name: spkrec-ecapa-voxceleb
pretrained_embedding_model_path: speechbrain/spkrec-ecapa-voxceleb/embedding_model.ckpt
pretrained_embedding_model: !new:speechbrain.utils.parameter_transfer.Pretrainer
collect_in: ./experiments/ecapa_voxceleb_ft_basic/ckpts
loadables:
model: *id001
paths:
model: speechbrain/spkrec-ecapa-voxceleb/embedding_model.ckpt
# CLASSIFIER
n_classes: 348
# check-yaml disable
classifier: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
input_size: 192
out_neurons: 348
##### EPOCH COUNTER
n_epochs: 1000
epoch_counter: &id005 !new:speechbrain.utils.epoch_loop.EpochCounter
limit: 1000
##### OPTIMIZER
start_lr: 0.0001
opt_class: !name:torch.optim.Adam
lr: 0.0001
weight_decay: 0.000002
##### LEARNING RATE SCHEDULERS
lrsched_name: cyclic
# one of:
# onplateau
# cyclic
lr_min: 0.0000000001
lr_scheduler: &id006 !apply:crybrain_config_utils.choose_lrsched
lrsched_name: cyclic
  # below are kwargs; only the ones relevant to the chosen scheduler type
  # will be used for initialization in `choose_lrsched`
  # onplateau (ReduceLROnPlateau)
lr_min: 0.0000000001
factor: 0.4
patience: 10
dont_halve_until_epoch: 35
  # cyclic (CyclicLRScheduler)
base_lr: 0.00000001
max_lr: 0.0001
step_size: 100
mode: triangular
gamma: 1.0
  scale_fn: null
scale_mode: cycle
sample_rate: 16000
mean_var_norm: &id004 !new:speechbrain.processing.features.InputNormalization
norm_type: sentence
std_norm: false
modules:
compute_features: *id002
embedding_model: *id001
classifier: *id003
mean_var_norm: *id004
compute_cost: !new:speechbrain.nnet.losses.LogSoftmaxWrapper
loss_fn: !new:speechbrain.nnet.losses.AdditiveAngularMargin
margin: 0.2
scale: 30
classification_stats: !name:speechbrain.utils.metric_stats.ClassificationStats
###################################################################
### OUTPUT PATHS ###
experiment_name: ecapa_voxceleb_ft_basic
# must run from the directory which contains "experiments"
experiment_dir: ./experiments/ecapa_voxceleb_ft_basic
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
save_file: ./experiments/ecapa_voxceleb_ft_basic/train_log.txt
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
checkpoints_dir: ./experiments/ecapa_voxceleb_ft_basic/ckpts
recoverables:
embedding_model: *id001
classifier: *id003
normalizer: *id004
counter: *id005
lr_scheduler: *id006