ZhenYe234 committed on
Commit
fee43c7
·
verified ·
1 Parent(s): dd01da8

Upload config_wavlm.yaml

Browse files
Files changed (1) hide show
  1. config_wavlm.yaml +137 -0
config_wavlm.yaml ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ generator:
2
+ name: SoundStream
3
+ config:
4
+ n_filters: 32
5
+ D: 256
6
+ target_bandwidths:
7
+ - 0.5
8
+ - 1
9
+ - 1.5
10
+ - 2
11
+ - 4
12
+ ratios:
13
+ - 8
14
+ - 5
15
+ - 4
16
+ - 2
17
+ sample_rate: 16000
18
+ bins: 1024
19
+ semantic_techer: wavlm_base_plus
20
+ d_list:
21
+ - mfd
22
+ mfd:
23
+ name: MultiFrequencyDiscriminator
24
+ config:
25
+ hop_lengths:
26
+ - 32
27
+ - 64
28
+ - 128
29
+ - 256
30
+ - 512
31
+ - 1024
32
+ hidden_channels:
33
+ - 64
34
+ - 128
35
+ - 256
36
+ - 512
37
+ - 512
38
+ - 512
39
+ domain: double
40
+ mel_scale: true
41
+ sample_rate: 16000
42
+ mpd:
43
+ name: MultiPeriodDiscriminator
44
+ config:
45
+ period_sizes:
46
+ - 2
47
+ - 3
48
+ - 5
49
+ - 7
50
+ - 11
51
+ period_kernel_size: 5
52
+ msd:
53
+ name: MultiScaleDiscriminator
54
+ config:
55
+ num_scales: 3
56
+ pool_kernel_size: 4
57
+ pool_stride: 2
58
+ optimizer:
59
+ g:
60
+ name: AdamW
61
+ config:
62
+ lr: 0.0002
63
+ betas:
64
+ - 0.8
65
+ - 0.99
66
+ eps: 1.0e-06
67
+ d:
68
+ name: AdamW
69
+ config:
70
+ lr: 0.0002
71
+ betas:
72
+ - 0.8
73
+ - 0.99
74
+ eps: 1.0e-06
75
+ lr_scheduler:
76
+ g:
77
+ name: ExponentialLR
78
+ config:
79
+ gamma: 0.999
80
+ d:
81
+ name: ExponentialLR
82
+ config:
83
+ gamma: 0.999
84
+ criterion:
85
+ g_criterion:
86
+ name: losses.generator_loss.GeneratorSTFTLoss
87
+ config:
88
+ use_mel_loss: false
89
+ adv_criterion: MSEGLoss
90
+ mel_loss_weight: 45
91
+ use_feature_match: true
92
+ feat_match_loss_weight: 20
93
+ use_full_stft_loss: true
94
+ use_sub_stft_loss: true
95
+ full_stft_loss_weight: 1
96
+ sub_stft_loss_weight: 1
97
+ mel_scale_loss:
98
+ sampling_rate: 16000
99
+ n_fft: 1024
100
+ num_mels: 80
101
+ hop_size: 160
102
+ win_size: 800
103
+ fmin: 0
104
+ full_multi_scale_stft_loss:
105
+ fft_sizes:
106
+ - 512
107
+ - 1024
108
+ - 2048
109
+ win_sizes:
110
+ - 480
111
+ - 960
112
+ - 1200
113
+ hop_sizes:
114
+ - 120
115
+ - 240
116
+ - 300
117
+ sub_multi_scale_stft_loss:
118
+ num_bands: 6
119
+ fft_sizes:
120
+ - 128
121
+ - 256
122
+ - 256
123
+ win_sizes:
124
+ - 80
125
+ - 120
126
+ - 200
127
+ hop_sizes:
128
+ - 20
129
+ - 40
130
+ - 50
131
+ d_criterion:
132
+ name: losses.discriminator_loss.MSEDiscriminatorLoss
133
+ config: null
134
+ commit_loss_weight: 1.0
135
+ codebook_loss_weight: 100
136
+ audio_norm_scale: 0.95
137
+