HoneyTian committed on
Commit
8143896
·
1 Parent(s): 0d6ae9b
examples/nx_denoise/yaml/config.yaml CHANGED
@@ -1,11 +1,20 @@
1
- model_name: "nx_clean_unet"
2
 
3
  sample_rate: 8000
4
  segment_size: 16000
5
  n_fft: 512
6
  win_size: 200
7
  hop_size: 80
 
8
 
 
 
 
 
 
 
 
 
9
  down_sampling_num_layers: 6
10
  down_sampling_in_channels: 1
11
  down_sampling_hidden_channels: 64
@@ -13,7 +22,7 @@ down_sampling_kernel_size: 4
13
  down_sampling_stride: 2
14
 
15
  causal_in_channels: 1
16
- causal_out_channels: 1
17
  causal_kernel_size: 3
18
  causal_bias: false
19
  causal_separable: true
@@ -34,7 +43,7 @@ discriminator_in_channel: 2
34
 
35
  compress_factor: 0.3
36
 
37
- batch_size: 64
38
  learning_rate: 0.0005
39
  adam_b1: 0.8
40
  adam_b2: 0.99
 
1
+ model_name: "nx_denoise"
2
 
3
  sample_rate: 8000
4
  segment_size: 16000
5
  n_fft: 512
6
  win_size: 200
7
  hop_size: 80
8
+ # Because hop_size is 80, one STFT time step is 10 ms (80 / sample_rate = 80 / 8000 s), so the down-sampling is chosen to give a similar time resolution.
9
 
10
+ # The down-sampling factor is 2**down_sampling_num_layers.
11
+ # For example, 2**6 = 64 means that 64 samples become one time step after down-sampling,
12
+ # so one step is 64 / sample_rate = 0.008 s (8 ms).
13
+ # Then tsfm_chunk_size=2 corresponds to 16 ms and tsfm_chunk_size=4 to 32 ms.
14
+ # Assuming each step looks roughly 1 s to the left and roughly 30 ms to the right, use one of:
15
+ # tsfm_chunk_size=1,tsfm_num_left_chunks=128,tsfm_num_right_chunks=4
16
+ # tsfm_chunk_size=2,tsfm_num_left_chunks=64,tsfm_num_right_chunks=2
17
+ # tsfm_chunk_size=4,tsfm_num_left_chunks=32,tsfm_num_right_chunks=1
18
  down_sampling_num_layers: 6
19
  down_sampling_in_channels: 1
20
  down_sampling_hidden_channels: 64
 
22
  down_sampling_stride: 2
23
 
24
  causal_in_channels: 1
25
+ causal_out_channels: 64
26
  causal_kernel_size: 3
27
  causal_bias: false
28
  causal_separable: true
 
43
 
44
  compress_factor: 0.3
45
 
46
+ batch_size: 4
47
  learning_rate: 0.0005
48
  adam_b1: 0.8
49
  adam_b2: 0.99
toolbox/torchaudio/models/nx_denoise/yaml/config.yaml CHANGED
@@ -1,4 +1,4 @@
1
- model_name: "nx_clean_unet"
2
 
3
  sample_rate: 8000
4
  segment_size: 16000
 
1
+ model_name: "nx_denoise"
2
 
3
  sample_rate: 8000
4
  segment_size: 16000