HoneyTian committed on
Commit
b78e61f
·
1 Parent(s): 8143896
examples/nx_denoise/run.sh CHANGED
@@ -3,27 +3,11 @@
3
  : <<'END'
4
 
5
 
6
- sh run.sh --stage 2 --stop_stage 2 --system_version windows --file_folder_name file_dir --final_model_name mpnet-aishell-20250224 \
7
- --noise_dir "E:/Users/tianx/HuggingDatasets/nx_noise/data/noise" \
8
- --speech_dir "E:/programmer/asr_datasets/aishell/data_aishell/wav/train"
9
-
10
-
11
- sh run.sh --stage 3 --stop_stage 3 --system_version centos --file_folder_name file_dir --final_model_name mpnet-aishell-20250224 \
12
- --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
13
- --speech_dir "/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train"
14
-
15
- sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name nx-clean-unet-aishell-20250228 \
16
  --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
17
  --speech_dir "/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train" \
18
  --max_epochs 100
19
 
20
-
21
- sh run.sh --stage 1 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name mpnet-nx-speech-20250224 \
22
- --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
23
- --speech_dir "/data/tianxing/HuggingDatasets/nx_noise/data/speech" \
24
- --max_epochs 100 --max_count 10000
25
-
26
-
27
  END
28
 
29
 
 
3
  : <<'END'
4
 
5
 
6
+ sh run.sh --stage 2 --stop_stage 2 --system_version centos --file_folder_name file_dir --final_model_name nx-denoise-aishell-20250228 \
 
 
 
 
 
 
 
 
 
7
  --noise_dir "/data/tianxing/HuggingDatasets/nx_noise/data/noise" \
8
  --speech_dir "/data/tianxing/HuggingDatasets/aishell/data_aishell/wav/train" \
9
  --max_epochs 100
10
 
 
 
 
 
 
 
 
11
  END
12
 
13
 
examples/nx_denoise/yaml/config.yaml CHANGED
@@ -5,16 +5,7 @@ segment_size: 16000
5
  n_fft: 512
6
  win_size: 200
7
  hop_size: 80
8
- # 因为 hop_size 取 80,则相当于 stft 的时间步是 10ms 一步,所以降采样也考虑到差不多的分辨率。
9
 
10
- # 2**down_sampling_num_layers,
11
- # 例如 2**6=64 就意味着 64 个值在降采样之后是一个时间步,
12
- # 则一步是 64/sample_rate = 0.008秒。
13
- # 那么 tsfm_chunk_size=2 则为16ms,tsfm_chunk_size=4 则为32ms
14
- # 假设每次向左看1秒,向右看30ms,则:
15
- # tsfm_chunk_size=1,tsfm_num_left_chunks=128,tsfm_num_right_chunks=4
16
- # tsfm_chunk_size=2,tsfm_num_left_chunks=64,tsfm_num_right_chunks=2
17
- # tsfm_chunk_size=4,tsfm_num_left_chunks=32,tsfm_num_right_chunks=1
18
  down_sampling_num_layers: 6
19
  down_sampling_in_channels: 1
20
  down_sampling_hidden_channels: 64
 
5
  n_fft: 512
6
  win_size: 200
7
  hop_size: 80
 
8
 
 
 
 
 
 
 
 
 
9
  down_sampling_num_layers: 6
10
  down_sampling_in_channels: 1
11
  down_sampling_hidden_channels: 64
toolbox/torchaudio/models/nx_denoise/modeling_nx_denoise.py CHANGED
@@ -244,6 +244,8 @@ class NXDenoise(nn.Module):
244
 
245
  # ts transformer
246
  # bottle_neck shape: [batch_size, channels, time_steps, freq_dim]
 
 
247
 
248
  # causal conv out
249
  bottle_neck = self.causal_conv_out.forward(bottle_neck)
 
244
 
245
  # ts transformer
246
  # bottle_neck shape: [batch_size, channels, time_steps, freq_dim]
247
+ bottle_neck = self.ts_transformer.forward(bottle_neck)
248
+ # bottle_neck shape: [batch_size, channels, time_steps, freq_dim]
249
 
250
  # causal conv out
251
  bottle_neck = self.causal_conv_out.forward(bottle_neck)