HoneyTian committed on
Commit
6c8bea2
·
1 Parent(s): 7a982a2
examples/mpnet_aishell/run.sh CHANGED
@@ -109,7 +109,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
109
  $verbose && echo "stage 2: train model"
110
  cd "${work_dir}" || exit 1
111
  python3 step_2_train_model.py \
112
- --train_dataset "${valid_dataset}" \
113
  --valid_dataset "${valid_dataset}" \
114
  --serialization_dir "${file_dir}" \
115
  --config_file "${config_file}" \
 
109
  $verbose && echo "stage 2: train model"
110
  cd "${work_dir}" || exit 1
111
  python3 step_2_train_model.py \
112
+ --train_dataset "${train_dataset}" \
113
  --valid_dataset "${valid_dataset}" \
114
  --serialization_dir "${file_dir}" \
115
  --config_file "${config_file}" \
examples/mpnet_aishell/step_1_prepare_data.py CHANGED
@@ -42,6 +42,8 @@ def get_args():
42
 
43
  parser.add_argument("--target_sample_rate", default=8000, type=int)
44
 
 
 
45
  args = parser.parse_args()
46
  return args
47
 
@@ -99,6 +101,9 @@ def get_dataset(args):
99
  count = 0
100
  process_bar = tqdm(desc="build dataset excel")
101
  for noise, speech in zip(noise_generator, speech_generator):
 
 
 
102
 
103
  noise_filename = noise["filename"]
104
  noise_raw_duration = noise["raw_duration"]
 
42
 
43
  parser.add_argument("--target_sample_rate", default=8000, type=int)
44
 
45
+ parser.add_argument("--scale", default=0.01, type=float)
46
+
47
  args = parser.parse_args()
48
  return args
49
 
 
101
  count = 0
102
  process_bar = tqdm(desc="build dataset excel")
103
  for noise, speech in zip(noise_generator, speech_generator):
104
+ flag = random.random()
105
+ if flag > args.scale:
106
+ continue
107
 
108
  noise_filename = noise["filename"]
109
  noise_raw_duration = noise["raw_duration"]