Commit eb57397 · Yurii Paniv
Parent(s): 2b6da0d

Improve training steps

Files changed:
- training/.gitignore (+2 -1)
- training/STEPS.md (+18 -4)
- training/train_vits.yaml (+7 -5)
training/.gitignore CHANGED

```diff
@@ -1 +1,2 @@
-espnet
+espnet
+data
```
training/STEPS.md CHANGED

```diff
@@ -2,10 +2,9 @@ Setup env
 Link: https://espnet.github.io/espnet/installation.html
 
 0. `sudo apt-get install cmake sox libsndfile1-dev ffmpeg`
-1. `git clone https://github.com/espnet/espnet
-`conda create -p ./.venv python=3.8`
-`conda install -c anaconda cudatoolkit`
+1. `git clone --branch v.202209 https://github.com/espnet/espnet`
 2. `cd ./espnet/tools`
+./setup_anaconda.sh anaconda espnet 3.8
 3. `CONDA_TOOLS_DIR=$(dirname ${CONDA_EXE})/..`
 ./setup_anaconda.sh ${CONDA_TOOLS_DIR} espnet 3.8
 5. `make`
@@ -16,4 +15,19 @@ make
 # run training
 
 cd ../egs2/ljspeech/tts1
-./run.sh
+./run.sh
+
+./run.sh \
+    --stage 2 \
+    --use_sid true \
+    --fs 22050 \
+    --n_fft 1024 \
+    --n_shift 256 \
+    --win_length null \
+    --dumpdir dump/22k \
+    --expdir exp/22k \
+    --tts_task gan_tts \
+    --feats_extract linear_spectrogram \
+    --feats_normalize none \
+    --train_config ./conf/tuning/train_vits.yaml \
+    --inference_config ./conf/tuning/decode_vits.yaml
```
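Read end to end, the updated STEPS.md amounts to the shell session below. This is a minimal sketch rather than a verified script: it assumes conda is already installed (so `${CONDA_EXE}` is set), that a CUDA-capable GPU is available, and that the initial plain `./run.sh` in the diff is what performs the early data-preparation stage before the customised call restarts from stage 2. Everything else is copied from the diff above.

```bash
#!/usr/bin/env bash
# Sketch of the updated STEPS.md workflow; commands are taken from the diff above.
set -euo pipefail

# 0. System dependencies
sudo apt-get install -y cmake sox libsndfile1-dev ffmpeg

# 1. Pinned ESPnet checkout
git clone --branch v.202209 https://github.com/espnet/espnet
cd espnet/tools

# 2./3. Create the "espnet" conda env next to the existing conda installation
CONDA_TOOLS_DIR=$(dirname "${CONDA_EXE}")/..
./setup_anaconda.sh "${CONDA_TOOLS_DIR}" espnet 3.8

# 5. Build the ESPnet tools
make

# Run training from the LJSpeech TTS recipe. Data preparation is assumed to
# have been done already by an initial plain ./run.sh, as in the diff.
cd ../egs2/ljspeech/tts1
./run.sh \
    --stage 2 \
    --use_sid true \
    --fs 22050 \
    --n_fft 1024 \
    --n_shift 256 \
    --win_length null \
    --dumpdir dump/22k \
    --expdir exp/22k \
    --tts_task gan_tts \
    --feats_extract linear_spectrogram \
    --feats_normalize none \
    --train_config ./conf/tuning/train_vits.yaml \
    --inference_config ./conf/tuning/decode_vits.yaml
```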
training/train_vits.yaml CHANGED

```diff
@@ -16,8 +16,8 @@ tts_conf:
     generator_type: vits_generator
     generator_params:
         hidden_channels: 192
-        spks:
-        global_channels:
+        spks: 128
+        global_channels: 256
         segment_size: 32
         text_encoder_attention_heads: 2
         text_encoder_ffn_expand: 4
@@ -159,16 +159,18 @@ generator_first: false # whether to start updating generator first
 # OTHER TRAINING SETTING #
 ##########################################################
 #num_iters_per_epoch: 1000 # number of iterations per epoch
-max_epoch:
+max_epoch: 30 # number of epochs
 accum_grad: 1 # gradient accumulation
-batch_bins:
+batch_bins: 1900000 # batch bins (feats_type=raw)
 batch_type: numel # how to make batch
+#batch_type: sorted # how to make batchbatch_size: 1
 grad_clip: -1 # gradient clipping norm
 grad_noise: false # whether to use gradient noise injection
 sort_in_batch: descending # how to sort data in making batch
 sort_batch: descending # how to sort created batches
-num_workers:
+num_workers: 1 # number of workers of data loader
 use_amp: false # whether to use pytorch amp
+train_dtype: float32
 log_interval: 50 # log interval in iterations
 keep_nbest_models: 10 # number of models to keep
 num_att_plot: 3 # number of attention figures to be saved in every check
```
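The two generator additions tie back to `--use_sid true` in STEPS.md: `spks: 128` sizes the speaker-ID embedding table, and `global_channels: 256` is the width of the global conditioning vector the VITS generator derives from it. Below is a small, hypothetical sanity check; the `tr_no_dev` directory name and the Kaldi-style `utt2spk` layout are assumptions about what the recipe's data-preparation stage produces, so adjust the paths to the actual train set.

```bash
# Hypothetical checks, run from egs2/ljspeech/tts1 after data preparation.
# The number of distinct speaker IDs must not exceed spks in train_vits.yaml,
# or the speaker-embedding lookup should fail once training starts.
n_spk=$(cut -d' ' -f2 data/tr_no_dev/utt2spk | sort -u | wc -l)
echo "distinct speaker IDs: ${n_spk} (train_vits.yaml allows up to spks: 128)"

# Confirm the new training settings are read from the edited config.
grep -E '^(max_epoch|batch_bins|num_workers|train_dtype):' conf/tuning/train_vits.yaml
```

Checking this before launching is cheap insurance against losing part of a 30-epoch GAN training run to a bad speaker count.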