Upload models
Browse files- config.yaml +90 -0
- pytorch_model.bin +3 -0
- spk_info.npz +3 -0
config.yaml
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
data:
|
2 |
+
block_size: 512
|
3 |
+
duration: 1.5
|
4 |
+
encoder: dpwavlmbase
|
5 |
+
encoder_ckpt: models/pretrained/dphubert/DPWavLM-sp0.75.pth
|
6 |
+
encoder_hop_size: 320
|
7 |
+
encoder_out_channels: 768
|
8 |
+
encoder_sample_rate: 16000
|
9 |
+
extensions:
|
10 |
+
- wav
|
11 |
+
- flac
|
12 |
+
- mp3
|
13 |
+
- m4a
|
14 |
+
f0_extractor: rmvpe
|
15 |
+
f0_max: 1200
|
16 |
+
f0_min: 65
|
17 |
+
sampling_rate: 44100
|
18 |
+
spk_embed_channels: 256
|
19 |
+
spk_embed_encoder: pyannote.audio
|
20 |
+
spk_embed_encoder_ckpt: ./models/pretrained/pyannote.audio/wespeaker-voxceleb-resnet34-LM/pytorch_model.bin
|
21 |
+
spk_embed_encoder_sample_rate: 16000
|
22 |
+
volume_window_size: 8
|
23 |
+
device: cuda
|
24 |
+
env:
|
25 |
+
gpu_id: 0
|
26 |
+
loss:
|
27 |
+
beta: 1.0
|
28 |
+
fft_max: 2048
|
29 |
+
fft_min: 128
|
30 |
+
gamma: 0.0
|
31 |
+
n_ffts:
|
32 |
+
- 32
|
33 |
+
- 64
|
34 |
+
- 128
|
35 |
+
- 256
|
36 |
+
- 512
|
37 |
+
- 1024
|
38 |
+
- 2048
|
39 |
+
overlap: 0.5
|
40 |
+
use_multi_scale_log_freq: true
|
41 |
+
model:
|
42 |
+
f0_input_variance: 0.0
|
43 |
+
f0_offset_size_downsamples: 16
|
44 |
+
harmonic_env_size_downsamples: 16
|
45 |
+
no_use_embed_conv: false
|
46 |
+
noise_env_size_downsamples: 16
|
47 |
+
noise_seed: 289
|
48 |
+
noise_to_harmonic_phase: false
|
49 |
+
type: CombSubMinimumNoisedPhase
|
50 |
+
units_hidden_channels: 256
|
51 |
+
units_layers:
|
52 |
+
- - 10
|
53 |
+
- 11
|
54 |
+
use_add_noise_env: false
|
55 |
+
use_discriminator: true
|
56 |
+
use_f0_offset: false
|
57 |
+
use_harmonic_env: true
|
58 |
+
use_noise_env: false
|
59 |
+
use_speaker_embed: true
|
60 |
+
win_length: 2048
|
61 |
+
train:
|
62 |
+
accelerator:
|
63 |
+
log_with: tensorboard
|
64 |
+
accelerator_project_config:
|
65 |
+
total_limit: 10
|
66 |
+
allow_tf32: true
|
67 |
+
amp_dtype: fp32
|
68 |
+
batch_size: 32
|
69 |
+
cache_all_data: true
|
70 |
+
cache_device: cpu
|
71 |
+
cache_fp16: true
|
72 |
+
epochs: 100
|
73 |
+
frame_hop_random_max: 64
|
74 |
+
frame_hop_random_min: 32
|
75 |
+
interval_log: 10
|
76 |
+
interval_val: 2000
|
77 |
+
loss_variation: 0.1
|
78 |
+
low_similar_loss_variation: 0.7
|
79 |
+
lr: 0.0003
|
80 |
+
num_workers: 2
|
81 |
+
only_u2c_stack: false
|
82 |
+
save_states: true
|
83 |
+
sched_cooldown: 2
|
84 |
+
sched_factor: 0.5
|
85 |
+
sched_gamma: 0.99999
|
86 |
+
sched_min_lr: 1.0e-05
|
87 |
+
sched_patience: 50
|
88 |
+
sched_threshold: 1.0e-05
|
89 |
+
sched_threshold_mode: rel
|
90 |
+
weight_decay: 1.0e-05
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:742c9855d8713c4d87ad694927a6179301a93673c55c382a1c12b70f5a10dd95
|
3 |
+
size 23861118
|
spk_info.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70323a08262c22aba09d750142484b9ac1890cef9c9e7b446a0d167a0ff25354
|
3 |
+
size 153737
|