RinneAi committed on
Commit
e57a367
1 Parent(s): f97182f

Upload 3 files

Browse files
model_assets/Rinne/Rinne.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95d92d1b35ed25ef73f66249300b888968ac60333179be606066a6b27dccb521
3
- size 198768188
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72d8969b3883f5abd22ba7afd8d9a9353f04f78199f1e340d472c8bdd8ee3c59
3
+ size 251150980
model_assets/Rinne/config.json CHANGED
@@ -5,29 +5,34 @@
5
  "eval_interval": 2000,
6
  "seed": 42,
7
  "epochs": 1000,
8
- "learning_rate": 0.0002,
9
  "betas": [
10
  0.8,
11
  0.99
12
  ],
13
  "eps": 1e-09,
14
  "batch_size": 3,
15
- "bf16_run": true,
16
- "lr_decay": 0.99995,
 
17
  "segment_size": 16384,
18
  "init_lr_ratio": 1,
19
  "warmup_epochs": 0,
20
  "c_mel": 45,
21
  "c_kl": 1.0,
 
22
  "skip_optimizer": false,
23
  "freeze_ZH_bert": false,
24
  "freeze_JP_bert": false,
25
  "freeze_EN_bert": false,
26
- "freeze_style": false
 
 
27
  },
28
  "data": {
29
- "training_files": "Data/Rinne/train.list",
30
- "validation_files": "Data/Rinne/val.list",
 
31
  "max_wav_value": 32768.0,
32
  "sampling_rate": 44100,
33
  "filter_length": 2048,
@@ -39,19 +44,20 @@
39
  "add_blank": true,
40
  "n_speakers": 1,
41
  "cleaned_text": true,
 
 
 
42
  "num_styles": 1,
43
  "style2id": {
44
  "Neutral": 0
45
- },
46
- "spk2id": {
47
- "Rinne": 0
48
  }
49
  },
50
  "model": {
51
  "use_spk_conditioned_encoder": true,
52
  "use_noise_scaled_mas": true,
53
  "use_mel_posterior_encoder": false,
54
- "use_duration_discriminator": true,
 
55
  "inter_channels": 192,
56
  "hidden_channels": 192,
57
  "filter_channels": 768,
@@ -99,7 +105,14 @@
99
  ],
100
  "n_layers_q": 3,
101
  "use_spectral_norm": false,
102
- "gin_channels": 256
 
 
 
 
 
 
 
103
  },
104
- "version": "1.3"
105
  }
 
5
  "eval_interval": 2000,
6
  "seed": 42,
7
  "epochs": 1000,
8
+ "learning_rate": 0.0001,
9
  "betas": [
10
  0.8,
11
  0.99
12
  ],
13
  "eps": 1e-09,
14
  "batch_size": 3,
15
+ "bf16_run": false,
16
+ "fp16_run": false,
17
+ "lr_decay": 0.99996,
18
  "segment_size": 16384,
19
  "init_lr_ratio": 1,
20
  "warmup_epochs": 0,
21
  "c_mel": 45,
22
  "c_kl": 1.0,
23
+ "c_commit": 100,
24
  "skip_optimizer": false,
25
  "freeze_ZH_bert": false,
26
  "freeze_JP_bert": false,
27
  "freeze_EN_bert": false,
28
+ "freeze_emo": false,
29
+ "freeze_style": false,
30
+ "freeze_decoder": false
31
  },
32
  "data": {
33
+ "use_jp_extra": true,
34
+ "training_files": "Data\\Rinne\\train.list",
35
+ "validation_files": "Data\\Rinne\\val.list",
36
  "max_wav_value": 32768.0,
37
  "sampling_rate": 44100,
38
  "filter_length": 2048,
 
44
  "add_blank": true,
45
  "n_speakers": 1,
46
  "cleaned_text": true,
47
+ "spk2id": {
48
+ "Rinne": 0
49
+ },
50
  "num_styles": 1,
51
  "style2id": {
52
  "Neutral": 0
 
 
 
53
  }
54
  },
55
  "model": {
56
  "use_spk_conditioned_encoder": true,
57
  "use_noise_scaled_mas": true,
58
  "use_mel_posterior_encoder": false,
59
+ "use_duration_discriminator": false,
60
+ "use_wavlm_discriminator": true,
61
  "inter_channels": 192,
62
  "hidden_channels": 192,
63
  "filter_channels": 768,
 
105
  ],
106
  "n_layers_q": 3,
107
  "use_spectral_norm": false,
108
+ "gin_channels": 512,
109
+ "slm": {
110
+ "model": "./slm/wavlm-base-plus",
111
+ "sr": 16000,
112
+ "hidden": 768,
113
+ "nlayers": 13,
114
+ "initial_channel": 64
115
+ }
116
  },
117
+ "version": "2.4.1-JP-Extra"
118
  }