Mahiruoshi committed
Commit
8e35048
1 Parent(s): 599fee4

Upload 76 files

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. app.py +280 -0
  2. bert/bert-base-japanese-v3/.gitattributes +34 -0
  3. bert/bert-base-japanese-v3/README.md +53 -0
  4. bert/bert-base-japanese-v3/config.json +19 -0
  5. bert/bert-base-japanese-v3/tokenizer_config.json +10 -0
  6. bert/bert-base-japanese-v3/vocab.txt +0 -0
  7. bert/bert-large-japanese-v2/.gitattributes +34 -0
  8. bert/bert-large-japanese-v2/README.md +53 -0
  9. bert/bert-large-japanese-v2/config.json +19 -0
  10. bert/bert-large-japanese-v2/tokenizer_config.json +10 -0
  11. bert/bert-large-japanese-v2/vocab.txt +0 -0
  12. bert/bert_models.json +14 -0
  13. bert/chinese-roberta-wwm-ext-large/.gitattributes +9 -0
  14. bert/chinese-roberta-wwm-ext-large/README.md +57 -0
  15. bert/chinese-roberta-wwm-ext-large/added_tokens.json +1 -0
  16. bert/chinese-roberta-wwm-ext-large/config.json +28 -0
  17. bert/chinese-roberta-wwm-ext-large/pytorch_model.bin +3 -0
  18. bert/chinese-roberta-wwm-ext-large/special_tokens_map.json +1 -0
  19. bert/chinese-roberta-wwm-ext-large/tokenizer.json +0 -0
  20. bert/chinese-roberta-wwm-ext-large/tokenizer_config.json +1 -0
  21. bert/chinese-roberta-wwm-ext-large/vocab.txt +0 -0
  22. bert/deberta-v2-large-japanese-char-wwm/.gitattributes +34 -0
  23. bert/deberta-v2-large-japanese-char-wwm/README.md +89 -0
  24. bert/deberta-v2-large-japanese-char-wwm/config.json +37 -0
  25. bert/deberta-v2-large-japanese-char-wwm/pytorch_model.bin +3 -0
  26. bert/deberta-v2-large-japanese-char-wwm/special_tokens_map.json +7 -0
  27. bert/deberta-v2-large-japanese-char-wwm/tokenizer_config.json +19 -0
  28. bert/deberta-v2-large-japanese-char-wwm/vocab.txt +0 -0
  29. bert/deberta-v2-large-japanese/.gitattributes +34 -0
  30. bert/deberta-v2-large-japanese/README.md +111 -0
  31. bert/deberta-v2-large-japanese/config.json +38 -0
  32. bert/deberta-v2-large-japanese/special_tokens_map.json +9 -0
  33. bert/deberta-v2-large-japanese/tokenizer.json +0 -0
  34. bert/deberta-v2-large-japanese/tokenizer_config.json +15 -0
  35. bert/deberta-v3-large/.gitattributes +27 -0
  36. bert/deberta-v3-large/README.md +93 -0
  37. bert/deberta-v3-large/config.json +22 -0
  38. bert/deberta-v3-large/generator_config.json +22 -0
  39. bert/deberta-v3-large/pytorch_model.bin +3 -0
  40. bert/deberta-v3-large/spm.model +3 -0
  41. bert/deberta-v3-large/tokenizer_config.json +4 -0
  42. commons.py +158 -0
  43. config.py +248 -0
  44. config.yml +177 -0
  45. onnx/BangDreamApi.json +244 -0
  46. onnx/BangDreamApi/BangDreamApi_dec.onnx +3 -0
  47. onnx/BangDreamApi/BangDreamApi_dp.onnx +3 -0
  48. onnx/BangDreamApi/BangDreamApi_emb.onnx +3 -0
  49. onnx/BangDreamApi/BangDreamApi_enc_p.onnx +3 -0
  50. onnx/BangDreamApi/BangDreamApi_flow.onnx +3 -0
app.py ADDED
@@ -0,0 +1,280 @@
+ from onnx_modules.V230_OnnxInference import OnnxInferenceSession
+ import numpy as np
+ import torch
+ from scipy.io.wavfile import write
+ from text import cleaned_text_to_sequence, get_bert
+ from text.cleaner import clean_text
+ import utils
+ import commons
+ import uuid
+ from flask import Flask, request, jsonify, render_template_string
+ from flask_cors import CORS
+ import gradio as gr
+ import os
+ from threading import Thread
+
+ hps = utils.get_hparams_from_file('onnx/BangDreamApi.json')
+ device = 'cpu'
+
+ BandList = {
+     "PoppinParty":["香澄","有咲","たえ","りみ","沙綾"],
+     "Afterglow":["蘭","モカ","ひまり","巴","つぐみ"],
+     "HelloHappyWorld":["こころ","美咲","薫","花音","はぐみ"],
+     "PastelPalettes":["彩","日菜","千聖","イヴ","麻弥"],
+     "Roselia":["友希那","紗夜","リサ","燐子","あこ"],
+     "RaiseASuilen":["レイヤ","ロック","ますき","チュチュ","パレオ"],
+     "Morfonica":["ましろ","瑠唯","つくし","七深","透子"],
+     "MyGo":["燈","愛音","そよ","立希","楽奈"],
+     "AveMujica":["祥子","睦","海鈴","にゃむ","初華"],
+     "圣翔音乐学园":["華戀","光","香子","雙葉","真晝","純那","克洛迪娜","真矢","奈奈"],
+     "凛明馆女子学校":["珠緒","壘","文","悠悠子","一愛"],
+     "弗隆提亚艺术学校":["艾露","艾露露","菈樂菲","司","靜羽"],
+     "西克菲尔特音乐学院":["晶","未知留","八千代","栞","美帆"]
+ }
+
+ Session = OnnxInferenceSession(
+     {
+         "enc" : "onnx/BangDreamApi/BangDreamApi_enc_p.onnx",
+         "emb_g" : "onnx/BangDreamApi/BangDreamApi_emb.onnx",
+         "dp" : "onnx/BangDreamApi/BangDreamApi_dp.onnx",
+         "sdp" : "onnx/BangDreamApi/BangDreamApi_sdp.onnx",
+         "flow" : "onnx/BangDreamApi/BangDreamApi_flow.onnx",
+         "dec" : "onnx/BangDreamApi/BangDreamApi_dec.onnx"
+     },
+     Providers = ["CPUExecutionProvider"]
+ )
+
+ def get_text(text, language_str, hps, device, style_text=None, style_weight=0.7):
+     style_text = None if style_text == "" else style_text
+     norm_text, phone, tone, word2ph = clean_text(text, language_str)
+     phone, tone, language = cleaned_text_to_sequence(phone, tone, language_str)
+
+     if True:
+         phone = commons.intersperse(phone, 0)
+         tone = commons.intersperse(tone, 0)
+         language = commons.intersperse(language, 0)
+         for i in range(len(word2ph)):
+             word2ph[i] = word2ph[i] * 2
+         word2ph[0] += 1
+     bert_ori = get_bert(
+         norm_text, word2ph, language_str, device, style_text, style_weight
+     )
+     del word2ph
+     assert bert_ori.shape[-1] == len(phone), phone
+
+     if language_str == "ZH":
+         bert = bert_ori
+         ja_bert = torch.randn(1024, len(phone))
+         en_bert = torch.randn(1024, len(phone))
+     elif language_str == "JP":
+         bert = torch.randn(1024, len(phone))
+         ja_bert = bert_ori
+         en_bert = torch.randn(1024, len(phone))
+     elif language_str == "EN":
+         bert = torch.randn(1024, len(phone))
+         ja_bert = torch.randn(1024, len(phone))
+         en_bert = bert_ori
+     else:
+         raise ValueError("language_str should be ZH, JP or EN")
+
+     assert bert.shape[-1] == len(
+         phone
+     ), f"Bert seq len {bert.shape[-1]} != {len(phone)}"
+
+     phone = torch.LongTensor(phone)
+     tone = torch.LongTensor(tone)
+     language = torch.LongTensor(language)
+     return bert, ja_bert, en_bert, phone, tone, language
+
+
+ def infer(
+     text,
+     sid,
+     style_text=None,
+     style_weight=0.7,
+     sdp_ratio=0.5,
+     noise_scale=0.6,
+     noise_scale_w=0.667,
+     length_scale=1,
+     unique_filename = 'temp.wav'
+ ):
+     language = 'JP' if is_japanese(text) else 'ZH'
+     bert, ja_bert, en_bert, phones, tone, language = get_text(
+         text,
+         language,
+         hps,
+         device,
+         style_text=style_text,
+         style_weight=style_weight,
+     )
+     with torch.no_grad():
+         x_tst = phones.unsqueeze(0).to(device).numpy()
+         language = np.zeros_like(x_tst)
+         tone = np.zeros_like(x_tst)
+         bert = bert.to(device).transpose(0, 1).numpy()
+         ja_bert = ja_bert.to(device).transpose(0, 1).numpy()
+         en_bert = en_bert.to(device).transpose(0, 1).numpy()
+         del phones
+         sid = np.array([hps.spk2id[sid]])
+         audio = Session(
+             x_tst,
+             tone,
+             language,
+             bert,
+             ja_bert,
+             en_bert,
+             sid,
+             seed=114514,
+             seq_noise_scale=noise_scale_w,
+             sdp_noise_scale=noise_scale,
+             length_scale=length_scale,
+             sdp_ratio=sdp_ratio,
+         )
+         del x_tst, tone, language, bert, ja_bert, en_bert, sid
+         write(unique_filename, 44100, audio)
+         return (44100, gr.processing_utils.convert_to_16_bit_wav(audio))
+
+ def is_japanese(string):
+     for ch in string:
+         if ord(ch) > 0x3040 and ord(ch) < 0x30FF:
+             return True
+     return False
+
+
+ Flaskapp = Flask(__name__)
+ CORS(Flaskapp)
+ @Flaskapp.route('/')
+
+ def tts():
+     global last_text, last_model
+     speaker = request.args.get('speaker')
+     sdp_ratio = float(request.args.get('sdp_ratio', 0.2))
+     noise_scale = float(request.args.get('noise_scale', 0.6))
+     noise_scale_w = float(request.args.get('noise_scale_w', 0.8))
+     length_scale = float(request.args.get('length_scale', 1))
+     style_weight = float(request.args.get('style_weight', 0.7))
+     style_text = request.args.get('style_text', 'happy')
+     text = request.args.get('text')
+     is_chat = request.args.get('is_chat', 'false').lower() == 'true'
+     #model = request.args.get('model',modelPaths[-1])
+
+     if not speaker or not text:
+         return render_template_string("""
+             <!DOCTYPE html>
+             <html>
+             <head>
+                 <title>TTS API Documentation</title>
+             </head>
+             <body>
+                 <iframe src="http://127.0.0.1:7860" style="width:100%; height:100vh; border:none;"></iframe>
+             </body>
+             </html>
+         """)
+     '''
+     if model != last_model:
+         unique_filename = loadmodel(model)
+         last_model = model
+     '''
+     if is_chat and text == last_text:
+         # Generate 1 second of silence and return
+         unique_filename = 'blank.wav'
+         silence = np.zeros(44100, dtype=np.int16)
+         write(unique_filename , 44100, silence)
+     else:
+         last_text = text
+         unique_filename = f"temp{uuid.uuid4()}.wav"
+         infer(text, sdp_ratio=sdp_ratio, noise_scale=noise_scale, noise_scale_w=noise_scale_w, length_scale=length_scale,sid = speaker, style_text=style_text, style_weight=style_weight,unique_filename=unique_filename)
+     with open(unique_filename ,'rb') as bit:
+         wav_bytes = bit.read()
+     os.remove(unique_filename)
+     headers = {
+         'Content-Type': 'audio/wav',
+         'Text': unique_filename .encode('utf-8')}
+     return wav_bytes, 200, headers
+
+ def gradio_interface():
+     return app.launch(share=False)
+
+ if __name__ == "__main__":
+     speaker_ids = hps.spk2id
+     speakers = list(speaker_ids.keys())
+     last_text = ""
+     with gr.Blocks() as app:
+         for band in BandList:
+             with gr.TabItem(band):
+                 for name in BandList[band]:
+                     with gr.TabItem(name):
+                         with gr.Row():
+                             with gr.Column():
+                                 with gr.Row():
+                                     gr.Markdown(
+                                         '<div align="center">'
+                                         f'<img style="width:auto;height:400px;" src="https://mahiruoshi-bangdream-bert-vits2.hf.space/file/image/{name}.png">'
+                                         '</div>'
+                                     )
+                                 length_scale = gr.Slider(
+                                     minimum=0.1, maximum=2, value=1, step=0.01, label="语速调节"
+                                 )
+                                 with gr.Accordion(label="参数设定", open=False):
+                                     sdp_ratio = gr.Slider(
+                                         minimum=0, maximum=1, value=0.5, step=0.01, label="SDP/DP混合比"
+                                     )
+                                     noise_scale = gr.Slider(
+                                         minimum=0.1, maximum=2, value=0.6, step=0.01, label="感情调节"
+                                     )
+                                     noise_scale_w = gr.Slider(
+                                         minimum=0.1, maximum=2, value=0.667, step=0.01, label="音素长度"
+                                     )
+                                     speaker = gr.Dropdown(
+                                         choices=speakers, value=name, label="说话人"
+                                     )
+                                 '''
+                                 with gr.Accordion(label="切换模型", open=False):
+                                     modelstrs = gr.Dropdown(label = "模型", choices = modelPaths, value = modelPaths[0], type = "value")
+                                     btnMod = gr.Button("载入模型")
+                                     statusa = gr.TextArea()
+                                     btnMod.click(loadmodel, inputs=[modelstrs], outputs = [statusa])
+                                 '''
+                             with gr.Column():
+                                 text = gr.TextArea(
+                                     label="输入纯日语或者中文",
+                                     placeholder="输入纯日语或者中文",
+                                     value="为什么要演奏春日影!",
+                                 )
+                                 style_text = gr.Textbox(label="辅助文本")
+                                 style_weight = gr.Slider(
+                                     minimum=0,
+                                     maximum=1,
+                                     value=0.7,
+                                     step=0.1,
+                                     label="Weight",
+                                     info="主文本和辅助文本的bert混合比率,0表示仅主文本,1表示仅辅助文本",
+                                 )
+                                 btn = gr.Button("点击生成", variant="primary")
+                                 audio_output = gr.Audio(label="Output Audio")
+                                 '''
+                                 btntran = gr.Button("快速中翻日")
+                                 translateResult = gr.TextArea("从这复制翻译后的文本")
+                                 btntran.click(translate, inputs=[text], outputs = [translateResult])
+                                 '''
+                         btn.click(
+                             infer,
+                             inputs=[
+                                 text,
+                                 speaker,
+                                 style_text,
+                                 style_weight,
+                                 sdp_ratio,
+                                 noise_scale,
+                                 noise_scale_w,
+                                 length_scale,
+                             ],
+                             outputs=[audio_output],
+                         )
+
+     api_thread = Thread(target=Flaskapp.run, args=("0.0.0.0", 5000))
+     gradio_thread = Thread(target=gradio_interface)
+     gradio_thread.start()
+     print("推理页面已开启!")
+     api_thread.start()
+     print("api页面已开启!运行在5000端口")
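Once app.py is running, the Gradio UI serves on its default port 7860 and the Flask API on port 5000. A minimal client sketch for the `tts()` route above (not part of the commit; the parameter names, defaults, and port come from the route, while the speaker name and output path are only illustrative):

```python
import requests

# Query the Flask TTS endpoint started by app.py.
# `speaker` must be a key of hps.spk2id; "香澄" is just an example.
params = {
    "speaker": "香澄",
    "text": "为什么要演奏春日影!",
    "sdp_ratio": 0.5,
    "noise_scale": 0.6,
    "noise_scale_w": 0.667,
    "length_scale": 1.0,
}
resp = requests.get("http://127.0.0.1:5000/", params=params, timeout=120)
resp.raise_for_status()

# The route returns raw WAV bytes with Content-Type: audio/wav.
with open("output.wav", "wb") as f:
    f.write(resp.content)
```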
bert/bert-base-japanese-v3/.gitattributes ADDED
@@ -0,0 +1,34 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
bert/bert-base-japanese-v3/README.md ADDED
@@ -0,0 +1,53 @@
1
+ ---
2
+ license: apache-2.0
3
+ datasets:
4
+ - cc100
5
+ - wikipedia
6
+ language:
7
+ - ja
8
+ widget:
9
+ - text: 東北大学で[MASK]の研究をしています。
10
+ ---
11
+
12
+ # BERT base Japanese (unidic-lite with whole word masking, CC-100 and jawiki-20230102)
13
+
14
+ This is a [BERT](https://github.com/google-research/bert) model pretrained on texts in the Japanese language.
15
+
16
+ This version of the model processes input texts with word-level tokenization based on the Unidic 2.1.2 dictionary (available in [unidic-lite](https://pypi.org/project/unidic-lite/) package), followed by the WordPiece subword tokenization.
17
+ Additionally, the model is trained with the whole word masking enabled for the masked language modeling (MLM) objective.
18
+
19
+ The codes for the pretraining are available at [cl-tohoku/bert-japanese](https://github.com/cl-tohoku/bert-japanese/).
20
+
21
+ ## Model architecture
22
+
23
+ The model architecture is the same as the original BERT base model; 12 layers, 768 dimensions of hidden states, and 12 attention heads.
24
+
25
+ ## Training Data
26
+
27
+ The model is trained on the Japanese portion of [CC-100 dataset](https://data.statmt.org/cc-100/) and the Japanese version of Wikipedia.
28
+ For Wikipedia, we generated a text corpus from the [Wikipedia Cirrussearch dump file](https://dumps.wikimedia.org/other/cirrussearch/) as of January 2, 2023.
29
+ The corpus files generated from CC-100 and Wikipedia are 74.3GB and 4.9GB in size and consist of approximately 392M and 34M sentences, respectively.
30
+
31
+ For the purpose of splitting texts into sentences, we used [fugashi](https://github.com/polm/fugashi) with [mecab-ipadic-NEologd](https://github.com/neologd/mecab-ipadic-neologd) dictionary (v0.0.7).
32
+
33
+ ## Tokenization
34
+
35
+ The texts are first tokenized by MeCab with the Unidic 2.1.2 dictionary and then split into subwords by the WordPiece algorithm.
36
+ The vocabulary size is 32768.
37
+
38
+ We used [fugashi](https://github.com/polm/fugashi) and [unidic-lite](https://github.com/polm/unidic-lite) packages for the tokenization.
39
+
40
+ ## Training
41
+
42
+ We trained the model first on the CC-100 corpus for 1M steps and then on the Wikipedia corpus for another 1M steps.
43
+ For training of the MLM (masked language modeling) objective, we introduced whole word masking in which all of the subword tokens corresponding to a single word (tokenized by MeCab) are masked at once.
44
+
45
+ For training of each model, we used a v3-8 instance of Cloud TPUs provided by [TPU Research Cloud](https://sites.research.google/trc/about/).
46
+
47
+ ## Licenses
48
+
49
+ The pretrained models are distributed under the Apache License 2.0.
50
+
51
+ ## Acknowledgments
52
+
53
+ This model is trained with Cloud TPUs provided by [TPU Research Cloud](https://sites.research.google/trc/about/) program.
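As a quick check of the tokenizer files bundled in this directory (vocab.txt and tokenizer_config.json), the MeCab + WordPiece pipeline described in this card can be exercised locally. Illustrative sketch only, not part of the commit; it assumes `fugashi` and `unidic-lite` are installed, as noted above:

```python
from transformers import AutoTokenizer

# Load the local copy added in this commit (resolves to BertJapaneseTokenizer).
tokenizer = AutoTokenizer.from_pretrained("bert/bert-base-japanese-v3")

# MeCab (unidic-lite) word segmentation followed by WordPiece subwords.
print(tokenizer.tokenize("東北大学で自然言語処理の研究をしています。"))
```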
bert/bert-base-japanese-v3/config.json ADDED
@@ -0,0 +1,19 @@
1
+ {
2
+ "architectures": [
3
+ "BertForPreTraining"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "hidden_act": "gelu",
7
+ "hidden_dropout_prob": 0.1,
8
+ "hidden_size": 768,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 3072,
11
+ "layer_norm_eps": 1e-12,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "bert",
14
+ "num_attention_heads": 12,
15
+ "num_hidden_layers": 12,
16
+ "pad_token_id": 0,
17
+ "type_vocab_size": 2,
18
+ "vocab_size": 32768
19
+ }
bert/bert-base-japanese-v3/tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "tokenizer_class": "BertJapaneseTokenizer",
3
+ "model_max_length": 512,
4
+ "do_lower_case": false,
5
+ "word_tokenizer_type": "mecab",
6
+ "subword_tokenizer_type": "wordpiece",
7
+ "mecab_kwargs": {
8
+ "mecab_dic": "unidic_lite"
9
+ }
10
+ }
bert/bert-base-japanese-v3/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
bert/bert-large-japanese-v2/.gitattributes ADDED
@@ -0,0 +1,34 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
bert/bert-large-japanese-v2/README.md ADDED
@@ -0,0 +1,53 @@
1
+ ---
2
+ license: apache-2.0
3
+ datasets:
4
+ - cc100
5
+ - wikipedia
6
+ language:
7
+ - ja
8
+ widget:
9
+ - text: 東北大学で[MASK]の研究をしています。
10
+ ---
11
+
12
+ # BERT large Japanese (unidic-lite with whole word masking, CC-100 and jawiki-20230102)
13
+
14
+ This is a [BERT](https://github.com/google-research/bert) model pretrained on texts in the Japanese language.
15
+
16
+ This version of the model processes input texts with word-level tokenization based on the Unidic 2.1.2 dictionary (available in [unidic-lite](https://pypi.org/project/unidic-lite/) package), followed by the WordPiece subword tokenization.
17
+ Additionally, the model is trained with the whole word masking enabled for the masked language modeling (MLM) objective.
18
+
19
+ The codes for the pretraining are available at [cl-tohoku/bert-japanese](https://github.com/cl-tohoku/bert-japanese/).
20
+
21
+ ## Model architecture
22
+
23
+ The model architecture is the same as the original BERT large model; 24 layers, 1024 dimensions of hidden states, and 16 attention heads.
24
+
25
+ ## Training Data
26
+
27
+ The model is trained on the Japanese portion of [CC-100 dataset](https://data.statmt.org/cc-100/) and the Japanese version of Wikipedia.
28
+ For Wikipedia, we generated a text corpus from the [Wikipedia Cirrussearch dump file](https://dumps.wikimedia.org/other/cirrussearch/) as of January 2, 2023.
29
+ The corpus files generated from CC-100 and Wikipedia are 74.3GB and 4.9GB in size and consist of approximately 392M and 34M sentences, respectively.
30
+
31
+ For the purpose of splitting texts into sentences, we used [fugashi](https://github.com/polm/fugashi) with [mecab-ipadic-NEologd](https://github.com/neologd/mecab-ipadic-neologd) dictionary (v0.0.7).
32
+
33
+ ## Tokenization
34
+
35
+ The texts are first tokenized by MeCab with the Unidic 2.1.2 dictionary and then split into subwords by the WordPiece algorithm.
36
+ The vocabulary size is 32768.
37
+
38
+ We used [fugashi](https://github.com/polm/fugashi) and [unidic-lite](https://github.com/polm/unidic-lite) packages for the tokenization.
39
+
40
+ ## Training
41
+
42
+ We trained the model first on the CC-100 corpus for 1M steps and then on the Wikipedia corpus for another 1M steps.
43
+ For training of the MLM (masked language modeling) objective, we introduced whole word masking in which all of the subword tokens corresponding to a single word (tokenized by MeCab) are masked at once.
44
+
45
+ For training of each model, we used a v3-8 instance of Cloud TPUs provided by [TPU Research Cloud](https://sites.research.google/trc/about/).
46
+
47
+ ## Licenses
48
+
49
+ The pretrained models are distributed under the Apache License 2.0.
50
+
51
+ ## Acknowledgments
52
+
53
+ This model is trained with Cloud TPUs provided by [TPU Research Cloud](https://sites.research.google/trc/about/) program.
bert/bert-large-japanese-v2/config.json ADDED
@@ -0,0 +1,19 @@
1
+ {
2
+ "architectures": [
3
+ "BertForPreTraining"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "hidden_act": "gelu",
7
+ "hidden_dropout_prob": 0.1,
8
+ "hidden_size": 1024,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 4096,
11
+ "layer_norm_eps": 1e-12,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "bert",
14
+ "num_attention_heads": 16,
15
+ "num_hidden_layers": 24,
16
+ "pad_token_id": 0,
17
+ "type_vocab_size": 2,
18
+ "vocab_size": 32768
19
+ }
bert/bert-large-japanese-v2/tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
1
+ {
2
+ "tokenizer_class": "BertJapaneseTokenizer",
3
+ "model_max_length": 512,
4
+ "do_lower_case": false,
5
+ "word_tokenizer_type": "mecab",
6
+ "subword_tokenizer_type": "wordpiece",
7
+ "mecab_kwargs": {
8
+ "mecab_dic": "unidic_lite"
9
+ }
10
+ }
bert/bert-large-japanese-v2/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
bert/bert_models.json ADDED
@@ -0,0 +1,14 @@
+ {
+     "deberta-v2-large-japanese-char-wwm": {
+         "repo_id": "ku-nlp/deberta-v2-large-japanese-char-wwm",
+         "files": ["pytorch_model.bin"]
+     },
+     "chinese-roberta-wwm-ext-large": {
+         "repo_id": "hfl/chinese-roberta-wwm-ext-large",
+         "files": ["pytorch_model.bin"]
+     },
+     "deberta-v3-large": {
+         "repo_id": "microsoft/deberta-v3-large",
+         "files": ["spm.model", "pytorch_model.bin"]
+     }
+ }
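bert/bert_models.json maps each local `bert/<name>/` directory to the upstream Hugging Face repository that hosts its large weight file, which is not shipped in this upload. How the surrounding code consumes this file is not shown here; the loop below is only a hedged sketch of resolving the mapping with `huggingface_hub`:

```python
import json
from huggingface_hub import hf_hub_download

# Fetch the weight files listed in bert/bert_models.json from their source repos.
# hf_hub_download returns a path inside the local Hugging Face cache.
with open("bert/bert_models.json", encoding="utf-8") as f:
    bert_models = json.load(f)

for local_name, spec in bert_models.items():
    for filename in spec["files"]:
        path = hf_hub_download(repo_id=spec["repo_id"], filename=filename)
        print(f"{local_name}: {filename} -> {path}")
```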
bert/chinese-roberta-wwm-ext-large/.gitattributes ADDED
@@ -0,0 +1,9 @@
1
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
2
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.h5 filter=lfs diff=lfs merge=lfs -text
5
+ *.tflite filter=lfs diff=lfs merge=lfs -text
6
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.ot filter=lfs diff=lfs merge=lfs -text
8
+ *.onnx filter=lfs diff=lfs merge=lfs -text
9
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
bert/chinese-roberta-wwm-ext-large/README.md ADDED
@@ -0,0 +1,57 @@
+ ---
+ language:
+ - zh
+ tags:
+ - bert
+ license: "apache-2.0"
+ ---
+
+ # Please use 'Bert' related functions to load this model!
+
+ ## Chinese BERT with Whole Word Masking
+ To further accelerate Chinese natural language processing, we provide **Chinese pre-trained BERT with Whole Word Masking**.
+
+ **[Pre-Training with Whole Word Masking for Chinese BERT](https://arxiv.org/abs/1906.08101)**
+ Yiming Cui, Wanxiang Che, Ting Liu, Bing Qin, Ziqing Yang, Shijin Wang, Guoping Hu
+
+ This repository is developed based on: https://github.com/google-research/bert
+
+ You may also be interested in:
+ - Chinese BERT series: https://github.com/ymcui/Chinese-BERT-wwm
+ - Chinese MacBERT: https://github.com/ymcui/MacBERT
+ - Chinese ELECTRA: https://github.com/ymcui/Chinese-ELECTRA
+ - Chinese XLNet: https://github.com/ymcui/Chinese-XLNet
+ - Knowledge Distillation Toolkit - TextBrewer: https://github.com/airaria/TextBrewer
+
+ More resources by HFL: https://github.com/ymcui/HFL-Anthology
+
+ ## Citation
+ If you find the technical report or resources useful, please cite the following technical report in your paper.
+ - Primary: https://arxiv.org/abs/2004.13922
+ ```
+ @inproceedings{cui-etal-2020-revisiting,
+   title = "Revisiting Pre-Trained Models for {C}hinese Natural Language Processing",
+   author = "Cui, Yiming and
+     Che, Wanxiang and
+     Liu, Ting and
+     Qin, Bing and
+     Wang, Shijin and
+     Hu, Guoping",
+   booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: Findings",
+   month = nov,
+   year = "2020",
+   address = "Online",
+   publisher = "Association for Computational Linguistics",
+   url = "https://www.aclweb.org/anthology/2020.findings-emnlp.58",
+   pages = "657--668",
+ }
+ ```
+ - Secondary: https://arxiv.org/abs/1906.08101
+ ```
+ @article{chinese-bert-wwm,
+   title={Pre-Training with Whole Word Masking for Chinese BERT},
+   author={Cui, Yiming and Che, Wanxiang and Liu, Ting and Qin, Bing and Yang, Ziqing and Wang, Shijin and Hu, Guoping},
+   journal={arXiv preprint arXiv:1906.08101},
+   year={2019}
+ }
+ ```
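To make the "use 'Bert' related functions" note above concrete, here is a minimal loading sketch (illustrative, not part of the diff; the local path assumes the directory layout added in this commit):

```python
from transformers import BertTokenizer, BertForMaskedLM

# Load with Bert* classes, as the model card requests (not Roberta*).
tokenizer = BertTokenizer.from_pretrained("bert/chinese-roberta-wwm-ext-large")
model = BertForMaskedLM.from_pretrained("bert/chinese-roberta-wwm-ext-large")

inputs = tokenizer("今天天气真[MASK]。", return_tensors="pt")
logits = model(**inputs).logits
print(logits.shape)  # (1, sequence_length, vocab_size=21128)
```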
bert/chinese-roberta-wwm-ext-large/added_tokens.json ADDED
@@ -0,0 +1 @@
+ {}
bert/chinese-roberta-wwm-ext-large/config.json ADDED
@@ -0,0 +1,28 @@
1
+ {
2
+ "architectures": [
3
+ "BertForMaskedLM"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "directionality": "bidi",
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "output_past": true,
20
+ "pad_token_id": 0,
21
+ "pooler_fc_size": 768,
22
+ "pooler_num_attention_heads": 12,
23
+ "pooler_num_fc_layers": 3,
24
+ "pooler_size_per_head": 128,
25
+ "pooler_type": "first_token_transform",
26
+ "type_vocab_size": 2,
27
+ "vocab_size": 21128
28
+ }
bert/chinese-roberta-wwm-ext-large/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4ac62d49144d770c5ca9a5d1d3039c4995665a080febe63198189857c6bd11cd
+ size 1306484351
bert/chinese-roberta-wwm-ext-large/special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
bert/chinese-roberta-wwm-ext-large/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
bert/chinese-roberta-wwm-ext-large/tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"init_inputs": []}
bert/chinese-roberta-wwm-ext-large/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
bert/deberta-v2-large-japanese-char-wwm/.gitattributes ADDED
@@ -0,0 +1,34 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
bert/deberta-v2-large-japanese-char-wwm/README.md ADDED
@@ -0,0 +1,89 @@
1
+ ---
2
+ language: ja
3
+ license: cc-by-sa-4.0
4
+ library_name: transformers
5
+ tags:
6
+ - deberta
7
+ - deberta-v2
8
+ - fill-mask
9
+ - character
10
+ - wwm
11
+ datasets:
12
+ - wikipedia
13
+ - cc100
14
+ - oscar
15
+ metrics:
16
+ - accuracy
17
+ mask_token: "[MASK]"
18
+ widget:
19
+ - text: "京都大学で自然言語処理を[MASK][MASK]する。"
20
+ ---
21
+
22
+ # Model Card for Japanese character-level DeBERTa V2 large
23
+
24
+ ## Model description
25
+
26
+ This is a Japanese DeBERTa V2 large model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the Japanese portion of OSCAR.
27
+ This model is trained with character-level tokenization and whole word masking.
28
+
29
+ ## How to use
30
+
31
+ You can use this model for masked language modeling as follows:
32
+
33
+ ```python
34
+ from transformers import AutoTokenizer, AutoModelForMaskedLM
35
+ tokenizer = AutoTokenizer.from_pretrained('ku-nlp/deberta-v2-large-japanese-char-wwm')
36
+ model = AutoModelForMaskedLM.from_pretrained('ku-nlp/deberta-v2-large-japanese-char-wwm')
37
+
38
+ sentence = '京都大学で自然言語処理を[MASK][MASK]する。'
39
+ encoding = tokenizer(sentence, return_tensors='pt')
40
+ ...
41
+ ```
42
+
43
+ You can also fine-tune this model on downstream tasks.
44
+
45
+ ## Tokenization
46
+
47
+ There is no need to tokenize texts in advance, and you can give raw texts to the tokenizer.
48
+ The texts are tokenized into character-level tokens by [sentencepiece](https://github.com/google/sentencepiece).
49
+
50
+ ## Training data
51
+
52
+ We used the following corpora for pre-training:
53
+
54
+ - Japanese Wikipedia (as of 20221020, 3.2GB, 27M sentences, 1.3M documents)
55
+ - Japanese portion of CC-100 (85GB, 619M sentences, 66M documents)
56
+ - Japanese portion of OSCAR (54GB, 326M sentences, 25M documents)
57
+
58
+ Note that we filtered out documents annotated with "header", "footer", or "noisy" tags in OSCAR.
59
+ Also note that Japanese Wikipedia was duplicated 10 times to make the total size of the corpus comparable to that of CC-100 and OSCAR. As a result, the total size of the training data is 171GB.
60
+
61
+ ## Training procedure
62
+
63
+ We first segmented texts in the corpora into words using [Juman++ 2.0.0-rc3](https://github.com/ku-nlp/jumanpp/releases/tag/v2.0.0-rc3) for whole word masking.
64
+ Then, we built a sentencepiece model with 22,012 tokens including all characters that appear in the training corpus.
65
+
66
+ We tokenized raw corpora into character-level subwords using the sentencepiece model and trained the Japanese DeBERTa model using [transformers](https://github.com/huggingface/transformers) library.
67
+ The training took 26 days using 16 NVIDIA A100-SXM4-40GB GPUs.
68
+
69
+ The following hyperparameters were used during pre-training:
70
+
71
+ - learning_rate: 1e-4
72
+ - per_device_train_batch_size: 26
73
+ - distributed_type: multi-GPU
74
+ - num_devices: 16
75
+ - gradient_accumulation_steps: 8
76
+ - total_train_batch_size: 3,328
77
+ - max_seq_length: 512
78
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-06
79
+ - lr_scheduler_type: linear schedule with warmup (lr = 0 at 300k steps)
80
+ - training_steps: 260,000
81
+ - warmup_steps: 10,000
82
+
83
+ The accuracy of the trained model on the masked language modeling task was 0.795.
84
+ The evaluation set consists of 5,000 randomly sampled documents from each of the training corpora.
85
+
86
+ ## Acknowledgments
87
+
88
+ This work was supported by Joint Usage/Research Center for Interdisciplinary Large-scale Information Infrastructures (JHPCN) through General Collaboration Project no. jh221004, "Developing a Platform for Constructing and Sharing of Large-Scale Japanese Language Models".
89
+ For training models, we used the mdx: a platform for the data-driven future.
bert/deberta-v2-large-japanese-char-wwm/config.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "architectures": [
3
+ "DebertaV2ForMaskedLM"
4
+ ],
5
+ "attention_head_size": 64,
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "conv_act": "gelu",
8
+ "conv_kernel_size": 3,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-07,
15
+ "max_position_embeddings": 512,
16
+ "max_relative_positions": -1,
17
+ "model_type": "deberta-v2",
18
+ "norm_rel_ebd": "layer_norm",
19
+ "num_attention_heads": 16,
20
+ "num_hidden_layers": 24,
21
+ "pad_token_id": 0,
22
+ "pooler_dropout": 0,
23
+ "pooler_hidden_act": "gelu",
24
+ "pooler_hidden_size": 1024,
25
+ "pos_att_type": [
26
+ "p2c",
27
+ "c2p"
28
+ ],
29
+ "position_biased_input": false,
30
+ "position_buckets": 256,
31
+ "relative_attention": true,
32
+ "share_att_key": true,
33
+ "torch_dtype": "float16",
34
+ "transformers_version": "4.25.1",
35
+ "type_vocab_size": 0,
36
+ "vocab_size": 22012
37
+ }
bert/deberta-v2-large-japanese-char-wwm/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bf0dab8ad87bd7c22e85ec71e04f2240804fda6d33196157d6b5923af6ea1201
+ size 1318456639
bert/deberta-v2-large-japanese-char-wwm/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+     "cls_token": "[CLS]",
+     "mask_token": "[MASK]",
+     "pad_token": "[PAD]",
+     "sep_token": "[SEP]",
+     "unk_token": "[UNK]"
+ }
bert/deberta-v2-large-japanese-char-wwm/tokenizer_config.json ADDED
@@ -0,0 +1,19 @@
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_lower_case": false,
4
+ "do_subword_tokenize": true,
5
+ "do_word_tokenize": true,
6
+ "jumanpp_kwargs": null,
7
+ "mask_token": "[MASK]",
8
+ "mecab_kwargs": null,
9
+ "model_max_length": 1000000000000000019884624838656,
10
+ "never_split": null,
11
+ "pad_token": "[PAD]",
12
+ "sep_token": "[SEP]",
13
+ "special_tokens_map_file": null,
14
+ "subword_tokenizer_type": "character",
15
+ "sudachi_kwargs": null,
16
+ "tokenizer_class": "BertJapaneseTokenizer",
17
+ "unk_token": "[UNK]",
18
+ "word_tokenizer_type": "basic"
19
+ }
bert/deberta-v2-large-japanese-char-wwm/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
bert/deberta-v2-large-japanese/.gitattributes ADDED
@@ -0,0 +1,34 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
bert/deberta-v2-large-japanese/README.md ADDED
@@ -0,0 +1,111 @@
1
+ ---
2
+ language: ja
3
+ license: cc-by-sa-4.0
4
+ library_name: transformers
5
+ tags:
6
+ - deberta
7
+ - deberta-v2
8
+ - fill-mask
9
+ datasets:
10
+ - wikipedia
11
+ - cc100
12
+ - oscar
13
+ metrics:
14
+ - accuracy
15
+ mask_token: "[MASK]"
16
+ widget:
17
+ - text: "京都 大学 で 自然 言語 処理 を [MASK] する 。"
18
+ ---
19
+
20
+ # Model Card for Japanese DeBERTa V2 large
21
+
22
+ ## Model description
23
+
24
+ This is a Japanese DeBERTa V2 large model pre-trained on Japanese Wikipedia, the Japanese portion of CC-100, and the
25
+ Japanese portion of OSCAR.
26
+
27
+ ## How to use
28
+
29
+ You can use this model for masked language modeling as follows:
30
+
31
+ ```python
32
+ from transformers import AutoTokenizer, AutoModelForMaskedLM
33
+
34
+ tokenizer = AutoTokenizer.from_pretrained('ku-nlp/deberta-v2-large-japanese')
35
+ model = AutoModelForMaskedLM.from_pretrained('ku-nlp/deberta-v2-large-japanese')
36
+
37
+ sentence = '京都 大学 で 自然 言語 処理 を [MASK] する 。' # input should be segmented into words by Juman++ in advance
38
+ encoding = tokenizer(sentence, return_tensors='pt')
39
+ ...
40
+ ```
41
+
42
+ You can also fine-tune this model on downstream tasks.
43
+
44
+ ## Tokenization
45
+
46
+ The input text should be segmented into words by [Juman++](https://github.com/ku-nlp/jumanpp) in
47
+ advance. [Juman++ 2.0.0-rc3](https://github.com/ku-nlp/jumanpp/releases/tag/v2.0.0-rc3) was used for pre-training. Each
48
+ word is tokenized into subwords by [sentencepiece](https://github.com/google/sentencepiece).
49
+
50
+ ## Training data
51
+
52
+ We used the following corpora for pre-training:
53
+
54
+ - Japanese Wikipedia (as of 20221020, 3.2GB, 27M sentences, 1.3M documents)
55
+ - Japanese portion of CC-100 (85GB, 619M sentences, 66M documents)
56
+ - Japanese portion of OSCAR (54GB, 326M sentences, 25M documents)
57
+
58
+ Note that we filtered out documents annotated with "header", "footer", or "noisy" tags in OSCAR.
59
+ Also note that Japanese Wikipedia was duplicated 10 times to make the total size of the corpus comparable to that of
60
+ CC-100 and OSCAR. As a result, the total size of the training data is 171GB.
61
+
62
+ ## Training procedure
63
+
64
+ We first segmented texts in the corpora into words using [Juman++](https://github.com/ku-nlp/jumanpp).
65
+ Then, we built a sentencepiece model with 32000 tokens including words ([JumanDIC](https://github.com/ku-nlp/JumanDIC))
66
+ and subwords induced by the unigram language model of [sentencepiece](https://github.com/google/sentencepiece).
67
+
68
+ We tokenized the segmented corpora into subwords using the sentencepiece model and trained the Japanese DeBERTa model
69
+ using [transformers](https://github.com/huggingface/transformers) library.
70
+ The training took 36 days using 8 NVIDIA A100-SXM4-40GB GPUs.
71
+
72
+ The following hyperparameters were used during pre-training:
73
+
74
+ - learning_rate: 1e-4
75
+ - per_device_train_batch_size: 18
76
+ - distributed_type: multi-GPU
77
+ - num_devices: 8
78
+ - gradient_accumulation_steps: 16
79
+ - total_train_batch_size: 2,304
80
+ - max_seq_length: 512
81
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-06
82
+ - lr_scheduler_type: linear schedule with warmup
83
+ - training_steps: 300,000
84
+ - warmup_steps: 10,000
85
+
86
+ The accuracy of the trained model on the masked language modeling task was 0.799.
87
+ The evaluation set consists of 5,000 randomly sampled documents from each of the training corpora.
88
+
89
+ ## Fine-tuning on NLU tasks
90
+
91
+ We fine-tuned the following models and evaluated them on the dev set of JGLUE.
92
+ We tuned learning rate and training epochs for each model and task
93
+ following [the JGLUE paper](https://www.jstage.jst.go.jp/article/jnlp/30/1/30_63/_pdf/-char/ja).
94
+
95
+ | Model | MARC-ja/acc | JSTS/pearson | JSTS/spearman | JNLI/acc | JSQuAD/EM | JSQuAD/F1 | JComQA/acc |
96
+ |-------------------------------|-------------|--------------|---------------|----------|-----------|-----------|------------|
97
+ | Waseda RoBERTa base | 0.965 | 0.913 | 0.876 | 0.905 | 0.853 | 0.916 | 0.853 |
98
+ | Waseda RoBERTa large (seq512) | 0.969 | 0.925 | 0.890 | 0.928 | 0.910 | 0.955 | 0.900 |
99
+ | LUKE Japanese base* | 0.965 | 0.916 | 0.877 | 0.912 | - | - | 0.842 |
100
+ | LUKE Japanese large* | 0.965 | 0.932 | 0.902 | 0.927 | - | - | 0.893 |
101
+ | DeBERTaV2 base | 0.970 | 0.922 | 0.886 | 0.922 | 0.899 | 0.951 | 0.873 |
102
+ | DeBERTaV2 large | 0.968 | 0.925 | 0.892 | 0.924 | 0.912 | 0.959 | 0.890 |
103
+
104
+ *The scores of LUKE are from [the official repository](https://github.com/studio-ousia/luke).
105
+
106
+ ## Acknowledgments
107
+
108
+ This work was supported by Joint Usage/Research Center for Interdisciplinary Large-scale Information Infrastructures (
109
+ JHPCN) through General Collaboration Project no. jh221004, "Developing a Platform for Constructing and Sharing of
110
+ Large-Scale Japanese Language Models".
111
+ For training models, we used the mdx: a platform for the data-driven future.
bert/deberta-v2-large-japanese/config.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "_name_or_path": "configs/deberta_v2_large.json",
3
+ "architectures": [
4
+ "DebertaV2ForMaskedLM"
5
+ ],
6
+ "attention_head_size": 64,
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "conv_act": "gelu",
9
+ "conv_kernel_size": 3,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 1024,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
+ "layer_norm_eps": 1e-07,
16
+ "max_position_embeddings": 512,
17
+ "max_relative_positions": -1,
18
+ "model_type": "deberta-v2",
19
+ "norm_rel_ebd": "layer_norm",
20
+ "num_attention_heads": 16,
21
+ "num_hidden_layers": 24,
22
+ "pad_token_id": 0,
23
+ "pooler_dropout": 0,
24
+ "pooler_hidden_act": "gelu",
25
+ "pooler_hidden_size": 1024,
26
+ "pos_att_type": [
27
+ "p2c",
28
+ "c2p"
29
+ ],
30
+ "position_biased_input": false,
31
+ "position_buckets": 256,
32
+ "relative_attention": true,
33
+ "share_att_key": true,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.23.1",
36
+ "type_vocab_size": 0,
37
+ "vocab_size": 32000
38
+ }
bert/deberta-v2-large-japanese/special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+ {
+     "bos_token": "[CLS]",
+     "cls_token": "[CLS]",
+     "eos_token": "[SEP]",
+     "mask_token": "[MASK]",
+     "pad_token": "[PAD]",
+     "sep_token": "[SEP]",
+     "unk_token": "[UNK]"
+ }
bert/deberta-v2-large-japanese/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
bert/deberta-v2-large-japanese/tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": false,
5
+ "eos_token": "[SEP]",
6
+ "keep_accents": true,
7
+ "mask_token": "[MASK]",
8
+ "pad_token": "[PAD]",
9
+ "sep_token": "[SEP]",
10
+ "sp_model_kwargs": {},
11
+ "special_tokens_map_file": null,
12
+ "split_by_punct": false,
13
+ "tokenizer_class": "DebertaV2Tokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
bert/deberta-v3-large/.gitattributes ADDED
@@ -0,0 +1,27 @@
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.onnx filter=lfs diff=lfs merge=lfs -text
14
+ *.ot filter=lfs diff=lfs merge=lfs -text
15
+ *.parquet filter=lfs diff=lfs merge=lfs -text
16
+ *.pb filter=lfs diff=lfs merge=lfs -text
17
+ *.pt filter=lfs diff=lfs merge=lfs -text
18
+ *.pth filter=lfs diff=lfs merge=lfs -text
19
+ *.rar filter=lfs diff=lfs merge=lfs -text
20
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
22
+ *.tflite filter=lfs diff=lfs merge=lfs -text
23
+ *.tgz filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
bert/deberta-v3-large/README.md ADDED
@@ -0,0 +1,93 @@
1
+ ---
2
+ language: en
3
+ tags:
4
+ - deberta
5
+ - deberta-v3
6
+ - fill-mask
7
+ thumbnail: https://huggingface.co/front/thumbnails/microsoft.png
8
+ license: mit
9
+ ---
10
+
11
+ ## DeBERTaV3: Improving DeBERTa using ELECTRA-Style Pre-Training with Gradient-Disentangled Embedding Sharing
12
+
13
+ [DeBERTa](https://arxiv.org/abs/2006.03654) improves the BERT and RoBERTa models using disentangled attention and enhanced mask decoder. With those two improvements, DeBERTa out perform RoBERTa on a majority of NLU tasks with 80GB training data.
14
+
15
+ In [DeBERTa V3](https://arxiv.org/abs/2111.09543), we further improved the efficiency of DeBERTa using ELECTRA-Style pre-training with Gradient Disentangled Embedding Sharing. Compared to DeBERTa, our V3 version significantly improves the model performance on downstream tasks. You can find more technique details about the new model from our [paper](https://arxiv.org/abs/2111.09543).
16
+
17
+ Please check the [official repository](https://github.com/microsoft/DeBERTa) for more implementation details and updates.
18
+
19
+ The DeBERTa V3 large model comes with 24 layers and a hidden size of 1024. It has 304M backbone parameters with a vocabulary containing 128K tokens which introduces 131M parameters in the Embedding layer. This model was trained using the 160GB data as DeBERTa V2.
20
+
21
+
22
+ #### Fine-tuning on NLU tasks
23
+
24
+ We present the dev results on SQuAD 2.0 and MNLI tasks.
25
+
26
+ | Model |Vocabulary(K)|Backbone #Params(M)| SQuAD 2.0(F1/EM) | MNLI-m/mm(ACC)|
27
+ |-------------------|----------|-------------------|-----------|----------|
28
+ | RoBERTa-large |50 |304 | 89.4/86.5 | 90.2 |
29
+ | XLNet-large |32 |- | 90.6/87.9 | 90.8 |
30
+ | DeBERTa-large |50 |- | 90.7/88.0 | 91.3 |
31
+ | **DeBERTa-v3-large**|128|304 | **91.5/89.0**| **91.8/91.9**|
32
+
33
+
34
+ #### Fine-tuning with HF transformers
35
+
36
+ ```bash
37
+ #!/bin/bash
38
+
39
+ cd transformers/examples/pytorch/text-classification/
40
+
41
+ pip install datasets
42
+ export TASK_NAME=mnli
43
+
44
+ output_dir="ds_results"
45
+
46
+ num_gpus=8
47
+
48
+ batch_size=8
49
+
50
+ python -m torch.distributed.launch --nproc_per_node=${num_gpus} \
51
+ run_glue.py \
52
+ --model_name_or_path microsoft/deberta-v3-large \
53
+ --task_name $TASK_NAME \
54
+ --do_train \
55
+ --do_eval \
56
+ --evaluation_strategy steps \
57
+ --max_seq_length 256 \
58
+ --warmup_steps 50 \
59
+ --per_device_train_batch_size ${batch_size} \
60
+ --learning_rate 6e-6 \
61
+ --num_train_epochs 2 \
62
+ --output_dir $output_dir \
63
+ --overwrite_output_dir \
64
+ --logging_steps 1000 \
65
+ --logging_dir $output_dir
66
+
67
+ ```
68
+
69
+ ### Citation
70
+
71
+ If you find DeBERTa useful for your work, please cite the following papers:
72
+
73
+ ``` latex
74
+ @misc{he2021debertav3,
75
+ title={DeBERTaV3: Improving DeBERTa using ELECTRA-Style Pre-Training with Gradient-Disentangled Embedding Sharing},
76
+ author={Pengcheng He and Jianfeng Gao and Weizhu Chen},
77
+ year={2021},
78
+ eprint={2111.09543},
79
+ archivePrefix={arXiv},
80
+ primaryClass={cs.CL}
81
+ }
82
+ ```
83
+
84
+ ``` latex
85
+ @inproceedings{
86
+ he2021deberta,
87
+ title={DEBERTA: DECODING-ENHANCED BERT WITH DISENTANGLED ATTENTION},
88
+ author={Pengcheng He and Xiaodong Liu and Jianfeng Gao and Weizhu Chen},
89
+ booktitle={International Conference on Learning Representations},
90
+ year={2021},
91
+ url={https://openreview.net/forum?id=XPZIaotutsD}
92
+ }
93
+ ```
bert/deberta-v3-large/config.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "model_type": "deberta-v2",
3
+ "attention_probs_dropout_prob": 0.1,
4
+ "hidden_act": "gelu",
5
+ "hidden_dropout_prob": 0.1,
6
+ "hidden_size": 1024,
7
+ "initializer_range": 0.02,
8
+ "intermediate_size": 4096,
9
+ "max_position_embeddings": 512,
10
+ "relative_attention": true,
11
+ "position_buckets": 256,
12
+ "norm_rel_ebd": "layer_norm",
13
+ "share_att_key": true,
14
+ "pos_att_type": "p2c|c2p",
15
+ "layer_norm_eps": 1e-7,
16
+ "max_relative_positions": -1,
17
+ "position_biased_input": false,
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 24,
20
+ "type_vocab_size": 0,
21
+ "vocab_size": 128100
22
+ }
bert/deberta-v3-large/generator_config.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "model_type": "deberta-v2",
3
+ "attention_probs_dropout_prob": 0.1,
4
+ "hidden_act": "gelu",
5
+ "hidden_dropout_prob": 0.1,
6
+ "hidden_size": 1024,
7
+ "initializer_range": 0.02,
8
+ "intermediate_size": 4096,
9
+ "max_position_embeddings": 512,
10
+ "relative_attention": true,
11
+ "position_buckets": 256,
12
+ "norm_rel_ebd": "layer_norm",
13
+ "share_att_key": true,
14
+ "pos_att_type": "p2c|c2p",
15
+ "layer_norm_eps": 1e-7,
16
+ "max_relative_positions": -1,
17
+ "position_biased_input": false,
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 12,
20
+ "type_vocab_size": 0,
21
+ "vocab_size": 128100
22
+ }
bert/deberta-v3-large/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dd5b5d93e2db101aaf281df0ea1216c07ad73620ff59c5b42dccac4bf2eef5b5
+ size 873673253
bert/deberta-v3-large/spm.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+ size 2464616
bert/deberta-v3-large/tokenizer_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+     "do_lower_case": false,
+     "vocab_type": "spm"
+ }
commons.py ADDED
@@ -0,0 +1,158 @@
+ import math
+ import torch
+ from torch.nn import functional as F
+
+
+ def init_weights(m, mean=0.0, std=0.01):
+     classname = m.__class__.__name__
+     if classname.find("Conv") != -1:
+         m.weight.data.normal_(mean, std)
+
+
+ def get_padding(kernel_size, dilation=1):
+     return int((kernel_size * dilation - dilation) / 2)
+
+
+ def convert_pad_shape(pad_shape):
+     layer = pad_shape[::-1]
+     pad_shape = [item for sublist in layer for item in sublist]
+     return pad_shape
+
+
+ def intersperse(lst, item):
+     result = [item] * (len(lst) * 2 + 1)
+     result[1::2] = lst
+     return result
+
+
+ def kl_divergence(m_p, logs_p, m_q, logs_q):
+     """KL(P||Q)"""
+     kl = (logs_q - logs_p) - 0.5
+     kl += (
+         0.5 * (torch.exp(2.0 * logs_p) + ((m_p - m_q) ** 2)) * torch.exp(-2.0 * logs_q)
+     )
+     return kl
+
+
+ def rand_gumbel(shape):
+     """Sample from the Gumbel distribution, protect from overflows."""
+     uniform_samples = torch.rand(shape) * 0.99998 + 0.00001
+     return -torch.log(-torch.log(uniform_samples))
+
+
+ def rand_gumbel_like(x):
+     g = rand_gumbel(x.size()).to(dtype=x.dtype, device=x.device)
+     return g
+
+
+ def slice_segments(x, ids_str, segment_size=4):
+     gather_indices = ids_str.view(x.size(0), 1, 1).repeat(
+         1, x.size(1), 1
+     ) + torch.arange(segment_size, device=x.device)
+     return torch.gather(x, 2, gather_indices)
+
+
+ def rand_slice_segments(x, x_lengths=None, segment_size=4):
+     b, d, t = x.size()
+     if x_lengths is None:
+         x_lengths = t
+     ids_str_max = torch.clamp(x_lengths - segment_size + 1, min=0)
+     ids_str = (torch.rand([b], device=x.device) * ids_str_max).to(dtype=torch.long)
+     ret = slice_segments(x, ids_str, segment_size)
+     return ret, ids_str
+
+
+ def get_timing_signal_1d(length, channels, min_timescale=1.0, max_timescale=1.0e4):
+     position = torch.arange(length, dtype=torch.float)
+     num_timescales = channels // 2
+     log_timescale_increment = math.log(float(max_timescale) / float(min_timescale)) / (
+         num_timescales - 1
+     )
+     inv_timescales = min_timescale * torch.exp(
+         torch.arange(num_timescales, dtype=torch.float) * -log_timescale_increment
+     )
+     scaled_time = position.unsqueeze(0) * inv_timescales.unsqueeze(1)
+     signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], 0)
+     signal = F.pad(signal, [0, 0, 0, channels % 2])
+     signal = signal.view(1, channels, length)
+     return signal
+
+
+ def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4):
+     b, channels, length = x.size()
+     signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale)
+     return x + signal.to(dtype=x.dtype, device=x.device)
+
+
+ def cat_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4, axis=1):
+     b, channels, length = x.size()
+     signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale)
+     return torch.cat([x, signal.to(dtype=x.dtype, device=x.device)], axis)
+
+
+ def subsequent_mask(length):
+     mask = torch.tril(torch.ones(length, length)).unsqueeze(0).unsqueeze(0)
+     return mask
+
+
+ @torch.jit.script
+ def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
+     n_channels_int = n_channels[0]
+     in_act = input_a + input_b
+     t_act = torch.tanh(in_act[:, :n_channels_int, :])
+     s_act = torch.sigmoid(in_act[:, n_channels_int:, :])
+     acts = t_act * s_act
+     return acts
+
+
+ def convert_pad_shape(pad_shape):
+     layer = pad_shape[::-1]
+     pad_shape = [item for sublist in layer for item in sublist]
+     return pad_shape
+
+
+ def shift_1d(x):
+     x = F.pad(x, convert_pad_shape([[0, 0], [0, 0], [1, 0]]))[:, :, :-1]
+     return x
+
+
+ def sequence_mask(length, max_length=None):
+     if max_length is None:
+         max_length = length.max()
+     x = torch.arange(max_length, dtype=length.dtype, device=length.device)
+     return x.unsqueeze(0) < length.unsqueeze(1)
+
+
+ def generate_path(duration, mask):
+     """
+     duration: [b, 1, t_x]
+     mask: [b, 1, t_y, t_x]
+     """
+
+     b, _, t_y, t_x = mask.shape
+     cum_duration = torch.cumsum(duration, -1)
+
+     cum_duration_flat = cum_duration.view(b * t_x)
+     path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype)
+     path = path.view(b, t_x, t_y)
+     path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1]
+     path = path.unsqueeze(1).transpose(2, 3) * mask
+     return path
+
+
+ def clip_grad_value_(parameters, clip_value, norm_type=2):
+     if isinstance(parameters, torch.Tensor):
+         parameters = [parameters]
+     parameters = list(filter(lambda p: p.grad is not None, parameters))
+     norm_type = float(norm_type)
+     if clip_value is not None:
+         clip_value = float(clip_value)
+
+     total_norm = 0
+     for p in parameters:
+         param_norm = p.grad.data.norm(norm_type)
+         total_norm += param_norm.item() ** norm_type
+         if clip_value is not None:
+             p.grad.data.clamp_(min=-clip_value, max=clip_value)
+     total_norm = total_norm ** (1.0 / norm_type)
+     return total_norm
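A short usage sketch for a few of the helpers above, to make their tensor shapes concrete; it only assumes this file sits at the repo root as commons.py:

    # Shape check for a few helpers in commons.py (illustrative only).
    import torch
    from commons import intersperse, sequence_mask, generate_path, rand_slice_segments

    print(intersperse([1, 2, 3], 0))           # [0, 1, 0, 2, 0, 3, 0]

    lengths = torch.tensor([3, 5])
    print(sequence_mask(lengths))              # [2, 5] boolean mask

    # generate_path expands per-phoneme durations into an alignment path.
    duration = torch.tensor([[[2, 3]]])        # [b=1, 1, t_x=2]
    attn_mask = torch.ones(1, 1, 5, 2)         # [b, 1, t_y=5, t_x=2]
    print(generate_path(duration, attn_mask).shape)  # torch.Size([1, 1, 5, 2])

    x = torch.randn(2, 192, 100)               # [batch, channels, frames]
    seg, ids = rand_slice_segments(x, torch.tensor([100, 80]), segment_size=32)
    print(seg.shape)                           # torch.Size([2, 192, 32])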
config.py ADDED
@@ -0,0 +1,248 @@
+ """
+ @Desc: reads the global configuration file
+ """
+ import argparse
+ import yaml
+ from typing import Dict, List
+ import os
+ import shutil
+ import sys
+
+
+ class Resample_config:
+     """Resampling configuration"""
+
+     def __init__(self, in_dir: str, out_dir: str, sampling_rate: int = 44100):
+         self.sampling_rate: int = sampling_rate  # target sampling rate
+         self.in_dir: str = in_dir  # directory of the audio files to process
+         self.out_dir: str = out_dir  # output directory for the resampled audio
+
+     @classmethod
+     def from_dict(cls, dataset_path: str, data: Dict[str, any]):
+         """Build an instance from a dict"""
+
+         # Path validity is not checked here; that logic lives in resample.py
+         data["in_dir"] = os.path.join(dataset_path, data["in_dir"])
+         data["out_dir"] = os.path.join(dataset_path, data["out_dir"])
+
+         return cls(**data)
+
+
+ class Preprocess_text_config:
+     """Text preprocessing configuration"""
+
+     def __init__(
+         self,
+         transcription_path: str,
+         cleaned_path: str,
+         train_path: str,
+         val_path: str,
+         config_path: str,
+         val_per_lang: int = 5,
+         max_val_total: int = 10000,
+         clean: bool = True,
+     ):
+         self.transcription_path: str = transcription_path  # raw transcript file; each line is {wav_path}|{speaker_name}|{language}|{text}
+         self.cleaned_path: str = cleaned_path  # cleaned text path; may be left empty, in which case it is generated next to the raw text
+         self.train_path: str = train_path  # training list path; may be left empty, in which case it is generated next to the raw text
+         self.val_path: str = val_path  # validation list path; may be left empty, in which case it is generated next to the raw text
+         self.config_path: str = config_path  # model config file path
+         self.val_per_lang: int = val_per_lang  # number of validation utterances per speaker
+         self.max_val_total: int = max_val_total  # maximum validation set size; the surplus is truncated and moved to the training set
+         self.clean: bool = clean  # whether to run data cleaning
+
+     @classmethod
+     def from_dict(cls, dataset_path: str, data: Dict[str, any]):
+         """Build an instance from a dict"""
+
+         data["transcription_path"] = os.path.join(
+             dataset_path, data["transcription_path"]
+         )
+         if data["cleaned_path"] == "" or data["cleaned_path"] is None:
+             data["cleaned_path"] = None
+         else:
+             data["cleaned_path"] = os.path.join(dataset_path, data["cleaned_path"])
+         data["train_path"] = os.path.join(dataset_path, data["train_path"])
+         data["val_path"] = os.path.join(dataset_path, data["val_path"])
+         data["config_path"] = os.path.join(dataset_path, data["config_path"])
+
+         return cls(**data)
+
+
+ class Bert_gen_config:
+     """bert_gen configuration"""
+
+     def __init__(
+         self,
+         config_path: str,
+         num_processes: int = 2,
+         device: str = "cuda",
+         use_multi_device: bool = False,
+     ):
+         self.config_path = config_path
+         self.num_processes = num_processes
+         self.device = device
+         self.use_multi_device = use_multi_device
+
+     @classmethod
+     def from_dict(cls, dataset_path: str, data: Dict[str, any]):
+         data["config_path"] = os.path.join(dataset_path, data["config_path"])
+
+         return cls(**data)
+
+
+ class Emo_gen_config:
+     """emo_gen configuration"""
+
+     def __init__(
+         self,
+         config_path: str,
+         num_processes: int = 2,
+         device: str = "cuda",
+         use_multi_device: bool = False,
+     ):
+         self.config_path = config_path
+         self.num_processes = num_processes
+         self.device = device
+         self.use_multi_device = use_multi_device
+
+     @classmethod
+     def from_dict(cls, dataset_path: str, data: Dict[str, any]):
+         data["config_path"] = os.path.join(dataset_path, data["config_path"])
+
+         return cls(**data)
+
+
+ class Train_ms_config:
+     """Training configuration"""
+
+     def __init__(
+         self,
+         config_path: str,
+         env: Dict[str, any],
+         base: Dict[str, any],
+         model: str,
+         num_workers: int,
+         spec_cache: bool,
+         keep_ckpts: int,
+     ):
+         self.env = env  # environment variables to load
+         self.base = base  # base-model (pretrained checkpoint) settings
+         self.model = model  # checkpoint directory, relative to dataset_path rather than the project root
+         self.config_path = config_path  # config file path
+         self.num_workers = num_workers  # number of data-loader workers
+         self.spec_cache = spec_cache  # whether to cache spectrograms
+         self.keep_ckpts = keep_ckpts  # number of checkpoints to keep
+
+     @classmethod
+     def from_dict(cls, dataset_path: str, data: Dict[str, any]):
+         # data["model"] = os.path.join(dataset_path, data["model"])
+         data["config_path"] = os.path.join(dataset_path, data["config_path"])
+
+         return cls(**data)
+
+
+ class Webui_config:
+     """webui configuration"""
+
+     def __init__(
+         self,
+         device: str,
+         model: str,
+         config_path: str,
+         language_identification_library: str,
+         port: int = 7860,
+         share: bool = False,
+         debug: bool = False,
+     ):
+         self.device: str = device  # inference device
+         self.model: str = model  # model path
+         self.config_path: str = config_path  # config file path
+         self.port: int = port  # port number
+         self.share: bool = share  # whether to deploy publicly (expose to the internet)
+         self.debug: bool = debug  # whether to enable debug mode
+         self.language_identification_library: str = (
+             language_identification_library  # language identification library
+         )
+
+     @classmethod
+     def from_dict(cls, dataset_path: str, data: Dict[str, any]):
+         data["config_path"] = os.path.join(dataset_path, data["config_path"])
+         data["model"] = os.path.join(dataset_path, data["model"])
+         return cls(**data)
+
+
+ class Server_config:
+     def __init__(
+         self, models: List[Dict[str, any]], port: int = 5000, device: str = "cuda"
+     ):
+         self.models: List[Dict[str, any]] = models  # configuration of every model to load
+         self.port: int = port  # port number
+         self.device: str = device  # default device for models
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, any]):
+         return cls(**data)
+
+
+ class Translate_config:
+     """Translation API configuration"""
+
+     def __init__(self, app_key: str, secret_key: str):
+         self.app_key = app_key
+         self.secret_key = secret_key
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, any]):
+         return cls(**data)
+
+
+ class Config:
+     def __init__(self, config_path: str):
+         if not os.path.isfile(config_path) and os.path.isfile("default_config.yml"):
+             shutil.copy(src="default_config.yml", dst=config_path)
+             print(
+                 f"Generated config file {config_path} from default_config.yml. Please edit it as described in its comments, then run again."
+             )
+             print("Unless you have a special need, do not modify default_config.yml; back that file up instead.")
+             sys.exit(0)
+         with open(file=config_path, mode="r", encoding="utf-8") as file:
+             yaml_config: Dict[str, any] = yaml.safe_load(file.read())
+             dataset_path: str = yaml_config["dataset_path"]
+             openi_token: str = yaml_config["openi_token"]
+             self.dataset_path: str = dataset_path
+             self.mirror: str = yaml_config["mirror"]
+             self.openi_token: str = openi_token
+             self.resample_config: Resample_config = Resample_config.from_dict(
+                 dataset_path, yaml_config["resample"]
+             )
+             self.preprocess_text_config: Preprocess_text_config = (
+                 Preprocess_text_config.from_dict(
+                     dataset_path, yaml_config["preprocess_text"]
+                 )
+             )
+             self.bert_gen_config: Bert_gen_config = Bert_gen_config.from_dict(
+                 dataset_path, yaml_config["bert_gen"]
+             )
+             self.emo_gen_config: Emo_gen_config = Emo_gen_config.from_dict(
+                 dataset_path, yaml_config["emo_gen"]
+             )
+             self.train_ms_config: Train_ms_config = Train_ms_config.from_dict(
+                 dataset_path, yaml_config["train_ms"]
+             )
+             self.webui_config: Webui_config = Webui_config.from_dict(
+                 dataset_path, yaml_config["webui"]
+             )
+             self.server_config: Server_config = Server_config.from_dict(
+                 yaml_config["server"]
+             )
+             self.translate_config: Translate_config = Translate_config.from_dict(
+                 yaml_config["translate"]
+             )
+
+
+ parser = argparse.ArgumentParser()
+ # Renamed as below to avoid clashing with the old config.json
+ parser.add_argument("-y", "--yml_config", type=str, default="config.yml")
+ args, _ = parser.parse_known_args()
+ config = Config(args.yml_config)
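Importing config.py parses the -y/--yml_config flag and builds a module-level `config` object from config.yml. A small sketch of reading a few of its fields, assuming it is run from the repo root where config.yml lives:

    # Sketch: config.py exposes a ready-made `config` object built from config.yml.
    from config import config

    print(config.dataset_path)            # "Data/V23"
    print(config.webui_config.model)      # joined with dataset_path: Data/V23/models/G_408000.pth
    print(config.bert_gen_config.device)  # "cpu"
    print(config.server_config.port)      # 5000

    # Because config.py uses parse_known_args(), any script that imports it can be
    # pointed at another YAML without clashing with the script's own flags:
    #   python your_script.py -y some_other_config.yml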
config.yml ADDED
@@ -0,0 +1,177 @@
+ # Global configuration
+ # If you need several config files at once (e.g. two GPUs training two datasets), point to a config file via an environment variable; ./config.yml is used by default
+
+ # A common path layout is provided so that data is stored in one place instead of being scattered
+ # Each dataset and its models live under one directory; every later path setting is relative to dataset_path
+ # If left empty, paths are taken relative to the project root
+ dataset_path: "Data/V23"
+
+ # Model mirror, huggingface by default; the openi mirror requires openi_token
+ mirror: ""
+ openi_token: "" # openi token
+
+ # resample: audio resampling settings
+ # Note: a space is required after ":"
+ resample:
+   # target sampling rate
+   sampling_rate: 44100
+   # input directory; every .wav file under this path will be resampled
+   # fill in a path relative to dataset_path
+   in_dir: "" # relative to the project root this is /datasetPath/in_dir
+   # output directory for the resampled audio
+   out_dir: ""
+
+
+ # preprocess_text: dataset preprocessing settings
+ # Note: a space is required after ":"
+ preprocess_text:
+   # raw transcript file; each line is {wav_path}|{speaker_name}|{language}|{text}
+   transcription_path: "filelists/whole.list"
+   # cleaned text path; may be left empty, in which case it is generated next to the raw text
+   cleaned_path: ""
+   # training list path
+   train_path: "filelists/train.list"
+   # validation list path
+   val_path: "filelists/val.list"
+   # config file path
+   config_path: "configs/config.json"
+   # number of validation utterances per language
+   val_per_lang: 4
+   # maximum validation set size; the surplus is truncated and moved to the training set
+   max_val_total: 800
+   # whether to run data cleaning
+   clean: true
+
+
+ # bert_gen settings
+ # Note: a space is required after ":"
+ bert_gen:
+   # path of the training dataset config file
+   config_path: "configs/config.json"
+   # number of parallel processes
+   num_processes: 4
+   # device: "cuda" for GPU inference, "cpu" for CPU inference
+   # this option also sets the default device of get_bert_feature
+   device: "cpu"
+   # multi-GPU inference
+   use_multi_device: false
+
+ # emo_gen settings
+ # Note: a space is required after ":"
+ emo_gen:
+   # path of the training dataset config file
+   config_path: "configs/config.json"
+   # number of parallel processes
+   num_processes: 16
+   # device: "cuda" for GPU inference, "cpu" for CPU inference
+   device: "cpu"
+   # multi-GPU inference
+   use_multi_device: false
+
+ # train: training settings
+ # Note: a space is required after ":"
+ train_ms:
+   env:
+     MASTER_ADDR: "localhost"
+     MASTER_PORT: 10086
+     WORLD_SIZE: 1
+     LOCAL_RANK: 0
+     RANK: 0
+     # environment variables of any name may be added here
+     # THE_ENV_VAR_YOU_NEED_TO_USE: "1234567"
+   # base-model (pretrained checkpoint) settings
+   base:
+     use_base_model: True
+     repo_id: "Stardust_minus/Bert-VITS2"
+     model_image: "Bert-VITS2_2.3底模" # model name on the openi site
+   # checkpoint directory: unlike older versions that used logs/model_name, checkpoints now live under Data/<your dataset>/models
+   model: "models"
+   # config file path
+   config_path: "configs/config.json"
+   # number of workers for training; keeping it below the CPU core count is recommended
+   num_workers: 22
+   # disabling this saves close to 50% disk space, but may slow training down and raise CPU usage
+   spec_cache: True
+   # number of checkpoints to keep; older ones are deleted to save space
+   keep_ckpts: 8
+
+
+ # webui settings
+ # Note: a space is required after ":"
+ webui:
+   # inference device
+   device: "cpu"
+   # model path
+   model: "models/G_408000.pth"
+   # config file path
+   config_path: "configs/config.json"
+   # port number
+   port: 7860
+   # whether to deploy publicly (expose to the internet)
+   share: false
+   # whether to enable debug mode
+   debug: false
+   # language identification library, langid or fastlid
+   language_identification_library: "langid"
+
+
+ # server-fastapi settings
+ # Note: a space is required after ":"
+ # Note: every path in this section is relative to the project root
+ server:
+   # port number
+   port: 5000
+   # default device for models; currently not actually implemented
+   device: "cpu"
+   # configuration of every model to load; several models may be listed, or none at all and loaded manually once the page is up
+   # to start without models, delete the two sample entries and set models to [ ], i.e. an empty list (see the second model's speakers)
+   # note that every model must have valid model and config paths; empty paths cause load errors
+   # you may also leave models out and fill them in manually after the page loads
+   models:
+     - # model path
+       model: ""
+       # path of the model's config.json
+       config: ""
+       # device for this model; overrides the default if set
+       device: "cpu"
+       # default language of this model
+       language: "ZH"
+       # per-speaker default parameters
+       # not every speaker needs an entry; missing ones use the defaults
+       # can be left empty for now; per-speaker settings are not implemented yet
+       speakers:
+         - speaker: "科比"
+           sdp_ratio: 0.2
+           noise_scale: 0.6
+           noise_scale_w: 0.8
+           length_scale: 1
+         - speaker: "五条悟"
+           sdp_ratio: 0.3
+           noise_scale: 0.7
+           noise_scale_w: 0.8
+           length_scale: 0.5
+         - speaker: "安倍晋三"
+           sdp_ratio: 0.2
+           noise_scale: 0.6
+           noise_scale_w: 0.8
+           length_scale: 1.2
+     - # model path
+       model: ""
+       # path of the model's config.json
+       config: ""
+       # device for this model; overrides the default if set
+       device: "cpu"
+       # default language of this model
+       language: "JP"
+       # per-speaker default parameters
+       # not every speaker needs an entry; missing ones use the defaults
+       speakers: [ ] # may also be left empty
+
+ # Baidu Translate open platform API settings
+ # API docs: https://api.fanyi.baidu.com/doc/21
+ # do not share your app id and key publicly on GitHub or similar sites
+ translate:
+   # your APPID
+   "app_key": "20231117001883321"
+   # your key
+   "secret_key": "lMQbvZHeJveDceLof2wf"
onnx/BangDreamApi.json ADDED
@@ -0,0 +1,244 @@
+ {
+ "Folder": "BangDreamApi",
+ "Name": "BangDreamApi",
+ "Type": "BertVits",
+ "Symbol": [
+ "_",
+ "AA",
+ "E",
+ "EE",
+ "En",
+ "N",
+ "OO",
+ "V",
+ "a",
+ "a:",
+ "aa",
+ "ae",
+ "ah",
+ "ai",
+ "an",
+ "ang",
+ "ao",
+ "aw",
+ "ay",
+ "b",
+ "by",
+ "c",
+ "ch",
+ "d",
+ "dh",
+ "dy",
+ "e",
+ "e:",
+ "eh",
+ "ei",
+ "en",
+ "eng",
+ "er",
+ "ey",
+ "f",
+ "g",
+ "gy",
+ "h",
+ "hh",
+ "hy",
+ "i",
+ "i0",
+ "i:",
+ "ia",
+ "ian",
+ "iang",
+ "iao",
+ "ie",
+ "ih",
+ "in",
+ "ing",
+ "iong",
+ "ir",
+ "iu",
+ "iy",
+ "j",
+ "jh",
+ "k",
+ "ky",
+ "l",
+ "m",
+ "my",
+ "n",
+ "ng",
+ "ny",
+ "o",
+ "o:",
+ "ong",
+ "ou",
+ "ow",
+ "oy",
+ "p",
+ "py",
+ "q",
+ "r",
+ "ry",
+ "s",
+ "sh",
+ "t",
+ "th",
+ "ts",
+ "ty",
+ "u",
+ "u:",
+ "ua",
+ "uai",
+ "uan",
+ "uang",
+ "uh",
+ "ui",
+ "un",
+ "uo",
+ "uw",
+ "v",
+ "van",
+ "ve",
+ "vn",
+ "w",
+ "x",
+ "y",
+ "z",
+ "zh",
+ "zy",
+ "!",
+ "?",
+ "\u2026",
+ ",",
+ ".",
+ "'",
+ "-",
+ "SP",
+ "UNK"
+ ],
+ "Cleaner": "",
+ "Rate": 44100,
+ "CharaMix": true,
+ "spk2id": {
+ "香澄": 0,
+ "有咲": 1,
+ "沙綾": 2,
+ "りみ": 3,
+ "たえ": 4,
+ "三月七1": 5,
+ "紗夜": 6,
+ "ロック": 7,
+ "パレオ": 8,
+ "レイヤ": 9,
+ "チュチュ": 10,
+ "彩": 11,
+ "千聖": 12,
+ "イヴ": 13,
+ "日菜": 14,
+ "麻弥": 15,
+ "蘭": 16,
+ "モカ": 17,
+ "巴": 18,
+ "ひまり": 19,
+ "つぐみ": 20,
+ "はぐみ": 21,
+ "花音": 22,
+ "美咲": 23,
+ "薫": 24,
+ "こころ": 25,
+ "つくし": 26,
+ "七深": 27,
+ "透子": 28,
+ "ましろ": 29,
+ "瑠唯": 30,
+ "友希那": 31,
+ "あこ": 32,
+ "リサ": 33,
+ "燐子": 34,
+ "燈": 35,
+ "愛音": 36,
+ "楽奈": 37,
+ "そよ": 38,
+ "立希": 39,
+ "ますき": 40,
+ "祥子": 41,
+ "睦": 42,
+ "海鈴": 43,
+ "にゃむ": 44,
+ "初華": 45,
+ "華戀": 46,
+ "晶": 47,
+ "光": 48,
+ "未知留": 49,
+ "香子": 50,
+ "雙葉": 51,
+ "真晝": 52,
+ "艾露": 53,
+ "珠緒": 54,
+ "艾露露": 55,
+ "純那": 56,
+ "克洛迪娜": 57,
+ "真矢": 58,
+ "奈奈": 59,
+ "壘": 60,
+ "文": 61,
+ "一愛": 62,
+ "菈樂菲": 63,
+ "司": 64,
+ "美空": 65,
+ "靜羽": 66,
+ "悠悠子": 67,
+ "八千代": 68,
+ "栞": 69,
+ "美帆": 70,
+ "芙蘿菈": 71,
+ "克蕾兒": 72,
+ "安德露": 73,
+ "瑪莉亞貝菈": 74,
+ "克拉迪亞": 75,
+ "桃樂西": 76,
+ "瑪麗安": 77,
+ "八重神子1": 78,
+ "娜塔莎": 79,
+ "宵宫": 80,
+ "派蒙11": 81,
+ "派蒙13": 82,
+ "派蒙3": 83,
+ "派蒙7": 84,
+ "派蒙8": 85,
+ "派蒙9": 86,
+ "派蒙10": 87,
+ "派蒙6": 88,
+ "派蒙4": 89,
+ "派蒙1": 90,
+ "派蒙2": 91,
+ "派蒙15": 92,
+ "派蒙16": 93,
+ "派蒙14": 94,
+ "派蒙12": 95,
+ "派蒙5": 96,
+ "纳西妲1": 97
+ },
+ "LanguageMap": {
+ "ZH": [
+ 0,
+ 0
+ ],
+ "JP": [
+ 1,
+ 6
+ ],
+ "EN": [
+ 2,
+ 8
+ ]
+ },
+ "Dict": "BasicDict",
+ "BertPath": [
+ "chinese-roberta-wwm-ext-large",
+ "deberta-v2-large-japanese",
+ "bert-base-japanese-v3"
+ ],
+ "Clap": "clap-htsat-fused"
+ }
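This JSON describes the exported ONNX voice model: its phoneme symbol table, sample rate, speaker-name-to-id map, per-language indices, and the BERT folders it depends on. A hedged sketch of reading it; how the ONNX runtime consumes these fields is not shown here, so anything beyond the literal key lookups is an assumption:

    # Sketch: inspect onnx/BangDreamApi.json and resolve a speaker name.
    import json

    with open("onnx/BangDreamApi.json", "r", encoding="utf-8") as f:
        meta = json.load(f)

    symbol_to_id = {s: i for i, s in enumerate(meta["Symbol"])}
    speaker_id = meta["spk2id"]["香澄"]      # 0
    lang_info = meta["LanguageMap"]["JP"]    # [1, 6] (indices used by the exporter)

    print(meta["Rate"], speaker_id, lang_info)
    print(len(meta["Symbol"]), "symbols;", len(meta["spk2id"]), "speakers")
    print(meta["BertPath"])                  # per-language BERT folders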
onnx/BangDreamApi/BangDreamApi_dec.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e5fb525ea96065d5dcf99145abb9bac3e364ad7ce8f08b229d0ed59944502ba
+ size 59185716
onnx/BangDreamApi/BangDreamApi_dp.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2e46029104046450433930ee6711d55d637bf42a85e005fbcef3e1584c6b9f35
+ size 1810905
onnx/BangDreamApi/BangDreamApi_emb.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d211b76421b82774672355681b40b565bb841d16b4315bff49a3995e14b6660
+ size 202530
onnx/BangDreamApi/BangDreamApi_enc_p.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b1ca616e194d8629020bc579c913b7e25deac1e4a211e2f1d27b0f10df7b4c4c
+ size 33237250
onnx/BangDreamApi/BangDreamApi_flow.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:971e53fc6586231a89459bc876c86e6a565f22bbba46e40b7367c6558391603c
+ size 120711608