Mahiruoshi
committed on
Commit
•
3efe616
1
Parent(s):
79fddf7
Update app.py
Browse files
app.py
CHANGED
@@ -18,6 +18,7 @@ from text.symbols import symbols
|
|
18 |
from text import text_to_sequence
|
19 |
import unicodedata
|
20 |
from scipy.io.wavfile import write
|
|
|
21 |
|
22 |
def get_text(text, hps):
|
23 |
text_norm = text_to_sequence(text, hps.data.text_cleaners)
|
@@ -88,20 +89,50 @@ def selection(speaker):
|
|
88 |
elif speaker == "派蒙":
|
89 |
spk = 16
|
90 |
return spk
|
91 |
-
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
if language == "中文":
|
94 |
-
tts_input1 = "[ZH]" +
|
95 |
-
return tts_input1
|
96 |
-
if language == "英文":
|
97 |
-
tts_input1 = "[EN]" + tts_input0.replace('\n','.').replace(' ',',') + "[EN]"
|
98 |
return tts_input1
|
|
|
|
|
|
|
|
|
99 |
elif language == "日文":
|
100 |
-
tts_input1 = "[JA]" +
|
101 |
return tts_input1
|
102 |
-
def infer(language,text,speaker_id,
|
|
|
103 |
speaker_id = int(selection(speaker_id))
|
104 |
-
stn_tst = get_text(sle(language,text), hps_ms)
|
105 |
with torch.no_grad():
|
106 |
x_tst = stn_tst.unsqueeze(0).to(dev)
|
107 |
x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
|
@@ -112,11 +143,10 @@ def infer(language,text,speaker_id, n_scale= 0.667,n_scale_w = 0.8, l_scale = 1
|
|
112 |
spending_time = "推理时间:"+str(t2-t1)+"s"
|
113 |
print(spending_time)
|
114 |
return (hps_ms.data.sampling_rate, audio)
|
115 |
-
lan = ["中文","日文","
|
116 |
idols = ["高咲侑(误)","歩夢","かすみ","しずく","果林","愛","彼方","せつ菜","璃奈","栞子","エマ","ランジュ","ミア","三色绘恋1","三色绘恋2","派蒙"]
|
117 |
|
118 |
|
119 |
-
|
120 |
dev = torch.device("cpu")
|
121 |
hps_ms = utils.get_hparams_from_file("config.json")
|
122 |
net_g_ms = SynthesizerTrn(
|
@@ -138,14 +168,16 @@ with app:
|
|
138 |
|
139 |
with gr.TabItem("Basic"):
|
140 |
|
141 |
-
tts_input1 = gr.TextArea(label="
|
142 |
-
|
|
|
|
|
143 |
para_input1 = gr.Slider(minimum= 0.01,maximum=1.0,label="更改噪声比例", value=0.667)
|
144 |
para_input2 = gr.Slider(minimum= 0.01,maximum=1.0,label="更改噪声偏差", value=0.8)
|
145 |
para_input3 = gr.Slider(minimum= 0.1,maximum=10,label="更改时间比例", value=1)
|
146 |
tts_submit = gr.Button("Generate", variant="primary")
|
147 |
speaker1 = gr.Dropdown(label="选择说话人",choices=idols, value="かすみ", interactive=True)
|
148 |
tts_output2 = gr.Audio(label="Output")
|
149 |
-
tts_submit.click(infer, [language,tts_input1,speaker1,para_input1,para_input2,para_input3], [tts_output2])
|
150 |
#app.launch(share=True)
|
151 |
app.launch()
|
|
|
18 |
from text import text_to_sequence
|
19 |
import unicodedata
|
20 |
from scipy.io.wavfile import write
|
21 |
+
import openai
|
22 |
|
23 |
def get_text(text, hps):
|
24 |
text_norm = text_to_sequence(text, hps.data.text_cleaners)
|
|
|
89 |
elif speaker == "派蒙":
|
90 |
spk = 16
|
91 |
return spk
|
92 |
+
def friend_chat(text,key,call_name,indentity):
    """Ask the OpenAI completion endpoint for an in-character reply to ``text``.

    Args:
        text: The user's message. The literal string ``'quit'`` is returned
            unchanged and no request is made.
        key: OpenAI API key; assigned to ``openai.api_key`` before the request.
        call_name: Name used as the speaker label the model is asked to
            continue (converted with ``str``).
        indentity: Persona description placed at the start of the prompt.
            The misspelled parameter name is kept so that existing callers
            keep working.

    Returns:
        The stripped text of the first completion choice, or ``text`` itself
        when it equals ``'quit'``.
    """
    # Guard clause: nothing to ask the model, so do not touch the client.
    if text == 'quit':
        return text

    openai.api_key = key
    # The body previously read an unbound local ``identity`` (the parameter
    # is spelled ``indentity``), which raised UnboundLocalError on every call.
    identity = indentity
    start_sequence = '\n' + str(call_name) + ':'
    restart_sequence = "\nYou: "
    # Mark the user's turn with the same "You:" label used as the stop
    # sequence below, so the persona text and the message are kept apart.
    prompt = identity + restart_sequence + text + start_sequence

    # NOTE(review): this is the legacy Completion interface; confirm that the
    # installed openai package and the chosen model still support it.
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0.5,
        max_tokens=1000,
        top_p=1.0,
        frequency_penalty=0.5,
        presence_penalty=0.0,
        stop=["\nYou:"]
    )
    return response['choices'][0]['text'].strip()
|
116 |
+
def is_japanese(string):
    """Return True if ``string`` contains at least one kana-range character.

    A character counts when its code point lies strictly between U+3040 and
    U+30FF (both end points excluded). An empty string yields False.
    """
    return any(0x3040 < ord(symbol) < 0x30FF for symbol in string)
|
121 |
+
def sle(language,text,tts_input2,call_name,tts_input3):
    """Build the language-tagged string that is handed to the text front end.

    Args:
        language: Selected mode: "中文", "日文" or "对话".
        text: User text. In "对话" mode it is sent to ``friend_chat`` and the
            reply is used instead.
        tts_input2: Forwarded to ``friend_chat`` as its ``key`` argument.
        call_name: Forwarded to ``friend_chat`` as the speaker name.
        tts_input3: Forwarded to ``friend_chat`` as the persona text.

    Returns:
        The normalised text wrapped in matching ``[ZH]`` or ``[JA]`` tags,
        or ``None`` when ``language`` is none of the three known modes.
    """
    def _normalise(raw):
        # Newlines become an ideographic full stop, spaces become commas.
        return raw.replace('\n','。').replace(' ',',')

    if language == "对话":
        # Chat mode: speak the model's reply, tagging it by detected script.
        reply = _normalise(friend_chat(text,tts_input2,call_name,tts_input3))
        marker = "[JA]" if is_japanese(reply) else "[ZH]"
        return f"{marker}{reply}{marker}"

    if language == "中文":
        marker = "[ZH]"
    elif language == "日文":
        marker = "[JA]"
    else:
        # Unknown mode: fall through with no result, as before.
        return None
    return marker + _normalise(text) + marker
|
132 |
+
def infer(language,text,tts_input2,tts_input3,speaker_id,n_scale= 0.667,n_scale_w = 0.8, l_scale = 1 ):
|
133 |
+
speaker_name = speaker_id
|
134 |
speaker_id = int(selection(speaker_id))
|
135 |
+
stn_tst = get_text(sle(language,text,tts_input2,speaker_name,tts_input3), hps_ms)
|
136 |
with torch.no_grad():
|
137 |
x_tst = stn_tst.unsqueeze(0).to(dev)
|
138 |
x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(dev)
|
|
|
143 |
spending_time = "推理时间:"+str(t2-t1)+"s"
|
144 |
print(spending_time)
|
145 |
return (hps_ms.data.sampling_rate, audio)
|
146 |
+
lan = ["中文","日文","对话"]
|
147 |
idols = ["高咲侑(误)","歩夢","かすみ","しずく","果林","愛","彼方","せつ菜","璃奈","栞子","エマ","ランジュ","ミア","三色绘恋1","三色绘恋2","派蒙"]
|
148 |
|
149 |
|
|
|
150 |
dev = torch.device("cpu")
|
151 |
hps_ms = utils.get_hparams_from_file("config.json")
|
152 |
net_g_ms = SynthesizerTrn(
|
|
|
168 |
|
169 |
with gr.TabItem("Basic"):
|
170 |
|
171 |
+
tts_input1 = gr.TextArea(label="输入你的文本", value="一次審査、二次審査、それぞれの欄に記入をお願いします。")
|
172 |
+
tts_input2 = gr.TextArea(label="如需使用openai,输入你的openai-key", value="官网")
|
173 |
+
tts_input3 = gr.TextArea(label="写上你给她的设定", value="恶魔系学妹。")
|
174 |
+
language = gr.Dropdown(label="选择合成方式",choices=lan, value="日文", interactive=True)
|
175 |
para_input1 = gr.Slider(minimum= 0.01,maximum=1.0,label="更改噪声比例", value=0.667)
|
176 |
para_input2 = gr.Slider(minimum= 0.01,maximum=1.0,label="更改噪声偏差", value=0.8)
|
177 |
para_input3 = gr.Slider(minimum= 0.1,maximum=10,label="更改时间比例", value=1)
|
178 |
tts_submit = gr.Button("Generate", variant="primary")
|
179 |
speaker1 = gr.Dropdown(label="选择说话人",choices=idols, value="かすみ", interactive=True)
|
180 |
tts_output2 = gr.Audio(label="Output")
|
181 |
+
tts_submit.click(infer, [language,tts_input1,tts_input2,tts_input3,speaker1,para_input1,para_input2,para_input3], [tts_output2])
|
182 |
#app.launch(share=True)
|
183 |
app.launch()
|