Spaces:

Mahiruoshi
/

vits-chatbot

Running

App Files Files Community

Mahiruoshi commited on Apr 27, 2023

Commit

5c5159a

•

1 Parent(s): a46a731

Upload 2 files

Browse files

Files changed (2) hide show

main.py +241 -0
部署流程.md +9 -0

main.py ADDED Viewed

	@@ -0,0 +1,241 @@

+import re
+import gradio as gr
+import torch
+import unicodedata
+import commons
+import utils
+import pathlib
+from models import SynthesizerTrn
+from text import text_to_sequence
+import time
+import os
+import io
+from scipy.io.wavfile import write
+from flask import Flask, request
+from threading import Thread
+import openai
+import requests
+import json
+import soundfile as sf
+from scipy import signal
+class VitsGradio:
+    def __init__(self):
+        self.lan = ["中文","日文","自动"]
+        self.chatapi = ["gpt-3.5-turbo","gpt3"]
+        self.modelPaths = []
+        for root,dirs,files in os.walk("checkpoints"):
+            for dir in dirs:
+                self.modelPaths.append(dir)
+        with gr.Blocks() as self.Vits:
+            with gr.Tab("调试用"):
+                with gr.Row():
+                    with gr.Column():
+                        with gr.Row():
+                            with gr.Column():
+                                self.text = gr.TextArea(label="Text", value="你好")
+                                with gr.Accordion(label="测试api", open=False):
+                                    self.local_chat1 = gr.Checkbox(value=False, label="使用网址+文本进行模拟")
+                                    self.url_input = gr.TextArea(label="键入测试", value="http://127.0.0.1:8080/chat?Text=")
+                                    butto = gr.Button("模拟前端抓取语音文件")
+                                btnVC = gr.Button("测试tts+对话程序")
+                            with gr.Column():
+                                output2 = gr.TextArea(label="回复")
+                                output1 = gr.Audio(label="采样率22050")
+                                output3 = gr.outputs.File(label="44100hz: output.wav")
+                butto.click(self.Simul, inputs=[self.text, self.url_input], outputs=[output2,output3])
+                btnVC.click(self.tts_fn, inputs=[self.text], outputs=[output1,output2])
+            with gr.Tab("控制面板"):
+                with gr.Row():
+                    with gr.Column():
+                        with gr.Row():
+                            with gr.Column():
+                                self.api_input1 = gr.TextArea(label="输入api-key或本地存储说话模型的路径", value="https://platform.openai.com/account/api-keys")
+                                with gr.Accordion(label="chatbot选择", open=False):
+                                                self.api_input2 = gr.Checkbox(value=True, label="采用gpt3.5")
+                                                self.local_chat1 = gr.Checkbox(value=False, label="启动本地chatbot")
+                                                self.local_chat2 = gr.Checkbox(value=True, label="是否量化")
+                                                res = gr.TextArea()
+                                Botselection = gr.Button("完成chatbot设定")
+                                Botselection.click(self.check_bot, inputs=[self.api_input1,self.api_input2,self.local_chat1,self.local_chat2], outputs = [res])
+                                self.input1 = gr.Dropdown(label = "模型", choices = self.modelPaths, value = self.modelPaths[0], type = "value")
+                                self.input2 = gr.Dropdown(label="Language", choices=self.lan, value="自动", interactive=True)
+                            with gr.Column():
+                                btnVC = gr.Button("完成vits TTS端设定")
+                                self.input3 = gr.Dropdown(label="Speaker", choices=list(range(101)), value=0, interactive=True)
+                                self.input4 = gr.Slider(minimum=0, maximum=1.0, label="更改噪声比例(noise scale)，以控制情感", value=0.267)
+                                self.input5 = gr.Slider(minimum=0, maximum=1.0, label="更改噪声偏差(noise scale w)，以控制音素长短", value=0.7)
+                                self.input6 = gr.Slider(minimum=0.1, maximum=10, label="duration", value=1)
+                                statusa = gr.TextArea()
+                btnVC.click(self.create_tts_fn, inputs=[self.input1, self.input2, self.input3, self.input4, self.input5, self.input6], outputs = [statusa])
+    def Simul(self,text,url_input):
+        web = url_input + text
+        res = requests.get(web)
+        music = res.content
+        with open('output.wav', 'wb') as code:
+            code.write(music)
+        file_path = "output.wav"
+        return web,file_path
+    def chatgpt(self,text):
+        self.messages.append({"role": "user", "content": text},)
+        chat = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages= self.messages)
+        reply = chat.choices[0].message.content
+        return reply
+    def ChATGLM(self,text):
+        if text == 'clear':
+            self.history = []
+        response, new_history = self.model.chat(self.tokenizer, text, self.history)
+        response = response.replace(" ",'').replace("\n",'.')
+        self.history = new_history
+        return response
+    def gpt3_chat(self,text):
+        call_name = "Waifu"
+        openai.api_key = args.key
+        identity = ""
+        start_sequence = '\n'+str(call_name)+':'
+        restart_sequence = "\nYou: "
+        if 1 == 1:
+            prompt0 = text #当期prompt
+        if text == 'quit':
+            return prompt0
+        prompt = identity + prompt0 + start_sequence
+        response = openai.Completion.create(
+            model="text-davinci-003",
+            prompt=prompt,
+            temperature=0.5,
+            max_tokens=1000,
+            top_p=1.0,
+            frequency_penalty=0.5,
+            presence_penalty=0.0,
+            stop=["\nYou:"]
+        )
+        return response['choices'][0]['text'].strip()
+    def check_bot(self,api_input1,api_input2,local_chat1,local_chat2):
+        if local_chat1:
+            from transformers import AutoTokenizer, AutoModel
+            self.tokenizer = AutoTokenizer.from_pretrained(api_input1, trust_remote_code=True)
+            if local_chat2:
+                self.model = AutoModel.from_pretrained(api_input1, trust_remote_code=True).half().quantize(4).cuda()
+            else:
+                self.model = AutoModel.from_pretrained(api_input1, trust_remote_code=True)
+            self.history = []
+        else:
+            self.messages = []
+            openai.api_key = api_input1
+        return "Finished"
+    def is_japanese(self,string):
+        for ch in string:
+            if ord(ch) > 0x3040 and ord(ch) < 0x30FF:
+                return True
+        return False
+    def is_english(self,string):
+        import re
+        pattern = re.compile('^[A-Za-z0-9.,:;!?()_*"\' ]+$')
+        if pattern.fullmatch(string):
+            return True
+        else:
+            return False
+    def get_text(self,text, hps, cleaned=False):
+        if cleaned:
+            text_norm = text_to_sequence(text, self.hps_ms.symbols, [])
+        else:
+            text_norm = text_to_sequence(text, self.hps_ms.symbols, self.hps_ms.data.text_cleaners)
+        if self.hps_ms.data.add_blank:
+            text_norm = commons.intersperse(text_norm, 0)
+        text_norm = torch.LongTensor(text_norm)
+        return text_norm
+    def get_label(self,text, label):
+        if f'[{label}]' in text:
+            return True, text.replace(f'[{label}]', '')
+        else:
+            return False, text
+    def sle(self,language,text):
+        text = text.replace('\n','。').replace(' ',',')
+        if language == "中文":
+            tts_input1 = "[ZH]" + text + "[ZH]"
+            return tts_input1
+        elif language == "自动":
+            tts_input1 = f"[JA]{text}[JA]" if self.is_japanese(text) else f"[ZH]{text}[ZH]"
+            return tts_input1
+        elif language == "日文":
+            tts_input1 = "[JA]" + text + "[JA]"
+            return tts_input1
+    def create_tts_fn(self,path, input2, input3, n_scale= 0.667,n_scale_w = 0.8, l_scale = 1 ):
+        self.language = input2
+        self.speaker_id = int(input3)
+        self.n_scale = n_scale
+        self.n_scale_w = n_scale_w
+        self.l_scale = l_scale
+        self.dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self.hps_ms = utils.get_hparams_from_file(f"checkpoints/{path}/config.json")
+        self.n_speakers = self.hps_ms.data.n_speakers if 'n_speakers' in self.hps_ms.data.keys() else 0
+        self.n_symbols = len(self.hps_ms.symbols) if 'symbols' in self.hps_ms.keys() else 0
+        self.net_g_ms = SynthesizerTrn(
+            self.n_symbols,
+            self.hps_ms.data.filter_length // 2 + 1,
+            self.hps_ms.train.segment_size // self.hps_ms.data.hop_length,
+            n_speakers=self.n_speakers,
+            **self.hps_ms.model).to(self.dev)
+        _ = self.net_g_ms.eval()
+        _ = utils.load_checkpoint(f"checkpoints/{path}/model.pth", self.net_g_ms)
+        return 'success'
+    def tts_fn(self,text):
+        if self.local_chat1:
+            text = self.chatgpt(text)
+        elif self.api_input2:
+            text = self.ChATGLM(text)
+        else:
+            text = self.gpt3_chat(text)
+        print(text)
+        text =self.sle(self.language,text)
+        with torch.no_grad():
+            stn_tst = self.get_text(text, self.hps_ms, cleaned=False)
+            x_tst = stn_tst.unsqueeze(0).to(self.dev)
+            x_tst_lengths = torch.LongTensor([stn_tst.size(0)]).to(self.dev)
+            sid = torch.LongTensor([self.speaker_id]).to(self.dev)
+            audio = self.net_g_ms.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=self.n_scale, noise_scale_w=self.n_scale_w, length_scale=self.l_scale)[0][
+                0, 0].data.cpu().float().numpy()
+        resampled_audio_data = signal.resample(audio, len(audio) * 2)
+        sf.write('temp.wav', resampled_audio_data, 44100, 'PCM_24')
+        return (self.hps_ms.data.sampling_rate, audio),text.replace('[JA]','').replace('[ZH]','')
+app = Flask(__name__)
+print("开始部���")
+grVits = VitsGradio()
+@app.route('/chat')
+def text_api():
+    message = request.args.get('Text','')
+    audio,text = grVits.tts_fn(message)
+    text = text.replace('[JA]','').replace('[ZH]','')
+    with open('temp.wav','rb') as bit:
+        wav_bytes = bit.read()
+    headers = {
+            'Content-Type': 'audio/wav',
+            'Text': text.encode('utf-8')}
+    return wav_bytes, 200, headers
+def gradio_interface():
+    return grVits.Vits.launch()
+if __name__ == '__main__':
+    api_thread = Thread(target=app.run, args=("0.0.0.0", 8080))
+    gradio_thread = Thread(target=gradio_interface)
+    api_thread.start()
+    gradio_thread.start()

部署流程.md ADDED Viewed

	@@ -0,0 +1,9 @@

+```sh
+#前置条件 已安装Anaconda
+conda create -n chatbot python=3.8
+conda activate chatbot
+git clone https://huggingface.co/spaces/Mahiruoshi/vits-chatbot
+cd vits-chatbot
+pip install -r requirements.txt
+python main.py
+```