OpenAI-TTS-Voice-Conversion

Running

File size: 3,410 Bytes

aa95dd5
 
 
530748c
aa95dd5
 
 
 
 
 
8b0db71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7ebcd98
aa95dd5
7ebcd98
 
aa95dd5
 
 
 
 
 
 
 
 
 
 
 
 
8b0db71
 
 
 
 
 
 
5a5ad75
 
 
 
1deb9a5
 
 
 
8b0db71
 
 
 
 
 
 
1deb9a5
8b0db71
 
 
 
 
 
 
 
 
aa95dd5
8b0db71
aa95dd5

import gradio as gr
import os
import tempfile
from openai import OpenAI

# Read the OpenAI key from the environment when one is configured.
# os.environ only accepts string values, so the key is read here and never
# written back: assigning a missing (None) value would raise TypeError.
_env_api_key = os.environ.get('OPENAI_API_KEY')

# Default client for TTS requests. It is None when no environment key is
# configured, so the app can still start and rely on a key supplied at request time.
client = OpenAI(api_key=_env_api_key) if _env_api_key else None

import torch
import torchaudio
import gradio as gr
from scipy.io import wavfile
from scipy.io.wavfile import write

# Load the kNN-VC model from the bshall/knn-vc hub repository once, at import
# time, on CPU. voice_change() reuses this single instance for every request.
knn_vc = torch.hub.load(
    'bshall/knn-vc',
    'knn_vc',
    prematched=True,
    trust_repo=True,
    pretrained=True,
    device='cpu',
)

def voice_change(audio_in, audio_ref):
    """Convert the voice of ``audio_in`` to sound like ``audio_ref`` using kNN-VC.

    Args:
        audio_in: Path to the WAV file that supplies the spoken content.
        audio_ref: Path to the WAV file that supplies the target voice.

    Returns:
        Path to a newly created WAV file (saved at 16000 Hz) holding the
        converted audio. The file is not deleted automatically so that the
        caller (the Gradio output component) can read it after this returns.

    Raises:
        gr.Error: If either input path is missing.
    """
    if not audio_in or not audio_ref:
        raise gr.Error("请同时上传原音频和参照音频 / Please provide both the source and the reference audio.")

    # Use a per-call scratch directory: fixed file names in the working
    # directory would let overlapping requests overwrite each other's audio.
    with tempfile.TemporaryDirectory() as work_dir:
        in_path = os.path.join(work_dir, "audio_in.wav")
        ref_path = os.path.join(work_dir, "audio_ref.wav")

        # Re-write both inputs through scipy before handing them to the model,
        # as the original pipeline did.
        samplerate1, data1 = wavfile.read(audio_in)
        samplerate2, data2 = wavfile.read(audio_ref)
        write(in_path, samplerate1, data1)
        write(ref_path, samplerate2, data2)

        # Both feature sets are extracted while the scratch files still exist.
        query_seq = knn_vc.get_features(in_path)
        matching_set = knn_vc.get_matching_set([ref_path])

    out_wav = knn_vc.match(query_seq, matching_set, topk=4)

    # Reserve a unique output path; delete=False keeps the file on disk after
    # the handle is closed so it can be served to the UI.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
        out_path = out_file.name
    torchaudio.save(out_path, out_wav[None], 16000)
    return out_path


def tts(text, model, voice, api_key=None):
    """Synthesize ``text`` to speech with the OpenAI TTS API and save it as an MP3.

    Args:
        text: The text to speak.
        model: OpenAI TTS model name, e.g. "tts-1" or "tts-1-hd".
        voice: Voice name, e.g. 'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'.
        api_key: Optional OpenAI API key supplied with the request. When empty,
            the module-level ``client`` is used instead.

    Returns:
        Path to a temporary ``.mp3`` file containing the generated speech. The
        file is not deleted automatically so the caller can read it afterwards.

    Raises:
        gr.Error: If ``text`` is empty, or if no API key is available.
    """
    if not text or not text.strip():
        raise gr.Error("请先填写想生成的文本 / Please enter the text to synthesize.")

    # The UI passes the key typed by the user as a fourth input; prefer it and
    # build a client for this request only, so keys are not shared between users.
    request_key = (api_key or "").strip()
    speech_client = OpenAI(api_key=request_key) if request_key else client
    if speech_client is None:
        raise gr.Error("Please enter your OpenAI API key to use the TTS demo.")

    response = speech_client.audio.speech.create(
        model=model,
        voice=voice,
        input=text,
    )

    # delete=False keeps the file on disk after the handle is closed, so the
    # returned path stays valid for the component that receives it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        temp_file.write(response.content)
        temp_file_path = temp_file.name

    return temp_file_path


# Build the Gradio interface: a TTS panel whose output feeds the voice-conversion
# source input, plus the voice-conversion inputs and result player.
with gr.Blocks() as app:
    # Page header.
    gr.Markdown("# <center>🥳🎶🎡 - KNN-VC AI变声</center>")
    gr.Markdown("### <center>🌟 - 3秒实时AI变声，支持中日英在内的所有语言！无需训练、一键变声！🍻 </center>")
    gr.Markdown("### <center>🌊 - 更多精彩应用，敬请关注[滔滔AI](http://www.talktalkai.com)；滔滔AI，为爱滔滔！💕</center>")

    # TTS settings: API key, model and voice.
    with gr.Row(variant='panel'):
        api_key = gr.Textbox(
            type='password',
            label='OpenAI API Key',
            placeholder='Enter your API key to access the TTS demo',
        )
        model = gr.Dropdown(
            choices=['tts-1','tts-1-hd'],
            label='Model',
            value='tts-1',
        )
        voice = gr.Dropdown(
            choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'],
            label='Voice Options',
            value='alloy',
        )

    with gr.Row():
        # Left column: text to synthesize.
        with gr.Column():
            inp_text = gr.Textbox(
                label="请填写您想生成的文本（中英文皆可）",
                placeholder="想说却还没说的 还很多 攒着是因为想写成歌",
            )
            btn_text = gr.Button("一键开启真实拟声吧", variant="primary")

        # Middle column: source audio (content) and reference audio (voice).
        with gr.Column():
            inp1 = gr.Audio(type="filepath", label="请上传AI变声的原音频（决定变声后的语音内容）")
            inp2 = gr.Audio(type="filepath", label="请上传AI变声的参照音频（决定变声后的语音音色）")
            btn1 = gr.Button("一键开启AI变声吧", variant="primary")

        # Right column: converted result.
        with gr.Column():
            out1 = gr.Audio(type="filepath", label="AI变声后的专属音频")

        # The synthesized speech is written into the source-audio input so it
        # can be passed straight on to voice conversion.
        btn_text.click(fn=tts, inputs=[inp_text, model, voice, api_key], outputs=inp1)
        btn1.click(fn=voice_change, inputs=[inp1, inp2], outputs=out1)

    # Usage notice and footer.
    gr.Markdown("### <center>注意❗：请不要生成会对个人以及组织造成侵害的内容，此程序仅供科研、学习及个人娱乐使用。</center>")
    gr.HTML('''
        <div class="footer">
                    <p>🌊🏞️🎶 - 江水东流急，滔滔无尽声。 明·顾璘
                    </p>
        </div>
    ''')

# show_error=True surfaces exceptions raised by the handlers in the browser.
app.launch(show_error=True)