File size: 4,053 Bytes
aa95dd5 530748c aa95dd5 2b48c61 aa95dd5 875ff99 aa95dd5 8b0db71 2b48c61 aa95dd5 8b0db71 925df59 8b0db71 5a5ad75 fc7a5f0 7fb1b05 1deb9a5 5961eaf 1deb9a5 8b0db71 5961eaf 8b0db71 1deb9a5 8b0db71 fc7a5f0 8b0db71 aa95dd5 8b0db71 aa95dd5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
import gradio as gr
import os
import tempfile
from openai import OpenAI
# Set an environment variable for key
#os.environ['OPENAI_API_KEY'] = os.environ.get('OPENAI_API_KEY')
#client = OpenAI() # add api_key
import torch
import torchaudio
import gradio as gr
from scipy.io import wavfile
from scipy.io.wavfile import write
# Load the pretrained kNN-VC model from torch.hub once at import time, on CPU.
# voice_change() reuses this single module-level instance for every request.
knn_vc = torch.hub.load(
    'bshall/knn-vc',
    'knn_vc',
    prematched=True,
    trust_repo=True,
    pretrained=True,
    device='cpu',
)
def voice_change(audio_in, audio_ref):
    """Convert the voice in ``audio_in`` to the timbre of ``audio_ref`` with kNN-VC.

    Args:
        audio_in: Path to the WAV file to be converted (the query audio).
        audio_ref: Path to the WAV file used as the matching set, i.e. the
            reference that determines the timbre of the result.

    Returns:
        Path to a newly created ``.wav`` file holding the converted audio,
        saved with a sample rate of 16000. The file is not deleted
        automatically.

    Raises:
        gr.Error: If either audio path is missing.
    """
    if not audio_in or not audio_ref:
        raise gr.Error('Please provide both the source audio and the reference audio')

    # Use a private scratch directory per call: fixed file names in the
    # working directory would be overwritten by concurrent requests.
    with tempfile.TemporaryDirectory() as work_dir:
        in_path = os.path.join(work_dir, "audio_in.wav")
        ref_path = os.path.join(work_dir, "audio_ref.wav")

        # Round-trip both inputs through scipy so kNN-VC always receives
        # WAV files written by scipy.io.wavfile.
        samplerate1, data1 = wavfile.read(audio_in)
        samplerate2, data2 = wavfile.read(audio_ref)
        write(in_path, samplerate1, data1)
        write(ref_path, samplerate2, data2)

        query_seq = knn_vc.get_features(in_path)
        matching_set = knn_vc.get_matching_set([ref_path])
        out_wav = knn_vc.match(query_seq, matching_set, topk=4)

    # Reserve a unique output path and close the handle before torchaudio
    # writes to it, so parallel calls never share one 'output.wav'.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
        out_path = out_file.name
    torchaudio.save(out_path, out_wav[None], 16000)
    return out_path
def tts(text, model, voice, api_key):
    """Generate speech for ``text`` with the OpenAI TTS API and save it to a file.

    Args:
        text: The text to be spoken.
        model: OpenAI TTS model name, e.g. "tts-1" or "tts-1-hd".
        voice: Voice name, e.g. 'alloy', 'echo', 'fable', 'onyx', 'nova'
            or 'shimmer'.
        api_key: OpenAI API key used to create the client for this request.

    Returns:
        Path to a temporary file with an ``.mp3`` suffix that contains the
        bytes of the API response. The file is not deleted automatically.

    Raises:
        gr.Error: If the API key or the text is missing, or if the request
            to OpenAI fails for any reason.
    """
    if not api_key:
        raise gr.Error('Please enter your OpenAI API Key')
    if not text or not text.strip():
        # Without this guard a blank prompt is rejected by the API and shows
        # up as the generic "check your API key" error below.
        raise gr.Error('Please enter the text you want to convert to speech')

    try:
        client = OpenAI(api_key=api_key)
        response = client.audio.speech.create(
            model=model,  # "tts-1", "tts-1-hd"
            voice=voice,  # 'alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'
            input=text,
        )
    except Exception as error:
        # Print the real cause first: a statement placed after the raise
        # would never execute. Chain the exception so the traceback keeps it.
        print(str(error))
        raise gr.Error("An error occurred while generating speech. Please check your API key and try again.") from error

    # delete=False keeps the file on disk after the handle is closed so the
    # caller can read it by path.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        temp_file.write(response.content)
    return temp_file.name
# Footer markup rendered at the bottom of the page.
_FOOTER_HTML = '''
<div class="footer">
<p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
</p>
</div>
'''

# Build the UI: a settings row (API key, model, voice), then three columns
# for text input, TTS result plus reference audio, and the converted audio.
with gr.Blocks() as app:
    gr.Markdown("# <center>🦄 - OpenAI TTS + AI变声</center>")
    gr.Markdown("### <center>🎶 地表最强文本转语音模型 + 3秒实时AI变声,支持中文!Powered by [OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech) and [KNN-VC](https://github.com/bshall/knn-vc) </center>")
    gr.Markdown("### <center>🌊 更多精彩应用,敬请关注[滔滔AI](http://www.talktalkai.com);滔滔AI,为爱滔滔!💕</center>")

    # Request settings passed to tts().
    with gr.Row(variant='panel'):
        api_key = gr.Textbox(type='password', label='OpenAI API Key', placeholder='请在此填写您的OpenAI API Key')
        model = gr.Dropdown(choices=['tts-1','tts-1-hd'], label='请选择模型(tts-1推理更快,tts-1-hd音质更好)', value='tts-1')
        voice = gr.Dropdown(choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'], label='请选择一个说话人', value='alloy')

    with gr.Row():
        # Column 1: text to synthesize and the button that triggers tts().
        with gr.Column():
            inp_text = gr.Textbox(label="请填写您想生成的文本(中英文皆可)", placeholder="想说却还没说的 还很多 攒着是因为想写成歌", lines=5)
            btn_text = gr.Button("一键开启真实拟声吧", variant="primary")

        # Column 2: the TTS output (read-only) and the uploaded reference
        # audio, plus the button that triggers voice_change().
        with gr.Column():
            inp1 = gr.Audio(type="filepath", label="OpenAI TTS真实拟声", interactive=False)
            inp2 = gr.Audio(type="filepath", label="请上传AI变声的参照音频(决定变声后的语音音色)")
            btn1 = gr.Button("一键开启AI变声吧", variant="primary")

        # Column 3: the converted audio.
        with gr.Column():
            out1 = gr.Audio(type="filepath", label="AI变声后的专属音频")

    # Wire the buttons: the TTS result feeds inp1, which in turn is the
    # source audio for the voice conversion step.
    btn_text.click(fn=tts, inputs=[inp_text, model, voice, api_key], outputs=inp1)
    btn1.click(fn=voice_change, inputs=[inp1, inp2], outputs=out1)

    gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。Get your OpenAI API Key [here](https://platform.openai.com/api-keys).</center>")
    gr.HTML(_FOOTER_HTML)

# show_error=True surfaces exceptions raised by the handlers in the browser.
app.launch(show_error=True)
|