# tts-demo / app.py
# Atotti's picture
# Upload app.py
# 2741c27 verified
import tempfile

import gradio as gr
import soundfile as sf
import torch
from parler_tts import ParlerTTSForConditionalGeneration
from rubyinserter import add_ruby
from transformers import AutoTokenizer
# Global variables: run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Single-slot cache holding ``(model_name, model, tokenizer)`` for the most
# recently used checkpoint, or ``None`` when nothing has been loaded yet.
# It is stored as one tuple so a reader always sees a consistent triple.
_checkpoint_cache = None


def _load_checkpoint(model_name: str):
    """Return ``(model, tokenizer)`` for *model_name*, loading them on a cache miss."""
    global _checkpoint_cache

    cached = _checkpoint_cache
    if cached is not None and cached[0] == model_name:
        return cached[1], cached[2]

    # Drop every reference to the previous checkpoint before loading the next
    # one, so the old weights can be reclaimed instead of staying alive while
    # the new ones are loaded.
    cached = _checkpoint_cache = None

    model = ParlerTTSForConditionalGeneration.from_pretrained(model_name).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Retained from the original implementation; nothing in this file reads it.
    model.name = model_name

    _checkpoint_cache = (model_name, model, tokenizer)
    return model, tokenizer


def gen(model_name: str, prompt: str, description: str, output_file_path: str) -> None:
    """Synthesize speech for *prompt* and write it to *output_file_path*.

    The model and tokenizer for *model_name* are reused from a single-slot
    cache when the same model was used by the previous call, so repeated
    requests do not reload the checkpoint every time.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            Parler-TTS model and its tokenizer.
        prompt: Text to be spoken. It is passed through ``add_ruby`` before
            tokenization.
        description: Text describing the desired voice; tokenized and passed
            to ``generate`` as ``input_ids``.
        output_file_path: Destination path handed to ``soundfile.write``.
    """
    model, tokenizer = _load_checkpoint(model_name)

    # NOTE(review): add_ruby comes from rubyinserter; presumably it annotates
    # the text with readings before tokenization - confirm against that package.
    prompt = add_ruby(prompt)

    input_ids = tokenizer(description, return_tensors="pt").input_ids.to(device)
    prompt_input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)

    generation = model.generate(input_ids=input_ids, prompt_input_ids=prompt_input_ids)

    # Move the result to host memory and drop singleton dimensions before writing.
    audio_arr = generation.cpu().numpy().squeeze()
    sf.write(output_file_path, audio_arr, model.config.sampling_rate)
def generate_audio(model_name: str, prompt: str, description: str) -> str:
    """Synthesize speech with ``gen`` and return the path of the written WAV file.

    Args:
        model_name: Checkpoint identifier forwarded to ``gen``.
        prompt: Text to be spoken, forwarded to ``gen``.
        description: Voice description, forwarded to ``gen``.

    Returns:
        Path of a newly created ``.wav`` file containing the generated audio.
    """
    # Reserve a unique file for every call. A fixed name such as "output.wav"
    # is shared by all requests, so overlapping or successive requests would
    # overwrite each other's audio. ``delete=False`` keeps the file on disk
    # after the handle is closed so ``gen`` can write to it and the caller can
    # read it; the file is left in the temporary directory afterwards.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file_path = tmp.name

    gen(model_name, prompt, description, output_file_path)
    return output_file_path
def main():
    """Build the text-to-speech Gradio demo and return it without launching it.

    Returns:
        The ``gr.Blocks`` application; the caller is responsible for calling
        ``launch()`` on it.
    """
    # Model names offered in the dropdown.
    model_choices = [
        "Atotti/parler-tts-mini-bate-voiceactress100-ex-ayuto",
        "Atotti/parler-tts-mini-bate-voiceactress100-ex-hiroki",
        "Atotti/parler-tts-mini-bate-voiceactress100-ex-olimov",
        "Atotti/parler-tts-mini-bate-voiceactress100-ex-mako",
        "Atotti/parler-tts-mini-bate-voiceactress100-ex-hinako",
    ]

    # Gradio interface setup.
    # NOTE(review): the nesting below was reconstructed from source whose
    # indentation had been lost; confirm which components belong in the row.
    with gr.Blocks() as demo:
        gr.Markdown("## Text-to-Speech Demo")
        with gr.Row():
            model_name_input = gr.Dropdown(
                choices=model_choices,
                # Preselect the first model so that clicking the button without
                # touching the dropdown does not pass None as the model name.
                value=model_choices[0],
                label="Model Name",
            )
            prompt_input = gr.Textbox(label="Prompt", placeholder="例: テキスト入力")
            description_input = gr.Textbox(
                label="Description",
                placeholder="例: Ayuto's voice delivers her words at a moderate speed with a quite monotone tone slightly low pitch in a confined environment. The pace of her speech is slow, resulting in a quite clear audio recording.",
            )
        generate_button = gr.Button("Generate Audio")
        audio_output = gr.Audio(label="Generated Audio", type="filepath")

        # Connect the button to the synthesis function.
        generate_button.click(
            generate_audio,
            inputs=[model_name_input, prompt_input, description_input],
            outputs=audio_output,
        )
    return demo
if __name__ == "__main__":
    # Build the interface and start serving it only when run as a script.
    demo = main()
    demo.launch()