tortoisse-tts / app.py
osanseviero's picture
Update app.py
510f17f
raw
history blame
1.37 kB
import subprocess
import sys
import tempfile

import gradio as gr
import IPython
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
def _pip_install(*pip_args):
    """Run ``pip install`` with ``pip_args`` using the current interpreter.

    ``subprocess.check_call`` raises ``CalledProcessError`` if pip exits
    with a non-zero status, so a failed install aborts start-up.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", *pip_args])


# The packages are installed at start-up, before the imports that need them.
_pip_install("--upgrade", "--force-reinstall", "git+https://github.com/osanseviero/tortoise-tts.git")
# entmax could not be installed at same time as torch
_pip_install("entmax")

from tortoise_tts.api import TextToSpeech
from tortoise_tts.utils.audio import get_voices, load_audio

# This will download all the models used by Tortoise from the HF hub.
tts = TextToSpeech()
# Voice table used by `inference`, which iterates `voices[voice]` as a
# collection of reference-clip paths.
# NOTE(review): presumably get_voices() returns a name -> paths mapping —
# confirm against the installed tortoise_tts package.
# The hard-coded list of voice names that used to precede this call was dead
# code (it was overwritten by this very assignment) and has been removed.
voices = get_voices()

# Preset name handed to `tts.tts_with_preset` for every request.
preset = "fastest"
def inference(text, voice):
    """Synthesise ``text`` in the selected voice and return a WAV file path.

    Args:
        text: The text to speak.
        voice: Either a key of the module-level ``voices`` table or, when the
            dropdown is configured with ``type="index"``, the position of
            that key in the table.

    Returns:
        Path of a newly written WAV file containing the generated speech.
        A file path is returned (rather than the raw model output) because
        that is a form a Gradio ``"audio"`` output can serve.
    """
    # An index-typed dropdown passes an int, which is not a key of a
    # name-keyed table; translate the position back into the key.
    if isinstance(voice, int) and isinstance(voices, dict):
        voice = list(voices)[voice]

    # Load every reference clip of the chosen voice (22050 is the sampling
    # rate argument this script passes to load_audio).
    conds = [load_audio(cond_path, 22050) for cond_path in voices[voice]]
    gen = tts.tts_with_preset(text, conds, preset)

    # Reserve a unique file name so concurrent requests do not overwrite each
    # other; the handle is closed before torchaudio writes to the path.  The
    # file is intentionally not deleted here because Gradio reads it after
    # this function returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        wav_path = wav_file.name

    # NOTE(review): assumes `gen` is a single (1, channels, samples) tensor
    # sampled at 24 kHz, as in the upstream Tortoise usage example — confirm
    # against the installed fork.
    torchaudio.save(wav_path, gen.squeeze(0).cpu(), 24000)
    return wav_path
# Default prompt shown in the text box.
text = "Joining two modalities results in a surprising increase in generalization! What would happen if we combined them all?"

# Web UI: a text box and a voice selector in, generated audio out.
iface = gr.Interface(
    # The callback must be `inference`; the previously referenced
    # `generate_tone` is not defined anywhere in this script.
    inference,
    inputs=[
        # NOTE(review): accepted `type` values for Textbox differ between
        # Gradio releases — confirm "text" is valid for the installed one.
        gr.inputs.Textbox(type="text", default=text, label="Text"),
        # Offer the voice names as an explicit list and pass the selected
        # name (not its position) so `voices[voice]` in `inference` resolves.
        gr.inputs.Dropdown(list(voices), type="value"),
    ],
    outputs="audio",
)
iface.launch()