Spaces:
Runtime error
Runtime error
File size: 4,090 Bytes
3ee94dd e80739d 3ee94dd 1b7f8cd 3ee94dd c68294e 3ee94dd f007a1f 3ee94dd 3468586 3ee94dd d019a70 3ee94dd 3468586 3ee94dd 7d0f455 d019a70 3ee94dd 68baa0d 3ee94dd d019a70 b1929e6 8821299 3ee94dd d019a70 3ee94dd 8b8606a 1b7f8cd 3ee94dd 8b8606a 3ee94dd 8b8606a 3ee94dd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import tempfile
from typing import Optional
from TTS.config import load_config
import gradio as gr
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
# Placeholders for loaded models and speaker lists; nothing in the visible
# part of this file reads or writes them.
MODELS = {}
SPEAKERS = {}
# tts() truncates any input longer than this many characters.
MAX_TXT_LEN = 100

manager = ModelManager()
MODEL_NAMES = manager.list_tts_models()

# filter out multi-speaker models and slow wavegrad vocoders
filters = ["vctk", "your_tts", "ek1"]
MODEL_NAMES = [model_name for model_name in MODEL_NAMES if not any(f in model_name for f in filters)]

# reorder models: rotate the first three entries one step to the left so the
# second model is listed first. Guarded because indexing positions 0-2
# raises IndexError when fewer than three models survive the filter.
if len(MODEL_NAMES) >= 3:
    MODEL_NAMES[:3] = MODEL_NAMES[1], MODEL_NAMES[2], MODEL_NAMES[0]
print(MODEL_NAMES)
def tts(text: str, model_name: str, speaker_idx: Optional[str] = None) -> str:
    """Synthesize ``text`` with the selected model and return a WAV file path.

    Args:
        text: Text to speak. Input longer than ``MAX_TXT_LEN`` characters is
            truncated to that length.
        model_name: Model identifier without the ``tts_models/`` prefix.
        speaker_idx: Optional speaker identifier forwarded to
            ``Synthesizer.tts``.

    Returns:
        Path of a temporary ``.wav`` file holding the synthesized audio. The
        file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        ValueError: If ``model_name`` is empty or ``None``.
    """
    # Fail early with a clear message instead of requesting "tts_models/None".
    if not model_name:
        raise ValueError("No TTS model selected.")

    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text, model_name)

    # download model
    model_path, config_path, model_item = manager.download_model(f"tts_models/{model_name}")
    # .get() tolerates model entries that carry no "default_vocoder" key.
    vocoder_name: Optional[str] = model_item.get("default_vocoder")

    # download vocoder (only when the model names one)
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # init synthesizer
    synthesizer = Synthesizer(
        model_path, config_path, None, None, vocoder_path, vocoder_config_path,
    )

    # synthesize
    wavs = synthesizer.tts(text, speaker_idx)

    # return output: delete=False keeps the file on disk after the handle is
    # closed so the caller can read it by path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name
# Footer text for the demo page (passed to gr.Interface as `article`);
# HTML mixed with a Markdown code fence.
# NOTE(review): the lone "π" characters below look like mojibake left over
# from emoji whose original code points cannot be recovered from this text -
# restore them from the upstream file.
article = """
Visit us on Coqui.ai and drop a π to π<a href="https://github.com/coqui-ai/TTS" target="_blank">CoquiTTS</a>.
<br/>
Run CoquiTTS locally for the best result. Check out our π<a href="https://tts.readthedocs.io/en/latest/inference.html">documentation</a>.
```bash
$ pip install TTS
...
$ tts --list_models
...
$ tts --text "Text for TTS" --model_name "<type>/<language>/<dataset>/<model_name>" --out_path folder/to/save/output.wav
```
<img src="https://static.scarf.sh/a.png?x-pxid=1404a024-e647-4406-bb9a-4ade0c931182" />
<br/>
π <b> Model contributors</b>
- <a href="https://github.com/nmstoker/" target="_blank">@nmstoker</a>
- <a href="https://github.com/kaiidams/" target="_blank">@kaiidams</a>
- <a href="https://github.com/WeberJulian/" target="_blank">@WeberJulian</a>
- <a href="https://github.com/Edresson/" target="_blank">@Edresson</a>
- <a href="https://github.com/thorstenMueller/" target="_blank">@thorstenMueller</a>
- <a href="https://github.com/r-dh/" target="_blank">@r-dh</a>
- <a href="https://github.com/kirianguiller/" target="_blank">@kirianguiller</a>
- <a href="https://github.com/robinhad/" target="_blank">@robinhad</a>
- <a href="https://github.com/fkarabiber/" target="_blank">@fkarabiber</a>
- <a href="https://github.com/nicolalandro/" target="_blank">@nicolalandro</a>
π Drop a ✨PR✨ on 🐸TTS to share a new model and have it included here.
"""
# Gradio UI: a text box and a model picker feed tts(); the returned file path
# is rendered by the audio output component.
iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.inputs.Textbox(
            label="Input Text",
            default="This sentence has been generated by a speech synthesis system.",
        ),
        gr.inputs.Radio(
            label="Pick a TTS Model - (language/dataset/model_name)",
            choices=MODEL_NAMES,
        ),
        # NOTE: speaker selection and microphone inputs are currently disabled.
        # gr.inputs.Dropdown(label="Select a speaker", choices=SPEAKERS, default=None)
        # gr.inputs.Audio(source="microphone", label="Record your voice.", type="numpy", label=None, optional=False)
    ],
    outputs=gr.outputs.Audio(label="Output"),
    # Emoji restored from mojibake (bytes F0 9F 90 B8 / F0 9F 92 AC).
    title="🐸💬 CoquiTTS Demo",
    theme="grass",
    description="🐸💬 Coqui TTS - a deep learning toolkit for Text-to-Speech, battle-tested in research and production.",
    article=article,
    # NOTE(review): flagging is switched off, so flagging_options is
    # presumably unused - confirm against the pinned Gradio version.
    allow_flagging=False,
    flagging_options=['error', 'bad-quality', 'wrong-pronounciation'],
    layout="vertical",
    live=False,
)
# Start the web server without requesting a public share link.
iface.launch(share=False)
|