File size: 4,090 Bytes
3ee94dd
 
 
 
 
 
 
 
 
 
 
e80739d
3ee94dd
 
 
 
 
1b7f8cd
 
3ee94dd
c68294e
 
 
3ee94dd
 
 
 
f007a1f
 
 
3ee94dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3468586
3ee94dd
d019a70
3ee94dd
3468586
3ee94dd
 
 
 
 
 
 
 
7d0f455
d019a70
3ee94dd
68baa0d
3ee94dd
d019a70
 
 
 
 
 
 
 
b1929e6
8821299
3ee94dd
d019a70
3ee94dd
 
 
 
 
 
8b8606a
1b7f8cd
3ee94dd
 
8b8606a
3ee94dd
 
 
 
 
 
 
 
 
 
 
 
8b8606a
3ee94dd
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import tempfile
from typing import Optional
from TTS.config import load_config
import gradio as gr
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer


# Module-level registries; the active code visible in this file never fills them.
MODELS = {}
SPEAKERS = {}
# Upper bound on the number of input characters synthesized per request.
MAX_TXT_LEN = 100


manager = ModelManager()

# Substrings identifying models to hide from the demo
# (multi-speaker models and slow wavegrad vocoders).
filters = ["vctk", "your_tts", "ek1"]
MODEL_NAMES = [
    candidate
    for candidate in manager.list_tts_models()
    if all(marker not in candidate for marker in filters)
]

# Rotate the first three entries left by one so the former second model
# is listed first. The right-hand side is evaluated before the assignment,
# so this still requires at least three models.
MODEL_NAMES[:3] = [MODEL_NAMES[1], MODEL_NAMES[2], MODEL_NAMES[0]]
print(MODEL_NAMES)


def tts(text: str, model_name: str, speaker_idx: Optional[str] = None) -> str:
    """Synthesize ``text`` with the chosen model and return a WAV file path.

    Args:
        text: Text to speak. Anything beyond ``MAX_TXT_LEN`` characters is
            dropped before synthesis.
        model_name: Model identifier without the ``tts_models/`` prefix; the
            prefix is prepended before the model is downloaded.
        speaker_idx: Forwarded unchanged to ``Synthesizer.tts``.

    Returns:
        The name of a temporary ``.wav`` file containing the audio. The file
        is created with ``delete=False``, so it is not removed by this
        function.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text, model_name)

    # Fetch the acoustic model and look up its default vocoder.
    model_path, config_path, model_item = manager.download_model(f"tts_models/{model_name}")
    vocoder_name: Optional[str] = model_item["default_vocoder"]

    # The vocoder is optional: without a default, both paths stay None.
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # Arguments are passed positionally exactly as before; the two None
    # values fill the slots between the model config and the vocoder paths.
    synthesizer = Synthesizer(
        model_path, config_path, None, None, vocoder_path, vocoder_config_path,
    )
    wavs = synthesizer.tts(text, speaker_idx)

    # delete=False keeps the file on disk after the handle is closed so the
    # caller can read it by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name



# Markdown/HTML text handed to gr.Interface through its `article` argument.
article = """
Visit us on Coqui.ai and drop a 🌟 to 🔗<a href="https://github.com/coqui-ai/TTS" target="_blank">CoquiTTS</a>.

<br/>

Run CoquiTTS locally for the best result. Check out our 🔗<a href="https://tts.readthedocs.io/en/latest/inference.html">documentation</a>.

```bash
$ pip install TTS
...
$ tts --list_models
...
$ tts --text "Text for TTS" --model_name "<type>/<language>/<dataset>/<model_name>" --out_path folder/to/save/output.wav
```
<img src="https://static.scarf.sh/a.png?x-pxid=1404a024-e647-4406-bb9a-4ade0c931182" />
<br/>

👑 <b> Model contributors</b>

- <a href="https://github.com/nmstoker/" target="_blank">@nmstoker</a>
- <a href="https://github.com/kaiidams/" target="_blank">@kaiidams</a>
- <a href="https://github.com/WeberJulian/" target="_blank">@WeberJulian</a>
- <a href="https://github.com/Edresson/" target="_blank">@Edresson</a>
- <a href="https://github.com/thorstenMueller/" target="_blank">@thorstenMueller</a>
- <a href="https://github.com/r-dh/" target="_blank">@r-dh</a>
- <a href="https://github.com/kirianguiller/" target="_blank">@kirianguiller</a>
- <a href="https://github.com/robinhad/" target="_blank">@robinhad</a>
- <a href="https://github.com/fkarabiber/" target="_blank">@fkarabiber</a>
- <a href="https://github.com/nicolalandro/" target="_blank">@nicolalandro</a>

👉 Drop a ✨PR✨ on 🐸TTS to share a new model and have it included here.
"""

# Build the demo UI: a text box plus a model picker feeding `tts`, with the
# synthesized audio as the single output.
iface = gr.Interface(
    fn=tts,
    # Order matters: inputs are passed positionally as (text, model_name).
    inputs=[
        gr.inputs.Textbox(
            label="Input Text",
            default="This sentence has been generated by a speech synthesis system.",
        ),
        gr.inputs.Radio(
            label="Pick a TTS Model - (language/dataset/model_name)",
            choices=MODEL_NAMES,
        ),
        # Disabled inputs kept for reference:
        # gr.inputs.Dropdown(label="Select a speaker", choices=SPEAKERS, default=None)
        # gr.inputs.Audio(source="microphone", label="Record your voice.", type="numpy", label=None, optional=False)
    ],
    outputs=gr.outputs.Audio(label="Output"),
    title="🐸💬 CoquiTTS Demo",
    theme="grass",
    description="🐸💬  Coqui TTS - a deep learning toolkit for Text-to-Speech, battle-tested in research and production.",
    article=article,
    # NOTE(review): flagging is switched off here while flagging_options is
    # still supplied; confirm which of the two is intended.
    allow_flagging=False,
    flagging_options=['error', 'bad-quality', 'wrong-pronounciation'],
    layout="vertical",
    live=False
)
iface.launch(share=False)