Spaces:

mbarnig
/

lb_de_fr_en_pt_COQUI_VITS_TTS

Running

File size: 3,648 Bytes

b59e10d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5061d72
91242cb
5061d72
 
b59e10d
 
f6795d0
91242cb
b59e10d
 
 
5061d72
f9fbbaa
 
5061d72
 
b59e10d
 
 
91242cb
 
 
 
 
b59e10d
 
 
 
 
09f086d
0fca99a
 
b59e10d
 
7c7b0cd
b59e10d

import gradio as gr
import tempfile
from TTS.utils.synthesizer import Synthesizer
from huggingface_hub import hf_hub_download

# Hugging Face Hub repository the model files are downloaded from.
REPO_ID = "mbarnig/lb-de-fr-en-pt-coqui-vits-tts"

# Title and description handed to gr.Interface.
my_title = "🇩🇪 🇫🇷 🇬🇧 🇵🇹 Mir schwätzen och Lëtzebuergesch ! 🇱🇺"
my_description = (
    "First multilingual-multispeaker Text-to-Speech (TTS) synthesizer speaking Luxembourgish. "
    "This model is based on [YourTTS](https://github.com/Edresson/YourTTS), "
    "thanks to 🐸 [Coqui.ai](https://coqui.ai/)."
)

# One sample sentence per language, used as the text of the example rows.
lb_text = "An der Zäit hunn sech den Nordwand an d'Sonn gestridden, wie vun hinnen zwee wuel méi staark wier, wéi e Wanderer, deen an ee waarme Mantel agepak war, iwwert de Wee koum."
de_text = "Einst stritten sich Nordwind und Sonne, wer von ihnen beiden wohl der Stärkere wäre, als ein Wanderer, der in einen warmen Mantel gehüllt war, des Weges daherkam."
fr_text = "La bise et le soleil se disputaient, chacun assurant qu'il était le plus fort, quand ils ont vu un voyageur qui s'avançait, enveloppé dans son manteau."
en_text = "The North Wind and the Sun were disputing which was the stronger, when a traveler came along wrapped in a warm cloak."
pt_text = "O vento norte e o Sol discutiam quem era o mais forte, quando surgiu um viajante envolvido numa capa."

# Speaker names offered in the "Speaker" radio group.
TTS_VOICES = [
    "Bernard",
    "Bunny",
    "Ed",
    "Guy",
    "Judith",
    "Kerstin",
    "Linda",
    "Thorsten",
]

# Language names offered in the "Language" radio group.
TTS_LANGUAGES = [
    "Deutsch",
    "English",
    "Français",
    "Lëtzebuergesch",
    "Português",
]

# Example rows for the interface; each row is [text, speaker, language],
# in the same order as the input components.
my_examples = [
    [lb_text, "Judith", "Lëtzebuergesch"],
    [de_text, "Thorsten", "Deutsch"],
    [fr_text, "Bernard", "Français"],
    [en_text, "Linda", "English"],
    [pt_text, "Ed", "Português"],
]

# HTML user guide handed to gr.Interface as the article text.
my_article = (
    "<h3>User guide</h3>"
    "<p>1. Press the Submit button to generate a speech file with the default values. "
    "2. Change the default values by clicking an example row. "
    "3. Select a language and a voice and enter your own text. Have fun!</p>"
    "<p>Go to <a href='https://www.web3.lu/category/audio-technologies/'>Internet with a Brain</a> "
    "to read some technical infos.</p>"
)

# Input components, in the order tts() receives them: text, speaker, language.
_text_input = gr.Textbox(label="Input Text", lines=5)
_speaker_input = gr.Radio(choices=TTS_VOICES, value="Judith", label="Speaker")
_language_input = gr.Radio(choices=TTS_LANGUAGES, value="Lëtzebuergesch", label="Language")
my_inputs = [_text_input, _speaker_input, _language_input]

# tts() returns the path of a WAV file, hence type="filepath".
my_outputs = gr.Audio(label="Output Audio", type="filepath")

# Synthesizer shared by all requests; created on first use by _get_synthesizer().
_SYNTHESIZER = None


def _get_synthesizer():
    """Return the shared Synthesizer, fetching the model files on first use."""
    global _SYNTHESIZER
    if _SYNTHESIZER is None:
        best_model_path = hf_hub_download(repo_id=REPO_ID, filename="best_model.pth")
        config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
        speakers_path = hf_hub_download(repo_id=REPO_ID, filename="speakers.pth")
        languages_path = hf_hub_download(repo_id=REPO_ID, filename="language_ids.json")
        speaker_encoder_model_path = hf_hub_download(repo_id=REPO_ID, filename="model_se.pth")
        speaker_encoder_config_path = hf_hub_download(repo_id=REPO_ID, filename="config_se.json")

        # Positional order kept exactly as before.
        # NOTE(review): the two None slots and the trailing False are
        # presumably vocoder checkpoint/config and use_cuda; confirm against
        # the installed TTS version before switching to keyword arguments.
        _SYNTHESIZER = Synthesizer(
            best_model_path,
            config_path,
            speakers_path,
            languages_path,
            None,
            None,
            speaker_encoder_model_path,
            speaker_encoder_config_path,
            False,
        )
    return _SYNTHESIZER


def tts(text: str, speaker_idx: str, language_idx: str) -> str:
    """Synthesize *text* and return the path of the generated WAV file.

    Args:
        text: The text to speak.
        speaker_idx: Speaker name selected in the UI (one of TTS_VOICES).
        language_idx: Language name selected in the UI (one of TTS_LANGUAGES).

    Returns:
        Path of a temporary ``.wav`` file holding the audio. The file is
        created with ``delete=False`` so it still exists when the caller
        reads it.

    Raises:
        ValueError: If *text* is empty or contains only whitespace.
    """
    # Reject blank input before any model download or loading is attempted.
    if not text or not text.strip():
        raise ValueError("Please enter some text to synthesize.")

    # Reuse one Synthesizer instead of rebuilding it on every request.
    synthesizer = _get_synthesizer()

    # create audio file
    wavs = synthesizer.tts(text, speaker_idx, language_idx)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name
 
# Assemble the web UI around tts() and start serving it.
iface = gr.Interface(
    fn=tts,
    inputs=my_inputs,
    outputs=my_outputs,
    title=my_title,
    description=my_description,
    article=my_article,
    examples=my_examples,
    # String form of the former allow_flagging=False: the boolean is
    # deprecated and Gradio documents "never" as the value that disables
    # flagging.
    allow_flagging="never",
)
iface.launch()