"""Gradio demo for a multilingual, multi-speaker Coqui VITS text-to-speech model.

The model files are pulled from the Hugging Face Hub repository ``REPO_ID``
and served through a simple ``gr.Interface`` with a text box, a speaker
selector and a language selector.
"""

import tempfile
from functools import lru_cache

import gradio as gr
from huggingface_hub import hf_hub_download
from TTS.utils.synthesizer import Synthesizer

# Hugging Face Hub repository holding the model, configs and speaker encoder.
REPO_ID = "mbarnig/lb-de-fr-en-pt-coqui-vits-tts"

my_title = "🇩🇪 🇫🇷 🇬🇧 🇵🇹 Mir schwätzen och Lëtzebuergesch ! 🇱🇺"

my_description = "First multilingual-multispeaker Text-to-Speech (TTS) synthesizer speaking Luxembourgish. This model is based on [YourTTS](https://github.com/Edresson/YourTTS), thanks to 🐸 [Coqui.ai](https://coqui.ai/)."

# One sample sentence per supported language, used for the example rows.
lb_text = "An der Zäit hunn sech den Nordwand an d'Sonn gestridden, wie vun hinnen zwee wuel méi staark wier, wéi e Wanderer, deen an ee waarme Mantel agepak war, iwwert de Wee koum."
de_text = "Einst stritten sich Nordwind und Sonne, wer von ihnen beiden wohl der Stärkere wäre, als ein Wanderer, der in einen warmen Mantel gehüllt war, des Weges daherkam."
fr_text = "La bise et le soleil se disputaient, chacun assurant qu'il était le plus fort, quand ils ont vu un voyageur qui s'avançait, enveloppé dans son manteau."
en_text = "The North Wind and the Sun were disputing which was the stronger, when a traveler came along wrapped in a warm cloak."
pt_text = "O vento norte e o Sol discutiam quem era o mais forte, quando surgiu um viajante envolvido numa capa."

# Choices offered by the "Speaker" radio group.
TTS_VOICES = [
    "Bernard",
    "Bunny",
    "Ed",
    "Guy",
    "Judith",
    "Kerstin",
    "Linda",
    "Thorsten",
]

# Choices offered by the "Language" radio group.
TTS_LANGUAGES = [
    "Deutsch",
    "English",
    "Français",
    "Lëtzebuergesch",
    "Português",
]

# Each example row is [text, speaker, language], matching the order of my_inputs.
my_examples = [
    [lb_text, "Judith", "Lëtzebuergesch"],
    [de_text, "Thorsten", "Deutsch"],
    [fr_text, "Bernard", "Français"],
    [en_text, "Linda", "English"],
    [pt_text, "Ed", "Português"],
]

# Triple-quoted because the text spans several lines; a single-quoted literal
# cannot contain raw newlines.
# NOTE(review): this text looks like it lost its original markup (e.g. the link
# target for "Internet with a Brain") — restore it from the upstream file if
# available.
my_article = """
1. Press the Submit button to generate a speech file with the default values. 2. Change the default values by clicking an example row. 3. Select a language and a voice and enter your own text. Have fun!
Go to Internet with a Brain to read some technical infos.
"""

my_inputs = [
    gr.Textbox(lines=5, label="Input Text"),
    gr.Radio(label="Speaker", choices=TTS_VOICES, value="Judith"),
    gr.Radio(label="Language", choices=TTS_LANGUAGES, value="Lëtzebuergesch"),
]

my_outputs = gr.Audio(type="filepath", label="Output Audio")


@lru_cache(maxsize=1)
def _load_synthesizer() -> Synthesizer:
    """Fetch the model files from the Hub and build the synthesizer once.

    The result is cached so that repeated requests reuse the same
    ``Synthesizer`` instead of re-resolving six Hub files and re-loading the
    model on every call. Loading stays lazy: nothing is downloaded until the
    first request arrives.
    """
    best_model_path = hf_hub_download(repo_id=REPO_ID, filename="best_model.pth")
    config_path = hf_hub_download(repo_id=REPO_ID, filename="config.json")
    speakers_path = hf_hub_download(repo_id=REPO_ID, filename="speakers.pth")
    languages_path = hf_hub_download(repo_id=REPO_ID, filename="language_ids.json")
    speaker_encoder_model_path = hf_hub_download(repo_id=REPO_ID, filename="model_se.pth")
    speaker_encoder_config_path = hf_hub_download(repo_id=REPO_ID, filename="config_se.json")

    # Arguments are passed positionally, exactly as before, so the call keeps
    # working with whichever TTS version is installed.
    # NOTE(review): the two None values are presumably the vocoder checkpoint
    # and config, and the trailing False presumably disables CUDA — confirm
    # against the installed Synthesizer signature.
    return Synthesizer(
        best_model_path,
        config_path,
        speakers_path,
        languages_path,
        None,
        None,
        speaker_encoder_model_path,
        speaker_encoder_config_path,
        False,
    )


def tts(text: str, speaker_idx: str, language_idx: str) -> str:
    """Synthesize *text* and return the path of the generated WAV file.

    Args:
        text: The text to speak.
        speaker_idx: Speaker name as selected in the "Speaker" radio group.
        language_idx: Language name as selected in the "Language" radio group.

    Returns:
        Path to a temporary ``.wav`` file containing the synthesized audio.
    """
    synthesizer = _load_synthesizer()

    # create audio file
    wavs = synthesizer.tts(text, speaker_idx, language_idx)

    # delete=False: the file must outlive this function so that Gradio can
    # serve it through the "filepath" audio output.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name


iface = gr.Interface(
    fn=tts,
    inputs=my_inputs,
    outputs=my_outputs,
    title=my_title,
    description=my_description,
    article=my_article,
    examples=my_examples,
    # "never" is the documented string form of the deprecated boolean False.
    allow_flagging="never",
)

iface.launch()