"""Gradio demo for a multilingual, multi-speaker Coqui TTS model.

The script downloads the model files from the Hugging Face Hub repository
named by ``REPO_ID``, builds a Coqui ``Synthesizer`` from them and exposes it
through a ``gr.Interface`` with three inputs: the text to speak, a language
code and a voice name.
"""

import functools
import tempfile

import gradio as gr
from huggingface_hub import hf_hub_download
from TTS.utils.synthesizer import Synthesizer

REPO_ID = "mbarnig/lb-de-fr-en-pt-coqui-vits-tts"

my_title = "🇩🇪 🇫🇷 🇬🇧 🇵🇹 Mir schwätzen och Lëtzebuergesch ! 🇱🇺"
my_description = "First multilingual-multispeaker Text-to-Speech (TTS) synthesizer speaking Luxembourgish. This model is based on [YourTTS](https://github.com/Edresson/YourTTS), thanks to 🐸 [Coqui.ai](https://coqui.ai/)."

# The same fable opening in each supported language; used as default text
# and as example rows.
lb_text = "An der Zäit hunn sech den Nordwand an d'Sonn gestridden, wie vun hinnen zwee wuel méi staark wier, wéi e Wanderer, deen an ee waarme Mantel agepak war, iwwert de Wee koum."
de_text = "Einst stritten sich Nordwind und Sonne, wer von ihnen beiden wohl der Stärkere wäre, als ein Wanderer, der in einen warmen Mantel gehüllt war, des Weges daherkam."
fr_text = "La bise et le soleil se disputaient, chacun assurant qu'il était le plus fort, quand ils ont vu un voyageur qui s'avançait, enveloppé dans son manteau."
en_text = "The North Wind and the Sun were disputing which was the stronger, when a traveler came along wrapped in a warm cloak."
pt_text = "O vento norte e o Sol discutiam quem era o mais forte, quando surgiu um viajante envolvido numa capa."

TTS_VOICES = [
    "Judith",
    "Luc",
    "Kerstin",
    "Ed",
    "Linda",
]

TTS_LANGUAGES = [
    "x-lb",
    "x-de",
    "fr-fr",
    "en",
    "pt-br",
]

# Each example row follows the order of ``my_inputs``: text, language, voice.
my_examples = [
    [lb_text, "x-lb", "Luc"],
    [de_text, "x-de", "Judith"],
    [fr_text, "fr-fr", "Kerstin"],
    [en_text, "en", "Ed"],
    [pt_text, "pt-br", "Linda"],
]

# The article text spans several lines. A single-quoted Python string cannot
# contain raw line breaks, so the breaks are spelled out as "\n".
# NOTE(review): the text reads as if markup (headings, a hyperlink around
# "Internet with a Brain") was lost at some point - restore it if available.
my_article = (
    "\n"
    "1. Press the Submit button to generate a speech file with the default values. 2. Change the default values by clicking an example row. 3. Select a language and a voice and enter your own text. Have fun!\n"
    "Go to Internet with a Brain to read some technical infos.\n"
)

# Input components, in the order their values are handed to the callback:
# text, language, voice.
my_inputs = [
    gr.inputs.Textbox(lines=5, label="Input Text", default=lb_text),
    gr.inputs.Radio(label="Select a language", choices=TTS_LANGUAGES, default="x-lb"),
    gr.inputs.Radio(label="Select a voice", choices=TTS_VOICES, default="Judith"),
]

my_outputs = gr.outputs.Audio(type="file", label="Output Audio")

# Files fetched from REPO_ID, listed in the order they are downloaded.
_MODEL_FILES = (
    "best_model.pth",
    "config.json",
    "speakers.pth",
    "language_ids.json",
    "model_se.pth",
    "config_se.json",
)


@functools.lru_cache(maxsize=1)
def _load_synthesizer() -> Synthesizer:
    """Download the model files and build the synthesizer once.

    The result is cached so that later requests reuse the loaded model
    instead of re-resolving every file and re-initialising the synthesizer.

    NOTE(review): the cached instance is shared by all requests; confirm that
    concurrent ``Synthesizer.tts`` calls are acceptable for this deployment.

    Returns:
        The ``Synthesizer`` built from the files in ``REPO_ID``.
    """
    paths = {}
    for filename in _MODEL_FILES:
        paths[filename] = hf_hub_download(repo_id=REPO_ID, filename=filename)
        # Log where each file was resolved to, for troubleshooting.
        print(paths[filename])

    # Arguments are kept positional, exactly as the script always passed them.
    # NOTE(review): the two ``None`` values and the trailing ``False`` are
    # presumably the vocoder checkpoint/config and the CUDA switch - confirm
    # against the installed TTS version.
    return Synthesizer(
        paths["best_model.pth"],
        paths["config.json"],
        paths["speakers.pth"],
        paths["language_ids.json"],
        None,
        None,
        paths["model_se.pth"],
        paths["config_se.json"],
        False,
    )


def tts(text: str, speaker_idx: str, language_idx: str) -> str:
    """Synthesize ``text`` and return the path of the generated WAV file.

    Args:
        text: The text to speak.
        speaker_idx: Voice name, one of ``TTS_VOICES``.
        language_idx: Language code, one of ``TTS_LANGUAGES``.

    Returns:
        Path of a temporary ``.wav`` file. The file is created with
        ``delete=False`` so it still exists after this function returns.
    """
    synthesizer = _load_synthesizer()

    # create audio file
    wavs = synthesizer.tts(text, speaker_idx, language_idx)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name


def _tts_from_ui(text: str, language: str, voice: str) -> str:
    """Adapt the UI input order (text, language, voice) to ``tts``.

    Gradio passes component values positionally in the order of
    ``my_inputs``. Handing ``tts`` to the interface directly would bind the
    language code to ``speaker_idx`` and the voice name to ``language_idx``,
    so the values are forwarded by keyword instead.
    """
    return tts(text, speaker_idx=voice, language_idx=language)


iface = gr.Interface(
    fn=_tts_from_ui,
    inputs=my_inputs,
    outputs=my_outputs,
    title=my_title,
    description=my_description,
    article=my_article,
    examples=my_examples,
    allow_flagging=False,
)
iface.launch()