Spaces:

Flux9665
/

IMS-Toucan

Running

App Files Files

Florian Lux committed on Feb 20, 2022

Commit

5468bc2

1 Parent(s): 49696ae

microphone input seems to randomly break everything

Browse files

Files changed (1) hide show

app.py +5 -12

app.py CHANGED Viewed

@@ -32,7 +32,7 @@ class TTS_Interface:
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.model = Meta_FastSpeech2(device=self.device)
-    def read(self, prompt, language, path_to_audio):
         language_id_lookup = {
             "English"  : "en",
             "German"   : "de",
@@ -45,11 +45,6 @@ class TTS_Interface:
             "French"   : "fr"
             }
         self.model.set_language(language_id_lookup[language])
-        if path_to_audio is not None:
-            try:
-                self.model.set_utterance_embedding(path_to_audio)
-            except RuntimeError:
-                pass
         wav = self.model(prompt)
         return 48000, float2pcm(wav.cpu().numpy())
@@ -58,7 +53,9 @@ meta_model = TTS_Interface()
 article = "<p style='text-align: left'>This is still a work in progress, models will be exchanged for better ones as soon as they are done. All of those languages are spoken by a single model. Speakers can be transferred across languages. More languages will be added soon.</p><p style='text-align: center'><a href='https://github.com/DigitalPhonetics/IMS-Toucan' target='_blank'>Click here to learn more about the IMS Toucan Speech Synthesis Toolkit</a></p>"
 iface = gr.Interface(fn=meta_model.read,
-                     inputs=[gr.inputs.Textbox(lines=2, placeholder="write what you want the synthesis to read here...", label=" "),
                              gr.inputs.Dropdown(['English',
                                                  'German',
                                                  'Greek',
@@ -67,11 +64,7 @@ iface = gr.Interface(fn=meta_model.read,
                                                  'Russian',
                                                  'Hungarian',
                                                  'Dutch',
-                                                 'French'], type="value", default='English', label="Language Selection"),
-                             gr.inputs.Audio(source="microphone",
-                                             optional=True,
-                                             label="Make the TTS imitate your Voice (optional, press once to start recording and again to stop)",
-                                             type="filepath")],
                      outputs=gr.outputs.Audio(type="numpy", label=None),
                      layout="vertical",
                      title="IMS Toucan Multilingual Multispeaker Demo",

         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.model = Meta_FastSpeech2(device=self.device)
+    def read(self, prompt, language):
         language_id_lookup = {
             "English"  : "en",
             "German"   : "de",
             "French"   : "fr"
             }
         self.model.set_language(language_id_lookup[language])
         wav = self.model(prompt)
         return 48000, float2pcm(wav.cpu().numpy())
 article = "<p style='text-align: left'>This is still a work in progress, models will be exchanged for better ones as soon as they are done. All of those languages are spoken by a single model. Speakers can be transferred across languages. More languages will be added soon.</p><p style='text-align: center'><a href='https://github.com/DigitalPhonetics/IMS-Toucan' target='_blank'>Click here to learn more about the IMS Toucan Speech Synthesis Toolkit</a></p>"
 iface = gr.Interface(fn=meta_model.read,
+                     inputs=[gr.inputs.Textbox(lines=2,
+                                               placeholder="write what you want the synthesis to read here...",
+                                               label=" "),
                              gr.inputs.Dropdown(['English',
                                                  'German',
                                                  'Greek',
                                                  'Russian',
                                                  'Hungarian',
                                                  'Dutch',
+                                                 'French'], type="value", default='English', label="Language Selection")],
                      outputs=gr.outputs.Audio(type="numpy", label=None),
                      layout="vertical",
                      title="IMS Toucan Multilingual Multispeaker Demo",