Florian Lux committed on
Commit
5468bc2
·
1 Parent(s): 49696ae

microphone input seems to randomly break everything

Browse files
Files changed (1) hide show
  1. app.py +5 -12
app.py CHANGED
@@ -32,7 +32,7 @@ class TTS_Interface:
32
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
33
  self.model = Meta_FastSpeech2(device=self.device)
34
 
35
- def read(self, prompt, language, path_to_audio):
36
  language_id_lookup = {
37
  "English" : "en",
38
  "German" : "de",
@@ -45,11 +45,6 @@ class TTS_Interface:
45
  "French" : "fr"
46
  }
47
  self.model.set_language(language_id_lookup[language])
48
- if path_to_audio is not None:
49
- try:
50
- self.model.set_utterance_embedding(path_to_audio)
51
- except RuntimeError:
52
- pass
53
  wav = self.model(prompt)
54
  return 48000, float2pcm(wav.cpu().numpy())
55
 
@@ -58,7 +53,9 @@ meta_model = TTS_Interface()
58
  article = "<p style='text-align: left'>This is still a work in progress, models will be exchanged for better ones as soon as they are done. All of those languages are spoken by a single model. Speakers can be transferred across languages. More languages will be added soon.</p><p style='text-align: center'><a href='https://github.com/DigitalPhonetics/IMS-Toucan' target='_blank'>Click here to learn more about the IMS Toucan Speech Synthesis Toolkit</a></p>"
59
 
60
  iface = gr.Interface(fn=meta_model.read,
61
- inputs=[gr.inputs.Textbox(lines=2, placeholder="write what you want the synthesis to read here...", label=" "),
 
 
62
  gr.inputs.Dropdown(['English',
63
  'German',
64
  'Greek',
@@ -67,11 +64,7 @@ iface = gr.Interface(fn=meta_model.read,
67
  'Russian',
68
  'Hungarian',
69
  'Dutch',
70
- 'French'], type="value", default='English', label="Language Selection"),
71
- gr.inputs.Audio(source="microphone",
72
- optional=True,
73
- label="Make the TTS imitate your Voice (optional, press once to start recording and again to stop)",
74
- type="filepath")],
75
  outputs=gr.outputs.Audio(type="numpy", label=None),
76
  layout="vertical",
77
  title="IMS Toucan Multilingual Multispeaker Demo",
 
32
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
33
  self.model = Meta_FastSpeech2(device=self.device)
34
 
35
+ def read(self, prompt, language):
36
  language_id_lookup = {
37
  "English" : "en",
38
  "German" : "de",
 
45
  "French" : "fr"
46
  }
47
  self.model.set_language(language_id_lookup[language])
 
 
 
 
 
48
  wav = self.model(prompt)
49
  return 48000, float2pcm(wav.cpu().numpy())
50
 
 
53
  article = "<p style='text-align: left'>This is still a work in progress, models will be exchanged for better ones as soon as they are done. All of those languages are spoken by a single model. Speakers can be transferred across languages. More languages will be added soon.</p><p style='text-align: center'><a href='https://github.com/DigitalPhonetics/IMS-Toucan' target='_blank'>Click here to learn more about the IMS Toucan Speech Synthesis Toolkit</a></p>"
54
 
55
  iface = gr.Interface(fn=meta_model.read,
56
+ inputs=[gr.inputs.Textbox(lines=2,
57
+ placeholder="write what you want the synthesis to read here...",
58
+ label=" "),
59
  gr.inputs.Dropdown(['English',
60
  'German',
61
  'Greek',
 
64
  'Russian',
65
  'Hungarian',
66
  'Dutch',
67
+ 'French'], type="value", default='English', label="Language Selection")],
 
 
 
 
68
  outputs=gr.outputs.Audio(type="numpy", label=None),
69
  layout="vertical",
70
  title="IMS Toucan Multilingual Multispeaker Demo",