ccoreilly committed on
Commit
2c3e79f
1 Parent(s): 4e5413c

return numpy arrays

Browse files
Files changed (2) hide show
  1. app.py +14 -11
  2. engine.py +1 -1
app.py CHANGED
@@ -54,16 +54,15 @@ def tts(text, speaker_idx):
54
  print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
55
  print(text)
56
 
57
- speakers_maping_path = os.getcwd() + "/speaker_map.json"
58
-
59
- # Map speaker aliases to speaker ids
60
- with open(speakers_maping_path, 'r') as fp:
61
- maping = json.load(fp)
62
 
63
- #speaker_idx = maping[speaker_idx]
 
 
64
 
65
- # synthesize
66
- wavs = model_bsc.tts(text, speaker_idx)
67
  # return output
68
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
69
  model_bsc.save_wav(wavs, fp)
@@ -82,13 +81,17 @@ article= ""
82
  iface = gr.Interface(
83
  fn=tts,
84
  inputs=[
85
- gr.inputs.Textbox(
86
  label="Text",
87
  default="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
88
  ),
89
- gr.inputs.Dropdown(label="Selecciona un parlant", choices=SPEAKERS, default=None)
 
 
 
 
 
90
  ],
91
- outputs=gr.outputs.Audio(label="Output",type="filepath"),
92
  title="🗣️ TTS Català Multi Parlant - VITS 🗣️",
93
  description=description,
94
  article=article,
 
54
  print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
55
  print(text)
56
 
57
+ # synthesize
58
+ wav_bsc = model_bsc.tts(text, speaker_idx)
59
+ wav_coll = model_collectivat.tts(text)
60
+ wav_piper = model_piper.synthesize(text)
 
61
 
62
+ return (model_bsc.tts_config.audio["sample_rate"], wav_bsc),
63
+ (22000, wav_coll),
64
+ (16000, wav_piper)
65
 
 
 
66
  # return output
67
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
68
  model_bsc.save_wav(wavs, fp)
 
81
  iface = gr.Interface(
82
  fn=tts,
83
  inputs=[
84
+ gr.Textbox(
85
  label="Text",
86
  default="L'Èlia i l'Alí a l'aula. L'oli i l'ou. Lulú olorava la lila.",
87
  ),
88
+ gr.Dropdown(label="Selecciona un parlant", choices=SPEAKERS, default=None)
89
+ ],
90
+ outputs=[
91
+ gr.Audio(label="BSC VITS",type="numpy"),
92
+ gr.Audio(label="Collectivat Fastspeech",type="numpy"),
93
+ gr.Audio(label="Piper VITS",type="numpy")
94
  ],
 
95
  title="🗣️ TTS Català Multi Parlant - VITS 🗣️",
96
  description=description,
97
  article=article,
engine.py CHANGED
@@ -104,7 +104,7 @@ class Piper:
104
  },
105
  )[0].squeeze((0, 1))
106
  audio = audio_float_to_int16(audio.squeeze())
107
-
108
  # Convert to WAV
109
  with io.BytesIO() as wav_io:
110
  wav_file: wave.Wave_write = wave.open(wav_io, "wb")
 
104
  },
105
  )[0].squeeze((0, 1))
106
  audio = audio_float_to_int16(audio.squeeze())
107
+ return audio
108
  # Convert to WAV
109
  with io.BytesIO() as wav_io:
110
  wav_file: wave.Wave_write = wave.open(wav_io, "wb")