gorkemgoknar committed on
Commit
ef149f3
·
1 Parent(s): 613c465

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -4
app.py CHANGED
@@ -15,6 +15,11 @@ os.environ["COQUI_TOS_AGREED"] = "1"
15
  # Most users expect text to be their own language, there is checkbox to disable it
16
  import langid
17
 
 
 
 
 
 
18
  import gradio as gr
19
  from scipy.io.wavfile import write
20
  from pydub import AudioSegment
@@ -185,9 +190,19 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
185
  try:
186
 
187
  t_latent=time.time()
188
-
189
- gpt_cond_latent, _, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)
190
-
 
 
 
 
 
 
 
 
 
 
191
  latent_calculation_time = time.time() - t_latent
192
  ##metrics_text=f"Embedding calculation time: {latent_calculation_time:.2f} seconds\n"
193
 
@@ -230,12 +245,51 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
230
  DEVICE_ASSERT_PROMPT=prompt
231
  DEVICE_ASSERT_LANG=language
232
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
  # HF Space specific.. This error is unrecoverable need to restart space
235
  api.restart_space(repo_id=repo_id)
236
  else:
237
  print("RuntimeError: non device-side assert error:", str(e))
238
- raise e
 
 
 
 
 
 
 
 
239
 
240
  wav = torch.cat(wav_chunks, dim=0)
241
  torchaudio.save("output.wav", wav.squeeze().unsqueeze(0).cpu(), 24000)
 
15
  # Most users expect text to be their own language, there is checkbox to disable it
16
  import langid
17
 
18
+ import base64
19
+ import csv
20
+ from io import StringIO
21
+ import datetime
22
+
23
  import gradio as gr
24
  from scipy.io.wavfile import write
25
  from pydub import AudioSegment
 
190
  try:
191
 
192
  t_latent=time.time()
193
+ try:
194
+ gpt_cond_latent, _, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)
195
+ except Exception as e:
196
+ if "Failed to decode" in str(e):
197
+ print("Speaker encoding error", str(e))
198
+ gr.Warning("It appears something wrong with reference, did you unmute your microphone?")
199
+ return (
200
+ None,
201
+ None,
202
+ None,
203
+ None,
204
+ )
205
+
206
  latent_calculation_time = time.time() - t_latent
207
  ##metrics_text=f"Embedding calculation time: {latent_calculation_time:.2f} seconds\n"
208
 
 
245
  DEVICE_ASSERT_PROMPT=prompt
246
  DEVICE_ASSERT_LANG=language
247
 
248
+ # Just before restarting, save what caused the issue so we can handle it in the future
249
+ # Uploading error data only happens for an unrecoverable error
250
+ error_time = datetime.datetime.now().strftime('%d-%m-%Y-%H:%M:%S')
251
+ error_data = [error_time, prompt, language, audio_file_pth, mic_file_path, use_mic, voice_cleanup, no_lang_auto_detect, agree]
252
+ error_data = [str(e) if type(e)!=str else e for e in error_data]
253
+ print(error_data)
254
+ print(speaker_wav)
255
+ write_io = StringIO()
256
+ csv.writer(write_io).writerows(error_data)
257
+ csv_upload= write_io.getvalue().encode()
258
+
259
+ filename = error_time+"_xtts-stream_" + str(uuid.uuid4()) +".csv"
260
+ print("Writing error csv")
261
+ error_api = HfApi()
262
+ error_api.upload_file(
263
+ path_or_fileobj=csv_upload,
264
+ path_in_repo=filename,
265
+ repo_id="coqui/xtts-flagged-dataset",
266
+ repo_type="dataset",
267
+ )
268
+
269
+ #speaker_wav
270
+ print("Writing error reference audio")
271
+ speaker_filename = error_time+"_reference_xtts-stream_"+ str(uuid.uuid4()) +".wav"
272
+ error_api = HfApi()
273
+ error_api.upload_file(
274
+ path_or_fileobj=speaker_wav,
275
+ path_in_repo=speaker_filename,
276
+ repo_id="coqui/xtts-flagged-dataset",
277
+ repo_type="dataset",
278
+ )
279
 
280
  # HF Space specific.. This error is unrecoverable need to restart space
281
  api.restart_space(repo_id=repo_id)
282
  else:
283
  print("RuntimeError: non device-side assert error:", str(e))
284
+
285
+ gr.Warning("Something unexpected happened please retry again.")
286
+ return (
287
+ None,
288
+ None,
289
+ None,
290
+ None,
291
+ )
292
+
293
 
294
  wav = torch.cat(wav_chunks, dim=0)
295
  torchaudio.save("output.wav", wav.squeeze().unsqueeze(0).cpu(), 24000)