Spaces:

LPhilp1943
/

speech_2_speech_voice_cloning

Build error

LPhilp1943 committed on Mar 17

Commit

592ca27

•

1 Parent(s): 792c625

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -32,9 +32,13 @@ def speech_to_text(input_audio_or_text):
     return transcription.strip()
 def text_to_speech(text):
     text = text.lower().translate(str.maketrans('', '', string.punctuation))
     inputs = tts_tokenizer(text, return_tensors="pt")
-    inputs['input_ids'] = inputs['input_ids'].long()  # Corrected assignment for input_ids
     with torch.no_grad():
         output = tts_model(**inputs).waveform
     waveform = output.numpy().squeeze()
@@ -59,4 +63,4 @@ iface = gr.Interface(
     description="This app converts speech to text and then back to speech, ensuring the output audio is resampled to 16kHz."
 )
-iface.launch(share=True)  # Added `share=True` for creating a public link

     return transcription.strip()
 def text_to_speech(text):
+    # Ensure the text input is not empty to avoid padding errors in the transformer model
+    if not text.strip():
+        return "The text input is empty, please provide a valid string."
     text = text.lower().translate(str.maketrans('', '', string.punctuation))
     inputs = tts_tokenizer(text, return_tensors="pt")
+    inputs['input_ids'] = inputs['input_ids'].long()  # Ensure input_ids are of type Long
     with torch.no_grad():
         output = tts_model(**inputs).waveform
     waveform = output.numpy().squeeze()
     description="This app converts speech to text and then back to speech, ensuring the output audio is resampled to 16kHz."
 )
+iface.launch(share=True)