srinivasbilla committed on
Commit 2779b9c · verified · Parent: 54e3c7d

Update app.py

Files changed (1)
  1. app.py +7 -1
app.py CHANGED
@@ -72,6 +72,12 @@ def infer(sample_audio_path, target_text, progress=gr.Progress()):
     prompt_wav = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform_mono)
     prompt_text = whisper_turbo_pipe(prompt_wav[0].numpy())['text'].strip()
     progress(0.5, 'Transcribed! Generating speech...')
+
+    if len(target_text) == 0:
+        return None
+    elif len(target_text) > 300:
+        gr.Warning("Text is too long. Please keep it under 300 characters.")
+        target_text = target_text[:300]
 
     input_text = prompt_text + ' ' + target_text
 
@@ -104,7 +110,7 @@ def infer(sample_audio_path, target_text, progress=gr.Progress()):
     # Generate the speech autoregressively
     outputs = model.generate(
         input_ids,
-        max_length=2500, # We trained our model with a max length of 2048
+        max_length=2048, # We trained our model with a max length of 2048
         eos_token_id= speech_end_id ,
         do_sample=True,
         top_p=1,
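
Taken in isolation, the guard added in the first hunk behaves as sketched below. This is a minimal sketch, assuming Gradio 4.x and that the code runs inside a Gradio event handler such as infer; the helper name check_target_text and the max_chars parameter are hypothetical and not part of the commit.

import gradio as gr

# Hypothetical standalone version of the length guard added in this commit.
def check_target_text(target_text, max_chars=300):
    if len(target_text) == 0:
        # Empty input: bail out and produce no audio, mirroring the early return in infer.
        return None
    elif len(target_text) > max_chars:
        # Show a non-fatal warning toast in the UI (needs a live Gradio event context),
        # then truncate the text before it is concatenated with the transcribed prompt.
        gr.Warning("Text is too long. Please keep it under 300 characters.")
        target_text = target_text[:max_chars]
    return target_text

The second hunk complements this by lowering max_length from 2500 to 2048 in model.generate, matching the maximum length the comment says the model was trained with.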