NeuralFalcon committed on
Commit
de85bf8
·
verified ·
1 Parent(s): 4502dba

Update KOKORO/utils.py

Browse files
Files changed (1) hide show
  1. KOKORO/utils.py +9 -6
KOKORO/utils.py CHANGED
@@ -226,7 +226,7 @@ def parse_speechtypes_text(gen_text):
226
 
227
  return segments
228
 
229
- def podcast(MODEL, device, gen_text, speed=1.0, trim=0, pad_between_segments=0, remove_silence=True, minimum_silence=50):
230
  segments = parse_speechtypes_text(gen_text)
231
  speed = clamp_speed(speed)
232
  trim = clamp_trim(trim)
@@ -276,16 +276,20 @@ def podcast(MODEL, device, gen_text, speed=1.0, trim=0, pad_between_segments=0,
276
  return output_file
277
 
278
  def tts(MODEL,device,text, voice_name, speed=1.0, trim=0.5, pad_between_segments=0.5, output_file="",remove_silence=True,minimum_silence=50):
279
- text=clean_text(text)
280
- segments = large_text(text, voice_name)
281
  voice_pack_path = f"./KOKORO/voices/{voice_name}.pt"
 
 
 
 
 
282
  VOICEPACK = torch.load(voice_pack_path, weights_only=True).to(device)
283
  speed = clamp_speed(speed)
284
  trim = clamp_trim(trim)
285
  silence_duration = clamp_trim(pad_between_segments)
286
  output_file=get_random_file_name(output_file)
287
  if debug:
288
- print(f'Loaded voice: {voice_name}')
289
  print(f"Speed: {speed}")
290
  print(f"Trim: {trim}")
291
  print(f"Silence duration: {silence_duration}")
@@ -305,7 +309,7 @@ def tts(MODEL,device,text, voice_name, speed=1.0, trim=0.5, pad_between_segments
305
  text = i[1]
306
  if debug:
307
  print(i)
308
- audio, out_ps = generate(MODEL, text, VOICEPACK, lang=voice_name[0], speed=speed)
309
  audio = trim_if_needed(audio, trim)
310
 
311
  # Scale audio from float32 to int16
@@ -339,4 +343,3 @@ def tts_file_name(text):
339
  # Construct the file name
340
  file_name = f"{temp_folder}/{truncated_text}_{random_string}.wav"
341
  return file_name
342
-
 
226
 
227
  return segments
228
 
229
+ def podcast(MODEL, device, gen_text, speed=1.0, trim=0.5, pad_between_segments=0, remove_silence=True, minimum_silence=50):
230
  segments = parse_speechtypes_text(gen_text)
231
  speed = clamp_speed(speed)
232
  trim = clamp_trim(trim)
 
276
  return output_file
277
 
278
  def tts(MODEL,device,text, voice_name, speed=1.0, trim=0.5, pad_between_segments=0.5, output_file="",remove_silence=True,minimum_silence=50):
279
+ language = voice_name[0]
 
280
  voice_pack_path = f"./KOKORO/voices/{voice_name}.pt"
281
+ if voice_name.endswith(".pt"):
282
+ language="a"
283
+ voice_pack_path=voice_name
284
+ text=clean_text(text)
285
+ segments = large_text(text, language)
286
  VOICEPACK = torch.load(voice_pack_path, weights_only=True).to(device)
287
  speed = clamp_speed(speed)
288
  trim = clamp_trim(trim)
289
  silence_duration = clamp_trim(pad_between_segments)
290
  output_file=get_random_file_name(output_file)
291
  if debug:
292
+ print(f'Loaded voice: {voice_pack_path}')
293
  print(f"Speed: {speed}")
294
  print(f"Trim: {trim}")
295
  print(f"Silence duration: {silence_duration}")
 
309
  text = i[1]
310
  if debug:
311
  print(i)
312
+ audio, out_ps = generate(MODEL, text, VOICEPACK, lang=language, speed=speed)
313
  audio = trim_if_needed(audio, trim)
314
 
315
  # Scale audio from float32 to int16
 
343
  # Construct the file name
344
  file_name = f"{temp_folder}/{truncated_text}_{random_string}.wav"
345
  return file_name