Spaces:
Running
Running
NeuralFalcon
committed on
Update KOKORO/utils.py
Browse files - KOKORO/utils.py +9 -6
KOKORO/utils.py
CHANGED
@@ -226,7 +226,7 @@ def parse_speechtypes_text(gen_text):
|
|
226 |
|
227 |
return segments
|
228 |
|
229 |
-
def podcast(MODEL, device, gen_text, speed=1.0, trim=0, pad_between_segments=0, remove_silence=True, minimum_silence=50):
|
230 |
segments = parse_speechtypes_text(gen_text)
|
231 |
speed = clamp_speed(speed)
|
232 |
trim = clamp_trim(trim)
|
@@ -276,16 +276,20 @@ def podcast(MODEL, device, gen_text, speed=1.0, trim=0, pad_between_segments=0,
|
|
276 |
return output_file
|
277 |
|
278 |
def tts(MODEL,device,text, voice_name, speed=1.0, trim=0.5, pad_between_segments=0.5, output_file="",remove_silence=True,minimum_silence=50):
|
279 |
-
|
280 |
-
segments = large_text(text, voice_name)
|
281 |
voice_pack_path = f"./KOKORO/voices/{voice_name}.pt"
|
|
|
|
|
|
|
|
|
|
|
282 |
VOICEPACK = torch.load(voice_pack_path, weights_only=True).to(device)
|
283 |
speed = clamp_speed(speed)
|
284 |
trim = clamp_trim(trim)
|
285 |
silence_duration = clamp_trim(pad_between_segments)
|
286 |
output_file=get_random_file_name(output_file)
|
287 |
if debug:
|
288 |
-
print(f'Loaded voice: {
|
289 |
print(f"Speed: {speed}")
|
290 |
print(f"Trim: {trim}")
|
291 |
print(f"Silence duration: {silence_duration}")
|
@@ -305,7 +309,7 @@ def tts(MODEL,device,text, voice_name, speed=1.0, trim=0.5, pad_between_segments
|
|
305 |
text = i[1]
|
306 |
if debug:
|
307 |
print(i)
|
308 |
-
audio, out_ps = generate(MODEL, text, VOICEPACK, lang=
|
309 |
audio = trim_if_needed(audio, trim)
|
310 |
|
311 |
# Scale audio from float32 to int16
|
@@ -339,4 +343,3 @@ def tts_file_name(text):
|
|
339 |
# Construct the file name
|
340 |
file_name = f"{temp_folder}/{truncated_text}_{random_string}.wav"
|
341 |
return file_name
|
342 |
-
|
|
|
226 |
|
227 |
return segments
|
228 |
|
229 |
+
def podcast(MODEL, device, gen_text, speed=1.0, trim=0.5, pad_between_segments=0, remove_silence=True, minimum_silence=50):
|
230 |
segments = parse_speechtypes_text(gen_text)
|
231 |
speed = clamp_speed(speed)
|
232 |
trim = clamp_trim(trim)
|
|
|
276 |
return output_file
|
277 |
|
278 |
def tts(MODEL,device,text, voice_name, speed=1.0, trim=0.5, pad_between_segments=0.5, output_file="",remove_silence=True,minimum_silence=50):
|
279 |
+
language = voice_name[0]
|
|
|
280 |
voice_pack_path = f"./KOKORO/voices/{voice_name}.pt"
|
281 |
+
if voice_name.endswith(".pt"):
|
282 |
+
language="a"
|
283 |
+
voice_pack_path=voice_name
|
284 |
+
text=clean_text(text)
|
285 |
+
segments = large_text(text, language)
|
286 |
VOICEPACK = torch.load(voice_pack_path, weights_only=True).to(device)
|
287 |
speed = clamp_speed(speed)
|
288 |
trim = clamp_trim(trim)
|
289 |
silence_duration = clamp_trim(pad_between_segments)
|
290 |
output_file=get_random_file_name(output_file)
|
291 |
if debug:
|
292 |
+
print(f'Loaded voice: {voice_pack_path}')
|
293 |
print(f"Speed: {speed}")
|
294 |
print(f"Trim: {trim}")
|
295 |
print(f"Silence duration: {silence_duration}")
|
|
|
309 |
text = i[1]
|
310 |
if debug:
|
311 |
print(i)
|
312 |
+
audio, out_ps = generate(MODEL, text, VOICEPACK, lang=language, speed=speed)
|
313 |
audio = trim_if_needed(audio, trim)
|
314 |
|
315 |
# Scale audio from float32 to int16
|
|
|
343 |
# Construct the file name
|
344 |
file_name = f"{temp_folder}/{truncated_text}_{random_string}.wav"
|
345 |
return file_name
|
|