|
from kokoro import generate
|
|
from models import build_model
|
|
from scipy.io.wavfile import write
|
|
from pydub import AudioSegment
|
|
import torch
|
|
import sys
|
|
import numpy as np
|
|
import os
|
|
|
|
# Text to synthesize. A first command-line argument, when present, replaces
# the built-in default.
text = "Hello world"
if len(sys.argv) > 1:
    text = sys.argv[1]
    print("Got text: ", text)

# Refuse blank input up front: the output file name is derived from the first
# word of the text, so whitespace-only text would yield a nameless file.
if not text.strip():
    print("\n--------------------\n")
    print("Error: No text to synthesize.")
    sys.exit(1)
|
|
|
|
# Voice names this script can choose from. The selected name is later used to
# build the voice file path and, via its first character, the `lang` argument.
AVAILABLE_VOICES = (
    'af',
    'af_bella', 'af_sarah', 'am_adam', 'am_michael',
    'bf_emma', 'bf_isabella', 'bm_george', 'bm_lewis',
    'af_nicole', 'af_sky',
)

# Positional pick: change the index to switch voices.
VOICE_NAME = AVAILABLE_VOICES[3]  # 'am_adam'
|
|
|
|
# Load the selected voice pack from voices/<VOICE_NAME>.pt.
voice_path = f'voices/{VOICE_NAME}.pt'

# Report a missing voice file the same way a missing model file is reported,
# rather than surfacing a raw traceback from torch.load.
if not os.path.exists(voice_path):
    print("\n--------------------\n")
    print(f"Error: Voice file '{voice_path}' does not exist.")
    sys.exit(1)

# map_location='cpu' places the tensors on the CPU during deserialization, so
# a pack saved from a GPU still loads on a machine without one; moving the
# result with .to('cpu') afterwards would only run once the load had succeeded.
VOICEPACK = torch.load(voice_path, map_location='cpu', weights_only=True)
print(f'Loaded voice: {VOICE_NAME}\n')
|
|
|
|
|
|
# Relative path of the model checkpoint (resolved against the working directory).
MODEL_PATH = 'kokoro-v0_19.pth'

# Stop with exit status 1 and a readable message when the checkpoint is
# absent, before any attempt is made to load it.
if not os.path.exists(MODEL_PATH):
    print(
        "\n--------------------\n",
        f"Error: Model file '{MODEL_PATH}' does not exist.",
        sep="\n",
    )
    raise SystemExit(1)
|
|
|
|
# Build the model unless this namespace already holds one (e.g. the script is
# re-executed inside a live session). The existing value is read with
# globals().get(): resetting MODEL to None before testing it would make the
# "already loaded" branch unreachable.
MODEL = globals().get('MODEL')

if MODEL is not None:
    print("\n--------------------\n")
    print("\nModel already loaded.")
else:
    # 'cpu' is forwarded to build_model as its second argument.
    MODEL = build_model(MODEL_PATH, 'cpu')
    print("\n--------------------\n")
    print("Model loaded.")
|
|
|
|
# Run synthesis. generate() receives the model, the input text, the voice pack
# and, as `lang`, the first character of the voice name; its result is
# unpacked into two values.
# NOTE(review): out_ps is not used again in the visible part of the script --
# presumably the phoneme string produced for the text; confirm.
audio_data, out_ps = generate(
    MODEL,
    text,
    VOICEPACK,
    lang=VOICE_NAME[0],
)
|
|
|
|
|
|
# Convert the generated samples to 16-bit integers, peak-normalised so that
# the loudest sample maps to +/-32767.
audio_data = np.array(audio_data)

# An empty clip has no maximum (np.max would raise) and an all-zero clip has a
# peak of 0 (dividing by it would fill the array with NaN), so both cases are
# treated as silence instead of being normalised.
peak = np.max(np.abs(audio_data)) if audio_data.size else 0.0
if peak > 0:
    normalized_audio = audio_data / peak
else:
    normalized_audio = np.zeros(audio_data.shape, dtype=np.float64)

scaled_audio = np.int16(normalized_audio * 32767)
|
|
|
|
|
|
# Characters that must not reach a file name: path separators plus the other
# characters Windows rejects. Replacing them keeps the output inside the
# output directory even when the text starts with something like "../".
_UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'


def _output_stem(source_text):
    """Return a file-name-safe stem taken from the first word of *source_text*.

    Unsafe and control characters are replaced with '_'. 'output' is returned
    when the text contains no word at all.
    """
    # split() with no argument skips leading whitespace, so text that begins
    # with a space still yields its first real word.
    words = source_text.split()
    if not words:
        return 'output'
    return ''.join(
        '_' if (ch in _UNSAFE_FILENAME_CHARS or ord(ch) < 32) else ch
        for ch in words[0]
    )


# Both files share one stem and live in ./outputs, which is created on demand
# so the first run does not fail on a missing directory.
output_dir = './outputs'
os.makedirs(output_dir, exist_ok=True)
_stem = _output_stem(text)

# Write the 16-bit samples as WAV; 24000 is passed as the sample rate in Hz.
wav_path = f'{output_dir}/{_stem}.wav'
write(wav_path, 24000, scaled_audio)

print("\n--------------------\n")
print(f'[SYSTEM] WAV file saved at: {wav_path}')

# Re-read the WAV that was just written and export it again as MP3.
mp3_path = f'{output_dir}/{_stem}.mp3'
audio_segment = AudioSegment.from_file(wav_path, format="wav")
audio_segment.export(mp3_path, format="mp3")

print(f'[SYSTEM] MP3 file saved at: {mp3_path}')
|
|
|