File size: 1,856 Bytes
83a55ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from kokoro import generate
from models import build_model
from scipy.io.wavfile import write
from pydub import AudioSegment
import torch
import sys
import numpy as np
import os

# Text to synthesize: the first command-line argument when one is supplied,
# otherwise a built-in default phrase. The chosen text is echoed only when it
# came from the command line.
_has_cli_text = len(sys.argv) >= 2
text = sys.argv[1] if _has_cli_text else "Hello world"
if _has_cli_text:
    print("Got text: ", text)

# Voice identifiers to choose from; each one is expected to have a matching
# 'voices/<name>.pt' file on disk.
_VOICE_CHOICES = (
    'af',  # Default voice is a 50-50 mix of Bella & Sarah
    'af_bella',
    'af_sarah',
    'am_adam',
    'am_michael',
    'bf_emma',
    'bf_isabella',
    'bm_george',
    'bm_lewis',
    'af_nicole',
    'af_sky',
)

# Index 3 selects 'am_adam'; change the index to switch voices.
VOICE_NAME = _VOICE_CHOICES[3]

# Load the voice pack tensor for the selected voice and keep it on the CPU.
_voice_file = f'voices/{VOICE_NAME}.pt'
VOICEPACK = torch.load(_voice_file, weights_only=True).to('cpu')
print(f'Loaded voice: {VOICE_NAME}\n')

# Make sure the model checkpoint exists before trying to build the model, and
# exit with status 1 and a readable message when it does not.
MODEL_PATH = 'kokoro-v0_19.pth'
if not os.path.exists(MODEL_PATH):
    print("\n--------------------\n")
    print(f"Error: Model file '{MODEL_PATH}' does not exist.")
    sys.exit(1)

# Build the model on the CPU. The script previously assigned MODEL = None and
# then immediately tested whether MODEL was already set, so its
# "Model already loaded." branch could never run; the model is always built
# here.
MODEL = build_model(MODEL_PATH, 'cpu')
print("\n--------------------\n")
print("Model loaded.")

# Synthesize the text with the selected voice. The first character of the
# voice name is passed as the language code.
# NOTE(review): presumably 'a'/'b' map to language variants inside kokoro -
# confirm against kokoro.generate.
audio_data, out_ps = generate(MODEL, text, VOICEPACK, lang=VOICE_NAME[0])

# Normalize to [-1, 1] and scale to 16-bit PCM.
audio_data = np.array(audio_data)  # Ensure it's a NumPy array

# Guard the peak: an empty result would make np.max raise, and an all-zero
# (silent) result would divide by zero and fill the output with NaN.
peak = np.max(np.abs(audio_data)) if audio_data.size else 0
if peak > 0:
    normalized_audio = audio_data / peak
else:
    # Nothing to scale: the signal is empty or silent, so it is already
    # "normalized".
    normalized_audio = audio_data
scaled_audio = (normalized_audio * 32767).astype(np.int16)

# Derive the output file stem from the first word of the input text.
# split() with no argument ignores leading/repeated whitespace, so text that
# starts with a space no longer produces an empty stem. Characters that are
# path separators or invalid in file names on common platforms are replaced,
# because the text comes straight from the command line and must not be able
# to redirect the write outside the output directory.
_words = text.split()
_first_word = _words[0] if _words else ""
_stem = "".join(
    "_" if (ch in '<>:"/\\|?*' or ord(ch) < 32) else ch
    for ch in _first_word
) or "output"  # fall back to a fixed name when the text has no usable word

# The output directory may not exist on a fresh checkout; create it so the
# writes below do not fail with FileNotFoundError.
_OUTPUT_DIR = './outputs'
os.makedirs(_OUTPUT_DIR, exist_ok=True)

# Save as WAV (24000 is the sample rate passed to scipy's writer).
wav_path = f'{_OUTPUT_DIR}/{_stem}.wav'
write(wav_path, 24000, scaled_audio)

print("\n--------------------\n")
print(f'[SYSTEM] WAV file saved at: {wav_path}')

# Optional: Convert to MP3 using pydub by re-reading the WAV just written.
mp3_path = f'{_OUTPUT_DIR}/{_stem}.mp3'
audio_segment = AudioSegment.from_file(wav_path, format="wav")
audio_segment.export(mp3_path, format="mp3")

print(f'[SYSTEM] MP3 file saved at: {mp3_path}')