File size: 1,895 Bytes
454da09 fc1fe4c 897a296 fc1fe4c 454da09 8a61d62 454da09 fc1fe4c 454da09 fc1fe4c 454da09 8a61d62 454da09 8a61d62 454da09 fc1fe4c 454da09 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import numpy as np
import onnxruntime as ort
import torch
import scipy.io.wavfile as wav
import gradio as gr
from huggingface_hub import hf_hub_download # Add this import
# Download the ONNX model from the Hugging Face Hub.
# The onnx-community/Kokoro-82M-ONNX repository keeps its exported weights in
# the "onnx/" subfolder, so the repo-relative filename must include that
# prefix; a bare "model.onnx" does not resolve to a file in the repository.
model_path = hf_hub_download(
    repo_id="onnx-community/Kokoro-82M-ONNX",
    filename="onnx/model.onnx",
    cache_dir=".",  # cache the download in the current working directory
)

# Load the ONNX model once at import time so every request reuses the session.
ort_session = ort.InferenceSession(model_path)
# Define speaker options (replace with actual speaker IDs or embeddings).
# Maps each label shown in the UI to a placeholder embedding name.
speaker_options = {
    f"Speaker {index}": f"spk_{index}_embedding" for index in range(1, 4)
}
# Function to generate speech
def generate_speech(text, speaker, *, sample_rate=24000):
    """Synthesize ``text`` with the chosen speaker and return a WAV file path.

    Args:
        text: Text to synthesize; must contain non-whitespace characters.
        speaker: A key of the module-level ``speaker_options`` mapping.
        sample_rate: Sample rate in Hz written to the WAV header. Defaults to
            24000, the output rate documented for Kokoro-82M.

    Returns:
        Path of a newly created WAV file holding the first model output.
        The file is not deleted by this function.

    Raises:
        ValueError: If ``text`` is empty, or if the entry for ``speaker`` in
            ``speaker_options`` cannot be converted to a float32 array.
        KeyError: If ``speaker`` is not a key of ``speaker_options``.
    """
    import tempfile

    if not text or not text.strip():
        raise ValueError("Input text must not be empty.")
    if speaker not in speaker_options:
        raise KeyError(
            f"Unknown speaker {speaker!r}; expected one of "
            f"{list(speaker_options)}."
        )

    # Preprocess the input text and speaker embedding.
    input_text = np.array([text], dtype=np.str_)
    try:
        speaker_embedding = np.array(
            [speaker_options[speaker]], dtype=np.float32
        )
    except (TypeError, ValueError) as err:
        # Placeholder strings such as "spk_1_embedding" cannot be cast to
        # float32; report that explicitly instead of NumPy's generic message.
        raise ValueError(
            f"speaker_options[{speaker!r}] is not a numeric embedding; "
            "replace the placeholder with real float32 values."
        ) from err

    # Run the ONNX model.
    # NOTE(review): the feed names below must match ort_session.get_inputs().
    # The model card for this repository appears to use "input_ids", "style"
    # and "speed" instead -- confirm before deploying.
    ort_inputs = {
        "text": input_text,
        "speaker_embedding": speaker_embedding,
    }
    ort_outputs = ort_session.run(None, ort_inputs)

    # Postprocess the output (assuming the first output is a waveform).
    waveform = ort_outputs[0].squeeze()

    # Write to a unique temporary file so that concurrent requests do not
    # overwrite each other's audio through a shared "output.wav".
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        output_file = handle.name
    wav.write(output_file, sample_rate, waveform)
    return output_file
# Gradio interface
def tts_app(text, speaker):
    """Gradio callback: synthesize ``text`` as ``speaker``; return the audio path."""
    return generate_speech(text, speaker)
# Create the Gradio app
_speaker_choices = list(speaker_options.keys())
iface = gr.Interface(
    fn=tts_app,
    inputs=[
        gr.Textbox(label="Input Text"),
        gr.Dropdown(
            choices=_speaker_choices,
            # Preselect the first speaker so the callback does not receive
            # None when the user submits without touching the dropdown.
            value=_speaker_choices[0] if _speaker_choices else None,
            label="Speaker",
        ),
    ],
    outputs=gr.Audio(label="Generated Speech", type="filepath"),
    title="Text-to-Speech with Kokoro-82M-ONNX",
    description="Generate speech from text using the Kokoro-82M-ONNX model with multiple speaker options.",
)

# Launch the app
iface.launch()