import numpy as np
import onnxruntime as ort
import scipy.io.wavfile as wav
import gradio as gr
from huggingface_hub import hf_hub_download
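
# Download the ONNX model weights from the Hugging Face Hub; cache_dir="."
# keeps the files under the current directory instead of the default HF cache.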
model_path = hf_hub_download(
    repo_id="onnx-community/Kokoro-82M-ONNX",
    filename="model.onnx",
    cache_dir=".",
)
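
# Build an ONNX Runtime inference session over the downloaded graph
# (defaults to the CPU execution provider).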
ort_session = ort.InferenceSession(model_path)
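
# NOTE: placeholder voices. The original mapping used plain strings, which
# cannot be cast to float32. The zero vectors below (the 256-dim size is an
# assumption) only keep the pipeline runnable end to end; replace them with
# real speaker embeddings for the model.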
speaker_options = {
    "Speaker 1": np.zeros(256, dtype=np.float32),
    "Speaker 2": np.zeros(256, dtype=np.float32),
    "Speaker 3": np.zeros(256, dtype=np.float32),
}


def generate_speech(text, speaker):
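    # ONNX Runtime accepts string tensors as NumPy str_/object arrays. Be
    # aware that the published Kokoro ONNX export may expect phoneme token
    # IDs rather than raw text; this follows the original script's assumption.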
    input_text = np.array([text], dtype=np.str_)
    speaker_embedding = np.array([speaker_options[speaker]], dtype=np.float32)
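
    # The input names "text" and "speaker_embedding" are carried over from
    # the original script as assumptions; confirm the graph's real input
    # names with [i.name for i in ort_session.get_inputs()].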
    ort_inputs = {
        "text": input_text,
        "speaker_embedding": speaker_embedding,
    }
    ort_outputs = ort_session.run(None, ort_inputs)
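
    # The first output is assumed to hold the synthesized mono waveform;
    # squeeze() drops the batch dimension.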
    waveform = ort_outputs[0].squeeze()
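
    # Kokoro synthesizes audio at 24 kHz, not the 22050 Hz originally given.
    # Convert the float waveform to 16-bit PCM so the WAV plays anywhere.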
output_file = "output.wav" |
|
wav.write(output_file, 22050, waveform) |
|
|
|
return output_file |


def tts_app(text, speaker):
    audio_file = generate_speech(text, speaker)
    return audio_file
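

# Wire everything into a Gradio UI: a textbox and a speaker dropdown in,
# a playable WAV file out.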
iface = gr.Interface(
    fn=tts_app,
    inputs=[
        gr.Textbox(label="Input Text"),
        gr.Dropdown(choices=list(speaker_options.keys()), label="Speaker"),
    ],
    outputs=gr.Audio(label="Generated Speech", type="filepath"),
    title="Text-to-Speech with Kokoro-82M-ONNX",
    description="Generate speech from text using the Kokoro-82M-ONNX model with multiple speaker options.",
)

iface.launch()