Spaces:

gsarti
/

local-tts

Running

App Files Files Community

local-tts / app.py

gsarti

Add to LFS

ecc3537 19 days ago

raw

history blame

4.46 kB

	import os
	import spaces
	import tempfile
	import soundfile as sf
	import requests
	from markdown import Markdown
	from io import StringIO

	import gradio as gr
	from kokoro_onnx import Kokoro
	from markitdown import MarkItDown

	# Document converter; process_input() reads ``.text_content`` from its results.
	md = MarkItDown()

	# Kokoro ONNX synthesizer. The model and voices files are given as relative
	# paths, so they are looked up relative to the working directory.
	kokoro = Kokoro("kokoro-v0_19.onnx", "voices.json")

	# Voice identifiers offered in the UI, keyed by language code.
	voices = {
	    "en-us": [
	        "af",
	        "af_bella",
	        "af_nicole",
	        "af_sarah",
	        "af_sky",
	        "am_adam",
	        "am_michael",
	    ],
	    "en-gb": [
	        "bf_emma",
	        "bf_isabella",
	        "bm_george",
	        "bm_lewis",
	    ],
	}

	def _write_element_text(element, stream):
	    """Append the text of ``element`` and its descendants to ``stream`` in document order."""
	    if element.text:
	        stream.write(element.text)
	    for child in element:
	        _write_element_text(child, stream)
	    # The tail is the text that follows the element's closing tag.
	    if element.tail:
	        stream.write(element.tail)


	def unmark_element(element, stream=None):
	    """Serialize an element tree to plain text, dropping all tags.

	    Args:
	        element: Root element; it must expose ``text``, ``tail`` and be
	            iterable over its children (the ``xml.etree`` element interface).
	        stream: Optional writable text buffer to append to. A new
	            ``StringIO`` is created when omitted.

	    Returns:
	        The full contents of ``stream`` after the element's text, the text of
	        all of its descendants, and its tail have been written.
	    """
	    if stream is None:
	        stream = StringIO()
	    # The recursion lives in the helper so that the buffer is copied out only
	    # once, here, rather than once per visited element.
	    _write_element_text(element, stream)
	    return stream.getvalue()


	# Patch Markdown: register a "plain" output format whose serializer is
	# unmark_element, so converted documents come out as text without tags.
	# The registration comes first because the instance below is created with
	# output_format="plain".
	Markdown.output_formats["plain"] = unmark_element
	__md = Markdown(output_format="plain")
	# NOTE(review): presumably this stops convert() from trying to strip a wrapper
	# tag that the plain-text serializer never emits — confirm against Python-Markdown.
	__md.stripTopLevelTags = False


	def markdown2text(text):
	    """Convert Markdown source to plain text with the markup removed.

	    Args:
	        text: Markdown source string.

	    Returns:
	        The output of the module-level ``__md`` converter, which is configured
	        with the "plain" output format.
	    """
	    # The converter instance is shared by every request. reset() clears the
	    # state left over from the previous document (e.g. link reference
	    # definitions) and returns the instance, so the calls can be chained.
	    return __md.reset().convert(text)


	@spaces.GPU
	def text_to_speech(text, voice, speed, lang):
	    """Synthesize ``text`` with Kokoro and save the audio as a WAV file.

	    Args:
	        text: Text to speak.
	        voice: Voice identifier passed to ``kokoro.create``.
	        speed: Speaking-rate value; converted to ``float`` before use.
	        lang: Language code passed to ``kokoro.create``.

	    Returns:
	        Path of ``output.wav`` inside a newly created temporary directory.

	    Raises:
	        gr.Error: If ``text`` is blank, or if synthesis or writing the file
	            fails. The caller passes the return value on as an audio file
	            path, so a failure must not be returned as an ordinary string.
	    """
	    if not text or not text.strip():
	        raise gr.Error("There is no text to convert to speech.")

	    try:
	        # Generate audio
	        samples, sample_rate = kokoro.create(
	            text,
	            voice=voice,
	            speed=float(speed),
	            lang=lang,
	        )

	        # The directory is intentionally not removed here: the returned file
	        # has to outlive this call so that the caller can read it.
	        temp_dir = tempfile.mkdtemp()
	        temp_path = os.path.join(temp_dir, "output.wav")

	        # Save to temporary file
	        sf.write(temp_path, samples, sample_rate)
	    except Exception as e:
	        # Boundary handler: report any failure to the UI with its message.
	        raise gr.Error(f"Speech synthesis failed: {e}") from e
	    return temp_path


	def create_temp_html_from_url(url: str, timeout: float = 30.0) -> str:
	    """Download ``url`` and save the response body to a temporary HTML file.

	    Args:
	        url: Address to fetch with an HTTP GET request.
	        timeout: Seconds to wait for the server before giving up. Without a
	            timeout the request could block indefinitely.

	    Returns:
	        Path of ``output.html`` inside a newly created temporary directory.

	    Raises:
	        requests.HTTPError: If the request fails, the server answers with an
	            error status, or the file cannot be written. The original
	            exception is attached as the cause.
	    """
	    # NOTE(review): the URL comes from the user and is fetched by the server;
	    # consider restricting schemes/hosts if this runs on a private network.
	    try:
	        response = requests.get(url, timeout=timeout)
	        response.raise_for_status()
	        html = response.text
	        temp_dir = tempfile.mkdtemp()
	        temp_path = os.path.join(temp_dir, "output.html")

	        # Explicit encoding so the result does not depend on the platform
	        # default and any decoded character can be written.
	        with open(temp_path, "w", encoding="utf-8") as f:
	            f.write(html)
	    except Exception as e:
	        # Every failure is reported through the single exception type this
	        # function has always raised.
	        raise requests.HTTPError(f"Error fetching URL: {str(e)}") from e
	    return temp_path


	def process_input(input_type, url_input, file_input, text_input, voice, speed, lang):
	    """Turn the selected input into text and synthesize speech from it.

	    Args:
	        input_type: "URL", "File", or any other value for custom text.
	        url_input: Address to download when ``input_type`` is "URL".
	        file_input: Uploaded file to convert when ``input_type`` is "File".
	        text_input: Text to speak directly for custom text.
	        voice: Voice identifier forwarded to ``text_to_speech``.
	        speed: Speaking-rate value forwarded to ``text_to_speech``.
	        lang: Language code forwarded to ``text_to_speech``.

	    Returns:
	        A ``(markdown, audio_path)`` tuple: the Markdown produced from the
	        input (or the custom text unchanged) and the ``text_to_speech`` result.

	    Raises:
	        gr.Error: If no input type is selected, the input required by the
	            selected type is missing, or the URL cannot be fetched.
	    """
	    if input_type is None:
	        raise gr.Error("Please select an input type.")

	    if input_type in ("URL", "File"):
	        if input_type == "URL":
	            if not url_input or not url_input.strip():
	                raise gr.Error("Please enter a URL.")
	            try:
	                filepath = create_temp_html_from_url(url_input.strip())
	            except requests.HTTPError as e:
	                # Show the fetch failure to the user with its message.
	                raise gr.Error(str(e)) from e
	        else:
	            if file_input is None:
	                raise gr.Error("Please upload a file.")
	            filepath = file_input
	        markdown = md.convert(filepath).text_content
	        # The Markdown is shown as-is; the speech uses the markup-free text.
	        text = markdown2text(markdown)
	    else:
	        if not text_input or not text_input.strip():
	            raise gr.Error("Please enter some text.")
	        markdown = text_input
	        text = text_input

	    audio_path = text_to_speech(text, voice, speed, lang)
	    return markdown, audio_path


	# Build the Gradio interface: input selection and voice settings on the left,
	# the matching input widget on the right, results and the button below.
	with gr.Blocks() as demo:
	    gr.Markdown(
	        "# Local TTS demo 🗣️ \nProvide a URL or upload a file to convert its content into speech using [Markitdown](https://github.com/microsoft/markitdown) and [Kokoro-ONNX](https://github.com/thewh1teagle/kokoro-onnx)."
	    )

	    with gr.Row():
	        with gr.Column():
	            # Default to "URL" because the URL box is the input widget that is
	            # visible at start; without a default the handler receives None.
	            input_type = gr.Radio(["URL", "File", "Custom Text"], label="Input Type", value="URL")
	            with gr.Row():
	                speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed")
	                # Pass a real list rather than a dict view as the choices.
	                lang = gr.Dropdown(choices=list(voices), label="Language", value="en-us")
	                voice = gr.Dropdown(choices=voices[lang.value], label="Voice", value=voices[lang.value][0])
	        with gr.Column():
	            url_input = gr.Textbox(label="Enter URL")
	            file_input = gr.File(label="Upload File", visible=False)
	            text_input = gr.Textbox(label="Text", visible=False, lines=5, placeholder="Enter text here", show_label=False, interactive=True)

	    def toggle_file_input(input_type):
	        """Return visibility updates for the file, URL and text widgets, in that order."""
	        return gr.update(visible=(input_type == "File")), gr.update(
	            visible=(input_type == "URL"),
	        ), gr.update(visible=(input_type == "Custom Text"))

	    def update_lang(lang):
	        """Return a voice dropdown listing the voices of ``lang``, with the first one selected."""
	        return gr.Dropdown(choices=voices[lang], label="Voice", value=voices[lang][0])

	    input_type.change(toggle_file_input, input_type, [file_input, url_input, text_input])
	    lang.change(update_lang, lang, [voice])

	    with gr.Accordion("Markdown output", open=False):
	        output_markdown = gr.Markdown("Parsed markdown will appear here", label="Parsed Text", show_copy_button=True)
	    output_audio = gr.Audio(label="Generated Audio")
	    submit_button = gr.Button("Convert")

	    submit_button.click(
	        process_input,
	        inputs=[input_type, url_input, file_input, text_input, voice, speed, lang],
	        outputs=[output_markdown, output_audio],
	    )

	demo.launch()