Spaces:

gsarti
/

local-tts

Running

App Files Files Community

gsarti committed 9 days ago

Commit

ecc3537

1 Parent(s): 4ecef2b

Add to LFS

Browse files

Files changed (9) hide show

.gitattributes +1 -0
.gitignore +116 -0
README.md +8 -2
app.py +134 -0
kokoro-v0_19.onnx +3 -0
pyproject.toml +14 -0
requirements.txt +4 -0
uv.lock +0 -0
voices.json +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,116 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# Gradio specific
+gradio/
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# ruff
+.ruff_cache/
+# celery beat schedule file
+celerybeat-schedule
+# dotenv
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyderworkspace
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-title: Local Tts
-emoji: 🏢
 colorFrom: purple
 colorTo: green
 sdk: gradio
@@ -8,7 +8,13 @@ sdk_version: 5.12.0
 app_file: app.py
 pinned: false
 license: apache-2.0
 short_description: Local TTS for docs and webpages w/  Kokoro-TTS + Markitdown
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Local TTS
+emoji: 🗣️
 colorFrom: purple
 colorTo: green
 sdk: gradio
 app_file: app.py
 pinned: false
 license: apache-2.0
+pinned: true
 short_description: Local TTS for docs and webpages w/  Kokoro-TTS + Markitdown
+tags:
+  - tts
+  - speech
+  - reader
+  - kokoro
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,134 @@

+import os
+import spaces
+import tempfile
+import soundfile as sf
+import requests
+from markdown import Markdown
+from io import StringIO
+import gradio as gr
+from kokoro_onnx import Kokoro
+from markitdown import MarkItDown
+md = MarkItDown()
+kokoro = Kokoro("kokoro-v0_19.onnx", "voices.json")
+voices = {
+    "en-us": ['af', 'af_bella', 'af_nicole', 'af_sarah', 'af_sky', 'am_adam', 'am_michael'],
+    "en-gb": ['bf_emma', 'bf_isabella', 'bm_george', 'bm_lewis']
+}
+def unmark_element(element, stream=None):
+    if stream is None:
+        stream = StringIO()
+    if element.text:
+        stream.write(element.text)
+    for sub in element:
+        unmark_element(sub, stream)
+    if element.tail:
+        stream.write(element.tail)
+    return stream.getvalue()
+# patching Markdown
+Markdown.output_formats["plain"] = unmark_element
+__md = Markdown(output_format="plain")
+__md.stripTopLevelTags = False
+def markdown2text(text):
+    return __md.convert(text)
+@spaces.GPU
+def text_to_speech(text, voice, speed, lang):
+    try:
+        # Generate audio
+        samples, sample_rate = kokoro.create(
+            text,
+            voice=voice,
+            speed=float(speed),
+            lang=lang
+        )
+        # Create temporary file
+        temp_dir = tempfile.mkdtemp()
+        temp_path = os.path.join(temp_dir, "output.wav")
+        # Save to temporary file
+        sf.write(temp_path, samples, sample_rate)
+        return temp_path
+    except Exception as e:
+        return f"Error: {str(e)}"
+def create_temp_html_from_url(url: str) -> str:
+    try:
+        response = requests.get(url)
+        response.raise_for_status()
+        html = response.text
+        temp_dir = tempfile.mkdtemp()
+        temp_path = os.path.join(temp_dir, "output.html")
+        with open(temp_path, "w") as f:
+            f.write(html)
+    except Exception as e:
+        raise requests.HTTPError(f"Error fetching URL: {str(e)}") from e
+    return temp_path
+def process_input(input_type, url_input, file_input, text_input, voice, speed, lang):
+    if input_type in ["URL", "File"]:
+        if input_type == "URL":
+            filepath = create_temp_html_from_url(url_input)
+        else:
+            filepath = file_input
+        print(filepath)
+        markdown = md.convert(filepath).text_content
+        text = markdown2text(markdown)
+    else:
+        markdown = text_input
+        text = text_input
+    audio_path = text_to_speech(text, voice, speed, lang)
+    return markdown, audio_path
+with gr.Blocks() as demo:
+    gr.Markdown(
+        "# Local TTS demo 🗣️ \nProvide a URL or upload a file to convert its content into speech using [Markitdown](https://github.com/microsoft/markitdown) and [Kokoro-ONNX](https://github.com/thewh1teagle/kokoro-onnx)."
+    )
+    with gr.Row():
+        with gr.Column():
+            input_type = gr.Radio(["URL", "File", "Custom Text"], label="Input Type")
+            with gr.Row():
+                speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed")
+                lang = gr.Dropdown(choices=voices.keys(), label="Language", value="en-us")
+                voice = gr.Dropdown(choices=voices[lang.value], label="Voice", value=voices[lang.value][0])
+        with gr.Column():
+            url_input = gr.Textbox(label="Enter URL")
+            file_input = gr.File(label="Upload File", visible=False)
+            text_input = gr.Textbox(label="Text", visible=False, lines=5, placeholder="Enter text here", show_label=False, interactive=True)
+    def toggle_file_input(input_type):
+        return gr.update(visible=(input_type == "File")), gr.update(
+            visible=(input_type == "URL"),
+        ), gr.update(visible=(input_type == "Custom Text"))
+    def update_lang(lang):
+        return gr.Dropdown(choices=voices[lang], label="Voice", value=voices[lang][0])
+    input_type.change(toggle_file_input, input_type, [file_input, url_input, text_input])
+    lang.change(update_lang, lang, [voice])
+    with gr.Accordion("Markdown output", open=False):
+        output_markdown = gr.Markdown("Parsed markdown will appear here", label="Parsed Text", show_copy_button=True)
+    output_audio = gr.Audio(label="Generated Audio")
+    submit_button = gr.Button("Convert")
+    submit_button.click(
+        process_input,
+        inputs=[input_type, url_input, file_input, text_input, voice, speed, lang],
+        outputs=[output_markdown, output_audio],
+    )
+demo.launch()

kokoro-v0_19.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dece567789190ebe987bd245d95c09d5ac86de28ff0c325c2e3faaf3de04442c
+size 325525180

pyproject.toml ADDED Viewed

	@@ -0,0 +1,14 @@

+[project]
+name = "local-tts"
+version = "0.1.0"
+description = "Local TTS for docs and webpages w/  Kokoro-TTS + Markitdown"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "gradio>=5.3.0",
+    "kokoro-onnx>=0.2.7",
+    "markitdown>=0.0.1a3",
+    "ruff>=0.9.1",
+    "soundfile>=0.13.0",
+    "spaces>=0.32.0",
+]

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+spaces
+gradio>=5.3.0
+kokoro-onnx>=0.2.7
+markitdown>=0.0.1a3

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

voices.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dc24670e8333cb30990726c5d99e991afc14645139d1a9d2d1858d4fba08df05
+size 54060439