gsarti committed on
Commit
ecc3537
·
1 Parent(s): 4ecef2b

Add to LFS

Browse files
Files changed (9) hide show
  1. .gitattributes +1 -0
  2. .gitignore +116 -0
  3. README.md +8 -2
  4. app.py +134 -0
  5. kokoro-v0_19.onnx +3 -0
  6. pyproject.toml +14 -0
  7. requirements.txt +4 -0
  8. uv.lock +0 -0
  9. voices.json +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.json filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ *.egg-info/
24
+ .installed.cfg
25
+ *.egg
26
+
27
+ # Gradio specific
28
+ gradio/
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+
53
+ # Translations
54
+ *.mo
55
+ *.pot
56
+
57
+ # Django stuff:
58
+ *.log
59
+ local_settings.py
60
+ db.sqlite3
61
+
62
+ # Flask stuff:
63
+ instance/
64
+ .webassets-cache
65
+
66
+ # Scrapy stuff:
67
+ .scrapy
68
+
69
+ # Sphinx documentation
70
+ docs/_build/
71
+
72
+ # PyBuilder
73
+ target/
74
+
75
+ # Jupyter Notebook
76
+ .ipynb_checkpoints
77
+
78
+ # IPython
79
+ profile_default/
80
+ ipython_config.py
81
+
82
+ # pyenv
83
+ .python-version
84
+
85
+ # ruff
86
+ .ruff_cache/
87
+
88
+ # celery beat schedule file
89
+ celerybeat-schedule
90
+
91
+ # dotenv
92
+ .env
93
+ .venv
94
+ env/
95
+ venv/
96
+ ENV/
97
+ env.bak/
98
+ venv.bak/
99
+
100
+ # Spyder project settings
101
+ .spyderproject
102
+ .spyderworkspace
103
+
104
+ # Rope project settings
105
+ .ropeproject
106
+
107
+ # mkdocs documentation
108
+ /site
109
+
110
+ # mypy
111
+ .mypy_cache/
112
+ .dmypy.json
113
+ dmypy.json
114
+
115
+ # Pyre type checker
116
+ .pyre/
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: Local Tts
3
- emoji: 🏢
4
  colorFrom: purple
5
  colorTo: green
6
  sdk: gradio
@@ -8,7 +8,13 @@ sdk_version: 5.12.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
11
  short_description: Local TTS for docs and webpages w/ Kokoro-TTS + Markitdown
 
 
 
 
 
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Local TTS
3
+ emoji: 🗣️
4
  colorFrom: purple
5
  colorTo: green
6
  sdk: gradio
 
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ pinned: true
12
  short_description: Local TTS for docs and webpages w/ Kokoro-TTS + Markitdown
13
+ tags:
14
+ - tts
15
+ - speech
16
+ - reader
17
+ - kokoro
18
  ---
19
 
20
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import spaces
3
+ import tempfile
4
+ import soundfile as sf
5
+ import requests
6
+ from markdown import Markdown
7
+ from io import StringIO
8
+
9
+ import gradio as gr
10
+ from kokoro_onnx import Kokoro
11
+ from markitdown import MarkItDown
12
+
# Voice identifiers offered in the UI, keyed by the language code that is
# handed to Kokoro together with the selected voice.
voices = {
    "en-us": [
        'af',
        'af_bella',
        'af_nicole',
        'af_sarah',
        'af_sky',
        'am_adam',
        'am_michael',
    ],
    "en-gb": [
        'bf_emma',
        'bf_isabella',
        'bm_george',
        'bm_lewis',
    ],
}

# Shared instances, built once at import time and reused by every request:
# the document-to-markdown converter and the Kokoro synthesizer (model file
# and voices file are looked up relative to the working directory).
md = MarkItDown()
kokoro = Kokoro("kokoro-v0_19.onnx", "voices.json")
19
+
def _write_plain_text(element, stream):
    """Write the text of ``element`` and all its descendants to ``stream``.

    Text is emitted in document order: the element's own ``text``, then each
    child (recursively), then the element's ``tail``.
    """
    if element.text:
        stream.write(element.text)
    for child in element:
        _write_plain_text(child, stream)
    if element.tail:
        stream.write(element.tail)


def unmark_element(element, stream=None):
    """Serialize an ElementTree-style element to plain text, dropping all tags.

    Args:
        element: Node exposing ``text``, ``tail`` and iteration over children.
        stream: Optional text buffer to append to. When omitted a fresh
            ``StringIO`` is used.

    Returns:
        The full contents of the buffer after ``element`` has been written
        (including anything that was already in a caller-supplied ``stream``).
    """
    if stream is None:
        stream = StringIO()
    # The recursion lives in the helper so the buffer is materialized exactly
    # once here, rather than once per nested element.
    _write_plain_text(element, stream)
    return stream.getvalue()
30
+
31
+
# Register the plain-text serializer as an additional Markdown output format
# and build a dedicated converter instance that uses it.
Markdown.output_formats["plain"] = unmark_element
__md = Markdown(output_format="plain")
# The plain serializer emits no wrapping tags, so top-level tag stripping is
# switched off on this instance.
__md.stripTopLevelTags = False


def markdown2text(text):
    """Convert Markdown source to plain text with all markup removed.

    Args:
        text: Markdown-formatted string.

    Returns:
        The textual content of ``text`` as produced by the "plain" output
        format registered above.
    """
    # The converter instance is shared across calls; reset() clears state
    # left over from the previous document (e.g. collected link references)
    # so conversions stay independent of each other.
    return __md.reset().convert(text)
40
+
41
+
@spaces.GPU
def text_to_speech(text, voice, speed, lang):
    """Synthesize ``text`` with Kokoro and return the path of a WAV file.

    Args:
        text: Plain text to read aloud.
        voice: Kokoro voice identifier (one of the entries in ``voices``).
        speed: Speaking-rate multiplier; converted with ``float()``.
        lang: Language code passed through to Kokoro (e.g. ``"en-us"``).

    Returns:
        Path to ``output.wav`` inside a freshly created temporary directory.
        The directory is not removed by this function.

    Raises:
        gr.Error: If ``text`` is empty or synthesis/writing fails. Raising
            (instead of returning an error string) keeps a message from being
            handed to the audio component as if it were a file path.
    """
    if not text or not text.strip():
        raise gr.Error("No text to convert to speech.")

    try:
        # Generate audio
        samples, sample_rate = kokoro.create(
            text,
            voice=voice,
            speed=float(speed),
            lang=lang,
        )

        # Save to a file in a new temporary directory
        temp_dir = tempfile.mkdtemp()
        temp_path = os.path.join(temp_dir, "output.wav")
        sf.write(temp_path, samples, sample_rate)
    except Exception as e:
        raise gr.Error(f"Speech synthesis failed: {e}") from e
    return temp_path
62
+
63
+
def create_temp_html_from_url(url: str, timeout: float = 30.0) -> str:
    """Download ``url`` and store the response body as a temporary HTML file.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive host cannot block the request indefinitely.

    Returns:
        Path to ``output.html`` inside a freshly created temporary directory.
        The directory is not removed by this function.

    Raises:
        requests.HTTPError: If the request fails or returns an error status.
        OSError: If the temporary file cannot be written.
    """
    # NOTE(review): ``url`` comes straight from the user; consider restricting
    # which hosts may be fetched if this runs on a network with internal services.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        raise requests.HTTPError(f"Error fetching URL: {str(e)}") from e

    temp_dir = tempfile.mkdtemp()
    temp_path = os.path.join(temp_dir, "output.html")
    # Explicit UTF-8 so the result does not depend on the platform locale.
    with open(temp_path, "w", encoding="utf-8") as f:
        f.write(response.text)
    return temp_path
77
+
78
+
def process_input(input_type, url_input, file_input, text_input, voice, speed, lang):
    """Turn the selected input into parsed text and a generated audio file.

    Args:
        input_type: ``"URL"``, ``"File"`` or ``"Custom Text"``.
        url_input: Page address, used when ``input_type`` is ``"URL"``.
        file_input: Path of the uploaded file, used for ``"File"``.
        text_input: Raw text, used for ``"Custom Text"``.
        voice: Voice identifier forwarded to ``text_to_speech``.
        speed: Speaking-rate multiplier forwarded to ``text_to_speech``.
        lang: Language code forwarded to ``text_to_speech``.

    Returns:
        A ``(markdown, audio_path)`` tuple: the markdown extracted from the
        source (or the custom text unchanged) and whatever
        ``text_to_speech`` returned for the text that was read aloud.

    Raises:
        gr.Error: If no input type is selected or the selected input is empty.
    """
    if input_type == "URL":
        if not url_input or not url_input.strip():
            raise gr.Error("Please enter a URL.")
        filepath = create_temp_html_from_url(url_input.strip())
    elif input_type == "File":
        if not file_input:
            raise gr.Error("Please upload a file.")
        filepath = file_input
    elif input_type == "Custom Text":
        if not text_input or not text_input.strip():
            raise gr.Error("Please enter some text.")
        # Custom text is spoken as typed; no markdown conversion is applied.
        return text_input, text_to_speech(text_input, voice, speed, lang)
    else:
        raise gr.Error("Please select an input type.")

    # URL and File share one path: document -> markdown -> plain text.
    markdown = md.convert(filepath).text_content
    text = markdown2text(markdown)
    audio_path = text_to_speech(text, voice, speed, lang)
    return markdown, audio_path
93
+
94
+
# Gradio interface: input selection and voice settings on top, parsed text and
# generated audio below.
# NOTE(review): the nesting of the layout containers was reconstructed from
# source whose indentation had been stripped; confirm against the original.
with gr.Blocks() as demo:
    gr.Markdown(
        "# Local TTS demo 🗣️ \nProvide a URL or upload a file to convert its content into speech using [Markitdown](https://github.com/microsoft/markitdown) and [Kokoro-ONNX](https://github.com/thewh1teagle/kokoro-onnx)."
    )

    with gr.Row():
        with gr.Column():
            # Default to "URL" so the selection matches the input box that is
            # visible when the page first loads.
            input_type = gr.Radio(
                ["URL", "File", "Custom Text"],
                label="Input Type",
                value="URL",
            )
            with gr.Row():
                speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1, label="Speed")
                lang = gr.Dropdown(choices=list(voices), label="Language", value="en-us")
                voice = gr.Dropdown(choices=voices[lang.value], label="Voice", value=voices[lang.value][0])
        with gr.Column():
            url_input = gr.Textbox(label="Enter URL")
            file_input = gr.File(label="Upload File", visible=False)
            text_input = gr.Textbox(label="Text", visible=False, lines=5, placeholder="Enter text here", show_label=False, interactive=True)

    def toggle_file_input(input_type):
        """Show only the input widget that matches the selected input type.

        Returns visibility updates in the order (file, URL, text), matching
        the outputs wired to ``input_type.change`` below.
        """
        return (
            gr.update(visible=(input_type == "File")),
            gr.update(visible=(input_type == "URL")),
            gr.update(visible=(input_type == "Custom Text")),
        )

    def update_lang(lang):
        """Rebuild the voice dropdown for ``lang``, selecting its first voice."""
        return gr.Dropdown(choices=voices[lang], label="Voice", value=voices[lang][0])

    input_type.change(toggle_file_input, input_type, [file_input, url_input, text_input])
    lang.change(update_lang, lang, [voice])

    with gr.Accordion("Markdown output", open=False):
        output_markdown = gr.Markdown("Parsed markdown will appear here", label="Parsed Text", show_copy_button=True)
    output_audio = gr.Audio(label="Generated Audio")
    submit_button = gr.Button("Convert")

    submit_button.click(
        process_input,
        inputs=[input_type, url_input, file_input, text_input, voice, speed, lang],
        outputs=[output_markdown, output_audio],
    )

demo.launch()
kokoro-v0_19.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dece567789190ebe987bd245d95c09d5ac86de28ff0c325c2e3faaf3de04442c
3
+ size 325525180
pyproject.toml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "local-tts"
3
+ version = "0.1.0"
4
+ description = "Local TTS for docs and webpages w/ Kokoro-TTS + Markitdown"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "gradio>=5.3.0",
9
+ "kokoro-onnx>=0.2.7",
10
+ "markitdown>=0.0.1a3",
11
+ "ruff>=0.9.1",
12
+ "soundfile>=0.13.0",
13
+ "spaces>=0.32.0",
14
+ ]
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ spaces
2
+ gradio>=5.3.0
3
+ kokoro-onnx>=0.2.7
4
+ markitdown>=0.0.1a3
uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
voices.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc24670e8333cb30990726c5d99e991afc14645139d1a9d2d1858d4fba08df05
3
+ size 54060439