Spaces:
Runtime error
Runtime error
github-actions[bot]
committed on
Commit
·
c322dc7
1
Parent(s):
5af6319
Sync with https://github.com/mozilla-ai/document-to-podcast
Browse files
- Dockerfile +3 -0
- app.py +15 -6
Dockerfile
CHANGED
@@ -8,6 +8,8 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
|
|
8 |
git \
|
9 |
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
10 |
|
|
|
|
|
11 |
RUN useradd -m -u 1000 user
|
12 |
|
13 |
USER user
|
@@ -18,6 +20,7 @@ ENV HOME=/home/user \
|
|
18 |
WORKDIR $HOME/app
|
19 |
|
20 |
RUN pip3 install https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
|
|
|
21 |
RUN pip3 install document-to-podcast
|
22 |
|
23 |
COPY --chown=user . $HOME/app
|
|
|
8 |
git \
|
9 |
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
10 |
|
11 |
+
RUN apt-get install espeak-ng -y
|
12 |
+
|
13 |
RUN useradd -m -u 1000 user
|
14 |
|
15 |
USER user
|
|
|
20 |
WORKDIR $HOME/app
|
21 |
|
22 |
RUN pip3 install https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu122/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
|
23 |
+
RUN pip3 install phonemizer
|
24 |
RUN pip3 install document-to-podcast
|
25 |
|
26 |
COPY --chown=user . $HOME/app
|
app.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
"""Streamlit app for converting documents to podcasts."""
|
2 |
|
|
|
|
|
3 |
import re
|
4 |
from pathlib import Path
|
5 |
-
import io
|
6 |
|
7 |
import numpy as np
|
8 |
import soundfile as sf
|
@@ -22,13 +23,16 @@ from document_to_podcast.utils import stack_audio_segments
|
|
22 |
@st.cache_resource
|
23 |
def load_text_to_text_model():
|
24 |
return load_llama_cpp_model(
|
25 |
-
model_id="
|
26 |
)
|
27 |
|
28 |
|
29 |
@st.cache_resource
|
30 |
def load_text_to_speech_model():
|
31 |
-
|
|
|
|
|
|
|
32 |
|
33 |
|
34 |
def numpy_to_wav(audio_array: np.ndarray, sample_rate: int) -> io.BytesIO:
|
@@ -115,10 +119,15 @@ if "clean_text" in st.session_state:
|
|
115 |
text_model = load_text_to_text_model()
|
116 |
speech_model = load_text_to_speech_model()
|
117 |
|
|
|
|
|
|
|
|
|
|
|
118 |
st.markdown(
|
119 |
"For this demo, we are using the following models: \n"
|
120 |
-
"- [
|
121 |
-
"
|
122 |
)
|
123 |
st.markdown(
|
124 |
"You can check the [Customization Guide](https://mozilla-ai.github.io/document-to-podcast/customization/)"
|
@@ -187,7 +196,7 @@ if "clean_text" in st.session_state:
|
|
187 |
|
188 |
if st.session_state[gen_button]:
|
189 |
audio_np = stack_audio_segments(
|
190 |
-
st.session_state.audio, speech_model.sample_rate
|
191 |
)
|
192 |
audio_wav = numpy_to_wav(audio_np, speech_model.sample_rate)
|
193 |
if st.download_button(
|
|
|
1 |
"""Streamlit app for converting documents to podcasts."""
|
2 |
|
3 |
+
import io
|
4 |
+
import os
|
5 |
import re
|
6 |
from pathlib import Path
|
|
|
7 |
|
8 |
import numpy as np
|
9 |
import soundfile as sf
|
|
|
23 |
@st.cache_resource
|
24 |
def load_text_to_text_model():
|
25 |
return load_llama_cpp_model(
|
26 |
+
model_id="bartowski/Qwen2.5-3B-Instruct-GGUF/Qwen2.5-3B-Instruct-f16.gguf"
|
27 |
)
|
28 |
|
29 |
|
30 |
@st.cache_resource
|
31 |
def load_text_to_speech_model():
|
32 |
+
if os.environ.get("HF_SPACE") == "TRUE":
|
33 |
+
return load_tts_model("hexgrad/Kokoro-82M/kokoro-v0_19.pth")
|
34 |
+
else:
|
35 |
+
return load_tts_model("OuteAI/OuteTTS-0.2-500M-GGUF/OuteTTS-0.2-500M-FP16.gguf")
|
36 |
|
37 |
|
38 |
def numpy_to_wav(audio_array: np.ndarray, sample_rate: int) -> io.BytesIO:
|
|
|
119 |
text_model = load_text_to_text_model()
|
120 |
speech_model = load_text_to_speech_model()
|
121 |
|
122 |
+
if os.environ.get("HF_SPACE") == "TRUE":
|
123 |
+
tts_link = "- [hexgrad/Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M)"
|
124 |
+
else:
|
125 |
+
tts_link = "- [OuteAI/OuteTTS-0.2-500M](https://huggingface.co/OuteAI/OuteTTS-0.2-500M-GGUF)"
|
126 |
+
|
127 |
st.markdown(
|
128 |
"For this demo, we are using the following models: \n"
|
129 |
+
"- [Qwen2.5-3B-Instruct](https://huggingface.co/bartowski/Qwen2.5-3B-Instruct-GGUF)\n"
|
130 |
+
f"{tts_link}\n"
|
131 |
)
|
132 |
st.markdown(
|
133 |
"You can check the [Customization Guide](https://mozilla-ai.github.io/document-to-podcast/customization/)"
|
|
|
196 |
|
197 |
if st.session_state[gen_button]:
|
198 |
audio_np = stack_audio_segments(
|
199 |
+
st.session_state.audio, speech_model.sample_rate, silence_pad=0.0
|
200 |
)
|
201 |
audio_wav = numpy_to_wav(audio_np, speech_model.sample_rate)
|
202 |
if st.download_button(
|