ai-audio-books / app.py
Aliaksandr
merge dev into main (#13)
f655f69 unverified
import os
from pathlib import Path
import gradio as gr
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
load_dotenv()
from data import samples_to_split as samples
from src.builder import AudiobookBuilder
from src.config import FILE_SIZE_MAX, MAX_TEXT_LEN, logger
from src.web.utils import create_status_html
from src.web.variables import DESCRIPTION_JS, GRADIO_THEME, STATUS_DISPLAY_HTML, VOICE_UPLOAD_JS
def get_auth_params():
user = os.environ["AUTH_USER"]
password = os.environ["AUTH_PASS"]
return (user, password)
def parse_pdf(file_path):
"""Parse the PDF file and return the text content."""
loader = PyPDFLoader(file_path)
documents = loader.load()
return "\n".join([doc.page_content for doc in documents])
def load_text_from_file(uploaded_file):
temp_file_path = uploaded_file.name
if os.path.getsize(temp_file_path) > FILE_SIZE_MAX * 1024 * 1024:
raise ValueError(f"The uploaded file exceeds the size limit of {FILE_SIZE_MAX} MB.")
if uploaded_file.name.endswith(".txt"):
with open(temp_file_path, "r", encoding="utf-8") as file:
text = file.read()
elif uploaded_file.name.endswith(".pdf"):
text = parse_pdf(temp_file_path)
else:
raise ValueError("Unsupported file type. Please upload a .txt or .pdf file.")
return text
async def audiobook_builder(
text: str,
uploaded_file,
generate_effects: bool,
use_user_voice: bool,
voice_id: str | None = None,
):
builder = AudiobookBuilder()
if uploaded_file is not None:
try:
text = load_text_from_file(uploaded_file=uploaded_file)
except Exception as e:
logger.exception(e)
msg = "Failed to load text from the provided document"
gr.Warning(msg)
yield None, str(e), builder.html_generator.generate_error(msg)
return
if not text:
logger.info(f"No text was passed. can't generate an audiobook")
msg = 'Please provide the text to generate audiobook from'
gr.Warning(msg)
yield None, "", builder.html_generator.generate_error(msg)
return
if (text_len := len(text)) > MAX_TEXT_LEN:
msg = (
f"Input text length of {text_len} characters "
f"exceeded current limit of {MAX_TEXT_LEN} characters. "
"Please input a shorter text."
)
logger.info(msg)
gr.Warning(msg)
yield None, "", builder.html_generator.generate_error(msg)
return
async for stage in builder.run(text, generate_effects, use_user_voice, voice_id):
yield stage
def refresh():
return None, None, None, STATUS_DISPLAY_HTML
with gr.Blocks(js=DESCRIPTION_JS, theme=GRADIO_THEME) as ui:
with gr.Row(variant="panel"):
text_input = gr.Textbox(label="Enter the book text here", lines=15)
file_input = gr.File(
label="Upload a text file or PDF",
file_types=[".txt", ".pdf"],
visible=True,
)
examples = gr.Examples(
examples=[
[samples.GATSBY_1],
[samples.GATSBY_2],
[samples.WONDERFUL_CHRISTMAS_1],
[samples.WONDERFUL_CHRISTMAS_2],
],
inputs=text_input,
label="Sample Inputs",
example_labels=[
"The Great Gatsby, #1",
"The Great Gatsby, #2",
"The Wonderful Christmas in Pumpkin Delight Lane, #1",
"The Wonderful Christmas in Pumpkin Delight Lane, #2",
],
)
error_output = gr.Textbox(label="Error Message", interactive=False, visible=False)
effects_generation_checkbox = gr.Checkbox(
label="Add sound effects",
value=False,
info="Select if you want to add occasional sound effect to the audiobook",
)
use_voice_checkbox = gr.Checkbox(
label="Use my voice",
value=False,
info="Select if you want to use your voice for whole or part of the audiobook (Generations may take longer than usual)",
)
submit_button = gr.Button("Generate the audiobook", variant="primary")
with gr.Row(variant="panel"):
add_voice_btn = gr.Button("Add my voice", variant="primary")
refresh_button = gr.Button("Refresh", variant="secondary")
voice_result = gr.Textbox(visible=False, interactive=False, label="Processed Result")
status_display = gr.HTML(value=STATUS_DISPLAY_HTML, label="Generation Status")
audio_output = gr.Audio(
label='Generated audio. Please wait for the waveform to appear, before hitting "Play"',
type="filepath",
)
# callbacks
add_voice_btn.click(fn=None, inputs=None, outputs=voice_result, js=VOICE_UPLOAD_JS)
submit_button.click(
fn=audiobook_builder,
inputs=[
text_input,
file_input,
effects_generation_checkbox,
use_voice_checkbox,
voice_result,
], # Include the uploaded file as an input
outputs=[
audio_output,
error_output,
status_display,
], # Include the audio output and error message output
)
refresh_button.click(
fn=refresh,
inputs=[],
outputs=[
audio_output,
error_output,
file_input,
], # Reset audio output, error message, and uploaded file
)
text_input.change(
fn=lambda _: gr.update(visible=False), # Hide the error field
inputs=[text_input],
outputs=error_output,
)
file_input.change(
fn=lambda _: gr.update(visible=False), # Hide the error field
inputs=[file_input],
outputs=error_output,
)
refresh_button.click(
fn=lambda _: gr.update(visible=False), # Hide the error field
inputs=[],
outputs=error_output,
)
ui.launch(auth=get_auth_params())