Spaces:

ales
/

ai-audio-books

Running

File size: 5,959 Bytes

ddb099d
0925810
ddb099d
 
 
c2260d3
ddb099d
 
 
9281119
f655f69
 
 
 
ddb099d
 
7f664fb
 
 
 
3a9a0d8
 
c2fa877
 
 
 
 
0925810
ddb099d
c2fa877
 
ddb099d
c2fa877
f655f69
ddb099d
c2fa877
 
 
 
 
 
 
ddb099d
c2fa877
c2260d3
 
f655f69
8797a8a
 
 
f655f69
 
 
 
 
c2260d3
c2fa877
 
 
 
f655f69
 
 
 
 
 
 
 
 
 
 
c2260d3
e59e248
f655f69
e59e248
 
 
 
f655f69
 
 
 
e59e248
f655f69
 
c2260d3
 
 
f655f69
ddb099d
 
f655f69
ddb099d
e4deef7
c2fa877
9281119
 
f655f69
c2fa877
ddb099d
9281119
 
 
 
 
 
 
 
 
 
e4deef7
 
 
 
9281119
 
 
 
ddb099d
e4deef7
f655f69
e4deef7
 
 
9281119
f655f69
 
 
 
 
 
 
 
9281119
f655f69
9281119
8797a8a
f655f69
 
 
 
 
 
 
 
 
 
ddb099d
f655f69
9281119
 
 
 
f655f69
 
9281119
c2fa877
 
 
f655f69
c2fa877
c2260d3
 
 
 
c2fa877
 
 
 
 
c2260d3
 
1decf45
ddb099d
c2fa877
ddb099d
c2260d3
1decf45
c2260d3
c2fa877
c2260d3
 
1decf45
c2260d3
 
 
ddb099d
3a9a0d8

import os
from pathlib import Path

import gradio as gr
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader

load_dotenv()

from data import samples_to_split as samples
from src.builder import AudiobookBuilder
from src.config import FILE_SIZE_MAX, MAX_TEXT_LEN, logger
from src.web.utils import create_status_html
from src.web.variables import DESCRIPTION_JS, GRADIO_THEME, STATUS_DISPLAY_HTML, VOICE_UPLOAD_JS


def get_auth_params():
    user = os.environ["AUTH_USER"]
    password = os.environ["AUTH_PASS"]
    return (user, password)


def parse_pdf(file_path):
    """Parse the PDF file and return the text content."""
    loader = PyPDFLoader(file_path)
    documents = loader.load()
    return "\n".join([doc.page_content for doc in documents])


def load_text_from_file(uploaded_file):
    temp_file_path = uploaded_file.name

    if os.path.getsize(temp_file_path) > FILE_SIZE_MAX * 1024 * 1024:
        raise ValueError(f"The uploaded file exceeds the size limit of {FILE_SIZE_MAX} MB.")

    if uploaded_file.name.endswith(".txt"):
        with open(temp_file_path, "r", encoding="utf-8") as file:
            text = file.read()
    elif uploaded_file.name.endswith(".pdf"):
        text = parse_pdf(temp_file_path)
    else:
        raise ValueError("Unsupported file type. Please upload a .txt or .pdf file.")

    return text


async def audiobook_builder(
    text: str,
    uploaded_file,
    generate_effects: bool,
    use_user_voice: bool,
    voice_id: str | None = None,
):
    builder = AudiobookBuilder()

    if uploaded_file is not None:
        try:
            text = load_text_from_file(uploaded_file=uploaded_file)
        except Exception as e:
            logger.exception(e)
            msg = "Failed to load text from the provided document"
            gr.Warning(msg)
            yield None, str(e), builder.html_generator.generate_error(msg)
            return

    if not text:
        logger.info(f"No text was passed. can't generate an audiobook")
        msg = 'Please provide the text to generate audiobook from'
        gr.Warning(msg)
        yield None, "", builder.html_generator.generate_error(msg)
        return

    if (text_len := len(text)) > MAX_TEXT_LEN:
        msg = (
            f"Input text length of {text_len} characters "
            f"exceeded current limit of {MAX_TEXT_LEN} characters. "
            "Please input a shorter text."
        )
        logger.info(msg)
        gr.Warning(msg)
        yield None, "", builder.html_generator.generate_error(msg)
        return

    async for stage in builder.run(text, generate_effects, use_user_voice, voice_id):
        yield stage


def refresh():
    return None, None, None, STATUS_DISPLAY_HTML


with gr.Blocks(js=DESCRIPTION_JS, theme=GRADIO_THEME) as ui:
    with gr.Row(variant="panel"):
        text_input = gr.Textbox(label="Enter the book text here", lines=15)
        file_input = gr.File(
            label="Upload a text file or PDF",
            file_types=[".txt", ".pdf"],
            visible=True,
        )

    examples = gr.Examples(
        examples=[
            [samples.GATSBY_1],
            [samples.GATSBY_2],
            [samples.WONDERFUL_CHRISTMAS_1],
            [samples.WONDERFUL_CHRISTMAS_2],
        ],
        inputs=text_input,
        label="Sample Inputs",
        example_labels=[
            "The Great Gatsby, #1",
            "The Great Gatsby, #2",
            "The Wonderful Christmas in Pumpkin Delight Lane, #1",
            "The Wonderful Christmas in Pumpkin Delight Lane, #2",
        ],
    )

    error_output = gr.Textbox(label="Error Message", interactive=False, visible=False)

    effects_generation_checkbox = gr.Checkbox(
        label="Add sound effects",
        value=False,
        info="Select if you want to add occasional sound effect to the audiobook",
    )

    use_voice_checkbox = gr.Checkbox(
        label="Use my voice",
        value=False,
        info="Select if you want to use your voice for whole or part of the audiobook (Generations may take longer than usual)",
    )

    submit_button = gr.Button("Generate the audiobook", variant="primary")

    with gr.Row(variant="panel"):
        add_voice_btn = gr.Button("Add my voice", variant="primary")
        refresh_button = gr.Button("Refresh", variant="secondary")

    voice_result = gr.Textbox(visible=False, interactive=False, label="Processed Result")
    status_display = gr.HTML(value=STATUS_DISPLAY_HTML, label="Generation Status")
    audio_output = gr.Audio(
        label='Generated audio. Please wait for the waveform to appear, before hitting "Play"',
        type="filepath",
    )

    # callbacks

    add_voice_btn.click(fn=None, inputs=None, outputs=voice_result, js=VOICE_UPLOAD_JS)
    submit_button.click(
        fn=audiobook_builder,
        inputs=[
            text_input,
            file_input,
            effects_generation_checkbox,
            use_voice_checkbox,
            voice_result,
        ],  # Include the uploaded file as an input
        outputs=[
            audio_output,
            error_output,
            status_display,
        ],  # Include the audio output and error message output
    )
    refresh_button.click(
        fn=refresh,
        inputs=[],
        outputs=[
            audio_output,
            error_output,
            file_input,
        ],  # Reset audio output, error message, and uploaded file
    )
    text_input.change(
        fn=lambda _: gr.update(visible=False),  # Hide the error field
        inputs=[text_input],
        outputs=error_output,
    )
    file_input.change(
        fn=lambda _: gr.update(visible=False),  # Hide the error field
        inputs=[file_input],
        outputs=error_output,
    )
    refresh_button.click(
        fn=lambda _: gr.update(visible=False),  # Hide the error field
        inputs=[],
        outputs=error_output,
    )

ui.launch(auth=get_auth_params())