Spaces:

matjarm
/

image-to-text-comparison

Running

File size: 5,955 Bytes

681078d

import os
import random
import uuid
import gradio as gr
import numpy as np
from PIL import Image
import torch
from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
from typing import Tuple

# Stylesheet handed to gr.Blocks: narrow container, centered h1, hidden footer.
css = (
    "\n"
    ".gradio-container{max-width: 575px !important}\n"
    "h1{text-align:center}\n"
    "footer {\n"
    "    visibility: hidden\n"
    "}\n"
)

# Markdown shown at the top of the page.
DESCRIPTION = (
    "\n"
    "## Text-to-Image Generator 🚀\n"
    "Create stunning images from text prompts using Stable Diffusion XL. "
    "Explore high-quality styles and customizable options.\n"
)

# Sample prompts offered through gr.Examples.
examples = [
    "A beautiful sunset over the ocean, ultra-realistic, high resolution",
    "A futuristic cityscape with flying cars, cyberpunk theme, vibrant colors",
    "A cozy cabin in the woods during winter, detailed and realistic",
    "A magical forest with glowing plants and creatures, fantasy art",
]

# Display name -> model identifier passed to load_and_prepare_model().
MODEL_OPTIONS = dict(
    [
        ("LIGHTNING V5.0", "SG161222/RealVisXL_V5.0_Lightning"),
        ("LIGHTNING V4.0", "SG161222/RealVisXL_V4.0_Lightning"),
    ]
)

# Style presets: each wraps the user's prompt via the "{prompt}" placeholder
# and carries its own negative prompt.
style_list = [
    dict(
        name="Ultra HD",
        prompt="hyper-realistic 8K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic",
        negative_prompt="cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly",
    ),
    dict(
        name="4K Realistic",
        prompt="realistic 4K image of {prompt}. sharp, detailed, vibrant colors, photorealistic",
        negative_prompt="cartoonish, blurry, low resolution",
    ),
    dict(
        name="Minimal Style",
        prompt="{prompt}, clean, minimalistic",
        negative_prompt="",
    ),
]

# Lookup table: style name -> (prompt template, negative prompt).
styles = {
    preset["name"]: (preset["prompt"], preset["negative_prompt"])
    for preset in style_list
}
DEFAULT_STYLE_NAME = "Ultra HD"

# Global settings
# Use the first CUDA device when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
MAX_IMAGE_SIZE = 4096
# Largest signed 32-bit integer; upper bound for seeds.
MAX_SEED = np.iinfo(np.int32).max

# Load Model Function
def load_and_prepare_model(model_id):
    """Load a Stable Diffusion XL pipeline and prepare it for generation.

    The pipeline identified by ``model_id`` is loaded with
    ``StableDiffusionXLPipeline.from_pretrained``, moved to the module-level
    ``device``, and given an Euler-ancestral scheduler built from the
    configuration of the scheduler it was loaded with.

    Args:
        model_id: Identifier passed straight to ``from_pretrained``.

    Returns:
        The prepared pipeline.
    """
    # Half precision only when CUDA is available; full precision otherwise.
    if torch.cuda.is_available():
        weights_dtype = torch.float16
    else:
        weights_dtype = torch.float32

    pipeline = StableDiffusionXLPipeline.from_pretrained(
        model_id,
        torch_dtype=weights_dtype,
    )
    pipeline = pipeline.to(device)

    # Replace the loaded scheduler while keeping its configuration.
    loaded_scheduler_config = pipeline.scheduler.config
    pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(loaded_scheduler_config)
    return pipeline

# Build every configured pipeline once, at module load, keyed by display name.
models = {
    display_name: load_and_prepare_model(repo_id)
    for display_name, repo_id in MODEL_OPTIONS.items()
}

# Generate Function
def generate_image(
    model_choice: str,
    prompt: str,
    negative_prompt: str,
    style_name: str,
    width: int,
    height: int,
    guidance_scale: float,
    num_steps: int,
    num_images: int,
    randomize_seed: bool,
    seed: int,
):
    """Generate images for a prompt and save them as PNG files.

    Args:
        model_choice: Key into ``models`` selecting the pipeline to run.
        prompt: User prompt, substituted for ``{prompt}`` in the style template.
        negative_prompt: Optional extra negative terms appended to the style's
            own negative prompt.
        style_name: Key into ``styles``; unknown names fall back to
            ``DEFAULT_STYLE_NAME``.
        width: Output width in pixels.
        height: Output height in pixels.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_steps: Number of inference steps.
        num_images: Number of images to generate in one batch.
        randomize_seed: When true, ``seed`` is replaced by a random value in
            ``[0, MAX_SEED]``.
        seed: Seed used when ``randomize_seed`` is false.

    Returns:
        A tuple ``(image_paths, seed)``: the list of PNG file names written to
        the current working directory and the seed that was actually used.

    Raises:
        KeyError: If ``model_choice`` is not a key of ``models``.
    """
    # Apply Style
    positive_style, negative_style = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
    styled_prompt = positive_style.replace("{prompt}", prompt or "")

    # Join the style's negative terms and the user's with a separator; plain
    # concatenation would fuse the last and first terms into one word.
    negative_parts = (negative_style.strip(), (negative_prompt or "").strip())
    styled_negative_prompt = ", ".join(part for part in negative_parts if part)

    # UI sliders may deliver numbers as floats; the seed, list repetition and
    # the pipeline's size/step arguments need real integers.
    width = int(width)
    height = int(height)
    num_steps = int(num_steps)
    num_images = int(num_images)

    # Randomize Seed if Enabled
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    seed = int(seed)
    generator = torch.Generator(device=device).manual_seed(seed)

    # Generate Images
    pipe = models[model_choice]
    images = pipe(
        prompt=[styled_prompt] * num_images,
        negative_prompt=[styled_negative_prompt] * num_images,
        width=width,
        height=height,
        guidance_scale=guidance_scale,
        num_inference_steps=num_steps,
        generator=generator,
        output_type="pil",
    ).images

    # Save and Return Images
    # Files get a random UUID name in the working directory and are not
    # removed by this function.
    image_paths = []
    for img in images:
        unique_name = f"{uuid.uuid4()}.png"
        img.save(unique_name)
        image_paths.append(unique_name)

    return image_paths, seed

# Gradio Interface
# Build the UI. Components are rendered in the order they are created here,
# so the statement order below is also the page layout.
with gr.Blocks(css=css) as demo:
    gr.Markdown(DESCRIPTION)

    # Model selector; choices are the display names from MODEL_OPTIONS.
    with gr.Row():
        model_choice = gr.Dropdown(
            label="Select Model",
            choices=list(MODEL_OPTIONS.keys()),
            value="LIGHTNING V5.0",
        )

    prompt = gr.Textbox(
        label="Prompt",
        placeholder="Enter your creative prompt here...",
    )
    
    # Pre-filled negative prompt; generate_image combines it with the
    # selected style's own negative prompt.
    negative_prompt = gr.Textbox(
        label="Negative Prompt",
        placeholder="Optional: Add details you want to avoid...",
        value="blurry, deformed, low-quality, cartoonish",
    )
    
    # One radio option per entry in `styles`.
    style_name = gr.Radio(
        label="Style",
        choices=list(styles.keys()),
        value=DEFAULT_STYLE_NAME,
    )

    # Generation parameters, collapsed by default.
    with gr.Accordion("Advanced Options", open=False):
        width = gr.Slider(label="Width", minimum=512, maximum=2048, step=8, value=1024)
        height = gr.Slider(label="Height", minimum=512, maximum=2048, step=8, value=1024)
        guidance_scale = gr.Slider(
            label="Guidance Scale",
            minimum=1,
            maximum=20,
            step=0.5,
            value=7.5,
        )
        num_steps = gr.Slider(
            label="Steps",
            minimum=1,
            maximum=50,
            step=1,
            value=25,
        )
        num_images = gr.Slider(
            label="Number of Images",
            minimum=1,
            maximum=5,
            step=1,
            value=1,
        )
        # When checked, generate_image ignores the slider value and draws a
        # random seed instead.
        randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)

    with gr.Row():
        run_button = gr.Button("Generate Images")
        result_gallery = gr.Gallery(label="Generated Images", show_label=False)

    # The input order must match generate_image's parameter order. The seed
    # slider is also an output, so it shows the seed that was actually used.
    run_button.click(
        generate_image,
        inputs=[
            model_choice,
            prompt,
            negative_prompt,
            style_name,
            width,
            height,
            guidance_scale,
            num_steps,
            num_images,
            randomize_seed,
            seed,
        ],
        outputs=[result_gallery, seed],
    )

    # Example prompts that fill the prompt textbox.
    gr.Examples(
        examples=examples,
        inputs=prompt,
    )

if __name__ == "__main__":
    # Enable request queuing (queue size capped at 50), then start the app.
    queued_demo = demo.queue(max_size=50)
    queued_demo.launch()