Spaces:

matjarm
/

image-to-text-comparison

Running

App Files Files Community

image-to-text-comparison / app.py

matjarm

init

681078d about 2 months ago

raw

history blame

5.96 kB

	import os
	import random
	import uuid
	import gradio as gr
	import numpy as np
	from PIL import Image
	import torch
	from diffusers import StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
	from typing import Tuple

	# Custom CSS handed to gr.Blocks(css=...): caps the container width at 575px,
	# centers <h1> headings, and hides the page footer.
	css = '''
	.gradio-container{max-width: 575px !important}
	h1{text-align:center}
	footer {
	visibility: hidden
	}
	'''

	# Markdown header rendered at the top of the page through gr.Markdown.
	DESCRIPTION = """
	## Text-to-Image Generator 🚀
	Create stunning images from text prompts using Stable Diffusion XL. Explore high-quality styles and customizable options.
	"""

	# Sample prompts offered to the user through gr.Examples.
	examples = [
	    "A beautiful sunset over the ocean, ultra-realistic, high resolution",
	    "A futuristic cityscape with flying cars, cyberpunk theme, vibrant colors",
	    "A cozy cabin in the woods during winter, detailed and realistic",
	    "A magical forest with glowing plants and creatures, fantasy art",
	]

	# Dropdown label -> model repository id, in display order.
	MODEL_OPTIONS = dict(
	    (
	        ("LIGHTNING V5.0", "SG161222/RealVisXL_V5.0_Lightning"),
	        ("LIGHTNING V4.0", "SG161222/RealVisXL_V4.0_Lightning"),
	    )
	)

	# Style presets. Each preset carries a display name, a positive template with a
	# literal "{prompt}" placeholder, and a negative prompt.
	style_list = [
	    dict(
	        name="Ultra HD",
	        prompt="hyper-realistic 8K image of {prompt}. ultra-detailed, lifelike, high-resolution, sharp, vibrant colors, photorealistic",
	        negative_prompt="cartoonish, low resolution, blurry, simplistic, abstract, deformed, ugly",
	    ),
	    dict(
	        name="4K Realistic",
	        prompt="realistic 4K image of {prompt}. sharp, detailed, vibrant colors, photorealistic",
	        negative_prompt="cartoonish, blurry, low resolution",
	    ),
	    dict(
	        name="Minimal Style",
	        prompt="{prompt}, clean, minimalistic",
	        negative_prompt="",
	    ),
	]

	# Lookup table: style name -> (positive template, negative prompt).
	styles = {
	    preset["name"]: (preset["prompt"], preset["negative_prompt"])
	    for preset in style_list
	}
	DEFAULT_STYLE_NAME = "Ultra HD"

	# Module-wide settings
	# Upper bound for seeds: the largest signed 32-bit integer.
	MAX_SEED = np.iinfo(np.int32).max
	# NOTE(review): not referenced anywhere in the code visible here.
	MAX_IMAGE_SIZE = 4096
	# Run on the first CUDA device when one is available, otherwise on the CPU.
	if torch.cuda.is_available():
	    device = torch.device("cuda:0")
	else:
	    device = torch.device("cpu")

	# Model loader
	def load_and_prepare_model(model_id):
	    """Load the SDXL pipeline for ``model_id`` and move it to ``device``.

	    Weights are requested in float16 when CUDA is available and in float32
	    otherwise. The pipeline's scheduler is then swapped for an
	    ``EulerAncestralDiscreteScheduler`` built from the existing scheduler's
	    config.

	    Args:
	        model_id: Identifier passed straight to ``from_pretrained``.

	    Returns:
	        The prepared pipeline.
	    """
	    weights_dtype = torch.float32
	    if torch.cuda.is_available():
	        weights_dtype = torch.float16

	    pipeline = StableDiffusionXLPipeline.from_pretrained(
	        model_id,
	        torch_dtype=weights_dtype,
	    )
	    pipeline = pipeline.to(device)

	    scheduler_config = pipeline.scheduler.config
	    pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler_config)
	    return pipeline

	# Eagerly load one pipeline per dropdown label at import time.
	models = {
	    label: load_and_prepare_model(MODEL_OPTIONS[label])
	    for label in MODEL_OPTIONS
	}

	# Generate Function
	def generate_image(
	    model_choice: str,
	    prompt: str,
	    negative_prompt: str,
	    style_name: str,
	    width: int,
	    height: int,
	    guidance_scale: float,
	    num_steps: int,
	    num_images: int,
	    randomize_seed: bool,
	    seed: int,
	) -> Tuple[list, int]:
	    """Generate images for ``prompt`` with the selected model and style.

	    Args:
	        model_choice: Key into ``models`` selecting the pipeline to run.
	        prompt: User text substituted for ``{prompt}`` in the style template.
	        negative_prompt: Optional extra negative terms; may be empty or None.
	        style_name: Key into ``styles``; unknown names fall back to
	            ``DEFAULT_STYLE_NAME``.
	        width: Output width in pixels.
	        height: Output height in pixels.
	        guidance_scale: Guidance scale forwarded to the pipeline.
	        num_steps: Number of inference steps.
	        num_images: Number of images to generate in one call.
	        randomize_seed: When true, ``seed`` is replaced by a random value in
	            ``[0, MAX_SEED]``.
	        seed: Seed used when ``randomize_seed`` is false.

	    Returns:
	        A ``(image_paths, seed)`` tuple: the PNG file names that were written
	        and the seed that was actually used.

	    Raises:
	        KeyError: If ``model_choice`` is not a key of ``models``.
	    """
	    # UI sliders may hand over whole numbers as floats; list repetition and
	    # torch's manual_seed both need real ints.
	    width = int(width)
	    height = int(height)
	    num_steps = int(num_steps)
	    num_images = int(num_images)
	    seed = int(seed)

	    # Apply Style
	    positive_style, negative_style = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
	    styled_prompt = positive_style.replace("{prompt}", prompt)
	    # Join the style's negatives and the user's negatives with ", ". Plain
	    # concatenation fused the last style term with the first user term
	    # (e.g. "uglyblurry"), corrupting both.
	    negative_parts = [
	        part.strip()
	        for part in (negative_style, negative_prompt or "")
	        if part.strip()
	    ]
	    styled_negative_prompt = ", ".join(negative_parts)

	    # Randomize Seed if Enabled
	    if randomize_seed:
	        seed = random.randint(0, MAX_SEED)
	    generator = torch.Generator(device=device).manual_seed(seed)

	    # Generate Images
	    pipe = models[model_choice]
	    images = pipe(
	        prompt=[styled_prompt] * num_images,
	        negative_prompt=[styled_negative_prompt] * num_images,
	        width=width,
	        height=height,
	        guidance_scale=guidance_scale,
	        num_inference_steps=num_steps,
	        generator=generator,
	        output_type="pil",
	    ).images

	    # Save and Return Images
	    # Files go to the current working directory under random UUID names; this
	    # function never removes them.
	    image_paths = []
	    for img in images:
	        unique_name = f"{uuid.uuid4()}.png"
	        img.save(unique_name)
	        image_paths.append(unique_name)

	    return image_paths, seed

	# Gradio Interface
	# Builds the page: description, model/prompt/style inputs, advanced sliders,
	# a generate button, a result gallery, and example prompts.
	# NOTE(review): nesting indentation was lost in this copy of the file, so which
	# components belong inside each `with` block cannot be confirmed from here.
	with gr.Blocks(css=css) as demo:
	gr.Markdown(DESCRIPTION)

	with gr.Row():
	# Model selector; choices are the keys of MODEL_OPTIONS.
	model_choice = gr.Dropdown(
	label="Select Model",
	choices=list(MODEL_OPTIONS.keys()),
	value="LIGHTNING V5.0",
	)

	prompt = gr.Textbox(
	label="Prompt",
	placeholder="Enter your creative prompt here...",
	)

	# Pre-filled with a default set of negative terms.
	negative_prompt = gr.Textbox(
	label="Negative Prompt",
	placeholder="Optional: Add details you want to avoid...",
	value="blurry, deformed, low-quality, cartoonish",
	)

	# One radio option per entry in `styles`.
	style_name = gr.Radio(
	label="Style",
	choices=list(styles.keys()),
	value=DEFAULT_STYLE_NAME,
	)

	# Advanced options, created closed (open=False).
	with gr.Accordion("Advanced Options", open=False):
	width = gr.Slider(label="Width", minimum=512, maximum=2048, step=8, value=1024)
	height = gr.Slider(label="Height", minimum=512, maximum=2048, step=8, value=1024)
	guidance_scale = gr.Slider(
	label="Guidance Scale",
	minimum=1,
	maximum=20,
	step=0.5,
	value=7.5,
	)
	num_steps = gr.Slider(
	label="Steps",
	minimum=1,
	maximum=50,
	step=1,
	value=25,
	)
	num_images = gr.Slider(
	label="Number of Images",
	minimum=1,
	maximum=5,
	step=1,
	value=1,
	)
	randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
	# Also listed as an output of the click handler below.
	seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)

	with gr.Row():
	run_button = gr.Button("Generate Images")
	result_gallery = gr.Gallery(label="Generated Images", show_label=False)

	# Wire the button to generate_image. The inputs list follows the order of the
	# function's parameters; its (image_paths, seed) return value is mapped to
	# the gallery and back onto the seed slider.
	run_button.click(
	generate_image,
	inputs=[
	model_choice,
	prompt,
	negative_prompt,
	style_name,
	width,
	height,
	guidance_scale,
	num_steps,
	num_images,
	randomize_seed,
	seed,
	],
	outputs=[result_gallery, seed],
	)

	# Example prompts bound to the prompt textbox.
	gr.Examples(
	examples=examples,
	inputs=prompt,
	)

	if __name__ == "__main__":
	    # Configure the request queue with max_size=50, then start the app.
	    demo.queue(max_size=50)
	    demo.launch()