Spaces:

prithivMLmods
/

Text-to-Image

Running

App Files Files Community

Text-to-Image / last-commit /doc.txt

prithivMLmods

Rename last-commit/demo.txt to last-commit/doc.txt

0a329ff verified 6 months ago

raw

history blame contribute delete

9.21 kB

	# TEXT 2 IMAGE PLAYGROUND Documentation

	## Overview

	TEXT 2 IMAGE PLAYGROUND is a Gradio-based web application designed to generate images from text prompts using advanced AI models. It offers various model options, customization parameters, and a user-friendly interface for an enhanced user experience.

	## Features

	- Model Selection: Choose from multiple AI models to generate images with different styles and qualities.
	- Custom Prompts: Input text prompts to define the content and style of the generated images.
	- Negative Prompts: Use negative prompts to avoid unwanted elements in the images.
	- Image Customization: Adjust parameters like seed, width, height, guidance scale, and number of inference steps.
	- Random Seed Generation: Enable random seed generation for varied outputs.
	- Image Gallery: View a gallery of predefined images for inspiration.

	## Interface

	### Description

	```markdown
	## TEXT 2 IMAGE PLAYGROUND 🥠
	```

	### CSS

	```css
	/* Cap the width of the app container; !important forces this max-width to win. */
	.gradio-container {
	max-width: 690px !important;
	}
	/* Center top-level headings. */
	h1 {
	text-align: center;
	}
	/* Hide the page footer (it stays in the layout; only its visibility is turned off). */
	footer {
	visibility: hidden;
	}
	```

	### JavaScript

	```javascript
	/**
	 * Force the page into the dark theme.
	 *
	 * If the current URL does not already carry `__theme=dark`, set that query
	 * parameter and navigate to the updated URL. When the parameter is already
	 * `dark`, nothing happens, which prevents a reload loop.
	 */
	function refresh() {
	    const target = new URL(window.location);
	    const alreadyDark = target.searchParams.get('__theme') === 'dark';
	    if (alreadyDark) {
	        return;
	    }
	    target.searchParams.set('__theme', 'dark');
	    window.location.href = target.href;
	}
	```

	### Examples

	Predefined text prompts for quick testing:

	- 3d image, cute girl, in the style of Pixar...
	- Chocolate dripping from a donut against a yellow background...
	- Illustration of A starry night camp in the mountains...
	- Man in brown leather jacket posing for camera...
	- Commercial photography, giant burger...

	## Model Options

	```python
	# Maps each label offered in the model dropdown to the model id that is
	# handed to the pipeline loader.
	MODEL_OPTIONS = {
	"Realism : V4.0_Lightning🔥": "SG161222/RealVisXL_V4.0_Lightning",
	"Detailed/SOTA : Mobius🚀": "Corcelio/mobius",
	"Anime : Cagliostrolab🍺": "cagliostrolab/animagine-xl-3.1"
	}
	```

	## Configuration

	Environment variables and configurations:

	```python
	# Upper bound for the width/height sliders; overridable via the environment (default 4096).
	MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "4096"))
	# Feature flags: each is enabled only when its environment variable is exactly "1".
	USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE", "0") == "1"
	ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD", "0") == "1"
	# Number of prompts sent to the pipeline per call (default 1).
	BATCH_SIZE = int(os.getenv("BATCH_SIZE", "1"))
	# Use the first CUDA device when CUDA is available, otherwise fall back to the CPU.
	device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
	```

	## Model Loading and Preparation

	Function to load and prepare models:

	```python
	def load_and_prepare_model(model_id):
	    """Load the SDXL pipeline for ``model_id`` and prepare it for inference.

	    The pipeline is loaded from safetensors weights without the watermarker,
	    in float16 when CUDA is available and float32 otherwise, then moved to
	    the module-level ``device``. Its scheduler is replaced by an
	    ``EulerAncestralDiscreteScheduler`` built from the existing scheduler
	    config.

	    Args:
	        model_id: Model identifier passed to ``from_pretrained``.

	    Returns:
	        The prepared pipeline object.
	    """
	    pipe = StableDiffusionXLPipeline.from_pretrained(
	        model_id,
	        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
	        use_safetensors=True,
	        add_watermarker=False,
	    ).to(device)
	    pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

	    if USE_TORCH_COMPILE:
	        # NOTE(review): confirm the pipeline object actually exposes
	        # ``compile()``; compiling a sub-module (e.g. the UNet) with
	        # ``torch.compile`` may be what was intended - TODO verify.
	        pipe.compile()

	    if ENABLE_CPU_OFFLOAD:
	        pipe.enable_model_cpu_offload()

	    return pipe


	# Every configured model is loaded eagerly at import time, keyed by its
	# dropdown label.
	models = {key: load_and_prepare_model(value) for key, value in MODEL_OPTIONS.items()}
	```

	## Image Generation

	Function to generate images based on user inputs:

	```python
	@spaces.GPU(duration=60, enable_queue=True)
	def generate(
	    model_choice: str,
	    prompt: str,
	    negative_prompt: str = "",
	    use_negative_prompt: bool = False,
	    seed: int = 1,
	    width: int = 1024,
	    height: int = 1024,
	    guidance_scale: float = 3,
	    num_inference_steps: int = 25,
	    randomize_seed: bool = False,
	    use_resolution_binning: bool = True,
	    num_images: int = 1,
	    progress=gr.Progress(track_tqdm=True),
	):
	    """Generate ``num_images`` images for ``prompt`` with the selected model.

	    Args:
	        model_choice: Key into the module-level ``models`` mapping.
	        prompt: Text prompt, repeated once per requested image.
	        negative_prompt: Negative prompt text; only used when
	            ``use_negative_prompt`` is true.
	        use_negative_prompt: Whether to send ``negative_prompt`` to the
	            pipeline. When false, ``None`` is sent instead.
	        seed: Seed candidate; the effective seed is whatever
	            ``randomize_seed_fn(seed, randomize_seed)`` returns.
	        width: Requested image width.
	        height: Requested image height.
	        guidance_scale: Guidance scale forwarded to the pipeline.
	        num_inference_steps: Step count forwarded to the pipeline.
	        randomize_seed: Forwarded to ``randomize_seed_fn``.
	        use_resolution_binning: When truthy, ``use_resolution_binning=True``
	            is added to the pipeline call.
	        num_images: Total number of images to produce.
	        progress: Not referenced in the body; presumably consumed by Gradio
	            for progress tracking - confirm.

	    Returns:
	        A ``(image_paths, seed)`` tuple: the results of ``save_image`` for
	        each generated image, and the effective seed.

	    Raises:
	        KeyError: If ``model_choice`` is not a key of ``models``.
	    """
	    pipe = models[model_choice]

	    seed = int(randomize_seed_fn(seed, randomize_seed))
	    generator = torch.Generator(device=device).manual_seed(seed)

	    options = {
	        "prompt": [prompt] * num_images,
	        "negative_prompt": [negative_prompt] * num_images if use_negative_prompt else None,
	        "width": width,
	        "height": height,
	        "guidance_scale": guidance_scale,
	        "num_inference_steps": num_inference_steps,
	        "generator": generator,
	        "output_type": "pil",
	    }

	    if use_resolution_binning:
	        options["use_resolution_binning"] = True

	    images = []
	    for i in range(0, num_images, BATCH_SIZE):
	        batch_options = options.copy()
	        batch_options["prompt"] = options["prompt"][i:i + BATCH_SIZE]
	        # The key is always present, so test the value: slicing ``None``
	        # (negative prompt disabled) would raise TypeError.
	        if options["negative_prompt"] is not None:
	            batch_options["negative_prompt"] = options["negative_prompt"][i:i + BATCH_SIZE]
	        images.extend(pipe(**batch_options).images)

	    image_paths = [save_image(img) for img in images]
	    return image_paths, seed
	```

	## Load Predefined Images

	Function to load predefined images for the gallery:

	```python
	def load_predefined_images():
	    """Return the paths of the twelve predefined gallery images.

	    Returns:
	        A new list ``["assets/1.png", ..., "assets/12.png"]`` in numeric order.
	    """
	    return [f"assets/{index}.png" for index in range(1, 13)]
	```

	## Gradio Interface

	Creating the Gradio interface:

	```python
	# NOTE(review): the indentation of this snippet was lost; the nesting below
	# is a reconstruction - confirm the layout against app.py.
	with gr.Blocks(css=css, theme="bethecloud/storj_theme", js=js_func) as demo:
	    gr.Markdown(DESCRIPTIONx)
	    with gr.Row():
	        prompt = gr.Text(
	            label="Prompt",
	            show_label=False,
	            max_lines=1,
	            placeholder="Enter your prompt",
	            value="Chocolate dripping from a donut against a yellow background, in the style of brocore, hyper-realistic oil --ar 2:3 --q 2 --s 750 --v 5 --ar 2:3 --q 2 --s 750 --v 5",
	            container=False,
	        )
	        run_button = gr.Button("Run🚀", scale=0)
	    result = gr.Gallery(label="Result", columns=1, show_label=False)

	    with gr.Row():
	        model_choice = gr.Dropdown(
	            label="Model Selection ☑️",
	            choices=list(MODEL_OPTIONS.keys()),
	            value="Realism : V4.0_Lightning🔥"
	        )

	    with gr.Accordion("Advanced options", open=True):
	        # Minimum and maximum are both 1, so this slider is effectively fixed.
	        num_images = gr.Slider(
	            label="Number of Images",
	            minimum=1,
	            maximum=1,
	            step=1,
	            value=1,
	        )
	        with gr.Row():
	            with gr.Column(scale=1):
	                use_negative_prompt = gr.Checkbox(label="Use negative prompt", value=True)
	                negative_prompt = gr.Text(
	                    label="Negative prompt",
	                    max_lines=5,
	                    lines=4,
	                    placeholder="Enter a negative prompt",
	                    value="(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, (mutated hands and fingers:1.4), disconnected limbs, mutation, mutated, ugly, disgusting, blurry, amputation",
	                    visible=True,
	                )
	        seed = gr.Slider(
	            label="Seed",
	            minimum=0,
	            maximum=MAX_SEED,
	            step=1,
	            value=0,
	        )
	        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
	        with gr.Row():
	            width = gr.Slider(
	                label="Width",
	                minimum=512,
	                maximum=MAX_IMAGE_SIZE,
	                step=64,
	                value=1024,
	            )
	            height = gr.Slider(
	                label="Height",
	                minimum=512,
	                maximum=MAX_IMAGE_SIZE,
	                step=64,
	                value=1024,
	            )
	        with gr.Row():
	            guidance_scale = gr.Slider(
	                label="Guidance Scale",
	                minimum=0.1,
	                maximum=6,
	                step=0.1,
	                value=3.0,
	            )
	            num_inference_steps = gr.Slider(
	                label="Number of inference steps",
	                minimum=1,
	                maximum=35,
	                step=1,
	                value=20,
	            )

	    gr.Examples(
	        examples=examples,
	        inputs=prompt,
	        cache_examples=False
	    )

	    # Show the negative-prompt box only while its checkbox is ticked.
	    use_negative_prompt.change(
	        fn=lambda x: gr.update(visible=x),
	        inputs=use_negative_prompt,
	        outputs=negative_prompt,
	        api_name=False,
	    )

	    # Submitting either text box or clicking Run triggers generation.
	    # NOTE(review): inputs are bound to generate() positionally, so the last
	    # entry (num_images) lands in generate's `use_resolution_binning`
	    # parameter and `num_images` keeps its default - confirm and realign.
	    gr.on(
	        triggers=[
	            prompt.submit,
	            negative_prompt.submit,
	            run_button.click,
	        ],
	        fn=generate,
	        inputs=[
	            model_choice,
	            prompt,
	            negative_prompt,
	            use_negative_prompt,
	            seed,
	            width,
	            height,
	            guidance_scale,
	            num_inference_steps,
	            randomize_seed,
	            num_images
	        ],
	        outputs=[result, seed],
	        api_name="run",
	    )

	    with gr.Column(scale=3):
	        gr.Markdown("### Image Gallery")
	        predefined_gallery = gr.Gallery(
	            label="Image Gallery",
	            columns=4,
	            show_label=False,
	            value=load_predefined_images(),
	        )

	if __name__ == "__main__":
	    demo.queue(max_size=40).launch(show_api=False)
	```

	## Running the Application

	To run the application, simply execute the script. The interface will launch and be accessible via a web browser.

	```sh
	python app.py
	```