metadata
library_name: keras-hub
Model Overview
Stable Diffusion 3 Medium
Model
Stable Diffusion 3 Medium is a Multimodal Diffusion Transformer (MMDiT) text-to-image model that features greatly improved performance in image quality, typography, complex prompt understanding, and resource-efficiency.
For more technical details, please refer to the Research paper.
Please note: this model is released under the Stability Community License. For Enterprise License visit Stability.ai or contact us for commercial licensing details.
Model Description
- Developed by: Stability AI
- Model type: MMDiT text-to-image generative model
- Model Description: This is a model that can be used to generate images based on text prompts. It is a Multimodal Diffusion Transformer (https://arxiv.org/abs/2403.03206) that uses three fixed, pretrained text encoders (OpenCLIP-ViT/G, CLIP-ViT/L and T5-xxl)
Model card
https://huggingface.co./stabilityai/stable-diffusion-3-medium
Example Usage
# Pretrained Stable Diffusion 3 model.
model = keras_hub.models.StableDiffusion3Backbone.from_preset(
"stable_diffusion_3_medium"
)
# Randomly initialized Stable Diffusion 3 model with custom config.
vae = keras_hub.models.VAEBackbone(...)
clip_l = keras_hub.models.CLIPTextEncoder(...)
clip_g = keras_hub.models.CLIPTextEncoder(...)
model = keras_hub.models.StableDiffusion3Backbone(
mmdit_patch_size=2,
mmdit_num_heads=4,
mmdit_hidden_dim=256,
mmdit_depth=4,
mmdit_position_size=192,
vae=vae,
clip_l=clip_l,
clip_g=clip_g,
)
# Image to image example
image_to_image = keras_hub.models.StableDiffusion3ImageToImage.from_preset(
"stable_diffusion_3_medium", height=512, width=512
)
image_to_image.generate(
{
"images": np.ones((512, 512, 3), dtype="float32"),
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
}
)
# Generate with batched prompts.
image_to_image.generate(
{
"images": np.ones((2, 512, 512, 3), dtype="float32"),
"prompts": ["cute wallpaper art of a cat", "cute wallpaper art of a dog"],
}
)
# Generate with different `num_steps`, `guidance_scale` and `strength`.
image_to_image.generate(
{
"images": np.ones((512, 512, 3), dtype="float32"),
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
},
num_steps=50,
guidance_scale=5.0,
strength=0.6,
)
# Generate with `negative_prompts`.
image_to_image.generate(
{
"images": np.ones((512, 512, 3), dtype="float32"),
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
"negative_prompts": "green color",
}
)
# inpainting example
reference_image = np.ones((1024, 1024, 3), dtype="float32")
reference_mask = np.ones((1024, 1024), dtype="float32")
inpaint = keras_hub.models.StableDiffusion3Inpaint.from_preset(
"stable_diffusion_3_medium", height=512, width=512
)
inpaint.generate(
reference_image,
reference_mask,
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
)
# Generate with batched prompts.
reference_images = np.ones((2, 512, 512, 3), dtype="float32")
reference_mask = np.ones((2, 512, 512), dtype="float32")
inpaint.generate(
reference_images,
reference_mask,
["cute wallpaper art of a cat", "cute wallpaper art of a dog"]
)
# Generate with different `num_steps`, `guidance_scale` and `strength`.
inpaint.generate(
reference_image,
reference_mask,
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
num_steps=50,
guidance_scale=5.0,
strength=0.6,
)
# text to image example
text_to_image = keras_hub.models.StableDiffusion3TextToImage.from_preset(
"stable_diffusion_3_medium", height=512, width=512
)
text_to_image.generate(
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
)
# Generate with batched prompts.
text_to_image.generate(
["cute wallpaper art of a cat", "cute wallpaper art of a dog"]
)
# Generate with different `num_steps` and `guidance_scale`.
text_to_image.generate(
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
num_steps=50,
guidance_scale=5.0,
)
# Generate with `negative_prompts`.
text_to_image.generate(
{
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
"negative_prompts": "green color",
}
)
Example Usage with Hugging Face URI
# Pretrained Stable Diffusion 3 model.
model = keras_hub.models.StableDiffusion3Backbone.from_preset(
"hf://keras/stable_diffusion_3_medium"
)
# Randomly initialized Stable Diffusion 3 model with custom config.
vae = keras_hub.models.VAEBackbone(...)
clip_l = keras_hub.models.CLIPTextEncoder(...)
clip_g = keras_hub.models.CLIPTextEncoder(...)
model = keras_hub.models.StableDiffusion3Backbone(
mmdit_patch_size=2,
mmdit_num_heads=4,
mmdit_hidden_dim=256,
mmdit_depth=4,
mmdit_position_size=192,
vae=vae,
clip_l=clip_l,
clip_g=clip_g,
)
# Image to image example
image_to_image = keras_hub.models.StableDiffusion3ImageToImage.from_preset(
"hf://keras/stable_diffusion_3_medium", height=512, width=512
)
image_to_image.generate(
{
"images": np.ones((512, 512, 3), dtype="float32"),
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
}
)
# Generate with batched prompts.
image_to_image.generate(
{
"images": np.ones((2, 512, 512, 3), dtype="float32"),
"prompts": ["cute wallpaper art of a cat", "cute wallpaper art of a dog"],
}
)
# Generate with different `num_steps`, `guidance_scale` and `strength`.
image_to_image.generate(
{
"images": np.ones((512, 512, 3), dtype="float32"),
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
},
num_steps=50,
guidance_scale=5.0,
strength=0.6,
)
# Generate with `negative_prompts`.
image_to_image.generate(
{
"images": np.ones((512, 512, 3), dtype="float32"),
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
"negative_prompts": "green color",
}
)
# inpainting example
reference_image = np.ones((1024, 1024, 3), dtype="float32")
reference_mask = np.ones((1024, 1024), dtype="float32")
inpaint = keras_hub.models.StableDiffusion3Inpaint.from_preset(
"hf://keras/stable_diffusion_3_medium", height=512, width=512
)
inpaint.generate(
reference_image,
reference_mask,
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
)
# Generate with batched prompts.
reference_images = np.ones((2, 512, 512, 3), dtype="float32")
reference_mask = np.ones((2, 512, 512), dtype="float32")
inpaint.generate(
reference_images,
reference_mask,
["cute wallpaper art of a cat", "cute wallpaper art of a dog"]
)
# Generate with different `num_steps`, `guidance_scale` and `strength`.
inpaint.generate(
reference_image,
reference_mask,
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
num_steps=50,
guidance_scale=5.0,
strength=0.6,
)
# text to image example
text_to_image = keras_hub.models.StableDiffusion3TextToImage.from_preset(
"hf://keras/stable_diffusion_3_medium", height=512, width=512
)
text_to_image.generate(
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
)
# Generate with batched prompts.
text_to_image.generate(
["cute wallpaper art of a cat", "cute wallpaper art of a dog"]
)
# Generate with different `num_steps` and `guidance_scale`.
text_to_image.generate(
"Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
num_steps=50,
guidance_scale=5.0,
)
# Generate with `negative_prompts`.
text_to_image.generate(
{
"prompts": "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
"negative_prompts": "green color",
}
)