# sd15/src/app.py
import gradio as gr # pyright: ignore[reportMissingTypeStubs]
import pillow_heif # pyright: ignore[reportMissingTypeStubs]
import spaces # pyright: ignore[reportMissingTypeStubs]
import torch
from huggingface_hub import (  # pyright: ignore[reportMissingTypeStubs]
    hf_hub_download,  # pyright: ignore[reportUnknownVariableType]
)
from PIL import Image
from refiners.fluxion.utils import manual_seed, no_grad
from refiners.foundationals.latent_diffusion.stable_diffusion_1 import StableDiffusion_1
pillow_heif.register_heif_opener() # pyright: ignore[reportUnknownMemberType]
pillow_heif.register_avif_opener() # pyright: ignore[reportUnknownMemberType]
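# registering these openers lets Pillow decode HEIF/AVIF files anywhere Image.open is used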
TITLE = """
# SD1.5 with Refiners
"""
# initialize the model on the CPU
DEVICE_CPU = torch.device("cpu")
DEVICE_GPU = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DTYPE = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float32
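# bfloat16 halves memory usage versus float32 on GPUs that support it; float32 is the safe fallback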
model = StableDiffusion_1(device=DEVICE_CPU, dtype=DTYPE)
model.unet.load_from_safetensors(
    tensors_path=hf_hub_download(
        repo_id="refiners/sd15.unet",
        filename="model.safetensors",
        revision="6b01fc610c7465fa79e44c52c4d2eb0ea56821c9",
    )
)
model.lda.load_from_safetensors(
    tensors_path=hf_hub_download(
        repo_id="refiners/sd15.autoencoder",
        filename="model.safetensors",
        revision="7565efe4812d8e14072111ab326b15eea4c908a5",
    )
)
model.clip_text_encoder.load_from_safetensors(
    tensors_path=hf_hub_download(
        repo_id="refiners/sd15.text_encoder",
        filename="model.safetensors",
        revision="1b5023ecf0d646b7403f4ad182b6f0ab6b251fef",
    )
)
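# each `revision` pins an exact commit of the weights repo, so the demo keeps loading
# the same tensors even if the repos are updated later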
# "move" the model to the gpu, this is handled/intercepted by Zero GPU
model.to(device=DEVICE_GPU, dtype=DTYPE)
model.unet.to(device=DEVICE_GPU, dtype=DTYPE)
model.lda.to(device=DEVICE_GPU, dtype=DTYPE)
model.clip_text_encoder.to(device=DEVICE_GPU, dtype=DTYPE)
model.solver.to(device=DEVICE_GPU, dtype=DTYPE)
model.device = DEVICE_GPU
model.dtype = DTYPE
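# note: `device` and `dtype` appear to be plain attributes on the refiners model,
# hence the manual update above after moving each submodule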
@spaces.GPU
@no_grad()
def process(
    prompt: str,
    negative_prompt: str,
    condition_scale: float,
    num_inference_steps: int,
    seed: int,
) -> Image.Image:
    assert condition_scale >= 0
    assert num_inference_steps > 0
    assert seed >= 0

    # set the number of denoising steps the solver will run
    model.set_inference_steps(num_inference_steps)

    # set the seed
    manual_seed(seed)

    # compute embeddings for the prompt and negative prompt
    clip_text_embedding = model.compute_clip_text_embedding(
        text=prompt,
        negative_text=negative_prompt,
    )

    # init latents
    x = model.init_latents(size=(512, 512))

    # denoise latents
    for step in model.steps:
        x = model(
            x,
            step=step,
            clip_text_embedding=clip_text_embedding,
            condition_scale=condition_scale,
        )

    # decode denoised latents
    image = model.lda.latents_to_image(x)
    return image
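# a minimal sketch of calling the pipeline directly, without the Gradio UI
# (hypothetical local usage; on a ZeroGPU Space, @spaces.GPU allocates a GPU per call):
#
#   image = process(
#       prompt="a cute cat, detailed high-quality professional image",
#       negative_prompt="lowres, bad anatomy",
#       condition_scale=7.5,
#       num_inference_steps=30,
#       seed=2,
#   )
#   image.save("cat.png")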
with gr.Blocks() as demo:
    gr.Markdown(TITLE)
    with gr.Column():
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button(
                value="Run",
                scale=0,
            )
        output_image = gr.Image(
            label="Output Image",
            image_mode="RGB",
            type="pil",
        )
        with gr.Accordion("Advanced Settings", open=True):
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                placeholder="Enter your (optional) negative prompt",
            )
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=100_000,
                value=2,
                step=1,
            )
            condition_scale = gr.Slider(
                label="Condition scale",
                minimum=0,
                maximum=20,
                value=7.5,
                step=0.05,
            )
            num_inference_steps = gr.Slider(
                label="Number of inference steps",
                minimum=1,
                maximum=50,
                value=30,
                step=1,
            )
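    # wire the button: clicking runs process() with the current widget values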
    run_button.click(
        fn=process,
        inputs=[
            prompt,
            negative_prompt,
            condition_scale,
            num_inference_steps,
            seed,
        ],
        outputs=output_image,
    )
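    # clickable example prompts; with cache_examples=True and cache_mode="lazy",
    # each example's output is generated once on first use and then served from cache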
    gr.Examples(  # pyright: ignore[reportUnknownMemberType]
        examples=[
            [
                "a cute cat, detailed high-quality professional image",
                "lowres, bad anatomy, bad hands, cropped, worst quality",
                7.5,
                30,
                2,
            ],
            [
                "a cute dog, detailed high-quality professional image",
                "lowres, bad anatomy, bad hands, cropped, worst quality",
                7.5,
                30,
                2,
            ],
        ],
        inputs=[
            prompt,
            negative_prompt,
            condition_scale,
            num_inference_steps,
            seed,
        ],
        outputs=output_image,
        fn=process,
        cache_examples=True,
        cache_mode="lazy",
        run_on_click=False,
    )
demo.launch()
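# when running locally rather than on a Space, a temporary public link could be
# created with demo.launch(share=True) (assumption: standard Gradio option, not
# part of this Space's configuration)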