# Hugging Face Space: JOY-Huang / InstantIR
# Running on Zero
# File size: 10,974 Bytes

import os
print(os.listdir("InstantX/InstantIR"))
import torch
import random
import numpy as np
import gradio as gr
from PIL import Image
from torchvision import transforms

from diffusers import (
    DDPMScheduler,
    StableDiffusionXLPipeline
)
from schedulers.lcm_single_step_scheduler import LCMSingleStepScheduler
from diffusers.utils import convert_unet_state_dict_to_peft
from peft import LoraConfig, set_peft_model_state_dict
from transformers import (
    AutoImageProcessor, AutoModel
)

from module.ip_adapter.utils import init_ip_adapter_in_unet
from module.ip_adapter.resampler import Resampler
from module.aggregator import Aggregator
from pipelines.sdxl_instantir import InstantIRPipeline, LCM_LORA_MODULES, PREVIEWER_LORA_MODULES


# Preprocessing applied to every uploaded image before restoration:
# a bilinear resize with target size 1024 followed by a 1024 center crop.
_resize_stage = transforms.Resize(
    1024,
    interpolation=transforms.InterpolationMode.BILINEAR,
)
_crop_stage = transforms.CenterCrop(1024)
transform = transforms.Compose([_resize_stage, _crop_stage])

# Hub identifiers of the models used by the demo.
sdxl_repo_id = "stabilityai/stable-diffusion-xl-base-1.0"
instantir_repo_id = "InstantX/InstantIR"
dinov2_repo_id = "facebook/dinov2-large"

# Probe CUDA once: run in half precision on GPU, full precision on CPU.
_cuda_available = torch.cuda.is_available()
device = "cuda" if _cuda_available else "cpu"
torch_dtype = torch.float16 if _cuda_available else torch.float32

def _load_lora_adapter(unet, lora_config, state_dict, adapter_name):
    """Register a LoRA adapter on ``unet`` and load its weights.

    Args:
        unet: The UNet that receives the adapter.
        lora_config: ``LoraConfig`` describing rank, alpha and target modules.
        state_dict: PEFT-format LoRA weights for the adapter.
        adapter_name: Name under which the adapter is registered.

    Raises:
        ValueError: If the state dict contains keys the model does not expect.
    """
    unet.add_adapter(lora_config, adapter_name)
    incompatible_keys = set_peft_model_state_dict(unet, state_dict, adapter_name=adapter_name)
    # Only unexpected keys are treated as fatal; missing keys are tolerated.
    unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
    if unexpected_keys:
        raise ValueError(
            f"Loading adapter weights from state_dict led to unexpected keys not found in the model: "
            f" {unexpected_keys}. "
        )


print("Loading vision encoder...")
image_encoder = AutoModel.from_pretrained(dinov2_repo_id, torch_dtype=torch_dtype)
image_processor = AutoImageProcessor.from_pretrained(dinov2_repo_id)

print("Loading SDXL...")
# Use the dtype selected above (fp16 on GPU, fp32 on CPU) instead of forcing
# fp16, so every component of the pipeline shares one precision.
pipe = StableDiffusionXLPipeline.from_pretrained(
    sdxl_repo_id,
    torch_dtype=torch_dtype,
)
unet = pipe.unet

print("Initializing Aggregator...")
aggregator = Aggregator.from_unet(unet, load_weights_from_unet=False)

print("Loading LQ-Adapter...")
image_proj_model = Resampler(
    dim=1280,
    depth=4,
    dim_head=64,
    heads=20,
    num_queries=64,
    embedding_dim=image_encoder.config.hidden_size,
    output_dim=unet.config.cross_attention_dim,
    ff_mult=4
)
init_ip_adapter_in_unet(
    unet,
    image_proj_model,
    f"{instantir_repo_id}/adapter.pt",
    adapter_tokens=64,
)
print("Initializing InstantIR...")
pipe = InstantIRPipeline(
        pipe.vae, pipe.text_encoder, pipe.text_encoder_2, pipe.tokenizer, pipe.tokenizer_2,
        unet, aggregator, pipe.scheduler, feature_extractor=image_processor, image_encoder=image_encoder,
)

# Add Previewer LoRA.
lora_state_dict, alpha_dict = StableDiffusionXLPipeline.lora_state_dict(
    f"{instantir_repo_id}/previewer_lora_weights.bin",
)
unet_state_dict = {
    k.replace("unet.", ""): v for k, v in lora_state_dict.items() if k.startswith("unet.")
}
unet_state_dict = convert_unet_state_dict_to_peft(unet_state_dict)
# Keys containing "ip" are re-rooted from `attn2` to `attn2.processor`
# (presumably because the IP-adapter projections live inside the attention
# processors -- confirm against `init_ip_adapter_in_unet`).
previewer_state_dict = {
    (k.replace("attn2", "attn2.processor") if "ip" in k else k): v
    for k, v in unet_state_dict.items()
}
lora_alpha = next(iter(alpha_dict.values())) if alpha_dict else 1
print(f"use lora alpha {lora_alpha}")
previewer_lora_config = LoraConfig(
    r=64,
    target_modules=PREVIEWER_LORA_MODULES,
    lora_alpha=lora_alpha,
    lora_dropout=0.0,
)
# The previewer weights were previously prepared but never attached to the
# UNet, so `set_adapter('previewer')` at inference time had nothing to select.
_load_lora_adapter(unet, previewer_lora_config, previewer_state_dict, "previewer")

# Add LCM LoRA.
lora_state_dict, alpha_dict = StableDiffusionXLPipeline.lora_state_dict(
    "latent-consistency/lcm-lora-sdxl"
)
unet_state_dict = {
    k.replace("unet.", ""): v for k, v in lora_state_dict.items() if k.startswith("unet.")
}
unet_state_dict = convert_unet_state_dict_to_peft(unet_state_dict)
lora_alpha = next(iter(alpha_dict.values())) if alpha_dict else 1
print(f"use lora alpha {lora_alpha}")
lora_config = LoraConfig(
    r=64,
    target_modules=LCM_LORA_MODULES,
    lora_alpha=lora_alpha,
    lora_dropout=0.0,
)
_load_lora_adapter(unet, lora_config, unet_state_dict, "lcm")

# Start with all adapters disabled; the restore function picks the adapter
# it needs per request.
unet.disable_adapters()
pipe.scheduler = DDPMScheduler.from_pretrained(
    sdxl_repo_id,
    subfolder="scheduler"
)
lcm_scheduler = LCMSingleStepScheduler.from_config(pipe.scheduler.config)

# Load weights.
print("Loading checkpoint...")
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source (consider `weights_only=True` if the checkpoint is a plain
# tensor state dict).
aggregator_state_dict = torch.load(
    f"{instantir_repo_id}/aggregator.pt",
    map_location="cpu"
)
aggregator.load_state_dict(aggregator_state_dict, strict=True)
aggregator.to(dtype=torch_dtype)
unet.to(dtype=torch_dtype)
pipe = pipe.to(device)

# UI limits: the largest supported image side and the largest seed value
# (upper bound of a signed 32-bit integer).
MAX_IMAGE_SIZE = 1024
MAX_SEED = np.iinfo(np.int32).max

def unpack_pipe_out(preview_row, index):
    """Return the image of the preview entry selected by ``index``.

    Args:
        preview_row: Sequence of gallery entries whose first element is the
            image. May be ``None`` or empty when no restoration has run yet.
        index: Position of the entry to show; integral floats are accepted.

    Returns:
        The first element of the selected entry, or ``None`` when there are
        no previews to choose from.
    """
    # The slider can be released before any restoration has produced previews.
    if not preview_row:
        return None
    return preview_row[int(index)][0]

def dynamic_preview_slider(sampling_steps):
    """Build the "Restoration Previews" slider for a given number of steps.

    Args:
        sampling_steps: Number of sampling steps. ``None`` and values below 1
            are treated as 1 so the slider range stays valid.

    Returns:
        A ``gr.Slider`` ranging over ``0 .. steps - 1`` and positioned on the
        last step.
    """
    # Guard against an emptied or zero "Steps" field, which would otherwise
    # raise or produce a maximum below the minimum.
    last_step = max(int(sampling_steps or 1), 1) - 1
    return gr.Slider(label="Restoration Previews", value=last_step, minimum=0, maximum=last_step, step=1)

def dynamic_guidance_slider(sampling_steps):
    """Build the "Start Free Rendering" slider for a given number of steps.

    Args:
        sampling_steps: Number of sampling steps. ``None`` and values below 1
            are treated as 1 so the slider range stays valid.

    Returns:
        A ``gr.Slider`` ranging over ``0 .. steps`` and positioned on its
        maximum.
    """
    # Guard against an emptied "Steps" field (the value arrives as None).
    total_steps = max(int(sampling_steps or 1), 1)
    return gr.Slider(label="Start Free Rendering", value=total_steps, minimum=0, maximum=total_steps, step=1)

def show_final_preview(preview_row):
    """Return the image of the last preview entry.

    Args:
        preview_row: Sequence of gallery entries whose first element is the
            image. May be ``None`` or empty, e.g. when the output is cleared.

    Returns:
        The first element of the last entry, or ``None`` when there are no
        previews.
    """
    # This also fires when the output is cleared, at which point there may be
    # no previews available.
    if not preview_row:
        return None
    return preview_row[-1][0]

# @spaces.GPU #[uncomment to use ZeroGPU]
def instantir_restore(lq, prompt="", steps=30, cfg_scale=7.0, guidance_end=1.0, creative_restoration=False, seed=3407):
    """Restore a low-quality image with the InstantIR pipeline.

    Args:
        lq: Input PIL image; it is passed through the module-level ``transform``.
        prompt: Optional text prompt guiding the restoration.
        steps: Number of inference steps (coerced to ``int``).
        cfg_scale: Value forwarded as ``guidance_scale``.
        guidance_end: Where control guidance stops. An ``int`` (or any value
            above 1) is interpreted as a step count and divided by ``steps``;
            a float in ``[0, 1]`` is used as a fraction directly.
        creative_restoration: Select the ``lcm`` adapter when true, otherwise
            the ``previewer`` adapter.
        seed: Seed for the random generator (coerced to ``int``).

    Returns:
        A tuple ``(restored_image, previews)`` where ``previews`` is the
        pipeline's preview list with a ``"preview_<i>"`` caption appended to
        each entry.

    Raises:
        gr.Error: If no image was provided or ``steps`` is not a positive number.
    """
    if lq is None:
        raise gr.Error("Please upload a low-quality image first.")
    if steps is None or int(steps) < 1:
        raise gr.Error("Steps must be a positive integer.")
    # Number inputs may deliver floats; the scheduler and generator need ints.
    steps = int(steps)
    seed = int(seed)

    adapter_name = "lcm" if creative_restoration else "previewer"
    if adapter_name not in pipe.unet.active_adapters():
        pipe.unet.set_adapter(adapter_name)

    # Convert a step count into a fraction of the schedule. Floats above 1 can
    # only be step counts as well (a fraction cannot exceed 1).
    if isinstance(guidance_end, int) or guidance_end > 1:
        guidance_end = min(guidance_end / steps, 1.0)

    lq = [transform(lq)]
    generator = torch.Generator(device=device).manual_seed(seed)

    # Inference only: keep autograd off for the whole pipeline call.
    with torch.no_grad():
        out = pipe(
            prompt=[prompt]*len(lq),
            image=lq,
            ip_adapter_image=[lq],
            num_inference_steps=steps,
            generator=generator,
            controlnet_conditioning_scale=1.0,
            negative_prompt=[""]*len(lq),
            guidance_scale=cfg_scale,
            control_guidance_end=guidance_end,
            previewer_scheduler=lcm_scheduler,
            return_dict=False,
            save_preview_row=True,
        )

    restored_images, previews = out[0], out[1]
    # Append a caption to every preview entry so the list can feed a gallery.
    for i, preview_img in enumerate(previews):
        preview_img.append(f"preview_{i}")
    return restored_images[0], previews

# Sample text prompts. Nothing in this file wires them into the interface.
examples = list((
    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
    "An astronaut riding a green horse",
    "A delicious ceviche cheesecake slice",
))

# Stylesheet handed to gr.Blocks: centers the settings column and caps its width.
css = (
    "\n"
    "#col-container {\n"
    "    margin: 0 auto;\n"
    "    max-width: 640px;\n"
    "}\n"
)

# Build the Gradio interface: inputs and settings on top, action buttons and
# the preview slider in the middle, restored output and preview at the bottom.
with gr.Blocks(css=css) as demo:
    gr.Markdown(
    """
    # InstantIR: Blind Image Restoration with Instant Generative Reference.

    ### **Official 🤗 Gradio demo of [InstantIR](https://arxiv.org/abs/2410.06551).**
    ### **InstantIR can not only help you restore your broken image, but also capable of imaginative re-creation following your text prompts. See advance usage for more details!**
    ## Basic usage: revitalize your image
    1. Upload an image you want to restore;
    2. Optionally, tune the `Steps` `CFG Scale` parameters. Typically higher steps lead to better results, but less than 50 is recommended for efficiency;
    3. Click `InstantIR magic!`.
    """)
    with gr.Row():
        lq_img = gr.Image(label="Low-quality image", type="pil")
        with gr.Column(elem_id="col-container"):
            with gr.Row():
                # precision=0 makes the component deliver integers; steps must
                # also be at least 1 for the sliders and scheduler to be valid.
                steps = gr.Number(label="Steps", value=20, step=1, precision=0, minimum=1)
                cfg_scale = gr.Number(label="CFG Scale", value=7.0, step=0.1)
                seed = gr.Number(label="Seed", value=42, step=1, precision=0)
            guidance_end = gr.Slider(label="Start Free Rendering", value=20, minimum=0, maximum=20, step=1)
            prompt = gr.Textbox(
                label="Restoration prompts (Optional)", show_label=False,
                placeholder="Restoration prompts (Optional)", value='',
            )
            mode = gr.Checkbox(label="Creative Restoration", value=False)
    with gr.Row():
        with gr.Row():
            restore_btn = gr.Button("InstantIR magic!")
            clear_btn = gr.ClearButton()
        index = gr.Slider(label="Restoration Previews", value=19, minimum=0, maximum=19, step=1)
    with gr.Row():
        output = gr.Image(label="InstantIR restored", type="pil")
        preview = gr.Image(label="Preview", type="pil")

    # Hidden gallery that holds the intermediate previews of the last run so
    # the slider callbacks can read them.
    pipe_out = gr.Gallery(visible=False)
    clear_btn.add([lq_img, output, preview])
    restore_btn.click(instantir_restore, inputs=[lq_img, prompt, steps, cfg_scale, guidance_end, mode, seed], outputs=[output, pipe_out], api_name="InstantIR")
    # Keep both sliders' ranges in sync with the chosen number of steps.
    steps.change(dynamic_guidance_slider, inputs=steps, outputs=guidance_end)
    output.change(dynamic_preview_slider, inputs=steps, outputs=index)
    # Show the preview picked with the slider, and the final one after each run.
    index.release(unpack_pipe_out, inputs=[pipe_out, index], outputs=preview)
    output.change(show_final_preview, inputs=pipe_out, outputs=preview)
    gr.Markdown(
    """
    ## Advance usage:
    ### Browse restoration variants:
    1. After InstantIR processing, drag the `Restoration Previews` slider to explore other in-progress versions;
    2. If you like one of them, set the `Start Free Rendering` slider to the same value to get a more refined result.
    ### Creative restoration:
    1. Check the `Creative Restoration` checkbox;
    2. Input your text prompts in the `Restoration prompts` textbox;
    3. Set `Start Free Rendering` slider to a medium value (around half of the `steps`) to provide adequate room for InstantIR creation.
    
    ## Examples
    Here are some examplar usage of InstantIR:
    """)

    gr.Markdown(
    """
    ## Citation
    If InstantIR is helpful to your work, please cite our paper via:

    ```
    @article{huang2024instantir,
        title={InstantIR: Blind Image Restoration with Instant Generative Reference},
        author={Huang, Jen-Yuan and Wang, Haofan and Wang, Qixun and Bai, Xu and Ai, Hao and Xing, Peng and Huang, Jen-Tse},
        journal={arXiv preprint arXiv:2410.06551},
        year={2024}
    }
    ```
    """)

demo.queue().launch(debug=True)