JOY-Huang committed on
Commit
028ae97
1 Parent(s): fe2ea78

Update space

Files changed (1)
  1. app.py +274 -111
app.py CHANGED
@@ -1,43 +1,209 @@
- import gradio as gr
- import numpy as np
- import random
- #import spaces #[uncomment to use ZeroGPU]
- from diffusers import DiffusionPipeline
  import torch
+ import random
+ import numpy as np
+ import gradio as gr
+ from PIL import Image
+ from torchvision import transforms
+
+ from diffusers import (
+     DDPMScheduler,
+     StableDiffusionXLPipeline
+ )
+ from schedulers.lcm_single_step_scheduler import LCMSingleStepScheduler
+ from diffusers.utils import convert_unet_state_dict_to_peft
+ from peft import LoraConfig, set_peft_model_state_dict
+ from transformers import (
+     AutoImageProcessor, AutoModel
+ )
+
+ from module.ip_adapter.utils import init_ip_adapter_in_unet
+ from module.ip_adapter.resampler import Resampler
+ from module.aggregator import Aggregator
+ from pipelines.sdxl_instantir import InstantIRPipeline, LCM_LORA_MODULES, PREVIEWER_LORA_MODULES
+
+
+ # Resize and center-crop inputs to the SDXL-native 1024px resolution.
+ transform = transforms.Compose([
+     transforms.Resize(1024, interpolation=transforms.InterpolationMode.BILINEAR),
+     transforms.CenterCrop(1024),
+ ])

  device = "cuda" if torch.cuda.is_available() else "cpu"
- model_repo_id = "stabilityai/sdxl-turbo" #Replace to the model you would like to use
+ sdxl_repo_id = "stabilityai/stable-diffusion-xl-base-1.0"
+ instantir_repo_id = "instantx/instantir"
+ dinov2_repo_id = "facebook/dinov2-large"

  if torch.cuda.is_available():
      torch_dtype = torch.float16
  else:
      torch_dtype = torch.float32

- pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
- pipe = pipe.to(device)
+ print("Loading vision encoder...")
+ image_encoder = AutoModel.from_pretrained(dinov2_repo_id, torch_dtype=torch_dtype)
+ image_processor = AutoImageProcessor.from_pretrained(dinov2_repo_id)
+
+ print("Loading SDXL...")
+ pipe = StableDiffusionXLPipeline.from_pretrained(
+     sdxl_repo_id,
+     torch_dtype=torch.float16,
+ )
+ unet = pipe.unet
+
+ print("Initializing Aggregator...")
+ aggregator = Aggregator.from_unet(unet, load_weights_from_unet=False)
+
+ print("Loading LQ-Adapter...")
+ image_proj_model = Resampler(
+     dim=1280,
+     depth=4,
+     dim_head=64,
+     heads=20,
+     num_queries=64,
+     embedding_dim=image_encoder.config.hidden_size,
+     output_dim=unet.config.cross_attention_dim,
+     ff_mult=4
+ )
+ init_ip_adapter_in_unet(
+     unet,
+     image_proj_model,
+     "InstantX/InstantIR/adapter.pt",
+     adapter_tokens=64,
+ )
+ print("Initializing InstantIR...")
+ pipe = InstantIRPipeline(
+     pipe.vae, pipe.text_encoder, pipe.text_encoder_2, pipe.tokenizer, pipe.tokenizer_2,
+     unet, aggregator, pipe.scheduler, feature_extractor=image_processor, image_encoder=image_encoder,
+ )
+
+ # Add Previewer LoRA.
+ lora_state_dict, alpha_dict = StableDiffusionXLPipeline.lora_state_dict(
+     "InstantX/InstantIR/previewer_lora_weights.bin",
+ )
+ unet_state_dict = {
+     f'{k.replace("unet.", "")}': v for k, v in lora_state_dict.items() if k.startswith("unet.")
+ }
+ unet_state_dict = convert_unet_state_dict_to_peft(unet_state_dict)
+ # IP-adapter attention keys live under "attn2.processor" in the PEFT layout.
+ lora_state_dict = dict()
+ for k, v in unet_state_dict.items():
+     if "ip" in k:
+         k = k.replace("attn2", "attn2.processor")
+     lora_state_dict[k] = v
+ if alpha_dict:
+     lora_alpha = next(iter(alpha_dict.values()))
+ else:
+     lora_alpha = 1
+ print(f"use lora alpha {lora_alpha}")
+ lora_config = LoraConfig(
+     r=64,
+     target_modules=PREVIEWER_LORA_MODULES,
+     lora_alpha=lora_alpha,
+     lora_dropout=0.0,
+ )
+ # Register the previewer adapter so `set_adapter('previewer')` below can find it.
+ unet.add_adapter(lora_config, "previewer")
+ set_peft_model_state_dict(unet, lora_state_dict, adapter_name="previewer")
+
+ # Add LCM LoRA.
+ lora_state_dict, alpha_dict = StableDiffusionXLPipeline.lora_state_dict(
+     "latent-consistency/lcm-lora-sdxl"
+ )
+ unet_state_dict = {
+     f'{k.replace("unet.", "")}': v for k, v in lora_state_dict.items() if k.startswith("unet.")
+ }
+ unet_state_dict = convert_unet_state_dict_to_peft(unet_state_dict)
+ if alpha_dict:
+     lora_alpha = next(iter(alpha_dict.values()))
+ else:
+     lora_alpha = 1
+ print(f"use lora alpha {lora_alpha}")
+ lora_config = LoraConfig(
+     r=64,
+     target_modules=LCM_LORA_MODULES,
+     lora_alpha=lora_alpha,
+     lora_dropout=0.0,
+ )
+
+ unet.add_adapter(lora_config, "lcm")
+ incompatible_keys = set_peft_model_state_dict(unet, unet_state_dict, adapter_name="lcm")
+ if incompatible_keys is not None:
+     # check only for unexpected keys
+     unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
+     missing_keys = getattr(incompatible_keys, "missing_keys", None)
+     if unexpected_keys:
+         raise ValueError(
+             f"Loading adapter weights from state_dict led to unexpected keys not found in the model: "
+             f" {unexpected_keys}. "
+         )
+
+ unet.disable_adapters()
+ pipe.scheduler = DDPMScheduler.from_pretrained(
+     sdxl_repo_id,
+     subfolder="scheduler"
+ )
+ lcm_scheduler = LCMSingleStepScheduler.from_config(pipe.scheduler.config)
+ # Load weights.
+ print("Loading checkpoint...")
+ aggregator_state_dict = torch.load(
+     "InstantX/InstantIR/aggregator.pt",
+     map_location="cpu"
+ )
+ aggregator.load_state_dict(aggregator_state_dict, strict=True)
+ aggregator.to(dtype=torch.float16)
+ unet.to(dtype=torch.float16)
+ pipe = pipe.to(device)

  MAX_SEED = np.iinfo(np.int32).max
  MAX_IMAGE_SIZE = 1024

- #@spaces.GPU #[uncomment to use ZeroGPU]
- def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, progress=gr.Progress(track_tqdm=True)):
-
-     if randomize_seed:
-         seed = random.randint(0, MAX_SEED)
-
-     generator = torch.Generator().manual_seed(seed)
-
-     image = pipe(
-         prompt = prompt,
-         negative_prompt = negative_prompt,
-         guidance_scale = guidance_scale,
-         num_inference_steps = num_inference_steps,
-         width = width,
-         height = height,
-         generator = generator
-     ).images[0]
-
-     return image, seed
+ def unpack_pipe_out(preview_row, index):
+     return preview_row[index][0]
+
+ def dynamic_preview_slider(sampling_steps):
+     return gr.Slider(label="Restoration Previews", value=sampling_steps-1, minimum=0, maximum=sampling_steps-1, step=1)
+
+ def dynamic_guidance_slider(sampling_steps):
+     return gr.Slider(label="Start Free Rendering", value=sampling_steps, minimum=0, maximum=sampling_steps, step=1)
+
+ def show_final_preview(preview_row):
+     return preview_row[-1][0]
+
+ # @spaces.GPU #[uncomment to use ZeroGPU]
+ def instantir_restore(lq, prompt="", steps=30, cfg_scale=7.0, guidance_end=1.0, creative_restoration=False, seed=3407):
+     # Toggle between the creative (LCM) adapter and the default previewer adapter.
+     if creative_restoration:
+         if "lcm" not in pipe.unet.active_adapters():
+             pipe.unet.set_adapter('lcm')
+     else:
+         if "previewer" not in pipe.unet.active_adapters():
+             pipe.unet.set_adapter('previewer')
+
+     if isinstance(guidance_end, int):
+         guidance_end = guidance_end / steps
+     with torch.no_grad():
+         lq = [transform(lq)]
+     generator = torch.Generator(device=device).manual_seed(seed)
+
+     out = pipe(
+         prompt=[prompt]*len(lq),
+         image=lq,
+         ip_adapter_image=[lq],
+         num_inference_steps=steps,
+         generator=generator,
+         controlnet_conditioning_scale=1.0,
+         negative_prompt=[""]*len(lq),
+         guidance_scale=cfg_scale,
+         control_guidance_end=guidance_end,
+         previewer_scheduler=lcm_scheduler,
+         return_dict=False,
+         save_preview_row=True,
+     )
+     # Caption each intermediate preview for the (hidden) gallery output.
+     for i, preview_img in enumerate(out[1]):
+         preview_img.append(f"preview_{i}")
+     return out[0][0], out[1]

  examples = [
      "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
@@ -53,90 +219,87 @@ css="""
53
  """
54
 
55
  with gr.Blocks(css=css) as demo:
56
-
57
- with gr.Column(elem_id="col-container"):
58
- gr.Markdown(f"""
59
- # Text-to-Image Gradio Template
60
- """)
61
-
62
- with gr.Row():
63
-
64
- prompt = gr.Text(
65
- label="Prompt",
66
- show_label=False,
67
- max_lines=1,
68
- placeholder="Enter your prompt",
69
- container=False,
70
- )
71
-
72
- run_button = gr.Button("Run", scale=0)
73
-
74
- result = gr.Image(label="Result", show_label=False)
75
-
76
- with gr.Accordion("Advanced Settings", open=False):
77
-
78
- negative_prompt = gr.Text(
79
- label="Negative prompt",
80
- max_lines=1,
81
- placeholder="Enter a negative prompt",
82
- visible=False,
83
- )
84
-
85
- seed = gr.Slider(
86
- label="Seed",
87
- minimum=0,
88
- maximum=MAX_SEED,
89
- step=1,
90
- value=0,
91
- )
92
-
93
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
94
-
95
- with gr.Row():
96
-
97
- width = gr.Slider(
98
- label="Width",
99
- minimum=256,
100
- maximum=MAX_IMAGE_SIZE,
101
- step=32,
102
- value=1024, #Replace with defaults that work for your model
103
- )
104
-
105
- height = gr.Slider(
106
- label="Height",
107
- minimum=256,
108
- maximum=MAX_IMAGE_SIZE,
109
- step=32,
110
- value=1024, #Replace with defaults that work for your model
111
- )
112
-
113
  with gr.Row():
114
-
115
- guidance_scale = gr.Slider(
116
- label="Guidance scale",
117
- minimum=0.0,
118
- maximum=10.0,
119
- step=0.1,
120
- value=0.0, #Replace with defaults that work for your model
121
- )
122
-
123
- num_inference_steps = gr.Slider(
124
- label="Number of inference steps",
125
- minimum=1,
126
- maximum=50,
127
- step=1,
128
- value=2, #Replace with defaults that work for your model
129
- )
130
-
131
- gr.Examples(
132
- examples = examples,
133
- inputs = [prompt]
134
- )
135
- gr.on(
136
- triggers=[run_button.click, prompt.submit],
137
- fn = infer,
138
- inputs = [prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
139
- outputs = [result, seed]
140
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
- demo.queue().launch()
 
 
 
 
 
 
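
This commit also registers the restore function as an API endpoint via `api_name="InstantIR"`. A minimal sketch of driving that endpoint from Python with `gradio_client` follows; the Space id and the input file path are assumptions (placeholders, not from the commit), and the argument order mirrors the `inputs=[lq_img, prompt, steps, cfg_scale, guidance_end, mode, seed]` list wired above:

```python
# Hedged sketch: Space id and file path below are assumptions, not from the commit.
from gradio_client import Client, handle_file

client = Client("InstantX/InstantIR")  # hypothetical Space id
restored, previews = client.predict(
    handle_file("low_quality_photo.jpg"),  # lq_img: image to restore
    "",     # prompt: optional restoration prompt
    20,     # steps: sampling steps
    7.0,    # cfg_scale: classifier-free guidance
    20,     # guidance_end: step at which free rendering starts
    False,  # mode: creative restoration on/off
    42,     # seed
    api_name="/InstantIR",
)
print(restored)  # restored image; `previews` holds the hidden preview gallery
```

Note that `instantir_restore` divides an integer `guidance_end` by `steps`, so passing `20` with `steps=20` keeps aggregator guidance active for the entire trajectory; smaller values hand the tail of the trajectory over to free rendering.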