# ginid

# Running on Zero

# File size: 10,106 Bytes

import spaces
import time
import os

import gradio as gr
import torch
from einops import rearrange
from PIL import Image

from flux.cli import SamplingOptions
from flux.sampling import denoise, get_noise, get_schedule, prepare, unpack
from flux.util import load_ae, load_clip, load_flow_model, load_t5
from pulid.pipeline_flux import PuLIDPipeline
from pulid.utils import resize_numpy_image_long


def get_models(name: str, device: torch.device, offload: bool):
    """Load the four components used for sampling.

    Args:
        name: Model name handed to ``load_flow_model`` and ``load_ae``.
        device: Device the T5 and CLIP encoders are loaded on; also used for
            the flow model and autoencoder unless ``offload`` is set.
        offload: When true, the flow model and autoencoder are loaded on
            ``"cpu"`` instead of ``device``.

    Returns:
        A ``(model, ae, t5, clip)`` tuple, in that order.
    """
    # The encoders always go to `device`; only the two large modules honour
    # the offload flag.
    weights_device = "cpu" if offload else device

    text_encoder = load_t5(device, max_length=128)
    clip_encoder = load_clip(device)

    flow_model = load_flow_model(name, device=weights_device)
    flow_model.eval()

    autoencoder = load_ae(name, device=weights_device)
    return flow_model, autoencoder, text_encoder, clip_encoder


class FluxGenerator:
    """Bundle of the loaded models plus the PuLID pipeline built on top of them.

    Attributes set by ``__init__``: ``device``, ``offload``, ``model_name``,
    ``model``, ``ae``, ``t5``, ``clip`` and ``pulid_model``.
    """

    def __init__(self):
        # Fixed configuration: CUDA device, no CPU offloading, flux-dev weights.
        self.device = torch.device('cuda')
        self.offload = False
        self.model_name = 'flux-dev'

        loaded = get_models(self.model_name, device=self.device, offload=self.offload)
        self.model, self.ae, self.t5, self.clip = loaded

        # The pipeline wraps the flow model loaded above; load_pretrain() is
        # called right after construction.
        self.pulid_model = PuLIDPipeline(self.model, 'cuda', weight_dtype=torch.bfloat16)
        self.pulid_model.load_pretrain()


# Module-level generator instance, constructed at import time (this loads all
# models); generate_image() reads and mutates it as a global.
flux_generator = FluxGenerator()


@spaces.GPU
@torch.inference_mode()
def generate_image(
        width,
        height,
        num_steps,
        start_step,
        guidance,
        seed,
        prompt,
        id_image=None,
        id_weight=1.0,
        neg_prompt="",
        true_cfg=1.0,
        timestep_to_start_cfg=1,
        max_sequence_length=128,
):
    """Generate one image with the module-level ``flux_generator``.

    Args:
        width: Output width; stored in ``SamplingOptions`` and used for the
            initial noise and for ``unpack``.
        height: Output height; used the same way as ``width``.
        num_steps: Step count handed to ``get_schedule``.
        start_step: Forwarded to ``denoise`` unchanged.
        guidance: Guidance value forwarded to ``denoise``.
        seed: Any value ``int()`` accepts. ``-1`` means "pick a seed": one is
            drawn from a CPU ``torch.Generator``.
        prompt: Text prompt passed to ``prepare``.
        id_image: Optional identity image. When given it is resized with
            ``resize_numpy_image_long(id_image, 1024)`` and embedded through
            ``pulid_model.get_id_embedding``.
        id_weight: Forwarded to ``denoise`` unchanged.
        neg_prompt: Negative prompt; only encoded when true CFG is active.
        true_cfg: Forwarded to ``denoise``. The negative-prompt inputs are
            prepared only when it differs from 1.0 by more than 1e-2.
        timestep_to_start_cfg: Forwarded to ``denoise`` unchanged.
        max_sequence_length: Assigned to ``flux_generator.t5.max_length``
            before the prompt is encoded.

    Returns:
        A 3-tuple ``(image, seed, debug_images)``: the decoded ``PIL.Image``,
        the seed that was used as a string, and
        ``flux_generator.pulid_model.debug_img_list``.
    """
    flux_generator.t5.max_length = max_sequence_length

    # -1 is the "no fixed seed" sentinel coming from the UI.
    seed = int(seed)
    if seed == -1:
        seed = None

    opts = SamplingOptions(
        prompt=prompt,
        width=width,
        height=height,
        num_steps=num_steps,
        guidance=guidance,
        seed=seed,
    )

    if opts.seed is None:
        opts.seed = torch.Generator(device="cpu").seed()

    # Values within 1e-2 of 1.0 are treated as "true CFG disabled".
    use_true_cfg = abs(true_cfg - 1.0) > 1e-2

    if id_image is not None:
        id_image = resize_numpy_image_long(id_image, 1024)
        id_embeddings, uncond_id_embeddings = flux_generator.pulid_model.get_id_embedding(
            id_image, cal_uncond=use_true_cfg)
    else:
        id_embeddings = None
        uncond_id_embeddings = None

    # prepare input
    x = get_noise(
        1,
        opts.height,
        opts.width,
        device=flux_generator.device,
        dtype=torch.bfloat16,
        seed=opts.seed,
    )
    timesteps = get_schedule(
        opts.num_steps,
        x.shape[-1] * x.shape[-2] // 4,
        shift=True,
    )

    # with offloading, bring the text encoders onto the GPU just for prompt encoding
    if flux_generator.offload:
        flux_generator.t5, flux_generator.clip = (
            flux_generator.t5.to(flux_generator.device),
            flux_generator.clip.to(flux_generator.device),
        )
    inp = prepare(t5=flux_generator.t5, clip=flux_generator.clip, img=x, prompt=opts.prompt)
    inp_neg = (
        prepare(t5=flux_generator.t5, clip=flux_generator.clip, img=x, prompt=neg_prompt)
        if use_true_cfg else None
    )

    # offload TEs to CPU, load model to gpu
    if flux_generator.offload:
        flux_generator.t5, flux_generator.clip = flux_generator.t5.cpu(), flux_generator.clip.cpu()
        torch.cuda.empty_cache()
        flux_generator.model = flux_generator.model.to(flux_generator.device)

    # denoise initial noise
    x = denoise(
        flux_generator.model, **inp, timesteps=timesteps, guidance=opts.guidance, id=id_embeddings, id_weight=id_weight,
        start_step=start_step, uncond_id=uncond_id_embeddings, true_cfg=true_cfg,
        timestep_to_start_cfg=timestep_to_start_cfg,
        neg_txt=inp_neg["txt"] if use_true_cfg else None,
        neg_txt_ids=inp_neg["txt_ids"] if use_true_cfg else None,
        neg_vec=inp_neg["vec"] if use_true_cfg else None,
    )

    # offload model, load autoencoder to gpu
    if flux_generator.offload:
        flux_generator.model.cpu()
        torch.cuda.empty_cache()
        flux_generator.ae.decoder.to(x.device)

    # decode latents to pixel space
    x = unpack(x.float(), opts.height, opts.width)
    with torch.autocast(device_type=flux_generator.device.type, dtype=torch.bfloat16):
        x = flux_generator.ae.decode(x)

    if flux_generator.offload:
        flux_generator.ae.decoder.cpu()
        torch.cuda.empty_cache()

    # bring into PIL format: clamp to [-1, 1], move channels last, map to 0..255
    x = x.clamp(-1, 1)
    x = rearrange(x[0], "c h w -> h w c")

    img = Image.fromarray((127.5 * (x + 1.0)).cpu().byte().numpy())
    return img, str(opts.seed), flux_generator.pulid_model.debug_img_list


# Custom stylesheet handed to gr.Blocks in create_demo(); its only rule makes
# the page's <footer> element invisible.
css = """
footer {
    visibility: hidden;
}
"""

def create_demo(args, model_name: str, device: str = "cuda" if torch.cuda.is_available() else "cpu",
                offload: bool = False):
    """Build the Gradio UI and wire the generate button to ``generate_image``.

    Args:
        args: Parsed command-line namespace. Not read by this function.
        model_name: Not read by this function.
        device: Not read by this function.
        offload: Not read by this function.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css) as demo:
        gr.Markdown("### 'AI 포토 지니'이용 안내: 1) '스타일'중 하나를 선택. 2) 웹캠을 클릭하고 얼굴이 보이면 카메라 버튼 클릭. 3) '생성' 버튼을 클릭하고 기다리면 됩니다.")

        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="프롬프트", value="초상화, 색감, 영화적")
                id_image = gr.Image(label="ID 이미지", sources=["webcam", "upload"], type="numpy")
                generate_btn = gr.Button("생성")

            with gr.Column():
                output_image = gr.Image(label="생성된 이미지")

        with gr.Row():
            with gr.Column():
                gr.Markdown("### 스타일")

                # Each entry is [prompt, example image path].
                # NOTE(review): every entry below is commented out, so this list is
                # empty and the gallery shows nothing. Re-enabling them presumably
                # requires the example_inputs/*.webp files to exist - verify first.
                # The Mars and spaceship prompts contain apostrophes, so they are
                # written with double quotes here to stay valid when uncommented.
                all_examples = [
                    # Space travel I
                    # ['I am an astronaut on a spacewalk. There is no helmet, and my face is visible. The background is Earth & starship as seen from space shuttle.', 'example_inputs/1.webp'],
                    # Space travel II
                    # ['I am an astronaut on a spacewalk. There is no helmet, and my face is visible. The background is Earth & starship as seen from space shuttle.I am holding sign with glowing green text "I Love Mom"', 'example_inputs/2.webp'],
                    # When I grow up
                    # ['profile photo of a 40-year-old Adult Looking straight ahead, wear suite', 'example_inputs/3.webp'],
                    # Iron Man transformation
                    # ['I am an "IRON MAN"', 'example_inputs/4.webp'],
                    # Mars exploration
                    # ["I am wearing a spacesuit and have become an astronaut walking on Mars. I'm not wearing a helmet. I'm looking straight ahead. The background is a desolate area of Mars, and a space rover and a space station can be seen.", 'example_inputs/5.webp'],
                    # Spider-Man
                    # ['I am an "spider MAN"', 'example_inputs/6.webp'],
                    # Piloting a spaceship
                    # ["I am wearing a spacesuit and have become an astronaut. I am piloting a spacecraft. Through the spacecraft's window, I can see outer space.", 'example_inputs/7.webp'],
                    # Cartoon protagonist
                    # ['portrait, pixar style', 'example_inputs/8.webp'],
                    # Wonder Woman
                    # ['I am an "wonder woman"', 'example_inputs/9.webp'],
                    # Cowboy
                    # ['Cowboy, american comics style', 'example_inputs/10.webp'],
                ]

                # The gallery takes (image, caption) pairs, i.e. the entry reversed.
                gallery = gr.Gallery(
                    value=[(image, caption) for caption, image in all_examples],
                    label="예시 갤러리",
                    show_label=False,
                    elem_id="gallery",
                    columns=5,
                    rows=2,
                    object_fit="contain",
                    height="auto"
                )

                # The gr.SelectData annotation is what makes Gradio pass the
                # selection event in; do not remove it.
                def fill_example(evt: gr.SelectData):
                    caption, image = all_examples[evt.index]
                    return [caption, image]

                gallery.select(
                    fill_example,
                    None,
                    [prompt, id_image],
                )

        # Hidden widgets that supply fixed values for the generate_image
        # parameters the UI does not expose. They are created in the same order
        # in which they are passed below.
        width = gr.Slider(256, 1536, 896, step=16, visible=False)
        height = gr.Slider(256, 1536, 1152, step=16, visible=False)
        num_steps = gr.Slider(1, 20, 20, step=1, visible=False)
        start_step = gr.Slider(0, 10, 0, step=1, visible=False)
        guidance = gr.Slider(1.0, 10.0, 4, step=0.1, visible=False)
        # Textbox values are strings; generate_image converts with int() and
        # treats -1 as "use a random seed".
        seed = gr.Textbox("-1", visible=False)
        id_weight = gr.Slider(0.0, 3.0, 1, step=0.05, visible=False)
        neg_prompt = gr.Textbox("Low quality, worst quality, text, signature, watermark, extra limbs", visible=False)
        true_cfg = gr.Slider(1.0, 10.0, 1, step=0.1, visible=False)
        timestep_to_start_cfg = gr.Slider(0, 20, 1, step=1, visible=False)
        max_sequence_length = gr.Slider(128, 512, 128, step=128, visible=False)

        generate_btn.click(
            # Only the first item of the result (the image) is shown. Kept as a
            # lambda; its parameter no longer shadows this function's `args`.
            fn=lambda *inputs: generate_image(*inputs)[0],
            # Order must match generate_image's positional parameters.
            inputs=[
                width,
                height,
                num_steps,
                start_step,
                guidance,
                seed,
                prompt,
                id_image,
                id_weight,
                neg_prompt,
                true_cfg,
                timestep_to_start_cfg,
                max_sequence_length,
            ],
            outputs=[output_image],
        )

    return demo

def build_arg_parser():
    """Build the command-line parser used when this file is run as a script.

    Returns:
        An ``argparse.ArgumentParser`` with the ``--name``, ``--device``,
        ``--offload``, ``--port``, ``--dev`` and ``--pretrained_model`` options.
    """
    import argparse

    parser = argparse.ArgumentParser(description="PuLID for FLUX.1-dev")
    # `choices` must list whole option values. list('flux-dev') splits the
    # string into single characters, which rejects an explicit
    # `--name flux-dev`.
    parser.add_argument("--name", type=str, default="flux-dev", choices=["flux-dev"],
                        help="현재는 flux-dev만 지원합니다")
    parser.add_argument("--device", type=str, default="cuda" if torch.cuda.is_available() else "cpu",
                        help="사용할 디바이스")
    parser.add_argument("--offload", action="store_true", help="사용하지 않을 때 모델을 CPU로 옮깁니다")
    # NOTE(review): --port, --dev and --pretrained_model are parsed but not
    # read anywhere in the script entry point below.
    parser.add_argument("--port", type=int, default=8080, help="사용할 포트")
    parser.add_argument("--dev", action='store_true', help="개발 모드")
    parser.add_argument("--pretrained_model", type=str, help='개발용')
    return parser


if __name__ == "__main__":
    args = build_arg_parser().parse_args()

    # Authenticate with the token from the HF_TOKEN environment variable
    # (os.getenv returns None when it is unset).
    import huggingface_hub
    huggingface_hub.login(os.getenv('HF_TOKEN'))

    demo = create_demo(args, args.name, args.device, args.offload)
    demo.launch(share=True)