Spaces:

xuan2k
/

Thesis-Demo

Runtime error

App Files Community

xuan2k committed on Jun 10, 2024

Commit

4979800

1 Parent(s): e5efca7

update demo

Browse files

Files changed (8) hide show

.gitignore +2 -1
.log/log.txt +5 -5
SegFormer +1 -1
mask.png +0 -0
output.png +0 -0
streamlit_test.py +3 -0
test.png +0 -0
test.py +168 -242

.gitignore CHANGED Viewed

@@ -2,4 +2,5 @@ __pycache__
 *.pyc
 checkpoints/
 I2SB/
-*.pth

 *.pyc
 checkpoints/
 I2SB/
+*.pth
+SegFormer/

.log/log.txt CHANGED Viewed

@@ -1,6 +1,6 @@
-[19:02:29] INFO     (0:00:00) Loaded options from opt_pkl_path=PosixPath('I2SB/results/inpaint-freeform2030/options.pkl')!
            INFO     (0:00:00) [Diffusion] Built I2SB diffusion: steps=1000!
-[19:02:33] INFO     (0:00:03) [Net] Initialized network from ckpt_pkl='I2SB/data/256x256_diffusion_uncond_fixedsigma.pkl'! Size=552807171!
-[19:02:44] INFO     (0:00:14) [Net] Loaded pretrained adm ckpt_pt='I2SB/data/256x256_diffusion_uncond_fixedsigma.pt'!
-[19:02:49] INFO     (0:00:19) [Net] Loaded network ckpt: I2SB/results/inpaint-freeform2030/latest.pt!
-[19:02:50] INFO     (0:00:20) [Ema] Loaded ema ckpt: I2SB/results/inpaint-freeform2030/latest.pt!

+[19:58:55] INFO     (0:00:00) Loaded options from opt_pkl_path=PosixPath('I2SB/results/inpaint-freeform2030/options.pkl')!
            INFO     (0:00:00) [Diffusion] Built I2SB diffusion: steps=1000!
+[19:58:58] INFO     (0:00:03) [Net] Initialized network from ckpt_pkl='I2SB/data/256x256_diffusion_uncond_fixedsigma.pkl'! Size=552807171!
+[19:59:02] INFO     (0:00:07) [Net] Loaded pretrained adm ckpt_pt='I2SB/data/256x256_diffusion_uncond_fixedsigma.pt'!
+[19:59:06] INFO     (0:00:11) [Net] Loaded network ckpt: I2SB/results/inpaint-freeform2030/latest.pt!
+[19:59:08] INFO     (0:00:13) [Ema] Loaded ema ckpt: I2SB/results/inpaint-freeform2030/latest.pt!

SegFormer CHANGED Viewed

	@@ -1 +1 @@
1	- Subproject commit ~~64ab11278eb30b8e2d8ea1d10a777fc5b1563948~~


1	+ Subproject commit ccc3dd500c4091a583b4b2749e35da501e670aca

mask.png ADDED Viewed

output.png CHANGED Viewed

streamlit_test.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ import streamlit as st
2	+
3	+ st.write("Hello")

test.png CHANGED Viewed

test.py CHANGED Viewed

@@ -40,6 +40,7 @@ from GroundingDINO.groundingdino.util.utils import clean_state_dict, get_phrases
 import sys
 sys.path.insert(0, "/home/ubuntu/Thesis-Demo/I2SB")
 import numpy as np
 import torch
@@ -62,6 +63,18 @@ from I2SB.i2sb import Runner, ckpt_util, download_ckpt
 from I2SB.logger import Logger
 from I2SB.sample import *
 import cv2
@@ -89,13 +102,6 @@ if os.environ.get('IS_MY_DEBUG') is not None:
     inpainting_enable = False
     kosmos_enable = False
-if lama_cleaner_enable:
-    try:
-        from lama_cleaner.model_manager import ModelManager
-        from lama_cleaner.schema import Config as lama_Config
-    except Exception as e:
-        lama_cleaner_enable = False
 # segment anything
 from segment_anything import build_sam, SamPredictor, SamAutomaticMaskGenerator
@@ -191,13 +197,16 @@ def get_point(img, sel_pix, evt: gr.SelectData):
 def undo_button(orig_img, sel_pix):
-    temp = orig_img.copy()
-    temp = np.array(temp, dtype=np.uint8)
-    if len(sel_pix) != 0:
-        sel_pix.pop()
-        for point in sel_pix:
-            cv2.drawMarker(temp, point, colors[0], markerType=markers[0], markerSize=6, thickness=2)
-    return Image.fromarray(temp).convert("RGB")
 def clear_button(orig_img):
@@ -256,10 +265,22 @@ def load_i2sb_model():
         runner.ema = ExponentialMovingAverage(
             runner.net.parameters(), decay=0.99)  # re-init ema with fp16 weight
     print("Loading time:", (time.time()-s)*1e3, "ms.")
     i2sb_model = runner
     return runner
 def plot_boxes_to_image(image_pil, tgt):
     H, W = tgt["size"]
     boxes = tgt["boxes"]
@@ -326,42 +347,6 @@ def load_image(image_path):
     return image_pil, image
-def get_grounding_output(model, image, caption, box_threshold, text_threshold, with_logits=True, device="cpu"):
-    caption = caption.lower()
-    caption = caption.strip()
-    if not caption.endswith("."):
-        caption = caption + "."
-    model = model.to(device)
-    image = image.to(device)
-    with torch.no_grad():
-        outputs = model(image[None], captions=[caption])
-    logits = outputs["pred_logits"].cpu().sigmoid()[0]  # (nq, 256)
-    boxes = outputs["pred_boxes"].cpu()[0]  # (nq, 4)
-    logits.shape[0]
-    # filter output
-    logits_filt = logits.clone()
-    boxes_filt = boxes.clone()
-    filt_mask = logits_filt.max(dim=1)[0] > box_threshold
-    logits_filt = logits_filt[filt_mask]  # num_filt, 256
-    boxes_filt = boxes_filt[filt_mask]  # num_filt, 4
-    logits_filt.shape[0]
-    # get phrase
-    tokenlizer = model.tokenizer
-    tokenized = tokenlizer(caption)
-    # build pred
-    pred_phrases = []
-    for logit, box in zip(logits_filt, boxes_filt):
-        pred_phrase = get_phrases_from_posmap(logit > text_threshold, tokenized, tokenlizer)
-        if with_logits:
-            pred_phrases.append(pred_phrase + f"({str(logit.max().item())[:4]})")
-        else:
-            pred_phrases.append(pred_phrase)
-    return boxes_filt, pred_phrases
 def show_mask(mask, ax, random_color=False):
     if random_color:
         color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
@@ -447,99 +432,45 @@ def load_sd_model(device):
         )
         sd_model = sd_model.to(device)
-def forward_i2sb(img, mask):
-    print(np.unique(img),mask.shape)
     mask = np.where(mask > 0, 1, 0)
     img_tensor = i2sb_transforms(img).to(
             i2sb_opt.device).unsqueeze(0)
     mask_tensor = torch.from_numpy(np.resize(np.array(mask), (256,256))).to(
             i2sb_opt.device).unsqueeze(0).unsqueeze(0)
-    print("POST PROCESSING\t", torch.unique(img_tensor))
-    # corrupt_tensor = img_tensor * (1. - mask_tensor) + mask_tensor
     f = time.time()
     xs, _ = i2sb_model.ddpm_sampling(
         ckpt_opt, img_tensor, mask=mask_tensor, cond=None, clip_denoise=i2sb_opt.clip_denoise, nfe=nfe, verbose=i2sb_opt.n_gpu_per_node == 1)
     recon_img = xs[:, 0, ...].to(i2sb_opt.device)
-    tu.save_image((recon_img+1)/2, "output.png")
     print(recon_img.shape)
-    return transforms.ToPILImage()(((recon_img+1)/2)[0])
-def lama_cleaner_process(image, mask, cleaner_size_limit=1080):
-    try:
-        logger.info(f'_______lama_cleaner_process_______1____')
-        ori_image = image
-        if mask.shape[0] == image.shape[1] and mask.shape[1] == image.shape[0] and mask.shape[0] != mask.shape[1]:
-            # rotate image
-            logger.info(f'_______lama_cleaner_process_______2____')
-            ori_image = np.transpose(image[::-1, ...][:, ::-1], axes=(1, 0, 2))[::-1, ...]
-            logger.info(f'_______lama_cleaner_process_______3____')
-            image = ori_image
-        logger.info(f'_______lama_cleaner_process_______4____')
-        original_shape = ori_image.shape
-        logger.info(f'_______lama_cleaner_process_______5____')
-        interpolation = cv2.INTER_CUBIC
-        size_limit = cleaner_size_limit
-        if size_limit == -1:
-            logger.info(f'_______lama_cleaner_process_______6____')
-            size_limit = max(image.shape)
-        else:
-            logger.info(f'_______lama_cleaner_process_______7____')
-            size_limit = int(size_limit)
-        logger.info(f'_______lama_cleaner_process_______8____')
-        config = lama_Config(
-            ldm_steps=25,
-            ldm_sampler='plms',
-            zits_wireframe=True,
-            hd_strategy='Original',
-            hd_strategy_crop_margin=196,
-            hd_strategy_crop_trigger_size=1280,
-            hd_strategy_resize_limit=2048,
-            prompt='',
-            use_croper=False,
-            croper_x=0,
-            croper_y=0,
-            croper_height=512,
-            croper_width=512,
-            sd_mask_blur=5,
-            sd_strength=0.75,
-            sd_steps=50,
-            sd_guidance_scale=7.5,
-            sd_sampler='ddim',
-            sd_seed=42,
-            cv2_flag='INPAINT_NS',
-            cv2_radius=5,
-        )
-        logger.info(f'_______lama_cleaner_process_______9____')
-        if config.sd_seed == -1:
-            config.sd_seed = random.randint(1, 999999999)
-        # logger.info(f"Origin image shape_0_: {original_shape} / {size_limit}")
-        logger.info(f'_______lama_cleaner_process_______10____')
-        image = resize_max_size(image, size_limit=size_limit, interpolation=interpolation)
-        # logger.info(f"Resized image shape_1_: {image.shape}")
-        # logger.info(f"mask image shape_0_: {mask.shape} / {type(mask)}")
-        logger.info(f'_______lama_cleaner_process_______11____')
-        mask = resize_max_size(mask, size_limit=size_limit, interpolation=interpolation)
-        # logger.info(f"mask image shape_1_: {mask.shape} / {type(mask)}")
-        logger.info(f'_______lama_cleaner_process_______12____')
-        res_np_img = lama_cleaner_model(image, mask, config)
-        logger.info(f'_______lama_cleaner_process_______13____')
-        torch.cuda.empty_cache()
-        logger.info(f'_______lama_cleaner_process_______14____')
-        image = Image.open(io.BytesIO(numpy_to_bytes(res_np_img, 'png')))
-        logger.info(f'_______lama_cleaner_process_______15____')
-    except Exception as e:
-        logger.info(f'lama_cleaner_process[Error]:' + str(e))
-        image = None
-    return  image
 # visualization
 def draw_selected_mask(mask, draw):
@@ -632,27 +563,15 @@ def get_time_cost(run_task_time, time_cost_str):
     return run_task_time, time_cost_str
 def run_anything_task(input_image, input_points, origin_image, task_type,
-            mask_source_radio, cleaner_size_limit=1080):
     run_task_time = 0
     time_cost_str = ''
     run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     print("HERE................", task_type)
-    if (task_type == 'Kosmos-2'):
-        global kosmos_model, kosmos_processor
-        if isinstance(input_image, dict):
-            image_pil, image = load_image(input_image['image'].convert("RGB"))
-            input_img = input_image['image']
-        else:
-            image_pil, image = load_image(input_image.convert("RGB"))
-            input_img = input_image
-        kosmos_image, kosmos_text, kosmos_entities = kosmos_generate_predictions(image_pil, kosmos_model, kosmos_processor)
-        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
-        return None, None, time_cost_str, kosmos_image, gr.Textbox.update(visible=(time_cost_str !='')), kosmos_text, kosmos_entities
     if input_image is None:
-            return [], gr.Gallery.update(label='Please upload a image!😂😂😂😂'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
     file_temp = int(time.time())
     logger.info(f'run_anything_task_002/{device}_[{file_temp}]_{task_type}/[{mask_source_radio}]_1_')
@@ -682,92 +601,119 @@ def run_anything_task(input_image, input_points, origin_image, task_type,
         groundingdino_device = 'cpu'
     logger.info(f'run_anything_task_[{file_temp}]_{task_type}_2_')
-    if task_type == 'segment' or ((task_type in ['inpainting', 'outpainting'] or task_type == 'remove') and mask_source_radio == mask_source_segment):
-        image = np.array(input_img)
-        if sam_predictor:
-            sam_predictor.set_image(image)
-        if sam_predictor:
-            logger.info(f"Forward with: {input_points}")
-            masks, _, _, _ = sam_predictor.predict(
-                point_coords = np.array(input_points),
-                point_labels = np.array([1 for _ in range(len(input_points))]),
-                # boxes = transformed_boxes,
-                multimask_output = False,
-            )
-            # masks: [9, 1, 512, 512]
-            assert sam_checkpoint, 'sam_checkpoint is not found!'
         else:
-            run_mode = "rectangle"
-        # draw output image
-        plt.figure(figsize=(10, 10))
-        plt.imshow(origin_image)
-        for mask in masks:
-            show_mask(mask, plt.gca(), random_color=True)
-        # for box, label in zip(boxes_filt, pred_phrases):
-        #     show_box(box.cpu().numpy(), plt.gca(), label)
-        plt.axis('off')
-        image_path = os.path.join(output_dir, f"grounding_seg_output_{file_temp}.jpg")
-        plt.savefig(image_path, bbox_inches="tight")
-        plt.clf()
-        plt.close('all')
-        segment_image_result = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
-        os.remove(image_path)
         output_images.append(Image.fromarray(segment_image_result))
-        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     logger.info(f'run_anything_task_[{file_temp}]_{task_type}_3_')
     if task_type == 'detection' or task_type == 'segment':
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_9_')
-        return output_images, gr.Gallery.update(label='result images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
-    elif task_type in ['inpainting', 'outpainting'] or task_type == 'remove':
-        if mask_source_radio == mask_source_segment:
-            task_type = 'remove'
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_4_')
-        if mask_source_radio == mask_source_draw:
-            input_mask_pil = input_image['mask']
-            input_mask = np.array(input_mask_pil.convert("L"))
-            mask_pil = input_mask_pil
-            mask = input_mask
         else:
-            masks_ori = copy.deepcopy(masks)
-            masks = torch.where(masks > 0, True, False)
-            mask = masks[0][0].cpu().numpy()
-            mask_pil = Image.fromarray(mask)
         output_images.append(mask_pil.convert("RGB"))
         run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
-        if task_type in ['inpainting', 'outpainting']:
             # image_inpainting = sd_model(prompt = "", image=image_source_for_inpaint, mask_image=image_mask_for_inpaint).images[0]
-            input_img.save("test.png")
-            image_inpainting = forward_i2sb(input_img, mask)
-            print("RESULT\t", np.array(image_inpainting))
         else:
             # remove from mask
             aasds = 1
             logger.info(f'run_anything_task_[{file_temp}]_{task_type}_6_')
-            image_inpainting = lama_cleaner_process(np.array(image_pil), np.array(mask_pil.convert("L")), cleaner_size_limit)
             if image_inpainting is None:
                 logger.info(f'run_anything_task_failed_')
-                return None, None, None, None, None, None, None
             # output_images.append(image_inpainting)
             # run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_7_')
         image_inpainting = image_inpainting.resize((image_pil.size[0], image_pil.size[1]))
         output_images.append(image_inpainting)
         run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_9_')
-        return output_images, gr.Gallery.update(label='result images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
     else:
         logger.info(f"task_type:{task_type} error!")
     logger.info(f'run_anything_task_[{file_temp}]_9_9_')
-    return output_images, gr.Gallery.update(label='result images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !='')), None, None, None
 def change_radio_display(task_type, mask_source_radio, orig_img):
     mask_source_radio_visible = False
@@ -789,20 +735,19 @@ def change_radio_display(task_type, mask_source_radio, orig_img):
         mask_source_radio_visible = True
     if task_type == "relate anything":
         num_relation_visible = True
-    if task_type == "segment":
-        ret = gr.Image(value= orig_img, elem_id="image_upload", type='pil', label="Upload", height=512, tool = "editor")# tool = "sketch", brush_color='#00FFFF', mask_opacity=0.6)
-    elif task_type == "inpainting":
         ret = gr.Image(value = orig_img, elem_id="image_upload", type='pil', label="Upload", height=512, tool = "sketch", brush_color='#00FFFF', mask_opacity=0.6)
     return  (gr.Radio.update(visible=mask_source_radio_visible),
             gr.Slider.update(visible=num_relation_visible),
             gr.Gallery.update(visible=image_gallery_visible),
-            gr.Radio.update(visible=kosmos_input_visible),
-            gr.Image.update(visible=kosmos_output_visible),
-            gr.HighlightedText.update(visible=kosmos_text_output_visible),
             ret, [],
-            gr.Button("Undo point", visible = task_type == "segment"),
-            gr.Button("Clear point", visible = task_type == "segment"),)
 def get_model_device(module):
     try:
@@ -832,10 +777,11 @@ def main_gradio(args):
         with gr.Row():
             with gr.Column():
                 selected_points = gr.State([])
-                original_image = gr.State()
                 task_types = ["segment"]
                 if inpainting_enable:
                     task_types.append("inpainting")
                 input_image = gr.Image(elem_id="image_upload", type='pil', label="Upload", height=512)
@@ -854,7 +800,7 @@ def main_gradio(args):
                 with gr.Row():
                     with gr.Column():
-                        undo_point_button = gr.Button("Undo point")
                         undo_point_button.click(
                             fn= undo_button,
                             inputs=[original_image, selected_points],
@@ -863,7 +809,7 @@ def main_gradio(args):
                     with gr.Column():
-                        clear_point_button = gr.Button("Clear point")
                         clear_point_button.click(
                             fn= clear_button,
                             inputs=[original_image],
@@ -876,10 +822,15 @@ def main_gradio(args):
                 mask_source_radio = gr.Radio([mask_source_draw, mask_source_segment],
                                     value=mask_source_draw, label="Mask from",
                                     visible=False)
                 num_relation = gr.Slider(label="How many relations do you want to see", minimum=1, maximum=20, value=5, step=1, visible=False)
-                kosmos_input = gr.Radio(["Brief", "Detailed"], label="Kosmos Description Type", value="Brief", visible=False)
                 run_button = gr.Button(label="Run", visible=True)
                 # with gr.Accordion("Advanced options", open=False) as advanced_options:
                 #     box_threshold = gr.Slider(
@@ -900,47 +851,21 @@ def main_gradio(args):
             with gr.Column():
                 image_gallery = gr.Gallery(label="result images", show_label=True, elem_id="gallery", height=512, visible=True
-                    ).style(preview=True, columns=[5], object_fit="scale-down", height="auto")
                 time_cost = gr.Textbox(label="Time cost by step (ms):", visible=False, interactive=False)
-                kosmos_output = gr.Image(type="pil", label="result images", visible=False)
-                kosmos_text_output = gr.HighlightedText(
-                                    label="Generated Description",
-                                    combine_adjacent=False,
-                                    show_legend=True,
-                                    visible=False,
-                                ).style(color_map=color_map)
-                # record which text span (label) is selected
-                selected = gr.Number(-1, show_label=False, placeholder="Selected", visible=False)
-                # record the current `entities`
-                entity_output = gr.Textbox(visible=False)
-                # get the current selected span label
-                def get_text_span_label(evt: gr.SelectData):
-                    if evt.value[-1] is None:
-                        return -1
-                    return int(evt.value[-1])
-                # and set this information to `selected`
-                kosmos_text_output.select(get_text_span_label, None, selected)
-                # update output image when we change the span (enity) selection
-                def update_output_image(img_input, image_output, entities, idx):
-                    entities = ast.literal_eval(entities)
-                    updated_image = draw_entity_boxes_on_image(img_input, entities, entity_index=idx)
-                    return updated_image
-                selected.change(update_output_image, [kosmos_output, kosmos_output, entity_output, selected], [kosmos_output])
             run_button.click(fn=run_anything_task, inputs=[
                             input_image, selected_points, original_image, task_type,
-                            mask_source_radio],
-                            outputs=[image_gallery, image_gallery, time_cost, time_cost, kosmos_output, kosmos_text_output, entity_output], show_progress=True, queue=True)
             mask_source_radio.change(fn=change_radio_display, inputs=[task_type, mask_source_radio, original_image],
                             outputs=[mask_source_radio, num_relation])
             task_type.change(fn=change_radio_display, inputs=[task_type, mask_source_radio, original_image],
                             outputs=[mask_source_radio, num_relation,
-                            image_gallery, kosmos_input, kosmos_output, kosmos_text_output, input_image, selected_points, undo_point_button, clear_point_button
                             ])
         # DESCRIPTION = f'### This demo from [Grounded-Segment-Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything). <br>'
@@ -985,6 +910,7 @@ if __name__ == "__main__":
     if sam_enable:
         load_sam_model(device)
     if inpainting_enable:
         load_sd_model(device)

 import sys
 sys.path.insert(0, "/home/ubuntu/Thesis-Demo/I2SB")
+sys.path.insert(0, "/home/ubuntu/Thesis-Demo/SegFormer")
 import numpy as np
 import torch
 from I2SB.logger import Logger
 from I2SB.sample import *
+from pathlib import Path
+inpaint_checkpoint = Path("/home/ubuntu/Thesis-Demo/I2SB/results")
+if not inpaint_checkpoint.exists():
+    os.system("pip install transformers==4.32.0")
+# SegFormer
+from PIL import Image
+from SegFormer.mmseg.apis import inference_segmentor, init_segmentor, visualize_result_pyplot
+from SegFormer.mmseg.core.evaluation import get_palette
 import cv2
     inpainting_enable = False
     kosmos_enable = False
 # segment anything
 from segment_anything import build_sam, SamPredictor, SamAutomaticMaskGenerator
 def undo_button(orig_img, sel_pix):
+    if orig_img:
+        temp = orig_img.copy()
+        temp = np.array(temp, dtype=np.uint8)
+        if len(sel_pix) != 0:
+            sel_pix.pop()
+            for point in sel_pix:
+                cv2.drawMarker(temp, point, colors[0], markerType=markers[0], markerSize=6, thickness=2)
+        return Image.fromarray(temp).convert("RGB")
+    return orig_img
 def clear_button(orig_img):
         runner.ema = ExponentialMovingAverage(
             runner.net.parameters(), decay=0.99)  # re-init ema with fp16 weight
+    logger.info(f"I2SB Loading time:\t {(time.time()-s)*1e3} ms.")
     print("Loading time:", (time.time()-s)*1e3, "ms.")
     i2sb_model = runner
     return runner
+def load_segformer(device):
+    global segformer_model
+    s = time.time()
+    config = "SegFormer/local_configs/segformer/B3/segformer.b3.256x256.wtm.160k.py"
+    checkpoint = "SegFormer/work_dirs/segformer.b3.256x256.wtm.160k/iter_160000.pth"
+    model = init_segmentor(config, checkpoint, device=device)
+    logger.info(f"SegFormer Loading time:\t {(time.time()-s)*1e3} ms.")
+    segformer_model = model
+    return model
 def plot_boxes_to_image(image_pil, tgt):
     H, W = tgt["size"]
     boxes = tgt["boxes"]
     return image_pil, image
 def show_mask(mask, ax, random_color=False):
     if random_color:
         color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
         )
         sd_model = sd_model.to(device)
+def forward_i2sb(img, mask, dilation_mask_extend):
+    print(np.unique(mask),mask.shape)
     mask = np.where(mask > 0, 1, 0)
+    print(np.unique(mask),mask.shape)
+    mask = mask.astype(np.uint8)
+    if dilation_mask_extend.isdigit():
+        kernel_size = int(dilation_mask_extend)
+        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (int(kernel_size), int(kernel_size)))
+        mask = cv2.dilate(mask, kernel, iterations = 1)
     img_tensor = i2sb_transforms(img).to(
             i2sb_opt.device).unsqueeze(0)
     mask_tensor = torch.from_numpy(np.resize(np.array(mask), (256,256))).to(
             i2sb_opt.device).unsqueeze(0).unsqueeze(0)
+    # print("POST PROCESSING\t", torch.unique(img_tensor))
+    corrupt_tensor = img_tensor * (1. - mask_tensor) + mask_tensor
+    print("DOUBLE CHECK:\t", corrupt_tensor.shape)
+    print("DOUBLE CHECK:\t", img_tensor.shape)
+    print("DOUBLE CHECK:\t", mask_tensor.shape)
     f = time.time()
     xs, _ = i2sb_model.ddpm_sampling(
         ckpt_opt, img_tensor, mask=mask_tensor, cond=None, clip_denoise=i2sb_opt.clip_denoise, nfe=nfe, verbose=i2sb_opt.n_gpu_per_node == 1)
     recon_img = xs[:, 0, ...].to(i2sb_opt.device)
+    # tu.save_image((recon_img+1)/2, "output.png")
+    # tu.save_image((corrupt_tensor+1)/2, "output.png")
     print(recon_img.shape)
+    return transforms.ToPILImage()(((recon_img+1)/2)[0]), transforms.ToPILImage()(((corrupt_tensor+1)/2)[0])
+def forward_segformer(img):
+    img_np = np.array(img)
+    img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
+    result = inference_segmentor(segformer_model, img_np)
+    return np.asarray(result[0], dtype=np.uint8)
 # visualization
 def draw_selected_mask(mask, draw):
     return run_task_time, time_cost_str
 def run_anything_task(input_image, input_points, origin_image, task_type,
+            mask_source_radio, segmentation_radio, dilation_mask_extend):
     run_task_time = 0
     time_cost_str = ''
     run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     print("HERE................", task_type)
     if input_image is None:
+            return [], gr.Gallery.update(label='Please upload a image!😂😂😂😂'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !=''))
     file_temp = int(time.time())
     logger.info(f'run_anything_task_002/{device}_[{file_temp}]_{task_type}/[{mask_source_radio}]_1_')
         groundingdino_device = 'cpu'
     logger.info(f'run_anything_task_[{file_temp}]_{task_type}_2_')
+    if task_type == 'segment' or task_type == 'pipeline':
+        image = np.array(origin_image)
+        if segmentation_radio == "SAM":
+            if sam_predictor:
+                sam_predictor.set_image(image)
+            if sam_predictor:
+                logger.info(f"Forward with: {input_points}")
+                masks, _, _, _ = sam_predictor.predict(
+                    point_coords = np.array(input_points),
+                    point_labels = np.array([1 for _ in range(len(input_points))]),
+                    # boxes = transformed_boxes,
+                    multimask_output = False,
+                )
+                # masks: [9, 1, 512, 512]
+                assert sam_checkpoint, 'sam_checkpoint is not found!'
+            else:
+                run_mode = "rectangle"
+            # draw output image
+            plt.figure(figsize=(10, 10))
+            plt.imshow(origin_image)
+            for mask in masks:
+                show_mask(mask, plt.gca(), random_color=True)
+            # for box, label in zip(boxes_filt, pred_phrases):
+            #     show_box(box.cpu().numpy(), plt.gca(), label)
+            plt.axis('off')
+            image_path = os.path.join(output_dir, f"grounding_seg_output_{file_temp}.jpg")
+            plt.savefig(image_path, bbox_inches="tight")
+            plt.clf()
+            plt.close('all')
+            segment_image_result = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
+            os.remove(image_path)
         else:
+            masks = forward_segformer(image)
+            segment_image_result = visualize_result_pyplot(segformer_model, image, masks, get_palette("wtm"), dilation=dilation_mask_extend)# if task_type == "pipeline" else None)
         output_images.append(Image.fromarray(segment_image_result))
+        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
     logger.info(f'run_anything_task_[{file_temp}]_{task_type}_3_')
     if task_type == 'detection' or task_type == 'segment':
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_9_')
+        return output_images, gr.Gallery.update(label='result images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !=''))
+    elif task_type in ['inpainting', 'outpainting'] or task_type == 'pipeline':
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_4_')
+        if task_type == "pipeline":
+            if segmentation_radio == "SAM":
+                masks_ori = copy.deepcopy(masks)
+                print(masks.shape)
+                # masks = torch.where(masks > 0, True, False)
+                mask = masks[0]
+                mask_pil = Image.fromarray(mask)
+                mask = np.where(mask == True, 1, 0)
+            else:
+                mask = masks
+                save_mask = copy.deepcopy(mask)
+                save_mask = np.where(mask > 0, 255, 0).astype(np.uint8)
+                print((save_mask.dtype))
+                mask_pil = Image.fromarray(save_mask)
         else:
+            if mask_source_radio == mask_source_draw:
+                input_mask_pil = input_image['mask']
+                input_mask = np.array(input_mask_pil.convert("L"))
+                mask_pil = input_mask_pil
+                mask = input_mask
+            else:
+                pass
+            #     masks_ori = copy.deepcopy(masks)
+            #     masks = torch.where(masks > 0, True, False)
+            #     mask = masks[0][0].cpu().numpy()
+            #     mask_pil = Image.fromarray(mask)
         output_images.append(mask_pil.convert("RGB"))
         run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
+        if task_type in ['inpainting', 'pipeline']:
             # image_inpainting = sd_model(prompt = "", image=image_source_for_inpaint, mask_image=image_mask_for_inpaint).images[0]
+            # input_img.save("test.png")
+            w, h = input_img.size
+            input_img = input_img.resize((256,256))
+            image_inpainting, corrupted = forward_i2sb(input_img, mask, dilation_mask_extend)
+            input_img = input_img.resize((w,h))
+            corrupted = corrupted.resize((w,h))
+            image_inpainting = image_inpainting.resize((w,h))
+            # print("RESULT\t", np.array(image_inpainting))
         else:
             # remove from mask
             aasds = 1
             logger.info(f'run_anything_task_[{file_temp}]_{task_type}_6_')
             if image_inpainting is None:
                 logger.info(f'run_anything_task_failed_')
+                return None, None, None, None
             # output_images.append(image_inpainting)
             # run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_7_')
         image_inpainting = image_inpainting.resize((image_pil.size[0], image_pil.size[1]))
+        output_images.append(corrupted)
         output_images.append(image_inpainting)
         run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
         logger.info(f'run_anything_task_[{file_temp}]_{task_type}_9_')
+        return output_images, gr.Gallery.update(label='result images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !=''))
     else:
         logger.info(f"task_type:{task_type} error!")
     logger.info(f'run_anything_task_[{file_temp}]_9_9_')
+    return output_images, gr.Gallery.update(label='result images'), time_cost_str, gr.Textbox.update(visible=(time_cost_str !=''))
 def change_radio_display(task_type, mask_source_radio, orig_img):
     # Handler wired to the `.change` events of `task_type` and `mask_source_radio`.
     # Given the selected task type it returns the tuple of component
     # updates / new components assembled in the `return` at the bottom.
     #   task_type         -- compared against "relate anything", "inpainting",
     #                        "segment" and "pipeline" below.
     #   mask_source_radio -- not referenced in the lines shown here.
     #   orig_img          -- passed as `value` to the rebuilt gr.Image.
     # NOTE(review): this is a diff view with unchanged lines elided, so parts of
     # the function are not shown (e.g. the condition guarding the indented
     # assignment just below, and where `num_relation_visible` /
     # `image_gallery_visible` are first set).
     mask_source_radio_visible = False
         mask_source_radio_visible = True
     if task_type == "relate anything":
         num_relation_visible = True
     # Rebuild the upload image around `orig_img`: "inpainting" gets the sketch
     # tool with a '#00FFFF' brush, "segment" / "pipeline" get the editor tool.
+    if task_type == "inpainting":
         ret = gr.Image(value = orig_img, elem_id="image_upload", type='pil', label="Upload", height=512, tool = "sketch", brush_color='#00FFFF', mask_opacity=0.6)
+    elif task_type in ["segment", "pipeline"]:
+        ret = gr.Image(value= orig_img, elem_id="image_upload", type='pil', label="Upload", height=512, tool = "editor")# tool = "sketch", brush_color='#00FFFF', mask_opacity=0.6)
     # NOTE(review): no visible branch assigns `ret` for any other task type
     # (e.g. "relate anything" above); unless an elided line does, the `return`
     # below would hit an unbound local for those values -- confirm.
     return  (gr.Radio.update(visible=mask_source_radio_visible),
             gr.Slider.update(visible=num_relation_visible),
             gr.Gallery.update(visible=image_gallery_visible),
             # The next two are newly constructed components rather than
             # `.update(...)` objects: the segmentation picker is visible unless
             # the task is "inpainting"; the dilation box only for "pipeline".
             # NOTE(review): the label is misspelled ("Segementation"), and the
             # value '7' here differs from the '5' used where the dilation
             # Textbox is first created -- confirm which default is intended.
+            gr.Radio(["SegFormer", "SAM"], value="SAM", label="Segementation Model", visible= task_type != "inpainting"),
+            gr.Textbox(label="Dilation kernel size", value='7',  visible= task_type == "pipeline"),
             # `ret` is the rebuilt image; the empty list lines up with
             # `selected_points` in the `task_type.change` outputs.
             ret, [],
             # Point buttons are visible unless the task is "inpainting".
+            gr.Button("Undo point", visible = task_type != "inpainting"),
+            gr.Button("Clear point", visible = task_type != "inpainting"),)
 def get_model_device(module):
     try:
         with gr.Row():
             # Left column: gr.State holders, the upload image, the
             # "Undo point" / "Clear point" buttons and the task options.
             # The second column (further down) holds the result gallery and
             # the time-cost textbox.
             # NOTE(review): this is a diff view; unchanged lines are elided, so
             # some statements (e.g. the rest of the `.click(` calls and the
             # definition of `task_type`) are not shown here.
             with gr.Column():
                 selected_points = gr.State([])
+                original_image = gr.State(None)
                 task_types = ["segment"]
                 if inpainting_enable:
                     task_types.append("inpainting")
+                    task_types.append("pipeline")
                 input_image = gr.Image(elem_id="image_upload", type='pil', label="Upload", height=512)
                 with gr.Row():
                     with gr.Column():
                         # NOTE(review): `original_image` is the gr.State object
                         # bound above, not the value it stores, so
                         # `original_image is not None` is always True when the
                         # layout is built and the button always starts visible.
+                        undo_point_button = gr.Button("Undo point", visible= True if original_image is not None else False)
                         undo_point_button.click(
                             fn= undo_button,
                             inputs=[original_image, selected_points],
                     with gr.Column():
                         # NOTE(review): same always-True visibility expression
                         # as the undo button.
+                        clear_point_button = gr.Button("Clear point", visible= True if original_image is not None else False)
                         clear_point_button.click(
                             fn= clear_button,
                             inputs=[original_image],
                 mask_source_radio = gr.Radio([mask_source_draw, mask_source_segment],
                                     value=mask_source_draw, label="Mask from",
                                     visible=False)
                 # Segmentation model picker, visible from the start.
                 # NOTE(review): the label text is misspelled ("Segementation");
                 # change_radio_display uses the same string.
+                segmentation_radio = gr.Radio(["SegFormer", "SAM"],
+                                    value="SAM", label="Segementation Model",
+                                    visible=True)
                 # Created hidden with value '5'. change_radio_display returns a
                 # new Textbox with value '7' for this output.
                 # NOTE(review): confirm which default is intended.
+                dilation_mask_extend = gr.Textbox(label="Dilation kernel size", value='5', visible=False)
                 num_relation = gr.Slider(label="How many relations do you want to see", minimum=1, maximum=20, value=5, step=1, visible=False)
                 run_button = gr.Button(label="Run", visible=True)
                 # with gr.Accordion("Advanced options", open=False) as advanced_options:
                 #     box_threshold = gr.Slider(
             with gr.Column():
                 image_gallery = gr.Gallery(label="result images", show_label=True, elem_id="gallery", height=512, visible=True
+                    ).style(preview=True, columns=[5], object_fit="scale-down", height=512)
                 time_cost = gr.Textbox(label="Time cost by step (ms):", visible=False, interactive=False)
             # Run button: the four `outputs` entries pair with the four values
             # run_anything_task returns (image list, gallery update, time
             # string, textbox update), which is why the gallery and the time
             # box are each listed twice.
             run_button.click(fn=run_anything_task, inputs=[
                             input_image, selected_points, original_image, task_type,
+                            mask_source_radio, segmentation_radio, dilation_mask_extend],
+                            outputs=[image_gallery, image_gallery, time_cost, time_cost], show_progress=True, queue=True)
             # NOTE(review): the visible `return` of change_radio_display has nine
             # elements, but only two outputs are listed for this event (nine
             # for `task_type.change` below) -- confirm Gradio accepts the
             # surplus values here.
             mask_source_radio.change(fn=change_radio_display, inputs=[task_type, mask_source_radio, original_image],
                             outputs=[mask_source_radio, num_relation])
             # Task switch: the nine outputs follow the order of the tuple
             # returned by change_radio_display.
             task_type.change(fn=change_radio_display, inputs=[task_type, mask_source_radio, original_image],
                             outputs=[mask_source_radio, num_relation,
+                            image_gallery, segmentation_radio, dilation_mask_extend, input_image, selected_points, undo_point_button, clear_point_button
                             ])
         # DESCRIPTION = f'### This demo from [Grounded-Segment-Anything](https://github.com/IDEA-Research/Grounded-Segment-Anything). <br>'
     if sam_enable:
         load_sam_model(device)
+        load_segformer(device)
     if inpainting_enable:
         load_sd_model(device)