# Reference: https://huggingface.co/spaces/haotiz/glip-zeroshot-demo/blob/main/app.py
"""Gradio demo that runs the same image/caption through two DesCo models.

Two ``GLIPDemo`` predictors are built at import time (DesCo-GLIP and
DesCo-FIBER).  The web UI takes an image, a caption and an optional
``;``-separated list of grounding tokens, and shows both models' rendered
predictions side by side.
"""
import os
import warnings
from copy import deepcopy
from io import BytesIO
from pathlib import Path

import gradio as gr
import numpy as np
import requests
from PIL import Image

# Silence warnings before the model packages are imported so that warnings
# raised during their import are suppressed as well.
warnings.filterwarnings("ignore")

from maskrcnn_benchmark.config import cfg  # noqa: E402
from maskrcnn_benchmark.engine.predictor_glip import GLIPDemo  # noqa: E402

# --- DesCo-GLIP -------------------------------------------------------------
config_file = "configs/pretrain_new/desco_glip.yaml"
weight_file = "MODEL/desco_glip_tiny.pth"

# Update the config options with the config file; manually override some
# options.  This mutates the shared `cfg` object imported above.
cfg.local_rank = 0
cfg.num_gpus = 1
cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.WEIGHT", weight_file])
cfg.merge_from_list(["MODEL.DEVICE", "cuda"])

glip_demo = GLIPDemo(
    cfg,
    min_image_size=800,
    confidence_threshold=0.7,
    show_mask_heatmaps=False,
)

# --- DesCo-FIBER ------------------------------------------------------------
config_file = "configs/pretrain_new/desco_fiber.yaml"
weight_file = "MODEL/desco_fiber_base.pth"

# Work on a copy so the FIBER overrides are not applied to the config object
# that was handed to `glip_demo`.
cfg = deepcopy(cfg)
cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.WEIGHT", weight_file])
cfg.merge_from_list(["MODEL.DEVICE", "cuda"])

fiber_demo = GLIPDemo(
    cfg,
    min_image_size=800,
    confidence_threshold=0.7,
    show_mask_heatmaps=False,
)

# Rendering options forwarded as keyword arguments to `run_on_web_image`.
athetics_params = {
    "skip_name": False,  # whether we overlay the phrase over the box
    "override_color": (0, 90, 190),
    "text_size": 1.0,
    "text_pixel": 3,
    "box_alpha": 1.0,
    "box_pixel": 5,
    "text_offset_original": 8,  # distance between text and box
}


def _parse_ground_tokens(ground_tokens):
    """Split a ``;``-separated token string into a list of clean tokens.

    Surrounding whitespace is removed from every token and empty tokens are
    dropped.  Returns ``None`` when the input is ``None``, blank, or contains
    no non-empty token.
    """
    if ground_tokens is None:
        return None
    tokens = [token.strip() for token in ground_tokens.split(";")]
    tokens = [token for token in tokens if token]
    return tokens or None


def predict(image, text, ground_tokens=""):
    """Run both DesCo models on one image/caption pair.

    Args:
        image: Input image as an array indexable as ``image[:, :, channels]``
            (presumably the RGB array Gradio supplies -- confirm against the
            installed Gradio version).
        text: Caption describing the objects to ground.
        ground_tokens: Optional ``;``-separated phrases to ground; blank
            means "let the model decide" (``None`` is passed on).

    Returns:
        A ``(glip_image, fiber_image)`` tuple of rendered results with the
        channel order flipped back to that of the input.
    """
    tokens = _parse_ground_tokens(ground_tokens)

    # Reverse the channel axis once; each model gets its own copy so that one
    # model cannot affect the other's input should it modify it in place.
    flipped = image[:, :, [2, 1, 0]]

    result, _ = glip_demo.run_on_web_image(
        deepcopy(flipped), text, 0.5, tokens, **athetics_params
    )
    fiber_result, _ = fiber_demo.run_on_web_image(
        deepcopy(flipped), text, 0.5, tokens, **athetics_params
    )
    return result[:, :, [2, 1, 0]], fiber_result[:, :, [2, 1, 0]]


# NOTE(review): this component is never passed to the interface below (which
# uses the "image" shortcut); kept only to preserve the original module state.
image = gr.inputs.Image()

gr.Interface(
    description=(
        "Object Recognition with DesCo \n"
        "(https://github.com/liunian-harold-li/DesCo)"
    ),
    fn=predict,
    inputs=["image", "text", "text"],
    outputs=[
        gr.outputs.Image(type="pil", label="DesCo-GLIP"),
        gr.outputs.Image(type="pil", label="DesCo-FIBER"),
    ],
    examples=[
        ["./1.jpg", "A clown making a balloon animal for a pretty lady.", "clown"],
        ["./1.jpg", "A clown kicking a soccer ball for a pretty lady.", "clown"],
        ["./2.jpg", "A kind of tool, wooden handle with a round head.", "tool"],
        ["./3.jpg", "Bumblebee, yellow with black accents.", "Bumblebee"],
    ],
    article=Path("docs/intro.md").read_text(),
).launch()