evf-sam2 / utils /grefcoco.py
wondervictor's picture
add app
a93afca
raw
history blame
6.27 kB
import contextlib
import copy
import io
import logging
import os
import random
import numpy as np
import pycocotools.mask as mask_util
from detectron2.structures import Boxes, BoxMode, PolygonMasks, RotatedBoxes
from detectron2.utils.file_io import PathManager
from fvcore.common.timer import Timer
from PIL import Image
"""
This file contains functions to parse gRefCOCO-format annotations (loaded through G_REFER) into dicts in "Detectron2 format".
"""
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Public API. The only loader defined in this module is ``load_grefcoco_json``;
# listing a name that does not exist makes ``from <module> import *`` raise
# AttributeError.
__all__ = ["load_grefcoco_json"]
def load_grefcoco_json(
    refer_root,
    dataset_name,
    splitby,
    split,
    image_root,
    extra_annotation_keys=None,
    extra_refer_keys=None,
):
    """Load referring-expression annotations through ``G_REFER`` as a list of dicts.

    One output dict is produced per referring sentence. All sentences that
    belong to the same reference share the image information and the
    annotation list of that reference.

    Args:
        refer_root: Dataset root; resolved with ``PathManager.get_local_path``
            and passed to ``G_REFER`` as ``data_root``.
        dataset_name: Dataset name passed to ``G_REFER``. ``"refcocop"`` is
            accepted as an alias of ``"refcoco+"``.
        splitby: Passed to ``G_REFER`` as ``splitBy``. For ``"refcocog"`` it
            must be ``"umd"`` or ``"google"``. For ``"refcoco"``/``"refcoco+"``
            a value other than ``"unc"`` only triggers a warning.
        split: Split name passed to ``getRefIds``; every loaded reference must
            carry the same value under its ``"split"`` key.
        image_root: Directory that is joined with each image's ``file_name``.
        extra_annotation_keys: Optional list of annotation keys copied in
            addition to ``"iscrowd"``, ``"bbox"`` and ``"category_id"``.
        extra_refer_keys: Optional list of sentence keys copied in addition to
            ``"raw"`` and ``"sent_id"``.

    Returns:
        A list of dicts with the keys ``"source"``, ``"file_name"``,
        ``"height"``, ``"width"``, ``"image_id"``, ``"empty"``,
        ``"annotations"`` and ``"sentence"``. For a no-target reference
        ``"empty"`` is True and ``"annotations"`` holds one placeholder whose
        annotation keys are all None.

    Raises:
        AssertionError: If image, reference and annotation records do not
            match each other, if an annotation has no segmentation, or if
            ``splitby`` is invalid for ``"refcocog"``.
    """
    if dataset_name == "refcocop":
        dataset_name = "refcoco+"
    if dataset_name in ("refcoco", "refcoco+") and splitby != "unc":
        # The original code had the no-op statement `splitby == "unc"` here.
        # Surface the mismatch without changing the value or raising, so that
        # existing callers keep working exactly as before.
        logger.warning(
            'Expected splitby="unc" for dataset "%s", got "%s"; '
            "continuing with the given value.",
            dataset_name,
            splitby,
        )
    if dataset_name == "refcocog":
        assert splitby == "umd" or splitby == "google"
    dataset_id = "_".join([dataset_name, splitby, split])

    from .grefer import G_REFER

    logger.info("Loading dataset {} ({}-{}) ...".format(dataset_name, splitby, split))
    logger.info("Refcoco root: {}".format(refer_root))
    timer = Timer()
    refer_root = PathManager.get_local_path(refer_root)
    # Silence whatever the G_REFER constructor prints to stdout.
    with contextlib.redirect_stdout(io.StringIO()):
        refer_api = G_REFER(data_root=refer_root, dataset=dataset_name, splitBy=splitby)
    if timer.seconds() > 1:
        logger.info(
            "Loading {} takes {:.2f} seconds.".format(dataset_id, timer.seconds())
        )

    ref_ids = refer_api.getRefIds(split=split)
    img_ids = refer_api.getImgIds(ref_ids)
    refs = refer_api.loadRefs(ref_ids)
    imgs = [refer_api.loadImgs(ref["image_id"])[0] for ref in refs]
    anns = [refer_api.loadAnns(ref["ann_id"]) for ref in refs]
    logger.info(
        "Loaded {} images, {} referring object sets in G_RefCOCO format from {}".format(
            len(img_ids), len(ref_ids), dataset_id
        )
    )

    dataset_dicts = []
    ann_keys = ["iscrowd", "bbox", "category_id"] + (extra_annotation_keys or [])
    ref_keys = ["raw", "sent_id"] + (extra_refer_keys or [])
    # Converted annotations keyed by annotation id, so an annotation that is
    # referenced by several refs is converted only once.
    ann_lib = {}
    # Ids of annotations whose polygon segmentation has no valid polygon.
    invalid_segm_ann_ids = set()

    for img_dict, ref_dict, anno_dicts in zip(imgs, refs, anns):
        record = {}
        record["source"] = "grefcoco"
        record["file_name"] = os.path.join(image_root, img_dict["file_name"])
        record["height"] = img_dict["height"]
        record["width"] = img_dict["width"]
        image_id = record["image_id"] = img_dict["id"]

        # Check that information of image, ann and ref match each other.
        # This fails only when the data parsing logic or the annotation file is buggy.
        assert ref_dict["image_id"] == image_id
        assert ref_dict["split"] == split

        # Normalise a single annotation id to a one-element list.
        ref_ann_ids = ref_dict["ann_id"]
        if not isinstance(ref_ann_ids, list):
            ref_ann_ids = [ref_ann_ids]

        if None in anno_dicts:
            # No-target sample: a single placeholder annotation.
            assert anno_dicts == [None]
            assert ref_ann_ids == [-1]
            record["empty"] = True
            placeholder = {key: None for key in ann_keys}
            placeholder["bbox_mode"] = BoxMode.XYWH_ABS
            placeholder["empty"] = True
            annotations = [placeholder]
        else:
            # Single- or multi-target sample.
            record["empty"] = False
            annotations = []
            for anno_dict in anno_dicts:
                ann_id = anno_dict["id"]
                if anno_dict["iscrowd"]:
                    continue
                assert anno_dict["image_id"] == image_id
                assert ann_id in ref_ann_ids
                if ann_id in invalid_segm_ann_ids:
                    continue  # already known to have no usable polygon

                if ann_id in ann_lib:
                    ann = ann_lib[ann_id]
                else:
                    ann = {key: anno_dict[key] for key in ann_keys if key in anno_dict}
                    ann["bbox_mode"] = BoxMode.XYWH_ABS
                    ann["empty"] = False

                    segm = anno_dict.get("segmentation", None)
                    assert segm  # either list[list[float]] or dict(RLE)
                    if isinstance(segm, dict):
                        if isinstance(segm["counts"], list):
                            # convert to compressed RLE
                            segm = mask_util.frPyObjects(segm, *segm["size"])
                    else:
                        # filter out invalid polygons (< 3 points)
                        segm = [
                            poly
                            for poly in segm
                            if len(poly) % 2 == 0 and len(poly) >= 6
                        ]
                        if not segm:
                            invalid_segm_ann_ids.add(ann_id)
                            continue  # ignore this instance
                    ann["segmentation"] = segm
                    ann_lib[ann_id] = ann
                annotations.append(ann)
        record["annotations"] = annotations

        # One output record per referring expression. ``record.copy()`` is a
        # shallow copy, so these records share the same ``annotations`` list
        # (and cached annotation dicts are shared across refs as well).
        for sent in ref_dict["sentences"]:
            ref_record = record.copy()
            ref = {key: sent[key] for key in ref_keys if key in sent}
            ref["ref_id"] = ref_dict["ref_id"]
            ref_record["sentence"] = ref
            dataset_dicts.append(ref_record)

    if invalid_segm_ann_ids:
        logger.warning(
            "Filtered out %d instances without valid segmentation.",
            len(invalid_segm_ann_ids),
        )
    return dataset_dicts
if __name__ == "__main__":
    # Smoke test for the gRefCOCO loader.
    #
    # Usage (run as a module of its package, because ``load_grefcoco_json``
    # does a relative import of ``.grefer``):
    #     python -m <package>.grefcoco [refer_root] [image_root]
    #
    # Both arguments are optional; the paths below are used when they are
    # omitted.
    import sys

    REFCOCO_PATH = (
        sys.argv[1] if len(sys.argv) > 1 else "/mnt/lustre/hhding/code/ReLA/datasets"
    )
    COCO_TRAIN_2014_IMAGE_ROOT = (
        sys.argv[2]
        if len(sys.argv) > 2
        else "/mnt/lustre/hhding/code/ReLA/datasets/images"
    )
    REFCOCO_DATASET = "grefcoco"
    REFCOCO_SPLITBY = "unc"
    REFCOCO_SPLIT = "train"
    dicts = load_grefcoco_json(
        REFCOCO_PATH,
        REFCOCO_DATASET,
        REFCOCO_SPLITBY,
        REFCOCO_SPLIT,
        COCO_TRAIN_2014_IMAGE_ROOT,
    )
    # Report something meaningful instead of a constant.
    print("Loaded {} samples".format(len(dicts)))