Spaces:
Runtime error
Runtime error
File size: 6,108 Bytes
f549064 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# Copyright (c) OpenMMLab. All rights reserved.
import json
import logging
import os.path as osp
import warnings
from typing import List, Union
import mmcv
from mmengine.dist import get_rank
from mmengine.fileio import dump, load
from mmengine.logging import print_log
from mmengine.utils import ProgressBar
from mmdet.registry import DATASETS
from .base_det_dataset import BaseDetDataset
@DATASETS.register_module()
class CrowdHumanDataset(BaseDetDataset):
    r"""Dataset for CrowdHuman.

    Args:
        data_root (str): The root directory for ``data_prefix`` and
            ``ann_file``.
        ann_file (str): Annotation file path.
        extra_ann_file (str, optional): The path of extra image metas
            (a mapping from image ID to (height, width)) for CrowdHuman.
            It can be created by CrowdHumanDataset automatically or by
            tools/misc/get_crowdhuman_id_hw.py manually. Defaults to None.
    """

    METAINFO = {
        'classes': ('person', ),
        # palette is a list of color tuples, which is used for visualization.
        'palette': [(220, 20, 60)]
    }

    def __init__(self, data_root, ann_file, extra_ann_file=None, **kwargs):
        # extra_ann_file records the (height, width) of each image. This
        # file is automatically created when you first load the CrowdHuman
        # dataset by mmdet.
        if extra_ann_file is not None:
            self.extra_ann_exist = True
            self.extra_anns = load(extra_ann_file)
        else:
            ann_file_name = osp.basename(ann_file)
            if 'train' in ann_file_name:
                self.extra_ann_file = osp.join(data_root, 'id_hw_train.json')
            elif 'val' in ann_file_name:
                self.extra_ann_file = osp.join(data_root, 'id_hw_val.json')
            else:
                # Fail early with a clear message rather than an opaque
                # AttributeError on the undefined ``self.extra_ann_file``
                # at the ``osp.isfile`` check below.
                raise ValueError(
                    f'Cannot infer the split from ann_file '
                    f'"{ann_file_name}": its name must contain "train" or '
                    f'"val", or extra_ann_file must be given explicitly.')
            self.extra_ann_exist = False
            if not osp.isfile(self.extra_ann_file):
                print_log(
                    'extra_ann_file does not exist, prepare to collect '
                    'image height and width...',
                    level=logging.INFO)
                self.extra_anns = {}
            else:
                self.extra_ann_exist = True
                self.extra_anns = load(self.extra_ann_file)
        super().__init__(data_root=data_root, ann_file=ann_file, **kwargs)

    def load_data_list(self) -> List[dict]:
        """Load annotations from an annotation file named as ``self.ann_file``

        Returns:
            List[dict]: A list of annotation.
        """  # noqa: E501
        # CrowdHuman annotations are stored one JSON object per line (.odgt).
        anno_strs = self.file_client.get_text(
            self.ann_file).strip().split('\n')
        print_log('loading CrowdHuman annotation...', level=logging.INFO)
        data_list = []
        prog_bar = ProgressBar(len(anno_strs))
        for anno_str in anno_strs:
            anno_dict = json.loads(anno_str)
            parsed_data_info = self.parse_data_info(anno_dict)
            data_list.append(parsed_data_info)
            prog_bar.update()
        # Only rank 0 writes the collected image sizes, to avoid concurrent
        # writes in distributed training.
        if not self.extra_ann_exist and get_rank() == 0:
            # TODO: support file client
            try:
                dump(self.extra_anns, self.extra_ann_file, file_format='json')
            except Exception:
                # Best-effort cache: failing to save must not abort loading.
                warnings.warn(
                    'Cache files can not be saved automatically! To speed up '
                    'loading the dataset, please manually generate the cache '
                    'file by tools/misc/get_crowdhuman_id_hw.py')
            print_log(
                f'\nsave extra_ann_file in {self.data_root}',
                level=logging.INFO)
        del self.extra_anns
        print_log('\nDone', level=logging.INFO)
        return data_list

    def parse_data_info(self, raw_data_info: dict) -> Union[dict, List[dict]]:
        """Parse raw annotation to target format.

        Args:
            raw_data_info (dict): Raw data information load from ``ann_file``

        Returns:
            Union[dict, List[dict]]: Parsed annotation.
        """
        data_info = {}
        img_path = osp.join(self.data_prefix['img'],
                            f"{raw_data_info['ID']}.jpg")
        data_info['img_path'] = img_path
        data_info['img_id'] = raw_data_info['ID']

        if not self.extra_ann_exist:
            # No size cache yet: decode the image once to record its
            # (height, width), then release the buffers immediately.
            img_bytes = self.file_client.get(img_path)
            img = mmcv.imfrombytes(img_bytes, backend='cv2')
            data_info['height'], data_info['width'] = img.shape[:2]
            self.extra_anns[raw_data_info['ID']] = img.shape[:2]
            del img, img_bytes
        else:
            data_info['height'], data_info['width'] = self.extra_anns[
                raw_data_info['ID']]

        instances = []
        for ann in raw_data_info['gtboxes']:
            instance = {}
            # Boxes whose tag is not a known class are kept but ignored.
            if ann['tag'] not in self.metainfo['classes']:
                instance['bbox_label'] = -1
                instance['ignore_flag'] = 1
            else:
                instance['bbox_label'] = self.metainfo['classes'].index(
                    ann['tag'])
                instance['ignore_flag'] = 0
            # The 'extra'/'ignore' field can override a valid tag and mark
            # the box as ignored.
            if 'extra' in ann:
                if 'ignore' in ann['extra']:
                    if ann['extra']['ignore'] != 0:
                        instance['bbox_label'] = -1
                        instance['ignore_flag'] = 1

            # Boxes come as (x, y, w, h); convert to (x1, y1, x2, y2).
            x1, y1, w, h = ann['fbox']
            bbox = [x1, y1, x1 + w, y1 + h]
            instance['bbox'] = bbox

            # Record the full bbox(fbox), head bbox(hbox) and visible
            # bbox(vbox) as additional information. If you need to use
            # this information, you just need to design the pipeline
            # instead of overriding the CrowdHumanDataset.
            instance['fbox'] = bbox
            hbox = ann['hbox']
            instance['hbox'] = [
                hbox[0], hbox[1], hbox[0] + hbox[2], hbox[1] + hbox[3]
            ]
            vbox = ann['vbox']
            instance['vbox'] = [
                vbox[0], vbox[1], vbox[0] + vbox[2], vbox[1] + vbox[3]
            ]
            instances.append(instance)

        data_info['instances'] = instances
        return data_info
|