AiOS / util /formatting.py
ttxskk
update
d7e58f0
from collections.abc import Sequence
import mmcv
import numpy as np
import torch
from mmcv.parallel import DataContainer as DC
from PIL import Image
def to_tensor(data):
"""Convert objects of various python types to :obj:`torch.Tensor`.
Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
:class:`Sequence`, :class:`int` and :class:`float`.
"""
if isinstance(data, torch.Tensor):
return data
elif isinstance(data, np.ndarray):
return torch.from_numpy(data)
elif isinstance(data, Sequence) and not mmcv.is_str(data):
return torch.tensor(data)
elif isinstance(data, int):
return torch.LongTensor([data])
elif isinstance(data, float):
return torch.FloatTensor([data])
else:
raise TypeError(
f'Type {type(data)} cannot be converted to tensor.'
'Supported types are: `numpy.ndarray`, `torch.Tensor`, '
'`Sequence`, `int` and `float`')
class ToTensor(object):
def __init__(self, keys):
self.keys = keys
def __call__(self, results):
for key in self.keys:
results[key] = to_tensor(results[key])
return results
def __repr__(self):
return self.__class__.__name__ + f'(keys={self.keys})'
class ImageToTensor(object):
def __init__(self, keys):
self.keys = keys
def __call__(self, results):
for key in self.keys:
img = results[key]
if len(img.shape) < 3:
img = np.expand_dims(img, -1)
results[key] = to_tensor(img.transpose(2, 0, 1))
return results
def __repr__(self):
return self.__class__.__name__ + f'(keys={self.keys})'
class Transpose(object):
def __init__(self, keys, order):
self.keys = keys
self.order = order
def __call__(self, results):
for key in self.keys:
results[key] = results[key].transpose(self.order)
return results
def __repr__(self):
return self.__class__.__name__ + \
f'(keys={self.keys}, order={self.order})'
class ToPIL(object):
def __init__(self):
pass
def __call__(self, results):
results['img'] = Image.fromarray(results['img'])
return results
class ToNumpy(object):
def __init__(self):
pass
def __call__(self, results):
results['img'] = np.array(results['img'], dtype=np.float32)
return results
class Collect(object):
"""Collect data from the loader relevant to the specific task.
This is usually the last stage of the data loader pipeline. Typically keys
is set to some subset of "img" and "gt_label".
Args:
keys (Sequence[str]): Keys of results to be collected in ``data``.
meta_keys (Sequence[str], optional): Meta keys to be converted to
``mmcv.DataContainer`` and collected in ``data[img_metas]``.
Default: ``('filename', 'ori_shape', 'img_shape', 'flip',
'flip_direction', 'img_norm_cfg')``
Returns:
dict: The result dict contains the following keys
- keys in``self.keys``
- ``img_metas`` if available
"""
def __init__(self,
keys,
meta_keys=('filename', 'ori_filename', 'ori_shape',
'img_shape', 'flip', 'flip_direction',
'img_norm_cfg')):
self.keys = keys
self.meta_keys = meta_keys
def __call__(self, results):
data = {}
img_meta = {}
for key in self.meta_keys:
if key in results:
img_meta[key] = results[key]
data['img_metas'] = DC(img_meta, cpu_only=True)
for key in self.keys:
data[key] = results[key]
return data
def __repr__(self):
return self.__class__.__name__ + \
f'(keys={self.keys}, meta_keys={self.meta_keys})'
class ToDataContainer:
"""Convert results to :obj:`mmcv.DataContainer` by given fields.
Args:
fields (Sequence[dict]): Each field is a dict like
``dict(key='xxx', **kwargs)``. The ``key`` in result will
be converted to :obj:`mmcv.DataContainer` with ``**kwargs``.
Default: ``(dict(key='img', stack=True), dict(key='gt_bboxes'),
dict(key='gt_labels'))``.
"""
def __init__(self,
fields=(dict(key='img', stack=True), dict(key='gt_bboxes'),
dict(key='gt_labels'))):
self.fields = fields
def __call__(self, results):
"""Call function to convert data in results to
:obj:`mmcv.DataContainer`.
Args:
results (dict): Result dict contains the data to convert.
Returns:
dict: The result dict contains the data converted to \
:obj:`mmcv.DataContainer`.
"""
for field in self.fields:
field = field.copy()
key = field.pop('key')
results[key] = DC(results[key], **field)
return results
def __repr__(self):
return self.__class__.__name__ + f'(fields={self.fields})'
class DefaultFormatBundle:
"""Default formatting bundle.
It simplifies the pipeline of formatting common fields, including "img",
"proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg".
These fields are formatted as follows.
- img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)
- proposals: (1)to tensor, (2)to DataContainer
- gt_bboxes: (1)to tensor, (2)to DataContainer
- gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
- gt_labels: (1)to tensor, (2)to DataContainer
- gt_masks: (1)to tensor, (2)to DataContainer (cpu_only=True)
- gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor, \
(3)to DataContainer (stack=True)
Args:
img_to_float (bool): Whether to force the image to be converted to
float type. Default: True.
pad_val (dict): A dict for padding value in batch collating,
the default value is `dict(img=0, masks=0, seg=255)`.
Without this argument, the padding value of "gt_semantic_seg"
will be set to 0 by default, which should be 255.
"""
def __init__(self,
img_to_float=True,
pad_val=dict(img=0, masks=0, seg=255)):
self.img_to_float = img_to_float
self.pad_val = pad_val
def __call__(self, results):
"""Call function to transform and format common fields in results.
Args:
results (dict): Result dict contains the data to convert.
Returns:
dict: The result dict contains the data that is formatted with \
default bundle.
"""
data_keys = [
'joint_img', # keypoints2d
'smplx_joint_img', #smplx_joint_img, # projected smplx if valid cam_param, else same as keypoints2d
'joint_cam', # joint_cam actually not used in any loss, # raw kps3d probably without ra
'smplx_joint_cam', # kps3d with body, face, hand ra
'smplx_pose',
'smplx_shape',
'smplx_expr',
'lhand_bbox_center',
'lhand_bbox_size',
'rhand_bbox_center',
'rhand_bbox_size',
'face_bbox_center',
'face_bbox_size',
'body_bbox_center',
'body_bbox_size',
'joint_valid',
'joint_trunc',
'smplx_joint_valid',
'smplx_joint_trunc',
'smplx_pose_valid',
'smplx_shape_valid',
'smplx_expr_valid',
'is_3D',
'lhand_bbox_valid',
'rhand_bbox_valid',
'face_bbox_valid',
'body_bbox_valid',
'body_bbox',
'lhand_bbox',
'rhand_bbox',
'face_bbox',
'gender',
'bb2img_trans',
'img2bb_trans',
'ann_idx'
]
if 'img' in results:
img = results['img']
if self.img_to_float is True and img.dtype == np.uint8:
# Normally, image is of uint8 type without normalization.
# At this time, it needs to be forced to be converted to
# flot32, otherwise the model training and inference
# will be wrong. Only used for YOLOX currently .
img = img.astype(np.float32)
# add default meta keys
results = self._add_default_meta_keys(results)
if len(img.shape) < 3:
img = np.expand_dims(img, -1)
img = np.ascontiguousarray(img.transpose(2, 0, 1))
results['img'] = DC(to_tensor(img),
padding_value=self.pad_val['img'],
stack=True)
for key in data_keys:
if key not in results:
continue
results[key] = DC(to_tensor(results[key]))
# if 'gt_masks' in results:
# results['gt_masks'] = DC(
# results['gt_masks'],
# padding_value=self.pad_val['masks'],
# cpu_only=True)
# if 'gt_semantic_seg' in results:
# results['gt_semantic_seg'] = DC(
# to_tensor(results['gt_semantic_seg'][None, ...]),
# padding_value=self.pad_val['seg'],
# stack=True)
return results
def _add_default_meta_keys(self, results):
"""Add default meta keys.
We set default meta keys including `pad_shape`, `scale_factor` and
`img_norm_cfg` to avoid the case where no `Resize`, `Normalize` and
`Pad` are implemented during the whole pipeline.
Args:
results (dict): Result dict contains the data to convert.
Returns:
results (dict): Updated result dict contains the data to convert.
"""
img = results['img']
results.setdefault('pad_shape', img.shape)
results.setdefault('scale_factor', 1.0)
num_channels = 1 if len(img.shape) < 3 else img.shape[2]
results.setdefault(
'img_norm_cfg',
dict(mean=np.zeros(num_channels, dtype=np.float32),
std=np.ones(num_channels, dtype=np.float32),
to_rgb=False))
return results
def __repr__(self):
return self.__class__.__name__ + \
f'(img_to_float={self.img_to_float})'
class WrapFieldsToLists(object):
"""Wrap fields of the data dictionary into lists for evaluation.
This class can be used as a last step of a test or validation
pipeline for single image evaluation or inference.
Example:
>>> test_pipeline = [
>>> dict(type='LoadImageFromFile'),
>>> dict(type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
>>> dict(type='ImageToTensor', keys=['img']),
>>> dict(type='Collect', keys=['img']),
>>> dict(type='WrapIntoLists')
>>> ]
"""
def __call__(self, results):
# Wrap dict fields into lists
for key, val in results.items():
results[key] = [val]
return results
def __repr__(self):
return f'{self.__class__.__name__}()'