Spaces:

ttxskk
/

AiOS

Running on L40S

AiOS / util /formatting.py

ttxskk

update

d7e58f0 21 days ago

11.5 kB

	from collections.abc import Sequence

	import mmcv
	import numpy as np
	import torch
	from mmcv.parallel import DataContainer as DC
	from PIL import Image


	def to_tensor(data):
	    """Convert objects of various python types to :obj:`torch.Tensor`.

	    Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
	    :class:`Sequence`, :class:`int` and :class:`float`.

	    Args:
	        data: Object to convert.

	    Returns:
	        torch.Tensor: ``data`` itself when it already is a tensor, the
	        result of ``torch.from_numpy`` for arrays, ``torch.tensor(data)``
	        for non-string sequences, and a one-element ``LongTensor`` /
	        ``FloatTensor`` for ``int`` / ``float`` scalars.

	    Raises:
	        TypeError: If ``data`` is none of the supported types (strings are
	            rejected as well).
	    """
	    if isinstance(data, torch.Tensor):
	        return data
	    if isinstance(data, np.ndarray):
	        return torch.from_numpy(data)
	    # ``str`` is itself a Sequence, so it has to be excluded explicitly.
	    if isinstance(data, Sequence) and not isinstance(data, str):
	        return torch.tensor(data)
	    if isinstance(data, int):
	        return torch.LongTensor([data])
	    if isinstance(data, float):
	        return torch.FloatTensor([data])
	    # The adjacent literals are concatenated verbatim, so the separating
	    # space after the first sentence must be part of the literal.
	    raise TypeError(
	        f'Type {type(data)} cannot be converted to tensor. '
	        'Supported types are: `numpy.ndarray`, `torch.Tensor`, '
	        '`Sequence`, `int` and `float`')


	class ToTensor(object):
	    """Pipeline step converting selected entries of ``results`` to tensors.

	    Args:
	        keys: Keys of ``results`` whose values are passed to ``to_tensor``.
	    """

	    def __init__(self, keys):
	        self.keys = keys

	    def __call__(self, results):
	        """Replace ``results[key]`` by its tensor form for each stored key.

	        Args:
	            results (dict): Mapping updated in place.

	        Returns:
	            dict: The same ``results`` object.
	        """
	        for name in self.keys:
	            value = results[name]
	            results[name] = to_tensor(value)
	        return results

	    def __repr__(self):
	        return f'{self.__class__.__name__}(keys={self.keys})'


	class ImageToTensor(object):
	    """Pipeline step turning image arrays into tensors with axis 2 first.

	    For each configured key the array gets a trailing axis when it has
	    fewer than three dimensions, is transposed with ``(2, 0, 1)`` and is
	    then handed to ``to_tensor``.

	    Args:
	        keys: Keys of ``results`` holding the images to convert.
	    """

	    def __init__(self, keys):
	        self.keys = keys

	    def __call__(self, results):
	        """Convert every configured image in ``results`` in place.

	        Args:
	            results (dict): Mapping holding the images
	                (presumably (H, W[, C]) arrays -- confirm with callers).

	        Returns:
	            dict: The same ``results`` object.
	        """
	        for name in self.keys:
	            image = results[name]
	            lacks_channel_axis = len(image.shape) < 3
	            if lacks_channel_axis:
	                image = np.expand_dims(image, -1)
	            channel_first = image.transpose(2, 0, 1)
	            results[name] = to_tensor(channel_first)
	        return results

	    def __repr__(self):
	        return f'{self.__class__.__name__}(keys={self.keys})'


	class Transpose(object):
	    """Pipeline step applying ``.transpose(order)`` to selected entries.

	    Args:
	        keys: Keys of ``results`` whose values are transposed.
	        order: Axis order forwarded unchanged to each value's
	            ``transpose`` method.
	    """

	    def __init__(self, keys, order):
	        self.keys = keys
	        self.order = order

	    def __call__(self, results):
	        """Transpose every configured entry of ``results`` in place.

	        Args:
	            results (dict): Mapping updated in place.

	        Returns:
	            dict: The same ``results`` object.
	        """
	        for name in self.keys:
	            original = results[name]
	            results[name] = original.transpose(self.order)
	        return results

	    def __repr__(self):
	        cls_name = self.__class__.__name__
	        return f'{cls_name}(keys={self.keys}, order={self.order})'


	class ToPIL(object):
	    """Pipeline step replacing ``results['img']`` by a PIL image."""

	    def __init__(self):
	        """Create the step; it takes no configuration."""

	    def __call__(self, results):
	        """Convert ``results['img']`` with ``Image.fromarray``.

	        Args:
	            results (dict): Mapping holding the image under ``'img'``.

	        Returns:
	            dict: The same ``results`` object.
	        """
	        array = results['img']
	        results['img'] = Image.fromarray(array)
	        return results


	class ToNumpy(object):
	    """Pipeline step replacing ``results['img']`` by a float32 array."""

	    def __init__(self):
	        """Create the step; it takes no configuration."""

	    def __call__(self, results):
	        """Convert ``results['img']`` with ``np.array(..., dtype=float32)``.

	        Args:
	            results (dict): Mapping holding the image under ``'img'``.

	        Returns:
	            dict: The same ``results`` object.
	        """
	        image = results['img']
	        results['img'] = np.array(image, dtype=np.float32)
	        return results


	class Collect(object):
	    """Collect data from the loader relevant to the specific task.

	    This is usually the last stage of the data loader pipeline. A new dict
	    is built that holds the requested ``keys`` plus an ``img_metas`` entry.

	    Args:
	        keys (Sequence[str]): Keys of results to be collected in ``data``.
	            Every one of them must exist in the results.
	        meta_keys (Sequence[str], optional): Keys copied, when present in
	            the results, into the meta dict that is wrapped in a
	            ``mmcv.DataContainer`` and stored under ``data['img_metas']``.
	            Default: ``('filename', 'ori_filename', 'ori_shape',
	            'img_shape', 'flip', 'flip_direction', 'img_norm_cfg')``

	    Returns:
	        dict: The result dict contains the following keys

	        - ``img_metas`` (always set; its payload only holds the meta keys
	          found in the results)
	        - keys in ``self.keys``
	    """

	    def __init__(self,
	                 keys,
	                 meta_keys=('filename', 'ori_filename', 'ori_shape',
	                            'img_shape', 'flip', 'flip_direction',
	                            'img_norm_cfg')):
	        self.keys = keys
	        self.meta_keys = meta_keys

	    def __call__(self, results):
	        """Build the collected dict from ``results``.

	        Args:
	            results (dict): Pipeline results to pick entries from.

	        Returns:
	            dict: Newly created dict; ``results`` itself is not modified.
	        """
	        # Missing meta keys are skipped silently; missing data keys raise.
	        img_meta = {
	            name: results[name]
	            for name in self.meta_keys if name in results
	        }
	        data = {'img_metas': DC(img_meta, cpu_only=True)}
	        for name in self.keys:
	            data[name] = results[name]
	        return data

	    def __repr__(self):
	        cls_name = self.__class__.__name__
	        return f'{cls_name}(keys={self.keys}, meta_keys={self.meta_keys})'


	class ToDataContainer:
	    """Convert results to :obj:`mmcv.DataContainer` by given fields.

	    Args:
	        fields (Sequence[dict]): Each field is a dict like
	            ``dict(key='xxx', **kwargs)``. The entry ``results[key]`` is
	            replaced by a :obj:`mmcv.DataContainer` built with
	            ``**kwargs``.
	            Default: ``(dict(key='img', stack=True), dict(key='gt_bboxes'),
	            dict(key='gt_labels'))``.
	    """

	    def __init__(self,
	                 fields=(dict(key='img', stack=True), dict(key='gt_bboxes'),
	                         dict(key='gt_labels'))):
	        self.fields = fields

	    def __call__(self, results):
	        """Wrap the configured entries of ``results`` in DataContainers.

	        Args:
	            results (dict): Result dict contains the data to convert.

	        Returns:
	            dict: The same ``results`` object with the configured entries
	            converted to :obj:`mmcv.DataContainer`.
	        """
	        for spec in self.fields:
	            # Work on a copy so the stored field specs keep their 'key'.
	            options = spec.copy()
	            target = options.pop('key')
	            results[target] = DC(results[target], **options)
	        return results

	    def __repr__(self):
	        return f'{self.__class__.__name__}(fields={self.fields})'


	class DefaultFormatBundle:
	    """Default formatting bundle.

	    It formats the image and the annotation fields listed in
	    ``DATA_KEYS``. Fields that are absent from the results are skipped.

	    - img: (1) optional uint8 -> float32 cast, (2) append a trailing axis
	      when the array has fewer than 3 dims, (3) transpose with
	      ``(2, 0, 1)``, (4) to tensor, (5) to DataContainer (stack=True,
	      padding_value=``pad_val['img']``). Default meta keys are added to
	      the results as part of this step.
	    - every key of ``DATA_KEYS``: (1) to tensor, (2) to DataContainer

	    Formatting of ``gt_masks`` and ``gt_semantic_seg`` is disabled in this
	    version, so the ``masks`` and ``seg`` entries of ``pad_val`` are not
	    read by this class.

	    Args:
	        img_to_float (bool): Whether to force a uint8 image to be converted
	            to float32. Only the literal ``True`` enables the cast.
	            Default: True.
	        pad_val (dict): A dict for padding value in batch collating,
	            the default value is `dict(img=0, masks=0, seg=255)`. The
	            mapping is copied, and ``None`` selects the default values.
	    """

	    # Annotation fields converted with ``DC(to_tensor(value))`` when present.
	    DATA_KEYS = (
	        'joint_img',  # keypoints2d
	        # projected smplx if valid cam_param, else same as keypoints2d
	        'smplx_joint_img',
	        # joint_cam actually not used in any loss,
	        # raw kps3d probably without ra
	        'joint_cam',
	        'smplx_joint_cam',  # kps3d with body, face, hand ra
	        'smplx_pose',
	        'smplx_shape',
	        'smplx_expr',
	        'lhand_bbox_center',
	        'lhand_bbox_size',
	        'rhand_bbox_center',
	        'rhand_bbox_size',
	        'face_bbox_center',
	        'face_bbox_size',
	        'body_bbox_center',
	        'body_bbox_size',
	        'joint_valid',
	        'joint_trunc',
	        'smplx_joint_valid',
	        'smplx_joint_trunc',
	        'smplx_pose_valid',
	        'smplx_shape_valid',
	        'smplx_expr_valid',
	        'is_3D',
	        'lhand_bbox_valid',
	        'rhand_bbox_valid',
	        'face_bbox_valid',
	        'body_bbox_valid',
	        'body_bbox',
	        'lhand_bbox',
	        'rhand_bbox',
	        'face_bbox',
	        'gender',
	        'bb2img_trans',
	        'img2bb_trans',
	        'ann_idx',
	    )

	    def __init__(self,
	                 img_to_float=True,
	                 pad_val=dict(img=0, masks=0, seg=255)):
	        self.img_to_float = img_to_float
	        if pad_val is None:
	            pad_val = dict(img=0, masks=0, seg=255)
	        # Copy so that instances never share (or mutate) the dict object
	        # used as the default argument.
	        self.pad_val = dict(pad_val)

	    def __call__(self, results):
	        """Call function to transform and format common fields in results.

	        Args:
	            results (dict): Result dict contains the data to convert.

	        Returns:
	            dict: The same result dict, with ``img`` and the present
	            ``DATA_KEYS`` entries replaced by DataContainers.
	        """
	        if 'img' in results:
	            img = results['img']
	            if self.img_to_float is True and img.dtype == np.uint8:
	                # Normally, image is of uint8 type without normalization.
	                # At this time, it needs to be forced to be converted to
	                # float32, otherwise the model training and inference
	                # will be wrong.
	                img = img.astype(np.float32)
	            # add default meta keys
	            results = self._add_default_meta_keys(results)
	            if len(img.shape) < 3:
	                img = np.expand_dims(img, -1)

	            img = np.ascontiguousarray(img.transpose(2, 0, 1))
	            results['img'] = DC(to_tensor(img),
	                                padding_value=self.pad_val['img'],
	                                stack=True)
	        for key in self.DATA_KEYS:
	            if key in results:
	                results[key] = DC(to_tensor(results[key]))
	        return results

	    def _add_default_meta_keys(self, results):
	        """Add default meta keys.

	        We set default meta keys including `pad_shape`, `scale_factor` and
	        `img_norm_cfg` to avoid the case where no `Resize`, `Normalize` and
	        `Pad` are implemented during the whole pipeline. Values that are
	        already present in ``results`` are kept.

	        Args:
	            results (dict): Result dict contains the data to convert.

	        Returns:
	            results (dict): Updated result dict contains the data to convert.
	        """
	        img = results['img']
	        results.setdefault('pad_shape', img.shape)
	        results.setdefault('scale_factor', 1.0)
	        # An array with fewer than 3 dims is treated as single-channel.
	        num_channels = 1 if len(img.shape) < 3 else img.shape[2]
	        results.setdefault(
	            'img_norm_cfg',
	            dict(mean=np.zeros(num_channels, dtype=np.float32),
	                 std=np.ones(num_channels, dtype=np.float32),
	                 to_rgb=False))
	        return results

	    def __repr__(self):
	        return self.__class__.__name__ + \
	            f'(img_to_float={self.img_to_float})'


	class WrapFieldsToLists(object):
	    """Wrap fields of the data dictionary into lists for evaluation.

	    This class can be used as a last step of a test or validation
	    pipeline for single image evaluation or inference. Every value of the
	    dict is replaced by a one-element list holding that value.

	    Example:
	        >>> wrap = WrapFieldsToLists()
	        >>> wrap({'img': 1, 'label': 2})
	        {'img': [1], 'label': [2]}
	    """

	    def __call__(self, results):
	        """Wrap each value of ``results`` in a list, in place.

	        Args:
	            results (dict): Mapping whose values are wrapped.

	        Returns:
	            dict: The same ``results`` object.
	        """
	        wrapped = {name: [value] for name, value in results.items()}
	        results.update(wrapped)
	        return results

	    def __repr__(self):
	        cls_name = self.__class__.__name__
	        return f'{cls_name}()'