Spaces:

KyanChen
/

ai-photo-gallery

Runtime error

App Files Files Community

ai-photo-gallery / mmdet /datasets /transforms /transforms.py

KyanChen

init

f549064 almost 2 years ago

raw

history blame

141 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	import copy
	import inspect
	import math
	from typing import List, Optional, Sequence, Tuple, Union

	import cv2
	import mmcv
	import numpy as np
	from mmcv.image.geometric import _scale_size
	from mmcv.transforms import BaseTransform
	from mmcv.transforms import Pad as MMCV_Pad
	from mmcv.transforms import RandomFlip as MMCV_RandomFlip
	from mmcv.transforms import Resize as MMCV_Resize
	from mmcv.transforms.utils import avoid_cache_randomness, cache_randomness
	from mmengine.dataset import BaseDataset
	from mmengine.utils import is_str
	from numpy import random

	from mmdet.registry import TRANSFORMS
	from mmdet.structures.bbox import HorizontalBoxes, autocast_box_type
	from mmdet.structures.mask import BitmapMasks, PolygonMasks
	from mmdet.utils import log_img_scale

	try:
	from imagecorruptions import corrupt
	except ImportError:
	corrupt = None

	try:
	import albumentations
	from albumentations import Compose
	except ImportError:
	albumentations = None
	Compose = None

	Number = Union[int, float]


	@TRANSFORMS.register_module()
	class Resize(MMCV_Resize):
	"""Resize images & bbox & seg.

	This transform resizes the input image according to ``scale`` or
	``scale_factor``. Bboxes, masks, and seg map are then resized
	with the same scale factor.
	if ``scale`` and ``scale_factor`` are both set, it will use ``scale`` to
	resize.

	Required Keys:

	- img
	- gt_bboxes (BaseBoxes[torch.float32]) (optional)
	- gt_masks (BitmapMasks \| PolygonMasks) (optional)
	- gt_seg_map (np.uint8) (optional)

	Modified Keys:

	- img
	- img_shape
	- gt_bboxes
	- gt_masks
	- gt_seg_map


	Added Keys:

	- scale
	- scale_factor
	- keep_ratio
	- homography_matrix

	Args:
	scale (int or tuple): Images scales for resizing. Defaults to None
	scale_factor (float or tuple[float]): Scale factors for resizing.
	Defaults to None.
	keep_ratio (bool): Whether to keep the aspect ratio when resizing the
	image. Defaults to False.
	clip_object_border (bool): Whether to clip the objects
	outside the border of the image. In some dataset like MOT17, the gt
	bboxes are allowed to cross the border of images. Therefore, we
	don't need to clip the gt bboxes in these cases. Defaults to True.
	backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
	These two backends generates slightly different results. Defaults
	to 'cv2'.
	interpolation (str): Interpolation method, accepted values are
	"nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
	backend, "nearest", "bilinear" for 'pillow' backend. Defaults
	to 'bilinear'.
	"""

	def _resize_masks(self, results: dict) -> None:
	"""Resize masks with ``results['scale']``"""
	if results.get('gt_masks', None) is not None:
	if self.keep_ratio:
	results['gt_masks'] = results['gt_masks'].rescale(
	results['scale'])
	else:
	results['gt_masks'] = results['gt_masks'].resize(
	results['img_shape'])

	def _resize_bboxes(self, results: dict) -> None:
	"""Resize bounding boxes with ``results['scale_factor']``."""
	if results.get('gt_bboxes', None) is not None:
	results['gt_bboxes'].rescale_(results['scale_factor'])
	if self.clip_object_border:
	results['gt_bboxes'].clip_(results['img_shape'])

	def _resize_seg(self, results: dict) -> None:
	"""Resize semantic segmentation map with ``results['scale']``."""
	if results.get('gt_seg_map', None) is not None:
	if self.keep_ratio:
	gt_seg = mmcv.imrescale(
	results['gt_seg_map'],
	results['scale'],
	interpolation='nearest',
	backend=self.backend)
	else:
	gt_seg = mmcv.imresize(
	results['gt_seg_map'],
	results['scale'],
	interpolation='nearest',
	backend=self.backend)
	results['gt_seg_map'] = gt_seg

	def _record_homography_matrix(self, results: dict) -> None:
	"""Record the homography matrix for the Resize."""
	w_scale, h_scale = results['scale_factor']
	homography_matrix = np.array(
	[[w_scale, 0, 0], [0, h_scale, 0], [0, 0, 1]], dtype=np.float32)
	if results.get('homography_matrix', None) is None:
	results['homography_matrix'] = homography_matrix
	else:
	results['homography_matrix'] = homography_matrix @ results[
	'homography_matrix']

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	"""Transform function to resize images, bounding boxes and semantic
	segmentation map.

	Args:
	results (dict): Result dict from loading pipeline.
	Returns:
	dict: Resized results, 'img', 'gt_bboxes', 'gt_seg_map',
	'scale', 'scale_factor', 'height', 'width', and 'keep_ratio' keys
	are updated in result dict.
	"""
	if self.scale:
	results['scale'] = self.scale
	else:
	img_shape = results['img'].shape[:2]
	results['scale'] = _scale_size(img_shape[::-1], self.scale_factor)
	self._resize_img(results)
	self._resize_bboxes(results)
	self._resize_masks(results)
	self._resize_seg(results)
	self._record_homography_matrix(results)
	return results

	def __repr__(self) -> str:
	repr_str = self.__class__.__name__
	repr_str += f'(scale={self.scale}, '
	repr_str += f'scale_factor={self.scale_factor}, '
	repr_str += f'keep_ratio={self.keep_ratio}, '
	repr_str += f'clip_object_border={self.clip_object_border}), '
	repr_str += f'backend={self.backend}), '
	repr_str += f'interpolation={self.interpolation})'
	return repr_str


	@TRANSFORMS.register_module()
	class FixShapeResize(Resize):
	"""Resize images & bbox & seg to the specified size.

	This transform resizes the input image according to ``width`` and
	``height``. Bboxes, masks, and seg map are then resized
	with the same parameters.

	Required Keys:

	- img
	- gt_bboxes (BaseBoxes[torch.float32]) (optional)
	- gt_masks (BitmapMasks \| PolygonMasks) (optional)
	- gt_seg_map (np.uint8) (optional)

	Modified Keys:

	- img
	- img_shape
	- gt_bboxes
	- gt_masks
	- gt_seg_map


	Added Keys:

	- scale
	- scale_factor
	- keep_ratio
	- homography_matrix

	Args:
	width (int): width for resizing.
	height (int): height for resizing.
	Defaults to None.
	pad_val (Number \| dict[str, Number], optional): Padding value for if
	the pad_mode is "constant". If it is a single number, the value
	to pad the image is the number and to pad the semantic
	segmentation map is 255. If it is a dict, it should have the
	following keys:

	- img: The value to pad the image.
	- seg: The value to pad the semantic segmentation map.
	Defaults to dict(img=0, seg=255).
	keep_ratio (bool): Whether to keep the aspect ratio when resizing the
	image. Defaults to False.
	clip_object_border (bool): Whether to clip the objects
	outside the border of the image. In some dataset like MOT17, the gt
	bboxes are allowed to cross the border of images. Therefore, we
	don't need to clip the gt bboxes in these cases. Defaults to True.
	backend (str): Image resize backend, choices are 'cv2' and 'pillow'.
	These two backends generates slightly different results. Defaults
	to 'cv2'.
	interpolation (str): Interpolation method, accepted values are
	"nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
	backend, "nearest", "bilinear" for 'pillow' backend. Defaults
	to 'bilinear'.
	"""

	def __init__(self,
	width: int,
	height: int,
	pad_val: Union[Number, dict] = dict(img=0, seg=255),
	keep_ratio: bool = False,
	clip_object_border: bool = True,
	backend: str = 'cv2',
	interpolation: str = 'bilinear') -> None:
	assert width is not None and height is not None, (
	'`width` and'
	'`height` can not be `None`')

	self.width = width
	self.height = height
	self.scale = (width, height)

	self.backend = backend
	self.interpolation = interpolation
	self.keep_ratio = keep_ratio
	self.clip_object_border = clip_object_border

	if keep_ratio is True:
	# padding to the fixed size when keep_ratio=True
	self.pad_transform = Pad(size=self.scale, pad_val=pad_val)

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	"""Transform function to resize images, bounding boxes and semantic
	segmentation map.

	Args:
	results (dict): Result dict from loading pipeline.
	Returns:
	dict: Resized results, 'img', 'gt_bboxes', 'gt_seg_map',
	'scale', 'scale_factor', 'height', 'width', and 'keep_ratio' keys
	are updated in result dict.
	"""
	img = results['img']
	h, w = img.shape[:2]
	if self.keep_ratio:
	scale_factor = min(self.width / w, self.height / h)
	results['scale_factor'] = (scale_factor, scale_factor)
	real_w, real_h = int(w * float(scale_factor) +
	0.5), int(h * float(scale_factor) + 0.5)
	img, scale_factor = mmcv.imrescale(
	results['img'], (real_w, real_h),
	interpolation=self.interpolation,
	return_scale=True,
	backend=self.backend)
	# the w_scale and h_scale has minor difference
	# a real fix should be done in the mmcv.imrescale in the future
	results['img'] = img
	results['img_shape'] = img.shape[:2]
	results['keep_ratio'] = self.keep_ratio
	results['scale'] = (real_w, real_h)
	else:
	results['scale'] = (self.width, self.height)
	results['scale_factor'] = (self.width / w, self.height / h)
	super()._resize_img(results)

	self._resize_bboxes(results)
	self._resize_masks(results)
	self._resize_seg(results)
	self._record_homography_matrix(results)
	if self.keep_ratio:
	self.pad_transform(results)
	return results

	def __repr__(self) -> str:
	repr_str = self.__class__.__name__
	repr_str += f'(width={self.width}, height={self.height}, '
	repr_str += f'keep_ratio={self.keep_ratio}, '
	repr_str += f'clip_object_border={self.clip_object_border}), '
	repr_str += f'backend={self.backend}), '
	repr_str += f'interpolation={self.interpolation})'
	return repr_str


	@TRANSFORMS.register_module()
	class RandomFlip(MMCV_RandomFlip):
	"""Flip the image & bbox & mask & segmentation map. Added or Updated keys:
	flip, flip_direction, img, gt_bboxes, and gt_seg_map. There are 3 flip
	modes:

	- ``prob`` is float, ``direction`` is string: the image will be
	``direction``ly flipped with probability of ``prob`` .
	E.g., ``prob=0.5``, ``direction='horizontal'``,
	then image will be horizontally flipped with probability of 0.5.
	- ``prob`` is float, ``direction`` is list of string: the image will
	be ``direction[i]``ly flipped with probability of
	``prob/len(direction)``.
	E.g., ``prob=0.5``, ``direction=['horizontal', 'vertical']``,
	then image will be horizontally flipped with probability of 0.25,
	vertically with probability of 0.25.
	- ``prob`` is list of float, ``direction`` is list of string:
	given ``len(prob) == len(direction)``, the image will
	be ``direction[i]``ly flipped with probability of ``prob[i]``.
	E.g., ``prob=[0.3, 0.5]``, ``direction=['horizontal',
	'vertical']``, then image will be horizontally flipped with
	probability of 0.3, vertically with probability of 0.5.


	Required Keys:

	- img
	- gt_bboxes (BaseBoxes[torch.float32]) (optional)
	- gt_masks (BitmapMasks \| PolygonMasks) (optional)
	- gt_seg_map (np.uint8) (optional)

	Modified Keys:

	- img
	- gt_bboxes
	- gt_masks
	- gt_seg_map

	Added Keys:

	- flip
	- flip_direction
	- homography_matrix


	Args:
	prob (float \| list[float], optional): The flipping probability.
	Defaults to None.
	direction(str \| list[str]): The flipping direction. Options
	If input is a list, the length must equal ``prob``. Each
	element in ``prob`` indicates the flip probability of
	corresponding direction. Defaults to 'horizontal'.
	"""

	def _record_homography_matrix(self, results: dict) -> None:
	"""Record the homography matrix for the RandomFlip."""
	cur_dir = results['flip_direction']
	h, w = results['img'].shape[:2]

	if cur_dir == 'horizontal':
	homography_matrix = np.array([[-1, 0, w], [0, 1, 0], [0, 0, 1]],
	dtype=np.float32)
	elif cur_dir == 'vertical':
	homography_matrix = np.array([[1, 0, 0], [0, -1, h], [0, 0, 1]],
	dtype=np.float32)
	elif cur_dir == 'diagonal':
	homography_matrix = np.array([[-1, 0, w], [0, -1, h], [0, 0, 1]],
	dtype=np.float32)
	else:
	homography_matrix = np.eye(3, dtype=np.float32)

	if results.get('homography_matrix', None) is None:
	results['homography_matrix'] = homography_matrix
	else:
	results['homography_matrix'] = homography_matrix @ results[
	'homography_matrix']

	@autocast_box_type()
	def _flip(self, results: dict) -> None:
	"""Flip images, bounding boxes, and semantic segmentation map."""
	# flip image
	results['img'] = mmcv.imflip(
	results['img'], direction=results['flip_direction'])

	img_shape = results['img'].shape[:2]

	# flip bboxes
	if results.get('gt_bboxes', None) is not None:
	results['gt_bboxes'].flip_(img_shape, results['flip_direction'])

	# flip masks
	if results.get('gt_masks', None) is not None:
	results['gt_masks'] = results['gt_masks'].flip(
	results['flip_direction'])

	# flip segs
	if results.get('gt_seg_map', None) is not None:
	results['gt_seg_map'] = mmcv.imflip(
	results['gt_seg_map'], direction=results['flip_direction'])

	# record homography matrix for flip
	self._record_homography_matrix(results)


	@TRANSFORMS.register_module()
	class RandomShift(BaseTransform):
	"""Shift the image and box given shift pixels and probability.

	Required Keys:

	- img
	- gt_bboxes (BaseBoxes[torch.float32])
	- gt_bboxes_labels (np.int64)
	- gt_ignore_flags (bool) (optional)

	Modified Keys:

	- img
	- gt_bboxes
	- gt_bboxes_labels
	- gt_ignore_flags (bool) (optional)

	Args:
	prob (float): Probability of shifts. Defaults to 0.5.
	max_shift_px (int): The max pixels for shifting. Defaults to 32.
	filter_thr_px (int): The width and height threshold for filtering.
	The bbox and the rest of the targets below the width and
	height threshold will be filtered. Defaults to 1.
	"""

	def __init__(self,
	prob: float = 0.5,
	max_shift_px: int = 32,
	filter_thr_px: int = 1) -> None:
	assert 0 <= prob <= 1
	assert max_shift_px >= 0
	self.prob = prob
	self.max_shift_px = max_shift_px
	self.filter_thr_px = int(filter_thr_px)

	@cache_randomness
	def _random_prob(self) -> float:
	return random.uniform(0, 1)

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	"""Transform function to random shift images, bounding boxes.

	Args:
	results (dict): Result dict from loading pipeline.

	Returns:
	dict: Shift results.
	"""
	if self._random_prob() < self.prob:
	img_shape = results['img'].shape[:2]

	random_shift_x = random.randint(-self.max_shift_px,
	self.max_shift_px)
	random_shift_y = random.randint(-self.max_shift_px,
	self.max_shift_px)
	new_x = max(0, random_shift_x)
	ori_x = max(0, -random_shift_x)
	new_y = max(0, random_shift_y)
	ori_y = max(0, -random_shift_y)

	# TODO: support mask and semantic segmentation maps.
	bboxes = results['gt_bboxes'].clone()
	bboxes.translate_([random_shift_x, random_shift_y])

	# clip border
	bboxes.clip_(img_shape)

	# remove invalid bboxes
	valid_inds = (bboxes.widths > self.filter_thr_px).numpy() & (
	bboxes.heights > self.filter_thr_px).numpy()
	# If the shift does not contain any gt-bbox area, skip this
	# image.
	if not valid_inds.any():
	return results
	bboxes = bboxes[valid_inds]
	results['gt_bboxes'] = bboxes
	results['gt_bboxes_labels'] = results['gt_bboxes_labels'][
	valid_inds]

	if results.get('gt_ignore_flags', None) is not None:
	results['gt_ignore_flags'] = \
	results['gt_ignore_flags'][valid_inds]

	# shift img
	img = results['img']
	new_img = np.zeros_like(img)
	img_h, img_w = img.shape[:2]
	new_h = img_h - np.abs(random_shift_y)
	new_w = img_w - np.abs(random_shift_x)
	new_img[new_y:new_y + new_h, new_x:new_x + new_w] \
	= img[ori_y:ori_y + new_h, ori_x:ori_x + new_w]
	results['img'] = new_img

	return results

	def __repr__(self):
	repr_str = self.__class__.__name__
	repr_str += f'(prob={self.prob}, '
	repr_str += f'max_shift_px={self.max_shift_px}, '
	repr_str += f'filter_thr_px={self.filter_thr_px})'
	return repr_str


	@TRANSFORMS.register_module()
	class Pad(MMCV_Pad):
	"""Pad the image & segmentation map.

	There are three padding modes: (1) pad to a fixed size and (2) pad to the
	minimum size that is divisible by some number. and (3)pad to square. Also,
	pad to square and pad to the minimum size can be used as the same time.

	Required Keys:

	- img
	- gt_bboxes (BaseBoxes[torch.float32]) (optional)
	- gt_masks (BitmapMasks \| PolygonMasks) (optional)
	- gt_seg_map (np.uint8) (optional)

	Modified Keys:

	- img
	- img_shape
	- gt_masks
	- gt_seg_map

	Added Keys:

	- pad_shape
	- pad_fixed_size
	- pad_size_divisor

	Args:
	size (tuple, optional): Fixed padding size.
	Expected padding shape (width, height). Defaults to None.
	size_divisor (int, optional): The divisor of padded size. Defaults to
	None.
	pad_to_square (bool): Whether to pad the image into a square.
	Currently only used for YOLOX. Defaults to False.
	pad_val (Number \| dict[str, Number], optional) - Padding value for if
	the pad_mode is "constant". If it is a single number, the value
	to pad the image is the number and to pad the semantic
	segmentation map is 255. If it is a dict, it should have the
	following keys:

	- img: The value to pad the image.
	- seg: The value to pad the semantic segmentation map.
	Defaults to dict(img=0, seg=255).
	padding_mode (str): Type of padding. Should be: constant, edge,
	reflect or symmetric. Defaults to 'constant'.

	- constant: pads with a constant value, this value is specified
	with pad_val.
	- edge: pads with the last value at the edge of the image.
	- reflect: pads with reflection of image without repeating the last
	value on the edge. For example, padding [1, 2, 3, 4] with 2
	elements on both sides in reflect mode will result in
	[3, 2, 1, 2, 3, 4, 3, 2].
	- symmetric: pads with reflection of image repeating the last value
	on the edge. For example, padding [1, 2, 3, 4] with 2 elements on
	both sides in symmetric mode will result in
	[2, 1, 1, 2, 3, 4, 4, 3]
	"""

	def _pad_masks(self, results: dict) -> None:
	"""Pad masks according to ``results['pad_shape']``."""
	if results.get('gt_masks', None) is not None:
	pad_val = self.pad_val.get('masks', 0)
	pad_shape = results['pad_shape'][:2]
	results['gt_masks'] = results['gt_masks'].pad(
	pad_shape, pad_val=pad_val)

	def transform(self, results: dict) -> dict:
	"""Call function to pad images, masks, semantic segmentation maps.

	Args:
	results (dict): Result dict from loading pipeline.

	Returns:
	dict: Updated result dict.
	"""
	self._pad_img(results)
	self._pad_seg(results)
	self._pad_masks(results)
	return results


	@TRANSFORMS.register_module()
	class RandomCrop(BaseTransform):
	"""Random crop the image & bboxes & masks.

	The absolute ``crop_size`` is sampled based on ``crop_type`` and
	``image_size``, then the cropped results are generated.

	Required Keys:

	- img
	- gt_bboxes (BaseBoxes[torch.float32]) (optional)
	- gt_bboxes_labels (np.int64) (optional)
	- gt_masks (BitmapMasks \| PolygonMasks) (optional)
	- gt_ignore_flags (bool) (optional)
	- gt_seg_map (np.uint8) (optional)

	Modified Keys:

	- img
	- img_shape
	- gt_bboxes (optional)
	- gt_bboxes_labels (optional)
	- gt_masks (optional)
	- gt_ignore_flags (optional)
	- gt_seg_map (optional)

	Added Keys:

	- homography_matrix

	Args:
	crop_size (tuple): The relative ratio or absolute pixels of
	(width, height).
	crop_type (str, optional): One of "relative_range", "relative",
	"absolute", "absolute_range". "relative" randomly crops
	(h * crop_size[0], w * crop_size[1]) part from an input of size
	(h, w). "relative_range" uniformly samples relative crop size from
	range [crop_size[0], 1] and [crop_size[1], 1] for height and width
	respectively. "absolute" crops from an input with absolute size
	(crop_size[0], crop_size[1]). "absolute_range" uniformly samples
	crop_h in range [crop_size[0], min(h, crop_size[1])] and crop_w
	in range [crop_size[0], min(w, crop_size[1])].
	Defaults to "absolute".
	allow_negative_crop (bool, optional): Whether to allow a crop that does
	not contain any bbox area. Defaults to False.
	recompute_bbox (bool, optional): Whether to re-compute the boxes based
	on cropped instance masks. Defaults to False.
	bbox_clip_border (bool, optional): Whether clip the objects outside
	the border of the image. Defaults to True.

	Note:
	- If the image is smaller than the absolute crop size, return the
	original image.
	- The keys for bboxes, labels and masks must be aligned. That is,
	``gt_bboxes`` corresponds to ``gt_labels`` and ``gt_masks``, and
	``gt_bboxes_ignore`` corresponds to ``gt_labels_ignore`` and
	``gt_masks_ignore``.
	- If the crop does not contain any gt-bbox region and
	``allow_negative_crop`` is set to False, skip this image.
	"""

	def __init__(self,
	crop_size: tuple,
	crop_type: str = 'absolute',
	allow_negative_crop: bool = False,
	recompute_bbox: bool = False,
	bbox_clip_border: bool = True) -> None:
	if crop_type not in [
	'relative_range', 'relative', 'absolute', 'absolute_range'
	]:
	raise ValueError(f'Invalid crop_type {crop_type}.')
	if crop_type in ['absolute', 'absolute_range']:
	assert crop_size[0] > 0 and crop_size[1] > 0
	assert isinstance(crop_size[0], int) and isinstance(
	crop_size[1], int)
	if crop_type == 'absolute_range':
	assert crop_size[0] <= crop_size[1]
	else:
	assert 0 < crop_size[0] <= 1 and 0 < crop_size[1] <= 1
	self.crop_size = crop_size
	self.crop_type = crop_type
	self.allow_negative_crop = allow_negative_crop
	self.bbox_clip_border = bbox_clip_border
	self.recompute_bbox = recompute_bbox

	def _crop_data(self, results: dict, crop_size: Tuple[int, int],
	allow_negative_crop: bool) -> Union[dict, None]:
	"""Function to randomly crop images, bounding boxes, masks, semantic
	segmentation maps.

	Args:
	results (dict): Result dict from loading pipeline.
	crop_size (Tuple[int, int]): Expected absolute size after
	cropping, (h, w).
	allow_negative_crop (bool): Whether to allow a crop that does not
	contain any bbox area.

	Returns:
	results (Union[dict, None]): Randomly cropped results, 'img_shape'
	key in result dict is updated according to crop size. None will
	be returned when there is no valid bbox after cropping.
	"""
	assert crop_size[0] > 0 and crop_size[1] > 0
	img = results['img']
	margin_h = max(img.shape[0] - crop_size[0], 0)
	margin_w = max(img.shape[1] - crop_size[1], 0)
	offset_h, offset_w = self._rand_offset((margin_h, margin_w))
	crop_y1, crop_y2 = offset_h, offset_h + crop_size[0]
	crop_x1, crop_x2 = offset_w, offset_w + crop_size[1]

	# Record the homography matrix for the RandomCrop
	homography_matrix = np.array(
	[[1, 0, -offset_w], [0, 1, -offset_h], [0, 0, 1]],
	dtype=np.float32)
	if results.get('homography_matrix', None) is None:
	results['homography_matrix'] = homography_matrix
	else:
	results['homography_matrix'] = homography_matrix @ results[
	'homography_matrix']

	# crop the image
	img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...]
	img_shape = img.shape
	results['img'] = img
	results['img_shape'] = img_shape

	# crop bboxes accordingly and clip to the image boundary
	if results.get('gt_bboxes', None) is not None:
	bboxes = results['gt_bboxes']
	bboxes.translate_([-offset_w, -offset_h])
	if self.bbox_clip_border:
	bboxes.clip_(img_shape[:2])
	valid_inds = bboxes.is_inside(img_shape[:2]).numpy()
	# If the crop does not contain any gt-bbox area and
	# allow_negative_crop is False, skip this image.
	if (not valid_inds.any() and not allow_negative_crop):
	return None

	results['gt_bboxes'] = bboxes[valid_inds]

	if results.get('gt_ignore_flags', None) is not None:
	results['gt_ignore_flags'] = \
	results['gt_ignore_flags'][valid_inds]

	if results.get('gt_bboxes_labels', None) is not None:
	results['gt_bboxes_labels'] = \
	results['gt_bboxes_labels'][valid_inds]

	if results.get('gt_masks', None) is not None:
	results['gt_masks'] = results['gt_masks'][
	valid_inds.nonzero()[0]].crop(
	np.asarray([crop_x1, crop_y1, crop_x2, crop_y2]))
	if self.recompute_bbox:
	results['gt_bboxes'] = results['gt_masks'].get_bboxes(
	type(results['gt_bboxes']))

	# crop semantic seg
	if results.get('gt_seg_map', None) is not None:
	results['gt_seg_map'] = results['gt_seg_map'][crop_y1:crop_y2,
	crop_x1:crop_x2]

	return results

	@cache_randomness
	def _rand_offset(self, margin: Tuple[int, int]) -> Tuple[int, int]:
	"""Randomly generate crop offset.

	Args:
	margin (Tuple[int, int]): The upper bound for the offset generated
	randomly.

	Returns:
	Tuple[int, int]: The random offset for the crop.
	"""
	margin_h, margin_w = margin
	offset_h = np.random.randint(0, margin_h + 1)
	offset_w = np.random.randint(0, margin_w + 1)

	return offset_h, offset_w

	@cache_randomness
	def _get_crop_size(self, image_size: Tuple[int, int]) -> Tuple[int, int]:
	"""Randomly generates the absolute crop size based on `crop_type` and
	`image_size`.

	Args:
	image_size (Tuple[int, int]): (h, w).

	Returns:
	crop_size (Tuple[int, int]): (crop_h, crop_w) in absolute pixels.
	"""
	h, w = image_size
	if self.crop_type == 'absolute':
	return min(self.crop_size[1], h), min(self.crop_size[0], w)
	elif self.crop_type == 'absolute_range':
	crop_h = np.random.randint(
	min(h, self.crop_size[0]),
	min(h, self.crop_size[1]) + 1)
	crop_w = np.random.randint(
	min(w, self.crop_size[0]),
	min(w, self.crop_size[1]) + 1)
	return crop_h, crop_w
	elif self.crop_type == 'relative':
	crop_w, crop_h = self.crop_size
	return int(h * crop_h + 0.5), int(w * crop_w + 0.5)
	else:
	# 'relative_range'
	crop_size = np.asarray(self.crop_size, dtype=np.float32)
	crop_h, crop_w = crop_size + np.random.rand(2) * (1 - crop_size)
	return int(h * crop_h + 0.5), int(w * crop_w + 0.5)

	@autocast_box_type()
	def transform(self, results: dict) -> Union[dict, None]:
	"""Transform function to randomly crop images, bounding boxes, masks,
	semantic segmentation maps.

	Args:
	results (dict): Result dict from loading pipeline.

	Returns:
	results (Union[dict, None]): Randomly cropped results, 'img_shape'
	key in result dict is updated according to crop size. None will
	be returned when there is no valid bbox after cropping.
	"""
	image_size = results['img'].shape[:2]
	crop_size = self._get_crop_size(image_size)
	results = self._crop_data(results, crop_size, self.allow_negative_crop)
	return results

	def __repr__(self) -> str:
	repr_str = self.__class__.__name__
	repr_str += f'(crop_size={self.crop_size}, '
	repr_str += f'crop_type={self.crop_type}, '
	repr_str += f'allow_negative_crop={self.allow_negative_crop}, '
	repr_str += f'recompute_bbox={self.recompute_bbox}, '
	repr_str += f'bbox_clip_border={self.bbox_clip_border})'
	return repr_str


	@TRANSFORMS.register_module()
	class SegRescale(BaseTransform):
	"""Rescale semantic segmentation maps.

	This transform rescale the ``gt_seg_map`` according to ``scale_factor``.

	Required Keys:

	- gt_seg_map

	Modified Keys:

	- gt_seg_map

	Args:
	scale_factor (float): The scale factor of the final output. Defaults
	to 1.
	backend (str): Image rescale backend, choices are 'cv2' and 'pillow'.
	These two backends generates slightly different results. Defaults
	to 'cv2'.
	"""

	def __init__(self, scale_factor: float = 1, backend: str = 'cv2') -> None:
	self.scale_factor = scale_factor
	self.backend = backend

	def transform(self, results: dict) -> dict:
	"""Transform function to scale the semantic segmentation map.

	Args:
	results (dict): Result dict from loading pipeline.

	Returns:
	dict: Result dict with semantic segmentation map scaled.
	"""
	if self.scale_factor != 1:
	results['gt_seg_map'] = mmcv.imrescale(
	results['gt_seg_map'],
	self.scale_factor,
	interpolation='nearest',
	backend=self.backend)

	return results

	def __repr__(self) -> str:
	repr_str = self.__class__.__name__
	repr_str += f'(scale_factor={self.scale_factor}, '
	repr_str += f'backend={self.backend})'
	return repr_str


	@TRANSFORMS.register_module()
	class PhotoMetricDistortion(BaseTransform):
	"""Apply photometric distortion to image sequentially, every transformation
	is applied with a probability of 0.5. The position of random contrast is in
	second or second to last.

	1. random brightness
	2. random contrast (mode 0)
	3. convert color from BGR to HSV
	4. random saturation
	5. random hue
	6. convert color from HSV to BGR
	7. random contrast (mode 1)
	8. randomly swap channels

	Required Keys:

	- img (np.uint8)

	Modified Keys:

	- img (np.float32)

	Args:
	brightness_delta (int): delta of brightness.
	contrast_range (sequence): range of contrast.
	saturation_range (sequence): range of saturation.
	hue_delta (int): delta of hue.
	"""

	def __init__(self,
	brightness_delta: int = 32,
	contrast_range: Sequence[Number] = (0.5, 1.5),
	saturation_range: Sequence[Number] = (0.5, 1.5),
	hue_delta: int = 18) -> None:
	self.brightness_delta = brightness_delta
	self.contrast_lower, self.contrast_upper = contrast_range
	self.saturation_lower, self.saturation_upper = saturation_range
	self.hue_delta = hue_delta

	@cache_randomness
	def _random_flags(self) -> Sequence[Number]:
	mode = random.randint(2)
	brightness_flag = random.randint(2)
	contrast_flag = random.randint(2)
	saturation_flag = random.randint(2)
	hue_flag = random.randint(2)
	swap_flag = random.randint(2)
	delta_value = random.uniform(-self.brightness_delta,
	self.brightness_delta)
	alpha_value = random.uniform(self.contrast_lower, self.contrast_upper)
	saturation_value = random.uniform(self.saturation_lower,
	self.saturation_upper)
	hue_value = random.uniform(-self.hue_delta, self.hue_delta)
	swap_value = random.permutation(3)

	return (mode, brightness_flag, contrast_flag, saturation_flag,
	hue_flag, swap_flag, delta_value, alpha_value,
	saturation_value, hue_value, swap_value)

	def transform(self, results: dict) -> dict:
	"""Transform function to perform photometric distortion on images.

	Args:
	results (dict): Result dict from loading pipeline.

	Returns:
	dict: Result dict with images distorted.
	"""
	assert 'img' in results, '`img` is not found in results'
	img = results['img']
	img = img.astype(np.float32)

	(mode, brightness_flag, contrast_flag, saturation_flag, hue_flag,
	swap_flag, delta_value, alpha_value, saturation_value, hue_value,
	swap_value) = self._random_flags()

	# random brightness
	if brightness_flag:
	img += delta_value

	# mode == 0 --> do random contrast first
	# mode == 1 --> do random contrast last
	if mode == 1:
	if contrast_flag:
	img *= alpha_value

	# convert color from BGR to HSV
	img = mmcv.bgr2hsv(img)

	# random saturation
	if saturation_flag:
	img[..., 1] *= saturation_value
	# For image(type=float32), after convert bgr to hsv by opencv,
	# valid saturation value range is [0, 1]
	if saturation_value > 1:
	img[..., 1] = img[..., 1].clip(0, 1)

	# random hue
	if hue_flag:
	img[..., 0] += hue_value
	img[..., 0][img[..., 0] > 360] -= 360
	img[..., 0][img[..., 0] < 0] += 360

	# convert color from HSV to BGR
	img = mmcv.hsv2bgr(img)

	# random contrast
	if mode == 0:
	if contrast_flag:
	img *= alpha_value

	# randomly swap channels
	if swap_flag:
	img = img[..., swap_value]

	results['img'] = img
	return results

	def __repr__(self) -> str:
	repr_str = self.__class__.__name__
	repr_str += f'(brightness_delta={self.brightness_delta}, '
	repr_str += 'contrast_range='
	repr_str += f'{(self.contrast_lower, self.contrast_upper)}, '
	repr_str += 'saturation_range='
	repr_str += f'{(self.saturation_lower, self.saturation_upper)}, '
	repr_str += f'hue_delta={self.hue_delta})'
	return repr_str


	@TRANSFORMS.register_module()
	class Expand(BaseTransform):
	"""Random expand the image & bboxes & masks & segmentation map.

	Randomly place the original image on a canvas of ``ratio`` x original image
	size filled with mean values. The ratio is in the range of ratio_range.

	Required Keys:

	- img
	- img_shape
	- gt_bboxes (BaseBoxes[torch.float32]) (optional)
	- gt_masks (BitmapMasks \| PolygonMasks) (optional)
	- gt_seg_map (np.uint8) (optional)

	Modified Keys:

	- img
	- img_shape
	- gt_bboxes
	- gt_masks
	- gt_seg_map


	Args:
	mean (sequence): mean value of dataset.
	to_rgb (bool): if need to convert the order of mean to align with RGB.
	ratio_range (sequence)): range of expand ratio.
	seg_ignore_label (int): label of ignore segmentation map.
	prob (float): probability of applying this transformation
	"""

	def __init__(self,
	mean: Sequence[Number] = (0, 0, 0),
	to_rgb: bool = True,
	ratio_range: Sequence[Number] = (1, 4),
	seg_ignore_label: int = None,
	prob: float = 0.5) -> None:
	self.to_rgb = to_rgb
	self.ratio_range = ratio_range
	if to_rgb:
	self.mean = mean[::-1]
	else:
	self.mean = mean
	self.min_ratio, self.max_ratio = ratio_range
	self.seg_ignore_label = seg_ignore_label
	self.prob = prob

	@cache_randomness
	def _random_prob(self) -> float:
	return random.uniform(0, 1)

	@cache_randomness
	def _random_ratio(self) -> float:
	return random.uniform(self.min_ratio, self.max_ratio)

	@cache_randomness
	def _random_left_top(self, ratio: float, h: int,
	w: int) -> Tuple[int, int]:
	left = int(random.uniform(0, w * ratio - w))
	top = int(random.uniform(0, h * ratio - h))
	return left, top

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	"""Transform function to expand images, bounding boxes, masks,
	segmentation map.

	Args:
	results (dict): Result dict from loading pipeline.

	Returns:
	dict: Result dict with images, bounding boxes, masks, segmentation
	map expanded.
	"""
	if self._random_prob() > self.prob:
	return results
	assert 'img' in results, '`img` is not found in results'
	img = results['img']
	h, w, c = img.shape
	ratio = self._random_ratio()
	# speedup expand when meets large image
	if np.all(self.mean == self.mean[0]):
	expand_img = np.empty((int(h * ratio), int(w * ratio), c),
	img.dtype)
	expand_img.fill(self.mean[0])
	else:
	expand_img = np.full((int(h * ratio), int(w * ratio), c),
	self.mean,
	dtype=img.dtype)
	left, top = self._random_left_top(ratio, h, w)
	expand_img[top:top + h, left:left + w] = img
	results['img'] = expand_img
	results['img_shape'] = expand_img.shape[:2]

	# expand bboxes
	if results.get('gt_bboxes', None) is not None:
	results['gt_bboxes'].translate_([left, top])

	# expand masks
	if results.get('gt_masks', None) is not None:
	results['gt_masks'] = results['gt_masks'].expand(
	int(h * ratio), int(w * ratio), top, left)

	# expand segmentation map
	if results.get('gt_seg_map', None) is not None:
	gt_seg = results['gt_seg_map']
	expand_gt_seg = np.full((int(h * ratio), int(w * ratio)),
	self.seg_ignore_label,
	dtype=gt_seg.dtype)
	expand_gt_seg[top:top + h, left:left + w] = gt_seg
	results['gt_seg_map'] = expand_gt_seg

	return results

	def __repr__(self) -> str:
	repr_str = self.__class__.__name__
	repr_str += f'(mean={self.mean}, to_rgb={self.to_rgb}, '
	repr_str += f'ratio_range={self.ratio_range}, '
	repr_str += f'seg_ignore_label={self.seg_ignore_label}, '
	repr_str += f'prob={self.prob})'
	return repr_str


	@TRANSFORMS.register_module()
	class MinIoURandomCrop(BaseTransform):
	"""Random crop the image & bboxes & masks & segmentation map, the cropped
	patches have minimum IoU requirement with original image & bboxes & masks.

	& segmentation map, the IoU threshold is randomly selected from min_ious.


	Required Keys:

	- img
	- img_shape
	- gt_bboxes (BaseBoxes[torch.float32]) (optional)
	- gt_bboxes_labels (np.int64) (optional)
	- gt_masks (BitmapMasks \| PolygonMasks) (optional)
	- gt_ignore_flags (bool) (optional)
	- gt_seg_map (np.uint8) (optional)

	Modified Keys:

	- img
	- img_shape
	- gt_bboxes
	- gt_bboxes_labels
	- gt_masks
	- gt_ignore_flags
	- gt_seg_map


	Args:
	min_ious (Sequence[float]): minimum IoU threshold for all intersections
	with bounding boxes.
	min_crop_size (float): minimum crop's size (i.e. h,w := ah, aw,
	where a >= min_crop_size).
	bbox_clip_border (bool, optional): Whether clip the objects outside
	the border of the image. Defaults to True.
	"""

	def __init__(self,
	min_ious: Sequence[float] = (0.1, 0.3, 0.5, 0.7, 0.9),
	min_crop_size: float = 0.3,
	bbox_clip_border: bool = True) -> None:

	self.min_ious = min_ious
	self.sample_mode = (1, *min_ious, 0)
	self.min_crop_size = min_crop_size
	self.bbox_clip_border = bbox_clip_border

	@cache_randomness
	def _random_mode(self) -> Number:
	return random.choice(self.sample_mode)

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	"""Transform function to crop images and bounding boxes with minimum
	IoU constraint.

	Args:
	results (dict): Result dict from loading pipeline.

	Returns:
	dict: Result dict with images and bounding boxes cropped, \
	'img_shape' key is updated.
	"""
	assert 'img' in results, '`img` is not found in results'
	assert 'gt_bboxes' in results, '`gt_bboxes` is not found in results'
	img = results['img']
	boxes = results['gt_bboxes']
	h, w, c = img.shape
	while True:
	mode = self._random_mode()
	self.mode = mode
	if mode == 1:
	return results

	min_iou = self.mode
	for i in range(50):
	new_w = random.uniform(self.min_crop_size * w, w)
	new_h = random.uniform(self.min_crop_size * h, h)

	# h / w in [0.5, 2]
	if new_h / new_w < 0.5 or new_h / new_w > 2:
	continue

	left = random.uniform(w - new_w)
	top = random.uniform(h - new_h)

	patch = np.array(
	(int(left), int(top), int(left + new_w), int(top + new_h)))
	# Line or point crop is not allowed
	if patch[2] == patch[0] or patch[3] == patch[1]:
	continue
	overlaps = boxes.overlaps(
	HorizontalBoxes(patch.reshape(-1, 4).astype(np.float32)),
	boxes).numpy().reshape(-1)
	if len(overlaps) > 0 and overlaps.min() < min_iou:
	continue

	# center of boxes should inside the crop img
	# only adjust boxes and instance masks when the gt is not empty
	if len(overlaps) > 0:
	# adjust boxes
	def is_center_of_bboxes_in_patch(boxes, patch):
	centers = boxes.centers.numpy()
	mask = ((centers[:, 0] > patch[0]) *
	(centers[:, 1] > patch[1]) *
	(centers[:, 0] < patch[2]) *
	(centers[:, 1] < patch[3]))
	return mask

	mask = is_center_of_bboxes_in_patch(boxes, patch)
	if not mask.any():
	continue
	if results.get('gt_bboxes', None) is not None:
	boxes = results['gt_bboxes']
	mask = is_center_of_bboxes_in_patch(boxes, patch)
	boxes = boxes[mask]
	boxes.translate_([-patch[0], -patch[1]])
	if self.bbox_clip_border:
	boxes.clip_(
	[patch[3] - patch[1], patch[2] - patch[0]])
	results['gt_bboxes'] = boxes

	# ignore_flags
	if results.get('gt_ignore_flags', None) is not None:
	results['gt_ignore_flags'] = \
	results['gt_ignore_flags'][mask]

	# labels
	if results.get('gt_bboxes_labels', None) is not None:
	results['gt_bboxes_labels'] = results[
	'gt_bboxes_labels'][mask]

	# mask fields
	if results.get('gt_masks', None) is not None:
	results['gt_masks'] = results['gt_masks'][
	mask.nonzero()[0]].crop(patch)
	# adjust the img no matter whether the gt is empty before crop
	img = img[patch[1]:patch[3], patch[0]:patch[2]]
	results['img'] = img
	results['img_shape'] = img.shape[:2]

	# seg fields
	if results.get('gt_seg_map', None) is not None:
	results['gt_seg_map'] = results['gt_seg_map'][
	patch[1]:patch[3], patch[0]:patch[2]]
	return results

	def __repr__(self) -> str:
	repr_str = self.__class__.__name__
	repr_str += f'(min_ious={self.min_ious}, '
	repr_str += f'min_crop_size={self.min_crop_size}, '
	repr_str += f'bbox_clip_border={self.bbox_clip_border})'
	return repr_str


	@TRANSFORMS.register_module()
	class Corrupt(BaseTransform):
	"""Corruption augmentation.

	Corruption transforms implemented based on
	`imagecorruptions <https://github.com/bethgelab/imagecorruptions>`_.

	Required Keys:

	- img (np.uint8)


	Modified Keys:

	- img (np.uint8)


	Args:
	corruption (str): Corruption name.
	severity (int): The severity of corruption. Defaults to 1.
	"""

	def __init__(self, corruption: str, severity: int = 1) -> None:
	self.corruption = corruption
	self.severity = severity

	def transform(self, results: dict) -> dict:
	"""Call function to corrupt image.

	Args:
	results (dict): Result dict from loading pipeline.

	Returns:
	dict: Result dict with images corrupted.
	"""

	if corrupt is None:
	raise RuntimeError('imagecorruptions is not installed')
	results['img'] = corrupt(
	results['img'].astype(np.uint8),
	corruption_name=self.corruption,
	severity=self.severity)
	return results

	def __repr__(self) -> str:
	repr_str = self.__class__.__name__
	repr_str += f'(corruption={self.corruption}, '
	repr_str += f'severity={self.severity})'
	return repr_str


	@TRANSFORMS.register_module()
	@avoid_cache_randomness
	class Albu(BaseTransform):
	"""Albumentation augmentation.

	Adds custom transformations from Albumentations library.
	Please, visit `https://albumentations.readthedocs.io`
	to get more information.

	Required Keys:

	- img (np.uint8)
	- gt_bboxes (HorizontalBoxes[torch.float32]) (optional)
	- gt_masks (BitmapMasks \| PolygonMasks) (optional)

	Modified Keys:

	- img (np.uint8)
	- gt_bboxes (HorizontalBoxes[torch.float32]) (optional)
	- gt_masks (BitmapMasks \| PolygonMasks) (optional)
	- img_shape (tuple)

	An example of ``transforms`` is as followed:

	.. code-block::

	[
	dict(
	type='ShiftScaleRotate',
	shift_limit=0.0625,
	scale_limit=0.0,
	rotate_limit=0,
	interpolation=1,
	p=0.5),
	dict(
	type='RandomBrightnessContrast',
	brightness_limit=[0.1, 0.3],
	contrast_limit=[0.1, 0.3],
	p=0.2),
	dict(type='ChannelShuffle', p=0.1),
	dict(
	type='OneOf',
	transforms=[
	dict(type='Blur', blur_limit=3, p=1.0),
	dict(type='MedianBlur', blur_limit=3, p=1.0)
	],
	p=0.1),
	]

	Args:
	transforms (list[dict]): A list of albu transformations
	bbox_params (dict, optional): Bbox_params for albumentation `Compose`
	keymap (dict, optional): Contains
	{'input key':'albumentation-style key'}
	skip_img_without_anno (bool): Whether to skip the image if no ann left
	after aug. Defaults to False.
	"""

	def __init__(self,
	transforms: List[dict],
	bbox_params: Optional[dict] = None,
	keymap: Optional[dict] = None,
	skip_img_without_anno: bool = False) -> None:
	if Compose is None:
	raise RuntimeError('albumentations is not installed')

	# Args will be modified later, copying it will be safer
	transforms = copy.deepcopy(transforms)
	if bbox_params is not None:
	bbox_params = copy.deepcopy(bbox_params)
	if keymap is not None:
	keymap = copy.deepcopy(keymap)
	self.transforms = transforms
	self.filter_lost_elements = False
	self.skip_img_without_anno = skip_img_without_anno

	# A simple workaround to remove masks without boxes
	if (isinstance(bbox_params, dict) and 'label_fields' in bbox_params
	and 'filter_lost_elements' in bbox_params):
	self.filter_lost_elements = True
	self.origin_label_fields = bbox_params['label_fields']
	bbox_params['label_fields'] = ['idx_mapper']
	del bbox_params['filter_lost_elements']

	self.bbox_params = (
	self.albu_builder(bbox_params) if bbox_params else None)
	self.aug = Compose([self.albu_builder(t) for t in self.transforms],
	bbox_params=self.bbox_params)

	if not keymap:
	self.keymap_to_albu = {
	'img': 'image',
	'gt_masks': 'masks',
	'gt_bboxes': 'bboxes'
	}
	else:
	self.keymap_to_albu = keymap
	self.keymap_back = {v: k for k, v in self.keymap_to_albu.items()}

	def albu_builder(self, cfg: dict) -> albumentations:
	"""Import a module from albumentations.

	It inherits some of :func:`build_from_cfg` logic.

	Args:
	cfg (dict): Config dict. It should at least contain the key "type".

	Returns:
	obj: The constructed object.
	"""

	assert isinstance(cfg, dict) and 'type' in cfg
	args = cfg.copy()
	obj_type = args.pop('type')
	if is_str(obj_type):
	if albumentations is None:
	raise RuntimeError('albumentations is not installed')
	obj_cls = getattr(albumentations, obj_type)
	elif inspect.isclass(obj_type):
	obj_cls = obj_type
	else:
	raise TypeError(
	f'type must be a str or valid type, but got {type(obj_type)}')

	if 'transforms' in args:
	args['transforms'] = [
	self.albu_builder(transform)
	for transform in args['transforms']
	]

	return obj_cls(**args)

	@staticmethod
	def mapper(d: dict, keymap: dict) -> dict:
	"""Dictionary mapper. Renames keys according to keymap provided.

	Args:
	d (dict): old dict
	keymap (dict): {'old_key':'new_key'}
	Returns:
	dict: new dict.
	"""
	updated_dict = {}
	for k, v in zip(d.keys(), d.values()):
	new_k = keymap.get(k, k)
	updated_dict[new_k] = d[k]
	return updated_dict

	@autocast_box_type()
	def transform(self, results: dict) -> Union[dict, None]:
	"""Transform function of Albu."""
	# TODO: gt_seg_map is not currently supported
	# dict to albumentations format
	results = self.mapper(results, self.keymap_to_albu)
	results, ori_masks = self._preprocess_results(results)
	results = self.aug(**results)
	results = self._postprocess_results(results, ori_masks)
	if results is None:
	return None
	# back to the original format
	results = self.mapper(results, self.keymap_back)
	results['img_shape'] = results['img'].shape
	return results

	def _preprocess_results(self, results: dict) -> tuple:
	"""Pre-processing results to facilitate the use of Albu."""
	if 'bboxes' in results:
	# to list of boxes
	if not isinstance(results['bboxes'], HorizontalBoxes):
	raise NotImplementedError(
	'Albu only supports horizontal boxes now')
	bboxes = results['bboxes'].numpy()
	results['bboxes'] = [x for x in bboxes]
	# add pseudo-field for filtration
	if self.filter_lost_elements:
	results['idx_mapper'] = np.arange(len(results['bboxes']))

	# TODO: Support mask structure in albu
	ori_masks = None
	if 'masks' in results:
	if isinstance(results['masks'], PolygonMasks):
	raise NotImplementedError(
	'Albu only supports BitMap masks now')
	ori_masks = results['masks']
	if albumentations.__version__ < '0.5':
	results['masks'] = results['masks'].masks
	else:
	results['masks'] = [mask for mask in results['masks'].masks]

	return results, ori_masks

	def _postprocess_results(
	self,
	results: dict,
	ori_masks: Optional[Union[BitmapMasks,
	PolygonMasks]] = None) -> dict:
	"""Post-processing Albu output."""
	# albumentations may return np.array or list on different versions
	if 'gt_bboxes_labels' in results and isinstance(
	results['gt_bboxes_labels'], list):
	results['gt_bboxes_labels'] = np.array(
	results['gt_bboxes_labels'], dtype=np.int64)
	if 'gt_ignore_flags' in results and isinstance(
	results['gt_ignore_flags'], list):
	results['gt_ignore_flags'] = np.array(
	results['gt_ignore_flags'], dtype=bool)

	if 'bboxes' in results:
	if isinstance(results['bboxes'], list):
	results['bboxes'] = np.array(
	results['bboxes'], dtype=np.float32)
	results['bboxes'] = results['bboxes'].reshape(-1, 4)
	results['bboxes'] = HorizontalBoxes(results['bboxes'])

	# filter label_fields
	if self.filter_lost_elements:

	for label in self.origin_label_fields:
	results[label] = np.array(
	[results[label][i] for i in results['idx_mapper']])
	if 'masks' in results:
	assert ori_masks is not None
	results['masks'] = np.array(
	[results['masks'][i] for i in results['idx_mapper']])
	results['masks'] = ori_masks.__class__(
	results['masks'], results['image'].shape[0],
	results['image'].shape[1])

	if (not len(results['idx_mapper'])
	and self.skip_img_without_anno):
	return None
	elif 'masks' in results:
	results['masks'] = ori_masks.__class__(
	results['masks'], results['image'].shape[0],
	results['image'].shape[1])

	return results

	def __repr__(self) -> str:
	repr_str = self.__class__.__name__ + f'(transforms={self.transforms})'
	return repr_str


	@TRANSFORMS.register_module()
	@avoid_cache_randomness
	class RandomCenterCropPad(BaseTransform):
	"""Random center crop and random around padding for CornerNet.

	This operation generates randomly cropped image from the original image and
	pads it simultaneously. Different from :class:`RandomCrop`, the output
	shape may not equal to ``crop_size`` strictly. We choose a random value
	from ``ratios`` and the output shape could be larger or smaller than
	``crop_size``. The padding operation is also different from :class:`Pad`,
	here we use around padding instead of right-bottom padding.

	The relation between output image (padding image) and original image:

	.. code:: text

	output image

	+----------------------------+
	\| padded area \|
	+------\|----------------------------\|----------+
	\| \| cropped area \| \|
	\| \| +---------------+ \| \|
	\| \| \| . center \| \| \| original image
	\| \| \| range \| \| \|
	\| \| +---------------+ \| \|
	+------\|----------------------------\|----------+
	\| padded area \|
	+----------------------------+

	There are 5 main areas in the figure:

	- output image: output image of this operation, also called padding
	image in following instruction.
	- original image: input image of this operation.
	- padded area: non-intersect area of output image and original image.
	- cropped area: the overlap of output image and original image.
	- center range: a smaller area where random center chosen from.
	center range is computed by ``border`` and original image's shape
	to avoid our random center is too close to original image's border.

	Also this operation act differently in train and test mode, the summary
	pipeline is listed below.

	Train pipeline:

	1. Choose a ``random_ratio`` from ``ratios``, the shape of padding image
	will be ``random_ratio * crop_size``.
	2. Choose a ``random_center`` in center range.
	3. Generate padding image with center matches the ``random_center``.
	4. Initialize the padding image with pixel value equals to ``mean``.
	5. Copy the cropped area to padding image.
	6. Refine annotations.

	Test pipeline:

	1. Compute output shape according to ``test_pad_mode``.
	2. Generate padding image with center matches the original image
	center.
	3. Initialize the padding image with pixel value equals to ``mean``.
	4. Copy the ``cropped area`` to padding image.

	Required Keys:

	- img (np.float32)
	- img_shape (tuple)
	- gt_bboxes (BaseBoxes[torch.float32]) (optional)
	- gt_bboxes_labels (np.int64) (optional)
	- gt_ignore_flags (bool) (optional)

	Modified Keys:

	- img (np.float32)
	- img_shape (tuple)
	- gt_bboxes (BaseBoxes[torch.float32]) (optional)
	- gt_bboxes_labels (np.int64) (optional)
	- gt_ignore_flags (bool) (optional)

	Args:
	crop_size (tuple, optional): expected size after crop, final size will
	computed according to ratio. Requires (width, height)
	in train mode, and None in test mode.
	ratios (tuple, optional): random select a ratio from tuple and crop
	image to (crop_size[0] * ratio) * (crop_size[1] * ratio).
	Only available in train mode. Defaults to (0.9, 1.0, 1.1).
	border (int, optional): max distance from center select area to image
	border. Only available in train mode. Defaults to 128.
	mean (sequence, optional): Mean values of 3 channels.
	std (sequence, optional): Std values of 3 channels.
	to_rgb (bool, optional): Whether to convert the image from BGR to RGB.
	test_mode (bool): whether involve random variables in transform.
	In train mode, crop_size is fixed, center coords and ratio is
	random selected from predefined lists. In test mode, crop_size
	is image's original shape, center coords and ratio is fixed.
	Defaults to False.
	test_pad_mode (tuple, optional): padding method and padding shape
	value, only available in test mode. Default is using
	'logical_or' with 127 as padding shape value.

	- 'logical_or': final_shape = input_shape \| padding_shape_value
	- 'size_divisor': final_shape = int(
	ceil(input_shape / padding_shape_value) * padding_shape_value)

	Defaults to ('logical_or', 127).
	test_pad_add_pix (int): Extra padding pixel in test mode.
	Defaults to 0.
	bbox_clip_border (bool): Whether clip the objects outside
	the border of the image. Defaults to True.
	"""

	def __init__(self,
	crop_size: Optional[tuple] = None,
	ratios: Optional[tuple] = (0.9, 1.0, 1.1),
	border: Optional[int] = 128,
	mean: Optional[Sequence] = None,
	std: Optional[Sequence] = None,
	to_rgb: Optional[bool] = None,
	test_mode: bool = False,
	test_pad_mode: Optional[tuple] = ('logical_or', 127),
	test_pad_add_pix: int = 0,
	bbox_clip_border: bool = True) -> None:
	if test_mode:
	assert crop_size is None, 'crop_size must be None in test mode'
	assert ratios is None, 'ratios must be None in test mode'
	assert border is None, 'border must be None in test mode'
	assert isinstance(test_pad_mode, (list, tuple))
	assert test_pad_mode[0] in ['logical_or', 'size_divisor']
	else:
	assert isinstance(crop_size, (list, tuple))
	assert crop_size[0] > 0 and crop_size[1] > 0, (
	'crop_size must > 0 in train mode')
	assert isinstance(ratios, (list, tuple))
	assert test_pad_mode is None, (
	'test_pad_mode must be None in train mode')

	self.crop_size = crop_size
	self.ratios = ratios
	self.border = border
	# We do not set default value to mean, std and to_rgb because these
	# hyper-parameters are easy to forget but could affect the performance.
	# Please use the same setting as Normalize for performance assurance.
	assert mean is not None and std is not None and to_rgb is not None
	self.to_rgb = to_rgb
	self.input_mean = mean
	self.input_std = std
	if to_rgb:
	self.mean = mean[::-1]
	self.std = std[::-1]
	else:
	self.mean = mean
	self.std = std
	self.test_mode = test_mode
	self.test_pad_mode = test_pad_mode
	self.test_pad_add_pix = test_pad_add_pix
	self.bbox_clip_border = bbox_clip_border

	def _get_border(self, border, size):
	"""Get final border for the target size.

	This function generates a ``final_border`` according to image's shape.
	The area between ``final_border`` and ``size - final_border`` is the
	``center range``. We randomly choose center from the ``center range``
	to avoid our random center is too close to original image's border.
	Also ``center range`` should be larger than 0.

	Args:
	border (int): The initial border, default is 128.
	size (int): The width or height of original image.
	Returns:
	int: The final border.
	"""
	k = 2 * border / size
	i = pow(2, np.ceil(np.log2(np.ceil(k))) + (k == int(k)))
	return border // i

	def _filter_boxes(self, patch, boxes):
	"""Check whether the center of each box is in the patch.

	Args:
	patch (list[int]): The cropped area, [left, top, right, bottom].
	boxes (numpy array, (N x 4)): Ground truth boxes.

	Returns:
	mask (numpy array, (N,)): Each box is inside or outside the patch.
	"""
	center = boxes.centers.numpy()
	mask = (center[:, 0] > patch[0]) * (center[:, 1] > patch[1]) * (
	center[:, 0] < patch[2]) * (
	center[:, 1] < patch[3])
	return mask

	def _crop_image_and_paste(self, image, center, size):
	"""Crop image with a given center and size, then paste the cropped
	image to a blank image with two centers align.

	This function is equivalent to generating a blank image with ``size``
	as its shape. Then cover it on the original image with two centers (
	the center of blank image and the random center of original image)
	aligned. The overlap area is paste from the original image and the
	outside area is filled with ``mean pixel``.

	Args:
	image (np array, H x W x C): Original image.
	center (list[int]): Target crop center coord.
	size (list[int]): Target crop size. [target_h, target_w]

	Returns:
	cropped_img (np array, target_h x target_w x C): Cropped image.
	border (np array, 4): The distance of four border of
	``cropped_img`` to the original image area, [top, bottom,
	left, right]
	patch (list[int]): The cropped area, [left, top, right, bottom].
	"""
	center_y, center_x = center
	target_h, target_w = size
	img_h, img_w, img_c = image.shape

	x0 = max(0, center_x - target_w // 2)
	x1 = min(center_x + target_w // 2, img_w)
	y0 = max(0, center_y - target_h // 2)
	y1 = min(center_y + target_h // 2, img_h)
	patch = np.array((int(x0), int(y0), int(x1), int(y1)))

	left, right = center_x - x0, x1 - center_x
	top, bottom = center_y - y0, y1 - center_y

	cropped_center_y, cropped_center_x = target_h // 2, target_w // 2
	cropped_img = np.zeros((target_h, target_w, img_c), dtype=image.dtype)
	for i in range(img_c):
	cropped_img[:, :, i] += self.mean[i]
	y_slice = slice(cropped_center_y - top, cropped_center_y + bottom)
	x_slice = slice(cropped_center_x - left, cropped_center_x + right)
	cropped_img[y_slice, x_slice, :] = image[y0:y1, x0:x1, :]

	border = np.array([
	cropped_center_y - top, cropped_center_y + bottom,
	cropped_center_x - left, cropped_center_x + right
	],
	dtype=np.float32)

	return cropped_img, border, patch

	def _train_aug(self, results):
	"""Random crop and around padding the original image.

	Args:
	results (dict): Image infomations in the augment pipeline.

	Returns:
	results (dict): The updated dict.
	"""
	img = results['img']
	h, w, c = img.shape
	gt_bboxes = results['gt_bboxes']
	while True:
	scale = random.choice(self.ratios)
	new_h = int(self.crop_size[1] * scale)
	new_w = int(self.crop_size[0] * scale)
	h_border = self._get_border(self.border, h)
	w_border = self._get_border(self.border, w)

	for i in range(50):
	center_x = random.randint(low=w_border, high=w - w_border)
	center_y = random.randint(low=h_border, high=h - h_border)

	cropped_img, border, patch = self._crop_image_and_paste(
	img, [center_y, center_x], [new_h, new_w])

	if len(gt_bboxes) == 0:
	results['img'] = cropped_img
	results['img_shape'] = cropped_img.shape
	return results

	# if image do not have valid bbox, any crop patch is valid.
	mask = self._filter_boxes(patch, gt_bboxes)
	if not mask.any():
	continue

	results['img'] = cropped_img
	results['img_shape'] = cropped_img.shape

	x0, y0, x1, y1 = patch

	left_w, top_h = center_x - x0, center_y - y0
	cropped_center_x, cropped_center_y = new_w // 2, new_h // 2

	# crop bboxes accordingly and clip to the image boundary
	gt_bboxes = gt_bboxes[mask]
	gt_bboxes.translate_([
	cropped_center_x - left_w - x0,
	cropped_center_y - top_h - y0
	])
	if self.bbox_clip_border:
	gt_bboxes.clip_([new_h, new_w])
	keep = gt_bboxes.is_inside([new_h, new_w]).numpy()
	gt_bboxes = gt_bboxes[keep]

	results['gt_bboxes'] = gt_bboxes

	# ignore_flags
	if results.get('gt_ignore_flags', None) is not None:
	gt_ignore_flags = results['gt_ignore_flags'][mask]
	results['gt_ignore_flags'] = \
	gt_ignore_flags[keep]

	# labels
	if results.get('gt_bboxes_labels', None) is not None:
	gt_labels = results['gt_bboxes_labels'][mask]
	results['gt_bboxes_labels'] = gt_labels[keep]

	if 'gt_masks' in results or 'gt_seg_map' in results:
	raise NotImplementedError(
	'RandomCenterCropPad only supports bbox.')

	return results

	def _test_aug(self, results):
	"""Around padding the original image without cropping.

	The padding mode and value are from ``test_pad_mode``.

	Args:
	results (dict): Image infomations in the augment pipeline.

	Returns:
	results (dict): The updated dict.
	"""
	img = results['img']
	h, w, c = img.shape
	if self.test_pad_mode[0] in ['logical_or']:
	# self.test_pad_add_pix is only used for centernet
	target_h = (h \| self.test_pad_mode[1]) + self.test_pad_add_pix
	target_w = (w \| self.test_pad_mode[1]) + self.test_pad_add_pix
	elif self.test_pad_mode[0] in ['size_divisor']:
	divisor = self.test_pad_mode[1]
	target_h = int(np.ceil(h / divisor)) * divisor
	target_w = int(np.ceil(w / divisor)) * divisor
	else:
	raise NotImplementedError(
	'RandomCenterCropPad only support two testing pad mode:'
	'logical-or and size_divisor.')

	cropped_img, border, _ = self._crop_image_and_paste(
	img, [h // 2, w // 2], [target_h, target_w])
	results['img'] = cropped_img
	results['img_shape'] = cropped_img.shape
	results['border'] = border
	return results

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	img = results['img']
	assert img.dtype == np.float32, (
	'RandomCenterCropPad needs the input image of dtype np.float32,'
	' please set "to_float32=True" in "LoadImageFromFile" pipeline')
	h, w, c = img.shape
	assert c == len(self.mean)
	if self.test_mode:
	return self._test_aug(results)
	else:
	return self._train_aug(results)

	def __repr__(self):
	repr_str = self.__class__.__name__
	repr_str += f'(crop_size={self.crop_size}, '
	repr_str += f'ratios={self.ratios}, '
	repr_str += f'border={self.border}, '
	repr_str += f'mean={self.input_mean}, '
	repr_str += f'std={self.input_std}, '
	repr_str += f'to_rgb={self.to_rgb}, '
	repr_str += f'test_mode={self.test_mode}, '
	repr_str += f'test_pad_mode={self.test_pad_mode}, '
	repr_str += f'bbox_clip_border={self.bbox_clip_border})'
	return repr_str


	@TRANSFORMS.register_module()
	class CutOut(BaseTransform):
	"""CutOut operation.

	Randomly drop some regions of image used in
	`Cutout <https://arxiv.org/abs/1708.04552>`_.

	Required Keys:

	- img

	Modified Keys:

	- img

	Args:
	n_holes (int or tuple[int, int]): Number of regions to be dropped.
	If it is given as a list, number of holes will be randomly
	selected from the closed interval [``n_holes[0]``, ``n_holes[1]``].
	cutout_shape (tuple[int, int] or list[tuple[int, int]], optional):
	The candidate shape of dropped regions. It can be
	``tuple[int, int]`` to use a fixed cutout shape, or
	``list[tuple[int, int]]`` to randomly choose shape
	from the list. Defaults to None.
	cutout_ratio (tuple[float, float] or list[tuple[float, float]],
	optional): The candidate ratio of dropped regions. It can be
	``tuple[float, float]`` to use a fixed ratio or
	``list[tuple[float, float]]`` to randomly choose ratio
	from the list. Please note that ``cutout_shape`` and
	``cutout_ratio`` cannot be both given at the same time.
	Defaults to None.
	fill_in (tuple[float, float, float] or tuple[int, int, int]): The value
	of pixel to fill in the dropped regions. Defaults to (0, 0, 0).
	"""

	def __init__(
	self,
	n_holes: Union[int, Tuple[int, int]],
	cutout_shape: Optional[Union[Tuple[int, int],
	List[Tuple[int, int]]]] = None,
	cutout_ratio: Optional[Union[Tuple[float, float],
	List[Tuple[float, float]]]] = None,
	fill_in: Union[Tuple[float, float, float], Tuple[int, int,
	int]] = (0, 0, 0)
	) -> None:

	assert (cutout_shape is None) ^ (cutout_ratio is None), \
	'Either cutout_shape or cutout_ratio should be specified.'
	assert (isinstance(cutout_shape, (list, tuple))
	or isinstance(cutout_ratio, (list, tuple)))
	if isinstance(n_holes, tuple):
	assert len(n_holes) == 2 and 0 <= n_holes[0] < n_holes[1]
	else:
	n_holes = (n_holes, n_holes)
	self.n_holes = n_holes
	self.fill_in = fill_in
	self.with_ratio = cutout_ratio is not None
	self.candidates = cutout_ratio if self.with_ratio else cutout_shape
	if not isinstance(self.candidates, list):
	self.candidates = [self.candidates]

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	"""Call function to drop some regions of image."""
	h, w, c = results['img'].shape
	n_holes = np.random.randint(self.n_holes[0], self.n_holes[1] + 1)
	for _ in range(n_holes):
	x1 = np.random.randint(0, w)
	y1 = np.random.randint(0, h)
	index = np.random.randint(0, len(self.candidates))
	if not self.with_ratio:
	cutout_w, cutout_h = self.candidates[index]
	else:
	cutout_w = int(self.candidates[index][0] * w)
	cutout_h = int(self.candidates[index][1] * h)

	x2 = np.clip(x1 + cutout_w, 0, w)
	y2 = np.clip(y1 + cutout_h, 0, h)
	results['img'][y1:y2, x1:x2, :] = self.fill_in

	return results

	def __repr__(self):
	repr_str = self.__class__.__name__
	repr_str += f'(n_holes={self.n_holes}, '
	repr_str += (f'cutout_ratio={self.candidates}, ' if self.with_ratio
	else f'cutout_shape={self.candidates}, ')
	repr_str += f'fill_in={self.fill_in})'
	return repr_str


	@TRANSFORMS.register_module()
	class Mosaic(BaseTransform):
	"""Mosaic augmentation.

	Given 4 images, mosaic transform combines them into
	one output image. The output image is composed of the parts from each sub-
	image.

	.. code:: text

	mosaic transform
	center_x
	+------------------------------+
	\| pad \| pad \|
	\| +-----------+ \|
	\| \| \| \|
	\| \| image1 \|--------+ \|
	\| \| \| \| \|
	\| \| \| image2 \| \|
	center_y \|----+-------------+-----------\|
	\| \| cropped \| \|
	\|pad \| image3 \| image4 \|
	\| \| \| \|
	+----\|-------------+-----------+
	\| \|
	+-------------+

	The mosaic transform steps are as follows:

	1. Choose the mosaic center as the intersections of 4 images
	2. Get the left top image according to the index, and randomly
	sample another 3 images from the custom dataset.
	3. Sub image will be cropped if image is larger than mosaic patch

	Required Keys:

	- img
	- gt_bboxes (BaseBoxes[torch.float32]) (optional)
	- gt_bboxes_labels (np.int64) (optional)
	- gt_ignore_flags (bool) (optional)
	- mix_results (List[dict])

	Modified Keys:

	- img
	- img_shape
	- gt_bboxes (optional)
	- gt_bboxes_labels (optional)
	- gt_ignore_flags (optional)

	Args:
	img_scale (Sequence[int]): Image size after mosaic pipeline of single
	image. The shape order should be (width, height).
	Defaults to (640, 640).
	center_ratio_range (Sequence[float]): Center ratio range of mosaic
	output. Defaults to (0.5, 1.5).
	bbox_clip_border (bool, optional): Whether to clip the objects outside
	the border of the image. In some dataset like MOT17, the gt bboxes
	are allowed to cross the border of images. Therefore, we don't
	need to clip the gt bboxes in these cases. Defaults to True.
	pad_val (int): Pad value. Defaults to 114.
	prob (float): Probability of applying this transformation.
	Defaults to 1.0.
	"""

	def __init__(self,
	img_scale: Tuple[int, int] = (640, 640),
	center_ratio_range: Tuple[float, float] = (0.5, 1.5),
	bbox_clip_border: bool = True,
	pad_val: float = 114.0,
	prob: float = 1.0) -> None:
	assert isinstance(img_scale, tuple)
	assert 0 <= prob <= 1.0, 'The probability should be in range [0,1]. ' \
	f'got {prob}.'

	log_img_scale(img_scale, skip_square=True, shape_order='wh')
	self.img_scale = img_scale
	self.center_ratio_range = center_ratio_range
	self.bbox_clip_border = bbox_clip_border
	self.pad_val = pad_val
	self.prob = prob

	@cache_randomness
	def get_indexes(self, dataset: BaseDataset) -> int:
	"""Call function to collect indexes.

	Args:
	dataset (:obj:`MultiImageMixDataset`): The dataset.

	Returns:
	list: indexes.
	"""

	indexes = [random.randint(0, len(dataset)) for _ in range(3)]
	return indexes

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	"""Mosaic transform function.

	Args:
	results (dict): Result dict.

	Returns:
	dict: Updated result dict.
	"""
	if random.uniform(0, 1) > self.prob:
	return results

	assert 'mix_results' in results
	mosaic_bboxes = []
	mosaic_bboxes_labels = []
	mosaic_ignore_flags = []
	if len(results['img'].shape) == 3:
	mosaic_img = np.full(
	(int(self.img_scale[1] * 2), int(self.img_scale[0] * 2), 3),
	self.pad_val,
	dtype=results['img'].dtype)
	else:
	mosaic_img = np.full(
	(int(self.img_scale[1] * 2), int(self.img_scale[0] * 2)),
	self.pad_val,
	dtype=results['img'].dtype)

	# mosaic center x, y
	center_x = int(
	random.uniform(self.center_ratio_range) self.img_scale[0])
	center_y = int(
	random.uniform(self.center_ratio_range) self.img_scale[1])
	center_position = (center_x, center_y)

	loc_strs = ('top_left', 'top_right', 'bottom_left', 'bottom_right')
	for i, loc in enumerate(loc_strs):
	if loc == 'top_left':
	results_patch = copy.deepcopy(results)
	else:
	results_patch = copy.deepcopy(results['mix_results'][i - 1])

	img_i = results_patch['img']
	h_i, w_i = img_i.shape[:2]
	# keep_ratio resize
	scale_ratio_i = min(self.img_scale[1] / h_i,
	self.img_scale[0] / w_i)
	img_i = mmcv.imresize(
	img_i, (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i)))

	# compute the combine parameters
	paste_coord, crop_coord = self._mosaic_combine(
	loc, center_position, img_i.shape[:2][::-1])
	x1_p, y1_p, x2_p, y2_p = paste_coord
	x1_c, y1_c, x2_c, y2_c = crop_coord

	# crop and paste image
	mosaic_img[y1_p:y2_p, x1_p:x2_p] = img_i[y1_c:y2_c, x1_c:x2_c]

	# adjust coordinate
	gt_bboxes_i = results_patch['gt_bboxes']
	gt_bboxes_labels_i = results_patch['gt_bboxes_labels']
	gt_ignore_flags_i = results_patch['gt_ignore_flags']

	padw = x1_p - x1_c
	padh = y1_p - y1_c
	gt_bboxes_i.rescale_([scale_ratio_i, scale_ratio_i])
	gt_bboxes_i.translate_([padw, padh])
	mosaic_bboxes.append(gt_bboxes_i)
	mosaic_bboxes_labels.append(gt_bboxes_labels_i)
	mosaic_ignore_flags.append(gt_ignore_flags_i)

	mosaic_bboxes = mosaic_bboxes[0].cat(mosaic_bboxes, 0)
	mosaic_bboxes_labels = np.concatenate(mosaic_bboxes_labels, 0)
	mosaic_ignore_flags = np.concatenate(mosaic_ignore_flags, 0)

	if self.bbox_clip_border:
	mosaic_bboxes.clip_([2 * self.img_scale[1], 2 * self.img_scale[0]])
	# remove outside bboxes
	inside_inds = mosaic_bboxes.is_inside(
	[2 * self.img_scale[1], 2 * self.img_scale[0]]).numpy()
	mosaic_bboxes = mosaic_bboxes[inside_inds]
	mosaic_bboxes_labels = mosaic_bboxes_labels[inside_inds]
	mosaic_ignore_flags = mosaic_ignore_flags[inside_inds]

	results['img'] = mosaic_img
	results['img_shape'] = mosaic_img.shape
	results['gt_bboxes'] = mosaic_bboxes
	results['gt_bboxes_labels'] = mosaic_bboxes_labels
	results['gt_ignore_flags'] = mosaic_ignore_flags
	return results

	def _mosaic_combine(
	self, loc: str, center_position_xy: Sequence[float],
	img_shape_wh: Sequence[int]) -> Tuple[Tuple[int], Tuple[int]]:
	"""Calculate global coordinate of mosaic image and local coordinate of
	cropped sub-image.

	Args:
	loc (str): Index for the sub-image, loc in ('top_left',
	'top_right', 'bottom_left', 'bottom_right').
	center_position_xy (Sequence[float]): Mixing center for 4 images,
	(x, y).
	img_shape_wh (Sequence[int]): Width and height of sub-image

	Returns:
	tuple[tuple[float]]: Corresponding coordinate of pasting and
	cropping
	- paste_coord (tuple): paste corner coordinate in mosaic image.
	- crop_coord (tuple): crop corner coordinate in mosaic image.
	"""
	assert loc in ('top_left', 'top_right', 'bottom_left', 'bottom_right')
	if loc == 'top_left':
	# index0 to top left part of image
	x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \
	max(center_position_xy[1] - img_shape_wh[1], 0), \
	center_position_xy[0], \
	center_position_xy[1]
	crop_coord = img_shape_wh[0] - (x2 - x1), img_shape_wh[1] - (
	y2 - y1), img_shape_wh[0], img_shape_wh[1]

	elif loc == 'top_right':
	# index1 to top right part of image
	x1, y1, x2, y2 = center_position_xy[0], \
	max(center_position_xy[1] - img_shape_wh[1], 0), \
	min(center_position_xy[0] + img_shape_wh[0],
	self.img_scale[0] * 2), \
	center_position_xy[1]
	crop_coord = 0, img_shape_wh[1] - (y2 - y1), min(
	img_shape_wh[0], x2 - x1), img_shape_wh[1]

	elif loc == 'bottom_left':
	# index2 to bottom left part of image
	x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \
	center_position_xy[1], \
	center_position_xy[0], \
	min(self.img_scale[1] * 2, center_position_xy[1] +
	img_shape_wh[1])
	crop_coord = img_shape_wh[0] - (x2 - x1), 0, img_shape_wh[0], min(
	y2 - y1, img_shape_wh[1])

	else:
	# index3 to bottom right part of image
	x1, y1, x2, y2 = center_position_xy[0], \
	center_position_xy[1], \
	min(center_position_xy[0] + img_shape_wh[0],
	self.img_scale[0] * 2), \
	min(self.img_scale[1] * 2, center_position_xy[1] +
	img_shape_wh[1])
	crop_coord = 0, 0, min(img_shape_wh[0],
	x2 - x1), min(y2 - y1, img_shape_wh[1])

	paste_coord = x1, y1, x2, y2
	return paste_coord, crop_coord

	def __repr__(self):
	repr_str = self.__class__.__name__
	repr_str += f'(img_scale={self.img_scale}, '
	repr_str += f'center_ratio_range={self.center_ratio_range}, '
	repr_str += f'pad_val={self.pad_val}, '
	repr_str += f'prob={self.prob})'
	return repr_str


	@TRANSFORMS.register_module()
	class MixUp(BaseTransform):
	"""MixUp data augmentation.

	.. code:: text

	mixup transform
	+------------------------------+
	\| mixup image \| \|
	\| +--------\|--------+ \|
	\| \| \| \| \|
	\|---------------+ \| \|
	\| \| \| \|
	\| \| image \| \|
	\| \| \| \|
	\| \| \| \|
	\| \|-----------------+ \|
	\| pad \|
	+------------------------------+

	The mixup transform steps are as follows:

	1. Another random image is picked by dataset and embedded in
	the top left patch(after padding and resizing)
	2. The target of mixup transform is the weighted average of mixup
	image and origin image.

	Required Keys:

	- img
	- gt_bboxes (BaseBoxes[torch.float32]) (optional)
	- gt_bboxes_labels (np.int64) (optional)
	- gt_ignore_flags (bool) (optional)
	- mix_results (List[dict])


	Modified Keys:

	- img
	- img_shape
	- gt_bboxes (optional)
	- gt_bboxes_labels (optional)
	- gt_ignore_flags (optional)


	Args:
	img_scale (Sequence[int]): Image output size after mixup pipeline.
	The shape order should be (width, height). Defaults to (640, 640).
	ratio_range (Sequence[float]): Scale ratio of mixup image.
	Defaults to (0.5, 1.5).
	flip_ratio (float): Horizontal flip ratio of mixup image.
	Defaults to 0.5.
	pad_val (int): Pad value. Defaults to 114.
	max_iters (int): The maximum number of iterations. If the number of
	iterations is greater than `max_iters`, but gt_bbox is still
	empty, then the iteration is terminated. Defaults to 15.
	bbox_clip_border (bool, optional): Whether to clip the objects outside
	the border of the image. In some dataset like MOT17, the gt bboxes
	are allowed to cross the border of images. Therefore, we don't
	need to clip the gt bboxes in these cases. Defaults to True.
	"""

	def __init__(self,
	img_scale: Tuple[int, int] = (640, 640),
	ratio_range: Tuple[float, float] = (0.5, 1.5),
	flip_ratio: float = 0.5,
	pad_val: float = 114.0,
	max_iters: int = 15,
	bbox_clip_border: bool = True) -> None:
	assert isinstance(img_scale, tuple)
	log_img_scale(img_scale, skip_square=True, shape_order='wh')
	self.dynamic_scale = img_scale
	self.ratio_range = ratio_range
	self.flip_ratio = flip_ratio
	self.pad_val = pad_val
	self.max_iters = max_iters
	self.bbox_clip_border = bbox_clip_border

	@cache_randomness
	def get_indexes(self, dataset: BaseDataset) -> int:
	"""Call function to collect indexes.

	Args:
	dataset (:obj:`MultiImageMixDataset`): The dataset.

	Returns:
	list: indexes.
	"""

	for i in range(self.max_iters):
	index = random.randint(0, len(dataset))
	gt_bboxes_i = dataset[index]['gt_bboxes']
	if len(gt_bboxes_i) != 0:
	break

	return index

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	"""MixUp transform function.

	Args:
	results (dict): Result dict.

	Returns:
	dict: Updated result dict.
	"""

	assert 'mix_results' in results
	assert len(
	results['mix_results']) == 1, 'MixUp only support 2 images now !'

	if results['mix_results'][0]['gt_bboxes'].shape[0] == 0:
	# empty bbox
	return results

	retrieve_results = results['mix_results'][0]
	retrieve_img = retrieve_results['img']

	jit_factor = random.uniform(*self.ratio_range)
	is_filp = random.uniform(0, 1) > self.flip_ratio

	if len(retrieve_img.shape) == 3:
	out_img = np.ones(
	(self.dynamic_scale[1], self.dynamic_scale[0], 3),
	dtype=retrieve_img.dtype) * self.pad_val
	else:
	out_img = np.ones(
	self.dynamic_scale[::-1],
	dtype=retrieve_img.dtype) * self.pad_val

	# 1. keep_ratio resize
	scale_ratio = min(self.dynamic_scale[1] / retrieve_img.shape[0],
	self.dynamic_scale[0] / retrieve_img.shape[1])
	retrieve_img = mmcv.imresize(
	retrieve_img, (int(retrieve_img.shape[1] * scale_ratio),
	int(retrieve_img.shape[0] * scale_ratio)))

	# 2. paste
	out_img[:retrieve_img.shape[0], :retrieve_img.shape[1]] = retrieve_img

	# 3. scale jit
	scale_ratio *= jit_factor
	out_img = mmcv.imresize(out_img, (int(out_img.shape[1] * jit_factor),
	int(out_img.shape[0] * jit_factor)))

	# 4. flip
	if is_filp:
	out_img = out_img[:, ::-1, :]

	# 5. random crop
	ori_img = results['img']
	origin_h, origin_w = out_img.shape[:2]
	target_h, target_w = ori_img.shape[:2]
	padded_img = np.ones((max(origin_h, target_h), max(
	origin_w, target_w), 3)) * self.pad_val
	padded_img = padded_img.astype(np.uint8)
	padded_img[:origin_h, :origin_w] = out_img

	x_offset, y_offset = 0, 0
	if padded_img.shape[0] > target_h:
	y_offset = random.randint(0, padded_img.shape[0] - target_h)
	if padded_img.shape[1] > target_w:
	x_offset = random.randint(0, padded_img.shape[1] - target_w)
	padded_cropped_img = padded_img[y_offset:y_offset + target_h,
	x_offset:x_offset + target_w]

	# 6. adjust bbox
	retrieve_gt_bboxes = retrieve_results['gt_bboxes']
	retrieve_gt_bboxes.rescale_([scale_ratio, scale_ratio])
	if self.bbox_clip_border:
	retrieve_gt_bboxes.clip_([origin_h, origin_w])

	if is_filp:
	retrieve_gt_bboxes.flip_([origin_h, origin_w],
	direction='horizontal')

	# 7. filter
	cp_retrieve_gt_bboxes = retrieve_gt_bboxes.clone()
	cp_retrieve_gt_bboxes.translate_([-x_offset, -y_offset])
	if self.bbox_clip_border:
	cp_retrieve_gt_bboxes.clip_([target_h, target_w])

	# 8. mix up
	ori_img = ori_img.astype(np.float32)
	mixup_img = 0.5 * ori_img + 0.5 * padded_cropped_img.astype(np.float32)

	retrieve_gt_bboxes_labels = retrieve_results['gt_bboxes_labels']
	retrieve_gt_ignore_flags = retrieve_results['gt_ignore_flags']

	mixup_gt_bboxes = cp_retrieve_gt_bboxes.cat(
	(results['gt_bboxes'], cp_retrieve_gt_bboxes), dim=0)
	mixup_gt_bboxes_labels = np.concatenate(
	(results['gt_bboxes_labels'], retrieve_gt_bboxes_labels), axis=0)
	mixup_gt_ignore_flags = np.concatenate(
	(results['gt_ignore_flags'], retrieve_gt_ignore_flags), axis=0)

	# remove outside bbox
	inside_inds = mixup_gt_bboxes.is_inside([target_h, target_w]).numpy()
	mixup_gt_bboxes = mixup_gt_bboxes[inside_inds]
	mixup_gt_bboxes_labels = mixup_gt_bboxes_labels[inside_inds]
	mixup_gt_ignore_flags = mixup_gt_ignore_flags[inside_inds]

	results['img'] = mixup_img.astype(np.uint8)
	results['img_shape'] = mixup_img.shape
	results['gt_bboxes'] = mixup_gt_bboxes
	results['gt_bboxes_labels'] = mixup_gt_bboxes_labels
	results['gt_ignore_flags'] = mixup_gt_ignore_flags

	return results

	def __repr__(self):
	repr_str = self.__class__.__name__
	repr_str += f'(dynamic_scale={self.dynamic_scale}, '
	repr_str += f'ratio_range={self.ratio_range}, '
	repr_str += f'flip_ratio={self.flip_ratio}, '
	repr_str += f'pad_val={self.pad_val}, '
	repr_str += f'max_iters={self.max_iters}, '
	repr_str += f'bbox_clip_border={self.bbox_clip_border})'
	return repr_str


	@TRANSFORMS.register_module()
	class RandomAffine(BaseTransform):
	"""Random affine transform data augmentation.

	This operation randomly generates affine transform matrix which including
	rotation, translation, shear and scaling transforms.

	Required Keys:

	- img
	- gt_bboxes (BaseBoxes[torch.float32]) (optional)
	- gt_bboxes_labels (np.int64) (optional)
	- gt_ignore_flags (bool) (optional)

	Modified Keys:

	- img
	- img_shape
	- gt_bboxes (optional)
	- gt_bboxes_labels (optional)
	- gt_ignore_flags (optional)

	Args:
	max_rotate_degree (float): Maximum degrees of rotation transform.
	Defaults to 10.
	max_translate_ratio (float): Maximum ratio of translation.
	Defaults to 0.1.
	scaling_ratio_range (tuple[float]): Min and max ratio of
	scaling transform. Defaults to (0.5, 1.5).
	max_shear_degree (float): Maximum degrees of shear
	transform. Defaults to 2.
	border (tuple[int]): Distance from width and height sides of input
	image to adjust output shape. Only used in mosaic dataset.
	Defaults to (0, 0).
	border_val (tuple[int]): Border padding values of 3 channels.
	Defaults to (114, 114, 114).
	bbox_clip_border (bool, optional): Whether to clip the objects outside
	the border of the image. In some dataset like MOT17, the gt bboxes
	are allowed to cross the border of images. Therefore, we don't
	need to clip the gt bboxes in these cases. Defaults to True.
	"""

	def __init__(self,
	max_rotate_degree: float = 10.0,
	max_translate_ratio: float = 0.1,
	scaling_ratio_range: Tuple[float, float] = (0.5, 1.5),
	max_shear_degree: float = 2.0,
	border: Tuple[int, int] = (0, 0),
	border_val: Tuple[int, int, int] = (114, 114, 114),
	bbox_clip_border: bool = True) -> None:
	assert 0 <= max_translate_ratio <= 1
	assert scaling_ratio_range[0] <= scaling_ratio_range[1]
	assert scaling_ratio_range[0] > 0
	self.max_rotate_degree = max_rotate_degree
	self.max_translate_ratio = max_translate_ratio
	self.scaling_ratio_range = scaling_ratio_range
	self.max_shear_degree = max_shear_degree
	self.border = border
	self.border_val = border_val
	self.bbox_clip_border = bbox_clip_border

	@cache_randomness
	def _get_random_homography_matrix(self, height, width):
	# Rotation
	rotation_degree = random.uniform(-self.max_rotate_degree,
	self.max_rotate_degree)
	rotation_matrix = self._get_rotation_matrix(rotation_degree)

	# Scaling
	scaling_ratio = random.uniform(self.scaling_ratio_range[0],
	self.scaling_ratio_range[1])
	scaling_matrix = self._get_scaling_matrix(scaling_ratio)

	# Shear
	x_degree = random.uniform(-self.max_shear_degree,
	self.max_shear_degree)
	y_degree = random.uniform(-self.max_shear_degree,
	self.max_shear_degree)
	shear_matrix = self._get_shear_matrix(x_degree, y_degree)

	# Translation
	trans_x = random.uniform(-self.max_translate_ratio,
	self.max_translate_ratio) * width
	trans_y = random.uniform(-self.max_translate_ratio,
	self.max_translate_ratio) * height
	translate_matrix = self._get_translation_matrix(trans_x, trans_y)

	warp_matrix = (
	translate_matrix @ shear_matrix @ rotation_matrix @ scaling_matrix)
	return warp_matrix

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	img = results['img']
	height = img.shape[0] + self.border[1] * 2
	width = img.shape[1] + self.border[0] * 2

	warp_matrix = self._get_random_homography_matrix(height, width)

	img = cv2.warpPerspective(
	img,
	warp_matrix,
	dsize=(width, height),
	borderValue=self.border_val)
	results['img'] = img
	results['img_shape'] = img.shape

	bboxes = results['gt_bboxes']
	num_bboxes = len(bboxes)
	if num_bboxes:
	bboxes.project_(warp_matrix)
	if self.bbox_clip_border:
	bboxes.clip_([height, width])
	# remove outside bbox
	valid_index = bboxes.is_inside([height, width]).numpy()
	results['gt_bboxes'] = bboxes[valid_index]
	results['gt_bboxes_labels'] = results['gt_bboxes_labels'][
	valid_index]
	results['gt_ignore_flags'] = results['gt_ignore_flags'][
	valid_index]

	if 'gt_masks' in results:
	raise NotImplementedError('RandomAffine only supports bbox.')
	return results

	def __repr__(self):
	repr_str = self.__class__.__name__
	repr_str += f'(max_rotate_degree={self.max_rotate_degree}, '
	repr_str += f'max_translate_ratio={self.max_translate_ratio}, '
	repr_str += f'scaling_ratio_range={self.scaling_ratio_range}, '
	repr_str += f'max_shear_degree={self.max_shear_degree}, '
	repr_str += f'border={self.border}, '
	repr_str += f'border_val={self.border_val}, '
	repr_str += f'bbox_clip_border={self.bbox_clip_border})'
	return repr_str

	@staticmethod
	def _get_rotation_matrix(rotate_degrees: float) -> np.ndarray:
	radian = math.radians(rotate_degrees)
	rotation_matrix = np.array(
	[[np.cos(radian), -np.sin(radian), 0.],
	[np.sin(radian), np.cos(radian), 0.], [0., 0., 1.]],
	dtype=np.float32)
	return rotation_matrix

	@staticmethod
	def _get_scaling_matrix(scale_ratio: float) -> np.ndarray:
	scaling_matrix = np.array(
	[[scale_ratio, 0., 0.], [0., scale_ratio, 0.], [0., 0., 1.]],
	dtype=np.float32)
	return scaling_matrix

	@staticmethod
	def _get_shear_matrix(x_shear_degrees: float,
	y_shear_degrees: float) -> np.ndarray:
	x_radian = math.radians(x_shear_degrees)
	y_radian = math.radians(y_shear_degrees)
	shear_matrix = np.array([[1, np.tan(x_radian), 0.],
	[np.tan(y_radian), 1, 0.], [0., 0., 1.]],
	dtype=np.float32)
	return shear_matrix

	@staticmethod
	def _get_translation_matrix(x: float, y: float) -> np.ndarray:
	translation_matrix = np.array([[1, 0., x], [0., 1, y], [0., 0., 1.]],
	dtype=np.float32)
	return translation_matrix


	@TRANSFORMS.register_module()
	class YOLOXHSVRandomAug(BaseTransform):
	"""Apply HSV augmentation to image sequentially. It is referenced from
	https://github.com/Megvii-
	BaseDetection/YOLOX/blob/main/yolox/data/data_augment.py#L21.

	Required Keys:

	- img

	Modified Keys:

	- img

	Args:
	hue_delta (int): delta of hue. Defaults to 5.
	saturation_delta (int): delta of saturation. Defaults to 30.
	value_delta (int): delat of value. Defaults to 30.
	"""

	def __init__(self,
	hue_delta: int = 5,
	saturation_delta: int = 30,
	value_delta: int = 30) -> None:
	self.hue_delta = hue_delta
	self.saturation_delta = saturation_delta
	self.value_delta = value_delta

	@cache_randomness
	def _get_hsv_gains(self):
	hsv_gains = np.random.uniform(-1, 1, 3) * [
	self.hue_delta, self.saturation_delta, self.value_delta
	]
	# random selection of h, s, v
	hsv_gains *= np.random.randint(0, 2, 3)
	# prevent overflow
	hsv_gains = hsv_gains.astype(np.int16)
	return hsv_gains

	def transform(self, results: dict) -> dict:
	img = results['img']
	hsv_gains = self._get_hsv_gains()
	img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.int16)

	img_hsv[..., 0] = (img_hsv[..., 0] + hsv_gains[0]) % 180
	img_hsv[..., 1] = np.clip(img_hsv[..., 1] + hsv_gains[1], 0, 255)
	img_hsv[..., 2] = np.clip(img_hsv[..., 2] + hsv_gains[2], 0, 255)
	cv2.cvtColor(img_hsv.astype(img.dtype), cv2.COLOR_HSV2BGR, dst=img)

	results['img'] = img
	return results

	def __repr__(self):
	repr_str = self.__class__.__name__
	repr_str += f'(hue_delta={self.hue_delta}, '
	repr_str += f'saturation_delta={self.saturation_delta}, '
	repr_str += f'value_delta={self.value_delta})'
	return repr_str


	@TRANSFORMS.register_module()
	class CopyPaste(BaseTransform):
	"""Simple Copy-Paste is a Strong Data Augmentation Method for Instance
	Segmentation The simple copy-paste transform steps are as follows:

	1. The destination image is already resized with aspect ratio kept,
	cropped and padded.
	2. Randomly select a source image, which is also already resized
	with aspect ratio kept, cropped and padded in a similar way
	as the destination image.
	3. Randomly select some objects from the source image.
	4. Paste these source objects to the destination image directly,
	due to the source and destination image have the same size.
	5. Update object masks of the destination image, for some origin objects
	may be occluded.
	6. Generate bboxes from the updated destination masks and
	filter some objects which are totally occluded, and adjust bboxes
	which are partly occluded.
	7. Append selected source bboxes, masks, and labels.

	Required Keys:

	- img
	- gt_bboxes (BaseBoxes[torch.float32]) (optional)
	- gt_bboxes_labels (np.int64) (optional)
	- gt_ignore_flags (bool) (optional)
	- gt_masks (BitmapMasks) (optional)

	Modified Keys:

	- img
	- gt_bboxes (optional)
	- gt_bboxes_labels (optional)
	- gt_ignore_flags (optional)
	- gt_masks (optional)

	Args:
	max_num_pasted (int): The maximum number of pasted objects.
	Defaults to 100.
	bbox_occluded_thr (int): The threshold of occluded bbox.
	Defaults to 10.
	mask_occluded_thr (int): The threshold of occluded mask.
	Defaults to 300.
	selected (bool): Whether select objects or not. If select is False,
	all objects of the source image will be pasted to the
	destination image.
	Defaults to True.
	"""

	def __init__(
	self,
	max_num_pasted: int = 100,
	bbox_occluded_thr: int = 10,
	mask_occluded_thr: int = 300,
	selected: bool = True,
	) -> None:
	self.max_num_pasted = max_num_pasted
	self.bbox_occluded_thr = bbox_occluded_thr
	self.mask_occluded_thr = mask_occluded_thr
	self.selected = selected

	@cache_randomness
	def get_indexes(self, dataset: BaseDataset) -> int:
	"""Call function to collect indexes.s.

	Args:
	dataset (:obj:`MultiImageMixDataset`): The dataset.
	Returns:
	list: Indexes.
	"""
	return random.randint(0, len(dataset))

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	"""Transform function to make a copy-paste of image.

	Args:
	results (dict): Result dict.
	Returns:
	dict: Result dict with copy-paste transformed.
	"""

	assert 'mix_results' in results
	num_images = len(results['mix_results'])
	assert num_images == 1, \
	f'CopyPaste only supports processing 2 images, got {num_images}'
	if self.selected:
	selected_results = self._select_object(results['mix_results'][0])
	else:
	selected_results = results['mix_results'][0]
	return self._copy_paste(results, selected_results)

	@cache_randomness
	def _get_selected_inds(self, num_bboxes: int) -> np.ndarray:
	max_num_pasted = min(num_bboxes + 1, self.max_num_pasted)
	num_pasted = np.random.randint(0, max_num_pasted)
	return np.random.choice(num_bboxes, size=num_pasted, replace=False)

	def _select_object(self, results: dict) -> dict:
	"""Select some objects from the source results."""
	bboxes = results['gt_bboxes']
	labels = results['gt_bboxes_labels']
	masks = results['gt_masks']
	ignore_flags = results['gt_ignore_flags']

	selected_inds = self._get_selected_inds(bboxes.shape[0])

	selected_bboxes = bboxes[selected_inds]
	selected_labels = labels[selected_inds]
	selected_masks = masks[selected_inds]
	selected_ignore_flags = ignore_flags[selected_inds]

	results['gt_bboxes'] = selected_bboxes
	results['gt_bboxes_labels'] = selected_labels
	results['gt_masks'] = selected_masks
	results['gt_ignore_flags'] = selected_ignore_flags
	return results

	def _copy_paste(self, dst_results: dict, src_results: dict) -> dict:
	"""CopyPaste transform function.

	Args:
	dst_results (dict): Result dict of the destination image.
	src_results (dict): Result dict of the source image.
	Returns:
	dict: Updated result dict.
	"""
	dst_img = dst_results['img']
	dst_bboxes = dst_results['gt_bboxes']
	dst_labels = dst_results['gt_bboxes_labels']
	dst_masks = dst_results['gt_masks']
	dst_ignore_flags = dst_results['gt_ignore_flags']

	src_img = src_results['img']
	src_bboxes = src_results['gt_bboxes']
	src_labels = src_results['gt_bboxes_labels']
	src_masks = src_results['gt_masks']
	src_ignore_flags = src_results['gt_ignore_flags']

	if len(src_bboxes) == 0:
	return dst_results

	# update masks and generate bboxes from updated masks
	composed_mask = np.where(np.any(src_masks.masks, axis=0), 1, 0)
	updated_dst_masks = self._get_updated_masks(dst_masks, composed_mask)
	updated_dst_bboxes = updated_dst_masks.get_bboxes(type(dst_bboxes))
	assert len(updated_dst_bboxes) == len(updated_dst_masks)

	# filter totally occluded objects
	l1_distance = (updated_dst_bboxes.tensor - dst_bboxes.tensor).abs()
	bboxes_inds = (l1_distance <= self.bbox_occluded_thr).all(
	dim=-1).numpy()
	masks_inds = updated_dst_masks.masks.sum(
	axis=(1, 2)) > self.mask_occluded_thr
	valid_inds = bboxes_inds \| masks_inds

	# Paste source objects to destination image directly
	img = dst_img * (1 - composed_mask[..., np.newaxis]
	) + src_img * composed_mask[..., np.newaxis]
	bboxes = src_bboxes.cat([updated_dst_bboxes[valid_inds], src_bboxes])
	labels = np.concatenate([dst_labels[valid_inds], src_labels])
	masks = np.concatenate(
	[updated_dst_masks.masks[valid_inds], src_masks.masks])
	ignore_flags = np.concatenate(
	[dst_ignore_flags[valid_inds], src_ignore_flags])

	dst_results['img'] = img
	dst_results['gt_bboxes'] = bboxes
	dst_results['gt_bboxes_labels'] = labels
	dst_results['gt_masks'] = BitmapMasks(masks, masks.shape[1],
	masks.shape[2])
	dst_results['gt_ignore_flags'] = ignore_flags

	return dst_results

	def _get_updated_masks(self, masks: BitmapMasks,
	composed_mask: np.ndarray) -> BitmapMasks:
	"""Update masks with composed mask."""
	assert masks.masks.shape[-2:] == composed_mask.shape[-2:], \
	'Cannot compare two arrays of different size'
	masks.masks = np.where(composed_mask, 0, masks.masks)
	return masks

	def __repr__(self):
	repr_str = self.__class__.__name__
	repr_str += f'(max_num_pasted={self.max_num_pasted}, '
	repr_str += f'bbox_occluded_thr={self.bbox_occluded_thr}, '
	repr_str += f'mask_occluded_thr={self.mask_occluded_thr}, '
	repr_str += f'selected={self.selected})'
	return repr_str


	@TRANSFORMS.register_module()
	class RandomErasing(BaseTransform):
	"""RandomErasing operation.

	Random Erasing randomly selects a rectangle region
	in an image and erases its pixels with random values.
	`RandomErasing <https://arxiv.org/abs/1708.04896>`_.

	Required Keys:

	- img
	- gt_bboxes (HorizontalBoxes[torch.float32]) (optional)
	- gt_bboxes_labels (np.int64) (optional)
	- gt_ignore_flags (bool) (optional)
	- gt_masks (BitmapMasks) (optional)

	Modified Keys:
	- img
	- gt_bboxes (optional)
	- gt_bboxes_labels (optional)
	- gt_ignore_flags (optional)
	- gt_masks (optional)

	Args:
	n_patches (int or tuple[int, int]): Number of regions to be dropped.
	If it is given as a tuple, number of patches will be randomly
	selected from the closed interval [``n_patches[0]``,
	``n_patches[1]``].
	ratio (float or tuple[float, float]): The ratio of erased regions.
	It can be ``float`` to use a fixed ratio or ``tuple[float, float]``
	to randomly choose ratio from the interval.
	squared (bool): Whether to erase square region. Defaults to True.
	bbox_erased_thr (float): The threshold for the maximum area proportion
	of the bbox to be erased. When the proportion of the area where the
	bbox is erased is greater than the threshold, the bbox will be
	removed. Defaults to 0.9.
	img_border_value (int or float or tuple): The filled values for
	image border. If float, the same fill value will be used for
	all the three channels of image. If tuple, it should be 3 elements.
	Defaults to 128.
	mask_border_value (int): The fill value used for masks. Defaults to 0.
	seg_ignore_label (int): The fill value used for segmentation map.
	Note this value must equals ``ignore_label`` in ``semantic_head``
	of the corresponding config. Defaults to 255.
	"""

	def __init__(
	self,
	n_patches: Union[int, Tuple[int, int]],
	ratio: Union[float, Tuple[float, float]],
	squared: bool = True,
	bbox_erased_thr: float = 0.9,
	img_border_value: Union[int, float, tuple] = 128,
	mask_border_value: int = 0,
	seg_ignore_label: int = 255,
	) -> None:
	if isinstance(n_patches, tuple):
	assert len(n_patches) == 2 and 0 <= n_patches[0] < n_patches[1]
	else:
	n_patches = (n_patches, n_patches)
	if isinstance(ratio, tuple):
	assert len(ratio) == 2 and 0 <= ratio[0] < ratio[1] <= 1
	else:
	ratio = (ratio, ratio)

	self.n_patches = n_patches
	self.ratio = ratio
	self.squared = squared
	self.bbox_erased_thr = bbox_erased_thr
	self.img_border_value = img_border_value
	self.mask_border_value = mask_border_value
	self.seg_ignore_label = seg_ignore_label

	@cache_randomness
	def _get_patches(self, img_shape: Tuple[int, int]) -> List[list]:
	"""Get patches for random erasing."""
	patches = []
	n_patches = np.random.randint(self.n_patches[0], self.n_patches[1] + 1)
	for _ in range(n_patches):
	if self.squared:
	ratio = np.random.random() * (self.ratio[1] -
	self.ratio[0]) + self.ratio[0]
	ratio = (ratio, ratio)
	else:
	ratio = (np.random.random() * (self.ratio[1] - self.ratio[0]) +
	self.ratio[0], np.random.random() *
	(self.ratio[1] - self.ratio[0]) + self.ratio[0])
	ph, pw = int(img_shape[0] * ratio[0]), int(img_shape[1] * ratio[1])
	px1, py1 = np.random.randint(0,
	img_shape[1] - pw), np.random.randint(
	0, img_shape[0] - ph)
	px2, py2 = px1 + pw, py1 + ph
	patches.append([px1, py1, px2, py2])
	return np.array(patches)

	def _transform_img(self, results: dict, patches: List[list]) -> None:
	"""Random erasing the image."""
	for patch in patches:
	px1, py1, px2, py2 = patch
	results['img'][py1:py2, px1:px2, :] = self.img_border_value

	def _transform_bboxes(self, results: dict, patches: List[list]) -> None:
	"""Random erasing the bboxes."""
	bboxes = results['gt_bboxes']
	# TODO: unify the logic by using operators in BaseBoxes.
	assert isinstance(bboxes, HorizontalBoxes)
	bboxes = bboxes.numpy()
	left_top = np.maximum(bboxes[:, None, :2], patches[:, :2])
	right_bottom = np.minimum(bboxes[:, None, 2:], patches[:, 2:])
	wh = np.maximum(right_bottom - left_top, 0)
	inter_areas = wh[:, :, 0] * wh[:, :, 1]
	bbox_areas = (bboxes[:, 2] - bboxes[:, 0]) * (
	bboxes[:, 3] - bboxes[:, 1])
	bboxes_erased_ratio = inter_areas.sum(-1) / (bbox_areas + 1e-7)
	valid_inds = bboxes_erased_ratio < self.bbox_erased_thr
	results['gt_bboxes'] = HorizontalBoxes(bboxes[valid_inds])
	results['gt_bboxes_labels'] = results['gt_bboxes_labels'][valid_inds]
	results['gt_ignore_flags'] = results['gt_ignore_flags'][valid_inds]
	if results.get('gt_masks', None) is not None:
	results['gt_masks'] = results['gt_masks'][valid_inds]

	def _transform_masks(self, results: dict, patches: List[list]) -> None:
	"""Random erasing the masks."""
	for patch in patches:
	px1, py1, px2, py2 = patch
	results['gt_masks'].masks[:, py1:py2,
	px1:px2] = self.mask_border_value

	def _transform_seg(self, results: dict, patches: List[list]) -> None:
	"""Random erasing the segmentation map."""
	for patch in patches:
	px1, py1, px2, py2 = patch
	results['gt_seg_map'][py1:py2, px1:px2] = self.seg_ignore_label

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	"""Transform function to erase some regions of image."""
	patches = self._get_patches(results['img_shape'])
	self._transform_img(results, patches)
	if results.get('gt_bboxes', None) is not None:
	self._transform_bboxes(results, patches)
	if results.get('gt_masks', None) is not None:
	self._transform_masks(results, patches)
	if results.get('gt_seg_map', None) is not None:
	self._transform_seg(results, patches)
	return results

	def __repr__(self):
	repr_str = self.__class__.__name__
	repr_str += f'(n_patches={self.n_patches}, '
	repr_str += f'ratio={self.ratio}, '
	repr_str += f'squared={self.squared}, '
	repr_str += f'bbox_erased_thr={self.bbox_erased_thr}, '
	repr_str += f'img_border_value={self.img_border_value}, '
	repr_str += f'mask_border_value={self.mask_border_value}, '
	repr_str += f'seg_ignore_label={self.seg_ignore_label})'
	return repr_str


	@TRANSFORMS.register_module()
	class CachedMosaic(Mosaic):
	"""Cached mosaic augmentation.

	Cached mosaic transform will random select images from the cache
	and combine them into one output image.

	.. code:: text

	mosaic transform
	center_x
	+------------------------------+
	\| pad \| pad \|
	\| +-----------+ \|
	\| \| \| \|
	\| \| image1 \|--------+ \|
	\| \| \| \| \|
	\| \| \| image2 \| \|
	center_y \|----+-------------+-----------\|
	\| \| cropped \| \|
	\|pad \| image3 \| image4 \|
	\| \| \| \|
	+----\|-------------+-----------+
	\| \|
	+-------------+

	The cached mosaic transform steps are as follows:

	1. Append the results from the last transform into the cache.
	2. Choose the mosaic center as the intersections of 4 images
	3. Get the left top image according to the index, and randomly
	sample another 3 images from the result cache.
	4. Sub image will be cropped if image is larger than mosaic patch

	Required Keys:

	- img
	- gt_bboxes (np.float32) (optional)
	- gt_bboxes_labels (np.int64) (optional)
	- gt_ignore_flags (bool) (optional)

	Modified Keys:

	- img
	- img_shape
	- gt_bboxes (optional)
	- gt_bboxes_labels (optional)
	- gt_ignore_flags (optional)

	Args:
	img_scale (Sequence[int]): Image size after mosaic pipeline of single
	image. The shape order should be (width, height).
	Defaults to (640, 640).
	center_ratio_range (Sequence[float]): Center ratio range of mosaic
	output. Defaults to (0.5, 1.5).
	bbox_clip_border (bool, optional): Whether to clip the objects outside
	the border of the image. In some dataset like MOT17, the gt bboxes
	are allowed to cross the border of images. Therefore, we don't
	need to clip the gt bboxes in these cases. Defaults to True.
	pad_val (int): Pad value. Defaults to 114.
	prob (float): Probability of applying this transformation.
	Defaults to 1.0.
	max_cached_images (int): The maximum length of the cache. The larger
	the cache, the stronger the randomness of this transform. As a
	rule of thumb, providing 10 caches for each image suffices for
	randomness. Defaults to 40.
	random_pop (bool): Whether to randomly pop a result from the cache
	when the cache is full. If set to False, use FIFO popping method.
	Defaults to True.
	"""

	def __init__(self,
	*args,
	max_cached_images: int = 40,
	random_pop: bool = True,
	**kwargs) -> None:
	super().__init__(args, *kwargs)
	self.results_cache = []
	self.random_pop = random_pop
	assert max_cached_images >= 4, 'The length of cache must >= 4, ' \
	f'but got {max_cached_images}.'
	self.max_cached_images = max_cached_images

	@cache_randomness
	def get_indexes(self, cache: list) -> list:
	"""Call function to collect indexes.

	Args:
	cache (list): The results cache.

	Returns:
	list: indexes.
	"""

	indexes = [random.randint(0, len(cache) - 1) for _ in range(3)]
	return indexes

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	"""Mosaic transform function.

	Args:
	results (dict): Result dict.

	Returns:
	dict: Updated result dict.
	"""
	# cache and pop images
	self.results_cache.append(copy.deepcopy(results))
	if len(self.results_cache) > self.max_cached_images:
	if self.random_pop:
	index = random.randint(0, len(self.results_cache) - 1)
	else:
	index = 0
	self.results_cache.pop(index)

	if len(self.results_cache) <= 4:
	return results

	if random.uniform(0, 1) > self.prob:
	return results
	indices = self.get_indexes(self.results_cache)
	mix_results = [copy.deepcopy(self.results_cache[i]) for i in indices]

	# TODO: refactor mosaic to reuse these code.
	mosaic_bboxes = []
	mosaic_bboxes_labels = []
	mosaic_ignore_flags = []
	mosaic_masks = []
	with_mask = True if 'gt_masks' in results else False

	if len(results['img'].shape) == 3:
	mosaic_img = np.full(
	(int(self.img_scale[1] * 2), int(self.img_scale[0] * 2), 3),
	self.pad_val,
	dtype=results['img'].dtype)
	else:
	mosaic_img = np.full(
	(int(self.img_scale[1] * 2), int(self.img_scale[0] * 2)),
	self.pad_val,
	dtype=results['img'].dtype)

	# mosaic center x, y
	center_x = int(
	random.uniform(self.center_ratio_range) self.img_scale[0])
	center_y = int(
	random.uniform(self.center_ratio_range) self.img_scale[1])
	center_position = (center_x, center_y)

	loc_strs = ('top_left', 'top_right', 'bottom_left', 'bottom_right')
	for i, loc in enumerate(loc_strs):
	if loc == 'top_left':
	results_patch = copy.deepcopy(results)
	else:
	results_patch = copy.deepcopy(mix_results[i - 1])

	img_i = results_patch['img']
	h_i, w_i = img_i.shape[:2]
	# keep_ratio resize
	scale_ratio_i = min(self.img_scale[1] / h_i,
	self.img_scale[0] / w_i)
	img_i = mmcv.imresize(
	img_i, (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i)))

	# compute the combine parameters
	paste_coord, crop_coord = self._mosaic_combine(
	loc, center_position, img_i.shape[:2][::-1])
	x1_p, y1_p, x2_p, y2_p = paste_coord
	x1_c, y1_c, x2_c, y2_c = crop_coord

	# crop and paste image
	mosaic_img[y1_p:y2_p, x1_p:x2_p] = img_i[y1_c:y2_c, x1_c:x2_c]

	# adjust coordinate
	gt_bboxes_i = results_patch['gt_bboxes']
	gt_bboxes_labels_i = results_patch['gt_bboxes_labels']
	gt_ignore_flags_i = results_patch['gt_ignore_flags']

	padw = x1_p - x1_c
	padh = y1_p - y1_c
	gt_bboxes_i.rescale_([scale_ratio_i, scale_ratio_i])
	gt_bboxes_i.translate_([padw, padh])
	mosaic_bboxes.append(gt_bboxes_i)
	mosaic_bboxes_labels.append(gt_bboxes_labels_i)
	mosaic_ignore_flags.append(gt_ignore_flags_i)
	if with_mask and results_patch.get('gt_masks', None) is not None:
	gt_masks_i = results_patch['gt_masks']
	gt_masks_i = gt_masks_i.rescale(float(scale_ratio_i))
	gt_masks_i = gt_masks_i.translate(
	out_shape=(int(self.img_scale[0] * 2),
	int(self.img_scale[1] * 2)),
	offset=padw,
	direction='horizontal')
	gt_masks_i = gt_masks_i.translate(
	out_shape=(int(self.img_scale[0] * 2),
	int(self.img_scale[1] * 2)),
	offset=padh,
	direction='vertical')
	mosaic_masks.append(gt_masks_i)

	mosaic_bboxes = mosaic_bboxes[0].cat(mosaic_bboxes, 0)
	mosaic_bboxes_labels = np.concatenate(mosaic_bboxes_labels, 0)
	mosaic_ignore_flags = np.concatenate(mosaic_ignore_flags, 0)

	if self.bbox_clip_border:
	mosaic_bboxes.clip_([2 * self.img_scale[1], 2 * self.img_scale[0]])
	# remove outside bboxes
	inside_inds = mosaic_bboxes.is_inside(
	[2 * self.img_scale[1], 2 * self.img_scale[0]]).numpy()
	mosaic_bboxes = mosaic_bboxes[inside_inds]
	mosaic_bboxes_labels = mosaic_bboxes_labels[inside_inds]
	mosaic_ignore_flags = mosaic_ignore_flags[inside_inds]

	results['img'] = mosaic_img
	results['img_shape'] = mosaic_img.shape
	results['gt_bboxes'] = mosaic_bboxes
	results['gt_bboxes_labels'] = mosaic_bboxes_labels
	results['gt_ignore_flags'] = mosaic_ignore_flags

	if with_mask:
	mosaic_masks = mosaic_masks[0].cat(mosaic_masks)
	results['gt_masks'] = mosaic_masks[inside_inds]
	return results

	def __repr__(self):
	repr_str = self.__class__.__name__
	repr_str += f'(img_scale={self.img_scale}, '
	repr_str += f'center_ratio_range={self.center_ratio_range}, '
	repr_str += f'pad_val={self.pad_val}, '
	repr_str += f'prob={self.prob}, '
	repr_str += f'max_cached_images={self.max_cached_images}, '
	repr_str += f'random_pop={self.random_pop})'
	return repr_str


	@TRANSFORMS.register_module()
	class CachedMixUp(BaseTransform):
	"""Cached mixup data augmentation.

	.. code:: text

	mixup transform
	+------------------------------+
	\| mixup image \| \|
	\| +--------\|--------+ \|
	\| \| \| \| \|
	\|---------------+ \| \|
	\| \| \| \|
	\| \| image \| \|
	\| \| \| \|
	\| \| \| \|
	\| \|-----------------+ \|
	\| pad \|
	+------------------------------+

	The cached mixup transform steps are as follows:

	1. Append the results from the last transform into the cache.
	2. Another random image is picked from the cache and embedded in
	the top left patch(after padding and resizing)
	3. The target of mixup transform is the weighted average of mixup
	image and origin image.

	Required Keys:

	- img
	- gt_bboxes (np.float32) (optional)
	- gt_bboxes_labels (np.int64) (optional)
	- gt_ignore_flags (bool) (optional)
	- mix_results (List[dict])


	Modified Keys:

	- img
	- img_shape
	- gt_bboxes (optional)
	- gt_bboxes_labels (optional)
	- gt_ignore_flags (optional)


	Args:
	img_scale (Sequence[int]): Image output size after mixup pipeline.
	The shape order should be (width, height). Defaults to (640, 640).
	ratio_range (Sequence[float]): Scale ratio of mixup image.
	Defaults to (0.5, 1.5).
	flip_ratio (float): Horizontal flip ratio of mixup image.
	Defaults to 0.5.
	pad_val (int): Pad value. Defaults to 114.
	max_iters (int): The maximum number of iterations. If the number of
	iterations is greater than `max_iters`, but gt_bbox is still
	empty, then the iteration is terminated. Defaults to 15.
	bbox_clip_border (bool, optional): Whether to clip the objects outside
	the border of the image. In some dataset like MOT17, the gt bboxes
	are allowed to cross the border of images. Therefore, we don't
	need to clip the gt bboxes in these cases. Defaults to True.
	max_cached_images (int): The maximum length of the cache. The larger
	the cache, the stronger the randomness of this transform. As a
	rule of thumb, providing 10 caches for each image suffices for
	randomness. Defaults to 20.
	random_pop (bool): Whether to randomly pop a result from the cache
	when the cache is full. If set to False, use FIFO popping method.
	Defaults to True.
	prob (float): Probability of applying this transformation.
	Defaults to 1.0.
	"""

	def __init__(self,
	img_scale: Tuple[int, int] = (640, 640),
	ratio_range: Tuple[float, float] = (0.5, 1.5),
	flip_ratio: float = 0.5,
	pad_val: float = 114.0,
	max_iters: int = 15,
	bbox_clip_border: bool = True,
	max_cached_images: int = 20,
	random_pop: bool = True,
	prob: float = 1.0) -> None:
	assert isinstance(img_scale, tuple)
	assert max_cached_images >= 2, 'The length of cache must >= 2, ' \
	f'but got {max_cached_images}.'
	assert 0 <= prob <= 1.0, 'The probability should be in range [0,1]. ' \
	f'got {prob}.'
	self.dynamic_scale = img_scale
	self.ratio_range = ratio_range
	self.flip_ratio = flip_ratio
	self.pad_val = pad_val
	self.max_iters = max_iters
	self.bbox_clip_border = bbox_clip_border
	self.results_cache = []

	self.max_cached_images = max_cached_images
	self.random_pop = random_pop
	self.prob = prob

	@cache_randomness
	def get_indexes(self, cache: list) -> int:
	"""Call function to collect indexes.

	Args:
	cache (list): The result cache.

	Returns:
	int: index.
	"""

	for i in range(self.max_iters):
	index = random.randint(0, len(cache) - 1)
	gt_bboxes_i = cache[index]['gt_bboxes']
	if len(gt_bboxes_i) != 0:
	break
	return index

	@autocast_box_type()
	def transform(self, results: dict) -> dict:
	"""MixUp transform function.

	Args:
	results (dict): Result dict.

	Returns:
	dict: Updated result dict.
	"""
	# cache and pop images
	self.results_cache.append(copy.deepcopy(results))
	if len(self.results_cache) > self.max_cached_images:
	if self.random_pop:
	index = random.randint(0, len(self.results_cache) - 1)
	else:
	index = 0
	self.results_cache.pop(index)

	if len(self.results_cache) <= 1:
	return results

	if random.uniform(0, 1) > self.prob:
	return results

	index = self.get_indexes(self.results_cache)
	retrieve_results = copy.deepcopy(self.results_cache[index])

	# TODO: refactor mixup to reuse these code.
	if retrieve_results['gt_bboxes'].shape[0] == 0:
	# empty bbox
	return results

	retrieve_img = retrieve_results['img']
	with_mask = True if 'gt_masks' in results else False

	jit_factor = random.uniform(*self.ratio_range)
	is_filp = random.uniform(0, 1) > self.flip_ratio

	if len(retrieve_img.shape) == 3:
	out_img = np.ones(
	(self.dynamic_scale[1], self.dynamic_scale[0], 3),
	dtype=retrieve_img.dtype) * self.pad_val
	else:
	out_img = np.ones(
	self.dynamic_scale[::-1],
	dtype=retrieve_img.dtype) * self.pad_val

	# 1. keep_ratio resize
	scale_ratio = min(self.dynamic_scale[1] / retrieve_img.shape[0],
	self.dynamic_scale[0] / retrieve_img.shape[1])
	retrieve_img = mmcv.imresize(
	retrieve_img, (int(retrieve_img.shape[1] * scale_ratio),
	int(retrieve_img.shape[0] * scale_ratio)))

	# 2. paste
	out_img[:retrieve_img.shape[0], :retrieve_img.shape[1]] = retrieve_img

	# 3. scale jit
	scale_ratio *= jit_factor
	out_img = mmcv.imresize(out_img, (int(out_img.shape[1] * jit_factor),
	int(out_img.shape[0] * jit_factor)))

	# 4. flip
	if is_filp:
	out_img = out_img[:, ::-1, :]

	# 5. random crop
	ori_img = results['img']
	origin_h, origin_w = out_img.shape[:2]
	target_h, target_w = ori_img.shape[:2]
	padded_img = np.ones((max(origin_h, target_h), max(
	origin_w, target_w), 3)) * self.pad_val
	padded_img = padded_img.astype(np.uint8)
	padded_img[:origin_h, :origin_w] = out_img

	x_offset, y_offset = 0, 0
	if padded_img.shape[0] > target_h:
	y_offset = random.randint(0, padded_img.shape[0] - target_h)
	if padded_img.shape[1] > target_w:
	x_offset = random.randint(0, padded_img.shape[1] - target_w)
	padded_cropped_img = padded_img[y_offset:y_offset + target_h,
	x_offset:x_offset + target_w]

	# 6. adjust bbox
	retrieve_gt_bboxes = retrieve_results['gt_bboxes']
	retrieve_gt_bboxes.rescale_([scale_ratio, scale_ratio])
	if with_mask:
	retrieve_gt_masks = retrieve_results['gt_masks'].rescale(
	scale_ratio)

	if self.bbox_clip_border:
	retrieve_gt_bboxes.clip_([origin_h, origin_w])

	if is_filp:
	retrieve_gt_bboxes.flip_([origin_h, origin_w],
	direction='horizontal')
	if with_mask:
	retrieve_gt_masks = retrieve_gt_masks.flip()

	# 7. filter
	cp_retrieve_gt_bboxes = retrieve_gt_bboxes.clone()
	cp_retrieve_gt_bboxes.translate_([-x_offset, -y_offset])
	if with_mask:
	retrieve_gt_masks = retrieve_gt_masks.translate(
	out_shape=(target_h, target_w),
	offset=-x_offset,
	direction='horizontal')
	retrieve_gt_masks = retrieve_gt_masks.translate(
	out_shape=(target_h, target_w),
	offset=-y_offset,
	direction='vertical')

	if self.bbox_clip_border:
	cp_retrieve_gt_bboxes.clip_([target_h, target_w])

	# 8. mix up
	ori_img = ori_img.astype(np.float32)
	mixup_img = 0.5 * ori_img + 0.5 * padded_cropped_img.astype(np.float32)

	retrieve_gt_bboxes_labels = retrieve_results['gt_bboxes_labels']
	retrieve_gt_ignore_flags = retrieve_results['gt_ignore_flags']

	mixup_gt_bboxes = cp_retrieve_gt_bboxes.cat(
	(results['gt_bboxes'], cp_retrieve_gt_bboxes), dim=0)
	mixup_gt_bboxes_labels = np.concatenate(
	(results['gt_bboxes_labels'], retrieve_gt_bboxes_labels), axis=0)
	mixup_gt_ignore_flags = np.concatenate(
	(results['gt_ignore_flags'], retrieve_gt_ignore_flags), axis=0)
	if with_mask:
	mixup_gt_masks = retrieve_gt_masks.cat(
	[results['gt_masks'], retrieve_gt_masks])

	# remove outside bbox
	inside_inds = mixup_gt_bboxes.is_inside([target_h, target_w]).numpy()
	mixup_gt_bboxes = mixup_gt_bboxes[inside_inds]
	mixup_gt_bboxes_labels = mixup_gt_bboxes_labels[inside_inds]
	mixup_gt_ignore_flags = mixup_gt_ignore_flags[inside_inds]
	if with_mask:
	mixup_gt_masks = mixup_gt_masks[inside_inds]

	results['img'] = mixup_img.astype(np.uint8)
	results['img_shape'] = mixup_img.shape
	results['gt_bboxes'] = mixup_gt_bboxes
	results['gt_bboxes_labels'] = mixup_gt_bboxes_labels
	results['gt_ignore_flags'] = mixup_gt_ignore_flags
	if with_mask:
	results['gt_masks'] = mixup_gt_masks
	return results

	def __repr__(self):
	repr_str = self.__class__.__name__
	repr_str += f'(dynamic_scale={self.dynamic_scale}, '
	repr_str += f'ratio_range={self.ratio_range}, '
	repr_str += f'flip_ratio={self.flip_ratio}, '
	repr_str += f'pad_val={self.pad_val}, '
	repr_str += f'max_iters={self.max_iters}, '
	repr_str += f'bbox_clip_border={self.bbox_clip_border}, '
	repr_str += f'max_cached_images={self.max_cached_images}, '
	repr_str += f'random_pop={self.random_pop}, '
	repr_str += f'prob={self.prob})'
	return repr_str