zdou0830's picture
desco
749745d
raw
history blame
14.6 kB
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import cv2
import random
import numpy as np
import math
import torch
import torchvision
from torchvision.transforms import functional as F
from maskrcnn_benchmark.structures.bounding_box import BoxList
def matrix_iou(a, b, relative=False):
    """Return the pairwise overlap between two sets of boxes (NumPy version for data augmentation).

    Args:
        a: array of shape (A, 4); columns 0-1 are used as the top-left corner
            and columns 2-3 as the bottom-right corner of each box.
        b: array of shape (B, 4) with the same column layout as ``a``.
        relative: when False (default) return intersection-over-union; when
            True return the intersection area divided by the area of the
            box taken from ``b``.

    Returns:
        Array of shape (A, B); entry ``[i, j]`` is the overlap of ``a[i]``
        with ``b[j]``.
    """
    lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
    # Multiply by the validity mask so that pairs which do not intersect get
    # area 0 (two negative extents would otherwise multiply to a positive value).
    area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
    if relative:
        # area_b has shape (B,) and must broadcast along the columns of the
        # (A, B) intersection matrix.  Adding a trailing axis here would
        # divide row i by area_b[i] instead and break the (A, B) shape.
        return area_i / (area_b + 1e-12)
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    return area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12)
class RACompose(object):
    """Three-stage transform pipeline with a randomly drawn middle stage.

    Every transform is a callable taking ``(image, target)`` and returning
    the updated ``(image, target)`` pair.

    Args:
        pre_transforms: transforms always applied first, in order.
        rand_transforms: pool from which ``concurrent`` transforms are drawn
            (with replacement) on every call.
        post_transforms: transforms always applied last, in order.
        concurrent: how many transforms are drawn from the pool per call.
    """

    def __init__(self, pre_transforms, rand_transforms, post_transforms, concurrent=2):
        self.concurrent = concurrent
        self.rand_transforms = rand_transforms
        self.preprocess = pre_transforms
        self.transforms = post_transforms

    def __call__(self, image, target):
        for step in self.preprocess:
            image, target = step(image, target)

        drawn = random.choices(self.rand_transforms, k=self.concurrent)
        for step in drawn:
            # Each randomly drawn transform receives the image as a NumPy array.
            image, target = step(np.array(image), target)

        for step in self.transforms:
            image, target = step(image, target)
        return image, target

    def __repr__(self):
        def listing(steps):
            # One line per transform, each introduced by a newline.
            return "".join("\n" + " {0}".format(step) for step in steps)

        pieces = [
            self.__class__.__name__ + "(",
            listing(self.preprocess),
            "\nRandom select {0} from: (".format(self.concurrent),
            listing(self.rand_transforms),
            ")\nThen, apply:",
            listing(self.transforms),
            "\n)",
        ]
        return "".join(pieces)
class Compose(object):
    """Apply a fixed sequence of ``(image, target)`` transforms in order.

    Args:
        transforms: iterable of callables, each taking ``(image, target)``
            and returning the updated pair.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target=None):
        """Run every transform; return only the image when the final target is None."""
        for step in self.transforms:
            image, target = step(image, target)
        return image if target is None else (image, target)

    def __repr__(self):
        listing = "".join("\n" + " {0}".format(step) for step in self.transforms)
        return self.__class__.__name__ + "(" + listing + "\n)"
class Resize(object):
    """Resize an image (and its target) to a randomly chosen short-side length.

    Args:
        min_size: a single value or a list/tuple of candidate sizes; one is
            drawn at random on every call.
        max_size: upper bound for the longer side, or None for no bound.
        restrict: when True, ``get_size`` returns ``(size, max_size)``
            directly without looking at the image dimensions.
    """

    def __init__(self, min_size, max_size, restrict=False):
        # Normalise a scalar into a one-element tuple so random.choice works.
        self.min_size = min_size if isinstance(min_size, (list, tuple)) else (min_size,)
        self.max_size = max_size
        self.restrict = restrict

    # modified from torchvision to add support for max size
    def get_size(self, image_size):
        """Compute the output size for an input of ``image_size``.

        ``image_size`` is unpacked as ``(w, h)`` and the result is returned
        in the opposite order, ``(oh, ow)``.
        """
        width, height = image_size
        short_target = random.choice(self.min_size)
        long_limit = self.max_size

        if self.restrict:
            return (short_target, long_limit)

        if long_limit is not None:
            short_side = float(min((width, height)))
            long_side = float(max((width, height)))
            # Shrink the target if scaling the short side to it would push
            # the long side past the limit.
            if long_side / short_side * short_target > long_limit:
                short_target = int(round(long_limit * short_side / long_side))

        width_matches = width <= height and width == short_target
        height_matches = height <= width and height == short_target
        if width_matches or height_matches:
            # Short side already has the requested length: keep the size.
            return (height, width)

        if width < height:
            return (int(short_target * height / width), short_target)
        return (short_target, int(short_target * width / height))

    def __call__(self, image, target):
        if isinstance(image, np.ndarray):
            # NOTE(review): shape[:2] is passed where get_size unpacks (w, h),
            # and the (oh, ow) result is handed to cv2.resize unchanged;
            # presumably the two swaps cancel out -- confirm.
            new_size = self.get_size(image.shape[:2])
            image = cv2.resize(image, new_size)
        else:
            image = F.resize(image, self.get_size(image.size))
            new_size = image.size

        if target is not None:
            target = target.resize(new_size)
        return image, target
class RandomHorizontalFlip(object):
    """Mirror the image left-to-right with probability ``prob``.

    NumPy arrays are flipped with ``np.fliplr``; any other image type is
    passed to ``F.hflip``.  A non-None target is flipped through
    ``target.transpose(0)``.
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        if not (random.random() < self.prob):
            return image, target

        flip = np.fliplr if isinstance(image, np.ndarray) else F.hflip
        image = flip(image)
        if target is not None:
            target = target.transpose(0)
        return image, target
class RandomVerticalFlip(object):
    """Flip the image top-to-bottom with probability ``prob``.

    NumPy arrays are flipped with ``np.flipud``; any other image type is
    passed to ``F.vflip``.  A non-None target is flipped through
    ``target.transpose(1)``.
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        if random.random() < self.prob:
            if isinstance(image, np.ndarray):
                image = np.flipud(image)
            else:
                image = F.vflip(image)
            # Guard against image-only pipelines, matching RandomHorizontalFlip;
            # without it a None target raises AttributeError.
            if target is not None:
                target = target.transpose(1)
        return image, target
class ToTensor(object):
    """Convert the image with ``F.to_tensor``; the target is passed through untouched."""

    def __call__(self, image, target):
        tensor = F.to_tensor(image)
        return tensor, target
class Normalize(object):
    """Normalize an image with ``F.normalize`` after optional reordering and rescaling.

    Args:
        mean: forwarded to ``F.normalize`` as ``mean``.
        std: forwarded to ``F.normalize`` as ``std``.
        format: case-insensitive string; if it contains ``"bgr"`` the first
            axis is re-indexed with ``[2, 1, 0]``, and if it contains
            ``"255"`` the image is multiplied by 255 before normalizing.
    """

    def __init__(self, mean, std, format="rgb"):
        self.format = format.lower()
        self.mean = mean
        self.std = std

    def __call__(self, image, target):
        fmt = self.format
        if "bgr" in fmt:
            # Presumably swaps RGB -> BGR along the channel axis; assumes the
            # first axis holds the channels -- TODO confirm.
            image = image[[2, 1, 0]]
        if "255" in fmt:
            image = image * 255
        normalized = F.normalize(image, mean=self.mean, std=self.std)
        return normalized, target
class ColorJitter(object):
    """Wrap ``torchvision.transforms.ColorJitter`` for ``(image, target)`` pipelines.

    All four arguments are forwarded unchanged to the torchvision transform;
    the target is returned untouched.
    """

    def __init__(self, brightness=0.0, contrast=0.0, saturation=0.0, hue=0.0):
        jitter_options = dict(
            brightness=brightness,
            contrast=contrast,
            saturation=saturation,
            hue=hue,
        )
        self.color_jitter = torchvision.transforms.ColorJitter(**jitter_options)

    def __call__(self, image, target):
        return self.color_jitter(image), target
class RandomCrop(object):
    """Randomly crop an image so that the crop overlaps the boxes sufficiently.

    Args:
        prob: probability of attempting a crop at all.
        min_ious: candidate minimum-IoU thresholds between the crop window
            and the boxes; one sampling mode is drawn per attempt.
        min_crop_size: smallest allowed crop width/height, as a fraction of
            the image width/height.

    The image is expected to be a NumPy array indexed as ``[row, col, ...]``.
    The target must expose ``bbox`` (with a ``.numpy()`` method) and
    ``get_field("labels")``.  The returned target is a new ``BoxList`` that
    carries only the ``labels`` field.
    """

    def __init__(self, prob=0.5, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3):
        self.prob = prob
        # Mode 1 means "return the original image"; mode 0 puts no lower
        # bound on the IoU.
        self.sample_mode = (1, *min_ious, 0)
        self.min_crop_size = min_crop_size

    def __call__(self, img, target):
        if random.random() > self.prob:
            return img, target
        # Nothing to constrain the crop with: leave the sample untouched
        # instead of failing on target.bbox.
        if target is None:
            return img, target

        boxes = target.bbox.numpy()
        # With zero boxes overlaps.min() below would raise ValueError.
        if boxes.size == 0:
            return img, target
        labels = target.get_field("labels")
        h, w = img.shape[:2]

        while True:
            mode = random.choice(self.sample_mode)
            if mode == 1:
                return img, target
            min_iou = mode

            new_w = random.uniform(self.min_crop_size * w, w)
            new_h = random.uniform(self.min_crop_size * h, h)
            # h / w in [0.5, 2]
            if new_h / new_w < 0.5 or new_h / new_w > 2:
                continue

            left = random.uniform(0, w - new_w)
            top = random.uniform(0, h - new_h)
            patch = np.array([left, top, left + new_w, top + new_h])

            overlaps = matrix_iou(patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
            if overlaps.min() < min_iou:
                continue

            # Keep only the boxes whose center lies strictly inside the crop.
            center = (boxes[:, :2] + boxes[:, 2:]) / 2
            mask = (
                (center[:, 0] > patch[0])
                & (center[:, 1] > patch[1])
                & (center[:, 0] < patch[2])
                & (center[:, 1] < patch[3])
            )
            if not mask.any():
                continue

            kept_boxes = boxes[mask]
            kept_labels = labels[mask]

            # Crop the pixels, then clip the boxes to the window and shift
            # them into the crop's coordinate frame.
            img = img[int(patch[1]) : int(patch[3]), int(patch[0]) : int(patch[2])]
            kept_boxes[:, 2:] = kept_boxes[:, 2:].clip(max=patch[2:])
            kept_boxes[:, :2] = kept_boxes[:, :2].clip(min=patch[:2])
            kept_boxes -= np.tile(patch[:2], 2)

            new_target = BoxList(kept_boxes, (img.shape[1], img.shape[0]), mode="xyxy")
            new_target.add_field("labels", kept_labels)
            return img, new_target
class RandomAffine(object):
    """Apply a random rotation, scale, translation and shear to an image and its boxes.

    Args:
        prob: probability of applying the transform.
        degrees: ``(min, max)`` rotation angle in degrees.
        translate: ``(x, y)`` maximum shift as a fraction of the image width
            and height respectively.
        scale: ``(min, max)`` scale factor.
        shear: ``(min, max)`` shear angle in degrees, drawn independently for
            the x and y shear.
        borderValue: forwarded to ``cv2.warpPerspective`` as ``borderValue``.

    The image must be a NumPy array indexed as ``[row, col, ...]``.  When a
    target is given, its ``bbox`` attribute is replaced in place by the
    warped and clipped boxes.
    """

    def __init__(
        self,
        prob=0.5,
        degrees=(-10, 10),
        translate=(0.1, 0.1),
        scale=(0.9, 1.1),
        shear=(-2, 2),
        borderValue=(127.5, 127.5, 127.5),
    ):
        self.prob = prob
        self.degrees = degrees
        self.translate = translate
        self.scale = scale
        self.shear = shear
        self.borderValue = borderValue

    def __call__(self, img, targets=None):
        if random.random() > self.prob:
            return img, targets

        # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
        border = 0  # width of added border (optional)
        height, width = img.shape[:2]

        # Rotation and scale about the image center.
        R = np.eye(3)
        a = random.random() * (self.degrees[1] - self.degrees[0]) + self.degrees[0]
        s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
        R[:2] = cv2.getRotationMatrix2D(angle=a, center=(width / 2, height / 2), scale=s)

        # Translation: the x shift scales with the width and the y shift
        # with the height (the two were previously swapped).
        T = np.eye(3)
        T[0, 2] = (random.random() * 2 - 1) * self.translate[0] * width + border  # x translation (pixels)
        T[1, 2] = (random.random() * 2 - 1) * self.translate[1] * height + border  # y translation (pixels)

        # Shear
        S = np.eye(3)
        S[0, 1] = math.tan(
            (random.random() * (self.shear[1] - self.shear[0]) + self.shear[0]) * math.pi / 180
        )  # x shear (deg)
        S[1, 0] = math.tan(
            (random.random() * (self.shear[1] - self.shear[0]) + self.shear[0]) * math.pi / 180
        )  # y shear (deg)

        M = S @ T @ R  # Combined matrix. ORDER IS IMPORTANT HERE!!
        imw = cv2.warpPerspective(
            img, M, dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=self.borderValue
        )  # borderValue follows the image's channel order

        # Image-only call (targets defaults to None): nothing else to warp.
        if targets is None:
            return imw, targets

        bbox = targets.bbox
        n = bbox.shape[0]
        if n == 0:
            return imw, targets

        # Warp the four corners of every box.
        points = bbox[:, 0:4]
        xy = np.ones((n * 4, 3))
        xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
        xy = (xy @ M.T)[:, :2].reshape(n, 8)

        # Axis-aligned boxes enclosing the warped corners.
        x = xy[:, [0, 2, 4, 6]]
        y = xy[:, [1, 3, 5, 7]]
        xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

        # Angle-based reduction: shrink each enclosing box around its center
        # by a factor that depends on the rotation angle.
        radians = a * math.pi / 180
        reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
        x = (xy[:, 2] + xy[:, 0]) / 2
        y = (xy[:, 3] + xy[:, 1]) / 2
        w = (xy[:, 2] - xy[:, 0]) * reduction
        h = (xy[:, 3] - xy[:, 1]) * reduction
        xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

        # Clip the warped boxes to the image bounds (no box is dropped here).
        x1 = np.clip(xy[:, 0], 0, width)
        y1 = np.clip(xy[:, 1], 0, height)
        x2 = np.clip(xy[:, 2], 0, width)
        y2 = np.clip(xy[:, 3], 0, height)
        new_bbox = np.concatenate((x1, y1, x2, y2)).reshape(4, n).T
        targets.bbox = torch.as_tensor(new_bbox, dtype=torch.float32)
        return imw, targets
class RandomErasing:
    """Randomly overwrite rectangular patches of an image and drop heavily covered boxes.

    Args:
        prob: probability of erasing anything.
        era_l: lower bound of the erased area, as a fraction of the image area.
        era_h: upper bound of the erased area, as a fraction of the image area.
        min_aspect: smallest aspect ratio of a patch; the largest is its reciprocal.
        mode: fill strategy, one of ``"const"`` (zeros), ``"rand"`` (one random
            value per channel) or ``"pixel"`` (a random value per element).
        max_count: maximum number of patches per call (the minimum is 1).
        max_overlap: boxes whose overlap with an erased patch, as returned by
            ``matrix_iou(..., relative=True)``, reaches this value are removed.
        max_value: scale applied to the random fill values.

    The image is modified in place.
    """

    def __init__(
        self,
        prob=0.5,
        era_l=0.02,
        era_h=1 / 3,
        min_aspect=0.3,
        mode="const",
        max_count=1,
        max_overlap=0.3,
        max_value=255,
    ):
        self.prob = prob
        self.era_l, self.era_h = era_l, era_h
        self.min_aspect = min_aspect
        self.min_count, self.max_count = 1, max_count
        self.max_overlap = max_overlap
        self.max_value = max_value
        self.mode = mode.lower()
        assert self.mode in ["const", "rand", "pixel"], "invalid erase mode: %s" % self.mode

    def _get_pixels(self, patch_size):
        """Return the fill values for a patch of shape ``patch_size``."""
        if self.mode == "pixel":
            return np.random.random(patch_size) * self.max_value
        if self.mode == "rand":
            return np.random.random((1, 1, patch_size[-1])) * self.max_value
        return np.zeros((1, 1, patch_size[-1]))

    def _erase_one(self, image, count):
        """Try up to 10 times to erase one patch; return its box or None."""
        ih, iw, ic = image.shape
        ia = ih * iw
        for _attempt in range(10):
            erase_area = random.uniform(self.era_l, self.era_h) * ia / count
            # Sample the aspect ratio log-uniformly.
            log_ratio = random.uniform(math.log(self.min_aspect), math.log(1 / self.min_aspect))
            aspect_ratio = math.exp(log_ratio)
            eh = int(round(math.sqrt(erase_area * aspect_ratio)))
            ew = int(round(math.sqrt(erase_area / aspect_ratio)))
            if not (eh < ih and ew < iw):
                continue
            x = random.randint(0, iw - ew)
            y = random.randint(0, ih - eh)
            image[y : y + eh, x : x + ew, :] = self._get_pixels((eh, ew, ic))
            return [x, y, x + ew, y + eh]
        return None

    def __call__(self, image, target):
        if random.random() > self.prob:
            return image, target

        if self.min_count == self.max_count:
            count = self.min_count
        else:
            count = random.randint(self.min_count, self.max_count)

        erase_boxes = []
        for _ in range(count):
            erased = self._erase_one(image, count)
            if erased is not None:
                erase_boxes.append(erased)

        if target is None or not erase_boxes:
            return image, target

        boxes = target.bbox.numpy()
        labels = target.get_field("labels")
        overlap = matrix_iou(np.array(erase_boxes), boxes, relative=True)
        keep = overlap.max(axis=0) < self.max_overlap
        kept_boxes = boxes[keep]
        kept_labels = labels[keep]
        target.bbox = torch.as_tensor(kept_boxes, dtype=torch.float32)
        target.add_field("labels", kept_labels)
        return image, target