Spaces:
Sleeping
Sleeping
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. | |
import cv2 | |
import random | |
import numpy as np | |
import math | |
import torch | |
import torchvision | |
from torchvision.transforms import functional as F | |
from maskrcnn_benchmark.structures.bounding_box import BoxList | |
def matrix_iou(a, b, relative=False):
    """
    Return the pairwise overlap between two sets of boxes (numpy version,
    used for data augmentation).

    Args:
        a: array of shape (A, 4); each row is indexed as (x1, y1, x2, y2).
        b: array of shape (B, 4), same layout as ``a``.
        relative: when True, return ``intersection / area(b)`` (the fraction
            of each ``b`` box covered by each ``a`` box) instead of the IoU.

    Returns:
        Array of shape (A, B); entry ``[i, j]`` relates ``a[i]`` to ``b[j]``.
    """
    # a is expanded to (A, 1, 2) so it broadcasts against b's (B, 2) corners.
    lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
    rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])

    # Zero out pairs that do not overlap on both axes.
    area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
    area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
    area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)

    if relative:
        # area_b has shape (B,) and must line up with the columns of the
        # (A, B) intersection matrix; adding a trailing axis here would
        # broadcast against the rows instead and yield wrong values/shapes.
        return area_i / (area_b + 1e-12)
    return area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12)
class RACompose(object):
    """
    Three-stage pipeline: fixed pre-transforms, a random draw of
    ``concurrent`` transforms from ``rand_transforms``, then fixed
    post-transforms.
    """

    def __init__(self, pre_transforms, rand_transforms, post_transforms, concurrent=2):
        self.preprocess = pre_transforms
        self.rand_transforms = rand_transforms
        self.transforms = post_transforms
        self.concurrent = concurrent

    def __call__(self, image, target):
        for pre in self.preprocess:
            image, target = pre(image, target)

        # random.choices samples with replacement, so a transform may repeat.
        picked = random.choices(self.rand_transforms, k=self.concurrent)
        for rand_t in picked:
            # Each random transform receives a fresh numpy copy of the image.
            image, target = rand_t(np.array(image), target)

        for post in self.transforms:
            image, target = post(image, target)
        return image, target

    def __repr__(self):
        def listing(items):
            # One transform per line.
            return "".join("\n" + " {0}".format(item) for item in items)

        pieces = [
            self.__class__.__name__ + "(",
            listing(self.preprocess),
            "\nRandom select {0} from: (".format(self.concurrent),
            listing(self.rand_transforms),
            ")\nThen, apply:",
            listing(self.transforms),
            "\n)",
        ]
        return "".join(pieces)
class Compose(object):
    """Apply a list of ``(image, target) -> (image, target)`` transforms in order."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target=None):
        for transform in self.transforms:
            image, target = transform(image, target)
        # When no target remains, only the image is returned.
        return image if target is None else (image, target)

    def __repr__(self):
        body = "".join("\n" + " {0}".format(transform) for transform in self.transforms)
        return self.__class__.__name__ + "(" + body + "\n)"
class Resize(object):
    """
    Resize an image (numpy array or an object accepted by
    ``torchvision.transforms.functional.resize``) and its target.

    ``min_size`` may be a single value or a list/tuple; one entry is drawn at
    random on every call. ``max_size`` caps the longer side unless it is None.
    With ``restrict=True`` the size ``(size, max_size)`` is used as-is.
    """

    def __init__(self, min_size, max_size, restrict=False):
        # Normalise a scalar min_size to a 1-tuple so random.choice works.
        self.min_size = min_size if isinstance(min_size, (list, tuple)) else (min_size,)
        self.max_size = max_size
        self.restrict = restrict

    # modified from torchvision to add support for max size
    def get_size(self, image_size):
        """Return the output size for an input whose two entries are unpacked as ``w, h``."""
        w, h = image_size
        size = random.choice(self.min_size)
        max_size = self.max_size
        if self.restrict:
            return (size, max_size)

        if max_size is not None:
            short_side = float(min((w, h)))
            long_side = float(max((w, h)))
            # Shrink the target short side if the long side would exceed max_size.
            if long_side / short_side * size > max_size:
                size = int(round(max_size * short_side / long_side))

        short_side_matches = (w <= h and w == size) or (h <= w and h == size)
        if short_side_matches:
            return (h, w)

        if w < h:
            return (int(size * h / w), size)
        return (size, int(size * w / h))

    def __call__(self, image, target):
        if isinstance(image, np.ndarray):
            # The first two shape entries are fed to get_size, and its result
            # is handed to cv2.resize unchanged.
            new_size = self.get_size(image.shape[:2])
            image = cv2.resize(image, new_size)
        else:
            image = F.resize(image, self.get_size(image.size))
            new_size = image.size

        if target is not None:
            target = target.resize(new_size)
        return image, target
class RandomHorizontalFlip(object):
    """Flip the image left-right (and the target via ``transpose(0)``) with probability ``prob``."""

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        if random.random() >= self.prob:
            # No flip this time.
            return image, target

        if isinstance(image, np.ndarray):
            image = np.fliplr(image)
        else:
            image = F.hflip(image)

        if target is not None:
            target = target.transpose(0)
        return image, target
class RandomVerticalFlip(object):
    """Flip the image top-bottom (and the target via ``transpose(1)``) with probability ``prob``."""

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, image, target):
        """
        Args:
            image: numpy array, or an image accepted by
                ``torchvision.transforms.functional.vflip``.
            target: object exposing ``transpose(method)``, or None.

        Returns:
            ``(image, target)``, flipped together or both left unchanged.
        """
        if random.random() < self.prob:
            if isinstance(image, np.ndarray):
                image = np.flipud(image)
            else:
                image = F.vflip(image)
            # Mirror RandomHorizontalFlip: image-only use passes target=None,
            # which must not be dereferenced.
            if target is not None:
                target = target.transpose(1)
        return image, target
class ToTensor(object):
    """Convert the image with ``torchvision.transforms.functional.to_tensor``; the target passes through."""

    def __call__(self, image, target):
        tensor = F.to_tensor(image)
        return tensor, target
class Normalize(object):
    """
    Normalise an image with ``mean``/``std`` via
    ``torchvision.transforms.functional.normalize``.

    ``format`` is matched case-insensitively by substring: if it contains
    "bgr" the first axis is reordered with indices [2, 1, 0]; if it contains
    "255" the image is multiplied by 255 before normalisation.
    """

    def __init__(self, mean, std, format="rgb"):
        self.mean = mean
        self.std = std
        self.format = format.lower()

    def __call__(self, image, target):
        fmt = self.format
        if "bgr" in fmt:
            image = image[[2, 1, 0]]
        if "255" in fmt:
            image = image * 255
        normalized = F.normalize(image, mean=self.mean, std=self.std)
        return normalized, target
class ColorJitter(object):
    """Wrap ``torchvision.transforms.ColorJitter`` so it fits the ``(image, target)`` call convention."""

    def __init__(
        self,
        brightness=0.0,
        contrast=0.0,
        saturation=0.0,
        hue=0.0,
    ):
        jitter_options = {
            "brightness": brightness,
            "contrast": contrast,
            "saturation": saturation,
            "hue": hue,
        }
        self.color_jitter = torchvision.transforms.ColorJitter(**jitter_options)

    def __call__(self, image, target):
        # Only the image is altered; the target passes through untouched.
        return self.color_jitter(image), target
class RandomCrop(object):
    """
    Randomly crop a numpy image and keep the boxes whose centres fall inside
    the crop.

    Args:
        prob: the crop is attempted when ``random.random() <= prob``.
        min_ious: candidate minimum-IoU thresholds between the crop patch and
            the boxes; one sampling mode is drawn per attempt.
        min_crop_size: lower bound on crop width/height as a fraction of the
            image width/height.
    """

    def __init__(self, prob=0.5, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3):
        self.prob = prob
        # Mode 1 means "return the original image"; mode 0 is a zero IoU threshold.
        self.sample_mode = (1, *min_ious, 0)
        self.min_crop_size = min_crop_size

    def __call__(self, img, target):
        """
        Args:
            img: numpy array of shape (h, w, c).
            target: object exposing ``bbox`` (tensor with ``.numpy()``) and
                ``get_field("labels")``, or None.

        Returns:
            ``(img, target)``: either the inputs unchanged, or the cropped
            image with a new ``BoxList`` that carries only the "labels" field.
        """
        if random.random() > self.prob:
            return img, target

        # Nothing to crop against: the sampling loop below takes the minimum
        # over per-box overlaps, which fails on an empty set of boxes.
        if target is None:
            return img, target
        boxes = target.bbox.numpy()
        if boxes.shape[0] == 0:
            return img, target

        h, w, c = img.shape
        labels = target.get_field("labels")

        while True:
            mode = random.choice(self.sample_mode)
            if mode == 1:
                return img, target

            min_iou = mode

            new_w = random.uniform(self.min_crop_size * w, w)
            new_h = random.uniform(self.min_crop_size * h, h)

            # h / w in [0.5, 2]
            if new_h / new_w < 0.5 or new_h / new_w > 2:
                continue

            left = random.uniform(0, w - new_w)
            top = random.uniform(0, h - new_h)

            patch = np.array([left, top, left + new_w, top + new_h])
            overlaps = matrix_iou(patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
            # Every box must overlap the patch by at least min_iou.
            if overlaps.min() < min_iou:
                continue

            # center of boxes should inside the crop img
            center = (boxes[:, :2] + boxes[:, 2:]) / 2
            mask = (
                (center[:, 0] > patch[0])
                * (center[:, 1] > patch[1])
                * (center[:, 0] < patch[2])
                * (center[:, 1] < patch[3])
            )
            if not mask.any():
                continue

            # Boolean indexing copies, so the caller's bbox tensor is not modified.
            boxes = boxes[mask]
            labels = labels[mask]

            # adjust boxes
            img = img[int(patch[1]) : int(patch[3]), int(patch[0]) : int(patch[2])]
            boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
            boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
            # Shift into the crop's coordinate frame.
            boxes -= np.tile(patch[:2], 2)

            new_target = BoxList(boxes, (img.shape[1], img.shape[0]), mode="xyxy")
            new_target.add_field("labels", labels)
            return img, new_target
class RandomAffine(object):
    """
    Randomly rotate, scale, translate and shear a numpy image, and warp the
    target's boxes with the same matrix.

    Args:
        prob: the warp is applied when ``random.random() <= prob``.
        degrees: (min, max) rotation angle in degrees.
        translate: (x, y) maximum translation as a fraction of the image
            width and height respectively.
        scale: (min, max) scale factor.
        shear: (min, max) shear angle in degrees, drawn independently per axis.
        borderValue: fill value passed to ``cv2.warpPerspective``.
    """

    def __init__(
        self,
        prob=0.5,
        degrees=(-10, 10),
        translate=(0.1, 0.1),
        scale=(0.9, 1.1),
        shear=(-2, 2),
        borderValue=(127.5, 127.5, 127.5),
    ):
        self.prob = prob
        self.degrees = degrees
        self.translate = translate
        self.scale = scale
        self.shear = shear
        self.borderValue = borderValue

    def __call__(self, img, targets=None):
        """
        Args:
            img: numpy image whose first two axes are (height, width).
            targets: object exposing a ``bbox`` tensor whose first four
                columns are read as x1, y1, x2, y2, or None.

        Returns:
            ``(warped_image, targets)``; ``targets.bbox`` is replaced in place
            when boxes are present.
        """
        if random.random() > self.prob:
            return img, targets
        # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
        # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
        height, width = img.shape[:2]

        # Rotation and Scale (about the image centre)
        R = np.eye(3)
        a = random.uniform(self.degrees[0], self.degrees[1])
        s = random.uniform(self.scale[0], self.scale[1])
        R[:2] = cv2.getRotationMatrix2D(angle=a, center=(width / 2, height / 2), scale=s)

        # Translation: x is a fraction of the width, y a fraction of the height.
        T = np.eye(3)
        T[0, 2] = (random.random() * 2 - 1) * self.translate[0] * width  # x translation (pixels)
        T[1, 2] = (random.random() * 2 - 1) * self.translate[1] * height  # y translation (pixels)

        # Shear
        S = np.eye(3)
        S[0, 1] = math.tan(random.uniform(self.shear[0], self.shear[1]) * math.pi / 180)  # x shear (deg)
        S[1, 0] = math.tan(random.uniform(self.shear[0], self.shear[1]) * math.pi / 180)  # y shear (deg)

        M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
        imw = cv2.warpPerspective(
            img, M, dsize=(width, height), flags=cv2.INTER_LINEAR, borderValue=self.borderValue
        )  # BGR order borderValue

        # Warp the boxes too. targets defaults to None, so bbox must only be
        # read once we know a target with boxes was supplied.
        if targets is not None and len(targets.bbox) > 0:
            points = targets.bbox.numpy()[:, 0:4]
            n = points.shape[0]

            # Warp all four corners of every box.
            xy = np.ones((n * 4, 3))
            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = (xy @ M.T)[:, :2].reshape(n, 8)

            # Axis-aligned box enclosing the warped corners.
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # Apply angle-based reduction: scale width/height about the box centre.
            radians = a * math.pi / 180
            reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
            x = (xy[:, 2] + xy[:, 0]) / 2
            y = (xy[:, 3] + xy[:, 1]) / 2
            w = (xy[:, 2] - xy[:, 0]) * reduction
            h = (xy[:, 3] - xy[:, 1]) * reduction
            xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

            # Clip boxes to the image bounds (boxes are not dropped).
            x1 = np.clip(xy[:, 0], 0, width)
            y1 = np.clip(xy[:, 1], 0, height)
            x2 = np.clip(xy[:, 2], 0, width)
            y2 = np.clip(xy[:, 3], 0, height)
            new_bbox = np.concatenate((x1, y1, x2, y2)).reshape(4, n).T
            targets.bbox = torch.as_tensor(new_bbox, dtype=torch.float32)
        return imw, targets
class RandomErasing:
    """
    Overwrite random rectangles of a numpy image in place and drop the target
    boxes whose overlap with an erased rectangle reaches ``max_overlap``.

    ``mode`` selects the fill: "const" writes zeros, "rand" one random value
    per channel, "pixel" a random value per pixel (both scaled by ``max_value``).
    """

    def __init__(
        self,
        prob=0.5,
        era_l=0.02,
        era_h=1 / 3,
        min_aspect=0.3,
        mode="const",
        max_count=1,
        max_overlap=0.3,
        max_value=255,
    ):
        self.prob = prob
        self.era_l = era_l
        self.era_h = era_h
        self.min_aspect = min_aspect
        self.min_count = 1
        self.max_count = max_count
        self.max_overlap = max_overlap
        self.max_value = max_value
        self.mode = mode.lower()
        assert self.mode in ["const", "rand", "pixel"], "invalid erase mode: %s" % self.mode

    def _get_pixels(self, patch_size):
        """Return the fill values for a patch; non-pixel modes rely on broadcasting."""
        channels = patch_size[-1]
        if self.mode == "pixel":
            return np.random.random(patch_size) * self.max_value
        if self.mode == "rand":
            return np.random.random((1, 1, channels)) * self.max_value
        return np.zeros((1, 1, channels))

    def __call__(self, image, target):
        if random.random() > self.prob:
            return image, target

        ih, iw, ic = image.shape
        image_area = ih * iw
        if self.min_count == self.max_count:
            count = self.min_count
        else:
            count = random.randint(self.min_count, self.max_count)

        erased = []
        for _ in range(count):
            # Up to 10 attempts to draw a rectangle that fits inside the image.
            for _attempt in range(10):
                area = random.uniform(self.era_l, self.era_h) * image_area / count
                log_ratio = random.uniform(math.log(self.min_aspect), math.log(1 / self.min_aspect))
                ratio = math.exp(log_ratio)
                eh = int(round(math.sqrt(area * ratio)))
                ew = int(round(math.sqrt(area / ratio)))
                if eh >= ih or ew >= iw:
                    continue
                x = random.randint(0, iw - ew)
                y = random.randint(0, ih - eh)
                image[y : y + eh, x : x + ew, :] = self._get_pixels((eh, ew, ic))
                erased.append([x, y, x + ew, y + eh])
                break

        if target is not None and erased:
            boxes = target.bbox.numpy()
            labels = target.get_field("labels")
            overlap = matrix_iou(np.array(erased), boxes, relative=True)
            # Keep only boxes below the overlap threshold.
            keep = overlap.max(axis=0) < self.max_overlap
            target.bbox = torch.as_tensor(boxes[keep], dtype=torch.float32)
            target.add_field("labels", labels[keep])
        return image, target