|
|
|
|
|
""" |
|
Generic image transform utilities.
|
""" |
|
|
|
import math
import random

import cv2
import numpy as np
import torch.nn.functional as F
from torch.autograd import Variable

# Iterable moved to collections.abc in Python 3.3 and was removed from
# collections in Python 3.10; keep the old location as a fallback.
try:
    from collections.abc import Iterable
except ImportError:
    from collections import Iterable
|
|
|
|
|
class ResizePad:
    """Resize an image to fit inside a target size and zero-pad the rest.

    The image is scaled with preserved aspect ratio so it fits inside
    (h, w), then centered on a zero canvas of exactly (h, w).
    """

    def __init__(self, size):
        """size: an int (square output) or an (height, width) iterable.

        Raises TypeError for any other argument type.
        """
        if not isinstance(size, (int, Iterable)):
            raise TypeError('Got inappropriate size arg: {}'.format(size))
        # The original check accepted ints but then crashed on the tuple
        # unpack below; treat an int as a square target.
        if isinstance(size, int):
            size = (size, size)
        self.h, self.w = size

    def __call__(self, img):
        """Return an (h, w, C) array; grayscale input gains a channel axis."""
        h, w = img.shape[:2]
        scale = min(self.h / h, self.w / w)
        resized_h = int(np.round(h * scale))
        resized_w = int(np.round(w * scale))
        # Center the resized image on the canvas; the differences are
        # non-negative because scale = min(...) never overshoots.
        pad_h = (self.h - resized_h) // 2
        pad_w = (self.w - resized_w) // 2

        resized_img = cv2.resize(img, (resized_w, resized_h))

        if img.ndim > 2:
            new_img = np.zeros(
                (self.h, self.w, img.shape[-1]), dtype=resized_img.dtype)
        else:
            # Promote grayscale to an explicit single-channel layout.
            resized_img = np.expand_dims(resized_img, -1)
            new_img = np.zeros((self.h, self.w, 1), dtype=resized_img.dtype)
        new_img[pad_h: pad_h + resized_h,
                pad_w: pad_w + resized_w, ...] = resized_img
        return new_img
|
|
|
|
|
class CropResize:
    """Remove letterbox padding: upsample, then center-crop to the target.

    Acts as the inverse of a resize-and-pad step for a 2-D torch tensor.
    """

    def __call__(self, img, size):
        """Crop-resize a 2-D torch tensor ``img`` to ``size`` = (h, w).

        The tensor is bilinearly upsampled so the smaller relative side
        matches the target, then the excess is cropped symmetrically.
        Raises TypeError for an invalid ``size`` argument.
        """
        if not isinstance(size, (int, Iterable)):
            raise TypeError('Got inappropriate size arg: {}'.format(size))
        im_h, im_w = img.data.shape[:2]
        input_h, input_w = size
        scale = max(input_h / im_h, input_w / im_w)

        resized_h = int(np.round(im_h * scale))
        resized_w = int(np.round(im_w * scale))
        # Crop offsets are >= 0 because the scale uses max(...).
        crop_h = (resized_h - input_h) // 2
        crop_w = (resized_w - input_w) // 2

        # F.upsample is deprecated; F.interpolate with align_corners=False
        # matches its runtime behavior for bilinear mode.
        resized_img = F.interpolate(
            img.unsqueeze(0).unsqueeze(0), size=(resized_h, resized_w),
            mode='bilinear', align_corners=False)

        resized_img = resized_img.squeeze().unsqueeze(0)

        return resized_img[0, crop_h: crop_h + input_h,
                           crop_w: crop_w + input_w]
|
|
|
|
|
class ResizeImage:
    """Resize the largest of the sides of the image to a given size."""

    def __init__(self, size):
        """size: target length in pixels for the longest image side."""
        if not isinstance(size, (int, Iterable)):
            raise TypeError('Got inappropriate size arg: {}'.format(size))
        self.size = size

    def __call__(self, img):
        """Bilinearly resize a (C, H, W) torch tensor; returns a detached tensor."""
        im_h, im_w = img.shape[-2:]
        scale = min(self.size / im_h, self.size / im_w)
        resized_h = int(np.round(im_h * scale))
        resized_w = int(np.round(im_w * scale))
        # Variable()/.data are legacy autograd idioms and F.upsample is
        # deprecated; .detach() + F.interpolate are the modern equivalents.
        out = F.interpolate(
            img.unsqueeze(0), size=(resized_h, resized_w),
            mode='bilinear', align_corners=False).squeeze().detach()
        return out
|
|
|
|
|
class ResizeAnnotation:
    """Resize the largest of the sides of the annotation to a given size."""

    def __init__(self, size):
        """size: target length in pixels for the longest side."""
        if not isinstance(size, (int, Iterable)):
            raise TypeError('Got inappropriate size arg: {}'.format(size))
        self.size = size

    def __call__(self, img):
        """Bilinearly resize a 2-D (H, W) torch tensor; returns a detached tensor."""
        im_h, im_w = img.shape[-2:]
        scale = min(self.size / im_h, self.size / im_w)
        resized_h = int(np.round(im_h * scale))
        resized_w = int(np.round(im_w * scale))
        # Variable()/.data are legacy autograd idioms and F.upsample is
        # deprecated; .detach() + F.interpolate are the modern equivalents.
        out = F.interpolate(
            img.unsqueeze(0).unsqueeze(0),
            size=(resized_h, resized_w),
            mode='bilinear', align_corners=False).squeeze().detach()
        return out
|
|
|
|
|
class ToNumpy:
    """Convert a torch.*Tensor into a numpy ndarray."""

    def __call__(self, tensor):
        """Return the tensor's data as a numpy array (shares memory)."""
        array = tensor.numpy()
        return array
|
|
|
def letterbox(img, mask, height, color=(123.7, 116.3, 103.5)):
    """Scale ``img`` (and optional ``mask``) so its longest side equals
    ``height``, then pad to a square ``height`` x ``height`` canvas.

    Returns (img, mask, ratio, dw, dh) where dw/dh are the half-pads per
    axis; ``mask`` is returned as None when not supplied.
    """
    h0, w0 = img.shape[:2]
    ratio = float(height) / max(h0, w0)
    scaled = (round(w0 * ratio), round(h0 * ratio))  # (w, h) order for cv2
    dw = (height - scaled[0]) / 2
    dh = (height - scaled[1]) / 2
    # round(x - 0.1) / round(x + 0.1) splits an odd pad across both sides.
    top, bottom = round(dh - 0.1), round(dh + 0.1)
    left, right = round(dw - 0.1), round(dw + 0.1)
    img = cv2.resize(img, scaled, interpolation=cv2.INTER_AREA)
    img = cv2.copyMakeBorder(img, top, bottom, left, right,
                             cv2.BORDER_CONSTANT, value=color)
    if mask is not None:
        # Nearest-neighbour keeps mask labels discrete; pad value 1.
        mask = cv2.resize(mask, scaled, interpolation=cv2.INTER_NEAREST)
        mask = cv2.copyMakeBorder(mask, top, bottom, left, right,
                                  cv2.BORDER_CONSTANT, value=1)
    return img, mask, ratio, dw, dh
|
|
|
def random_affine(img, mask, targets, degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-2, 2),
                  borderValue=(123.7, 116.3, 103.5), all_bbox=None):
    # Random affine augmentation (rotation + scale + translation + shear)
    # applied jointly to the image, an optional mask, and box target(s).
    # The return shape depends on the argument types -- see branches below.
    border = 0  # extra canvas border in pixels (0 = none)
    height = max(img.shape[0], img.shape[1]) + border * 2

    # Rotation + isotropic scale about the image center.
    R = np.eye(3)
    a = random.random() * (degrees[1] - degrees[0]) + degrees[0]
    s = random.random() * (scale[1] - scale[0]) + scale[0]
    R[:2] = cv2.getRotationMatrix2D(angle=a, center=(img.shape[1] / 2, img.shape[0] / 2), scale=s)

    # Translation.
    # NOTE(review): x-translation scales with shape[0] (height) and
    # y-translation with shape[1] (width) -- possibly swapped; confirm intent.
    T = np.eye(3)
    T[0, 2] = (random.random() * 2 - 1) * translate[0] * img.shape[0] + border
    T[1, 2] = (random.random() * 2 - 1) * translate[1] * img.shape[1] + border

    # Shear (degrees converted to a tangent slope).
    S = np.eye(3)
    S[0, 1] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)
    S[1, 0] = math.tan((random.random() * (shear[1] - shear[0]) + shear[0]) * math.pi / 180)

    # Combined transform; applied right-to-left: rotate, translate, shear.
    M = S @ T @ R
    imw = cv2.warpPerspective(img, M, dsize=(height, height), flags=cv2.INTER_LINEAR,
                              borderValue=borderValue)
    if mask is not None:
        # Nearest-neighbour keeps mask labels discrete; out-of-image areas
        # are filled with 1.
        maskw = cv2.warpPerspective(mask, M, dsize=(height, height), flags=cv2.INTER_NEAREST,
                                    borderValue=1)
    else:
        maskw = None

    if type(targets)==type([1]):
        # A list of boxes: transform each and return the list.
        targetlist=[]
        for bbox in targets:
            targetlist.append(wrap_points(bbox, M, height, a))
        return imw, maskw, targetlist, M
    elif all_bbox is not None:
        # Single target plus an (N, 4) array of extra boxes, transformed
        # in place row by row.
        targets = wrap_points(targets, M, height, a)
        for ii in range(all_bbox.shape[0]):
            all_bbox[ii,:] = wrap_points(all_bbox[ii,:], M, height, a)
        return imw, maskw, targets, all_bbox, M
    elif targets is not None:
        # Single target box.
        targets = wrap_points(targets, M, height, a)
        return imw, maskw, targets, M
    else:
        # No targets: only the warped image is returned.
        return imw
|
|
|
def wrap_points(targets, M, height, a):
    """Apply affine matrix ``M`` to an axis-aligned box and re-box the result.

    Parameters
    ----------
    targets : np.ndarray
        Box coordinates as [x1, y1, x2, y2].
    M : np.ndarray
        3x3 affine/perspective matrix (applied to homogeneous corners).
    height : int or float
        Output canvas size; coordinates are clipped to [0, height].
    a : float
        Rotation angle in degrees, used to shrink the box because the
        axis-aligned hull of rotated corners over-grows the object.

    Returns
    -------
    np.ndarray
        Transformed box as [x1, y1, x2, y2].
    """
    points = targets.copy()

    # Transform all four corners and take their axis-aligned bounds.
    xy = np.ones((4, 3))
    xy[:, :2] = points[[0, 1, 2, 3, 0, 3, 2, 1]].reshape(4, 2)
    xy = (xy @ M.T)[:, :2].reshape(1, 8)
    x = xy[:, [0, 2, 4, 6]]
    y = xy[:, [1, 3, 5, 7]]
    xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, 1).T

    # Shrink width/height to compensate for the hull growth under rotation.
    radians = a * math.pi / 180
    reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
    x = (xy[:, 2] + xy[:, 0]) / 2
    y = (xy[:, 3] + xy[:, 1]) / 2
    w = (xy[:, 2] - xy[:, 0]) * reduction
    h = (xy[:, 3] - xy[:, 1]) * reduction
    xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, 1).T

    # Keep the box inside the canvas.
    np.clip(xy, 0, height, out=xy)
    # NOTE: the original also computed a validity mask (area ratio, minimum
    # size, aspect ratio) that was never used; that dead code is removed.
    return xy[0]