|
import math |
|
import warnings |
|
import random |
|
import numbers |
|
import numpy as np |
|
from PIL import Image, ImageFilter |
|
from collections.abc import Sequence |
|
|
|
import torch |
|
import torchvision.transforms.functional as TF |
|
|
|
_pil_interpolation_to_str = { |
|
Image.NEAREST: 'PIL.Image.NEAREST', |
|
Image.BILINEAR: 'PIL.Image.BILINEAR', |
|
Image.BICUBIC: 'PIL.Image.BICUBIC', |
|
Image.LANCZOS: 'PIL.Image.LANCZOS', |
|
Image.HAMMING: 'PIL.Image.HAMMING', |
|
Image.BOX: 'PIL.Image.BOX', |
|
} |
|
|
|
|
|
def _get_image_size(img): |
|
if TF._is_pil_image(img): |
|
return img.size |
|
elif isinstance(img, torch.Tensor) and img.dim() > 2: |
|
        return img.shape[-2:][::-1]  # (width, height), matching PIL's Image.size ordering
|
else: |
|
raise TypeError("Unexpected type {}".format(type(img))) |
|
|
|
|
|
class RandomHorizontalFlip(object): |
|
"""Horizontal flip the given PIL Image randomly with a given probability. |
|
|
|
Args: |
|
p (float): probability of the image being flipped. Default value is 0.5 |
|
""" |
|
def __init__(self, p=0.5): |
|
self.p = p |
|
|
|
def __call__(self, img, mask): |
|
""" |
|
Args: |
|
            img (PIL Image): Image to be flipped.
            mask (PIL Image): Mask to be flipped in the same way.
|
|
|
Returns: |
|
            (PIL Image, PIL Image): Randomly flipped image and mask.
|
""" |
|
if random.random() < self.p: |
|
img = TF.hflip(img) |
|
mask = TF.hflip(mask) |
|
return img, mask |
|
|
|
def __repr__(self): |
|
return self.__class__.__name__ + '(p={})'.format(self.p) |
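
# Illustrative usage sketch (not part of the original module): the paired
# transforms here take and return an (img, mask) tuple so the same random
# decision is applied to both. `img` and `mask` are assumed to be PIL Images
# of the same size; RandomVerticalFlip below is used identically.
#
#   >>> flip = RandomHorizontalFlip(p=0.5)
#   >>> img, mask = flip(img, mask)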
|
|
|
|
|
class RandomVerticalFlip(object): |
|
"""Vertical flip the given PIL Image randomly with a given probability. |
|
|
|
Args: |
|
p (float): probability of the image being flipped. Default value is 0.5 |
|
""" |
|
def __init__(self, p=0.5): |
|
self.p = p |
|
|
|
def __call__(self, img, mask): |
|
""" |
|
Args: |
|
            img (PIL Image): Image to be flipped.
            mask (PIL Image): Mask to be flipped in the same way.
|
|
|
Returns: |
|
            (PIL Image, PIL Image): Randomly flipped image and mask.
|
""" |
|
if random.random() < self.p: |
|
img = TF.vflip(img) |
|
mask = TF.vflip(mask) |
|
return img, mask |
|
|
|
def __repr__(self): |
|
return self.__class__.__name__ + '(p={})'.format(self.p) |
|
|
|
|
|
class GaussianBlur(object): |
|
"""Gaussian blur augmentation from SimCLR: https://arxiv.org/abs/2002.05709""" |
|
    def __init__(self, sigma=(0.1, 2.0)):  # tuple default avoids a shared mutable default argument
|
self.sigma = sigma |
|
|
|
def __call__(self, x): |
|
sigma = random.uniform(self.sigma[0], self.sigma[1]) |
|
x = x.filter(ImageFilter.GaussianBlur(radius=sigma)) |
|
return x |
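
# Illustrative usage sketch (an assumption, not part of the original API):
# GaussianBlur acts on the image alone, so it composes with standard
# torchvision pipelines rather than with the paired (img, mask) transforms
# above. `pil_img` is assumed to be a PIL Image.
#
#   >>> import torchvision.transforms as T
#   >>> blur = T.RandomApply([GaussianBlur(sigma=(0.1, 2.0))], p=0.5)
#   >>> pil_img = blur(pil_img)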
|
|
|
|
|
class RandomAffine(object): |
|
"""Random affine transformation of the image keeping center invariant |
|
|
|
Args: |
|
degrees (sequence or float or int): Range of degrees to select from. |
|
If degrees is a number instead of sequence like (min, max), the range of degrees |
|
will be (-degrees, +degrees). Set to 0 to deactivate rotations. |
|
translate (tuple, optional): tuple of maximum absolute fraction for horizontal |
|
and vertical translations. For example translate=(a, b), then horizontal shift |
|
is randomly sampled in the range -img_width * a < dx < img_width * a and vertical shift is |
|
randomly sampled in the range -img_height * b < dy < img_height * b. Will not translate by default. |
|
scale (tuple, optional): scaling factor interval, e.g (a, b), then scale is |
|
randomly sampled from the range a <= scale <= b. Will keep original scale by default. |
|
shear (sequence or float or int, optional): Range of degrees to select from. |
|
If shear is a number, a shear parallel to the x axis in the range (-shear, +shear) |
|
            will be applied. Else if shear is a tuple or list of 2 values, a shear parallel to the x axis in the
|
range (shear[0], shear[1]) will be applied. Else if shear is a tuple or list of 4 values, |
|
            an x-axis shear in (shear[0], shear[1]) and a y-axis shear in (shear[2], shear[3]) will be applied.
|
            Will not apply shear by default.
|
resample ({PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC}, optional): |
|
An optional resampling filter. See `filters`_ for more information. |
|
If omitted, or if the image has mode "1" or "P", it is set to PIL.Image.NEAREST. |
|
        fillcolor (tuple or int): Optional fill color (tuple for RGB images, int for grayscale) for the area

            outside the transform in the output image. (Pillow>=5.0.0)
|
|
|
.. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters |
|
|
|
""" |
|
def __init__(self, |
|
degrees, |
|
translate=None, |
|
scale=None, |
|
shear=None, |
|
resample=False, |
|
fillcolor=0): |
|
if isinstance(degrees, numbers.Number): |
|
if degrees < 0: |
|
raise ValueError( |
|
"If degrees is a single number, it must be positive.") |
|
self.degrees = (-degrees, degrees) |
|
else: |
|
assert isinstance(degrees, (tuple, list)) and len(degrees) == 2, \ |
|
"degrees should be a list or tuple and it must be of length 2." |
|
self.degrees = degrees |
|
|
|
if translate is not None: |
|
assert isinstance(translate, (tuple, list)) and len(translate) == 2, \ |
|
"translate should be a list or tuple and it must be of length 2." |
|
for t in translate: |
|
if not (0.0 <= t <= 1.0): |
|
raise ValueError( |
|
"translation values should be between 0 and 1") |
|
self.translate = translate |
|
|
|
if scale is not None: |
|
assert isinstance(scale, (tuple, list)) and len(scale) == 2, \ |
|
"scale should be a list or tuple and it must be of length 2." |
|
for s in scale: |
|
if s <= 0: |
|
raise ValueError("scale values should be positive") |
|
self.scale = scale |
|
|
|
if shear is not None: |
|
if isinstance(shear, numbers.Number): |
|
if shear < 0: |
|
raise ValueError( |
|
"If shear is a single number, it must be positive.") |
|
self.shear = (-shear, shear) |
|
else: |
|
assert isinstance(shear, (tuple, list)) and \ |
|
(len(shear) == 2 or len(shear) == 4), \ |
|
"shear should be a list or tuple and it must be of length 2 or 4." |
|
|
|
if len(shear) == 2: |
|
self.shear = [shear[0], shear[1], 0., 0.] |
|
elif len(shear) == 4: |
|
self.shear = [s for s in shear] |
|
else: |
|
self.shear = shear |
|
|
|
self.resample = resample |
|
self.fillcolor = fillcolor |
|
|
|
@staticmethod |
|
def get_params(degrees, translate, scale_ranges, shears, img_size): |
|
"""Get parameters for affine transformation |
|
|
|
Returns: |
|
sequence: params to be passed to the affine transformation |
|
""" |
|
angle = random.uniform(degrees[0], degrees[1]) |
|
if translate is not None: |
|
max_dx = translate[0] * img_size[0] |
|
max_dy = translate[1] * img_size[1] |
|
translations = (np.round(random.uniform(-max_dx, max_dx)), |
|
np.round(random.uniform(-max_dy, max_dy))) |
|
else: |
|
translations = (0, 0) |
|
|
|
if scale_ranges is not None: |
|
scale = random.uniform(scale_ranges[0], scale_ranges[1]) |
|
else: |
|
scale = 1.0 |
|
|
|
if shears is not None: |
|
if len(shears) == 2: |
|
shear = [random.uniform(shears[0], shears[1]), 0.] |
|
elif len(shears) == 4: |
|
shear = [ |
|
random.uniform(shears[0], shears[1]), |
|
random.uniform(shears[2], shears[3]) |
|
] |
|
else: |
|
shear = 0.0 |
|
|
|
return angle, translations, scale, shear |
|
|
|
def __call__(self, img, mask): |
|
""" |
|
        Args:
            img (PIL Image): Image to be transformed.
            mask (PIL Image): Mask to be transformed with the same parameters.
|
|
|
Returns: |
|
            (PIL Image, PIL Image): Affine transformed image and mask.
|
""" |
|
ret = self.get_params(self.degrees, self.translate, self.scale, |
|
self.shear, img.size) |
|
img = TF.affine(img, |
|
*ret, |
|
resample=self.resample, |
|
fillcolor=self.fillcolor) |
|
        mask = TF.affine(mask, *ret, resample=Image.NEAREST, fillcolor=0)  # NEAREST keeps label ids intact
|
return img, mask |
|
|
|
def __repr__(self): |
|
s = '{name}(degrees={degrees}' |
|
if self.translate is not None: |
|
s += ', translate={translate}' |
|
if self.scale is not None: |
|
s += ', scale={scale}' |
|
if self.shear is not None: |
|
s += ', shear={shear}' |
|
if self.resample > 0: |
|
s += ', resample={resample}' |
|
if self.fillcolor != 0: |
|
s += ', fillcolor={fillcolor}' |
|
s += ')' |
|
d = dict(self.__dict__) |
|
d['resample'] = _pil_interpolation_to_str[d['resample']] |
|
return s.format(name=self.__class__.__name__, **d) |
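
# Illustrative usage sketch (an assumption, not part of the original module):
# the image is resampled with the configured filter while the mask always uses
# NEAREST, so label ids are never interpolated. `img` and `mask` are assumed
# to be PIL Images of the same size.
#
#   >>> affine = RandomAffine(degrees=15, translate=(0.1, 0.1), scale=(0.9, 1.1))
#   >>> img, mask = affine(img, mask)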
|
|
|
|
|
class RandomCrop(object): |
|
"""Crop the given PIL Image at a random location. |
|
|
|
Args: |
|
size (sequence or int): Desired output size of the crop. If size is an |
|
int instead of sequence like (h, w), a square crop (size, size) is |
|
made. |
|
padding (int or sequence, optional): Optional padding on each border |
|
of the image. Default is None, i.e no padding. If a sequence of length |
|
4 is provided, it is used to pad left, top, right, bottom borders |
|
respectively. If a sequence of length 2 is provided, it is used to |
|
pad left/right, top/bottom borders, respectively. |
|
pad_if_needed (boolean): It will pad the image if smaller than the |
|
desired size to avoid raising an exception. Since cropping is done |
|
after padding, the padding seems to be done at a random offset. |
|
fill: Pixel fill value for constant fill. Default is 0. If a tuple of |
|
length 3, it is used to fill R, G, B channels respectively. |
|
This value is only used when the padding_mode is constant |
|
padding_mode: Type of padding. Should be: constant, edge, reflect or symmetric. Default is constant. |
|
|
|
- constant: pads with a constant value, this value is specified with fill |
|
|
|
- edge: pads with the last value on the edge of the image |
|
|
|
- reflect: pads with reflection of image (without repeating the last value on the edge) |
|
|
|
padding [1, 2, 3, 4] with 2 elements on both sides in reflect mode |
|
will result in [3, 2, 1, 2, 3, 4, 3, 2] |
|
|
|
- symmetric: pads with reflection of image (repeating the last value on the edge) |
|
|
|
padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode |
|
will result in [2, 1, 1, 2, 3, 4, 4, 3] |
|
|
|
""" |
|
def __init__(self, |
|
size, |
|
padding=None, |
|
pad_if_needed=False, |
|
fill=0, |
|
padding_mode='constant'): |
|
if isinstance(size, numbers.Number): |
|
self.size = (int(size), int(size)) |
|
else: |
|
self.size = size |
|
self.padding = padding |
|
self.pad_if_needed = pad_if_needed |
|
self.fill = fill |
|
self.padding_mode = padding_mode |
|
|
|
@staticmethod |
|
def get_params(img, output_size): |
|
"""Get parameters for ``crop`` for a random crop. |
|
|
|
Args: |
|
img (PIL Image): Image to be cropped. |
|
output_size (tuple): Expected output size of the crop. |
|
|
|
Returns: |
|
tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. |
|
""" |
|
w, h = _get_image_size(img) |
|
th, tw = output_size |
|
if w == tw and h == th: |
|
return 0, 0, h, w |
|
|
|
i = random.randint(0, h - th) |
|
j = random.randint(0, w - tw) |
|
return i, j, th, tw |
|
|
|
def __call__(self, img, mask): |
|
""" |
|
Args: |
|
            img (PIL Image): Image to be cropped.
            mask (PIL Image): Mask to be cropped at the same location.
|
|
|
Returns: |
|
            (PIL Image, PIL Image): Cropped image and mask.
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
i, j, h, w = self.get_params(img, self.size) |
|
img = TF.crop(img, i, j, h, w) |
|
mask = TF.crop(mask, i, j, h, w) |
|
|
|
return img, mask |
|
|
|
def __repr__(self): |
|
return self.__class__.__name__ + '(size={0}, padding={1})'.format( |
|
self.size, self.padding) |
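
# Illustrative usage sketch (an assumption, not part of the original module):
# with pad_if_needed=True, inputs smaller than the crop are padded first, the
# mask with fill 0 (background). `img` and `mask` are assumed to be PIL Images
# of the same size.
#
#   >>> crop = RandomCrop(size=(384, 384), pad_if_needed=True)
#   >>> img, mask = crop(img, mask)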
|
|
|
|
|
class RandomResizedCrop(object): |
|
"""Crop the given PIL Image to random size and aspect ratio. |
|
|
|
A crop of random size (default: of 0.08 to 1.0) of the original size and a random |
|
aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop |
|
is finally resized to given size. |
|
This is popularly used to train the Inception networks. |
|
|
|
Args: |
|
size: expected output size of each edge |
|
scale: range of size of the origin size cropped |
|
ratio: range of aspect ratio of the origin aspect ratio cropped |
|
interpolation: Default: PIL.Image.BILINEAR |
|
""" |
|
def __init__(self, |
|
size, |
|
scale=(0.08, 1.0), |
|
ratio=(3. / 4., 4. / 3.), |
|
interpolation=Image.BILINEAR): |
|
if isinstance(size, (tuple, list)): |
|
self.size = size |
|
else: |
|
self.size = (size, size) |
|
if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): |
|
warnings.warn("range should be of kind (min, max)") |
|
|
|
self.interpolation = interpolation |
|
self.scale = scale |
|
self.ratio = ratio |
|
|
|
@staticmethod |
|
def get_params(img, scale, ratio): |
|
"""Get parameters for ``crop`` for a random sized crop. |
|
|
|
Args: |
|
img (PIL Image): Image to be cropped. |
|
scale (tuple): range of size of the origin size cropped |
|
ratio (tuple): range of aspect ratio of the origin aspect ratio cropped |
|
|
|
Returns: |
|
tuple: params (i, j, h, w) to be passed to ``crop`` for a random |
|
sized crop. |
|
""" |
|
width, height = _get_image_size(img) |
|
area = height * width |
|
|
|
        for _ in range(10):  # attempt a valid random crop up to 10 times
|
target_area = random.uniform(*scale) * area |
|
log_ratio = (math.log(ratio[0]), math.log(ratio[1])) |
|
aspect_ratio = math.exp(random.uniform(*log_ratio)) |
|
|
|
w = int(round(math.sqrt(target_area * aspect_ratio))) |
|
h = int(round(math.sqrt(target_area / aspect_ratio))) |
|
|
|
if 0 < w <= width and 0 < h <= height: |
|
i = random.randint(0, height - h) |
|
j = random.randint(0, width - w) |
|
return i, j, h, w |
|
|
|
|
|
        # Fallback to a central crop when no valid random crop was found
        in_ratio = float(width) / float(height)
|
if (in_ratio < min(ratio)): |
|
w = width |
|
h = int(round(w / min(ratio))) |
|
elif (in_ratio > max(ratio)): |
|
h = height |
|
w = int(round(h * max(ratio))) |
|
else: |
|
w = width |
|
h = height |
|
i = (height - h) // 2 |
|
j = (width - w) // 2 |
|
return i, j, h, w |
|
|
|
def __call__(self, img, mask): |
|
""" |
|
Args: |
|
            img (PIL Image): Image to be cropped and resized.
            mask (PIL Image): Mask to be cropped and resized with the same parameters.
|
|
|
Returns: |
|
            (PIL Image, PIL Image): Randomly cropped and resized image and mask.
|
""" |
|
i, j, h, w = self.get_params(img, self.scale, self.ratio) |
|
|
|
img = TF.resized_crop(img, i, j, h, w, self.size, self.interpolation) |
|
mask = TF.resized_crop(mask, i, j, h, w, self.size, Image.NEAREST) |
|
return img, mask |
|
|
|
def __repr__(self): |
|
interpolate_str = _pil_interpolation_to_str[self.interpolation] |
|
format_string = self.__class__.__name__ + '(size={0}'.format(self.size) |
|
format_string += ', scale={0}'.format( |
|
tuple(round(s, 4) for s in self.scale)) |
|
format_string += ', ratio={0}'.format( |
|
tuple(round(r, 4) for r in self.ratio)) |
|
format_string += ', interpolation={0})'.format(interpolate_str) |
|
return format_string |
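
# Illustrative usage sketch (an assumption, not part of the original module):
# a random area/aspect-ratio crop is sampled once, then the image is resized
# with the configured filter and the mask with NEAREST to the same size.
#
#   >>> rrc = RandomResizedCrop(size=384, scale=(0.36, 1.0))
#   >>> img, mask = rrc(img, mask)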
|
|
|
|
|
class ToOnehot(object): |
|
"""To oneshot tensor |
|
|
|
Args: |
|
        max_obj_n (int): Maximum number of objects kept in the one-hot encoding.
        shuffle (bool): Whether to shuffle the object order before truncating to max_obj_n.
|
""" |
|
def __init__(self, max_obj_n, shuffle): |
|
self.max_obj_n = max_obj_n |
|
self.shuffle = shuffle |
|
|
|
def __call__(self, mask, obj_list=None): |
|
""" |
|
Args: |
|
            mask (numpy.ndarray): Integer label mask to be converted.
            obj_list (list, optional): Object ids to keep; computed from the mask if not given.
|
|
|
Returns: |
|
            (Tensor, list): One-hot mask of shape (max_obj_n + 1, H, W) and the kept object ids.
|
""" |
|
|
|
new_mask = np.zeros((self.max_obj_n + 1, *mask.shape), np.uint8) |
|
|
|
if not obj_list: |
|
obj_list = list() |
|
obj_max = mask.max() + 1 |
|
for i in range(1, obj_max): |
|
tmp = (mask == i).astype(np.uint8) |
|
if tmp.max() > 0: |
|
obj_list.append(i) |
|
|
|
if self.shuffle: |
|
random.shuffle(obj_list) |
|
obj_list = obj_list[:self.max_obj_n] |
|
|
|
for i in range(len(obj_list)): |
|
new_mask[i + 1] = (mask == obj_list[i]).astype(np.uint8) |
|
new_mask[0] = 1 - np.sum(new_mask, axis=0) |
|
|
|
return torch.from_numpy(new_mask), obj_list |
|
|
|
def __repr__(self): |
|
return self.__class__.__name__ + '(max_obj_n={})'.format( |
|
self.max_obj_n) |
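
# Illustrative usage sketch: a (H, W) integer label mask becomes a
# (max_obj_n + 1, H, W) one-hot tensor, channel 0 being background. The toy
# mask below is an assumption for demonstration only.
#
#   >>> mask = np.array([[0, 1], [2, 2]], dtype=np.uint8)
#   >>> onehot, obj_list = ToOnehot(max_obj_n=3, shuffle=False)(mask)
#   >>> onehot.shape
#   torch.Size([4, 2, 2])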
|
|
|
|
|
class Resize(torch.nn.Module): |
|
"""Resize the input image to the given size. |
|
The image can be a PIL Image or a torch Tensor, in which case it is expected |
|
to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions |
|
|
|
Args: |
|
size (sequence or int): Desired output size. If size is a sequence like |
|
(h, w), output size will be matched to this. If size is an int, |
|
smaller edge of the image will be matched to this number. |
|
i.e, if height > width, then image will be rescaled to |
|
(size * height / width, size). |
|
            In torchscript mode size as a single int is not supported, use a tuple or
|
list of length 1: ``[size, ]``. |
|
interpolation (int, optional): Desired interpolation enum defined by `filters`_. |
|
Default is ``PIL.Image.BILINEAR``. If input is Tensor, only ``PIL.Image.NEAREST``, ``PIL.Image.BILINEAR`` |
|
and ``PIL.Image.BICUBIC`` are supported. |
|
""" |
|
def __init__(self, size, interpolation=Image.BILINEAR): |
|
super().__init__() |
|
if not isinstance(size, (int, Sequence)): |
|
raise TypeError("Size should be int or sequence. Got {}".format( |
|
type(size))) |
|
if isinstance(size, Sequence) and len(size) not in (1, 2): |
|
raise ValueError( |
|
"If size is a sequence, it should have 1 or 2 values") |
|
self.size = size |
|
self.interpolation = interpolation |
|
|
|
def forward(self, img, mask): |
|
""" |
|
Args: |
|
            img (PIL Image or Tensor): Image to be scaled.
            mask (PIL Image or Tensor): Mask to be scaled with NEAREST interpolation.
|
|
|
Returns: |
|
            (PIL Image or Tensor, PIL Image or Tensor): Rescaled image and mask.
|
""" |
|
img = TF.resize(img, self.size, self.interpolation) |
|
mask = TF.resize(mask, self.size, Image.NEAREST) |
|
return img, mask |
|
|
|
def __repr__(self): |
|
interpolate_str = _pil_interpolation_to_str[self.interpolation] |
|
return self.__class__.__name__ + '(size={0}, interpolation={1})'.format( |
|
self.size, interpolate_str) |
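
# Illustrative sketch of composing the paired transforms. This module ships no
# Compose for (img, mask) pairs, so the small helper below is a hypothetical
# addition, not part of the original API.
#
#   >>> def compose_pair(transforms, img, mask):
#   ...     for t in transforms:
#   ...         img, mask = t(img, mask)  # each transform keeps img and mask aligned
#   ...     return img, mask
#   >>> train_tf = [RandomHorizontalFlip(), RandomAffine(degrees=10), RandomResizedCrop(384)]
#   >>> img, mask = compose_pair(train_tf, img, mask)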
|
|