# Copyright (c) OpenMMLab. All rights reserved. import math import warnings from typing import Optional import torch import torch.nn as nn from torch import Tensor from mmdet.registry import MODELS from mmdet.structures.bbox import bbox_overlaps from .utils import weighted_loss @weighted_loss def iou_loss(pred: Tensor, target: Tensor, linear: bool = False, mode: str = 'log', eps: float = 1e-6) -> Tensor: """IoU loss. Computing the IoU loss between a set of predicted bboxes and target bboxes. The loss is calculated as negative log of IoU. Args: pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), shape (n, 4). target (Tensor): Corresponding gt bboxes, shape (n, 4). linear (bool, optional): If True, use linear scale of loss instead of log scale. Default: False. mode (str): Loss scaling mode, including "linear", "square", and "log". Default: 'log' eps (float): Epsilon to avoid log(0). Return: Tensor: Loss tensor. """ assert mode in ['linear', 'square', 'log'] if linear: mode = 'linear' warnings.warn('DeprecationWarning: Setting "linear=True" in ' 'iou_loss is deprecated, please use "mode=`linear`" ' 'instead.') ious = bbox_overlaps(pred, target, is_aligned=True).clamp(min=eps) if mode == 'linear': loss = 1 - ious elif mode == 'square': loss = 1 - ious**2 elif mode == 'log': loss = -ious.log() else: raise NotImplementedError return loss @weighted_loss def bounded_iou_loss(pred: Tensor, target: Tensor, beta: float = 0.2, eps: float = 1e-3) -> Tensor: """BIoULoss. This is an implementation of paper `Improving Object Localization with Fitness NMS and Bounded IoU Loss. `_. Args: pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), shape (n, 4). target (Tensor): Corresponding gt bboxes, shape (n, 4). beta (float, optional): Beta parameter in smoothl1. eps (float, optional): Epsilon to avoid NaN values. Return: Tensor: Loss tensor. """ pred_ctrx = (pred[:, 0] + pred[:, 2]) * 0.5 pred_ctry = (pred[:, 1] + pred[:, 3]) * 0.5 pred_w = pred[:, 2] - pred[:, 0] pred_h = pred[:, 3] - pred[:, 1] with torch.no_grad(): target_ctrx = (target[:, 0] + target[:, 2]) * 0.5 target_ctry = (target[:, 1] + target[:, 3]) * 0.5 target_w = target[:, 2] - target[:, 0] target_h = target[:, 3] - target[:, 1] dx = target_ctrx - pred_ctrx dy = target_ctry - pred_ctry loss_dx = 1 - torch.max( (target_w - 2 * dx.abs()) / (target_w + 2 * dx.abs() + eps), torch.zeros_like(dx)) loss_dy = 1 - torch.max( (target_h - 2 * dy.abs()) / (target_h + 2 * dy.abs() + eps), torch.zeros_like(dy)) loss_dw = 1 - torch.min(target_w / (pred_w + eps), pred_w / (target_w + eps)) loss_dh = 1 - torch.min(target_h / (pred_h + eps), pred_h / (target_h + eps)) # view(..., -1) does not work for empty tensor loss_comb = torch.stack([loss_dx, loss_dy, loss_dw, loss_dh], dim=-1).flatten(1) loss = torch.where(loss_comb < beta, 0.5 * loss_comb * loss_comb / beta, loss_comb - 0.5 * beta) return loss @weighted_loss def giou_loss(pred: Tensor, target: Tensor, eps: float = 1e-7) -> Tensor: r"""`Generalized Intersection over Union: A Metric and A Loss for Bounding Box Regression `_. Args: pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), shape (n, 4). target (Tensor): Corresponding gt bboxes, shape (n, 4). eps (float): Epsilon to avoid log(0). Return: Tensor: Loss tensor. """ gious = bbox_overlaps(pred, target, mode='giou', is_aligned=True, eps=eps) loss = 1 - gious return loss @weighted_loss def diou_loss(pred: Tensor, target: Tensor, eps: float = 1e-7) -> Tensor: r"""Implementation of `Distance-IoU Loss: Faster and Better Learning for Bounding Box Regression https://arxiv.org/abs/1911.08287`_. Code is modified from https://github.com/Zzh-tju/DIoU. Args: pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), shape (n, 4). target (Tensor): Corresponding gt bboxes, shape (n, 4). eps (float): Epsilon to avoid log(0). Return: Tensor: Loss tensor. """ # overlap lt = torch.max(pred[:, :2], target[:, :2]) rb = torch.min(pred[:, 2:], target[:, 2:]) wh = (rb - lt).clamp(min=0) overlap = wh[:, 0] * wh[:, 1] # union ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1]) ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1]) union = ap + ag - overlap + eps # IoU ious = overlap / union # enclose area enclose_x1y1 = torch.min(pred[:, :2], target[:, :2]) enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:]) enclose_wh = (enclose_x2y2 - enclose_x1y1).clamp(min=0) cw = enclose_wh[:, 0] ch = enclose_wh[:, 1] c2 = cw**2 + ch**2 + eps b1_x1, b1_y1 = pred[:, 0], pred[:, 1] b1_x2, b1_y2 = pred[:, 2], pred[:, 3] b2_x1, b2_y1 = target[:, 0], target[:, 1] b2_x2, b2_y2 = target[:, 2], target[:, 3] left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4 right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4 rho2 = left + right # DIoU dious = ious - rho2 / c2 loss = 1 - dious return loss @weighted_loss def ciou_loss(pred: Tensor, target: Tensor, eps: float = 1e-7) -> Tensor: r"""`Implementation of paper `Enhancing Geometric Factors into Model Learning and Inference for Object Detection and Instance Segmentation `_. Code is modified from https://github.com/Zzh-tju/CIoU. Args: pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), shape (n, 4). target (Tensor): Corresponding gt bboxes, shape (n, 4). eps (float): Epsilon to avoid log(0). Return: Tensor: Loss tensor. """ # overlap lt = torch.max(pred[:, :2], target[:, :2]) rb = torch.min(pred[:, 2:], target[:, 2:]) wh = (rb - lt).clamp(min=0) overlap = wh[:, 0] * wh[:, 1] # union ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1]) ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1]) union = ap + ag - overlap + eps # IoU ious = overlap / union # enclose area enclose_x1y1 = torch.min(pred[:, :2], target[:, :2]) enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:]) enclose_wh = (enclose_x2y2 - enclose_x1y1).clamp(min=0) cw = enclose_wh[:, 0] ch = enclose_wh[:, 1] c2 = cw**2 + ch**2 + eps b1_x1, b1_y1 = pred[:, 0], pred[:, 1] b1_x2, b1_y2 = pred[:, 2], pred[:, 3] b2_x1, b2_y1 = target[:, 0], target[:, 1] b2_x2, b2_y2 = target[:, 2], target[:, 3] w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4 right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4 rho2 = left + right factor = 4 / math.pi**2 v = factor * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) with torch.no_grad(): alpha = (ious > 0.5).float() * v / (1 - ious + v) # CIoU cious = ious - (rho2 / c2 + alpha * v) loss = 1 - cious.clamp(min=-1.0, max=1.0) return loss @weighted_loss def eiou_loss(pred: Tensor, target: Tensor, smooth_point: float = 0.1, eps: float = 1e-7) -> Tensor: r"""Implementation of paper `Extended-IoU Loss: A Systematic IoU-Related Method: Beyond Simplified Regression for Better Localization `_ Code is modified from https://github.com//ShiqiYu/libfacedetection.train. Args: pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), shape (n, 4). target (Tensor): Corresponding gt bboxes, shape (n, 4). smooth_point (float): hyperparameter, default is 0.1. eps (float): Epsilon to avoid log(0). Return: Tensor: Loss tensor. """ px1, py1, px2, py2 = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3] tx1, ty1, tx2, ty2 = target[:, 0], target[:, 1], target[:, 2], target[:, 3] # extent top left ex1 = torch.min(px1, tx1) ey1 = torch.min(py1, ty1) # intersection coordinates ix1 = torch.max(px1, tx1) iy1 = torch.max(py1, ty1) ix2 = torch.min(px2, tx2) iy2 = torch.min(py2, ty2) # extra xmin = torch.min(ix1, ix2) ymin = torch.min(iy1, iy2) xmax = torch.max(ix1, ix2) ymax = torch.max(iy1, iy2) # Intersection intersection = (ix2 - ex1) * (iy2 - ey1) + (xmin - ex1) * (ymin - ey1) - ( ix1 - ex1) * (ymax - ey1) - (xmax - ex1) * ( iy1 - ey1) # Union union = (px2 - px1) * (py2 - py1) + (tx2 - tx1) * ( ty2 - ty1) - intersection + eps # IoU ious = 1 - (intersection / union) # Smooth-EIoU smooth_sign = (ious < smooth_point).detach().float() loss = 0.5 * smooth_sign * (ious**2) / smooth_point + (1 - smooth_sign) * ( ious - 0.5 * smooth_point) return loss @MODELS.register_module() class IoULoss(nn.Module): """IoULoss. Computing the IoU loss between a set of predicted bboxes and target bboxes. Args: linear (bool): If True, use linear scale of loss else determined by mode. Default: False. eps (float): Epsilon to avoid log(0). reduction (str): Options are "none", "mean" and "sum". loss_weight (float): Weight of loss. mode (str): Loss scaling mode, including "linear", "square", and "log". Default: 'log' """ def __init__(self, linear: bool = False, eps: float = 1e-6, reduction: str = 'mean', loss_weight: float = 1.0, mode: str = 'log') -> None: super().__init__() assert mode in ['linear', 'square', 'log'] if linear: mode = 'linear' warnings.warn('DeprecationWarning: Setting "linear=True" in ' 'IOULoss is deprecated, please use "mode=`linear`" ' 'instead.') self.mode = mode self.linear = linear self.eps = eps self.reduction = reduction self.loss_weight = loss_weight def forward(self, pred: Tensor, target: Tensor, weight: Optional[Tensor] = None, avg_factor: Optional[int] = None, reduction_override: Optional[str] = None, **kwargs) -> Tensor: """Forward function. Args: pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), shape (n, 4). target (Tensor): The learning target of the prediction, shape (n, 4). weight (Tensor, optional): The weight of loss for each prediction. Defaults to None. avg_factor (int, optional): Average factor that is used to average the loss. Defaults to None. reduction_override (str, optional): The reduction method used to override the original reduction method of the loss. Defaults to None. Options are "none", "mean" and "sum". Return: Tensor: Loss tensor. """ assert reduction_override in (None, 'none', 'mean', 'sum') reduction = ( reduction_override if reduction_override else self.reduction) if (weight is not None) and (not torch.any(weight > 0)) and ( reduction != 'none'): if pred.dim() == weight.dim() + 1: weight = weight.unsqueeze(1) return (pred * weight).sum() # 0 if weight is not None and weight.dim() > 1: # TODO: remove this in the future # reduce the weight of shape (n, 4) to (n,) to match the # iou_loss of shape (n,) assert weight.shape == pred.shape weight = weight.mean(-1) loss = self.loss_weight * iou_loss( pred, target, weight, mode=self.mode, eps=self.eps, reduction=reduction, avg_factor=avg_factor, **kwargs) return loss @MODELS.register_module() class BoundedIoULoss(nn.Module): """BIoULoss. This is an implementation of paper `Improving Object Localization with Fitness NMS and Bounded IoU Loss. `_. Args: beta (float, optional): Beta parameter in smoothl1. eps (float, optional): Epsilon to avoid NaN values. reduction (str): Options are "none", "mean" and "sum". loss_weight (float): Weight of loss. """ def __init__(self, beta: float = 0.2, eps: float = 1e-3, reduction: str = 'mean', loss_weight: float = 1.0) -> None: super().__init__() self.beta = beta self.eps = eps self.reduction = reduction self.loss_weight = loss_weight def forward(self, pred: Tensor, target: Tensor, weight: Optional[Tensor] = None, avg_factor: Optional[int] = None, reduction_override: Optional[str] = None, **kwargs) -> Tensor: """Forward function. Args: pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), shape (n, 4). target (Tensor): The learning target of the prediction, shape (n, 4). weight (Optional[Tensor], optional): The weight of loss for each prediction. Defaults to None. avg_factor (Optional[int], optional): Average factor that is used to average the loss. Defaults to None. reduction_override (Optional[str], optional): The reduction method used to override the original reduction method of the loss. Defaults to None. Options are "none", "mean" and "sum". Returns: Tensor: Loss tensor. """ if weight is not None and not torch.any(weight > 0): if pred.dim() == weight.dim() + 1: weight = weight.unsqueeze(1) return (pred * weight).sum() # 0 assert reduction_override in (None, 'none', 'mean', 'sum') reduction = ( reduction_override if reduction_override else self.reduction) loss = self.loss_weight * bounded_iou_loss( pred, target, weight, beta=self.beta, eps=self.eps, reduction=reduction, avg_factor=avg_factor, **kwargs) return loss @MODELS.register_module() class GIoULoss(nn.Module): r"""`Generalized Intersection over Union: A Metric and A Loss for Bounding Box Regression `_. Args: eps (float): Epsilon to avoid log(0). reduction (str): Options are "none", "mean" and "sum". loss_weight (float): Weight of loss. """ def __init__(self, eps: float = 1e-6, reduction: str = 'mean', loss_weight: float = 1.0) -> None: super().__init__() self.eps = eps self.reduction = reduction self.loss_weight = loss_weight def forward(self, pred: Tensor, target: Tensor, weight: Optional[Tensor] = None, avg_factor: Optional[int] = None, reduction_override: Optional[str] = None, **kwargs) -> Tensor: """Forward function. Args: pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), shape (n, 4). target (Tensor): The learning target of the prediction, shape (n, 4). weight (Optional[Tensor], optional): The weight of loss for each prediction. Defaults to None. avg_factor (Optional[int], optional): Average factor that is used to average the loss. Defaults to None. reduction_override (Optional[str], optional): The reduction method used to override the original reduction method of the loss. Defaults to None. Options are "none", "mean" and "sum". Returns: Tensor: Loss tensor. """ if weight is not None and not torch.any(weight > 0): if pred.dim() == weight.dim() + 1: weight = weight.unsqueeze(1) return (pred * weight).sum() # 0 assert reduction_override in (None, 'none', 'mean', 'sum') reduction = ( reduction_override if reduction_override else self.reduction) if weight is not None and weight.dim() > 1: # TODO: remove this in the future # reduce the weight of shape (n, 4) to (n,) to match the # giou_loss of shape (n,) assert weight.shape == pred.shape weight = weight.mean(-1) loss = self.loss_weight * giou_loss( pred, target, weight, eps=self.eps, reduction=reduction, avg_factor=avg_factor, **kwargs) return loss @MODELS.register_module() class DIoULoss(nn.Module): r"""Implementation of `Distance-IoU Loss: Faster and Better Learning for Bounding Box Regression https://arxiv.org/abs/1911.08287`_. Code is modified from https://github.com/Zzh-tju/DIoU. Args: eps (float): Epsilon to avoid log(0). reduction (str): Options are "none", "mean" and "sum". loss_weight (float): Weight of loss. """ def __init__(self, eps: float = 1e-6, reduction: str = 'mean', loss_weight: float = 1.0) -> None: super().__init__() self.eps = eps self.reduction = reduction self.loss_weight = loss_weight def forward(self, pred: Tensor, target: Tensor, weight: Optional[Tensor] = None, avg_factor: Optional[int] = None, reduction_override: Optional[str] = None, **kwargs) -> Tensor: """Forward function. Args: pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), shape (n, 4). target (Tensor): The learning target of the prediction, shape (n, 4). weight (Optional[Tensor], optional): The weight of loss for each prediction. Defaults to None. avg_factor (Optional[int], optional): Average factor that is used to average the loss. Defaults to None. reduction_override (Optional[str], optional): The reduction method used to override the original reduction method of the loss. Defaults to None. Options are "none", "mean" and "sum". Returns: Tensor: Loss tensor. """ if weight is not None and not torch.any(weight > 0): if pred.dim() == weight.dim() + 1: weight = weight.unsqueeze(1) return (pred * weight).sum() # 0 assert reduction_override in (None, 'none', 'mean', 'sum') reduction = ( reduction_override if reduction_override else self.reduction) if weight is not None and weight.dim() > 1: # TODO: remove this in the future # reduce the weight of shape (n, 4) to (n,) to match the # giou_loss of shape (n,) assert weight.shape == pred.shape weight = weight.mean(-1) loss = self.loss_weight * diou_loss( pred, target, weight, eps=self.eps, reduction=reduction, avg_factor=avg_factor, **kwargs) return loss @MODELS.register_module() class CIoULoss(nn.Module): r"""`Implementation of paper `Enhancing Geometric Factors into Model Learning and Inference for Object Detection and Instance Segmentation `_. Code is modified from https://github.com/Zzh-tju/CIoU. Args: eps (float): Epsilon to avoid log(0). reduction (str): Options are "none", "mean" and "sum". loss_weight (float): Weight of loss. """ def __init__(self, eps: float = 1e-6, reduction: str = 'mean', loss_weight: float = 1.0) -> None: super().__init__() self.eps = eps self.reduction = reduction self.loss_weight = loss_weight def forward(self, pred: Tensor, target: Tensor, weight: Optional[Tensor] = None, avg_factor: Optional[int] = None, reduction_override: Optional[str] = None, **kwargs) -> Tensor: """Forward function. Args: pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), shape (n, 4). target (Tensor): The learning target of the prediction, shape (n, 4). weight (Optional[Tensor], optional): The weight of loss for each prediction. Defaults to None. avg_factor (Optional[int], optional): Average factor that is used to average the loss. Defaults to None. reduction_override (Optional[str], optional): The reduction method used to override the original reduction method of the loss. Defaults to None. Options are "none", "mean" and "sum". Returns: Tensor: Loss tensor. """ if weight is not None and not torch.any(weight > 0): if pred.dim() == weight.dim() + 1: weight = weight.unsqueeze(1) return (pred * weight).sum() # 0 assert reduction_override in (None, 'none', 'mean', 'sum') reduction = ( reduction_override if reduction_override else self.reduction) if weight is not None and weight.dim() > 1: # TODO: remove this in the future # reduce the weight of shape (n, 4) to (n,) to match the # giou_loss of shape (n,) assert weight.shape == pred.shape weight = weight.mean(-1) loss = self.loss_weight * ciou_loss( pred, target, weight, eps=self.eps, reduction=reduction, avg_factor=avg_factor, **kwargs) return loss @MODELS.register_module() class EIoULoss(nn.Module): r"""Implementation of paper `Extended-IoU Loss: A Systematic IoU-Related Method: Beyond Simplified Regression for Better Localization `_ Code is modified from https://github.com//ShiqiYu/libfacedetection.train. Args: eps (float): Epsilon to avoid log(0). reduction (str): Options are "none", "mean" and "sum". loss_weight (float): Weight of loss. smooth_point (float): hyperparameter, default is 0.1. """ def __init__(self, eps: float = 1e-6, reduction: str = 'mean', loss_weight: float = 1.0, smooth_point: float = 0.1) -> None: super().__init__() self.eps = eps self.reduction = reduction self.loss_weight = loss_weight self.smooth_point = smooth_point def forward(self, pred: Tensor, target: Tensor, weight: Optional[Tensor] = None, avg_factor: Optional[int] = None, reduction_override: Optional[str] = None, **kwargs) -> Tensor: """Forward function. Args: pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), shape (n, 4). target (Tensor): The learning target of the prediction, shape (n, 4). weight (Optional[Tensor], optional): The weight of loss for each prediction. Defaults to None. avg_factor (Optional[int], optional): Average factor that is used to average the loss. Defaults to None. reduction_override (Optional[str], optional): The reduction method used to override the original reduction method of the loss. Defaults to None. Options are "none", "mean" and "sum". Returns: Tensor: Loss tensor. """ if weight is not None and not torch.any(weight > 0): if pred.dim() == weight.dim() + 1: weight = weight.unsqueeze(1) return (pred * weight).sum() # 0 assert reduction_override in (None, 'none', 'mean', 'sum') reduction = ( reduction_override if reduction_override else self.reduction) if weight is not None and weight.dim() > 1: assert weight.shape == pred.shape weight = weight.mean(-1) loss = self.loss_weight * eiou_loss( pred, target, weight, smooth_point=self.smooth_point, eps=self.eps, reduction=reduction, avg_factor=avg_factor, **kwargs) return loss