# Copyright (c) OpenMMLab. All rights reserved.
import copy
from typing import List, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.ops import nms
from mmengine.structures import InstanceData
from torch import Tensor

from mmdet.registry import MODELS
from mmdet.utils import ConfigType, InstanceList, MultiConfig, OptInstanceList
from .guided_anchor_head import GuidedAnchorHead


@MODELS.register_module()
class GARPNHead(GuidedAnchorHead):
    """Guided-Anchor-based RPN head."""

    def __init__(self,
                 in_channels: int,
                 num_classes: int = 1,
                 init_cfg: MultiConfig = dict(
                     type='Normal',
                     layer='Conv2d',
                     std=0.01,
                     override=dict(
                         type='Normal',
                         name='conv_loc',
                         std=0.01,
                         bias_prob=0.01)),
                 **kwargs) -> None:
        super().__init__(
            num_classes=num_classes,
            in_channels=in_channels,
            init_cfg=init_cfg,
            **kwargs)
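
    # Note on ``init_cfg`` (explanatory, based on the defaults above): the
    # ``override`` entry initialises the ``conv_loc`` bias so that the
    # initial anchor-location probability is roughly ``bias_prob=0.01``,
    # the usual focal-loss-style prior that keeps early location losses
    # stable.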

    def _init_layers(self) -> None:
        """Initialize layers of the head."""
        self.rpn_conv = nn.Conv2d(
            self.in_channels, self.feat_channels, 3, padding=1)
        super(GARPNHead, self)._init_layers()

    def forward_single(self, x: Tensor) -> Tuple[Tensor]:
        """Forward feature of a single scale level."""
        x = self.rpn_conv(x)
        x = F.relu(x, inplace=True)
        (cls_score, bbox_pred, shape_pred,
         loc_pred) = super().forward_single(x)
        return cls_score, bbox_pred, shape_pred, loc_pred
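
    # Shape sketch for ``forward_single`` (illustrative; assumes the common
    # GA-RPN setting of ``feat_channels=256``, one square base prior per
    # location and sigmoid classification -- these values are assumptions,
    # not fixed by this file):
    #
    #   x          (N, 256, H, W) -> rpn_conv + ReLU -> (N, 256, H, W)
    #   cls_score  (N, 1, H, W)   # num_base_priors * cls_out_channels
    #   bbox_pred  (N, 4, H, W)   # num_base_priors * 4
    #   shape_pred (N, 2, H, W)   # num_base_priors * 2, i.e. (dw, dh)
    #   loc_pred   (N, 1, H, W)   # anchor-location probability map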

    def loss_by_feat(
            self,
            cls_scores: List[Tensor],
            bbox_preds: List[Tensor],
            shape_preds: List[Tensor],
            loc_preds: List[Tensor],
            batch_gt_instances: InstanceList,
            batch_img_metas: List[dict],
            batch_gt_instances_ignore: OptInstanceList = None) -> dict:
        """Calculate the loss based on the features extracted by the detection
        head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level,
                each with shape (N, num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W).
            shape_preds (list[Tensor]): Shape predictions for each scale
                level with shape (N, num_anchors * 2, H, W).
            loc_preds (list[Tensor]): Location predictions for each scale
                level with shape (N, 1, H, W).
            batch_gt_instances (list[:obj:`InstanceData`]): Batch of
                gt_instance. It usually includes ``bboxes`` and ``labels``
                attributes.
            batch_img_metas (list[dict]): Meta information of each image,
                e.g., image size, scaling factor, etc.
            batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
                Batch of gt_instances_ignore. It includes ``bboxes`` attribute
                data that is ignored during training and testing.
                Defaults to None.

        Returns:
            dict: A dictionary of loss components.
        """
        losses = super().loss_by_feat(
            cls_scores,
            bbox_preds,
            shape_preds,
            loc_preds,
            batch_gt_instances,
            batch_img_metas,
            batch_gt_instances_ignore=batch_gt_instances_ignore)
        return dict(
            loss_rpn_cls=losses['loss_cls'],
            loss_rpn_bbox=losses['loss_bbox'],
            loss_anchor_shape=losses['loss_shape'],
            loss_anchor_loc=losses['loss_loc'])
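
    # Key-mapping sketch (illustrative): the parent class returns losses
    # keyed ``loss_cls`` / ``loss_bbox`` / ``loss_shape`` / ``loss_loc``;
    # this head only renames them with RPN-specific prefixes, e.g.
    #
    #   losses = head.loss_by_feat(cls_scores, bbox_preds, shape_preds,
    #                              loc_preds, gt_instances, img_metas)
    #   sorted(losses)  # ['loss_anchor_loc', 'loss_anchor_shape',
    #                   #  'loss_rpn_bbox', 'loss_rpn_cls']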

    def _predict_by_feat_single(self,
                                cls_scores: List[Tensor],
                                bbox_preds: List[Tensor],
                                mlvl_anchors: List[Tensor],
                                mlvl_masks: List[Tensor],
                                img_meta: dict,
                                cfg: ConfigType,
                                rescale: bool = False) -> InstanceData:
        """Transform a single image's features extracted from the head into
        bbox results.

        Args:
            cls_scores (list[Tensor]): Box scores from all scale
                levels of a single image, each item has shape
                (num_priors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas from
                all scale levels of a single image, each item has shape
                (num_priors * 4, H, W).
            mlvl_anchors (list[Tensor]): Each element in the list is
                the anchors of a single level in the feature pyramid. It has
                shape (num_priors, 4).
            mlvl_masks (list[Tensor]): Each element in the list is the
                location mask of a single level.
            img_meta (dict): Image meta info.
            cfg (:obj:`ConfigDict` or dict): Test / postprocessing
                configuration. If None, ``test_cfg`` would be used.
            rescale (bool): If True, return boxes in original image space.
                Defaults to False.

        Returns:
            :obj:`InstanceData`: Detection results of each image
            after the post process.
            Each item usually contains the following keys.

            - scores (Tensor): Classification scores, has a shape
              (num_instances, ).
            - labels (Tensor): Labels of bboxes, has a shape
              (num_instances, ).
            - bboxes (Tensor): Has a shape (num_instances, 4), the last
              dimension 4 arranged as (x1, y1, x2, y2).
        """
        cfg = self.test_cfg if cfg is None else cfg
        cfg = copy.deepcopy(cfg)
        assert cfg.nms.get('type', 'nms') == 'nms', \
            'GARPNHead only supports naive nms.'

        mlvl_proposals = []
        for idx in range(len(cls_scores)):
            rpn_cls_score = cls_scores[idx]
            rpn_bbox_pred = bbox_preds[idx]
            anchors = mlvl_anchors[idx]
            mask = mlvl_masks[idx]
            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
            # if no location is kept, end.
            if mask.sum() == 0:
                continue
            rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
            if self.use_sigmoid_cls:
                rpn_cls_score = rpn_cls_score.reshape(-1)
                scores = rpn_cls_score.sigmoid()
            else:
                rpn_cls_score = rpn_cls_score.reshape(-1, 2)
                # remind that we set FG labels to [0, num_class-1]
                # since mmdet v2.0
                # BG cat_id: num_class
                scores = rpn_cls_score.softmax(dim=1)[:, :-1]
            # filter scores, bbox_pred w.r.t. mask.
            # anchors are filtered in get_anchors() beforehand.
            scores = scores[mask]
            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1,
                                                                   4)[mask, :]
            if scores.dim() == 0:
                rpn_bbox_pred = rpn_bbox_pred.unsqueeze(0)
                anchors = anchors.unsqueeze(0)
                scores = scores.unsqueeze(0)
            # filter anchors, bbox_pred, scores w.r.t. scores
            if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
                _, topk_inds = scores.topk(cfg.nms_pre)
                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
                anchors = anchors[topk_inds, :]
                scores = scores[topk_inds]
            # get proposals w.r.t. anchors and rpn_bbox_pred
            proposals = self.bbox_coder.decode(
                anchors, rpn_bbox_pred, max_shape=img_meta['img_shape'])
            # filter out too small bboxes
            if cfg.min_bbox_size >= 0:
                w = proposals[:, 2] - proposals[:, 0]
                h = proposals[:, 3] - proposals[:, 1]
                valid_mask = (w > cfg.min_bbox_size) & (h > cfg.min_bbox_size)
                if not valid_mask.all():
                    proposals = proposals[valid_mask]
                    scores = scores[valid_mask]

            # NMS in current level
            proposals, _ = nms(proposals, scores, cfg.nms.iou_threshold)
            proposals = proposals[:cfg.nms_post, :]
            mlvl_proposals.append(proposals)

        proposals = torch.cat(mlvl_proposals, 0)
        if cfg.get('nms_across_levels', False):
            # NMS across multi levels
            proposals, _ = nms(proposals[:, :4], proposals[:, -1],
                               cfg.nms.iou_threshold)
            proposals = proposals[:cfg.max_per_img, :]
        else:
            scores = proposals[:, 4]
            num = min(cfg.max_per_img, proposals.shape[0])
            _, topk_inds = scores.topk(num)
            proposals = proposals[topk_inds, :]

        bboxes = proposals[:, :-1]
        scores = proposals[:, -1]
        if rescale:
            assert img_meta.get('scale_factor') is not None
            bboxes /= bboxes.new_tensor(img_meta['scale_factor']).repeat(
                (1, 2))

        results = InstanceData()
        results.bboxes = bboxes
        results.scores = scores
        results.labels = scores.new_zeros(scores.size(0), dtype=torch.long)
        return results
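

# Illustrative usage sketch (an assumption for clarity, not part of the
# upstream module): it only builds the ``test_cfg`` fields that
# ``_predict_by_feat_single`` reads above; the numeric values are typical
# GA-RPN test settings, not defaults defined in this file.
if __name__ == '__main__':
    from mmengine.config import ConfigDict

    test_cfg = ConfigDict(
        nms_pre=1000,  # keep top-k scoring locations per level before NMS
        nms_post=1000,  # proposals kept per level after per-level NMS
        max_per_img=300,  # final number of proposals kept per image
        min_bbox_size=0,  # drop boxes whose width or height <= this value
        nms=dict(type='nms', iou_threshold=0.7))
    # Mirrors the check at the top of ``_predict_by_feat_single``.
    assert test_cfg.nms.get('type', 'nms') == 'nms'
    print('iou_threshold:', test_cfg.nms.iou_threshold)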