# Copyright (c) OpenMMLab. All rights reserved.
import copy
from typing import List, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.ops import nms
from mmengine.structures import InstanceData
from torch import Tensor

from mmdet.registry import MODELS
from mmdet.utils import ConfigType, InstanceList, MultiConfig, OptInstanceList
from .guided_anchor_head import GuidedAnchorHead


@MODELS.register_module()
class GARPNHead(GuidedAnchorHead):
    """Guided-Anchor-based RPN head."""

    def __init__(self,
                 in_channels: int,
                 num_classes: int = 1,
                 init_cfg: MultiConfig = dict(
                     type='Normal',
                     layer='Conv2d',
                     std=0.01,
                     override=dict(
                         type='Normal',
                         name='conv_loc',
                         std=0.01,
                         bias_prob=0.01)),
                 **kwargs) -> None:
        super().__init__(
            num_classes=num_classes,
            in_channels=in_channels,
            init_cfg=init_cfg,
            **kwargs)
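
    # Note on ``init_cfg`` (explanatory, based on the defaults above): the
    # ``override`` entry initialises the ``conv_loc`` bias so that the
    # initial anchor-location probability is roughly ``bias_prob=0.01``,
    # the usual focal-loss-style prior that keeps early location losses
    # stable.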

    def _init_layers(self) -> None:
        """Initialize layers of the head."""
        self.rpn_conv = nn.Conv2d(
            self.in_channels, self.feat_channels, 3, padding=1)
        super(GARPNHead, self)._init_layers()

    def forward_single(self, x: Tensor) -> Tuple[Tensor]:
        """Forward feature of a single scale level."""
        x = self.rpn_conv(x)
        x = F.relu(x, inplace=True)
        (cls_score, bbox_pred, shape_pred,
         loc_pred) = super().forward_single(x)
        return cls_score, bbox_pred, shape_pred, loc_pred
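
    # Shape sketch for ``forward_single`` (illustrative; assumes the common
    # GA-RPN setting of ``feat_channels=256``, one square base prior per
    # location and sigmoid classification -- these values are assumptions,
    # not fixed by this file):
    #
    #   x          (N, 256, H, W) -> rpn_conv + ReLU -> (N, 256, H, W)
    #   cls_score  (N, 1, H, W)   # num_base_priors * cls_out_channels
    #   bbox_pred  (N, 4, H, W)   # num_base_priors * 4
    #   shape_pred (N, 2, H, W)   # num_base_priors * 2, i.e. (dw, dh)
    #   loc_pred   (N, 1, H, W)   # anchor-location probability map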

    def loss_by_feat(
            self,
            cls_scores: List[Tensor],
            bbox_preds: List[Tensor],
            shape_preds: List[Tensor],
            loc_preds: List[Tensor],
            batch_gt_instances: InstanceList,
            batch_img_metas: List[dict],
            batch_gt_instances_ignore: OptInstanceList = None) -> dict:
        """Calculate the loss based on the features extracted by the detection
        head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level,
                each with shape (N, num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W).
            shape_preds (list[Tensor]): Shape predictions for each scale
                level with shape (N, num_anchors * 2, H, W).
            loc_preds (list[Tensor]): Location predictions for each scale
                level with shape (N, 1, H, W).
            batch_gt_instances (list[:obj:`InstanceData`]): Batch of
                gt_instance. It usually includes ``bboxes`` and ``labels``
                attributes.
            batch_img_metas (list[dict]): Meta information of each image,
                e.g., image size, scaling factor, etc.
            batch_gt_instances_ignore (list[:obj:`InstanceData`], optional):
                Batch of gt_instances_ignore. It includes ``bboxes`` attribute
                data that is ignored during training and testing.
                Defaults to None.

        Returns:
            dict: A dictionary of loss components.
        """
        losses = super().loss_by_feat(
            cls_scores,
            bbox_preds,
            shape_preds,
            loc_preds,
            batch_gt_instances,
            batch_img_metas,
            batch_gt_instances_ignore=batch_gt_instances_ignore)
        return dict(
            loss_rpn_cls=losses['loss_cls'],
            loss_rpn_bbox=losses['loss_bbox'],
            loss_anchor_shape=losses['loss_shape'],
            loss_anchor_loc=losses['loss_loc'])
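
    # Key-mapping sketch (illustrative): the parent class returns losses
    # keyed ``loss_cls`` / ``loss_bbox`` / ``loss_shape`` / ``loss_loc``;
    # this head only renames them with RPN-specific prefixes, e.g.
    #
    #   losses = head.loss_by_feat(cls_scores, bbox_preds, shape_preds,
    #                              loc_preds, gt_instances, img_metas)
    #   sorted(losses)  # ['loss_anchor_loc', 'loss_anchor_shape',
    #                   #  'loss_rpn_bbox', 'loss_rpn_cls']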

    def _predict_by_feat_single(self,
                                cls_scores: List[Tensor],
                                bbox_preds: List[Tensor],
                                mlvl_anchors: List[Tensor],
                                mlvl_masks: List[Tensor],
                                img_meta: dict,
                                cfg: ConfigType,
                                rescale: bool = False) -> InstanceData:
        """Transform a single image's features extracted from the head into
        bbox results.

        Args:
            cls_scores (list[Tensor]): Box scores from all scale
                levels of a single image, each item has shape
                (num_priors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas from
                all scale levels of a single image, each item has shape
                (num_priors * 4, H, W).
            mlvl_anchors (list[Tensor]): Each element in the list is
                the anchors of a single level in the feature pyramid. It has
                shape (num_priors, 4).
            mlvl_masks (list[Tensor]): Each element in the list is the
                location mask of a single level.
            img_meta (dict): Image meta info.
            cfg (:obj:`ConfigDict` or dict): Test / postprocessing
                configuration. If None, ``test_cfg`` would be used.
            rescale (bool): If True, return boxes in original image space.
                Defaults to False.

        Returns:
            :obj:`InstanceData`: Detection results of each image
            after the post process.
            Each item usually contains the following keys.

            - scores (Tensor): Classification scores, has a shape
              (num_instances, ).
            - labels (Tensor): Labels of bboxes, has a shape
              (num_instances, ).
            - bboxes (Tensor): Has a shape (num_instances, 4), the last
              dimension 4 arranged as (x1, y1, x2, y2).
        """
        cfg = self.test_cfg if cfg is None else cfg
        cfg = copy.deepcopy(cfg)
        assert cfg.nms.get('type', 'nms') == 'nms', \
            'GARPNHead only supports naive nms.'

        mlvl_proposals = []
        for idx in range(len(cls_scores)):
            rpn_cls_score = cls_scores[idx]
            rpn_bbox_pred = bbox_preds[idx]
            anchors = mlvl_anchors[idx]
            mask = mlvl_masks[idx]
            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
            # if no location is kept, end.
            if mask.sum() == 0:
                continue
            rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
            if self.use_sigmoid_cls:
                rpn_cls_score = rpn_cls_score.reshape(-1)
                scores = rpn_cls_score.sigmoid()
            else:
                rpn_cls_score = rpn_cls_score.reshape(-1, 2)
                # remind that we set FG labels to [0, num_class-1]
                # since mmdet v2.0
                # BG cat_id: num_class
                scores = rpn_cls_score.softmax(dim=1)[:, :-1]
            # filter scores, bbox_pred w.r.t. mask.
            # anchors are filtered in get_anchors() beforehand.
            scores = scores[mask]
            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1,
                                                                   4)[mask, :]
            if scores.dim() == 0:
                rpn_bbox_pred = rpn_bbox_pred.unsqueeze(0)
                anchors = anchors.unsqueeze(0)
                scores = scores.unsqueeze(0)
            # filter anchors, bbox_pred, scores w.r.t. scores
            if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
                _, topk_inds = scores.topk(cfg.nms_pre)
                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
                anchors = anchors[topk_inds, :]
                scores = scores[topk_inds]
            # get proposals w.r.t. anchors and rpn_bbox_pred
            proposals = self.bbox_coder.decode(
                anchors, rpn_bbox_pred, max_shape=img_meta['img_shape'])
            # filter out too small bboxes
            if cfg.min_bbox_size >= 0:
                w = proposals[:, 2] - proposals[:, 0]
                h = proposals[:, 3] - proposals[:, 1]
                valid_mask = (w > cfg.min_bbox_size) & (h > cfg.min_bbox_size)
                if not valid_mask.all():
                    proposals = proposals[valid_mask]
                    scores = scores[valid_mask]

            # NMS in current level
            proposals, _ = nms(proposals, scores, cfg.nms.iou_threshold)
            proposals = proposals[:cfg.nms_post, :]
            mlvl_proposals.append(proposals)

        proposals = torch.cat(mlvl_proposals, 0)
        if cfg.get('nms_across_levels', False):
            # NMS across multi levels
            proposals, _ = nms(proposals[:, :4], proposals[:, -1],
                               cfg.nms.iou_threshold)
            proposals = proposals[:cfg.max_per_img, :]
        else:
            scores = proposals[:, 4]
            num = min(cfg.max_per_img, proposals.shape[0])
            _, topk_inds = scores.topk(num)
            proposals = proposals[topk_inds, :]

        bboxes = proposals[:, :-1]
        scores = proposals[:, -1]
        if rescale:
            assert img_meta.get('scale_factor') is not None
            bboxes /= bboxes.new_tensor(img_meta['scale_factor']).repeat(
                (1, 2))

        results = InstanceData()
        results.bboxes = bboxes
        results.scores = scores
        results.labels = scores.new_zeros(scores.size(0), dtype=torch.long)
        return results
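

# Illustrative usage sketch (an assumption for clarity, not part of the
# upstream module): it only builds the ``test_cfg`` fields that
# ``_predict_by_feat_single`` reads above; the numeric values are typical
# GA-RPN test settings, not defaults defined in this file.
if __name__ == '__main__':
    from mmengine.config import ConfigDict

    test_cfg = ConfigDict(
        nms_pre=1000,  # keep top-k scoring locations per level before NMS
        nms_post=1000,  # proposals kept per level after per-level NMS
        max_per_img=300,  # final number of proposals kept per image
        min_bbox_size=0,  # drop boxes whose width or height <= this value
        nms=dict(type='nms', iou_threshold=0.7))
    # Mirrors the check at the top of ``_predict_by_feat_single``.
    assert test_cfg.nms.get('type', 'nms') == 'nms'
    print('iou_threshold:', test_cfg.nms.iou_threshold)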