Spaces:
Runtime error
Runtime error
# `warnings` is imported unconditionally: the rest of the module uses it even
# when shapely is not installed.
import warnings

try:
    from shapely.errors import ShapelyDeprecationWarning
except (ImportError, OSError):
    # Best effort only. ImportError: shapely is absent or does not define this
    # category. OSError: its native library failed to load.
    pass
else:
    warnings.filterwarnings('ignore', category=ShapelyDeprecationWarning)
import copy | |
import itertools | |
import logging | |
import os | |
from collections import OrderedDict | |
from typing import Any, Dict, List, Set | |
import pycocotools.mask as mask_util | |
import torch | |
import numpy as np | |
import detectron2.utils.comm as comm | |
from detectron2.checkpoint import DetectionCheckpointer | |
from detectron2.config import get_cfg | |
from detectron2.data import MetadataCatalog, build_detection_train_loader, build_detection_test_loader | |
from detectron2.engine import ( | |
DefaultTrainer, | |
default_argument_parser, | |
default_setup, | |
launch, | |
) | |
from detectron2.evaluation import ( | |
CityscapesInstanceEvaluator, | |
CityscapesSemSegEvaluator, | |
COCOEvaluator, | |
COCOPanopticEvaluator, | |
DatasetEvaluators, | |
LVISEvaluator, | |
SemSegEvaluator, | |
verify_results, | |
) | |
from detectron2.solver.build import maybe_add_gradient_clipping | |
from detectron2.utils.logger import setup_logger | |
from frozenseg import ( | |
COCOInstanceNewBaselineDatasetMapper, | |
COCOPanopticNewBaselineDatasetMapper, | |
InstanceSegEvaluator, | |
MaskFormerInstanceDatasetMapper, | |
MaskFormerPanopticDatasetMapper, | |
MaskFormerSemanticDatasetMapper, | |
SemanticSegmentorWithTTA, | |
add_maskformer2_config, | |
add_frozenseg_config, | |
) | |
from detectron2.solver import build_lr_scheduler | |
from collections import OrderedDict | |
from detectron2.utils.file_io import PathManager | |
from detectron2.utils.comm import all_gather, is_main_process, synchronize | |
import json | |
from detectron2.evaluation.sem_seg_evaluation import load_image_into_numpy_array | |
warnings.filterwarnings("ignore") | |
def prepare_class_names_from_metadata(metadata, train_metadata):
    """Flag which test categories share a name with a training category.

    Every entry of ``stuff_classes`` / ``thing_classes`` may hold several
    comma-separated synonyms for a single category. A test category is marked
    as overlapping when at least one of its synonyms is also a synonym of any
    training category.

    Args:
        metadata: object exposing ``stuff_classes`` or ``thing_classes`` for
            the dataset being evaluated.
        train_metadata: object exposing the same attribute for the training
            dataset.

    Returns:
        np.ndarray: boolean array with one entry per test category, in the
        order of the class list that was used.

    Raises:
        AttributeError: if neither ``stuff_classes`` nor ``thing_classes`` is
            available on both metadata objects.
    """

    def split_labels(labels):
        # "a, b,c" -> ["a", "b", "c"]: there can be multiple synonyms per class.
        return [label.replace(', ', ',').split(',') for label in labels]

    try:
        # stuff_classes includes both thing and stuff categories.
        class_names = split_labels(metadata.stuff_classes)
        train_class_names = split_labels(train_metadata.stuff_classes)
    except AttributeError:
        # Instance segmentation datasets only provide thing_classes.
        class_names = split_labels(metadata.thing_classes)
        train_class_names = split_labels(train_metadata.thing_classes)

    # Flatten once; membership tests against this set are reused for every class.
    train_synonyms = {name for synonyms in train_class_names for name in synonyms}
    return np.array(
        [not train_synonyms.isdisjoint(synonyms) for synonyms in class_names],
        dtype=bool,
    )
class SemSegSeenUnseenRecallEvaluator(SemSegEvaluator):
    """
    Mask-recall evaluator that reports seen and unseen categories separately.

    A ground-truth category present in an image counts as recalled at a given
    IoU threshold when at least one predicted mask overlaps its region with an
    IoU strictly above that threshold. Test categories that share a class name
    with the training dataset are "seen"; all others are "unseen".
    """

    def __init__(
        self,
        dataset_name,
        distributed=True,
        output_dir=None,
        *,
        sem_seg_loading_fn=load_image_into_numpy_array,
        num_classes=None,
        ignore_label=None,
        train_dataset_name=None,
    ):
        """
        Args:
            dataset_name, distributed, output_dir, sem_seg_loading_fn,
            num_classes, ignore_label: forwarded unchanged to ``SemSegEvaluator``.
            train_dataset_name (str): dataset whose class names define the
                "seen" categories. Required, despite the ``None`` default.

        Raises:
            ValueError: if ``train_dataset_name`` is not given.
        """
        if train_dataset_name is None:
            raise ValueError(
                "train_dataset_name is required to split categories into seen/unseen"
            )
        super().__init__(
            dataset_name,
            distributed,
            output_dir,
            sem_seg_loading_fn=sem_seg_loading_fn,
            num_classes=num_classes,
            ignore_label=ignore_label,
        )
        train_metadata = MetadataCatalog.get(train_dataset_name)
        test_metadata = MetadataCatalog.get(dataset_name)
        # One boolean per test category: True when it also exists in training.
        self.category_overlapping_mask = prepare_class_names_from_metadata(
            test_metadata, train_metadata
        )
        self.iou_thresholds = [0.5, 0.75, 0.9]

    def reset(self):
        """Clear all accumulated state before a new evaluation run."""
        self._conf_matrix = np.zeros(
            (self._num_classes + 1, self._num_classes + 1), dtype=np.int64
        )
        self._b_conf_matrix = np.zeros(
            (self._num_classes + 1, self._num_classes + 1), dtype=np.int64
        )
        self._predictions = []
        # True-positive counters, one slot per IoU threshold.
        self._unseen_tp_cnt = np.zeros(len(self.iou_thresholds), dtype=np.int64)
        self._seen_tp_cnt = np.zeros(len(self.iou_thresholds), dtype=np.int64)
        # Number of (image, category) ground-truth pairs observed so far.
        self._unseen_labels = np.zeros(1, dtype=np.int64)
        self._seen_labels = np.zeros(1, dtype=np.int64)

    def process(self, inputs, outputs):
        """
        Accumulate recall statistics for one batch.

        Args:
            inputs: list of dicts; each must provide "file_name".
            outputs: list of dicts with key "recall_seg" holding the per-query
                mask scores of shape (n, h, w). Scores above 0 are foreground.
        """
        thresholds = np.asarray(self.iou_thresholds)
        for sample, output in zip(inputs, outputs):
            scores = output["recall_seg"].to(self._cpu_device)  # (n, h, w)
            pred = np.array(scores > 0, dtype=int)  # (n, h, w)

            gt_filename = self.input_file_to_gt_file[sample["file_name"]]
            gt = self.sem_seg_loading_fn(gt_filename, dtype=int)
            gt[gt == self._ignore_label] = self._num_classes

            # Categories present in this image, without the ignore bucket.
            present = np.unique(gt)
            gt_classes = present[present != self._num_classes]

            for c in gt_classes:
                # The IoU does not depend on the threshold: compute it once
                # per category and compare against all thresholds together.
                iou = self.calculate_iou(gt == c, pred)  # (n,)
                hits = (iou[:, None] > thresholds).any(axis=0)  # (num_thresholds,)
                if self.category_overlapping_mask[c]:
                    self._seen_labels += 1
                    self._seen_tp_cnt += hits
                else:
                    self._unseen_labels += 1
                    self._unseen_tp_cnt += hits

            self._predictions.extend(
                self.encode_json_recall_seg(pred, sample["file_name"])
            )

    def calculate_iou(self, mask1, mask2):
        """
        IoU between one (h, w) mask and each mask of an (n, h, w) stack.

        Returns an array of n values. A pair with an empty union yields NaN.
        ``process`` never produces one, because every ground-truth mask it
        passes contains at least one pixel.
        """
        intersection = np.logical_and(mask1, mask2)
        union = np.logical_or(mask1, mask2)
        return np.sum(intersection, axis=(1, 2)) / np.sum(union, axis=(1, 2))

    def encode_json_recall_seg(self, recall_seg, input_file_name):
        """Encode every mask of ``recall_seg`` as a JSON-serialisable RLE record."""
        json_list = []
        for mask_pred in recall_seg:
            mask_pred = mask_pred.astype(np.uint8)
            mask_rle = mask_util.encode(np.array(mask_pred[:, :, None], order="F"))[0]
            # RLE counts are bytes; JSON needs text.
            mask_rle["counts"] = mask_rle["counts"].decode("utf-8")
            json_list.append({"file_name": input_file_name, "segmentation": mask_rle})
        return json_list

    @staticmethod
    def _sum_counts(per_rank_counts, like):
        """Element-wise sum of the counters gathered from every rank."""
        total = np.zeros_like(like)
        for counts in per_rank_counts:
            total += counts
        return total

    @staticmethod
    def _recall(tp_cnt, label_cnt):
        """Return tp / labels, or NaN where no ground-truth label was seen."""
        recall = np.full(tp_cnt.shape, np.nan)
        np.divide(tp_cnt, label_cnt, out=recall, where=label_cnt > 0)
        return recall

    def evaluate(self):
        """
        Aggregate the counters and compute seen / unseen recall.

        Returns:
            OrderedDict: ``{"recall_seg": {metric_name: percentage}}`` on the
            main process; ``None`` on every other rank in distributed mode.
        """
        if self._distributed:
            synchronize()
            seen_tp_list = all_gather(self._seen_tp_cnt)
            unseen_tp_list = all_gather(self._unseen_tp_cnt)
            seen_label_list = all_gather(self._seen_labels)
            unseen_label_list = all_gather(self._unseen_labels)
            # Each rank only holds the predictions it processed; merge them so
            # the dump written below covers the whole dataset.
            self._predictions = list(itertools.chain(*all_gather(self._predictions)))
            if not is_main_process():
                return

            self._seen_tp_cnt = self._sum_counts(seen_tp_list, self._seen_tp_cnt)
            self._unseen_tp_cnt = self._sum_counts(unseen_tp_list, self._unseen_tp_cnt)
            self._seen_labels = self._sum_counts(seen_label_list, self._seen_labels)
            self._unseen_labels = self._sum_counts(unseen_label_list, self._unseen_labels)

        if self._output_dir:
            PathManager.mkdirs(self._output_dir)
            file_path = os.path.join(self._output_dir, "recall_predictions.json")
            with PathManager.open(file_path, "w") as f:
                f.write(json.dumps(self._predictions))

        # instance-level Recall
        seen_recalls = self._recall(self._seen_tp_cnt, self._seen_labels)
        unseen_recalls = self._recall(self._unseen_tp_cnt, self._unseen_labels)

        res = {}
        for i, iou_threshold in enumerate(self.iou_thresholds):
            res[f"S_Recall@IoU={iou_threshold:.2f}"] = 100 * seen_recalls[i]
            res[f"U_Recall@IoU={iou_threshold:.2f}"] = 100 * unseen_recalls[i]

        if self._output_dir:
            file_path = os.path.join(self._output_dir, "recall_evaluation.pth")
            with PathManager.open(file_path, "wb") as f:
                torch.save(res, f)

        results = OrderedDict({"recall_seg": res})
        self._logger.info(results)
        return results
class Trainer(DefaultTrainer):
    """
    Extension of the Trainer class adapted to FrozenSeg.

    Every hook below is a classmethod: ``DefaultTrainer`` invokes them on the
    class (for example ``cls.build_test_loader(cfg, name)``), so without the
    decorator the arguments would be shifted by one position.
    """

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """
        Create evaluator(s) for a given dataset.
        This uses the special metadata "evaluator_type" associated with each
        builtin dataset. For your own dataset, you can simply create an
        evaluator manually in your script and do not have to worry about the
        hacky if-else logic here.

        Args:
            cfg: config node; the ``MODEL.MASK_FORMER.TEST`` flags select the tasks.
            dataset_name (str): name registered in ``MetadataCatalog``.
            output_folder (str | None): where evaluators write their files;
                defaults to ``<cfg.OUTPUT_DIR>/inference``.

        Returns:
            A single evaluator, or ``DatasetEvaluators`` wrapping several.

        Raises:
            NotImplementedError: if no evaluator matches the dataset.
        """
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        test_cfg = cfg.MODEL.MASK_FORMER.TEST
        evaluator_list = []
        evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type
        # semantic segmentation
        if evaluator_type in ["sem_seg", "ade20k_panoptic_seg"] and test_cfg.SEMANTIC_ON:
            evaluator_list.append(
                SemSegEvaluator(
                    dataset_name,
                    distributed=True,
                    output_dir=output_folder,
                )
            )
        # NOTE(review): the source reached review with its indentation lost;
        # this branch is reconstructed as independent of the semantic branch
        # above -- confirm against the upstream file.
        if test_cfg.RECALL_ON:
            evaluator_list.append(
                SemSegSeenUnseenRecallEvaluator(
                    dataset_name,
                    distributed=True,
                    output_dir=output_folder,
                    train_dataset_name=cfg.DATASETS.TRAIN[0],
                )
            )
        # instance segmentation
        if evaluator_type == "coco":
            evaluator_list.append(COCOEvaluator(dataset_name, output_dir=output_folder))
        # panoptic segmentation
        if evaluator_type in [
            "coco_panoptic_seg",
            "ade20k_panoptic_seg",
            "cityscapes_panoptic_seg",
            "mapillary_vistas_panoptic_seg",
        ]:
            if test_cfg.PANOPTIC_ON:
                evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder))
        # COCO
        if evaluator_type == "coco_panoptic_seg" and test_cfg.INSTANCE_ON:
            evaluator_list.append(InstanceSegEvaluator(dataset_name, output_dir=output_folder))
        if evaluator_type == "coco_panoptic_seg" and test_cfg.SEMANTIC_ON:
            evaluator_list.append(
                SemSegEvaluator(dataset_name, distributed=True, output_dir=output_folder)
            )
        # Mapillary Vistas
        if evaluator_type == "mapillary_vistas_panoptic_seg" and test_cfg.INSTANCE_ON:
            evaluator_list.append(InstanceSegEvaluator(dataset_name, output_dir=output_folder))
        if evaluator_type == "mapillary_vistas_panoptic_seg" and test_cfg.SEMANTIC_ON:
            evaluator_list.append(
                SemSegEvaluator(dataset_name, distributed=True, output_dir=output_folder)
            )
        # Cityscapes
        if evaluator_type == "cityscapes_instance":
            assert (
                torch.cuda.device_count() > comm.get_rank()
            ), "CityscapesEvaluator currently do not work with multiple machines."
            return CityscapesInstanceEvaluator(dataset_name)
        if evaluator_type == "cityscapes_sem_seg":
            assert (
                torch.cuda.device_count() > comm.get_rank()
            ), "CityscapesEvaluator currently do not work with multiple machines."
            return CityscapesSemSegEvaluator(dataset_name)
        if evaluator_type == "cityscapes_panoptic_seg":
            if test_cfg.SEMANTIC_ON:
                assert (
                    torch.cuda.device_count() > comm.get_rank()
                ), "CityscapesEvaluator currently do not work with multiple machines."
                evaluator_list.append(CityscapesSemSegEvaluator(dataset_name))
            if test_cfg.INSTANCE_ON:
                assert (
                    torch.cuda.device_count() > comm.get_rank()
                ), "CityscapesEvaluator currently do not work with multiple machines."
                evaluator_list.append(CityscapesInstanceEvaluator(dataset_name))
        # ADE20K
        if evaluator_type == "ade20k_panoptic_seg" and test_cfg.INSTANCE_ON:
            evaluator_list.append(InstanceSegEvaluator(dataset_name, output_dir=output_folder))
        # LVIS
        if evaluator_type == "lvis":
            return LVISEvaluator(dataset_name, output_dir=output_folder)
        if len(evaluator_list) == 0:
            raise NotImplementedError(
                "no Evaluator for the dataset {} with the type {}".format(
                    dataset_name, evaluator_type
                )
            )
        elif len(evaluator_list) == 1:
            return evaluator_list[0]
        return DatasetEvaluators(evaluator_list)

    @classmethod
    def build_train_loader(cls, cfg):
        """
        Build the training loader with the mapper named by
        ``cfg.INPUT.DATASET_MAPPER_NAME``; unknown names pass ``mapper=None``
        to ``build_detection_train_loader``.
        """
        mapper_classes = {
            # Semantic segmentation dataset mapper
            "mask_former_semantic": MaskFormerSemanticDatasetMapper,
            # Panoptic segmentation dataset mapper
            "mask_former_panoptic": MaskFormerPanopticDatasetMapper,
            # Instance segmentation dataset mapper
            "mask_former_instance": MaskFormerInstanceDatasetMapper,
            # coco instance segmentation lsj new baseline
            "coco_instance_lsj": COCOInstanceNewBaselineDatasetMapper,
            # coco panoptic segmentation lsj new baseline
            "coco_panoptic_lsj": COCOPanopticNewBaselineDatasetMapper,
        }
        mapper_cls = mapper_classes.get(cfg.INPUT.DATASET_MAPPER_NAME)
        mapper = mapper_cls(cfg, True) if mapper_cls is not None else None
        return build_detection_train_loader(cfg, mapper=mapper)

    @classmethod
    def build_lr_scheduler(cls, cfg, optimizer):
        """
        It now calls :func:`detectron2.solver.build_lr_scheduler`.
        Overwrite it if you'd like a different scheduler.
        """
        return build_lr_scheduler(cfg, optimizer)

    @classmethod
    def build_test_loader(cls, cfg, dataset_name):
        """Build the test loader for ``dataset_name`` with the default mapper."""
        return build_detection_test_loader(cfg, dataset_name)

    @classmethod
    def build_optimizer(cls, cfg, model):
        """
        Build an optimizer with one parameter group per trainable parameter.

        Group hyper-parameters start from ``SOLVER.BASE_LR`` and
        ``SOLVER.WEIGHT_DECAY`` and are then adjusted, later rules overriding
        earlier ones:
          * modules whose name contains "backbone" scale the learning rate by
            ``SOLVER.BACKBONE_MULTIPLIER``;
          * position-bias / position-embedding parameters use weight decay 0;
          * normalization modules use ``SOLVER.WEIGHT_DECAY_NORM``;
          * ``torch.nn.Embedding`` modules use ``SOLVER.WEIGHT_DECAY_EMBED``.

        Raises:
            NotImplementedError: if ``SOLVER.OPTIMIZER`` is neither "SGD" nor "ADAMW".
        """
        weight_decay_norm = cfg.SOLVER.WEIGHT_DECAY_NORM
        weight_decay_embed = cfg.SOLVER.WEIGHT_DECAY_EMBED
        defaults = {
            "lr": cfg.SOLVER.BASE_LR,
            "weight_decay": cfg.SOLVER.WEIGHT_DECAY,
        }
        norm_module_types = (
            torch.nn.BatchNorm1d,
            torch.nn.BatchNorm2d,
            torch.nn.BatchNorm3d,
            torch.nn.SyncBatchNorm,
            # NaiveSyncBatchNorm inherits from BatchNorm2d
            torch.nn.GroupNorm,
            torch.nn.InstanceNorm1d,
            torch.nn.InstanceNorm2d,
            torch.nn.InstanceNorm3d,
            torch.nn.LayerNorm,
            torch.nn.LocalResponseNorm,
        )
        params: List[Dict[str, Any]] = []
        memo: Set[torch.nn.parameter.Parameter] = set()
        for module_name, module in model.named_modules():
            for module_param_name, value in module.named_parameters(recurse=False):
                if not value.requires_grad:
                    continue
                # Avoid duplicating parameters
                if value in memo:
                    continue
                memo.add(value)
                hyperparams = copy.copy(defaults)
                if "backbone" in module_name:
                    hyperparams["lr"] = hyperparams["lr"] * cfg.SOLVER.BACKBONE_MULTIPLIER
                if (
                    "relative_position_bias_table" in module_param_name
                    or "absolute_pos_embed" in module_param_name
                ):
                    print(module_param_name)
                    hyperparams["weight_decay"] = 0.0
                if isinstance(module, norm_module_types):
                    hyperparams["weight_decay"] = weight_decay_norm
                if isinstance(module, torch.nn.Embedding):
                    hyperparams["weight_decay"] = weight_decay_embed
                params.append({"params": [value], **hyperparams})

        def maybe_add_full_model_gradient_clipping(optim):
            # detectron2 doesn't have full model gradient clipping now
            clip_norm_val = cfg.SOLVER.CLIP_GRADIENTS.CLIP_VALUE
            enable = (
                cfg.SOLVER.CLIP_GRADIENTS.ENABLED
                and cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE == "full_model"
                and clip_norm_val > 0.0
            )

            class FullModelGradientClippingOptimizer(optim):
                def step(self, closure=None):
                    # Clip the norm over all groups jointly, then step.
                    all_params = itertools.chain(*[x["params"] for x in self.param_groups])
                    torch.nn.utils.clip_grad_norm_(all_params, clip_norm_val)
                    super().step(closure=closure)

            return FullModelGradientClippingOptimizer if enable else optim

        optimizer_type = cfg.SOLVER.OPTIMIZER
        if optimizer_type == "SGD":
            optimizer = maybe_add_full_model_gradient_clipping(torch.optim.SGD)(
                params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM
            )
        elif optimizer_type == "ADAMW":
            optimizer = maybe_add_full_model_gradient_clipping(torch.optim.AdamW)(
                params, cfg.SOLVER.BASE_LR
            )
        else:
            raise NotImplementedError(f"no optimizer type {optimizer_type}")
        # Any clip type other than "full_model" is delegated to detectron2.
        if cfg.SOLVER.CLIP_GRADIENTS.CLIP_TYPE != "full_model":
            optimizer = maybe_add_gradient_clipping(cfg, optimizer)
        return optimizer

    @classmethod
    def test_with_TTA(cls, cfg, model):
        """
        Evaluate ``model`` wrapped in ``SemanticSegmentorWithTTA`` on every
        dataset of ``cfg.DATASETS.TEST``; result keys get a "_TTA" suffix.
        """
        logger = logging.getLogger("detectron2.trainer")
        # In the end of training, run an evaluation with TTA.
        logger.info("Running inference with test-time augmentation ...")
        model = SemanticSegmentorWithTTA(cfg, model)
        evaluators = [
            cls.build_evaluator(
                cfg, name, output_folder=os.path.join(cfg.OUTPUT_DIR, "inference_TTA")
            )
            for name in cfg.DATASETS.TEST
        ]
        res = cls.test(cfg, model, evaluators)
        res = OrderedDict({k + "_TTA": v for k, v in res.items()})
        return res
def setup(args):
    """
    Build the frozen config for this run and initialise logging.

    Args:
        args: parsed command-line namespace; ``config_file`` and ``opts`` are
            merged into the config, and the namespace is also handed to
            ``default_setup``.

    Returns:
        The frozen config node.
    """
    cfg = get_cfg()
    # Apply both config extension hooks before merging any user values.
    for extend_cfg in (add_maskformer2_config, add_frozenseg_config):
        extend_cfg(cfg)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    default_setup(cfg, args)
    setup_logger(
        output=cfg.OUTPUT_DIR,
        distributed_rank=comm.get_rank(),
        name="frozenSeg",
        enable_propagation=True,
    )
    return cfg
def main(args):
    """
    Entry point executed by every worker: train, or evaluate with ``--eval-only``.

    Returns:
        The evaluation results in eval-only mode, otherwise whatever
        ``Trainer.train`` returns.
    """
    cfg = setup(args)

    if not args.eval_only:
        trainer = Trainer(cfg)
        trainer.resume_or_load(resume=args.resume)
        return trainer.train()

    model = Trainer.build_model(cfg)

    # Parameter-name fragments excluded from the "exclude_text" frozen count.
    text_tags = (
        'clip_model.token_embedding',
        'clip_model.positional_embedding',
        'clip_model.transformer',
        'clip_model.ln_final',
        'clip_model.text_projection',
    )
    total_params = 0
    trainable_params = 0
    frozen_params = 0
    frozen_params_exclude_text = 0
    for param_name, param in model.named_parameters():
        size = param.numel()
        total_params += size
        if param.requires_grad:
            trainable_params += size
            continue
        frozen_params += size
        if not any(tag in param_name for tag in text_tags):
            frozen_params_exclude_text += size
    print(f"total_params: {total_params}, trainable_params: {trainable_params}, frozen_params: {frozen_params}, frozen_params_exclude_text: {frozen_params_exclude_text}")

    checkpointer = DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR)
    checkpointer.resume_or_load(cfg.MODEL.WEIGHTS, resume=args.resume)

    res = Trainer.test(cfg, model)
    if cfg.TEST.AUG.ENABLED:
        res.update(Trainer.test_with_TTA(cfg, model))
    # Only the main process checks the results against the expected values.
    if comm.is_main_process():
        verify_results(cfg, res)
    return res
if __name__ == "__main__":
    # Parse the standard detectron2 command line, then start `main` through
    # `launch` with the requested GPU / machine settings.
    args = default_argument_parser().parse_args()
    print("Command Line Args:", args)
    launch_options = dict(
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args,),
    )
    launch(main, args.num_gpus, **launch_options)