Spaces:
Runtime error
Runtime error
# ------------------------------------------------------------------------
# HOTR official code : engine/arg_parser.py
# Copyright (c) Kakao Brain, Inc. and its affiliates. All Rights Reserved
# Modified arguments are represented with *
# ------------------------------------------------------------------------
# Modified from DETR (https://github.com/facebookresearch/detr)
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
# ------------------------------------------------------------------------
import argparse | |
import hotr.util.misc as utils | |
def get_args_parser():
    """Build the argparse parser holding every HOTR command-line option.

    The parser is created with ``add_help=False``, so it registers no
    ``-h/--help`` option of its own (presumably so that it can be passed via
    ``parents=[...]`` to a top-level parser -- confirm against the caller).

    Sections marked with ``*`` are HOTR additions on top of the DETR options.

    Returns:
        argparse.ArgumentParser: the configured (not yet parsed) parser.
    """
    parser = argparse.ArgumentParser('Set transformer detector', add_help=False)

    # Optimisation / schedule
    parser.add_argument('--lr', default=1e-4, type=float)
    parser.add_argument('--lr_backbone', default=1e-5, type=float)
    parser.add_argument('--batch_size', default=2, type=int)
    parser.add_argument('--weight_decay', default=1e-4, type=float)
    parser.add_argument('--epochs', default=100, type=int)
    parser.add_argument('--lr_drop', default=80, type=int)
    parser.add_argument('--clip_max_norm', default=0.1, type=float,
                        help='gradient clipping max norm')

    # DETR Model parameters
    parser.add_argument('--frozen_weights', type=str, default=None,
                        help="Path to the pretrained model. If set, only the mask head will be trained")
    # NOTE(review): this help text is identical to --frozen_weights and looks
    # copy-pasted; the flag name suggests a pretrained interaction transformer
    # checkpoint. Confirm against the code that consumes it before rewording.
    parser.add_argument('--pretrain_interaction_tf', type=str, default=None,
                        help="Path to the pretrained model. If set, only the mask head will be trained")

    # DETR Backbone
    parser.add_argument('--backbone', default='resnet50', type=str,
                        help="Name of the convolutional backbone to use")
    parser.add_argument('--dilation', action='store_true',
                        help="If true, we replace stride with dilation in the last convolutional block (DC5)")
    parser.add_argument('--position_embedding', default='sine', type=str, choices=('sine', 'learned'),
                        help="Type of positional embedding to use on top of the image features")

    # DETR Transformer (= Encoder, Instance Decoder)
    parser.add_argument('--enc_layers', default=6, type=int,
                        help="Number of encoding layers in the transformer")
    parser.add_argument('--dec_layers', default=6, type=int,
                        help="Number of decoding layers in the transformer")
    parser.add_argument('--dim_feedforward', default=2048, type=int,
                        help="Intermediate size of the feedforward layers in the transformer blocks")
    parser.add_argument('--hidden_dim', default=256, type=int,
                        help="Size of the embeddings (dimension of the transformer)")
    parser.add_argument('--dropout', default=0.1, type=float,
                        help="Dropout applied in the transformer")
    parser.add_argument('--nheads', default=8, type=int,
                        help="Number of attention heads inside the transformer's attentions")
    parser.add_argument('--num_queries', default=100, type=int,
                        help="Number of query slots")
    parser.add_argument('--pre_norm', action='store_true')
    parser.add_argument('--decoder_form', default=2, type=int,
                        help="1-decoder or 2-decoder")

    # Segmentation
    parser.add_argument('--masks', action='store_true',
                        help="Train segmentation head if the flag is provided")

    # Loss Option
    # store_false with dest='aux_loss': args.aux_loss is True unless the flag is given.
    parser.add_argument('--no_aux_loss', dest='aux_loss', action='store_false',
                        help="Disables auxiliary decoding losses (loss at each layer)")

    # Loss coefficients (DETR)
    parser.add_argument('--mask_loss_coef', default=1, type=float)
    parser.add_argument('--dice_loss_coef', default=1, type=float)
    parser.add_argument('--bbox_loss_coef', default=5, type=float)
    parser.add_argument('--giou_loss_coef', default=2, type=float)
    parser.add_argument('--eos_coef', default=0.1, type=float,
                        help="Relative classification weight of the no-object class")

    # Matcher (DETR)
    parser.add_argument('--set_cost_class', default=1, type=float,
                        help="Class coefficient in the matching cost")
    parser.add_argument('--set_cost_bbox', default=5, type=float,
                        help="L1 box coefficient in the matching cost")
    parser.add_argument('--set_cost_giou', default=2, type=float,
                        help="giou box coefficient in the matching cost")

    # * HOI Detection
    parser.add_argument('--HOIDet', action='store_true',
                        help="Train HOI Detection head if the flag is provided")
    parser.add_argument('--share_enc', action='store_true',
                        help="Share the Encoder in DETR for HOI Detection if the flag is provided")
    parser.add_argument('--pretrained_dec', action='store_true',
                        help="Use Pre-trained Decoder in DETR for Interaction Decoder if the flag is provided")
    # The help strings of the next four options previously all read
    # "Number of decoding layers in HOI transformer"; they now mirror the
    # wording of the corresponding DETR options above.
    parser.add_argument('--hoi_enc_layers', default=1, type=int,
                        help="Number of encoding layers in HOI transformer")
    parser.add_argument('--hoi_dec_layers', default=1, type=int,
                        help="Number of decoding layers in HOI transformer")
    parser.add_argument('--hoi_nheads', default=8, type=int,
                        help="Number of attention heads in HOI transformer")
    parser.add_argument('--hoi_dim_feedforward', default=2048, type=int,
                        help="Intermediate size of the feedforward layers in HOI transformer blocks")
    # parser.add_argument('--hoi_mode', type=str, default=None, help='[inst | pair | all]')
    parser.add_argument('--num_hoi_queries', default=100, type=int,
                        help="Number of Queries for Interaction Decoder")
    parser.add_argument('--hoi_aux_loss', action='store_true')

    # * HOTR Matcher
    parser.add_argument('--set_cost_idx', default=1, type=float,
                        help="IDX coefficient in the matching cost")
    parser.add_argument('--set_cost_act', default=1, type=float,
                        help="Action coefficient in the matching cost")
    parser.add_argument('--set_cost_tgt', default=1, type=float,
                        help="Target coefficient in the matching cost")

    # * HOTR Loss coefficients
    parser.add_argument('--temperature', default=0.05, type=float, help="temperature")
    parser.add_argument('--hoi_consistency_loss_coef', default=1, type=float)
    parser.add_argument('--hoi_idx_loss_coef', default=1, type=float)
    parser.add_argument('--hoi_idx_consistency_loss_coef', default=1, type=float)
    parser.add_argument('--hoi_act_loss_coef', default=1, type=float)
    parser.add_argument('--hoi_act_consistency_loss_coef', default=1, type=float)
    parser.add_argument('--hoi_tgt_loss_coef', default=1, type=float)
    parser.add_argument('--hoi_tgt_consistency_loss_coef', default=1, type=float)
    parser.add_argument('--hoi_eos_coef', default=0.1, type=float,
                        help="Relative classification weight of the no-object class")
    parser.add_argument('--ramp_down_epoch', default=10000, type=int)
    parser.add_argument('--ramp_up_epoch', default=0, type=int)

    # Consistency
    parser.add_argument('--use_consis', action='store_true', help='use consistency regularization')
    parser.add_argument('--share_dec_param', action='store_true', help='share decoder parameters of all stages')
    # The raw command-line string is converted by utils.arg_as_list
    # (hotr.util.misc; its implementation is not visible in this file).
    parser.add_argument("--augpath_name", type=utils.arg_as_list, default=[],
                        help='choose which augmented inference paths to use. (p2:x->HO->I,p3:x->HI->O,p4:x->OI->H)')
    parser.add_argument('--stop_grad_stage', action='store_true',
                        help='Do not back propogate loss to previous stage')
    parser.add_argument('--path_id', default=0, type=int)
    parser.add_argument('--sep_enc_forward', action='store_true')

    # * dataset parameters
    parser.add_argument('--dataset_file', help='[coco | vcoco]')
    parser.add_argument('--data_path', type=str)
    parser.add_argument('--object_threshold', type=float, default=0, help='Threshold for object confidence')

    # machine parameters
    parser.add_argument('--output_dir', default='',
                        help='path where to save, empty for no saving')
    parser.add_argument('--custom_path', default='',
                        help="Data path for custom inference. Only required for custom_main.py")
    parser.add_argument('--device', default='cuda',
                        help='device to use for training / testing')
    parser.add_argument('--seed', default=42, type=int)
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start_epoch', default=0, type=int, metavar='N',
                        help='start epoch')
    parser.add_argument('--num_workers', default=2, type=int)

    # mode
    parser.add_argument('--eval', action='store_true', help="Only evaluate results if the flag is provided")
    parser.add_argument('--validate', action='store_true', help="Validate after every epoch")

    # distributed training parameters
    parser.add_argument('--world_size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')

    # * WandB
    parser.add_argument('--wandb', action='store_true')
    parser.add_argument('--project_name', default='hotr_cpc')
    parser.add_argument('--group_name', default='mlv')
    parser.add_argument('--run_name', default='run_000001')

    return parser