# ResNet-50 training config: the settings below override values coming from
# the `_base_` files (model, dataset, schedule and runtime defaults).
# NOTE(review): merge semantics are defined by the config loader that reads
# this file, not by this file itself — confirm against the loader in use.
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/imagenet_bs256_rsb_a12.py',
    '../_base_/schedules/imagenet_bs2048_rsb.py',
    '../_base_/default_runtime.py',
]

# model settings
model = dict(
    backbone=dict(
        # Synchronized batch norm with trainable affine parameters.
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        drop_path_rate=0.05,
    ),
    head=dict(
        loss=dict(
            type='LabelSmoothLoss',
            label_smooth_val=0.1,
            mode='original',
            use_sigmoid=True,
        ),
    ),
    # Batch-level augmentations listed for training.
    train_cfg=dict(
        augments=[
            dict(type='Mixup', alpha=0.2),
            dict(type='CutMix', alpha=1.0),
        ],
    ),
)

# dataset settings
train_dataloader = dict(sampler=dict(type='RepeatAugSampler', shuffle=True))

# schedule settings
optim_wrapper = dict(
    optimizer=dict(weight_decay=0.01),
    # Decay multipliers of 0 for bias and normalization parameters
    # (presumably exempting them from weight decay — verify in the optimizer
    # wrapper constructor).
    paramwise_cfg=dict(bias_decay_mult=0., norm_decay_mult=0.),
)

# Two consecutive phases: epochs [0, 5) warm-up, epochs [5, 600) cosine
# annealing. T_max (595) equals the length of the second phase (600 - 5).
param_scheduler = [
    # warm up learning rate scheduler
    dict(
        type='LinearLR',
        start_factor=0.0001,
        by_epoch=True,
        begin=0,
        end=5,
        # update by iter
        convert_to_iter_based=True,
    ),
    # main learning rate scheduler
    dict(
        type='CosineAnnealingLR',
        T_max=595,
        eta_min=1.0e-6,
        by_epoch=True,
        begin=5,
        end=600,
    ),
]

# Total training length; matches the `end` of the last scheduler phase above.
train_cfg = dict(by_epoch=True, max_epochs=600)