# File size: 1,584 Bytes

# f83ff13

from transformers import PretrainedConfig


# Define configuration class
class DaViTConfig(PretrainedConfig):
    """Hyperparameter container for a DaViT model.

    Every constructor argument is stored as an attribute of the same name.
    Tuple values of the sequence-valued arguments are converted to lists
    first: JSON has no tuple type, so a saved config always reloads with
    lists, and converting up front keeps a freshly built config and its
    reloaded counterpart holding the same types.

    Extra keyword arguments are forwarded to ``PretrainedConfig.__init__``.

    NOTE(review): the argument meanings below are inferred from the
    parameter names and defaults only -- confirm against the model code
    that consumes this config.

    Args:
        in_chans: Presumably the number of input channels. Default ``3``.
        depths: One entry per stage (default has four); presumably the
            number of blocks in each stage.
        patch_size: One entry per stage; presumably the patch-embedding
            kernel size.
        patch_stride: One entry per stage; presumably the patch-embedding
            stride.
        patch_padding: One entry per stage; presumably the patch-embedding
            padding.
        patch_prenorm: One boolean per stage; presumably whether
            normalization precedes the patch embedding.
        embed_dims: One entry per stage; presumably the embedding width.
        num_heads: One entry per stage; presumably the attention head
            count.
        num_groups: One entry per stage; presumably the channel-group
            count.
        window_size: Presumably the attention window size. Default ``12``.
        mlp_ratio: Presumably the MLP hidden-width multiplier. Default
            ``4.0``.
        qkv_bias: Presumably whether QKV projections carry a bias.
            Default ``True``.
        drop_path_rate: Presumably the stochastic-depth rate. Default
            ``0.1``.
        norm_layer: Name of the normalization layer. Default
            ``"layer_norm"``.
        enable_checkpoint: Presumably toggles activation checkpointing.
            Default ``False``.
        conv_at_attn: Presumably adds a convolution in attention blocks.
            Default ``True``.
        conv_at_ffn: Presumably adds a convolution in feed-forward blocks.
            Default ``True``.
        projection_dim: Presumably the output projection width. Default
            ``768``.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    model_type = "davit"

    @staticmethod
    def _tuple_to_list(value):
        """Return ``list(value)`` for a tuple; return anything else as-is."""
        return list(value) if isinstance(value, tuple) else value

    def __init__(
        self,
        in_chans=3,
        depths=(1, 1, 9, 1),
        patch_size=(7, 3, 3, 3),
        patch_stride=(4, 2, 2, 2),
        patch_padding=(3, 1, 1, 1),
        patch_prenorm=(False, True, True, True),
        embed_dims=(128, 256, 512, 1024),
        num_heads=(4, 8, 16, 32),
        num_groups=(4, 8, 16, 32),
        window_size=12,
        mlp_ratio=4.0,
        qkv_bias=True,
        drop_path_rate=0.1,
        norm_layer="layer_norm",
        enable_checkpoint=False,
        conv_at_attn=True,
        conv_at_ffn=True,
        projection_dim=768,
        **kwargs
    ):
        super().__init__(**kwargs)
        # NOTE: there is deliberately no ``num_classes`` setting; it was
        # removed so the config can be used with AutoModel.
        self.in_chans = in_chans

        # Sequence-valued settings: tuples become lists so the in-memory
        # types match what a JSON round trip produces. Only tuples are
        # converted; any other value is stored exactly as given.
        as_list = self._tuple_to_list
        self.depths = as_list(depths)
        self.patch_size = as_list(patch_size)
        self.patch_stride = as_list(patch_stride)
        self.patch_padding = as_list(patch_padding)
        self.patch_prenorm = as_list(patch_prenorm)
        self.embed_dims = as_list(embed_dims)
        self.num_heads = as_list(num_heads)
        self.num_groups = as_list(num_groups)

        # Scalar settings are stored unchanged.
        self.window_size = window_size
        self.mlp_ratio = mlp_ratio
        self.qkv_bias = qkv_bias
        self.drop_path_rate = drop_path_rate
        self.norm_layer = norm_layer
        self.enable_checkpoint = enable_checkpoint
        self.conv_at_attn = conv_at_attn
        self.conv_at_ffn = conv_at_ffn
        self.projection_dim = projection_dim