|
CifNetForImageClassification(
  (resnet): CifNetModel(
    (embedder): CifNetEmbeddings(
      (embedder): CifNetConvLayer(
        (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation): SiLU()
      )
      (pooler): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (encoder): CifNetEncoder(
      (stages): ModuleList(
        (0): CifNetStage(
          (layers): Sequential(
            (0): CifNetSelfAttentionLayer(
              (shortcut): CifNetShortCut(
                (convolution): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
                (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (in_conv): CifNetConvLayer(
                (convolution): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
                (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (activation): SiLU()
              )
              (attention): CifNetSelfAttention(
                (q_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
                (k_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
                (v_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
                (o_proj): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))
              )
              (activation): SiLU()
              (attention_norm): CifNetRMSNorm()
              (out_conv): CifNetConvLayer(
                (convolution): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (activation): SiLU()
              )
            )
          )
        )
        (1-3): 3 x CifNetStage(
          (layers): Sequential(
            (0): CifNetSelfAttentionLayer(
              (shortcut): Identity()
              (in_conv): CifNetConvLayer(
                (convolution): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (activation): SiLU()
              )
              (attention): CifNetSelfAttention(
                (q_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
                (k_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
                (v_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
                (o_proj): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))
              )
              (activation): SiLU()
              (attention_norm): CifNetRMSNorm()
              (out_conv): CifNetConvLayer(
                (convolution): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (activation): SiLU()
              )
            )
            (1): CifNetSelfAttentionLayer(
              (shortcut): Identity()
              (in_conv): CifNetConvLayer(
                (convolution): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (activation): SiLU()
              )
              (attention): CifNetSelfAttention(
                (q_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
                (k_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
                (v_proj): Conv2d(128, 32, kernel_size=(1, 1), stride=(1, 1))
                (o_proj): Conv2d(32, 128, kernel_size=(1, 1), stride=(1, 1))
              )
              (activation): SiLU()
              (attention_norm): CifNetRMSNorm()
              (out_conv): CifNetConvLayer(
                (convolution): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (normalization): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (activation): SiLU()
              )
            )
          )
        )
      )
    )
    (pooler): AdaptiveAvgPool2d(output_size=(1, 1))
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=128, out_features=10, bias=True)
  )
)
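
Read together, each CifNetSelfAttentionLayer is shaped like a residual block: a shortcut branch (a strided 1x1 conv plus BatchNorm where the resolution or channel count changes, Identity otherwise) around an in_conv -> attention -> out_conv path, with an RMSNorm and a SiLU applied around the attention. Below is a minimal sketch of what the CifNetSelfAttention submodule plausibly computes, inferred only from the printed 1x1 projections; the class name SelfAttention2d, the flatten/transpose wiring, and the use of scaled_dot_product_attention are assumptions, not CifNet's actual code.

import torch
import torch.nn as nn
import torch.nn.functional as F

class SelfAttention2d(nn.Module):
    # Hypothetical stand-in for CifNetSelfAttention: 1x1-conv q/k/v
    # projections from 128 down to 32 channels, attention over the
    # flattened spatial positions, then a 1x1-conv back to 128 channels.
    def __init__(self, channels: int = 128, attn_dim: int = 32):
        super().__init__()
        self.q_proj = nn.Conv2d(channels, attn_dim, kernel_size=1)
        self.k_proj = nn.Conv2d(channels, attn_dim, kernel_size=1)
        self.v_proj = nn.Conv2d(channels, attn_dim, kernel_size=1)
        self.o_proj = nn.Conv2d(attn_dim, channels, kernel_size=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        b, c, h, w = x.shape
        # (b, attn_dim, h, w) -> (b, h*w, attn_dim): one token per spatial position
        q = self.q_proj(x).flatten(2).transpose(1, 2)
        k = self.k_proj(x).flatten(2).transpose(1, 2)
        v = self.v_proj(x).flatten(2).transpose(1, 2)
        # Scaled dot-product attention over the h*w positions
        out = F.scaled_dot_product_attention(q, k, v)
        # Restore (b, attn_dim, h, w), then project back up to the input width
        out = out.transpose(1, 2).reshape(b, -1, h, w)
        return self.o_proj(out)

Attending in a 32-dim space over the 28x28 = 784 positions keeps the block cheap: each of q_proj/k_proj/v_proj costs 128*32 + 32 = 4,128 parameters and o_proj costs 32*128 + 128 = 4,224, which matches the per-layer counts in the summary below.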
|
-----------------------------------------------------------------
                Layer (type)             Output Shape     Param #
=================================================================
                    Conv2d-1        [4, 64, 112, 112]       9,408
               BatchNorm2d-2        [4, 64, 112, 112]         128
                      SiLU-3        [4, 64, 112, 112]           0
           CifNetConvLayer-4        [4, 64, 112, 112]           0
                 MaxPool2d-5          [4, 64, 56, 56]           0
          CifNetEmbeddings-6          [4, 64, 56, 56]           0
                    Conv2d-7         [4, 128, 28, 28]      73,728
               BatchNorm2d-8         [4, 128, 28, 28]         256
                      SiLU-9         [4, 128, 28, 28]           0
          CifNetConvLayer-10         [4, 128, 28, 28]           0
            CifNetRMSNorm-11         [4, 28, 28, 128]         128
                   Conv2d-12          [4, 32, 28, 28]       4,128
                   Conv2d-13          [4, 32, 28, 28]       4,128
                   Conv2d-14          [4, 32, 28, 28]       4,128
                   Conv2d-15         [4, 128, 28, 28]       4,224
      CifNetSelfAttention-16         [4, 128, 28, 28]           0
                     SiLU-17         [4, 128, 28, 28]           0
                   Conv2d-18         [4, 128, 28, 28]     147,456
              BatchNorm2d-19         [4, 128, 28, 28]         256
                     SiLU-20         [4, 128, 28, 28]           0
          CifNetConvLayer-21         [4, 128, 28, 28]           0
                   Conv2d-22         [4, 128, 28, 28]       8,192
              BatchNorm2d-23         [4, 128, 28, 28]         256
           CifNetShortCut-24         [4, 128, 28, 28]           0
 CifNetSelfAttentionLayer-25         [4, 128, 28, 28]           0
              CifNetStage-26         [4, 128, 28, 28]           0
                   Conv2d-27         [4, 128, 28, 28]     147,456
              BatchNorm2d-28         [4, 128, 28, 28]         256
                     SiLU-29         [4, 128, 28, 28]           0
          CifNetConvLayer-30         [4, 128, 28, 28]           0
            CifNetRMSNorm-31         [4, 28, 28, 128]         128
                   Conv2d-32          [4, 32, 28, 28]       4,128
                   Conv2d-33          [4, 32, 28, 28]       4,128
                   Conv2d-34          [4, 32, 28, 28]       4,128
                   Conv2d-35         [4, 128, 28, 28]       4,224
      CifNetSelfAttention-36         [4, 128, 28, 28]           0
                     SiLU-37         [4, 128, 28, 28]           0
                   Conv2d-38         [4, 128, 28, 28]     147,456
              BatchNorm2d-39         [4, 128, 28, 28]         256
                     SiLU-40         [4, 128, 28, 28]           0
          CifNetConvLayer-41         [4, 128, 28, 28]           0
                 Identity-42         [4, 128, 28, 28]           0
 CifNetSelfAttentionLayer-43         [4, 128, 28, 28]           0
                   Conv2d-44         [4, 128, 28, 28]     147,456
              BatchNorm2d-45         [4, 128, 28, 28]         256
                     SiLU-46         [4, 128, 28, 28]           0
          CifNetConvLayer-47         [4, 128, 28, 28]           0
            CifNetRMSNorm-48         [4, 28, 28, 128]         128
                   Conv2d-49          [4, 32, 28, 28]       4,128
                   Conv2d-50          [4, 32, 28, 28]       4,128
                   Conv2d-51          [4, 32, 28, 28]       4,128
                   Conv2d-52         [4, 128, 28, 28]       4,224
      CifNetSelfAttention-53         [4, 128, 28, 28]           0
                     SiLU-54         [4, 128, 28, 28]           0
                   Conv2d-55         [4, 128, 28, 28]     147,456
              BatchNorm2d-56         [4, 128, 28, 28]         256
                     SiLU-57         [4, 128, 28, 28]           0
          CifNetConvLayer-58         [4, 128, 28, 28]           0
                 Identity-59         [4, 128, 28, 28]           0
 CifNetSelfAttentionLayer-60         [4, 128, 28, 28]           0
              CifNetStage-61         [4, 128, 28, 28]           0
                   Conv2d-62         [4, 128, 28, 28]     147,456
              BatchNorm2d-63         [4, 128, 28, 28]         256
                     SiLU-64         [4, 128, 28, 28]           0
          CifNetConvLayer-65         [4, 128, 28, 28]           0
            CifNetRMSNorm-66         [4, 28, 28, 128]         128
                   Conv2d-67          [4, 32, 28, 28]       4,128
                   Conv2d-68          [4, 32, 28, 28]       4,128
                   Conv2d-69          [4, 32, 28, 28]       4,128
                   Conv2d-70         [4, 128, 28, 28]       4,224
      CifNetSelfAttention-71         [4, 128, 28, 28]           0
                     SiLU-72         [4, 128, 28, 28]           0
                   Conv2d-73         [4, 128, 28, 28]     147,456
              BatchNorm2d-74         [4, 128, 28, 28]         256
                     SiLU-75         [4, 128, 28, 28]           0
          CifNetConvLayer-76         [4, 128, 28, 28]           0
                 Identity-77         [4, 128, 28, 28]           0
 CifNetSelfAttentionLayer-78         [4, 128, 28, 28]           0
                   Conv2d-79         [4, 128, 28, 28]     147,456
              BatchNorm2d-80         [4, 128, 28, 28]         256
                     SiLU-81         [4, 128, 28, 28]           0
          CifNetConvLayer-82         [4, 128, 28, 28]           0
            CifNetRMSNorm-83         [4, 28, 28, 128]         128
                   Conv2d-84          [4, 32, 28, 28]       4,128
                   Conv2d-85          [4, 32, 28, 28]       4,128
                   Conv2d-86          [4, 32, 28, 28]       4,128
                   Conv2d-87         [4, 128, 28, 28]       4,224
      CifNetSelfAttention-88         [4, 128, 28, 28]           0
                     SiLU-89         [4, 128, 28, 28]           0
                   Conv2d-90         [4, 128, 28, 28]     147,456
              BatchNorm2d-91         [4, 128, 28, 28]         256
                     SiLU-92         [4, 128, 28, 28]           0
          CifNetConvLayer-93         [4, 128, 28, 28]           0
                 Identity-94         [4, 128, 28, 28]           0
 CifNetSelfAttentionLayer-95         [4, 128, 28, 28]           0
              CifNetStage-96         [4, 128, 28, 28]           0
                   Conv2d-97         [4, 128, 28, 28]     147,456
              BatchNorm2d-98         [4, 128, 28, 28]         256
                     SiLU-99         [4, 128, 28, 28]           0
         CifNetConvLayer-100         [4, 128, 28, 28]           0
           CifNetRMSNorm-101         [4, 28, 28, 128]         128
                  Conv2d-102          [4, 32, 28, 28]       4,128
                  Conv2d-103          [4, 32, 28, 28]       4,128
                  Conv2d-104          [4, 32, 28, 28]       4,128
                  Conv2d-105         [4, 128, 28, 28]       4,224
     CifNetSelfAttention-106         [4, 128, 28, 28]           0
                    SiLU-107         [4, 128, 28, 28]           0
                  Conv2d-108         [4, 128, 28, 28]     147,456
             BatchNorm2d-109         [4, 128, 28, 28]         256
                    SiLU-110         [4, 128, 28, 28]           0
         CifNetConvLayer-111         [4, 128, 28, 28]           0
                Identity-112         [4, 128, 28, 28]           0
CifNetSelfAttentionLayer-113         [4, 128, 28, 28]           0
                  Conv2d-114         [4, 128, 28, 28]     147,456
             BatchNorm2d-115         [4, 128, 28, 28]         256
                    SiLU-116         [4, 128, 28, 28]           0
         CifNetConvLayer-117         [4, 128, 28, 28]           0
           CifNetRMSNorm-118         [4, 28, 28, 128]         128
                  Conv2d-119          [4, 32, 28, 28]       4,128
                  Conv2d-120          [4, 32, 28, 28]       4,128
                  Conv2d-121          [4, 32, 28, 28]       4,128
                  Conv2d-122         [4, 128, 28, 28]       4,224
     CifNetSelfAttention-123         [4, 128, 28, 28]           0
                    SiLU-124         [4, 128, 28, 28]           0
                  Conv2d-125         [4, 128, 28, 28]     147,456
             BatchNorm2d-126         [4, 128, 28, 28]         256
                    SiLU-127         [4, 128, 28, 28]           0
         CifNetConvLayer-128         [4, 128, 28, 28]           0
                Identity-129         [4, 128, 28, 28]           0
CifNetSelfAttentionLayer-130         [4, 128, 28, 28]           0
             CifNetStage-131         [4, 128, 28, 28]           0
           CifNetEncoder-132      [[-1, 128, 28, 28]]           0
       AdaptiveAvgPool2d-133           [4, 128, 1, 1]           0
             CifNetModel-134 [[-1, 128, 28, 28], [-1, 128, 1, 1]]           0
                 Flatten-135                 [4, 128]           0
                  Linear-136                  [4, 10]       1,290
=================================================================
Total params: 2,130,666
Trainable params: 2,130,666
Non-trainable params: 0
-----------------------------------------------------------------
Input size (MB): 2.30
Forward/backward pass size (MB): 542.07
Params size (MB): 8.13
Estimated Total Size (MB): 552.50
-----------------------------------------------------------------
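
The table above has the column layout of the torchsummary package, so it was likely produced by a call along the following lines; the 3x224x224 input size is inferred from the 112x112 stem output, and batch_size=4 from the leading dimension of every Output Shape row. Treat this as a sketch, not the exact invocation.

from torchsummary import summary  # pip install torchsummary

# `model` is assumed to be the CifNetForImageClassification instance
# whose repr is printed above; its construction is not shown here.
summary(model, input_size=(3, 224, 224), batch_size=4, device="cpu")

The per-layer counts follow directly from the printed configs: the stem conv contributes 3*64*7*7 = 9,408 weights (bias=False, so there is no extra 64), each 3x3 conv at width 128 contributes 128*128*3*3 = 147,456, and everything sums to the listed total of 2,130,666.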
|
|