1. print(sum(p.numel() for p in model.parameters() if p.requires_grad))
For example, applied to several torchvision models:
from torchvision.models import *

if __name__ == '__main__':
    model = vgg11()
    print("vgg11 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = vgg13()
    print("vgg13 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = vgg16()
    print("vgg16 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = vgg19()
    print("vgg19 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = resnet18()
    print("resnet18 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = resnet34()
    print("resnet34 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = resnet50()
    print("resnet50 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = resnet101()
    print("resnet101 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = resnet152()
    print("resnet152 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
Result:
vgg11 : 132863336
vgg13 : 133047848
vgg16 : 138357544
vgg19 : 143667240
resnet18 : 11689512
resnet34 : 21797672
resnet50 : 25557032
resnet101 : 44549160
resnet152 : 60192808
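If you count parameters often, it can be convenient to wrap the one-liner in a small helper that reports both the total and the trainable counts. This is just an illustrative sketch; the function name count_parameters is not from the original post.

import torch.nn as nn
from torchvision.models import resnet18

def count_parameters(model: nn.Module):
    """Return (total, trainable) parameter counts for a model."""
    total = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return total, trainable

if __name__ == '__main__':
    total, trainable = count_parameters(resnet18())
    print(f"total: {total}, trainable: {trainable}")  # both 11689512 for a freshly built resnet18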
2. torchsummary
from torchvision.models import *
from torchsummary import summary

if __name__ == '__main__':
    model = vgg11().cuda()
    summary(model, (3, 224, 224), batch_size=16)
Result:
---------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [16, 64, 224, 224] 1,792
ReLU-2 [16, 64, 224, 224] 0
MaxPool2d-3 [16, 64, 112, 112] 0
Conv2d-4 [16, 128, 112, 112] 73,856
ReLU-5 [16, 128, 112, 112] 0
MaxPool2d-6 [16, 128, 56, 56] 0
Conv2d-7 [16, 256, 56, 56] 295,168
ReLU-8 [16, 256, 56, 56] 0
Conv2d-9 [16, 256, 56, 56] 590,080
ReLU-10 [16, 256, 56, 56] 0
MaxPool2d-11 [16, 256, 28, 28] 0
Conv2d-12 [16, 512, 28, 28] 1,180,160
ReLU-13 [16, 512, 28, 28] 0
Conv2d-14 [16, 512, 28, 28] 2,359,808
ReLU-15 [16, 512, 28, 28] 0
MaxPool2d-16 [16, 512, 14, 14] 0
Conv2d-17 [16, 512, 14, 14] 2,359,808
ReLU-18 [16, 512, 14, 14] 0
Conv2d-19 [16, 512, 14, 14] 2,359,808
ReLU-20 [16, 512, 14, 14] 0
MaxPool2d-21 [16, 512, 7, 7] 0
AdaptiveAvgPool2d-22 [16, 512, 7, 7] 0
Linear-23 [16, 4096] 102,764,544
ReLU-24 [16, 4096] 0
Dropout-25 [16, 4096] 0
Linear-26 [16, 4096] 16,781,312
ReLU-27 [16, 4096] 0
Dropout-28 [16, 4096] 0
Linear-29 [16, 1000] 4,097,000
================================================================
Total params: 132,863,336
Trainable params: 132,863,336
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 9.19
Forward/backward pass size (MB): 2006.00
Params size (MB): 506.83
Estimated Total Size (MB): 2522.02
----------------------------------------------------------------
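If no GPU is available, the same summary can be produced on the CPU. torchsummary (installable with pip install torchsummary) accepts a device argument; the sketch below assumes the torchsummary 1.5.x signature summary(model, input_size, batch_size=-1, device="cuda").

from torchvision.models import vgg11
from torchsummary import summary

if __name__ == '__main__':
    model = vgg11()  # model stays on the CPU, so no .cuda() call
    summary(model, (3, 224, 224), batch_size=16, device="cpu")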
It also works with the ViT implemented in an earlier post!
model = ViT().cuda()
summary(model, (3, 32, 32), batch_size=16)
----------------------------------------------------------------
Layer (type) Output Shape Param #
================================================================
Conv2d-1 [16, 192, 8, 8] 9,408
EmbeddingLayer-2 [16, 65, 192] 0
LayerNorm-3 [16, 65, 192] 384
Linear-4 [16, 65, 576] 110,592
Dropout-5 [16, 12, 65, 65] 0
Linear-6 [16, 65, 192] 37,056
Dropout-7 [16, 65, 192] 0
MSA-8 [16, 65, 192] 0
LayerNorm-9 [16, 65, 192] 384
Linear-10 [16, 65, 384] 74,112
GELU-11 [16, 65, 384] 0
Dropout-12 [16, 65, 384] 0
Linear-13 [16, 65, 192] 73,920
Dropout-14 [16, 65, 192] 0
MLP-15 [16, 65, 192] 0
Block-16 [16, 65, 192] 0
LayerNorm-17 [16, 65, 192] 384
Linear-18 [16, 65, 576] 110,592
Dropout-19 [16, 12, 65, 65] 0
Linear-20 [16, 65, 192] 37,056
Dropout-21 [16, 65, 192] 0
MSA-22 [16, 65, 192] 0
LayerNorm-23 [16, 65, 192] 384
Linear-24 [16, 65, 384] 74,112
GELU-25 [16, 65, 384] 0
Dropout-26 [16, 65, 384] 0
Linear-27 [16, 65, 192] 73,920
Dropout-28 [16, 65, 192] 0
MLP-29 [16, 65, 192] 0
Block-30 [16, 65, 192] 0
LayerNorm-31 [16, 65, 192] 384
Linear-32 [16, 65, 576] 110,592
Dropout-33 [16, 12, 65, 65] 0
Linear-34 [16, 65, 192] 37,056
Dropout-35 [16, 65, 192] 0
MSA-36 [16, 65, 192] 0
LayerNorm-37 [16, 65, 192] 384
Linear-38 [16, 65, 384] 74,112
GELU-39 [16, 65, 384] 0
Dropout-40 [16, 65, 384] 0
Linear-41 [16, 65, 192] 73,920
Dropout-42 [16, 65, 192] 0
MLP-43 [16, 65, 192] 0
Block-44 [16, 65, 192] 0
LayerNorm-45 [16, 65, 192] 384
Linear-46 [16, 65, 576] 110,592
Dropout-47 [16, 12, 65, 65] 0
Linear-48 [16, 65, 192] 37,056
Dropout-49 [16, 65, 192] 0
MSA-50 [16, 65, 192] 0
LayerNorm-51 [16, 65, 192] 384
Linear-52 [16, 65, 384] 74,112
GELU-53 [16, 65, 384] 0
Dropout-54 [16, 65, 384] 0
Linear-55 [16, 65, 192] 73,920
Dropout-56 [16, 65, 192] 0
MLP-57 [16, 65, 192] 0
Block-58 [16, 65, 192] 0
LayerNorm-59 [16, 65, 192] 384
Linear-60 [16, 65, 576] 110,592
Dropout-61 [16, 12, 65, 65] 0
Linear-62 [16, 65, 192] 37,056
Dropout-63 [16, 65, 192] 0
MSA-64 [16, 65, 192] 0
LayerNorm-65 [16, 65, 192] 384
Linear-66 [16, 65, 384] 74,112
GELU-67 [16, 65, 384] 0
Dropout-68 [16, 65, 384] 0
Linear-69 [16, 65, 192] 73,920
Dropout-70 [16, 65, 192] 0
MLP-71 [16, 65, 192] 0
Block-72 [16, 65, 192] 0
LayerNorm-73 [16, 65, 192] 384
Linear-74 [16, 65, 576] 110,592
Dropout-75 [16, 12, 65, 65] 0
Linear-76 [16, 65, 192] 37,056
Dropout-77 [16, 65, 192] 0
MSA-78 [16, 65, 192] 0
LayerNorm-79 [16, 65, 192] 384
Linear-80 [16, 65, 384] 74,112
GELU-81 [16, 65, 384] 0
Dropout-82 [16, 65, 384] 0
Linear-83 [16, 65, 192] 73,920
Dropout-84 [16, 65, 192] 0
MLP-85 [16, 65, 192] 0
Block-86 [16, 65, 192] 0
LayerNorm-87 [16, 65, 192] 384
Linear-88 [16, 65, 576] 110,592
Dropout-89 [16, 12, 65, 65] 0
Linear-90 [16, 65, 192] 37,056
Dropout-91 [16, 65, 192] 0
MSA-92 [16, 65, 192] 0
LayerNorm-93 [16, 65, 192] 384
Linear-94 [16, 65, 384] 74,112
GELU-95 [16, 65, 384] 0
Dropout-96 [16, 65, 384] 0
Linear-97 [16, 65, 192] 73,920
Dropout-98 [16, 65, 192] 0
MLP-99 [16, 65, 192] 0
Block-100 [16, 65, 192] 0
LayerNorm-101 [16, 65, 192] 384
Linear-102 [16, 65, 576] 110,592
Dropout-103 [16, 12, 65, 65] 0
Linear-104 [16, 65, 192] 37,056
Dropout-105 [16, 65, 192] 0
MSA-106 [16, 65, 192] 0
LayerNorm-107 [16, 65, 192] 384
Linear-108 [16, 65, 384] 74,112
GELU-109 [16, 65, 384] 0
Dropout-110 [16, 65, 384] 0
Linear-111 [16, 65, 192] 73,920
Dropout-112 [16, 65, 192] 0
MLP-113 [16, 65, 192] 0
Block-114 [16, 65, 192] 0
LayerNorm-115 [16, 65, 192] 384
Linear-116 [16, 65, 576] 110,592
Dropout-117 [16, 12, 65, 65] 0
Linear-118 [16, 65, 192] 37,056
Dropout-119 [16, 65, 192] 0
MSA-120 [16, 65, 192] 0
LayerNorm-121 [16, 65, 192] 384
Linear-122 [16, 65, 384] 74,112
GELU-123 [16, 65, 384] 0
Dropout-124 [16, 65, 384] 0
Linear-125 [16, 65, 192] 73,920
Dropout-126 [16, 65, 192] 0
MLP-127 [16, 65, 192] 0
Block-128 [16, 65, 192] 0
LayerNorm-129 [16, 65, 192] 384
Linear-130 [16, 65, 576] 110,592
Dropout-131 [16, 12, 65, 65] 0
Linear-132 [16, 65, 192] 37,056
Dropout-133 [16, 65, 192] 0
MSA-134 [16, 65, 192] 0
LayerNorm-135 [16, 65, 192] 384
Linear-136 [16, 65, 384] 74,112
GELU-137 [16, 65, 384] 0
Dropout-138 [16, 65, 384] 0
Linear-139 [16, 65, 192] 73,920
Dropout-140 [16, 65, 192] 0
MLP-141 [16, 65, 192] 0
Block-142 [16, 65, 192] 0
LayerNorm-143 [16, 65, 192] 384
Linear-144 [16, 65, 576] 110,592
Dropout-145 [16, 12, 65, 65] 0
Linear-146 [16, 65, 192] 37,056
Dropout-147 [16, 65, 192] 0
MSA-148 [16, 65, 192] 0
LayerNorm-149 [16, 65, 192] 384
Linear-150 [16, 65, 384] 74,112
GELU-151 [16, 65, 384] 0
Dropout-152 [16, 65, 384] 0
Linear-153 [16, 65, 192] 73,920
Dropout-154 [16, 65, 192] 0
MLP-155 [16, 65, 192] 0
Block-156 [16, 65, 192] 0
LayerNorm-157 [16, 65, 192] 384
Linear-158 [16, 65, 576] 110,592
Dropout-159 [16, 12, 65, 65] 0
Linear-160 [16, 65, 192] 37,056
Dropout-161 [16, 65, 192] 0
MSA-162 [16, 65, 192] 0
LayerNorm-163 [16, 65, 192] 384
Linear-164 [16, 65, 384] 74,112
GELU-165 [16, 65, 384] 0
Dropout-166 [16, 65, 384] 0
Linear-167 [16, 65, 192] 73,920
Dropout-168 [16, 65, 192] 0
MLP-169 [16, 65, 192] 0
Block-170 [16, 65, 192] 0
LayerNorm-171 [16, 65, 192] 384
Linear-172 [16, 65, 10] 1,930
================================================================
Total params: 3,569,098
Trainable params: 3,569,098
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.19
Forward/backward pass size (MB): 407.96
Params size (MB): 13.62
Estimated Total Size (MB): 421.76
----------------------------------------------------------------
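As a quick sanity check, the count from method 1 should match the "Trainable params" line that torchsummary prints. A minimal sketch using vgg11:

from torchvision.models import vgg11
from torchsummary import summary

if __name__ == '__main__':
    model = vgg11().cuda()
    # Method 1: count parameters directly
    print(sum(p.numel() for p in model.parameters() if p.requires_grad))  # 132863336
    # Method 2: torchsummary reports the same value as "Trainable params"
    summary(model, (3, 224, 224), batch_size=16)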