
[Pytorch] Checking the number of network parameters

by pulluper 2023. 2. 1.

1. print(sum(p.numel() for p in model.parameters() if p.requires_grad))

 

For example, for various torchvision models (p.numel() returns the number of elements in each parameter tensor, and the requires_grad check restricts the sum to trainable parameters):

 

from torchvision.models import vgg11, vgg13, vgg16, vgg19, resnet18, resnet34, resnet50, resnet101, resnet152

if __name__ == '__main__':
    model = vgg11()
    print("vgg11 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = vgg13()
    print("vgg13 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = vgg16()
    print("vgg16 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = vgg19()
    print("vgg19 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = resnet18()
    print("resnet18 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = resnet34()
    print("resnet34 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = resnet50()
    print("resnet50 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = resnet101()
    print("resnet101 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))
    model = resnet152()
    print("resnet152 : ", sum(p.numel() for p in model.parameters() if p.requires_grad))

 

Result:

 

vgg11 :  132863336
vgg13 :  133047848
vgg16 :  138357544
vgg19 :  143667240
resnet18 :  11689512
resnet34 :  21797672
resnet50 :  25557032
resnet101 :  44549160
resnet152 :  60192808
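If you count parameters often, it is convenient to wrap the one-liner in a small helper that reports both trainable and total (including frozen) parameters, and to divide by 1e6 to get the commonly quoted figures (e.g. resnet50 ≈ 25.6M). A minimal sketch; the helper name count_parameters and the output formatting are my own, not part of torchvision:

from torchvision.models import resnet50

def count_parameters(model):
    # trainable parameters vs. all parameters (including frozen ones)
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    return trainable, total

if __name__ == '__main__':
    trainable, total = count_parameters(resnet50())
    print(f"trainable: {trainable:,} ({trainable / 1e6:.1f}M), total: {total:,}")
    # trainable: 25,557,032 (25.6M), total: 25,557,032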

 

2. torchsummary 
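torchsummary is a third-party package, so install it first:

pip install torchsummary

The example below moves the model to the GPU with .cuda() because summary runs a forward pass; if I remember correctly, summary also accepts a device argument (e.g. summary(model, (3, 224, 224), device='cpu')) for machines without a GPU.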

 

from torchvision.models import vgg11
from torchsummary import summary

if __name__ == '__main__':
    model = vgg11().cuda()
    summary(model, (3, 224, 224), batch_size=16)

 

Result:

---------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [16, 64, 224, 224]           1,792
              ReLU-2         [16, 64, 224, 224]               0
         MaxPool2d-3         [16, 64, 112, 112]               0
            Conv2d-4        [16, 128, 112, 112]          73,856
              ReLU-5        [16, 128, 112, 112]               0
         MaxPool2d-6          [16, 128, 56, 56]               0
            Conv2d-7          [16, 256, 56, 56]         295,168
              ReLU-8          [16, 256, 56, 56]               0
            Conv2d-9          [16, 256, 56, 56]         590,080
             ReLU-10          [16, 256, 56, 56]               0
        MaxPool2d-11          [16, 256, 28, 28]               0
           Conv2d-12          [16, 512, 28, 28]       1,180,160
             ReLU-13          [16, 512, 28, 28]               0
           Conv2d-14          [16, 512, 28, 28]       2,359,808
             ReLU-15          [16, 512, 28, 28]               0
        MaxPool2d-16          [16, 512, 14, 14]               0
           Conv2d-17          [16, 512, 14, 14]       2,359,808
             ReLU-18          [16, 512, 14, 14]               0
           Conv2d-19          [16, 512, 14, 14]       2,359,808
             ReLU-20          [16, 512, 14, 14]               0
        MaxPool2d-21            [16, 512, 7, 7]               0
AdaptiveAvgPool2d-22            [16, 512, 7, 7]               0
           Linear-23                 [16, 4096]     102,764,544
             ReLU-24                 [16, 4096]               0
          Dropout-25                 [16, 4096]               0
           Linear-26                 [16, 4096]      16,781,312
             ReLU-27                 [16, 4096]               0
          Dropout-28                 [16, 4096]               0
           Linear-29                 [16, 1000]       4,097,000
================================================================
Total params: 132,863,336
Trainable params: 132,863,336
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 9.19
Forward/backward pass size (MB): 2006.00
Params size (MB): 506.83
Estimated Total Size (MB): 2522.02
----------------------------------------------------------------
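The Total params line matches the one-liner result for vgg11 above (132,863,336), and the Params size line is simply that count times 4 bytes per float32 parameter:

# 132,863,336 float32 parameters x 4 bytes each, converted to MiB
print(132_863_336 * 4 / 2 ** 20)  # ≈ 506.83, matching "Params size (MB)" above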

 

 

 

It also works for the ViT implemented in my earlier post, [DNN] ViT (Vision Transformer) review and implementation on CIFAR10 (ICLR 2021): https://csm-kr.tistory.com/54

 

    # ViT is the model class implemented in the post linked above
    model = ViT().cuda()
    summary(model, (3, 32, 32), batch_size=16)

 

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1            [16, 192, 8, 8]           9,408
    EmbeddingLayer-2              [16, 65, 192]               0
         LayerNorm-3              [16, 65, 192]             384
            Linear-4              [16, 65, 576]         110,592
           Dropout-5           [16, 12, 65, 65]               0
            Linear-6              [16, 65, 192]          37,056
           Dropout-7              [16, 65, 192]               0
               MSA-8              [16, 65, 192]               0
         LayerNorm-9              [16, 65, 192]             384
           Linear-10              [16, 65, 384]          74,112
             GELU-11              [16, 65, 384]               0
          Dropout-12              [16, 65, 384]               0
           Linear-13              [16, 65, 192]          73,920
          Dropout-14              [16, 65, 192]               0
              MLP-15              [16, 65, 192]               0
            Block-16              [16, 65, 192]               0
        LayerNorm-17              [16, 65, 192]             384
           Linear-18              [16, 65, 576]         110,592
          Dropout-19           [16, 12, 65, 65]               0
           Linear-20              [16, 65, 192]          37,056
          Dropout-21              [16, 65, 192]               0
              MSA-22              [16, 65, 192]               0
        LayerNorm-23              [16, 65, 192]             384
           Linear-24              [16, 65, 384]          74,112
             GELU-25              [16, 65, 384]               0
          Dropout-26              [16, 65, 384]               0
           Linear-27              [16, 65, 192]          73,920
          Dropout-28              [16, 65, 192]               0
              MLP-29              [16, 65, 192]               0
            Block-30              [16, 65, 192]               0
        LayerNorm-31              [16, 65, 192]             384
           Linear-32              [16, 65, 576]         110,592
          Dropout-33           [16, 12, 65, 65]               0
           Linear-34              [16, 65, 192]          37,056
          Dropout-35              [16, 65, 192]               0
              MSA-36              [16, 65, 192]               0
        LayerNorm-37              [16, 65, 192]             384
           Linear-38              [16, 65, 384]          74,112
             GELU-39              [16, 65, 384]               0
          Dropout-40              [16, 65, 384]               0
           Linear-41              [16, 65, 192]          73,920
          Dropout-42              [16, 65, 192]               0
              MLP-43              [16, 65, 192]               0
            Block-44              [16, 65, 192]               0
        LayerNorm-45              [16, 65, 192]             384
           Linear-46              [16, 65, 576]         110,592
          Dropout-47           [16, 12, 65, 65]               0
           Linear-48              [16, 65, 192]          37,056
          Dropout-49              [16, 65, 192]               0
              MSA-50              [16, 65, 192]               0
        LayerNorm-51              [16, 65, 192]             384
           Linear-52              [16, 65, 384]          74,112
             GELU-53              [16, 65, 384]               0
          Dropout-54              [16, 65, 384]               0
           Linear-55              [16, 65, 192]          73,920
          Dropout-56              [16, 65, 192]               0
              MLP-57              [16, 65, 192]               0
            Block-58              [16, 65, 192]               0
        LayerNorm-59              [16, 65, 192]             384
           Linear-60              [16, 65, 576]         110,592
          Dropout-61           [16, 12, 65, 65]               0
           Linear-62              [16, 65, 192]          37,056
          Dropout-63              [16, 65, 192]               0
              MSA-64              [16, 65, 192]               0
        LayerNorm-65              [16, 65, 192]             384
           Linear-66              [16, 65, 384]          74,112
             GELU-67              [16, 65, 384]               0
          Dropout-68              [16, 65, 384]               0
           Linear-69              [16, 65, 192]          73,920
          Dropout-70              [16, 65, 192]               0
              MLP-71              [16, 65, 192]               0
            Block-72              [16, 65, 192]               0
        LayerNorm-73              [16, 65, 192]             384
           Linear-74              [16, 65, 576]         110,592
          Dropout-75           [16, 12, 65, 65]               0
           Linear-76              [16, 65, 192]          37,056
          Dropout-77              [16, 65, 192]               0
              MSA-78              [16, 65, 192]               0
        LayerNorm-79              [16, 65, 192]             384
           Linear-80              [16, 65, 384]          74,112
             GELU-81              [16, 65, 384]               0
          Dropout-82              [16, 65, 384]               0
           Linear-83              [16, 65, 192]          73,920
          Dropout-84              [16, 65, 192]               0
              MLP-85              [16, 65, 192]               0
            Block-86              [16, 65, 192]               0
        LayerNorm-87              [16, 65, 192]             384
           Linear-88              [16, 65, 576]         110,592
          Dropout-89           [16, 12, 65, 65]               0
           Linear-90              [16, 65, 192]          37,056
          Dropout-91              [16, 65, 192]               0
              MSA-92              [16, 65, 192]               0
        LayerNorm-93              [16, 65, 192]             384
           Linear-94              [16, 65, 384]          74,112
             GELU-95              [16, 65, 384]               0
          Dropout-96              [16, 65, 384]               0
           Linear-97              [16, 65, 192]          73,920
          Dropout-98              [16, 65, 192]               0
              MLP-99              [16, 65, 192]               0
           Block-100              [16, 65, 192]               0
       LayerNorm-101              [16, 65, 192]             384
          Linear-102              [16, 65, 576]         110,592
         Dropout-103           [16, 12, 65, 65]               0
          Linear-104              [16, 65, 192]          37,056
         Dropout-105              [16, 65, 192]               0
             MSA-106              [16, 65, 192]               0
       LayerNorm-107              [16, 65, 192]             384
          Linear-108              [16, 65, 384]          74,112
            GELU-109              [16, 65, 384]               0
         Dropout-110              [16, 65, 384]               0
          Linear-111              [16, 65, 192]          73,920
         Dropout-112              [16, 65, 192]               0
             MLP-113              [16, 65, 192]               0
           Block-114              [16, 65, 192]               0
       LayerNorm-115              [16, 65, 192]             384
          Linear-116              [16, 65, 576]         110,592
         Dropout-117           [16, 12, 65, 65]               0
          Linear-118              [16, 65, 192]          37,056
         Dropout-119              [16, 65, 192]               0
             MSA-120              [16, 65, 192]               0
       LayerNorm-121              [16, 65, 192]             384
          Linear-122              [16, 65, 384]          74,112
            GELU-123              [16, 65, 384]               0
         Dropout-124              [16, 65, 384]               0
          Linear-125              [16, 65, 192]          73,920
         Dropout-126              [16, 65, 192]               0
             MLP-127              [16, 65, 192]               0
           Block-128              [16, 65, 192]               0
       LayerNorm-129              [16, 65, 192]             384
          Linear-130              [16, 65, 576]         110,592
         Dropout-131           [16, 12, 65, 65]               0
          Linear-132              [16, 65, 192]          37,056
         Dropout-133              [16, 65, 192]               0
             MSA-134              [16, 65, 192]               0
       LayerNorm-135              [16, 65, 192]             384
          Linear-136              [16, 65, 384]          74,112
            GELU-137              [16, 65, 384]               0
         Dropout-138              [16, 65, 384]               0
          Linear-139              [16, 65, 192]          73,920
         Dropout-140              [16, 65, 192]               0
             MLP-141              [16, 65, 192]               0
           Block-142              [16, 65, 192]               0
       LayerNorm-143              [16, 65, 192]             384
          Linear-144              [16, 65, 576]         110,592
         Dropout-145           [16, 12, 65, 65]               0
          Linear-146              [16, 65, 192]          37,056
         Dropout-147              [16, 65, 192]               0
             MSA-148              [16, 65, 192]               0
       LayerNorm-149              [16, 65, 192]             384
          Linear-150              [16, 65, 384]          74,112
            GELU-151              [16, 65, 384]               0
         Dropout-152              [16, 65, 384]               0
          Linear-153              [16, 65, 192]          73,920
         Dropout-154              [16, 65, 192]               0
             MLP-155              [16, 65, 192]               0
           Block-156              [16, 65, 192]               0
       LayerNorm-157              [16, 65, 192]             384
          Linear-158              [16, 65, 576]         110,592
         Dropout-159           [16, 12, 65, 65]               0
          Linear-160              [16, 65, 192]          37,056
         Dropout-161              [16, 65, 192]               0
             MSA-162              [16, 65, 192]               0
       LayerNorm-163              [16, 65, 192]             384
          Linear-164              [16, 65, 384]          74,112
            GELU-165              [16, 65, 384]               0
         Dropout-166              [16, 65, 384]               0
          Linear-167              [16, 65, 192]          73,920
         Dropout-168              [16, 65, 192]               0
             MLP-169              [16, 65, 192]               0
           Block-170              [16, 65, 192]               0
       LayerNorm-171              [16, 65, 192]             384
          Linear-172               [16, 65, 10]           1,930
================================================================
Total params: 3,569,098
Trainable params: 3,569,098
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.19
Forward/backward pass size (MB): 407.96
Params size (MB): 13.62
Estimated Total Size (MB): 421.76
----------------------------------------------------------------
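The Total params here (3,569,098) should also match the one-liner from section 1 when applied to the same ViT instance:

print(sum(p.numel() for p in model.parameters() if p.requires_grad))  # 3569098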

