From 47bff6348dd41b7428706725d69a3096a037611f Mon Sep 17 00:00:00 2001 From: Guillermo Pinto Ruiz Date: Mon, 24 Jun 2024 07:26:57 -0500 Subject: [PATCH] Add Xception on architectures (#4) * Add Xception on architectures * add torchinfo * added in CSV * add torchinfo version --- automation/notebooks-table-data.csv | 1 + notebooks/architectures/xception.ipynb | 856 +++++++++++++++++++++++++ 2 files changed, 857 insertions(+) create mode 100644 notebooks/architectures/xception.ipynb diff --git a/automation/notebooks-table-data.csv b/automation/notebooks-table-data.csv index fbe1be0..ab4e39f 100644 --- a/automation/notebooks-table-data.csv +++ b/automation/notebooks-table-data.csv @@ -2,3 +2,4 @@ display_name, notebook_name, github_repository_path, arxiv_index Focal Loss,losses/focal-loss.ipynb,https://github.com/facebookresearch/Detectron,1708.02002 CELoss vs NLLLoss, losses/celoss-vs-nllloss.ipynb, , Network In Network, architectures/network-in-network.ipynb,,1312.4400 +Xception, architectures/xception.ipynb,,1610.02357v3 diff --git a/notebooks/architectures/xception.ipynb b/notebooks/architectures/xception.ipynb new file mode 100644 index 0000000..c1f1ff8 --- /dev/null +++ b/notebooks/architectures/xception.ipynb @@ -0,0 +1,856 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![deep-learning-notes](https://github.com/semilleroCV/deep-learning-notes/raw/main/assets/banner-notebook.png)](https://github.com/semilleroCV/deep-learning-notes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dftDiCQg3vYy" + }, + "source": [ + "# **Xception from scratch** " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#@title **Install required packages**\n", + "\n", + "%%capture\n", + "! pip install torchinfo" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 121 + }, + "id": "zEwblwS83vY1", + "outputId": "db2b27e3-fe45-4be0-fb46-b78167b5e019" + }, + "outputs": [], + "source": [ + "#@title **Import required libraries**\n", + "\n", + "# PyTorch essentials\n", + "import torch # 2.2.1\n", + "import torch.nn as nn\n", + "import torchinfo # 1.8.0\n", + "from torchinfo import summary" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch: 2.2.1\n", + "torchinfo: 1.8.0\n" + ] + } + ], + "source": [ + "print(f'torch: {torch.__version__}')\n", + "print(f'torchinfo: {torchinfo.__version__}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**The Xception architecture**: the data first goes through the entry flow, then through the middle flow, which is repeated eight times, and finally through the exit flow. Note that all Convolution and SeparableConvolution layers are followed by batch normalization [7] (not included in the diagram). All SeparableConvolution layers use a depth multiplier of 1 (no depth expansion).\n", + "\n", + "Note the order of operations: each SeparableConv2d in this implementation applies the 3x3 depthwise convolution first and then the 1x1 pointwise convolution, which is the standard depthwise separable convolution used in the paper (the reverse of the 1x1-first ordering of Inception modules).\n", + "\n", + "
\n", + "\n", + "The entire architecture looks like this:\n", + "\n", + "
\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### **Entry flow**" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class DoubleConvBlock(nn.Module):\n", + " def __init__(self, in_channels: int, out_channels: int):\n", + " super().__init__()\n", + " # Double convolutional block at the beginning of Xception\n", + " self.double_conv = nn.Sequential(\n", + " nn.Conv2d(in_channels, out_channels//2, kernel_size=3, stride=2, bias=False),\n", + " nn.BatchNorm2d(out_channels//2),\n", + " nn.ReLU(inplace=True),\n", + " nn.Conv2d(out_channels//2, out_channels, kernel_size=3, stride=1, bias=False),\n", + " nn.BatchNorm2d(out_channels),\n", + " nn.ReLU(inplace=True)\n", + " )\n", + "\n", + " def forward(self, x):\n", + " return self.double_conv(x)\n", + " \n", + "class SeparableConv2d(nn.Module):\n", + " def __init__(self, in_channels: int, out_channels: int):\n", + " super().__init__()\n", + "\n", + " self.depth_wise_conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, groups=in_channels, padding=1, bias=False)\n", + " self.one_by_one = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)\n", + "\n", + " def forward(self, x):\n", + " x = self.depth_wise_conv(x)\n", + " x = self.one_by_one(x)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "class XceptionModule(nn.Module):\n", + " def __init__(self, in_channels: int, out_channels: int, relu_at_start=True):\n", + " super().__init__()\n", + "\n", + " # first one by one\n", + " self.one_by_one = nn.Sequential(\n", + " nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=2, bias=False),\n", + " nn.BatchNorm2d(out_channels)\n", + " )\n", + "\n", + " if relu_at_start:\n", + " self.double_depth_wise_conv = nn.Sequential(\n", + " nn.ReLU(inplace=False),\n", + " SeparableConv2d(in_channels, out_channels),\n", + " nn.BatchNorm2d(out_channels),\n", + " nn.ReLU(inplace=False),\n", + " SeparableConv2d(out_channels, out_channels),\n", + " nn.BatchNorm2d(out_channels),\n", + " nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n", + " )\n", + " else:\n", + " self.double_depth_wise_conv = nn.Sequential(\n", + " SeparableConv2d(in_channels, out_channels),\n", + " nn.BatchNorm2d(out_channels),\n", + " nn.ReLU(inplace=False),\n", + " SeparableConv2d(out_channels, out_channels),\n", + " nn.BatchNorm2d(out_channels),\n", + " nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n", + " )\n", + "\n", + " def forward(self, x):\n", + " x1 = self.one_by_one(x)\n", + " x2 =self.double_depth_wise_conv(x)\n", + " x = torch.add(x1, x2)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "class EntryFlowModule(nn.Module):\n", + " def __init__(self, in_channels: int, out_channels: int):\n", + " super().__init__()\n", + "\n", + " # 2d double convolution at start\n", + " self.double_conv = DoubleConvBlock(in_channels, 64)\n", + "\n", + " self.block1 = XceptionModule(64, 128, relu_at_start=False)\n", + " self.block2 = XceptionModule(128, 256)\n", + " self.block3 = XceptionModule(256, out_channels)\n", + " \n", + " def forward(self, x):\n", + " x = self.double_conv(x)\n", + " x = self.block1(x)\n", + " x = self.block2(x)\n", + " x = self.block3(x)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Entrada: (torch.Size([2, 1, 299, 299]), {torch.float32})\n", + "Salida: (torch.Size([2, 728, 19, 19]), torch.float32)\n" + ] + } + ], + "source": [ + "# Test the module to see if it gives the expected result.\n", + "\n", + "input_image = torch.rand([2, 1, 299, 299])\n", + "print(f\"Entrada: {input_image.size(), {input_image.dtype}}\")\n", + "model = EntryFlowModule(in_channels=1, out_channels=728)\n", + "ouput = model(input_image)\n", + "print(f\"Salida: {ouput.size(), ouput.dtype}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### **Middle flow**" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "class XceptionMiddleModule(nn.Module):\n", + " def __init__(self, in_channels: int, out_channels: int):\n", + " super().__init__()\n", + "\n", + " # triple separable conv 2d\n", + " self.triple_depth_wise_conv = nn.Sequential(\n", + " nn.ReLU(inplace=True),\n", + " SeparableConv2d(in_channels, out_channels),\n", + " nn.BatchNorm2d(out_channels),\n", + " nn.ReLU(inplace=True),\n", + " SeparableConv2d(out_channels, out_channels),\n", + " nn.BatchNorm2d(out_channels),\n", + " nn.ReLU(inplace=True),\n", + " SeparableConv2d(out_channels, out_channels),\n", + " nn.BatchNorm2d(out_channels),\n", + " )\n", + " \n", + " def forward(self, x):\n", + " x = self.triple_depth_wise_conv(x)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "class MiddleFlowModule(nn.Module):\n", + " def __init__(self, in_channels, out_channels):\n", + " super().__init__()\n", + " self.middle_flow = nn.Sequential()\n", + " for _ in range(8):\n", + " self.middle_flow.append(XceptionMiddleModule(in_channels, out_channels))\n", + "\n", + " def forward(self, x):\n", + " x = self.middle_flow(x)\n", + " return x\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Entrada: (torch.Size([2, 728, 19, 19]), {torch.float32})\n", + "Salida: (torch.Size([2, 728, 19, 19]), torch.float32)\n" + ] + } + ], + "source": [ + "# Test the module to see if it gives the expected result.\n", + "\n", + "input_image = torch.rand([2, 728, 19, 19])\n", + "print(f\"Entrada: {input_image.size(), {input_image.dtype}}\")\n", + "model = MiddleFlowModule(in_channels=728, out_channels=728)\n", + "ouput = model(input_image)\n", + "print(f\"Salida: {ouput.size(), ouput.dtype}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### **Exit flow**" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "class XceptionExitModule(nn.Module):\n", + " def __init__(self, in_channels: int, out_channels: int):\n", + " super().__init__()\n", + "\n", + " # first one by one\n", + " self.one_by_one = nn.Sequential(\n", + " nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=2, bias=False),\n", + " nn.BatchNorm2d(out_channels)\n", + " )\n", + "\n", + " self.double_depth_wise_conv = nn.Sequential(\n", + " nn.ReLU(inplace=False),\n", + " SeparableConv2d(in_channels, in_channels),\n", + " nn.BatchNorm2d(in_channels),\n", + " nn.ReLU(inplace=False),\n", + " SeparableConv2d(in_channels, out_channels),\n", + " nn.BatchNorm2d(out_channels),\n", + " nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n", + " )\n", + "\n", + " def forward(self, x):\n", + " x1 = self.one_by_one(x)\n", + " x2 
=self.double_depth_wise_conv(x)\n", + " x = torch.add(x1, x2)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "class ExitFlowModule(nn.Module):\n", + " def __init__(self, in_channels, n_classes):\n", + " super().__init__()\n", + "\n", + " self.block1 = XceptionExitModule(in_channels, 1024)\n", + " self.block2 = nn.Sequential(\n", + " SeparableConv2d(1024, 1536),\n", + " nn.BatchNorm2d(1536),\n", + " nn.ReLU(inplace=True),\n", + " SeparableConv2d(1536, 2048),\n", + " nn.BatchNorm2d(2048),\n", + " nn.ReLU(inplace=True),\n", + " )\n", + "\n", + " self.gap = nn.AdaptiveAvgPool2d((1, 1))\n", + " self.last_fc = nn.Linear(2048, n_classes)\n", + " \n", + " def forward(self, x):\n", + " x = self.block1(x)\n", + " x = self.block2(x)\n", + " x = self.gap(x)\n", + " x = x.view(x.size(0), -1)\n", + " x = self.last_fc(x)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Entrada: (torch.Size([2, 728, 19, 19]), {torch.float32})\n", + "Salida: (torch.Size([2, 1]), torch.float32)\n" + ] + } + ], + "source": [ + "# Test the module to see if it gives the expected result.\n", + "\n", + "input_image = torch.rand([2, 728, 19, 19])\n", + "print(f\"Entrada: {input_image.size(), {input_image.dtype}}\")\n", + "model = ExitFlowModule(in_channels=728, n_classes=1)\n", + "ouput = model(input_image)\n", + "print(f\"Salida: {ouput.size(), ouput.dtype}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### **Full model**" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "class Xception(nn.Module):\n", + " def __init__(self, n_channels, n_classes):\n", + " super().__init__()\n", + " self.entry_flow = EntryFlowModule(n_channels, 728)\n", + " self.middle_flow = MiddleFlowModule(728, 728)\n", + " self.exit_flow = ExitFlowModule(728, n_classes)\n", + "\n", + " def forward(self, x):\n", + " x = self.entry_flow(x)\n", + " x = self.middle_flow(x)\n", + " x = self.exit_flow(x)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZjVVbqbIXB1e", + "outputId": "b1c9b45a-ef55-4fbf-9d76-d9753ecbf9ed" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input: (torch.Size([2, 1, 299, 299]), torch.float32)\n", + "Ouput: (torch.Size([2, 1]), torch.float32)\n" + ] + } + ], + "source": [ + "# Test the model to see if it gives the expected result.\n", + "\n", + "input_image = torch.rand([2, 1, 299, 299])\n", + "print(f\"Input: {input_image.size(), input_image.dtype}\")\n", + "model = Xception(n_channels=1, n_classes=1)\n", + "ouput = model(input_image)\n", + "print(f\"Ouput: {ouput.size(), ouput.dtype}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Xception(\n", + " (entry_flow): EntryFlowModule(\n", + " (double_conv): DoubleConvBlock(\n", + " (double_conv): Sequential(\n", + " (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)\n", + " (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU(inplace=True)\n", + " (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)\n", + " (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, 
affine=True, track_running_stats=True)\n", + " (5): ReLU(inplace=True)\n", + " )\n", + " )\n", + " (block1): XceptionModule(\n", + " (one_by_one): Sequential(\n", + " (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", + " (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (double_depth_wise_conv): Sequential(\n", + " (0): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)\n", + " (one_by_one): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU()\n", + " (3): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)\n", + " (one_by_one): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n", + " )\n", + " )\n", + " (block2): XceptionModule(\n", + " (one_by_one): Sequential(\n", + " (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", + " (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (double_depth_wise_conv): Sequential(\n", + " (0): ReLU()\n", + " (1): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)\n", + " (one_by_one): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (3): ReLU()\n", + " (4): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)\n", + " (one_by_one): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (5): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (6): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n", + " )\n", + " )\n", + " (block3): XceptionModule(\n", + " (one_by_one): Sequential(\n", + " (0): Conv2d(256, 728, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", + " (1): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (double_depth_wise_conv): Sequential(\n", + " (0): ReLU()\n", + " (1): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)\n", + " (one_by_one): Conv2d(256, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (3): ReLU()\n", + " (4): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (6): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n", + " )\n", + " )\n", + " )\n", + " (middle_flow): MiddleFlowModule(\n", + " (middle_flow): Sequential(\n", + " (0): XceptionMiddleModule(\n", 
+ " (triple_depth_wise_conv): Sequential(\n", + " (0): ReLU(inplace=True)\n", + " (1): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (3): ReLU(inplace=True)\n", + " (4): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (6): ReLU(inplace=True)\n", + " (7): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (1): XceptionMiddleModule(\n", + " (triple_depth_wise_conv): Sequential(\n", + " (0): ReLU(inplace=True)\n", + " (1): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (3): ReLU(inplace=True)\n", + " (4): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (6): ReLU(inplace=True)\n", + " (7): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (2): XceptionMiddleModule(\n", + " (triple_depth_wise_conv): Sequential(\n", + " (0): ReLU(inplace=True)\n", + " (1): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (3): ReLU(inplace=True)\n", + " (4): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (6): ReLU(inplace=True)\n", + " (7): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, 
affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (3): XceptionMiddleModule(\n", + " (triple_depth_wise_conv): Sequential(\n", + " (0): ReLU(inplace=True)\n", + " (1): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (3): ReLU(inplace=True)\n", + " (4): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (6): ReLU(inplace=True)\n", + " (7): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (4): XceptionMiddleModule(\n", + " (triple_depth_wise_conv): Sequential(\n", + " (0): ReLU(inplace=True)\n", + " (1): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (3): ReLU(inplace=True)\n", + " (4): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (6): ReLU(inplace=True)\n", + " (7): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (5): XceptionMiddleModule(\n", + " (triple_depth_wise_conv): Sequential(\n", + " (0): ReLU(inplace=True)\n", + " (1): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (3): ReLU(inplace=True)\n", + " (4): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (6): ReLU(inplace=True)\n", + " (7): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), 
stride=(1, 1), bias=False)\n", + " )\n", + " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (6): XceptionMiddleModule(\n", + " (triple_depth_wise_conv): Sequential(\n", + " (0): ReLU(inplace=True)\n", + " (1): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (3): ReLU(inplace=True)\n", + " (4): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (6): ReLU(inplace=True)\n", + " (7): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " (7): XceptionMiddleModule(\n", + " (triple_depth_wise_conv): Sequential(\n", + " (0): ReLU(inplace=True)\n", + " (1): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (3): ReLU(inplace=True)\n", + " (4): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (6): ReLU(inplace=True)\n", + " (7): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (exit_flow): ExitFlowModule(\n", + " (block1): XceptionExitModule(\n", + " (one_by_one): Sequential(\n", + " (0): Conv2d(728, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", + " (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (double_depth_wise_conv): Sequential(\n", + " (0): ReLU()\n", + " (1): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (3): ReLU()\n", + " (4): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n", + " (one_by_one): Conv2d(728, 1024, kernel_size=(1, 1), stride=(1, 1), 
bias=False)\n", + " )\n", + " (5): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (6): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n", + " )\n", + " )\n", + " (block2): Sequential(\n", + " (0): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024, bias=False)\n", + " (one_by_one): Conv2d(1024, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (1): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (2): ReLU(inplace=True)\n", + " (3): SeparableConv2d(\n", + " (depth_wise_conv): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1536, bias=False)\n", + " (one_by_one): Conv2d(1536, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", + " )\n", + " (4): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (5): ReLU(inplace=True)\n", + " )\n", + " (gap): AdaptiveAvgPool2d(output_size=(1, 1))\n", + " (last_fc): Linear(in_features=2048, out_features=1, bias=True)\n", + " )\n", + ")\n" + ] + } + ], + "source": [ + "print(model)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "====================================================================================================\n", + "Layer (type:depth-idx) Output Shape Param #\n", + "====================================================================================================\n", + "Xception [2, 1] --\n", + "├─EntryFlowModule: 1-1 [2, 728, 19, 19] --\n", + "│ └─DoubleConvBlock: 2-1 [2, 64, 147, 147] --\n", + "│ │ └─Sequential: 3-1 [2, 64, 147, 147] 18,912\n", + "│ └─XceptionModule: 2-2 [2, 128, 74, 74] --\n", + "│ │ └─Sequential: 3-2 [2, 128, 74, 74] 8,448\n", + "│ │ └─Sequential: 3-3 [2, 128, 74, 74] 26,816\n", + "│ └─XceptionModule: 2-3 [2, 256, 37, 37] --\n", + "│ │ └─Sequential: 3-4 [2, 256, 37, 37] 33,280\n", + "│ │ └─Sequential: 3-5 [2, 256, 37, 37] 102,784\n", + "│ └─XceptionModule: 2-4 [2, 728, 19, 19] --\n", + "│ │ └─Sequential: 3-6 [2, 728, 19, 19] 187,824\n", + "│ │ └─Sequential: 3-7 [2, 728, 19, 19] 728,120\n", + "├─MiddleFlowModule: 1-2 [2, 728, 19, 19] --\n", + "│ └─Sequential: 2-5 [2, 728, 19, 19] --\n", + "│ │ └─XceptionMiddleModule: 3-8 [2, 728, 19, 19] 1,613,976\n", + "│ │ └─XceptionMiddleModule: 3-9 [2, 728, 19, 19] 1,613,976\n", + "│ │ └─XceptionMiddleModule: 3-10 [2, 728, 19, 19] 1,613,976\n", + "│ │ └─XceptionMiddleModule: 3-11 [2, 728, 19, 19] 1,613,976\n", + "│ │ └─XceptionMiddleModule: 3-12 [2, 728, 19, 19] 1,613,976\n", + "│ │ └─XceptionMiddleModule: 3-13 [2, 728, 19, 19] 1,613,976\n", + "│ │ └─XceptionMiddleModule: 3-14 [2, 728, 19, 19] 1,613,976\n", + "│ │ └─XceptionMiddleModule: 3-15 [2, 728, 19, 19] 1,613,976\n", + "├─ExitFlowModule: 1-3 [2, 1] --\n", + "│ └─XceptionExitModule: 2-6 [2, 1024, 10, 10] --\n", + "│ │ └─Sequential: 3-16 [2, 1024, 10, 10] 747,520\n", + "│ │ └─Sequential: 3-17 [2, 1024, 10, 10] 1,292,064\n", + "│ └─Sequential: 2-7 [2, 2048, 10, 10] --\n", + "│ │ └─SeparableConv2d: 3-18 [2, 1536, 10, 10] 1,582,080\n", + "│ │ └─BatchNorm2d: 3-19 [2, 1536, 10, 10] 3,072\n", + "│ │ └─ReLU: 3-20 [2, 1536, 10, 10] --\n", + "│ │ └─SeparableConv2d: 3-21 [2, 2048, 10, 10] 3,159,552\n", + "│ │ └─BatchNorm2d: 3-22 [2, 2048, 10, 10] 4,096\n", + "│ │ └─ReLU: 3-23 [2, 2048, 10, 10] --\n", + "│ └─AdaptiveAvgPool2d: 2-8 [2, 2048, 1, 1] --\n", + "│ └─Linear: 2-9 [2, 1] 
2,049\n", + "====================================================================================================\n", + "Total params: 20,808,425\n", + "Trainable params: 20,808,425\n", + "Non-trainable params: 0\n", + "Total mult-adds (G): 16.69\n", + "====================================================================================================\n", + "Input size (MB): 0.72\n", + "Forward/backward pass size (MB): 911.39\n", + "Params size (MB): 83.23\n", + "Estimated Total Size (MB): 995.34\n", + "====================================================================================================" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "summary(model, input_image.size())" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}
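
The parameter savings that motivate Xception come from the separable convolutions themselves: a depthwise separable convolution factorizes a dense 3x3 convolution into a per-channel 3x3 convolution followed by a 1x1 pointwise convolution. Below is a minimal standalone sketch (assuming only PyTorch, with `SeparableConv2d` mirroring the definition in the notebook above) that compares the two at the 728-channel width used in the middle flow; the exact numbers are illustrative arithmetic, not part of the patch.

```python
import torch
import torch.nn as nn


class SeparableConv2d(nn.Module):
    """Depthwise 3x3 convolution followed by a 1x1 pointwise convolution."""

    def __init__(self, in_channels: int, out_channels: int):
        super().__init__()
        self.depth_wise_conv = nn.Conv2d(in_channels, in_channels, kernel_size=3,
                                         groups=in_channels, padding=1, bias=False)
        self.one_by_one = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)

    def forward(self, x):
        return self.one_by_one(self.depth_wise_conv(x))


def count_params(module: nn.Module) -> int:
    return sum(p.numel() for p in module.parameters())


dense = nn.Conv2d(728, 728, kernel_size=3, padding=1, bias=False)  # ordinary 3x3 convolution
separable = SeparableConv2d(728, 728)                               # Xception-style separable convolution

print(f"dense 3x3:     {count_params(dense):,} parameters")      # 728 * 728 * 9       = 4,769,856
print(f"separable 3x3: {count_params(separable):,} parameters")  # 728 * 9 + 728 * 728 =   536,536

# Both layers map a (N, 728, 19, 19) tensor to an output of the same shape.
x = torch.rand(2, 728, 19, 19)
assert dense(x).shape == separable(x).shape == (2, 728, 19, 19)
```

Three such separable convolutions plus their batch-norm layers account for the 1,613,976 parameters that torchinfo reports for each `XceptionMiddleModule` above.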