diff --git a/automation/notebooks-table-data.csv b/automation/notebooks-table-data.csv
index fbe1be0..ab4e39f 100644
--- a/automation/notebooks-table-data.csv
+++ b/automation/notebooks-table-data.csv
@@ -2,3 +2,4 @@ display_name, notebook_name, github_repository_path, arxiv_index
Focal Loss,losses/focal-loss.ipynb,https://github.com/facebookresearch/Detectron,1708.02002
CELoss vs NLLLoss, losses/celoss-vs-nllloss.ipynb, ,
Network In Network, architectures/network-in-network.ipynb,,1312.4400
+Xception, architectures/xception.ipynb,,1610.02357v3
diff --git a/notebooks/architectures/xception.ipynb b/notebooks/architectures/xception.ipynb
new file mode 100644
index 0000000..c1f1ff8
--- /dev/null
+++ b/notebooks/architectures/xception.ipynb
@@ -0,0 +1,856 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "[](https://github.com/semilleroCV/deep-learning-notes)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "dftDiCQg3vYy"
+ },
+ "source": [
+ "# **Xception from scratch** "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "#@title **Install required packages**\n",
+ "\n",
+ "# NOTE: `%%capture` is a cell magic, so it has to be the very first line of the cell.\n",
+ "# `%pip` (instead of `!pip`) installs into the environment of the running kernel.\n",
+ "# The version is pinned to the one this notebook was executed with.\n",
+ "%pip install -q torchinfo==1.8.0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 121
+ },
+ "id": "zEwblwS83vY1",
+ "outputId": "db2b27e3-fe45-4be0-fb46-b78167b5e019"
+ },
+ "outputs": [],
+ "source": [
+ "#@title **Import required libraries**.\n",
+ "\n",
+ "# Pytorch essentials\n",
+ "import torch # 2.2.1\n",
+ "import torch.nn as nn\n",
+ "import torchinfo # 1.8.0 (module import, needed for torchinfo.__version__)\n",
+ "from torchinfo import summary # 1.8.0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "torch: 2.2.1\n",
+ "torchinfo: 1.8.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Report the library versions this notebook was executed with.\n",
+ "# `torchinfo` has to be imported as a module in the import cell, otherwise\n",
+ "# `torchinfo.__version__` raises a NameError on a fresh kernel.\n",
+ "for library in (torch, torchinfo):\n",
+ "    print(f'{library.__name__}: {library.__version__}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**The Xception architecture**: the data first goes through the entry flow, then through the middle flow, which is repeated eight times, and finally through the exit flow. Note that all Convolution and SeparableConvolution layers are followed by batch normalization [7] (not included in the diagram). All SeparableConvolution layers use a depth multiplier of 1 (no depth expansion).\n",
+ "\n",
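+ "The *extreme* Inception module that motivates Xception first performs the 1x1 convolution and then a 3x3 convolution on each of its output channels. The `SeparableConv2d` layer implemented in this notebook applies the two steps in the opposite order: the 3x3 depthwise convolution first, then the 1x1 pointwise convolution.\n",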
+ "\n",
+ "
\n",
+ "\n",
+ "The entire architecture looks like this:\n",
+ "\n",
+ "
\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### **Entry flow**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class DoubleConvBlock(nn.Module):\n",
+ " def __init__(self, in_channels: int, out_channels: int):\n",
+ " super().__init__()\n",
+ " # Double convolutional block at the beginning of Xception\n",
+ " self.double_conv = nn.Sequential(\n",
+ " nn.Conv2d(in_channels, out_channels//2, kernel_size=3, stride=2, bias=False),\n",
+ " nn.BatchNorm2d(out_channels//2),\n",
+ " nn.ReLU(inplace=True),\n",
+ " nn.Conv2d(out_channels//2, out_channels, kernel_size=3, stride=1, bias=False),\n",
+ " nn.BatchNorm2d(out_channels),\n",
+ " nn.ReLU(inplace=True)\n",
+ " )\n",
+ "\n",
+ " def forward(self, x):\n",
+ " return self.double_conv(x)\n",
+ " \n",
+ "class SeparableConv2d(nn.Module):\n",
+ " def __init__(self, in_channels: int, out_channels: int):\n",
+ " super().__init__()\n",
+ "\n",
+ " self.depth_wise_conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, groups=in_channels, padding=1, bias=False)\n",
+ " self.one_by_one = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)\n",
+ "\n",
+ " def forward(self, x):\n",
+ " x = self.depth_wise_conv(x)\n",
+ " x = self.one_by_one(x)\n",
+ " return x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class XceptionModule(nn.Module):\n",
+ "    \"\"\"Entry-flow residual block: two separable convolutions plus a strided 1x1 shortcut.\n",
+ "\n",
+ "    Main branch: [ReLU] -> SeparableConv2d -> BN -> ReLU -> SeparableConv2d -> BN -> MaxPool.\n",
+ "    Shortcut branch: 1x1 convolution with stride 2 -> BN.\n",
+ "    Both branches reduce the spatial resolution in the same way, so their outputs are summed.\n",
+ "\n",
+ "    Args:\n",
+ "        in_channels: number of channels of the input tensor.\n",
+ "        out_channels: number of channels produced by both branches.\n",
+ "        relu_at_start: when True the main branch begins with a ReLU.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(self, in_channels: int, out_channels: int, relu_at_start=True):\n",
+ "        super().__init__()\n",
+ "\n",
+ "        # Shortcut branch.\n",
+ "        self.one_by_one = nn.Sequential(\n",
+ "            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=2, bias=False),\n",
+ "            nn.BatchNorm2d(out_channels),\n",
+ "        )\n",
+ "\n",
+ "        # Main branch; the leading ReLU is the only difference between the two variants.\n",
+ "        main_branch = [nn.ReLU(inplace=False)] if relu_at_start else []\n",
+ "        main_branch += [\n",
+ "            SeparableConv2d(in_channels, out_channels),\n",
+ "            nn.BatchNorm2d(out_channels),\n",
+ "            nn.ReLU(inplace=False),\n",
+ "            SeparableConv2d(out_channels, out_channels),\n",
+ "            nn.BatchNorm2d(out_channels),\n",
+ "            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n",
+ "        ]\n",
+ "        self.double_depth_wise_conv = nn.Sequential(*main_branch)\n",
+ "\n",
+ "    def forward(self, x):\n",
+ "        \"\"\"Return the sum of the shortcut branch and the main branch.\"\"\"\n",
+ "        shortcut = self.one_by_one(x)\n",
+ "        features = self.double_depth_wise_conv(x)\n",
+ "        return torch.add(shortcut, features)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class EntryFlowModule(nn.Module):\n",
+ "    \"\"\"Entry flow of Xception: a double-convolution stem followed by three residual blocks.\n",
+ "\n",
+ "    The channel widths grow as 64 (stem) -> 128 -> 256 -> ``out_channels``.\n",
+ "\n",
+ "    Args:\n",
+ "        in_channels: number of channels of the input image.\n",
+ "        out_channels: number of channels produced by the last residual block.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(self, in_channels: int, out_channels: int):\n",
+ "        super().__init__()\n",
+ "\n",
+ "        # Stem: two plain convolutions.\n",
+ "        self.double_conv = DoubleConvBlock(in_channels, 64)\n",
+ "\n",
+ "        # The first residual block is built without its leading ReLU.\n",
+ "        self.block1 = XceptionModule(64, 128, relu_at_start=False)\n",
+ "        self.block2 = XceptionModule(128, 256)\n",
+ "        self.block3 = XceptionModule(256, out_channels)\n",
+ "\n",
+ "    def forward(self, x):\n",
+ "        \"\"\"Apply the stem and the three residual blocks in order.\"\"\"\n",
+ "        for stage in (self.double_conv, self.block1, self.block2, self.block3):\n",
+ "            x = stage(x)\n",
+ "        return x"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Entrada: (torch.Size([2, 1, 299, 299]), {torch.float32})\n",
+ "Salida: (torch.Size([2, 728, 19, 19]), torch.float32)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Test the module to see if it gives the expected result.\n",
+ "\n",
+ "input_image = torch.rand([2, 1, 299, 299])\n",
+ "print(f\"Entrada: {input_image.size(), {input_image.dtype}}\")\n",
+ "model = EntryFlowModule(in_channels=1, out_channels=728)\n",
+ "ouput = model(input_image)\n",
+ "print(f\"Salida: {ouput.size(), ouput.dtype}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### **Middle flow**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class XceptionMiddleModule(nn.Module):\n",
+ "    \"\"\"Middle-flow block: three (ReLU -> SeparableConv2d -> BN) stages with a residual connection.\n",
+ "\n",
+ "    The output is ``shortcut(x) + stages(x)``. When ``in_channels == out_channels`` (the\n",
+ "    configuration used by Xception) the shortcut is the identity and adds no parameters;\n",
+ "    otherwise a 1x1 convolution followed by BatchNorm projects the input to ``out_channels``\n",
+ "    so that the two tensors can be added.\n",
+ "\n",
+ "    Args:\n",
+ "        in_channels: number of channels of the input tensor.\n",
+ "        out_channels: number of channels produced by the block.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(self, in_channels: int, out_channels: int):\n",
+ "        super().__init__()\n",
+ "\n",
+ "        # triple separable conv 2d\n",
+ "        self.triple_depth_wise_conv = nn.Sequential(\n",
+ "            # Must NOT be in-place: the block input is reused by the skip connection in\n",
+ "            # forward(), and an in-place ReLU would overwrite it before the addition.\n",
+ "            nn.ReLU(inplace=False),\n",
+ "            SeparableConv2d(in_channels, out_channels),\n",
+ "            nn.BatchNorm2d(out_channels),\n",
+ "            nn.ReLU(inplace=True),\n",
+ "            SeparableConv2d(out_channels, out_channels),\n",
+ "            nn.BatchNorm2d(out_channels),\n",
+ "            nn.ReLU(inplace=True),\n",
+ "            SeparableConv2d(out_channels, out_channels),\n",
+ "            nn.BatchNorm2d(out_channels),\n",
+ "        )\n",
+ "\n",
+ "        # Identity shortcut when the widths match; 1x1 projection otherwise.\n",
+ "        if in_channels == out_channels:\n",
+ "            self.projection = None\n",
+ "        else:\n",
+ "            self.projection = nn.Sequential(\n",
+ "                nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),\n",
+ "                nn.BatchNorm2d(out_channels),\n",
+ "            )\n",
+ "\n",
+ "    def forward(self, x):\n",
+ "        \"\"\"Return the residual sum of the (possibly projected) input and the three stages.\"\"\"\n",
+ "        shortcut = x if self.projection is None else self.projection(x)\n",
+ "        return torch.add(shortcut, self.triple_depth_wise_conv(x))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class MiddleFlowModule(nn.Module):\n",
+ "    \"\"\"Middle flow of Xception: a stack of ``n_blocks`` XceptionMiddleModule blocks.\n",
+ "\n",
+ "    Args:\n",
+ "        in_channels: number of channels of the input tensor.\n",
+ "        out_channels: number of channels produced by every block.\n",
+ "        n_blocks: how many blocks to stack (8 by default).\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(self, in_channels, out_channels, n_blocks: int = 8):\n",
+ "        super().__init__()\n",
+ "        # Only the first block receives `in_channels`; every later block consumes the\n",
+ "        # `out_channels` produced by its predecessor, so the stack also works when the\n",
+ "        # two widths differ.\n",
+ "        blocks = [\n",
+ "            XceptionMiddleModule(in_channels if index == 0 else out_channels, out_channels)\n",
+ "            for index in range(n_blocks)\n",
+ "        ]\n",
+ "        self.middle_flow = nn.Sequential(*blocks)\n",
+ "\n",
+ "    def forward(self, x):\n",
+ "        \"\"\"Run the input through all stacked blocks.\"\"\"\n",
+ "        return self.middle_flow(x)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Entrada: (torch.Size([2, 728, 19, 19]), {torch.float32})\n",
+ "Salida: (torch.Size([2, 728, 19, 19]), torch.float32)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Test the module to see if it gives the expected result.\n",
+ "\n",
+ "input_image = torch.rand([2, 728, 19, 19])\n",
+ "print(f\"Entrada: {input_image.size(), {input_image.dtype}}\")\n",
+ "model = MiddleFlowModule(in_channels=728, out_channels=728)\n",
+ "ouput = model(input_image)\n",
+ "print(f\"Salida: {ouput.size(), ouput.dtype}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### **Exit flow**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class XceptionExitModule(nn.Module):\n",
+ "    \"\"\"First exit-flow block: two separable convolutions plus a strided 1x1 shortcut.\n",
+ "\n",
+ "    Main branch: ReLU -> SeparableConv2d(in, in) -> BN -> ReLU -> SeparableConv2d(in, out)\n",
+ "    -> BN -> MaxPool. Shortcut branch: 1x1 convolution with stride 2 -> BN.\n",
+ "    The outputs of the two branches are summed.\n",
+ "\n",
+ "    Args:\n",
+ "        in_channels: number of channels of the input tensor.\n",
+ "        out_channels: number of channels produced by both branches.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(self, in_channels: int, out_channels: int):\n",
+ "        super().__init__()\n",
+ "\n",
+ "        # Shortcut branch.\n",
+ "        shortcut_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=2, bias=False)\n",
+ "        self.one_by_one = nn.Sequential(shortcut_conv, nn.BatchNorm2d(out_channels))\n",
+ "\n",
+ "        # Main branch: the width only changes in the second separable convolution.\n",
+ "        main_branch = [\n",
+ "            nn.ReLU(inplace=False),\n",
+ "            SeparableConv2d(in_channels, in_channels),\n",
+ "            nn.BatchNorm2d(in_channels),\n",
+ "            nn.ReLU(inplace=False),\n",
+ "            SeparableConv2d(in_channels, out_channels),\n",
+ "            nn.BatchNorm2d(out_channels),\n",
+ "            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),\n",
+ "        ]\n",
+ "        self.double_depth_wise_conv = nn.Sequential(*main_branch)\n",
+ "\n",
+ "    def forward(self, x):\n",
+ "        \"\"\"Return the sum of the shortcut branch and the main branch.\"\"\"\n",
+ "        shortcut = self.one_by_one(x)\n",
+ "        features = self.double_depth_wise_conv(x)\n",
+ "        return torch.add(shortcut, features)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class ExitFlowModule(nn.Module):\n",
+ "    \"\"\"Exit flow of Xception: residual block, two separable convolutions, pooling and classifier.\n",
+ "\n",
+ "    Channel widths: ``in_channels`` -> 1024 -> 1536 -> 2048, then global average pooling\n",
+ "    and a linear layer that outputs ``n_classes`` values per sample (no activation applied).\n",
+ "\n",
+ "    Args:\n",
+ "        in_channels: number of channels of the input tensor.\n",
+ "        n_classes: size of the output of the final linear layer.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(self, in_channels, n_classes):\n",
+ "        super().__init__()\n",
+ "\n",
+ "        self.block1 = XceptionExitModule(in_channels, 1024)\n",
+ "\n",
+ "        # Two (SeparableConv2d -> BN -> ReLU) stages that widen the features to 2048 channels.\n",
+ "        widening_layers = []\n",
+ "        for stage_in, stage_out in ((1024, 1536), (1536, 2048)):\n",
+ "            widening_layers += [\n",
+ "                SeparableConv2d(stage_in, stage_out),\n",
+ "                nn.BatchNorm2d(stage_out),\n",
+ "                nn.ReLU(inplace=True),\n",
+ "            ]\n",
+ "        self.block2 = nn.Sequential(*widening_layers)\n",
+ "\n",
+ "        self.gap = nn.AdaptiveAvgPool2d((1, 1))\n",
+ "        self.last_fc = nn.Linear(2048, n_classes)\n",
+ "\n",
+ "    def forward(self, x):\n",
+ "        \"\"\"Return the classifier output for a batch of feature maps.\"\"\"\n",
+ "        features = self.block2(self.block1(x))\n",
+ "        pooled = self.gap(features)\n",
+ "        # Collapse the pooled 1x1 spatial dimensions: one feature vector per sample.\n",
+ "        flattened = pooled.view(pooled.size(0), -1)\n",
+ "        return self.last_fc(flattened)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Entrada: (torch.Size([2, 728, 19, 19]), {torch.float32})\n",
+ "Salida: (torch.Size([2, 1]), torch.float32)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Test the module to see if it gives the expected result.\n",
+ "\n",
+ "input_image = torch.rand([2, 728, 19, 19])\n",
+ "print(f\"Entrada: {input_image.size(), {input_image.dtype}}\")\n",
+ "model = ExitFlowModule(in_channels=728, n_classes=1)\n",
+ "ouput = model(input_image)\n",
+ "print(f\"Salida: {ouput.size(), ouput.dtype}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### **Full model**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "class Xception(nn.Module):\n",
+ "    \"\"\"Full Xception network: entry flow -> middle flow -> exit flow.\n",
+ "\n",
+ "    Args:\n",
+ "        n_channels: number of channels of the input image.\n",
+ "        n_classes: number of values produced per sample by the final linear layer.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(self, n_channels, n_classes):\n",
+ "        super().__init__()\n",
+ "        # Channel width shared by the end of the entry flow, the whole middle flow\n",
+ "        # and the input of the exit flow.\n",
+ "        flow_width = 728\n",
+ "        self.entry_flow = EntryFlowModule(n_channels, flow_width)\n",
+ "        self.middle_flow = MiddleFlowModule(flow_width, flow_width)\n",
+ "        self.exit_flow = ExitFlowModule(flow_width, n_classes)\n",
+ "\n",
+ "    def forward(self, x):\n",
+ "        \"\"\"Run the three flows one after the other.\"\"\"\n",
+ "        return self.exit_flow(self.middle_flow(self.entry_flow(x)))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ZjVVbqbIXB1e",
+ "outputId": "b1c9b45a-ef55-4fbf-9d76-d9753ecbf9ed"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Input: (torch.Size([2, 1, 299, 299]), torch.float32)\n",
+ "Ouput: (torch.Size([2, 1]), torch.float32)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Smoke test: push a random batch through the full model and report what comes out.\n",
+ "# `model` and `input_image` are reused by the following cells.\n",
+ "\n",
+ "input_image = torch.rand([2, 1, 299, 299])\n",
+ "print(f\"Input: {input_image.size(), input_image.dtype}\")\n",
+ "model = Xception(n_channels=1, n_classes=1)\n",
+ "prediction = model(input_image)\n",
+ "print(f\"Ouput: {prediction.size(), prediction.dtype}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Xception(\n",
+ " (entry_flow): EntryFlowModule(\n",
+ " (double_conv): DoubleConvBlock(\n",
+ " (double_conv): Sequential(\n",
+ " (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)\n",
+ " (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (2): ReLU(inplace=True)\n",
+ " (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)\n",
+ " (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (5): ReLU(inplace=True)\n",
+ " )\n",
+ " )\n",
+ " (block1): XceptionModule(\n",
+ " (one_by_one): Sequential(\n",
+ " (0): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
+ " (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " )\n",
+ " (double_depth_wise_conv): Sequential(\n",
+ " (0): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)\n",
+ " (one_by_one): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (2): ReLU()\n",
+ " (3): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)\n",
+ " (one_by_one): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (5): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
+ " )\n",
+ " )\n",
+ " (block2): XceptionModule(\n",
+ " (one_by_one): Sequential(\n",
+ " (0): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
+ " (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " )\n",
+ " (double_depth_wise_conv): Sequential(\n",
+ " (0): ReLU()\n",
+ " (1): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=128, bias=False)\n",
+ " (one_by_one): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (3): ReLU()\n",
+ " (4): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)\n",
+ " (one_by_one): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (5): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (6): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
+ " )\n",
+ " )\n",
+ " (block3): XceptionModule(\n",
+ " (one_by_one): Sequential(\n",
+ " (0): Conv2d(256, 728, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
+ " (1): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " )\n",
+ " (double_depth_wise_conv): Sequential(\n",
+ " (0): ReLU()\n",
+ " (1): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=256, bias=False)\n",
+ " (one_by_one): Conv2d(256, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (3): ReLU()\n",
+ " (4): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (6): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
+ " )\n",
+ " )\n",
+ " )\n",
+ " (middle_flow): MiddleFlowModule(\n",
+ " (middle_flow): Sequential(\n",
+ " (0): XceptionMiddleModule(\n",
+ " (triple_depth_wise_conv): Sequential(\n",
+ " (0): ReLU(inplace=True)\n",
+ " (1): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (3): ReLU(inplace=True)\n",
+ " (4): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (6): ReLU(inplace=True)\n",
+ " (7): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " )\n",
+ " )\n",
+ " (1): XceptionMiddleModule(\n",
+ " (triple_depth_wise_conv): Sequential(\n",
+ " (0): ReLU(inplace=True)\n",
+ " (1): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (3): ReLU(inplace=True)\n",
+ " (4): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (6): ReLU(inplace=True)\n",
+ " (7): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " )\n",
+ " )\n",
+ " (2): XceptionMiddleModule(\n",
+ " (triple_depth_wise_conv): Sequential(\n",
+ " (0): ReLU(inplace=True)\n",
+ " (1): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (3): ReLU(inplace=True)\n",
+ " (4): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (6): ReLU(inplace=True)\n",
+ " (7): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " )\n",
+ " )\n",
+ " (3): XceptionMiddleModule(\n",
+ " (triple_depth_wise_conv): Sequential(\n",
+ " (0): ReLU(inplace=True)\n",
+ " (1): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (3): ReLU(inplace=True)\n",
+ " (4): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (6): ReLU(inplace=True)\n",
+ " (7): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " )\n",
+ " )\n",
+ " (4): XceptionMiddleModule(\n",
+ " (triple_depth_wise_conv): Sequential(\n",
+ " (0): ReLU(inplace=True)\n",
+ " (1): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (3): ReLU(inplace=True)\n",
+ " (4): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (6): ReLU(inplace=True)\n",
+ " (7): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " )\n",
+ " )\n",
+ " (5): XceptionMiddleModule(\n",
+ " (triple_depth_wise_conv): Sequential(\n",
+ " (0): ReLU(inplace=True)\n",
+ " (1): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (3): ReLU(inplace=True)\n",
+ " (4): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (6): ReLU(inplace=True)\n",
+ " (7): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " )\n",
+ " )\n",
+ " (6): XceptionMiddleModule(\n",
+ " (triple_depth_wise_conv): Sequential(\n",
+ " (0): ReLU(inplace=True)\n",
+ " (1): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (3): ReLU(inplace=True)\n",
+ " (4): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (6): ReLU(inplace=True)\n",
+ " (7): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " )\n",
+ " )\n",
+ " (7): XceptionMiddleModule(\n",
+ " (triple_depth_wise_conv): Sequential(\n",
+ " (0): ReLU(inplace=True)\n",
+ " (1): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (3): ReLU(inplace=True)\n",
+ " (4): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (5): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (6): ReLU(inplace=True)\n",
+ " (7): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (8): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " )\n",
+ " )\n",
+ " )\n",
+ " )\n",
+ " (exit_flow): ExitFlowModule(\n",
+ " (block1): XceptionExitModule(\n",
+ " (one_by_one): Sequential(\n",
+ " (0): Conv2d(728, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
+ " (1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " )\n",
+ " (double_depth_wise_conv): Sequential(\n",
+ " (0): ReLU()\n",
+ " (1): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 728, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (2): BatchNorm2d(728, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (3): ReLU()\n",
+ " (4): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(728, 728, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=728, bias=False)\n",
+ " (one_by_one): Conv2d(728, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (5): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (6): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
+ " )\n",
+ " )\n",
+ " (block2): Sequential(\n",
+ " (0): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1024, bias=False)\n",
+ " (one_by_one): Conv2d(1024, 1536, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (1): BatchNorm2d(1536, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (2): ReLU(inplace=True)\n",
+ " (3): SeparableConv2d(\n",
+ " (depth_wise_conv): Conv2d(1536, 1536, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=1536, bias=False)\n",
+ " (one_by_one): Conv2d(1536, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
+ " )\n",
+ " (4): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
+ " (5): ReLU(inplace=True)\n",
+ " )\n",
+ " (gap): AdaptiveAvgPool2d(output_size=(1, 1))\n",
+ " (last_fc): Linear(in_features=2048, out_features=1, bias=True)\n",
+ " )\n",
+ ")\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(model)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "====================================================================================================\n",
+ "Layer (type:depth-idx) Output Shape Param #\n",
+ "====================================================================================================\n",
+ "Xception [2, 1] --\n",
+ "├─EntryFlowModule: 1-1 [2, 728, 19, 19] --\n",
+ "│ └─DoubleConvBlock: 2-1 [2, 64, 147, 147] --\n",
+ "│ │ └─Sequential: 3-1 [2, 64, 147, 147] 18,912\n",
+ "│ └─XceptionModule: 2-2 [2, 128, 74, 74] --\n",
+ "│ │ └─Sequential: 3-2 [2, 128, 74, 74] 8,448\n",
+ "│ │ └─Sequential: 3-3 [2, 128, 74, 74] 26,816\n",
+ "│ └─XceptionModule: 2-3 [2, 256, 37, 37] --\n",
+ "│ │ └─Sequential: 3-4 [2, 256, 37, 37] 33,280\n",
+ "│ │ └─Sequential: 3-5 [2, 256, 37, 37] 102,784\n",
+ "│ └─XceptionModule: 2-4 [2, 728, 19, 19] --\n",
+ "│ │ └─Sequential: 3-6 [2, 728, 19, 19] 187,824\n",
+ "│ │ └─Sequential: 3-7 [2, 728, 19, 19] 728,120\n",
+ "├─MiddleFlowModule: 1-2 [2, 728, 19, 19] --\n",
+ "│ └─Sequential: 2-5 [2, 728, 19, 19] --\n",
+ "│ │ └─XceptionMiddleModule: 3-8 [2, 728, 19, 19] 1,613,976\n",
+ "│ │ └─XceptionMiddleModule: 3-9 [2, 728, 19, 19] 1,613,976\n",
+ "│ │ └─XceptionMiddleModule: 3-10 [2, 728, 19, 19] 1,613,976\n",
+ "│ │ └─XceptionMiddleModule: 3-11 [2, 728, 19, 19] 1,613,976\n",
+ "│ │ └─XceptionMiddleModule: 3-12 [2, 728, 19, 19] 1,613,976\n",
+ "│ │ └─XceptionMiddleModule: 3-13 [2, 728, 19, 19] 1,613,976\n",
+ "│ │ └─XceptionMiddleModule: 3-14 [2, 728, 19, 19] 1,613,976\n",
+ "│ │ └─XceptionMiddleModule: 3-15 [2, 728, 19, 19] 1,613,976\n",
+ "├─ExitFlowModule: 1-3 [2, 1] --\n",
+ "│ └─XceptionExitModule: 2-6 [2, 1024, 10, 10] --\n",
+ "│ │ └─Sequential: 3-16 [2, 1024, 10, 10] 747,520\n",
+ "│ │ └─Sequential: 3-17 [2, 1024, 10, 10] 1,292,064\n",
+ "│ └─Sequential: 2-7 [2, 2048, 10, 10] --\n",
+ "│ │ └─SeparableConv2d: 3-18 [2, 1536, 10, 10] 1,582,080\n",
+ "│ │ └─BatchNorm2d: 3-19 [2, 1536, 10, 10] 3,072\n",
+ "│ │ └─ReLU: 3-20 [2, 1536, 10, 10] --\n",
+ "│ │ └─SeparableConv2d: 3-21 [2, 2048, 10, 10] 3,159,552\n",
+ "│ │ └─BatchNorm2d: 3-22 [2, 2048, 10, 10] 4,096\n",
+ "│ │ └─ReLU: 3-23 [2, 2048, 10, 10] --\n",
+ "│ └─AdaptiveAvgPool2d: 2-8 [2, 2048, 1, 1] --\n",
+ "│ └─Linear: 2-9 [2, 1] 2,049\n",
+ "====================================================================================================\n",
+ "Total params: 20,808,425\n",
+ "Trainable params: 20,808,425\n",
+ "Non-trainable params: 0\n",
+ "Total mult-adds (G): 16.69\n",
+ "====================================================================================================\n",
+ "Input size (MB): 0.72\n",
+ "Forward/backward pass size (MB): 911.39\n",
+ "Params size (MB): 83.23\n",
+ "Estimated Total Size (MB): 995.34\n",
+ "===================================================================================================="
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "summary(model, input_image.size())"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}