diff --git a/automation/notebooks-table-data.csv b/automation/notebooks-table-data.csv index 54b4dd0..1f7641f 100644 --- a/automation/notebooks-table-data.csv +++ b/automation/notebooks-table-data.csv @@ -5,3 +5,4 @@ Network In Network, architectures/network-in-network.ipynb,,1312.4400 Xception, architectures/xception.ipynb,,1610.02357v3 Pixel Shuffle, modules/pixel-shuffle.ipynb,,1609.05158 U-Net, architectures/unet.ipynb,,1505.04597 +Mobilenet V1, architectures/mobilenetv1.ipynb,,1704.04861 diff --git a/notebooks/architectures/mobilenetv1.ipynb b/notebooks/architectures/mobilenetv1.ipynb new file mode 100644 index 0000000..0ca1bec --- /dev/null +++ b/notebooks/architectures/mobilenetv1.ipynb @@ -0,0 +1,238 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[![deep-learning-notes](https://github.com/semilleroCV/deep-learning-notes/raw/main/assets/banner-notebook.png)](https://github.com/semilleroCV/deep-learning-notes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MobileNet V1" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "#@title **Install required packages**\n", + "\n", + "! pip install torchinfo" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "#@title **Importing libraries**\n", + "\n", + "import torch # 2.3.1+cu121\n", + "import torchinfo #1.8.0\n", + "\n", + "import torch.nn as nn\n", + "from torch import Tensor" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch version: 2.3.1+cu121\n", + "torchinfo version: 1.8.0\n" + ] + } + ], + "source": [ + "# Note: Not all dependencies have the __version__ method.\n", + "\n", + "print(f\"torch version: {torch.__version__}\")\n", + "print(f\"torchinfo version: {torchinfo.__version__}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Mobilenet V1 architecture code" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "class ConvBlock(nn.Module):\n", + " def __init__(self, in_channels: int, out_channels: int, stride: int):\n", + " super(ConvBlock, self).__init__()\n", + "\n", + " self.conv_blk = nn.Sequential(\n", + " nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),\n", + " nn.BatchNorm2d(out_channels),\n", + " nn.ReLU(),\n", + " )\n", + "\n", + " def forward(self, x):\n", + " return self.conv_blk(x)\n", + "\n", + "\n", + "class DepthwiseConvBlock(nn.Module):\n", + " def __init__(self, in_channels: int, out_channels: int, stride: int):\n", + " super(DepthwiseConvBlock, self).__init__()\n", + "\n", + " self.depthwise_conv_blk = nn.Sequential(\n", + " nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=stride, padding=1, groups=in_channels, bias=False),\n", + " nn.BatchNorm2d(in_channels),\n", + " nn.ReLU(inplace=True),\n", + " nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),\n", + " nn.BatchNorm2d(out_channels),\n", + " nn.ReLU(inplace=True),\n", + " )\n", + "\n", + " def forward(self, x):\n", + " return self.depthwise_conv_blk(x)\n", + "\n", + "\n", + "class MobileNetV1(nn.Module):\n", + " def __init__(self, layer_config: list, depth_multiplier: int, num_classes: int = 1000):\n", + " super(MobileNetV1, self).__init__()\n", + "\n", + " \"\"\"depth multiplier is also called width_multiplier or alpha\"\"\"\n", + "\n", + " self.model = nn.Sequential()\n", + "\n", + " self.model.add_module('conv_blk_1', ConvBlock(3, 32, 2))\n", + "\n", + " for idx, params in enumerate(layer_config):\n", + " \"\"\"layer_params: List -> (in_channels, out_channels, stride)\"\"\"\n", + " self.model.add_module(f\"dw_blk_{idx}\",DepthwiseConvBlock(int(params[0]*depth_multiplier),\n", + " (params[1]*depth_multiplier), params[2]))\n", + " \n", + " self.model.add_module('pool', nn.AdaptiveAvgPool2d(1))\n", + " self.model.add_module('flatten', nn.Flatten())\n", + " self.model.add_module('fc',nn.Linear(1024, num_classes))\n", + "\n", + " def forward(self, x):\n", + " return self.model(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "==========================================================================================\n", + "Layer (type:depth-idx) Output Shape Param #\n", + "==========================================================================================\n", + "MobileNetV1 [1, 1000] --\n", + "├─Sequential: 1-1 [1, 1000] --\n", + "│ └─ConvBlock: 2-1 [1, 32, 112, 112] --\n", + "│ │ └─Sequential: 3-1 [1, 32, 112, 112] 928\n", + "│ └─DepthwiseConvBlock: 2-2 [1, 64, 112, 112] --\n", + "│ │ └─Sequential: 3-2 [1, 64, 112, 112] 2,528\n", + "│ └─DepthwiseConvBlock: 2-3 [1, 128, 56, 56] --\n", + "│ │ └─Sequential: 3-3 [1, 128, 56, 56] 9,152\n", + "│ └─DepthwiseConvBlock: 2-4 [1, 128, 56, 56] --\n", + "│ │ └─Sequential: 3-4 [1, 128, 56, 56] 18,048\n", + "│ └─DepthwiseConvBlock: 2-5 [1, 256, 28, 28] --\n", + "│ │ └─Sequential: 3-5 [1, 256, 28, 28] 34,688\n", + "│ └─DepthwiseConvBlock: 2-6 [1, 256, 28, 28] --\n", + "│ │ └─Sequential: 3-6 [1, 256, 28, 28] 68,864\n", + "│ └─DepthwiseConvBlock: 2-7 [1, 512, 14, 14] --\n", + "│ │ └─Sequential: 3-7 [1, 512, 14, 14] 134,912\n", + "│ └─DepthwiseConvBlock: 2-8 [1, 512, 14, 14] --\n", + "│ │ └─Sequential: 3-8 [1, 512, 14, 14] 268,800\n", + "│ └─DepthwiseConvBlock: 2-9 [1, 512, 14, 14] --\n", + "│ │ └─Sequential: 3-9 [1, 512, 14, 14] 268,800\n", + "│ └─DepthwiseConvBlock: 2-10 [1, 512, 14, 14] --\n", + "│ │ └─Sequential: 3-10 [1, 512, 14, 14] 268,800\n", + "│ └─DepthwiseConvBlock: 2-11 [1, 512, 14, 14] --\n", + "│ │ └─Sequential: 3-11 [1, 512, 14, 14] 268,800\n", + "│ └─DepthwiseConvBlock: 2-12 [1, 512, 14, 14] --\n", + "│ │ └─Sequential: 3-12 [1, 512, 14, 14] 268,800\n", + "│ └─DepthwiseConvBlock: 2-13 [1, 1024, 7, 7] --\n", + "│ │ └─Sequential: 3-13 [1, 1024, 7, 7] 531,968\n", + "│ └─DepthwiseConvBlock: 2-14 [1, 1024, 7, 7] --\n", + "│ │ └─Sequential: 3-14 [1, 1024, 7, 7] 1,061,888\n", + "│ └─AdaptiveAvgPool2d: 2-15 [1, 1024, 1, 1] --\n", + "│ └─Flatten: 2-16 [1, 1024] --\n", + "│ └─Linear: 2-17 [1, 1000] 1,025,000\n", + "==========================================================================================\n", + "Total params: 4,231,976\n", + "Trainable params: 4,231,976\n", + "Non-trainable params: 0\n", + "Total mult-adds (M): 568.76\n", + "==========================================================================================\n", + "Input size (MB): 0.60\n", + "Forward/backward pass size (MB): 80.69\n", + "Params size (MB): 16.93\n", + "Estimated Total Size (MB): 98.22\n", + "==========================================================================================" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Declare parameters for depth-wise separable convolution layers -> (in_channels, out_channels, stride)\n", + "dw_params = [\n", + " (32, 64, 1),\n", + " (64, 128, 2),\n", + " (128, 128, 1),\n", + " (128, 256, 2),\n", + " (256, 256, 1),\n", + " (256, 512, 2),\n", + " (512, 512, 1),\n", + " (512, 512, 1),\n", + " (512, 512, 1),\n", + " (512, 512, 1),\n", + " (512, 512, 1),\n", + " (512, 1024, 2),\n", + " (1024, 1024, 1),\n", + "]\n", + "\n", + "model = MobileNetV1(layer_config=dw_params, depth_multiplier=1, num_classes=1000)\n", + "torchinfo.summary(model, (3, 224, 224), batch_dim = 0)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}