semilleroCV · guillepinto · Jun 29, 2024 · Jun 29, 2024 · Jun 29, 2024
@@ -5,3 +5,4 @@ Network In Network, architectures/network-in-network.ipynb,,1312.4400
 Xception, architectures/xception.ipynb,,1610.02357v3
 Pixel Shuffle, modules/pixel-shuffle.ipynb,,1609.05158
 U-Net, architectures/unet.ipynb,,1505.04597
+Mobilenet V1, architectures/mobilenetv1.ipynb,,1704.04861
@@ -0,0 +1,238 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[![deep-learning-notes](https://github.com/semilleroCV/deep-learning-notes/raw/main/assets/banner-notebook.png)](https://github.com/semilleroCV/deep-learning-notes)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## MobileNet V1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture\n",
+    "#@title **Install required packages**\n",
+    "\n",
+    "! pip install torchinfo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#@title **Importing libraries**\n",
+    "\n",
+    "import torch # 2.3.1+cu121\n",
+    "import torchinfo #1.8.0\n",
+    "\n",
+    "import torch.nn as nn\n",
+    "from torch import Tensor"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch version: 2.3.1+cu121\n",
+      "torchinfo version: 1.8.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Note: Not all dependencies have the __version__ method.\n",
+    "\n",
+    "print(f\"torch version: {torch.__version__}\")\n",
+    "print(f\"torchinfo version: {torchinfo.__version__}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### Mobilenet V1 architecture code"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class ConvBlock(nn.Module):\n",
+    "    def __init__(self, in_channels: int, out_channels: int, stride: int):\n",
+    "        super(ConvBlock, self).__init__()\n",
+    "\n",
+    "        self.conv_blk = nn.Sequential(\n",
+    "            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),\n",
+    "            nn.BatchNorm2d(out_channels),\n",
+    "            nn.ReLU(),\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.conv_blk(x)\n",
+    "\n",
+    "\n",
+    "class DepthwiseConvBlock(nn.Module):\n",
+    "    def __init__(self, in_channels: int, out_channels: int, stride: int):\n",
+    "        super(DepthwiseConvBlock, self).__init__()\n",
+    "\n",
+    "        self.depthwise_conv_blk = nn.Sequential(\n",
+    "            nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=stride, padding=1, groups=in_channels, bias=False),\n",
+    "            nn.BatchNorm2d(in_channels),\n",
+    "            nn.ReLU(inplace=True),\n",
+    "            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),\n",
+    "            nn.BatchNorm2d(out_channels),\n",
+    "            nn.ReLU(inplace=True),\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.depthwise_conv_blk(x)\n",
+    "\n",
+    "\n",
+    "class MobileNetV1(nn.Module):\n",
+    "    def __init__(self, layer_config: list, depth_multiplier: int, num_classes: int = 1000):\n",
+    "        super(MobileNetV1, self).__init__()\n",
+    "\n",
+    "        \"\"\"depth multiplier is also called width_multiplier or alpha\"\"\"\n",
+    "\n",
+    "        self.model = nn.Sequential()\n",
+    "\n",
+    "        self.model.add_module('conv_blk_1', ConvBlock(3, 32, 2))\n",
+    "\n",
+    "        for idx, params in enumerate(layer_config):\n",
+    "          \"\"\"layer_params: List -> (in_channels, out_channels, stride)\"\"\"\n",
+    "          self.model.add_module(f\"dw_blk_{idx}\",DepthwiseConvBlock(int(params[0]*depth_multiplier),\n",
+    "                                                                   (params[1]*depth_multiplier), params[2]))\n",
+    "          \n",
+    "        self.model.add_module('pool', nn.AdaptiveAvgPool2d(1))\n",
+    "        self.model.add_module('flatten', nn.Flatten())\n",
+    "        self.model.add_module('fc',nn.Linear(1024, num_classes))\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.model(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "==========================================================================================\n",
+       "Layer (type:depth-idx)                   Output Shape              Param #\n",
+       "==========================================================================================\n",
+       "MobileNetV1                              [1, 1000]                 --\n",
+       "├─Sequential: 1-1                        [1, 1000]                 --\n",
+       "│    └─ConvBlock: 2-1                    [1, 32, 112, 112]         --\n",
+       "│    │    └─Sequential: 3-1              [1, 32, 112, 112]         928\n",
+       "│    └─DepthwiseConvBlock: 2-2           [1, 64, 112, 112]         --\n",
+       "│    │    └─Sequential: 3-2              [1, 64, 112, 112]         2,528\n",
+       "│    └─DepthwiseConvBlock: 2-3           [1, 128, 56, 56]          --\n",
+       "│    │    └─Sequential: 3-3              [1, 128, 56, 56]          9,152\n",
+       "│    └─DepthwiseConvBlock: 2-4           [1, 128, 56, 56]          --\n",
+       "│    │    └─Sequential: 3-4              [1, 128, 56, 56]          18,048\n",
+       "│    └─DepthwiseConvBlock: 2-5           [1, 256, 28, 28]          --\n",
+       "│    │    └─Sequential: 3-5              [1, 256, 28, 28]          34,688\n",
+       "│    └─DepthwiseConvBlock: 2-6           [1, 256, 28, 28]          --\n",
+       "│    │    └─Sequential: 3-6              [1, 256, 28, 28]          68,864\n",
+       "│    └─DepthwiseConvBlock: 2-7           [1, 512, 14, 14]          --\n",
+       "│    │    └─Sequential: 3-7              [1, 512, 14, 14]          134,912\n",
+       "│    └─DepthwiseConvBlock: 2-8           [1, 512, 14, 14]          --\n",
+       "│    │    └─Sequential: 3-8              [1, 512, 14, 14]          268,800\n",
+       "│    └─DepthwiseConvBlock: 2-9           [1, 512, 14, 14]          --\n",
+       "│    │    └─Sequential: 3-9              [1, 512, 14, 14]          268,800\n",
+       "│    └─DepthwiseConvBlock: 2-10          [1, 512, 14, 14]          --\n",
+       "│    │    └─Sequential: 3-10             [1, 512, 14, 14]          268,800\n",
+       "│    └─DepthwiseConvBlock: 2-11          [1, 512, 14, 14]          --\n",
+       "│    │    └─Sequential: 3-11             [1, 512, 14, 14]          268,800\n",
+       "│    └─DepthwiseConvBlock: 2-12          [1, 512, 14, 14]          --\n",
+       "│    │    └─Sequential: 3-12             [1, 512, 14, 14]          268,800\n",
+       "│    └─DepthwiseConvBlock: 2-13          [1, 1024, 7, 7]           --\n",
+       "│    │    └─Sequential: 3-13             [1, 1024, 7, 7]           531,968\n",
+       "│    └─DepthwiseConvBlock: 2-14          [1, 1024, 7, 7]           --\n",
+       "│    │    └─Sequential: 3-14             [1, 1024, 7, 7]           1,061,888\n",
+       "│    └─AdaptiveAvgPool2d: 2-15           [1, 1024, 1, 1]           --\n",
+       "│    └─Flatten: 2-16                     [1, 1024]                 --\n",
+       "│    └─Linear: 2-17                      [1, 1000]                 1,025,000\n",
+       "==========================================================================================\n",
+       "Total params: 4,231,976\n",
+       "Trainable params: 4,231,976\n",
+       "Non-trainable params: 0\n",
+       "Total mult-adds (M): 568.76\n",
+       "==========================================================================================\n",
+       "Input size (MB): 0.60\n",
+       "Forward/backward pass size (MB): 80.69\n",
+       "Params size (MB): 16.93\n",
+       "Estimated Total Size (MB): 98.22\n",
+       "=========================================================================================="
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Declare parameters for depth-wise separable convolution layers -> (in_channels, out_channels, stride)\n",
+    "dw_params = [\n",
+    "    (32, 64, 1),\n",
+    "    (64, 128, 2),\n",
+    "    (128, 128, 1),\n",
+    "    (128, 256, 2),\n",
+    "    (256, 256, 1),\n",
+    "    (256, 512, 2),\n",
+    "    (512, 512, 1),\n",
+    "    (512, 512, 1),\n",
+    "    (512, 512, 1),\n",
+    "    (512, 512, 1),\n",
+    "    (512, 512, 1),\n",
+    "    (512, 1024, 2),\n",
+    "    (1024, 1024, 1),\n",
+    "]\n",
+    "\n",
+    "model = MobileNetV1(layer_config=dw_params, depth_multiplier=1, num_classes=1000)\n",
+    "torchinfo.summary(model, (3, 224, 224), batch_dim = 0)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}