Add MLP Mixer on architectures (#17)

* Add MLP Mixer on architectures * Add MLP Mixer on architectures * cleaning dirty output --------- Co-authored-by: Guillermo Pinto Ruiz <camachopinto_@hotmail.com>
semilleroCV · Jul 11, 2024 · 0f24840 · 0f24840
1 parent 2e0366f
commit 0f24840
Show file tree

Hide file tree

Showing 2 changed files with 252 additions and 0 deletions.
diff --git a/automation/notebooks-table-data.csv b/automation/notebooks-table-data.csv
@@ -9,3 +9,4 @@ DenseNet, architectures/densenet.ipynb,,1608.06993v5
 Mobilenet V1, architectures/mobilenetv1.ipynb,,1704.04861
 Inception V1, architectures/inceptionv1.ipynb,,1409.4842
 Intersection Over Union (IoU),metrics/intersection-over-union.ipynb,,1902.09630
+MLP Mixer, architectures/mlp-mixer.ipynb,,2105.01601
diff --git a/notebooks/architectures/mlp-mixer.ipynb b/notebooks/architectures/mlp-mixer.ipynb
@@ -0,0 +1,251 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[![deep-learning-notes](https://github.com/semilleroCV/deep-learning-notes/raw/main/assets/banner-notebook.png)](https://github.com/semilleroCV/deep-learning-notes)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## MLP Mixer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%capture\n",
+    "#@title **Install required packages**\n",
+    "\n",
+    "! pip install torchinfo einops"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#@title **Importing libraries**\n",
+    "\n",
+    "import torch #2.3.1+cu121\n",
+    "import torch.nn as nn \n",
+    "import torchinfo #1.8.0\n",
+    "\n",
+    "import einops #0.8.0\n",
+    "from einops.layers.torch import Rearrange"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch version: 2.3.1+cu121\n",
+      "torchinfo version: 1.8.0\n",
+      "einops version: 0.8.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Note: Not all dependencies have the __version__ method.\n",
+    "\n",
+    "print(f\"torch version: {torch.__version__}\")\n",
+    "print(f\"torchinfo version: {torchinfo.__version__}\")\n",
+    "print(f\"einops version: {einops.__version__}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "#### MLP-Mixer architecture code\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class PatchEmbedding(nn.Module):\n",
+    "    def __init__(self, in_channels: int, patch_size: int, embed_dim: int):\n",
+    "        super().__init__()\n",
+    "\n",
+    "        self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)\n",
+    "        self.rearrange = Rearrange('b e h w -> b (h w) e')\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        _, _, H, W = x.size()\n",
+    "\n",
+    "        x = self.proj(x)\n",
+    "        x = self.rearrange(x)\n",
+    "\n",
+    "        return x\n",
+    "\n",
+    "\n",
+    "class MLPBlock(nn.Module):\n",
+    "    def __init__(self, input_dim: int, hidden_dim: int):\n",
+    "        super().__init__()\n",
+    "\n",
+    "        self.mlp_blk = nn.Sequential(\n",
+    "            nn.Linear(input_dim, hidden_dim),\n",
+    "            nn.GELU(),\n",
+    "            nn.Linear(hidden_dim, input_dim),\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "      return self.mlp_blk(x)\n",
+    "\n",
+    "\n",
+    "class MixerBlock(nn.Module):\n",
+    "    def __init__(self, dim: int, pix_per_patch: int, token_dim: int, channel_dim: int):\n",
+    "        super().__init__()\n",
+    "\n",
+    "        self.token_mixer = nn.Sequential(\n",
+    "            nn.LayerNorm(dim),\n",
+    "            Rearrange('b n c -> b c n'),\n",
+    "            MLPBlock(pix_per_patch, token_dim),\n",
+    "            Rearrange('b c n -> b n c'),\n",
+    "        )\n",
+    "\n",
+    "        self.channel_mixer = nn.Sequential(\n",
+    "            nn.LayerNorm(dim),\n",
+    "            MLPBlock(dim, channel_dim),\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "\n",
+    "        x = x + self.token_mixer(x)\n",
+    "        x = x + self.channel_mixer(x)\n",
+    "\n",
+    "        return x\n",
+    "\n",
+    "\n",
+    "class MLPMixer(nn.Module):\n",
+    "    def __init__(self, num_classes: int, hidden_dim: int, depth: int, in_channels: int = 3, img_size: int = 224,\n",
+    "                 patch_size: int = 16, token_dim: int = 256, channel_dim: int = 256):\n",
+    "        super().__init__()\n",
+    "\n",
+    "        self.patch_embed = PatchEmbedding(in_channels, patch_size, hidden_dim)\n",
+    "        pix_per_patch =  (img_size// patch_size) ** 2\n",
+    "\n",
+    "        self.mixer_blks = nn.Sequential()\n",
+    "\n",
+    "        for i in range(depth):\n",
+    "            self.mixer_blks.add_module(f\"MixerBlock_{i}\", \n",
+    "                                       MixerBlock(hidden_dim, pix_per_patch, token_dim, channel_dim))\n",
+    "\n",
+    "        self.norm = nn.LayerNorm(hidden_dim)\n",
+    "        self.head = nn.Linear(hidden_dim, num_classes)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = self.patch_embed(x)\n",
+    "        x = self.mixer_blks(x)\n",
+    "        x = self.norm(x)\n",
+    "        x = x.mean(dim=1)\n",
+    "        x = self.head(x)\n",
+    "\n",
+    "        return x"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "===============================================================================================\n",
+       "Layer (type:depth-idx)                        Output Shape              Param #\n",
+       "===============================================================================================\n",
+       "MLPMixer                                      [1, 1000]                 --\n",
+       "├─PatchEmbedding: 1-1                         [1, 196, 512]             --\n",
+       "│    └─Conv2d: 2-1                            [1, 512, 14, 14]          393,728\n",
+       "│    └─Rearrange: 2-2                         [1, 196, 512]             --\n",
+       "├─Sequential: 1-2                             [1, 196, 512]             --\n",
+       "│    └─MixerBlock: 2-3                        [1, 196, 512]             --\n",
+       "│    │    └─Sequential: 3-1                   [1, 196, 512]             101,828\n",
+       "│    │    └─Sequential: 3-2                   [1, 196, 512]             2,100,736\n",
+       "│    └─MixerBlock: 2-4                        [1, 196, 512]             --\n",
+       "│    │    └─Sequential: 3-3                   [1, 196, 512]             101,828\n",
+       "│    │    └─Sequential: 3-4                   [1, 196, 512]             2,100,736\n",
+       "│    └─MixerBlock: 2-5                        [1, 196, 512]             --\n",
+       "│    │    └─Sequential: 3-5                   [1, 196, 512]             101,828\n",
+       "│    │    └─Sequential: 3-6                   [1, 196, 512]             2,100,736\n",
+       "│    └─MixerBlock: 2-6                        [1, 196, 512]             --\n",
+       "│    │    └─Sequential: 3-7                   [1, 196, 512]             101,828\n",
+       "│    │    └─Sequential: 3-8                   [1, 196, 512]             2,100,736\n",
+       "│    └─MixerBlock: 2-7                        [1, 196, 512]             --\n",
+       "│    │    └─Sequential: 3-9                   [1, 196, 512]             101,828\n",
+       "│    │    └─Sequential: 3-10                  [1, 196, 512]             2,100,736\n",
+       "│    └─MixerBlock: 2-8                        [1, 196, 512]             --\n",
+       "│    │    └─Sequential: 3-11                  [1, 196, 512]             101,828\n",
+       "│    │    └─Sequential: 3-12                  [1, 196, 512]             2,100,736\n",
+       "│    └─MixerBlock: 2-9                        [1, 196, 512]             --\n",
+       "│    │    └─Sequential: 3-13                  [1, 196, 512]             101,828\n",
+       "│    │    └─Sequential: 3-14                  [1, 196, 512]             2,100,736\n",
+       "│    └─MixerBlock: 2-10                       [1, 196, 512]             --\n",
+       "│    │    └─Sequential: 3-15                  [1, 196, 512]             101,828\n",
+       "│    │    └─Sequential: 3-16                  [1, 196, 512]             2,100,736\n",
+       "├─LayerNorm: 1-3                              [1, 196, 512]             1,024\n",
+       "├─Linear: 1-4                                 [1, 1000]                 513,000\n",
+       "===============================================================================================\n",
+       "Total params: 18,528,264\n",
+       "Trainable params: 18,528,264\n",
+       "Non-trainable params: 0\n",
+       "Total mult-adds (Units.MEGABYTES): 95.31\n",
+       "===============================================================================================\n",
+       "Input size (MB): 0.60\n",
+       "Forward/backward pass size (MB): 61.38\n",
+       "Params size (MB): 74.11\n",
+       "Estimated Total Size (MB): 136.10\n",
+       "==============================================================================================="
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model = MLPMixer(num_classes=1000, hidden_dim=512, depth=8, patch_size=16,\n",
+    "                 token_dim=256, channel_dim=2048)\n",
+    "torchinfo.summary(model, (3, 224, 224), batch_dim = 0)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}