Add MLP Mixer on architectures (#17)
* Add MLP Mixer on architectures

* Add MLP Mixer on architectures

* cleaning dirty output

---------

Co-authored-by: Guillermo Pinto Ruiz <camachopinto_@hotmail.com>
HenryMantilla and guillepinto authored Jul 11, 2024
1 parent 2e0366f commit 0f24840
Showing 2 changed files with 252 additions and 0 deletions.
1 change: 1 addition & 0 deletions automation/notebooks-table-data.csv
@@ -9,3 +9,4 @@ DenseNet, architectures/densenet.ipynb,,1608.06993v5
Mobilenet V1, architectures/mobilenetv1.ipynb,,1704.04861
Inception V1, architectures/inceptionv1.ipynb,,1409.4842
Intersection Over Union (IoU),metrics/intersection-over-union.ipynb,,1902.09630
MLP Mixer, architectures/mlp-mixer.ipynb,,2105.01601
251 changes: 251 additions & 0 deletions notebooks/architectures/mlp-mixer.ipynb
@@ -0,0 +1,251 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"[![deep-learning-notes](https://github.com/semilleroCV/deep-learning-notes/raw/main/assets/banner-notebook.png)](https://github.com/semilleroCV/deep-learning-notes)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## MLP Mixer"
]
},
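{
"cell_type": "markdown",
"metadata": {},
"source": [
"MLP-Mixer ([Tolstikhin et al., 2021](https://arxiv.org/abs/2105.01601)) replaces self-attention with two kinds of MLPs applied to a table of patch embeddings $\\mathbf{X} \\in \\mathbb{R}^{S \\times C}$ ($S$ patches, $C$ channels): a *token-mixing* MLP shared across all channels and a *channel-mixing* MLP shared across all tokens. Each mixer layer computes\n",
"\n",
"$$\\mathbf{U}_{*,i} = \\mathbf{X}_{*,i} + \\mathbf{W}_2\\,\\sigma(\\mathbf{W}_1\\,\\mathrm{LayerNorm}(\\mathbf{X})_{*,i}), \\quad i = 1 \\dots C,$$\n",
"\n",
"$$\\mathbf{Y}_{j,*} = \\mathbf{U}_{j,*} + \\mathbf{W}_4\\,\\sigma(\\mathbf{W}_3\\,\\mathrm{LayerNorm}(\\mathbf{U})_{j,*}), \\quad j = 1 \\dots S,$$\n",
"\n",
"where $\\sigma$ is GELU. The implementation below follows this structure, with a residual connection around each mixing step."
]
},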
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"%%capture\n",
"#@title **Install required packages**\n",
"\n",
"! pip install torchinfo einops"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#@title **Importing libraries**\n",
"\n",
"import torch #2.3.1+cu121\n",
"import torch.nn as nn \n",
"import torchinfo #1.8.0\n",
"\n",
"import einops #0.8.0\n",
"from einops.layers.torch import Rearrange"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch version: 2.3.1+cu121\n",
"torchinfo version: 1.8.0\n",
"einops version: 0.8.0\n"
]
}
],
"source": [
"# Note: Not all dependencies have the __version__ method.\n",
"\n",
"print(f\"torch version: {torch.__version__}\")\n",
"print(f\"torchinfo version: {torchinfo.__version__}\")\n",
"print(f\"einops version: {einops.__version__}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### MLP-Mixer architecture code\n",
"\n"
]
},
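{
"cell_type": "markdown",
"metadata": {},
"source": [
"The model is built from three modules: `PatchEmbedding` (a strided `nn.Conv2d` that extracts and projects non-overlapping patches in a single step), `MixerBlock` (token mixing followed by channel mixing, each wrapped in a residual connection), and `MLPMixer` (a stack of `depth` mixer blocks followed by global average pooling and a linear classification head)."
]
},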
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"class PatchEmbedding(nn.Module):\n",
"    def __init__(self, in_channels: int, patch_size: int, embed_dim: int):\n",
"        super().__init__()\n",
"\n",
"        # A conv with kernel_size = stride = patch_size projects each\n",
"        # non-overlapping patch to an embed_dim-dimensional vector.\n",
"        self.proj = nn.Conv2d(in_channels, embed_dim, kernel_size=patch_size, stride=patch_size)\n",
"        self.rearrange = Rearrange('b e h w -> b (h w) e')\n",
"\n",
"    def forward(self, x):\n",
"        x = self.proj(x)\n",
"        x = self.rearrange(x)\n",
"\n",
"        return x\n",
"\n",
"\n",
"class MLPBlock(nn.Module):\n",
"    def __init__(self, input_dim: int, hidden_dim: int):\n",
"        super().__init__()\n",
"\n",
"        self.mlp_blk = nn.Sequential(\n",
"            nn.Linear(input_dim, hidden_dim),\n",
"            nn.GELU(),\n",
"            nn.Linear(hidden_dim, input_dim),\n",
"        )\n",
"\n",
"    def forward(self, x):\n",
"        return self.mlp_blk(x)\n",
"\n",
"\n",
"class MixerBlock(nn.Module):\n",
"    def __init__(self, dim: int, num_patches: int, token_dim: int, channel_dim: int):\n",
"        super().__init__()\n",
"\n",
"        # Token mixing: transpose so the MLP acts along the patch (token) axis.\n",
"        self.token_mixer = nn.Sequential(\n",
"            nn.LayerNorm(dim),\n",
"            Rearrange('b n c -> b c n'),\n",
"            MLPBlock(num_patches, token_dim),\n",
"            Rearrange('b c n -> b n c'),\n",
"        )\n",
"\n",
"        # Channel mixing: the MLP acts along the channel axis of each token.\n",
"        self.channel_mixer = nn.Sequential(\n",
"            nn.LayerNorm(dim),\n",
"            MLPBlock(dim, channel_dim),\n",
"        )\n",
"\n",
"    def forward(self, x):\n",
"        x = x + self.token_mixer(x)\n",
"        x = x + self.channel_mixer(x)\n",
"\n",
"        return x\n",
"\n",
"\n",
"class MLPMixer(nn.Module):\n",
"    def __init__(self, num_classes: int, hidden_dim: int, depth: int, in_channels: int = 3, img_size: int = 224,\n",
"                 patch_size: int = 16, token_dim: int = 256, channel_dim: int = 256):\n",
"        super().__init__()\n",
"\n",
"        self.patch_embed = PatchEmbedding(in_channels, patch_size, hidden_dim)\n",
"        num_patches = (img_size // patch_size) ** 2\n",
"\n",
"        self.mixer_blks = nn.Sequential()\n",
"\n",
"        for i in range(depth):\n",
"            self.mixer_blks.add_module(f\"MixerBlock_{i}\",\n",
"                                       MixerBlock(hidden_dim, num_patches, token_dim, channel_dim))\n",
"\n",
"        self.norm = nn.LayerNorm(hidden_dim)\n",
"        self.head = nn.Linear(hidden_dim, num_classes)\n",
"\n",
"    def forward(self, x):\n",
"        x = self.patch_embed(x)\n",
"        x = self.mixer_blks(x)\n",
"        x = self.norm(x)\n",
"        x = x.mean(dim=1)  # global average pooling over patch tokens\n",
"        x = self.head(x)\n",
"\n",
"        return x"
]
},
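{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick shape check (a minimal sketch with an arbitrary small config,\n",
"# not from the paper): a 224x224 image yields (224 / 16)^2 = 196 patch\n",
"# tokens, and the head should return one logit per class.\n",
"\n",
"tiny_mixer = MLPMixer(num_classes=10, hidden_dim=64, depth=2)\n",
"dummy = torch.randn(2, 3, 224, 224)\n",
"\n",
"with torch.no_grad():\n",
"    tokens = tiny_mixer.patch_embed(dummy)\n",
"    logits = tiny_mixer(dummy)\n",
"\n",
"print(tokens.shape)  # torch.Size([2, 196, 64])\n",
"print(logits.shape)  # torch.Size([2, 10])"
]
},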
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"===============================================================================================\n",
"Layer (type:depth-idx) Output Shape Param #\n",
"===============================================================================================\n",
"MLPMixer [1, 1000] --\n",
"├─PatchEmbedding: 1-1 [1, 196, 512] --\n",
"│ └─Conv2d: 2-1 [1, 512, 14, 14] 393,728\n",
"│ └─Rearrange: 2-2 [1, 196, 512] --\n",
"├─Sequential: 1-2 [1, 196, 512] --\n",
"│ └─MixerBlock: 2-3 [1, 196, 512] --\n",
"│ │ └─Sequential: 3-1 [1, 196, 512] 101,828\n",
"│ │ └─Sequential: 3-2 [1, 196, 512] 2,100,736\n",
"│ └─MixerBlock: 2-4 [1, 196, 512] --\n",
"│ │ └─Sequential: 3-3 [1, 196, 512] 101,828\n",
"│ │ └─Sequential: 3-4 [1, 196, 512] 2,100,736\n",
"│ └─MixerBlock: 2-5 [1, 196, 512] --\n",
"│ │ └─Sequential: 3-5 [1, 196, 512] 101,828\n",
"│ │ └─Sequential: 3-6 [1, 196, 512] 2,100,736\n",
"│ └─MixerBlock: 2-6 [1, 196, 512] --\n",
"│ │ └─Sequential: 3-7 [1, 196, 512] 101,828\n",
"│ │ └─Sequential: 3-8 [1, 196, 512] 2,100,736\n",
"│ └─MixerBlock: 2-7 [1, 196, 512] --\n",
"│ │ └─Sequential: 3-9 [1, 196, 512] 101,828\n",
"│ │ └─Sequential: 3-10 [1, 196, 512] 2,100,736\n",
"│ └─MixerBlock: 2-8 [1, 196, 512] --\n",
"│ │ └─Sequential: 3-11 [1, 196, 512] 101,828\n",
"│ │ └─Sequential: 3-12 [1, 196, 512] 2,100,736\n",
"│ └─MixerBlock: 2-9 [1, 196, 512] --\n",
"│ │ └─Sequential: 3-13 [1, 196, 512] 101,828\n",
"│ │ └─Sequential: 3-14 [1, 196, 512] 2,100,736\n",
"│ └─MixerBlock: 2-10 [1, 196, 512] --\n",
"│ │ └─Sequential: 3-15 [1, 196, 512] 101,828\n",
"│ │ └─Sequential: 3-16 [1, 196, 512] 2,100,736\n",
"├─LayerNorm: 1-3 [1, 196, 512] 1,024\n",
"├─Linear: 1-4 [1, 1000] 513,000\n",
"===============================================================================================\n",
"Total params: 18,528,264\n",
"Trainable params: 18,528,264\n",
"Non-trainable params: 0\n",
"Total mult-adds (Units.MEGABYTES): 95.31\n",
"===============================================================================================\n",
"Input size (MB): 0.60\n",
"Forward/backward pass size (MB): 61.38\n",
"Params size (MB): 74.11\n",
"Estimated Total Size (MB): 136.10\n",
"==============================================================================================="
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model = MLPMixer(num_classes=1000, hidden_dim=512, depth=8, patch_size=16,\n",
" token_dim=256, channel_dim=2048)\n",
"torchinfo.summary(model, (3, 224, 224), batch_dim = 0)"
]
}
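,
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Cross-check (an added sketch, assuming the configuration instantiated\n",
"# above): recompute the per-block parameter counts reported by torchinfo\n",
"# directly from the layer shapes.\n",
"\n",
"S, C = 196, 512                      # number of patches, hidden (channel) dim\n",
"token_dim, channel_dim = 256, 2048\n",
"\n",
"def linear_params(d_in, d_out):\n",
"    return d_in * d_out + d_out      # weights + bias\n",
"\n",
"token_mixer = 2 * C + linear_params(S, token_dim) + linear_params(token_dim, S)\n",
"channel_mixer = 2 * C + linear_params(C, channel_dim) + linear_params(channel_dim, C)\n",
"\n",
"print(token_mixer)    # 101828, matching each token-mixing Sequential\n",
"print(channel_mixer)  # 2100736, matching each channel-mixing Sequential"
]
}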
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
