Add CELoss vs NLLLoss (#9)

Co-authored-by: Fabian Perez *-* <nelsonfabiancs8@gmail.com>
semilleroCV · Jun 19, 2024 · 2b3c561 · 2b3c561
1 parent b96be04
commit 2b3c561
Show file tree

Hide file tree

Showing 2 changed files with 244 additions and 0 deletions.
diff --git a/automation/notebooks-table-data.csv b/automation/notebooks-table-data.csv
@@ -1,3 +1,4 @@
 display_name, notebook_name, github_repository_path, arxiv_index
 Focal Loss,losses/focal-loss.ipynb,https://github.com/facebookresearch/Detectron,1708.02002
+CELoss vs NLLLoss, losses/celoss-vs-nllloss.ipynb, , 
 Network In Network, architectures/network-in-network.ipynb,,1312.4400
diff --git a/notebooks/losses/celoss-vs-nllloss.ipynb b/notebooks/losses/celoss-vs-nllloss.ipynb
@@ -0,0 +1,243 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[![deep-learning-notes](https://github.com/semilleroCV/deep-learning-notes/raw/main/assets/banner-notebook.png)](https://github.com/semilleroCV/deep-learning-notes)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# <font color='#4C5FDA'>**Cross Entropy Loss vs Negative Log Likelihood Loss** </font> <a name=\"tema1\">"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Original: https://www.youtube.com/watch?v=Ni1ViB1Ezjs&ab_channel=MakeesyAI\n",
+    "import torch # 2.2.1\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "2.2.1\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(torch.__version__)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([3, 5]) torch.float32\n",
+      "tensor([[0.4499, 0.8788, 0.5056, 0.1445, 0.0907],\n",
+      "        [0.4715, 0.6950, 0.8860, 0.1334, 0.2139],\n",
+      "        [0.1923, 0.5645, 0.9867, 0.4618, 0.1355]], requires_grad=True)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# I simulate the model output with batch size = 3 and 5 classes.\n",
+    "\n",
+    "# The requires_grad simulates as if we were training a model.\n",
+    "prediction = torch.rand(3, 5, requires_grad=True) \n",
+    "print(prediction.size(), prediction.dtype)\n",
+    "print(prediction)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([3]) torch.int64\n",
+      "tensor([0, 1, 4])\n"
+     ]
+    }
+   ],
+   "source": [
+    "# I simulate the expected outputs of each batch element.\n",
+    "# For the first element corresponds 0, for the second 1 and for the third 4.\n",
+    "target = torch.tensor([0, 1, 4])\n",
+    "print(target.size(), target.dtype)\n",
+    "print(target)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "log_softmax = nn.LogSoftmax(dim=-1) # dim=-1 is so that it always operates on the model outputs not on the batch.\n",
+    "loss_fn_nll = nn.NLLLoss()\n",
+    "loss_fn_ce = nn.CrossEntropyLoss()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor(1.6802, grad_fn=<NllLossBackward0>)\n",
+      "tensor(1.6802, grad_fn=<NllLossBackward0>)\n"
+     ]
+    }
+   ],
+   "source": [
+    "loss_nll = loss_fn_nll(log_softmax(prediction), target) # We have to apply log softmax\n",
+    "loss_ce = loss_fn_ce(prediction, target) # Cross Entropy applies softmax\n",
+    "\n",
+    "# We obtain the loss as if we were training.\n",
+    "loss_nll.backward()\n",
+    "loss_ce.backward()\n",
+    "\n",
+    "# Imprimos la pérdida\n",
+    "print(loss_nll)\n",
+    "print(loss_ce)\n",
+    "\n",
+    "# Interestingly, both use the same method for error propagation."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# <font color='#4C5FDA'>**Extra: Binary Cross Entropy Loss vs Binary Cross Entropy Loss With Logits Loss** </font> <a name=\"tema1\">"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([3, 1]) torch.float32\n",
+      "tensor([[0.1320],\n",
+      "        [0.3074],\n",
+      "        [0.6341]], requires_grad=True)\n"
+     ]
+    }
+   ],
+   "source": [
+    "prediction = torch.rand(3, 1, requires_grad=True) \n",
+    "print(prediction.size(), prediction.dtype)\n",
+    "print(prediction)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([3, 1]) torch.float32\n",
+      "tensor([[0.],\n",
+      "        [1.],\n",
+      "        [0.]])\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = torch.tensor([0, 1, 0]).unsqueeze(1).float() # Only two classes\n",
+    "print(target.size(), target.dtype)\n",
+    "print(target)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sigmoid = nn.Sigmoid() \n",
+    "loss_fn_bce = nn.BCELoss()\n",
+    "loss_fn_bcewl = nn.BCEWithLogitsLoss() # This applies sigmoid "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "tensor(0.7907, grad_fn=<BinaryCrossEntropyBackward0>)\n",
+      "tensor(0.7907, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)\n"
+     ]
+    }
+   ],
+   "source": [
+    "loss_bce = loss_fn_bce(sigmoid(prediction), target) # We have to apply sigmoid\n",
+    "loss_bcewl = loss_fn_bcewl(prediction, target)\n",
+    "\n",
+    "# We obtain the loss as if we were training.\n",
+    "loss_bce.backward()\n",
+    "loss_bcewl.backward()\n",
+    "\n",
+    "# We print the loss\n",
+    "print(loss_bce)\n",
+    "print(loss_bcewl)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "carvana-unet",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}