Add Mean Pinball Loss function (#235)

elixir-nx · Mar 4, 2024 · 4dccc0a · 4dccc0a
1 parent cd64e15
commit 4dccc0a
Show file tree

Hide file tree

Showing 3 changed files with 223 additions and 0 deletions.
diff --git a/lib/scholar/metrics/regression.ex b/lib/scholar/metrics/regression.ex
@@ -519,6 +519,125 @@ defmodule Scholar.Metrics.Regression do
     Nx.reduce_max(Nx.abs(y_true - y_pred))
   end
 
+  # Option schema for `mean_pinball_loss/3`. The `doc:` entries are rendered into the
+  # function documentation through `NimbleOptions.docs/1`.
+  mean_pinball_loss_opts = [
+    alpha: [
+      type: :float,
+      default: 0.5,
+      # `~S` keeps backslashes literal: in a plain heredoc `\a` is the bell escape,
+      # which would corrupt `\alpha` in the rendered documentation.
+      doc: ~S"""
+      The slope of the pinball loss. With $$\alpha = 0.5$$ this loss is equivalent
+      (up to a constant factor of 1/2) to the mean absolute error;
+      $$\alpha = 0.95$$ is minimized by estimators of the 95th percentile.
+      """
+    ],
+    sample_weights: [
+      type:
+        {:or,
+         [
+           {:custom, Scholar.Options, :weights, []},
+           {:custom, Scholar.Options, :multi_weights, []}
+         ]},
+      doc: """
+      The weights for each observation. If not provided,
+      all observations are assigned equal weight.
+      """
+    ],
+    multioutput: [
+      type:
+        {:or,
+         [
+           {:custom, Scholar.Options, :weights, []},
+           {:in, [:raw_values, :uniform_average]}
+         ]},
+      default: :uniform_average,
+      doc: """
+      Defines how the errors of multiple outputs are aggregated.
+      Defaults to `:uniform_average`.
+
+        * `:raw_values` - returns the full set of errors, one per output,
+          in case of multioutput input.
+
+        * `:uniform_average` - errors of all outputs are averaged with uniform weight.
+
+        * weights (e.g. a tensor with one entry per output) - errors of all outputs
+          are averaged using the given weights.
+      """
+    ]
+  ]
+
+  @mean_pinball_loss_schema NimbleOptions.new!(mean_pinball_loss_opts)
+
+  # A plain heredoc (not `~S`) is used so that the option docs below are interpolated;
+  # the LaTeX backslashes are doubled because escapes are active in a plain heredoc.
+  @doc """
+  Calculates the mean pinball loss to evaluate predictive performance of quantile regression models.
+
+  $$pinball(y, \\hat{y}) = \\frac{1}{n} \\sum_{i=1}^{n} \\alpha \\max(y_i - \\hat{y}_i, 0) +
+  (1 - \\alpha) \\max(\\hat{y}_i - y_i, 0)$$
+
+  where $y$ is a true value, $\\hat{y}$ is a predicted value and $\\alpha$ is the
+  value of the `:alpha` option.
+
+  ## Options
+
+  #{NimbleOptions.docs(@mean_pinball_loss_schema)}
+
+  ## Examples
+
+      iex> y_true = Nx.tensor([1, 2, 3])
+      iex> y_pred = Nx.tensor([2, 3, 4])
+      iex> Scholar.Metrics.Regression.mean_pinball_loss(y_true, y_pred)
+      #Nx.Tensor<
+        f32
+        0.5
+      >
+      iex> y_true = Nx.tensor([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
+      iex> y_pred = Nx.tensor([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])
+      iex> Scholar.Metrics.Regression.mean_pinball_loss(y_true, y_pred, alpha: 0.5, multioutput: :raw_values)
+      #Nx.Tensor<
+        f32[4]
+        [0.5, 0.3333333432674408, 0.0, 0.0]
+      >
+  """
+  deftransform mean_pinball_loss(y_true, y_pred, opts \\ []) do
+    # Validate the options against the schema before delegating to the
+    # numerical implementation.
+    mean_pinball_loss_n(y_true, y_pred, NimbleOptions.validate!(opts, @mean_pinball_loss_schema))
+  end
+
+  # Numerical implementation behind `mean_pinball_loss/3`; receives the validated options.
+  defnp mean_pinball_loss_n(y_true, y_pred, opts) do
+    assert_same_shape!(y_true, y_pred)
+    slope = opts[:alpha]
+
+    # Formula adapted from sklearn:
+    # https://github.com/scikit-learn/scikit-learn/blob/128e40ed593c57e8b9e57a4109928d58fa8bf359/sklearn/metrics/_regression.py#L299
+    residual = y_true - y_pred
+    # 1 where the residual is non-negative, 0 elsewhere; `over` is its complement.
+    under = residual >= 0
+    over = 1 - under
+    loss = slope * under * residual - (1 - slope) * over * residual
+
+    # Reduce along the first axis, honouring `:sample_weights` when given.
+    per_output = handle_sample_weights(loss, opts, axes: [0])
+
+    # Aggregation across outputs mimics the sklearn behavior.
+    case opts[:multioutput] do
+      # plain per-output errors, no further aggregation
+      :raw_values -> per_output
+      # unweighted mean of the per-output errors
+      :uniform_average -> Nx.mean(per_output)
+      # any other value is used as weights to average the per-output errors
+      output_weights -> handle_sample_weights(per_output, sample_weights: output_weights)
+    end
+  end
+
+  # Averages `loss`: a plain mean when `opts[:sample_weights]` is `nil`, otherwise a
+  # weighted mean using those weights. `mean_opts` is forwarded to the Nx call unchanged.
+  defnp handle_sample_weights(loss, opts, mean_opts \\ []) do
+    case opts[:sample_weights] do
+      nil -> Nx.mean(loss, mean_opts)
+      sample_weights -> Nx.weighted_mean(loss, sample_weights, mean_opts)
+    end
+  end
+
   defnp check_shape(y_true, y_pred) do
     assert_rank!(y_true, 1)
     assert_same_shape!(y_true, y_pred)

diff --git a/lib/scholar/options.ex b/lib/scholar/options.ex
@@ -83,6 +83,16 @@ defmodule Scholar.Options do
     end
   end
 
+  # Option validator (referenced as a `:custom` type): accepts `nil` or a tensor whose
+  # rank is greater than 1, and returns an error tuple for anything else.
+  def multi_weights(weights) do
+    cond do
+      is_nil(weights) ->
+        {:ok, weights}
+
+      Nx.is_tensor(weights) and Nx.rank(weights) > 1 ->
+        {:ok, weights}
+
+      true ->
+        {:error,
+         "expected weights to be a tensor with rank greater than 1, got: #{inspect(weights)}"}
+    end
+  end
+
   def key(key) do
     if Nx.is_tensor(key) and Nx.type(key) == {:u, 32} and Nx.shape(key) == {2} do
       {:ok, key}

diff --git a/test/scholar/metrics/regression_test.exs b/test/scholar/metrics/regression_test.exs
@@ -66,4 +66,98 @@ defmodule Scholar.Metrics.RegressionTest do
       assert Nx.equal(d2, r2)
     end
   end
+
+  describe "mean_pinball_loss/3" do
+    test "mean_pinball_loss cases from sklearn" do
+      # Test cases copied from sklearn:
+      # https://github.com/scikit-learn/scikit-learn/blob/128e40ed593c57e8b9e57a4109928d58fa8bf359/sklearn/metrics/tests/test_regression.py#L49
+
+      y_true = Nx.linspace(1, 50, n: 50)
+      # Predictions that are off by exactly one, above and below the target.
+      y_pred = Nx.add(y_true, 1)
+      y_pred_2 = Nx.add(y_true, -1)
+
+      # Compare with a tolerance instead of `==`: struct equality on float tensors
+      # depends on exact bit patterns and is brittle against rounding differences.
+      assert_all_close(Regression.mean_pinball_loss(y_true, y_pred), Nx.tensor(0.5))
+      assert_all_close(Regression.mean_pinball_loss(y_true, y_pred_2), Nx.tensor(0.5))
+      assert_all_close(Regression.mean_pinball_loss(y_true, y_pred, alpha: 0.4), Nx.tensor(0.6))
+
+      assert_all_close(
+        Regression.mean_pinball_loss(y_true, y_pred_2, alpha: 0.4),
+        Nx.tensor(0.4)
+      )
+    end
+
+    test "mean_pinball_loss with sample weight" do
+      y_true = Nx.tensor([1, 2, 3, 4, 5, 6])
+      y_pred = Nx.tensor([2, 3, 4, 6, 7, 8])
+      sample_weights = Nx.tensor([1.5, 1.5, 1.5, 0.5, 0.5, 0.5])
+      # Nine weights for six observations: the size mismatch must be rejected.
+      wrong_sample_weights = Nx.tensor([1.5, 1.5, 1.5, 0.5, 0.5, 0.5, 1, 1, 1])
+
+      # Compare with a tolerance instead of `==`: struct equality on float tensors
+      # depends on exact bit patterns and is brittle against rounding differences.
+      assert_all_close(Regression.mean_pinball_loss(y_true, y_pred), Nx.tensor(0.75))
+
+      assert_all_close(
+        Regression.mean_pinball_loss(
+          y_true,
+          y_pred,
+          alpha: 0.5,
+          sample_weights: sample_weights
+        ),
+        Nx.tensor(0.625)
+      )
+
+      assert_raise ArgumentError, fn ->
+        Regression.mean_pinball_loss(y_true, y_pred,
+          alpha: 0.5,
+          sample_weights: wrong_sample_weights
+        )
+      end
+    end
+
+    test "mean_pinball_loss with multioutput" do
+      y_true = Nx.tensor([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
+      y_pred = Nx.tensor([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])
+
+      weights =
+        Nx.tensor([[0.5, 0.5, 0.5, 1.5], [1.5, 0.5, 1.5, 1.5], [1.5, 1.5, 1.5, 1.5]])
+
+      uniform_expected = Nx.tensor((1 + 2 / 3) / 8)
+      raw_expected = Nx.tensor([0.5, 0.33333333, 0.0, 0.0])
+      raw_weighted_expected = Nx.tensor([0.5, 0.4, 0.0, 0.0])
+
+      # Every case uses the same inputs and differs only in the options passed.
+      loss = fn opts -> Regression.mean_pinball_loss(y_true, y_pred, opts) end
+
+      # All results are compared with a tolerance via `assert_all_close`.
+      # Default options.
+      [] |> loss.() |> assert_all_close(uniform_expected)
+
+      # Explicit uniform average gives the same result as the defaults.
+      [alpha: 0.5, multioutput: :uniform_average]
+      |> loss.()
+      |> assert_all_close(uniform_expected)
+
+      # Raw values: one error per output.
+      [alpha: 0.5, multioutput: :raw_values]
+      |> loss.()
+      |> assert_all_close(raw_expected)
+
+      # Per-element sample weights combined with raw values.
+      [alpha: 0.5, sample_weights: weights, multioutput: :raw_values]
+      |> loss.()
+      |> assert_all_close(raw_weighted_expected)
+
+      # Per-element sample weights combined with a uniform average.
+      [alpha: 0.5, sample_weights: weights, multioutput: :uniform_average]
+      |> loss.()
+      |> assert_all_close(Nx.tensor(0.225))
+
+      # A tensor passed as `:multioutput` weights the per-output errors.
+      [alpha: 0.5, multioutput: Nx.tensor([1, 2, 3, 4])]
+      |> loss.()
+      |> assert_all_close(Nx.tensor(0.1166666))
+
+      # `nil` for `:multioutput` yields the same value as the uniform average.
+      [alpha: 0.5, multioutput: nil]
+      |> loss.()
+      |> assert_all_close(uniform_expected)
+    end
+  end
 end