diff --git a/lib/scholar/metrics/regression.ex b/lib/scholar/metrics/regression.ex
index aa2385ad..b26e4605 100644
--- a/lib/scholar/metrics/regression.ex
+++ b/lib/scholar/metrics/regression.ex
@@ -519,6 +519,123 @@ defmodule Scholar.Metrics.Regression do
     Nx.reduce_max(Nx.abs(y_true - y_pred))
   end
 
+  mean_pinball_loss_opts = [
+    alpha: [
+      type: :float,
+      default: 0.5,
+      doc: """
+      The slope of the pinball loss. With $$\\alpha = 0.5$$ the loss equals half
+      of the mean absolute error; $$\\alpha = 0.95$$ is minimized by estimators
+      of the 95th percentile.
+      """
+    ],
+    sample_weights: [
+      type:
+        {:or,
+         [
+           {:custom, Scholar.Options, :weights, []},
+           {:custom, Scholar.Options, :multi_weights, []}
+         ]},
+      doc: """
+      The weights for each observation. If not provided,
+      all observations are assigned equal weight.
+      """
+    ],
+    multioutput: [
+      type:
+        {:or,
+         [
+           {:custom, Scholar.Options, :weights, []},
+           {:in, [:raw_values, :uniform_average]}
+         ]},
+      default: :uniform_average,
+      doc: """
+      Defines aggregating of multiple output values.
+      Array-like value defines weights used to average errors.
+      Defaults to `:uniform_average`.
+
+      `:raw_values` :
+        Returns a full set of errors in case of multioutput input.
+
+      `:uniform_average` :
+        Errors of all outputs are averaged with uniform weight.
+      """
+    ]
+  ]
+
+  @mean_pinball_loss_schema NimbleOptions.new!(mean_pinball_loss_opts)
+
+  @doc """
+  Calculates the mean pinball loss to evaluate predictive performance of quantile regression models.
+
+  $$pinball(y, \\hat{y}) = \\frac{1}{n} \\sum_{i=1}^{n} \\alpha max(y_i - \\hat{y_i}, 0) +
+  (1 - \\alpha) max(\\hat{y_i} - y_i, 0)$$
+
+  where $y$ is a true value, $\\hat{y}$ is a predicted value
+  and $\\alpha$ is the slope given by the `:alpha` option.
+
+  #{NimbleOptions.docs(@mean_pinball_loss_schema)}
+
+  ## Examples
+
+      iex> y_true = Nx.tensor([1, 2, 3])
+      iex> y_pred = Nx.tensor([2, 3, 4])
+      iex> Scholar.Metrics.Regression.mean_pinball_loss(y_true, y_pred)
+      #Nx.Tensor<
+        f32
+        0.5
+      >
+      iex> y_true = Nx.tensor([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
+      iex> y_pred = Nx.tensor([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])
+      iex> Scholar.Metrics.Regression.mean_pinball_loss(y_true, y_pred, alpha: 0.5, multioutput: :raw_values)
+      #Nx.Tensor<
+        f32[4]
+        [0.5, 0.3333333432674408, 0.0, 0.0]
+      >
+  """
+  deftransform mean_pinball_loss(y_true, y_pred, opts \\ []) do
+    mean_pinball_loss_n(y_true, y_pred, NimbleOptions.validate!(opts, @mean_pinball_loss_schema))
+  end
+
+  # Numerical core of mean_pinball_loss/3; receives the already validated options.
+  defnp mean_pinball_loss_n(y_true, y_pred, opts) do
+    assert_same_shape!(y_true, y_pred)
+    alpha = opts[:alpha]
+
+    # Formula adapted from sklearn:
+    # https://github.com/scikit-learn/scikit-learn/blob/128e40ed593c57e8b9e57a4109928d58fa8bf359/sklearn/metrics/_regression.py#L299
+    diff = y_true - y_pred
+    sign = diff >= 0
+    loss = alpha * sign * diff - (1 - alpha) * (1 - sign) * diff
+
+    output_errors = handle_sample_weights(loss, opts, axes: [0])
+    # mimics the sklearn behavior
+    case opts[:multioutput] do
+      # raw_values returns plain output errors. One value per channel.
+      :raw_values ->
+        output_errors
+
+      # uniform_average returns the mean of the above. Note how they are averaged.
+      :uniform_average ->
+        Nx.mean(output_errors)
+
+      # pass `:multioutput` as sample weights to average the error of each output
+      multi_output_weights ->
+        handle_sample_weights(output_errors, sample_weights: multi_output_weights)
+    end
+  end
+
+  # Mean of `loss` (forwarding `mean_opts`), weighted by `opts[:sample_weights]` when set.
+  defnp handle_sample_weights(loss, opts, mean_opts \\ []) do
+    case opts[:sample_weights] do
+      nil ->
+        Nx.mean(loss, mean_opts)
+
+      weights ->
+        Nx.weighted_mean(loss, weights, mean_opts)
+    end
+  end
+
   defnp check_shape(y_true, y_pred) do
     assert_rank!(y_true, 1)
     assert_same_shape!(y_true, y_pred)
diff --git a/lib/scholar/options.ex b/lib/scholar/options.ex
index e0173ad1..e1ac99c9 100644
--- a/lib/scholar/options.ex
+++ b/lib/scholar/options.ex
@@ -83,6 +83,16 @@ defmodule Scholar.Options do
     end
   end
 
+  def multi_weights(weights) do
+    if is_nil(weights) or
+         (Nx.is_tensor(weights) and Nx.rank(weights) > 1) do
+      {:ok, weights}
+    else
+      {:error,
+       "expected weights to be a tensor with rank greater than 1, got: #{inspect(weights)}"}
+    end
+  end
+
   def key(key) do
     if Nx.is_tensor(key) and Nx.type(key) == {:u, 32} and Nx.shape(key) == {2} do
       {:ok, key}
diff --git a/test/scholar/metrics/regression_test.exs b/test/scholar/metrics/regression_test.exs
index 2522da25..ef305e75 100644
--- a/test/scholar/metrics/regression_test.exs
+++ b/test/scholar/metrics/regression_test.exs
@@ -66,4 +66,99 @@ defmodule Scholar.Metrics.RegressionTest do
       assert Nx.equal(d2, r2)
     end
   end
+
+  describe "mean_pinball_loss/3" do
+    test "mean_pinball_loss cases from sklearn" do
+      # Test cases copied from sklearn:
+      # https://github.com/scikit-learn/scikit-learn/blob/128e40ed593c57e8b9e57a4109928d58fa8bf359/sklearn/metrics/tests/test_regression.py#L49
+
+      y_true = Nx.linspace(1, 50, n: 50)
+      y_pred = Nx.add(y_true, 1)
+      y_pred_2 = Nx.add(y_true, -1)
+
+      assert_all_close(Regression.mean_pinball_loss(y_true, y_pred), Nx.tensor(0.5))
+      assert_all_close(Regression.mean_pinball_loss(y_true, y_pred_2), Nx.tensor(0.5))
+      assert_all_close(Regression.mean_pinball_loss(y_true, y_pred, alpha: 0.4), Nx.tensor(0.6))
+      assert_all_close(Regression.mean_pinball_loss(y_true, y_pred_2, alpha: 0.4), Nx.tensor(0.4))
+    end
+
+    test "mean_pinball_loss with sample weight" do
+      y_true = Nx.tensor([1, 2, 3, 4, 5, 6])
+      y_pred = Nx.tensor([2, 3, 4, 6, 7, 8])
+      sample_weights = Nx.tensor([1.5, 1.5, 1.5, 0.5, 0.5, 0.5])
+      wrong_sample_weights = Nx.tensor([1.5, 1.5, 1.5, 0.5, 0.5, 0.5, 1, 1, 1])
+
+      assert_all_close(Regression.mean_pinball_loss(y_true, y_pred), Nx.tensor(0.75))
+
+      mpbl =
+        Regression.mean_pinball_loss(y_true, y_pred,
+          alpha: 0.5,
+          sample_weights: sample_weights
+        )
+
+      assert_all_close(mpbl, Nx.tensor(0.625))
+
+      assert_raise ArgumentError, fn ->
+        Regression.mean_pinball_loss(y_true, y_pred,
+          alpha: 0.5,
+          sample_weights: wrong_sample_weights
+        )
+      end
+    end
+
+    test "mean_pinball_loss with multioutput" do
+      y_true = Nx.tensor([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
+      y_pred = Nx.tensor([[0, 0, 0, 1], [1, 0, 1, 1], [0, 0, 0, 1]])
+
+      sample_weight =
+        Nx.tensor([[0.5, 0.5, 0.5, 1.5], [1.5, 0.5, 1.5, 1.5], [1.5, 1.5, 1.5, 1.5]])
+
+      expected_error = Nx.tensor((1 + 2 / 3) / 8)
+      expected_raw_values_tensor = Nx.tensor([0.5, 0.33333333, 0.0, 0.0])
+      expected_raw_values_weighted_tensor = Nx.tensor([0.5, 0.4, 0.0, 0.0])
+
+      mpbl = Regression.mean_pinball_loss(y_true, y_pred)
+      assert_all_close(mpbl, expected_error)
+      # passing the default options explicitly must give the same result
+      mpbl =
+        Regression.mean_pinball_loss(
+          y_true,
+          y_pred,
+          alpha: 0.5,
+          multioutput: :uniform_average
+        )
+
+      assert_all_close(mpbl, expected_error)
+      mpbl = Regression.mean_pinball_loss(y_true, y_pred, alpha: 0.5, multioutput: :raw_values)
+      assert_all_close(mpbl, expected_raw_values_tensor)
+
+      mpbl =
+        Regression.mean_pinball_loss(y_true, y_pred,
+          alpha: 0.5,
+          sample_weights: sample_weight,
+          multioutput: :raw_values
+        )
+
+      assert_all_close(mpbl, expected_raw_values_weighted_tensor)
+
+      mpbl =
+        Regression.mean_pinball_loss(y_true, y_pred,
+          alpha: 0.5,
+          sample_weights: sample_weight,
+          multioutput: :uniform_average
+        )
+
+      assert_all_close(mpbl, Nx.tensor(0.225))
+
+      mpbl =
+        Regression.mean_pinball_loss(y_true, y_pred,
+          alpha: 0.5,
+          multioutput: Nx.tensor([1, 2, 3, 4])
+        )
+
+      assert_all_close(mpbl, Nx.tensor(0.1166666))
+      mpbl = Regression.mean_pinball_loss(y_true, y_pred, alpha: 0.5, multioutput: nil)
+      assert_all_close(mpbl, expected_error)
+    end
+  end
 end