|
| 1 | +"""Metrics on binary predictions for anomaly detection.""" |
| 2 | + |
| 3 | +__maintainer__ = ["CodeLionX"] |
| 4 | +__all__ = ["range_precision", "range_recall", "range_f_score"] |
| 5 | + |
| 6 | +import warnings |
| 7 | + |
| 8 | +import numpy as np |
| 9 | + |
| 10 | +from aeon.performance_metrics.anomaly_detection._util import check_y |
| 11 | +from aeon.utils.validation._dependencies import _check_soft_dependencies |
| 12 | + |
| 13 | + |
def range_precision(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    alpha: float = 0,
    cardinality: str = "reciprocal",
    bias: str = "flat",
) -> float:
    """Compute the range-based precision of binary anomaly predictions.

    Range-based precision and recall were proposed by Tatbul et al. at NeurIPS
    2018 [1]_. The actual computation is delegated to the community package
    `prts <https://pypi.org/project/prts/>`_, which is a soft-dependency and is
    only imported when this function is called.

    The metric averages the precision over all predicted anomaly ranges. Each
    contiguous predicted range is scored by the size, the position, and the
    cardinality of its overlap with the real anomaly ranges. See [1]_ for the
    exact definition.

    Parameters
    ----------
    y_true : np.ndarray
        True binary labels of shape (n_instances,).
    y_pred : np.ndarray
        Binary anomaly predictions for each point of the time series, of shape
        (n_instances,).
    alpha : float, default=0
        Weight of the existence reward. Precision focuses on the quality of the
        predictions, so an existence reward is not meaningful here and this
        value should stay at 0.
    cardinality : {'reciprocal', 'one', 'udf_gamma'}, default='reciprocal'
        Cardinality type.
    bias : {'flat', 'front', 'middle', 'back'}, default='flat'
        Positional bias type.

    Returns
    -------
    float
        Range-based precision. If ``y_pred`` holds only a single distinct value,
        a warning is issued and 0.0 is returned instead.

    References
    ----------
    .. [1] Tatbul, Nesime, Tae Jun Lee, Stan Zdonik, Mejbah Alam, and Justin
       Gottschlich. "Precision and Recall for Time Series." In Proceedings of the
       International Conference on Neural Information Processing Systems (NeurIPS),
       1920–30. 2018.
       http://papers.nips.cc/paper/7462-precision-and-recall-for-time-series.pdf.
    """
    # Verify the soft dependency first, then import it lazily.
    _check_soft_dependencies(
        "prts",
        obj="range_precision",
        suppress_import_stdout=True,
    )

    from prts import ts_precision

    y_true, y_pred = check_y(y_true, y_pred, force_y_pred_continuous=False)

    n_distinct_predictions = np.unique(y_pred).size
    if n_distinct_predictions != 1:
        return ts_precision(
            y_true,
            y_pred,
            alpha=alpha,
            cardinality=cardinality,
            bias=bias,
        )

    # Constant predictions: warn and fall back to the worst score.
    # NOTE: the message says ``y_score`` although the parameter is ``y_pred``;
    # the text is kept unchanged on purpose.
    warnings.warn(
        "Cannot compute metric for a constant value in y_score, returning 0.0!",
        stacklevel=2,
    )
    return 0.0
| 71 | + |
| 72 | + |
def range_recall(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    alpha: float = 0,
    cardinality: str = "reciprocal",
    bias: str = "flat",
) -> float:
    """Compute the range-based recall of binary anomaly predictions.

    Range-based precision and recall were proposed by Tatbul et al. at NeurIPS
    2018 [1]_. The actual computation is delegated to the community package
    `prts <https://pypi.org/project/prts/>`_, which is a soft-dependency and is
    only imported when this function is called.

    The metric averages the recall over all real anomaly ranges. Each real
    anomaly range is scored by the size, the position, and the cardinality of its
    overlap with the predicted anomaly ranges. Optionally, an existence reward
    boosts the recall as soon as a single point of the real anomaly is covered by
    a predicted range. See [1]_ for the exact definition.

    Parameters
    ----------
    y_true : np.ndarray
        True binary labels of shape (n_instances,).
    y_pred : np.ndarray
        Binary anomaly predictions for each point of the time series, of shape
        (n_instances,).
    alpha : float, default=0
        Weight of the existence reward. 0 disables the existence reward, 1 means
        that only the existence reward counts. The reward is granted when the
        real anomaly range overlaps with at least one point of a predicted
        anomaly range.
    cardinality : {'reciprocal', 'one', 'udf_gamma'}, default='reciprocal'
        Cardinality type.
    bias : {'flat', 'front', 'middle', 'back'}, default='flat'
        Positional bias type.

    Returns
    -------
    float
        Range-based recall. If ``y_pred`` holds only a single distinct value, a
        warning is issued and 0.0 is returned instead.

    References
    ----------
    .. [1] Tatbul, Nesime, Tae Jun Lee, Stan Zdonik, Mejbah Alam, and Justin
       Gottschlich. "Precision and Recall for Time Series." In Proceedings of the
       International Conference on Neural Information Processing Systems (NeurIPS),
       1920–30. 2018.
       http://papers.nips.cc/paper/7462-precision-and-recall-for-time-series.pdf.
    """
    # Verify the soft dependency first, then import it lazily.
    _check_soft_dependencies(
        "prts",
        obj="range_recall",
        suppress_import_stdout=True,
    )

    from prts import ts_recall

    y_true, y_pred = check_y(y_true, y_pred, force_y_pred_continuous=False)

    n_distinct_predictions = np.unique(y_pred).size
    if n_distinct_predictions != 1:
        return ts_recall(
            y_true,
            y_pred,
            alpha=alpha,
            cardinality=cardinality,
            bias=bias,
        )

    # Constant predictions: warn and fall back to the worst score.
    # NOTE: the message says ``y_score`` although the parameter is ``y_pred``;
    # the text is kept unchanged on purpose.
    warnings.warn(
        "Cannot compute metric for a constant value in y_score, returning 0.0!",
        stacklevel=2,
    )
    return 0.0
| 132 | + |
| 133 | + |
def range_f_score(
    y_true: np.ndarray,
    y_pred: np.ndarray,
    beta: float = 1,
    p_alpha: float = 0,
    r_alpha: float = 0.5,
    cardinality: str = "reciprocal",
    p_bias: str = "flat",
    r_bias: str = "flat",
) -> float:
    """Compute the F-score using the range-based recall and precision metrics.

    Range-based metrics were introduced by Tatbul et al. at NeurIPS 2018 [1]_. This
    implementation uses the community package `prts <https://pypi.org/project/prts/>`_
    as a soft-dependency.

    The F-beta score is the weighted harmonic mean of precision and recall, reaching
    its optimal value at 1 and its worst value at 0. This implementation uses the
    range-based precision and range-based recall as basis.

    Parameters
    ----------
    y_true : np.ndarray
        True binary labels of shape (n_instances,).
    y_pred : np.ndarray
        Binary anomaly predictions for each point of the time series, of shape
        (n_instances,).
    beta : float, default=1
        F-score beta determines the weight of recall in the combined score.
        beta < 1 lends more weight to precision, while beta > 1 favors recall.
    p_alpha : float, default=0
        Weight of the existence reward for the range-based precision. For most - if
        not all - cases, `p_alpha` should be set to 0.
    r_alpha : float, default=0.5
        Weight of the existence reward for the range-based recall. If 0: no
        existence reward, if 1: only existence reward.
    cardinality : {'reciprocal', 'one', 'udf_gamma'}, default='reciprocal'
        Cardinality type.
    p_bias : {'flat', 'front', 'middle', 'back'}, default='flat'
        Positional bias type for the range-based precision.
    r_bias : {'flat', 'front', 'middle', 'back'}, default='flat'
        Positional bias type for the range-based recall.

    Returns
    -------
    float
        Range-based F-score. If ``y_pred`` holds only a single distinct value, a
        warning is issued and 0.0 is returned instead.

    References
    ----------
    .. [1] Tatbul, Nesime, Tae Jun Lee, Stan Zdonik, Mejbah Alam, and Justin
       Gottschlich. "Precision and Recall for Time Series." In Proceedings of the
       International Conference on Neural Information Processing Systems (NeurIPS),
       1920–30. 2018.
       http://papers.nips.cc/paper/7462-precision-and-recall-for-time-series.pdf.
    """
    # Report this function (not ``range_recall``) as the object that needs the
    # soft dependency.
    _check_soft_dependencies("prts", obj="range_f_score", suppress_import_stdout=True)

    from prts import ts_fscore

    y_true, y_pred = check_y(y_true, y_pred, force_y_pred_continuous=False)
    if np.unique(y_pred).shape[0] == 1:
        # Constant predictions: warn and fall back to the worst score.
        warnings.warn(
            "Cannot compute metric for a constant value in y_score, returning 0.0!",
            stacklevel=2,
        )
        return 0.0
    return ts_fscore(
        y_true,
        y_pred,
        beta=beta,
        p_alpha=p_alpha,
        r_alpha=r_alpha,
        cardinality=cardinality,
        p_bias=p_bias,
        r_bias=r_bias,
    )
0 commit comments