From 8c07ea3cd70262d7043fb1167099ba6b3ae40cbf Mon Sep 17 00:00:00 2001 From: tina Date: Sun, 6 Apr 2025 15:41:48 +0800 Subject: [PATCH 1/4] add difference transformer to series transformations --- aeon/transformations/series/__init__.py | 2 + aeon/transformations/series/_diff.py | 115 ++++++++++++++++++ .../transformations/series/tests/test_diff.py | 28 +++++ 3 files changed, 145 insertions(+) create mode 100644 aeon/transformations/series/_diff.py create mode 100644 aeon/transformations/series/tests/test_diff.py diff --git a/aeon/transformations/series/__init__.py b/aeon/transformations/series/__init__.py index 8b71ba9fc8..01bb5faa55 100644 --- a/aeon/transformations/series/__init__.py +++ b/aeon/transformations/series/__init__.py @@ -21,6 +21,7 @@ "SIVSeriesTransformer", "PCASeriesTransformer", "WarpingSeriesTransformer", + "DifferenceTransformer", ] from aeon.transformations.series._acf import ( @@ -32,6 +33,7 @@ from aeon.transformations.series._boxcox import BoxCoxTransformer from aeon.transformations.series._clasp import ClaSPTransformer from aeon.transformations.series._dft import DFTSeriesTransformer +from aeon.transformations.series._diff import DifferenceTransformer from aeon.transformations.series._dobin import Dobin from aeon.transformations.series._exp_smoothing import ExpSmoothingSeriesTransformer from aeon.transformations.series._gauss import GaussSeriesTransformer diff --git a/aeon/transformations/series/_diff.py b/aeon/transformations/series/_diff.py new file mode 100644 index 0000000000..029ecd8561 --- /dev/null +++ b/aeon/transformations/series/_diff.py @@ -0,0 +1,115 @@ +import numpy as np + +from aeon.transformations.series.base import BaseSeriesTransformer + +__maintainer__ = [] +__all__ = ["DifferenceTransformer"] + + +class DifferenceTransformer(BaseSeriesTransformer): + """ + Calculates the n-th order difference of a time series. + + Transforms a time series X into a series Y representing the difference + calculated `order` times. + - Order 1: Y[t] = X[t] - X[t-1] + - Order 2: Y[t] = (X[t] - X[t-1]) - (X[t-1] - X[t-2]) = X[t] - 2*X[t-1] + X[t-2] + - ... and so on. + + The first `order` element(s) of the transformed series along the time axis + will be NaN, so that the output series will have the same shape as the input series. + + Parameters + ---------- + order : int, default=1 + The order of differencing. Must be a positive integer. + + axis : int, default=1 + The axis along which the difference is computed. Assumed to be the + time axis. + If `axis == 0`, assumes shape `(n_timepoints, n_channels)`. + If `axis == 1`, assumes shape `(n_channels, n_timepoints)`. + + Notes + ----- + This transformer assumes the input series does not contain NaN values where + the difference needs to be computed. + + Examples + -------- + >>> import numpy as np + >>> from aeon.transformations.series._diff import DifferenceTransformer + >>> X1 = np.array([[1, 3, 2, 5, 4, 7, 6, 9, 8, 10]]) + >>> dt = DifferenceTransformer() + >>> Xt1 = dt.fit_transform(X1) + >>> print(Xt1) + [[nan 2. -1. 3. -1. 3. -1. 3. -1. 2.]] + + >>> X2 = np.array([[1, 3, 2, 5, 4, 7, 6, 9, 8, 10]]) + >>> dt2 = DifferenceTransformer(order=2) + >>> Xt2 = dt2.fit_transform(X2) + >>> print(Xt2) + [[nan nan -3. 4. -4. 4. -4. 4. -4. 3.]] + + >>> X3 = np.array([[1, 2, 3, 4, 5], [5, 4, 3, 2, 1]]) + >>> dt = DifferenceTransformer() + >>> Xt3 = dt.fit_transform(X3) + >>> print(Xt3) + [[nan 1. 1. 1. 1.] + [nan -1. -1. -1. -1.]] + + >>> X4 = np.array([[1, 5], [2, 4], [3, 3], [4, 2], [5, 1]]) + >>> dt_axis0 = DifferenceTransformer(axis=0) + >>> Xt4 = dt_axis0.fit_transform(X4, axis=0) + >>> print(Xt4) + [[nan nan] + [ 1. -1.] + [ 1. -1.] + [ 1. -1.] + [ 1. -1.]] + """ + + _tags = { + "capability:multivariate": True, + "X_inner_type": "np.ndarray", + "fit_is_empty": True, + } + + def __init__(self, order=1, axis=1): + if not isinstance(order, int) or order < 1: + raise ValueError(f"`order` must be a positive integer, but got {order}") + self.order = order + super().__init__(axis=axis) + + def _transform(self, X, y=None): + """ + Perform the n-th order differencing transformation. + + Parameters + ---------- + X : np.ndarray + + y : ignored argument for interface compatibility + + Returns + ------- + Xt : np.ndarray + Transformed version of X with the same shape, containing the + n-th order difference. + The first `order` elements along the time axis are NaN. + """ + diff_X = np.diff(X, n=self.order, axis=self.axis) + + # Check if diff_X is integer type. + # If so, cast to float to allow inserting np.nan. + if not np.issubdtype(diff_X.dtype, np.floating): + diff_X = diff_X.astype(np.float64) + + # Insert the NaN at the beginning + nan_shape = list(X.shape) + nan_shape[self.axis] = self.order + nans_to_prepend = np.full(nan_shape, np.nan, dtype=np.float64) + + Xt = np.concatenate([nans_to_prepend, diff_X], axis=self.axis) + + return Xt diff --git a/aeon/transformations/series/tests/test_diff.py b/aeon/transformations/series/tests/test_diff.py new file mode 100644 index 0000000000..e595991a56 --- /dev/null +++ b/aeon/transformations/series/tests/test_diff.py @@ -0,0 +1,28 @@ +"""Tests for Difference transformation.""" + +import numpy as np + +from aeon.transformations.series._diff import DifferenceTransformer + + +def test_diff(): + """Tests basic first and second order differencing.""" + X = np.array([[1.0, 4.0, 9.0, 16.0, 25.0, 36.0]]) + + dt1 = DifferenceTransformer(order=1) + Xt1 = dt1.fit_transform(X) + expected1 = np.array([[np.nan, 3.0, 5.0, 7.0, 9.0, 11.0]]) + + assert Xt1.shape == X.shape, "Shape mismatch for order 1" + np.testing.assert_allclose( + Xt1, expected1, equal_nan=True, err_msg="Value mismatch for order 1" + ) + + dt2 = DifferenceTransformer(order=2) + Xt2 = dt2.fit_transform(X) + expected2 = np.array([[np.nan, np.nan, 2.0, 2.0, 2.0, 2.0]]) + + assert Xt2.shape == X.shape, "Shape mismatch for order 2" + np.testing.assert_allclose( + Xt2, expected2, equal_nan=True, err_msg="Value mismatch for order 2" + ) From 321fdc2ffc3d3ef93e7888137855855f1e0f94fd Mon Sep 17 00:00:00 2001 From: tina Date: Sun, 6 Apr 2025 15:59:55 +0800 Subject: [PATCH 2/4] add myself to .all-contributorsrc --- .all-contributorsrc | 10 ++++++++++ aeon/transformations/series/_diff.py | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.all-contributorsrc b/.all-contributorsrc index cb0670d31a..0a909a2076 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -2683,6 +2683,16 @@ "contributions": [ "code" ] + }, + { + "login": "TinaJin0228", + "name": "Tina Jin", + "avatar_url": "https://avatars.githubusercontent.com/TinaJin0228", + "profile": "https://github.com/TinaJin0228", + "contributions": [ + "code", + "doc" + ] } ], "commitType": "docs" diff --git a/aeon/transformations/series/_diff.py b/aeon/transformations/series/_diff.py index 029ecd8561..ba3943cb4e 100644 --- a/aeon/transformations/series/_diff.py +++ b/aeon/transformations/series/_diff.py @@ -2,7 +2,7 @@ from aeon.transformations.series.base import BaseSeriesTransformer -__maintainer__ = [] +__maintainer__ = ["TinaJin0228"] __all__ = ["DifferenceTransformer"] From 8f78ec98e57b49640bd59f5570f35efb9fea7e67 Mon Sep 17 00:00:00 2001 From: tina Date: Sun, 6 Apr 2025 16:07:24 +0800 Subject: [PATCH 3/4] small modification --- aeon/transformations/series/_diff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/transformations/series/_diff.py b/aeon/transformations/series/_diff.py index ba3943cb4e..0580538267 100644 --- a/aeon/transformations/series/_diff.py +++ b/aeon/transformations/series/_diff.py @@ -2,7 +2,7 @@ from aeon.transformations.series.base import BaseSeriesTransformer -__maintainer__ = ["TinaJin0228"] +__maintainer__ = ["Tina Jin"] __all__ = ["DifferenceTransformer"] From 606110cd8600aef319339ec4e9590941f1facaf5 Mon Sep 17 00:00:00 2001 From: tina Date: Sun, 13 Apr 2025 22:30:09 +0800 Subject: [PATCH 4/4] modify according to reviews --- aeon/transformations/series/_diff.py | 75 +++++++------------ .../transformations/series/tests/test_diff.py | 17 ++++- 2 files changed, 40 insertions(+), 52 deletions(-) diff --git a/aeon/transformations/series/_diff.py b/aeon/transformations/series/_diff.py index 0580538267..9c2240f437 100644 --- a/aeon/transformations/series/_diff.py +++ b/aeon/transformations/series/_diff.py @@ -12,24 +12,22 @@ class DifferenceTransformer(BaseSeriesTransformer): Transforms a time series X into a series Y representing the difference calculated `order` times. + + The time series are supposed to be all in rows, + with shape (n_channels, n_timepoints) + - Order 1: Y[t] = X[t] - X[t-1] - Order 2: Y[t] = (X[t] - X[t-1]) - (X[t-1] - X[t-2]) = X[t] - 2*X[t-1] + X[t-2] - ... and so on. - The first `order` element(s) of the transformed series along the time axis - will be NaN, so that the output series will have the same shape as the input series. + The transformed series will be shorter than the input series by `order` + elements along the time axis. Parameters ---------- order : int, default=1 The order of differencing. Must be a positive integer. - axis : int, default=1 - The axis along which the difference is computed. Assumed to be the - time axis. - If `axis == 0`, assumes shape `(n_timepoints, n_channels)`. - If `axis == 1`, assumes shape `(n_channels, n_timepoints)`. - Notes ----- This transformer assumes the input series does not contain NaN values where @@ -39,34 +37,24 @@ class DifferenceTransformer(BaseSeriesTransformer): -------- >>> import numpy as np >>> from aeon.transformations.series._diff import DifferenceTransformer - >>> X1 = np.array([[1, 3, 2, 5, 4, 7, 6, 9, 8, 10]]) + >>> X1 = np.array([[1, 3, 2, 5, 4, 7, 6, 9, 8, 10]]) # Shape (1, 10) >>> dt = DifferenceTransformer() >>> Xt1 = dt.fit_transform(X1) - >>> print(Xt1) - [[nan 2. -1. 3. -1. 3. -1. 3. -1. 2.]] + >>> print(Xt1) # Shape (1, 9) + [[ 2 -1 3 -1 3 -1 3 -1 2]] - >>> X2 = np.array([[1, 3, 2, 5, 4, 7, 6, 9, 8, 10]]) + >>> X2 = np.array([[1, 3, 2, 5, 4, 7, 6, 9, 8, 10]]) # Shape (1, 10) >>> dt2 = DifferenceTransformer(order=2) >>> Xt2 = dt2.fit_transform(X2) - >>> print(Xt2) - [[nan nan -3. 4. -4. 4. -4. 4. -4. 3.]] + >>> print(Xt2) # Shape (1, 8) + [[-3 4 -4 4 -4 4 -4 3]] - >>> X3 = np.array([[1, 2, 3, 4, 5], [5, 4, 3, 2, 1]]) + >>> X3 = np.array([[1, 2, 3, 4, 5], [5, 4, 3, 2, 1]]) # Shape (2, 5) >>> dt = DifferenceTransformer() >>> Xt3 = dt.fit_transform(X3) - >>> print(Xt3) - [[nan 1. 1. 1. 1.] - [nan -1. -1. -1. -1.]] - - >>> X4 = np.array([[1, 5], [2, 4], [3, 3], [4, 2], [5, 1]]) - >>> dt_axis0 = DifferenceTransformer(axis=0) - >>> Xt4 = dt_axis0.fit_transform(X4, axis=0) - >>> print(Xt4) - [[nan nan] - [ 1. -1.] - [ 1. -1.] - [ 1. -1.] - [ 1. -1.]] + >>> print(Xt3) # Shape (2, 4) + [[ 1 1 1 1] + [-1 -1 -1 -1]] """ _tags = { @@ -75,11 +63,9 @@ class DifferenceTransformer(BaseSeriesTransformer): "fit_is_empty": True, } - def __init__(self, order=1, axis=1): - if not isinstance(order, int) or order < 1: - raise ValueError(f"`order` must be a positive integer, but got {order}") + def __init__(self, order=1): self.order = order - super().__init__(axis=axis) + super().__init__(axis=1) def _transform(self, X, y=None): """ @@ -87,29 +73,22 @@ def _transform(self, X, y=None): Parameters ---------- - X : np.ndarray - + X : Time series to transform. With shape (n_channels, n_timepoints). y : ignored argument for interface compatibility Returns ------- Xt : np.ndarray - Transformed version of X with the same shape, containing the - n-th order difference. - The first `order` elements along the time axis are NaN. + Transformed version of X, containing the n-th order difference. + Shape will be (n_channels, n_timepoints - order). """ - diff_X = np.diff(X, n=self.order, axis=self.axis) - - # Check if diff_X is integer type. - # If so, cast to float to allow inserting np.nan. - if not np.issubdtype(diff_X.dtype, np.floating): - diff_X = diff_X.astype(np.float64) + if not isinstance(self.order, int) or self.order < 1: + raise ValueError( + f"`order` must be a positive integer, but got {self.order}" + ) - # Insert the NaN at the beginning - nan_shape = list(X.shape) - nan_shape[self.axis] = self.order - nans_to_prepend = np.full(nan_shape, np.nan, dtype=np.float64) + diff_X = np.diff(X, n=self.order, axis=1) - Xt = np.concatenate([nans_to_prepend, diff_X], axis=self.axis) + Xt = diff_X return Xt diff --git a/aeon/transformations/series/tests/test_diff.py b/aeon/transformations/series/tests/test_diff.py index e595991a56..9f54fccf7d 100644 --- a/aeon/transformations/series/tests/test_diff.py +++ b/aeon/transformations/series/tests/test_diff.py @@ -11,18 +11,27 @@ def test_diff(): dt1 = DifferenceTransformer(order=1) Xt1 = dt1.fit_transform(X) - expected1 = np.array([[np.nan, 3.0, 5.0, 7.0, 9.0, 11.0]]) + expected1 = np.array([[3.0, 5.0, 7.0, 9.0, 11.0]]) - assert Xt1.shape == X.shape, "Shape mismatch for order 1" np.testing.assert_allclose( Xt1, expected1, equal_nan=True, err_msg="Value mismatch for order 1" ) dt2 = DifferenceTransformer(order=2) Xt2 = dt2.fit_transform(X) - expected2 = np.array([[np.nan, np.nan, 2.0, 2.0, 2.0, 2.0]]) + expected2 = np.array([[2.0, 2.0, 2.0, 2.0]]) - assert Xt2.shape == X.shape, "Shape mismatch for order 2" np.testing.assert_allclose( Xt2, expected2, equal_nan=True, err_msg="Value mismatch for order 2" ) + + Y = np.array([[1, 2, 3, 4], [5, 3, 1, 8]]) + + Yt1 = dt1.fit_transform(Y) + expected3 = np.array([[1, 1, 1], [-2, -2, 7]]) + np.testing.assert_allclose( + Yt1, + expected3, + equal_nan=True, + err_msg="Value mismatch for order 1,multivariate", + )