Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Add collection transformer for normalisation #2005

Merged
merged 28 commits into from
Sep 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
b9427bf
Added Methods for normalisation
aryanpola Aug 23, 2024
1694bed
Automatic `pre-commit` fixes
aryanpola Aug 23, 2024
96a9075
pre-commit fixes
aryanpola Aug 23, 2024
1c9d149
changed file
aryanpola Aug 23, 2024
e8ab775
Automatic `pre-commit` fixes
aryanpola Aug 23, 2024
71a5904
fixes
aryanpola Aug 23, 2024
f362029
Merge remote-tracking branch 'origin/normalise' into normalise
aryanpola Aug 23, 2024
c1dbd3a
Automatic `pre-commit` fixes
aryanpola Aug 23, 2024
3091f74
Requested Changes done
aryanpola Aug 24, 2024
9848732
Merge remote-tracking branch 'origin/normalise' into normalise
aryanpola Aug 24, 2024
2da39c8
Added test for normalise class
aryanpola Aug 25, 2024
4d12e03
Automatic `pre-commit` fixes
aryanpola Aug 25, 2024
85f0745
Added Docstrings
aryanpola Aug 25, 2024
1b6fd4a
Merge remote-tracking branch 'origin/normalise' into normalise
aryanpola Aug 25, 2024
2800858
Merge branch 'main' into normalise
aryanpola Aug 25, 2024
90a708d
Merge branch 'main' into normalise
aryanpola Aug 28, 2024
e19cb24
Transforme to api and changes
aryanpola Sep 2, 2024
38db562
Merge remote-tracking branch 'origin/normalise' into normalise
aryanpola Sep 2, 2024
57f8b9b
Requested changes done
aryanpola Sep 7, 2024
901537e
changes in stadardisation method
aryanpola Sep 16, 2024
a65d166
Automatic `pre-commit` fixes
aryanpola Sep 16, 2024
166c4c6
changes
aryanpola Sep 16, 2024
1df0019
Merge remote-tracking branch 'upstream/main' into normalise
aryanpola Sep 17, 2024
de623e3
Merge remote-tracking branch 'origin/normalise' into normalise
aryanpola Sep 17, 2024
e6229c1
Automatic `pre-commit` fixes
aryanpola Sep 17, 2024
2de75e6
Requested changes made
aryanpola Sep 17, 2024
0fe08d6
Merge remote-tracking branch 'origin/normalise' into normalise
aryanpola Sep 17, 2024
d18d081
deleted unnecessary file
aryanpola Sep 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions aeon/transformations/collection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@
"SlopeTransformer",
"TimeSeriesScaler",
"TruncationTransformer",
"Normalise",
]

from aeon.transformations.collection._collection_wrapper import (
CollectionToSeriesWrapper,
)
from aeon.transformations.collection._downsample import DownsampleTransformer
from aeon.transformations.collection._normalise import Normalise
from aeon.transformations.collection.acf import AutocorrelationFunctionTransformer
from aeon.transformations.collection.ar_coefficient import ARCoefficientTransformer
from aeon.transformations.collection.base import BaseCollectionTransformer
Expand Down
93 changes: 93 additions & 0 deletions aeon/transformations/collection/_normalise.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
"""Normalization like z-normalization, standardization and min-max scaling."""

from typing import Optional

import numpy as np

from aeon.transformations.collection.base import BaseCollectionTransformer


class Normalise(BaseCollectionTransformer):
    """Normaliser transformer for collections.

    Rescales every series of a collection independently of the others. All
    statistics (mean, standard deviation, minimum, maximum) are computed along
    the timepoints axis (the last axis), so each channel of each case is
    normalised using only its own values.

    The ``X_inner_type`` tag declares ``numpy3D``, so ``_transform`` operates on
    arrays of shape ``(n_cases, n_channels, n_timepoints)``; conversion of other
    input types is left to the base class.

    Parameters
    ----------
    method : str, default="z_norm"
        The normalization method to apply.
        Supported methods: "z_norm", "standardize", "min_max".

        z_norm: Subtracts the mean and divides by the standard deviation along
        the timepoints axis, giving each series zero mean and unit variance.
        Series with zero standard deviation (constant series) are only
        mean-centred, i.e. mapped to all zeros.

        standardize: Subtracts the mean along the timepoints axis. This only
        centres the data; the spread of each series is left unchanged.

        min_max: Subtracts the minimum and divides by the range (max - min)
        along the timepoints axis, mapping each series to [0, 1]. Constant
        series (zero range) are mapped to all zeros.
    """

    _tags = {
        "X_inner_type": "numpy3D",
        "fit_is_empty": True,
        "capability:multivariate": True,
    }

    # The default must be one of the strings ``_transform`` recognises; the
    # method name is validated lazily in ``_transform``, not here.
    def __init__(self, method: str = "z_norm"):
        self.method = method
        super().__init__()

    def _transform(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> np.ndarray:
        """
        Transform X by applying the selected normalisation.

        Parameters
        ----------
        X : np.ndarray
            The input samples to transform. Statistics are taken over the last
            axis.
        y : array-like or list, optional
            The class values for X (not used in this method).

        Returns
        -------
        X_transformed : np.ndarray
            The normalized data, with the same shape as X.

        Raises
        ------
        ValueError
            If ``self.method`` is not one of "z_norm", "standardize" or
            "min_max".
        """
        # axis=-1 reduces over the timepoints; keepdims=True keeps the reduced
        # axis with size one so the statistics broadcast against X.
        if self.method == "z_norm":
            mean = np.mean(X, axis=-1, keepdims=True)
            std = np.std(X, axis=-1, keepdims=True)

            # Constant series have std == 0; divide by 1 instead to avoid
            # producing NaN/inf (the series is then just mean-centred).
            std_nonzero = np.where(std == 0, 1, std)
            return (X - mean) / std_nonzero

        if self.method == "standardize":
            mean = np.mean(X, axis=-1, keepdims=True)
            return X - mean

        if self.method == "min_max":
            min_val = np.min(X, axis=-1, keepdims=True)
            max_val = np.max(X, axis=-1, keepdims=True)

            # Prevent division by zero in case min_val == max_val
            range_nonzero = np.where(max_val == min_val, 1, max_val - min_val)
            return (X - min_val) / range_nonzero

        raise ValueError(f"Unknown normalization method: {self.method}")
87 changes: 87 additions & 0 deletions aeon/transformations/collection/tests/test_normalise.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""
The module contains tests for the Normalise class.

It includes tests for different normalization methods such as
z-normalization, standardization, and min-max scaling. Additionally,
it tests the behavior of the Normalise class when provided with an
invalid normalization method.
"""

import numpy as np
import pytest

from aeon.transformations.collection._normalise import Normalise


# Test function for z-normalization
def test_z_norm():
    """
    Check z-normalization performed by the Normalise class.

    A small 3D collection is z-normalised and every series of the result is
    expected to have a mean close to 0 and a standard deviation close to 1
    along the timepoints (last) axis.
    """
    collection = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
    result = Normalise(method="z_norm")._transform(collection)

    series_means = np.mean(result, axis=-1)
    series_stds = np.std(result, axis=-1)

    assert np.allclose(series_means, 0)
    assert np.allclose(series_stds, 1)


# Test function for standardization
def test_standardize():
    """
    Test the "standardize" method of the Normalise class.

    "standardize" only mean-centres each series. This function creates a 3D
    numpy array, applies the method, and asserts that the transformed data has
    a mean close to 0 along the timepoints axis while the standard deviation of
    every series is unchanged (it is not rescaled to 1).
    """
    X = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
    normaliser = Normalise(method="standardize")
    X_transformed = normaliser._transform(X)

    mean = np.mean(X_transformed, axis=-1)

    assert np.allclose(mean, 0)
    # Centring must not rescale the data: the spread of each series is kept.
    assert np.allclose(np.std(X_transformed, axis=-1), np.std(X, axis=-1))


# Test function for min_max.
def test_min_max():
    """
    Check min-max scaling performed by the Normalise class.

    A small 3D collection is min-max scaled and every series of the result is
    expected to have a minimum of 0 and a maximum of 1 along the timepoints
    (last) axis.
    """
    collection = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
    result = Normalise(method="min_max")._transform(collection)

    lowest = np.min(result, axis=-1)
    highest = np.max(result, axis=-1)

    assert np.allclose(lowest, 0)
    assert np.allclose(highest, 1)


def test_invalid_method():
    """
    Tests behavior of Normalise class when an invalid normalization method is provided.

    This function creates a 3D numpy array and attempts to apply an invalid
    normalization method using the Normalise class. It asserts that a ValueError
    is raised with the appropriate error message.
    """
    X = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
    # Build the transformer outside the ``raises`` block so that only the
    # transform call itself is allowed to satisfy the expected ValueError.
    normaliser = Normalise(method="invalid")
    with pytest.raises(ValueError, match="Unknown normalization method: invalid"):
        normaliser._transform(X)