Merge branch 'main' into pla

Moonzyyy · web-flow · commit dccddb6f4440 · 2024-08-16T14:39:40.000+01:00
diff --git a/aeon/classification/compose/tests/test_ensemble.py b/aeon/classification/compose/tests/test_ensemble.py
@@ -63,7 +63,9 @@ def test_classifier_ensemble(classifiers):
 )
 def test_classifier_ensemble_weights(weights):
     """Test classifier ensemble weight options."""
-    X_train, y_train = make_example_3d_numpy(n_cases=10, n_timepoints=12)
+    X_train, y_train = make_example_3d_numpy(
+        n_cases=10, n_timepoints=12, min_cases_per_label=2
+    )
     X_test, _ = make_example_3d_numpy(n_cases=10, n_timepoints=12)
 
     ensemble = ClassifierEnsemble(classifiers=mixed_ensemble, weights=weights)
diff --git a/aeon/clustering/__init__.py b/aeon/clustering/__init__.py
@@ -6,6 +6,7 @@
     "TimeSeriesCLARA",
     "TimeSeriesCLARANS",
     "TimeSeriesKMeans",
+    "TimeSeriesKShape",
     "TimeSeriesKShapes",
     "TimeSeriesKernelKMeans",
     "DummyClusterer",
@@ -15,6 +16,7 @@
 from aeon.clustering._clarans import TimeSeriesCLARANS
 from aeon.clustering._k_means import TimeSeriesKMeans
 from aeon.clustering._k_medoids import TimeSeriesKMedoids
+from aeon.clustering._k_shape import TimeSeriesKShape
 from aeon.clustering._k_shapes import TimeSeriesKShapes
 from aeon.clustering._kernel_k_means import TimeSeriesKernelKMeans
 from aeon.clustering.base import BaseClusterer
diff --git a/aeon/clustering/_k_shape.py b/aeon/clustering/_k_shape.py
@@ -0,0 +1,186 @@
+"""Time series k-shape clusterer."""
+
+from typing import Union
+
+import numpy as np
+from numpy.random import RandomState
+
+from aeon.clustering.base import BaseClusterer
+from aeon.utils.validation._dependencies import _check_soft_dependencies
+
+
+class TimeSeriesKShape(BaseClusterer):
+    """Kshape algorithm: wrapper of the ``tslearn`` implementation.
+
+    Parameters
+    ----------
+    n_clusters: int, default=8
+        The number of clusters to form (and of centroids to generate).
+    init_algorithm: str or np.ndarray, default='random'
+        Method for initializing cluster centres. Any of the following are valid:
+        ['random']. Or a np.ndarray of shape (n_clusters, n_channels, n_timepoints)
+        and gives the initial cluster centres.
+    n_init: int, default=10
+        Number of times the k-shape algorithm will be run with different
+        centroid seeds. The final result will be the best output of n_init
+        consecutive runs in terms of inertia.
+    max_iter: int, default=300
+        Maximum number of iterations of the k-shape algorithm for a single run.
+    tol: float, default=1e-4
+        Relative tolerance with regards to Frobenius norm of the difference
+        in the cluster centres of two consecutive iterations to declare
+        convergence.
+    verbose: bool, default=False
+        Verbosity mode.
+    random_state: int or np.random.RandomState instance or None, default=None
+        Determines random number generation for centroid initialization.
+
+    Attributes
+    ----------
+    cluster_centers_: np.ndarray or None
+        Cluster centres exactly as returned by the wrapped ``tslearn`` estimator;
+        None until ``fit`` has been called.
+    labels_: np.ndarray (1d array of shape (n_cases,))
+        Index of the cluster each time series belongs to.
+    inertia_: float
+        Sum of squared distances of samples to their closest cluster centre.
+    n_iter_: int
+        Number of iterations run.
+
+    References
+    ----------
+    .. [1] John Paparrizos and Luis Gravano. 2016.
+       K-Shape: Efficient and Accurate Clustering of Time Series.
+       SIGMOD Rec. 45, 1 (March 2016), 69–76.
+       https://doi.org/10.1145/2949741.2949758
+
+    Examples
+    --------
+    >>> from aeon.clustering import TimeSeriesKShape
+    >>> from aeon.datasets import load_basic_motions
+    >>> # Load data
+    >>> X_train, y_train = load_basic_motions(split="TRAIN")[0:10]
+    >>> X_test, y_test = load_basic_motions(split="TEST")[0:10]
+    >>> # Example of KShape clustering
+    >>> ks = TimeSeriesKShape(n_clusters=3, random_state=1)  # doctest: +SKIP
+    >>> ks.fit(X_train)  # doctest: +SKIP
+    TimeSeriesKShape(n_clusters=3, random_state=1)
+    >>> preds = ks.predict(X_test)  # doctest: +SKIP
+    """
+
+    _tags = {
+        "capability:multivariate": True,
+        "python_dependencies": "tslearn",
+    }
+
+    def __init__(
+        self,
+        n_clusters: int = 8,
+        init_algorithm: Union[str, np.ndarray] = "random",
+        n_init: int = 10,
+        max_iter: int = 300,
+        tol: float = 1e-4,
+        verbose: bool = False,
+        random_state: Union[int, RandomState, None] = None,
+    ):
+        self.init_algorithm = init_algorithm
+        self.n_init = n_init
+        self.max_iter = max_iter
+        self.tol = tol
+        self.verbose = verbose
+        self.random_state = random_state
+
+        self.cluster_centers_ = None
+        self.labels_ = None
+        self.inertia_ = None
+        self.n_iter_ = 0
+
+        self._tslearn_k_shapes = None
+
+        super().__init__(n_clusters=n_clusters)
+
+    def _fit(self, X, y=None):
+        """Fit time series clusterer to training data.
+
+        Parameters
+        ----------
+        X: np.ndarray of shape (n_cases, n_channels, n_timepoints)
+            A collection of time series instances.
+        y: ignored, exists for API consistency reasons.
+
+        Returns
+        -------
+        self:
+            Fitted estimator.
+        """
+        _check_soft_dependencies("tslearn", severity="error")
+        from tslearn.clustering import KShape
+
+        # NOTE(review): an ndarray ``init_algorithm`` is forwarded as-is, unlike X
+        # which is axis-swapped below -- confirm the layout tslearn expects.
+        self._tslearn_k_shapes = KShape(
+            n_clusters=self.n_clusters,
+            max_iter=self.max_iter,
+            tol=self.tol,
+            random_state=self.random_state,
+            n_init=self.n_init,
+            verbose=self.verbose,
+            init=self.init_algorithm,
+        )
+
+        _X = X.swapaxes(1, 2)  # hand tslearn the time axis before the channel axis
+
+        self._tslearn_k_shapes.fit(_X)
+        self._cluster_centers = self._tslearn_k_shapes.cluster_centers_
+        self.cluster_centers_ = self._cluster_centers
+        self.labels_ = self._tslearn_k_shapes.labels_
+        self.inertia_ = self._tslearn_k_shapes.inertia_
+        self.n_iter_ = self._tslearn_k_shapes.n_iter_
+        return self
+
+    def _predict(self, X, y=None) -> np.ndarray:
+        """Predict the closest cluster each sample in X belongs to.
+
+        Parameters
+        ----------
+        X: np.ndarray of shape (n_cases, n_channels, n_timepoints)
+            A collection of time series instances.
+        y: ignored, exists for API consistency reasons.
+
+        Returns
+        -------
+        np.ndarray (1d array of shape (n_cases,))
+            Index of the cluster each time series in X belongs to.
+        """
+        _X = X.swapaxes(1, 2)
+        return self._tslearn_k_shapes.predict(_X)
+
+    @classmethod
+    def get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the set of test parameters to return, for use in tests. If no
+            special parameters are defined for a value, will return `"default"` set.
+
+        Returns
+        -------
+        params : dict or list of dict, default={}
+            Parameters to create testing instances of the class, such that
+            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
+        """
+        return {
+            "n_clusters": 2,
+            "init_algorithm": "random",
+            "n_init": 1,
+            "max_iter": 1,
+            "tol": 1e-4,
+            "verbose": False,
+            "random_state": 1,
+        }
+
+    def _score(self, X, y=None):
+        """Return the absolute inertia of the fitted model; X and y are unused."""
+        return np.abs(self.inertia_)
diff --git a/aeon/clustering/_k_shapes.py b/aeon/clustering/_k_shapes.py
@@ -3,12 +3,20 @@
 from typing import Union
 
 import numpy as np
+from deprecated.sphinx import deprecated
 from numpy.random import RandomState
 
 from aeon.clustering.base import BaseClusterer
 from aeon.utils.validation._dependencies import _check_soft_dependencies
 
 
+# TODO: remove in v1.0.0
+@deprecated(
+    version="1.0.0",
+    reason="TimeSeriesKShapes class has been renamed to TimeSeriesKShape. "
+    "The TimeSeriesKShapes version will be removed in version 1.0.0.",
+    category=FutureWarning,
+)
 class TimeSeriesKShapes(BaseClusterer):
     """Kshape algorithm: wrapper of the ``tslearn`` implementation.
 
diff --git a/aeon/clustering/tests/test_k_shape.py b/aeon/clustering/tests/test_k_shape.py
@@ -1,9 +1,9 @@
-"""Tests for time series k-shapes."""
+"""Tests for time series k-shape."""
 
 import numpy as np
 import pytest
 
-from aeon.clustering._k_shapes import TimeSeriesKShapes
+from aeon.clustering._k_shape import TimeSeriesKShape
 from aeon.datasets import load_basic_motions
 from aeon.utils.validation._dependencies import _check_estimator_deps
 
@@ -18,7 +18,7 @@
 
 
 @pytest.mark.skipif(
-    not _check_estimator_deps(TimeSeriesKShapes, severity="none"),
+    not _check_estimator_deps(TimeSeriesKShape, severity="none"),
     reason="skip test if required soft dependencies not available",
 )
 def test_kshapes():
@@ -28,7 +28,7 @@ def test_kshapes():
     X_train, y_train = load_basic_motions(split="train")
     X_test, y_test = load_basic_motions(split="test")
 
-    kshapes = TimeSeriesKShapes(random_state=1, n_clusters=3)
+    kshapes = TimeSeriesKShape(random_state=1, n_clusters=3)
     kshapes.fit(X_train[0:max_train])
     test_shape_result = kshapes.predict(X_test[0:max_train])
     score = kshapes.score(X_test[0:max_train])
diff --git a/aeon/testing/data_generation/_collection.py b/aeon/testing/data_generation/_collection.py
@@ -23,6 +23,7 @@ def make_example_3d_numpy(
     n_channels: int = 1,
     n_timepoints: int = 12,
     n_labels: int = 2,
+    min_cases_per_label: int = 1,
     regression_target: bool = False,
     random_state: Union[int, None] = None,
     return_y: bool = True,
@@ -44,6 +45,8 @@ def make_example_3d_numpy(
         The number of features/series length to generate.
     n_labels : int
         The number of unique labels to generate.
+    min_cases_per_label : int
+        The minimum number of samples per unique label.
     regression_target : bool
         If True, the target will be a scalar float, otherwise an int.
     random_state : int or None
@@ -85,9 +88,11 @@ def make_example_3d_numpy(
     y = X[:, 0, 0].astype(int)
 
     for i in range(n_labels):
-        if len(y) > i:
-            X[i, 0, 0] = i
-            y[i] = i
+        for j in range(min_cases_per_label):
+            idx = i * min_cases_per_label + j
+            if len(y) > idx:
+                X[idx, 0, 0] = i
+                y[idx] = i
     X = X * (y[:, None, None] + 1)
 
     if regression_target:
@@ -103,6 +108,7 @@ def make_example_2d_numpy_collection(
     n_cases: int = 10,
     n_timepoints: int = 8,
     n_labels: int = 2,
+    min_cases_per_label: int = 1,
     regression_target: bool = False,
     random_state: Union[int, None] = None,
     return_y: bool = True,
@@ -122,6 +128,8 @@ def make_example_2d_numpy_collection(
         The number of features/series length to generate.
     n_labels : int
         The number of unique labels to generate.
+    min_cases_per_label : int
+        The minimum number of samples per unique label.
     regression_target : bool
         If True, the target will be a scalar float, otherwise an int.
     random_state : int or None
@@ -159,9 +167,11 @@ def make_example_2d_numpy_collection(
     y = X[:, 0].astype(int)
 
     for i in range(n_labels):
-        if len(y) > i:
-            X[i, 0] = i
-            y[i] = i
+        for j in range(min_cases_per_label):
+            idx = i * min_cases_per_label + j
+            if len(y) > idx:
+                X[idx, 0] = i
+                y[idx] = i
     X = X * (y[:, None] + 1)
 
     if regression_target:
diff --git a/docs/api_reference/clustering.rst b/docs/api_reference/clustering.rst
@@ -34,6 +34,7 @@ Clustering Algorithms
     TimeSeriesKMeans
     TimeSeriesKMedoids
     TimeSeriesKShapes
+    TimeSeriesKShape
     TimeSeriesKernelKMeans
     TimeSeriesCLARA
     TimeSeriesCLARANS