| 1 | +"""Time series kshapes.""" |
| 2 | + |
| 3 | +from typing import Union |
| 4 | + |
| 5 | +import numpy as np |
| 6 | +from numpy.random import RandomState |
| 7 | + |
| 8 | +from aeon.clustering.base import BaseClusterer |
| 9 | +from aeon.utils.validation._dependencies import _check_soft_dependencies |
| 10 | + |
| 11 | + |
class TimeSeriesKShape(BaseClusterer):
    """K-Shape clustering algorithm: wrapper of the ``tslearn`` implementation.

    Parameters
    ----------
    n_clusters: int, default=8
        The number of clusters to form as well as the number of
        centroids to generate.
    init_algorithm: str or np.ndarray, default='random'
        Method for initializing cluster centres. The only valid string value is
        'random'. Alternatively, a np.ndarray of shape
        (n_clusters, n_channels, n_timepoints) can be passed to specify the
        initial cluster centres directly.
    n_init: int, default=10
        Number of times the k-shape algorithm will be run with different
        centroid seeds. The final result will be the best output of n_init
        consecutive runs in terms of inertia.
    max_iter: int, default=300
        Maximum number of iterations of the k-shape algorithm for a single
        run.
    tol: float, default=1e-4
        Relative tolerance with regards to the Frobenius norm of the difference
        in the cluster centres of two consecutive iterations to declare
        convergence.
    verbose: bool, default=False
        Verbosity mode.
    random_state: int, np.random.RandomState instance or None, default=None
        Determines random number generation for centroid initialization.

    Attributes
    ----------
    labels_: np.ndarray (1d array of shape (n_cases,))
        Index of the cluster each time series in the training data belongs to.
    inertia_: float
        Sum of squared distances of samples to their closest cluster centre,
        weighted by the sample weights if provided.
    n_iter_: int
        Number of iterations run.

    References
    ----------
    .. [1] John Paparrizos and Luis Gravano. 2016.
       K-Shape: Efficient and Accurate Clustering of Time Series.
       SIGMOD Rec. 45, 1 (March 2016), 69–76.
       https://doi.org/10.1145/2949741.2949758

    Examples
    --------
    >>> from aeon.clustering import TimeSeriesKShape
    >>> from aeon.datasets import load_basic_motions
    >>> # Load data
    >>> X_train, y_train = load_basic_motions(split="TRAIN")
    >>> X_test, y_test = load_basic_motions(split="TEST")
    >>> # Example of K-Shape clustering
    >>> ks = TimeSeriesKShape(n_clusters=3, random_state=1)  # doctest: +SKIP
    >>> ks.fit(X_train)  # doctest: +SKIP
    TimeSeriesKShape(n_clusters=3, random_state=1)
    >>> preds = ks.predict(X_test)  # doctest: +SKIP
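
    After fitting, the cluster assignment for the training data can be read from
    the ``labels_`` attribute documented above (a minimal sketch, assuming the
    fit above has run; ``train_labels`` is an illustrative name):

    >>> train_labels = ks.labels_  # doctest: +SKIP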
    """

    _tags = {
        "capability:multivariate": True,
        "python_dependencies": "tslearn",
    }

    def __init__(
        self,
        n_clusters: int = 8,
        init_algorithm: Union[str, np.ndarray] = "random",
        n_init: int = 10,
        max_iter: int = 300,
        tol: float = 1e-4,
        verbose: bool = False,
        random_state: Optional[Union[int, RandomState]] = None,
    ):
        self.init_algorithm = init_algorithm
        self.n_init = n_init
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose
        self.random_state = random_state

        self.cluster_centers_ = None
        self.labels_ = None
        self.inertia_ = None
        self.n_iter_ = 0

        self._tslearn_k_shapes = None

        super().__init__(n_clusters=n_clusters)

    def _fit(self, X, y=None):
        """Fit time series clusterer to training data.

        Parameters
        ----------
        X: np.ndarray, of shape (n_cases, n_channels, n_timepoints) or
                (n_cases, n_timepoints)
            A collection of time series instances.
        y: ignored, exists for API consistency reasons.

        Returns
        -------
        self:
            Fitted estimator.
        """
        _check_soft_dependencies("tslearn", severity="error")
        from tslearn.clustering import KShape

        self._tslearn_k_shapes = KShape(
            n_clusters=self.n_clusters,
            max_iter=self.max_iter,
            tol=self.tol,
            random_state=self.random_state,
            n_init=self.n_init,
            verbose=self.verbose,
            init=self.init_algorithm,
        )

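        # tslearn expects input of shape (n_cases, n_timepoints, n_channels),
        # whereas aeon passes (n_cases, n_channels, n_timepoints), so swap the
        # channel and time axes before fitting.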
        _X = X.swapaxes(1, 2)

        self._tslearn_k_shapes.fit(_X)
        self.cluster_centers_ = self._tslearn_k_shapes.cluster_centers_
        self.labels_ = self._tslearn_k_shapes.labels_
        self.inertia_ = self._tslearn_k_shapes.inertia_
        self.n_iter_ = self._tslearn_k_shapes.n_iter_
        return self

    def _predict(self, X, y=None) -> np.ndarray:
        """Predict the closest cluster each sample in X belongs to.

        Parameters
        ----------
        X: np.ndarray, of shape (n_cases, n_channels, n_timepoints) or
                (n_cases, n_timepoints)
            A collection of time series instances.
        y: ignored, exists for API consistency reasons.

        Returns
        -------
        np.ndarray (1d array of shape (n_cases,))
            Index of the cluster each time series in X belongs to.
        """
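        # Swap to the (n_cases, n_timepoints, n_channels) layout expected by
        # tslearn before predicting.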
        _X = X.swapaxes(1, 2)
        return self._tslearn_k_shapes.predict(_X)

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return the `"default"`
            set.

        Returns
        -------
        params : dict or list of dict, default={}
            Parameters to create testing instances of the class.
            Each dict contains parameters to construct an "interesting" test
            instance, i.e., `MyClass(**params)` or `MyClass(**params[i])` creates a
            valid test instance. `create_test_instance` uses the first (or only)
            dictionary in `params`.
        """
        return {
            "n_clusters": 2,
            "init_algorithm": "random",
            "n_init": 1,
            "max_iter": 1,
            "tol": 1e-4,
            "verbose": False,
            "random_state": 1,
        }

    def _score(self, X, y=None):
        # Score the clustering using the magnitude of the fitted inertia.
        return np.abs(self.inertia_)