Skip to content

Commit dccddb6

Browse files
authored
Merge branch 'main' into pla
2 parents 07560d9 + 9467176 commit dccddb6

File tree

7 files changed

+220
-11
lines changed

7 files changed

+220
-11
lines changed

aeon/classification/compose/tests/test_ensemble.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ def test_classifier_ensemble(classifiers):
6363
)
6464
def test_classifier_ensemble_weights(weights):
6565
"""Test classifier ensemble weight options."""
66-
X_train, y_train = make_example_3d_numpy(n_cases=10, n_timepoints=12)
66+
X_train, y_train = make_example_3d_numpy(
67+
n_cases=10, n_timepoints=12, min_cases_per_label=2
68+
)
6769
X_test, _ = make_example_3d_numpy(n_cases=10, n_timepoints=12)
6870

6971
ensemble = ClassifierEnsemble(classifiers=mixed_ensemble, weights=weights)

aeon/clustering/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
"TimeSeriesCLARA",
77
"TimeSeriesCLARANS",
88
"TimeSeriesKMeans",
9+
"TimeSeriesKShape",
910
"TimeSeriesKShapes",
1011
"TimeSeriesKernelKMeans",
1112
"DummyClusterer",
@@ -15,6 +16,7 @@
1516
from aeon.clustering._clarans import TimeSeriesCLARANS
1617
from aeon.clustering._k_means import TimeSeriesKMeans
1718
from aeon.clustering._k_medoids import TimeSeriesKMedoids
19+
from aeon.clustering._k_shape import TimeSeriesKShape
1820
from aeon.clustering._k_shapes import TimeSeriesKShapes
1921
from aeon.clustering._kernel_k_means import TimeSeriesKernelKMeans
2022
from aeon.clustering.base import BaseClusterer

aeon/clustering/_k_shape.py

+186
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
"""Time series kshapes."""
2+
3+
from typing import Union
4+
5+
import numpy as np
6+
from numpy.random import RandomState
7+
8+
from aeon.clustering.base import BaseClusterer
9+
from aeon.utils.validation._dependencies import _check_soft_dependencies
10+
11+
12+
class TimeSeriesKShape(BaseClusterer):
    """Kshape algorithm: wrapper of the ``tslearn`` implementation.

    Parameters
    ----------
    n_clusters: int, default=8
        The number of clusters to form as well as the number of
        centroids to generate.
    init_algorithm: str or np.ndarray, default='random'
        Method for initializing cluster centres. Any of the following are valid:
        ['random']. Or a np.ndarray of shape (n_clusters, n_channels, n_timepoints)
        and gives the initial cluster centres.
    n_init: int, default=10
        Number of times the k-shape algorithm will be run with different
        centroid seeds. The final result will be the best output of n_init
        consecutive runs in terms of inertia.
    max_iter: int, default=300
        Maximum number of iterations of the k-shape algorithm for a single
        run.
    tol: float, default=1e-4
        Relative tolerance with regards to Frobenius norm of the difference
        in the cluster centres of two consecutive iterations to declare
        convergence.
    verbose: bool, default=False
        Verbosity mode.
    random_state: int or np.random.RandomState instance or None, default=None
        Determines random number generation for centroid initialization.

    Attributes
    ----------
    cluster_centers_: np.ndarray or None
        Cluster centres exactly as exposed by the fitted ``tslearn`` ``KShape``
        estimator. ``None`` until the estimator has been fitted.
    labels_: np.ndarray (1d array of shape (n_cases,))
        Index of the cluster each training time series belongs to.
    inertia_: float
        Inertia of the fitted model, as reported by the ``tslearn`` estimator.
    n_iter_: int
        Number of iterations run.

    References
    ----------
    .. [1] John Paparrizos and Luis Gravano. 2016.
       K-Shape: Efficient and Accurate Clustering of Time Series.
       SIGMOD Rec. 45, 1 (March 2016), 69–76.
       https://doi.org/10.1145/2949741.2949758

    Examples
    --------
    >>> from aeon.clustering import TimeSeriesKShape
    >>> from aeon.datasets import load_basic_motions
    >>> # Load data
    >>> X_train, y_train = load_basic_motions(split="TRAIN")[0:10]
    >>> X_test, y_test = load_basic_motions(split="TEST")[0:10]
    >>> # Example of KShapes clustering
    >>> ks = TimeSeriesKShape(n_clusters=3, random_state=1)  # doctest: +SKIP
    >>> ks.fit(X_train)  # doctest: +SKIP
    TimeSeriesKShape(n_clusters=3, random_state=1)
    >>> preds = ks.predict(X_test)  # doctest: +SKIP
    """

    _tags = {
        "capability:multivariate": True,
        "python_dependencies": "tslearn",
    }

    def __init__(
        self,
        n_clusters: int = 8,
        init_algorithm: Union[str, np.ndarray] = "random",
        n_init: int = 10,
        max_iter: int = 300,
        tol: float = 1e-4,
        verbose: bool = False,
        random_state: Union[int, RandomState, None] = None,
    ):
        self.init_algorithm = init_algorithm
        self.n_init = n_init
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose
        self.random_state = random_state

        # Fitted state; populated by ``_fit``.
        self.cluster_centers_ = None
        self.labels_ = None
        self.inertia_ = None
        self.n_iter_ = 0

        # The wrapped tslearn estimator, created lazily in ``_fit`` so that
        # constructing this class does not require tslearn to be installed.
        self._tslearn_k_shapes = None

        super().__init__(n_clusters=n_clusters)

    def _fit(self, X, y=None):
        """Fit time series clusterer to training data.

        Parameters
        ----------
        X: np.ndarray, of shape (n_cases, n_channels, n_timepoints) or
            (n_cases, n_timepoints)
            A collection of time series instances.
        y: ignored, exists for API consistency reasons.

        Returns
        -------
        self:
            Fitted estimator.
        """
        _check_soft_dependencies("tslearn", severity="error")
        from tslearn.clustering import KShape

        self._tslearn_k_shapes = KShape(
            n_clusters=self.n_clusters,
            max_iter=self.max_iter,
            tol=self.tol,
            random_state=self.random_state,
            n_init=self.n_init,
            verbose=self.verbose,
            init=self.init_algorithm,
        )

        # The wrapped estimator is fed the data with axes 1 and 2 exchanged.
        _X = X.swapaxes(1, 2)

        self._tslearn_k_shapes.fit(_X)

        # Publish the centres on the public attribute initialised in
        # ``__init__`` (previously it stayed ``None`` after fitting). The
        # private alias is kept for backward compatibility.
        # NOTE(review): the centres are stored as tslearn returns them; since
        # tslearn was fitted on axis-swapped data they are presumably in that
        # swapped layout as well - confirm before relying on the axis order.
        self.cluster_centers_ = self._tslearn_k_shapes.cluster_centers_
        self._cluster_centers = self.cluster_centers_
        self.labels_ = self._tslearn_k_shapes.labels_
        self.inertia_ = self._tslearn_k_shapes.inertia_
        self.n_iter_ = self._tslearn_k_shapes.n_iter_
        return self

    def _predict(self, X, y=None) -> np.ndarray:
        """Predict the closest cluster each sample in X belongs to.

        Parameters
        ----------
        X: np.ndarray, of shape (n_cases, n_channels, n_timepoints) or
            (n_cases, n_timepoints)
            A collection of time series instances.
        y: ignored, exists for API consistency reasons.

        Returns
        -------
        np.ndarray (1d array of shape (n_cases,))
            Index of the cluster each time series in X belongs to.
        """
        # Same axis exchange as applied to the training data in ``_fit``.
        _X = X.swapaxes(1, 2)
        return self._tslearn_k_shapes.predict(_X)

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.


        Returns
        -------
        params : dict or list of dict, default={}
            Parameters to create testing instances of the class.
            Each dict holds parameters to construct an "interesting" test
            instance, i.e., `MyClass(**params)` or `MyClass(**params[i])` creates
            a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        return {
            "n_clusters": 2,
            "init_algorithm": "random",
            "n_init": 1,
            "max_iter": 1,
            "tol": 1e-4,
            "verbose": False,
            "random_state": 1,
        }

    def _score(self, X, y=None):
        """Return the absolute value of the inertia stored by the last fit.

        ``X`` and ``y`` are not used: the value is derived solely from
        ``self.inertia_``, i.e. from the training data.
        """
        return np.abs(self.inertia_)

aeon/clustering/_k_shapes.py

+8
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,20 @@
33
from typing import Union
44

55
import numpy as np
6+
from deprecated.sphinx import deprecated
67
from numpy.random import RandomState
78

89
from aeon.clustering.base import BaseClusterer
910
from aeon.utils.validation._dependencies import _check_soft_dependencies
1011

1112

13+
# TODO: remove in v1.0.0
14+
@deprecated(
15+
version="1.0.0",
16+
reason="TimeSeriesKShapes class has been renamed to TimeSeriesKShape. "
17+
"The TimeSeriesKShapes version will be removed in version 1.0.0.",
18+
category=FutureWarning,
19+
)
1220
class TimeSeriesKShapes(BaseClusterer):
1321
"""Kshape algorithm: wrapper of the ``tslearn`` implementation.
1422

aeon/clustering/tests/test_k_shapes.py aeon/clustering/tests/test_k_shape.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
"""Tests for time series k-shapes."""
1+
"""Tests for time series k-shape."""
22

33
import numpy as np
44
import pytest
55

6-
from aeon.clustering._k_shapes import TimeSeriesKShapes
6+
from aeon.clustering._k_shape import TimeSeriesKShape
77
from aeon.datasets import load_basic_motions
88
from aeon.utils.validation._dependencies import _check_estimator_deps
99

@@ -18,7 +18,7 @@
1818

1919

2020
@pytest.mark.skipif(
21-
not _check_estimator_deps(TimeSeriesKShapes, severity="none"),
21+
not _check_estimator_deps(TimeSeriesKShape, severity="none"),
2222
reason="skip test if required soft dependencies not available",
2323
)
2424
def test_kshapes():
@@ -28,7 +28,7 @@ def test_kshapes():
2828
X_train, y_train = load_basic_motions(split="train")
2929
X_test, y_test = load_basic_motions(split="test")
3030

31-
kshapes = TimeSeriesKShapes(random_state=1, n_clusters=3)
31+
kshapes = TimeSeriesKShape(random_state=1, n_clusters=3)
3232
kshapes.fit(X_train[0:max_train])
3333
test_shape_result = kshapes.predict(X_test[0:max_train])
3434
score = kshapes.score(X_test[0:max_train])

aeon/testing/data_generation/_collection.py

+16-6
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ def make_example_3d_numpy(
2323
n_channels: int = 1,
2424
n_timepoints: int = 12,
2525
n_labels: int = 2,
26+
min_cases_per_label: int = 1,
2627
regression_target: bool = False,
2728
random_state: Union[int, None] = None,
2829
return_y: bool = True,
@@ -44,6 +45,8 @@ def make_example_3d_numpy(
4445
The number of features/series length to generate.
4546
n_labels : int
4647
The number of unique labels to generate.
48+
min_cases_per_label : int
49+
The minimum number of samples per unique label.
4750
regression_target : bool
4851
If True, the target will be a scalar float, otherwise an int.
4952
random_state : int or None
@@ -85,9 +88,11 @@ def make_example_3d_numpy(
8588
y = X[:, 0, 0].astype(int)
8689

8790
for i in range(n_labels):
88-
if len(y) > i:
89-
X[i, 0, 0] = i
90-
y[i] = i
91+
for j in range(min_cases_per_label):
92+
idx = i * min_cases_per_label + j
93+
if len(y) > idx:
94+
X[idx, 0, 0] = i
95+
y[idx] = i
9196
X = X * (y[:, None, None] + 1)
9297

9398
if regression_target:
@@ -103,6 +108,7 @@ def make_example_2d_numpy_collection(
103108
n_cases: int = 10,
104109
n_timepoints: int = 8,
105110
n_labels: int = 2,
111+
min_cases_per_label: int = 1,
106112
regression_target: bool = False,
107113
random_state: Union[int, None] = None,
108114
return_y: bool = True,
@@ -122,6 +128,8 @@ def make_example_2d_numpy_collection(
122128
The number of features/series length to generate.
123129
n_labels : int
124130
The number of unique labels to generate.
131+
min_cases_per_label : int
132+
The minimum number of samples per unique label.
125133
regression_target : bool
126134
If True, the target will be a scalar float, otherwise an int.
127135
random_state : int or None
@@ -159,9 +167,11 @@ def make_example_2d_numpy_collection(
159167
y = X[:, 0].astype(int)
160168

161169
for i in range(n_labels):
162-
if len(y) > i:
163-
X[i, 0] = i
164-
y[i] = i
170+
for j in range(min_cases_per_label):
171+
idx = i * min_cases_per_label + j
172+
if len(y) > idx:
173+
X[idx, 0] = i
174+
y[idx] = i
165175
X = X * (y[:, None] + 1)
166176

167177
if regression_target:

docs/api_reference/clustering.rst

+1
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ Clustering Algorithms
3434
TimeSeriesKMeans
3535
TimeSeriesKMedoids
3636
TimeSeriesKShapes
37+
TimeSeriesKShape
3738
TimeSeriesKernelKMeans
3839
TimeSeriesCLARA
3940
TimeSeriesCLARANS

0 commit comments

Comments
 (0)