diff --git a/aeon/clustering/__init__.py b/aeon/clustering/__init__.py
index 2eec5142cf..4b88d27585 100644
--- a/aeon/clustering/__init__.py
+++ b/aeon/clustering/__init__.py
@@ -8,6 +8,7 @@
     "TimeSeriesKMeans",
     "TimeSeriesKShape",
     "TimeSeriesKernelKMeans",
+    "KASBA",
     "ElasticSOM",
     "KSpectralCentroid",
     "DummyClusterer",
@@ -20,6 +21,7 @@
 from aeon.clustering._k_medoids import TimeSeriesKMedoids
 from aeon.clustering._k_sc import KSpectralCentroid
 from aeon.clustering._k_shape import TimeSeriesKShape
+from aeon.clustering._kasba import KASBA
 from aeon.clustering._kernel_k_means import TimeSeriesKernelKMeans
 from aeon.clustering.base import BaseClusterer
 from aeon.clustering.dummy import DummyClusterer
diff --git a/aeon/clustering/_kasba.py b/aeon/clustering/_kasba.py
new file mode 100644
index 0000000000..e6db7f58e0
--- /dev/null
+++ b/aeon/clustering/_kasba.py
@@ -0,0 +1,363 @@
+"""Time series kmeans."""
+
+from typing import Optional
+
+__maintainer__ = ["chrisholder"]
+
+from typing import Callable, Union
+
+import numpy as np
+from numpy.random import RandomState
+from sklearn.utils import check_random_state
+
+from aeon.clustering._k_means import EmptyClusterError
+from aeon.clustering.averaging import kasba_average
+from aeon.clustering.base import BaseClusterer
+from aeon.distances import distance as distance_func
+from aeon.distances import pairwise_distance
+
+
+class KASBA(BaseClusterer):
+    """KASBA clusterer [1]_.
+
+    KASBA is a $k$-means clustering algorithm designed for use with the MSM distance
+    metric [2]_ however, it can be used with any elastic distance that is a metric.
+    KASBA finds initial clusters using an adapted form of kmeans++ to use
+    elastic distances, a fast assignment step that exploits the metric property
+    to avoid distance calculations in assignment, and an adapted elastic barycentre
+    average that uses a stochastic gradient descent to find the barycentre averages.
+
+    Parameters
+    ----------
+    n_clusters : int, default=8
+        The number of clusters to form as well as the number of centroids to generate.
+    distance : str or callable, default='msm'
+        The distance metric to use. If a string, must be one of the following:
+        'msm', 'twe'. The distance measure use MUST be a metric.
+    ba_subset_size : float, default=0.5
+        The proportion of the data to use in the barycentre average step. For the first
+        iteration all the data will be used however, on subsequent iterations a subset
+        of the data will be used. This will be a % of the data passed (e.g. 0.5 = 50%).
+        If there are less than 10 data points, all the available data will be used
+        every iteration.
+    initial_step_size : float, default=0.05
+        The initial step size for the stochastic gradient descent in the
+        barycentre average step.
+    max_iter : int, default=300
+        Maximum number of iterations of the k-means algorithm before it is forcibly
+        stopped.
+    tol : float, default=1e-6
+        Relative tolerance in regard to Frobenius norm of the difference
+        in the cluster centers of two consecutive iterations to declare
+        convergence.
+    distance_params : dict, default=None
+        Dictionary containing kwargs for the distance being used. For example if you
+        wanted to specify a cost for MSM you would pass
+        distance_params={"c": 0.2}. See documentation of aeon.distances for more
+        details.
+    decay_rate : float, default=0.1
+        The decay rate for the step size in the barycentre average step. The
+        initial_step_size will be multiplied by np.exp(-decay_rate * i) every iteration
+        where i is the current iteration.
+    verbose : bool, default=False
+        Verbosity mode.
+    random_state : int, np.random.RandomState instance or None, default=None
+        Determines random number generation for centroid initialization.
+        If `int`, random_state is the seed used by the random number generator;
+        If `np.random.RandomState` instance,
+        random_state is the random number generator;
+        If `None`, the random number generator is the `RandomState` instance used
+        by `np.random`.
+
+    Attributes
+    ----------
+    cluster_centers_ : 3d np.ndarray
+        Array of shape (n_clusters, n_channels, n_timepoints))
+        Time series that represent each of the cluster centers.
+    labels_ : 1d np.ndarray
+        1d array of shape (n_case,)
+        Labels that is the index each time series belongs to.
+    inertia_ : float
+        Sum of squared distances of samples to their closest cluster center.
+    n_iter_ : int
+        Number of iterations run.
+
+    References
+    ----------
+    .. [1] Holder, Christopher & Bagnall, Anthony. (2024).
+       Rock the KASBA: Blazingly Fast and Accurate Time Series Clustering.
+       10.48550/arXiv.2411.17838.
+
+    .. [2] Stefan A., Athitsos V., Das G.: The Move-Split-Merge metric for time
+    series. IEEE Transactions on Knowledge and Data Engineering 25(6), 2013.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from aeon.clustering import KASBA
+    >>> X = np.random.random(size=(10,2,20))
+    >>> clst= KASBA(distance="msm",n_clusters=2)
+    >>> clst.fit(X)
+    KASBA(n_clusters=2)
+    >>> preds = clst.predict(X)
+    """
+
+    _tags = {
+        "capability:multivariate": True,
+        "algorithm_type": "distance",
+    }
+
+    def __init__(
+        self,
+        n_clusters: int = 8,
+        distance: Union[str, Callable] = "msm",
+        ba_subset_size: float = 0.5,
+        initial_step_size: float = 0.05,
+        max_iter: int = 300,
+        tol: float = 1e-6,
+        distance_params: Optional[dict] = None,
+        decay_rate: float = 0.1,
+        verbose: bool = False,
+        random_state: Optional[Union[int, RandomState]] = None,
+    ):
+        self.distance = distance
+        self.max_iter = max_iter
+        self.tol = tol
+        self.verbose = verbose
+        self.random_state = random_state
+        self.distance_params = distance_params
+        self.initial_step_size = initial_step_size
+        self.ba_subset_size = ba_subset_size
+        self.decay_rate = decay_rate
+        self.n_clusters = n_clusters
+
+        self.cluster_centers_ = None
+        self.labels_ = None
+        self.inertia_ = None
+        self.n_iter_ = 0
+
+        self._random_state = None
+        self._distance_params = {}
+
+        super().__init__()
+
+    def _fit(self, X: np.ndarray, y=None):
+        self._check_params(X)
+        cluster_centres, distances_to_centres, labels = self._elastic_kmeans_plus_plus(
+            X,
+        )
+        self.labels_, self.cluster_centers_, self.inertia_, self.n_iter_ = self._kasba(
+            X,
+            cluster_centres,
+            distances_to_centres,
+            labels,
+        )
+
+        return self
+
+    def _predict(self, X: np.ndarray, y=None) -> np.ndarray:
+        if isinstance(self.distance, str):
+            pairwise_matrix = pairwise_distance(
+                X, self.cluster_centers_, method=self.distance, **self._distance_params
+            )
+        else:
+            pairwise_matrix = pairwise_distance(
+                X,
+                self.cluster_centers_,
+                method=self.distance,
+                **self._distance_params,
+            )
+        return pairwise_matrix.argmin(axis=1)
+
+    def _kasba(
+        self,
+        X,
+        cluster_centres,
+        distances_to_centres,
+        labels,
+    ):
+        inertia = np.inf
+        prev_inertia = np.inf
+        prev_labels = None
+        prev_cluster_centres = None
+        for i in range(self.max_iter):
+            cluster_centres, distances_to_centres = self._recalculate_centroids(
+                X,
+                cluster_centres,
+                labels,
+                distances_to_centres,
+            )
+
+            labels, distances_to_centres, inertia = self._fast_assign(
+                X,
+                cluster_centres,
+                distances_to_centres,
+                labels,
+                i == 0,
+            )
+
+            labels, cluster_centres, distances_to_centres = self._handle_empty_cluster(
+                X,
+                cluster_centres,
+                distances_to_centres,
+                labels,
+            )
+
+            if np.array_equal(prev_labels, labels):
+                if self.verbose:
+                    print(  # noqa: T001, T201
+                        f"Converged at iteration {i}, "  # noqa: T001, T201
+                        f"inertia {inertia:.5f}."  # noqa: T001, T201
+                    )  # noqa: T001, T201
+                break
+
+            prev_inertia = inertia
+            prev_labels = labels.copy()
+            prev_cluster_centres = cluster_centres.copy()
+
+            if self.verbose is True:
+                print(f"Iteration {i}, inertia {prev_inertia}.")  # noqa: T001, T201
+
+        if inertia < prev_inertia:
+            return prev_labels, prev_cluster_centres, prev_inertia, i + 1
+        return labels, cluster_centres, inertia, i + 1
+
+    def _fast_assign(
+        self,
+        X,
+        cluster_centres,
+        distances_to_centres,
+        labels,
+        is_first_iteration,
+    ):
+        distances_between_centres = pairwise_distance(
+            cluster_centres,
+            method=self.distance,
+            **self._distance_params,
+        )
+        for i in range(X.shape[0]):
+            min_dist = distances_to_centres[i]
+            closest = labels[i]
+            for j in range(self.n_clusters):
+                if not is_first_iteration and j == closest:
+                    continue
+                bound = distances_between_centres[j, closest] / 2.0
+                if min_dist < bound:
+                    continue
+
+                dist = distance_func(
+                    X[i],
+                    cluster_centres[j],
+                    method=self.distance,
+                    **self._distance_params,
+                )
+                if dist < min_dist:
+                    min_dist = dist
+                    closest = j
+
+            labels[i] = closest
+            distances_to_centres[i] = min_dist
+
+        inertia = np.sum(distances_to_centres**2)
+        if self.verbose:
+            print(f"{inertia:.5f}", end=" --> ")  # noqa: T001, T201
+        return labels, distances_to_centres, inertia
+
+    def _recalculate_centroids(
+        self,
+        X,
+        cluster_centres,
+        labels,
+        distances_to_centres,
+    ):
+        for j in range(self.n_clusters):
+            current_cluster_indices = labels == j
+
+            previous_distance_to_centre = distances_to_centres[current_cluster_indices]
+            previous_cost = np.sum(previous_distance_to_centre)
+            curr_centre, dist_to_centre = kasba_average(
+                X=X[current_cluster_indices],
+                init_barycenter=cluster_centres[j],
+                previous_cost=previous_cost,
+                previous_distance_to_centre=previous_distance_to_centre,
+                distance=self.distance,
+                max_iters=50,
+                tol=self.tol,
+                verbose=self.verbose,
+                random_state=self._random_state,
+                ba_subset_size=self.ba_subset_size,
+                initial_step_size=self.initial_step_size,
+                decay_rate=self.decay_rate,
+                **self._distance_params,
+            )
+
+            cluster_centres[j] = curr_centre
+            distances_to_centres[current_cluster_indices] = dist_to_centre
+
+        return cluster_centres, distances_to_centres
+
+    def _handle_empty_cluster(
+        self,
+        X: np.ndarray,
+        cluster_centres: np.ndarray,
+        distances_to_centres: np.ndarray,
+        labels: np.ndarray,
+    ):
+        empty_clusters = np.setdiff1d(np.arange(self.n_clusters), labels)
+        j = 0
+        while empty_clusters.size > 0:
+            current_empty_cluster_index = empty_clusters[0]
+            index_furthest_from_centre = distances_to_centres.argmax()
+            cluster_centres[current_empty_cluster_index] = X[index_furthest_from_centre]
+            curr_pw = pairwise_distance(
+                X, cluster_centres, method=self.distance, **self._distance_params
+            )
+            labels = curr_pw.argmin(axis=1)
+            distances_to_centres = curr_pw.min(axis=1)
+            empty_clusters = np.setdiff1d(np.arange(self.n_clusters), labels)
+            j += 1
+            if j > self.n_clusters:
+                raise EmptyClusterError
+
+        return labels, cluster_centres, distances_to_centres
+
+    def _elastic_kmeans_plus_plus(
+        self,
+        X,
+    ):
+        initial_center_idx = self._random_state.randint(X.shape[0])
+        indexes = [initial_center_idx]
+
+        min_distances = pairwise_distance(
+            X, X[initial_center_idx], method=self.distance, **self._distance_params
+        ).flatten()
+        labels = np.zeros(X.shape[0], dtype=int)
+
+        for i in range(1, self.n_clusters):
+            probabilities = min_distances / min_distances.sum()
+            next_center_idx = self._random_state.choice(X.shape[0], p=probabilities)
+            indexes.append(next_center_idx)
+
+            new_distances = pairwise_distance(
+                X, X[next_center_idx], method=self.distance, **self._distance_params
+            ).flatten()
+
+            closer_points = new_distances < min_distances
+            min_distances[closer_points] = new_distances[closer_points]
+            labels[closer_points] = i
+
+        centers = X[indexes]
+        return centers, min_distances, labels
+
+    def _check_params(self, X: np.ndarray) -> None:
+        self._random_state = check_random_state(self.random_state)
+
+        if self.n_clusters > X.shape[0]:
+            raise ValueError(
+                f"n_clusters ({self.n_clusters}) cannot be larger than "
+                f"n_cases ({X.shape[0]})"
+            )
+
+        self._distance_params = {
+            **(self.distance_params or {}),
+        }
diff --git a/aeon/clustering/averaging/__init__.py b/aeon/clustering/averaging/__init__.py
index d5d12fb99d..59b10fda2d 100644
--- a/aeon/clustering/averaging/__init__.py
+++ b/aeon/clustering/averaging/__init__.py
@@ -7,6 +7,7 @@
     "subgradient_barycenter_average",
     "VALID_BA_METRICS",
     "shift_invariant_average",
+    "kasba_average",
 ]
 
 from aeon.clustering.averaging._averaging import mean_average
@@ -14,6 +15,7 @@
 from aeon.clustering.averaging._ba_subgradient import subgradient_barycenter_average
 from aeon.clustering.averaging._ba_utils import VALID_BA_METRICS
 from aeon.clustering.averaging._barycenter_averaging import elastic_barycenter_average
+from aeon.clustering.averaging._kasba_average import kasba_average
 from aeon.clustering.averaging._shift_scale_invariant_averaging import (
     shift_invariant_average,
 )
diff --git a/aeon/clustering/averaging/_kasba_average.py b/aeon/clustering/averaging/_kasba_average.py
new file mode 100644
index 0000000000..c4f5d28642
--- /dev/null
+++ b/aeon/clustering/averaging/_kasba_average.py
@@ -0,0 +1,177 @@
+from typing import Optional
+
+import numpy as np
+from numba import njit
+from sklearn.utils import check_random_state
+
+from aeon.distances import msm_alignment_path, pairwise_distance, twe_alignment_path
+
+
+def kasba_average(
+    X: np.ndarray,
+    init_barycenter: np.ndarray,
+    previous_cost: float,
+    previous_distance_to_centre: np.ndarray,
+    distance: str = "msm",
+    max_iters: int = 50,
+    tol=1e-5,
+    ba_subset_size: float = 0.5,
+    initial_step_size: float = 0.05,
+    decay_rate: float = 0.1,
+    verbose: bool = False,
+    random_state: Optional[int] = None,
+    **kwargs,
+) -> tuple[np.ndarray, np.ndarray]:
+    """KASBA average [1]_.
+
+    The KASBA clusterer proposed an adapted version of the Stochastic Subgradient
+    Elastic Barycentre Average. The algorithm works by iterating randomly over X.
+    If it is the first iteration then all the values are used. However, if it is not
+    the first iteration then a subset is used. The subset size is determined by the
+    parameter ba_subset_size which is the percentage of the data to use. If there are
+    less than 10 data points, all the available data will be used every iteration.
+
+    Parameters
+    ----------
+    X: np.ndarray, of shape (n_cases, n_channels, n_timepoints) or
+            (n_cases, n_timepoints)
+        A collection of time series instances to take the average from.
+    init_barycenter: np.ndarray, of shape (n_channels, n_timepoints)
+        The initial barycentre to refine.
+    previous_cost: float
+        The summed total distance from all time series in X to the init_barycentre.
+    previous_distance_to_centre: np.ndarray, of shape (n_cases,)
+        The distance between each time series in X and the init_barycentre.
+    distance: str, default='msm'
+        String defining the distance to use for averaging. Distance to
+        compute similarity between time series. A list of valid strings for metrics
+        can be found in the documentation form
+        :func:`aeon.distances.get_distance_function`.
+    max_iters: int, default=30
+        Maximum number iterations for dba to update over.
+    tol : float (default: 1e-5)
+        Tolerance to use for early stopping: if the decrease in cost is lower
+        than this value, the Expectation-Maximization procedure stops.
+    ba_subset_size : float, default=0.5
+        The proportion of the data to use in the barycentre average step. For the first
+        iteration all the data will be used however, on subsequent iterations a subset
+        of the data will be used. This will be a % of the data passed (e.g. 0.5 = 50%).
+        If there are less than 10 data points, all the available data will be used
+        every iteration.
+    initial_step_size : float, default=0.05
+        The initial step size for the gradient descent.
+    decay_rate : float, default=0.1
+        The decay rate for the step size in the barycentre average step. The
+        initial_step_size will be multiplied by np.exp(-decay_rate * i) every iteration
+        where i is the current iteration.
+    verbose: bool, default=False
+        Boolean that controls the verbosity.
+    random_state: int or None, default=None
+        Random state to use for the barycenter averaging.
+
+    Returns
+    -------
+    np.ndarray of shape (n_channels, n_timepoints)
+        Time series that is the KASBA average of the collection of instances provided.
+
+    References
+    ----------
+    .. [1] Holder, Christopher & Bagnall, Anthony. (2024).
+       Rock the KASBA: Blazingly Fast and Accurate Time Series Clustering.
+       10.48550/arXiv.2411.17838.
+    """
+    if len(X) <= 1:
+        return X, np.zeros(X.shape[0])
+
+    if X.ndim == 3:
+        _X = X
+    elif X.ndim == 2:
+        _X = X.reshape((X.shape[0], 1, X.shape[1]))
+    else:
+        raise ValueError("X must be a 2D or 3D array")
+
+    random_state = check_random_state(random_state)
+    X_size = _X.shape[0]
+
+    barycenter = np.copy(init_barycenter)
+    prev_barycenter = np.copy(init_barycenter)
+
+    distances_to_centre = np.zeros(X_size)
+    num_ts_to_use = min(X_size, max(10, int(ba_subset_size * X_size)))
+    for i in range(max_iters):
+        shuffled_indices = random_state.permutation(X_size)
+        if i > 0:
+            shuffled_indices = shuffled_indices[:num_ts_to_use]
+
+        current_step_size = initial_step_size * np.exp(-decay_rate * i)
+
+        barycenter = _kasba_refine_one_iter(
+            barycenter=barycenter,
+            X=_X,
+            shuffled_indices=shuffled_indices,
+            current_step_size=current_step_size,
+            distance=distance,
+            **kwargs,
+        )
+
+        pw_dist = pairwise_distance(_X, barycenter, method=distance, **kwargs)
+        cost = np.sum(pw_dist)
+        distances_to_centre = pw_dist.flatten()
+
+        # Cost is the sum of distance to the centre
+        if abs(previous_cost - cost) < tol:
+            if previous_cost < cost:
+                barycenter = prev_barycenter
+                distances_to_centre = previous_distance_to_centre
+            break
+        elif previous_cost < cost:
+            barycenter = prev_barycenter
+            distances_to_centre = previous_distance_to_centre
+            break
+
+        prev_barycenter = barycenter
+        previous_distance_to_centre = distances_to_centre.copy()
+        previous_cost = cost
+
+        if verbose:
+            print(f"[Subset-SSG-BA] epoch {i}, cost {cost}")  # noqa: T001, T201
+
+    return barycenter, distances_to_centre
+
+
+@njit(cache=True, fastmath=True)
+def _kasba_refine_one_iter(
+    barycenter: np.ndarray,
+    X: np.ndarray,
+    shuffled_indices: np.ndarray,
+    current_step_size,
+    distance: str = "msm",
+    nu: float = 0.001,
+    lmbda: float = 1.0,
+    independent: bool = True,
+    c: float = 1.0,
+):
+
+    X_size, X_dims, X_timepoints = X.shape
+
+    barycenter_copy = np.copy(barycenter)
+
+    for i in shuffled_indices:
+        curr_ts = X[i]
+        if distance == "twe":
+            curr_alignment, curr_cost = twe_alignment_path(
+                curr_ts, barycenter_copy, nu=nu, lmbda=lmbda
+            )
+        elif distance == "msm":
+            curr_alignment, curr_cost = msm_alignment_path(
+                curr_ts, barycenter_copy, independent=independent, c=c
+            )
+        else:
+            raise ValueError(f"Invalid distance metric: {distance}")
+
+        new_ba = np.zeros((X_dims, X_timepoints))
+        for j, k in curr_alignment:
+            new_ba[:, k] += barycenter_copy[:, k] - curr_ts[:, j]
+
+        barycenter_copy -= (2.0 * current_step_size) * new_ba
+    return barycenter_copy
diff --git a/aeon/clustering/tests/test_kasba.py b/aeon/clustering/tests/test_kasba.py
new file mode 100644
index 0000000000..2d8389629e
--- /dev/null
+++ b/aeon/clustering/tests/test_kasba.py
@@ -0,0 +1,29 @@
+"""Test KASBA."""
+
+import numpy as np
+
+from aeon.benchmarking.metrics.clustering import clustering_accuracy_score
+from aeon.clustering import KASBA
+from aeon.testing.data_generation import make_example_3d_numpy
+
+
+def test_univariate_kasba():
+    """Test KASBA on univariate data."""
+    X, y = make_example_3d_numpy(20, 1, 10, random_state=1, return_y=True)
+
+    kasba = KASBA(n_clusters=len(np.unique(y)), random_state=1)
+
+    kasba.fit(X)
+    score = clustering_accuracy_score(y, kasba.labels_)
+    assert score == 0.95
+
+
+def test_multivariate_kasba():
+    """Test KASBA on multivariate data."""
+    X, y = make_example_3d_numpy(20, 3, 10, random_state=1, return_y=True)
+
+    kasba = KASBA(n_clusters=len(np.unique(y)), random_state=1)
+
+    kasba.fit(X)
+    score = clustering_accuracy_score(y, kasba.labels_)
+    assert score == 0.55
diff --git a/docs/api_reference/clustering.rst b/docs/api_reference/clustering.rst
index 20c8e779da..e66e54dfbc 100644
--- a/docs/api_reference/clustering.rst
+++ b/docs/api_reference/clustering.rst
@@ -18,6 +18,7 @@ Clustering Algorithms
     :toctree: auto_generated/
     :template: class.rst
 
+    KASBA
     TimeSeriesKMeans
     TimeSeriesKMedoids
     TimeSeriesKShape
@@ -76,6 +77,7 @@ Averaging
     :toctree: auto_generated/
     :template: function.rst
 
+    kasba_average
     elastic_barycenter_average
     mean_average
     petitjean_barycenter_average
diff --git a/docs/papers_using_aeon.md b/docs/papers_using_aeon.md
index e6238906e3..30dba0fd44 100644
--- a/docs/papers_using_aeon.md
+++ b/docs/papers_using_aeon.md
@@ -37,6 +37,10 @@ the paper and a link to the code in your personal GitHub or other repository.
   with k-medoids based algorithms. In International Workshop on Advanced Analytics and
   Learning on Temporal Data (pp. 39-55).
   [Paper](https://link.springer.com/chapter/10.1007/978-3-031-49896-1_4)
+- Holder, Christopher & Bagnall, Anthony. (2024).
+  Rock the KASBA: Blazingly Fast and Accurate Time Series Clustering.
+  10.48550/arXiv.2411.17838.
+[Paper](https://arxiv.org/abs/2411.17838)
 
 ## Regression