Merge branch 'main' into dtw_gi

aeon-toolkit · Mar 8, 2025 · 3d648ef · 3d648ef
2 parents 2dba48d + 236d039
commit 3d648ef
Show file tree

Hide file tree

Showing 34 changed files with 3,217 additions and 309 deletions.
diff --git a/.all-contributorsrc b/.all-contributorsrc
@@ -2656,6 +2656,24 @@
       "contributions": [
         "doc"
       ]
+    },
+    {
+      "login": "shinymack",
+      "name": "Akash Kawle",
+      "avatar_url": "https://avatars.githubusercontent.com/u/128881349?v=4",
+      "profile": "https://github.com/shinymack",
+      "contributions": [
+        "code"
+      ]
+    },
+    {
+      "login": "kevinzb56",
+      "name": "Kevin Shah",
+      "avatar_url": "https://avatars.githubusercontent.com/u/161136814?v=4",
+      "profile": "https://github.com/kevinzb56",
+      "contributions": [
+        "doc"
+      ]
     }
   ],
   "commitType": "docs"

diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
@@ -27,7 +27,7 @@ jobs:
           persist-credentials: false
 
       - name: Run analysis
-        uses: ossf/scorecard-action@v2.4.0
+        uses: ossf/scorecard-action@v2.4.1
         with:
           results_file: results.sarif
           results_format: sarif

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
         args: [ "--create", "--python-folders", "aeon" ]
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.6
+    rev: v0.9.7
     hooks:
       - id: ruff
         args: [ "--fix"]

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
diff --git a/aeon/base/_base.py b/aeon/base/_base.py
@@ -415,6 +415,18 @@ def __sklearn_is_fitted__(self):
         """Check fitted status and return a Boolean value."""
         return self.is_fitted
 
+    def __sklearn_tags__(self):
+        """Return sklearn-style tags populated from this estimator's aeon tags."""
+        aeon_tags = self.get_tags()
+        sklearn_tags = super().__sklearn_tags__()
+        sklearn_tags.non_deterministic = aeon_tags.get("non_deterministic", False)
+        sklearn_tags.target_tags.one_d_labels = True
+        sklearn_tags.input_tags.three_d_array = True
+        sklearn_tags.input_tags.allow_nan = aeon_tags.get(
+            "capability:missing_values", False
+        )
+        return sklearn_tags
+
     def _validate_data(self, **kwargs):
         """Sklearn data validation."""
         raise NotImplementedError(

diff --git a/aeon/base/_base_collection.py b/aeon/base/_base_collection.py
@@ -1,4 +1,24 @@
-"""Base class for estimators that fit collections of time series."""
+"""
+Base class for estimators that fit collections of time series.
+
+    class name: BaseCollectionEstimator
+
+Defining methods:
+    preprocessing         - _preprocess_collection(self, X, store_metadata=True)
+    input checking        - _check_X(self, X)
+    input conversion      - _convert_X(self, X)
+    shape checking        - _check_shape(self, X)
+
+Inherited inspection methods:
+    hyper-parameter inspection  - get_params()
+    fitted parameter inspection - get_fitted_params()
+
+State:
+    fitted model/strategy   - by convention, any attributes ending in "_"
+    fitted state flag       - is_fitted (property)
+    fitted state inspection - check_is_fitted()
+
+"""
 
 from abc import abstractmethod
 

diff --git a/aeon/classification/base.py b/aeon/classification/base.py
@@ -26,6 +26,7 @@ class name: BaseClassifier
 
 import numpy as np
 import pandas as pd
+from sklearn.base import ClassifierMixin
 from sklearn.metrics import get_scorer, get_scorer_names
 from sklearn.model_selection import cross_val_predict
 
@@ -35,7 +36,7 @@ class name: BaseClassifier
 from aeon.utils.validation.labels import check_classification_y
 
 
-class BaseClassifier(BaseCollectionEstimator):
+class BaseClassifier(ClassifierMixin, BaseCollectionEstimator):
     """
     Abstract base class for time series classifiers.
 
@@ -66,7 +67,6 @@ def __init__(self):
         self.classes_ = []  # classes seen in y, unique labels
         self.n_classes_ = -1  # number of unique classes in y
         self._class_dictionary = {}
-        self._estimator_type = "classifier"
 
         super().__init__()
 

diff --git a/aeon/classification/deep_learning/base.py b/aeon/classification/deep_learning/base.py
@@ -1,6 +1,23 @@
 """
 Abstract base class for the Keras neural network classifiers.
 
+    class name: BaseDeepClassifier
+
+Defining methods:
+    fitting         - fit(self, X, y)
+    predicting      - predict(self, X)
+                    - predict_proba(self, X)
+    model building  - build_model(self, input_shape, n_classes) (abstract method)
+
+Inherited inspection methods:
+    hyper-parameter inspection  - get_params()
+    fitted parameter inspection - get_fitted_params()
+
+State:
+    fitted model/strategy   - by convention, any attributes ending in "_"
+    fitted state flag       - is_fitted (property)
+    fitted state inspection - check_is_fitted()
+
 The reason for this class between BaseClassifier and deep_learning classifiers is
 because we can generalise tags, _predict and _predict_proba
 """

diff --git a/aeon/classification/feature_based/_catch22.py b/aeon/classification/feature_based/_catch22.py
@@ -67,6 +67,17 @@ class Catch22Classifier(BaseClassifier):
         if None a 'prefer' value of "threads" is used by default.
         Valid options are "loky", "multiprocessing", "threading" or a custom backend.
         See the joblib Parallel documentation for more details.
+    class_weight : {"balanced", "balanced_subsample"}, dict, list of dicts, default=None
+        Only used when ``estimator`` is None. From sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        n_samples / (n_classes * np.bincount(y))
+        The "balanced_subsample" mode is the same as "balanced" except that weights
+        are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed through
+        the fit method) if sample_weight is specified.
 
     Attributes
     ----------
@@ -132,6 +143,7 @@ def __init__(
         random_state=None,
         n_jobs=1,
         parallel_backend=None,
+        class_weight=None,
     ):
         self.features = features
         self.catch24 = catch24
@@ -142,6 +154,7 @@ def __init__(
         self.random_state = random_state
         self.n_jobs = n_jobs
         self.parallel_backend = parallel_backend
+        self.class_weight = class_weight
 
         super().__init__()
 
@@ -175,7 +188,7 @@ def _fit(self, X, y):
 
         self.estimator_ = _clone_estimator(
             (
-                RandomForestClassifier(n_estimators=200)
+                RandomForestClassifier(n_estimators=200, class_weight=self.class_weight)
                 if self.estimator is None
                 else self.estimator
             ),

diff --git a/aeon/classification/feature_based/_signature_classifier.py b/aeon/classification/feature_based/_signature_classifier.py
@@ -61,6 +61,17 @@ class SignatureClassifier(BaseClassifier):
         Signature truncation depth.
     random_state : int, default=None
         If `int`, random_state is the seed used by the random number generator;
+    class_weight : {"balanced", "balanced_subsample"}, dict, list of dicts, default=None
+        Only used when ``estimator`` is None. From sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        n_samples / (n_classes * np.bincount(y))
+        The "balanced_subsample" mode is the same as "balanced" except that weights
+        are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed through
+        the fit method) if sample_weight is specified.
 
     Attributes
     ----------
@@ -105,6 +116,7 @@ def __init__(
         sig_tfm="signature",
         depth=4,
         random_state=None,
+        class_weight=None,
     ):
         self.estimator = estimator
         self.augmentation_list = augmentation_list
@@ -116,7 +128,7 @@ def __init__(
         self.sig_tfm = sig_tfm
         self.depth = depth
         self.random_state = random_state
-
+        self.class_weight = class_weight
         super().__init__()
 
         self.signature_method = SignatureTransformer(
@@ -135,7 +147,9 @@ def _setup_classification_pipeline(self):
         """Set up the full signature method pipeline."""
         # Use rf if no classifier is set
         if self.estimator is None:
-            classifier = RandomForestClassifier(random_state=self.random_state)
+            classifier = RandomForestClassifier(
+                random_state=self.random_state, class_weight=self.class_weight
+            )
         else:
             classifier = _clone_estimator(self.estimator, self.random_state)
 

diff --git a/aeon/classification/feature_based/_summary.py b/aeon/classification/feature_based/_summary.py
@@ -43,6 +43,17 @@ class SummaryClassifier(BaseClassifier):
         If `RandomState` instance, random_state is the random number generator;
         If `None`, the random number generator is the `RandomState` instance used
         by `np.random`.
+    class_weight : {"balanced", "balanced_subsample"}, dict, list of dicts, default=None
+        Only used when ``estimator`` is None. From sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        n_samples / (n_classes * np.bincount(y))
+        The "balanced_subsample" mode is the same as "balanced" except that weights
+        are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed through
+        the fit method) if sample_weight is specified.
 
     Attributes
     ----------
@@ -85,13 +96,16 @@ def __init__(
         estimator=None,
         n_jobs=1,
         random_state=None,
+        class_weight=None,
     ):
         self.summary_stats = summary_stats
         self.estimator = estimator
 
         self.n_jobs = n_jobs
         self.random_state = random_state
 
+        self.class_weight = class_weight
+
         super().__init__()
 
     def _fit(self, X, y):
@@ -120,7 +134,7 @@ def _fit(self, X, y):
 
         self.estimator_ = _clone_estimator(
             (
-                RandomForestClassifier(n_estimators=200)
+                RandomForestClassifier(n_estimators=200, class_weight=self.class_weight)
                 if self.estimator is None
                 else self.estimator
             ),

diff --git a/aeon/classification/feature_based/_tsfresh.py b/aeon/classification/feature_based/_tsfresh.py
@@ -46,6 +46,17 @@ class TSFreshClassifier(BaseClassifier):
         If `RandomState` instance, random_state is the random number generator;
         If `None`, the random number generator is the `RandomState` instance used
         by `np.random`.
+    class_weight : {"balanced", "balanced_subsample"}, dict, list of dicts, default=None
+        Only used when ``estimator`` is None. From sklearn documentation:
+        If not given, all classes are supposed to have weight one.
+        The "balanced" mode uses the values of y to automatically adjust weights
+        inversely proportional to class frequencies in the input data as
+        n_samples / (n_classes * np.bincount(y))
+        The "balanced_subsample" mode is the same as "balanced" except that weights
+        are computed based on the bootstrap sample for every tree grown.
+        For multi-output, the weights of each column of y will be multiplied.
+        Note that these weights will be multiplied with sample_weight (passed through
+        the fit method) if sample_weight is specified.
 
     Attributes
     ----------
@@ -86,6 +97,7 @@ def __init__(
         n_jobs=1,
         chunksize=None,
         random_state=None,
+        class_weight=None,
     ):
         self.default_fc_parameters = default_fc_parameters
         self.relevant_feature_extractor = relevant_feature_extractor
@@ -99,6 +111,7 @@ def __init__(
         self._transformer = None
         self._return_majority_class = False
         self._majority_class = 0
+        self.class_weight = class_weight
 
         super().__init__()
 
@@ -137,7 +150,7 @@ def _fit(self, X, y):
         )
         self.estimator_ = _clone_estimator(
             (
-                RandomForestClassifier(n_estimators=200)
+                RandomForestClassifier(n_estimators=200, class_weight=self.class_weight)
                 if self.estimator is None
                 else self.estimator
             ),

diff --git a/aeon/classification/feature_based/tests/test_catch22.py b/aeon/classification/feature_based/tests/test_catch22.py
@@ -1,6 +1,7 @@
 """Test catch 22 classifier."""
 
 import numpy as np
+import pytest
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.linear_model import RidgeClassifier
 
@@ -19,3 +20,21 @@ def test_catch22():
     c22.fit(X, y)
     p = c22.predict_proba(X)
     assert np.all(np.isin(p, [0, 1]))
+
+
+@pytest.mark.parametrize("class_weight", ["balanced", "balanced_subsample"])
+def test_catch22_classifier_with_class_weight(class_weight):
+    """Test class_weight reaches the default catch22 random forest."""
+    X, y = make_example_3d_numpy(
+        n_cases=10, n_channels=1, n_timepoints=12, return_y=True, random_state=0
+    )
+    # class_weight is only forwarded to the default RandomForestClassifier, so
+    # estimator must be left as None for the parameter to have any effect.
+    clf = Catch22Classifier(
+        outlier_norm=True, random_state=0, class_weight=class_weight
+    )
+    clf.fit(X, y)
+    assert clf.estimator_.class_weight == class_weight
+    predictions = clf.predict(X)
+    assert len(predictions) == len(y)
+    assert set(predictions).issubset(set(y))
diff --git a/aeon/classification/feature_based/tests/test_signature.py b/aeon/classification/feature_based/tests/test_signature.py
@@ -18,3 +18,24 @@ def test_signature_classifier():
     cls = SignatureClassifier(estimator=None)
     cls._fit(X, y)
     assert isinstance(cls.pipeline.named_steps["classifier"], RandomForestClassifier)
+
+
+@pytest.mark.skipif(
+    not _check_soft_dependencies("esig", severity="none"),
+    reason="skip test if required soft dependency esig not available",
+)
+@pytest.mark.parametrize("class_weight", ["balanced", "balanced_subsample"])
+def test_signature_classifier_with_class_weight(class_weight):
+    """Test class_weight reaches the default signature random forest."""
+    X, y = make_example_3d_numpy(
+        n_cases=10, n_channels=1, n_timepoints=12, return_y=True, random_state=0
+    )
+    # class_weight is only forwarded to the default RandomForestClassifier
+    # built when estimator is None, so no estimator is passed here; a custom
+    # estimator would leave the parameter under test unused.
+    clf = SignatureClassifier(random_state=0, class_weight=class_weight)
+    clf.fit(X, y)
+    assert clf.pipeline.named_steps["classifier"].class_weight == class_weight
+    predictions = clf.predict(X)
+    assert len(predictions) == len(y)
+    assert set(predictions).issubset(set(y))
diff --git a/aeon/classification/feature_based/tests/test_summary.py b/aeon/classification/feature_based/tests/test_summary.py
@@ -1,6 +1,7 @@
 """Test summary classifier."""
 
 import numpy as np
+import pytest
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.linear_model import RidgeClassifier
 
@@ -19,3 +20,20 @@ def test_summary_classifier():
     cls.fit(X, y)
     p = cls.predict_proba(X)
     assert np.all(np.isin(p, [0, 1]))
+
+
+@pytest.mark.parametrize("class_weight", ["balanced", "balanced_subsample"])
+def test_summary_classifier_with_class_weight(class_weight):
+    """Test class_weight reaches the default summary random forest."""
+    X, y = make_example_3d_numpy(
+        n_cases=10, n_channels=1, n_timepoints=12, return_y=True, random_state=0
+    )
+    # class_weight is only forwarded to the default RandomForestClassifier
+    # built when estimator is None, so no estimator is passed here; a custom
+    # estimator would leave the parameter under test unused.
+    clf = SummaryClassifier(random_state=0, class_weight=class_weight)
+    clf.fit(X, y)
+    assert clf.estimator_.class_weight == class_weight
+    predictions = clf.predict(X)
+    assert len(predictions) == len(y)
+    assert set(predictions).issubset(set(y))