Skip to content

Commit

Permalink
Merge branch 'main' into dtw_gi
Browse files Browse the repository at this point in the history
  • Loading branch information
tanishy7777 authored Mar 8, 2025
2 parents 2dba48d + 236d039 commit 3d648ef
Show file tree
Hide file tree
Showing 34 changed files with 3,217 additions and 309 deletions.
18 changes: 18 additions & 0 deletions .all-contributorsrc
Original file line number Diff line number Diff line change
Expand Up @@ -2656,6 +2656,24 @@
"contributions": [
"doc"
]
},
{
"login": "shinymack",
"name": "Akash Kawle",
"avatar_url": "https://avatars.githubusercontent.com/u/128881349?v=4",
"profile": "https://github.com/shinymack",
"contributions": [
"code"
]
},
{
"login": "kevinzb56",
"name": "Kevin Shah",
"avatar_url": "https://avatars.githubusercontent.com/u/161136814?v=4",
"profile": "https://github.com/kevinzb56",
"contributions": [
"doc"
]
}
],
"commitType": "docs"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/scorecard.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
persist-credentials: false

- name: Run analysis
uses: ossf/scorecard-action@v2.4.0
uses: ossf/scorecard-action@v2.4.1
with:
results_file: results.sarif
results_format: sarif
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ repos:
args: [ "--create", "--python-folders", "aeon" ]

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.6
rev: v0.9.7
hooks:
- id: ruff
args: [ "--fix"]
Expand Down
100 changes: 51 additions & 49 deletions CONTRIBUTORS.md

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions aeon/base/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,18 @@ def __sklearn_is_fitted__(self):
"""Check fitted status and return a Boolean value."""
return self.is_fitted

def __sklearn_tags__(self):
    """Build the sklearn tag object for this estimator from its aeon tags.

    Starts from the tag object supplied by the parent class and then
    overrides a handful of fields, some with fixed values and some with
    values read from ``self.get_tags()``.

    Returns
    -------
    tags
        The object returned by ``super().__sklearn_tags__()`` with
        ``non_deterministic``, ``target_tags.one_d_labels``,
        ``input_tags.three_d_array`` and ``input_tags.allow_nan`` set.
    """
    own_tags = self.get_tags()
    tags = super().__sklearn_tags__()

    # Fixed values, applied regardless of the aeon tags.
    tags.target_tags.one_d_labels = True
    tags.input_tags.three_d_array = True

    # Values mirrored from the aeon tags; a missing tag falls back to False.
    tags.non_deterministic = own_tags.get("non_deterministic", False)
    supports_missing = own_tags.get("capability:missing_values", False)
    tags.input_tags.allow_nan = supports_missing
    return tags

def _validate_data(self, **kwargs):
"""Sklearn data validation."""
raise NotImplementedError(
Expand Down
22 changes: 21 additions & 1 deletion aeon/base/_base_collection.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,24 @@
"""Base class for estimators that fit collections of time series."""
"""
Base class for estimators that fit collections of time series.
class name: BaseCollectionEstimator
Defining methods:
preprocessing - _preprocess_collection(self, X, store_metadata=True)
input checking - _check_X(self, X)
input conversion - _convert_X(self, X)
shape checking - _check_shape(self, X)
Inherited inspection methods:
hyper-parameter inspection - get_params()
fitted parameter inspection - get_fitted_params()
State:
fitted model/strategy - by convention, any attributes ending in "_"
fitted state flag - is_fitted (property)
fitted state inspection - check_is_fitted()
"""

from abc import abstractmethod

Expand Down
4 changes: 2 additions & 2 deletions aeon/classification/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class name: BaseClassifier

import numpy as np
import pandas as pd
from sklearn.base import ClassifierMixin
from sklearn.metrics import get_scorer, get_scorer_names
from sklearn.model_selection import cross_val_predict

Expand All @@ -35,7 +36,7 @@ class name: BaseClassifier
from aeon.utils.validation.labels import check_classification_y


class BaseClassifier(BaseCollectionEstimator):
class BaseClassifier(ClassifierMixin, BaseCollectionEstimator):
"""
Abstract base class for time series classifiers.
Expand Down Expand Up @@ -66,7 +67,6 @@ def __init__(self):
self.classes_ = [] # classes seen in y, unique labels
self.n_classes_ = -1 # number of unique classes in y
self._class_dictionary = {}
self._estimator_type = "classifier"

super().__init__()

Expand Down
17 changes: 17 additions & 0 deletions aeon/classification/deep_learning/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
"""
Abstract base class for the Keras neural network classifiers.
class name: BaseDeepClassifier
Defining methods:
fitting - fit(self, X, y)
predicting - predict(self, X)
- predict_proba(self, X)
model building - build_model(self, input_shape, n_classes) (abstract method)
Inherited inspection methods:
hyper-parameter inspection - get_params()
fitted parameter inspection - get_fitted_params()
State:
fitted model/strategy - by convention, any attributes ending in "_"
fitted state flag - is_fitted (property)
fitted state inspection - check_is_fitted()
This class sits between BaseClassifier and the deep learning classifiers so that
tags, _predict and _predict_proba can be generalised.
"""
Expand Down
15 changes: 14 additions & 1 deletion aeon/classification/feature_based/_catch22.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,17 @@ class Catch22Classifier(BaseClassifier):
if None a 'prefer' value of "threads" is used by default.
Valid options are "loky", "multiprocessing", "threading" or a custom backend.
See the joblib Parallel documentation for more details.
class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
Only used when ``estimator`` is None, in which case it is passed to the default
RandomForestClassifier.
From sklearn documentation:
If not given, all classes are supposed to have weight one.
The "balanced" mode uses the values of y to automatically adjust weights
inversely proportional to class frequencies in the input data as
n_samples / (n_classes * np.bincount(y))
The "balanced_subsample" mode is the same as "balanced" except that weights
are computed based on the bootstrap sample for every tree grown.
For multi-output, the weights of each column of y will be multiplied.
Note that these weights will be multiplied with sample_weight (passed through
the fit method) if sample_weight is specified.
Attributes
----------
Expand Down Expand Up @@ -132,6 +143,7 @@ def __init__(
random_state=None,
n_jobs=1,
parallel_backend=None,
class_weight=None,
):
self.features = features
self.catch24 = catch24
Expand All @@ -142,6 +154,7 @@ def __init__(
self.random_state = random_state
self.n_jobs = n_jobs
self.parallel_backend = parallel_backend
self.class_weight = class_weight

super().__init__()

Expand Down Expand Up @@ -175,7 +188,7 @@ def _fit(self, X, y):

self.estimator_ = _clone_estimator(
(
RandomForestClassifier(n_estimators=200)
RandomForestClassifier(n_estimators=200, class_weight=self.class_weight)
if self.estimator is None
else self.estimator
),
Expand Down
18 changes: 16 additions & 2 deletions aeon/classification/feature_based/_signature_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,17 @@ class SignatureClassifier(BaseClassifier):
Signature truncation depth.
random_state : int, default=None
If `int`, random_state is the seed used by the random number generator;
class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
Only used when ``estimator`` is None, in which case it is passed to the default
RandomForestClassifier.
From sklearn documentation:
If not given, all classes are supposed to have weight one.
The "balanced" mode uses the values of y to automatically adjust weights
inversely proportional to class frequencies in the input data as
n_samples / (n_classes * np.bincount(y))
The "balanced_subsample" mode is the same as "balanced" except that weights
are computed based on the bootstrap sample for every tree grown.
For multi-output, the weights of each column of y will be multiplied.
Note that these weights will be multiplied with sample_weight (passed through
the fit method) if sample_weight is specified.
Attributes
----------
Expand Down Expand Up @@ -105,6 +116,7 @@ def __init__(
sig_tfm="signature",
depth=4,
random_state=None,
class_weight=None,
):
self.estimator = estimator
self.augmentation_list = augmentation_list
Expand All @@ -116,7 +128,7 @@ def __init__(
self.sig_tfm = sig_tfm
self.depth = depth
self.random_state = random_state

self.class_weight = class_weight
super().__init__()

self.signature_method = SignatureTransformer(
Expand All @@ -135,7 +147,9 @@ def _setup_classification_pipeline(self):
"""Set up the full signature method pipeline."""
# Use rf if no classifier is set
if self.estimator is None:
classifier = RandomForestClassifier(random_state=self.random_state)
classifier = RandomForestClassifier(
random_state=self.random_state, class_weight=self.class_weight
)
else:
classifier = _clone_estimator(self.estimator, self.random_state)

Expand Down
16 changes: 15 additions & 1 deletion aeon/classification/feature_based/_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,17 @@ class SummaryClassifier(BaseClassifier):
If `RandomState` instance, random_state is the random number generator;
If `None`, the random number generator is the `RandomState` instance used
by `np.random`.
class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
Only used when ``estimator`` is None, in which case it is passed to the default
RandomForestClassifier.
From sklearn documentation:
If not given, all classes are supposed to have weight one.
The "balanced" mode uses the values of y to automatically adjust weights
inversely proportional to class frequencies in the input data as
n_samples / (n_classes * np.bincount(y))
The "balanced_subsample" mode is the same as "balanced" except that weights
are computed based on the bootstrap sample for every tree grown.
For multi-output, the weights of each column of y will be multiplied.
Note that these weights will be multiplied with sample_weight (passed through
the fit method) if sample_weight is specified.
Attributes
----------
Expand Down Expand Up @@ -85,13 +96,16 @@ def __init__(
estimator=None,
n_jobs=1,
random_state=None,
class_weight=None,
):
self.summary_stats = summary_stats
self.estimator = estimator

self.n_jobs = n_jobs
self.random_state = random_state

self.class_weight = class_weight

super().__init__()

def _fit(self, X, y):
Expand Down Expand Up @@ -120,7 +134,7 @@ def _fit(self, X, y):

self.estimator_ = _clone_estimator(
(
RandomForestClassifier(n_estimators=200)
RandomForestClassifier(n_estimators=200, class_weight=self.class_weight)
if self.estimator is None
else self.estimator
),
Expand Down
15 changes: 14 additions & 1 deletion aeon/classification/feature_based/_tsfresh.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ class TSFreshClassifier(BaseClassifier):
If `RandomState` instance, random_state is the random number generator;
If `None`, the random number generator is the `RandomState` instance used
by `np.random`.
class_weight : {"balanced", "balanced_subsample"}, dict or list of dicts, default=None
Only used when ``estimator`` is None, in which case it is passed to the default
RandomForestClassifier.
From sklearn documentation:
If not given, all classes are supposed to have weight one.
The "balanced" mode uses the values of y to automatically adjust weights
inversely proportional to class frequencies in the input data as
n_samples / (n_classes * np.bincount(y))
The "balanced_subsample" mode is the same as "balanced" except that weights
are computed based on the bootstrap sample for every tree grown.
For multi-output, the weights of each column of y will be multiplied.
Note that these weights will be multiplied with sample_weight (passed through
the fit method) if sample_weight is specified.
Attributes
----------
Expand Down Expand Up @@ -86,6 +97,7 @@ def __init__(
n_jobs=1,
chunksize=None,
random_state=None,
class_weight=None,
):
self.default_fc_parameters = default_fc_parameters
self.relevant_feature_extractor = relevant_feature_extractor
Expand All @@ -99,6 +111,7 @@ def __init__(
self._transformer = None
self._return_majority_class = False
self._majority_class = 0
self.class_weight = class_weight

super().__init__()

Expand Down Expand Up @@ -137,7 +150,7 @@ def _fit(self, X, y):
)
self.estimator_ = _clone_estimator(
(
RandomForestClassifier(n_estimators=200)
RandomForestClassifier(n_estimators=200, class_weight=self.class_weight)
if self.estimator is None
else self.estimator
),
Expand Down
19 changes: 19 additions & 0 deletions aeon/classification/feature_based/tests/test_catch22.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Test catch 22 classifier."""

import numpy as np
import pytest
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifier

Expand All @@ -19,3 +20,21 @@ def test_catch22():
c22.fit(X, y)
p = c22.predict_proba(X)
assert np.all(np.isin(p, [0, 1]))


@pytest.mark.parametrize("class_weight", ["balanced", "balanced_subsample"])
def test_catch22_classifier_with_class_weight(class_weight):
    """Test that class_weight reaches the default random forest.

    ``estimator`` is deliberately left as None: Catch22Classifier only applies
    ``class_weight`` to the RandomForestClassifier it builds itself, so passing
    an explicit estimator would leave the parameter untested.
    """
    X, y = make_example_3d_numpy(
        n_cases=10, n_channels=1, n_timepoints=12, return_y=True, random_state=0
    )
    clf = Catch22Classifier(
        outlier_norm=True,
        random_state=0,
        class_weight=class_weight,
    )
    clf.fit(X, y)

    # The fitted default forest must carry the requested weighting.
    assert isinstance(clf.estimator_, RandomForestClassifier)
    assert clf.estimator_.class_weight == class_weight

    predictions = clf.predict(X)
    assert len(predictions) == len(y)
    assert set(predictions).issubset(set(y))
21 changes: 21 additions & 0 deletions aeon/classification/feature_based/tests/test_signature.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,24 @@ def test_signature_classifier():
cls = SignatureClassifier(estimator=None)
cls._fit(X, y)
assert isinstance(cls.pipeline.named_steps["classifier"], RandomForestClassifier)


@pytest.mark.skipif(
    not _check_soft_dependencies("esig", severity="none"),
    reason="skip test if required soft dependency esig not available",
)
@pytest.mark.parametrize("class_weight", ["balanced", "balanced_subsample"])
def test_signature_classifier_with_class_weight(class_weight):
    """Test that class_weight reaches the default random forest.

    ``estimator`` is deliberately left as None: SignatureClassifier only applies
    ``class_weight`` to the RandomForestClassifier it builds itself, so passing
    an explicit estimator would leave the parameter untested.
    """
    X, y = make_example_3d_numpy(
        n_cases=10, n_channels=1, n_timepoints=12, return_y=True, random_state=0
    )
    clf = SignatureClassifier(
        random_state=0,
        class_weight=class_weight,
    )
    clf.fit(X, y)

    # The final pipeline step must be the default forest with the weighting set.
    final_step = clf.pipeline.named_steps["classifier"]
    assert isinstance(final_step, RandomForestClassifier)
    assert final_step.class_weight == class_weight

    predictions = clf.predict(X)
    assert len(predictions) == len(y)
    assert set(predictions).issubset(set(y))
18 changes: 18 additions & 0 deletions aeon/classification/feature_based/tests/test_summary.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Test summary classifier."""

import numpy as np
import pytest
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import RidgeClassifier

Expand All @@ -19,3 +20,20 @@ def test_summary_classifier():
cls.fit(X, y)
p = cls.predict_proba(X)
assert np.all(np.isin(p, [0, 1]))


@pytest.mark.parametrize("class_weight", ["balanced", "balanced_subsample"])
def test_summary_classifier_with_class_weight(class_weight):
    """Test that class_weight reaches the default random forest.

    ``estimator`` is deliberately left as None: SummaryClassifier only applies
    ``class_weight`` to the RandomForestClassifier it builds itself, so passing
    an explicit estimator would leave the parameter untested.
    """
    X, y = make_example_3d_numpy(
        n_cases=10, n_channels=1, n_timepoints=12, return_y=True, random_state=0
    )
    clf = SummaryClassifier(
        random_state=0,
        class_weight=class_weight,
    )
    clf.fit(X, y)

    # The fitted default forest must carry the requested weighting.
    assert isinstance(clf.estimator_, RandomForestClassifier)
    assert clf.estimator_.class_weight == class_weight

    predictions = clf.predict(X)
    assert len(predictions) == len(y)
    assert set(predictions).issubset(set(y))
Loading

0 comments on commit 3d648ef

Please sign in to comment.