Merge branch 'main' into 2341-enh-indexsearch-class-with-attimo-algorithm
aeon-toolkit · Feb 14, 2025 · 9c671f6 · 9c671f6
2 parents 3587de1 + 7617d53
commit 9c671f6
Show file tree

Hide file tree

Showing 15 changed files with 112 additions and 72 deletions.
diff --git a/.github/workflows/periodic_github_maintenace.yml b/.github/workflows/periodic_github_maintenace.yml
@@ -23,7 +23,7 @@ jobs:
           private-key: ${{ secrets.PR_APP_KEY }}
 
       - name: Stale Branches
-        uses: crs-k/stale-branches@v6.0.2
+        uses: crs-k/stale-branches@v7.0.0
         with:
           repo-token: ${{ steps.app-token.outputs.token }}
           days-before-stale: 140

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -29,7 +29,7 @@ repos:
         args: [ "--create", "--python-folders", "aeon" ]
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.3
+    rev: v0.9.5
     hooks:
       - id: ruff
         args: [ "--fix"]
@@ -41,7 +41,7 @@ repos:
         args: [ "--py39-plus" ]
 
   - repo: https://github.com/pycqa/isort
-    rev: 5.13.2
+    rev: 6.0.0
     hooks:
       - id: isort
         name: isort
@@ -55,7 +55,7 @@ repos:
         args: [ "--max-line-length=88", "--extend-ignore=E203" ]
 
   - repo: https://github.com/psf/black
-    rev: 24.10.0
+    rev: 25.1.0
     hooks:
       - id: black
         language_version: python3

diff --git a/aeon/classification/dictionary_based/_cboss.py b/aeon/classification/dictionary_based/_cboss.py
@@ -93,6 +93,13 @@ class ContractableBOSS(BaseClassifier):
     weights_ :
         Weight of each classifier in the ensemble.
 
+    Raises
+    ------
+    ValueError
+        Raised when ``min_window`` is greater than ``max_window + 1``.
+        This ensures that ``min_window`` does not exceed ``max_window``,
+        preventing invalid window size configurations.
+
     See Also
     --------
     BOSSEnsemble, IndividualBOSS
@@ -305,7 +312,6 @@ def _predict(self, X) -> np.ndarray:
         -------
         1D np.ndarray
             Predicted class labels shape = (n_cases).
-
         """
         rng = check_random_state(self.random_state)
         return np.array(

diff --git a/aeon/clustering/averaging/_averaging.py b/aeon/clustering/averaging/_averaging.py
@@ -38,7 +38,7 @@ def mean_average(X: np.ndarray, **kwargs) -> np.ndarray:
 
 
 def _resolve_average_callable(
-    averaging_method: Union[str, Callable[[np.ndarray, dict], np.ndarray]]
+    averaging_method: Union[str, Callable[[np.ndarray, dict], np.ndarray]],
 ) -> Callable[[np.ndarray, dict], np.ndarray]:
     """Resolve a string or callable to a averaging callable.
 

diff --git a/aeon/datasets/_tsad_data_loaders.py b/aeon/datasets/_tsad_data_loaders.py
@@ -269,7 +269,7 @@ def load_from_timeeval_csv_file(path: Path) -> tuple[np.ndarray, np.ndarray]:
 
 
 def load_kdd_tsad_135(
-    split: Literal["train", "test"] = "test"
+    split: Literal["train", "test"] = "test",
 ) -> tuple[np.ndarray, np.ndarray]:
     """Load the KDD-TSAD 135 UCR_Anomaly_Internal_Bleeding16 univariate dataset.
 
@@ -363,7 +363,7 @@ def load_daphnet_s06r02e0() -> tuple[np.ndarray, np.ndarray]:
 def load_ecg_diff_count_3(
     learning_type: Literal[
         "unsupervised", "semi-supervised", "supervised"
-    ] = "unsupervised"
+    ] = "unsupervised",
 ) -> Union[
     tuple[np.ndarray, np.ndarray], tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]
 ]:

diff --git a/aeon/distances/_distance.py b/aeon/distances/_distance.py
@@ -498,7 +498,7 @@ def get_distance_function(method: Union[str, DistanceFunction]) -> DistanceFunct
 
 
 def get_pairwise_distance_function(
-    method: Union[str, PairwiseFunction]
+    method: Union[str, PairwiseFunction],
 ) -> PairwiseFunction:
     """Get the pairwise distance function for a given method string or callable.
 

diff --git a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py
@@ -9,7 +9,6 @@
 
 import joblib
 import numpy as np
-from numpy.testing import assert_array_almost_equal
 from sklearn.exceptions import NotFittedError
 
 from aeon.anomaly_detection.base import BaseAnomalyDetector
@@ -625,20 +624,18 @@ def check_persistence_via_pickle(estimator, datatype):
     for method in NON_STATE_CHANGING_METHODS_ARRAYLIKE:
         if hasattr(estimator, method) and callable(getattr(estimator, method)):
             output = _run_estimator_method(estimator, method, datatype, "test")
-            assert_array_almost_equal(
-                output,
-                results[i],
-                err_msg=f"Running {method} after fit twice with test "
-                f"parameters gives different results.",
-            )
+            same, msg = deep_equals(output, results[i], return_msg=True)
+            if not same:
+                raise ValueError(
+                    f"Running {type(estimator)} {method} with test parameters after "
+                    f"serialisation gives different results. "
+                    f"Check equivalence message: {msg}"
+                )
             i += 1
 
 
 def check_fit_deterministic(estimator, datatype):
-    """Test that fit is deterministic.
-
-    Check that calling fit twice is equivalent to calling it once.
-    """
+    """Check that calling fit twice is equivalent to calling it once."""
     estimator = _clone_estimator(estimator, random_state=0)
     _run_estimator_method(estimator, "fit", datatype, "train")
 
@@ -648,17 +645,19 @@ def check_fit_deterministic(estimator, datatype):
             output = _run_estimator_method(estimator, method, datatype, "test")
             results.append(output)
 
-    # run fit and other methods a second time
+    # run fit a second time
     _run_estimator_method(estimator, "fit", datatype, "train")
 
+    # check output of predict/transform etc does not change
     i = 0
     for method in NON_STATE_CHANGING_METHODS_ARRAYLIKE:
         if hasattr(estimator, method) and callable(getattr(estimator, method)):
             output = _run_estimator_method(estimator, method, datatype, "test")
-            assert_array_almost_equal(
-                output,
-                results[i],
-                err_msg=f"Running {method} after fit twice with test "
-                f"parameters gives different results.",
-            )
+            same, msg = deep_equals(output, results[i], return_msg=True)
+            if not same:
+                raise ValueError(
+                    f"Running {type(estimator)} {method} with test parameters after "
+                    f"two calls to fit gives different results. "
+                    f"Check equivalence message: {msg}"
+                )
             i += 1
diff --git a/aeon/testing/testing_config.py b/aeon/testing/testing_config.py
@@ -23,7 +23,10 @@
 NUMBA_DISABLED = os.environ.get("NUMBA_DISABLE_JIT") == "1"
 
 # exclude estimators here for short term fixes
-EXCLUDE_ESTIMATORS = ["REDCOMETS"]
+EXCLUDE_ESTIMATORS = [
+    "REDCOMETS",
+    "HydraTransformer",  # returns a pytorch Tensor
+]
 
 # Exclude specific tests for estimators here
 EXCLUDED_TESTS = {
@@ -49,7 +52,7 @@
     "RSASTClassifier": ["check_fit_deterministic"],
     "SAST": ["check_fit_deterministic"],
     "RSAST": ["check_fit_deterministic"],
-    "SFA": ["check_persistence_via_pickle", "check_fit_deterministic"],
+    "MatrixProfile": ["check_persistence_via_pickle"],
     # missed in legacy testing, changes state in predict/transform
     "FLUSSSegmenter": ["check_non_state_changing_method"],
     "InformationGainSegmenter": ["check_non_state_changing_method"],

diff --git a/aeon/testing/utils/deep_equals.py b/aeon/testing/utils/deep_equals.py
@@ -56,7 +56,7 @@ def _deep_equals(x, y, depth, ignore_index):
     elif isinstance(x, pd.DataFrame):
         return _dataframe_equals(x, y, depth, ignore_index)
     elif isinstance(x, np.ndarray):
-        return _numpy_equals(x, y, depth)
+        return _numpy_equals(x, y, depth, ignore_index)
     elif isinstance(x, (list, tuple)):
         return _list_equals(x, y, depth, ignore_index)
     elif isinstance(x, dict):
@@ -72,7 +72,7 @@ def _deep_equals(x, y, depth, ignore_index):
         eq = np.isnan(y)
         msg = "" if eq else f"x ({x}) != y ({y}), depth={depth}"
         return eq, msg
-    elif isinstance(x == y, bool):
+    elif isinstance(x == y, (bool, np.bool_)):
         eq = x == y
         msg = "" if eq else f"x ({x}) != y ({y}), depth={depth}"
         return eq, msg
@@ -128,13 +128,21 @@ def _dataframe_equals(x, y, depth, ignore_index):
         return eq, msg
 
 
-def _numpy_equals(x, y, depth):
+def _numpy_equals(x, y, depth, ignore_index):
     if x.dtype != y.dtype:
         return False, f"x.dtype ({x.dtype}) != y.dtype ({y.dtype})"
 
-    eq = np.allclose(x, y, equal_nan=True)
-    msg = "" if eq else f"x ({x}) != y ({y}), depth={depth}"
-    return eq, msg
+    if x.dtype == "object":
+        for i in range(len(x)):
+            eq, msg = _deep_equals(x[i], y[i], depth + 1, ignore_index)
+
+            if not eq:
+                return False, msg + f", idx={i}"
+    else:
+        eq = np.allclose(x, y, equal_nan=True)
+        msg = "" if eq else f"x ({x}) != y ({y}), depth={depth}"
+        return eq, msg
+    return True, ""
 
 
 def _csrmatrix_equals(x, y, depth):

diff --git a/aeon/testing/utils/tests/test_deep_equals.py b/aeon/testing/utils/tests/test_deep_equals.py
@@ -14,7 +14,7 @@
     42,
     [],
     (()),
-    [([([([()])])])],
+    [[[[()]]]],
     np.array([2, 3, 4]),
     np.array([2, 4, 5]),
     3.5,

diff --git a/docs/conf.py b/docs/conf.py
@@ -388,7 +388,7 @@ def _does_not_start_with_underscore(input_string):
 
             # For case where tag is not included output as not supported
             if not _val or _val is None:
-                data[abbrevation].append("\u274C")
+                data[abbrevation].append("\u274c")
             else:
                 data[abbrevation].append("\u2705")
 

diff --git a/docs/developer_guide.md b/docs/developer_guide.md
@@ -20,6 +20,27 @@ their [developer's guide](https://scikit-learn.org/stable/developers/index.html)
 :::{grid-item-card}
 :text-align: center
 
+Type Hints
+
+^^^
+
+Adding type hints to `aeon` code.
+
++++
+
+```{button-ref} developer_guide/adding_typehints
+:color: primary
+:click-parent:
+:expand:
+
+Type Hints
+```
+
+:::
+
+:::{grid-item-card}
+:text-align: center
+
 AEP's
 
 ^^^
@@ -190,6 +211,7 @@ Testing
 ```{toctree}
 :hidden:
 
+developer_guide/adding_typehints.md
 developer_guide/aep.md
 developer_guide/coding_standards.md
 developer_guide/dependencies.md

diff --git a/docs/developer_guide/adding_typehints.md b/docs/developer_guide/adding_typehints.md
@@ -1,53 +1,54 @@
 # Adding Type Hints
 
-## Introduction to Type Hints
-
-Type hints are a way to indicate the expected data types of variables, function parameters, and return values in Python. They enhance code readability and help with static type checking, making it easier to catch errors before runtime.
-
-
-Type hints act as a form of documentation that helps developers understand the types of arguments a function expects and what it returns.
-
-
-## Basic Syntax
-
-For example, here is a simple function whose argument and return type are declared in the annotations:
+Type hints are a way to indicate the expected data types of variables, function
+parameters, and return values. They enhance code readability and help with static
+type checking, making it easier to catch errors.
 
+For example, here is a simple function whose argument and return type are declared
+in the annotations:
 
 ```python
-def greeting(name: str) -> str:
-    return 'Hello ' + name
-```
-
-
-Learn more about type hints in [python docs](https://docs.python.org/3/library/typing.html) and [PEP 484](https://peps.python.org/pep-0484/)
-
-
-# Dealing with Soft Dependency Type Hints
+from typing import List
 
+def sum_ints_return_str(int_list: List[int]) -> str:
+    return str(sum(int_list))
+```
 
+Type hints are not currently mandatory in `aeon`, but we aim to progressively integrate
+them into the code base. Learn more about type hints in the
+[Python documentation](https://docs.python.org/3/library/typing.html)
+and [PEP 484](https://peps.python.org/pep-0484/).
 
-When working with models that have soft dependencies, additional considerations are required to ensure that your code remains robust and maintainable. Soft dependencies are optional packages or modules that your application does not require at runtime but may be used in specific situations, such as during type-checking or when certain features are enabled.
+## Soft Dependency Type Hints
 
- The typing.TYPE_CHECKING constant ensures that imports for type hints are only evaluated when type-checking is done and NOT in the runtime. This prevents errors when the soft dependancies are not available. Here is an example that of [PyODAdapter](https://github.com/aeon-toolkit/aeon/blob/main/aeon/anomaly_detection/_pyodadapter.py):
+When working with modules that use soft dependencies, additional considerations are
+required to ensure that your code can still run even without these dependencies
+installed.
 
+Here is an example snippet taken from [PyODAdapter](https://www.aeon-toolkit.org/en/stable/api_reference/auto_generated/aeon.anomaly_detection.PyODAdapter.html).
+It uses the `pyod` library, which is a soft dependency. The `TYPE_CHECKING` constant
+is used to ensure that the `pyod` library is only imported at the top level while type
+checking is performed. `from __future__ import annotations` is used to allow forward
+references in type hints. See [PEP 563](https://peps.python.org/pep-0563/) for more
+information. The `pyod` `BaseDetector` class can now be used in type hints with
+these additions.
 
  ```python
-from aeon.anomaly_detection.base import BaseAnomalyDetector
-from aeon.utils.validation._dependencies import _check_soft_dependencies
-from typing import TYPE_CHECKING, Any
+from __future__ import annotations
 
+from aeon.anomaly_detection.base import BaseAnomalyDetector
+from typing import TYPE_CHECKING
 
 if TYPE_CHECKING:
     from pyod.models.base import BaseDetector
 
-
 class PyODAdapter(BaseAnomalyDetector):
-    ...
-
-    def _is_pyod_model(model: Any) -> bool:
-        """Check if the provided model is a PyOD model."""
-        from pyod.models.base import BaseDetector
-
-        return isinstance(model, BaseDetector)
-   ...
+    def __init__(
+        self, pyod_model: BaseDetector, window_size: int = 10, stride: int = 1
+    ):
+        self.pyod_model = pyod_model
+        self.window_size = window_size
+        self.stride = stride
+
+        super().__init__(axis=0)
 ```
diff --git a/docs/developer_guide/documentation.md b/docs/developer_guide/documentation.md
@@ -154,7 +154,7 @@ Here are a few examples of `aeon` code with good documentation.
 
 ### Estimators
 
-[BOSSEnsemble](https://www.aeon-toolkit.org/en/latest/api_reference/auto_generated/aeon.classification.dictionary_based.BOSSEnsemble.html#aeon.classification.dictionary_based.BOSSEnsemble)
+[BOSSEnsemble](https://www.aeon-toolkit.org/en/stable/api_reference/auto_generated/aeon.classification.dictionary_based.BOSSEnsemble.html#aeon.classification.dictionary_based.BOSSEnsemble)
 
 ### Functions
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -176,6 +176,7 @@ addopts = '''
     --dist worksteal
     --reruns 2
     --only-rerun "crashed while running"
+    --only-rerun "zipfile.BadZipFile"
 '''
 filterwarnings = '''
     ignore::UserWarning