Merge pull request #264 from juaml/chore/repo-maint
[MAINT]: Regular repository maintenance
synchon authored May 14, 2024
2 parents 215a773 + 40dffa2 commit 59c487d
Showing 19 changed files with 75 additions and 71 deletions.
18 changes: 9 additions & 9 deletions .pre-commit-config.yaml
@@ -4,7 +4,7 @@ ci:

 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.6.0
     hooks:
       - id: check-ast
       - id: check-docstring-first
@@ -17,27 +17,27 @@ repos:
       - id: trailing-whitespace
        args: [--markdown-linebreak-ext=md]
   - repo: https://github.com/abravalheri/validate-pyproject
-    rev: v0.12.2
+    rev: v0.17
     hooks:
       - id: validate-pyproject
-  - repo: https://github.com/psf/black
-    rev: 23.3.0
+  - repo: https://github.com/psf/black-pre-commit-mirror
+    rev: 24.4.2
     hooks:
       - id: black
        exclude: ^(docs/|examples/)
        args: [--check]
-  - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.267
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.4.4
     hooks:
       - id: ruff
        exclude: ^(__init__.py)
-       args: [--format, grouped, --show-fixes]
+       args: [--output-format, grouped, --show-fixes]
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.4
+    rev: v2.2.6
     hooks:
       - id: codespell
        exclude: ^(.github/|docs/)
-       args: [--config, tox.ini]
+       args: [--toml, pyproject.toml]
   - repo: https://github.com/pre-commit/pygrep-hooks
     rev: v1.10.0
     hooks:
12 changes: 6 additions & 6 deletions docs/index.rst
@@ -86,14 +86,14 @@ Indices and tables
 Indices and tables
 ==================

-If you use julearn in a scientific publication, please use the following
+If you use julearn in a scientific publication, please use the following
 reference

-Hamdan, Sami, Shammi More, Leonard Sasse, Vera Komeyer,
-Kaustubh R. Patil, and Federico Raimondo. ‘Julearn:
-An Easy-to-Use Library for Leakage-Free Evaluation and Inspection of
-ML Models’. arXiv, 19 October 2023.
+Hamdan, Sami, Shammi More, Leonard Sasse, Vera Komeyer,
+Kaustubh R. Patil, and Federico Raimondo. ‘Julearn:
+An Easy-to-Use Library for Leakage-Free Evaluation and Inspection of
+ML Models’. arXiv, 19 October 2023.
 https://doi.org/10.48550/arXiv.2310.12568.

-Since julearn is also heavily reliant on scikit-learn, please also cite
+Since julearn is also heavily reliant on scikit-learn, please also cite
 them: https://scikit-learn.org/stable/about.html#citing-scikit-learn
18 changes: 9 additions & 9 deletions examples/99_docs/run_hyperparameters_docs.py
@@ -253,7 +253,7 @@
 # hyperparameters values.
 #
 # Other searchers that ``julearn`` provides are the
-# :class:`~sklearn.model_selection.RandomizedSearchCV`,
+# :class:`~sklearn.model_selection.RandomizedSearchCV`,
 # :class:`~skopt.BayesSearchCV` and
 # :class:`~optuna_integration.sklearn.OptunaSearchCV`.
 #
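
For orientation, a minimal sketch of how each of these searchers might be requested through the ``search_params`` argument. The ``optuna`` entry matches the ``conftest.py`` fixture changed in this PR; the ``grid``, ``random`` and ``bayes`` entries are assumptions following the same convention.

# search_params dictionaries per searcher backend; only the "optuna"
# values are confirmed by this PR's test fixtures, the rest are assumed.
grid_params = {"kind": "grid", "cv": 3}
random_params = {"kind": "random", "n_iter": 20, "cv": 3}
bayes_params = {"kind": "bayes", "n_iter": 20, "cv": 3}
optuna_params = {"kind": "optuna", "n_trials": 10, "cv": 3}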
@@ -383,14 +383,14 @@
 # searcher. However, the optuna searcher behaviour is controlled by a
 # :class:`~optuna.study.Study` object. This object can be passed to the
 # searcher using the ``study`` parameter in the ``search_params`` dictionary.
-#
+#
 # .. important::
 #    The optuna searcher requires that all the hyperparameters are specified
-#    as distributions, even the categorical ones.
+#    as distributions, even the categorical ones.
 #
 # We first modify the pipeline creator so the ``select_k`` parameter is
 # specified as a distribution. We exemplarily use a categorical distribution
-# for the ``class_weight`` hyperparameter, trying the ``"balanced"`` and
+# for the ``class_weight`` hyperparameter, trying the ``"balanced"`` and
 # ``None`` values.

 creator = PipelineCreator(problem_type="classification")
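
The example continues in the repository. As a rough, self-contained sketch (assuming ``run_cross_validation``'s usual signature and the iris dataset used throughout the julearn docs), passing a pre-configured ``Study`` could look like this:

import optuna
from seaborn import load_dataset
from julearn import run_cross_validation
from julearn.pipeline import PipelineCreator

df_iris = load_dataset("iris")

# All hyperparameters as distributions, as the important:: note requires.
creator = PipelineCreator(problem_type="classification")
creator.add("zscore", with_mean=(True, False, "categorical"))
creator.add("svm", C=(0.01, 10, "log-uniform"))

# A Study controls the optuna searcher's behaviour; pass it via "study".
study = optuna.create_study(direction="maximize")
scores = run_cross_validation(
    X=["sepal_length", "sepal_width", "petal_length", "petal_width"],
    y="species",
    data=df_iris,
    model=creator,
    search_params={"kind": "optuna", "study": study, "n_trials": 10, "cv": 3},
)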
@@ -445,12 +445,12 @@
 # searcher, bayesian searcher and optuna searcher. The distributions are
 # either specified using a toolbox-specific method or a tuple convention with the
 # following format: ``(low, high, distribution)`` where the distribution can
-# be either ``"log-uniform"`` or ``"uniform"`` or
+# be either ``"log-uniform"`` or ``"uniform"`` or
 # ``(a, b, c, d, ..., "categorical")`` where ``a``, ``b``, ``c``, ``d``, etc.
 # are the possible categorical values for the hyperparameter.
 #
-# For example, we can specify the ``C`` and ``gamma`` hyperparameters of the
-# :class:`~sklearn.svm.SVC` as log-uniform distributions, while keeping
+# For example, we can specify the ``C`` and ``gamma`` hyperparameters of the
+# :class:`~sklearn.svm.SVC` as log-uniform distributions, while keeping
 # the ``with_mean`` parameter of the
 # :class:`~sklearn.preprocessing.StandardScaler` as a categorical parameter
 # with two options.
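
A sketch of that example using the tuple convention just described (``zscore`` and ``svm`` are julearn step names; the hyperparameter names are scikit-learn's):

creator = PipelineCreator(problem_type="classification")
# Categorical tuple: (a, b, ..., "categorical")
creator.add("zscore", with_mean=(True, False, "categorical"))
# Continuous tuples: (low, high, distribution)
creator.add(
    "svm",
    C=(0.01, 10, "log-uniform"),
    gamma=(1e-3, 1e-1, "log-uniform"),
)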
@@ -469,7 +469,7 @@
 # While this will work for any of the ``random``, ``bayes`` or ``optuna``
 # searcher options, it is important to note that both ``bayes`` and ``optuna``
 # searchers accept further parameters to specify distributions. For example,
-# the ``bayes`` searcher distributions are defined using the
+# the ``bayes`` searcher distributions are defined using the
 # :class:`~skopt.space.space.Categorical`, :class:`~skopt.space.space.Integer`
 # and :class:`~skopt.space.space.Real` classes.
 #
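
For instance, a sketch of the same pipeline with ``skopt`` space objects passed directly, assuming julearn forwards them to :class:`~skopt.BayesSearchCV` unchanged:

from skopt.space import Categorical, Real

creator = PipelineCreator(problem_type="classification")
creator.add("zscore", with_mean=Categorical([True, False]))
creator.add(
    "svm",
    C=Real(0.01, 10, prior="log-uniform"),
    gamma=Real(1e-3, 1e-1, prior="log-uniform"),
)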
@@ -493,7 +493,7 @@
 #
 #
 # For example, we can define a uniform distribution from 0.5 to 0.9 with a 0.05
-# step for the ``n_components`` of a :class:`~sklearn.decomposition.PCA`
+# step for the ``n_components`` of a :class:`~sklearn.decomposition.PCA`
 # transformer, while keeping a log-uniform distribution for the ``C`` and
 # ``gamma`` hyperparameters of the :class:`~sklearn.svm.SVC` model.
 from optuna.distributions import FloatDistribution
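
The code is truncated here; continuing from the import shown above, a sketch of how such distributions might be written with optuna's :class:`~optuna.distributions.FloatDistribution` (``step`` and ``log`` are real optuna parameters, but optuna does not allow combining the two):

creator = PipelineCreator(problem_type="classification")
# Uniform distribution from 0.5 to 0.9 in steps of 0.05.
creator.add("pca", n_components=FloatDistribution(0.5, 0.9, step=0.05))
# Log-uniform distributions for the SVC hyperparameters.
creator.add(
    "svm",
    C=FloatDistribution(0.01, 10, log=True),
    gamma=FloatDistribution(1e-3, 1e-1, log=True),
)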
2 changes: 1 addition & 1 deletion ignore_words.txt
@@ -5,4 +5,4 @@ fpr
 master
 whis
 jupyter
-arange
+arange
2 changes: 2 additions & 0 deletions julearn/conftest.py
@@ -286,6 +286,7 @@ def bayes_search_params(request: FixtureRequest) -> Optional[Dict]:

     return request.param

+
 @fixture(
     params=[
         {"kind": "optuna", "n_trials": 10, "cv": 3},
@@ -310,6 +311,7 @@ def optuna_search_params(request: FixtureRequest) -> Optional[Dict]:

     return request.param

+
 _tuning_params = {
     "zscore": {"with_mean": [True, False]},
     "pca": {"n_components": [0.2, 0.7]},
1 change: 0 additions & 1 deletion julearn/model_selection/__init__.py
@@ -21,4 +21,3 @@

 register_bayes_searcher()
 register_optuna_searcher()
-
6 changes: 4 additions & 2 deletions julearn/model_selection/_optuna_searcher.py
@@ -89,8 +89,10 @@ def _prepare_optuna_hyperparameters_distributions(
             )
             out[k] = optd.FloatDistribution(v[0], v[1], log=True)
         elif v[2] == "categorical":
-            logger.info(f"Hyperparameter {k} is categorical with 2 "
-                        f"options: [{v[0]} and {v[1]}]")
+            logger.info(
+                f"Hyperparameter {k} is categorical with 2 "
+                f"options: [{v[0]} and {v[1]}]"
+            )
             out[k] = optd.CategoricalDistribution((v[0], v[1]))
         else:
             out[k] = v
1 change: 1 addition & 0 deletions julearn/model_selection/tests/test_optuna_searcher.py
@@ -14,6 +14,7 @@

 optd = pytest.importorskip("optuna.distributions")

+
 @pytest.mark.parametrize(
     "params_to_tune,expected_types, expected_dist",
     [
1 change: 1 addition & 0 deletions julearn/model_selection/tests/test_skopt_searcher.py
@@ -14,6 +14,7 @@

 sksp = pytest.importorskip("skopt.space")

+
 @pytest.mark.parametrize(
     "params_to_tune,expected_types, expected_dist",
     [
5 changes: 1 addition & 4 deletions julearn/pipeline/merger.py
@@ -89,10 +89,7 @@ def merge_pipelines(
         # TODO: Fix this comparison, as it always returns False.
         for s in pipelines[1:]:
             if isinstance(s, BaseSearchCV):
-                if (
-                    s.estimator.named_steps[t_step_name]  # type: ignore
-                    != t
-                ):
+                if s.estimator.named_steps[t_step_name] != t:  # type: ignore
                     different_steps.append(t_step_name)
                     break
             else:
12 changes: 8 additions & 4 deletions julearn/pipeline/tests/test_merger.py
@@ -51,10 +51,14 @@ def test_merger_pipelines() -> None:
     assert "scaler" == named_steps[1]
     assert "rf" == named_steps[2]
     assert len(merged.param_distributions) == 3  # type: ignore
-    assert (
-        merged.param_distributions[-1]["rf__max_features"]  # type: ignore
-        == [2, 3, 7, 42]
-    )
+    assert merged.param_distributions[-1][
+        "rf__max_features"
+    ] == [  # type: ignore
+        2,
+        3,
+        7,
+        42,
+    ]


 def test_merger_errors() -> None:
25 changes: 10 additions & 15 deletions julearn/tests/test_api.py
@@ -455,9 +455,8 @@ def test_tune_hyperparam_gridsearch(df_iris: pd.DataFrame) -> None:
     )

     assert len(actual.columns) == len(expected) + 5  # type: ignore
-    assert (
-        len(actual["test_accuracy"])  # type: ignore
-        == len(expected["test_accuracy"])
+    assert len(actual["test_accuracy"]) == len(  # type: ignore
+        expected["test_accuracy"]
     )
     assert all(
         a == b
@@ -540,9 +539,8 @@ def test_tune_hyperparam_gridsearch_groups(df_iris: pd.DataFrame) -> None:
     )

     assert len(actual.columns) == len(expected) + 5  # type: ignore
-    assert (
-        len(actual["test_accuracy"])  # type: ignore
-        == len(expected["test_accuracy"])
+    assert len(actual["test_accuracy"]) == len(  # type: ignore
+        expected["test_accuracy"]
     )
     assert all(
         a == b
@@ -630,9 +628,8 @@ def test_tune_hyperparam_randomsearch(df_iris: pd.DataFrame) -> None:
     )

     assert len(actual.columns) == len(expected) + 5  # type: ignore
-    assert (
-        len(actual["test_accuracy"])  # type: ignore
-        == len(expected["test_accuracy"])
+    assert len(actual["test_accuracy"]) == len(  # type: ignore
+        expected["test_accuracy"]
     )
     assert all(
         a == b
@@ -746,13 +743,11 @@ def test_tune_hyperparams_multiple_grid(df_iris: pd.DataFrame) -> None:

     assert len(actual1.columns) == len(expected) + 5  # type: ignore
     assert len(actual2.columns) == len(expected) + 5  # type: ignore
-    assert (
-        len(actual1["test_accuracy"])  # type: ignore
-        == len(expected["test_accuracy"])
+    assert len(actual1["test_accuracy"]) == len(  # type: ignore
+        expected["test_accuracy"]
     )
-    assert (
-        len(actual2["test_accuracy"])  # type: ignore
-        == len(expected["test_accuracy"])
+    assert len(actual2["test_accuracy"]) == len(  # type: ignore
+        expected["test_accuracy"]
     )
     assert all(
         a == b
8 changes: 5 additions & 3 deletions julearn/transformers/cbpm.py
@@ -284,8 +284,10 @@ def get_feature_names_out(self, input_features=None):
         cols = (
             ["positive"]
             if self.used_corr_sign_ == "pos"
-            else ["negative"]
-            if self.used_corr_sign_ == "neg"
-            else ["positive", "negative"]
+            else (
+                ["negative"]
+                if self.used_corr_sign_ == "neg"
+                else ["positive", "negative"]
+            )
         )
         return np.array(cols, dtype=object)
6 changes: 3 additions & 3 deletions julearn/transformers/dataframe/change_column_types.py
@@ -75,9 +75,9 @@ def _fit(
             if "__:type:__" in col:
                 name, old_type = col.split("__:type:__")
                 if old_type in self.X_types_renamer:
-                    to_rename[
-                        col
-                    ] = f"{name}__:type:__{self.X_types_renamer[old_type]}"
+                    to_rename[col] = (
+                        f"{name}__:type:__{self.X_types_renamer[old_type]}"
+                    )
         self._renamer = to_rename
         return self

6 changes: 3 additions & 3 deletions julearn/transformers/tests/test_jucolumntransformers.py
@@ -156,9 +156,9 @@ def test_JuColumnTransformer_row_select():
     )

     mean_both = (
-        transformer_both.fit(
-            X
-        ).column_transformer_.transformers_[0][1].mean_  # type: ignore
+        transformer_both.fit(X)
+        .column_transformer_.transformers_[0][1]
+        .mean_  # type: ignore
     )

     assert_almost_equal(
1 change: 1 addition & 0 deletions julearn/utils/checks.py
@@ -1,4 +1,5 @@
 """Implement various checks for the input of the functions."""
+
 # Author: Federico Raimondo <f.raimondo@fz-juelich.de>
 # License: BSD 3 clause

4 changes: 1 addition & 3 deletions julearn/utils/logging.py
@@ -222,9 +222,7 @@ def raise_error(
     raise klass(msg)


-def warn_with_log(
-    msg: str, category: Type[Warning] = RuntimeWarning
-) -> None:
+def warn_with_log(msg: str, category: Type[Warning] = RuntimeWarning) -> None:
     """Warn, but first log it.

     Parameters
2 changes: 1 addition & 1 deletion julearn/utils/testing.py
@@ -185,7 +185,7 @@ def do_scoring_test(
     scorers: List[str],
     groups: Optional[str] = None,
     X_types: Optional[Dict[str, List[str]]] = None,  # noqa: N803
-    cv: Union[int, BaseCrossValidator] = 5,
+    cv: Union[int, BaseCrossValidator] = 5,
     sk_y: Optional[np.ndarray] = None,
     decimal: int = 5,
 ):
16 changes: 9 additions & 7 deletions pyproject.toml
@@ -104,6 +104,13 @@ builtin = "clear,rare,informal,names,usage,code"

 [tool.ruff]
 line-length = 79
+extend-exclude = [
+    "__init__.py",
+    "docs",
+    "examples",
+]
+
+[tool.ruff.lint]
 select = [
     # flake8-bugbear
     "B",
@@ -136,11 +143,6 @@ select = [
     # flake8-2020
     "YTT",
 ]
-extend-exclude = [
-    "__init__.py",
-    "docs",
-    "examples",
-]
 extend-ignore = [
     # Use of `functools.lru_cache` or `functools.cache` on methods can lead to
     # memory leaks. The cache may retain instance references, preventing garbage
@@ -163,7 +165,7 @@
     "PGH003",
 ]

-[tool.ruff.isort]
+[tool.ruff.lint.isort]
 lines-after-imports = 2
 known-first-party = ["julearn"]
 known-third-party =[
@@ -178,7 +180,7 @@ known-third-party =[
     "pytest",
 ]

-[tool.ruff.mccabe]
+[tool.ruff.lint.mccabe]
 max-complexity = 20

 [tool.towncrier]
