diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b121e86be..27bfab1ee 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@ ci:
 
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.6.0
     hooks:
       - id: check-ast
       - id: check-docstring-first
@@ -17,27 +17,27 @@ repos:
       - id: trailing-whitespace
         args: [--markdown-linebreak-ext=md]
   - repo: https://github.com/abravalheri/validate-pyproject
-    rev: v0.12.2
+    rev: v0.17
     hooks:
       - id: validate-pyproject
-  - repo: https://github.com/psf/black
-    rev: 23.3.0
+  - repo: https://github.com/psf/black-pre-commit-mirror
+    rev: 24.4.2
     hooks:
       - id: black
         exclude: ^(docs/|examples/)
         args: [--check]
-  - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.267
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.4.4
     hooks:
       - id: ruff
         exclude: ^(__init__.py)
-        args: [--format, grouped, --show-fixes]
+        args: [--output-format, grouped, --show-fixes]
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.4
+    rev: v2.2.6
     hooks:
      - id: codespell
         exclude: ^(.github/|docs/)
-        args: [--config, tox.ini]
+        args: [--toml, pyproject.toml]
   - repo: https://github.com/pre-commit/pygrep-hooks
     rev: v1.10.0
     hooks:
diff --git a/docs/index.rst b/docs/index.rst
index 9b4f9c587..1a52ebf55 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -86,14 +86,14 @@ Indices and tables
 Indices and tables
 ==================
 
-If you use julearn in a scientific publication, please use the following 
+If you use julearn in a scientific publication, please use the following
 reference
 
-    Hamdan, Sami, Shammi More, Leonard Sasse, Vera Komeyer, 
-    Kaustubh R. Patil, and Federico Raimondo. ‘Julearn: 
-    An Easy-to-Use Library for Leakage-Free Evaluation and Inspection of 
-    ML Models’. arXiv, 19 October 2023. 
+    Hamdan, Sami, Shammi More, Leonard Sasse, Vera Komeyer,
+    Kaustubh R. Patil, and Federico Raimondo. ‘Julearn:
+    An Easy-to-Use Library for Leakage-Free Evaluation and Inspection of
+    ML Models’. arXiv, 19 October 2023.
     https://doi.org/10.48550/arXiv.2310.12568.
 
-Since julearn is also heavily reliant on scikit-learn, please also cite 
+Since julearn is also heavily reliant on scikit-learn, please also cite
 them: https://scikit-learn.org/stable/about.html#citing-scikit-learn
diff --git a/examples/99_docs/run_hyperparameters_docs.py b/examples/99_docs/run_hyperparameters_docs.py
index fec76a8d7..ce476262a 100644
--- a/examples/99_docs/run_hyperparameters_docs.py
+++ b/examples/99_docs/run_hyperparameters_docs.py
@@ -253,7 +253,7 @@
 # hyperparameters values.
 #
 # Other searchers that ``julearn`` provides are the
-# :class:`~sklearn.model_selection.RandomizedSearchCV`, 
+# :class:`~sklearn.model_selection.RandomizedSearchCV`,
 # :class:`~skopt.BayesSearchCV` and
 # :class:`~optuna_integration.sklearn.OptunaSearchCV`.
 #
@@ -383,14 +383,14 @@
 # searcher. However, the optuna searcher behaviour is controlled by a
 # :class:`~optuna.study.Study` object. This object can be passed to the
 # searcher using the ``study`` parameter in the ``search_params`` dictionary.
-# 
+#
 # .. important::
 #     The optuna searcher requires that all the hyperparameters are specified
-#     as distributions, even the categorical ones. 
+#     as distributions, even the categorical ones.
 #
 # We first modify the pipeline creator so the ``select_k`` parameter is
 # specified as a distribution. We exemplarily use a categorical distribution
-# for the ``class_weight`` hyperparameter, trying the ``"balanced"`` and 
+# for the ``class_weight`` hyperparameter, trying the ``"balanced"`` and
 # ``None`` values.
 
 creator = PipelineCreator(problem_type="classification")
@@ -445,12 +445,12 @@
 # searcher, bayesian searcher and optuna searcher. The distributions are
 # either specified toolbox-specific method or a tuple convention with the
 # following format: ``(low, high, distribution)`` where the distribution can
-# be either ``"log-uniform"`` or ``"uniform"`` or 
+# be either ``"log-uniform"`` or ``"uniform"`` or
 # ``(a, b, c, d, ..., "categorical")`` where ``a``, ``b``, ``c``, ``d``, etc.
 # are the possible categorical values for the hyperparameter.
 #
-# For example, we can specify the ``C`` and ``gamma`` hyperparameters of the 
-# :class:`~sklearn.svm.SVC` as log-uniform distributions, while keeping 
+# For example, we can specify the ``C`` and ``gamma`` hyperparameters of the
+# :class:`~sklearn.svm.SVC` as log-uniform distributions, while keeping
 # the ``with_mean`` parameter of the
 # :class:`~sklearn.preprocessing.StandardScaler` as a categorical parameter
 # with two options.
@@ -469,7 +469,7 @@
 # While this will work for any of the ``random``, ``bayes`` or ``optuna``
 # searcher options, it is important to note that both ``bayes`` and ``optuna``
 # searchers accept further parameters to specify distributions. For example,
-# the ``bayes`` searcher distributions are defined using the 
+# the ``bayes`` searcher distributions are defined using the
 # :class:`~skopt.space.space.Categorical`, :class:`~skopt.space.space.Integer`
 # and :class:`~skopt.space.space.Real`.
 #
@@ -493,7 +493,7 @@
 #
 #
 # For example, we can define a uniform distribution from 0.5 to 0.9 with a 0.05
-# step for the ``n_components`` of a :class:`~sklearn.decomposition.PCA` 
+# step for the ``n_components`` of a :class:`~sklearn.decomposition.PCA`
 # transformer, while keeping a log-uniform distribution for the ``C`` and
 # ``gamma`` hyperparameters of the :class:`~sklearn.svm.SVC` model.
 from optuna.distributions import FloatDistribution
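Note: the docs hunks above describe julearn's tuple convention for hyperparameter distributions and the optuna searcher's requirement that every tuned parameter be a distribution. A minimal end-to-end sketch of that convention, not part of this patch: the dataset and the exact ``add`` calls are assumptions, while the `(low, high, kind)` tuples and the `search_params` values mirror the documentation above and the conftest fixtures changed below.

```python
# Illustrative sketch only, not part of this patch. Dataset and step
# choices are assumptions; the (low, high, kind) tuples follow the
# convention documented in the hunks above.
from seaborn import load_dataset

from julearn import run_cross_validation
from julearn.pipeline import PipelineCreator

df_iris = load_dataset("iris")

creator = PipelineCreator(problem_type="classification")
creator.add("zscore", with_mean=(True, False, "categorical"))
creator.add(
    "svm",
    C=(0.01, 10, "log-uniform"),
    gamma=(1e-3, 1e-1, "log-uniform"),
)

# The search_params values mirror the optuna fixtures added in
# julearn/conftest.py in this PR.
scores = run_cross_validation(
    X=["sepal_length", "sepal_width", "petal_length", "petal_width"],
    y="species",
    data=df_iris,
    model=creator,
    search_params={"kind": "optuna", "n_trials": 10, "cv": 3},
)
```

Per the docs hunk above, a `Study` object could likewise be passed as `search_params={"kind": "optuna", "study": my_study}`, where `my_study` is a hypothetical result of `optuna.create_study(...)`.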
diff --git a/ignore_words.txt b/ignore_words.txt
index 585932d33..a6fabd63d 100644
--- a/ignore_words.txt
+++ b/ignore_words.txt
@@ -5,4 +5,4 @@ fpr
 master
 whis
 jupyter
-arange
\ No newline at end of file
+arange
diff --git a/julearn/conftest.py b/julearn/conftest.py
index cb4c2868d..b4b27e69c 100644
--- a/julearn/conftest.py
+++ b/julearn/conftest.py
@@ -286,6 +286,7 @@ def bayes_search_params(request: FixtureRequest) -> Optional[Dict]:
     return request.param
 
 
+
 @fixture(
     params=[
         {"kind": "optuna", "n_trials": 10, "cv": 3},
@@ -310,6 +311,7 @@ def optuna_search_params(request: FixtureRequest) -> Optional[Dict]:
     return request.param
 
 
+
 _tuning_params = {
     "zscore": {"with_mean": [True, False]},
     "pca": {"n_components": [0.2, 0.7]},
diff --git a/julearn/model_selection/__init__.py b/julearn/model_selection/__init__.py
index 01356d0e6..db038f65b 100644
--- a/julearn/model_selection/__init__.py
+++ b/julearn/model_selection/__init__.py
@@ -21,4 +21,3 @@
 
 register_bayes_searcher()
 register_optuna_searcher()
-
diff --git a/julearn/model_selection/_optuna_searcher.py b/julearn/model_selection/_optuna_searcher.py
index a05801fc2..0e1751137 100644
--- a/julearn/model_selection/_optuna_searcher.py
+++ b/julearn/model_selection/_optuna_searcher.py
@@ -89,8 +89,10 @@ def _prepare_optuna_hyperparameters_distributions(
             )
             out[k] = optd.FloatDistribution(v[0], v[1], log=True)
         elif v[2] == "categorical":
-            logger.info(f"Hyperparameter {k} is categorical with 2 "
-                        f"options: [{v[0]} and {v[1]}]")
+            logger.info(
+                f"Hyperparameter {k} is categorical with 2 "
+                f"options: [{v[0]} and {v[1]}]"
+            )
             out[k] = optd.CategoricalDistribution((v[0], v[1]))
         else:
             out[k] = v
diff --git a/julearn/model_selection/tests/test_optuna_searcher.py b/julearn/model_selection/tests/test_optuna_searcher.py
index 3d0c30db2..231f5becd 100644
--- a/julearn/model_selection/tests/test_optuna_searcher.py
+++ b/julearn/model_selection/tests/test_optuna_searcher.py
@@ -14,6 +14,7 @@
 
 optd = pytest.importorskip("optuna.distributions")
 
+
 @pytest.mark.parametrize(
     "params_to_tune,expected_types, expected_dist",
     [
diff --git a/julearn/model_selection/tests/test_skopt_searcher.py b/julearn/model_selection/tests/test_skopt_searcher.py
index ea3598a87..38846da0e 100644
--- a/julearn/model_selection/tests/test_skopt_searcher.py
+++ b/julearn/model_selection/tests/test_skopt_searcher.py
@@ -14,6 +14,7 @@
 
 sksp = pytest.importorskip("skopt.space")
 
+
 @pytest.mark.parametrize(
     "params_to_tune,expected_types, expected_dist",
     [
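Note: the two searcher test modules above exercise exactly the tuple-to-distribution conversion touched in `_prepare_optuna_hyperparameters_distributions`. A hypothetical sketch of that mapping for readers skimming the patch; the names and input values are invented, and only the log-uniform and categorical branches visible in the hunk are reproduced.

```python
# Hypothetical illustration of the conversion exercised by the tests
# above; names and values are invented, not taken from julearn sources.
import optuna.distributions as optd

params_to_tune = {
    "svm__C": (0.01, 10, "log-uniform"),
    "svm__class_weight": ("balanced", None, "categorical"),
}

converted = {}
for name, spec in params_to_tune.items():
    *choices, kind = spec
    if kind == "log-uniform":
        # mirrors: out[k] = optd.FloatDistribution(v[0], v[1], log=True)
        converted[name] = optd.FloatDistribution(
            choices[0], choices[1], log=True
        )
    elif kind == "categorical":
        # mirrors: out[k] = optd.CategoricalDistribution((v[0], v[1]))
        converted[name] = optd.CategoricalDistribution(tuple(choices))
```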
diff --git a/julearn/pipeline/merger.py b/julearn/pipeline/merger.py
index 1ac4bdc04..39ac734f2 100644
--- a/julearn/pipeline/merger.py
+++ b/julearn/pipeline/merger.py
@@ -89,10 +89,7 @@ def merge_pipelines(
         # TODO: Fix this comparison, as it always returns False.
         for s in pipelines[1:]:
             if isinstance(s, BaseSearchCV):
-                if (
-                    s.estimator.named_steps[t_step_name]  # type: ignore
-                    != t
-                ):
+                if s.estimator.named_steps[t_step_name] != t:  # type: ignore
                     different_steps.append(t_step_name)
                     break
             else:
diff --git a/julearn/pipeline/tests/test_merger.py b/julearn/pipeline/tests/test_merger.py
index 54468ebf0..1a93e1786 100644
--- a/julearn/pipeline/tests/test_merger.py
+++ b/julearn/pipeline/tests/test_merger.py
@@ -51,10 +51,14 @@ def test_merger_pipelines() -> None:
     assert "scaler" == named_steps[1]
     assert "rf" == named_steps[2]
     assert len(merged.param_distributions) == 3  # type: ignore
-    assert (
-        merged.param_distributions[-1]["rf__max_features"]  # type: ignore
-        == [2, 3, 7, 42]
-    )
+    assert merged.param_distributions[-1][
+        "rf__max_features"
+    ] == [  # type: ignore
+        2,
+        3,
+        7,
+        42,
+    ]
 
 
 def test_merger_errors() -> None:
diff --git a/julearn/tests/test_api.py b/julearn/tests/test_api.py
index 5f9def69b..0d05f7dec 100644
--- a/julearn/tests/test_api.py
+++ b/julearn/tests/test_api.py
@@ -455,9 +455,8 @@ def test_tune_hyperparam_gridsearch(df_iris: pd.DataFrame) -> None:
     )
 
     assert len(actual.columns) == len(expected) + 5  # type: ignore
-    assert (
-        len(actual["test_accuracy"])  # type: ignore
-        == len(expected["test_accuracy"])
+    assert len(actual["test_accuracy"]) == len(  # type: ignore
+        expected["test_accuracy"]
     )
     assert all(
         a == b
@@ -540,9 +539,8 @@ def test_tune_hyperparam_gridsearch_groups(df_iris: pd.DataFrame) -> None:
     )
 
     assert len(actual.columns) == len(expected) + 5  # type: ignore
-    assert (
-        len(actual["test_accuracy"])  # type: ignore
-        == len(expected["test_accuracy"])
+    assert len(actual["test_accuracy"]) == len(  # type: ignore
+        expected["test_accuracy"]
     )
     assert all(
         a == b
@@ -630,9 +628,8 @@ def test_tune_hyperparam_randomsearch(df_iris: pd.DataFrame) -> None:
     )
 
     assert len(actual.columns) == len(expected) + 5  # type: ignore
-    assert (
-        len(actual["test_accuracy"])  # type: ignore
-        == len(expected["test_accuracy"])
+    assert len(actual["test_accuracy"]) == len(  # type: ignore
+        expected["test_accuracy"]
     )
     assert all(
         a == b
@@ -746,13 +743,11 @@ def test_tune_hyperparams_multiple_grid(df_iris: pd.DataFrame) -> None:
 
     assert len(actual1.columns) == len(expected) + 5  # type: ignore
     assert len(actual2.columns) == len(expected) + 5  # type: ignore
-    assert (
-        len(actual1["test_accuracy"])  # type: ignore
-        == len(expected["test_accuracy"])
+    assert len(actual1["test_accuracy"]) == len(  # type: ignore
+        expected["test_accuracy"]
     )
-    assert (
-        len(actual2["test_accuracy"])  # type: ignore
-        == len(expected["test_accuracy"])
+    assert len(actual2["test_accuracy"]) == len(  # type: ignore
+        expected["test_accuracy"]
     )
     assert all(
         a == b
diff --git a/julearn/transformers/cbpm.py b/julearn/transformers/cbpm.py
index 58dd98a27..0a4d698aa 100644
--- a/julearn/transformers/cbpm.py
+++ b/julearn/transformers/cbpm.py
@@ -284,8 +284,10 @@ def get_feature_names_out(self, input_features=None):
         cols = (
             ["positive"]
             if self.used_corr_sign_ == "pos"
-            else ["negative"]
-            if self.used_corr_sign_ == "neg"
-            else ["positive", "negative"]
+            else (
+                ["negative"]
+                if self.used_corr_sign_ == "neg"
+                else ["positive", "negative"]
+            )
         )
         return np.array(cols, dtype=object)
diff --git a/julearn/transformers/dataframe/change_column_types.py b/julearn/transformers/dataframe/change_column_types.py
index 845590c5d..6e382b4f0 100644
--- a/julearn/transformers/dataframe/change_column_types.py
+++ b/julearn/transformers/dataframe/change_column_types.py
@@ -75,9 +75,9 @@ def _fit(
             if "__:type:__" in col:
                 name, old_type = col.split("__:type:__")
                 if old_type in self.X_types_renamer:
-                    to_rename[
-                        col
-                    ] = f"{name}__:type:__{self.X_types_renamer[old_type]}"
+                    to_rename[col] = (
+                        f"{name}__:type:__{self.X_types_renamer[old_type]}"
+                    )
 
         self._renamer = to_rename
         return self
diff --git a/julearn/transformers/tests/test_jucolumntransformers.py b/julearn/transformers/tests/test_jucolumntransformers.py
index 7bf8309fa..13cd3d9be 100644
--- a/julearn/transformers/tests/test_jucolumntransformers.py
+++ b/julearn/transformers/tests/test_jucolumntransformers.py
@@ -156,9 +156,9 @@ def test_JuColumnTransformer_row_select():
     )
 
     mean_both = (
-        transformer_both.fit(
-            X
-        ).column_transformer_.transformers_[0][1].mean_  # type: ignore
+        transformer_both.fit(X)
+        .column_transformer_.transformers_[0][1]
+        .mean_  # type: ignore
     )
 
     assert_almost_equal(
diff --git a/julearn/utils/checks.py b/julearn/utils/checks.py
index 919e796dc..a3a407bf0 100644
--- a/julearn/utils/checks.py
+++ b/julearn/utils/checks.py
@@ -1,4 +1,5 @@
 """Implement various checks for the input of the functions."""
+
 # Author: Federico Raimondo
 # License: BSD 3 clause
 
diff --git a/julearn/utils/logging.py b/julearn/utils/logging.py
index c7c7516e9..0c2993d92 100644
--- a/julearn/utils/logging.py
+++ b/julearn/utils/logging.py
@@ -222,9 +222,7 @@ def raise_error(
     raise klass(msg)
 
 
-def warn_with_log(
-    msg: str, category: Type[Warning] = RuntimeWarning
-) -> None:
+def warn_with_log(msg: str, category: Type[Warning] = RuntimeWarning) -> None:
     """Warn, but first log it.
 
     Parameters
diff --git a/julearn/utils/testing.py b/julearn/utils/testing.py
index 7afd9d2ab..1bcac8322 100644
--- a/julearn/utils/testing.py
+++ b/julearn/utils/testing.py
@@ -185,7 +185,7 @@ def do_scoring_test(
     scorers: List[str],
     groups: Optional[str] = None,
     X_types: Optional[Dict[str, List[str]]] = None,  # noqa: N803
-    cv: Union[int, BaseCrossValidator] = 5, 
+    cv: Union[int, BaseCrossValidator] = 5,
     sk_y: Optional[np.ndarray] = None,
     decimal: int = 5,
 ):
diff --git a/pyproject.toml b/pyproject.toml
index e8841b95d..f6f44ab3c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -104,6 +104,13 @@ builtin = "clear,rare,informal,names,usage,code"
 
 [tool.ruff]
 line-length = 79
+extend-exclude = [
+    "__init__.py",
+    "docs",
+    "examples",
+]
+
+[tool.ruff.lint]
 select = [
     # flake8-bugbear
     "B",
@@ -136,11 +143,6 @@
     # flake8-2020
     "YTT",
 ]
-extend-exclude = [
-    "__init__.py",
-    "docs",
-    "examples",
-]
 extend-ignore = [
     # Use of `functools.lru_cache` or `functools.cache` on methods can lead to
    # memory leaks. The cache may retain instance references, preventing garbage
@@ -163,7 +165,7 @@
     "PGH003",
 ]
 
-[tool.ruff.isort]
+[tool.ruff.lint.isort]
 lines-after-imports = 2
 known-first-party = ["julearn"]
 known-third-party =[
     "numpy",
@@ -178,7 +180,7 @@
     "pytest",
 ]
 
-[tool.ruff.mccabe]
+[tool.ruff.lint.mccabe]
 max-complexity = 20
 
 [tool.towncrier]
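A closing note on the pyproject.toml hunks: ruff 0.2.0 deprecated its top-level linter settings, which is why `select` and `extend-ignore` move under the new `[tool.ruff.lint]` table and the `isort` and `mccabe` tables become `[tool.ruff.lint.isort]` and `[tool.ruff.lint.mccabe]`, while runner-wide options such as `line-length` and `extend-exclude` stay under `[tool.ruff]`. The upgraded ruff also renamed the CLI flag `--format` to `--output-format`, matching the change to the ruff hook arguments in .pre-commit-config.yaml at the top of this diff.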