From 5a74227bc3825ae75a73d032ea0040a9b4563ceb Mon Sep 17 00:00:00 2001
From: Daniel Grindrod
Date: Fri, 1 Nov 2024 02:06:15 +0000
Subject: [PATCH] Flaml: fix lgbm reproducibility (#1369)

* fix: Fixed bug where every underlying LGBMRegressor or LGBMClassifier had n_estimators = 1

* test: Added test showing case where FLAMLised CatBoostModel result isn't reproducible

* fix: Fixing issue where callbacks cause LGBM results to not be reproducible

* Update test/automl/test_regression.py

Co-authored-by: Li Jiang

* fix: Adding back the LGBM EarlyStopping

* refactor: Fix tweaked to ensure other models aren't likely to be affected

* test: Fixed test to allow reproduced results to be better than the FLAML results, when LGBM earlystopping is involved

---------

Co-authored-by: Daniel Grindrod
Co-authored-by: Li Jiang
---
 flaml/automl/model.py              |  7 ++---
 test/automl/test_classification.py |  2 +-
 test/automl/test_regression.py     | 49 +++++++++++++++++++++++++++++-
 3 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/flaml/automl/model.py b/flaml/automl/model.py
index 182b4103ec..024a641dd3 100644
--- a/flaml/automl/model.py
+++ b/flaml/automl/model.py
@@ -1585,18 +1585,17 @@ def fit(self, X_train, y_train, budget=None, free_mem_ratio=0, **kwargs):
                     callbacks = None
             if callbacks is None:
                 self._fit(X_train, y_train, **kwargs)
-            else:
-                self._fit(X_train, y_train, callbacks=callbacks, **kwargs)
-            if callbacks is None:
                 # for xgboost>=1.6.0, pop callbacks to enable pickle
                 callbacks = self.params.pop("callbacks")
                 self._model.set_params(callbacks=callbacks[:-1])
+            else:
+                self._fit(X_train, y_train, callbacks=callbacks, **kwargs)
             best_iteration = (
                 getattr(self._model.get_booster(), "best_iteration", None)
                 if isinstance(self, XGBoostSklearnEstimator)
                 else self._model.best_iteration_
             )
-            if best_iteration is not None:
+            if best_iteration is not None and best_iteration > 0:
                 self._model.set_params(n_estimators=best_iteration + 1)
         else:
             self._fit(X_train, y_train, **kwargs)
diff --git a/test/automl/test_classification.py b/test/automl/test_classification.py
index d4ff03b972..4305e3bcff 100644
--- a/test/automl/test_classification.py
+++ b/test/automl/test_classification.py
@@ -493,7 +493,7 @@ def test_reproducibility_of_classification_models(estimator: str):
         "extra_tree",
         "histgb",
         "kneighbor",
-        # "lgbm",
+        "lgbm",
         # "lrl1",
         "lrl2",
         "svc",
diff --git a/test/automl/test_regression.py b/test/automl/test_regression.py
index 87f8ed6b23..892ad1eceb 100644
--- a/test/automl/test_regression.py
+++ b/test/automl/test_regression.py
@@ -339,6 +339,52 @@ def test_reproducibility_of_catboost_regression_model():
     assert pytest.approx(val_loss_flaml) == reproduced_val_loss
 
 
+def test_reproducibility_of_lgbm_regression_model():
+    """FLAML finds the best model for a given dataset, which it then provides to users.
+
+    However, there are reported issues around LGBMs - see here:
+    https://github.com/microsoft/FLAML/issues/1368
+    In this test we take the best LGBM regression model which FLAML provided us, and then retrain and test it on the
+    same folds, to verify that the result is reproducible.
+    """
+    automl = AutoML()
+    automl_settings = {
+        "time_budget": 3,
+        "task": "regression",
+        "n_jobs": 1,
+        "estimator_list": ["lgbm"],
+        "eval_method": "cv",
+        "n_splits": 9,
+        "metric": "r2",
+        "keep_search_state": True,
+        "skip_transform": True,
+        "retrain_full": True,
+    }
+    X, y = fetch_california_housing(return_X_y=True, as_frame=True)
+    automl.fit(X_train=X, y_train=y, **automl_settings)
+    best_model = automl.model
+    assert best_model is not None
+    config = best_model.get_params()
+    val_loss_flaml = automl.best_result["val_loss"]
+
+    # Take the best model, and see if we can reproduce the best result
+    reproduced_val_loss, metric_for_logging, train_time, pred_time = automl._state.task.evaluate_model_CV(
+        config=config,
+        estimator=best_model,
+        X_train_all=automl._state.X_train_all,
+        y_train_all=automl._state.y_train_all,
+        budget=None,
+        kf=automl._state.kf,
+        eval_metric="r2",
+        best_val_loss=None,
+        cv_score_agg_func=None,
+        log_training_metric=False,
+        fit_kwargs=None,
+        free_mem_ratio=0,
+    )
+    assert pytest.approx(val_loss_flaml) == reproduced_val_loss or val_loss_flaml > reproduced_val_loss
+
+
 @pytest.mark.parametrize(
     "estimator",
     [
@@ -347,7 +393,7 @@ def test_reproducibility_of_catboost_regression_model():
         "extra_tree",
         "histgb",
         "kneighbor",
-        # "lgbm",
+        "lgbm",
         "rf",
         "xgboost",
         "xgb_limitdepth",
@@ -376,6 +422,7 @@ def test_reproducibility_of_underlying_regression_models(estimator: str):
         "metric": "r2",
         "keep_search_state": True,
         "skip_transform": True,
+        "retrain_full": False,
     }
     X, y = fetch_california_housing(return_X_y=True, as_frame=True)
     automl.fit(X_train=X, y_train=y, **automl_settings)