Commit

fix bug in hierarchical search space (#248); optional dependency on lgbm and xgb (#250)

* close #249

* admissible region

* best_config can be None

* optional dependency on lgbm and xgb
resolve #252
sonichi authored Oct 16, 2021
1 parent fe65fa1 commit 524f22b
Showing 10 changed files with 365 additions and 262 deletions.
5 changes: 3 additions & 2 deletions flaml/automl.py
@@ -395,7 +395,8 @@ def best_iteration(self):
@property
def best_config(self):
"""A dictionary of the best configuration."""
return self._search_states[self._best_estimator].best_config
state = self._search_states.get(self._best_estimator)
return state and getattr(state, "best_config", None)

@property
def best_config_per_estimator(self):
@@ -1104,7 +1105,7 @@ def low_cost_partial_config(self) -> dict:
(b) otherwise, it is a nested dict with 'ml' as the key, and
a list of the low_cost_partial_configs as the value, corresponding
to each learner's low_cost_partial_config; the estimator index as
an integer corresponding to the cheapest learner is appeneded to the
an integer corresponding to the cheapest learner is appended to the
list at the end.
"""
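With the `best_config` change above, the property returns None instead of raising when no estimator has produced a best configuration (e.g., no trial finished within the budget). A minimal usage sketch — the dataset and budget are placeholders, not part of this commit:

```python
from sklearn.datasets import load_iris
from flaml import AutoML

X_train, y_train = load_iris(return_X_y=True)
automl = AutoML()
automl.fit(X_train, y_train, task="classification", time_budget=5)

# best_config may now be None, e.g. when no trial completed in time,
# so guard before using it instead of expecting a KeyError.
config = automl.best_config
if config is None:
    print("No successful trial yet; consider a larger time_budget.")
else:
    print(f"Best configuration: {config}")
```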
13 changes: 11 additions & 2 deletions flaml/model.py
@@ -4,12 +4,10 @@
"""

import numpy as np
import xgboost as xgb
import time
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.ensemble import ExtraTreesRegressor, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier, LGBMRegressor, LGBMRanker
from scipy.sparse import issparse
import pandas as pd
from . import tune
@@ -286,10 +284,16 @@ def __init__(self, task="binary", **config):
if "verbose" not in self.params:
self.params["verbose"] = -1
if "regression" == task:
from lightgbm import LGBMRegressor

self.estimator_class = LGBMRegressor
elif "rank" == task:
from lightgbm import LGBMRanker

self.estimator_class = LGBMRanker
else:
from lightgbm import LGBMClassifier

self.estimator_class = LGBMClassifier
self._time_per_iter = None
self._train_size = 0
@@ -432,6 +436,8 @@ def __init__(
self.params["verbosity"] = 0

def fit(self, X_train, y_train, budget=None, **kwargs):
import xgboost as xgb

start_time = time.time()
if issparse(X_train):
self.params["tree_method"] = "auto"
@@ -458,6 +464,8 @@ def fit(self, X_train, y_train, budget=None, **kwargs):
return train_time

def predict(self, X_test):
import xgboost as xgb

if not issparse(X_test):
X_test = self._preprocess(X_test)
dtest = xgb.DMatrix(X_test)
Expand Down Expand Up @@ -492,6 +500,7 @@ def __init__(
super().__init__(task, **config)
del self.params["verbose"]
self.params["verbosity"] = 0
import xgboost as xgb

self.estimator_class = xgb.XGBRegressor
if "rank" == task:
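The model.py changes move the `lightgbm` and `xgboost` imports from module level into the methods that need them, so flaml can be installed and imported without either package. A hedged sketch of the general pattern — the helper name and error text are illustrative, not flaml's actual code:

```python
def _lazy_import_xgboost():
    """Import xgboost on first use so it stays an optional dependency."""
    try:
        import xgboost as xgb
    except ImportError as e:
        # Fail at fit/predict time with an actionable message instead of
        # failing at `import flaml` time.
        raise ImportError(
            "This estimator requires xgboost; install it with "
            "`pip install xgboost`."
        ) from e
    return xgb
```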
14 changes: 13 additions & 1 deletion flaml/searcher/blendsearch.py
@@ -313,7 +313,13 @@ def _init_search(self):
{},
recursive=True,
)
self._ls_bound_max = self._ls_bound_min.copy()
self._ls_bound_max = normalize(
self._ls.init_config.copy(),
self._ls.space,
self._ls.init_config,
{},
recursive=True,
)
self._gs_admissible_min = self._ls_bound_min.copy()
self._gs_admissible_max = self._ls_bound_max.copy()
self._result = {} # config_signature: tuple -> result: Dict
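The old initialization copied `_ls_bound_min` to get `_ls_bound_max`. Since `dict.copy()` is shallow, a hierarchical search space (nested dicts under keys like 'ml') left both bounds sharing the same inner dicts, so widening one bound silently moved the other — presumably the bug behind #248. A minimal sketch of that pitfall, with hypothetical values:

```python
# Shallow copy shares nested dicts between the two bounds.
bound_min = {"ml": {"learning_rate": 0.1}}
bound_max = bound_min.copy()            # shallow: inner "ml" dict is shared
bound_max["ml"]["learning_rate"] = 0.9
assert bound_min["ml"]["learning_rate"] == 0.9  # lower bound moved too

# Building each bound with a separate normalize() call, as the fix does,
# yields independent nested dicts.
```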
@@ -492,6 +498,11 @@ def _update_admissible_region(
subspace[key],
domain[choice],
)
if len(admissible_max[key]) > len(domain.categories):
# points + index
normal = (choice + 0.5) / len(domain.categories)
admissible_max[key][-1] = max(normal, admissible_max[key][-1])
admissible_min[key][-1] = min(normal, admissible_min[key][-1])
elif isinstance(value, dict):
self._update_admissible_region(
value,
@@ -583,6 +594,7 @@ def _create_thread_from_best_candidate(self):
)

def _expand_admissible_region(self, lower, upper, space):
"""expand the admissible region for the subspace `space`"""
for key in upper:
ub = upper[key]
if isinstance(ub, list):
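In `_update_admissible_region`, a categorical dimension represented as "points + index" carries the chosen category's index as its last entry; the new code normalizes that index to the midpoint of its bucket in [0, 1] before widening the bounds. A standalone sketch of the arithmetic, assuming a flat list of categories (helper names are illustrative):

```python
def normalized_choice(choice: int, n_categories: int) -> float:
    # Index i maps to the midpoint of bucket i in [0, 1]:
    # 3 categories -> 1/6, 1/2, 5/6.
    return (choice + 0.5) / n_categories

def widen_index_bounds(choice, categories, admissible_min, admissible_max):
    # Widen only the trailing index entry so the admissible region
    # covers the newly observed categorical choice.
    normal = normalized_choice(choice, len(categories))
    admissible_max[-1] = max(normal, admissible_max[-1])
    admissible_min[-1] = min(normal, admissible_min[-1])
```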
2 changes: 1 addition & 1 deletion flaml/searcher/online_searcher.py
@@ -138,7 +138,7 @@ def __init__(

# value: trial_id, key: searcher_trial_id
self._trialid_to_searcher_trial_id = {}

self._challenger_list = []
# initialize the search in set_search_properties
self.set_search_properties(
8 changes: 5 additions & 3 deletions flaml/training_log.py
@@ -6,7 +6,9 @@
import json
from typing import IO
from contextlib import contextmanager
import warnings
import logging

logger = logging.getLogger("flaml.automl")


class TrainingLogRecord(object):
@@ -113,8 +115,8 @@ def checkpoint(self):
if self.file is None:
raise IOError("Call open() to open the output file first.")
if self.current_best_loss_record_id is None:
warnings.warn(
"checkpoint() called before any record is written, " "skipped."
logger.warning(
"flaml.training_log: checkpoint() called before any record is written, skipped."
)
return
record = TrainingLogCheckPoint(self.current_best_loss_record_id)
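Replacing `warnings.warn` with the `flaml.automl` logger puts the message under standard `logging` control; for example, a user can surface or silence it like this (a usage sketch, not part of the commit):

```python
import logging

# Show flaml's warnings and above on stderr ...
logging.basicConfig(level=logging.WARNING)

# ... or silence just this logger, e.g. the checkpoint() warning.
logging.getLogger("flaml.automl").setLevel(logging.ERROR)
```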