Skip to content

Commit

Permalink
Merge master into stable (#559)
Browse files Browse the repository at this point in the history
* The new `models_repr` is a dictionary, which is not compatible with `f.write`
* Add VolumeLimitExceeded to retry lists
* Save additional information by default
  • Loading branch information
PGijsbers authored Jun 28, 2023
1 parent 319b48e commit d36b185
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 24 deletions.
1 change: 1 addition & 0 deletions amlb/datasets/openml.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import os
import re
from typing import Generic, Tuple, TypeVar, List

import arff
import pandas as pd
import pandas.api.types as pat
Expand Down
13 changes: 0 additions & 13 deletions frameworks/TunedRandomForest/exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,19 +227,6 @@ def infer(data):
)
log.info(f"Finished inference time measurements.")

def infer(data):
    """Predict with the fitted random forest; `data` may be a DataFrame or a parquet file path."""
    frame = data if not isinstance(data, str) else pd.read_parquet(data)
    return rf.predict(frame)

# Optional inference-time benchmark: timings are collected per input kind
# ("file" = parquet files on disk, "df" = in-memory single-row DataFrames).
inference_times = {}
if config.measure_inference_time:
# Time predictions on the pre-generated subsample files.
inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
# Normalize X_test to a DataFrame so .sample() is available below.
test_data = X_test if isinstance(X_test, pd.DataFrame) else pd.DataFrame(X_test)
# 100 repeated single-row predictions; fixed random_state per draw keeps runs reproducible.
inference_times["df"] = measure_inference_times(
infer,
[(1, test_data.sample(1, random_state=i)) for i in range(100)],
)

return result(
output_file=config.output_predictions_file,
predictions=predictions,
Expand Down
50 changes: 39 additions & 11 deletions frameworks/autosklearn/exec.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
import math
import os
Expand Down Expand Up @@ -139,7 +140,10 @@ def run(dataset, config):
auto_sklearn = estimator(**constr_params, **training_params)
with Timer() as training:
auto_sklearn.fit(X_train, y_train, **fit_extra_params)
# Any log call after `auto_sklearn.fit` gets swallowed because it reconfigures logging
# Have to open an issue to set up `logging_config` right or have better defaults.
log.info(f"Finished fit in {training.duration}s.")
print(f"Finished fit in {training.duration}s.")

def infer(data: Union[str, pd.DataFrame]):
test_data = pd.read_parquet(data) if isinstance(data, str) else data
Expand All @@ -159,6 +163,7 @@ def sample_one_test_row(seed: int):
infer, [(1, sample_one_test_row(seed=i)) for i in range(100)],
)
log.info(f"Finished inference time measurements.")
print(f"Finished inference time measurements.")

# Convert output to strings for classification
log.info("Predicting on the test set.")
Expand All @@ -167,6 +172,7 @@ def sample_one_test_row(seed: int):
predictions = auto_sklearn.predict(X_test)
probabilities = auto_sklearn.predict_proba(X_test) if is_classification else None
log.info(f"Finished predict in {predict.duration}s.")
print(f"Finished predict in {predict.duration}s.")

save_artifacts(auto_sklearn, config)

Expand All @@ -182,16 +188,37 @@ def sample_one_test_row(seed: int):
)


def save_models(estimator, config):
    """Persist the trained ensemble description to the 'models' output subdirectory.

    `estimator.show_models()` returns a plain string on older auto-sklearn
    versions and a dict on newer ones; each is written in a matching format
    (models.txt / models.json). Any other return type is logged and skipped.
    """
    models_repr = estimator.show_models()
    log.info("Trained Ensemble:\n%s", models_repr)
    # NOTE: print() does not apply %-style formatting the way logging does,
    # so interpolate explicitly instead of passing lazy arguments.
    print(f"Trained Ensemble:\n{models_repr}")

    if isinstance(models_repr, str):
        models_file = os.path.join(output_subdir('models', config), 'models.txt')
        with open(models_file, 'w') as f:
            f.write(models_repr)
    elif isinstance(models_repr, dict):
        models_file = os.path.join(output_subdir('models', config), 'models.json')
        with open(models_file, 'w') as f:
            # Ensemble members are not JSON-serializable; fall back to their str().
            json.dump(models_repr, f, default=str)
    else:
        log.warning(f"Saving 'models' where {type(models_repr)=} not supported.")
        print(f"Saving 'models' where {type(models_repr)=} not supported.")


def save_artifacts(estimator, config):
try:
models_repr = estimator.show_models()
log.debug("Trained Ensemble:\n%s", models_repr)
artifacts = config.framework_params.get('_save_artifacts', [])
if 'models' in artifacts:
models_file = os.path.join(output_subdir('models', config), 'models.txt')
with open(models_file, 'w') as f:
f.write(models_repr)
if 'debug_as_files' in artifacts or 'debug_as_zip' in artifacts:
artifacts = config.framework_params.get('_save_artifacts', [])
artifacts = [artifacts] if isinstance(artifacts, str) else artifacts
if 'models' in artifacts:
try:
save_models(estimator, config)
except Exception as e:
log.info(f"Error when saving 'models': {e}.", exc_info=True)
print(f"Error when saving 'models': {e}.")

if 'debug_as_files' in artifacts or 'debug_as_zip' in artifacts:
try:
log.info('Saving debug artifacts!')
print('Saving debug artifacts!')
debug_dir = output_subdir('debug', config)
ignore_extensions = ['.npy', '.pcs', '.model', '.cv_model', '.ensemble', '.pkl']
Expand All @@ -216,8 +243,9 @@ def _copy(filename, **_):
os.path.join(debug_dir, "artifacts.zip"),
filter_=lambda p: os.path.splitext(p)[1] not in ignore_extensions
)
except Exception as e:
log.debug("Error when saving artifacts= {e}.".format(e), exc_info=True)
except Exception as e:
log.info(f"Error when saving 'debug': {e}.", exc_info=True)
print(f"Error when saving 'debug': {e}.")


if __name__ == '__main__':
Expand Down
2 changes: 2 additions & 0 deletions resources/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -217,10 +217,12 @@ aws: # configuration namespace for AWS mode.
- 'MaxSpotInstanceCountExceeded'
- 'InsufficientFreeAddressesInSubnet'
- 'InsufficientInstanceCapacity'
- 'VolumeLimitExceeded'
retry_on_states: # EC2 instance states that will trigger a job reschedule.
- 'Server.SpotInstanceShutdown'
- 'Server.SpotInstanceTermination'
- 'Server.InsufficientInstanceCapacity'
- 'Client.VolumeLimitExceeded'

max_timeout_seconds: 21600 #
os_mem_size_mb: 0 # overrides the default amount of memory left to the os in AWS mode, and set to 0 for fairness as we can't always prevent frameworks from using all available memory.
Expand Down
7 changes: 7 additions & 0 deletions resources/frameworks_2023Q2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,14 @@ AutoGluon_hq_il001:

autosklearn:
version: '0.15.0'
params:
_save_artifacts: ['models', 'debug_as_zip']

autosklearn2:
extends: autosklearn
params:
_askl2: true
_save_artifacts: ['models', 'debug_as_zip']

AutoWEKA:
version: '2.6'
Expand All @@ -58,6 +61,8 @@ GAMA_benchmark:

H2OAutoML:
version: '3.40.0.4'
params:
_save_artifacts: ['leaderboard', 'logs']

lightautoml:
version: '0.3.7.3'
Expand Down Expand Up @@ -93,6 +98,8 @@ mlr3automl:

TPOT:
version: '0.12.0'
params:
_save_artifacts: ['models']


#######################################
Expand Down

0 comments on commit d36b185

Please sign in to comment.