diff --git a/amlb/datasets/openml.py b/amlb/datasets/openml.py
index 15dd68ae9..678d1854d 100644
--- a/amlb/datasets/openml.py
+++ b/amlb/datasets/openml.py
@@ -12,6 +12,7 @@
 import os
 import re
 from typing import Generic, Tuple, TypeVar, List
 
+import arff
 import pandas as pd
 import pandas.api.types as pat
diff --git a/frameworks/TunedRandomForest/exec.py b/frameworks/TunedRandomForest/exec.py
index 2a1dc915d..a2e41813d 100644
--- a/frameworks/TunedRandomForest/exec.py
+++ b/frameworks/TunedRandomForest/exec.py
@@ -227,19 +227,6 @@ def infer(data):
         )
     log.info(f"Finished inference time measurements.")
 
-    def infer(data):
-        data = pd.read_parquet(data) if isinstance(data, str) else data
-        return rf.predict(data)
-
-    inference_times = {}
-    if config.measure_inference_time:
-        inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
-        test_data = X_test if isinstance(X_test, pd.DataFrame) else pd.DataFrame(X_test)
-        inference_times["df"] = measure_inference_times(
-            infer,
-            [(1, test_data.sample(1, random_state=i)) for i in range(100)],
-        )
-
     return result(
         output_file=config.output_predictions_file,
         predictions=predictions,
diff --git a/frameworks/autosklearn/exec.py b/frameworks/autosklearn/exec.py
index 0ebbe1edf..dda275f2a 100644
--- a/frameworks/autosklearn/exec.py
+++ b/frameworks/autosklearn/exec.py
@@ -1,3 +1,4 @@
+import json
 import logging
 import math
 import os
@@ -139,7 +140,10 @@ def run(dataset, config):
     auto_sklearn = estimator(**constr_params, **training_params)
     with Timer() as training:
         auto_sklearn.fit(X_train, y_train, **fit_extra_params)
+    # Any log call after `auto_sklearn.fit` gets swallowed because it reconfigures logging.
+    # Have to open an issue to set up `logging_config` properly or provide better defaults.
     log.info(f"Finished fit in {training.duration}s.")
+    print(f"Finished fit in {training.duration}s.")
 
     def infer(data: Union[str, pd.DataFrame]):
         test_data = pd.read_parquet(data) if isinstance(data, str) else data
@@ -159,6 +163,7 @@ def sample_one_test_row(seed: int):
             infer,
             [(1, sample_one_test_row(seed=i)) for i in range(100)],
         )
     log.info(f"Finished inference time measurements.")
+    print(f"Finished inference time measurements.")
     # Convert output to strings for classification
     log.info("Predicting on the test set.")
@@ -167,6 +172,7 @@ def sample_one_test_row(seed: int):
         predictions = auto_sklearn.predict(X_test)
     probabilities = auto_sklearn.predict_proba(X_test) if is_classification else None
     log.info(f"Finished predict in {predict.duration}s.")
+    print(f"Finished predict in {predict.duration}s.")
 
     save_artifacts(auto_sklearn, config)
 
@@ -182,16 +188,37 @@ def sample_one_test_row(seed: int):
     )
 
 
+def save_models(estimator, config):
+    models_repr = estimator.show_models()
+    log.info("Trained Ensemble:\n%s", models_repr)
+    print(f"Trained Ensemble:\n{models_repr}")
+
+    if isinstance(models_repr, str):
+        models_file = os.path.join(output_subdir('models', config), 'models.txt')
+        with open(models_file, 'w') as f:
+            f.write(models_repr)
+    elif isinstance(models_repr, dict):
+        models_file = os.path.join(output_subdir('models', config), 'models.json')
+        with open(models_file, 'w') as f:
+            json.dump(models_repr, f, default=lambda obj: str(obj))
+    else:
+        log.warning(f"Saving 'models' is not supported for {type(models_repr)=}.")
+        print(f"Saving 'models' is not supported for {type(models_repr)=}.")
+
+
 def save_artifacts(estimator, config):
-    try:
-        models_repr = estimator.show_models()
-        log.debug("Trained Ensemble:\n%s", models_repr)
-        artifacts = config.framework_params.get('_save_artifacts', [])
-        if 'models' in artifacts:
-            models_file = os.path.join(output_subdir('models', config), 'models.txt')
-            with open(models_file, 'w') as f:
-                f.write(models_repr)
-        if 'debug_as_files' in artifacts or 'debug_as_zip' in artifacts:
+    artifacts = config.framework_params.get('_save_artifacts', [])
+    artifacts = [artifacts] if isinstance(artifacts, str) else artifacts
+    if 'models' in artifacts:
+        try:
+            save_models(estimator, config)
+        except Exception as e:
+            log.info(f"Error when saving 'models': {e}.", exc_info=True)
+            print(f"Error when saving 'models': {e}.")
+
+    if 'debug_as_files' in artifacts or 'debug_as_zip' in artifacts:
+        try:
+            log.info('Saving debug artifacts!')
             print('Saving debug artifacts!')
             debug_dir = output_subdir('debug', config)
             ignore_extensions = ['.npy', '.pcs', '.model', '.cv_model', '.ensemble', '.pkl']
@@ -216,8 +243,9 @@ def _copy(filename, **_):
             os.path.join(debug_dir, "artifacts.zip"),
             filter_=lambda p: os.path.splitext(p)[1] not in ignore_extensions
         )
-    except Exception as e:
-        log.debug("Error when saving artifacts= {e}.".format(e), exc_info=True)
+        except Exception as e:
+            log.info(f"Error when saving 'debug': {e}.", exc_info=True)
+            print(f"Error when saving 'debug': {e}.")
 
 
 if __name__ == '__main__':
diff --git a/resources/config.yaml b/resources/config.yaml
index bb7b43f9b..faae3657e 100644
--- a/resources/config.yaml
+++ b/resources/config.yaml
@@ -217,10 +217,12 @@ aws:  # configuration namespace for AWS mode.
       - 'MaxSpotInstanceCountExceeded'
       - 'InsufficientFreeAddressesInSubnet'
       - 'InsufficientInstanceCapacity'
+      - 'VolumeLimitExceeded'
     retry_on_states:  # EC2 instance states that will trigger a job reschedule.
       - 'Server.SpotInstanceShutdown'
       - 'Server.SpotInstanceTermination'
       - 'Server.InsufficientInstanceCapacity'
+      - 'Client.VolumeLimitExceeded'
   max_timeout_seconds: 21600
 #  os_mem_size_mb: 0  # overrides the default amount of memory left to the os in AWS mode, and set to 0 for fairness as we can't always prevent frameworks from using all available memory.
 
diff --git a/resources/frameworks_2023Q2.yaml b/resources/frameworks_2023Q2.yaml
index 8923bf246..59491e194 100644
--- a/resources/frameworks_2023Q2.yaml
+++ b/resources/frameworks_2023Q2.yaml
@@ -35,11 +35,14 @@ AutoGluon_hq_il001:
 
 autosklearn:
   version: '0.15.0'
+  params:
+    _save_artifacts: ['models', 'debug_as_zip']
 
 autosklearn2:
   extends: autosklearn
   params:
     _askl2: true
+    _save_artifacts: ['models', 'debug_as_zip']
 
 AutoWEKA:
   version: '2.6'
@@ -58,6 +61,8 @@ GAMA_benchmark:
 
 H2OAutoML:
   version: '3.40.0.4'
+  params:
+    _save_artifacts: ['leaderboard', 'logs']
 
 lightautoml:
   version: '0.3.7.3'
@@ -93,6 +98,8 @@ mlr3automl:
 
 TPOT:
   version: '0.12.0'
+  params:
+    _save_artifacts: ['models']
 
 #######################################
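
Reviewer note on the `logging_config` TODO in `frameworks/autosklearn/exec.py`: instead of mirroring every `log.*` call with a `print`, the handler reset could likely be avoided by handing auto-sklearn an explicit logger configuration. The sketch below is illustrative only; it assumes auto-sklearn's `logging_config` constructor argument accepts a standard `logging.config.dictConfig`-style mapping, and the formatter/handler names and the time budget are placeholders, not the benchmark's actual settings.

```python
# Illustrative sketch only: pass an explicit logging_config so auto-sklearn applies a
# known dictConfig during fit() instead of its bundled defaults, which is what
# currently swallows the benchmark's log calls. All names below are assumptions.
from autosklearn.classification import AutoSklearnClassifier

ASSUMED_LOGGING_CONFIG = {
    "version": 1,
    "disable_existing_loggers": False,  # keep amlb's framework logger and its handlers alive
    "formatters": {
        "simple": {"format": "%(asctime)s %(levelname)s %(name)s: %(message)s"},
    },
    "handlers": {
        "console": {"class": "logging.StreamHandler", "formatter": "simple"},
    },
    "root": {"level": "INFO", "handlers": ["console"]},
}

auto_sklearn = AutoSklearnClassifier(
    time_left_for_this_task=600,            # placeholder budget, not the benchmark's value
    logging_config=ASSUMED_LOGGING_CONFIG,  # dict in logging.config.dictConfig format
)
```

Whether this fully preserves amlb's handlers under auto-sklearn 0.15 would need to be verified; until then the `print` fallbacks in this diff are the pragmatic interim fix.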