Skip to content

Commit

Permalink
Merge master into stable (#559)
Browse files Browse the repository at this point in the history
* The new `models_repr` is a dictionary, which is not compatible with `f.write`
* Add VolumeLimitExceeded to retry lists
* Save additional information by default
  • Loading branch information
PGijsbers authored Jun 28, 2023
1 parent 319b48e commit d36b185
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 24 deletions.
1 change: 1 addition & 0 deletions amlb/datasets/openml.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import os
import re
from typing import Generic, Tuple, TypeVar, List

import arff
import pandas as pd
import pandas.api.types as pat
Expand Down
13 changes: 0 additions & 13 deletions frameworks/TunedRandomForest/exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,19 +227,6 @@ def infer(data):
)
log.info(f"Finished inference time measurements.")

def infer(data):
    """Predict with the fitted random forest; `data` may be a DataFrame or a parquet file path."""
    frame = data if not isinstance(data, str) else pd.read_parquet(data)
    return rf.predict(frame)

# Optional inference-time benchmark: timings are collected per input kind
# ("file" = parquet files on disk, "df" = in-memory single-row DataFrames).
inference_times = {}
if config.measure_inference_time:
# Time predictions on the pre-generated subsample files.
inference_times["file"] = measure_inference_times(infer, dataset.inference_subsample_files)
# Normalize X_test to a DataFrame so .sample() is available below.
test_data = X_test if isinstance(X_test, pd.DataFrame) else pd.DataFrame(X_test)
# 100 repeated single-row predictions; fixed random_state per draw keeps runs reproducible.
inference_times["df"] = measure_inference_times(
infer,
[(1, test_data.sample(1, random_state=i)) for i in range(100)],
)

return result(
output_file=config.output_predictions_file,
predictions=predictions,
Expand Down
50 changes: 39 additions & 11 deletions frameworks/autosklearn/exec.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
import math
import os
Expand Down Expand Up @@ -139,7 +140,10 @@ def run(dataset, config):
auto_sklearn = estimator(**constr_params, **training_params)
with Timer() as training:
auto_sklearn.fit(X_train, y_train, **fit_extra_params)
# Any log call after `auto_sklearn.fit` gets swallowed because it reconfigures logging
# Have to open an issue to set up `logging_config` right or have better defaults.
log.info(f"Finished fit in {training.duration}s.")
print(f"Finished fit in {training.duration}s.")

def infer(data: Union[str, pd.DataFrame]):
test_data = pd.read_parquet(data) if isinstance(data, str) else data
Expand All @@ -159,6 +163,7 @@ def sample_one_test_row(seed: int):
infer, [(1, sample_one_test_row(seed=i)) for i in range(100)],
)
log.info(f"Finished inference time measurements.")
print(f"Finished inference time measurements.")

# Convert output to strings for classification
log.info("Predicting on the test set.")
Expand All @@ -167,6 +172,7 @@ def sample_one_test_row(seed: int):
predictions = auto_sklearn.predict(X_test)
probabilities = auto_sklearn.predict_proba(X_test) if is_classification else None
log.info(f"Finished predict in {predict.duration}s.")
print(f"Finished predict in {predict.duration}s.")

save_artifacts(auto_sklearn, config)

Expand All @@ -182,16 +188,37 @@ def sample_one_test_row(seed: int):
)


def save_models(estimator, config):
    """Persist the trained ensemble description to the 'models' output subdirectory.

    `estimator.show_models()` returns a plain string on older auto-sklearn
    versions and a dict on newer ones; each is written in a matching format
    (models.txt / models.json). Any other return type is logged and skipped.
    """
    models_repr = estimator.show_models()
    log.info("Trained Ensemble:\n%s", models_repr)
    # NOTE: print() does not apply %-style formatting the way logging does,
    # so interpolate explicitly instead of passing lazy arguments.
    print(f"Trained Ensemble:\n{models_repr}")

    if isinstance(models_repr, str):
        models_file = os.path.join(output_subdir('models', config), 'models.txt')
        with open(models_file, 'w') as f:
            f.write(models_repr)
    elif isinstance(models_repr, dict):
        models_file = os.path.join(output_subdir('models', config), 'models.json')
        with open(models_file, 'w') as f:
            # Ensemble members are not JSON-serializable; fall back to their str().
            json.dump(models_repr, f, default=str)
    else:
        log.warning(f"Saving 'models' where {type(models_repr)=} not supported.")
        print(f"Saving 'models' where {type(models_repr)=} not supported.")


def save_artifacts(estimator, config):
try:
models_repr = estimator.show_models()
log.debug("Trained Ensemble:\n%s", models_repr)
artifacts = config.framework_params.get('_save_artifacts', [])
if 'models' in artifacts:
models_file = os.path.join(output_subdir('models', config), 'models.txt')
with open(models_file, 'w') as f:
f.write(models_repr)
if 'debug_as_files' in artifacts or 'debug_as_zip' in artifacts:
artifacts = config.framework_params.get('_save_artifacts', [])
artifacts = [artifacts] if isinstance(artifacts, str) else artifacts
if 'models' in artifacts:
try:
save_models(estimator, config)
except Exception as e:
log.info(f"Error when saving 'models': {e}.", exc_info=True)
print(f"Error when saving 'models': {e}.")

if 'debug_as_files' in artifacts or 'debug_as_zip' in artifacts:
try:
log.info('Saving debug artifacts!')
print('Saving debug artifacts!')
debug_dir = output_subdir('debug', config)
ignore_extensions = ['.npy', '.pcs', '.model', '.cv_model', '.ensemble', '.pkl']
Expand All @@ -216,8 +243,9 @@ def _copy(filename, **_):
os.path.join(debug_dir, "artifacts.zip"),
filter_=lambda p: os.path.splitext(p)[1] not in ignore_extensions
)
except Exception as e:
log.debug("Error when saving artifacts= {e}.".format(e), exc_info=True)
except Exception as e:
log.info(f"Error when saving 'debug': {e}.", exc_info=True)
print(f"Error when saving 'debug': {e}.")


if __name__ == '__main__':
Expand Down
2 changes: 2 additions & 0 deletions resources/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -217,10 +217,12 @@ aws: # configuration namespace for AWS mode.
- 'MaxSpotInstanceCountExceeded'
- 'InsufficientFreeAddressesInSubnet'
- 'InsufficientInstanceCapacity'
- 'VolumeLimitExceeded'
retry_on_states: # EC2 instance states that will trigger a job reschedule.
- 'Server.SpotInstanceShutdown'
- 'Server.SpotInstanceTermination'
- 'Server.InsufficientInstanceCapacity'
- 'Client.VolumeLimitExceeded'

max_timeout_seconds: 21600 #
os_mem_size_mb: 0 # overrides the default amount of memory left to the os in AWS mode, and set to 0 for fairness as we can't always prevent frameworks from using all available memory.
Expand Down
7 changes: 7 additions & 0 deletions resources/frameworks_2023Q2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,14 @@ AutoGluon_hq_il001:

autosklearn:
version: '0.15.0'
params:
_save_artifacts: ['models', 'debug_as_zip']

autosklearn2:
extends: autosklearn
params:
_askl2: true
_save_artifacts: ['models', 'debug_as_zip']

AutoWEKA:
version: '2.6'
Expand All @@ -58,6 +61,8 @@ GAMA_benchmark:

H2OAutoML:
version: '3.40.0.4'
params:
_save_artifacts: ['leaderboard', 'logs']

lightautoml:
version: '0.3.7.3'
Expand Down Expand Up @@ -93,6 +98,8 @@ mlr3automl:

TPOT:
version: '0.12.0'
params:
_save_artifacts: ['models']


#######################################
Expand Down

0 comments on commit d36b185

Please sign in to comment.