Skip to content

Commit

Permalink
Added clear possibility of forcing EO provided at split-time by validation splitter to be used for computing the metric.
Browse files Browse the repository at this point in the history
  • Loading branch information
guillaume-chevalier committed Aug 15, 2022
1 parent 9d9bc25 commit b70aa53
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 3 deletions.
2 changes: 1 addition & 1 deletion examples/auto_ml/plot_automl_loop_clean_kata.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def main(tmpdir: str):
auto_ml = AutoML(
pipeline=pipeline,
hyperparams_optimizer=RandomSearchSampler(),
validation_splitter=ValidationSplitter(validation_size=0.20),
validation_splitter=ValidationSplitter(validation_size=0.20).set_to_force_expected_outputs_for_scoring(),
scoring_callback=ScoringCallback(accuracy_score, higher_score_is_better=True),
n_trials=7,
epochs=1,
Expand Down
4 changes: 2 additions & 2 deletions neuraxle/metaopt/auto_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def train_split(
eval_dact_train: DACT[IDT, ARG_Y_PREDICTD, ARG_Y_EXPECTED] = eval_dact_train
_has_empty_eo = eval_dact_train.expected_outputs is None or (hasattr(
eval_dact_train.expected_outputs, "__len__") and len(eval_dact_train.expected_outputs) == 0)
if _has_empty_eo:
if _has_empty_eo or self.validation_splitter.force_fixed_metric_expected_outputs is True:
eval_dact_train = eval_dact_train.with_eo(train_dact.expected_outputs)

if val_dact is not None:
Expand All @@ -151,7 +151,7 @@ def train_split(
eval_dact_valid: DACT[IDT, ARG_Y_PREDICTD, ARG_Y_EXPECTED] = eval_dact_valid
_has_empty_eo = eval_dact_valid.expected_outputs is None or (hasattr(
eval_dact_valid.expected_outputs, "__len__") and len(eval_dact_valid.expected_outputs) == 0)
if _has_empty_eo:
if _has_empty_eo or self.validation_splitter.force_fixed_metric_expected_outputs is True:
eval_dact_valid = eval_dact_valid.with_eo(val_dact.expected_outputs)
else:
eval_dact_valid = None
Expand Down
24 changes: 24 additions & 0 deletions neuraxle/metaopt/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,29 @@


class BaseValidationSplitter(ABC):

def __init__(self, force_fixed_metric_expected_outputs: bool = False):
    """
    Base constructor for validation splitters.

    :param force_fixed_metric_expected_outputs: If True, the expected outputs provided at
        split time are used to compute the metric instead of their possibly modified
        version after passing through the pipeline. More info in the documentation of
        :func:`set_to_force_expected_outputs_for_scoring`.
    """
    # Bug fix: the argument was previously ignored — the flag was hardcoded to
    # False, so passing True to the constructor silently had no effect.
    self.force_fixed_metric_expected_outputs: bool = force_fixed_metric_expected_outputs

def set_to_force_expected_outputs_for_scoring(self) -> 'BaseValidationSplitter':
    """
    Turn on ``force_fixed_metric_expected_outputs`` and return ``self`` (fluent API).

    Call this when the pipeline must NOT be allowed to alter the Y
    (expected_output) values during fit or transform: the expected outputs
    captured at split time will then be the ones used to compute the metrics
    in the Trainer's epochs loop.

    Avoid this setting when the pipeline legitimately produces its own
    expected outputs on the fly — for instance an autoencoder that splits a
    time series and generates its targets inside the pipeline, where the
    expected_output is None at split time and must be computed through the
    pipeline before being used for scoring.
    """
    self.force_fixed_metric_expected_outputs = True
    return self

def split_dact(self, data_container: DACT, context: CX) -> FoldsList[Tuple[TrainDACT, ValidDACT]]:
"""
Wrap a validation split function with a split data container function.
Expand Down Expand Up @@ -107,6 +130,7 @@ class ValidationSplitter(BaseValidationSplitter):
"""

def __init__(self, validation_size: float):
    """
    :param validation_size: ratio of the data (between 0 and 1) held out for validation.
    """
    # The two statements are independent: store the split ratio, then run the
    # base-class initialization.
    self.validation_size = validation_size
    BaseValidationSplitter.__init__(self)

def split(
Expand Down

0 comments on commit b70aa53

Please sign in to comment.