Skip to content

Commit

Permalink
Added clear possibility of forcing EO provided at split-time by validation splitter to be used for computing the metric.
Browse files Browse the repository at this point in the history
  • Loading branch information
guillaume-chevalier committed Aug 15, 2022
1 parent 9d9bc25 commit b70aa53
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 3 deletions.
2 changes: 1 addition & 1 deletion examples/auto_ml/plot_automl_loop_clean_kata.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def main(tmpdir: str):
auto_ml = AutoML(
pipeline=pipeline,
hyperparams_optimizer=RandomSearchSampler(),
validation_splitter=ValidationSplitter(validation_size=0.20),
validation_splitter=ValidationSplitter(validation_size=0.20).set_to_force_expected_outputs_for_scoring(),
scoring_callback=ScoringCallback(accuracy_score, higher_score_is_better=True),
n_trials=7,
epochs=1,
Expand Down
4 changes: 2 additions & 2 deletions neuraxle/metaopt/auto_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def train_split(
eval_dact_train: DACT[IDT, ARG_Y_PREDICTD, ARG_Y_EXPECTED] = eval_dact_train
_has_empty_eo = eval_dact_train.expected_outputs is None or (hasattr(
eval_dact_train.expected_outputs, "__len__") and len(eval_dact_train.expected_outputs) == 0)
if _has_empty_eo:
if _has_empty_eo or self.validation_splitter.force_fixed_metric_expected_outputs is True:
eval_dact_train = eval_dact_train.with_eo(train_dact.expected_outputs)

if val_dact is not None:
Expand All @@ -151,7 +151,7 @@ def train_split(
eval_dact_valid: DACT[IDT, ARG_Y_PREDICTD, ARG_Y_EXPECTED] = eval_dact_valid
_has_empty_eo = eval_dact_valid.expected_outputs is None or (hasattr(
eval_dact_valid.expected_outputs, "__len__") and len(eval_dact_valid.expected_outputs) == 0)
if _has_empty_eo:
if _has_empty_eo or self.validation_splitter.force_fixed_metric_expected_outputs is True:
eval_dact_valid = eval_dact_valid.with_eo(val_dact.expected_outputs)
else:
eval_dact_valid = None
Expand Down
24 changes: 24 additions & 0 deletions neuraxle/metaopt/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,29 @@


class BaseValidationSplitter(ABC):

def __init__(self, force_fixed_metric_expected_outputs: bool = False):
    """
    Base constructor for validation splitters.

    :param force_fixed_metric_expected_outputs: If True, the expected outputs provided at
        split time are used to compute the metric instead of their possibly modified
        version after passing through the pipeline. More info in the documentation of
        :func:`set_to_force_expected_outputs_for_scoring`.
    """
    # Bug fix: the argument was previously ignored — the flag was hardcoded to
    # False, so passing True to the constructor silently had no effect.
    self.force_fixed_metric_expected_outputs: bool = force_fixed_metric_expected_outputs

def set_to_force_expected_outputs_for_scoring(self) -> 'BaseValidationSplitter':
    """
    Turn on ``force_fixed_metric_expected_outputs`` and return ``self`` (fluent API).

    Call this when the pipeline must NOT be allowed to alter the Y
    (expected_output) values during fit or transform: the expected outputs
    captured at split time will then be the ones used to compute the metrics
    in the Trainer's epochs loop.

    Avoid this setting when the pipeline legitimately produces its own
    expected outputs on the fly — for instance an autoencoder that splits a
    time series and generates its targets inside the pipeline, where the
    expected_output is None at split time and must be computed through the
    pipeline before being used for scoring.
    """
    self.force_fixed_metric_expected_outputs = True
    return self

def split_dact(self, data_container: DACT, context: CX) -> FoldsList[Tuple[TrainDACT, ValidDACT]]:
"""
Wrap a validation split function with a split data container function.
Expand Down Expand Up @@ -107,6 +130,7 @@ class ValidationSplitter(BaseValidationSplitter):
"""

def __init__(self, validation_size: float):
    """
    :param validation_size: ratio of the data (between 0 and 1) held out for validation.
    """
    # The two statements are independent: store the split ratio, then run the
    # base-class initialization.
    self.validation_size = validation_size
    BaseValidationSplitter.__init__(self)

def split(
Expand Down

0 comments on commit b70aa53

Please sign in to comment.