Add range-based data validation criteria (`range: [lower, upper]`).

nomenclature/processor/data_validator.py
  * Imports `Field` from pydantic.
  * Adds `DataValidationCriteriaRange`, a `DataValidationCriteria` subclass whose
    `range` field is a list of exactly two floats (min_length=2, max_length=2).
    An after-validator raises `ValueError` when `range[0] > range[1]`.
    `lower_bound` / `upper_bound` are computed fields returning `range[0]` /
    `range[1]`. `validation_args` dumps the model without `warning_level` and
    `range`; `criteria` dumps it without `warning_level`, `lower_bound` and
    `upper_bound`.
  * Extends the `DataValidationCriteriaMultiple.validation` union with the new
    class.

tests/data/validation/validate_data/validate_warning_range.yaml
  * New fixture: one `Primary Energy` / 2010 item using `range: [ 1, 5 ]` plus a
    `low` warning level with explicit bounds, and one `Primary Energy|Coal`
    item with top-level bounds.

tests/test_validate_data.py
  * Adds the ("range", 6.0) case to `test_DataValidator_validate_with_warning`.
    For that case the expected log message has
    "upper_bound: 5.0, lower_bound: 1.0" replaced by "range: [1.0, 5.0]".

NOTE(review): this patch was recovered from a copy whose line breaks and runs of
whitespace had been collapsed. Line breaks, indentation and blank context lines
were restored from the hunk line counts and from Python/YAML syntax. The column
spacing inside the expected-log table rows of tests/test_validate_data.py could
not be recovered. Verify the context lines against the repository before
applying.

diff --git a/nomenclature/processor/data_validator.py b/nomenclature/processor/data_validator.py
index dda02f9c..86e94abc 100644
--- a/nomenclature/processor/data_validator.py
+++ b/nomenclature/processor/data_validator.py
@@ -7,7 +7,7 @@
 from pandas import concat
 from pyam import IamDataFrame
 from pyam.logging import adjust_log_level
-from pydantic import computed_field, field_validator, model_validator
+from pydantic import computed_field, field_validator, model_validator, Field
 
 from nomenclature.definition import DataStructureDefinition
 from nomenclature.error import ErrorCollector
@@ -86,9 +86,49 @@ def criteria(self):
         )
 
 
+class DataValidationCriteriaRange(DataValidationCriteria):
+    range: list[float] = Field(..., min_length=2, max_length=2)
+
+    @model_validator(mode="after")
+    def check_range_is_valid(self):
+        if self.range[0] > self.range[1]:
+            raise ValueError("Validation range is invalid: " + str(self.criteria))
+        return self
+
+    @computed_field
+    def upper_bound(self) -> float:
+        return self.range[1]
+
+    @computed_field
+    def lower_bound(self) -> float:
+        return self.range[0]
+
+    @property
+    def validation_args(self):
+        """Attributes used for validation (as bounds)."""
+        return self.model_dump(
+            exclude_none=True,
+            exclude_unset=True,
+            exclude=["warning_level", "range"],
+        )
+
+    @property
+    def criteria(self):
+        return self.model_dump(
+            exclude_none=True,
+            exclude_unset=True,
+            exclude=["warning_level", "lower_bound", "upper_bound"],
+        )
+
+
 class DataValidationCriteriaMultiple(IamcDataFilter):
     validation: (
-        list[DataValidationCriteriaValue | DataValidationCriteriaBounds] | None
+        list[
+            DataValidationCriteriaValue
+            | DataValidationCriteriaBounds
+            | DataValidationCriteriaRange
+        ]
+        | None
     ) = None
 
     @model_validator(mode="after")
diff --git a/tests/data/validation/validate_data/validate_warning_range.yaml b/tests/data/validation/validate_data/validate_warning_range.yaml
new file mode 100644
index 00000000..d61e5e2f
--- /dev/null
+++ b/tests/data/validation/validate_data/validate_warning_range.yaml
@@ -0,0 +1,11 @@
+  - variable: Primary Energy
+    year: 2010
+    validation:
+      - range: [ 1, 5 ]
+      - warning_level: low
+        upper_bound: 2.5
+        lower_bound: 1
+  - variable: Primary Energy|Coal
+    year: 2010
+    upper_bound: 5
+    lower_bound: 1
diff --git a/tests/test_validate_data.py b/tests/test_validate_data.py
index 01a41682..d1ca0fed 100644
--- a/tests/test_validate_data.py
+++ b/tests/test_validate_data.py
@@ -137,7 +137,7 @@ def test_DataValidator_apply_fails(simple_df, file, item_1, item_2, item_3, capl
 
 @pytest.mark.parametrize(
     "file, value",
-    [("joined", 6.0), ("joined", 3.0), ("legacy", 6.0)],
+    [("joined", 6.0), ("joined", 3.0), ("legacy", 6.0), ("range", 6.0)],
 )
 def test_DataValidator_validate_with_warning(file, value, simple_df, caplog):
     """Checks that failed validation rows are printed in log."""
@@ -154,6 +154,7 @@ def test_DataValidator_validate_with_warning(file, value, simple_df, caplog):
 0 model_a scen_a World Primary Energy EJ/yr 2010 6.0 error
 1 model_a scen_b World Primary Energy EJ/yr 2010 7.0 error"""
     )
+
     if file == "legacy":
         # prints both error and low warning levels for legacy format
         # because these are treated as independent validation-criteria
@@ -164,6 +165,11 @@ def test_DataValidator_validate_with_warning(file, value, simple_df, caplog):
 0 model_a scen_a World Primary Energy EJ/yr 2010 6.0 low
 1 model_a scen_b World Primary Energy EJ/yr 2010 7.0 low"""
 
+    if file == "range":
+        failed_validation_message = failed_validation_message.replace(
+            "upper_bound: 5.0, lower_bound: 1.0", "range: [1.0, 5.0]"
+        )
+
     if value == 3.0:
         # prints each warning level when each is triggered by different rows
         failed_validation_message = """