From 941fc73b255347c79ef62afd559ca24b8dcad9c6 Mon Sep 17 00:00:00 2001 From: Philip Hackstock <20710924+phackstock@users.noreply.github.com> Date: Thu, 16 Jan 2025 10:55:50 +0100 Subject: [PATCH] Filter mappings from external repo (#460) * Add filter for external model mappings * Use external model mapping filter in RegionProcessor * Add config test * Add RegionProcessor test * Update docs * Apply suggestions from code review Co-authored-by: Daniel Huppmann * Change match_models to return list of matches instead of boolean * Add only model matches from external repos --------- Co-authored-by: Daniel Huppmann --- docs/user_guide/config.rst | 22 +++++++++++ nomenclature/config.py | 13 +++++++ nomenclature/processor/region.py | 38 +++++++++++++------ tests/data/config/filter_mappings.yaml | 9 +++++ .../external_repo_test/nomenclature.yaml | 5 ++- tests/test_config.py | 20 ++++++++-- tests/test_region_aggregation.py | 8 +--- 7 files changed, 92 insertions(+), 23 deletions(-) create mode 100644 tests/data/config/filter_mappings.yaml diff --git a/docs/user_guide/config.rst b/docs/user_guide/config.rst index 57f3db8e..65ef0634 100644 --- a/docs/user_guide/config.rst +++ b/docs/user_guide/config.rst @@ -152,3 +152,25 @@ validation: - region - variable - scenario + + +Filter model mappings from external repositories +------------------------------------------------ + +We often only want to use a subset of models in a particular project (and not import all mappings), so +there is an option to filter for specific model mappings. This works very similarly to +the filtering for definitions. + +.. code:: yaml + + repositories: + common-definitions: + url: https://github.com/IAMconsortium/common-definitions.git/ + mappings: + repository: + name: common-definitions + include: + - MESSAGEix-GLOBIOM 2.1-M-R12 + +The above example retrieves only the model mapping for *MESSAGEix-GLOBIOM 2.1-M-R12* +from the common-definitions repository. 
diff --git a/nomenclature/config.py b/nomenclature/config.py index fb5272c2..24c7eb52 100644 --- a/nomenclature/config.py +++ b/nomenclature/config.py @@ -209,6 +209,19 @@ def repos(self) -> dict[str, str]: class MappingRepository(BaseModel): name: str + include: list[str] = ["*"] + + @property + def regex_include_patterns(self): + return [re.compile(escape_regexp(pattern) + "$") for pattern in self.include] + + def match_models(self, models: list[str]) -> list[str]: + return [ + model + for model in models + for pattern in self.regex_include_patterns + if re.match(pattern, model) is not None + ] class RegionMappingConfig(BaseModel): diff --git a/nomenclature/processor/region.py b/nomenclature/processor/region.py index 976223d6..e853e648 100644 --- a/nomenclature/processor/region.py +++ b/nomenclature/processor/region.py @@ -233,7 +233,7 @@ def check_exclude_common_region_overlap( return _check_exclude_region_overlap(v, "common_regions") @classmethod - def from_file(cls, file: Path | str): + def from_file(cls, file: Path | str) -> "RegionAggregationMapping": """Initialize a RegionAggregationMapping from a file. 
Parameters @@ -380,6 +380,10 @@ def upload_native_regions(self) -> list[str]: def reverse_rename_mapping(self) -> dict[str, str]: return {renamed: original for original, renamed in self.rename_mapping.items()} + @property + def models(self) -> list[str]: + return self.model + def check_unexpected_regions(self, df: IamDataFrame) -> None: # Raise error if a region in the input data is not used in the model mapping @@ -479,21 +483,31 @@ def from_directory(cls, path: DirectoryPath, dsd: DataStructureDefinition): mapping_dict: dict[str, RegionAggregationMapping] = {} errors = ErrorCollector() - mapping_files = [f for f in path.glob("**/*") if f.suffix in {".yaml", ".yml"}] + mapping_files = [mapping_file for mapping_file in path.glob("**/*.y*ml")] + # Read model mappings from external repositories for repository in dsd.config.mappings.repositories: - mapping_files.extend( - f - for f in ( - dsd.config.repositories[repository.name].local_path / "mappings" - ).glob("**/*") - if f.suffix in {".yaml", ".yml"} - ) + for mapping_file in ( + dsd.config.repositories[repository.name].local_path / "mappings" + ).glob("**/*.y*ml"): + mapping = RegionAggregationMapping.from_file(mapping_file) + for model in repository.match_models(mapping.models): + if model not in mapping_dict: + mapping_dict[model] = mapping + else: + errors.append( + ValueError( + "Multiple region aggregation mappings for " + f"model {model} in [{mapping.file}, " + f"{mapping_dict[model].file}]" + ) + ) - for file in mapping_files: + # Read model mappings from the local repository + for mapping_file in mapping_files: try: - mapping = RegionAggregationMapping.from_file(file) - for model in mapping.model: + mapping = RegionAggregationMapping.from_file(mapping_file) + for model in mapping.models: if model not in mapping_dict: mapping_dict[model] = mapping else: diff --git a/tests/data/config/filter_mappings.yaml b/tests/data/config/filter_mappings.yaml new file mode 100644 index 00000000..90a95fb7 --- /dev/null +++ 
b/tests/data/config/filter_mappings.yaml @@ -0,0 +1,9 @@ +repositories: + common-definitions: + url: https://github.com/IAMconsortium/common-definitions.git/ + hash: 091c0fe +mappings: + repository: + name: common-definitions + include: + - MESSAGEix-GLOBIOM 2.1-M-R12 diff --git a/tests/data/region_processing/external_repo_test/nomenclature.yaml b/tests/data/region_processing/external_repo_test/nomenclature.yaml index 1945b452..aae2198a 100644 --- a/tests/data/region_processing/external_repo_test/nomenclature.yaml +++ b/tests/data/region_processing/external_repo_test/nomenclature.yaml @@ -11,4 +11,7 @@ definitions: variable: repository: common-definitions mappings: - repository: common-definitions + repository: + name: common-definitions + include: + - REMIND-MAgPIE 3.1-4.6 diff --git a/tests/test_config.py b/tests/test_config.py index 1385bfa5..73f9efa8 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -2,10 +2,7 @@ import pytest from pytest import raises -from nomenclature.config import ( - Repository, - NomenclatureConfig, -) +from nomenclature.config import Repository, NomenclatureConfig, MappingRepository from conftest import TEST_DATA_DIR, clean_up_external_repos @@ -93,3 +90,18 @@ def test_config_with_filter(config_file): assert isinstance(config.definitions.variable.repositories, list) finally: clean_up_external_repos(config.repositories) + + +def test_config_external_repo_mapping_filter(): + + config = NomenclatureConfig.from_file( + TEST_DATA_DIR / "config" / "filter_mappings.yaml" + ) + exp = MappingRepository( + name="common-definitions", include=["MESSAGEix-GLOBIOM 2.1-M-R12"] + ) + try: + assert isinstance(config.mappings.repositories, list) + assert config.mappings.repositories[0] == exp + finally: + clean_up_external_repos(config.repositories) diff --git a/tests/test_region_aggregation.py b/tests/test_region_aggregation.py index 80ed54cb..5640b71f 100644 --- a/tests/test_region_aggregation.py +++ b/tests/test_region_aggregation.py @@ 
-239,7 +239,7 @@ def test_region_processor_unexpected_region_raises(): def test_mapping_from_external_repository(): - # This test reads both mappings and definitions from an external repository only + # This test reads definitions and the mapping for only REMIND-MAgPIE 3.1-4.6 # from an external repository only try: processor = RegionProcessor.from_directory( TEST_FOLDER_REGION_PROCESSING / "external_repo_test" / "mappings", @@ -247,11 +247,7 @@ def test_mapping_from_external_repository(): TEST_FOLDER_REGION_PROCESSING / "external_repo_test" / "definitions" ), ) - - assert all( - model in processor.mappings.keys() - for model in ("REMIND 3.1", "REMIND-MAgPIE 3.1-4.6") - ) + assert {"REMIND-MAgPIE 3.1-4.6"} == set(processor.mappings.keys()) finally: clean_up_external_repos(dsd.config.repositories)