From a7ebe03da700e27811546fea34b552d3da920f22 Mon Sep 17 00:00:00 2001 From: Romain Beucher Date: Fri, 24 Jan 2025 12:20:09 +1100 Subject: [PATCH] Update NSIDC g02202, Fix #3868 --- doc/sphinx/source/input.rst | 2 +- .../data/cmor_config/NSIDC-G02202-nh.yml | 23 +++ .../data/cmor_config/NSIDC-G02202-sh.yml | 6 +- esmvaltool/cmorizers/data/datasets.yml | 6 +- .../formatters/datasets/nsidc_g02202_nh.py | 182 ++++++++++++++++++ .../formatters/datasets/nsidc_g02202_sh.py | 16 +- .../recipes/examples/recipe_check_obs.yml | 4 +- 7 files changed, 223 insertions(+), 16 deletions(-) create mode 100644 esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-nh.yml create mode 100644 esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_nh.py diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index f9bcfafc3e..c711320272 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -441,7 +441,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | NSIDC-0116-[nh|sh] [#note4]_ | usi, vsi (day) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| NSIDC-g02202-[sh] | siconc (SImon) | 3 | Python | +| NSIDC-g02202-[nh|sh] | siconc (SImon) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | OceanSODA-ETHZ | areacello (Ofx), co3os, dissicos, fgco2, phos, spco2, talkos (Omon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ diff --git a/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-nh.yml b/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-nh.yml new file mode 100644 index 0000000000..3259bb159b --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-nh.yml @@ -0,0 +1,23 @@ +--- +filename: sic_psn25_{year}.*.nc +# Common global attributes for Cmorizer output +attributes: + dataset_id: NSIDC-G02202-nh + version: '5' + tier: 3 + modeling_realm: reanaly + project_id: OBS6 + source: 'https://nsidc.org/data/g02202/versions/5' + reference: 'nsidc-g02202' + comment: '' + +variables: + siconc: + mip: SImon + raw: cdr_seaice_conc_monthly + compress: true + + +custom: + create_areacello: true + area_file: psn25area_v3.dat diff --git a/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-sh.yml b/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-sh.yml index 0bdeea488a..56fcd314bf 100644 --- a/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-sh.yml +++ b/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-sh.yml @@ -1,13 +1,13 @@ --- -filename: seaice_conc_monthly_sh_{year}.*.nc +filename: sic_pss25_{year}.*.nc # Common global attributes for Cmorizer output attributes: dataset_id: NSIDC-G02202-sh - version: '4' + version: '5' tier: 3 modeling_realm: reanaly project_id: OBS6 - source: 'https://nsidc.org/data/g02202/versions/4' + source: 'https://nsidc.org/data/g02202/versions/5' reference: 'nsidc-g02202' comment: '' diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 092ffc3340..43193b7cae 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -1131,15 +1131,15 @@ datasets: NSIDC-G02202-sh: tier: 3 - source: https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v4shmday - last_access: 2023-05-13 + source: https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v5shmday + last_access: 2025-01-24 info: | Download monthly data. Login required for download, and also requires citation only to use NSIDC-G02202-nh: tier: 3 - source: https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v4shmday + source: https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v5shmday last_access: 2025-01-24 info: | Download monthly data. diff --git a/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_nh.py b/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_nh.py new file mode 100644 index 0000000000..d63b24f0a5 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_nh.py @@ -0,0 +1,182 @@ +"""ESMValTool CMORizer for Sea Ice Concentration CDR (Northern Hemisphere). + +Tier + Tier 3: restricted dataset. + +Source + https://nsidc.org/data/g02202/versions/5 + +Last access + 20250124 + +Download and processing instructions + Download data from: + https://noaadata.apps.nsidc.org/NOAA/G02202_V5/north/monthly + lat and lon from: + https://noaadata.apps.nsidc.org/NOAA/G02202_V5/ancillary/ + area file: + ftp://sidads.colorado.edu/DATASETS/seaice/polar-stereo/tools/ + psn25area_v3.dat + + https://nsidc.org/sites/default/files/documents/user-guide/g02202-v005-userguide.pdf + +""" + +import logging +import os +import re + +import numpy as np + +import iris +from cf_units import Unit +from iris.coords import AuxCoord + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + + +def _get_filepaths(in_dir, basename, yyyy): + """Find correct name of file (extend basename with timestamp).""" + f_name = basename.format(year=yyyy) + regex = re.compile(f_name) + return_files = [] + for files in os.listdir(in_dir): + + if regex.match(files): + return_files.append(os.path.join(in_dir, files)) + + return return_files + + +def _fix_time_coord(cube, _field, _filename): + """Set time points to central day of month.""" + time_coord = cube.coord('time') + new_unit = Unit('days since 1850-01-01 00:00:00', calendar='standard') + time_coord.convert_units(new_unit) + old_time = new_unit.num2date(time_coord.points) + new_time = [d.replace(day=15) for d in old_time] + time_coord.points = new_unit.date2num(new_time) + + +def _prom_dim_coord(cube, _field, _filename): + iris.util.promote_aux_coord_to_dim_coord(cube, 'time') + + +def _create_coord(cubes, var_name, standard_name): + cube = cubes.extract_cube(standard_name) + coord = AuxCoord( + cube.data, + standard_name=standard_name, + long_name=cube.long_name, + var_name=var_name, + units='degrees' # cube.units, + ) + return coord + + +def _extract_variable(raw_var, cmor_info, attrs, filepath, out_dir, latlon): + """Extract variable from all files.""" + var = cmor_info.short_name + cubes = iris.load(filepath, raw_var, _prom_dim_coord) + iris.util.equalise_attributes(cubes) + + cube = cubes.concatenate_cube() + iris.util.promote_aux_coord_to_dim_coord(cube, 'projection_y_coordinate') + iris.util.promote_aux_coord_to_dim_coord(cube, 'projection_x_coordinate') + cube.coord('projection_y_coordinate').rename('y') + cube.coord('projection_x_coordinate').rename('x') + + cube.add_aux_coord(latlon[0], (1, 2)) + cube.add_aux_coord(latlon[1], (1, 2)) + # add coord typesi + area_type = AuxCoord([1.0], standard_name='area_type', var_name='type', + long_name='Sea Ice area type') + cube.add_aux_coord(area_type) + + # cube.convert_units(cmor_info.units) + cube.units = '%' + cube.data[cube.data > 100] = np.nan + cube = cube * 100 + + # utils.fix_coords(cube) #latlon multidimensional + utils.fix_var_metadata(cube, cmor_info) + utils.set_global_atts(cube, attrs) + + utils.save_variable(cube, + var, + out_dir, + attrs, + unlimited_dimensions=['time']) + + return cube + + +def _create_areacello(cfg, in_dir, sample_cube, glob_attrs, out_dir): + if not cfg['custom'].get('create_areacello', False): + return + var_info = cfg['cmor_table'].get_variable('Ofx', 'areacello') + glob_attrs['mip'] = 'Ofx' + lat_coord = sample_cube.coord('latitude') + + area_file = os.path.join(in_dir, cfg['custom']['area_file']) + with open(area_file, 'rb') as datfile: + areasdmnd = np.fromfile(datfile, + dtype=np.int32).reshape(lat_coord.shape) + + # Divide by 1000 to get km2 then multiply by 1e6 to m2 ...*1000 + ardata = areasdmnd * 1000 + + cube = iris.cube.Cube(ardata, + standard_name=var_info.standard_name, + long_name=var_info.long_name, + var_name=var_info.short_name, + units='m2', + dim_coords_and_dims=[(sample_cube.coord('y'), 0), + (sample_cube.coord('x'), 1)]) + cube.add_aux_coord(lat_coord, (0, 1)) + cube.add_aux_coord(sample_cube.coord('longitude'), (0, 1)) + utils.fix_var_metadata(cube, var_info) + utils.set_global_atts(cube, glob_attrs) + utils.save_variable(cube, var_info.short_name, out_dir, glob_attrs, + zlib=True) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + glob_attrs = cfg['attributes'] + cmor_table = cfg['cmor_table'] + + # get aux nc file + cubesaux = iris.load(os.path.join(in_dir, 'G02202-ancillary-psn25-v05r00.nc')) + lat_coord = _create_coord(cubesaux, 'lat', 'latitude') + lon_coord = _create_coord(cubesaux, 'lon', 'longitude') + year = 1978 + # split by year.. + sample_cube = None + for year in range(1979, 2025, 1): + + filepaths = _get_filepaths(in_dir, cfg['filename'], year) + + if len(filepaths) > 0: + logger.info("Found %d files in '%s'", len(filepaths), in_dir) + + for (var, var_info) in cfg['variables'].items(): + logger.info("CMORizing variable '%s'", var) + glob_attrs['mip'] = var_info['mip'] + cmor_info = cmor_table.get_variable(var_info['mip'], var) + raw_var = var_info.get('raw', var) + sample_cube = _extract_variable(raw_var, cmor_info, + glob_attrs, filepaths, + out_dir, [lat_coord, + lon_coord]) + + else: + logger.info("No files found ") + logger.info("year: %d basename: %s", year, cfg['filename']) + + year += 1 + + if sample_cube is not None: + _create_areacello(cfg, in_dir, sample_cube, glob_attrs, out_dir) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_sh.py b/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_sh.py index 202e370043..868e36460a 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_sh.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_sh.py @@ -1,24 +1,24 @@ -"""ESMValTool CMORizer for Sea Ice Concentration CDR. +"""ESMValTool CMORizer for Sea Ice Concentration CDR (Southern Hemisphere). Tier Tier 3: restricted dataset. Source - https://nsidc.org/data/g02202/versions/4 + https://nsidc.org/data/g02202/versions/5 Last access - 20231213 + 20250124 Download and processing instructions Download data from: - https://noaadata.apps.nsidc.org/NOAA/G02202_V4/south/monthly + https://noaadata.apps.nsidc.org/NOAA/G02202_V5/south/monthly lat and lon from: - https://noaadata.apps.nsidc.org/NOAA/G02202_V4/ancillary/ + https://noaadata.apps.nsidc.org/NOAA/G02202_V5/ancillary/ area file: ftp://sidads.colorado.edu/DATASETS/seaice/polar-stereo/tools/ pss25area_v3.dat - https://nsidc.org/sites/default/files/g02202-v004-userguide_1_1.pdf + https://nsidc.org/sites/default/files/documents/user-guide/g02202-v005-userguide.pdf """ @@ -155,14 +155,14 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): cmor_table = cfg['cmor_table'] # get aux nc file - cubesaux = iris.load(os.path.join(in_dir, 'G02202-cdr-ancillary-sh.nc')) + cubesaux = iris.load(os.path.join(in_dir, 'G02202-ancillary-pss25-v05r00.nc')) lat_coord = _create_coord(cubesaux, 'lat', 'latitude') lon_coord = _create_coord(cubesaux, 'lon', 'longitude') year = 1978 # split by year.. sample_cube = None - for year in range(1979, 2022, 1): + for year in range(1979, 2025, 1): filepaths = _get_filepaths(in_dir, cfg['filename'], year) diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index 880aef831a..aaafb8e15f 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -2020,8 +2020,10 @@ diagnostics: siconc: mip: SImon additional_datasets: + - {dataset: NSIDC-G02202-nh, project: OBS6, tier: 3, + type: reanaly, version: 5, start_year: 1979, end_year: 2024} - {dataset: NSIDC-G02202-sh, project: OBS6, tier: 3, - type: reanaly, version: 4, start_year: 1979, end_year: 2022} + type: reanaly, version: 5, start_year: 1979, end_year: 2024} scripts: null UWisc: