diff --git a/README.md b/README.md index e178c1a..768f652 100644 --- a/README.md +++ b/README.md @@ -111,13 +111,13 @@ station_obs_file : /pollen_measured_2024020118.atab station_mod_file : /pollen_modelled_2024020118.atab hour_incr : 1 ``` -`pov_infile`: This GRIB2 file must include the fields `tthrs`, `tthre` (for POAC, `saisl` instead), `saisn` and `ctsum` if the module `update_phenology` is called. If the module `update_strength` is called `pov_infile` must include the fields `saisn` and `tune`. `pov_infile` is used as template for `pov_outfile`, i.e. the whole file is copied to `pov_outfile` with adapted values. Date and time information of `pov_infile` does not have to be correct, ICON just throws warnings. +`pov_infile`: This GRIB2 file must include the fields `tthrs`, `tthre` (for POAC, `saisl` instead), `saisn` and `ctsum` if the module `update_phenology` is called. If the module `update_strength` is called `pov_infile` must include the fields `saisn` and `tune`. If at least one of these mandatory fields is missing the package exits with status 1 and tells the user. `pov_infile` is used as template for `pov_outfile`, i.e. the whole file is copied to `pov_outfile` with adapted values. Date and time information of `pov_infile` does not have to be correct, ICON just throws warnings. `pov_outfile`: Same as `pov_infile` but with adapted values. `t2m_file`: This GRIB2 file must include T_2M valid for 12h UTC of the current day (only used if the module `update_phenology` is called). `const_file`: This GRIB2 file must contain CLON and CLAT of the unstructured grid used in `pov_infile` and `t2m_file`. -`station_obs_file`: Observed hourly pollen concentrations (ATAB format) of the latest 120 hours relative to the target date of `pov_outfile`. The timestamps of the data in this file may vary depending on data availability, time of extraction etc. Missing values are allowed. -`station_mod_file`: Modelled hourly pollen concentrations (ATAB format) of the latest 120 hours relative to the target date of `pov_outfile`. The timestamps of the data in this file may vary depending on data availability, time of extraction etc. Missing values are allowed. Same stations as in `station_obs_file` (only used if the module `update_strength` is called). +`station_obs_file`: Observed hourly pollen concentrations (ATAB format) of the latest 120 hours relative to the target date of `pov_outfile`. The timestamps of the data in this file may vary depending on data availability, time of extraction etc. Missing values are allowed but at least 50% of each station must be there. If not, the package exits with status 1 and tells the user. +`station_mod_file`: Modelled hourly pollen concentrations (ATAB format) of the latest 120 hours relative to the target date of `pov_outfile`. The timestamps of the data in this file may vary depending on data availability, time of extraction etc. In case of missing values the package exits with status 1 and tells the user. Same stations as in `station_obs_file` (only used if the module `update_strength` is called). `hour_incr`: Increment of the timestamp of the outfile relative to the infile in hours (defaults to 1; negative values also supported). This parameter should be adapted if the calibration is done for a subsequent run more than one hour ahead. diff --git a/src/realtime_pollen_calibration/update_phenology.py b/src/realtime_pollen_calibration/update_phenology.py index f9a1c88..c2cfe55 100644 --- a/src/realtime_pollen_calibration/update_phenology.py +++ b/src/realtime_pollen_calibration/update_phenology.py @@ -7,6 +7,7 @@ """A module for the update of start and end of the pollen season.""" # Standard library +import sys from datetime import datetime, timedelta import numpy as np @@ -28,7 +29,6 @@ def read_pov_file(pov_infile, pol_fields): Args: pov_infile: GRIB2 file containing pollen fields. pol_fields: Names of the pollen fields. - config_obj: Object containing the configuration Returns: Fields for the pollen calibration. @@ -51,6 +51,10 @@ def read_pov_file(pov_infile, pol_fields): # Delete the message codes_release(rec) + + # Check if all mandatory fields for all species read are present. If not, exit. + utils.check_mandatory_fields(cal_fields, pol_fields, pov_infile) + return cal_fields @@ -80,10 +84,20 @@ def read_t2m_file(t2m_file, config_obj): date_obj_fmt = date_obj.strftime("%Y-%m-%dT%H:00:00.000000000") time_values = np.datetime64(date_obj_fmt) codes_release(rec) + if "T_2M" not in cal_fields: + print( + f"The mandatory field T_2M could not be read from {t2m_file}\n" + "No update of the phenology is done until this is fixed!\n" + "Pollen are still calculated but this should be fixed " + "within a few days." + ) + sys.exit(1) + else: + print("T_2M field has been read from t2m_file.") return cal_fields, time_values -def update_phenology_realtime(config_obj: utils.Config, verbose: bool = False): +def update_phenology_realtime(config_obj: utils.Config, verbose: bool = True): """Update the temperature threshold fields and POACsaisl. Args: diff --git a/src/realtime_pollen_calibration/update_strength.py b/src/realtime_pollen_calibration/update_strength.py index 198adae..9dabded 100644 --- a/src/realtime_pollen_calibration/update_strength.py +++ b/src/realtime_pollen_calibration/update_strength.py @@ -61,10 +61,14 @@ def read_pov_file(pov_infile, pol_fields, config_obj): time_values = np.datetime64(date_obj_fmt) codes_release(rec) + + # Check if all mandatory fields for all species read are present. If not, exit. + utils.check_mandatory_fields(cal_fields, pol_fields, pov_infile) + return cal_fields, time_values -def update_strength_realtime(config_obj: utils.Config, verbose: bool = False): +def update_strength_realtime(config_obj: utils.Config, verbose: bool = True): """Update the tune field. Args: diff --git a/src/realtime_pollen_calibration/utils.py b/src/realtime_pollen_calibration/utils.py index edca971..f0d4dd0 100644 --- a/src/realtime_pollen_calibration/utils.py +++ b/src/realtime_pollen_calibration/utils.py @@ -7,6 +7,7 @@ """Utils for the command line tool.""" # Standard library import logging +import sys from collections import namedtuple from dataclasses import dataclass from datetime import datetime, timedelta @@ -114,8 +115,9 @@ def read_clon_clat(const_file): def read_atab( - pollen_type: str, file_obs_stns: str, file_mod_stns: str = "", verbose: bool = False + pollen_type: str, file_obs_stns: str, file_mod_stns: str = "", verbose: bool = True ) -> ObsModData: + # pylint: disable=too-many-locals """Read the pollen concentrations and the station locations from the ATAB files. Args: @@ -179,14 +181,17 @@ def read_obs_header(file_data: str): data = pd.read_csv( file_obs_stns, header=headerdata.n_header, - delim_whitespace=True, + sep=r"\s+", parse_dates=[[1, 2, 3, 4, 5]], ) data = data[data["PARAMETER"] == pollen_type].iloc[:, 2:].to_numpy() if file_mod_stns != "": stn_indicators_mod = None + missing_value = None with open(file_mod_stns, encoding="utf-8") as f: for n, line in enumerate(f): + if line.strip()[0:18] == "Missing_value_code": + missing_value = float(line.strip()[20:]) if line.strip()[0:9] == "Indicator": stn_indicators_mod = np.array(line.strip()[29:].split(" ")) if line.strip()[0:9] == "PARAMETER": @@ -196,14 +201,26 @@ def read_obs_header(file_data: str): data_mod = pd.read_csv( file_mod_stns, header=n_header_mod, - delim_whitespace=True, + sep=r"\s+", parse_dates=[[3, 4, 5, 6, 7]], ) data_mod = data_mod[data_mod["PARAMETER"] == pollen_type].iloc[:, 4:].to_numpy() + if missing_value in data_mod: + print( + "There is at least one missing value", + f"in the model data file {file_mod_stns}.\n", + "Please check the reason (fieldextra retrieval namelist?).", + "No pollen calibration update is performed until this is fixed! ", + "Pollen in ICON will still work, but calibration fields get ", + "more and more outdated.", + ) + sys.exit(1) else: data_mod = 0 istation_mod = 0 - data = treat_missing(data, headerdata.missing_value, verbose=verbose) + data = treat_missing( + data, headerdata.missing_value, headerdata.stn_indicators, verbose=verbose + ) return ObsModData( data, headerdata.coord_stns, headerdata.missing_value, data_mod, istation_mod ) @@ -228,16 +245,14 @@ def create_data_arrays(cal_fields, clon, clat, time_values): def treat_missing( array, missing_value: float = -9999.0, - tune_pol_default: float = 1.0, - verbose: bool = False, + stn_indicators: str = "", + verbose: bool = True, ): """Treat the missing values of the input array. Args: array: Array containing the concentration values. missing_value: Value considered as a missing value. - tune_pol_default: Default value to which all values of a station - are set if more than 10% of the observations are missing. verbose: Optional additional debug prints. Returns: @@ -251,27 +266,32 @@ def treat_missing( skip_missing_stn[istation] = np.count_nonzero(array_missing[:, istation]) if verbose: print( - f"Station n°{istation} has", + f"Station {stn_indicators[istation]} has", f"{skip_missing_stn[istation]} missing values", ) if skip_missing_stn[istation] > 0: if ( np.count_nonzero(np.abs(array[:, istation] - missing_value) < 0.01) / len(array[:, istation]) - < 0.1 + < 0.5 ): idx1 = np.where(np.abs(array[:, istation] - missing_value) > 0.01) idx2 = np.where(np.abs(array[:, istation] - missing_value) < 0.01) if verbose: print( - "Less than 10% of the data is missing, ", + "Less than 50% of the data is missing, ", f"mean of the rest is: {np.mean(array[idx1, istation])}", ) array[idx2, istation] = np.mean(array[idx1, istation]) else: - if verbose: - print("More than 10% of the data is missing") - array[:, istation] = tune_pol_default + print( + f"Station {stn_indicators[istation]} has more than 50% missing ", + "data. Please check the reason (Does jretrievedwh still work?).\n", + "No pollen calibration update is performed until this is fixed! ", + "Pollen in ICON will still work, but calibration fields get ", + "more and more outdated.", + ) + sys.exit(1) return array @@ -427,7 +447,6 @@ def get_change_tune( # pylint: disable=R0913 f"Current tune value {tune_stns.values[0]} ", f"and saisn: {saisn_stns.values[0]}", ) - print("-----------------------------------------") if (saisn_stns > 0) and ((sum_obs <= 720) or (sum_mod <= 720)): if verbose: print( @@ -497,6 +516,7 @@ def get_change_phenol( # pylint: disable=R0912,R0914,R0915 sum_obs = np.sum(obs_mod_data.data_obs[:, istation]) if verbose: print( + f"Current pollen type is: {pollen_type}, ", f"Current station n°{istation}, ", f"(lat: {obs_mod_data.coord_stns[istation][0]}, ", f"lon: {obs_mod_data.coord_stns[istation][1]}), ", @@ -615,6 +635,37 @@ def get_change_phenol( # pylint: disable=R0912,R0914,R0915 return ChangePhenologyFields(change_tthrs, change_tthre, change_saisl) +def check_mandatory_fields(cal_fields, pol_fields, pov_infile): + """Check if all mandatory fields for all species read are present. + + Args: + cal_fields: Dictionary of calibration fields. + pol_fields: Names of the pollen fields. + pov_infile: GRIB2 file containing pollen fields. + + Exits: + If any mandatory fields are missing. + + """ + species_read = {key[:4] for key in cal_fields.keys()} + + req_fields = [fld for fld in pol_fields if fld[:4] in species_read] + + missing_fields = [fld for fld in req_fields if fld not in cal_fields.keys()] + + if missing_fields: + print( + f"The mandatory field(s): {missing_fields}\n", + f"is/are missing in {pov_infile}\n" + "No pollen calibration is done until this is fixed!\n" + "Pollen are still calculated but this should be fixed " + "within a few days.", + ) + sys.exit(1) + else: + print("All mandatory fields have been read from pov_infile.") + + def to_grib(inp: str, outp: str, dict_fields: dict, hour_incr: int) -> None: """Output fields to a GRIB file.