Skip to content

Commit

Permalink
improved error handling for missing data and log information (#18)
Browse files Browse the repository at this point in the history
* improved error handling for missing data and log information

* check all grib2 infiles for missing fields

* Update README.md

---------

Co-authored-by: Andreas Pauling <paa@balfrin-ln003.cscs.ch>
Co-authored-by: Andreas Pauling <paa@balfrin-ln002.cscs.ch>
  • Loading branch information
3 people authored Aug 28, 2024
1 parent 4d52ffe commit 7fd1801
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 21 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,13 @@ station_obs_file : <path>/pollen_measured_2024020118.atab
station_mod_file : <path>/pollen_modelled_2024020118.atab
hour_incr : 1
```
`pov_infile`: This GRIB2 file must include the fields `tthrs`, `tthre` (for POAC, `saisl` instead), `saisn` and `ctsum` if the module `update_phenology` is called. If the module `update_strength` is called `pov_infile` must include the fields `saisn` and `tune`. `pov_infile` is used as template for `pov_outfile`, i.e. the whole file is copied to `pov_outfile` with adapted values. Date and time information of `pov_infile` does not have to be correct, ICON just throws warnings.
`pov_infile`: This GRIB2 file must include the fields `tthrs`, `tthre` (for POAC, `saisl` instead), `saisn` and `ctsum` if the module `update_phenology` is called. If the module `update_strength` is called `pov_infile` must include the fields `saisn` and `tune`. If at least one of these mandatory fields is missing the package exits with status 1 and tells the user. `pov_infile` is used as template for `pov_outfile`, i.e. the whole file is copied to `pov_outfile` with adapted values. Date and time information of `pov_infile` does not have to be correct, ICON just throws warnings.

`pov_outfile`: Same as `pov_infile` but with adapted values.
`t2m_file`: This GRIB2 file must include T_2M valid for 12h UTC of the current day (only used if the module `update_phenology` is called).
`const_file`: This GRIB2 file must contain CLON and CLAT of the unstructured grid used in `pov_infile` and `t2m_file`.
`station_obs_file`: Observed hourly pollen concentrations (ATAB format) of the latest 120 hours relative to the target date of `pov_outfile`. The timestamps of the data in this file may vary depending on data availability, time of extraction etc. Missing values are allowed.
`station_mod_file`: Modelled hourly pollen concentrations (ATAB format) of the latest 120 hours relative to the target date of `pov_outfile`. The timestamps of the data in this file may vary depending on data availability, time of extraction etc. Missing values are allowed. Same stations as in `station_obs_file` (only used if the module `update_strength` is called).
`station_obs_file`: Observed hourly pollen concentrations (ATAB format) of the latest 120 hours relative to the target date of `pov_outfile`. The timestamps of the data in this file may vary depending on data availability, time of extraction etc. Missing values are allowed but at least 50% of each station must be there. If not, the package exits with status 1 and tells the user.
`station_mod_file`: Modelled hourly pollen concentrations (ATAB format) of the latest 120 hours relative to the target date of `pov_outfile`. The timestamps of the data in this file may vary depending on data availability, time of extraction etc. In case of missing values the package exits with status 1 and tells the user. Same stations as in `station_obs_file` (only used if the module `update_strength` is called).
`hour_incr`: Increment of the timestamp of the outfile relative to the infile in hours (defaults to 1; negative values also supported). This parameter should be adapted if the calibration is done for a subsequent run more than one hour ahead.


Expand Down
18 changes: 16 additions & 2 deletions src/realtime_pollen_calibration/update_phenology.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""A module for the update of start and end of the pollen season."""

# Standard library
import sys
from datetime import datetime, timedelta

import numpy as np
Expand All @@ -28,7 +29,6 @@ def read_pov_file(pov_infile, pol_fields):
Args:
pov_infile: GRIB2 file containing pollen fields.
pol_fields: Names of the pollen fields.
config_obj: Object containing the configuration
Returns:
Fields for the pollen calibration.
Expand All @@ -51,6 +51,10 @@ def read_pov_file(pov_infile, pol_fields):

# Delete the message
codes_release(rec)

# Check if all mandatory fields for all species read are present. If not, exit.
utils.check_mandatory_fields(cal_fields, pol_fields, pov_infile)

return cal_fields


Expand Down Expand Up @@ -80,10 +84,20 @@ def read_t2m_file(t2m_file, config_obj):
date_obj_fmt = date_obj.strftime("%Y-%m-%dT%H:00:00.000000000")
time_values = np.datetime64(date_obj_fmt)
codes_release(rec)
if "T_2M" not in cal_fields:
print(
f"The mandatory field T_2M could not be read from {t2m_file}\n"
"No update of the phenology is done until this is fixed!\n"
"Pollen are still calculated but this should be fixed "
"within a few days."
)
sys.exit(1)
else:
print("T_2M field has been read from t2m_file.")
return cal_fields, time_values


def update_phenology_realtime(config_obj: utils.Config, verbose: bool = False):
def update_phenology_realtime(config_obj: utils.Config, verbose: bool = True):
"""Update the temperature threshold fields and POACsaisl.
Args:
Expand Down
6 changes: 5 additions & 1 deletion src/realtime_pollen_calibration/update_strength.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,14 @@ def read_pov_file(pov_infile, pol_fields, config_obj):
time_values = np.datetime64(date_obj_fmt)

codes_release(rec)

# Check if all mandatory fields for all species read are present. If not, exit.
utils.check_mandatory_fields(cal_fields, pol_fields, pov_infile)

return cal_fields, time_values


def update_strength_realtime(config_obj: utils.Config, verbose: bool = False):
def update_strength_realtime(config_obj: utils.Config, verbose: bool = True):
"""Update the tune field.
Args:
Expand Down
81 changes: 66 additions & 15 deletions src/realtime_pollen_calibration/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""Utils for the command line tool."""
# Standard library
import logging
import sys
from collections import namedtuple
from dataclasses import dataclass
from datetime import datetime, timedelta
Expand Down Expand Up @@ -114,8 +115,9 @@ def read_clon_clat(const_file):


def read_atab(
pollen_type: str, file_obs_stns: str, file_mod_stns: str = "", verbose: bool = False
pollen_type: str, file_obs_stns: str, file_mod_stns: str = "", verbose: bool = True
) -> ObsModData:
# pylint: disable=too-many-locals
"""Read the pollen concentrations and the station locations from the ATAB files.
Args:
Expand Down Expand Up @@ -179,14 +181,17 @@ def read_obs_header(file_data: str):
data = pd.read_csv(
file_obs_stns,
header=headerdata.n_header,
delim_whitespace=True,
sep=r"\s+",
parse_dates=[[1, 2, 3, 4, 5]],
)
data = data[data["PARAMETER"] == pollen_type].iloc[:, 2:].to_numpy()
if file_mod_stns != "":
stn_indicators_mod = None
missing_value = None
with open(file_mod_stns, encoding="utf-8") as f:
for n, line in enumerate(f):
if line.strip()[0:18] == "Missing_value_code":
missing_value = float(line.strip()[20:])
if line.strip()[0:9] == "Indicator":
stn_indicators_mod = np.array(line.strip()[29:].split(" "))
if line.strip()[0:9] == "PARAMETER":
Expand All @@ -196,14 +201,26 @@ def read_obs_header(file_data: str):
data_mod = pd.read_csv(
file_mod_stns,
header=n_header_mod,
delim_whitespace=True,
sep=r"\s+",
parse_dates=[[3, 4, 5, 6, 7]],
)
data_mod = data_mod[data_mod["PARAMETER"] == pollen_type].iloc[:, 4:].to_numpy()
if missing_value in data_mod:
print(
"There is at least one missing value",
f"in the model data file {file_mod_stns}.\n",
"Please check the reason (fieldextra retrieval namelist?).",
"No pollen calibration update is performed until this is fixed! ",
"Pollen in ICON will still work, but calibration fields get ",
"more and more outdated.",
)
sys.exit(1)
else:
data_mod = 0
istation_mod = 0
data = treat_missing(data, headerdata.missing_value, verbose=verbose)
data = treat_missing(
data, headerdata.missing_value, headerdata.stn_indicators, verbose=verbose
)
return ObsModData(
data, headerdata.coord_stns, headerdata.missing_value, data_mod, istation_mod
)
Expand All @@ -228,16 +245,14 @@ def create_data_arrays(cal_fields, clon, clat, time_values):
def treat_missing(
array,
missing_value: float = -9999.0,
tune_pol_default: float = 1.0,
verbose: bool = False,
stn_indicators: str = "",
verbose: bool = True,
):
"""Treat the missing values of the input array.
Args:
array: Array containing the concentration values.
missing_value: Value considered as a missing value.
tune_pol_default: Default value to which all values of a station
are set if more than 10% of the observations are missing.
verbose: Optional additional debug prints.
Returns:
Expand All @@ -251,27 +266,32 @@ def treat_missing(
skip_missing_stn[istation] = np.count_nonzero(array_missing[:, istation])
if verbose:
print(
f"Station {istation} has",
f"Station {stn_indicators[istation]} has",
f"{skip_missing_stn[istation]} missing values",
)
if skip_missing_stn[istation] > 0:
if (
np.count_nonzero(np.abs(array[:, istation] - missing_value) < 0.01)
/ len(array[:, istation])
< 0.1
< 0.5
):
idx1 = np.where(np.abs(array[:, istation] - missing_value) > 0.01)
idx2 = np.where(np.abs(array[:, istation] - missing_value) < 0.01)
if verbose:
print(
"Less than 10% of the data is missing, ",
"Less than 50% of the data is missing, ",
f"mean of the rest is: {np.mean(array[idx1, istation])}",
)
array[idx2, istation] = np.mean(array[idx1, istation])
else:
if verbose:
print("More than 10% of the data is missing")
array[:, istation] = tune_pol_default
print(
f"Station {stn_indicators[istation]} has more than 50% missing ",
"data. Please check the reason (Does jretrievedwh still work?).\n",
"No pollen calibration update is performed until this is fixed! ",
"Pollen in ICON will still work, but calibration fields get ",
"more and more outdated.",
)
sys.exit(1)
return array


Expand Down Expand Up @@ -427,7 +447,6 @@ def get_change_tune( # pylint: disable=R0913
f"Current tune value {tune_stns.values[0]} ",
f"and saisn: {saisn_stns.values[0]}",
)
print("-----------------------------------------")
if (saisn_stns > 0) and ((sum_obs <= 720) or (sum_mod <= 720)):
if verbose:
print(
Expand Down Expand Up @@ -497,6 +516,7 @@ def get_change_phenol( # pylint: disable=R0912,R0914,R0915
sum_obs = np.sum(obs_mod_data.data_obs[:, istation])
if verbose:
print(
f"Current pollen type is: {pollen_type}, ",
f"Current station n°{istation}, ",
f"(lat: {obs_mod_data.coord_stns[istation][0]}, ",
f"lon: {obs_mod_data.coord_stns[istation][1]}), ",
Expand Down Expand Up @@ -615,6 +635,37 @@ def get_change_phenol( # pylint: disable=R0912,R0914,R0915
return ChangePhenologyFields(change_tthrs, change_tthre, change_saisl)


def check_mandatory_fields(cal_fields, pol_fields, pov_infile):
"""Check if all mandatory fields for all species read are present.
Args:
cal_fields: Dictionary of calibration fields.
pol_fields: Names of the pollen fields.
pov_infile: GRIB2 file containing pollen fields.
Exits:
If any mandatory fields are missing.
"""
species_read = {key[:4] for key in cal_fields.keys()}

req_fields = [fld for fld in pol_fields if fld[:4] in species_read]

missing_fields = [fld for fld in req_fields if fld not in cal_fields.keys()]

if missing_fields:
print(
f"The mandatory field(s): {missing_fields}\n",
f"is/are missing in {pov_infile}\n"
"No pollen calibration is done until this is fixed!\n"
"Pollen are still calculated but this should be fixed "
"within a few days.",
)
sys.exit(1)
else:
print("All mandatory fields have been read from pov_infile.")


def to_grib(inp: str, outp: str, dict_fields: dict, hour_incr: int) -> None:
"""Output fields to a GRIB file.
Expand Down

0 comments on commit 7fd1801

Please sign in to comment.