diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000000..ca70a5b2a0 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,3 @@ +[report] +exclude_also = + if TYPE_CHECKING: diff --git a/auxiliary_tools/cdat_regression_testing/660-cosp-histogram/660_cosp_histogram_regression_test_netcdf.ipynb b/auxiliary_tools/cdat_regression_testing/660-cosp-histogram/660_cosp_histogram_regression_test_netcdf.ipynb new file mode 100644 index 0000000000..a9bb918e7d --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/660-cosp-histogram/660_cosp_histogram_regression_test_netcdf.ipynb @@ -0,0 +1,270 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CDAT Migration Regression Testing Notebook (`.nc` files)\n", + "\n", + "This notebook is used to perform regression testing between the development and\n", + "production versions of a diagnostic set.\n", + "\n", + "## How it works\n", + "\n", + "It compares the relative differences (%) of the ref and test variables between\n", + "the dev and `main` branches.\n", + "\n", + "## How to use\n", + "\n", + "PREREQUISITE: The diagnostic set's netCDF files are stored in two directories\n", + "(dev and `main` branches).\n", + "\n", + "1. Make a copy of this notebook under `auxiliary_tools/cdat_regression_testing/`.\n", + "2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" xarray dask pandas matplotlib-base ipykernel`\n", + "3. Run `mamba activate cdat_regression_test`\n", + "4. Update `SET_DIR` and `SET_NAME` in the copy of your notebook.\n", + "5. Run all cells IN ORDER.\n", + "6. Review results for any outstanding differences (relative difference >= 1e-5).\n", + "   - Debug these differences (e.g., bug in metrics functions, incorrect variable references, etc.)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup Code\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "import glob\n", + "\n", + "import numpy as np\n", + "import xarray as xr\n", + "\n", + "# TODO: Update SET_NAME and SET_DIR\n", + "SET_NAME = \"cosp_histogram\"\n", + "SET_DIR = \"660-cosp-histogram\"\n", + "\n", + "DEV_PATH = f\"/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/{SET_DIR}/{SET_NAME}/**\"\n", + "MAIN_PATH = f\"/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/{SET_NAME}/**\"\n", + "\n", + "DEV_GLOB = sorted(glob.glob(DEV_PATH + \"/*.nc\"))\n", + "MAIN_GLOB = sorted(glob.glob(MAIN_PATH + \"/*.nc\"))\n", + "\n", + "if len(DEV_GLOB) == 0 or len(MAIN_GLOB) == 0:\n", + " raise IOError(\"No files found at DEV_PATH and/or MAIN_PATH.\")\n", + "\n", + "if len(DEV_GLOB) != len(MAIN_GLOB):\n", + " raise IOError(\"Number of files do not match at DEV_PATH and MAIN_PATH.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "def _get_var_to_filepath_map():\n", + " var_to_file = defaultdict(lambda: defaultdict(dict))\n", + "\n", + " for dev_file, main_file in zip(DEV_GLOB, MAIN_GLOB):\n", + " # Example:\n", + " # \"/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-ANN-global_test.nc\"\n", + " file_arr = dev_file.split(\"/\")\n", + "\n", + " # Example: \"test\"\n", + " data_type = dev_file.split(\"_\")[-1].split(\".nc\")[0]\n", + "\n", + " # Skip comparing `.nc` \"diff\" files 
because comparing the relative\n", + " # diffs of diff files does not make sense.\n", + " if data_type == \"test\" or data_type == \"ref\":\n", + " # Example: \"ISCCP\"\n", + " model = file_arr[-2].split(\"-\")[0]\n", + " season = \"JJA\" if \"JJA\" in dev_file else \"ANN\"\n", + "\n", + " var_to_file[model][data_type][season] = (dev_file, main_file)\n", + "\n", + " return var_to_file\n", + "\n", + "\n", + "def _get_relative_diffs(var_to_filepath):\n", + " # Absolute tolerance of 0 and relative tolerance of 1e-5.\n", + " # We are mainly focusing on relative tolerance here (in percentage terms).\n", + " atol = 0\n", + " rtol = 1e-5\n", + "\n", + " for model, data_types in var_to_filepath.items():\n", + " for _, seasons in data_types.items():\n", + " for _, filepaths in seasons.items():\n", + " print(\"Comparing:\")\n", + " print(filepaths[0], \"\\n\", filepaths[1])\n", + " ds1 = xr.open_dataset(filepaths[0])\n", + " ds2 = xr.open_dataset(filepaths[1])\n", + "\n", + " try:\n", + " var_key = f\"COSP_HISTOGRAM_{model}\"\n", + " np.testing.assert_allclose(\n", + " ds1[var_key].values,\n", + " ds2[var_key].values,\n", + " atol=atol,\n", + " rtol=rtol,\n", + " )\n", + " except AssertionError as e:\n", + " print(e)\n", + " else:\n", + " print(f\" * All close and within relative tolerance ({rtol})\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Compare the netCDF files between branches\n", + "\n", + "- Compare \"ref\" and \"test\" files\n", + "- \"diff\" files are ignored because getting relative diffs for these does not make sense (relative diff will be above tolerance)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "var_to_filepaths = _get_var_to_filepath_map()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-ANN-global_ref.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-ANN-global_ref.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-JJA-global_ref.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-JJA-global_ref.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-ANN-global_test.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-ANN-global_test.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-JJA-global_test.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-JJA-global_test.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + 
"/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-ANN-global_ref.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-ANN-global_ref.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-JJA-global_ref.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-JJA-global_ref.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-ANN-global_test.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-ANN-global_test.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-JJA-global_test.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-JJA-global_test.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-ANN-global_ref.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-ANN-global_ref.nc\n", + "\n", + "Not equal to tolerance rtol=1e-05, atol=0\n", + "\n", + "Mismatched elements: 42 / 42 (100%)\n", + "Max absolute difference: 4.23048367e-05\n", + "Max relative difference: 1.16682146e-05\n", + " x: array([[0.703907, 2.669376, 3.065526, 1.579834, 0.363847, 0.128541],\n", + " [0.147366, 1.152637, 3.67049 , 3.791006, 1.398453, 0.392103],\n", + " [0.07496 , 0.474791, 1.37002 , 1.705649, 0.786423, 0.346744],...\n", + " y: array([[0.703899, 2.669347, 3.065492, 1.579816, 0.363843, 0.12854 ],\n", + " [0.147364, 1.152624, 3.670448, 3.790965, 1.398438, 0.392099],\n", + " [0.074959, 0.474786, 1.370004, 1.705629, 0.786415, 0.34674 ],...\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-JJA-global_ref.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-JJA-global_ref.nc\n", + "\n", + "Not equal to tolerance rtol=1e-05, atol=0\n", + "\n", + "Mismatched elements: 42 / 42 (100%)\n", + "Max absolute difference: 4.91806181e-05\n", + "Max relative difference: 1.3272405e-05\n", + " x: array([[0.62896 , 2.657657, 3.206268, 1.704946, 0.398659, 0.169424],\n", + " [0.147569, 1.228835, 3.697387, 3.727142, 1.223123, 0.436504],\n", + " [0.072129, 0.508413, 1.167637, 1.412202, 0.638085, 0.362268],...\n", + " y: array([[0.628952, 2.657625, 3.206227, 1.704924, 0.398654, 0.169422],\n", + " [0.147567, 1.228819, 3.697338, 3.727093, 1.223107, 0.436498],\n", + " [0.072128, 0.508407, 1.167622, 1.412183, 0.638076, 0.362263],...\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-ANN-global_test.nc \n", + " 
/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-ANN-global_test.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-JJA-global_test.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-JJA-global_test.nc\n", + " * All close and within relative tolerance (1e-05)\n" + ] + } + ], + "source": [ + "_get_relative_diffs(var_to_filepaths)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Results\n", + "\n", + "- All files are within the 1e-05 relative tolerance except the MODIS-COSP \"ref\" files, which exceed it only marginally (max relative difference ~1.3e-05 in the output above), so things should be good to go.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cdat_regression_test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/auxiliary_tools/cdat_regression_testing/660-cosp-histogram/660_cosp_histogram_run_script.py b/auxiliary_tools/cdat_regression_testing/660-cosp-histogram/660_cosp_histogram_run_script.py new file mode 100644 index 0000000000..79dbda4f26 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/660-cosp-histogram/660_cosp_histogram_run_script.py @@ -0,0 +1,6 @@ +from auxiliary_tools.cdat_regression_testing.base_run_script import run_set + +SET_NAME = "cosp_histogram" +SET_DIR = "660-cosp-histogram" + +run_set(SET_NAME, SET_DIR) diff --git a/auxiliary_tools/cdat_regression_testing/base_run_script.py b/auxiliary_tools/cdat_regression_testing/base_run_script.py index f7cef4c78f..88c735d9b2 100644 --- a/auxiliary_tools/cdat_regression_testing/base_run_script.py +++ b/auxiliary_tools/cdat_regression_testing/base_run_script.py @@ -44,7 +44,7 @@ class MachinePaths(TypedDict): tc_test: str -def run_set(set_name: str, set_dir: str, save_netcdf: bool): +def run_set(set_name: str, set_dir: str): machine_paths: MachinePaths = _get_machine_paths() param = CoreParameter() @@ -62,7 +62,7 @@ def run_set(set_name: str, set_dir: str): param.num_workers = 5 # Make sure to save the netCDF files to compare outputs. 
- param.save_netcdf = save_netcdf + param.save_netcdf = True # Set specific parameters for new sets enso_param = EnsoDiagsParameter() diff --git a/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test.ipynb b/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test_json.ipynb similarity index 96% rename from auxiliary_tools/cdat_regression_testing/template_cdat_regression_test.ipynb rename to auxiliary_tools/cdat_regression_testing/template_cdat_regression_test_json.ipynb index 3cd30e7e50..a38b98351f 100644 --- a/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test.ipynb +++ b/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test_json.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# CDAT Migration Regression Testing Notebook\n", + "# CDAT Migration Regression Testing Notebook (`.json` metrics)\n", "\n", "This notebook is used to perform regression testing between the development and\n", "production versions of a diagnostic set.\n", @@ -47,25 +47,27 @@ "metadata": {}, "outputs": [], "source": [ + "from collections import defaultdict\n", "import glob\n", - "from auxiliary_tools.cdat_regression_testing.utils import (\n", - " get_metrics,\n", - " get_rel_diffs,\n", - " get_num_metrics_above_diff_thres,\n", - " highlight_large_diffs,\n", - " sort_columns,\n", - " update_diffs_to_pct,\n", - " PERCENTAGE_COLUMNS,\n", - ")\n", "\n", - "import pandas as pd\n", + "import numpy as np\n", + "import xarray as xr\n", + "\n", + "# TODO: Update SET_NAME and SET_DIR\n", + "SET_NAME = \"cosp_histogram\"\n", + "SET_DIR = \"660-cosp-histogram\"\n", "\n", - "# TODO: Update DEV_RESULTS and MAIN_RESULTS to your diagnostic sets.\n", - "DEV_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples_658/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", - "MAIN_PATH = \"/global/cfs/cdirs/e3sm/www/vo13/examples/ex1_modTS_vs_modTS_3years/lat_lon/model_vs_model\"\n", + "DEV_PATH = f\"/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/{SET_DIR}/{SET_NAME}/**\"\n", + "MAIN_PATH = f\"/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/{SET_NAME}/**\"\n", "\n", "DEV_GLOB = sorted(glob.glob(DEV_PATH + \"/*.json\"))\n", - "MAIN_GLOB = sorted(glob.glob(MAIN_PATH + \"/*.json\"))" + "MAIN_GLOB = sorted(glob.glob(MAIN_PATH + \"/*.json\"))\n", + "\n", + "if len(DEV_GLOB) == 0 or len(MAIN_GLOB) == 0:\n", + " raise IOError(\"No files found at DEV_PATH and/or MAIN_PATH.\")\n", + "\n", + "if len(DEV_GLOB) != len(MAIN_GLOB):\n", + " raise IOError(\"Number of files do not match at DEV_PATH and MAIN_PATH.\")" ] }, { diff --git a/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test_netcdf.ipynb b/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test_netcdf.ipynb new file mode 100644 index 0000000000..a021b736b3 --- /dev/null +++ b/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test_netcdf.ipynb @@ -0,0 +1,260 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CDAT Migration Regression Testing Notebook (`.nc` files)\n", + "\n", + "This notebook is used to perform regression testing between the development and\n", + "production versions of a diagnostic set.\n", + "\n", + "## How it works\n", + "\n", + "It compares the relative differences (%) of the ref and test variables between\n", + "the dev and `main` branches.\n", + "\n", + "## How to use\n", + "\n", + "PREREQUISITE: The diagnostic set's netCDF files are stored in two directories\n", + "(dev and `main` 
branches).\n", + "\n", + "1. Make a copy of this notebook under `auxiliary_tools/cdat_regression_testing/`.\n", + "2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" xarray dask pandas matplotlib-base ipykernel`\n", + "3. Run `mamba activate cdat_regression_test`\n", + "4. Update `SET_DIR` and `SET_NAME` in the copy of your notebook.\n", + "5. Run all cells IN ORDER.\n", + "6. Review results for any outstanding differences (relative difference >= 1e-5).\n", + "   - Debug these differences (e.g., bug in metrics functions, incorrect variable references, etc.)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup Code\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "import glob\n", + "\n", + "import numpy as np\n", + "import xarray as xr\n", + "\n", + "# TODO: Update SET_NAME and SET_DIR\n", + "SET_NAME = \"\"\n", + "SET_DIR = \"\"\n", + "\n", + "DEV_PATH = f\"/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/{SET_DIR}/{SET_NAME}/**\"\n", + "MAIN_PATH = f\"/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/{SET_NAME}/**\"\n", + "\n", + "DEV_GLOB = sorted(glob.glob(DEV_PATH + \"/*.nc\"))\n", + "MAIN_GLOB = sorted(glob.glob(MAIN_PATH + \"/*.nc\"))\n", + "\n", + "if len(DEV_GLOB) == 0 or len(MAIN_GLOB) == 0:\n", + " raise IOError(\"No files found at DEV_PATH and/or MAIN_PATH.\")\n", + "\n", + "if len(DEV_GLOB) != len(MAIN_GLOB):\n", + " raise IOError(\"Number of files do not match at DEV_PATH and MAIN_PATH.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "def _get_var_to_filepath_map():\n", + " var_to_file = defaultdict(lambda: defaultdict(dict))\n", + "\n", + " for dev_file, main_file in zip(DEV_GLOB, MAIN_GLOB):\n", + " # Example:\n", + " # \"/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-ANN-global_test.nc\"\n", + " file_arr = dev_file.split(\"/\")\n", + "\n", + " # Example: \"test\"\n", + " data_type = dev_file.split(\"_\")[-1].split(\".nc\")[0]\n", + "\n", + " # Skip comparing `.nc` \"diff\" files because comparing the relative\n", + " # diffs of diff files does not make sense.\n", + " if data_type == \"test\" or data_type == \"ref\":\n", + " # Example: \"ISCCP\"\n", + " model = file_arr[-2].split(\"-\")[0]\n", + " season = \"JJA\" if \"JJA\" in dev_file else \"ANN\"\n", + "\n", + " var_to_file[model][data_type][season] = (dev_file, main_file)\n", + "\n", + " return var_to_file\n", + "\n", + "\n", + "def _get_relative_diffs(var_to_filepath):\n", + " # Absolute tolerance of 0 and relative tolerance of 1e-5.\n", + " # We are mainly focusing on relative tolerance here (in percentage terms).\n", + " atol = 0\n", + " rtol = 1e-5\n", + "\n", + " for model, data_types in var_to_filepath.items():\n", + " for _, seasons in data_types.items():\n", + " for _, filepaths in seasons.items():\n", + " print(\"Comparing:\")\n", + " print(filepaths[0], \"\\n\", filepaths[1])\n", + " ds1 = xr.open_dataset(filepaths[0])\n", + " ds2 = xr.open_dataset(filepaths[1])\n", + "\n", + " try:\n", + " var_key = f\"COSP_HISTOGRAM_{model}\"\n", + " np.testing.assert_allclose(\n", + " ds1[var_key].values,\n", + " ds2[var_key].values,\n", + " atol=atol,\n", + " rtol=rtol,\n", + " )\n", + " except AssertionError as e:\n", + " print(e)\n", + " else:\n", + " print(f\" * All close and within relative tolerance ({rtol})\")" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Compare the netCDF files between branches\n", + "\n", + "- Compare \"ref\" and \"test\" files\n", + "- \"diff\" files are ignored because getting relative diffs for these does not make sense (relative diff will be above tolerance)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "var_to_filepaths = _get_var_to_filepath_map()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-ANN-global_ref.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-ANN-global_ref.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-JJA-global_ref.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-JJA-global_ref.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-ANN-global_test.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-ANN-global_test.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-JJA-global_test.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/ISCCP-COSP/ISCCPCOSP-COSP_HISTOGRAM_ISCCP-JJA-global_test.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-ANN-global_ref.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-ANN-global_ref.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-JJA-global_ref.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-JJA-global_ref.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-ANN-global_test.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-ANN-global_test.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-JJA-global_test.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MISR-COSP/MISRCOSP-COSP_HISTOGRAM_MISR-JJA-global_test.nc\n", + " * All close and within relative tolerance (1e-05)\n", + 
"Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-ANN-global_ref.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-ANN-global_ref.nc\n", + "\n", + "Not equal to tolerance rtol=1e-05, atol=0\n", + "\n", + "Mismatched elements: 42 / 42 (100%)\n", + "Max absolute difference: 4.23048367e-05\n", + "Max relative difference: 1.16682146e-05\n", + " x: array([[0.703907, 2.669376, 3.065526, 1.579834, 0.363847, 0.128541],\n", + " [0.147366, 1.152637, 3.67049 , 3.791006, 1.398453, 0.392103],\n", + " [0.07496 , 0.474791, 1.37002 , 1.705649, 0.786423, 0.346744],...\n", + " y: array([[0.703899, 2.669347, 3.065492, 1.579816, 0.363843, 0.12854 ],\n", + " [0.147364, 1.152624, 3.670448, 3.790965, 1.398438, 0.392099],\n", + " [0.074959, 0.474786, 1.370004, 1.705629, 0.786415, 0.34674 ],...\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-JJA-global_ref.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-JJA-global_ref.nc\n", + "\n", + "Not equal to tolerance rtol=1e-05, atol=0\n", + "\n", + "Mismatched elements: 42 / 42 (100%)\n", + "Max absolute difference: 4.91806181e-05\n", + "Max relative difference: 1.3272405e-05\n", + " x: array([[0.62896 , 2.657657, 3.206268, 1.704946, 0.398659, 0.169424],\n", + " [0.147569, 1.228835, 3.697387, 3.727142, 1.223123, 0.436504],\n", + " [0.072129, 0.508413, 1.167637, 1.412202, 0.638085, 0.362268],...\n", + " y: array([[0.628952, 2.657625, 3.206227, 1.704924, 0.398654, 0.169422],\n", + " [0.147567, 1.228819, 3.697338, 3.727093, 1.223107, 0.436498],\n", + " [0.072128, 0.508407, 1.167622, 1.412183, 0.638076, 0.362263],...\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-ANN-global_test.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-ANN-global_test.nc\n", + " * All close and within relative tolerance (1e-05)\n", + "Comparing:\n", + "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/660-cosp-histogram/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-JJA-global_test.nc \n", + " /global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/main/cosp_histogram/MODIS-COSP/MODISCOSP-COSP_HISTOGRAM_MODIS-JJA-global_test.nc\n", + " * All close and within relative tolerance (1e-05)\n" + ] + } + ], + "source": [ + "_get_relative_diffs(var_to_filepaths)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Results\n", + "\n", + "- The relative tolerance of all files are 1e-05, which means things should be good to go.\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cdat_regression_test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/auxiliary_tools/cdat_regression_testing/template_run_script.py b/auxiliary_tools/cdat_regression_testing/template_run_script.py index 6837244966..43d3bef627 100644 --- 
a/auxiliary_tools/cdat_regression_testing/template_run_script.py +++ b/auxiliary_tools/cdat_regression_testing/template_run_script.py @@ -12,18 +12,12 @@ "meridional_mean_2d", "annual_cycle_zonal_mean", "enso_diags", "qbo", "area_mean_time_series", "diurnal_cycle", "streamflow", "arm_diags", "tc_analysis", "aerosol_aeronet", "aerosol_budget", "mp_partition", -6. Update `SAVE_NETCDF` boolean variable. - - Set to True if your set does not produce metrics `.json` files, such as - cosp_histogram which only calculates spatial average and saves them to - netCDF files. - - Set to False if your set produces metrics `.json` files and you only - need to compare those in regression testing. -7. Run this script +6. Run this script - Make sure to run this command on NERSC perlmutter cpu: `salloc --nodes 1 --qos interactive --time 01:00:00 --constraint cpu --account=e3sm conda activate ` - python auxiliary_tools/cdat_regression_testing/ -8. Make a copy of the CDAT regression testing notebook in the same directory +7. Make a copy of the CDAT regression testing notebook in the same directory as this script and follow the instructions there to start testing. """ from auxiliary_tools.cdat_regression_testing.base_run_script import run_set @@ -35,7 +29,5 @@ # to the base results_dir, "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/". # Example: "671-lat-lon" SET_DIR = "" -# TODO: Update SET_TO_NETCDF as needed. -SAVE_NETCDF = True -run_set(SET_NAME, SET_DIR, SAVE_NETCDF) +run_set(SET_NAME, SET_DIR) diff --git a/e3sm_diags/derivations/derivations.py b/e3sm_diags/derivations/derivations.py index ef29ae4711..2b179243b5 100644 --- a/e3sm_diags/derivations/derivations.py +++ b/e3sm_diags/derivations/derivations.py @@ -13,7 +13,7 @@ data for these variables to the formula function 'prect()'. """ from collections import OrderedDict -from typing import Callable, Dict, Tuple +from typing import Callable, Dict, Tuple, Union from e3sm_diags.derivations.formulas import ( albedo, @@ -50,24 +50,33 @@ tref_range, w_convert_q, ) +from e3sm_diags.derivations.formulas_cosp import ( + cosp_bin_sum, + cosp_histogram_standardize, +) from e3sm_diags.derivations.utils import ( _apply_land_sea_mask, aplusb, convert_units, - cosp_bin_sum, - cosp_histogram_standardize, rename, ) # A type annotation ordered dictionary that maps a tuple of source variable(s) # to a derivation function. -DerivedVariableMap = OrderedDict[Tuple[str, ...], Callable] +DerivedVariableMap = Union[ + OrderedDict[Tuple[str, ...], Callable], Dict[Tuple[str, ...], Callable] +] # A type annotation for a dictionary mapping the key of a derived variable # to an ordered dictionary that maps a tuple of source variable(s) to a # derivation function. DerivedVariablesMap = Dict[str, DerivedVariableMap] +# A list of functions that need the target variable key as an argument to use +# for further operations. This variable is used in the Dataset class variable +# derivation method. 
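+# For illustration only, a sketch of how this dispatch can work (the actual
+# logic lives in ``Dataset._get_dataset_with_derivation_func``, which is
+# updated in the dataset_xr.py hunk of this diff):
+#
+#     if derivation_func in FUNC_NEEDS_TARGET_VAR:
+#         derived_var = derivation_func(target_var, *src_vars)
+#     else:
+#         derived_var = derivation_func(*src_vars)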
+FUNC_NEEDS_TARGET_VAR = [cosp_bin_sum, cosp_histogram_standardize] + DERIVED_VARIABLES: DerivedVariablesMap = { "PRECT": OrderedDict( @@ -581,242 +590,74 @@ ] ), # ISCCP - "CLDTOT_TAU1.3_ISCCP": OrderedDict( - [ - ( - ("FISCCP1_COSP",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 1.3, None), target_units="%" - ), - ), - ( - ("CLISCCP",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 1.3, None), target_units="%" - ), - ), - ] - ), - "CLDTOT_TAU1.3_9.4_ISCCP": OrderedDict( - [ - ( - ("FISCCP1_COSP",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 1.3, 9.4), target_units="%" - ), - ), - ( - ("CLISCCP",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 1.3, 9.4), target_units="%" - ), - ), - ] - ), - "CLDTOT_TAU9.4_ISCCP": OrderedDict( - [ - ( - ("FISCCP1_COSP",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 9.4, None), target_units="%" - ), - ), - ( - ("CLISCCP",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 9.4, None), target_units="%" - ), - ), - ] - ), + "CLDTOT_TAU1.3_ISCCP": { + ("FISCCP1_COSP",): cosp_bin_sum, + ("CLISCCP",): cosp_bin_sum, + }, + "CLDTOT_TAU1.3_9.4_ISCCP": { + ("FISCCP1_COSP",): cosp_bin_sum, + ("CLISCCP",): cosp_bin_sum, + }, + "CLDTOT_TAU9.4_ISCCP": { + ("FISCCP1_COSP",): cosp_bin_sum, + ("CLISCCP",): cosp_bin_sum, + }, # MODIS - "CLDTOT_TAU1.3_MODIS": OrderedDict( - [ - ( - ("CLMODIS",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 1.3, None), target_units="%" - ), - ), - ] - ), - "CLDTOT_TAU1.3_9.4_MODIS": OrderedDict( - [ - ( - ("CLMODIS",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 1.3, 9.4), target_units="%" - ), - ), - ] - ), - "CLDTOT_TAU9.4_MODIS": OrderedDict( - [ - ( - ("CLMODIS",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 9.4, None), target_units="%" - ), - ), - ] - ), - "CLDHGH_TAU1.3_MODIS": OrderedDict( - [ - ( - ("CLMODIS",), - lambda cld: convert_units( - cosp_bin_sum(cld, 440, 0, 1.3, None), target_units="%" - ), - ), - ] - ), - "CLDHGH_TAU1.3_9.4_MODIS": OrderedDict( - [ - ( - ("CLMODIS",), - lambda cld: convert_units( - cosp_bin_sum(cld, 440, 0, 1.3, 9.4), target_units="%" - ), - ), - ] - ), - "CLDHGH_TAU9.4_MODIS": OrderedDict( - [ - ( - ("CLMODIS",), - lambda cld: convert_units( - cosp_bin_sum(cld, 440, 0, 9.4, None), target_units="%" - ), - ), - ] - ), + "CLDTOT_TAU1.3_MODIS": { + ("CLMODIS",): cosp_bin_sum, + }, + "CLDTOT_TAU1.3_9.4_MODIS": { + ("CLMODIS",): cosp_bin_sum, + }, + "CLDTOT_TAU9.4_MODIS": { + ("CLMODIS",): cosp_bin_sum, + }, + "CLDHGH_TAU1.3_MODIS": { + ("CLMODIS",): cosp_bin_sum, + }, + "CLDHGH_TAU1.3_9.4_MODIS": { + ("CLMODIS",): cosp_bin_sum, + }, + "CLDHGH_TAU9.4_MODIS": { + ("CLMODIS",): cosp_bin_sum, + }, # MISR - "CLDTOT_TAU1.3_MISR": OrderedDict( - [ - ( - ("CLD_MISR",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 1.3, None), target_units="%" - ), - ), - ( - ("CLMISR",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 1.3, None), target_units="%" - ), - ), - ] - ), - "CLDTOT_TAU1.3_9.4_MISR": OrderedDict( - [ - ( - ("CLD_MISR",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 1.3, 9.4), target_units="%" - ), - ), - ( - ("CLMISR",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 1.3, 9.4), target_units="%" - ), - ), - ] - ), - "CLDTOT_TAU9.4_MISR": OrderedDict( - [ - ( - ("CLD_MISR",), - lambda cld: convert_units( - cosp_bin_sum(cld, None, None, 9.4, None), target_units="%" - ), - ), - ( - ("CLMISR",), - lambda cld: 
convert_units( - cosp_bin_sum(cld, None, None, 9.4, None), target_units="%" - ), - ), - ] - ), - "CLDLOW_TAU1.3_MISR": OrderedDict( - [ - ( - ("CLD_MISR",), - lambda cld: convert_units( - cosp_bin_sum(cld, 0, 3, 1.3, None), target_units="%" - ), - ), - ( - ("CLMISR",), - lambda cld: convert_units( - cosp_bin_sum(cld, 0, 3, 1.3, None), target_units="%" - ), - ), - ] - ), - "CLDLOW_TAU1.3_9.4_MISR": OrderedDict( - [ - ( - ("CLD_MISR",), - lambda cld: convert_units( - cosp_bin_sum(cld, 0, 3, 1.3, 9.4), target_units="%" - ), - ), - ( - ("CLMISR",), - lambda cld: convert_units( - cosp_bin_sum(cld, 0, 3, 1.3, 9.4), target_units="%" - ), - ), - ] - ), - "CLDLOW_TAU9.4_MISR": OrderedDict( - [ - ( - ("CLD_MISR",), - lambda cld: convert_units( - cosp_bin_sum(cld, 0, 3, 9.4, None), target_units="%" - ), - ), - ( - ("CLMISR",), - lambda cld: convert_units( - cosp_bin_sum(cld, 0, 3, 9.4, None), target_units="%" - ), - ), - ] - ), + "CLDTOT_TAU1.3_MISR": { + ("CLD_MISR",): cosp_bin_sum, + ("CLMISR",): cosp_bin_sum, + }, + "CLDTOT_TAU1.3_9.4_MISR": { + ("CLD_MISR",): cosp_bin_sum, + ("CLMISR",): cosp_bin_sum, + }, + "CLDTOT_TAU9.4_MISR": { + ("CLD_MISR",): cosp_bin_sum, + ("CLMISR",): cosp_bin_sum, + }, + "CLDLOW_TAU1.3_MISR": { + ("CLD_MISR",): cosp_bin_sum, + ("CLMISR",): cosp_bin_sum, + }, + "CLDLOW_TAU1.3_9.4_MISR": { + ("CLD_MISR",): cosp_bin_sum, + ("CLMISR",): cosp_bin_sum, + }, + "CLDLOW_TAU9.4_MISR": { + ("CLD_MISR",): cosp_bin_sum, + ("CLMISR",): cosp_bin_sum, + }, # COSP cloud fraction joint histogram - "COSP_HISTOGRAM_MISR": OrderedDict( - [ - ( - ("CLD_MISR",), - lambda cld: cosp_histogram_standardize(rename(cld)), - ), - (("CLMISR",), lambda cld: cosp_histogram_standardize(rename(cld))), - ] - ), - "COSP_HISTOGRAM_MODIS": OrderedDict( - [ - ( - ("CLMODIS",), - lambda cld: cosp_histogram_standardize(rename(cld)), - ), - ] - ), - "COSP_HISTOGRAM_ISCCP": OrderedDict( - [ - ( - ("FISCCP1_COSP",), - lambda cld: cosp_histogram_standardize(rename(cld)), - ), - ( - ("CLISCCP",), - lambda cld: cosp_histogram_standardize(rename(cld)), - ), - ] - ), + "COSP_HISTOGRAM_MISR": { + ("CLD_MISR",): cosp_histogram_standardize, + ("CLMISR",): cosp_histogram_standardize, + }, + "COSP_HISTOGRAM_MODIS": { + ("CLMODIS",): cosp_histogram_standardize, + }, + "COSP_HISTOGRAM_ISCCP": { + ("FISCCP1_COSP",): cosp_histogram_standardize, + ("CLISCCP",): cosp_histogram_standardize, + }, "ICEFRAC": OrderedDict( [ ( diff --git a/e3sm_diags/derivations/formulas_cosp.py b/e3sm_diags/derivations/formulas_cosp.py new file mode 100644 index 0000000000..3ee6fe9d15 --- /dev/null +++ b/e3sm_diags/derivations/formulas_cosp.py @@ -0,0 +1,367 @@ +from typing import Dict, List, Literal, Optional, Tuple, TypedDict + +import xarray as xr + +from e3sm_diags.derivations.formulas import convert_units + +# A dictionary storing attributes for "prs" and "tau" cloud axes. +# 1. 'keys': a list of valid variable keys found in the dataset, used for +# dynamic mapping. +# 2. 'min_mask': the minimum value to standardize the axis on for histogram +# plotting. 
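+#
+# For example, a model "CLMODIS" variable with dimensions
+# ("cosp_prs", "cosp_tau_modis", "lat", "lon") resolves to "cosp_prs" for the
+# "prs" axis and "cosp_tau_modis" for the "tau" axis, since both names appear
+# in the 'keys' lists below (see `_get_cloud_axis`).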
+CloudAxis = Literal["prs", "tau"] +CloudHistMapAttrs = TypedDict( + "CloudHistMapAttrs", {"keys": List[str], "min_mask": float} +) +CLOUD_HIST_MAP: Dict[CloudAxis, CloudHistMapAttrs] = { + "prs": { + "keys": ["cosp_prs", "cosp_htmisr", "modis_prs", "misr_cth", "isccp_prs"], + "min_mask": 0.0, + }, + "tau": { + "keys": ["cosp_tau", "cosp_tau_modis", "modis_tau", "misr_tau", "isccp_tau"], + "min_mask": 0.3, + }, +} + +# A dictionary mapping the target variable key to the "prs" and "tau" axes +# adjustment ranges. If either value in the (min, max) tuple is None, then +# the actual value from the axis is used instead. +AdjRange = Tuple[Optional[float], Optional[float]] +CLOUD_BIN_SUM_MAP: Dict[str, Dict[str, AdjRange]] = { + # ISCCP + "CLDTOT_TAU1.3_ISCCP": {"prs": (None, None), "tau": (1.3, None)}, + "CLDTOT_TAU1.3_9.4_ISCCP": {"prs": (None, None), "tau": (1.3, 9.4)}, + "CLDTOT_TAU9.4_ISCCP": {"prs": (None, None), "tau": (9.4, None)}, + # MODIS + "CLDTOT_TAU1.3_MODIS": {"prs": (None, None), "tau": (1.3, None)}, + "CLDTOT_TAU1.3_9.4_MODIS": {"prs": (None, None), "tau": (1.3, 9.4)}, + "CLDTOT_TAU9.4_MODIS": {"prs": (None, None), "tau": (9.4, None)}, + "CLDHGH_TAU1.3_MODIS": {"prs": (440, 0), "tau": (1.3, None)}, + "CLDHGH_TAU1.3_9.4_MODIS": {"prs": (440, 0), "tau": (1.3, 9.4)}, + "CLDHGH_TAU9.4_MODIS": {"prs": (440, 0), "tau": (9.4, None)}, + # MISR + "CLDTOT_TAU1.3_MISR": {"prs": (None, None), "tau": (1.3, None)}, + "CLDTOT_TAU1.3_9.4_MISR": {"prs": (None, None), "tau": (1.3, 9.4)}, + "CLDTOT_TAU9.4_MISR": {"prs": (None, None), "tau": (9.4, None)}, + "CLDLOW_TAU1.3_MISR": {"prs": (0, 3), "tau": (1.3, None)}, + "CLDLOW_TAU1.3_9.4_MISR": {"prs": (0, 3), "tau": (1.3, 9.4)}, + "CLDLOW_TAU9.4_MISR": {"prs": (0, 3), "tau": (9.4, None)}, +} + +# A dictionary mapping the names of "prs" axes to the unit adjustment value. +# - COSP v2 "cosp_prs" is in units "Pa" instead of "hPa" (v1). +# - COSP v2 "cosp_htmisr" is in units "m" instead of "km" (v1). +PRS_UNIT_ADJ_MAP = {"cosp_prs": 100, "cosp_htmisr": 1000} + + +def cosp_histogram_standardize(target_var_key: str, var: xr.DataArray) -> xr.DataArray: + """Standardize cloud top pressure and cloud thickness bins. + + This standardization makes the cloud variable data suitable for plotting. + + Parameters + ---------- + target_var_key : str + The target variable key (e.g., "COSP_HISTOGRAM_ISCCP"). + var : xr.DataArray + The variable in the dataset used for deriving the target variable + (e.g., "CLMODIS"). + + Returns + ------- + xr.DataArray + The target variable, which is the standardized version of the derived + variable (``var``). + """ + prs = _get_cloud_axis(var, "prs") + tau = _get_cloud_axis(var, "tau") + + # Mask on the min and max of prs and/or tau, then subset by dropping masked + # values. + var_std = _subset_cloud_var(var.copy(), prs, tau) + var_std.name = target_var_key + + return var_std + + +def cosp_bin_sum(target_var_key: str, var: xr.DataArray) -> xr.DataArray: + """Get the cosp bin sum for the derived variable. + + Parameters + ---------- + target_var_key : str + The target variable key (e.g., "CLDTOT_TAU1.3_ISCCP"). + var : xr.DataArray + The variable in the dataset used for deriving the target variable + (e.g., "CLMODIS"). + + Returns + ------- + xr.DataArray + The target variable, which is the cosp bin sum of the derived variable, + ``var``. + """ + # 1. Get cloud axes + prs = _get_cloud_axis(var, "prs") + tau = _get_cloud_axis(var, "tau") + + # 2. Get the prs and tau axis adjustment ranges if they are set. 
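+    #    e.g., CLOUD_BIN_SUM_MAP["CLDHGH_TAU1.3_MODIS"] is
+    #    {"prs": (440, 0), "tau": (1.3, None)}.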
+ prs_adj_range = CLOUD_BIN_SUM_MAP[target_var_key]["prs"] + tau_adj_range = CLOUD_BIN_SUM_MAP[target_var_key]["tau"] + + # 3. Get the prs subset range. + prs_range = _get_prs_subset_range(prs, prs_adj_range) + + # 4. Get the tau subset range and its string representation. + tau_range, tau_lim = _get_tau_subset_range_and_str(tau, tau_adj_range) + + # 5. Get the axes mask conditional and subset the variable on it. + cond = ( + (prs >= prs_range[0]) + & (prs <= prs_range[-1]) + & (tau >= tau_range[0]) + & (tau <= tau_range[-1]) + ) + var_sub = var.where(cond, drop=True) + + # 6. Sum over the prs and tau dimensions. + var_sum = var_sub.sum(dim=[prs.name, tau.name]) + + # 7. Set the variable's long name based on the original variable's name and + # prs ranges. + var_key = str(var.name) + simulation = _get_simulation_str(var_key) + prs_cloud_level = _get_prs_cloud_level_str(var_key, prs_range, prs_adj_range) + + if simulation is not None and prs_cloud_level is not None: + var_sum.attrs["long_name"] = f"{simulation}: {prs_cloud_level} with {tau_lim}" + + # 8. Convert units to %. + final_var = convert_units(var_sum, "%") + + return final_var + + +def _get_cloud_axis(var: xr.DataArray, axis: CloudAxis) -> xr.DataArray: + da_axis = None + + keys = CLOUD_HIST_MAP[axis]["keys"] + for key in keys: + if key in var.dims: + da_axis = var[key] + + break + + if da_axis is None: + raise KeyError( + f"The {axis!r} axis is not in the {var.name!r} variable, so the " + "cosp histogram cannot be processed." + ) + + return da_axis + + +def _subset_cloud_var( + var: xr.DataArray, prs: xr.DataArray, tau: xr.DataArray +) -> xr.DataArray: + prs_min = CLOUD_HIST_MAP["prs"]["min_mask"] + prs_max = prs[-1].item() + + tau_min = CLOUD_HIST_MAP["tau"]["min_mask"] + tau_max = tau[-1].item() + + # MISR model and MISR obs + if var.name in ["CLD_MISR", "CLMISR"]: + # COSP v2 "cosp_htmisr" is in units of m instead of km (as in v1), and + # COSP v2 cosp_htmisr[0] equals 0 instead of -99 (as in v1), so the + # cloud variable needs to be masked manually by slicing out the first + # index on the cosp_htmisr axis. + if var.name == "CLD_MISR" and prs.max().item() > 1000: + var = var.isel({prs.name: slice(1, None)}) + prs_max = 1000.0 * prs_max + + cond = (prs >= prs_min) & (prs <= prs_max) & (tau >= tau_min) & (tau <= tau_max) + # ISCCP model, ISCCP obs, and MODIS + elif var.name in ["FISCCP1_COSP", "CLISCCP", "CLMODIS"]: + cond = (tau >= tau_min) & (tau <= tau_max) + + var_sub = var.where(cond, drop=True) + + return var_sub + + +def _get_prs_subset_range( + prs: xr.DataArray, prs_adj_range: AdjRange +) -> Tuple[float, float]: + """Get the prs axis subset range. + + This function loops over the min and max values for the actual and adjusted + prs range. If the adjusted value is not None, then use that as the range + value. Otherwise use the actual value from the prs axis. + + Adjust the units if the axis key is in ``PRS_UNIT_ADJ_MAP``. + + Parameters + ---------- + prs : xr.DataArray + The prs axis. + prs_adj_range : AdjRange + The prs axis adjustment range. + + Returns + ------- + Tuple[float, float] + A tuple of the (min, max) for the prs subset range. 
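+
+    Examples
+    --------
+    For a v2 model "cosp_prs" axis in Pa (``prs.max() > 1000``) with
+    ``prs_adj_range=(440, 0)``, both adjusted values are scaled by
+    ``PRS_UNIT_ADJ_MAP["cosp_prs"]`` (100), giving ``(44000, 0)``. With
+    ``prs_adj_range=(None, None)``, the actual axis endpoints are returned.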
+ """ + act_range = (prs[0].item(), prs[-1].item()) + final_range: List[float] = [] + + for act_val, adj_val in zip(act_range, prs_adj_range): + if adj_val is not None: + if prs.name in PRS_UNIT_ADJ_MAP.keys() and prs.max().item() > 1000: + adj_val = adj_val * PRS_UNIT_ADJ_MAP[str(prs.name)] + + final_range.append(adj_val) + else: + final_range.append(act_val) + + return tuple(final_range) # type: ignore + + +def _get_tau_subset_range_and_str( + tau: xr.DataArray, tau_adj_range: AdjRange +) -> Tuple[Tuple[float, float], str]: + """Get the tau range for subsetting and the tau range string. + + Parameters + ---------- + tau : xr.DataArray + The tau axis. + tau_adj_range : AdjRange + The tau axis adjustment range. + + Returns + ------- + Tuple[Tuple[float, float], str] + A tuple consisting of the tau range (min, max) and the tau range string. + """ + # The adjusted min and max values to use if either if them are None. + adj_min, adj_max = tau_adj_range + + # 1. Adjust min. + if adj_min is not None and adj_max is None: + act_max = tau[-1].item() + + range = (adj_min, act_max) + range_str = f"tau > {adj_min}" + # 2. Adjust min and max. + elif adj_min is not None and adj_max is not None: + range = (adj_min, adj_max) + range_str = f"{adj_min} < tau < {adj_max}" + + return range, range_str + + +def _get_simulation_str(var_key: str) -> Optional[str]: + """Get the prs simulation string. + + Parameters + ---------- + var_key : str + The key of the variable in the dataset. + + Returns + ------- + Optional[str] + The optional prs simulation string. + """ + sim = None + + # NOTE: This does not cover all variable keys. + if var_key == "FISCCP1_COSP": + sim = "ISCCP" + elif var_key == "CLMODIS": + sim = "MODIS" + elif var_key == "CLD_MISR": + sim = "MISR" + + return sim + + +def _get_prs_cloud_level_str( + var_key: str, + prs_act_range: Tuple[float, float], + prs_adj_range: AdjRange, +) -> Optional[str]: + """Get the prs cloud level and simulation type. + + Parameters + ---------- + var_key: str + The key of the variable in the dataset. + prs_act_range : Tuple[float, float] + The prs actual range. + prs_adj_range : AdjRange + The prs adjusted range. + + Returns + ------- + Optional[str] + The optional cloud level string. + """ + adj_min, adj_max = prs_adj_range + cloud_level = None + + if adj_min is None and adj_max is None: + cloud_level = "total cloud fraction" + + # NOTE: This does not cover all variable keys, including "FISCCP1_COSP". + if var_key == "CLMODIS": + cloud_level = _get_cloud_level( + prs_act_range, low_bnds=(440, 44000), high_bnds=(680, 68000) + ) + elif var_key == "CLD_MISR": + cloud_level = _get_cloud_level( + prs_act_range, low_bnds=(7, 7000), high_bnds=(3, 3000) + ) + + return cloud_level + + +def _get_cloud_level( + prs_act_range: Tuple[float, float], + low_bnds: Tuple[float, float], + high_bnds: Tuple[float, float], +) -> str: + """Get the cloud type based on prs values and the specified boundaries + + Thresholds for cloud levels: + 1. cloud top height: high cloud (<440hPa or > 7km) + 2. mid-level cloud (440-680hPa, 3-7 km) + 3. low clouds (>680hPa, < 3km) + + Parameters + ---------- + prs_act_range : Tuple[float, float] + The prs actual range. + low_bnds : Tuple[float, float] + The low bounds. + high_bnds : Tuple[float, float] + The high bounds. + + Returns + ------- + str + The cloud level string. 
+ """ + min, max = prs_act_range + + if min in low_bnds and max in high_bnds: + return "middle cloud fraction" + elif min in low_bnds: + return "high cloud fraction" + elif max in high_bnds: + return "low cloud fraction" + + return "total cloud fraction" diff --git a/e3sm_diags/derivations/utils.py b/e3sm_diags/derivations/utils.py index b790417183..34a74c5ef1 100644 --- a/e3sm_diags/derivations/utils.py +++ b/e3sm_diags/derivations/utils.py @@ -2,17 +2,9 @@ This module defines general utilities for deriving variables, including unit conversion functions, renaming variables, etc. """ -from typing import TYPE_CHECKING, Optional, Tuple - -import MV2 -import numpy as np import xarray as xr from genutil import udunits -if TYPE_CHECKING: - from cdms2.axis import FileAxis - from cdms2.fvariable import FileVariable - def rename(new_name: str): """Given the new name, just return it.""" @@ -88,6 +80,7 @@ def convert_units(var: xr.DataArray, target_units: str): # noqa: C901 elif var.attrs["units"] in ["gN/m^2/day", "gP/m^2/day", "gC/m^2/day"]: pass else: + # FIXME: Replace genutil.udunits module. temp = udunits(1.0, var.attrs["units"]) coeff, offset = temp.how(target_units) @@ -124,186 +117,3 @@ def _apply_land_sea_mask( masked_var = var.where(cond=cond, drop=False) return masked_var - - -def adjust_prs_val_units( - prs: "FileAxis", prs_val: float, prs_val0: Optional[float] -) -> float: - """Adjust the prs_val units based on the prs.id""" - # FIXME: Refactor this function to operate on xr.Dataset/xr.DataArray. - # COSP v2 cosp_pr in units Pa instead of hPa as in v1 - # COSP v2 cosp_htmisr in units m instead of km as in v1 - adjust_ids = {"cosp_prs": 100, "cosp_htmisr": 1000} - - if prs_val0: - prs_val = prs_val0 - if prs.id in adjust_ids.keys() and max(prs.getData()) > 1000: - prs_val = prs_val * adjust_ids[prs.id] - - return prs_val - - -def determine_cloud_level( - prs_low: float, - prs_high: float, - low_bnds: Tuple[int, int], - high_bnds: Tuple[int, int], -) -> str: - """Determines the cloud type based on prs values and the specified boundaries""" - # Threshold for cloud top height: high cloud (<440hPa or > 7km), midlevel cloud (440-680hPa, 3-7 km) and low clouds (>680hPa, < 3km) - if prs_low in low_bnds and prs_high in high_bnds: - return "middle cloud fraction" - elif prs_low in low_bnds: - return "high cloud fraction" - elif prs_high in high_bnds: - return "low cloud fraction" - else: - return "total cloud fraction" - - -def cosp_bin_sum( - cld: "FileVariable", - prs_low0: Optional[float], - prs_high0: Optional[float], - tau_low0: Optional[float], - tau_high0: Optional[float], -): - # FIXME: Refactor this function to operate on xr.Dataset/xr.DataArray. 
- """sum of cosp bins to calculate cloud fraction in specified cloud top pressure / height and - cloud thickness bins, input variable has dimension (cosp_prs,cosp_tau,lat,lon)/(cosp_ht,cosp_tau,lat,lon) - """ - prs: FileAxis = cld.getAxis(0) - tau: FileAxis = cld.getAxis(1) - - prs_low: float = adjust_prs_val_units(prs, prs[0], prs_low0) - prs_high: float = adjust_prs_val_units(prs, prs[-1], prs_high0) - - if prs_low0 is None and prs_high0 is None: - prs_lim = "total cloud fraction" - - tau_high, tau_low, tau_lim = determine_tau(tau, tau_low0, tau_high0) - - if cld.id == "FISCCP1_COSP": # ISCCP model - cld_bin = cld(cosp_prs=(prs_low, prs_high), cosp_tau=(tau_low, tau_high)) - simulator = "ISCCP" - if cld.id == "CLISCCP": # ISCCP obs - cld_bin = cld(isccp_prs=(prs_low, prs_high), isccp_tau=(tau_low, tau_high)) - - if cld.id == "CLMODIS": # MODIS - prs_lim = determine_cloud_level(prs_low, prs_high, (440, 44000), (680, 68000)) - simulator = "MODIS" - - if prs.id == "cosp_prs": # Model - cld_bin = cld( - cosp_prs=(prs_low, prs_high), cosp_tau_modis=(tau_low, tau_high) - ) - elif prs.id == "modis_prs": # Obs - cld_bin = cld(modis_prs=(prs_low, prs_high), modis_tau=(tau_low, tau_high)) - - if cld.id == "CLD_MISR": # MISR model - if max(prs) > 1000: # COSP v2 cosp_htmisr in units m instead of km as in v1 - cld = cld[ - 1:, :, :, : - ] # COSP v2 cosp_htmisr[0] equals to 0 instead of -99 as in v1, therefore cld needs to be masked manually - cld_bin = cld(cosp_htmisr=(prs_low, prs_high), cosp_tau=(tau_low, tau_high)) - prs_lim = determine_cloud_level(prs_low, prs_high, (7, 7000), (3, 3000)) - simulator = "MISR" - if cld.id == "CLMISR": # MISR obs - cld_bin = cld(misr_cth=(prs_low, prs_high), misr_tau=(tau_low, tau_high)) - - cld_bin_sum = MV2.sum(MV2.sum(cld_bin, axis=1), axis=0) - - try: - cld_bin_sum.long_name = simulator + ": " + prs_lim + " with " + tau_lim - # cld_bin_sum.long_name = "{}: {} with {}".format(simulator, prs_lim, tau_lim) - except BaseException: - pass - return cld_bin_sum - - -def determine_tau( - tau: "FileAxis", tau_low0: Optional[float], tau_high0: Optional[float] -): - # FIXME: Refactor this function to operate on xr.Dataset/xr.DataArray. - tau_low = tau[0] - tau_high = tau[-1] - - if tau_low0 is None and tau_high0: - tau_high = tau_high0 - tau_lim = "tau <" + str(tau_high0) - elif tau_high0 is None and tau_low0: - tau_low = tau_low0 - tau_lim = "tau >" + str(tau_low0) - elif tau_low0 is None and tau_high0 is None: - tau_lim = str(tau_low) + "< tau < " + str(tau_high) - else: - tau_low = tau_low0 - tau_high = tau_high0 - tau_lim = str(tau_low) + "< tau < " + str(tau_high) - - return tau_high, tau_low, tau_lim - - -def cosp_histogram_standardize(cld: "FileVariable"): - # TODO: Refactor this function to operate on xr.Dataset/xr.DataArray. 
- """standarize cloud top pressure and cloud thickness bins to dimensions that - suitable for plotting, input variable has dimention (cosp_prs,cosp_tau)""" - prs = cld.getAxis(0) - tau = cld.getAxis(1) - - prs[0] - prs_high = prs[-1] - tau[0] - tau_high = tau[-1] - - prs_bounds = getattr(prs, "bounds") - if prs_bounds is None: - cloud_prs_bounds = np.array( - [ - [1000.0, 800.0], - [800.0, 680.0], - [680.0, 560.0], - [560.0, 440.0], - [440.0, 310.0], - [310.0, 180.0], - [180.0, 0.0], - ] - ) # length 7 - prs.setBounds(np.array(cloud_prs_bounds, dtype=np.float32)) - - tau_bounds = getattr(tau, "bounds") - if tau_bounds is None: - cloud_tau_bounds = np.array( - [ - [0.3, 1.3], - [1.3, 3.6], - [3.6, 9.4], - [9.4, 23], - [23, 60], - [60, 379], - ] - ) # length 6 - tau.setBounds(np.array(cloud_tau_bounds, dtype=np.float32)) - - if cld.id == "FISCCP1_COSP": # ISCCP model - cld_hist = cld(cosp_tau=(0.3, tau_high)) - if cld.id == "CLISCCP": # ISCCP obs - cld_hist = cld(isccp_tau=(0.3, tau_high)) - - if cld.id == "CLMODIS": # MODIS - try: - cld_hist = cld(cosp_tau_modis=(0.3, tau_high)) # MODIS model - except BaseException: - cld_hist = cld(modis_tau=(0.3, tau_high)) # MODIS obs - - if cld.id == "CLD_MISR": # MISR model - if max(prs) > 1000: # COSP v2 cosp_htmisr in units m instead of km as in v1 - cld = cld[ - 1:, :, :, : - ] # COSP v2 cosp_htmisr[0] equals to 0 instead of -99 as in v1, therefore cld needs to be masked manually - prs_high = 1000.0 * prs_high - cld_hist = cld(cosp_tau=(0.3, tau_high), cosp_htmisr=(0, prs_high)) - if cld.id == "CLMISR": # MISR obs - cld_hist = cld(misr_tau=(0.3, tau_high), misr_cth=(0, prs_high)) - - return cld_hist diff --git a/e3sm_diags/driver/cosp_histogram_driver.py b/e3sm_diags/driver/cosp_histogram_driver.py index 5018ad3899..c319763012 100755 --- a/e3sm_diags/driver/cosp_histogram_driver.py +++ b/e3sm_diags/driver/cosp_histogram_driver.py @@ -1,15 +1,15 @@ from __future__ import annotations -import os from typing import TYPE_CHECKING -import cdms2 +import xarray as xr -import e3sm_diags -from e3sm_diags.driver import utils +from e3sm_diags.driver.utils.dataset_xr import Dataset +from e3sm_diags.driver.utils.io import _save_data_metrics_and_plots +from e3sm_diags.driver.utils.regrid import _subset_on_region from e3sm_diags.logger import custom_logger -from e3sm_diags.metrics import corr, max_cdms, mean, min_cdms, rmse -from e3sm_diags.plot import plot +from e3sm_diags.metrics.metrics import spatial_avg +from e3sm_diags.plot.cosp_histogram_plot import plot as plot_func if TYPE_CHECKING: from e3sm_diags.parameter.core_parameter import CoreParameter @@ -17,114 +17,129 @@ logger = custom_logger(__name__) -def create_metrics(ref, test, ref_regrid, test_regrid, diff): - """Creates the mean, max, min, rmse, corr in a dictionary""" - metrics_dict = {} - metrics_dict["ref"] = { - "min": min_cdms(ref), - "max": max_cdms(ref), - "mean": mean(ref), - } - metrics_dict["test"] = { - "min": min_cdms(test), - "max": max_cdms(test), - "mean": mean(test), - } - - metrics_dict["diff"] = { - "min": min_cdms(diff), - "max": max_cdms(diff), - "mean": mean(diff), - } - metrics_dict["misc"] = { - "rmse": rmse(test_regrid, ref_regrid), - "corr": corr(test_regrid, ref_regrid), - } - - return metrics_dict +def run_diag(parameter: CoreParameter) -> CoreParameter: + """Get metrics for the cosp_histogram diagnostic set. + This function loops over each variable, season, pressure level, and region. 
-def run_diag(parameter: CoreParameter) -> CoreParameter: + It subsets the test and reference variables on the selected region, then + calculates the spatial average for both variables. The difference between + the test and reference spatial averages is calculated. Afterwards, the + spatial averages for the test, ref, and differences are plotted. + + Parameters + ---------- + parameter : CoreParameter + The parameter for the diagnostic. + + Returns + ------- + CoreParameter + The parameter for the diagnostic with the result (completed or failed). + """ variables = parameter.variables seasons = parameter.seasons ref_name = getattr(parameter, "ref_name", "") regions = parameter.regions - test_data = utils.dataset.Dataset(parameter, test=True) - ref_data = utils.dataset.Dataset(parameter, ref=True) - - for season in seasons: - # Get the name of the data, appended with the years averaged. - parameter.test_name_yrs = utils.general.get_name_and_yrs( - parameter, test_data, season - ) - parameter.ref_name_yrs = utils.general.get_name_and_yrs( - parameter, ref_data, season - ) - - # Get land/ocean fraction for masking. - try: - land_frac = test_data.get_climo_variable("LANDFRAC", season) - ocean_frac = test_data.get_climo_variable("OCNFRAC", season) - except Exception: - mask_path = os.path.join( - e3sm_diags.INSTALL_PATH, "acme_ne30_ocean_land_mask.nc" - ) - with cdms2.open(mask_path) as f: - land_frac = f("LANDFRAC") - ocean_frac = f("OCNFRAC") - - for var in variables: - logger.info("Variable: {}".format(var)) - parameter.var_id = var - - mv1 = test_data.get_climo_variable(var, season) - mv2 = ref_data.get_climo_variable(var, season) - - parameter.viewer_descr[var] = ( - mv1.long_name - if hasattr(mv1, "long_name") - else "No long_name attr in test data." - ) + test_ds = Dataset(parameter, data_type="test") + ref_ds = Dataset(parameter, data_type="ref") - for region in regions: - logger.info("Selected region: {}".format(region)) + for var_key in variables: + logger.info("Variable: {}".format(var_key)) + parameter.var_id = var_key - mv1_domain = utils.general.select_region( - region, mv1, land_frac, ocean_frac, parameter - ) - mv2_domain = utils.general.select_region( - region, mv2, land_frac, ocean_frac, parameter - ) + for season in seasons: + parameter._set_name_yrs_attrs(test_ds, ref_ds, season) - parameter.output_file = "-".join([ref_name, var, season, region]) - parameter.main_title = str(" ".join([var, season, region])) + ds_test = test_ds.get_climo_dataset(var_key, season) + ds_ref = ref_ds.get_ref_climo_dataset(var_key, season, ds_test) - mv1_domain_mean = mean(mv1_domain) - mv2_domain_mean = mean(mv2_domain) - diff = mv1_domain_mean - mv2_domain_mean + for region in regions: + logger.info("Selected region: {}".format(region)) - mv1_domain_mean.id = var - mv2_domain_mean.id = var - diff.id = var + ds_test_region = _subset_on_region(ds_test, var_key, region) + ds_ref_region = _subset_on_region(ds_ref, var_key, region) - parameter.backend = ( - "mpl" # For now, there's no vcs support for this set. + parameter._set_param_output_attrs( + var_key, season, region, ref_name, ilev=None ) - plot( - parameter.current_set, - mv2_domain_mean, - mv1_domain_mean, - diff, - {}, - parameter, + + # Make a copy of the test and ref datasets to overwrite the existing + # variable with its spatial average. 
+            ds_test_avg = ds_test_region.copy()
+            ds_ref_avg = ds_ref_region.copy()

+            ds_test_avg[var_key] = spatial_avg(
+                ds_test_region, var_key, as_list=False
             )
-            utils.general.save_ncfiles(
-                parameter.current_set,
-                mv1_domain_mean,
-                mv2_domain_mean,
-                diff,
+            ds_ref_avg[var_key] = spatial_avg(ds_ref_region, var_key, as_list=False)
+
+            # The dimension names of both variables must be aligned to
+            # perform arithmetic with Xarray. Sometimes the dimension names
+            # might differ based on the derived variable (e.g.,
+            # "cosp_htmisr" vs. "misr_cth").
+            ds_test_avg[var_key] = _align_test_to_ref_dims(
+                ds_test_avg[var_key], ds_ref_avg[var_key]
+            )
+            ds_diff_avg = _get_diff_of_avg(var_key, ds_test_avg, ds_ref_avg)
+
+            _save_data_metrics_and_plots(
                 parameter,
+                plot_func,
+                var_key,
+                ds_test_avg,
+                ds_ref_avg,
+                ds_diff_avg,
+                metrics_dict=None,
             )

     return parameter
+
+
+def _get_diff_of_avg(
+    var_key: str, ds_test_avg: xr.Dataset, ds_ref_avg: xr.Dataset
+) -> xr.Dataset:
+    # Use the test dataset as the base from which the reference dataset is
+    # subtracted.
+    ds_diff_avg = ds_test_avg.copy()
+
+    # There are cases where the axes of the test and ref datasets aren't in
+    # the same units. We avoid label-based Xarray arithmetic, which expects
+    # coordinates with matching labels and otherwise produces empty or
+    # all-NaN results. Instead, we subtract using the reference
+    # xr.DataArray's underlying `np.ndarray` (`.values`).
+    ds_diff_avg[var_key] = ds_diff_avg[var_key] - ds_ref_avg[var_key].values
+
+    return ds_diff_avg
+
+
+def _align_test_to_ref_dims(
+    da_test: xr.DataArray, da_ref: xr.DataArray
+) -> xr.DataArray:
+    """Align the dimensions of the test data to the ref data.
+
+    This is useful for situations where label-based arithmetic needs to be
+    performed using Xarray.
+
+    Parameters
+    ----------
+    da_test : xr.DataArray
+        The test dataarray.
+    da_ref : xr.DataArray
+        The ref dataarray.
+
+    Returns
+    -------
+    xr.DataArray
+        The test dataarray with dimensions aligned to the ref dataarray.
+    """
+    da_test_new = da_test.copy()
+
+    # NOTE: This logic assumes that prs and tau are in the same order for
+    # the test and ref variables. If they are not, then this will break or
+    # perform incorrect arithmetic.
+    da_test_new = da_test_new.rename(
+        {dim1: dim2 for dim1, dim2 in zip(da_test.dims, da_ref.dims)}
+    )
+
+    return da_test_new
diff --git a/e3sm_diags/driver/utils/dataset_xr.py b/e3sm_diags/driver/utils/dataset_xr.py
index e77ad938a2..f3706701d8 100644
--- a/e3sm_diags/driver/utils/dataset_xr.py
+++ b/e3sm_diags/driver/utils/dataset_xr.py
@@ -23,6 +23,7 @@
 from e3sm_diags.derivations.derivations import (
     DERIVED_VARIABLES,
+    FUNC_NEEDS_TARGET_VAR,
     DerivedVariableMap,
     DerivedVariablesMap,
 )
@@ -251,7 +252,7 @@ def _get_global_attr_from_climo_dataset(
         """
         filepath = self._get_climo_filepath(season)

-        ds = xr.open_dataset(filepath)
+        ds = self._open_climo_dataset(filepath)

         attr_val = ds.attrs.get(attr)

         return attr_val
@@ -382,7 +383,7 @@ def _get_climo_dataset(self, season: str) -> xr.Dataset:
         using other datasets.
         """
         filepath = self._get_climo_filepath(season)
-        ds = xr.open_dataset(filepath, use_cftime=True)
+        ds = self._open_climo_dataset(filepath)

         if self.var in ds.variables:
             pass
@@ -398,6 +399,64 @@ def _get_climo_dataset(self, season: str) -> xr.Dataset:

         return ds

+    def _open_climo_dataset(self, filepath: str) -> xr.Dataset:
+        """Open a climatology dataset.
+
+        Some climatology files have "time" as a scalar variable.
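The `.values` subtraction in `_get_diff_of_avg` above is easiest to see in isolation. In this sketch (made-up values) both arrays describe the same axis, but one labels it in kilometers and the other in meters, so label-based arithmetic finds no overlapping coordinates:

```python
import xarray as xr

da_test = xr.DataArray(
    [1.0, 2.0, 3.0], dims="cosp_htmisr", coords={"cosp_htmisr": [0.5, 1.0, 1.5]}
)  # height labeled in km
da_ref = xr.DataArray(
    [1.0, 1.0, 1.0],
    dims="cosp_htmisr",
    coords={"cosp_htmisr": [500.0, 1000.0, 1500.0]},
)  # same axis labeled in m

# Label-based arithmetic aligns on shared coordinate labels. With no overlap,
# the default inner join yields an empty result (an outer join yields all-NaN).
empty = da_test - da_ref
assert empty.size == 0

# Subtracting the underlying NumPy array is purely positional and keeps
# da_test's coordinates, which is what `_get_diff_of_avg` relies on.
diff = da_test - da_ref.values
assert list(diff.values) == [0.0, 1.0, 2.0]
```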
If the scalar + variable is a single integer instead of a 1D array with a length + matching the equivalent dimension size, Xarray will `raise ValueError: + dimension 'time' already exists as a scalar variable`. For this case, + the "time" scalar variable is dropped when opening the dataset. + + If the scalar variable is dropped or climatology file only has a + "time" dimension without coordinates, new "time" coordinates will be + added to the dataset. + + Related issue: https://github.com/pydata/xarray/issues/1709 + + Parameters + ---------- + filepath : str + The path to a climatology dataset. + + Returns + ------- + xr.Dataset + The climatology dataset. + + Raises + ------ + ValueError + Raised for all ValueErrors other than "dimension 'time' already + exists as a scalar variable". + """ + # No need to decode times because the climatology is already calculated. + # Times only need to be decoded if climatology is being calculated + # (time series datasets). + args = {"path": filepath, "decode_times": False, "add_bounds": ["X", "Y"]} + time_coords = xr.DataArray( + name="time", + dims=["time"], + data=[0], + attrs={"axis": "T", "standard_name": "time"}, + ) + + try: + ds = xc.open_dataset(**args) + except ValueError as e: # pragma: no cover + # FIXME: Need to fix the test that covers this code block. + msg = str(e) + + if "dimension 'time' already exists as a scalar variable" in msg: + ds = xc.open_dataset(**args, drop_variables=["time"]) + else: + raise ValueError(msg) + + if "time" not in ds.coords: + ds["time"] = time_coords + + return ds + def _get_climo_filepath(self, season: str) -> str: """Return the path to the climatology file. @@ -571,23 +630,20 @@ def _get_dataset_with_derived_climo_var(self, ds: xr.Dataset) -> xr.Dataset: matching_target_var_map = self._get_matching_climo_src_vars( ds, target_var, target_var_map ) - # Since there's only one set of vars, we get the first and only set - # of vars from the derived variable dictionary. - src_var_keys = list(matching_target_var_map.keys())[0] - - # Get the source variable DataArrays and apply the derivation function. - # Example: - # [xr.DataArray(name="PRECC",...), xr.DataArray(name="PRECL",...)] - src_vars = [] - for var in src_var_keys: - src_vars.append(ds[var]) + # NOTE: Since there's only one set of vars, we get the first and only set + # of vars from the derived variable dictionary. + # 1. Get the derivation function. derivation_func = list(matching_target_var_map.values())[0] - derived_var: xr.DataArray = derivation_func(*src_vars) - # Add the derived variable to the final xr.Dataset object and return it. - ds_final = ds.copy() - ds_final[target_var] = derived_var + # 2. Get the derivation function arguments using source variable keys. + # Example: [xr.DataArray(name="PRECC",...), xr.DataArray(name="PRECL",...)] + src_var_keys = list(matching_target_var_map.keys())[0] + + # 3. Use the derivation function to derive the variable. + ds_final = self._get_dataset_with_derivation_func( + ds, derivation_func, src_var_keys, target_var + ) return ds_final @@ -720,26 +776,70 @@ def _get_dataset_with_derived_ts_var(self) -> xr.Dataset: matching_target_var_map = self._get_matching_time_series_src_vars( self.root_path, target_var_map ) - src_var_keys = list(matching_target_var_map.keys())[0] - # Unlike the climatology dataset, the source variables for - # time series data can be found in multiple datasets so a single - # xr.Dataset object is returned containing all of them. 
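For reference, here is a trimmed-down sketch of the recovery logic in `_open_climo_dataset`, written against plain `xarray` rather than `xc.open_dataset` (so the `add_bounds` handling is omitted); the function name is hypothetical:

```python
import xarray as xr

def open_climo(filepath: str) -> xr.Dataset:
    # Climatologies are already time-averaged, so times never need decoding.
    try:
        ds = xr.open_dataset(filepath, decode_times=False)
    except ValueError as e:
        if "already exists as a scalar variable" not in str(e):
            raise
        # Drop the conflicting scalar "time" variable and retry.
        ds = xr.open_dataset(filepath, decode_times=False, drop_variables=["time"])

    if "time" not in ds.coords:
        # Re-attach placeholder time coordinates so downstream code that
        # expects a "time" axis keeps working.
        ds["time"] = xr.DataArray(
            name="time",
            dims=["time"],
            data=[0],
            attrs={"axis": "T", "standard_name": "time"},
        )

    return ds
```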
+ # NOTE: Since there's only one set of vars, we get the first and only set + # of vars from the derived variable dictionary. + # 1. Get the derivation function. + derivation_func = list(matching_target_var_map.values())[0] + + # 2. Get the derivation function arguments using source variable keys. + # Example: [xr.DataArray(name="PRECC",...), xr.DataArray(name="PRECL",...)] + # Unlike the climatology dataset, the source variables for time series + # data can be found in multiple datasets so a single xr.Dataset object + # is returned containing all of them. + src_var_keys = list(matching_target_var_map.keys())[0] ds = self._get_dataset_with_source_vars(src_var_keys) - # Get the source variable DataArrays. - # Example: - # [xr.DataArray(name="PRECC",...), xr.DataArray(name="PRECL",...)] - src_vars = [ds[var] for var in src_var_keys] + # 3. Use the derivation function to derive the variable. + ds_final = self._get_dataset_with_derivation_func( + ds, derivation_func, src_var_keys, target_var + ) - # Using the source variables, apply the matching derivation function. - derivation_func = list(matching_target_var_map.values())[0] - derived_var: xr.DataArray = derivation_func(*src_vars) + return ds_final - # Add the derived variable to the final xr.Dataset object and return it. - ds[target_var] = derived_var + def _get_dataset_with_derivation_func( + self, + ds: xr.Dataset, + func: Callable, + src_var_keys: Tuple[str, ...], + target_var_key: str, + ) -> xr.Dataset: + """ + Get the dataset with the target variable using the derivation function + and source variables. - return ds + Parameters + ---------- + ds : xr.Dataset + The dataset with source variables used for deriving the target + variable. + func : Callable + The derivation function that uses the source variables to derive + the target variables. + src_var_keys : Tuple[str, ...] + The source variable keys. + target_var_key : str + The target variable key. + + Returns + ------- + xr.Dataset + The dataset with the derived target variable. + """ + func_args = [ds[var].copy() for var in src_var_keys] + + if func in FUNC_NEEDS_TARGET_VAR: + func_args = [target_var_key] + func_args # type: ignore # pragma: nocover + + # Derive the target variable, then align the dataset dimensions to it. + # Dimensional alignment is necessary for cases where the target variable + # has been subsetted (e.g., `cosp_histogram_standardize()`). `xr.align` + # returns both objects and element 0 is the xr.Dataset that is needed. + derived_var = func(*func_args) # pragma: nocover + ds_final = xr.align(ds.copy(), derived_var)[0] + ds_final[target_var_key] = derived_var + + return ds_final def _get_matching_time_series_src_vars( self, path: str, target_var_map: DerivedVariableMap @@ -785,8 +885,8 @@ def _get_matching_time_series_src_vars( return {(self.var,): lambda x: x} raise IOError( - f"Neither does {self.var} nor the variables in {possible_vars} " - f"have valid files in {path}." + f"No files found for target variable {self.var} or derived variables " + f"({possible_vars}) in {path}." 
         )

    def _get_dataset_with_source_vars(self, vars_to_get: Tuple[str, ...]) -> xr.Dataset:
@@ -811,6 +911,7 @@ def _get_dataset_with_source_vars(self, vars_to_get: Tuple[str, ...]) -> xr.Data
         datasets.append(ds)

         ds = xr.merge(datasets)
+        ds = self._squeeze_time_dim(ds)

         return ds

diff --git a/e3sm_diags/driver/utils/io.py b/e3sm_diags/driver/utils/io.py
index 9b559a0659..f7e1e16e8f 100644
--- a/e3sm_diags/driver/utils/io.py
+++ b/e3sm_diags/driver/utils/io.py
@@ -3,7 +3,7 @@
 import errno
 import json
 import os
-from typing import Callable
+from typing import Callable, Literal

 import xarray as xr

@@ -111,6 +111,63 @@ def _write_vars_to_netcdf(
     -----
     Replaces `e3sm_diags.driver.utils.general.save_ncfiles()`.
     """
+    _write_to_netcdf(parameter, ds_test[var_key], var_key, "test")
+
+    if ds_ref is not None:
+        _write_to_netcdf(parameter, ds_ref[var_key], var_key, "ref")
+
+    if ds_diff is not None:
+        _write_to_netcdf(parameter, ds_diff[var_key], var_key, "diff")
+
+
+def _write_to_netcdf(
+    parameter: CoreParameter,
+    var: xr.DataArray,
+    var_key: str,
+    data_type: Literal["test", "ref", "diff"],
+):
+    dir_path = _get_output_dir(parameter)
+    filename = f"{parameter.output_file}_{data_type}.nc"
+
+    filepath = os.path.join(dir_path, filename)
+
+    var.to_netcdf(filepath)
+
+    logger.info(f"'{var_key}' {data_type} variable output saved in: {filepath}")
+
+    return filename
+
+
+def _write_vars_to_single_netcdf(
+    parameter: CoreParameter,
+    var_key: str,
+    ds_test: xr.Dataset,
+    ds_ref: xr.Dataset | None,
+    ds_diff: xr.Dataset | None,
+):
+    """Saves the test, reference, and difference variables to a single netCDF.
+
+    NOTE: This function is not currently being used because we need to save
+    individual netCDF files (`_write_vars_to_netcdf()`) to perform regression
+    testing against the `main` branch, which saves files individually.
+
+    Parameters
+    ----------
+    parameter : CoreParameter
+        The parameter object used to configure the diagnostic runs for the
+        sets. The referenced attributes include `save_netcdf`, `current_set`,
+        `var_id`, `ref_name`, `output_file`, `results_dir`, and `case_id`.
+    var_key : str
+        The key of the variable to write.
+    ds_test : xr.Dataset
+        The dataset containing the test variable.
+    ds_ref : xr.Dataset | None
+        The dataset containing the ref variable. If this is a model-only run
+        then it will be the same dataset as ``ds_test``.
+    ds_diff : xr.Dataset | None
+        The optional dataset containing the difference between the test and
+        reference variables.
+ """ dir_path = _get_output_dir(parameter) filename = f"{parameter.output_file}_output.nc" output_file = os.path.join(dir_path, filename) diff --git a/e3sm_diags/parameter/core_parameter.py b/e3sm_diags/parameter/core_parameter.py index 7b0da9e2c8..e87c59176d 100644 --- a/e3sm_diags/parameter/core_parameter.py +++ b/e3sm_diags/parameter/core_parameter.py @@ -40,7 +40,6 @@ # "tropical_subseasonal", ] - if TYPE_CHECKING: from e3sm_diags.driver.utils.dataset_xr import Dataset diff --git a/e3sm_diags/plot/cartopy/cosp_histogram_plot.py b/e3sm_diags/plot/cosp_histogram_plot.py similarity index 73% rename from e3sm_diags/plot/cartopy/cosp_histogram_plot.py rename to e3sm_diags/plot/cosp_histogram_plot.py index eab7cd3289..20bbc0755d 100644 --- a/e3sm_diags/plot/cartopy/cosp_histogram_plot.py +++ b/e3sm_diags/plot/cosp_histogram_plot.py @@ -1,12 +1,13 @@ -from __future__ import print_function - import os +from typing import List, Tuple, Union import matplotlib import numpy as np +import xarray as xr from e3sm_diags.driver.utils.general import get_output_dir from e3sm_diags.logger import custom_logger +from e3sm_diags.parameter.core_parameter import CoreParameter from e3sm_diags.plot import get_colormap matplotlib.use("Agg") @@ -30,36 +31,118 @@ border = (-0.10, -0.05, 0.13, 0.033) -def get_ax_size(fig, ax): - bbox = ax.get_window_extent().transformed(fig.dpi_scale_trans.inverted()) - width, height = bbox.width, bbox.height - width *= fig.dpi - height *= fig.dpi - return width, height +def plot( + parameter: CoreParameter, + da_test: xr.DataArray, + da_ref: xr.DataArray, + da_diff: xr.DataArray, +): + fig = plt.figure(figsize=parameter.figsize, dpi=parameter.dpi) + _plot_panel( + 0, + da_test, + fig, + parameter, + parameter.contour_levels, + "rainbow", + title=(parameter.test_name_yrs, parameter.test_title, da_test.units), + ) + _plot_panel( + 1, + da_ref, + fig, + parameter, + parameter.contour_levels, + "rainbow", + title=(parameter.ref_name_yrs, parameter.reference_title, da_test.units), + ) + _plot_panel( + 2, + da_diff, + fig, + parameter, + parameter.diff_levels, + parameter.diff_colormap, + title=(parameter.diff_name, parameter.diff_title, da_test.units), + ) -def plot_panel(n, fig, _, var, clevels, cmap, title, parameters, stats=None): + # Figure title + fig.suptitle(parameter.main_title, x=0.5, y=0.96, fontsize=18) + + # Save figure + for f in parameter.output_format: + f = f.lower().split(".")[-1] + fnm = os.path.join( + get_output_dir(parameter.current_set, parameter), + parameter.output_file + "." + f, + ) + plt.savefig(fnm) + # Get the filename that the user has passed in and display that. + fnm = os.path.join( + get_output_dir(parameter.current_set, parameter), + parameter.output_file + "." + f, + ) + logger.info(f"Plot saved in: {fnm}") + + # Save individual subplots + for f in parameter.output_format_subplot: + fnm = os.path.join( + get_output_dir(parameter.current_set, parameter), + parameter.output_file, + ) + page = fig.get_size_inches() + i = 0 + for p in panel: + # Extent of subplot + subpage = np.array(p).reshape(2, 2) + subpage[1, :] = subpage[0, :] + subpage[1, :] + subpage = subpage + np.array(border).reshape(2, 2) + subpage = list(((subpage) * page).flatten()) # type: ignore + extent = matplotlib.transforms.Bbox.from_extents(*subpage) + # Save subplot + fname = fnm + ".%i." % (i) + f + plt.savefig(fname, bbox_inches=extent) + + orig_fnm = os.path.join( + get_output_dir(parameter.current_set, parameter), + parameter.output_file, + ) + fname = orig_fnm + ".%i." 
% (i) + f + logger.info(f"Sub-plot saved in: {fname}") + + i += 1 + + plt.close() + + +def _plot_panel( + subplot_num: int, + var: xr.DataArray, + fig: plt.Figure, + parameter: CoreParameter, + contour_levels: List[float], + color_map: str, + title: Tuple[Union[str, None], str, str], +): # Contour levels levels = None norm = None - if len(clevels) > 0: - levels = [-1.0e8] + clevels + [1.0e8] + if len(contour_levels) > 0: + levels = [-1.0e8] + contour_levels + [1.0e8] norm = colors.BoundaryNorm(boundaries=levels, ncolors=256) # Contour plot - ax = fig.add_axes(panel[n]) + ax = fig.add_axes(panel[subplot_num]) - var.getAxis(1) - var.getAxis(0) - - cmap = get_colormap(cmap, parameters) - p1 = plt.pcolormesh(var, cmap=cmap, norm=norm) + color_map = get_colormap(color_map, parameter) + p1 = plt.pcolormesh(var, cmap=color_map, norm=norm) # Calculate 3 x 3 grids for cloud fraction for nine cloud class # Place cloud fraction of each cloud class in plot: cld_3x3 = np.zeros((3, 3)) for j in range(3): for i in range(3): - if var.id.find("MISR") != -1: + if "MISR" in str(var.name): if j == 0: cld_3x3[j, i] = var[0:6, 2 * i : 2 * i + 2].sum() ax.text( @@ -118,7 +201,7 @@ def plot_panel(n, fig, _, var, clevels, cmap, title, parameters, stats=None): plt.axvline(x=2, linewidth=2, color="k") plt.axvline(x=4, linewidth=2, color="k") - if var.id.find("MISR") != -1: + if "MISR" in str(var.name): plt.axhline(y=6, linewidth=2, color="k") plt.axhline(y=9, linewidth=2, color="k") yticks = [0, 0.5, 1, 1.5, 2, 2.5, 3, 4, 5, 7, 9, 11, 13, 15, 17, 23] @@ -133,29 +216,24 @@ def plot_panel(n, fig, _, var, clevels, cmap, title, parameters, stats=None): ylabels = ["%.1f" % i for i in yticks] ax.set_yticklabels(ylabels) ax.set_ylabel("Cloud Top Pressure (mb)") + xticks = [0.3, 1.3, 3.6, 9.4, 23, 60, 379] xlabels = ["%.1f" % i for i in xticks] ax.set_xticklabels(xlabels) ax.set_xlabel("Cloud Optical Thickness") - # xlabels = ['%.1f' %i for i in x.getBounds()[:,0]]+['%.1f' %x.getBounds()[-1,-1]] - # ylabels = ['%.1f' %i for i in y.getBounds()[:,0]]+['%.1f' %y.getBounds()[-1,-1]] - - # ax.set_xticklabels(xlabels) - # ax.set_yticklabels(ylabels) if title[0] is not None: ax.set_title(title[0], loc="left", fontdict=plotSideTitle) if title[1] is not None: ax.set_title(title[1], fontdict=plotTitle) - # ax.set_title('cloud frac: %.1f'%total_cf+'%', loc='right', fontdict=plotSideTitle) + ax.set_title("%", loc="right", fontdict=plotSideTitle) - # if title[2] != None: ax.set_title(title[2], loc='right', fontdict=plotSideTitle) - # ax.set_ylabel('Cloud Top Height (km)') # Color bar - cbax = fig.add_axes((panel[n][0] + 0.6635, panel[n][1] + 0.0215, 0.0326, 0.1792)) + cbax = fig.add_axes( + (panel[subplot_num][0] + 0.6635, panel[subplot_num][1] + 0.0215, 0.0326, 0.1792) + ) cbar = fig.colorbar(p1, cax=cbax, extend="both") - w, h = get_ax_size(fig, cbax) if levels is None: cbar.ax.tick_params(labelsize=9.0, length=0) @@ -164,104 +242,4 @@ def plot_panel(n, fig, _, var, clevels, cmap, title, parameters, stats=None): cbar.set_ticks(levels[1:-1]) labels = ["%4.1f" % level for level in levels[1:-1]] cbar.ax.set_yticklabels(labels, ha="right") - # cbar.ax.set_yticklabels(labels,ha='right') cbar.ax.tick_params(labelsize=9.0, pad=25, length=0) - - -def plot(reference, test, diff, _, parameter): - # Create figure, projection - fig = plt.figure(figsize=parameter.figsize, dpi=parameter.dpi) - - plot_panel( - 0, - fig, - _, - test, - parameter.contour_levels, - "rainbow", - (parameter.test_name_yrs, parameter.test_title, test.units), - parameter, - 
) - plot_panel( - 1, - fig, - _, - reference, - parameter.contour_levels, - "rainbow", - (parameter.ref_name_yrs, parameter.reference_title, test.units), - parameter, - ) - plot_panel( - 2, - fig, - _, - diff, - parameter.diff_levels, - parameter.diff_colormap, - (parameter.diff_name, parameter.diff_title, test.units), - parameter, - ) - - # min2 = metrics_dict['ref']['min'] - # mean2 = metrics_dict['ref']['mean'] - # max2 = metrics_dict['ref']['max'] - # plot_panel(1, fig, proj, reference, parameter.contour_levels, 'viridis', - # (parameter.reference_name,parameter.reference_title,reference.units),stats=(max2,mean2,min2)) - # - # # Third panel - # min3 = metrics_dict['diff']['min'] - # mean3 = metrics_dict['diff']['mean'] - # max3 = metrics_dict['diff']['max'] - # r = metrics_dict['misc']['rmse'] - # c = metrics_dict['misc']['corr'] - # plot_panel(2, fig, proj, diff, parameter.diff_levels, 'RdBu_r', (None,parameter.diff_title,None), stats=(max3,mean3,min3,r,c)) - # - - # Figure title - fig.suptitle(parameter.main_title, x=0.5, y=0.96, fontsize=18) - - # Save figure - for f in parameter.output_format: - f = f.lower().split(".")[-1] - fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - parameter.output_file + "." + f, - ) - plt.savefig(fnm) - # Get the filename that the user has passed in and display that. - fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - parameter.output_file + "." + f, - ) - logger.info(f"Plot saved in: {fnm}") - - # Save individual subplots - for f in parameter.output_format_subplot: - fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - parameter.output_file, - ) - page = fig.get_size_inches() - i = 0 - for p in panel: - # Extent of subplot - subpage = np.array(p).reshape(2, 2) - subpage[1, :] = subpage[0, :] + subpage[1, :] - subpage = subpage + np.array(border).reshape(2, 2) - subpage = list(((subpage) * page).flatten()) # type: ignore - extent = matplotlib.transforms.Bbox.from_extents(*subpage) - # Save subplot - fname = fnm + ".%i." % (i) + f - plt.savefig(fname, bbox_inches=extent) - - orig_fnm = os.path.join( - get_output_dir(parameter.current_set, parameter), - parameter.output_file, - ) - fname = orig_fnm + ".%i." % (i) + f - logger.info(f"Sub-plot saved in: {fname}") - - i += 1 - - plt.close() diff --git a/e3sm_diags/plot/lat_lon_plot.py b/e3sm_diags/plot/lat_lon_plot.py index a6b8a0da02..05dda58a31 100644 --- a/e3sm_diags/plot/lat_lon_plot.py +++ b/e3sm_diags/plot/lat_lon_plot.py @@ -36,8 +36,9 @@ def plot( The test data. da_ref : xr.DataArray | None The optional reference data. - ds_diff : xr.DataArray | None - The difference between ``ds_test_regrid`` and ``ds_ref_regrid``. + da_diff : xr.DataArray | None + The difference between `da_test` and `da_ref` (both are gridded to + the lower resolution of the two beforehand). metrics_dict : Metrics The metrics. 
""" diff --git a/tests/e3sm_diags/derivations/test_formulas_cosp.py b/tests/e3sm_diags/derivations/test_formulas_cosp.py new file mode 100644 index 0000000000..bb2f4c71cc --- /dev/null +++ b/tests/e3sm_diags/derivations/test_formulas_cosp.py @@ -0,0 +1,758 @@ +import numpy as np +import pytest +import xarray as xr + +from e3sm_diags.derivations.formulas_cosp import ( + cosp_bin_sum, + cosp_histogram_standardize, +) + + +class TestCospHistogramStandardize: + @pytest.fixture(autouse=True) + def setup(self): + self.target_var_key = "CLDTOT_TAU1.3_MISR" + self.ds = xr.Dataset( + data_vars={ + "cld_var_dummy": xr.DataArray( + data=np.array( + [ + [1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + [2.0, 2.0, 2.0, 2.0, 2.0, 2.0], + [3.0, 3.0, 3.0, 3.0, 3.0, 3.0], + [4.0, 4.0, 4.0, 4.0, 4.0, 4.0], + [5.0, 5.0, 5.0, 5.0, 5.0, 5.0], + [6.0, 6.0, 6.0, 6.0, 6.0, 6.0], + [7.0, 7.0, 7.0, 7.0, 7.0, 7.0], + ], + dtype="float64", + ), + dims=["cosp_htmisr", "cosp_tau"], + attrs={"test_attr": "test"}, + ), + "cosp_htmisr_bnds": xr.DataArray( + data=np.array( + [ + [-1.0, 0.0], + [0.0, 1.0], + [0.5, 1.5], + [1.5, 2.5], + [2.5, 3.5], + [3.5, 4.5], + [4.5, 5.5], + ] + ), + dims=["cosp_htmisr", "bnds"], + ), + "cosp_tau_bnds": xr.DataArray( + data=np.array( + [ + [-1.0, 0.0], + [0.0, 1.0], + [0.5, 1.5], + [1.5, 2.5], + [2.5, 3.5], + [3.5, 4.5], + ] + ), + dims=["cosp_tau", "bnds"], + ), + }, + coords={ + "cosp_htmisr": xr.DataArray( + data=np.array([-0.5, 0.5, 1, 2, 3, 4, 5]), + dims=["cosp_htmisr"], + attrs={ + "bounds": "cosp_htmisr_bnds", + "units": "km", + "long_name": "COSP MISR height", + "realtopology": "linear", + }, + ), + "cosp_tau": xr.DataArray( + data=np.array([0.0, 0.5, 1, 2, 3, 4]), + dims=["cosp_tau"], + attrs={ + "bounds": "cosp_tau_bnds", + "units": "1", + "long_name": "COSP MEAN ISCCP optional depth", + "realtopology": "linear", + }, + ), + }, + ) + + def test_raises_error_if_dataset_is_missing_prs_or_tau_axis(self): + ds1 = self.ds.copy() + ds1 = ds1.rename({"cld_var_dummy": "CLD_MISR", "cosp_htmisr": "invalid_name"}) + + with pytest.raises(KeyError): + cosp_histogram_standardize(self.target_var_key, ds1["CLD_MISR"]) + + ds2 = self.ds.copy() + ds2 = ds2.rename({"cld_var_dummy": "CLD_MISR", "cosp_tau": "invalid_name"}) + + with pytest.raises(KeyError): + cosp_histogram_standardize(self.target_var_key, ds2["CLD_MISR"]) + + @pytest.mark.parametrize("var_key", ("CLD_MISR", "CLMISR")) + def test_standardizes_cld_misr_and_clmisr(self, var_key): + ds = self.ds.copy() + + ds = ds.rename({"cld_var_dummy": var_key}) + + result = cosp_histogram_standardize(self.target_var_key, ds[var_key]) + expected = xr.DataArray( + name=self.target_var_key, + data=np.array( + [ + [2.0, 2.0, 2.0, 2.0, 2.0], + [3.0, 3.0, 3.0, 3.0, 3.0], + [4.0, 4.0, 4.0, 4.0, 4.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [6.0, 6.0, 6.0, 6.0, 6.0], + [7.0, 7.0, 7.0, 7.0, 7.0], + ], + dtype="float64", + ), + dims=["cosp_htmisr", "cosp_tau"], + coords={ + "cosp_htmisr": xr.DataArray( + data=np.array([0.5, 1, 2, 3, 4, 5]), + dims=["cosp_htmisr"], + attrs={ + "bounds": "cosp_htmisr_bnds", + "units": "km", + "long_name": "COSP MISR height", + "realtopology": "linear", + }, + ), + "cosp_tau": xr.DataArray( + data=np.array([0.5, 1, 2, 3, 4]), + dims=["cosp_tau"], + attrs={ + "bounds": "cosp_tau_bnds", + "units": "1", + "long_name": "COSP MEAN ISCCP optional depth", + "realtopology": "linear", + }, + ), + }, + attrs={"test_attr": "test"}, + ) + + xr.testing.assert_identical(result, expected) + + def test_returns_sum_with_cld_misr_with_unit_adjustment(self): + 
target_var_key = "CLDLOW_TAU1.3_MISR" + + ds1 = xr.Dataset( + data_vars={ + "CLD_MISR": xr.DataArray( + data=np.array( + [ + [1.0, 1.0, 1.0], + [2.0, 2.0, 2.0], + [3.0, 3.0, 3.0], + ], + dtype="float64", + ), + dims=["cosp_htmisr", "cosp_tau"], + attrs={"test_attr": "test"}, + ), + }, + coords={ + "cosp_htmisr": xr.DataArray( + data=np.array([0, 1, 2000]), + dims=["cosp_htmisr"], + attrs={ + "bounds": "cosp_htmisr_bnds", + "units": "km", + "long_name": "COSP MISR height", + "realtopology": "linear", + }, + ), + "cosp_tau": xr.DataArray( + data=np.array([-0.5, 1.5, 2.0]), + dims=["cosp_tau"], + attrs={ + "bounds": "cosp_tau_bnds", + "units": "1", + "long_name": "COSP MEAN ISCCP optional depth", + "realtopology": "linear", + }, + ), + }, + ) + + result = cosp_histogram_standardize(target_var_key, ds1["CLD_MISR"]) + + expected = xr.DataArray( + name=target_var_key, + data=np.array( + [[2.0, 2.0], [3.0, 3.0]], + dtype="float64", + ), + dims=["cosp_htmisr", "cosp_tau"], + coords={ + "cosp_htmisr": xr.DataArray( + data=np.array([1, 2000]), + dims=["cosp_htmisr"], + attrs={ + "bounds": "cosp_htmisr_bnds", + "units": "km", + "long_name": "COSP MISR height", + "realtopology": "linear", + }, + ), + "cosp_tau": xr.DataArray( + data=np.array([1.5, 2.0]), + dims=["cosp_tau"], + attrs={ + "bounds": "cosp_tau_bnds", + "units": "1", + "long_name": "COSP MEAN ISCCP optional depth", + "realtopology": "linear", + }, + ), + }, + attrs={"test_attr": "test"}, + ) + + xr.testing.assert_identical(result, expected) + + @pytest.mark.xfail + @pytest.mark.parametrize("var_key", ("CLD_MISR", "CLMISR")) + def test_standardizes_cld_misr_and_cldmisr_and_adds_default_bnds_if_bnds_are_missing( + self, var_key + ): + # TODO: Update this test if missing bounds are dynamically generated. + ds = self.ds.copy() + ds = ds.drop_vars(["cosp_htmisr_bnds", "cosp_tau_bnds"]) + + # Rename the cloud variable placeholder to the variable to be tested. + ds = ds.rename({"cld_var_dummy": var_key}) + result = cosp_histogram_standardize(self.target_var_key, ds[var_key]) + + expected = xr.DataArray( + name=self.target_var_key, + data=np.array( + [ + [2.0, 2.0, 2.0, 2.0, 2.0], + [3.0, 3.0, 3.0, 3.0, 3.0], + [4.0, 4.0, 4.0, 4.0, 4.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [6.0, 6.0, 6.0, 6.0, 6.0], + [7.0, 7.0, 7.0, 7.0, 7.0], + ], + dtype="float64", + ), + dims=["cosp_htmisr", "cosp_tau"], + coords={ + "cosp_htmisr": xr.DataArray( + data=np.array([0.5, 1, 2, 3, 4, 5]), + dims=["cosp_htmisr"], + attrs={ + "bounds": "cosp_htmisr_bnds", + "units": "km", + "long_name": "COSP MISR height", + "realtopology": "linear", + }, + ), + "cosp_tau": xr.DataArray( + data=np.array([0.5, 1, 2, 3, 4]), + dims=["cosp_tau"], + attrs={ + "bounds": "cosp_tau_bnds", + "units": "1", + "long_name": "COSP MEAN ISCCP optional depth", + "realtopology": "linear", + }, + ), + }, + attrs={"test_attr": "test"}, + ) + + xr.testing.assert_identical(result, expected) + + @pytest.mark.parametrize("var_key", ("FISCCP1_COSP", "CLISCCP", "CLMODIS")) + def test_standardizes_fisccp1_cosp_clisccp_and_clmodis(self, var_key): + ds = self.ds.copy() + + # Rename the cloud variable placeholder to the variable to be tested + # and also rename the "cosp_tau" dimension to test other dimension keys. 
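Conceptually, every case in this class reduces to `cosp_histogram_standardize` subsetting the histogram along its tau axis (and, for the MISR variants, dropping the sentinel height bin). A rough standalone sketch of that subsetting, assuming simple label slices; the real function also renames the variable and cleans up bounds metadata:

```python
import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(42.0).reshape(7, 6),
    dims=("cosp_htmisr", "cosp_tau"),
    coords={
        "cosp_htmisr": [-0.5, 0.5, 1, 2, 3, 4, 5],  # first bin is a sentinel
        "cosp_tau": [0.0, 0.5, 1, 2, 3, 4],
    },
    name="CLD_MISR",
)

# Keep tau >= 0.3 and heights >= 0, mirroring the 6x5 expected arrays in the
# MISR tests (the ISCCP/MODIS variants subset only the tau axis).
standardized = da.sel(cosp_tau=slice(0.3, None), cosp_htmisr=slice(0, None))
assert standardized.shape == (6, 5)
```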
+ ds = ds.rename({"cld_var_dummy": var_key}) + result = cosp_histogram_standardize(self.target_var_key, ds[var_key]) + expected = xr.DataArray( + name=self.target_var_key, + data=np.array( + [ + [1.0, 1.0, 1.0, 1.0, 1.0], + [2.0, 2.0, 2.0, 2.0, 2.0], + [3.0, 3.0, 3.0, 3.0, 3.0], + [4.0, 4.0, 4.0, 4.0, 4.0], + [5.0, 5.0, 5.0, 5.0, 5.0], + [6.0, 6.0, 6.0, 6.0, 6.0], + [7.0, 7.0, 7.0, 7.0, 7.0], + ], + dtype="float64", + ), + dims=["cosp_htmisr", "cosp_tau"], + coords={ + "cosp_htmisr": xr.DataArray( + data=np.array([-0.5, 0.5, 1, 2, 3, 4, 5]), + dims=["cosp_htmisr"], + attrs={ + "bounds": "cosp_htmisr_bnds", + "units": "km", + "long_name": "COSP MISR height", + "realtopology": "linear", + }, + ), + "cosp_tau": xr.DataArray( + data=np.array([0.5, 1, 2, 3, 4]), + dims=["cosp_tau"], + attrs={ + "bounds": "cosp_tau_bnds", + "units": "1", + "long_name": "COSP MEAN ISCCP optional depth", + "realtopology": "linear", + }, + ), + }, + attrs={"test_attr": "test"}, + ) + + xr.testing.assert_identical(result, expected) + + +class TestCospBinSum: + @pytest.fixture(autouse=True) + def setup(self): + self.ds = xr.Dataset( + data_vars={ + "cld_var_dummy": xr.DataArray( + data=np.array( + [ + [1.0, 1.0, 1.0], + [2.0, 2.0, 2.0], + [3.0, 3.0, 3.0], + ], + dtype="float64", + ), + dims=["cosp_htmisr", "cosp_tau"], + attrs={"test_attr": "test"}, + ), + }, + coords={ + "cosp_htmisr": xr.DataArray( + data=np.array([-0.5, 0.5, 1]), + dims=["cosp_htmisr"], + attrs={ + "bounds": "cosp_htmisr_bnds", + "units": "km", + "long_name": "COSP MISR height", + "realtopology": "linear", + }, + ), + "cosp_tau": xr.DataArray( + data=np.array([0.0, 0.5, 1]), + dims=["cosp_tau"], + attrs={ + "bounds": "cosp_tau_bnds", + "units": "1", + "long_name": "COSP MEAN ISCCP optional depth", + "realtopology": "linear", + }, + ), + }, + ) + + def test_raises_error_if_dataset_is_missing_prs_or_tau_axis(self): + target_var_key = "CLDTOT_TAU1.3_MISR" + + ds1 = self.ds.copy() + ds1 = ds1.rename({"cosp_htmisr": "invalid_name"}) + + with pytest.raises(KeyError): + cosp_bin_sum(target_var_key, ds1["CLD_MISR"]) + + ds2 = self.ds.copy() + ds2 = ds2.rename({"cosp_tau": "invalid_name"}) + + with pytest.raises(KeyError): + cosp_bin_sum(target_var_key, ds1["CLD_MISR"]) + + def test_returns_sum(self): + target_var_key = "CLDTOT_TAU1.3_MISR" + + ds1 = self.ds.copy() + ds1 = ds1.rename({"cld_var_dummy": "CLD_MISR"}) + + result = cosp_bin_sum(target_var_key, ds1["CLD_MISR"]) + + expected = xr.DataArray( + name="CLD_MISR", + data=np.array(0.0), + attrs={"long_name": "MISR: total cloud fraction with tau > 1.3"}, + ) + + xr.testing.assert_identical(result, expected) + + def test_returns_sum_using_prs_subset(self): + target_var_key = "CLDLOW_TAU1.3_MISR" + + ds1 = xr.Dataset( + data_vars={ + "CLD_MISR": xr.DataArray( + data=np.array( + [ + [1.0, 1.0, 1.0], + [2.0, 2.0, 2.0], + [3.0, 3.0, 3.0], + ], + dtype="float64", + ), + dims=["cosp_htmisr", "cosp_tau"], + attrs={"test_attr": "test"}, + ), + }, + coords={ + "cosp_htmisr": xr.DataArray( + data=np.array([0, 1, 2]), + dims=["cosp_htmisr"], + attrs={ + "bounds": "cosp_htmisr_bnds", + "units": "km", + "long_name": "COSP MISR height", + "realtopology": "linear", + }, + ), + "cosp_tau": xr.DataArray( + data=np.array([-0.5, 1.5, 2.0]), + dims=["cosp_tau"], + attrs={ + "bounds": "cosp_tau_bnds", + "units": "1", + "long_name": "COSP MEAN ISCCP optional depth", + "realtopology": "linear", + }, + ), + }, + ) + + result = cosp_bin_sum(target_var_key, ds1["CLD_MISR"]) + + expected = xr.DataArray( + name="CLD_MISR", + 
data=np.array(12), + attrs={"long_name": "MISR: low cloud fraction with tau > 1.3"}, + ) + + xr.testing.assert_identical(result, expected) + + def test_returns_sum_using_prs_subset_with_unit_adjustment(self): + target_var_key = "CLDLOW_TAU1.3_MISR" + + ds1 = xr.Dataset( + data_vars={ + "CLD_MISR": xr.DataArray( + data=np.array( + [ + [1.0, 1.0, 1.0], + [2.0, 2.0, 2.0], + [3.0, 3.0, 3.0], + ], + dtype="float64", + ), + dims=["cosp_htmisr", "cosp_tau"], + attrs={"test_attr": "test"}, + ), + }, + coords={ + "cosp_htmisr": xr.DataArray( + data=np.array([0, 1, 2000]), + dims=["cosp_htmisr"], + attrs={ + "bounds": "cosp_htmisr_bnds", + "units": "km", + "long_name": "COSP MISR height", + "realtopology": "linear", + }, + ), + "cosp_tau": xr.DataArray( + data=np.array([-0.5, 1.5, 2.0]), + dims=["cosp_tau"], + attrs={ + "bounds": "cosp_tau_bnds", + "units": "1", + "long_name": "COSP MEAN ISCCP optional depth", + "realtopology": "linear", + }, + ), + }, + ) + + result = cosp_bin_sum(target_var_key, ds1["CLD_MISR"]) + + expected = xr.DataArray( + name="CLD_MISR", + data=np.array(12), + attrs={"long_name": "MISR: low cloud fraction with tau > 1.3"}, + ) + + xr.testing.assert_identical(result, expected) + + def test_returns_sum_using_tau_subset_with_adjusted_min_and_max(self): + target_var_key = "CLDTOT_TAU1.3_9.4_ISCCP" + + ds1 = xr.Dataset( + data_vars={ + "CLD_MISR": xr.DataArray( + data=np.array( + [ + [1.0, 1.0, 1.0], + [2.0, 2.0, 2.0], + [3.0, 3.0, 3.0], + ], + dtype="float64", + ), + dims=["cosp_htmisr", "cosp_tau"], + attrs={"test_attr": "test"}, + ), + }, + coords={ + "cosp_htmisr": xr.DataArray( + data=np.array([0, 1, 2]), + dims=["cosp_htmisr"], + attrs={ + "bounds": "cosp_htmisr_bnds", + "units": "km", + "long_name": "COSP MISR height", + "realtopology": "linear", + }, + ), + "cosp_tau": xr.DataArray( + data=np.array([-0.5, 1.5, 10.0]), + dims=["cosp_tau"], + attrs={ + "bounds": "cosp_tau_bnds", + "units": "1", + "long_name": "COSP MEAN ISCCP optional depth", + "realtopology": "linear", + }, + ), + }, + ) + + result = cosp_bin_sum(target_var_key, ds1["CLD_MISR"]) + + expected = xr.DataArray( + name="CLD_MISR", + data=np.array(6), + attrs={"long_name": "MISR: total cloud fraction with 1.3 < tau < 9.4"}, + ) + + xr.testing.assert_identical(result, expected) + + def test_returns_sum_using_tau_subset_with_adjusted_min_only(self): + target_var_key = "CLDTOT_TAU1.3_ISCCP" + + ds1 = xr.Dataset( + data_vars={ + "CLD_MISR": xr.DataArray( + data=np.array( + [ + [1.0, 1.0, 1.0], + [2.0, 2.0, 2.0], + [3.0, 3.0, 3.0], + ], + dtype="float64", + ), + dims=["cosp_htmisr", "cosp_tau"], + attrs={"test_attr": "test"}, + ), + }, + coords={ + "cosp_htmisr": xr.DataArray( + data=np.array([0, 1, 2]), + dims=["cosp_htmisr"], + attrs={ + "bounds": "cosp_htmisr_bnds", + "units": "km", + "long_name": "COSP MISR height", + "realtopology": "linear", + }, + ), + "cosp_tau": xr.DataArray( + data=np.array([-0.5, 1.5, 2.0]), + dims=["cosp_tau"], + attrs={ + "bounds": "cosp_tau_bnds", + "units": "1", + "long_name": "COSP MEAN ISCCP optional depth", + "realtopology": "linear", + }, + ), + }, + ) + + result = cosp_bin_sum(target_var_key, ds1["CLD_MISR"]) + + expected = xr.DataArray( + name="CLD_MISR", + data=np.array(12), + attrs={"long_name": "MISR: total cloud fraction with tau > 1.3"}, + ) + + xr.testing.assert_identical(result, expected) + + @pytest.mark.parametrize( + "var_key,expected", + [ + ("FISCCP1_COSP", "ISCCP: total cloud fraction with tau > 1.3"), + ( + "CLMODIS", + "MODIS: total cloud fraction with tau > 1.3", + 
), + ("CLD_MISR", "MISR: total cloud fraction with tau > 1.3"), + ], + ) + def test_sets_variable_long_name_attr_if_matching_simulation_and_cloud_levels_are_set( + self, var_key, expected + ): + target_var_key = "CLDTOT_TAU1.3_MODIS" + + ds1 = self.ds.copy() + ds1 = ds1.rename({"cld_var_dummy": var_key}) + + result_var = cosp_bin_sum(target_var_key, ds1[var_key]) + result = result_var.attrs["long_name"] + assert result == expected + + @pytest.mark.parametrize( + "var_key,expected,prs_axis_data", + [ + ( + "CLMODIS", + "MODIS: middle cloud fraction with tau > 1.3", + np.array([440, 500, 680]), + ), + ( + "CLD_MISR", + "MISR: middle cloud fraction with tau > 1.3", + np.array([7, 5, 3]), + ), + ], + ) + def test_sets_variable_long_name_attr_with_middle_cloud_fraction( + self, var_key, expected, prs_axis_data + ): + # Min in low_bnds and max in high_bnds + target_var_key = "CLDTOT_TAU1.3_MODIS" + + ds1 = xr.Dataset( + data_vars={ + var_key: xr.DataArray( + data=np.array( + [ + [1.0, 1.0, 1.0], + [2.0, 2.0, 2.0], + [3.0, 3.0, 3.0], + ], + dtype="float64", + ), + dims=["cosp_htmisr", "cosp_tau"], + attrs={"test_attr": "test"}, + ), + }, + coords={ + "cosp_htmisr": xr.DataArray( + data=prs_axis_data, + dims=["cosp_htmisr"], + attrs={ + "bounds": "cosp_htmisr_bnds", + "units": "km", + "long_name": "COSP MISR height", + "realtopology": "linear", + }, + ), + "cosp_tau": xr.DataArray( + data=np.array([-0.5, 1.5, 2.0]), + dims=["cosp_tau"], + attrs={ + "bounds": "cosp_tau_bnds", + "units": "1", + "long_name": "COSP MEAN ISCCP optional depth", + "realtopology": "linear", + }, + ), + }, + ) + + result_var = cosp_bin_sum(target_var_key, ds1[var_key]) + result = result_var.attrs["long_name"] + assert result == expected + + @pytest.mark.parametrize( + "var_key,expected,prs_axis_data", + [ + ( + "CLMODIS", + "MODIS: high cloud fraction with tau > 1.3", + np.array([440, 500, 600]), + ), + ( + "CLD_MISR", + "MISR: high cloud fraction with tau > 1.3", + np.array([7, 5, 6]), + ), + ], + ) + def test_sets_variable_long_name_attr_with_high_cloud_fraction( + self, var_key, expected, prs_axis_data + ): + target_var_key = "CLDTOT_TAU1.3_MODIS" + + ds1 = xr.Dataset( + data_vars={ + var_key: xr.DataArray( + data=np.array( + [ + [1.0, 1.0, 1.0], + [2.0, 2.0, 2.0], + [3.0, 3.0, 3.0], + ], + dtype="float64", + ), + dims=["cosp_htmisr", "cosp_tau"], + attrs={"test_attr": "test"}, + ), + }, + coords={ + "cosp_htmisr": xr.DataArray( + data=prs_axis_data, + dims=["cosp_htmisr"], + attrs={ + "bounds": "cosp_htmisr_bnds", + "units": "km", + "long_name": "COSP MISR height", + "realtopology": "linear", + }, + ), + "cosp_tau": xr.DataArray( + data=np.array([-0.5, 1.5, 2.0]), + dims=["cosp_tau"], + attrs={ + "bounds": "cosp_tau_bnds", + "units": "1", + "long_name": "COSP MEAN ISCCP optional depth", + "realtopology": "linear", + }, + ), + }, + ) + + result_var = cosp_bin_sum(target_var_key, ds1[var_key]) + result = result_var.attrs["long_name"] + assert result == expected diff --git a/tests/e3sm_diags/driver/utils/test_dataset_xr.py b/tests/e3sm_diags/driver/utils/test_dataset_xr.py index 75eed59e76..4b112161ed 100644 --- a/tests/e3sm_diags/driver/utils/test_dataset_xr.py +++ b/tests/e3sm_diags/driver/utils/test_dataset_xr.py @@ -16,6 +16,44 @@ ) from e3sm_diags.parameter.core_parameter import CoreParameter +# Reusable spatial coords dictionary for composing an xr.Dataest. 
+spatial_coords = {
+    "lat": xr.DataArray(
+        dims="lat",
+        data=np.array([-90.0, 90]),
+        attrs={
+            "axis": "Y",
+            "long_name": "latitude",
+            "standard_name": "latitude",
+            "bounds": "lat_bnds",
+        },
+    ),
+    "lon": xr.DataArray(
+        dims="lon",
+        data=np.array([0.0, 180]),
+        attrs={
+            "axis": "X",
+            "long_name": "longitude",
+            "standard_name": "longitude",
+            "bounds": "lon_bnds",
+        },
+    ),
+}
+
+# Reusable spatial bounds dictionary for composing an xr.Dataset.
+spatial_bounds = {
+    "lat_bnds": xr.DataArray(
+        name="lat_bnds",
+        data=[[-90.0, 0.0], [0.0, 90.0]],
+        dims=["lat", "bnds"],
+    ),
+    "lon_bnds": xr.DataArray(
+        name="lon_bnds",
+        data=[[-90.0, 90.0], [90, 270]],
+        dims=["lon", "bnds"],
+    ),
+}
+

 def _create_parameter_object(
     dataset_type: Literal["ref", "test"],
@@ -216,11 +254,9 @@ def setup(self, tmp_path):
         self.data_path.mkdir()

         # Set up climatology dataset and save to a temp file.
-        # TODO: Update this to an actual climatology dataset structure
         self.ds_climo = xr.Dataset(
             coords={
-                "lat": [-90, 90],
-                "lon": [0, 180],
+                **spatial_coords,
                 "time": xr.DataArray(
                     dims="time",
                     data=np.array(
@@ -240,6 +276,7 @@
                 ),
             },
             data_vars={
+                **spatial_bounds,
                 "ts": xr.DataArray(
                     name="ts",
                     data=np.array(
                         [
                         ]
                     ),
                     dims=["time", "lat", "lon"],
-                )
+                ),
             },
         )
         self.ds_climo.time.encoding = {"units": "days since 2000-01-01"}
@@ -390,12 +427,46 @@ def setup(self, tmp_path):
         self.data_path = tmp_path / "input_data"
         self.data_path.mkdir()

+        self.spatial_coords = {
+            "lat": xr.DataArray(
+                dims="lat",
+                data=np.array([-90.0, 90]),
+                attrs={
+                    "axis": "Y",
+                    "long_name": "latitude",
+                    "standard_name": "latitude",
+                    "bounds": "lat_bnds",
+                },
+            ),
+            "lon": xr.DataArray(
+                dims="lon",
+                data=np.array([0.0, 180]),
+                attrs={
+                    "axis": "X",
+                    "long_name": "longitude",
+                    "standard_name": "longitude",
+                    "bounds": "lon_bnds",
+                },
+            ),
+        }
+        self.spatial_bounds = {
+            "lat_bnds": xr.DataArray(
+                name="lat_bnds",
+                data=[[-90.0, 0.0], [0.0, 90.0]],
+                dims=["lat", "bnds"],
+            ),
+            "lon_bnds": xr.DataArray(
+                name="lon_bnds",
+                data=[[-90.0, 90.0], [90, 270]],
+                dims=["lon", "bnds"],
+            ),
+        }
+
         # Set up climatology dataset and save to a temp file.
         # TODO: Update this to an actual climatology dataset structure
         self.ds_climo = xr.Dataset(
             coords={
-                "lat": [-90, 90],
-                "lon": [0, 180],
+                **spatial_coords,
                 "time": xr.DataArray(
                     dims="time",
                     data=np.array(
@@ -415,6 +486,7 @@
                 ),
             },
             data_vars={
+                **spatial_bounds,
                 "ts": xr.DataArray(
                     name="ts",
                     data=np.array(
                         [
                         ]
                     ),
                     dims=["time", "lat", "lon"],
-                )
+                ),
             },
         )
         self.ds_climo.time.encoding = {"units": "days since 2000-01-01"}
@@ -431,8 +503,7 @@
         # Set up time series dataset and save to a temp file.
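The reusable dictionaries above are spread into each fixture with `**`. A compact illustration of the composition pattern, reusing `spatial_coords`/`spatial_bounds` from this module with a made-up `ts` variable:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    coords={**spatial_coords},
    data_vars={
        **spatial_bounds,
        "ts": xr.DataArray(data=np.ones((2, 2)), dims=["lat", "lon"]),
    },
)

# The dataset picks up the shared coordinates and bounds by dictionary key.
assert "lat_bnds" in ds and "lon_bnds" in ds
```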
self.ds_ts = xr.Dataset( coords={ - "lat": [-90, 90], - "lon": [0, 180], + **spatial_coords, "time": xr.DataArray( dims="time", data=np.array( @@ -573,9 +644,7 @@ def test_returns_climo_dataset_using_test_file_variable(self): xr.testing.assert_identical(result, expected) - def test_returns_climo_dataset_using_ref_file_variable_test_name_and_season( - self, - ): + def test_returns_climo_dataset_using_ref_file_variable_test_name_and_season(self): # Example: {test_data_path}/{test_name}_{season}.nc parameter = _create_parameter_object( "ref", "climo", self.data_path, "2000", "2001" @@ -589,9 +658,7 @@ def test_returns_climo_dataset_using_ref_file_variable_test_name_and_season( xr.testing.assert_identical(result, expected) - def test_returns_climo_dataset_using_test_file_variable_test_name_and_season( - self, - ): + def test_returns_climo_dataset_using_test_file_variable_test_name_and_season(self): # Example: {test_data_path}/{test_name}_{season}.nc parameter = _create_parameter_object( "test", "climo", self.data_path, "2000", "2001" @@ -651,8 +718,7 @@ def test_returns_climo_dataset_with_derived_variable(self): # We will derive the "PRECT" variable using the "pr" variable. ds_pr = xr.Dataset( coords={ - "lat": [-90, 90], - "lon": [0, 180], + **spatial_coords, "time": xr.DataArray( dims="time", data=np.array( @@ -672,6 +738,7 @@ def test_returns_climo_dataset_with_derived_variable(self): ), }, data_vars={ + **spatial_bounds, "pr": xr.DataArray( xr.DataArray( data=np.array( @@ -702,11 +769,56 @@ def test_returns_climo_dataset_with_derived_variable(self): xr.testing.assert_identical(result, expected) + @pytest.mark.xfail + def test_returns_climo_dataset_using_derived_var_directly_from_dataset_and_replaces_scalar_time_var( + self, + ): + # FIXME: This test needs to cover `except` block in `_open_dataset()`. + # The issue is that we can't create a dummy dataset with an incorrect + # time scalar variable using Xarray because it just throws the error + # below. We might need to use another library like netCDF4 to create + # a dummy dataset. 
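One way to build the pathological file that the FIXME above calls for is to bypass xarray and write it with the `netCDF4` library directly. A sketch, under the assumption that a scalar variable named after an existing dimension reproduces the error:

```python
import netCDF4

with netCDF4.Dataset("bad_scalar_time.nc", "w") as nc:
    nc.createDimension("time", 1)
    nc.createDimension("lat", 2)

    ts = nc.createVariable("ts", "f8", ("time", "lat"))
    ts[:] = 1.0

    # A 0-d variable that shares its name with the "time" dimension should
    # trigger xarray's "dimension 'time' already exists as a scalar variable".
    time = nc.createVariable("time", "f8", ())
    time.assignValue(0.0)
```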
+ ds_precst = xr.Dataset( + coords={ + **spatial_coords, + }, + data_vars={ + **spatial_bounds, + "time": xr.DataArray( + dims="time", + data=0, + ), + "PRECST": xr.DataArray( + xr.DataArray( + data=np.array( + [ + [[1.0, 1.0], [1.0, 1.0]], + ] + ), + dims=["time", "lat", "lon"], + attrs={"units": "mm/s"}, + ) + ), + }, + ) + + parameter = _create_parameter_object( + "ref", "climo", self.data_path, "2000", "2001" + ) + parameter.ref_file = "pr_200001_200112.nc" + ds_precst.to_netcdf(f"{self.data_path}/{parameter.ref_file}") + + ds = Dataset(parameter, data_type="ref") + + result = ds.get_climo_dataset("PRECST", season="ANN") + expected = ds_precst.squeeze(dim="time").drop_vars("time") + + xr.testing.assert_identical(result, expected) + def test_returns_climo_dataset_using_derived_var_directly_from_dataset(self): ds_precst = xr.Dataset( coords={ - "lat": [-90, 90], - "lon": [0, 180], + **spatial_coords, "time": xr.DataArray( dims="time", data=np.array( @@ -726,6 +838,7 @@ def test_returns_climo_dataset_using_derived_var_directly_from_dataset(self): ), }, data_vars={ + **spatial_bounds, "PRECST": xr.DataArray( xr.DataArray( data=np.array( @@ -756,8 +869,7 @@ def test_returns_climo_dataset_using_derived_var_directly_from_dataset(self): def test_returns_climo_dataset_using_source_variable_with_wildcard(self): ds_precst = xr.Dataset( coords={ - "lat": [-90, 90], - "lon": [0, 180], + **spatial_coords, "time": xr.DataArray( dims="time", data=np.array( @@ -777,6 +889,7 @@ def test_returns_climo_dataset_using_source_variable_with_wildcard(self): ), }, data_vars={ + **spatial_bounds, "bc_a?DDF": xr.DataArray( xr.DataArray( data=np.array( @@ -879,8 +992,7 @@ def test_raises_error_if_dataset_has_no_matching_source_variables_to_derive_vari def test_raises_error_if_no_datasets_found_to_derive_variable(self): ds_precst = xr.Dataset( coords={ - "lat": [-90, 90], - "lon": [0, 180], + **spatial_coords, "time": xr.DataArray( dims="time", data=np.array( @@ -900,6 +1012,7 @@ def test_raises_error_if_no_datasets_found_to_derive_variable(self): ), }, data_vars={ + **spatial_bounds, "invalid": xr.DataArray( xr.DataArray( data=np.array( @@ -1327,10 +1440,10 @@ def setup(self, tmp_path): self.data_path = tmp_path / "input_data" self.data_path.mkdir() # Set up climatology dataset and save to a temp file. 
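These fixtures set `time.encoding` rather than plain attrs. A quick sketch of the distinction, assuming xarray's usual behavior that encoding only affects how values are serialized (the file name is illustrative):

```python
import numpy as np
import xarray as xr

ds = xr.Dataset(
    coords={
        "time": xr.DataArray(
            dims="time", data=np.array(["2000-01-16"], dtype="datetime64[ns]")
        )
    }
)

# Encoding tells the netCDF writer to store times as numeric offsets from a
# reference date; the in-memory datetime64 values are unchanged.
ds.time.encoding = {"units": "days since 2000-01-01"}
ds.to_netcdf("climo_fixture.nc")
```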
+
         self.ds_climo = xr.Dataset(
             coords={
-                "lat": [-90, 90],
-                "lon": [0, 180],
+                **spatial_coords,
                 "time": xr.DataArray(
                     dims="time",
                     data=np.array(
@@ -1350,6 +1463,7 @@
                 ),
             },
             data_vars={
+                **spatial_bounds,
                 "ts": xr.DataArray(
                     name="ts",
                     data=np.array(
                         [
                         ]
                     ),
                     dims=["time", "lat", "lon"],
-                )
+                ),
             },
         )
         self.ds_climo.time.encoding = {"units": "days since 2000-01-01"}
diff --git a/tests/e3sm_diags/driver/utils/test_io.py b/tests/e3sm_diags/driver/utils/test_io.py
index 067393910b..85ebea8284 100644
--- a/tests/e3sm_diags/driver/utils/test_io.py
+++ b/tests/e3sm_diags/driver/utils/test_io.py
@@ -6,7 +6,11 @@
 import pytest
 import xarray as xr

-from e3sm_diags.driver.utils.io import _get_output_dir, _write_vars_to_netcdf
+from e3sm_diags.driver.utils.io import (
+    _get_output_dir,
+    _write_vars_to_netcdf,
+    _write_vars_to_single_netcdf,
+)
 from e3sm_diags.parameter.core_parameter import CoreParameter

@@ -40,11 +44,62 @@ def setup(self, tmp_path: Path):
         )
         self.ds_diff = self.ds_test - self.ds_ref

+    def test_writes_test_ref_and_diff_variables_to_files(self, caplog):
+        # Silence info logger message about saving to a directory.
+        caplog.set_level(logging.CRITICAL)
+
+        _write_vars_to_netcdf(
+            self.param, self.var_key, self.ds_test, self.ds_ref, self.ds_diff
+        )
+
+        test_result = xr.open_dataset(f"{self.dir}/{self.var_key}_test.nc")
+        test_expected = self.ds_test.copy()
+        xr.testing.assert_identical(test_result, test_expected)
+
+        ref_result = xr.open_dataset(f"{self.dir}/{self.var_key}_ref.nc")
+        ref_expected = self.ds_ref.copy()
+        xr.testing.assert_identical(ref_result, ref_expected)
+
+        diff_result = xr.open_dataset(f"{self.dir}/{self.var_key}_diff.nc")
+        diff_expected = self.ds_diff.copy()
+        xr.testing.assert_identical(diff_result, diff_expected)
+
+
+class TestWriteVarsToSingleNetcdf:
+    @pytest.fixture(autouse=True)
+    def setup(self, tmp_path: Path):
+        self.param = CoreParameter()
+        self.var_key = "ts"
+
+        # Need to prepend with tmp_path because we use pytest to create temp
+        # dirs for storing files temporarily for the test runs.
+        self.param.results_dir = f"{tmp_path}/results_dir"
+        self.param.current_set = "lat_lon"
+        self.param.case_id = "lat_lon_MERRA"
+        self.param.output_file = "ts"
+
+        # Create the results directory, which uses the CoreParameter attributes.
+        # Example: "<results_dir>/<current_set>/<case_id>/<output_file>_test.nc"
+        self.dir = (
+            tmp_path / "results_dir" / self.param.current_set / self.param.case_id
+        )
+        self.dir.mkdir(parents=True)
+
+        # Input variables for the function
+        self.var_key = "ts"
+        self.ds_test = xr.Dataset(
+            data_vars={"ts": xr.DataArray(name="ts", data=[1, 1, 1])}
+        )
+        self.ds_ref = xr.Dataset(
+            data_vars={"ts": xr.DataArray(name="ts", data=[2, 2, 2])}
+        )
+        self.ds_diff = self.ds_test - self.ds_ref
+
     def test_writes_test_variable_to_file(self, caplog):
         # Silence info logger message about saving to a directory.
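The `caplog` pattern referenced in the comment above is plain pytest; a minimal sketch of how raising the capture level mutes the save-path info messages:

```python
import logging

def test_suppresses_info_logging(caplog):
    # Raising the capture level to CRITICAL filters out the logger.info()
    # records emitted when files are saved.
    caplog.set_level(logging.CRITICAL)

    logging.getLogger("e3sm_diags").info("saved in: /some/path")

    assert len(caplog.records) == 0
```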
caplog.set_level(logging.CRITICAL) - _write_vars_to_netcdf( + _write_vars_to_single_netcdf( self.param, self.var_key, self.ds_test, self.ds_ref, self.ds_diff )
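For contrast with the per-file writer tested above, `_write_vars_to_single_netcdf` keeps everything in one dataset by suffixing variable names (hence the `rename_vars({"ts": "ts_test"})` expectation). A minimal sketch of that convention:

```python
import xarray as xr

ds_test = xr.Dataset({"ts": xr.DataArray(data=[1, 1, 1], dims="time")})
ds_ref = xr.Dataset({"ts": xr.DataArray(data=[2, 2, 2], dims="time")})

# Suffix each variable so test, ref, and diff can coexist in one file.
merged = xr.merge(
    [
        ds_test.rename_vars({"ts": "ts_test"}),
        ds_ref.rename_vars({"ts": "ts_ref"}),
        (ds_test - ds_ref).rename_vars({"ts": "ts_diff"}),
    ]
)
# merged.to_netcdf(f"{output_file}_output.nc")  # single-file "_output.nc" layout
```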