From 0cd23e17ce46e620f494320850f28b28d00c7894 Mon Sep 17 00:00:00 2001
From: tomvothecoder
Date: Wed, 24 Jan 2024 11:18:59 -0800
Subject: [PATCH] Add template run scripts

---
 auxiliary_tools/__init__.py                   |   0
 .../cdat_regression_testing/__init__.py       |   0
 .../base_run_script.py                        | 304 ++++++++++++++++++
 .../template_run_script.py                    |  42 +++
 4 files changed, 346 insertions(+)
 create mode 100644 auxiliary_tools/__init__.py
 create mode 100644 auxiliary_tools/cdat_regression_testing/__init__.py
 create mode 100644 auxiliary_tools/cdat_regression_testing/base_run_script.py
 create mode 100644 auxiliary_tools/cdat_regression_testing/template_run_script.py

diff --git a/auxiliary_tools/__init__.py b/auxiliary_tools/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/auxiliary_tools/cdat_regression_testing/__init__.py b/auxiliary_tools/cdat_regression_testing/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/auxiliary_tools/cdat_regression_testing/base_run_script.py b/auxiliary_tools/cdat_regression_testing/base_run_script.py
new file mode 100644
index 000000000..f7cef4c78
--- /dev/null
+++ b/auxiliary_tools/cdat_regression_testing/base_run_script.py
@@ -0,0 +1,304 @@
+"""
+This is a copy of `examples/run_v2_9_0_all_sets_E3SM_machines.py` with
+some slight tweaks to make it geared towards CDAT migration refactoring work.
+"""
+# flake8: noqa E501
+
+import os
+from typing import List, Tuple, TypedDict
+
+from mache import MachineInfo
+
+from e3sm_diags.parameter.annual_cycle_zonal_mean_parameter import ACzonalmeanParameter
+from e3sm_diags.parameter.area_mean_time_series_parameter import (
+    AreaMeanTimeSeriesParameter,
+)
+from e3sm_diags.parameter.arm_diags_parameter import ARMDiagsParameter
+from e3sm_diags.parameter.core_parameter import CoreParameter
+from e3sm_diags.parameter.diurnal_cycle_parameter import DiurnalCycleParameter
+from e3sm_diags.parameter.enso_diags_parameter import EnsoDiagsParameter
+from e3sm_diags.parameter.mp_partition_parameter import MPpartitionParameter
+from e3sm_diags.parameter.qbo_parameter import QboParameter
+from e3sm_diags.parameter.streamflow_parameter import StreamflowParameter
+from e3sm_diags.parameter.tc_analysis_parameter import TCAnalysisParameter
+from e3sm_diags.parameter.zonal_mean_2d_stratosphere_parameter import (
+    ZonalMean2dStratosphereParameter,
+)
+from e3sm_diags.run import runner
+
+# The location where results will be stored based on your branch changes.
+BASE_RESULTS_DIR = "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/"
+
+
+class MachinePaths(TypedDict):
+    """The paths built by `_get_machine_paths()` for the current machine."""
+
+    html_path: str
+    obs_climo: str
+    test_climo: str
+    obs_ts: str
+    test_ts: str
+    dc_obs_climo: str
+    dc_test_climo: str
+    arm_obs: str
+    arm_test: str
+    tc_obs: str
+    tc_test: str
+
+
+def run_set(set_name: str, set_dir: str, save_netcdf: bool) -> str:
+    """Run a single diagnostics set and return its results directory.
+
+    A parameter object is created for each parameter class imported by this
+    module and all of them are passed to `runner.run_diags()`, but only
+    `set_name` is assigned to `runner.sets_to_run`.
+
+    Parameters
+    ----------
+    set_name : str
+        The name of the diagnostics set to run (e.g., "lat_lon").
+    set_dir : str
+        The sub-directory for the results. It is joined to `BASE_RESULTS_DIR`.
+    save_netcdf : bool
+        Whether to save the netCDF files, which is needed to compare outputs.
+
+    Returns
+    -------
+    str
+        The directory where the results are stored.
+
+    Raises
+    ------
+    ValueError
+        If `set_name` or `set_dir` is an empty string.
+    """
+    # An empty `set_dir` would store the results directly in the shared
+    # `BASE_RESULTS_DIR`, so reject the unedited template defaults early.
+    if not set_name or not set_dir:
+        raise ValueError("Both `set_name` and `set_dir` must be non-empty strings.")
+
+    machine_paths: MachinePaths = _get_machine_paths()
+
+    param = CoreParameter()
+
+    param.reference_data_path = machine_paths["obs_climo"]
+    param.test_data_path = machine_paths["test_climo"]
+    param.test_name = "20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis"
+    param.seasons = [
+        "ANN",
+        "JJA",
+    ]  # Default setting: seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
+
+    param.results_dir = os.path.join(BASE_RESULTS_DIR, set_dir)
+    param.multiprocessing = True
+    param.num_workers = 5
+
+    # Make sure to save the netCDF files to compare outputs.
+    param.save_netcdf = save_netcdf
+
+    # Set specific parameters for new sets
+    enso_param = EnsoDiagsParameter()
+    enso_param.reference_data_path = machine_paths["obs_ts"]
+    enso_param.test_data_path = machine_paths["test_ts"]
+    enso_param.test_name = "e3sm_v2"
+    enso_param.test_start_yr = "0051"
+    enso_param.test_end_yr = "0060"
+    # Enso obs data range from year 1979 to 2016
+    enso_param.ref_start_yr = "2001"
+    enso_param.ref_end_yr = "2010"
+
+    qbo_param = QboParameter()
+    qbo_param.reference_data_path = machine_paths["obs_ts"]
+    qbo_param.test_data_path = machine_paths["test_ts"]
+    qbo_param.test_name = "e3sm_v2"
+    qbo_param.start_yr = "0051"
+    qbo_param.end_yr = "0060"
+    # Qbo obs data range from year 1979 to 2019
+    # Number of years of test and ref should match
+    qbo_param.ref_start_yr = "2001"
+    qbo_param.ref_end_yr = "2010"
+
+    ts_param = AreaMeanTimeSeriesParameter()
+    ts_param.reference_data_path = machine_paths["obs_ts"]
+    ts_param.test_data_path = machine_paths["test_ts"]
+    ts_param.test_name = "e3sm_v2"
+    ts_param.start_yr = "0051"
+    ts_param.end_yr = "0060"
+
+    dc_param = DiurnalCycleParameter()
+    dc_param.reference_data_path = machine_paths["dc_obs_climo"]
+    dc_param.test_data_path = machine_paths["dc_test_climo"]
+    dc_param.short_test_name = "e3sm_v2"
+    # Plotting diurnal cycle amplitude on different scales. Default is True
+    dc_param.normalize_test_amp = False
+
+    streamflow_param = StreamflowParameter()
+    streamflow_param.reference_data_path = machine_paths["obs_ts"]
+    streamflow_param.test_data_path = machine_paths["test_ts"]
+    streamflow_param.short_test_name = "e3sm_v2"
+    streamflow_param.test_start_yr = "0051"
+    streamflow_param.test_end_yr = "0060"
+    # Streamflow gauge station data range from year 1986 to 1995
+    streamflow_param.ref_start_yr = "1986"
+    streamflow_param.ref_end_yr = "1995"
+
+    arm_param = ARMDiagsParameter()
+    arm_param.reference_data_path = machine_paths["arm_obs"]
+    arm_param.ref_name = "armdiags"
+    arm_param.test_data_path = machine_paths["arm_test"]
+    arm_param.test_name = "e3sm_v2"
+    # arm_param.test_start_yr = "1996"
+    # arm_param.test_end_yr = "2010"
+    arm_param.test_start_yr = "1985"
+    arm_param.test_end_yr = "2014"
+    # For model vs obs, the ref start and end year can be any four digit strings.
+    # For now, will use all available years from obs
+    arm_param.ref_start_yr = "0001"
+    arm_param.ref_end_yr = "0001"
+
+    tc_param = TCAnalysisParameter()
+    tc_param.reference_data_path = machine_paths["tc_obs"]
+    tc_param.test_data_path = machine_paths["tc_test"]
+    tc_param.short_test_name = "e3sm_v2"
+    tc_param.test_start_yr = "0051"
+    tc_param.test_end_yr = "0060"
+    # For model vs obs, the ref start and end year can be any four digit strings.
+    # For now, use all available years from obs by default.
+    tc_param.ref_start_yr = "1979"
+    tc_param.ref_end_yr = "2018"
+
+    ac_param = ACzonalmeanParameter()
+    zm_param = ZonalMean2dStratosphereParameter()
+
+    mp_param = MPpartitionParameter()
+    # mp_param.reference_data_path = machine_paths["obs_ts"]
+    mp_param.test_data_path = machine_paths["test_ts"]
+    mp_param.short_test_name = "e3sm_v2"
+    mp_param.test_start_yr = "0051"
+    mp_param.test_end_yr = "0060"
+
+    runner.sets_to_run = [set_name]
+
+    runner.run_diags(
+        [
+            param,
+            zm_param,
+            ac_param,
+            enso_param,
+            qbo_param,
+            ts_param,
+            dc_param,
+            streamflow_param,
+            arm_param,
+            tc_param,
+            mp_param,
+        ]
+    )
+
+    return param.results_dir
+
+
+def _get_machine_paths() -> MachinePaths:
+    """Returns the paths on the machine that are required to run e3sm_diags.
+
+    Returns
+    -------
+    MachinePaths
+        A dictionary of paths on the machine, with the key being the path type
+        and the value being the absolute path string.
+
+    Raises
+    ------
+    ValueError
+        If the current machine is not in the list of supported machines.
+    """
+    # Get the current machine's configuration info.
+    machine_info = MachineInfo()
+    machine = machine_info.machine
+
+    if machine not in [
+        "anvil",
+        "chrysalis",
+        "compy",
+        "pm-cpu",
+        "cori-haswell",
+        "cori-knl",
+    ]:
+        raise ValueError(f"e3sm_diags is not supported on this machine ({machine}).")
+
+    # Path to the HTML outputs for the current user.
+    web_portal_base_path = machine_info.config.get("web_portal", "base_path")
+    html_path = f"{web_portal_base_path}/{machine_info.username}/"
+
+    # Path to the reference data directory.
+    diags_base_path = machine_info.diagnostics_base
+    ref_data_dir = f"{diags_base_path}/observations/Atm"
+
+    # Paths to the test data directories.
+    test_data_dir, test_data_dir2 = _get_test_data_dirs(machine)
+
+    # Construct the paths required by e3sm_diags using the base paths above.
+    machine_paths: MachinePaths = {
+        "html_path": html_path,
+        "obs_climo": f"{ref_data_dir}/climatology",
+        "test_climo": f"{test_data_dir}/climatology/rgr/",
+        "obs_ts": f"{ref_data_dir}/time-series/",
+        "test_ts": f"{test_data_dir}/time-series/rgr/",
+        "dc_obs_climo": f"{ref_data_dir}/climatology",
+        "dc_test_climo": f"{test_data_dir}/diurnal_climatology/rgr",
+        "arm_obs": f"{ref_data_dir}/arm-diags-data/",
+        "arm_test": f"{test_data_dir2}/arm-diags-data/",
+        "tc_obs": f"{ref_data_dir}/tc-analysis/",
+        "tc_test": f"{test_data_dir}/tc-analysis/",
+    }
+
+    return machine_paths
+
+
+def _get_test_data_dirs(machine: str) -> Tuple[str, str]:
+    """Get the directories for test data based on the machine.
+
+    The second path is for using the high frequency grid box output at ARM sites
+    from another simulation when the output is available.
+
+    Parameters
+    ----------
+    machine : str
+        The name of the machine.
+
+    Returns
+    -------
+    Tuple[str, str]
+        A tuple of two strings, each representing a test data directory path.
+
+    Raises
+    ------
+    ValueError
+        If no test data directories are defined for the machine.
+    """
+    # TODO: Update this function to use `mache` after the directories are updated.
+    if machine in ["chrysalis", "anvil"]:
+        base = "/lcrc/group/e3sm/public_html/e3sm_diags_test_data/postprocessed_e3sm_v2_data_for_e3sm_diags"
+    elif machine in ["compy"]:
+        base = "/compyfs/e3sm_diags_data/postprocessed_e3sm_v2_data_for_e3sm_diags"
+    elif machine in ["cori-haswell", "cori-knl", "pm-cpu"]:
+        base = "/global/cfs/cdirs/e3sm/e3sm_diags/postprocessed_e3sm_v2_data_for_e3sm_diags"
+    else:
+        # Without this branch, `base` would be unbound for an unknown machine.
+        raise ValueError(
+            f"Test data directories are not defined for this machine ({machine})."
+        )
+
+    return (
+        f"{base}/20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis",
+        # f"{base}/20210719.PhaseII.F20TR-P3.NGD.ne30pg2.compy",
+        f"{base}/20221103.v2.LR.amip.NGD_v3atm.chrysalis",
+    )
+
+
+if __name__ == "__main__":
+    # `run_set()` has required arguments, so this module can't run on its own.
+    raise SystemExit(
+        "This module is not meant to be run directly. Copy "
+        "`template_run_script.py`, update its variables, and run the copy."
+    )
diff --git a/auxiliary_tools/cdat_regression_testing/template_run_script.py b/auxiliary_tools/cdat_regression_testing/template_run_script.py
new file mode 100644
index 000000000..683724496
--- /dev/null
+++ b/auxiliary_tools/cdat_regression_testing/template_run_script.py
@@ -0,0 +1,42 @@
+"""
+The template run script used for generating results on your development branch.
+
+Steps:
+1. Activate your conda dev env for your branch
+2. `make install` to install the latest version of your branch code into the env
+3. Copy this script into `auxiliary_tools/cdat_regression_testing/[ISSUE]-[SET_NAME]`
+4. Update `SET_DIR` string variable
+5. Update `SET_NAME` string variable.
+   - Options include: "lat_lon", "zonal_mean_xy", "zonal_mean_2d",
+     "zonal_mean_2d_stratosphere", "polar", "cosp_histogram",
+     "meridional_mean_2d", "annual_cycle_zonal_mean", "enso_diags", "qbo",
+     "area_mean_time_series", "diurnal_cycle", "streamflow", "arm_diags",
+     "tc_analysis", "aerosol_aeronet", "aerosol_budget", "mp_partition"
+6. Update `SAVE_NETCDF` boolean variable.
+   - Set to True if your set does not produce metrics `.json` files, such as
+     cosp_histogram which only calculates spatial average and saves them to
+     netCDF files.
+   - Set to False if your set produces metrics `.json` files and you only
+     need to compare those in regression testing.
+7. Run this script
+   - Make sure to run this command on NERSC perlmutter cpu:
+     `salloc --nodes 1 --qos interactive --time 01:00:00 --constraint cpu --account=e3sm`
+   - `conda activate [NAME-OF-DEV-ENV]`
+   - `python auxiliary_tools/cdat_regression_testing/[ISSUE]-[SET_NAME]/[YOUR-COPY-OF-THIS-SCRIPT].py`
+8. Make a copy of the CDAT regression testing notebook in the same directory
+   as this script and follow the instructions there to start testing.
+"""
+from auxiliary_tools.cdat_regression_testing.base_run_script import run_set
+
+# TODO: Update SET_NAME to the single set you are refactoring.
+# Example: "lat_lon"
+SET_NAME = ""
+# TODO: Update SET_DIR to [ISSUE]-[SET_NAME]. This string gets appended
+# to the base results_dir, "/global/cfs/projectdirs/e3sm/e3sm_diags_cdat_test/".
+# Example: "671-lat-lon"
+SET_DIR = ""
+# TODO: Update SAVE_NETCDF as needed.
+SAVE_NETCDF = True
+
+if __name__ == "__main__":
+    run_set(SET_NAME, SET_DIR, SAVE_NETCDF)