E3SM-Project · tomvothecoder · Feb 15, 2024 · Oct 10, 2023 · Feb 15, 2024 · Feb 15, 2024
diff --git a/..._regression_testing/654-zonal_mean_xy/654-zonal_mean_xy_cdat_regression_test_netcdf.ipynb b/..._regression_testing/654-zonal_mean_xy/654-zonal_mean_xy_cdat_regression_test_netcdf.ipynb
diff --git a/auxiliary_tools/cdat_regression_testing/654-zonal_mean_xy/654-zonal_mean_xy_run_script.py b/auxiliary_tools/cdat_regression_testing/654-zonal_mean_xy/654-zonal_mean_xy_run_script.py
@@ -0,0 +1,6 @@
+from auxiliary_tools.cdat_regression_testing.base_run_script import run_set
+
+SET_NAME = "zonal_mean_xy"  # diagnostic set name handed to `run_set`
+SET_DIR = "654-zonal_mean_xy"  # results sub-directory name handed to `run_set`
+
+run_set(SET_NAME, SET_DIR)
diff --git a/...testing/654-zonal_mean_xy/debug_654-zonal_mean_xy_cdat_regression_test_netcdf_debug.ipynb b/...testing/654-zonal_mean_xy/debug_654-zonal_mean_xy_cdat_regression_test_netcdf_debug.ipynb
diff --git a/...ary_tools/cdat_regression_testing/654-zonal_mean_xy/debug_654-zonal_mean_xy_run_script.py b/...ary_tools/cdat_regression_testing/654-zonal_mean_xy/debug_654-zonal_mean_xy_run_script.py
@@ -0,0 +1,8 @@
+from auxiliary_tools.cdat_regression_testing.base_run_script import run_set
+
+SET_NAME = "zonal_mean_xy"  # diagnostic set name handed to `run_set`
+SET_DIR = "debug-654-zonal_mean_xy"  # results sub-directory name handed to `run_set`
+# CFG_PATH = "auxiliary_tools/cdat_regression_testing/654-zonal_mean_xy/debug_zonal_mean_xy_model_vs_obs.cfg"
+# To debug with the cfg above, uncomment CFG_PATH and swap the two calls below.
+run_set(SET_NAME, SET_DIR)
+# run_set(SET_NAME, SET_DIR, CFG_PATH, multiprocessing=False)
diff --git a/...iary_tools/cdat_regression_testing/654-zonal_mean_xy/debug_zonal_mean_xy_model_vs_obs.cfg b/...iary_tools/cdat_regression_testing/654-zonal_mean_xy/debug_zonal_mean_xy_model_vs_obs.cfg
@@ -0,0 +1,64 @@
+[#]
+sets = ["zonal_mean_xy"]
+case_id = "Cloud ISCCP"
+variables = ["CLDTOT_TAU1.3_ISCCP"]
+ref_name = "ISCCPCOSP"
+reference_name = "ISCCP"
+seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
+
+[#]
+sets = ["zonal_mean_xy"]
+case_id = "Cloud ISCCP"
+variables = ["CLDTOT_TAU1.3_9.4_ISCCP"]
+ref_name = "ISCCPCOSP"
+reference_name = "ISCCP"
+seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
+
+[#]
+sets = ["zonal_mean_xy"]
+case_id = "Cloud ISCCP"
+variables = ["CLDTOT_TAU9.4_ISCCP"]
+ref_name = "ISCCPCOSP"
+reference_name = "ISCCP"
+seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
+
+
+[#]
+sets = ["zonal_mean_xy"]
+case_id = "Cloud MODIS"
+variables = ["CLDTOT_TAU1.3_MODIS"]
+ref_name = "MODISCOSP"
+reference_name = "MODIS Simulator"
+seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
+
+[#]
+sets = ["zonal_mean_xy"]
+case_id = "Cloud MODIS"
+variables = ["CLDTOT_TAU1.3_9.4_MODIS"]
+ref_name = "MODISCOSP"
+reference_name = "MODIS Simulator"
+seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
+
+[#]
+sets = ["zonal_mean_xy"]
+case_id = "Cloud MODIS"
+variables = ["CLDTOT_TAU9.4_MODIS"]
+ref_name = "MODISCOSP"
+reference_name = "MODIS Simulator"
+seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
+
+[#]
+sets = ["zonal_mean_xy"]
+case_id = "Cloud MODIS"
+variables = ["CLDHGH_TAU1.3_MODIS"]
+ref_name = "MODISCOSP"
+reference_name = "MODIS Simulator"
+seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
+
+[#]
+sets = ["zonal_mean_xy"]
+case_id = "Cloud MODIS"
+variables = ["CLDHGH_TAU1.3_9.4_MODIS"]
+ref_name = "MODISCOSP"
+reference_name = "MODIS Simulator"
+seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
diff --git a/auxiliary_tools/cdat_regression_testing/654-zonal_mean_xy/test_refactor/diags.cfg b/auxiliary_tools/cdat_regression_testing/654-zonal_mean_xy/test_refactor/diags.cfg
@@ -0,0 +1,58 @@
+[#]
+sets = ["zonal_mean_xy"]
+case_id = "CERES-EBAF-surface-v4.1"
+variables = ["FLNS"]
+ref_name = "ceres_ebaf_surface_v4.1"
+reference_name = "CERES-EBAF v4.1"
+seasons = ["ANN"]
+
+[#]
+sets = ["zonal_mean_xy"]
+case_id = "MERRA2"
+variables = ["TMQ"]
+ref_name = "MERRA2"
+reference_name = "MERRA2 Reanalysis"
+seasons = ["ANN"]
+
+#[#]
+#sets = ["zonal_mean_xy"]
+#case_id = "MERRA2"
+#variables = ["TREFHT"]
+#regions = ["global"]
+#ref_name = "MERRA2"
+#reference_name = "MERRA2 Reanalysis"
+#seasons = ["ANN"]
+#[#]
+#sets = ["zonal_mean_xy"]
+#case_id = "GPCP_OAFLux"
+#variables = ["PminusE"]
+#ref_name = "GPCP_OAFLux"
+#reference_name = "PRECT(GPCP) minus QFLX(OAFLux)"
+#seasons = ["ANN"]
+##
+#[#]
+#sets = ["zonal_mean_xy"]
+#case_id = "COREv2_Flux"
+#variables = ["PminusE"]
+#ref_name = "COREv2_Flux"
+#reference_name = "COREv2_Flux"
+#seasons = ["ANN"]
+#
+#
+#[#]
+#sets = ["zonal_mean_xy"]
+#case_id = "GPCP_v3.2"
+#variables = ["PRECT"]
+#ref_name = "GPCP_v3.2"
+#reference_name = "GPCP v3.2"
+#seasons = ["ANN"]
+#regions = ["global"]
+#
+#[#]
+#sets = ["zonal_mean_xy"]
+#case_id = "ERA5"
+#variables = ["T"]
+#ref_name = "ERA5"
+#reference_name = "ERA5 Reanalysis"
+#seasons = ["ANN"]
+#plevs = [850.0]
diff --git a/auxiliary_tools/cdat_regression_testing/654-zonal_mean_xy/test_refactor/run_zonal_mean_xy.py b/auxiliary_tools/cdat_regression_testing/654-zonal_mean_xy/test_refactor/run_zonal_mean_xy.py
@@ -0,0 +1,42 @@
+import os
+import sys
+
+from e3sm_diags.parameter.core_parameter import CoreParameter
+from e3sm_diags.run import runner
+
+param = CoreParameter()
+
+# Location of the data.
+param.reference_data_path = (
+    "/global/cfs/cdirs/e3sm/e3sm_diags/obs_for_e3sm_diags/climatology/"
+)
+param.test_data_path = (
+    "/global/cfs/cdirs/e3sm/e3sm_diags/postprocessed_e3sm_v2_data_for_e3sm_diags/20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis/climatology/rgr"
+)
+# Name of the test model data, used to find the climo files.
+param.test_name = "20210528.v2rc3e.piControl.ne30pg2_EC30to60E2r2.chrysalis"
+# An optional, shorter name to be used instead of the test_name.
+param.short_test_name = "v2rc3e"
+
+# The plot set to run is chosen via `runner.sets_to_run` at the bottom of this
+# script (the `sets` entries in the cfg also name "zonal_mean_xy").
+# Name of the folder where the results are stored.
+# Change `prefix` to use your directory.
+prefix = "/global/cfs/cdirs/e3sm/www/chengzhu/test_e3sm_refactor"
+param.results_dir = os.path.join(prefix, "ex5_model_to_obs")
+
+# Below are more optional arguments.
+
+# Title of the difference plots.
+param.diff_title = "Model - Obs."
+# Save the netcdf files for each of the ref, test, and diff plot.
+param.save_netcdf = True
+# For running with multiprocessing.
+# param.multiprocessing = True
+# param.num_workers = 32
+# Use the `diags.cfg` that sits next to this script, independent of the cwd.
+CFG_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "diags.cfg")
+sys.argv.extend(["-d", CFG_PATH])
+
+runner.sets_to_run = ["zonal_mean_xy"]
+runner.run_diags([param])
diff --git a/auxiliary_tools/cdat_regression_testing/base_run_script.py b/auxiliary_tools/cdat_regression_testing/base_run_script.py
@@ -5,6 +5,7 @@
 # flake8: noqa E501
 
 import os
+import sys
 from typing import List, Tuple, TypedDict
 
 from mache import MachineInfo
@@ -44,7 +45,15 @@ class MachinePaths(TypedDict):
     tc_test: str
 
 
-def run_set(set_name: str, set_dir: str):
+def run_set(
+    set_name: str,
+    set_dir: str,
+    cfg_path: str | None = None,
+    multiprocessing: bool = True,
+):
+    if cfg_path is not None:
+        sys.argv.extend(["--diags", cfg_path])
+
     machine_paths: MachinePaths = _get_machine_paths()
 
     param = CoreParameter()
@@ -58,7 +67,7 @@ def run_set(set_name: str, set_dir: str):
     ]  # Default setting: seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
 
     param.results_dir = os.path.join(BASE_RESULTS_DIR, set_dir)
-    param.multiprocessing = True
+    param.multiprocessing = multiprocessing
     param.num_workers = 5
 
     # Make sure to save the netCDF files to compare outputs.
@@ -251,7 +260,3 @@ def _get_test_data_dirs(machine: str) -> Tuple[str, str]:
     )
 
     return test_data_dirs  # type: ignore
-
-
-if __name__ == "__main__":
-    run_set()
diff --git a/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test_json.ipynb b/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test_json.ipynb
@@ -26,7 +26,7 @@
     "(dev and `main` branches).\n",
     "\n",
     "1. Make a copy of this notebook under `auxiliary_tools/cdat_regression_testing/<DIR_NAME>`.\n",
-    "2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" pandas matplotlib-base ipykernel`\n",
+    "2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" xarray netcdf4 dask pandas matplotlib-base ipykernel`\n",
     "3. Run `mamba activate cdat_regression_test`\n",
     "4. Update `DEV_PATH` and `MAIN_PATH` in the copy of your notebook.\n",
     "5. Run all cells IN ORDER.\n",

diff --git a/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test_netcdf.ipynb b/auxiliary_tools/cdat_regression_testing/template_cdat_regression_test_netcdf.ipynb
@@ -20,7 +20,7 @@
     "(dev and `main` branches).\n",
     "\n",
     "1. Make a copy of this notebook under `auxiliary_tools/cdat_regression_testing/<DIR_NAME>`.\n",
-    "2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" xarray dask pandas matplotlib-base ipykernel`\n",
+    "2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" xarray netcdf4 dask pandas matplotlib-base ipykernel`\n",
     "3. Run `mamba activate cdat_regression_test`\n",
     "4. Update `SET_DIR` and `SET_NAME` in the copy of your notebook.\n",
     "5. Run all cells IN ORDER.\n",

diff --git a/auxiliary_tools/cdat_regression_testing/template_run_script.py b/auxiliary_tools/cdat_regression_testing/template_run_script.py
@@ -12,13 +12,23 @@
      "meridional_mean_2d", "annual_cycle_zonal_mean", "enso_diags", "qbo",
      "area_mean_time_series", "diurnal_cycle", "streamflow", "arm_diags",
      "tc_analysis", "aerosol_aeronet", "aerosol_budget", "mp_partition",
-6. Run this script
-   - Make sure to run this command on NERSC perlmutter cpu:
-    `salloc --nodes 1 --qos interactive --time 01:00:00 --constraint cpu --account=e3sm
-    conda activate <NAME-OF-DEV-ENV>`
-   - python auxiliary_tools/cdat_regression_testing/<ISSUE-<SET_NAME>
+
+6. Run this script as a Python module
+   - `auxiliary_tools` is not installed as a package (it is not listed in
+     `setup.py`), so run the script with `python -m` from the repository root.
+   - Command: python -m auxiliary_tools.cdat_regression_testing.<ISSUE>-<SET_NAME>.<SCRIPT-NAME>
+   - Example: python -m auxiliary_tools.cdat_regression_testing.660_cosp_histogram.run_script
+
 7. Make a copy of the CDAT regression testing notebook in the same directory
    as this script and follow the instructions there to start testing.
+
+8. <OPTIONAL> Update `CFG_PATH` to a custom cfg file to debug specific variables.
+   - It is useful to create a custom cfg based on the default diags to debug
+     specific variables that are running into problems.
+   - For example, copy `zonal_mean_xy_model_vs_model.cfg` into the same directory
+     as the copy of this script, then trim it down to the variables of interest.
+     Afterwards, update `CFG_PATH` to the path of that .cfg file.
+   - Tip: Use VS Code to step through the code with the Python debugger.
 """
 from auxiliary_tools.cdat_regression_testing.base_run_script import run_set
 
@@ -30,4 +40,12 @@
 # Example: "671-lat-lon"
 SET_DIR = ""
 
-run_set(SET_NAME, SET_DIR)
+# TODO: <OPTIONAL> UPDATE CFG_PATH if using a custom cfg file for debugging.
+# Example: "auxiliary_tools/cdat_regression_testing/654-zonal_mean_xy/debug_zonal_mean_xy_model_vs_obs.cfg"
+CFG_PATH: str | None = None
+
+# TODO: <OPTIONAL> Update MULTIPROCESSING based on whether to run in parallel or
+# serial. For debugging purposes, set to False to run serially.
+MULTIPROCESSING = True
+
+run_set(SET_NAME, SET_DIR, CFG_PATH, MULTIPROCESSING)
diff --git a/e3sm_diags/derivations/derivations.py b/e3sm_diags/derivations/derivations.py
@@ -87,6 +87,7 @@
             ),
             (("pr",), lambda pr: qflxconvert_units(rename(pr))),
             (("PRECC", "PRECL"), lambda precc, precl: prect(precc, precl)),
+            (("sat_gauge_precip",), rename),
         ]
     ),
     "PRECST": OrderedDict(
@@ -767,7 +768,7 @@
                     "QFLX",
                 ),
                 lambda precc, precl, qflx: pminuse_convert_units(
-                    prect(precc, precl) - pminuse_convert_units(qflx)
+                    prect(precc, precl) - qflxconvert_units(qflx)
                 ),
             ),
             (

diff --git a/e3sm_diags/derivations/formulas.py b/e3sm_diags/derivations/formulas.py
@@ -77,15 +77,16 @@ def qflx_convert_to_lhflx_approxi(var: xr.DataArray):
 
 
 def pminuse_convert_units(var: xr.DataArray):
-    if (
-        var.attrs["units"] == "kg/m2/s"
-        or var.attrs["units"] == "kg m-2 s-1"
-        or var.attrs["units"] == "kg/s/m^2"
-    ):
-        # need to find a solution for units not included in udunits
-        # var = convert_units( var, 'kg/m2/s' )
-        var = var * 3600.0 * 24  # convert to mm/day
-        var.attrs["units"] = "mm/day"
+    # Read "units" from `attrs` directly (the same mapping written to below);
+    # a variable without that attribute simply skips the rescaling.
+    # The three spellings below all denote the same mass-flux unit.
+    if var.attrs.get("units") in ("kg/m2/s", "kg m-2 s-1", "kg/s/m^2"):
+        # need to find a solution for units not included in udunits
+        # var = convert_units( var, 'kg/m2/s' )
+        var = var * 3600.0 * 24  # seconds per day: rate per second -> mm/day
+    # The result is labelled mm/day whether or not it was rescaled above
+    # -- NOTE(review): assumes unlabelled inputs are already in mm/day; confirm.
+    var.attrs["units"] = "mm/day"
     var.attrs["long_name"] = "precip. flux - evap. flux"
     return var