
Refactor 654 zonal mean xy #752

Merged: 12 commits, Feb 15, 2024


@@ -0,0 +1,6 @@
from auxiliary_tools.cdat_regression_testing.base_run_script import run_set

SET_NAME = "zonal_mean_xy"
SET_DIR = "654-zonal_mean_xy"

run_set(SET_NAME, SET_DIR)


@@ -0,0 +1,8 @@
from auxiliary_tools.cdat_regression_testing.base_run_script import run_set

SET_NAME = "zonal_mean_xy"
SET_DIR = "debug-654-zonal_mean_xy"
# CFG_PATH = "auxiliary_tools/cdat_regression_testing/654-zonal_mean_xy/debug_zonal_mean_xy_model_vs_obs.cfg"

run_set(SET_NAME, SET_DIR)
# run_set(SET_NAME, SET_DIR, CFG_PATH, multiprocessing=False)
@@ -0,0 +1,64 @@
[#]
sets = ["zonal_mean_xy"]
case_id = "Cloud ISCCP"
variables = ["CLDTOT_TAU1.3_ISCCP"]
ref_name = "ISCCPCOSP"
reference_name = "ISCCP"
seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]

[#]
sets = ["zonal_mean_xy"]
case_id = "Cloud ISCCP"
variables = ["CLDTOT_TAU1.3_9.4_ISCCP"]
ref_name = "ISCCPCOSP"
reference_name = "ISCCP"
seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]

[#]
sets = ["zonal_mean_xy"]
case_id = "Cloud ISCCP"
variables = ["CLDTOT_TAU9.4_ISCCP"]
ref_name = "ISCCPCOSP"
reference_name = "ISCCP"
seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]


[#]
sets = ["zonal_mean_xy"]
case_id = "Cloud MODIS"
variables = ["CLDTOT_TAU1.3_MODIS"]
ref_name = "MODISCOSP"
reference_name = "MODIS Simulator"
seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]

[#]
sets = ["zonal_mean_xy"]
case_id = "Cloud MODIS"
variables = ["CLDTOT_TAU1.3_9.4_MODIS"]
ref_name = "MODISCOSP"
reference_name = "MODIS Simulator"
seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]

[#]
sets = ["zonal_mean_xy"]
case_id = "Cloud MODIS"
variables = ["CLDTOT_TAU9.4_MODIS"]
ref_name = "MODISCOSP"
reference_name = "MODIS Simulator"
seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]

[#]
sets = ["zonal_mean_xy"]
case_id = "Cloud MODIS"
variables = ["CLDHGH_TAU1.3_MODIS"]
ref_name = "MODISCOSP"
reference_name = "MODIS Simulator"
seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]

[#]
sets = ["zonal_mean_xy"]
case_id = "Cloud MODIS"
variables = ["CLDHGH_TAU1.3_9.4_MODIS"]
ref_name = "MODISCOSP"
reference_name = "MODIS Simulator"
seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
17 changes: 11 additions & 6 deletions auxiliary_tools/cdat_regression_testing/base_run_script.py
@@ -5,6 +5,7 @@
# flake8: noqa E501

import os
import sys
from typing import List, Tuple, TypedDict

from mache import MachineInfo
@@ -44,7 +45,15 @@ class MachinePaths(TypedDict):
tc_test: str


def run_set(set_name: str, set_dir: str):
def run_set(
set_name: str,
set_dir: str,
cfg_path: str | None = None,
multiprocessing: bool = True,
):
if cfg_path is not None:
sys.argv.extend(["--diags", cfg_path])

machine_paths: MachinePaths = _get_machine_paths()

param = CoreParameter()
Expand All @@ -58,7 +67,7 @@ def run_set(set_name: str, set_dir: str):
] # Default setting: seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]

param.results_dir = os.path.join(BASE_RESULTS_DIR, set_dir)
param.multiprocessing = True
param.multiprocessing = multiprocessing
param.num_workers = 5

# Make sure to save the netCDF files to compare outputs.
@@ -251,7 +260,3 @@ def _get_test_data_dirs(machine: str) -> Tuple[str, str]:
)

return test_data_dirs # type: ignore


if __name__ == "__main__":
run_set()
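
With the new `run_set` signature above, a debug run can point at a custom cfg (forwarded to e3sm_diags via `--diags` on `sys.argv`) and run serially. A minimal sketch mirroring the debug script added in this PR; the cfg path is the one referenced there:

from auxiliary_tools.cdat_regression_testing.base_run_script import run_set

# Default behavior: run the full zonal_mean_xy set with multiprocessing.
run_set("zonal_mean_xy", "654-zonal_mean_xy")

# Debug behavior: load a custom cfg and run serially so a single problem
# variable can be stepped through with the debugger.
run_set(
    "zonal_mean_xy",
    "debug-654-zonal_mean_xy",
    cfg_path="auxiliary_tools/cdat_regression_testing/654-zonal_mean_xy/debug_zonal_mean_xy_model_vs_obs.cfg",
    multiprocessing=False,
)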
@@ -20,7 +20,7 @@
"(dev and `main` branches).\n",
"\n",
"1. Make a copy of this notebook under `auxiliary_tools/cdat_regression_testing/<DIR_NAME>`.\n",
"2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" xarray dask pandas matplotlib-base ipykernel`\n",
"2. Run `mamba create -n cdat_regression_test -y -c conda-forge \"python<3.12\" xarray netcdf4 dask pandas matplotlib-base ipykernel`\n",
"3. Run `mamba activate cdat_regression_test`\n",
"4. Update `SET_DIR` and `SET_NAME` in the copy of your notebook.\n",
"5. Run all cells IN ORDER.\n",
30 changes: 24 additions & 6 deletions auxiliary_tools/cdat_regression_testing/template_run_script.py
@@ -12,13 +12,23 @@
"meridional_mean_2d", "annual_cycle_zonal_mean", "enso_diags", "qbo",
"area_mean_time_series", "diurnal_cycle", "streamflow", "arm_diags",
"tc_analysis", "aerosol_aeronet", "aerosol_budget", "mp_partition",
6. Run this script
- Make sure to run this command on NERSC perlmutter cpu:
`salloc --nodes 1 --qos interactive --time 01:00:00 --constraint cpu --account=e3sm
conda activate <NAME-OF-DEV-ENV>`
- python auxiliary_tools/cdat_regression_testing/<ISSUE-<SET_NAME>

6. Run this script as a Python module
- `auxiliary_tools` is not included in `setup.py`, so `-m` is required
to run the script as a Python module
- Command: python -m auxiliary_tools.cdat_regression_testing.<ISSUE>-<SET_NAME>.<SCRIPT-NAME>
- Example: python -m auxiliary_tools.cdat_regression_testing.660_cosp_histogram.run_script

7. Make a copy of the CDAT regression testing notebook in the same directory
as this script and follow the instructions there to start testing.

8. <OPTIONAL> Update `CFG_PATH` to a custom cfg file to debug specific variables.
- It is useful to create a custom cfg based on the default diags to debug
specific variables that are running into problems.
- For example, copy `zonal_mean_xy_model_vs_model.cfg` into the same directory
as the copy of this script, then modify it to specific variables. Afterwards
update `CFG_PATH` to the path of that .cfg file.
- Tip: Use VS Code to step through the code with the Python debugger.
"""
from auxiliary_tools.cdat_regression_testing.base_run_script import run_set

@@ -30,4 +40,12 @@
# Example: "671-lat-lon"
SET_DIR = ""

run_set(SET_NAME, SET_DIR)
# TODO: <OPTIONAL> UPDATE CFG_PATH if using a custom cfg file for debugging.
# Example: "auxiliary_tools/cdat_regression_testing/654_zonal_mean_xy.cfg"
CFG_PATH: str | None = None

# TODO: <OPTIONAL> Update MULTIPROCESSING based on whether to run in parallel or
# serial. For debugging purposes, set to False to run serially.
MULTIPROCESSING = True

run_set(SET_NAME, SET_DIR, CFG_PATH, MULTIPROCESSING)
3 changes: 2 additions & 1 deletion e3sm_diags/derivations/derivations.py
@@ -87,6 +87,7 @@
),
(("pr",), lambda pr: qflxconvert_units(rename(pr))),
(("PRECC", "PRECL"), lambda precc, precl: prect(precc, precl)),
(("sat_gauge_precip",), rename),
]
),
"PRECST": OrderedDict(
@@ -767,7 +768,7 @@
"QFLX",
),
lambda precc, precl, qflx: pminuse_convert_units(
prect(precc, precl) - pminuse_convert_units(qflx)
prect(precc, precl) - qflxconvert_units(qflx)
),
),
(
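
The P minus E fix above converts QFLX with `qflxconvert_units` before the subtraction instead of reusing `pminuse_convert_units` on it. A minimal standalone sketch of the intended unit handling, assuming (as in `pminuse_convert_units` in the next hunk) that a kg m-2 s-1 flux becomes mm/day by multiplying by 86400, since 1 kg of water per m^2 is roughly 1 mm of depth; the value is made up:

import xarray as xr

SECONDS_PER_DAY = 3600.0 * 24  # 86400 s/day

# Hypothetical evaporative flux for a single grid cell.
qflx = xr.DataArray(2.5e-5, attrs={"units": "kg/m2/s"})

# Convert kg m-2 s-1 -> mm/day so it can be subtracted from PRECT (mm/day).
qflx_mm_day = qflx * SECONDS_PER_DAY
qflx_mm_day.attrs["units"] = "mm/day"
print(qflx_mm_day.item())  # 2.16 (mm/day)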
19 changes: 10 additions & 9 deletions e3sm_diags/derivations/formulas.py
@@ -77,15 +77,16 @@ def qflx_convert_to_lhflx_approxi(var: xr.DataArray):


def pminuse_convert_units(var: xr.DataArray):
if (
var.attrs["units"] == "kg/m2/s"
or var.attrs["units"] == "kg m-2 s-1"
or var.attrs["units"] == "kg/s/m^2"
):
# need to find a solution for units not included in udunits
# var = convert_units( var, 'kg/m2/s' )
var = var * 3600.0 * 24 # convert to mm/day
var.attrs["units"] = "mm/day"
if hasattr(var, "units"):
if (
var.attrs["units"] == "kg/m2/s"
or var.attrs["units"] == "kg m-2 s-1"
or var.attrs["units"] == "kg/s/m^2"
):
# need to find a solution for units not included in udunits
# var = convert_units( var, 'kg/m2/s' )
var = var * 3600.0 * 24 # convert to mm/day
var.attrs["units"] = "mm/day"
var.attrs["long_name"] = "precip. flux - evap. flux"
return var

23 changes: 14 additions & 9 deletions e3sm_diags/derivations/formulas_cosp.py
@@ -116,19 +116,20 @@ def cosp_bin_sum(target_var_key: str, var: xr.DataArray) -> xr.DataArray:
# 4. Get tau range and lim.
tau_range, tau_lim = _get_tau_subset_range_and_str(tau, tau_adj_range)

# 4. Get the axes mask conditional and subset the variable on it.
# 5. Get the axes mask conditional and subset the variable on it.
cond = (
(prs >= prs_range[0])
& (prs <= prs_range[-1])
& (tau >= tau_range[0])
& (tau <= tau_range[-1])
)

var_sub = var.where(cond, drop=True)

# 5. Sum on axis=0 and axis=1 (tau and prs)
# 7. Sum on axis=0 and axis=1 (tau and prs)
var_sum = var_sub.sum(dim=[prs.name, tau.name])
@tomvothecoder (Collaborator) commented on Feb 15, 2024:

RE: Issue 1 in #752 (comment)

The changes in cosp_bin_sum() now fix the missing outputs for the remaining variables: ISCCPCOSP-CLDTOT_TAU1.3_9.4_ISCCP, ISCCPCOSP-CLDTOT_TAU1.3_ISCCP, MODISCOSP-CLDHGH/TOT_TAU1.3_9.4_MODIS, MODISCOSP-CLDHGH/TOT_TAU1.3_MODIS

Root Cause

  1. The prs_range can be in descending order (e.g., (90000.0, 9000.0))
  2. This caused the condition (cond) to not match any of the data values when subsetting with .where()
    • The condition being used is >=90000.0 and <=9000.0 which is incorrect
    • The correct condition should be the other way around, >= 9000 and <= 90000
  3. Since there are no matching values, the below numpy error is thrown with drop=True (drop all nan values that don't match):
    • ValueError: Cannot apply_along_axis when any iteration dimensions are 0

Solution

Sort prs_range and tau_range before subsetting

Side-note

The CDMS2 version of sub-setting a variable is pretty confusing. Notice below how prs_low and prs_high are used to subset, but prs_low is actually the larger value with my example, (90000.0, 9000.0)

I think CDMS2 swaps both values around and subsets to include all values within the range. It uses the correct sub-setting condition, >= 9000 and <= 90000.

if cld.id == "FISCCP1_COSP": # ISCCP model
cld_bin = cld(cosp_prs=(prs_low, prs_high), cosp_tau=(tau_low, tau_high))
simulator = "ISCCP"
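
A standalone sketch of the failure mode and the sorting fix described above, using made-up pressure values for the `cosp_prs` axis:

import xarray as xr

# Hypothetical COSP pressure axis in Pa, ordered surface -> top (descending).
prs = xr.DataArray(
    [90000.0, 68000.0, 44000.0, 9000.0], dims="cosp_prs", name="cosp_prs"
)

# Taking the range from the axis endpoints gives a descending pair.
prs_range = (prs[0].item(), prs[-1].item())  # (90000.0, 9000.0)

# Broken mask: ">= 90000 and <= 9000" matches nothing, so .where(..., drop=True)
# drops every value and the later sum fails.
bad_cond = (prs >= prs_range[0]) & (prs <= prs_range[-1])
print(int(bad_cond.sum()))  # 0

# Fix: sort the range first so the mask reads ">= 9000 and <= 90000".
lo, hi = sorted(prs_range)
good_cond = (prs >= lo) & (prs <= hi)
print(int(good_cond.sum()))  # 4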


# 6. Set the variable's long name based on the original variable's name and
# 8. Set the variable's long name based on the original variable's name and
# prs ranges.
var_key = str(var.name)
simulation = _get_simulation_str(var_key)
@@ -137,7 +138,7 @@ def cosp_bin_sum(target_var_key: str, var: xr.DataArray) -> xr.DataArray:
if simulation is not None and prs_cloud_level is not None:
var_sum.attrs["long_name"] = f"{simulation}: {prs_cloud_level} with {tau_lim}"

# 7. Convert units to %.
# 9. Convert units to %.
final_var = convert_units(var_sum, "%")

return final_var
@@ -215,18 +216,20 @@ def _get_prs_subset_range(
A tuple of the (min, max) for the prs subset range.
"""
act_range = (prs[0].item(), prs[-1].item())
final_range: List[float] = []
range: List[float] = []

for act_val, adj_val in zip(act_range, prs_adj_range):
if adj_val is not None:
if prs.name in PRS_UNIT_ADJ_MAP.keys() and prs.max().item() > 1000:
adj_val = adj_val * PRS_UNIT_ADJ_MAP[str(prs.name)]

final_range.append(adj_val)
range.append(adj_val)
else:
final_range.append(act_val)
range.append(act_val)

final_range = tuple(sorted(range))

return tuple(final_range) # type: ignore
return final_range # type: ignore


def _get_tau_subset_range_and_str(
Expand Down Expand Up @@ -260,7 +263,9 @@ def _get_tau_subset_range_and_str(
range = (adj_min, adj_max)
range_str = f"{adj_min} < tau < {adj_max}"

return range, range_str
final_range = tuple(sorted(range))

return final_range, range_str # type: ignore


def _get_simulation_str(var_key: str) -> Optional[str]:
@@ -3,7 +3,7 @@ sets = ["zonal_mean_xy"]
case_id = "GPCP_v3.2"
variables = ["PRECT"]
ref_name = "GPCP_v3.2"
reference_name = "GPCP v2.2"
reference_name = "GPCP v3.2"
seasons = ["ANN", "DJF", "MAM", "JJA", "SON"]
regions = ["global"]
