Calculating and Caching MOC#

%load_ext autoreload
%autoreload 2

%matplotlib inline

%set_env CESMDATAROOT=/glade/scratch/eromashkova/tmp
import os
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr

import dask
from dask.distributed import Client
import intake

import pop_tools

import nc_time_axis
import util
env: CESMDATAROOT=/glade/scratch/eromashkova/tmp

Reading in parameters#

### this cell will get parametrized

path_to_cat = "placeholder"
subset_kwargs = {}
asset_path = "placeholder"


### this is here so it can be run alone
#path_to_cat = "temp_data/ssh_cat_subset.json"
# Parameters
casename = "gcp-cases"
path_to_cat = "/glade/u/home/eromashkova/codes/my-cesm-experiment-extended/notebooks/temp_data/gcp-cases_subset.json"
cluster_scheduler_address = "tcp://10.12.206.47:39726"
subset_kwargs = {"frequency": "month_1", "component": "ocn", "variable": "MOC"}
asset_path = "/glade/u/home/eromashkova/codes/my-cesm-experiment-extended/notebooks/cache_data/gcp-cases_POP_MOC_extract_cat_1.nc"

Connecting to cluster#

if cluster_scheduler_address is None:
    cluster, client = util.get_ClusterClient()
    cluster.scale(12)
else:
    client = Client(cluster_scheduler_address)
client

Client

Client-9f1d16f9-a1a9-11ed-92ca-3cecef1aca46

Connection method: Direct
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/eromashkova/proxy/8787/status

Scheduler Info

Scheduler

Scheduler-7cfddfff-723a-4eb5-8b97-437cfe055d0a

Comm: tcp://10.12.206.47:39726 Workers: 0
Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/eromashkova/proxy/8787/status Total threads: 0
Started: Just now Total memory: 0 B

Workers

Reading in data with subsetting#

print(path_to_cat)
dset_dict = (intake.open_esm_datastore(path_to_cat).search(**subset_kwargs)).to_dataset_dict()
/glade/u/home/eromashkova/codes/my-cesm-experiment-extended/notebooks/temp_data/gcp-cases_subset.json

--> The keys in the returned dictionary of datasets are constructed as follows:
	'component.stream.case'
100.00% [1/1 00:18<00:00]
ds_pop = dset_dict[list(dset_dict.keys())[0]] #grabbing an arbitrary element of dset_dict

ds_pop_grid = pop_tools.get_grid('POP_gx1v7')

ds_pop["TLAT"] = ds_pop_grid.TLAT
ds_pop["TLONG"] = ds_pop_grid.TLONG
ds_pop["ULAT"] = ds_pop_grid.ULAT
ds_pop["ULONG"] = ds_pop_grid.ULONG
ds_pop["KMT"] = ds_pop_grid.KMT
j_26n = np.abs(ds_pop.lat_aux_grid - 26.0).argmin().values
j_45n = np.abs(ds_pop.lat_aux_grid - 43.0).argmin().values
ds_pop
<xarray.Dataset>
Dimensions:                 (moc_comp: 3, transport_comp: 5, transport_reg: 2,
                             z_t: 60, z_t_150m: 15, z_w: 60, z_w_top: 60,
                             z_w_bot: 60, lat_aux_grid: 395, moc_z: 61,
                             nlat: 384, nlon: 320, time: 4428, d2: 2)
Coordinates: (12/66)
    moc_components          (moc_comp) |S384 dask.array<chunksize=(3,), meta=np.ndarray>
    transport_components    (transport_comp) |S384 dask.array<chunksize=(5,), meta=np.ndarray>
    transport_regions       (transport_reg) |S384 dask.array<chunksize=(2,), meta=np.ndarray>
  * z_t                     (z_t) float32 500.0 1.5e+03 ... 5.125e+05 5.375e+05
  * z_t_150m                (z_t_150m) float32 500.0 1.5e+03 ... 1.45e+04
  * z_w                     (z_w) float32 0.0 1e+03 2e+03 ... 5e+05 5.25e+05
    ...                      ...
    salinity_factor         float64 -0.00347
    sflux_factor            float64 0.1
    nsurface_t              float64 8.61e+04
    nsurface_u              float64 8.297e+04
    time_bound              (time, d2) object dask.array<chunksize=(300, 2), meta=np.ndarray>
  * time                    (time) object 1653-02-01 00:00:00 ... 2022-01-01 ...
Dimensions without coordinates: moc_comp, transport_comp, transport_reg, nlat,
                                nlon, d2
Data variables:
    MOC                     (time, transport_reg, moc_comp, moc_z, lat_aux_grid) float32 dask.array<chunksize=(300, 2, 3, 61, 395), meta=np.ndarray>
Attributes: (12/20)
    history:                           none
    Conventions:                       CF-1.0; http://www.cgd.ucar.edu/cms/ea...
    time_period_freq:                  month_1
    model_doi_url:                     https://doi.org/10.5065/D67H1H0V
    contents:                          Diagnostic and Prognostic Variables
    source:                            CCSM POP2, the CCSM Ocean Component
    ...                                ...
    intake_esm_attrs:long_name:        Meridional Overturning Circulation
    intake_esm_attrs:units:            Sverdrups
    intake_esm_attrs:vertical_levels:  1
    intake_esm_attrs:frequency:        month_1
    intake_esm_attrs:_data_format_:    netcdf
    intake_esm_dataset_key:            ocn.pop.h.g.e22.GOMIPECOIAF_JRA-1p4-20...
print(ds_pop['transport_regions'].values)
[b'Global Ocean - Marginal Seas'
 b'Atlantic Ocean + Mediterranean Sea + Labrador Sea + GIN Sea + Arctic Ocean + Hudson Bay']
print(ds_pop['moc_components'].values)
[b'Eulerian Mean' b'Eddy-Induced (bolus)' b'Submeso']
AMOC_series_26n = ds_pop.MOC.isel(lat_aux_grid=j_26n,transport_reg=1).sum(dim='moc_comp').sel(moc_z=slice(250e2,6000e2)).max(dim='moc_z')
AMOC_series_45n = ds_pop.MOC.isel(lat_aux_grid=j_45n,transport_reg=1).sum(dim='moc_comp').sel(moc_z=slice(250e2,6000e2)).max(dim='moc_z')
AMOC_series_26n
<xarray.DataArray 'MOC' (time: 4428)>
dask.array<_nanmax_skip-aggregate, shape=(4428,), dtype=float32, chunksize=(300,), chunktype=numpy.ndarray>
Coordinates: (12/34)
    transport_regions       |S384 dask.array<chunksize=(), meta=np.ndarray>
    lat_aux_grid            float32 25.8
    days_in_norm_year       timedelta64[ns] 365 days
    grav                    float64 980.6
    omega                   float64 7.292e-05
    radius                  float64 6.371e+08
    ...                      ...
    fwflux_factor           float64 0.0001
    salinity_factor         float64 -0.00347
    sflux_factor            float64 0.1
    nsurface_t              float64 8.61e+04
    nsurface_u              float64 8.297e+04
  * time                    (time) object 1653-02-01 00:00:00 ... 2022-01-01 ...
#AMOC_series_26n.plot()
#AMOC_series_45n.plot()
ds_26n = xr.Dataset(
    {'MOC': (('time'), AMOC_series_26n.data)},
    coords={'time':ds_pop.time}
)
ds_45n = xr.Dataset(
    {'MOC': (('time'), AMOC_series_45n.data)},
    coords={'time':ds_pop.time}
)
                                          
ds_26n.to_netcdf(asset_path)