Source code for scripts.averaging.create_TEM_files

import xarray as xr
import numpy as np
from scipy import integrate
from numpy import ma
from datetime import date
from pathlib import Path
from glob import glob
from itertools import chain


[docs]
def create_TEM_files(adf):
    """
    Calculate the TEM variables and create new netCDF files
    """

    #Notify user that script has started:
    msg = "\n Generating CAM TEM diagnostics files..."
    print(f"{msg}\n {'-' * (len(msg)-3)}")

    #Special ADF variables
    #CAM simulation variables (these quantities are always lists):
    case_names = adf.get_cam_info("cam_case_name", required=True)
    base_name = adf.get_baseline_info("cam_case_name")

    #Grab h4 history file locations
    cam_hist_locs = adf.get_cam_info("cam_hist_loc", required=True)

    #Extract test case years
    start_years = adf.climo_yrs["syears"]
    end_years = adf.climo_yrs["eyears"]

    res = adf.variable_defaults  # will be dict of variable-specific plot preferences

    if "qbo" in adf.plotting_scripts:
        var_list = ['uzm','epfy','epfz','vtem','wtem',
                    'psitem','utendepfd','utendvtem','utendwtem']
    else:
        var_list = ['uzm','epfy','epfz','vtem','wtem','psitem','utendepfd']

    tem_locs = []

    #Grab TEM diagnostics options
    #----------------------------
    #Extract TEM file save locations
    tem_base_loc = adf.get_baseline_info("cam_tem_loc")
    tem_case_locs = adf.get_cam_info("cam_tem_loc")

    #If path not specified, skip the TEM calculation:
    if tem_case_locs is None:
        print("\t 'cam_tem_loc' not found in 'diag_cam_climo', so no TEM files/diagnostics will be generated.")
        return
    else:
        for tem_case_loc in tem_case_locs:
            tem_case_loc = Path(tem_case_loc)
            #Check if TEM directory exists, and if not, then create it:
            if not tem_case_loc.is_dir():
                print(f" {tem_case_loc} not found, making new directory")
                tem_case_loc.mkdir(parents=True)
            #End if
            tem_locs.append(tem_case_loc)
        #End for
    #End if

    #Set default history stream to h4
    hist_nums = adf.get_cam_info("tem_hist_str")
    if hist_nums is None:
        hist_nums = ["h4"]*len(case_names)

    #Get test case(s) TEM over-write boolean and force it to a list if needed
    overwrite_tem_cases = adf.get_cam_info("overwrite_tem")
    #If overwrite argument is missing, then default to False:
    if overwrite_tem_cases is None:
        overwrite_tem_cases = [False]*len(case_names)
    #If overwrite argument is a scalar,
    #then convert it into a list:
    if not isinstance(overwrite_tem_cases, list):
        overwrite_tem_cases = [overwrite_tem_cases]

    #Check if comparing to observations
    if adf.get_basic_info("compare_obs"):
        var_obs_dict = adf.var_obs_dict
        #If dictionary is empty, then there are no observations to plot against:
        if not var_obs_dict:
            print("No observations found to plot against, so no obs-based TEM plot will be generated.")
            pass

        print(f"\t Processing TEM for observations :")
        base_name = "Obs"

        #Save Obs TEM file to first test case location
        output_loc_idx = tem_locs[0]

        #Check if TEM output directory exists, and if not, then create it:
        if not output_loc_idx.is_dir():
            print(f" {output_loc_idx} not found, making new directory")
            output_loc_idx.mkdir(parents=True)
        #End if

        print(f"\t NOTE: Observation TEM file being saved to '{output_loc_idx}'")

        #Set observation TEM file name as full path
        tem_fil = output_loc_idx / f'{base_name}.TEMdiag.nc'

        #Group all TEM observation files together
        tem_obs_fils = []
        for var in var_list:
            if var in res:
                #Gather from variable defaults file
                obs_file_path = Path(res[var]["obs_file"])
                if not obs_file_path.is_file():
                    obs_data_loc = adf.get_basic_info("obs_data_loc")
                    obs_file_path = Path(obs_data_loc)/obs_file_path

                #It's likely multiple TEM vars will come from one file, so check
                #to see if it already exists from other vars.
                if obs_file_path not in tem_obs_fils:
                    tem_obs_fils.append(obs_file_path)
            #End if
        #End for

        ds = xr.open_mfdataset(tem_obs_fils, combine="nested")

        start_year = str(ds.time[0].values)[0:4]
        end_year = str(ds.time[-1].values)[0:4]

        #Update the attributes
        ds.attrs['created'] = str(date.today())
        ds['lev'] = ds['level']
        ds['zalat'] = ds['lat']

        #Make a copy of obs data so we don't do anything bad
        ds_obs = ds.copy()

        ds_base = xr.Dataset({'uzm': xr.Variable(('time', 'lev', 'zalat'), ds_obs.uzm.data),
                              'epfy': xr.Variable(('time', 'lev', 'zalat'), ds_obs.epfy.data),
                              'epfz': xr.Variable(('time', 'lev', 'zalat'), ds_obs.epfz.data),
                              'vtem': xr.Variable(('time', 'lev', 'zalat'), ds_obs.vtem.data),
                              'wtem': xr.Variable(('time', 'lev', 'zalat'), ds_obs.wtem.data),
                              'psitem': xr.Variable(('time', 'lev', 'zalat'), ds_obs.psitem.data),
                              'utendepfd': xr.Variable(('time', 'lev', 'zalat'), ds_obs.utendepfd.data),
                              'utendvtem': xr.Variable(('time', 'lev', 'zalat'), ds_obs.utendvtem.data),
                              'utendwtem': xr.Variable(('time', 'lev', 'zalat'), ds_obs.utendwtem.data),
                              'lev': xr.Variable('lev', ds_obs.level.values),
                              'zalat': xr.Variable('zalat', ds_obs.lat.values),
                              'time': xr.Variable('time', ds_obs.time.values)
                             })

        # write output to a netcdf file
        ds_base.to_netcdf(tem_fil, unlimited_dims='time', mode='w')

    else:
        if tem_base_loc:
            cam_hist_locs.append(adf.get_baseline_info("cam_hist_loc", required=True))

            #Set default history stream to h4
            hist_num = adf.get_baseline_info("tem_hist_str")
            if hist_num is None:
                hist_num = "h4"

            #Extract baseline years (which may be empty strings if using Obs):
            syear_baseline = adf.climo_yrs["syear_baseline"]
            eyear_baseline = adf.climo_yrs["eyear_baseline"]

            case_names.append(base_name)
            start_years.append(syear_baseline)
            end_years.append(eyear_baseline)

            tem_base_loc = Path(tem_base_loc)
            #Check if TEM directory exists, and if not, then create it:
            if not tem_base_loc.is_dir():
                print(f" {tem_base_loc} not found, making new directory")
                tem_base_loc.mkdir(parents=True)
            #End if
            tem_locs.append(tem_base_loc)

            overwrite_tem_cases.append(adf.get_baseline_info("overwrite_tem", False))
            hist_nums.append(hist_num)
        else:
            print("\t 'cam_tem_loc' not found in 'diag_cam_baseline_climo', so no baseline files/diagnostics will be generated.")
        #End if
    #End if (check for obs)

    #Loop over cases:
    for case_idx, case_name in enumerate(case_names):

        print(f"\t Processing TEM for case '{case_name}' :")

        #Extract start and end year values:
        start_year = start_years[case_idx]
        end_year = end_years[case_idx]

        #Create path object for the CAM history file(s) location:
        starting_location = Path(cam_hist_locs[case_idx])

        #Check that path actually exists:
        if not starting_location.is_dir():
            emsg = f"Provided 'cam_hist_loc' directory '{starting_location}' not found."
            emsg += " Script is ending here."
            print(emsg)
            return
        #End if

        #Check if history files actually exist. If not, then kill the script:
        hist_str = f"*{hist_nums[case_idx]}"
        if not list(starting_location.glob(hist_str+'.*.nc')):
            emsg = f"No CAM history {hist_str} files found in '{starting_location}'."
            emsg += " Script is ending here."
            adf.end_diag_fail(emsg)
        #End if

        #Get full path and file for file name
        output_loc_idx = tem_locs[case_idx]

        #Check if TEM output directory exists, and if not, then create it:
        if not output_loc_idx.is_dir():
            print(f" {output_loc_idx} not found, making new directory")
            output_loc_idx.mkdir(parents=True)
        #End if

        #Set case file name
        tem_fil = output_loc_idx / f'{case_name}.TEMdiag_{start_year}-{end_year}.nc'

        #Get current case TEM over-write boolean
        overwrite_tem = overwrite_tem_cases[case_idx]

        #If the file exists, then check if over-writing is allowed:
        if (tem_fil.is_file()) and (not overwrite_tem):
            print("\t INFO: Found TEM file and clobber is False, so moving to next case.")
            pass
        else:
            if tem_fil.is_file():
                print("\t INFO: Found TEM file but clobber is True, so over-writing file.")

            #Glob each set of years
            #NOTE: This will make a nested list
            hist_files = []
            for yr in np.arange(int(start_year), int(end_year)+1):
                #Pad the climo year with leading zeros just in case
                yr = f"{str(yr).zfill(4)}"
                hist_files.append(glob(f"{starting_location}/{hist_str}.{yr}*.nc"))
            #End for

            #Flatten list of lists to 1-d list
            hist_files = sorted(list(chain.from_iterable(hist_files)))

            ds = xr.open_mfdataset(hist_files)

            #Iterate over the times in the dataset
            for idx, _ in enumerate(ds.time.values):
                if idx == 0:
                    dstem0 = calc_tem(ds.squeeze().isel(time=idx))
                else:
                    dstem = calc_tem(ds.squeeze().isel(time=idx))
                    dstem0 = xr.concat([dstem0, dstem], 'time')
                #End if
            #End for

            #Update the attributes
            dstem0.attrs = ds.attrs
            dstem0.attrs['created'] = str(date.today())
            dstem0['lev'] = ds['lev']

            # write output to a netcdf file
            dstem0.to_netcdf(tem_fil, unlimited_dims='time', mode='w')
        #End if (file creation or over-write file)
    #End for (case loop)

    #Notify user that script has ended:
    print(" ...TEM variables have been calculated successfully.")
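# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): inspecting a TEM file
# written by create_TEM_files(). The output directory, case name, and years
# below are hypothetical placeholders; the file naming convention
# '<case>.TEMdiag_<start>-<end>.nc' and the variable list follow the code above.
#
#     import xarray as xr
#
#     tem_file = "/path/to/tem_output/my_case.TEMdiag_1995-2005.nc"  # placeholder
#     ds_tem = xr.open_dataset(tem_file)
#
#     # Expect the zonal-mean TEM diagnostics produced by calc_tem(), e.g.
#     # uzm, epfy, epfz, vtem, wtem, psitem, utendepfd (plus utendvtem and
#     # utendwtem when the QBO diagnostics are enabled), on (time, lev, zalat).
#     print(ds_tem.data_vars)
#     print(ds_tem["wtem"].attrs)  # long_name / units added by calc_tem()
# --------------------------------------------------------------------------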
[docs]
def calc_tem(ds):
    """
    calc_tem(): calculate TEM diagnostics on CAM/WACCM output.

    This assumes the data have already been organized into zonal-mean fluxes:
    Uzm, THzm, VTHzm, Vzm, UVzm, UWzm, Wzm.

    Note that calculations are performed on model interface levels, which is fine
    in the stratosphere but not in the troposphere. If interested in tropospheric
    TEM diagnostics, make sure the input fields have been interpolated to true
    pressure levels.

    The code follows the 'TEM recipe' from Appendix A of Gerber, E. P. and Manzini, E.:
    The Dynamics and Variability Model Intercomparison Project (DynVarMIP) for CMIP6:
    assessing the stratosphere–troposphere system, Geosci. Model Dev., 9, 3413–3425,
    https://doi.org/10.5194/gmd-9-3413-2016, 2016, and its corrigendum.
    PDFs available here:
    https://gmd.copernicus.org/articles/9/3413/2016/gmd-9-3413-2016.pdf
    https://gmd.copernicus.org/articles/9/3413/2016/gmd-9-3413-2016-corrigendum.pdf

    Output from post-processing function
    (Table A1. Momentum budget variable list; 2-D monthly/daily zonal means, YZT):

    Name       Long name [unit]
    epfy       northward component of the Eliassen–Palm flux [m3 s−2]
    epfz       upward component of the Eliassen–Palm flux [m3 s−2]
    vtem       Transformed Eulerian mean northward wind [m s−1]
    wtem       Transformed Eulerian mean upward wind [m s−1]
    psitem     Transformed Eulerian mean mass stream function [kg s−1]
    utendepfd  tendency of eastward wind due to Eliassen–Palm flux divergence [m s−2]
    utendvtem  tendency of eastward wind due to TEM northward wind advection and the Coriolis term [m s−2]
    utendwtem  tendency of eastward wind due to TEM upward wind advection [m s−2]

    This utility is based on python code developed by Isla Simpson, 25 Feb 2021.
    Initial coding of the stand-alone function by Dan Marsh, 16 Dec 2022.

    NOTE: the function expects an xarray dataset with data arrays of dimension
    (nlev, nlat); to process more than one timestep, iterate over time.
    """

    # constants for TEM calculations
    p0 = 101325.      # reference surface pressure [Pa]
    a = 6.371e6       # Earth radius [m]
    om = 7.29212e-5   # Earth rotation rate [s-1]
    H = 7000.         # scale height [m]
    g0 = 9.80665      # gravitational acceleration [m s-2]

    nlat = ds['zalat'].size
    nlev = ds['lev'].size

    latrad = np.radians(ds.zalat)
    coslat = np.cos(latrad)
    coslat2d = np.tile(coslat, (nlev, 1))

    # pressure levels in Pascals
    pre = ds['lev']*100.

    # Coriolis parameter
    f = 2.*om*np.sin(latrad[:])
    f2d = np.tile(f, (nlev, 1))

    # mask out missing values (>= 1e33)
    uzm = ds['Uzm']
    uzm.values = ma.masked_greater_equal(uzm, 1e33)
    vzm = ds['Vzm']
    vzm.values = ma.masked_greater_equal(vzm, 1e33)
    wzm = ds['Wzm']
    wzm.values = ma.masked_greater_equal(wzm, 1e33)
    thzm = ds['THzm']
    thzm.values = ma.masked_greater_equal(thzm, 1e33)
    uvzm = ds['UVzm']
    uvzm.values = ma.masked_greater_equal(uvzm, 1e33)
    uwzm = ds['UWzm']
    uwzm.values = ma.masked_greater_equal(uwzm, 1e33)
    vthzm = ds['VTHzm']
    vthzm.values = ma.masked_greater_equal(vthzm, 1e33)

    # convert w terms from m/s to Pa/s
    wzm = -1.*wzm*pre/H
    uwzm = -1.*uwzm*pre/H

    # compute the latitudinal gradient of U
    dudphi = (1./(a*coslat2d))*np.gradient(uzm*coslat2d, latrad, axis=1)

    # compute the vertical gradient of theta and u
    dthdp = np.gradient(thzm, pre, axis=0)
    dudp = np.gradient(uzm, pre, axis=0)

    # compute eddy streamfunction and its vertical gradient
    psieddy = vthzm/dthdp
    dpsidp = np.gradient(psieddy, pre, axis=0)

    # (1/(a*cos(phi))) * d(psi*cos(phi))/dphi for getting w*
    dpsidy = (1./(a*coslat2d)) \
        * np.gradient(psieddy*coslat2d, latrad, axis=1)

    # TEM vertical velocity (Eq A7 of DynVarMIP)
    wtem = wzm+dpsidy

    # utendwtem (Eq A10 of DynVarMIP)
    utendwtem = -1.*wtem*dudp

    # vtem (Eq A6 of DynVarMIP)
    vtem = vzm-dpsidp

    # utendvtem (Eq A9 of DynVarMIP)
    utendvtem = vtem*(f2d - dudphi)

    # calculate E-P fluxes
    epfy = a*coslat2d*(dudp*psieddy - uvzm)          # Eq A2
    epfz = a*coslat2d*((f2d-dudphi)*psieddy - uwzm)  # Eq A3

    # calculate E-P flux divergence and zonal wind tendency
    # due to resolved waves (Eq A5)
    depfydphi = (1./(a*coslat2d)) \
        * np.gradient(epfy*coslat2d, latrad, axis=1)
    depfzdp = np.gradient(epfz, pre, axis=0)
    utendepfd = (depfydphi + depfzdp)/(a*coslat2d)
    utendepfd = xr.DataArray(utendepfd, coords=ds.Uzm.coords, name='utendepfd')

    # TEM stream function (Eq A8)
    topvzm = np.zeros([1, nlat])
    vzmwithzero = np.concatenate((topvzm, vzm), axis=0)
    prewithzero = np.concatenate((np.zeros([1]), pre))
    intv = integrate.cumtrapz(vzmwithzero, prewithzero, axis=0)
    psitem = (2*np.pi*a*coslat2d/g0)*(intv - psieddy)

    # final scaling of E-P fluxes and divergence to transform to log-pressure
    epfy = epfy*pre/p0       # Eq A13
    epfz = -1.*(H/p0)*epfz   # Eq A14
    wtem = -1.*(H/pre)*wtem  # Eq A16

    # add long name and unit attributes to TEM diagnostics
    uzm.attrs['long_name'] = 'Zonal-Mean zonal wind'
    uzm.attrs['units'] = 'm/s'

    vzm.attrs['long_name'] = 'Zonal-Mean meridional wind'
    vzm.attrs['units'] = 'm/s'

    epfy.attrs['long_name'] = 'northward component of E-P flux'
    epfy.attrs['units'] = 'm3/s2'

    epfz.attrs['long_name'] = 'upward component of E-P flux'
    epfz.attrs['units'] = 'm3/s2'

    vtem.attrs['long_name'] = 'Transformed Eulerian mean northward wind'
    vtem.attrs['units'] = 'm/s'

    wtem.attrs['long_name'] = 'Transformed Eulerian mean upward wind'
    wtem.attrs['units'] = 'm/s'

    psitem.attrs['long_name'] = 'Transformed Eulerian mean mass stream function'
    psitem.attrs['units'] = 'kg/s'

    utendepfd.attrs['long_name'] = 'tendency of eastward wind due to Eliassen-Palm flux divergence'
    utendepfd.attrs['units'] = 'm/s2'

    utendvtem.attrs['long_name'] = 'tendency of eastward wind due to TEM northward wind advection and the Coriolis term'
    utendvtem.attrs['units'] = 'm/s2'

    utendwtem.attrs['long_name'] = 'tendency of eastward wind due to TEM upward wind advection'
    utendwtem.attrs['units'] = 'm/s2'

    # store the output as 32-bit floats
    epfy.values = np.float32(epfy.values)
    epfz.values = np.float32(epfz.values)
    wtem.values = np.float32(wtem.values)
    psitem.values = np.float32(psitem.values)
    utendepfd.values = np.float32(utendepfd.values)
    utendvtem.values = np.float32(utendvtem.values)
    utendwtem.values = np.float32(utendwtem.values)

    # bundle the diagnostics into a single dataset
    dstem = xr.Dataset(data_vars=dict(date = ds.date,
                                      datesec = ds.datesec,
                                      time_bnds = ds.time_bnds,
                                      uzm = uzm,
                                      vzm = vzm,
                                      epfy = epfy,
                                      epfz = epfz,
                                      vtem = vtem,
                                      wtem = wtem,
                                      psitem = psitem,
                                      utendepfd = utendepfd,
                                      utendvtem = utendvtem,
                                      utendwtem = utendwtem
                                      ))

    return dstem
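# --------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): the history file
# glob below is a hypothetical placeholder. calc_tem() expects a single
# (lev, zalat) snapshot containing the zonal-mean fields Uzm, Vzm, Wzm, THzm,
# UVzm, UWzm, and VTHzm, so iterate over time as create_TEM_files() does above.
#
#     import xarray as xr
#
#     ds = xr.open_mfdataset("/path/to/hist/*h4.0001*.nc")  # placeholder glob
#
#     tem_per_time = [calc_tem(ds.squeeze().isel(time=idx))
#                     for idx in range(ds.time.size)]
#     dstem = xr.concat(tem_per_time, "time")
#
#     dstem.to_netcdf("TEMdiag_example.nc", unlimited_dims="time", mode="w")
# --------------------------------------------------------------------------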