Source code for scripts.analysis.aerosol_gas_tables

import numpy as np
import xarray as xr
import sys
from pathlib import Path
import warnings  # use to warn user about missing files.

from datetime import datetime
import numpy as np
import itertools

try:
    import pandas as pd
except ImportError:
    print("Pandas module does not exist in python path, but is needed for amwg_table.")
    print("Please install module, e.g. 'pip install pandas'.")
    sys.exit(1)
#End except

# Import necessary ADF modules:
from lib.adf_base import AdfError

[docs] def aerosol_gas_tables(adfobj): ''' Calculate aerosol and gaseous budget tables **Default set of variables:** change in lib/adf_variable_defaults.yaml GAS_VARIABLES: ['CH4','CH3CCL3', 'CO', 'O3', 'ISOP', 'MTERP', 'CH3OH', 'CH3COCH3'] AEROSOL_VARIABLES: ['AOD','SOA', 'SALT', 'DUST', 'POM', 'BC', 'SO4'] Default output for tables: **Gases:** CH4_BURDEN (Tg), CH4_CHEM_LOSS (Tg/yr), CH4_LIFETIME (years) CH3CCL3_BURDEN (Tg), CH3CCL3_CHEM_LOSS (Tg/yr), CH3CCL3_LIFETIME (days) CO_EMIS (Tg/yr), CO_BURDEN (Tg), CO_CHEM_LOSS (Tg/yr), CO_CHEM_PROD (Tg/yr), CO_DRYDEP (Tg/yr) CO_TDEP (Tg/yr), CO_LIFETIME (days), CO_TEND (Tg/yr) O3_BURDEN (Tg), O3_CHEM_LOSS (Tg/yr), O3_CHEM_PROD (Tg/yr), O3_DRYDEP (Tg/yr), O3_TDEP (Tg/yr) O3_LIFETIME (days), O3_TEND (Tg/yr), O3_STE (Tg/yr) LNOx_PROD (Tg N/yr) ISOP_EMIS (Tg/yr), ISOP_BURDEN (Tg) Monoterpene_EMIS (Tg/yr), Monoterpene_BURDEN (Tg) Methanol_EMIS (Tg/yr), Methanol_BURDEN (Tg), Methanol_DRYDEP (Tg/yr), Methanol_WETDEP (Tg/yr), Methanol_TDEP (Tg/yr) Acetone_EMIS (Tg/yr), Acetone_BURDEN (Tg), Acetone_DRYDEP (Tg/yr), Acetone_WETDEP (Tg/yr), Acetone_TDEP (Tg/yr) **Aerosols:** AOD_mean SOA_BURDEN (Tg), SOA_CHEM_LOSS (Tg/yr), SOA_DRYDEP (Tg/yr), SOA_WETDEP (Tg/yr), SOA_GAEX (Tg/yr), SOA_LIFETIME (days) SALT_EMIS (Tg/yr), SALT_BURDEN (Tg), SALT_DRYDEP (Tg/yr), SALT_WETDEP (Tg/yr), SALT_LIFETIME (days) DUST_EMIS (Tg/yr), DUST_BURDEN (Tg), DUST_DRYDEP (Tg/yr), DUST_WETDEP (Tg/yr), DUST_LIFETIME (days) POM_EMIS (Tg/yr), POM_BURDEN (Tg), POM_DRYDEP (Tg/yr), POM_WETDEP (Tg/yr), POM_LIFETIME (days) BC_EMIS (Tg/yr), BC_BURDEN (Tg), BC_DRYDEP (Tg/yr), BC_WETDEP (Tg/yr), BC_LIFETIME (days) SO4_EMIS_elevated (Tg S/yr), SO4_BURDEN (Tg S), SO4_DRYDEP (Tg S/yr), SO4_WETDEP (Tg S/yr), SO4_GAEX (Tg S/yr) SO4_LIFETIME (days), SO4_AQUEOUS (Tg S/yr), SO4_NUCLEATION (Tg S/yr) **List of variable names and descriptions for clarity** - ListVars: list of all available variables from given history file - GAS_VARIABLES: list fo necessary CAM gaseous variables - AEROSOL_VARIABLES: list fo necessary CAM aerosol variables - AEROSOLS: list of necessary aerosols for computations MODIFICATION HISTORY: Behrooz Roozitalab, 02, NOV, 2022: VERSION 1.00 - Initial version Justin Richling, 27 Nov, 2023 - updated to fit to ADF and check with old AMWG chem/aerosol tables - fixed: * added difference bewtween cases column to tables Behrooz Roozitalab, 8 Aug, 2024 - fixed: * lifetime inconsitencies * Removed redundant calculations to improve the speed * Verified the results against the NCL script. ''' #Notify user that script has started: msg = "\n Calculating chemistry/aerosol budget tables..." print(f"{msg}\n {'-' * (len(msg)-3)}") # Inputs #------- # Variable defaults info res = adfobj.variable_defaults # dict of variable-specific plot preferences bres = res['budget_tables'] # list of the gaseous variables to be caculated. GAS_VARIABLES = bres['GAS_VARIABLES'] # list of the aerosol variables to be caculated. AEROSOL_VARIABLES = bres['AEROSOL_VARIABLES'] #list of all the variables to be caculated. VARIABLES = GAS_VARIABLES + AEROSOL_VARIABLES # For the case that outputs are saved for a specific region. # i.e., when using fincllonlat in user_nl_cam ext1_SE = bres['ext1_SE'] # Tropospheric Values # ------------------- # if True, calculate only Tropospheric values # if False, all layers # tropopause is defiend as o3>150ppb. If needed, change accordingly. Tropospheric = bres['Tropospheric'] ### NOT WORKING FOR NOW # To calculate the budgets only for a region # Lat/Lon extent limit = bres['limit'] regional = bres['regional'] # Dictionary for Molecular weights. Keys must be consistent with variable name # For aerosols, the MW is used only for chemical loss, chemical production, and elevated emission calculations # For SO4, we report everything in terms of Sulfur, so we use Sulfur MW here MW = bres['MW'] # Avogadro's Number AVO = float(bres['AVO']) # gravity gr = float(bres['gr']) # Mw air Mwair = float(bres['Mwair']) # The variables in the list below must be aerosols - do not add AOD and DAOD # no need to change this list, unless for a specific need! AEROSOLS = bres['AEROSOLS'] # Start gathering case, path, and data info #----------------------------------------- # CAM simulation variables (these quantities are always lists): case_names = adfobj.get_cam_info('cam_case_name', required=True) # Grab all case nickname(s) test_nicknames_list = adfobj.case_nicknames["test_nicknames"] nicknames_list = test_nicknames_list # Grab climo years start_years = adfobj.climo_yrs["syears"] end_years = adfobj.climo_yrs["eyears"] #Grab history strings: hist_strs = adfobj.hist_string["test_hist_str"] # Grab history file locations from config yaml file hist_locs = adfobj.get_cam_info("cam_hist_loc", required=True) # Check if this is test model vs baseline model # If so, update test case(s) lists created above if not adfobj.compare_obs: # Get baseline case info case_names += [adfobj.get_baseline_info("cam_case_name")] nicknames_list += [adfobj.case_nicknames["base_nickname"]] # Grab climo years start_years += [adfobj.climo_yrs["syear_baseline"]] end_years += [adfobj.climo_yrs["eyear_baseline"]] # Get history file info hist_strs += [adfobj.hist_string["base_hist_str"]] hist_locs += [adfobj.get_baseline_info("cam_hist_loc")] # End if # Check to ensure number of case names matches number history file locations. # If not, exit script if len(hist_locs) != len(case_names): errmsg = "Error: number of cases does not match number of history file locations. Script is exiting." raise AdfError(errmsg) # Initialize nicknames dictionary nicknames = {} # Filter the list to include only strings that are possible h0 strings # - Search for either h0 or h0a substrings = {"cam.h0","cam.h0a"} case_hist_strs = [] print("hist_strs",hist_strs,"\n") for cam_case_str in hist_strs: # Check each possible h0 string for string in cam_case_str: if string in substrings: case_hist_strs.append(string) break # Create path object for the CAM history file(s) location: data_dirs = [] for case_idx,case in enumerate(case_names): print(f"\t Looking for history location: {hist_locs[case_idx]}") nicknames[case] = nicknames_list[case_idx] #Check that history file input directory actually exists: if (hist_locs[case_idx] is None) or (not Path(hist_locs[case_idx]).is_dir()): errmsg = f"History files directory '{hist_locs[case_idx]}' not found. Script is exiting." raise AdfError(errmsg) #Write to debug log if enabled: adfobj.debug_log(f"DEBUG: location of history files is {str(hist_locs[case_idx])}") # Update list for found directories data_dirs.append(hist_locs[case_idx]) # End gathering case, path, and data info #----------------------------------------- # Periods of Interest # ------------------- # choose the period of interest. Plots will be averaged within this period durations = {} num_yrs = {} # Main function #-------------- # Set dictionary of components for each case Dic_scn_var_comp = {} areas = {} trops = {} insides = {} for i,case in enumerate(case_names): start_year = start_years[i] end_year = end_years[i] start_date = f"{start_year}-1-1" end_date = f"{end_year}-1-1" # Create time periods start_period = datetime.strptime(start_date, "%Y-%m-%d") end_period = datetime.strptime(end_date, "%Y-%m-%d") # Calculated duration of time period in seconds? durations[case_names[i]] = (end_period-start_period).days*86400+365*86400 # Get number of years for calculations num_yrs[case_names[i]] = (int(end_year)-int(start_year))+1 # Get currenty history file directory data_dir = data_dirs[i] # Get all files, lats, lons, and area weights for current case Files,Lats,Lons,areas[case],ext1_SE = Get_files(adfobj,data_dir,start_year,end_year,case_hist_strs[i],area=True) # find the name of all the variables in the file. # this will help the code to work for the variables that are not in the files (assingn 0s) tmp_file = xr.open_dataset(Path(data_dir) / Files[0]) ListVars = list(tmp_file.variables) # Set up and fill dictionaries for components for current cases dic_SE = set_dic_SE(ListVars,ext1_SE) dic_SE = fill_dic_SE(adfobj, dic_SE, VARIABLES, ListVars, ext1_SE, AEROSOLS, MW, AVO, gr, Mwair) text = f'\n\t Calculating values for {case}' print(text) print("\t " + "-" * (len(text) - 2)) # Gather dictionary data for current case # NOTE: The calculations can take a long time... Dic_crit, Dic_scn_var_comp[case] = make_Dic_scn_var_comp(adfobj, VARIABLES, data_dir, dic_SE, Files, ext1_SE, AEROSOLS) # Regional refinement # NOTE: This function 'Inside_SE' is unavailable at the moment! - JR 10/2024 if regional: #inside = Inside_SE_region(current_lat,current_lon,dir_shapefile) inside = Inside_SE(Lats,Lons,limit) else: if len(np.shape(areas[case])) == 1: inside = np.full((len(Lons)),True) else: inside = np.full((len(Lats),len(Lons)),True) # Set critical threshold current_crit = Dic_crit[0] if Tropospheric: trop = np.where(current_crit>150,np.nan,current_crit) #strat=np.where(current_crit>150,current_crit,np.nan) else: trop=current_crit trops[case] = trop insides[case] = inside # Make and save tables table_kwargs = {"adfobj":adfobj, "Dic_scn_var_comp":Dic_scn_var_comp, "areas":areas, "trops":trops, "case_names":case_names, "nicknames":nicknames, "durations":durations, "insides":insides, "num_yrs":num_yrs, "AEROSOLS":AEROSOLS} # Create the budget tables #------------------------- # Aerosols if len(AEROSOL_VARIABLES) > 0: print("\tMaking table for aerosols") make_table(vars=AEROSOL_VARIABLES, chem_type='aerosols', **table_kwargs) # Gases if len(GAS_VARIABLES) > 0: print("\tMaking table for gases") make_table(vars=GAS_VARIABLES, chem_type='gases', **table_kwargs)
####### ################## # Helper functions ##################
[docs] def list_files(adfobj, directory, start_year ,end_year, h_case): """ This function extracts the files in the directory that are within the chosen dates and history number. """ # History file year range yrs = np.arange(int(start_year), int(end_year)+1) all_filenames = [] for i in yrs: all_filenames.append(sorted(Path(directory).glob(f'*.{h_case}.{i}-*'))) # Flattening the list of lists filenames = list(itertools.chain.from_iterable(sorted(all_filenames))) if len(filenames)==0: #sys.exit(" Directory has no outputs ") msg = f"chem/aerosol tables, 'list_files':" msg += f"\n\t - Directory '{directory}' has no outputs." adfobj.debug_log(msg) return filenames
#####
[docs] def Get_files(adfobj, data_dir, start_year, end_year, h_case, **kwargs): """ This function retrieves the files, latitude, and longitude information in all the directories within the chosen dates. """ ext1_SE = kwargs.pop('ext1_SE','') area = kwargs.pop('area',False) Earth_rad=6.371e6 # Earth Radius in meters current_files = list_files(adfobj, data_dir, start_year, end_year,h_case) # get the Lat and Lons for each case tmp_file = xr.open_dataset(Path(data_dir) / current_files[0]) lon = tmp_file['lon'+ext1_SE].data lon[lon > 180.] -= 360 # shift longitude from 0-360˚ to -180-180˚ lat = tmp_file['lat'+ext1_SE].data if area == True: try: tmp_area = tmp_file['area'+ext1_SE].data Earth_area = 4 * np.pi * Earth_rad**(2) areas = tmp_area*Earth_area/np.nansum(tmp_area) except KeyError: try: tmp_area = tmp_file['AREA'+ext1_SE].isel(time=0).data Earth_area = 4 * np.pi * Earth_rad**(2) areas = tmp_area*Earth_area/np.nansum(tmp_area) except: dlon = np.abs(lon[1]-lon[0]) dlat = np.abs(lat[1]-lat[0]) lon2d,lat2d = np.meshgrid(lon,lat) #area=np.zeros_like(lat2d) dy = Earth_rad*dlat*np.pi/180 dx = Earth_rad*np.cos(lat2d*np.pi/180)*dlon*np.pi/180 tmp_area = dx*dy areas = tmp_area # End if # Variables to return return current_files,lat,lon,areas,ext1_SE
#####
[docs] def set_dic_SE(ListVars, ext1_SE): """ Initialize dictionary to house all the relevant tabel data """ # Initialize dictionary #---------------------- dic_SE={} # Chemistry #---------- dic_SE['O3']={'O3'+ext1_SE:1e9} # covert to ppb for Tropopause calculation dic_SE['CH4']={'CH4'+ext1_SE:1} dic_SE['CO']={'CO'+ext1_SE:1} dic_SE['ISOP']={'ISOP'+ext1_SE:1} dic_SE['MTERP']={'MTERP'+ext1_SE:1} dic_SE['CH3OH']={'CH3OH'+ext1_SE:1} dic_SE['CH3COCH3']={'CH3COCH3'+ext1_SE:1} dic_SE['CH3CCL3']={'CH3CCL3'+ext1_SE:1} # Aerosols #--------- dic_SE['DAOD']={'AODDUSTdn'+ext1_SE:1} dic_SE['AOD']={'AODVISdn'+ext1_SE:1} dic_SE['DUST']={'dst_a1'+ext1_SE:1, 'dst_a2'+ext1_SE:1, 'dst_a3'+ext1_SE:1} dic_SE['SALT']={'ncl_a1'+ext1_SE:1, 'ncl_a2'+ext1_SE:1, 'ncl_a3'+ext1_SE:1} dic_SE['POM']={'pom_a1'+ext1_SE:1, 'pom_a4'+ext1_SE:1} dic_SE['BC']={'bc_a1'+ext1_SE:1, 'bc_a4'+ext1_SE:1} dic_SE['SO4']={'so4_a1'+ext1_SE:1, 'so4_a2'+ext1_SE:1, 'so4_a3'+ext1_SE:1, 'so4_a5'+ext1_SE:1} # FOR SOA, first check if the integrated bins are included if (('soa_a1'+ext1_SE in ListVars ) & ('soa_a1'+ext1_SE in ListVars )): dic_SE['SOA'] = {'soa_a1'+ext1_SE:1, 'soa_a2'+ext1_SE:1} else: dic_SE['SOA'] = {'soa1_a1'+ext1_SE:1, 'soa2_a1'+ext1_SE:1, 'soa3_a1'+ext1_SE:1, 'soa4_a1'+ext1_SE:1, 'soa5_a1'+ext1_SE:1, 'soa1_a2'+ext1_SE:1, 'soa2_a2'+ext1_SE:1, 'soa3_a2'+ext1_SE:1, 'soa4_a2'+ext1_SE:1, 'soa5_a2'+ext1_SE:1} # End if return dic_SE
#####
[docs] def fill_dic_SE(adfobj, dic_SE, variables, ListVars, ext1_SE, AEROSOLS, MW, AVO, gr, Mwair): """ Function for dealing with conversion factors for different components and filling the main data dictionary 'dic_SE' Input dictionary and return updated dictionary 'dic_SE' Arguments --------- variables : list - list of main variables? ListVars : list - list of ??????? Returns ------- dic_SE : dict - full dictionary of derived variables Some conversion factors need density or Layer's pressure, that will be accounted for when reading the files. We convert everying to kg/m2/s or kg/m2 or kg/s, so that final Tg/yr or Tg results are consistent """ # Logging info message msg = f"chem/aerosol tables: 'fill_dic_SE'" for var in variables: if 'AOD' in var: dic_SE[var+'_AOD']={} else: dic_SE[var+'_BURDEN']={} dic_SE[var+'_CHML']={} dic_SE[var+'_CHMP']={} dic_SE[var+'_SF']={} dic_SE[var+'_CLXF']={} dic_SE[var+'_DDF']={} dic_SE[var+'_WDF']={} if var in AEROSOLS: dic_SE[var+'_GAEX']={} dic_SE[var+'_DDFC']={} dic_SE[var+'_WDFC']={} else: dic_SE[var+'_TEND']={} dic_SE[var+'_LNO']={} # End if # We have nucleation and aqueous chemistry for sulfate. if var=='SO4': dic_SE[var+'_NUCL']={} dic_SE[var+'_AQS']={} # End if # Grab the variable keys var_keys = dic_SE[var].keys() for key in var_keys: msg += f"\n\t Creating component of {var}: {key}" # for CHML and CHMP: # original unit : [molec/cm3/s] # following Tilmes code to convert to [kg/m2/s] # conversion: Mw*rho*delP*1e3/Avo/gr # rho and delP will be applied when reading the files in SEbudget function. # for AOD and DAOD: if 'AOD' in var: if key in ListVars: dic_SE[var+'_AOD'][key+ext1_SE]=1 else: dic_SE[var+'_AOD']['PS'+ext1_SE]=0. # End if continue # AOD doesn't need any other budget calculations # End if # for CHML and CHMP: # original unit : [molec/cm3/s] # following Tilmes code to convert to [kg/m2/s] # conversion: Mw*rho*delP*1e3/Avo/gr # rho and delP will be applied when reading the files in SEbudget function. if key=='O3'+ext1_SE: # for O3, we should not include fast cycling reactions # As a result, we use below diagnostics in the model if available. If not, we use CHML and CHMP if ((key+'_Loss' in ListVars) & (key+'_Prod' in ListVars)) : dic_SE[var+'_CHML'][key+'_Loss'+ext1_SE]=MW[var]*1e3/AVO/gr dic_SE[var+'_CHMP'][key+'_Prod'+ext1_SE]=MW[var]*1e3/AVO/gr else: if key+'_CHML' in ListVars: dic_SE[var+'_CHML'][key+'_CHML'+ext1_SE]=MW[var]*1e3/AVO/gr else: dic_SE[var+'_CHML']['U'+ext1_SE]=0 # End if if key+'_CHMP' in ListVars: dic_SE[var+'_CHMP'][key+'_CHMP'+ext1_SE]=MW[var]*1e3/AVO/gr else: dic_SE[var+'_CHMP']['U'+ext1_SE]=0 # End if # End if else: if key+'_CHML' in ListVars: dic_SE[var+'_CHML'][key+'_CHML'+ext1_SE]=MW[var]*1e3/AVO/gr else: dic_SE[var+'_CHML']['U'+ext1_SE]=0 # End if if key+'_CHMP' in ListVars: dic_SE[var+'_CHMP'][key+'_CHMP'+ext1_SE]=MW[var]*1e3/AVO/gr else: dic_SE[var+'_CHMP']['U'+ext1_SE]=0 # End if # End if # for SF: # original unit: [kg/m2/s] if 'SF'+key in ListVars: if var=='SO4': dic_SE[var+'_SF']['SF'+key+ext1_SE]=32.066/115.11 else: dic_SE[var+'_SF']['SF'+key+ext1_SE]=1 # End if elif key+'SF' in ListVars: dic_SE[var+'_SF'][key+ext1_SE+'SF']=1 else: dic_SE[var+'_SF']['PS'+ext1_SE]=0. # End if # for CLXF: # original unit: [molec/cm2/s] # conversion: Mw*10/Avo if key+'_CLXF' in ListVars: dic_SE[var+'_CLXF'][key+'_CLXF'+ext1_SE]=MW[var]*10/AVO # convert [molec/cm2/s] to [kg/m2/s] else: dic_SE[var+'_CLXF']['PS'+ext1_SE]=0. # End if # Aerosols if var in AEROSOLS: # for each species: # original unit : [kg/kg] in dry air # convert to [kg/m2] # conversion: delP/gr # delP will be applied when reading the files in SEbudget function. if key in ListVars: if var=='SO4': # For SO4, we report all the budget calculation for Sulfur. dic_SE[var+'_BURDEN'][key+ext1_SE]=(32.066/115.11)/gr else: dic_SE[var+'_BURDEN'][key+ext1_SE]=1/gr # End if else: dic_SE[var+'_BURDEN']['U'+ext1_SE]=0 # End if # for DDF: # original unit: [kg/m2/s] if key+'DDF' in ListVars: if var=='SO4': dic_SE[var+'_DDF'][key+ext1_SE+'DDF']=32.066/115.11 else: dic_SE[var+'_DDF'][key+ext1_SE+'DDF']=1 # End if else: dic_SE[var+'_DDF']['PS'+ext1_SE]=0. # End if # for SFWET: # original unit: [kg/m2/s] if key+'SFWET' in ListVars: if var=='SO4': dic_SE[var+'_WDF'][key+ext1_SE+'SFWET']=32.066/115.11 else: dic_SE[var+'_WDF'][key+ext1_SE+'SFWET']=1 # End if else: dic_SE[var+'_WDF']['PS'+ext1_SE]=0. # End if # for sfgaex1: # original unit: [kg/m2/s] if key+'_sfgaex1' in ListVars: if var=='SO4': dic_SE[var+'_GAEX'][key+ext1_SE+'_sfgaex1']=32.066/115.11 else: dic_SE[var+'_GAEX'][key+ext1_SE+'_sfgaex1']=1 # End if else: dic_SE[var+'_GAEX']['PS'+ext1_SE]=0. # End if # for DDF in cloud water: # original unit: [kg/m2/s] cloud_key=key[:-2]+'c'+key[-1] if cloud_key+ext1_SE+'DDF' in ListVars: if var=='SO4': dic_SE[var+'_DDFC'][cloud_key+ext1_SE+'DDF']=32.066/115.11 else: dic_SE[var+'_DDFC'][cloud_key+ext1_SE+'DDF']=1 # End if else: dic_SE[var+'_DDFC']['PS'+ext1_SE]=0. # End if # for SFWET in cloud water: # original unit: [kg/m2/s] if cloud_key+ext1_SE+'SFWET' in ListVars: if var=='SO4': dic_SE[var+'_WDFC'][cloud_key+ext1_SE+'SFWET']=32.066/115.11 else: dic_SE[var+'_WDFC'][cloud_key+ext1_SE+'SFWET']=1 # End if else: dic_SE[var+'_WDFC']['PS'+ext1_SE]=0. # End if if var=='SO4': # for Nucleation : # original unit: [kg/m2/s] if key+ext1_SE+'_sfnnuc1' in ListVars: dic_SE[var+'_NUCL'][key+ext1_SE+'_sfnnuc1']=32.066/115.11 else: dic_SE[var+'_NUCL']['PS'+ext1_SE]=0. # End if # for Aqueous phase : # original unit: [kg/m2/s] if (('AQSO4_H2O2'+ext1_SE in ListVars) & ('AQSO4_O3'+ext1_SE in ListVars)) : dic_SE[var+'_AQS']['AQSO4_H2O2'+ext1_SE]=32.066/115.11 dic_SE[var+'_AQS']['AQSO4_O3'+ext1_SE]=32.066/115.11 else: # original unit: [kg/m2/s] if cloud_key+'AQSO4'+ext1_SE in ListVars: dic_SE[var+'_AQS'][cloud_key+'AQSO4'+ext1_SE]=32.066/115.11 else: dic_SE[var+'_AQS']['PS'+ext1_SE]=0. # End if if cloud_key+'AQH2SO4'+ext1_SE in ListVars: dic_SE[var+'_AQS'][cloud_key+'AQH2SO4'+ext1_SE]=32.066/115.11 else: dic_SE[var+'_AQS']['PS'+ext1_SE]=0. # End if # End if # End if else: # Gases # for each species: # original unit : [mole/mole] in dry air # convert to [kg/m2] # conversion: Mw*delP/Mwair/gr Mwair=28.97 gr/mole # delP will be applied when reading the files in SEbudget function. if key in ListVars: dic_SE[var+'_BURDEN'][key+ext1_SE]=MW[var]/Mwair/gr else: dic_SE[var+'_BURDEN']['U'+ext1_SE]=0 # End if # for DF: # original unit: [kg/m2/s] if 'DF_'+key in ListVars: dic_SE[var+'_DDF']['DF_'+key+ext1_SE]=1 else: dic_SE[var+'_DDF']['PS'+ext1_SE]=0. # End if # for WD: # original unit: [kg/m2/s] if 'WD_'+key in ListVars: dic_SE[var+'_WDF']['WD_'+key+ext1_SE]=1 else: dic_SE[var+'_WDF']['PS'+ext1_SE]=0. # End if # for Chem tendency: # original unit: [kg/s] # conversion: not needed if 'D'+key+'CHM' in ListVars: dic_SE[var+'_TEND']['D'+key+'CHM'+ext1_SE]=1 # convert [kg/s] to [kg/s] else: dic_SE[var+'_TEND']['U'+ext1_SE]=0 # End if # for Lightning NO production: (always in gas) # original unit: [Tg N/Yr] # conversion: not needed if 'LNO_COL_PROD' in ListVars: dic_SE[var+'_LNO']['LNO_COL_PROD'+ext1_SE]=1 # convert [Tg N/yr] to [Tg N /yr] else: dic_SE[var+'_LNO']['PS'+ext1_SE]=0 # End if # End if (aerosols or gases) # End for # End for # Write to log adfobj.debug_log(msg) return dic_SE
#####
[docs] def make_Dic_scn_var_comp(adfobj, variables, current_dir, dic_SE, current_files, ext1_SE, AEROSOLS): """ This function retrieves the files, latitude, and longitude information in all the directories within the chosen dates. current_dir: list - showing the directories to look for files. always end with '/' current_files: list - List of CAM history files start_year: string - Starting year end_year: string - Ending year kwargs ------ ext1_SE: string - specify if the files are for only a region, which changes to variable names. ex: if you saved files for a only a box region ($LL_lat$,$LL_lon$,$UR_lat$,$UR_lon$), the 'lat' variable will be saved as: 'lat_$LL_lon$e_to_$UR_lon$e_$LL_lat$n_to_$UR_lat$n' for instance: 'lat_65e_to_91e_20n_to_32n' Returns ------- Dic_crit: - dictionary for critical values for current case Dic_scn_var_comp: - full dictionary of all variables and components for current case NOTE: The LNO is lightning NOx, which should be reported explicitly rather as CO_LNO, O3_LNO, ... """ # Set lists to gather necessary variables for logging missing_vars_tot = [] needed_vars_tot = [] # Initialize final component dictionary Dic_var_comp={} for current_var in variables: if 'AOD' in current_var: components=[current_var+'_AOD'] else: if current_var in AEROSOLS: # AEROSOLS # Components are: burden, chemical loss, chemical prod, dry deposition, # surface emissions, elevated emissions, wet deposition, gas-aerosol exchange components=[current_var+'_BURDEN',current_var+'_CHML',current_var+'_CHMP', current_var+'_DDF',current_var+'_WDF', current_var+'_SF', current_var+'_CLXF', current_var+'_DDFC',current_var+'_WDFC'] if current_var=='SO4': # For SULF we also have AQS, nucleation, and strat-trop gas exchange components.append(current_var+'_AQS') components.append(current_var+'_NUCL') components.append(current_var+'_GAEX') components.remove(current_var+'_CHMP') #components.append(current_var+'_CLXF') # BRT - CLXF is added above. if current_var == "SOA": components.append(current_var+'_GAEX') #End if - AEROSOLS else: # CHEMS # Components are: burden, chemical loss, chemical prod, dry/wet deposition, # surface emissions, elevated emissions, chemical tendency # I always add Lightning NOx production when calculating O3 budget. components=[current_var+'_BURDEN',current_var+'_CHML',current_var+'_CHMP', current_var+'_DDF',current_var+'_WDF', current_var+'_SF', current_var+'_CLXF', current_var+'_TEND'] if current_var =="O3": components.append(current_var+'_LNO') # End if # End if msg = f"chem/aerosol tables: 'make_Dic_scn_var_comp'" msg += f"\n\t Current CAM variable: {current_var}" msg += f"\n\t Derived components for CAM variable {current_var}: {components}" #adfobj.debug_log(msg) Dic_comp={} for comp in components: # Write details to log file msg += f"\n\t\t calculate derived component: {comp} for main variable, {current_var}" adfobj.debug_log(msg) # Get component values current_data,missing_vars,needed_vars = SEbudget(adfobj,dic_SE,current_dir,current_files,comp,ext1_SE) # Gather info for debugging for var_m in missing_vars: if var_m not in missing_vars_tot: missing_vars_tot.append(var_m) for var_n in needed_vars: if var_n not in needed_vars_tot: needed_vars_tot.append(var_n) # End for # End for #TODO: check this section to see if it can't be better run # Set dictionary for component Dic_comp[comp] = current_data # Set dictionary for key of current variable with dictionary values of all # necessary constituents for calculating the current variable Dic_var_comp[current_var] = Dic_comp Dic_scn_var_comp = Dic_var_comp # Critical threshholds, just run this once # this is for finding tropospheric values current_crit=SEbudget(adfobj,dic_SE,current_dir,current_files,'O3',ext1_SE) Dic_crit=current_crit # Log info to logging file msg = f"chem/aerosol tables:" msg += f"\n\t - potential missing variables from budget? {missing_vars_tot}" adfobj.debug_log(msg) msg = f"chem/aerosol tables:" msg += f"\n\t - needed variables for budget {needed_vars_tot}" adfobj.debug_log(msg) return Dic_crit,Dic_scn_var_comp
#####
[docs] def SEbudget(adfobj,dic_SE,data_dir,files,var,ext1_SE,**kwargs): """ Function used for getting the data for the budget calculation. This is the chunk of code that takes the longest by far. Example: ~70/75 mins per case for 9 years ** This is for both chemistry and aeorosl calculations dic_SE: dictionary specyfing what variables to get. For example, for precipitation you can define SE as: dic_SE['PRECT']={'PRECC'+ext1_SE:8.64e7,'PRECL'+ext1_SE:8.64e7} - It means to sum the file variables "PRECC" and "PRECL" for my arbitrary desired variable named "PRECT" - It also has the option to apply conversion factors. For instance, PRECL and PRECC are in m/s. 8.64e7 is used to convernt m/s to mm/day data_dir: string of the directory that contains the files. always end with '/' files: list of the files to be read var: string showing the variable to be extracted. -> this will be the individual componnent, ie O3_CHMP, SOA_WDF, etc. """ # gas constanct Rgas=287.04 #[J/K/Kg]=8.314/0.028965 # Set lists to gather necessary variables for logging missing_vars = [] needed_vars = [] all_data=[] for file in range(len(files)): ds=xr.open_dataset(Path(data_dir) / files[file]) # Calculate these just once if file==0: mock_2d=np.zeros_like(np.array(ds['PS'+ext1_SE].isel(time=0))) mock_3d=np.zeros_like(np.array(ds['U'+ext1_SE].isel(time=0))) # Star gathering of variable data data=[] for i in dic_SE[var].keys(): if i not in needed_vars: needed_vars.append(i) if file == 0: msg = f"chem/aerosol tables: 'SEbudget'" msg += f"\n\t\t ** variable(s) needed for derived var {var}: {dic_SE[var].keys()}" msg += f"\n\t\t - constituent for derived var {var}: {i}" adfobj.debug_log(msg) if ((i!='PS'+ext1_SE) and (i!='U'+ext1_SE) ) : data.append(np.array(ds[i].isel(time=0))*dic_SE[var][i]) else: if i=='PS'+ext1_SE: data.append(mock_2d) else: data.append(mock_3d) # End if if var not in missing_vars: missing_vars.append(var) # End if # Get total summed data for this history file data data=np.sum(data,axis=0) try: delP=np.array(ds['PDELDRY'+ext1_SE].isel(time=0)) except: hyai=np.array(ds['hyai']) hybi=np.array(ds['hybi']) try: PS=np.array(ds['PSDRY'+ext1_SE].isel(time=0)) except: PS=np.array(ds['PS'+ext1_SE].isel(time=0)) # End try/except P0=1e5 Plevel=np.zeros_like(np.array(ds['U'+ext1_SE])) for i in range(len(Plevel)): Plevel[i]=hyai[i]*P0+hybi[i]*PS delP=Plevel[1:]-Plevel[:-1] # End try/except if ('CHML' in var) or ('CHMP' in var) : Temp=np.array(ds['T'+ext1_SE].isel(time=0)) Pres=np.array(ds['PMID'+ext1_SE].isel(time=0)) rho= Pres/(Rgas*Temp) data=data*delP/rho elif ('BURDEN' in var): data=data*delP else: data=data # End if # Add data to list all_data.append(data) # Take mean all_data=np.nanmean(all_data,axis=0) return all_data,missing_vars,needed_vars
#####
[docs] def calc_budget_data(current_var, Dic_scn_var_comp, area, trop, inside, num_yrs, duration, AEROSOLS): """ Function to run through desired table values for calculations for the table entries """ # Initialize full data dictionary for current table type chem_dict = {} # Update variable marker if neccessary if current_var == 'SO4': specifier = ' S' else: specifier = '' # Calculate values for given variable if 'AOD' in current_var: # Burden spc_burd = Dic_scn_var_comp[current_var][current_var+'_AOD'] burden = np.ma.masked_where(inside==False,spc_burd) #convert Kg/m2 to Tg BURDEN = np.ma.sum(burden*area)/np.ma.sum(area) chem_dict[f"{current_var}_mean"] = np.round(BURDEN,5) else: # Surface Emissions spc_sf = Dic_scn_var_comp[current_var][current_var+'_SF'] tmp_sf = spc_sf sf = np.ma.masked_where(inside==False,tmp_sf*area) #convert Kg/m2/s to Tg/yr SF = np.ma.sum(sf*duration*1e-9)/num_yrs chem_dict[f"{current_var}_EMIS (Tg{specifier}/yr)"] = np.round(SF,5) # Elevated Emissions spc_clxf = Dic_scn_var_comp[current_var][current_var+'_CLXF'] tmp_clxf = spc_clxf clxf = np.ma.masked_where(inside==False,tmp_clxf*area) #convert Kg/m2/s to Tg/yr CLXF = np.ma.sum(clxf*duration*1e-9)/num_yrs chem_dict[f"{current_var}_EMIS_elevated (Tg{specifier}/yr)"] = np.round(CLXF,5) # Burden spc_burd = Dic_scn_var_comp[current_var][current_var+'_BURDEN'] spc_burd = np.where(np.isnan(trop),np.nan,spc_burd) tmp_burden = np.nansum(spc_burd*area,axis=0) burden = np.ma.masked_where(inside==False,tmp_burden) #convert Kg/m2 to Tg BURDEN = np.ma.sum(burden*1e-9) chem_dict[f"{current_var}_BURDEN (Tg{specifier})"] = np.round(BURDEN,5) # Chemical Loss spc_chml = Dic_scn_var_comp[current_var][current_var+'_CHML'] spc_chml = np.where(np.isnan(trop),np.nan,spc_chml) tmp_chml = np.nansum(spc_chml*area,axis=0) chml = np.ma.masked_where(inside==False,tmp_chml) #convert Kg/m2/s to Tg/yr CHML = np.ma.sum(chml*duration*1e-9)/num_yrs chem_dict[f"{current_var}_CHEM_LOSS (Tg{specifier}/yr)"] = np.round(CHML,5) # Chemical Production if current_var == 'SO4': # chemical production is basically the elevated emissions. # We have removed it for SO4 budget. and put 0 here, so, we don't report it chem_dict[f"{current_var}_CHEM_PROD (Tg{specifier}/yr)"] = 0 else: spc_chmp = Dic_scn_var_comp[current_var][current_var+'_CHMP'] spc_chmp = np.where(np.isnan(trop),np.nan,spc_chmp) tmp_chmp = np.nansum(spc_chmp*area,axis=0) chmp = np.ma.masked_where(inside==False,tmp_chmp) #convert Kg/m2/s to Tg/yr CHMP = np.ma.sum(chmp*duration*1e-9)/num_yrs chem_dict[f"{current_var}_CHEM_PROD (Tg{specifier}/yr)"] = np.round(CHMP,5) # End if # Aerosol calculations #--------------------- if current_var in AEROSOLS: # Dry Deposition Flux spc_ddfa = Dic_scn_var_comp[current_var][current_var+'_DDF'] spc_ddfc = Dic_scn_var_comp[current_var][current_var+'_DDFC'] spc_ddf = spc_ddfa +spc_ddfc tmp_ddf = spc_ddf ddf = np.ma.masked_where(inside==False,tmp_ddf*area) #convert Kg/m2/s to Tg/yr DDF = np.ma.sum(ddf*duration*1e-9)/num_yrs chem_dict[f"{current_var}_DRYDEP (Tg{specifier}/yr)"] = np.round(DDF,5) # Wet deposition spc_wdfa = Dic_scn_var_comp[current_var][current_var+'_WDF'] spc_wdfc = Dic_scn_var_comp[current_var][current_var+'_WDFC'] spc_wdf = spc_wdfa +spc_wdfc tmp_wdf = spc_wdf wdf = np.ma.masked_where(inside==False,tmp_wdf*area) #convert Kg/m2/s to Tg/yr WDF = np.ma.sum(wdf*duration*1e-9)/num_yrs chem_dict[f"{current_var}_WETDEP (Tg{specifier}/yr)"] = np.round(WDF,5) if current_var in ["SOA",'SO4']: # gas-aerosol Exchange spc_gaex = Dic_scn_var_comp[current_var][current_var+'_GAEX'] tmp_gaex = spc_gaex gaex = np.ma.masked_where(inside==False,tmp_gaex*area) #convert Kg/m2/s to Tg/yr GAEX = np.ma.sum(gaex*duration*1e-9)/num_yrs chem_dict[f"{current_var}_GAEX (Tg{specifier}/yr)"] = np.round(GAEX,5) # LifeTime = Burden/(loss+deposition) LT = BURDEN/(CHML+DDF-WDF)* duration/86400/num_yrs # days chem_dict[f"{current_var}_LIFETIME (days)"] = np.round(LT,5) if current_var == 'SO4': # Aqueous Chemistry spc_aqs = Dic_scn_var_comp[current_var][current_var+'_AQS'] tmp_aqs = spc_aqs aqs = np.ma.masked_where(inside==False,tmp_aqs*area) #convert Kg/m2/s to Tg/yr AQS = np.ma.sum(aqs*duration*1e-9)/num_yrs chem_dict[f"{current_var}_AQUEOUS (Tg{specifier}/yr)"] = np.round(AQS,5) # Nucleation spc_nucl = Dic_scn_var_comp[current_var][current_var+'_NUCL'] tmp_nucl = spc_nucl nucl = np.ma.masked_where(inside==False,tmp_nucl*area) #convert Kg/m2/s to Tg/yr NUCL = np.ma.sum(nucl*duration*1e-9)/num_yrs chem_dict[f"{current_var}_NUCLEATION (Tg{specifier}/yr)"] = np.round(NUCL,5) # Gaseous calculations #--------------------- else: # Dry Deposition Flux spc_ddf = Dic_scn_var_comp[current_var][current_var+'_DDF'] tmp_ddf = spc_ddf ddf = np.ma.masked_where(inside==False,tmp_ddf*area) #convert Kg/m2/s to Tg/yr DDF = np.ma.sum(ddf*duration*1e-9)/num_yrs chem_dict[f"{current_var}_DRYDEP (Tg/yr)"] = np.round(DDF,5) # Wet Deposition Flux spc_wdf = Dic_scn_var_comp[current_var][current_var+'_WDF'] tmp_wdf = spc_wdf wdf = np.ma.masked_where(inside==False,tmp_wdf*area) #convert Kg/m2/s to Tg/yr WDF = -1*np.ma.sum(wdf*duration*1e-9)/num_yrs chem_dict[f"{current_var}_WETDEP (Tg/yr)"] = np.round(WDF,5) # Total Deposition TDEP = DDF - WDF chem_dict[f"{current_var}_TDEP (Tg/yr)"] = np.round(TDEP,5) # LifeTime = Burden/(loss+deposition) if current_var == "CH4": LT = BURDEN/(CHML+DDF-WDF) # years chem_dict[f"{current_var}_LIFETIME (years)"] = np.round(LT,5) else: if (CHML+DDF-WDF) > 0: if CHML != 0: LT = BURDEN/(CHML+DDF-WDF)*duration/86400/num_yrs # days chem_dict[f"{current_var}_LIFETIME (days)"] = np.round(LT,5) else: # do not report lifetime if chemical loss (for gases) is not included in the model outputs # and put 0 here, so, we don't report it chem_dict[f"{current_var}_LIFETIME (days)"] = 0 # End if # End if # End if #NET = CHMP-CHML # Chemical Tendency spc_tnd = Dic_scn_var_comp[current_var][current_var+'_TEND'] spc_tnd = np.where(np.isnan(trop),np.nan,spc_tnd) tmp_tnd = np.nansum(spc_tnd,axis=0) tnd = np.ma.masked_where(inside==False,tmp_tnd) #convert Kg/s to Tg/yr TND = np.ma.sum(tnd*duration*1e-9)/num_yrs chem_dict[f"{current_var}_TEND (Tg/yr)"] = np.round(TND,5) # O3 dependent calculations if current_var == "O3": # Stratospheric-Tropospheric Exchange STE = DDF-TND chem_dict[f"{current_var}_STE (Tg/yr)"] = np.round(STE,5) # Lightning NOX production spc_lno = Dic_scn_var_comp[current_var][current_var+'_LNO'] tmp_lno = np.ma.masked_where(inside==False,spc_lno) LNO = np.ma.sum(tmp_lno) chem_dict[f"{current_var}_LNO (Tg N/yr)"] = np.round(LNO,5) # End if (aerosol or gas) return chem_dict
#####
[docs] def make_table(adfobj, vars, chem_type, Dic_scn_var_comp, areas, trops, case_names, nicknames, durations, insides, num_yrs, AEROSOLS): """ Create CSV table for aeorosols and gases, if applicable Table includes column values of variable, case(s), difference (if applicable) If this is a single model vs model run: 4 columns first column: variables names, second column: test case variable values third column: baseline case variable values final column: difference of test and baseline. If this is a model vs obs run: 2 columns first column: variables names, second column: test case variable values """ # Initialize an empty dictionary to store DataFrames dfs = {} #Special ADF variable which contains the output paths for #all generated plots and tables for each case: output_locs = adfobj.plot_location #Convert output location string to a Path object: output_location = Path(output_locs[0]) # Loop over model cases for case in case_names: nickname = nicknames[case] # Collect row data in a list of dictionaries durations[case] rows = [] for current_var in vars: chem_dict = calc_budget_data(current_var, Dic_scn_var_comp[case], areas[case], trops[case], insides[case], num_yrs[case], durations[case], AEROSOLS) # Loop through table variables for key, val in chem_dict.items(): if val != 0: # Skip variables with a value of 0 print(f"\t - Variable '{key}' being added to table") rows.append({'variable': key, nickname: np.round(val, 3)}) else: msg = f"chem/aerosol tables:" msg += f"\n\t - Variable '{key}' has value of 0, will not add to table" adfobj.debug_log(msg) # End if # End for # End for # Create the DataFrame for the current case table_df = pd.DataFrame(rows) if chem_type == 'gases': # Replace compound names directly in the DataFrame replacements = { 'MTERP': 'Monoterpene', 'CH3OH': 'Methanol', 'CH3COCH3': 'Acetone', 'O3_LNO': 'LNOx_PROD' } table_df['variable'] = table_df['variable'].replace(replacements, regex=True) # End if # Store the DataFrame in the dictionary dfs[nickname] = table_df # End for # Merge the DataFrames on the 'variable' column if len(case_names) == 2: table_df = pd.merge(dfs[nicknames[case_names[0]]], dfs[nicknames[case_names[1]]], on='variable') # Calculate the differences between case columns table_df['difference'] = table_df[nicknames[case_names[0]]] - table_df[nicknames[case_names[1]]] #Create output file name: output_csv_file = output_location / f'ADF_amwg_{chem_type}_table.csv' # Save table to CSV and add table dataframe to website (if enabled) table_df.to_csv(output_csv_file, index=False) adfobj.add_website_data(table_df, chem_type, case, plot_type="Tables")
#####