LCOV - code coverage report
Current view: top level - dynamics/se/dycore - prim_advance_mod.F90 (source / functions) Hit Total Coverage
Test: coverage.info Lines: 547 763 71.7 %
Date: 2024-12-17 17:57:11 Functions: 9 10 90.0 %

          Line data    Source code
       1             : module prim_advance_mod
       2             :   use shr_kind_mod,   only: r8=>shr_kind_r8
       3             :   use edgetype_mod,   only: EdgeBuffer_t
       4             :   use perf_mod,       only: t_startf, t_stopf, t_adj_detailf !, t_barrierf _EXTERNAL
       5             :   use cam_abortutils, only: endrun
       6             :   use parallel_mod,   only: parallel_t, HME_BNDRY_P2P!,HME_BNDRY_A2A
       7             :   use thread_mod ,    only: horz_num_threads, vert_num_threads, omp_set_nested
       8             : 
       9             :   implicit none
      10             :   private
      11             :   save
      12             : 
      13             :   public :: prim_advance_exp, prim_advance_init, applyCAMforcing, tot_energy_dyn, compute_omega
      14             : 
      15             :   type (EdgeBuffer_t) :: edge3,edgeOmega,edgeSponge
      16             :   real (kind=r8), allocatable :: ur_weights(:)
      17             : contains
      18             : 
      19        1536 :   subroutine prim_advance_init(par, elem)
      20             :     use edge_mod,       only: initEdgeBuffer
      21             :     use element_mod,    only: element_t
      22             :     use dimensions_mod, only: nlev,ksponge_end
      23             :     use control_mod,    only: qsplit
      24             : 
      25             :     type (parallel_t)                       :: par
      26             :     type (element_t), target, intent(inout) :: elem(:)
      27             :     integer                                 :: i
      28             : 
      29        1536 :     call initEdgeBuffer(par,edge3   ,elem,4*nlev   ,bndry_type=HME_BNDRY_P2P, nthreads=horz_num_threads)
      30        1536 :     if (ksponge_end>0) then
      31        1536 :        call initEdgeBuffer(par,edgeSponge,elem,4*ksponge_end,bndry_type=HME_BNDRY_P2P, nthreads=horz_num_threads)
      32             :     end if
      33        1536 :     call initEdgeBuffer(par,edgeOmega ,elem,nlev         ,bndry_type=HME_BNDRY_P2P, nthreads=horz_num_threads)
      34             : 
      35        4608 :     if(.not. allocated(ur_weights)) allocate(ur_weights(qsplit))
      36        3072 :     ur_weights(:)=0.0_r8
      37             : 
      38        1536 :     if(mod(qsplit,2).NE.0)then
      39        1536 :       ur_weights(1)=1.0_r8/qsplit
      40        1536 :       do i=3,qsplit,2
      41        1536 :         ur_weights(i)=2.0_r8/qsplit
      42             :       enddo
      43             :     else
      44           0 :       do i=2,qsplit,2
      45           0 :         ur_weights(i)=2.0_r8/qsplit
      46             :       enddo
      47             :     endif
      48        1536 :   end subroutine prim_advance_init
      49             : 
      50     2216448 :   subroutine prim_advance_exp(elem, fvm, deriv, hvcoord, hybrid,dt, tl,  nets, nete)
      51        1536 :     use control_mod,       only: tstep_type, qsplit
      52             :     use derivative_mod,    only: derivative_t
      53             :     use dimensions_mod,    only: np, nlev
      54             :     use element_mod,       only: element_t
      55             :     use hybvcoord_mod,     only: hvcoord_t
      56             :     use hybrid_mod,        only: hybrid_t
      57             :     use se_dyn_time_mod,   only: TimeLevel_t,  timelevel_qdp, tevolve
      58             :     use fvm_control_volume_mod, only: fvm_struct
      59             :     use cam_thermo,        only: get_kappa_dry
      60             :     use air_composition,   only: thermodynamic_active_species_num
      61             :     use air_composition,   only: thermodynamic_active_species_idx_dycore, get_cp
      62             :     use physconst,         only: cpair
      63             :     implicit none
      64             : 
      65             :     type (element_t), intent(inout), target   :: elem(:)
      66             :     type(fvm_struct)     , intent(inout) :: fvm(:)
      67             :     type (derivative_t)  , intent(in) :: deriv
      68             :     type (hvcoord_t)                  :: hvcoord
      69             :     type (hybrid_t)      , intent(in) :: hybrid
      70             :     real (kind=r8), intent(in) :: dt
      71             :     type (TimeLevel_t)   , intent(in) :: tl
      72             :     integer              , intent(in) :: nets
      73             :     integer              , intent(in) :: nete
      74             : 
      75             :     ! Local
      76             :     real (kind=r8) :: dt_vis, eta_ave_w
      77             :     integer        :: ie,nm1,n0,np1,k,qn0,m_cnst, nq
      78     4432896 :     real (kind=r8) :: inv_cp_full(np,np,nlev,nets:nete)
      79     4432896 :     real (kind=r8) :: qwater(np,np,nlev,thermodynamic_active_species_num,nets:nete)
      80     4432896 :     integer        :: qidx(thermodynamic_active_species_num)
      81     4432896 :     real (kind=r8) :: kappa(np,np,nlev,nets:nete)
      82             : 
      83     2216448 :     nm1   = tl%nm1
      84     2216448 :     n0    = tl%n0
      85     2216448 :     np1   = tl%np1
      86             : 
      87     2216448 :     call TimeLevel_Qdp(tl, qsplit, qn0)  ! compute current Qdp() timelevel
      88             :     !
      89             :     !   tstep_type=1  RK2-SSP 3 stage (as used by tracers)           CFL=.58
      90             :     !                    optimal in terms of SSP CFL, but not        CFLSSP=2
      91             :     !                    optimal in terms of CFL
      92             :     !                    typically requires qsplit=3
      93             :     !                    but if windspeed > 340m/s, could use this
      94             :     !                    with qsplit=1
      95             :     !   tstep_type=2  classic RK3                                    CFL=1.73 (sqrt(3))
      96             :     !
      97             :     !   tstep_type=3  Kinnmark&Gray RK4 4 stage                      CFL=sqrt(8)=2.8
      98             :     !                 should we replace by standard RK4 (CFL=sqrt(8))?
      99             :     !                 (K&G 1st order method has CFL=3)
     100             :     !   tstep_type=4  Kinnmark&Gray RK3 5 stage 3rd order            CFL=3.87  (sqrt(15))
     101             :     !                 From Paul Ullrich.  3rd order for nonlinear terms also
     102             :     !                 K&G method is only 3rd order for linear
     103             :     !                 optimal: for windspeeds ~120m/s,gravity: 340m/2
     104             :     !                 run with qsplit=1
     105             :     !                 (K&G 2nd order method has CFL=4. tiny CFL improvement not worth 2nd order)
     106             :     !
     107             : 
     108     2216448 :     call omp_set_nested(.true.)
     109             : 
     110             :     ! default weights for computing mean dynamics fluxes
     111     2216448 :     eta_ave_w = 1_r8/qsplit
     112             : 
     113             :     ! ==================================
     114             :     ! Take timestep
     115             :     ! ==================================
     116     2216448 :     call t_startf('prim_adv_prep')
     117    15515136 :     do nq=1,thermodynamic_active_species_num
     118    15515136 :       qidx(nq) = nq
     119             :     end do
     120    17800848 :     do ie=nets,nete
     121   111307248 :       do nq=1,thermodynamic_active_species_num
     122    93506400 :         m_cnst = thermodynamic_active_species_idx_dycore(nq)
     123             :         !
     124             :         ! make sure Q is updated
     125             :         !
     126 >18272*10^7 :         qwater(:,:,:,nq,ie) = elem(ie)%state%Qdp(:,:,:,m_cnst,qn0)/elem(ie)%state%dp3d(:,:,:,n0)
     127             :       end do
     128             :     end do
     129             :     !
     130             :     ! compute Cp and kappa=Rdry/cpdry here and not in RK-stages since Q stays constant
     131             :     !
     132    17800848 :     do ie=nets,nete
     133             :       call get_cp(qwater(:,:,:,:,ie),.true.,&
     134    17800848 :            inv_cp_full(:,:,:,ie), active_species_idx_dycore=qidx)
     135             :     end do
     136    17800848 :     do ie=nets,nete
     137    17800848 :       call get_kappa_dry(qwater(:,:,:,:,ie), qidx, kappa(:,:,:,ie))
     138             :     end do
     139     2216448 :     call t_stopf('prim_adv_prep')
     140             : 
     141     2216448 :     dt_vis = dt
     142             : 
     143     2216448 :     if (tstep_type==1) then
     144             :       ! RK2-SSP 3 stage.  matches tracer scheme. optimal SSP CFL, but
     145             :       ! not optimal for regular CFL
     146             :       ! u1 = u0 + dt/2 RHS(u0)
     147             :       call compute_and_apply_rhs(np1,n0,n0,dt/2,elem,hvcoord,hybrid,&
     148           0 :            deriv,nets,nete,eta_ave_w/3,inv_cp_full,qwater,qidx,kappa)
     149             :       ! u2 = u1 + dt/2 RHS(u1)
     150             :       call compute_and_apply_rhs(np1,np1,np1,dt/2,elem,hvcoord,hybrid,&
     151           0 :            deriv,nets,nete,eta_ave_w/3,inv_cp_full,qwater,qidx,kappa)
     152             :       ! u3 = u2 + dt/2 RHS(u2)
     153             :       call compute_and_apply_rhs(np1,np1,np1,dt/2,elem,hvcoord,hybrid,&
     154           0 :            deriv,nets,nete,eta_ave_w/3,inv_cp_full,qwater,qidx,kappa)
     155             : 
     156             :       ! unew = u/3 +2*u3/3  = u + 1/3 (RHS(u) + RHS(u1) + RHS(u2))
     157           0 :       do ie=nets,nete
     158           0 :         elem(ie)%state%v(:,:,:,:,np1)= elem(ie)%state%v(:,:,:,:,n0)/3 &
     159           0 :              + 2*elem(ie)%state%v(:,:,:,:,np1)/3
     160             :         elem(ie)%state%T(:,:,:,np1)= elem(ie)%state%T(:,:,:,n0)/3 &
     161           0 :              + 2*elem(ie)%state%T(:,:,:,np1)/3
     162             :         elem(ie)%state%dp3d(:,:,:,np1)= elem(ie)%state%dp3d(:,:,:,n0)/3 &
     163           0 :              + 2*elem(ie)%state%dp3d(:,:,:,np1)/3
     164             :       enddo
     165     2216448 :     else if (tstep_type==2) then
     166             :       ! classic RK3  CFL=sqrt(3)
     167             :       ! u1 = u0 + dt/3 RHS(u0)
     168             :       call compute_and_apply_rhs(np1,n0,n0,dt/3,elem,hvcoord,hybrid,&
     169           0 :            deriv,nets,nete,0.0_r8,inv_cp_full,qwater,qidx,kappa)
     170             :       ! u2 = u0 + dt/2 RHS(u1)
     171             :       call compute_and_apply_rhs(np1,n0,np1,dt/2,elem,hvcoord,hybrid,&
     172           0 :            deriv,nets,nete,0.0_r8,inv_cp_full,qwater,qidx,kappa)
     173             :       ! u3 = u0 + dt RHS(u2)
     174             :       call compute_and_apply_rhs(np1,n0,np1,dt,elem,hvcoord,hybrid,&
     175           0 :            deriv,nets,nete,eta_ave_w,inv_cp_full,qwater,qidx,kappa)
     176     2216448 :     else if (tstep_type==3) then
     177             :       ! KG 4th order 4 stage:   CFL=sqrt(8)
     178             :       ! low storage version of classic RK4
     179             :       ! u1 = u0 + dt/4 RHS(u0)
     180             :       call compute_and_apply_rhs(np1,n0,n0,dt/4,elem,hvcoord,hybrid,&
     181           0 :            deriv,nets,nete,0.0_r8,inv_cp_full,qwater,qidx,kappa)
     182             :       ! u2 = u0 + dt/3 RHS(u1)
     183             :       call compute_and_apply_rhs(np1,n0,np1,dt/3,elem,hvcoord,hybrid,&
     184           0 :            deriv,nets,nete,0.0_r8,inv_cp_full,qwater,qidx,kappa)
     185             :       ! u3 = u0 + dt/2 RHS(u2)
     186             :       call compute_and_apply_rhs(np1,n0,np1,dt/2,elem,hvcoord,hybrid,&
     187           0 :            deriv,nets,nete,0.0_r8,inv_cp_full,qwater,qidx,kappa)
     188             :       ! u4 = u0 + dt RHS(u3)
     189             :       call compute_and_apply_rhs(np1,n0,np1,dt,elem,hvcoord,hybrid,&
     190           0 :            deriv,nets,nete,eta_ave_w,inv_cp_full,qwater,qidx,kappa)
     191     2216448 :     else if (tstep_type==4) then
     192             :       !
     193             :       ! Ullrich 3nd order 5 stage:   CFL=sqrt( 4^2 -1) = 3.87
     194             :       ! u1 = u0 + dt/5 RHS(u0)  (save u1 in timelevel nm1)
     195             :       ! rhs: t=t
     196             :       call compute_and_apply_rhs(nm1,n0,n0,dt/5,elem,hvcoord,hybrid,&
     197     2216448 :            deriv,nets,nete,eta_ave_w/4,inv_cp_full,qwater,qidx,kappa)
     198             :       !
     199             :       ! u2 = u0 + dt/5 RHS(u1); rhs: t=t+dt/5
     200             :       !
     201             :       call compute_and_apply_rhs(np1,n0,nm1,dt/5,elem,hvcoord,hybrid,&
     202     2216448 :            deriv,nets,nete,0.0_r8,inv_cp_full,qwater,qidx,kappa)
     203             :       !
     204             :       ! u3 = u0 + dt/3 RHS(u2); rhs: t=t+2*dt/5
     205             :       !
     206             :       call compute_and_apply_rhs(np1,n0,np1,dt/3,elem,hvcoord,hybrid,&
     207     2216448 :            deriv,nets,nete,0.0_r8,inv_cp_full,qwater,qidx,kappa)
     208             :       !
     209             :       ! u4 = u0 + 2dt/3 RHS(u3); rhs: t=t+2*dt/5+dt/3
     210             :       !
     211             :       call compute_and_apply_rhs(np1,n0,np1,2*dt/3,elem,hvcoord,hybrid,&
     212     2216448 :            deriv,nets,nete,0.0_r8,inv_cp_full,qwater,qidx,kappa)
     213             :       ! compute (5*u1/4 - u0/4) in timelevel nm1:
     214    17800848 :       do ie=nets,nete
     215    31168800 :         elem(ie)%state%v(:,:,:,:,nm1)= (5*elem(ie)%state%v(:,:,:,:,nm1) &
     216 62368768800 :              - elem(ie)%state%v(:,:,:,:,n0) ) /4
     217             :         elem(ie)%state%T(:,:,:,nm1)= (5*elem(ie)%state%T(:,:,:,nm1) &
     218 30451917600 :              - elem(ie)%state%T(:,:,:,n0) )/4
     219             :         elem(ie)%state%dp3d(:,:,:,nm1)= (5*elem(ie)%state%dp3d(:,:,:,nm1) &
     220 30454134048 :              - elem(ie)%state%dp3d(:,:,:,n0) )/4
     221             :       enddo
     222             :       ! u5 = (5*u1/4 - u0/4) + 3dt/4 RHS(u4)
     223             :       !
     224             :       ! phl: rhs: t=t+2*dt/5+dt/3+3*dt/4         -wrong RK times ...
     225             :       !
     226             :       call compute_and_apply_rhs(np1,nm1,np1,3*dt/4,elem,hvcoord,hybrid,&
     227     2216448 :            deriv,nets,nete,3*eta_ave_w/4,inv_cp_full,qwater,qidx,kappa)
     228             :       ! final method is the same as:
     229             :       ! u5 = u0 +  dt/4 RHS(u0)) + 3dt/4 RHS(u4)
     230             :     else
     231           0 :       call endrun('ERROR: bad choice of tstep_type')
     232             :     endif
     233             : 
     234             :     ! ==============================================
     235             :     ! Time-split Horizontal diffusion: nu.del^2 or nu.del^4
     236             :     ! U(*) = U(t+1)  + dt2 * HYPER_DIFF_TERM(t+1)
     237             :     ! ==============================================
     238             : 
     239     2216448 :     call t_startf('advance_hypervis')
     240             : 
     241             :     ! note:time step computes u(t+1)= u(t*) + RHS.
     242             :     ! for consistency, dt_vis = t-1 - t*, so this is timestep method dependent
     243             : 
     244             :     ! forward-in-time, hypervis applied to dp3d
     245             :     call advance_hypervis_dp(edge3,elem,fvm,hybrid,deriv,np1,qn0,nets,nete,dt_vis,eta_ave_w,&
     246     2216448 :          inv_cp_full,hvcoord)
     247             : 
     248     2216448 :     call t_stopf('advance_hypervis')
     249             :     !
     250             :     ! update psdry
     251             :     !
     252    17800848 :     do ie=nets,nete
     253   327272400 :       elem(ie)%state%psdry(:,:) = hvcoord%hyai(1)*hvcoord%ps0
     254  1467150048 :       do k=1,nlev
     255 30451917600 :         elem(ie)%state%psdry(:,:) = elem(ie)%state%psdry(:,:)+elem(ie)%state%dp3d(:,:,k,np1)
     256             :       end do
     257             :     end do
     258     2216448 :     tevolve=tevolve+dt
     259             : 
     260     2216448 :     call omp_set_nested(.false.)
     261             : 
     262     2216448 :   end subroutine prim_advance_exp
     263             : 
     264             : 
     265      738816 :   subroutine applyCAMforcing(elem,fvm,np1,np1_qdp,dt_dribble,dt_phys,nets,nete,nsubstep)
     266     2216448 :     use dimensions_mod,         only: np, nc, nlev, qsize, ntrac, use_cslam
     267             :     use element_mod,            only: element_t
     268             :     use control_mod,            only: ftype, ftype_conserve
     269             :     use fvm_control_volume_mod, only: fvm_struct
     270             :     use air_composition,        only: thermodynamic_active_species_idx_dycore
     271             :     use cam_thermo,             only: get_dp, MASS_MIXING_RATIO
     272             :     type (element_t)     , intent(inout) :: elem(:)
     273             :     type(fvm_struct)     , intent(inout) :: fvm(:)
     274             :     real (kind=r8), intent(in) :: dt_dribble, dt_phys
     275             :     integer,  intent(in) :: np1,nets,nete,np1_qdp,nsubstep
     276             : 
     277             :     ! local
     278             :     integer :: i,j,k,ie,q
     279             :     real (kind=r8) :: v1,dt_local, dt_local_tracer,tmp
     280             :     real (kind=r8) :: dt_local_tracer_fvm
     281     1477632 :     real (kind=r8) :: ftmp(np,np,nlev,qsize,nets:nete) !diagnostics
     282             :     real (kind=r8) :: pdel(np,np,nlev)
     283      738816 :     real (kind=r8), allocatable :: ftmp_fvm(:,:,:,:,:) !diagnostics
     284             : 
     285      738816 :     call t_startf('applyCAMforc')
     286     2955264 :     if (use_cslam) allocate(ftmp_fvm(nc,nc,nlev,ntrac,nets:nete))
     287             : 
     288      738816 :     if (ftype==0) then
     289             :       !
     290             :       ! "Dribble" tendencies: divide total adjustment with nsplit and
     291             :       !                       add adjustments to state after each
     292             :       !                       vertical remap
     293             :       !
     294           0 :       dt_local            = dt_dribble
     295           0 :       dt_local_tracer     = dt_dribble
     296           0 :       dt_local_tracer_fvm = dt_dribble
     297      738816 :     else if (ftype==1) then
     298             :       !
     299             :       ! CAM-FV-stype forcing, i.e. equivalent to updating state once in the
     300             :       ! beginning of dynamics
     301             :       !
     302           0 :       dt_local            = dt_phys
     303           0 :       dt_local_tracer     = dt_phys
     304           0 :       dt_local_tracer_fvm = dt_phys
     305           0 :       if (nsubstep.ne.1) then
     306             :         !
     307             :         ! do nothing
     308             :         !
     309           0 :         dt_local            = 0.0_r8
     310           0 :         dt_local_tracer     = 0.0_r8
     311           0 :         dt_local_tracer_fvm = 0.0_r8
     312             :       end if
     313      738816 :     else if (ftype==2) then
     314             :       !
     315             :       ! do state-update for tracers and "dribbling" forcing for u,v,T
     316             :       !
     317      738816 :       dt_local            = dt_dribble
     318      738816 :       if (use_cslam) then
     319      738816 :         dt_local_tracer     = dt_dribble
     320      738816 :         dt_local_tracer_fvm = dt_phys
     321      738816 :         if (nsubstep.ne.1) then
     322      369408 :           dt_local_tracer_fvm = 0.0_r8
     323             :         end if
     324             :       else
     325           0 :         dt_local_tracer     = dt_phys
     326           0 :         dt_local_tracer_fvm = dt_phys
     327           0 :         if (nsubstep.ne.1) then
     328           0 :           dt_local_tracer     = 0.0_r8
     329           0 :           dt_local_tracer_fvm = 0.0_r8
     330             :         end if
     331             :       end if
     332             :     end if
     333             : 
     334     5933616 :     do ie=nets,nete
     335             :       !
     336             :       ! tracers
     337             :       !
     338     5194800 :       if (.not.use_cslam.and.dt_local_tracer>0) then
     339             : #if (defined COLUMN_OPENMP)
     340             :     !$omp parallel do num_threads(tracer_num_threads) private(q,k,i,j,v1)
     341             : #endif
     342           0 :         do q=1,qsize
     343           0 :           do k=1,nlev
     344           0 :             do j=1,np
     345           0 :               do i=1,np
     346             :                 !
     347             :                 ! FQ holds q-tendency: (qnew-qold)/dt_physics
     348             :                 !
     349           0 :                 v1 = dt_local_tracer*elem(ie)%derived%FQ(i,j,k,q)
     350           0 :                 if (elem(ie)%state%Qdp(i,j,k,q,np1_qdp) + v1 < 0 .and. v1<0) then
     351           0 :                   if (elem(ie)%state%Qdp(i,j,k,q,np1_qdp) < 0 ) then
     352             :                     v1=0  ! Q already negative, dont make it more so
     353             :                   else
     354           0 :                     v1 = -elem(ie)%state%Qdp(i,j,k,q,np1_qdp)
     355             :                   endif
     356             :                 endif
     357           0 :                 elem(ie)%state%Qdp(i,j,k,q,np1_qdp) = elem(ie)%state%Qdp(i,j,k,q,np1_qdp)+v1
     358           0 :                 ftmp(i,j,k,q,ie) = dt_local_tracer*&
     359           0 :                      elem(ie)%derived%FQ(i,j,k,q)-v1 !Only used for diagnostics!
     360             :               enddo
     361             :             enddo
     362             :           enddo
     363             :         enddo
     364             :       else
     365 60909030000 :         ftmp(:,:,:,:,ie) = 0.0_r8
     366             :       end if
     367     5194800 :       if (use_cslam.and.dt_local_tracer_fvm>0) then
     368             :         !
     369             :         ! Repeat for the fvm tracers: fc holds tendency (fc_new-fc_old)/dt_physics
     370             :         !
     371    31168800 :         do q = 1, ntrac
     372  2688309000 :           do k = 1, nlev
     373 10657132200 :             do j = 1, nc
     374 34542822600 :               do i = 1, nc
     375 23914261800 :                 tmp = dt_local_tracer_fvm*fvm(ie)%fc(i,j,k,q)/fvm(ie)%dp_fvm(i,j,k)
     376 23914261800 :                 v1 = tmp
     377 23914261800 :                 if (fvm(ie)%c(i,j,k,q) + v1 < 0 .and. v1<0) then
     378   234923308 :                   if (fvm(ie)%c(i,j,k,q) < 0 ) then
     379             :                     v1 = 0  ! C already negative, dont make it more so
     380             :                   else
     381   234923308 :                     v1 = -fvm(ie)%c(i,j,k,q)
     382             :                   end if
     383             :                 end if
     384 23914261800 :                 fvm(ie)%c(i,j,k,q) = fvm(ie)%c(i,j,k,q)+ v1
     385 31885682400 :                 ftmp_fvm(i,j,k,q,ie) = tmp-v1 !Only used for diagnostics!
     386             :               end do
     387             :             end do
     388             :           end do
     389             :         end do
     390             :       else
     391 34573991400 :         if (use_cslam) ftmp_fvm(:,:,:,:,ie) = 0.0_r8
     392             :       end if
     393             : 
     394     5933616 :       if (ftype_conserve==1.and..not.use_cslam) then
     395           0 :         call get_dp(elem(ie)%state%Qdp(:,:,:,1:qsize,np1_qdp), MASS_MIXING_RATIO, &
     396           0 :              thermodynamic_active_species_idx_dycore, elem(ie)%state%dp3d(:,:,:,np1), pdel)
     397           0 :         do k=1,nlev
     398           0 :           do j=1,np
     399           0 :             do i = 1,np
     400           0 :               pdel(i,j,k)=elem(ie)%derived%FDP(i,j,k)/pdel(i,j,k)
     401           0 :               elem(ie)%state%T(i,j,k,np1) = elem(ie)%state%T(i,j,k,np1) + &
     402           0 :                    dt_local*elem(ie)%derived%FT(i,j,k)*pdel(i,j,k)
     403             :               !
     404             :               ! momentum conserving: dp*u
     405             :               !
     406             :               elem(ie)%state%v(i,j,1,k,np1) = elem(ie)%state%v(i,j,1,k,np1) + &
     407           0 :                    dt_local*elem(ie)%derived%FM(i,j,1,k)*pdel(i,j,k)!elem(ie)%state%dp3d(i,j,k,np1)
     408             :               elem(ie)%state%v(i,j,2,k,np1) = elem(ie)%state%v(i,j,2,k,np1) + &
     409           0 :                    dt_local*elem(ie)%derived%FM(i,j,2,k)*pdel(i,j,k)!/elem(ie)%state%dp3d(i,j,k,np1)
     410             :             end do
     411             :           end do
     412             :         end do
     413             :       else
     414    10389600 :         elem(ie)%state%T(:,:,:,np1) = elem(ie)%state%T(:,:,:,np1) + &
     415 10161028800 :              dt_local*elem(ie)%derived%FT(:,:,:)
     416             :         elem(ie)%state%v(:,:,:,:,np1) = elem(ie)%state%v(:,:,:,:,np1) + &
     417 20779200000 :              dt_local*elem(ie)%derived%FM(:,:,:,:)
     418             :       end if
     419             :     end do
     420      738816 :     if (use_cslam) then
     421      738816 :       call output_qdp_var_dynamics(ftmp_fvm(:,:,:,:,:),nc,ntrac,nets,nete,'PDC')
     422             :     else
     423           0 :       call output_qdp_var_dynamics(ftmp(:,:,:,:,:),np,qsize,nets,nete,'PDC')
     424             :     end if
     425      738816 :     if (ftype==1.and.nsubstep==1) call tot_energy_dyn(elem,fvm,nets,nete,np1,np1_qdp,'p2d')
     426      738816 :     if (use_cslam) deallocate(ftmp_fvm)
     427      738816 :     call t_stopf('applyCAMforc')
     428      738816 :   end subroutine applyCAMforcing
     429             : 
     430             : 
     431     2216448 :   subroutine advance_hypervis_dp(edge3,elem,fvm,hybrid,deriv,nt,qn0,nets,nete,dt2,eta_ave_w,inv_cp_full,hvcoord)
     432             :     !
     433             :     !  take one timestep of:
     434             :     !          u(:,:,:,np) = u(:,:,:,np) +  dt2*nu*laplacian**order ( u )
     435             :     !          T(:,:,:,np) = T(:,:,:,np) +  dt2*nu_t*laplacian**order ( T )
     436             :     !
     437             :     !
     438             :     !  For correct scaling, dt2 should be the same 'dt2' used in the leapfrog advace
     439             :     !
     440             :     !
     441             :     use physconst,      only: cappa, cpair
     442             :     use cam_thermo,     only: get_molecular_diff_coef, get_rho_dry
     443             :     use dimensions_mod, only: np, nlev, nc, use_cslam, npsq, qsize, ksponge_end
     444             :     use dimensions_mod, only: nu_scale_top,nu_lev,kmvis_ref,kmcnd_ref,rho_ref,km_sponge_factor
     445             :     use dimensions_mod, only: nu_t_lev
     446             :     use control_mod,    only: nu, nu_t, hypervis_subcycle,hypervis_subcycle_sponge, nu_p, nu_top
     447             :     use control_mod,    only: molecular_diff,sponge_del4_lev
     448             :     use hybrid_mod,     only: hybrid_t!, get_loop_ranges
     449             :     use element_mod,    only: element_t
     450             :     use derivative_mod, only: derivative_t, laplace_sphere_wk, vlaplace_sphere_wk, vlaplace_sphere_wk_mol
     451             :     use derivative_mod, only: subcell_Laplace_fluxes, subcell_dss_fluxes
     452             :     use edge_mod,       only: edgevpack, edgevunpack, edgeDGVunpack
     453             :     use edgetype_mod,   only: EdgeBuffer_t, EdgeDescriptor_t
     454             :     use bndry_mod,      only: bndry_exchange
     455             :     use viscosity_mod,  only: biharmonic_wk_dp3d
     456             :     use hybvcoord_mod,  only: hvcoord_t
     457             :     use fvm_control_volume_mod, only: fvm_struct
     458             :     use air_composition, only: thermodynamic_active_species_idx_dycore
     459             :     use cam_history,     only: outfld, hist_fld_active
     460             : 
     461             :     type (hybrid_t)    , intent(in)   :: hybrid
     462             :     type (element_t)   , intent(inout), target :: elem(:)
     463             :     type(fvm_struct)   , intent(inout)   :: fvm(:)
     464             :     type (EdgeBuffer_t), intent(inout):: edge3
     465             :     type (derivative_t), intent(in  ) :: deriv
     466             :     integer            , intent(in)   :: nets,nete, nt, qn0
     467             :     real (kind=r8)     , intent(in)   :: inv_cp_full(np,np,nlev,nets:nete)
     468             :     type (hvcoord_t)   , intent(in)   :: hvcoord
     469             :     real (kind=r8) :: eta_ave_w  ! weighting for mean flux terms
     470             :     real (kind=r8) :: dt2
     471             :     ! local
     472             :     integer :: k,kptr,i,j,ie,ic
     473             :     integer :: kbeg, kend, kblk
     474     4432896 :     real (kind=r8), dimension(np,np,2,nlev,nets:nete)      :: vtens
     475     4432896 :     real (kind=r8), dimension(np,np,nlev,nets:nete)        :: ttens, dptens
     476             :     real (kind=r8), dimension(0:np+1,0:np+1,nlev)          :: corners
     477             :     real (kind=r8), dimension(2,2,2)                       :: cflux
     478             :     real (kind=r8)                                         :: temp      (np,np,nlev)
     479             :     real (kind=r8)                                         :: tempflux(nc,nc,4)
     480     4432896 :     real (kind=r8), dimension(nc,nc,4,nlev,nets:nete)      :: dpflux
     481             :     type (EdgeDescriptor_t)                                :: desc
     482             : 
     483             :     real (kind=r8), dimension(np,np)            :: lap_t,lap_dp
     484     4432896 :     real (kind=r8), dimension(np,np,ksponge_end,nets:nete):: kmvis,kmcnd,rho_dry
     485             :     real (kind=r8), dimension(np,np,nlev)       :: tmp_kmvis,tmp_kmcnd
     486             :     real (kind=r8), dimension(np,np,2)          :: lap_v
     487             :     real (kind=r8)                              :: v1,v2,v1new,v2new,dt,heating
     488             :     real (kind=r8)                              :: laplace_fluxes(nc,nc,4)
     489             :     real (kind=r8)                              :: rhypervis_subcycle
     490             :     real (kind=r8)                              :: nu_ratio1, ptop, inv_rho
     491             :     real (kind=r8)                              :: nu_temp, nu_dp, nu_velo
     492             : 
     493     2216448 :     if (nu_t == 0 .and. nu == 0 .and. nu_p==0 ) return;
     494             : 
     495     2216448 :     ptop = hvcoord%hyai(1)*hvcoord%ps0
     496             : 
     497     2216448 :     kbeg=1; kend=nlev
     498             : 
     499     2216448 :     kblk = kend - kbeg + 1
     500             : 
     501     2216448 :     dt=dt2/hypervis_subcycle
     502             : 
     503             :      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     504             :      !  hyper viscosity
     505             :      !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     506             : 
     507     8865792 :     do ic=1,hypervis_subcycle
     508     6649344 :       call tot_energy_dyn(elem,fvm,nets,nete,nt,qn0,'dBH')
     509             : 
     510     6649344 :       rhypervis_subcycle=1.0_r8/real(hypervis_subcycle,kind=r8)
     511     6649344 :       call biharmonic_wk_dp3d(elem,dptens,dpflux,ttens,vtens,deriv,edge3,hybrid,nt,nets,nete,kbeg,kend)
     512             : 
     513    53402544 :       do ie=nets,nete
     514             :         ! compute mean flux
     515    46753200 :         if (nu_p>0) then
     516  4394800800 :           do k=kbeg,kend
     517             :             !OMP_COLLAPSE_SIMD
     518             :             !DIR_VECTOR_ALIGNED
     519 21786991200 :             do j=1,np
     520 91308999600 :               do i=1,np
     521 >27827*10^7 :                 elem(ie)%derived%dpdiss_ave(i,j,k)=elem(ie)%derived%dpdiss_ave(i,j,k)+&
     522 >34784*10^7 :                      rhypervis_subcycle*eta_ave_w*(elem(ie)%state%dp3d(i,j,k,nt)-elem(ie)%derived%dp_ref(i,j,k))
     523             :                 elem(ie)%derived%dpdiss_biharmonic(i,j,k)=elem(ie)%derived%dpdiss_biharmonic(i,j,k)+&
     524 86960952000 :                      rhypervis_subcycle*eta_ave_w*dptens(i,j,k,ie)
     525             :               enddo
     526             :             enddo
     527             :           enddo
     528             :         endif
     529             :         !$omp parallel do num_threads(vert_num_threads) private(lap_t,lap_dp,lap_v,laplace_fluxes,nu_ratio1,i,j,k)
     530  4394800800 :         do k=kbeg,kend
     531             :           ! advace in time.
     532             :           ! note: DSS commutes with time stepping, so we can time advance and then DSS.
     533             :           ! note: weak operators alreayd have mass matrix "included"
     534             : 
     535             :           !OMP_COLLAPSE_SIMD
     536             :           !DIR_VECTOR_ALIGNED
     537 21740238000 :           do j=1,np
     538 91308999600 :             do i=1,np
     539 69568761600 :               ttens(i,j,k,ie)   = -nu_t_lev(k)*ttens(i,j,k,ie)
     540 69568761600 :               dptens(i,j,k,ie)  = -nu_p*dptens(i,j,k,ie)
     541 69568761600 :               vtens(i,j,1,k,ie) = -nu_lev(k)*vtens(i,j,1,k,ie)
     542 86960952000 :               vtens(i,j,2,k,ie) = -nu_lev(k)*vtens(i,j,2,k,ie)
     543             :             enddo
     544             :           enddo
     545             : 
     546  4348047600 :           if (use_cslam) then
     547             :             !OMP_COLLAPSE_SIMD
     548             :             !DIR_VECTOR_ALIGNED
     549 17392190400 :             do j=1,nc
     550 56524618800 :               do i=1,nc
     551             :                 !
     552             :                 ! del4 mass flux for CSLAM
     553             :                 !
     554 >15652*10^7 :                 elem(ie)%sub_elem_mass_flux(i,j,:,k) = elem(ie)%sub_elem_mass_flux(i,j,:,k) - &
     555 >36523*10^7 :                      rhypervis_subcycle*eta_ave_w*nu_p*dpflux(i,j,:,k,ie)
     556             :               enddo
     557             :             enddo
     558             :           endif
     559             : 
     560             :           ! NOTE: we will DSS all tendicies, EXCEPT for dp3d, where we DSS the new state
     561             :           !OMP_COLLAPSE_SIMD
     562             :           !DIR_VECTOR_ALIGNED
     563 21786991200 :           do j=1,np
     564 91308999600 :             do i=1,np
     565 >34784*10^7 :               elem(ie)%state%dp3d(i,j,k,nt) = elem(ie)%state%dp3d(i,j,k,nt)*elem(ie)%spheremp(i,j)&
     566 >43480*10^7 :                    + dt*dptens(i,j,k,ie)
     567             :             enddo
     568             :           enddo
     569             : 
     570             :         enddo
     571             : 
     572    46753200 :         kptr = kbeg - 1
     573    46753200 :         call edgeVpack(edge3,ttens(:,:,kbeg:kend,ie),kblk,kptr,ie)
     574             : 
     575    46753200 :         kptr = kbeg - 1 + nlev
     576 91355752800 :         call edgeVpack(edge3,vtens(:,:,1,kbeg:kend,ie),kblk,kptr,ie)
     577             : 
     578    46753200 :         kptr = kbeg - 1 + 2*nlev
     579 91355752800 :         call edgeVpack(edge3,vtens(:,:,2,kbeg:kend,ie),kblk,kptr,ie)
     580             : 
     581    46753200 :         kptr = kbeg - 1 + 3*nlev
     582    53402544 :         call edgeVpack(edge3,elem(ie)%state%dp3d(:,:,kbeg:kend,nt),kblk,kptr,ie)
     583             :       enddo
     584             : 
     585     6649344 :       call bndry_exchange(hybrid,edge3,location='advance_hypervis_dp2')
     586             : 
     587    53402544 :       do ie=nets,nete
     588             : 
     589    46753200 :         kptr = kbeg - 1
     590    46753200 :         call edgeVunpack(edge3,ttens(:,:,kbeg:kend,ie),kblk,kptr,ie)
     591             : 
     592    46753200 :         kptr = kbeg - 1 + nlev
     593 >18266*10^7 :         call edgeVunpack(edge3,vtens(:,:,1,kbeg:kend,ie),kblk,kptr,ie)
     594             : 
     595    46753200 :         kptr = kbeg - 1 + 2*nlev
     596 >18266*10^7 :         call edgeVunpack(edge3,vtens(:,:,2,kbeg:kend,ie),kblk,kptr,ie)
     597             : 
     598    46753200 :         if (use_cslam) then
     599  4394800800 :           do k=kbeg,kend
     600 91308999600 :             temp(:,:,k) = elem(ie)%state%dp3d(:,:,k,nt) / elem(ie)%spheremp  ! STATE before DSS
     601 >18696*10^7 :             corners(0:np+1,0:np+1,k) = 0.0_r8
     602 91355752800 :             corners(1:np  ,1:np  ,k) = elem(ie)%state%dp3d(1:np,1:np,k,nt) ! fill in interior data of STATE*mass
     603             :           enddo
     604             :         endif
     605    46753200 :         kptr = kbeg - 1 + 3*nlev
     606    46753200 :         call edgeVunpack(edge3,elem(ie)%state%dp3d(:,:,kbeg:kend,nt),kblk,kptr,ie)
     607             : 
     608    46753200 :         if (use_cslam) then
     609    46753200 :           desc = elem(ie)%desc
     610             : 
     611    46753200 :           kptr = kbeg - 1 + 3*nlev
     612    46753200 :           call edgeDGVunpack(edge3,corners(:,:,kbeg:kend),kblk,kptr,ie)
     613  4394800800 :           do k=kbeg,kend
     614 >18696*10^7 :             corners(:,:,k) = corners(:,:,k)/dt !note: array size is 0:np+1
     615             :             !OMP_COLLAPSE_SIMD
     616             :             !DIR_VECTOR_ALIGNED
     617 21740238000 :             do j=1,np
     618 91308999600 :               do i=1,np
     619 69568761600 :                 temp(i,j,k) =  elem(ie)%rspheremp(i,j)*elem(ie)%state%dp3d(i,j,k,nt) - temp(i,j,k)
     620 86960952000 :                 temp(i,j,k) =  temp(i,j,k)/dt
     621             :               enddo
     622             :             enddo
     623             : 
     624  4348047600 :             call distribute_flux_at_corners(cflux, corners(:,:,k), desc%getmapP)
     625             : 
     626 13044142800 :             cflux(1,1,:)   = elem(ie)%rspheremp(1,  1) * cflux(1,1,:)
     627 13044142800 :             cflux(2,1,:)   = elem(ie)%rspheremp(np, 1) * cflux(2,1,:)
     628 13044142800 :             cflux(1,2,:)   = elem(ie)%rspheremp(1, np) * cflux(1,2,:)
     629 13044142800 :             cflux(2,2,:)   = elem(ie)%rspheremp(np,np) * cflux(2,2,:)
     630             : 
     631  4348047600 :             call subcell_dss_fluxes(temp(:,:,k), np, nc, elem(ie)%metdet,cflux,tempflux)
     632  4348047600 :             elem(ie)%sub_elem_mass_flux(:,:,:,k) = elem(ie)%sub_elem_mass_flux(:,:,:,k) + &
     633 >23484*10^7 :                  rhypervis_subcycle*eta_ave_w*tempflux
     634             :           end do
     635             :         endif
     636             : 
     637             :         ! apply inverse mass matrix, accumulate tendencies
     638             :         !$omp parallel do num_threads(vert_num_threads) private(k,i,j)
     639  4394800800 :         do k=kbeg,kend
     640             :           !OMP_COLLAPSE_SIMD
     641             :           !DIR_VECTOR_ALIGNED
     642 21786991200 :           do j=1,np
     643 91308999600 :             do i=1,np
     644 69568761600 :               vtens(i,j,1,k,ie)=dt*vtens(i,j,1,k,ie)*elem(ie)%rspheremp(i,j)
     645 69568761600 :               vtens(i,j,2,k,ie)=dt*vtens(i,j,2,k,ie)*elem(ie)%rspheremp(i,j)
     646 69568761600 :               ttens(i,j,k,ie)=dt*ttens(i,j,k,ie)*elem(ie)%rspheremp(i,j)
     647 86960952000 :               elem(ie)%state%dp3d(i,j,k,nt)=elem(ie)%state%dp3d(i,j,k,nt)*elem(ie)%rspheremp(i,j)
     648             :             enddo
     649             :           enddo
     650             :         enddo
     651             : 
     652             :         !$omp parallel do num_threads(vert_num_threads) private(k,i,j)
     653  4401450144 :         do k=kbeg,kend
     654             :           !OMP_COLLAPSE_SIMD
     655             :           !DIR_VECTOR_ALIGNED
     656 21786991200 :           do j=1,np
     657 91308999600 :             do i=1,np
     658             :               ! update v first (gives better results than updating v after heating)
     659 >34784*10^7 :               elem(ie)%state%v(i,j,:,k,nt)=elem(ie)%state%v(i,j,:,k,nt) + &
     660 >55655*10^7 :                    vtens(i,j,:,k,ie)
     661             :               elem(ie)%state%T(i,j,k,nt)=elem(ie)%state%T(i,j,k,nt) &
     662 86960952000 :                    +ttens(i,j,k,ie)
     663             :             enddo
     664             :           enddo
     665             :         enddo
     666             :       end do
     667             : 
     668     6649344 :       call tot_energy_dyn(elem,fvm,nets,nete,nt,qn0,'dCH')
     669    53402544 :       do ie=nets,nete
     670             :         !$omp parallel do num_threads(vert_num_threads), private(k,i,j,v1,v2,heating)
     671  4167684144 :         do k=sponge_del4_lev+2,nlev
     672             :           !
     673             :           ! only do "frictional heating" away from sponge
     674             :           !
     675             :           !OMP_COLLAPSE_SIMD
     676             :           !DIR_VECTOR_ALIGNED
     677 20618161200 :           do j=1,np
     678 86399913600 :             do i=1,np
     679 65828505600 :               v1new=elem(ie)%state%v(i,j,1,k,nt)
     680 65828505600 :               v2new=elem(ie)%state%v(i,j,2,k,nt)
     681 65828505600 :               v1   =elem(ie)%state%v(i,j,1,k,nt)- vtens(i,j,1,k,ie)
     682 65828505600 :               v2   =elem(ie)%state%v(i,j,2,k,nt)- vtens(i,j,2,k,ie)
     683 65828505600 :               heating = 0.5_r8*(v1new*v1new+v2new*v2new-(v1*v1+v2*v2))
     684             : 
     685             :               elem(ie)%state%T(i,j,k,nt)=elem(ie)%state%T(i,j,k,nt) &
     686 82285632000 :                    -heating*inv_cp_full(i,j,k,ie)
     687             :             enddo
     688             :           enddo
     689             :         enddo
     690             :       enddo
     691     8865792 :       call tot_energy_dyn(elem,fvm,nets,nete,nt,qn0,'dAH')
     692             :     end do
     693             : 
     694             :     !
     695             :     !***************************************************************
     696             :     !
     697             :     ! sponge layer damping
     698             :     !
     699             :     !***************************************************************
     700             :     !
     701     2216448 :     call t_startf('sponge_diff')
     702             :     !
     703             :     ! compute coefficients for horizontal diffusion
     704             :     !
     705     2216448 :     if (molecular_diff==1) then
     706           0 :       do ie=nets,nete
     707           0 :         call get_rho_dry(elem(ie)%state%Qdp(:,:,:,1:qsize,qn0),  &
     708             :              elem(ie)%state%T(:,:,:,nt), ptop, elem(ie)%state%dp3d(:,:,:,nt),&
     709             :              .true., rho_dry=rho_dry(:,:,:,ie),                                              &
     710           0 :              active_species_idx_dycore=thermodynamic_active_species_idx_dycore)
     711             :       end do
     712             : 
     713           0 :       do ie=nets,nete
     714             :         !
     715             :         ! compute molecular diffusion and thermal conductivity coefficients at mid-levels
     716             :         !
     717           0 :         call get_molecular_diff_coef(elem(ie)%state%T(:,:,:,nt), .false., km_sponge_factor(1:ksponge_end), kmvis(:,:,:,ie),&
     718           0 :              kmcnd(:,:,:,ie), elem(ie)%state%Qdp(:,:,:,1:qsize,qn0), fact=1.0_r8/elem(ie)%state%dp3d(:,:,1:ksponge_end,nt),&
     719           0 :              active_species_idx_dycore=thermodynamic_active_species_idx_dycore)
     720             :       end do
     721             :       !
     722             :       ! diagnostics
     723             :       !
     724           0 :       if (hist_fld_active('nu_kmvis')) then
     725           0 :         do ie=nets,nete
     726           0 :           tmp_kmvis = 0.0_r8
     727           0 :           do k=1,ksponge_end
     728           0 :             tmp_kmvis(:,:,k) = kmvis(:,:,k,ie)/rho_dry(:,:,k,ie)
     729             :           end do
     730           0 :           call outfld('nu_kmvis',RESHAPE(tmp_kmvis(:,:,:), (/npsq,nlev/)), npsq, ie)
     731             :         end do
     732             :       end if
     733           0 :       if (hist_fld_active('nu_kmcnd')) then
     734           0 :         do ie=nets,nete
     735           0 :           tmp_kmcnd = 0.0_r8
     736           0 :           do k=1,ksponge_end
     737           0 :             tmp_kmcnd(:,:,k) = kmcnd(:,:,k,ie)*inv_cp_full(:,:,k,ie)/rho_dry(:,:,k,ie)
     738             :           end do
     739           0 :           call outfld('nu_kmcnd',RESHAPE(tmp_kmcnd(:,:,:), (/npsq,nlev/)), npsq, ie)
     740             :         end do
     741             :       end if
     742           0 :       if (hist_fld_active('nu_kmcnd_dp')) then
     743           0 :         do ie=nets,nete
     744           0 :           tmp_kmcnd = 0.0_r8
     745           0 :           do k=1,ksponge_end
     746           0 :             tmp_kmcnd(:,:,k) = kmcnd(:,:,k,ie)/(cpair*rho_ref(k))
     747             :           end do
     748           0 :           call outfld('nu_kmcnd_dp',RESHAPE(tmp_kmcnd(:,:,:), (/npsq,nlev/)), npsq, ie)
     749             :         end do
     750             :       end if
     751             : 
     752             :       !
     753             :       ! scale by reference value
     754             :       !
     755           0 :       do ie=nets,nete
     756           0 :         do k=1,ksponge_end
     757           0 :           kmcnd(:,:,k,ie) = kmcnd(:,:,k,ie)/kmcnd_ref(k)
     758           0 :           kmvis(:,:,k,ie) = kmvis(:,:,k,ie)/kmvis_ref(k)
     759             :         end do
     760             :       end do
     761             :     end if
     762             :     !
     763             :     ! Horizontal Laplacian diffusion
     764             :     !
     765     2216448 :     dt=dt2/hypervis_subcycle_sponge
     766     2216448 :     call tot_energy_dyn(elem,fvm,nets,nete,nt,qn0,'dBS')
     767     2216448 :     kblk = ksponge_end
     768     8865792 :     do ic=1,hypervis_subcycle_sponge
     769     6649344 :       rhypervis_subcycle=1.0_r8/real(hypervis_subcycle_sponge,kind=r8)
     770    53402544 :       do ie=nets,nete
     771   233766000 :         do k=1,ksponge_end
     772   187012800 :           if (nu_top>0.or.molecular_diff>1) then
     773             :             !**************************************************************
     774             :             !
     775             :             ! traditional sponge formulation (constant coefficients)
     776             :             !
     777             :             !**************************************************************
     778   187012800 :             call laplace_sphere_wk(elem(ie)%state%T(:,:,k,nt),deriv,elem(ie),lap_t,var_coef=.false.)
     779   187012800 :             call laplace_sphere_wk(elem(ie)%state%dp3d(:,:,k,nt),deriv,elem(ie),lap_dp,var_coef=.false.)
     780   187012800 :             nu_ratio1=1.0_r8
     781   187012800 :             call vlaplace_sphere_wk(elem(ie)%state%v(:,:,:,k,nt),deriv,elem(ie),.true.,lap_v, var_coef=.false.,&
     782   374025600 :                  nu_ratio=nu_ratio1)
     783             : 
     784   187012800 :             nu_dp   = nu_scale_top(k)*nu_top
     785   187012800 :             nu_temp = nu_scale_top(k)*nu_top
     786   187012800 :             nu_velo = nu_scale_top(k)*nu_top
     787   187012800 :             if (molecular_diff>1) then
     788           0 :               nu_dp   = nu_dp   + kmcnd_ref(k)/(cpair*rho_ref(k))
     789             :             end if
     790             : 
     791             :             !OMP_COLLAPSE_SIMD
     792             :             !DIR_VECTOR_ALIGNED
     793   935064000 :             do j=1,np
     794  3927268800 :               do i=1,np
     795  2992204800 :                 ttens(i,j,k,ie)   = nu_temp*lap_t(i,j)
     796  2992204800 :                 dptens(i,j,k,ie)  = nu_dp  *lap_dp(i,j)
     797  2992204800 :                 vtens(i,j,1,k,ie) = nu_velo*lap_v(i,j,1)
     798  3740256000 :                 vtens(i,j,2,k,ie) = nu_velo*lap_v(i,j,2)
     799             :               enddo
     800             :             enddo
     801             :           end if
     802   187012800 :           if (molecular_diff>0) then
     803             :             !************************************************************************
     804             :             !
     805             :             ! sponge formulation using molecular diffusion and thermal conductivity
     806             :             !
     807             :             !************************************************************************
     808           0 :             call vlaplace_sphere_wk_mol(elem(ie)%state%v(:,:,:,k,nt),deriv,elem(ie),.false.,kmvis(:,:,k,ie),lap_v)
     809           0 :             call laplace_sphere_wk(elem(ie)%state%T(:,:,k,nt),deriv,elem(ie),lap_t ,var_coef=.false.,mol_nu=kmcnd(:,:,k,ie))
     810             : 
     811             :             !OMP_COLLAPSE_SIMD
     812             :             !DIR_VECTOR_ALIGNED
     813           0 :             do j=1,np
     814           0 :               do i=1,np
     815           0 :                 inv_rho = 1.0_r8/rho_dry(i,j,k,ie)
     816           0 :                 ttens(i,j,k,ie)   = ttens(i,j,k,ie)  + kmcnd_ref(k)*inv_cp_full(i,j,k,ie)*inv_rho*lap_t(i,j)
     817           0 :                 vtens(i,j,1,k,ie) = vtens(i,j,1,k,ie)+ kmvis_ref(k)*inv_rho*lap_v(i,j,1)
     818           0 :                 vtens(i,j,2,k,ie) = vtens(i,j,2,k,ie)+ kmvis_ref(k)*inv_rho*lap_v(i,j,2)
     819             :               end do
     820             :             end do
     821             :           end if
     822             : 
     823   187012800 :           if (use_cslam.and.nu_dp>0) then
     824             :             !
     825             :             ! mass flux for CSLAM due to sponge layer diffusion on dp
     826             :             !
     827   187012800 :             call subcell_Laplace_fluxes(elem(ie)%state%dp3d(:,:,k,nt),deriv,elem(ie),np,nc,laplace_fluxes)
     828   374025600 :             elem(ie)%sub_elem_mass_flux(:,:,:,k) = elem(ie)%sub_elem_mass_flux(:,:,:,k) + &
     829 10285704000 :                  rhypervis_subcycle*eta_ave_w*nu_dp*laplace_fluxes
     830             :           endif
     831             : 
     832             :           ! NOTE: we will DSS all tendencies, EXCEPT for dp3d, where we DSS the new state
     833             :           !OMP_COLLAPSE_SIMD
     834             :           !DIR_VECTOR_ALIGNED
     835   981817200 :           do j=1,np
     836  3927268800 :             do i=1,np
     837 14961024000 :               elem(ie)%state%dp3d(i,j,k,nt) = elem(ie)%state%dp3d(i,j,k,nt)*elem(ie)%spheremp(i,j)&
     838 18701280000 :                    + dt*dptens(i,j,k,ie)
     839             :             enddo
     840             :           enddo
     841             : 
     842             :         enddo
     843             : 
     844             : 
     845    46753200 :         kptr = 0
     846    46753200 :         call edgeVpack(edgeSponge,ttens(:,:,1:ksponge_end,ie),kblk,kptr,ie)
     847             : 
     848    46753200 :         kptr = ksponge_end
     849  3974022000 :         call edgeVpack(edgeSponge,vtens(:,:,1,1:ksponge_end,ie),kblk,kptr,ie)
     850             : 
     851    46753200 :         kptr = 2*ksponge_end
     852  3974022000 :         call edgeVpack(edgeSponge,vtens(:,:,2,1:ksponge_end,ie),kblk,kptr,ie)
     853             : 
     854    46753200 :         kptr = 3*ksponge_end
     855    53402544 :         call edgeVpack(edgeSponge,elem(ie)%state%dp3d(:,:,1:ksponge_end,nt),kblk,kptr,ie)
     856             :       enddo
     857             : 
     858     6649344 :       call bndry_exchange(hybrid,edgeSponge,location='advance_hypervis_sponge')
     859             : 
     860    55618992 :       do ie=nets,nete
     861             : 
     862    46753200 :         kptr = 0
     863    46753200 :         call edgeVunpack(edgeSponge,ttens(:,:,1:ksponge_end,ie),kblk,kptr,ie)
     864             : 
     865    46753200 :         kptr = ksponge_end
     866  7901290800 :         call edgeVunpack(edgeSponge,vtens(:,:,1,1:ksponge_end,ie),kblk,kptr,ie)
     867             : 
     868    46753200 :         kptr = 2*ksponge_end
     869  7901290800 :         call edgeVunpack(edgeSponge,vtens(:,:,2,1:ksponge_end,ie),kblk,kptr,ie)
     870             : 
     871    46753200 :         if (use_cslam.and.nu_dp>0.0_r8) then
     872   233766000 :           do k=1,ksponge_end
     873  3927268800 :             temp(:,:,k) = elem(ie)%state%dp3d(:,:,k,nt) / elem(ie)%spheremp  ! STATE before DSS
     874  8041550400 :             corners(0:np+1,0:np+1,k) = 0.0_r8
     875  3974022000 :             corners(1:np  ,1:np  ,k) = elem(ie)%state%dp3d(1:np,1:np,k,nt) ! fill in interior data of STATE*mass
     876             :           enddo
     877             :         endif
     878    46753200 :         kptr = 3*ksponge_end
     879    46753200 :         call edgeVunpack(edgeSponge,elem(ie)%state%dp3d(:,:,1:ksponge_end,nt),kblk,kptr,ie)
     880             : 
     881    46753200 :         if (use_cslam.and.nu_dp>0.0_r8) then
     882    46753200 :           desc = elem(ie)%desc
     883             : 
     884    46753200 :           kptr = 3*ksponge_end
     885    46753200 :           call edgeDGVunpack(edgeSponge,corners(:,:,1:ksponge_end),kblk,kptr,ie)
     886   233766000 :           do k=1,ksponge_end
     887  8041550400 :             corners(:,:,k) = corners(:,:,k)/dt !note: array size is 0:np+1
     888             :             !OMP_COLLAPSE_SIMD
     889             :             !DIR_VECTOR_ALIGNED
     890   935064000 :             do j=1,np
     891  3927268800 :               do i=1,np
     892  2992204800 :                 temp(i,j,k) =  elem(ie)%rspheremp(i,j)*elem(ie)%state%dp3d(i,j,k,nt) - temp(i,j,k)
     893  3740256000 :                 temp(i,j,k) =  temp(i,j,k)/dt
     894             :               enddo
     895             :             enddo
     896             : 
     897   187012800 :             call distribute_flux_at_corners(cflux, corners(:,:,k), desc%getmapP)
     898             : 
     899   561038400 :             cflux(1,1,:)   = elem(ie)%rspheremp(1,  1) * cflux(1,1,:)
     900   561038400 :             cflux(2,1,:)   = elem(ie)%rspheremp(np, 1) * cflux(2,1,:)
     901   561038400 :             cflux(1,2,:)   = elem(ie)%rspheremp(1, np) * cflux(1,2,:)
     902   561038400 :             cflux(2,2,:)   = elem(ie)%rspheremp(np,np) * cflux(2,2,:)
     903             : 
     904   187012800 :             call subcell_dss_fluxes(temp(:,:,k), np, nc, elem(ie)%metdet,cflux,tempflux)
     905   187012800 :             elem(ie)%sub_elem_mass_flux(:,:,:,k) = elem(ie)%sub_elem_mass_flux(:,:,:,k) + &
     906 10145444400 :                  rhypervis_subcycle*eta_ave_w*tempflux
     907             :           end do
     908             :         endif
     909             : 
     910             :         ! apply inverse mass matrix, accumulate tendencies
     911             :         !$omp parallel do num_threads(vert_num_threads) private(k,i,j)
     912   233766000 :         do k=1,ksponge_end
     913             :           !OMP_COLLAPSE_SIMD
     914             :           !DIR_VECTOR_ALIGNED
     915   981817200 :           do j=1,np
     916  3927268800 :             do i=1,np
     917  2992204800 :               vtens(i,j,1,k,ie)=dt*vtens(i,j,1,k,ie)*elem(ie)%rspheremp(i,j)
     918  2992204800 :               vtens(i,j,2,k,ie)=dt*vtens(i,j,2,k,ie)*elem(ie)%rspheremp(i,j)
     919  2992204800 :               ttens(i,j,k,ie)=dt*ttens(i,j,k,ie)*elem(ie)%rspheremp(i,j)
     920  2992204800 :               elem(ie)%state%dp3d(i,j,k,nt)=elem(ie)%state%dp3d(i,j,k,nt)*elem(ie)%rspheremp(i,j)
     921             :               ! update v first (gives better results than updating v after heating)
     922  8976614400 :               elem(ie)%state%v(i,j,:,k,nt)=elem(ie)%state%v(i,j,:,k,nt) + vtens(i,j,:,k,ie)
     923  3740256000 :               elem(ie)%state%T(i,j,  k,nt)=elem(ie)%state%T(i,j,  k,nt) + ttens(i,j,  k,ie)
     924             :             enddo
     925             :           enddo
     926             :         enddo
     927    53402544 :         if (molecular_diff>0) then
     928             :           !
     929             :           ! no frictional heating for artificial sponge
     930             :           !
     931             :           !$omp parallel do num_threads(vert_num_threads) private(k,i,j,v1,v2,v1new,v2new)
     932           0 :           do k=1,ksponge_end
     933             :             !OMP_COLLAPSE_SIMD
     934             :             !DIR_VECTOR_ALIGNED
     935           0 :             do j=1,np
     936           0 :               do i=1,np
     937           0 :                 v1new=elem(ie)%state%v(i,j,1,k,nt)
     938           0 :                 v2new=elem(ie)%state%v(i,j,2,k,nt)
     939           0 :                 v1   =elem(ie)%state%v(i,j,1,k,nt)- vtens(i,j,1,k,ie)
     940           0 :                 v2   =elem(ie)%state%v(i,j,2,k,nt)- vtens(i,j,2,k,ie)
     941             :                 !
     942             :                 ! frictional heating
     943             :                 !
     944           0 :                 heating = 0.5_r8*(v1new*v1new+v2new*v2new-(v1*v1+v2*v2))
     945             :                 elem(ie)%state%T(i,j,k,nt)=elem(ie)%state%T(i,j,k,nt) &
     946           0 :                      -heating*inv_cp_full(i,j,k,ie)
     947             :               enddo
     948             :             enddo
     949             :           enddo
     950             :         end if
     951             :       end do
     952             :     end do
     953     2216448 :     call t_stopf('sponge_diff')
     954     2216448 :     call tot_energy_dyn(elem,fvm,nets,nete,nt,qn0,'dAS')
     955     4432896 :   end subroutine advance_hypervis_dp
     956             : 
     957             : 
     958             : 
     959    33246720 :    subroutine compute_and_apply_rhs(np1,nm1,n0,dt2,elem,hvcoord,hybrid,&
     960    11082240 :         deriv,nets,nete,eta_ave_w,inv_cp_full,qwater,qidx,kappa)
     961             :      ! ===================================
     962             :      ! compute the RHS, accumulate into u(np1) and apply DSS
     963             :      !
     964             :      !           u(np1) = u(nm1) + dt2*DSS[ RHS(u(n0)) ]
     965             :      !
     966             :      ! This subroutine is normally called to compute a leapfrog timestep
     967             :      ! but by adjusting np1,nm1,n0 and dt2, many other timesteps can be
     968             :      ! accomodated.  For example, setting nm1=np1=n0 this routine will
     969             :      ! take a forward euler step, overwriting the input with the output.
     970             :      !
     971             :      ! if  dt2<0, then the DSS'd RHS is returned in timelevel np1
     972             :      !
     973             :      ! Combining the RHS and DSS pack operation in one routine
     974             :      ! allows us to fuse these two loops for more cache reuse
     975             :      !
     976             :      ! Combining the dt advance and DSS unpack operation in one routine
     977             :      ! allows us to fuse these two loops for more cache reuse
     978             :      !
     979             :      ! ===================================
     980     2216448 :      use dimensions_mod,  only: np, nc, nlev, use_cslam
     981             :      use control_mod,     only: pgf_formulation
     982             :      use hybrid_mod,      only: hybrid_t
     983             :      use element_mod,     only: element_t
     984             :      use derivative_mod,  only: derivative_t, divergence_sphere, gradient_sphere, vorticity_sphere
     985             :      use derivative_mod,  only: subcell_div_fluxes, subcell_dss_fluxes
     986             :      use edge_mod,        only: edgevpack, edgevunpack, edgeDGVunpack
     987             :      use edgetype_mod,    only: edgedescriptor_t
     988             :      use bndry_mod,       only: bndry_exchange
     989             :      use hybvcoord_mod,   only: hvcoord_t
     990             :      use cam_thermo,      only: get_gz, get_virtual_temp
     991             :      use air_composition, only: thermodynamic_active_species_num, dry_air_species_num
     992             :      use air_composition, only: get_cp_dry, get_R_dry
     993             :      use physconst,       only: tref,cpair,rga,lapse_rate
     994             : 
     995             :      implicit none
     996             :      integer,        intent(in) :: np1,nm1,n0,nets,nete
     997             :      real (kind=r8), intent(in) :: dt2
     998             : 
     999             :      type (hvcoord_t)     , intent(in) :: hvcoord
    1000             :      type (hybrid_t)      , intent(in) :: hybrid
    1001             :      type (element_t)     , intent(inout), target :: elem(:)
    1002             :      type (derivative_t)  , intent(in) :: deriv
    1003             :      real (kind=r8)       , intent(in) :: inv_cp_full(np,np,nlev,nets:nete)
    1004             :      real (kind=r8)       , intent(in) :: qwater(np,np,nlev,thermodynamic_active_species_num,nets:nete)
    1005             :      integer              , intent(in) :: qidx(thermodynamic_active_species_num)
    1006             :      real (kind=r8)       , intent(in) :: kappa(np,np,nlev,nets:nete)
    1007             : 
    1008             :      real (kind=r8) :: eta_ave_w  ! weighting for eta_dot_dpdn mean flux
    1009             : 
    1010             :     ! local
    1011             :      real (kind=r8), dimension(np,np,nlev)                         :: phi
    1012             :      real (kind=r8), dimension(np,np,nlev)                         :: omega_full
    1013             :      real (kind=r8), dimension(np,np,nlev)                         :: divdp_dry
    1014             :      real (kind=r8), dimension(np,np,nlev)                         :: divdp_full
    1015             :      real (kind=r8), dimension(np,np,2)                            :: vtemp
    1016             :      real (kind=r8), dimension(np,np,2)                            :: grad_kappa_term
    1017             :      real (kind=r8), dimension(np,np,2,nlev)                       :: vdp_dry
    1018             :      real (kind=r8), dimension(np,np,2,nlev)                       :: vdp_full
    1019             :      real (kind=r8), dimension(np,np,nlev)                         :: vgrad_p_full
    1020             :      real (kind=r8), dimension(np,np,2     )                       :: v            !
    1021             :      real (kind=r8), dimension(np,np)                              :: vgrad_T      ! v.grad(T)
    1022             :      real (kind=r8), dimension(np,np)                              :: Ephi         ! kinetic energy + PHI term
    1023             :      real (kind=r8), dimension(np,np,2,nlev)                       :: grad_p_full
    1024             :      real (kind=r8), dimension(np,np,nlev)                         :: vort         ! vorticity
    1025             :      real (kind=r8), dimension(np,np,nlev)                         :: dp_dry       ! delta pressure dry
    1026             :      real (kind=r8), dimension(np,np,nlev)                         :: R_dry, cp_dry!
    1027             :      real (kind=r8), dimension(np,np,nlev)                         :: p_full       ! pressure
    1028             :      real (kind=r8), dimension(np,np,nlev)                         :: dp_full
    1029             :      real (kind=r8), dimension(np,np)                              :: exner
    1030             :      real (kind=r8), dimension(0:np+1,0:np+1,nlev)                 :: corners
    1031             :      real (kind=r8), dimension(2,2,2)                              :: cflux
    1032             :      real (kind=r8), dimension(np,np)                              :: suml
    1033             :      real (kind=r8) :: vtens1(np,np,nlev),vtens2(np,np,nlev),ttens(np,np,nlev)
    1034             :      real (kind=r8) :: stashdp3d (np,np,nlev),tempdp3d(np,np), tempflux(nc,nc,4)
    1035             :      real (kind=r8) :: ckk, term, T_v(np,np,nlev)
    1036             :      real (kind=r8), dimension(np,np,2) :: pgf_term
    1037             :      real (kind=r8), dimension(np,np,2) :: grad_exner,grad_logexner
    1038             :      real (kind=r8) :: T0,T1
    1039             :      real (kind=r8), dimension(np,np)   :: theta_v
    1040             : 
    1041             :      type (EdgeDescriptor_t):: desc
    1042             : 
    1043             :      real (kind=r8) :: sum_water(np,np,nlev), density_inv(np,np)
    1044             :      real (kind=r8) :: E,v1,v2,glnps1,glnps2
    1045             :      integer        :: i,j,k,kptr,ie
    1046             :      real (kind=r8) :: ptop
    1047             : 
    1048             : !JMD  call t_barrierf('sync_compute_and_apply_rhs', hybrid%par%comm)
    1049    11082240 :      call t_adj_detailf(+1)
    1050    11082240 :      call t_startf('compute_and_apply_rhs')
    1051    11082240 :      ptop = hvcoord%hyai(1)*hvcoord%ps0
    1052    89004240 :      do ie=nets,nete
    1053             :        !
    1054             :        ! compute virtual temperature and sum_water
    1055             :        !
    1056    77922000 :        call get_virtual_temp(qwater(:,:,:,:,ie), t_v(:,:,:),temp=elem(ie)%state%T(:,:,:,n0),&
    1057   155844000 :             sum_q =sum_water(:,:,:), active_species_idx_dycore=qidx)
    1058    77922000 :        call get_R_dry(qwater(:,:,:,:,ie), qidx, R_dry)
    1059    77922000 :        call get_cp_dry(qwater(:,:,:,:,ie), qidx, cp_dry)
    1060             : 
    1061  7324668000 :        do k=1,nlev
    1062 >15218*10^7 :          dp_dry(:,:,k)  = elem(ie)%state%dp3d(:,:,k,n0)
    1063 >15225*10^7 :          dp_full(:,:,k) = sum_water(:,:,k)*dp_dry(:,:,k)
    1064             :        end do
    1065    77922000 :        call get_gz(dp_full, T_v, R_dry, elem(ie)%state%phis, ptop, phi, pmid=p_full)
    1066  7324668000 :        do k=1,nlev
    1067             :          ! vertically lagrangian code: we advect dp3d instead of ps
    1068             :          ! we also need grad(p) at all levels (not just grad(ps))
    1069             :          !p(k)= hyam(k)*ps0 + hybm(k)*ps
    1070             :          !    = .5_r8*(hyai(k+1)+hyai(k))*ps0 + .5_r8*(hybi(k+1)+hybi(k))*ps
    1071             :          !    = .5_r8*(ph(k+1) + ph(k) )  = ph(k) + dp(k)/2
    1072             :          !
    1073             :          ! p(k+1)-p(k) = ph(k+1)-ph(k) + (dp(k+1)-dp(k))/2
    1074             :          !             = dp(k) + (dp(k+1)-dp(k))/2 = (dp(k+1)+dp(k))/2
    1075             : 
    1076  7246746000 :          call gradient_sphere(p_full(:,:,k),deriv,elem(ie)%Dinv,grad_p_full(:,:,:,k))
    1077             :          ! ==============================
    1078             :          ! compute vgrad_lnps - for omega_full
    1079             :          ! ==============================
    1080             :          !OMP_COLLAPSE_SIMD
    1081             :          !DIR_VECTOR_ALIGNED
    1082 36233730000 :          do j=1,np
    1083 >15218*10^7 :            do i=1,np
    1084 >11594*10^7 :              v1 = elem(ie)%state%v(i,j,1,k,n0)
    1085 >11594*10^7 :              v2 = elem(ie)%state%v(i,j,2,k,n0)
    1086 >11594*10^7 :              vgrad_p_full(i,j,k) = (v1*grad_p_full(i,j,1,k) + v2*grad_p_full(i,j,2,k))
    1087 >11594*10^7 :              vdp_dry(i,j,1,k) = v1*dp_dry(i,j,k)
    1088 >11594*10^7 :              vdp_dry(i,j,2,k) = v2*dp_dry(i,j,k)
    1089 >11594*10^7 :              vdp_full(i,j,1,k) = v1*dp_full(i,j,k)
    1090 >14493*10^7 :              vdp_full(i,j,2,k) = v2*dp_full(i,j,k)
    1091             :            end do
    1092             :          end do
    1093             :          ! ================================
    1094             :          ! Accumulate mean Vel_rho flux in vn0
    1095             :          ! ================================
    1096             :          !OMP_COLLAPSE_SIMD
    1097             :          !DIR_VECTOR_ALIGNED
    1098 36233730000 :          do j=1,np
    1099 >15218*10^7 :            do i=1,np
    1100 >11594*10^7 :              elem(ie)%derived%vn0(i,j,1,k)=elem(ie)%derived%vn0(i,j,1,k)+eta_ave_w*vdp_dry(i,j,1,k)
    1101 >14493*10^7 :              elem(ie)%derived%vn0(i,j,2,k)=elem(ie)%derived%vn0(i,j,2,k)+eta_ave_w*vdp_dry(i,j,2,k)
    1102             :            enddo
    1103             :          enddo
    1104             :          !divdp_dry(:,:,k)
    1105             :          ! =========================================
    1106             :          !
    1107             :          ! Compute relative vorticity and divergence
    1108             :          !
    1109             :          ! =========================================
    1110  7246746000 :          call divergence_sphere(vdp_dry(:,:,:,k),deriv,elem(ie),divdp_dry(:,:,k))
    1111  7246746000 :          call divergence_sphere(vdp_full(:,:,:,k),deriv,elem(ie),divdp_full(:,:,k))
    1112  7324668000 :          call vorticity_sphere(elem(ie)%state%v(:,:,:,k,n0),deriv,elem(ie),vort(:,:,k))
    1113             :        enddo
    1114             : 
    1115             :        ! ====================================================
    1116             :        ! Compute omega_full
    1117             :        ! ====================================================
    1118    77922000 :        ckk         = 0.5_r8
    1119    77922000 :        suml(:,:  ) = 0
    1120             : #if (defined COLUMN_OPENMP)
    1121             : !$omp parallel do num_threads(vert_num_threads) private(k,j,i,ckk,term)
    1122             : #endif
    1123  7324668000 :        do k=1,nlev
    1124             :         !OMP_COLLAPSE_SIMD
    1125             :         !DIR_VECTOR_ALIGNED
    1126 36311652000 :          do j=1,np   !   Loop inversion (AAM)
    1127 >15218*10^7 :            do i=1,np
    1128 >11594*10^7 :              term         = -divdp_full(i,j,k)
    1129             : 
    1130 >11594*10^7 :              v1 = elem(ie)%state%v(i,j,1,k,n0)
    1131 >11594*10^7 :              v2 = elem(ie)%state%v(i,j,2,k,n0)
    1132             : 
    1133 >11594*10^7 :              omega_full(i,j,k) = suml(i,j) + ckk*term+vgrad_p_full(i,j,k)
    1134 >14493*10^7 :              suml(i,j)    = suml(i,j)   + term
    1135             :            end do
    1136             :          end do
    1137             :        end do
    1138             : #if (defined COLUMN_OPENMP)
    1139             :      !$omp parallel do num_threads(vert_num_threads) private(k)
    1140             : #endif
    1141  7324668000 :        do k=1,nlev  !  Loop index added (AAM)
    1142             :          elem(ie)%derived%omega(:,:,k) = &
    1143 >15225*10^7 :               elem(ie)%derived%omega(:,:,k) + eta_ave_w*omega_full(:,:,k)
    1144             :        enddo
    1145             :        ! ==============================================
    1146             :        ! Compute phi + kinetic energy term: 10*nv*nv Flops
    1147             :        ! ==============================================
    1148             : #if (defined COLUMN_OPENMP)
    1149             : !$omp parallel do num_threads(vert_num_threads) private(k,i,j,v1,v2,E,Ephi,vtemp,vgrad_T,gpterm,glnps1,glnps2)
    1150             : #endif
    1151  7324668000 :        vertloop: do k=1,nlev
    1152             :         !OMP_COLLAPSE_SIMD
    1153             :         !DIR_VECTOR_ALIGNED
    1154 36233730000 :          do j=1,np
    1155 >15218*10^7 :            do i=1,np
    1156 >11594*10^7 :              v1     = elem(ie)%state%v(i,j,1,k,n0)
    1157 >11594*10^7 :              v2     = elem(ie)%state%v(i,j,2,k,n0)
    1158 >11594*10^7 :              E = 0.5_r8*( v1*v1 + v2*v2 )
    1159 >14493*10^7 :              Ephi(i,j)=E+phi(i,j,k)
    1160             :            end do
    1161             :          end do
    1162             :          ! ================================================
    1163             :          ! compute gradp term (ps/p)*(dp/dps)*T
    1164             :          ! ================================================
    1165  7246746000 :          call gradient_sphere(elem(ie)%state%T(:,:,k,n0),deriv,elem(ie)%Dinv,vtemp)
    1166             :         !OMP_COLLAPSE_SIMD
    1167             :         !DIR_VECTOR_ALIGNED
    1168 36233730000 :          do j=1,np
    1169 >15218*10^7 :            do i=1,np
    1170 >11594*10^7 :              v1     = elem(ie)%state%v(i,j,1,k,n0)
    1171 >11594*10^7 :              v2     = elem(ie)%state%v(i,j,2,k,n0)
    1172 >14493*10^7 :              vgrad_T(i,j) =  v1*vtemp(i,j,1) + v2*vtemp(i,j,2)
    1173             :            end do
    1174             :          end do
    1175             : 
    1176             : 
    1177             :          ! vtemp = grad ( E + PHI )
    1178             :          ! vtemp = gradient_sphere(Ephi(:,:),deriv,elem(ie)%Dinv)
    1179  7246746000 :          call gradient_sphere(Ephi(:,:),deriv,elem(ie)%Dinv,vtemp)
    1180 >15218*10^7 :          density_inv(:,:) = R_dry(:,:,k)*T_v(:,:,k)/p_full(:,:,k)
    1181  7246746000 :          if (pgf_formulation==1.or.(pgf_formulation==3.and.hvcoord%hybm(k)>0._r8)) then
    1182  7246746000 :            if (dry_air_species_num==0) then
    1183 >15218*10^7 :              exner(:,:)=(p_full(:,:,k)/hvcoord%ps0)**kappa(:,:,k,ie)
    1184 >15218*10^7 :              theta_v(:,:)=T_v(:,:,k)/exner(:,:)
    1185  7246746000 :              call gradient_sphere(exner(:,:),deriv,elem(ie)%Dinv,grad_exner)
    1186 >15218*10^7 :              pgf_term(:,:,1) = cp_dry(:,:,k)*theta_v(:,:)*grad_exner(:,:,1)
    1187 >15218*10^7 :              pgf_term(:,:,2) = cp_dry(:,:,k)*theta_v(:,:)*grad_exner(:,:,2)
    1188             :            else
    1189           0 :              exner(:,:)=(p_full(:,:,k)/hvcoord%ps0)**kappa(:,:,k,ie)
    1190           0 :              theta_v(:,:)=T_v(:,:,k)/exner(:,:)
    1191           0 :              call gradient_sphere(exner(:,:),deriv,elem(ie)%Dinv,grad_exner)
    1192           0 :              call gradient_sphere(kappa(:,:,k,ie),deriv,elem(ie)%Dinv,grad_kappa_term)
    1193           0 :              suml = exner(:,:)*LOG(p_full(:,:,k)/hvcoord%ps0)
    1194           0 :              grad_kappa_term(:,:,1)=-suml*grad_kappa_term(:,:,1)
    1195           0 :              grad_kappa_term(:,:,2)=-suml*grad_kappa_term(:,:,2)
    1196           0 :              pgf_term(:,:,1) = cp_dry(:,:,k)*theta_v(:,:)*(grad_exner(:,:,1)+grad_kappa_term(:,:,1))
    1197           0 :              pgf_term(:,:,2) = cp_dry(:,:,k)*theta_v(:,:)*(grad_exner(:,:,2)+grad_kappa_term(:,:,2))
    1198             :            end if
    1199             :            ! balanced ref profile correction:
    1200             :            ! reference temperature profile (Simmons and Jiabin, 1991, QJRMS, Section 2a)
    1201             :            !
    1202             :            !  Tref = T0+T1*Exner
    1203             :            !  T1 = .0065*Tref*Cp/g ! = ~191
    1204             :            !  T0 = Tref-T1         ! = ~97
    1205             :            !
    1206  7246746000 :            T1 = lapse_rate*Tref*cpair*rga
    1207  7246746000 :            T0 = Tref-T1
    1208  7246746000 :            if (hvcoord%hybm(k)>0) then
    1209             :              !only apply away from constant pressure levels
    1210 81818100000 :              call gradient_sphere(log(exner(:,:)),deriv,elem(ie)%Dinv,grad_logexner)
    1211             :              pgf_term(:,:,1)=pgf_term(:,:,1) + &
    1212 81818100000 :                   cpair*T0*(grad_logexner(:,:,1)-grad_exner(:,:,1)/exner(:,:))
    1213             :              pgf_term(:,:,2)=pgf_term(:,:,2) + &
    1214 81818100000 :                   cpair*T0*(grad_logexner(:,:,2)-grad_exner(:,:,2)/exner(:,:))
    1215             :            end if
    1216           0 :          elseif (pgf_formulation==2.or.pgf_formulation==3) then
    1217           0 :            pgf_term(:,:,1)  = density_inv(:,:)*grad_p_full(:,:,1,k)
    1218           0 :            pgf_term(:,:,2)  = density_inv(:,:)*grad_p_full(:,:,2,k)
    1219             :          else
    1220           0 :            call endrun('ERROR: bad choice of pgf_formulation (must be 1, 2, or 3)')
    1221             :          end if
    1222             : 
    1223 36311652000 :          do j=1,np
    1224 >15218*10^7 :            do i=1,np
    1225 >11594*10^7 :              glnps1 = pgf_term(i,j,1)
    1226 >11594*10^7 :              glnps2 = pgf_term(i,j,2)
    1227 >11594*10^7 :              v1     = elem(ie)%state%v(i,j,1,k,n0)
    1228 >11594*10^7 :              v2     = elem(ie)%state%v(i,j,2,k,n0)
    1229             : 
    1230             :              vtens1(i,j,k) =   &
    1231             :                   + v2*(elem(ie)%fcor(i,j) + vort(i,j,k))        &
    1232 >11594*10^7 :                   - vtemp(i,j,1) - glnps1
    1233             : 
    1234             :              vtens2(i,j,k) =   &
    1235             :                   - v1*(elem(ie)%fcor(i,j) + vort(i,j,k))        &
    1236 >11594*10^7 :                   - vtemp(i,j,2) - glnps2
    1237             :              ttens(i,j,k)  =  - vgrad_T(i,j) + &
    1238 >14493*10^7 :                   density_inv(i,j)*omega_full(i,j,k)*inv_cp_full(i,j,k,ie)
    1239             :            end do
    1240             :          end do
    1241             : 
    1242             :        end do vertloop
    1243             : 
    1244             :        ! =========================================================
    1245             :        ! local element timestep, store in np1.
    1246             :        ! note that we allow np1=n0 or nm1
    1247             :        ! apply mass matrix
    1248             :        ! =========================================================
    1249             : #if (defined COLUMN_OPENMP)
    1250             : !$omp parallel do num_threads(vert_num_threads) private(k)
    1251             : #endif
    1252  7324668000 :        do k=1,nlev
    1253             :          !OMP_COLLAPSE_SIMD
    1254             :          !DIR_VECTOR_ALIGNED
    1255 36233730000 :          do j=1,np
    1256 >15218*10^7 :            do i=1,np
    1257 >11594*10^7 :              elem(ie)%state%v(i,j,1,k,np1) = elem(ie)%spheremp(i,j)*( elem(ie)%state%v(i,j,1,k,nm1) + dt2*vtens1(i,j,k) )
    1258 >11594*10^7 :              elem(ie)%state%v(i,j,2,k,np1) = elem(ie)%spheremp(i,j)*( elem(ie)%state%v(i,j,2,k,nm1) + dt2*vtens2(i,j,k) )
    1259 >11594*10^7 :              elem(ie)%state%T(i,j,k,np1) = elem(ie)%spheremp(i,j)*(elem(ie)%state%T(i,j,k,nm1) + dt2*ttens(i,j,k))
    1260             :              elem(ie)%state%dp3d(i,j,k,np1) = &
    1261             :                   elem(ie)%spheremp(i,j) * (elem(ie)%state%dp3d(i,j,k,nm1) - &
    1262 >14493*10^7 :                   dt2 * (divdp_dry(i,j,k)))
    1263             :            enddo
    1264             :          enddo
    1265             : 
    1266             : 
    1267  7324668000 :          if (use_cslam.and.eta_ave_w.ne.0._r8) then
    1268             :            !OMP_COLLAPSE_SIMD
    1269             :            !DIR_VECTOR_ALIGNED
    1270 14493492000 :            do j=1,np
    1271 60872666400 :              do i=1,np
    1272 46379174400 :                v(i,j,1) =  elem(ie)%Dinv(i,j,1,1)*vdp_dry(i,j,1,k) + elem(ie)%Dinv(i,j,1,2)*vdp_dry(i,j,2,k)
    1273 57973968000 :                v(i,j,2) =  elem(ie)%Dinv(i,j,2,1)*vdp_dry(i,j,1,k) + elem(ie)%Dinv(i,j,2,2)*vdp_dry(i,j,2,k)
    1274             :              enddo
    1275             :            enddo
    1276  2898698400 :            call subcell_div_fluxes(v, np, nc, elem(ie)%metdet,tempflux)
    1277 >15363*10^7 :            elem(ie)%sub_elem_mass_flux(:,:,:,k) = elem(ie)%sub_elem_mass_flux(:,:,:,k) - eta_ave_w*tempflux
    1278             :          end if
    1279             :        enddo
    1280             :        ! =========================================================
    1281             :        !
    1282             :        ! Pack
    1283             :        !
    1284             :        ! =========================================================
    1285    77922000 :        kptr=0
    1286    77922000 :        call edgeVpack(edge3, elem(ie)%state%T(:,:,:,np1),nlev,kptr,ie)
    1287             : 
    1288    77922000 :        kptr=nlev
    1289    77922000 :        call edgeVpack(edge3, elem(ie)%state%v(:,:,:,:,np1),2*nlev,kptr,ie)
    1290             : 
    1291    77922000 :        kptr=kptr+2*nlev
    1292    89004240 :        call edgeVpack(edge3, elem(ie)%state%dp3d(:,:,:,np1),nlev,kptr, ie)
    1293             :      end do
    1294             : 
    1295             :      ! =============================================================
    1296             :      ! Insert communications here: for shared memory, just a single
    1297             :      ! sync is required
    1298             :      ! =============================================================
    1299    11082240 :      call bndry_exchange(hybrid,edge3,location='edge3')
    1300    89004240 :      do ie=nets,nete
    1301             :        ! ===========================================================
    1302             :        ! Unpack the edges for vgrad_T and v tendencies...
    1303             :        ! ===========================================================
    1304    77922000 :        kptr=0
    1305    77922000 :        call edgeVunpack(edge3, elem(ie)%state%T(:,:,:,np1), nlev, kptr, ie)
    1306             : 
    1307    77922000 :        kptr=nlev
    1308    77922000 :        call edgeVunpack(edge3, elem(ie)%state%v(:,:,:,:,np1), 2*nlev, kptr, ie)
    1309             : 
    1310    77922000 :        if (use_cslam.and.eta_ave_w.ne.0._r8) then
    1311  2929867200 :          do k=1,nlev
    1312 60903835200 :            stashdp3d(:,:,k) = elem(ie)%state%dp3d(:,:,k,np1)/elem(ie)%spheremp(:,:)
    1313             :          end do
    1314             :        endif
    1315             : 
    1316    77922000 :        corners = 0.0_r8
    1317 >15225*10^7 :        corners(1:np,1:np,:) = elem(ie)%state%dp3d(:,:,:,np1)
    1318    77922000 :        kptr=kptr+2*nlev
    1319    77922000 :        call edgeVunpack(edge3, elem(ie)%state%dp3d(:,:,:,np1),nlev,kptr,ie)
    1320             : 
    1321    77922000 :        if  (use_cslam.and.eta_ave_w.ne.0._r8) then
    1322    31168800 :          desc = elem(ie)%desc
    1323             : 
    1324    31168800 :          call edgeDGVunpack(edge3, corners, nlev, kptr, ie)
    1325             : 
    1326 >12467*10^7 :          corners = corners/dt2
    1327             : 
    1328  2929867200 :          do k=1,nlev
    1329 60872666400 :            tempdp3d = elem(ie)%rspheremp(:,:)*elem(ie)%state%dp3d(:,:,k,np1)
    1330 60872666400 :            tempdp3d = tempdp3d - stashdp3d(:,:,k)
    1331 60872666400 :            tempdp3d = tempdp3d/dt2
    1332             : 
    1333  2898698400 :            call distribute_flux_at_corners(cflux, corners(:,:,k), desc%getmapP)
    1334             : 
    1335  8696095200 :            cflux(1,1,:)   = elem(ie)%rspheremp(1,  1) * cflux(1,1,:)
    1336  8696095200 :            cflux(2,1,:)   = elem(ie)%rspheremp(np, 1) * cflux(2,1,:)
    1337  8696095200 :            cflux(1,2,:)   = elem(ie)%rspheremp(1, np) * cflux(1,2,:)
    1338  8696095200 :            cflux(2,2,:)   = elem(ie)%rspheremp(np,np) * cflux(2,2,:)
    1339             : 
    1340  2898698400 :            call subcell_dss_fluxes(tempdp3d, np, nc, elem(ie)%metdet, cflux,tempflux)
    1341 >15366*10^7 :            elem(ie)%sub_elem_mass_flux(:,:,:,k) = elem(ie)%sub_elem_mass_flux(:,:,:,k) + eta_ave_w*tempflux
    1342             :          end do
    1343             :        end if
    1344             : 
    1345             :        ! ====================================================
    1346             :        ! Scale tendencies by inverse mass matrix
    1347             :        ! ====================================================
    1348             : 
    1349             : #if (defined COLUMN_OPENMP)
    1350             : !$omp parallel do num_threads(vert_num_threads) private(k)
    1351             : #endif
    1352  7324668000 :        do k=1,nlev
    1353             :          !OMP_COLLAPSE_SIMD
    1354             :          !DIR_VECTOR_ALIGNED
    1355 36311652000 :          do j=1,np
    1356 >15218*10^7 :          do i=1,np
    1357 >11594*10^7 :              elem(ie)%state%T(i,j,k,np1)   = elem(ie)%rspheremp(i,j)*elem(ie)%state%T(i,j,k,np1)
    1358 >11594*10^7 :              elem(ie)%state%v(i,j,1,k,np1) = elem(ie)%rspheremp(i,j)*elem(ie)%state%v(i,j,1,k,np1)
    1359 >14493*10^7 :              elem(ie)%state%v(i,j,2,k,np1) = elem(ie)%rspheremp(i,j)*elem(ie)%state%v(i,j,2,k,np1)
    1360             :          enddo
    1361             :          enddo
    1362             :        end do
    1363             : 
    1364             :        ! vertically lagrangian: complete dp3d timestep:
    1365  7335750240 :        do k=1,nlev
    1366 >15225*10^7 :          elem(ie)%state%dp3d(:,:,k,np1)= elem(ie)%rspheremp(:,:)*elem(ie)%state%dp3d(:,:,k,np1)
    1367             :        enddo
    1368             :      end do
    1369             : 
    1370    11082240 :      call t_stopf('compute_and_apply_rhs')
    1371    11082240 :      call t_adj_detailf(-1)
    1372    22164480 :    end subroutine compute_and_apply_rhs
    1373             : 
    1374             : 
    1375             :    !
    1376             :    ! corner fluxes for CSLAM
    1377             :    !
    1378  7433758800 :    subroutine distribute_flux_at_corners(cflux, corners, getmapP)
    1379    11082240 :      use dimensions_mod, only : np, max_corner_elem
    1380             :      use control_mod,    only : swest
    1381             : 
    1382             :      real(r8), intent(out) :: cflux(2,2,2)
    1383             :      real(r8), intent(in)  :: corners(0:np+1,0:np+1)
    1384             :      integer,  intent(in)  :: getmapP(:)
    1385             : 
    1386  7433758800 :      cflux = 0.0_r8
    1387  7433758800 :      if (getmapP(swest+0*max_corner_elem) /= -1) then
    1388  7425499068 :        cflux(1,1,1) =                (corners(0,1) - corners(1,1))
    1389  7425499068 :        cflux(1,1,1) = cflux(1,1,1) + (corners(0,0) - corners(1,1)) / 2.0_r8
    1390  7425499068 :        cflux(1,1,1) = cflux(1,1,1) + (corners(0,1) - corners(1,0)) / 2.0_r8
    1391             : 
    1392  7425499068 :        cflux(1,1,2) =                (corners(1,0) - corners(1,1))
    1393  7425499068 :        cflux(1,1,2) = cflux(1,1,2) + (corners(0,0) - corners(1,1)) / 2.0_r8
    1394  7425499068 :        cflux(1,1,2) = cflux(1,1,2) + (corners(1,0) - corners(0,1)) / 2.0_r8
    1395             :      else
    1396     8259732 :        cflux(1,1,1) =                (corners(0,1) - corners(1,1))
    1397     8259732 :        cflux(1,1,2) =                (corners(1,0) - corners(1,1))
    1398             :      endif
    1399             : 
    1400  7433758800 :      if (getmapP(swest+1*max_corner_elem) /= -1) then
    1401  7425499068 :        cflux(2,1,1) =                (corners(np+1,1) - corners(np,1))
    1402  7425499068 :        cflux(2,1,1) = cflux(2,1,1) + (corners(np+1,0) - corners(np,1)) / 2.0_r8
    1403  7425499068 :        cflux(2,1,1) = cflux(2,1,1) + (corners(np+1,1) - corners(np,0)) / 2.0_r8
    1404             : 
    1405  7425499068 :        cflux(2,1,2) =                (corners(np  ,0) - corners(np,  1))
    1406  7425499068 :        cflux(2,1,2) = cflux(2,1,2) + (corners(np+1,0) - corners(np,  1)) / 2.0_r8
    1407  7425499068 :        cflux(2,1,2) = cflux(2,1,2) + (corners(np  ,0) - corners(np+1,1)) / 2.0_r8
    1408             :      else
    1409     8259732 :        cflux(2,1,1) =                (corners(np+1,1) - corners(np,1))
    1410     8259732 :        cflux(2,1,2) =                (corners(np  ,0) - corners(np,1))
    1411             :      endif
    1412             : 
    1413  7433758800 :      if (getmapP(swest+2*max_corner_elem) /= -1) then
    1414  7425499068 :        cflux(1,2,1) =                (corners(0,np  ) - corners(1,np  ))
    1415  7425499068 :        cflux(1,2,1) = cflux(1,2,1) + (corners(0,np+1) - corners(1,np  )) / 2.0_r8
    1416  7425499068 :        cflux(1,2,1) = cflux(1,2,1) + (corners(0,np  ) - corners(1,np+1)) / 2.0_r8
    1417             : 
    1418  7425499068 :        cflux(1,2,2) =                (corners(1,np+1) - corners(1,np  ))
    1419  7425499068 :        cflux(1,2,2) = cflux(1,2,2) + (corners(0,np+1) - corners(1,np  )) / 2.0_r8
    1420  7425499068 :        cflux(1,2,2) = cflux(1,2,2) + (corners(1,np+1) - corners(0,np  )) / 2.0_r8
    1421             :      else
    1422     8259732 :        cflux(1,2,1) =                (corners(0,np  ) - corners(1,np  ))
    1423     8259732 :        cflux(1,2,2) =                (corners(1,np+1) - corners(1,np  ))
    1424             :      endif
    1425             : 
    1426  7433758800 :      if (getmapP(swest+3*max_corner_elem) /= -1) then
    1427  7425499068 :        cflux(2,2,1) =                (corners(np+1,np  ) - corners(np,np  ))
    1428  7425499068 :        cflux(2,2,1) = cflux(2,2,1) + (corners(np+1,np+1) - corners(np,np  )) / 2.0_r8
    1429  7425499068 :        cflux(2,2,1) = cflux(2,2,1) + (corners(np+1,np  ) - corners(np,np+1)) / 2.0_r8
    1430             : 
    1431  7425499068 :        cflux(2,2,2) =                (corners(np  ,np+1) - corners(np,np  ))
    1432  7425499068 :        cflux(2,2,2) = cflux(2,2,2) + (corners(np+1,np+1) - corners(np,np  )) / 2.0_r8
    1433  7425499068 :        cflux(2,2,2) = cflux(2,2,2) + (corners(np  ,np+1) - corners(np+1,np)) / 2.0_r8
    1434             :      else
    1435     8259732 :        cflux(2,2,1) =                (corners(np+1,np  ) - corners(np,np  ))
    1436     8259732 :        cflux(2,2,2) =                (corners(np  ,np+1) - corners(np,np  ))
    1437             :      endif
    1438  7433758800 :    end subroutine distribute_flux_at_corners
    1439             : 
    1440    33248256 :   subroutine tot_energy_dyn(elem,fvm,nets,nete,tl,tl_qdp,outfld_name_suffix)
    1441             :     use dimensions_mod,         only: npsq,nlev,np,nc,use_cslam,qsize
    1442             :     use physconst,              only: rga, cpair, rearth, omega
    1443             :     use element_mod,            only: element_t
    1444             :     use cam_history,            only: outfld
    1445             :     use cam_history_support,    only: max_fieldname_len
    1446             :     use constituents,           only: cnst_get_ind
    1447             :     use string_utils,           only: strlist_get_ind
    1448             :     use hycoef,                 only: hyai, ps0
    1449             :     use fvm_control_volume_mod, only: fvm_struct
    1450             :     use cam_thermo,             only: get_dp, MASS_MIXING_RATIO,wvidx,wlidx,wiidx,seidx,keidx,moidx,mridx,ttidx,teidx, &
    1451             :                                       poidx,thermo_budget_num_vars,thermo_budget_vars
    1452             :     use cam_thermo,             only: get_hydrostatic_energy
    1453             :     use air_composition,        only: thermodynamic_active_species_idx_dycore, get_cp
    1454             :     use air_composition,        only: thermodynamic_active_species_num,    thermodynamic_active_species_idx_dycore
    1455             :     use air_composition,        only: thermodynamic_active_species_liq_num,thermodynamic_active_species_liq_idx
    1456             :     use air_composition,        only: thermodynamic_active_species_ice_num,thermodynamic_active_species_ice_idx
    1457             :     use dimensions_mod,         only: cnst_name_gll
    1458             :     use dyn_tests_utils,        only: vcoord=>vc_dry_pressure
    1459             :     use cam_budget,             only: thermo_budget_history
    1460             :     !------------------------------Arguments--------------------------------
    1461             : 
    1462             :     type (element_t) , intent(inout) :: elem(:)
    1463             :     type(fvm_struct) , intent(inout) :: fvm(:)
    1464             :     integer          , intent(in) :: tl, tl_qdp,nets,nete
    1465             :     character*(*)    , intent(in) :: outfld_name_suffix ! suffix for "outfld" names
    1466             : 
    1467             :     !---------------------------Local storage-------------------------------
    1468             : 
    1469             :     real(kind=r8) :: se(np,np)                       ! Enthalpy energy (J/m2)
    1470             :     real(kind=r8) :: ke(np,np)                       ! kinetic energy    (J/m2)
    1471             :     real(kind=r8) :: po(np,np)                       ! PHIS term in energy equation   (J/m2)
    1472             :     real(kind=r8) :: wv(np,np)                       ! water vapor
    1473             :     real(kind=r8) :: liq(np,np)                      ! liquid
    1474             :     real(kind=r8) :: ice(np,np)                      ! ice
    1475             : 
    1476    66496512 :     real(kind=r8) :: q(np,nlev,qsize)
    1477    66496512 :     integer       :: qidx(thermodynamic_active_species_num)
    1478             :     real(kind=r8) :: cdp_fvm(nc,nc,nlev)
    1479             :     real(kind=r8) :: cdp(np,np,nlev)
    1480             :     real(kind=r8) :: ptop(np,np)
    1481             :     real(kind=r8) :: pdel(np,np,nlev)
    1482             :     real(kind=r8) :: cp(np,np,nlev)
    1483             : 
    1484             :     !
    1485             :     ! global axial angular momentum (AAM) can be separated into one part (mr) associatedwith the relative motion
    1486             :     ! of the atmosphere with respect to the planets surface (also known as wind AAM) and another part (mo)
    1487             :     ! associated with the angular velocity OMEGA (2*pi/d, where d is the length of the day) of the planet
    1488             :     ! (also known as mass AAM)
    1489             :     !
    1490             :     real(kind=r8) :: mr(npsq)  ! wind AAM
    1491             :     real(kind=r8) :: mo(npsq)  ! mass AAM
    1492             :     real(kind=r8) :: mr_cnst, mo_cnst, cos_lat, mr_tmp, mo_tmp
    1493             : 
    1494             :     integer :: ie,i,j,k,m_cnst,nq,idx
    1495             :     integer :: ixwv,ixcldice, ixcldliq, ixtt ! CLDICE, CLDLIQ and test tracer indices
    1496             :     character(len=max_fieldname_len) :: name_out(thermo_budget_num_vars)
    1497             : 
    1498             :     !-----------------------------------------------------------------------
    1499             : 
    1500    33248256 :     if (thermo_budget_history) then
    1501           0 :     do i=1,thermo_budget_num_vars
    1502           0 :        name_out(i)=trim(thermo_budget_vars(i))//'_'//trim(outfld_name_suffix)
    1503             :     end do
    1504             : 
    1505           0 :       if (use_cslam) then
    1506           0 :         ixwv = 1
    1507           0 :         call cnst_get_ind('CLDLIQ' , ixcldliq, abort=.false.)
    1508           0 :         call cnst_get_ind('CLDICE' , ixcldice, abort=.false.)
    1509             :       else
    1510             :         !
    1511             :         ! when using CSLAM the condensates on the GLL grid may be located in a different index than in physics
    1512             :         !
    1513           0 :         ixwv = -1
    1514           0 :         call strlist_get_ind(cnst_name_gll, 'CLDLIQ' , ixcldliq, abort=.false.)
    1515           0 :         call strlist_get_ind(cnst_name_gll, 'CLDICE' , ixcldice, abort=.false.)
    1516             :       end if
    1517           0 :       call cnst_get_ind('TT_LW' , ixtt    , abort=.false.)
    1518             :       !
    1519             :       ! Compute frozen static energy in 3 parts:  KE, SE, and energy associated with vapor and liquid
    1520             :       !
    1521           0 :       do nq=1,thermodynamic_active_species_num
    1522           0 :         qidx(nq) = nq
    1523             :       end do
    1524           0 :       do ie=nets,nete
    1525           0 :         call get_cp(elem(ie)%state%Qdp(:,:,:,1:qsize,tl_qdp),&
    1526           0 :              .false., cp, factor=1.0_r8/elem(ie)%state%dp3d(:,:,:,tl),&
    1527           0 :              active_species_idx_dycore=thermodynamic_active_species_idx_dycore)
    1528           0 :         ptop = hyai(1)*ps0
    1529           0 :         do j=1,np
    1530             :           !get mixing ratio of thermodynamic active species only
    1531             :           !(other tracers not used in get_hydrostatic_energy)
    1532           0 :           do nq=1,thermodynamic_active_species_num
    1533           0 :             m_cnst = thermodynamic_active_species_idx_dycore(nq)
    1534           0 :             q(:,:,m_cnst) = elem(ie)%state%Qdp(:,j,:,m_cnst,tl_qdp)/&
    1535           0 :                  elem(ie)%state%dp3d(:,j,:,tl)
    1536             :           end do
    1537             :           call get_hydrostatic_energy(q, &
    1538           0 :                .false., elem(ie)%state%dp3d(:,j,:,tl), cp(:,j,:), elem(ie)%state%v(:,j,1,:,tl), &
    1539             :                elem(ie)%state%v(:,j,2,:,tl), elem(ie)%state%T(:,j,:,tl), vcoord, ptop=ptop(:,j),&
    1540             :                phis=elem(ie)%state%phis(:,j), dycore_idx=.true.,                                &
    1541           0 :                se=se(:,j), po=po(:,j), ke=ke(:,j), wv=wv(:,j), liq=liq(:,j), ice=ice(:,j))
    1542             :         end do
    1543             :         !
    1544             :         ! Output energy diagnostics on GLL grid
    1545             :         !
    1546           0 :         call outfld(name_out(poidx)  ,po       ,npsq,ie)
    1547           0 :         call outfld(name_out(seidx)  ,se       ,npsq,ie)
    1548           0 :         call outfld(name_out(keidx)  ,ke       ,npsq,ie)
    1549           0 :         call outfld(name_out(teidx)  ,ke+se+po ,npsq,ie)
    1550             :         !
    1551             :         ! mass variables are output on CSLAM grid if using CSLAM else GLL grid
    1552             :         !
    1553           0 :         if (use_cslam) then
    1554           0 :            if (ixwv>0) then
    1555           0 :               cdp_fvm = fvm(ie)%c(1:nc,1:nc,:,ixwv)*fvm(ie)%dp_fvm(1:nc,1:nc,:)
    1556           0 :               call util_function(cdp_fvm,nc,nlev,name_out(wvidx),ie)
    1557             :            end if
    1558             :            !
    1559             :            ! sum over liquid water
    1560             :            !
    1561           0 :            if (thermodynamic_active_species_liq_num>0) then
    1562           0 :               cdp_fvm = 0.0_r8
    1563           0 :               do nq = 1,thermodynamic_active_species_liq_num
    1564           0 :                 cdp_fvm = cdp_fvm + fvm(ie)%c(1:nc,1:nc,:,thermodynamic_active_species_liq_idx(nq))&
    1565           0 :                      *fvm(ie)%dp_fvm(1:nc,1:nc,:)
    1566             :               end do
    1567           0 :               call util_function(cdp_fvm,nc,nlev,name_out(wlidx),ie)
    1568             :            end if
    1569             :            !
    1570             :            ! sum over ice water
    1571             :            !
    1572           0 :            if (thermodynamic_active_species_ice_num>0) then
    1573           0 :              cdp_fvm = 0.0_r8
    1574           0 :              do nq = 1,thermodynamic_active_species_ice_num
    1575           0 :                cdp_fvm = cdp_fvm + fvm(ie)%c(1:nc,1:nc,:,thermodynamic_active_species_ice_idx(nq))&
    1576           0 :                    *fvm(ie)%dp_fvm(1:nc,1:nc,:)
    1577             :              end do
    1578           0 :              call util_function(cdp_fvm,nc,nlev,name_out(wiidx),ie)
    1579             :            end if
    1580           0 :            if (ixtt>0) then
    1581           0 :               cdp_fvm = fvm(ie)%c(1:nc,1:nc,:,ixtt)*fvm(ie)%dp_fvm(1:nc,1:nc,:)
    1582           0 :               call util_function(cdp_fvm,nc,nlev,name_out(ttidx),ie)
    1583             :            end if
    1584             :         else
    1585           0 :            cdp = elem(ie)%state%qdp(:,:,:,1,tl_qdp)
    1586           0 :            call util_function(cdp,np,nlev,name_out(wvidx),ie)
    1587             :            !
    1588             :            ! sum over liquid water
    1589             :            !
    1590           0 :            if (thermodynamic_active_species_liq_num>0) then
    1591           0 :               cdp = 0.0_r8
    1592           0 :               do idx = 1,thermodynamic_active_species_liq_num
    1593           0 :                  cdp = cdp + elem(ie)%state%qdp(:,:,:,thermodynamic_active_species_liq_idx(idx),tl_qdp)
    1594             :               end do
    1595           0 :               call util_function(cdp,np,nlev,name_out(wlidx),ie)
    1596             :            end if
    1597             :            !
    1598             :            ! sum over ice water
    1599             :            !
    1600           0 :            if (thermodynamic_active_species_ice_num>0) then
    1601           0 :               cdp = 0.0_r8
    1602           0 :               do idx = 1,thermodynamic_active_species_ice_num
    1603           0 :                  cdp = cdp + elem(ie)%state%qdp(:,:,:,thermodynamic_active_species_ice_idx(idx),tl_qdp)
    1604             :               end do
    1605           0 :               call util_function(cdp,np,nlev,name_out(wiidx),ie)
    1606             :            end if
    1607           0 :            if (ixtt>0) then
    1608           0 :               cdp = elem(ie)%state%qdp(:,:,:,ixtt    ,tl_qdp)
    1609           0 :               call util_function(cdp,np,nlev,name_out(ttidx),ie)
    1610             :            end if
    1611             :         end if
    1612             :      end do
    1613             :   !
    1614             :   ! Axial angular momentum diagnostics
    1615             :   !
    1616             :   ! Code follows
    1617             :   !
    1618             :   ! Lauritzen et al., (2014): Held-Suarez simulations with the Community Atmosphere Model
    1619             :     ! Spectral Element (CAM-SE) dynamical core: A global axial angularmomentum analysis using Eulerian
    1620             :     ! and floating Lagrangian vertical coordinates. J. Adv. Model. Earth Syst. 6,129-140,
    1621             :     ! doi:10.1002/2013MS000268
    1622             :     !
    1623             :     ! MR is equation (6) without \Delta A and sum over areas (areas are in units of radians**2)
    1624             :     ! MO is equation (7) without \Delta A and sum over areas (areas are in units of radians**2)
    1625             :     !
    1626             : 
    1627           0 :       call strlist_get_ind(cnst_name_gll, 'CLDLIQ', ixcldliq, abort=.false.)
    1628           0 :       call strlist_get_ind(cnst_name_gll, 'CLDICE', ixcldice, abort=.false.)
    1629           0 :       mr_cnst = rga*rearth**3
    1630           0 :       mo_cnst = rga*omega*rearth**4
    1631           0 :       do ie=nets,nete
    1632           0 :         mr    = 0.0_r8
    1633           0 :         mo    = 0.0_r8
    1634           0 :         call get_dp(elem(ie)%state%Qdp(:,:,:,1:qsize,tl_qdp), MASS_MIXING_RATIO, thermodynamic_active_species_idx_dycore,&
    1635           0 :              elem(ie)%state%dp3d(:,:,:,tl), pdel)
    1636           0 :         do k = 1, nlev
    1637           0 :           do j=1,np
    1638           0 :             do i = 1, np
    1639           0 :               cos_lat = cos(elem(ie)%spherep(i,j)%lat)
    1640           0 :               mr_tmp   = mr_cnst*elem(ie)%state%v(i,j,1,k,tl)*pdel(i,j,k)*cos_lat
    1641           0 :               mo_tmp   = mo_cnst*pdel(i,j,k)*cos_lat**2
    1642             : 
    1643           0 :               mr   (i+(j-1)*np) = mr   (i+(j-1)*np) + mr_tmp
    1644           0 :               mo   (i+(j-1)*np) = mo   (i+(j-1)*np) + mo_tmp
    1645             :             end do
    1646             :           end do
    1647             :         end do
    1648           0 :         call outfld(name_out(mridx)  ,mr       ,npsq,ie)
    1649           0 :         call outfld(name_out(moidx)  ,mo       ,npsq,ie)
    1650             :       end do
    1651             :    endif ! if thermo budget history
    1652             : 
    1653    33248256 :   end subroutine tot_energy_dyn
    1654             : 
    1655             : 
    1656     1477632 :   subroutine output_qdp_var_dynamics(qdp,nx,num_trac,nets,nete,outfld_name)
    1657    33248256 :     use dimensions_mod, only: nlev
    1658             :     use cam_history   , only: hist_fld_active
    1659             :     use constituents  , only: cnst_get_ind
    1660             :     !------------------------------Arguments--------------------------------
    1661             : 
    1662             :     integer      ,intent(in) :: nx,num_trac,nets,nete
    1663             :     real(kind=r8) :: qdp(nx,nx,nlev,num_trac,nets:nete)
    1664             :     character*(*),intent(in) :: outfld_name
    1665             : 
    1666             :     !---------------------------Local storage-------------------------------
    1667             : 
    1668             :     integer :: ie
    1669             :     integer :: ixcldice, ixcldliq, ixtt
    1670             :     character(len=16) :: name_out1,name_out2,name_out3,name_out4
    1671             :     !-----------------------------------------------------------------------
    1672             : 
    1673      738816 :     name_out1 = 'WV_'   //trim(outfld_name)
    1674      738816 :     name_out2 = 'WI_'   //trim(outfld_name)
    1675      738816 :     name_out3 = 'WL_'   //trim(outfld_name)
    1676      738816 :     name_out4 = 'TT_'   //trim(outfld_name)
    1677             : 
    1678      738816 :     if ( hist_fld_active(name_out1).or.hist_fld_active(name_out2).or.hist_fld_active(name_out3).or.&
    1679             :          hist_fld_active(name_out4)) then
    1680             : 
    1681           0 :       call cnst_get_ind('CLDLIQ', ixcldliq, abort=.false.)
    1682           0 :       call cnst_get_ind('CLDICE', ixcldice, abort=.false.)
    1683           0 :       call cnst_get_ind('TT_LW' , ixtt    , abort=.false.)
    1684             : 
    1685           0 :       do ie=nets,nete
    1686           0 :         call util_function(qdp(:,:,:,1,ie),nx,nlev,name_out1,ie)
    1687           0 :         if (ixcldice>0) call util_function(qdp(:,:,:,ixcldice,ie),nx,nlev,name_out2,ie)
    1688           0 :         if (ixcldliq>0) call util_function(qdp(:,:,:,ixcldliq,ie),nx,nlev,name_out3,ie)
    1689           0 :         if (ixtt>0    ) call util_function(qdp(:,:,:,ixtt    ,ie),nx,nlev,name_out4,ie)
    1690             :       end do
    1691             :     end if
    1692      738816 :   end subroutine output_qdp_var_dynamics
    1693             : 
    1694             :   !
    1695             :   ! column integrate mass-variable and outfld
    1696             :   !
    1697           0 :   subroutine util_function(f_in,nx,nz,name_out,ie)
    1698      738816 :     use physconst,   only: rga
    1699             :     use cam_history, only: outfld, hist_fld_active
    1700             :     integer,           intent(in) :: nx,nz,ie
    1701             :     real(kind=r8),     intent(in) :: f_in(nx,nx,nz)
    1702             :     character(len=16), intent(in) :: name_out
    1703           0 :     real(kind=r8)       :: f_out(nx*nx)
    1704             :     integer             :: i,j,k
    1705           0 :     if (hist_fld_active(name_out)) then
    1706           0 :       f_out = 0.0_r8
    1707           0 :       do k = 1, nz
    1708           0 :         do j = 1, nx
    1709           0 :           do i = 1, nx
    1710           0 :             f_out(i+(j-1)*nx) = f_out(i+(j-1)*nx) + f_in(i,j,k)
    1711             :           end do
    1712             :         end do
    1713             :       end do
    1714           0 :       f_out = f_out*rga
    1715           0 :       call outfld(name_out,f_out,nx*nx,ie)
    1716             :     end if
    1717           0 :   end subroutine util_function
    1718             : 
    1719      370944 :    subroutine compute_omega(hybrid,n0,qn0,elem,deriv,nets,nete,dt,hvcoord)
    1720           0 :      use control_mod,    only: nu_p, hypervis_subcycle
    1721             :      use dimensions_mod, only: np, nlev, qsize
    1722             :      use hybrid_mod,     only: hybrid_t
    1723             :      use element_mod,    only: element_t
    1724             :      use derivative_mod, only: divergence_sphere, derivative_t,gradient_sphere
    1725             :      use hybvcoord_mod,  only: hvcoord_t
    1726             :      use edge_mod,       only: edgevpack, edgevunpack
    1727             :      use bndry_mod,      only: bndry_exchange
    1728             :      use viscosity_mod,  only: biharmonic_wk_omega
    1729             :      use cam_thermo,     only: get_dp, MASS_MIXING_RATIO
    1730             :      use air_composition,only: thermodynamic_active_species_idx_dycore
    1731             :      implicit none
    1732             :      type (hybrid_t)      , intent(in)            :: hybrid
    1733             :      type (element_t)     , intent(inout), target :: elem(:)
    1734             :      type (derivative_t)  , intent(in)            :: deriv
    1735             :      integer              , intent(in)            :: nets,nete,n0,qn0
    1736             :      real (kind=r8)       , intent(in)            :: dt
    1737             :      type (hvcoord_t)     , intent(in)            :: hvcoord
    1738             : 
    1739             :      integer        :: i,j,k,ie,kptr,ic
    1740             :      real (kind=r8) :: ckk, suml(np,np), v1, v2, term
    1741             :      real (kind=r8) :: dp_full(np,np,nlev)
    1742             :      real (kind=r8) :: p_full(np,np,nlev),grad_p_full(np,np,2),vgrad_p_full(np,np,nlev)
    1743             :      real (kind=r8) :: divdp_full(np,np,nlev),vdp_full(np,np,2)
    1744      741888 :      real(kind=r8)  :: Otens(np,np  ,nlev,nets:nete), dt_hyper
    1745             : 
    1746             :      logical, parameter  :: del4omega = .true.
    1747             : 
    1748     2979144 :      do ie=nets,nete
    1749     5216400 :         call get_dp(elem(ie)%state%Qdp(:,:,:,1:qsize,qn0), MASS_MIXING_RATIO,&
    1750     7824600 :            thermodynamic_active_species_idx_dycore, elem(ie)%state%dp3d(:,:,:,n0), dp_full)
    1751   245170800 :         do k=1,nlev
    1752   242562600 :            if (k==1) then
    1753    54772200 :               p_full(:,:,k) = hvcoord%hyai(k)*hvcoord%ps0 + dp_full(:,:,k)/2
    1754             :            else
    1755  5039042400 :               p_full(:,:,k)=p_full(:,:,k-1) + dp_full(:,:,k-1)/2 + dp_full(:,:,k)/2
    1756             :            endif
    1757   242562600 :            call gradient_sphere(p_full(:,:,k),deriv,elem(ie)%Dinv,grad_p_full (:,:,:))
    1758  1212813000 :          do j=1,np
    1759  5093814600 :            do i=1,np
    1760  3881001600 :              v1 = elem(ie)%state%v(i,j,1,k,n0)
    1761  3881001600 :              v2 = elem(ie)%state%v(i,j,2,k,n0)
    1762  3881001600 :              vdp_full(i,j,1) = dp_full(i,j,k)*v1
    1763  3881001600 :              vdp_full(i,j,2) = dp_full(i,j,k)*v2
    1764  4851252000 :              vgrad_p_full(i,j,k) = (v1*grad_p_full(i,j,1) + v2*grad_p_full(i,j,2))
    1765             :            end do
    1766             :          end do
    1767   245170800 :          call divergence_sphere(vdp_full(:,:,:),deriv,elem(ie),divdp_full(:,:,k))
    1768             :        end do
    1769     2608200 :        ckk       = 0.5_r8
    1770     2608200 :        suml(:,:  ) = 0
    1771   245541744 :        do k=1,nlev
    1772  1215421200 :          do j=1,np   !   Loop inversion (AAM)
    1773  5093814600 :            do i=1,np
    1774  3881001600 :              term         = -divdp_full(i,j,k)
    1775             : 
    1776  3881001600 :              v1 = elem(ie)%state%v(i,j,1,k,n0)
    1777  3881001600 :              v2 = elem(ie)%state%v(i,j,2,k,n0)
    1778             : 
    1779  3881001600 :              elem(ie)%derived%omega(i,j,k) = suml(i,j) + ckk*term+vgrad_p_full(i,j,k)
    1780             : 
    1781  4851252000 :              suml(i,j)    = suml(i,j)   + term
    1782             :            end do
    1783             :          end do
    1784             :        end do
    1785             :      end do
    1786     2979144 :      do ie=nets,nete
    1787   245170800 :        do k=1,nlev
    1788  5096422800 :          elem(ie)%derived%omega(:,:,k) = elem(ie)%spheremp(:,:)*elem(ie)%derived%omega(:,:,k)
    1789             :        end do
    1790     2608200 :        kptr=0
    1791     2979144 :        call edgeVpack(edgeOmega, elem(ie)%derived%omega(:,:,:),nlev,kptr, ie)
    1792             :      end do
    1793      370944 :      call bndry_exchange(hybrid,edgeOmega,location='compute_omega #1')
    1794     2979144 :      do ie=nets,nete
    1795     2608200 :        kptr=0
    1796     2608200 :        call edgeVunpack(edgeOmega, elem(ie)%derived%omega(:,:,:),nlev,kptr, ie)
    1797   245541744 :        do k=1,nlev
    1798  5096422800 :          elem(ie)%derived%omega(:,:,k) = elem(ie)%rspheremp(:,:)*elem(ie)%derived%omega(:,:,k)
    1799             :        end do
    1800             :      end do
    1801             : 
    1802             :      if  (del4omega) then
    1803      370944 :        dt_hyper=dt/hypervis_subcycle
    1804     1483776 :        do ic=1,hypervis_subcycle
    1805     8937432 :          do ie=nets,nete
    1806 15290381232 :            Otens(:,:,:,ie) = elem(ie)%derived%omega(:,:,:)
    1807             :          end do
    1808     1112832 :          call biharmonic_wk_omega(elem,Otens,deriv,edgeOmega,hybrid,nets,nete,1,nlev)
    1809     8937432 :          do ie=nets,nete
    1810   735512400 :            do k=1,nlev
    1811 15289268400 :              Otens(:,:,k,ie) = -dt_hyper*nu_p*Otens(:,:,k,ie)
    1812             :            end do
    1813     7824600 :            kptr=0
    1814     8937432 :            call edgeVpack(edgeOmega,Otens(:,:,:,ie) ,nlev,kptr, ie)
    1815             :          end do
    1816     1112832 :          call bndry_exchange(hybrid,edgeOmega,location='compute_omega #2')
    1817     8937432 :          do ie=nets,nete
    1818     7824600 :            kptr=0
    1819     8937432 :            call edgeVunpack(edgeOmega, Otens(:,:,:,ie),nlev,kptr, ie)
    1820             :          end do
    1821     9308376 :          do ie=nets,nete
    1822   736625232 :            do k=1,nlev
    1823  1455375600 :              elem(ie)%derived%omega(:,:,k) =elem(ie)%derived%omega(:,:,k)+&
    1824 16744644000 :                   elem(ie)%rspheremp(:,:)*Otens(:,:,k,ie)
    1825             :            end do
    1826             :          end do
    1827             :        end do
    1828             :      end if
    1829             :      !call FreeEdgeBuffer(edgeOmega)
    1830      370944 :    end subroutine compute_omega
    1831             : end module prim_advance_mod

Generated by: LCOV version 1.14