LCOV - code coverage report
Current view: top level - dynamics/se/dycore - prim_advection_mod.F90 (source / functions) Hit Total Coverage
Test: coverage.info Lines: 63 312 20.2 %
Date: 2025-03-13 19:21:45 Functions: 4 11 36.4 %

          Line data    Source code
       1             : #define OVERLAP 1
       2             : module prim_advection_mod
       3             : !
       4             : ! two formulations.  both are conservative
       5             : ! u grad Q formulation:
       6             : !
       7             : !    d/dt[ Q] +  U grad Q  = 0
       8             : !
       9             : !    d/dt[ dp/dn ] = div( dp/dn U )
      10             : !
      11             : ! total divergence formulation:
      12             : !    d/dt[dp/dn Q] +  div( U dp/dn Q ) = 0
      13             : !
! for convenience, rewrite this as dp Q:  (since dn does not depend on time or the horizontal):
      15             : ! equation is now:
      16             : !    d/dt[dp Q] +  div( U dp Q ) = 0
      17             : !
      18             : !
      19             :   use shr_kind_mod,           only: r8=>shr_kind_r8
      20             :   use dimensions_mod,         only: nlev, np, qsize, nc
      21             :   use derivative_mod,         only: derivative_t
      22             :   use element_mod,            only: element_t
      23             :   use fvm_control_volume_mod, only: fvm_struct
      24             :   use hybvcoord_mod,          only: hvcoord_t
      25             :   use se_dyn_time_mod,        only: TimeLevel_t, TimeLevel_Qdp
      26             :   use control_mod,            only: nu_q, nu_p, limiter_option, hypervis_subcycle_q, rsplit
      27             :   use edge_mod,               only: edgevpack, edgevunpack, initedgebuffer, initedgesbuffer
      28             : 
      29             :   use edgetype_mod,           only: EdgeBuffer_t
      30             :   use hybrid_mod,             only: hybrid_t
      31             :   use viscosity_mod,          only: biharmonic_wk_scalar, neighbor_minmax, &
      32             :                                  neighbor_minmax_start, neighbor_minmax_finish
      33             :   use perf_mod,               only: t_startf, t_stopf, t_barrierf
      34             :   use cam_abortutils,         only: endrun
      35             :   use thread_mod,             only: horz_num_threads, vert_num_threads, tracer_num_threads
      36             : 
      37             :   implicit none
      38             : 
      39             :   private
      40             :   save
      41             : 
      42             :   public :: Prim_Advec_Init1, Prim_Advec_Init2
      43             :   public :: Prim_Advec_Tracers_remap
      44             :   public :: prim_advec_tracers_fvm
      45             :   public :: vertical_remap
      46             : 
      47             :   type (EdgeBuffer_t)      :: edgeAdv, edgeAdvp1, edgeAdvQminmax, edgeveloc
      48             : 
      49             :   integer,parameter :: DSSeta = 1
      50             :   integer,parameter :: DSSomega = 2
      51             :   integer,parameter :: DSSdiv_vdp_ave = 3
      52             :   integer,parameter :: DSSno_var = -1
      53             : 
      54             :   real(kind=r8), allocatable :: qmin(:,:,:), qmax(:,:,:)
      55             : 
      56             : !JMD I don't see why this needs to be thread private.
      57             : !JMD  type (derivative_t), public, allocatable   :: deriv(:) ! derivative struct (nthreads)
      58             :   type (derivative_t), public :: deriv
      59             : 
      60             : 
      61             : contains
      62             : 
      63             : 
      64        1536 :   subroutine Prim_Advec_Init1(par, elem)
      65             :     use dimensions_mod, only: nlev, qsize, nelemd,ntrac,use_cslam
      66             :     use parallel_mod,   only: parallel_t, boundaryCommMethod
      67             :     type(parallel_t)    :: par
      68             :     type (element_t)    :: elem(:)
      69             :     !
      70             :     ! Shared buffer pointers.
      71             :     ! Using "=> null()" in a subroutine is usually bad, because it makes
      72             :     ! the variable have an implicit "save", and therefore shared between
      73             :     ! threads. But in this case we want shared pointers.
      74             :     real(kind=r8), pointer :: buf_ptr(:) => null()
      75             :     real(kind=r8), pointer :: receive_ptr(:) => null()
      76             :     integer       :: advec_remap_num_threads
      77             : 
      78             : 
      79             :     !
      80             :     ! Set the number of threads used in the subroutine Prim_Advec_tracers_remap()
      81             :     !
      82        1536 :     if (use_cslam) then
      83             :        advec_remap_num_threads = 1
      84             :     else
      85           0 :        advec_remap_num_threads = tracer_num_threads
      86             :     endif
      87             :     ! this might be called with qsize=0
      88             :     ! allocate largest one first
      89             :     ! Currently this is never freed. If it was, only this first one should
      90             :     ! be freed, as only it knows the true size of the buffer.
      91        1536 :     if (.not.use_cslam) then
      92             :       call initEdgeBuffer(par,edgeAdvp1,elem,qsize*nlev + nlev,bndry_type=boundaryCommMethod,&
      93           0 :            nthreads=horz_num_threads*advec_remap_num_threads)
      94             :       call initEdgeBuffer(par,edgeAdv,elem,qsize*nlev,bndry_type=boundaryCommMethod, &
      95           0 :            nthreads=horz_num_threads*advec_remap_num_threads)
      96             :       ! This is a different type of buffer pointer allocation
      97             :       ! used for determine the minimum and maximum value from
      98             :       ! neighboring  elements
      99             :       call initEdgeSBuffer(par,edgeAdvQminmax,elem,qsize*nlev*2,bndry_type=boundaryCommMethod, &
     100           0 :            nthreads=horz_num_threads*advec_remap_num_threads)
     101             :     end if
     102        1536 :     call initEdgeBuffer(par,edgeveloc,elem,2*nlev,bndry_type=boundaryCommMethod)
     103             : 
     104             : 
     105             :     ! Don't actually want these saved, if this is ever called twice.
     106        1536 :     nullify(buf_ptr)
     107        1536 :     nullify(receive_ptr)
     108             : 
     109             : 
     110             :     ! this static array is shared by all threads, so dimension for all threads (nelemd), not nets:nete:
     111        6144 :     allocate (qmin(nlev,qsize,nelemd))
     112        4608 :     allocate (qmax(nlev,qsize,nelemd))
     113             : 
     114        1536 :   end subroutine Prim_Advec_Init1
     115             : 
     116        1536 :   subroutine Prim_Advec_Init2(fvm_corners, fvm_points)
     117             :     use dimensions_mod, only : nc
     118             :     use derivative_mod, only : derivinit
     119             : 
     120             :     real(kind=r8), intent(in) :: fvm_corners(nc+1)
     121             :     real(kind=r8), intent(in) :: fvm_points(nc)
     122             : 
     123             :     ! ==================================
     124             :     ! Initialize derivative structure
     125             :     ! ==================================
     126        1536 :     call derivinit(deriv,fvm_corners, fvm_points)
     127        1536 :   end subroutine Prim_Advec_Init2
     128             : 
     129             :   !
     130             :   ! fvm driver
     131             :   !
     132       29184 :   subroutine Prim_Advec_Tracers_fvm(elem,fvm,hvcoord,hybrid,&
     133             :         dt,tl,nets,nete,ghostbufQnhc,ghostBufQ1, ghostBufFlux,kmin,kmax)
     134        1536 :     use fvm_consistent_se_cslam, only: run_consistent_se_cslam
     135             :     use edgetype_mod,            only: edgebuffer_t
     136             :     implicit none
     137             :     type (element_t), intent(inout)   :: elem(:)
     138             :     type (fvm_struct), intent(inout)  :: fvm(:)
     139             :     type (hvcoord_t)                  :: hvcoord
     140             :     type (hybrid_t),        intent(in):: hybrid
     141             :     type (TimeLevel_t)                :: tl
     142             : 
     143             :     real(kind=r8) , intent(in) :: dt
     144             :     integer,intent(in)                :: nets,nete,kmin,kmax
     145             :     type (EdgeBuffer_t), intent(inout):: ghostbufQnhc,ghostBufQ1, ghostBufFlux
     146             : 
     147       29184 :     call t_barrierf('sync_prim_advec_tracers_fvm', hybrid%par%comm)
     148       29184 :     call t_startf('prim_advec_tracers_fvm')
     149             : 
     150       29184 :     if (rsplit==0) call endrun('cslam only works for rsplit>0')
     151             : 
     152             :     !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     153             :     ! 2D advection step
     154             :     !
     155             :     !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     156             :     call run_consistent_se_cslam(elem,fvm,hybrid,dt,tl,nets,nete,hvcoord,&
     157       29184 :            ghostbufQnhc,ghostBufQ1, ghostBufFlux,kmin,kmax)
     158       29184 :     call t_stopf('prim_advec_tracers_fvm')
     159       29184 :   end subroutine Prim_Advec_Tracers_fvm
     160             : 
     161             : 
     162             : 
     163             : !=================================================================================================!
     164             : 
     165           0 :   subroutine Prim_Advec_Tracers_remap( elem , deriv , hvcoord , hybrid , dt , tl , nets , nete )
     166             :     implicit none
     167             :     type (element_t)     , intent(inout) :: elem(:)
     168             :     type (derivative_t)  , intent(in   ) :: deriv
     169             :     type (hvcoord_t)     , intent(in   ) :: hvcoord
     170             :     type (hybrid_t)      , intent(in   ) :: hybrid
     171             :     real(kind=r8) , intent(in   ) :: dt
     172             :     type (TimeLevel_t)   , intent(inout) :: tl
     173             :     integer              , intent(in   ) :: nets
     174             :     integer              , intent(in   ) :: nete
     175             : 
     176             : 
     177             :     !print *,'prim_Advec_Tracers_remap: qsize: ',qsize
     178           0 :     call Prim_Advec_Tracers_remap_rk2( elem , deriv , hvcoord , hybrid , dt , tl , nets , nete )
     179       29184 :   end subroutine Prim_Advec_Tracers_remap
     180             : 
     181             : 
     182           0 :   subroutine euler_step_driver(np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , nets , nete , DSSopt , rhs_multiplier )
     183             : 
     184             : 
     185             :   integer              , intent(in   )         :: np1_qdp, n0_qdp
     186             :   real (kind=r8), intent(in   )         :: dt
     187             :   type (element_t)     , intent(inout)         :: elem(:)
     188             :   type (hvcoord_t)     , intent(in   )         :: hvcoord
     189             :   type (hybrid_t)      , intent(in   )         :: hybrid
     190             :   type (derivative_t)  , intent(in   )         :: deriv
     191             :   integer              , intent(in   )         :: nets
     192             :   integer              , intent(in   )         :: nete
     193             :   integer              , intent(in   )         :: DSSopt
     194             :   integer              , intent(in   )         :: rhs_multiplier
     195             : 
     196           0 :   call euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , nets , nete , DSSopt , rhs_multiplier)
     197             : 
     198           0 :   end subroutine euler_step_driver
     199             : 
     200             : !-----------------------------------------------------------------------------
     201             : !-----------------------------------------------------------------------------
     202             : ! forward-in-time 2 level vertically lagrangian step
     203             : !  this code takes a lagrangian step in the horizontal
     204             : ! (complete with DSS), and then applies a vertical remap
     205             : !
     206             : ! This routine may use dynamics fields at timelevel np1
     207             : ! In addition, other fields are required, which have to be
     208             : ! explicitly saved by the dynamics:  (in elem(ie)%derived struct)
     209             : !
     210             : ! Fields required from dynamics: (in
     211             : !    omega   it will be DSS'd here, for later use by CAM physics
     212             : !              we DSS omega here because it can be done for "free"
     213             : !    Consistent mass/tracer-mass advection (used if subcycling turned on)
     214             : !       dp()   dp at timelevel n0
     215             : !       vn0()  mean flux  < U dp > going from n0 to np1
     216             : !
     217             : ! 3 stage
     218             : !    Euler step from t     -> t+.5
     219             : !    Euler step from t+.5  -> t+1.0
     220             : !    Euler step from t+1.0 -> t+1.5
     221             : !    u(t) = u(t)/3 + u(t+2)*2/3
     222             : !
     223             : !-----------------------------------------------------------------------------
     224             : !-----------------------------------------------------------------------------
  subroutine Prim_Advec_Tracers_remap_rk2( elem , deriv , hvcoord , hybrid , dt , tl , nets , nete )
    ! 2D horizontal tracer advection via a 3-stage SSP Runge-Kutta scheme
    ! (optimal SSP CFL), followed by scalar hyperviscosity unless the
    ! limiter already applied dissipation in the RHS (limiter_option==8).
    use derivative_mod, only: divergence_sphere
    use control_mod   , only: qsplit
    use hybrid_mod    , only: get_loop_ranges!, PrintHybrid
!    use thread_mod    , only : omp_set_num_threads, omp_get_thread_num

    type (element_t)     , intent(inout) :: elem(:)
    type (derivative_t)  , intent(in   ) :: deriv
    type (hvcoord_t)     , intent(in   ) :: hvcoord
    type (hybrid_t)      , intent(in   ) :: hybrid
    real(kind=r8) , intent(in   ) :: dt
    type (TimeLevel_t)   , intent(inout) :: tl
    integer              , intent(in   ) :: nets
    integer              , intent(in   ) :: nete

    real (kind=r8), dimension(np,np,2     ) :: gradQ    ! scratch: U*dp flux per level
    integer :: k,ie
    integer :: rkstage,rhs_multiplier
    integer :: n0_qdp, np1_qdp
    integer :: kbeg,kend,qbeg,qend

!    call t_barrierf('sync_prim_advec_tracers_remap_k2', hybrid%par%comm)
!    call t_startf('prim_advec_tracers_remap_rk2')
!    call extrae_user_function(1)
    call TimeLevel_Qdp( tl, qsplit, n0_qdp, np1_qdp) !time levels for qdp are not the same
    rkstage = 3 !   3 stage RKSSP scheme, with optimal SSP CFL

    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    ! RK2 2D advection step
    ! note: in stage 3 we take the opportunity to DSS omega
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    ! use these for consistent advection (preserve Q=1)
    ! derived%vdp_ave        =  mean horiz. flux:   U*dp
    ! derived%omega         =  advection code will DSS this for the physics, but otherwise
    !                            it is not needed
    ! Also: save a copy of div(U dp) in derived%div(:,:,:,1), which will be DSS'd
    !       and a DSS'ed version stored in derived%div(:,:,:,2)

    call get_loop_ranges(hybrid,kbeg=kbeg,kend=kend,qbeg=qbeg,qend=qend)

    ! Precompute div(U dp) from the saved mean flux vn0 for every owned
    ! element/level; divdp_proj starts as an un-DSS'd copy of divdp.
    do ie=nets,nete
      do k=kbeg,kend
        ! div( U dp Q),
        gradQ(:,:,1)=elem(ie)%derived%vn0(:,:,1,k)
        gradQ(:,:,2)=elem(ie)%derived%vn0(:,:,2,k)
        ! elem(ie)%derived%divdp(:,:,k) = divergence_sphere(gradQ,deriv,elem(ie))
        call divergence_sphere(gradQ,deriv,elem(ie),elem(ie)%derived%divdp(:,:,k))
        elem(ie)%derived%divdp_proj(:,:,k) = elem(ie)%derived%divdp(:,:,k)
      enddo
    enddo


    !rhs_multiplier is for obtaining dp_tracers at each stage:
    !dp_tracers(stage) = dp - rhs_multiplier*dt*divdp_proj
!    call t_startf('euler_step')

    ! Stage 1: also DSS the mean flux divergence (div_vdp_ave)
    rhs_multiplier = 0
    call euler_step_driver( np1_qdp, n0_qdp , dt/2, elem, hvcoord, hybrid, deriv, nets, nete, DSSdiv_vdp_ave, rhs_multiplier )

    ! Stage 2: no extra field to DSS
    rhs_multiplier = 1
    call euler_step_driver( np1_qdp, np1_qdp, dt/2, elem, hvcoord, hybrid, deriv, nets, nete, DSSno_var      , rhs_multiplier )

    ! Stage 3: DSS omega "for free" for later use by the physics
    rhs_multiplier = 2
    call euler_step_driver( np1_qdp, np1_qdp, dt/2, elem, hvcoord, hybrid, deriv, nets, nete, DSSomega      , rhs_multiplier )

!    call t_stopf ('euler_step')

    !to finish the 2D advection step, we need to average the t and t+2 results to get a second order estimate for t+1.
    call qdp_time_avg( elem , rkstage , n0_qdp , np1_qdp , hybrid, nets , nete )

    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    !  Dissipation
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    if ( limiter_option == 8  ) then
      ! dissipation was applied in RHS.
    else
      call advance_hypervis_scalar(edgeadv,elem,hvcoord,hybrid,deriv,tl%np1,np1_qdp,nets,nete,dt)
    endif
!    call extrae_user_function(0)

!    call t_stopf('prim_advec_tracers_remap_rk2')

  end subroutine prim_advec_tracers_remap_rk2
     308             : 
     309             : !-----------------------------------------------------------------------------
     310             : !-----------------------------------------------------------------------------
     311             : 
     312           0 :   subroutine qdp_time_avg( elem , rkstage , n0_qdp , np1_qdp , hybrid , nets , nete )
     313           0 :     use hybrid_mod, only : hybrid_t, get_loop_ranges
     314             :     implicit none
     315             :     type(element_t)     , intent(inout) :: elem(:)
     316             :     integer             , intent(in   ) :: rkstage , n0_qdp , np1_qdp , nets , nete
     317             :     type(hybrid_t) :: hybrid
     318             :     integer :: i,j,ie,q,k
     319             :     integer :: kbeg,kend,qbeg,qend
     320             :     real(kind=r8) :: rrkstage
     321             : 
     322           0 :     call get_loop_ranges(hybrid,kbeg=kbeg,kend=kend,qbeg=qbeg,qend=qend)
     323             : 
     324           0 :     rrkstage=1.0_r8/real(rkstage,kind=r8)
     325           0 :     do ie=nets,nete
     326           0 :       do q=qbeg,qend
     327           0 :         do k=kbeg,kend
     328             :           !OMP_COLLAPSE_SIMD
     329             :           !DIR_VECTOR_ALIGNED
     330           0 :           do j=1,np
     331           0 :           do i=1,np
     332           0 :            elem(ie)%state%Qdp(i,j,k,q,np1_qdp) =               &
     333           0 :                rrkstage *( elem(ie)%state%Qdp(i,j,k,q,n0_qdp) + &
     334           0 :                (rkstage-1)*elem(ie)%state%Qdp(i,j,k,q,np1_qdp) )
     335             :           enddo
     336             :           enddo
     337             :         enddo
     338             :       enddo
     339             :     enddo
     340           0 :   end subroutine qdp_time_avg
     341             : 
     342             : !-----------------------------------------------------------------------------
     343             : !-----------------------------------------------------------------------------
     344             : 
     345           0 :   subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , nets , nete , DSSopt , rhs_multiplier )
     346             :   ! ===================================
  ! This routine is the basic forward
     348             :   ! euler component used to construct RK SSP methods
     349             :   !
     350             :   !           u(np1) = u(n0) + dt2*DSS[ RHS(u(n0)) ]
     351             :   !
     352             :   ! n0 can be the same as np1.
     353             :   !
     354             :   ! DSSopt = DSSeta or DSSomega:   also DSS omega
     355             :   !
     356             :   ! ===================================
     357             :   use dimensions_mod , only: np, nlev
     358             :   use hybrid_mod     , only: hybrid_t!, PrintHybrid
     359             :   use hybrid_mod     , only: get_loop_ranges, threadOwnsTracer
     360             :   use element_mod    , only: element_t
     361             :   use derivative_mod , only: derivative_t, divergence_sphere, limiter_optim_iter_full
     362             :   use edge_mod       , only: edgevpack, edgevunpack
     363             :   use bndry_mod      , only: bndry_exchange
     364             :   use hybvcoord_mod  , only: hvcoord_t
     365             : 
     366             :   integer              , intent(in   )         :: np1_qdp, n0_qdp
     367             :   real (kind=r8), intent(in   )         :: dt
     368             :   type (element_t)     , intent(inout), target :: elem(:)
     369             :   type (hvcoord_t)     , intent(in   )         :: hvcoord
     370             :   type (hybrid_t)      , intent(in   )         :: hybrid
     371             :   type (derivative_t)  , intent(in   )         :: deriv
     372             :   integer              , intent(in   )         :: nets
     373             :   integer              , intent(in   )         :: nete
     374             :   integer              , intent(in   )         :: DSSopt
     375             :   integer              , intent(in   )         :: rhs_multiplier
     376             : 
     377             :   ! local
     378             :   real(kind=r8), dimension(np,np                       ) :: dpdiss
     379             :   real(kind=r8), dimension(np,np,nlev)                   :: dpdissk
     380             :   real(kind=r8), dimension(np,np,2                     ) :: gradQ
     381             :   real(kind=r8), dimension(np,np,2,nlev                ) :: Vstar
     382             :   real(kind=r8), dimension(np,np  ,nlev                ) :: Qtens
     383             :   real(kind=r8), dimension(np,np  ,nlev                ) :: dp
     384           0 :   real(kind=r8), dimension(np,np  ,nlev,qsize,nets:nete) :: Qtens_biharmonic
     385             :   real(kind=r8), dimension(np,np)                        :: div
     386           0 :   real(kind=r8), pointer, dimension(:,:,:)               :: DSSvar
     387             :   real(kind=r8) :: dp0(nlev)
     388             :   integer :: ie,q,i,j,k, kptr
     389             :   integer :: rhs_viss = 0
     390             :   integer :: kblk,qblk   ! The per thead size of the vertical and tracers
     391             :   integer :: kbeg, kend, qbeg, qend
     392             : 
     393           0 :   call get_loop_ranges(hybrid,kbeg=kbeg,kend=kend,qbeg=qbeg,qend=qend)
     394             : 
     395           0 :   kblk = kend - kbeg + 1   ! calculate size of the block of vertical levels
     396           0 :   qblk = qend - qbeg + 1   ! calculate size of the block of tracers
     397             : 
     398           0 :   do k = kbeg, kend
     399           0 :     dp0(k) = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) )*hvcoord%ps0 + &
     400           0 :           ( hvcoord%hybi(k+1) - hvcoord%hybi(k) )*hvcoord%ps0
     401             :   enddo
     402             : 
     403             : !  call t_startf('euler_step')
     404             : 
     405             :   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     406             :   !   compute Q min/max values for lim8
     407             :   !   compute biharmonic mixing term f
     408             :   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     409           0 :   rhs_viss = 0
     410           0 :   if ( limiter_option == 8  ) then
     411             :     ! when running lim8, we also need to limit the biharmonic, so that term needs
     412             :     ! to be included in each euler step.  three possible algorithms here:
     413             :     ! 1) most expensive:
     414             :     !     compute biharmonic (which also computes qmin/qmax) during all 3 stages
     415             :     !     be sure to set rhs_viss=1
     416             :     !     cost:  3 biharmonic steps with 3 DSS
     417             :     !
     418             :     ! 2) cheapest:
     419             :     !     compute biharmonic (which also computes qmin/qmax) only on first stage
     420             :     !     be sure to set rhs_viss=3
     421             :     !     reuse qmin/qmax for all following stages (but update based on local qmin/qmax)
     422             :     !     cost:  1 biharmonic steps with 1 DSS
     423             :     !     main concern:  viscosity
     424             :     !
     425             :     ! 3)  compromise:
     426             :     !     compute biharmonic (which also computes qmin/qmax) only on last stage
     427             :     !     be sure to set rhs_viss=3
     428             :     !     compute qmin/qmax directly on first stage
     429             :     !     reuse qmin/qmax for 2nd stage stage (but update based on local qmin/qmax)
     430             :     !     cost:  1 biharmonic steps, 2 DSS
     431             :     !
     432             :     !  NOTE  when nu_p=0 (no dissipation applied in dynamics to dp equation), we should
     433             :     !        apply dissipation to Q (not Qdp) to preserve Q=1
     434             :     !        i.e.  laplace(Qdp) ~  dp0 laplace(Q)
     435             :     !        for nu_p=nu_q>0, we need to apply dissipation to Q * diffusion_dp
     436             :     !
     437             :     ! initialize dp, and compute Q from Qdp (and store Q in Qtens_biharmonic)
     438           0 :     do ie = nets, nete
     439             :       ! add hyperviscosity to RHS.  apply to Q at timelevel n0, Qdp(n0)/dp
     440           0 :       do k = kbeg, kend
     441             :         !OMP_COLLAPSE_SIMD
     442             :         !DIR_VECTOR_ALIGNED
     443           0 :         do j=1,np
     444           0 :         do i=1,np
     445           0 :           dp(i,j,k) = elem(ie)%derived%dp(i,j,k) - rhs_multiplier*dt*elem(ie)%derived%divdp_proj(i,j,k)
     446             :         enddo
     447             :         enddo
     448             :       enddo
     449             :       !JMD need to update loop based on changes in dungeon21 tag
     450           0 :       do q = qbeg, qend
     451           0 :         do k= kbeg, kend
     452           0 :           Qtens_biharmonic(:,:,k,q,ie) = elem(ie)%state%Qdp(:,:,k,q,n0_qdp)/dp(:,:,k)
     453           0 :           if ( rhs_multiplier == 1 ) then
     454           0 :               qmin(k,q,ie)=min(qmin(k,q,ie),minval(Qtens_biharmonic(:,:,k,q,ie)))
     455           0 :               qmax(k,q,ie)=max(qmax(k,q,ie),maxval(Qtens_biharmonic(:,:,k,q,ie)))
     456             :           else
     457           0 :               qmin(k,q,ie)=minval(Qtens_biharmonic(:,:,k,q,ie))
     458           0 :               qmax(k,q,ie)=maxval(Qtens_biharmonic(:,:,k,q,ie))
     459             :           endif
     460             :         enddo
     461             :       enddo
     462             :     enddo
     463             : 
     464             :     ! compute element qmin/qmax
     465           0 :     if ( rhs_multiplier == 0 ) then
     466             :       ! update qmin/qmax based on neighbor data for lim8
     467             : !      call t_startf('euler_neighbor_minmax1')
     468           0 :       call neighbor_minmax(hybrid,edgeAdvQminmax,nets,nete,qmin(:,:,nets:nete),qmax(:,:,nets:nete))
     469             : !      call t_stopf('euler_neighbor_minmax1')
     470             :     endif
     471             : 
    ! get new min/max values, and also compute biharmonic mixing term
     473           0 :     if ( rhs_multiplier == 2 ) then
     474           0 :       rhs_viss = 3
     475             :       ! two scalings depending on nu_p:
      ! nu_p=0:    qtens_biharmonic *= dp0                   (apply viscosity only to q)
      ! nu_p>0:    qtens_biharmonic *= elem()%psdiss_ave     (for consistency, if nu_p=nu_q)
     478           0 :       if ( nu_p > 0 ) then
     479           0 :         do ie = nets, nete
     480           0 :           do k = kbeg, kend
     481             :             !OMP_COLLAPSE_SIMD
     482             :             !DIR_VECTOR_ALIGNED
     483           0 :             do j=1,np
     484           0 :             do i=1,np
     485           0 :                dpdissk(i,j,k) = elem(ie)%derived%dpdiss_ave(i,j,k)/dp0(k)
     486             :             enddo
     487             :             enddo
     488             :           enddo
     489           0 :           do q = qbeg,qend
     490           0 :             do k = kbeg, kend
     491             :               ! NOTE: divide by dp0 since we multiply by dp0 below
     492             :               !OMP_COLLAPSE_SIMD
     493             :               !DIR_VECTOR_ALIGNED
     494           0 :               do j=1,np
     495           0 :               do i=1,np
     496           0 :                  Qtens_biharmonic(i,j,k,q,ie)=Qtens_biharmonic(i,j,k,q,ie)*dpdissk(i,j,k)
     497             :               enddo
     498             :               enddo
     499             :             enddo
     500             :           enddo
     501             :         enddo
     502             :       endif
     503             : 
     504             : !   Previous version of biharmonic_wk_scalar_minmax included a min/max
     505             : !   calculation into the boundary exchange.  This was causing cache issues.
     506             : !   Split the single operation into two separate calls
     507             : !      call neighbor_minmax()
     508             : !      call biharmonic_wk_scalar()
     509             : !
     510             : #ifdef OVERLAP
     511           0 :       call neighbor_minmax_start(hybrid,edgeAdvQminmax,nets,nete,qmin(:,:,nets:nete),qmax(:,:,nets:nete))
     512           0 :       call biharmonic_wk_scalar(elem,qtens_biharmonic,deriv,edgeAdv,hybrid,nets,nete)
     513           0 :       do ie = nets, nete
     514           0 :         do q = qbeg, qend
     515           0 :           do k = kbeg, kend
     516             :             !OMP_COLLAPSE_SIMD
     517             :             !DIR_VECTOR_ALIGNED
     518           0 :             do j=1,np
     519           0 :             do i=1,np
     520             :                ! note: biharmonic_wk() output has mass matrix already applied. Un-apply since we apply again below:
     521           0 :                qtens_biharmonic(i,j,k,q,ie) = &
     522           0 :                      -rhs_viss*dt*nu_q*dp0(k)*Qtens_biharmonic(i,j,k,q,ie) / elem(ie)%spheremp(i,j)
     523             :             enddo
     524             :             enddo
     525             :           enddo
     526             :         enddo
     527             :       enddo
     528           0 :       call neighbor_minmax_finish(hybrid,edgeAdvQminmax,nets,nete,qmin(:,:,nets:nete),qmax(:,:,nets:nete))
     529             : #else
     530             :       call t_startf('euler_neighbor_minmax2')
     531             :       call neighbor_minmax(hybrid,edgeAdvQminmax,nets,nete,qmin(:,:,nets:nete),qmax(:,:,nets:nete))
     532             :       call t_stopf('euler_neighbor_minmax2')
     533             :       call biharmonic_wk_scalar(elem,qtens_biharmonic,deriv,edgeAdv,hybrid,nets,nete)
     534             : 
     535             :       do ie = nets, nete
     536             :         do q = qbeg, qend
     537             :           do k = kbeg, kend
     538             :             !OMP_COLLAPSE_SIMD
     539             :             !DIR_VECTOR_ALIGNED
     540             :             do j=1,np
     541             :             do i=1,np
     542             :                ! note: biharmonic_wk() output has mass matrix already applied. Un-apply since we apply again below:
     543             :                qtens_biharmonic(i,j,k,q,ie) = &
     544             :                      -rhs_viss*dt*nu_q*dp0(k)*Qtens_biharmonic(i,j,k,q,ie) / elem(ie)%spheremp(i,j)
     545             :             enddo
     546             :             enddo
     547             :           enddo
     548             :         enddo
     549             :       enddo
     550             : #endif
     551             : 
     552             : 
     553             :     endif
     554             :   endif  ! compute biharmonic mixing term and qmin/qmax
     555             : 
     556             : 
     557             :   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     558             :   !   2D Advection step
     559             :   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     560           0 :   do ie = nets, nete
     561             : 
     562             : 
     563             :     ! Compute velocity used to advance Qdp
     564           0 :     do k = kbeg, kend
     565             :       ! derived variable divdp_proj() (DSS'd version of divdp) will only be correct on 2nd and 3rd stage
     566             :       ! but that's ok because rhs_multiplier=0 on the first stage:
     567             :       !OMP_COLLAPSE_SIMD
     568             :       !DIR_VECTOR_ALIGNED
     569           0 :       do j=1,np
     570           0 :       do i=1,np
     571           0 :          dp(i,j,k) = elem(ie)%derived%dp(i,j,k) - rhs_multiplier * dt * elem(ie)%derived%divdp_proj(i,j,k)
     572           0 :          Vstar(i,j,1,k) = elem(ie)%derived%vn0(i,j,1,k) / dp(i,j,k)
     573           0 :          Vstar(i,j,2,k) = elem(ie)%derived%vn0(i,j,2,k) / dp(i,j,k)
     574             :       enddo
     575             :       enddo
     576             :     enddo
     577           0 :     if ( limiter_option == 8) then
     578             :         ! Note that the term dpdissk is independent of Q
     579           0 :         do k = kbeg, kend
     580             :           ! UN-DSS'ed dp at timelevel n0+1:
     581             :           !OMP_COLLAPSE_SIMD
     582             :           !DIR_VECTOR_ALIGNED
     583           0 :           do j=1,np
     584           0 :           do i=1,np
     585           0 :              dpdissk(i,j,k) = dp(i,j,k) - dt * elem(ie)%derived%divdp(i,j,k)
     586             :           enddo
     587             :           enddo
     588           0 :           if ( nu_p > 0 .and. rhs_viss /= 0 ) then
     589             :             ! add contribution from UN-DSS'ed PS dissipation
     590             : !            dpdiss(:,:) = ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) *
     591             : !            elem(ie)%derived%psdiss_biharmonic(:,:)
     592             :             !OMP_COLLAPSE_SIMD
     593             :             !DIR_VECTOR_ALIGNED
     594           0 :             do j=1,np
     595           0 :             do i=1,np
     596           0 :                dpdiss(i,j) = elem(ie)%derived%dpdiss_biharmonic(i,j,k)
     597           0 :                dpdissk(i,j,k) = dpdissk(i,j,k) - rhs_viss * dt * nu_q * dpdiss(i,j) / elem(ie)%spheremp(i,j)
     598             :             enddo
     599             :             enddo
     600             :           endif
     601             :           ! IMPOSE ZERO THRESHOLD.  do this here so it can be turned off for
     602             :           ! testing
     603           0 :           do q=qbeg, qend
     604           0 :              qmin(k,q,ie)=max(qmin(k,q,ie),0.0_r8)
     605             :           enddo
     606             :         enddo
     607             :     endif  ! limiter == 8
     608             : 
     609             : 
     610             :     ! advance Qdp
     611           0 :     do q = qbeg, qend
     612           0 :       do k = kbeg, kend
     613             :         ! div( U dp Q),
     614             :         !OMP_COLLAPSE_SIMD
     615             :         !DIR_VECTOR_ALIGNED
     616           0 :         do j=1,np
     617           0 :         do i=1,np
     618           0 :            gradQ(i,j,1) = Vstar(i,j,1,k) * elem(ie)%state%Qdp(i,j,k,q,n0_qdp)
     619           0 :            gradQ(i,j,2) = Vstar(i,j,2,k) * elem(ie)%state%Qdp(i,j,k,q,n0_qdp)
     620             :         enddo
     621             :         enddo
     622             :         ! Qtens(:,:,k) = elem(ie)%state%Qdp(:,:,k,q,n0_qdp) - &
     623             :         !               dt * divergence_sphere( gradQ , deriv , elem(ie) )
     624           0 :         call divergence_sphere( gradQ , deriv , elem(ie),div )
     625             : 
     626             :         !OMP_COLLAPSE_SIMD
     627             :         !DIR_VECTOR_ALIGNED
     628           0 :         do j=1,np
     629           0 :           do i=1,np
     630           0 :             Qtens(i,j,k) = elem(ie)%state%Qdp(i,j,k,q,n0_qdp) - dt * div(i,j)
     631             :           enddo
     632             :         enddo
     633             : 
     634             :         ! optionally add in hyperviscosity computed above:
     635           0 :         if ( rhs_viss /= 0 ) then
     636             :           !OMP_COLLAPSE_SIMD
     637             :           !DIR_VECTOR_ALIGNED
     638           0 :           do j=1,np
     639           0 :           do i=1,np
     640           0 :               Qtens(i,j,k) = Qtens(i,j,k) + Qtens_biharmonic(i,j,k,q,ie)
     641             :           enddo
     642             :           enddo
     643             :         endif
     644             :       enddo
     645             : 
     646           0 :       if ( limiter_option == 8) then
     647             :         ! apply limiter to Q = Qtens / dp_star
     648           0 :         call limiter_optim_iter_full( Qtens(:,:,:) , elem(ie)%spheremp(:,:) , qmin(:,q,ie) , &
     649           0 :                                                           qmax(:,q,ie) , dpdissk, kbeg, kend )
     650             :       endif
     651             : 
     652             : 
     653             :       ! apply mass matrix, overwrite np1 with solution:
     654             :       ! dont do this earlier, since we allow np1_qdp == n0_qdp
     655             :       ! and we dont want to overwrite n0_qdp until we are done using it
     656           0 :       do k = kbeg, kend
     657             :         !OMP_COLLAPSE_SIMD
     658             :         !DIR_VECTOR_ALIGNED
     659           0 :         do j=1,np
     660           0 :         do i=1,np
     661           0 :             elem(ie)%state%Qdp(i,j,k,q,np1_qdp) = elem(ie)%spheremp(i,j) * Qtens(i,j,k)
     662             :         enddo
     663             :         enddo
     664             :       enddo
     665             : 
     666           0 :       if ( limiter_option == 4 ) then
     667             :         !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
     668             :         ! sign-preserving limiter, applied after mass matrix
     669             :         !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
     670             : !JMD !$OMP BARRIER
     671             : !JMD !$OMP MASTER
     672           0 :         call limiter2d_zero(elem(ie)%state%Qdp(:,:,:,q,np1_qdp))
     673             : !JMD !$OMP END MASTER
     674             : !JMD !$OMP BARRIER
     675             :       endif
     676             : 
     677           0 :       kptr = nlev*(q-1) + kbeg - 1
     678           0 :       call edgeVpack(edgeAdvp1 , elem(ie)%state%Qdp(:,:,kbeg:kend,q,np1_qdp) , kblk , kptr , ie )
     679             :     enddo
     680             :     ! only perform this operation on thread which owns the first tracer
     681           0 :     if (DSSopt>0) then
     682           0 :       if (threadOwnsTracer(hybrid,1)) then
     683             :         ! all zero so we only have to DSS 1:nlev
     684           0 :         if ( DSSopt == DSSomega       ) DSSvar => elem(ie)%derived%omega(:,:,:)
     685           0 :         if ( DSSopt == DSSdiv_vdp_ave ) DSSvar => elem(ie)%derived%divdp_proj(:,:,:)
     686             :         ! also DSS extra field
     687           0 :         do k = kbeg, kend
     688             :           !OMP_COLLAPSE_SIMD
     689             :           !DIR_VECTOR_ALIGNED
     690           0 :           do j=1,np
     691           0 :           do i=1,np
     692           0 :              DSSvar(i,j,k) = elem(ie)%spheremp(i,j) * DSSvar(i,j,k)
     693             :           enddo
     694             :           enddo
     695             :         enddo
     696           0 :         kptr = nlev*qsize + kbeg - 1
     697           0 :         call edgeVpack( edgeAdvp1 , DSSvar(:,:,kbeg:kend), kblk, kptr, ie)
     698             :       endif
     699             :     end if
     700             :   enddo
     701             : 
     702           0 :   call bndry_exchange( hybrid , edgeAdvp1,location='edgeAdvp1')
     703             : 
     704           0 :   do ie = nets, nete
     705             :     ! only perform this operation on thread which owns the first tracer
     706           0 :     if (DSSopt>0) then
     707           0 :       if(threadOwnsTracer(hybrid,1)) then
     708           0 :         if ( DSSopt == DSSomega       ) DSSvar => elem(ie)%derived%omega(:,:,:)
     709           0 :         if ( DSSopt == DSSdiv_vdp_ave ) DSSvar => elem(ie)%derived%divdp_proj(:,:,:)
     710           0 :         kptr = qsize*nlev + kbeg -1
     711           0 :         call edgeVunpack( edgeAdvp1 , DSSvar(:,:,kbeg:kend) , kblk , kptr , ie )
     712           0 :         do k = kbeg, kend
     713             :            !OMP_COLLAPSE_SIMD
     714             :            !DIR_VECTOR_ALIGNED
     715           0 :            do j=1,np
     716           0 :            do i=1,np
     717           0 :                DSSvar(i,j,k) = DSSvar(i,j,k) * elem(ie)%rspheremp(i,j)
     718             :            enddo
     719             :            enddo
     720             :         enddo
     721             :       endif
     722             :     end if
     723           0 :     do q = qbeg, qend
     724           0 :       kptr = nlev*(q-1) + kbeg - 1
     725           0 :       call edgeVunpack( edgeAdvp1 , elem(ie)%state%Qdp(:,:,kbeg:kend,q,np1_qdp) , kblk , kptr , ie )
     726           0 :         do k = kbeg, kend
     727             :           !OMP_COLLAPSE_SIMD
     728             :           !DIR_VECTOR_ALIGNED
     729           0 :           do j=1,np
     730           0 :           do i=1,np
     731           0 :              elem(ie)%state%Qdp(i,j,k,q,np1_qdp) = elem(ie)%rspheremp(i,j) * elem(ie)%state%Qdp(i,j,k,q,np1_qdp)
     732             :           enddo
     733             :           enddo
     734             :         enddo
     735             :     enddo
     736             :   enddo
     737             : !  call t_stopf('euler_step')
     738             : 
     739           0 :   end subroutine euler_step
     740             : 
     741             : 
     742             : 
     743           0 :   subroutine limiter2d_zero(Q)
     744             :   ! mass conserving zero limiter (2D only).  to be called just before DSS
     745             :   !
     746             :   ! this routine is called inside a DSS loop, and so Q had already
     747             :   ! been multiplied by the mass matrix.  Thus dont include the mass
     748             :   ! matrix when computing the mass = integral of Q over the element
     749             :   !
     750             :   ! ps is only used when advecting Q instead of Qdp
     751             :   ! so ps should be at one timelevel behind Q
     752             :   implicit none
     753             :   real (kind=r8), intent(inout) :: Q(np,np,nlev)
     754             : 
     755             :   ! local
     756             : !  real (kind=r8) :: dp(np,np)
     757             :   real (kind=r8) :: mass,mass_new,ml
     758             :   integer i,j,k
     759             : 
     760           0 :   do k = nlev , 1 , -1
     761             :     mass = 0
     762           0 :     do j = 1 , np
     763           0 :       do i = 1 , np
     764             :         !ml = Q(i,j,k)*dp(i,j)*spheremp(i,j)  ! see above
     765           0 :         ml = Q(i,j,k)
     766           0 :         mass = mass + ml
     767             :       enddo
     768             :     enddo
     769             : 
     770             :     ! negative mass.  so reduce all postive values to zero
     771             :     ! then increase negative values as much as possible
     772           0 :     if ( mass < 0 ) Q(:,:,k) = -Q(:,:,k)
     773             :     mass_new = 0
     774           0 :     do j = 1 , np
     775           0 :       do i = 1 , np
     776           0 :         if ( Q(i,j,k) < 0 ) then
     777           0 :           Q(i,j,k) = 0
     778             :         else
     779           0 :           ml = Q(i,j,k)
     780           0 :           mass_new = mass_new + ml
     781             :         endif
     782             :       enddo
     783             :     enddo
     784             : 
     785             :     ! now scale the all positive values to restore mass
     786           0 :     if ( mass_new > 0 ) Q(:,:,k) = Q(:,:,k) * abs(mass) / mass_new
     787           0 :     if ( mass     < 0 ) Q(:,:,k) = -Q(:,:,k)
     788             :   enddo
     789           0 :   end subroutine limiter2d_zero
     790             : 
     791             : !-----------------------------------------------------------------------------
     792             : !-----------------------------------------------------------------------------
     793             : 
  subroutine advance_hypervis_scalar( edgeAdv , elem , hvcoord , hybrid , deriv , nt , nt_qdp , nets , nete , dt2 )
  !  Hyperviscosity operator for the forward-in-time scheme.
  !  Takes one timestep of:
  !          Q(:,:,:,np) = Q(:,:,:,np) +  dt2*nu*laplacian**order ( Q )
  !
  !  For correct scaling, dt2 should be the same 'dt2' used in the leapfrog advance.
  !
  !  The step is subcycled hypervis_subcycle_q times with sub-step
  !  dt = dt2/hypervis_subcycle_q.  Each subcycle:
  !    1) builds Qtens = (scaled) Q on the Lagrangian levels,
  !    2) applies the biharmonic operator (biharmonic_wk_scalar),
  !    3) updates Qdp, optionally zero-limits it, and
  !    4) performs a DSS (edge pack / boundary exchange / unpack /
  !       inverse mass matrix) to restore C0 continuity.
  !
  !  Arguments:
  !    edgeAdv  - edge buffer used for the DSS of Qdp
  !    elem     - element structures; state%Qdp(:,:,:,:,nt_qdp) is updated in place
  !    hvcoord  - hybrid vertical coordinate (provides reference thickness dp0)
  !    hybrid   - hybrid MPI/OpenMP parallel structure
  !    deriv    - spectral-element derivative operators
  !    nt       - dynamics time level (unused here; kept for interface consistency)
  !    nt_qdp   - tracer (Qdp) time level to diffuse
  !    nets,nete- element loop bounds owned by this thread
  !    dt2      - full tracer timestep being subcycled
  use dimensions_mod , only: np, nlev
  use hybrid_mod     , only: hybrid_t!, PrintHybrid
  use hybrid_mod     , only: get_loop_ranges
  use element_mod    , only: element_t
  use derivative_mod , only: derivative_t
  use edge_mod       , only: edgevpack, edgevunpack
  use edgetype_mod   , only: EdgeBuffer_t
  use bndry_mod      , only: bndry_exchange

  implicit none
  type (EdgeBuffer_t)  , intent(inout)         :: edgeAdv
  type (element_t)     , intent(inout), target :: elem(:)
  type (hvcoord_t)     , intent(in   )         :: hvcoord
  type (hybrid_t)      , intent(in   )         :: hybrid
  type (derivative_t)  , intent(in   )         :: deriv
  integer              , intent(in   )         :: nt
  integer              , intent(in   )         :: nt_qdp
  integer              , intent(in   )         :: nets
  integer              , intent(in   )         :: nete
  real (kind=r8), intent(in   )         :: dt2

  ! local
  real (kind=r8), dimension(np,np,nlev,qsize,nets:nete) :: Qtens
  real (kind=r8), dimension(np,np,nlev                ) :: dp
!  real (kind=r8), dimension(      nlev,qsize,nets:nete) :: min_neigh
!  real (kind=r8), dimension(      nlev,qsize,nets:nete) :: max_neigh
  integer :: k,kptr,ie,ic,q,i,j
  integer :: kbeg,kend,qbeg,qend

! NOTE: PGI compiler bug: when using spheremp, rspheremp and ps as pointers to elem(ie)% members,
!       data is incorrect (offset by a few numbers actually)
!       removed for now.
!  real (kind=r8), dimension(:,:), pointer :: spheremp,rspheremp
  real (kind=r8) :: dt,dp0
  integer :: kblk,qblk   ! The per-thread size of the vertical and tracer blocks

  ! this thread's assigned range of vertical levels and tracers
  call get_loop_ranges(hybrid,kbeg=kbeg,kend=kend,qbeg=qbeg,qend=qend)

  if ( nu_q           == 0 ) return
  !if ( hypervis_order /= 2 ) return

  kblk = kend - kbeg + 1   ! calculate size of the block of vertical levels
  qblk = qend - qbeg + 1   ! calculate size of the block of tracers

  call t_startf('advance_hypervis_scalar')

  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  !  hyperviscosity
  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  dt = dt2 / hypervis_subcycle_q

  do ic = 1 , hypervis_subcycle_q
    do ie = nets, nete
      ! Qtens = Q/dp   (apply hyperviscosity to dp0 * Q, not Qdp)
      do k = kbeg, kend
         ! various options:
         !   1)  biharmonic( Qdp )
         !   2)  dp0 * biharmonic( Qdp/dp )
         !   3)  dpave * biharmonic(Q/dp)
         ! For tracer mass / mass consistency, we use #2 when nu_p=0
         ! and #3 when nu_p>0, where dpave is the mean mass flux from the nu_p
         ! contribution from dynamics.
         ! dp0 = reference-level thickness evaluated at ps = ps0
         dp0 = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) ) * hvcoord%ps0 + &
              ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) * hvcoord%ps0
         ! UN-DSS'ed Lagrangian-level thickness at the end of the full step
         dp(:,:,k) = elem(ie)%derived%dp(:,:,k) - dt2*elem(ie)%derived%divdp_proj(:,:,k)
         if (nu_p>0) then
            do q = qbeg, qend
               Qtens(:,:,k,q,ie) = elem(ie)%derived%dpdiss_ave(:,:,k)*&
                    elem(ie)%state%Qdp(:,:,k,q,nt_qdp) / dp(:,:,k)
            enddo
         else
            do q = qbeg, qend
               Qtens(:,:,k,q,ie) = dp0*elem(ie)%state%Qdp(:,:,k,q,nt_qdp) / dp(:,:,k)
            enddo
         endif
      enddo
   enddo

    ! compute biharmonic operator. Qtens = input and output
    ! (includes its own boundary exchange; output has mass matrix applied)
    call biharmonic_wk_scalar( elem , Qtens , deriv , edgeAdv , hybrid ,  nets , nete )

    do ie = nets, nete
      !spheremp     => elem(ie)%spheremp
      do q = qbeg, qend
        do k = kbeg, kend
          dp0 = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) ) * hvcoord%ps0 + &
                ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) * hvcoord%ps0
         do j = 1 , np
            do i = 1 , np

              ! advection Qdp.  For mass advection consistency:
              ! DIFF( Qdp) ~   dp0 DIFF (Q)  =  dp0 DIFF ( Qdp/dp )
              ! spheremp is applied here; it is removed again by the
              ! inverse mass matrix after the boundary exchange below
              elem(ie)%state%Qdp(i,j,k,q,nt_qdp) = elem(ie)%state%Qdp(i,j,k,q,nt_qdp) * elem(ie)%spheremp(i,j) &
                                                                                 - dt * nu_q * Qtens(i,j,k,q,ie)
        enddo
        enddo
        enddo

        if (limiter_option .ne. 0 ) then
!JMD Only needed if threading over the vertical
!JMD!$OMP BARRIER
!JMD!$OMP MASTER
           ! smooth some of the negativities introduced by diffusion:
           call limiter2d_zero( elem(ie)%state%Qdp(:,:,:,q,nt_qdp) )
!JMD!$OMP END MASTER
!JMD!$OMP BARRIER
        endif

      enddo
      ! pack this thread's tracer levels for the DSS; tracer q occupies
      ! levels nlev*(q-1)+1 .. nlev*q of the edge buffer
      do q = qbeg, qend
         kptr = nlev*(q-1) + kbeg - 1
         call edgeVpack( edgeAdv , elem(ie)%state%Qdp(:,:,kbeg:kend,q,nt_qdp) , kblk, kptr, ie )
      enddo
    enddo

    call bndry_exchange( hybrid , edgeAdv,location='advance_hypervis_scalar')

    do ie = nets, nete
      do q = qbeg, qend
         kptr = nlev*(q-1) + kbeg - 1
         call edgeVunpack( edgeAdv , elem(ie)%state%Qdp(:,:,kbeg:kend,q,nt_qdp) , kblk, kptr, ie )
      enddo
      !rspheremp     => elem(ie)%rspheremp
      do q = qbeg, qend
        ! apply inverse mass matrix (completes the DSS)
        do k = kbeg, kend
          elem(ie)%state%Qdp(:,:,k,q,nt_qdp) = elem(ie)%rspheremp(:,:) * elem(ie)%state%Qdp(:,:,k,q,nt_qdp)
        enddo
      enddo
    enddo

  enddo
  call t_stopf('advance_hypervis_scalar')
  end subroutine advance_hypervis_scalar
     934             : 
     935       29184 :   subroutine vertical_remap(hybrid,elem,fvm,hvcoord,np1,np1_qdp,nets,nete)
     936             :     !
     937             :     ! This routine is called at the end of the vertically Lagrangian
     938             :     ! dynamics step to compute the vertical flux needed to get back
     939             :     ! to reference eta levels
     940             :     !
     941             :     ! map tracers
     942             :     ! map velocity components
     943             :     ! map temperature (either by mapping enthalpy or virtual temperature over log(p)
     944             :     ! (controlled by vert_remap_uvTq_alg > -20 or <= -20)
     945             :     !
     946           0 :     use hybvcoord_mod,          only: hvcoord_t
     947             :     use vertremap_mod,          only: remap1
     948             :     use hybrid_mod,             only: hybrid_t, config_thread_region,get_loop_ranges, PrintHybrid
     949             :     use fvm_control_volume_mod, only: fvm_struct
     950             :     use dimensions_mod,         only: use_cslam, ntrac
     951             :     use dimensions_mod,         only: kord_tr,kord_tr_cslam
     952             :     use cam_logfile,            only: iulog
     953             :     use physconst,              only: pi
     954             :     use air_composition,        only: thermodynamic_active_species_idx_dycore
     955             :     use cam_thermo,             only: get_enthalpy, get_virtual_temp, get_dp, MASS_MIXING_RATIO
     956             :     use thread_mod,             only: omp_set_nested
     957             :     use control_mod,            only: vert_remap_uvTq_alg
     958             :     type (hybrid_t),  intent(in)    :: hybrid  ! distributed parallel structure (shared)
     959             :     type(fvm_struct), intent(inout) :: fvm(:)
     960             :     type (element_t), intent(inout) :: elem(:)
     961             :     !
     962             :     real (kind=r8)   :: dpc_star(nc,nc,nlev) !Lagrangian levels on CSLAM grid
     963             : 
     964             :     type (hvcoord_t) :: hvcoord
     965             :     integer          :: ie,i,j,k,np1,nets,nete,np1_qdp,q, m_cnst
     966             :     real (kind=r8), dimension(np,np,nlev)  :: dp_moist,dp_star_moist, dp_dry,dp_star_dry
     967             :     real (kind=r8), dimension(np,np,nlev)  :: enthalpy_star
     968             :     real (kind=r8), dimension(np,np,nlev,2):: ttmp
     969             :     real(r8), parameter                    :: rad2deg = 180.0_r8/pi
     970             :     integer :: region_num_threads,qbeg,qend,kord_uvT(1)
     971             :     type (hybrid_t) :: hybridnew,hybridnew2
     972             :     real (kind=r8)  :: ptop
     973             : 
     974       58368 :     kord_uvT = vert_remap_uvTq_alg
     975             : 
     976       29184 :     ptop = hvcoord%hyai(1)*hvcoord%ps0
     977      234384 :     do ie=nets,nete
     978             :       !
     979             :       ! prepare for mapping of temperature
     980             :       !
     981      205200 :       if (vert_remap_uvTq_alg>-20) then
     982             :         !
     983             :         ! compute enthalpy on Lagrangian levels
     984             :         ! (do it here since qdp is overwritten by remap1)
     985             :         !
     986      410400 :         call get_enthalpy(elem(ie)%state%qdp(:,:,:,1:qsize,np1_qdp), &
     987             :              elem(ie)%state%t(:,:,:,np1), elem(ie)%state%dp3d(:,:,:,np1), enthalpy_star,     &
     988      615600 :              active_species_idx_dycore=thermodynamic_active_species_idx_dycore)
     989             :       else
     990             :         !
     991             :         ! map Tv over log(p) following FV and FV3
     992             :         !
     993           0 :         call get_virtual_temp(elem(ie)%state%qdp(:,:,:,1:qsize,np1_qdp), enthalpy_star, &
     994           0 :              dp_dry=elem(ie)%state%dp3d(:,:,:,np1), active_species_idx_dycore=thermodynamic_active_species_idx_dycore)
     995           0 :         enthalpy_star = enthalpy_star*elem(ie)%state%t(:,:,:,np1)
     996             :       end if
     997             :       !
     998             :       ! update final psdry
     999             :       !
    1000             :       elem(ie)%state%psdry(:,:) = ptop + &
    1001   309646800 :            sum(elem(ie)%state%dp3d(:,:,:,np1),3)
    1002             :       !
    1003             :       ! compute dry vertical coordinate (Lagrangian and reference levels)
    1004             :       !
    1005    19288800 :       do k=1,nlev
    1006   400755600 :         dp_star_dry(:,:,k) = elem(ie)%state%dp3d(:,:,k,np1)
    1007    19083600 :         dp_dry(:,:,k) = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) )*hvcoord%ps0 + &
    1008   419839200 :              ( hvcoord%hybi(k+1) - hvcoord%hybi(k) )*elem(ie)%state%psdry(:,:)
    1009   400960800 :         elem(ie)%state%dp3d(:,:,k,np1) = dp_dry(:,:,k)
    1010             :       enddo
    1011             :       !
    1012      205200 :       call get_dp(elem(ie)%state%Qdp(:,:,:,1:qsize,np1_qdp), MASS_MIXING_RATIO,&
    1013      410400 :          thermodynamic_active_species_idx_dycore, dp_star_dry, dp_star_moist(:,:,:))
    1014             :       !
    1015             :       ! Check if Lagrangian leves have crossed
    1016             :       !
    1017   400960800 :       if (minval(dp_star_moist)<1.0E-12_r8) then
    1018           0 :         write(iulog,*) "NEGATIVE LAYER THICKNESS DIAGNOSTICS:"
    1019           0 :         write(iulog,*) " "
    1020           0 :         do j=1,np
    1021           0 :           do i=1,np
    1022           0 :             if (minval(dp_star_moist(i,j,:))<1.0e-12_r8) then
    1023           0 :               write(iulog,'(A13,2f6.2)') "(lon,lat) = ",&
    1024           0 :                    elem(ie)%spherep(i,j)%lon*rad2deg,elem(ie)%spherep(i,j)%lat*rad2deg
    1025           0 :               write(iulog,*) " "
    1026           0 :               do k=1,nlev
    1027           0 :                 write(iulog,'(A21,I5,A1,f16.12,3f10.2)') "k,dp_star_moist,u,v,T: ",k," ",dp_star_moist(i,j,k)/100.0_r8,&
    1028           0 :                      elem(ie)%state%v(i,j,1,k,np1),elem(ie)%state%v(i,j,2,k,np1),elem(ie)%state%T(i,j,k,np1)
    1029             :               end do
    1030             :             end if
    1031             :           end do
    1032             :         end do
    1033           0 :         call endrun('negative moist layer thickness.  timestep or remap time too large')
    1034             :       endif
    1035             : 
    1036      205200 :       call remap1(elem(ie)%state%Qdp(:,:,:,1:qsize,np1_qdp),np,1,qsize,qsize,dp_star_dry,dp_dry,ptop,0,.true.,kord_tr)
    1037             :       !
    1038             :       ! compute moist reference pressure level thickness
    1039             :       !
    1040      205200 :       call get_dp(elem(ie)%state%Qdp(:,:,:,1:qsize,np1_qdp), MASS_MIXING_RATIO,&
    1041      410400 :            thermodynamic_active_species_idx_dycore, dp_dry, dp_moist(:,:,:))
    1042             : 
    1043             :       !
    1044             :       ! Remapping of temperature
    1045             :       !
    1046      205200 :       if (vert_remap_uvTq_alg>-20) then
    1047             :         !
    1048             :         ! remap enthalpy energy and back out temperature
    1049             :         !
    1050      205200 :         call remap1(enthalpy_star,np,1,1,1,dp_star_dry,dp_dry,ptop,1,.true.,kord_uvT)
    1051             :         !
    1052             :         ! compute sum c^(l)_p*m^(l)*dp on arrival (Eulerian) grid
    1053             :         !
    1054   400960800 :         ttmp(:,:,:,1) = 1.0_r8
    1055      205200 :         call get_enthalpy(elem(ie)%state%qdp(:,:,:,1:qsize,np1_qdp),   &
    1056             :              ttmp(:,:,:,1), dp_dry,ttmp(:,:,:,2), &
    1057      410400 :              active_species_idx_dycore=thermodynamic_active_species_idx_dycore)
    1058   400960800 :         elem(ie)%state%t(:,:,:,np1)=enthalpy_star/ttmp(:,:,:,2)
    1059             :       else
    1060             :         !
    1061             :         ! map Tv over log(p); following FV and FV3
    1062             :         !
    1063           0 :         call remap1(enthalpy_star,np,1,1,1,dp_star_moist,dp_moist,ptop,1,.false.,kord_uvT)
    1064           0 :         call get_virtual_temp(elem(ie)%state%qdp(:,:,:,1:qsize,np1_qdp), ttmp(:,:,:,1), &
    1065           0 :              dp_dry=dp_dry, active_species_idx_dycore=thermodynamic_active_species_idx_dycore)
    1066             :         !
    1067             :         ! convert new Tv to T
    1068             :         !
    1069           0 :         elem(ie)%state%t(:,:,:,np1)=enthalpy_star/ttmp(:,:,:,1)
    1070             :       end if
    1071             :       !
    1072             :       ! remap velocity components
    1073             :       !
    1074   801716400 :       call remap1(elem(ie)%state%v(:,:,1,:,np1),np,1,1,1,dp_star_moist,dp_moist,ptop,-1,.false.,kord_uvT)
    1075   801745584 :       call remap1(elem(ie)%state%v(:,:,2,:,np1),np,1,1,1,dp_star_moist,dp_moist,ptop,-1,.false.,kord_uvT)
    1076             :     enddo
    1077             : 
    1078       29184 :     if (use_cslam) then
    1079             :       !
    1080             :       ! vertical remapping of CSLAM tracers
    1081             :       !
    1082      234384 :       do ie=nets,nete
    1083   248292000 :         dpc_star=fvm(ie)%dp_fvm(1:nc,1:nc,:)
    1084    19288800 :         do k=1,nlev
    1085    76539600 :          do j=1,nc
    1086   248086800 :            do i=1,nc
    1087             :              !
    1088             :              ! new pressure levels on CSLAM grid
    1089             :              !
    1090   687009600 :              fvm(ie)%dp_fvm(i,j,k) = (hvcoord%hyai(k+1) - hvcoord%hyai(k))*hvcoord%ps0 + &
    1091   916012800 :                   (hvcoord%hybi(k+1) - hvcoord%hybi(k))*fvm(ie)%psc(i,j)
    1092             :             end do
    1093             :           end do
    1094             :         end do
    1095      234384 :         if(ntrac>tracer_num_threads) then
    1096      205200 :           call omp_set_nested(.true.)
    1097             :           !$OMP PARALLEL NUM_THREADS(tracer_num_threads), DEFAULT(SHARED), PRIVATE(hybridnew2,qbeg,qend)
    1098      205200 :           hybridnew2 = config_thread_region(hybrid,'ctracer')
    1099      205200 :           call get_loop_ranges(hybridnew2, qbeg=qbeg, qend=qend)
    1100           0 :           call remap1(fvm(ie)%c(1:nc,1:nc,:,1:ntrac),nc,qbeg,qend,ntrac,dpc_star, &
    1101 20608441200 :                       fvm(ie)%dp_fvm(1:nc,1:nc,:),ptop,0,.false.,kord_tr_cslam)
    1102             :           !$OMP END PARALLEL
    1103      205200 :           call omp_set_nested(.false.)
    1104             :         else
    1105           0 :           call remap1(fvm(ie)%c(1:nc,1:nc,:,1:ntrac),nc,1,ntrac,ntrac,dpc_star, &
    1106           0 :                       fvm(ie)%dp_fvm(1:nc,1:nc,:),ptop,0,.false.,kord_tr_cslam)
    1107             :         endif
    1108             :       enddo
    1109             :     end if
    1110       29184 :   end subroutine vertical_remap
    1111             : 
    1112             : end module prim_advection_mod

Generated by: LCOV version 1.14