LCOV - code coverage report
Current view: top level - dynamics/se/dycore - prim_advection_mod.F90 (source / functions) Hit Total Coverage
Test: coverage.info Lines: 63 312 20.2 %
Date: 2025-01-13 21:54:50 Functions: 4 11 36.4 %

          Line data    Source code
       1             : #define OVERLAP 1
       2             : module prim_advection_mod
       3             : !
       4             : ! two formulations.  both are conservative
       5             : ! u grad Q formulation:
       6             : !
       7             : !    d/dt[ Q] +  U grad Q  = 0
       8             : !
       9             : !    d/dt[ dp/dn ] = div( dp/dn U )
      10             : !
      11             : ! total divergence formulation:
      12             : !    d/dt[dp/dn Q] +  div( U dp/dn Q ) = 0
      13             : !
       14             : ! for convenience, rewrite this as dp Q:  (since dn does not depend on time or the horizontal):
      15             : ! equation is now:
      16             : !    d/dt[dp Q] +  div( U dp Q ) = 0
      17             : !
      18             : !
      19             :   use shr_kind_mod,           only: r8=>shr_kind_r8
      20             :   use dimensions_mod,         only: nlev, np, qsize, nc
      21             :   use physconst,              only: cpair
      22             :   use derivative_mod,         only: derivative_t
      23             :   use element_mod,            only: element_t
      24             :   use fvm_control_volume_mod, only: fvm_struct
      25             :   use hybvcoord_mod,          only: hvcoord_t
      26             :   use se_dyn_time_mod,        only: TimeLevel_t, TimeLevel_Qdp
      27             :   use control_mod,            only: nu_q, nu_p, limiter_option, hypervis_subcycle_q, rsplit
      28             :   use edge_mod,               only: edgevpack, edgevunpack, initedgebuffer, initedgesbuffer
      29             : 
      30             :   use edgetype_mod,           only: EdgeBuffer_t
      31             :   use hybrid_mod,             only: hybrid_t
      32             :   use viscosity_mod,          only: biharmonic_wk_scalar, neighbor_minmax, &
      33             :                                  neighbor_minmax_start, neighbor_minmax_finish
      34             :   use perf_mod,               only: t_startf, t_stopf, t_barrierf
      35             :   use cam_abortutils,         only: endrun
      36             :   use thread_mod,             only: horz_num_threads, vert_num_threads, tracer_num_threads
      37             : 
      38             :   implicit none
      39             : 
      40             :   private
      41             :   save
      42             : 
      43             :   public :: Prim_Advec_Init1, Prim_Advec_Init2
      44             :   public :: Prim_Advec_Tracers_remap
      45             :   public :: prim_advec_tracers_fvm
      46             :   public :: vertical_remap
      47             : 
      48             :   type (EdgeBuffer_t)      :: edgeAdv, edgeAdvp1, edgeAdvQminmax, edgeveloc
      49             : 
      50             :   integer,parameter :: DSSeta = 1
      51             :   integer,parameter :: DSSomega = 2
      52             :   integer,parameter :: DSSdiv_vdp_ave = 3
      53             :   integer,parameter :: DSSno_var = -1
      54             : 
      55             :   real(kind=r8), allocatable :: qmin(:,:,:), qmax(:,:,:)
      56             : 
      57             : !JMD I don't see why this needs to be thread private.
      58             : !JMD  type (derivative_t), public, allocatable   :: deriv(:) ! derivative struct (nthreads)
      59             :   type (derivative_t), public :: deriv
      60             : 
      61             : 
      62             : contains
      63             : 
      64             : 
  !> First-phase initialization of tracer advection: allocates the module-level
  !> edge-communication buffers and the shared qmin/qmax limiter arrays.
  !> Must be called once (single-threaded) before any tracer advection step.
  subroutine Prim_Advec_Init1(par, elem)
    use dimensions_mod, only: nlev, qsize, nelemd,ntrac,use_cslam
    use parallel_mod,   only: parallel_t, boundaryCommMethod
    type(parallel_t)    :: par        ! parallel decomposition / communicator info
    type (element_t)    :: elem(:)    ! spectral elements (used to size edge buffers)
    !
    ! Shared buffer pointers.
    ! Using "=> null()" in a subroutine is usually bad, because it makes
    ! the variable have an implicit "save", and therefore shared between
    ! threads. But in this case we want shared pointers.
    real(kind=r8), pointer :: buf_ptr(:) => null()
    real(kind=r8), pointer :: receive_ptr(:) => null()
    integer       :: advec_remap_num_threads


    !
    ! Set the number of threads used in the subroutine Prim_Advec_tracers_remap()
    ! (CSLAM does its own threading, so tracer threading is disabled there)
    !
    if (use_cslam) then
       advec_remap_num_threads = 1
    else
       advec_remap_num_threads = tracer_num_threads
    endif
    ! this might be called with qsize=0
    ! allocate largest one first
    ! Currently this is never freed. If it was, only this first one should
    ! be freed, as only it knows the true size of the buffer.
    ! Buffers are only needed for the spectral-element (non-CSLAM) tracer path.
    if (.not.use_cslam) then
      ! qsize*nlev tracer levels plus nlev extra levels (for dp/omega DSS)
      call initEdgeBuffer(par,edgeAdvp1,elem,qsize*nlev + nlev,bndry_type=boundaryCommMethod,&
           nthreads=horz_num_threads*advec_remap_num_threads)
      call initEdgeBuffer(par,edgeAdv,elem,qsize*nlev,bndry_type=boundaryCommMethod, &
           nthreads=horz_num_threads*advec_remap_num_threads)
      ! This is a different type of buffer pointer allocation
      ! used for determine the minimum and maximum value from
      ! neighboring  elements
      ! (factor of 2: one slot for the min field, one for the max field)
      call initEdgeSBuffer(par,edgeAdvQminmax,elem,qsize*nlev*2,bndry_type=boundaryCommMethod, &
           nthreads=horz_num_threads*advec_remap_num_threads)
    end if
    ! velocity buffer (2 horizontal components per level); needed in both paths
    call initEdgeBuffer(par,edgeveloc,elem,2*nlev,bndry_type=boundaryCommMethod)


    ! Don't actually want these saved, if this is ever called twice.
    nullify(buf_ptr)
    nullify(receive_ptr)


    ! this static array is shared by all threads, so dimension for all threads (nelemd), not nets:nete:
    allocate (qmin(nlev,qsize,nelemd))
    allocate (qmax(nlev,qsize,nelemd))

  end subroutine Prim_Advec_Init1
     116             : 
     117        1536 :   subroutine Prim_Advec_Init2(fvm_corners, fvm_points)
     118             :     use dimensions_mod, only : nc
     119             :     use derivative_mod, only : derivinit
     120             : 
     121             :     real(kind=r8), intent(in) :: fvm_corners(nc+1)
     122             :     real(kind=r8), intent(in) :: fvm_points(nc)
     123             : 
     124             :     ! ==================================
     125             :     ! Initialize derivative structure
     126             :     ! ==================================
     127        1536 :     call derivinit(deriv,fvm_corners, fvm_points)
     128        1536 :   end subroutine Prim_Advec_Init2
     129             : 
     130             :   !
     131             :   ! fvm driver
     132             :   !
     133      738816 :   subroutine Prim_Advec_Tracers_fvm(elem,fvm,hvcoord,hybrid,&
     134             :         dt,tl,nets,nete,ghostbufQnhc,ghostBufQ1, ghostBufFlux,kmin,kmax)
     135        1536 :     use fvm_consistent_se_cslam, only: run_consistent_se_cslam
     136             :     use edgetype_mod,            only: edgebuffer_t
     137             :     implicit none
     138             :     type (element_t), intent(inout)   :: elem(:)
     139             :     type (fvm_struct), intent(inout)  :: fvm(:)
     140             :     type (hvcoord_t)                  :: hvcoord
     141             :     type (hybrid_t),        intent(in):: hybrid
     142             :     type (TimeLevel_t)                :: tl
     143             : 
     144             :     real(kind=r8) , intent(in) :: dt
     145             :     integer,intent(in)                :: nets,nete,kmin,kmax
     146             :     type (EdgeBuffer_t), intent(inout):: ghostbufQnhc,ghostBufQ1, ghostBufFlux
     147             : 
     148      738816 :     call t_barrierf('sync_prim_advec_tracers_fvm', hybrid%par%comm)
     149      738816 :     call t_startf('prim_advec_tracers_fvm')
     150             : 
     151      738816 :     if (rsplit==0) call endrun('cslam only works for rsplit>0')
     152             : 
     153             :     !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     154             :     ! 2D advection step
     155             :     !
     156             :     !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     157             :     call run_consistent_se_cslam(elem,fvm,hybrid,dt,tl,nets,nete,hvcoord,&
     158      738816 :            ghostbufQnhc,ghostBufQ1, ghostBufFlux,kmin,kmax)
     159      738816 :     call t_stopf('prim_advec_tracers_fvm')
     160      738816 :   end subroutine Prim_Advec_Tracers_fvm
     161             : 
     162             : 
     163             : 
     164             : !=================================================================================================!
     165             : 
     166           0 :   subroutine Prim_Advec_Tracers_remap( elem , deriv , hvcoord , hybrid , dt , tl , nets , nete )
     167             :     implicit none
     168             :     type (element_t)     , intent(inout) :: elem(:)
     169             :     type (derivative_t)  , intent(in   ) :: deriv
     170             :     type (hvcoord_t)     , intent(in   ) :: hvcoord
     171             :     type (hybrid_t)      , intent(in   ) :: hybrid
     172             :     real(kind=r8) , intent(in   ) :: dt
     173             :     type (TimeLevel_t)   , intent(inout) :: tl
     174             :     integer              , intent(in   ) :: nets
     175             :     integer              , intent(in   ) :: nete
     176             : 
     177             : 
     178             :     !print *,'prim_Advec_Tracers_remap: qsize: ',qsize
     179           0 :     call Prim_Advec_Tracers_remap_rk2( elem , deriv , hvcoord , hybrid , dt , tl , nets , nete )
     180      738816 :   end subroutine Prim_Advec_Tracers_remap
     181             : 
     182             : 
     183           0 :   subroutine euler_step_driver(np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , nets , nete , DSSopt , rhs_multiplier )
     184             : 
     185             : 
     186             :   integer              , intent(in   )         :: np1_qdp, n0_qdp
     187             :   real (kind=r8), intent(in   )         :: dt
     188             :   type (element_t)     , intent(inout)         :: elem(:)
     189             :   type (hvcoord_t)     , intent(in   )         :: hvcoord
     190             :   type (hybrid_t)      , intent(in   )         :: hybrid
     191             :   type (derivative_t)  , intent(in   )         :: deriv
     192             :   integer              , intent(in   )         :: nets
     193             :   integer              , intent(in   )         :: nete
     194             :   integer              , intent(in   )         :: DSSopt
     195             :   integer              , intent(in   )         :: rhs_multiplier
     196             : 
     197           0 :   call euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , nets , nete , DSSopt , rhs_multiplier)
     198             : 
     199           0 :   end subroutine euler_step_driver
     200             : 
     201             : !-----------------------------------------------------------------------------
     202             : !-----------------------------------------------------------------------------
     203             : ! forward-in-time 2 level vertically lagrangian step
     204             : !  this code takes a lagrangian step in the horizontal
     205             : ! (complete with DSS), and then applies a vertical remap
     206             : !
     207             : ! This routine may use dynamics fields at timelevel np1
     208             : ! In addition, other fields are required, which have to be
     209             : ! explicitly saved by the dynamics:  (in elem(ie)%derived struct)
     210             : !
     211             : ! Fields required from dynamics: (in
     212             : !    omega   it will be DSS'd here, for later use by CAM physics
     213             : !              we DSS omega here because it can be done for "free"
     214             : !    Consistent mass/tracer-mass advection (used if subcycling turned on)
     215             : !       dp()   dp at timelevel n0
     216             : !       vn0()  mean flux  < U dp > going from n0 to np1
     217             : !
     218             : ! 3 stage
     219             : !    Euler step from t     -> t+.5
     220             : !    Euler step from t+.5  -> t+1.0
     221             : !    Euler step from t+1.0 -> t+1.5
     222             : !    u(t) = u(t)/3 + u(t+2)*2/3
     223             : !
     224             : !-----------------------------------------------------------------------------
     225             : !-----------------------------------------------------------------------------
  !> Forward-in-time, vertically Lagrangian horizontal tracer advection using a
  !> 3-stage RK-SSP scheme, followed by optional scalar hyperviscosity.
  !> Stage sequence (see the header comment block above): three Euler steps of
  !> dt/2 each, then qdp_time_avg combines the t and t+3*dt/2 states into a
  !> second-order estimate at t+dt.
  subroutine Prim_Advec_Tracers_remap_rk2( elem , deriv , hvcoord , hybrid , dt , tl , nets , nete )
    use derivative_mod, only: divergence_sphere
    use control_mod   , only: qsplit
    use hybrid_mod    , only: get_loop_ranges!, PrintHybrid
!    use thread_mod    , only : omp_set_num_threads, omp_get_thread_num

    type (element_t)     , intent(inout) :: elem(:)
    type (derivative_t)  , intent(in   ) :: deriv
    type (hvcoord_t)     , intent(in   ) :: hvcoord
    type (hybrid_t)      , intent(in   ) :: hybrid
    real(kind=r8) , intent(in   ) :: dt
    type (TimeLevel_t)   , intent(inout) :: tl
    integer              , intent(in   ) :: nets
    integer              , intent(in   ) :: nete

    real (kind=r8), dimension(np,np,2     ) :: gradQ   ! scratch: horizontal mass flux U*dp
    integer :: k,ie
    integer :: rkstage,rhs_multiplier
    integer :: n0_qdp, np1_qdp                          ! Qdp time-level indices (distinct from dynamics tl)
    integer :: kbeg,kend,qbeg,qend                      ! per-thread level/tracer loop bounds

!    call t_barrierf('sync_prim_advec_tracers_remap_k2', hybrid%par%comm)
!    call t_startf('prim_advec_tracers_remap_rk2')
!    call extrae_user_function(1)
    call TimeLevel_Qdp( tl, qsplit, n0_qdp, np1_qdp) !time levels for qdp are not the same
    rkstage = 3 !   3 stage RKSSP scheme, with optimal SSP CFL

    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    ! RK2 2D advection step
    ! note: stage 3 we take the opportunity to DSS omega
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    ! use these for consistent advection (preserve Q=1)
    ! derived%vdp_ave        =  mean horiz. flux:   U*dp
    ! derived%omega         =  advection code will DSS this for the physics, but otherwise
    !                            it is not needed
    ! Also: save a copy of div(U dp) in derived%div(:,:,:,1), which will be DSS'd
    !       and a DSS'ed version stored in derived%div(:,:,:,2)

    call get_loop_ranges(hybrid,kbeg=kbeg,kend=kend,qbeg=qbeg,qend=qend)

    ! Precompute div(U dp) on every owned element/level; divdp_proj starts as an
    ! un-DSS'ed copy and is projected (DSS'd) during the Euler stages.
    do ie=nets,nete
      do k=kbeg,kend
        ! div( U dp Q),
        gradQ(:,:,1)=elem(ie)%derived%vn0(:,:,1,k)
        gradQ(:,:,2)=elem(ie)%derived%vn0(:,:,2,k)
        ! elem(ie)%derived%divdp(:,:,k) = divergence_sphere(gradQ,deriv,elem(ie))
        call divergence_sphere(gradQ,deriv,elem(ie),elem(ie)%derived%divdp(:,:,k))
        elem(ie)%derived%divdp_proj(:,:,k) = elem(ie)%derived%divdp(:,:,k)
      enddo
    enddo


    !rhs_multiplier is for obtaining dp_tracers at each stage:
    !dp_tracers(stage) = dp - rhs_multiplier*dt*divdp_proj
!    call t_startf('euler_step')

    ! Stage 1: reads Qdp(n0), writes Qdp(np1); also DSS's the mean flux div.
    rhs_multiplier = 0
    call euler_step_driver( np1_qdp, n0_qdp , dt/2, elem, hvcoord, hybrid, deriv, nets, nete, DSSdiv_vdp_ave, rhs_multiplier )

    ! Stages 2 and 3 advance Qdp(np1) in place (source == target time level).
    rhs_multiplier = 1
    call euler_step_driver( np1_qdp, np1_qdp, dt/2, elem, hvcoord, hybrid, deriv, nets, nete, DSSno_var      , rhs_multiplier )

    ! Stage 3: DSS omega "for free" along with the tracer exchange.
    rhs_multiplier = 2
    call euler_step_driver( np1_qdp, np1_qdp, dt/2, elem, hvcoord, hybrid, deriv, nets, nete, DSSomega      , rhs_multiplier )

!    call t_stopf ('euler_step')

    !to finish the 2D advection step, we need to average the t and t+2 results to get a second order estimate for t+1.
    call qdp_time_avg( elem , rkstage , n0_qdp , np1_qdp , hybrid, nets , nete )

    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    !  Dissipation
    !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    if ( limiter_option == 8  ) then
      ! dissipation was applied in RHS.
    else
      ! explicit hyperviscosity applied to the remapped tracers
      call advance_hypervis_scalar(edgeadv,elem,hvcoord,hybrid,deriv,tl%np1,np1_qdp,nets,nete,dt)
    endif
!    call extrae_user_function(0)

!    call t_stopf('prim_advec_tracers_remap_rk2')

  end subroutine prim_advec_tracers_remap_rk2
     309             : 
     310             : !-----------------------------------------------------------------------------
     311             : !-----------------------------------------------------------------------------
     312             : 
     313           0 :   subroutine qdp_time_avg( elem , rkstage , n0_qdp , np1_qdp , hybrid , nets , nete )
     314           0 :     use hybrid_mod, only : hybrid_t, get_loop_ranges
     315             :     implicit none
     316             :     type(element_t)     , intent(inout) :: elem(:)
     317             :     integer             , intent(in   ) :: rkstage , n0_qdp , np1_qdp , nets , nete
     318             :     type(hybrid_t) :: hybrid
     319             :     integer :: i,j,ie,q,k
     320             :     integer :: kbeg,kend,qbeg,qend
     321             :     real(kind=r8) :: rrkstage
     322             : 
     323           0 :     call get_loop_ranges(hybrid,kbeg=kbeg,kend=kend,qbeg=qbeg,qend=qend)
     324             : 
     325           0 :     rrkstage=1.0_r8/real(rkstage,kind=r8)
     326           0 :     do ie=nets,nete
     327           0 :       do q=qbeg,qend
     328           0 :         do k=kbeg,kend
     329             :           !OMP_COLLAPSE_SIMD
     330             :           !DIR_VECTOR_ALIGNED
     331           0 :           do j=1,np
     332           0 :           do i=1,np
     333           0 :            elem(ie)%state%Qdp(i,j,k,q,np1_qdp) =               &
     334           0 :                rrkstage *( elem(ie)%state%Qdp(i,j,k,q,n0_qdp) + &
     335           0 :                (rkstage-1)*elem(ie)%state%Qdp(i,j,k,q,np1_qdp) )
     336             :           enddo
     337             :           enddo
     338             :         enddo
     339             :       enddo
     340             :     enddo
     341           0 :   end subroutine qdp_time_avg
     342             : 
     343             : !-----------------------------------------------------------------------------
     344             : !-----------------------------------------------------------------------------
     345             : 
     346           0 :   subroutine euler_step( np1_qdp , n0_qdp , dt , elem , hvcoord , hybrid , deriv , nets , nete , DSSopt , rhs_multiplier )
     347             :   ! ===================================
     348             :   ! This routine is the basic foward
     349             :   ! euler component used to construct RK SSP methods
     350             :   !
     351             :   !           u(np1) = u(n0) + dt2*DSS[ RHS(u(n0)) ]
     352             :   !
     353             :   ! n0 can be the same as np1.
     354             :   !
     355             :   ! DSSopt = DSSeta or DSSomega:   also DSS omega
     356             :   !
     357             :   ! ===================================
     358             :   use dimensions_mod , only: np, nlev
     359             :   use hybrid_mod     , only: hybrid_t!, PrintHybrid
     360             :   use hybrid_mod     , only: get_loop_ranges, threadOwnsTracer
     361             :   use element_mod    , only: element_t
     362             :   use derivative_mod , only: derivative_t, divergence_sphere, limiter_optim_iter_full
     363             :   use edge_mod       , only: edgevpack, edgevunpack
     364             :   use bndry_mod      , only: bndry_exchange
     365             :   use hybvcoord_mod  , only: hvcoord_t
     366             : 
     367             :   integer              , intent(in   )         :: np1_qdp, n0_qdp
     368             :   real (kind=r8), intent(in   )         :: dt
     369             :   type (element_t)     , intent(inout), target :: elem(:)
     370             :   type (hvcoord_t)     , intent(in   )         :: hvcoord
     371             :   type (hybrid_t)      , intent(in   )         :: hybrid
     372             :   type (derivative_t)  , intent(in   )         :: deriv
     373             :   integer              , intent(in   )         :: nets
     374             :   integer              , intent(in   )         :: nete
     375             :   integer              , intent(in   )         :: DSSopt
     376             :   integer              , intent(in   )         :: rhs_multiplier
     377             : 
     378             :   ! local
     379             :   real(kind=r8), dimension(np,np                       ) :: dpdiss
     380             :   real(kind=r8), dimension(np,np,nlev)                   :: dpdissk
     381             :   real(kind=r8), dimension(np,np,2                     ) :: gradQ
     382             :   real(kind=r8), dimension(np,np,2,nlev                ) :: Vstar
     383             :   real(kind=r8), dimension(np,np  ,nlev                ) :: Qtens
     384             :   real(kind=r8), dimension(np,np  ,nlev                ) :: dp
     385           0 :   real(kind=r8), dimension(np,np  ,nlev,qsize,nets:nete) :: Qtens_biharmonic
     386             :   real(kind=r8), dimension(np,np)                        :: div
     387           0 :   real(kind=r8), pointer, dimension(:,:,:)               :: DSSvar
     388             :   real(kind=r8) :: dp0(nlev)
     389             :   integer :: ie,q,i,j,k, kptr
     390             :   integer :: rhs_viss = 0
     391             :   integer :: kblk,qblk   ! The per thead size of the vertical and tracers
     392             :   integer :: kbeg, kend, qbeg, qend
     393             : 
     394           0 :   call get_loop_ranges(hybrid,kbeg=kbeg,kend=kend,qbeg=qbeg,qend=qend)
     395             : 
     396           0 :   kblk = kend - kbeg + 1   ! calculate size of the block of vertical levels
     397           0 :   qblk = qend - qbeg + 1   ! calculate size of the block of tracers
     398             : 
     399           0 :   do k = kbeg, kend
     400           0 :     dp0(k) = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) )*hvcoord%ps0 + &
     401           0 :           ( hvcoord%hybi(k+1) - hvcoord%hybi(k) )*hvcoord%ps0
     402             :   enddo
     403             : 
     404             : !  call t_startf('euler_step')
     405             : 
     406             :   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     407             :   !   compute Q min/max values for lim8
     408             :   !   compute biharmonic mixing term f
     409             :   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     410           0 :   rhs_viss = 0
     411           0 :   if ( limiter_option == 8  ) then
     412             :     ! when running lim8, we also need to limit the biharmonic, so that term needs
     413             :     ! to be included in each euler step.  three possible algorithms here:
     414             :     ! 1) most expensive:
     415             :     !     compute biharmonic (which also computes qmin/qmax) during all 3 stages
     416             :     !     be sure to set rhs_viss=1
     417             :     !     cost:  3 biharmonic steps with 3 DSS
     418             :     !
     419             :     ! 2) cheapest:
     420             :     !     compute biharmonic (which also computes qmin/qmax) only on first stage
     421             :     !     be sure to set rhs_viss=3
     422             :     !     reuse qmin/qmax for all following stages (but update based on local qmin/qmax)
     423             :     !     cost:  1 biharmonic steps with 1 DSS
     424             :     !     main concern:  viscosity
     425             :     !
     426             :     ! 3)  compromise:
     427             :     !     compute biharmonic (which also computes qmin/qmax) only on last stage
     428             :     !     be sure to set rhs_viss=3
     429             :     !     compute qmin/qmax directly on first stage
     430             :     !     reuse qmin/qmax for 2nd stage stage (but update based on local qmin/qmax)
     431             :     !     cost:  1 biharmonic steps, 2 DSS
     432             :     !
     433             :     !  NOTE  when nu_p=0 (no dissipation applied in dynamics to dp equation), we should
     434             :     !        apply dissipation to Q (not Qdp) to preserve Q=1
     435             :     !        i.e.  laplace(Qdp) ~  dp0 laplace(Q)
     436             :     !        for nu_p=nu_q>0, we need to apply dissipation to Q * diffusion_dp
     437             :     !
     438             :     ! initialize dp, and compute Q from Qdp (and store Q in Qtens_biharmonic)
     439           0 :     do ie = nets, nete
     440             :       ! add hyperviscosity to RHS.  apply to Q at timelevel n0, Qdp(n0)/dp
     441           0 :       do k = kbeg, kend
     442             :         !OMP_COLLAPSE_SIMD
     443             :         !DIR_VECTOR_ALIGNED
     444           0 :         do j=1,np
     445           0 :         do i=1,np
     446           0 :           dp(i,j,k) = elem(ie)%derived%dp(i,j,k) - rhs_multiplier*dt*elem(ie)%derived%divdp_proj(i,j,k)
     447             :         enddo
     448             :         enddo
     449             :       enddo
     450             :       !JMD need to update loop based on changes in dungeon21 tag
     451           0 :       do q = qbeg, qend
     452           0 :         do k= kbeg, kend
     453           0 :           Qtens_biharmonic(:,:,k,q,ie) = elem(ie)%state%Qdp(:,:,k,q,n0_qdp)/dp(:,:,k)
     454           0 :           if ( rhs_multiplier == 1 ) then
     455           0 :               qmin(k,q,ie)=min(qmin(k,q,ie),minval(Qtens_biharmonic(:,:,k,q,ie)))
     456           0 :               qmax(k,q,ie)=max(qmax(k,q,ie),maxval(Qtens_biharmonic(:,:,k,q,ie)))
     457             :           else
     458           0 :               qmin(k,q,ie)=minval(Qtens_biharmonic(:,:,k,q,ie))
     459           0 :               qmax(k,q,ie)=maxval(Qtens_biharmonic(:,:,k,q,ie))
     460             :           endif
     461             :         enddo
     462             :       enddo
     463             :     enddo
     464             : 
     465             :     ! compute element qmin/qmax
     466           0 :     if ( rhs_multiplier == 0 ) then
     467             :       ! update qmin/qmax based on neighbor data for lim8
     468             : !      call t_startf('euler_neighbor_minmax1')
     469           0 :       call neighbor_minmax(hybrid,edgeAdvQminmax,nets,nete,qmin(:,:,nets:nete),qmax(:,:,nets:nete))
     470             : !      call t_stopf('euler_neighbor_minmax1')
     471             :     endif
     472             : 
      473             :     ! get new min/max values, and also compute biharmonic mixing term
     474           0 :     if ( rhs_multiplier == 2 ) then
     475           0 :       rhs_viss = 3
     476             :       ! two scalings depending on nu_p:
      477             :       ! nu_p=0:    qtens_biharmonic *= dp0                   (apply viscosity only to q)
      478             :       ! nu_p>0):   qtens_biharmonic *= elem()%psdiss_ave     (for consistency, if nu_p=nu_q)
     479           0 :       if ( nu_p > 0 ) then
     480           0 :         do ie = nets, nete
     481           0 :           do k = kbeg, kend
     482             :             !OMP_COLLAPSE_SIMD
     483             :             !DIR_VECTOR_ALIGNED
     484           0 :             do j=1,np
     485           0 :             do i=1,np
     486           0 :                dpdissk(i,j,k) = elem(ie)%derived%dpdiss_ave(i,j,k)/dp0(k)
     487             :             enddo
     488             :             enddo
     489             :           enddo
     490           0 :           do q = qbeg,qend
     491           0 :             do k = kbeg, kend
     492             :               ! NOTE: divide by dp0 since we multiply by dp0 below
     493             :               !OMP_COLLAPSE_SIMD
     494             :               !DIR_VECTOR_ALIGNED
     495           0 :               do j=1,np
     496           0 :               do i=1,np
     497           0 :                  Qtens_biharmonic(i,j,k,q,ie)=Qtens_biharmonic(i,j,k,q,ie)*dpdissk(i,j,k)
     498             :               enddo
     499             :               enddo
     500             :             enddo
     501             :           enddo
     502             :         enddo
     503             :       endif
     504             : 
     505             : !   Previous version of biharmonic_wk_scalar_minmax included a min/max
     506             : !   calculation into the boundary exchange.  This was causing cache issues.
     507             : !   Split the single operation into two separate calls
     508             : !      call neighbor_minmax()
     509             : !      call biharmonic_wk_scalar()
     510             : !
     511             : #ifdef OVERLAP
     512           0 :       call neighbor_minmax_start(hybrid,edgeAdvQminmax,nets,nete,qmin(:,:,nets:nete),qmax(:,:,nets:nete))
     513           0 :       call biharmonic_wk_scalar(elem,qtens_biharmonic,deriv,edgeAdv,hybrid,nets,nete)
     514           0 :       do ie = nets, nete
     515           0 :         do q = qbeg, qend
     516           0 :           do k = kbeg, kend
     517             :             !OMP_COLLAPSE_SIMD
     518             :             !DIR_VECTOR_ALIGNED
     519           0 :             do j=1,np
     520           0 :             do i=1,np
     521             :                ! note: biharmonic_wk() output has mass matrix already applied. Un-apply since we apply again below:
     522           0 :                qtens_biharmonic(i,j,k,q,ie) = &
     523           0 :                      -rhs_viss*dt*nu_q*dp0(k)*Qtens_biharmonic(i,j,k,q,ie) / elem(ie)%spheremp(i,j)
     524             :             enddo
     525             :             enddo
     526             :           enddo
     527             :         enddo
     528             :       enddo
     529           0 :       call neighbor_minmax_finish(hybrid,edgeAdvQminmax,nets,nete,qmin(:,:,nets:nete),qmax(:,:,nets:nete))
     530             : #else
     531             :       call t_startf('euler_neighbor_minmax2')
     532             :       call neighbor_minmax(hybrid,edgeAdvQminmax,nets,nete,qmin(:,:,nets:nete),qmax(:,:,nets:nete))
     533             :       call t_stopf('euler_neighbor_minmax2')
     534             :       call biharmonic_wk_scalar(elem,qtens_biharmonic,deriv,edgeAdv,hybrid,nets,nete)
     535             : 
     536             :       do ie = nets, nete
     537             :         do q = qbeg, qend
     538             :           do k = kbeg, kend
     539             :             !OMP_COLLAPSE_SIMD
     540             :             !DIR_VECTOR_ALIGNED
     541             :             do j=1,np
     542             :             do i=1,np
     543             :                ! note: biharmonic_wk() output has mass matrix already applied. Un-apply since we apply again below:
     544             :                qtens_biharmonic(i,j,k,q,ie) = &
     545             :                      -rhs_viss*dt*nu_q*dp0(k)*Qtens_biharmonic(i,j,k,q,ie) / elem(ie)%spheremp(i,j)
     546             :             enddo
     547             :             enddo
     548             :           enddo
     549             :         enddo
     550             :       enddo
     551             : #endif
     552             : 
     553             : 
     554             :     endif
     555             :   endif  ! compute biharmonic mixing term and qmin/qmax
     556             : 
     557             : 
     558             :   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     559             :   !   2D Advection step
     560             :   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
     561           0 :   do ie = nets, nete
     562             : 
     563             : 
     564             :     ! Compute velocity used to advance Qdp
     565           0 :     do k = kbeg, kend
     566             :       ! derived variable divdp_proj() (DSS'd version of divdp) will only be correct on 2nd and 3rd stage
     567             :       ! but that's ok because rhs_multiplier=0 on the first stage:
     568             :       !OMP_COLLAPSE_SIMD
     569             :       !DIR_VECTOR_ALIGNED
     570           0 :       do j=1,np
     571           0 :       do i=1,np
     572           0 :          dp(i,j,k) = elem(ie)%derived%dp(i,j,k) - rhs_multiplier * dt * elem(ie)%derived%divdp_proj(i,j,k)
     573           0 :          Vstar(i,j,1,k) = elem(ie)%derived%vn0(i,j,1,k) / dp(i,j,k)
     574           0 :          Vstar(i,j,2,k) = elem(ie)%derived%vn0(i,j,2,k) / dp(i,j,k)
     575             :       enddo
     576             :       enddo
     577             :     enddo
     578           0 :     if ( limiter_option == 8) then
     579             :         ! Note that the term dpdissk is independent of Q
     580           0 :         do k = kbeg, kend
     581             :           ! UN-DSS'ed dp at timelevel n0+1:
     582             :           !OMP_COLLAPSE_SIMD
     583             :           !DIR_VECTOR_ALIGNED
     584           0 :           do j=1,np
     585           0 :           do i=1,np
     586           0 :              dpdissk(i,j,k) = dp(i,j,k) - dt * elem(ie)%derived%divdp(i,j,k)
     587             :           enddo
     588             :           enddo
     589           0 :           if ( nu_p > 0 .and. rhs_viss /= 0 ) then
     590             :             ! add contribution from UN-DSS'ed PS dissipation
     591             : !            dpdiss(:,:) = ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) *
     592             : !            elem(ie)%derived%psdiss_biharmonic(:,:)
     593             :             !OMP_COLLAPSE_SIMD
     594             :             !DIR_VECTOR_ALIGNED
     595           0 :             do j=1,np
     596           0 :             do i=1,np
     597           0 :                dpdiss(i,j) = elem(ie)%derived%dpdiss_biharmonic(i,j,k)
     598           0 :                dpdissk(i,j,k) = dpdissk(i,j,k) - rhs_viss * dt * nu_q * dpdiss(i,j) / elem(ie)%spheremp(i,j)
     599             :             enddo
     600             :             enddo
     601             :           endif
     602             :           ! IMPOSE ZERO THRESHOLD.  do this here so it can be turned off for
     603             :           ! testing
     604           0 :           do q=qbeg, qend
     605           0 :              qmin(k,q,ie)=max(qmin(k,q,ie),0.0_r8)
     606             :           enddo
     607             :         enddo
     608             :     endif  ! limiter == 8
     609             : 
     610             : 
     611             :     ! advance Qdp
     612           0 :     do q = qbeg, qend
     613           0 :       do k = kbeg, kend
     614             :         ! div( U dp Q),
     615             :         !OMP_COLLAPSE_SIMD
     616             :         !DIR_VECTOR_ALIGNED
     617           0 :         do j=1,np
     618           0 :         do i=1,np
     619           0 :            gradQ(i,j,1) = Vstar(i,j,1,k) * elem(ie)%state%Qdp(i,j,k,q,n0_qdp)
     620           0 :            gradQ(i,j,2) = Vstar(i,j,2,k) * elem(ie)%state%Qdp(i,j,k,q,n0_qdp)
     621             :         enddo
     622             :         enddo
     623             :         ! Qtens(:,:,k) = elem(ie)%state%Qdp(:,:,k,q,n0_qdp) - &
     624             :         !               dt * divergence_sphere( gradQ , deriv , elem(ie) )
     625           0 :         call divergence_sphere( gradQ , deriv , elem(ie),div )
     626             : 
     627             :         !OMP_COLLAPSE_SIMD
     628             :         !DIR_VECTOR_ALIGNED
     629           0 :         do j=1,np
     630           0 :           do i=1,np
     631           0 :             Qtens(i,j,k) = elem(ie)%state%Qdp(i,j,k,q,n0_qdp) - dt * div(i,j)
     632             :           enddo
     633             :         enddo
     634             : 
     635             :         ! optionally add in hyperviscosity computed above:
     636           0 :         if ( rhs_viss /= 0 ) then
     637             :           !OMP_COLLAPSE_SIMD
     638             :           !DIR_VECTOR_ALIGNED
     639           0 :           do j=1,np
     640           0 :           do i=1,np
     641           0 :               Qtens(i,j,k) = Qtens(i,j,k) + Qtens_biharmonic(i,j,k,q,ie)
     642             :           enddo
     643             :           enddo
     644             :         endif
     645             :       enddo
     646             : 
     647           0 :       if ( limiter_option == 8) then
     648             :         ! apply limiter to Q = Qtens / dp_star
     649           0 :         call limiter_optim_iter_full( Qtens(:,:,:) , elem(ie)%spheremp(:,:) , qmin(:,q,ie) , &
     650           0 :                                                           qmax(:,q,ie) , dpdissk, kbeg, kend )
     651             :       endif
     652             : 
     653             : 
     654             :       ! apply mass matrix, overwrite np1 with solution:
     655             :       ! dont do this earlier, since we allow np1_qdp == n0_qdp
     656             :       ! and we dont want to overwrite n0_qdp until we are done using it
     657           0 :       do k = kbeg, kend
     658             :         !OMP_COLLAPSE_SIMD
     659             :         !DIR_VECTOR_ALIGNED
     660           0 :         do j=1,np
     661           0 :         do i=1,np
     662           0 :             elem(ie)%state%Qdp(i,j,k,q,np1_qdp) = elem(ie)%spheremp(i,j) * Qtens(i,j,k)
     663             :         enddo
     664             :         enddo
     665             :       enddo
     666             : 
     667           0 :       if ( limiter_option == 4 ) then
     668             :         !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
     669             :         ! sign-preserving limiter, applied after mass matrix
     670             :         !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!1
     671             : !JMD !$OMP BARRIER
     672             : !JMD !$OMP MASTER
     673           0 :         call limiter2d_zero(elem(ie)%state%Qdp(:,:,:,q,np1_qdp))
     674             : !JMD !$OMP END MASTER
     675             : !JMD !$OMP BARRIER
     676             :       endif
     677             : 
     678           0 :       kptr = nlev*(q-1) + kbeg - 1
     679           0 :       call edgeVpack(edgeAdvp1 , elem(ie)%state%Qdp(:,:,kbeg:kend,q,np1_qdp) , kblk , kptr , ie )
     680             :     enddo
     681             :     ! only perform this operation on thread which owns the first tracer
     682           0 :     if (DSSopt>0) then
     683           0 :       if (threadOwnsTracer(hybrid,1)) then
     684             :         ! all zero so we only have to DSS 1:nlev
     685           0 :         if ( DSSopt == DSSomega       ) DSSvar => elem(ie)%derived%omega(:,:,:)
     686           0 :         if ( DSSopt == DSSdiv_vdp_ave ) DSSvar => elem(ie)%derived%divdp_proj(:,:,:)
     687             :         ! also DSS extra field
     688           0 :         do k = kbeg, kend
     689             :           !OMP_COLLAPSE_SIMD
     690             :           !DIR_VECTOR_ALIGNED
     691           0 :           do j=1,np
     692           0 :           do i=1,np
     693           0 :              DSSvar(i,j,k) = elem(ie)%spheremp(i,j) * DSSvar(i,j,k)
     694             :           enddo
     695             :           enddo
     696             :         enddo
     697           0 :         kptr = nlev*qsize + kbeg - 1
     698           0 :         call edgeVpack( edgeAdvp1 , DSSvar(:,:,kbeg:kend), kblk, kptr, ie)
     699             :       endif
     700             :     end if
     701             :   enddo
     702             : 
     703           0 :   call bndry_exchange( hybrid , edgeAdvp1,location='edgeAdvp1')
     704             : 
     705           0 :   do ie = nets, nete
     706             :     ! only perform this operation on thread which owns the first tracer
     707           0 :     if (DSSopt>0) then
     708           0 :       if(threadOwnsTracer(hybrid,1)) then
     709           0 :         if ( DSSopt == DSSomega       ) DSSvar => elem(ie)%derived%omega(:,:,:)
     710           0 :         if ( DSSopt == DSSdiv_vdp_ave ) DSSvar => elem(ie)%derived%divdp_proj(:,:,:)
     711           0 :         kptr = qsize*nlev + kbeg -1
     712           0 :         call edgeVunpack( edgeAdvp1 , DSSvar(:,:,kbeg:kend) , kblk , kptr , ie )
     713           0 :         do k = kbeg, kend
     714             :            !OMP_COLLAPSE_SIMD
     715             :            !DIR_VECTOR_ALIGNED
     716           0 :            do j=1,np
     717           0 :            do i=1,np
     718           0 :                DSSvar(i,j,k) = DSSvar(i,j,k) * elem(ie)%rspheremp(i,j)
     719             :            enddo
     720             :            enddo
     721             :         enddo
     722             :       endif
     723             :     end if
     724           0 :     do q = qbeg, qend
     725           0 :       kptr = nlev*(q-1) + kbeg - 1
     726           0 :       call edgeVunpack( edgeAdvp1 , elem(ie)%state%Qdp(:,:,kbeg:kend,q,np1_qdp) , kblk , kptr , ie )
     727           0 :         do k = kbeg, kend
     728             :           !OMP_COLLAPSE_SIMD
     729             :           !DIR_VECTOR_ALIGNED
     730           0 :           do j=1,np
     731           0 :           do i=1,np
     732           0 :              elem(ie)%state%Qdp(i,j,k,q,np1_qdp) = elem(ie)%rspheremp(i,j) * elem(ie)%state%Qdp(i,j,k,q,np1_qdp)
     733             :           enddo
     734             :           enddo
     735             :         enddo
     736             :     enddo
     737             :   enddo
     738             : !  call t_stopf('euler_step')
     739             : 
     740           0 :   end subroutine euler_step
     741             : 
     742             : 
     743             : 
     744           0 :   subroutine limiter2d_zero(Q)
     745             :   ! mass conserving zero limiter (2D only).  to be called just before DSS
     746             :   !
     747             :   ! this routine is called inside a DSS loop, and so Q had already
     748             :   ! been multiplied by the mass matrix.  Thus dont include the mass
     749             :   ! matrix when computing the mass = integral of Q over the element
     750             :   !
     751             :   ! ps is only used when advecting Q instead of Qdp
     752             :   ! so ps should be at one timelevel behind Q
     753             :   implicit none
     754             :   real (kind=r8), intent(inout) :: Q(np,np,nlev)
     755             : 
     756             :   ! local
     757             : !  real (kind=r8) :: dp(np,np)
     758             :   real (kind=r8) :: mass,mass_new,ml
     759             :   integer i,j,k
     760             : 
     761           0 :   do k = nlev , 1 , -1
     762             :     mass = 0
     763           0 :     do j = 1 , np
     764           0 :       do i = 1 , np
     765             :         !ml = Q(i,j,k)*dp(i,j)*spheremp(i,j)  ! see above
     766           0 :         ml = Q(i,j,k)
     767           0 :         mass = mass + ml
     768             :       enddo
     769             :     enddo
     770             : 
     771             :     ! negative mass.  so reduce all postive values to zero
     772             :     ! then increase negative values as much as possible
     773           0 :     if ( mass < 0 ) Q(:,:,k) = -Q(:,:,k)
     774             :     mass_new = 0
     775           0 :     do j = 1 , np
     776           0 :       do i = 1 , np
     777           0 :         if ( Q(i,j,k) < 0 ) then
     778           0 :           Q(i,j,k) = 0
     779             :         else
     780           0 :           ml = Q(i,j,k)
     781           0 :           mass_new = mass_new + ml
     782             :         endif
     783             :       enddo
     784             :     enddo
     785             : 
     786             :     ! now scale the all positive values to restore mass
     787           0 :     if ( mass_new > 0 ) Q(:,:,k) = Q(:,:,k) * abs(mass) / mass_new
     788           0 :     if ( mass     < 0 ) Q(:,:,k) = -Q(:,:,k)
     789             :   enddo
     790           0 :   end subroutine limiter2d_zero
     791             : 
     792             : !-----------------------------------------------------------------------------
     793             : !-----------------------------------------------------------------------------
     794             : 
  subroutine advance_hypervis_scalar( edgeAdv , elem , hvcoord , hybrid , deriv , nt , nt_qdp , nets , nete , dt2 )
  !  hyperviscosity operator for forward-in-time scheme
  !  take one timestep of:
  !          Q(:,:,:,np) = Q(:,:,:,np) +  dt2*nu*laplacian**order ( Q )
  !
  !  For correct scaling, dt2 should be the same 'dt2' used in the leapfrog advance
  !
  !  The full step dt2 is split into hypervis_subcycle_q equal sub-steps.
  !  Each sub-step: (1) forms a scaled Q = Qdp/dp on the floating levels,
  !  (2) applies the biharmonic operator, (3) updates Qdp, optionally clips
  !  negatives, then packs/exchanges element edges and applies the inverse
  !  mass matrix (DSS).
  use dimensions_mod , only: np, nlev
  use hybrid_mod     , only: hybrid_t!, PrintHybrid
  use hybrid_mod     , only: get_loop_ranges
  use element_mod    , only: element_t
  use derivative_mod , only: derivative_t
  use edge_mod       , only: edgevpack, edgevunpack
  use edgetype_mod   , only: EdgeBuffer_t
  use bndry_mod      , only: bndry_exchange

  implicit none
  type (EdgeBuffer_t)  , intent(inout)         :: edgeAdv   ! edge buffer used for the DSS exchange
  type (element_t)     , intent(inout), target :: elem(:)   ! state%Qdp(:,:,:,:,nt_qdp) is updated in place
  type (hvcoord_t)     , intent(in   )         :: hvcoord   ! hybrid vertical coordinate (reference dp0)
  type (hybrid_t)      , intent(in   )         :: hybrid    ! MPI/OpenMP parallel structure
  type (derivative_t)  , intent(in   )         :: deriv     ! spectral-element derivative operators
  integer              , intent(in   )         :: nt        ! dynamics time level (not referenced in this routine)
  integer              , intent(in   )         :: nt_qdp    ! tracer (Qdp) time level to diffuse
  integer              , intent(in   )         :: nets      ! first element owned by this thread/task
  integer              , intent(in   )         :: nete      ! last element owned by this thread/task
  real (kind=r8), intent(in   )         :: dt2   ! full tracer timestep, subcycled below

  ! local
  real (kind=r8), dimension(np,np,nlev,qsize,nets:nete) :: Qtens
  real (kind=r8), dimension(np,np,nlev                ) :: dp
!  real (kind=r8), dimension(      nlev,qsize,nets:nete) :: min_neigh
!  real (kind=r8), dimension(      nlev,qsize,nets:nete) :: max_neigh
  integer :: k,kptr,ie,ic,q,i,j
  integer :: kbeg,kend,qbeg,qend

! NOTE: PGI compiler bug: when using spheremp, rspheremp and ps as pointers to elem(ie)% members,
!       data is incorrect (offset by a few numbers actually)
!       removed for now.
!  real (kind=r8), dimension(:,:), pointer :: spheremp,rspheremp
  real (kind=r8) :: dt,dp0
  integer :: kblk,qblk   ! The per thread size of the vertical and tracers

  ! partition vertical levels and tracers among threads
  call get_loop_ranges(hybrid,kbeg=kbeg,kend=kend,qbeg=qbeg,qend=qend)

  ! nothing to do when scalar hyperviscosity is disabled
  if ( nu_q           == 0 ) return
  !if ( hypervis_order /= 2 ) return

  kblk = kend - kbeg + 1   ! calculate size of the block of vertical levels
  qblk = qend - qbeg + 1   ! calculate size of the block of tracers

  call t_startf('advance_hypervis_scalar')

  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  !  hyper viscosity
  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  dt = dt2 / hypervis_subcycle_q   ! sub-step length

  do ic = 1 , hypervis_subcycle_q
    do ie = nets, nete
      ! Qtens = Q/dp   (apply hyperviscosity to dp0 * Q, not Qdp)
      do k = kbeg, kend
         ! various options:
         !   1)  biharmonic( Qdp )
         !   2)  dp0 * biharmonic( Qdp/dp )
         !   3)  dpave * biharmonic(Q/dp)
         ! For tracer mass / mass consistency, we use #2 when nu_p=0
         ! and #3 when nu_p>0, where dpave is the mean mass flux from the nu_p
         ! contribution from dynamics.
         dp0 = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) ) * hvcoord%ps0 + &
              ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) * hvcoord%ps0
         ! dp advanced over the full step dt2 using the projected mean flux divergence
         dp(:,:,k) = elem(ie)%derived%dp(:,:,k) - dt2*elem(ie)%derived%divdp_proj(:,:,k)
         if (nu_p>0) then
            do q = qbeg, qend
               ! option #3: scale Q = Qdp/dp by the time-averaged dissipation mass dpdiss_ave
               Qtens(:,:,k,q,ie) = elem(ie)%derived%dpdiss_ave(:,:,k)*&
                    elem(ie)%state%Qdp(:,:,k,q,nt_qdp) / dp(:,:,k)
            enddo
         else
            do q = qbeg, qend
               ! option #2: scale Q = Qdp/dp by the reference thickness dp0
               Qtens(:,:,k,q,ie) = dp0*elem(ie)%state%Qdp(:,:,k,q,nt_qdp) / dp(:,:,k)
            enddo
         endif
      enddo
   enddo

    ! compute biharmonic operator. Qtens = input and output
    call biharmonic_wk_scalar( elem , Qtens , deriv , edgeAdv , hybrid ,  nets , nete )

    do ie = nets, nete
      !spheremp     => elem(ie)%spheremp
      do q = qbeg, qend
        do k = kbeg, kend
          dp0 = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) ) * hvcoord%ps0 + &
                ( hvcoord%hybi(k+1) - hvcoord%hybi(k) ) * hvcoord%ps0
         do j = 1 , np
            do i = 1 , np

              ! advection Qdp.  For mass advection consistency:
              ! DIFF( Qdp) ~   dp0 DIFF (Q)  =  dp0 DIFF ( Qdp/dp )
              ! spheremp is applied here; it (and the mass matrix carried by
              ! the biharmonic output) is removed by rspheremp after the DSS below
              elem(ie)%state%Qdp(i,j,k,q,nt_qdp) = elem(ie)%state%Qdp(i,j,k,q,nt_qdp) * elem(ie)%spheremp(i,j) &
                                                                                 - dt * nu_q * Qtens(i,j,k,q,ie)
        enddo
        enddo
        enddo

        if (limiter_option .ne. 0 ) then
!JMD Only need if threading over the vertical
!JMD!$OMP BARRIER
!JMD!$OMP MASTER
           ! smooth some of the negativities introduced by diffusion:
           call limiter2d_zero( elem(ie)%state%Qdp(:,:,:,q,nt_qdp) )
!JMD!$OMP END MASTER
!JMD!$OMP BARRIER
        endif

      enddo
      ! pack this thread's level block for each tracer into the edge buffer
      do q = qbeg, qend
         kptr = nlev*(q-1) + kbeg - 1
         call edgeVpack( edgeAdv , elem(ie)%state%Qdp(:,:,kbeg:kend,q,nt_qdp) , kblk, kptr, ie )
      enddo
    enddo

    call bndry_exchange( hybrid , edgeAdv,location='advance_hypervis_scalar')

    do ie = nets, nete
      ! unpack neighbor contributions accumulated by the exchange
      do q = qbeg, qend
         kptr = nlev*(q-1) + kbeg - 1
         call edgeVunpack( edgeAdv , elem(ie)%state%Qdp(:,:,kbeg:kend,q,nt_qdp) , kblk, kptr, ie )
      enddo
      !rspheremp     => elem(ie)%rspheremp
      do q = qbeg, qend
        ! apply inverse mass matrix
        do k = kbeg, kend
          elem(ie)%state%Qdp(:,:,k,q,nt_qdp) = elem(ie)%rspheremp(:,:) * elem(ie)%state%Qdp(:,:,k,q,nt_qdp)
        enddo
      enddo
    enddo

  enddo
  call t_stopf('advance_hypervis_scalar')
  end subroutine advance_hypervis_scalar
     935             : 
     936      738816 :   subroutine vertical_remap(hybrid,elem,fvm,hvcoord,np1,np1_qdp,nets,nete)
     937             :     !
     938             :     ! This routine is called at the end of the vertically Lagrangian
     939             :     ! dynamics step to compute the vertical flux needed to get back
     940             :     ! to reference eta levels
     941             :     !
     942             :     ! map tracers
     943             :     ! map velocity components
     944             :     ! map temperature (either by mapping enthalpy or virtual temperature over log(p)
     945             :     ! (controlled by vert_remap_uvTq_alg > -20 or <= -20)
     946             :     !
     947           0 :     use hybvcoord_mod,          only: hvcoord_t
     948             :     use vertremap_mod,          only: remap1
     949             :     use hybrid_mod,             only: hybrid_t, config_thread_region,get_loop_ranges, PrintHybrid
     950             :     use fvm_control_volume_mod, only: fvm_struct
     951             :     use dimensions_mod,         only: ntrac
     952             :     use dimensions_mod,         only: kord_tr,kord_tr_cslam
     953             :     use cam_logfile,            only: iulog
     954             :     use physconst,              only: pi
     955             :     use air_composition,        only: thermodynamic_active_species_idx_dycore
     956             :     use cam_thermo,             only: get_enthalpy, get_virtual_temp, get_dp, MASS_MIXING_RATIO
     957             :     use thread_mod,             only: omp_set_nested
     958             :     use control_mod,            only: vert_remap_uvTq_alg
     959             :     type (hybrid_t),  intent(in)    :: hybrid  ! distributed parallel structure (shared)
     960             :     type(fvm_struct), intent(inout) :: fvm(:)
     961             :     type (element_t), intent(inout) :: elem(:)
     962             :     !
     963             :     real (kind=r8)   :: dpc_star(nc,nc,nlev) !Lagrangian levels on CSLAM grid
     964             : 
     965             :     type (hvcoord_t) :: hvcoord
     966             :     integer          :: ie,i,j,k,np1,nets,nete,np1_qdp,q, m_cnst
     967             :     real (kind=r8), dimension(np,np,nlev)  :: dp_moist,dp_star_moist, dp_dry,dp_star_dry
     968             :     real (kind=r8), dimension(np,np,nlev)  :: enthalpy_star
     969             :     real (kind=r8), dimension(np,np,nlev,2):: ttmp
     970             :     real(r8), parameter                    :: rad2deg = 180.0_r8/pi
     971             :     integer :: region_num_threads,qbeg,qend,kord_uvT(1)
     972             :     type (hybrid_t) :: hybridnew,hybridnew2
     973             :     real (kind=r8)  :: ptop
     974             : 
     975     1477632 :     kord_uvT = vert_remap_uvTq_alg
     976             : 
     977      738816 :     ptop = hvcoord%hyai(1)*hvcoord%ps0
     978     5933616 :     do ie=nets,nete
     979             :       !
     980             :       ! prepare for mapping of temperature
     981             :       !
     982     5194800 :       if (vert_remap_uvTq_alg>-20) then
     983             :         !
     984             :         ! compute enthalpy on Lagrangian levels
     985             :         ! (do it here since qdp is overwritten by remap1)
     986             :         !
     987    10389600 :         call get_enthalpy(elem(ie)%state%qdp(:,:,:,1:qsize,np1_qdp), &
     988             :              elem(ie)%state%t(:,:,:,np1), elem(ie)%state%dp3d(:,:,:,np1), enthalpy_star,     &
     989    15584400 :              active_species_idx_dycore=thermodynamic_active_species_idx_dycore)
     990             :       else
     991             :         !
     992             :         ! map Tv over log(p) following FV and FV3
     993             :         !
     994           0 :         call get_virtual_temp(elem(ie)%state%qdp(:,:,:,1:qsize,np1_qdp), enthalpy_star, &
     995           0 :              dp_dry=elem(ie)%state%dp3d(:,:,:,np1), active_species_idx_dycore=thermodynamic_active_species_idx_dycore)
     996           0 :         enthalpy_star = enthalpy_star*elem(ie)%state%t(:,:,:,np1)
     997             :       end if
     998             :       !
     999             :       ! update final psdry
    1000             :       !
    1001             :       elem(ie)%state%psdry(:,:) = ptop + &
    1002  2270127600 :            sum(elem(ie)%state%dp3d(:,:,:,np1),3)
    1003             :       !
    1004             :       ! compute dry vertical coordinate (Lagrangian and reference levels)
    1005             :       !
    1006   140259600 :       do k=1,nlev
    1007  2836360800 :         dp_star_dry(:,:,k) = elem(ie)%state%dp3d(:,:,k,np1)
    1008   135064800 :         dp_dry(:,:,k) = ( hvcoord%hyai(k+1) - hvcoord%hyai(k) )*hvcoord%ps0 + &
    1009  2971425600 :              ( hvcoord%hybi(k+1) - hvcoord%hybi(k) )*elem(ie)%state%psdry(:,:)
    1010  2841555600 :         elem(ie)%state%dp3d(:,:,k,np1) = dp_dry(:,:,k)
    1011             :       enddo
    1012             :       !
    1013     5194800 :       call get_dp(elem(ie)%state%Qdp(:,:,:,1:qsize,np1_qdp), MASS_MIXING_RATIO,&
    1014    10389600 :          thermodynamic_active_species_idx_dycore, dp_star_dry, dp_star_moist(:,:,:))
    1015             :       !
    1016             :       ! Check if Lagrangian leves have crossed
    1017             :       !
    1018  2841555600 :       if (minval(dp_star_moist)<1.0E-12_r8) then
    1019           0 :         write(iulog,*) "NEGATIVE LAYER THICKNESS DIAGNOSTICS:"
    1020           0 :         write(iulog,*) " "
    1021           0 :         do j=1,np
    1022           0 :           do i=1,np
    1023           0 :             if (minval(dp_star_moist(i,j,:))<1.0e-12_r8) then
    1024           0 :               write(iulog,'(A13,2f6.2)') "(lon,lat) = ",&
    1025           0 :                    elem(ie)%spherep(i,j)%lon*rad2deg,elem(ie)%spherep(i,j)%lat*rad2deg
    1026           0 :               write(iulog,*) " "
    1027           0 :               do k=1,nlev
    1028           0 :                 write(iulog,'(A21,I5,A1,f16.12,3f10.2)') "k,dp_star_moist,u,v,T: ",k," ",dp_star_moist(i,j,k)/100.0_r8,&
    1029           0 :                      elem(ie)%state%v(i,j,1,k,np1),elem(ie)%state%v(i,j,2,k,np1),elem(ie)%state%T(i,j,k,np1)
    1030             :               end do
    1031             :             end if
    1032             :           end do
    1033             :         end do
    1034           0 :         call endrun('negative moist layer thickness.  timestep or remap time too large')
    1035             :       endif
    1036             : 
    1037     5194800 :       call remap1(elem(ie)%state%Qdp(:,:,:,1:qsize,np1_qdp),np,1,qsize,qsize,dp_star_dry,dp_dry,ptop,0,.true.,kord_tr)
    1038             :       !
    1039             :       ! compute moist reference pressure level thickness
    1040             :       !
    1041     5194800 :       call get_dp(elem(ie)%state%Qdp(:,:,:,1:qsize,np1_qdp), MASS_MIXING_RATIO,&
    1042    10389600 :            thermodynamic_active_species_idx_dycore, dp_dry, dp_moist(:,:,:))
    1043             : 
    1044             :       !
    1045             :       ! Remapping of temperature
    1046             :       !
    1047     5194800 :       if (vert_remap_uvTq_alg>-20) then
    1048             :         !
    1049             :         ! remap enthalpy energy and back out temperature
    1050             :         !
    1051     5194800 :         call remap1(enthalpy_star,np,1,1,1,dp_star_dry,dp_dry,ptop,1,.true.,kord_uvT)
    1052             :         !
    1053             :         ! compute sum c^(l)_p*m^(l)*dp on arrival (Eulerian) grid
    1054             :         !
    1055  2841555600 :         ttmp(:,:,:,1) = 1.0_r8
    1056     5194800 :         call get_enthalpy(elem(ie)%state%qdp(:,:,:,1:qsize,np1_qdp),   &
    1057             :              ttmp(:,:,:,1), dp_dry,ttmp(:,:,:,2), &
    1058    10389600 :              active_species_idx_dycore=thermodynamic_active_species_idx_dycore)
    1059  2841555600 :         elem(ie)%state%t(:,:,:,np1)=enthalpy_star/ttmp(:,:,:,2)
    1060             :       else
    1061             :         !
    1062             :         ! map Tv over log(p); following FV and FV3
    1063             :         !
    1064           0 :         call remap1(enthalpy_star,np,1,1,1,dp_star_moist,dp_moist,ptop,1,.false.,kord_uvT)
    1065           0 :         call get_virtual_temp(elem(ie)%state%qdp(:,:,:,1:qsize,np1_qdp), ttmp(:,:,:,1), &
    1066           0 :              dp_dry=dp_dry, active_species_idx_dycore=thermodynamic_active_species_idx_dycore)
    1067             :         !
    1068             :         ! convert new Tv to T
    1069             :         !
    1070           0 :         elem(ie)%state%t(:,:,:,np1)=enthalpy_star/ttmp(:,:,:,1)
    1071             :       end if
    1072             :       !
    1073             :       ! remap velocity components
    1074             :       !
    1075  5677916400 :       call remap1(elem(ie)%state%v(:,:,1,:,np1),np,1,1,1,dp_star_moist,dp_moist,ptop,-1,.false.,kord_uvT)
    1076  5678655216 :       call remap1(elem(ie)%state%v(:,:,2,:,np1),np,1,1,1,dp_star_moist,dp_moist,ptop,-1,.false.,kord_uvT)
    1077             :     enddo
    1078             : 
    1079      738816 :     if (ntrac>0) then
    1080             :       !
    1081             :       ! vertical remapping of CSLAM tracers
    1082             :       !
    1083     5933616 :       do ie=nets,nete
    1084  1761037200 :         dpc_star=fvm(ie)%dp_fvm(1:nc,1:nc,:)
    1085   140259600 :         do k=1,nlev
    1086   545454000 :          do j=1,nc
    1087  1755842400 :            do i=1,nc
    1088             :              !
    1089             :              ! new pressure levels on CSLAM grid
    1090             :              !
    1091  4862332800 :              fvm(ie)%dp_fvm(i,j,k) = (hvcoord%hyai(k+1) - hvcoord%hyai(k))*hvcoord%ps0 + &
    1092  6483110400 :                   (hvcoord%hybi(k+1) - hvcoord%hybi(k))*fvm(ie)%psc(i,j)
    1093             :             end do
    1094             :           end do
    1095             :         end do
    1096     5933616 :         if(ntrac>tracer_num_threads) then
    1097     5194800 :           call omp_set_nested(.true.)
    1098             :           !$OMP PARALLEL NUM_THREADS(tracer_num_threads), DEFAULT(SHARED), PRIVATE(hybridnew2,qbeg,qend)
    1099     5194800 :           hybridnew2 = config_thread_region(hybrid,'ctracer')
    1100     5194800 :           call get_loop_ranges(hybridnew2, qbeg=qbeg, qend=qend)
    1101           0 :           call remap1(fvm(ie)%c(1:nc,1:nc,:,1:ntrac),nc,qbeg,qend,ntrac,dpc_star, &
    1102 12332455200 :                       fvm(ie)%dp_fvm(1:nc,1:nc,:),ptop,0,.false.,kord_tr_cslam)
    1103             :           !$OMP END PARALLEL
    1104     5194800 :           call omp_set_nested(.false.)
    1105             :         else
    1106           0 :           call remap1(fvm(ie)%c(1:nc,1:nc,:,1:ntrac),nc,1,ntrac,ntrac,dpc_star, &
    1107           0 :                       fvm(ie)%dp_fvm(1:nc,1:nc,:),ptop,0,.false.,kord_tr_cslam)
    1108             :         endif
    1109             :       enddo
    1110             :     end if
    1111      738816 :   end subroutine vertical_remap
    1112             : 
    1113             : end module prim_advection_mod

Generated by: LCOV version 1.14