LCOV - code coverage report
Current view: top level - dynamics/fv - cd_core.F90 (source / functions) Hit Total Coverage
Test: coverage.info Lines: 454 762 59.6 %
Date: 2025-03-14 01:21:06 Functions: 1 1 100.0 %

          Line data    Source code
       1      129024 : subroutine cd_core(grid,   nx,     u,   v,   pt,                  &
       2      129024 :                    delp,   pe,     pk,  ns,  dt,                  &
       3             :                    ptopin, umax,   pi, ae,                        &
       4      129024 :                    cp3vc,  cap3vc, cp3v, cap3v,                   &
       5             :                    iord_c, jord_c, iord_d, jord_d,   ipe,         &
       6             :                    div24del2flag, del2coef,                       &
       7      129024 :                    om,     hs,     cx3  ,  cy3, mfx, mfy,         &
       8      129024 :                    delpf, uc, vc, ptc, dpt, ptk,                  &
       9      129024 :                    wz3, pxc, wz,  hsxy, ptxy, pkxy,               &
      10      129024 :                    pexy, pkcc, wzc, wzxy, delpxy,                 &
      11      129024 :                    pkkp, wzkp, cx_om, cy_om, filtcw, s_trac,      &
      12      129024 :                    mlt, ncx, ncy, nmfx, nmfy, iremote,            &
      13      129024 :                    cxtag, cytag, mfxtag, mfytag,                  &
      14      129024 :                    cxreqs, cyreqs, mfxreqs, mfyreqs,              &
      15             :                    kmtp, am_correction, am_geom_crrct, am_fixer,  &
      16      129024 :                    dod, don, high_order_top)
      17             : 
      18             :    ! Dynamical core for both C- and D-grid Lagrangian dynamics
      19             :    !
      20             :    ! DESCRIPTION:
      21             :    !    Perform a dynamical update for one small time step; the small
      22             :    !    time step is limitted by the fastest wave within the Lagrangian control-
      23             :    !    volume 
      24             : 
      25             : 
      26             :    use shr_kind_mod,      only: r8 => shr_kind_r8
      27             :    use sw_core,           only: d2a2c_winds, c_sw, d_sw
      28             :    use pft_module,        only: pft2d
      29             :    use dynamics_vars,     only: T_FVDYCORE_GRID
      30             :    use FVperf_module,     only: FVstartclock, FVstopclock, FVbarrierclock
      31             :    use cam_logfile,       only: iulog
      32             :    use spmd_utils,        only: masterproc
      33             :    use cam_abortutils,    only: endrun
      34             : 
      35             : #if defined( SPMD )
      36             :    use mod_comm,      only : mp_send4d_ns, mp_recv4d_ns,     &
      37             :                              mp_send2_ns, mp_recv2_ns,                   &
      38             :                              mp_send3d_2, mp_recv3d_2,                   &
      39             :                              mp_send3d, mp_recv3d, mp_sendirr,           &
      40             :                              mp_recvirr
      41             :    use mpishorthand
      42             : #endif
      43             : 
      44             : #if defined( OFFLINE_DYN )
      45             :    use metdata,       only : get_met_fields, met_winds_on_walls
      46             : #endif
      47             :    use metdata,       only : met_rlx
      48             : 
      49             :    implicit none
      50             : 
      51             :    ! INPUT PARAMETERS:
      52             : 
      53             :    type (T_FVDYCORE_GRID), intent(inout) :: grid! grid (for YZ decomp)
      54             :    integer, intent(in) :: nx                 ! # of split pieces in longitude direction
      55             :    integer, intent(in) :: ipe                ! ipe=1:  end of cd_core()
      56             :                                              ! ipe=-1,-2: start of cd_core()
      57             :                                              ! ipe=-2,2: second to last call to cd_core()
      58             :                                              ! ipe=0 :
      59             :    integer, intent(in) :: ns                 ! Number of internal time steps (splitting)
      60             :    integer, intent(in) :: iord_c, jord_c     ! scheme order on C grid in X and Y dir.
      61             :    integer, intent(in) :: iord_d, jord_d     ! scheme order on D grid in X and Y dir.
      62             :    integer, intent(in) :: filtcw             ! flag for filtering C-grid winds
      63             : 
      64             :    ! ct_overlap data
      65             :    logical, intent(in) :: s_trac                        ! true to post send for ct_overlap or
      66             :                                                         ! tracer decomposition information
      67             :    integer, intent(in) :: mlt                           ! multiplicity of sends
      68             :    integer, intent(in) :: ncx, ncy, nmfx, nmfy          ! array sizes
      69             :    integer, intent(in) :: cxtag(mlt), cytag(mlt)        ! tags
      70             :    integer, intent(in) :: mfxtag(mlt), mfytag(mlt)      ! tags
      71             :    integer, intent(in) :: iremote(mlt)                  ! target tasks
      72             :    integer, intent(in) :: cxreqs(mlt), cyreqs(mlt)      ! mpi requests
      73             :    integer, intent(in) :: mfxreqs(mlt), mfyreqs(mlt)    ! mpi requests
      74             : 
      75             : 
      76             :    real(r8), intent(in) :: pi
      77             :    real(r8), intent(in) :: ae                ! Radius of the Earth (m)
      78             :    real(r8), intent(in) :: om                ! rotation rate
      79             :    real(r8), intent(in) :: ptopin
      80             :    real(r8), intent(in) :: umax
      81             :    real(r8), intent(in) :: dt                !small time step in seconds
      82             :    integer,  intent(in) :: div24del2flag
      83             :    real(r8), intent(in) :: del2coef
      84             :    integer,  intent(in) :: kmtp           ! range of levels (1:kmtp) where order is reduced
      85             :    logical,  intent(in) :: am_correction  ! logical switch for correction (applied here)
      86             :    logical,  intent(in) :: am_geom_crrct  ! logical switch for correction (applied here)
      87             :    logical,  intent(in) :: am_fixer       ! logical switch for fixer (generate out args)
      88             :    logical,  intent(in) :: high_order_top ! use uniform 4th order everywhere (incl. model top)
      89             : 
      90             :    real(r8), intent(in) ::   & 
      91             :       cp3vc(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast)         !C_p on yz
      92             :    real(r8), intent(in) ::   & 
      93             :       cap3vc(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast)        !cappa on yz
      94             :    real(r8), intent(in) ::   & 
      95             :       cp3v(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy,grid%km)  ! C_p on xy
      96             :    real(r8), intent(in) ::   & 
      97             :       cap3v(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy,grid%km) ! cappa on xy -- on "a" grid
      98             : 
      99             :    ! Input time independent arrays:
     100             :    real(r8), intent(in) ::        &
     101             :       hs(grid%im,grid%jfirst:grid%jlast)         !surface geopotential
     102             :    real(r8), intent(in) ::        &
     103             :       hsxy(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy) !surface geopotential XY-decomp.
     104             : 
     105             :    ! INPUT/OUTPUT PARAMETERS:
     106             : 
     107             :    real(r8), intent(inout) ::   &   
     108             :       u(grid%im,grid%jfirst-grid%ng_d:grid%jlast+grid%ng_s,grid%kfirst:grid%klast) ! u-Wind (m/s)
     109             :    real(r8), intent(inout) ::   &   
     110             :       v(grid%im,grid%jfirst-grid%ng_s:grid%jlast+grid%ng_d,grid%kfirst:grid%klast) ! v-Wind (m/s)
     111             :    
     112             :    real(r8), intent(inout) ::   & 
     113             :       delp(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast)          ! Delta pressure (pascal)
     114             :    real(r8), intent(inout) ::   &
     115             :       pt(grid%im,grid%jfirst-grid%ng_d:grid%jlast+grid%ng_d,grid%kfirst:grid%klast)! Scaled-Pot. temp.
     116             : 
     117             :    ! Input/output: accumulated winds & mass fluxes on c-grid for large-
     118             :    !               time-step transport
     119             :    real(r8), intent(inout) ::   &
     120             :       cx3(grid%im,grid%jfirst-grid%ng_d:grid%jlast+grid%ng_d,grid%kfirst:grid%klast)! Accum. Courant no. in X
     121             :    real(r8), intent(inout) ::   &
     122             :       cy3(grid%im,grid%jfirst:grid%jlast+1,grid%kfirst:grid%klast)        ! Accumulated Courant no. in Y
     123             :    real(r8), intent(inout) ::   &
     124             :       mfx(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast)          ! Mass flux in X  (unghosted)
     125             :    real(r8), intent(inout) ::   &
     126             :       mfy(grid%im,grid%jfirst:grid%jlast+1,grid%kfirst:grid%klast)        ! Mass flux in Y
     127             : 
     128             :    ! Input/output work arrays:
     129             :    real(r8), intent(inout) ::   &
     130             :       delpf(grid%im,grid%jfirst-grid%ng_d:grid%jlast+grid%ng_d,grid%kfirst:grid%klast)  ! filtered delp
     131             :    real(r8), intent(inout) ::   &
     132             :       uc(grid%im,grid%jfirst-grid%ng_d:grid%jlast+grid%ng_d,grid%kfirst:grid%klast)   ! u-Winds on C-grid
     133             :    real(r8), intent(inout) ::   &
     134             :       vc(grid%im,grid%jfirst-2:   grid%jlast+2,   grid%kfirst:grid%klast)   ! v-Winds on C-grid
     135             : 
     136             :    real(r8), intent(inout) ::   &
     137             :       dpt(grid%im,grid%jfirst-1:grid%jlast+1,grid%kfirst:grid%klast)
     138             :    real(r8), intent(inout) ::   &
     139             :       wz3(grid%im,grid%jfirst-1:grid%jlast  ,grid%kfirst:grid%klast+1)
     140             :    real(r8), intent(inout) ::   &
     141             :       pxc(grid%im,grid%jfirst-1:grid%jlast+1,grid%kfirst:grid%klast+1) 
     142             :    real(r8), intent(inout) ::   &
     143             :       wz(grid%im,grid%jfirst-1:grid%jlast+1,grid%kfirst:grid%klast+1)
     144             :    real(r8), intent(inout) ::   &
     145             :       pkcc(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast+1) 
     146             :    real(r8), intent(inout) ::   &
     147             :       wzc(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast+1) 
     148             :    real(r8), intent(inout) ::   &
     149             :       wzxy(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy,grid%km+1)
     150             :    real(r8), intent(inout) ::   &
     151             :       delpxy(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy,grid%km)
     152             :    real(r8), intent(inout) ::   &
     153             :       pkkp(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast+1)
     154             :    real(r8), intent(inout) ::   &
     155             :       wzkp(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast+1)
     156             : 
     157             :    ! OUTPUT PARAMETERS:
     158             :    real(r8), intent(out) ::   & 
     159             :       pe(grid%im,grid%kfirst:grid%klast+1,grid%jfirst:grid%jlast)         ! Edge pressure (pascal)
     160             :    real(r8), intent(out) ::   &
     161             :       pk(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast+1)         ! Pressure to the kappa
     162             :    real(r8), intent(out) ::   & 
     163             :       ptxy(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy,grid%km) ! Potential temperature XY decomp
     164             :    real(r8), intent(out) ::   & 
     165             :       pkxy(grid%ifirstxy:grid%ilastxy,grid%jfirstxy:grid%jlastxy,grid%km+1) ! P-to-the-kappa XY decomp
     166             :    real(r8), intent(out) ::   &
     167             :       pexy(grid%ifirstxy:grid%ilastxy,grid%km+1,grid%jfirstxy:grid%jlastxy) ! Edge pressure XY decomp
     168             :    real(r8), intent(out) ::   &
     169             :       ptc(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast)
     170             :    real(r8), intent(out) ::   &
     171             :       ptk(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast)
     172             : 
     173             :    ! Omega calculation
     174             :    real(r8), intent(out) ::   &
     175             :       cx_om(grid%im,grid%jfirst:grid%jlast,grid%kfirst:grid%klast)   ! Courant in X
     176             :    real(r8), intent(out) ::  &
     177             :       cy_om(grid%im,grid%jfirst:grid%jlast+1,grid%kfirst:grid%klast) ! Courant in Y
     178             : 
     179             :    real(r8), intent(out) :: don(grid%jm,grid%km), & ! num of d(Omega)
     180             :                             dod(grid%jm,grid%km)    ! denom of same
     181             : 
     182             :    ! Local 2D arrays:
     183      258048 :    real(r8) ::  wk(grid%im+2,grid%jfirst:  grid%jlast+2)
     184      258048 :    real(r8) ::  wk1(grid%im,grid%jfirst-1:grid%jlast+1)
     185      258048 :    real(r8) ::  wk2(grid%im+1,grid%jfirst-grid%ng_d:grid%jlast+grid%ng_d)
     186      258048 :    real(r8) ::  wk3(grid%im,grid%jfirst-1:grid%jlast+1)
     187             : 
     188      258048 :    real(r8) :: p1d(grid%im)
     189             : 
     190             :    ! fvitt    cell centered u- and v-Winds (m/s)
     191      258048 :    real(r8) ::  u_cen(grid%im,grid%jfirst-grid%ng_d:grid%jlast+grid%ng_d,grid%kfirst:grid%klast)
     192      258048 :    real(r8) ::  v_cen(grid%im,grid%jfirst-grid%ng_s:grid%jlast+grid%ng_d,grid%kfirst:grid%klast)
     193      258048 :    real(r8) ::  ua(grid%im,grid%jfirst-grid%ng_d:grid%jlast+grid%ng_d,grid%kfirst:grid%klast)
     194      258048 :    real(r8) ::  va(grid%im,grid%jfirst-grid%ng_s:grid%jlast+grid%ng_d,grid%kfirst:grid%klast)
     195             : 
     196      258048 :    real(r8) :: pec(grid%im,grid%kfirst:grid%klast+1,grid%jfirst:grid%jlast) 
     197             : 
     198             :    ! Local scalars
     199             :    real(r8), parameter ::  D0_0                    =   0.0_r8
     200             :    real(r8), parameter ::  D0_1                    =   0.1_r8
     201             :    real(r8), parameter ::  D0_5                    =   0.5_r8
     202             :    real(r8), parameter ::  D1_0                    =   1.0_r8
     203             :    real(r8), parameter ::  D4_0                    =   4.0_r8
     204             :    real(r8), parameter ::  D8_0                    =   8.0_r8
     205             :    real(r8), parameter ::  D10_0                   =  10.0_r8
     206             :    real(r8), parameter ::  D128_0                  = 128.0_r8
     207             :    real(r8), parameter ::  D180_0                  = 180.0_r8
     208             :    real(r8), parameter ::  D1E5                    = 1.0e5_r8
     209             : 
     210             :    real(r8), parameter ::  ratmax                  = 0.81_r8
     211             :    real(r8), parameter ::  tiny                    = 1.0e-10_r8
     212             : 
     213             :    real(r8) ::  press
     214             :    real(r8) ::  rat, ycrit
     215             :    real(r8) ::  dt5
     216             : 
     217             :    integer :: im, jm, km         ! problem dimensions
     218             :    integer :: ifirstxy, jfirstxy ! xy-decomp. lat/long ranges
     219             :    integer :: ng_c               ! ghost latitudes on C grid
     220             :    integer :: ng_d               ! ghost lats on D (Max NS dependencies, ng_d >= ng_c)
     221             :    integer :: ng_s               ! max(ng_c+1,ng_d) significant if ng_c = ng_d
     222             :    
     223             :    integer :: jfirst
     224             :    integer :: jlast
     225             :    integer :: kfirst
     226             :    integer :: klast
     227             :    integer :: klastp            ! klast, except km+1 when klast=km
     228             : 
     229             :    integer :: iam
     230             :    integer :: npr_y
     231             :    integer :: npes_yz
     232             : 
     233             :    integer i, j, k, ml
     234             :    integer js1g1, js2g0, js2g1, jn2g1, js4g0, jn3g0 
     235             :    integer jn2g0, jn1g1
     236             :    integer iord , jord
     237             : 
     238             :    real(r8) ::  tau, fac, px4
     239             :    real(r8) ::  tau4 ! coefficient for 4th-order divergence damping
     240             : 
     241             : #if defined( SPMD )
     242             :    integer dest, src
     243             : #endif
     244             : 
     245             :    logical :: reset_winds = .false.
     246             :    logical :: everytime = .false.
     247             : 
     248             :    ! set damping options:
     249             :    !
     250             :    ! - ldel2: 2nd-order velocity-component damping targetted to top layers,
     251             :    !          with coefficient del2coef (default 3E5)
     252             :    !
     253             :    ! - ldiv2: 2nd-order divergence damping everywhere and increasing in top layers
     254             :    !
     255             :    ! - ldiv4: 4th-order divergence damping everywhere and increasing in top layers
     256             :    !
     257             :    ! - div24del2flag: 2 for ldiv2 (default), 4 for ldiv4, 42 for ldiv4 + ldel2
     258             :    ! - ldiv2 and ldel2 cannot coexist
     259             : 
     260             :    logical :: ldiv2 = .true.  
     261             :    logical :: ldiv4 = .false.  
     262             :    logical :: ldel2 = .false.   
     263             : 
     264             :    ! AM correction and fixer
     265             :    integer  :: iord_c_min
     266             :    integer  :: iord_d_min
     267             :    integer  :: iord_d_low
     268             :    integer  :: jord_c_min
     269             :    integer  :: jord_d_min
     270             :    integer  :: jord_d_low
     271             :    real(r8) :: oma
     272             :    real(r8) :: xakap
     273      129024 :    real(r8), pointer :: cosp(:)
     274      129024 :    real(r8), pointer :: cose(:)
     275             : 
     276      129024 :    real(r8), allocatable :: help(:,:,:) 
     277      129024 :    real(r8), allocatable :: kelp(:,:,:) 
     278      129024 :    real(r8), allocatable :: dpn(:,:,:)
     279      129024 :    real(r8), allocatable :: dpo(:,:,:)
     280      129024 :    real(r8), allocatable :: dpr(:,:,:)
     281      129024 :    real(r8), allocatable :: ddpu(:,:,:)
     282      129024 :    real(r8), allocatable :: dpns(:,:)
     283      129024 :    real(r8), allocatable :: ddus(:,:)
     284             : 
     285             :    ! referenced outside AM conditional even though it's not used
     286      258048 :    real(r8) :: ddpa(grid%im,grid%jfirst-1:grid%jlast  ,grid%kfirst:grid%klast  ) 
     287      258048 :    real(r8) :: ddu( grid%im,grid%jfirst  :grid%jlast  ,grid%kfirst:grid%klast  )
     288      258048 :    real(r8) :: vf(  grid%im,grid%jfirst-2:grid%jlast+2,grid%kfirst:grid%klast  )   ! v-Winds on U points
     289             : 
     290             :    ! Used to allow the same code to execute with or without the AM correction
     291      258048 :    real(r8) :: ptr(grid%im,grid%jfirst-1:grid%jlast+1,grid%kfirst:grid%klast+1)
     292             : 
     293             :    logical  :: sw_am_corr
     294             :    logical  :: am_press_crrct
     295             :    real(r8) :: wg_hiord
     296             :    real(r8) :: tpr, acap
     297             : 
     298             :    !******************************************************************
     299             :    !******************************************************************
     300             :    !
     301             :    ! IMPORTANT CODE OPTIONS - SEE BELOW
     302             :    !
     303             :    !******************************************************************
     304             :    !******************************************************************
     305             :    ! Option for which version of geopk to use with yz decomposition.
     306             :    ! If geopkdist=false, variables are transposed to/from xy decomposition
     307             :    !   for use in geopk.
     308             :    ! If geopkdist=true, either geopk_d or geopk16 is used. Both 
     309             :    !   compute local partial sums in z and then communicate those 
     310             :    !   sums to combine them. geopk_d does not try to parallelize in the
     311             :    !   z-direction except in a pipeline fashion controlled by the
     312             :    !   parameter geopkblocks, and is bit-for-bit the same as the
     313             :    !   transpose-based algorithm. geopk16 exploits z-direction 
     314             :    !   parallelism and requires 16-byte arithmetic (DSIZE=16) 
     315             :    !   to reproduce the same numerics (and to be reproducible with
     316             :    !   respect to process count). The geopk16 default is to use 
     317             :    !   8-byte arithmetic (DSIZE=8). This is faster than
     318             :    !   16-byte, but also gives up reproducibility. On many systems
     319             :    !   performance of geopk_d is comparable to geopk16 even with 
     320             :    !   8-byte numerics.
     321             :    ! On the last two small timesteps (ipe=1,2 or 1,-2) for D-grid, 
     322             :    !   the version of geopk that uses transposes is called regardless, 
     323             :    !   as some transposed quantities are required for the te_map phase
     324             :    !   and for the calculation of omega.
     325             :    ! For non-SPMD mode, geopk_[cd]dist are set to false.
     326             : 
     327             :    logical geopk_cdist, geopk_ddist
     328             : 
     329             :    ! REVISION HISTORY:
     330             :    !     SJL  99.01.01:   Original SMP version
     331             :    !     WS   99.04.13:   Added jfirst:jlast concept
     332             :    !     SJL  99.07.15:   Merged c_core and d_core to this routine
     333             :    !     WS   99.09.07:   Restructuring, cleaning, documentation
     334             :    !     WS   99.10.18:   Walkthrough corrections; frozen for 1.0.7
     335             :    !     WS   99.11.23:   Pruning of some 2-D arrays
     336             :    !     SJL  99.12.23:   More comments; general optimization; reduction
     337             :    !                      of redundant computation & communication
     338             :    !     WS   00.05.14:   Modified ghost indices per Kevin's definition
     339             :    !     WS   00.07.13:   Changed PILGRIM API
     340             :    !     WS   00.08.28:   Cosmetic changes: removed old loop limit comments
     341             :    !     AAM  00.08.30:   Introduced kfirst,klast
     342             :    !     WS   00.12.01:   Replaced MPI_ON with SPMD; hs now distributed
     343             :    !     WS   01.04.11:   PILGRIM optimizations for begin/endtransfer
     344             :    !     WS   01.05.08:   Optimizations in the call of c_sw and d_sw
     345             :    !     AAM  01.06.27:   Reinstituted 2D decomposition for use in ccm
     346             :    !     WS   01.12.10:   Ghosted PT, code now uses mod_comm primitives
     347             :    !     WS   01.12.31:   Removed vorticity damping, ghosted U,V,PT
     348             :    !     WS   02.01.15:   Completed transition to mod_comm
     349             :    !     WS   02.07.04:   Fixed 2D decomposition bug dest/src for mp_send3d
     350             :    !     WS   02.09.04:   Integrated fvgcm-1_3_71 zero diff. changes by Lin
     351             :    !     WS   03.07.22:   Removed HIGH_P option; this is outdated
     352             :    !     WS   03.10.15:   Fixed hack of 00.04.13 for JORD>1 JCD=1, in clean way
     353             :    !     WS   03.12.03:   Added grid as argument, some dynamics_vars removed
     354             :    !     WS   04.08.25:   Interface simplified with GRID argument
     355             :    !     WS   04.10.07:   Removed dependency on spmd_dyn; info now in GRID 
     356             :    !     WS   05.05.24:   Incorporated OFFLINE_DYN; merge of CAM/GEOS5
     357             :    !     PW   05.07.26:   Changes for Cray X1
     358             :    !     PW   05.10.12:   More changes for Cray X1(E), avoiding array segment copying
     359             :    !     WS   06.09.08:   Isolated magic numbers as F90 parameters
     360             :    !     WS   06.09.15:   PI now passed as argument
     361             :    !     CC   07.01.29:   Corrected calculation of OMEGA
     362             :    !     PW   08.06.29:   Added options to call geopk_d and swap-based transposes
     363             :    !     THT  16.11.18:   Add options for AM correction and fixer
     364             :    !--------------------------------------------------------------------------------------
     365             : 
     366             :    logical :: high_alt
     367      129024 :    high_alt = grid%high_alt
     368             : 
     369      129024 :    geopk_cdist = .false.
     370      129024 :    geopk_ddist = .false.
     371             : #if defined( SPMD )
     372      129024 :    if (grid%geopkdist) then
     373           0 :       geopk_cdist = .true.
     374           0 :       if ((ipe == -1) .or. (ipe == 0)) geopk_ddist = .true.
     375             :    endif
     376             : #endif
     377             : 
     378      129024 :    am_press_crrct = am_geom_crrct .and. am_correction
     379      129024 :    if (am_press_crrct) then
     380             :      wg_hiord = -1._r8
     381             :    else
     382      129024 :      wg_hiord =  0._r8
     383             :    endif
     384             : 
     385      129024 :    npes_yz  = grid%npes_yz
     386             : 
     387      129024 :    im       = grid%im
     388      129024 :    jm       = grid%jm
     389      129024 :    km       = grid%km
     390             : 
     391      129024 :    ng_c     = grid%ng_c
     392      129024 :    ng_d     = grid%ng_d
     393      129024 :    ng_s     = grid%ng_s
     394             : 
     395      129024 :    jfirst   = grid%jfirst
     396      129024 :    jlast    = grid%jlast
     397      129024 :    kfirst   = grid%kfirst
     398      129024 :    klast    = grid%klast
     399      129024 :    klastp   = grid%klastp
     400             : 
     401      129024 :    iam      = grid%iam
     402      129024 :    npr_y    = grid%npr_y
     403             : 
     404      129024 :    ifirstxy = grid%ifirstxy
     405      129024 :    jfirstxy = grid%jfirstxy
     406             : 
     407      129024 :    if (am_correction .or. am_fixer) then 
     408             :       allocate( &
     409             :          help(grid%im,grid%jfirst-1:grid%jlast ,grid%kfirst:grid%klast  ), &
     410             :          kelp(grid%im,grid%jfirst-1:grid%jlast ,grid%kfirst:grid%klast  ), &
     411             :          dpn(grid%im,grid%jfirst  :grid%jlast  ,grid%kfirst:grid%klast  ), &
     412           0 :          dpo(grid%im,grid%jfirst  :grid%jlast  ,grid%kfirst:grid%klast  ) )
     413           0 :       acap = 1._r8/4._r8 ! effective AM/MoI contribution from polar caps
     414             :    endif
     415      129024 :    if (am_press_crrct) then 
     416             :       allocate( &
     417           0 :          dpr(grid%im,grid%jfirst-1:grid%jlast+1,grid%kfirst:grid%klast  )  )
     418           0 :       xakap = 1._r8/cap3vc(1,jfirst,kfirst)
     419             :    endif
     420      129024 :    if (am_correction) then 
     421             :       allocate( &
     422             :          ddpu(grid%im,grid%jfirst  :grid%jlast ,grid%kfirst:grid%klast  ), &
     423             :          dpns(grid%jfirst:grid%jlast,grid%kfirst:grid%klast), &
     424           0 :          ddus(grid%jfirst:grid%jlast,grid%kfirst:grid%klast) )
     425           0 :       ddus = 0._r8
     426             :    else
     427             :       xakap = 1._r8
     428             :    endif
     429             : 
     430             :    ! maintain consistent accuracy (uniform PPM order) over domain
     431      129024 :    if (high_order_top) then
     432           0 :       iord_c_min = iord_c
     433           0 :       jord_c_min = jord_c
     434           0 :       iord_d_min = iord_d
     435           0 :       jord_d_min = jord_d
     436           0 :       iord_d_low = iord_d
     437           0 :       jord_d_low = jord_d
     438             :    else
     439             :       iord_c_min = 1
     440             :       jord_c_min = 1
     441             :       iord_d_min = 1
     442             :       jord_d_min = 1
     443             :       iord_d_low = 2
     444             :       jord_d_low = 2
     445             :    endif
     446      129024 :    oma  =  ae*om
     447   796981248 :    don  = 0.0_r8
     448   796981248 :    dod  = 0.0_r8
     449      129024 :    cosp => grid%cosp
     450      129024 :    cose => grid%cose
     451             : 
     452      129024 :    if (iam .lt. npes_yz) then
     453             : 
     454      129024 :       call FVstartclock(grid,'---PRE_C_CORE')
     455             : 
     456             : #if defined( SPMD )
     457      129024 :       call FVstartclock(grid,'---PRE_C_CORE_COMM')
     458             :       call mp_send4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,  &
     459      129024 :                          kfirst, klast, ng_d, ng_s, u )
     460             :       call mp_send4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,  &
     461      129024 :                          kfirst, klast, ng_s, ng_d, v )
     462      129024 :       call FVstopclock(grid,'---PRE_C_CORE_COMM')
     463             : #endif
     464             : 
     465             :       ! Set general loop limits
     466             :       ! jfirst >= 1; jlast <= jm
     467      129024 :       js1g1  = max(1,jfirst-1)
     468      129024 :       js2g0  = max(2,jfirst)
     469      129024 :       js2g1  = max(2,jfirst-1)
     470      129024 :       jn2g0  = min(jm-1,jlast)
     471      129024 :       jn1g1  = min(jm,jlast+1)
     472      129024 :       jn2g1 = min(jm-1,jlast+1)
     473             : 
     474      129024 :       js4g0  = max(4,jfirst)
     475      129024 :       jn3g0  = min(jm-2,jlast)
     476             : 
     477      129024 :       if ( abs(grid%dt0-dt) > D0_1 ) then
     478             : 
     479        1536 :          grid%dt0 = dt
     480        1536 :          dt5      = D0_5*dt
     481             : 
     482        1536 :          grid%rdy   = D1_0/(ae*grid%dp)
     483        1536 :          grid%dtdy  = dt *grid%rdy
     484        1536 :          grid%dtdy5 = dt5*grid%rdy
     485        1536 :          grid%dydt  = (ae*grid%dp) / dt
     486        1536 :          grid%tdy5  = D0_5/grid%dtdy
     487             : 
     488      293376 :          do j = 2, jm-1
     489      291840 :             grid%dx(j)    = grid%dl*ae*grid%cosp(j)
     490      291840 :             grid%rdx(j)   = D1_0 / grid%dx(j)
     491      291840 :             grid%dtdx(j)  = dt /grid% dx(j)
     492      291840 :             grid%dxdt(j)  = grid%dx(j) / dt
     493      291840 :             grid%dtdx2(j) = D0_5*grid%dtdx(j)
     494      291840 :             grid%dtdx4(j) = D0_5*grid%dtdx2(j)
     495      291840 :             grid%dycp(j)  = ae*grid%dp/grid%cosp(j)
     496      293376 :             grid%cy(j)    = grid%rdy * grid%acosp(j)
     497             :          end do
     498             : 
     499      294912 :          do j = 2, jm
     500      293376 :             grid%dxe(j)   = ae*grid%dl*grid%cose(j)
     501      293376 :             grid%rdxe(j)  = D1_0 / grid%dxe(j)
     502      293376 :             grid%dtdxe(j) = dt / grid%dxe(j)
     503      293376 :             grid%dtxe5(j) = D0_5*grid%dtdxe(j)
     504      293376 :             grid%txe5(j)  = D0_5/grid%dtdxe(j)
     505      293376 :             grid%cye(j)   =  D1_0 / (ae*grid%cose(j)*grid%dp)
     506      294912 :             grid%dyce(j)  = ae*grid%dp/grid%cose(j)
     507             :          end do
     508             : 
     509             :          ! C-grid
     510        1536 :          if (grid%ptop>1._r8) then
     511        1536 :             grid%zt_c = abs(umax*dt5) / (grid%dl*ae)
     512             :          else
     513           0 :             grid%zt_c = cos( D10_0 * pi / D180_0 )
     514             :          end if
     515             : 
     516             :          ! D-grid
     517        1536 :          if (grid%ptop>1._r8) then
     518        1536 :             grid%zt_d = abs(umax*dt) / (grid%dl*ae)
     519             :          else
     520           0 :             grid%zt_d = cos( D10_0 * pi / D180_0 )
     521             :          end if
     522             : 
     523        1536 :          if ( ptopin /= grid%ptop) then
     524           0 :             write(iulog,*) 'PTOP as input to cd_core != ptop from T_FVDYCORE_GRID'
     525           0 :             call endrun('PTOP as input to cd_core != ptop from T_FVDYCORE_GRID')
     526             :          end if
     527             : 
     528             :          ! damping code
     529             : 
     530        1536 :          if (div24del2flag == 2) then
     531             : 
     532           0 :             ldiv2 = .true.
     533           0 :             ldiv4 = .false.
     534           0 :             ldel2 = .false.
     535           0 :             if (masterproc)  write(iulog,*) 'Divergence damping: use 2nd order damping'
     536             : 
     537        1536 :          elseif (div24del2flag == 4) then
     538             : 
     539             :             ! fourth order divergence damping and no velocity diffusion
     540        1536 :             ldiv2 = .false.
     541        1536 :             ldiv4 = .true.
     542        1536 :             ldel2 = .false.
     543        1536 :             if (masterproc) write(iulog,*) 'Divergence damping: use 4th order damping'
     544             : 
     545           0 :          elseif (div24del2flag == 42) then
     546             : 
     547             :             ! fourth order divergence damping with velocity diffusion
     548           0 :             ldiv2 = .false.
     549           0 :             ldiv4 = .true.
     550           0 :             ldel2 = .true.
     551           0 :             if (masterproc) write(iulog,*) 'Divergence damping: use 4th order damping'
     552           0 :             if (masterproc) write(iulog,*) 'Velocity del2 damping with coefficient ', del2coef
     553             : 
     554             :          else
     555             : 
     556           0 :             ldiv2 = .true.
     557           0 :             ldiv4 = .false.
     558           0 :             ldel2 = .false.
     559           0 :             if (masterproc) write(iulog,*) 'Inadmissable velocity smoothing option - div24del2flag = ', div24del2flag
     560           0 :             call endrun('Inadmissable value of div24del2flag')
     561             :          end if
     562             : 
     563        5632 :          do k = kfirst, klast
     564             :          
     565        4096 :             if (ldel2) then
     566             : 
     567             :                !***********************************
     568             :                !
     569             :                ! Laplacian on velocity components
     570             :                !
     571             :                !***********************************
     572             : 
     573           0 :                press = D0_5 * ( grid%ak(k)+grid%ak(k+1) + &
     574           0 :                        (grid%bk(k)+grid%bk(k+1))*D1E5 )
     575           0 :                tau = D8_0 * (D1_0+ tanh(D1_0*log(grid%ptop/press)) )
     576             : 
     577             :                ! tau is strength of damping
     578           0 :                if (tau < 0.3_r8) then
     579             : 
     580             :                   ! no del2 damping at lower levels
     581           0 :                   tau = 0.0_r8
     582             :                end if
     583             : 
     584           0 :                do j = js2g0, jn1g1
     585             : 
     586             :                   ! fac must include dt for the momentum equation
     587             :                   ! i.e. diffusion coefficient is fac/dt
     588             :                   !
     589             :                   ! del2 diffusion coefficient in spectral core is 2.5e5             
     590           0 :                   fac =  tau * dt * del2coef
     591             : 
     592             :                   ! all these coefficients are necessary because of the staggering of the
     593             :                   ! wind components
     594           0 :                   grid%cdxde(j,k) = fac/(ae*ae*grid%cose(j)*grid%cose(j)*grid%dl*grid%dl) 
     595           0 :                   grid%cdyde(j,k) = fac/(ae*ae*grid%cose(j)*grid%dp*grid%dp)
     596             :                end do
     597             : 
     598           0 :                do j = js2g0, jn2g1
     599           0 :                   fac =  tau * dt * del2coef
     600           0 :                   grid%cdxdp(j,k) = fac/(ae*ae*grid%cosp(j)*grid%cosp(j)*grid%dl*grid%dl) 
     601           0 :                   grid%cdydp(j,k) = fac/(ae*ae*grid%cosp(j)*grid%dp*grid%dp)
     602             :                end do
     603             :             end if
     604             : 
     605        4096 :             if (ldiv2) then
     606             : 
     607             :                !***********************************************
     608             :                !
     609             :                ! second-order divergence damping
     610             :                !
     611             :                !***********************************************
     612           0 :                press = D0_5 * ( grid%ak(k)+grid%ak(k+1) + &
     613           0 :                        (grid%bk(k)+grid%bk(k+1))*D1E5 )
     614           0 :                tau = D8_0 * (D1_0+ tanh(D1_0*log(grid%ptop/press)) )
     615           0 :                tau = max(D1_0, tau) / (D128_0*abs(dt))
     616             : 
     617           0 :                do j = js2g0, jn1g1
     618             : 
     619             :                   !-----------------------------------------
     620             :                   ! Explanation of divergence damping coeff.
     621             :                   ! ========================================
     622             :                   !
     623             :                   ! Divergence damping is added to the momentum 
     624             :                   ! equations through a term tau*div where
     625             :                   !
     626             :                   !       tau = C*L**2/dt 
     627             :                   !
     628             :                   ! where L is the length scale given by
     629             :                   ! 
     630             :                   !       L**2 = a**2*dl*dp
     631             :                   !
     632             :                   ! and divergence is given by
     633             :                   !
     634             :                   !       div = divx + divy
     635             :                   !
     636             :                   ! where
     637             :                   !
     638             :                   !       divx = (1/(a*cos(p)))*du/dl
     639             :                   !       divy = (1/(a*cos(p)))*(d(cos(theta)*v)/dp))
     640             :                   ! 
     641             :                   ! du and (d(cos(theta*v)/dp)) are computed in sw_core
     642             :                   !
     643             :                   ! The constant terms in divx*tau and divy*tau are
     644             :                   !
     645             :                   ! cdx = (1/(a*cos(p)))* (1/dl) * C * a**2 * dl * dp / dt = C * (a*dp/(cos(p)))/dt
     646             :                   ! cdy = (1/(a*cos(p)))* (1/dp) * C * a**2 * dl * dp / dt = C * (a*dl/(cos(p)))/dt
     647             :                   !
     648             :                   !-----------------------------------------
     649           0 :                   fac = tau * ae / grid%cose(j) !default
     650           0 :                   grid%cdx(j,k) = fac*grid%dp   !default
     651           0 :                   grid%cdy(j,k) = fac*grid%dl   !default
     652             :                end do
     653             :             end if
     654             : 
     655        5632 :             if (ldiv4) then
     656             : 
     657             :                ! 4th-order divergence damping
     658        4096 :                tau4 = 0.01_r8 / (abs(dt)) 
     659             : 
     660             :                !**************************************
     661             :                !
     662             :                ! fourth order divergence damping
     663             :                !
     664             :                !**************************************
     665             : 
     666      790528 :                do j = 1, jm
     667             : 
     668             :                   ! divergence computation coefficients
     669      786432 :                   grid%cdxdiv  (j,k) = D1_0/(grid%cose(j)*grid%dl)
     670      790528 :                   grid%cdydiv  (j,k) = D1_0/(grid%cose(j)*grid%dp)
     671             :                end do
     672             : 
     673       20352 :                do j = js2g0, jn1g1
     674             : 
     675             :                   ! div4 coefficients
     676       16256 :                   fac = grid%dl*grid%cose(j)!*ae
     677       16256 :                   grid%cdx4   (j,k) = D1_0/(fac*fac)
     678       16256 :                   fac = grid%dp*grid%dp*grid%cose(j)!*ae*ae
     679       16256 :                   grid%cdy4   (j,k) = D1_0/fac
     680       16256 :                   fac = grid%cose(j)*grid%dp*grid%dl
     681       20352 :                   grid%cdtau4(j,k)  = -ae*tau4*fac*fac
     682             :                end do
     683             :             end if
     684             : 
     685             :          end do  ! do k = kfirst, klast
     686             : 
     687             :       end if ! if ( abs(grid%dt0-dt) > D0_1 )
     688             : 
     689      129024 :       if ( ipe < 0 .or. ns == 1 ) then          ! starting cd_core
     690       32256 :          call FVstartclock(grid,'---C_DELP_LOOP')
     691             : !$omp parallel do private(i, j, k, wk, wk2)
     692      118272 :          do k = kfirst, klast
     693      344064 :             do j = jfirst, jlast
     694    74661888 :                do i = 1, im
     695    74575872 :                   delpf(i,j,k) = delp(i,j,k)
     696             :                end do
     697             :             end do
     698      172032 :             call pft2d( delpf(1,js2g0,k), grid%sc, &
     699             :                         grid%dc, im, jn2g0-js2g0+1,    &
     700      290304 :                         wk, wk2 )
     701             :          end do
     702       32256 :          call FVstopclock(grid,'---C_DELP_LOOP')
     703             :         
     704             :       end if
     705             : 
     706             : #if defined( SPMD )
     707      129024 :       call FVstartclock(grid,'---PRE_C_CORE_COMM')
     708             :       call mp_recv4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,    &
     709      129024 :                          kfirst, klast, ng_d, ng_s, u )
     710             :       call mp_recv4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,    &
     711      129024 :                          kfirst, klast, ng_s, ng_d, v )
     712             : 
     713             :       call mp_send4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,    &
     714      129024 :                          kfirst, klast, ng_d, ng_d, pt )
     715      129024 :       if ( ipe < 0 .or. ns == 1 ) then          ! starting cd_core
     716             :          call mp_send4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast, &
     717       32256 :                             kfirst, klast, ng_d, ng_d, delpf )
     718             :       endif                         ! end if ipe < 0 check
     719             :       call mp_recv4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,    &
     720      129024 :                          kfirst, klast, ng_d, ng_d, pt )
     721      129024 :       if ( ipe < 0 .or. ns == 1 ) then          ! starting cd_core
     722             :          call mp_recv4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast, &
     723       32256 :                             kfirst, klast, ng_d, ng_d, delpf )
     724             :       endif                          ! end if ipe < 0 check
     725      129024 :       call FVstopclock(grid,'---PRE_C_CORE_COMM')
     726             : #endif
     727             : 
     728             : 
     729             :       ! Get the cell centered winds if needed for the sub-step
     730             : 
     731             : #if ( defined OFFLINE_DYN )
     732             :       if ( ( (ipe < 0) .or. (everytime) ) .and. (.not. met_winds_on_walls()) ) then
     733             :          call get_met_fields( grid, u_cen, v_cen )
     734             :          reset_winds = .true.
     735             :       else
     736             :          reset_winds = .false.
     737             :       endif
     738             : #endif
     739             : 
     740             : 
     741             :       ! Get D-grid V-wind at the poles and interpolate winds to A- and C-grids;
     742             :       ! This calculation was formerly done in subroutine c_sw but is being done here to
     743             :       ! avoid communication in OpenMP loops
     744             : 
     745             : !$omp parallel do private(k, wk, wk2)
     746      473088 :       do  k = kfirst, klast
     747     1032192 :          call d2a2c_winds(grid, u(1,jfirst-ng_d,k),         v(1,jfirst-ng_s,k),     &
     748      344064 :                                 ua(1,jfirst-ng_d,k),       va(1,jfirst-ng_s,k),     &
     749      344064 :                                 uc(1,jfirst-ng_d,k),       vc(1,jfirst-2,k),        &
     750             :                                 u_cen(1,jfirst-ng_d,k), v_cen(1,jfirst-ng_s,k),     &
     751     1720320 :                                 reset_winds, met_rlx(k), am_geom_crrct)
     752             : 
     753             :          ! Optionally filter advecting C-grid winds
     754      473088 :          if (filtcw .gt. 0) then
     755           0 :             call pft2d(uc(1,js2g0,k), grid%sc, grid%dc, im, jn2g0-js2g0+1, wk, wk2 )
     756           0 :             call pft2d(vc(1,js2g0,k), grid%se, grid%de, im, jlast-js2g0+1, wk, wk2 )
     757             :          end if
     758             : 
     759             :       end do
     760             : 
     761             : #if defined( SPMD )
     762             :       ! Fill C-grid advecting winds Halo regions
     763             :       ! vc only needs to be ghosted at jlast+1
     764             :       call mp_send4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,               &
     765      129024 :                          kfirst, klast, ng_d, ng_d, uc )
     766             :       call mp_send4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,               &
     767      129024 :                          kfirst, klast, 2, 2, vc )
     768             :       call mp_recv4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,               &
     769      129024 :                          kfirst, klast, ng_d, ng_d, uc )
     770             :       call mp_recv4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,               &
     771      129024 :                          kfirst, klast, 2, 2, vc )
     772             : #endif
     773             : 
     774      129024 :       call FVstopclock(grid,'---PRE_C_CORE')
     775             : 
     776      129024 :       call FVbarrierclock(grid,'sync_c_core', grid%commyz)
     777      129024 :       call FVstartclock(grid,'---C_CORE')
     778             : 
     779             : !$omp parallel do private(i, j, k, iord, jord)    
     780      473088 :       do  k = kfirst, klast
     781             : 
     782      344064 :          if ( k <= kmtp ) then
     783       43008 :             iord = iord_c_min
     784       43008 :             jord = jord_c_min
     785             :          else
     786      301056 :             iord = iord_c
     787      301056 :             jord = jord_c
     788             :          end if
     789             : 
     790             :          !-----------------------------------------------------------------
     791             :          ! Call the vertical independent part of the dynamics on the C-grid
     792             :          !-----------------------------------------------------------------
     793             : 
     794     1032192 :          call c_sw( grid, u(1,jfirst-ng_d,k),   v(1,jfirst-ng_s,k),   &
     795      688128 :                     pt(1,jfirst-ng_d,k),  delp(1,jfirst,k),           &
     796             :                     ua(1,jfirst-ng_d,k),  va(1,jfirst-ng_s,k),        &
     797      344064 :                     uc(1,jfirst-ng_d,k),  vc(1,jfirst-2,k),           &
     798             :                     ptc(1,jfirst,k),      delpf(1,jfirst-ng_d,k),     &
     799     2193408 :                     ptk(1,jfirst,k), tiny, iord, jord, am_geom_crrct)
     800             :       end do
     801             : 
     802      129024 :       call FVstopclock(grid,'---C_CORE')
     803             : 
     804             : ! MPI note: uc, vc, ptk, and ptc computed within the above k-look from jfirst to jlast
     805             : ! Needed by D-core: uc(jfirst-ng_d:jlast+ng_d), vc(jfirst:jlast+1)
     806             : 
     807      129024 :       call FVbarrierclock(grid,'sync_c_geop', grid%commyz)
     808             : 
     809             :    end if  !  (iam .lt. npes_yz)
     810             : 
     811             : 
     812      129024 :    if (geopk_cdist) then
     813             : 
     814           0 :       if (iam .lt. npes_yz) then
     815             : 
     816             :          ! Stay in yz space and use z communications
     817             : 
     818           0 :          if (grid%geopk16byte) then
     819           0 :             call FVstartclock(grid,'---C_GEOP16')
     820             :             call geopk16(grid, pe, ptk, pkcc, wzc, hs, ptc, &
     821           0 :                          0, cp3vc(1,jfirst,kfirst), cap3vc(1,jfirst,kfirst) )
     822             :          else
     823           0 :             call FVstartclock(grid,'---C_GEOP_D')
     824             :             call geopk_d(grid, pe, ptk, pkcc, wzc, hs, ptc, &
     825           0 :                          0, cp3vc(1,jfirst,kfirst), cap3vc(1,jfirst,kfirst) )
     826             :          end if
     827             : 
     828             :          ! Geopk does not need j ghost zones of pkc and wz
     829             : 
     830           0 :          if (.not.high_alt) then
     831             : !$omp parallel do private(i, j, k)
     832           0 :             do k = kfirst, klast+1
     833           0 :                do j = jfirst, jlast
     834           0 :                   do i = 1, im
     835           0 :                      pxc(i,j,k) = pkcc(i,j,k)
     836             :                   end do
     837             :                end do
     838             :             end do
     839             :          endif
     840             : 
     841             : !$omp parallel do private(i, j, k)
     842           0 :          do k = kfirst, klast+1
     843           0 :             do j = jfirst, jlast
     844           0 :                do i = 1, im
     845           0 :                   wz(i,j,k) = wzc(i,j,k)
     846             :                end do
     847             :             end do
     848             :          end do
     849             : 
     850           0 :          if (grid%geopk16byte) then
     851           0 :             call FVstopclock(grid,'---C_GEOP16')
     852             :          else
     853           0 :             call FVstopclock(grid,'---C_GEOP_D')
     854             :          end if
     855             : 
     856             :       end if  !  (iam .lt. npes_yz)
     857             : 
     858             :    else
     859             : 
     860             :       ! Begin xy geopotential section
     861             : 
     862      129024 :       call FVstartclock(grid,'---C_GEOP')
     863             : 
     864      129024 :       if (grid%twod_decomp == 1) then
     865             : 
     866             :          ! Transpose to xy decomposition
     867             : 
     868             : #if defined( SPMD )
     869      129024 :          call FVstartclock(grid,'YZ_TO_XY_C_GEOP')
     870      129024 :          if (grid%modc_onetwo .eq. 1) then
     871             :             call mp_sendirr( grid%commxy, grid%ijk_yz_to_xy%SendDesc,     &
     872             :                              grid%ijk_yz_to_xy%RecvDesc, ptk, delpxy,     &
     873           0 :                              modc=grid%modc_cdcore )
     874             :             call mp_recvirr( grid%commxy, grid%ijk_yz_to_xy%SendDesc,     &
     875             :                              grid%ijk_yz_to_xy%RecvDesc, ptk, delpxy,     &
     876           0 :                              modc=grid%modc_cdcore )
     877             :             call mp_sendirr( grid%commxy, grid%ijk_yz_to_xy%SendDesc,     &
     878             :                              grid%ijk_yz_to_xy%RecvDesc, ptc, ptxy,       &
     879           0 :                              modc=grid%modc_cdcore )
     880             :             call mp_recvirr( grid%commxy, grid%ijk_yz_to_xy%SendDesc,     &
     881             :                              grid%ijk_yz_to_xy%RecvDesc, ptc, ptxy,       &
     882           0 :                              modc=grid%modc_cdcore )
     883             :          else
     884             :             call mp_sendirr( grid%commxy, grid%ijk_yz_to_xy%SendDesc,     &
     885             :                              grid%ijk_yz_to_xy%RecvDesc, ptk, delpxy,     &
     886             :                              ptc, ptxy,                                   &
     887      129024 :                              modc=grid%modc_cdcore )
     888             :             call mp_recvirr( grid%commxy, grid%ijk_yz_to_xy%SendDesc,     &
     889             :                              grid%ijk_yz_to_xy%RecvDesc, ptk, delpxy,     &
     890             :                              ptc, ptxy,                                   &
     891      129024 :                              modc=grid%modc_cdcore )
     892             :          end if
     893      129024 :          call FVstopclock(grid,'YZ_TO_XY_C_GEOP')
     894             : #endif
     895             : 
     896             :       else
     897             : 
     898             : !$omp parallel do private(i, j, k)
     899           0 :          do k = kfirst, klast
     900           0 :             do j = jfirst, jlast
     901           0 :                do i = 1, im
     902           0 :                   delpxy(i,j,k) = ptk(i,j,k)
     903           0 :                   ptxy(i,j,k) = ptc(i,j,k)
     904             :                end do
     905             :             end do
     906             :          end do
     907             : 
     908             :       end if
     909             : 
     910             :       call geopk(grid, pexy, delpxy, pkxy, wzxy, hsxy, ptxy, &
     911      129024 :                  cp3v, cap3v, nx)
     912             : 
     913      129024 :       if (grid%twod_decomp == 1) then
     914             : 
     915             :          ! Transpose back to yz decomposition.
     916             :          ! pexy is not output quantity on this call.
     917             :          ! pkkp and wzkp are holding arrays, whose specific z-dimensions
     918             :          !    are required by Pilgrim.
     919             :          ! Z edge ghost points (klast+1) are automatically filled in
     920             : 
     921             : #if defined( SPMD )
     922             : 
     923      129024 :          call FVstartclock(grid,'XY_TO_YZ_C_GEOP')
     924      129024 :          if (high_alt) then
     925             :             call mp_sendirr( grid%commxy, grid%pexy_to_pe%SendDesc,       &
     926             :                  grid%pexy_to_pe%RecvDesc, pexy, pec,         &
     927           0 :                  modc=grid%modc_dynrun )
     928             :             call mp_recvirr( grid%commxy, grid%pexy_to_pe%SendDesc,       &
     929             :                  grid%pexy_to_pe%RecvDesc, pexy, pec,         &
     930           0 :                  modc=grid%modc_dynrun )
     931             :             call mp_sendirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
     932             :                  grid%pkxy_to_pkc%RecvDesc, wzxy, wzkp,       &
     933           0 :                  modc=grid%modc_cdcore )
     934             :             call mp_recvirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
     935             :                  grid%pkxy_to_pkc%RecvDesc, wzxy, wzkp,       &
     936           0 :                  modc=grid%modc_cdcore )
     937             :          else
     938      129024 :             if (grid%modc_onetwo .eq. 1) then
     939             :                call mp_sendirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
     940             :                     grid%pkxy_to_pkc%RecvDesc, pkxy, pkkp,       &
     941           0 :                     modc=grid%modc_cdcore )
     942             :                call mp_recvirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
     943             :                     grid%pkxy_to_pkc%RecvDesc, pkxy, pkkp,       &
     944           0 :                     modc=grid%modc_cdcore )
     945             :                call mp_sendirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
     946             :                     grid%pkxy_to_pkc%RecvDesc, wzxy, wzkp,       &
     947           0 :                     modc=grid%modc_cdcore )
     948             :                call mp_recvirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
     949             :                     grid%pkxy_to_pkc%RecvDesc, wzxy, wzkp,       &
     950           0 :                     modc=grid%modc_cdcore )
     951             :             else
     952             :                call mp_sendirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
     953             :                     grid%pkxy_to_pkc%RecvDesc, pkxy, pkkp,       &
     954             :                     wzxy, wzkp,                                  &
     955      129024 :                     modc=grid%modc_cdcore )
     956             :                call mp_recvirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
     957             :                     grid%pkxy_to_pkc%RecvDesc, pkxy, pkkp,       &
     958             :                     wzxy, wzkp,                                  &
     959      129024 :                     modc=grid%modc_cdcore )
     960             :             end if
     961             :          endif
     962      129024 :          call FVstopclock(grid,'XY_TO_YZ_C_GEOP')
     963             : 
     964      129024 :          if (high_alt) then
     965             : !$omp parallel do private(i, j, k)
     966           0 :             do k = kfirst, klast+1
     967           0 :                do j = jfirst, jlast
     968           0 :                   do i = 1, im
     969           0 :                      pxc(i,j,k) = log(pec(i,k,j))
     970             :                   end do
     971             :                end do
     972             :             end do
     973             :          else
     974             : !$omp parallel do private(i, j, k)
     975      602112 :             do k = kfirst, klast+1
     976     2021376 :                do j = jfirst, jlast
     977   410640384 :                   do i = 1, im
     978   410167296 :                      pxc(i,j,k) = pkkp(i,j,k)
     979             :                   end do
     980             :                end do
     981             :             end do
     982             :          endif
     983             : !$omp parallel do private(i, j, k)
     984      602112 :          do k = kfirst, klast+1
     985     2021376 :             do j = jfirst, jlast
     986   410640384 :                do i = 1, im
     987   410167296 :                   wz(i,j,k) = wzkp(i,j,k)
     988             :                end do
     989             :             end do
     990             :          end do
     991             : 
     992             : #endif
     993             : 
     994             :       else
     995             : 
     996             : !$omp parallel do private(i, j, k)
     997           0 :          do k = kfirst, klast+1
     998           0 :             do j = jfirst, jlast
     999           0 :                do i = 1, im
    1000           0 :                  wz(i,j,k) = wzxy(i,j,k)
    1001             :                end do
    1002             :             end do
    1003             :          end do
    1004           0 :          if (high_alt) then
    1005             : !$omp parallel do private(i, j, k)
    1006           0 :             do k = kfirst, klast+1
    1007           0 :                do j = jfirst, jlast
    1008           0 :                   do i = 1, im
    1009           0 :                      pec(i,k,j) = pexy(i,k,j)
    1010           0 :                      pxc(i,j,k) = log(pec(i,k,j))
    1011             :                   end do
    1012             :                end do
    1013             :             end do
    1014             :          else
    1015             : !$omp parallel do private(i, j, k)
    1016           0 :             do k = kfirst, klast+1
    1017           0 :                do j = jfirst, jlast
    1018           0 :                   do i = 1, im
    1019           0 :                      pxc(i,j,k) = pkxy(i,j,k)
    1020             :                   end do
    1021             :                end do
    1022             :             end do
    1023             :          endif
    1024             : 
    1025             :       end if
    1026             : 
    1027      129024 :       call FVstopclock(grid,'---C_GEOP')
    1028             : 
    1029             :       ! End xy geopotential section
    1030             :    end if       ! geopk_cdist
    1031             : 
    1032      129024 :    if (iam .lt. npes_yz) then
    1033             : 
    1034      129024 :       call FVbarrierclock(grid,'sync_pre_d_core', grid%commyz)
    1035      129024 :       call FVstartclock(grid,'---PRE_D_CORE')
    1036             : 
    1037             :       ! Upon exit from geopk, the quantities pe, pkc and wz will have been
    1038             :       ! updated at klast+1
    1039             : 
    1040             : #if defined( SPMD )
    1041             : 
    1042             :       ! pkc & wz need to be ghosted only at jfirst-1
    1043             : 
    1044      129024 :       call FVstartclock(grid,'---PRE_D_CORE_COMM')
    1045      129024 :       dest = iam+1
    1046      129024 :       src  = iam-1
    1047      129024 :       if ( mod(iam+1,npr_y) == 0 ) dest = -1
    1048      129024 :       if ( mod(iam,npr_y) == 0 ) src = -1
    1049             :       call mp_send3d_2( grid%commyz, dest, src, im, jm, km+1,          &
    1050             :                         1, im, jfirst-1, jlast+1, kfirst, klast+1,    &
    1051      129024 :                         1, im, jlast, jlast, kfirst, klast+1, pxc, wz)
    1052      129024 :       call FVstopclock(grid,'---PRE_D_CORE_COMM')
    1053             : #endif
    1054             : 
    1055      129024 :       call FVstartclock(grid,'---C_U_LOOP')
    1056             : 
    1057             : 
    1058             : !$omp parallel do private(i, j, k, p1d, wk, wk2, wk1, wk3)
    1059             :       ! Beware k+1 references directly below (AAM)
    1060      473088 :       do k = kfirst, klast
    1061     1365504 :          do j = js2g0, jn2g0
    1062             : 
    1063     1021440 :             if (am_press_crrct) then
    1064             : 
    1065           0 :                do i = 1, im
    1066             :                   ! AM fix: ensure interior pressure torque vanishes 
    1067           0 :                   wk1(i,j) = pxc(i,j,k  )*max(pxc(i,j,k), tiny)**(xakap - 1.0_r8)
    1068           0 :                   wk3(i,j) = pxc(i,j,k+1)**xakap
    1069           0 :                   p1d(i)   = wk3(i,j) - wk1(i,j)
    1070             :                enddo
    1071             : 
    1072           0 :                uc(1,j,k) = uc(1,j,k) + grid%dtdx2(j) * (                  &
    1073           0 :                         (wz(im,j,k+1)-wz(1,j,k))*(wk3(1,j)-wk1(im,j))  &
    1074             :                       + (wz(im,j,k)-wz(1,j,k+1))*(wk3(im,j)-wk1(1,j))) &
    1075           0 :                                    / (p1d(1)+p1d(im))
    1076           0 :                do i = 2, im
    1077           0 :                   uc(i,j,k) = uc(i,j,k) + grid%dtdx2(j) * (                    &
    1078           0 :                            (wz(i-1,j,k+1)-wz(i,j,k))*(wk3(i,j)-wk1(i-1,j))  &
    1079             :                          + (wz(i-1,j,k)-wz(i,j,k+1))*(wk3(i-1,j)-wk1(i,j))) &
    1080           0 :                                     / (p1d(i)+p1d(i-1))
    1081             :                end do
    1082             : 
    1083             :             else
    1084             : 
    1085   295196160 :                do i = 1, im
    1086   295196160 :                   p1d(i) = pxc(i,j,k+1) - pxc(i,j,k)
    1087             :                enddo
    1088             : 
    1089     2042880 :                uc(1,j,k) = uc(1,j,k) + grid%dtdx2(j) * (                   &
    1090     2042880 :                            (wz(im,j,k+1)-wz(1,j,k))*(pxc(1,j,k+1)-pxc(im,j,k))  &
    1091             :                          + (wz(im,j,k)-wz(1,j,k+1))*(pxc(im,j,k+1)-pxc(1,j,k))) &
    1092     4085760 :                                            / (p1d(1)+p1d(im))
    1093   294174720 :                do i = 2, im
    1094   293153280 :                   uc(i,j,k) = uc(i,j,k) + grid%dtdx2(j) * (                &
    1095   293153280 :                               (wz(i-1,j,k+1)-wz(i,j,k))*(pxc(i,j,k+1)-pxc(i-1,j,k))  &
    1096             :                             + (wz(i-1,j,k)-wz(i,j,k+1))*(pxc(i-1,j,k+1)-pxc(i,j,k))) &
    1097   880481280 :                                                / (p1d(i)+p1d(i-1))
    1098             :                end do
    1099             : 
    1100             :             end if  ! (am_correction)
    1101             : 
    1102   295540224 :             do i = 1, im
    1103   295196160 :                cx_om(i,j,k) = grid%dtdx(j)*uc(i,j,k)
    1104             :             end do
    1105             : 
    1106             :          end do
    1107             :          
    1108      688128 :          call pft2d(uc(1,js2g0,k), grid%sc,       &
    1109             :                     grid%dc, im, jn2g0-js2g0+1,       &
    1110     1032192 :                     wk, wk2 )
    1111             : 
    1112      344064 :          if ( jfirst == 1 ) then   ! Clean up
    1113     1553664 :             do i = 1, im
    1114     1548288 :                uc(i,1,k) = D0_0
    1115     1553664 :                cx_om(i,1,k) = D0_0
    1116             :             end do
    1117             :          end if
    1118             : 
    1119      473088 :          if ( jlast == jm ) then   ! Clean up
    1120     1553664 :             do i = 1, im
    1121     1548288 :                uc(i,jm,k) = D0_0
    1122     1553664 :                cx_om(i,jm,k) = D0_0
    1123             :             end do
    1124             :          end if
    1125             : 
    1126             :       end do
    1127             : 
    1128      129024 :       call FVstopclock(grid,'---C_U_LOOP')
    1129             : 
    1130             : #if defined( SPMD )
    1131      129024 :       call FVstartclock(grid,'---PRE_D_CORE_COMM')
    1132             :       ! pkc and wz need only to be ghosted jfirst-1
    1133             :       call mp_recv3d_2( grid%commyz, src, im, jm, km+1,                &
    1134             :                         1, im, jfirst-1, jlast+1, kfirst, klast+1,    &
    1135      129024 :                         1, im, jfirst-1, jfirst-1, kfirst, klast+1, pxc, wz)
    1136             : 
    1137             :       call mp_send4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,    &
    1138      129024 :                          kfirst, klast, ng_d, ng_d, uc )
    1139      129024 :       call FVstopclock(grid,'---PRE_D_CORE_COMM')
    1140             : #endif
    1141             : 
    1142      129024 :       call FVstartclock(grid,'---C_V_PGRAD')
    1143             : 
    1144      129024 :       if (am_press_crrct) then
    1145             : !$omp parallel do private(i, j, k)
    1146             :          ! AM correction (pressure, advective winds): pxc -> ptr
    1147           0 :          do k = kfirst, klast+1
    1148           0 :             do j = js1g1, jlast
    1149           0 :                do i = 1, im
    1150           0 :                   ptr(i,j,k) = pxc(i,j,k)*max(pxc(i,j,k), tiny)**(xakap - 1.0_r8)
    1151             :                end do
    1152             :             end do
    1153             :          end do
    1154             :       else
    1155             : !$omp parallel do private(i, j, k)
    1156      602112 :          do k = kfirst, klast+1
    1157     2487072 :             do j = js1g1, jlast
    1158   545226528 :                do i = 1, im
    1159   544753440 :                   ptr(i,j,k) = pxc(i,j,k)
    1160             :                end do
    1161             :             end do
    1162             :          end do
    1163             :       end if
    1164             : 
    1165             : !$omp parallel do private(i, j, k, wk, wk1 )
    1166             :       ! Beware k+1 references directly below (AAM)
    1167      473088 :       do k = kfirst, klast
    1168     1714944 :          do j = js1g1, jlast
    1169   396528384 :             do i = 1, im
    1170   396184320 :                wk1(i,j) = ptr(i,j,k+1) - ptr(i,j,k)
    1171             :             end do
    1172             :          end do
    1173             : 
    1174     1370880 :          do j = js2g0, jlast
    1175   297093888 :             do i = 1, im
    1176  1182892032 :                vc(i,j,k) = vc(i,j,k) + grid%dtdy5/(wk1(i,j)+wk1(i,j-1)) *            &
    1177   295723008 :                            ( (wz(i,j-1,k+1)-wz(i,j,k))*(ptr(i,j,k+1)-ptr(i,j-1,k))   &
    1178  1478615040 :                            + (wz(i,j-1,k)-wz(i,j,k+1))*(ptr(i,j-1,k+1)-ptr(i,j,k)) )
    1179             : 
    1180   296749824 :                cy_om(i,j,k) = grid%dtdy*vc(i,j,k)
    1181             :             end do
    1182             :          end do
    1183             : 
    1184      688128 :          call pft2d(vc(1,js2g0,k), grid%se,          &
    1185     1161216 :                     grid%de, im, jlast-js2g0+1, wk, wk1 )
    1186             :       end do
    1187             : 
    1188      129024 :       call FVstopclock(grid,'---C_V_PGRAD')
    1189             : 
    1190             : #if defined( SPMD )
    1191      129024 :       call FVstartclock(grid,'---PRE_D_CORE_COMM')
    1192             :       call mp_recv4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,   &
    1193      129024 :                          kfirst, klast, ng_d, ng_d, uc )
    1194             : 
    1195             :       ! vc only needs to be ghosted at jlast+1
    1196      129024 :       dest = iam-1
    1197      129024 :       src  = iam+1
    1198      129024 :       if ( mod(iam,npr_y) == 0 ) dest = -1
    1199      129024 :       if ( mod(iam+1,npr_y) == 0 ) src = -1
    1200             :       call mp_send3d( grid%commyz, dest, src, im, jm, km,             &
    1201             :                       1, im, jfirst-2, jlast+2, kfirst, klast,       &
    1202      129024 :                       1, im, jfirst, jfirst, kfirst, klast, vc )            
    1203             :       call mp_recv3d( grid%commyz, src, im, jm, km,                   &
    1204             :                       1, im, jfirst-2, jlast+2, kfirst, klast,       &
    1205      129024 :                       1, im, jlast+1, jlast+1, kfirst, klast, vc )
    1206      129024 :       call FVstopclock(grid,'---PRE_D_CORE_COMM')
    1207             : 
    1208             :       call mp_send3d( grid%commyz, dest, src, im, jm, km,                      &
    1209             :                       1, im, jfirst, jlast+1, kfirst, klast,       &
    1210      129024 :                       1, im, jfirst, jfirst, kfirst, klast, cy_om )            
    1211             :       call mp_recv3d( grid%commyz, src, im, jm, km,                             &
    1212             :                       1, im, jfirst, jlast+1, kfirst, klast,       &
    1213      129024 :                       1, im, jlast+1, jlast+1, kfirst, klast, cy_om )
    1214             : #endif
    1215             : 
    1216      129024 :       call FVstopclock(grid,'---PRE_D_CORE')
    1217             : 
    1218      129024 :       call FVbarrierclock(grid,'sync_d_core', grid%commyz)
    1219      129024 :       call FVstartclock(grid,'---D_CORE')
    1220             : 
    1221             : !$omp parallel do private(i, j, k, iord, jord) 
    1222      473088 :       do k = kfirst, klast
    1223             : 
    1224      344064 :          if( k <= kmtp ) then
    1225       43008 :             if( k == 1 ) then
    1226       10752 :                iord = iord_d_min
    1227       10752 :                jord = jord_d_min
    1228             :             else
    1229       32256 :                iord = min(iord_d_low, iord_d)
    1230       32256 :                jord = min(jord_d_low, jord_d)
    1231             :             end if
    1232             :          else
    1233      301056 :             iord = iord_d
    1234      301056 :             jord = jord_d
    1235             :          end if
    1236             : 
    1237             :          !-----------------------------------------------------------------
    1238             :          ! Call the vertical independent part of the dynamics on the D-grid
    1239             :          !-----------------------------------------------------------------
    1240             : 
    1241      344064 :          if (am_correction .or. am_fixer) then 
    1242           0 :             do j = jfirst, jlast
    1243           0 :                do i = 1, im
    1244           0 :                   kelp(i,j,k) = delp(i,j,k) ! un-updated delp on A grid
    1245             :                end do
    1246             :             end do
    1247             :          end if
    1248             : 
    1249             :          ! don't apply correction if order is not 4
    1250      344064 :          sw_am_corr = am_correction .and. iord.eq.iord_d .and. jord.eq.jord_d 
    1251             : 
    1252     1032192 :          call d_sw( grid, u(1,jfirst-ng_d,k),      v(1,jfirst-ng_s,k),  &
    1253      688128 :                     uc(1,jfirst-ng_d,k),    vc(1,jfirst-2,k),           &
    1254      344064 :                     pt(1,jfirst-ng_d,k),   delp(1,jfirst,k),            &
    1255             :                     delpf(1,jfirst-ng_d,k), cx3(1,jfirst-ng_d,k),       &
    1256      344064 :                     cy3(1,jfirst,k),        mfx(1,jfirst,k),            &
    1257             :                     mfy(1,jfirst,k),                                    &
    1258           0 :                     grid%cdx  (js2g0:,k),grid%cdy (js2g0:,k),           &
    1259           0 :                     grid%cdxde (js2g0:,k),grid%cdxdp (js2g0:,k),        & 
    1260           0 :                     grid%cdyde(js2g0:,k) ,grid%cdydp(js2g0:,k),         & 
    1261           0 :                     grid%cdxdiv(:,k),grid%cdydiv(:,k) ,                 & 
    1262           0 :                     grid%cdx4 (js2g0:,k),grid%cdy4(js2g0:,k) ,          & 
    1263           0 :                     grid%cdtau4(js2g0:,k), ldiv2, ldiv4, ldel2,         & 
    1264             :                     iord, jord, tiny, sw_am_corr,                       &
    1265      344064 :                     ddpa(1,jfirst,k), ddu(1,jfirst,k),                  &
    1266     2408448 :                     vf(1,jfirst-2   ,k) )
    1267             : 
    1268      473088 :          if (am_correction .or. am_fixer) then 
    1269           0 :             do j = jfirst, jlast
    1270           0 :                do i = 1, im
    1271           0 :                   help(i,j,k) = delp(i,j,k) ! updated delp on A grid
    1272             :                end do
    1273             :             end do
    1274             :          end if
    1275             : 
    1276             :       end do
    1277             : 
    1278      129024 :       call FVstopclock(grid,'---D_CORE')
    1279             : 
    1280             :       ! AM correction and fixer (main block)
    1281      129024 :       if (am_correction .or. am_fixer) then 
    1282             :  
    1283           0 :          call FVbarrierclock(grid,'sync_dp4corr_1', grid%commyz) 
    1284           0 :          call FVstartclock(grid,'---dp4corr_COMM_1')
    1285             : 
    1286             : #if defined( SPMD )
    1287             :          ! only (jfirst-1) halo point required (iam,jlast) -> (iam+1,jfirst-1)
    1288           0 :          dest = iam+1
    1289           0 :          src  = iam-1
    1290           0 :          if ( mod(iam,  npr_y) == 0 ) src = -1
    1291           0 :          if ( mod(iam+1,npr_y) == 0 ) dest = -1
    1292             :          call mp_send3d( grid%commyz, dest, src, im, jm, km,            &
    1293             :                          1, im, jfirst-1, jlast, kfirst, klast,         &
    1294           0 :                          1, im, jlast   , jlast, kfirst, klast, help )            
    1295             :          call mp_recv3d( grid%commyz, src, im, jm, km,                  &
    1296             :                          1, im, jfirst-1, jlast   , kfirst, klast,      &
    1297           0 :                          1, im, jfirst-1, jfirst-1, kfirst, klast, help )
    1298             :          call mp_send3d( grid%commyz, dest, src, im, jm, km,            &
    1299             :                          1, im, jfirst-1, jlast, kfirst, klast,         &
    1300           0 :                          1, im, jlast   , jlast, kfirst, klast, kelp )            
    1301             :          call mp_recv3d( grid%commyz, src, im, jm, km,                  &
    1302             :                          1, im, jfirst-1, jlast   , kfirst, klast,      &
    1303           0 :                          1, im, jfirst-1, jfirst-1, kfirst, klast, kelp )
    1304             : 
    1305           0 :          if (am_correction) then
    1306             :             call mp_send3d( grid%commyz, dest, src, im, jm, km,         &
    1307             :                             1, im, jfirst-1, jlast, kfirst, klast,      &
    1308           0 :                             1, im, jlast   , jlast, kfirst, klast, ddpa )            
    1309             :             call mp_recv3d( grid%commyz, src, im, jm, km,                  &
    1310             :                             1, im, jfirst-1, jlast   , kfirst, klast,      &
    1311           0 :                             1, im, jfirst-1, jfirst-1, kfirst, klast, ddpa )
    1312             :          end if
    1313             : #endif
    1314           0 :          call FVstopclock(grid,'---dp4corr_COMM_1')
    1315             : 
    1316           0 :          call FVbarrierclock(grid,'sync_dp4corr_2', grid%commyz) 
    1317           0 :          call FVstartclock(grid,'---dp4corr_COMM_2')
    1318             : 
    1319             : !$omp parallel do private(i, j, k) 
    1320           0 :          do k = kfirst, klast
    1321           0 :             do j = js2g0, jlast
    1322           0 :                do i = 1, im
    1323           0 :                   dpn(i,j,k)=(help(i,j,k)*cosp(j)+help(i,j-1,k)*cosp(j-1))/(cosp(j)+cosp(j-1)) ! A->D
    1324           0 :                   dpo(i,j,k)=(kelp(i,j,k)*cosp(j)+kelp(i,j-1,k)*cosp(j-1))/(cosp(j)+cosp(j-1)) ! A->D
    1325             :                end do
    1326             :             end do
    1327           0 :             if (jfirst == 1) then
    1328           0 :                do i = 1, im
    1329           0 :                   dpn(i, 2,k)=(help(i,  2 ,k)*cosp(  2 )+acap*help(i, 1,k)*cose( 2))/cosp(  2 )
    1330           0 :                   dpo(i, 2,k)=(kelp(i,  2 ,k)*cosp(  2 )+acap*kelp(i, 1,k)*cose( 2))/cosp(  2 )
    1331             :                end do
    1332             :             endif
    1333           0 :             if (jlast == jm) then
    1334           0 :                do i = 1, im
    1335           0 :                   dpn(i,jm,k)=(help(i,jm-1,k)*cosp(jm-1)+acap*help(i,jm,k)*cose(jm))/cosp(jm-1)
    1336           0 :                   dpo(i,jm,k)=(kelp(i,jm-1,k)*cosp(jm-1)+acap*kelp(i,jm,k)*cose(jm))/cosp(jm-1)
    1337             :                end do
    1338             :             endif
    1339             :          end do
    1340             : 
    1341           0 :          if (am_correction) then 
    1342             : !$omp parallel do private(i, j, k) 
    1343           0 :             do k = kfirst, klast
    1344           0 :                do j = js2g0, jlast
    1345           0 :                   do i = 1, im
    1346           0 :                      ddpu(i,j,k)=(ddpa(i,j,k)*cosp(j)+ddpa(i,j-1,k)*cosp(j-1))/(cosp(j)+cosp(j-1)) ! A->D
    1347             :                   end do
    1348             :                end do
    1349             :             end do
    1350             : 
    1351             : !$omp parallel do private(i, j, k) 
    1352           0 :             do k = kfirst, klast
    1353           0 :                do j = js2g0, jlast
    1354           0 :                   do i = 1, im
    1355           0 :                      ddu(i,j,k) = ddu(i,j,k)*0.5_r8*(dpo(i,j,k) + dpn(i,j,k))
    1356             :                   end do
    1357             :                end do
    1358             :             end do
    1359             : 
    1360             : !$omp parallel do private(i, j, k) 
    1361           0 :             do k = kfirst, klast
    1362           0 :                do j = js2g0, jlast
    1363           0 :                   ddus(j,k) = ddu(1,j,k) &
    1364           0 :                              + (u(1,j,k) + uc(1,j,k)*0.5_r8)*ddpu(1,j,k) &
    1365           0 :                              + wg_hiord*vf(1,j,k)*(dpn(1,j,k) - dpo(1,j,k))*0.5_r8
    1366           0 :                   dpns(j,k) = dpn(1,j,k)
    1367           0 :                   do i = 2, im
    1368             :                      ddus(j,k) = ddus(j,k) &
    1369           0 :                                 + ddu(i,j,k) &
    1370           0 :                                 + (u(i,j,k)+uc(i,j,k)*0.5_r8)*ddpu(i,j,k) &
    1371           0 :                                 + wg_hiord*vf(i,j,k)*(dpn(i,j,k)-dpo(i,j,k))*0.5_r8
    1372           0 :                      dpns(j,k) = dpns(j,k) + dpn(i,j,k)
    1373             :                   end do
    1374           0 :                   ddus(j,k) = ddus(j,k)/dpns(j,k)  
    1375             :                   ! taper beyond 72S/N
    1376           0 :                   tpr = max(abs(-2.5_r8 + ((j-1)-0.5_r8)*(5._r8/(jm-1))),2._r8)
    1377           0 :                   tpr = cos(pi*tpr)**2
    1378           0 :                   ddus(j,k)=ddus(j,k)*tpr
    1379             :                end do
    1380             :             end do
    1381             : 
    1382             : !$omp parallel do private(i, j, k) 
    1383           0 :             do k = kfirst, klast
    1384           0 :                do j = js4g0, jn3g0
    1385           0 :                   do i = 1, im !+++++++++++++++++++++++++++++++++++++++++++++
    1386           0 :                      uc(i,j,k) = uc(i,j,k) + ddus(j,k) !  APPLY AM CORRECTION
    1387             :                   enddo        !+++++++++++++++++++++++++++++++++++++++++++++
    1388             :                enddo
    1389             :             enddo
    1390             :  
    1391             :          end if ! (am_correction)
    1392             :  
    1393           0 :          if (am_fixer) then
    1394           0 :             if (.not. am_geom_crrct) then 
    1395             : !$omp parallel do private(i, j, k) 
    1396           0 :                do k = kfirst, klast
    1397           0 :                   do j = js2g0, jlast
    1398           0 :                      do i = 1, im
    1399           0 :                         dpn(i,j,k) = (help(i,j,k) + help(i,j-1,k) )/ 2._r8
    1400           0 :                         dpo(i,j,k) = (kelp(i,j,k) + kelp(i,j-1,k) )/ 2._r8
    1401             :                      end do
    1402             :                   end do
    1403           0 :                   if (jfirst == 1) then
    1404           0 :                      do i = 1, im
    1405           0 :                         dpn(i,2,k) = (help(i,2,k) + help(i,1,k) )/ 2._r8
    1406           0 :                         dpo(i,2,k) = (kelp(i,2,k) + kelp(i,1,k) )/ 2._r8
    1407             :                      end do
    1408             :                   endif
    1409           0 :                   if (jlast == jm) then
    1410           0 :                      do i = 1, im
    1411           0 :                         dpn(i,jm,k) = (help(i,jm-1,k) + help(i,jm,k) )/ 2._r8
    1412           0 :                         dpo(i,jm,k) = (kelp(i,jm-1,k) + kelp(i,jm,k) )/ 2._r8
    1413             :                      end do
    1414             :                   endif
    1415             :                end do
    1416             :             endif
    1417             : !$omp parallel do private(i, j, k) 
    1418           0 :             do k = kfirst, klast
    1419           0 :                do j = js2g0, jlast
    1420           0 :                   do i = 1, im
    1421           0 :                      don(j,k) = don(j,k) + (cosp(j) + cosp(j-1))*cose(j) &
    1422           0 :                                 *(uc(i,j,k)*dpn(i,j,k)                   &
    1423           0 :                                 + (u(i,j,k) + cose(j)*oma)*(dpn(i,j,k) - dpo(i,j,k)))
    1424           0 :                      dod(j,k) = dod(j,k) + (cosp(j) + cosp(j-1))*cose(j)**2*dpn(i,j,k)
    1425             :                   end do
    1426             :                end do
    1427             :             end do
    1428             :          end if  ! (am_fixer)
    1429             :  
    1430           0 :          call FVstopclock(grid,'---dp4corr_COMM_2')
    1431             :  
    1432             :       endif ! (am_correction .or. am_fixer)
    1433             :  
    1434      129024 :       call FVbarrierclock(grid,'sync_d_geop', grid%commyz)
    1435             : 
    1436             : #if defined( SPMD )
    1437      129024 :       if (s_trac) then
    1438             :          ! post sends for ct_overlap or tracer decomposition information
    1439           0 :          do ml = 1, mlt
    1440           0 :             call mpiisend(cx3, ncx, mpir8, iremote(ml), cxtag(ml), grid%commnyz, cxreqs(ml))
    1441             :             call mpiisend(cy3, ncy, mpir8, iremote(ml), cytag(ml), grid%commnyz, cyreqs(ml))
    1442             :             call mpiisend(mfx, nmfx, mpir8, iremote(ml), mfxtag(ml), grid%commnyz, mfxreqs(ml))
    1443           0 :             call mpiisend(mfy, nmfy, mpir8, iremote(ml), mfytag(ml), grid%commnyz, mfyreqs(ml))
    1444             :          end do
    1445             :       end if
    1446             : #endif
    1447             : 
    1448             :    end if  !  (iam .lt. npes_yz)
    1449             : 
    1450             : 
    1451      129024 :    if (geopk_ddist) then
    1452             : 
    1453           0 :       if (iam .lt. npes_yz) then
    1454             : 
    1455             :          ! Stay in yz space and use z communications
    1456             : 
    1457           0 :          if (grid%geopk16byte) then
    1458           0 :             call FVstartclock(grid,'---D_GEOP16')
    1459             :             call geopk16(grid, pe, delp, pkcc, wzc, hs, pt, &
    1460           0 :                          ng_d, cp3vc(1,jfirst,kfirst), cap3vc(1,jfirst,kfirst))
    1461             :          else
    1462           0 :             call FVstartclock(grid,'---D_GEOP_D')
    1463             :             call geopk_d(grid, pe, delp, pkcc, wzc, hs, pt, &
    1464           0 :                          ng_d, cp3vc(1,jfirst,kfirst), cap3vc(1,jfirst,kfirst))
    1465             :          end if
    1466             : 
    1467             :          ! Geopk does not need j ghost zones of pkc and wz
    1468             : 
    1469             : !$omp parallel do private(i, j, k)
    1470           0 :          do k = kfirst, klast+1
    1471           0 :             do j = jfirst, jlast
    1472           0 :                do i = 1, im
    1473           0 :                   wz(i,j,k) = wzc(i,j,k)
    1474             :                end do
    1475             :             end do
    1476             :          end do
    1477           0 :          if (.not.high_alt) then
    1478             : !$omp parallel do private(i, j, k)
    1479           0 :             do k = kfirst, klast+1
    1480           0 :                do j = jfirst, jlast
    1481           0 :                   do i = 1, im
    1482           0 :                      pxc(i,j,k) = pkcc(i,j,k)
    1483             :                   end do
    1484             :                end do
    1485             :             end do
    1486             :          endif
    1487             : 
    1488           0 :          if (grid%geopk16byte) then
    1489           0 :             call FVstopclock(grid,'---D_GEOP16')
    1490             :          else
    1491           0 :             call FVstopclock(grid,'---D_GEOP_D')
    1492             :          endif
    1493             : 
    1494             :       end if  !  (iam .lt. npes_yz)
    1495             : 
    1496             :    else
    1497             : 
    1498             :       ! Begin xy geopotential section
    1499             : 
    1500      129024 :       call FVstartclock(grid,'---D_GEOP')
    1501             : 
    1502      129024 :       if (grid%twod_decomp == 1) then
    1503             : 
    1504             :          ! Transpose to xy decomposition
    1505             : 
    1506             : #if defined( SPMD )
    1507             : 
    1508             : !$omp parallel do private(i,j,k)
    1509      473088 :          do k = kfirst, klast
    1510     1505280 :             do j = jfirst, jlast
    1511   298647552 :                do i = 1, im
    1512   298303488 :                   ptc(i,j,k) = pt(i,j,k)
    1513             :                end do
    1514             :             end do
    1515             :          end do
    1516             : 
    1517      129024 :          call FVstartclock(grid,'YZ_TO_XY_D_GEOP')
    1518      129024 :          if (grid%modc_onetwo .eq. 1) then
    1519             :             call mp_sendirr( grid%commxy, grid%ijk_yz_to_xy%SendDesc,     &
    1520             :                              grid%ijk_yz_to_xy%RecvDesc, delp, delpxy,    &
    1521           0 :                              modc=grid%modc_cdcore )
    1522             :             call mp_recvirr( grid%commxy, grid%ijk_yz_to_xy%SendDesc,     &
    1523             :                              grid%ijk_yz_to_xy%RecvDesc, delp, delpxy,    &
    1524           0 :                              modc=grid%modc_cdcore )
    1525             :             call mp_sendirr( grid%commxy, grid%ijk_yz_to_xy%SendDesc,     &
    1526             :                              grid%ijk_yz_to_xy%RecvDesc, ptc, ptxy,       &
    1527           0 :                              modc=grid%modc_cdcore )
    1528             :             call mp_recvirr( grid%commxy, grid%ijk_yz_to_xy%SendDesc,     &
    1529             :                              grid%ijk_yz_to_xy%RecvDesc, ptc, ptxy,       &
    1530           0 :                              modc=grid%modc_cdcore )
    1531             :          else
    1532             :             call mp_sendirr( grid%commxy, grid%ijk_yz_to_xy%SendDesc,     &
    1533             :                              grid%ijk_yz_to_xy%RecvDesc, delp, delpxy,    &
    1534             :                              ptc, ptxy,                                   &
    1535      129024 :                              modc=grid%modc_cdcore )
    1536             :             call mp_recvirr( grid%commxy, grid%ijk_yz_to_xy%SendDesc,     &
    1537             :                              grid%ijk_yz_to_xy%RecvDesc, delp, delpxy,    &
    1538             :                              ptc, ptxy,                                   &
    1539      129024 :                              modc=grid%modc_cdcore )
    1540             :          end if
    1541      129024 :          call FVstopclock(grid,'YZ_TO_XY_D_GEOP')
    1542             : #endif
    1543             : 
    1544             :       else
    1545             : 
    1546             : !$omp parallel do private(i,j,k)
    1547           0 :          do k = kfirst, klast
    1548           0 :             do j = jfirst, jlast
    1549           0 :                do i = 1, im
    1550           0 :                   delpxy(i,j,k) = delp(i,j,k)
    1551           0 :                   ptxy(i,j,k) = pt(i,j,k)
    1552             :                end do
    1553             :             end do
    1554             :          end do
    1555             :  
    1556             :       end if
    1557             : 
    1558             :       call geopk(grid, pexy, delpxy, pkxy, wzxy, hsxy, ptxy, &
    1559      129024 :                  cp3v, cap3v, nx)
    1560             : 
    1561      129024 :       if (grid%twod_decomp == 1) then
    1562             : 
    1563             :          ! Transpose back to yz decomposition
    1564             :          ! Z edge ghost points (klast+1) are automatically filled in
    1565             :          ! pexy is output quantity on last small timestep
    1566             : 
    1567             : #if defined( SPMD )
    1568             : 
    1569      129024 :          call FVstartclock(grid,'XY_TO_YZ_D_GEOP')
    1570      129024 :          if (high_alt) then
    1571             :             call mp_sendirr( grid%commxy, grid%pexy_to_pe%SendDesc,       &
    1572             :                  grid%pexy_to_pe%RecvDesc, pexy, pec,         &
    1573           0 :                  modc=grid%modc_dynrun )
    1574             :             call mp_recvirr( grid%commxy, grid%pexy_to_pe%SendDesc,       &
    1575             :                  grid%pexy_to_pe%RecvDesc, pexy, pec,         &
    1576           0 :                  modc=grid%modc_dynrun )
    1577             :             call mp_sendirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
    1578             :                  grid%pkxy_to_pkc%RecvDesc, wzxy, wzkp,       &
    1579           0 :                  modc=grid%modc_cdcore )
    1580             :             call mp_recvirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
    1581             :                  grid%pkxy_to_pkc%RecvDesc, wzxy, wzkp,       &
    1582           0 :                  modc=grid%modc_cdcore )
    1583             :          else
    1584      129024 :             if (grid%modc_onetwo .eq. 1) then
    1585             :                call mp_sendirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
    1586             :                     grid%pkxy_to_pkc%RecvDesc, pkxy, pkkp,       &
    1587           0 :                     modc=grid%modc_cdcore )
    1588             :                call mp_recvirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
    1589             :                     grid%pkxy_to_pkc%RecvDesc, pkxy, pkkp,       &
    1590           0 :                     modc=grid%modc_cdcore )
    1591             :                call mp_sendirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
    1592             :                     grid%pkxy_to_pkc%RecvDesc, wzxy, wzkp,       &
    1593           0 :                     modc=grid%modc_cdcore )
    1594             :                call mp_recvirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
    1595             :                     grid%pkxy_to_pkc%RecvDesc, wzxy, wzkp,       &
    1596           0 :                     modc=grid%modc_cdcore )
    1597             :             else
    1598             :                call mp_sendirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
    1599             :                     grid%pkxy_to_pkc%RecvDesc, pkxy, pkkp,       &
    1600             :                     wzxy, wzkp,                                  &
    1601      129024 :                     modc=grid%modc_cdcore )
    1602             :                call mp_recvirr( grid%commxy, grid%pkxy_to_pkc%SendDesc,      &
    1603             :                     grid%pkxy_to_pkc%RecvDesc, pkxy, pkkp,       &
    1604             :                     wzxy, wzkp,                                  &
    1605      129024 :                     modc=grid%modc_cdcore )
    1606             :             end if
    1607             :          endif
    1608      129024 :          call FVstopclock(grid,'XY_TO_YZ_D_GEOP')
    1609             : 
    1610      129024 :          if (high_alt) then
    1611             : !$omp parallel do private(i, j, k)
    1612           0 :             do k = kfirst, klast+1
    1613           0 :                do j = jfirst, jlast
    1614           0 :                   do i = 1, im
    1615           0 :                      pxc(i,j,k) = log(pec(i,k,j))
    1616             :                   end do
    1617             :                end do
    1618             :             end do
    1619             :          else
    1620             : !$omp parallel do private(i, j, k)
    1621      602112 :             do k = kfirst, klast+1
    1622     2021376 :                do j = jfirst, jlast
    1623   410640384 :                   do i = 1, im
    1624   410167296 :                      pxc(i,j,k) = pkkp(i,j,k)
    1625             :                   end do
    1626             :                end do
    1627             :             end do
    1628             :          endif
    1629             : !$omp parallel do private(i, j, k)
    1630      602112 :          do k = kfirst, klast+1
    1631     2021376 :             do j = jfirst, jlast
    1632   410640384 :                do i = 1, im
    1633   410167296 :                   wz(i,j,k) = wzkp(i,j,k)
    1634             :                end do
    1635             :             end do
    1636             :          end do
    1637             : 
    1638             : #endif
    1639             : 
    1640             :       else
    1641             : 
    1642             : !$omp parallel do private(i, j, k)
    1643           0 :          do k = kfirst, klast+1
    1644           0 :             do j = jfirst, jlast
    1645           0 :                do i = 1, im
    1646           0 :                   wz(i,j,k) = wzxy(i,j,k)
    1647             :                end do
    1648             :             end do
    1649             :          end do
    1650           0 :          if (high_alt) then
    1651             : !$omp parallel do private(i, j, k)
    1652           0 :             do k = kfirst, klast+1
    1653           0 :                do j = jfirst, jlast
    1654           0 :                   do i = 1, im
    1655           0 :                      pec(i,k,j) = pexy(i,k,j)
    1656           0 :                      pxc(i,j,k) = log(pec(i,k,j))
    1657             :                   end do
    1658             :                end do
    1659             :             end do
    1660             :          else
    1661             : !$omp parallel do private(i, j, k)
    1662           0 :             do k = kfirst, klast+1
    1663           0 :                do j = jfirst, jlast
    1664           0 :                   do i = 1, im
    1665           0 :                      pxc(i,j,k) = pkxy(i,j,k)
    1666             :                   end do
    1667             :                end do
    1668             :             end do
    1669             :          endif
    1670             : 
    1671             :       end if
    1672             : 
    1673      129024 :       call FVstopclock(grid,'---D_GEOP')
    1674             : 
    1675             :       ! End xy geopotential section
    1676             :    endif       ! geopk_ddist
    1677             : 
    1678      129024 :    if (iam .lt. npes_yz) then
    1679             : 
    1680      129024 :       call FVbarrierclock(grid,'sync_pre_d_pgrad', grid%commyz)
    1681             : 
    1682             :       ! Upon exit from geopk, the quantities pe, pkc and wz will have been
    1683             :       !      updated at klast+1
    1684             : 
    1685      129024 :       call FVstartclock(grid,'---PRE_D_PGRAD')
    1686             : 
    1687             : #if defined( SPMD )
    1688      129024 :       call FVstartclock(grid,'---PRE_D_PGRAD_COMM_1')
    1689             :       ! Exchange boundary regions on north and south for pkc and wz
    1690             :       call mp_send2_ns( grid%commyz, im, jm, km+1, jfirst, jlast,      &
    1691      129024 :                         kfirst, klast+1, 1, pxc, wz)
    1692      129024 :       call FVstopclock(grid,'---PRE_D_PGRAD_COMM_1')
    1693             : #endif
    1694             : 
    1695      129024 :       if ( ipe /= 1 ) then          !  not the last call
    1696             : 
    1697             :          ! Perform some work while sending data on the way
    1698       96768 :          call FVstartclock(grid,'---D_DELP_LOOP')
    1699             : 
    1700             : !$omp parallel do private(i, j, k, wk, wk2)
    1701      354816 :          do k = kfirst, klast
    1702     1032192 :             do j = jfirst, jlast
    1703   223985664 :                do i = 1, im
    1704   223727616 :                   delpf(i,j,k) = delp(i,j,k)
    1705             :                end do
    1706             :             end do
    1707      516096 :             call pft2d( delpf(1,js2g0,k), grid%sc, &
    1708             :                         grid%dc, im, jn2g0-js2g0+1,    &
    1709      870912 :                         wk, wk2 )
    1710             :          end do
    1711       96768 :          call FVstopclock(grid,'---D_DELP_LOOP')
    1712             : 
    1713             :       else   ! Last call
    1714             : 
    1715             : !$omp parallel do private(i, j, k)
    1716      150528 :          do k = kfirst, klast+1
    1717      505344 :             do j = jfirst, jlast
    1718   102660096 :                do i = 1, im
    1719   102541824 :                   pk(i,j,k) = pxc(i,j,k)
    1720             :                end do
    1721             :             end do
    1722             :          end do
    1723             :       end if
    1724             : 
    1725             : #if defined( SPMD )
    1726      129024 :       call FVstartclock(grid,'---PRE_D_PGRAD_COMM_1')
    1727             :       call mp_recv2_ns( grid%commyz, im, jm, km+1, jfirst, jlast,          &
    1728      129024 :                         kfirst, klast+1, 1, pxc, wz)
    1729      129024 :       if ( ipe /= 1 ) then          !  not the last call
    1730             :          call mp_send4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,     &
    1731       96768 :                             kfirst, klast, ng_d, ng_d, delpf )
    1732             :       end if
    1733      129024 :       call FVstopclock(grid,'---PRE_D_PGRAD_COMM_1')
    1734             : #endif
    1735             :  
    1736      129024 :       if (am_press_crrct) then
    1737             :          ! AM correction (pressure, prognostic winds): pkc -> ptr
    1738             : !$omp parallel do private(i, j, k)
    1739           0 :          do k = kfirst, klast+1
    1740           0 :             do j = js1g1, jn1g1                     ! dpt needed NS
    1741           0 :                do i = 1,im                          ! wz, pkc ghosted NS
    1742           0 :                   ptr(i,j,k) = pxc(i,j,k)**xakap
    1743             :                end do
    1744             :             end do
    1745             :          end do
    1746             :       else
    1747             : !$omp parallel do private(i, j, k)
    1748      602112 :          do k = kfirst, klast+1
    1749     2952768 :             do j = js1g1, jn1g1
    1750   679812672 :                do i = 1,im
    1751   679339584 :                   ptr(i,j,k) = pxc(i,j,k)
    1752             :                end do
    1753             :             end do
    1754             :          end do
    1755             :       endif
    1756             : 
    1757      129024 :       if (am_press_crrct) then
    1758             : !$omp parallel do private(i, j, k)
    1759             :          ! Beware k+1 references directly below (AAM)
    1760           0 :          do k = kfirst, klast
    1761           0 :             do j = js1g1, jn1g1
    1762           0 :                do i = 1, im                       ! wz, pkc ghosted NS
    1763           0 :                   dpr(i,j,k) = (wz(i,j,k+1) + wz(i,j,k))*(ptr(i,j,k+1) - ptr(i,j,k))
    1764             :                end do
    1765             :             end do
    1766             :          end do
    1767             :       end if
    1768             : 
    1769             : !$omp parallel do private(i, j, k)
    1770             :       ! Beware k+1 references directly below (AAM)
    1771      473088 :       do k = kfirst, klast
    1772     2182656 :          do j = js1g1, jn1g1                   ! dpt needed NS
    1773   494409216 :             do i = 1, im                       ! wz, pkc ghosted NS
    1774   494065152 :                dpt(i,j,k) = (wz(i,j,k+1) + wz(i,j,k))*(pxc(i,j,k+1) - pxc(i,j,k))
    1775             :             end do
    1776             :          end do
    1777             :       end do
    1778             : 
    1779             :       !  GHOSTING:   wz (input) NS ; pkc (input) N
    1780             :       !  GHOSTING:   dpt (loop 4000) NS ; pkc (loop 4500) N
    1781             : 
    1782      129024 :       call FVstopclock(grid,'---PRE_D_PGRAD')
    1783      129024 :       call FVstartclock(grid,'---D_PGRAD_1')
    1784             : 
    1785      129024 :       if (high_alt) then
    1786           0 :          px4 = 4.0_r8*log(grid%ptop)
    1787             :       else
    1788      129024 :          px4 = 4.0_r8*grid%ptop**cap3v(ifirstxy,jfirstxy,1)
    1789             :       endif
    1790             : 
    1791             : !$omp parallel do private(i, j, k, wk3, wk1)
    1792      602112 :       do k = kfirst, klast+1
    1793             : 
    1794      473088 :          if (k == 1) then
    1795       42840 :             do j = js2g0, jlast
    1796     9284184 :                do i = 1, im
    1797     9241344 :                   wz3(i,j,1) = D0_0
    1798     9273432 :                   wz(i,j,1) = D0_0
    1799             :                end do
    1800             :             end do
    1801       53424 :             do j = js2g0, jn1g1
    1802    12342960 :                do i = 1, im
    1803    12289536 :                   pxc(i,j,1) = px4
    1804    12332208 :                   ptr(i,j,1) = 4.0_r8*grid%ptop
    1805             :               end do
    1806             :             end do
    1807             :             cycle
    1808             :          end if
    1809             : 
    1810      462336 :          if (am_press_crrct) then
    1811           0 :             do j=js2g1,jn2g0                             ! wk3 needed S
    1812           0 :                wk3(1,j) = (wz(1,j,k)+wz(im,j,k)) *       &
    1813           0 :                        (ptr(1,j,k) - ptr(im,j,k))
    1814           0 :                do i=2,im
    1815           0 :                   wk3(i,j) = (wz(i,j,k)+wz(i-1,j,k)) *      & 
    1816           0 :                           (ptr(i,j,k) - ptr(i-1,j,k))
    1817             : 
    1818             :                enddo
    1819             :             enddo
    1820             :          else
    1821     2290008 :             do j=js2g1,jn2g0                             ! wk3 needed S
    1822     5483016 :                wk3(1,j) = (wz(1,j,k)+wz(im,j,k)) *       &
    1823     5483016 :                        (pxc(1,j,k) - pxc(im,j,k))
    1824   526831872 :                do i=2,im
    1825  1049083728 :                   wk3(i,j) = (wz(i,j,k)+wz(i-1,j,k)) *      & 
    1826  1575453264 :                           (pxc(i,j,k) - pxc(i-1,j,k))
    1827             : 
    1828             :                enddo
    1829             :             enddo
    1830             :          end if
    1831             : 
    1832     2290008 :          do j=js2g1,jn2g0                               
    1833   526369536 :             do i=1,im-1
    1834   526369536 :                wk1(i,j) = wk3(i,j) + wk3(i+1,j)        
    1835             :             enddo
    1836     2290008 :             wk1(im,j) = wk3(im,j) + wk3(1,j)      ! wk3 ghosted S
    1837             :          enddo
    1838             : 
    1839      462336 :          if ( jfirst == 1 ) then
    1840     2087736 :             do i=1,im
    1841     2087736 :                wk1(i, 1) = D0_0
    1842             :             enddo
    1843             :          endif
    1844             : 
    1845      462336 :          if ( jlast == jm ) then
    1846     2087736 :             do i=1,im
    1847     2087736 :                wk1(i,jm) = D0_0
    1848             :             enddo
    1849             :          endif
    1850             : 
    1851     1842120 :          do j=js2g0,jlast                          ! wk1 ghosted S
    1852   399219912 :             do i=1,im
    1853   398757576 :                wz3(i,j,k) = wk1(i,j) + wk1(i,j-1)
    1854             :             enddo
    1855             :          enddo
    1856             : 
    1857             : ! N-S walls
    1858             : 
    1859     2297232 :          do j=js2g0,jn1g1                     ! wk1 needed N
    1860     2297232 :             if (am_press_crrct) then
    1861           0 :                do i=1,im
    1862           0 :                   wk1(i,j) = (wz(i,j,k) + wz(i,j-1,k))*(ptr(i,j,k) - ptr(i,j-1,k))
    1863             :                enddo
    1864             :             else
    1865   530284944 :                do i=1,im                         ! wz, pkc ghosted NS
    1866   530284944 :                   wk1(i,j) = (wz(i,j,k) + wz(i,j-1,k))*(pxc(i,j,k) - pxc(i,j-1,k))
    1867             :                enddo
    1868             :             end if
    1869             :          enddo
    1870             : 
    1871     2297232 :          do j=js2g0,jn1g1                    ! wk3 needed N
    1872     1834896 :             wk3(1,j) = wk1(1,j) + wk1(im,j)      ! wk1 ghosted N
    1873   528912384 :             do i=2,im
    1874   528450048 :                wk3(i,j) = wk1(i,j) + wk1(i-1,j)   ! wk1 ghosted N
    1875             :             enddo
    1876             :          enddo
    1877             : 
    1878     1834896 :          do j=js2g0,jn2g0
    1879   397132176 :             do i=1,im
    1880   396669840 :                wz(i,j,k) = wk3(i,j) + wk3(i,j+1)  ! wk3 ghosted N
    1881             :             enddo
    1882             :          enddo
    1883             : 
    1884             :          ! preserve this section to leave pkc inchanged in output of cd_core
    1885     2759568 :          do j=js1g1,jn1g1
    1886     2297232 :             wk1(1,j) = pxc(1,j,k) + pxc(im,j,k)
    1887   662065152 :             do i=2,im
    1888   661602816 :                wk1(i,j) = pxc(i,j,k) + pxc(i-1,j,k)
    1889             :             enddo
    1890             :          enddo
    1891             :  
    1892     2297232 :          do j=js2g0,jn1g1
    1893   530747280 :             do i=1,im
    1894   530284944 :                pxc(i,j,k) = wk1(i,j) + wk1(i,j-1)
    1895             :             enddo
    1896             :          enddo
    1897             : 
    1898      591360 :          if (am_press_crrct) then
    1899             : 
    1900             :             ! use true pressure for wk1, then update it
    1901           0 :             do j = js1g1, jn1g1
    1902           0 :                wk1(1,j) = ptr(1,j,k) + ptr(im,j,k)
    1903           0 :                do i = 2, im
    1904           0 :                   wk1(i,j) = ptr(i,j,k) + ptr(i-1,j,k)
    1905             :                end do
    1906             :             end do
    1907             : 
    1908             :             ! apply cos-weighted avg'ing
    1909           0 :             do j = js2g0, jn1g1
    1910           0 :                do i = 1, im
    1911           0 :                   ptr(i,j,k) = (wk1(i,j)*cosp(j) + wk1(i,j-1)*cosp(j-1))/(cosp(j) + cosp(j-1))/0.5_r8
    1912             :                end do
    1913             :             end do
    1914             : 
    1915             :          end if
    1916             : 
    1917             :       end do  ! k = kfirst, klast+1
    1918             : 
    1919      129024 :       call FVstopclock(grid,'---D_PGRAD_1')
    1920      129024 :       call FVstartclock(grid,'---D_PGRAD_2')
    1921             : 
    1922             : !$omp parallel do private(i, j, k, wk, wk1, wk2, wk3)
    1923      473088 :       do k = kfirst, klast
    1924             : 
    1925      344064 :          if (am_press_crrct) then
    1926           0 :             do j = js1g1, jn1g1
    1927           0 :                wk1(1,j) = dpr(1,j,k) + dpr(im,j,k)
    1928           0 :                do i = 2, im
    1929           0 :                   wk1(i,j) = dpr(i,j,k) + dpr(i-1,j,k)
    1930             :                end do
    1931             :             end do
    1932             :  
    1933           0 :             do j = js2g0, jn1g1
    1934           0 :                do i = 1, im
    1935           0 :                   wk2(i,j) = wk1(i,j) + wk1(i,j-1)
    1936           0 :                   wk(i,j) = ptr(i,j,k+1) - ptr(i,j,k)
    1937             :                end do
    1938             :             end do
    1939             :          else
    1940     2053632 :             do j = js1g1, jn1g1
    1941     1709568 :                wk1(1,j) = dpt(1,j,k) + dpt(im,j,k)
    1942   492699648 :                do i = 2, im
    1943   492355584 :                   wk1(i,j) = dpt(i,j,k) + dpt(i-1,j,k)
    1944             :                end do
    1945             :             end do
    1946             :  
    1947     1709568 :             do j = js2g0, jn1g1
    1948   394974720 :                do i = 1, im
    1949   393265152 :                   wk2(i,j) = wk1(i,j) + wk1(i,j-1)
    1950   394630656 :                   wk(i,j) = pxc(i,j,k+1) - pxc(i,j,k)
    1951             :                end do
    1952             :             end do
    1953             :          end if
    1954             : 
    1955             :          ! Beware k+1 references directly below (AAM)
    1956     1370880 :          do j = js2g0, jlast
    1957   295723008 :             do i = 1, im-1
    1958  1473480960 :                wk3(i,j) = uc(i,j,k)  +  grid%dtdxe(j)/(wk(i,j) + wk(i+1,j))      &
    1959  1769203968 :                           * (wk2(i,j)-wk2(i+1,j)+wz3(i,j,k+1)-wz3(i,j,k))
    1960             :             end do
    1961     4107264 :             wk3(im,j) = uc(im,j,k)  +  grid%dtdxe(j)/(wk(im,j) + wk(1,j))       &
    1962     5478144 :                         * (wk2(im,j)-wk2(1,j)+wz3(im,j,k+1)-wz3(im,j,k))
    1963             :          end do
    1964             : 
    1965      344064 :          if (am_geom_crrct) then
    1966             :             ! apply cos-weighted avg'ing
    1967           0 :             do j = js2g0, jn2g0                  ! Assumes wk2 ghosted on N
    1968           0 :                do i = 1, im
    1969           0 :                   wk1(i,j) = vc(i,j,k) + grid%dtdy/(wk(i,j)*cose(j) + wk(i,j+1)*cose(j+1))*cosp(j) * & 
    1970           0 :                              (wk2(i,j) - wk2(i,j+1) + wz(i,j,k+1) - wz(i,j,k))
    1971             :                end do
    1972             :             end do
    1973             :          else
    1974     1365504 :             do j = js2g0, jn2g0                  ! Assumes wk2 ghosted on N
    1975   295540224 :                do i = 1, im
    1976  1176698880 :                   wk1(i,j) = vc(i,j,k) + grid%dtdy/(wk(i,j) + wk(i,j+1)) * &
    1977  1471895040 :                              (wk2(i,j) - wk2(i,j+1) + wz(i,j,k+1) - wz(i,j,k))
    1978             :                enddo
    1979             :             enddo
    1980             :          endif
    1981             : 
    1982      344064 :          call pft2d( wk3(1,js2g0), grid%se,        &
    1983             :                      grid%de, im, jlast-js2g0+1,       &
    1984      688128 :                      wk, wk2 )
    1985             :          call pft2d( wk1(1,js2g0), grid%sc,        &
    1986             :                      grid%dc, im, jn2g0-js2g0+1,       &
    1987      344064 :                      wk, wk2 )
    1988             : 
    1989     1365504 :          do j = js2g0, jn2g0
    1990   295540224 :             do i = 1, im
    1991   294174720 :                v(i,j,k) = v(i,j,k) + wk1(i,j)
    1992   295196160 :                u(i,j,k) = u(i,j,k) + wk3(i,j)
    1993             :             end do
    1994             :          end do
    1995             :  
    1996      473088 :          if ( jlast == jm ) then
    1997     1553664 :             do i = 1, im
    1998     1553664 :                u(i,jlast,k) = u(i,jlast,k) + wk3(i,jlast)
    1999             :             end do
    2000             :          end if
    2001             : 
    2002             :       end do  ! k = kfirst, klast
    2003      129024 :       call FVstopclock(grid,'---D_PGRAD_2')
    2004             : 
    2005             : #if defined( SPMD )
    2006      129024 :       if ( ipe /= 1 ) then
    2007       96768 :          call FVstartclock(grid,'---PRE_D_PGRAD_COMM_2')
    2008             :          call mp_recv4d_ns( grid%commyz, im, jm, km, 1, jfirst, jlast,   &
    2009       96768 :                             kfirst, klast, ng_d, ng_d, delpf )
    2010       96768 :          call FVstopclock(grid,'---PRE_D_PGRAD_COMM_2')
    2011             :       end if
    2012             : #endif
    2013             : 
    2014             :    end if  !  (iam .lt. npes_yz)
    2015             : 
    2016      258048 : end subroutine cd_core

Generated by: LCOV version 1.14