LCOV - code coverage report
Current view: top level - dynamics/fv - spmd_dyn.F90 (source / functions) Hit Total Coverage
Test: coverage.info Lines: 370 477 77.6 %
Date: 2025-03-14 01:21:06 Functions: 3 5 60.0 %

          Line data    Source code
       1             : module spmd_dyn
       2             : 
       3             : !-----------------------------------------------------------------------
       4             : ! Subroutines to initialize SPMD implementation of FV
       5             : !
       6             : ! !REVISION HISTORY:
       7             : !   00.09.30  Sawyer             Alterations for LR SPMD mode
       8             : !   01.05.09  Mirin              2-D yz decomposition
       9             : !   01.06.27  Mirin              Secondary 2-D xy decomposition
      10             : !   01.12.20  Sawyer             Changed index order of Q3 decomposition
      11             : !   03.05.07  Sawyer             Removed unneeded decompositions
      12             : !   06.03.01  Sawyer             Removed tracertrans-related variables
      13             : !-----------------------------------------------------------------------
      14             : 
      15             : use spmd_utils,         only: iam, masterproc, npes, mpicom
      16             : 
      17             : use pmgrid,             only: plat, plon, plev, spmd_on
      18             : use constituents,       only: pcnst
      19             : 
      20             : use dynamics_vars,      only: t_fvdycore_grid
      21             : use dyn_internal_state, only: get_dyn_state_grid
      22             : 
      23             : use cam_abortutils,     only: endrun
      24             : use cam_logfile,        only: iulog
      25             : 
      26             : implicit none
      27             : private
      28             : save
      29             : 
      30             : public :: &
      31             :    spmd_readnl,       &
      32             :    spmdinit_dyn,      &
      33             :    compute_gsfactors, &
      34             :    spmdbuf
      35             : 
      36             : public :: &
      37             :    local_dp_map, &
      38             :    block_buf_nrecs, &
      39             :    chunk_buf_nrecs, &
      40             :    proc,            &
      41             :    lonrangexy,      &
      42             :    latrangexy
      43             : 
       44             : ! local_dp_map, block_buf_nrecs, chunk_buf_nrecs belong in another module.  They are
       45             : ! declared here but are never assigned or used by this module.
      46             : logical :: local_dp_map=.false.    ! flag indicates that mapping between dynamics 
      47             :                                    !  and physics decompositions does not require 
      48             :                                    !  interprocess communication
      49             : integer :: block_buf_nrecs         ! number of local grid points (lon,lat,lev)
      50             :                                    !  in dynamics decomposition (including level 0)
      51             : integer :: chunk_buf_nrecs         ! number of local grid points (lon,lat,lev)
      52             :                                    !  in physics decomposition (including level 0)
      53             : 
      54             : ! used by dyn_grid::get_block_owner_d
      55             : integer :: proc(plat)              ! processor id associated with a given lat.
      56             : 
       57             : ! used by dyn_grid::get_gcol_block_d, get_block_gcol_cnt_d, get_block_gcol_d
      58             : integer, allocatable :: lonrangexy(:,:)   ! global xy-longitude subdomain index
      59             : integer, allocatable :: latrangexy(:,:)   ! global xy-latitude subdomain index
      60             : 
      61             : integer :: force_2d = 0                 !option to force transpose computation for 1D decomp.
      62             : integer :: geopkblocks = 1              !number of stages to use in Z-serial non-transpose
      63             :                                                ! geopotential method (routine geopk_d)
      64             :                                                ! with 2D decomp.
      65             : logical :: geopkdist  = .false.         !use a distributed method for geopotential calculation 
      66             :                                                ! with 2D decomp.
      67             : logical :: geopk16byte   = .false.      !use Z-parallel distributed method for geopotential 
      68             :                                                ! calculation with 2D decomp.; otherwise, use Z-serial 
      69             :                                                ! pipeline algorithm
      70             : integer :: geopktrans = 0               
      71             : integer :: npr_yz(4)                    !yz and xy decompositions
      72             : integer :: modcomm_transpose = 0        !mod_comm transpose method
      73             :                                                !   0 for temporary contiguous buffers
      74             :                                                !   1 for mpi derived types
      75             : integer :: modcomm_geopk = 0            !mod_comm geopk method
      76             :                                                !   0 for temporary contiguous buffers
      77             :                                                !   1 for mpi derived types
      78             : integer :: modcomm_gatscat = 0          !mod_comm gather/scatter method
      79             :                                                !   0 for temporary contiguous buffers
      80             :                                                !   1 for mpi derived types
      81             : integer :: modc_sw_dynrun = 0           !mod_comm irregular underlying communication method for dyn_run/misc
      82             :                                                !  0 for original mp_sendirr/mp_recvirr
      83             :                                                !  1 for mp_swapirr and point-to-point communications
      84             :                                                !  2 for mp_swapirr and all-to-all communications
      85             : logical :: modc_hs_dynrun = .true.      !mod_comm irreg comm handshaking for dyn_run/misc
      86             : logical :: modc_send_dynrun = .true.    ! true for mod_comm irregular communication blocking send for
      87             :                                                ! dyn_run/misc, false for nonblocking send
      88             : integer :: modc_mxreq_dynrun = -1       !maximum number of nonblocking communication requests to allow
      89             :                                                ! when using mp_swapirr and point-to-point communications for
      90             :                                                ! dyn_run/misc
      91             :                                                ! < 0 implies no limits
      92             : integer :: modc_sw_cdcore = 0           !mod_comm irregular underlying communication method for cd_core/geopk
      93             :                                                !  0 for original mp_sendirr/mp_recvirr
      94             :                                                !  1 for mp_swapirr and point-to-point communications
      95             :                                                !  2 for mp_swapirr and all-to-all communications
      96             : logical :: modc_hs_cdcore = .true.      ! true for mod_comm irregular communication handshaking for cd_core/geopk
      97             : logical :: modc_send_cdcore  = .true.   ! true for geopk_d or mod_comm irregular communication blocking send for
      98             :                                                !  cd_core/geopk, false for nonblocking send
      99             : integer :: modc_mxreq_cdcore = -1       ! maximum number of nonblocking communication requests to allow
     100             :                                                !  when using mp_swapirr and point-to-point communications for
     101             :                                                !  cd_core/geopk
     102             :                                                !  < 0 implies no limits
     103             : integer :: modc_sw_gather = 1           ! mod_comm irregular underlying communication method for gather
     104             :                                                !  0 for original mp_sendirr/mp_recvirr
     105             :                                                !  1 for mp_swapirr and point-to-point communications
     106             :                                                !  2 for mp_swapirr and all-to-all communications
     107             : logical :: modc_hs_gather = .true.      ! true for mod_comm irregular communication handshaking for gather
     108             : logical :: modc_send_gather = .true.    ! true for mod_comm irregular communication blocking send for
     109             :                                                !  gather, false for nonblocking send
     110             : integer :: modc_mxreq_gather = 64       ! maximum number of nonblocking communication requests to allow
     111             :                                                !  when using mp_swapirr and point-to-point communications for
     112             :                                                !  gather
     113             :                                                !  < 0 implies no limits
     114             : integer :: modc_sw_scatter = 0          ! mod_comm irregular underlying communication method for scatter
     115             :                                                !  0 for original mp_sendirr/mp_recvirr
     116             :                                                !  1 for mp_swapirr and point-to-point communications
     117             :                                                !  2 for mp_swapirr and all-to-all communications
     118             : logical :: modc_hs_scatter = .false.    ! true for mod_comm irregular communication handshaking for scatter
     119             : logical :: modc_send_scatter = .true.   ! true for mod_comm irregular communication blocking send for
     120             :                                                !  scatter, false for nonblocking send
     121             : integer :: modc_mxreq_scatter = -1      ! maximum number of nonblocking communication requests to allow
     122             :                                                !  when using mp_swapirr and point-to-point communications for
     123             :                                                !  scatter
     124             :                                                !  < 0 implies no limits
     125             : integer :: modc_sw_tracer = 0           ! mod_comm irregular underlying communication method for multiple tracers
     126             :                                                !  0 for original mp_sendirr/mp_recvirr
     127             :                                                !  1 for mp_swapirr and point-to-point communications
     128             :                                                !  2 for mp_swapirr and all-to-all communications
     129             : logical :: modc_hs_tracer = .true.      ! true for mod_comm irregular communication handshaking for multiple tracers
     130             : logical :: modc_send_tracer = .true.    ! true for mod_comm irregular communication blocking send for
     131             :                                                !  multiple tracers, false for nonblocking send
     132             : integer :: modc_mxreq_tracer = -1       ! maximum number of nonblocking communication requests to allow
     133             :                                                !  when using mp_swapirr and point-to-point communications for
     134             :                                                !  multiple tracers
     135             :                                                !  < 0 implies no limits
     136             : integer :: modc_onetwo = 2              !one or two simultaneous mod_comm irregular communications 
     137             :                                                !   (excl. tracers)
     138             : integer :: modc_tracers = 3             ! max number of tracers for simultaneous mod_comm irregular communications 
     139             :                                                !  0 for original mp_sendirr/mp_recvirr communications
     140             :                                                !  positive for special tracer routines
     141             : 
     142             : integer :: fv_ct_overlap  = 0           ! nonzero for overlap of cd_core and trac2d, 0 otherwise
     143             : integer :: fv_trac_decomp = 1           ! size of tracer domain decomposition for trac2d
     144             : 
     145             : ! these vars used in beglat and endlat determination, and in compute_gsfactors
     146             : integer, allocatable :: nlat_p(:)  ! number of latitudes per subdomain in YZ decomp
     147             : integer, allocatable :: cut(:,:)   ! latitude partition for MPI tasks in YZ decomp
     148             : 
     149             : integer :: npr_y
     150             : integer :: npr_z
     151             : integer :: nprxy_x
     152             : integer :: nprxy_y
     153             : integer :: npes_yz
     154             : integer :: npes_xy
     155             : integer :: myid_y
     156             : integer :: myid_z
     157             : integer :: myidxy_x
     158             : integer :: myidxy_y
     159             : 
     160             : integer :: numlats
     161             : 
     162             : !========================================================================================
     163             : contains
     164             : !========================================================================================
     165             : 
     166        1536 : subroutine spmd_readnl(nlfilename)
     167             : 
     168             :    use namelist_utils,  only: find_group_name
     169             :    use units,           only: getunit, freeunit
     170             :    use spmd_utils,      only: mstrid=>masterprocid, mpi_integer, mpi_logical,&
     171             :                               mpi_success
     172             :    ! args
     173             :    character(len=*), intent(in) :: nlfilename
     174             : 
     175             :    ! Local variables
     176             :    integer :: ierr           ! error code
     177             :    integer :: unitn          ! namelist unit number
     178             : 
     179             :    namelist /spmd_fv_inparm/ npr_yz,             &
     180             :         geopktrans, geopkblocks,                 &
     181             :         force_2d, modcomm_transpose,             &
     182             :         modcomm_geopk, modcomm_gatscat,          &
     183             :         modc_sw_dynrun, modc_hs_dynrun,          &
     184             :         modc_send_dynrun, modc_mxreq_dynrun,     &
     185             :         modc_sw_cdcore, modc_hs_cdcore,          &
     186             :         modc_send_cdcore, modc_mxreq_cdcore,     &
     187             :         modc_sw_gather, modc_hs_gather,          &
     188             :         modc_send_gather, modc_mxreq_gather,     &
     189             :         modc_sw_scatter, modc_hs_scatter,        &
     190             :         modc_send_scatter, modc_mxreq_scatter,   &
     191             :         modc_sw_tracer, modc_hs_tracer,          &
     192             :         modc_send_tracer, modc_mxreq_tracer,     &
     193             :         modc_onetwo, modc_tracers,               &
     194             :         fv_ct_overlap, fv_trac_decomp
     195             :    
     196             :    character(len=*), parameter ::  sub = "spmd_readnl"
     197             : 
     198             :    type(t_fvdycore_grid), pointer :: grid
     199             : 
     200             :    integer :: color, ierror, ntemp
     201             :    integer :: twod_decomp
     202             :    integer :: mpicom_yz         ! communicator for yz decomposition
     203             :    integer :: mpicom_nyz        ! communicator for multiple yz decomposition
     204             :    integer :: mpicom_xy         ! communicator for xy decomposition
     205             :    !----------------------------------------------------------------------
     206             : 
     207             :    ! Default 1D domain decomposition
     208        1536 :    npr_yz(1) = npes
     209        1536 :    npr_yz(2) = 1
     210        1536 :    npr_yz(3) = 1
     211        1536 :    npr_yz(4) = npes
     212             : 
     213        1536 :    if (masterproc) then
     214           2 :       write(iulog,*) sub//': Read in spmd_fv_inparm namelist from: ', trim(nlfilename)
     215           2 :       unitn = getunit()
     216           2 :       open( unitn, file=trim(nlfilename), status='old' )
     217             : 
     218             :       ! Look for spmd_fv_inparm group name in the input file.  If found, leave the
     219             :       ! file positioned at that namelist group.
     220           2 :       call find_group_name(unitn, 'spmd_fv_inparm', status=ierr)
     221           2 :       if (ierr == 0) then  ! found spmd_fv_inparm
     222           2 :          read(unitn, spmd_fv_inparm, iostat=ierr)  ! read the spmd_fv_inparm namelist group
     223           2 :          if (ierr /= 0) then
     224           0 :             call endrun(sub//': ERROR reading namelist spmd_fv_inparm')
     225             :          end if
     226             :       end if
     227           2 :       close( unitn )
     228           2 :       call freeunit( unitn )
     229             :    endif
     230             : 
     231        1536 :    call mpi_bcast(npr_yz, 4, mpi_integer, mstrid, mpicom, ierr)
     232        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: npr_yz")
     233             : 
     234        1536 :    call mpi_bcast(geopktrans, 1, mpi_integer, mstrid, mpicom, ierr)
     235        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: geopktrans")
     236             : 
     237        1536 :    call mpi_bcast(geopkblocks, 1, mpi_integer, mstrid, mpicom, ierr)
     238        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: geopkblocks")
     239             : 
     240        1536 :    call mpi_bcast(force_2d, 1, mpi_integer, mstrid, mpicom, ierr)
     241        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: force_2d")
     242             : 
     243        1536 :    call mpi_bcast(modcomm_transpose, 1, mpi_integer, mstrid, mpicom, ierr)
     244        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modcomm_transpose")
     245             : 
     246        1536 :    call mpi_bcast(modcomm_geopk, 1, mpi_integer, mstrid, mpicom, ierr)
     247        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modcomm_geopk")
     248             : 
     249        1536 :    call mpi_bcast(modcomm_gatscat, 1, mpi_integer, mstrid, mpicom, ierr)
     250        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modcomm_gatscat")
     251             : 
     252        1536 :    call mpi_bcast(modc_sw_dynrun, 1, mpi_integer, mstrid, mpicom, ierr)
     253        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_sw_dynrun")
     254             : 
     255        1536 :    call mpi_bcast(modc_hs_dynrun, 1, mpi_logical, mstrid, mpicom, ierr)
     256        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_hs_dynrun")
     257             : 
     258        1536 :    call mpi_bcast(modc_send_dynrun, 1, mpi_logical, mstrid, mpicom, ierr)
     259        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_send_dynrun")
     260             : 
     261        1536 :    call mpi_bcast(modc_mxreq_dynrun, 1, mpi_integer, mstrid, mpicom, ierr)
     262        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_mxreq_dynrun")
     263             : 
     264        1536 :    call mpi_bcast(modc_sw_cdcore, 1, mpi_integer, mstrid, mpicom, ierr)
     265        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_sw_cdcore")
     266             : 
     267        1536 :    call mpi_bcast(modc_hs_cdcore, 1, mpi_logical, mstrid, mpicom, ierr)
     268        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_hs_cdcore")
     269             : 
     270        1536 :    call mpi_bcast(modc_send_cdcore, 1, mpi_logical, mstrid, mpicom, ierr)
     271        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_send_cdcore")
     272             : 
     273        1536 :    call mpi_bcast(modc_mxreq_cdcore, 1, mpi_integer, mstrid, mpicom, ierr)
     274        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_mxreq_cdcore")
     275             : 
     276        1536 :    call mpi_bcast(modc_sw_gather, 1, mpi_integer, mstrid, mpicom, ierr)
     277        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_sw_gather")
     278             : 
     279        1536 :    call mpi_bcast(modc_hs_gather, 1, mpi_logical, mstrid, mpicom, ierr)
     280        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_hs_gather")
     281             : 
     282        1536 :    call mpi_bcast(modc_send_gather, 1, mpi_logical, mstrid, mpicom, ierr)
     283        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_send_gather")
     284             : 
     285        1536 :    call mpi_bcast(modc_mxreq_gather, 1, mpi_integer, mstrid, mpicom, ierr)
     286        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_mxreq_gather")
     287             : 
     288        1536 :    call mpi_bcast(modc_sw_scatter, 1, mpi_integer, mstrid, mpicom, ierr)
     289        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_sw_scatter")
     290             : 
     291        1536 :    call mpi_bcast(modc_hs_scatter, 1, mpi_logical, mstrid, mpicom, ierr)
     292        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_hs_scatter")
     293             : 
     294        1536 :    call mpi_bcast(modc_send_scatter, 1, mpi_logical, mstrid, mpicom, ierr)
     295        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_send_scatter")
     296             : 
     297        1536 :    call mpi_bcast(modc_mxreq_scatter, 1, mpi_integer, mstrid, mpicom, ierr)
     298        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_mxreq_scatter")
     299             : 
     300        1536 :    call mpi_bcast(modc_sw_tracer, 1, mpi_integer, mstrid, mpicom, ierr)
     301        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_sw_tracer")
     302             : 
     303        1536 :    call mpi_bcast(modc_hs_tracer, 1, mpi_logical, mstrid, mpicom, ierr)
     304        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_hs_tracer")
     305             : 
     306        1536 :    call mpi_bcast(modc_send_tracer, 1, mpi_logical, mstrid, mpicom, ierr)
     307        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_send_tracer")
     308             : 
     309        1536 :    call mpi_bcast(modc_mxreq_tracer, 1, mpi_integer, mstrid, mpicom, ierr)
     310        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_mxreq_tracer")
     311             : 
     312        1536 :    call mpi_bcast(modc_onetwo, 1, mpi_integer, mstrid, mpicom, ierr)
     313        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_onetwo")
     314             : 
     315        1536 :    call mpi_bcast(modc_tracers, 1, mpi_integer, mstrid, mpicom, ierr)
     316        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: modc_tracers")
     317             : 
     318        1536 :    call mpi_bcast(fv_ct_overlap, 1, mpi_integer, mstrid, mpicom, ierr)
     319        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: fv_ct_overlap")
     320             : 
     321        1536 :    call mpi_bcast(fv_trac_decomp, 1, mpi_integer, mstrid, mpicom, ierr)
     322        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: fv_trac_decomp")
     323             : 
     324             :    ! Put namelist input into the grid object
     325        1536 :    grid => get_dyn_state_grid()
     326             : 
     327        1536 :    npr_y   = npr_yz(1)
     328        1536 :    npr_z   = npr_yz(2)
     329        1536 :    nprxy_x = npr_yz(3)
     330        1536 :    nprxy_y = npr_yz(4)
     331        1536 :    npes_yz = npr_y*npr_z
     332        1536 :    npes_xy = nprxy_x*nprxy_y
     333        1536 :    if (npes_yz < 1) then
     334           0 :       call endrun(sub//': ERROR: yz domain decomposition must have at least 1 subdomain')
     335             :    endif
     336        1536 :    if (npes_yz > npes) then
     337           0 :       call endrun(sub//': ERROR: incorrect yz domain decomposition')
     338             :    endif
     339        1536 :    if (npes_xy > npes) then
     340           0 :       call endrun(sub//': ERROR: incorrect xy domain decomposition')
     341             :    endif
     342             : 
     343        1536 :    grid%npr_y    = npr_y
     344        1536 :    grid%npr_z    = npr_z
     345        1536 :    grid%nprxy_x  = nprxy_x
     346        1536 :    grid%nprxy_y  = nprxy_y
     347        1536 :    grid%npes_xy  = npes_xy
     348        1536 :    grid%npes_yz  = npes_yz
     349             : 
     350        1536 :    grid%ct_overlap  = fv_ct_overlap
     351        1536 :    grid%trac_decomp = fv_trac_decomp
     352             : 
     353        1536 :    if (fv_ct_overlap .ne. 0 .and. npes .lt. 2*npes_yz) then
     354           0 :       call endrun(sub//': ERROR: Not enough processes to overlap cd_core and trac2d')
     355             :    end if
     356             : 
     357        1536 :    if (fv_trac_decomp .le. 0) then
     358           0 :       call endrun(sub//': ERROR: fv_trac_decomp improperly initialized')
     359             :    end if
     360             : 
     361        1536 :    if (npes .lt. fv_trac_decomp*npes_yz) then
     362           0 :       call endrun(sub//': ERROR: Not enough processes to decompose tracers')
     363             :    endif
     364             : 
     365        1536 :    if (fv_ct_overlap .gt. 0 .and. fv_trac_decomp .gt. 1) then
     366           0 :       call endrun(sub//': ERROR: Cannot simultaneously overlap cd_core/trac2d and decompose tracers')
     367             :    endif
     368             : 
     369             :    ! Tracer decomposition limits
     370        6144 :    allocate(grid%ktloa(fv_trac_decomp), grid%kthia(fv_trac_decomp))
     371        3072 :    grid%ktloa(:) = 1
     372        3072 :    grid%kthia(:) = pcnst
     373        1536 :    grid%ktlo = 1
     374        1536 :    grid%kthi = pcnst
     375             : 
     376        1536 :    grid%commdyn  = mpicom
     377        1536 :    grid%iam      = iam
     378             : 
     379             : #ifdef SPMD
     380        1536 :    myid_z   = iam/npr_y
     381        1536 :    myid_y   = iam - myid_z*npr_y
     382        1536 :    color = iam/npes_yz
     383        1536 :    call mpi_comm_split(mpicom, color, iam, mpicom_yz, ierror)
     384        1536 :    if (ierror /= mpi_success) then
     385           0 :       write(iulog,*) sub//': ERROR: mpi_comm_split_yz failed with IER=', ierror
     386           0 :       call endrun(sub//': ERROR: mpi_comm_split_yz failed')
     387             :    end if
     388        1536 :    call mpi_comm_size(mpicom_yz, ntemp, ierror)
     389        1536 :    if (iam .lt. npes_yz .and. ntemp .ne. npes_yz) then
     390           0 :       write(iulog,*) sub//': ERROR: mpicom_yz has incorrect size of ', ntemp
     391           0 :       call endrun(sub//': ERROR: mpicom_yz has incorrect size')
     392             :    end if
     393             : 
     394        1536 :    if (fv_ct_overlap .gt. 0 .or. fv_trac_decomp .gt. 1) then
     395             :       ! These are mutually exclusive options
     396           0 :       if ((fv_ct_overlap .gt. 0 .and. iam .lt. 2*npes_yz) .or.         &
     397             :            (fv_trac_decomp .gt. 1 .and. iam .lt. fv_trac_decomp*npes_yz)) then
     398           0 :          color = 1
     399             :       else
     400           0 :          color = 0
     401             :       endif
     402           0 :       call mpi_comm_split(mpicom, color, iam, mpicom_nyz, ierror)
     403           0 :       if (ierror /= mpi_success) then
     404           0 :          write (iulog,*) sub//': ERROR: mpi_comm_split_nyz failed with IER=', ierror
     405           0 :          call endrun(sub//': ERROR: mpi_comm_split_nyz failed')
     406             :       endif
     407             :    else
     408        1536 :       mpicom_nyz = mpicom_yz
     409             :    endif
     410             : 
     411        1536 :    myidxy_y = iam/nprxy_x
     412        1536 :    myidxy_x = iam - myidxy_y*nprxy_x
     413        1536 :    color = iam/npes_xy
     414        1536 :    call mpi_comm_split(mpicom, color, iam, mpicom_xy, ierror)
     415        1536 :    if (ierror /= mpi_success) then
     416           0 :       write(iulog,*) sub//': ERROR: mpi_comm_split_xy failed with IER=', ierror
     417           0 :       call endrun(sub//': ERROR: mpi_comm_split_xy failed')
     418             :    endif
     419        1536 :    call mpi_comm_size(mpicom_xy, ntemp, ierror)
     420        1536 :    if (iam .lt. npes_xy .and. ntemp .ne. npes_xy) then
     421           0 :       write(iulog,*) sub//': ERROR: mpicom_xy has incorrect size of ', ntemp
     422           0 :       call endrun(sub//': ERROR: mpicom_xy has incorrect size')
     423             :    endif
     424             : 
     425        1536 :    grid%myid_z = myid_z
     426        1536 :    grid%myid_y = myid_y
     427        1536 :    grid%myidxy_y = myidxy_y
     428        1536 :    grid%myidxy_x = myidxy_x
     429             : 
     430        1536 :    grid%commyz   = mpicom_yz
     431        1536 :    grid%commnyz  = mpicom_nyz
     432        1536 :    grid%commxy   = mpicom_xy
     433             : #endif
     434             : 
     435        1536 :    twod_decomp = 0
     436        1536 :    if (npr_z > 1 .or. nprxy_x > 1 .or. force_2d .eq. 1) then
     437        1536 :       twod_decomp = 1
     438             :    endif
     439        1536 :    grid%twod_decomp = twod_decomp
     440             : 
     441        1536 :    if (geopktrans .ne. 0) geopkdist   = .true.
     442        1536 :    if (geopktrans .eq. 1) geopk16byte = .true.
     443             : #ifdef NO_CRAY_POINTERS
     444             :    if (geopk16byte) then
     445             :       call endrun (sub//': ERROR: cannot use geopk16 unless compiler supports cray pointers')
     446             :    end if
     447             : #endif
     448             : 
     449        1536 :    geopkblocks = max(1,geopkblocks)
     450             : 
     451        1536 :    grid%geopkdist   = geopkdist
     452        1536 :    grid%geopk16byte = geopk16byte
     453        1536 :    grid%geopkblocks = geopkblocks
     454        1536 :    grid%mod_method  = modcomm_transpose
     455        1536 :    grid%mod_geopk   = modcomm_geopk
     456        1536 :    grid%mod_gatscat = modcomm_gatscat
     457             : 
     458        1536 :    if (modc_sw_dynrun > 0 .and. modcomm_transpose > 0) then
     459           0 :       call endrun(sub//': ERROR: modc_sw_dynrun and modcomm_transpose are inconsistent')
     460             :    endif
     461             : 
     462        1536 :    grid%modc_dynrun(1)   = modc_sw_dynrun
     463        4608 :    grid%modc_dynrun(2:3) = 0
     464        1536 :    if (modc_hs_dynrun)   grid%modc_dynrun(2) = 1
     465        1536 :    if (modc_send_dynrun) grid%modc_dynrun(3) = 1
     466        1536 :    grid%modc_dynrun(4)   = modc_mxreq_dynrun
     467             : 
     468        1536 :    if (modc_sw_cdcore > 0 .and. (modcomm_transpose > 0  .or. &
     469             :                                 (modcomm_geopk > 0 .and. geopk16byte))) then
     470           0 :       call endrun(sub//': ERROR: modc_sw_cdcore and modcomm_transpose are inconsistent')
     471             :    endif
     472             : 
     473        1536 :    grid%modc_cdcore(1)   = modc_sw_cdcore
     474        4608 :    grid%modc_cdcore(2:3) = 0
     475        1536 :    if (modc_hs_cdcore)   grid%modc_cdcore(2) = 1
     476        1536 :    if (modc_send_cdcore) grid%modc_cdcore(3) = 1
     477        1536 :    grid%modc_cdcore(4)   = modc_mxreq_cdcore
     478             : 
     479        1536 :    if (modc_sw_gather > 0 .and. modcomm_gatscat > 0) then
     480           0 :       call endrun(sub//': ERROR: modc_sw_gather and modcomm_gatscat are inconsistent')
     481             :    endif
     482             : 
     483        1536 :    grid%modc_gather(1)   = modc_sw_gather
     484        4608 :    grid%modc_gather(2:3) = 0
     485        1536 :    if (modc_hs_gather)   grid%modc_gather(2) = 1
     486        1536 :    if (modc_send_gather) grid%modc_gather(3) = 1
     487        1536 :    grid%modc_gather(4)   = modc_mxreq_gather
     488             : 
     489        1536 :    if (modc_sw_scatter > 0 .and. modcomm_gatscat > 0) then
     490           0 :       call endrun(sub//': ERROR: modc_sw_scatter and modcomm_gatscat are inconsistent')
     491             :    endif
     492             : 
     493        1536 :    grid%modc_scatter(1)   = modc_sw_scatter
     494        4608 :    grid%modc_scatter(2:3) = 0
     495        1536 :    if (modc_hs_scatter)   grid%modc_scatter(2) = 1
     496        1536 :    if (modc_send_scatter) grid%modc_scatter(3) = 1
     497        1536 :    grid%modc_scatter(4)    = modc_mxreq_scatter
     498             : 
     499        1536 :    if (modc_sw_tracer > 0 .and. modcomm_transpose > 0) then
     500           0 :       call endrun(sub//': ERROR: modc_sw_tracer and modcomm_transpose are inconsistent')
     501             :    endif
     502             : 
     503        1536 :    grid%modc_tracer(1)   = modc_sw_tracer
     504        4608 :    grid%modc_tracer(2:3) = 0
     505        1536 :    if (modc_hs_tracer)   grid%modc_tracer(2) = 1
     506        1536 :    if (modc_send_tracer) grid%modc_tracer(3) = 1
     507        1536 :    grid%modc_tracer(4)   = modc_mxreq_tracer
     508             : 
     509        1536 :    if (modc_tracers < 0) then
     510           0 :       call endrun(sub//': ERROR: inadmissable value of modc_tracers')
     511             :    endif
     512             : 
     513        1536 :    grid%modc_onetwo       = modc_onetwo
     514        1536 :    grid%modc_tracers      = modc_tracers
     515             : 
     516        1536 :    if (masterproc) then
     517           2 :       write(iulog,*)'FV grid decomposition:'
     518           2 :       write(iulog,*)'  npr_y = ', npr_y, '  npr_z = ', npr_z
     519           2 :       write(iulog,*)'  nprxy_x = ', nprxy_x, '  nprxy_y = ', nprxy_y
     520           2 :       write(iulog,*)'  npes = ', npes, '  npes_yz= ', npes_yz, '  npes_xy = ', npes_xy
     521             : 
     522           2 :       if (npes > 1) then
     523             : 
     524           2 :          if (fv_ct_overlap .ne. 0) then
     525           0 :             write(iulog,*)'  Overlapping tracer and dynamics subcycles'
     526             :          endif
     527             : 
     528           2 :          write(iulog,*)'  Decomposing tracers into ', fv_trac_decomp, ' groups'
     529             : 
     530           2 :          if (twod_decomp == 0) then
     531           0 :             write(iulog,*)'  decomposition is effectively 1D - skipping transposes'
     532             :          else
     533           2 :             write(iulog,*)'  using multi-2d decomposition methodology'
     534             :          end if
     535             : 
     536           2 :          write(iulog,*)'  non-transpose geopk communication method = ', geopkdist
     537           2 :          write(iulog,*)'  Z-parallel non-transpose geopk communication method = ', geopk16byte
     538             : 
     539           2 :          if (geopkdist .and. (.not. geopk16byte)) then
     540           0 :             write(iulog,*)'  number of stages in Z-serial non-transpose geopk method = ', geopkblocks
     541             :          endif
     542             : 
     543           2 :          write(iulog,*)'  modcomm transpose method = ', modcomm_transpose
     544           2 :          write(iulog,*)'  modcomm geopk method = ', modcomm_geopk
     545           2 :          write(iulog,*)'  modcomm gatscat method = ', modcomm_gatscat
     546             : 
     547           2 :          write(iulog,*)'  modc_sw_dynrun = ', modc_sw_dynrun
     548           2 :          write(iulog,*)'  modc_hs_dynrun = ', modc_hs_dynrun
     549           2 :          write(iulog,*)'  modc_send_dynrun = ', modc_send_dynrun
     550           2 :          write(iulog,*)'  modc_mxreq_dynrun = ', modc_mxreq_dynrun
     551             : 
     552           2 :          write(iulog,*)'  modc_sw_cdcore = ', modc_sw_cdcore
     553           2 :          write(iulog,*)'  modc_hs_cdcore = ', modc_hs_cdcore
     554           2 :          write(iulog,*)'  modc_send_cdcore = ', modc_send_cdcore
     555           2 :          write(iulog,*)'  modc_mxreq_cdcore = ', modc_mxreq_cdcore
     556             : 
     557           2 :          write(iulog,*)'  modc_sw_gather = ', modc_sw_gather
     558           2 :          write(iulog,*)'  modc_hs_gather = ', modc_hs_gather
     559           2 :          write(iulog,*)'  modc_send_gather = ', modc_send_gather
     560           2 :          write(iulog,*)'  modc_mxreq_gather = ', modc_mxreq_gather
     561             : 
     562           2 :          write(iulog,*)'  modc_sw_scatter = ', modc_sw_scatter
     563           2 :          write(iulog,*)'  modc_hs_scatter = ', modc_hs_scatter
     564           2 :          write(iulog,*)'  modc_send_scatter = ', modc_send_scatter
     565           2 :          write(iulog,*)'  modc_mxreq_scatter = ', modc_mxreq_scatter
     566             : 
     567           2 :          write(iulog,*)'  modc_sw_tracer = ', modc_sw_tracer
     568           2 :          write(iulog,*)'  modc_hs_tracer = ', modc_hs_tracer
     569           2 :          write(iulog,*)'  modc_send_tracer = ', modc_send_tracer
     570           2 :          write(iulog,*)'  modc_mxreq_tracer = ', modc_mxreq_tracer
     571             : 
     572           2 :          write(iulog,*)'  modc_onetwo = ', modc_onetwo
     573           2 :          write(iulog,*)'  modc_tracers = ', modc_tracers
     574             : 
     575             :       end if
     576             : 
     577             :    end if
     578             : 
     579        1536 : end subroutine spmd_readnl
     580             : 
     581             : !========================================================================================
     582             : 
subroutine spmdinit_dyn(jord, grid)

   !-----------------------------------------------------------------------
   ! Initialize grid decomposition.
   !
   ! Computes the primary YZ (latitude x level) and secondary XY
   ! (longitude x latitude) domain decompositions, stores the local index
   ! ranges in the grid object, initializes the PILGRIM communication
   ! library, and sets up the ghost-zone widths needed by the FV advection
   ! scheme.  Relies on module-level decomposition sizes (npr_y, npr_z,
   ! nprxy_x, nprxy_y, npes_yz, npes_xy) set earlier by spmd_readnl.
   !
   ! !REVISION HISTORY:
   !   00.09.30  Sawyer             Added LR-specific initialization
   !   01.06.27  Mirin              Secondary 2-D xy decomposition
   !   01.10.16  Sawyer             Added Y at each Z decompositions
   !   03.07.22  Sawyer             Removed decomps used by highp2
   !-----------------------------------------------------------------------

#ifdef SPMD
   use parutilitiesmodule, only: parinit, gid, parcollective, maxop
   use dynamics_vars,      only: spmd_vars_init
#endif

   ! arguments
   integer,            intent(in) :: jord   ! meridional advection order; sets ghost widths
   type(t_fvdycore_grid), pointer :: grid   ! FV grid object; decomposition fields are filled in

   ! local variables:

   integer, parameter :: numbnd = 0        ! no.of latitudes passed N and S of forecast lat

   integer :: beglat   ! first latitude owned in YZ decomposition
   integer :: endlat   ! last latitude owned in YZ decomposition
   integer :: beglev   ! first level owned in YZ decomposition
   integer :: endlev   ! last level owned in YZ decomposition

   integer :: mod_maxirr

   integer :: procid    ! processor id
   integer :: procids   ! processor id
   integer :: procidn   ! processor id

   integer :: j, k
   integer :: lat
   integer :: vert
   integer :: lonn
   integer :: workleft  ! amount of work still to be parcelled out

   integer :: isum      ! running total of work parcelled out (appears unused here)
   integer :: smostlat  ! southern-most latitude index (appears unused here)
   integer :: nmostlat  ! northern-most latitude index (appears unused here)

   integer, allocatable :: ydist(:) ! number of lats per subdomain in YZ decomp
   integer, allocatable :: zdist(:) ! number of levels per subdomain in YZ decomp

   integer :: beglonxy, endlonxy
   integer :: beglatxy, endlatxy
   integer, allocatable :: xdistxy(:) ! number of xy-longs per subdomain
   integer, allocatable :: ydistxy(:) ! number of xy-lats per subdomain

   integer, allocatable :: tmp(:)
   integer, allocatable :: jmyz(:), kmyz(:) ! used for nonblocking receive
   integer, allocatable :: imxy(:), jmxy(:) ! used for nonblocking receive

   character(len=*), parameter ::  sub = "spmdinit_dyn"
   !---------------------------------------------------------------------------

   ! Default YZ decomposition is 1D
   beglev = 1
   endlev = plev

   mod_maxirr = max(modc_onetwo, modc_tracers)

#ifdef SPMD

   spmd_on = 1

   ! Initialize PILGRIM library
   call parinit(comm=mpicom, &
                npryzxy = (/ npr_y, npr_z, nprxy_x, nprxy_y /), &
                mod_method  = modcomm_transpose, &
                mod_geopk   = modcomm_geopk,     &
                mod_maxirr  = mod_maxirr,    &
                mod_gatscat = modcomm_gatscat )

   ! Compute Y partition for YZ decomposition

   allocate(ydist  (npr_y))
   allocate(nlat_p (0:npes-1))
   allocate(cut    (2,0:npes-1))

   ! cut is initialized to an empty (degenerate) range for every task
   ydist(:) = 0
   nlat_p(:) = 0
   cut(1,:) = -1
   cut(2,:) = -2

   ! Even base distribution; workleft = remainder latitudes to spread around
   lat = plat / npr_y
   workleft = plat - lat * npr_y
   if (lat < 3) then
      call endrun(sub//': ERROR: less than 3 latitudes per subdomain')
   end if

   ! Be careful:  ydist is 1-based.  CAMs arrays, e.g., cut,  are 0-based

   do procid = 1, npr_y
      ydist(procid) = lat
   end do

   ! Hand out the remainder latitudes one at a time, alternating outward
   ! from the middle of the processor range so the extra load stays balanced
   if ( workleft /= 0 ) then
      procids = (npr_y+1) / 2
      procidn = procids + 1
      do while ( workleft /= 0 )
         if ( procids == 1 ) procids = npr_y
         ydist(procids) = ydist(procids) + 1
         workleft = workleft - 1
         if ( workleft /= 0 ) then
            ydist(procidn) = ydist(procidn) + 1
            workleft = workleft - 1
         end if
         procidn = procidn + 1
         procids = procids - 1
      end do
   end if

   ! Safety checks:
   if (sum(ydist) /= plat) then
      write(iulog,*) sub//': ERROR: sum(ydist)=', sum(ydist),' not equal to plat'
      call endrun(sub//': ERROR: sum(ydist) not equal to plat')
   end if
   if (workleft/=0) then
      write(iulog,*) sub//': ERROR: workleft for ydist not zero. workleft=', workleft
      call endrun(sub//': ERROR: workleft for ydist not zero')
   end if

   ! Set the CAM data structures: cut(1:2,procid) is the inclusive latitude
   ! range owned by each task in the first z-row of the YZ decomposition
   lat  = 0
   do procid = 0, npr_y-1
      cut(1,procid) = lat + 1
      lat = lat + ydist(procid+1)
      cut(2,procid) = lat
      nlat_p(procid) = ydist(procid+1)

      if (masterproc) then
         write(iulog,*) 'nlat_p(',procid,') = ', nlat_p(procid)
      end if

      if (myid_y == procid) then
         beglat  = cut(1,myid_y)
         endlat  = cut(2,myid_y)
         numlats = ydist(procid+1)
      end if
   end do

   ! Replicate the latitude ranges across the remaining z-rows: task
   ! j + k*npr_y owns the same latitudes as task j
   do k = 1, npr_z-1
      do j = 0, npr_y-1
         procid = j + k*npr_y
         cut(1,procid) = cut(1,j)
         cut(2,procid) = cut(2,j)
         nlat_p(procid) = nlat_p(j)
      end do
   end do

   ! Build the inverse map: proc(lat) = id of the (last z-row) task owning lat
   proc(:) = 0
   do procid = 0, npr_y*npr_z-1

      ! Determine which processor is responsible for the defined latitudes
      do lat = cut(1,procid), cut(2,procid)
         proc(lat) = procid
      end do
   end do

   ! Compute Z partition for YZ decomposition

   allocate(zdist((npes-1)/npr_y+1))

   zdist(:) = 0

   vert = plev / npr_z
   workleft = plev - vert * npr_z
   if (vert < 1) then
      call endrun(sub//': ERROR: less than 1 vertical levels per subdomain')
   end if

   do procid = 1, npr_z
      zdist(procid) = vert
   end do

   ! Same middle-out remainder distribution as used for ydist above
   if (workleft /= 0) then
      procids = (npr_z+1) / 2
      procidn = procids + 1
      do while ( workleft /= 0 )
         if (procids == 1) procids = npr_z
         zdist(procids) = zdist(procids) + 1
         workleft = workleft - 1
         if (workleft /= 0) then
            zdist(procidn) = zdist(procidn) + 1
            workleft = workleft - 1
         end if
         procidn = procidn + 1
         procids = procids - 1
      end do
   end if

   ! Safety checks:
   if (sum(zdist) /= plev) then
      write(iulog,*) sub//': ERROR: sum(zdist)=', sum(zdist),' not equal to plev'
      call endrun(sub//': ERROR: sum(zdist) not equal to plev')
   endif
   if (workleft/=0) then
      write(iulog,*) sub//': ERROR: workleft for zdist not zero. workleft=', workleft
      call endrun(sub//': ERROR: workleft for zdist not zero')
   end if

   ! Compute local limits
   call locallimits(myid_z, zdist, beglev, endlev)

   ! Auxiliary processes only: tasks beyond the YZ decomposition get empty
   ! (degenerate) ranges so loops over their local domain do no work
   if (iam >= npes_yz) then
      beglat   = 1
      endlat   = 0
      numlats  = 0
      beglev   = 1
      endlev   = 0
   end if

   grid%jfirst   = beglat
   grid%jlast    = endlat
   grid%kfirst   = beglev
   grid%klast    = endlev
   ! klastp extends klast by one on the tasks owning the bottom level
   ! (edge-level arrays have plev+1 entries)
   if (endlev == plev) then
      grid%klastp = plev+1
   else
      grid%klastp = endlev
   end if

   ! Compute X partition for XY decomposition

   allocate(xdistxy(nprxy_x))
   xdistxy(:) = 0

   lonn = plon / nprxy_x
   workleft = plon - lonn * nprxy_x
   if (lonn < 3) then
      call endrun(sub//': ERROR: less than 3 longitudes per XY subdomain')
   end if

   do procid = 1, nprxy_x
      xdistxy(procid) = lonn
   enddo

   ! Middle-out remainder distribution, as for ydist/zdist
   if (workleft /= 0) then
      procids = (nprxy_x+1) / 2
      procidn = procids + 1
      do while (workleft /= 0)
         if (procids == 1) procids = nprxy_x
         xdistxy(procids) = xdistxy(procids) + 1
         workleft = workleft - 1
         if (workleft /= 0) then
            xdistxy(procidn) = xdistxy(procidn) + 1
            workleft = workleft - 1
         end if
         procidn = procidn + 1
         procids = procids - 1
      end do
   end if

   ! Safety check:
   if ( sum(xdistxy) /= plon ) then
      write(iulog,*) sub//': ERROR: sum(xdistxy)=', sum(xdistxy),' not equal to plon'
      call endrun(sub//': ERROR: sum(xdistxy) not equal to plon ')
   end if
   if (workleft/=0) then
      write(iulog,*) sub//': ERROR: workleft for xdistxy not zero.  workleft=',workleft
      call endrun(sub//': ERROR: workleft for xdistxy not zero')
   end if

   ! Compute local limits
   call locallimits(myidxy_x, xdistxy, beglonxy, endlonxy)

   ! Compute global array for use in dyn_grid module:
   ! lonrangexy(1:2,p) = inclusive longitude range of x-column p
   allocate (lonrangexy(2,nprxy_x))
   lonrangexy(1,1) = 1
   lonrangexy(2,1) = xdistxy(1)
   do procid = 2, nprxy_x
      lonrangexy(1,procid) = lonrangexy(2,procid-1) + 1
      lonrangexy(2,procid) = lonrangexy(1,procid) + xdistxy(procid) - 1
   end do

   ! Compute Y partition for XY decomposition

   allocate(ydistxy((npes-1)/nprxy_x+1))
   ydistxy(:) = 0

   lat = plat / nprxy_y
   workleft = plat - lat * nprxy_y
   if (lat < 3) then
      call endrun(sub//': ERROR: less than 3 latitudes per XY subdomain')
   end if

   do procid = 1, nprxy_y
      ydistxy(procid) = lat
   end do

   ! Middle-out remainder distribution, as above
   if (workleft /= 0) then
      procids = (nprxy_y+1) / 2
      procidn = procids + 1
      do while (workleft /= 0)
         if (procids == 1) procids = nprxy_y
         ydistxy(procids) = ydistxy(procids) + 1
         workleft = workleft - 1
         if (workleft /= 0) then
            ydistxy(procidn) = ydistxy(procidn) + 1
            workleft = workleft - 1
         end if
         procidn = procidn + 1
         procids = procids - 1
      end do
   end if

   ! Safety check:
   if (sum(ydistxy) /= plat) then
      write(iulog,*) sub//': ERROR: sum(ydistxy)=', sum(ydistxy),' not equal to plat'
      call endrun(sub//': ERROR: sum(ydistxy) not equal to plat')
   end if
   if (workleft/=0) then
      write(iulog,*) sub//': ERROR: workleft for ydistxy not zero.  workleft=',workleft
      call endrun(sub//': ERROR: workleft for ydistxy not zero')
   end if

   ! Compute local limits
   call locallimits(myidxy_y, ydistxy, beglatxy, endlatxy)

   ! Auxiliary processes only: empty XY ranges for tasks beyond npes_xy
   if (iam >= npes_xy) then
      beglonxy = 1
      endlonxy = 0
      beglatxy = 1
      endlatxy = 0
   end if

   grid%ifirstxy = beglonxy
   grid%ilastxy  = endlonxy
   grid%jfirstxy = beglatxy
   grid%jlastxy  = endlatxy

   ! Compute global array for use in dyn_grid module:
   ! latrangexy(1:2,p) = inclusive latitude range of y-row p
   allocate (latrangexy(2,nprxy_y))
   latrangexy(1,1) = 1
   latrangexy(2,1) = ydistxy(1)
   do procid = 2, nprxy_y
      latrangexy(1,procid) = latrangexy(2,procid-1) + 1
      latrangexy(2,procid) = latrangexy(1,procid) + ydistxy(procid) - 1
   end do

   deallocate(ydist)
   deallocate(zdist)

   deallocate(xdistxy)
   deallocate(ydistxy)

   ! Calculate the ghost region sizes for the SPMD version (tricky stuff):
   ! higher advection order (jord) requires more ghost latitudes, clamped
   ! to the range [2,3]
   grid%ng_c = 2                    ! Avoid the case where ng_c = 1
   grid%ng_d = min( abs(jord), 3)   ! SJL: number of max ghost latitudes
   grid%ng_d = max( grid%ng_d, 2)
   grid%ng_s = max( grid%ng_c+1, grid%ng_d )

   ! Define imxy, jmxy, jmyz, kmyz from beglonxy, endlonxy, etc.
   ! Each task deposits its local extent into tmp at its own slot; the
   ! max-reduction parcollective then makes all extents globally known.
   allocate(tmp(npes), imxy(nprxy_x), jmxy(nprxy_y), jmyz(npr_y), kmyz(npr_z))

   tmp = 0
   tmp(gid+1) = endlonxy - beglonxy + 1
   call parcollective( mpicom, maxop, npes, tmp )
   imxy(1:nprxy_x) = tmp(1:nprxy_x)

   tmp = 0
   tmp(gid+1) = endlatxy - beglatxy + 1
   call parcollective( mpicom, maxop, npes, tmp )
   do k = 1, nprxy_y
      jmxy(k) = tmp((k-1)*nprxy_x+1)
   end do

   tmp = 0
   tmp(gid+1)   = grid%jlast - grid%jfirst + 1
   call parcollective( mpicom, maxop, npes, tmp )
   jmyz(1:grid%npr_y) = tmp(1:grid%npr_y)

   tmp = 0
   tmp(gid+1)   = grid%klast - grid%kfirst + 1
   call parcollective( mpicom, maxop, npes, tmp )
   do k = 1, grid%npr_z
      kmyz(k) = tmp((k-1)*grid%npr_y+1)
   end do

   ! set up the PILGRIM communications
   call spmd_vars_init(imxy, jmxy, jmyz, kmyz, grid)

   deallocate(tmp, imxy, jmxy, jmyz, kmyz)

#endif
end subroutine spmdinit_dyn
     977             : 
     978             : !========================================================================================
     979             : 
subroutine spmdbuf

   ! Intentionally empty: the FV dycore requires no communication-buffer
   ! setup, but this routine is retained so the dynamics SPMD interface
   ! stays uniform across dycores (callers can invoke it unconditionally).

end subroutine spmdbuf
     983             : 
     984             : !========================================================================================
     985             : 
     986           0 : subroutine compute_gsfactors(numperlat, numtot, numperproc, displs)
     987             : 
     988             :    ! Compute arguments for gatherv, scatterv
     989             : 
     990             :    ! arguments
     991             :    integer, intent(in)  :: numperlat            ! number of elements per latitude
     992             :    integer, intent(out) :: numtot               ! total number of elements (to send or recv)
     993             :    integer, intent(out) :: numperproc(0:npes-1) ! per-PE number of items to receive
     994             :    integer, intent(out) :: displs(0:npes-1)     ! per-PE displacements
     995             : 
     996             : #ifdef SPMD
     997             :    ! Local variables
     998             :    integer :: p
     999             :    !---------------------------------------------------------------------------
    1000             :    
    1001           0 :    numtot = numperlat*numlats
    1002             :    
    1003           0 :    do p = 0, npes-1
    1004           0 :       numperproc(p) = numperlat*nlat_p(p)
    1005             :    end do
    1006             :      
    1007           0 :    displs(:) = 0
    1008           0 :    do p = 1, npr_y-1
    1009           0 :       displs(p) = displs(p-1) + numperproc(p-1)
    1010             :    end do
    1011             : 
    1012           0 :    if (npr_z > 1) then
    1013           0 :       do p = 1, npr_z-1
    1014           0 :          displs(p*npr_y:(p+1)*npr_y-1) = displs(0:npr_y-1)
    1015             :       enddo
    1016             :    endif
    1017             : #endif
    1018             : 
    1019           0 : end subroutine compute_gsfactors
    1020             : 
    1021             : !========================================================================================
    1022             : 
    1023        4608 : subroutine locallimits(myidxy, distxy, begdimxy, enddimxy)
    1024             : 
    1025             :    integer, intent(in)  :: myidxy
    1026             :    integer, intent(in)  :: distxy(:)
    1027             :    integer, intent(out) :: begdimxy
    1028             :    integer, intent(out) :: enddimxy
    1029             :       
    1030             :    integer :: procid
    1031             :       
    1032        4608 :    begdimxy = 1
    1033        4608 :    enddimxy = distxy(1)
    1034             :       
    1035       69888 :    do procid = 1, myidxy
    1036       65280 :       begdimxy = enddimxy + 1
    1037       69888 :       enddimxy = begdimxy + distxy(procid+1) - 1
    1038             :    end do
    1039        4608 : end subroutine locallimits
    1040             : 
    1041             : !========================================================================================
    1042             : 
    1043             : end module spmd_dyn
    1044             : 

Generated by: LCOV version 1.14