LCOV - code coverage report
Current view: top level - physics/cam - phys_grid.F90 (source / functions) Hit Total Coverage
Test: coverage.info Lines: 809 1557 52.0 %
Date: 2025-03-14 01:21:06 Functions: 24 51 47.1 %

          Line data    Source code
       1             : module phys_grid
       2             : !-----------------------------------------------------------------------
       3             : !
       4             : ! Purpose: Definition of physics computational horizontal grid.
       5             : !
       6             : ! Method: Variables are private; interface routines used to extract
       7             : !         information for use in user code.
       8             : !
       9             : ! Entry points:
      10             : !      phys_grid_readnl    read namelist options
      11             : !
      12             : !      phys_grid_init       initialize chunk'ed data structure
      13             : !      phys_grid_initialized    get physgrid_set flag
      14             : !
      15             : !      get_chunk_indices_p get local chunk index range
      16             : !      get_ncols_p         get number of columns for a given chunk
      17             : !      get_grid_dims       return physics grid axis global sizes
      18             : !      get_xxx_all_p       get global indices, coordinates, or values
      19             : !                          for a given chunk
      20             : !      get_xxx_vec_p       get global indices, coordinates, or values
      21             : !                          for a subset of the columns in a chunk
      22             : !      get_xxx_p           get global indices, coordinates, or values
      23             : !                          for a single column
      24             : !      where xxx is
      25             : !       area               for column surface area (in radians squared)
      26             : !       gcol               for global column index
      27             : !       lat                for global latitude index
      28             : !       lon                for global longitude index
      29             : !       rlat               for latitude coordinate (in radians)
      30             : !       rlon               for longitude coordinate (in radians)
      31             : !       wght               for column integration weight
      32             : !
      33             : !      scatter_field_to_chunk
      34             : !                          distribute field
      35             : !                          to decomposed chunk data structure
      36             : !      gather_chunk_to_field
      37             : !                          reconstruct field
      38             : !                          from decomposed chunk data structure
      39             : !
      40             : !      read_chunk_from_field
      41             : !                          read and distribute field
      42             : !                          to decomposed chunk data structure
      43             : !      write_field_from_chunk
      44             : !                          write field
      45             : !                          from decomposed chunk data structure
      46             : !
      47             : !      block_to_chunk_send_pters
      48             : !                          return pointers into send buffer where data
      49             : !                          from decomposed fields should
      50             : !                          be copied to
      51             : !      block_to_chunk_recv_pters
      52             : !                          return pointers into receive buffer where data
      53             : !                          for decomposed chunk data structures should
      54             : !                          be copied from
      55             : !      transpose_block_to_chunk
      56             : !                          transpose buffer containing decomposed
      57             : !                          fields to buffer
      58             : !                          containing decomposed chunk data structures
      59             : !
      60             : !      chunk_to_block_send_pters
      61             : !                          return pointers into send buffer where data
      62             : !                          from decomposed chunk data structures should
      63             : !                          be copied to
      64             : !      chunk_to_block_recv_pters
      65             : !                          return pointers into receive buffer where data
      66             : !                          for decomposed fields should
      67             : !                          be copied from
      68             : !      transpose_chunk_to_block
      69             : !                          transpose buffer containing decomposed
      70             : !                          chunk data structures to buffer
      71             : !                          containing decomposed fields
      72             : !
      73             : !      chunk_index         identify whether index is for a latitude or
      74             : !                          a chunk
      75             : !
      76             : ! FOLLOWING ARE NO LONGER USED, AND ARE CURRENTLY COMMENTED OUT
      77             : !      get_gcol_owner_p    get owner of column
      78             : !                          for given global physics column index
      79             : !
      80             : !      buff_to_chunk       Copy from local buffer to local chunk data
      81             : !                          structure. (Needed for cpl6.)
      82             : !
      83             : !      chunk_to_buff       Copy from local chunk data structure to
      84             : !                          local buffer. (Needed for cpl6.)
      85             : !
      86             : ! Author: Patrick Worley and John Drake
      87             : !
      88             : !-----------------------------------------------------------------------
      89             :    use shr_kind_mod,     only: r8 => shr_kind_r8, r4 => shr_kind_r4
      90             :    use physconst,        only: pi
      91             :    use ppgrid,           only: pcols, pver, begchunk, endchunk
      92             : #if ( defined SPMD )
      93             :    use spmd_dyn,         only: block_buf_nrecs, chunk_buf_nrecs, &
      94             :                                local_dp_map
      95             :    use mpishorthand
      96             : #endif
      97             :    use spmd_utils,       only: iam, masterproc, npes, proc_smp_map, nsmps
      98             :    use m_MergeSorts,     only: IndexSet, IndexSort
      99             :    use cam_abortutils,   only: endrun
     100             :    use perf_mod
     101             :    use cam_logfile,      only: iulog
     102             : 
     103             :    implicit none
     104             :    save
     105             : 
     106             : #if ( ! defined SPMD )
     107             :    integer, private :: block_buf_nrecs
     108             :    integer, private :: chunk_buf_nrecs
     109             :    logical, private :: local_dp_map=.true.
     110             : #endif
     111             : 
     112             : ! The identifier for the physics grid
     113             :    integer, parameter, public :: phys_decomp = 100
     114             :    integer, parameter, public :: phys_decomp_scm = 200
     115             : 
     116             : ! dynamics field grid information
     117             :    integer, private :: hdim1_d, hdim2_d
     118             :                                        ! dimensions of rectangular horizontal grid
     119             :                                        ! data structure, If 1D data structure, then
     120             :                                        ! hdim2_d == 1.
     121             : 
     122             : ! physics field data structures
     123             :    integer, private :: ngcols           ! global column count in physics grid (all)
     124             :    integer, public  :: num_global_phys_cols ! global column count in phys grid
     125             :                                             ! (without holes)
     126             : 
     127             :    integer, dimension(:), allocatable, private :: dyn_to_latlon_gcol_map
     128             :                                        ! map from unsorted (dynamics) to lat/lon sorted grid indices
     129             :    integer, dimension(:), allocatable, private :: latlon_to_dyn_gcol_map
     130             :                                        ! map from lat/lon sorted grid to unsorted (dynamics) indices
     131             :    integer, dimension(:), allocatable, private :: lonlat_to_dyn_gcol_map
     132             :                                        ! map from lon/lat sorted grid to unsorted (dynamics) indices
     133             : 
     134             :    integer, private :: clat_p_tot ! number of unique latitudes
     135             :    integer, private :: clon_p_tot ! number of unique longitudes
     136             : 
     137             :    integer, dimension(:), allocatable, private :: clat_p_cnt ! number of repeats for each latitude
     138             :    integer, dimension(:), allocatable, private :: clat_p_idx ! index in latlon ordering for first occurence
     139             :                                                              ! of latitude corresponding to given
     140             :                                                              ! latitude index
     141             :    real(r8), dimension(:), allocatable :: clat_p  ! unique latitudes (radians, increasing)
     142             : 
     143             : 
     144             :    integer, dimension(:), allocatable, private :: clon_p_cnt ! number of repeats for each longitude
     145             :    real(r8), dimension(:), allocatable :: clon_p  ! unique longitudes (radians, increasing)
     146             : 
     147             :    integer, dimension(:), allocatable, private :: lat_p      ! index into list of unique column latitudes
     148             :    integer, dimension(:), allocatable, private :: lon_p      ! index into list of unique column longitudes
     149             : 
     150             : ! chunk data structures
     151             :    type chunk
     152             :      integer  :: ncols                 ! number of vertical columns
     153             :      integer  :: gcol(pcols)           ! global physics column indices
     154             :      integer  :: lon(pcols)            ! global longitude indices
     155             :      integer  :: lat(pcols)            ! global latitude indices
     156             :      integer  :: owner                 ! id of process where chunk assigned
     157             :      integer  :: lcid                  ! local chunk index
     158             :    end type chunk
     159             : 
     160             :    integer :: nchunks                  ! global chunk count
     161             :    type (chunk), dimension(:), allocatable, public :: chunks
     162             :                                        ! global computational grid
     163             : 
     164             :    integer, dimension(:), allocatable, private :: npchunks
     165             :                                        ! number of chunks assigned to each process
     166             : 
     167             :    type lchunk
     168             :      integer  :: ncols                 ! number of vertical columns
     169             :      integer  :: cid                   ! global chunk index
     170             :      integer  :: gcol(pcols)           ! global physics column indices
     171             :      real(r8) :: area(pcols)           ! column surface area (from dynamics)
     172             :      real(r8) :: wght(pcols)           ! column integration weight (from dynamics)
     173             :    end type lchunk
     174             : 
     175             :    integer, private :: nlchunks        ! local chunk count
     176             :    type (lchunk), dimension(:), allocatable, private :: lchunks
     177             :                                        ! local chunks
     178             : 
     179             :    type knuhc
     180             :      integer  :: chunkid               ! chunk id
     181             :      integer  :: col                   ! column index in chunk
     182             :    end type knuhc
     183             : 
     184             :    type (knuhc), dimension(:), allocatable, private :: knuhcs
     185             :                                        ! map from global column indices
     186             :                                        ! to chunk'ed grid
     187             : 
     188             : ! column mapping data structures
     189             :    type column_map
     190             :      integer  :: chunk                 ! global chunk index
     191             :      integer  :: ccol                  ! column ordering in chunk
     192             :    end type column_map
     193             : 
     194             :    integer, private :: nlcols           ! local column count
     195             :    type (column_map), dimension(:), allocatable, private :: pgcols
     196             :                                        ! ordered list of columns (for use in gather/scatter)
     197             :                                        ! NOTE: consistent with local ordering
     198             : 
     199             : ! column remap data structures
     200             :    integer, dimension(:), allocatable, private :: gs_col_num
     201             :                                        ! number of columns scattered to each process in
     202             :                                        ! field_to_chunk scatter
     203             :    integer, dimension(:), allocatable, private :: gs_col_offset
     204             :                                        ! offset of columns (-1) in pgcols scattered to
     205             :                                        ! each process in field_to_chunk scatter
     206             : 
     207             :    integer, dimension(:), allocatable, private :: btofc_blk_num
     208             :                                        ! number of grid points scattered to each process in
     209             :                                        ! block_to_chunk alltoallv, and gathered from each
     210             :                                        ! process in chunk_to_block alltoallv
     211             : 
     212             :    integer, dimension(:), allocatable, private :: btofc_chk_num
     213             :                                        ! number of grid points gathered from each process in
     214             :                                        ! block_to_chunk alltoallv, and scattered to each
     215             :                                        ! process in chunk_to_block alltoallv
     216             : 
     217             :    type btofc_pters
     218             :      integer :: ncols                  ! number of columns in block
     219             :      integer :: nlvls                  ! number of levels in columns
     220             :      integer, dimension(:,:), pointer :: pter
     221             :    end type btofc_pters
     222             :    type (btofc_pters), dimension(:), allocatable, private :: btofc_blk_offset
     223             :                                        ! offset in btoc send array (-1) where
     224             :                                        ! (blockid, bcid, k) column should be packed in
     225             :                                        ! block_to_chunk alltoallv, AND
     226             :                                        ! offset in ctob receive array (-1) from which
     227             :                                        ! (blockid, bcid, k) column should be unpacked in
     228             :                                        ! chunk_to_block alltoallv
     229             : 
     230             :    type (btofc_pters), dimension(:), allocatable, private :: btofc_chk_offset
     231             :                                        ! offset in btoc receive array (-1) from which
     232             :                                        ! (lcid, i, k) data should be unpacked in
     233             :                                        ! block_to_chunk alltoallv, AND
     234             :                                        ! offset in ctob send array (-1) where
     235             :                                        ! (lcid, i, k) data should be packed in
     236             :                                        ! chunk_to_block alltoallv
     237             : 
     238             : ! miscellaneous phys_grid data
     239             :    integer, private :: dp_coup_steps   ! number of swaps in transpose algorithm
     240             :    integer, dimension(:), private, allocatable :: dp_coup_proc
     241             :                                        ! swap partner in each step of
     242             :                                        !  transpose algorithm
     243             :    logical :: physgrid_set = .false.   ! flag indicates physics grid has been set
     244             :    integer, private :: max_nproc_smpx  ! maximum number of processes assigned to a
     245             :                                        !  single virtual SMP used to define physics
     246             :                                        !  load balancing
     247             :    integer, private :: nproc_busy_d    ! number of processes active during the dynamics
     248             :                                        !  (assigned a dynamics block)
     249             : 
     250             : ! Physics grid decomposition options:
     251             : ! -1: each chunk is a dynamics block
     252             : !  0: chunk definitions and assignments do not require interprocess comm.
     253             : !  1: chunk definitions and assignments do not require internode comm.
     254             : !  2: chunk definitions and assignments may require communication between all processes
     255             : !  3: chunk definitions and assignments only require communication with one other process
     256             : !  4: concatenated blocks, no load balancing, no interprocess communication
     257             :    integer, private, parameter :: min_lbal_opt = -1
     258             :    integer, private, parameter :: max_lbal_opt = 5
     259             :    integer, private, parameter :: def_lbal_opt = 2               ! default
     260             :    integer, private :: lbal_opt = def_lbal_opt
     261             : 
     262             : ! Physics grid load balancing options:
     263             : !  0: assign columns to chunks as single columns, wrap mapped across chunks
     264             : !  1: use (day/night; north/south) twin algorithm to determine load-balanced pairs of
     265             : !       columns and assign columns to chunks in pairs, wrap mapped
     266             :    integer, private, parameter :: min_twin_alg = 0
     267             :    integer, private, parameter :: max_twin_alg = 1
     268             :    integer, private, parameter :: def_twin_alg_lonlat = 1         ! default
     269             :    integer, private, parameter :: def_twin_alg_unstructured = 0
     270             :    integer, private :: twin_alg = def_twin_alg_lonlat
     271             : 
     272             : ! target number of chunks per thread
     273             :    integer, private, parameter :: min_chunks_per_thread = 1
     274             :    integer, private, parameter :: def_chunks_per_thread = &
     275             :                                     min_chunks_per_thread         ! default
     276             :    integer, private :: chunks_per_thread = def_chunks_per_thread
     277             : 
     278             : ! Dynamics/physics transpose method for nonlocal load-balance:
     279             : ! -1: use "0" if max_nproc_smpx and nproc_busy_d are both > npes/2; otherwise use "1"
     280             : !  0: use mpi_alltoallv
     281             : !  1: use point-to-point MPI-1 two-sided implementation
     282             : !  2: use point-to-point MPI-2 one-sided implementation if supported,
     283             : !       otherwise use MPI-1 implementation
     284             : !  3: use Co-Array Fortran implementation if supported,
     285             : !       otherwise use MPI-1 implementation
     286             : !  11-13: use mod_comm, choosing any of several methods internal to mod_comm.
     287             : !      The method within mod_comm (denoted mod_method) has possible values 0,1,2 and
     288             : !      is set according to mod_method = phys_alltoall - modmin_alltoall, where
     289             : !      modmin_alltoall is 11.
     290             :    integer, private, parameter :: min_alltoall = -1
     291             :    integer, private, parameter :: max_alltoall = 3
     292             : # if defined(MODCM_DP_TRANSPOSE)
     293             :    integer, private, parameter :: modmin_alltoall = 11
     294             :    integer, private, parameter :: modmax_alltoall = 13
     295             : # endif
     296             :    integer, private, parameter :: def_alltoall = -1                ! default
     297             :    integer, private :: phys_alltoall = def_alltoall
     298             : 
     299             :    logical :: calc_memory_increase = .false.
     300             : 
     301             : !========================================================================
     302             : contains
     303             : !========================================================================
     304             : 
     305        1536 : subroutine phys_grid_readnl(nlfile)
     306             : 
     307             :    use namelist_utils,  only: find_group_name
     308             :    use units,           only: getunit, freeunit
     309             :    use spmd_utils,      only: mpicom, mstrid=>masterprocid, mpi_integer, &
     310             :                               phys_mirror_decomp_req
     311             : #if defined(MODCM_DP_TRANSPOSE)
     312             :    use mod_comm,        only: phys_transpose_mod
     313             : #endif
     314             :    use dycore,          only: dycore_is
     315             : 
     316             :    character(len=*), intent(in) :: nlfile  ! filepath for file containing namelist input
     317             : 
     318             :    ! Local variables
     319             :    integer :: unitn, ierr
     320             :    character(len=*), parameter :: sub = 'phys_grid_readnl'
     321             : 
     322             :    integer :: phys_loadbalance
     323             :    integer :: phys_twin_algorithm
     324             :    integer :: phys_chnk_per_thd
     325             : 
     326             :    namelist /phys_grid_nl/ phys_alltoall, phys_loadbalance, phys_twin_algorithm, &
     327             :                            phys_chnk_per_thd
     328             :    !-----------------------------------------------------------------------------
     329             : 
     330             :    ! Initialize namelist vars
     331        1536 :    phys_loadbalance = def_lbal_opt
     332             : 
     333        1536 :    if (dycore_is('UNSTRUCTURED')) then
     334           0 :       phys_twin_algorithm = def_twin_alg_unstructured
     335             :    else
     336        1536 :       phys_twin_algorithm = def_twin_alg_lonlat
     337             :    endif
     338             : 
     339        1536 :    phys_chnk_per_thd = def_chunks_per_thread
     340             : 
     341             :    ! Read namelist
     342        1536 :    if (masterproc) then
     343           2 :       unitn = getunit()
     344           2 :       open( unitn, file=trim(nlfile), status='old' )
     345           2 :       call find_group_name(unitn, 'phys_grid_nl', status=ierr)
     346           2 :       if (ierr == 0) then
     347           0 :          read(unitn, phys_grid_nl, iostat=ierr)
     348           0 :          if (ierr /= 0) then
     349           0 :             call endrun(sub//': FATAL: reading namelist')
     350             :          end if
     351             :       end if
     352           2 :       close(unitn)
     353           2 :       call freeunit(unitn)
     354             :    end if
     355             : 
     356        1536 :    call mpi_bcast(phys_alltoall, 1, mpi_integer, mstrid, mpicom, ierr)
     357        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: phys_alltoall")
     358        1536 :    call mpi_bcast(phys_loadbalance, 1, mpi_integer, mstrid, mpicom, ierr)
     359        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: phys_loadbalance")
     360        1536 :    call mpi_bcast(phys_twin_algorithm, 1, mpi_integer, mstrid, mpicom, ierr)
     361        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: phys_twin_algorithm")
     362        1536 :    call mpi_bcast(phys_chnk_per_thd, 1, mpi_integer, mstrid, mpicom, ierr)
     363        1536 :    if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: phys_chnk_per_thd")
     364             : 
     365             :    ! set module variables from namelist vars
     366             : 
     367        1536 :    lbal_opt = phys_loadbalance
     368             : 
     369        1536 :    if (lbal_opt == 3) then
     370           0 :       phys_mirror_decomp_req = .true.
     371             :    else
     372        1536 :       phys_mirror_decomp_req = .false.
     373             :    endif
     374             : 
     375        1536 :    twin_alg = phys_twin_algorithm
     376             : 
     377        1536 :    chunks_per_thread = phys_chnk_per_thd
     378             : 
     379             :    ! Some consistency checks
     380             : 
     381        1536 :    if (((phys_alltoall < min_alltoall) .or.   &
     382             :         (phys_alltoall > max_alltoall))       &
     383             : # if defined(MODCM_DP_TRANSPOSE)
     384             :       .and.                                   &
     385             :       ((phys_alltoall < modmin_alltoall) .or. &
     386             :        (phys_alltoall > modmax_alltoall))     &
     387             : # endif
     388             :       ) then
     389           0 :       if (masterproc) then
     390           0 :          write(iulog,*) sub//': ERROR: phys_alltoall=', phys_alltoall, &
     391           0 :             '  is out of range.  It must be between ', min_alltoall, ' and ', max_alltoall
     392             :       endif
     393           0 :       call endrun(sub//': ERROR setting phys_alltoall')
     394             :    endif
     395             : #if defined(SPMD)
     396             : #if defined(MODCM_DP_TRANSPOSE)
     397             :    phys_transpose_mod = phys_alltoall
     398             : #endif
     399             : #endif
     400             : 
     401        1536 :    if ((lbal_opt < min_lbal_opt).or.(lbal_opt > max_lbal_opt)) then
     402           0 :       if (masterproc) then
     403           0 :          write(iulog,*) sub//': ERROR: phys_loadbalance=', phys_loadbalance, &
     404           0 :             '  is out of range.  It must be between ', min_lbal_opt, ' and ', max_lbal_opt
     405             :       endif
     406           0 :       call endrun(sub//': ERROR setting phys_loadbalance')
     407             :    endif
     408             : 
     409        1536 :    if ((twin_alg < min_twin_alg).or.(twin_alg > max_twin_alg)) then
     410           0 :       if (masterproc) then
     411           0 :          write(iulog,*) sub//': ERROR: phys_twin_algorithm=', phys_twin_algorithm, &
     412           0 :             '  is out of range.  It must be between ', min_twin_alg, ' and ', max_twin_alg
     413             :       endif
     414           0 :       call endrun(sub//': ERROR setting phys_twin_algorithm')
     415             :    endif
     416             : 
     417        1536 :    if (chunks_per_thread < min_chunks_per_thread) then
     418           0 :       if (masterproc) then
     419           0 :          write(iulog,*) sub//': ERROR: phys_chnk_per_thd=', phys_chnk_per_thd, &
     420           0 :             ' is too small.  It must not be smaller than ', min_chunks_per_thread
     421             :       endif
     422           0 :       call endrun(sub//': ERROR setting phys_chnk_per_thd')
     423             :    endif
     424             : 
     425             : 
     426        1536 :    if (masterproc) then
     427           2 :       write(iulog,*) 'PHYS_GRID options:'
     428           2 :       write(iulog,*) '  Using PCOLS         =', pcols
     429           2 :       write(iulog,*) '  phys_loadbalance    =', lbal_opt
     430           2 :       write(iulog,*) '  phys_twin_algorithm =', twin_alg
     431           2 :       write(iulog,*) '  phys_alltoall       =', phys_alltoall
     432           2 :       write(iulog,*) '  chunks_per_thread   =', chunks_per_thread
     433             :    end if
     434             : 
     435        1536 : end subroutine phys_grid_readnl
     436             : 
     437             : !===============================================================================
     438             : 
     439      105984 :   integer function get_nlcols_p()
     440      105984 :     get_nlcols_p = nlcols
     441      105984 :   end function get_nlcols_p
     442             : 
     443        1536 :   subroutine phys_grid_init( )
     444             :     !-----------------------------------------------------------------------
     445             :     !
     446             :     ! Purpose: Physics mapping initialization routine:
     447             :     !
     448             :     ! Method:
     449             :     !
     450             :     ! Author: John Drake and Patrick Worley
     451             :     !
     452             :     !-----------------------------------------------------------------------
     453             :     use mpi,              only: MPI_REAL8, MPI_MAX
     454             :     use shr_mem_mod,      only: shr_mem_getusage
     455             :     use shr_scam_mod,     only: shr_scam_GetCloseLatLon
     456             :     use scamMod,          only: closeioplonidx, closeioplatidx, single_column
     457             :     use pmgrid,           only: plev
     458             :     use dycore,           only: dycore_is
     459             :     use dyn_grid,         only: get_block_bounds_d, &
     460             :          get_block_gcol_d, get_block_gcol_cnt_d,    &
     461             :          get_block_levels_d, get_block_lvl_cnt_d,   &
     462             :          get_block_owner_d,                         &
     463             :          get_gcol_block_d, get_gcol_block_cnt_d,    &
     464             :          get_horiz_grid_dim_d, get_horiz_grid_d, physgrid_copy_attributes_d
     465             :     use spmd_utils,       only: pair, ceil2, masterprocid, mpicom
     466             :     use cam_grid_support, only: cam_grid_register, cam_grid_attribute_register
     467             :     use cam_grid_support, only: iMap, max_hcoordname_len
     468             :     use cam_grid_support, only: horiz_coord_t, horiz_coord_create
     469             :     use cam_grid_support, only: cam_grid_attribute_copy, cam_grid_attr_exists
     470             : 
     471             :     !
     472             :     !------------------------------Arguments--------------------------------
     473             :     !
     474             :     !
     475             :     !---------------------------Local workspace-----------------------------
     476             :     !
     477             :     integer :: i, j, jb, k, p             ! loop indices
     478             :     integer :: pre_i                      ! earlier index in loop iteration
     479             :     integer :: clat_p_dex, clon_p_dex     ! indices into unique lat. and lon. arrays
     480             :     integer :: maxblksiz                  ! maximum number of columns in a dynamics block
     481             :     integer :: beg_dex, end_dex           ! index range
     482             :     integer :: cid, lcid                  ! global and local chunk ids
     483             :     integer :: max_ncols                  ! upper bound on number of columns in a block
     484             :     integer :: ncols                      ! number of columns in current chunk
     485             :     integer :: curgcol, curgcol_d         ! current global column index
     486             :     integer :: firstblock, lastblock      ! global block indices
     487             :     integer :: blksiz                     ! current block size
     488             :     integer :: glbcnt, curcnt             ! running grid point counts
     489             :     integer :: curp                       ! current process id
     490             :     integer :: block_cnt                  ! number of blocks containing data
     491             :     ! for a given vertical column
     492             :     integer :: numlvl                     ! number of vertical levels in block
     493             :     ! column
     494             :     integer :: levels(plev+1)             ! vertical level indices
     495             :     integer :: owner_d                    ! process owning given block column
     496             :     integer :: owner_p                    ! process owning given chunk column
     497             :     integer :: blockids(plev+1)           ! block indices
     498             :     integer :: bcids(plev+1)              ! block column indices
     499             : 
     500             : 
     501             :     ! column surface area (from dynamics)
     502        1536 :     real(r8), dimension(:), pointer     :: area_d
     503             : 
     504             :     ! column surface areawt (from dynamics)
     505        1536 :     real(r8), dimension(:), pointer     :: areawt_d
     506             : 
     507             :     ! column integration weight (from dynamics)
     508        1536 :     real(r8), dimension(:), allocatable :: wght_d
     509             : 
     510             :     ! chunk global ordering
     511        1536 :     integer, dimension(:), allocatable :: pchunkid
     512             : 
     513             :     ! permutation array used in physics column sorting;
     514             :     ! reused later as work space in (lbal_opt == -1) logic
     515        1536 :     integer, dimension(:), allocatable :: cdex
     516             : 
     517             :     ! latitudes and longitudes and column area for dynamics columns
     518        1536 :     real(r8), dimension(:), allocatable :: clat_d
     519        1536 :     real(r8), dimension(:), allocatable :: clon_d
     520        1536 :     real(r8), dimension(:), allocatable :: lat_d
     521        1536 :     real(r8), dimension(:), allocatable :: lon_d
     522             :     real(r8) :: clat_p_tmp
     523             :     real(r8) :: clon_p_tmp
     524             : 
     525             :     ! Maps and values for physics grid
     526        1536 :     real(r8),       pointer             :: lonvals(:)
     527        1536 :     real(r8),       pointer             :: latvals(:)
     528        1536 :     real(r8),               allocatable :: latdeg_p(:)
     529        1536 :     real(r8),               allocatable :: londeg_p(:)
     530        1536 :     integer(iMap),  pointer             :: grid_map(:,:)
     531        1536 :     integer(iMap),  pointer             :: grid_map_scm(:,:)
     532        1536 :     integer(iMap),  allocatable         :: coord_map(:)
     533             :     type(horiz_coord_t), pointer        :: lat_coord
     534             :     type(horiz_coord_t), pointer        :: lon_coord
     535             :     integer                             :: gcols(pcols)
     536        1536 :     character(len=max_hcoordname_len), pointer :: copy_attributes(:)
     537             :     character(len=max_hcoordname_len)   :: copy_gridname
     538             :     logical                             :: unstructured
     539             :     real(r8)                            :: lonmin, latmin
     540             :     real(r8)                            :: mem_hw_beg, mem_hw_end
     541             :     real(r8)                            :: mem_beg, mem_end
     542             : 
     543        1536 :     nullify(area_d)
     544        1536 :     nullify(lonvals)
     545        1536 :     nullify(latvals)
     546        1536 :     nullify(grid_map)
     547           0 :     if (single_column) nullify(grid_map_scm)
     548        1536 :     nullify(lat_coord)
     549        1536 :     nullify(lon_coord)
     550             : 
     551        1536 :     if (calc_memory_increase) then
     552           0 :        call shr_mem_getusage(mem_hw_beg, mem_beg)
     553             :     end if
     554             : 
     555        1536 :     call t_startf("phys_grid_init")
     556             : 
     557             :     !-----------------------------------------------------------------------
     558             :     !
     559             :     ! Initialize physics grid, using dynamics grid
     560             :     ! a) column coordinates
     561             : 
     562        1536 :     call get_horiz_grid_dim_d(hdim1_d,hdim2_d)
     563        1536 :     ngcols = hdim1_d*hdim2_d
     564        4608 :     allocate( clat_d(1:ngcols) )
     565        3072 :     allocate( clon_d(1:ngcols) )
     566        3072 :     allocate( lat_d(1:ngcols) )
     567        3072 :     allocate( lon_d(1:ngcols) )
     568        4608 :     allocate( cdex(1:ngcols) )
     569    84936192 :     clat_d = 100000.0_r8
     570    84936192 :     clon_d = 100000.0_r8
     571        1536 :     call get_horiz_grid_d(ngcols, clat_d_out=clat_d, clon_d_out=clon_d, lat_d_out=lat_d, lon_d_out=lon_d)
     572    84937728 :     latmin = minval(lat_d)
     573    84937728 :     lonmin = minval(lon_d)
     574             : 
     575             :     ! count number of "real" column indices
     576        1536 :     num_global_phys_cols = 0
     577    84936192 :     do i=1,ngcols
     578    84936192 :        if (clon_d(i) < 100000.0_r8) then
     579    84934656 :           num_global_phys_cols = num_global_phys_cols + 1
     580             :        endif
     581             :     enddo
     582             : 
     583             :     ! sort over longitude and identify unique longitude coordinates
     584        1536 :     call IndexSet(ngcols,cdex)
     585        1536 :     call IndexSort(ngcols,cdex,clon_d,descend=.false.)
     586        1536 :     clon_p_tmp = clon_d(cdex(1))
     587        1536 :     clon_p_tot = 1
     588             : 
     589    84934656 :     do i=2,num_global_phys_cols
     590    84934656 :        if (clon_d(cdex(i)) > clon_p_tmp) then
     591      440832 :           clon_p_tot = clon_p_tot + 1
     592      440832 :           clon_p_tmp = clon_d(cdex(i))
     593             :        endif
     594             :     enddo
     595             : 
     596        4608 :     allocate( clon_p(1:clon_p_tot) )
     597        4608 :     allocate( clon_p_cnt(1:clon_p_tot) )
     598        3072 :     allocate( londeg_p(1:clon_p_tot) )
     599             : 
     600        1536 :     pre_i = 1
     601        1536 :     clon_p_tot = 1
     602        1536 :     clon_p(1) = clon_d(cdex(1))
     603        1536 :     londeg_p(1) = lon_d(cdex(1))
     604    84934656 :     do i=2,num_global_phys_cols
     605    84934656 :        if (clon_d(cdex(i)) > clon_p(clon_p_tot)) then
     606      440832 :           clon_p_cnt(clon_p_tot) = i-pre_i
     607      440832 :           pre_i = i
     608      440832 :           clon_p_tot = clon_p_tot + 1
     609      440832 :           clon_p(clon_p_tot) = clon_d(cdex(i))
     610      440832 :           londeg_p(clon_p_tot) = lon_d(cdex(i))
     611             :        endif
     612             :     enddo
     613        1536 :     clon_p_cnt(clon_p_tot) = (num_global_phys_cols+1)-pre_i
     614             : 
     615             :     ! sort over latitude and identify unique latitude coordinates
     616        1536 :     call IndexSet(ngcols,cdex)
     617        1536 :     call IndexSort(ngcols,cdex,clat_d,descend=.false.)
     618        1536 :     clat_p_tmp = clat_d(cdex(1))
     619        1536 :     clat_p_tot = 1
     620    84934656 :     do i=2,num_global_phys_cols
     621    84934656 :        if (clat_d(cdex(i)) > clat_p_tmp) then
     622      293376 :           clat_p_tot = clat_p_tot + 1
     623      293376 :           clat_p_tmp = clat_d(cdex(i))
     624             :        endif
     625             :     enddo
     626             : 
     627        4608 :     allocate( clat_p(1:clat_p_tot) )
     628        4608 :     allocate( clat_p_cnt(1:clat_p_tot) )
     629        3072 :     allocate( clat_p_idx(1:clat_p_tot) )
     630        3072 :     allocate( latdeg_p(1:clat_p_tot) )
     631             : 
     632        1536 :     pre_i = 1
     633        1536 :     clat_p_tot = 1
     634        1536 :     clat_p(1) = clat_d(cdex(1))
     635        1536 :     latdeg_p(1) = lat_d(cdex(1))
     636    84934656 :     do i=2,num_global_phys_cols
     637    84934656 :        if (clat_d(cdex(i)) > clat_p(clat_p_tot)) then
     638      293376 :           clat_p_cnt(clat_p_tot) = i-pre_i
     639      293376 :           pre_i = i
     640      293376 :           clat_p_tot = clat_p_tot + 1
     641      293376 :           clat_p(clat_p_tot) = clat_d(cdex(i))
     642      293376 :           latdeg_p(clat_p_tot) = lat_d(cdex(i))
     643             :        endif
     644             :     enddo
     645        1536 :     clat_p_cnt(clat_p_tot) = (num_global_phys_cols+1)-pre_i
     646             : 
     647        1536 :     clat_p_idx(1) = 1
     648      294912 :     do j=2,clat_p_tot
     649      294912 :        clat_p_idx(j) = clat_p_idx(j-1) + clat_p_cnt(j-1)
     650             :     enddo
     651             : 
     652        1536 :     deallocate(lat_d)
     653        1536 :     deallocate(lon_d)
     654             : 
     655             :     ! sort by longitude within latitudes
     656        1536 :     end_dex = 0
     657      296448 :     do j=1,clat_p_tot
     658      294912 :        beg_dex = end_dex + 1
     659      294912 :        end_dex = end_dex + clat_p_cnt(j)
     660      296448 :        call IndexSort(cdex(beg_dex:end_dex),clon_d,descend=.false.)
     661             :     enddo
     662             : 
     663             :     ! Early clean-up, to minimize memory high water mark
     664             :     ! (not executing find_partner or find_twin)
     665        1536 :     if (((twin_alg /= 1) .and. (lbal_opt /= 3)) .or. (lbal_opt == -1)) then
     666           0 :        deallocate( clat_p_cnt)
     667             :     end if
     668             : 
     669             :     ! save "longitude within latitude" column ordering
     670             :     ! and determine mapping from unsorted global column index to
     671             :     ! unique latitude/longitude indices
     672        4608 :     allocate( lat_p(1:ngcols) )
     673        3072 :     allocate( lon_p(1:ngcols) )
     674        3072 :     allocate( dyn_to_latlon_gcol_map(1:ngcols) )
     675        1536 :     if (lbal_opt /= -1) then
     676        4608 :        allocate(latlon_to_dyn_gcol_map(1:num_global_phys_cols))
     677             :     end if
     678             : 
     679    84936192 :     clat_p_dex = 1
     680    84936192 :     lat_p = -1
     681    84936192 :     dyn_to_latlon_gcol_map = -1
     682    84936192 :     do i = 1, num_global_phys_cols
     683    84934656 :        if (lbal_opt /= -1) latlon_to_dyn_gcol_map(i) = cdex(i)
     684    84934656 :        dyn_to_latlon_gcol_map(cdex(i)) = i
     685             : 
     686           0 :        do while ((clat_p(clat_p_dex) < clat_d(cdex(i))) .and. &
     687    85228032 :                  (clat_p_dex < clat_p_tot))
     688      293376 :           clat_p_dex = clat_p_dex + 1
     689             :        enddo
     690    84936192 :        lat_p(cdex(i)) = clat_p_dex
     691             :     enddo
     692             : 
     693             :     ! sort by latitude within longitudes
     694        1536 :     call IndexSet(ngcols,cdex)
     695        1536 :     call IndexSort(ngcols,cdex,clon_d,descend=.false.)
     696        1536 :     end_dex = 0
     697      443904 :     do i=1,clon_p_tot
     698      442368 :        beg_dex = end_dex + 1
     699      442368 :        end_dex = end_dex + clon_p_cnt(i)
     700      443904 :        call IndexSort(cdex(beg_dex:end_dex),clat_d,descend=.false.)
     701             :     enddo
     702             : 
     703             :     ! Early clean-up, to minimize memory high water mark
     704             :     ! (not executing find_twin)
     705        1536 :     if ((twin_alg /= 1) .or. (lbal_opt == -1)) deallocate( clon_p_cnt )
     706             : 
     707             :     ! save "latitude within longitude" column ordering
     708             :     ! (only need in find_twin)
     709        1536 :     if ((twin_alg == 1) .and. (lbal_opt /= -1)) &
     710        4608 :        allocate( lonlat_to_dyn_gcol_map(1:num_global_phys_cols) )
     711             : 
     712        1536 :     clon_p_dex = 1
     713    84936192 :     lon_p = -1
     714    84936192 :     do i=1,num_global_phys_cols
     715    84934656 :        if ((twin_alg == 1) .and. (lbal_opt /= -1)) &
     716    84934656 :          lonlat_to_dyn_gcol_map(i) = cdex(i)
     717    85375488 :        do while ((clon_p(clon_p_dex) < clon_d(cdex(i))) .and. &
     718   170750976 :                  (clon_p_dex < clon_p_tot))
     719      440832 :           clon_p_dex = clon_p_dex + 1
     720             :        enddo
     721    84936192 :        lon_p(cdex(i)) = clon_p_dex
     722             :     enddo
     723             : 
     724             :     ! Clean-up
     725        1536 :     deallocate( clat_d )
     726        1536 :     deallocate( clon_d )
     727        1536 :     deallocate( cdex )
     728             : 
     729             :     !
     730             :     ! Determine block index bounds
     731             :     !
     732        1536 :     call get_block_bounds_d(firstblock,lastblock)
     733             : 
     734             :     ! Allocate storage to save number of chunks and columns assigned to each
     735             :     ! process during chunk creation and assignment
     736             :     !
     737        4608 :     allocate( npchunks(0:npes-1) )
     738        3072 :     allocate( gs_col_num(0:npes-1) )
     739     1181184 :     npchunks(:) = 0
     740     1181184 :     gs_col_num(:) = 0
     741             : 
     742             :     !
     743             :     ! Option -1: each dynamics block is a single chunk
     744             :     !
     745        1536 :     if (lbal_opt == -1) then
     746             :        !
     747             :        ! Check that pcols >= maxblksiz
     748             :        !
     749           0 :        maxblksiz = 0
     750           0 :        do jb=firstblock,lastblock
     751           0 :           maxblksiz = max(maxblksiz,get_block_gcol_cnt_d(jb))
     752             :        enddo
     753           0 :        if (pcols < maxblksiz) then
     754           0 :           write(iulog,*) 'pcols = ',pcols, ' maxblksiz=',maxblksiz
     755           0 :           call endrun ('PHYS_GRID_INIT error: phys_loadbalance -1 specified but PCOLS < MAXBLKSIZ')
     756             :        endif
     757             : 
     758             :        !
     759             :        ! Determine total number of chunks
     760             :        !
     761           0 :        nchunks = (lastblock-firstblock+1)
     762             : 
     763             :        !
     764             :        ! Set max virtual SMP node size
     765             :        !
     766           0 :        max_nproc_smpx = 1
     767             : 
     768             :        !
     769             :        ! Allocate and initialize chunks data structure
     770             :        !
     771           0 :        allocate( cdex(1:maxblksiz) )
     772           0 :        allocate( chunks(1:nchunks) )
     773             : 
     774           0 :        do cid=1,nchunks
     775             :           ! get number of global column indices in block
     776           0 :           max_ncols = get_block_gcol_cnt_d(cid+firstblock-1)
     777             :           ! fill cdex array with global indices from current block
     778           0 :           call get_block_gcol_d(cid+firstblock-1,max_ncols,cdex)
     779             : 
     780           0 :           ncols = 0
     781           0 :           do i=1,max_ncols
     782             :              ! check whether global index is for a column that dynamics
     783             :              ! intends to pass to the physics
     784           0 :              curgcol_d = cdex(i)
     785           0 :              if (dyn_to_latlon_gcol_map(curgcol_d) /= -1) then
     786             :                 ! yes - then save the information
     787           0 :                 ncols = ncols + 1
     788           0 :                 chunks(cid)%gcol(ncols) = curgcol_d
     789           0 :                 chunks(cid)%lat(ncols) = lat_p(curgcol_d)
     790           0 :                 chunks(cid)%lon(ncols) = lon_p(curgcol_d)
     791             :              endif
     792             :           enddo
     793           0 :           chunks(cid)%ncols = ncols
     794             :        enddo
     795             : 
     796             :        ! Clean-up
     797           0 :        deallocate( cdex )
     798           0 :        deallocate( lat_p )
     799           0 :        deallocate( lon_p )
     800             : 
     801             :        !
     802             :        ! Specify parallel decomposition
     803             :        !
     804           0 :        do cid=1,nchunks
     805             : #if (defined SPMD)
     806           0 :           p = get_block_owner_d(cid+firstblock-1)
     807             : #else
     808             :           p = 0
     809             : #endif
     810           0 :           chunks(cid)%owner = p
     811           0 :           npchunks(p)       = npchunks(p) + 1
     812           0 :           gs_col_num(p)     = gs_col_num(p) + chunks(cid)%ncols
     813             :        enddo
     814             :        !
     815             :        ! Set flag indicating columns in physics and dynamics
     816             :        ! decompositions reside on the same processes
     817             :        !
     818           0 :        local_dp_map = .true.
     819             :        !
     820             :     else
     821             :        !
     822             :        ! Option == 0: split local blocks into chunks,
     823             :        !               while attempting to create load-balanced chunks.
     824             :        !               Does not work with vertically decomposed blocks.
     825             :        !               (default)
     826             :        ! Option == 1: split SMP-local blocks into chunks,
     827             :        !               while attempting to create load-balanced chunks.
     828             :        !               Does not work with vertically decomposed blocks.
     829             :        ! Option == 2: load balance chunks with respect to diurnal and
     830             :        !               seaonsal cycles and wth respect to latitude,
     831             :        !               and assign chunks to processes
     832             :        !               in a way that attempts to minimize communication costs
     833             :        ! Option == 3: divide processes into pairs and split
     834             :        !               blocks assigned to these pairs into
     835             :        !               chunks, attempting to create load-balanced chunks.
     836             :        !               The process pairs are chosen to maximize load balancing
     837             :        !               opportunities.
     838             :        !               Does not work with vertically decomposed blocks.
     839             :        ! Option == 4: concatenate local blocks, then
     840             :        !               divide into chunks.
     841             :        !               Does not work with vertically decomposed blocks.
     842             :        ! Option == 5: split indiviudal blocks into chunks,
     843             :        !               assigning columns using block ordering
     844             :        !
     845             :        !
     846             :        ! Allocate and initialize chunks data structure, then
     847             :        ! assign chunks to processes.
     848             :        !
     849        1536 :        call create_chunks(lbal_opt, chunks_per_thread)
     850             : 
     851             :        ! Early clean-up, to minimize memory high water mark
     852        1536 :        deallocate( lat_p )
     853        1536 :        deallocate( lon_p )
     854        1536 :        deallocate( latlon_to_dyn_gcol_map )
     855        1536 :        if  (twin_alg == 1) deallocate( lonlat_to_dyn_gcol_map )
     856        1536 :        if  (twin_alg == 1) deallocate( clon_p_cnt )
     857        1536 :        if ((twin_alg == 1) .or. (lbal_opt == 3)) deallocate( clat_p_cnt )
     858             : 
     859             :        !
     860             :        ! Determine whether dynamics and physics decompositions
     861             :        ! are colocated, not requiring any interprocess communication
     862             :        ! in the coupling.
     863        1536 :        local_dp_map = .true.
     864     5899776 :        do cid=1,nchunks
     865    90834432 :           do i=1,chunks(cid)%ncols
     866    84934656 :              curgcol_d = chunks(cid)%gcol(i)
     867    84934656 :              block_cnt = get_gcol_block_cnt_d(curgcol_d)
     868    84934656 :              call get_gcol_block_d(curgcol_d,block_cnt,blockids,bcids)
     869   175767552 :              do jb=1,block_cnt
     870    84934656 :                 owner_d = get_block_owner_d(blockids(jb))
     871   169869312 :                 if (owner_d /= chunks(cid)%owner) then
     872    79641600 :                    local_dp_map = .false.
     873             :                 endif
     874             :              enddo
     875             :           enddo
     876             :        enddo
     877             :     endif
     878             :     !
     879             :     ! Allocate and initialize data structures for gather/scatter
     880             :     !
     881        4608 :     allocate( pgcols(1:num_global_phys_cols) )
     882        4608 :     allocate( gs_col_offset(0:npes) )
     883        3072 :     allocate( pchunkid(0:npes) )
     884             : 
     885             :     ! Initialize pchunkid and gs_col_offset by summing
     886             :     ! number of chunks and columns per process, respectively
     887        1536 :     pchunkid(0) = 0
     888        1536 :     gs_col_offset(0) = 0
     889     1179648 :     do p=1,npes-1
     890     1178112 :        pchunkid(p)      = pchunkid(p-1)      + npchunks(p-1)
     891     1179648 :        gs_col_offset(p) = gs_col_offset(p-1) + gs_col_num(p-1)
     892             :     enddo
     893             : 
     894             :     ! Determine local ordering via "process id" bin sort
     895     5899776 :     do cid=1,nchunks
     896     5898240 :        p = chunks(cid)%owner
     897     5898240 :        pchunkid(p) = pchunkid(p) + 1
     898             : 
     899     5898240 :        chunks(cid)%lcid = pchunkid(p) + lastblock
     900             : 
     901     5898240 :        curgcol = gs_col_offset(p)
     902    90832896 :        do i=1,chunks(cid)%ncols
     903    84934656 :           curgcol = curgcol + 1
     904    84934656 :           pgcols(curgcol)%chunk = cid
     905    90832896 :           pgcols(curgcol)%ccol = i
     906             :        enddo
     907     5899776 :        gs_col_offset(p) = curgcol
     908             :     enddo
     909             : 
     910             :     ! Reinitialize pchunkid and gs_col_offset (for real)
     911        1536 :     pchunkid(0) = 1
     912        1536 :     gs_col_offset(0) = 1
     913     1179648 :     do p=1,npes-1
     914     1178112 :        pchunkid(p)      = pchunkid(p-1)      + npchunks(p-1)
     915     1179648 :        gs_col_offset(p) = gs_col_offset(p-1) + gs_col_num(p-1)
     916             :     enddo
     917        1536 :     pchunkid(npes)      = pchunkid(npes-1)      + npchunks(npes-1)
     918        1536 :     gs_col_offset(npes) = gs_col_offset(npes-1) + gs_col_num(npes-1)
     919             : 
     920             :     ! Save local information
     921             :     ! (Local chunk index range chosen so that it does not overlap
     922             :     !  {begblock,...,endblock})
     923             :     !
     924        1536 :     nlcols   = gs_col_num(iam)
     925        1536 :     nlchunks = npchunks(iam)
     926        1536 :     begchunk = pchunkid(iam)   + lastblock
     927        1536 :     endchunk = pchunkid(iam+1) + lastblock - 1
     928             :     !
     929        4608 :     allocate( lchunks(begchunk:endchunk) )
     930     5899776 :     do cid=1,nchunks
     931     5899776 :        if (chunks(cid)%owner == iam) then
     932        7680 :           lcid = chunks(cid)%lcid
     933        7680 :           lchunks(lcid)%ncols = chunks(cid)%ncols
     934        7680 :           lchunks(lcid)%cid   = cid
     935      118272 :           do i=1,chunks(cid)%ncols
     936      118272 :              lchunks(lcid)%gcol(i) = chunks(cid)%gcol(i)
     937             :           enddo
     938             :        endif
     939             :     enddo
     940             : 
     941        1536 :     deallocate( pchunkid )
     942        1536 :     deallocate( npchunks )
     943             :     !
     944             :     !-----------------------------------------------------------------------
     945             :     !
     946             :     ! Initialize physics grid, using dynamics grid
     947             :     ! b) column area and integration weight
     948             : 
     949        4608 :     allocate( area_d(1:ngcols) )
     950        3072 :     allocate( wght_d(1:ngcols) )
     951    84936192 :     area_d = 0.0_r8
     952    84936192 :     wght_d = 0.0_r8
     953             : 
     954        1536 :     call get_horiz_grid_d(ngcols, area_d_out=area_d, wght_d_out=wght_d)
     955             : 
     956             : 
     957    84936192 :     if ( abs(sum(area_d) - 4.0_r8*pi) > 1.e-10_r8 ) then
     958           0 :        write(iulog,*) ' ERROR: sum of areas on globe does not equal 4*pi'
     959           0 :        write(iulog,*) ' sum of areas = ', sum(area_d), sum(area_d)-4.0_r8*pi
     960           0 :        call endrun('phys_grid')
     961             :     end if
     962             : 
     963    84936192 :     if ( abs(sum(wght_d) - 4.0_r8*pi) > 1.e-10_r8 ) then
     964           0 :        write(iulog,*) ' ERROR: sum of integration weights on globe does not equal 4*pi'
     965           0 :        write(iulog,*) ' sum of weights = ', sum(wght_d), sum(wght_d)-4.0_r8*pi
     966           0 :        call endrun('phys_grid')
     967             :     end if
     968             : 
     969        9216 :     do lcid=begchunk,endchunk
     970      119808 :        do i=1,lchunks(lcid)%ncols
     971      110592 :           lchunks(lcid)%area(i) = area_d(lchunks(lcid)%gcol(i))
     972      118272 :           lchunks(lcid)%wght(i) = wght_d(lchunks(lcid)%gcol(i))
     973             :        enddo
     974             :     enddo
     975             : 
     976        1536 :     deallocate( area_d )
     977             :     nullify(area_d)
     978        1536 :     deallocate( wght_d )
     979             : 
     980        1536 :     if (.not. local_dp_map) then
     981             :        !
     982             :        ! allocate and initialize data structures for transposes
     983             :        !
     984        4608 :        allocate( btofc_blk_num(0:npes-1) )
     985     1181184 :        btofc_blk_num = 0
     986        4608 :        allocate( btofc_blk_offset(firstblock:lastblock) )
     987     1181184 :        do jb = firstblock,lastblock
     988     1181184 :           nullify( btofc_blk_offset(jb)%pter )
     989             :        enddo
     990             :        !
     991        1536 :        glbcnt = 0
     992        1536 :        curcnt = 0
     993        1536 :        curp = 0
     994    84936192 :        do curgcol=1,num_global_phys_cols
     995    84934656 :           cid = pgcols(curgcol)%chunk
     996    84934656 :           i   = pgcols(curgcol)%ccol
     997    84934656 :           owner_p   = chunks(cid)%owner
     998    86112768 :           do while (curp < owner_p)
     999     1178112 :              btofc_blk_num(curp) = curcnt
    1000     1178112 :              curcnt = 0
    1001     1178112 :              curp = curp + 1
    1002             :           enddo
    1003    84934656 :           curgcol_d = chunks(cid)%gcol(i)
    1004    84934656 :           block_cnt = get_gcol_block_cnt_d(curgcol_d)
    1005    84934656 :           call get_gcol_block_d(curgcol_d,block_cnt,blockids,bcids)
    1006   169870848 :           do jb = 1,block_cnt
    1007    84934656 :              owner_d = get_block_owner_d(blockids(jb))
    1008   169869312 :              if (iam == owner_d) then
    1009      110592 :                 if (.not. associated(btofc_blk_offset(blockids(jb))%pter)) then
    1010        1536 :                    blksiz = get_block_gcol_cnt_d(blockids(jb))
    1011        1536 :                    numlvl = get_block_lvl_cnt_d(blockids(jb),bcids(jb))
    1012        1536 :                    btofc_blk_offset(blockids(jb))%ncols = blksiz
    1013        1536 :                    btofc_blk_offset(blockids(jb))%nlvls = numlvl
    1014        6144 :                    allocate( btofc_blk_offset(blockids(jb))%pter(blksiz,numlvl) )
    1015             :                 endif
    1016     3760128 :                 do k=1,btofc_blk_offset(blockids(jb))%nlvls
    1017     3649536 :                    btofc_blk_offset(blockids(jb))%pter(bcids(jb),k) = glbcnt
    1018     3649536 :                    curcnt = curcnt + 1
    1019     3760128 :                    glbcnt = glbcnt + 1
    1020             :                 enddo
    1021             :              endif
    1022             :           enddo
    1023             :        enddo
    1024        1536 :        btofc_blk_num(curp) = curcnt
    1025        1536 :        block_buf_nrecs = glbcnt
    1026             :        !
    1027        4608 :        allocate( btofc_chk_num(0:npes-1) )
    1028     1181184 :        btofc_chk_num = 0
    1029        4608 :        allocate( btofc_chk_offset(begchunk:endchunk) )
    1030        9216 :        do lcid=begchunk,endchunk
    1031        7680 :           ncols = lchunks(lcid)%ncols
    1032        7680 :           btofc_chk_offset(lcid)%ncols = ncols
    1033        7680 :           btofc_chk_offset(lcid)%nlvls = pver+1
    1034       24576 :           allocate( btofc_chk_offset(lcid)%pter(ncols,pver+1) )
    1035             :        enddo
    1036             :        !
    1037        1536 :        curcnt = 0
    1038        1536 :        glbcnt = 0
    1039     1181184 :        do p=0,npes-1
    1040    86114304 :           do curgcol=gs_col_offset(iam),gs_col_offset(iam+1)-1
    1041    84934656 :              cid  = pgcols(curgcol)%chunk
    1042    84934656 :              owner_p  = chunks(cid)%owner
    1043    86114304 :              if (iam == owner_p) then
    1044    84934656 :                 i    = pgcols(curgcol)%ccol
    1045    84934656 :                 lcid = chunks(cid)%lcid
    1046    84934656 :                 curgcol_d = chunks(cid)%gcol(i)
    1047    84934656 :                 block_cnt = get_gcol_block_cnt_d(curgcol_d)
    1048    84934656 :                 call get_gcol_block_d(curgcol_d,block_cnt,blockids,bcids)
    1049   169869312 :                 do jb = 1,block_cnt
    1050    84934656 :                    owner_d = get_block_owner_d(blockids(jb))
    1051   169869312 :                    if (p == owner_d) then
    1052      110592 :                       numlvl = get_block_lvl_cnt_d(blockids(jb),bcids(jb))
    1053             :                       call get_block_levels_d(blockids(jb),bcids(jb),numlvl,levels)
    1054     3760128 :                       do k=1,numlvl
    1055     3649536 :                          btofc_chk_offset(lcid)%pter(i,levels(k)+1) = glbcnt
    1056     3649536 :                          curcnt = curcnt + 1
    1057     3760128 :                          glbcnt = glbcnt + 1
    1058             :                       enddo
    1059             :                    endif
    1060             :                 enddo
    1061             :              endif
    1062             :           enddo
    1063     1179648 :           btofc_chk_num(p) = curcnt
    1064     1181184 :           curcnt = 0
    1065             :        enddo
    1066        1536 :        chunk_buf_nrecs = glbcnt
    1067             :        !
    1068             :        ! Precompute swap partners and number of steps in point-to-point
    1069             :        ! implementations of alltoall algorithm.
    1070             :        ! First, determine number of swaps.
    1071             :        !
    1072        1536 :        dp_coup_steps = 0
    1073     1572864 :        do i=1,ceil2(npes)-1
    1074     1571328 :           p = pair(npes,i,iam)
    1075     1572864 :           if (p >= 0) then
    1076     1178112 :              if ((btofc_blk_num(p) > 0 .or. btofc_chk_num(p) > 0)) then
    1077       38344 :                 dp_coup_steps = dp_coup_steps + 1
    1078             :              end if
    1079             :           end if
    1080             :        end do
    1081             :        !
    1082             :        ! Second, determine swap partners.
    1083             :        !
    1084        4608 :        allocate( dp_coup_proc(dp_coup_steps) )
    1085        1536 :        dp_coup_steps = 0
    1086     1572864 :        do i=1,ceil2(npes)-1
    1087     1571328 :           p = pair(npes,i,iam)
    1088     1572864 :           if (p >= 0) then
    1089     1178112 :              if ((btofc_blk_num(p) > 0 .or. btofc_chk_num(p) > 0)) then
    1090       38344 :                 dp_coup_steps = dp_coup_steps + 1
    1091       38344 :                 dp_coup_proc(dp_coup_steps) = p
    1092             :              end if
    1093             :           end if
    1094             :        end do
    1095             :        !
    1096             :     endif
    1097             : 
    1098             :     ! Final clean-up
    1099        1536 :     deallocate( gs_col_offset )
    1100             :     ! (if eliminate get_lon_xxx, can also deallocate
    1101             :     !  clat_p_idx, and grid_latlon?))
    1102             : 
    1103             :     ! Add physics-package grid to set of CAM grids
    1104             :     ! physgrid always uses 'lat' and 'lon' as coordinate names; If dynamics
    1105             :     !    grid is different, it will use different coordinate names
    1106             : 
    1107             :     ! First, create a map for the physics grid
    1108             :     ! It's structure will depend on whether or not the physics grid is
    1109             :     ! unstructured
    1110        1536 :     unstructured = dycore_is('UNSTRUCTURED')
    1111        1536 :     if (unstructured) then
    1112           0 :       allocate(grid_map(3, pcols * (endchunk - begchunk + 1)))
    1113           0 :       if (single_column) allocate(grid_map_scm(3, pcols * (endchunk - begchunk + 1)))
    1114             :     else
    1115        4608 :       allocate(grid_map(4, pcols * (endchunk - begchunk + 1)))
    1116        1536 :       if (single_column) allocate(grid_map_scm(4, pcols * (endchunk - begchunk + 1)))
    1117             :     end if
    1118      615936 :     grid_map = 0
    1119        1536 :     if (single_column) grid_map_scm = 0
    1120        4608 :     allocate(latvals(size(grid_map, 2)))
    1121        3072 :     allocate(lonvals(size(grid_map, 2)))
    1122        1536 :     p = 0
    1123        9216 :     do lcid = begchunk, endchunk
    1124        7680 :       ncols = lchunks(lcid)%ncols
    1125        7680 :       call get_gcol_all_p(lcid, pcols, gcols)
    1126             :       ! collect latvals and lonvals
    1127        7680 :       cid = lchunks(lcid)%cid
    1128      118272 :       do i = 1, chunks(cid)%ncols
    1129      110592 :         latvals(p + i) = latdeg_p(chunks(cid)%lat(i))
    1130      118272 :         lonvals(p + i) = londeg_p(chunks(cid)%lon(i))
    1131             :       end do
    1132        7680 :       if (pcols > ncols) then
    1133             :         ! Need to set these to detect unused columns
    1134       19968 :         latvals(p+ncols+1:p+pcols) = 1000.0_r8
    1135       19968 :         lonvals(p+ncols+1:p+pcols) = 1000.0_r8
    1136             :       end if
    1137             : 
    1138             :       ! Set grid values for this chunk
    1139      132096 :       do i = 1, pcols
    1140      122880 :         p = p + 1
    1141      122880 :         grid_map(1, p) = i
    1142      122880 :         grid_map(2, p) = lcid
    1143      122880 :         if (single_column) then
    1144           0 :            grid_map_scm(1, p) = i
    1145           0 :            grid_map_scm(2, p) = lcid
    1146             :         end if
    1147      130560 :         if ((i <= ncols) .and. (gcols(i) > 0)) then
    1148      110592 :           if (unstructured) then
    1149           0 :             grid_map(3, p) = gcols(i)
    1150           0 :             if (single_column) grid_map_scm(3, p) = closeioplonidx
    1151             :           else
    1152      110592 :               grid_map(3, p) = get_lon_p(lcid, i)
    1153      110592 :               grid_map(4, p) = get_lat_p(lcid, i)
    1154      110592 :               if (single_column) then
    1155           0 :                  grid_map_scm(3, p) = closeioplonidx
    1156           0 :                  grid_map_scm(4, p) = closeioplatidx
    1157             :               end if
    1158             :           end if
    1159             :         else
    1160       12288 :           if (i <= ncols) then
    1161           0 :             call endrun("phys_grid_init: unmapped column")
    1162             :           end if
    1163             :         end if
    1164             :       end do
    1165             :     end do
    1166             : 
    1167             :     ! Note that if the dycore is using the same points as the physics grid,
    1168             :     !      it will have already set up 'lat' and 'lon' axes for the physics grid
    1169             :     !      However, these will be in the dynamics decomposition
    1170        1536 :     if (unstructured) then
    1171             :       lon_coord => horiz_coord_create('lon', 'ncol', num_global_phys_cols,    &
    1172             :            'longitude', 'degrees_east', 1, size(lonvals), lonvals,            &
    1173           0 :            map=grid_map(3,:))
    1174             :       lat_coord => horiz_coord_create('lat', 'ncol', num_global_phys_cols,    &
    1175             :            'latitude', 'degrees_north', 1, size(latvals), latvals,            &
    1176           0 :            map=grid_map(3,:))
    1177             :     else
    1178             : 
    1179        4608 :       allocate(coord_map(size(grid_map, 2)))
    1180             : 
    1181             :       ! Create a lon coord map which only writes from one of each unique lon
    1182      125952 :       where(latvals == latmin)
    1183        3072 :         coord_map(:) = grid_map(3, :)
    1184             :       elsewhere
    1185             :         coord_map(:) = 0_iMap
    1186             :       end where
    1187             :       lon_coord => horiz_coord_create('lon', 'lon', hdim1_d, 'longitude',     &
    1188        1536 :            'degrees_east', 1, size(lonvals), lonvals, map=coord_map)
    1189             : 
    1190             :       ! Create a lat coord map which only writes from one of each unique lat
    1191      125952 :       where(lonvals == lonmin)
    1192        3072 :         coord_map(:) = grid_map(4, :)
    1193             :       elsewhere
    1194             :         coord_map(:) = 0_iMap
    1195             :       end where
    1196             :       lat_coord => horiz_coord_create('lat', 'lat', hdim2_d, 'latitude',      &
    1197        1536 :            'degrees_north', 1, size(latvals), latvals, map=coord_map)
    1198             : 
    1199        1536 :       deallocate(coord_map)
    1200             : 
    1201             :     end if
    1202             :     call cam_grid_register('physgrid', phys_decomp, lat_coord, lon_coord,     &
    1203        1536 :          grid_map, unstruct=unstructured, block_indexed=.true.)
    1204        1536 :     if (single_column) call cam_grid_register('physgrid_scm', phys_decomp_scm, lat_coord, lon_coord,     &
    1205           0 :          grid_map_scm, unstruct=unstructured, block_indexed=.true.)
    1206             :     ! Copy required attributes from the dynamics array
    1207        1536 :     nullify(copy_attributes)
    1208        1536 :     call physgrid_copy_attributes_d(copy_gridname, copy_attributes)
    1209        3072 :     do i = 1, size(copy_attributes)
    1210        3072 :       call cam_grid_attribute_copy(copy_gridname, 'physgrid', copy_attributes(i))
    1211             :     end do
    1212        1536 :     if ((.not. cam_grid_attr_exists('physgrid', 'area')) .and. unstructured) then
    1213             :       ! Physgrid always needs an area attribute. If we did not inherit one
    1214             :       !   from the dycore (i.e., physics and dynamics are on different grids),
    1215             :       !   create that attribute here (unstructured grids only, physgrid is
    1216             :       !   not supported for structured grids).
    1217           0 :       allocate(area_d(size(grid_map, 2)))
    1218           0 :       allocate(areawt_d(size(grid_map, 2)))
    1219           0 :       p = 0
    1220           0 :       do lcid = begchunk, endchunk
    1221           0 :         ncols = lchunks(lcid)%ncols
    1222           0 :         call get_gcol_all_p(lcid, pcols, gcols)
    1223             :         ! collect latvals and lonvals
    1224           0 :         cid = lchunks(lcid)%cid
    1225           0 :         do i = 1, chunks(cid)%ncols
    1226           0 :           area_d(p + i) = lchunks(lcid)%area(i)
    1227           0 :           areawt_d(p + i) = lchunks(lcid)%wght(i)
    1228             :         end do
    1229           0 :         if (pcols > ncols) then
    1230             :           ! Need to set these to detect unused columns
    1231           0 :           area_d(p+ncols+1:p+pcols) = 0.0_r8
    1232           0 :           areawt_d(p+ncols+1:p+pcols) = 0.0_r8
    1233             :         end if
    1234           0 :         p = p + pcols
    1235             :       end do
    1236             :       call cam_grid_attribute_register('physgrid', 'area',                    &
    1237           0 :            'physics column areas', 'ncol', area_d, map=grid_map(3,:))
    1238             :       call cam_grid_attribute_register('physgrid', 'areawt',                &
    1239           0 :            'physics column area wts', 'ncol', areawt_d, map=grid_map(3,:))
    1240           0 :       nullify(area_d) ! Belongs to attribute now
    1241           0 :       nullify(areawt_d) ! Belongs to attribute now
    1242             :     end if
    1243             :     ! Cleanup pointers (they belong to the grid now)
    1244        1536 :     nullify(grid_map)
    1245        1536 :     if (single_column) nullify(grid_map_scm)
    1246        1536 :     deallocate(latvals)
    1247             :     nullify(latvals)
    1248        1536 :     deallocate(lonvals)
    1249             :     nullify(lonvals)
    1250             :     ! Cleanup, we are responsible for copy attributes
    1251        1536 :     if (associated(copy_attributes)) then
    1252        1536 :       deallocate(copy_attributes)
    1253             :       nullify(copy_attributes)
    1254             :     end if
    1255             : 
    1256             :     !
    1257        1536 :     physgrid_set = .true.   ! Set flag indicating physics grid is now set
    1258             :     !
    1259        1536 :     call t_stopf("phys_grid_init")
    1260             : 
    1261        1536 :     if (calc_memory_increase) then
    1262           0 :        call shr_mem_getusage(mem_hw_end, mem_end)
    1263           0 :        clat_p_tmp = mem_end - mem_beg
    1264             :        call MPI_reduce(clat_p_tmp, mem_end, 1, MPI_REAL8, MPI_MAX,            &
    1265           0 :             masterprocid, mpicom, curp)
    1266           0 :        if (masterproc) then
    1267           0 :           write(iulog, *) 'phys_grid_init: Increase in memory usage = ',      &
    1268           0 :                mem_end, ' (MB)'
    1269             :        end if
    1270           0 :        clat_p_tmp = mem_hw_end - mem_hw_beg
    1271             :        call MPI_reduce(clat_p_tmp, mem_hw_end, 1, MPI_REAL8, MPI_MAX,         &
    1272           0 :             masterprocid, mpicom, curp)
    1273           0 :        if (masterproc) then
    1274           0 :           write(iulog, *) 'phys_grid_init: Increase in memory highwater = ',  &
    1275           0 :                mem_end, ' (MB)'
    1276             :        end if
    1277             :     end if
    1278             : 
    1279        4608 :   end subroutine phys_grid_init
    1280             : 
    1281             : !========================================================================
    1282             : 
    1283           0 : subroutine phys_grid_find_col(lat, lon, owner, lcid, icol)
    1284             : 
    1285             :    !-----------------------------------------------------------------------
    1286             :    !
    1287             :    ! Purpose: Find the global column closest to the point specified by lat
    1288             :    !          and lon.  Return indices of owning process, local chunk, and
    1289             :    !          column.
    1290             :    !
    1291             :    ! Authors: Phil Rasch / Patrick Worley / B. Eaton
    1292             :    !
    1293             :    !-----------------------------------------------------------------------
    1294             : 
    1295             :    real(r8), intent(in) :: lat, lon    ! requested location in degrees
    1296             :    integer, intent(out) :: owner       ! rank of chunk owner
    1297             :    integer, intent(out) :: lcid      ! local chunk index
    1298             :    integer, intent(out) :: icol        ! column index within the chunk
    1299             : 
    1300             :    ! local
    1301             :    real(r8) dist2           ! the distance (in radians**2 from lat, lon)
    1302             :    real(r8) distmin         ! the distance (in radians**2 from closest column)
    1303             :    real(r8) latr, lonr      ! lat, lon (in radians) of requested location
    1304             :    real(r8) clat, clon      ! lat, lon (in radians) of column being tested
    1305             :    real(r8) const
    1306             : 
    1307             :    integer i
    1308             :    integer cid
    1309             :    !-----------------------------------------------------------------------
    1310             : 
    1311             :    ! Check that input lat and lon are in valid range
    1312             :    if (lon < 0.0_r8 .or. lon >= 360._r8 .or. &
    1313           0 :        lat < -90._r8 .or. lat > 90._r8) then
    1314           0 :       if (masterproc) then
    1315             :          write(iulog,*) &
    1316           0 :             'phys_grid_find_col: ERROR: lon must satisfy 0.<=lon<360. and lat must satisfy -90<=lat<=90.'
    1317             :          write(iulog,*) &
    1318           0 :             'input lon=', lon, '  input lat=', lat
    1319             :       endif
    1320           0 :       call endrun('phys_grid_find_col: input ERROR')
    1321             :    end if
    1322             : 
    1323           0 :    const = 180._r8/pi            ! degrees per radian
    1324           0 :    latr = lat/const              ! to radians
    1325           0 :    lonr = lon/const              ! to radians
    1326             : 
    1327           0 :    owner   = -999
    1328           0 :    lcid  = -999
    1329           0 :    icol    = -999
    1330           0 :    distmin = 1.e10_r8
    1331             : 
    1332             :    ! scan all chunks for closest point to lat, lon
    1333           0 :    do cid = 1, nchunks
    1334           0 :       do i = 1, chunks(cid)%ncols
    1335           0 :          clat = clat_p(chunks(cid)%lat(i))
    1336           0 :          clon = clon_p(chunks(cid)%lon(i))
    1337           0 :          dist2 = (clat-latr)**2 + (clon-lonr)**2
    1338           0 :          if (dist2 < distmin ) then
    1339           0 :             distmin = dist2
    1340           0 :             owner = chunks(cid)%owner
    1341           0 :             lcid = chunks(cid)%lcid
    1342           0 :             icol = i
    1343             :          endif
    1344             :       enddo
    1345             :    end do
    1346             : 
    1347        1536 : end subroutine phys_grid_find_col
    1348             : 
    1349             : !========================================================================
    1350             : 
    1351           0 : subroutine phys_grid_find_cols(lat, lon, nclosest, owner, lcid, icol, distmin, mlats, mlons)
    1352             : 
    1353             :    !-----------------------------------------------------------------------
    1354             :    !
    1355             :    ! Purpose: Find the global columns closest to the point specified by lat
    1356             :    !          and lon.  Return indices of owning process, local chunk, and
    1357             :    !          column.
    1358             :    !
    1359             :    ! Authors: Phil Rasch / Patrick Worley / B. Eaton
    1360             :    !
    1361             :    !-----------------------------------------------------------------------
    1362             :    use physconst,    only : rearth
    1363             : 
    1364             :    real(r8), intent(in) :: lat, lon            ! requested location in degrees
    1365             :    integer, intent(in)  :: nclosest            ! number of closest points to find
    1366             :    integer, intent(out) :: owner(nclosest)     ! rank of chunk owner
    1367             :    integer, intent(out) :: lcid(nclosest)      ! local chunk index
    1368             :    integer, intent(out) :: icol(nclosest)      ! column index within the chunk
    1369             :    real(r8),intent(out) :: distmin(nclosest)   ! the distance (m) of the closest column(s)
    1370             :    real(r8),intent(out) :: mlats(nclosest)     ! the latitude of the closest column(s)
    1371             :    real(r8),intent(out) :: mlons(nclosest)     ! the longitude of the closest column(s)
    1372             : 
    1373             :    ! local
    1374             :    real(r8) dist2           ! the distance (in radians**2 from lat, lon)
    1375             :    real(r8) latr, lonr      ! lat, lon (in radians) of requested location
    1376             :    real(r8) clat, clon      ! lat, lon (in radians) of column being tested
    1377             :    real(r8) const
    1378             : 
    1379             :    integer i, j
    1380             :    integer cid
    1381             :    !-----------------------------------------------------------------------
    1382             : 
    1383             :    ! Check that input lat and lon are in valid range
    1384             :    if (lon < 0.0_r8 .or. lon >= 360._r8 .or. &
    1385           0 :        lat < -90._r8 .or. lat > 90._r8) then
    1386           0 :       if (masterproc) then
    1387             :          write(iulog,*) &
    1388           0 :             'phys_grid_find_cols: ERROR: lon must satisfy 0.<=lon<360. and lat must satisfy -90<=lat<=90.'
    1389             :          write(iulog,*) &
    1390           0 :             'input lon=', lon, '  input lat=', lat
    1391             :       endif
    1392           0 :       call endrun('phys_grid_find_cols: input ERROR')
    1393             :    end if
    1394             : 
    1395           0 :    const = 180._r8/pi            ! degrees per radian
    1396           0 :    latr = lat/const              ! to radians
    1397           0 :    lonr = lon/const              ! to radians
    1398             : 
    1399           0 :    owner(:)   = -999
    1400           0 :    lcid(:)    = -999
    1401           0 :    icol(:)    = -999
    1402           0 :    mlats(:)   = -999
    1403           0 :    mlons(:)   = -999
    1404           0 :    distmin(:) = 1.e10_r8
    1405             : 
    1406             :    ! scan all chunks for closest point to lat, lon
    1407           0 :    do cid = 1, nchunks
    1408           0 :       do i = 1, chunks(cid)%ncols
    1409           0 :          clat = clat_p(chunks(cid)%lat(i))
    1410           0 :          clon = clon_p(chunks(cid)%lon(i))
    1411           0 :          dist2 = acos(sin(latr) * sin(clat) + cos(latr) * cos(clat) * cos(clon - lonr)) * rearth
    1412             : 
    1413           0 :          do j = nclosest, 1, -1
    1414           0 :             if (dist2 < distmin(j)) then
    1415             : 
    1416           0 :                if (j < nclosest) then
    1417           0 :                  distmin(j+1) = distmin(j)
    1418           0 :                  owner(j+1)   = owner(j)
    1419           0 :                  lcid(j+1)    = lcid(j)
    1420           0 :                  icol(j+1)    = icol(j)
    1421           0 :                  mlats(j+1)   = mlats(j)
    1422           0 :                  mlons(j+1)    = mlons(j)
    1423             :                end if
    1424             : 
    1425           0 :                distmin(j) = dist2
    1426           0 :                owner(j)   = chunks(cid)%owner
    1427           0 :                lcid(j)    = chunks(cid)%lcid
    1428           0 :                icol(j)    = i
    1429           0 :                mlats(j)   = clat * const
    1430           0 :                mlons(j)   = clon * const
    1431             :             else
    1432             :                exit
    1433             :             end if
    1434             :          enddo
    1435             :       enddo
    1436             :    end do
    1437             : 
    1438           0 : end subroutine phys_grid_find_cols
    1439             : !
    1440             : !========================================================================
    1441             : 
    1442        4608 : logical function phys_grid_initialized ()
    1443             : !-----------------------------------------------------------------------
    1444             : !
    1445             : ! Purpose: Identify whether phys_grid has been called yet or not
    1446             : !
    1447             : ! Method: Return physgrid_set
    1448             : !
    1449             : ! Author: Pat Worley
    1450             : !
    1451             : !-----------------------------------------------------------------------
    1452             : !-----------------------------------------------------------------------
    1453             : !
    1454        4608 :    phys_grid_initialized = physgrid_set
    1455             : !
    1456             :    return
    1457             :    end function phys_grid_initialized
    1458             : 
    1459             : !
    1460             : !========================================================================
    1461             : !
    1462           0 :    subroutine get_chunk_indices_p(index_beg, index_end)
    1463             : !-----------------------------------------------------------------------
    1464             : !
    1465             : ! Purpose: Return range of indices for local chunks
    1466             : !
    1467             : ! Method:
    1468             : !
    1469             : ! Author: Patrick Worley
    1470             : !
    1471             : !-----------------------------------------------------------------------
    1472             : !------------------------------Arguments--------------------------------
    1473             :    integer, intent(out) :: index_beg  ! first index used for local chunks
    1474             :    integer, intent(out) :: index_end  ! last index used for local chunks
    1475             : !-----------------------------------------------------------------------
    1476             : 
    1477           0 :    index_beg = begchunk
    1478           0 :    index_end = endchunk
    1479             : 
    1480           0 :    return
    1481             :    end subroutine get_chunk_indices_p
    1482             : !
    1483             : !========================================================================
    1484             : !
    1485       15360 :    subroutine get_gcol_all_p(lcid, latdim, gcols)
    1486             : !-----------------------------------------------------------------------
    1487             : !
    1488             : ! Purpose: Return all global column indices for chunk
    1489             : !
    1490             : ! Method:
    1491             : !
    1492             : ! Author: Patrick Worley
    1493             : !
    1494             : !-----------------------------------------------------------------------
    1495             : !------------------------------Arguments--------------------------------
    1496             :      integer, intent(in)  :: lcid        ! local chunk id
    1497             :      integer, intent(in)  :: latdim      ! declared size of output array
    1498             : 
    1499             :      integer, intent(out) :: gcols(:)    ! array of global latitude indices
    1500             : !---------------------------Local workspace-----------------------------
    1501             :      integer :: i                        ! loop index
    1502             : 
    1503             : !-----------------------------------------------------------------------
    1504      261120 :      gcols=-1
    1505      236544 :      do i=1,lchunks(lcid)%ncols
    1506      236544 :         gcols(i) = lchunks(lcid)%gcol(i)
    1507             :      enddo
    1508       15360 :      return
    1509             :    end subroutine get_gcol_all_p
    1510             : 
    1511             : !
    1512             : !========================================================================
    1513             : !
    1514      110592 :    integer function get_gcol_p(lcid, col)
    1515             : !-----------------------------------------------------------------------
    1516             : !
    1517             : ! Purpose: Return global physics column index for chunk column
    1518             : !
    1519             : ! Method:
    1520             : !
    1521             : ! Author: Jim Edwards / Patrick Worley
    1522             : !
    1523             : !-----------------------------------------------------------------------
    1524             : !------------------------------Arguments--------------------------------
    1525             :    integer, intent(in)  :: lcid          ! local chunk id
    1526             :    integer, intent(in)  :: col           ! column index
    1527             : 
    1528             : !-----------------------------------------------------------------------
    1529      110592 :    get_gcol_p = lchunks(lcid)%gcol(col)
    1530             : 
    1531             :    return
    1532             :    end function get_gcol_p
    1533             : 
    1534             : !
    1535             : !========================================================================
    1536             : 
    1537           0 :    subroutine get_gcol_vec_p(lcid, lth, cols, gcols)
    1538             : !-----------------------------------------------------------------------
    1539             : !
    1540             : ! Purpose: Return global physics column indices for set of chunk columns
    1541             : !
    1542             : ! Method:
    1543             : !
    1544             : ! Author: Patrick Worley
    1545             : !
    1546             : !-----------------------------------------------------------------------
    1547             :    use ppgrid
    1548             : 
    1549             : !------------------------------Arguments--------------------------------
    1550             :    integer, intent(in)  :: lcid          ! local chunk id
    1551             :    integer, intent(in)  :: lth           ! number of column indices
    1552             :    integer, intent(in)  :: cols(lth)     ! column indices
    1553             : 
    1554             :    integer, intent(out) :: gcols(lth)    ! array of global physics
    1555             :                                          !  columns indices
    1556             : 
    1557             : !---------------------------Local workspace-----------------------------
    1558             :    integer :: i                          ! loop index
    1559             : 
    1560             : !-----------------------------------------------------------------------
    1561           0 :    do i=1,lth
    1562           0 :      gcols(i) = lchunks(lcid)%gcol(cols(i))
    1563             :    enddo
    1564             : 
    1565           0 :    return
    1566             :    end subroutine get_gcol_vec_p
    1567             : 
    1568             : !
    1569             : !========================================================================
    1570             : !
    1571     9653760 :    integer function get_ncols_p(lcid)
    1572             : !-----------------------------------------------------------------------
    1573             : !
    1574             : ! Purpose: Return number of columns in chunk given the local chunk id.
    1575             : !
    1576             : ! Method:
    1577             : !
    1578             : ! Author: Patrick Worley
    1579             : !
    1580             : !-----------------------------------------------------------------------
    1581             : !------------------------------Arguments--------------------------------
    1582             :    integer, intent(in)  :: lcid      ! local chunk id
    1583             : 
    1584             : !---------------------------Local workspace-----------------------------
    1585             :    integer              :: cid       ! global chunk id
    1586             : 
    1587             : !-----------------------------------------------------------------------
    1588     9653760 :    get_ncols_p = lchunks(lcid)%ncols
    1589             : 
    1590             :    return
    1591             :    end function get_ncols_p
    1592             : 
    1593             : !========================================================================
    1594             : 
    1595           0 :    subroutine get_grid_dims(hdim1_d_out, hdim2_d_out)
    1596             :       use cam_abortutils, only: endrun
    1597             :       ! retrieve dynamics field grid information
    1598             :       ! hdim1_d and hdim2_d are dimensions of rectangular horizontal grid
    1599             :       ! data structure, If 1D data structure, then hdim2_d == 1.
    1600             :       integer, intent(out) :: hdim1_d_out
    1601             :       integer, intent(out) :: hdim2_d_out
    1602             : 
    1603           0 :       if (.not. phys_grid_initialized()) then
    1604           0 :          call endrun('get_grid_dims: physics grid not initialized')
    1605             :       end if
    1606           0 :       hdim1_d_out = hdim1_d
    1607           0 :       hdim2_d_out = hdim2_d
    1608             : 
    1609           0 :    end subroutine get_grid_dims
    1610             : !
    1611             : !========================================================================
    1612             : !
    1613     3502080 :    subroutine get_lat_all_p(lcid, latdim, lats)
    1614             : !-----------------------------------------------------------------------
    1615             : !
    1616             : ! Purpose: Return all global latitude indices for chunk
    1617             : !
    1618             : ! Method:
    1619             : !
    1620             : ! Author: Patrick Worley
    1621             : !
    1622             : !-----------------------------------------------------------------------
    1623             :    use ppgrid
    1624             : !------------------------------Arguments--------------------------------
    1625             :    integer, intent(in)  :: lcid          ! local chunk id
    1626             :    integer, intent(in)  :: latdim        ! declared size of output array
    1627             : 
    1628             :    integer, intent(out) :: lats(latdim)  ! array of global latitude indices
    1629             : 
    1630             : !---------------------------Local workspace-----------------------------
    1631             :    integer :: i                          ! loop index
    1632             :    integer :: cid                        ! global chunk id
    1633             : 
    1634             : !-----------------------------------------------------------------------
    1635     3502080 :    cid = lchunks(lcid)%cid
    1636    53932032 :    do i=1,chunks(cid)%ncols
    1637    53932032 :      lats(i) = chunks(cid)%lat(i)
    1638             :    enddo
    1639             : 
    1640     3502080 :    return
    1641             :    end subroutine get_lat_all_p
    1642             : !
    1643             : !========================================================================
    1644             : 
    1645           0 :    subroutine get_lat_vec_p(lcid, lth, cols, lats)
    1646             : !-----------------------------------------------------------------------
    1647             : !
    1648             : ! Purpose: Return global latitude indices for set of chunk columns
    1649             : !
    1650             : ! Method:
    1651             : !
    1652             : ! Author: Patrick Worley
    1653             : !
    1654             : !-----------------------------------------------------------------------
    1655             :    use ppgrid
    1656             : 
    1657             : !------------------------------Arguments--------------------------------
    1658             :    integer, intent(in)  :: lcid          ! local chunk id
    1659             :    integer, intent(in)  :: lth           ! number of column indices
    1660             :    integer, intent(in)  :: cols(lth)     ! column indices
    1661             : 
    1662             :    integer, intent(out) :: lats(lth)     ! array of global latitude indices
    1663             : 
    1664             : !---------------------------Local workspace-----------------------------
    1665             :    integer :: i                          ! loop index
    1666             :    integer :: cid                        ! global chunk id
    1667             : 
    1668             : !-----------------------------------------------------------------------
    1669           0 :    cid = lchunks(lcid)%cid
    1670           0 :    do i=1,lth
    1671           0 :      lats(i) = chunks(cid)%lat(cols(i))
    1672             :    enddo
    1673             : 
    1674           0 :    return
    1675             :    end subroutine get_lat_vec_p
    1676             : !
    1677             : !========================================================================
    1678             : 
    1679      110592 :    integer function get_lat_p(lcid, col)
    1680             : !-----------------------------------------------------------------------
    1681             : !
    1682             : ! Purpose: Return global latitude index for chunk column
    1683             : !
    1684             : ! Method:
    1685             : !
    1686             : ! Author: Patrick Worley
    1687             : !
    1688             : !-----------------------------------------------------------------------
    1689             :    use ppgrid
    1690             : !------------------------------Arguments--------------------------------
    1691             :    integer, intent(in)  :: lcid          ! local chunk id
    1692             :    integer, intent(in)  :: col           ! column index
    1693             : 
    1694             : !---------------------------Local workspace-----------------------------
    1695             :    integer :: cid                        ! global chunk id
    1696             : 
    1697             : !-----------------------------------------------------------------------
    1698      110592 :    cid = lchunks(lcid)%cid
    1699      110592 :    get_lat_p = chunks(cid)%lat(col)
    1700             : 
    1701             :    return
    1702             :    end function get_lat_p
    1703             : !
    1704             : !========================================================================
    1705             : !
    1706           0 :    subroutine get_lon_all_p(lcid, londim, lons)
    1707             : !-----------------------------------------------------------------------
    1708             : !
    1709             : ! Purpose:
    1710             : !  Was: Return all global longitude indices for chunk
    1711             : !  Now: Return all longitude offsets (+1) for chunk. These are offsets
    1712             : !       in ordered list of global columns from first
    1713             : !       column with given latitude to column with given latitude
    1714             : !       and longitude. This corresponds to the usual longitude indices
    1715             : !       for full and reduced lon/lat grids.
    1716             : !
    1717             : ! Method:
    1718             : !
    1719             : ! Author: Patrick Worley
    1720             : !
    1721             : !-----------------------------------------------------------------------
    1722             :    use ppgrid
    1723             : !------------------------------Arguments--------------------------------
    1724             :    integer, intent(in)  :: lcid          ! local chunk id
    1725             :    integer, intent(in)  :: londim        ! declared size of output array
    1726             : 
    1727             :    integer, intent(out) :: lons(londim)  ! array of global longitude
    1728             :                                          !  indices
    1729             : 
    1730             : !---------------------------Local workspace-----------------------------
    1731             :    integer :: i                          ! loop index
    1732             :    integer :: lat                        ! latitude index
    1733             :    integer :: cid                        ! global chunk id
    1734             :    integer :: gcol                       ! global column id in latlon
    1735             :                                          !  ordering
    1736             : 
    1737             : !-----------------------------------------------------------------------
    1738           0 :    cid = lchunks(lcid)%cid
    1739           0 :    do i=1,chunks(cid)%ncols
    1740           0 :      lat  = chunks(cid)%lat(i)
    1741           0 :      gcol = dyn_to_latlon_gcol_map(chunks(cid)%gcol(i))
    1742           0 :      lons(i) = (gcol - clat_p_idx(lat)) + 1
    1743             :    enddo
    1744             : 
    1745           0 :    return
    1746             :    end subroutine get_lon_all_p
    1747             : !
    1748             : !========================================================================
    1749             : 
    1750           0 :    subroutine get_lon_vec_p(lcid, lth, cols, lons)
    1751             : !-----------------------------------------------------------------------
    1752             : !
    1753             : ! Purpose:
    1754             : !  Was: Return global longitude indices for set of chunk columns.
    1755             : !  Now: Return longitude offsets (+1) for set of chunk columns.
    1756             : !       These are offsets in ordered list of global columns from first
    1757             : !       column with given latitude to column with given latitude
    1758             : !       and longitude. This corresponds to the usual longitude indices
    1759             : !       for full and reduced lon/lat grids.
    1760             : !
    1761             : ! Method:
    1762             : !
    1763             : ! Author: Patrick Worley
    1764             : !
    1765             : !-----------------------------------------------------------------------
    1766             :    use ppgrid
    1767             : !------------------------------Arguments--------------------------------
    1768             :    integer, intent(in)  :: lcid          ! local chunk id
    1769             :    integer, intent(in)  :: lth           ! number of column indices
    1770             :    integer, intent(in)  :: cols(lth)     ! column indices
    1771             : 
    1772             :    integer, intent(out) :: lons(lth)     ! array of global longitude indices
    1773             : 
    1774             : !---------------------------Local workspace-----------------------------
    1775             :    integer :: i                          ! loop index
    1776             :    integer :: lat                        ! latitude index
    1777             :    integer :: cid                        ! global chunk id
    1778             :    integer :: gcol                       ! global column id in latlon
    1779             :                                          !  ordering
    1780             : 
    1781             : !-----------------------------------------------------------------------
    1782           0 :    cid = lchunks(lcid)%cid
    1783           0 :    do i=1,lth
    1784           0 :      lat = chunks(cid)%lat(cols(i))
    1785           0 :      gcol = dyn_to_latlon_gcol_map(chunks(cid)%gcol(i))
    1786           0 :      lons(i) = (gcol - clat_p_idx(lat)) + 1
    1787             :    enddo
    1788             : 
    1789           0 :    return
    1790             :    end subroutine get_lon_vec_p
    1791             : !
    1792             : !========================================================================
    1793             : 
    1794      110592 :    integer function get_lon_p(lcid, col)
    1795             : !-----------------------------------------------------------------------
    1796             : !
    1797             : ! Purpose:
    1798             : !  Was: Return global longitude index for chunk column.
    1799             : !  Now: Return longitude offset (+1) for chunk column. This is the
    1800             : !       offset in ordered list of global columns from first
    1801             : !       column with given latitude to column with given latitude
    1802             : !       and longitude. This corresponds to the usual longitude index
    1803             : !       for full and reduced lon/lat grids.
    1804             : !
    1805             : ! Method:
    1806             : !
    1807             : ! Author: Patrick Worley
    1808             : !
    1809             : !-----------------------------------------------------------------------
    1810             :    use ppgrid
    1811             : !------------------------------Arguments--------------------------------
    1812             :    integer, intent(in)  :: lcid          ! local chunk id
    1813             :    integer, intent(in)  :: col           ! column index
    1814             : 
    1815             : !---------------------------Local workspace-----------------------------
    1816             :    integer :: cid                        ! global chunk id
    1817             :    integer :: lat                        ! latitude index
    1818             :    integer :: gcol                       ! global column id in latlon
    1819             :                                          !  ordering
    1820             : 
    1821             : !-----------------------------------------------------------------------
    1822      110592 :    cid = lchunks(lcid)%cid
    1823      110592 :    lat = chunks(cid)%lat(col)
    1824      110592 :    gcol = dyn_to_latlon_gcol_map(chunks(cid)%gcol(col))
    1825      110592 :    get_lon_p = (gcol - clat_p_idx(lat)) + 1
    1826             : 
    1827             :    return
    1828             :    end function get_lon_p
    1829             : !
    1830             : !========================================================================
    1831             : !
    1832     2668800 :    subroutine get_rlat_all_p(lcid, rlatdim, rlats)
    1833             : !-----------------------------------------------------------------------
    1834             : !
    1835             : ! Purpose: Return all latitudes (in radians) for chunk
    1836             : !
    1837             : ! Method:
    1838             : !
    1839             : ! Author: Patrick Worley
    1840             : !
    1841             : !-----------------------------------------------------------------------
    1842             :    use ppgrid
    1843             : !------------------------------Arguments--------------------------------
    1844             :    integer, intent(in)  :: lcid           ! local chunk id
    1845             :    integer, intent(in)  :: rlatdim        ! declared size of output array
    1846             : 
    1847             :    real(r8), intent(out) :: rlats(rlatdim)! array of latitudes
    1848             : 
    1849             : !---------------------------Local workspace-----------------------------
    1850             :    integer :: i                           ! loop index
    1851             :    integer :: cid                         ! global chunk id
    1852             : 
    1853             : !-----------------------------------------------------------------------
    1854     2668800 :    cid = lchunks(lcid)%cid
    1855    41099520 :    do i=1,chunks(cid)%ncols
    1856    41099520 :      rlats(i) = clat_p(chunks(cid)%lat(i))
    1857             :    enddo
    1858             : 
    1859     2668800 :    return
    1860             :    end subroutine get_rlat_all_p
    1861             : !
    1862             : !========================================================================
    1863             : !
    1864      145920 :    subroutine get_area_all_p(lcid, rdim, area)
    1865             : !-----------------------------------------------------------------------
    1866             : !
    1867             : ! Purpose: Return all areas for chunk
    1868             : !
    1869             : ! Method:
    1870             : !
    1871             : ! Author: Patrick Worley
    1872             : !
    1873             : !-----------------------------------------------------------------------
    1874             :    use ppgrid
    1875             : !------------------------------Arguments--------------------------------
    1876             :    integer, intent(in)  :: lcid          ! local chunk id
    1877             :    integer, intent(in)  :: rdim          ! declared size of output array
    1878             : 
    1879             :    real(r8), intent(out) :: area(rdim)   ! array of areas
    1880             : 
    1881             : !---------------------------Local workspace-----------------------------
    1882             :    integer :: i                          ! loop index
    1883             : 
    1884             : !-----------------------------------------------------------------------
    1885     2247168 :    do i=1,lchunks(lcid)%ncols
    1886     2247168 :      area(i) = lchunks(lcid)%area(i)
    1887             :    enddo
    1888             : 
    1889      145920 :    return
    1890             :    end subroutine get_area_all_p
    1891             : !
    1892             : !========================================================================
    1893             : !
    1894     3483648 :    real(r8) function get_area_p(lcid, col)
    1895             : !-----------------------------------------------------------------------
    1896             : !
    1897             : ! Purpose: Return area for chunk column
    1898             : !
    1899             : ! Method:
    1900             : !
    1901             : ! Author: Patrick Worley
    1902             : !
    1903             : !-----------------------------------------------------------------------
    1904             :    use ppgrid
    1905             : !------------------------------Arguments--------------------------------
    1906             :    integer, intent(in)  :: lcid          ! local chunk id
    1907             :    integer, intent(in)  :: col           ! column index
    1908             : 
    1909             : !-----------------------------------------------------------------------
    1910     3483648 :    get_area_p = lchunks(lcid)%area(col)
    1911             : 
    1912             :    return
    1913             :    end function get_area_p
    1914             : !
    1915             : !========================================================================
    1916             : !
    1917      917760 :    subroutine get_wght_all_p(lcid, rdim, wght)
    1918             : !-----------------------------------------------------------------------
    1919             : !
    1920             : ! Purpose: Return all integration weights for chunk
    1921             : !
    1922             : ! Method:
    1923             : !
    1924             : ! Author: Patrick Worley
    1925             : !
    1926             : !-----------------------------------------------------------------------
    1927             :    use ppgrid
    1928             : !------------------------------Arguments--------------------------------
    1929             :    integer, intent(in)  :: lcid          ! local chunk id
    1930             :    integer, intent(in)  :: rdim          ! declared size of output array
    1931             : 
    1932             :    real(r8), intent(out) :: wght(rdim)   ! array of integration weights
    1933             : 
    1934             : !---------------------------Local workspace-----------------------------
    1935             :    integer :: i                          ! loop index
    1936             : 
    1937             : !-----------------------------------------------------------------------
    1938    14133504 :    do i=1,lchunks(lcid)%ncols
    1939    14133504 :      wght(i) = lchunks(lcid)%wght(i)
    1940             :    enddo
    1941             : 
    1942      917760 :    return
    1943             :    end subroutine get_wght_all_p
    1944             : !
    1945             : !========================================================================
    1946             : !
    1947           0 :    real(r8) function get_wght_p(lcid, col)
    1948             : !-----------------------------------------------------------------------
    1949             : !
    1950             : ! Purpose: Return integration weight for chunk column
    1951             : !
    1952             : ! Method:
    1953             : !
    1954             : ! Author: Patrick Worley
    1955             : !
    1956             : !-----------------------------------------------------------------------
    1957             :    use ppgrid
    1958             : !------------------------------Arguments--------------------------------
    1959             :    integer, intent(in)  :: lcid          ! local chunk id
    1960             :    integer, intent(in)  :: col           ! column index
    1961             : 
    1962             : !-----------------------------------------------------------------------
    1963           0 :    get_wght_p = lchunks(lcid)%wght(col)
    1964             : 
    1965             :    return
    1966             :    end function get_wght_p
    1967             : !
    1968             : !========================================================================
    1969             : !
    1970           0 :    subroutine get_rlat_vec_p(lcid, lth, cols, rlats)
    1971             : !-----------------------------------------------------------------------
    1972             : !
    1973             : ! Purpose: Return latitudes (in radians) for set of chunk columns
    1974             : !
    1975             : ! Method:
    1976             : !
    1977             : ! Author: Patrick Worley
    1978             : !
    1979             : !-----------------------------------------------------------------------
    1980             :    use ppgrid
    1981             : !------------------------------Arguments--------------------------------
    1982             :    integer, intent(in)  :: lcid          ! local chunk id
    1983             :    integer, intent(in)  :: lth           ! number of column indices
    1984             :    integer, intent(in)  :: cols(lth)     ! column indices
    1985             : 
    1986             :    real(r8), intent(out) :: rlats(lth)   ! array of latitudes
    1987             : 
    1988             : !---------------------------Local workspace-----------------------------
    1989             :    integer :: i                          ! loop index
    1990             :    integer :: cid                        ! global chunk id
    1991             : 
    1992             : !-----------------------------------------------------------------------
    1993           0 :    cid = lchunks(lcid)%cid
    1994           0 :    do i=1,lth
    1995           0 :      rlats(i) = clat_p(chunks(cid)%lat(cols(i)))
    1996             :    enddo
    1997             : 
    1998           0 :    return
    1999             :    end subroutine get_rlat_vec_p
    2000             : !
    2001             : !========================================================================
    2002             : 
    2003           0 :    real(r8) function get_rlat_p(lcid, col)
    2004             : !-----------------------------------------------------------------------
    2005             : !
    2006             : ! Purpose: Return latitude (in radians) for chunk column
    2007             : !
    2008             : ! Method:
    2009             : !
    2010             : ! Author: Patrick Worley
    2011             : !
    2012             : !-----------------------------------------------------------------------
    2013             :    use ppgrid
    2014             : !------------------------------Arguments--------------------------------
    2015             :    integer, intent(in)  :: lcid          ! local chunk id
    2016             :    integer, intent(in)  :: col           ! column index
    2017             : 
    2018             : !---------------------------Local workspace-----------------------------
    2019             :    integer :: cid                        ! global chunk id
    2020             : 
    2021             : !-----------------------------------------------------------------------
    2022           0 :    cid = lchunks(lcid)%cid
    2023           0 :    get_rlat_p = clat_p(chunks(cid)%lat(col))
    2024             : 
    2025             :    return
    2026             :    end function get_rlat_p
    2027             : !
    2028             : !========================================================================
    2029             : !
    2030     2522880 :    subroutine get_rlon_all_p(lcid, rlondim, rlons)
    2031             : !-----------------------------------------------------------------------
    2032             : !
    2033             : ! Purpose: Return all longitudes (in radians) for chunk
    2034             : !
    2035             : ! Method:
    2036             : !
    2037             : ! Author: Patrick Worley
    2038             : !
    2039             : !-----------------------------------------------------------------------
    2040             :    use ppgrid
    2041             : !------------------------------Arguments--------------------------------
    2042             :    integer, intent(in)  :: lcid           ! local chunk id
    2043             :    integer, intent(in)  :: rlondim        ! declared size of output array
    2044             : 
    2045             :    real(r8), intent(out) :: rlons(rlondim)! array of longitudes
    2046             : 
    2047             : !---------------------------Local workspace-----------------------------
    2048             :    integer :: i                           ! loop index
    2049             :    integer :: cid                         ! global chunk id
    2050             : 
    2051             : !-----------------------------------------------------------------------
    2052     2522880 :    cid = lchunks(lcid)%cid
    2053    38852352 :    do i=1,chunks(cid)%ncols
    2054    38852352 :      rlons(i) = clon_p(chunks(cid)%lon(i))
    2055             :    enddo
    2056             : 
    2057     2522880 :    return
    2058             :    end subroutine get_rlon_all_p
    2059             : !
    2060             : !========================================================================
    2061             : 
    2062           0 :    subroutine get_rlon_vec_p(lcid, lth, cols, rlons)
    2063             : !-----------------------------------------------------------------------
    2064             : !
    2065             : ! Purpose: Return longitudes (in radians) for set of chunk columns
    2066             : !
    2067             : ! Method:
    2068             : !
    2069             : ! Author: Patrick Worley
    2070             : !
    2071             : !-----------------------------------------------------------------------
    2072             :    use ppgrid
    2073             : !------------------------------Arguments--------------------------------
    2074             :    integer, intent(in)  :: lcid         ! local chunk id
    2075             :    integer, intent(in)  :: lth           ! number of column indices
    2076             :    integer, intent(in)  :: cols(lth)     ! column indices
    2077             : 
    2078             :    real(r8), intent(out) :: rlons(lth)   ! array of longitudes
    2079             : 
    2080             : !---------------------------Local workspace-----------------------------
    2081             :    integer :: i                          ! loop index
    2082             :    integer :: cid                        ! global chunk id
    2083             : 
    2084             : !-----------------------------------------------------------------------
    2085           0 :    cid = lchunks(lcid)%cid
    2086           0 :    do i=1,lth
    2087           0 :      rlons(i) = clon_p(chunks(cid)%lon(cols(i)))
    2088             :    enddo
    2089             : 
    2090           0 :    return
    2091             :    end subroutine get_rlon_vec_p
    2092             : !
    2093             : !========================================================================
    2094             : 
    2095           0 :    real(r8) function get_rlon_p(lcid, col)
    2096             : !-----------------------------------------------------------------------
    2097             : !
    2098             : ! Purpose: Return longitude (in radians) for chunk column
    2099             : !
    2100             : ! Method:
    2101             : !
    2102             : ! Author: Patrick Worley
    2103             : !
    2104             : !-----------------------------------------------------------------------
    2105             :    use ppgrid
    2106             : !------------------------------Arguments--------------------------------
    2107             :    integer, intent(in)  :: lcid          ! local chunk id
    2108             :    integer, intent(in)  :: col           ! column index
    2109             : 
    2110             : !---------------------------Local workspace-----------------------------
    2111             :    integer :: cid                        ! global chunk id
    2112             : 
    2113             : !-----------------------------------------------------------------------
    2114           0 :    cid = lchunks(lcid)%cid
    2115           0 :    get_rlon_p = clon_p(chunks(cid)%lon(col))
    2116             : 
    2117             :    return
    2118             :    end function get_rlon_p
    2119             : !
    2120             : !========================================================================
    2121             : !
    2122             : 
    2123           0 :    subroutine scatter_field_to_chunk(fdim,mdim,ldim, &
    2124           0 :                                      hdim1d,globalfield,localchunks)
    2125             : !-----------------------------------------------------------------------
    2126             : !
    2127             : ! Purpose: Distribute field
    2128             : !          to decomposed chunk data structure
    2129             : !
    2130             : ! Method:
    2131             : !
    2132             : ! Author: Patrick Worley
    2133             : !
    2134             : 
    2135             : !------------------------------Arguments--------------------------------
    2136             :    integer, intent(in) :: fdim      ! declared length of first dimension
    2137             :    integer, intent(in) :: mdim      ! declared length of middle dimension
    2138             :    integer, intent(in) :: ldim      ! declared length of last dimension
    2139             :    integer, intent(in) :: hdim1d    ! declared first horizontal index
    2140             :                                     ! dimension
    2141             :    real(r8), intent(in) :: globalfield(fdim,hdim1d,mdim,hdim2_d,ldim)
    2142             :                                     ! global field
    2143             : 
    2144             :    real(r8), intent(out):: localchunks(fdim,pcols,mdim, &
    2145             :                                        begchunk:endchunk,ldim)
    2146             :                                     ! local chunks
    2147             : 
    2148             : !---------------------------Local workspace-----------------------------
    2149             :    integer :: f,i,m,l,p                  ! loop indices
    2150             :    integer :: cid                        ! global chunk id
    2151             :    integer :: lcid                       ! local chunk id
    2152             :    integer :: lid                        ! local column index
    2153             :    integer :: gcol                       ! global column index
    2154             :    integer :: h1                         ! first horizontal dimension index
    2155             :    integer :: h2                         ! second horizontal dimension index
    2156             : 
    2157             : #if ( defined SPMD )
    2158           0 :    real(r8) gfield_p(fdim,mdim,ldim,ngcols)
    2159             :                                          ! vector to be scattered
    2160           0 :    real(r8) lfield_p(fdim,mdim,ldim,nlcols)
    2161             :                                          ! local component of scattered
    2162             :                                          !  vector
    2163           0 :    integer :: displs(0:npes-1)           ! scatter displacements
    2164           0 :    integer :: sndcnts(0:npes-1)          ! scatter send counts
    2165             :    integer :: recvcnt                    ! scatter receive count
    2166             :    integer :: beglcol                    ! beginning index for local columns
    2167             :                                          !  in global column ordering
    2168             : #endif
    2169             : 
    2170             : !-----------------------------------------------------------------------
    2171           0 :    if (hdim1d < hdim1_d) then
    2172           0 :       write(iulog,*) __FILE__,__LINE__,hdim1d,hdim1_d
    2173           0 :       call endrun ('SCATTER_FIELD_TO_CHUNK error: hdim1d < hdim1_d')
    2174             :    endif
    2175           0 :    localchunks(:,:,:,:,:) = 0
    2176             : #if ( defined SPMD )
    2177           0 :    displs(0) = 0
    2178           0 :    sndcnts(0) = fdim*mdim*ldim*gs_col_num(0)
    2179           0 :    beglcol = 0
    2180           0 :    do p=1,npes-1
    2181           0 :      displs(p) = displs(p-1) + sndcnts(p-1)
    2182           0 :      sndcnts(p) = fdim*mdim*ldim*gs_col_num(p)
    2183           0 :      if (p <= iam) then
    2184           0 :         beglcol = beglcol + gs_col_num(p-1)
    2185             :      endif
    2186             :    enddo
    2187           0 :    recvcnt = fdim*mdim*ldim*nlcols
    2188             : 
    2189           0 :    if (masterproc) then
    2190             : 
    2191             : ! copy field into global (process-ordered) chunked data structure
    2192             : 
    2193           0 :       do l=1,ldim
    2194           0 :          do i=1,num_global_phys_cols
    2195           0 :             cid  = pgcols(i)%chunk
    2196           0 :             lid  = pgcols(i)%ccol
    2197           0 :             gcol = chunks(cid)%gcol(lid)
    2198           0 :             h2   = (gcol-1)/hdim1_d + 1
    2199           0 :             h1   = mod((gcol-1),hdim1_d) + 1
    2200           0 :             do m=1,mdim
    2201           0 :                do f=1,fdim
    2202           0 :                   gfield_p(f,m,l,i) = &
    2203           0 :                      globalfield(f, h1, m, h2, l)
    2204             :                end do
    2205             :             end do
    2206             :          end do
    2207             :       end do
    2208             :    endif
    2209             : 
    2210             : ! scatter to other processes
    2211             : ! (pgcols ordering consistent with begchunk:endchunk
    2212             : ! local ordering)
    2213             : 
    2214           0 :    call t_barrierf('sync_scat_ftoc', mpicom)
    2215             :    call mpiscatterv(gfield_p, sndcnts, displs, mpir8, &
    2216           0 :                     lfield_p, recvcnt, mpir8, 0, mpicom)
    2217             : 
    2218             : ! copy into local chunked data structure
    2219             : 
    2220           0 :    do i=1,nlcols
    2221           0 :       cid = pgcols(beglcol+i)%chunk
    2222           0 :       lcid = chunks(cid)%lcid
    2223           0 :       lid = pgcols(beglcol+i)%ccol
    2224           0 :       do l=1,ldim
    2225           0 :          do m=1,mdim
    2226           0 :             do f=1,fdim
    2227           0 :                localchunks(f,lid,m,lcid,l) = &
    2228           0 :                  lfield_p(f, m, l, i)
    2229             :             end do
    2230             :          end do
    2231             :       end do
    2232             :    end do
    2233             : #else
    2234             : 
    2235             : ! copy field into chunked data structure
    2236             : ! (pgcol ordering chosen to reflect begchunk:endchunk
    2237             : !  local ordering)
    2238             : 
    2239             :    do l=1,ldim
    2240             :       do i=1,num_global_phys_cols
    2241             :          cid  = pgcols(i)%chunk
    2242             :          lcid = chunks(cid)%lcid
    2243             :          lid  = pgcols(i)%ccol
    2244             :          gcol = chunks(cid)%gcol(lid)
    2245             :          h2   = (gcol-1)/hdim1_d + 1
    2246             :          h1   = mod((gcol-1),hdim1_d) + 1
    2247             :          do m=1,mdim
    2248             :             do f=1,fdim
    2249             :                localchunks(f,lid,m,lcid,l) = &
    2250             :                   globalfield(f, h1, m, h2, l)
    2251             :             end do
    2252             :          end do
    2253             :       end do
    2254             :    end do
    2255             : 
    2256             : #endif
    2257             : 
    2258           0 :    return
    2259             :    end subroutine scatter_field_to_chunk
    2260             : !========================================================================
    2261             : 
    2262           0 :    subroutine scatter_field_to_chunk4(fdim,mdim,ldim, &
    2263           0 :                                       hdim1d,globalfield,localchunks)
    2264             : !-----------------------------------------------------------------------
    2265             : !
    2266             : ! Purpose: Distribute field
    2267             : !          to decomposed chunk data structure
    2268             : !
    2269             : ! Method:
    2270             : !
    2271             : ! Author: Patrick Worley
    2272             : !
    2273             : !-----------------------------------------------------------------------
    2274             : !------------------------------Arguments--------------------------------
    2275             :    integer, intent(in) :: fdim      ! declared length of first dimension
    2276             :    integer, intent(in) :: mdim      ! declared length of middle dimension
    2277             :    integer, intent(in) :: ldim      ! declared length of last dimension
    2278             :    integer, intent(in) :: hdim1d    ! declared first horizontal index
    2279             :                                     ! dimension
    2280             :    real(r4), intent(in) :: globalfield(fdim,hdim1d,mdim,hdim2_d,ldim)
    2281             :                                     ! global field
    2282             : 
    2283             :    real(r4), intent(out):: localchunks(fdim,pcols,mdim, &
    2284             :                                        begchunk:endchunk,ldim)
    2285             :                                     ! local chunks
    2286             : 
    2287             : !---------------------------Local workspace-----------------------------
    2288             :    integer :: f,i,m,l,p                  ! loop indices
    2289             :    integer :: cid                        ! global chunk id
    2290             :    integer :: lcid                       ! local chunk id
    2291             :    integer :: lid                        ! local column index
    2292             :    integer :: gcol                       ! global column index
    2293             :    integer :: h1                         ! first horizontal dimension index
    2294             :    integer :: h2                         ! second horizontal dimension index
    2295             : 
    2296             : #if ( defined SPMD )
    2297           0 :    real(r4) gfield_p(fdim,mdim,ldim,ngcols)
    2298             :                                          ! vector to be scattered
    2299           0 :    real(r4) lfield_p(fdim,mdim,ldim,nlcols)
    2300             :                                          ! local component of scattered
    2301             :                                          !  vector
    2302           0 :    integer :: displs(0:npes-1)           ! scatter displacements
    2303           0 :    integer :: sndcnts(0:npes-1)          ! scatter send counts
    2304             :    integer :: recvcnt                    ! scatter receive count
    2305             :    integer :: beglcol                    ! beginning index for local columns
    2306             :                                          !  in global column ordering
    2307             : #endif
    2308             : 
    2309             : !-----------------------------------------------------------------------
    2310           0 :    if (hdim1d < hdim1_d) then
    2311           0 :       call endrun ('SCATTER_FIELD_TO_CHUNK4 error: hdim1d < hdim1_d')
    2312             :    endif
    2313             : #if ( defined SPMD )
    2314           0 :    displs(0) = 0
    2315           0 :    sndcnts(0) = fdim*mdim*ldim*gs_col_num(0)
    2316           0 :    beglcol = 0
    2317           0 :    do p=1,npes-1
    2318           0 :      displs(p) = displs(p-1) + sndcnts(p-1)
    2319           0 :      sndcnts(p) = fdim*mdim*ldim*gs_col_num(p)
    2320           0 :      if (p <= iam) then
    2321           0 :         beglcol = beglcol + gs_col_num(p-1)
    2322             :      endif
    2323             :    enddo
    2324           0 :    recvcnt = fdim*mdim*ldim*nlcols
    2325             : 
    2326           0 :    if (masterproc) then
    2327             :       ! copy field into global (process-ordered) chunked data structure
    2328           0 :       do l=1,ldim
    2329           0 :          do i=1,num_global_phys_cols
    2330           0 :             cid  = pgcols(i)%chunk
    2331           0 :             lid  = pgcols(i)%ccol
    2332           0 :             gcol = chunks(cid)%gcol(lid)
    2333           0 :             h2   = (gcol-1)/hdim1_d + 1
    2334           0 :             h1   = mod((gcol-1),hdim1_d) + 1
    2335           0 :             do m=1,mdim
    2336           0 :                do f=1,fdim
    2337           0 :                   gfield_p(f,m,l,i) = &
    2338           0 :                      globalfield(f, h1, m, h2, l)
    2339             :                end do
    2340             :             end do
    2341             :          end do
    2342             :       end do
    2343             :    endif
    2344             : 
    2345             : ! scatter to other processes
    2346             : ! (pgcols ordering consistent with begchunk:endchunk
    2347             : !  local ordering)
    2348             : 
    2349           0 :    call t_barrierf('sync_scat_ftoc', mpicom)
    2350             :    call mpiscatterv(gfield_p, sndcnts, displs, mpir4, &
    2351           0 :                     lfield_p, recvcnt, mpir4, 0, mpicom)
    2352             : 
    2353             : ! copy into local chunked data structure
    2354             : 
    2355           0 :    do i=1,nlcols
    2356           0 :       cid = pgcols(beglcol+i)%chunk
    2357           0 :       lcid = chunks(cid)%lcid
    2358           0 :       lid = pgcols(beglcol+i)%ccol
    2359           0 :       do l=1,ldim
    2360           0 :          do m=1,mdim
    2361           0 :             do f=1,fdim
    2362           0 :                localchunks(f,lid,m,lcid,l) = &
    2363           0 :                  lfield_p(f, m, l, i)
    2364             :             end do
    2365             :          end do
    2366             :       end do
    2367             :    end do
    2368             : #else
    2369             : 
    2370             :    ! copy field into chunked data structure
    2371             :    ! (pgcol ordering chosen to reflect begchunk:endchunk
    2372             :    !  local ordering)
    2373             :    do l=1,ldim
    2374             :       do i=1,num_global_phys_cols
    2375             :          cid  = pgcols(i)%chunk
    2376             :          lcid = chunks(cid)%lcid
    2377             :          lid  = pgcols(i)%ccol
    2378             :          gcol = chunks(cid)%gcol(lid)
    2379             :          h2   = (gcol-1)/hdim1_d + 1
    2380             :          h1   = mod((gcol-1),hdim1_d) + 1
    2381             :          do m=1,mdim
    2382             :             do f=1,fdim
    2383             :                localchunks(f,lid,m,lcid,l) = &
    2384             :                   globalfield(f, h1, m, h2, l)
    2385             :             end do
    2386             :          end do
    2387             :       end do
    2388             :    end do
    2389             : 
    2390             : #endif
    2391             : 
    2392           0 :    return
    2393             :    end subroutine scatter_field_to_chunk4
    2394             : !========================================================================
    2395             : 
    2396           0 :    subroutine scatter_field_to_chunk_int(fdim,mdim,ldim, &
    2397           0 :                                          hdim1d,globalfield,localchunks)
    2398             : !-----------------------------------------------------------------------
    2399             : !
    2400             : ! Purpose: Distribute field
    2401             : !          to decomposed chunk data structure
    2402             : !
    2403             : ! Method:
    2404             : !
    2405             : ! Author: Patrick Worley
    2406             : !
    2407             : !------------------------------Arguments--------------------------------
    2408             :    integer, intent(in) :: fdim      ! declared length of first dimension
    2409             :    integer, intent(in) :: mdim      ! declared length of middle dimension
    2410             :    integer, intent(in) :: ldim      ! declared length of last dimension
    2411             :    integer, intent(in) :: hdim1d    ! declared first horizontal index
    2412             :                                     ! dimension
    2413             :    integer, intent(in) :: globalfield(fdim,hdim1d,mdim,hdim2_d,ldim)
    2414             :                                     ! global field
    2415             : 
    2416             :    integer, intent(out):: localchunks(fdim,pcols,mdim, &
    2417             :                                        begchunk:endchunk,ldim)
    2418             :                                     ! local chunks
    2419             : 
    2420             : !---------------------------Local workspace-----------------------------
    2421             :    integer :: f,i,m,l,p                  ! loop indices
    2422             :    integer :: cid                        ! global chunk id
    2423             :    integer :: lcid                       ! local chunk id
    2424             :    integer :: lid                        ! local column index
    2425             :    integer :: gcol                       ! global column index
    2426             :    integer :: h1                         ! first horizontal dimension index
    2427             :    integer :: h2                         ! second horizontal dimension index
    2428             : 
    2429             : #if ( defined SPMD )
    2430           0 :    integer gfield_p(fdim,mdim,ldim,ngcols)
    2431             :                                          ! vector to be scattered
    2432           0 :    integer lfield_p(fdim,mdim,ldim,nlcols)
    2433             :                                          ! local component of scattered
    2434             :                                          !  vector
    2435           0 :    integer :: displs(0:npes-1)           ! scatter displacements
    2436           0 :    integer :: sndcnts(0:npes-1)          ! scatter send counts
    2437             :    integer :: recvcnt                    ! scatter receive count
    2438             :    integer :: beglcol                    ! beginning index for local columns
    2439             :                                          !  in global column ordering
    2440             : #endif
    2441             : 
    2442             : !-----------------------------------------------------------------------
    2443           0 :    if (hdim1d < hdim1_d) then
    2444           0 :       call endrun ('SCATTER_FIELD_TO_CHUNK_INT error: hdim1d < hdim1_d')
    2445             :    endif
    2446             : #if ( defined SPMD )
    2447           0 :    displs(0) = 0
    2448           0 :    sndcnts(0) = fdim*mdim*ldim*gs_col_num(0)
    2449           0 :    beglcol = 0
    2450           0 :    do p=1,npes-1
    2451           0 :      displs(p) = displs(p-1) + sndcnts(p-1)
    2452           0 :      sndcnts(p) = fdim*mdim*ldim*gs_col_num(p)
    2453           0 :      if (p <= iam) then
    2454           0 :         beglcol = beglcol + gs_col_num(p-1)
    2455             :      endif
    2456             :    enddo
    2457           0 :    recvcnt = fdim*mdim*ldim*nlcols
    2458             : 
    2459           0 :    if (masterproc) then
    2460             : 
    2461             : ! copy field into global (process-ordered) chunked data structure
    2462             : 
    2463           0 :       do l=1,ldim
    2464           0 :          do i=1,num_global_phys_cols
    2465           0 :             cid = pgcols(i)%chunk
    2466           0 :             lid = pgcols(i)%ccol
    2467           0 :             gcol = chunks(cid)%gcol(lid)
    2468           0 :             h2   = (gcol-1)/hdim1_d + 1
    2469           0 :             h1   = mod((gcol-1),hdim1_d) + 1
    2470           0 :             do m=1,mdim
    2471           0 :                do f=1,fdim
    2472           0 :                   gfield_p(f,m,l,i) = &
    2473           0 :                      globalfield(f, h1, m, h2, l)
    2474             :                end do
    2475             :             end do
    2476             :          end do
    2477             :       end do
    2478             :    endif
    2479             : 
    2480             : ! scatter to other processes
    2481             : ! (pgcols ordering consistent with begchunk:endchunk
    2482             : !  local ordering)
    2483             : 
    2484           0 :    call t_barrierf('sync_scat_ftoc', mpicom)
    2485             :    call mpiscatterv(gfield_p, sndcnts, displs, mpiint, &
    2486           0 :                     lfield_p, recvcnt, mpiint, 0, mpicom)
    2487             : 
    2488             : ! copy into local chunked data structure
    2489             : 
    2490           0 :    do i=1,nlcols
    2491           0 :       cid = pgcols(beglcol+i)%chunk
    2492           0 :       lcid = chunks(cid)%lcid
    2493           0 :       lid = pgcols(beglcol+i)%ccol
    2494           0 :       do l=1,ldim
    2495           0 :          do m=1,mdim
    2496           0 :             do f=1,fdim
    2497           0 :                localchunks(f,lid,m,lcid,l) = &
    2498           0 :                  lfield_p(f, m, l, i)
    2499             :             end do
    2500             :          end do
    2501             :       end do
    2502             :    end do
    2503             : #else
    2504             : 
    2505             : ! copy field into chunked data structure
    2506             : ! (pgcol ordering chosen to reflect begchunk:endchunk
    2507             : !  local ordering)
    2508             :    do l=1,ldim
    2509             :       do i=1,num_global_phys_cols
    2510             :          cid  = pgcols(i)%chunk
    2511             :          lcid = chunks(cid)%lcid
    2512             :          lid  = pgcols(i)%ccol
    2513             :          gcol = chunks(cid)%gcol(lid)
    2514             :          h2   = (gcol-1)/hdim1_d + 1
    2515             :          h1   = mod((gcol-1),hdim1_d) + 1
    2516             :          do m=1,mdim
    2517             :             do f=1,fdim
    2518             :                localchunks(f,lid,m,lcid,l) = &
    2519             :                   globalfield(f, h1, m, h2, l)
    2520             :             end do
    2521             :          end do
    2522             :       end do
    2523             :    end do
    2524             : 
    2525             : #endif
    2526             : 
    2527           0 :    return
    2528             :    end subroutine scatter_field_to_chunk_int
    2529             : !
    2530             : !========================================================================
    2531             : !
    2532           0 :    subroutine gather_chunk_to_field(fdim,mdim,ldim, &
    2533           0 :                                      hdim1d,localchunks,globalfield)
    2534             : 
    2535             : !-----------------------------------------------------------------------
    2536             : !
    2537             : ! Purpose: Reconstruct field
    2538             : !          from decomposed chunk data structure
    2539             : !
    2540             : ! Method:
    2541             : !
    2542             : ! Author: Patrick Worley
    2543             : !
    2544             : !-----------------------------------------------------------------------
    2545             : #if ( defined SPMD )
    2546             :    use spmd_utils,    only: fc_gatherv
    2547             : #endif
    2548             : !------------------------------Arguments--------------------------------
    2549             :    integer, intent(in) :: fdim      ! declared length of first dimension
    2550             :    integer, intent(in) :: mdim      ! declared length of middle dimension
    2551             :    integer, intent(in) :: ldim      ! declared length of last dimension
    2552             :    integer, intent(in) :: hdim1d    ! declared first horizontal index
    2553             :                                     ! dimension
    2554             :    real(r8), intent(in):: localchunks(fdim,pcols,mdim, &
    2555             :                                       begchunk:endchunk,ldim)
    2556             :                                     ! local chunks
    2557             : 
    2558             :    real(r8), intent(out) :: globalfield(fdim,hdim1d,mdim,hdim2_d,ldim)
    2559             :                                     ! global field
    2560             : 
    2561             : !---------------------------Local workspace-----------------------------
    2562             :    integer :: f,i,m,l,p                  ! loop indices
    2563             :    integer :: cid                        ! global chunk id
    2564             :    integer :: lcid                       ! local chunk id
    2565             :    integer :: lid                        ! local column index
    2566             :    integer :: gcol                       ! global column index
    2567             :    integer :: h1                         ! first horizontal dimension index
    2568             :    integer :: h2                         ! second horizontal dimension index
    2569             : 
    2570             : #if ( defined SPMD )
    2571           0 :    real(r8) gfield_p(fdim,mdim,ldim,ngcols)
    2572             :                                          ! vector to be gathered
    2573           0 :    real(r8) lfield_p(fdim,mdim,ldim,nlcols)
    2574             :                                          ! local component of gather
    2575             :                                          !  vector
    2576           0 :    integer :: displs(0:npes-1)           ! gather displacements
    2577           0 :    integer :: rcvcnts(0:npes-1)          ! gather receive count
    2578             :    integer :: sendcnt                    ! gather send counts
    2579             :    integer :: beglcol                    ! beginning index for local columns
    2580             :                                          !  in global column ordering
    2581             : #endif
    2582             : 
    2583             : !-----------------------------------------------------------------------
    2584           0 :    if (hdim1d < hdim1_d) then
    2585           0 :       call endrun ('GATHER_CHUNK_TO_FIELD error: hdim1d < hdim1_d')
    2586             :    endif
    2587             : #if ( defined SPMD )
    2588           0 :    displs(0) = 0
    2589           0 :    rcvcnts(0) = fdim*mdim*ldim*gs_col_num(0)
    2590           0 :    beglcol = 0
    2591           0 :    do p=1,npes-1
    2592           0 :      displs(p) = displs(p-1) + rcvcnts(p-1)
    2593           0 :      rcvcnts(p) = fdim*mdim*ldim*gs_col_num(p)
    2594           0 :      if (p <= iam) then
    2595           0 :         beglcol = beglcol + gs_col_num(p-1)
    2596             :      endif
    2597             :    enddo
    2598           0 :    sendcnt = fdim*mdim*ldim*nlcols
    2599             : 
    2600             : ! copy into local gather data structure
    2601             : 
    2602           0 :    do l=1,ldim
    2603           0 :       do i=1,nlcols
    2604           0 :          cid = pgcols(beglcol+i)%chunk
    2605           0 :          lcid = chunks(cid)%lcid
    2606           0 :          lid = pgcols(beglcol+i)%ccol
    2607           0 :          do m=1,mdim
    2608           0 :             do f=1,fdim
    2609           0 :                lfield_p(f, m, l, i) = &
    2610           0 :                   localchunks(f,lid,m,lcid,l)
    2611             :             end do
    2612             :          end do
    2613             :       end do
    2614             :    end do
    2615             : 
    2616             : ! gather from other processes
    2617             : 
    2618           0 :    call t_barrierf('sync_gath_ctof', mpicom)
    2619             :    call fc_gatherv(lfield_p, sendcnt, mpir8, &
    2620           0 :                    gfield_p, rcvcnts, displs, mpir8, 0, mpicom)
    2621             : 
    2622           0 :    if (masterproc) then
    2623             : 
    2624             : ! copy gathered columns into lon/lat field
    2625             : 
    2626           0 :       do i=1,num_global_phys_cols
    2627           0 :          cid  = pgcols(i)%chunk
    2628           0 :          lid  = pgcols(i)%ccol
    2629           0 :          gcol = chunks(cid)%gcol(lid)
    2630           0 :          h2   = (gcol-1)/hdim1_d + 1
    2631           0 :          h1   = mod((gcol-1),hdim1_d) + 1
    2632           0 :          do l=1,ldim
    2633           0 :             do m=1,mdim
    2634           0 :                do f=1,fdim
    2635           0 :                   globalfield(f, h1, m, h2, l)    &
    2636           0 :                   = gfield_p(f,m,l,i)
    2637             :                end do
    2638             :             end do
    2639             :          end do
    2640             :       end do
    2641             :    endif
    2642           0 :    call mpibarrier(mpicom)
    2643             : #else
    2644             : 
    2645             :    ! copy chunked data structure into dynamics field
    2646             :    ! (pgcol ordering chosen to reflect begchunk:endchunk
    2647             :    !  local ordering)
    2648             :    do l=1,ldim
    2649             :       do i=1,num_global_phys_cols
    2650             :          cid  = pgcols(i)%chunk
    2651             :          lcid = chunks(cid)%lcid
    2652             :          lid  = pgcols(i)%ccol
    2653             :          gcol = chunks(cid)%gcol(lid)
    2654             :          h2   = (gcol-1)/hdim1_d + 1
    2655             :          h1   = mod((gcol-1),hdim1_d) + 1
    2656             :          do m=1,mdim
    2657             :             do f=1,fdim
    2658             :                globalfield(f, h1, m, h2, l)    &
    2659             :                = localchunks(f,lid,m,lcid,l)
    2660             :             end do
    2661             :          end do
    2662             :       end do
    2663             :    end do
    2664             : 
    2665             : #endif
    2666             : 
    2667           0 :    return
    2668             :    end subroutine gather_chunk_to_field
    2669             : 
    2670             : !
    2671             : !========================================================================
    2672             : !
    2673           0 :    subroutine gather_chunk_to_field4 (fdim,mdim,ldim, &
    2674           0 :                                       hdim1d,localchunks,globalfield)
    2675             : 
    2676             : !-----------------------------------------------------------------------
    2677             : !
    2678             : ! Purpose: Reconstruct field
    2679             : !          from decomposed chunk data structure
    2680             : !
    2681             : ! Method:
    2682             : !
    2683             : ! Author: Patrick Worley
    2684             : !
    2685             : !-----------------------------------------------------------------------
    2686             : #if ( defined SPMD )
    2687             :    use spmd_utils,    only: fc_gathervr4
    2688             : #endif
    2689             : !------------------------------Arguments--------------------------------
    2690             :    integer, intent(in) :: fdim      ! declared length of first dimension
    2691             :    integer, intent(in) :: mdim      ! declared length of middle dimension
    2692             :    integer, intent(in) :: ldim      ! declared length of last dimension
    2693             :    integer, intent(in) :: hdim1d    ! declared first horizontal index
    2694             :                                     ! dimension
    2695             :    real(r4), intent(in):: localchunks(fdim,pcols,mdim, &
    2696             :                                       begchunk:endchunk,ldim)
    2697             :                                     ! local chunks
    2698             : 
    2699             :    real(r4), intent(out) :: globalfield(fdim,hdim1d,mdim,hdim2_d,ldim)
    2700             :                                     ! global field
    2701             : 
    2702             : !---------------------------Local workspace-----------------------------
    2703             :    integer :: f,i,m,l,p                  ! loop indices
    2704             :    integer :: cid                        ! global chunk id
    2705             :    integer :: lcid                       ! local chunk id
    2706             :    integer :: lid                        ! local column index
    2707             :    integer :: gcol                       ! global column index
    2708             :    integer :: h1                         ! first horizontal dimension index
    2709             :    integer :: h2                         ! second horizontal dimension index
    2710             : 
    2711             : #if ( defined SPMD )
    2712           0 :    real(r4) gfield_p(fdim,mdim,ldim,ngcols)
    2713             :                                          ! vector to be gathered
    2714           0 :    real(r4) lfield_p(fdim,mdim,ldim,nlcols)
    2715             :                                          ! local component of gather
    2716             :                                          !  vector
    2717           0 :    integer :: displs(0:npes-1)           ! gather displacements
    2718           0 :    integer :: rcvcnts(0:npes-1)          ! gather receive count
    2719             :    integer :: sendcnt                    ! gather send counts
    2720             :    integer :: beglcol                    ! beginning index for local columns
    2721             :                                          !  in global column ordering
    2722             : #endif
    2723             : 
    2724             : !-----------------------------------------------------------------------
    2725           0 :    if (hdim1d < hdim1_d) then
    2726           0 :       call endrun ('GATHER_CHUNK_TO_FIELD4 error: hdim1d < hdim1_d')
    2727             :    endif
    2728             : #if ( defined SPMD )
    2729           0 :    displs(0) = 0
    2730           0 :    rcvcnts(0) = fdim*mdim*ldim*gs_col_num(0)
    2731           0 :    beglcol = 0
    2732           0 :    do p=1,npes-1
    2733           0 :      displs(p) = displs(p-1) + rcvcnts(p-1)
    2734           0 :      rcvcnts(p) = fdim*mdim*ldim*gs_col_num(p)
    2735           0 :      if (p <= iam) then
    2736           0 :         beglcol = beglcol + gs_col_num(p-1)
    2737             :      endif
    2738             :    enddo
    2739           0 :    sendcnt = fdim*mdim*ldim*nlcols
    2740             : 
    2741             : ! copy into local gather data structure
    2742             : 
    2743           0 :    do l=1,ldim
    2744           0 :       do i=1,nlcols
    2745           0 :          cid = pgcols(beglcol+i)%chunk
    2746           0 :          lcid = chunks(cid)%lcid
    2747           0 :          lid = pgcols(beglcol+i)%ccol
    2748           0 :          do m=1,mdim
    2749           0 :             do f=1,fdim
    2750           0 :                lfield_p(f, m, l, i) = &
    2751           0 :                   localchunks(f,lid,m,lcid,l)
    2752             :             end do
    2753             :          end do
    2754             :       end do
    2755             :    end do
    2756             : 
    2757             : ! gather from other processes
    2758             : 
    2759           0 :    call t_barrierf('sync_gath_ctof', mpicom)
    2760             :    call fc_gathervr4(lfield_p, sendcnt, mpir4, &
    2761           0 :                      gfield_p, rcvcnts, displs, mpir4, 0, mpicom)
    2762             : 
    2763           0 :    if (masterproc) then
    2764             : 
    2765             : ! copy gathered columns into lon/lat field
    2766             : 
    2767           0 :       do i=1,num_global_phys_cols
    2768           0 :          cid  = pgcols(i)%chunk
    2769           0 :          lid  = pgcols(i)%ccol
    2770           0 :          gcol = chunks(cid)%gcol(lid)
    2771           0 :          h2   = (gcol-1)/hdim1_d + 1
    2772           0 :          h1   = mod((gcol-1),hdim1_d) + 1
    2773           0 :          do l=1,ldim
    2774           0 :             do m=1,mdim
    2775           0 :                do f=1,fdim
    2776           0 :                   globalfield(f, h1, m, h2, l)    &
    2777           0 :                   = gfield_p(f,m,l,i)
    2778             :                end do
    2779             :             end do
    2780             :          end do
    2781             :       end do
    2782             :    endif
    2783             : 
    2784             : #else
    2785             : 
    2786             : ! copy chunked data structure into dynamics field
    2787             : ! (pgcol ordering chosen to reflect begchunk:endchunk
    2788             : !  local ordering)
    2789             : 
    2790             :    do l=1,ldim
    2791             :       do i=1,num_global_phys_cols
    2792             :          cid  = pgcols(i)%chunk
    2793             :          lcid = chunks(cid)%lcid
    2794             :          lid  = pgcols(i)%ccol
    2795             :          gcol = chunks(cid)%gcol(lid)
    2796             :          h2   = (gcol-1)/hdim1_d + 1
    2797             :          h1   = mod((gcol-1),hdim1_d) + 1
    2798             :          do m=1,mdim
    2799             :             do f=1,fdim
    2800             :                globalfield(f, h1, m, h2, l)    &
    2801             :                = localchunks(f,lid,m,lcid,l)
    2802             :             end do
    2803             :          end do
    2804             :       end do
    2805             :    end do
    2806             : 
    2807             : #endif
    2808             : 
    2809           0 :    return
    2810             :    end subroutine gather_chunk_to_field4
    2811             : 
    2812             : !
    2813             : !========================================================================
    2814             : !
    2815           0 :    subroutine gather_chunk_to_field_int (fdim,mdim,ldim, &
    2816           0 :                                          hdim1d,localchunks,globalfield)
    2817             : 
    2818             : !-----------------------------------------------------------------------
    2819             : !
    2820             : ! Purpose: Reconstruct field
    2821             : !          from decomposed chunk data structure
    2822             : !
    2823             : ! Method:
    2824             : !
    2825             : ! Author: Patrick Worley
    2826             : !
    2827             : !-----------------------------------------------------------------------
    2828             : #if ( defined SPMD )
    2829             :    use spmd_utils,    only: fc_gathervint
    2830             : #endif
    2831             : !------------------------------Arguments--------------------------------
    2832             :    integer, intent(in) :: fdim      ! declared length of first dimension
    2833             :    integer, intent(in) :: mdim      ! declared length of middle dimension
    2834             :    integer, intent(in) :: ldim      ! declared length of last dimension
    2835             :    integer, intent(in) :: hdim1d    ! declared first horizontal index
    2836             :                                     ! dimension
    2837             :    integer, intent(in):: localchunks(fdim,pcols,mdim,begchunk:endchunk,ldim) ! local chunks
    2838             : 
    2839             :    integer, intent(out) :: globalfield(fdim,hdim1d,mdim,hdim2_d,ldim) ! global field
    2840             : 
    2841             : !---------------------------Local workspace-----------------------------
    2842             : 
    2843             :    integer :: f,i,m,l,p                  ! loop indices
    2844             :    integer :: cid                        ! global chunk id
    2845             :    integer :: lcid                       ! local chunk id
    2846             :    integer :: lid                        ! local column index
    2847             :    integer :: gcol                       ! global column index
    2848             :    integer :: h1                         ! first horizontal dimension index
    2849             :    integer :: h2                         ! second horizontal dimension index
    2850             : 
    2851             : #if ( defined SPMD )
    2852           0 :    integer gfield_p(fdim,mdim,ldim,ngcols)
    2853             :                                          ! vector to be gathered
    2854           0 :    integer lfield_p(fdim,mdim,ldim,nlcols)
    2855             :                                          ! local component of gather
    2856             :                                          !  vector
    2857           0 :    integer :: displs(0:npes-1)           ! gather displacements
    2858           0 :    integer :: rcvcnts(0:npes-1)          ! gather receive count
    2859             :    integer :: sendcnt                    ! gather send counts
    2860             :    integer :: beglcol                    ! beginning index for local columns
    2861             :                                          !  in global column ordering
    2862             : #endif
    2863             : 
    2864             : !-----------------------------------------------------------------------
    2865           0 :    if (hdim1d < hdim1_d) then
    2866           0 :       call endrun ('GATHER_CHUNK_TO_FIELD_INT error: hdim1d < hdim1_d')
    2867             :    endif
    2868             : #if ( defined SPMD )
    2869           0 :    displs(0) = 0
    2870           0 :    rcvcnts(0) = fdim*mdim*ldim*gs_col_num(0)
    2871           0 :    beglcol = 0
    2872           0 :    do p=1,npes-1
    2873           0 :      displs(p) = displs(p-1) + rcvcnts(p-1)
    2874           0 :      rcvcnts(p) = fdim*mdim*ldim*gs_col_num(p)
    2875           0 :      if (p <= iam) then
    2876           0 :         beglcol = beglcol + gs_col_num(p-1)
    2877             :      endif
    2878             :    enddo
    2879           0 :    sendcnt = fdim*mdim*ldim*nlcols
    2880             : 
    2881             : ! copy into local gather data structure
    2882             : 
    2883           0 :    do l=1,ldim
    2884           0 :       do i=1,nlcols
    2885           0 :          cid = pgcols(beglcol+i)%chunk
    2886           0 :          lcid = chunks(cid)%lcid
    2887           0 :          lid = pgcols(beglcol+i)%ccol
    2888           0 :          do m=1,mdim
    2889           0 :             do f=1,fdim
    2890           0 :                lfield_p(f, m, l, i) = &
    2891           0 :                   localchunks(f,lid,m,lcid,l)
    2892             :             end do
    2893             :          end do
    2894             :       end do
    2895             :    end do
    2896             : 
    2897             : ! gather from other processes
    2898             : 
    2899           0 :    call t_barrierf('sync_gath_ctof', mpicom)
    2900             :    call fc_gathervint(lfield_p, sendcnt, mpiint, &
    2901           0 :                       gfield_p, rcvcnts, displs, mpiint, 0, mpicom)
    2902             : 
    2903           0 :    if (masterproc) then
    2904             : 
    2905             : ! copy gathered columns into lon/lat field
    2906             : 
    2907           0 :       do i=1,num_global_phys_cols
    2908           0 :          cid  = pgcols(i)%chunk
    2909           0 :          lid  = pgcols(i)%ccol
    2910           0 :          gcol = chunks(cid)%gcol(lid)
    2911           0 :          h2   = (gcol-1)/hdim1_d + 1
    2912           0 :          h1   = mod((gcol-1),hdim1_d) + 1
    2913           0 :          do l=1,ldim
    2914           0 :             do m=1,mdim
    2915           0 :                do f=1,fdim
    2916           0 :                   globalfield(f, h1, m, h2, l)    &
    2917           0 :                   = gfield_p(f,m,l,i)
    2918             :                end do
    2919             :             end do
    2920             :          end do
    2921             :       end do
    2922             :    endif
    2923             : 
    2924             : #else
    2925             : 
    2926             :    ! copy chunked data structure into lon/lat field
    2927             :    ! (pgcol ordering chosen to reflect begchunk:endchunk
    2928             :    !  local ordering)
    2929             :    do l=1,ldim
    2930             :       do i=1,num_global_phys_cols
    2931             :          cid  = pgcols(i)%chunk
    2932             :          lcid = chunks(cid)%lcid
    2933             :          lid  = pgcols(i)%ccol
    2934             :          gcol = chunks(cid)%gcol(lid)
    2935             :          h2   = (gcol-1)/hdim1_d + 1
    2936             :          h1   = mod((gcol-1),hdim1_d) + 1
    2937             :          do m=1,mdim
    2938             :             do f=1,fdim
    2939             :                globalfield(f, h1, m, h2, l)    &
    2940             :                = localchunks(f,lid,m,lcid,l)
    2941             :             end do
    2942             :          end do
    2943             :       end do
    2944             :    end do
    2945             : 
    2946             : #endif
    2947             : 
    2948           0 :    return
    2949             :    end subroutine gather_chunk_to_field_int
    2950             : 
    2951             : !
    2952             : !========================================================================
    2953             : !
    2954           0 :    subroutine write_field_from_chunk(iu,fdim,mdim,ldim,localchunks)
    2955             : 
    2956             : !-----------------------------------------------------------------------
    2957             : !
    2958             : !
    2959             : ! Purpose: Write field from decomposed chunk data
    2960             : !          structure
    2961             : !
    2962             : ! Method:
    2963             : !
    2964             : ! Author: Patrick Worley
    2965             : !
    2966             : !------------------------------Arguments--------------------------------
    2967             :    integer, intent(in) :: iu        ! logical unit
    2968             :    integer, intent(in) :: fdim      ! declared length of first dimension
    2969             :    integer, intent(in) :: mdim      ! declared length of middle dimension
    2970             :    integer, intent(in) :: ldim      ! declared length of last dimension
    2971             :    real(r8), intent(in):: localchunks(fdim,pcols,mdim,begchunk:endchunk,ldim) ! local chunks
    2972             : 
    2973             : !---------------------------Local workspace-----------------------------
    2974             : 
    2975             :    integer :: ioerr                 ! error return
    2976             : 
    2977           0 :    real(r8), allocatable :: globalfield(:,:,:,:,:)
    2978             :                                     ! global field
    2979             : !-----------------------------------------------------------------------
    2980             : 
    2981           0 :    allocate(globalfield(fdim,hdim1_d,mdim,hdim2_d,ldim))
    2982             : 
    2983           0 :    call gather_chunk_to_field (fdim,mdim,ldim,hdim1_d,localchunks,globalfield)
    2984             : 
    2985           0 :    if (masterproc) then
    2986           0 :       write (iu,iostat=ioerr) globalfield
    2987           0 :       if (ioerr /= 0 ) then
    2988           0 :          write(iulog,*) 'WRITE_FIELD_FROM_CHUNK ioerror ', ioerr,' on i/o unit = ',iu
    2989           0 :          call endrun
    2990             :       end if
    2991             :    endif
    2992             : 
    2993           0 :    deallocate(globalfield)
    2994             : 
    2995           0 :    return
    2996             :    end subroutine write_field_from_chunk
    2997             : 
    2998             : !
    2999             : !========================================================================
    3000             : !
    3001           0 :    subroutine read_chunk_from_field(iu,fdim,mdim,ldim,localchunks)
    3002             : 
    3003             : !-----------------------------------------------------------------------
    3004             : !
    3005             : !
    3006             : ! Purpose: Write field from decomposed chunk data
    3007             : !          structure
    3008             : !
    3009             : ! Method:
    3010             : !
    3011             : ! Author: Patrick Worley
    3012             : !
    3013             : !------------------------------Arguments--------------------------------
    3014             :    integer, intent(in) :: iu        ! logical unit
    3015             :    integer, intent(in) :: fdim      ! declared length of first dimension
    3016             :    integer, intent(in) :: mdim      ! declared length of middle dimension
    3017             :    integer, intent(in) :: ldim      ! declared length of last dimension
    3018             : 
    3019             :    real(r8), intent(out):: localchunks(fdim,pcols,mdim,begchunk:endchunk,ldim) ! local chunks
    3020             : 
    3021             : !---------------------------Local workspace-----------------------------
    3022             : 
    3023             :    integer :: ioerr                 ! error return
    3024             : 
    3025           0 :    real(r8), allocatable :: globalfield(:,:,:,:,:)
    3026             :                                     ! global field
    3027             : !-----------------------------------------------------------------------
    3028             : 
    3029           0 :    allocate(globalfield(fdim,hdim1_d,mdim,hdim2_d,ldim))
    3030             : 
    3031           0 :    if (masterproc) then
    3032           0 :       read (iu,iostat=ioerr) globalfield
    3033           0 :       if (ioerr /= 0 ) then
    3034           0 :          write(iulog,*) 'READ_CHUNK_FROM_FIELD ioerror ', ioerr,' on i/o unit = ',iu
    3035           0 :          call endrun
    3036             :       end if
    3037             :    endif
    3038             : 
    3039           0 :    call scatter_field_to_chunk (fdim,mdim,ldim,hdim1_d,globalfield,localchunks)
    3040             : 
    3041           0 :    deallocate(globalfield)
    3042             : 
    3043           0 :    return
    3044             :    end subroutine read_chunk_from_field
    3045             : !
    3046             : !========================================================================
    3047             : 
    3048       16128 :    subroutine transpose_block_to_chunk(record_size, block_buffer, &
    3049       16128 :                                        chunk_buffer, window)
    3050             : 
    3051             : !-----------------------------------------------------------------------
    3052             : !
    3053             : ! Purpose: Transpose buffer containing decomposed
    3054             : !          fields to buffer
    3055             : !          containing decomposed chunk data structures
    3056             : !
    3057             : ! Method:
    3058             : !
    3059             : ! Author: Patrick Worley
    3060             : ! Modified: Art Mirin, Jan 04, to add support for mod_comm
    3061             : !
    3062             : !-----------------------------------------------------------------------
    3063             : #if ( defined SPMD )
    3064             : # if defined(MODCM_DP_TRANSPOSE)
    3065             :    use mod_comm, only: blockdescriptor, mp_sendirr, mp_recvirr,  &
    3066             :                        get_partneroffset, max_nparcels
    3067             :    use mpishorthand,  only : mpicom
    3068             : # endif
    3069             :    use spmd_utils,    only: altalltoallv
    3070             : #endif
    3071             : !------------------------------Parameters-------------------------------
    3072             : !
    3073             :   integer, parameter :: msgtag  = 6000
    3074             : !------------------------------Arguments--------------------------------
    3075             :    integer, intent(in) :: record_size  ! per column amount of data
    3076             :    real(r8), intent(in) :: block_buffer(record_size*block_buf_nrecs)
    3077             :                                        ! buffer of block data to be
    3078             :                                        ! transposed
    3079             :    real(r8), intent(out):: chunk_buffer(record_size*chunk_buf_nrecs)
    3080             :                                        ! buffer of chunk data
    3081             :                                        ! transposed into
    3082             :    integer, intent(in), optional :: window
    3083             :                                        ! MPI-2 window id for
    3084             :                                        ! chunk_buffer
    3085             : 
    3086             : !---------------------------Local workspace-----------------------------
    3087             : #if ( defined SPMD )
    3088             :    integer :: p                        ! loop indices
    3089             :    integer :: bbuf_siz                 ! size of block_buffer
    3090             :    integer :: cbuf_siz                 ! size of chunk_buffer
    3091             :    integer :: lwindow                  ! placeholder for missing window
    3092             :    integer :: lopt                     ! local copy of phys_alltoall
    3093             : !
    3094             :    logical, save :: first = .true.
    3095             :    integer, allocatable, save :: sndcnts(:), sdispls(:)
    3096             :    integer, allocatable, save :: rcvcnts(:), rdispls(:)
    3097             :    integer, allocatable, save :: pdispls(:)
    3098             :    integer, save :: prev_record_size = 0
    3099             : # if defined(MODCM_DP_TRANSPOSE)
    3100             :    type (blockdescriptor), allocatable, save :: sendbl(:), recvbl(:)
    3101             :    integer ione, ierror, mod_method
    3102             : # endif
    3103             : !-----------------------------------------------------------------------
    3104       16128 :    if (first) then
    3105             : ! Compute send/recv/put counts and displacements
    3106        4608 :       allocate(sndcnts(0:npes-1))
    3107        3072 :       allocate(sdispls(0:npes-1))
    3108        3072 :       allocate(rcvcnts(0:npes-1))
    3109        3072 :       allocate(rdispls(0:npes-1))
    3110        3072 :       allocate(pdispls(0:npes-1))
    3111             : !
    3112             : # if defined(MODCM_DP_TRANSPOSE)
    3113             : ! This branch uses mod_comm. Admissable values of phys_alltoall are
    3114             : ! 11,12 and 13. Each value corresponds to a different option
    3115             : ! within mod_comm of implementing the communication. That option is expressed
    3116             : ! internally to mod_comm using the variable mod_method defined below;
    3117             : ! mod_method will have values 0,1 or 2 and is defined as
    3118             : ! phys_alltoall - modmin_alltoall, where modmin_alltoall equals 11.
    3119             : ! Also, sendbl and recvbl must have exactly npes elements, to match
    3120             : ! this size of the communicator, or the transpose will fail.
    3121             : !
    3122             :       if (phys_alltoall >= modmin_alltoall) then
    3123             :          mod_method = phys_alltoall - modmin_alltoall
    3124             :          ione = 1
    3125             :          allocate( sendbl(0:npes-1) )
    3126             :          allocate( recvbl(0:npes-1) )
    3127             : 
    3128             :          do p = 0,npes-1
    3129             : 
    3130             :             sendbl(p)%method = mod_method
    3131             :             recvbl(p)%method = mod_method
    3132             : 
    3133             :             allocate( sendbl(p)%blocksizes(1) )
    3134             :             allocate( sendbl(p)%displacements(1) )
    3135             :             allocate( recvbl(p)%blocksizes(1) )
    3136             :             allocate( recvbl(p)%displacements(1) )
    3137             : 
    3138             :          enddo
    3139             : 
    3140             :       endif
    3141             : # endif
    3142             : 
    3143        1536 :       first = .false.
    3144             :    endif
    3145             : !
    3146       16128 :    if (record_size /= prev_record_size) then
    3147             : !
    3148             : ! Compute send/recv/put counts and displacements
    3149        1536 :       sdispls(0) = 0
    3150        1536 :       sndcnts(0) = record_size*btofc_blk_num(0)
    3151     1179648 :       do p=1,npes-1
    3152     1178112 :         sdispls(p) = sdispls(p-1) + sndcnts(p-1)
    3153     1179648 :         sndcnts(p) = record_size*btofc_blk_num(p)
    3154             :       enddo
    3155             : !
    3156        1536 :       rdispls(0) = 0
    3157        1536 :       rcvcnts(0) = record_size*btofc_chk_num(0)
    3158     1179648 :       do p=1,npes-1
    3159     1178112 :          rdispls(p) = rdispls(p-1) + rcvcnts(p-1)
    3160     1179648 :          rcvcnts(p) = record_size*btofc_chk_num(p)
    3161             :       enddo
    3162             : !
    3163        1536 :       call mpialltoallint(rdispls, 1, pdispls, 1, mpicom)
    3164             : !
    3165             : # if defined(MODCM_DP_TRANSPOSE)
    3166             :       if (phys_alltoall >= modmin_alltoall) then
    3167             :          do p = 0,npes-1
    3168             : 
    3169             :             sendbl(p)%type = MPI_DATATYPE_NULL
    3170             :             if ( sndcnts(p) /= 0 ) then
    3171             : 
    3172             :                if (phys_alltoall > modmin_alltoall) then
    3173             :                   call MPI_TYPE_INDEXED(ione, sndcnts(p),   &
    3174             :                        sdispls(p), mpir8, &
    3175             :                        sendbl(p)%type, ierror)
    3176             :                   call MPI_TYPE_COMMIT(sendbl(p)%type, ierror)
    3177             :                endif
    3178             : 
    3179             :                sendbl(p)%blocksizes(1) = sndcnts(p)
    3180             :                sendbl(p)%displacements(1) = sdispls(p)
    3181             :                sendbl(p)%partneroffset = 0
    3182             : 
    3183             :             else
    3184             : 
    3185             :                sendbl(p)%blocksizes(1) = 0
    3186             :                sendbl(p)%displacements(1) = 0
    3187             :                sendbl(p)%partneroffset = 0
    3188             : 
    3189             :             endif
    3190             :             sendbl(p)%nparcels = size(sendbl(p)%displacements)
    3191             :             sendbl(p)%tot_size = sum(sendbl(p)%blocksizes)
    3192             :             max_nparcels = max(max_nparcels, sendbl(p)%nparcels)
    3193             : 
    3194             :             recvbl(p)%type = MPI_DATATYPE_NULL
    3195             :             if ( rcvcnts(p) /= 0) then
    3196             : 
    3197             :                if (phys_alltoall > modmin_alltoall) then
    3198             :                   call MPI_TYPE_INDEXED(ione, rcvcnts(p),   &
    3199             :                        rdispls(p), mpir8, &
    3200             :                        recvbl(p)%type, ierror)
    3201             :                   call MPI_TYPE_COMMIT(recvbl(p)%type, ierror)
    3202             :                endif
    3203             : 
    3204             :                recvbl(p)%blocksizes(1) = rcvcnts(p)
    3205             :                recvbl(p)%displacements(1) = rdispls(p)
    3206             :                recvbl(p)%partneroffset = 0 ! not properly initialized - do not use Mpi2
    3207             :             else
    3208             : 
    3209             :                recvbl(p)%blocksizes(1) = 0
    3210             :                recvbl(p)%displacements(1) = 0
    3211             :                recvbl(p)%partneroffset = 0
    3212             : 
    3213             :             endif
    3214             :             recvbl(p)%nparcels = size(recvbl(p)%displacements)
    3215             :             recvbl(p)%tot_size = sum(recvbl(p)%blocksizes)
    3216             :             max_nparcels = max(max_nparcels, recvbl(p)%nparcels)
    3217             : 
    3218             :          enddo
    3219             : 
    3220             :          call get_partneroffset(mpicom, sendbl, recvbl)
    3221             : 
    3222             :       endif
    3223             : # endif
    3224             : !
    3225        1536 :       prev_record_size = record_size
    3226             :    endif
    3227             : !
    3228       16128 :    call t_barrierf('sync_tran_btoc', mpicom)
    3229       16128 :    if (phys_alltoall < 0) then
    3230       16128 :       if ((max_nproc_smpx > npes/2) .and. (nproc_busy_d > npes/2)) then
    3231       16128 :          lopt = 0
    3232             :       else
    3233           0 :          lopt = 1
    3234             :       endif
    3235             :    else
    3236           0 :       lopt = phys_alltoall
    3237           0 :       if ((lopt == 2) .and. ( .not. present(window) )) lopt = 1
    3238             :    endif
    3239       16128 :    if (lopt < 4) then
    3240             : !
    3241       16128 :       bbuf_siz = record_size*block_buf_nrecs
    3242       16128 :       cbuf_siz = record_size*chunk_buf_nrecs
    3243       16128 :       if ( present(window) ) then
    3244             :          call altalltoallv(lopt, iam, npes,    &
    3245             :                            dp_coup_steps, dp_coup_proc, &
    3246             :                            block_buffer, bbuf_siz, sndcnts, sdispls, mpir8, &
    3247             :                            chunk_buffer, cbuf_siz, rcvcnts, rdispls, mpir8, &
    3248           0 :                            msgtag, pdispls, mpir8, window, mpicom)
    3249             :       else
    3250             :          call altalltoallv(lopt, iam, npes,    &
    3251             :                            dp_coup_steps, dp_coup_proc, &
    3252             :                            block_buffer, bbuf_siz, sndcnts, sdispls, mpir8, &
    3253             :                            chunk_buffer, cbuf_siz, rcvcnts, rdispls, mpir8, &
    3254       16128 :                            msgtag, pdispls, mpir8, lwindow, mpicom)
    3255             :       endif
    3256             : !
    3257             :    else
    3258             : !
    3259             : # if defined(MODCM_DP_TRANSPOSE)
    3260             :       call mp_sendirr(mpicom, sendbl, recvbl, block_buffer, chunk_buffer)
    3261             :       call mp_recvirr(mpicom, sendbl, recvbl, block_buffer, chunk_buffer)
    3262             : # else
    3263             :       call mpialltoallv(block_buffer, sndcnts, sdispls, mpir8, &
    3264             :                         chunk_buffer, rcvcnts, rdispls, mpir8, &
    3265           0 :                         mpicom)
    3266             : # endif
    3267             : !
    3268             :    endif
    3269             : !
    3270             : #endif
    3271       16128 :    return
    3272             :    end subroutine transpose_block_to_chunk
    3273             : !
    3274             : !========================================================================
    3275             : 
    3276       16128 :    subroutine block_to_chunk_send_pters(blockid, fdim, ldim, &
    3277       16128 :                                         record_size, pter)
    3278             : !-----------------------------------------------------------------------
    3279             : !
    3280             : ! Purpose: Return pointers into send buffer where column from decomposed
    3281             : !          fields should be copied to
    3282             : !
    3283             : ! Method:
    3284             : !
    3285             : ! Author: Patrick Worley
    3286             : !
    3287             : !-----------------------------------------------------------------------
    3288             : !------------------------------Arguments--------------------------------
    3289             :    integer, intent(in) :: blockid      ! block index
    3290             :    integer, intent(in) :: fdim         ! first dimension of pter array
    3291             :    integer, intent(in) :: ldim         ! last dimension of pter array
    3292             :    integer, intent(in) :: record_size  ! per coordinate amount of data
    3293             : 
    3294             :    integer, intent(out) :: pter(fdim,ldim)  ! buffer offsets
    3295             : !---------------------------Local workspace-----------------------------
    3296             :    integer :: i, k                     ! loop indices
    3297             : !-----------------------------------------------------------------------
    3298       16128 :    if ((btofc_blk_offset(blockid)%ncols > fdim) .or. &
    3299             :        (btofc_blk_offset(blockid)%nlvls > ldim)) then
    3300           0 :       write(iulog,*) "BLOCK_TO_CHUNK_SEND_PTERS: pter array dimensions ", &
    3301           0 :                  "not large enough: (",fdim,",",ldim,") not >= (", &
    3302           0 :                   btofc_blk_offset(blockid)%ncols,",", &
    3303           0 :                   btofc_blk_offset(blockid)%nlvls,")"
    3304           0 :       call endrun()
    3305             :    endif
    3306             : !
    3307      548352 :    do k=1,btofc_blk_offset(blockid)%nlvls
    3308    38852352 :       do i=1,btofc_blk_offset(blockid)%ncols
    3309    76640256 :          pter(i,k) = 1 + record_size* &
    3310   115492608 :                      (btofc_blk_offset(blockid)%pter(i,k))
    3311             :       enddo
    3312      548352 :       do i=btofc_blk_offset(blockid)%ncols+1,fdim
    3313      532224 :          pter(i,k) = -1
    3314             :       enddo
    3315             :    enddo
    3316             : !
    3317       16128 :    do k=btofc_blk_offset(blockid)%nlvls+1,ldim
    3318       16128 :       do i=1,fdim
    3319           0 :          pter(i,k) = -1
    3320             :       enddo
    3321             :    enddo
    3322             : !
    3323       16128 :    return
    3324             :    end subroutine block_to_chunk_send_pters
    3325             : !
    3326             : !========================================================================
    3327             : 
    3328       80640 :    subroutine block_to_chunk_recv_pters(lcid, fdim, ldim, &
    3329       80640 :                                         record_size, pter)
    3330             : !-----------------------------------------------------------------------
    3331             : !
    3332             : ! Purpose: Return pointers into receive buffer where data for
    3333             : !          decomposed chunk data structures should be copied from
    3334             : !
    3335             : ! Method:
    3336             : !
    3337             : ! Author: Patrick Worley
    3338             : !
    3339             : !-----------------------------------------------------------------------
    3340             : !------------------------------Arguments--------------------------------
    3341             :    integer, intent(in) :: lcid         ! local chunk id
    3342             :    integer, intent(in) :: fdim         ! first dimension of pter array
    3343             :    integer, intent(in) :: ldim         ! last dimension of pter array
    3344             :    integer, intent(in) :: record_size  ! per coordinate amount of data
    3345             : 
    3346             :    integer, intent(out) :: pter(fdim,ldim)  ! buffer offset
    3347             : !---------------------------Local workspace-----------------------------
    3348             :    integer :: i, k                     ! loop indices
    3349             : !-----------------------------------------------------------------------
    3350       80640 :    if ((btofc_chk_offset(lcid)%ncols > fdim) .or. &
    3351             :        (btofc_chk_offset(lcid)%nlvls > ldim)) then
    3352           0 :       write(iulog,*) "BLOCK_TO_CHUNK_RECV_PTERS: pter array dimensions ", &
    3353           0 :                  "not large enough: (",fdim,",",ldim,") not >= (", &
    3354           0 :                   btofc_chk_offset(lcid)%ncols,",", &
    3355           0 :                   btofc_chk_offset(lcid)%nlvls,")"
    3356           0 :       call endrun()
    3357             :    endif
    3358             : !
    3359     2741760 :    do k=1,btofc_chk_offset(lcid)%nlvls
    3360    40981248 :       do i=1,btofc_chk_offset(lcid)%ncols
    3361    76640256 :          pter(i,k) = 1 + record_size* &
    3362   117621504 :                      (btofc_chk_offset(lcid)%pter(i,k))
    3363             :       enddo
    3364     6999552 :       do i=btofc_chk_offset(lcid)%ncols+1,fdim
    3365     6918912 :          pter(i,k) = -1
    3366             :       enddo
    3367             :    enddo
    3368             : !
    3369       80640 :    do k=btofc_chk_offset(lcid)%nlvls+1,ldim
    3370       80640 :       do i=1,fdim
    3371           0 :          pter(i,k) = -1
    3372             :       enddo
    3373             :    enddo
    3374             : !
    3375       80640 :    return
    3376             :    end subroutine block_to_chunk_recv_pters
    3377             : !
    3378             : !========================================================================
    3379             : 
    3380       14592 :    subroutine transpose_chunk_to_block(record_size, chunk_buffer, &
    3381       14592 :                                        block_buffer, window)
    3382             : !-----------------------------------------------------------------------
    3383             : !
    3384             : ! Purpose: Transpose buffer containing decomposed
    3385             : !          chunk data structures to buffer
    3386             : !          containing decomposed fields
    3387             : !
    3388             : ! Method:
    3389             : !
    3390             : ! Author: Patrick Worley
    3391             : !
    3392             : !-----------------------------------------------------------------------
    3393             : #if ( defined SPMD )
    3394             : # if defined(MODCM_DP_TRANSPOSE)
    3395             :    use mod_comm, only: blockdescriptor, mp_sendirr, mp_recvirr,  &
    3396             :                        get_partneroffset, max_nparcels
    3397             :    use mpishorthand,  only : mpicom
    3398             : # endif
    3399             :    use spmd_utils,    only: altalltoallv
    3400             : #endif
    3401             : !------------------------------Parameters-------------------------------
    3402             : !
    3403             :   integer, parameter :: msgtag  = 7000
    3404             : !------------------------------Arguments--------------------------------
    3405             :    integer, intent(in) :: record_size  ! per column amount of data
    3406             :    real(r8), intent(in):: chunk_buffer(record_size*chunk_buf_nrecs)
    3407             :                                        ! buffer of chunk data to be
    3408             :                                        ! transposed
    3409             :    real(r8), intent(out) :: block_buffer(record_size*block_buf_nrecs)
    3410             :                                        ! buffer of block data to
    3411             :                                        ! transpose into
    3412             :    integer, intent(in), optional :: window
    3413             :                                        ! MPI-2 window id for
    3414             :                                        ! chunk_buffer
    3415             : 
    3416             : !---------------------------Local workspace-----------------------------
    3417             : #if ( defined SPMD )
    3418             :    integer :: p                        ! loop indices
    3419             :    integer :: bbuf_siz                 ! size of block_buffer
    3420             :    integer :: cbuf_siz                 ! size of chunk_buffer
    3421             :    integer :: lwindow                  ! placeholder for missing window
    3422             :    integer :: lopt                     ! local copy of phys_alltoall
    3423             : !
    3424             :    logical, save :: first = .true.
    3425             :    integer, allocatable, save :: sndcnts(:), sdispls(:)
    3426             :    integer, allocatable, save :: rcvcnts(:), rdispls(:)
    3427             :    integer, allocatable, save :: pdispls(:)
    3428             :    integer, save :: prev_record_size = 0
    3429             : # if defined(MODCM_DP_TRANSPOSE)
    3430             :    type (blockdescriptor), allocatable, save :: sendbl(:), recvbl(:)
    3431             :    integer ione, ierror, mod_method
    3432             : # endif
    3433             : !-----------------------------------------------------------------------
    3434       14592 :    if (first) then
    3435             : ! Compute send/recv/put counts and displacements
    3436        4608 :       allocate(sndcnts(0:npes-1))
    3437        3072 :       allocate(sdispls(0:npes-1))
    3438        3072 :       allocate(rcvcnts(0:npes-1))
    3439        3072 :       allocate(rdispls(0:npes-1))
    3440        3072 :       allocate(pdispls(0:npes-1))
    3441             : !
    3442             : # if defined(MODCM_DP_TRANSPOSE)
    3443             : ! This branch uses mod_comm. Admissable values of phys_alltoall are
    3444             : ! 11,12 and 13. Each value corresponds to a differerent option
    3445             : ! within mod_comm of implementing the communication. That option is expressed
    3446             : ! internally to mod_comm using the variable mod_method defined below;
    3447             : ! mod_method will have values 0,1 or 2 and is defined as
    3448             : ! phys_alltoall - modmin_alltoall, where modmin_alltoall equals 11.
    3449             : ! Also, sendbl and recvbl must have exactly npes elements, to match
    3450             : ! this size of the communicator, or the transpose will fail.
    3451             : !
    3452             :       if (phys_alltoall >= modmin_alltoall) then
    3453             :          mod_method = phys_alltoall - modmin_alltoall
    3454             :          ione = 1
    3455             :          allocate( sendbl(0:npes-1) )
    3456             :          allocate( recvbl(0:npes-1) )
    3457             : 
    3458             :          do p = 0,npes-1
    3459             : 
    3460             :             sendbl(p)%method = mod_method
    3461             :             recvbl(p)%method = mod_method
    3462             : 
    3463             :             allocate( sendbl(p)%blocksizes(1) )
    3464             :             allocate( sendbl(p)%displacements(1) )
    3465             :             allocate( recvbl(p)%blocksizes(1) )
    3466             :             allocate( recvbl(p)%displacements(1) )
    3467             : 
    3468             :          enddo
    3469             : 
    3470             :       endif
    3471             : # endif
    3472             : !
    3473        1536 :       first = .false.
    3474             :    endif
    3475             : !
    3476       14592 :    if (record_size /= prev_record_size) then
    3477             : !
    3478             : ! Compute send/recv/put counts and displacements
    3479        1536 :       sdispls(0) = 0
    3480        1536 :       sndcnts(0) = record_size*btofc_chk_num(0)
    3481     1179648 :       do p=1,npes-1
    3482     1178112 :         sdispls(p) = sdispls(p-1) + sndcnts(p-1)
    3483     1179648 :         sndcnts(p) = record_size*btofc_chk_num(p)
    3484             :       enddo
    3485             : !
    3486        1536 :       rdispls(0) = 0
    3487        1536 :       rcvcnts(0) = record_size*btofc_blk_num(0)
    3488     1179648 :       do p=1,npes-1
    3489     1178112 :          rdispls(p) = rdispls(p-1) + rcvcnts(p-1)
    3490     1179648 :          rcvcnts(p) = record_size*btofc_blk_num(p)
    3491             :       enddo
    3492             : !
    3493        1536 :       call mpialltoallint(rdispls, 1, pdispls, 1, mpicom)
    3494             : !
    3495             : # if defined(MODCM_DP_TRANSPOSE)
    3496             :       if (phys_alltoall >= modmin_alltoall) then
    3497             :          do p = 0,npes-1
    3498             : 
    3499             :             sendbl(p)%type = MPI_DATATYPE_NULL
    3500             :             if ( sndcnts(p) /= 0 ) then
    3501             : 
    3502             :                if (phys_alltoall > modmin_alltoall) then
    3503             :                   call MPI_TYPE_INDEXED(ione, sndcnts(p),   &
    3504             :                        sdispls(p), mpir8, &
    3505             :                        sendbl(p)%type, ierror)
    3506             :                   call MPI_TYPE_COMMIT(sendbl(p)%type, ierror)
    3507             :                endif
    3508             : 
    3509             :                sendbl(p)%blocksizes(1) = sndcnts(p)
    3510             :                sendbl(p)%displacements(1) = sdispls(p)
    3511             :                sendbl(p)%partneroffset = 0
    3512             : 
    3513             :             else
    3514             : 
    3515             :                sendbl(p)%blocksizes(1) = 0
    3516             :                sendbl(p)%displacements(1) = 0
    3517             :                sendbl(p)%partneroffset = 0
    3518             : 
    3519             :             endif
    3520             :             sendbl(p)%nparcels = size(sendbl(p)%displacements)
    3521             :             sendbl(p)%tot_size = sum(sendbl(p)%blocksizes)
    3522             :             max_nparcels = max(max_nparcels, sendbl(p)%nparcels)
    3523             : 
    3524             :             recvbl(p)%type = MPI_DATATYPE_NULL
    3525             :             if ( rcvcnts(p) /= 0) then
    3526             : 
    3527             :                if (phys_alltoall > modmin_alltoall) then
    3528             :                   call MPI_TYPE_INDEXED(ione, rcvcnts(p),   &
    3529             :                        rdispls(p), mpir8, &
    3530             :                        recvbl(p)%type, ierror)
    3531             :                   call MPI_TYPE_COMMIT(recvbl(p)%type, ierror)
    3532             :                endif
    3533             : 
    3534             :                recvbl(p)%blocksizes(1) = rcvcnts(p)
    3535             :                recvbl(p)%displacements(1) = rdispls(p)
    3536             :                recvbl(p)%partneroffset = 0 ! not properly initialized - do not use Mpi2
    3537             :             else
    3538             : 
    3539             :                recvbl(p)%blocksizes(1) = 0
    3540             :                recvbl(p)%displacements(1) = 0
    3541             :                recvbl(p)%partneroffset = 0
    3542             : 
    3543             :             endif
    3544             :             recvbl(p)%nparcels = size(recvbl(p)%displacements)
    3545             :             recvbl(p)%tot_size = sum(recvbl(p)%blocksizes)
    3546             :             max_nparcels = max(max_nparcels, recvbl(p)%nparcels)
    3547             : 
    3548             :          enddo
    3549             : 
    3550             :          call get_partneroffset(mpicom, sendbl, recvbl)
    3551             : 
    3552             :       endif
    3553             : # endif
    3554             : !
    3555        1536 :       prev_record_size = record_size
    3556             :    endif
    3557             : !
    3558       14592 :    call t_barrierf('sync_tran_ctob', mpicom)
    3559       14592 :    if (phys_alltoall < 0) then
    3560       14592 :       if ((max_nproc_smpx > npes/2) .and. (nproc_busy_d > npes/2)) then
    3561       14592 :          lopt = 0
    3562             :       else
    3563           0 :          lopt = 1
    3564             :       endif
    3565             :    else
    3566           0 :       lopt = phys_alltoall
    3567           0 :       if ((lopt == 2) .and. ( .not. present(window) )) lopt = 1
    3568             :    endif
    3569       14592 :    if (lopt < 4) then
    3570             : !
    3571       14592 :       bbuf_siz = record_size*block_buf_nrecs
    3572       14592 :       cbuf_siz = record_size*chunk_buf_nrecs
    3573       14592 :       if ( present(window) ) then
    3574             :          call altalltoallv(lopt, iam, npes,    &
    3575             :                            dp_coup_steps, dp_coup_proc, &
    3576             :                            chunk_buffer, cbuf_siz, sndcnts, sdispls, mpir8, &
    3577             :                            block_buffer, bbuf_siz, rcvcnts, rdispls, mpir8, &
    3578           0 :                            msgtag, pdispls, mpir8, window, mpicom)
    3579             :       else
    3580             :          call altalltoallv(lopt, iam, npes,    &
    3581             :                            dp_coup_steps, dp_coup_proc, &
    3582             :                            chunk_buffer, cbuf_siz, sndcnts, sdispls, mpir8, &
    3583             :                            block_buffer, bbuf_siz, rcvcnts, rdispls, mpir8, &
    3584       14592 :                            msgtag, pdispls, mpir8, lwindow, mpicom)
    3585             :       endif
    3586             : !
    3587             :    else
    3588             : # if defined(MODCM_DP_TRANSPOSE)
    3589             :       call mp_sendirr(mpicom, sendbl, recvbl, block_buffer, chunk_buffer)
    3590             :       call mp_recvirr(mpicom, sendbl, recvbl, block_buffer, chunk_buffer)
    3591             : # else
    3592             :       call mpialltoallv(chunk_buffer, sndcnts, sdispls, mpir8, &
    3593             :                         block_buffer, rcvcnts, rdispls, mpir8, &
    3594           0 :                         mpicom)
    3595             : # endif
    3596             : !
    3597             :    endif
    3598             : !
    3599             : #endif
    3600             : 
    3601       14592 :    return
    3602             :    end subroutine transpose_chunk_to_block
    3603             : !
    3604             : !========================================================================
    3605             : 
    3606       72960 :    subroutine chunk_to_block_send_pters(lcid, fdim, ldim, &
    3607       72960 :                                         record_size, pter)
    3608             : !-----------------------------------------------------------------------
    3609             : !
    3610             : ! Purpose: Return pointers into send buffer where data for
    3611             : !          decomposed chunk data structures should be copied to
    3612             : !
    3613             : ! Method:
    3614             : !
    3615             : ! Author: Patrick Worley
    3616             : !
    3617             : !-----------------------------------------------------------------------
    3618             : !------------------------------Arguments--------------------------------
    3619             :    integer, intent(in) :: lcid         ! local chunk id
    3620             :    integer, intent(in) :: fdim         ! first dimension of pter array
    3621             :    integer, intent(in) :: ldim         ! last dimension of pter array
    3622             :    integer, intent(in) :: record_size  ! per coordinate amount of data
    3623             : 
    3624             :    integer, intent(out) :: pter(fdim,ldim)  ! buffer offset
    3625             : !---------------------------Local workspace-----------------------------
    3626             :    integer :: i, k                     ! loop indices
    3627             : !-----------------------------------------------------------------------
    3628       72960 :    if ((btofc_chk_offset(lcid)%ncols > fdim) .or. &
    3629             :        (btofc_chk_offset(lcid)%nlvls > ldim)) then
    3630           0 :       write(iulog,*) "CHUNK_TO_BLOCK_SEND_PTERS: pter array dimensions ", &
    3631           0 :                  "not large enough: (",fdim,",",ldim,") not >= (", &
    3632           0 :                   btofc_chk_offset(lcid)%ncols,",", &
    3633           0 :                   btofc_chk_offset(lcid)%nlvls,")"
    3634           0 :       call endrun()
    3635             :    endif
    3636             : !
    3637     2480640 :    do k=1,btofc_chk_offset(lcid)%nlvls
    3638    37078272 :       do i=1,btofc_chk_offset(lcid)%ncols
    3639    69341184 :          pter(i,k) = 1 + record_size* &
    3640   106419456 :                      (btofc_chk_offset(lcid)%pter(i,k))
    3641             :       enddo
    3642     6332928 :       do i=btofc_chk_offset(lcid)%ncols+1,fdim
    3643     6259968 :          pter(i,k) = -1
    3644             :       enddo
    3645             :    enddo
    3646             : !
    3647       72960 :    do k=btofc_chk_offset(lcid)%nlvls+1,ldim
    3648       72960 :       do i=1,fdim
    3649           0 :          pter(i,k) = -1
    3650             :       enddo
    3651             :    enddo
    3652             : !
    3653       72960 :    return
    3654             :    end subroutine chunk_to_block_send_pters
    3655             : !
    3656             : !========================================================================
    3657             : 
    3658       14592 :    subroutine chunk_to_block_recv_pters(blockid, fdim, ldim, &
    3659       14592 :                                         record_size, pter)
    3660             : !-----------------------------------------------------------------------
    3661             : !
    3662             : ! Purpose: Return pointers into receive buffer where column from decomposed
    3663             : !          fields should be copied from
    3664             : !
    3665             : ! Method:
    3666             : !
    3667             : ! Author: Patrick Worley
    3668             : !
    3669             : !-----------------------------------------------------------------------
    3670             : !------------------------------Arguments--------------------------------
    3671             :    integer, intent(in) :: blockid      ! block index
    3672             :    integer, intent(in) :: fdim         ! first dimension of pter array
    3673             :    integer, intent(in) :: ldim         ! last dimension of pter array
    3674             :    integer, intent(in) :: record_size  ! per coordinate amount of data
    3675             : 
    3676             :    integer, intent(out) :: pter(fdim,ldim)  ! buffer offsets
    3677             : !---------------------------Local workspace-----------------------------
    3678             :    integer :: i, k                     ! loop indices
    3679             : !-----------------------------------------------------------------------
    3680       14592 :    if ((btofc_blk_offset(blockid)%ncols > fdim) .or. &
    3681             :        (btofc_blk_offset(blockid)%nlvls > ldim)) then
    3682           0 :       write(iulog,*) "CHUNK_TO_BLOCK_RECV_PTERS: pter array dimensions ", &
    3683           0 :                  "not large enough: (",fdim,",",ldim,") not >= (", &
    3684           0 :                   btofc_blk_offset(blockid)%ncols,",", &
    3685           0 :                   btofc_blk_offset(blockid)%nlvls,")"
    3686           0 :       call endrun()
    3687             :    endif
    3688             : !
    3689      496128 :    do k=1,btofc_blk_offset(blockid)%nlvls
    3690    35152128 :       do i=1,btofc_blk_offset(blockid)%ncols
    3691    69341184 :          pter(i,k) = 1 + record_size* &
    3692   104493312 :                      (btofc_blk_offset(blockid)%pter(i,k))
    3693             :       enddo
    3694      496128 :       do i=btofc_blk_offset(blockid)%ncols+1,fdim
    3695      481536 :          pter(i,k) = -1
    3696             :       enddo
    3697             :    enddo
    3698             : !
    3699       14592 :    do k=btofc_blk_offset(blockid)%nlvls+1,ldim
    3700       14592 :       do i=1,fdim
    3701           0 :          pter(i,k) = -1
    3702             :       enddo
    3703             :    enddo
    3704             : !
    3705       14592 :    return
    3706             :    end subroutine chunk_to_block_recv_pters
    3707             : !
    3708             : !========================================================================
    3709             : 
    3710        1536 :    subroutine create_chunks(opt, chunks_per_thread)
    3711             : !-----------------------------------------------------------------------
    3712             : !
    3713             : ! Purpose: Decompose physics computational grid into chunks, for
    3714             : !          improved serial efficiency and parallel load balance.
    3715             : !
    3716             : ! Method:
    3717             : !
    3718             : ! Author: Patrick Worley
    3719             : !
    3720             : !-----------------------------------------------------------------------
    3721             :    use pmgrid, only: plev
    3722             :    use dyn_grid, only: get_block_bounds_d, get_block_gcol_cnt_d, &
    3723             :                        get_gcol_block_cnt_d, get_gcol_block_d, &
    3724             :                        get_block_owner_d, get_block_gcol_d
    3725             : !------------------------------Arguments--------------------------------
    3726             :    integer, intent(in)  :: opt           ! chunking option
    3727             :       !  0: chunks may cross block boundaries, but retain same
    3728             :       !     process mapping as blocks. If possible, columns assigned
    3729             :       !     as day/night pairs. Columns (or pairs) are wrap-mapped.
    3730             :       !     May not work with vertically decomposed blocks. (default)
    3731             :       !  1: chunks may cross block boundaries, but retain same
    3732             :       !     SMP-node mapping as blocks.  If possible, columns assigned
    3733             :       !     as day/night pairs.  Columns (or pairs) are wrap-mapped.
    3734             :       !     May not work with vertically decomposed blocks.
    3735             :       !  2: 2-column day/night and season column pairs wrap-mapped
    3736             :       !     to chunks to also balance assignment of polar, mid-latitude,
    3737             :       !     and equatorial columns across  chunks.
    3738             :       !  3: same as 1 except that SMP defined to be pairs of consecutive
    3739             :       !     processes
    3740             :       !  4: chunks may cross block boundaries, but retain same
    3741             :       !     process mapping as blocks. Columns assigned to chunks
    3742             :       !     in block ordering.
    3743             :       !     May not work with vertically decomposed blocks.
    3744             :       !  5: Chunks do not cross latitude boundaries, and are block-mapped.
    3745             :    integer, intent(in)  :: chunks_per_thread
    3746             :                                          ! target number of chunks per
    3747             :                                          !  thread
    3748             : !---------------------------Local workspace-----------------------------
    3749             :    integer :: i, j, p                    ! loop indices
    3750             :    integer :: nlthreads                  ! number of local OpenMP threads
    3751        3072 :    integer :: npthreads(0:npes-1)        ! number of OpenMP threads per process
    3752        3072 :    integer :: proc_smp_mapx(0:npes-1)    ! process/virtual SMP node map
    3753             :    integer :: firstblock, lastblock      ! global block index bounds
    3754             :    integer :: maxblksiz                  ! maximum number of columns in a dynamics block
    3755             :    integer :: block_cnt                  ! number of blocks containing data
    3756             :                                          ! for a given vertical column
    3757             :    integer :: blockids(plev+1)           ! block indices
    3758             :    integer :: bcids(plev+1)              ! block column indices
    3759             :    integer :: nsmpx, nsmpy               ! virtual SMP node counts and indices
    3760             :    integer :: curgcol, twingcol          ! global physics and dynamics column indices
    3761             :    integer :: smp                        ! SMP node index
    3762             :    integer :: cid                        ! chunk id
    3763             :    integer :: jb, ib                     ! global block and columns indices
    3764             :    integer :: blksiz                     ! current block size
    3765             :    integer :: ntmp1, ntmp2, nlchunks     ! work variables
    3766             :    integer :: max_ncols                  ! upper bound on number of columns in a block
    3767             :    integer :: ncols                      ! number of columns in current chunk
    3768             :    logical :: error                      ! error flag
    3769             : 
    3770             :    ! indices for dynamics columns in given block
    3771        1536 :    integer, dimension(:), allocatable :: cols
    3772             : 
    3773             :    ! number of MPI processes per virtual SMP node (0:nsmpx-1)
    3774        1536 :    integer, dimension(:), allocatable :: nsmpprocs
    3775             : 
    3776             :    ! flag indicating whether a process is busy or idle during the dynamics (0:npes-1)
    3777        1536 :    logical, dimension(:), allocatable :: proc_busy_d
    3778             : 
    3779             :    ! flag indicating whether any of the processes assigned to an SMP node are busy
    3780             :    ! during the dynamics, or whether all of them are idle (0:nsmps-1)
    3781        1536 :    logical, dimension(:), allocatable :: smp_busy_d
    3782             : 
    3783             :    ! actual SMP node/virtual SMP node map (0:nsmps-1)
    3784        1536 :    integer, dimension(:), allocatable :: smp_smp_mapx
    3785             : 
    3786             :    ! column/virtual SMP node map (ngcols)
    3787        1536 :    integer, dimension(:), allocatable :: col_smp_mapx
    3788             : 
    3789             :    ! number of columns assigned to a given virtual SMP node (0:nsmpx-1)
    3790        1536 :    integer, dimension(:), allocatable :: nsmpcolumns
    3791             : 
    3792             :    ! number of OpenMP threads per virtual SMP node (0:nsmpx-1)
    3793        1536 :    integer, dimension(:), allocatable :: nsmpthreads
    3794             : 
    3795             :    ! number of chunks assigned to a given virtual SMP node (0:nsmpx-1)
    3796        1536 :    integer, dimension(:), allocatable :: nsmpchunks
    3797             : 
    3798             :    ! maximum number of columns assigned to a chunk in a given virtual SMP node (0:nsmpx-1)
    3799        1536 :    integer, dimension(:), allocatable :: maxcol_chk
    3800             : 
    3801             :    ! number of chunks in given virtual SMP node receiving maximum number of columns
    3802             :    ! (0:nsmpx-1)
    3803        1536 :    integer, dimension(:), allocatable :: maxcol_chks
    3804             : 
    3805             :    ! chunk id virtual offset (0:nsmpx-1)
    3806        1536 :    integer, dimension(:), allocatable :: cid_offset
    3807             : 
    3808             :    ! process-local chunk id (0:nsmpx-1)
    3809        1536 :    integer, dimension(:), allocatable :: local_cid
    3810             : 
    3811             : #if ( defined _OPENMP )
    3812             :    integer omp_get_max_threads
    3813             :    external omp_get_max_threads
    3814             : #endif
    3815             : 
    3816             : !-----------------------------------------------------------------------
    3817             : !
    3818             : ! Determine number of threads per process
    3819             : !
    3820        1536 :    nlthreads = 1
    3821             : #if ( defined _OPENMP )
    3822             :    nlthreads = OMP_GET_MAX_THREADS()
    3823             : #endif
    3824             : !
    3825             : #if ( defined SPMD )
    3826        1536 :    call mpiallgatherint(nlthreads, 1, npthreads, 1, mpicom)
    3827             : #else
    3828             :    npthreads(0) = nlthreads
    3829             :    proc_smp_map(0) = 0
    3830             : #endif
    3831             : 
    3832             : !
    3833             : ! Determine index range for dynamics blocks
    3834             : !
    3835        1536 :    call get_block_bounds_d(firstblock,lastblock)
    3836             : 
    3837             : !
    3838             : ! Determine maximum number of columns in a block
    3839             : !
    3840        1536 :    maxblksiz = 0
    3841     1181184 :    do jb=firstblock,lastblock
    3842     1181184 :       maxblksiz = max(maxblksiz,get_block_gcol_cnt_d(jb))
    3843             :    enddo
    3844             : 
    3845             : !
    3846             : !  determine which (and how many) processes are assigned
    3847             : !  dynamics blocks
    3848             : !
    3849        4608 :    allocate( proc_busy_d(0:npes-1) )
    3850     1181184 :    proc_busy_d = .false.
    3851        1536 :    nproc_busy_d = 0
    3852     1181184 :    do jb=firstblock,lastblock
    3853     1179648 :       p = get_block_owner_d(jb)
    3854     1181184 :       if (.not. proc_busy_d(p) ) then
    3855     1179648 :          proc_busy_d(p) = .true.
    3856     1179648 :          nproc_busy_d = nproc_busy_d + 1
    3857             :       endif
    3858             :    enddo
    3859             : 
    3860             : !
    3861             : ! Determine virtual SMP count and processes/virtual SMP map.
    3862             : !  If option 0 or >3, pretend that each SMP has only one process.
    3863             : !  If option 1, use SMP information.
    3864             : !  If option 2, pretend that all processes are in one SMP node.
    3865             : !  If option 3, pretend that each SMP node is made up of two
    3866             : !     processes, chosen to maximize load-balancing opportunities.
    3867             : !
    3868             : !  For all options < 5, if there are "idle" dynamics processes,
    3869             : !     assign them to the virtual SMP nodes in wrap fashion.
    3870             : !     Communication between the active and idle dynamics
    3871             : !     processes is scatter/gather (no communications between
    3872             : !     idle dynamics processes) so there is no advantage to
    3873             : !     blocking the idle processes in these assignments.
    3874             : !
    3875        1536 :    if ((opt <= 0) .or. (opt == 4)) then
    3876             : 
    3877             : !     assign active dynamics processes to virtual SMP nodes
    3878           0 :       nsmpx = 0
    3879           0 :       do p=0,npes-1
    3880           0 :          if (proc_busy_d(p)) then
    3881           0 :             proc_smp_mapx(p) = nsmpx
    3882           0 :             nsmpx = nsmpx + 1
    3883             :          endif
    3884             :       enddo
    3885             : !
    3886             : !     assign idle dynamics processes to virtual SMP nodes (wrap map)
    3887           0 :       nsmpy = 0
    3888           0 :       do p=0,npes-1
    3889           0 :          if (.not. proc_busy_d(p)) then
    3890           0 :             proc_smp_mapx(p) = nsmpy
    3891           0 :             nsmpy = mod(nsmpy+1,nsmpx)
    3892             :          endif
    3893             :       enddo
    3894             : 
    3895        1536 :    elseif (opt == 1) then
    3896             : 
    3897           0 :       allocate( smp_busy_d(0:nsmps-1) )
    3898           0 :       allocate( smp_smp_mapx(0:nsmps-1) )
    3899             : 
    3900             : !
    3901             : !     determine SMP nodes assigned dynamics blocks
    3902           0 :       smp_busy_d = .false.
    3903           0 :       do p=0,npes-1
    3904           0 :          if ( proc_busy_d(p) ) then
    3905           0 :             smp = proc_smp_map(p)
    3906           0 :             smp_busy_d(smp) = .true.
    3907             :          endif
    3908             :       enddo
    3909             : 
    3910             : !
    3911             : !     determine number of SMP nodes assigned dynamics blocks
    3912           0 :       nsmpx = 0
    3913           0 :       do smp=0,nsmps-1
    3914           0 :          if (smp_busy_d(smp)) then
    3915           0 :             smp_smp_mapx(smp) = nsmpx
    3916           0 :             nsmpx = nsmpx + 1
    3917             :          endif
    3918             :       enddo
    3919             : !
    3920             : !     assign processes in active dynamics SMP nodes to virtual SMP nodes
    3921           0 :       do p=0,npes-1
    3922           0 :          smp = proc_smp_map(p)
    3923           0 :          if (smp_busy_d(smp)) then
    3924           0 :             proc_smp_mapx(p) = smp_smp_mapx(smp)
    3925             :          endif
    3926             :       enddo
    3927             : !
    3928             : !     assign processes in idle dynamics SMP nodes to virtual SMP nodes (wrap map)
    3929           0 :       nsmpy = 0
    3930           0 :       do p=0,npes-1
    3931           0 :          smp = proc_smp_map(p)
    3932           0 :          if (.not. smp_busy_d(smp)) then
    3933           0 :             proc_smp_mapx(p) = nsmpy
    3934           0 :             nsmpy = mod(nsmpy+1,nsmpx)
    3935             :          endif
    3936             :       enddo
    3937             : !
    3938           0 :       deallocate( smp_busy_d )
    3939           0 :       deallocate( smp_smp_mapx )
    3940             : 
    3941        1536 :    elseif (opt == 2) then
    3942             : 
    3943        1536 :       nsmpx = 1
    3944     1181184 :       do p=0,npes-1
    3945     1181184 :          proc_smp_mapx(p) = 0
    3946             :       enddo
    3947             : 
    3948           0 :    elseif (opt == 3) then
    3949             : 
    3950             : !     find active process partners
    3951           0 :       proc_smp_mapx = -1
    3952           0 :       call find_partners(opt,proc_busy_d,nsmpx,proc_smp_mapx)
    3953             : !
    3954             : !     assign unassigned (idle dynamics) processes to virtual SMP nodes
    3955             : !     (wrap map)
    3956           0 :       nsmpy = 0
    3957           0 :       do p=0,npes-1
    3958           0 :          if (proc_smp_mapx(p) == -1) then
    3959           0 :             proc_smp_mapx(p) = nsmpy
    3960           0 :             nsmpy = mod(nsmpy+1,nsmpx)
    3961             :          endif
    3962             :       enddo
    3963             : 
    3964             :    else
    3965             : 
    3966           0 :       nsmpx = npes
    3967           0 :       do p=0,npes-1
    3968           0 :          proc_smp_mapx(p) = p
    3969             :       enddo
    3970             : 
    3971             :    endif
    3972             : !
    3973        1536 :    deallocate( proc_busy_d )
    3974             : 
    3975             : !
    3976             : ! Determine maximum number of processes assigned to a single
    3977             : ! virtual SMP node
    3978             : !
    3979        4608 :    allocate( nsmpprocs(0:nsmpx-1) )
    3980             : !
    3981        3072 :    nsmpprocs(:) = 0
    3982     1181184 :    do p=0,npes-1
    3983     1179648 :       smp = proc_smp_mapx(p)
    3984     1181184 :       nsmpprocs(smp) = nsmpprocs(smp) + 1
    3985             :    enddo
    3986        3072 :    max_nproc_smpx = maxval(nsmpprocs)
    3987             : !
    3988        1536 :    deallocate( nsmpprocs )
    3989             : 
    3990             : !
    3991             : ! Determine number of columns assigned to each
    3992             : ! virtual SMP in block decomposition
    3993             : 
    3994        4608 :    allocate( col_smp_mapx(ngcols) )
    3995             : !
    3996    84936192 :    col_smp_mapx(:) = -1
    3997        1536 :    error = .false.
    3998    84936192 :    do i=1,num_global_phys_cols
    3999    84934656 :       curgcol = latlon_to_dyn_gcol_map(i)
    4000    84934656 :       block_cnt = get_gcol_block_cnt_d(curgcol)
    4001    84934656 :       call get_gcol_block_d(curgcol,block_cnt,blockids,bcids)
    4002   169870848 :       do jb=1,block_cnt
    4003    84934656 :          p = get_block_owner_d(blockids(jb))
    4004   169869312 :          if (col_smp_mapx(i) == -1) then
    4005    84934656 :             col_smp_mapx(i) = proc_smp_mapx(p)
    4006           0 :          elseif (col_smp_mapx(i) /= proc_smp_mapx(p)) then
    4007           0 :             error = .true.
    4008             :          endif
    4009             :       enddo
    4010             :    end do
    4011        1536 :    if (error) then
    4012           0 :       write(iulog,*) "PHYS_GRID_INIT error: opt", opt, "specified, ", &
    4013           0 :                "but vertical decomposition not limited to virtual SMP"
    4014           0 :       call endrun()
    4015             :    endif
    4016             : !
    4017        4608 :    allocate( nsmpcolumns(0:nsmpx-1) )
    4018        3072 :    nsmpcolumns(:) = 0
    4019    84936192 :    do i=1,num_global_phys_cols
    4020    84934656 :       curgcol = latlon_to_dyn_gcol_map(i)
    4021    84934656 :       smp = col_smp_mapx(curgcol)
    4022    84936192 :       nsmpcolumns(smp) = nsmpcolumns(smp) + 1
    4023             :    end do
    4024             : !
    4025        1536 :    deallocate( col_smp_mapx )
    4026             : 
    4027             : !
    4028             : !  Allocate other work space
    4029             : !
    4030        4608 :    allocate( nsmpthreads(0:nsmpx-1) )
    4031        3072 :    allocate( nsmpchunks (0:nsmpx-1) )
    4032        3072 :    allocate( maxcol_chk (0:nsmpx-1) )
    4033        3072 :    allocate( maxcol_chks(0:nsmpx-1) )
    4034        3072 :    allocate( cid_offset (0:nsmpx-1) )
    4035        3072 :    allocate( local_cid  (0:nsmpx-1) )
    4036        4608 :    allocate( cols(1:maxblksiz) )
    4037             : !
    4038             : ! Options 0-3: split local dynamics blocks into chunks,
    4039             : !              using wrap-map assignment of columns and
    4040             : !              day/night and north/south column pairs
    4041             : !              to chunks to improve load balance
    4042             : !  Option 0: local is per process
    4043             : !  Option 1: local is subset of`processes assigned to same SMP node
    4044             : !  Option 2: local is global
    4045             : !  Option 3: local is pair of processes chosen to maximize load-balance
    4046             : !            wrt restriction that only communicate with one other
    4047             : !            process.
    4048             : ! Option 4: split local dynamics blocks into chunks,
    4049             : !           using block-map assignment of columns
    4050             : !
    4051        1536 :    if ((opt >= 0) .and. (opt <= 4)) then
    4052             : !
    4053             : ! Calculate number of threads available in each SMP node.
    4054             : !
    4055        3072 :       nsmpthreads(:) = 0
    4056     1181184 :       do p=0,npes-1
    4057     1179648 :          smp = proc_smp_mapx(p)
    4058     1181184 :          nsmpthreads(smp) = nsmpthreads(smp) + npthreads(p)
    4059             :       enddo
    4060             : !
    4061             : ! Determine number of chunks to keep all threads busy
    4062             : !
    4063        1536 :       nchunks = 0
    4064        3072 :       do smp=0,nsmpx-1
    4065        1536 :          nsmpchunks(smp) = nsmpcolumns(smp)/pcols
    4066        1536 :          if (mod(nsmpcolumns(smp), pcols) /= 0) then
    4067           0 :             nsmpchunks(smp) = nsmpchunks(smp) + 1
    4068             :          endif
    4069        1536 :          if (nsmpchunks(smp) < chunks_per_thread*nsmpthreads(smp)) then
    4070           0 :             nsmpchunks(smp) = chunks_per_thread*nsmpthreads(smp)
    4071             :          endif
    4072      591360 :          do while (mod(nsmpchunks(smp), nsmpthreads(smp)) /= 0)
    4073      589824 :             nsmpchunks(smp) = nsmpchunks(smp) + 1
    4074             :          enddo
    4075        1536 :          if (nsmpchunks(smp) > nsmpcolumns(smp)) then
    4076           0 :             nsmpchunks(smp) = nsmpcolumns(smp)
    4077             :          endif
    4078        3072 :          nchunks = nchunks + nsmpchunks(smp)
    4079             :       enddo
    4080             : !
    4081             : ! Determine maximum number of columns to assign to chunks
    4082             : ! in a given SMP
    4083             : !
    4084        3072 :       do smp=0,nsmpx-1
    4085        3072 :          if (nsmpchunks(smp) /= 0) then
    4086        1536 :             ntmp1 = nsmpcolumns(smp)/nsmpchunks(smp)
    4087        1536 :             ntmp2 = mod(nsmpcolumns(smp),nsmpchunks(smp))
    4088        1536 :             if (ntmp2 > 0) then
    4089        1536 :                maxcol_chk(smp) = ntmp1 + 1
    4090        1536 :                maxcol_chks(smp) = ntmp2
    4091             :             else
    4092           0 :                maxcol_chk(smp) = ntmp1
    4093           0 :                maxcol_chks(smp) = nsmpchunks(smp)
    4094             :             endif
    4095             :          else
    4096           0 :             maxcol_chk(smp) = 0
    4097           0 :             maxcol_chks(smp) = 0
    4098             :          endif
    4099             :       enddo
    4100             : !
    4101             : ! Allocate chunks and knuhcs data structures
    4102             : !
    4103        4608 :       allocate( chunks(1:nchunks) )
    4104        4608 :       allocate( knuhcs(1:ngcols) )
    4105             : !
    4106             : ! Initialize chunks and knuhcs data structures
    4107             : !
    4108     5899776 :       chunks(:)%ncols = 0
    4109    84936192 :       knuhcs(:)%chunkid = -1
    4110    84936192 :       knuhcs(:)%col = -1
    4111             : !
    4112             : ! Determine chunk id ranges for each SMP
    4113             : !
    4114        1536 :       cid_offset(0) = 1
    4115        1536 :       local_cid(0) = 0
    4116        1536 :       do smp=1,nsmpx-1
    4117           0 :          cid_offset(smp) = cid_offset(smp-1) + nsmpchunks(smp-1)
    4118        1536 :          local_cid(smp) = 0
    4119             :       enddo
    4120             : !
    4121             : ! Assign columns to chunks
    4122             : !
    4123     1181184 :       do jb=firstblock,lastblock
    4124     1179648 :          p = get_block_owner_d(jb)
    4125     1179648 :          smp = proc_smp_mapx(p)
    4126     1179648 :          blksiz = get_block_gcol_cnt_d(jb)
    4127     1179648 :          call get_block_gcol_d(jb,blksiz,cols)
    4128    86115840 :          do ib = 1,blksiz
    4129             : !
    4130             : ! Assign column to a chunk if not already assigned
    4131    84934656 :             curgcol = cols(ib)
    4132   169869312 :             if ((dyn_to_latlon_gcol_map(curgcol) /= -1) .and. &
    4133    86114304 :                 (knuhcs(curgcol)%chunkid == -1)) then
    4134             : !
    4135             : ! Find next chunk with space
    4136             : ! (maxcol_chks > 0 test necessary for opt=4 block map)
    4137    43646976 :                cid = cid_offset(smp) + local_cid(smp)
    4138    43646976 :                if (maxcol_chks(smp) > 0) then
    4139    43646976 :                   do while (chunks(cid)%ncols >=  maxcol_chk(smp))
    4140           0 :                      local_cid(smp) = mod(local_cid(smp)+1,nsmpchunks(smp))
    4141           0 :                      cid = cid_offset(smp) + local_cid(smp)
    4142             :                   enddo
    4143             :                else
    4144           0 :                   do while (chunks(cid)%ncols >=  maxcol_chk(smp)-1)
    4145           0 :                      local_cid(smp) = mod(local_cid(smp)+1,nsmpchunks(smp))
    4146           0 :                      cid = cid_offset(smp) + local_cid(smp)
    4147             :                   enddo
    4148             :                endif
    4149    43646976 :                chunks(cid)%ncols = chunks(cid)%ncols + 1
    4150    43646976 :                if (chunks(cid)%ncols == maxcol_chk(smp)) &
    4151     2359296 :                   maxcol_chks(smp) = maxcol_chks(smp) - 1
    4152             : !
    4153    43646976 :                i = chunks(cid)%ncols
    4154    43646976 :                chunks(cid)%gcol(i) = curgcol
    4155    43646976 :                chunks(cid)%lon(i)  = lon_p(curgcol)
    4156    43646976 :                chunks(cid)%lat(i)  = lat_p(curgcol)
    4157    43646976 :                knuhcs(curgcol)%chunkid = cid
    4158    43646976 :                knuhcs(curgcol)%col = i
    4159             : !
    4160    43646976 :                if (opt < 4) then
    4161             : !
    4162             : ! If space available, look to assign a load-balancing "twin" to same chunk
    4163    43646976 :                   if ( (chunks(cid)%ncols <  maxcol_chk(smp)) .and. &
    4164    87293952 :                        (maxcol_chks(smp) > 0) .and. (twin_alg > 0)) then
    4165             : 
    4166             :                      call find_twin(curgcol, smp, &
    4167    41287680 :                                     proc_smp_mapx, twingcol)
    4168             : 
    4169    41287680 :                      if (twingcol > 0) then
    4170    41287680 :                         chunks(cid)%ncols = chunks(cid)%ncols + 1
    4171    41287680 :                         if (chunks(cid)%ncols == maxcol_chk(smp)) &
    4172           0 :                            maxcol_chks(smp) = maxcol_chks(smp) - 1
    4173             : !
    4174    41287680 :                         i = chunks(cid)%ncols
    4175    41287680 :                         chunks(cid)%gcol(i) = twingcol
    4176    41287680 :                         chunks(cid)%lon(i) = lon_p(twingcol)
    4177    41287680 :                         chunks(cid)%lat(i) = lat_p(twingcol)
    4178    41287680 :                         knuhcs(twingcol)%chunkid = cid
    4179    41287680 :                         knuhcs(twingcol)%col = i
    4180             :                      endif
    4181             : !
    4182             :                   endif
    4183             : !
    4184             : ! Move on to next chunk (wrap map)
    4185    43646976 :                   local_cid(smp) = mod(local_cid(smp)+1,nsmpchunks(smp))
    4186             : !
    4187             :                endif
    4188             : !
    4189             :             endif
    4190             :          enddo
    4191             :       enddo
    4192             : !
    4193             :    else
    4194             : !
    4195             : ! Option 5: split individual dynamics blocks into chunks,
    4196             : !            assigning consecutive columns to the same chunk
    4197             : !
    4198             : ! Determine total number of chunks and
    4199             : ! number of chunks in each "SMP node"
    4200             : !  (assuming no vertical decomposition)
    4201           0 :       nchunks = 0
    4202           0 :       nsmpchunks(:) = 0
    4203           0 :       do j=firstblock,lastblock
    4204           0 :          blksiz = get_block_gcol_cnt_d(j)
    4205           0 :          nlchunks = blksiz/pcols
    4206           0 :          if (pcols*(blksiz/pcols) /= blksiz) then
    4207           0 :             nlchunks = nlchunks + 1
    4208             :          endif
    4209           0 :          nchunks = nchunks + nlchunks
    4210           0 :          p = get_block_owner_d(j)
    4211           0 :          nsmpchunks(p) = nsmpchunks(p) + nlchunks
    4212             :       enddo
    4213             : !
    4214             : ! Determine chunk id ranges for each SMP
    4215             : !
    4216           0 :       cid_offset(0) = 1
    4217           0 :       local_cid(0) = 0
    4218           0 :       do smp=1,nsmpx-1
    4219           0 :          cid_offset(smp) = cid_offset(smp-1) + nsmpchunks(smp-1)
    4220           0 :          local_cid(smp) = 0
    4221             :       enddo
    4222             : !
    4223             : ! Allocate chunks and knuhcs data structures
    4224             : !
    4225           0 :       allocate( chunks(1:nchunks) )
    4226           0 :       allocate( knuhcs(1:ngcols) )
    4227             : !
    4228             : ! Initialize chunks and knuhcs data structures
    4229             : !
    4230           0 :       knuhcs(:)%chunkid = -1
    4231           0 :       knuhcs(:)%col = -1
    4232           0 :       cid = 0
    4233           0 :       do jb=firstblock,lastblock
    4234           0 :          p = get_block_owner_d(jb)
    4235           0 :          smp = proc_smp_mapx(p)
    4236           0 :          blksiz = get_block_gcol_cnt_d(jb)
    4237           0 :          call get_block_gcol_d(jb,blksiz,cols)
    4238             : 
    4239           0 :          ib = 0
    4240           0 :          do while (ib < blksiz)
    4241             : 
    4242           0 :             cid = cid_offset(smp) + local_cid(smp)
    4243           0 :             max_ncols = min(pcols,blksiz-ib)
    4244             : 
    4245           0 :             ncols = 0
    4246           0 :             do i=1,max_ncols
    4247           0 :                ib = ib + 1
    4248             :                ! check whether global index is for a column that dynamics
    4249             :                ! intends to pass to the physics
    4250           0 :                curgcol = cols(ib)
    4251           0 :                if (dyn_to_latlon_gcol_map(curgcol) /= -1) then
    4252             :                   ! yes - then save the information
    4253           0 :                   ncols = ncols + 1
    4254           0 :                   chunks(cid)%gcol(ncols) = curgcol
    4255           0 :                   chunks(cid)%lon(ncols)  = lon_p(curgcol)
    4256           0 :                   chunks(cid)%lat(ncols)  = lat_p(curgcol)
    4257           0 :                   knuhcs(curgcol)%chunkid = cid
    4258           0 :                   knuhcs(curgcol)%col = ncols
    4259             :                endif
    4260             :             enddo
    4261           0 :             chunks(cid)%ncols = ncols
    4262             : 
    4263           0 :             local_cid(smp) = local_cid(smp) + 1
    4264             :          enddo
    4265             :       enddo
    4266             : !
    4267             : ! Set number of threads available in each "SMP node".
    4268             : !
    4269           0 :       do p=0,npes-1
    4270           0 :          nsmpthreads(p) = npthreads(p)
    4271             :       enddo
    4272             : !
    4273             :    endif
    4274             : !
    4275             : ! Assign chunks to processes.
    4276             : !
    4277             :    call assign_chunks(npthreads, nsmpx, proc_smp_mapx, &
    4278        1536 :                       nsmpthreads, nsmpchunks)
    4279             : !
    4280             : ! Clean up
    4281             : !
    4282        1536 :    deallocate( nsmpcolumns )
    4283        1536 :    deallocate( nsmpthreads )
    4284        1536 :    deallocate( nsmpchunks  )
    4285        1536 :    deallocate( maxcol_chk  )
    4286        1536 :    deallocate( maxcol_chks )
    4287        1536 :    deallocate( cid_offset  )
    4288        1536 :    deallocate( local_cid   )
    4289        1536 :    deallocate( cols )
    4290        1536 :    deallocate( knuhcs )
    4291             : 
    4292        1536 :    return
    4293        3072 :    end subroutine create_chunks
    4294             : !
    4295             : !========================================================================
    4296             : 
    4297           0 :    subroutine find_partners(opt, proc_busy_d, nsmpx, proc_smp_mapx)
    4298             : !-----------------------------------------------------------------------
    4299             : !
    4300             : ! Purpose: Divide processes into pairs, attempting to maximize the
    4301             : !          the number of columns in one process whose twins are in the
    4302             : !          other process.
    4303             : !
    4304             : ! Method: The day/night and north/south hemisphere complement is defined
    4305             : !         to be the column twin.
    4306             : !
    4307             : ! Author: Patrick Worley
    4308             : !
    4309             : !-----------------------------------------------------------------------
    4310        1536 :    use dyn_grid, only: get_gcol_block_cnt_d, get_gcol_block_d, &
    4311             :                        get_block_owner_d
    4312             :    use pmgrid, only: plev
    4313             : !------------------------------Arguments--------------------------------
    4314             :    integer, intent(in)  :: opt           ! chunking option
    4315             :    logical, intent(in)  :: proc_busy_d(0:npes-1)
    4316             :                                          ! active/idle dynamics process flags
    4317             :    integer, intent(out) :: nsmpx         ! calculated number of virtual
    4318             :                                          !  SMP nodes
    4319             :    integer, intent(out) :: proc_smp_mapx(0:npes-1)
    4320             :                                          ! process/virtual smp map
    4321             : !---------------------------Local workspace-----------------------------
    4322             :    integer :: gcol_latlon                ! physics column index (latlon sorted)
    4323             :    integer :: twingcol_latlon            ! physics column index (latlon sorted)
    4324             :    integer :: gcol, twingcol             ! physics column indices
    4325             :    integer :: lon, lat, twinlat          ! longitude and latitude indices
    4326             :    integer :: twinlon_off                ! estimate as to offset of twinlon
    4327             :                                          ! on a latitude line
    4328             :    integer :: block_cnt                  ! number of blocks containing data
    4329             :                                          ! for a given vertical column
    4330             :    integer :: blockids(plev+1)           ! block indices
    4331             :    integer :: bcids(plev+1)              ! block column indices
    4332             :    integer :: jb                         ! block index
    4333             :    integer :: p, twp                     ! process indices
    4334           0 :    integer :: col_proc_mapx(ngcols)      ! location of columns in
    4335             :                                          !  dynamics decomposition
    4336           0 :    integer :: twin_proc_mapx(ngcols)     ! location of column twins in
    4337             :                                          !  dynamics decomposition
    4338           0 :    integer :: twin_cnt(0:npes-1)         ! for each process, number of twins
    4339             :                                          !  in each of the other processes
    4340           0 :    logical :: assigned(0:npes-1)         ! flag indicating whether process
    4341             :                                          !  assigned to an SMP node yet
    4342             :    integer :: maxpartner, maxcnt         ! process with maximum number of
    4343             :                                          !  twins and this count
    4344             : 
    4345             :    logical :: error                      ! error flag
    4346             : !-----------------------------------------------------------------------
    4347             : !
    4348             : ! Determine process location of column and its twin in dynamics decomposition
    4349             : !
    4350           0 :    col_proc_mapx(:) = -1
    4351           0 :    twin_proc_mapx(:) = -1
    4352             : 
    4353           0 :    error = .false.
    4354           0 :    do gcol_latlon=1,num_global_phys_cols
    4355             : 
    4356             :       ! Assume latitude and longitude symmetries and that index manipulations
    4357             :       ! are sufficient to find partners. (Will be true for lon/lat grids.)
    4358           0 :       gcol = latlon_to_dyn_gcol_map(gcol_latlon)
    4359           0 :       lat = lat_p(gcol)
    4360           0 :       twinlat = clat_p_tot+1-lat
    4361           0 :       lon = lon_p(gcol)
    4362           0 :       twinlon_off = mod((lon-1)+(clat_p_cnt(twinlat)/2), clat_p_cnt(twinlat))
    4363           0 :       twingcol_latlon = clat_p_idx(twinlat) + twinlon_off
    4364           0 :       twingcol = latlon_to_dyn_gcol_map(twingcol_latlon)
    4365             : 
    4366           0 :       block_cnt = get_gcol_block_cnt_d(gcol)
    4367           0 :       call get_gcol_block_d(gcol,block_cnt,blockids,bcids)
    4368           0 :       do jb=1,block_cnt
    4369           0 :          p = get_block_owner_d(blockids(jb))
    4370           0 :          if (col_proc_mapx(gcol) == -1) then
    4371           0 :             col_proc_mapx(gcol) = p
    4372           0 :          elseif (col_proc_mapx(gcol) /= p) then
    4373           0 :             error = .true.
    4374             :          endif
    4375             :       enddo
    4376             : 
    4377           0 :       block_cnt = get_gcol_block_cnt_d(twingcol)
    4378           0 :       call get_gcol_block_d(twingcol,block_cnt,blockids,bcids)
    4379           0 :       do jb=1,block_cnt
    4380           0 :          p = get_block_owner_d(blockids(jb))
    4381           0 :          if (twin_proc_mapx(gcol) == -1) then
    4382           0 :             twin_proc_mapx(gcol) = p
    4383           0 :          elseif (twin_proc_mapx(gcol) /= p) then
    4384           0 :             error = .true.
    4385             :          endif
    4386             :       enddo
    4387             : 
    4388             :    end do
    4389             : 
    4390           0 :    if (error) then
    4391           0 :       if (masterproc) then
    4392           0 :          write(iulog,*) "PHYS_GRID_INIT error: opt", opt, "specified, ", &
    4393           0 :             "but vertical decomposition not limited to single process"
    4394             :       endif
    4395           0 :       call endrun()
    4396             :    endif
    4397             : 
    4398             : !
    4399             : ! Assign process pairs to SMPs, attempting to maximize the number of column,twin
    4400             : ! pairs in same SMP.
    4401             : !
    4402           0 :    assigned(:) = .false.
    4403           0 :    twin_cnt(:) = 0
    4404           0 :    nsmpx = 0
    4405           0 :    do p=0,npes-1
    4406           0 :       if ((.not. assigned(p)) .and. (proc_busy_d(p))) then
    4407             : !
    4408             : ! For each process, determine number of twins in each of the other processes
    4409             : ! (running over all columns multiple times to minimize memory requirements).
    4410             : !
    4411           0 :          do gcol_latlon=1,num_global_phys_cols
    4412           0 :             gcol = latlon_to_dyn_gcol_map(gcol_latlon)
    4413           0 :             if (col_proc_mapx(gcol) == p) then
    4414           0 :                twin_cnt(twin_proc_mapx(gcol)) = &
    4415           0 :                   twin_cnt(twin_proc_mapx(gcol)) + 1
    4416             :             endif
    4417             :          enddo
    4418             : !
    4419             : ! Find process with maximum number of twins that has not yet been designated
    4420             : ! a partner.
    4421             : !
    4422           0 :          maxpartner = -1
    4423           0 :          maxcnt = 0
    4424           0 :          do twp=0,npes-1
    4425           0 :             if ((.not. assigned(twp)) .and. (twp /= p)) then
    4426           0 :                if (twin_cnt(twp) >= maxcnt) then
    4427           0 :                   maxcnt = twin_cnt(twp)
    4428           0 :                   maxpartner = twp
    4429             :                endif
    4430             :             endif
    4431             :          enddo
    4432             : !
    4433             : ! Assign p and twp to the same SMP node
    4434             : !
    4435           0 :          if (maxpartner /= -1) then
    4436           0 :             assigned(p) = .true.
    4437           0 :             assigned(maxpartner) = .true.
    4438           0 :             proc_smp_mapx(p) = nsmpx
    4439           0 :             proc_smp_mapx(maxpartner) = nsmpx
    4440           0 :             nsmpx = nsmpx + 1
    4441             :          else
    4442           0 :             if (masterproc) then
    4443           0 :                write(iulog,*) "PHYS_GRID_INIT error: opt", opt, "specified, ", &
    4444           0 :                   "but could not divide processes into pairs."
    4445             :             endif
    4446           0 :             call endrun()
    4447             :          endif
    4448             : !
    4449             :       endif
    4450             : !
    4451             :    enddo
    4452             : !
    4453           0 :    return
    4454           0 :    end subroutine find_partners
    4455             : !
    4456             : !========================================================================
    4457             : 
    4458    82575360 :    subroutine find_twin(gcol, smp, proc_smp_mapx, twingcol_f)
    4459             : !-----------------------------------------------------------------------
    4460             : !
    4461             : ! Purpose: Find column that when paired with gcol in a chunk
    4462             : !          balances the load. A column is a candidate to be paired with
    4463             : !          gcol if it is in the same SMP node as gcol as defined
    4464             : !          by proc_smp_mapx.
    4465             : !
    4466             : ! Method: The day/night and north/south hemisphere complement is
    4467             : !         tried first. If it is not a candidate or if it has already been
    4468             : !         assigned, then the day/night complement is tried next. If that
    4469             : !         also is not available, then nothing is returned.
    4470             : !
    4471             : ! Author: Patrick Worley
    4472             : !
    4473             : !-----------------------------------------------------------------------
    4474           0 :    use dyn_grid, only: get_gcol_block_d, get_block_owner_d
    4475             : 
    4476             : !------------------------------Arguments--------------------------------
    4477             :    integer, intent(in)  :: gcol          ! global column index for column
    4478             :                                          ! seeking a twin for
    4479             :    integer, intent(in)  :: smp           ! index of SMP node
    4480             :                                          ! currently assigned to
    4481             :    integer, intent(in)  :: proc_smp_mapx(0:npes-1)
    4482             :                                          ! process/virtual smp map
    4483             :    integer, intent(out) :: twingcol_f
    4484             :                                          ! global column index for twin
    4485             : !---------------------------Local workspace-----------------------------
    4486             :    integer :: lon, lat                   ! global lon/lat indices for column
    4487             :                                          ! seeking a twin for
    4488             :    integer :: twinlon, twinlat           ! lon/lat indices of twin candidate
    4489             :    integer :: twinlon_off                ! estimate as to offset of twinlon
    4490             :                                          ! on a latitude line
    4491             :    logical :: found                      ! found flag
    4492             :    integer :: i                          ! loop index
    4493             :    integer :: upper, lower               ! search temporaries
    4494             :    integer :: twingcol_latlon            ! global physics column index (latlon sorted)
    4495             :    integer :: twingcol_lonlat            ! global physics column index (lonlat sorted)
    4496             :    integer :: twingcol                   ! global physics column indes
    4497             :    integer :: diff, min_diff, min_i      ! search temporaries
    4498    82575360 :    integer :: jbtwin(npes)               ! global block indices
    4499    82575360 :    integer :: ibtwin(npes)               ! global column indices
    4500             :    integer :: twinproc, twinsmp          ! process and smp ids
    4501             : 
    4502    82575360 :    integer :: clon_p_idx(clon_p_tot)     ! index in lonlat ordering for first
    4503             :                                          !  occurrence of longitude corresponding to
    4504             :                                          !  given latitude index
    4505             : 
    4506             :    real(r8):: twopi                      ! 2*pi
    4507             :    real(r8):: clat, twinclat             ! latitude and twin
    4508             :    real(r8):: clon, twinclon             ! longitude and twin
    4509             : 
    4510             : !-----------------------------------------------------------------------
    4511    41287680 :    twingcol_f = -1
    4512             : 
    4513             :    ! precompute clon_p_idx
    4514    41287680 :    clon_p_idx(1) = 1
    4515 11890851840 :    do i=2,clon_p_tot
    4516 11890851840 :       clon_p_idx(i) = clon_p_idx(i-1) + clon_p_cnt(i-1)
    4517             :    enddo
    4518             : !
    4519             : ! Try day/night and north/south hemisphere complement first
    4520             : !
    4521             :    ! determine twin latitude
    4522    41287680 :    lat = lat_p(gcol)
    4523    41287680 :    clat = clat_p(lat)
    4524    41287680 :    twinclat = -clat
    4525    41287680 :    twinlat = clat_p_tot+1-lat
    4526    41287680 :    if (clat_p(twinlat) == twinclat) then
    4527             :       found = .true.
    4528             :    else
    4529    15925248 :       found = .false.
    4530    15925248 :       upper = twinlat
    4531    15925248 :       lower = twinlat
    4532    15925248 :       if (upper < clat_p_tot) upper = twinlat + 1
    4533    15925248 :       if (lower > 1) lower = twinlat - 1
    4534             :    endif
    4535    57212928 :    do while (.not. found)
    4536    15925248 :       if      ((abs(clat_p(upper)-twinclat) < abs(clat_p(twinlat)-twinclat)) .and. &
    4537    41287680 :                (upper /= twinlat)) then
    4538           0 :          twinlat = upper
    4539           0 :          if (upper < clat_p_tot) then
    4540           0 :             upper = twinlat + 1
    4541             :          else
    4542             :             found = .true.
    4543             :          endif
    4544    15925248 :       else if ((abs(clat_p(lower)-twinclat) < abs(clat_p(twinlat)-twinclat)) .and. &
    4545             :                (lower /= twinlat))    then
    4546           0 :          twinlat = lower
    4547           0 :          if (lower > 1) then
    4548           0 :             lower = twinlat - 1
    4549             :          else
    4550             :             found = .true.
    4551             :          endif
    4552             :       else
    4553             :          found = .true.
    4554             :       endif
    4555             :     enddo
    4556             : 
    4557             :    ! determine twin longitude
    4558    41287680 :    twopi = 2.0_r8*pi
    4559    41287680 :    lon = lon_p(gcol)
    4560    41287680 :    clon = clon_p(lon)
    4561    41287680 :    twinclon = mod(clon+pi,twopi)
    4562    41287680 :    twinlon = mod((lon-1)+(clon_p_tot/2), clon_p_tot) + 1
    4563    41287680 :    if (clon_p(twinlon) == twinclon) then
    4564             :       found = .true.
    4565             :    else
    4566    22046208 :       found = .false.
    4567    22046208 :       upper = twinlon
    4568    22046208 :       lower = twinlon
    4569    22046208 :       if (upper < clon_p_tot) upper = twinlon + 1
    4570    22046208 :       if (lower > 1) lower = twinlon - 1
    4571             :    endif
    4572    63333888 :    do while (.not. found)
    4573    22046208 :       if      ((abs(clon_p(upper)-twinclon) < abs(clon_p(twinlon)-twinclon)) .and. &
    4574    41287680 :                (upper /= twinlon)) then
    4575           0 :          twinlon = upper
    4576           0 :          if (upper < clon_p_tot) then
    4577           0 :             upper = twinlon + 1
    4578             :          else
    4579             :             found = .true.
    4580             :          endif
    4581    22046208 :       else if ((abs(clon_p(lower)-twinclon) < abs(clon_p(twinlon)-twinclon)) .and. &
    4582             :                (lower /= twinlon))    then
    4583           0 :          twinlon = lower
    4584           0 :          if (lower > 1) then
    4585           0 :             lower = twinlon - 1
    4586             :          else
    4587             :             found = .true.
    4588             :          endif
    4589             :       else
    4590             :          found = .true.
    4591             :       endif
    4592             :    enddo
    4593             : 
    4594             :    ! first, look for an exact match (assuming latitude and longitude symmetries)
    4595    41287680 :    twinlon_off = mod((lon-1)+(clat_p_cnt(twinlat)/2), clat_p_cnt(twinlat))
    4596    41287680 :    twingcol_latlon = clat_p_idx(twinlat) + twinlon_off
    4597    41287680 :    twingcol = latlon_to_dyn_gcol_map(twingcol_latlon)
    4598             : 
    4599             :    ! otherwise, look around for an approximate match using lonlat sorted indices
    4600    41287680 :    if ((lon_p(twingcol) /= twinlon) .or. (lat_p(twingcol) /= twinlat)) then
    4601           0 :       twingcol_lonlat = clon_p_idx(twinlon)
    4602           0 :       twingcol = lonlat_to_dyn_gcol_map(twingcol_lonlat)
    4603           0 :       min_diff = abs(lat_p(twingcol) - twinlat)
    4604           0 :       min_i = 0
    4605           0 :       do i = 1, clon_p_cnt(twinlon)-1
    4606           0 :          twingcol_lonlat = clon_p_idx(twinlon)+i
    4607           0 :          twingcol = lonlat_to_dyn_gcol_map(twingcol_lonlat)
    4608           0 :          diff = abs(lat_p(twingcol) - twinlat)
    4609           0 :          if (diff < min_diff) then
    4610           0 :             min_diff = diff
    4611           0 :             min_i = i
    4612             :          endif
    4613             :       enddo
    4614           0 :       twingcol_lonlat = clon_p_idx(twinlon) + min_i
    4615           0 :       twingcol = lonlat_to_dyn_gcol_map(twingcol_lonlat)
    4616             :    endif
    4617             : 
    4618             :    ! Check whether twin and original are in same smp
    4619    41287680 :    found = .false.
    4620    41287680 :    call get_gcol_block_d(twingcol,npes,jbtwin,ibtwin)
    4621    41287680 :    twinproc = get_block_owner_d(jbtwin(1))
    4622    41287680 :    twinsmp  = proc_smp_mapx(twinproc)
    4623             : !
    4624    41287680 :    if ((twinsmp == smp) .and. &
    4625    41287680 :        (knuhcs(twingcol)%chunkid == -1)) then
    4626    41287680 :       found = .true.
    4627    41287680 :       twingcol_f = twingcol
    4628             :    endif
    4629             : !
    4630             : ! Try day/night complement next
    4631             :    if (.not. found) then
    4632             : 
    4633             :       ! first, look for an exact match (assuming longitude symmetries)
    4634           0 :       twinlon_off = mod((lon-1)+(clat_p_cnt(lat)/2), clat_p_cnt(lat))
    4635           0 :       twingcol_latlon = clat_p_idx(lat) + twinlon_off
    4636           0 :       twingcol = latlon_to_dyn_gcol_map(twingcol_latlon)
    4637             : 
    4638             :       ! otherwise, look around for an approximate match using lonlat
    4639             :       ! column ordering
    4640           0 :       if ((lon_p(twingcol) /= twinlon) .or. &
    4641           0 :           (lat_p(twingcol) /= lat)) then
    4642           0 :          twingcol_lonlat = clon_p_idx(twinlon)
    4643           0 :          twingcol = lonlat_to_dyn_gcol_map(twingcol_lonlat)
    4644           0 :          min_diff = abs(lat_p(twingcol) - lat)
    4645           0 :          min_i = 0
    4646           0 :          do i = 1, clon_p_cnt(twinlon)-1
    4647           0 :             twingcol_lonlat = clon_p_idx(twinlon)+i
    4648           0 :             twingcol = lonlat_to_dyn_gcol_map(twingcol_lonlat)
    4649           0 :             diff = abs(lat_p(twingcol) - lat)
    4650           0 :             if (diff < min_diff) then
    4651           0 :                min_diff = diff
    4652           0 :                min_i = i
    4653             :             endif
    4654             :          enddo
    4655           0 :          twingcol_lonlat = clon_p_idx(twinlon) + min_i
    4656           0 :          twingcol = lonlat_to_dyn_gcol_map(twingcol_lonlat)
    4657             :       endif
    4658             : !
    4659           0 :       call get_gcol_block_d(twingcol,npes,jbtwin,ibtwin)
    4660           0 :       twinproc = get_block_owner_d(jbtwin(1))
    4661           0 :       twinsmp  = proc_smp_mapx(twinproc)
    4662             : !
    4663           0 :       if ((twinsmp == smp) .and. &
    4664           0 :           (knuhcs(twingcol)%chunkid == -1)) then
    4665           0 :          found = .true.
    4666           0 :          twingcol_f = twingcol
    4667             :       endif
    4668             : !
    4669             :    endif
    4670             : !
    4671    41287680 :    return
    4672    41287680 :    end subroutine find_twin
    4673             : !
    4674             : !========================================================================
    4675             : 
    4676        3072 :    subroutine assign_chunks(npthreads, nsmpx, proc_smp_mapx, &
    4677        1536 :                             nsmpthreads, nsmpchunks)
    4678             : !-----------------------------------------------------------------------
    4679             : !
    4680             : ! Purpose: Assign chunks to processes, balancing the number of
    4681             : !          chunks per thread and minimizing the communication costs
    4682             : !          in dp_coupling subject to the restraint that columns
    4683             : !          do not migrate outside of the current SMP node.
    4684             : !
    4685             : ! Method:
    4686             : !
    4687             : ! Author: Patrick Worley
    4688             : !
    4689             : !-----------------------------------------------------------------------
    4690    41287680 :    use pmgrid, only: plev
    4691             :    use dyn_grid, only: get_gcol_block_cnt_d, get_gcol_block_d,&
    4692             :                        get_block_owner_d
    4693             : !------------------------------Arguments--------------------------------
    4694             :    integer, intent(in)  :: npthreads(0:npes-1)
    4695             :                                          ! number of OpenMP threads per process
    4696             :    integer, intent(in)  :: nsmpx         ! virtual smp count
    4697             :    integer, intent(in)  :: proc_smp_mapx(0:npes-1)
    4698             :                                          ! process/virtual smp map
    4699             :    integer, intent(in)  :: nsmpthreads(0:nsmpx-1)
    4700             :                                          ! number of OpenMP threads
    4701             :                                          ! per virtual SMP
    4702             :    integer, intent(in)  :: nsmpchunks(0:nsmpx-1)
    4703             :                                          ! number of chunks assigned
    4704             :                                          ! to a given virtual SMP
    4705             : !---------------------------Local workspace-----------------------------
    4706             :    integer :: i, jb, p                   ! loop indices
    4707             :    integer :: cid                        ! chunk id
    4708             :    integer :: smp                        ! SMP index
    4709             :    integer :: curgcol                    ! global column index
    4710             :    integer :: block_cnt                  ! number of blocks containing data
    4711             :                                          ! for a given vertical column
    4712             :    integer :: blockids(plev+1)           ! block indices
    4713             :    integer :: bcids(plev+1)              ! block column indices
    4714        3072 :    integer :: ntsks_smpx(0:nsmpx-1)      ! number of processes per virtual SMP
    4715        3072 :    integer :: smp_proc_mapx(0:nsmpx-1,max_nproc_smpx)
    4716             :                                          ! virtual smp to process id map
    4717        3072 :    integer :: cid_offset(0:nsmpx)        ! chunk id virtual smp offset
    4718        3072 :    integer :: ntmp1_smp(0:nsmpx-1)       ! minimum number of chunks per thread
    4719             :                                          !  in a virtual SMP
    4720        3072 :    integer :: ntmp2_smp(0:nsmpx-1)       ! number of extra chunks to be assigned
    4721             :                                          !  in a virtual SMP
    4722        3072 :    integer :: ntmp3_smp(0:nsmpx-1)       ! number of processes in a virtual
    4723             :                                          !  SMP that get more extra chunks
    4724             :                                          !  than the others
    4725        3072 :    integer :: ntmp4_smp(0:nsmpx-1)       ! number of extra chunks per process
    4726             :                                          !  in a virtual SMP
    4727             :    integer :: ntmp1, ntmp2               ! work variables
    4728             : !  integer :: npchunks(0:npes-1)         ! number of chunks to be assigned to
    4729             : !                                        !  a given process
    4730        3072 :    integer :: cur_npchunks(0:npes-1)     ! current number of chunks assigned
    4731             :                                          !  to a given process
    4732        1536 :    integer :: column_count(0:npes-1)     ! number of columns from current chunk
    4733             :                                          !  assigned to each process in dynamics
    4734             :                                          !  decomposition
    4735             : !-----------------------------------------------------------------------
    4736             : !
    4737             : ! Count number of processes per virtual SMP and determine virtual SMP
    4738             : ! to process id map
    4739             : !
    4740        3072 :    ntsks_smpx(:) = 0
    4741     2360832 :    smp_proc_mapx(:,:) = -1
    4742     1181184 :    do p=0,npes-1
    4743     1179648 :       smp = proc_smp_mapx(p)
    4744     1179648 :       ntsks_smpx(smp) = ntsks_smpx(smp) + 1
    4745     1181184 :       smp_proc_mapx(smp,ntsks_smpx(smp)) = p
    4746             :    enddo
    4747             : !
    4748             : ! Determine chunk id ranges for each virtual SMP
    4749             : !
    4750        1536 :    cid_offset(0) = 1
    4751        3072 :    do smp=1,nsmpx
    4752        3072 :       cid_offset(smp) = cid_offset(smp-1) + nsmpchunks(smp-1)
    4753             :    enddo
    4754             : !
    4755             : ! Determine number of chunks to assign to each process
    4756             : !
    4757        3072 :    do smp=0,nsmpx-1
    4758             : !
    4759             : ! Minimum number of chunks per thread
    4760        1536 :       ntmp1_smp(smp) = nsmpchunks(smp)/nsmpthreads(smp)
    4761             : 
    4762             : ! Number of extra chunks to be assigned
    4763        1536 :       ntmp2_smp(smp) = mod(nsmpchunks(smp),nsmpthreads(smp))
    4764             : 
    4765             : ! Number of processes that get more extra chunks than the others
    4766        1536 :       ntmp3_smp(smp) = mod(ntmp2_smp(smp),ntsks_smpx(smp))
    4767             : 
    4768             : ! Number of extra chunks per process
    4769        1536 :       ntmp4_smp(smp) = ntmp2_smp(smp)/ntsks_smpx(smp)
    4770        3072 :       if (ntmp3_smp(smp) > 0) then
    4771           0 :          ntmp4_smp(smp) = ntmp4_smp(smp) + 1
    4772             :       endif
    4773             :    enddo
    4774             : 
    4775     1181184 :    do p=0,npes-1
    4776     1179648 :       smp = proc_smp_mapx(p)
    4777             : 
    4778             : ! Update number of extra chunks
    4779     1179648 :       if (ntmp2_smp(smp) > ntmp4_smp(smp)) then
    4780           0 :          ntmp2_smp(smp) = ntmp2_smp(smp) - ntmp4_smp(smp)
    4781             :       else
    4782     1179648 :          ntmp4_smp(smp) = ntmp2_smp(smp)
    4783     1179648 :          ntmp2_smp(smp) = 0
    4784     1179648 :          ntmp3_smp(smp) = 0
    4785             :       endif
    4786             : 
    4787             : ! Set number of chunks
    4788     1179648 :       npchunks(p) = ntmp1_smp(smp)*npthreads(p) + ntmp4_smp(smp)
    4789             : 
    4790             : ! Update extra chunk increment
    4791     1181184 :       if (ntmp3_smp(smp) > 0) then
    4792           0 :          ntmp3_smp(smp) = ntmp3_smp(smp) - 1
    4793           0 :          if (ntmp3_smp(smp) == 0) then
    4794           0 :             ntmp4_smp(smp) = ntmp4_smp(smp) - 1
    4795             :          endif
    4796             :       endif
    4797             :    enddo
    4798             : 
    4799             : !
    4800             : ! Assign chunks to processes:
    4801             : !
    4802     1181184 :    cur_npchunks(:) = 0
    4803             : !
    4804        3072 :    do smp=0,nsmpx-1
    4805     5901312 :       do cid=cid_offset(smp),cid_offset(smp+1)-1
    4806             : !
    4807  4535746560 :          do i=1,ntsks_smpx(smp)
    4808  4529848320 :             p = smp_proc_mapx(smp,i)
    4809  4535746560 :             column_count(p) = 0
    4810             :          enddo
    4811             : !
    4812             : !  For each chunk, determine number of columns in each
    4813             : !  process within the dynamics.
    4814    90832896 :          do i=1,chunks(cid)%ncols
    4815    84934656 :             curgcol = chunks(cid)%gcol(i)
    4816    84934656 :             block_cnt = get_gcol_block_cnt_d(curgcol)
    4817    84934656 :             call get_gcol_block_d(curgcol,block_cnt,blockids,bcids)
    4818   175767552 :             do jb=1,block_cnt
    4819    84934656 :                p = get_block_owner_d(blockids(jb))
    4820   169869312 :                column_count(p) = column_count(p) + 1
    4821             :             enddo
    4822             :          enddo
    4823             : !
    4824             : !  Eliminate processes that already have their quota of chunks
    4825  4535746560 :          do i=1,ntsks_smpx(smp)
    4826  4529848320 :             p = smp_proc_mapx(smp,i)
    4827  4535746560 :             if (cur_npchunks(p) == npchunks(p)) then
    4828  2221330944 :                column_count(p) = -1
    4829             :             endif
    4830             :          enddo
    4831             : !
    4832             : !  Assign chunk to process with most
    4833             : !  columns from chunk, from among those still available
    4834     5898240 :          ntmp1 = -1
    4835     5898240 :          ntmp2 = -1
    4836  4535746560 :          do i=1,ntsks_smpx(smp)
    4837  4529848320 :             p = smp_proc_mapx(smp,i)
    4838  4535746560 :             if (column_count(p) > ntmp1) then
    4839    11008512 :                ntmp1 = column_count(p)
    4840    11008512 :                ntmp2 = p
    4841             :             endif
    4842             :          enddo
    4843     5898240 :          cur_npchunks(ntmp2) = cur_npchunks(ntmp2) + 1
    4844     5898240 :          chunks(cid)%owner   = ntmp2
    4845             : 
    4846             : !  Update total number of columns assigned to this process
    4847     5899776 :          gs_col_num(ntmp2)   = gs_col_num(ntmp2) + chunks(cid)%ncols
    4848             : !
    4849             :       enddo
    4850             : !
    4851             :    enddo
    4852             : !
    4853        1536 :    return
    4854        1536 :    end subroutine assign_chunks
    4855             : !
    4856             : !========================================================================
    4857             : 
    4858             : !#######################################################################
    4859             : 
    4860        1536 : end module phys_grid

Generated by: LCOV version 1.14