Line data Source code
1 : module phys_grid
2 : !-----------------------------------------------------------------------
3 : !
4 : ! Purpose: Definition of physics computational horizontal grid.
5 : !
6 : ! Method: Variables are private; interface routines used to extract
7 : ! information for use in user code.
8 : !
9 : ! Entry points:
10 : ! phys_grid_readnl read namelist options
11 : !
12 : ! phys_grid_init initialize chunk'ed data structure
13 : ! phys_grid_initialized get physgrid_set flag
14 : !
15 : ! get_chunk_indices_p get local chunk index range
16 : ! get_ncols_p get number of columns for a given chunk
17 : ! get_grid_dims return physics grid axis global sizes
18 : ! get_xxx_all_p get global indices, coordinates, or values
19 : ! for a given chunk
20 : ! get_xxx_vec_p get global indices, coordinates, or values
21 : ! for a subset of the columns in a chunk
22 : ! get_xxx_p get global indices, coordinates, or values
23 : ! for a single column
24 : ! where xxx is
25 : ! area for column surface area (in radians squared)
26 : ! gcol for global column index
27 : ! lat for global latitude index
28 : ! lon for global longitude index
29 : ! rlat for latitude coordinate (in radians)
30 : ! rlon for longitude coordinate (in radians)
31 : ! wght for column integration weight
32 : !
33 : ! scatter_field_to_chunk
34 : ! distribute field
35 : ! to decomposed chunk data structure
36 : ! gather_chunk_to_field
37 : ! reconstruct field
38 : ! from decomposed chunk data structure
39 : !
40 : ! read_chunk_from_field
41 : ! read and distribute field
42 : ! to decomposed chunk data structure
43 : ! write_field_from_chunk
44 : ! write field
45 : ! from decomposed chunk data structure
46 : !
47 : ! block_to_chunk_send_pters
48 : ! return pointers into send buffer where data
49 : ! from decomposed fields should
50 : ! be copied to
51 : ! block_to_chunk_recv_pters
52 : ! return pointers into receive buffer where data
53 : ! for decomposed chunk data structures should
54 : ! be copied from
55 : ! transpose_block_to_chunk
56 : ! transpose buffer containing decomposed
57 : ! fields to buffer
58 : ! containing decomposed chunk data structures
59 : !
60 : ! chunk_to_block_send_pters
61 : ! return pointers into send buffer where data
62 : ! from decomposed chunk data structures should
63 : ! be copied to
64 : ! chunk_to_block_recv_pters
65 : ! return pointers into receive buffer where data
66 : ! for decomposed fields should
67 : ! be copied from
68 : ! transpose_chunk_to_block
69 : ! transpose buffer containing decomposed
70 : ! chunk data structures to buffer
71 : ! containing decomposed fields
72 : !
73 : ! chunk_index identify whether index is for a latitude or
74 : ! a chunk
75 : !
76 : ! FOLLOWING ARE NO LONGER USED, AND ARE CURRENTLY COMMENTED OUT
77 : ! get_gcol_owner_p get owner of column
78 : ! for given global physics column index
79 : !
80 : ! buff_to_chunk Copy from local buffer to local chunk data
81 : ! structure. (Needed for cpl6.)
82 : !
83 : ! chunk_to_buff Copy from local chunk data structure to
84 : ! local buffer. (Needed for cpl6.)
85 : !
86 : ! Author: Patrick Worley and John Drake
87 : !
88 : !-----------------------------------------------------------------------
89 : use shr_kind_mod, only: r8 => shr_kind_r8, r4 => shr_kind_r4
90 : use physconst, only: pi
91 : use ppgrid, only: pcols, pver, begchunk, endchunk
92 : #if ( defined SPMD )
93 : use spmd_dyn, only: block_buf_nrecs, chunk_buf_nrecs, &
94 : local_dp_map
95 : use mpishorthand
96 : #endif
97 : use spmd_utils, only: iam, masterproc, npes, proc_smp_map, nsmps
98 : use m_MergeSorts, only: IndexSet, IndexSort
99 : use cam_abortutils, only: endrun
100 : use perf_mod
101 : use cam_logfile, only: iulog
102 :
103 : implicit none
104 : save
105 :
106 : #if ( ! defined SPMD )
107 : integer, private :: block_buf_nrecs
108 : integer, private :: chunk_buf_nrecs
109 : logical, private :: local_dp_map=.true.
110 : #endif
111 :
112 : ! The identifier for the physics grid
113 : integer, parameter, public :: phys_decomp = 100
114 : integer, parameter, public :: phys_decomp_scm = 200
115 :
116 : ! dynamics field grid information
117 : integer, private :: hdim1_d, hdim2_d
118 : ! dimensions of rectangular horizontal grid
119 : ! data structure, If 1D data structure, then
120 : ! hdim2_d == 1.
121 :
122 : ! physics field data structures
123 : integer, private :: ngcols ! global column count in physics grid (all)
124 : integer, public :: num_global_phys_cols ! global column count in phys grid
125 : ! (without holes)
126 :
127 : integer, dimension(:), allocatable, private :: dyn_to_latlon_gcol_map
128 : ! map from unsorted (dynamics) to lat/lon sorted grid indices
129 : integer, dimension(:), allocatable, private :: latlon_to_dyn_gcol_map
130 : ! map from lat/lon sorted grid to unsorted (dynamics) indices
131 : integer, dimension(:), allocatable, private :: lonlat_to_dyn_gcol_map
132 : ! map from lon/lat sorted grid to unsorted (dynamics) indices
133 :
134 : integer, private :: clat_p_tot ! number of unique latitudes
135 : integer, private :: clon_p_tot ! number of unique longitudes
136 :
137 : integer, dimension(:), allocatable, private :: clat_p_cnt ! number of repeats for each latitude
138 : integer, dimension(:), allocatable, private :: clat_p_idx ! index in latlon ordering for first occurence
139 : ! of latitude corresponding to given
140 : ! latitude index
141 : real(r8), dimension(:), allocatable :: clat_p ! unique latitudes (radians, increasing)
142 :
143 :
144 : integer, dimension(:), allocatable, private :: clon_p_cnt ! number of repeats for each longitude
145 : real(r8), dimension(:), allocatable :: clon_p ! unique longitudes (radians, increasing)
146 :
147 : integer, dimension(:), allocatable, private :: lat_p ! index into list of unique column latitudes
148 : integer, dimension(:), allocatable, private :: lon_p ! index into list of unique column longitudes
149 :
150 : ! chunk data structures
151 : type chunk
152 : integer :: ncols ! number of vertical columns
153 : integer :: gcol(pcols) ! global physics column indices
154 : integer :: lon(pcols) ! global longitude indices
155 : integer :: lat(pcols) ! global latitude indices
156 : integer :: owner ! id of process where chunk assigned
157 : integer :: lcid ! local chunk index
158 : end type chunk
159 :
160 : integer :: nchunks ! global chunk count
161 : type (chunk), dimension(:), allocatable, public :: chunks
162 : ! global computational grid
163 :
164 : integer, dimension(:), allocatable, private :: npchunks
165 : ! number of chunks assigned to each process
166 :
167 : type lchunk
168 : integer :: ncols ! number of vertical columns
169 : integer :: cid ! global chunk index
170 : integer :: gcol(pcols) ! global physics column indices
171 : real(r8) :: area(pcols) ! column surface area (from dynamics)
172 : real(r8) :: wght(pcols) ! column integration weight (from dynamics)
173 : end type lchunk
174 :
175 : integer, private :: nlchunks ! local chunk count
176 : type (lchunk), dimension(:), allocatable, private :: lchunks
177 : ! local chunks
178 :
179 : type knuhc
180 : integer :: chunkid ! chunk id
181 : integer :: col ! column index in chunk
182 : end type knuhc
183 :
184 : type (knuhc), dimension(:), allocatable, private :: knuhcs
185 : ! map from global column indices
186 : ! to chunk'ed grid
187 :
188 : ! column mapping data structures
189 : type column_map
190 : integer :: chunk ! global chunk index
191 : integer :: ccol ! column ordering in chunk
192 : end type column_map
193 :
194 : integer, private :: nlcols ! local column count
195 : type (column_map), dimension(:), allocatable, private :: pgcols
196 : ! ordered list of columns (for use in gather/scatter)
197 : ! NOTE: consistent with local ordering
198 :
199 : ! column remap data structures
200 : integer, dimension(:), allocatable, private :: gs_col_num
201 : ! number of columns scattered to each process in
202 : ! field_to_chunk scatter
203 : integer, dimension(:), allocatable, private :: gs_col_offset
204 : ! offset of columns (-1) in pgcols scattered to
205 : ! each process in field_to_chunk scatter
206 :
207 : integer, dimension(:), allocatable, private :: btofc_blk_num
208 : ! number of grid points scattered to each process in
209 : ! block_to_chunk alltoallv, and gathered from each
210 : ! process in chunk_to_block alltoallv
211 :
212 : integer, dimension(:), allocatable, private :: btofc_chk_num
213 : ! number of grid points gathered from each process in
214 : ! block_to_chunk alltoallv, and scattered to each
215 : ! process in chunk_to_block alltoallv
216 :
217 : type btofc_pters
218 : integer :: ncols ! number of columns in block
219 : integer :: nlvls ! number of levels in columns
220 : integer, dimension(:,:), pointer :: pter
221 : end type btofc_pters
222 : type (btofc_pters), dimension(:), allocatable, private :: btofc_blk_offset
223 : ! offset in btoc send array (-1) where
224 : ! (blockid, bcid, k) column should be packed in
225 : ! block_to_chunk alltoallv, AND
226 : ! offset in ctob receive array (-1) from which
227 : ! (blockid, bcid, k) column should be unpacked in
228 : ! chunk_to_block alltoallv
229 :
230 : type (btofc_pters), dimension(:), allocatable, private :: btofc_chk_offset
231 : ! offset in btoc receive array (-1) from which
232 : ! (lcid, i, k) data should be unpacked in
233 : ! block_to_chunk alltoallv, AND
234 : ! offset in ctob send array (-1) where
235 : ! (lcid, i, k) data should be packed in
236 : ! chunk_to_block alltoallv
237 :
238 : ! miscellaneous phys_grid data
239 : integer, private :: dp_coup_steps ! number of swaps in transpose algorithm
240 : integer, dimension(:), private, allocatable :: dp_coup_proc
241 : ! swap partner in each step of
242 : ! transpose algorithm
243 : logical :: physgrid_set = .false. ! flag indicates physics grid has been set
244 : integer, private :: max_nproc_smpx ! maximum number of processes assigned to a
245 : ! single virtual SMP used to define physics
246 : ! load balancing
247 : integer, private :: nproc_busy_d ! number of processes active during the dynamics
248 : ! (assigned a dynamics block)
249 :
250 : ! Physics grid decomposition options:
251 : ! -1: each chunk is a dynamics block
252 : ! 0: chunk definitions and assignments do not require interprocess comm.
253 : ! 1: chunk definitions and assignments do not require internode comm.
254 : ! 2: chunk definitions and assignments may require communication between all processes
255 : ! 3: chunk definitions and assignments only require communication with one other process
256 : ! 4: concatenated blocks, no load balancing, no interprocess communication
257 : integer, private, parameter :: min_lbal_opt = -1
258 : integer, private, parameter :: max_lbal_opt = 5
259 : integer, private, parameter :: def_lbal_opt = 2 ! default
260 : integer, private :: lbal_opt = def_lbal_opt
261 :
262 : ! Physics grid load balancing options:
263 : ! 0: assign columns to chunks as single columns, wrap mapped across chunks
264 : ! 1: use (day/night; north/south) twin algorithm to determine load-balanced pairs of
265 : ! columns and assign columns to chunks in pairs, wrap mapped
266 : integer, private, parameter :: min_twin_alg = 0
267 : integer, private, parameter :: max_twin_alg = 1
268 : integer, private, parameter :: def_twin_alg_lonlat = 1 ! default
269 : integer, private, parameter :: def_twin_alg_unstructured = 0
270 : integer, private :: twin_alg = def_twin_alg_lonlat
271 :
272 : ! target number of chunks per thread
273 : integer, private, parameter :: min_chunks_per_thread = 1
274 : integer, private, parameter :: def_chunks_per_thread = &
275 : min_chunks_per_thread ! default
276 : integer, private :: chunks_per_thread = def_chunks_per_thread
277 :
278 : ! Dynamics/physics transpose method for nonlocal load-balance:
279 : ! -1: use "0" if max_nproc_smpx and nproc_busy_d are both > npes/2; otherwise use "1"
280 : ! 0: use mpi_alltoallv
281 : ! 1: use point-to-point MPI-1 two-sided implementation
282 : ! 2: use point-to-point MPI-2 one-sided implementation if supported,
283 : ! otherwise use MPI-1 implementation
284 : ! 3: use Co-Array Fortran implementation if supported,
285 : ! otherwise use MPI-1 implementation
286 : ! 11-13: use mod_comm, choosing any of several methods internal to mod_comm.
287 : ! The method within mod_comm (denoted mod_method) has possible values 0,1,2 and
288 : ! is set according to mod_method = phys_alltoall - modmin_alltoall, where
289 : ! modmin_alltoall is 11.
290 : integer, private, parameter :: min_alltoall = -1
291 : integer, private, parameter :: max_alltoall = 3
292 : # if defined(MODCM_DP_TRANSPOSE)
293 : integer, private, parameter :: modmin_alltoall = 11
294 : integer, private, parameter :: modmax_alltoall = 13
295 : # endif
296 : integer, private, parameter :: def_alltoall = -1 ! default
297 : integer, private :: phys_alltoall = def_alltoall
298 :
299 : logical :: calc_memory_increase = .false.
300 :
301 : !========================================================================
302 : contains
303 : !========================================================================
304 :
305 1536 : subroutine phys_grid_readnl(nlfile)
306 :
307 : use namelist_utils, only: find_group_name
308 : use units, only: getunit, freeunit
309 : use spmd_utils, only: mpicom, mstrid=>masterprocid, mpi_integer, &
310 : phys_mirror_decomp_req
311 : #if defined(MODCM_DP_TRANSPOSE)
312 : use mod_comm, only: phys_transpose_mod
313 : #endif
314 : use dycore, only: dycore_is
315 :
316 : character(len=*), intent(in) :: nlfile ! filepath for file containing namelist input
317 :
318 : ! Local variables
319 : integer :: unitn, ierr
320 : character(len=*), parameter :: sub = 'phys_grid_readnl'
321 :
322 : integer :: phys_loadbalance
323 : integer :: phys_twin_algorithm
324 : integer :: phys_chnk_per_thd
325 :
326 : namelist /phys_grid_nl/ phys_alltoall, phys_loadbalance, phys_twin_algorithm, &
327 : phys_chnk_per_thd
328 : !-----------------------------------------------------------------------------
329 :
330 : ! Initialize namelist vars
331 1536 : phys_loadbalance = def_lbal_opt
332 :
333 1536 : if (dycore_is('UNSTRUCTURED')) then
334 0 : phys_twin_algorithm = def_twin_alg_unstructured
335 : else
336 1536 : phys_twin_algorithm = def_twin_alg_lonlat
337 : endif
338 :
339 1536 : phys_chnk_per_thd = def_chunks_per_thread
340 :
341 : ! Read namelist
342 1536 : if (masterproc) then
343 2 : unitn = getunit()
344 2 : open( unitn, file=trim(nlfile), status='old' )
345 2 : call find_group_name(unitn, 'phys_grid_nl', status=ierr)
346 2 : if (ierr == 0) then
347 0 : read(unitn, phys_grid_nl, iostat=ierr)
348 0 : if (ierr /= 0) then
349 0 : call endrun(sub//': FATAL: reading namelist')
350 : end if
351 : end if
352 2 : close(unitn)
353 2 : call freeunit(unitn)
354 : end if
355 :
356 1536 : call mpi_bcast(phys_alltoall, 1, mpi_integer, mstrid, mpicom, ierr)
357 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: phys_alltoall")
358 1536 : call mpi_bcast(phys_loadbalance, 1, mpi_integer, mstrid, mpicom, ierr)
359 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: phys_loadbalance")
360 1536 : call mpi_bcast(phys_twin_algorithm, 1, mpi_integer, mstrid, mpicom, ierr)
361 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: phys_twin_algorithm")
362 1536 : call mpi_bcast(phys_chnk_per_thd, 1, mpi_integer, mstrid, mpicom, ierr)
363 1536 : if (ierr /= 0) call endrun(sub//": FATAL: mpi_bcast: phys_chnk_per_thd")
364 :
365 : ! set module variables from namelist vars
366 :
367 1536 : lbal_opt = phys_loadbalance
368 :
369 1536 : if (lbal_opt == 3) then
370 0 : phys_mirror_decomp_req = .true.
371 : else
372 1536 : phys_mirror_decomp_req = .false.
373 : endif
374 :
375 1536 : twin_alg = phys_twin_algorithm
376 :
377 1536 : chunks_per_thread = phys_chnk_per_thd
378 :
379 : ! Some consistency checks
380 :
381 1536 : if (((phys_alltoall < min_alltoall) .or. &
382 : (phys_alltoall > max_alltoall)) &
383 : # if defined(MODCM_DP_TRANSPOSE)
384 : .and. &
385 : ((phys_alltoall < modmin_alltoall) .or. &
386 : (phys_alltoall > modmax_alltoall)) &
387 : # endif
388 : ) then
389 0 : if (masterproc) then
390 0 : write(iulog,*) sub//': ERROR: phys_alltoall=', phys_alltoall, &
391 0 : ' is out of range. It must be between ', min_alltoall, ' and ', max_alltoall
392 : endif
393 0 : call endrun(sub//': ERROR setting phys_alltoall')
394 : endif
395 : #if defined(SPMD)
396 : #if defined(MODCM_DP_TRANSPOSE)
397 : phys_transpose_mod = phys_alltoall
398 : #endif
399 : #endif
400 :
401 1536 : if ((lbal_opt < min_lbal_opt).or.(lbal_opt > max_lbal_opt)) then
402 0 : if (masterproc) then
403 0 : write(iulog,*) sub//': ERROR: phys_loadbalance=', phys_loadbalance, &
404 0 : ' is out of range. It must be between ', min_lbal_opt, ' and ', max_lbal_opt
405 : endif
406 0 : call endrun(sub//': ERROR setting phys_loadbalance')
407 : endif
408 :
409 1536 : if ((twin_alg < min_twin_alg).or.(twin_alg > max_twin_alg)) then
410 0 : if (masterproc) then
411 0 : write(iulog,*) sub//': ERROR: phys_twin_algorithm=', phys_twin_algorithm, &
412 0 : ' is out of range. It must be between ', min_twin_alg, ' and ', max_twin_alg
413 : endif
414 0 : call endrun(sub//': ERROR setting phys_twin_algorithm')
415 : endif
416 :
417 1536 : if (chunks_per_thread < min_chunks_per_thread) then
418 0 : if (masterproc) then
419 0 : write(iulog,*) sub//': ERROR: phys_chnk_per_thd=', phys_chnk_per_thd, &
420 0 : ' is too small. It must not be smaller than ', min_chunks_per_thread
421 : endif
422 0 : call endrun(sub//': ERROR setting phys_chnk_per_thd')
423 : endif
424 :
425 :
426 1536 : if (masterproc) then
427 2 : write(iulog,*) 'PHYS_GRID options:'
428 2 : write(iulog,*) ' Using PCOLS =', pcols
429 2 : write(iulog,*) ' phys_loadbalance =', lbal_opt
430 2 : write(iulog,*) ' phys_twin_algorithm =', twin_alg
431 2 : write(iulog,*) ' phys_alltoall =', phys_alltoall
432 2 : write(iulog,*) ' chunks_per_thread =', chunks_per_thread
433 : end if
434 :
435 1536 : end subroutine phys_grid_readnl
436 :
437 : !===============================================================================
438 :
439 105984 : integer function get_nlcols_p()
440 105984 : get_nlcols_p = nlcols
441 105984 : end function get_nlcols_p
442 :
443 1536 : subroutine phys_grid_init( )
444 : !-----------------------------------------------------------------------
445 : !
446 : ! Purpose: Physics mapping initialization routine:
447 : !
448 : ! Method:
449 : !
450 : ! Author: John Drake and Patrick Worley
451 : !
452 : !-----------------------------------------------------------------------
453 : use mpi, only: MPI_REAL8, MPI_MAX
454 : use shr_mem_mod, only: shr_mem_getusage
455 : use shr_scam_mod, only: shr_scam_GetCloseLatLon
456 : use scamMod, only: closeioplonidx, closeioplatidx, single_column
457 : use pmgrid, only: plev
458 : use dycore, only: dycore_is
459 : use dyn_grid, only: get_block_bounds_d, &
460 : get_block_gcol_d, get_block_gcol_cnt_d, &
461 : get_block_levels_d, get_block_lvl_cnt_d, &
462 : get_block_owner_d, &
463 : get_gcol_block_d, get_gcol_block_cnt_d, &
464 : get_horiz_grid_dim_d, get_horiz_grid_d, physgrid_copy_attributes_d
465 : use spmd_utils, only: pair, ceil2, masterprocid, mpicom
466 : use cam_grid_support, only: cam_grid_register, cam_grid_attribute_register
467 : use cam_grid_support, only: iMap, max_hcoordname_len
468 : use cam_grid_support, only: horiz_coord_t, horiz_coord_create
469 : use cam_grid_support, only: cam_grid_attribute_copy, cam_grid_attr_exists
470 :
471 : !
472 : !------------------------------Arguments--------------------------------
473 : !
474 : !
475 : !---------------------------Local workspace-----------------------------
476 : !
477 : integer :: i, j, jb, k, p ! loop indices
478 : integer :: pre_i ! earlier index in loop iteration
479 : integer :: clat_p_dex, clon_p_dex ! indices into unique lat. and lon. arrays
480 : integer :: maxblksiz ! maximum number of columns in a dynamics block
481 : integer :: beg_dex, end_dex ! index range
482 : integer :: cid, lcid ! global and local chunk ids
483 : integer :: max_ncols ! upper bound on number of columns in a block
484 : integer :: ncols ! number of columns in current chunk
485 : integer :: curgcol, curgcol_d ! current global column index
486 : integer :: firstblock, lastblock ! global block indices
487 : integer :: blksiz ! current block size
488 : integer :: glbcnt, curcnt ! running grid point counts
489 : integer :: curp ! current process id
490 : integer :: block_cnt ! number of blocks containing data
491 : ! for a given vertical column
492 : integer :: numlvl ! number of vertical levels in block
493 : ! column
494 : integer :: levels(plev+1) ! vertical level indices
495 : integer :: owner_d ! process owning given block column
496 : integer :: owner_p ! process owning given chunk column
497 : integer :: blockids(plev+1) ! block indices
498 : integer :: bcids(plev+1) ! block column indices
499 :
500 :
501 : ! column surface area (from dynamics)
502 1536 : real(r8), dimension(:), pointer :: area_d
503 :
504 : ! column surface areawt (from dynamics)
505 1536 : real(r8), dimension(:), pointer :: areawt_d
506 :
507 : ! column integration weight (from dynamics)
508 1536 : real(r8), dimension(:), allocatable :: wght_d
509 :
510 : ! chunk global ordering
511 1536 : integer, dimension(:), allocatable :: pchunkid
512 :
513 : ! permutation array used in physics column sorting;
514 : ! reused later as work space in (lbal_opt == -1) logic
515 1536 : integer, dimension(:), allocatable :: cdex
516 :
517 : ! latitudes and longitudes and column area for dynamics columns
518 1536 : real(r8), dimension(:), allocatable :: clat_d
519 1536 : real(r8), dimension(:), allocatable :: clon_d
520 1536 : real(r8), dimension(:), allocatable :: lat_d
521 1536 : real(r8), dimension(:), allocatable :: lon_d
522 : real(r8) :: clat_p_tmp
523 : real(r8) :: clon_p_tmp
524 :
525 : ! Maps and values for physics grid
526 1536 : real(r8), pointer :: lonvals(:)
527 1536 : real(r8), pointer :: latvals(:)
528 1536 : real(r8), allocatable :: latdeg_p(:)
529 1536 : real(r8), allocatable :: londeg_p(:)
530 1536 : integer(iMap), pointer :: grid_map(:,:)
531 1536 : integer(iMap), pointer :: grid_map_scm(:,:)
532 1536 : integer(iMap), allocatable :: coord_map(:)
533 : type(horiz_coord_t), pointer :: lat_coord
534 : type(horiz_coord_t), pointer :: lon_coord
535 : integer :: gcols(pcols)
536 1536 : character(len=max_hcoordname_len), pointer :: copy_attributes(:)
537 : character(len=max_hcoordname_len) :: copy_gridname
538 : logical :: unstructured
539 : real(r8) :: lonmin, latmin
540 : real(r8) :: mem_hw_beg, mem_hw_end
541 : real(r8) :: mem_beg, mem_end
542 :
543 1536 : nullify(area_d)
544 1536 : nullify(lonvals)
545 1536 : nullify(latvals)
546 1536 : nullify(grid_map)
547 0 : if (single_column) nullify(grid_map_scm)
548 1536 : nullify(lat_coord)
549 1536 : nullify(lon_coord)
550 :
551 1536 : if (calc_memory_increase) then
552 0 : call shr_mem_getusage(mem_hw_beg, mem_beg)
553 : end if
554 :
555 1536 : call t_startf("phys_grid_init")
556 :
557 : !-----------------------------------------------------------------------
558 : !
559 : ! Initialize physics grid, using dynamics grid
560 : ! a) column coordinates
561 :
562 1536 : call get_horiz_grid_dim_d(hdim1_d,hdim2_d)
563 1536 : ngcols = hdim1_d*hdim2_d
564 4608 : allocate( clat_d(1:ngcols) )
565 3072 : allocate( clon_d(1:ngcols) )
566 3072 : allocate( lat_d(1:ngcols) )
567 3072 : allocate( lon_d(1:ngcols) )
568 4608 : allocate( cdex(1:ngcols) )
569 84936192 : clat_d = 100000.0_r8
570 84936192 : clon_d = 100000.0_r8
571 1536 : call get_horiz_grid_d(ngcols, clat_d_out=clat_d, clon_d_out=clon_d, lat_d_out=lat_d, lon_d_out=lon_d)
572 84937728 : latmin = minval(lat_d)
573 84937728 : lonmin = minval(lon_d)
574 :
575 : ! count number of "real" column indices
576 1536 : num_global_phys_cols = 0
577 84936192 : do i=1,ngcols
578 84936192 : if (clon_d(i) < 100000.0_r8) then
579 84934656 : num_global_phys_cols = num_global_phys_cols + 1
580 : endif
581 : enddo
582 :
583 : ! sort over longitude and identify unique longitude coordinates
584 1536 : call IndexSet(ngcols,cdex)
585 1536 : call IndexSort(ngcols,cdex,clon_d,descend=.false.)
586 1536 : clon_p_tmp = clon_d(cdex(1))
587 1536 : clon_p_tot = 1
588 :
589 84934656 : do i=2,num_global_phys_cols
590 84934656 : if (clon_d(cdex(i)) > clon_p_tmp) then
591 440832 : clon_p_tot = clon_p_tot + 1
592 440832 : clon_p_tmp = clon_d(cdex(i))
593 : endif
594 : enddo
595 :
596 4608 : allocate( clon_p(1:clon_p_tot) )
597 4608 : allocate( clon_p_cnt(1:clon_p_tot) )
598 3072 : allocate( londeg_p(1:clon_p_tot) )
599 :
600 1536 : pre_i = 1
601 1536 : clon_p_tot = 1
602 1536 : clon_p(1) = clon_d(cdex(1))
603 1536 : londeg_p(1) = lon_d(cdex(1))
604 84934656 : do i=2,num_global_phys_cols
605 84934656 : if (clon_d(cdex(i)) > clon_p(clon_p_tot)) then
606 440832 : clon_p_cnt(clon_p_tot) = i-pre_i
607 440832 : pre_i = i
608 440832 : clon_p_tot = clon_p_tot + 1
609 440832 : clon_p(clon_p_tot) = clon_d(cdex(i))
610 440832 : londeg_p(clon_p_tot) = lon_d(cdex(i))
611 : endif
612 : enddo
613 1536 : clon_p_cnt(clon_p_tot) = (num_global_phys_cols+1)-pre_i
614 :
615 : ! sort over latitude and identify unique latitude coordinates
616 1536 : call IndexSet(ngcols,cdex)
617 1536 : call IndexSort(ngcols,cdex,clat_d,descend=.false.)
618 1536 : clat_p_tmp = clat_d(cdex(1))
619 1536 : clat_p_tot = 1
620 84934656 : do i=2,num_global_phys_cols
621 84934656 : if (clat_d(cdex(i)) > clat_p_tmp) then
622 293376 : clat_p_tot = clat_p_tot + 1
623 293376 : clat_p_tmp = clat_d(cdex(i))
624 : endif
625 : enddo
626 :
627 4608 : allocate( clat_p(1:clat_p_tot) )
628 4608 : allocate( clat_p_cnt(1:clat_p_tot) )
629 3072 : allocate( clat_p_idx(1:clat_p_tot) )
630 3072 : allocate( latdeg_p(1:clat_p_tot) )
631 :
632 1536 : pre_i = 1
633 1536 : clat_p_tot = 1
634 1536 : clat_p(1) = clat_d(cdex(1))
635 1536 : latdeg_p(1) = lat_d(cdex(1))
636 84934656 : do i=2,num_global_phys_cols
637 84934656 : if (clat_d(cdex(i)) > clat_p(clat_p_tot)) then
638 293376 : clat_p_cnt(clat_p_tot) = i-pre_i
639 293376 : pre_i = i
640 293376 : clat_p_tot = clat_p_tot + 1
641 293376 : clat_p(clat_p_tot) = clat_d(cdex(i))
642 293376 : latdeg_p(clat_p_tot) = lat_d(cdex(i))
643 : endif
644 : enddo
645 1536 : clat_p_cnt(clat_p_tot) = (num_global_phys_cols+1)-pre_i
646 :
647 1536 : clat_p_idx(1) = 1
648 294912 : do j=2,clat_p_tot
649 294912 : clat_p_idx(j) = clat_p_idx(j-1) + clat_p_cnt(j-1)
650 : enddo
651 :
652 1536 : deallocate(lat_d)
653 1536 : deallocate(lon_d)
654 :
655 : ! sort by longitude within latitudes
656 1536 : end_dex = 0
657 296448 : do j=1,clat_p_tot
658 294912 : beg_dex = end_dex + 1
659 294912 : end_dex = end_dex + clat_p_cnt(j)
660 296448 : call IndexSort(cdex(beg_dex:end_dex),clon_d,descend=.false.)
661 : enddo
662 :
663 : ! Early clean-up, to minimize memory high water mark
664 : ! (not executing find_partner or find_twin)
665 1536 : if (((twin_alg /= 1) .and. (lbal_opt /= 3)) .or. (lbal_opt == -1)) then
666 0 : deallocate( clat_p_cnt)
667 : end if
668 :
669 : ! save "longitude within latitude" column ordering
670 : ! and determine mapping from unsorted global column index to
671 : ! unique latitude/longitude indices
672 4608 : allocate( lat_p(1:ngcols) )
673 3072 : allocate( lon_p(1:ngcols) )
674 3072 : allocate( dyn_to_latlon_gcol_map(1:ngcols) )
675 1536 : if (lbal_opt /= -1) then
676 4608 : allocate(latlon_to_dyn_gcol_map(1:num_global_phys_cols))
677 : end if
678 :
679 84936192 : clat_p_dex = 1
680 84936192 : lat_p = -1
681 84936192 : dyn_to_latlon_gcol_map = -1
682 84936192 : do i = 1, num_global_phys_cols
683 84934656 : if (lbal_opt /= -1) latlon_to_dyn_gcol_map(i) = cdex(i)
684 84934656 : dyn_to_latlon_gcol_map(cdex(i)) = i
685 :
686 0 : do while ((clat_p(clat_p_dex) < clat_d(cdex(i))) .and. &
687 85228032 : (clat_p_dex < clat_p_tot))
688 293376 : clat_p_dex = clat_p_dex + 1
689 : enddo
690 84936192 : lat_p(cdex(i)) = clat_p_dex
691 : enddo
692 :
693 : ! sort by latitude within longitudes
694 1536 : call IndexSet(ngcols,cdex)
695 1536 : call IndexSort(ngcols,cdex,clon_d,descend=.false.)
696 1536 : end_dex = 0
697 443904 : do i=1,clon_p_tot
698 442368 : beg_dex = end_dex + 1
699 442368 : end_dex = end_dex + clon_p_cnt(i)
700 443904 : call IndexSort(cdex(beg_dex:end_dex),clat_d,descend=.false.)
701 : enddo
702 :
703 : ! Early clean-up, to minimize memory high water mark
704 : ! (not executing find_twin)
705 1536 : if ((twin_alg /= 1) .or. (lbal_opt == -1)) deallocate( clon_p_cnt )
706 :
707 : ! save "latitude within longitude" column ordering
708 : ! (only need in find_twin)
709 1536 : if ((twin_alg == 1) .and. (lbal_opt /= -1)) &
710 4608 : allocate( lonlat_to_dyn_gcol_map(1:num_global_phys_cols) )
711 :
712 1536 : clon_p_dex = 1
713 84936192 : lon_p = -1
714 84936192 : do i=1,num_global_phys_cols
715 84934656 : if ((twin_alg == 1) .and. (lbal_opt /= -1)) &
716 84934656 : lonlat_to_dyn_gcol_map(i) = cdex(i)
717 85375488 : do while ((clon_p(clon_p_dex) < clon_d(cdex(i))) .and. &
718 170750976 : (clon_p_dex < clon_p_tot))
719 440832 : clon_p_dex = clon_p_dex + 1
720 : enddo
721 84936192 : lon_p(cdex(i)) = clon_p_dex
722 : enddo
723 :
724 : ! Clean-up
725 1536 : deallocate( clat_d )
726 1536 : deallocate( clon_d )
727 1536 : deallocate( cdex )
728 :
729 : !
730 : ! Determine block index bounds
731 : !
732 1536 : call get_block_bounds_d(firstblock,lastblock)
733 :
734 : ! Allocate storage to save number of chunks and columns assigned to each
735 : ! process during chunk creation and assignment
736 : !
737 4608 : allocate( npchunks(0:npes-1) )
738 3072 : allocate( gs_col_num(0:npes-1) )
739 1181184 : npchunks(:) = 0
740 1181184 : gs_col_num(:) = 0
741 :
742 : !
743 : ! Option -1: each dynamics block is a single chunk
744 : !
745 1536 : if (lbal_opt == -1) then
746 : !
747 : ! Check that pcols >= maxblksiz
748 : !
749 0 : maxblksiz = 0
750 0 : do jb=firstblock,lastblock
751 0 : maxblksiz = max(maxblksiz,get_block_gcol_cnt_d(jb))
752 : enddo
753 0 : if (pcols < maxblksiz) then
754 0 : write(iulog,*) 'pcols = ',pcols, ' maxblksiz=',maxblksiz
755 0 : call endrun ('PHYS_GRID_INIT error: phys_loadbalance -1 specified but PCOLS < MAXBLKSIZ')
756 : endif
757 :
758 : !
759 : ! Determine total number of chunks
760 : !
761 0 : nchunks = (lastblock-firstblock+1)
762 :
763 : !
764 : ! Set max virtual SMP node size
765 : !
766 0 : max_nproc_smpx = 1
767 :
768 : !
769 : ! Allocate and initialize chunks data structure
770 : !
771 0 : allocate( cdex(1:maxblksiz) )
772 0 : allocate( chunks(1:nchunks) )
773 :
774 0 : do cid=1,nchunks
775 : ! get number of global column indices in block
776 0 : max_ncols = get_block_gcol_cnt_d(cid+firstblock-1)
777 : ! fill cdex array with global indices from current block
778 0 : call get_block_gcol_d(cid+firstblock-1,max_ncols,cdex)
779 :
780 0 : ncols = 0
781 0 : do i=1,max_ncols
782 : ! check whether global index is for a column that dynamics
783 : ! intends to pass to the physics
784 0 : curgcol_d = cdex(i)
785 0 : if (dyn_to_latlon_gcol_map(curgcol_d) /= -1) then
786 : ! yes - then save the information
787 0 : ncols = ncols + 1
788 0 : chunks(cid)%gcol(ncols) = curgcol_d
789 0 : chunks(cid)%lat(ncols) = lat_p(curgcol_d)
790 0 : chunks(cid)%lon(ncols) = lon_p(curgcol_d)
791 : endif
792 : enddo
793 0 : chunks(cid)%ncols = ncols
794 : enddo
795 :
796 : ! Clean-up
797 0 : deallocate( cdex )
798 0 : deallocate( lat_p )
799 0 : deallocate( lon_p )
800 :
801 : !
802 : ! Specify parallel decomposition
803 : !
804 0 : do cid=1,nchunks
805 : #if (defined SPMD)
806 0 : p = get_block_owner_d(cid+firstblock-1)
807 : #else
808 : p = 0
809 : #endif
810 0 : chunks(cid)%owner = p
811 0 : npchunks(p) = npchunks(p) + 1
812 0 : gs_col_num(p) = gs_col_num(p) + chunks(cid)%ncols
813 : enddo
814 : !
815 : ! Set flag indicating columns in physics and dynamics
816 : ! decompositions reside on the same processes
817 : !
818 0 : local_dp_map = .true.
819 : !
820 : else
821 : !
822 : ! Option == 0: split local blocks into chunks,
823 : ! while attempting to create load-balanced chunks.
824 : ! Does not work with vertically decomposed blocks.
825 : ! (default)
826 : ! Option == 1: split SMP-local blocks into chunks,
827 : ! while attempting to create load-balanced chunks.
828 : ! Does not work with vertically decomposed blocks.
829 : ! Option == 2: load balance chunks with respect to diurnal and
830 : ! seaonsal cycles and wth respect to latitude,
831 : ! and assign chunks to processes
832 : ! in a way that attempts to minimize communication costs
833 : ! Option == 3: divide processes into pairs and split
834 : ! blocks assigned to these pairs into
835 : ! chunks, attempting to create load-balanced chunks.
836 : ! The process pairs are chosen to maximize load balancing
837 : ! opportunities.
838 : ! Does not work with vertically decomposed blocks.
839 : ! Option == 4: concatenate local blocks, then
840 : ! divide into chunks.
841 : ! Does not work with vertically decomposed blocks.
842 : ! Option == 5: split indiviudal blocks into chunks,
843 : ! assigning columns using block ordering
844 : !
845 : !
846 : ! Allocate and initialize chunks data structure, then
847 : ! assign chunks to processes.
848 : !
849 1536 : call create_chunks(lbal_opt, chunks_per_thread)
850 :
851 : ! Early clean-up, to minimize memory high water mark
852 1536 : deallocate( lat_p )
853 1536 : deallocate( lon_p )
854 1536 : deallocate( latlon_to_dyn_gcol_map )
855 1536 : if (twin_alg == 1) deallocate( lonlat_to_dyn_gcol_map )
856 1536 : if (twin_alg == 1) deallocate( clon_p_cnt )
857 1536 : if ((twin_alg == 1) .or. (lbal_opt == 3)) deallocate( clat_p_cnt )
858 :
859 : !
860 : ! Determine whether dynamics and physics decompositions
861 : ! are colocated, not requiring any interprocess communication
862 : ! in the coupling.
863 1536 : local_dp_map = .true.
864 5899776 : do cid=1,nchunks
865 90834432 : do i=1,chunks(cid)%ncols
866 84934656 : curgcol_d = chunks(cid)%gcol(i)
867 84934656 : block_cnt = get_gcol_block_cnt_d(curgcol_d)
868 84934656 : call get_gcol_block_d(curgcol_d,block_cnt,blockids,bcids)
869 175767552 : do jb=1,block_cnt
870 84934656 : owner_d = get_block_owner_d(blockids(jb))
871 169869312 : if (owner_d /= chunks(cid)%owner) then
872 79641600 : local_dp_map = .false.
873 : endif
874 : enddo
875 : enddo
876 : enddo
877 : endif
878 : !
879 : ! Allocate and initialize data structures for gather/scatter
880 : !
881 4608 : allocate( pgcols(1:num_global_phys_cols) )
882 4608 : allocate( gs_col_offset(0:npes) )
883 3072 : allocate( pchunkid(0:npes) )
884 :
885 : ! Initialize pchunkid and gs_col_offset by summing
886 : ! number of chunks and columns per process, respectively
887 1536 : pchunkid(0) = 0
888 1536 : gs_col_offset(0) = 0
889 1179648 : do p=1,npes-1
890 1178112 : pchunkid(p) = pchunkid(p-1) + npchunks(p-1)
891 1179648 : gs_col_offset(p) = gs_col_offset(p-1) + gs_col_num(p-1)
892 : enddo
893 :
894 : ! Determine local ordering via "process id" bin sort
895 5899776 : do cid=1,nchunks
896 5898240 : p = chunks(cid)%owner
897 5898240 : pchunkid(p) = pchunkid(p) + 1
898 :
899 5898240 : chunks(cid)%lcid = pchunkid(p) + lastblock
900 :
901 5898240 : curgcol = gs_col_offset(p)
902 90832896 : do i=1,chunks(cid)%ncols
903 84934656 : curgcol = curgcol + 1
904 84934656 : pgcols(curgcol)%chunk = cid
905 90832896 : pgcols(curgcol)%ccol = i
906 : enddo
907 5899776 : gs_col_offset(p) = curgcol
908 : enddo
909 :
910 : ! Reinitialize pchunkid and gs_col_offset (for real)
911 1536 : pchunkid(0) = 1
912 1536 : gs_col_offset(0) = 1
913 1179648 : do p=1,npes-1
914 1178112 : pchunkid(p) = pchunkid(p-1) + npchunks(p-1)
915 1179648 : gs_col_offset(p) = gs_col_offset(p-1) + gs_col_num(p-1)
916 : enddo
917 1536 : pchunkid(npes) = pchunkid(npes-1) + npchunks(npes-1)
918 1536 : gs_col_offset(npes) = gs_col_offset(npes-1) + gs_col_num(npes-1)
919 :
920 : ! Save local information
921 : ! (Local chunk index range chosen so that it does not overlap
922 : ! {begblock,...,endblock})
923 : !
924 1536 : nlcols = gs_col_num(iam)
925 1536 : nlchunks = npchunks(iam)
926 1536 : begchunk = pchunkid(iam) + lastblock
927 1536 : endchunk = pchunkid(iam+1) + lastblock - 1
928 : !
929 4608 : allocate( lchunks(begchunk:endchunk) )
930 5899776 : do cid=1,nchunks
931 5899776 : if (chunks(cid)%owner == iam) then
932 7680 : lcid = chunks(cid)%lcid
933 7680 : lchunks(lcid)%ncols = chunks(cid)%ncols
934 7680 : lchunks(lcid)%cid = cid
935 118272 : do i=1,chunks(cid)%ncols
936 118272 : lchunks(lcid)%gcol(i) = chunks(cid)%gcol(i)
937 : enddo
938 : endif
939 : enddo
940 :
941 1536 : deallocate( pchunkid )
942 1536 : deallocate( npchunks )
943 : !
944 : !-----------------------------------------------------------------------
945 : !
946 : ! Initialize physics grid, using dynamics grid
947 : ! b) column area and integration weight
948 :
949 4608 : allocate( area_d(1:ngcols) )
950 3072 : allocate( wght_d(1:ngcols) )
951 84936192 : area_d = 0.0_r8
952 84936192 : wght_d = 0.0_r8
953 :
954 1536 : call get_horiz_grid_d(ngcols, area_d_out=area_d, wght_d_out=wght_d)
955 :
956 :
957 84936192 : if ( abs(sum(area_d) - 4.0_r8*pi) > 1.e-10_r8 ) then
958 0 : write(iulog,*) ' ERROR: sum of areas on globe does not equal 4*pi'
959 0 : write(iulog,*) ' sum of areas = ', sum(area_d), sum(area_d)-4.0_r8*pi
960 0 : call endrun('phys_grid')
961 : end if
962 :
963 84936192 : if ( abs(sum(wght_d) - 4.0_r8*pi) > 1.e-10_r8 ) then
964 0 : write(iulog,*) ' ERROR: sum of integration weights on globe does not equal 4*pi'
965 0 : write(iulog,*) ' sum of weights = ', sum(wght_d), sum(wght_d)-4.0_r8*pi
966 0 : call endrun('phys_grid')
967 : end if
968 :
969 9216 : do lcid=begchunk,endchunk
970 119808 : do i=1,lchunks(lcid)%ncols
971 110592 : lchunks(lcid)%area(i) = area_d(lchunks(lcid)%gcol(i))
972 118272 : lchunks(lcid)%wght(i) = wght_d(lchunks(lcid)%gcol(i))
973 : enddo
974 : enddo
975 :
976 1536 : deallocate( area_d )
977 : nullify(area_d)
978 1536 : deallocate( wght_d )
979 :
980 1536 : if (.not. local_dp_map) then
981 : !
982 : ! allocate and initialize data structures for transposes
983 : !
984 4608 : allocate( btofc_blk_num(0:npes-1) )
985 1181184 : btofc_blk_num = 0
986 4608 : allocate( btofc_blk_offset(firstblock:lastblock) )
987 1181184 : do jb = firstblock,lastblock
988 1181184 : nullify( btofc_blk_offset(jb)%pter )
989 : enddo
990 : !
991 1536 : glbcnt = 0
992 1536 : curcnt = 0
993 1536 : curp = 0
994 84936192 : do curgcol=1,num_global_phys_cols
995 84934656 : cid = pgcols(curgcol)%chunk
996 84934656 : i = pgcols(curgcol)%ccol
997 84934656 : owner_p = chunks(cid)%owner
998 86112768 : do while (curp < owner_p)
999 1178112 : btofc_blk_num(curp) = curcnt
1000 1178112 : curcnt = 0
1001 1178112 : curp = curp + 1
1002 : enddo
1003 84934656 : curgcol_d = chunks(cid)%gcol(i)
1004 84934656 : block_cnt = get_gcol_block_cnt_d(curgcol_d)
1005 84934656 : call get_gcol_block_d(curgcol_d,block_cnt,blockids,bcids)
1006 169870848 : do jb = 1,block_cnt
1007 84934656 : owner_d = get_block_owner_d(blockids(jb))
1008 169869312 : if (iam == owner_d) then
1009 110592 : if (.not. associated(btofc_blk_offset(blockids(jb))%pter)) then
1010 1536 : blksiz = get_block_gcol_cnt_d(blockids(jb))
1011 1536 : numlvl = get_block_lvl_cnt_d(blockids(jb),bcids(jb))
1012 1536 : btofc_blk_offset(blockids(jb))%ncols = blksiz
1013 1536 : btofc_blk_offset(blockids(jb))%nlvls = numlvl
1014 6144 : allocate( btofc_blk_offset(blockids(jb))%pter(blksiz,numlvl) )
1015 : endif
1016 3760128 : do k=1,btofc_blk_offset(blockids(jb))%nlvls
1017 3649536 : btofc_blk_offset(blockids(jb))%pter(bcids(jb),k) = glbcnt
1018 3649536 : curcnt = curcnt + 1
1019 3760128 : glbcnt = glbcnt + 1
1020 : enddo
1021 : endif
1022 : enddo
1023 : enddo
1024 1536 : btofc_blk_num(curp) = curcnt
1025 1536 : block_buf_nrecs = glbcnt
1026 : !
1027 4608 : allocate( btofc_chk_num(0:npes-1) )
1028 1181184 : btofc_chk_num = 0
1029 4608 : allocate( btofc_chk_offset(begchunk:endchunk) )
1030 9216 : do lcid=begchunk,endchunk
1031 7680 : ncols = lchunks(lcid)%ncols
1032 7680 : btofc_chk_offset(lcid)%ncols = ncols
1033 7680 : btofc_chk_offset(lcid)%nlvls = pver+1
1034 24576 : allocate( btofc_chk_offset(lcid)%pter(ncols,pver+1) )
1035 : enddo
1036 : !
1037 1536 : curcnt = 0
1038 1536 : glbcnt = 0
1039 1181184 : do p=0,npes-1
1040 86114304 : do curgcol=gs_col_offset(iam),gs_col_offset(iam+1)-1
1041 84934656 : cid = pgcols(curgcol)%chunk
1042 84934656 : owner_p = chunks(cid)%owner
1043 86114304 : if (iam == owner_p) then
1044 84934656 : i = pgcols(curgcol)%ccol
1045 84934656 : lcid = chunks(cid)%lcid
1046 84934656 : curgcol_d = chunks(cid)%gcol(i)
1047 84934656 : block_cnt = get_gcol_block_cnt_d(curgcol_d)
1048 84934656 : call get_gcol_block_d(curgcol_d,block_cnt,blockids,bcids)
1049 169869312 : do jb = 1,block_cnt
1050 84934656 : owner_d = get_block_owner_d(blockids(jb))
1051 169869312 : if (p == owner_d) then
1052 110592 : numlvl = get_block_lvl_cnt_d(blockids(jb),bcids(jb))
1053 : call get_block_levels_d(blockids(jb),bcids(jb),numlvl,levels)
1054 3760128 : do k=1,numlvl
1055 3649536 : btofc_chk_offset(lcid)%pter(i,levels(k)+1) = glbcnt
1056 3649536 : curcnt = curcnt + 1
1057 3760128 : glbcnt = glbcnt + 1
1058 : enddo
1059 : endif
1060 : enddo
1061 : endif
1062 : enddo
1063 1179648 : btofc_chk_num(p) = curcnt
1064 1181184 : curcnt = 0
1065 : enddo
1066 1536 : chunk_buf_nrecs = glbcnt
1067 : !
1068 : ! Precompute swap partners and number of steps in point-to-point
1069 : ! implementations of alltoall algorithm.
1070 : ! First, determine number of swaps.
1071 : !
1072 1536 : dp_coup_steps = 0
1073 1572864 : do i=1,ceil2(npes)-1
1074 1571328 : p = pair(npes,i,iam)
1075 1572864 : if (p >= 0) then
1076 1178112 : if ((btofc_blk_num(p) > 0 .or. btofc_chk_num(p) > 0)) then
1077 38344 : dp_coup_steps = dp_coup_steps + 1
1078 : end if
1079 : end if
1080 : end do
1081 : !
1082 : ! Second, determine swap partners.
1083 : !
1084 4608 : allocate( dp_coup_proc(dp_coup_steps) )
1085 1536 : dp_coup_steps = 0
1086 1572864 : do i=1,ceil2(npes)-1
1087 1571328 : p = pair(npes,i,iam)
1088 1572864 : if (p >= 0) then
1089 1178112 : if ((btofc_blk_num(p) > 0 .or. btofc_chk_num(p) > 0)) then
1090 38344 : dp_coup_steps = dp_coup_steps + 1
1091 38344 : dp_coup_proc(dp_coup_steps) = p
1092 : end if
1093 : end if
1094 : end do
1095 : !
1096 : endif
1097 :
1098 : ! Final clean-up
1099 1536 : deallocate( gs_col_offset )
1100 : ! (if eliminate get_lon_xxx, can also deallocate
1101 : ! clat_p_idx, and grid_latlon?))
1102 :
1103 : ! Add physics-package grid to set of CAM grids
1104 : ! physgrid always uses 'lat' and 'lon' as coordinate names; If dynamics
1105 : ! grid is different, it will use different coordinate names
1106 :
1107 : ! First, create a map for the physics grid
1108 : ! It's structure will depend on whether or not the physics grid is
1109 : ! unstructured
1110 1536 : unstructured = dycore_is('UNSTRUCTURED')
1111 1536 : if (unstructured) then
1112 0 : allocate(grid_map(3, pcols * (endchunk - begchunk + 1)))
1113 0 : if (single_column) allocate(grid_map_scm(3, pcols * (endchunk - begchunk + 1)))
1114 : else
1115 4608 : allocate(grid_map(4, pcols * (endchunk - begchunk + 1)))
1116 1536 : if (single_column) allocate(grid_map_scm(4, pcols * (endchunk - begchunk + 1)))
1117 : end if
1118 615936 : grid_map = 0
1119 1536 : if (single_column) grid_map_scm = 0
1120 4608 : allocate(latvals(size(grid_map, 2)))
1121 3072 : allocate(lonvals(size(grid_map, 2)))
1122 1536 : p = 0
1123 9216 : do lcid = begchunk, endchunk
1124 7680 : ncols = lchunks(lcid)%ncols
1125 7680 : call get_gcol_all_p(lcid, pcols, gcols)
1126 : ! collect latvals and lonvals
1127 7680 : cid = lchunks(lcid)%cid
1128 118272 : do i = 1, chunks(cid)%ncols
1129 110592 : latvals(p + i) = latdeg_p(chunks(cid)%lat(i))
1130 118272 : lonvals(p + i) = londeg_p(chunks(cid)%lon(i))
1131 : end do
1132 7680 : if (pcols > ncols) then
1133 : ! Need to set these to detect unused columns
1134 19968 : latvals(p+ncols+1:p+pcols) = 1000.0_r8
1135 19968 : lonvals(p+ncols+1:p+pcols) = 1000.0_r8
1136 : end if
1137 :
1138 : ! Set grid values for this chunk
1139 132096 : do i = 1, pcols
1140 122880 : p = p + 1
1141 122880 : grid_map(1, p) = i
1142 122880 : grid_map(2, p) = lcid
1143 122880 : if (single_column) then
1144 0 : grid_map_scm(1, p) = i
1145 0 : grid_map_scm(2, p) = lcid
1146 : end if
1147 130560 : if ((i <= ncols) .and. (gcols(i) > 0)) then
1148 110592 : if (unstructured) then
1149 0 : grid_map(3, p) = gcols(i)
1150 0 : if (single_column) grid_map_scm(3, p) = closeioplonidx
1151 : else
1152 110592 : grid_map(3, p) = get_lon_p(lcid, i)
1153 110592 : grid_map(4, p) = get_lat_p(lcid, i)
1154 110592 : if (single_column) then
1155 0 : grid_map_scm(3, p) = closeioplonidx
1156 0 : grid_map_scm(4, p) = closeioplatidx
1157 : end if
1158 : end if
1159 : else
1160 12288 : if (i <= ncols) then
1161 0 : call endrun("phys_grid_init: unmapped column")
1162 : end if
1163 : end if
1164 : end do
1165 : end do
1166 :
1167 : ! Note that if the dycore is using the same points as the physics grid,
1168 : ! it will have already set up 'lat' and 'lon' axes for the physics grid
1169 : ! However, these will be in the dynamics decomposition
1170 1536 : if (unstructured) then
1171 : lon_coord => horiz_coord_create('lon', 'ncol', num_global_phys_cols, &
1172 : 'longitude', 'degrees_east', 1, size(lonvals), lonvals, &
1173 0 : map=grid_map(3,:))
1174 : lat_coord => horiz_coord_create('lat', 'ncol', num_global_phys_cols, &
1175 : 'latitude', 'degrees_north', 1, size(latvals), latvals, &
1176 0 : map=grid_map(3,:))
1177 : else
1178 :
1179 4608 : allocate(coord_map(size(grid_map, 2)))
1180 :
1181 : ! Create a lon coord map which only writes from one of each unique lon
1182 125952 : where(latvals == latmin)
1183 3072 : coord_map(:) = grid_map(3, :)
1184 : elsewhere
1185 : coord_map(:) = 0_iMap
1186 : end where
1187 : lon_coord => horiz_coord_create('lon', 'lon', hdim1_d, 'longitude', &
1188 1536 : 'degrees_east', 1, size(lonvals), lonvals, map=coord_map)
1189 :
1190 : ! Create a lat coord map which only writes from one of each unique lat
1191 125952 : where(lonvals == lonmin)
1192 3072 : coord_map(:) = grid_map(4, :)
1193 : elsewhere
1194 : coord_map(:) = 0_iMap
1195 : end where
1196 : lat_coord => horiz_coord_create('lat', 'lat', hdim2_d, 'latitude', &
1197 1536 : 'degrees_north', 1, size(latvals), latvals, map=coord_map)
1198 :
1199 1536 : deallocate(coord_map)
1200 :
1201 : end if
1202 : call cam_grid_register('physgrid', phys_decomp, lat_coord, lon_coord, &
1203 1536 : grid_map, unstruct=unstructured, block_indexed=.true.)
1204 1536 : if (single_column) call cam_grid_register('physgrid_scm', phys_decomp_scm, lat_coord, lon_coord, &
1205 0 : grid_map_scm, unstruct=unstructured, block_indexed=.true.)
1206 : ! Copy required attributes from the dynamics array
1207 1536 : nullify(copy_attributes)
1208 1536 : call physgrid_copy_attributes_d(copy_gridname, copy_attributes)
1209 3072 : do i = 1, size(copy_attributes)
1210 3072 : call cam_grid_attribute_copy(copy_gridname, 'physgrid', copy_attributes(i))
1211 : end do
1212 1536 : if ((.not. cam_grid_attr_exists('physgrid', 'area')) .and. unstructured) then
1213 : ! Physgrid always needs an area attribute. If we did not inherit one
1214 : ! from the dycore (i.e., physics and dynamics are on different grids),
1215 : ! create that attribute here (unstructured grids only, physgrid is
1216 : ! not supported for structured grids).
1217 0 : allocate(area_d(size(grid_map, 2)))
1218 0 : allocate(areawt_d(size(grid_map, 2)))
1219 0 : p = 0
1220 0 : do lcid = begchunk, endchunk
1221 0 : ncols = lchunks(lcid)%ncols
1222 0 : call get_gcol_all_p(lcid, pcols, gcols)
1223 : ! collect latvals and lonvals
1224 0 : cid = lchunks(lcid)%cid
1225 0 : do i = 1, chunks(cid)%ncols
1226 0 : area_d(p + i) = lchunks(lcid)%area(i)
1227 0 : areawt_d(p + i) = lchunks(lcid)%wght(i)
1228 : end do
1229 0 : if (pcols > ncols) then
1230 : ! Need to set these to detect unused columns
1231 0 : area_d(p+ncols+1:p+pcols) = 0.0_r8
1232 0 : areawt_d(p+ncols+1:p+pcols) = 0.0_r8
1233 : end if
1234 0 : p = p + pcols
1235 : end do
1236 : call cam_grid_attribute_register('physgrid', 'area', &
1237 0 : 'physics column areas', 'ncol', area_d, map=grid_map(3,:))
1238 : call cam_grid_attribute_register('physgrid', 'areawt', &
1239 0 : 'physics column area wts', 'ncol', areawt_d, map=grid_map(3,:))
1240 0 : nullify(area_d) ! Belongs to attribute now
1241 0 : nullify(areawt_d) ! Belongs to attribute now
1242 : end if
1243 : ! Cleanup pointers (they belong to the grid now)
1244 1536 : nullify(grid_map)
1245 1536 : if (single_column) nullify(grid_map_scm)
1246 1536 : deallocate(latvals)
1247 : nullify(latvals)
1248 1536 : deallocate(lonvals)
1249 : nullify(lonvals)
1250 : ! Cleanup, we are responsible for copy attributes
1251 1536 : if (associated(copy_attributes)) then
1252 1536 : deallocate(copy_attributes)
1253 : nullify(copy_attributes)
1254 : end if
1255 :
1256 : !
1257 1536 : physgrid_set = .true. ! Set flag indicating physics grid is now set
1258 : !
1259 1536 : call t_stopf("phys_grid_init")
1260 :
1261 1536 : if (calc_memory_increase) then
1262 0 : call shr_mem_getusage(mem_hw_end, mem_end)
1263 0 : clat_p_tmp = mem_end - mem_beg
1264 : call MPI_reduce(clat_p_tmp, mem_end, 1, MPI_REAL8, MPI_MAX, &
1265 0 : masterprocid, mpicom, curp)
1266 0 : if (masterproc) then
1267 0 : write(iulog, *) 'phys_grid_init: Increase in memory usage = ', &
1268 0 : mem_end, ' (MB)'
1269 : end if
1270 0 : clat_p_tmp = mem_hw_end - mem_hw_beg
1271 : call MPI_reduce(clat_p_tmp, mem_hw_end, 1, MPI_REAL8, MPI_MAX, &
1272 0 : masterprocid, mpicom, curp)
1273 0 : if (masterproc) then
1274 0 : write(iulog, *) 'phys_grid_init: Increase in memory highwater = ', &
1275 0 : mem_end, ' (MB)'
1276 : end if
1277 : end if
1278 :
1279 4608 : end subroutine phys_grid_init
1280 :
1281 : !========================================================================
1282 :
1283 0 : subroutine phys_grid_find_col(lat, lon, owner, lcid, icol)
1284 :
1285 : !-----------------------------------------------------------------------
1286 : !
1287 : ! Purpose: Find the global column closest to the point specified by lat
1288 : ! and lon. Return indices of owning process, local chunk, and
1289 : ! column.
1290 : !
1291 : ! Authors: Phil Rasch / Patrick Worley / B. Eaton
1292 : !
1293 : !-----------------------------------------------------------------------
1294 :
1295 : real(r8), intent(in) :: lat, lon ! requested location in degrees
1296 : integer, intent(out) :: owner ! rank of chunk owner
1297 : integer, intent(out) :: lcid ! local chunk index
1298 : integer, intent(out) :: icol ! column index within the chunk
1299 :
1300 : ! local
1301 : real(r8) dist2 ! the distance (in radians**2 from lat, lon)
1302 : real(r8) distmin ! the distance (in radians**2 from closest column)
1303 : real(r8) latr, lonr ! lat, lon (in radians) of requested location
1304 : real(r8) clat, clon ! lat, lon (in radians) of column being tested
1305 : real(r8) const
1306 :
1307 : integer i
1308 : integer cid
1309 : !-----------------------------------------------------------------------
1310 :
1311 : ! Check that input lat and lon are in valid range
1312 : if (lon < 0.0_r8 .or. lon >= 360._r8 .or. &
1313 0 : lat < -90._r8 .or. lat > 90._r8) then
1314 0 : if (masterproc) then
1315 : write(iulog,*) &
1316 0 : 'phys_grid_find_col: ERROR: lon must satisfy 0.<=lon<360. and lat must satisfy -90<=lat<=90.'
1317 : write(iulog,*) &
1318 0 : 'input lon=', lon, ' input lat=', lat
1319 : endif
1320 0 : call endrun('phys_grid_find_col: input ERROR')
1321 : end if
1322 :
1323 0 : const = 180._r8/pi ! degrees per radian
1324 0 : latr = lat/const ! to radians
1325 0 : lonr = lon/const ! to radians
1326 :
1327 0 : owner = -999
1328 0 : lcid = -999
1329 0 : icol = -999
1330 0 : distmin = 1.e10_r8
1331 :
1332 : ! scan all chunks for closest point to lat, lon
1333 0 : do cid = 1, nchunks
1334 0 : do i = 1, chunks(cid)%ncols
1335 0 : clat = clat_p(chunks(cid)%lat(i))
1336 0 : clon = clon_p(chunks(cid)%lon(i))
1337 0 : dist2 = (clat-latr)**2 + (clon-lonr)**2
1338 0 : if (dist2 < distmin ) then
1339 0 : distmin = dist2
1340 0 : owner = chunks(cid)%owner
1341 0 : lcid = chunks(cid)%lcid
1342 0 : icol = i
1343 : endif
1344 : enddo
1345 : end do
1346 :
1347 1536 : end subroutine phys_grid_find_col
1348 :
1349 : !========================================================================
1350 :
1351 0 : subroutine phys_grid_find_cols(lat, lon, nclosest, owner, lcid, icol, distmin, mlats, mlons)
1352 :
1353 : !-----------------------------------------------------------------------
1354 : !
1355 : ! Purpose: Find the global columns closest to the point specified by lat
1356 : ! and lon. Return indices of owning process, local chunk, and
1357 : ! column.
1358 : !
1359 : ! Authors: Phil Rasch / Patrick Worley / B. Eaton
1360 : !
1361 : !-----------------------------------------------------------------------
1362 : use physconst, only : rearth
1363 :
1364 : real(r8), intent(in) :: lat, lon ! requested location in degrees
1365 : integer, intent(in) :: nclosest ! number of closest points to find
1366 : integer, intent(out) :: owner(nclosest) ! rank of chunk owner
1367 : integer, intent(out) :: lcid(nclosest) ! local chunk index
1368 : integer, intent(out) :: icol(nclosest) ! column index within the chunk
1369 : real(r8),intent(out) :: distmin(nclosest) ! the distance (m) of the closest column(s)
1370 : real(r8),intent(out) :: mlats(nclosest) ! the latitude of the closest column(s)
1371 : real(r8),intent(out) :: mlons(nclosest) ! the longitude of the closest column(s)
1372 :
1373 : ! local
1374 : real(r8) dist2 ! the distance (in radians**2 from lat, lon)
1375 : real(r8) latr, lonr ! lat, lon (in radians) of requested location
1376 : real(r8) clat, clon ! lat, lon (in radians) of column being tested
1377 : real(r8) const
1378 :
1379 : integer i, j
1380 : integer cid
1381 : !-----------------------------------------------------------------------
1382 :
1383 : ! Check that input lat and lon are in valid range
1384 : if (lon < 0.0_r8 .or. lon >= 360._r8 .or. &
1385 0 : lat < -90._r8 .or. lat > 90._r8) then
1386 0 : if (masterproc) then
1387 : write(iulog,*) &
1388 0 : 'phys_grid_find_cols: ERROR: lon must satisfy 0.<=lon<360. and lat must satisfy -90<=lat<=90.'
1389 : write(iulog,*) &
1390 0 : 'input lon=', lon, ' input lat=', lat
1391 : endif
1392 0 : call endrun('phys_grid_find_cols: input ERROR')
1393 : end if
1394 :
1395 0 : const = 180._r8/pi ! degrees per radian
1396 0 : latr = lat/const ! to radians
1397 0 : lonr = lon/const ! to radians
1398 :
1399 0 : owner(:) = -999
1400 0 : lcid(:) = -999
1401 0 : icol(:) = -999
1402 0 : mlats(:) = -999
1403 0 : mlons(:) = -999
1404 0 : distmin(:) = 1.e10_r8
1405 :
1406 : ! scan all chunks for closest point to lat, lon
1407 0 : do cid = 1, nchunks
1408 0 : do i = 1, chunks(cid)%ncols
1409 0 : clat = clat_p(chunks(cid)%lat(i))
1410 0 : clon = clon_p(chunks(cid)%lon(i))
1411 0 : dist2 = acos(sin(latr) * sin(clat) + cos(latr) * cos(clat) * cos(clon - lonr)) * rearth
1412 :
1413 0 : do j = nclosest, 1, -1
1414 0 : if (dist2 < distmin(j)) then
1415 :
1416 0 : if (j < nclosest) then
1417 0 : distmin(j+1) = distmin(j)
1418 0 : owner(j+1) = owner(j)
1419 0 : lcid(j+1) = lcid(j)
1420 0 : icol(j+1) = icol(j)
1421 0 : mlats(j+1) = mlats(j)
1422 0 : mlons(j+1) = mlons(j)
1423 : end if
1424 :
1425 0 : distmin(j) = dist2
1426 0 : owner(j) = chunks(cid)%owner
1427 0 : lcid(j) = chunks(cid)%lcid
1428 0 : icol(j) = i
1429 0 : mlats(j) = clat * const
1430 0 : mlons(j) = clon * const
1431 : else
1432 : exit
1433 : end if
1434 : enddo
1435 : enddo
1436 : end do
1437 :
1438 0 : end subroutine phys_grid_find_cols
1439 : !
1440 : !========================================================================
1441 :
1442 4608 : logical function phys_grid_initialized ()
1443 : !-----------------------------------------------------------------------
1444 : !
1445 : ! Purpose: Identify whether phys_grid has been called yet or not
1446 : !
1447 : ! Method: Return physgrid_set
1448 : !
1449 : ! Author: Pat Worley
1450 : !
1451 : !-----------------------------------------------------------------------
1452 : !-----------------------------------------------------------------------
1453 : !
1454 4608 : phys_grid_initialized = physgrid_set
1455 : !
1456 : return
1457 : end function phys_grid_initialized
1458 :
1459 : !
1460 : !========================================================================
1461 : !
1462 0 : subroutine get_chunk_indices_p(index_beg, index_end)
1463 : !-----------------------------------------------------------------------
1464 : !
1465 : ! Purpose: Return range of indices for local chunks
1466 : !
1467 : ! Method:
1468 : !
1469 : ! Author: Patrick Worley
1470 : !
1471 : !-----------------------------------------------------------------------
1472 : !------------------------------Arguments--------------------------------
1473 : integer, intent(out) :: index_beg ! first index used for local chunks
1474 : integer, intent(out) :: index_end ! last index used for local chunks
1475 : !-----------------------------------------------------------------------
1476 :
1477 0 : index_beg = begchunk
1478 0 : index_end = endchunk
1479 :
1480 0 : return
1481 : end subroutine get_chunk_indices_p
1482 : !
1483 : !========================================================================
1484 : !
1485 15360 : subroutine get_gcol_all_p(lcid, latdim, gcols)
1486 : !-----------------------------------------------------------------------
1487 : !
1488 : ! Purpose: Return all global column indices for chunk
1489 : !
1490 : ! Method:
1491 : !
1492 : ! Author: Patrick Worley
1493 : !
1494 : !-----------------------------------------------------------------------
1495 : !------------------------------Arguments--------------------------------
1496 : integer, intent(in) :: lcid ! local chunk id
1497 : integer, intent(in) :: latdim ! declared size of output array
1498 :
1499 : integer, intent(out) :: gcols(:) ! array of global latitude indices
1500 : !---------------------------Local workspace-----------------------------
1501 : integer :: i ! loop index
1502 :
1503 : !-----------------------------------------------------------------------
1504 261120 : gcols=-1
1505 236544 : do i=1,lchunks(lcid)%ncols
1506 236544 : gcols(i) = lchunks(lcid)%gcol(i)
1507 : enddo
1508 15360 : return
1509 : end subroutine get_gcol_all_p
1510 :
1511 : !
1512 : !========================================================================
1513 : !
1514 110592 : integer function get_gcol_p(lcid, col)
1515 : !-----------------------------------------------------------------------
1516 : !
1517 : ! Purpose: Return global physics column index for chunk column
1518 : !
1519 : ! Method:
1520 : !
1521 : ! Author: Jim Edwards / Patrick Worley
1522 : !
1523 : !-----------------------------------------------------------------------
1524 : !------------------------------Arguments--------------------------------
1525 : integer, intent(in) :: lcid ! local chunk id
1526 : integer, intent(in) :: col ! column index
1527 :
1528 : !-----------------------------------------------------------------------
1529 110592 : get_gcol_p = lchunks(lcid)%gcol(col)
1530 :
1531 : return
1532 : end function get_gcol_p
1533 :
1534 : !
1535 : !========================================================================
1536 :
1537 0 : subroutine get_gcol_vec_p(lcid, lth, cols, gcols)
1538 : !-----------------------------------------------------------------------
1539 : !
1540 : ! Purpose: Return global physics column indices for set of chunk columns
1541 : !
1542 : ! Method:
1543 : !
1544 : ! Author: Patrick Worley
1545 : !
1546 : !-----------------------------------------------------------------------
1547 : use ppgrid
1548 :
1549 : !------------------------------Arguments--------------------------------
1550 : integer, intent(in) :: lcid ! local chunk id
1551 : integer, intent(in) :: lth ! number of column indices
1552 : integer, intent(in) :: cols(lth) ! column indices
1553 :
1554 : integer, intent(out) :: gcols(lth) ! array of global physics
1555 : ! columns indices
1556 :
1557 : !---------------------------Local workspace-----------------------------
1558 : integer :: i ! loop index
1559 :
1560 : !-----------------------------------------------------------------------
1561 0 : do i=1,lth
1562 0 : gcols(i) = lchunks(lcid)%gcol(cols(i))
1563 : enddo
1564 :
1565 0 : return
1566 : end subroutine get_gcol_vec_p
1567 :
1568 : !
1569 : !========================================================================
1570 : !
1571 9653760 : integer function get_ncols_p(lcid)
1572 : !-----------------------------------------------------------------------
1573 : !
1574 : ! Purpose: Return number of columns in chunk given the local chunk id.
1575 : !
1576 : ! Method:
1577 : !
1578 : ! Author: Patrick Worley
1579 : !
1580 : !-----------------------------------------------------------------------
1581 : !------------------------------Arguments--------------------------------
1582 : integer, intent(in) :: lcid ! local chunk id
1583 :
1584 : !---------------------------Local workspace-----------------------------
1585 : integer :: cid ! global chunk id
1586 :
1587 : !-----------------------------------------------------------------------
1588 9653760 : get_ncols_p = lchunks(lcid)%ncols
1589 :
1590 : return
1591 : end function get_ncols_p
1592 :
1593 : !========================================================================
1594 :
1595 0 : subroutine get_grid_dims(hdim1_d_out, hdim2_d_out)
1596 : use cam_abortutils, only: endrun
1597 : ! retrieve dynamics field grid information
1598 : ! hdim1_d and hdim2_d are dimensions of rectangular horizontal grid
1599 : ! data structure, If 1D data structure, then hdim2_d == 1.
1600 : integer, intent(out) :: hdim1_d_out
1601 : integer, intent(out) :: hdim2_d_out
1602 :
1603 0 : if (.not. phys_grid_initialized()) then
1604 0 : call endrun('get_grid_dims: physics grid not initialized')
1605 : end if
1606 0 : hdim1_d_out = hdim1_d
1607 0 : hdim2_d_out = hdim2_d
1608 :
1609 0 : end subroutine get_grid_dims
1610 : !
1611 : !========================================================================
1612 : !
1613 3502080 : subroutine get_lat_all_p(lcid, latdim, lats)
1614 : !-----------------------------------------------------------------------
1615 : !
1616 : ! Purpose: Return all global latitude indices for chunk
1617 : !
1618 : ! Method:
1619 : !
1620 : ! Author: Patrick Worley
1621 : !
1622 : !-----------------------------------------------------------------------
1623 : use ppgrid
1624 : !------------------------------Arguments--------------------------------
1625 : integer, intent(in) :: lcid ! local chunk id
1626 : integer, intent(in) :: latdim ! declared size of output array
1627 :
1628 : integer, intent(out) :: lats(latdim) ! array of global latitude indices
1629 :
1630 : !---------------------------Local workspace-----------------------------
1631 : integer :: i ! loop index
1632 : integer :: cid ! global chunk id
1633 :
1634 : !-----------------------------------------------------------------------
1635 3502080 : cid = lchunks(lcid)%cid
1636 53932032 : do i=1,chunks(cid)%ncols
1637 53932032 : lats(i) = chunks(cid)%lat(i)
1638 : enddo
1639 :
1640 3502080 : return
1641 : end subroutine get_lat_all_p
1642 : !
1643 : !========================================================================
1644 :
1645 0 : subroutine get_lat_vec_p(lcid, lth, cols, lats)
1646 : !-----------------------------------------------------------------------
1647 : !
1648 : ! Purpose: Return global latitude indices for set of chunk columns
1649 : !
1650 : ! Method:
1651 : !
1652 : ! Author: Patrick Worley
1653 : !
1654 : !-----------------------------------------------------------------------
1655 : use ppgrid
1656 :
1657 : !------------------------------Arguments--------------------------------
1658 : integer, intent(in) :: lcid ! local chunk id
1659 : integer, intent(in) :: lth ! number of column indices
1660 : integer, intent(in) :: cols(lth) ! column indices
1661 :
1662 : integer, intent(out) :: lats(lth) ! array of global latitude indices
1663 :
1664 : !---------------------------Local workspace-----------------------------
1665 : integer :: i ! loop index
1666 : integer :: cid ! global chunk id
1667 :
1668 : !-----------------------------------------------------------------------
1669 0 : cid = lchunks(lcid)%cid
1670 0 : do i=1,lth
1671 0 : lats(i) = chunks(cid)%lat(cols(i))
1672 : enddo
1673 :
1674 0 : return
1675 : end subroutine get_lat_vec_p
1676 : !
1677 : !========================================================================
1678 :
1679 110592 : integer function get_lat_p(lcid, col)
1680 : !-----------------------------------------------------------------------
1681 : !
1682 : ! Purpose: Return global latitude index for chunk column
1683 : !
1684 : ! Method:
1685 : !
1686 : ! Author: Patrick Worley
1687 : !
1688 : !-----------------------------------------------------------------------
1689 : use ppgrid
1690 : !------------------------------Arguments--------------------------------
1691 : integer, intent(in) :: lcid ! local chunk id
1692 : integer, intent(in) :: col ! column index
1693 :
1694 : !---------------------------Local workspace-----------------------------
1695 : integer :: cid ! global chunk id
1696 :
1697 : !-----------------------------------------------------------------------
1698 110592 : cid = lchunks(lcid)%cid
1699 110592 : get_lat_p = chunks(cid)%lat(col)
1700 :
1701 : return
1702 : end function get_lat_p
1703 : !
1704 : !========================================================================
1705 : !
1706 0 : subroutine get_lon_all_p(lcid, londim, lons)
1707 : !-----------------------------------------------------------------------
1708 : !
1709 : ! Purpose:
1710 : ! Was: Return all global longitude indices for chunk
1711 : ! Now: Return all longitude offsets (+1) for chunk. These are offsets
1712 : ! in ordered list of global columns from first
1713 : ! column with given latitude to column with given latitude
1714 : ! and longitude. This corresponds to the usual longitude indices
1715 : ! for full and reduced lon/lat grids.
1716 : !
1717 : ! Method:
1718 : !
1719 : ! Author: Patrick Worley
1720 : !
1721 : !-----------------------------------------------------------------------
1722 : use ppgrid
1723 : !------------------------------Arguments--------------------------------
1724 : integer, intent(in) :: lcid ! local chunk id
1725 : integer, intent(in) :: londim ! declared size of output array
1726 :
1727 : integer, intent(out) :: lons(londim) ! array of global longitude
1728 : ! indices
1729 :
1730 : !---------------------------Local workspace-----------------------------
1731 : integer :: i ! loop index
1732 : integer :: lat ! latitude index
1733 : integer :: cid ! global chunk id
1734 : integer :: gcol ! global column id in latlon
1735 : ! ordering
1736 :
1737 : !-----------------------------------------------------------------------
1738 0 : cid = lchunks(lcid)%cid
1739 0 : do i=1,chunks(cid)%ncols
1740 0 : lat = chunks(cid)%lat(i)
1741 0 : gcol = dyn_to_latlon_gcol_map(chunks(cid)%gcol(i))
1742 0 : lons(i) = (gcol - clat_p_idx(lat)) + 1
1743 : enddo
1744 :
1745 0 : return
1746 : end subroutine get_lon_all_p
1747 : !
1748 : !========================================================================
1749 :
1750 0 : subroutine get_lon_vec_p(lcid, lth, cols, lons)
1751 : !-----------------------------------------------------------------------
1752 : !
1753 : ! Purpose:
1754 : ! Was: Return global longitude indices for set of chunk columns.
1755 : ! Now: Return longitude offsets (+1) for set of chunk columns.
1756 : ! These are offsets in ordered list of global columns from first
1757 : ! column with given latitude to column with given latitude
1758 : ! and longitude. This corresponds to the usual longitude indices
1759 : ! for full and reduced lon/lat grids.
1760 : !
1761 : ! Method:
1762 : !
1763 : ! Author: Patrick Worley
1764 : !
1765 : !-----------------------------------------------------------------------
1766 : use ppgrid
1767 : !------------------------------Arguments--------------------------------
1768 : integer, intent(in) :: lcid ! local chunk id
1769 : integer, intent(in) :: lth ! number of column indices
1770 : integer, intent(in) :: cols(lth) ! column indices
1771 :
1772 : integer, intent(out) :: lons(lth) ! array of global longitude indices
1773 :
1774 : !---------------------------Local workspace-----------------------------
1775 : integer :: i ! loop index
1776 : integer :: lat ! latitude index
1777 : integer :: cid ! global chunk id
1778 : integer :: gcol ! global column id in latlon
1779 : ! ordering
1780 :
1781 : !-----------------------------------------------------------------------
1782 0 : cid = lchunks(lcid)%cid
1783 0 : do i=1,lth
1784 0 : lat = chunks(cid)%lat(cols(i))
1785 0 : gcol = dyn_to_latlon_gcol_map(chunks(cid)%gcol(i))
1786 0 : lons(i) = (gcol - clat_p_idx(lat)) + 1
1787 : enddo
1788 :
1789 0 : return
1790 : end subroutine get_lon_vec_p
1791 : !
1792 : !========================================================================
1793 :
1794 110592 : integer function get_lon_p(lcid, col)
1795 : !-----------------------------------------------------------------------
1796 : !
1797 : ! Purpose:
1798 : ! Was: Return global longitude index for chunk column.
1799 : ! Now: Return longitude offset (+1) for chunk column. This is the
1800 : ! offset in ordered list of global columns from first
1801 : ! column with given latitude to column with given latitude
1802 : ! and longitude. This corresponds to the usual longitude index
1803 : ! for full and reduced lon/lat grids.
1804 : !
1805 : ! Method:
1806 : !
1807 : ! Author: Patrick Worley
1808 : !
1809 : !-----------------------------------------------------------------------
1810 : use ppgrid
1811 : !------------------------------Arguments--------------------------------
1812 : integer, intent(in) :: lcid ! local chunk id
1813 : integer, intent(in) :: col ! column index
1814 :
1815 : !---------------------------Local workspace-----------------------------
1816 : integer :: cid ! global chunk id
1817 : integer :: lat ! latitude index
1818 : integer :: gcol ! global column id in latlon
1819 : ! ordering
1820 :
1821 : !-----------------------------------------------------------------------
1822 110592 : cid = lchunks(lcid)%cid
1823 110592 : lat = chunks(cid)%lat(col)
1824 110592 : gcol = dyn_to_latlon_gcol_map(chunks(cid)%gcol(col))
1825 110592 : get_lon_p = (gcol - clat_p_idx(lat)) + 1
1826 :
1827 : return
1828 : end function get_lon_p
1829 : !
1830 : !========================================================================
1831 : !
1832 2668800 : subroutine get_rlat_all_p(lcid, rlatdim, rlats)
1833 : !-----------------------------------------------------------------------
1834 : !
1835 : ! Purpose: Return all latitudes (in radians) for chunk
1836 : !
1837 : ! Method:
1838 : !
1839 : ! Author: Patrick Worley
1840 : !
1841 : !-----------------------------------------------------------------------
1842 : use ppgrid
1843 : !------------------------------Arguments--------------------------------
1844 : integer, intent(in) :: lcid ! local chunk id
1845 : integer, intent(in) :: rlatdim ! declared size of output array
1846 :
1847 : real(r8), intent(out) :: rlats(rlatdim)! array of latitudes
1848 :
1849 : !---------------------------Local workspace-----------------------------
1850 : integer :: i ! loop index
1851 : integer :: cid ! global chunk id
1852 :
1853 : !-----------------------------------------------------------------------
1854 2668800 : cid = lchunks(lcid)%cid
1855 41099520 : do i=1,chunks(cid)%ncols
1856 41099520 : rlats(i) = clat_p(chunks(cid)%lat(i))
1857 : enddo
1858 :
1859 2668800 : return
1860 : end subroutine get_rlat_all_p
1861 : !
1862 : !========================================================================
1863 : !
1864 145920 : subroutine get_area_all_p(lcid, rdim, area)
1865 : !-----------------------------------------------------------------------
1866 : !
1867 : ! Purpose: Return all areas for chunk
1868 : !
1869 : ! Method:
1870 : !
1871 : ! Author: Patrick Worley
1872 : !
1873 : !-----------------------------------------------------------------------
1874 : use ppgrid
1875 : !------------------------------Arguments--------------------------------
1876 : integer, intent(in) :: lcid ! local chunk id
1877 : integer, intent(in) :: rdim ! declared size of output array
1878 :
1879 : real(r8), intent(out) :: area(rdim) ! array of areas
1880 :
1881 : !---------------------------Local workspace-----------------------------
1882 : integer :: i ! loop index
1883 :
1884 : !-----------------------------------------------------------------------
1885 2247168 : do i=1,lchunks(lcid)%ncols
1886 2247168 : area(i) = lchunks(lcid)%area(i)
1887 : enddo
1888 :
1889 145920 : return
1890 : end subroutine get_area_all_p
1891 : !
1892 : !========================================================================
1893 : !
1894 3483648 : real(r8) function get_area_p(lcid, col)
1895 : !-----------------------------------------------------------------------
1896 : !
1897 : ! Purpose: Return area for chunk column
1898 : !
1899 : ! Method:
1900 : !
1901 : ! Author: Patrick Worley
1902 : !
1903 : !-----------------------------------------------------------------------
1904 : use ppgrid
1905 : !------------------------------Arguments--------------------------------
1906 : integer, intent(in) :: lcid ! local chunk id
1907 : integer, intent(in) :: col ! column index
1908 :
1909 : !-----------------------------------------------------------------------
1910 3483648 : get_area_p = lchunks(lcid)%area(col)
1911 :
1912 : return
1913 : end function get_area_p
1914 : !
1915 : !========================================================================
1916 : !
1917 917760 : subroutine get_wght_all_p(lcid, rdim, wght)
1918 : !-----------------------------------------------------------------------
1919 : !
1920 : ! Purpose: Return all integration weights for chunk
1921 : !
1922 : ! Method:
1923 : !
1924 : ! Author: Patrick Worley
1925 : !
1926 : !-----------------------------------------------------------------------
1927 : use ppgrid
1928 : !------------------------------Arguments--------------------------------
1929 : integer, intent(in) :: lcid ! local chunk id
1930 : integer, intent(in) :: rdim ! declared size of output array
1931 :
1932 : real(r8), intent(out) :: wght(rdim) ! array of integration weights
1933 :
1934 : !---------------------------Local workspace-----------------------------
1935 : integer :: i ! loop index
1936 :
1937 : !-----------------------------------------------------------------------
1938 14133504 : do i=1,lchunks(lcid)%ncols
1939 14133504 : wght(i) = lchunks(lcid)%wght(i)
1940 : enddo
1941 :
1942 917760 : return
1943 : end subroutine get_wght_all_p
1944 : !
1945 : !========================================================================
1946 : !
1947 0 : real(r8) function get_wght_p(lcid, col)
1948 : !-----------------------------------------------------------------------
1949 : !
1950 : ! Purpose: Return integration weight for chunk column
1951 : !
1952 : ! Method:
1953 : !
1954 : ! Author: Patrick Worley
1955 : !
1956 : !-----------------------------------------------------------------------
1957 : use ppgrid
1958 : !------------------------------Arguments--------------------------------
1959 : integer, intent(in) :: lcid ! local chunk id
1960 : integer, intent(in) :: col ! column index
1961 :
1962 : !-----------------------------------------------------------------------
1963 0 : get_wght_p = lchunks(lcid)%wght(col)
1964 :
1965 : return
1966 : end function get_wght_p
1967 : !
1968 : !========================================================================
1969 : !
1970 0 : subroutine get_rlat_vec_p(lcid, lth, cols, rlats)
1971 : !-----------------------------------------------------------------------
1972 : !
1973 : ! Purpose: Return latitudes (in radians) for set of chunk columns
1974 : !
1975 : ! Method:
1976 : !
1977 : ! Author: Patrick Worley
1978 : !
1979 : !-----------------------------------------------------------------------
1980 : use ppgrid
1981 : !------------------------------Arguments--------------------------------
1982 : integer, intent(in) :: lcid ! local chunk id
1983 : integer, intent(in) :: lth ! number of column indices
1984 : integer, intent(in) :: cols(lth) ! column indices
1985 :
1986 : real(r8), intent(out) :: rlats(lth) ! array of latitudes
1987 :
1988 : !---------------------------Local workspace-----------------------------
1989 : integer :: i ! loop index
1990 : integer :: cid ! global chunk id
1991 :
1992 : !-----------------------------------------------------------------------
1993 0 : cid = lchunks(lcid)%cid
1994 0 : do i=1,lth
1995 0 : rlats(i) = clat_p(chunks(cid)%lat(cols(i)))
1996 : enddo
1997 :
1998 0 : return
1999 : end subroutine get_rlat_vec_p
2000 : !
2001 : !========================================================================
2002 :
2003 0 : real(r8) function get_rlat_p(lcid, col)
2004 : !-----------------------------------------------------------------------
2005 : !
2006 : ! Purpose: Return latitude (in radians) for chunk column
2007 : !
2008 : ! Method:
2009 : !
2010 : ! Author: Patrick Worley
2011 : !
2012 : !-----------------------------------------------------------------------
2013 : use ppgrid
2014 : !------------------------------Arguments--------------------------------
2015 : integer, intent(in) :: lcid ! local chunk id
2016 : integer, intent(in) :: col ! column index
2017 :
2018 : !---------------------------Local workspace-----------------------------
2019 : integer :: cid ! global chunk id
2020 :
2021 : !-----------------------------------------------------------------------
2022 0 : cid = lchunks(lcid)%cid
2023 0 : get_rlat_p = clat_p(chunks(cid)%lat(col))
2024 :
2025 : return
2026 : end function get_rlat_p
2027 : !
2028 : !========================================================================
2029 : !
2030 2522880 : subroutine get_rlon_all_p(lcid, rlondim, rlons)
2031 : !-----------------------------------------------------------------------
2032 : !
2033 : ! Purpose: Return all longitudes (in radians) for chunk
2034 : !
2035 : ! Method:
2036 : !
2037 : ! Author: Patrick Worley
2038 : !
2039 : !-----------------------------------------------------------------------
2040 : use ppgrid
2041 : !------------------------------Arguments--------------------------------
2042 : integer, intent(in) :: lcid ! local chunk id
2043 : integer, intent(in) :: rlondim ! declared size of output array
2044 :
2045 : real(r8), intent(out) :: rlons(rlondim)! array of longitudes
2046 :
2047 : !---------------------------Local workspace-----------------------------
2048 : integer :: i ! loop index
2049 : integer :: cid ! global chunk id
2050 :
2051 : !-----------------------------------------------------------------------
2052 2522880 : cid = lchunks(lcid)%cid
2053 38852352 : do i=1,chunks(cid)%ncols
2054 38852352 : rlons(i) = clon_p(chunks(cid)%lon(i))
2055 : enddo
2056 :
2057 2522880 : return
2058 : end subroutine get_rlon_all_p
2059 : !
2060 : !========================================================================
2061 :
2062 0 : subroutine get_rlon_vec_p(lcid, lth, cols, rlons)
2063 : !-----------------------------------------------------------------------
2064 : !
2065 : ! Purpose: Return longitudes (in radians) for set of chunk columns
2066 : !
2067 : ! Method:
2068 : !
2069 : ! Author: Patrick Worley
2070 : !
2071 : !-----------------------------------------------------------------------
2072 : use ppgrid
2073 : !------------------------------Arguments--------------------------------
2074 : integer, intent(in) :: lcid ! local chunk id
2075 : integer, intent(in) :: lth ! number of column indices
2076 : integer, intent(in) :: cols(lth) ! column indices
2077 :
2078 : real(r8), intent(out) :: rlons(lth) ! array of longitudes
2079 :
2080 : !---------------------------Local workspace-----------------------------
2081 : integer :: i ! loop index
2082 : integer :: cid ! global chunk id
2083 :
2084 : !-----------------------------------------------------------------------
2085 0 : cid = lchunks(lcid)%cid
2086 0 : do i=1,lth
2087 0 : rlons(i) = clon_p(chunks(cid)%lon(cols(i)))
2088 : enddo
2089 :
2090 0 : return
2091 : end subroutine get_rlon_vec_p
2092 : !
2093 : !========================================================================
2094 :
2095 0 : real(r8) function get_rlon_p(lcid, col)
2096 : !-----------------------------------------------------------------------
2097 : !
2098 : ! Purpose: Return longitude (in radians) for chunk column
2099 : !
2100 : ! Method:
2101 : !
2102 : ! Author: Patrick Worley
2103 : !
2104 : !-----------------------------------------------------------------------
2105 : use ppgrid
2106 : !------------------------------Arguments--------------------------------
2107 : integer, intent(in) :: lcid ! local chunk id
2108 : integer, intent(in) :: col ! column index
2109 :
2110 : !---------------------------Local workspace-----------------------------
2111 : integer :: cid ! global chunk id
2112 :
2113 : !-----------------------------------------------------------------------
2114 0 : cid = lchunks(lcid)%cid
2115 0 : get_rlon_p = clon_p(chunks(cid)%lon(col))
2116 :
2117 : return
2118 : end function get_rlon_p
2119 : !
2120 : !========================================================================
2121 : !
2122 :
2123 0 : subroutine scatter_field_to_chunk(fdim,mdim,ldim, &
2124 0 : hdim1d,globalfield,localchunks)
2125 : !-----------------------------------------------------------------------
2126 : !
2127 : ! Purpose: Distribute field
2128 : ! to decomposed chunk data structure
2129 : !
2130 : ! Method:
2131 : !
2132 : ! Author: Patrick Worley
2133 : !
2134 :
2135 : !------------------------------Arguments--------------------------------
2136 : integer, intent(in) :: fdim ! declared length of first dimension
2137 : integer, intent(in) :: mdim ! declared length of middle dimension
2138 : integer, intent(in) :: ldim ! declared length of last dimension
2139 : integer, intent(in) :: hdim1d ! declared first horizontal index
2140 : ! dimension
2141 : real(r8), intent(in) :: globalfield(fdim,hdim1d,mdim,hdim2_d,ldim)
2142 : ! global field
2143 :
2144 : real(r8), intent(out):: localchunks(fdim,pcols,mdim, &
2145 : begchunk:endchunk,ldim)
2146 : ! local chunks
2147 :
2148 : !---------------------------Local workspace-----------------------------
2149 : integer :: f,i,m,l,p ! loop indices
2150 : integer :: cid ! global chunk id
2151 : integer :: lcid ! local chunk id
2152 : integer :: lid ! local column index
2153 : integer :: gcol ! global column index
2154 : integer :: h1 ! first horizontal dimension index
2155 : integer :: h2 ! second horizontal dimension index
2156 :
2157 : #if ( defined SPMD )
2158 0 : real(r8) gfield_p(fdim,mdim,ldim,ngcols)
2159 : ! vector to be scattered
2160 0 : real(r8) lfield_p(fdim,mdim,ldim,nlcols)
2161 : ! local component of scattered
2162 : ! vector
2163 0 : integer :: displs(0:npes-1) ! scatter displacements
2164 0 : integer :: sndcnts(0:npes-1) ! scatter send counts
2165 : integer :: recvcnt ! scatter receive count
2166 : integer :: beglcol ! beginning index for local columns
2167 : ! in global column ordering
2168 : #endif
2169 :
2170 : !-----------------------------------------------------------------------
2171 0 : if (hdim1d < hdim1_d) then
2172 0 : write(iulog,*) __FILE__,__LINE__,hdim1d,hdim1_d
2173 0 : call endrun ('SCATTER_FIELD_TO_CHUNK error: hdim1d < hdim1_d')
2174 : endif
2175 0 : localchunks(:,:,:,:,:) = 0
2176 : #if ( defined SPMD )
2177 0 : displs(0) = 0
2178 0 : sndcnts(0) = fdim*mdim*ldim*gs_col_num(0)
2179 0 : beglcol = 0
2180 0 : do p=1,npes-1
2181 0 : displs(p) = displs(p-1) + sndcnts(p-1)
2182 0 : sndcnts(p) = fdim*mdim*ldim*gs_col_num(p)
2183 0 : if (p <= iam) then
2184 0 : beglcol = beglcol + gs_col_num(p-1)
2185 : endif
2186 : enddo
2187 0 : recvcnt = fdim*mdim*ldim*nlcols
2188 :
2189 0 : if (masterproc) then
2190 :
2191 : ! copy field into global (process-ordered) chunked data structure
2192 :
2193 0 : do l=1,ldim
2194 0 : do i=1,num_global_phys_cols
2195 0 : cid = pgcols(i)%chunk
2196 0 : lid = pgcols(i)%ccol
2197 0 : gcol = chunks(cid)%gcol(lid)
2198 0 : h2 = (gcol-1)/hdim1_d + 1
2199 0 : h1 = mod((gcol-1),hdim1_d) + 1
2200 0 : do m=1,mdim
2201 0 : do f=1,fdim
2202 0 : gfield_p(f,m,l,i) = &
2203 0 : globalfield(f, h1, m, h2, l)
2204 : end do
2205 : end do
2206 : end do
2207 : end do
2208 : endif
2209 :
2210 : ! scatter to other processes
2211 : ! (pgcols ordering consistent with begchunk:endchunk
2212 : ! local ordering)
2213 :
2214 0 : call t_barrierf('sync_scat_ftoc', mpicom)
2215 : call mpiscatterv(gfield_p, sndcnts, displs, mpir8, &
2216 0 : lfield_p, recvcnt, mpir8, 0, mpicom)
2217 :
2218 : ! copy into local chunked data structure
2219 :
2220 0 : do i=1,nlcols
2221 0 : cid = pgcols(beglcol+i)%chunk
2222 0 : lcid = chunks(cid)%lcid
2223 0 : lid = pgcols(beglcol+i)%ccol
2224 0 : do l=1,ldim
2225 0 : do m=1,mdim
2226 0 : do f=1,fdim
2227 0 : localchunks(f,lid,m,lcid,l) = &
2228 0 : lfield_p(f, m, l, i)
2229 : end do
2230 : end do
2231 : end do
2232 : end do
2233 : #else
2234 :
2235 : ! copy field into chunked data structure
2236 : ! (pgcol ordering chosen to reflect begchunk:endchunk
2237 : ! local ordering)
2238 :
2239 : do l=1,ldim
2240 : do i=1,num_global_phys_cols
2241 : cid = pgcols(i)%chunk
2242 : lcid = chunks(cid)%lcid
2243 : lid = pgcols(i)%ccol
2244 : gcol = chunks(cid)%gcol(lid)
2245 : h2 = (gcol-1)/hdim1_d + 1
2246 : h1 = mod((gcol-1),hdim1_d) + 1
2247 : do m=1,mdim
2248 : do f=1,fdim
2249 : localchunks(f,lid,m,lcid,l) = &
2250 : globalfield(f, h1, m, h2, l)
2251 : end do
2252 : end do
2253 : end do
2254 : end do
2255 :
2256 : #endif
2257 :
2258 0 : return
2259 : end subroutine scatter_field_to_chunk
2260 : !========================================================================
2261 :
2262 0 : subroutine scatter_field_to_chunk4(fdim,mdim,ldim, &
2263 0 : hdim1d,globalfield,localchunks)
2264 : !-----------------------------------------------------------------------
2265 : !
2266 : ! Purpose: Distribute field
2267 : ! to decomposed chunk data structure
2268 : !
2269 : ! Method:
2270 : !
2271 : ! Author: Patrick Worley
2272 : !
2273 : !-----------------------------------------------------------------------
2274 : !------------------------------Arguments--------------------------------
2275 : integer, intent(in) :: fdim ! declared length of first dimension
2276 : integer, intent(in) :: mdim ! declared length of middle dimension
2277 : integer, intent(in) :: ldim ! declared length of last dimension
2278 : integer, intent(in) :: hdim1d ! declared first horizontal index
2279 : ! dimension
2280 : real(r4), intent(in) :: globalfield(fdim,hdim1d,mdim,hdim2_d,ldim)
2281 : ! global field
2282 :
2283 : real(r4), intent(out):: localchunks(fdim,pcols,mdim, &
2284 : begchunk:endchunk,ldim)
2285 : ! local chunks
2286 :
2287 : !---------------------------Local workspace-----------------------------
2288 : integer :: f,i,m,l,p ! loop indices
2289 : integer :: cid ! global chunk id
2290 : integer :: lcid ! local chunk id
2291 : integer :: lid ! local column index
2292 : integer :: gcol ! global column index
2293 : integer :: h1 ! first horizontal dimension index
2294 : integer :: h2 ! second horizontal dimension index
2295 :
2296 : #if ( defined SPMD )
2297 0 : real(r4) gfield_p(fdim,mdim,ldim,ngcols)
2298 : ! vector to be scattered
2299 0 : real(r4) lfield_p(fdim,mdim,ldim,nlcols)
2300 : ! local component of scattered
2301 : ! vector
2302 0 : integer :: displs(0:npes-1) ! scatter displacements
2303 0 : integer :: sndcnts(0:npes-1) ! scatter send counts
2304 : integer :: recvcnt ! scatter receive count
2305 : integer :: beglcol ! beginning index for local columns
2306 : ! in global column ordering
2307 : #endif
2308 :
2309 : !-----------------------------------------------------------------------
2310 0 : if (hdim1d < hdim1_d) then
2311 0 : call endrun ('SCATTER_FIELD_TO_CHUNK4 error: hdim1d < hdim1_d')
2312 : endif
2313 : #if ( defined SPMD )
2314 0 : displs(0) = 0
2315 0 : sndcnts(0) = fdim*mdim*ldim*gs_col_num(0)
2316 0 : beglcol = 0
2317 0 : do p=1,npes-1
2318 0 : displs(p) = displs(p-1) + sndcnts(p-1)
2319 0 : sndcnts(p) = fdim*mdim*ldim*gs_col_num(p)
2320 0 : if (p <= iam) then
2321 0 : beglcol = beglcol + gs_col_num(p-1)
2322 : endif
2323 : enddo
2324 0 : recvcnt = fdim*mdim*ldim*nlcols
2325 :
2326 0 : if (masterproc) then
2327 : ! copy field into global (process-ordered) chunked data structure
2328 0 : do l=1,ldim
2329 0 : do i=1,num_global_phys_cols
2330 0 : cid = pgcols(i)%chunk
2331 0 : lid = pgcols(i)%ccol
2332 0 : gcol = chunks(cid)%gcol(lid)
2333 0 : h2 = (gcol-1)/hdim1_d + 1
2334 0 : h1 = mod((gcol-1),hdim1_d) + 1
2335 0 : do m=1,mdim
2336 0 : do f=1,fdim
2337 0 : gfield_p(f,m,l,i) = &
2338 0 : globalfield(f, h1, m, h2, l)
2339 : end do
2340 : end do
2341 : end do
2342 : end do
2343 : endif
2344 :
2345 : ! scatter to other processes
2346 : ! (pgcols ordering consistent with begchunk:endchunk
2347 : ! local ordering)
2348 :
2349 0 : call t_barrierf('sync_scat_ftoc', mpicom)
2350 : call mpiscatterv(gfield_p, sndcnts, displs, mpir4, &
2351 0 : lfield_p, recvcnt, mpir4, 0, mpicom)
2352 :
2353 : ! copy into local chunked data structure
2354 :
2355 0 : do i=1,nlcols
2356 0 : cid = pgcols(beglcol+i)%chunk
2357 0 : lcid = chunks(cid)%lcid
2358 0 : lid = pgcols(beglcol+i)%ccol
2359 0 : do l=1,ldim
2360 0 : do m=1,mdim
2361 0 : do f=1,fdim
2362 0 : localchunks(f,lid,m,lcid,l) = &
2363 0 : lfield_p(f, m, l, i)
2364 : end do
2365 : end do
2366 : end do
2367 : end do
2368 : #else
2369 :
2370 : ! copy field into chunked data structure
2371 : ! (pgcol ordering chosen to reflect begchunk:endchunk
2372 : ! local ordering)
2373 : do l=1,ldim
2374 : do i=1,num_global_phys_cols
2375 : cid = pgcols(i)%chunk
2376 : lcid = chunks(cid)%lcid
2377 : lid = pgcols(i)%ccol
2378 : gcol = chunks(cid)%gcol(lid)
2379 : h2 = (gcol-1)/hdim1_d + 1
2380 : h1 = mod((gcol-1),hdim1_d) + 1
2381 : do m=1,mdim
2382 : do f=1,fdim
2383 : localchunks(f,lid,m,lcid,l) = &
2384 : globalfield(f, h1, m, h2, l)
2385 : end do
2386 : end do
2387 : end do
2388 : end do
2389 :
2390 : #endif
2391 :
2392 0 : return
2393 : end subroutine scatter_field_to_chunk4
2394 : !========================================================================
2395 :
2396 0 : subroutine scatter_field_to_chunk_int(fdim,mdim,ldim, &
2397 0 : hdim1d,globalfield,localchunks)
2398 : !-----------------------------------------------------------------------
2399 : !
2400 : ! Purpose: Distribute field
2401 : ! to decomposed chunk data structure
2402 : !
2403 : ! Method:
2404 : !
2405 : ! Author: Patrick Worley
2406 : !
2407 : !------------------------------Arguments--------------------------------
2408 : integer, intent(in) :: fdim ! declared length of first dimension
2409 : integer, intent(in) :: mdim ! declared length of middle dimension
2410 : integer, intent(in) :: ldim ! declared length of last dimension
2411 : integer, intent(in) :: hdim1d ! declared first horizontal index
2412 : ! dimension
2413 : integer, intent(in) :: globalfield(fdim,hdim1d,mdim,hdim2_d,ldim)
2414 : ! global field
2415 :
2416 : integer, intent(out):: localchunks(fdim,pcols,mdim, &
2417 : begchunk:endchunk,ldim)
2418 : ! local chunks
2419 :
2420 : !---------------------------Local workspace-----------------------------
2421 : integer :: f,i,m,l,p ! loop indices
2422 : integer :: cid ! global chunk id
2423 : integer :: lcid ! local chunk id
2424 : integer :: lid ! local column index
2425 : integer :: gcol ! global column index
2426 : integer :: h1 ! first horizontal dimension index
2427 : integer :: h2 ! second horizontal dimension index
2428 :
2429 : #if ( defined SPMD )
2430 0 : integer gfield_p(fdim,mdim,ldim,ngcols)
2431 : ! vector to be scattered
2432 0 : integer lfield_p(fdim,mdim,ldim,nlcols)
2433 : ! local component of scattered
2434 : ! vector
2435 0 : integer :: displs(0:npes-1) ! scatter displacements
2436 0 : integer :: sndcnts(0:npes-1) ! scatter send counts
2437 : integer :: recvcnt ! scatter receive count
2438 : integer :: beglcol ! beginning index for local columns
2439 : ! in global column ordering
2440 : #endif
2441 :
2442 : !-----------------------------------------------------------------------
2443 0 : if (hdim1d < hdim1_d) then
2444 0 : call endrun ('SCATTER_FIELD_TO_CHUNK_INT error: hdim1d < hdim1_d')
2445 : endif
2446 : #if ( defined SPMD )
2447 0 : displs(0) = 0
2448 0 : sndcnts(0) = fdim*mdim*ldim*gs_col_num(0)
2449 0 : beglcol = 0
2450 0 : do p=1,npes-1
2451 0 : displs(p) = displs(p-1) + sndcnts(p-1)
2452 0 : sndcnts(p) = fdim*mdim*ldim*gs_col_num(p)
2453 0 : if (p <= iam) then
2454 0 : beglcol = beglcol + gs_col_num(p-1)
2455 : endif
2456 : enddo
2457 0 : recvcnt = fdim*mdim*ldim*nlcols
2458 :
2459 0 : if (masterproc) then
2460 :
2461 : ! copy field into global (process-ordered) chunked data structure
2462 :
2463 0 : do l=1,ldim
2464 0 : do i=1,num_global_phys_cols
2465 0 : cid = pgcols(i)%chunk
2466 0 : lid = pgcols(i)%ccol
2467 0 : gcol = chunks(cid)%gcol(lid)
2468 0 : h2 = (gcol-1)/hdim1_d + 1
2469 0 : h1 = mod((gcol-1),hdim1_d) + 1
2470 0 : do m=1,mdim
2471 0 : do f=1,fdim
2472 0 : gfield_p(f,m,l,i) = &
2473 0 : globalfield(f, h1, m, h2, l)
2474 : end do
2475 : end do
2476 : end do
2477 : end do
2478 : endif
2479 :
2480 : ! scatter to other processes
2481 : ! (pgcols ordering consistent with begchunk:endchunk
2482 : ! local ordering)
2483 :
2484 0 : call t_barrierf('sync_scat_ftoc', mpicom)
2485 : call mpiscatterv(gfield_p, sndcnts, displs, mpiint, &
2486 0 : lfield_p, recvcnt, mpiint, 0, mpicom)
2487 :
2488 : ! copy into local chunked data structure
2489 :
2490 0 : do i=1,nlcols
2491 0 : cid = pgcols(beglcol+i)%chunk
2492 0 : lcid = chunks(cid)%lcid
2493 0 : lid = pgcols(beglcol+i)%ccol
2494 0 : do l=1,ldim
2495 0 : do m=1,mdim
2496 0 : do f=1,fdim
2497 0 : localchunks(f,lid,m,lcid,l) = &
2498 0 : lfield_p(f, m, l, i)
2499 : end do
2500 : end do
2501 : end do
2502 : end do
2503 : #else
2504 :
2505 : ! copy field into chunked data structure
2506 : ! (pgcol ordering chosen to reflect begchunk:endchunk
2507 : ! local ordering)
2508 : do l=1,ldim
2509 : do i=1,num_global_phys_cols
2510 : cid = pgcols(i)%chunk
2511 : lcid = chunks(cid)%lcid
2512 : lid = pgcols(i)%ccol
2513 : gcol = chunks(cid)%gcol(lid)
2514 : h2 = (gcol-1)/hdim1_d + 1
2515 : h1 = mod((gcol-1),hdim1_d) + 1
2516 : do m=1,mdim
2517 : do f=1,fdim
2518 : localchunks(f,lid,m,lcid,l) = &
2519 : globalfield(f, h1, m, h2, l)
2520 : end do
2521 : end do
2522 : end do
2523 : end do
2524 :
2525 : #endif
2526 :
2527 0 : return
2528 : end subroutine scatter_field_to_chunk_int
2529 : !
2530 : !========================================================================
2531 : !
2532 0 : subroutine gather_chunk_to_field(fdim,mdim,ldim, &
2533 0 : hdim1d,localchunks,globalfield)
2534 :
2535 : !-----------------------------------------------------------------------
2536 : !
2537 : ! Purpose: Reconstruct field
2538 : ! from decomposed chunk data structure
2539 : !
2540 : ! Method:
2541 : !
2542 : ! Author: Patrick Worley
2543 : !
2544 : !-----------------------------------------------------------------------
2545 : #if ( defined SPMD )
2546 : use spmd_utils, only: fc_gatherv
2547 : #endif
2548 : !------------------------------Arguments--------------------------------
2549 : integer, intent(in) :: fdim ! declared length of first dimension
2550 : integer, intent(in) :: mdim ! declared length of middle dimension
2551 : integer, intent(in) :: ldim ! declared length of last dimension
2552 : integer, intent(in) :: hdim1d ! declared first horizontal index
2553 : ! dimension
2554 : real(r8), intent(in):: localchunks(fdim,pcols,mdim, &
2555 : begchunk:endchunk,ldim)
2556 : ! local chunks
2557 :
2558 : real(r8), intent(out) :: globalfield(fdim,hdim1d,mdim,hdim2_d,ldim)
2559 : ! global field
2560 :
2561 : !---------------------------Local workspace-----------------------------
2562 : integer :: f,i,m,l,p ! loop indices
2563 : integer :: cid ! global chunk id
2564 : integer :: lcid ! local chunk id
2565 : integer :: lid ! local column index
2566 : integer :: gcol ! global column index
2567 : integer :: h1 ! first horizontal dimension index
2568 : integer :: h2 ! second horizontal dimension index
2569 :
2570 : #if ( defined SPMD )
2571 0 : real(r8) gfield_p(fdim,mdim,ldim,ngcols)
2572 : ! vector to be gathered
2573 0 : real(r8) lfield_p(fdim,mdim,ldim,nlcols)
2574 : ! local component of gather
2575 : ! vector
2576 0 : integer :: displs(0:npes-1) ! gather displacements
2577 0 : integer :: rcvcnts(0:npes-1) ! gather receive count
2578 : integer :: sendcnt ! gather send counts
2579 : integer :: beglcol ! beginning index for local columns
2580 : ! in global column ordering
2581 : #endif
2582 :
2583 : !-----------------------------------------------------------------------
2584 0 : if (hdim1d < hdim1_d) then
2585 0 : call endrun ('GATHER_CHUNK_TO_FIELD error: hdim1d < hdim1_d')
2586 : endif
2587 : #if ( defined SPMD )
2588 0 : displs(0) = 0
2589 0 : rcvcnts(0) = fdim*mdim*ldim*gs_col_num(0)
2590 0 : beglcol = 0
2591 0 : do p=1,npes-1
2592 0 : displs(p) = displs(p-1) + rcvcnts(p-1)
2593 0 : rcvcnts(p) = fdim*mdim*ldim*gs_col_num(p)
2594 0 : if (p <= iam) then
2595 0 : beglcol = beglcol + gs_col_num(p-1)
2596 : endif
2597 : enddo
2598 0 : sendcnt = fdim*mdim*ldim*nlcols
2599 :
2600 : ! copy into local gather data structure
2601 :
2602 0 : do l=1,ldim
2603 0 : do i=1,nlcols
2604 0 : cid = pgcols(beglcol+i)%chunk
2605 0 : lcid = chunks(cid)%lcid
2606 0 : lid = pgcols(beglcol+i)%ccol
2607 0 : do m=1,mdim
2608 0 : do f=1,fdim
2609 0 : lfield_p(f, m, l, i) = &
2610 0 : localchunks(f,lid,m,lcid,l)
2611 : end do
2612 : end do
2613 : end do
2614 : end do
2615 :
2616 : ! gather from other processes
2617 :
2618 0 : call t_barrierf('sync_gath_ctof', mpicom)
2619 : call fc_gatherv(lfield_p, sendcnt, mpir8, &
2620 0 : gfield_p, rcvcnts, displs, mpir8, 0, mpicom)
2621 :
2622 0 : if (masterproc) then
2623 :
2624 : ! copy gathered columns into lon/lat field
2625 :
2626 0 : do i=1,num_global_phys_cols
2627 0 : cid = pgcols(i)%chunk
2628 0 : lid = pgcols(i)%ccol
2629 0 : gcol = chunks(cid)%gcol(lid)
2630 0 : h2 = (gcol-1)/hdim1_d + 1
2631 0 : h1 = mod((gcol-1),hdim1_d) + 1
2632 0 : do l=1,ldim
2633 0 : do m=1,mdim
2634 0 : do f=1,fdim
2635 0 : globalfield(f, h1, m, h2, l) &
2636 0 : = gfield_p(f,m,l,i)
2637 : end do
2638 : end do
2639 : end do
2640 : end do
2641 : endif
2642 0 : call mpibarrier(mpicom)
2643 : #else
2644 :
2645 : ! copy chunked data structure into dynamics field
2646 : ! (pgcol ordering chosen to reflect begchunk:endchunk
2647 : ! local ordering)
2648 : do l=1,ldim
2649 : do i=1,num_global_phys_cols
2650 : cid = pgcols(i)%chunk
2651 : lcid = chunks(cid)%lcid
2652 : lid = pgcols(i)%ccol
2653 : gcol = chunks(cid)%gcol(lid)
2654 : h2 = (gcol-1)/hdim1_d + 1
2655 : h1 = mod((gcol-1),hdim1_d) + 1
2656 : do m=1,mdim
2657 : do f=1,fdim
2658 : globalfield(f, h1, m, h2, l) &
2659 : = localchunks(f,lid,m,lcid,l)
2660 : end do
2661 : end do
2662 : end do
2663 : end do
2664 :
2665 : #endif
2666 :
2667 0 : return
2668 : end subroutine gather_chunk_to_field
2669 :
2670 : !
2671 : !========================================================================
2672 : !
2673 0 : subroutine gather_chunk_to_field4 (fdim,mdim,ldim, &
2674 0 : hdim1d,localchunks,globalfield)
2675 :
2676 : !-----------------------------------------------------------------------
2677 : !
2678 : ! Purpose: Reconstruct field
2679 : ! from decomposed chunk data structure
2680 : !
2681 : ! Method:
2682 : !
2683 : ! Author: Patrick Worley
2684 : !
2685 : !-----------------------------------------------------------------------
2686 : #if ( defined SPMD )
2687 : use spmd_utils, only: fc_gathervr4
2688 : #endif
2689 : !------------------------------Arguments--------------------------------
2690 : integer, intent(in) :: fdim ! declared length of first dimension
2691 : integer, intent(in) :: mdim ! declared length of middle dimension
2692 : integer, intent(in) :: ldim ! declared length of last dimension
2693 : integer, intent(in) :: hdim1d ! declared first horizontal index
2694 : ! dimension
2695 : real(r4), intent(in):: localchunks(fdim,pcols,mdim, &
2696 : begchunk:endchunk,ldim)
2697 : ! local chunks
2698 :
2699 : real(r4), intent(out) :: globalfield(fdim,hdim1d,mdim,hdim2_d,ldim)
2700 : ! global field
2701 :
2702 : !---------------------------Local workspace-----------------------------
2703 : integer :: f,i,m,l,p ! loop indices
2704 : integer :: cid ! global chunk id
2705 : integer :: lcid ! local chunk id
2706 : integer :: lid ! local column index
2707 : integer :: gcol ! global column index
2708 : integer :: h1 ! first horizontal dimension index
2709 : integer :: h2 ! second horizontal dimension index
2710 :
2711 : #if ( defined SPMD )
2712 0 : real(r4) gfield_p(fdim,mdim,ldim,ngcols)
2713 : ! vector to be gathered
2714 0 : real(r4) lfield_p(fdim,mdim,ldim,nlcols)
2715 : ! local component of gather
2716 : ! vector
2717 0 : integer :: displs(0:npes-1) ! gather displacements
2718 0 : integer :: rcvcnts(0:npes-1) ! gather receive count
2719 : integer :: sendcnt ! gather send counts
2720 : integer :: beglcol ! beginning index for local columns
2721 : ! in global column ordering
2722 : #endif
2723 :
2724 : !-----------------------------------------------------------------------
2725 0 : if (hdim1d < hdim1_d) then
2726 0 : call endrun ('GATHER_CHUNK_TO_FIELD4 error: hdim1d < hdim1_d')
2727 : endif
2728 : #if ( defined SPMD )
2729 0 : displs(0) = 0
2730 0 : rcvcnts(0) = fdim*mdim*ldim*gs_col_num(0)
2731 0 : beglcol = 0
2732 0 : do p=1,npes-1
2733 0 : displs(p) = displs(p-1) + rcvcnts(p-1)
2734 0 : rcvcnts(p) = fdim*mdim*ldim*gs_col_num(p)
2735 0 : if (p <= iam) then
2736 0 : beglcol = beglcol + gs_col_num(p-1)
2737 : endif
2738 : enddo
2739 0 : sendcnt = fdim*mdim*ldim*nlcols
2740 :
2741 : ! copy into local gather data structure
2742 :
2743 0 : do l=1,ldim
2744 0 : do i=1,nlcols
2745 0 : cid = pgcols(beglcol+i)%chunk
2746 0 : lcid = chunks(cid)%lcid
2747 0 : lid = pgcols(beglcol+i)%ccol
2748 0 : do m=1,mdim
2749 0 : do f=1,fdim
2750 0 : lfield_p(f, m, l, i) = &
2751 0 : localchunks(f,lid,m,lcid,l)
2752 : end do
2753 : end do
2754 : end do
2755 : end do
2756 :
2757 : ! gather from other processes
2758 :
2759 0 : call t_barrierf('sync_gath_ctof', mpicom)
2760 : call fc_gathervr4(lfield_p, sendcnt, mpir4, &
2761 0 : gfield_p, rcvcnts, displs, mpir4, 0, mpicom)
2762 :
2763 0 : if (masterproc) then
2764 :
2765 : ! copy gathered columns into lon/lat field
2766 :
2767 0 : do i=1,num_global_phys_cols
2768 0 : cid = pgcols(i)%chunk
2769 0 : lid = pgcols(i)%ccol
2770 0 : gcol = chunks(cid)%gcol(lid)
2771 0 : h2 = (gcol-1)/hdim1_d + 1
2772 0 : h1 = mod((gcol-1),hdim1_d) + 1
2773 0 : do l=1,ldim
2774 0 : do m=1,mdim
2775 0 : do f=1,fdim
2776 0 : globalfield(f, h1, m, h2, l) &
2777 0 : = gfield_p(f,m,l,i)
2778 : end do
2779 : end do
2780 : end do
2781 : end do
2782 : endif
2783 :
2784 : #else
2785 :
2786 : ! copy chunked data structure into dynamics field
2787 : ! (pgcol ordering chosen to reflect begchunk:endchunk
2788 : ! local ordering)
2789 :
2790 : do l=1,ldim
2791 : do i=1,num_global_phys_cols
2792 : cid = pgcols(i)%chunk
2793 : lcid = chunks(cid)%lcid
2794 : lid = pgcols(i)%ccol
2795 : gcol = chunks(cid)%gcol(lid)
2796 : h2 = (gcol-1)/hdim1_d + 1
2797 : h1 = mod((gcol-1),hdim1_d) + 1
2798 : do m=1,mdim
2799 : do f=1,fdim
2800 : globalfield(f, h1, m, h2, l) &
2801 : = localchunks(f,lid,m,lcid,l)
2802 : end do
2803 : end do
2804 : end do
2805 : end do
2806 :
2807 : #endif
2808 :
2809 0 : return
2810 : end subroutine gather_chunk_to_field4
2811 :
2812 : !
2813 : !========================================================================
2814 : !
2815 0 : subroutine gather_chunk_to_field_int (fdim,mdim,ldim, &
2816 0 : hdim1d,localchunks,globalfield)
2817 :
2818 : !-----------------------------------------------------------------------
2819 : !
2820 : ! Purpose: Reconstruct field
2821 : ! from decomposed chunk data structure
2822 : !
2823 : ! Method:
2824 : !
2825 : ! Author: Patrick Worley
2826 : !
2827 : !-----------------------------------------------------------------------
2828 : #if ( defined SPMD )
2829 : use spmd_utils, only: fc_gathervint
2830 : #endif
2831 : !------------------------------Arguments--------------------------------
2832 : integer, intent(in) :: fdim ! declared length of first dimension
2833 : integer, intent(in) :: mdim ! declared length of middle dimension
2834 : integer, intent(in) :: ldim ! declared length of last dimension
2835 : integer, intent(in) :: hdim1d ! declared first horizontal index
2836 : ! dimension
2837 : integer, intent(in):: localchunks(fdim,pcols,mdim,begchunk:endchunk,ldim) ! local chunks
2838 :
2839 : integer, intent(out) :: globalfield(fdim,hdim1d,mdim,hdim2_d,ldim) ! global field
2840 :
2841 : !---------------------------Local workspace-----------------------------
2842 :
2843 : integer :: f,i,m,l,p ! loop indices
2844 : integer :: cid ! global chunk id
2845 : integer :: lcid ! local chunk id
2846 : integer :: lid ! local column index
2847 : integer :: gcol ! global column index
2848 : integer :: h1 ! first horizontal dimension index
2849 : integer :: h2 ! second horizontal dimension index
2850 :
2851 : #if ( defined SPMD )
2852 0 : integer gfield_p(fdim,mdim,ldim,ngcols)
2853 : ! vector to be gathered
2854 0 : integer lfield_p(fdim,mdim,ldim,nlcols)
2855 : ! local component of gather
2856 : ! vector
2857 0 : integer :: displs(0:npes-1) ! gather displacements
2858 0 : integer :: rcvcnts(0:npes-1) ! gather receive count
2859 : integer :: sendcnt ! gather send counts
2860 : integer :: beglcol ! beginning index for local columns
2861 : ! in global column ordering
2862 : #endif
2863 :
2864 : !-----------------------------------------------------------------------
2865 0 : if (hdim1d < hdim1_d) then
2866 0 : call endrun ('GATHER_CHUNK_TO_FIELD_INT error: hdim1d < hdim1_d')
2867 : endif
2868 : #if ( defined SPMD )
2869 0 : displs(0) = 0
2870 0 : rcvcnts(0) = fdim*mdim*ldim*gs_col_num(0)
2871 0 : beglcol = 0
2872 0 : do p=1,npes-1
2873 0 : displs(p) = displs(p-1) + rcvcnts(p-1)
2874 0 : rcvcnts(p) = fdim*mdim*ldim*gs_col_num(p)
2875 0 : if (p <= iam) then
2876 0 : beglcol = beglcol + gs_col_num(p-1)
2877 : endif
2878 : enddo
2879 0 : sendcnt = fdim*mdim*ldim*nlcols
2880 :
2881 : ! copy into local gather data structure
2882 :
2883 0 : do l=1,ldim
2884 0 : do i=1,nlcols
2885 0 : cid = pgcols(beglcol+i)%chunk
2886 0 : lcid = chunks(cid)%lcid
2887 0 : lid = pgcols(beglcol+i)%ccol
2888 0 : do m=1,mdim
2889 0 : do f=1,fdim
2890 0 : lfield_p(f, m, l, i) = &
2891 0 : localchunks(f,lid,m,lcid,l)
2892 : end do
2893 : end do
2894 : end do
2895 : end do
2896 :
2897 : ! gather from other processes
2898 :
2899 0 : call t_barrierf('sync_gath_ctof', mpicom)
2900 : call fc_gathervint(lfield_p, sendcnt, mpiint, &
2901 0 : gfield_p, rcvcnts, displs, mpiint, 0, mpicom)
2902 :
2903 0 : if (masterproc) then
2904 :
2905 : ! copy gathered columns into lon/lat field
2906 :
2907 0 : do i=1,num_global_phys_cols
2908 0 : cid = pgcols(i)%chunk
2909 0 : lid = pgcols(i)%ccol
2910 0 : gcol = chunks(cid)%gcol(lid)
2911 0 : h2 = (gcol-1)/hdim1_d + 1
2912 0 : h1 = mod((gcol-1),hdim1_d) + 1
2913 0 : do l=1,ldim
2914 0 : do m=1,mdim
2915 0 : do f=1,fdim
2916 0 : globalfield(f, h1, m, h2, l) &
2917 0 : = gfield_p(f,m,l,i)
2918 : end do
2919 : end do
2920 : end do
2921 : end do
2922 : endif
2923 :
2924 : #else
2925 :
2926 : ! copy chunked data structure into lon/lat field
2927 : ! (pgcol ordering chosen to reflect begchunk:endchunk
2928 : ! local ordering)
2929 : do l=1,ldim
2930 : do i=1,num_global_phys_cols
2931 : cid = pgcols(i)%chunk
2932 : lcid = chunks(cid)%lcid
2933 : lid = pgcols(i)%ccol
2934 : gcol = chunks(cid)%gcol(lid)
2935 : h2 = (gcol-1)/hdim1_d + 1
2936 : h1 = mod((gcol-1),hdim1_d) + 1
2937 : do m=1,mdim
2938 : do f=1,fdim
2939 : globalfield(f, h1, m, h2, l) &
2940 : = localchunks(f,lid,m,lcid,l)
2941 : end do
2942 : end do
2943 : end do
2944 : end do
2945 :
2946 : #endif
2947 :
2948 0 : return
2949 : end subroutine gather_chunk_to_field_int
2950 :
2951 : !
2952 : !========================================================================
2953 : !
2954 0 : subroutine write_field_from_chunk(iu,fdim,mdim,ldim,localchunks)
2955 :
2956 : !-----------------------------------------------------------------------
2957 : !
2958 : !
2959 : ! Purpose: Write field from decomposed chunk data
2960 : ! structure
2961 : !
2962 : ! Method:
2963 : !
2964 : ! Author: Patrick Worley
2965 : !
2966 : !------------------------------Arguments--------------------------------
2967 : integer, intent(in) :: iu ! logical unit
2968 : integer, intent(in) :: fdim ! declared length of first dimension
2969 : integer, intent(in) :: mdim ! declared length of middle dimension
2970 : integer, intent(in) :: ldim ! declared length of last dimension
2971 : real(r8), intent(in):: localchunks(fdim,pcols,mdim,begchunk:endchunk,ldim) ! local chunks
2972 :
2973 : !---------------------------Local workspace-----------------------------
2974 :
2975 : integer :: ioerr ! error return
2976 :
2977 0 : real(r8), allocatable :: globalfield(:,:,:,:,:)
2978 : ! global field
2979 : !-----------------------------------------------------------------------
2980 :
2981 0 : allocate(globalfield(fdim,hdim1_d,mdim,hdim2_d,ldim))
2982 :
2983 0 : call gather_chunk_to_field (fdim,mdim,ldim,hdim1_d,localchunks,globalfield)
2984 :
2985 0 : if (masterproc) then
2986 0 : write (iu,iostat=ioerr) globalfield
2987 0 : if (ioerr /= 0 ) then
2988 0 : write(iulog,*) 'WRITE_FIELD_FROM_CHUNK ioerror ', ioerr,' on i/o unit = ',iu
2989 0 : call endrun
2990 : end if
2991 : endif
2992 :
2993 0 : deallocate(globalfield)
2994 :
2995 0 : return
2996 : end subroutine write_field_from_chunk
2997 :
2998 : !
2999 : !========================================================================
3000 : !
3001 0 : subroutine read_chunk_from_field(iu,fdim,mdim,ldim,localchunks)
3002 :
3003 : !-----------------------------------------------------------------------
3004 : !
3005 : !
3006 : ! Purpose: Write field from decomposed chunk data
3007 : ! structure
3008 : !
3009 : ! Method:
3010 : !
3011 : ! Author: Patrick Worley
3012 : !
3013 : !------------------------------Arguments--------------------------------
3014 : integer, intent(in) :: iu ! logical unit
3015 : integer, intent(in) :: fdim ! declared length of first dimension
3016 : integer, intent(in) :: mdim ! declared length of middle dimension
3017 : integer, intent(in) :: ldim ! declared length of last dimension
3018 :
3019 : real(r8), intent(out):: localchunks(fdim,pcols,mdim,begchunk:endchunk,ldim) ! local chunks
3020 :
3021 : !---------------------------Local workspace-----------------------------
3022 :
3023 : integer :: ioerr ! error return
3024 :
3025 0 : real(r8), allocatable :: globalfield(:,:,:,:,:)
3026 : ! global field
3027 : !-----------------------------------------------------------------------
3028 :
3029 0 : allocate(globalfield(fdim,hdim1_d,mdim,hdim2_d,ldim))
3030 :
3031 0 : if (masterproc) then
3032 0 : read (iu,iostat=ioerr) globalfield
3033 0 : if (ioerr /= 0 ) then
3034 0 : write(iulog,*) 'READ_CHUNK_FROM_FIELD ioerror ', ioerr,' on i/o unit = ',iu
3035 0 : call endrun
3036 : end if
3037 : endif
3038 :
3039 0 : call scatter_field_to_chunk (fdim,mdim,ldim,hdim1_d,globalfield,localchunks)
3040 :
3041 0 : deallocate(globalfield)
3042 :
3043 0 : return
3044 : end subroutine read_chunk_from_field
3045 : !
3046 : !========================================================================
3047 :
3048 16128 : subroutine transpose_block_to_chunk(record_size, block_buffer, &
3049 16128 : chunk_buffer, window)
3050 :
3051 : !-----------------------------------------------------------------------
3052 : !
3053 : ! Purpose: Transpose buffer containing decomposed
3054 : ! fields to buffer
3055 : ! containing decomposed chunk data structures
3056 : !
3057 : ! Method:
3058 : !
3059 : ! Author: Patrick Worley
3060 : ! Modified: Art Mirin, Jan 04, to add support for mod_comm
3061 : !
3062 : !-----------------------------------------------------------------------
3063 : #if ( defined SPMD )
3064 : # if defined(MODCM_DP_TRANSPOSE)
3065 : use mod_comm, only: blockdescriptor, mp_sendirr, mp_recvirr, &
3066 : get_partneroffset, max_nparcels
3067 : use mpishorthand, only : mpicom
3068 : # endif
3069 : use spmd_utils, only: altalltoallv
3070 : #endif
3071 : !------------------------------Parameters-------------------------------
3072 : !
3073 : integer, parameter :: msgtag = 6000
3074 : !------------------------------Arguments--------------------------------
3075 : integer, intent(in) :: record_size ! per column amount of data
3076 : real(r8), intent(in) :: block_buffer(record_size*block_buf_nrecs)
3077 : ! buffer of block data to be
3078 : ! transposed
3079 : real(r8), intent(out):: chunk_buffer(record_size*chunk_buf_nrecs)
3080 : ! buffer of chunk data
3081 : ! transposed into
3082 : integer, intent(in), optional :: window
3083 : ! MPI-2 window id for
3084 : ! chunk_buffer
3085 :
3086 : !---------------------------Local workspace-----------------------------
3087 : #if ( defined SPMD )
3088 : integer :: p ! loop indices
3089 : integer :: bbuf_siz ! size of block_buffer
3090 : integer :: cbuf_siz ! size of chunk_buffer
3091 : integer :: lwindow ! placeholder for missing window
3092 : integer :: lopt ! local copy of phys_alltoall
3093 : !
3094 : logical, save :: first = .true.
3095 : integer, allocatable, save :: sndcnts(:), sdispls(:)
3096 : integer, allocatable, save :: rcvcnts(:), rdispls(:)
3097 : integer, allocatable, save :: pdispls(:)
3098 : integer, save :: prev_record_size = 0
3099 : # if defined(MODCM_DP_TRANSPOSE)
3100 : type (blockdescriptor), allocatable, save :: sendbl(:), recvbl(:)
3101 : integer ione, ierror, mod_method
3102 : # endif
3103 : !-----------------------------------------------------------------------
3104 16128 : if (first) then
3105 : ! Compute send/recv/put counts and displacements
3106 4608 : allocate(sndcnts(0:npes-1))
3107 3072 : allocate(sdispls(0:npes-1))
3108 3072 : allocate(rcvcnts(0:npes-1))
3109 3072 : allocate(rdispls(0:npes-1))
3110 3072 : allocate(pdispls(0:npes-1))
3111 : !
3112 : # if defined(MODCM_DP_TRANSPOSE)
3113 : ! This branch uses mod_comm. Admissable values of phys_alltoall are
3114 : ! 11,12 and 13. Each value corresponds to a different option
3115 : ! within mod_comm of implementing the communication. That option is expressed
3116 : ! internally to mod_comm using the variable mod_method defined below;
3117 : ! mod_method will have values 0,1 or 2 and is defined as
3118 : ! phys_alltoall - modmin_alltoall, where modmin_alltoall equals 11.
3119 : ! Also, sendbl and recvbl must have exactly npes elements, to match
3120 : ! this size of the communicator, or the transpose will fail.
3121 : !
3122 : if (phys_alltoall >= modmin_alltoall) then
3123 : mod_method = phys_alltoall - modmin_alltoall
3124 : ione = 1
3125 : allocate( sendbl(0:npes-1) )
3126 : allocate( recvbl(0:npes-1) )
3127 :
3128 : do p = 0,npes-1
3129 :
3130 : sendbl(p)%method = mod_method
3131 : recvbl(p)%method = mod_method
3132 :
3133 : allocate( sendbl(p)%blocksizes(1) )
3134 : allocate( sendbl(p)%displacements(1) )
3135 : allocate( recvbl(p)%blocksizes(1) )
3136 : allocate( recvbl(p)%displacements(1) )
3137 :
3138 : enddo
3139 :
3140 : endif
3141 : # endif
3142 :
3143 1536 : first = .false.
3144 : endif
3145 : !
3146 16128 : if (record_size /= prev_record_size) then
3147 : !
3148 : ! Compute send/recv/put counts and displacements
3149 1536 : sdispls(0) = 0
3150 1536 : sndcnts(0) = record_size*btofc_blk_num(0)
3151 1179648 : do p=1,npes-1
3152 1178112 : sdispls(p) = sdispls(p-1) + sndcnts(p-1)
3153 1179648 : sndcnts(p) = record_size*btofc_blk_num(p)
3154 : enddo
3155 : !
3156 1536 : rdispls(0) = 0
3157 1536 : rcvcnts(0) = record_size*btofc_chk_num(0)
3158 1179648 : do p=1,npes-1
3159 1178112 : rdispls(p) = rdispls(p-1) + rcvcnts(p-1)
3160 1179648 : rcvcnts(p) = record_size*btofc_chk_num(p)
3161 : enddo
3162 : !
3163 1536 : call mpialltoallint(rdispls, 1, pdispls, 1, mpicom)
3164 : !
3165 : # if defined(MODCM_DP_TRANSPOSE)
3166 : if (phys_alltoall >= modmin_alltoall) then
3167 : do p = 0,npes-1
3168 :
3169 : sendbl(p)%type = MPI_DATATYPE_NULL
3170 : if ( sndcnts(p) /= 0 ) then
3171 :
3172 : if (phys_alltoall > modmin_alltoall) then
3173 : call MPI_TYPE_INDEXED(ione, sndcnts(p), &
3174 : sdispls(p), mpir8, &
3175 : sendbl(p)%type, ierror)
3176 : call MPI_TYPE_COMMIT(sendbl(p)%type, ierror)
3177 : endif
3178 :
3179 : sendbl(p)%blocksizes(1) = sndcnts(p)
3180 : sendbl(p)%displacements(1) = sdispls(p)
3181 : sendbl(p)%partneroffset = 0
3182 :
3183 : else
3184 :
3185 : sendbl(p)%blocksizes(1) = 0
3186 : sendbl(p)%displacements(1) = 0
3187 : sendbl(p)%partneroffset = 0
3188 :
3189 : endif
3190 : sendbl(p)%nparcels = size(sendbl(p)%displacements)
3191 : sendbl(p)%tot_size = sum(sendbl(p)%blocksizes)
3192 : max_nparcels = max(max_nparcels, sendbl(p)%nparcels)
3193 :
3194 : recvbl(p)%type = MPI_DATATYPE_NULL
3195 : if ( rcvcnts(p) /= 0) then
3196 :
3197 : if (phys_alltoall > modmin_alltoall) then
3198 : call MPI_TYPE_INDEXED(ione, rcvcnts(p), &
3199 : rdispls(p), mpir8, &
3200 : recvbl(p)%type, ierror)
3201 : call MPI_TYPE_COMMIT(recvbl(p)%type, ierror)
3202 : endif
3203 :
3204 : recvbl(p)%blocksizes(1) = rcvcnts(p)
3205 : recvbl(p)%displacements(1) = rdispls(p)
3206 : recvbl(p)%partneroffset = 0 ! not properly initialized - do not use Mpi2
3207 : else
3208 :
3209 : recvbl(p)%blocksizes(1) = 0
3210 : recvbl(p)%displacements(1) = 0
3211 : recvbl(p)%partneroffset = 0
3212 :
3213 : endif
3214 : recvbl(p)%nparcels = size(recvbl(p)%displacements)
3215 : recvbl(p)%tot_size = sum(recvbl(p)%blocksizes)
3216 : max_nparcels = max(max_nparcels, recvbl(p)%nparcels)
3217 :
3218 : enddo
3219 :
3220 : call get_partneroffset(mpicom, sendbl, recvbl)
3221 :
3222 : endif
3223 : # endif
3224 : !
3225 1536 : prev_record_size = record_size
3226 : endif
3227 : !
3228 16128 : call t_barrierf('sync_tran_btoc', mpicom)
3229 16128 : if (phys_alltoall < 0) then
3230 16128 : if ((max_nproc_smpx > npes/2) .and. (nproc_busy_d > npes/2)) then
3231 16128 : lopt = 0
3232 : else
3233 0 : lopt = 1
3234 : endif
3235 : else
3236 0 : lopt = phys_alltoall
3237 0 : if ((lopt == 2) .and. ( .not. present(window) )) lopt = 1
3238 : endif
3239 16128 : if (lopt < 4) then
3240 : !
3241 16128 : bbuf_siz = record_size*block_buf_nrecs
3242 16128 : cbuf_siz = record_size*chunk_buf_nrecs
3243 16128 : if ( present(window) ) then
3244 : call altalltoallv(lopt, iam, npes, &
3245 : dp_coup_steps, dp_coup_proc, &
3246 : block_buffer, bbuf_siz, sndcnts, sdispls, mpir8, &
3247 : chunk_buffer, cbuf_siz, rcvcnts, rdispls, mpir8, &
3248 0 : msgtag, pdispls, mpir8, window, mpicom)
3249 : else
3250 : call altalltoallv(lopt, iam, npes, &
3251 : dp_coup_steps, dp_coup_proc, &
3252 : block_buffer, bbuf_siz, sndcnts, sdispls, mpir8, &
3253 : chunk_buffer, cbuf_siz, rcvcnts, rdispls, mpir8, &
3254 16128 : msgtag, pdispls, mpir8, lwindow, mpicom)
3255 : endif
3256 : !
3257 : else
3258 : !
3259 : # if defined(MODCM_DP_TRANSPOSE)
3260 : call mp_sendirr(mpicom, sendbl, recvbl, block_buffer, chunk_buffer)
3261 : call mp_recvirr(mpicom, sendbl, recvbl, block_buffer, chunk_buffer)
3262 : # else
3263 : call mpialltoallv(block_buffer, sndcnts, sdispls, mpir8, &
3264 : chunk_buffer, rcvcnts, rdispls, mpir8, &
3265 0 : mpicom)
3266 : # endif
3267 : !
3268 : endif
3269 : !
3270 : #endif
3271 16128 : return
3272 : end subroutine transpose_block_to_chunk
3273 : !
3274 : !========================================================================
3275 :
3276 16128 : subroutine block_to_chunk_send_pters(blockid, fdim, ldim, &
3277 16128 : record_size, pter)
3278 : !-----------------------------------------------------------------------
3279 : !
3280 : ! Purpose: Return pointers into send buffer where column from decomposed
3281 : ! fields should be copied to
3282 : !
3283 : ! Method:
3284 : !
3285 : ! Author: Patrick Worley
3286 : !
3287 : !-----------------------------------------------------------------------
3288 : !------------------------------Arguments--------------------------------
3289 : integer, intent(in) :: blockid ! block index
3290 : integer, intent(in) :: fdim ! first dimension of pter array
3291 : integer, intent(in) :: ldim ! last dimension of pter array
3292 : integer, intent(in) :: record_size ! per coordinate amount of data
3293 :
3294 : integer, intent(out) :: pter(fdim,ldim) ! buffer offsets
3295 : !---------------------------Local workspace-----------------------------
3296 : integer :: i, k ! loop indices
3297 : !-----------------------------------------------------------------------
3298 16128 : if ((btofc_blk_offset(blockid)%ncols > fdim) .or. &
3299 : (btofc_blk_offset(blockid)%nlvls > ldim)) then
3300 0 : write(iulog,*) "BLOCK_TO_CHUNK_SEND_PTERS: pter array dimensions ", &
3301 0 : "not large enough: (",fdim,",",ldim,") not >= (", &
3302 0 : btofc_blk_offset(blockid)%ncols,",", &
3303 0 : btofc_blk_offset(blockid)%nlvls,")"
3304 0 : call endrun()
3305 : endif
3306 : !
3307 548352 : do k=1,btofc_blk_offset(blockid)%nlvls
3308 38852352 : do i=1,btofc_blk_offset(blockid)%ncols
3309 76640256 : pter(i,k) = 1 + record_size* &
3310 115492608 : (btofc_blk_offset(blockid)%pter(i,k))
3311 : enddo
3312 548352 : do i=btofc_blk_offset(blockid)%ncols+1,fdim
3313 532224 : pter(i,k) = -1
3314 : enddo
3315 : enddo
3316 : !
3317 16128 : do k=btofc_blk_offset(blockid)%nlvls+1,ldim
3318 16128 : do i=1,fdim
3319 0 : pter(i,k) = -1
3320 : enddo
3321 : enddo
3322 : !
3323 16128 : return
3324 : end subroutine block_to_chunk_send_pters
3325 : !
3326 : !========================================================================
3327 :
3328 80640 : subroutine block_to_chunk_recv_pters(lcid, fdim, ldim, &
3329 80640 : record_size, pter)
3330 : !-----------------------------------------------------------------------
3331 : !
3332 : ! Purpose: Return pointers into receive buffer where data for
3333 : ! decomposed chunk data structures should be copied from
3334 : !
3335 : ! Method:
3336 : !
3337 : ! Author: Patrick Worley
3338 : !
3339 : !-----------------------------------------------------------------------
3340 : !------------------------------Arguments--------------------------------
3341 : integer, intent(in) :: lcid ! local chunk id
3342 : integer, intent(in) :: fdim ! first dimension of pter array
3343 : integer, intent(in) :: ldim ! last dimension of pter array
3344 : integer, intent(in) :: record_size ! per coordinate amount of data
3345 :
3346 : integer, intent(out) :: pter(fdim,ldim) ! buffer offset
3347 : !---------------------------Local workspace-----------------------------
3348 : integer :: i, k ! loop indices
3349 : !-----------------------------------------------------------------------
3350 80640 : if ((btofc_chk_offset(lcid)%ncols > fdim) .or. &
3351 : (btofc_chk_offset(lcid)%nlvls > ldim)) then
3352 0 : write(iulog,*) "BLOCK_TO_CHUNK_RECV_PTERS: pter array dimensions ", &
3353 0 : "not large enough: (",fdim,",",ldim,") not >= (", &
3354 0 : btofc_chk_offset(lcid)%ncols,",", &
3355 0 : btofc_chk_offset(lcid)%nlvls,")"
3356 0 : call endrun()
3357 : endif
3358 : !
3359 2741760 : do k=1,btofc_chk_offset(lcid)%nlvls
3360 40981248 : do i=1,btofc_chk_offset(lcid)%ncols
3361 76640256 : pter(i,k) = 1 + record_size* &
3362 117621504 : (btofc_chk_offset(lcid)%pter(i,k))
3363 : enddo
3364 6999552 : do i=btofc_chk_offset(lcid)%ncols+1,fdim
3365 6918912 : pter(i,k) = -1
3366 : enddo
3367 : enddo
3368 : !
3369 80640 : do k=btofc_chk_offset(lcid)%nlvls+1,ldim
3370 80640 : do i=1,fdim
3371 0 : pter(i,k) = -1
3372 : enddo
3373 : enddo
3374 : !
3375 80640 : return
3376 : end subroutine block_to_chunk_recv_pters
3377 : !
3378 : !========================================================================
3379 :
3380 14592 : subroutine transpose_chunk_to_block(record_size, chunk_buffer, &
3381 14592 : block_buffer, window)
3382 : !-----------------------------------------------------------------------
3383 : !
3384 : ! Purpose: Transpose buffer containing decomposed
3385 : ! chunk data structures to buffer
3386 : ! containing decomposed fields
3387 : !
3388 : ! Method:
3389 : !
3390 : ! Author: Patrick Worley
3391 : !
3392 : !-----------------------------------------------------------------------
3393 : #if ( defined SPMD )
3394 : # if defined(MODCM_DP_TRANSPOSE)
3395 : use mod_comm, only: blockdescriptor, mp_sendirr, mp_recvirr, &
3396 : get_partneroffset, max_nparcels
3397 : use mpishorthand, only : mpicom
3398 : # endif
3399 : use spmd_utils, only: altalltoallv
3400 : #endif
3401 : !------------------------------Parameters-------------------------------
3402 : !
3403 : integer, parameter :: msgtag = 7000
3404 : !------------------------------Arguments--------------------------------
3405 : integer, intent(in) :: record_size ! per column amount of data
3406 : real(r8), intent(in):: chunk_buffer(record_size*chunk_buf_nrecs)
3407 : ! buffer of chunk data to be
3408 : ! transposed
3409 : real(r8), intent(out) :: block_buffer(record_size*block_buf_nrecs)
3410 : ! buffer of block data to
3411 : ! transpose into
3412 : integer, intent(in), optional :: window
3413 : ! MPI-2 window id for
3414 : ! chunk_buffer
3415 :
3416 : !---------------------------Local workspace-----------------------------
3417 : #if ( defined SPMD )
3418 : integer :: p ! loop indices
3419 : integer :: bbuf_siz ! size of block_buffer
3420 : integer :: cbuf_siz ! size of chunk_buffer
3421 : integer :: lwindow ! placeholder for missing window
3422 : integer :: lopt ! local copy of phys_alltoall
3423 : !
3424 : logical, save :: first = .true.
3425 : integer, allocatable, save :: sndcnts(:), sdispls(:)
3426 : integer, allocatable, save :: rcvcnts(:), rdispls(:)
3427 : integer, allocatable, save :: pdispls(:)
3428 : integer, save :: prev_record_size = 0
3429 : # if defined(MODCM_DP_TRANSPOSE)
3430 : type (blockdescriptor), allocatable, save :: sendbl(:), recvbl(:)
3431 : integer ione, ierror, mod_method
3432 : # endif
3433 : !-----------------------------------------------------------------------
3434 14592 : if (first) then
3435 : ! Compute send/recv/put counts and displacements
3436 4608 : allocate(sndcnts(0:npes-1))
3437 3072 : allocate(sdispls(0:npes-1))
3438 3072 : allocate(rcvcnts(0:npes-1))
3439 3072 : allocate(rdispls(0:npes-1))
3440 3072 : allocate(pdispls(0:npes-1))
3441 : !
3442 : # if defined(MODCM_DP_TRANSPOSE)
3443 : ! This branch uses mod_comm. Admissable values of phys_alltoall are
3444 : ! 11,12 and 13. Each value corresponds to a differerent option
3445 : ! within mod_comm of implementing the communication. That option is expressed
3446 : ! internally to mod_comm using the variable mod_method defined below;
3447 : ! mod_method will have values 0,1 or 2 and is defined as
3448 : ! phys_alltoall - modmin_alltoall, where modmin_alltoall equals 11.
3449 : ! Also, sendbl and recvbl must have exactly npes elements, to match
3450 : ! this size of the communicator, or the transpose will fail.
3451 : !
3452 : if (phys_alltoall >= modmin_alltoall) then
3453 : mod_method = phys_alltoall - modmin_alltoall
3454 : ione = 1
3455 : allocate( sendbl(0:npes-1) )
3456 : allocate( recvbl(0:npes-1) )
3457 :
3458 : do p = 0,npes-1
3459 :
3460 : sendbl(p)%method = mod_method
3461 : recvbl(p)%method = mod_method
3462 :
3463 : allocate( sendbl(p)%blocksizes(1) )
3464 : allocate( sendbl(p)%displacements(1) )
3465 : allocate( recvbl(p)%blocksizes(1) )
3466 : allocate( recvbl(p)%displacements(1) )
3467 :
3468 : enddo
3469 :
3470 : endif
3471 : # endif
3472 : !
3473 1536 : first = .false.
3474 : endif
3475 : !
3476 14592 : if (record_size /= prev_record_size) then
3477 : !
3478 : ! Compute send/recv/put counts and displacements
3479 1536 : sdispls(0) = 0
3480 1536 : sndcnts(0) = record_size*btofc_chk_num(0)
3481 1179648 : do p=1,npes-1
3482 1178112 : sdispls(p) = sdispls(p-1) + sndcnts(p-1)
3483 1179648 : sndcnts(p) = record_size*btofc_chk_num(p)
3484 : enddo
3485 : !
3486 1536 : rdispls(0) = 0
3487 1536 : rcvcnts(0) = record_size*btofc_blk_num(0)
3488 1179648 : do p=1,npes-1
3489 1178112 : rdispls(p) = rdispls(p-1) + rcvcnts(p-1)
3490 1179648 : rcvcnts(p) = record_size*btofc_blk_num(p)
3491 : enddo
3492 : !
3493 1536 : call mpialltoallint(rdispls, 1, pdispls, 1, mpicom)
3494 : !
3495 : # if defined(MODCM_DP_TRANSPOSE)
3496 : if (phys_alltoall >= modmin_alltoall) then
3497 : do p = 0,npes-1
3498 :
3499 : sendbl(p)%type = MPI_DATATYPE_NULL
3500 : if ( sndcnts(p) /= 0 ) then
3501 :
3502 : if (phys_alltoall > modmin_alltoall) then
3503 : call MPI_TYPE_INDEXED(ione, sndcnts(p), &
3504 : sdispls(p), mpir8, &
3505 : sendbl(p)%type, ierror)
3506 : call MPI_TYPE_COMMIT(sendbl(p)%type, ierror)
3507 : endif
3508 :
3509 : sendbl(p)%blocksizes(1) = sndcnts(p)
3510 : sendbl(p)%displacements(1) = sdispls(p)
3511 : sendbl(p)%partneroffset = 0
3512 :
3513 : else
3514 :
3515 : sendbl(p)%blocksizes(1) = 0
3516 : sendbl(p)%displacements(1) = 0
3517 : sendbl(p)%partneroffset = 0
3518 :
3519 : endif
3520 : sendbl(p)%nparcels = size(sendbl(p)%displacements)
3521 : sendbl(p)%tot_size = sum(sendbl(p)%blocksizes)
3522 : max_nparcels = max(max_nparcels, sendbl(p)%nparcels)
3523 :
3524 : recvbl(p)%type = MPI_DATATYPE_NULL
3525 : if ( rcvcnts(p) /= 0) then
3526 :
3527 : if (phys_alltoall > modmin_alltoall) then
3528 : call MPI_TYPE_INDEXED(ione, rcvcnts(p), &
3529 : rdispls(p), mpir8, &
3530 : recvbl(p)%type, ierror)
3531 : call MPI_TYPE_COMMIT(recvbl(p)%type, ierror)
3532 : endif
3533 :
3534 : recvbl(p)%blocksizes(1) = rcvcnts(p)
3535 : recvbl(p)%displacements(1) = rdispls(p)
3536 : recvbl(p)%partneroffset = 0 ! not properly initialized - do not use Mpi2
3537 : else
3538 :
3539 : recvbl(p)%blocksizes(1) = 0
3540 : recvbl(p)%displacements(1) = 0
3541 : recvbl(p)%partneroffset = 0
3542 :
3543 : endif
3544 : recvbl(p)%nparcels = size(recvbl(p)%displacements)
3545 : recvbl(p)%tot_size = sum(recvbl(p)%blocksizes)
3546 : max_nparcels = max(max_nparcels, recvbl(p)%nparcels)
3547 :
3548 : enddo
3549 :
3550 : call get_partneroffset(mpicom, sendbl, recvbl)
3551 :
3552 : endif
3553 : # endif
3554 : !
3555 1536 : prev_record_size = record_size
3556 : endif
3557 : !
3558 14592 : call t_barrierf('sync_tran_ctob', mpicom)
3559 14592 : if (phys_alltoall < 0) then
3560 14592 : if ((max_nproc_smpx > npes/2) .and. (nproc_busy_d > npes/2)) then
3561 14592 : lopt = 0
3562 : else
3563 0 : lopt = 1
3564 : endif
3565 : else
3566 0 : lopt = phys_alltoall
3567 0 : if ((lopt == 2) .and. ( .not. present(window) )) lopt = 1
3568 : endif
3569 14592 : if (lopt < 4) then
3570 : !
3571 14592 : bbuf_siz = record_size*block_buf_nrecs
3572 14592 : cbuf_siz = record_size*chunk_buf_nrecs
3573 14592 : if ( present(window) ) then
3574 : call altalltoallv(lopt, iam, npes, &
3575 : dp_coup_steps, dp_coup_proc, &
3576 : chunk_buffer, cbuf_siz, sndcnts, sdispls, mpir8, &
3577 : block_buffer, bbuf_siz, rcvcnts, rdispls, mpir8, &
3578 0 : msgtag, pdispls, mpir8, window, mpicom)
3579 : else
3580 : call altalltoallv(lopt, iam, npes, &
3581 : dp_coup_steps, dp_coup_proc, &
3582 : chunk_buffer, cbuf_siz, sndcnts, sdispls, mpir8, &
3583 : block_buffer, bbuf_siz, rcvcnts, rdispls, mpir8, &
3584 14592 : msgtag, pdispls, mpir8, lwindow, mpicom)
3585 : endif
3586 : !
3587 : else
3588 : # if defined(MODCM_DP_TRANSPOSE)
3589 : call mp_sendirr(mpicom, sendbl, recvbl, block_buffer, chunk_buffer)
3590 : call mp_recvirr(mpicom, sendbl, recvbl, block_buffer, chunk_buffer)
3591 : # else
3592 : call mpialltoallv(chunk_buffer, sndcnts, sdispls, mpir8, &
3593 : block_buffer, rcvcnts, rdispls, mpir8, &
3594 0 : mpicom)
3595 : # endif
3596 : !
3597 : endif
3598 : !
3599 : #endif
3600 :
3601 14592 : return
3602 : end subroutine transpose_chunk_to_block
3603 : !
3604 : !========================================================================
3605 :
3606 72960 : subroutine chunk_to_block_send_pters(lcid, fdim, ldim, &
3607 72960 : record_size, pter)
3608 : !-----------------------------------------------------------------------
3609 : !
3610 : ! Purpose: Return pointers into send buffer where data for
3611 : ! decomposed chunk data structures should be copied to
3612 : !
3613 : ! Method:
3614 : !
3615 : ! Author: Patrick Worley
3616 : !
3617 : !-----------------------------------------------------------------------
3618 : !------------------------------Arguments--------------------------------
3619 : integer, intent(in) :: lcid ! local chunk id
3620 : integer, intent(in) :: fdim ! first dimension of pter array
3621 : integer, intent(in) :: ldim ! last dimension of pter array
3622 : integer, intent(in) :: record_size ! per coordinate amount of data
3623 :
3624 : integer, intent(out) :: pter(fdim,ldim) ! buffer offset
3625 : !---------------------------Local workspace-----------------------------
3626 : integer :: i, k ! loop indices
3627 : !-----------------------------------------------------------------------
3628 72960 : if ((btofc_chk_offset(lcid)%ncols > fdim) .or. &
3629 : (btofc_chk_offset(lcid)%nlvls > ldim)) then
3630 0 : write(iulog,*) "CHUNK_TO_BLOCK_SEND_PTERS: pter array dimensions ", &
3631 0 : "not large enough: (",fdim,",",ldim,") not >= (", &
3632 0 : btofc_chk_offset(lcid)%ncols,",", &
3633 0 : btofc_chk_offset(lcid)%nlvls,")"
3634 0 : call endrun()
3635 : endif
3636 : !
3637 2480640 : do k=1,btofc_chk_offset(lcid)%nlvls
3638 37078272 : do i=1,btofc_chk_offset(lcid)%ncols
3639 69341184 : pter(i,k) = 1 + record_size* &
3640 106419456 : (btofc_chk_offset(lcid)%pter(i,k))
3641 : enddo
3642 6332928 : do i=btofc_chk_offset(lcid)%ncols+1,fdim
3643 6259968 : pter(i,k) = -1
3644 : enddo
3645 : enddo
3646 : !
3647 72960 : do k=btofc_chk_offset(lcid)%nlvls+1,ldim
3648 72960 : do i=1,fdim
3649 0 : pter(i,k) = -1
3650 : enddo
3651 : enddo
3652 : !
3653 72960 : return
3654 : end subroutine chunk_to_block_send_pters
3655 : !
3656 : !========================================================================
3657 :
3658 14592 : subroutine chunk_to_block_recv_pters(blockid, fdim, ldim, &
3659 14592 : record_size, pter)
3660 : !-----------------------------------------------------------------------
3661 : !
3662 : ! Purpose: Return pointers into receive buffer where column from decomposed
3663 : ! fields should be copied from
3664 : !
3665 : ! Method:
3666 : !
3667 : ! Author: Patrick Worley
3668 : !
3669 : !-----------------------------------------------------------------------
3670 : !------------------------------Arguments--------------------------------
3671 : integer, intent(in) :: blockid ! block index
3672 : integer, intent(in) :: fdim ! first dimension of pter array
3673 : integer, intent(in) :: ldim ! last dimension of pter array
3674 : integer, intent(in) :: record_size ! per coordinate amount of data
3675 :
3676 : integer, intent(out) :: pter(fdim,ldim) ! buffer offsets
3677 : !---------------------------Local workspace-----------------------------
3678 : integer :: i, k ! loop indices
3679 : !-----------------------------------------------------------------------
3680 14592 : if ((btofc_blk_offset(blockid)%ncols > fdim) .or. &
3681 : (btofc_blk_offset(blockid)%nlvls > ldim)) then
3682 0 : write(iulog,*) "CHUNK_TO_BLOCK_RECV_PTERS: pter array dimensions ", &
3683 0 : "not large enough: (",fdim,",",ldim,") not >= (", &
3684 0 : btofc_blk_offset(blockid)%ncols,",", &
3685 0 : btofc_blk_offset(blockid)%nlvls,")"
3686 0 : call endrun()
3687 : endif
3688 : !
3689 496128 : do k=1,btofc_blk_offset(blockid)%nlvls
3690 35152128 : do i=1,btofc_blk_offset(blockid)%ncols
3691 69341184 : pter(i,k) = 1 + record_size* &
3692 104493312 : (btofc_blk_offset(blockid)%pter(i,k))
3693 : enddo
3694 496128 : do i=btofc_blk_offset(blockid)%ncols+1,fdim
3695 481536 : pter(i,k) = -1
3696 : enddo
3697 : enddo
3698 : !
3699 14592 : do k=btofc_blk_offset(blockid)%nlvls+1,ldim
3700 14592 : do i=1,fdim
3701 0 : pter(i,k) = -1
3702 : enddo
3703 : enddo
3704 : !
3705 14592 : return
3706 : end subroutine chunk_to_block_recv_pters
3707 : !
3708 : !========================================================================
3709 :
3710 1536 : subroutine create_chunks(opt, chunks_per_thread)
3711 : !-----------------------------------------------------------------------
3712 : !
3713 : ! Purpose: Decompose physics computational grid into chunks, for
3714 : ! improved serial efficiency and parallel load balance.
3715 : !
3716 : ! Method:
3717 : !
3718 : ! Author: Patrick Worley
3719 : !
3720 : !-----------------------------------------------------------------------
3721 : use pmgrid, only: plev
3722 : use dyn_grid, only: get_block_bounds_d, get_block_gcol_cnt_d, &
3723 : get_gcol_block_cnt_d, get_gcol_block_d, &
3724 : get_block_owner_d, get_block_gcol_d
3725 : !------------------------------Arguments--------------------------------
3726 : integer, intent(in) :: opt ! chunking option
3727 : ! 0: chunks may cross block boundaries, but retain same
3728 : ! process mapping as blocks. If possible, columns assigned
3729 : ! as day/night pairs. Columns (or pairs) are wrap-mapped.
3730 : ! May not work with vertically decomposed blocks. (default)
3731 : ! 1: chunks may cross block boundaries, but retain same
3732 : ! SMP-node mapping as blocks. If possible, columns assigned
3733 : ! as day/night pairs. Columns (or pairs) are wrap-mapped.
3734 : ! May not work with vertically decomposed blocks.
3735 : ! 2: 2-column day/night and season column pairs wrap-mapped
3736 : ! to chunks to also balance assignment of polar, mid-latitude,
3737 : ! and equatorial columns across chunks.
3738 : ! 3: same as 1 except that SMP defined to be pairs of consecutive
3739 : ! processes
3740 : ! 4: chunks may cross block boundaries, but retain same
3741 : ! process mapping as blocks. Columns assigned to chunks
3742 : ! in block ordering.
3743 : ! May not work with vertically decomposed blocks.
3744 : ! 5: Chunks do not cross latitude boundaries, and are block-mapped.
3745 : integer, intent(in) :: chunks_per_thread
3746 : ! target number of chunks per
3747 : ! thread
3748 : !---------------------------Local workspace-----------------------------
3749 : integer :: i, j, p ! loop indices
3750 : integer :: nlthreads ! number of local OpenMP threads
3751 3072 : integer :: npthreads(0:npes-1) ! number of OpenMP threads per process
3752 3072 : integer :: proc_smp_mapx(0:npes-1) ! process/virtual SMP node map
3753 : integer :: firstblock, lastblock ! global block index bounds
3754 : integer :: maxblksiz ! maximum number of columns in a dynamics block
3755 : integer :: block_cnt ! number of blocks containing data
3756 : ! for a given vertical column
3757 : integer :: blockids(plev+1) ! block indices
3758 : integer :: bcids(plev+1) ! block column indices
3759 : integer :: nsmpx, nsmpy ! virtual SMP node counts and indices
3760 : integer :: curgcol, twingcol ! global physics and dynamics column indices
3761 : integer :: smp ! SMP node index
3762 : integer :: cid ! chunk id
3763 : integer :: jb, ib ! global block and columns indices
3764 : integer :: blksiz ! current block size
3765 : integer :: ntmp1, ntmp2, nlchunks ! work variables
3766 : integer :: max_ncols ! upper bound on number of columns in a block
3767 : integer :: ncols ! number of columns in current chunk
3768 : logical :: error ! error flag
3769 :
3770 : ! indices for dynamics columns in given block
3771 1536 : integer, dimension(:), allocatable :: cols
3772 :
3773 : ! number of MPI processes per virtual SMP node (0:nsmpx-1)
3774 1536 : integer, dimension(:), allocatable :: nsmpprocs
3775 :
3776 : ! flag indicating whether a process is busy or idle during the dynamics (0:npes-1)
3777 1536 : logical, dimension(:), allocatable :: proc_busy_d
3778 :
3779 : ! flag indicating whether any of the processes assigned to an SMP node are busy
3780 : ! during the dynamics, or whether all of them are idle (0:nsmps-1)
3781 1536 : logical, dimension(:), allocatable :: smp_busy_d
3782 :
3783 : ! actual SMP node/virtual SMP node map (0:nsmps-1)
3784 1536 : integer, dimension(:), allocatable :: smp_smp_mapx
3785 :
3786 : ! column/virtual SMP node map (ngcols)
3787 1536 : integer, dimension(:), allocatable :: col_smp_mapx
3788 :
3789 : ! number of columns assigned to a given virtual SMP node (0:nsmpx-1)
3790 1536 : integer, dimension(:), allocatable :: nsmpcolumns
3791 :
3792 : ! number of OpenMP threads per virtual SMP node (0:nsmpx-1)
3793 1536 : integer, dimension(:), allocatable :: nsmpthreads
3794 :
3795 : ! number of chunks assigned to a given virtual SMP node (0:nsmpx-1)
3796 1536 : integer, dimension(:), allocatable :: nsmpchunks
3797 :
3798 : ! maximum number of columns assigned to a chunk in a given virtual SMP node (0:nsmpx-1)
3799 1536 : integer, dimension(:), allocatable :: maxcol_chk
3800 :
3801 : ! number of chunks in given virtual SMP node receiving maximum number of columns
3802 : ! (0:nsmpx-1)
3803 1536 : integer, dimension(:), allocatable :: maxcol_chks
3804 :
3805 : ! chunk id virtual offset (0:nsmpx-1)
3806 1536 : integer, dimension(:), allocatable :: cid_offset
3807 :
3808 : ! process-local chunk id (0:nsmpx-1)
3809 1536 : integer, dimension(:), allocatable :: local_cid
3810 :
3811 : #if ( defined _OPENMP )
3812 : integer omp_get_max_threads
3813 : external omp_get_max_threads
3814 : #endif
3815 :
3816 : !-----------------------------------------------------------------------
3817 : !
3818 : ! Determine number of threads per process
3819 : !
3820 1536 : nlthreads = 1
3821 : #if ( defined _OPENMP )
3822 : nlthreads = OMP_GET_MAX_THREADS()
3823 : #endif
3824 : !
3825 : #if ( defined SPMD )
3826 1536 : call mpiallgatherint(nlthreads, 1, npthreads, 1, mpicom)
3827 : #else
3828 : npthreads(0) = nlthreads
3829 : proc_smp_map(0) = 0
3830 : #endif
3831 :
3832 : !
3833 : ! Determine index range for dynamics blocks
3834 : !
3835 1536 : call get_block_bounds_d(firstblock,lastblock)
3836 :
3837 : !
3838 : ! Determine maximum number of columns in a block
3839 : !
3840 1536 : maxblksiz = 0
3841 1181184 : do jb=firstblock,lastblock
3842 1181184 : maxblksiz = max(maxblksiz,get_block_gcol_cnt_d(jb))
3843 : enddo
3844 :
3845 : !
3846 : ! determine which (and how many) processes are assigned
3847 : ! dynamics blocks
3848 : !
3849 4608 : allocate( proc_busy_d(0:npes-1) )
3850 1181184 : proc_busy_d = .false.
3851 1536 : nproc_busy_d = 0
3852 1181184 : do jb=firstblock,lastblock
3853 1179648 : p = get_block_owner_d(jb)
3854 1181184 : if (.not. proc_busy_d(p) ) then
3855 1179648 : proc_busy_d(p) = .true.
3856 1179648 : nproc_busy_d = nproc_busy_d + 1
3857 : endif
3858 : enddo
3859 :
3860 : !
3861 : ! Determine virtual SMP count and processes/virtual SMP map.
3862 : ! If option 0 or >3, pretend that each SMP has only one process.
3863 : ! If option 1, use SMP information.
3864 : ! If option 2, pretend that all processes are in one SMP node.
3865 : ! If option 3, pretend that each SMP node is made up of two
3866 : ! processes, chosen to maximize load-balancing opportunities.
3867 : !
3868 : ! For all options < 5, if there are "idle" dynamics processes,
3869 : ! assign them to the virtual SMP nodes in wrap fashion.
3870 : ! Communication between the active and idle dynamics
3871 : ! processes is scatter/gather (no communications between
3872 : ! idle dynamics processes) so there is no advantage to
3873 : ! blocking the idle processes in these assignments.
3874 : !
3875 1536 : if ((opt <= 0) .or. (opt == 4)) then
3876 :
3877 : ! assign active dynamics processes to virtual SMP nodes
3878 0 : nsmpx = 0
3879 0 : do p=0,npes-1
3880 0 : if (proc_busy_d(p)) then
3881 0 : proc_smp_mapx(p) = nsmpx
3882 0 : nsmpx = nsmpx + 1
3883 : endif
3884 : enddo
3885 : !
3886 : ! assign idle dynamics processes to virtual SMP nodes (wrap map)
3887 0 : nsmpy = 0
3888 0 : do p=0,npes-1
3889 0 : if (.not. proc_busy_d(p)) then
3890 0 : proc_smp_mapx(p) = nsmpy
3891 0 : nsmpy = mod(nsmpy+1,nsmpx)
3892 : endif
3893 : enddo
3894 :
3895 1536 : elseif (opt == 1) then
3896 :
3897 0 : allocate( smp_busy_d(0:nsmps-1) )
3898 0 : allocate( smp_smp_mapx(0:nsmps-1) )
3899 :
3900 : !
3901 : ! determine SMP nodes assigned dynamics blocks
3902 0 : smp_busy_d = .false.
3903 0 : do p=0,npes-1
3904 0 : if ( proc_busy_d(p) ) then
3905 0 : smp = proc_smp_map(p)
3906 0 : smp_busy_d(smp) = .true.
3907 : endif
3908 : enddo
3909 :
3910 : !
3911 : ! determine number of SMP nodes assigned dynamics blocks
3912 0 : nsmpx = 0
3913 0 : do smp=0,nsmps-1
3914 0 : if (smp_busy_d(smp)) then
3915 0 : smp_smp_mapx(smp) = nsmpx
3916 0 : nsmpx = nsmpx + 1
3917 : endif
3918 : enddo
3919 : !
3920 : ! assign processes in active dynamics SMP nodes to virtual SMP nodes
3921 0 : do p=0,npes-1
3922 0 : smp = proc_smp_map(p)
3923 0 : if (smp_busy_d(smp)) then
3924 0 : proc_smp_mapx(p) = smp_smp_mapx(smp)
3925 : endif
3926 : enddo
3927 : !
3928 : ! assign processes in idle dynamics SMP nodes to virtual SMP nodes (wrap map)
3929 0 : nsmpy = 0
3930 0 : do p=0,npes-1
3931 0 : smp = proc_smp_map(p)
3932 0 : if (.not. smp_busy_d(smp)) then
3933 0 : proc_smp_mapx(p) = nsmpy
3934 0 : nsmpy = mod(nsmpy+1,nsmpx)
3935 : endif
3936 : enddo
3937 : !
3938 0 : deallocate( smp_busy_d )
3939 0 : deallocate( smp_smp_mapx )
3940 :
3941 1536 : elseif (opt == 2) then
3942 :
3943 1536 : nsmpx = 1
3944 1181184 : do p=0,npes-1
3945 1181184 : proc_smp_mapx(p) = 0
3946 : enddo
3947 :
3948 0 : elseif (opt == 3) then
3949 :
3950 : ! find active process partners
3951 0 : proc_smp_mapx = -1
3952 0 : call find_partners(opt,proc_busy_d,nsmpx,proc_smp_mapx)
3953 : !
3954 : ! assign unassigned (idle dynamics) processes to virtual SMP nodes
3955 : ! (wrap map)
3956 0 : nsmpy = 0
3957 0 : do p=0,npes-1
3958 0 : if (proc_smp_mapx(p) == -1) then
3959 0 : proc_smp_mapx(p) = nsmpy
3960 0 : nsmpy = mod(nsmpy+1,nsmpx)
3961 : endif
3962 : enddo
3963 :
3964 : else
3965 :
3966 0 : nsmpx = npes
3967 0 : do p=0,npes-1
3968 0 : proc_smp_mapx(p) = p
3969 : enddo
3970 :
3971 : endif
3972 : !
3973 1536 : deallocate( proc_busy_d )
3974 :
3975 : !
3976 : ! Determine maximum number of processes assigned to a single
3977 : ! virtual SMP node
3978 : !
3979 4608 : allocate( nsmpprocs(0:nsmpx-1) )
3980 : !
3981 3072 : nsmpprocs(:) = 0
3982 1181184 : do p=0,npes-1
3983 1179648 : smp = proc_smp_mapx(p)
3984 1181184 : nsmpprocs(smp) = nsmpprocs(smp) + 1
3985 : enddo
3986 3072 : max_nproc_smpx = maxval(nsmpprocs)
3987 : !
3988 1536 : deallocate( nsmpprocs )
3989 :
3990 : !
3991 : ! Determine number of columns assigned to each
3992 : ! virtual SMP in block decomposition
3993 :
3994 4608 : allocate( col_smp_mapx(ngcols) )
3995 : !
3996 84936192 : col_smp_mapx(:) = -1
3997 1536 : error = .false.
3998 84936192 : do i=1,num_global_phys_cols
3999 84934656 : curgcol = latlon_to_dyn_gcol_map(i)
4000 84934656 : block_cnt = get_gcol_block_cnt_d(curgcol)
4001 84934656 : call get_gcol_block_d(curgcol,block_cnt,blockids,bcids)
4002 169870848 : do jb=1,block_cnt
4003 84934656 : p = get_block_owner_d(blockids(jb))
4004 169869312 : if (col_smp_mapx(i) == -1) then
4005 84934656 : col_smp_mapx(i) = proc_smp_mapx(p)
4006 0 : elseif (col_smp_mapx(i) /= proc_smp_mapx(p)) then
4007 0 : error = .true.
4008 : endif
4009 : enddo
4010 : end do
4011 1536 : if (error) then
4012 0 : write(iulog,*) "PHYS_GRID_INIT error: opt", opt, "specified, ", &
4013 0 : "but vertical decomposition not limited to virtual SMP"
4014 0 : call endrun()
4015 : endif
4016 : !
4017 4608 : allocate( nsmpcolumns(0:nsmpx-1) )
4018 3072 : nsmpcolumns(:) = 0
4019 84936192 : do i=1,num_global_phys_cols
4020 84934656 : curgcol = latlon_to_dyn_gcol_map(i)
4021 84934656 : smp = col_smp_mapx(curgcol)
4022 84936192 : nsmpcolumns(smp) = nsmpcolumns(smp) + 1
4023 : end do
4024 : !
4025 1536 : deallocate( col_smp_mapx )
4026 :
4027 : !
4028 : ! Allocate other work space
4029 : !
4030 4608 : allocate( nsmpthreads(0:nsmpx-1) )
4031 3072 : allocate( nsmpchunks (0:nsmpx-1) )
4032 3072 : allocate( maxcol_chk (0:nsmpx-1) )
4033 3072 : allocate( maxcol_chks(0:nsmpx-1) )
4034 3072 : allocate( cid_offset (0:nsmpx-1) )
4035 3072 : allocate( local_cid (0:nsmpx-1) )
4036 4608 : allocate( cols(1:maxblksiz) )
4037 : !
4038 : ! Options 0-3: split local dynamics blocks into chunks,
4039 : ! using wrap-map assignment of columns and
4040 : ! day/night and north/south column pairs
4041 : ! to chunks to improve load balance
4042 : ! Option 0: local is per process
4043 : ! Option 1: local is subset of`processes assigned to same SMP node
4044 : ! Option 2: local is global
4045 : ! Option 3: local is pair of processes chosen to maximize load-balance
4046 : ! wrt restriction that only communicate with one other
4047 : ! process.
4048 : ! Option 4: split local dynamics blocks into chunks,
4049 : ! using block-map assignment of columns
4050 : !
4051 1536 : if ((opt >= 0) .and. (opt <= 4)) then
4052 : !
4053 : ! Calculate number of threads available in each SMP node.
4054 : !
4055 3072 : nsmpthreads(:) = 0
4056 1181184 : do p=0,npes-1
4057 1179648 : smp = proc_smp_mapx(p)
4058 1181184 : nsmpthreads(smp) = nsmpthreads(smp) + npthreads(p)
4059 : enddo
4060 : !
4061 : ! Determine number of chunks to keep all threads busy
4062 : !
4063 1536 : nchunks = 0
4064 3072 : do smp=0,nsmpx-1
4065 1536 : nsmpchunks(smp) = nsmpcolumns(smp)/pcols
4066 1536 : if (mod(nsmpcolumns(smp), pcols) /= 0) then
4067 0 : nsmpchunks(smp) = nsmpchunks(smp) + 1
4068 : endif
4069 1536 : if (nsmpchunks(smp) < chunks_per_thread*nsmpthreads(smp)) then
4070 0 : nsmpchunks(smp) = chunks_per_thread*nsmpthreads(smp)
4071 : endif
4072 591360 : do while (mod(nsmpchunks(smp), nsmpthreads(smp)) /= 0)
4073 589824 : nsmpchunks(smp) = nsmpchunks(smp) + 1
4074 : enddo
4075 1536 : if (nsmpchunks(smp) > nsmpcolumns(smp)) then
4076 0 : nsmpchunks(smp) = nsmpcolumns(smp)
4077 : endif
4078 3072 : nchunks = nchunks + nsmpchunks(smp)
4079 : enddo
4080 : !
4081 : ! Determine maximum number of columns to assign to chunks
4082 : ! in a given SMP
4083 : !
4084 3072 : do smp=0,nsmpx-1
4085 3072 : if (nsmpchunks(smp) /= 0) then
4086 1536 : ntmp1 = nsmpcolumns(smp)/nsmpchunks(smp)
4087 1536 : ntmp2 = mod(nsmpcolumns(smp),nsmpchunks(smp))
4088 1536 : if (ntmp2 > 0) then
4089 1536 : maxcol_chk(smp) = ntmp1 + 1
4090 1536 : maxcol_chks(smp) = ntmp2
4091 : else
4092 0 : maxcol_chk(smp) = ntmp1
4093 0 : maxcol_chks(smp) = nsmpchunks(smp)
4094 : endif
4095 : else
4096 0 : maxcol_chk(smp) = 0
4097 0 : maxcol_chks(smp) = 0
4098 : endif
4099 : enddo
4100 : !
4101 : ! Allocate chunks and knuhcs data structures
4102 : !
4103 4608 : allocate( chunks(1:nchunks) )
4104 4608 : allocate( knuhcs(1:ngcols) )
4105 : !
4106 : ! Initialize chunks and knuhcs data structures
4107 : !
4108 5899776 : chunks(:)%ncols = 0
4109 84936192 : knuhcs(:)%chunkid = -1
4110 84936192 : knuhcs(:)%col = -1
4111 : !
4112 : ! Determine chunk id ranges for each SMP
4113 : !
4114 1536 : cid_offset(0) = 1
4115 1536 : local_cid(0) = 0
4116 1536 : do smp=1,nsmpx-1
4117 0 : cid_offset(smp) = cid_offset(smp-1) + nsmpchunks(smp-1)
4118 1536 : local_cid(smp) = 0
4119 : enddo
4120 : !
4121 : ! Assign columns to chunks
4122 : !
4123 1181184 : do jb=firstblock,lastblock
4124 1179648 : p = get_block_owner_d(jb)
4125 1179648 : smp = proc_smp_mapx(p)
4126 1179648 : blksiz = get_block_gcol_cnt_d(jb)
4127 1179648 : call get_block_gcol_d(jb,blksiz,cols)
4128 86115840 : do ib = 1,blksiz
4129 : !
4130 : ! Assign column to a chunk if not already assigned
4131 84934656 : curgcol = cols(ib)
4132 169869312 : if ((dyn_to_latlon_gcol_map(curgcol) /= -1) .and. &
4133 86114304 : (knuhcs(curgcol)%chunkid == -1)) then
4134 : !
4135 : ! Find next chunk with space
4136 : ! (maxcol_chks > 0 test necessary for opt=4 block map)
4137 43646976 : cid = cid_offset(smp) + local_cid(smp)
4138 43646976 : if (maxcol_chks(smp) > 0) then
4139 43646976 : do while (chunks(cid)%ncols >= maxcol_chk(smp))
4140 0 : local_cid(smp) = mod(local_cid(smp)+1,nsmpchunks(smp))
4141 0 : cid = cid_offset(smp) + local_cid(smp)
4142 : enddo
4143 : else
4144 0 : do while (chunks(cid)%ncols >= maxcol_chk(smp)-1)
4145 0 : local_cid(smp) = mod(local_cid(smp)+1,nsmpchunks(smp))
4146 0 : cid = cid_offset(smp) + local_cid(smp)
4147 : enddo
4148 : endif
4149 43646976 : chunks(cid)%ncols = chunks(cid)%ncols + 1
4150 43646976 : if (chunks(cid)%ncols == maxcol_chk(smp)) &
4151 2359296 : maxcol_chks(smp) = maxcol_chks(smp) - 1
4152 : !
4153 43646976 : i = chunks(cid)%ncols
4154 43646976 : chunks(cid)%gcol(i) = curgcol
4155 43646976 : chunks(cid)%lon(i) = lon_p(curgcol)
4156 43646976 : chunks(cid)%lat(i) = lat_p(curgcol)
4157 43646976 : knuhcs(curgcol)%chunkid = cid
4158 43646976 : knuhcs(curgcol)%col = i
4159 : !
4160 43646976 : if (opt < 4) then
4161 : !
4162 : ! If space available, look to assign a load-balancing "twin" to same chunk
4163 43646976 : if ( (chunks(cid)%ncols < maxcol_chk(smp)) .and. &
4164 87293952 : (maxcol_chks(smp) > 0) .and. (twin_alg > 0)) then
4165 :
4166 : call find_twin(curgcol, smp, &
4167 41287680 : proc_smp_mapx, twingcol)
4168 :
4169 41287680 : if (twingcol > 0) then
4170 41287680 : chunks(cid)%ncols = chunks(cid)%ncols + 1
4171 41287680 : if (chunks(cid)%ncols == maxcol_chk(smp)) &
4172 0 : maxcol_chks(smp) = maxcol_chks(smp) - 1
4173 : !
4174 41287680 : i = chunks(cid)%ncols
4175 41287680 : chunks(cid)%gcol(i) = twingcol
4176 41287680 : chunks(cid)%lon(i) = lon_p(twingcol)
4177 41287680 : chunks(cid)%lat(i) = lat_p(twingcol)
4178 41287680 : knuhcs(twingcol)%chunkid = cid
4179 41287680 : knuhcs(twingcol)%col = i
4180 : endif
4181 : !
4182 : endif
4183 : !
4184 : ! Move on to next chunk (wrap map)
4185 43646976 : local_cid(smp) = mod(local_cid(smp)+1,nsmpchunks(smp))
4186 : !
4187 : endif
4188 : !
4189 : endif
4190 : enddo
4191 : enddo
4192 : !
4193 : else
4194 : !
4195 : ! Option 5: split individual dynamics blocks into chunks,
4196 : ! assigning consecutive columns to the same chunk
4197 : !
4198 : ! Determine total number of chunks and
4199 : ! number of chunks in each "SMP node"
4200 : ! (assuming no vertical decomposition)
4201 0 : nchunks = 0
4202 0 : nsmpchunks(:) = 0
4203 0 : do j=firstblock,lastblock
4204 0 : blksiz = get_block_gcol_cnt_d(j)
4205 0 : nlchunks = blksiz/pcols
4206 0 : if (pcols*(blksiz/pcols) /= blksiz) then
4207 0 : nlchunks = nlchunks + 1
4208 : endif
4209 0 : nchunks = nchunks + nlchunks
4210 0 : p = get_block_owner_d(j)
4211 0 : nsmpchunks(p) = nsmpchunks(p) + nlchunks
4212 : enddo
4213 : !
4214 : ! Determine chunk id ranges for each SMP
4215 : !
4216 0 : cid_offset(0) = 1
4217 0 : local_cid(0) = 0
4218 0 : do smp=1,nsmpx-1
4219 0 : cid_offset(smp) = cid_offset(smp-1) + nsmpchunks(smp-1)
4220 0 : local_cid(smp) = 0
4221 : enddo
4222 : !
4223 : ! Allocate chunks and knuhcs data structures
4224 : !
4225 0 : allocate( chunks(1:nchunks) )
4226 0 : allocate( knuhcs(1:ngcols) )
4227 : !
4228 : ! Initialize chunks and knuhcs data structures
4229 : !
4230 0 : knuhcs(:)%chunkid = -1
4231 0 : knuhcs(:)%col = -1
4232 0 : cid = 0
4233 0 : do jb=firstblock,lastblock
4234 0 : p = get_block_owner_d(jb)
4235 0 : smp = proc_smp_mapx(p)
4236 0 : blksiz = get_block_gcol_cnt_d(jb)
4237 0 : call get_block_gcol_d(jb,blksiz,cols)
4238 :
4239 0 : ib = 0
4240 0 : do while (ib < blksiz)
4241 :
4242 0 : cid = cid_offset(smp) + local_cid(smp)
4243 0 : max_ncols = min(pcols,blksiz-ib)
4244 :
4245 0 : ncols = 0
4246 0 : do i=1,max_ncols
4247 0 : ib = ib + 1
4248 : ! check whether global index is for a column that dynamics
4249 : ! intends to pass to the physics
4250 0 : curgcol = cols(ib)
4251 0 : if (dyn_to_latlon_gcol_map(curgcol) /= -1) then
4252 : ! yes - then save the information
4253 0 : ncols = ncols + 1
4254 0 : chunks(cid)%gcol(ncols) = curgcol
4255 0 : chunks(cid)%lon(ncols) = lon_p(curgcol)
4256 0 : chunks(cid)%lat(ncols) = lat_p(curgcol)
4257 0 : knuhcs(curgcol)%chunkid = cid
4258 0 : knuhcs(curgcol)%col = ncols
4259 : endif
4260 : enddo
4261 0 : chunks(cid)%ncols = ncols
4262 :
4263 0 : local_cid(smp) = local_cid(smp) + 1
4264 : enddo
4265 : enddo
4266 : !
4267 : ! Set number of threads available in each "SMP node".
4268 : !
4269 0 : do p=0,npes-1
4270 0 : nsmpthreads(p) = npthreads(p)
4271 : enddo
4272 : !
4273 : endif
4274 : !
4275 : ! Assign chunks to processes.
4276 : !
4277 : call assign_chunks(npthreads, nsmpx, proc_smp_mapx, &
4278 1536 : nsmpthreads, nsmpchunks)
4279 : !
4280 : ! Clean up
4281 : !
4282 1536 : deallocate( nsmpcolumns )
4283 1536 : deallocate( nsmpthreads )
4284 1536 : deallocate( nsmpchunks )
4285 1536 : deallocate( maxcol_chk )
4286 1536 : deallocate( maxcol_chks )
4287 1536 : deallocate( cid_offset )
4288 1536 : deallocate( local_cid )
4289 1536 : deallocate( cols )
4290 1536 : deallocate( knuhcs )
4291 :
4292 1536 : return
4293 3072 : end subroutine create_chunks
4294 : !
4295 : !========================================================================
4296 :
4297 0 : subroutine find_partners(opt, proc_busy_d, nsmpx, proc_smp_mapx)
4298 : !-----------------------------------------------------------------------
4299 : !
4300 : ! Purpose: Divide processes into pairs, attempting to maximize the
4301 : ! the number of columns in one process whose twins are in the
4302 : ! other process.
4303 : !
4304 : ! Method: The day/night and north/south hemisphere complement is defined
4305 : ! to be the column twin.
4306 : !
4307 : ! Author: Patrick Worley
4308 : !
4309 : !-----------------------------------------------------------------------
4310 1536 : use dyn_grid, only: get_gcol_block_cnt_d, get_gcol_block_d, &
4311 : get_block_owner_d
4312 : use pmgrid, only: plev
4313 : !------------------------------Arguments--------------------------------
4314 : integer, intent(in) :: opt ! chunking option
4315 : logical, intent(in) :: proc_busy_d(0:npes-1)
4316 : ! active/idle dynamics process flags
4317 : integer, intent(out) :: nsmpx ! calculated number of virtual
4318 : ! SMP nodes
4319 : integer, intent(out) :: proc_smp_mapx(0:npes-1)
4320 : ! process/virtual smp map
4321 : !---------------------------Local workspace-----------------------------
4322 : integer :: gcol_latlon ! physics column index (latlon sorted)
4323 : integer :: twingcol_latlon ! physics column index (latlon sorted)
4324 : integer :: gcol, twingcol ! physics column indices
4325 : integer :: lon, lat, twinlat ! longitude and latitude indices
4326 : integer :: twinlon_off ! estimate as to offset of twinlon
4327 : ! on a latitude line
4328 : integer :: block_cnt ! number of blocks containing data
4329 : ! for a given vertical column
4330 : integer :: blockids(plev+1) ! block indices
4331 : integer :: bcids(plev+1) ! block column indices
4332 : integer :: jb ! block index
4333 : integer :: p, twp ! process indices
4334 0 : integer :: col_proc_mapx(ngcols) ! location of columns in
4335 : ! dynamics decomposition
4336 0 : integer :: twin_proc_mapx(ngcols) ! location of column twins in
4337 : ! dynamics decomposition
4338 0 : integer :: twin_cnt(0:npes-1) ! for each process, number of twins
4339 : ! in each of the other processes
4340 0 : logical :: assigned(0:npes-1) ! flag indicating whether process
4341 : ! assigned to an SMP node yet
4342 : integer :: maxpartner, maxcnt ! process with maximum number of
4343 : ! twins and this count
4344 :
4345 : logical :: error ! error flag
4346 : !-----------------------------------------------------------------------
4347 : !
4348 : ! Determine process location of column and its twin in dynamics decomposition
4349 : !
4350 0 : col_proc_mapx(:) = -1
4351 0 : twin_proc_mapx(:) = -1
4352 :
4353 0 : error = .false.
4354 0 : do gcol_latlon=1,num_global_phys_cols
4355 :
4356 : ! Assume latitude and longitude symmetries and that index manipulations
4357 : ! are sufficient to find partners. (Will be true for lon/lat grids.)
4358 0 : gcol = latlon_to_dyn_gcol_map(gcol_latlon)
4359 0 : lat = lat_p(gcol)
4360 0 : twinlat = clat_p_tot+1-lat
4361 0 : lon = lon_p(gcol)
4362 0 : twinlon_off = mod((lon-1)+(clat_p_cnt(twinlat)/2), clat_p_cnt(twinlat))
4363 0 : twingcol_latlon = clat_p_idx(twinlat) + twinlon_off
4364 0 : twingcol = latlon_to_dyn_gcol_map(twingcol_latlon)
4365 :
4366 0 : block_cnt = get_gcol_block_cnt_d(gcol)
4367 0 : call get_gcol_block_d(gcol,block_cnt,blockids,bcids)
4368 0 : do jb=1,block_cnt
4369 0 : p = get_block_owner_d(blockids(jb))
4370 0 : if (col_proc_mapx(gcol) == -1) then
4371 0 : col_proc_mapx(gcol) = p
4372 0 : elseif (col_proc_mapx(gcol) /= p) then
4373 0 : error = .true.
4374 : endif
4375 : enddo
4376 :
4377 0 : block_cnt = get_gcol_block_cnt_d(twingcol)
4378 0 : call get_gcol_block_d(twingcol,block_cnt,blockids,bcids)
4379 0 : do jb=1,block_cnt
4380 0 : p = get_block_owner_d(blockids(jb))
4381 0 : if (twin_proc_mapx(gcol) == -1) then
4382 0 : twin_proc_mapx(gcol) = p
4383 0 : elseif (twin_proc_mapx(gcol) /= p) then
4384 0 : error = .true.
4385 : endif
4386 : enddo
4387 :
4388 : end do
4389 :
4390 0 : if (error) then
4391 0 : if (masterproc) then
4392 0 : write(iulog,*) "PHYS_GRID_INIT error: opt", opt, "specified, ", &
4393 0 : "but vertical decomposition not limited to single process"
4394 : endif
4395 0 : call endrun()
4396 : endif
4397 :
4398 : !
4399 : ! Assign process pairs to SMPs, attempting to maximize the number of column,twin
4400 : ! pairs in same SMP.
4401 : !
4402 0 : assigned(:) = .false.
4403 0 : twin_cnt(:) = 0
4404 0 : nsmpx = 0
4405 0 : do p=0,npes-1
4406 0 : if ((.not. assigned(p)) .and. (proc_busy_d(p))) then
4407 : !
4408 : ! For each process, determine number of twins in each of the other processes
4409 : ! (running over all columns multiple times to minimize memory requirements).
4410 : !
4411 0 : do gcol_latlon=1,num_global_phys_cols
4412 0 : gcol = latlon_to_dyn_gcol_map(gcol_latlon)
4413 0 : if (col_proc_mapx(gcol) == p) then
4414 0 : twin_cnt(twin_proc_mapx(gcol)) = &
4415 0 : twin_cnt(twin_proc_mapx(gcol)) + 1
4416 : endif
4417 : enddo
4418 : !
4419 : ! Find process with maximum number of twins that has not yet been designated
4420 : ! a partner.
4421 : !
4422 0 : maxpartner = -1
4423 0 : maxcnt = 0
4424 0 : do twp=0,npes-1
4425 0 : if ((.not. assigned(twp)) .and. (twp /= p)) then
4426 0 : if (twin_cnt(twp) >= maxcnt) then
4427 0 : maxcnt = twin_cnt(twp)
4428 0 : maxpartner = twp
4429 : endif
4430 : endif
4431 : enddo
4432 : !
4433 : ! Assign p and twp to the same SMP node
4434 : !
4435 0 : if (maxpartner /= -1) then
4436 0 : assigned(p) = .true.
4437 0 : assigned(maxpartner) = .true.
4438 0 : proc_smp_mapx(p) = nsmpx
4439 0 : proc_smp_mapx(maxpartner) = nsmpx
4440 0 : nsmpx = nsmpx + 1
4441 : else
4442 0 : if (masterproc) then
4443 0 : write(iulog,*) "PHYS_GRID_INIT error: opt", opt, "specified, ", &
4444 0 : "but could not divide processes into pairs."
4445 : endif
4446 0 : call endrun()
4447 : endif
4448 : !
4449 : endif
4450 : !
4451 : enddo
4452 : !
4453 0 : return
4454 0 : end subroutine find_partners
4455 : !
4456 : !========================================================================
4457 :
4458 82575360 : subroutine find_twin(gcol, smp, proc_smp_mapx, twingcol_f)
4459 : !-----------------------------------------------------------------------
4460 : !
4461 : ! Purpose: Find column that when paired with gcol in a chunk
4462 : ! balances the load. A column is a candidate to be paired with
4463 : ! gcol if it is in the same SMP node as gcol as defined
4464 : ! by proc_smp_mapx.
4465 : !
4466 : ! Method: The day/night and north/south hemisphere complement is
4467 : ! tried first. If it is not a candidate or if it has already been
4468 : ! assigned, then the day/night complement is tried next. If that
4469 : ! also is not available, then nothing is returned.
4470 : !
4471 : ! Author: Patrick Worley
4472 : !
4473 : !-----------------------------------------------------------------------
4474 0 : use dyn_grid, only: get_gcol_block_d, get_block_owner_d
4475 :
4476 : !------------------------------Arguments--------------------------------
4477 : integer, intent(in) :: gcol ! global column index for column
4478 : ! seeking a twin for
4479 : integer, intent(in) :: smp ! index of SMP node
4480 : ! currently assigned to
4481 : integer, intent(in) :: proc_smp_mapx(0:npes-1)
4482 : ! process/virtual smp map
4483 : integer, intent(out) :: twingcol_f
4484 : ! global column index for twin
4485 : !---------------------------Local workspace-----------------------------
4486 : integer :: lon, lat ! global lon/lat indices for column
4487 : ! seeking a twin for
4488 : integer :: twinlon, twinlat ! lon/lat indices of twin candidate
4489 : integer :: twinlon_off ! estimate as to offset of twinlon
4490 : ! on a latitude line
4491 : logical :: found ! found flag
4492 : integer :: i ! loop index
4493 : integer :: upper, lower ! search temporaries
4494 : integer :: twingcol_latlon ! global physics column index (latlon sorted)
4495 : integer :: twingcol_lonlat ! global physics column index (lonlat sorted)
4496 : integer :: twingcol ! global physics column indes
4497 : integer :: diff, min_diff, min_i ! search temporaries
4498 82575360 : integer :: jbtwin(npes) ! global block indices
4499 82575360 : integer :: ibtwin(npes) ! global column indices
4500 : integer :: twinproc, twinsmp ! process and smp ids
4501 :
4502 82575360 : integer :: clon_p_idx(clon_p_tot) ! index in lonlat ordering for first
4503 : ! occurrence of longitude corresponding to
4504 : ! given latitude index
4505 :
4506 : real(r8):: twopi ! 2*pi
4507 : real(r8):: clat, twinclat ! latitude and twin
4508 : real(r8):: clon, twinclon ! longitude and twin
4509 :
4510 : !-----------------------------------------------------------------------
4511 41287680 : twingcol_f = -1
4512 :
4513 : ! precompute clon_p_idx
4514 41287680 : clon_p_idx(1) = 1
4515 11890851840 : do i=2,clon_p_tot
4516 11890851840 : clon_p_idx(i) = clon_p_idx(i-1) + clon_p_cnt(i-1)
4517 : enddo
4518 : !
4519 : ! Try day/night and north/south hemisphere complement first
4520 : !
4521 : ! determine twin latitude
4522 41287680 : lat = lat_p(gcol)
4523 41287680 : clat = clat_p(lat)
4524 41287680 : twinclat = -clat
4525 41287680 : twinlat = clat_p_tot+1-lat
4526 41287680 : if (clat_p(twinlat) == twinclat) then
4527 : found = .true.
4528 : else
4529 15925248 : found = .false.
4530 15925248 : upper = twinlat
4531 15925248 : lower = twinlat
4532 15925248 : if (upper < clat_p_tot) upper = twinlat + 1
4533 15925248 : if (lower > 1) lower = twinlat - 1
4534 : endif
4535 57212928 : do while (.not. found)
4536 15925248 : if ((abs(clat_p(upper)-twinclat) < abs(clat_p(twinlat)-twinclat)) .and. &
4537 41287680 : (upper /= twinlat)) then
4538 0 : twinlat = upper
4539 0 : if (upper < clat_p_tot) then
4540 0 : upper = twinlat + 1
4541 : else
4542 : found = .true.
4543 : endif
4544 15925248 : else if ((abs(clat_p(lower)-twinclat) < abs(clat_p(twinlat)-twinclat)) .and. &
4545 : (lower /= twinlat)) then
4546 0 : twinlat = lower
4547 0 : if (lower > 1) then
4548 0 : lower = twinlat - 1
4549 : else
4550 : found = .true.
4551 : endif
4552 : else
4553 : found = .true.
4554 : endif
4555 : enddo
4556 :
4557 : ! determine twin longitude
4558 41287680 : twopi = 2.0_r8*pi
4559 41287680 : lon = lon_p(gcol)
4560 41287680 : clon = clon_p(lon)
4561 41287680 : twinclon = mod(clon+pi,twopi)
4562 41287680 : twinlon = mod((lon-1)+(clon_p_tot/2), clon_p_tot) + 1
4563 41287680 : if (clon_p(twinlon) == twinclon) then
4564 : found = .true.
4565 : else
4566 22046208 : found = .false.
4567 22046208 : upper = twinlon
4568 22046208 : lower = twinlon
4569 22046208 : if (upper < clon_p_tot) upper = twinlon + 1
4570 22046208 : if (lower > 1) lower = twinlon - 1
4571 : endif
4572 63333888 : do while (.not. found)
4573 22046208 : if ((abs(clon_p(upper)-twinclon) < abs(clon_p(twinlon)-twinclon)) .and. &
4574 41287680 : (upper /= twinlon)) then
4575 0 : twinlon = upper
4576 0 : if (upper < clon_p_tot) then
4577 0 : upper = twinlon + 1
4578 : else
4579 : found = .true.
4580 : endif
4581 22046208 : else if ((abs(clon_p(lower)-twinclon) < abs(clon_p(twinlon)-twinclon)) .and. &
4582 : (lower /= twinlon)) then
4583 0 : twinlon = lower
4584 0 : if (lower > 1) then
4585 0 : lower = twinlon - 1
4586 : else
4587 : found = .true.
4588 : endif
4589 : else
4590 : found = .true.
4591 : endif
4592 : enddo
4593 :
4594 : ! first, look for an exact match (assuming latitude and longitude symmetries)
4595 41287680 : twinlon_off = mod((lon-1)+(clat_p_cnt(twinlat)/2), clat_p_cnt(twinlat))
4596 41287680 : twingcol_latlon = clat_p_idx(twinlat) + twinlon_off
4597 41287680 : twingcol = latlon_to_dyn_gcol_map(twingcol_latlon)
4598 :
4599 : ! otherwise, look around for an approximate match using lonlat sorted indices
4600 41287680 : if ((lon_p(twingcol) /= twinlon) .or. (lat_p(twingcol) /= twinlat)) then
4601 0 : twingcol_lonlat = clon_p_idx(twinlon)
4602 0 : twingcol = lonlat_to_dyn_gcol_map(twingcol_lonlat)
4603 0 : min_diff = abs(lat_p(twingcol) - twinlat)
4604 0 : min_i = 0
4605 0 : do i = 1, clon_p_cnt(twinlon)-1
4606 0 : twingcol_lonlat = clon_p_idx(twinlon)+i
4607 0 : twingcol = lonlat_to_dyn_gcol_map(twingcol_lonlat)
4608 0 : diff = abs(lat_p(twingcol) - twinlat)
4609 0 : if (diff < min_diff) then
4610 0 : min_diff = diff
4611 0 : min_i = i
4612 : endif
4613 : enddo
4614 0 : twingcol_lonlat = clon_p_idx(twinlon) + min_i
4615 0 : twingcol = lonlat_to_dyn_gcol_map(twingcol_lonlat)
4616 : endif
4617 :
4618 : ! Check whether twin and original are in same smp
4619 41287680 : found = .false.
4620 41287680 : call get_gcol_block_d(twingcol,npes,jbtwin,ibtwin)
4621 41287680 : twinproc = get_block_owner_d(jbtwin(1))
4622 41287680 : twinsmp = proc_smp_mapx(twinproc)
4623 : !
4624 41287680 : if ((twinsmp == smp) .and. &
4625 41287680 : (knuhcs(twingcol)%chunkid == -1)) then
4626 41287680 : found = .true.
4627 41287680 : twingcol_f = twingcol
4628 : endif
4629 : !
4630 : ! Try day/night complement next
4631 : if (.not. found) then
4632 :
4633 : ! first, look for an exact match (assuming longitude symmetries)
4634 0 : twinlon_off = mod((lon-1)+(clat_p_cnt(lat)/2), clat_p_cnt(lat))
4635 0 : twingcol_latlon = clat_p_idx(lat) + twinlon_off
4636 0 : twingcol = latlon_to_dyn_gcol_map(twingcol_latlon)
4637 :
4638 : ! otherwise, look around for an approximate match using lonlat
4639 : ! column ordering
4640 0 : if ((lon_p(twingcol) /= twinlon) .or. &
4641 0 : (lat_p(twingcol) /= lat)) then
4642 0 : twingcol_lonlat = clon_p_idx(twinlon)
4643 0 : twingcol = lonlat_to_dyn_gcol_map(twingcol_lonlat)
4644 0 : min_diff = abs(lat_p(twingcol) - lat)
4645 0 : min_i = 0
4646 0 : do i = 1, clon_p_cnt(twinlon)-1
4647 0 : twingcol_lonlat = clon_p_idx(twinlon)+i
4648 0 : twingcol = lonlat_to_dyn_gcol_map(twingcol_lonlat)
4649 0 : diff = abs(lat_p(twingcol) - lat)
4650 0 : if (diff < min_diff) then
4651 0 : min_diff = diff
4652 0 : min_i = i
4653 : endif
4654 : enddo
4655 0 : twingcol_lonlat = clon_p_idx(twinlon) + min_i
4656 0 : twingcol = lonlat_to_dyn_gcol_map(twingcol_lonlat)
4657 : endif
4658 : !
4659 0 : call get_gcol_block_d(twingcol,npes,jbtwin,ibtwin)
4660 0 : twinproc = get_block_owner_d(jbtwin(1))
4661 0 : twinsmp = proc_smp_mapx(twinproc)
4662 : !
4663 0 : if ((twinsmp == smp) .and. &
4664 0 : (knuhcs(twingcol)%chunkid == -1)) then
4665 0 : found = .true.
4666 0 : twingcol_f = twingcol
4667 : endif
4668 : !
4669 : endif
4670 : !
4671 41287680 : return
4672 41287680 : end subroutine find_twin
4673 : !
4674 : !========================================================================
4675 :
4676 3072 : subroutine assign_chunks(npthreads, nsmpx, proc_smp_mapx, &
4677 1536 : nsmpthreads, nsmpchunks)
4678 : !-----------------------------------------------------------------------
4679 : !
4680 : ! Purpose: Assign chunks to processes, balancing the number of
4681 : ! chunks per thread and minimizing the communication costs
4682 : ! in dp_coupling subject to the restraint that columns
4683 : ! do not migrate outside of the current SMP node.
4684 : !
4685 : ! Method:
4686 : !
4687 : ! Author: Patrick Worley
4688 : !
4689 : !-----------------------------------------------------------------------
4690 41287680 : use pmgrid, only: plev
4691 : use dyn_grid, only: get_gcol_block_cnt_d, get_gcol_block_d,&
4692 : get_block_owner_d
4693 : !------------------------------Arguments--------------------------------
4694 : integer, intent(in) :: npthreads(0:npes-1)
4695 : ! number of OpenMP threads per process
4696 : integer, intent(in) :: nsmpx ! virtual smp count
4697 : integer, intent(in) :: proc_smp_mapx(0:npes-1)
4698 : ! process/virtual smp map
4699 : integer, intent(in) :: nsmpthreads(0:nsmpx-1)
4700 : ! number of OpenMP threads
4701 : ! per virtual SMP
4702 : integer, intent(in) :: nsmpchunks(0:nsmpx-1)
4703 : ! number of chunks assigned
4704 : ! to a given virtual SMP
4705 : !---------------------------Local workspace-----------------------------
4706 : integer :: i, jb, p ! loop indices
4707 : integer :: cid ! chunk id
4708 : integer :: smp ! SMP index
4709 : integer :: curgcol ! global column index
4710 : integer :: block_cnt ! number of blocks containing data
4711 : ! for a given vertical column
4712 : integer :: blockids(plev+1) ! block indices
4713 : integer :: bcids(plev+1) ! block column indices
4714 3072 : integer :: ntsks_smpx(0:nsmpx-1) ! number of processes per virtual SMP
4715 3072 : integer :: smp_proc_mapx(0:nsmpx-1,max_nproc_smpx)
4716 : ! virtual smp to process id map
4717 3072 : integer :: cid_offset(0:nsmpx) ! chunk id virtual smp offset
4718 3072 : integer :: ntmp1_smp(0:nsmpx-1) ! minimum number of chunks per thread
4719 : ! in a virtual SMP
4720 3072 : integer :: ntmp2_smp(0:nsmpx-1) ! number of extra chunks to be assigned
4721 : ! in a virtual SMP
4722 3072 : integer :: ntmp3_smp(0:nsmpx-1) ! number of processes in a virtual
4723 : ! SMP that get more extra chunks
4724 : ! than the others
4725 3072 : integer :: ntmp4_smp(0:nsmpx-1) ! number of extra chunks per process
4726 : ! in a virtual SMP
4727 : integer :: ntmp1, ntmp2 ! work variables
4728 : ! integer :: npchunks(0:npes-1) ! number of chunks to be assigned to
4729 : ! ! a given process
4730 3072 : integer :: cur_npchunks(0:npes-1) ! current number of chunks assigned
4731 : ! to a given process
4732 1536 : integer :: column_count(0:npes-1) ! number of columns from current chunk
4733 : ! assigned to each process in dynamics
4734 : ! decomposition
4735 : !-----------------------------------------------------------------------
4736 : !
4737 : ! Count number of processes per virtual SMP and determine virtual SMP
4738 : ! to process id map
4739 : !
4740 3072 : ntsks_smpx(:) = 0
4741 2360832 : smp_proc_mapx(:,:) = -1
4742 1181184 : do p=0,npes-1
4743 1179648 : smp = proc_smp_mapx(p)
4744 1179648 : ntsks_smpx(smp) = ntsks_smpx(smp) + 1
4745 1181184 : smp_proc_mapx(smp,ntsks_smpx(smp)) = p
4746 : enddo
4747 : !
4748 : ! Determine chunk id ranges for each virtual SMP
4749 : !
4750 1536 : cid_offset(0) = 1
4751 3072 : do smp=1,nsmpx
4752 3072 : cid_offset(smp) = cid_offset(smp-1) + nsmpchunks(smp-1)
4753 : enddo
4754 : !
4755 : ! Determine number of chunks to assign to each process
4756 : !
4757 3072 : do smp=0,nsmpx-1
4758 : !
4759 : ! Minimum number of chunks per thread
4760 1536 : ntmp1_smp(smp) = nsmpchunks(smp)/nsmpthreads(smp)
4761 :
4762 : ! Number of extra chunks to be assigned
4763 1536 : ntmp2_smp(smp) = mod(nsmpchunks(smp),nsmpthreads(smp))
4764 :
4765 : ! Number of processes that get more extra chunks than the others
4766 1536 : ntmp3_smp(smp) = mod(ntmp2_smp(smp),ntsks_smpx(smp))
4767 :
4768 : ! Number of extra chunks per process
4769 1536 : ntmp4_smp(smp) = ntmp2_smp(smp)/ntsks_smpx(smp)
4770 3072 : if (ntmp3_smp(smp) > 0) then
4771 0 : ntmp4_smp(smp) = ntmp4_smp(smp) + 1
4772 : endif
4773 : enddo
4774 :
4775 1181184 : do p=0,npes-1
4776 1179648 : smp = proc_smp_mapx(p)
4777 :
4778 : ! Update number of extra chunks
4779 1179648 : if (ntmp2_smp(smp) > ntmp4_smp(smp)) then
4780 0 : ntmp2_smp(smp) = ntmp2_smp(smp) - ntmp4_smp(smp)
4781 : else
4782 1179648 : ntmp4_smp(smp) = ntmp2_smp(smp)
4783 1179648 : ntmp2_smp(smp) = 0
4784 1179648 : ntmp3_smp(smp) = 0
4785 : endif
4786 :
4787 : ! Set number of chunks
4788 1179648 : npchunks(p) = ntmp1_smp(smp)*npthreads(p) + ntmp4_smp(smp)
4789 :
4790 : ! Update extra chunk increment
4791 1181184 : if (ntmp3_smp(smp) > 0) then
4792 0 : ntmp3_smp(smp) = ntmp3_smp(smp) - 1
4793 0 : if (ntmp3_smp(smp) == 0) then
4794 0 : ntmp4_smp(smp) = ntmp4_smp(smp) - 1
4795 : endif
4796 : endif
4797 : enddo
4798 :
4799 : !
4800 : ! Assign chunks to processes:
4801 : !
4802 1181184 : cur_npchunks(:) = 0
4803 : !
4804 3072 : do smp=0,nsmpx-1
4805 5901312 : do cid=cid_offset(smp),cid_offset(smp+1)-1
4806 : !
4807 4535746560 : do i=1,ntsks_smpx(smp)
4808 4529848320 : p = smp_proc_mapx(smp,i)
4809 4535746560 : column_count(p) = 0
4810 : enddo
4811 : !
4812 : ! For each chunk, determine number of columns in each
4813 : ! process within the dynamics.
4814 90832896 : do i=1,chunks(cid)%ncols
4815 84934656 : curgcol = chunks(cid)%gcol(i)
4816 84934656 : block_cnt = get_gcol_block_cnt_d(curgcol)
4817 84934656 : call get_gcol_block_d(curgcol,block_cnt,blockids,bcids)
4818 175767552 : do jb=1,block_cnt
4819 84934656 : p = get_block_owner_d(blockids(jb))
4820 169869312 : column_count(p) = column_count(p) + 1
4821 : enddo
4822 : enddo
4823 : !
4824 : ! Eliminate processes that already have their quota of chunks
4825 4535746560 : do i=1,ntsks_smpx(smp)
4826 4529848320 : p = smp_proc_mapx(smp,i)
4827 4535746560 : if (cur_npchunks(p) == npchunks(p)) then
4828 2221330944 : column_count(p) = -1
4829 : endif
4830 : enddo
4831 : !
4832 : ! Assign chunk to process with most
4833 : ! columns from chunk, from among those still available
4834 5898240 : ntmp1 = -1
4835 5898240 : ntmp2 = -1
4836 4535746560 : do i=1,ntsks_smpx(smp)
4837 4529848320 : p = smp_proc_mapx(smp,i)
4838 4535746560 : if (column_count(p) > ntmp1) then
4839 11008512 : ntmp1 = column_count(p)
4840 11008512 : ntmp2 = p
4841 : endif
4842 : enddo
4843 5898240 : cur_npchunks(ntmp2) = cur_npchunks(ntmp2) + 1
4844 5898240 : chunks(cid)%owner = ntmp2
4845 :
4846 : ! Update total number of columns assigned to this process
4847 5899776 : gs_col_num(ntmp2) = gs_col_num(ntmp2) + chunks(cid)%ncols
4848 : !
4849 : enddo
4850 : !
4851 : enddo
4852 : !
4853 1536 : return
4854 1536 : end subroutine assign_chunks
4855 : !
4856 : !========================================================================
4857 :
4858 : !#######################################################################
4859 :
4860 1536 : end module phys_grid
|