Line data Source code
1 : #define MOD_ASSUMED_SIZE
2 : !#define MOD_SPECIFIED_SHAPE
3 : #if !defined( STAND_ALONE )
4 : #define NOR4 ! Activate to effectively eliminate real*4 window
5 : #endif
6 : !BOP
7 : !
8 : ! !MODULE: mod_comm --- SPMD parallel decompostion/communication module
9 : module mod_comm
10 : !
11 : ! !DESCRIPTION:
12 : !
13 : ! \paragraph{Overview}
14 : !
15 : ! This module contains SPMD parallelism decomposition and
16 : ! communication routines. This library was originally written by
17 : ! W. Putman and S.-J. Lin for simple gridded communications in the
18 : ! Finite-Volume General Circulation Model (FVGCM). Most of the
19 : ! member functions are specific to the type of gridded data,
20 : ! ghost communication and decompositions used in FVGCM (which
21 : ! are, however, very common in atmospheric models).
22 : !
23 : ! The module was extended for irregular communication
24 : ! by W. Sawyer and A. Mirin. It is now
25 : ! a more general tool and has been incorporated into the Parallel
26 : ! Library for Grid Manipulations (PILGRIM) which is used in the
27 : ! Community Atmospheric Model (CAM) and the Physical-space
28 : ! Statistical Analysis System (PSAS).
29 : !
30 : ! **********************************************************
31 : ! The storage associated with the irregular communications
32 : ! is based on CAM requirements. It runs the risk of functioning
33 : ! improperly when used in another code.
34 : ! **********************************************************
35 : !
36 : ! Irregular communication is based on the {\tt blockdescriptor}
37 : ! derived type, which defines a set of parcels which are to be
38 : ! send to (or received from) another PE. The irregular
39 : ! communication routines operate on arrays of block descriptors
40 : ! whose length is equal to number of PEs involved in the
41 : ! communication. This means the irregular communication primitives
42 : ! are merely non-blocking (potentially) all-to-all primitives.
43 : !
44 : ! This package is based on standard MPI-1 communications, and OpenMP
45 : ! may be implemented. MPI-2 and SHMEM support have been removed.
46 : !
47 : !
48 : ! \paragraph{Use of Global Arrays}
49 : !
50 : ! The module uses the concept of global arrays (coined from former
51 : ! usage of shared memory arenas in the "multi-level parallelism"
52 : ! (MLP) paradigm). Global arrays are merely buffers into which
53 : ! data are packed for the transfer to other PEs and are not
54 : ! necessarily of global extent. All such arrays are
55 : ! 1-dimensional; they are accessed as needed with offset vars.
56 : !
57 : ! \paragraph{Use of Windows}
58 : !
59 : ! All implementations use real*8, real*4, and integer*4 windows
60 : ! which are used with global arrays as follows:
61 : !
62 : ! \begin{itemize}
63 : ! \item r8\_win -> ga\_r8 - for use with real*8 types
64 : ! \item r4\_win -> ga\_r4 - for use with real*4 types
65 : ! \item i4\_win -> ga\_i4 - for use with integer*4 types
66 : ! \end{itemize}
67 : !
68 : ! note: MPI routines need 2 buffers per GA, ga\_<type>\_s & ga\_<type>\_r
69 : ! ga\_<type>\_r is used for the windows
70 : !
71 : ! \paragraph{Compilation}
72 : !
73 : ! This module contains several precompile options:
74 : !
75 : ! \begin{itemize}
76 : ! \item {\tt STAND_ALONE}: Use as stand-alone library (if
77 : ! defined) or as part of CAM (if
78 : ! undefined)
79 : ! \item {\tt MODCM_TIMING}: Turn on CAM timing routines (only
80 : ! available if compiled in CAM framework)
81 : ! \item {\tt _OPENMP}: Implicit token (controlled by
82 : ! compiler) to enable OpenMP
83 : ! \end{itemize}
84 : !
85 : !
86 : ! \paragraph{Usage}
87 : !
88 : ! NOTE - must call PILGRIM routine parinit to initialize before
89 : ! making any other calls.
90 : !
91 : ! The public members of this module are:
92 : !
93 : ! \begin{itemize}
94 : ! \item {\tt mp\_init}: Initialize module
95 : ! \item {\tt mp\_exit}: Exit module
96 : ! \item {\tt mp\_send4d\_ns}: Ghost 4D array on north/south
97 : ! \item {\tt mp\_recv4d\_ns}: Complete 4D N/S ghost operation
98 : ! \item {\tt mp\_send2\_ns}: Ghost 2 3D arrays on north/south
99 : ! \item {\tt mp\_recv2\_ns}: Complete 2x3D N/S ghost operation
100 : ! \item {\tt mp\_send3d}: Send 3D general ghost region
101 : ! \item {\tt mp\_recv3d}: Complete 3D general ghost operation
102 : ! \item {\tt mp\_send3d\_2}: Send 2x3D general ghost regions
103 : ! \item {\tt mp\_recv3d\_2}: Complete 2x3D general ghost operation
104 : ! \item {\tt get\_partneroffset}:Offset for remote write
105 : ! \item {\tt mp\_sendirr}: Initiate all-to-all send of parcels
106 : ! \item {\tt mp\_recvirr}: Complete all-to-all chunk commun.
107 : ! \end{itemize}
108 : !
109 : ! There are variants of some of these routines for r4 and i4 data types.
110 : ! There are other public routines, but these are only used internally
111 : ! in PILGRIM, and they should not be called by user applications.
112 : !
113 : ! !REVISION HISTORY:
114 : ! 2001.09.01 Lin
115 : ! 2002.04.16 Putman Modified for Global Array code
116 : ! 2002.04.16 Putman Added ProTeX documentation
117 : ! 2002.05.28 Putman Added use of precision module
118 : ! 2003.06.24 Sawyer Minor additions for use with mod_irreg
119 : ! 2004.01.08 Sawyer Removed older functionality, no longer needed
120 : ! 2004.02.10 Mirin Major restructuring and simplification. Documentation
121 : ! 2004.03.06 Sawyer Additional documentation; cosmetics
122 : ! 2005.03.20 Sawyer Added extensive support for real*4
123 : ! 2005.10.12 Worley Improved vectorization of buffer copies and general clean-up
124 : ! 2006.05.15 Mirin Make dynamic allocation the default; general clean-up.
125 : ! !USES:
126 : #if defined( STAND_ALONE )
127 : # define iulog 6
128 : #else
129 : use cam_logfile, only: iulog
130 : #endif
131 :
132 : !
133 : ! Performance bug work around for Gemini interconnect
134 : !
135 : #ifdef _NO_MPI_RSEND
136 : #define MPI_RSEND MPI_SEND
137 : #define mpi_rsend mpi_send
138 : #define MPI_IRSEND MPI_ISEND
139 : #define mpi_irsend mpi_isend
140 : #endif
141 :
142 : !
143 : ! Mod_comm has option for stand-alone use as well as within CAM
144 : !
145 :
146 : #if defined ( SPMD )
147 :
148 : #if defined( STAND_ALONE )
149 : # define r8 selected_real_kind(12)
150 : # define r4 selected_real_kind( 6)
151 : # define i8 selected_int_kind(13)
152 : # define i4 selected_int_kind( 6)
153 : # define PLON 144
154 : # define PLAT 91
155 : # define PLEV 26
156 : # define PCNST 1
157 : #else
158 : use shr_kind_mod, only : r8 => shr_kind_r8, r4 => shr_kind_r4, &
159 : i8 => shr_kind_i8, i4 => shr_kind_i4
160 : #endif
161 : #if defined( MODCM_TIMING )
162 : use perf_mod
163 : #endif
164 :
165 : implicit none
166 :
167 : #include "mpif.h"
168 :
169 : ! !PUBLIC MEMBER FUNCTIONS:
170 : public mp_init, mp_exit, &
171 : mp_send4d_ns, mp_recv4d_ns, mp_send4d_ns_r4, mp_recv4d_ns_r4, &
172 : mp_send2_ns, mp_recv2_ns, mp_send3d_2, mp_recv3d_2, &
173 : mp_send3d, mp_recv3d, mp_sendirr, mp_recvirr, &
174 : mp_sendirr_r4, mp_recvirr_r4, mp_sendirr_i4, mp_recvirr_i4, &
175 : mp_swapirr, mp_swapirr_i4, mp_barrier, &
176 : get_partneroffset, mp_r8, mp_r4, mp_i4, &
177 : mp_sendtrirr, mp_recvtrirr, mp_swaptrirr
178 : public modcam_method, modcam_geopk, modcam_gatscat, modcam_npryz, modcam_maxirr
179 :
180 : ! !PRIVATE MEMBER FUNCTIONS:
181 : private ceil2 ! copy of routine in atm/cam/src/utils/spmdutils
182 : private pair ! copy of routine in atm/cam/src/utils/spmdutils
183 :
184 : !------------------------------------------------------------------------------
185 : ! type declaration for describing an arbitrary number of contiguous parcels
186 : ! this is for irregular communications
187 : !------------------------------------------------------------------------------
188 : type blockdescriptor
189 : integer :: method ! transpose method
190 : integer :: type ! Ptr to MPI derived type
191 : integer, pointer :: displacements(:) ! Offsets in local segment
192 : integer, pointer :: blocksizes(:) ! Block sizes to transfer
193 : integer :: partneroffset ! Aggregated partner offset
194 : integer :: partnertype ! Ptr to partner's MPI derived type
195 : integer :: Nparcels ! size( displacements )
196 : integer :: Tot_Size ! sum ( blocksizes )
197 : end type blockdescriptor
198 :
199 : ! Transpose methods (method)
200 : ! 0 for contiguous temporary buffer
201 : ! 1 for direct communication (derived types)
202 :
203 : ! The variables immediately below refer specifically to mpi derived types
204 : INTEGER, ALLOCATABLE, SAVE :: InHandle(:, :)
205 : INTEGER, ALLOCATABLE, SAVE :: OutHandle(:, :)
206 : INTEGER, SAVE :: BegTrf = 0 ! Ongoing overlapped begintransfer #
207 : INTEGER, SAVE :: EndTrf = 0 ! Ongoing overlapped endtransfer #
208 : INTEGER, SAVE :: MaxTrf = 0 ! Max no. active Mp_sendirr derived type messages
209 :
210 : ! !PUBLIC DATA MEMBERS:
211 : integer, SAVE:: gid ! PE id
212 : integer(i4), SAVE:: masterpro = 0 ! Master process id
213 : integer(i4), SAVE:: numpro ! Permanent No. of PEs
214 : integer(i4), SAVE:: numcomm ! Local No. of PEs
215 : integer(i4), SAVE:: numcpu ! No. of threads
216 : integer, SAVE:: commglobal ! Global Communicator
217 : integer, SAVE:: Max_Nparcels = 0 ! Maximum number of parcels in
218 : ! single blockdescriptor
219 :
220 : !------------------------------------------------------------------------------
221 : ! Local parameters
222 : !------------------------------------------------------------------------------
223 : integer, parameter:: nbuf = 2 ! Max No. of sends per call
224 : ! mp_send4d_ns has two sends per call (full border regions to north and south)
225 : ! mp_send2_ns has four sends per call (2 directions and 2 variables); however,
226 : ! only one ghost latitude is sent, so nbuf=2 suffices as long as nghost
227 : ! is greater than 1.
228 : ! mp_send3d has one send per call (border region in one direction).
229 : ! mp_send3d_2 has two sends per call (2 variables, border region in one direction).
230 : integer, parameter:: nghost = 3 ! No. of ghost indices
231 : integer, parameter:: max_nq = 1 ! No. of tracers simultaneously
232 : ! border communicated; can be
233 : ! overridden with dynamic storage
234 : integer, parameter:: max_trac = PCNST ! No. of tracers
235 : integer, parameter:: max_call = 2 ! Max No. of back-to-back...
236 : ! ...mp_send calls
237 : ! Currently, CAM has at most two overlapping border communication calls
238 : ! The above variable is relevant for contiguous irregular communications
239 :
240 : integer, parameter:: idimsize = PLON*nghost*(PLEV+1)*max_nq
241 : ! Size of MPI buffer region
242 : ! in mp_send/mp_recv calls, used
243 : ! to determine offset in GA
244 : integer, parameter:: platg = PLAT + 2*nghost
245 : integer, parameter :: mp_r4 = MPI_REAL
246 : integer, parameter :: mp_r8 = MPI_DOUBLE_PRECISION
247 : integer, parameter :: mp_i4 = MPI_INTEGER
248 :
249 : !------------------------------------------------------------------------------
250 : ! Local variables
251 : !------------------------------------------------------------------------------
252 :
253 : integer, SAVE:: max_irr = 0 ! Max No. active Mp_sendirr calls with window
254 : integer ierror
255 : integer, SAVE:: sizet1, sizer8, sizer4, sizei4
256 :
257 : ! CAM-specific variables
258 : integer, SAVE:: tracmax, tracbmax, dpvarmax, totvar
259 : integer, SAVE:: phys_transpose_mod
260 : integer, SAVE:: idimsizz
261 : integer, SAVE:: modcam_method, modcam_geopk, modcam_gatscat
262 : integer, SAVE:: modcam_npryz(4), modcam_tagoffset, modcam_maxirr
263 : integer, parameter :: phys_transpose_modmin = 11
264 : integer, parameter :: phys_transpose_vars = 7
265 : data phys_transpose_mod / -1 /
266 : data modcam_method / -1 /
267 : data modcam_geopk / -1 /
268 : data modcam_gatscat / -1 /
269 : data modcam_npryz / -1, -1, -1, -1 /
270 : data modcam_tagoffset / 0 /
271 : data modcam_maxirr / -1 /
272 : !
273 : ! tracmax is the maximum number of tracers simultaneously transposed within dynamics (set to 1)
274 : ! (except in dynamics-physics transposes)
275 : ! tracbmax is the maximum number of tracers simultaneously border communicated
276 : ! dpvarmax is the number of variables communicated in dynamics-physics transposes
277 : ! totvar is the maximum number of variables simultaneously transposed
278 : ! phys_transpose_mod is the communication method for dynamics/physics transposes; admissable values
279 : ! are >= phys_transpose_modmin; it is communicated from CAM when such transposes
280 : ! are requested.
281 : ! phys_transpose_vars is the number of non-tracer variables transposed between dynamics and
282 : ! physics instantiations in CAM.
283 : ! modcam_method, modcam_geopk and modcam_gatscat correspond to mod_method, mod_geopk and
284 : ! mod_gatscat in CAM.
285 : ! modcam_npryz corresponds to npr_yz in CAM.
286 : ! modcam_maxirr corresonds to mod_maxirr in CAM.
287 :
288 : !------------------------------------------------------------------------------
289 : ! Variables to control global array locations and window synchronization
290 : !------------------------------------------------------------------------------
291 : integer win_count ! Counts No. of windows in use
292 : integer igosouth, igonorth ! Index of latitudinal send direction
293 : integer ifromsouth, ifromnorth ! Index of latitudinal recv direction
294 :
295 : !------------------------------------------------------------------------------
296 : ! Local type declaration for mp_windows
297 : !------------------------------------------------------------------------------
298 : type window
299 : integer :: id ! Window id
300 : integer :: size ! Size of global window (point based)
301 : integer :: ncall_s ! Count send calls on window
302 : integer :: ncall_r ! Count recv calls on window
303 : integer :: offset_s ! Starting position in GA send
304 : integer :: offset_r ! Starting position in GA recv
305 : integer :: dest ! For use with send calls
306 : integer :: src ! For use with recv calls
307 : integer :: size_r ! Size of incoming message
308 : integer :: nsend ! Send counter
309 : integer :: nrecv ! Receive post counter
310 : integer :: nread ! Receive confirm counter
311 : integer, pointer :: sqest(:) ! Send handle
312 : integer, pointer :: rqest(:) ! Receive handle
313 : end type window
314 :
315 : !------------------------------------------------------------------------------
316 : ! Beginning Global Array variable declaration:
317 : !------------------------------------------------------------------------------
318 :
319 : type (window) :: r8_win
320 : type (window) :: r4_win
321 : type (window) :: i4_win
322 : type (window) :: t1_win
323 :
324 : ! Upper bound on ratio of local to average storage over subdomains.
325 : ! This takes into account different sized subdomains.
326 :
327 : real*8, parameter :: alloc_slack_factor = 1.2_r8
328 :
329 : !
330 : ! window variable declarations
331 : !
332 : real(r8), allocatable, SAVE:: ga_t1_r(:)
333 : real(r8), allocatable, SAVE:: ga_t1_s(:)
334 : real(r8), allocatable, SAVE:: ga_r8_r(:)
335 : real(r8), allocatable, SAVE:: ga_r8_s(:)
336 : real(r4), allocatable, SAVE:: ga_r4_r(:)
337 : real(r4), allocatable, SAVE:: ga_r4_s(:)
338 : integer(i4), allocatable, SAVE:: ga_i4_r(:)
339 : integer(i4), allocatable, SAVE:: ga_i4_s(:)
340 : !
341 : ! auxiliary variable declarations
342 : !
343 : integer, SAVE:: Status(MPI_STATUS_SIZE)
344 : integer, allocatable, SAVE:: Stats(:)
345 : !
346 : !EOP
347 : !------------------------------------------------------------------------------
348 : contains
349 : !------------------------------------------------------------------------------
350 : !BOP
351 : ! !ROUTINE: mp_init --- Initialize SPMD parallel communication
352 : !
353 : ! !INTERFACE:
354 1536 : subroutine mp_init( comm, npryzxy, mod_method, mod_geopk, mod_gatscat, mod_maxirr )
355 : !
356 : ! !INPUT PARAMETERS:
357 : integer, optional :: comm ! communicator
358 : integer, optional, intent(in) :: npryzxy(4) ! 2D decomposition
359 : integer, optional, intent(in) :: mod_method ! CAM optimization
360 : integer, optional, intent(in) :: mod_geopk ! CAM optimization
361 : integer, optional, intent(in) :: mod_gatscat ! CAM optimization
362 : integer, optional, intent(in) :: mod_maxirr ! CAM optimization
363 : ! !DESCRIPTION:
364 : !
365 : ! Initialize SPMD parallel communication. It is recommended that
366 : ! COMM (main communicator) and NPRYZXY (2D decomposition) be set.
367 : !
368 : ! Set the mod* variables only if you are acquainted with their
369 : ! meaning (default is 0).
370 : !
371 : ! !REVISION HISTORY:
372 : ! 2001.09.01 Lin
373 : ! 2002.02.15 Putman Modified for Global Array code
374 : ! 2002.04.09 Putman Added ProTeX documentation
375 : ! 2002.08.06 Sawyer Added optional communicator input argument
376 : ! 2006.06.15 Sawyer Added CAM-dependent optional arguments
377 : !
378 : !EOP
379 : !------------------------------------------------------------------------------
380 : !BOC
381 : !
382 : ! !LOCAL VARIABLES:
383 : integer mysize
384 : integer using_window, vertical_lines, latitude_lines
385 : integer local_dynamic_storage, numpro_use
386 : real*8 geopkrat, one, ghostrat
387 :
388 : ! Initialize MPI; allow for general communicator
389 1536 : if ( present(comm) ) then
390 1536 : call mpi_start( comm )
391 : else
392 0 : call mpi_start( MPI_COMM_WORLD )
393 : endif
394 : ! Initialize OpenMP
395 1536 : call omp_start
396 : !
397 : ! Adopt 2D decomposition if provided.
398 : !
399 1536 : modcam_npryz = (/ 1,1,1,1 /) ! Default value (sequential)
400 1536 : if ( present( npryzxy ) ) then
401 1536 : modcam_npryz(1:4) = npryzxy(1:4)
402 1536 : modcam_tagoffset = modcam_npryz(3) * modcam_npryz(4)
403 : endif
404 1536 : if (gid .eq. 0) then
405 2 : write (iulog,*) 'MOD_COMM - modcam_npryz = ', &
406 2 : modcam_npryz(1), modcam_npryz(2), &
407 4 : modcam_npryz(3), modcam_npryz(4)
408 2 : write (iulog,*) 'MOD_COMM - modcam_tagoffset = ', modcam_tagoffset
409 : endif
410 :
411 : !
412 : ! Set CAM optimization variables
413 : !
414 : ! modcam_method refers to irregular communications for transposes
415 : ! modcam_geopk refers to irregular communications for the geopotential
416 : ! modcam_gatscat refers to irregular communications for gather/scatters
417 : ! For any of these, a value of 0 means source data will be gathered into a contiguous
418 : ! buffer (window), communicated to a contiguous buffer (window) in the target, and
419 : ! then scattered to its final destination; a value of 1 means MPI derived types will
420 : ! be used (hence not requiring window storage).
421 : ! modcam_maxirr refers to maximum number of irregular communications to be active at once
422 1536 : modcam_method = 0 ! Default value
423 1536 : modcam_geopk = 0 ! Default value
424 1536 : modcam_gatscat = 0 ! Default value
425 1536 : modcam_maxirr = 1 ! Default value
426 1536 : if ( present( mod_method ) ) modcam_method = mod_method
427 1536 : if ( present( mod_geopk ) ) modcam_geopk = mod_geopk
428 1536 : if ( present( mod_gatscat ) ) modcam_gatscat = mod_gatscat
429 1536 : if ( present( mod_maxirr ) ) modcam_maxirr = mod_maxirr
430 :
431 1536 : if (gid .eq. 0) then
432 2 : write(iulog,*) 'MOD_COMM - modcam_method modcam_geopk modcam_gatscat modcam_maxirr = ', &
433 4 : modcam_method, modcam_geopk, modcam_gatscat, modcam_maxirr
434 : endif
435 :
436 : !
437 : ! End CAM optimizations
438 : !
439 :
440 1536 : MaxTrf = modcam_maxirr
441 1536 : max_irr = modcam_maxirr
442 :
443 1536 : win_count = 0
444 : !
445 : !*************************************************************************
446 : ! local_dynamic_storage is set to 1 when window storage is based on locally dimensioned
447 : ! arrays, 0 otherwise; this occurs when modcam_gatscat equals 1, as it is only the
448 : ! gather/scatters that require global storage.
449 : !*************************************************************************
450 : !
451 : local_dynamic_storage = 0
452 : if (modcam_gatscat .eq. 1) local_dynamic_storage = 1
453 :
454 : !*************************************************************************
455 : ! Override original strategy, as only single 2D lat-lon variables (rather
456 : ! than 3D with multiple tracers) are used for gather/scatters;
457 : ! set local_dynamic_storage to 1 always, and then allow for gather
458 : ! of 2D lat-lon variable in inidat.
459 : !*************************************************************************
460 :
461 1536 : local_dynamic_storage = 1
462 :
463 4608 : allocate( Stats(MAX(nbuf,numpro)*MAX(max_call,max_irr)*MPI_STATUS_SIZE) )
464 6144 : allocate( InHandle(numpro,MaxTrf) )
465 4608 : allocate( OutHandle(numpro,MaxTrf) )
466 :
467 1536 : idimsizz = idimsize
468 : if (local_dynamic_storage .eq. 1) then
469 1536 : if (gid .eq. 0) write(iulog,*) 'Using local dynamic storage for mod_comm window'
470 : else
471 : if (gid .eq. 0) write(iulog,*) 'Using global dynamic storage for mod_comm window'
472 : endif
473 : !
474 : ! Dynamically allocate target global arrays
475 : !
476 : !*************************************************************************
477 : ! Compute additional storage due to ghost latitudes being included in some
478 : ! transposes. Allow 3 ghost points on each side. The required storage
479 : ! could be (6+L)/L times the original storage, where L is the number of
480 : ! latitude lines in the subdomain. Ghost points can also occur in the
481 : ! vertical due to edge quantities, but this would not occur simultaneously
482 : ! with ghost points in latitude; the extra storage due to vertical ghost
483 : ! points is not nearly as great as with latitude.
484 : !*************************************************************************
485 1536 : using_window = 1 ! This is a local variable
486 1536 : if (modcam_method .eq. 1) using_window = 0
487 1536 : one = real(1,r8)
488 0 : ghostrat = one
489 : if (using_window .eq. 1 .and. local_dynamic_storage .eq. 1) then
490 1536 : latitude_lines = real(PLAT,r8)/real(modcam_npryz(1),r8)
491 1536 : ghostrat = real(6+latitude_lines,r8)/real(latitude_lines,r8)
492 : endif
493 1536 : if (gid .eq. 0) write(iulog,*) 'Mod_comm - ghostrat = ', ghostrat
494 :
495 : !*************************************************************************
496 : ! Compute extent to which required window storage for geopotential computation
497 : ! exceeds that of transpose - relevant only for local dynamic storage,
498 : ! since with global storage there will be enough space anyway; also,
499 : ! this applies only when using window; further, this applies only when
500 : ! the CAM variable geopktrans equals 1, though we do not test for that here.
501 : ! The geopotential calculation sends a latitude line to every other process
502 : ! either vertically above or below the given process; there can be
503 : ! at most modcam_npryz(2)-1 such target processes; compared to transposes
504 : ! (which send all vertical lines), the amount of data sent is expressed
505 : ! as the ratio geopkrat; our concern is making the window (whose size
506 : ! is computed based on transposes) large enough, so we must multiply its
507 : ! size by geopkrat; we never shrink the window, so geopkrat >= 1.
508 : !*************************************************************************
509 1536 : using_window = 1 ! This is a local variable
510 1536 : if (modcam_geopk .eq. 1) using_window = 0
511 1536 : one = real(1,r8)
512 0 : geopkrat = one
513 : if (using_window .eq. 1 .and. local_dynamic_storage .eq. 1) then
514 1536 : vertical_lines = ceiling(real(PLEV,r8)/real(modcam_npryz(2),r8))
515 1536 : geopkrat = real(modcam_npryz(2)-1,r8)/real(vertical_lines,r8)
516 1536 : geopkrat = max(geopkrat,one)
517 : endif
518 1536 : if (gid .eq. 0) write(iulog,*) 'Mod_comm - geopkrat = ', geopkrat
519 :
520 : !*************************************************************************
521 : ! beginning of CAM totvar computation
522 : !*************************************************************************
523 :
524 : ! CAM contains two kinds of transposes. The most commonly referred to transposes
525 : ! refer to those which connect the xy and yz decompositions. Depending on
526 : ! the physics decomposition, CAM might additionally compute transposes between
527 : ! the dynamics and physics; this depends on the variable phys_loadbalance.
528 : ! Furthermore, these transposes might or might not be computed using mod_comm.
529 : ! The former transposes are generally performed one variable at a time; the
530 : ! latter transposes combine all variables to be transposed, including the
531 : ! full complement of tracers. The maximum number of variables to be
532 : ! simultaneously subject to irregular communications is dependent on
533 : ! whether or not mod_comm is used to compute dynamics-physics transposes
534 : ! and could depend on the number of tracers.
535 :
536 : ! Compute maximum number of variables to be simultaneously subject
537 : ! to irregular communications (e.g., transposed variables based on CAM)
538 : ! and store in the variable 'totvar'.
539 :
540 : ! Tracmax is the number of tracers simultaneously transposed within dynamics;
541 : ! Tracbmax is the number of tracers simultaneously border comunicated within trac2d;
542 : ! both of these are currently hardwired to 1.
543 1536 : tracmax = 1
544 1536 : tracbmax = 1
545 1536 : totvar = tracmax
546 :
547 : ! Now consider dynamics-physics transposes in CAM dp_coupling (dpvarmax)
548 : ! If phys_transpose_mod is still -1, that means it has not been updated
549 : ! by CAM and hence mod_comm will not be used for dynamics-physics transposes.
550 : ! (NOTE: phys_transpose_mod is computed in phys_grid_setopts in phys_grid.F90.)
551 :
552 : ! Also note that the logic involving phys_transpose_mod and phys_transpose_modmin
553 : ! must remain consistent with the coding in phys_grid.F90. Additionally,
554 : ! phys_transpose_vars must remain consistent with the coding in dp_coupling.F90.
555 : ! (See above declaration and initialization for CAM-specific variables.)
556 :
557 : ! (begin dpvarmax calculation)
558 :
559 1536 : if (phys_transpose_mod .eq. -1) then
560 1536 : if (gid .eq. 0) write(iulog,*) &
561 2 : '(MOD_COMM) - mod_comm not being used for dynamcis-physics transposes'
562 1536 : dpvarmax = 0
563 : !
564 : ! If phys_transpose_mod is >= phys_transpose_modmin, that is a signal that mod_comm is to be used
565 : ! for dynamics/physics transposes in CAM. In that case, one must allocate enough window
566 : ! storage for those transposes. Presently, the number of such simultaneously transposed
567 : ! variables equals phys_transpose_vars plus the number of constituents.
568 : !
569 0 : elseif (phys_transpose_mod .ge. phys_transpose_modmin) then
570 0 : dpvarmax = phys_transpose_vars + max_trac
571 : else
572 0 : dpvarmax = 0
573 : endif
574 :
575 : ! (end dpvarmax calculation)
576 :
577 : ! totvar is the maximum of (1) the number of tracers to be simultaneously transposed
578 : ! within the dynamics, and (2) the number of variables to be transposed between
579 : ! dynamics and physics instantiations in CAM
580 :
581 1536 : totvar = max(totvar, dpvarmax)
582 :
583 : !*************************************************************************
584 : ! end of CAM totvar computation
585 : !*************************************************************************
586 :
587 1538 : if (gid .eq. 0) write(iulog,*) 'Mod_comm - tracmax dpvarmax totvar tracbmax = ', &
588 4 : tracmax, dpvarmax, totvar, tracbmax
589 :
590 1536 : idimsizz = (idimsize/max_nq)*tracbmax
591 1536 : sizet1 = idimsizz*nbuf*max_call
592 : ! Adjust window sizes for geopotential and/or ghost points
593 1536 : sizer8 = PLON*platg*(PLEV+1)*totvar*max(geopkrat,ghostrat)*max_irr
594 1536 : sizer4 = PLON*platg*(PLEV+1)*totvar*max(geopkrat,ghostrat)*max_irr
595 1536 : sizei4 = PLON*PLAT*PLEV*max_irr
596 :
597 : ! Compute local storage requirement for irregular communications by dividing
598 : ! global requirement by the number of tasks. Allow slack factor to account
599 : ! for nonuniformity of decomposition and ghost zones. Not valid for global
600 : ! operations such as gathers and scatters when local windows are used.
601 : if (local_dynamic_storage .eq. 1) then
602 1536 : numpro_use = modcam_npryz(1) * modcam_npryz(2)
603 1536 : sizer8 = ceiling( alloc_slack_factor*real(sizer8,r8)/real(numpro_use,r8) )
604 :
605 : ! Allow for gather of single 2D lat-lon variable in inidat.
606 1536 : if (modcam_gatscat .eq. 0) sizer8 = max( sizer8, PLON*PLAT*max_irr )
607 :
608 1536 : sizer4 = ceiling( alloc_slack_factor*real(sizer4,r8)/real(numpro_use,r8) )
609 : ! The only i4 irregular communications in CAM occur in io_dist.
610 1536 : sizei4 = 1
611 : endif
612 :
613 : # if defined ( NOR4 )
614 1536 : sizer4 = 1
615 1536 : if (gid .eq. 0) write(iulog,*) 'Mod_comm - r4 windows disabled'
616 : # endif
617 :
618 1536 : using_window = 1 ! This is a local variable
619 1536 : if (modcam_method .eq. 1 .and. modcam_geopk .eq. 1) using_window = 0
620 : if (using_window .eq. 0) then
621 0 : if (gid .eq. 0) write(iulog,*) 'Mod_comm - r8 and r4 windows set to trivial size'
622 0 : sizer8 = 1
623 0 : sizer4 = 1
624 : endif
625 :
626 : ! Allocate global storage
627 :
628 4608 : allocate( ga_t1_r(sizet1) )
629 3072 : allocate( ga_t1_s(sizet1) )
630 4608 : allocate( ga_r8_r(sizer8) )
631 3072 : allocate( ga_r8_s(sizer8) )
632 4608 : allocate( ga_r4_r(sizer4) )
633 3072 : allocate( ga_r4_s(sizer4) )
634 4608 : allocate( ga_i4_r(sizei4) )
635 3072 : allocate( ga_i4_s(sizei4) )
636 :
637 : ! Initialize windows
638 :
639 1536 : mysize = sizet1
640 1536 : call win_init_r8(comm, t1_win, ga_t1_r, mysize)
641 1536 : if (gid .eq. 0) write(iulog,*) 'Mod_comm t1_win window size = ', mysize
642 :
643 1536 : mysize = sizer8
644 1536 : call win_init_r8(comm, r8_win, ga_r8_r, mysize)
645 1536 : if (gid .eq. 0) write(iulog,*) 'Mod_comm r8_win window size = ', mysize
646 :
647 1536 : mysize = sizer4
648 1536 : call win_init_r4(comm, r4_win, ga_r4_r, mysize)
649 1536 : if (gid .eq. 0) write(iulog,*) 'Mod_comm r4_win window size = ', mysize
650 :
651 1536 : mysize = sizei4
652 1536 : call win_init_i4(comm, i4_win, ga_i4_r, mysize)
653 1536 : if (gid .eq. 0) write(iulog,*) 'Mod_comm i4_win window size = ', mysize
654 :
655 1536 : igosouth = 0
656 1536 : igonorth = 1
657 1536 : ifromsouth = 1
658 1536 : ifromnorth = 0
659 :
660 : !EOC
661 1536 : end subroutine mp_init
662 : !------------------------------------------------------------------------------
663 : !------------------------------------------------------------------------------
664 : !BOP
665 : ! !ROUTINE: mp_exit --- End SPMD parallel communication
666 : !
667 : ! !INTERFACE:
668 0 : subroutine mp_exit( comm )
669 : ! !INPUT PARAMETERS:
670 : integer, intent(in) :: comm ! communicator
671 : ! !DESCRIPTION:
672 : !
673 : ! End SPMD parallel communication
674 : !
675 : ! !REVISION HISTORY:
676 : ! 2001.09.01 Lin
677 : ! 2002.02.15 Putman Modified for Global Array code
678 : ! 2002.04.09 Putman Added ProTeX documentation
679 : !
680 : !EOP
681 : !------------------------------------------------------------------------------
682 : !BOC
683 0 : call MPI_FINALIZE (ierror)
684 0 : return
685 : !EOC
686 : end subroutine mp_exit
687 : !------------------------------------------------------------------------------
688 : !------------------------------------------------------------------------------
689 : !BOP
690 : ! !ROUTINE: omp_start --- Start openMP parallelism
691 : !
692 : ! !INTERFACE:
693 1536 : subroutine omp_start
694 : ! !DESCRIPTION:
695 : !
696 : ! Start openMP parallelism
697 : !
698 : ! !REVISION HISTORY:
699 : ! 02.02.15 Putman
700 : ! 02.04.09 Putman Added ProTeX documentation
701 : !
702 : !EOP
703 : !------------------------------------------------------------------------------
704 : !BOC
705 : !
706 : ! !LOCAL VARIABLES:
707 : integer ios, n, nowpro, nowcpu
708 :
709 : ! Compute number of OpenMP threads
710 :
711 : #if defined(_OPENMP)
712 :
713 : integer omp_get_num_threads
714 : !$omp parallel
715 : numcpu = omp_get_num_threads()
716 : !$omp end parallel
717 :
718 : #else
719 1536 : numcpu = 1
720 : #endif
721 :
722 : !EOC
723 1536 : end subroutine omp_start
724 : !------------------------------------------------------------------------------
725 :
726 : !------------------------------------------------------------------------------
727 : !BOP
728 : ! !ROUTINE: mpi_start --- Start MPI parallelism
729 : !
730 : ! !INTERFACE:
731 1536 : subroutine mpi_start( comm )
732 : ! !INPUT PARAMETERS:
733 : integer :: comm ! communicator
734 : ! !DESCRIPTION:
735 : !
736 : ! Start MPI parallelism
737 : !
738 : ! !REVISION HISTORY:
739 : ! 02.02.15 Putman
740 : ! 02.04.09 Putman Added ProTeX documentation
741 : ! 02.08.06 Sawyer Added communicator input arguments
742 : !
743 : !EOP
744 : !------------------------------------------------------------------------------
745 : !BOC
746 : !
747 : ! !LOCAL VARIABLES:
748 : logical flag
749 : integer npthreads
750 :
751 1536 : call MPI_INITIALIZED( flag, ierror )
752 1536 : if ( .not. flag ) then
753 0 : call MPI_INIT( ierror )
754 0 : comm = MPI_COMM_WORLD
755 : endif
756 :
757 1536 : call MPI_COMM_RANK (comm, gid, ierror)
758 1536 : call MPI_COMM_SIZE (comm, numpro, ierror)
759 1536 : call MPI_COMM_DUP (comm, commglobal, ierror)
760 : !EOC
761 1536 : end subroutine mpi_start
762 : !------------------------------------------------------------------------------
763 :
764 : !------------------------------------------------------------------------------
765 : !BOP
766 : ! !ROUTINE: win_init_r8 --- Initialize real*8 communication window
767 : !
768 : ! !INTERFACE:
769 3072 : subroutine win_init_r8(comm, win, ga, isize)
770 : ! !INPUT PARAMETERS:
771 : integer, intent(in) :: comm ! communicator
772 : integer, intent(in) :: isize
773 : real(r8), intent(in) :: ga(isize)
774 : ! !OUTPUT PARAMETERS:
775 : type (window), intent(inout) :: win
776 : ! !DESCRIPTION:
777 : !
778 : ! Initialize real*8 communication window
779 : !
780 : ! !REVISION HISTORY:
781 : ! 02.02.15 Putman
782 : ! 02.04.09 Putman Added ProTeX documentation
783 : !
784 : !EOP
785 : !------------------------------------------------------------------------------
786 : !BOC
787 : !
788 : ! !LOCAL VARIABLES:
789 :
790 3072 : win_count = win_count + 1
791 3072 : win%id = win_count
792 3072 : win%size = isize
793 3072 : win%ncall_s = 0
794 3072 : win%ncall_r = 0
795 3072 : win%nsend = 0
796 3072 : win%nrecv = 0
797 3072 : win%nread = 0
798 9216 : allocate( win%sqest(MAX(nbuf,numpro)*MAX(max_call,max_irr)) )
799 6144 : allocate( win%rqest(MAX(nbuf,numpro)*MAX(max_call,max_irr)) )
800 : !EOC
801 3072 : end subroutine win_init_r8
802 : !------------------------------------------------------------------------------
803 : !------------------------------------------------------------------------------
804 : !BOP
805 : ! !ROUTINE: win_init_r4 --- Initialize real*4 communication window
806 : !
807 : ! !INTERFACE:
808 1536 : subroutine win_init_r4(comm, win, ga, isize)
809 : ! !INPUT PARAMETERS:
810 : integer, intent(in) :: comm ! communicator
811 : integer, intent(in) :: isize
812 : real(r4), intent(in) :: ga(isize)
813 : ! !OUTPUT PARAMETERS:
814 : type (window), intent(inout) :: win
815 : ! !DESCRIPTION:
816 : !
817 : ! Initialize real*4 communication window
818 : !
819 : ! !REVISION HISTORY:
820 : ! 02.02.15 Putman
821 : ! 02.04.09 Putman Added ProTeX documentation
822 : !
823 : !EOP
824 : !------------------------------------------------------------------------------
825 : !BOC
826 : !
827 : ! !LOCAL VARIABLES:
828 :
829 1536 : win_count = win_count + 1
830 1536 : win%id = win_count
831 1536 : win%size = isize
832 1536 : win%ncall_s = 0
833 1536 : win%ncall_r = 0
834 1536 : win%nsend = 0
835 1536 : win%nrecv = 0
836 1536 : win%nread = 0
837 4608 : allocate( win%sqest(MAX(nbuf,numpro)*MAX(max_call,max_irr)) )
838 3072 : allocate( win%rqest(MAX(nbuf,numpro)*MAX(max_call,max_irr)) )
839 : !EOC
840 1536 : end subroutine win_init_r4
841 : !------------------------------------------------------------------------------
842 : !------------------------------------------------------------------------------
843 : !BOP
844 : ! !ROUTINE: win_init_i4 --- Initialize integer*4 communication window
845 : !
846 : ! !INTERFACE:
847 1536 : subroutine win_init_i4(comm, win, ga, isize)
848 : ! !INPUT PARAMETERS:
849 : integer, intent(in) :: comm ! communicator
850 : integer, intent(in) :: isize
851 : integer(i4), intent(in) :: ga(isize)
852 : ! !OUTPUT PARAMETERS:
853 : type (window), intent(inout) :: win
854 : ! !DESCRIPTION:
855 : !
856 : ! Initialize integer*4 communication window
857 : !
858 : ! !REVISION HISTORY:
859 : ! 02.02.15 Putman
860 : ! 02.04.09 Putman Added ProTeX documentation
861 : !
862 : !EOP
863 : !------------------------------------------------------------------------------
864 : !BOC
865 : !
866 : ! !LOCAL VARIABLES:
867 :
868 1536 : win_count = win_count + 1
869 1536 : win%id = win_count
870 1536 : win%size = isize
871 1536 : win%ncall_s = 0
872 1536 : win%ncall_r = 0
873 1536 : win%nsend = 0
874 1536 : win%nrecv = 0
875 1536 : win%nread = 0
876 4608 : allocate( win%sqest(MAX(nbuf,numpro)*MAX(max_call,max_irr)) )
877 3072 : allocate( win%rqest(MAX(nbuf,numpro)*MAX(max_call,max_irr)) )
878 : !EOC
879 1536 : end subroutine win_init_i4
880 : !------------------------------------------------------------------------------
881 : !------------------------------------------------------------------------------
882 : !BOP
883 : ! !ROUTINE: mp_send4d_ns --- Send 4d north/south ghost latitudes (real*8)
884 : !
885 : ! !INTERFACE:
886 16450560 : subroutine mp_send4d_ns(comm, im, jm, km, nq, jfirst, jlast, kfirst, klast, &
887 16450560 : ng_s, ng_n, q)
888 : !
889 : ! !INPUT PARAMETERS:
890 : integer, intent(in):: comm ! communicator
891 : integer, intent(in):: im, jm, km, nq
892 : integer, intent(in):: jfirst, jlast
893 : integer, intent(in):: kfirst, klast
894 : integer, intent(in):: ng_s ! southern zones to ghost
895 : integer, intent(in):: ng_n ! northern zones to ghost
896 : real(r8), intent(in):: q(im,jfirst-ng_s:jlast+ng_n,kfirst:klast,nq)
897 : !
898 : ! !DESCRIPTION:
899 : !
900 : ! Send 4d north/south ghost latitudes
901 : !
902 : ! !REVISION HISTORY:
903 : ! 2001.09.01 Lin
904 : ! 2002.02.15 Putman Modified for Global Arrays code
905 : ! 2002.04.09 Putman Added ProTeX documentation
906 : !
907 : !EOP
908 : !------------------------------------------------------------------------------
909 : !BOC
910 : !
911 : ! !LOCAL VARIABLES:
912 : integer :: gidu
913 :
914 : #if defined( MODCM_TIMING )
915 : call t_startf('mod_comm communication')
916 : #endif
917 :
918 16450560 : call MPI_COMM_RANK (comm, gidu, ierror)
919 :
920 16450560 : call Win_Open(comm, t1_win)
921 :
922 : ! Send to south
923 16450560 : if ( jfirst > 1 ) then
924 16193520 : t1_win%src = gidu - 1
925 16193520 : t1_win%offset_r = ifromsouth*idimsizz + (t1_win%ncall_s-1)*idimsizz*nbuf
926 16193520 : t1_win%size_r = im*ng_s*(klast-kfirst+1)*nq
927 16193520 : call Ga_RecvInit_r8(comm, t1_win, ga_t1_r)
928 16193520 : t1_win%dest = gidu - 1
929 16193520 : t1_win%offset_s = igosouth*idimsizz + (t1_win%ncall_s-1)*idimsizz*nbuf
930 : call Ga_Put4d_r8(comm, q, t1_win, im, jm, km, nq, &
931 : 1, im, jfirst-ng_s, jlast+ng_n, kfirst, klast, 1, nq, &
932 : 1, im, jfirst, jfirst+ng_n-1, kfirst, klast, 1, nq, &
933 16193520 : ga_t1_s, ga_t1_r )
934 : endif
935 : ! Send to north
936 16450560 : if ( jlast < jm ) then
937 16193520 : t1_win%src = gidu + 1
938 16193520 : t1_win%offset_r = ifromnorth*idimsizz + (t1_win%ncall_s-1)*idimsizz*nbuf
939 16193520 : t1_win%size_r = im*ng_n*(klast-kfirst+1)*nq
940 16193520 : call Ga_RecvInit_r8(comm, t1_win, ga_t1_r)
941 16193520 : t1_win%dest = gidu + 1
942 16193520 : t1_win%offset_s = igonorth*idimsizz + (t1_win%ncall_s-1)*idimsizz*nbuf
943 : call Ga_Put4d_r8(comm, q, t1_win, im, jm, km, nq, &
944 : 1, im, jfirst-ng_s, jlast+ng_n, kfirst, klast, 1, nq, &
945 : 1, im, jlast-ng_s+1, jlast, kfirst, klast, 1, nq, &
946 16193520 : ga_t1_s, ga_t1_r )
947 : endif
948 :
949 : #if defined( MODCM_TIMING )
950 : call t_stopf('mod_comm communication')
951 : #endif
952 :
953 : !EOC
954 16450560 : end subroutine mp_send4d_ns
955 : !------------------------------------------------------------------------------
956 : !------------------------------------------------------------------------------
957 : !BOP
958 : ! !ROUTINE: mp_recv4d_ns --- Receive 4d north/south ghost latitudes (real*8)
959 : !
960 : ! !INTERFACE:
961 16450560 : subroutine mp_recv4d_ns(comm, im, jm, km, nq, jfirst, jlast, kfirst, klast, &
962 16450560 : ng_s, ng_n, q)
963 : !
964 : ! !INPUT PARAMETERS:
965 : integer, intent(in):: comm ! communicator
966 : integer, intent(in):: im, jm, km, nq
967 : integer, intent(in):: jfirst, jlast
968 : integer, intent(in):: kfirst, klast
969 : integer, intent(in):: ng_s ! southern zones to ghost
970 : integer, intent(in):: ng_n ! northern zones to ghost
971 : ! !OUTPUT PARAMETERS:
972 : real(r8), intent(inout):: q(im,jfirst-ng_s:jlast+ng_n,kfirst:klast,nq)
973 : !
974 : ! !DESCRIPTION:
975 : !
976 : ! Receive 4d north/south ghost latitudes
977 : !
978 : ! !REVISION HISTORY:
979 : ! 2001.09.01 Lin
980 : ! 2002.02.15 Putman Modified for Global Arrays code
981 : ! 2002.04.09 Putman Added ProTeX documentation
982 : !
983 : !EOP
984 : !------------------------------------------------------------------------------
985 : !BOC
986 : !
987 : ! !LOCAL VARIABLES:
988 : integer :: gidu
989 :
990 : #if defined( MODCM_TIMING )
991 : call t_startf('mod_comm communication')
992 : #endif
993 :
994 16450560 : call MPI_COMM_RANK (comm, gidu, ierror)
995 :
996 16450560 : call Win_Close(comm, t1_win)
997 :
998 : ! Recv from south
999 16450560 : if ( jfirst > 1 ) then
1000 16193520 : t1_win%src = gidu-1
1001 16193520 : t1_win%offset_r = ifromsouth*idimsizz + (t1_win%ncall_r-1)*idimsizz*nbuf
1002 : call Ga_Get4d_r8(comm, q, t1_win, im, jm, km, nq, &
1003 : 1, im, jfirst-ng_s, jlast+ng_n, kfirst, klast, 1, nq, &
1004 : 1, im, jfirst-ng_s, jfirst-1, kfirst, klast, 1, nq, &
1005 16193520 : ga_t1_r )
1006 : endif
1007 : ! Recv from north
1008 16450560 : if ( jlast < jm ) then
1009 16193520 : t1_win%src = gidu+1
1010 16193520 : t1_win%offset_r = ifromnorth*idimsizz + (t1_win%ncall_r-1)*idimsizz*nbuf
1011 : call Ga_Get4d_r8(comm, q, t1_win, im, jm, km, nq, &
1012 : 1, im, jfirst-ng_s, jlast+ng_n, kfirst, klast, 1, nq, &
1013 : 1, im, jlast+1, jlast+ng_n, kfirst, klast, 1, nq, &
1014 16193520 : ga_t1_r )
1015 : endif
1016 :
1017 16450560 : call Win_Finalize(comm, t1_win)
1018 :
1019 : #if defined( MODCM_TIMING )
1020 : call t_stopf('mod_comm communication')
1021 : #endif
1022 :
1023 : !EOC
1024 16450560 : end subroutine mp_recv4d_ns
1025 : !------------------------------------------------------------------------------
1026 : !------------------------------------------------------------------------------
1027 : !BOP
1028 : ! !ROUTINE: mp_send4d_ns_r4 --- Send 4d north/south ghost latitudes (real*4)
1029 : !
1030 : ! !INTERFACE:
1031 0 : subroutine mp_send4d_ns_r4(comm, im, jm, km, nq, jfirst, jlast, kfirst, klast, &
1032 0 : ng_s, ng_n, q)
1033 : !
1034 : ! !INPUT PARAMETERS:
1035 : integer, intent(in):: comm ! communicator
1036 : integer, intent(in):: im, jm, km, nq
1037 : integer, intent(in):: jfirst, jlast
1038 : integer, intent(in):: kfirst, klast
1039 : integer, intent(in):: ng_s ! southern zones to ghost
1040 : integer, intent(in):: ng_n ! northern zones to ghost
1041 : real(r4), intent(in):: q(im,jfirst-ng_s:jlast+ng_n,kfirst:klast,nq)
1042 : !
1043 : ! !DESCRIPTION:
1044 : !
1045 : ! Send 4d north/south ghost latitudes
1046 : !
1047 : ! !REVISION HISTORY:
1048 : ! 2005.03.20 Sawyer Creation from mp_send4d_ns
1049 : !
1050 : !EOP
1051 : !------------------------------------------------------------------------------
1052 : !BOC
1053 : !
1054 : ! !LOCAL VARIABLES:
1055 : integer :: gidu
1056 :
1057 : #if defined ( NOR4 )
1058 0 : write(iulog,*) 'Mod_comm: mp_send4d_ns_r4 - r4 windows disabled - exiting'
1059 0 : stop
1060 : #endif
1061 :
1062 : #if defined( MODCM_TIMING )
1063 : call t_startf('mod_comm communication')
1064 : #endif
1065 :
1066 : call MPI_COMM_RANK (comm, gidu, ierror)
1067 :
1068 : call Win_Open(comm, r4_win)
1069 :
1070 : ! Send to south
1071 : if ( jfirst > 1 ) then
1072 : r4_win%src = gidu - 1
1073 : r4_win%offset_r = ifromsouth*idimsizz + (r4_win%ncall_s-1)*idimsizz*nbuf
1074 : r4_win%size_r = im*ng_s*(klast-kfirst+1)*nq
1075 : call Ga_RecvInit_r4(comm, r4_win, ga_r4_r)
1076 : r4_win%dest = gidu - 1
1077 : r4_win%offset_s = igosouth*idimsizz + (r4_win%ncall_s-1)*idimsizz*nbuf
1078 : call Ga_Put4d_r4(comm, q, r4_win, im, jm, km, nq, &
1079 : 1, im, jfirst-ng_s, jlast+ng_n, kfirst, klast, 1, nq, &
1080 : 1, im, jfirst, jfirst+ng_n-1, kfirst, klast, 1, nq, &
1081 : ga_r4_s, ga_r4_r )
1082 : endif
1083 : ! Send to north
1084 : if ( jlast < jm ) then
1085 : r4_win%src = gidu + 1
1086 : r4_win%offset_r = ifromnorth*idimsizz + (r4_win%ncall_s-1)*idimsizz*nbuf
1087 : r4_win%size_r = im*ng_n*(klast-kfirst+1)*nq
1088 : call Ga_RecvInit_r4(comm, r4_win, ga_r4_r)
1089 : r4_win%dest = gidu + 1
1090 : r4_win%offset_s = igonorth*idimsizz + (r4_win%ncall_s-1)*idimsizz*nbuf
1091 : call Ga_Put4d_r4(comm, q, r4_win, im, jm, km, nq, &
1092 : 1, im, jfirst-ng_s, jlast+ng_n, kfirst, klast, 1, nq, &
1093 : 1, im, jlast-ng_s+1, jlast, kfirst, klast, 1, nq, &
1094 : ga_r4_s, ga_r4_r )
1095 : endif
1096 :
1097 : #if defined( MODCM_TIMING )
1098 : call t_stopf('mod_comm communication')
1099 : #endif
1100 :
1101 : !EOC
1102 : end subroutine mp_send4d_ns_r4
1103 : !------------------------------------------------------------------------------
1104 : !------------------------------------------------------------------------------
1105 : !BOP
1106 : ! !ROUTINE: mp_recv4d_ns_r4 --- Receive 4d north/south ghost latitudes (real*4)
1107 : !
1108 : ! !INTERFACE:
1109 0 : subroutine mp_recv4d_ns_r4(comm, im, jm, km, nq, jfirst, jlast, kfirst, klast, &
1110 0 : ng_s, ng_n, q)
1111 : !
1112 : ! !INPUT PARAMETERS:
1113 : integer, intent(in):: comm ! communicator
1114 : integer, intent(in):: im, jm, km, nq
1115 : integer, intent(in):: jfirst, jlast
1116 : integer, intent(in):: kfirst, klast
1117 : integer, intent(in):: ng_s ! southern zones to ghost
1118 : integer, intent(in):: ng_n ! northern zones to ghost
1119 : ! !OUTPUT PARAMETERS:
1120 : real(r4), intent(inout):: q(im,jfirst-ng_s:jlast+ng_n,kfirst:klast,nq)
1121 : !
1122 : ! !DESCRIPTION:
1123 : !
1124 : ! Receive 4d north/south ghost latitudes (real*4)
1125 : !
1126 : ! !REVISION HISTORY:
1127 : ! 2005.03.20 Sawyer Creation from mp_recv4d_ns
1128 : !
1129 : !EOP
1130 : !------------------------------------------------------------------------------
1131 : !BOC
1132 : !
1133 : ! !LOCAL VARIABLES:
1134 : integer :: gidu
1135 :
1136 : #if defined ( NOR4 )
1137 0 : write(iulog,*) 'Mod_comm: mp_recv4d_ns_r4 - r4 windows disabled - exiting'
1138 0 : stop
1139 : #endif
1140 :
1141 : #if defined( MODCM_TIMING )
1142 : call t_startf('mod_comm communication')
1143 : #endif
1144 :
1145 : call MPI_COMM_RANK (comm, gidu, ierror)
1146 :
1147 : call Win_Close(comm, r4_win)
1148 :
1149 : ! Recv from south
1150 : if ( jfirst > 1 ) then
1151 : r4_win%src = gidu-1
1152 : r4_win%offset_r = ifromsouth*idimsizz + (r4_win%ncall_r-1)*idimsizz*nbuf
1153 : call Ga_Get4d_r4(comm, q, r4_win, im, jm, km, nq, &
1154 : 1, im, jfirst-ng_s, jlast+ng_n, kfirst, klast, 1, nq, &
1155 : 1, im, jfirst-ng_s, jfirst-1, kfirst, klast, 1, nq, &
1156 : ga_r4_r )
1157 : endif
1158 : ! Recv from north
1159 : if ( jlast < jm ) then
1160 : r4_win%src = gidu+1
1161 : r4_win%offset_r = ifromnorth*idimsizz + (r4_win%ncall_r-1)*idimsizz*nbuf
1162 : call Ga_Get4d_r4(comm, q, r4_win, im, jm, km, nq, &
1163 : 1, im, jfirst-ng_s, jlast+ng_n, kfirst, klast, 1, nq, &
1164 : 1, im, jlast+1, jlast+ng_n, kfirst, klast, 1, nq, &
1165 : ga_r4_r )
1166 : endif
1167 :
1168 : call Win_Finalize(comm, r4_win)
1169 :
1170 : #if defined( MODCM_TIMING )
1171 : call t_stopf('mod_comm communication')
1172 : #endif
1173 :
1174 : !EOC
1175 : end subroutine mp_recv4d_ns_r4
1176 : !------------------------------------------------------------------------------
1177 : !------------------------------------------------------------------------------
1178 : !BOP
1179 : ! !ROUTINE: mp_send2_ns --- Send 2 variables north/south ghost latitudes
1180 : !
1181 : ! !INTERFACE:
1182 129024 : subroutine mp_send2_ns(comm, im, jm, km, jfirst, jlast, kfirst, klast, &
1183 129024 : nd, q1, q2)
1184 : !
1185 : ! !INPUT PARAMETERS:
1186 : integer, intent(in):: comm ! communicator
1187 : integer, intent(in):: im, jm, km
1188 : integer, intent(in):: jfirst, jlast
1189 : integer, intent(in):: kfirst, klast !careful: klast might be klast+1
1190 : integer, intent(in):: nd
1191 : real(r8), intent(in):: q1(im,jfirst-nd:jlast+nd,kfirst:klast)
1192 : real(r8), intent(in):: q2(im,jfirst-nd:jlast+nd,kfirst:klast)
1193 : !
1194 : ! !DESCRIPTION:
1195 : !
1196 : ! Send 2 variables north/south ghost latitudes
1197 : !
1198 : ! !REVISION HISTORY:
1199 : ! 2001.09.01 Lin
1200 : ! 2002.02.15 Putman Modified for Global Arrays code
1201 : ! 2002.04.09 Putman Added ProTeX documentation
1202 : !
1203 : !EOP
1204 : !------------------------------------------------------------------------------
1205 : !BOC
1206 : !
1207 : ! !LOCAL VARIABLES:
1208 : integer :: gidu
1209 :
1210 : #if defined( MODCM_TIMING )
1211 : call t_startf('mod_comm communication')
1212 : #endif
1213 :
1214 129024 : call MPI_COMM_RANK (comm, gidu, ierror)
1215 :
1216 129024 : call Win_Open(comm, t1_win)
1217 :
1218 : ! Send to south
1219 129024 : if ( jfirst > 1 ) then
1220 127008 : t1_win%src = gidu - 1
1221 127008 : t1_win%size_r = im*(klast-kfirst+1)
1222 127008 : t1_win%offset_r = ifromsouth*idimsizz + (t1_win%ncall_s-1)*idimsizz*nbuf
1223 127008 : call Ga_RecvInit_r8(comm, t1_win, ga_t1_r)
1224 127008 : t1_win%offset_r = t1_win%offset_r + im*(klast-kfirst+1)
1225 127008 : call Ga_RecvInit_r8(comm, t1_win, ga_t1_r)
1226 127008 : t1_win%dest = gidu - 1
1227 127008 : t1_win%offset_s = igosouth*idimsizz + (t1_win%ncall_s-1)*idimsizz*nbuf
1228 : call Ga_Put4d_r8( comm, q1, t1_win, im, jm, km, 2, &
1229 : 1, im, jfirst-nd, jlast+nd, kfirst, klast, 1, 1, &
1230 : 1, im, jfirst, jfirst, kfirst, klast, 1, 1, &
1231 127008 : ga_t1_s, ga_t1_r )
1232 127008 : t1_win%offset_s = t1_win%offset_s + im*(klast-kfirst+1)
1233 : call Ga_Put4d_r8( comm, q2, t1_win, im, jm, km, 2, &
1234 : 1, im, jfirst-nd, jlast+nd, kfirst, klast, 2, 2, &
1235 : 1, im, jfirst, jfirst, kfirst, klast, 2, 2, &
1236 127008 : ga_t1_s, ga_t1_r )
1237 : endif
1238 : ! Send to north
1239 129024 : if ( jlast < jm ) then
1240 127008 : t1_win%src = gidu + 1
1241 127008 : t1_win%size_r = im*(klast-kfirst+1)
1242 127008 : t1_win%offset_r = ifromnorth*idimsizz + (t1_win%ncall_s-1)*idimsizz*nbuf
1243 127008 : call Ga_RecvInit_r8(comm, t1_win, ga_t1_r)
1244 127008 : t1_win%offset_r = t1_win%offset_r + im*(klast-kfirst+1)
1245 127008 : call Ga_RecvInit_r8(comm, t1_win, ga_t1_r)
1246 127008 : t1_win%dest = gidu + 1
1247 127008 : t1_win%offset_s = igonorth*idimsizz + (t1_win%ncall_s-1)*idimsizz*nbuf
1248 : call Ga_Put4d_r8( comm, q1, t1_win, im, jm, km, 2, &
1249 : 1, im, jfirst-nd, jlast+nd, kfirst, klast, 1, 1, &
1250 : 1, im, jlast, jlast, kfirst, klast, 1, 1, &
1251 127008 : ga_t1_s, ga_t1_r )
1252 127008 : t1_win%offset_s = t1_win%offset_s + im*(klast-kfirst+1)
1253 : call Ga_Put4d_r8( comm, q2, t1_win, im, jm, km, 2, &
1254 : 1, im, jfirst-nd, jlast+nd, kfirst, klast, 2, 2, &
1255 : 1, im, jlast, jlast, kfirst, klast, 2, 2, &
1256 127008 : ga_t1_s, ga_t1_r )
1257 : endif
1258 :
1259 : #if defined( MODCM_TIMING )
1260 : call t_stopf('mod_comm communication')
1261 : #endif
1262 :
1263 : !EOC
1264 129024 : end subroutine mp_send2_ns
1265 : !------------------------------------------------------------------------------
1266 : !------------------------------------------------------------------------------
1267 : !BOP
1268 : ! !ROUTINE: mp_recv2_ns --- Receive 2 variables north/south ghost latitudes
1269 : !
1270 : ! !INTERFACE:
1271 129024 : subroutine mp_recv2_ns(comm, im, jm, km, jfirst, jlast, kfirst, klast, &
1272 129024 : nd, q1, q2)
1273 : !
1274 : ! !INPUT PARAMETERS:
1275 : integer, intent(in):: comm ! communicator
1276 : integer, intent(in):: im, jm, km
1277 : integer, intent(in):: jfirst, jlast
1278 : integer, intent(in):: kfirst, klast !careful: klast might be klast+1
1279 : integer, intent(in):: nd
1280 : ! !OUTPUT PARAMETERS:
1281 : real(r8), intent(inout):: q1(im,jfirst-nd:jlast+nd,kfirst:klast)
1282 : real(r8), intent(inout):: q2(im,jfirst-nd:jlast+nd,kfirst:klast)
1283 : !
1284 : ! !DESCRIPTION:
1285 : !
1286 : ! Receive 2 variables north/south ghost latitudes
1287 : !
1288 : ! !REVISION HISTORY:
1289 : ! 2001.09.01 Lin
1290 : ! 2002.02.15 Putman Modified for Global Arrays code
1291 : ! 2002.04.09 Putman Added ProTeX documentation
1292 : !
1293 : !EOP
1294 : !------------------------------------------------------------------------------
1295 : !BOC
1296 : ! !LOCAL VARIABLES:
1297 : integer j
1298 : integer :: gidu
1299 :
1300 : #if defined( MODCM_TIMING )
1301 : call t_startf('mod_comm communication')
1302 : #endif
1303 :
1304 129024 : call MPI_COMM_RANK (comm, gidu, ierror)
1305 :
1306 129024 : call Win_Close(comm, t1_win)
1307 :
1308 : ! Recv from south
1309 129024 : if ( jfirst > 1 ) then
1310 127008 : j = jfirst - 1
1311 127008 : t1_win%src = gidu - 1
1312 127008 : t1_win%offset_r = ifromsouth*idimsizz + (t1_win%ncall_r-1)*idimsizz*nbuf
1313 : call Ga_Get4d_r8( comm, q1, t1_win, im, jm, km, 2, &
1314 : 1, im, jfirst-nd, jlast+nd, kfirst, klast, 1, 1, &
1315 : 1, im, j, j, kfirst, klast, 1, 1, &
1316 127008 : ga_t1_r )
1317 127008 : t1_win%offset_r = t1_win%offset_r + im*(klast-kfirst+1)
1318 : call Ga_Get4d_r8( comm, q2, t1_win, im, jm, km, 2, &
1319 : 1, im, jfirst-nd, jlast+nd, kfirst, klast, 2, 2, &
1320 : 1, im, j, j, kfirst, klast, 2, 2, &
1321 127008 : ga_t1_r )
1322 : endif
1323 : ! Recv from north
1324 129024 : if ( jlast < jm ) then
1325 127008 : j = jlast + 1
1326 127008 : t1_win%src = gidu + 1
1327 127008 : t1_win%offset_r = ifromnorth*idimsizz + (t1_win%ncall_r-1)*idimsizz*nbuf
1328 : call Ga_Get4d_r8( comm, q1, t1_win, im, jm, km, 2, &
1329 : 1, im, jfirst-nd, jlast+nd, kfirst, klast, 1, 1, &
1330 : 1, im, j, j, kfirst, klast, 1, 1, &
1331 127008 : ga_t1_r )
1332 127008 : t1_win%offset_r = t1_win%offset_r + im*(klast-kfirst+1)
1333 : call Ga_Get4d_r8( comm, q2, t1_win, im, jm, km, 2, &
1334 : 1, im, jfirst-nd, jlast+nd, kfirst, klast, 2, 2, &
1335 : 1, im, j, j, kfirst, klast, 2, 2, &
1336 127008 : ga_t1_r )
1337 : endif
1338 :
1339 129024 : call Win_Finalize(comm, t1_win)
1340 :
1341 : #if defined( MODCM_TIMING )
1342 : call t_stopf('mod_comm communication')
1343 : #endif
1344 :
1345 : !EOC
1346 129024 : end subroutine mp_recv2_ns
1347 : !------------------------------------------------------------------------------
1348 : !------------------------------------------------------------------------------
1349 : !BOP
1350 : ! !ROUTINE: mp_send3d --- Send ghost region
1351 : !
1352 : ! !INTERFACE:
1353 738816 : subroutine mp_send3d(comm, dest, src, im, jm, km, if, il, jf, jl, kf, kl, &
1354 738816 : i1, i2, j1, j2, k1, k2, q)
1355 : !
1356 : ! !INPUT PARAMETERS:
1357 : integer, intent(in):: comm ! communicator
1358 : integer, intent(in):: dest, src
1359 : integer, intent(in):: im, jm, km
1360 : integer, intent(in):: if, il, jf, jl, kf, kl
1361 : integer, intent(in):: i1, i2, j1, j2, k1, k2
1362 : real(r8), intent(in):: q(if:il, jf:jl, kf:kl)
1363 : !
1364 : ! !DESCRIPTION:
1365 : !
1366 : ! Send a general 3d real*8 ghost region
1367 : !
1368 : ! !REVISION HISTORY:
1369 : ! 02.04.15 Putman
1370 : !
1371 : !EOP
1372 : !------------------------------------------------------------------------------
1373 : !BOC
1374 :
1375 : #if defined( MODCM_TIMING )
1376 : call t_startf('mod_comm communication')
1377 : #endif
1378 :
1379 738816 : call MPI_COMM_SIZE (comm, numcomm, ierror)
1380 :
1381 738816 : call Win_Open(comm, t1_win)
1382 :
1383 : ! Init Recv src
1384 738816 : if ( src >= 0 .and. src < numcomm ) then ! is PE in valid range?
1385 730776 : t1_win%src = src
1386 730776 : t1_win%size_r = (i2-i1+1)*(j2-j1+1)*(k2-k1+1) ! chunk size
1387 730776 : t1_win%offset_r = (t1_win%ncall_s-1)*idimsizz*nbuf
1388 730776 : call Ga_RecvInit_r8(comm, t1_win, ga_t1_r)
1389 : endif
1390 : ! Send ghost region
1391 738816 : if ( dest >= 0 .and. dest < numcomm ) then
1392 730776 : t1_win%dest = dest
1393 730776 : t1_win%offset_s = (t1_win%ncall_s-1)*idimsizz*nbuf
1394 : call Ga_Put4d_r8( comm, q, t1_win, im, jm, km, 1, &
1395 : if, il, jf, jl, kf, kl, 1, 1, &
1396 730776 : i1, i2, j1, j2, k1, k2, 1, 1, ga_t1_s, ga_t1_r )
1397 : endif
1398 :
1399 : #if defined( MODCM_TIMING )
1400 : call t_stopf('mod_comm communication')
1401 : #endif
1402 :
1403 : !EOC
1404 738816 : end subroutine mp_send3d
1405 : !------------------------------------------------------------------------------
1406 : !------------------------------------------------------------------------------
1407 : !BOP
1408 : ! !ROUTINE: mp_recv3d --- Recv ghost region
1409 : !
1410 : ! !INTERFACE:
1411 738816 : subroutine mp_recv3d(comm, src, im, jm, km, if, il, jf, jl, kf, kl, &
1412 738816 : i1, i2, j1, j2, k1, k2, qout)
1413 : !
1414 : ! !INPUT PARAMETERS:
1415 : integer, intent(in):: comm ! communicator
1416 : integer, intent(in):: src
1417 : integer, intent(in):: im, jm, km
1418 : integer, intent(in):: if, il, jf, jl, kf, kl
1419 : integer, intent(in):: i1, i2, j1, j2, k1, k2
1420 : ! !OUTPUT PARAMETERS:
1421 : real(r8), intent(inout):: qout(if:il, jf:jl, kf:kl)
1422 : !
1423 : ! !DESCRIPTION:
1424 : !
1425 : ! Recv a general 3d real*8 ghost region
1426 : !
1427 : ! !REVISION HISTORY:
1428 : ! 02.04.15 Putman
1429 : !
1430 : !EOP
1431 : !------------------------------------------------------------------------------
1432 : !BOC
1433 :
1434 : #if defined( MODCM_TIMING )
1435 : call t_startf('mod_comm communication')
1436 : #endif
1437 :
1438 738816 : call MPI_COMM_SIZE (comm, numcomm, ierror)
1439 :
1440 738816 : call Win_Close(comm, t1_win)
1441 :
1442 : ! Recv from src
1443 738816 : if ( src >= 0 .and. src < numcomm ) then ! is PE in valid range?
1444 730776 : t1_win%src = src
1445 730776 : t1_win%offset_r = (t1_win%ncall_r-1)*idimsizz*nbuf
1446 : call Ga_Get4d_r8( comm, qout, t1_win, im, jm, km, 1, &
1447 : if, il, jf, jl, kf, kl, 1, 1, &
1448 730776 : i1, i2, j1, j2, k1, k2, 1, 1, ga_t1_r )
1449 : endif
1450 :
1451 738816 : call Win_Finalize(comm, t1_win)
1452 :
1453 : #if defined( MODCM_TIMING )
1454 : call t_stopf('mod_comm communication')
1455 : #endif
1456 :
1457 : !EOC
1458 738816 : end subroutine mp_recv3d
1459 : !------------------------------------------------------------------------------
1460 : !------------------------------------------------------------------------------
1461 : !BOP
1462 : ! !ROUTINE: mp_send3d_2 --- Send 2 ghost regions
1463 : !
1464 : ! !INTERFACE:
1465 161280 : subroutine mp_send3d_2(comm, dest, src, im, jm, km, if, il, jf, jl, kf, kl, &
1466 161280 : i1, i2, j1, j2, k1, k2, q1, q2)
1467 : !
1468 : ! !INPUT PARAMETERS:
1469 : integer, intent(in):: comm ! communicator
1470 : integer, intent(in):: dest, src
1471 : integer, intent(in):: im, jm, km
1472 : integer, intent(in):: if, il, jf, jl, kf, kl
1473 : integer, intent(in):: i1, i2, j1, j2, k1, k2
1474 : real(r8), intent(in):: q1(if:il, jf:jl, kf:kl)
1475 : real(r8), intent(in):: q2(if:il, jf:jl, kf:kl)
1476 : !
1477 : ! !DESCRIPTION:
1478 : !
1479 : ! Send two general 3d real*8 ghost region
1480 : !
1481 : ! !REVISION HISTORY:
1482 : ! 02.04.15 Putman
1483 : !
1484 : !EOP
1485 : !------------------------------------------------------------------------------
1486 : !BOC
1487 :
1488 : #if defined( MODCM_TIMING )
1489 : call t_startf('mod_comm communication')
1490 : #endif
1491 :
1492 161280 : call MPI_COMM_SIZE (comm, numcomm, ierror)
1493 :
1494 161280 : call Win_Open(comm, t1_win)
1495 :
1496 : ! Init Recv src
1497 161280 : if ( src >= 0 .and. src < numcomm ) then ! is PE in valid range?
1498 158760 : t1_win%src = src
1499 158760 : t1_win%size_r = (i2-i1+1)*(j2-j1+1)*(k2-k1+1) ! chunk size
1500 158760 : t1_win%offset_r = (t1_win%ncall_s-1)*idimsizz*nbuf
1501 158760 : call Ga_RecvInit_r8(comm, t1_win, ga_t1_r)
1502 158760 : t1_win%offset_r = t1_win%offset_r + t1_win%size_r
1503 158760 : call Ga_RecvInit_r8(comm, t1_win, ga_t1_r)
1504 : endif
1505 : ! Send ghost region
1506 161280 : if ( dest >= 0 .and. dest < numcomm ) then
1507 158760 : t1_win%dest = dest
1508 158760 : t1_win%offset_s = (t1_win%ncall_s-1)*idimsizz*nbuf
1509 : call Ga_Put4d_r8( comm, q1, t1_win, im, jm, km, 2, &
1510 : if, il, jf, jl, kf, kl, 1, 1, &
1511 158760 : i1, i2, j1, j2, k1, k2, 1, 1, ga_t1_s, ga_t1_r )
1512 158760 : t1_win%offset_s = t1_win%offset_s + (i2-i1+1)*(j2-j1+1)*(k2-k1+1)
1513 : call Ga_Put4d_r8( comm, q2, t1_win, im, jm, km, 2, &
1514 : if, il, jf, jl, kf, kl, 2, 2, &
1515 158760 : i1, i2, j1, j2, k1, k2, 2, 2, ga_t1_s, ga_t1_r )
1516 : endif
1517 :
1518 : #if defined( MODCM_TIMING )
1519 : call t_stopf('mod_comm communication')
1520 : #endif
1521 :
1522 : !EOC
1523 161280 : end subroutine mp_send3d_2
1524 : !------------------------------------------------------------------------------
1525 : !------------------------------------------------------------------------------
1526 : !BOP
1527 : ! !ROUTINE: mp_recv3d_2 --- Recv 2 ghost regions
1528 : !
1529 : ! !INTERFACE:
1530 161280 : subroutine mp_recv3d_2(comm, src, im, jm, km, if, il, jf, jl, kf, kl, &
1531 161280 : i1, i2, j1, j2, k1, k2, qout1, qout2)
1532 : !
1533 : ! !INPUT PARAMETERS:
1534 : integer, intent(in):: comm ! communicator
1535 : integer, intent(in):: src
1536 : integer, intent(in):: im, jm, km
1537 : integer, intent(in):: if, il, jf, jl, kf, kl
1538 : integer, intent(in):: i1, i2, j1, j2, k1, k2
1539 : ! !OUTPUT PARAMETERS:
1540 : real(r8), intent(inout):: qout1(if:il, jf:jl, kf:kl)
1541 : real(r8), intent(inout):: qout2(if:il, jf:jl, kf:kl)
1542 : !
1543 : ! !DESCRIPTION:
1544 : !
1545 : ! Recv two general 3d real*8 ghost regions
1546 : !
1547 : ! !REVISION HISTORY:
1548 : ! 02.04.15 Putman
1549 : !
1550 : !EOP
1551 : !------------------------------------------------------------------------------
1552 : !BOC
1553 :
1554 : #if defined( MODCM_TIMING )
1555 : call t_startf('mod_comm communication')
1556 : #endif
1557 :
1558 161280 : call MPI_COMM_SIZE (comm, numcomm, ierror)
1559 :
1560 161280 : call Win_Close(comm, t1_win)
1561 :
1562 : ! Recv from src
1563 161280 : if ( src >= 0 .and. src < numcomm ) then ! is PE in valid range?
1564 158760 : t1_win%src = src
1565 158760 : t1_win%offset_r = (t1_win%ncall_r-1)*idimsizz*nbuf
1566 : call Ga_Get4d_r8( comm, qout1, t1_win, im, jm, km, 2, &
1567 : if, il, jf, jl, kf, kl, 1, 1, &
1568 158760 : i1, i2, j1, j2, k1, k2, 1, 1, ga_t1_r )
1569 158760 : t1_win%offset_r = t1_win%offset_r + (i2-i1+1)*(j2-j1+1)*(k2-k1+1)
1570 : call Ga_Get4d_r8( comm, qout2, t1_win, im, jm, km, 2, &
1571 : if, il, jf, jl, kf, kl, 2, 2, &
1572 158760 : i1, i2, j1, j2, k1, k2, 2, 2, ga_t1_r )
1573 : endif
1574 :
1575 161280 : call Win_Finalize(comm, t1_win)
1576 :
1577 : #if defined( MODCM_TIMING )
1578 : call t_stopf('mod_comm communication')
1579 : #endif
1580 :
1581 : !EOC
1582 161280 : end subroutine mp_recv3d_2
1583 : !------------------------------------------------------------------------------
1584 : !------------------------------------------------------------------------------
1585 : !BOP
1586 : ! !ROUTINE: mp_barrier --- Synchronize all SPMD processes
1587 : !
1588 : ! !INTERFACE:
1589 0 : subroutine mp_barrier (comm)
1590 : !
1591 : ! !INPUT PARAMETERS:
1592 : integer, intent(in) :: comm ! communicator
1593 : ! !DESCRIPTION:
1594 : !
1595 : ! Synchronize all SPMD processes
1596 : !
1597 : ! !REVISION HISTORY:
1598 : ! 2001.09.01 Lin
1599 : ! 2002.04.09 Putman Added ProTeX documentation
1600 : !
1601 : !EOP
1602 : !------------------------------------------------------------------------------
1603 : !BOC
1604 :
1605 0 : call MPI_BARRIER(comm, ierror)
1606 :
1607 : !EOC
1608 0 : end subroutine mp_barrier
1609 : !------------------------------------------------------------------------------
1610 : !------------------------------------------------------------------------------
1611 : !BOP
1612 : ! !ROUTINE: Win_Open --- Open a communication window
1613 : !
1614 : ! !INTERFACE:
1615 17479680 : subroutine Win_Open(comm, win)
1616 : ! !INPUT PARAMETERS:
1617 : integer, intent(in) :: comm ! communicator
1618 : ! !OUTPUT PARAMETERS:
1619 : type(window), intent(inout):: win
1620 : !
1621 : ! !DESCRIPTION:
1622 : !
1623 : ! Begin a communication epoch, by opening a comm window.
1624 : ! Update number of send calls on the window (win%ncall_s).
1625 : ! Barrier synchronzize if necessary.
1626 : !
1627 : ! !REVISION HISTORY:
1628 : ! 02.02.15 Putman
1629 : ! 02.04.09 Putman Added ProTeX documentation
1630 : !
1631 : !EOP
1632 : !------------------------------------------------------------------------------
1633 : !BOC
1634 :
1635 17479680 : win%ncall_s = win%ncall_s + 1
1636 :
1637 : !EOC
1638 17479680 : end subroutine Win_Open
1639 : !------------------------------------------------------------------------------
1640 : !------------------------------------------------------------------------------
1641 : !BOP
1642 : ! !ROUTINE: Win_Close --- Close a communication window
1643 : !
1644 : ! !INTERFACE:
1645 17479680 : subroutine Win_Close(comm, win)
1646 : ! !INPUT PARAMETERS:
1647 : integer, intent(in) :: comm ! communicator
1648 : ! !OUTPUT PARAMETERS:
1649 : type(window), intent(inout):: win
1650 : !
1651 : ! !DESCRIPTION:
1652 : !
1653 : ! End a communication epoch, by closing a comm window.
1654 : ! Update number of receive calls on the window (win%ncall_r).
1655 : ! Barrier synchronzize if necessary.
1656 : !
1657 : ! !REVISION HISTORY:
1658 : ! 02.02.15 Putman
1659 : ! 02.04.09 Putman Added ProTeX documentation
1660 : !
1661 : !EOP
1662 : !------------------------------------------------------------------------------
1663 : !BOC
1664 :
1665 17479680 : win%ncall_r = win%ncall_r + 1
1666 :
1667 : !EOC
1668 17479680 : end subroutine Win_Close
1669 : !------------------------------------------------------------------------------
1670 : !------------------------------------------------------------------------------
1671 : !BOP
1672 : ! !ROUTINE: Win_Finalize --- Reset a communication window after a comm epoch.
1673 : !
1674 : ! !INTERFACE:
1675 17479680 : subroutine Win_Finalize(comm, win)
1676 : ! !INPUT PARAMETERS:
1677 : integer, intent(in) :: comm ! communicator
1678 : ! !OUTPUT PARAMETERS:
1679 : type(window), intent(inout):: win
1680 : !
1681 : ! !DESCRIPTION:
1682 : !
1683 : ! Complete a communication epoch and reset a comm window.
1684 : ! Barrier synchronzize if necessary.
1685 : !
1686 : ! !REVISION HISTORY:
1687 : ! 02.02.15 Putman
1688 : ! 02.04.09 Putman Added ProTeX documentation
1689 : !
1690 : !EOP
1691 : !------------------------------------------------------------------------------
1692 : !BOC
1693 :
1694 17479680 : if (win%ncall_s == win%ncall_r) then
1695 17060352 : call MPI_WAITALL(win%nsend, win%sqest, Stats, ierror)
1696 17060352 : win%nsend = 0
1697 17060352 : win%nrecv = 0
1698 17060352 : win%nread = 0
1699 17060352 : win%ncall_s = 0
1700 17060352 : win%ncall_r = 0
1701 : endif
1702 :
1703 : !EOC
1704 17479680 : end subroutine Win_Finalize
1705 : !------------------------------------------------------------------------------
1706 : !------------------------------------------------------------------------------
1707 : !BOP
1708 : ! !ROUTINE: Ga_Put4d_r8 --- Write to real*8 4d global array
1709 : !
1710 : ! !INTERFACE:
1711 33943368 : subroutine Ga_Put4d_r8 ( comm, q, win, im, jm, km, nq, &
1712 : ifrom, ito, jfrom, jto, kfrom, kto, &
1713 : nqfrom, nqto, i1, i2, j1, j2, k1, k2, &
1714 33943368 : nq1, nq2, ga_s, ga_r )
1715 : ! !INPUT PARAMETERS:
1716 : integer, intent(in) :: comm ! communicator
1717 : type(window), intent(inout) :: win ! Global Array Window
1718 : integer, intent(in) :: im, jm, km, nq
1719 : integer, intent(in) :: ifrom, ito, jfrom, jto, kfrom, kto, nqfrom, nqto
1720 : real(r8), intent(in) :: q(ifrom:ito,jfrom:jto,kfrom:kto,nqfrom:nqto)
1721 : integer, intent(in) :: i1, i2, j1, j2, k1, k2, nq1, nq2
1722 : ! !OUTPUT PARAMETERS:
1723 : real(r8), intent(inout):: ga_s(win%size)
1724 : real(r8), intent(inout):: ga_r(win%size)
1725 : !
1726 : ! !DESCRIPTION:
1727 : !
1728 : ! Write to real*8 4d global array.
1729 : !
1730 : ! !REVISION HISTORY:
1731 : ! 02.02.15 Putman
1732 : ! 02.04.09 Putman Added ProTeX documentation
1733 : !
1734 : !EOP
1735 : !------------------------------------------------------------------------------
1736 : !BOC
1737 : !
1738 : ! !LOCAL VARIABLES:
1739 : integer i, j, k, iq, inc, inc1
1740 : integer i_length, j_length, k_length, ij_length, ijk_length
1741 :
1742 : integer send_tag, qsize
1743 : integer :: gidu
1744 :
1745 33943368 : call MPI_COMM_RANK (comm, gidu, ierror)
1746 :
1747 33943368 : i_length = i2-i1+1
1748 33943368 : j_length = j2-j1+1
1749 33943368 : k_length = k2-k1+1
1750 :
1751 33943368 : ij_length = i_length*j_length
1752 33943368 : ijk_length = i_length*j_length*k_length
1753 :
1754 : ! Begin Non-Blocking Sends
1755 67886736 : do iq = nq1, nq2
1756 : !$omp parallel do private(i,j,k,inc,inc1)
1757 163617684 : do k = k1, k2
1758 : inc1 = (win%offset_s) + ((iq-nq1)*ijk_length) &
1759 95730948 : + ((k-k1)*ij_length) -i1+1
1760 420148380 : do j = j1, j2
1761 290474064 : inc = inc1 + (j-j1)*i_length
1762 75962966292 : do i = i1, i2
1763 75867235344 : ga_s(inc+i) = q(i,j,k,iq)
1764 : enddo
1765 : enddo
1766 : enddo
1767 : enddo
1768 :
1769 33943368 : qsize = (i2-i1+1)*(j2-j1+1)*(k2-k1+1)*(nq2-nq1+1)
1770 33943368 : send_tag = gidu
1771 33943368 : win%nsend = win%nsend + 1
1772 33943368 : call MPI_ISEND(ga_s(win%offset_s+1), qsize, mp_r8, win%dest, &
1773 67886736 : send_tag, comm, win%sqest(win%nsend), ierror)
1774 :
1775 : !EOC
1776 33943368 : end subroutine Ga_Put4d_r8
1777 : !------------------------------------------------------------------------------
1778 : !------------------------------------------------------------------------------
1779 : !BOP
1780 : ! !ROUTINE: Ga_RecvInit_r8 --- Initiate real*8 Non-Blocking receive
1781 : !
1782 : ! !INTERFACE:
1783 33943368 : subroutine Ga_RecvInit_r8( comm, win, ga )
1784 : ! !INPUT PARAMETERS:
1785 : integer, intent(in) :: comm ! communicator
1786 : type(window), intent(inout) :: win ! Global Array Window
1787 : ! !OUTPUT PARAMETERS:
1788 : real(r8), intent(inout):: ga(win%size)
1789 : !
1790 : ! !DESCRIPTION:
1791 : !
1792 : ! Initiate real*8 Non-Blocking receive
1793 : !
1794 : ! !REVISION HISTORY:
1795 : ! 02.02.15 Putman
1796 : ! 02.04.09 Putman Added ProTeX documentation
1797 : ! 03.06.06 Sawyer Added else clause
1798 : !
1799 : !EOP
1800 : !------------------------------------------------------------------------------
1801 : !BOC
1802 : !
1803 : ! !LOCAL VARIABLES:
1804 : integer qsize, recv_tag
1805 :
1806 33943368 : if (win%size >= win%offset_r + win%size_r) then
1807 33943368 : recv_tag = win%src
1808 33943368 : qsize = win%size_r
1809 33943368 : win%nrecv = win%nrecv + 1
1810 33943368 : call MPI_IRECV(ga(win%offset_r+1), qsize, mp_r8, win%src, &
1811 67886736 : recv_tag, comm, win%rqest(win%nrecv), ierror)
1812 : else
1813 0 : write(iulog,*) "Fatal ga_recvinit_r8: receive window out of space - exiting"
1814 0 : write(iulog,*) 'gid win%size win%offset_r win%size_r = ', gid, &
1815 0 : win%size, win%offset_r, win%size_r
1816 0 : stop
1817 : endif
1818 :
1819 : !EOC
1820 33943368 : end subroutine Ga_RecvInit_r8
1821 : !------------------------------------------------------------------------------
1822 : !------------------------------------------------------------------------------
1823 : !BOP
1824 : ! !ROUTINE: Ga_Get4d_r8 --- Read from real*8 4d global array
1825 : !
1826 : ! !INTERFACE:
1827 67886736 : subroutine Ga_Get4d_r8 ( comm, q, win, im, jm, km, nq, &
1828 : ifrom, ito, jfrom, jto, kfrom, kto, &
1829 : nqfrom, nqto, i1, i2, j1, j2, k1, k2, &
1830 33943368 : nq1, nq2, ga )
1831 : ! !INPUT PARAMETERS:
1832 : integer, intent(in) :: comm ! communicator
1833 : type(window), intent(inout) :: win ! Global Array Window
1834 : integer, intent(in) :: im, jm, km, nq
1835 : integer, intent(in) :: i1, i2, j1, j2, k1, k2, nq1, nq2
1836 : integer, intent(in) :: ifrom, ito, jfrom, jto, kfrom, kto, nqfrom, nqto
1837 : real(r8), intent(in) :: ga(win%size)
1838 : ! !OUTPUT PARAMETERS:
1839 : real(r8), intent(inout) :: q(ifrom:ito, jfrom:jto, kfrom:kto, nqfrom:nqto)
1840 : !
1841 : ! !DESCRIPTION:
1842 : !
1843 : ! Read from real*8 4d global array.
1844 : !
1845 : ! !REVISION HISTORY:
1846 : ! 02.02.15 Putman
1847 : ! 02.04.09 Putman Added ProTeX documentation
1848 : !
1849 : !EOP
1850 : !------------------------------------------------------------------------------
1851 : !BOC
1852 : !
1853 : ! !LOCAL VARIABLES:
1854 : integer i, j, k, iq, inc, inc1
1855 : integer i_length, j_length, k_length, ij_length, ijk_length
1856 33943368 : win%nread = win%nread + 1
1857 33943368 : call MPI_WAIT(win%rqest(win%nread), Status, ierror)
1858 :
1859 33943368 : i_length = i2-i1+1
1860 33943368 : j_length = j2-j1+1
1861 33943368 : k_length = k2-k1+1
1862 :
1863 33943368 : ij_length = i_length*j_length
1864 33943368 : ijk_length = i_length*j_length*k_length
1865 :
1866 67886736 : do iq = nq1, nq2
1867 : !$omp parallel do private(i,j,k,inc,inc1)
1868 163617684 : do k = k1, k2
1869 : inc1 = (win%offset_r) + ((iq-nq1)*ijk_length) &
1870 95730948 : + ((k-k1)*ij_length) -i1+1
1871 420148380 : do j = j1, j2
1872 290474064 : inc = inc1 + (j-j1)*i_length
1873 75962966292 : do i = i1, i2
1874 75867235344 : q(i,j,k,iq) = ga(inc+i)
1875 : enddo
1876 : enddo
1877 : enddo
1878 : enddo
1879 :
1880 : !EOC
1881 33943368 : end subroutine Ga_Get4d_r8
1882 : !------------------------------------------------------------------------------
1883 : !------------------------------------------------------------------------------
1884 : !BOP
1885 : ! !ROUTINE: Ga_Put4d_r4 --- Write to real*4 4d global array
1886 : !
1887 : ! !INTERFACE:
1888 0 : subroutine Ga_Put4d_r4 ( comm, q, win, im, jm, km, nq, &
1889 : ifrom, ito, jfrom, jto, kfrom, kto, &
1890 : nqfrom, nqto, i1, i2, j1, j2, k1, k2, &
1891 0 : nq1, nq2, ga_s, ga_r )
1892 : ! !INPUT PARAMETERS:
1893 : integer, intent(in) :: comm ! communicator
1894 : type(window), intent(inout) :: win ! Global Array Window
1895 : integer, intent(in) :: im, jm, km, nq
1896 : integer, intent(in) :: ifrom, ito, jfrom, jto, kfrom, kto, nqfrom, nqto
1897 : real(r4), intent(in) :: q(ifrom:ito,jfrom:jto,kfrom:kto,nqfrom:nqto)
1898 : integer, intent(in) :: i1, i2, j1, j2, k1, k2, nq1, nq2
1899 : ! !OUTPUT PARAMETERS:
1900 : real(r4), intent(inout):: ga_s(win%size)
1901 : real(r4), intent(inout):: ga_r(win%size)
1902 : !
1903 : ! !DESCRIPTION:
1904 : !
1905 : ! Write to real*4 4d global array.
1906 : !
1907 : ! !REVISION HISTORY:
1908 : ! 02.02.15 Putman
1909 : ! 02.04.09 Putman Added ProTeX documentation
1910 : !
1911 : !EOP
1912 : !------------------------------------------------------------------------------
1913 : !BOC
1914 : !
1915 : ! !LOCAL VARIABLES:
1916 : integer i, j, k, iq, inc, inc1
1917 : integer i_length, j_length, k_length, ij_length, ijk_length
1918 :
1919 : integer send_tag, qsize
1920 : integer :: gidu
1921 :
1922 0 : call MPI_COMM_RANK (comm, gidu, ierror)
1923 :
1924 : #if defined ( NOR4 )
1925 0 : write(iulog,*) 'Mod_comm: Ga_Put4d_r4 - r4 windows disabled - exiting'
1926 0 : stop
1927 : #endif
1928 :
1929 : i_length = i2-i1+1
1930 : j_length = j2-j1+1
1931 : k_length = k2-k1+1
1932 :
1933 : ij_length = i_length*j_length
1934 : ijk_length = i_length*j_length*k_length
1935 :
1936 : ! Begin Non-Blocking Sends
1937 : do iq = nq1, nq2
1938 : !$omp parallel do private(i,j,k,inc,inc1)
1939 : do k = k1, k2
1940 : inc1 = (win%offset_s) + ((iq-nq1)*ijk_length) &
1941 : + ((k-k1)*ij_length) -i1+1
1942 : do j = j1, j2
1943 : inc = inc1 + (j-j1)*i_length
1944 : do i = i1, i2
1945 : ga_s(inc+i) = q(i,j,k,iq)
1946 : enddo
1947 : enddo
1948 : enddo
1949 : enddo
1950 :
1951 : qsize = (i2-i1+1)*(j2-j1+1)*(k2-k1+1)*(nq2-nq1+1)
1952 : send_tag = gidu
1953 : win%nsend = win%nsend + 1
1954 : call MPI_ISEND(ga_s(win%offset_s+1), qsize, mp_r4, win%dest, &
1955 : send_tag, comm, win%sqest(win%nsend), ierror)
1956 :
1957 : !EOC
1958 : end subroutine Ga_Put4d_r4
1959 : !------------------------------------------------------------------------------
1960 : !------------------------------------------------------------------------------
1961 : !BOP
1962 : ! !ROUTINE: Ga_RecvInit_r4 --- Initiate real*4 Non-Blocking receive
1963 : !
1964 : ! !INTERFACE:
1965 0 : subroutine Ga_RecvInit_r4( comm, win, ga )
1966 : ! !INPUT PARAMETERS:
1967 : integer, intent(in) :: comm ! communicator
1968 : type(window), intent(inout) :: win ! Global Array Window
1969 : ! !OUTPUT PARAMETERS:
1970 : real(r4), intent(inout):: ga(win%size)
1971 : !
1972 : ! !DESCRIPTION:
1973 : !
1974 : ! Initiate real*8 Non-Blocking receive
1975 : !
1976 : ! !REVISION HISTORY:
1977 : ! 02.02.15 Putman
1978 : ! 02.04.09 Putman Added ProTeX documentation
1979 : ! 03.06.06 Sawyer Added else clause
1980 : !
1981 : !EOP
1982 : !------------------------------------------------------------------------------
1983 : !BOC
1984 : !
1985 : ! !LOCAL VARIABLES:
1986 : integer qsize, recv_tag
1987 :
1988 : #if defined ( NOR4 )
1989 0 : write(iulog,*) 'Mod_comm: Ga_RecvInit_r4 - r4 windows disabled - exiting'
1990 0 : stop
1991 : #endif
1992 :
1993 : if (win%size >= win%offset_r + win%size_r) then
1994 : recv_tag = win%src
1995 : qsize = win%size_r
1996 : win%nrecv = win%nrecv + 1
1997 : call MPI_IRECV(ga(win%offset_r+1), qsize, mp_r4, win%src, &
1998 : recv_tag, comm, win%rqest(win%nrecv), ierror)
1999 : else
2000 : write(iulog,*) "Fatal ga_recvinit_r4: receive window out of space - exiting"
2001 : write(iulog,*) 'gid win%size win%offset_r win%size_r = ', gid, &
2002 : win%size, win%offset_r, win%size_r
2003 : stop
2004 : endif
2005 :
2006 : !EOC
2007 : end subroutine Ga_RecvInit_r4
2008 : !------------------------------------------------------------------------------
2009 : !------------------------------------------------------------------------------
2010 : !BOP
2011 : ! !ROUTINE: Ga_Get4d_r4 --- Read from real*4 4d global array
2012 : !
2013 : ! !INTERFACE:
2014 0 : subroutine Ga_Get4d_r4 ( comm, q, win, im, jm, km, nq, &
2015 : ifrom, ito, jfrom, jto, kfrom, kto, &
2016 : nqfrom, nqto, i1, i2, j1, j2, k1, k2, &
2017 0 : nq1, nq2, ga )
2018 : ! !INPUT PARAMETERS:
2019 : integer, intent(in) :: comm ! communicator
2020 : type(window), intent(inout) :: win ! Global Array Window
2021 : integer, intent(in) :: im, jm, km, nq
2022 : integer, intent(in) :: i1, i2, j1, j2, k1, k2, nq1, nq2
2023 : integer, intent(in) :: ifrom, ito, jfrom, jto, kfrom, kto, nqfrom, nqto
2024 : real(r4), intent(in) :: ga(win%size)
2025 : ! !OUTPUT PARAMETERS:
2026 : real(r4), intent(inout) :: q(ifrom:ito, jfrom:jto, kfrom:kto, nqfrom:nqto)
2027 : !
2028 : ! !DESCRIPTION:
2029 : !
2030 : ! Read from real*8 4d global array.
2031 : !
2032 : ! !REVISION HISTORY:
2033 : ! 02.02.15 Putman
2034 : ! 02.04.09 Putman Added ProTeX documentation
2035 : !
2036 : !EOP
2037 : !------------------------------------------------------------------------------
2038 : !BOC
2039 : !
2040 : ! !LOCAL VARIABLES:
2041 : integer i, j, k, iq, inc, inc1
2042 : integer i_length, j_length, k_length, ij_length, ijk_length
2043 :
2044 : #if defined ( NOR4 )
2045 0 : write(iulog,*) 'Mod_comm: Ga_Get4d_r4 - r4 windows disabled - exiting'
2046 0 : stop
2047 : #endif
2048 :
2049 : win%nread = win%nread + 1
2050 : call MPI_WAIT(win%rqest(win%nread), Status, ierror)
2051 :
2052 : i_length = i2-i1+1
2053 : j_length = j2-j1+1
2054 : k_length = k2-k1+1
2055 :
2056 : ij_length = i_length*j_length
2057 : ijk_length = i_length*j_length*k_length
2058 :
2059 : do iq = nq1, nq2
2060 : !$omp parallel do private(i,j,k,inc,inc1)
2061 : do k = k1, k2
2062 : inc1 = (win%offset_r) + ((iq-nq1)*ijk_length) &
2063 : + ((k-k1)*ij_length) -i1+1
2064 : do j = j1, j2
2065 : inc = inc1 + (j-j1)*i_length
2066 : do i = i1, i2
2067 : q(i,j,k,iq) = ga(inc+i)
2068 : enddo
2069 : enddo
2070 : enddo
2071 : enddo
2072 :
2073 : !EOC
2074 : end subroutine Ga_Get4d_r4
2075 : !------------------------------------------------------------------------------
2076 : !------------------------------------------------------------------------------
2077 : !BOP
2078 : ! !ROUTINE: Ga_Put4d_i4 --- Write to integer*4 4d global array
2079 : !
2080 : ! !INTERFACE:
2081 0 : subroutine Ga_Put4d_i4 ( comm, q, win, im, jm, km, nq, &
2082 : ifrom, ito, jfrom, jto, kfrom, kto, &
2083 : nqfrom, nqto, i1, i2, j1, j2, k1, k2, &
2084 0 : nq1, nq2, ga_s, ga_r )
2085 : ! !INPUT PARAMETERS:
2086 : integer, intent(in) :: comm ! communicator
2087 : type(window), intent(inout) :: win ! Global Array Window
2088 : integer, intent(in) :: im, jm, km, nq
2089 : integer, intent(in) :: ifrom, ito, jfrom, jto, kfrom, kto, nqfrom, nqto
2090 : integer(i4), intent(in) :: q(ifrom:ito,jfrom:jto,kfrom:kto,nqfrom:nqto)
2091 : integer, intent(in) :: i1, i2, j1, j2, k1, k2, nq1, nq2
2092 : ! !OUTPUT PARAMETERS:
2093 : integer(i4), intent(inout):: ga_s(win%size)
2094 : integer(i4), intent(inout):: ga_r(win%size)
2095 : !
2096 : ! !DESCRIPTION:
2097 : !
2098 : ! Write to integer*4 4d global array.
2099 : !
2100 : ! !REVISION HISTORY:
2101 : ! 02.02.15 Putman
2102 : ! 02.04.09 Putman Added ProTeX documentation
2103 : !
2104 : !EOP
2105 : !------------------------------------------------------------------------------
2106 : !BOC
2107 : !
2108 : ! !LOCAL VARIABLES:
2109 : integer i, j, k, iq, inc, inc1
2110 : integer i_length, j_length, k_length, ij_length, ijk_length
2111 :
2112 : integer send_tag, qsize
2113 : integer :: gidu
2114 :
2115 0 : call MPI_COMM_RANK (comm, gidu, ierror)
2116 :
2117 0 : i_length = i2-i1+1
2118 0 : j_length = j2-j1+1
2119 0 : k_length = k2-k1+1
2120 :
2121 0 : ij_length = i_length*j_length
2122 0 : ijk_length = i_length*j_length*k_length
2123 :
2124 : ! Begin Non-Blocking Sends
2125 0 : do iq = nq1, nq2
2126 : !$omp parallel do private(i,j,k,inc,inc1)
2127 0 : do k = k1, k2
2128 : inc1 = (win%offset_s) + ((iq-nq1)*ijk_length) &
2129 0 : + ((k-k1)*ij_length) -i1+1
2130 0 : do j = j1, j2
2131 0 : inc = inc1 + (j-j1)*i_length
2132 0 : do i = i1, i2
2133 0 : ga_s(inc+i) = q(i,j,k,iq)
2134 : enddo
2135 : enddo
2136 : enddo
2137 : enddo
2138 :
2139 0 : qsize = (i2-i1+1)*(j2-j1+1)*(k2-k1+1)*(nq2-nq1+1)
2140 0 : send_tag = gidu
2141 0 : win%nsend = win%nsend + 1
2142 0 : call MPI_ISEND(ga_s(win%offset_s+1), qsize, mp_i4, win%dest, &
2143 0 : send_tag, comm, win%sqest(win%nsend), ierror)
2144 :
2145 : !EOC
2146 0 : end subroutine Ga_Put4d_i4
2147 : !------------------------------------------------------------------------------
2148 : !------------------------------------------------------------------------------
2149 : !BOP
2150 : ! !ROUTINE: Ga_RecvInit_i4 --- Initiate integer*4 Non-Blocking receive
2151 : !
2152 : ! !INTERFACE:
2153 0 : subroutine Ga_RecvInit_i4( comm, win, ga )
2154 : ! !INPUT PARAMETERS:
2155 : integer, intent(in) :: comm ! communicator
2156 : type(window), intent(inout) :: win ! Global Array Window
2157 : ! !OUTPUT PARAMETERS:
2158 : integer(i4), intent(inout):: ga(win%size)
2159 : !
2160 : ! !DESCRIPTION:
2161 : !
2162 : ! Initiate integer*4 Non-Blocking receive
2163 : !
2164 : ! !REVISION HISTORY:
2165 : ! 02.02.15 Putman
2166 : ! 02.04.09 Putman Added ProTeX documentation
2167 : ! 06.05.21 Mirin Added else clause
2168 : !
2169 : !EOP
2170 : !------------------------------------------------------------------------------
2171 : !BOC
2172 : !
2173 : ! !LOCAL VARIABLES:
2174 : integer qsize, recv_tag
2175 :
2176 0 : if (win%size >= win%offset_r + win%size_r) then
2177 0 : recv_tag = win%src
2178 0 : qsize = win%size_r
2179 0 : win%nrecv = win%nrecv + 1
2180 0 : call MPI_IRECV(ga(win%offset_r+1), qsize, mp_i4, win%src, &
2181 0 : recv_tag, comm, win%rqest(win%nrecv), ierror)
2182 : else
2183 0 : write(iulog,*) "Fatal ga_recvinit_i4: receive window out of space - exiting"
2184 0 : write(iulog,*) 'gid win%size win%offset_r win%size_r = ', gid, &
2185 0 : win%size, win%offset_r, win%size_r
2186 0 : stop
2187 : endif
2188 : !EOC
2189 0 : end subroutine Ga_RecvInit_i4
2190 : !------------------------------------------------------------------------------
2191 : !------------------------------------------------------------------------------
2192 : !BOP
2193 : ! !ROUTINE: Ga_Get4d_i4 --- Read from integer*4 4d global array
2194 : !
2195 : ! !INTERFACE:
2196 0 : subroutine Ga_Get4d_i4 ( comm, q, win, im, jm, km, nq, &
2197 : ifrom, ito, jfrom, jto, kfrom, kto, &
2198 : nqfrom, nqto, i1, i2, j1, j2, k1, k2, &
2199 0 : nq1, nq2, ga )
2200 : ! !INPUT PARAMETERS:
2201 : integer, intent(in) :: comm ! communicator
2202 : type(window), intent(inout) :: win ! Global Array Window
2203 : integer, intent(in) :: im, jm, km, nq
2204 : integer, intent(in) :: i1, i2, j1, j2, k1, k2, nq1, nq2
2205 : integer, intent(in) :: ifrom, ito, jfrom, jto, kfrom, kto, nqfrom, nqto
2206 : integer(i4), intent(in) :: ga(win%size)
2207 : ! !OUTPUT PARAMETERS:
2208 : integer(i4), intent(inout) :: q(ifrom:ito, jfrom:jto, kfrom:kto, nqfrom:nqto)
2209 : !
2210 : ! !DESCRIPTION:
2211 : !
2212 : ! Read from integer*4 4d global array.
2213 : !
2214 : ! !REVISION HISTORY:
2215 : ! 02.02.15 Putman
2216 : ! 02.04.09 Putman Added ProTeX documentation
2217 : !
2218 : !EOP
2219 : !------------------------------------------------------------------------------
2220 : !BOC
2221 : !
2222 : ! !LOCAL VARIABLES:
2223 : integer i, j, k, iq, inc, inc1
2224 : integer i_length, j_length, k_length, ij_length, ijk_length
2225 :
2226 0 : win%nread = win%nread + 1
2227 0 : call MPI_WAIT(win%rqest(win%nread), Status, ierror)
2228 :
2229 0 : i_length = i2-i1+1
2230 0 : j_length = j2-j1+1
2231 0 : k_length = k2-k1+1
2232 0 : ij_length = i_length*j_length
2233 0 : ijk_length = i_length*j_length*k_length
2234 :
2235 0 : do iq = nq1, nq2
2236 : !$omp parallel do private(i,j,k,inc,inc1)
2237 0 : do k = k1, k2
2238 : inc1 = (win%offset_r) + ((iq-nq1)*ijk_length) &
2239 0 : + ((k-k1)*ij_length) -i1+1
2240 0 : do j = j1, j2
2241 0 : inc = inc1 + (j-j1)*i_length
2242 0 : do i = i1, i2
2243 0 : q(i,j,k,iq) = ga(inc+i)
2244 : enddo
2245 : enddo
2246 : enddo
2247 : enddo
2248 :
2249 : !EOC
2250 0 : end subroutine Ga_Get4d_i4
2251 : !------------------------------------------------------------------------------
2252 : !------------------------------------------------------------------------------
2253 : !BOP
2254 : ! !ROUTINE: Ga_Broadcast_r8 --- Broadcast an real*8 1d global array
2255 : !
2256 : ! !INTERFACE:
2257 0 : subroutine Ga_Broadcast_r8 ( comm, q, isize )
2258 : ! !INPUT PARAMETERS:
2259 : integer, intent(in) :: comm ! communicator
2260 : integer, intent(in) :: isize
2261 : ! !OUTPUT PARAMETERS:
2262 : real(r8), intent(inout) :: q(isize)
2263 : !
2264 : ! !DESCRIPTION:
2265 : !
2266 : ! Broadcast an real*8 1d global array.
2267 : !
2268 : ! !REVISION HISTORY:
2269 : ! 03.04.02 Putman
2270 : !
2271 : !EOP
2272 : !------------------------------------------------------------------------------
2273 : !BOC
2274 : ! !LOCAL VARIABLES:
2275 :
2276 0 : call MPI_BCAST(q, isize, mp_r8, 0, comm, ierror)
2277 :
2278 : !EOC
2279 0 : end subroutine Ga_Broadcast_r8
2280 : !------------------------------------------------------------------------------
2281 : !------------------------------------------------------------------------------
2282 : !BOP
2283 : ! !ROUTINE: Ga_Broadcast_r4 --- Broadcast an real*4 1d global array
2284 : !
2285 : ! !INTERFACE:
2286 0 : subroutine Ga_Broadcast_r4 ( comm, q, isize )
2287 : ! !INPUT PARAMETERS:
2288 : integer, intent(in) :: comm ! communicator
2289 : integer, intent(in) :: isize
2290 : ! !OUTPUT PARAMETERS:
2291 : real(r4), intent(inout) :: q(isize)
2292 : !
2293 : ! !DESCRIPTION:
2294 : !
2295 : ! Broadcast an real*4 1d global array.
2296 : !
2297 : ! !REVISION HISTORY:
2298 : ! 03.04.02 Putman
2299 : !
2300 : !EOP
2301 : !------------------------------------------------------------------------------
2302 : !BOC
2303 : ! !LOCAL VARIABLES:
2304 :
2305 : #if defined ( NOR4 )
2306 0 : write(iulog,*) 'Mod_comm: Ga_Broadcast_r4 - r4 windows disabled - exiting'
2307 0 : stop
2308 : #endif
2309 :
2310 : call MPI_BCAST(q, isize, mp_r4, 0, comm, ierror)
2311 :
2312 : !EOC
2313 : end subroutine Ga_Broadcast_r4
2314 : !------------------------------------------------------------------------------
2315 : !------------------------------------------------------------------------------
2316 : !BOP
2317 : ! !ROUTINE: Ga_Broadcast_i4 --- Broadcast an integer*4 1d global array
2318 : !
2319 : ! !INTERFACE:
2320 0 : subroutine Ga_Broadcast_i4 ( comm, q, isize )
2321 : ! !INPUT PARAMETERS:
2322 : integer, intent(in) :: comm ! communicator
2323 : integer, intent(in) :: isize
2324 : ! !OUTPUT PARAMETERS:
2325 : integer(i4), intent(inout) :: q(isize)
2326 : !
2327 : ! !DESCRIPTION:
2328 : !
2329 : ! Broadcast an integer*4 1d global array.
2330 : !
2331 : ! !REVISION HISTORY:
2332 : ! 03.04.02 Putman
2333 : !
2334 : !EOP
2335 : !------------------------------------------------------------------------------
2336 : !BOC
2337 : ! !LOCAL VARIABLES:
2338 :
2339 0 : call MPI_BCAST(q, isize, mp_i4, 0, comm, ierror)
2340 :
2341 : !EOC
2342 0 : end subroutine Ga_Broadcast_i4
2343 : !------------------------------------------------------------------------------
2344 : !------------------------------------------------------------------------------
2345 : !BOP
2346 : ! !ROUTINE: Ga_AllToAll_r8 --- All to All of an real*8 1d global array
2347 : !
2348 : ! !INTERFACE:
2349 0 : subroutine Ga_AllToAll_r8 ( comm, q, Gsize, Lsize, istart )
2350 : ! !INPUT PARAMETERS:
2351 : integer, intent(in) :: comm ! communicator
2352 : integer, intent(in) :: Gsize ! Global size of array
2353 : integer, intent(in) :: Lsize ! size of Local portion
2354 : integer, intent(in) :: istart ! starting point
2355 : ! !OUTPUT PARAMETERS:
2356 : real(r8), intent(inout) :: q(Gsize)
2357 : !
2358 : ! !DESCRIPTION:
2359 : !
2360 : ! All to All of a real*8 1d global array.
2361 : !
2362 : ! !REVISION HISTORY:
2363 : ! 03.04.02 Putman
2364 : !
2365 : !EOP
2366 : !------------------------------------------------------------------------------
2367 : !BOC
2368 : ! !LOCAL VARIABLES:
2369 :
2370 0 : call MPI_ALLGATHER(q(istart), Lsize, mp_r8, q, Lsize, mp_r8, comm, ierror)
2371 :
2372 : !EOC
2373 0 : end subroutine Ga_AllToAll_r8
2374 : !------------------------------------------------------------------------------
2375 : !------------------------------------------------------------------------------
2376 : !BOP
2377 : ! !ROUTINE: Ga_AllToAll_r4 --- All to All of an real*4 1d global array
2378 : !
2379 : ! !INTERFACE:
2380 0 : subroutine Ga_AllToAll_r4 ( comm, q, Gsize, Lsize, istart )
2381 : ! !INPUT PARAMETERS:
2382 : integer, intent(in) :: comm ! communicator
2383 : integer, intent(in) :: Gsize ! Global size of array
2384 : integer, intent(in) :: Lsize ! size of Local portion
2385 : integer, intent(in) :: istart ! starting point
2386 : ! !OUTPUT PARAMETERS:
2387 : real(r4), intent(inout) :: q(Gsize)
2388 : !
2389 : ! !DESCRIPTION:
2390 : !
2391 : ! All to All of an real*4 1d global array.
2392 : !
2393 : ! !REVISION HISTORY:
2394 : ! 03.04.02 Putman
2395 : !
2396 : !EOP
2397 : !------------------------------------------------------------------------------
2398 : !BOC
2399 : ! !LOCAL VARIABLES:
2400 :
2401 : #if defined ( NOR4 )
2402 0 : write(iulog,*) 'Mod_comm: Ga_AllToAll_r4 - r4 windows disabled - exiting'
2403 0 : stop
2404 : #endif
2405 :
2406 : call MPI_ALLGATHER(q(istart), Lsize, mp_r4, q, Lsize, mp_r4, comm, ierror)
2407 :
2408 : !EOC
2409 : end subroutine Ga_AllToAll_r4
2410 : !------------------------------------------------------------------------------
2411 : !------------------------------------------------------------------------------
2412 : !BOP
2413 : ! !ROUTINE: Ga_AllToAll_i4 --- All to All of an integer*4 1d global array
2414 : !
2415 : ! !INTERFACE:
2416 0 : subroutine Ga_AllToAll_i4 ( comm, q, Gsize, Lsize, istart )
2417 : ! !INPUT PARAMETERS:
2418 : integer, intent(in) :: comm ! communicator
2419 : integer, intent(in) :: Gsize ! Global size of array
2420 : integer, intent(in) :: Lsize ! size of Local portion
2421 : integer, intent(in) :: istart ! starting point
2422 : ! !OUTPUT PARAMETERS:
2423 : integer(i4), intent(inout) :: q(Gsize)
2424 : !
2425 : ! !DESCRIPTION:
2426 : !
2427 : ! All to All of an integer*4 1d global array.
2428 : !
2429 : ! !REVISION HISTORY:
2430 : ! 03.04.02 Putman
2431 : !
2432 : !EOP
2433 : !------------------------------------------------------------------------------
2434 : !BOC
2435 : ! !LOCAL VARIABLES:
2436 :
2437 0 : call MPI_ALLGATHER(q(istart), Lsize, mp_i4, q, Lsize, mp_i4, comm, ierror)
2438 :
2439 : !EOC
2440 0 : end subroutine Ga_AllToAll_i4
2441 : !------------------------------------------------------------------------------
2442 : !BOP
2443 : ! !ROUTINE: get_partneroffset --- Computes partneroffset/type from descriptor
2444 : !
2445 : ! !INTERFACE:
2446 47616 : subroutine get_partneroffset ( comm, send_bl, recv_bl )
2447 :
2448 : ! !INPUT PARAMETERS:
2449 : integer, intent(in) :: comm ! communicator
2450 : ! !INPUT/OUTPUT PARAMETERS:
2451 : type(blockdescriptor), intent(inout) :: send_bl(:) ! send blocks
2452 : type(blockdescriptor), intent(inout) :: recv_bl(:) ! receive blocks
2453 :
2454 : !
2455 : ! !DESCRIPTION:
2456 : ! Compute partneroffsets/types from other blockdescriptor
2457 : ! information. Used exclusively for irregular communication
2458 : ! in PILGRIM.
2459 : !
2460 : ! !REVISION HISTORY:
2461 : ! 03.10.31 Mirin Creation
2462 : !
2463 : ! !BUGS:
2464 : !
2465 : !EOP
2466 : !------------------------------------------------------------------------------
2467 : !BOC
2468 : !
2469 : ! !LOCAL VARIABLES:
2470 :
2471 : integer :: i, j, k, ns, pos, por, numpsq, ierror
2472 : integer :: ami(numpro,numpro), am(numpro,numpro)
2473 : integer mod_method, num_s, num_r
2474 :
2475 47616 : num_s = size(send_bl)
2476 47616 : num_r = size(recv_bl)
2477 :
2478 36616704 : do j = 1, num_s
2479 36569088 : send_bl(j)%partneroffset = 0
2480 36616704 : send_bl(j)%partnertype = MPI_DATATYPE_NULL
2481 : enddo
2482 36616704 : do j = 1, num_r
2483 36569088 : recv_bl(j)%partneroffset = 0
2484 36616704 : recv_bl(j)%partnertype = MPI_DATATYPE_NULL
2485 : enddo
2486 :
2487 47616 : end subroutine get_partneroffset
2488 : !------------------------------------------------------------------------------
2489 : !
2490 : !------------------------------------------------------------------------------
2491 : !BOP
2492 : ! !ROUTINE: mp_sendirr --- Initiate communication of contiguous parcels
2493 : !
2494 : ! !INTERFACE:
2495 919296 : subroutine mp_sendirr ( comm, send_bl, recv_bl, q1in, q1out, q2in, q2out, &
2496 : modc )
2497 :
2498 : ! !INPUT PARAMETERS:
2499 : integer, intent(in) :: comm ! communicator
2500 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
2501 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
2502 : real(r8), intent(in) :: q1in(*) ! input array
2503 : real(r8), optional, intent(in) :: q2in(*) ! second input array
2504 : integer, optional, intent(in) :: modc(4) ! 1: classical, swap p2p, swap a2a
2505 : ! 2: handshake
2506 : ! 3: send vs isend
2507 : ! 4: max number of outstanding requests
2508 :
2509 : ! !OUTPUT PARAMETERS:
2510 : real(r8), intent(out) :: q1out(*) ! output array
2511 : real(r8), optional, intent(out) :: q2out(*) ! second output array
2512 : !
2513 : ! !DESCRIPTION:
2514 : ! Communicate a number of contiguous parcels to/from arbitrary set of PEs.
2515 : ! Modc(1): if 0, use original approach of posting all communications here and placing
2516 : ! wait points in mp_recvirr; if 1, call swap routine with p2p messages; if 2, call swap
2517 : ! routine with a2a messages.
2518 : ! Modc(2): if 1, then apply handshaking (don't send until corresponding receive is posted)
2519 : ! Modc(3): if 1, then use blocking send; otherwise use nonblocking send
2520 : ! Modc(4): maximum number of outstanding requests (applies to swap routines only)
2521 : !
2522 : ! !REVISION HISTORY:
2523 : ! 02.08.13 Sawyer Creation
2524 : ! 02.11.06 Mirin Optimizations
2525 : ! 03.03.03 Sawyer Use partneroffset
2526 : ! 03.06.24 Sawyer Integrated Use_Mpi_Types; added qout
2527 : ! 04.02.24 Mirin Various mpi2 options
2528 : ! 08.09.18 Mirin Major overhaul, to include approaches from Mirin and Worley
2529 : ! 09.10.07 Worley eliminated mpi_recv from handshake logic
2530 : !
2531 : ! !BUGS:
2532 : !
2533 : !EOP
2534 : !------------------------------------------------------------------------------
2535 : !BOC
2536 : !
2537 : ! !LOCAL VARIABLES:
2538 : integer ipe, qsize, offset, blocksize, nparcels, offset_s, offset_r, ierr, mod_method
2539 : integer p, mysize, nthpc, minsize, nthrd, pn, pt, tmpsize, unitsize, offset_0
2540 : integer i, j, send_tag, recv_tag, num_s, num_r
2541 1838592 : integer :: offset_v (Max_Nparcels)
2542 1838592 : integer :: hs_snd, hs_rcv(numpro), hs_rcvids(numpro)
2543 : integer ipe2, ceil2num
2544 : integer onetwo
2545 : logical twovar
2546 : integer sw_local, maxreq_local
2547 : logical hs_local, send_local
2548 : logical sw_alltoall
2549 : integer comm_pid
2550 :
2551 :
2552 : #if defined( MODCM_TIMING )
2553 : call t_startf('mod_comm communication')
2554 : #endif
2555 :
2556 919296 : if (present(modc)) then
2557 919296 : sw_local = modc(1)
2558 919296 : hs_local = (modc(2) .eq. 1)
2559 919296 : send_local = (modc(3) .eq. 1)
2560 919296 : maxreq_local = modc(4)
2561 : else
2562 0 : sw_local = 0
2563 0 : hs_local = .true.
2564 0 : send_local = .true.
2565 0 : maxreq_local = -1
2566 : endif
2567 :
2568 : ! Do not call mp_swapirr unless mod_method equals 0
2569 919296 : mod_method = recv_bl(1)%method
2570 919296 : if (mod_method .gt. 0) sw_local = 0
2571 :
2572 919296 : onetwo = 1
2573 919296 : twovar = .false.
2574 919296 : if (present(q2in)) then
2575 580608 : onetwo = 2
2576 580608 : twovar = .true.
2577 : endif
2578 :
2579 919296 : if (sw_local .gt. 0) then
2580 0 : sw_alltoall = (sw_local .eq. 2)
2581 0 : if (present(q2in)) then
2582 : call mp_swapirr(comm, send_bl, recv_bl, q1in, q1out, q2in, q2out, &
2583 : sw_handshake=hs_local, sw_maxreq=maxreq_local, &
2584 0 : sw_alltoall=sw_alltoall, sw_send=send_local)
2585 : else
2586 : call mp_swapirr(comm, send_bl, recv_bl, q1in, q1out, &
2587 : sw_handshake=hs_local, sw_maxreq=maxreq_local, &
2588 0 : sw_alltoall=sw_alltoall, sw_send=send_local)
2589 : endif
2590 : else
2591 :
2592 919296 : call MPI_COMM_RANK (comm, comm_pid, ierr)
2593 :
2594 919296 : hs_snd = 1
2595 919296 : ceil2num = ceil2(numpro)
2596 :
2597 : ! num_s = 0 if this processes is not part of the sending decomposition
2598 919296 : num_s = size(send_bl)
2599 919296 : if (send_bl(1)%Nparcels == -1) then
2600 0 : num_s = 0
2601 : endif
2602 :
2603 : ! num_r = 0 if this processes is not part of the receiving decomposition
2604 919296 : num_r = size(recv_bl)
2605 919296 : if (recv_bl(1)%Nparcels == -1) then
2606 0 : num_r = 0
2607 : endif
2608 :
2609 919296 : r8_win%ncall_s = r8_win%ncall_s + 1
2610 919296 : if (mod_method .gt. 0) then
2611 : !
2612 : ! mpi derived types
2613 0 : if (r8_win%ncall_s .gt. MaxTrf-onetwo+1) then
2614 0 : write(iulog,*) "mp_sendirr: derived type handle count exceeded - exiting"
2615 0 : write(iulog,*) "r8_win%ncall_s MaxTrf = ", r8_win%ncall_s, MaxTrf
2616 0 : stop
2617 : endif
2618 : !
2619 : ! MPI: Irecv over all processes
2620 : !
2621 0 : if (hs_local) then
2622 0 : hs_rcvids(:) = MPI_REQUEST_NULL
2623 0 : do ipe2=1, ceil2num
2624 0 : ipe = ieor(ipe2-1,comm_pid) + 1
2625 0 : if (ipe .gt. num_s) cycle
2626 0 : if ( send_bl(ipe)%type /= MPI_DATATYPE_NULL ) then
2627 0 : if (ipe-1 /= comm_pid) &
2628 0 : call MPI_IRECV ( hs_rcv(ipe), 1, mp_i4, ipe-1, comm_pid, comm, &
2629 0 : hs_rcvids(ipe), ierr )
2630 : endif
2631 : enddo
2632 : endif
2633 :
2634 0 : OutHandle(:,r8_win%ncall_s) = MPI_REQUEST_NULL
2635 0 : if (twovar) OutHandle(:,r8_win%ncall_s+1) = MPI_REQUEST_NULL
2636 0 : do ipe2=1, ceil2num
2637 0 : ipe = ieor(ipe2-1,comm_pid) + 1
2638 0 : if (ipe .gt. num_r) cycle
2639 : !
2640 : ! Receive the buffers with MPI_Irecv. Non-blocking
2641 : !
2642 0 : if ( recv_bl(ipe)%type /= MPI_DATATYPE_NULL ) then
2643 0 : recv_tag = ipe-1 + modcam_tagoffset
2644 : call mpi_irecv( q1out, 1, recv_bl(ipe)%type, ipe-1, recv_tag, &
2645 0 : comm, OutHandle(ipe,r8_win%ncall_s), ierr )
2646 0 : if (twovar) then
2647 0 : call mpi_irecv( q2out, 1, recv_bl(ipe)%type, ipe-1, recv_tag, &
2648 0 : comm, OutHandle(ipe,r8_win%ncall_s+1), ierr )
2649 : endif
2650 0 : if (hs_local) then
2651 0 : if (ipe-1 /= comm_pid) &
2652 0 : call MPI_SEND ( hs_snd, 1, mp_i4, ipe-1, ipe-1, comm, ierr )
2653 : endif
2654 : endif
2655 : enddo
2656 :
2657 : !
2658 : ! MPI: Isend/Send over all processes; use risend/rsend with hs
2659 : !
2660 0 : InHandle(:,r8_win%ncall_s) = MPI_REQUEST_NULL
2661 0 : if (twovar) InHandle(:,r8_win%ncall_s+1) = MPI_REQUEST_NULL
2662 0 : do ipe2=1, ceil2num
2663 0 : ipe = ieor(ipe2-1,comm_pid) + 1
2664 0 : if (ipe .gt. num_s) cycle
2665 :
2666 : !
2667 : ! Send the individual buffers with non-blocking sends
2668 : !
2669 0 : if ( send_bl(ipe)%type /= MPI_DATATYPE_NULL ) then
2670 0 : send_tag = comm_pid + modcam_tagoffset
2671 0 : if (hs_local) then
2672 0 : if (ipe-1 /= comm_pid) &
2673 0 : call MPI_WAIT ( hs_rcvids(ipe), MPI_STATUS_IGNORE, ierr )
2674 0 : if (send_local) then
2675 0 : call mpi_rsend( q1in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
2676 0 : comm, ierr )
2677 : else
2678 0 : call mpi_irsend( q1in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
2679 0 : comm, InHandle(ipe,r8_win%ncall_s), ierr )
2680 : endif
2681 0 : if (twovar) then
2682 0 : if (send_local) then
2683 0 : call mpi_rsend( q2in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
2684 0 : comm, ierr )
2685 : else
2686 0 : call mpi_irsend( q2in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
2687 0 : comm, InHandle(ipe,r8_win%ncall_s+1), ierr )
2688 : endif
2689 : endif
2690 : else
2691 0 : if (send_local) then
2692 : call mpi_send( q1in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
2693 0 : comm, ierr )
2694 : else
2695 : call mpi_isend( q1in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
2696 0 : comm, InHandle(ipe,r8_win%ncall_s), ierr )
2697 : endif
2698 0 : if (twovar) then
2699 0 : if (send_local) then
2700 0 : call mpi_send( q2in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
2701 0 : comm, ierr )
2702 : else
2703 0 : call mpi_isend( q2in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
2704 0 : comm, InHandle(ipe,r8_win%ncall_s+1), ierr )
2705 : endif
2706 : endif
2707 : endif
2708 : endif
2709 : enddo
2710 : else
2711 :
2712 : ! temporary contiguous buffers
2713 :
2714 919296 : if (r8_win%ncall_s .gt. max_irr-onetwo+1) then
2715 0 : write(iulog,*) "mp_sendirr: insufficient window storage - exiting"
2716 0 : write(iulog,*) "r8_win%ncall_s max_irr = ", r8_win%ncall_s, max_irr
2717 0 : stop
2718 : endif
2719 919296 : unitsize = r8_win%size/max_irr
2720 :
2721 : ! issue call to receive data in global receive buffer
2722 919296 : offset_0 = (r8_win%ncall_s-1)*unitsize
2723 919296 : offset_s = offset_0
2724 919296 : offset_r = offset_0
2725 :
2726 919296 : if (hs_local) then
2727 706938624 : hs_rcvids(:) = MPI_REQUEST_NULL
2728 942278400 : do ipe2=1, ceil2num
2729 941359104 : ipe = ieor(ipe2-1,comm_pid) + 1
2730 941359104 : if (ipe .gt. num_s) cycle
2731 706019328 : qsize = onetwo*send_bl(ipe)%Tot_Size
2732 706938624 : if (qsize .ne. 0) then
2733 13317696 : r8_win%dest = ipe-1
2734 13317696 : send_tag = comm_pid + modcam_tagoffset
2735 13317696 : if (r8_win%dest /= comm_pid) &
2736 13300308 : call MPI_IRECV ( hs_rcv(ipe), 1, mp_i4, r8_win%dest, send_tag, comm, &
2737 13300308 : hs_rcvids(ipe), ierr )
2738 : endif
2739 : enddo
2740 : endif
2741 :
2742 942278400 : do ipe2=1, ceil2num
2743 941359104 : ipe = ieor(ipe2-1,comm_pid) + 1
2744 941359104 : if (ipe .gt. num_r) cycle
2745 706019328 : r8_win%size_r = onetwo*recv_bl(ipe)%Tot_Size
2746 706938624 : if (r8_win%size_r .ne. 0) then
2747 13317696 : r8_win%offset_r = offset_r
2748 13317696 : offset_r = offset_r + r8_win%size_r
2749 13317696 : r8_win%src = ipe-1
2750 13317696 : if (onetwo*unitsize >= offset_r-offset_0) then
2751 13317696 : recv_tag = r8_win%src + modcam_tagoffset
2752 13317696 : qsize = r8_win%size_r
2753 13317696 : r8_win%nrecv = r8_win%nrecv + 1
2754 0 : call MPI_IRECV(ga_r8_r(r8_win%offset_r+1), qsize, mp_r8, r8_win%src, &
2755 13317696 : recv_tag, comm, r8_win%rqest(r8_win%nrecv), ierror)
2756 13317696 : if (hs_local) then
2757 13317696 : if (r8_win%src /= comm_pid) &
2758 13300308 : call MPI_SEND ( hs_snd, 1, mp_i4, r8_win%src, recv_tag, comm, ierror)
2759 : endif
2760 : else
2761 0 : write(iulog,*) "Fatal mp_sendirr: receive window out of space - exiting"
2762 0 : write(iulog,*) 'comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
2763 0 : ipe, unitsize, offset_r, offset_0
2764 0 : stop
2765 : endif
2766 : endif
2767 : enddo
2768 :
2769 : ! gather data into global send buffer
2770 942278400 : do ipe2=1, ceil2num
2771 941359104 : ipe = ieor(ipe2-1,comm_pid) + 1
2772 941359104 : if (ipe .gt. num_s) cycle
2773 706019328 : qsize = onetwo*send_bl(ipe)%Tot_Size
2774 706938624 : if (qsize .ne. 0) then
2775 13317696 : r8_win%dest = ipe-1
2776 13317696 : r8_win%offset_s = offset_s
2777 13317696 : offset_s = offset_s + qsize
2778 13317696 : if (offset_s-offset_0 .gt. onetwo*unitsize) then
2779 0 : write(iulog,*) "Fatal mp_sendirr: send window out of space - exiting"
2780 0 : write(iulog,*) 'comm_pid ipe unitsize offset_s offset_0 = ', comm_pid, &
2781 0 : ipe, unitsize, offset_s, offset_0
2782 0 : stop
2783 : endif
2784 :
2785 13317696 : offset_v(1) = r8_win%offset_s
2786 114863616 : do j = 2, send_bl(ipe)%nparcels
2787 114863616 : offset_v(j) = offset_v(j-1) + send_bl(ipe)%blocksizes(j-1)
2788 : enddo
2789 :
2790 128181312 : do j = 1, send_bl(ipe)%nparcels
2791 2884908096 : do i = 1, send_bl(ipe)%blocksizes(j)
2792 2871590400 : ga_r8_s(offset_v(j)+i) = q1in(send_bl(ipe)%displacements(j)+i)
2793 : enddo
2794 : enddo
2795 13317696 : if (twovar) then
2796 70060032 : do j = 1, send_bl(ipe)%nparcels
2797 1584285696 : do i = 1, send_bl(ipe)%blocksizes(j)
2798 1577318400 : ga_r8_s(send_bl(ipe)%Tot_Size+offset_v(j)+i) = q2in(send_bl(ipe)%displacements(j)+i)
2799 : enddo
2800 : enddo
2801 : endif
2802 :
2803 : ! nonblocking send
2804 13317696 : send_tag = comm_pid + modcam_tagoffset
2805 13317696 : r8_win%nsend = r8_win%nsend + 1
2806 13317696 : if (hs_local) then
2807 13317696 : if (r8_win%dest /= comm_pid) &
2808 13300308 : call MPI_WAIT ( hs_rcvids(ipe), MPI_STATUS_IGNORE, ierr )
2809 13317696 : if (send_local) then
2810 0 : call MPI_RSEND(ga_r8_s(r8_win%offset_s+1), qsize, mp_r8, r8_win%dest, &
2811 13317696 : send_tag, comm, ierr)
2812 : else
2813 0 : call MPI_IRSEND(ga_r8_s(r8_win%offset_s+1), qsize, mp_r8, r8_win%dest, &
2814 0 : send_tag, comm, r8_win%sqest(r8_win%nsend), ierr)
2815 : endif
2816 : else
2817 0 : if (send_local) then
2818 0 : call MPI_SEND(ga_r8_s(r8_win%offset_s+1), qsize, mp_r8, r8_win%dest, &
2819 0 : send_tag, comm, ierr)
2820 : else
2821 0 : call MPI_ISEND(ga_r8_s(r8_win%offset_s+1), qsize, mp_r8, r8_win%dest, &
2822 0 : send_tag, comm, r8_win%sqest(r8_win%nsend), ierr)
2823 : endif
2824 : endif
2825 : endif
2826 : enddo
2827 :
2828 : endif ! mod_method
2829 :
2830 919296 : if (twovar) r8_win%ncall_s = r8_win%ncall_s + 1
2831 :
2832 : endif ! sw_local
2833 :
2834 : #if defined( MODCM_TIMING )
2835 : call t_stopf('mod_comm communication')
2836 : #endif
2837 :
2838 919296 : end subroutine mp_sendirr
2839 : !------------------------------------------------------------------------------
2840 : !
2841 : !------------------------------------------------------------------------------
2842 : !BOP
2843 : ! !ROUTINE: mp_recvirr --- Finalize communication of contiguous parcels
2844 : !
2845 : ! !INTERFACE:
2846 919296 : subroutine mp_recvirr ( comm, send_bl, recv_bl, q1in, q1out, q2in, q2out, &
2847 : modc )
2848 :
2849 : ! !INPUT PARAMETERS:
2850 : integer, intent(in) :: comm ! communicator
2851 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
2852 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
2853 : real(r8), intent(in) :: q1in(*) ! input array
2854 : real(r8), optional, intent(in) :: q2in(*) ! second input array
2855 : integer, optional, intent(in) :: modc(4) ! 1: classical, swap p2p, swap a2a
2856 : ! 2: handshake
2857 : ! 3: send vs isend
2858 : ! 4: max number of outstanding requests
2859 : ! !INPUT/OUTPUT PARAMETERS:
2860 : real(r8), intent(inout) :: q1out(*) ! output array
2861 : real(r8), optional, intent(inout) :: q2out(*) ! second output array
2862 : !
2863 : ! !DESCRIPTION:
2864 : ! Complete transfer of a generalized region initiated by {\tt mp\_sendirr}.
2865 : ! Communicate a number of contiguous parcels to/from arbitrary set of PEs.
2866 : ! Modc(1): if 0, use original approach of posting all communications in mp_sendirr and
2867 : ! placing wait points here; otherwise don't do anything - mp_swapirr is called from mp_sendirr.
2868 : ! Modc(3): if 1, then use blocking send; otherwise use nonblocking send
2869 : !
2870 : ! !REVISION HISTORY:
2871 : ! 02.08.15 Sawyer Creation
2872 : ! 02.11.06 Mirin Optimizations
2873 : ! 03.03.03 Sawyer Now using packed arrays for MPI2
2874 : ! 04.02.24 Mirin Various mpi2 options
2875 : ! 08.09.18 Mirin Major overhaul, to include approaches from Mirin and Worley
2876 : !
2877 : !EOP
2878 : !------------------------------------------------------------------------------
2879 : !BOC
2880 : integer :: ipe, blocksize, offset_r, mod_method
2881 : integer unitsize, offset_0
2882 : integer Ierr
2883 1838592 : integer InStats(numpro*MPI_STATUS_SIZE)
2884 1838592 : integer OutStats(numpro*MPI_STATUS_SIZE)
2885 : integer i, j, num_r, num_s
2886 919296 : integer :: offset_v (Max_Nparcels)
2887 : integer ipe2, ceil2num
2888 : integer onetwo
2889 : logical twovar
2890 : integer sw_local, maxreq_local
2891 : logical hs_local, send_local
2892 : logical sw_alltoall
2893 : integer comm_size, comm_pid
2894 :
2895 919296 : if (present(modc)) then
2896 919296 : sw_local = modc(1)
2897 919296 : hs_local = (modc(2) .eq. 1)
2898 919296 : send_local = (modc(3) .eq. 1)
2899 919296 : maxreq_local = modc(4)
2900 : else
2901 : sw_local = 0
2902 919296 : hs_local = .true.
2903 : send_local = .true.
2904 919296 : maxreq_local = -1
2905 : endif
2906 :
2907 : ! Do not call mp_swapirr (hence return) unless mod_method equals 0
2908 919296 : mod_method = recv_bl(1)%method
2909 919296 : if (mod_method .gt. 0) sw_local = 0
2910 :
2911 : ! Return if swap_irr
2912 919296 : if (sw_local .gt. 0) return
2913 :
2914 : #if defined( MODCM_TIMING )
2915 : call t_startf('mod_comm communication')
2916 : #endif
2917 :
2918 919296 : onetwo = 1
2919 919296 : twovar = .false.
2920 919296 : if (present(q2in)) then
2921 580608 : onetwo = 2
2922 580608 : twovar = .true.
2923 : endif
2924 :
2925 919296 : call MPI_COMM_SIZE (comm, comm_size, ierr)
2926 919296 : call MPI_COMM_RANK (comm, comm_pid, ierr)
2927 :
2928 919296 : ceil2num = ceil2(numpro)
2929 :
2930 : ! num_s = 0 if this processes is not part of the sending decomposition
2931 919296 : num_s = size(send_bl)
2932 919296 : if (send_bl(1)%Nparcels == -1) then
2933 0 : num_s = 0
2934 : endif
2935 :
2936 : ! num_r = 0 if this processes is not part of the receiving decomposition
2937 919296 : num_r = size(recv_bl)
2938 919296 : if (recv_bl(1)%Nparcels == -1) then
2939 0 : num_r = 0
2940 : endif
2941 :
2942 919296 : r8_win%ncall_r = r8_win%ncall_r + 1
2943 :
2944 919296 : if (mod_method .gt. 0) then
2945 :
2946 : ! mpi derived types
2947 0 : if (r8_win%ncall_r .gt. MaxTrf-onetwo+1) then
2948 0 : write(iulog,*) "mp_recvirr: derived type handle count exceeded - exiting"
2949 0 : write(iulog,*) "r8_win%ncall_r MaxTrf = ", r8_win%ncall_r, MaxTrf
2950 0 : stop
2951 : endif
2952 :
2953 0 : if (num_s .gt. 0 .and. (.not. send_local)) then
2954 0 : CALL MPI_WAITALL( comm_size, InHandle(:,r8_win%ncall_r), InStats, Ierr )
2955 0 : if (twovar) then
2956 0 : CALL MPI_WAITALL( comm_size, InHandle(:,r8_win%ncall_r+1), InStats, Ierr )
2957 : endif
2958 : endif
2959 0 : if (num_r .gt. 0) then
2960 0 : CALL MPI_WAITALL( comm_size, OutHandle(:,r8_win%ncall_r), OutStats, Ierr )
2961 0 : if (twovar) then
2962 0 : CALL MPI_WAITALL( comm_size, OutHandle(:,r8_win%ncall_r+1), OutStats, Ierr )
2963 : endif
2964 : endif
2965 :
2966 : else
2967 :
2968 : ! temporary contiguous buffer / global window
2969 :
2970 919296 : if (r8_win%ncall_r .gt. max_irr-onetwo+1) then
2971 0 : write(iulog,*) "mp_recvirr: insufficient window storage - exiting"
2972 0 : write(iulog,*) "r8_win%ncall_r max_irr = ", r8_win%ncall_r, max_irr
2973 0 : stop
2974 : endif
2975 919296 : unitsize = r8_win%size/max_irr
2976 :
2977 : ! scatter data from global receive buffer to final destination
2978 919296 : offset_0 = (r8_win%ncall_r-1)*unitsize
2979 919296 : offset_r = offset_0
2980 :
2981 942278400 : do ipe2=1, ceil2num
2982 941359104 : ipe = ieor(ipe2-1,comm_pid) + 1
2983 941359104 : if (ipe .gt. num_r) cycle
2984 706019328 : r8_win%size_r = onetwo*recv_bl(ipe)%Tot_Size
2985 706938624 : if (r8_win%size_r .ne. 0) then
2986 13317696 : r8_win%offset_r = offset_r
2987 13317696 : offset_r = offset_r + r8_win%size_r
2988 13317696 : if (offset_r-offset_0 .gt. onetwo*unitsize) then
2989 0 : write(iulog,*) "Fatal mp_recvirr: receive window out of space - exiting"
2990 0 : write(iulog,*) 'comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
2991 0 : ipe, unitsize, offset_r, offset_0
2992 0 : stop
2993 : endif
2994 :
2995 13317696 : r8_win%nread = r8_win%nread + 1
2996 13317696 : call MPI_WAIT(r8_win%rqest(r8_win%nread), Status, ierr)
2997 :
2998 13317696 : offset_v(1) = r8_win%offset_r
2999 114863616 : do j = 2, recv_bl(ipe)%Nparcels
3000 114863616 : offset_v(j) = offset_v(j-1) + recv_bl(ipe)%blocksizes(j-1)
3001 : enddo
3002 :
3003 128181312 : do j = 1, recv_bl(ipe)%Nparcels
3004 2884908096 : do i = 1, recv_bl(ipe)%blocksizes(j)
3005 2871590400 : q1out(recv_bl(ipe)%displacements(j)+i) = ga_r8_r(offset_v(j)+i)
3006 : enddo
3007 : enddo
3008 13317696 : if (twovar) then
3009 70060032 : do j = 1, recv_bl(ipe)%Nparcels
3010 1584285696 : do i = 1, recv_bl(ipe)%blocksizes(j)
3011 1577318400 : q2out(recv_bl(ipe)%displacements(j)+i) = ga_r8_r(recv_bl(ipe)%Tot_Size+offset_v(j)+i)
3012 : enddo
3013 : enddo
3014 : endif
3015 :
3016 : endif
3017 : enddo
3018 :
3019 919296 : if ((r8_win%ncall_s == r8_win%ncall_r + onetwo - 1) .and. (.not. send_local)) then
3020 0 : call MPI_WAITALL(r8_win%nsend, r8_win%sqest, Stats, ierror)
3021 : endif
3022 :
3023 : endif ! mod_method .gt. 0
3024 :
3025 919296 : if (twovar) r8_win%ncall_r = r8_win%ncall_r + 1
3026 :
3027 919296 : if (r8_win%ncall_s == r8_win%ncall_r) then
3028 919296 : r8_win%nsend = 0
3029 919296 : r8_win%nrecv = 0
3030 919296 : r8_win%nread = 0
3031 919296 : r8_win%ncall_s = 0
3032 919296 : r8_win%ncall_r = 0
3033 : endif
3034 :
3035 : #if defined( MODCM_TIMING )
3036 : call t_stopf('mod_comm communication')
3037 : #endif
3038 :
3039 : !EOC
3040 : end subroutine mp_recvirr
3041 : !------------------------------------------------------------------------------
3042 : !
3043 : !------------------------------------------------------------------------------
3044 : !BOP
3045 : ! !ROUTINE: mp_sendirr_r4 --- Initiate communication of contiguous parcels - r4
3046 : !
3047 : ! !INTERFACE:
3048 0 : subroutine mp_sendirr_r4 ( comm, send_bl, recv_bl, q1in, q1out, q2in, q2out, &
3049 : modc )
3050 :
3051 : ! !INPUT PARAMETERS:
3052 : integer, intent(in) :: comm ! communicator
3053 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
3054 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
3055 : real(r4), intent(in) :: q1in(*) ! input array
3056 : real(r4), optional, intent(in) :: q2in(*) ! second input array
3057 : integer, optional, intent(in) :: modc(4) ! 1: classical, swap p2p, swap a2a
3058 : ! 2: handshake
3059 : ! 3: send vs isend
3060 : ! 4: max number of outstanding requests
3061 :
3062 : ! !OUTPUT PARAMETERS:
3063 : real(r4), intent(out) :: q1out(*) ! output array
3064 : real(r4), optional, intent(out) :: q2out(*) ! second output array
3065 : !
3066 : ! !DESCRIPTION:
3067 : ! Communicate a number of contiguous parcels to/from arbitrary set of PEs.
3068 : ! Modc(1): if 0, use original approach of posting all communications here and placing
3069 : ! wait points in mp_recvirr; if 1, call swap routine with p2p messages; if 2, call swap
3070 : ! routine with a2a messages.
3071 : ! Modc(2): if 1, then apply handshaking (don't send until corresponding receive is posted)
3072 : ! Modc(3): if 1, then use blocking send; otherwise use nonblocking send
3073 : ! Modc(4): maximum number of outstanding requests (applies to swap routines only)
3074 : !
3075 : ! !REVISION HISTORY:
3076 : ! 02.08.13 Sawyer Creation
3077 : ! 02.11.06 Mirin Optimizations
3078 : ! 03.03.03 Sawyer Use partneroffset
3079 : ! 03.06.24 Sawyer Integrated Use_Mpi_Types; added qout
3080 : ! 04.02.24 Mirin Various mpi2 options
3081 : ! 08.09.18 Mirin No-op version
3082 : !
3083 : ! !BUGS:
3084 : !
3085 : !EOP
3086 : !------------------------------------------------------------------------------
3087 : !BOC
3088 : !
3089 0 : write(iulog,*) 'Mod_comm: mp_sendirr_r4 - r4 no longer supported - exiting'
3090 0 : stop
3091 :
3092 : !EOC
3093 : end subroutine mp_sendirr_r4
3094 : !------------------------------------------------------------------------------
3095 : !
3096 : !------------------------------------------------------------------------------
3097 : !BOP
3098 : ! !ROUTINE: mp_recvirr_r4 --- Finalize communication of contiguous parcels - r4
3099 : !
3100 : ! !INTERFACE:
3101 0 : subroutine mp_recvirr_r4 ( comm, send_bl, recv_bl, q1in, q1out, q2in, q2out, &
3102 : modc )
3103 :
3104 : ! !INPUT PARAMETERS:
3105 : integer, intent(in) :: comm ! communicator
3106 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
3107 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
3108 : real(r4), intent(in) :: q1in(*) ! input array
3109 : real(r4), optional, intent(in) :: q2in(*) ! second input array
3110 : integer, optional, intent(in) :: modc(4) ! 1: classical, swap p2p, swap a2a
3111 : ! 2: handshake
3112 : ! 3: send vs isend
3113 : ! 4: max number of outstanding requests
3114 : ! !INPUT/OUTPUT PARAMETERS:
3115 : real(r4), intent(inout) :: q1out(*) ! output array
3116 : real(r4), optional, intent(inout) :: q2out(*) ! second output array
3117 : !
3118 : ! !DESCRIPTION:
3119 : ! Complete transfer of a generalized region initiated by {\tt mp\_sendirr}.
3120 : ! Communicate a number of contiguous parcels to/from arbitrary set of PEs.
3121 : ! Modc(1): if 0, use original approach of posting all communications in mp_sendirr and
3122 : ! placing wait points here; otherwise don't do anything - mp_swapirr is called from mp_sendirr.
3123 : ! Modc(3): if 1, then use blocking send; otherwise use nonblocking send
3124 : !
3125 : ! !REVISION HISTORY:
3126 : ! 02.08.15 Sawyer Creation
3127 : ! 02.11.06 Mirin Optimizations
3128 : ! 03.03.03 Sawyer Now using packed arrays for MPI2
3129 : ! 04.02.24 Mirin Various mpi2 options
3130 : ! 08.09.18 Mirin No-op version
3131 : !
3132 : ! !BUGS:
3133 : !
3134 : !EOP
3135 : !------------------------------------------------------------------------------
3136 : !BOC
3137 : !
3138 0 : write(iulog,*) 'Mod_comm: mp_recvirr_r4 - r4 no longer supported - exiting'
3139 0 : stop
3140 :
3141 : !EOC
3142 : end subroutine mp_recvirr_r4
3143 : !------------------------------------------------------------------------------
3144 : !
3145 : !------------------------------------------------------------------------------
3146 : !BOP
3147 : ! !ROUTINE: mp_sendirr_i4 --- Initiate communication of contiguous parcels - i4
3148 : !
3149 : ! !INTERFACE:
3150 0 : subroutine mp_sendirr_i4 ( comm, send_bl, recv_bl, q1in, q1out, q2in, q2out, &
3151 : modc )
3152 :
3153 : ! !INPUT PARAMETERS:
3154 : integer, intent(in) :: comm ! communicator
3155 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
3156 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
3157 : integer(i4), intent(in) :: q1in(*) ! input array
3158 : integer(i4), optional, intent(in) :: q2in(*) ! second input array
3159 : integer, optional, intent(in) :: modc(4) ! 1: classical, swap p2p, swap a2a
3160 : ! 2: handshake
3161 : ! 3: send vs isend
3162 : ! 4: max number of outstanding requests
3163 :
3164 : ! !OUTPUT PARAMETERS:
3165 : integer(i4), intent(out) :: q1out(*) ! output array
3166 : integer(i4), optional, intent(out) :: q2out(*) ! second output array
3167 : !
3168 : ! !DESCRIPTION:
3169 : ! Communicate a number of contiguous parcels to/from arbitrary set of PEs.
3170 : ! Modc(1): if 0, use original approach of posting all communications here and placing
3171 : ! wait points in mp_recvirr; if 1, call swap routine with p2p messages; if 2, call swap
3172 : ! routine with a2a messages.
3173 : ! Modc(2): if 1, then apply handshaking (don't send until corresponding receive is posted)
3174 : ! Modc(3): if 1, then use blocking send; otherwise use nonblocking send
3175 : ! Modc(4): maximum number of outstanding requests (applies to swap routines only)
3176 : !
3177 : ! !REVISION HISTORY:
3178 : ! 02.08.13 Sawyer Creation
3179 : ! 02.11.06 Mirin Optimizations
3180 : ! 03.03.03 Sawyer Use partneroffset
3181 : ! 03.06.24 Sawyer Integrated Use_Mpi_Types; added qout
3182 : ! 04.02.24 Mirin Various mpi2 options
3183 : ! 08.09.18 Mirin Major overhaul, to include approaches from Mirin and Worley
3184 : ! 09.10.07 Worley eliminated mpi_recv from handshake logic
3185 : !
3186 : ! !BUGS:
3187 : !
3188 : !EOP
3189 : !------------------------------------------------------------------------------
3190 : !BOC
3191 : !
3192 : ! !LOCAL VARIABLES:
3193 : integer ipe, qsize, offset, blocksize, nparcels, offset_s, offset_r, ierr, mod_method
3194 : integer p, mysize, nthpc, minsize, nthrd, pn, pt, tmpsize, unitsize, offset_0
3195 : integer i, j, send_tag, recv_tag, num_s, num_r
3196 0 : integer :: offset_v (Max_Nparcels)
3197 0 : integer :: hs_snd, hs_rcv(numpro), hs_rcvids(numpro)
3198 : integer ipe2, ceil2num
3199 : integer onetwo
3200 : logical twovar
3201 : integer sw_local, maxreq_local
3202 : logical hs_local, send_local
3203 : logical sw_alltoall
3204 : integer comm_pid
3205 :
3206 : #if defined( MODCM_TIMING )
3207 : call t_startf('mod_comm communication')
3208 : #endif
3209 :
3210 0 : if (present(modc)) then
3211 0 : sw_local = modc(1)
3212 0 : hs_local = (modc(2) .eq. 1)
3213 0 : send_local = (modc(3) .eq. 1)
3214 0 : maxreq_local = modc(4)
3215 : else
3216 0 : sw_local = 0
3217 0 : hs_local = .true.
3218 0 : send_local = .false.
3219 0 : maxreq_local = -1
3220 : endif
3221 :
3222 : ! Do not call mp_swapirr_i4 unless mod_method equals 0
3223 0 : mod_method = recv_bl(1)%method
3224 0 : if (mod_method .gt. 0) sw_local = 0
3225 :
3226 0 : onetwo = 1
3227 0 : twovar = .false.
3228 0 : if (present(q2in)) then
3229 0 : onetwo = 2
3230 0 : twovar = .true.
3231 : endif
3232 :
3233 0 : if (sw_local .gt. 0) then
3234 0 : sw_alltoall = (sw_local .eq. 2)
3235 0 : if (present(q2in)) then
3236 : call mp_swapirr_i4(comm, send_bl, recv_bl, q1in, q1out, q2in, q2out, &
3237 : sw_handshake=hs_local, sw_maxreq=maxreq_local, &
3238 0 : sw_alltoall=sw_alltoall, sw_send=send_local)
3239 : else
3240 : call mp_swapirr_i4(comm, send_bl, recv_bl, q1in, q1out, &
3241 : sw_handshake=hs_local, sw_maxreq=maxreq_local, &
3242 0 : sw_alltoall=sw_alltoall, sw_send=send_local)
3243 : endif
3244 : else
3245 :
3246 0 : call MPI_COMM_RANK (comm, comm_pid, ierr)
3247 :
3248 0 : hs_snd = 1
3249 0 : ceil2num = ceil2(numpro)
3250 :
3251 : ! num_s = 0 if this processes is not part of the sending decomposition
3252 0 : num_s = size(send_bl)
3253 0 : if (send_bl(1)%Nparcels == -1) then
3254 0 : num_s = 0
3255 : endif
3256 :
3257 : ! num_r = 0 if this processes is not part of the receiving decomposition
3258 0 : num_r = size(recv_bl)
3259 0 : if (recv_bl(1)%Nparcels == -1) then
3260 0 : num_r = 0
3261 : endif
3262 :
3263 0 : mod_method = recv_bl(1)%method
3264 :
3265 0 : i4_win%ncall_s = i4_win%ncall_s + 1
3266 0 : if (mod_method .gt. 0) then
3267 : !
3268 : ! mpi derived types
3269 0 : if (i4_win%ncall_s .gt. MaxTrf-onetwo+1) then
3270 0 : write(iulog,*) "mp_sendirr_i4: derived type handle count exceeded - exiting"
3271 0 : write(iulog,*) "i4_win%ncall_s MaxTrf = ", i4_win%ncall_s, MaxTrf
3272 0 : stop
3273 : endif
3274 : !
3275 : ! MPI: Irecv over all processes
3276 : !
3277 0 : if (hs_local) then
3278 0 : hs_rcvids(:) = MPI_REQUEST_NULL
3279 0 : do ipe2=1, ceil2num
3280 0 : ipe = ieor(ipe2-1,comm_pid) + 1
3281 0 : if (ipe .gt. num_s) cycle
3282 0 : if ( send_bl(ipe)%type /= MPI_DATATYPE_NULL ) then
3283 0 : if (ipe-1 /= comm_pid) &
3284 0 : call MPI_IRECV ( hs_rcv(ipe), 1, mp_i4, ipe-1, comm_pid, comm, &
3285 0 : hs_rcvids(ipe), ierr )
3286 : endif
3287 : enddo
3288 : endif
3289 :
3290 0 : OutHandle(:,i4_win%ncall_s) = MPI_REQUEST_NULL
3291 0 : if (twovar) OutHandle(:,i4_win%ncall_s+1) = MPI_REQUEST_NULL
3292 0 : do ipe2=1, ceil2num
3293 0 : ipe = ieor(ipe2-1,comm_pid) + 1
3294 0 : if (ipe .gt. num_r) cycle
3295 : !
3296 : ! Receive the buffers with MPI_Irecv. Non-blocking
3297 : !
3298 0 : if ( recv_bl(ipe)%type /= MPI_DATATYPE_NULL ) then
3299 0 : recv_tag = ipe-1 + modcam_tagoffset
3300 : call mpi_irecv( q1out, 1, recv_bl(ipe)%type, ipe-1, recv_tag, &
3301 0 : comm, OutHandle(ipe,i4_win%ncall_s), ierr )
3302 0 : if (twovar) then
3303 0 : call mpi_irecv( q2out, 1, recv_bl(ipe)%type, ipe-1, recv_tag, &
3304 0 : comm, OutHandle(ipe,i4_win%ncall_s+1), ierr )
3305 : endif
3306 0 : if (hs_local) then
3307 0 : if (ipe-1 /= comm_pid) &
3308 0 : call MPI_SEND ( hs_snd, 1, mp_i4, ipe-1, ipe-1, comm, ierr )
3309 : endif
3310 : endif
3311 : enddo
3312 :
3313 : !
3314 : ! MPI: Isend/Send over all processes; use risend/rsend with hs
3315 : !
3316 0 : InHandle(:,i4_win%ncall_s) = MPI_REQUEST_NULL
3317 0 : if (twovar) InHandle(:,i4_win%ncall_s+1) = MPI_REQUEST_NULL
3318 0 : do ipe2=1, ceil2num
3319 0 : ipe = ieor(ipe2-1,comm_pid) + 1
3320 0 : if (ipe .gt. num_s) cycle
3321 :
3322 : !
3323 : ! Send the individual buffers with non-blocking sends
3324 : !
3325 0 : if ( send_bl(ipe)%type /= MPI_DATATYPE_NULL ) then
3326 0 : send_tag = comm_pid + modcam_tagoffset
3327 0 : if (hs_local) then
3328 0 : if (ipe-1 /= comm_pid) &
3329 0 : call MPI_WAIT ( hs_rcvids(ipe), MPI_STATUS_IGNORE, ierr )
3330 0 : if (send_local) then
3331 0 : call mpi_rsend( q1in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
3332 0 : comm, ierr )
3333 : else
3334 0 : call mpi_irsend( q1in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
3335 0 : comm, InHandle(ipe,i4_win%ncall_s), ierr )
3336 : endif
3337 0 : if (twovar) then
3338 0 : if (send_local) then
3339 0 : call mpi_rsend( q2in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
3340 0 : comm, ierr )
3341 : else
3342 0 : call mpi_irsend( q2in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
3343 0 : comm, InHandle(ipe,i4_win%ncall_s+1), ierr )
3344 : endif
3345 : endif
3346 : else
3347 0 : if (send_local) then
3348 : call mpi_send( q1in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
3349 0 : comm, ierr )
3350 : else
3351 : call mpi_isend( q1in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
3352 0 : comm, InHandle(ipe,i4_win%ncall_s), ierr )
3353 : endif
3354 0 : if (twovar) then
3355 0 : if (send_local) then
3356 0 : call mpi_send( q2in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
3357 0 : comm, ierr )
3358 : else
3359 0 : call mpi_isend( q2in, 1, send_bl(ipe)%type, ipe-1, send_tag, &
3360 0 : comm, InHandle(ipe,i4_win%ncall_s+1), ierr )
3361 : endif
3362 : endif
3363 : endif
3364 : endif
3365 : enddo
3366 : else
3367 :
3368 : ! temporary contiguous buffers
3369 :
3370 0 : if (i4_win%ncall_s .gt. max_irr-onetwo+1) then
3371 0 : write(iulog,*) "mp_sendirr_i4: insufficient window storage - exiting"
3372 0 : write(iulog,*) "i4_win%ncall_s max_irr = ", i4_win%ncall_s, max_irr
3373 0 : stop
3374 : endif
3375 0 : unitsize = i4_win%size/max_irr
3376 :
3377 : ! issue call to receive data in global receive buffer
3378 0 : offset_0 = (i4_win%ncall_s-1)*unitsize
3379 0 : offset_s = offset_0
3380 0 : offset_r = offset_0
3381 :
3382 0 : if (hs_local) then
3383 0 : hs_rcvids(:) = MPI_REQUEST_NULL
3384 0 : do ipe2=1, ceil2num
3385 0 : ipe = ieor(ipe2-1,comm_pid) + 1
3386 0 : if (ipe .gt. num_s) cycle
3387 0 : qsize = onetwo*send_bl(ipe)%Tot_Size
3388 0 : if (qsize .ne. 0) then
3389 0 : i4_win%dest = ipe-1
3390 0 : send_tag = comm_pid + modcam_tagoffset
3391 0 : if (i4_win%dest /= comm_pid) &
3392 0 : call MPI_IRECV ( hs_rcv(ipe), 1, mp_i4, i4_win%dest, send_tag, comm, &
3393 0 : hs_rcvids(ipe), ierr )
3394 : endif
3395 : enddo
3396 : endif
3397 :
3398 0 : do ipe2=1, ceil2num
3399 0 : ipe = ieor(ipe2-1,comm_pid) + 1
3400 0 : if (ipe .gt. num_r) cycle
3401 0 : i4_win%size_r = onetwo*recv_bl(ipe)%Tot_Size
3402 0 : if (i4_win%size_r .ne. 0) then
3403 0 : i4_win%offset_r = offset_r
3404 0 : offset_r = offset_r + i4_win%size_r
3405 0 : i4_win%src = ipe-1
3406 0 : if (onetwo*unitsize >= offset_r-offset_0) then
3407 0 : recv_tag = i4_win%src + modcam_tagoffset
3408 0 : qsize = i4_win%size_r
3409 0 : i4_win%nrecv = i4_win%nrecv + 1
3410 0 : call MPI_IRECV(ga_i4_r(i4_win%offset_r+1), qsize, mp_i4, i4_win%src, &
3411 0 : recv_tag, comm, i4_win%rqest(i4_win%nrecv), ierror)
3412 0 : if (hs_local) then
3413 0 : if (i4_win%src /= comm_pid) &
3414 0 : call MPI_SEND ( hs_snd, 1, mp_i4, i4_win%src, recv_tag, comm, ierror)
3415 : endif
3416 : else
3417 0 : write(iulog,*) "Fatal mp_sendirr_i4: receive window out of space - exiting"
3418 0 : write(iulog,*) 'comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
3419 0 : ipe, unitsize, offset_r, offset_0
3420 0 : stop
3421 : endif
3422 : endif
3423 : enddo
3424 : ! gather data into global send buffer
3425 0 : do ipe2=1, ceil2num
3426 0 : ipe = ieor(ipe2-1,comm_pid) + 1
3427 0 : if (ipe .gt. num_s) cycle
3428 0 : qsize = onetwo*send_bl(ipe)%Tot_Size
3429 0 : if (qsize .ne. 0) then
3430 0 : i4_win%dest = ipe-1
3431 0 : i4_win%offset_s = offset_s
3432 0 : offset_s = offset_s + qsize
3433 0 : if (offset_s-offset_0 .gt. onetwo*unitsize) then
3434 0 : write(iulog,*) "Fatal mp_sendirr_i4: send window out of space - exiting"
3435 0 : write(iulog,*) 'comm_pid ipe unitsize offset_s offset_0 = ', comm_pid, &
3436 0 : ipe, unitsize, offset_s, offset_0
3437 0 : stop
3438 : endif
3439 :
3440 0 : offset_v(1) = i4_win%offset_s
3441 0 : do j = 2, send_bl(ipe)%nparcels
3442 0 : offset_v(j) = offset_v(j-1) + send_bl(ipe)%blocksizes(j-1)
3443 : enddo
3444 :
3445 0 : do j = 1, send_bl(ipe)%nparcels
3446 0 : do i = 1, send_bl(ipe)%blocksizes(j)
3447 0 : ga_i4_s(offset_v(j)+i) = q1in(send_bl(ipe)%displacements(j)+i)
3448 : enddo
3449 : enddo
3450 0 : if (twovar) then
3451 0 : do j = 1, send_bl(ipe)%nparcels
3452 0 : do i = 1, send_bl(ipe)%blocksizes(j)
3453 0 : ga_i4_s(send_bl(ipe)%Tot_Size+offset_v(j)+i) = q2in(send_bl(ipe)%displacements(j)+i)
3454 : enddo
3455 : enddo
3456 : endif
3457 :
3458 : ! nonblocking send
3459 0 : send_tag = comm_pid + modcam_tagoffset
3460 0 : i4_win%nsend = i4_win%nsend + 1
3461 0 : if (hs_local) then
3462 0 : if (i4_win%dest /= comm_pid) &
3463 0 : call MPI_WAIT ( hs_rcvids(ipe), MPI_STATUS_IGNORE, ierr )
3464 0 : if (send_local) then
3465 0 : call MPI_RSEND(ga_i4_s(i4_win%offset_s+1), qsize, mp_i4, i4_win%dest, &
3466 0 : send_tag, comm, ierr)
3467 : else
3468 0 : call MPI_IRSEND(ga_i4_s(i4_win%offset_s+1), qsize, mp_i4, i4_win%dest, &
3469 0 : send_tag, comm, i4_win%sqest(i4_win%nsend), ierr)
3470 : endif
3471 : else
3472 0 : if (send_local) then
3473 0 : call MPI_SEND(ga_i4_s(i4_win%offset_s+1), qsize, mp_i4, i4_win%dest, &
3474 0 : send_tag, comm, ierr)
3475 : else
3476 0 : call MPI_ISEND(ga_i4_s(i4_win%offset_s+1), qsize, mp_i4, i4_win%dest, &
3477 0 : send_tag, comm, i4_win%sqest(i4_win%nsend), ierr)
3478 : endif
3479 : endif
3480 : endif
3481 : enddo
3482 :
3483 : endif ! mod_method
3484 :
3485 0 : if (twovar) i4_win%ncall_s = i4_win%ncall_s + 1
3486 :
3487 : endif ! sw_local
3488 :
3489 : #if defined( MODCM_TIMING )
3490 : call t_stopf('mod_comm communication')
3491 : #endif
3492 :
3493 0 : end subroutine mp_sendirr_i4
3494 : !------------------------------------------------------------------------------
3495 : !
3496 : !------------------------------------------------------------------------------
3497 : !BOP
3498 : ! !ROUTINE: mp_recvirr_i4 --- Finalize communication of contiguous parcels - i4
3499 : !
3500 : ! !INTERFACE:
3501 0 : subroutine mp_recvirr_i4 ( comm, send_bl, recv_bl, q1in, q1out, q2in, q2out, &
3502 : modc )
3503 :
3504 : ! !INPUT PARAMETERS:
3505 : integer, intent(in) :: comm ! communicator
3506 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
3507 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
3508 : integer(i4), intent(in) :: q1in(*) ! input array
3509 : integer(i4), optional, intent(in) :: q2in(*) ! second input array
3510 : integer, optional, intent(in) :: modc(4) ! 1: classical, swap p2p, swap a2a
3511 : ! 2: handshake
3512 : ! 3: send vs isend
3513 : ! 4: max number of outstanding requests
3514 :
3515 : ! !INPUT/OUTPUT PARAMETERS:
3516 : integer(i4), intent(inout) :: q1out(*) ! output array
3517 : integer(i4), optional, intent(inout) :: q2out(*) ! second output array
3518 : !
3519 : ! !DESCRIPTION:
3520 : ! Complete transfer of a generalized region initiated by {\tt mp\_sendirr}.
3521 : ! Communicate a number of contiguous parcels to/from arbitrary set of PEs.
3522 : ! Modc(1): if 0, use original approach of posting all communications in mp_sendirr and
3523 : ! placing wait points here; otherwise don't do anything - mp_swapirr is called from mp_sendirr.
3524 : ! Modc(3): if 1, then use blocking send; otherwise use nonblocking send
3525 : !
3526 : ! !REVISION HISTORY:
3527 : ! 02.08.15 Sawyer Creation
3528 : ! 02.11.06 Mirin Optimizations
3529 : ! 03.03.03 Sawyer Now using packed arrays for MPI2
3530 : ! 04.02.24 Mirin Various mpi2 options
3531 : ! 08.09.18 Mirin Major overhaul, to include approaches from Mirin and Worley
3532 : !
3533 : !EOP
3534 : !------------------------------------------------------------------------------
3535 : !BOC
3536 : integer :: ipe, blocksize, offset_r, mod_method
3537 : integer unitsize, offset_0
3538 : integer Ierr
3539 0 : integer InStats(numpro*MPI_STATUS_SIZE)
3540 0 : integer OutStats(numpro*MPI_STATUS_SIZE)
3541 : integer i, j, num_r, num_s
3542 0 : integer :: offset_v (Max_Nparcels)
3543 : integer ipe2, ceil2num
3544 : integer onetwo
3545 : logical twovar
3546 : integer sw_local, maxreq_local
3547 : logical hs_local, send_local
3548 : logical sw_alltoall
3549 : integer comm_size, comm_pid
3550 :
3551 0 : if (present(modc)) then
3552 0 : sw_local = modc(1)
3553 0 : hs_local = (modc(2) .eq. 1)
3554 0 : send_local = (modc(3) .eq. 1)
3555 0 : maxreq_local = modc(4)
3556 : else
3557 : sw_local = 0
3558 0 : hs_local = .true.
3559 : send_local = .false.
3560 0 : maxreq_local = -1
3561 : endif
3562 :
3563 : ! Do not call mp_swapirr_i4 (hence return) unless mod_method equals 0
3564 0 : mod_method = recv_bl(1)%method
3565 0 : if (mod_method .gt. 0) sw_local = 0
3566 :
3567 : ! Return if swap_irr
3568 0 : if (sw_local .gt. 0) return
3569 :
3570 : #if defined( MODCM_TIMING )
3571 : call t_startf('mod_comm communication')
3572 : #endif
3573 :
3574 0 : onetwo = 1
3575 0 : twovar = .false.
3576 0 : if (present(q2in)) then
3577 0 : onetwo = 2
3578 0 : twovar = .true.
3579 : endif
3580 :
3581 0 : call MPI_COMM_SIZE (comm, comm_size, ierr)
3582 0 : call MPI_COMM_RANK (comm, comm_pid, ierr)
3583 :
3584 0 : ceil2num = ceil2(numpro)
3585 :
3586 : ! num_s = 0 if this processes is not part of the sending decomposition
3587 0 : num_s = size(send_bl)
3588 0 : if (send_bl(1)%Nparcels == -1) then
3589 0 : num_s = 0
3590 : endif
3591 :
3592 : ! num_r = 0 if this processes is not part of the receiving decomposition
3593 0 : num_r = size(recv_bl)
3594 0 : if (recv_bl(1)%Nparcels == -1) then
3595 0 : num_r = 0
3596 : endif
3597 :
3598 0 : mod_method = recv_bl(1)%method
3599 :
3600 0 : i4_win%ncall_r = i4_win%ncall_r + 1
3601 :
3602 0 : if (mod_method .gt. 0) then
3603 :
3604 : ! mpi derived types
3605 0 : if (i4_win%ncall_r .gt. MaxTrf-onetwo+1) then
3606 0 : write(iulog,*) "mp_recvirr_i4: derived type handle count exceeded - exiting"
3607 0 : write(iulog,*) "i4_win%ncall_r MaxTrf = ", i4_win%ncall_r, MaxTrf
3608 0 : stop
3609 : endif
3610 :
3611 0 : if (num_s .gt. 0 .and. (.not. send_local)) then
3612 0 : CALL MPI_WAITALL( comm_size, InHandle(:,i4_win%ncall_r), InStats, Ierr )
3613 0 : if (twovar) then
3614 0 : CALL MPI_WAITALL( comm_size, InHandle(:,i4_win%ncall_r+1), InStats, Ierr )
3615 : endif
3616 : endif
3617 0 : if (num_r .gt. 0) then
3618 0 : CALL MPI_WAITALL( comm_size, OutHandle(:,i4_win%ncall_r), OutStats, Ierr )
3619 0 : if (twovar) then
3620 0 : CALL MPI_WAITALL( comm_size, OutHandle(:,i4_win%ncall_r+1), OutStats, Ierr )
3621 : endif
3622 : endif
3623 :
3624 : else
3625 :
3626 : ! temporary contiguous buffer / global window
3627 :
3628 0 : if (i4_win%ncall_r .gt. max_irr-onetwo+1) then
3629 0 : write(iulog,*) "mp_recvirr_i4: insufficient window storage - exiting"
3630 0 : write(iulog,*) "i4_win%ncall_r max_irr = ", i4_win%ncall_r, max_irr
3631 0 : stop
3632 : endif
3633 0 : unitsize = i4_win%size/max_irr
3634 :
3635 : ! scatter data from global receive buffer to final destination
3636 0 : offset_0 = (i4_win%ncall_r-1)*unitsize
3637 0 : offset_r = offset_0
3638 :
3639 0 : do ipe2=1, ceil2num
3640 0 : ipe = ieor(ipe2-1,comm_pid) + 1
3641 0 : if (ipe .gt. num_r) cycle
3642 0 : i4_win%size_r = onetwo*recv_bl(ipe)%Tot_Size
3643 0 : if (i4_win%size_r .ne. 0) then
3644 0 : i4_win%offset_r = offset_r
3645 0 : offset_r = offset_r + i4_win%size_r
3646 0 : if (offset_r-offset_0 .gt. onetwo*unitsize) then
3647 0 : write(iulog,*) "Fatal mp_recvirr_i4: receive window out of space - exiting"
3648 0 : write(iulog,*) 'comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
3649 0 : ipe, unitsize, offset_r, offset_0
3650 0 : stop
3651 : endif
3652 :
3653 0 : i4_win%nread = i4_win%nread + 1
3654 0 : call MPI_WAIT(i4_win%rqest(i4_win%nread), Status, ierr)
3655 :
3656 0 : offset_v(1) = i4_win%offset_r
3657 0 : do j = 2, recv_bl(ipe)%Nparcels
3658 0 : offset_v(j) = offset_v(j-1) + recv_bl(ipe)%blocksizes(j-1)
3659 : enddo
3660 :
3661 0 : do j = 1, recv_bl(ipe)%Nparcels
3662 0 : do i = 1, recv_bl(ipe)%blocksizes(j)
3663 0 : q1out(recv_bl(ipe)%displacements(j)+i) = ga_i4_r(offset_v(j)+i)
3664 : enddo
3665 : enddo
3666 0 : if (twovar) then
3667 0 : do j = 1, recv_bl(ipe)%Nparcels
3668 0 : do i = 1, recv_bl(ipe)%blocksizes(j)
3669 0 : q2out(recv_bl(ipe)%displacements(j)+i) = ga_i4_r(recv_bl(ipe)%Tot_Size+offset_v(j)+i)
3670 : enddo
3671 : enddo
3672 : endif
3673 :
3674 : endif
3675 : enddo
3676 :
3677 0 : if ((i4_win%ncall_s == i4_win%ncall_r + onetwo - 1) .and. (.not. send_local)) then
3678 0 : call MPI_WAITALL(i4_win%nsend, i4_win%sqest, Stats, ierror)
3679 : endif
3680 :
3681 : endif ! mod_method .gt. 0
3682 :
3683 0 : if (twovar) i4_win%ncall_r = i4_win%ncall_r + 1
3684 :
3685 0 : if (i4_win%ncall_s == i4_win%ncall_r) then
3686 0 : i4_win%nsend = 0
3687 0 : i4_win%nrecv = 0
3688 0 : i4_win%nread = 0
3689 0 : i4_win%ncall_s = 0
3690 0 : i4_win%ncall_r = 0
3691 : endif
3692 :
3693 : #if defined( MODCM_TIMING )
3694 : call t_stopf('mod_comm communication')
3695 : #endif
3696 :
3697 : !EOC
3698 : end subroutine mp_recvirr_i4
3699 : !------------------------------------------------------------------------------
3700 : !
3701 : !------------------------------------------------------------------------------
3702 : !BOP
3703 : ! !ROUTINE: mp_swapirr --- Write r8 contiguous parcels to global array
3704 : ! using XOR swap ordering
3705 : !
3706 : ! !INTERFACE:
3707 0 : subroutine mp_swapirr ( comm, send_bl, recv_bl, a1in, a1out, &
3708 : a2in, a2out, sw_handshake, sw_maxreq, &
3709 : sw_alltoall, sw_send )
3710 :
3711 : ! !INPUT PARAMETERS:
3712 : integer, intent(in) :: comm ! communicator
3713 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
3714 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
3715 : real(r8), intent(in) :: a1in(*) ! local data segment
3716 : real(r8), optional, intent(in) :: a2in(*) ! local data segment
3717 : logical, optional, intent(in) :: sw_handshake ! use flow control and
3718 : ! ready send
3719 : integer, optional, intent(in) :: sw_maxreq ! maximum number of outstanding
3720 : ! MPI requests
3721 : logical, optional, intent(in) :: sw_alltoall ! use mpi_alltoall
3722 : logical, optional, intent(in) :: sw_send ! use mpi_send instead of isend
3723 :
3724 : ! !OUTPUT PARAMETERS:
3725 : real(r8), intent(out) :: a1out(*) ! local output segment
3726 : real(r8), optional, intent(out) :: a2out(*) ! local output segment
3727 : !
3728 : ! !DESCRIPTION:
3729 : ! XOR-ordered version of all-to-all communication
3730 : !
3731 : ! WARNING: mod_comm parameter max_irr might need to be set larger than expected
3732 : ! when swapping two variables; specifically, max_irr must be at least
3733 : ! as large as the incoming r8_win%ncall_s + the number of variables to
3734 : ! be swapped
3735 : !
3736 : ! !REVISION HISTORY:
3737 : ! 08.06.30 Worley original: derived from mp_sendirr, but using
3738 : ! swapm logic and XOR swap order
3739 : ! 08.08.22 Worley removed swapm; reimplemented with native MPI,
3740 : ! added flow control/ready send option and maxreq
3741 : ! throttling, added alltoall option
3742 : ! 09.10.07 Worley eliminated mpi_recv from handshake logic
3743 : !
3744 : ! !BUGS:
3745 : !
3746 : !EOP
3747 : !------------------------------------------------------------------------------
3748 : !BOC
3749 : !
3750 : ! !LOCAL VARIABLES:
3751 : integer :: i, j, p, istep, num_s, num_r
3752 : integer :: comm_pid, comm_size, steps, ierr
3753 : integer :: ipe, offset_s, offset_r, offset_0, unitsize, onetwo
3754 :
3755 0 : integer :: arr_sndlths(0:numpro-1), arr_rcvlths(0:numpro-1)
3756 0 : integer :: sndlths(0:numpro-1), sdispls(0:numpro-1)
3757 0 : integer :: rcvlths(0:numpro-1), rdispls(0:numpro-1)
3758 0 : integer :: swapids(numpro)
3759 0 : integer :: sndids(numpro) ! nonblocking MPI send request ids
3760 0 : integer :: rcvids(numpro) ! nonblocking MPI recv request ids
3761 0 : integer :: hs_snd, hs_rcv(numpro)! handshake variables (send/receive)
3762 0 : integer :: hs_rcvids(numpro) ! nonblocking MPI handshake recv request ids
3763 0 : integer :: InStats(numpro*MPI_STATUS_SIZE)
3764 0 : integer :: OutStats(numpro*MPI_STATUS_SIZE)
3765 :
3766 : integer :: offset_v
3767 :
3768 : integer :: rstep
3769 :
3770 : integer :: maxreq, maxreqh
3771 : logical :: handshake, alltoall, sendd
3772 :
3773 : #if defined( MODCM_TIMING )
3774 : call t_startf('mod_comm communication')
3775 : #endif
3776 :
3777 0 : call MPI_COMM_SIZE (comm, comm_size, ierr)
3778 0 : call MPI_COMM_RANK (comm, comm_pid, ierr)
3779 :
3780 : ! num_s = 0 if this process is not part of the sending decomposition
3781 0 : num_s = size(send_bl)
3782 0 : if (send_bl(1)%Nparcels == -1) then
3783 0 : num_s = 0
3784 : endif
3785 :
3786 : ! num_r = 0 if this process is not part of the receiving decomposition
3787 0 : num_r = size(recv_bl)
3788 0 : if (recv_bl(1)%Nparcels == -1) then
3789 0 : num_r = 0
3790 : endif
3791 :
3792 0 : if ( present(a2in) .and. (.not. present(a2out)) ) then
3793 0 : write(iulog,*) "Fatal mp_swapirr: a2in specified, but a2out missing - exiting"
3794 0 : stop
3795 : endif
3796 :
3797 0 : if ( (.not. present(a2in)) .and. present(a2out)) then
3798 0 : write(iulog,*) "Fatal mp_swapirr: a2out specified, but a2in missing - exiting"
3799 0 : stop
3800 : endif
3801 :
3802 0 : if ( present(sw_handshake) ) then
3803 0 : handshake = sw_handshake
3804 0 : hs_snd = 1
3805 : else
3806 : handshake = .false.
3807 : endif
3808 :
3809 0 : if ( present(sw_alltoall) ) then
3810 0 : alltoall = sw_alltoall
3811 : else
3812 : alltoall = .false.
3813 : endif
3814 :
3815 0 : if ( present(sw_send) ) then
3816 0 : sendd = sw_send
3817 : else
3818 : sendd = .false.
3819 : endif
3820 :
3821 0 : onetwo = 1
3822 0 : if (present(a2in)) onetwo = 2
3823 0 : unitsize = r8_win%size/max_irr
3824 :
3825 : ! advance to unused portion of storage window
3826 0 : r8_win%ncall_s = r8_win%ncall_s + 1
3827 :
3828 0 : if (r8_win%ncall_s .gt. max_irr-onetwo+1) then
3829 0 : write(iulog,*) "mp_swapirr: insufficient window storage - exiting"
3830 0 : write(iulog,*) "r8_win%ncall_s max_irr = ", r8_win%ncall_s, max_irr
3831 0 : stop
3832 : endif
3833 :
3834 : ! calculate send lengths and displacements
3835 0 : offset_0 = (r8_win%ncall_s-1)*unitsize
3836 0 : offset_s = offset_0
3837 0 : sndlths(:) = 0
3838 0 : sdispls(:) = 0
3839 0 : arr_sndlths(:) = 0
3840 0 : do ipe=1, num_s
3841 0 : sndlths(ipe-1) = send_bl(ipe)%Tot_Size
3842 0 : sdispls(ipe-1) = offset_s
3843 0 : if (sndlths(ipe-1) .ne. 0) then
3844 :
3845 : ! pack first array
3846 0 : offset_s = offset_s + sndlths(ipe-1)
3847 0 : if (offset_s-offset_0 .gt. onetwo*unitsize) then
3848 0 : write(iulog,*) "Fatal mp_swapirr: send window out of space - exiting"
3849 0 : write(iulog,*) '1 comm_pid ipe unitsize offset_s offset_0 = ', comm_pid, &
3850 0 : ipe, unitsize, offset_s, offset_0
3851 0 : stop
3852 : endif
3853 :
3854 0 : arr_sndlths(ipe-1) = sndlths(ipe-1)
3855 :
3856 : ! calculate for second array (if it exists)
3857 0 : if ( present(a2in) ) then
3858 :
3859 0 : offset_s = offset_s + sndlths(ipe-1)
3860 0 : if (offset_s-offset_0 .gt. onetwo*unitsize) then
3861 0 : write(iulog,*) "Fatal mp_swapirr: send window out of space - exiting"
3862 0 : write(iulog,*) '2 comm_pid ipe unitsize offset_s offset_0 = ', comm_pid, &
3863 0 : ipe, unitsize, offset_s, offset_0
3864 0 : stop
3865 : endif
3866 :
3867 0 : sndlths(ipe-1) = sndlths(ipe-1) + arr_sndlths(ipe-1)
3868 :
3869 : endif
3870 :
3871 : endif
3872 : enddo
3873 :
3874 : ! calculate receive lengths and displacements
3875 0 : offset_r = offset_0
3876 0 : rcvlths(:) = 0
3877 0 : rdispls(:) = 0
3878 0 : arr_rcvlths(:) = 0
3879 0 : do ipe=1, num_r
3880 0 : rcvlths(ipe-1) = recv_bl(ipe)%Tot_Size
3881 0 : rdispls(ipe-1) = offset_r
3882 0 : if (rcvlths(ipe-1) .ne. 0) then
3883 :
3884 0 : offset_r = offset_r + rcvlths(ipe-1)
3885 0 : if (onetwo*unitsize < offset_r-offset_0) then
3886 0 : write(iulog,*) "Fatal mp_swapirr: receive window out of space - exiting"
3887 0 : write(iulog,*) '1 comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
3888 0 : ipe, unitsize, offset_r, offset_0
3889 0 : stop
3890 : endif
3891 :
3892 0 : arr_rcvlths(ipe-1) = rcvlths(ipe-1)
3893 :
3894 : ! compute for second array (if it exists)
3895 0 : if ( present(a2out) ) then
3896 :
3897 0 : offset_r = offset_r + rcvlths(ipe-1)
3898 0 : if (onetwo*unitsize < offset_r-offset_0) then
3899 0 : write(iulog,*) "Fatal mp_swapirr: receive window out of space - exiting"
3900 0 : write(iulog,*) '2 comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
3901 0 : ipe, unitsize, offset_r, offset_0
3902 0 : stop
3903 : endif
3904 :
3905 0 : rcvlths(ipe-1) = rcvlths(ipe-1) + arr_rcvlths(ipe-1)
3906 :
3907 : endif
3908 :
3909 : endif
3910 : enddo
3911 :
3912 : ! Calculate swap partners and number of steps in point-to-point
3913 : ! implementations of alltoall algorithm.
3914 0 : steps = 0
3915 0 : do ipe=1,ceil2(comm_size)-1
3916 0 : p = pair(comm_size,ipe,comm_pid)
3917 0 : if (p >= 0) then
3918 0 : if (sndlths(p) > 0 .or. rcvlths(p) > 0) then
3919 0 : steps = steps + 1
3920 0 : swapids(steps) = p
3921 : end if
3922 : end if
3923 : end do
3924 :
3925 0 : if (.not. alltoall) then
3926 :
3927 0 : sndids(1:steps) = MPI_REQUEST_NULL
3928 0 : rcvids(1:steps) = MPI_REQUEST_NULL
3929 :
3930 0 : if (steps .eq. 0) then
3931 0 : maxreq = 0
3932 0 : maxreqh = 0
3933 0 : elseif (steps .eq. 1) then
3934 0 : maxreq = 1
3935 0 : maxreqh = 1
3936 : else
3937 0 : if ( present(sw_maxreq) ) then
3938 0 : if ((sw_maxreq .le. steps) .and. (sw_maxreq .ge. 0)) then
3939 0 : maxreq = sw_maxreq
3940 0 : if (maxreq > 1) then
3941 0 : maxreqh = maxreq/2
3942 : else
3943 0 : maxreq = 2
3944 0 : maxreqh = 1
3945 : endif
3946 : else
3947 0 : maxreq = steps
3948 0 : maxreqh = steps
3949 : endif
3950 : else
3951 0 : maxreq = steps
3952 0 : maxreqh = steps
3953 : endif
3954 : endif
3955 :
3956 : ! Post initial handshake receive requests
3957 0 : if (handshake) then
3958 0 : do istep=1,maxreq
3959 0 : p = swapids(istep)
3960 0 : if (sndlths(p) > 0) then
3961 : call mpi_irecv ( hs_rcv(istep), 1, mp_i4, p, comm_pid, comm, &
3962 0 : hs_rcvids(istep), ierr )
3963 : endif
3964 : enddo
3965 : endif
3966 :
3967 : ! Post initial receive requests
3968 0 : do istep=1,maxreq
3969 0 : p = swapids(istep)
3970 0 : if (rcvlths(p) > 0) then
3971 0 : offset_r = rdispls(p)+1
3972 0 : call mpi_irecv ( ga_r8_r(offset_r), rcvlths(p), mp_r8, &
3973 0 : p, p, comm, rcvids(istep), ierr )
3974 0 : if (handshake) then
3975 : call mpi_send( hs_snd, 1, mp_i4, p, p, comm, &
3976 0 : ierr )
3977 : endif
3978 : endif
3979 : enddo
3980 : rstep = maxreq
3981 : !
3982 : endif
3983 :
3984 : ! gather data into global send buffer
3985 0 : do istep=1,steps
3986 0 : p = swapids(istep)
3987 :
3988 0 : if (sndlths(p) .ne. 0) then
3989 0 : offset_v = sdispls(p)
3990 0 : do j = 1, send_bl(p+1)%nparcels
3991 0 : do i = 1, send_bl(p+1)%blocksizes(j)
3992 0 : ga_r8_s(offset_v+i) = a1in(send_bl(p+1)%displacements(j)+i)
3993 : enddo
3994 0 : offset_v = offset_v + send_bl(p+1)%blocksizes(j)
3995 : enddo
3996 :
3997 : ! pack second array (if it exists)
3998 0 : if ( present(a2in) ) then
3999 0 : offset_v = sdispls(p) + arr_sndlths(p)
4000 0 : do j = 1, send_bl(p+1)%nparcels
4001 0 : do i = 1, send_bl(p+1)%blocksizes(j)
4002 0 : ga_r8_s(offset_v+i) = a2in(send_bl(p+1)%displacements(j)+i)
4003 : enddo
4004 0 : offset_v = offset_v + send_bl(p+1)%blocksizes(j)
4005 : enddo
4006 : endif
4007 :
4008 : endif
4009 :
4010 0 : if (.not. alltoall) then
4011 :
4012 : ! Submit new i(r)send request
4013 0 : offset_s = sdispls(p)+1
4014 0 : if (sndlths(p) > 0) then
4015 0 : if (handshake) then
4016 : call mpi_wait( hs_rcvids(istep), MPI_STATUS_IGNORE, ierr )
4017 0 : if (sendd) then
4018 0 : call mpi_rsend( ga_r8_s(offset_s), sndlths(p), mp_r8, &
4019 0 : p, comm_pid, comm, ierr )
4020 : else
4021 0 : call mpi_irsend( ga_r8_s(offset_s), sndlths(p), mp_r8, &
4022 0 : p, comm_pid, comm, sndids(istep), ierr )
4023 : endif
4024 : else
4025 0 : if (sendd) then
4026 0 : call mpi_send ( ga_r8_s(offset_s), sndlths(p), mp_r8, &
4027 0 : p, comm_pid, comm, ierr )
4028 : else
4029 0 : call mpi_isend ( ga_r8_s(offset_s), sndlths(p), mp_r8, &
4030 0 : p, comm_pid, comm, sndids(istep), ierr )
4031 : endif
4032 : endif
4033 : endif
4034 :
4035 0 : if (istep > maxreqh) then
4036 : ! Wait for oldest irecv request to complete
4037 0 : call mpi_wait( rcvids(istep-maxreqh), OutStats, ierr )
4038 :
4039 0 : if (rstep < steps) then
4040 0 : rstep = rstep + 1
4041 0 : p = swapids(rstep)
4042 :
4043 : ! Submit a new handshake irecv request
4044 0 : if (handshake) then
4045 0 : if (sndlths(p) > 0) then
4046 : call mpi_irecv( hs_rcv(rstep), 1, mp_i4, p, comm_pid, comm, &
4047 0 : hs_rcvids(rstep), ierr )
4048 : endif
4049 : endif
4050 :
4051 : ! Submit a new irecv request
4052 0 : if (rcvlths(p) > 0) then
4053 0 : offset_r = rdispls(p)+1
4054 0 : call mpi_irecv( ga_r8_r(offset_r), rcvlths(p), mp_r8, &
4055 0 : p, p, comm, rcvids(rstep), ierr )
4056 0 : if (handshake) then
4057 : call mpi_send ( hs_snd, 1, mp_i4, p, p, comm, &
4058 0 : ierr )
4059 : endif
4060 : endif
4061 : endif
4062 :
4063 : ! Wait for outstanding i(r)send request to complete
4064 0 : if (.not. sendd) then
4065 0 : call mpi_wait( sndids(istep-maxreqh), InStats, ierr )
4066 : endif
4067 : endif
4068 : !
4069 : endif
4070 : !
4071 : enddo
4072 :
4073 : ! local copy to send buffer
4074 0 : if (sndlths(comm_pid) .ne. 0) then
4075 :
4076 0 : offset_v = sdispls(comm_pid)
4077 0 : do j = 1, send_bl(comm_pid+1)%nparcels
4078 0 : do i = 1, send_bl(comm_pid+1)%blocksizes(j)
4079 0 : ga_r8_s(offset_v+i) = a1in(send_bl(comm_pid+1)%displacements(j)+i)
4080 : enddo
4081 0 : offset_v = offset_v + send_bl(comm_pid+1)%blocksizes(j)
4082 : enddo
4083 :
4084 : ! pack second array (if it exists)
4085 0 : if ( present(a2in) ) then
4086 0 : offset_v = sdispls(comm_pid) + arr_sndlths(comm_pid)
4087 0 : do j = 1, send_bl(comm_pid+1)%nparcels
4088 0 : do i = 1, send_bl(comm_pid+1)%blocksizes(j)
4089 0 : ga_r8_s(offset_v+i) = a2in(send_bl(comm_pid+1)%displacements(j)+i)
4090 : enddo
4091 0 : offset_v = offset_v + send_bl(comm_pid+1)%blocksizes(j)
4092 : enddo
4093 : endif
4094 :
4095 0 : if (.not. alltoall) then
4096 0 : ga_r8_r(rdispls(comm_pid)+1:rdispls(comm_pid)+rcvlths(comm_pid)) = &
4097 0 : ga_r8_s(sdispls(comm_pid)+1:sdispls(comm_pid)+sndlths(comm_pid))
4098 : endif
4099 :
4100 : endif
4101 :
4102 0 : if (alltoall) then
4103 : call mpi_alltoallv (ga_r8_s, sndlths, sdispls, mp_r8, &
4104 : ga_r8_r, rcvlths, rdispls, mp_r8, &
4105 0 : comm, ierror)
4106 : endif
4107 :
4108 : ! local copy from receive buffer
4109 0 : if (rcvlths(comm_pid) .ne. 0) then
4110 :
4111 0 : offset_v = rdispls(comm_pid)
4112 0 : do j = 1, recv_bl(comm_pid+1)%Nparcels
4113 0 : do i = 1, recv_bl(comm_pid+1)%blocksizes(j)
4114 0 : a1out(recv_bl(comm_pid+1)%displacements(j)+i) = ga_r8_r(offset_v+i)
4115 : enddo
4116 0 : offset_v = offset_v + recv_bl(comm_pid+1)%blocksizes(j)
4117 : enddo
4118 :
4119 : ! scatter data for second array (if it exists)
4120 0 : if ( present(a2out) ) then
4121 0 : offset_v = rdispls(comm_pid) + arr_rcvlths(comm_pid)
4122 0 : do j = 1, recv_bl(comm_pid+1)%Nparcels
4123 0 : do i = 1, recv_bl(comm_pid+1)%blocksizes(j)
4124 0 : a2out(recv_bl(comm_pid+1)%displacements(j)+i) = ga_r8_r(offset_v+i)
4125 : enddo
4126 0 : offset_v = offset_v + recv_bl(comm_pid+1)%blocksizes(j)
4127 : enddo
4128 : endif
4129 :
4130 : endif
4131 :
4132 : ! scatter data from global receive buffer to final destination
4133 0 : do istep=1,steps
4134 0 : p = swapids(istep)
4135 :
4136 0 : if (.not. alltoall) then
4137 0 : if (istep > steps-maxreqh) then
4138 : call mpi_wait( rcvids(istep), OutStats, ierr )
4139 : endif
4140 : endif
4141 :
4142 0 : if (rcvlths(p) .ne. 0) then
4143 :
4144 0 : offset_v = rdispls(p)
4145 0 : do j = 1, recv_bl(p+1)%Nparcels
4146 0 : do i = 1, recv_bl(p+1)%blocksizes(j)
4147 0 : a1out(recv_bl(p+1)%displacements(j)+i) = ga_r8_r(offset_v+i)
4148 : enddo
4149 0 : offset_v = offset_v + recv_bl(p+1)%blocksizes(j)
4150 : enddo
4151 :
4152 : ! scatter data for second array (if it exists)
4153 0 : if ( present(a2out) ) then
4154 :
4155 0 : offset_v = rdispls(p) + arr_rcvlths(p)
4156 0 : do j = 1, recv_bl(p+1)%Nparcels
4157 0 : do i = 1, recv_bl(p+1)%blocksizes(j)
4158 0 : a2out(recv_bl(p+1)%displacements(j)+i) = ga_r8_r(offset_v+i)
4159 : enddo
4160 0 : offset_v = offset_v + recv_bl(p+1)%blocksizes(j)
4161 : enddo
4162 :
4163 : endif
4164 :
4165 : endif
4166 : enddo
4167 :
4168 : ! Wait for any outstanding send requests to complete.
4169 0 : if (.not. alltoall .and. .not. sendd) then
4170 0 : call mpi_waitall( maxreqh, sndids(steps-maxreqh+1), InStats, ierr )
4171 : endif
4172 :
4173 : ! clean-up
4174 : ! make used portion of storage window available for reuse
4175 0 : r8_win%ncall_s = r8_win%ncall_s - 1
4176 :
4177 : #if defined( MODCM_TIMING )
4178 : call t_stopf('mod_comm communication')
4179 : #endif
4180 :
4181 : !EOC
4182 0 : end subroutine mp_swapirr
4183 : !------------------------------------------------------------------------------
4184 : !
4185 : !------------------------------------------------------------------------------
4186 : !BOP
4187 : ! !ROUTINE: mp_swapirr_i4 --- Write i4 contiguous parcels to global array
4188 : ! using XOR swap ordering
4189 : !
4190 : ! !INTERFACE:
4191 0 : subroutine mp_swapirr_i4 ( comm, send_bl, recv_bl, a1in, a1out, &
4192 : a2in, a2out, sw_handshake, sw_maxreq, &
4193 : sw_alltoall, sw_send )
4194 :
4195 : ! !INPUT PARAMETERS:
4196 : integer, intent(in) :: comm ! communicator
4197 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
4198 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
4199 : integer(i4), intent(in) :: a1in(*) ! input array
4200 : integer(i4), optional, intent(in) :: a2in(*) ! second input array
4201 : logical, optional, intent(in) :: sw_handshake ! use flow control and
4202 : ! ready send
4203 : integer, optional, intent(in) :: sw_maxreq ! maximum number of outstanding
4204 : ! MPI requests
4205 : logical, optional, intent(in) :: sw_alltoall ! use mpi_alltoall
4206 : logical, optional, intent(in) :: sw_send ! use mpi_send instead of isend
4207 :
4208 : ! !OUTPUT PARAMETERS:
4209 : integer(i4), intent(out) :: a1out(*) ! output array
4210 : integer(i4), optional, intent(out) :: a2out(*) ! second output array
4211 : !
4212 : ! !DESCRIPTION:
4213 : ! XOR-ordered version of all-to-all communication
4214 : !
4215 : ! WARNING: mod_comm parameter max_irr might need to be set larger than expected
4216 : ! when swapping two variables; specifically, max_irr must be at least
4217 : ! as large as the incoming i4_win%ncall_s + the number of variables to
4218 : ! be swapped
4219 : !
4220 : ! !REVISION HISTORY:
4221 : ! 08.06.30 Worley original: derived from mp_sendirr, but using
4222 : ! swapm logic and XOR swap order
4223 : ! 08.08.22 Worley removed swapm; reimplemented with native MPI,
4224 : ! added flow control/ready send option and maxreq
4225 : ! throttling, added alltoall option
4226 : ! 09.10.07 Worley eliminated mpi_recv from handshake logic
4227 : !
4228 : ! !BUGS:
4229 : !
4230 : !EOP
4231 : !------------------------------------------------------------------------------
4232 : !BOC
4233 : !
4234 : ! !LOCAL VARIABLES:
4235 : integer :: i, j, p, istep, num_s, num_r
4236 : integer :: comm_pid, comm_size, steps, ierr
4237 : integer :: ipe, offset_s, offset_r, offset_0, unitsize, onetwo
4238 :
4239 0 : integer :: arr_sndlths(0:numpro-1), arr_rcvlths(0:numpro-1)
4240 0 : integer :: sndlths(0:numpro-1), sdispls(0:numpro-1)
4241 0 : integer :: rcvlths(0:numpro-1), rdispls(0:numpro-1)
4242 0 : integer :: swapids(numpro)
4243 0 : integer :: sndids(numpro) ! nonblocking MPI send request ids
4244 0 : integer :: rcvids(numpro) ! nonblocking MPI recv request ids
4245 0 : integer :: hs_snd, hs_rcv(numpro)! handshake variables (send/receive)
4246 0 : integer :: hs_rcvids(numpro) ! nonblocking MPI handshake recv request ids
4247 0 : integer :: InStats(numpro*MPI_STATUS_SIZE)
4248 0 : integer :: OutStats(numpro*MPI_STATUS_SIZE)
4249 :
4250 : integer :: offset_v
4251 :
4252 : integer :: rstep
4253 :
4254 : integer :: maxreq, maxreqh
4255 : logical :: handshake, alltoall, sendd
4256 :
4257 : #if defined( MODCM_TIMING )
4258 : call t_startf('mod_comm communication')
4259 : #endif
4260 :
4261 0 : call MPI_COMM_SIZE (comm, comm_size, ierr)
4262 0 : call MPI_COMM_RANK (comm, comm_pid, ierr)
4263 :
4264 : ! num_s = 0 if this process is not part of the sending decomposition
4265 0 : num_s = size(send_bl)
4266 0 : if (send_bl(1)%Nparcels == -1) then
4267 0 : num_s = 0
4268 : endif
4269 :
4270 : ! num_r = 0 if this process is not part of the receiving decomposition
4271 0 : num_r = size(recv_bl)
4272 0 : if (recv_bl(1)%Nparcels == -1) then
4273 0 : num_r = 0
4274 : endif
4275 :
4276 0 : if ( present(a2in) .and. (.not. present(a2out)) ) then
4277 0 : write(iulog,*) "Fatal mp_swapirr_i4: a2in specified, but a2out missing - exiting"
4278 0 : stop
4279 : endif
4280 :
4281 0 : if ( (.not. present(a2in)) .and. present(a2out)) then
4282 0 : write(iulog,*) "Fatal mp_swapirr_i4: a2out specified, but a2in missing - exiting"
4283 0 : stop
4284 : endif
4285 :
4286 0 : if ( present(sw_handshake) ) then
4287 0 : handshake = sw_handshake
4288 0 : hs_snd = 1
4289 : else
4290 : handshake = .false.
4291 : endif
4292 :
4293 0 : if ( present(sw_alltoall) ) then
4294 0 : alltoall = sw_alltoall
4295 : else
4296 : alltoall = .false.
4297 : endif
4298 :
4299 0 : if ( present(sw_send) ) then
4300 0 : sendd = sw_send
4301 : else
4302 : sendd = .false.
4303 : endif
4304 :
4305 0 : onetwo = 1
4306 0 : if (present(a2in)) onetwo = 2
4307 0 : unitsize = i4_win%size/max_irr
4308 :
4309 : ! advance to unused portion of storage window
4310 0 : i4_win%ncall_s = i4_win%ncall_s + 1
4311 :
4312 0 : if (i4_win%ncall_s .gt. max_irr-onetwo+1) then
4313 0 : write(iulog,*) "mp_swapirr_i4: insufficient window storage - exiting"
4314 0 : write(iulog,*) "i4_win%ncall_s max_irr = ", i4_win%ncall_s, max_irr
4315 0 : stop
4316 : endif
4317 :
4318 : ! calculate send lengths and displacements
4319 0 : offset_0 = (i4_win%ncall_s-1)*unitsize
4320 0 : offset_s = offset_0
4321 0 : sndlths(:) = 0
4322 0 : sdispls(:) = 0
4323 0 : arr_sndlths(:) = 0
4324 0 : do ipe=1, num_s
4325 0 : sndlths(ipe-1) = send_bl(ipe)%Tot_Size
4326 0 : sdispls(ipe-1) = offset_s
4327 0 : if (sndlths(ipe-1) .ne. 0) then
4328 :
4329 : ! pack first array
4330 0 : offset_s = offset_s + sndlths(ipe-1)
4331 0 : if (offset_s-offset_0 .gt. onetwo*unitsize) then
4332 0 : write(iulog,*) "Fatal mp_swapirr_i4: send window out of space - exiting"
4333 0 : write(iulog,*) '1 comm_pid ipe unitsize offset_s offset_0 = ', comm_pid, &
4334 0 : ipe, unitsize, offset_s, offset_0
4335 0 : stop
4336 : endif
4337 :
4338 0 : arr_sndlths(ipe-1) = sndlths(ipe-1)
4339 :
4340 : ! calculate for second array (if it exists)
4341 0 : if ( present(a2in) ) then
4342 :
4343 0 : offset_s = offset_s + sndlths(ipe-1)
4344 0 : if (offset_s-offset_0 .gt. onetwo*unitsize) then
4345 0 : write(iulog,*) "Fatal mp_swapirr_i4: send window out of space - exiting"
4346 0 : write(iulog,*) '2 comm_pid ipe unitsize offset_s offset_0 = ', comm_pid, &
4347 0 : ipe, unitsize, offset_s, offset_0
4348 0 : stop
4349 : endif
4350 :
4351 0 : sndlths(ipe-1) = sndlths(ipe-1) + arr_sndlths(ipe-1)
4352 :
4353 : endif
4354 :
4355 : endif
4356 : enddo
4357 :
4358 : ! calculate receive lengths and displacements
4359 0 : offset_r = offset_0
4360 0 : rcvlths(:) = 0
4361 0 : rdispls(:) = 0
4362 0 : arr_rcvlths(:) = 0
4363 0 : do ipe=1, num_r
4364 0 : rcvlths(ipe-1) = recv_bl(ipe)%Tot_Size
4365 0 : rdispls(ipe-1) = offset_r
4366 0 : if (rcvlths(ipe-1) .ne. 0) then
4367 :
4368 0 : offset_r = offset_r + rcvlths(ipe-1)
4369 0 : if (onetwo*unitsize < offset_r-offset_0) then
4370 0 : write(iulog,*) "Fatal mp_swapirr_i4: receive window out of space - exiting"
4371 0 : write(iulog,*) '1 comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
4372 0 : ipe, unitsize, offset_r, offset_0
4373 0 : stop
4374 : endif
4375 :
4376 0 : arr_rcvlths(ipe-1) = rcvlths(ipe-1)
4377 :
4378 : ! compute for second array (if it exists)
4379 0 : if ( present(a2out) ) then
4380 :
4381 0 : offset_r = offset_r + rcvlths(ipe-1)
4382 0 : if (onetwo*unitsize < offset_r-offset_0) then
4383 0 : write(iulog,*) "Fatal mp_swapirr_i4: receive window out of space - exiting"
4384 0 : write(iulog,*) '2 comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
4385 0 : ipe, unitsize, offset_r, offset_0
4386 0 : stop
4387 : endif
4388 :
4389 0 : rcvlths(ipe-1) = rcvlths(ipe-1) + arr_rcvlths(ipe-1)
4390 :
4391 : endif
4392 :
4393 : endif
4394 : enddo
4395 :
4396 : ! Calculate swap partners and number of steps in point-to-point
4397 : ! implementations of alltoall algorithm.
4398 0 : steps = 0
4399 0 : do ipe=1,ceil2(comm_size)-1
4400 0 : p = pair(comm_size,ipe,comm_pid)
4401 0 : if (p >= 0) then
4402 0 : if (sndlths(p) > 0 .or. rcvlths(p) > 0) then
4403 0 : steps = steps + 1
4404 0 : swapids(steps) = p
4405 : end if
4406 : end if
4407 : end do
4408 :
4409 0 : if (.not. alltoall) then
4410 :
4411 0 : sndids(1:steps) = MPI_REQUEST_NULL
4412 0 : rcvids(1:steps) = MPI_REQUEST_NULL
4413 :
4414 0 : if (steps .eq. 0) then
4415 0 : maxreq = 0
4416 0 : maxreqh = 0
4417 0 : elseif (steps .eq. 1) then
4418 0 : maxreq = 1
4419 0 : maxreqh = 1
4420 : else
4421 0 : if ( present(sw_maxreq) ) then
4422 0 : if ((sw_maxreq .le. steps) .and. (sw_maxreq .ge. 0)) then
4423 0 : maxreq = sw_maxreq
4424 0 : if (maxreq > 1) then
4425 0 : maxreqh = maxreq/2
4426 : else
4427 0 : maxreq = 2
4428 0 : maxreqh = 1
4429 : endif
4430 : else
4431 0 : maxreq = steps
4432 0 : maxreqh = steps
4433 : endif
4434 : else
4435 0 : maxreq = steps
4436 0 : maxreqh = steps
4437 : endif
4438 : endif
4439 :
4440 : ! Post initial handshake receive requests
4441 0 : if (handshake) then
4442 0 : do istep=1,maxreq
4443 0 : p = swapids(istep)
4444 0 : if (sndlths(p) > 0) then
4445 : call mpi_irecv ( hs_rcv(istep), 1, mp_i4, p, comm_pid, comm, &
4446 0 : hs_rcvids(istep), ierr )
4447 : endif
4448 : enddo
4449 : endif
4450 :
4451 : ! Post initial receive requests
4452 0 : do istep=1,maxreq
4453 0 : p = swapids(istep)
4454 0 : if (rcvlths(p) > 0) then
4455 0 : offset_r = rdispls(p)+1
4456 0 : call mpi_irecv ( ga_i4_r(offset_r), rcvlths(p), mp_i4, &
4457 0 : p, p, comm, rcvids(istep), ierr )
4458 0 : if (handshake) then
4459 : call mpi_send( hs_snd, 1, mp_i4, p, p, comm, &
4460 0 : ierr )
4461 : endif
4462 : endif
4463 : enddo
4464 : rstep = maxreq
4465 : !
4466 : endif
4467 :
4468 : ! gather data into global send buffer
4469 0 : do istep=1,steps
4470 0 : p = swapids(istep)
4471 :
4472 0 : if (sndlths(p) .ne. 0) then
4473 0 : offset_v = sdispls(p)
4474 0 : do j = 1, send_bl(p+1)%nparcels
4475 0 : do i = 1, send_bl(p+1)%blocksizes(j)
4476 0 : ga_i4_s(offset_v+i) = a1in(send_bl(p+1)%displacements(j)+i)
4477 : enddo
4478 0 : offset_v = offset_v + send_bl(p+1)%blocksizes(j)
4479 : enddo
4480 :
4481 : ! pack second array (if it exists)
4482 0 : if ( present(a2in) ) then
4483 0 : offset_v = sdispls(p) + arr_sndlths(p)
4484 0 : do j = 1, send_bl(p+1)%nparcels
4485 0 : do i = 1, send_bl(p+1)%blocksizes(j)
4486 0 : ga_i4_s(offset_v+i) = a2in(send_bl(p+1)%displacements(j)+i)
4487 : enddo
4488 0 : offset_v = offset_v + send_bl(p+1)%blocksizes(j)
4489 : enddo
4490 : endif
4491 :
4492 : endif
4493 :
4494 0 : if (.not. alltoall) then
4495 :
4496 : ! Submit new i(r)send request
4497 0 : offset_s = sdispls(p)+1
4498 0 : if (sndlths(p) > 0) then
4499 0 : if (handshake) then
4500 : call mpi_wait( hs_rcvids(istep), MPI_STATUS_IGNORE, ierr )
4501 0 : if (sendd) then
4502 0 : call mpi_rsend( ga_i4_s(offset_s), sndlths(p), mp_i4, &
4503 0 : p, comm_pid, comm, ierr )
4504 : else
4505 0 : call mpi_irsend( ga_i4_s(offset_s), sndlths(p), mp_i4, &
4506 0 : p, comm_pid, comm, sndids(istep), ierr )
4507 : endif
4508 : else
4509 0 : if (sendd) then
4510 0 : call mpi_send ( ga_i4_s(offset_s), sndlths(p), mp_i4, &
4511 0 : p, comm_pid, comm, ierr )
4512 : else
4513 0 : call mpi_isend ( ga_i4_s(offset_s), sndlths(p), mp_i4, &
4514 0 : p, comm_pid, comm, sndids(istep), ierr )
4515 : endif
4516 : endif
4517 : endif
4518 :
4519 0 : if (istep > maxreqh) then
4520 : ! Wait for oldest irecv request to complete
4521 0 : call mpi_wait( rcvids(istep-maxreqh), OutStats, ierr )
4522 :
4523 0 : if (rstep < steps) then
4524 0 : rstep = rstep + 1
4525 0 : p = swapids(rstep)
4526 :
4527 : ! Submit a new handshake irecv request
4528 0 : if (handshake) then
4529 0 : if (sndlths(p) > 0) then
4530 : call mpi_irecv( hs_rcv(rstep), 1, mp_i4, p, comm_pid, comm, &
4531 0 : hs_rcvids(rstep), ierr )
4532 : endif
4533 : endif
4534 :
4535 : ! Submit a new irecv request
4536 0 : if (rcvlths(p) > 0) then
4537 0 : offset_r = rdispls(p)+1
4538 0 : call mpi_irecv( ga_i4_r(offset_r), rcvlths(p), mp_i4, &
4539 0 : p, p, comm, rcvids(rstep), ierr )
4540 0 : if (handshake) then
4541 : call mpi_send ( hs_snd, 1, mp_i4, p, p, comm, &
4542 0 : ierr )
4543 : endif
4544 : endif
4545 : endif
4546 :
4547 : ! Wait for outstanding i(r)send request to complete
4548 0 : if (.not. sendd) then
4549 0 : call mpi_wait( sndids(istep-maxreqh), InStats, ierr )
4550 : endif
4551 : endif
4552 : !
4553 : endif
4554 : !
4555 : enddo
4556 :
4557 : ! local copy to send buffer
4558 0 : if (sndlths(comm_pid) .ne. 0) then
4559 :
4560 0 : offset_v = sdispls(comm_pid)
4561 0 : do j = 1, send_bl(comm_pid+1)%nparcels
4562 0 : do i = 1, send_bl(comm_pid+1)%blocksizes(j)
4563 0 : ga_i4_s(offset_v+i) = a1in(send_bl(comm_pid+1)%displacements(j)+i)
4564 : enddo
4565 0 : offset_v = offset_v + send_bl(comm_pid+1)%blocksizes(j)
4566 : enddo
4567 :
4568 : ! pack second array (if it exists)
4569 0 : if ( present(a2in) ) then
4570 0 : offset_v = sdispls(comm_pid) + arr_sndlths(comm_pid)
4571 0 : do j = 1, send_bl(comm_pid+1)%nparcels
4572 0 : do i = 1, send_bl(comm_pid+1)%blocksizes(j)
4573 0 : ga_i4_s(offset_v+i) = a2in(send_bl(comm_pid+1)%displacements(j)+i)
4574 : enddo
4575 0 : offset_v = offset_v + send_bl(comm_pid+1)%blocksizes(j)
4576 : enddo
4577 : endif
4578 :
4579 0 : if (.not. alltoall) then
4580 0 : ga_i4_r(rdispls(comm_pid)+1:rdispls(comm_pid)+rcvlths(comm_pid)) = &
4581 0 : ga_i4_s(sdispls(comm_pid)+1:sdispls(comm_pid)+sndlths(comm_pid))
4582 : endif
4583 :
4584 : endif
4585 :
4586 0 : if (alltoall) then
4587 : call mpi_alltoallv (ga_i4_s, sndlths, sdispls, mp_i4, &
4588 : ga_i4_r, rcvlths, rdispls, mp_i4, &
4589 0 : comm, ierror)
4590 : endif
4591 :
4592 : ! local copy from receive buffer
4593 0 : if (rcvlths(comm_pid) .ne. 0) then
4594 :
4595 0 : offset_v = rdispls(comm_pid)
4596 0 : do j = 1, recv_bl(comm_pid+1)%Nparcels
4597 0 : do i = 1, recv_bl(comm_pid+1)%blocksizes(j)
4598 0 : a1out(recv_bl(comm_pid+1)%displacements(j)+i) = ga_i4_r(offset_v+i)
4599 : enddo
4600 0 : offset_v = offset_v + recv_bl(comm_pid+1)%blocksizes(j)
4601 : enddo
4602 :
4603 : ! scatter data for second array (if it exists)
4604 0 : if ( present(a2out) ) then
4605 0 : offset_v = rdispls(comm_pid) + arr_rcvlths(comm_pid)
4606 0 : do j = 1, recv_bl(comm_pid+1)%Nparcels
4607 0 : do i = 1, recv_bl(comm_pid+1)%blocksizes(j)
4608 0 : a2out(recv_bl(comm_pid+1)%displacements(j)+i) = ga_i4_r(offset_v+i)
4609 : enddo
4610 0 : offset_v = offset_v + recv_bl(comm_pid+1)%blocksizes(j)
4611 : enddo
4612 : endif
4613 :
4614 : endif
4615 :
4616 : ! scatter data from global receive buffer to final destination
4617 0 : do istep=1,steps
4618 0 : p = swapids(istep)
4619 :
4620 0 : if (.not. alltoall) then
4621 0 : if (istep > steps-maxreqh) then
4622 : call mpi_wait( rcvids(istep), OutStats, ierr )
4623 : endif
4624 : endif
4625 :
4626 0 : if (rcvlths(p) .ne. 0) then
4627 :
4628 0 : offset_v = rdispls(p)
4629 0 : do j = 1, recv_bl(p+1)%Nparcels
4630 0 : do i = 1, recv_bl(p+1)%blocksizes(j)
4631 0 : a1out(recv_bl(p+1)%displacements(j)+i) = ga_i4_r(offset_v+i)
4632 : enddo
4633 0 : offset_v = offset_v + recv_bl(p+1)%blocksizes(j)
4634 : enddo
4635 :
4636 : ! scatter data for second array (if it exists)
4637 0 : if ( present(a2out) ) then
4638 :
4639 0 : offset_v = rdispls(p) + arr_rcvlths(p)
4640 0 : do j = 1, recv_bl(p+1)%Nparcels
4641 0 : do i = 1, recv_bl(p+1)%blocksizes(j)
4642 0 : a2out(recv_bl(p+1)%displacements(j)+i) = ga_i4_r(offset_v+i)
4643 : enddo
4644 0 : offset_v = offset_v + recv_bl(p+1)%blocksizes(j)
4645 : enddo
4646 :
4647 : endif
4648 :
4649 : endif
4650 : enddo
4651 :
4652 : ! Wait for any outstanding send requests to complete.
4653 0 : if (.not. alltoall .and. .not. sendd) then
4654 0 : call mpi_waitall( maxreqh, sndids(steps-maxreqh+1), InStats, ierr )
4655 : endif
4656 :
4657 : ! clean-up
4658 : ! make used portion of storage window available for reuse
4659 0 : i4_win%ncall_s = i4_win%ncall_s - 1
4660 :
4661 : #if defined( MODCM_TIMING )
4662 : call t_stopf('mod_comm communication')
4663 : #endif
4664 :
4665 : !EOC
4666 0 : end subroutine mp_swapirr_i4
4667 : !------------------------------------------------------------------------------
4668 : !
4669 : !------------------------------------------------------------------------------
4670 : !BOP
4671 : ! !ROUTINE: pair
4672 : !
4673 : ! !INTERFACE:
4674 0 : integer function pair(np,p,k)
4675 : !
4676 : ! !INPUT PARAMETERS:
4677 : integer :: np
4678 : integer :: p
4679 : integer :: k
4680 : ! !DESCRIPTION:
4681 : !
4682 : ! Bitwise XOR of arguments p and k, if less than upper bound np
4683 : !
4684 : ! !REVISION HISTORY:
4685 : ! 2008.08.21 Worley Imported from spmdutils
4686 : !
4687 : !EOP
4688 : !------------------------------------------------------------------------------
4689 : !BOC
4690 : !
4691 : ! !LOCAL VARIABLES:
4692 : integer q
4693 : !
4694 0 : q = ieor(p,k)
4695 0 : if ( q > np-1 ) then
4696 : pair = -1
4697 : else
4698 0 : pair = q
4699 : endif
4700 :
4701 : return
4702 :
4703 : !EOC
4704 : end function pair
4705 : !------------------------------------------------------------------------------
4706 : !
4707 : !------------------------------------------------------------------------------
4708 : !BOP
4709 : ! !ROUTINE: ceil2
4710 : !
4711 : ! !INTERFACE:
4712 12160512 : integer function ceil2(n)
4713 : !
4714 : ! !INPUT PARAMETERS:
4715 : integer :: n
4716 : ! !DESCRIPTION:
4717 : !
4718 : ! Smallest power of 2 greater than or equal to the argument
4719 : !
4720 : ! !REVISION HISTORY:
4721 : ! 2008.08.21 Worley Imported from spmdutils
4722 : !
4723 : !EOP
4724 : !------------------------------------------------------------------------------
4725 : !BOC
4726 : !
4727 : ! !LOCAL VARIABLES:
4728 : integer p
4729 :
4730 12160512 : p=1
4731 133765632 : do while ( p < n )
4732 121605120 : p=p*2
4733 : enddo
4734 12160512 : ceil2=p
4735 :
4736 : return
4737 : !EOC
4738 : end function ceil2
4739 : !------------------------------------------------------------------------------
4740 : !
4741 : !------------------------------------------------------------------------------
4742 : # if defined( MOD_ASSUMED_SIZE )
4743 : !BOP
4744 : ! !ROUTINE: mp_sendtrirr --- Initiate communication of contiguous tracer parcels
4745 : !
4746 : ! !INTERFACE:
4747 5160960 : subroutine mp_sendtrirr ( comm, send_bl, recv_bl, qin, qout, mbeg, mend, mq, &
4748 : ifirsts, ilasts, jfirsts, jlasts, kfirsts, klasts, &
4749 : ifirstr, ilastr, jfirstr, jlastr, kfirstr, klastr, &
4750 : modc )
4751 :
4752 : ! !INPUT PARAMETERS:
4753 : integer, intent(in) :: comm ! communicator
4754 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
4755 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
4756 : integer, intent(in) :: mbeg ! initial tracer index
4757 : integer, intent(in) :: mend ! final tracer index
4758 : integer, intent(in) :: mq ! total tracer indices
4759 : integer, intent(in) :: ifirsts ! first I index of source
4760 : integer, intent(in) :: ilasts ! last I index of source
4761 : integer, intent(in) :: jfirsts ! first j index of source
4762 : integer, intent(in) :: jlasts ! last j index of source
4763 : integer, intent(in) :: kfirsts ! first k index of source
4764 : integer, intent(in) :: klasts ! last k index of source
4765 : integer, intent(in) :: ifirstr ! first I index of target
4766 : integer, intent(in) :: ilastr ! last I index of target
4767 : integer, intent(in) :: jfirstr ! first j index of target
4768 : integer, intent(in) :: jlastr ! last j index of target
4769 : integer, intent(in) :: kfirstr ! first k index of target
4770 : integer, intent(in) :: klastr ! last k index of target
4771 : integer, optional, intent(in) :: modc(4) ! 1: classical, swap p2p, swap a2a
4772 : ! 2: handshake
4773 : ! 3: send vs isend
4774 : ! 4: max number of outstanding requests
4775 : real(r8), intent(in) :: qin(*) ! input tracer array
4776 :
4777 : ! !OUTPUT PARAMETERS:
4778 : real(r8), intent(out) :: qout(*) ! output tracer array
4779 : !
4780 : ! !DESCRIPTION:
4781 : ! Communicate a number of contiguous parcels to/from arbitrary set of PEs.
4782 : ! Modc(1): if 0, use original approach of posting all communications here and placing
4783 : ! wait points in mp_recvtrirr; if 1, call swap routine with p2p messages; if 2, call swap
4784 : ! routine with a2a messages.
4785 : ! Modc(2): if 1, then apply handshaking (don't send until corresponding receive is posted)
4786 : ! Modc(3): if 1, then use blocking send; otherwise use nonblocking send
4787 : ! Modc(4): maximum number of outstanding requests (applies to swap routines only)
4788 : !
4789 : ! !REVISION HISTORY:
4790 : ! 02.08.13 Sawyer Creation
4791 : ! 02.11.06 Mirin Optimizations
4792 : ! 03.03.03 Sawyer Use partneroffset
4793 : ! 03.06.24 Sawyer Integrated Use_Mpi_Types; added qout
4794 : ! 04.02.24 Mirin Various mpi2 options
4795 : ! 08.09.18 Mirin Major overhaul, to include approaches from Mirin and Worley
4796 : ! 09.10.07 Worley eliminated mpi_recv from handshake logic
4797 : !
4798 : ! !BUGS:
4799 : !
4800 : !EOP
4801 : !------------------------------------------------------------------------------
4802 : !BOC
4803 : !
4804 : ! !LOCAL VARIABLES:
4805 : integer ipe, qsize, offset, blocksize, nparcels, offset_s, offset_r, ierr, mod_method
4806 : integer p, mysize, nthpc, minsize, nthrd, pn, pt, tmpsize, unitsize, offset_0
4807 : integer i, j, send_tag, recv_tag, num_s, num_r, m
4808 10321920 : integer :: offset_v (Max_Nparcels)
4809 10321920 : integer :: hs_snd, hs_rcv(numpro), hs_rcvids(numpro)
4810 : integer ipe2, ceil2num
4811 : integer numtr, numtrm
4812 : integer sw_local, maxreq_local
4813 : logical hs_local, send_local
4814 : logical sw_alltoall
4815 : integer comm_pid
4816 : integer ijks, ijkr, ij
4817 :
4818 :
4819 : #if defined( MODCM_TIMING )
4820 : call t_startf('mod_comm communication')
4821 : #endif
4822 :
4823 5160960 : if (present(modc)) then
4824 5160960 : sw_local = modc(1)
4825 5160960 : hs_local = (modc(2) .eq. 1)
4826 5160960 : send_local = (modc(3) .eq. 1)
4827 5160960 : maxreq_local = modc(4)
4828 : else
4829 0 : sw_local = 0
4830 0 : hs_local = .true.
4831 0 : send_local = .true.
4832 0 : maxreq_local = -1
4833 : endif
4834 :
4835 : ! Do not call mp_swaptrirr unless mod_method equals 0
4836 5160960 : mod_method = recv_bl(1)%method
4837 5160960 : if (mod_method .gt. 0) sw_local = 0
4838 :
4839 5160960 : if (sw_local .gt. 0) then
4840 0 : sw_alltoall = (sw_local .eq. 2)
4841 : call mp_swaptrirr(comm, send_bl, recv_bl, qin, qout, mbeg, mend, mq, &
4842 : ifirsts, ilasts, jfirsts, jlasts, kfirsts, klasts, &
4843 : ifirstr, ilastr, jfirstr, jlastr, kfirstr, klastr, &
4844 : sw_handshake=hs_local, sw_maxreq=maxreq_local, &
4845 0 : sw_alltoall=sw_alltoall, sw_send=send_local)
4846 : else
4847 :
4848 5160960 : call MPI_COMM_RANK (comm, comm_pid, ierr)
4849 :
4850 5160960 : hs_snd = 1
4851 5160960 : ceil2num = ceil2(numpro)
4852 :
4853 5160960 : numtrm = mend - mbeg
4854 5160960 : numtr = numtrm + 1
4855 :
4856 : ! num_s = 0 if this processes is not part of the sending decomposition
4857 5160960 : num_s = size(send_bl)
4858 5160960 : if (send_bl(1)%Nparcels == -1) then
4859 0 : num_s = 0
4860 : endif
4861 :
4862 : ! num_r = 0 if this processes is not part of the receiving decomposition
4863 5160960 : num_r = size(recv_bl)
4864 5160960 : if (recv_bl(1)%Nparcels == -1) then
4865 0 : num_r = 0
4866 : endif
4867 :
4868 5160960 : r8_win%ncall_s = r8_win%ncall_s + 1
4869 :
4870 5160960 : ijks =(klasts-kfirsts+1)*(jlasts-jfirsts+1)*(ilasts-ifirsts+1)
4871 5160960 : ijkr =(klastr-kfirstr+1)*(jlastr-jfirstr+1)*(ilastr-ifirstr+1)
4872 :
4873 5160960 : if (mod_method .gt. 0) then
4874 : !
4875 : ! mpi derived types
4876 0 : if (r8_win%ncall_s .gt. MaxTrf-numtrm) then
4877 0 : write(iulog,*) "mp_sendtrirr: derived type handle count exceeded - exiting"
4878 0 : write(iulog,*) "r8_win%ncall_s MaxTrf = ", r8_win%ncall_s, MaxTrf
4879 0 : stop
4880 : endif
4881 : !
4882 : ! MPI: Irecv over all processes
4883 : !
4884 0 : if (hs_local) then
4885 0 : hs_rcvids(:) = MPI_REQUEST_NULL
4886 0 : do ipe2=1, ceil2num
4887 0 : ipe = ieor(ipe2-1,comm_pid) + 1
4888 0 : if (ipe .gt. num_s) cycle
4889 0 : if ( send_bl(ipe)%type /= MPI_DATATYPE_NULL ) then
4890 0 : if (ipe-1 /= comm_pid) &
4891 0 : call MPI_IRECV ( hs_rcv(ipe), 1, mp_i4, ipe-1, comm_pid, comm, &
4892 0 : hs_rcvids(ipe), ierr )
4893 : endif
4894 : enddo
4895 : endif
4896 :
4897 0 : OutHandle(:,r8_win%ncall_s:r8_win%ncall_s+numtrm) = MPI_REQUEST_NULL
4898 0 : do ipe2=1, ceil2num
4899 0 : ipe = ieor(ipe2-1,comm_pid) + 1
4900 0 : if (ipe .gt. num_r) cycle
4901 : !
4902 : ! Receive the buffers with MPI_Irecv. Non-blocking
4903 : !
4904 0 : if ( recv_bl(ipe)%type /= MPI_DATATYPE_NULL ) then
4905 0 : recv_tag = ipe-1 + modcam_tagoffset
4906 0 : do m = mbeg, mend
4907 0 : call mpi_irecv( qout((m-1)*ijkr+1), 1, recv_bl(ipe)%type, ipe-1, recv_tag, &
4908 0 : comm, OutHandle(ipe,r8_win%ncall_s+m-mbeg), ierr )
4909 : enddo
4910 0 : if (hs_local) then
4911 0 : if (ipe-1 /= comm_pid) &
4912 0 : call MPI_SEND ( hs_snd, 1, mp_i4, ipe-1, ipe-1, comm, ierr )
4913 : endif
4914 : endif
4915 : enddo
4916 :
4917 : !
4918 : ! MPI: Isend/Send over all processes; use risend/rsend with hs
4919 : !
4920 0 : InHandle(:,r8_win%ncall_s:r8_win%ncall_s+numtrm) = MPI_REQUEST_NULL
4921 0 : do ipe2=1, ceil2num
4922 0 : ipe = ieor(ipe2-1,comm_pid) + 1
4923 0 : if (ipe .gt. num_s) cycle
4924 :
4925 : !
4926 : ! Send the individual buffers with non-blocking sends
4927 : !
4928 0 : if ( send_bl(ipe)%type /= MPI_DATATYPE_NULL ) then
4929 0 : send_tag = comm_pid + modcam_tagoffset
4930 0 : if (hs_local) then
4931 0 : if (ipe-1 /= comm_pid) &
4932 0 : call MPI_WAIT ( hs_rcvids(ipe), MPI_STATUS_IGNORE, ierr )
4933 0 : if (send_local) then
4934 0 : do m = mbeg, mend
4935 0 : call mpi_rsend( qin((m-1)*ijks+1), 1, send_bl(ipe)%type, ipe-1, send_tag, &
4936 0 : comm, ierr )
4937 : enddo
4938 : else
4939 0 : do m = mbeg, mend
4940 0 : call mpi_irsend( qin((m-1)*ijks+1), 1, send_bl(ipe)%type, ipe-1, send_tag, &
4941 0 : comm, InHandle(ipe,r8_win%ncall_s), ierr )
4942 : enddo
4943 : endif
4944 : else
4945 0 : if (send_local) then
4946 0 : do m = mbeg, mend
4947 0 : call mpi_send( qin((m-1)*ijks+1), 1, send_bl(ipe)%type, ipe-1, send_tag, &
4948 0 : comm, ierr )
4949 : enddo
4950 : else
4951 0 : do m = mbeg, mend
4952 0 : call mpi_isend( qin((m-1)*ijks+1), 1, send_bl(ipe)%type, ipe-1, send_tag, &
4953 0 : comm, InHandle(ipe,r8_win%ncall_s), ierr )
4954 : enddo
4955 : endif
4956 : endif
4957 : endif
4958 : enddo
4959 : else
4960 :
4961 : ! temporary contiguous buffers
4962 :
4963 5160960 : if (r8_win%ncall_s .gt. max_irr-numtrm) then
4964 0 : write(iulog,*) "mp_sendtrirr: insufficient window storage - exiting"
4965 0 : write(iulog,*) "r8_win%ncall_s max_irr = ", r8_win%ncall_s, max_irr
4966 0 : stop
4967 : endif
4968 5160960 : unitsize = r8_win%size/max_irr
4969 :
4970 : ! issue call to receive data in global receive buffer
4971 5160960 : offset_0 = (r8_win%ncall_s-1)*unitsize
4972 5160960 : offset_s = offset_0
4973 5160960 : offset_r = offset_0
4974 :
4975 5160960 : if (hs_local) then
4976 3968778240 : hs_rcvids(:) = MPI_REQUEST_NULL
4977 5289984000 : do ipe2=1, ceil2num
4978 5284823040 : ipe = ieor(ipe2-1,comm_pid) + 1
4979 5284823040 : if (ipe .gt. num_s) cycle
4980 3963617280 : qsize = numtr*send_bl(ipe)%Tot_Size
4981 3968778240 : if (qsize .ne. 0) then
4982 61931520 : r8_win%dest = ipe-1
4983 61931520 : send_tag = comm_pid + modcam_tagoffset
4984 61931520 : if (r8_win%dest /= comm_pid) &
4985 61850880 : call MPI_IRECV ( hs_rcv(ipe), 1, mp_i4, r8_win%dest, send_tag, comm, &
4986 61850880 : hs_rcvids(ipe), ierr )
4987 : endif
4988 : enddo
4989 : endif
4990 :
4991 5289984000 : do ipe2=1, ceil2num
4992 5284823040 : ipe = ieor(ipe2-1,comm_pid) + 1
4993 5284823040 : if (ipe .gt. num_r) cycle
4994 3963617280 : r8_win%size_r = numtr*recv_bl(ipe)%Tot_Size
4995 3968778240 : if (r8_win%size_r .ne. 0) then
4996 61931520 : r8_win%offset_r = offset_r
4997 61931520 : offset_r = offset_r + r8_win%size_r
4998 61931520 : r8_win%src = ipe-1
4999 61931520 : if (numtr*unitsize >= offset_r-offset_0) then
5000 61931520 : recv_tag = r8_win%src + modcam_tagoffset
5001 61931520 : qsize = r8_win%size_r
5002 61931520 : r8_win%nrecv = r8_win%nrecv + 1
5003 0 : call MPI_IRECV(ga_r8_r(r8_win%offset_r+1), qsize, mp_r8, r8_win%src, &
5004 61931520 : recv_tag, comm, r8_win%rqest(r8_win%nrecv), ierror)
5005 61931520 : if (hs_local) then
5006 61931520 : if (r8_win%src /= comm_pid) &
5007 61850880 : call MPI_SEND ( hs_snd, 1, mp_i4, r8_win%src, recv_tag, comm, ierror)
5008 : endif
5009 : else
5010 0 : write(iulog,*) "Fatal mp_sendtrirr: receive window out of space - exiting"
5011 0 : write(iulog,*) 'comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
5012 0 : ipe, unitsize, offset_r, offset_0
5013 0 : stop
5014 : endif
5015 : endif
5016 : enddo
5017 : ! gather data into global send buffer
5018 5289984000 : do ipe2=1, ceil2num
5019 5284823040 : ipe = ieor(ipe2-1,comm_pid) + 1
5020 5284823040 : if (ipe .gt. num_s) cycle
5021 3963617280 : qsize = numtr*send_bl(ipe)%Tot_Size
5022 3968778240 : if (qsize .ne. 0) then
5023 61931520 : r8_win%dest = ipe-1
5024 61931520 : r8_win%offset_s = offset_s
5025 61931520 : offset_s = offset_s + qsize
5026 61931520 : if (offset_s-offset_0 .gt. numtr*unitsize) then
5027 0 : write(iulog,*) "Fatal mp_sendtrirr: send window out of space - exiting"
5028 0 : write(iulog,*) 'comm_pid ipe unitsize offset_s offset_0 = ', comm_pid, &
5029 0 : ipe, unitsize, offset_s, offset_0
5030 0 : stop
5031 : endif
5032 :
5033 61931520 : offset_v(1) = r8_win%offset_s
5034 495452160 : do j = 2, send_bl(ipe)%nparcels
5035 495452160 : offset_v(j) = offset_v(j-1) + send_bl(ipe)%blocksizes(j-1)
5036 : enddo
5037 :
5038 557383680 : do j = 1, send_bl(ipe)%nparcels
5039 2043740160 : do m = mbeg, mend
5040 37654364160 : do i = 1, send_bl(ipe)%blocksizes(j)
5041 35672555520 : ij = send_bl(ipe)%displacements(j)+i
5042 37158912000 : ga_r8_s(send_bl(ipe)%Tot_Size*(m-mbeg)+offset_v(j)+i) = qin((m-1)*ijks+ij)
5043 : enddo
5044 : enddo
5045 : enddo
5046 :
5047 : ! nonblocking send
5048 61931520 : send_tag = comm_pid + modcam_tagoffset
5049 61931520 : r8_win%nsend = r8_win%nsend + 1
5050 61931520 : if (hs_local) then
5051 61931520 : if (r8_win%dest /= comm_pid) &
5052 61850880 : call MPI_WAIT ( hs_rcvids(ipe), MPI_STATUS_IGNORE, ierr )
5053 61931520 : if (send_local) then
5054 0 : call MPI_RSEND(ga_r8_s(r8_win%offset_s+1), qsize, mp_r8, r8_win%dest, &
5055 61931520 : send_tag, comm, ierr)
5056 : else
5057 0 : call MPI_IRSEND(ga_r8_s(r8_win%offset_s+1), qsize, mp_r8, r8_win%dest, &
5058 0 : send_tag, comm, r8_win%sqest(r8_win%nsend), ierr)
5059 : endif
5060 : else
5061 0 : if (send_local) then
5062 0 : call MPI_SEND(ga_r8_s(r8_win%offset_s+1), qsize, mp_r8, r8_win%dest, &
5063 0 : send_tag, comm, ierr)
5064 : else
5065 0 : call MPI_ISEND(ga_r8_s(r8_win%offset_s+1), qsize, mp_r8, r8_win%dest, &
5066 0 : send_tag, comm, r8_win%sqest(r8_win%nsend), ierr)
5067 : endif
5068 : endif
5069 : endif
5070 : enddo
5071 :
5072 : endif ! mod_method
5073 :
5074 5160960 : r8_win%ncall_s = r8_win%ncall_s + numtrm
5075 :
5076 : endif ! sw_local
5077 :
5078 : #if defined( MODCM_TIMING )
5079 : call t_stopf('mod_comm communication')
5080 : #endif
5081 :
5082 5160960 : end subroutine mp_sendtrirr
5083 : !------------------------------------------------------------------------------
5084 : !
5085 : !------------------------------------------------------------------------------
5086 : !BOP
5087 : ! !ROUTINE: mp_recvtrirr --- Finalize communication of contiguous tracer parcels
5088 : !
5089 : ! !INTERFACE:
5090 5160960 : subroutine mp_recvtrirr ( comm, send_bl, recv_bl, qin, qout, mbeg, mend, mq, &
5091 : ifirsts, ilasts, jfirsts, jlasts, kfirsts, klasts, &
5092 : ifirstr, ilastr, jfirstr, jlastr, kfirstr, klastr, &
5093 : modc )
5094 :
5095 : ! !INPUT PARAMETERS:
5096 : integer, intent(in) :: comm ! communicator
5097 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
5098 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
5099 : integer, intent(in) :: mbeg ! initial tracer index
5100 : integer, intent(in) :: mend ! final tracer index
5101 : integer, intent(in) :: mq ! total tracer indices
5102 : integer, intent(in) :: ifirsts ! first I index of source
5103 : integer, intent(in) :: ilasts ! last I index of source
5104 : integer, intent(in) :: jfirsts ! first j index of source
5105 : integer, intent(in) :: jlasts ! last j index of source
5106 : integer, intent(in) :: kfirsts ! first k index of source
5107 : integer, intent(in) :: klasts ! last k index of source
5108 : integer, intent(in) :: ifirstr ! first I index of target
5109 : integer, intent(in) :: ilastr ! last I index of target
5110 : integer, intent(in) :: jfirstr ! first j index of target
5111 : integer, intent(in) :: jlastr ! last j index of target
5112 : integer, intent(in) :: kfirstr ! first k index of target
5113 : integer, intent(in) :: klastr ! last k index of target
5114 : integer, optional, intent(in) :: modc(4) ! 1: classical, swap p2p, swap a2a
5115 : ! 2: handshake
5116 : ! 3: send vs isend
5117 : ! 4: max number of outstanding requests
5118 : real(r8), intent(in) :: qin(*) ! input tracer array
5119 : ! !OUTPUT PARAMETERS:
5120 : real(r8), intent(out) :: qout(*) ! output tracer array
5121 : !
5122 : ! !DESCRIPTION:
5123 : ! Complete transfer of a generalized region initiated by {\tt mp\_sendtrirr}.
5124 : ! Communicate a number of contiguous parcels to/from arbitrary set of PEs.
5125 : ! Modc(1): if 0, use original approach of posting all communications in mp_sendtrirr and
5126 : ! placing wait points here; otherwise don't do anything - mp_swaptrirr is called from mp_sendirr.
5127 : ! Modc(3): if 1, then use blocking send; otherwise use nonblocking send
5128 : !
5129 : ! !REVISION HISTORY:
5130 : ! 02.08.15 Sawyer Creation
5131 : ! 02.11.06 Mirin Optimizations
5132 : ! 03.03.03 Sawyer Now using packed arrays for MPI2
5133 : ! 04.02.24 Mirin Various mpi2 options
5134 : ! 08.09.18 Mirin Major overhaul, to include approaches from Mirin and Worley
5135 : !
5136 : !EOP
5137 : !------------------------------------------------------------------------------
5138 : !BOC
5139 : integer :: ipe, blocksize, offset_r, mod_method
5140 : integer unitsize, offset_0
5141 : integer Ierr
5142 10321920 : integer InStats(numpro*MPI_STATUS_SIZE)
5143 10321920 : integer OutStats(numpro*MPI_STATUS_SIZE)
5144 : integer i, j, num_r, num_s, m
5145 5160960 : integer :: offset_v (Max_Nparcels)
5146 : integer ipe2, ceil2num
5147 : integer numtr, numtrm
5148 : integer sw_local, maxreq_local
5149 : logical hs_local, send_local
5150 : logical sw_alltoall
5151 : integer comm_size, comm_pid
5152 : integer ijks, ijkr, ij
5153 :
5154 5160960 : if (present(modc)) then
5155 5160960 : sw_local = modc(1)
5156 5160960 : hs_local = (modc(2) .eq. 1)
5157 5160960 : send_local = (modc(3) .eq. 1)
5158 5160960 : maxreq_local = modc(4)
5159 : else
5160 : sw_local = 0
5161 5160960 : hs_local = .true.
5162 : send_local = .true.
5163 5160960 : maxreq_local = -1
5164 : endif
5165 :
5166 : ! Do not call mp_swaptrirr (hence return) unless mod_method equals 0
5167 5160960 : mod_method = recv_bl(1)%method
5168 5160960 : if (mod_method .gt. 0) sw_local = 0
5169 :
5170 : ! Return if swap_irr
5171 5160960 : if (sw_local .gt. 0) return
5172 :
5173 : #if defined( MODCM_TIMING )
5174 : call t_startf('mod_comm communication')
5175 : #endif
5176 :
5177 5160960 : call MPI_COMM_SIZE (comm, comm_size, ierr)
5178 5160960 : call MPI_COMM_RANK (comm, comm_pid, ierr)
5179 :
5180 5160960 : ceil2num = ceil2(numpro)
5181 :
5182 5160960 : numtrm = mend - mbeg
5183 5160960 : numtr = numtrm + 1
5184 :
5185 : ! num_s = 0 if this processes is not part of the sending decomposition
5186 5160960 : num_s = size(send_bl)
5187 5160960 : if (send_bl(1)%Nparcels == -1) then
5188 0 : num_s = 0
5189 : endif
5190 :
5191 : ! num_r = 0 if this processes is not part of the receiving decomposition
5192 5160960 : num_r = size(recv_bl)
5193 5160960 : if (recv_bl(1)%Nparcels == -1) then
5194 0 : num_r = 0
5195 : endif
5196 :
5197 5160960 : r8_win%ncall_r = r8_win%ncall_r + 1
5198 :
5199 5160960 : ijks =(klasts-kfirsts+1)*(jlasts-jfirsts+1)*(ilasts-ifirsts+1)
5200 5160960 : ijkr =(klastr-kfirstr+1)*(jlastr-jfirstr+1)*(ilastr-ifirstr+1)
5201 :
5202 5160960 : if (mod_method .gt. 0) then
5203 :
5204 : ! mpi derived types
5205 0 : if (r8_win%ncall_r .gt. MaxTrf-numtrm) then
5206 0 : write(iulog,*) "mp_recvtrirr: derived type handle count exceeded - exiting"
5207 0 : write(iulog,*) "r8_win%ncall_r MaxTrf = ", r8_win%ncall_r, MaxTrf
5208 0 : stop
5209 : endif
5210 :
5211 0 : if (num_s .gt. 0 .and. (.not. send_local)) then
5212 0 : do m = mbeg, mend
5213 0 : CALL MPI_WAITALL( comm_size, InHandle(:,r8_win%ncall_r+m-mbeg), InStats, Ierr )
5214 : enddo
5215 : endif
5216 0 : if (num_r .gt. 0) then
5217 0 : do m = mbeg, mend
5218 0 : CALL MPI_WAITALL( comm_size, OutHandle(:,r8_win%ncall_r+m-mbeg), OutStats, Ierr )
5219 : enddo
5220 : endif
5221 :
5222 : else
5223 :
5224 : ! temporary contiguous buffer / global window
5225 :
5226 5160960 : if (r8_win%ncall_r .gt. max_irr-numtrm) then
5227 0 : write(iulog,*) "mp_recvtrirr: insufficient window storage - exiting"
5228 0 : write(iulog,*) "r8_win%ncall_r max_irr = ", r8_win%ncall_r, max_irr
5229 0 : stop
5230 : endif
5231 5160960 : unitsize = r8_win%size/max_irr
5232 :
5233 : ! scatter data from global receive buffer to final destination
5234 5160960 : offset_0 = (r8_win%ncall_r-1)*unitsize
5235 5160960 : offset_r = offset_0
5236 :
5237 5289984000 : do ipe2=1, ceil2num
5238 5284823040 : ipe = ieor(ipe2-1,comm_pid) + 1
5239 5284823040 : if (ipe .gt. num_r) cycle
5240 3963617280 : r8_win%size_r = numtr*recv_bl(ipe)%Tot_Size
5241 3968778240 : if (r8_win%size_r .ne. 0) then
5242 61931520 : r8_win%offset_r = offset_r
5243 61931520 : offset_r = offset_r + r8_win%size_r
5244 61931520 : if (offset_r-offset_0 .gt. numtr*unitsize) then
5245 0 : write(iulog,*) "Fatal mp_recvtrirr: receive window out of space - exiting"
5246 0 : write(iulog,*) 'comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
5247 0 : ipe, unitsize, offset_r, offset_0
5248 0 : stop
5249 : endif
5250 :
5251 61931520 : r8_win%nread = r8_win%nread + 1
5252 61931520 : call MPI_WAIT(r8_win%rqest(r8_win%nread), Status, ierr)
5253 :
5254 61931520 : offset_v(1) = r8_win%offset_r
5255 495452160 : do j = 2, recv_bl(ipe)%Nparcels
5256 495452160 : offset_v(j) = offset_v(j-1) + recv_bl(ipe)%blocksizes(j-1)
5257 : enddo
5258 :
5259 557383680 : do j = 1, recv_bl(ipe)%Nparcels
5260 2043740160 : do m = mbeg, mend
5261 37654364160 : do i = 1, recv_bl(ipe)%blocksizes(j)
5262 35672555520 : ij = recv_bl(ipe)%displacements(j)+i
5263 37158912000 : qout((m-1)*ijkr+ij) = ga_r8_r(recv_bl(ipe)%Tot_Size*(m-mbeg)+offset_v(j)+i)
5264 : enddo
5265 : enddo
5266 : enddo
5267 :
5268 : endif
5269 : enddo
5270 :
5271 5160960 : if ((r8_win%ncall_s == r8_win%ncall_r + numtrm) .and. (.not. send_local)) then
5272 0 : call MPI_WAITALL(r8_win%nsend, r8_win%sqest, Stats, ierror)
5273 : endif
5274 :
5275 : endif ! mod_method .gt. 0
5276 :
5277 5160960 : r8_win%ncall_r = r8_win%ncall_r + numtrm
5278 :
5279 5160960 : if (r8_win%ncall_s == r8_win%ncall_r) then
5280 5160960 : r8_win%nsend = 0
5281 5160960 : r8_win%nrecv = 0
5282 5160960 : r8_win%nread = 0
5283 5160960 : r8_win%ncall_s = 0
5284 5160960 : r8_win%ncall_r = 0
5285 : endif
5286 :
5287 : #if defined( MODCM_TIMING )
5288 : call t_stopf('mod_comm communication')
5289 : #endif
5290 :
5291 : !EOC
5292 : end subroutine mp_recvtrirr
5293 : !------------------------------------------------------------------------------
5294 : !
5295 : !------------------------------------------------------------------------------
5296 : !BOP
5297 : ! !ROUTINE: mp_swaptrirr --- Write r8 contiguous parcels to global array
5298 : ! using XOR swap ordering - for multiple tracers
5299 : !
5300 : ! !INTERFACE:
5301 0 : subroutine mp_swaptrirr ( comm, send_bl, recv_bl, qin, qout, mbeg, mend, mq, &
5302 : ifirsts, ilasts, jfirsts, jlasts, kfirsts, klasts, &
5303 : ifirstr, ilastr, jfirstr, jlastr, kfirstr, klastr, &
5304 : sw_handshake, sw_maxreq, sw_alltoall, sw_send )
5305 :
5306 : ! !INPUT PARAMETERS:
5307 : integer, intent(in) :: comm ! communicator
5308 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
5309 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
5310 : integer, intent(in) :: mbeg ! initial tracer index
5311 : integer, intent(in) :: mend ! final tracer index
5312 : integer, intent(in) :: mq ! total tracer indices
5313 : integer, intent(in) :: ifirsts ! first I index of source
5314 : integer, intent(in) :: ilasts ! last I index of source
5315 : integer, intent(in) :: jfirsts ! first j index of source
5316 : integer, intent(in) :: jlasts ! last j index of source
5317 : integer, intent(in) :: kfirsts ! first k index of source
5318 : integer, intent(in) :: klasts ! last k index of source
5319 : integer, intent(in) :: ifirstr ! first I index of target
5320 : integer, intent(in) :: ilastr ! last I index of target
5321 : integer, intent(in) :: jfirstr ! first j index of target
5322 : integer, intent(in) :: jlastr ! last j index of target
5323 : integer, intent(in) :: kfirstr ! first k index of target
5324 : integer, intent(in) :: klastr ! last k index of target
5325 : logical, optional, intent(in) :: sw_handshake ! use flow control and
5326 : ! ready send
5327 : integer, optional, intent(in) :: sw_maxreq ! maximum number of outstanding
5328 : ! MPI requests
5329 : logical, optional, intent(in) :: sw_alltoall ! use mpi_alltoall
5330 : logical, optional, intent(in) :: sw_send ! use mpi_send instead of isend
5331 : real(r8), intent(in) :: qin(*) ! input tracer array
5332 :
5333 : ! !OUTPUT PARAMETERS:
5334 : real(r8), intent(out) :: qout(*) ! output tracer array
5335 : !
5336 : ! !DESCRIPTION:
5337 : !
5338 : ! XOR-ordered version of all-to-all communication
5339 : !
5340 : ! WARNING: mod_comm parameter max_irr might need to be set larger than expected
5341 : ! when swapping multiple variables; specifically, max_irr must be at least
5342 : ! as large as the incoming r8_win%ncall_s + the number of variables to
5343 : ! be swapped
5344 : !
5345 : ! !REVISION HISTORY:
5346 : ! 08.06.30 Worley original: derived from mp_sendirr, but using
5347 : ! swapm logic and XOR swap order
5348 : ! 08.08.22 Worley removed swapm; reimplemented with native MPI,
5349 : ! added flow control/ready send option and maxreq
5350 : ! throttling, added alltoall option
5351 : ! 09.10.07 Worley eliminated mpi_recv from handshake logic
5352 : !
5353 : ! !BUGS:
5354 : !
5355 : !EOP
5356 : !------------------------------------------------------------------------------
5357 : !BOC
5358 : !
5359 : ! !LOCAL VARIABLES:
5360 : integer :: i, j, p, istep, num_s, num_r
5361 : integer :: comm_pid, comm_size, steps, ierr
5362 : integer :: ipe, offset_s, offset_r, offset_0, unitsize
5363 :
5364 0 : integer :: sndlths(0:numpro-1), sdispls(0:numpro-1)
5365 0 : integer :: rcvlths(0:numpro-1), rdispls(0:numpro-1)
5366 0 : integer :: swapids(numpro)
5367 0 : integer :: sndids(numpro) ! nonblocking MPI send request ids
5368 0 : integer :: rcvids(numpro) ! nonblocking MPI recv request ids
5369 0 : integer :: hs_snd, hs_rcv(numpro)! handshake variables (send/receive)
5370 0 : integer :: hs_rcvids(numpro) ! nonblocking MPI handshake recv request ids
5371 0 : integer :: InStats(numpro*MPI_STATUS_SIZE)
5372 0 : integer :: OutStats(numpro*MPI_STATUS_SIZE)
5373 :
5374 : integer :: offset_v
5375 :
5376 : integer :: rstep
5377 :
5378 : integer :: maxreq, maxreqh
5379 : logical :: handshake, alltoall, sendd
5380 : integer :: numtr, numtrm, m
5381 : integer ijks, ijkr, ij
5382 :
5383 : #if defined( MODCM_TIMING )
5384 : call t_startf('mod_comm communication')
5385 : #endif
5386 :
5387 0 : call MPI_COMM_SIZE (comm, comm_size, ierr)
5388 0 : call MPI_COMM_RANK (comm, comm_pid, ierr)
5389 :
5390 : ! num_s = 0 if this process is not part of the sending decomposition
5391 0 : num_s = size(send_bl)
5392 0 : if (send_bl(1)%Nparcels == -1) then
5393 0 : num_s = 0
5394 : endif
5395 :
5396 : ! num_r = 0 if this process is not part of the receiving decomposition
5397 0 : num_r = size(recv_bl)
5398 0 : if (recv_bl(1)%Nparcels == -1) then
5399 0 : num_r = 0
5400 : endif
5401 :
5402 0 : if ( present(sw_handshake) ) then
5403 0 : handshake = sw_handshake
5404 0 : hs_snd = 1
5405 : else
5406 : handshake = .false.
5407 : endif
5408 :
5409 0 : if ( present(sw_alltoall) ) then
5410 0 : alltoall = sw_alltoall
5411 : else
5412 : alltoall = .false.
5413 : endif
5414 :
5415 0 : if ( present(sw_send) ) then
5416 0 : sendd = sw_send
5417 : else
5418 : sendd = .false.
5419 : endif
5420 :
5421 0 : numtrm = mend - mbeg
5422 0 : numtr = numtrm + 1
5423 :
5424 0 : ijks =(klasts-kfirsts+1)*(jlasts-jfirsts+1)*(ilasts-ifirsts+1)
5425 0 : ijkr =(klastr-kfirstr+1)*(jlastr-jfirstr+1)*(ilastr-ifirstr+1)
5426 :
5427 0 : unitsize = r8_win%size/max_irr
5428 :
5429 : ! advance to unused portion of storage window
5430 0 : r8_win%ncall_s = r8_win%ncall_s + 1
5431 :
5432 0 : if (r8_win%ncall_s .gt. max_irr-numtrm) then
5433 0 : write(iulog,*) "mp_swaptrirr: insufficient window storage - exiting"
5434 0 : write(iulog,*) "r8_win%ncall_s max_irr = ", r8_win%ncall_s, max_irr
5435 0 : stop
5436 : endif
5437 :
5438 : ! calculate send lengths and displacements
5439 0 : offset_0 = (r8_win%ncall_s-1)*unitsize
5440 0 : offset_s = offset_0
5441 0 : sndlths(:) = 0
5442 0 : sdispls(:) = 0
5443 0 : do ipe=1, num_s
5444 0 : sndlths(ipe-1) = numtr*send_bl(ipe)%Tot_Size
5445 0 : sdispls(ipe-1) = offset_s
5446 0 : if (sndlths(ipe-1) .ne. 0) then
5447 :
5448 0 : offset_s = offset_s + sndlths(ipe-1)
5449 0 : if (offset_s-offset_0 .gt. numtr*unitsize) then
5450 0 : write(iulog,*) "Fatal mp_swaptrirr: send window out of space - exiting"
5451 0 : write(iulog,*) '1 comm_pid ipe unitsize offset_s offset_0 = ', comm_pid, &
5452 0 : ipe, unitsize, offset_s, offset_0
5453 0 : stop
5454 : endif
5455 : endif
5456 : enddo
5457 :
5458 : ! calculate receive lengths and displacements
5459 0 : offset_r = offset_0
5460 0 : rcvlths(:) = 0
5461 0 : rdispls(:) = 0
5462 0 : do ipe=1, num_r
5463 0 : rcvlths(ipe-1) = numtr*recv_bl(ipe)%Tot_Size
5464 0 : rdispls(ipe-1) = offset_r
5465 0 : if (rcvlths(ipe-1) .ne. 0) then
5466 :
5467 0 : offset_r = offset_r + rcvlths(ipe-1)
5468 0 : if (numtr*unitsize < offset_r-offset_0) then
5469 0 : write(iulog,*) "Fatal mp_swaptrirr: receive window out of space - exiting"
5470 0 : write(iulog,*) '1 comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
5471 0 : ipe, unitsize, offset_r, offset_0
5472 0 : stop
5473 : endif
5474 : endif
5475 : enddo
5476 :
5477 : ! Calculate swap partners and number of steps in point-to-point
5478 : ! implementations of alltoall algorithm.
5479 0 : steps = 0
5480 0 : do ipe=1,ceil2(comm_size)-1
5481 0 : p = pair(comm_size,ipe,comm_pid)
5482 0 : if (p >= 0) then
5483 0 : if (sndlths(p) > 0 .or. rcvlths(p) > 0) then
5484 0 : steps = steps + 1
5485 0 : swapids(steps) = p
5486 : end if
5487 : end if
5488 : end do
5489 :
5490 0 : if (.not. alltoall) then
5491 :
5492 0 : sndids(1:steps) = MPI_REQUEST_NULL
5493 0 : rcvids(1:steps) = MPI_REQUEST_NULL
5494 :
5495 0 : if (steps .eq. 0) then
5496 0 : maxreq = 0
5497 0 : maxreqh = 0
5498 0 : elseif (steps .eq. 1) then
5499 0 : maxreq = 1
5500 0 : maxreqh = 1
5501 : else
5502 0 : if ( present(sw_maxreq) ) then
5503 0 : if ((sw_maxreq .le. steps) .and. (sw_maxreq .ge. 0)) then
5504 0 : maxreq = sw_maxreq
5505 0 : if (maxreq > 1) then
5506 0 : maxreqh = maxreq/2
5507 : else
5508 0 : maxreq = 2
5509 0 : maxreqh = 1
5510 : endif
5511 : else
5512 0 : maxreq = steps
5513 0 : maxreqh = steps
5514 : endif
5515 : else
5516 0 : maxreq = steps
5517 0 : maxreqh = steps
5518 : endif
5519 : endif
5520 :
5521 : ! Post initial handshake receive requests
5522 0 : if (handshake) then
5523 0 : do istep=1,maxreq
5524 0 : p = swapids(istep)
5525 0 : if (sndlths(p) > 0) then
5526 : call mpi_irecv ( hs_rcv(istep), 1, mp_i4, p, comm_pid, comm, &
5527 0 : hs_rcvids(istep), ierr )
5528 : endif
5529 : enddo
5530 : endif
5531 :
5532 : ! Post initial receive requests
5533 0 : do istep=1,maxreq
5534 0 : p = swapids(istep)
5535 0 : if (rcvlths(p) > 0) then
5536 0 : offset_r = rdispls(p)+1
5537 0 : call mpi_irecv ( ga_r8_r(offset_r), rcvlths(p), mp_r8, &
5538 0 : p, p, comm, rcvids(istep), ierr )
5539 0 : if (handshake) then
5540 : call mpi_send( hs_snd, 1, mp_i4, p, p, comm, &
5541 0 : ierr )
5542 : endif
5543 : endif
5544 : enddo
5545 : rstep = maxreq
5546 : !
5547 : endif
5548 :
5549 : ! gather data into global send buffer
5550 0 : do istep=1,steps
5551 0 : p = swapids(istep)
5552 :
5553 0 : if (sndlths(p) .ne. 0) then
5554 0 : offset_v = sdispls(p)
5555 0 : do j = 1, send_bl(p+1)%nparcels
5556 0 : do m = mbeg, mend
5557 0 : do i = 1, send_bl(p+1)%blocksizes(j)
5558 0 : ij = send_bl(p+1)%displacements(j)+i
5559 0 : ga_r8_s(send_bl(p+1)%Tot_Size*(m-mbeg)+offset_v+i) = qin((m-1)*ijks+ij)
5560 : enddo
5561 : enddo
5562 0 : offset_v = offset_v + send_bl(p+1)%blocksizes(j)
5563 : enddo
5564 : endif
5565 :
5566 0 : if (.not. alltoall) then
5567 :
5568 : ! Submit new i(r)send request
5569 0 : offset_s = sdispls(p)+1
5570 0 : if (sndlths(p) > 0) then
5571 0 : if (handshake) then
5572 : call mpi_wait( hs_rcvids(istep), MPI_STATUS_IGNORE, ierr )
5573 0 : if (sendd) then
5574 0 : call mpi_rsend( ga_r8_s(offset_s), sndlths(p), mp_r8, &
5575 0 : p, comm_pid, comm, ierr )
5576 : else
5577 0 : call mpi_irsend( ga_r8_s(offset_s), sndlths(p), mp_r8, &
5578 0 : p, comm_pid, comm, sndids(istep), ierr )
5579 : endif
5580 : else
5581 0 : if (sendd) then
5582 0 : call mpi_send ( ga_r8_s(offset_s), sndlths(p), mp_r8, &
5583 0 : p, comm_pid, comm, ierr )
5584 : else
5585 0 : call mpi_isend ( ga_r8_s(offset_s), sndlths(p), mp_r8, &
5586 0 : p, comm_pid, comm, sndids(istep), ierr )
5587 : endif
5588 : endif
5589 : endif
5590 :
5591 0 : if (istep > maxreqh) then
5592 : ! Wait for oldest irecv request to complete
5593 0 : call mpi_wait( rcvids(istep-maxreqh), OutStats, ierr )
5594 :
5595 0 : if (rstep < steps) then
5596 0 : rstep = rstep + 1
5597 0 : p = swapids(rstep)
5598 :
5599 : ! Submit a new handshake irecv request
5600 0 : if (handshake) then
5601 0 : if (sndlths(p) > 0) then
5602 : call mpi_irecv( hs_rcv(rstep), 1, mp_i4, p, comm_pid, comm, &
5603 0 : hs_rcvids(rstep), ierr )
5604 : endif
5605 : endif
5606 :
5607 : ! Submit a new irecv request
5608 0 : if (rcvlths(p) > 0) then
5609 0 : offset_r = rdispls(p)+1
5610 0 : call mpi_irecv( ga_r8_r(offset_r), rcvlths(p), mp_r8, &
5611 0 : p, p, comm, rcvids(rstep), ierr )
5612 0 : if (handshake) then
5613 : call mpi_send ( hs_snd, 1, mp_i4, p, p, comm, &
5614 0 : ierr )
5615 : endif
5616 : endif
5617 : endif
5618 :
5619 : ! Wait for outstanding i(r)send request to complete
5620 0 : if (.not. sendd) then
5621 0 : call mpi_wait( sndids(istep-maxreqh), InStats, ierr )
5622 : endif
5623 : endif
5624 : !
5625 : endif
5626 : !
5627 : enddo
5628 :
5629 : ! local copy to send buffer
5630 0 : if (sndlths(comm_pid) .ne. 0) then
5631 :
5632 0 : offset_v = sdispls(comm_pid)
5633 0 : do j = 1, send_bl(comm_pid+1)%nparcels
5634 0 : do m = mbeg, mend
5635 0 : do i = 1, send_bl(comm_pid+1)%blocksizes(j)
5636 0 : ij = send_bl(comm_pid+1)%displacements(j)+i
5637 0 : ga_r8_s(send_bl(comm_pid+1)%Tot_Size*(m-mbeg)+offset_v+i) = qin((m-1)*ijks+ij)
5638 : enddo
5639 : enddo
5640 0 : offset_v = offset_v + send_bl(comm_pid+1)%blocksizes(j)
5641 : enddo
5642 :
5643 0 : if (.not. alltoall) then
5644 0 : ga_r8_r(rdispls(comm_pid)+1:rdispls(comm_pid)+rcvlths(comm_pid)) = &
5645 0 : ga_r8_s(sdispls(comm_pid)+1:sdispls(comm_pid)+sndlths(comm_pid))
5646 : endif
5647 :
5648 : endif
5649 :
5650 0 : if (alltoall) then
5651 : call mpi_alltoallv (ga_r8_s, sndlths, sdispls, mp_r8, &
5652 : ga_r8_r, rcvlths, rdispls, mp_r8, &
5653 0 : comm, ierror)
5654 : endif
5655 :
5656 : ! local copy from receive buffer
5657 0 : if (rcvlths(comm_pid) .ne. 0) then
5658 :
5659 0 : offset_v = rdispls(comm_pid)
5660 0 : do j = 1, recv_bl(comm_pid+1)%Nparcels
5661 0 : do m = mbeg, mend
5662 0 : do i = 1, recv_bl(comm_pid+1)%blocksizes(j)
5663 0 : ij = recv_bl(comm_pid+1)%displacements(j)+i
5664 0 : qout((m-1)*ijkr+ij) = ga_r8_r(recv_bl(comm_pid+1)%Tot_Size*(m-mbeg)+offset_v+i)
5665 : enddo
5666 : enddo
5667 0 : offset_v = offset_v + recv_bl(comm_pid+1)%blocksizes(j)
5668 : enddo
5669 :
5670 : endif
5671 :
5672 : ! scatter data from global receive buffer to final destination
5673 0 : do istep=1,steps
5674 0 : p = swapids(istep)
5675 :
5676 0 : if (.not. alltoall) then
5677 0 : if (istep > steps-maxreqh) then
5678 : call mpi_wait( rcvids(istep), OutStats, ierr )
5679 : endif
5680 : endif
5681 :
5682 0 : if (rcvlths(p) .ne. 0) then
5683 :
5684 0 : offset_v = rdispls(p)
5685 0 : do j = 1, recv_bl(p+1)%Nparcels
5686 0 : do m = mbeg, mend
5687 0 : do i = 1, recv_bl(p+1)%blocksizes(j)
5688 0 : ij = recv_bl(p+1)%displacements(j)+i
5689 0 : qout((m-1)*ijkr+ij) = ga_r8_r(recv_bl(p+1)%Tot_Size*(m-mbeg)+offset_v+i)
5690 : enddo
5691 : enddo
5692 0 : offset_v = offset_v + recv_bl(p+1)%blocksizes(j)
5693 : enddo
5694 :
5695 : endif
5696 : enddo
5697 :
5698 : ! Wait for any outstanding send requests to complete.
5699 0 : if (.not. alltoall .and. .not. sendd) then
5700 0 : call mpi_waitall( maxreqh, sndids(steps-maxreqh+1), InStats, ierr )
5701 : endif
5702 :
5703 : ! clean-up
5704 : ! make used portion of storage window available for reuse
5705 0 : r8_win%ncall_s = r8_win%ncall_s - 1
5706 :
5707 : #if defined( MODCM_TIMING )
5708 : call t_stopf('mod_comm communication')
5709 : #endif
5710 :
5711 : !EOC
5712 0 : end subroutine mp_swaptrirr
5713 : # endif
5714 : !------------------------------------------------------------------------------
5715 : !
5716 : !------------------------------------------------------------------------------
5717 : # if defined( MOD_SPECIFIED_SHAPE )
5718 : !BOP
5719 : ! !ROUTINE: mp_sendtrirr --- Initiate communication of contiguous tracer parcels
5720 : !
5721 : ! !INTERFACE:
5722 : subroutine mp_sendtrirr ( comm, send_bl, recv_bl, qin, qout, mbeg, mend, mq, &
5723 : ifirsts, ilasts, jfirsts, jlasts, kfirsts, klasts, &
5724 : ifirstr, ilastr, jfirstr, jlastr, kfirstr, klastr, &
5725 : modc )
5726 :
5727 : ! !INPUT PARAMETERS:
5728 : integer, intent(in) :: comm ! communicator
5729 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
5730 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
5731 : integer, intent(in) :: mbeg ! initial tracer index
5732 : integer, intent(in) :: mend ! final tracer index
5733 : integer, intent(in) :: mq ! total tracer indices
5734 : integer, intent(in) :: ifirsts ! first I index of source
5735 : integer, intent(in) :: ilasts ! last I index of source
5736 : integer, intent(in) :: jfirsts ! first j index of source
5737 : integer, intent(in) :: jlasts ! last j index of source
5738 : integer, intent(in) :: kfirsts ! first k index of source
5739 : integer, intent(in) :: klasts ! last k index of source
5740 : integer, intent(in) :: ifirstr ! first I index of target
5741 : integer, intent(in) :: ilastr ! last I index of target
5742 : integer, intent(in) :: jfirstr ! first j index of target
5743 : integer, intent(in) :: jlastr ! last j index of target
5744 : integer, intent(in) :: kfirstr ! first k index of target
5745 : integer, intent(in) :: klastr ! last k index of target
5746 : integer, optional, intent(in) :: modc(4) ! 1: classical, swap p2p, swap a2a
5747 : ! 2: handshake
5748 : ! 3: send vs isend
5749 : ! 4: max number of outstanding requests
5750 : real(r8), intent(in) :: qin(ifirsts:ilasts,jfirsts:jlasts,kfirsts:klasts,1:mq) ! input tracer array
5751 :
5752 : ! !OUTPUT PARAMETERS:
5753 : real(r8), intent(out) :: qout(ifirstr:ilastr,jfirstr:jlastr,kfirstr:klastr,1:mq) ! output tracer array
5754 : !
5755 : ! !DESCRIPTION:
5756 : ! Communicate a number of contiguous parcels to/from arbitrary set of PEs.
5757 : ! Modc(1): if 0, use original approach of posting all communications here and placing
5758 : ! wait points in mp_recvtrirr; if 1, call swap routine with p2p messages; if 2, call swap
5759 : ! routine with a2a messages.
5760 : ! Modc(2): if 1, then apply handshaking (don't send until corresponding receive is posted)
5761 : ! Modc(3): if 1, then use blocking send; otherwise use nonblocking send
5762 : ! Modc(4): maximum number of outstanding requests (applies to swap routines only)
5763 : !
5764 : ! !REVISION HISTORY:
5765 : ! 02.08.13 Sawyer Creation
5766 : ! 02.11.06 Mirin Optimizations
5767 : ! 03.03.03 Sawyer Use partneroffset
5768 : ! 03.06.24 Sawyer Integrated Use_Mpi_Types; added qout
5769 : ! 04.02.24 Mirin Various mpi2 options
5770 : ! 08.09.18 Mirin Major overhaul, to include approaches from Mirin and Worley
5771 : ! 09.10.07 Worley eliminated mpi_recv from handshake logic
5772 : !
5773 : ! !BUGS:
5774 : !
5775 : !EOP
5776 : !------------------------------------------------------------------------------
5777 : !BOC
5778 : !
5779 : ! !LOCAL VARIABLES:
5780 : integer ipe, qsize, offset, blocksize, nparcels, offset_s, offset_r, ierr, mod_method
5781 : integer p, mysize, nthpc, minsize, nthrd, pn, pt, tmpsize, unitsize, offset_0
5782 : integer i, j, send_tag, recv_tag, num_s, num_r, m
5783 : integer :: offset_v (Max_Nparcels)
5784 : integer :: hs_snd, hs_rcv(numpro), hs_rcvids(numpro)
5785 : integer ipe2, ceil2num
5786 : integer numtr, numtrm
5787 : integer sw_local, maxreq_local
5788 : logical hs_local, send_local
5789 : logical sw_alltoall
5790 : integer comm_pid
5791 : integer ip, jp, kp, mp, ir, jr, jir, mt
5792 :
5793 :
5794 : #if defined( MODCM_TIMING )
5795 : call t_startf('mod_comm communication')
5796 : #endif
5797 :
5798 : if (present(modc)) then
5799 : sw_local = modc(1)
5800 : hs_local = (modc(2) .eq. 1)
5801 : send_local = (modc(3) .eq. 1)
5802 : maxreq_local = modc(4)
5803 : else
5804 : sw_local = 0
5805 : hs_local = .true.
5806 : send_local = .true.
5807 : maxreq_local = -1
5808 : endif
5809 :
5810 : ! Do not call mp_swaptrirr unless mod_method equals 0
5811 : mod_method = recv_bl(1)%method
5812 : if (mod_method .gt. 0) sw_local = 0
5813 :
5814 : if (sw_local .gt. 0) then
5815 : sw_alltoall = (sw_local .eq. 2)
5816 : call mp_swaptrirr(comm, send_bl, recv_bl, qin, qout, mbeg, mend, mq, &
5817 : ifirsts, ilasts, jfirsts, jlasts, kfirsts, klasts, &
5818 : ifirstr, ilastr, jfirstr, jlastr, kfirstr, klastr, &
5819 : sw_handshake=hs_local, sw_maxreq=maxreq_local, &
5820 : sw_alltoall=sw_alltoall, sw_send=send_local)
5821 : else
5822 :
5823 : call MPI_COMM_RANK (comm, comm_pid, ierr)
5824 :
5825 : hs_snd = 1
5826 : ceil2num = ceil2(numpro)
5827 :
5828 : numtrm = mend - mbeg
5829 : numtr = numtrm + 1
5830 :
5831 : ! num_s = 0 if this processes is not part of the sending decomposition
5832 : num_s = size(send_bl)
5833 : if (send_bl(1)%Nparcels == -1) then
5834 : num_s = 0
5835 : endif
5836 :
5837 : ! num_r = 0 if this processes is not part of the receiving decomposition
5838 : num_r = size(recv_bl)
5839 : if (recv_bl(1)%Nparcels == -1) then
5840 : num_r = 0
5841 : endif
5842 :
5843 : r8_win%ncall_s = r8_win%ncall_s + 1
5844 : if (mod_method .gt. 0) then
5845 : !
5846 : ! mpi derived types
5847 : if (r8_win%ncall_s .gt. MaxTrf-numtrm) then
5848 : write(iulog,*) "mp_sendtrirr: derived type handle count exceeded - exiting"
5849 : write(iulog,*) "r8_win%ncall_s MaxTrf = ", r8_win%ncall_s, MaxTrf
5850 : stop
5851 : endif
5852 : !
5853 : ! MPI: Irecv over all processes
5854 : !
5855 : if (hs_local) then
5856 : hs_rcvids(:) = MPI_REQUEST_NULL
5857 : do ipe2=1, ceil2num
5858 : ipe = ieor(ipe2-1,comm_pid) + 1
5859 : if (ipe .gt. num_s) cycle
5860 : if ( send_bl(ipe)%type /= MPI_DATATYPE_NULL ) then
5861 : if (ipe-1 /= comm_pid) &
5862 : call MPI_IRECV ( hs_rcv(ipe), 1, mp_i4, ipe-1, comm_pid, comm, &
5863 : hs_rcvids(ipe), ierr )
5864 : endif
5865 : enddo
5866 : endif
5867 :
5868 : OutHandle(:,r8_win%ncall_s:r8_win%ncall_s+numtrm) = MPI_REQUEST_NULL
5869 : do ipe2=1, ceil2num
5870 : ipe = ieor(ipe2-1,comm_pid) + 1
5871 : if (ipe .gt. num_r) cycle
5872 : !
5873 : ! Receive the buffers with MPI_Irecv. Non-blocking
5874 : !
5875 : if ( recv_bl(ipe)%type /= MPI_DATATYPE_NULL ) then
5876 : recv_tag = ipe-1 + modcam_tagoffset
5877 : do m = mbeg, mend
5878 : call mpi_irecv( qout(:,:,:,m), 1, recv_bl(ipe)%type, ipe-1, recv_tag, &
5879 : comm, OutHandle(ipe,r8_win%ncall_s+m-mbeg), ierr )
5880 : enddo
5881 : if (hs_local) then
5882 : if (ipe-1 /= comm_pid) &
5883 : call MPI_SEND ( hs_snd, 1, mp_i4, ipe-1, ipe-1, comm, ierr )
5884 : endif
5885 : endif
5886 : enddo
5887 :
5888 : !
5889 : ! MPI: Isend/Send over all processes; use risend/rsend with hs
5890 : !
5891 : InHandle(:,r8_win%ncall_s:r8_win%ncall_s+numtrm) = MPI_REQUEST_NULL
5892 : do ipe2=1, ceil2num
5893 : ipe = ieor(ipe2-1,comm_pid) + 1
5894 : if (ipe .gt. num_s) cycle
5895 :
5896 : !
5897 : ! Send the individual buffers with non-blocking sends
5898 : !
5899 : if ( send_bl(ipe)%type /= MPI_DATATYPE_NULL ) then
5900 : send_tag = comm_pid + modcam_tagoffset
5901 : if (hs_local) then
5902 : if (ipe-1 /= comm_pid) &
5903 : call MPI_WAIT ( hs_rcvids(ipe), MPI_STATUS_IGNORE, ierr )
5904 : if (send_local) then
5905 : do m = mbeg, mend
5906 : call mpi_rsend( qin(:,:,:,m), 1, send_bl(ipe)%type, ipe-1, send_tag, &
5907 : comm, ierr )
5908 : enddo
5909 : else
5910 : do m = mbeg, mend
5911 : call mpi_irsend( qin(:,:,:,m), 1, send_bl(ipe)%type, ipe-1, send_tag, &
5912 : comm, InHandle(ipe,r8_win%ncall_s), ierr )
5913 : enddo
5914 : endif
5915 : else
5916 : if (send_local) then
5917 : do m = mbeg, mend
5918 : call mpi_send( qin(:,:,:,m), 1, send_bl(ipe)%type, ipe-1, send_tag, &
5919 : comm, ierr )
5920 : enddo
5921 : else
5922 : do m = mbeg, mend
5923 : call mpi_isend( qin(:,:,:,m), 1, send_bl(ipe)%type, ipe-1, send_tag, &
5924 : comm, InHandle(ipe,r8_win%ncall_s), ierr )
5925 : enddo
5926 : endif
5927 : endif
5928 : endif
5929 : enddo
5930 : else
5931 :
5932 : ! temporary contiguous buffers
5933 :
5934 : jr = jlasts - jfirsts + 1
5935 : ir = ilasts - ifirsts + 1
5936 : jir = jr * ir
5937 : if (r8_win%ncall_s .gt. max_irr-numtrm) then
5938 : write(iulog,*) "mp_sendtrirr: insufficient window storage - exiting"
5939 : write(iulog,*) "r8_win%ncall_s max_irr = ", r8_win%ncall_s, max_irr
5940 : stop
5941 : endif
5942 : unitsize = r8_win%size/max_irr
5943 :
5944 : ! issue call to receive data in global receive buffer
5945 : offset_0 = (r8_win%ncall_s-1)*unitsize
5946 : offset_s = offset_0
5947 : offset_r = offset_0
5948 :
5949 : if (hs_local) then
5950 : hs_rcvids(:) = MPI_REQUEST_NULL
5951 : do ipe2=1, ceil2num
5952 : ipe = ieor(ipe2-1,comm_pid) + 1
5953 : if (ipe .gt. num_s) cycle
5954 : qsize = numtr*send_bl(ipe)%Tot_Size
5955 : if (qsize .ne. 0) then
5956 : r8_win%dest = ipe-1
5957 : send_tag = comm_pid + modcam_tagoffset
5958 : if (r8_win%dest /= comm_pid) &
5959 : call MPI_IRECV ( hs_rcv(ipe), 1, mp_i4, r8_win%dest, send_tag, comm, &
5960 : hs_rcvids(ipe), ierr )
5961 : endif
5962 : enddo
5963 : endif
5964 :
5965 : do ipe2=1, ceil2num
5966 : ipe = ieor(ipe2-1,comm_pid) + 1
5967 : if (ipe .gt. num_r) cycle
5968 : r8_win%size_r = numtr*recv_bl(ipe)%Tot_Size
5969 : if (r8_win%size_r .ne. 0) then
5970 : r8_win%offset_r = offset_r
5971 : offset_r = offset_r + r8_win%size_r
5972 : r8_win%src = ipe-1
5973 : if (numtr*unitsize >= offset_r-offset_0) then
5974 : recv_tag = r8_win%src + modcam_tagoffset
5975 : qsize = r8_win%size_r
5976 : r8_win%nrecv = r8_win%nrecv + 1
5977 : call MPI_IRECV(ga_r8_r(r8_win%offset_r+1), qsize, mp_r8, r8_win%src, &
5978 : recv_tag, comm, r8_win%rqest(r8_win%nrecv), ierror)
5979 : if (hs_local) then
5980 : if (r8_win%src /= comm_pid) &
5981 : call MPI_SEND ( hs_snd, 1, mp_i4, r8_win%src, recv_tag, comm, ierror)
5982 : endif
5983 : else
5984 : write(iulog,*) "Fatal mp_sendtrirr: receive window out of space - exiting"
5985 : write(iulog,*) 'comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
5986 : ipe, unitsize, offset_r, offset_0
5987 : stop
5988 : endif
5989 : endif
5990 : enddo
5991 : ! gather data into global send buffer
5992 : do ipe2=1, ceil2num
5993 : ipe = ieor(ipe2-1,comm_pid) + 1
5994 : if (ipe .gt. num_s) cycle
5995 : qsize = numtr*send_bl(ipe)%Tot_Size
5996 : if (qsize .ne. 0) then
5997 : r8_win%dest = ipe-1
5998 : r8_win%offset_s = offset_s
5999 : offset_s = offset_s + qsize
6000 : if (offset_s-offset_0 .gt. numtr*unitsize) then
6001 : write(iulog,*) "Fatal mp_sendtrirr: send window out of space - exiting"
6002 : write(iulog,*) 'comm_pid ipe unitsize offset_s offset_0 = ', comm_pid, &
6003 : ipe, unitsize, offset_s, offset_0
6004 : stop
6005 : endif
6006 :
6007 : offset_v(1) = r8_win%offset_s
6008 : do j = 2, send_bl(ipe)%nparcels
6009 : offset_v(j) = offset_v(j-1) + send_bl(ipe)%blocksizes(j-1)
6010 : enddo
6011 :
6012 : do j = 1, send_bl(ipe)%nparcels
6013 : do m = mbeg, mend
6014 : do i = 1, send_bl(ipe)%blocksizes(j)
6015 : mp = send_bl(ipe)%displacements(j)+i
6016 : kp = kfirsts + (mp-1)/jir
6017 : mt = (kp-kfirsts)*jir
6018 : jp = jfirsts + (mp-mt-1)/ir
6019 : ip = mp-mt - (jp-jfirsts)*ir + ifirsts-1
6020 : ga_r8_s(send_bl(ipe)%Tot_Size*(m-mbeg)+offset_v(j)+i) = qin(ip,jp,kp,m)
6021 : enddo
6022 : enddo
6023 : enddo
6024 :
6025 : ! nonblocking send
6026 : send_tag = comm_pid + modcam_tagoffset
6027 : r8_win%nsend = r8_win%nsend + 1
6028 : if (hs_local) then
6029 : if (r8_win%dest /= comm_pid) &
6030 : call MPI_WAIT ( hs_rcvids(ipe), MPI_STATUS_IGNORE, ierr )
6031 : if (send_local) then
6032 : call MPI_RSEND(ga_r8_s(r8_win%offset_s+1), qsize, mp_r8, r8_win%dest, &
6033 : send_tag, comm, ierr)
6034 : else
6035 : call MPI_IRSEND(ga_r8_s(r8_win%offset_s+1), qsize, mp_r8, r8_win%dest, &
6036 : send_tag, comm, r8_win%sqest(r8_win%nsend), ierr)
6037 : endif
6038 : else
6039 : if (send_local) then
6040 : call MPI_SEND(ga_r8_s(r8_win%offset_s+1), qsize, mp_r8, r8_win%dest, &
6041 : send_tag, comm, ierr)
6042 : else
6043 : call MPI_ISEND(ga_r8_s(r8_win%offset_s+1), qsize, mp_r8, r8_win%dest, &
6044 : send_tag, comm, r8_win%sqest(r8_win%nsend), ierr)
6045 : endif
6046 : endif
6047 : endif
6048 : enddo
6049 :
6050 : endif ! mod_method
6051 :
6052 : r8_win%ncall_s = r8_win%ncall_s + numtrm
6053 :
6054 : endif ! sw_local
6055 :
6056 : #if defined( MODCM_TIMING )
6057 : call t_stopf('mod_comm communication')
6058 : #endif
6059 :
6060 : end subroutine mp_sendtrirr
6061 : !------------------------------------------------------------------------------
6062 : !
6063 : !------------------------------------------------------------------------------
6064 : !BOP
6065 : ! !ROUTINE: mp_recvtrirr --- Finalize communication of contiguous tracer parcels
6066 : !
6067 : ! !INTERFACE:
6068 : subroutine mp_recvtrirr ( comm, send_bl, recv_bl, qin, qout, mbeg, mend, mq, &
6069 : ifirsts, ilasts, jfirsts, jlasts, kfirsts, klasts, &
6070 : ifirstr, ilastr, jfirstr, jlastr, kfirstr, klastr, &
6071 : modc )
6072 :
6073 : ! !INPUT PARAMETERS:
6074 : integer, intent(in) :: comm ! communicator
6075 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
6076 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
6077 : integer, intent(in) :: mbeg ! initial tracer index
6078 : integer, intent(in) :: mend ! final tracer index
6079 : integer, intent(in) :: mq ! total tracer indices
6080 : integer, intent(in) :: ifirsts ! first I index of source
6081 : integer, intent(in) :: ilasts ! last I index of source
6082 : integer, intent(in) :: jfirsts ! first j index of source
6083 : integer, intent(in) :: jlasts ! last j index of source
6084 : integer, intent(in) :: kfirsts ! first k index of source
6085 : integer, intent(in) :: klasts ! last k index of source
6086 : integer, intent(in) :: ifirstr ! first I index of target
6087 : integer, intent(in) :: ilastr ! last I index of target
6088 : integer, intent(in) :: jfirstr ! first j index of target
6089 : integer, intent(in) :: jlastr ! last j index of target
6090 : integer, intent(in) :: kfirstr ! first k index of target
6091 : integer, intent(in) :: klastr ! last k index of target
6092 : integer, optional, intent(in) :: modc(4) ! 1: classical, swap p2p, swap a2a
6093 : ! 2: handshake
6094 : ! 3: send vs isend
6095 : ! 4: max number of outstanding requests
6096 : real(r8), intent(in) :: qin(ifirsts:ilasts,jfirsts:jlasts,kfirsts:klasts,1:mq) ! input tracer array
6097 : ! !OUTPUT PARAMETERS:
6098 : real(r8), intent(out) :: qout(ifirstr:ilastr,jfirstr:jlastr,kfirstr:klastr,1:mq) ! output tracer array
6099 : !
6100 : ! !DESCRIPTION:
6101 : ! Complete transfer of a generalized region initiated by {\tt mp\_sendtrirr}.
6102 : ! Communicate a number of contiguous parcels to/from arbitrary set of PEs.
6103 : ! Modc(1): if 0, use original approach of posting all communications in mp_sendtrirr and
6104 : ! placing wait points here; otherwise don't do anything - mp_swaptrirr is called from mp_sendirr.
6105 : ! Modc(3): if 1, then use blocking send; otherwise use nonblocking send
6106 : !
6107 : ! !REVISION HISTORY:
6108 : ! 02.08.15 Sawyer Creation
6109 : ! 02.11.06 Mirin Optimizations
6110 : ! 03.03.03 Sawyer Now using packed arrays for MPI2
6111 : ! 04.02.24 Mirin Various mpi2 options
6112 : ! 08.09.18 Mirin Major overhaul, to include approaches from Mirin and Worley
6113 : !
6114 : !EOP
6115 : !------------------------------------------------------------------------------
6116 : !BOC
6117 : integer :: ipe, blocksize, offset_r, mod_method
6118 : integer unitsize, offset_0
6119 : integer Ierr
6120 : integer InStats(numpro*MPI_STATUS_SIZE)
6121 : integer OutStats(numpro*MPI_STATUS_SIZE)
6122 : integer i, j, num_r, num_s, m
6123 : integer :: offset_v (Max_Nparcels)
6124 : integer ipe2, ceil2num
6125 : integer numtr, numtrm
6126 : integer sw_local, maxreq_local
6127 : logical hs_local, send_local
6128 : logical sw_alltoall
6129 : integer comm_size, comm_pid
6130 : integer ip, jp, kp, mp, ir, jr, jir, mt
6131 :
6132 : if (present(modc)) then
6133 : sw_local = modc(1)
6134 : hs_local = (modc(2) .eq. 1)
6135 : send_local = (modc(3) .eq. 1)
6136 : maxreq_local = modc(4)
6137 : else
6138 : sw_local = 0
6139 : hs_local = .true.
6140 : send_local = .true.
6141 : maxreq_local = -1
6142 : endif
6143 :
6144 : ! Do not call mp_swaptrirr (hence return) unless mod_method equals 0
6145 : mod_method = recv_bl(1)%method
6146 : if (mod_method .gt. 0) sw_local = 0
6147 :
6148 : ! Return if swap_irr
6149 : if (sw_local .gt. 0) return
6150 :
6151 : #if defined( MODCM_TIMING )
6152 : call t_startf('mod_comm communication')
6153 : #endif
6154 :
6155 : call MPI_COMM_SIZE (comm, comm_size, ierr)
6156 : call MPI_COMM_RANK (comm, comm_pid, ierr)
6157 :
6158 : ceil2num = ceil2(numpro)
6159 :
6160 : numtrm = mend - mbeg
6161 : numtr = numtrm + 1
6162 :
6163 : ! num_s = 0 if this processes is not part of the sending decomposition
6164 : num_s = size(send_bl)
6165 : if (send_bl(1)%Nparcels == -1) then
6166 : num_s = 0
6167 : endif
6168 :
6169 : ! num_r = 0 if this processes is not part of the receiving decomposition
6170 : num_r = size(recv_bl)
6171 : if (recv_bl(1)%Nparcels == -1) then
6172 : num_r = 0
6173 : endif
6174 :
6175 : r8_win%ncall_r = r8_win%ncall_r + 1
6176 :
6177 : if (mod_method .gt. 0) then
6178 :
6179 : ! mpi derived types
6180 : if (r8_win%ncall_r .gt. MaxTrf-numtrm) then
6181 : write(iulog,*) "mp_recvtrirr: derived type handle count exceeded - exiting"
6182 : write(iulog,*) "r8_win%ncall_r MaxTrf = ", r8_win%ncall_r, MaxTrf
6183 : stop
6184 : endif
6185 :
6186 : if (num_s .gt. 0 .and. (.not. send_local)) then
6187 : do m = mbeg, mend
6188 : CALL MPI_WAITALL( comm_size, InHandle(:,r8_win%ncall_r+m-mbeg), InStats, Ierr )
6189 : enddo
6190 : endif
6191 : if (num_r .gt. 0) then
6192 : do m = mbeg, mend
6193 : CALL MPI_WAITALL( comm_size, OutHandle(:,r8_win%ncall_r+m-mbeg), OutStats, Ierr )
6194 : enddo
6195 : endif
6196 :
6197 : else
6198 :
6199 : ! temporary contiguous buffer / global window
6200 :
6201 : jr = jlastr - jfirstr + 1
6202 : ir = ilastr - ifirstr + 1
6203 : jir = jr * ir
6204 : if (r8_win%ncall_r .gt. max_irr-numtrm) then
6205 : write(iulog,*) "mp_recvtrirr: insufficient window storage - exiting"
6206 : write(iulog,*) "r8_win%ncall_r max_irr = ", r8_win%ncall_r, max_irr
6207 : stop
6208 : endif
6209 : unitsize = r8_win%size/max_irr
6210 :
6211 : ! scatter data from global receive buffer to final destination
6212 : offset_0 = (r8_win%ncall_r-1)*unitsize
6213 : offset_r = offset_0
6214 :
6215 : do ipe2=1, ceil2num
6216 : ipe = ieor(ipe2-1,comm_pid) + 1
6217 : if (ipe .gt. num_r) cycle
6218 : r8_win%size_r = numtr*recv_bl(ipe)%Tot_Size
6219 : if (r8_win%size_r .ne. 0) then
6220 : r8_win%offset_r = offset_r
6221 : offset_r = offset_r + r8_win%size_r
6222 : if (offset_r-offset_0 .gt. numtr*unitsize) then
6223 : write(iulog,*) "Fatal mp_recvtrirr: receive window out of space - exiting"
6224 : write(iulog,*) 'comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
6225 : ipe, unitsize, offset_r, offset_0
6226 : stop
6227 : endif
6228 :
6229 : r8_win%nread = r8_win%nread + 1
6230 : call MPI_WAIT(r8_win%rqest(r8_win%nread), Status, ierr)
6231 :
6232 : offset_v(1) = r8_win%offset_r
6233 : do j = 2, recv_bl(ipe)%Nparcels
6234 : offset_v(j) = offset_v(j-1) + recv_bl(ipe)%blocksizes(j-1)
6235 : enddo
6236 :
6237 : do j = 1, recv_bl(ipe)%Nparcels
6238 : do m = mbeg, mend
6239 : do i = 1, recv_bl(ipe)%blocksizes(j)
6240 : mp = recv_bl(ipe)%displacements(j)+i
6241 : kp = kfirstr + (mp-1)/jir
6242 : mt = (kp-kfirstr)*jir
6243 : jp = jfirstr + (mp-mt-1)/ir
6244 : ip = mp-mt - (jp-jfirstr)*ir + ifirstr-1
6245 : qout(ip,jp,kp,m) = ga_r8_r(recv_bl(ipe)%Tot_Size*(m-mbeg)+offset_v(j)+i)
6246 : enddo
6247 : enddo
6248 : enddo
6249 :
6250 : endif
6251 : enddo
6252 :
6253 : if ((r8_win%ncall_s == r8_win%ncall_r + numtrm) .and. (.not. send_local)) then
6254 : call MPI_WAITALL(r8_win%nsend, r8_win%sqest, Stats, ierror)
6255 : endif
6256 :
6257 : endif ! mod_method .gt. 0
6258 :
6259 : r8_win%ncall_r = r8_win%ncall_r + numtrm
6260 :
6261 : if (r8_win%ncall_s == r8_win%ncall_r) then
6262 : r8_win%nsend = 0
6263 : r8_win%nrecv = 0
6264 : r8_win%nread = 0
6265 : r8_win%ncall_s = 0
6266 : r8_win%ncall_r = 0
6267 : endif
6268 :
6269 : #if defined( MODCM_TIMING )
6270 : call t_stopf('mod_comm communication')
6271 : #endif
6272 :
6273 : !EOC
6274 : end subroutine mp_recvtrirr
6275 : !------------------------------------------------------------------------------
6276 : !
6277 : !------------------------------------------------------------------------------
6278 : !BOP
6279 : ! !ROUTINE: mp_swaptrirr --- Write r8 contiguous parcels to global array
6280 : ! using XOR swap ordering - for multiple tracers
6281 : !
6282 : ! !INTERFACE:
6283 : subroutine mp_swaptrirr ( comm, send_bl, recv_bl, qin, qout, mbeg, mend, mq, &
6284 : ifirsts, ilasts, jfirsts, jlasts, kfirsts, klasts, &
6285 : ifirstr, ilastr, jfirstr, jlastr, kfirstr, klastr, &
6286 : sw_handshake, sw_maxreq, sw_alltoall, sw_send )
6287 :
6288 : ! !INPUT PARAMETERS:
6289 : integer, intent(in) :: comm ! communicator
6290 : type(blockdescriptor), intent(in) :: send_bl(:) ! send blocks
6291 : type(blockdescriptor), intent(in) :: recv_bl(:) ! receive blocks
6292 : integer, intent(in) :: mbeg ! initial tracer index
6293 : integer, intent(in) :: mend ! final tracer index
6294 : integer, intent(in) :: mq ! total tracer indices
6295 : integer, intent(in) :: ifirsts ! first I index of source
6296 : integer, intent(in) :: ilasts ! last I index of source
6297 : integer, intent(in) :: jfirsts ! first j index of source
6298 : integer, intent(in) :: jlasts ! last j index of source
6299 : integer, intent(in) :: kfirsts ! first k index of source
6300 : integer, intent(in) :: klasts ! last k index of source
6301 : integer, intent(in) :: ifirstr ! first I index of target
6302 : integer, intent(in) :: ilastr ! last I index of target
6303 : integer, intent(in) :: jfirstr ! first j index of target
6304 : integer, intent(in) :: jlastr ! last j index of target
6305 : integer, intent(in) :: kfirstr ! first k index of target
6306 : integer, intent(in) :: klastr ! last k index of target
6307 : logical, optional, intent(in) :: sw_handshake ! use flow control and
6308 : ! ready send
6309 : integer, optional, intent(in) :: sw_maxreq ! maximum number of outstanding
6310 : ! MPI requests
6311 : logical, optional, intent(in) :: sw_alltoall ! use mpi_alltoall
6312 : logical, optional, intent(in) :: sw_send ! use mpi_send instead of isend
6313 : real(r8), intent(in) :: qin(ifirsts:ilasts,jfirsts:jlasts,kfirsts:klasts,1:mq) ! input tracer array
6314 :
6315 : ! !OUTPUT PARAMETERS:
6316 : real(r8), intent(out) :: qout(ifirstr:ilastr,jfirstr:jlastr,kfirstr:klastr,1:mq) ! output tracer array
6317 : !
6318 : ! !DESCRIPTION:
6319 : !
6320 : ! XOR-ordered version of all-to-all communication
6321 : !
6322 : ! WARNING: mod_comm parameter max_irr might need to be set larger than expected
6323 : ! when swapping multiple variables; specifically, max_irr must be at least
6324 : ! as large as the incoming r8_win%ncall_s + the number of variables to
6325 : ! be swapped
6326 : !
6327 : ! !REVISION HISTORY:
6328 : ! 08.06.30 Worley original: derived from mp_sendirr, but using
6329 : ! swapm logic and XOR swap order
6330 : ! 08.08.22 Worley removed swapm; reimplemented with native MPI,
6331 : ! added flow control/ready send option and maxreq
6332 : ! throttling, added alltoall option
6333 : !
6334 : ! !BUGS:
6335 : !
6336 : !EOP
6337 : !------------------------------------------------------------------------------
6338 : !BOC
6339 : !
6340 : ! !LOCAL VARIABLES:
6341 : integer :: i, j, p, istep, num_s, num_r
6342 : integer :: comm_pid, comm_size, steps, ierr
6343 : integer :: ipe, offset_s, offset_r, offset_0, unitsize
6344 :
6345 : integer :: sndlths(0:numpro-1), sdispls(0:numpro-1)
6346 : integer :: rcvlths(0:numpro-1), rdispls(0:numpro-1)
6347 : integer :: swapids(numpro)
6348 : integer :: sndids(numpro) ! nonblocking MPI send request ids
6349 : integer :: rcvids(numpro) ! nonblocking MPI recv request ids
6350 : integer :: hs_snd, hs_rcv(numpro)! handshake variables (send/receive)
6351 : integer :: hs_rcvids(numpro) ! nonblocking MPI handshake recv request ids
6352 : integer :: InStats(numpro*MPI_STATUS_SIZE)
6353 : integer :: OutStats(numpro*MPI_STATUS_SIZE)
6354 :
6355 : integer :: offset_v
6356 :
6357 : integer :: rstep
6358 :
6359 : integer :: maxreq, maxreqh
6360 : logical :: handshake, alltoall, sendd
6361 : integer :: ip, jp, kp, mp, irs, jrs, jirs, mt
6362 : integer :: numtr, numtrm, irr, jrr, jirr, m
6363 :
6364 : #if defined( MODCM_TIMING )
6365 : call t_startf('mod_comm communication')
6366 : #endif
6367 :
6368 : call MPI_COMM_SIZE (comm, comm_size, ierr)
6369 : call MPI_COMM_RANK (comm, comm_pid, ierr)
6370 :
6371 : ! num_s = 0 if this process is not part of the sending decomposition
6372 : num_s = size(send_bl)
6373 : if (send_bl(1)%Nparcels == -1) then
6374 : num_s = 0
6375 : endif
6376 :
6377 : ! num_r = 0 if this process is not part of the receiving decomposition
6378 : num_r = size(recv_bl)
6379 : if (recv_bl(1)%Nparcels == -1) then
6380 : num_r = 0
6381 : endif
6382 :
6383 : if ( present(sw_handshake) ) then
6384 : handshake = sw_handshake
6385 : hs_snd = 1
6386 : else
6387 : handshake = .false.
6388 : endif
6389 :
6390 : if ( present(sw_alltoall) ) then
6391 : alltoall = sw_alltoall
6392 : else
6393 : alltoall = .false.
6394 : endif
6395 :
6396 : if ( present(sw_send) ) then
6397 : sendd = sw_send
6398 : else
6399 : sendd = .false.
6400 : endif
6401 :
6402 : numtrm = mend - mbeg
6403 : numtr = numtrm + 1
6404 : jrs = jlasts - jfirsts + 1
6405 : irs = ilasts - ifirsts + 1
6406 : jirs = jrs * irs
6407 : jrr = jlastr - jfirstr + 1
6408 : irr = ilastr - ifirstr + 1
6409 : jirr = jrr * irr
6410 :
6411 : unitsize = r8_win%size/max_irr
6412 :
6413 : ! advance to unused portion of storage window
6414 : r8_win%ncall_s = r8_win%ncall_s + 1
6415 :
6416 : if (r8_win%ncall_s .gt. max_irr-numtrm) then
6417 : write(iulog,*) "mp_swaptrirr: insufficient window storage - exiting"
6418 : write(iulog,*) "r8_win%ncall_s max_irr = ", r8_win%ncall_s, max_irr
6419 : stop
6420 : endif
6421 :
6422 : ! calculate send lengths and displacements
6423 : offset_0 = (r8_win%ncall_s-1)*unitsize
6424 : offset_s = offset_0
6425 : sndlths(:) = 0
6426 : sdispls(:) = 0
6427 : do ipe=1, num_s
6428 : sndlths(ipe-1) = numtr*send_bl(ipe)%Tot_Size
6429 : sdispls(ipe-1) = offset_s
6430 : if (sndlths(ipe-1) .ne. 0) then
6431 :
6432 : offset_s = offset_s + sndlths(ipe-1)
6433 : if (offset_s-offset_0 .gt. numtr*unitsize) then
6434 : write(iulog,*) "Fatal mp_swaptrirr: send window out of space - exiting"
6435 : write(iulog,*) '1 comm_pid ipe unitsize offset_s offset_0 = ', comm_pid, &
6436 : ipe, unitsize, offset_s, offset_0
6437 : stop
6438 : endif
6439 : endif
6440 : enddo
6441 :
6442 : ! calculate receive lengths and displacements
6443 : offset_r = offset_0
6444 : rcvlths(:) = 0
6445 : rdispls(:) = 0
6446 : do ipe=1, num_r
6447 : rcvlths(ipe-1) = numtr*recv_bl(ipe)%Tot_Size
6448 : rdispls(ipe-1) = offset_r
6449 : if (rcvlths(ipe-1) .ne. 0) then
6450 :
6451 : offset_r = offset_r + rcvlths(ipe-1)
6452 : if (numtr*unitsize < offset_r-offset_0) then
6453 : write(iulog,*) "Fatal mp_swaptrirr: receive window out of space - exiting"
6454 : write(iulog,*) '1 comm_pid ipe unitsize offset_r offset_0 = ', comm_pid, &
6455 : ipe, unitsize, offset_r, offset_0
6456 : stop
6457 : endif
6458 : endif
6459 : enddo
6460 :
6461 : ! Calculate swap partners and number of steps in point-to-point
6462 : ! implementations of alltoall algorithm.
6463 : steps = 0
6464 : do ipe=1,ceil2(comm_size)-1
6465 : p = pair(comm_size,ipe,comm_pid)
6466 : if (p >= 0) then
6467 : if (sndlths(p) > 0 .or. rcvlths(p) > 0) then
6468 : steps = steps + 1
6469 : swapids(steps) = p
6470 : end if
6471 : end if
6472 : end do
6473 :
6474 : if (.not. alltoall) then
6475 :
6476 : sndids(1:steps) = MPI_REQUEST_NULL
6477 : rcvids(1:steps) = MPI_REQUEST_NULL
6478 :
6479 : if (steps .eq. 0) then
6480 : maxreq = 0
6481 : maxreqh = 0
6482 : elseif (steps .eq. 1) then
6483 : maxreq = 1
6484 : maxreqh = 1
6485 : else
6486 : if ( present(sw_maxreq) ) then
6487 : if ((sw_maxreq .le. steps) .and. (sw_maxreq .ge. 0)) then
6488 : maxreq = sw_maxreq
6489 : if (maxreq > 1) then
6490 : maxreqh = maxreq/2
6491 : else
6492 : maxreq = 2
6493 : maxreqh = 1
6494 : endif
6495 : else
6496 : maxreq = steps
6497 : maxreqh = steps
6498 : endif
6499 : else
6500 : maxreq = steps
6501 : maxreqh = steps
6502 : endif
6503 : endif
6504 :
6505 : ! Post initial handshake receive requests
6506 : if (handshake) then
6507 : do istep=1,maxreq
6508 : p = swapids(istep)
6509 : if (sndlths(p) > 0) then
6510 : call mpi_irecv ( hs_rcv(istep), 1, mp_i4, p, comm_pid, comm, &
6511 : hs_rcvids(istep), ierr )
6512 : endif
6513 : enddo
6514 : endif
6515 :
6516 : ! Post initial receive requests
6517 : do istep=1,maxreq
6518 : p = swapids(istep)
6519 : if (rcvlths(p) > 0) then
6520 : offset_r = rdispls(p)+1
6521 : call mpi_irecv ( ga_r8_r(offset_r), rcvlths(p), mp_r8, &
6522 : p, p, comm, rcvids(istep), ierr )
6523 : if (handshake) then
6524 : call mpi_send( hs_snd, 1, mp_i4, p, p, comm, &
6525 : ierr )
6526 : endif
6527 : endif
6528 : enddo
6529 : rstep = maxreq
6530 : !
6531 : endif
6532 :
6533 : ! gather data into global send buffer
6534 : do istep=1,steps
6535 : p = swapids(istep)
6536 :
6537 : if (sndlths(p) .ne. 0) then
6538 : offset_v = sdispls(p)
6539 : do j = 1, send_bl(p+1)%nparcels
6540 : do m = mbeg, mend
6541 : do i = 1, send_bl(p+1)%blocksizes(j)
6542 : mp = send_bl(p+1)%displacements(j)+i
6543 : kp = kfirsts + (mp-1)/jirs
6544 : mt = (kp-kfirsts)*jirs
6545 : jp = jfirsts + (mp-mt-1)/irs
6546 : ip = mp-mt - (jp-jfirsts)*irs + ifirsts-1
6547 : ga_r8_s(send_bl(p+1)%Tot_Size*(m-mbeg)+offset_v+i) = qin(ip,jp,kp,m)
6548 : enddo
6549 : enddo
6550 : offset_v = offset_v + send_bl(p+1)%blocksizes(j)
6551 : enddo
6552 : endif
6553 :
6554 : if (.not. alltoall) then
6555 :
6556 : ! Submit new i(r)send request
6557 : offset_s = sdispls(p)+1
6558 : if (sndlths(p) > 0) then
6559 : if (handshake) then
6560 : call mpi_wait( hs_rcvids(istep), MPI_STATUS_IGNORE, ierr )
6561 : if (sendd) then
6562 : call mpi_rsend( ga_r8_s(offset_s), sndlths(p), mp_r8, &
6563 : p, comm_pid, comm, ierr )
6564 : else
6565 : call mpi_irsend( ga_r8_s(offset_s), sndlths(p), mp_r8, &
6566 : p, comm_pid, comm, sndids(istep), ierr )
6567 : endif
6568 : else
6569 : if (sendd) then
6570 : call mpi_send ( ga_r8_s(offset_s), sndlths(p), mp_r8, &
6571 : p, comm_pid, comm, ierr )
6572 : else
6573 : call mpi_isend ( ga_r8_s(offset_s), sndlths(p), mp_r8, &
6574 : p, comm_pid, comm, sndids(istep), ierr )
6575 : endif
6576 : endif
6577 : endif
6578 :
6579 : if (istep > maxreqh) then
6580 : ! Wait for oldest irecv request to complete
6581 : call mpi_wait( rcvids(istep-maxreqh), OutStats, ierr )
6582 :
6583 : if (rstep < steps) then
6584 : rstep = rstep + 1
6585 : p = swapids(rstep)
6586 :
6587 : ! Submit a new handshake irecv request
6588 : if (handshake) then
6589 : if (sndlths(p) > 0) then
6590 : call mpi_irecv( hs_rcv(rstep), 1, mp_i4, p, comm_pid, comm, &
6591 : hs_rcvids(rstep), ierr )
6592 : endif
6593 : endif
6594 :
6595 : ! Submit a new irecv request
6596 : if (rcvlths(p) > 0) then
6597 : offset_r = rdispls(p)+1
6598 : call mpi_irecv( ga_r8_r(offset_r), rcvlths(p), mp_r8, &
6599 : p, p, comm, rcvids(rstep), ierr )
6600 : if (handshake) then
6601 : call mpi_send ( hs_snd, 1, mp_i4, p, p, comm, &
6602 : ierr )
6603 : endif
6604 : endif
6605 : endif
6606 :
6607 : ! Wait for outstanding i(r)send request to complete
6608 : if (.not. sendd) then
6609 : call mpi_wait( sndids(istep-maxreqh), InStats, ierr )
6610 : endif
6611 : endif
6612 : !
6613 : endif
6614 : !
6615 : enddo
6616 :
6617 : ! local copy to send buffer
6618 : if (sndlths(comm_pid) .ne. 0) then
6619 :
6620 : offset_v = sdispls(comm_pid)
6621 : do j = 1, send_bl(comm_pid+1)%nparcels
6622 : do m = mbeg, mend
6623 : do i = 1, send_bl(comm_pid+1)%blocksizes(j)
6624 : mp = send_bl(comm_pid+1)%displacements(j)+i
6625 : kp = kfirsts + (mp-1)/jirs
6626 : mt = (kp-kfirsts)*jirs
6627 : jp = jfirsts + (mp-mt-1)/irs
6628 : ip = mp-mt - (jp-jfirsts)*irs + ifirsts-1
6629 : ga_r8_s(send_bl(comm_pid+1)%Tot_Size*(m-mbeg)+offset_v+i) = qin(ip,jp,kp,m)
6630 : enddo
6631 : enddo
6632 : offset_v = offset_v + send_bl(comm_pid+1)%blocksizes(j)
6633 : enddo
6634 :
6635 : if (.not. alltoall) then
6636 : ga_r8_r(rdispls(comm_pid)+1:rdispls(comm_pid)+rcvlths(comm_pid)) = &
6637 : ga_r8_s(sdispls(comm_pid)+1:sdispls(comm_pid)+sndlths(comm_pid))
6638 : endif
6639 :
6640 : endif
6641 :
6642 : if (alltoall) then
6643 : call mpi_alltoallv (ga_r8_s, sndlths, sdispls, mp_r8, &
6644 : ga_r8_r, rcvlths, rdispls, mp_r8, &
6645 : comm, ierror)
6646 : endif
6647 :
6648 : ! local copy from receive buffer
6649 : if (rcvlths(comm_pid) .ne. 0) then
6650 :
6651 : offset_v = rdispls(comm_pid)
6652 : do j = 1, recv_bl(comm_pid+1)%Nparcels
6653 : do m = mbeg, mend
6654 : do i = 1, recv_bl(comm_pid+1)%blocksizes(j)
6655 : mp = recv_bl(comm_pid+1)%displacements(j)+i
6656 : kp = kfirstr + (mp-1)/jirr
6657 : mt = (kp-kfirstr)*jirr
6658 : jp = jfirstr + (mp-mt-1)/irr
6659 : ip = mp-mt - (jp-jfirstr)*irr + ifirstr-1
6660 : qout(ip,jp,kp,m) = ga_r8_r(recv_bl(comm_pid+1)%Tot_Size*(m-mbeg)+offset_v+i)
6661 : enddo
6662 : enddo
6663 : offset_v = offset_v + recv_bl(comm_pid+1)%blocksizes(j)
6664 : enddo
6665 :
6666 : endif
6667 :
6668 : ! scatter data from global receive buffer to final destination
6669 : do istep=1,steps
6670 : p = swapids(istep)
6671 :
6672 : if (.not. alltoall) then
6673 : if (istep > steps-maxreqh) then
6674 : call mpi_wait( rcvids(istep), OutStats, ierr )
6675 : endif
6676 : endif
6677 :
6678 : if (rcvlths(p) .ne. 0) then
6679 :
6680 : offset_v = rdispls(p)
6681 : do j = 1, recv_bl(p+1)%Nparcels
6682 : do m = mbeg, mend
6683 : do i = 1, recv_bl(p+1)%blocksizes(j)
6684 : mp = recv_bl(p+1)%displacements(j)+i
6685 : kp = kfirstr + (mp-1)/jirr
6686 : mt = (kp-kfirstr)*jirr
6687 : jp = jfirstr + (mp-mt-1)/irr
6688 : ip = mp-mt - (jp-jfirstr)*irr + ifirstr-1
6689 : qout(ip,jp,kp,m) = ga_r8_r(recv_bl(p+1)%Tot_Size*(m-mbeg)+offset_v+i)
6690 : enddo
6691 : enddo
6692 : offset_v = offset_v + recv_bl(p+1)%blocksizes(j)
6693 : enddo
6694 :
6695 : endif
6696 : enddo
6697 :
6698 : ! Wait for any outstanding send requests to complete.
6699 : if (.not. alltoall .and. .not. sendd) then
6700 : call mpi_waitall( maxreqh, sndids(steps-maxreqh+1), InStats, ierr )
6701 : endif
6702 :
6703 : ! clean-up
6704 : ! make used portion of storage window available for reuse
6705 : r8_win%ncall_s = r8_win%ncall_s - 1
6706 :
6707 : #if defined( MODCM_TIMING )
6708 : call t_stopf('mod_comm communication')
6709 : #endif
6710 :
6711 : !EOC
6712 : end subroutine mp_swaptrirr
6713 : # endif
6714 : !------------------------------------------------------------------------------
6715 : #endif
6716 0 : end module mod_comm
6717 :
|