Line data Source code
1 : #if !defined(STAND_ALONE)
2 : #endif
3 : #define _SMEMORY 1
4 : !-----------------------------------------------------------------------
5 : ! NASA/GSFC, Data Assimilation Office, Code 910.3, GEOS/DAS
6 : !-----------------------------------------------------------------------
7 : MODULE parutilitiesmodule
8 : #if defined( SPMD )
9 : !BOP
10 : !
11 : ! !MODULE: parutilitiesmodule
12 : !
13 : ! !USES:
14 : #if defined( STAND_ALONE )
15 : # define iulog 6
16 : #else
17 : use cam_logfile, only: iulog
18 : #endif
19 : #if !defined(STAND_ALONE)
20 : USE shr_kind_mod, only: r8 => shr_kind_r8, i8 => shr_kind_i8, &
21 : r4 => shr_kind_r4
22 : #endif
23 : USE mod_comm, ONLY : commglobal, gid, numpro, blockdescriptor, max_nparcels
24 : #include "debug.h"
25 : IMPLICIT NONE
26 : #include "mpif.h"
27 : #include "pilgrim.h"
28 :
29 : !
30 : ! !PUBLIC DATA MEMBERS:
31 : PUBLIC Gsize
32 : PUBLIC INT4, REAL4, REAL8
33 : PUBLIC SUMOP, MAXOP, MINOP, BCSTOP
34 :
35 :
36 : INTEGER,SAVE :: GSize ! Size of communicator CommGlobal
37 : ! Equivalent to mod_comm::numpro
38 : #define CPP_SUM_OP 101
39 : #define CPP_MAX_OP 102
40 : #define CPP_MIN_OP 103
41 : #define CPP_BCST_OP 104
42 :
43 : INTEGER,SAVE :: INT4 = MPI_INTEGER
44 : INTEGER,SAVE :: REAL4 = MPI_REAL
45 : INTEGER,SAVE :: REAL8 = MPI_DOUBLE_PRECISION
46 : INTEGER,SAVE :: SUMOP = MPI_SUM
47 : INTEGER,SAVE :: MAXOP = MPI_MAX
48 : INTEGER,SAVE :: MINOP = MPI_MIN
49 : INTEGER,SAVE :: BCSTOP = CPP_BCST_OP
50 :
51 : ! !PUBLIC MEMBER FUNCTIONS:
52 : PUBLIC ParPatternType
53 :
54 : TYPE ParPatternType
55 : INTEGER :: Comm ! Communicator
56 : INTEGER :: Iam ! My rank in communicator
57 : INTEGER :: Size ! Size of communicator
58 : TYPE(BlockDescriptor), POINTER :: SendDesc(:) ! Array of descriptors
59 : TYPE(BlockDescriptor), POINTER :: RecvDesc(:) ! Array of descriptors
60 : END TYPE ParPatternType
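     : !
     : ! A pattern is consumed by the transfer routines; a minimal sketch,
     : ! assuming Src and Dest are arrays dimensioned for the source and
     : ! target decompositions (the names are illustrative):
     : !
     : !      TYPE(ParPatternType) :: Pattern
     : !      CALL ParBeginTransfer( Pattern, Src, Dest )
     : !      CALL ParEndTransfer( Pattern, Src, Dest )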
61 :
62 :
63 : #ifdef _SMEMORY
64 : TYPE ParInfoType
65 : INTEGER :: numRecvSeg ! number of received segments
66 : INTEGER :: numSendSeg ! number of send segments
67 : INTEGER :: maxNumSeg ! maximum number of segments over all processors
68 : INTEGER :: numRecvNeigh ! number of receive neighbors
69 : INTEGER :: numSendNeigh ! number of send neighbors
70 : END TYPE ParInfoType
71 : #endif
72 :
73 : PUBLIC ParInit, ParSplit, ParFree, ParExit
74 : PUBLIC ParScatter, ParGather
75 : PUBLIC ParBeginTransfer, ParEndTransfer
76 : PUBLIC ParExchangeVector, ParCollective
77 : PUBLIC ParPatternCreate, ParPatternFree
78 :
79 : INTERFACE ParPatternCreate
80 : MODULE PROCEDURE ParPatternCopy
81 : MODULE PROCEDURE ParPatternGhost
82 : MODULE PROCEDURE ParPatternDecompToDecomp
83 : MODULE PROCEDURE ParPatternDecompToGhost
84 : MODULE PROCEDURE ParPatternGhostToDecomp
85 : MODULE PROCEDURE ParPatternGhostToGhost
86 : END INTERFACE
87 :
88 : INTERFACE ParScatter
89 : MODULE PROCEDURE ParScatterReal
90 : MODULE PROCEDURE ParScatterReal4
91 : MODULE PROCEDURE ParScatterInt
92 : END INTERFACE
93 :
94 : INTERFACE ParGather
95 : MODULE PROCEDURE ParGatherReal
96 : MODULE PROCEDURE ParGatherReal4
97 : MODULE PROCEDURE ParGatherInt
98 : END INTERFACE
99 :
100 : INTERFACE ParBeginTransfer
101 : MODULE PROCEDURE ParBeginTransferReal
102 : MODULE PROCEDURE ParBeginTransferPattern1D
103 : MODULE PROCEDURE ParBeginTransferPattern1Dint
104 : MODULE PROCEDURE ParBeginTransferPattern2D
105 : MODULE PROCEDURE ParBeginTransferPattern3D
106 : MODULE PROCEDURE ParBeginTransferPattern4D
107 : ! MODULE PROCEDURE ParBeginTransferInt
108 : END INTERFACE
109 :
110 : INTERFACE ParEndTransfer
111 : MODULE PROCEDURE ParEndTransferReal
112 : MODULE PROCEDURE ParEndTransferPattern1D
113 : MODULE PROCEDURE ParEndTransferPattern1Dint
114 : MODULE PROCEDURE ParEndTransferPattern2D
115 : MODULE PROCEDURE ParEndTransferPattern3D
116 : MODULE PROCEDURE ParEndTransferPattern4D
117 : ! MODULE PROCEDURE ParEndTransferInt
118 : END INTERFACE
119 :
120 : INTERFACE ParExchangeVector
121 : MODULE PROCEDURE ParExchangeVectorReal
122 : MODULE PROCEDURE ParExchangeVectorReal4
123 : MODULE PROCEDURE ParExchangeVectorInt
124 : END INTERFACE
125 :
126 : INTERFACE ParCollective
127 : MODULE PROCEDURE ParCollectiveBarrier
128 : MODULE PROCEDURE ParCollective0D
129 : MODULE PROCEDURE ParCollective1D
130 : MODULE PROCEDURE ParCollective1DReal4
131 : MODULE PROCEDURE ParCollective2D
132 : MODULE PROCEDURE ParCollective2DReal4
133 : MODULE PROCEDURE ParCollective3D
134 : MODULE PROCEDURE ParCollective0DInt
135 : MODULE PROCEDURE ParCollective0DStr
136 : MODULE PROCEDURE ParCollective1DInt
137 : MODULE PROCEDURE ParCollective1DStr
138 : MODULE PROCEDURE ParCollective2DInt
139 : END INTERFACE
140 :
141 : #ifdef _SMEMORY
142 : INTERFACE ParCalcInfo
143 : MODULE PROCEDURE ParCalcInfoDecompToGhost
144 : MODULE PROCEDURE ParCalcInfoDecompToDecomp
145 : MODULE PROCEDURE ParCalcInfoGhostToGhost
146 : MODULE PROCEDURE ParCalcInfoGhostToDecomp
147 : END INTERFACE
148 : #endif
149 :
150 : !
151 : ! !DESCRIPTION:
152 : !
153 : ! This module provides the basic utilities to support parallelism
154 : ! on a distributed or shared memory multiprocessor.
155 : !
156 : ! \begin{center}
157 : ! \begin{tabular}{|l|l|} \hline \hline
158 : ! ParInit & Initialize the parallel system \\ \hline
159 : ! ParExit & Exit from the parallel system \\ \hline
160 : ! ParSplit & Create a Compute grid of PEs \\ \hline
161 : ! ParFree & Free a split communicator \\ \hline
162 : ! ParScatter & Scatter global slice to local slices \\ \hline
163 : ! ParGather & Gather local slices to one global \\ \hline
164 : ! ParBeginTransfer & Initiate an all-to-all packet transfer \\ \hline
165 : ! ParEndTransfer & Complete an all-to-all packet transfer \\ \hline
166 : ! ParExchangeVector & Perform a complete all-to-all vector exchange \\ \hline
167 : ! ParCollective & Collective operation across communicator \\ \hline
168 : ! \end{tabular}
169 : ! \end{center}
170 : ! \vspace{2mm}
171 : !
172 : ! Other utilities can be added to this module as needs evolve.
173 : !
174 : ! Conceptually the intention is to aggregate as many of the
175 : ! MPI communication calls as possible into a well-maintained
176 : ! module. This helps avoid MPI spaghetti
177 : ! code.
178 : !
179 : ! This module is tailored to GEOS DAS and implements the
180 : ! design of Lucchesi/Mirin/Sawyer/Larson.
181 : !
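     : ! A typical life cycle, as a sketch (DA, DB, Src, Dest and Pattern
     : ! are illustrative placeholders):
     : !
     : !      CALL ParInit( )                                 ! once, at startup
     : !      CALL ParPatternCreate( Comm, DA, DB, Pattern )  ! build comm pattern
     : !      CALL ParBeginTransfer( Pattern, Src, Dest )     ! start transfer
     : !      CALL ParEndTransfer( Pattern, Src, Dest )       ! complete transfer
     : !      CALL ParPatternFree( Comm, Pattern )            ! release pattern
     : !      CALL ParExit( )                                 ! once, at shutdown
     : !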
182 : ! !REVISION HISTORY:
183 : ! 97.02.01 Sawyer Creation
184 : ! 97.07.22 Sawyer Removal of DecompType related subroutines
185 : ! 97.08.13 Sawyer Added ParScatter/Gather for Integers
186 : ! 97.09.26 Sawyer Additions of Sparse communication primitives
187 : ! 97.12.01 Sawyer Changed all MPI_SSEND to MPI_ISEND
188 : ! 97.12.23 Lucchesi Added member variables IsIONode and InterComm
189 : ! 98.01.06 Sawyer Additions from RL for I/O Nodes
190 : ! 98.02.02 Sawyer Added the Cartesian data members
191 : ! 98.02.05 Sawyer Removed the use of intercommunicators
192 : ! 98.02.23 Sawyer Added ghosting utilities
193 : ! 98.02.25 Sawyer Modified interface of BeginTransfer
194 : ! 98.03.03 Sawyer Added Global ID number to public data members
195 : ! 98.03.25 Sawyer Added documentation for walkthrough
196 : ! 98.04.16 Sawyer Removed all use of MPI_CART (CommRow redefined)
197 : ! 98.07.23 Sawyer Added ParGhost, ParPoleDot; ParBegin/EndGhost out
198 : ! 98.09.15 Sawyer Added ParMerge, ParPoleGhost
199 : ! 98.09.17 Sawyer Added ParSum, removed ParPoleDot
200 : ! 99.01.18 Sawyer Minor cleaning
201 : ! 99.03.04 Sawyer Revised SHMEM concept for Transfer
202 : ! 99.04.22 Sawyer Removed COMMON for handles -- they are
203 : ! always used in same program unit.
204 : ! 99.05.21 Sawyer Reintroduced barriers in Scatter/Gather
205 : ! 99.06.03 Sawyer USE_SHMEM revisions
206 : ! 99.12.10 Sawyer ParInit now sets GID, Gsize
207 : ! 99.12.13 Sawyer Version slimmed down for FVCCM release
208 : ! 00.06.14 Sawyer Precision module now used
209 : ! 00.07.07 Sawyer Removed 2D scatter/gather; simplified API
210 : ! 00.07.30 Sawyer Full implementation with shared memory
211 : ! 00.08.09 Sawyer Replaced ParSum with ParCollective
212 : ! 00.08.28 Sawyer Moved LLNL 2D data to LLNL2DModule; new MLP impl
213 : ! 01.02.04 Sawyer Added PatternType and related routines
214 : ! 01.02.12 Sawyer Converted to free format
215 : ! 02.10.30 Sawyer Welded with mod_comm
216 : ! 03.03.06 Sawyer Fix parpatterncreate for MPI2; use MPI_DATATYPE_NULL
217 : ! 05.10.12 Worley Support for vectorization modifications in mod_comm
218 : ! 06.03.01 Sawyer Merged CAM and GEOS5 versions
219 : ! 07.01.05 Mirin Eliminated direct use of Gsize
220 : ! 07.09.04 Dennis Reduced temporary memory usage
221 : !
222 : ! !BUGS:
223 : ! There are several MPI_Barriers at various locations in the code.
224 : ! These avoid potential race conditions which probably only occur
225 : ! if the number of physical processors is less than the number of
226 : ! message-passing processes. Remove these barriers at your own risk.
227 : !
228 : !EOP
229 :
230 : INTEGER, SAVE :: InHandle(MAX_PAX, MAX_SMP, MAX_TRF)
231 : INTEGER, SAVE :: OutHandle(MAX_PAX,MAX_SMP, MAX_TRF)
232 : INTEGER, SAVE :: BegTrf = 0 ! Ongoing overlapped begintransfer #
233 : INTEGER, SAVE :: EndTrf = 0 ! Ongoing overlapped endtransfer #
234 : LOGICAL, SAVE :: Initialized = .FALSE. ! Flag for initialization of parutilitiesmodule.
235 :
236 : CONTAINS
237 : !-----------------------------------------------------------------------
238 :
239 :
240 : !-----------------------------------------------------------------------
241 : !BOP
242 : ! !IROUTINE: ParInit --- Initialize the parallel execution
243 : !
244 : ! !INTERFACE:
245 768 : SUBROUTINE ParInit ( Comm, npryzxy, mod_method, mod_geopk, mod_gatscat, mod_maxirr )
246 : !
247 : ! !USES:
248 : USE mod_comm, ONLY : mp_init
249 : IMPLICIT NONE
250 : ! !INPUT PARAMETERS:
251 : INTEGER, OPTIONAL :: Comm
252 : INTEGER, OPTIONAL, INTENT(IN) :: npryzxy(4) ! 2D decompositions
253 : INTEGER, OPTIONAL, INTENT(IN) :: mod_method ! CAM optimization
254 : INTEGER, OPTIONAL, INTENT(IN) :: mod_geopk ! CAM optimization
255 : INTEGER, OPTIONAL, INTENT(IN) :: mod_gatscat ! CAM optimization
256 : INTEGER, OPTIONAL, INTENT(IN) :: mod_maxirr ! CAM max simul. trsps.
257 :
258 : !
259 : ! !DESCRIPTION:
260 : ! Initializes the system. In MPI mode, MPI\_INIT is called if this has
261 : ! not been done already. If the optional arguments are not provided, default
262 : ! values will be chosen. But it is advisable to provide COMM
263 : ! (main communicator) and NPRYZXY (internal 2D decomposition).
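     : !
     : ! A minimal call sketch (the communicator and the 4-element layout
     : ! shown are illustrative assumptions, not required values):
     : !
     : !      CALL ParInit( Comm=MPI_COMM_WORLD, npryzxy=(/ 4, 8, 2, 16 /) )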
264 : !
265 : ! !SYSTEM ROUTINES:
266 : ! MPI_INITIALIZED, MPI_INIT
267 : !
268 : ! !REVISION HISTORY:
269 : ! 97.03.20 Sawyer Creation
270 : ! 97.04.16 Sawyer Cleaned up for walk-through
271 : ! 97.07.03 Sawyer Reformulated documentation
272 : ! 00.07.23 Sawyer Added shared memory arena implementation
273 : ! 02.10.30 Sawyer Now uses mp_init from mod_comm
274 : ! 06.06.15 Sawyer Added CAM optimizations (passed to mod_comm)
275 : !
276 : !EOP
277 : !-----------------------------------------------------------------------
278 : !BOC
279 :
280 : ! Initialize mod_comm
281 :
282 768 : IF (.NOT. Initialized) THEN
283 768 : CALL mp_init( Comm, npryzxy, mod_method, mod_geopk, mod_gatscat, mod_maxirr )
284 768 : Gsize = numpro ! Support PILGRIM's Gsize for now
285 768 : Initialized = .TRUE.
286 : ENDIF
287 :
288 768 : RETURN
289 : !EOC
290 : END SUBROUTINE ParInit
291 : !-----------------------------------------------------------------------
292 :
293 :
294 : !-----------------------------------------------------------------------
295 : !BOP
296 : ! !IROUTINE: ParExit --- Finalize the parallel execution
297 : !
298 : ! !INTERFACE:
299 0 : SUBROUTINE ParExit ( Comm )
300 :
301 : ! !USES:
302 : USE mod_comm, ONLY: mp_exit
303 : IMPLICIT NONE
304 :
305 : ! !INPUT PARAMETERS:
306 : INTEGER, OPTIONAL :: Comm
307 :
308 : ! !DESCRIPTION:
309 : ! All PEs, compute nodes and IO nodes alike meet here to terminate
310 : ! themselves. If any PE fails to check in, everything will hang
311 : ! here.
312 : !
313 : ! This routine is the very {\em last} thing which is executed!
314 : !
315 : ! !LOCAL VARIABLES:
316 : INTEGER Ierror
317 : !
318 : ! !SYSTEM ROUTINES:
319 : ! MPI_BARRIER, MPI_FINALIZE
320 : !
321 : ! !REVISION HISTORY:
322 : ! 97.03.20 Sawyer Creation
323 : ! 97.04.16 Sawyer Cleaned up for walk-through
324 : ! 97.07.03 Sawyer Reformulated documentation
325 : ! 00.07.23 Sawyer Added shared memory arena implementation
326 : ! 02.08.13 Sawyer Incorporated mod_comm for low level comm.
327 : !
328 : !EOP
329 : !-----------------------------------------------------------------------
330 : !BOC
331 0 : CALL mp_exit(Comm)
332 0 : RETURN
333 : !EOC
334 : END SUBROUTINE ParExit
335 : !-----------------------------------------------------------------------
336 :
337 :
338 : !-----------------------------------------------------------------------
339 : !BOP
340 : ! !IROUTINE: ParSplit --- Split into group for I/O and computation
341 : !
342 : ! !INTERFACE:
343 3072 : SUBROUTINE ParSplit( InComm, Color, InID, Comm, MyID, Nprocs )
344 : !
345 : ! !USES:
346 : IMPLICIT NONE
347 :
348 : ! !INPUT PARAMETERS:
349 : INTEGER, INTENT( IN ) :: InComm ! Communicator to split
350 : INTEGER, INTENT( IN ) :: Color ! Group label
351 : INTEGER, INTENT( IN ) :: InID ! Input ID
352 :
353 : ! !OUTPUT PARAMETERS:
354 : INTEGER, INTENT( OUT ) :: Comm ! Split communicator
355 : INTEGER, INTENT( OUT ) :: MyID ! Rank within the split communicator
356 : INTEGER, INTENT( OUT ) :: Nprocs ! Number of PEs in my group
357 : !
358 : ! !DESCRIPTION:
359 : ! This routine splits the PEs into groups. This is currently only
360 : ! supported in MPI mode. Read the chapter on MPI\_COMM\_SPLIT
361 : ! thoroughly.
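     : !
     : ! A usage sketch (the grouping by Color is illustrative; GID is the
     : ! global rank from mod_comm):
     : !
     : !      Color = GID / 2     ! e.g. pair up neighboring PEs
     : !      CALL ParSplit( MPI_COMM_WORLD, Color, GID, NewComm, MyID, Npes )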
362 : !
363 : ! !SYSTEM ROUTINES:
364 : ! MPI_COMM_SPLIT, MPI_COMM_SIZE, MPI_COMM_RANK
365 : !
366 : ! !REVISION HISTORY:
367 : ! 97.03.20 Sawyer Creation
368 : ! 97.04.16 Sawyer Cleaned up for walk-through
369 : ! 97.07.03 Sawyer Reformulated documentation
370 : ! 97.12.01 Sawyer Xnodes and Ynodes are explicit arguments
371 : ! 97.12.23 Lucchesi Added call to MPI_INTERCOMM_CREATE
372 : ! 98.01.06 Sawyer Additions from RL for I/O Nodes
373 : ! 98.02.02 Sawyer Added the Cartesian information
374 : ! 98.02.05 Sawyer Removed the use of intercommunicators
375 : ! 98.04.16 Sawyer Removed all use of MPI_CART (CommRow redefined)
376 : ! 99.01.10 Sawyer CommRow now defined for all rows
377 : ! 00.07.09 Sawyer Removed 2D computational mesh
378 : ! 00.08.08 Sawyer Redefined as wrapper to mpi_comm_split
379 : !
380 : !EOP
381 : !-----------------------------------------------------------------------
382 : !BOC
383 : ! !LOCAL VARIABLES:
384 : INTEGER Ierror
385 :
386 : CPP_ENTER_PROCEDURE( "PARSPLIT" )
387 : !
388 : ! Split the communicators
389 : !
390 3072 : CALL MPI_COMM_SPLIT( InComm, Color, InID, Comm, Ierror )
391 3072 : IF ( Comm .ne. MPI_COMM_NULL ) THEN
392 3072 : CALL MPI_COMM_RANK( Comm, MyID, Ierror )
393 3072 : CALL MPI_COMM_SIZE( Comm, Nprocs, Ierror )
394 : ELSE
395 : !
396 : ! This PE does not participate: mark with impossible values
397 : !
398 0 : MyID = -1
399 0 : Nprocs = -1
400 : ENDIF
401 :
402 : CPP_LEAVE_PROCEDURE( "PARSPLIT" )
403 3072 : RETURN
404 : !EOC
405 : END SUBROUTINE ParSplit
406 : !-----------------------------------------------------------------------
407 :
408 :
409 : !-----------------------------------------------------------------------
410 : !BOP
411 : ! !IROUTINE: ParFree --- Free a communicator
412 : !
413 : ! !INTERFACE:
414 0 : SUBROUTINE ParFree( InComm )
415 : !
416 : ! !USES:
417 : IMPLICIT NONE
418 :
419 : ! !INPUT PARAMETERS:
420 : INTEGER InComm
421 :
422 : !
423 : ! !DESCRIPTION:
424 : ! This routine frees a communicator created with ParSplit
425 : !
426 : ! !REVISION HISTORY:
427 : ! 97.09.11 Sawyer Creation, to complement ParSplit
428 : ! 00.07.24 Sawyer Revamped ParMerge into a free communicator
429 : !
430 : ! !LOCAL VARIABLES:
431 : INTEGER Ierror
432 : !
433 : !EOP
434 : !-----------------------------------------------------------------------
435 : !BOC
436 : CPP_ENTER_PROCEDURE( "PARFREE" )
437 : !
438 0 : CALL MPI_COMM_FREE( InComm, Ierror )
439 : CPP_LEAVE_PROCEDURE( "PARFREE" )
440 0 : RETURN
441 : !EOC
442 : END SUBROUTINE ParFree
443 : !-----------------------------------------------------------------------
444 :
445 :
446 : !-----------------------------------------------------------------------
447 : !BOP
448 : ! !IROUTINE: ParPatternCopy --- Duplicate/replicate a comm pattern
449 : !
450 : ! !INTERFACE:
451 6144 : SUBROUTINE ParPatternCopy( InComm, PatternIn, PatternOut, Multiplicity )
452 : !
453 : ! !USES:
454 : USE mod_comm, ONLY : get_partneroffset
455 : IMPLICIT NONE
456 :
457 : ! !INPUT PARAMETERS:
458 : INTEGER, INTENT( IN ) :: InComm ! Communicator
459 : TYPE(ParPatternType), INTENT( IN ) :: PatternIn ! Comm Pattern
460 : INTEGER, INTENT( IN ), OPTIONAL :: Multiplicity ! Replication count (default 1)
461 :
462 : ! !OUTPUT PARAMETERS:
463 : TYPE(ParPatternType), INTENT( OUT ) :: PatternOut ! Comm Pattern
464 : !
465 : ! !DESCRIPTION:
466 : ! This routine duplicates a given communication pattern.
467 : !
468 : ! Optionally a multiplicity can be added. This replicates the
469 : ! communication pattern Mult times, that is for the case that
470 : ! the data structures are replicated in the final dimension
471 : ! Mult times. A typical example is a pattern describing a 2D
472 : ! array, e.g. a lat-lon decomposition, which will be used
473 : ! to copy a 3D lat-lon-lev array. The strides (e.g. the number
474 : ! of elements in one plane) of the source (send) and target
475 : ! (recv) arrays are now calculated internally.
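     : !
     : ! For example, a sketch in which a pattern built for one lat-lon
     : ! plane is replicated across KM levels (Pattern2D, Pattern3D and
     : ! KM are illustrative names):
     : !
     : !      CALL ParPatternCreate( Comm, Pattern2D, Pattern3D, Multiplicity=KM )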
476 : !
477 : ! !SYSTEM ROUTINES:
478 : ! MPI_TYPE_UB, MPI_TYPE_HVECTOR, MPI_TYPE_COMMIT
479 : !
480 : ! !REVISION HISTORY:
481 : ! 03.03.20 Sawyer Creation
482 : ! 03.06.26 Sawyer Removed StrideSend/Recv from API
483 : !
484 : !EOP
485 : !-----------------------------------------------------------------------
486 : !BOC
487 : ! !LOCAL VARIABLES:
488 : INTEGER Stride_S, Stride_R, Mult, Iam, GroupSize, Ipe, Ierror
489 : INTEGER Disp, Length, I, J, ub, method
490 :
491 : CPP_ENTER_PROCEDURE( "PARPATTERNCOPY" )
492 :
493 6144 : method = PatternIn%RecvDesc(1)%method
494 :
495 : !
496 : ! Decide if this is a simple copy, or a multiple replication
497 : !
498 6144 : IF ( present(Multiplicity) ) THEN
499 6144 : Mult = Multiplicity
500 : ELSE
501 0 : Mult = 1
502 : ENDIF
503 :
504 6144 : CALL MPI_COMM_DUP( PatternIn%Comm, PatternOut%Comm, Ierror )
505 6144 : CALL MPI_COMM_SIZE( PatternIn%Comm, GroupSize, Ierror )
506 6144 : CALL MPI_COMM_RANK( PatternIn%Comm, Iam, Ierror )
507 :
508 6144 : PatternOut%Iam = Iam
509 6144 : PatternOut%Size = GroupSize
510 :
511 18432 : ALLOCATE( PatternOut%SendDesc( GroupSize ) )
512 12288 : ALLOCATE( PatternOut%RecvDesc( GroupSize ) )
513 :
514 2365440 : PatternOut%SendDesc(:)%method = PatternIn%SendDesc(:)%method
515 2365440 : PatternOut%RecvDesc(:)%method = PatternIn%RecvDesc(:)%method
516 : !
517 : ! Determine the strides which are by construction the maximum upper
518 : ! bound of all the derived types. This is due to the fact that
519 : ! there are no 'holes' in the data types: even if one PE does not
520 : ! send to any other PEs, it will still have a data type for 'sending'
521 : ! data to itself.
522 : !
523 6144 : Stride_S = 0
524 6144 : Stride_R = 0
525 2365440 : DO Ipe=1, GroupSize
526 2359296 : IF ( PatternIn%SendDesc(Ipe)%type /= MPI_DATATYPE_NULL ) THEN
527 : CALL MPI_TYPE_UB( PatternIn%SendDesc(Ipe)%type, ub, ierror )
528 0 : Stride_S = max(Stride_S,ub)
529 : ENDIF
530 2365440 : IF ( PatternIn%RecvDesc(Ipe)%type /= MPI_DATATYPE_NULL ) THEN
531 : CALL MPI_TYPE_UB( PatternIn%RecvDesc(Ipe)%type, ub, ierror )
532 0 : Stride_R = max(Stride_R,ub)
533 : ENDIF
534 : ENDDO
535 :
536 : !
537 : ! Determine the output data types
538 : !
539 2365440 : DO Ipe=1, GroupSize
540 2359296 : IF ( PatternIn%SendDesc(ipe)%type /= MPI_DATATYPE_NULL ) THEN
541 : CALL MPI_TYPE_HVECTOR( Mult, 1, Stride_S, PatternIn%SendDesc(Ipe)%type,&
542 0 : PatternOut%SendDesc(Ipe)%type, Ierror )
543 0 : CALL MPI_TYPE_COMMIT( PatternOut%SendDesc(Ipe)%type, Ierror )
544 : ELSE
545 2359296 : PatternOut%SendDesc(ipe)%type = MPI_DATATYPE_NULL
546 : ENDIF
547 2365440 : IF ( PatternIn%RecvDesc(Ipe)%type /= MPI_DATATYPE_NULL ) THEN
548 : CALL MPI_TYPE_HVECTOR( Mult, 1, Stride_R, PatternIn%RecvDesc(Ipe)%type,&
549 0 : PatternOut%RecvDesc(Ipe)%type, Ierror )
550 0 : CALL MPI_TYPE_COMMIT( PatternOut%RecvDesc(Ipe)%type, Ierror )
551 : ELSE
552 2359296 : PatternOut%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
553 : ENDIF
554 : ENDDO
555 :
556 : !
557 : ! Determine the stride, which is the sum of all the blocksizes for all
558 : ! the derived types (there are no 'holes').
559 : !
560 6144 : Stride_S = 0
561 6144 : Stride_R = 0
562 2365440 : DO Ipe=1, GroupSize
563 2377728 : Stride_S = Stride_S + sum( PatternIn%SendDesc(ipe)%BlockSizes(:) )
564 2383872 : Stride_R = Stride_R + sum( PatternIn%RecvDesc(ipe)%BlockSizes(:) )
565 : ENDDO
566 :
567 2365440 : DO ipe=1, GroupSize
568 2359296 : Length = SIZE(PatternIn%SendDesc(ipe)%BlockSizes)
569 4724736 : ALLOCATE( PatternOut%SendDesc(ipe)%Displacements(Length*Mult) )
570 2365440 : ALLOCATE( PatternOut%SendDesc(ipe)%BlockSizes(Length*Mult) )
571 : #if defined( DEBUG_PARPATTERNCOPY )
572 : write(iulog,*) "Multiplicity", Mult
573 : write(iulog,*) "Old send blocksizes", PatternIn%SendDesc(ipe)%BlockSizes
574 : #endif
575 2377728 : DO i=1, Length
576 18432 : Disp = PatternIn%SendDesc(ipe)%Displacements(i)
577 4783104 : DO j=1, Mult
578 0 : PatternOut%SendDesc(ipe)%BlockSizes(i+(j-1)*Length) = &
579 2405376 : PatternIn%SendDesc(ipe)%BlockSizes(i)
580 2405376 : PatternOut%SendDesc(ipe)%Displacements(i+(j-1)*Length) = Disp
581 2423808 : Disp = Disp + Stride_S
582 : ENDDO
583 : ENDDO
584 0 : PatternOut%SendDesc(ipe)%Nparcels = &
585 2359296 : size (PatternOut%SendDesc(ipe)%Displacements)
586 0 : PatternOut%SendDesc(ipe)%Tot_Size = &
587 4764672 : sum (PatternOut%SendDesc(ipe)%Blocksizes)
588 2359296 : Max_Nparcels = max (Max_Nparcels, PatternOut%SendDesc(ipe)%Nparcels)
589 : #if defined( DEBUG_PARPATTERNCOPY )
590 : write(iulog,*) "Send blocksizes", PatternOut%SendDesc(ipe)%BlockSizes
591 : write(iulog,*) "Old recv blocksizes", PatternIn%RecvDesc(ipe)%BlockSizes
592 : #endif
593 2359296 : Length = SIZE(PatternIn%RecvDesc(ipe)%BlockSizes)
594 4724736 : ALLOCATE( PatternOut%RecvDesc(ipe)%Displacements(Length*Mult) )
595 2365440 : ALLOCATE( PatternOut%RecvDesc(ipe)%BlockSizes(Length*Mult) )
596 2377728 : DO i=1, Length
597 18432 : Disp = PatternIn%RecvDesc(ipe)%Displacements(i)
598 4783104 : DO j=1, Mult
599 0 : PatternOut%RecvDesc(ipe)%BlockSizes(i+(j-1)*Length) = &
600 2405376 : PatternIn%RecvDesc(ipe)%BlockSizes(i)
601 2405376 : PatternOut%RecvDesc(ipe)%Displacements(i+(j-1)*Length) = Disp
602 2423808 : Disp = Disp + Stride_R
603 : ENDDO
604 : ENDDO
605 0 : PatternOut%RecvDesc(ipe)%Nparcels = &
606 2359296 : size (PatternOut%RecvDesc(ipe)%Displacements)
607 0 : PatternOut%RecvDesc(ipe)%Tot_Size = &
608 4764672 : sum (PatternOut%RecvDesc(ipe)%Blocksizes)
609 2365440 : Max_Nparcels = max (Max_Nparcels, PatternOut%RecvDesc(ipe)%Nparcels)
610 : #if defined( DEBUG_PARPATTERNCOPY )
611 : write(iulog,*) "Recv blocksizes", PatternOut%RecvDesc(ipe)%BlockSizes
612 : #endif
613 : ENDDO
614 :
615 6144 : CALL get_partneroffset( InComm, PatternOut%SendDesc, PatternOut%RecvDesc )
616 :
617 : CPP_LEAVE_PROCEDURE( "PARPATTERNCOPY" )
618 6144 : RETURN
619 : !EOC
620 : END SUBROUTINE ParPatternCopy
621 : !-----------------------------------------------------------------------
622 :
623 : !-----------------------------------------------------------------------
624 : !BOP
625 : ! !IROUTINE: ParPatternGhost --- Create pattern for given ghosting
626 : !
627 : ! !INTERFACE:
628 0 : SUBROUTINE ParPatternGhost( InComm, Ghost, Pattern, mod_method, T )
629 : !
630 : ! !USES:
631 : USE decompmodule, ONLY : DecompGlobalToLocal, DecompLocalToGlobal
632 : USE ghostmodule, ONLY : GhostType, GhostInfo
633 : USE mod_comm, ONLY : get_partneroffset
634 : IMPLICIT NONE
635 :
636 : ! !INPUT PARAMETERS:
637 : INTEGER, INTENT( IN ) :: InComm ! Communicator
638 : TYPE(GhostType), INTENT( IN ) :: Ghost ! Ghost region descriptor
639 : INTEGER, INTENT( IN ), OPTIONAL :: mod_method ! contiguous or derived type
640 : INTEGER, INTENT( IN ), OPTIONAL :: T ! MPI datatype (default CPP_MPI_REAL8)
641 :
642 : ! !OUTPUT PARAMETERS:
643 : TYPE(ParPatternType), INTENT( OUT ) :: Pattern ! Comm Pattern
644 : !
645 : ! !DESCRIPTION:
646 : ! This routine constructs a communication pattern from the ghost
647 : ! region definition. That is, the resulting communication pattern
648 : ! can be used in ParBegin/EndTransfer with the ghosted arrays as
649 : ! inputs.
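     : !
     : ! A ghost-update sketch (Ghost is a previously constructed GhostType;
     : ! A and AGhosted are illustrative array names):
     : !
     : !      CALL ParPatternCreate( Comm, Ghost, Pattern )
     : !      CALL ParBeginTransfer( Pattern, A, AGhosted )
     : !      CALL ParEndTransfer( Pattern, A, AGhosted )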
650 : !
651 : ! !SYSTEM ROUTINES:
652 : ! MPI_COMM_SIZE, MPI_COMM_RANK, MPI_COMM_DUP
653 : ! MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
654 : !
655 : ! !REVISION HISTORY:
656 : ! 01.02.10 Sawyer Creation
657 : ! 01.06.02 Sawyer Renamed ParPatternGhost
658 : ! 02.06.27 Sawyer Added data type "T" as optional argument
659 : ! 03.03.04 Sawyer Set partneroffsets field
660 : ! 03.11.11 Mirin Added optional argument mod_method
661 : !
662 : !EOP
663 : !-----------------------------------------------------------------------
664 : !BOC
665 : ! !LOCAL VARIABLES:
666 : INTEGER i, j, ipe, pe, Iam, GroupSize, Num, Length, Ptr, Ierror
667 : INTEGER Global, End, Local, GlobalSize, LocalSize, BorderSize
668 : INTEGER DataType
669 0 : INTEGER, ALLOCATABLE :: InVector(:), OutVector(:)
670 0 : INTEGER, ALLOCATABLE :: LenInVector(:), LenOutVector(:)
671 : INTEGER :: method
672 :
673 : CPP_ENTER_PROCEDURE( "PARPATTERNGHOST" )
674 :
675 0 : IF (present(T)) THEN
676 0 : DataType = T
677 : ELSE
678 0 : DataType = CPP_MPI_REAL8
679 : ENDIF
680 :
681 0 : IF (present(mod_method)) THEN
682 0 : method = mod_method
683 : ELSE
684 : method = 0 ! Default method - see mod_comm for description
685 : ENDIF
686 : !
687 : ! First request the needed ghost values from other processors.
688 : !
689 0 : CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
690 0 : CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
691 0 : CALL MPI_COMM_RANK( InComm, Iam, Ierror )
692 :
693 0 : Pattern%Iam = Iam
694 0 : Pattern%Size = GroupSize
695 :
696 0 : ALLOCATE( Pattern%SendDesc( GroupSize ) )
697 0 : ALLOCATE( Pattern%RecvDesc( GroupSize ) )
698 :
699 0 : Pattern%SendDesc(:)%method = method
700 0 : Pattern%RecvDesc(:)%method = method
701 :
702 : !
703 : ! Temporary variables
704 : !
705 0 : ALLOCATE( LenInVector( GroupSize ) )
706 0 : ALLOCATE( LenOutVector( GroupSize ) )
707 :
708 0 : CALL GhostInfo( Ghost,GroupSize,GlobalSize,LocalSize,BorderSize )
709 0 : ALLOCATE( InVector( 2*BorderSize ) )
710 0 : ALLOCATE( OutVector( 2*LocalSize ) )
711 :
712 : !
713 : ! A rather complicated loop to define the local ghost region.
714 : ! The concept is the following: go through all the points in the
715 : ! border data structure. It contains global indices of the points
716 : ! which have to be copied over from neighboring PEs. These indices
717 : ! are collected into InVector for transmission to those PEs, in
718 : ! effect informing them of the local PEs requirements.
719 : !
720 : ! A special case is supported: if the ghost domain wraps around
721 : ! onto the domain of the local PE! This is very tricky, because
722 : ! the index space in both Ghost%Border and Ghost%Local MUST be
723 : ! unique for DecompGlobalToLocal to work. Solution: ghost
724 : ! points are marked with the negative value of the needed domain
725 : ! value in both Ghost%Border and Ghost%Local. These are "snapped
726 : ! over" to the true global index with the ABS function, so that
727 : ! they can be subsequently found in the true local domain.
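     : !
     : ! For example (illustrative indices): a wrapped ghost point needing
     : ! global index 5 is stored as -5 in Ghost%Border and Ghost%Local;
     : ! ABS(-5) = 5 recovers the true index before the local lookup.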
728 : !
729 0 : j = 1
730 0 : DO ipe=1, GroupSize
731 0 : Num = SIZE(Ghost%Border%Head(ipe)%StartTags)
732 0 : Length = 0
733 0 : DO i = 1, Num
734 0 : Global = Ghost%Border%Head(ipe)%StartTags(i)
735 0 : IF ( Global /= 0 ) THEN
736 0 : Length = Length + 1
737 0 : End = Ghost%Border%Head(ipe)%EndTags(i)
738 0 : InVector(j) = ABS(Global)
739 0 : InVector(j+1) = ABS(End)
740 0 : CALL DecompGlobalToLocal( Ghost%Local, Global, Local, Pe )
741 0 : OutVector(Length) = Local-1 ! Zero-based address
742 0 : OutVector(Length+Num) = End - Global+1 ! Parcel size
743 0 : j = j + 2
744 : ENDIF
745 : ENDDO
746 0 : LenInVector(ipe) = 2*Length
747 :
748 : !
749 : ! Set the receive buffer descriptor
750 : !
751 : #if defined(DEBUG_PARPATTERNGHOST)
752 : write(iulog,*) "Iam",Iam,"Pe",Ipe-1,"Lens",OutVector(Num+1:Num+Length), &
753 : "Displacements", OutVector(1:Length)
754 : #endif
755 :
756 0 : IF ( Length > 0 .and. method > 0 ) THEN
757 0 : CALL MPI_TYPE_INDEXED( Length, OutVector(Num+1), OutVector, &
758 0 : DataType, Ptr, Ierror )
759 0 : CALL MPI_TYPE_COMMIT( Ptr, Ierror )
760 0 : Pattern%RecvDesc(ipe)%type = Ptr
761 : ELSE
762 0 : Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
763 : ENDIF
764 :
765 0 : ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Length) )
766 0 : ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Length) )
767 0 : DO i=1, Length
768 0 : Pattern%RecvDesc(ipe)%Displacements(i) = OutVector(i)
769 0 : Pattern%RecvDesc(ipe)%BlockSizes(i) = OutVector(Num+i)
770 : ENDDO
771 0 : Pattern%RecvDesc(ipe)%Nparcels = &
772 0 : size (Pattern%RecvDesc(ipe)%Displacements)
773 0 : Pattern%RecvDesc(ipe)%Tot_Size = &
774 0 : sum (Pattern%RecvDesc(ipe)%Blocksizes)
775 0 : Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
776 :
777 : ENDDO
778 :
779 : !
780 : ! Everybody exchanges the needed information
781 : !
782 : #if defined(DEBUG_PARPATTERNGHOST)
783 : write(iulog,*) "iam", iam, "In", LenInVector, &
784 : InVector( 1:SUM(LenInVector) )
785 : #endif
786 : CALL ParExchangeVectorInt( InComm, LenInVector, InVector, &
787 0 : LenOutVector, OutVector )
788 : #if defined(DEBUG_PARPATTERNGHOST)
789 : write(iulog,*) "iam", iam, "Out", LenOutVector, &
790 : OutVector( 1:SUM(LenOutVector) )
791 : #endif
792 :
793 : !
794 : ! Now everyone has the segments which need to be sent to the
795 : ! immediate neighbors. Save these in PatternType.
796 : !
797 0 : j = 1
798 0 : DO ipe = 1, GroupSize
799 0 : Num = LenOutVector(ipe) / 2
800 0 : DO i = 1, Num
801 0 : CALL DecompGlobalToLocal( Ghost%Local,OutVector(j),Local,pe )
802 0 : InVector(i) = Local-1
803 0 : InVector(i+Num) = OutVector(j+1) - OutVector(j) + 1
804 0 : j = j + 2
805 : ENDDO
806 : #if defined(DEBUG_PARPATTERNGHOST)
807 : write(iulog,*) "Iam", Iam, "To", ipe-1, "InVector", &
808 : InVector(1:Num), "block size", InVector(Num+1:2*Num)
809 : #endif
810 :
811 0 : IF ( Num > 0 .and. method > 0 ) THEN
812 0 : CALL MPI_TYPE_INDEXED( Num, InVector(Num+1), InVector, &
813 0 : DataType, Ptr, Ierror )
814 0 : CALL MPI_TYPE_COMMIT( Ptr, Ierror )
815 0 : Pattern%SendDesc(ipe)%type = Ptr
816 : ELSE
817 0 : Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
818 : ENDIF
819 :
820 0 : ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
821 0 : ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
822 0 : DO i=1, Num
823 0 : Pattern%SendDesc(ipe)%Displacements(i) = InVector(i)
824 0 : Pattern%SendDesc(ipe)%BlockSizes(i) = InVector(Num+i)
825 : ENDDO
826 0 : Pattern%SendDesc(ipe)%Nparcels = &
827 0 : size (Pattern%SendDesc(ipe)%Displacements)
828 0 : Pattern%SendDesc(ipe)%Tot_Size = &
829 0 : sum (Pattern%SendDesc(ipe)%Blocksizes)
830 0 : Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
831 :
832 : ENDDO
833 :
834 0 : CALL get_partneroffset( InComm, Pattern%SendDesc, Pattern%RecvDesc )
835 :
836 : !
837 : ! Clean up the locally allocate variables
838 : !
839 0 : DEALLOCATE( OutVector )
840 0 : DEALLOCATE( InVector )
841 0 : DEALLOCATE( LenOutVector )
842 0 : DEALLOCATE( LenInVector )
843 :
844 : CPP_LEAVE_PROCEDURE( "PARPATTERNGHOST" )
845 0 : RETURN
846 : !EOC
847 0 : END SUBROUTINE ParPatternGhost
848 : !-----------------------------------------------------------------------
849 :
850 : !-----------------------------------------------------------------------
851 : !BOP
852 : ! !IROUTINE: ParPatternDecompToDecomp --- Create pattern between decomps
853 : !
854 : ! !INTERFACE:
855 10752 : SUBROUTINE ParPatternDecompToDecomp( InComm, DA, DB, Pattern, mod_method, T )
856 : !
857 : ! !USES:
858 : USE decompmodule, ONLY : DecompType, DecompGlobalToLocal, DecompInfo
859 : USE mod_comm, ONLY : get_partneroffset
860 : IMPLICIT NONE
861 :
862 : ! !INPUT PARAMETERS:
863 : INTEGER, INTENT( IN ) :: InComm ! Communicator
864 : TYPE(DecompType), INTENT( IN ) :: DA ! Source Decomp Desc
865 : TYPE(DecompType), INTENT( IN ) :: DB ! Target Decomp Desc
866 : INTEGER, INTENT( IN ), OPTIONAL :: mod_method ! contiguous or derived type
867 : INTEGER, INTENT( IN ), OPTIONAL :: T ! MPI datatype (default CPP_MPI_REAL8)
868 :
869 : ! !OUTPUT PARAMETERS:
870 : TYPE(ParPatternType), INTENT( OUT ) :: Pattern ! Comm Pattern
871 : !
872 : ! !DESCRIPTION:
873 : ! This routine constructs a communication pattern for a
874 : ! transformation from one decomposition to another, i.e., a
875 : ! so-called "transpose". The resulting communication pattern
876 : ! can be used in ParBegin/EndTransfer with the decomposed
877 : ! arrays as inputs.
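     : !
     : ! A transpose sketch (DA, DB, AonDA and AonDB are illustrative):
     : ! the pattern is built once and reused for every redistribution:
     : !
     : !      CALL ParPatternCreate( Comm, DA, DB, Pattern )
     : !      CALL ParBeginTransfer( Pattern, AonDA, AonDB )
     : !      CALL ParEndTransfer( Pattern, AonDA, AonDB )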
878 : !
879 : ! !SYSTEM ROUTINES:
880 : ! MPI_COMM_SIZE, MPI_COMM_RANK, MPI_COMM_DUP
881 : ! MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
882 : !
883 : ! !REVISION HISTORY:
884 : ! 01.05.29 Sawyer Creation from RedistributeCreate
885 : ! 01.07.13 Sawyer Rewritten to minimize DecompGlobalToLocal
886 : ! 02.07.16 Sawyer Added data type T
887 : ! 03.11.11 Mirin Added optional argument mod_method
888 : ! 07.03.11 Mirin Generalized to different sized decompositions
889 : ! 07.09.04 Dennis Reduced amount of temporary memory usage
890 : !
891 : !EOP
892 : !-----------------------------------------------------------------------
893 : !BOC
894 : ! !LOCAL VARIABLES:
895 : INTEGER I, J, Tag, Local, Pe, LenB, JB, Ipe, Num, Inc, Off
896 : INTEGER Ptr ! Pointer type
897 : INTEGER GroupSize, Iam, Ierror, DataType
898 : INTEGER OldPe, TotalPtsA, NpesA, TotalPtsB, NpesB
899 : INTEGER :: method
900 : INTEGER :: nCount,maxCount,ierr,sz
901 : INTEGER :: lenBjmd,nNeigh,maxLenB,maxNeigh
902 : #ifdef _SMEMORY
903 : TYPE (ParInfoType) :: Info
904 : #endif
905 :
906 10752 : INTEGER, ALLOCATABLE :: Count(:) ! # segments for each recv PE
907 10752 : INTEGER, ALLOCATABLE :: CountOut(:) ! # segments for each send PE
908 :
909 10752 : INTEGER, ALLOCATABLE :: DisplacementsA(:) ! Generic displacements
910 10752 : INTEGER, ALLOCATABLE :: BlockSizesA(:) ! Generic block sizes
911 10752 : INTEGER, ALLOCATABLE :: LocalA(:) ! Generic Local indices
912 :
913 10752 : INTEGER, ALLOCATABLE :: DisplacementsB(:) ! Displacements for B
914 10752 : INTEGER, ALLOCATABLE :: BlockSizesB(:) ! Block sizes for B
915 10752 : INTEGER, ALLOCATABLE :: LocalB(:) ! Local indices for B
916 10752 : INTEGER, ALLOCATABLE :: PeB(:) ! Processor element numbers
917 :
918 : CPP_ENTER_PROCEDURE( "PARPATTERNDECOMPTODECOMP" )
919 :
920 10752 : IF (present(T)) THEN
921 4608 : DataType = T
922 : ELSE
923 6144 : DataType = CPP_MPI_REAL8
924 : ENDIF
925 :
926 10752 : IF (present(mod_method)) THEN
927 10752 : method = mod_method
928 : ELSE
929 : method = 0 ! Default method - see mod_comm for description
930 : ENDIF
931 :
932 : ! Assume this routine is called by processes [ 0,max(NpesA,NpesB) )
933 :
934 10752 : CALL DecompInfo( DA, NpesA, TotalPtsA )
935 10752 : CALL DecompInfo( DB, NpesB, TotalPtsB )
936 :
937 10752 : CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
938 10752 : CALL MPI_COMM_RANK( InComm, Iam, Ierror )
939 10752 : CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
940 :
941 : #ifdef _SMEMORY
942 : ! Calculate info about the pattern
943 10752 : call ParCalcInfo(InComm,DA,DB, Info)
944 10752 : TotalPtsA=Info%maxNumSeg
945 10752 : TotalPtsB=Info%maxNumSeg
946 : #endif
947 :
948 10752 : Pattern%Size = GroupSize
949 10752 : Pattern%Iam = Iam
950 : !
951 : ! Allocate the number of entries and list head arrays
952 : !
953 :
954 : !
955 : ! Allocate the patterns
956 : !
957 32256 : ALLOCATE( Pattern%SendDesc( NpesB ) )
958 4139520 : Pattern%SendDesc(:)%method = method
959 10752 : if (iam .ge. NpesA) then
960 0 : do ipe = 1, NpesB
961 0 : ALLOCATE( Pattern%SendDesc(ipe)%Displacements(1) )
962 0 : ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(1) )
963 0 : Pattern%SendDesc(ipe)%Tot_Size = -1
964 0 : Pattern%SendDesc(ipe)%Nparcels = -1
965 0 : Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
966 0 : Pattern%SendDesc(ipe)%Displacements(1) = -1
967 0 : Pattern%SendDesc(ipe)%Blocksizes(1) = -1
968 : enddo
969 : endif
970 :
971 32256 : ALLOCATE( Pattern%RecvDesc( NpesA ) )
972 4139520 : Pattern%RecvDesc(:)%method = method
973 10752 : if (iam .ge. NpesB) then
974 0 : do ipe = 1, NpesA
975 0 : ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(1) )
976 0 : ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(1) )
977 0 : Pattern%RecvDesc(ipe)%Tot_Size = -1
978 0 : Pattern%RecvDesc(ipe)%Nparcels = -1
979 0 : Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
980 0 : Pattern%RecvDesc(ipe)%Displacements(1) = -1
981 0 : Pattern%RecvDesc(ipe)%Blocksizes(1) = -1
982 : enddo
983 : endif
984 :
985 : !
986 : ! Local allocations
987 : !
988 32256 : ALLOCATE( DisplacementsA( TotalPtsA ) ) ! Allocate for worst case
989 21504 : ALLOCATE( BlockSizesA( TotalPtsA ) ) ! Allocate for worst case
990 21504 : ALLOCATE( LocalA( TotalPtsA ) ) ! Allocate for worst case
991 :
992 32256 : ALLOCATE( DisplacementsB( TotalPtsB ) ) ! Allocate for worst case
993 21504 : ALLOCATE( BlockSizesB( TotalPtsB ) ) ! Allocate for worst case
994 21504 : ALLOCATE( LocalB( TotalPtsB ) ) ! Allocate for worst case
995 21504 : ALLOCATE( PeB( TotalPtsB ) ) ! Allocate for worst case
996 :
997 32256 : ALLOCATE( Count( GroupSize ) )
998 21504 : ALLOCATE( CountOut( GroupSize ) )
999 :
1000 10752 : JB = 0
1001 4139520 : Count = 0
1002 7199232 : LenB = 0
1003 7199232 : LocalA = 0 ! (needed for parexchangevector later)
1004 7199232 : BlocksizesA = 0 ! (needed for parexchangevector later)
1005 :
1006 10752 : Num = 0
1007 10752 : Inc = 0
1008 :
1009 10752 : if (iam .lt. NpesB) then
1010 :
1011 : !
1012 : ! Parse through all the tags in the local segment
1013 1027968 : DO J = 1, SIZE( DB%Head(iam+1)%StartTags )
1014 1017216 : OldPe = -1 ! Set PE undefined
1015 23422848 : DO Tag=DB%Head(iam+1)%StartTags(J), DB%Head(iam+1)%EndTags(J)
1016 : !
1017 : ! Determine the index and PE of this entry on A. This might be inlined later
1018 : !
1019 22394880 : CALL DecompGlobalToLocal( DA, Tag, Local, Pe )
1020 :
1021 : !
1022 : ! If ipe-1 is my id, then this is an entry ipe will receive from Pe
1023 : !
1024 22394880 : IF ( Pe /= OldPe ) THEN
1025 1866240 : OldPe = Pe
1026 1866240 : IF ( jb > 0 ) THEN
1027 1857786 : BlockSizesB(jb) = LenB
1028 1857786 : LenB = 0
1029 : ENDIF
1030 1866240 : jb = jb+1 ! increment the segment index
1031 1866240 : DisplacementsB(jb) = Inc ! Zero-based offset of local segment
1032 1866240 : LocalB(jb) = Local-1 ! The local index (zero-based)
1033 1866240 : PeB(jb) = Pe ! Note the ID of the sender
1034 1866240 : Count(Pe+1) = Count(Pe+1)+1 ! Increment counter of segments
1035 : ENDIF
1036 22394880 : LenB = LenB+1 ! Good -- segment is getting longer
1037 45806976 : Inc = Inc+1 ! Increment local index
1038 : ENDDO
1039 : ENDDO
1040 : !
1041 : ! Clean up
1042 : !
1043 10752 : IF ( jb>0 ) BlockSizesB(jb) = LenB
1044 : #if defined(DEBUG_PARPATTERNDECOMPTODECOMP)
1045 : write(iulog,*) iam, "BlockSizes", BlockSizesB(1:jb), DisplacementsB(1:jb), PeB(1:jb), Count
1046 : #endif
1047 :
1048 : CPP_ASSERT_F90( JB .LE. TotalPtsB )
1049 : !
1050 : ! Now create the pattern from the displacements and block sizes
1051 : !
1052 10752 : Inc = 0
1053 4139520 : DO ipe = 1, NpesA
1054 : !
1055 : ! Find the segments which are relevant for the sender ipe
1056 : ! Make compact arrays BlockSizes and Displacements
1057 : !
1058 720775680 : DO j = 1, jb
1059 720764928 : IF ( PeB(j) == ipe-1 ) THEN
1060 1866240 : Inc = Inc + 1
1061 1866240 : BlockSizesA(Inc) = BlockSizesB(j)
1062 1866240 : DisplacementsA(Inc) = DisplacementsB(j)
1063 1866240 : LocalA(Inc) = LocalB(j)
1064 : ENDIF
1065 : ENDDO
1066 : ENDDO
1067 : CPP_ASSERT_F90( Inc .LE. TotalPtsA )
1068 :
1069 : !
1070 : ! Create the receiver communication pattern
1071 : !
1072 10752 : Off = 0
1073 4139520 : DO ipe = 1, NpesA
1074 4128768 : Num = Count(ipe)
1075 : if(Num >0) then
1076 : #if defined(DEBUG_PARPATTERNDECOMPTODECOMP)
1077 : write(iulog,*) "Receiver Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
1078 : "Displacements", DisplacementsA(Off+1:Off+Num), &
1079 : "BlockSizes", BlockSizesA(Off+1:Off+Num)
1080 : #endif
1081 : endif
1082 4128768 : IF ( Num > 0 .and. method > 0 ) THEN
1083 :
1084 0 : CALL MPI_TYPE_INDEXED( Num, BlockSizesA(Off+1), &
1085 : DisplacementsA(Off+1), &
1086 0 : DataType, Ptr, Ierror )
1087 0 : CALL MPI_TYPE_COMMIT( Ptr, Ierror )
1088 0 : Pattern%RecvDesc(ipe)%type = Ptr
1089 : ELSE
1090 4128768 : Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
1091 : ENDIF
1092 :
1093 8335872 : ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Num) )
1094 4207104 : ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Num) )
1095 5995008 : DO i=1, Num
1096 1866240 : Pattern%RecvDesc(ipe)%Displacements(i) = DisplacementsA(i+Off)
1097 5995008 : Pattern%RecvDesc(ipe)%BlockSizes(i) = BlockSizesA(i+Off)
1098 : ENDDO
1099 0 : Pattern%RecvDesc(ipe)%Nparcels = &
1100 4128768 : size (Pattern%RecvDesc(ipe)%Displacements)
1101 0 : Pattern%RecvDesc(ipe)%Tot_Size = &
1102 5995008 : sum (Pattern%RecvDesc(ipe)%Blocksizes)
1103 4128768 : Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
1104 :
1105 4139520 : Off = Off + Num
1106 : ENDDO
1107 :
1108 : endif ! (iam .lt. NpesB)
1109 :
1110 : !
1111 : ! Now communicate what the receiver is expecting from the sender
1112 : !
1113 : CALL ParExchangeVectorInt( InComm, Count, LocalA, &
1114 10752 : CountOut, DisplacementsB )
1115 : CALL ParExchangeVectorInt( InComm, Count, BlockSizesA, &
1116 10752 : CountOut, BlockSizesB )
1117 :
1118 : !
1119 : ! Sender A: BlockSizes and Displacements can now be stored
1120 : !
1121 :
1122 10752 : if (iam .lt. NpesA) then
1123 :
1124 10752 : Off = 0
1125 4139520 : DO ipe=1, NpesB
1126 4128768 : Num = CountOut(ipe)
1127 : if(Num>0) then
1128 : #if defined(DEBUG_PARPATTERNDECOMPTODECOMP)
1129 : write(iulog,*) "Sender Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
1130 : "Displacements", DisplacementsB(Off+1:Off+Num), &
1131 : "BlockSizes", BlockSizesB(Off+1:Off+Num)
1132 : #endif
1133 : endif
1134 4128768 : IF ( Num > 0 .and. method > 0 ) THEN
1135 0 : CALL MPI_TYPE_INDEXED( Num, BlockSizesB(Off+1), &
1136 : DisplacementsB(Off+1), &
1137 0 : DataType, Ptr, Ierror )
1138 0 : CALL MPI_TYPE_COMMIT( Ptr, Ierror )
1139 0 : Pattern%SendDesc(ipe)%type = Ptr
1140 : ELSE
1141 4128768 : Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
1142 : ENDIF
1143 :
1144 8335872 : ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
1145 4207104 : ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
1146 5995008 : DO i=1, Num
1147 1866240 : Pattern%SendDesc(ipe)%Displacements(i) = DisplacementsB(i+Off)
1148 5995008 : Pattern%SendDesc(ipe)%BlockSizes(i) = BlockSizesB(i+Off)
1149 : ENDDO
1150 0 : Pattern%SendDesc(ipe)%Nparcels = &
1151 4128768 : size (Pattern%SendDesc(ipe)%Displacements)
1152 0 : Pattern%SendDesc(ipe)%Tot_Size = &
1153 5995008 : sum (Pattern%SendDesc(ipe)%Blocksizes)
1154 4128768 : Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
1155 :
1156 4139520 : Off = Off + Num
1157 : ENDDO
1158 :
1159 : endif ! (iam .lt. NpesA)
1160 :
1161 10752 : CALL get_partneroffset( InComm, Pattern%SendDesc, Pattern%RecvDesc )
1162 :
1163 10752 : DEALLOCATE( CountOut )
1164 10752 : DEALLOCATE( Count )
1165 :
1166 10752 : DEALLOCATE( PeB )
1167 10752 : DEALLOCATE( LocalB )
1168 10752 : DEALLOCATE( BlockSizesB )
1169 10752 : DEALLOCATE( DisplacementsB )
1170 :
1171 10752 : DEALLOCATE( LocalA )
1172 10752 : DEALLOCATE( BlockSizesA )
1173 10752 : DEALLOCATE( DisplacementsA )
1174 :
1175 : CPP_LEAVE_PROCEDURE( "PARPATTERNDECOMPTODECOMP" )
1176 10752 : RETURN
1177 : !EOC
1178 10752 : END SUBROUTINE ParPatternDecompToDecomp
1179 : !-----------------------------------------------------------------------
1180 :
1181 :
1182 : !-----------------------------------------------------------------------
1183 : !BOP
1184 : ! !IROUTINE: ParPatternDecompToGhost --- Create pattern decomp to ghost
1185 : !
1186 : ! !INTERFACE:
1187 3840 : SUBROUTINE ParPatternDecompToGhost( InComm, DA, GB, Pattern, mod_method, T )
1188 : !
1189 : ! !USES:
1190 : USE decompmodule, ONLY : DecompType, DecompGlobalToLocal, &
1191 : DecompInfo
1192 : USE ghostmodule, ONLY : GhostType, GhostInfo
1193 : USE mod_comm, ONLY : get_partneroffset
1194 : IMPLICIT NONE
1195 :
1196 : ! !INPUT PARAMETERS:
1197 : INTEGER, INTENT( IN ) :: InComm ! Communicator
1198 : TYPE(DecompType), INTENT( IN ) :: DA ! Source Decomp Desc
1199 : TYPE(GhostType), INTENT( IN ) :: GB ! Target Ghost Desc
1200 : INTEGER, INTENT( IN ), OPTIONAL :: mod_method ! contiguous or derived type
1201 : INTEGER, INTENT( IN ), OPTIONAL :: T ! MPI datatype (default CPP_MPI_REAL8)
1202 :
1203 : ! !OUTPUT PARAMETERS:
1204 : TYPE(ParPatternType), INTENT( OUT ) :: Pattern ! Comm Pattern
1205 : !
1206 : ! !DESCRIPTION:
1207 : ! This routine constructs a communication pattern for a transformation
1208 : ! from a decomposition to a ghosted decomposition, i.e., a so-called
1209 : ! "transpose". The resulting communication pattern can be used in
1210 : ! ParBegin/EndTransfer with the decomposed arrays as inputs.
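     : !
     : ! Usage mirrors the decomp-to-decomp case, as a sketch (DA, GB and
     : ! Pattern are illustrative):
     : !
     : !      CALL ParPatternCreate( Comm, DA, GB, Pattern )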
1211 : !
1212 : ! !SYSTEM ROUTINES:
1213 : ! MPI_COMM_SIZE, MPI_COMM_RANK, MPI_COMM_DUP
1214 : ! MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
1215 : !
1216 : ! !REVISION HISTORY:
1217 : ! 01.07.12 Sawyer Creation from ParPatternDecompToDecomp
1218 : ! 02.03.20 Sawyer Bug fix: added OldLocal, increment Off
1219 : ! 02.07.16 Sawyer Added data type T
1220 : ! 03.11.11 Mirin Added optional argument mod_method
1221 : ! 07.03.11 Mirin Generalized to different sized decompositions
1222 : ! 07.09.04 Dennis Reduced amount of temporary memory usage
1223 : !
1224 : !EOP
1225 : !-----------------------------------------------------------------------
1226 : !BOC
1227 : ! !LOCAL VARIABLES:
1228 : INTEGER I, J, Tag, Local, Pe, LenB, JB, Ipe, Num, Inc, Off
1229 : INTEGER Ptr ! Pointer type
1230 : INTEGER GroupSize, Iam, Ierror
1231 : INTEGER OldPe, OldLocal, TotalPtsA, NpesA
1232 : INTEGER GlobalSizeB, LocalSizeB, BorderSizeB, NpesB
1233 : INTEGER DataType
1234 : INTEGER :: method
1235 : INTEGER :: nCount, maxCount, ierr
1236 : #ifdef _SMEMORY
1237 : TYPE (ParInfoType) :: Info
1238 : #endif
1239 :
1240 3840 : INTEGER, ALLOCATABLE :: Count(:) ! # segments for each recv PE
1241 3840 : INTEGER, ALLOCATABLE :: CountOut(:) ! # segments for each send PE
1242 :
1243 3840 : INTEGER, ALLOCATABLE :: DisplacementsA(:) ! Generic displacements
1244 3840 : INTEGER, ALLOCATABLE :: BlockSizesA(:) ! Generic block sizes
1245 3840 : INTEGER, ALLOCATABLE :: LocalA(:) ! Generic Local indices
1246 :
1247 3840 : INTEGER, ALLOCATABLE :: DisplacementsB(:) ! Displacements for B
1248 3840 : INTEGER, ALLOCATABLE :: BlockSizesB(:) ! Block sizes for B
1249 3840 : INTEGER, ALLOCATABLE :: LocalB(:) ! Local indices for B
1250 3840 : INTEGER, ALLOCATABLE :: PeB(:) ! Processor element numbers
1251 :
1252 : CPP_ENTER_PROCEDURE( "PARPATTERNDECOMPTOGHOST" )
1253 :
1254 3840 : IF (present(T)) THEN
1255 0 : DataType = T
1256 : ELSE
1257 3840 : DataType = CPP_MPI_REAL8
1258 : ENDIF
1259 :
1260 3840 : IF (present(mod_method)) THEN
1261 3840 : method = mod_method
1262 : ELSE
1263 : method = 0 ! Default method - see mod_comm for description
1264 : ENDIF
1265 :
1266 : ! Assume this routine is called by processes [ 0,max(NpesA,NpesB) )
1267 :
1268 3840 : CALL DecompInfo( DA, NpesA, TotalPtsA )
1269 3840 : CALL GhostInfo( GB, NpesB, GlobalSizeB, LocalSizeB, BorderSizeB )
1270 :
1271 3840 : CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
1272 3840 : CALL MPI_COMM_RANK( InComm, Iam, Ierror )
1273 3840 : CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
1274 :
1275 : #ifdef _SMEMORY
1276 : ! Calculate info about the pattern
1277 3840 : call ParCalcInfo(InComm,DA,GB, Info)
1278 3840 : TotalPtsA=Info%maxNumSeg
1279 3840 : GlobalSizeB=Info%maxNumSeg
1280 : #endif
1281 :
1282 3840 : Pattern%Size = GroupSize
1283 3840 : Pattern%Iam = Iam
1284 : !
1285 : ! Allocate the number of entries and list head arrays
1286 : !
1287 :
1288 : !
1289 : ! Allocate the patterns
1290 : !
1291 11520 : ALLOCATE( Pattern%SendDesc( NpesB ) )
1292 1478400 : Pattern%SendDesc(:)%method = method
1293 3840 : if (iam .ge. NpesA) then
1294 0 : do ipe = 1, NpesB
1295 0 : ALLOCATE( Pattern%SendDesc(ipe)%Displacements(1) )
1296 0 : ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(1) )
1297 0 : Pattern%SendDesc(ipe)%Tot_Size = -1
1298 0 : Pattern%SendDesc(ipe)%Nparcels = -1
1299 0 : Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
1300 0 : Pattern%SendDesc(ipe)%Displacements(1) = -1
1301 0 : Pattern%SendDesc(ipe)%Blocksizes(1) = -1
1302 : enddo
1303 : endif
1304 :
1305 11520 : ALLOCATE( Pattern%RecvDesc( NpesA ) )
1306 1478400 : Pattern%RecvDesc(:)%method = method
1307 3840 : if (iam .ge. NpesB) then
1308 0 : do ipe = 1, NpesA
1309 0 : ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(1) )
1310 0 : ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(1) )
1311 0 : Pattern%RecvDesc(ipe)%Tot_Size = -1
1312 0 : Pattern%RecvDesc(ipe)%Nparcels = -1
1313 0 : Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
1314 0 : Pattern%RecvDesc(ipe)%Displacements(1) = -1
1315 0 : Pattern%RecvDesc(ipe)%Blocksizes(1) = -1
1316 : enddo
1317 : endif
1318 :
1319 : !
1320 : ! Local allocations
1321 : !
1322 11520 : ALLOCATE( DisplacementsA( TotalPtsA ) ) ! Allocate for worst case
1323 7680 : ALLOCATE( BlockSizesA( TotalPtsA ) ) ! Allocate for worst case
1324 7680 : ALLOCATE( LocalA( TotalPtsA ) ) ! Allocate for worst case
1325 :
1326 11520 : ALLOCATE( DisplacementsB( GlobalSizeB ) ) ! Allocate for worst case
1327 7680 : ALLOCATE( BlockSizesB( GlobalSizeB ) ) ! Allocate for worst case
1328 7680 : ALLOCATE( LocalB( GlobalSizeB ) ) ! Allocate for worst case
1329 7680 : ALLOCATE( PeB( GlobalSizeB ) ) ! Allocate for worst case
1330 :
1331 11520 : ALLOCATE( Count( GroupSize ) )
1332 7680 : ALLOCATE( CountOut( GroupSize ) )
1333 :
1334 3840 : JB = 0
1335 1478400 : Count = 0
1336 3404544 : LenB = 0
1337 3404544 : LocalA = 0 ! (needed for parexchangevector later)
1338 3404544 : BlocksizesA = 0 ! (needed for parexchangevector later)
1339 :
1340 3840 : Num = 0
1341 3840 : Inc = 0
1342 :
1343 3840 : if (iam .lt. NpesB) then
1344 :
1345 : !
1346 : ! Parse through all the tags in the local segment
1347 714112 : DO J = 1, SIZE( GB%Local%Head(iam+1)%StartTags )
1348 710272 : OldPe = -1 ! Set PE undefined
1349 710272 : OldLocal = 0 ! Set local index undefined
1350 40910464 : DO Tag=GB%Local%Head(iam+1)%StartTags(J), &
1351 714112 : GB%Local%Head(iam+1)%EndTags(J)
1352 40200192 : IF ( Tag > 0 ) THEN ! Active point
1353 : !
1354 : ! Determine the index and PE of this entry on A. This might be inlined later
1355 : !
1356 39526272 : CALL DecompGlobalToLocal( DA, Tag, Local, Pe )
1357 :
1358 : !
1359 : ! If ipe-1 is my id, then this is an entry ipe will receive from Pe
1360 : !
1361 39526272 : IF ( Pe /= OldPe .OR. Local /= OldLocal+1 ) THEN
1362 3293856 : IF ( jb > 0 ) THEN
1363 3290016 : BlockSizesB(jb) = LenB
1364 3290016 : LenB = 0
1365 : ENDIF
1366 3293856 : jb = jb+1 ! increment the segment index
1367 3293856 : DisplacementsB(jb) = Inc ! Zero-based offset of local segment
1368 3293856 : LocalB(jb) = Local-1 ! Local indices (zero-based)
1369 3293856 : PeB(jb) = Pe ! Note the ID of the sender
1370 3293856 : Count(Pe+1) = Count(Pe+1)+1 ! Increment counter of segments
1371 : ENDIF
1372 39526272 : OldPe = Pe ! Update PE
1373 39526272 : OldLocal= Local ! Update local index
1374 39526272 : LenB = LenB+1 ! Good -- segment is getting longer
1375 : ENDIF
1376 40910464 : Inc = Inc+1 ! Increment local index
1377 : ENDDO
1378 : ENDDO
1379 : !
1380 : ! Clean up
1381 : !
1382 3840 : IF ( jb>0 ) BlockSizesB(jb) = LenB
1383 :
1384 : CPP_ASSERT_F90( JB .LE. GlobalSize )
1385 : !
1386 : ! Now create the pattern from the displacements and block sizes
1387 : !
1388 3840 : Inc = 0
1389 1478400 : DO ipe = 1, NpesA
1390 : !
1391 : ! Find the segments which are relevant for the sender ipe
1392 : ! Make compact arrays BlockSizes and Displacements
1393 : !
1394 1266319104 : DO j = 1, jb
1395 1266315264 : IF ( PeB(j) == ipe-1 ) THEN
1396 3293856 : Inc = Inc + 1
1397 3293856 : BlockSizesA(Inc) = BlockSizesB(j)
1398 3293856 : DisplacementsA(Inc) = DisplacementsB(j)
1399 3293856 : LocalA(Inc) = LocalB(j)
1400 : ENDIF
1401 : ENDDO
1402 : ENDDO
1403 :
1404 : CPP_ASSERT_F90( Inc .LE. TotalPtsA )
1405 :
1406 3840 : Off = 0
1407 1478400 : DO ipe = 1, NpesA
1408 1474560 : Num = Count(ipe)
1409 : #if defined( DEBUG_PARPATTERNDECOMPTOGHOST )
1410 : write(iulog,*) "Receiver Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
1411 : "Displacements", DisplacementsA(Off+1:Off+Num), &
1412 : "BlockSizes", BlockSizesA(Off+1:Off+Num)
1413 : #endif
1414 :
1415 : !
1416 : ! Create the receiver communication pattern
1417 : !
1418 1474560 : IF ( Num > 0 .and. method > 0 ) THEN
1419 0 : CALL MPI_TYPE_INDEXED( Num, BlockSizesA(Off+1), &
1420 0 : DisplacementsA(Off+1), DataType, Ptr, Ierror )
1421 0 : CALL MPI_TYPE_COMMIT( Ptr, Ierror )
1422 0 : Pattern%RecvDesc(ipe)%type = Ptr
1423 : ELSE
1424 1474560 : Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
1425 : ENDIF
1426 :
1427 3048768 : ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Num) )
1428 1574208 : ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Num) )
1429 4768416 : DO i=1, Num
1430 3293856 : Pattern%RecvDesc(ipe)%Displacements(i) = DisplacementsA(i+Off)
1431 4768416 : Pattern%RecvDesc(ipe)%BlockSizes(i) = BlockSizesA(i+Off)
1432 : ENDDO
1433 0 : Pattern%RecvDesc(ipe)%Nparcels = &
1434 1474560 : size (Pattern%RecvDesc(ipe)%Displacements)
1435 0 : Pattern%RecvDesc(ipe)%Tot_Size = &
1436 4768416 : sum (Pattern%RecvDesc(ipe)%Blocksizes)
1437 1474560 : Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
1438 :
1439 1478400 : Off = Off + Num
1440 : ENDDO
1441 :
1442 : endif ! (iam .lt. NpesB)
1443 :
1444 : !
1445 : ! Now communicate what the receiver is expecting to the sender
1446 : !
1447 : CALL ParExchangeVectorInt( InComm, Count, LocalA, &
1448 3840 : CountOut, DisplacementsB )
1449 : CALL ParExchangeVectorInt( InComm, Count, BlockSizesA, &
1450 3840 : CountOut, BlockSizesB )
1451 :
1452 : !
1453 : ! Sender A: BlockSizes and Displacements can now be stored
1454 : !
1455 :
1456 3840 : if (iam .lt. NpesA) then
1457 :
1458 3840 : Off = 0
1459 1478400 : DO ipe=1, NpesB
1460 1474560 : Num = CountOut(ipe)
1461 : #if defined( DEBUG_PARPATTERNDECOMPTOGHOST )
1462 : write(iulog,*) "Sender Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
1463 : "Displacements", DisplacementsB(Off+1:Off+Num), &
1464 : "BlockSizes", BlockSizesB(Off+1:Off+Num)
1465 : #endif
1466 :
1467 1474560 : IF ( Num > 0 .and. method > 0 ) THEN
1468 0 : CALL MPI_TYPE_INDEXED( Num, BlockSizesB(Off+1), &
1469 0 : DisplacementsB(Off+1), DataType, Ptr, Ierror )
1470 0 : CALL MPI_TYPE_COMMIT( Ptr, Ierror )
1471 0 : Pattern%SendDesc(ipe)%type = Ptr
1472 : ELSE
1473 1474560 : Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
1474 : ENDIF
1475 :
1476 3048768 : ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
1477 1574208 : ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
1478 4768416 : DO i=1, Num
1479 3293856 : Pattern%SendDesc(ipe)%Displacements(i) = DisplacementsB(i+Off)
1480 4768416 : Pattern%SendDesc(ipe)%BlockSizes(i) = BlockSizesB(i+Off)
1481 : ENDDO
1482 0 : Pattern%SendDesc(ipe)%Nparcels = &
1483 1474560 : size (Pattern%SendDesc(ipe)%Displacements)
1484 0 : Pattern%SendDesc(ipe)%Tot_Size = &
1485 4768416 : sum (Pattern%SendDesc(ipe)%Blocksizes)
1486 1474560 : Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
1487 :
1488 1478400 : Off = Off + Num
1489 : ENDDO
1490 :
1491 : endif ! (iam .lt. NpesA)
1492 :
1493 3840 : CALL get_partneroffset( InComm, Pattern%SendDesc, Pattern%RecvDesc )
1494 :
1495 3840 : DEALLOCATE( CountOut )
1496 3840 : DEALLOCATE( Count )
1497 :
1498 3840 : DEALLOCATE( PeB )
1499 3840 : DEALLOCATE( LocalB )
1500 3840 : DEALLOCATE( BlockSizesB )
1501 3840 : DEALLOCATE( DisplacementsB )
1502 :
1503 3840 : DEALLOCATE( LocalA )
1504 3840 : DEALLOCATE( BlockSizesA )
1505 3840 : DEALLOCATE( DisplacementsA )
1506 :
1507 : CPP_LEAVE_PROCEDURE( "PARPATTERNDECOMPTOGHOST" )
1508 3840 : RETURN
1509 : !EOC
1510 3840 : END SUBROUTINE ParPatternDecompToGhost
1511 : !-----------------------------------------------------------------------
1512 :
1513 :
1514 : !-----------------------------------------------------------------------
1515 : !BOP
1516 : ! !IROUTINE: ParPatternGhostToDecomp --- Create pattern between decomps
1517 : !
1518 : ! !INTERFACE:
1519 3072 : SUBROUTINE ParPatternGhostToDecomp( InComm, GA, DB, Pattern, mod_method, T )
1520 : !
1521 : ! !USES:
1522 : USE decompmodule, ONLY : DecompType, DecompGlobalToLocal, DecompInfo
1523 : USE ghostmodule, ONLY : GhostType, GhostInfo
1524 : USE mod_comm, ONLY : get_partneroffset
1525 : IMPLICIT NONE
1526 :
1527 : ! !INPUT PARAMETERS:
1528 : INTEGER, INTENT( IN ) :: InComm ! Communicator
1529 : TYPE(GhostType), INTENT( IN ) :: GA ! Source Decomp Desc
1530 : TYPE(DecompType), INTENT( IN ) :: DB ! Target Decomp Desc
1531 : INTEGER, INTENT( IN ), OPTIONAL :: mod_method ! contiguous or derived type
1532 : INTEGER, INTENT( IN ), OPTIONAL :: T ! Data type (default: CPP_MPI_REAL8)
1533 : ! !OUTPUT PARAMETERS:
1534 : TYPE(ParPatternType), INTENT( OUT ) :: Pattern ! Comm Pattern
1535 : !
1536 : ! !DESCRIPTION:
1537 : ! This routine constructs a communication pattern for a
1538 : ! transformation from a ghosted decomposition to a partitioned
1539 : ! one, i.e., a so-called "transpose". The resulting communication
1540 : ! pattern can be used in ParBegin/EndTransfer with the decomposed
1541 : ! arrays as inputs.
1542 : !
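     : ! A minimal usage sketch (hypothetical objects GhostA, DecompB and
     : ! buffers GhostedIn/PartitionedOut; the generic ParPatternCreate
     : ! interface resolves to this routine for these argument types):
     : !
     : !   TYPE(ParPatternType) :: XPose
     : !   CALL ParPatternCreate( CommGlobal, GhostA, DecompB, XPose )
     : !   CALL ParBeginTransfer( CommGlobal, XPose, GhostedIn, PartitionedOut )
     : !   CALL ParEndTransfer( CommGlobal, XPose, GhostedIn, PartitionedOut )
     : !   CALL ParPatternFree( CommGlobal, XPose )
     : !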
1543 : ! !SYSTEM ROUTINES:
1544 : ! MPI_COMM_SIZE, MPI_COMM_RANK, MPI_COMM_DUP
1545 : ! MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
1546 : !
1547 : ! !REVISION HISTORY:
1548 : ! 02.01.10 Sawyer Creation from DecompToDecomp
1549 : ! 02.07.16 Sawyer Added data type T
1550 : ! 03.11.11 Mirin Added optional argument mod_method
1551 : ! 07.03.11 Mirin Generalized to different sized decompositions
1552 : ! 07.09.04 Dennis Reduced amount of temporary memory usage
1553 : !
1554 : !EOP
1555 : !-----------------------------------------------------------------------
1556 : !BOC
1557 : ! !LOCAL VARIABLES:
1558 : INTEGER I, J, Tag, Local, Pe, Len, JA, Ipe, Num, Inc, Off
1559 : INTEGER NpesA, GlobalSizeA, LocalSizeA, BorderSizeA
1560 : INTEGER OldPe, OldLocal, TotalPtsB, NpesB
1561 : INTEGER GroupSize, Iam, Ierror
1562 : INTEGER Ptr ! Pointer type
1563 : INTEGER DataType
1564 : INTEGER :: method
1565 : INTEGER :: nCount, maxCount, ierr
1566 : #ifdef _SMEMORY
1567 : TYPE (ParInfoType) :: Info
1568 : #endif
1569 :
1570 3072 : INTEGER, ALLOCATABLE :: Count(:) ! # segments for each recv PE
1571 3072 : INTEGER, ALLOCATABLE :: CountOut(:) ! # segments for each send PE
1572 :
1573 3072 : INTEGER, ALLOCATABLE :: DisplacementsA(:) ! Generic displacements
1574 3072 : INTEGER, ALLOCATABLE :: BlockSizesA(:) ! Generic block sizes
1575 3072 : INTEGER, ALLOCATABLE :: GlobalA(:) ! Generic Local indices
1576 3072 : INTEGER, ALLOCATABLE :: PeA(:) ! Processor element numbers
1577 :
1578 3072 : INTEGER, ALLOCATABLE :: DisplacementsB(:) ! Displacements for B
1579 3072 : INTEGER, ALLOCATABLE :: BlockSizesB(:) ! Block sizes for B
1580 3072 : INTEGER, ALLOCATABLE :: GlobalB(:) ! Global indices for B
1581 :
1582 : CPP_ENTER_PROCEDURE( "PARPATTERNGHOSTTODECOMP" )
1583 :
1584 3072 : IF (present(T)) THEN
1585 0 : DataType = T
1586 : ELSE
1587 3072 : DataType = CPP_MPI_REAL8
1588 : ENDIF
1589 :
1590 3072 : IF (present(mod_method)) THEN
1591 3072 : method = mod_method
1592 : ELSE
1593 : method = 0 ! Default method - see mod_comm for description
1594 : ENDIF
1595 :
1596 : ! Assume this routine is called by processes [ 0,max(NpesA,NpesB) )
1597 :
1598 3072 : CALL GhostInfo( GA, NpesA, GlobalSizeA, LocalSizeA, BorderSizeA )
1599 3072 : CALL DecompInfo( DB, NpesB, TotalPtsB )
1600 :
1601 3072 : CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
1602 3072 : CALL MPI_COMM_RANK( InComm, Iam, Ierror )
1603 3072 : CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
1604 :
1605 : #ifdef _SMEMORY
1606 : ! Calculate info about the pattern
1607 3072 : call ParCalcInfo(InComm,GA,DB, Info)
1608 3072 : GlobalSizeA=Info%maxNumSeg
1609 3072 : TotalPtsB=Info%maxNumSeg
1610 : #endif
1611 :
1612 3072 : Pattern%Size = GroupSize
1613 3072 : Pattern%Iam = Iam
1618 : !
1619 : ! Allocate the patterns
1620 : !
1621 9216 : ALLOCATE( Pattern%SendDesc( NpesB ) )
1622 1182720 : Pattern%SendDesc(:)%method = method
1623 3072 : if (iam .ge. NpesA) then
1624 0 : do ipe = 1, NpesB
1625 0 : ALLOCATE( Pattern%SendDesc(ipe)%Displacements(1) )
1626 0 : ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(1) )
1627 0 : Pattern%SendDesc(ipe)%Tot_Size = -1
1628 0 : Pattern%SendDesc(ipe)%Nparcels = -1
1629 0 : Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
1630 0 : Pattern%SendDesc(ipe)%Displacements(1) = -1
1631 0 : Pattern%SendDesc(ipe)%Blocksizes(1) = -1
1632 : enddo
1633 : endif
1634 :
1635 9216 : ALLOCATE( Pattern%RecvDesc( NpesA ) )
1636 1182720 : Pattern%RecvDesc(:)%method = method
1637 3072 : if (iam .ge. NpesB) then
1638 0 : do ipe = 1, NpesA
1639 0 : ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(1) )
1640 0 : ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(1) )
1641 0 : Pattern%RecvDesc(ipe)%Tot_Size = -1
1642 0 : Pattern%RecvDesc(ipe)%Nparcels = -1
1643 0 : Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
1644 0 : Pattern%RecvDesc(ipe)%Displacements(1) = -1
1645 0 : Pattern%RecvDesc(ipe)%Blocksizes(1) = -1
1646 : enddo
1647 : endif
1648 :
1649 : !
1650 : ! Local allocations
1651 : !
1652 9216 : ALLOCATE( DisplacementsA( GlobalSizeA ) ) ! Allocate for worst case
1653 6144 : ALLOCATE( BlockSizesA( GlobalSizeA ) ) ! Allocate for worst case
1654 6144 : ALLOCATE( GlobalA( GlobalSizeA ) ) ! Allocate for worst case
1655 6144 : ALLOCATE( PeA( GlobalSizeA ) ) ! Allocate for worst case
1656 :
1657 9216 : ALLOCATE( DisplacementsB( TotalPtsB ) ) ! Allocate for worst case
1658 6144 : ALLOCATE( BlockSizesB( TotalPtsB ) ) ! Allocate for worst case
1659 6144 : ALLOCATE( GlobalB( TotalPtsB ) ) ! Allocate for worst case
1660 :
1661 9216 : ALLOCATE( Count( GroupSize ) )
1662 6144 : ALLOCATE( CountOut( GroupSize ) )
1663 :
1664 3072 : JA = 0
1665 1182720 : Count = 0
1666 1219584 : Len = 0
1667 1219584 : GlobalB = 0 ! (needed for parexchangevector later)
1668 1219584 : BlockSizesB = 0 ! (needed for parexchangevector later)
1669 :
1670 3072 : Num = 0
1671 3072 : Inc = 0
1672 :
1673 3072 : if (iam .lt. NpesB) then
1674 :
1675 : !
1676 : ! Parse through all the tags in the local segment
1677 1203456 : DO J = 1, SIZE( DB%Head(iam+1)%StartTags )
1678 1200384 : OldPe = -1 ! Set PE undefined
1679 1200384 : OldLocal = 0 ! Set index value undefined
1680 15608064 : DO Tag=DB%Head(iam+1)%StartTags(J), DB%Head(iam+1)%EndTags(J)
1681 : !
1682 : ! Determine the index and PE of this entry on A. This might be inlined later
1683 : !
1684 14404608 : CALL DecompGlobalToLocal( GA%Decomp, Tag, Local, Pe )
1685 :
1686 : !
1687 : ! A new segment starts when the source PE changes or the local
     : ! index jumps; iam receives this segment from Pe
1688 : !
1689 14404608 : IF ( Pe /= OldPe .OR. Local /= OldLocal+1 ) THEN
1690 1200384 : IF ( ja > 0 ) THEN
1691 1197312 : BlockSizesA(ja) = Len
1692 1197312 : Len = 0
1693 : ENDIF
1694 1200384 : ja = ja+1 ! increment the segment index
1695 1200384 : DisplacementsA(ja) = Inc ! Zero-based offset of local segment
1696 1200384 : GlobalA(ja) = Tag ! The global tag of the desired datum
1697 1200384 : PeA(ja) = Pe ! Note the ID of the sender
1698 1200384 : Count(Pe+1) = Count(Pe+1)+1 ! Increment counter of segments
1699 : ENDIF
1700 14404608 : OldPe = Pe ! Update old PE
1701 14404608 : OldLocal = Local ! Update old local index
1702 14404608 : Len = Len+1 ! Good -- segment is getting longer
1703 30009600 : Inc = Inc+1 ! Increment local index
1704 : ENDDO
1705 : ENDDO
1706 : !
1707 : ! Clean up
1708 : !
1709 3072 : BlockSizesA(ja) = Len
1710 : CPP_ASSERT_F90( JA .LE. GlobalSizeA )
1711 : !
1712 : ! Now create the pattern from the displacements and block sizes
1713 : !
1714 3072 : Inc = 0
1715 1182720 : DO ipe = 1, NpesA
1716 : !
1717 : ! Find the segments which are relevant for the sender ipe
1718 : ! Make compact arrays BlockSizes and Displacements
1719 : !
1720 462130176 : DO j = 1, ja
1721 462127104 : IF ( PeA(j) == ipe-1 ) THEN
1722 1200384 : Inc = Inc + 1
1723 1200384 : BlockSizesB(Inc) = BlockSizesA(j)
1724 1200384 : DisplacementsB(Inc) = DisplacementsA(j)
1725 1200384 : GlobalB(Inc) = GlobalA(j)
1726 : ENDIF
1727 : ENDDO
1728 : ENDDO
1729 :
1730 : CPP_ASSERT_F90(Inc .LE. TotalPtsB)
1731 :
1732 : !
1733 : ! Create the receiver communication pattern
1734 : !
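     : ! (With method > 0 each parcel list is also committed below as an
     : ! MPI indexed datatype; with the default method the Displacements/
     : ! BlockSizes lists are used directly by mod_comm.)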
1735 3072 : Off = 0
1736 1182720 : DO ipe = 1, NpesA
1737 1179648 : Num = Count(ipe)
1738 : #if defined( DEBUG_PARPATTERNGHOSTTODECOMP )
1739 : write(iulog,*) "Receiver Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
1740 : "Displacements", DisplacementsB(Off+1:Off+Num), &
1741 : "BlockSizes", BlockSizesB(Off+1:Off+Num)
1742 : #endif
1743 :
1744 1179648 : IF ( Num > 0 .and. method > 0 ) THEN
1745 0 : CALL MPI_TYPE_INDEXED( Num, BlockSizesB(Off+1), &
1746 0 : DisplacementsB(Off+1), DataType, Ptr, Ierror )
1747 0 : CALL MPI_TYPE_COMMIT( Ptr, Ierror )
1748 0 : Pattern%RecvDesc(ipe)%type = Ptr
1749 : ELSE
1750 1179648 : Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
1751 : ENDIF
1752 :
1753 2396160 : ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Num) )
1754 1216512 : ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Num) )
1755 2380032 : DO i=1, Num
1756 1200384 : Pattern%RecvDesc(ipe)%Displacements(i) = DisplacementsB(i+Off)
1757 2380032 : Pattern%RecvDesc(ipe)%BlockSizes(i) = BlockSizesB(i+Off)
1758 : ENDDO
1759 0 : Pattern%RecvDesc(ipe)%Nparcels = &
1760 1179648 : size (Pattern%RecvDesc(ipe)%Displacements)
1761 0 : Pattern%RecvDesc(ipe)%Tot_Size = &
1762 2380032 : sum (Pattern%RecvDesc(ipe)%Blocksizes)
1763 1179648 : Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
1764 :
1765 1182720 : Off = Off + Num
1766 : ENDDO
1767 :
1768 : endif ! (iam .lt. NpesB)
1769 :
1770 : !
1771 : ! Now communicate what the receiver is expecting to the sender
1772 : !
1773 : CALL ParExchangeVectorInt( InComm, Count, GlobalB, &
1774 3072 : CountOut, GlobalA )
1775 : CALL ParExchangeVectorInt( InComm, Count, BlockSizesB, &
1776 3072 : CountOut, BlockSizesA )
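     : ! (On return GlobalA/BlockSizesA hold, per destination PE, the
     : ! global tags and lengths requested of iam; the sender converts
     : ! the tags to local displacements with DecompGlobalToLocal below.)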
1777 :
1778 3072 : if (iam .lt. NpesA) then
1779 :
1780 : !
1781 : ! Sender A: BlockSizes and Displacements can now be stored
1782 : !
1783 3072 : Off = 0
1784 1182720 : DO ipe=1, NpesB
1785 1179648 : Num = CountOut(ipe)
1786 2380032 : DO i=1, Num
1787 1200384 : CALL DecompGlobalToLocal( GA%Local, GlobalA(i+Off), Local, Pe )
1788 2380032 : DisplacementsA(i+Off) = Local-1 ! zero-based displacement
1789 : ENDDO
1790 : #if defined( DEBUG_PARPATTERNGHOSTTODECOMP )
1791 : write(iulog,*) "Sender Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
1792 : "Displacements", DisplacementsA(Off+1:Off+Num), &
1793 : "BlockSizes", BlockSizesA(Off+1:Off+Num)
1794 : #endif
1795 :
1796 1179648 : IF ( Num > 0 .and. method > 0 ) THEN
1797 0 : CALL MPI_TYPE_INDEXED( Num, BlockSizesA(Off+1), &
1798 0 : DisplacementsA(Off+1), DataType, Ptr, Ierror )
1799 0 : CALL MPI_TYPE_COMMIT( Ptr, Ierror )
1800 0 : Pattern%SendDesc(ipe)%type = Ptr
1801 : ELSE
1802 1179648 : Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
1803 : ENDIF
1804 :
1805 2396160 : ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
1806 1216512 : ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
1807 2380032 : DO i=1, Num
1808 1200384 : Pattern%SendDesc(ipe)%Displacements(i) = DisplacementsA(i+Off)
1809 2380032 : Pattern%SendDesc(ipe)%BlockSizes(i) = BlockSizesA(i+Off)
1810 : ENDDO
1811 0 : Pattern%SendDesc(ipe)%Nparcels = &
1812 1179648 : size (Pattern%SendDesc(ipe)%Displacements)
1813 0 : Pattern%SendDesc(ipe)%Tot_Size = &
1814 2380032 : sum (Pattern%SendDesc(ipe)%Blocksizes)
1815 1179648 : Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
1816 :
1817 1182720 : Off = Off + Num
1818 : ENDDO
1819 :
1820 : endif ! (iam .lt. NpesA)
1821 :
1822 3072 : CALL get_partneroffset( InComm, Pattern%SendDesc, Pattern%RecvDesc )
1823 :
1824 3072 : DEALLOCATE( CountOut )
1825 3072 : DEALLOCATE( Count )
1826 :
1827 3072 : DEALLOCATE( PeA )
1828 3072 : DEALLOCATE( GlobalA )
1829 3072 : DEALLOCATE( BlockSizesA )
1830 3072 : DEALLOCATE( DisplacementsA )
1831 :
1832 3072 : DEALLOCATE( GlobalB )
1833 3072 : DEALLOCATE( BlockSizesB )
1834 3072 : DEALLOCATE( DisplacementsB )
1835 :
1836 : CPP_LEAVE_PROCEDURE( "PARPATTERNGHOSTTODECOMP" )
1837 3072 : RETURN
1838 : !EOC
1839 3072 : END SUBROUTINE ParPatternGhostToDecomp
1840 : !-----------------------------------------------------------------------
1841 :
1842 : !-----------------------------------------------------------------------
1843 : !BOP
1844 : ! !IROUTINE: ParPatternGhostToGhost --- Create pattern between ghosted decomps
1845 : !
1846 : ! !INTERFACE:
1847 0 : SUBROUTINE ParPatternGhostToGhost( InComm, GA, GB, Pattern, mod_method, T )
1848 : !
1849 : ! !USES:
1850 : USE decompmodule, ONLY : DecompGlobalToLocal
1851 : USE ghostmodule, ONLY : GhostType, GhostInfo
1852 : USE mod_comm, ONLY : get_partneroffset
1853 : IMPLICIT NONE
1854 :
1855 : ! !INPUT PARAMETERS:
1856 : INTEGER, INTENT( IN ) :: InComm ! Communicator
1857 : TYPE(GhostType), INTENT( IN ) :: GA ! Source Ghost Decomp
1858 : TYPE(GhostType), INTENT( IN ) :: GB ! Target Ghost Decomp
1859 : INTEGER, INTENT( IN ), OPTIONAL :: mod_method ! contiguous or derived type
1860 : INTEGER, INTENT( IN ), OPTIONAL :: T ! Data type (default: CPP_MPI_REAL8)
1861 : ! !OUTPUT PARAMETERS:
1862 : TYPE(ParPatternType), INTENT( OUT ) :: Pattern ! Comm Pattern
1863 : !
1864 : ! !DESCRIPTION:
1865 : ! This routine constructs a communication pattern for a
1866 : ! transformation from one ghosted decomposition to another
1867 : ! ghosted one, i.e., a generalized "transpose". The resulting communication
1868 : ! pattern can be used in ParBegin/EndTransfer with the decomposed
1869 : ! arrays as inputs.
1870 : !
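     : ! Sketch (hypothetical ghosted decompositions GhostA and GhostB;
     : ! the optional arguments select the transfer method and datatype):
     : !
     : !   CALL ParPatternCreate( CommGlobal, GhostA, GhostB, Pattern, &
     : !                          mod_method=0, T=MPI_DOUBLE_PRECISION )
     : !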
1871 : ! !SYSTEM ROUTINES:
1872 : ! MPI_COMM_SIZE, MPI_COMM_RANK, MPI_COMM_DUP
1873 : ! MPI_TYPE_INDEXED, MPI_TYPE_COMMIT (depending on method)
1874 : !
1875 : ! !REVISION HISTORY:
1876 : ! 02.01.10 Sawyer Creation from DecompToDecomp
1877 : ! 02.07.16 Sawyer Added data type T
1878 : ! 03.11.11 Mirin Added optional argument mod_method
1879 : ! 07.03.11 Mirin Generalized to different sized decompositions
1880 : ! 07.09.04 Dennis Reduced amount of temporary memory usage
1881 : !
1882 : !EOP
1883 : !-----------------------------------------------------------------------
1884 : !BOC
1885 : ! !LOCAL VARIABLES:
1886 : INTEGER I, J, Tag, Local, Pe, Len, JA, Ipe, Num, Inc, Off
1887 : INTEGER NpesA, GlobalSizeA, LocalSizeA, BorderSizeA
1888 : INTEGER NpesB, GlobalSizeB, LocalSizeB, BorderSizeB
1889 : INTEGER GroupSize, Iam, Ierror, OldPe, OldLocal
1890 : INTEGER Ptr ! Pointer type
1891 : INTEGER DataType
1892 : INTEGER :: method
1893 : INTEGER :: nCount, maxCount, ierr
1894 : #ifdef _SMEMORY
1895 : TYPE (ParInfoType) :: Info
1896 : #endif
1897 :
1898 0 : INTEGER, ALLOCATABLE :: Count(:) ! # segments for each recv PE
1899 0 : INTEGER, ALLOCATABLE :: CountOut(:) ! # segments for each send PE
1900 :
1901 0 : INTEGER, ALLOCATABLE :: DisplacementsA(:) ! Generic displacements
1902 0 : INTEGER, ALLOCATABLE :: BlockSizesA(:) ! Generic block sizes
1903 0 : INTEGER, ALLOCATABLE :: GlobalA(:) ! Generic Local indices
1904 0 : INTEGER, ALLOCATABLE :: PeA(:) ! Processor element numbers
1905 :
1906 0 : INTEGER, ALLOCATABLE :: DisplacementsB(:) ! Displacements for B
1907 0 : INTEGER, ALLOCATABLE :: BlockSizesB(:) ! Block sizes for B
1908 0 : INTEGER, ALLOCATABLE :: GlobalB(:) ! Global indices for B
1909 :
1910 : CPP_ENTER_PROCEDURE( "PARPATTERNGHOSTTOGHOST" )
1911 :
1912 0 : IF (present(T)) THEN
1913 0 : DataType = T
1914 : ELSE
1915 0 : DataType = CPP_MPI_REAL8
1916 : ENDIF
1917 :
1918 0 : IF (present(mod_method)) THEN
1919 0 : method = mod_method
1920 : ELSE
1921 : method = 0 ! Default method - see mod_comm for description
1922 : ENDIF
1923 :
1924 : ! Assume this routine is called by processes [ 0,max(NpesA,NpesB) )
1925 :
1926 0 : CALL GhostInfo( GA, NpesA, GlobalSizeA, LocalSizeA, BorderSizeA )
1927 0 : CALL GhostInfo( GB, NpesB, GlobalSizeB, LocalSizeB, BorderSizeB )
1928 :
1929 0 : CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
1930 0 : CALL MPI_COMM_RANK( InComm, Iam, Ierror )
1931 0 : CALL MPI_COMM_DUP( InComm, Pattern%Comm, Ierror )
1932 :
1933 : #ifdef _SMEMORY
1934 : ! Calculate info about the pattern
1935 0 : call ParCalcInfo(InComm,GA,GB, Info)
1936 0 : GlobalSizeA=Info%maxNumSeg
1937 0 : GlobalSizeB=Info%maxNumSeg
1938 : #endif
1939 :
1940 0 : Pattern%Size = GroupSize
1941 0 : Pattern%Iam = Iam
1946 : !
1947 : ! Allocate the patterns
1948 : !
1949 0 : ALLOCATE( Pattern%SendDesc( NpesB ) )
1950 0 : Pattern%SendDesc(:)%method = method
1951 0 : if (iam .ge. NpesA) then
1952 0 : do ipe = 1, NpesB
1953 0 : ALLOCATE( Pattern%SendDesc(ipe)%Displacements(1) )
1954 0 : ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(1) )
1955 0 : Pattern%SendDesc(ipe)%Tot_Size = -1
1956 0 : Pattern%SendDesc(ipe)%Nparcels = -1
1957 0 : Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
1958 0 : Pattern%SendDesc(ipe)%Displacements(1) = -1
1959 0 : Pattern%SendDesc(ipe)%Blocksizes(1) = -1
1960 : enddo
1961 : endif
1962 :
1963 0 : ALLOCATE( Pattern%RecvDesc( NpesA ) )
1964 0 : Pattern%RecvDesc(:)%method = method
1965 0 : if (iam .ge. NpesB) then
1966 0 : do ipe = 1, NpesA
1967 0 : ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(1) )
1968 0 : ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(1) )
1969 0 : Pattern%RecvDesc(ipe)%Tot_Size = -1
1970 0 : Pattern%RecvDesc(ipe)%Nparcels = -1
1971 0 : Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
1972 0 : Pattern%RecvDesc(ipe)%Displacements(1) = -1
1973 0 : Pattern%RecvDesc(ipe)%Blocksizes(1) = -1
1974 : enddo
1975 : endif
1976 :
1977 : !
1978 : ! Local allocations
1979 : !
1980 0 : ALLOCATE( DisplacementsA( GlobalSizeA ) ) ! Allocate for worst case
1981 0 : ALLOCATE( BlockSizesA( GlobalSizeA ) ) ! Allocate for worst case
1982 0 : ALLOCATE( GlobalA( GlobalSizeA ) ) ! Allocate for worst case
1983 0 : ALLOCATE( PeA( GlobalSizeA ) ) ! Allocate for worst case
1984 :
1985 0 : ALLOCATE( DisplacementsB( GlobalSizeB ) ) ! Allocate for worst case
1986 0 : ALLOCATE( BlockSizesB( GlobalSizeB ) ) ! Allocate for worst case
1987 0 : ALLOCATE( GlobalB( GlobalSizeB ) ) ! Allocate for worst case
1988 :
1989 0 : ALLOCATE( Count( GroupSize ) )
1990 0 : ALLOCATE( CountOut( GroupSize ) )
1991 :
1992 0 : JA = 0
1993 0 : Count = 0
1994 0 : Len = 0
1995 0 : GlobalB = 0 ! (needed for parexchangevector later)
1996 0 : BlocksizesB = 0 ! (needed for parexchangevector later)
1997 :
1998 0 : Num = 0
1999 0 : Inc = 0
2000 :
2001 0 : if (iam .lt. NpesB) then
2002 :
2003 : !
2004 : ! Parse through all the tags in the local segment
2005 0 : DO J = 1, SIZE( GB%Local%Head(iam+1)%StartTags )
2006 0 : OldPe = -1 ! Set PE undefined
2007 0 : OldLocal = 0 ! Set index value undefined
2008 0 : DO Tag=GB%Local%Head(iam+1)%StartTags(J), GB%Local%Head(iam+1)%EndTags(J)
2009 0 : IF ( Tag > 0 ) THEN ! Active point
2010 : !
2011 : ! Determine the index and PE of this entry on A. This might be inlined later
2012 : !
2013 0 : CALL DecompGlobalToLocal( GA%Decomp, Tag, Local, Pe )
2014 : !
2015 : ! A new segment starts when the source PE changes or the local
     : ! index jumps; iam receives this segment from Pe
2016 : !
2017 0 : IF ( Pe /= OldPe .OR. Local /= OldLocal+1 ) THEN
2018 0 : IF ( ja > 0 ) THEN
2019 0 : BlockSizesA(ja) = Len
2020 0 : Len = 0
2021 : ENDIF
2022 0 : ja = ja+1 ! increment the segment index
2023 0 : DisplacementsA(ja) = Inc ! Zero-based offset of local segment
2024 0 : GlobalA(ja) = Tag ! The global tag of the desired datum
2025 0 : PeA(ja) = Pe ! Note the ID of the sender
2026 0 : Count(Pe+1) = Count(Pe+1)+1 ! Increment counter of segments
2027 : ENDIF
2028 0 : OldPe = Pe ! Update old PE
2029 0 : OldLocal = Local ! Update old local index
2030 0 : Len = Len+1 ! Good -- segment is getting longer
2031 : ENDIF
2032 0 : Inc = Inc+1 ! Increment local index
2033 : ENDDO
2034 : ENDDO
2035 : !
2036 : ! Clean up
2037 : !
2038 0 : BlockSizesA(ja) = Len
2039 :
2040 : CPP_ASSERT_F90( JA .LE. GlobalSizeA )
2041 :
2042 : !
2043 : ! Now create the pattern from the displacements and block sizes
2044 : !
2045 0 : Inc = 0
2046 0 : DO ipe = 1, NpesA
2047 : !
2048 : ! Find the segments which are relevant for the sender ipe
2049 : ! Make compact arrays BlockSizes and Displacements
2050 : !
2051 0 : DO j = 1, ja
2052 0 : IF ( PeA(j) == ipe-1 ) THEN
2053 0 : Inc = Inc + 1
2054 0 : BlockSizesB(Inc) = BlockSizesA(j)
2055 0 : DisplacementsB(Inc) = DisplacementsA(j)
2056 0 : GlobalB(Inc) = GlobalA(j)
2057 : ENDIF
2058 : ENDDO
2059 : ENDDO
2060 : CPP_ASSERT_F90( Inc .LE. GlobalSizeB )
2061 :
2062 : !
2063 : ! Create the receiver communication pattern
2064 : !
2065 0 : Off = 0
2066 0 : DO ipe = 1, NpesA
2067 0 : Num = Count(ipe)
2068 : #if defined(DEBUG_PARPATTERNGHOSTTOGHOST)
2069 : write(iulog,*) "Receiver Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
2070 : "Displacements", DisplacementsB(Off+1:Off+Num), &
2071 : "BlockSizes", BlockSizesB(Off+1:Off+Num)
2072 : #endif
2073 :
2074 0 : IF ( Num > 0 .and. method > 0 ) THEN
2075 0 : CALL MPI_TYPE_INDEXED( Num, BlockSizesB(Off+1), &
2076 0 : DisplacementsB(Off+1), DataType, Ptr, Ierror )
2077 0 : CALL MPI_TYPE_COMMIT( Ptr, Ierror )
2078 0 : Pattern%RecvDesc(ipe)%type = Ptr
2079 : ELSE
2080 0 : Pattern%RecvDesc(ipe)%type = MPI_DATATYPE_NULL
2081 : ENDIF
2082 :
2083 0 : ALLOCATE( Pattern%RecvDesc(ipe)%Displacements(Num) )
2084 0 : ALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes(Num) )
2085 0 : DO i=1, Num
2086 0 : Pattern%RecvDesc(ipe)%Displacements(i) = DisplacementsB(i+Off)
2087 0 : Pattern%RecvDesc(ipe)%BlockSizes(i) = BlockSizesB(i+Off)
2088 : ENDDO
2089 0 : Pattern%RecvDesc(ipe)%Nparcels = &
2090 0 : size (Pattern%RecvDesc(ipe)%Displacements)
2091 0 : Pattern%RecvDesc(ipe)%Tot_Size = &
2092 0 : sum (Pattern%RecvDesc(ipe)%Blocksizes)
2093 0 : Max_Nparcels = max (Max_Nparcels, Pattern%RecvDesc(ipe)%Nparcels)
2094 :
2095 0 : Off = Off + Num
2096 : ENDDO
2097 :
2098 : endif ! (iam .lt. NpesB)
2099 :
2100 : !
2101 : ! Now communicate what the receiver is expecting to the sender
2102 : !
2103 : CALL ParExchangeVectorInt( InComm, Count, GlobalB, &
2104 0 : CountOut, GlobalA )
2105 : CALL ParExchangeVectorInt( InComm, Count, BlockSizesB, &
2106 0 : CountOut, BlockSizesA )
2107 :
2108 0 : if (iam .lt. NpesA) then
2109 :
2110 : !
2111 : ! Sender A: BlockSizes and Displacements can now be stored
2112 : !
2113 0 : Off = 0
2114 0 : DO ipe=1, NpesB
2115 0 : Num = CountOut(ipe)
2116 0 : DO i=1, Num
2117 0 : CALL DecompGlobalToLocal( GA%Local, GlobalA(i+Off), Local, Pe )
2118 0 : DisplacementsA(i+Off) = Local-1 ! zero-based displacement
2119 : ENDDO
2120 : #if defined(DEBUG_PARPATTERNGHOSTTOGHOST)
2121 : write(iulog,*) "Sender Iam", Iam, "Ipe", Ipe-1, "Num", Num, &
2122 : "Displacements", DisplacementsA(Off+1:Off+Num), &
2123 : "BlockSizes", BlockSizesA(Off+1:Off+Num)
2124 : #endif
2125 :
2126 0 : IF ( Num > 0 .and. method > 0 ) THEN
2127 0 : CALL MPI_TYPE_INDEXED( Num, BlockSizesA(Off+1), &
2128 0 : DisplacementsA(Off+1), DataType, Ptr, Ierror )
2129 0 : CALL MPI_TYPE_COMMIT( Ptr, Ierror )
2130 0 : Pattern%SendDesc(ipe)%type = Ptr
2131 : ELSE
2132 0 : Pattern%SendDesc(ipe)%type = MPI_DATATYPE_NULL
2133 : ENDIF
2134 :
2135 0 : ALLOCATE( Pattern%SendDesc(ipe)%Displacements(Num) )
2136 0 : ALLOCATE( Pattern%SendDesc(ipe)%BlockSizes(Num) )
2137 0 : DO i=1, Num
2138 0 : Pattern%SendDesc(ipe)%Displacements(i) = DisplacementsA(i+Off)
2139 0 : Pattern%SendDesc(ipe)%BlockSizes(i) = BlockSizesA(i+Off)
2140 : ENDDO
2141 0 : Pattern%SendDesc(ipe)%Nparcels = &
2142 0 : size (Pattern%SendDesc(ipe)%Displacements)
2143 0 : Pattern%SendDesc(ipe)%Tot_Size = &
2144 0 : sum (Pattern%SendDesc(ipe)%Blocksizes)
2145 0 : Max_Nparcels = max (Max_Nparcels, Pattern%SendDesc(ipe)%Nparcels)
2146 :
2147 0 : Off = Off + Num
2148 : ENDDO
2149 :
2150 : endif ! (iam .lt. NpesA)
2151 :
2152 0 : CALL get_partneroffset( InComm, Pattern%SendDesc, Pattern%RecvDesc )
2153 :
2154 :
2155 0 : DEALLOCATE( CountOut )
2156 0 : DEALLOCATE( Count )
2157 :
2158 0 : DEALLOCATE( PeA )
2159 0 : DEALLOCATE( GlobalA )
2160 0 : DEALLOCATE( BlockSizesA )
2161 0 : DEALLOCATE( DisplacementsA )
2162 :
2163 0 : DEALLOCATE( GlobalB )
2164 0 : DEALLOCATE( BlockSizesB )
2165 0 : DEALLOCATE( DisplacementsB )
2166 :
2167 : CPP_LEAVE_PROCEDURE( "PARPATTERNGHOSTTOGHOST" )
2168 0 : RETURN
2169 : !EOC
2170 0 : END SUBROUTINE ParPatternGhostToGhost
2171 : !-----------------------------------------------------------------------
2172 :
2173 : !-----------------------------------------------------------------------
2174 : !BOP
2175 : ! !IROUTINE: ParPatternFree --- Free the communication pattern
2176 : !
2177 : ! !INTERFACE:
2178 0 : SUBROUTINE ParPatternFree( InComm, Pattern )
2179 : !
2180 : ! !USES:
2181 : IMPLICIT NONE
2182 :
2183 : ! !INPUT PARAMETERS:
2184 : INTEGER, INTENT( IN ) :: InComm ! Communicator
2185 : ! !INPUT/OUTPUT PARAMETERS:
2186 : TYPE(ParPatternType), INTENT( INOUT ) :: Pattern ! Comm Pattern
2187 : !
2188 : ! !DESCRIPTION:
2189 : ! This routine frees a communication pattern.
2190 : !
2191 : ! !SYSTEM ROUTINES:
2192 : ! MPI_TYPE_FREE
2193 : !
2194 : ! !BUGS:
2195 : ! The MPI_TYPE_FREE statement does not seem to work with FFC
2196 : !
2197 : ! !REVISION HISTORY:
2198 : ! 01.02.10 Sawyer Creation
2199 : !
2200 : !EOP
2201 : !-----------------------------------------------------------------------
2202 : !BOC
2203 : ! !LOCAL VARIABLES:
2204 : INTEGER ipe, GroupSize, Pointer, Ierror, method
2205 :
2206 : CPP_ENTER_PROCEDURE( "PARPATTERNFREE" )
2207 :
2208 0 : method = Pattern%RecvDesc(1)%method
2209 :
2210 : !
2213 : ! Free all the MPI derived types
2214 : !
     : ! (SendDesc and RecvDesc may differ in length, so free each over its own extent)
2215 0 : DO ipe=1, size(Pattern%SendDesc)
2216 0 : Pointer = Pattern%SendDesc(ipe)%type
2217 0 : IF ( Pointer /= MPI_DATATYPE_NULL ) THEN
2218 0 : CALL MPI_TYPE_FREE( Pointer, Ierror )
2219 : ENDIF
     : ENDDO
     : DO ipe=1, size(Pattern%RecvDesc)
2220 0 : Pointer = Pattern%RecvDesc(ipe)%type
2221 0 : IF ( Pointer /= MPI_DATATYPE_NULL ) THEN
2222 0 : CALL MPI_TYPE_FREE( Pointer, Ierror )
2223 : ENDIF
2224 : ENDDO
2225 :
2226 0 : DO ipe=1, size(Pattern%RecvDesc)
2227 0 : DEALLOCATE( Pattern%RecvDesc(ipe)%Displacements )
2228 0 : DEALLOCATE( Pattern%RecvDesc(ipe)%BlockSizes )
2229 : ENDDO
2230 0 : DO ipe=1, size(Pattern%SendDesc)
2231 0 : DEALLOCATE( Pattern%SendDesc(ipe)%Displacements )
2232 0 : DEALLOCATE( Pattern%SendDesc(ipe)%BlockSizes )
2233 : ENDDO
2234 :
2235 0 : DEALLOCATE( Pattern%SendDesc )
2236 0 : DEALLOCATE( Pattern%RecvDesc )
2237 :
2238 : CPP_LEAVE_PROCEDURE( "PARPATTERNFREE" )
2239 0 : RETURN
2240 : !EOC
2241 : END SUBROUTINE ParPatternFree
2242 : !-----------------------------------------------------------------------
2243 :
2244 :
2245 : !-----------------------------------------------------------------------
2246 : !BOP
2247 : ! !IROUTINE: ParScatterReal --- Scatter slice to all PEs
2248 : !
2249 : ! !INTERFACE:
2250 0 : SUBROUTINE ParScatterReal ( InComm, Root, Slice, Decomp, Local )
2251 :
2252 : ! !USES:
2253 : USE decompmodule, ONLY: DecompType, Lists
2254 : IMPLICIT NONE
2255 :
2256 : ! !INPUT PARAMETERS:
2257 : INTEGER, INTENT( IN ) :: InComm ! Communicator
2258 : INTEGER, INTENT( IN ) :: Root ! Root PE
2259 : REAL(CPP_REAL8), INTENT( IN ) :: Slice(*) ! Global Slice
2260 : TYPE(DecompType), INTENT( IN ) :: Decomp ! Decomp information
2261 :
2262 : ! !OUTPUT PARAMETERS:
2263 : REAL(CPP_REAL8), INTENT( OUT ) :: Local(*) ! Local Slice
2264 :
2265 : ! !DESCRIPTION:
2266 : ! Given a decomposition of the domain, dole out a slice
2267 : ! (one-dimensional array) to all the constituent PEs as described
2268 : ! by the decomposition Decomp.
2269 : !
2270 : !
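     : ! Sketch (hypothetical sizes; rank 0 holds the global slice):
     : !
     : !   REAL(CPP_REAL8) :: Slice(GlobalSize), Local(LocalSize)
     : !   CALL ParScatter( CommGlobal, 0, Slice, Decomp, Local )
     : !
     : ! where LocalSize must be at least Decomp%NumEntries(gid+1).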
2271 : ! !SYSTEM ROUTINES:
2272 : ! MPI_ISEND, MPI_RECV, MPI_COMM_RANK
2273 : !
2274 : ! !REVISION HISTORY:
2275 : ! 97.04.14 Sawyer Creation
2276 : ! 97.04.16 Sawyer Cleaned up for walk-through
2277 : ! 97.05.01 Sawyer Use Decomp%Comm for all local info
2278 : ! 97.05.18 Sawyer DecompType has moved to ParUtilitiesTypes
2279 : ! 97.05.29 Sawyer Changed 2-D arrays to 1-D
2280 : ! 97.07.03 Sawyer Reformulated documentation
2281 : ! 97.07.22 Sawyer DecompType has moved to DecompModule
2282 : ! 97.12.01 Sawyer Changed MPI_SSEND to MPI_ISEND
2283 : ! 97.12.05 Sawyer Added InComm and Root as arguments
2284 : ! 97.12.05 Sawyer Added logic to support intercommunicators
2285 : ! 98.01.24 Sawyer Removed dependence on MPI derived types TESTED
2286 : ! 98.02.05 Sawyer Removed the use of intercommunicators
2287 : ! 98.03.30 Sawyer Stats dimension corrected: Gsize*MPI_STATUS_SIZE
2288 : ! 99.01.19 Sawyer Dropped assumed-size arrays
2289 : ! 00.07.07 Sawyer Removed "1D" references
2290 : ! 00.07.23 Sawyer Implementation with shared memory arenas
2291 : !
2292 : !EOP
2293 : !-----------------------------------------------------------------------
2294 : !BOC
2295 : ! !LOCAL VARIABLES:
2296 :
2297 : INTEGER Ierror, I, J, K, L, Iam, GroupSize
2298 : INTEGER Status( MPI_STATUS_SIZE )
2299 0 : Integer, allocatable :: Reqs(:), Stats(:)
2300 0 : REAL(CPP_REAL8), ALLOCATABLE :: SendBuf(:)
2301 : !
2302 : CPP_ENTER_PROCEDURE( "PARSCATTERREAL" )
2303 : !
2304 0 : CALL MPI_COMM_RANK( InComm, Iam, Ierror )
2305 0 : CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
2306 :
2307 0 : allocate (Reqs(GroupSize))
2308 0 : allocate (Stats(GroupSize*MPI_STATUS_SIZE))
2309 :
2310 0 : IF ( Iam .EQ. Root ) THEN
2311 0 : ALLOCATE( SendBuf( SUM( Decomp%NumEntries ) ) )
2312 0 : L = 0
2313 0 : DO I = 1, GroupSize
2314 : !
2315 : ! Pick out the array sections to be sent.
2316 : ! This is the inverse of the operation in ParGather
2317 : !
2318 0 : DO J = 1, SIZE( Decomp%HEAD(I)%StartTags )
2319 0 : DO K = Decomp%HEAD(I)%StartTags(J),Decomp%HEAD(I)%EndTags(J)
2320 0 : L = L+1
2321 0 : SendBuf(L) = Slice(K)
2322 : ENDDO
2323 : ENDDO
2324 : !
2325 : ! This is a non-blocking send. SendBuf cannot be immediately deallocated
2326 : !
2327 : ! WARNING: F90-MPI inconsistency: make sure the indexing below always works
2328 : !
2329 0 : CALL MPI_ISEND( SendBuf(L-Decomp%NumEntries(I)+1), &
2330 : Decomp%NumEntries(I), CPP_MPI_REAL8, &
2331 0 : I-1, 0, InComm, Reqs(I), Ierror )
2332 :
2333 : ENDDO
2334 : ENDIF
2335 :
2336 : !
2337 : ! All receive from the root.
2338 : !
2339 : ! The local array may be larger than that specified in the decomposition
2340 : !
2341 0 : CALL MPI_RECV( Local, Decomp%NumEntries(Iam+1), &
2342 : CPP_MPI_REAL8, &
2343 0 : Root, 0, InComm, Status, Ierror )
2344 : !
2345 : ! Experience shows that we should wait for all the non-blocking
2346 : ! sends to complete, EVEN THOUGH THE MPI_RECV HAS COMPLETED !!
2347 : !
2348 0 : IF ( Iam .EQ. Root ) THEN
2349 0 : CALL MPI_WAITALL( GroupSize, Reqs, Stats, Ierror )
2350 0 : DEALLOCATE( SendBuf )
2351 : ENDIF
2352 :
2353 : !
2354 : ! The following may be needed on some platforms to avoid an MPI bug.
2355 : !
2356 0 : CALL MPI_BARRIER( InComm, Ierror )
2357 :
2358 0 : deallocate (Reqs)
2359 0 : deallocate (Stats)
2360 :
2361 : CPP_LEAVE_PROCEDURE( "PARSCATTERREAL" )
2362 0 : RETURN
2363 : !EOC
2364 0 : END SUBROUTINE ParScatterReal
2365 : !-----------------------------------------------------------------------
2366 :
2367 :
2368 : !-----------------------------------------------------------------------
2369 : !BOP
2370 : ! !IROUTINE: ParScatterReal4 --- Scatter slice to all PEs
2371 : !
2372 : ! !INTERFACE:
2373 0 : SUBROUTINE ParScatterReal4 ( InComm, Root, Slice, Decomp, Local )
2374 :
2375 : ! !USES:
2376 : USE decompmodule, ONLY: DecompType, Lists
2377 : IMPLICIT NONE
2378 :
2379 : ! !INPUT PARAMETERS:
2380 : INTEGER, INTENT( IN ) :: InComm ! Communicator
2381 : INTEGER, INTENT( IN ) :: Root ! Root PE
2382 : REAL(CPP_REAL4), INTENT( IN ) :: Slice(*) ! Global Slice
2383 : TYPE(DecompType), INTENT( IN ) :: Decomp ! Decomp information
2384 :
2385 : ! !OUTPUT PARAMETERS:
2386 : REAL(CPP_REAL4), INTENT( OUT ) :: Local(*) ! Local Slice
2387 :
2388 : ! !DESCRIPTION:
2389 : ! Given a decomposition of the domain, dole out a slice
2390 : ! (one-dimensional array) to all the constituent PEs as described
2391 : ! by the decomposition Decomp.
2392 : !
2393 : !
2394 : ! !SYSTEM ROUTINES:
2395 : ! MPI_ISEND, MPI_RECV, MPI_COMM_RANK
2396 : !
2397 : ! !REVISION HISTORY:
2398 : ! 97.04.14 Sawyer Creation
2399 : ! 97.04.16 Sawyer Cleaned up for walk-through
2400 : ! 97.05.01 Sawyer Use Decomp%Comm for all local info
2401 : ! 97.05.18 Sawyer DecompType has moved to ParUtilitiesTypes
2402 : ! 97.05.29 Sawyer Changed 2-D arrays to 1-D
2403 : ! 97.07.03 Sawyer Reformulated documentation
2404 : ! 97.07.22 Sawyer DecompType has moved to DecompModule
2405 : ! 97.12.01 Sawyer Changed MPI_SSEND to MPI_ISEND
2406 : ! 97.12.05 Sawyer Added InComm and Root as arguments
2407 : ! 97.12.05 Sawyer Added logic to support intercommunicators
2408 : ! 98.01.24 Sawyer Removed dependence on MPI derived types TESTED
2409 : ! 98.02.05 Sawyer Removed the use of intercommunicators
2410 : ! 98.03.30 Sawyer Stats dimension corrected: Gsize*MPI_STATUS_SIZE
2411 : ! 99.01.19 Sawyer Dropped assumed-size arrays
2412 : ! 00.07.07 Sawyer Removed "1D" references
2413 : ! 00.07.23 Sawyer Implementation with shared memory arenas
2414 : !
2415 : !EOP
2416 : !-----------------------------------------------------------------------
2417 : !BOC
2418 : ! !LOCAL VARIABLES:
2419 :
2420 : INTEGER Ierror, I, J, K, L, Iam, GroupSize
2421 : INTEGER Status( MPI_STATUS_SIZE )
2422 0 : Integer, allocatable :: Reqs(:), Stats(:)
2423 0 : REAL(CPP_REAL4), ALLOCATABLE :: SendBuf(:)
2424 : !
2425 : CPP_ENTER_PROCEDURE( "PARSCATTERREAL4" )
2426 : !
2427 0 : CALL MPI_COMM_RANK( InComm, Iam, Ierror )
2428 0 : CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
2429 :
2430 0 : allocate (Reqs(GroupSize))
2431 0 : allocate (Stats(GroupSize*MPI_STATUS_SIZE))
2432 :
2433 0 : IF ( Iam .EQ. Root ) THEN
2434 0 : ALLOCATE( SendBuf( SUM( Decomp%NumEntries ) ) )
2435 0 : L = 0
2436 0 : DO I = 1, GroupSize
2437 : !
2438 : ! Pick out the array sections to be sent.
2439 : ! This is the inverse of the operation in ParGather
2440 : !
2441 0 : DO J = 1, SIZE( Decomp%HEAD(I)%StartTags )
2442 0 : DO K = Decomp%HEAD(I)%StartTags(J),Decomp%HEAD(I)%EndTags(J)
2443 0 : L = L+1
2444 0 : SendBuf(L) = Slice(K)
2445 : ENDDO
2446 : ENDDO
2447 : !
2448 : ! This is a non-blocking send. SendBuf cannot be immediately deallocated
2449 : !
2450 : ! WARNING: F90-MPI inconsistency: make sure the indexing below always works
2451 : !
2452 0 : CALL MPI_ISEND( SendBuf(L-Decomp%NumEntries(I)+1), &
2453 : Decomp%NumEntries(I), CPP_MPI_REAL4, &
2454 0 : I-1, 0, InComm, Reqs(I), Ierror )
2455 :
2456 : ENDDO
2457 : ENDIF
2458 :
2459 : !
2460 : ! All receive from the root.
2461 : !
2462 : ! The local array may be larger than that specified in the decomposition
2463 : !
2464 0 : CALL MPI_RECV( Local, Decomp%NumEntries(Iam+1), &
2465 : CPP_MPI_REAL4, &
2466 0 : Root, 0, InComm, Status, Ierror )
2467 : !
2468 : ! Experience shows that we should wait for all the non-blocking
2469 : ! sends to complete, EVEN THOUGH THE MPI_RECV HAS COMPLETED !!
2470 : !
2471 0 : IF ( Iam .EQ. Root ) THEN
2472 0 : CALL MPI_WAITALL( GroupSize, Reqs, Stats, Ierror )
2473 0 : DEALLOCATE( SendBuf )
2474 : ENDIF
2475 :
2476 : !
2477 : ! The following may be needed on some platforms to avoid an MPI bug.
2478 : !
2479 0 : CALL MPI_BARRIER( InComm, Ierror )
2480 :
2481 0 : deallocate (Reqs)
2482 0 : deallocate (Stats)
2483 :
2484 : CPP_LEAVE_PROCEDURE( "PARSCATTERREAL4" )
2485 0 : RETURN
2486 : !EOC
2487 0 : END SUBROUTINE ParScatterReal4
2488 : !-----------------------------------------------------------------------
2489 :
2490 :
2491 : !-----------------------------------------------------------------------
2492 : !BOP
2493 : ! !IROUTINE: ParScatterInt --- Scatter slice to all PEs
2494 : !
2495 : ! !INTERFACE:
2496 0 : SUBROUTINE ParScatterInt ( InComm, Root, Slice, Decomp, Local )
2497 :
2498 : ! !USES:
2499 : USE decompmodule, ONLY: DecompType, Lists
2500 : IMPLICIT NONE
2501 :
2502 : ! !INPUT PARAMETERS:
2503 : INTEGER, INTENT( IN ) :: InComm ! Communicator
2504 : INTEGER, INTENT( IN ) :: Root ! Root PE
2505 : INTEGER, INTENT( IN ) :: Slice(*) ! Global Slice
2506 : TYPE(DecompType), INTENT( IN ) :: Decomp ! Decomp information
2507 :
2508 : ! !OUTPUT PARAMETERS:
2509 : INTEGER, INTENT( OUT ) :: Local(*) ! Local Slice
2510 :
2511 : ! !DESCRIPTION:
2512 : ! Given a decomposition of the domain, dole out a slice
2513 : ! (one-dimensional array) to all the constituent PEs as described
2514 : ! by the decomposition Decomp.
2515 : !
2516 : !
2517 : ! !SYSTEM ROUTINES:
2518 : ! MPI_ISEND, MPI_RECV, MPI_COMM_RANK
2519 : !
2520 : ! !REVISION HISTORY:
2521 : ! 97.04.14 Sawyer Creation
2522 : ! 97.04.16 Sawyer Cleaned up for walk-through
2523 : ! 97.05.01 Sawyer Use Decomp%Comm for all local info
2524 : ! 97.05.18 Sawyer DecompType has moved to ParUtilitiesTypes
2525 : ! 97.05.29 Sawyer Changed 2-D arrays to 1-D
2526 : ! 97.07.03 Sawyer Reformulated documentation
2527 : ! 97.07.22 Sawyer DecompType has moved to DecompModule
2528 : ! 97.12.01 Sawyer Changed MPI_SSEND to MPI_ISEND
2529 : ! 97.12.05 Sawyer Added InComm and Root as arguments
2530 : ! 97.12.05 Sawyer Added logic to support intercommunicators
2531 : ! 98.01.24 Sawyer Removed dependence on MPI derived types TESTED
2532 : ! 98.02.05 Sawyer Removed the use of intercommunicators
2533 : ! 98.03.30 Sawyer Stats dimension corrected: Gsize*MPI_STATUS_SIZE
2534 : ! 99.01.19 Sawyer Dropped assumed-size arrays
2535 : ! 00.07.07 Sawyer Removed "1D" references
2536 : ! 00.07.23 Sawyer Implementation with shared memory arenas
2537 : !
2538 : !EOP
2539 : !-----------------------------------------------------------------------
2540 : !BOC
2541 : ! !LOCAL VARIABLES:
2542 :
2543 : INTEGER Ierror, I, J, K, L, Iam, GroupSize
2544 : INTEGER Status( MPI_STATUS_SIZE )
2545 0 : Integer, allocatable :: Reqs(:), Stats(:)
2546 0 : INTEGER, ALLOCATABLE :: SendBuf(:)
2547 : !
2548 : CPP_ENTER_PROCEDURE( "PARSCATTERINT" )
2549 : !
2550 0 : CALL MPI_COMM_RANK( InComm, Iam, Ierror )
2551 0 : CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
2552 :
2553 0 : allocate (Reqs(GroupSize))
2554 0 : allocate (Stats(GroupSize*MPI_STATUS_SIZE))
2555 :
2556 0 : IF ( Iam .EQ. Root ) THEN
2557 0 : ALLOCATE( SendBuf( SUM( Decomp%NumEntries ) ) )
2558 0 : L = 0
2559 0 : DO I = 1, GroupSize
2560 : !
2561 : ! Pick out the array sections to be sent.
2562 : ! This is the inverse of the operation in ParGather
2563 : !
2564 0 : DO J = 1, SIZE( Decomp%HEAD(I)%StartTags )
2565 0 : DO K = Decomp%HEAD(I)%StartTags(J),Decomp%HEAD(I)%EndTags(J)
2566 0 : L = L+1
2567 0 : SendBuf(L) = Slice(K)
2568 : ENDDO
2569 : ENDDO
2570 : !
2571 : ! This is a non-blocking send. SendBuf cannot be immediately deallocated
2572 : !
2573 : ! WARNING: F90-MPI inconsistency: make sure the indexing below always works
2574 : !
2575 0 : CALL MPI_ISEND( SendBuf(L-Decomp%NumEntries(I)+1), &
2576 : Decomp%NumEntries(I), CPP_MPI_INTEGER, &
2577 0 : I-1, 0, InComm, Reqs(I), Ierror )
2578 :
2579 : ENDDO
2580 : ENDIF
2581 :
2582 : !
2583 : ! All receive from the root.
2584 : !
2585 : ! The local array may be larger than that specified in the decomposition
2586 : !
2587 0 : CALL MPI_RECV( Local, Decomp%NumEntries(Iam+1), &
2588 : CPP_MPI_INTEGER, &
2589 0 : Root, 0, InComm, Status, Ierror )
2590 : !
2591 : ! Experience shows that we should wait for all the non-blocking
2592 : ! sends to complete, EVEN THOUGH THE MPI_RECV HAS COMPLETED !!
2593 : !
2594 0 : IF ( Iam .EQ. Root ) THEN
2595 0 : CALL MPI_WAITALL( GroupSize, Reqs, Stats, Ierror )
2596 0 : DEALLOCATE( SendBuf )
2597 : ENDIF
2598 :
2599 : !
2600 : ! The following may be needed on some platforms to avoid an MPI bug.
2601 : !
2602 0 : CALL MPI_BARRIER( InComm, Ierror )
2603 :
2604 0 : deallocate (Reqs)
2605 0 : deallocate (Stats)
2606 :
2607 : CPP_LEAVE_PROCEDURE( "PARSCATTERINT" )
2608 0 : RETURN
2609 : !EOC
2610 0 : END SUBROUTINE ParScatterInt
2611 : !-----------------------------------------------------------------------
2612 :
2613 :
2614 : !-----------------------------------------------------------------------
2615 : !BOP
2616 : ! !IROUTINE: ParGatherReal --- Gather Slice from all PEs
2617 : !
2618 : ! !INTERFACE:
2619 0 : SUBROUTINE ParGatherReal ( InComm, Root, Local, Decomp, Slice )
2620 :
2621 : ! !USES:
2622 : USE decompmodule, ONLY: DecompType, Lists
2623 : IMPLICIT NONE
2624 :
2625 : ! !INPUT PARAMETERS:
2626 : INTEGER, INTENT( IN ) :: InComm ! Communicator
2627 : INTEGER, INTENT( IN ) :: Root ! Root PE
2628 : REAL(CPP_REAL8), INTENT( IN ) :: Local(*) ! Local Slice
2629 : TYPE(DecompType), INTENT( IN ) :: Decomp ! Decomp information
2630 :
2631 : ! !OUTPUT PARAMETERS:
2632 : REAL(CPP_REAL8), INTENT( OUT ) :: Slice(*) ! Global Slice
2633 :
2634 : ! !DESCRIPTION:
2635 : ! Given a decomposition of the domain and a local portion of the
2636 : ! total slice on each PE, gather together the portions into a
2637 : ! global slice on the root PE
2638 : !
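     : ! Sketch (hypothetical names; the inverse of the ParScatter example):
     : !
     : !   CALL ParGather( CommGlobal, 0, Local, Decomp, Slice )
     : !
     : ! Only the root's Slice is defined on return.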
2639 : ! !SYSTEM ROUTINES:
2640 : ! MPI_ISEND, MPI_RECV, MPI_COMM_RANK
2641 : !
2642 : ! !REVISION HISTORY:
2643 : ! 97.04.14 Sawyer Creation
2644 : ! 97.04.16 Sawyer Cleaned up for walk-through
2645 : ! 97.05.01 Sawyer Use Decomp%Comm for all local info
2646 : ! 97.05.18 Sawyer DecompType has moved to ParUtilitiesTypes
2647 : ! 97.05.29 Sawyer Changed 2-D arrays to 1-D
2648 : ! 97.07.03 Sawyer Reformulated documentation
2649 : ! 97.07.22 Sawyer DecompType has moved to DecompModule
2650 : ! 97.12.01 Sawyer Changed MPI_SSEND to MPI_ISEND
2651 : ! 97.12.05 Sawyer Added InComm and Root as arguments
2652 : ! 97.12.05 Sawyer Added logic to support intercommunicators
2653 : ! 98.01.24 Sawyer Removed dependence on MPI derived types TESTED
2654 : ! 98.01.29 Sawyer Corrected assertions
2655 : ! 98.02.05 Sawyer Removed the use of intercommunicators
2656 : ! 98.03.31 Sawyer Stat dimension corrected: MPI_STATUS_SIZE
2657 : ! 98.04.22 Sawyer Local no longer assumed shape: Local(*)
2658 : ! 99.01.19 Sawyer Dropped assumed-size arrays
2659 : ! 00.07.07 Sawyer Removed "1D" references
2660 : ! 00.07.23 Sawyer Implementation with shared memory arenas
2661 : !
2662 : !EOP
2663 : !-----------------------------------------------------------------------
2664 : !BOC
2665 : ! !LOCAL VARIABLES:
2666 : INTEGER Ierror, I, J, K, L, Iam, GroupSize, Req
2667 : INTEGER Status( MPI_STATUS_SIZE ), Stat( MPI_STATUS_SIZE )
2668 0 : REAL(CPP_REAL8), ALLOCATABLE :: RecvBuf(:)
2669 : !
2670 : CPP_ENTER_PROCEDURE( "PARGATHERREAL" )
2671 : !
2672 0 : CALL MPI_COMM_RANK( InComm, Iam, Ierror )
2673 0 : CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
2674 : !
2675 : ! All PEs send their contribution to the root
2676 : !
2677 0 : CALL MPI_ISEND( Local, Decomp%NumEntries(Iam+1), &
2678 : CPP_MPI_REAL8, &
2679 0 : Root, Iam+3001, InComm, Req, Ierror )
2680 :
2681 0 : IF ( Iam .EQ. Root ) THEN
2682 0 : ALLOCATE( RecvBuf( SUM( Decomp%NumEntries ) ) )
2683 : !
2684 : ! On the Root PE receive from every other PE
2685 : !
2686 0 : L = 0
2687 0 : DO I = 1, GroupSize
2688 : !
2689 : ! This is a blocking, synchronous recv. All the
2690 : ! sends should have been posted so it should not deadlock
2691 : !
2692 : ! WARNING: F90-MPI inconsistency: make sure the indexing below always works
2693 : !
2694 : CPP_ASSERT_F90( L .LT. SIZE( RecvBuf ) )
2695 0 : CALL MPI_RECV( RecvBuf(L+1), Decomp%NumEntries(I), &
2696 : CPP_MPI_REAL8, I-1, I+3000, InComm, &
2697 0 : Status, Ierror )
2698 : !
2699 : ! This is the simple reverse mapping of that in ParScatter
2700 : !
2701 0 : DO J = 1, SIZE( Decomp%HEAD(I)%StartTags )
2702 0 : DO K = Decomp%HEAD(I)%StartTags(J),Decomp%HEAD(I)%EndTags(J)
2703 0 : L = L + 1
2704 0 : Slice(K) = RecvBuf(L)
2705 : #if defined(DEBUG_PARGATHERREAL)
2706 : PRINT *, " Entry ", L, RecvBuf(L), K, SIZE(Slice)
2707 : #endif
2708 : ENDDO
2709 : ENDDO
2710 : ENDDO
2711 0 : DEALLOCATE( RecvBuf )
2712 : ENDIF
2713 0 : CALL MPI_WAIT( Req, Stat, Ierror )
2714 : !
2715 : ! The following may be needed on some platforms to avoid an MPI bug.
2716 : !
2717 0 : CALL MPI_BARRIER( InComm, Ierror )
2718 :
2719 : CPP_LEAVE_PROCEDURE( "PARGATHERREAL" )
2720 0 : RETURN
2721 : !EOC
2722 0 : END SUBROUTINE ParGatherReal
2723 : !-----------------------------------------------------------------------
2724 :
2725 :
2726 : !-----------------------------------------------------------------------
2727 : !BOP
2728 : ! !IROUTINE: ParGatherReal4 --- Gather Slice from all PEs
2729 : !
2730 : ! !INTERFACE:
2731 0 : SUBROUTINE ParGatherReal4 ( InComm, Root, Local, Decomp, Slice )
2732 :
2733 : ! !USES:
2734 : USE decompmodule, ONLY: DecompType, Lists
2735 : IMPLICIT NONE
2736 :
2737 : ! !INPUT PARAMETERS:
2738 : INTEGER, INTENT( IN ) :: InComm ! Communicator
2739 : INTEGER, INTENT( IN ) :: Root ! Root PE
2740 : REAL(CPP_REAL4), INTENT( IN ) :: Local(*) ! Local Slice
2741 : TYPE(DecompType), INTENT( IN ) :: Decomp ! Decomp information
2742 :
2743 : ! !OUTPUT PARAMETERS:
2744 : REAL(CPP_REAL4), INTENT( OUT ) :: Slice(*) ! Global Slice
2745 :
2746 : ! !DESCRIPTION:
2747 : ! Given a decomposition of the domain and a local portion of the
2748 : ! total slice on each PE, gather together the portions into a
2749 : ! global slice on the root PE
2750 : !
2751 : ! !SYSTEM ROUTINES:
2752 : ! MPI_ISEND, MPI_RECV, MPI_COMM_RANK
2753 : !
2754 : ! !REVISION HISTORY:
2755 : ! 97.04.14 Sawyer Creation
2756 : ! 97.04.16 Sawyer Cleaned up for walk-through
2757 : ! 97.05.01 Sawyer Use Decomp%Comm for all local info
2758 : ! 97.05.18 Sawyer DecompType has moved to ParUtilitiesTypes
2759 : ! 97.05.29 Sawyer Changed 2-D arrays to 1-D
2760 : ! 97.07.03 Sawyer Reformulated documentation
2761 : ! 97.07.22 Sawyer DecompType has moved to DecompModule
2762 : ! 97.12.01 Sawyer Changed MPI_SSEND to MPI_ISEND
2763 : ! 97.12.05 Sawyer Added InComm and Root as arguments
2764 : ! 97.12.05 Sawyer Added logic to support intercommunicators
2765 : ! 98.01.24 Sawyer Removed dependence on MPI derived types TESTED
2766 : ! 98.01.29 Sawyer Corrected assertions
2767 : ! 98.02.05 Sawyer Removed the use of intercommunicators
2768 : ! 98.03.31 Sawyer Stat dimension corrected: MPI_STATUS_SIZE
2769 : ! 98.04.22 Sawyer Local no longer assumed shape: Local(*)
2770 : ! 99.01.19 Sawyer Dropped assumed-size arrays
2771 : ! 00.07.07 Sawyer Removed "1D" references
2772 : ! 00.07.23 Sawyer Implementation with shared memory arenas
2773 : !
2774 : !EOP
2775 : !-----------------------------------------------------------------------
2776 : !BOC
2777 : ! !LOCAL VARIABLES:
2778 : INTEGER Ierror, I, J, K, L, Iam, GroupSize, Req
2779 : INTEGER Status( MPI_STATUS_SIZE ), Stat( MPI_STATUS_SIZE )
2780 0 : REAL(CPP_REAL4), ALLOCATABLE :: RecvBuf(:)
2781 : !
2782 : CPP_ENTER_PROCEDURE( "PARGATHERREAL4" )
2783 : !
2784 0 : CALL MPI_COMM_RANK( InComm, Iam, Ierror )
2785 0 : CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
2786 : !
2787 : ! All PEs send their contribution to the root
2788 : !
2789 0 : CALL MPI_ISEND( Local, Decomp%NumEntries(Iam+1), &
2790 : CPP_MPI_REAL4, &
2791 0 : Root, Iam+3001, InComm, Req, Ierror )
2792 :
2793 0 : IF ( Iam .EQ. Root ) THEN
2794 0 : ALLOCATE( RecvBuf( SUM( Decomp%NumEntries ) ) )
2795 : !
2796 : ! On the Root PE receive from every other PE
2797 : !
2798 0 : L = 0
2799 0 : DO I = 1, GroupSize
2800 : !
2801 : ! This is a blocking, synchronous recv. All the
2802 : ! sends should have been posted so it should not deadlock
2803 : !
2804 : ! WARNING: F90-MPI inconsistency: make sure the indexing below always works
2805 : !
2806 : CPP_ASSERT_F90( L .LT. SIZE( RecvBuf ) )
2807 0 : CALL MPI_RECV( RecvBuf(L+1), Decomp%NumEntries(I), &
2808 : CPP_MPI_REAL4, I-1, I+3000, InComm, &
2809 0 : Status, Ierror )
2810 : !
2811 : ! This is the simple reverse mapping of that in ParScatter
2812 : !
2813 0 : DO J = 1, SIZE( Decomp%HEAD(I)%StartTags )
2814 0 : DO K = Decomp%HEAD(I)%StartTags(J),Decomp%HEAD(I)%EndTags(J)
2815 0 : L = L + 1
2816 0 : Slice(K) = RecvBuf(L)
2817 : #if defined(DEBUG_PARGATHERREAL4)
2818 : PRINT *, " Entry ", L, RecvBuf(L), K, SIZE(Slice)
2819 : #endif
2820 : ENDDO
2821 : ENDDO
2822 : ENDDO
2823 0 : DEALLOCATE( RecvBuf )
2824 : ENDIF
2825 0 : CALL MPI_WAIT( Req, Stat, Ierror )
2826 : !
2827 : ! The following may be needed on some platforms to avoid an MPI bug.
2828 : !
2829 0 : CALL MPI_BARRIER( InComm, Ierror )
2830 : CPP_LEAVE_PROCEDURE( "PARGATHERREAL4" )
2831 0 : RETURN
2832 : !EOC
2833 0 : END SUBROUTINE ParGatherReal4
2834 : !-----------------------------------------------------------------------
2835 :
2836 :
2837 : !-----------------------------------------------------------------------
2838 : !BOP
2839 : ! !IROUTINE: ParGatherInt --- Gather Slice from all PEs
2840 : !
2841 : ! !INTERFACE:
2842 8064 : SUBROUTINE ParGatherInt ( InComm, Root, Local, Decomp, Slice )
2843 :
2844 : ! !USES:
2845 : USE decompmodule, ONLY: DecompType, Lists
2846 : IMPLICIT NONE
2847 :
2848 : ! !INPUT PARAMETERS:
2849 : INTEGER, INTENT( IN ) :: InComm ! Communicator
2850 : INTEGER, INTENT( IN ) :: Root ! Root PE
2851 : INTEGER, INTENT( IN ) :: Local(*) ! Local Slice
2852 : TYPE(DecompType), INTENT( IN ) :: Decomp ! Decomp information
2853 :
2854 : ! !OUTPUT PARAMETERS:
2855 : INTEGER, INTENT( OUT ) :: Slice(*) ! Global Slice
2856 :
2857 : ! !DESCRIPTION:
2858 : ! Given a decomposition of the domain and a local portion of the
2859 : ! total slice on each PE, gather together the portions into a
2860 : ! global slice on the root PE
2861 : !
2862 : ! !SYSTEM ROUTINES:
2863 : ! MPI_ISEND, MPI_RECV, MPI_COMM_RANK
2864 : !
2865 : ! !REVISION HISTORY:
2866 : ! 97.04.14 Sawyer Creation
2867 : ! 97.04.16 Sawyer Cleaned up for walk-through
2868 : ! 97.05.01 Sawyer Use Decomp%Comm for all local info
2869 : ! 97.05.18 Sawyer DecompType has moved to ParUtilitiesTypes
2870 : ! 97.05.29 Sawyer Changed 2-D arrays to 1-D
2871 : ! 97.07.03 Sawyer Reformulated documentation
2872 : ! 97.07.22 Sawyer DecompType has moved to DecompModule
2873 : ! 97.12.01 Sawyer Changed MPI_SSEND to MPI_ISEND
2874 : ! 97.12.05 Sawyer Added InComm and Root as arguments
2875 : ! 97.12.05 Sawyer Added logic to support intercommunicators
2876 : ! 98.01.24 Sawyer Removed dependence on MPI derived types TESTED
2877 : ! 98.01.29 Sawyer Corrected assertions
2878 : ! 98.02.05 Sawyer Removed the use of intercommunicators
2879 : ! 98.03.31 Sawyer Stat dimension corrected: MPI_STATUS_SIZE
2880 : ! 98.04.22 Sawyer Local no longer assumed shape: Local(*)
2881 : ! 99.01.19 Sawyer Dropped assumed-size arrays
2882 : ! 00.07.07 Sawyer Removed "1D" references
2883 : ! 00.07.23 Sawyer Implementation with shared memory arenas
2884 : !
2885 : !EOP
2886 : !-----------------------------------------------------------------------
2887 : !BOC
2888 : ! !LOCAL VARIABLES:
2889 : INTEGER Ierror, I, J, K, L, Iam, GroupSize, Req
2890 : INTEGER Status( MPI_STATUS_SIZE ), Stat( MPI_STATUS_SIZE )
2891 8064 : INTEGER, ALLOCATABLE :: RecvBuf(:)
2892 : !
2893 : CPP_ENTER_PROCEDURE( "PARGATHERINT" )
2894 : !
2895 8064 : CALL MPI_COMM_RANK( InComm, Iam, Ierror )
2896 8064 : CALL MPI_COMM_SIZE( InComm, GroupSize, Ierror )
2897 : !
2898 : ! All PEs send their contribution to the root
2899 : !
2900 0 : CALL MPI_ISEND( Local, Decomp%NumEntries(Iam+1), CPP_MPI_INTEGER, &
2901 8064 : Root, Iam+3001, InComm, Req, Ierror )
2902 :
2903 8064 : IF ( Iam .EQ. Root ) THEN
2904 10080 : ALLOCATE( RecvBuf( SUM( Decomp%NumEntries ) ) )
2905 : !
2906 : ! On the Root PE receive from every other PE
2907 : !
2908 672 : L = 0
2909 8736 : DO I = 1, GroupSize
2910 : !
2911 : ! This is a blocking, synchronous recv. All the
2912 : ! sends should have been posted so it should not deadlock
2913 : !
2914 : ! WARNING: F90-MPI inconsistency: make sure the indexing below always works
2915 : !
2916 : CPP_ASSERT_F90( L .LT. SIZE( RecvBuf ) )
2917 8064 : CALL MPI_RECV( RecvBuf(L+1), Decomp%NumEntries(I), &
2918 : CPP_MPI_INTEGER, I-1, I+3000, InComm, &
2919 16128 : Status, Ierror )
2920 : !
2921 : ! This is the simple reverse mapping of that in ParScatter
2922 : !
2923 32928 : DO J = 1, SIZE( Decomp%HEAD(I)%StartTags )
2924 322560 : DO K = Decomp%HEAD(I)%StartTags(J),Decomp%HEAD(I)%EndTags(J)
2925 290304 : L = L + 1
2926 314496 : Slice(K) = RecvBuf(L)
2927 : #if defined(DEBUG_PARGATHERINT)
2928 : PRINT *, " Entry ", L, RecvBuf(L), K, SIZE(Slice)
2929 : #endif
2930 : ENDDO
2931 : ENDDO
2932 : ENDDO
2933 672 : DEALLOCATE( RecvBuf )
2934 : ENDIF
2935 8064 : CALL MPI_WAIT( Req, Stat, Ierror )
2936 : !
2937 : ! The following may be needed on some platforms to avoid an MPI bug.
2938 : !
2939 8064 : CALL MPI_BARRIER( InComm, Ierror )
2940 :
2941 : CPP_LEAVE_PROCEDURE( "PARGATHERINT" )
2942 8064 : RETURN
2943 : !EOC
2944 8064 : END SUBROUTINE ParGatherInt
2945 : !-----------------------------------------------------------------------
2946 :
2947 :
2948 : !-----------------------------------------------------------------------
2949 : !BOP
2950 : ! !IROUTINE: ParBeginTransferReal --- Start an ASYNC Real Transfer
2951 : !
2952 : ! !INTERFACE:
2953 0 : SUBROUTINE ParBeginTransferReal(InComm, NrInPackets, NrOutPackets, &
2954 0 : Dest, Src, InBuf, InIA, &
2955 0 : OutBuf, OutIA )
2956 :
2957 : ! !USES:
2958 : IMPLICIT NONE
2959 :
2960 : ! !INPUT PARAMETERS:
2961 : INTEGER, INTENT( IN ) :: InComm ! Communicator
2962 : INTEGER, INTENT( IN ) :: NrInPackets ! Number of in packets
2963 : INTEGER, INTENT( IN ) :: NrOutPackets ! Number of out packets
2964 : INTEGER, INTENT( IN ) :: Dest(:) ! PE destinations
2965 : INTEGER, INTENT( IN ) :: Src(:) ! PE sources
2966 : REAL(CPP_REAL8), INTENT(IN) :: InBuf(:) ! Input buffer
2967 : INTEGER, INTENT( IN ) :: InIA(:) ! In packet counter
2968 : INTEGER, INTENT( IN ) :: OutIA(:) ! Out packet counter
2969 :
2970 : ! !OUTPUT PARAMETERS:
2971 : REAL(CPP_REAL8), INTENT( OUT ) :: OutBuf(:) ! Output buffer
2972 :
2973 : ! !DESCRIPTION:
2974 : !
2975 : ! This routine initiates an async. transfer of an array InBuf
2976 : ! partitioned into parcels defined by the arrays InIA and Dest
2977 : ! to an output array OutBuf on another PE. InIA(1) contains
2978 : ! the number of reals to be sent to Dest(1), InIA(2) the number
2979 : ! of reals to be sent to Dest(2), etc. Similarly, the array
2980 : ! OutBuf on the calling PE is partitioned into parcels by OutIA
2981 : ! and Src, with OutIA(1) the number of reals anticipated from
2982 : ! Src(1), etc.
2983 : !
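     : ! For example (hypothetical values): with NrInPackets = 2,
     : ! Dest = (/ 1, 4 /) and InIA = (/ 3, 5 /), parcels InBuf(1:3)
     : ! and InBuf(4:8) are sent to PEs 1 and 4 respectively; OutBuf
     : ! is partitioned analogously by OutIA and Src.
     : !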
2984 : ! The default implementation reads through the contiguous array
2985 : ! InBuf and sends the parcels to the PEs designated with an
2986 : ! asynchronous MPI\_ISEND. Correspondingly it posts the receives
2987 : ! with an asynchronous MPI\_IRECV.
2988 : !
2989 : ! Wait handles InHandle(:) and OutHandle(:) are in a common block.
2990 : !
2991 : ! !BUGS:
2992 : !
2993 : ! It is assumed that the buffers are passed to this routine by
2994 : ! reference!!!!!!!!!!
2995 : !
2996 : ! The buffers may not be accessed until after the call to
2997 : ! ParEndTransferReal.
2998 : !
2999 : !
3000 : ! !SYSTEM ROUTINES:
3001 : ! MPI_COMM_RANK, MPI_ISEND, MPI_IRECV
3002 : !
3003 : ! !REVISION HISTORY:
3004 : ! 97.09.26 Sawyer Creation
3005 : ! 97.12.05 Sawyer Renamed Comm to InComm to avoid collisions
3006 : ! 98.02.26 Sawyer Added Dest, Src and Remote to clean up code
3007 : ! 98.04.16 Sawyer Number of packets become input arguments
3008 : ! 98.09.04 Sawyer Cleaned interface: handles in common, no Remote
3009 : ! 99.03.04 Sawyer Inlined ParCalculateRemote
3010 : ! 99.06.01 Sawyer Changed pointer arrays to INTEGER*8 for SGI
3011 : ! 00.08.07 Sawyer Implementation with shared memory arenas
3012 : ! 01.09.27 Sawyer Added multiple shared buffers for USE_MLP
3013 : !
3014 : !EOP
3015 : !-----------------------------------------------------------------------
3016 : !BOC
3017 :
3018 : ! !LOCAL VARIABLES:
3019 : INTEGER Iam, GroupSize, Nr, Icnt, Packet, I, Ierr
3020 :
3021 : CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERREAL" )
3022 : CPP_ASSERT_F90( NrInPackets .LE. SIZE( Dest ) )
3023 : CPP_ASSERT_F90( NrInPackets .LE. SIZE( InIA ) )
3024 : CPP_ASSERT_F90( NrOutPackets .LE. SIZE( Src ) )
3025 : CPP_ASSERT_F90( NrOutPackets .LE. SIZE( OutIA ) )
3026 :
3027 : !
3028 : ! Increment the ongoing transfer number
3029 0 : BegTrf = MOD(BegTrf,MAX_TRF) + 1
3030 :
3031 0 : CALL MPI_COMM_RANK( InComm, Iam, Ierr )
3032 0 : CALL MPI_COMM_SIZE( InComm, GroupSize, Ierr )
3033 :
3034 : !
3035 : ! MPI: Irecv over all processes
3036 : !
3037 0 : Icnt = 1
3038 0 : DO Packet = 1, NrOutPackets
3039 0 : Nr = OutIA( Packet )
3040 0 : IF ( Nr .GT. 0 ) THEN
3041 : #if defined( DEBUG_PARBEGINTRANSFERREAL )
3042 : PRINT *, "Iam ",Iam," posts recv ",Nr," from ", Src( Packet )
3043 : #endif
3044 : !
3045 : ! Receive the buffers with MPI_Irecv. Non-blocking
3046 : !
3047 : CPP_ASSERT_F90( Icnt+Nr-1 .LE. SIZE( OutBuf ) )
3048 0 : CALL MPI_IRECV( OutBuf( Icnt ), Nr, &
3049 0 : CPP_MPI_REAL8, Src( Packet ), Src( Packet ), &
3050 0 : InComm, OutHandle(Packet,1,BegTrf), Ierr )
3051 : ELSE
3052 0 : OutHandle(Packet,1,BegTrf) = MPI_REQUEST_NULL
3053 : END IF
3054 0 : Icnt = Icnt + Nr
3055 : END DO
3056 : !
3057 : ! MPI: Isend over all processes
3058 : !
3059 0 : Icnt = 1
3060 : CPP_ASSERT_F90( NrInPackets .LE. SIZE( Dest ) )
3061 : CPP_ASSERT_F90( NrInPackets .LE. SIZE( InIA ) )
3062 0 : DO Packet = 1, NrInPackets
3063 0 : Nr = InIA( Packet )
3064 0 : IF ( Nr .GT. 0 ) THEN
3065 : #if defined( DEBUG_PARBEGINTRANSFERREAL )
3066 : PRINT *,"Iam ",Iam," posts send ",Nr," to ",Dest( Packet )
3067 : #endif
3068 : !
3069 : ! Send the individual buffers with non-blocking sends
3070 : !
3071 : CPP_ASSERT_F90( Icnt+Nr-1 .LE. SIZE( InBuf ) )
3072 0 : CALL MPI_ISEND ( InBuf( Icnt ), Nr, &
3073 0 : CPP_MPI_REAL8, Dest( Packet ), Iam, &
3074 0 : InComm, InHandle(Packet,1,BegTrf), Ierr )
3075 : ELSE
3076 0 : InHandle(Packet,1,BegTrf) = MPI_REQUEST_NULL
3077 : END IF
3078 0 : Icnt = Icnt + Nr
3079 : END DO
3080 : !
3081 : !
3082 : CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERREAL" )
3083 0 : RETURN
3084 : !EOC
3085 : END SUBROUTINE ParBeginTransferReal
3086 : !-----------------------------------------------------------------------
3087 :
3088 :
3089 : !-----------------------------------------------------------------------
3090 : !BOP
3091 : ! !IROUTINE: ParBeginTransferPattern1D --- Start ASYNC Pattern Transfer
3092 : !
3093 : ! !INTERFACE:
3094 0 : SUBROUTINE ParBeginTransferPattern1D( InComm, Pattern, InBuf, OutBuf )
3095 :
3096 : ! !USES:
3097 : USE mod_comm, ONLY : mp_sendirr
3098 : IMPLICIT NONE
3099 :
3100 : ! !INPUT PARAMETERS:
3101 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3102 : TYPE (ParPatternType), INTENT( IN ) :: Pattern ! Comm Pattern
3103 : REAL(CPP_REAL8), INTENT( IN ) :: InBuf(*) ! Input buffer
3104 :
3105 : ! !OUTPUT PARAMETERS:
3106 : REAL(CPP_REAL8), INTENT( OUT ) :: OutBuf(*) ! Output buffer
3107 :
3108 : ! !DESCRIPTION:
3109 : !
3110 : ! This routine initiates an async. transfer of an array InBuf.
3111 : ! The communication pattern indicates the indices of the outgoing
3112 : ! values of InBuf and of the incoming values for OutBuf. This routine
3113 : ! is fundamentally equivalent to ParBeginTransferReal; the use
3114 : ! of a communication pattern is largely a performance enhancement,
3115 : ! since it eliminates the need for intermediate buffering.
3116 : !
3117 : ! Wait handles InHandle and OutHandle are module variables.
3118 : ! The buffers may not be accessed until after the call to
3119 : ! ParEndTransfer.
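     : !
     : ! A minimal usage sketch (Pattern assumed to have been built
     : ! earlier with ParPatternCreate; names are illustrative):
     : !
     : !    CALL ParBeginTransferPattern1D( Comm, Pattern, InBuf, OutBuf )
     : !    ! ... computation may overlap the communication here ...
     : !    CALL ParEndTransferPattern1D( Comm, Pattern, InBuf, OutBuf )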
3120 : !
3121 : ! !BUGS:
3122 : !
3123 : ! It is assumed that the buffers are passed to this routine by
3124 : ! reference.
3125 : !
3126 : ! !REVISION HISTORY:
3127 : ! 01.02.14 Sawyer Creation from ParBeginTransferReal
3128 : ! 01.09.27 Sawyer Added multiple shared buffers for USE_MLP
3129 : ! 02.08.13 Sawyer Now uses mod_comm unless Use_Mpi_Types
3130 : ! 03.06.24 Sawyer All complexity now in mp_sendirr
3131 : !
3132 : !EOP
3133 : !-----------------------------------------------------------------------
3134 : !BOC
3135 :
3136 : ! !LOCAL VARIABLES:
3137 : CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN1D" )
3138 :
3139 0 : CALL mp_sendirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
3140 : !
3141 : CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN1D" )
3142 0 : RETURN
3143 : !EOC
3144 : END SUBROUTINE ParBeginTransferPattern1D
3145 : !-----------------------------------------------------------------------
3146 :
3147 :
3148 : !-----------------------------------------------------------------------
3149 : !BOP
3150 : ! !IROUTINE: ParBeginTransferPattern1Dint --- Start ASYNC Pattern Transfer
3151 : !
3152 : ! !INTERFACE:
3153 0 : SUBROUTINE ParBeginTransferPattern1Dint( InComm, Pattern, InBuf, OutBuf )
3154 :
3155 : ! !USES:
3156 : USE mod_comm, ONLY : mp_sendirr_i4
3157 : IMPLICIT NONE
3158 :
3159 : ! !INPUT PARAMETERS:
3160 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3161 : TYPE (ParPatternType), INTENT( IN ) :: Pattern ! Comm Pattern
3162 : INTEGER, INTENT( IN ) :: InBuf(*) ! Input buffer
3163 :
3164 : ! !OUTPUT PARAMETERS:
3165 : INTEGER, INTENT( OUT ) :: OutBuf(*) ! Output buffer
3166 :
3167 : ! !DESCRIPTION:
3168 : !
3169 : ! This routine initiates an async. transfer of an array InBuf.
3170 : ! The communication pattern indicates the indices of the outgoing
3171 : ! values of InBuf and of the incoming values for OutBuf. This routine
3172 : ! is fundamentally equivalent to ParBeginTransferReal; the use
3173 : ! of a communication pattern is largely a performance enhancement,
3174 : ! since it eliminates the need for intermediate buffering.
3175 : !
3176 : ! Wait handles InHandle and OutHandle are module variables.
3177 : ! The buffers may not be accessed until after the call to
3178 : ! ParEndTransfer.
3179 : !
3180 : ! !BUGS:
3181 : !
3182 : ! It is assumed that the buffers are passed to this routine by
3183 : ! reference.
3184 : !
3185 : ! !REVISION HISTORY:
3186 : ! 01.02.14 Sawyer Creation from ParBeginTransferReal
3187 : ! 01.09.27 Sawyer Added multiple shared buffers for USE_MLP
3188 : ! 02.08.13 Sawyer Now uses mod_comm unless Use_Mpi_Types
3189 : ! 03.06.24 Sawyer All complexity now in mp_sendirr_i4
3190 : !
3191 : !EOP
3192 : !-----------------------------------------------------------------------
3193 : !BOC
3194 :
3195 : CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN1DINT" )
3196 :
3197 0 : CALL mp_sendirr_i4( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
3198 :
3199 : CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN1DINT" )
3200 0 : RETURN
3201 : !EOC
3202 : END SUBROUTINE ParBeginTransferPattern1Dint
3203 : !-----------------------------------------------------------------------
3204 :
3205 :
3206 : !-----------------------------------------------------------------------
3207 : !BOP
3208 : ! !IROUTINE: ParBeginTransferPattern2D --- Start an ASYNC Pattern Transfer
3209 : !
3210 : ! !INTERFACE:
3211 0 : SUBROUTINE ParBeginTransferPattern2D( InComm, Pattern, InBuf, OutBuf )
3212 :
3213 : ! !USES:
3214 : USE mod_comm, ONLY : mp_sendirr
3215 : IMPLICIT NONE
3216 :
3217 : ! !INPUT PARAMETERS:
3218 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3219 : TYPE (ParPatternType), INTENT(IN) :: Pattern ! Comm Pattern
3220 : REAL(CPP_REAL8), INTENT(IN) :: InBuf(:,:) ! Input buffer
3221 :
3222 : ! !OUTPUT PARAMETERS:
3223 : REAL(CPP_REAL8), INTENT(OUT) :: OutBuf(:,:) ! Output buffer
3224 :
3225 : ! !DESCRIPTION:
3226 : !
3227 : ! This routine initiates an async. transfer of an array InBuf.
3228 : ! The communication pattern indicates the indices of the outgoing
3229 : ! values of InBuf and of the incoming values for OutBuf. This routine
3230 : ! is fundamentally equivalent to ParBeginTransferReal; the use
3231 : ! of a communication pattern is largely a performance enhancement,
3232 : ! since it eliminates the need for intermediate buffering.
3233 : !
3234 : ! Wait handles InHandle and OutHandle are module variables.
3235 : ! The buffers may not be accessed until after the call to
3236 : ! ParEndTransfer.
3237 : !
3238 : ! !REVISION HISTORY:
3239 : ! 01.10.01 Sawyer Creation from ParBeginTransferPattern
3240 : ! 02.08.13 Sawyer Now uses mod_comm unless Use_Mpi_Types
3241 : ! 03.06.24 Sawyer All complexity now in mp_sendirr
3242 : !
3243 : !EOP
3244 : !-----------------------------------------------------------------------
3245 : !BOC
3246 :
3247 : CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN2D" )
3248 :
3249 0 : CALL mp_sendirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
3250 :
3251 : CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN2D" )
3252 0 : RETURN
3253 : !EOC
3254 : END SUBROUTINE ParBeginTransferPattern2D
3255 : !-----------------------------------------------------------------------
3256 :
3257 :
3258 : !-----------------------------------------------------------------------
3259 : !BOP
3260 : ! !IROUTINE: ParBeginTransferPattern3D --- Start an ASYNC Pattern Transfer
3261 : !
3262 : ! !INTERFACE:
3263 0 : SUBROUTINE ParBeginTransferPattern3D( InComm, Pattern, InBuf, OutBuf )
3264 :
3265 : ! !USES:
3266 : USE mod_comm, ONLY : mp_sendirr
3267 : IMPLICIT NONE
3268 :
3269 : ! !INPUT PARAMETERS:
3270 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3271 : TYPE (ParPatternType), INTENT(IN) :: Pattern ! Comm Pattern
3272 : REAL(CPP_REAL8), INTENT(IN) :: InBuf(:,:,:) ! Input buffer
3273 :
3274 : ! !OUTPUT PARAMETERS:
3275 : REAL(CPP_REAL8), INTENT(OUT) :: OutBuf(:,:,:)! Output buffer
3276 :
3277 : ! !DESCRIPTION:
3278 : !
3279 : ! This routine initiates an async. transfer of an array InBuf.
3280 : ! The communication pattern indicates the indices of the outgoing
3281 : ! values of InBuf and of the incoming values for OutBuf. This routine
3282 : ! is fundamentally equivalent to ParBeginTransferReal; the use
3283 : ! of a communication pattern is largely a performance enhancement,
3284 : ! since it eliminates the need for intermediate buffering.
3285 : !
3286 : ! Wait handles InHandle and OutHandle are module variables.
3287 : ! The buffers may not be accessed until after the call to
3288 : ! ParEndTransfer.
3289 : !
3290 : ! !REVISION HISTORY:
3291 : ! 01.10.01 Sawyer Creation from ParBeginTransferPattern
3292 : ! 02.08.13 Sawyer Now uses mod_comm unless Use_Mpi_Types
3293 : ! 03.06.24 Sawyer All complexity now in mp_sendirr
3294 : !
3295 : !EOP
3296 : !-----------------------------------------------------------------------
3297 : !BOC
3298 :
3299 : CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN3D" )
3300 :
3301 0 : CALL mp_sendirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
3302 :
3303 : CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN3D" )
3304 0 : RETURN
3305 : !EOC
3306 : END SUBROUTINE ParBeginTransferPattern3D
3307 : !-----------------------------------------------------------------------
3308 :
3309 :
3310 : !-----------------------------------------------------------------------
3311 : !BOP
3312 : ! !IROUTINE: ParBeginTransferPattern4D --- Start an ASYNC Pattern Transfer
3313 : !
3314 : ! !INTERFACE:
3315 0 : SUBROUTINE ParBeginTransferPattern4D( InComm, Pattern, InBuf, OutBuf )
3316 :
3317 : ! !USES:
3318 : USE mod_comm, ONLY : mp_sendirr
3319 : IMPLICIT NONE
3320 :
3321 : ! !INPUT PARAMETERS:
3322 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3323 : TYPE (ParPatternType), INTENT(IN) :: Pattern ! Comm Pattern
3324 : REAL(CPP_REAL8), INTENT(IN) :: InBuf(:,:,:,:) ! Input buffer
3325 :
3326 : ! !OUTPUT PARAMETERS:
3327 : REAL(CPP_REAL8), INTENT(OUT) :: OutBuf(:,:,:,:)! Output buffer
3328 :
3329 : ! !DESCRIPTION:
3330 : !
3331 : ! This routine initiates an async. transfer of an array InBuf.
3332 : ! The communication pattern indicates the indices of the outgoing
3333 : ! values of InBuf and of the incoming values for OutBuf. This routine
3334 : ! is fundamentally equivalent to ParBeginTransferReal; the use
3335 : ! of a communication pattern is largely a performance enhancement,
3336 : ! since it eliminates the need for intermediate buffering.
3337 : !
3338 : ! Wait handles InHandle and OutHandle are module variables.
3339 : ! The buffers may not be accessed until after the call to
3340 : ! ParEndTransfer.
3341 : !
3342 : ! !REVISION HISTORY:
3343 : ! 02.12.19 Sawyer Creation from ParBeginTransferPattern
3344 : ! 03.06.24 Sawyer All complexity now in mp_sendirr
3345 : !
3346 : !EOP
3347 : !-----------------------------------------------------------------------
3348 : !BOC
3349 :
3350 : CPP_ENTER_PROCEDURE( "PARBEGINTRANSFERPATTERN4D" )
3351 :
3352 0 : CALL mp_sendirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
3353 :
3354 : CPP_LEAVE_PROCEDURE( "PARBEGINTRANSFERPATTERN4D" )
3355 0 : RETURN
3356 : !EOC
3357 : END SUBROUTINE ParBeginTransferPattern4D
3358 : !-----------------------------------------------------------------------
3359 :
3360 :
3361 : !-----------------------------------------------------------------------
3362 : !BOP
3363 : ! !IROUTINE: ParEndTransferReal --- Complete an ASYNC Real Transfer
3364 : !
3365 : ! !INTERFACE:
3366 0 : SUBROUTINE ParEndTransferReal( InComm, NrInPackets, NrOutPackets, &
3367 : Dest, Src, InBuf, InIA, &
3368 : OutBuf, OutIA )
3369 :
3370 : ! !USES:
3371 : IMPLICIT NONE
3372 :
3373 : ! !INPUT PARAMETERS:
3374 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3375 : INTEGER, INTENT( IN ) :: NrInPackets ! Number of in packets
3376 : INTEGER, INTENT( IN ) :: NrOutPackets ! Number of out packets
3377 : INTEGER, INTENT( IN ) :: Dest(:) ! PE destinations
3378 : INTEGER, INTENT( IN ) :: Src(:) ! PE sources
3379 : REAL(CPP_REAL8), INTENT(IN) :: InBuf(:) ! Input buffer
3380 : INTEGER, INTENT( IN ) :: InIA(:) ! Pointer array
3381 : INTEGER, INTENT( IN ) :: OutIA(:) ! Pointer array
3382 :
3383 : ! !INPUT/OUTPUT PARAMETERS:
3384 : REAL(CPP_REAL8), INTENT( INOUT ) :: OutBuf(:)! Output buffer
3385 :
3386 : ! !DESCRIPTION:
3387 : !
3388 : ! This routine completes an async. transfer of an array
3389 : ! partitioned into parcels defined by the array InIA. In the
3390 : ! MPI version, neither InBuf nor OutBuf is used, since
3391 : ! that information was utilized in ParBeginTransferReal.
3392 : !
3393 : ! The link between StartTransfer and EndTransfer is made possible
3394 : ! by the InHandle and OutHandle: they reflect the status of
3395 : ! the ongoing transfer. When this routine completes, a valid
3396 : ! and accessible copy of the OutBuf is ready for use.
3397 : !
3398 : ! !BUGS:
3399 : !
3400 : ! It is assumed that the buffers are passed to this routine by
3401 : ! reference! The buffers may not be accessed until after the
3402 : ! completion of ParEndTransferReal.
3403 : !
3404 : !
3405 : ! !SYSTEM ROUTINES:
3406 : ! MPI_COMM_RANK, MPI_ISEND, MPI_IRECV
3407 : !
3408 : ! !REVISION HISTORY:
3409 : ! 97.09.26 Sawyer Creation
3410 : ! 97.12.05 Sawyer Renamed Comm to InComm to avoid collisions
3411 : ! 98.02.26 Sawyer Count through packets, not PEs
3412 : ! 98.04.16 Sawyer Number of packets become input arguments
3413 : ! 98.09.04 Sawyer Cleaned interface: handles in common
3414 : ! 99.03.05 Sawyer Support for contiguous communicators in SHMEM
3415 : ! 99.04.22 Sawyer Bug fix: replaced MPI_WAIT with MPI_WAITALL
3416 : ! 99.06.03 Sawyer Bug fix: GroupSize in SHMEM_BARRIER
3417 : ! 00.07.28 Sawyer Implemented with shared memory arenas
3418 : ! 01.09.27 Sawyer Added multiple shared buffers for USE_MLP
3419 : !
3420 : !EOP
3421 : !-----------------------------------------------------------------------
3422 : !BOC
3423 : ! !LOCAL VARIABLES:
3424 : INTEGER Iam, GroupSize, J, Offset, Packet, Ierr
3425 0 : INTEGER InStats(NrInPackets*MPI_STATUS_SIZE)
3426 0 : INTEGER OutStats(NrOutPackets*MPI_STATUS_SIZE)
3427 :
3428 : CPP_ENTER_PROCEDURE( "PARENDTRANSFERREAL" )
3429 :
3430 : !
3431 : ! Increment the receive-side transfer number
3432 0 : EndTrf = MOD(EndTrf,MAX_TRF)+1
3433 :
3434 : CPP_ASSERT_F90( NrInPackets .LE. MAX_PAX )
3435 0 : CALL MPI_WAITALL( NrInPackets, InHandle(:,1,EndTrf), InStats, Ierr )
3436 :
3437 : CPP_ASSERT_F90( NrOutPackets .LE. MAX_PAX )
3438 0 : CALL MPI_WAITALL( NrOutPackets, OutHandle(:,1,EndTrf), OutStats, Ierr )
3439 : !
3440 : ! WS 98.09.22 : This barrier is needed to synchronize.
3441 : !
3442 0 : CALL MPI_BARRIER( InComm, Ierr )
3443 :
3444 : CPP_LEAVE_PROCEDURE( "PARENDTRANSFERREAL" )
3445 0 : RETURN
3446 : !EOC
3447 : END SUBROUTINE ParEndTransferReal
3448 : !-----------------------------------------------------------------------
3449 :
3450 :
3451 : !-----------------------------------------------------------------------
3452 : !BOP
3453 : ! !IROUTINE: ParEndTransferPattern1D --- Complete ASYNC Pattern Transfer
3454 : !
3455 : ! !INTERFACE:
3456 0 : SUBROUTINE ParEndTransferPattern1D( InComm, Pattern, InBuf, OutBuf )
3457 :
3458 : ! !USES:
3459 : USE mod_comm, ONLY : mp_recvirr
3460 : IMPLICIT NONE
3461 :
3462 : ! !INPUT PARAMETERS:
3463 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3464 : TYPE (ParPatternType), INTENT( IN ) :: Pattern ! Comm Pattern
3465 : REAL(CPP_REAL8), INTENT( IN ) :: InBuf(*) ! Input buffer
3466 :
3467 : ! !INPUT/OUTPUT PARAMETERS:
3468 : REAL(CPP_REAL8), INTENT( INOUT ) :: OutBuf(*) ! Output buffer
3469 :
3470 : ! !DESCRIPTION:
3471 : !
3472 : ! This routine completes an async. transfer of an array communicated
3473 : ! with a communication pattern.
3474 : !
3475 : ! The link between StartTransfer and EndTransfer is made possible
3476 : ! by the InHandle and OutHandle: they reflect the status of
3477 : ! the ongoing transfer. When this routine completes, a valid
3478 : ! and accessible copy of the OutBuf is ready for use.
3479 : ! The buffers may not be accessed until after the
3480 : ! completion of ParEndTransfer.
3481 : !
3482 : ! !BUGS:
3483 : !
3484 : ! It is assumed that the buffers are passed to this routine by
3485 : ! reference.
3486 : !
3487 : ! !REVISION HISTORY:
3488 : ! 01.02.14 Sawyer Creation from ParEndTransferReal
3489 : ! 02.08.13 Sawyer Now uses mod_comm unless Use_Mpi_Types
3490 : ! 03.06.24 Sawyer All complexity now in mp_recvirr
3491 : !
3492 : !EOP
3493 : !-----------------------------------------------------------------------
3494 : !BOC
3495 :
3496 : CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN1D" )
3497 :
3498 0 : CALL mp_recvirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
3499 :
3500 : CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN1D" )
3501 0 : RETURN
3502 : !EOC
3503 : END SUBROUTINE ParEndTransferPattern1D
3504 : !-----------------------------------------------------------------------
3505 :
3506 :
3507 : !-----------------------------------------------------------------------
3508 : !BOP
3509 : ! !IROUTINE: ParEndTransferPattern1Dint --- Complete ASYNC Pattern Transfer
3510 : !
3511 : ! !INTERFACE:
3512 0 : SUBROUTINE ParEndTransferPattern1Dint( InComm, Pattern, InBuf, OutBuf )
3513 :
3514 : ! !USES:
3515 : USE mod_comm, ONLY : mp_recvirr_i4
3516 : IMPLICIT NONE
3517 :
3518 : ! !INPUT PARAMETERS:
3519 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3520 : TYPE (ParPatternType), INTENT( IN ) :: Pattern ! Comm Pattern
3521 : INTEGER, INTENT( IN ) :: InBuf(*) ! Input buffer
3522 :
3523 : ! !INPUT/OUTPUT PARAMETERS:
3524 : INTEGER, INTENT( INOUT ) :: OutBuf(*) ! Output buffer
3525 :
3526 : ! !DESCRIPTION:
3527 : !
3528 : ! This routine completes an async. transfer of an array communicated
3529 : ! with a communication pattern.
3530 : !
3531 : ! The link between StartTransfer and EndTransfer is made possible
3532 : ! by the InHandle and OutHandle: they reflect the status of
3533 : ! the ongoing transfer. When this routine completes, a valid
3534 : ! and accessible copy of the OutBuf is ready for use.
3535 : ! The buffers may not be accessed until after the
3536 : ! completion of ParEndTransfer.
3537 : !
3538 : ! !BUGS:
3539 : !
3540 : ! It is assumed that the buffers are passed to this routine by
3541 : ! reference.
3542 : !
3543 : ! !REVISION HISTORY:
3544 : ! 01.02.14 Sawyer Creation from ParEndTransferReal
3545 : ! 02.08.13 Sawyer Now uses mod_comm unless Use_Mpi_Types
3546 : ! 03.06.24 Sawyer All complexity now in mp_recvirr_i4
3547 : !
3548 : !EOP
3549 : !-----------------------------------------------------------------------
3550 : !BOC
3551 :
3552 : CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN1DINT" )
3553 :
3554 0 : CALL mp_recvirr_i4( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
3555 :
3556 : CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN1DINT" )
3557 0 : RETURN
3558 : !EOC
3559 : END SUBROUTINE ParEndTransferPattern1Dint
3560 : !-----------------------------------------------------------------------
3561 :
3562 :
3563 : !-----------------------------------------------------------------------
3564 : !BOP
3565 : ! !IROUTINE: ParEndTransferPattern2D --- Complete an ASYNC Pattern Transfer
3566 : !
3567 : ! !INTERFACE:
3568 0 : SUBROUTINE ParEndTransferPattern2D( InComm, Pattern, InBuf, OutBuf )
3569 :
3570 : ! !USES:
3571 : USE mod_comm, ONLY : mp_recvirr
3572 : IMPLICIT NONE
3573 :
3574 : ! !INPUT PARAMETERS:
3575 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3576 : TYPE (ParPatternType), INTENT( IN ) :: Pattern ! Comm Pattern
3577 : REAL(CPP_REAL8), INTENT( IN ) :: InBuf(:,:) ! Input buffer
3578 :
3579 : ! !INPUT/OUTPUT PARAMETERS:
3580 : REAL(CPP_REAL8), INTENT( INOUT ) :: OutBuf(:,:) ! Output buffer
3581 :
3582 : ! !DESCRIPTION:
3583 : !
3584 : ! This routine completes an async. transfer of an array communicated
3585 : ! with a communication pattern.
3586 : !
3587 : ! The link between StartTransfer and EndTransfer is made possible
3588 : ! by the InHandle and OutHandle: they reflect the status of
3589 : ! the ongoing transfer. When this routine completes, a valid
3590 : ! and accessible copy of the OutBuf is ready for use.
3591 : ! The buffers may not be accessed until after the
3592 : ! completion of ParEndTransfer.
3593 : !
3594 : ! !BUGS:
3595 : !
3596 : ! It is assumed that the buffers are passed to this routine by
3597 : ! reference.
3598 : !
3599 : ! !REVISION HISTORY:
3600 : ! 01.10.01 Sawyer Creation from ParEndTransferPattern
3601 : ! 02.08.13 Sawyer Now uses mod_comm unless Use_Mpi_Types
3602 : ! 03.06.24 Sawyer All complexity now in mp_recvirr
3603 : !
3604 : !EOP
3605 : !-----------------------------------------------------------------------
3606 : !BOC
3607 :
3608 : CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN2D" )
3609 :
3610 0 : CALL mp_recvirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf(:,:),OutBuf(:,:) )
3611 :
3612 : CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN2D" )
3613 0 : RETURN
3614 : !EOC
3615 : END SUBROUTINE ParEndTransferPattern2D
3616 : !-----------------------------------------------------------------------
3617 :
3618 :
3619 : !-----------------------------------------------------------------------
3620 : !BOP
3621 : ! !IROUTINE: ParEndTransferPattern3D --- Complete an ASYNC Pattern Transfer
3622 : !
3623 : ! !INTERFACE:
3624 0 : SUBROUTINE ParEndTransferPattern3D( InComm, Pattern, InBuf, OutBuf )
3625 :
3626 : ! !USES:
3627 : USE mod_comm, ONLY : mp_recvirr
3628 : IMPLICIT NONE
3629 :
3630 : ! !INPUT PARAMETERS:
3631 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3632 : TYPE (ParPatternType), INTENT( IN ) :: Pattern ! Comm Pattern
3633 : REAL(CPP_REAL8), INTENT( IN ) :: InBuf(:,:,:) ! Input buffer
3634 :
3635 : ! !INPUT/OUTPUT PARAMETERS:
3636 : REAL(CPP_REAL8), INTENT( INOUT ) :: OutBuf(:,:,:) ! Output buffer
3637 :
3638 : ! !DESCRIPTION:
3639 : !
3640 : ! This routine completes an async. transfer of an array communicated
3641 : ! with a communication pattern.
3642 : !
3643 : ! The link between StartTransfer and EndTransfer is made possible
3644 : ! by the InHandle and OutHandle: they reflect the status of
3645 : ! the ongoing transfer. When this routine completes, a valid
3646 : ! and accessible copy of the OutBuf is ready for use.
3647 : ! The buffers may not be accessed until after the
3648 : ! completion of ParEndTransfer.
3649 : !
3650 : ! !BUGS:
3651 : !
3652 : ! It is assumed that the buffers are passed to this routine by
3653 : ! reference.
3654 : !
3655 : ! !REVISION HISTORY:
3656 : ! 01.10.01 Sawyer Creation from ParEndTransferPattern
3657 : ! 02.08.13 Sawyer Now uses mod_comm unless Use_Mpi_Types
3658 : ! 03.06.24 Sawyer All complexity now in mp_recvirr
3659 : !
3660 : !EOP
3661 : !-----------------------------------------------------------------------
3662 : !BOC
3663 :
3664 : CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN3D" )
3665 :
3666 0 : CALL mp_recvirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf(:,:,:),OutBuf(:,:,:) )
3667 :
3668 : CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN3D" )
3669 0 : RETURN
3670 : !EOC
3671 : END SUBROUTINE ParEndTransferPattern3D
3672 : !-----------------------------------------------------------------------
3673 :
3674 :
3675 : !-----------------------------------------------------------------------
3676 : !BOP
3677 : ! !IROUTINE: ParEndTransferPattern4D --- Complete an ASYNC Pattern Transfer
3678 : !
3679 : ! !INTERFACE:
3680 0 : SUBROUTINE ParEndTransferPattern4D( InComm, Pattern, InBuf, OutBuf )
3681 :
3682 : ! !USES:
3683 : USE mod_comm, ONLY : mp_recvirr
3684 : IMPLICIT NONE
3685 :
3686 : ! !INPUT PARAMETERS:
3687 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3688 : TYPE (ParPatternType), INTENT( IN ) :: Pattern ! Comm Pattern
3689 : REAL(CPP_REAL8), INTENT( IN ) :: InBuf(:,:,:,:) ! Input buffer
3690 :
3691 : ! !INPUT/OUTPUT PARAMETERS:
3692 : REAL(CPP_REAL8), INTENT( INOUT ) :: OutBuf(:,:,:,:)! Output buffer
3693 :
3694 : ! !DESCRIPTION:
3695 : !
3696 : ! This routine completes an async. transfer of an array communicated
3697 : ! with a communication pattern.
3698 : !
3699 : ! The link between StartTransfer and EndTransfer is made possible
3700 : ! by the InHandle and OutHandle: they reflect the status of
3701 : ! the ongoing transfer. When this routine completes, a valid
3702 : ! and accessible copy of the OutBuf is ready for use.
3703 : ! The buffers may not be accessed until after the
3704 : ! completion of ParEndTransfer.
3705 : !
3706 : ! !BUGS:
3707 : !
3708 : ! It is assumed that the buffers are passed to this routine by
3709 : ! reference.
3710 : !
3711 : ! !REVISION HISTORY:
3712 : ! 02.12.19 Sawyer Creation from ParEndTransferPattern
3713 : ! 03.06.24 Sawyer All complexity now in mp_recvirr
3714 : !
3715 : !EOP
3716 : !-----------------------------------------------------------------------
3717 : !BOC
3718 :
3719 : CPP_ENTER_PROCEDURE( "PARENDTRANSFERPATTERN4D" )
3720 :
3721 0 : CALL mp_recvirr( InComm,Pattern%SendDesc,Pattern%RecvDesc,InBuf,OutBuf )
3722 :
3723 : CPP_LEAVE_PROCEDURE( "PARENDTRANSFERPATTERN4D" )
3724 0 : RETURN
3725 : !EOC
3726 : END SUBROUTINE ParEndTransferPattern4D
3727 : !-----------------------------------------------------------------------
3728 :
3729 :
3730 : !-----------------------------------------------------------------------
3731 : !BOP
3732 : ! !IROUTINE: ParExchangeVectorReal --- Exchange a sparse packed vector
3733 : !
3734 : ! !INTERFACE:
3735 0 : SUBROUTINE ParExchangeVectorReal ( InComm, LenInVector, InVector, &
3736 : LenOutVector, OutVector )
3737 :
3738 : ! !USES:
3739 : IMPLICIT NONE
3740 :
3741 : ! !INPUT PARAMETERS:
3742 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3743 : INTEGER, INTENT( IN ) :: LenInVector( * ) ! Length on each PE
3744 : REAL(CPP_REAL8), INTENT( IN ):: InVector( * ) ! The input buffer
3745 :
3746 : ! !OUTPUT PARAMETERS:
3747 : INTEGER, INTENT( OUT ) :: LenOutVector( * ) ! Length on each PE
3748 : REAL(CPP_REAL8), INTENT( OUT ) :: OutVector( * ) ! The output buffer
3749 :
3750 : ! !DESCRIPTION:
3751 : !
3752 : ! This routine exchanges vectors stored in compressed format, i.e.,
3753 : ! in so-called compressed sparse row (CSR) format, with other
3754 : ! PEs. In essence it first exchanges the lengths with
3755 : ! MPI\_Alltoall, then exchanges the actual vectors (which can
3756 : ! differ in size) as MPI\_AlltoallV would. Since the latter is
3757 : ! inefficient, it is simulated using MPI\_Isend and MPI\_Recv.
3758 : !
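     : ! A minimal usage sketch (4 PEs assumed; values illustrative):
     : !
     : !    LenIn = (/ 2, 0, 1, 0 /)    ! 2 reals to PE 0, 1 real to PE 2
     : !    CALL ParExchangeVectorReal( Comm, LenIn, InVec, LenOut, OutVec )
     : !    ! On return, LenOut(pe+1) holds the count received from PE pe,
     : !    ! and OutVec holds the received values packed in PE order.
     : !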
3759 : ! !SYSTEM ROUTINES:
3760 : ! MPI_ISEND, MPI_RECV, MPI_WAITALL, MPI_ALLTOALL
3761 : !
3762 : ! !REVISION HISTORY:
3763 : ! 98.03.17 Sawyer Creation from F77 version
3764 : ! 98.03.30 Sawyer Removed assumed shape arrays due to problems
3765 : ! 99.01.18 Sawyer Added barrier for safety
3766 : ! 99.03.08 Sawyer USE_SHMEM version for CRAY only; untested
3767 : ! 99.06.01 Sawyer USE_SHMEM version revised per comments from Tom
3768 : ! 00.07.28 Sawyer Implemented with shared memory arenas
3769 : !
3770 : !EOP
3771 : !-----------------------------------------------------------------------
3772 : !BOC
3773 : !
3774 : ! !LOCAL VARIABLES:
3775 : INTEGER :: i, iscnt, ircnt, nr, pe, icnt, Nsize, Iam, Ierr
3776 : INTEGER :: Status(MPI_STATUS_SIZE)
3777 0 : Integer, allocatable :: Reqs(:), Stats(:)
3778 :
3779 : CPP_ENTER_PROCEDURE( "PAREXCHANGEVECTORREAL" )
3780 :
3781 0 : CALL MPI_COMM_SIZE( InComm, Nsize, Ierr )
3782 0 : CALL MPI_COMM_RANK( InComm, Iam, Ierr )
3783 :
3784 0 : allocate (Reqs(Nsize))
3785 0 : allocate (Stats(Nsize*MPI_STATUS_SIZE))
3786 :
3787 : #if defined( MY_ALLTOALL )
3788 : DO pe = 0, Nsize-1
3789 : !
3790 : ! Send the individual buffers with non-blocking sends
3791 : !
3792 : nr = LenInVector( pe + 1 )
3793 : CALL MPI_ISEND( nr, 1, CPP_MPI_INTEGER, pe, Iam+3000, &
3794 : InComm, Reqs( pe+1 ), Ierr )
3795 : ENDDO
3796 : DO pe = 0, Nsize - 1
3797 : !
3798 : ! Receive the buffers with MPI_Recv. Now we are blocking.
3799 : !
3800 : CALL MPI_RECV( nr, 1, CPP_MPI_INTEGER, pe, pe+3000, &
3801 : InComm, Status, Ierr )
3802 : LenOutVector(pe + 1) = nr
3803 : ENDDO
3804 : CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
3805 : #else
3806 : CALL MPI_ALLTOALL( LenInVector, 1, CPP_MPI_INTEGER, &
3807 : LenOutVector, 1, CPP_MPI_INTEGER, &
3808 0 : InComm, Ierr )
3809 : #endif
3810 : !
3811 : ! Over all processes
3812 : !
3813 0 : icnt = 1
3814 0 : DO pe = 0, Nsize-1
3815 : !
3816 : ! Send the individual buffers with non-blocking sends
3817 : !
3818 0 : nr = LenInVector( pe + 1 )
3819 0 : IF ( nr .gt. 0 ) THEN
3820 0 : CALL MPI_ISEND( InVector( icnt ), nr, &
3821 : CPP_MPI_REAL8, pe, Iam+2000, &
3822 0 : InComm, Reqs( pe+1 ), Ierr )
3823 : ELSE
3824 0 : Reqs( pe+1 ) = MPI_REQUEST_NULL
3825 : ENDIF
3826 0 : icnt = icnt + nr
3827 : ENDDO
3828 :
3829 : !
3830 : ! Over all processes
3831 : !
3832 0 : icnt = 1
3833 0 : DO pe = 0, Nsize - 1
3834 : !
3835 : ! Receive the buffers with MPI_Recv. Now we are blocking.
3836 : !
3837 0 : nr = LenOutVector(pe + 1)
3838 0 : IF ( nr .gt. 0 ) THEN
3839 0 : CALL MPI_RECV( OutVector( icnt ), nr, &
3840 : CPP_MPI_REAL8, pe, pe+2000, &
3841 0 : InComm, Status, Ierr )
3842 : ENDIF
3843 0 : icnt = icnt + nr
3844 : ENDDO
3845 0 : CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
3846 :
3847 0 : deallocate (Reqs)
3848 0 : deallocate (Stats)
3849 :
3850 : CPP_LEAVE_PROCEDURE( "PAREXCHANGEVECTORREAL" )
3851 :
3852 0 : RETURN
3853 : !EOC
3854 : END SUBROUTINE ParExchangeVectorReal
3855 : !-----------------------------------------------------------------------
3856 :
3857 : !-----------------------------------------------------------------------
3858 : !BOP
3859 : ! !IROUTINE: ParExchangeVectorReal4 --- Exchange a sparse packed vector
3860 : !
3861 : ! !INTERFACE:
3862 0 : SUBROUTINE ParExchangeVectorReal4 ( InComm, LenInVector, InVector,&
3863 : LenOutVector, OutVector )
3864 :
3865 : ! !USES:
3866 : IMPLICIT NONE
3867 :
3868 : ! !INPUT PARAMETERS:
3869 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3870 : INTEGER, INTENT( IN ) :: LenInVector( * ) ! Length on each PE
3871 : REAL(CPP_REAL4), INTENT( IN ):: InVector( * ) ! The input buffer
3872 :
3873 : ! !OUTPUT PARAMETERS:
3874 : INTEGER, INTENT( OUT ) :: LenOutVector( * ) ! Length on each PE
3875 : REAL(CPP_REAL4), INTENT( OUT ) :: OutVector( * ) ! The output buffer
3876 :
3877 : ! !DESCRIPTION:
3878 : !
3879 : ! This routine exchanges vectors stored in compressed format, i.e.,
3880 : ! in so-called compressed sparse row (CSR) format, with other
3881 : ! PEs. In essence it first exchanges the lengths with
3882 : ! MPI\_Alltoall, then exchanges the actual vectors (which can
3883 : ! differ in size) as MPI\_AlltoallV would. Since the latter is
3884 : ! inefficient, it is simulated using MPI\_Isend and MPI\_Recv.
3885 : !
3886 : ! !SYSTEM ROUTINES:
3887 : ! MPI_ISEND, MPI_RECV, MPI_WAITALL, MPI_ALLTOALL
3888 : !
3889 : ! !REVISION HISTORY:
3890 : ! 98.03.17 Sawyer Creation from F77 version
3891 : ! 98.03.30 Sawyer Removed assumed shape arrays due to problems
3892 : ! 99.01.18 Sawyer Added barrier for safety
3893 : ! 99.03.08 Sawyer USE_SHMEM version for CRAY only; untested
3894 : ! 99.06.01 Sawyer USE_SHMEM version revised per comments from Tom
3895 : ! 00.07.28 Sawyer Implemented with shared memory arenas
3896 : !
3897 : !EOP
3898 : !-----------------------------------------------------------------------
3899 : !BOC
3900 : !
3901 : ! !LOCAL VARIABLES:
3902 : INTEGER :: i, iscnt, ircnt, nr, pe, icnt, Nsize, Iam, Ierr
3903 : INTEGER :: Status(MPI_STATUS_SIZE)
3904 0 : Integer, allocatable :: Reqs(:), Stats(:)
3905 :
3906 : CPP_ENTER_PROCEDURE( "PAREXCHANGEVECTORREAL4" )
3907 :
3908 0 : CALL MPI_COMM_SIZE( InComm, Nsize, Ierr )
3909 0 : CALL MPI_COMM_RANK( InComm, Iam, Ierr )
3910 :
3911 0 : allocate (Reqs(Nsize))
3912 0 : allocate (Stats(Nsize*MPI_STATUS_SIZE))
3913 :
3914 : #if defined( MY_ALLTOALL )
3915 : DO pe = 0, Nsize-1
3916 : !
3917 : ! Send the individual buffers with non-blocking sends
3918 : !
3919 : nr = LenInVector( pe + 1 )
3920 : CALL MPI_ISEND( nr, 1, CPP_MPI_INTEGER, pe, Iam+3000, &
3921 : InComm, Reqs( pe+1 ), Ierr )
3922 : ENDDO
3923 : DO pe = 0, Nsize - 1
3924 : !
3925 : ! Receive the buffers with MPI_Recv. Now we are blocking.
3926 : !
3927 : CALL MPI_RECV( nr, 1, CPP_MPI_INTEGER, pe, pe+3000, &
3928 : InComm, Status, Ierr )
3929 : LenOutVector(pe + 1) = nr
3930 : ENDDO
3931 : CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
3932 : #else
3933 : CALL MPI_ALLTOALL( LenInVector, 1, CPP_MPI_INTEGER, &
3934 : LenOutVector, 1, CPP_MPI_INTEGER, &
3935 0 : InComm, Ierr )
3936 : #endif
3937 : !
3938 : ! Over all processes
3939 : !
3940 0 : icnt = 1
3941 0 : DO pe = 0, Nsize-1
3942 : !
3943 : ! Send the individual buffers with non-blocking sends
3944 : !
3945 0 : nr = LenInVector( pe + 1 )
3946 0 : IF ( nr .gt. 0 ) THEN
3947 0 : CALL MPI_ISEND( InVector( icnt ), nr, &
3948 : CPP_MPI_REAL4, pe, Iam+2000, &
3949 0 : InComm, Reqs( pe+1 ), Ierr )
3950 : ELSE
3951 0 : Reqs( pe+1 ) = MPI_REQUEST_NULL
3952 : ENDIF
3953 0 : icnt = icnt + nr
3954 : ENDDO
3955 :
3956 : !
3957 : ! Over all processes
3958 : !
3959 0 : icnt = 1
3960 0 : DO pe = 0, Nsize - 1
3961 : !
3962 : ! Receive the buffers with MPI_Recv. Now we are blocking.
3963 : !
3964 0 : nr = LenOutVector(pe + 1)
3965 0 : IF ( nr .gt. 0 ) THEN
3966 0 : CALL MPI_RECV( OutVector( icnt ), nr, &
3967 : CPP_MPI_REAL4, pe, pe+2000, &
3968 0 : InComm, Status, Ierr )
3969 : ENDIF
3970 0 : icnt = icnt + nr
3971 : ENDDO
3972 0 : CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
3973 :
3974 0 : deallocate (Reqs)
3975 0 : deallocate (Stats)
3976 :
3977 : CPP_LEAVE_PROCEDURE( "PAREXCHANGEVECTORREAL4" )
3978 :
3979 0 : RETURN
3980 : !EOC
3981 : END SUBROUTINE ParExchangeVectorReal4
3982 : !-----------------------------------------------------------------------
3983 :
3984 :
3985 : !-----------------------------------------------------------------------
3986 : !BOP
3987 : ! !IROUTINE: ParExchangeVectorInt --- Exchange a sparse packed vector
3988 : !
3989 : ! !INTERFACE:
3990 35328 : SUBROUTINE ParExchangeVectorInt ( InComm, LenInVector, InVector, &
3991 : LenOutVector, OutVector )
3992 :
3993 : ! !USES:
3994 : IMPLICIT NONE
3995 :
3996 : ! !INPUT PARAMETERS:
3997 : INTEGER, INTENT( IN ) :: InComm ! Communicator
3998 : INTEGER, INTENT( IN ) :: LenInVector( * ) ! Length on each PE
3999 : INTEGER, INTENT( IN ) :: InVector( * ) ! The input buffer
4000 :
4001 : ! !OUTPUT PARAMETERS:
4002 : INTEGER, INTENT( OUT ) :: LenOutVector( * ) ! Length on each PE
4003 : INTEGER, INTENT( OUT ) :: OutVector( * ) ! The output buffer
4004 :
4005 : ! !DESCRIPTION:
4006 : !
4007 : ! This routine exchanges vectors stored in compressed format, i.e.,
4008 : ! in so-called compressed sparse row (CSR) format, with other
4009 : ! PEs. In essence it first exchanges the lengths with
4010 : ! MPI\_Alltoall, then exchanges the actual vectors (which can
4011 : ! differ in size) as MPI\_AlltoallV would. Since the latter is
4012 : ! inefficient, it is simulated using MPI\_Isend and MPI\_Recv.
4013 : !
4014 : ! !SYSTEM ROUTINES:
4015 : ! MPI_ISEND, MPI_RECV, MPI_WAITALL, MPI_ALLTOALL
4016 : !
4017 : ! !REVISION HISTORY:
4018 : ! 98.03.17 Sawyer Creation from F77 version
4019 : ! 98.03.30 Sawyer Removed assumed shape arrays due to problems
4020 : ! 99.01.18 Sawyer Added barrier for safety
4021 : ! 99.03.08 Sawyer USE_SHMEM version for CRAY only; untested
4022 : ! 99.06.01 Sawyer USE_SHMEM version revised per comments from Tom
4023 : ! 00.07.28 Sawyer Implemented with shared memory arenas
4024 : !
4025 : !EOP
4026 : !-----------------------------------------------------------------------
4027 : !BOC
4028 : !
4029 : ! !LOCAL VARIABLES:
4030 : INTEGER :: i, iscnt, ircnt, nr, pe, icnt, Nsize, Iam, Ierr
4031 : INTEGER :: Status(MPI_STATUS_SIZE)
4032 35328 : Integer, allocatable :: Reqs(:), Stats(:)
4033 :
4034 : CPP_ENTER_PROCEDURE( "PAREXCHANGEVECTORINT" )
4035 :
4036 35328 : CALL MPI_COMM_SIZE( InComm, Nsize, Ierr )
4037 35328 : CALL MPI_COMM_RANK( InComm, Iam, Ierr )
4038 :
4039 105984 : allocate (Reqs(Nsize))
4040 105984 : allocate (Stats(Nsize*MPI_STATUS_SIZE))
4041 :
4042 : #if defined( MY_ALLTOALL )
4043 : DO pe = 0, Nsize-1
4044 : !
4045 : ! Send the individual buffers with non-blocking sends
4046 : !
4047 : nr = LenInVector( pe + 1 )
4048 : CALL MPI_ISEND( nr, 1, &
4049 : MPI_INTEGER, pe, Iam+3000, &
4050 : InComm, Reqs( pe+1 ), Ierr )
4051 : ENDDO
4052 : DO pe = 0, Nsize - 1
4053 : !
4054 : ! Receive the buffers with MPI_Recv. Now we are blocking.
4055 : !
4056 : CALL MPI_RECV( nr, 1, &
4057 : MPI_INTEGER, pe, pe+3000, &
4058 : InComm, Status, Ierr )
4059 : LenOutVector(pe + 1) = nr
4060 : ENDDO
4061 : CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
4062 : #else
4063 : CALL MPI_ALLTOALL( LenInVector, 1, CPP_MPI_INTEGER, &
4064 : LenOutVector, 1, CPP_MPI_INTEGER, &
4065 35328 : InComm, Ierr )
4066 : #endif
4067 : !
4068 : ! Over all processes
4069 : !
4070 35328 : icnt = 1
4071 13601280 : DO pe = 0, Nsize-1
4072 : !
4073 : ! Send the individual buffers with non-blocking sends
4074 : !
4075 13565952 : nr = LenInVector( pe + 1 )
4076 13565952 : IF ( nr .gt. 0 ) THEN
4077 0 : CALL MPI_ISEND( InVector( icnt ), nr, &
4078 : CPP_MPI_INTEGER, pe, Iam+2000, &
4079 429696 : InComm, Reqs( pe+1 ), Ierr )
4080 : ELSE
4081 13136256 : Reqs( pe+1 ) = MPI_REQUEST_NULL
4082 : ENDIF
4083 13601280 : icnt = icnt + nr
4084 : ENDDO
4085 :
4086 : !
4087 : ! Over all processes
4088 : !
4089 35328 : icnt = 1
4090 13601280 : DO pe = 0, Nsize - 1
4091 : !
4092 : ! Receive the buffers with MPI_Recv. Now we are blocking.
4093 : !
4094 13565952 : nr = LenOutVector(pe + 1)
4095 13565952 : IF ( nr .gt. 0 ) THEN
4096 0 : CALL MPI_RECV( OutVector( icnt ), nr, &
4097 : CPP_MPI_INTEGER, pe, pe+2000, &
4098 429696 : InComm, Status, Ierr )
4099 : ENDIF
4100 13601280 : icnt = icnt + nr
4101 : ENDDO
4102 35328 : CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
4103 : !
4104 : ! WS 98.09.22 : This barrier is needed to synchronize. Why?
4105 : !
4106 35328 : CALL MPI_BARRIER( InComm, Ierr )
4107 :
4108 35328 : deallocate (Reqs)
4109 35328 : deallocate (Stats)
4110 :
4111 : CPP_LEAVE_PROCEDURE( "PAREXCHANGEVECTORINT" )
4112 :
4113 35328 : RETURN
4114 : !EOC
4115 : END SUBROUTINE ParExchangeVectorInt
4116 : !-----------------------------------------------------------------------
4117 :
4118 :
4119 : !-----------------------------------------------------------------------
4120 : !BOP
4121 : ! !ROUTINE: ParCollectiveBarrier --- Barrier: Simplest collective op.
4122 : !
4123 : ! !INTERFACE:
4124 0 : SUBROUTINE ParCollectiveBarrier( InComm )
4125 :
4126 : ! !USES:
4127 : IMPLICIT NONE
4128 : ! !INPUT PARAMETERS:
4129 : INTEGER, INTENT( IN ) :: InComm ! Communicator
4130 :
4131 : ! !DESCRIPTION:
4132 : !
4133 : ! This routine performs a barrier only within the communicator InComm.
4134 : !
4135 : ! !REVISION HISTORY:
4136 : ! 00.09.10 Sawyer Creation
4137 : !
4138 : !EOP
4139 : !---------------------------------------------------------------------
4140 : !BOC
4141 : INTEGER Ierror
4142 :
4143 0 : CALL MPI_Barrier(InComm, Ierror )
4144 :
4145 0 : RETURN
4146 : !EOC
4147 : END SUBROUTINE ParCollectiveBarrier
4148 : !-----------------------------------------------------------------------
4149 :
4150 : !-----------------------------------------------------------------------
4151 : !BOP
4152 : ! !ROUTINE: ParCollective0D --- Perform global Collective of a scalar
4153 : !
4154 : ! !INTERFACE:
4155 0 : SUBROUTINE ParCollective0D( InComm, Op, Var )
4156 :
4157 : ! !USES:
4158 : IMPLICIT NONE
4159 : ! !INPUT PARAMETERS:
4160 : INTEGER, INTENT( IN ) :: InComm ! Communicator
4161 : INTEGER, INTENT( IN ) :: Op ! Operation (see header)
4162 :
4163 : ! !INPUT/OUTPUT PARAMETERS:
4164 : REAL(CPP_REAL8), INTENT( INOUT ) :: Var ! partial Var in, Var out
4165 :
4166 : ! !DESCRIPTION:
4167 : !
4168 : ! This utility performs a collective operation over all processes in
4169 : ! communicator InComm.
4170 : !
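     : ! A minimal usage sketch (through the generic ParCollective
     : ! interface; Var holds a local partial value):
     : !
     : !    CALL ParCollective( InComm, SUMOP, Var )  ! Var becomes global sum
     : !    CALL ParCollective( InComm, BCSTOP, Var ) ! broadcast Var from PE 0
     : !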
4171 : ! !REVISION HISTORY:
4172 : ! 00.08.07 Sawyer Creation
4173 : !
4174 : !EOP
4175 : !---------------------------------------------------------------------
4176 : !BOC
4177 : INTEGER Ierror
4178 : REAL(CPP_REAL8) Tmp
4179 :
4180 0 : IF ( Op .EQ. BCSTOP ) THEN
4181 0 : CALL MPI_BCAST( Var, 1, CPP_MPI_REAL8, 0, InComm, Ierror )
4182 : ELSE
4183 : CALL MPI_ALLREDUCE( Var, Tmp, 1, CPP_MPI_REAL8, &
4184 0 : Op, InComm, Ierror )
4185 0 : Var = Tmp
4186 : ENDIF
4187 :
4188 0 : RETURN
4189 : !EOC
4190 : END SUBROUTINE ParCollective0D
4191 : !-----------------------------------------------------------------------
4192 :
4193 : !-----------------------------------------------------------------------
4194 : !BOP
4195 : ! !ROUTINE: ParCollective1D --- Perform component-wise global Collective of a vector
4196 : !
4197 : ! !INTERFACE:
4198 16128 : SUBROUTINE ParCollective1D( InComm, Op, Im, Var )
4199 :
4200 : ! !USES:
4201 : IMPLICIT NONE
4202 :
4203 : ! !INPUT PARAMETERS:
4204 : INTEGER, INTENT( IN ) :: InComm ! Communicator
4205 : INTEGER, INTENT( IN ) :: Op ! Operation (see header)
4206 : INTEGER, INTENT( IN ) :: Im ! Size of 1-D array
4207 :
4208 : ! !INPUT/OUTPUT PARAMETERS:
4209 : REAL(CPP_REAL8), INTENT( INOUT ) :: Var(Im) ! partial Var in, Var out
4210 :
4211 : ! !DESCRIPTION:
4212 : !
4213 : ! This utility performs a collective operation over all processes in
4214 : ! communicator InComm.
4215 : !
4216 : ! !REVISION HISTORY:
4217 : ! 00.08.07 Sawyer Creation
4218 : !
4219 : !EOP
4220 : !---------------------------------------------------------------------
4221 : !BOC
4222 : INTEGER Ierror
4223 32256 : REAL(CPP_REAL8) Tmp(Im)
4224 :
4225 16128 : IF ( Op .EQ. BCSTOP ) THEN
4226 0 : CALL MPI_BCAST( Var, Im, CPP_MPI_REAL8, 0, InComm, Ierror )
4227 : ELSE
4228 : CALL MPI_ALLREDUCE( Var, Tmp, Im, CPP_MPI_REAL8, &
4229 16128 : Op, InComm, Ierror )
4230 190848 : Var = Tmp
4231 : ENDIF
4232 :
4233 16128 : RETURN
4234 : !EOC
4235 : END SUBROUTINE ParCollective1D
4236 : !-----------------------------------------------------------------------
4237 :
4238 : !-----------------------------------------------------------------------
4239 : !BOP
4240 : ! !ROUTINE: ParCollective1DReal4 --- Perform component-wise global Collective of a vector
4241 : !
4242 : ! !INTERFACE:
4243 0 : SUBROUTINE ParCollective1DReal4( InComm, Op, Im, Var )
4244 :
4245 : ! !USES:
4246 : IMPLICIT NONE
4247 :
4248 : ! !INPUT PARAMETERS:
4249 : INTEGER, INTENT( IN ) :: InComm ! Communicator
4250 : INTEGER, INTENT( IN ) :: Op ! Operation (see header)
4251 : INTEGER, INTENT( IN ) :: Im ! Size of 1-D array
4252 :
4253 : ! !INPUT/OUTPUT PARAMETERS:
4254 : REAL(CPP_REAL4), INTENT( INOUT ) :: Var(Im) ! partial Var in, Var out
4255 :
4256 : ! !DESCRIPTION:
4257 : !
4258 : ! This utility performs a collective operation over all processes in
4259 : ! communicator InComm.
4260 : !
4261 : ! !REVISION HISTORY:
4262 : ! 00.08.07 Sawyer Creation
4263 : !
4264 : !EOP
4265 : !---------------------------------------------------------------------
4266 : !BOC
4267 : INTEGER Ierror
4268 0 : REAL(CPP_REAL4) Tmp(Im)
4269 :
4270 0 : IF ( Op .EQ. BCSTOP ) THEN
4271 0 : CALL MPI_BCAST( Var, Im, CPP_MPI_REAL4, 0, InComm, Ierror )
4272 : ELSE
4273 : CALL MPI_ALLREDUCE( Var, Tmp, Im, CPP_MPI_REAL4, &
4274 0 : Op, InComm, Ierror )
4275 0 : Var = Tmp
4276 : ENDIF
4277 :
4278 0 : RETURN
4279 : !EOC
4280 : END SUBROUTINE ParCollective1DReal4
4281 : !-----------------------------------------------------------------------
4282 :
4283 : !-----------------------------------------------------------------------
4284 : !BOP
4285 : ! !ROUTINE: ParCollective2D --- Perform component-wise collective operation
4286 : !
4287 : ! !INTERFACE:
4288 0 : SUBROUTINE ParCollective2D( InComm, Op, Im, Jm, Var )
4289 :
4290 : ! !USES:
4291 : IMPLICIT NONE
4292 :
4293 : ! !INPUT PARAMETERS:
4294 : INTEGER, INTENT( IN ) :: InComm ! Communicator
4295 : INTEGER, INTENT( IN ) :: Op ! Operation (see header)
4296 : INTEGER, INTENT( IN ) :: Im ! First dimension of 2-D array
4297 : INTEGER, INTENT( IN ) :: Jm ! Second dimension of 2-D array
4298 :
4299 : ! !INPUT/OUTPUT PARAMETERS:
4300 : REAL(CPP_REAL8), INTENT( INOUT ) :: Var(Im,Jm) ! partial Var in, Var out
4301 :
4302 : ! !DESCRIPTION:
4303 : !
4304 : ! This utility performs a collective operation over all processes in
4305 : ! communicator InComm.
4306 : !
4307 : ! !REVISION HISTORY:
4308 : ! 00.08.07 Sawyer Creation
4309 : !
4310 : !EOP
4311 : !---------------------------------------------------------------------
4312 : !BOC
4313 : INTEGER Ierror
4314 0 : REAL(CPP_REAL8) Tmp(Im,Jm)
4315 :
4316 0 : IF ( Op .EQ. BCSTOP ) THEN
4317 0 : CALL MPI_BCAST( Var, Im*Jm, CPP_MPI_REAL8, 0, InComm, Ierror )
4318 : ELSE
4319 : CALL MPI_ALLREDUCE( Var, Tmp, Im*Jm, CPP_MPI_REAL8, &
4320 0 : Op, InComm, Ierror )
4321 0 : Var = Tmp
4322 : ENDIF
4323 :
4324 0 : RETURN
4325 : !EOC
4326 : END SUBROUTINE ParCollective2D
4327 : !-----------------------------------------------------------------------
4328 :
4329 : !-----------------------------------------------------------------------
4330 : !BOP
4331 : ! !ROUTINE: ParCollective2DReal4 --- Perform component-wise collective operation
4332 : !
4333 : ! !INTERFACE:
4334 0 : SUBROUTINE ParCollective2DReal4( InComm, Op, Im, Jm, Var )
4335 :
4336 : ! !USES:
4337 : IMPLICIT NONE
4338 :
4339 : ! !INPUT PARAMETERS:
4340 : INTEGER, INTENT( IN ) :: InComm ! Communicator
4341 : INTEGER, INTENT( IN ) :: Op ! Operation (see header)
4342 : INTEGER, INTENT( IN ) :: Im ! First dimension of 2-D array
4343 : INTEGER, INTENT( IN ) :: Jm ! Second dimension of 2-D array
4344 :
4345 : ! !INPUT/OUTPUT PARAMETERS:
4346 : REAL(CPP_REAL4), INTENT( INOUT ) :: Var(Im,Jm) ! partial Var in, Var out
4347 :
4348 : ! !DESCRIPTION:
4349 : !
4350 : ! This utility performs a collective operation over all processes in
4351 : ! communicator InComm.
4352 : !
4353 : ! !REVISION HISTORY:
4354 : ! 00.08.07 Sawyer Creation
4355 : !
4356 : !EOP
4357 : !---------------------------------------------------------------------
4358 : !BOC
4359 : INTEGER Ierror
4360 0 : REAL(CPP_REAL4) Tmp(Im,Jm)
4361 :
4362 0 : IF ( Op .EQ. BCSTOP ) THEN
4363 0 : CALL MPI_BCAST( Var, Im*Jm, CPP_MPI_REAL4, 0, InComm, Ierror )
4364 : ELSE
4365 : CALL MPI_ALLREDUCE( Var, Tmp, Im*Jm, CPP_MPI_REAL4, &
4366 0 : Op, InComm, Ierror )
4367 0 : Var = Tmp
4368 : ENDIF
4369 :
4370 0 : RETURN
4371 : !EOC
4372 : END SUBROUTINE ParCollective2DReal4
4373 : !-----------------------------------------------------------------------
4374 :
4375 : !-----------------------------------------------------------------------
4376 : !BOP
4377 : ! !ROUTINE: ParCollective3D --- Perform component-wise global Collective of a vector
4378 : !
4379 : ! !INTERFACE:
4380 0 : SUBROUTINE ParCollective3D( InComm, Op, Im, Jm, Lm, Var )
4381 :
4382 : ! !USES:
4383 : IMPLICIT NONE
4384 :
4385 : ! !INPUT PARAMETERS:
4386 : INTEGER, INTENT( IN ) :: InComm ! Communicator
4387 : INTEGER, INTENT( IN ) :: Op ! Operation (see header)
4388 : INTEGER, INTENT( IN ) :: Im ! First dimension of 3-D array
4389 : INTEGER, INTENT( IN ) :: Jm ! Second dimension of 3-D array
4390 : INTEGER, INTENT( IN ) :: Lm ! Third dimension of 3-D array
4391 :
4392 : ! !INPUT/OUTPUT PARAMETERS:
4393 : REAL(CPP_REAL8), INTENT( INOUT ):: Var(Im,Jm,LM) ! partial Var in, Var out
4394 :
4395 : ! !DESCRIPTION:
4396 : !
4397 : ! This utility performs a collective operation over all processes in
4398 : ! communicator InComm.
4399 : !
4400 : ! !REVISION HISTORY:
4401 : ! 00.08.07 Sawyer Creation
4402 : !
4403 : !EOP
4404 : !---------------------------------------------------------------------
4405 : !BOC
4406 : INTEGER Ierror
4407 0 : REAL(CPP_REAL8) Tmp(Im,Jm,Lm)
4408 :
4409 0 : IF ( Op .EQ. BCSTOP ) THEN
4410 0 : CALL MPI_BCAST( Var, Im*Jm*Lm, CPP_MPI_REAL8, 0, InComm, Ierror )
4411 : ELSE
4412 : CALL MPI_ALLREDUCE( Var, Tmp, Im*Jm*Lm, CPP_MPI_REAL8, &
4413 0 : Op, InComm, Ierror )
4414 0 : Var = Tmp
4415 : ENDIF
4416 :
4417 0 : RETURN
4418 : !EOC
4419 : END SUBROUTINE ParCollective3D
4420 : !-----------------------------------------------------------------------
4421 :
4422 : !-----------------------------------------------------------------------
4423 : !BOP
4424 : ! !ROUTINE: ParCollective0DInt --- Perform global Collective of a scalar
4425 : !
4426 : ! !INTERFACE:
4427 16128 : SUBROUTINE ParCollective0DInt( InComm, Op, Var )
4428 :
4429 : ! !USES:
4430 : IMPLICIT NONE
4431 :
4432 : ! !INPUT PARAMETERS:
4433 : INTEGER, INTENT( IN ) :: InComm ! Communicator
4434 : INTEGER, INTENT( IN ) :: Op ! Operation (see header)
4435 :
4436 : ! !INPUT/OUTPUT PARAMETERS:
4437 : INTEGER, INTENT( INOUT ) :: Var ! partial Var in, Var out
4438 :
4439 : ! !DESCRIPTION:
4440 : !
4441 : ! This utility performs a collective operation over all processes in
4442 : ! communicator InComm.
4443 : !
4444 : ! !REVISION HISTORY:
4445 : ! 00.08.07 Sawyer Creation
4446 : !
4447 : !EOP
4448 : !---------------------------------------------------------------------
4449 : !BOC
4450 : INTEGER Ierror
4451 : INTEGER Tmp
4452 :
4453 16128 : IF ( Op .EQ. BCSTOP ) THEN
4454 0 : CALL MPI_BCAST( Var, 1, CPP_MPI_INTEGER, 0, InComm, Ierror )
4455 : ELSE
4456 16128 : CALL MPI_ALLREDUCE( Var,Tmp,1,CPP_MPI_INTEGER,Op,InComm,Ierror )
4457 16128 : Var = Tmp
4458 : ENDIF
4459 :
4460 16128 : RETURN
4461 : !EOC
4462 : END SUBROUTINE ParCollective0DInt
4463 : !-----------------------------------------------------------------------
4464 :
4465 : !-----------------------------------------------------------------------
4466 : !BOP
4467 : ! !ROUTINE: ParCollective0DStr --- Perform global Collective of a string
4468 : !
4469 : ! !INTERFACE:
4470 0 : SUBROUTINE ParCollective0DStr( InComm, Op, Var )
4471 :
4472 : ! !USES:
4473 : IMPLICIT NONE
4474 :
4475 : ! !INPUT PARAMETERS:
4476 : INTEGER, INTENT( IN ) :: InComm ! Communicator
4477 : INTEGER, INTENT( IN ) :: Op ! Operation (see header)
4478 :
4479 : ! !INPUT/OUTPUT PARAMETERS:
4480 : CHARACTER (LEN=*), INTENT( INOUT ) :: Var ! partial Var in, Var out
4481 :
4482 : ! !DESCRIPTION:
4483 : !
4484 : ! This utility performs a collective operation over all processes in
4485 : ! communicator InComm.
4486 : !
4487 : ! !REVISION HISTORY:
4488 : ! 00.08.07 Sawyer Creation
4489 : !
4490 : !EOP
4491 : !---------------------------------------------------------------------
4492 : !BOC
4493 : INTEGER Ierror, StrLen
4494 :
4495 0 : StrLen = LEN(Var)
4496 0 : IF ( Op .EQ. BCSTOP ) THEN
4497 0 : CALL MPI_BCAST( Var, StrLen, MPI_CHARACTER, 0, InComm, Ierror )
4498 : ELSE
4499 0 : write(iulog,*) "global reduction of string not supported"
4500 : ENDIF
4501 :
4502 0 : RETURN
4503 : !EOC
4504 : END SUBROUTINE ParCollective0DStr
4505 : !-----------------------------------------------------------------------
4506 :
4507 : !-----------------------------------------------------------------------
4508 : !BOP
4509 : ! !ROUTINE: ParCollective1DStr --- Perform global Collective of a string
4510 : !
4511 : ! !INTERFACE:
4512 0 : SUBROUTINE ParCollective1DStr( InComm, Op, Im, Var )
4513 :
4514 : ! !USES:
4515 : IMPLICIT NONE
4516 :
4517 : ! !INPUT PARAMETERS:
4518 : INTEGER, INTENT( IN ) :: InComm ! Communicator
4519 : INTEGER, INTENT( IN ) :: Op ! Operation (see header)
4520 : INTEGER, INTENT( IN ) :: Im ! Size of 1-D array
4521 :
4522 : ! !INPUT/OUTPUT PARAMETERS:
4523 : CHARACTER (LEN=*), INTENT( INOUT ) :: Var(:) ! partial Var in, Var out
4524 :
4525 : ! !DESCRIPTION:
4526 : !
4527 : ! This utility performs a collective operation over all processes in
4528 : ! communicator InComm.
4529 : !
4530 : ! !REVISION HISTORY:
4531 : ! 00.08.07 Sawyer Creation
4532 : !
4533 : !EOP
4534 : !---------------------------------------------------------------------
4535 : !BOC
4536 : INTEGER Ierror, StrLen
4537 :
4538 0 : StrLen = LEN(Var(1))
4539 0 : IF ( Op .EQ. BCSTOP ) THEN
4540 0 : CALL MPI_BCAST( Var, Im*StrLen, MPI_CHARACTER, 0, InComm, Ierror )
4541 : ELSE
4542 0 : write(iulog,*) "global reduction of string not supported"
4543 : ENDIF
4544 :
4545 0 : RETURN
4546 : !EOC
4547 : END SUBROUTINE ParCollective1DStr
4548 : !-----------------------------------------------------------------------
4549 :
4550 : !-----------------------------------------------------------------------
4551 : !BOP
4552 : ! !ROUTINE: ParCollective1DInt --- Perform component-wise global
4553 : ! collective operations of int vector
4554 : !
4555 : ! !INTERFACE:
4556 3072 : SUBROUTINE ParCollective1DInt( InComm, Op, Im, Var )
4557 :
4558 : ! !USES:
4559 : IMPLICIT NONE
4560 :
4561 : ! !INPUT PARAMETERS:
4562 : INTEGER, INTENT( IN ) :: InComm ! Communicator
4563 : INTEGER, INTENT( IN ) :: Op ! Operation (see header)
4564 : INTEGER, INTENT( IN ) :: Im ! Size of 1-D array
4565 :
4566 : ! !INPUT/OUTPUT PARAMETERS:
4567 : INTEGER, INTENT( INOUT ) :: Var(Im) ! partial Var in, Var out
4568 :
4569 : ! !DESCRIPTION:
4570 : !
4571 : ! This utility performs a collective operation over all processes in
4572 : ! communicator InComm.
4573 : !
4574 : ! !REVISION HISTORY:
4575 : ! 00.08.07 Sawyer Creation
4576 : !
4577 : !EOP
4578 : !---------------------------------------------------------------------
4579 : !BOC
4580 : INTEGER Ierror
4581 6144 : INTEGER Tmp(Im)
4582 :
4583 3072 : IF ( Op .EQ. BCSTOP ) THEN
4584 0 : CALL MPI_BCAST( Var, Im, CPP_MPI_INTEGER, 0, InComm, Ierror )
4585 : ELSE
4586 3072 : CALL MPI_ALLREDUCE( Var,Tmp,Im,CPP_MPI_INTEGER,Op,InComm,Ierror )
4587 1182720 : Var = Tmp
4588 : ENDIF
4589 :
4590 3072 : RETURN
4591 : !EOC
4592 : END SUBROUTINE ParCollective1DInt
4593 : !-----------------------------------------------------------------------
4594 : !-----------------------------------------------------------------------
4595 : !BOP
4596 : ! !ROUTINE: ParCollective2DInt --- Perform component-wise collective op.
4597 : !
4598 : ! !INTERFACE:
4599 0 : SUBROUTINE ParCollective2DInt( InComm, Op, Im, Jm, Var )
4600 :
4601 : ! !USES:
4602 : IMPLICIT NONE
4603 :
4604 : ! !INPUT PARAMETERS:
4605 : INTEGER, INTENT( IN ) :: InComm ! Communicator
4606 : INTEGER, INTENT( IN ) :: Op ! Operation (see header)
4607 : INTEGER, INTENT( IN ) :: Im ! First dimension of 2-D array
4608 : INTEGER, INTENT( IN ) :: Jm ! Second dimension of 2-D array
4609 :
4610 : ! !INPUT/OUTPUT PARAMETERS:
4611 : INTEGER, INTENT( INOUT ):: Var(Im,Jm) ! partial Var in, Var out
4612 :
4613 : ! !DESCRIPTION:
4614 : !
4615 : ! This utility performs a collective operation over all processes in
4616 : ! communicator InComm.
4617 : !
4618 : ! !REVISION HISTORY:
4619 : ! 00.08.07 Sawyer Creation
4620 : !
4621 : !EOP
4622 : !---------------------------------------------------------------------
4623 : !BOC
4624 : INTEGER Ierror
4625 0 : INTEGER Tmp(Im,Jm)
4626 :
4627 0 : IF ( Op .EQ. BCSTOP ) THEN
4628 0 : CALL MPI_BCAST( Var, Im*Jm, CPP_MPI_INTEGER, 0, InComm, Ierror )
4629 : ELSE
4630 : CALL MPI_ALLREDUCE( Var, Tmp, Im*Jm, CPP_MPI_INTEGER, &
4631 0 : Op, InComm, Ierror )
4632 0 : Var = Tmp
4633 : ENDIF
4634 :
4635 0 : RETURN
4636 : !EOC
4637 : END SUBROUTINE ParCollective2DInt
4638 : !-----------------------------------------------------------------------
4639 : # ifdef _SMEMORY
4640 : !-----------------------------------------------------------------------
4641 : !BOP
4642 : ! !IROUTINE: ParExchangeLength --- Exchange a sparse packed vector
4643 : !
4644 : ! !INTERFACE:
4645 17664 : SUBROUTINE ParExchangeLength ( InComm, LenInVector, LenOutVector)
4646 :
4647 : ! !USES:
4648 : IMPLICIT NONE
4649 :
4650 : ! !INPUT PARAMETERS:
4651 : INTEGER, INTENT( IN ) :: InComm ! Communicator
4652 : INTEGER, INTENT( IN ) :: LenInVector( * ) ! Length on each PE
4653 :
4654 : ! !OUTPUT PARAMETERS:
4655 : INTEGER, INTENT( OUT ) :: LenOutVector( * ) ! Length on each PE
4656 :
4657 : ! !DESCRIPTION:
4658 : !
4659 : ! This routine exchanges only the segment lengths that precede an
4660 : ! exchange of vectors stored in compressed sparse row (CSR) format
4661 : ! with other PEs. The lengths are exchanged with MPI\_Alltoall,
4662 : ! or, when MY_ALLTOALL is defined, with an equivalent hand-coded
4663 : ! sequence of MPI\_Isend and MPI\_Recv calls; the vectors themselves
4664 : ! are exchanged separately.
4665 : !
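     : ! A sketch of the intended use (illustrative; the receive buffer
     : ! is sized from the returned lengths):
     : !
     : !    CALL ParExchangeLength( InComm, LenIn, LenOut )
     : !    total = SUM( LenOut(1:npes) )    ! npes: size of InComm
     : !    ! allocate the output vector with size total, then exchange data
     : !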
4666 : ! !SYSTEM ROUTINES:
4667 : ! MPI_ISEND, MPI_RECV, MPI_WAITALL, MPI_ALLTOALL
4668 : !
4669 : ! !REVISION HISTORY:
4670 : ! 98.03.17 Sawyer Creation from F77 version
4671 : ! 98.03.30 Sawyer Removed assumed shape arrays due to problems
4672 : ! 99.01.18 Sawyer Added barrier for safety
4673 : ! 99.03.08 Sawyer USE_SHMEM version for CRAY only; untested
4674 : ! 99.06.01 Sawyer USE_SHMEM version revised per comments from Tom
4675 : ! 00.07.28 Sawyer Implemented with shared memory arenas
4676 : !
4677 : !EOP
4678 : !-----------------------------------------------------------------------
4679 : !BOC
4680 : !
4681 : ! !LOCAL VARIABLES:
4682 : INTEGER :: i, iscnt, ircnt, nr, pe, icnt, Nsize, Iam, Ierr
4683 : INTEGER :: Status(MPI_STATUS_SIZE)
4684 17664 : Integer, allocatable :: Reqs(:), Stats(:)
4685 :
4686 : CPP_ENTER_PROCEDURE( "PAREXCHANGELENGTH" )
4687 :
4688 17664 : CALL MPI_COMM_SIZE( InComm, Nsize, Ierr )
4689 17664 : CALL MPI_COMM_RANK( InComm, Iam, Ierr )
4690 :
4691 52992 : allocate (Reqs(Nsize))
4692 52992 : allocate (Stats(Nsize*MPI_STATUS_SIZE))
4693 :
4694 : #if defined( MY_ALLTOALL )
4695 : DO pe = 0, Nsize-1
4696 : !
4697 : ! Send the individual lengths with non-blocking sends. Send directly
4698 : ! from LenInVector: a non-blocking send buffer must not be modified
4699 : ! before the send completes (a reused scalar would be overwritten).
4700 : CALL MPI_ISEND( LenInVector( pe+1 ), 1, &
4701 : MPI_INTEGER, pe, Iam+3000, &
4702 : InComm, Reqs( pe+1 ), Ierr )
4703 : ENDDO
4704 : DO pe = 0, Nsize - 1
4705 : !
4706 : ! Receive the lengths with MPI_Recv. Now we are blocking.
4707 : !
4708 : CALL MPI_RECV( nr, 1, &
4709 : MPI_INTEGER, pe, pe+3000, &
4710 : InComm, Status, Ierr )
4711 : LenOutVector(pe + 1) = nr
4712 : ENDDO
4713 : CALL MPI_WAITALL( Nsize, Reqs, Stats, Ierr )
4714 :
4715 :
4716 : #else
4717 : CALL MPI_ALLTOALL( LenInVector, 1, CPP_MPI_INTEGER, &
4718 : LenOutVector, 1, CPP_MPI_INTEGER, &
4719 17664 : InComm, Ierr )
4720 : #endif
4721 17664 : deallocate (Reqs)
4722 17664 : deallocate (Stats)
4723 :
4724 17664 : CALL MPI_BARRIER( InComm, Ierr )
4725 :
4726 : CPP_LEAVE_PROCEDURE( "PAREXCHANGELENGTH" )
4727 :
4728 17664 : RETURN
4729 : !EOC
4730 17664 : END SUBROUTINE ParExchangeLength
4731 : !-----------------------------------------------------------------------
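 : ! Hedged usage sketch (added for illustration; not in the original
 : ! source): sizing the receive side of a CSR vector exchange, assuming
 : ! sCount has been filled with per-destination element counts:
 : !
 : !   INTEGER :: sCount(npes), rCount(npes)
 : !   sCount = ...                          ! entries bound for each PE
 : !   CALL ParExchangeLength( comm, sCount, rCount )
 : !   ! rCount(p) now gives the number of entries PE p-1 will send
 : !   ! here, i.e. the sizes needed for the receive buffers.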
4732 : !-----------------------------------------------------------------------
4733 : !BOP
4734 : ! !IROUTINE: ParCalcInfoDecompToGhost --- calculates info about the pattern
4735 : !
4736 : ! !INTERFACE:
4737 3840 : subroutine ParCalcInfoDecompToGhost(InComm, DA,GB,Info)
4738 : !
4739 : ! !USES:
4740 : USE decompmodule, ONLY : DecompType,DecompInfo,DecompGlobalToLocal
4741 : USE ghostmodule, ONLY : GhostType,GhostInfo
4742 : IMPLICIT NONE
4743 :
4744 : ! !INPUT PARAMETERS:
4745 : integer, intent(in) :: InComm ! communicator
4746 : type(DecompType), intent(in) :: DA ! Source Decomp Desc
4747 : type(GhostType) , intent(in) :: GB ! Destination Ghost Desc
4748 :
4749 : ! !OUTPUT PARAMETERS:
4750 : type (ParInfoType), intent(out) :: Info ! Info structure
4751 : !
4752 : ! !DESCRIPTION:
4753 : ! This routine calculates segment-count information about a
4754 : ! communication pattern that transforms from a decomposition to a
4755 : ! ghosted decomposition. It replicates the counting algorithm of
4756 : ! the ParPatternDecompToGhost subroutine.
4757 : !
4758 : ! !SYSTEM ROUTINES:
4759 : ! MPI_COMM_SIZE, MPI_COMM_RANK, MPI_ALLREDUCE
4760 : !
4761 : ! !REVISION HISTORY:
4762 : ! 07.09.04 Dennis Creation based on algorithm in ParPatternDecompToGhost
4763 : !
4764 : !EOP
4765 : !-----------------------------------------------------------------------
4766 : !BOC
4767 : ! !LOCAL VARIABLES:
4768 : integer :: nTags,oldpe,oldlocal,sTag,eTag,nCount
4769 : integer :: j,pe,local,tag,ierr,iam,npes
4770 : integer :: npesA,npesB,tmpA,tmp1B,tmp2B,tmp3B
4771 3840 : integer, allocatable :: sCount(:),rCount(:)
4772 :
4773 3840 : call DecompInfo(DA,npesA,tmpA)
4774 3840 : call GhostInfo(GB,npesB,tmp1B,tmp2B,tmp3B)
4775 :
4776 3840 : call MPI_COMM_SIZE(InComm,npes,ierr)
4777 3840 : call MPI_COMM_RANK(InComm,iam,ierr)
4778 :
4779 15360 : allocate(sCount(npes),rCount(npes))
4780 1478400 : sCount=0
4781 1478400 : rCount=0
4782 3840 : if(iam .lt. npesB) then
4783 : ! Parse through all the tags in the local segment
4784 3840 : nTags = SIZE(GB%Local%Head(iam+1)%StartTags)
4785 714112 : do j=1,nTags
4786 710272 : oldpe = -1
4787 710272 : oldlocal = 0
4788 710272 : sTag = GB%Local%Head(iam+1)%StartTags(j)
4789 710272 : eTag = GB%Local%Head(iam+1)%EndTags(j)
4790 40914304 : do tag = sTag,eTag
4791 40910464 : if(tag > 0) then
4792 : !
4793 : ! Determine the index and PE of this entry on A. This might be inlined later
4794 : !
4795 39526272 : call DecompGlobalToLocal(DA,tag,Local,Pe)
4796 : !
4797 : ! A new segment starts when the owning PE changes or the local index jumps
4798 : !
4799 39526272 : if( pe /= oldpe .or. local /= oldlocal+1) then
4800 3293856 : sCount(pe+1) = sCount(pe+1) + 1
4801 : endif
4802 39526272 : oldpe = pe ! Update PE
4803 39526272 : oldlocal = local ! Update local index
4804 : endif
4805 : enddo
4806 : enddo
4807 : endif
4808 :
4809 : ! Calculate the length of receive segments
4810 3840 : call ParExchangeLength(InComm,sCount,rCount)
4811 : ! Record some information
4812 1478400 : Info%numSendSeg = SUM(sCount)
4813 1478400 : Info%numSendNeigh = COUNT(sCount > 0)
4814 :
4815 1478400 : Info%numRecvSeg = SUM(rCount)
4816 1478400 : Info%numRecvNeigh = COUNT(rCount > 0)
4817 3840 : nCount=MAX(Info%numSendSeg,Info%numRecvSeg)
4818 3840 : call MPI_ALLREDUCE(nCount,Info%maxNumSeg,1,INT4,MPI_MAX,InComm,ierr)
4819 :
4820 3840 : deallocate(sCount,rCount)
4821 :
4822 : CPP_LEAVE_PROCEDURE( "PARCALCINFODECOMPTOGHOST" )
4823 3840 : RETURN
4824 : !EOC
4825 3840 : end subroutine ParCalcInfoDecompToGhost
4826 : !-----------------------------------------------------------------------
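 : ! Worked example of the segment counting above (hypothetical tags,
 : ! added for illustration): if successive tags map through
 : ! DecompGlobalToLocal to (pe,local) pairs (0,4), (0,5), (1,9), then
 : ! (0,4) opens a segment on PE 0, (0,5) extends it (same PE, local =
 : ! oldlocal+1), and (1,9) opens a new segment on PE 1, giving
 : ! sCount(1) = 1 and sCount(2) = 1.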
4827 : !-----------------------------------------------------------------------
4828 : !BOP
4829 : ! !IROUTINE: ParCalcInfoDecompToDecomp --- calculates info about the pattern
4830 : !
4831 : ! !INTERFACE:
4832 10752 : subroutine ParCalcInfoDecompToDecomp(InComm, DA,DB,Info)
4833 : !
4834 : ! !USES:
4835 : USE decompmodule, ONLY : DecompType,DecompInfo,DecompGlobalToLocal
4836 : IMPLICIT NONE
4837 :
4838 : ! !INPUT PARAMETERS:
4839 : integer, intent(in) :: InComm ! communicator
4840 : type(DecompType), intent(in) :: DA ! Source Decomp Desc
4841 : type(DecompType), intent(in) :: DB ! Destination Decomp Desc
4842 :
4843 : ! !OUTPUT PARAMETERS:
4844 : type (ParInfoType), intent(out) :: Info ! Info structure
4845 : !
4846 : ! !DESCRIPTION:
4847 : ! This routine calculates segment-count information about a
4848 : ! communication pattern that transforms from one decomposition to
4849 : ! another, i.e., a so-called "transpose". It replicates the counting
4850 : ! algorithm of the ParPatternDecompToDecomp subroutine.
4851 : !
4852 : ! !SYSTEM ROUTINES:
4853 : ! MPI_COMM_SIZE, MPI_COMM_RANK, MPI_ALLREDUCE
4854 : !
4855 : ! !REVISION HISTORY:
4856 : ! 07.09.04 Dennis Creation based on algorithm in ParPatternDecompToDecomp
4857 : !
4858 : !EOP
4859 : !-----------------------------------------------------------------------
4860 : !BOC
4861 : ! !LOCAL VARIABLES:
4862 : integer :: nCount,npes,iam,ierr
4863 : integer :: nTags,oldpe,oldlocal,sTag,eTag
4864 : integer :: j,pe,local,tag,tmpA,tmpB,npesA,npesB
4865 10752 : integer, allocatable :: sCount(:),rCount(:)
4866 :
4867 10752 : call DecompInfo(DA,npesA,tmpA)
4868 10752 : call DecompInfo(DB,npesB,tmpB)
4869 :
4870 10752 : call MPI_COMM_SIZE(InComm,npes,ierr)
4871 10752 : call MPI_COMM_RANK(InComm,iam,ierr)
4872 :
4873 43008 : allocate(sCount(npes),rCount(npes))
4874 4139520 : sCount=0
4875 4139520 : rCount=0
4876 10752 : if(iam .lt. npesB) then
4877 : ! Parse through all the tags in the local segment
4878 10752 : nTags = SIZE(DB%Head(iam+1)%StartTags)
4879 1027968 : do j=1,nTags
4880 1017216 : oldpe = -1
4881 1017216 : sTag = DB%Head(iam+1)%StartTags(j)
4882 1017216 : eTag = DB%Head(iam+1)%EndTags(j)
4883 23422848 : do tag = sTag,eTag
4884 : !
4885 : ! Determine the index and PE of this entry on A. This might be inlined later
4886 : !
4887 22394880 : call DecompGlobalToLocal(DA,tag,Local,Pe)
4888 : !
4889 : ! A new segment starts whenever the owning PE changes
4890 : !
4891 23412096 : if( pe /= oldpe ) then
4892 1866240 : oldpe = pe
4893 1866240 : sCount(pe+1) = sCount(pe+1) + 1
4894 : endif
4895 : enddo
4896 : enddo
4897 : endif
4898 : ! Calculate the length of receive segments
4899 10752 : call ParExchangeLength(InComm,sCount,rCount)
4900 : ! Record some information
4901 4139520 : Info%numSendSeg = SUM(sCount)
4902 4139520 : Info%numSendNeigh = COUNT(sCount > 0)
4903 :
4904 4139520 : Info%numRecvSeg = SUM(rCount)
4905 4139520 : Info%numRecvNeigh = COUNT(rCount > 0)
4906 10752 : nCount=MAX(Info%numSendSeg,Info%numRecvSeg)
4907 10752 : call MPI_ALLREDUCE(nCount,Info%maxNumSeg,1,INT4,MPI_MAX,InComm,ierr)
4908 :
4909 10752 : deallocate(sCount,rCount)
4910 :
4911 : CPP_LEAVE_PROCEDURE( "PARCALCINFODECOMPTODECOMP" )
4912 10752 : RETURN
4913 : !EOC
4914 10752 : end subroutine ParCalcInfoDecompToDecomp
4915 : !-----------------------------------------------------------------------
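 : ! Hedged sketch (added for illustration; not in the original source)
 : ! of how the computed ParInfoType might be used to size pattern
 : ! buffers before the full descriptors are built:
 : !
 : !   TYPE (ParInfoType) :: info
 : !   CALL ParCalcInfoDecompToDecomp( comm, DA, DB, info )
 : !   ! info%numSendSeg / info%numRecvSeg size the local segment lists;
 : !   ! info%maxNumSeg, reduced with MPI_MAX over the communicator, is a
 : !   ! globally consistent upper bound, so every process can allocate
 : !   ! identically sized descriptors in one pass.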
4916 : !-----------------------------------------------------------------------
4917 : !BOP
4918 : ! !IROUTINE: ParCalcInfoGhostToDecomp --- calculates info about the pattern
4919 : !
4920 : ! !INTERFACE:
4921 3072 : subroutine ParCalcInfoGhostToDecomp(InComm, GA,DB,Info)
4922 : !
4923 : ! !USES:
4924 : USE decompmodule, ONLY : DecompType,DecompInfo,DecompGlobalToLocal
4925 : USE ghostmodule, ONLY : GhostType,GhostInfo
4926 : IMPLICIT NONE
4927 :
4928 : ! !INPUT PARAMETERS:
4929 : integer, intent(in) :: InComm ! communicator
4930 : type(GhostType), intent(in) :: GA ! Source Ghost Desc
4931 : type(DecompType), intent(in) :: DB ! Destination Decomp Desc
4932 :
4933 : ! !OUTPUT PARAMETERS:
4934 : type (ParInfoType), intent(out) :: Info ! Info structure
4935 : !
4936 : ! !DESCRIPTION:
4937 : ! This routine calculates segment-count information about a
4938 : ! communication pattern that transforms from a ghosted decomposition
4939 : ! to a decomposition. It replicates the counting algorithm of the
4940 : ! ParPatternGhostToDecomp subroutine.
4941 : !
4942 : ! !SYSTEM ROUTINES:
4943 : ! MPI_COMM_SIZE, MPI_COMM_RANK, MPI_ALLREDUCE
4944 : !
4945 : ! !REVISION HISTORY:
4946 : ! 07.09.04 Dennis Creation based on algorithm in ParPatternGhostToDecomp
4947 : !
4948 : !EOP
4949 : !-----------------------------------------------------------------------
4950 : !BOC
4951 : ! !LOCAL VARIABLES:
4952 : integer :: nTags,oldpe,oldlocal,sTag,eTag
4953 : integer :: npes, nCount,iam,ierr
4954 : integer :: j,pe,local,tag,npesA,npesB,tmpB,tmp1A,tmp2A,tmp3A
4955 3072 : integer, allocatable :: sCount(:),rCount(:)
4956 :
4957 3072 : call GhostInfo(GA,npesA,tmp1A,tmp2A,tmp3A)
4958 3072 : call DecompInfo(DB,npesB,tmpB)
4959 :
4960 3072 : call MPI_COMM_SIZE(InComm,npes,ierr)
4961 3072 : call MPI_COMM_RANK(InComm,iam,ierr)
4962 :
4963 12288 : allocate(sCount(npes),rCount(npes))
4964 1182720 : sCount=0
4965 1182720 : rCount=0
4966 3072 : if(iam .lt. npesB) then
4967 : ! Parse through all the tags in the local segment
4968 3072 : nTags = SIZE(DB%Head(iam+1)%StartTags)
4969 1203456 : do j=1,nTags
4970 1200384 : oldpe = -1
4971 1200384 : oldlocal = 0
4972 1200384 : sTag = DB%Head(iam+1)%StartTags(j)
4973 1200384 : eTag = DB%Head(iam+1)%EndTags(j)
4974 15608064 : do tag = sTag,eTag
4975 : !
4976 : ! Determine the index and PE of this entry on A. This might be inlined later
4977 : !
4978 14404608 : call DecompGlobalToLocal(GA%Decomp,tag,Local,Pe)
4979 : !
4980 : ! Skip tags not found in the source decomposition (pe == -1); otherwise
4981 : ! a new segment starts when the owning PE changes or the local index jumps
4982 14404608 : if ((pe /= -1) .and. ( pe /= oldpe .or. local /= OldLocal+1 )) then
4983 1200384 : sCount(pe+1) = sCount(pe+1) + 1
4984 : endif
4985 14404608 : oldpe = pe
4986 30009600 : oldlocal = local
4987 : enddo
4988 : enddo
4989 : endif
4990 : ! Calculate the length of receive segments
4991 3072 : call ParExchangeLength(InComm,sCount,rCount)
4992 : ! Record some information
4993 1182720 : Info%numSendSeg = SUM(sCount)
4994 1182720 : Info%numSendNeigh = COUNT(sCount > 0)
4995 :
4996 1182720 : Info%numRecvSeg = SUM(rCount)
4997 1182720 : Info%numRecvNeigh = COUNT(rCount > 0)
4998 3072 : nCount=MAX(Info%numSendSeg,Info%numRecvSeg)
4999 3072 : call MPI_ALLREDUCE(nCount,Info%maxNumSeg,1,INT4,MPI_MAX,InComm,ierr)
5000 :
5001 3072 : deallocate(sCount,rCount)
5002 :
5003 : CPP_LEAVE_PROCEDURE( "PARCALCINFOGHOSTTODECOMP" )
5004 3072 : RETURN
5005 : !EOC
5006 3072 : end subroutine ParCalcInfoGhostToDecomp
5007 : !-----------------------------------------------------------------------
5008 : !-----------------------------------------------------------------------
5009 : !BOP
5010 : ! !IROUTINE: ParCalcInfoGhostToGhost --- calculates info about the pattern
5011 : !
5012 : ! !INTERFACE:
5013 0 : subroutine ParCalcInfoGhostToGhost(InComm, GA,GB,Info)
5014 : !
5015 : ! !USES:
5016 : USE decompmodule, ONLY : DecompGlobalToLocal
5017 : USE ghostmodule, ONLY : GhostType,GhostInfo
5018 : IMPLICIT NONE
5019 :
5020 : ! !INPUT PARAMETERS:
5021 : integer, intent(in) :: InComm ! communicator
5022 : type(GhostType), intent(in) :: GA ! Source Ghost Desc
5023 : type(GhostType), intent(in) :: GB ! Destination Ghost Desc
5024 :
5025 : ! !OUTPUT PARAMETERS:
5026 : type (ParInfoType), intent(out) :: Info ! Info structure
5027 : !
5028 : ! !DESCRIPTION:
5029 : ! This routine calculates segment-count information about a
5030 : ! communication pattern that transforms between two ghosted
5031 : ! decompositions. It replicates the counting algorithm of the
5032 : ! ParPatternGhostToGhost subroutine.
5033 : !
5034 : ! !SYSTEM ROUTINES:
5035 : ! MPI_COMM_SIZE, MPI_COMM_RANK, MPI_ALLREDUCE
5036 : !
5037 : ! !REVISION HISTORY:
5038 : ! 07.09.04 Dennis Creation based on algorithm in ParPatternGhostToGhost
5039 : !
5040 : !EOP
5041 : !-----------------------------------------------------------------------
5042 : !BOC
5043 : ! !LOCAL VARIABLES:
5044 : integer :: nTags,oldpe,oldlocal,sTag,eTag,ierr,nCount
5045 : integer :: j,pe,local,tag,npes,iam,npesA,npesB
5046 : integer :: tmp1A,tmp2A,tmp3A,tmp1B,tmp2B,tmp3B
5047 0 : integer, allocatable :: sCount(:),rCount(:)
5048 :
5049 0 : call GhostInfo(GA,npesA,tmp1A,tmp2A,tmp3A)
5050 0 : call GhostInfo(GB,npesB,tmp1B,tmp2B,tmp3B)
5051 :
5052 0 : call MPI_COMM_SIZE(InComm,npes,ierr)
5053 0 : call MPI_COMM_RANK(InComm,iam,ierr)
5054 :
5055 0 : allocate(sCount(npes),rCount(npes))
5056 0 : sCount=0
5057 0 : rCount=0
5058 0 : if(iam .lt. npesB) then
5059 : ! Parse through all the tags in the local segment
5060 0 : nTags = SIZE(GB%Local%Head(iam+1)%StartTags)
5061 0 : do j=1,nTags
5062 0 : oldpe = -1
5063 0 : oldlocal = 0
5064 0 : sTag = GB%Local%Head(iam+1)%StartTags(j)
5065 0 : eTag = GB%Local%Head(iam+1)%EndTags(j)
5066 0 : do tag = sTag,eTag
5067 0 : if (Tag > 0 ) THEN
5068 : !
5069 : ! Determine the index and PE of this entry on A. This might be inlined later
5070 : !
5071 0 : call DecompGlobalToLocal(GA%Decomp,tag,Local,Pe)
5072 : !
5073 : ! A new segment starts when the owning PE changes or the local index jumps
5074 : !
5075 0 : if( pe /= oldpe .or. local /= OldLocal+1 ) then
5076 0 : sCount(pe+1)=sCount(pe+1)+1
5077 : endif
5078 0 : oldpe = pe
5079 0 : oldlocal = local
5080 : endif
5081 : enddo
5082 : enddo
5083 : endif
5084 :
5085 : ! Calculate the length of receive segments
5086 0 : call ParExchangeLength(InComm,sCount,rCount)
5087 : ! Record some information
5088 0 : Info%numSendSeg = SUM(sCount)
5089 0 : Info%numSendNeigh = COUNT(sCount > 0)
5090 :
5091 0 : Info%numRecvSeg = SUM(rCount)
5092 0 : Info%numRecvNeigh = COUNT(rCount > 0)
5093 0 : nCount=MAX(Info%numSendSeg,Info%numRecvSeg)
5094 0 : call MPI_ALLREDUCE(nCount,Info%maxNumSeg,1,INT4,MPI_MAX,InComm,ierr)
5095 :
5096 0 : deallocate(sCount,rCount)
5097 :
5098 : CPP_LEAVE_PROCEDURE( "PARCALCINFOGHOSTTOGHOST" )
5099 0 : RETURN
5100 : !EOC
5101 0 : end subroutine ParCalcInfoGhostToGhost
5102 : # endif
5103 : #endif
5104 0 : END MODULE parutilitiesmodule